1 //===- AMDGPUAsmParser.cpp - Parse SI asm to MCInst instructions ----------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 9 #include "AMDGPU.h" 10 #include "AMDKernelCodeT.h" 11 #include "MCTargetDesc/AMDGPUMCTargetDesc.h" 12 #include "MCTargetDesc/AMDGPUTargetStreamer.h" 13 #include "SIDefines.h" 14 #include "SIInstrInfo.h" 15 #include "TargetInfo/AMDGPUTargetInfo.h" 16 #include "Utils/AMDGPUAsmUtils.h" 17 #include "Utils/AMDGPUBaseInfo.h" 18 #include "Utils/AMDKernelCodeTUtils.h" 19 #include "llvm/ADT/APFloat.h" 20 #include "llvm/ADT/APInt.h" 21 #include "llvm/ADT/ArrayRef.h" 22 #include "llvm/ADT/STLExtras.h" 23 #include "llvm/ADT/SmallBitVector.h" 24 #include "llvm/ADT/SmallString.h" 25 #include "llvm/ADT/StringRef.h" 26 #include "llvm/ADT/StringSwitch.h" 27 #include "llvm/ADT/Twine.h" 28 #include "llvm/BinaryFormat/ELF.h" 29 #include "llvm/MC/MCAsmInfo.h" 30 #include "llvm/MC/MCContext.h" 31 #include "llvm/MC/MCExpr.h" 32 #include "llvm/MC/MCInst.h" 33 #include "llvm/MC/MCInstrDesc.h" 34 #include "llvm/MC/MCInstrInfo.h" 35 #include "llvm/MC/MCParser/MCAsmLexer.h" 36 #include "llvm/MC/MCParser/MCAsmParser.h" 37 #include "llvm/MC/MCParser/MCAsmParserExtension.h" 38 #include "llvm/MC/MCParser/MCParsedAsmOperand.h" 39 #include "llvm/MC/MCParser/MCTargetAsmParser.h" 40 #include "llvm/MC/MCRegisterInfo.h" 41 #include "llvm/MC/MCStreamer.h" 42 #include "llvm/MC/MCSubtargetInfo.h" 43 #include "llvm/MC/MCSymbol.h" 44 #include "llvm/Support/AMDGPUMetadata.h" 45 #include "llvm/Support/AMDHSAKernelDescriptor.h" 46 #include "llvm/Support/Casting.h" 47 #include "llvm/Support/Compiler.h" 48 #include "llvm/Support/Error.h" 49 #include "llvm/Support/MachineValueType.h" 50 #include "llvm/Support/MathExtras.h" 51 #include "llvm/Support/SMLoc.h" 52 #include "llvm/Support/TargetParser.h" 53 #include "llvm/Support/TargetRegistry.h" 54 #include "llvm/Support/raw_ostream.h" 55 #include <algorithm> 56 #include <cassert> 57 #include <cstdint> 58 #include <cstring> 59 #include <iterator> 60 #include <map> 61 #include <memory> 62 #include <string> 63 64 using namespace llvm; 65 using namespace llvm::AMDGPU; 66 using namespace llvm::amdhsa; 67 68 namespace { 69 70 class AMDGPUAsmParser; 71 72 enum RegisterKind { IS_UNKNOWN, IS_VGPR, IS_SGPR, IS_AGPR, IS_TTMP, IS_SPECIAL }; 73 74 //===----------------------------------------------------------------------===// 75 // Operand 76 //===----------------------------------------------------------------------===// 77 78 class AMDGPUOperand : public MCParsedAsmOperand { 79 enum KindTy { 80 Token, 81 Immediate, 82 Register, 83 Expression 84 } Kind; 85 86 SMLoc StartLoc, EndLoc; 87 const AMDGPUAsmParser *AsmParser; 88 89 public: 90 AMDGPUOperand(KindTy Kind_, const AMDGPUAsmParser *AsmParser_) 91 : MCParsedAsmOperand(), Kind(Kind_), AsmParser(AsmParser_) {} 92 93 using Ptr = std::unique_ptr<AMDGPUOperand>; 94 95 struct Modifiers { 96 bool Abs = false; 97 bool Neg = false; 98 bool Sext = false; 99 100 bool hasFPModifiers() const { return Abs || Neg; } 101 bool hasIntModifiers() const { return Sext; } 102 bool hasModifiers() const { return hasFPModifiers() || hasIntModifiers(); } 103 104 int64_t getFPModifiersOperand() const { 105 int64_t Operand = 0; 106 Operand |= Abs ? SISrcMods::ABS : 0u; 107 Operand |= Neg ? SISrcMods::NEG : 0u; 108 return Operand; 109 } 110 111 int64_t getIntModifiersOperand() const { 112 int64_t Operand = 0; 113 Operand |= Sext ? SISrcMods::SEXT : 0u; 114 return Operand; 115 } 116 117 int64_t getModifiersOperand() const { 118 assert(!(hasFPModifiers() && hasIntModifiers()) 119 && "fp and int modifiers should not be used simultaneously"); 120 if (hasFPModifiers()) { 121 return getFPModifiersOperand(); 122 } else if (hasIntModifiers()) { 123 return getIntModifiersOperand(); 124 } else { 125 return 0; 126 } 127 } 128 129 friend raw_ostream &operator <<(raw_ostream &OS, AMDGPUOperand::Modifiers Mods); 130 }; 131 132 enum ImmTy { 133 ImmTyNone, 134 ImmTyGDS, 135 ImmTyLDS, 136 ImmTyOffen, 137 ImmTyIdxen, 138 ImmTyAddr64, 139 ImmTyOffset, 140 ImmTyInstOffset, 141 ImmTyOffset0, 142 ImmTyOffset1, 143 ImmTyDLC, 144 ImmTyGLC, 145 ImmTySLC, 146 ImmTySWZ, 147 ImmTyTFE, 148 ImmTyD16, 149 ImmTyClampSI, 150 ImmTyOModSI, 151 ImmTyDPP8, 152 ImmTyDppCtrl, 153 ImmTyDppRowMask, 154 ImmTyDppBankMask, 155 ImmTyDppBoundCtrl, 156 ImmTyDppFi, 157 ImmTySdwaDstSel, 158 ImmTySdwaSrc0Sel, 159 ImmTySdwaSrc1Sel, 160 ImmTySdwaDstUnused, 161 ImmTyDMask, 162 ImmTyDim, 163 ImmTyUNorm, 164 ImmTyDA, 165 ImmTyR128A16, 166 ImmTyA16, 167 ImmTyLWE, 168 ImmTyExpTgt, 169 ImmTyExpCompr, 170 ImmTyExpVM, 171 ImmTyFORMAT, 172 ImmTyHwreg, 173 ImmTyOff, 174 ImmTySendMsg, 175 ImmTyInterpSlot, 176 ImmTyInterpAttr, 177 ImmTyAttrChan, 178 ImmTyOpSel, 179 ImmTyOpSelHi, 180 ImmTyNegLo, 181 ImmTyNegHi, 182 ImmTySwizzle, 183 ImmTyGprIdxMode, 184 ImmTyHigh, 185 ImmTyBLGP, 186 ImmTyCBSZ, 187 ImmTyABID, 188 ImmTyEndpgm, 189 }; 190 191 private: 192 struct TokOp { 193 const char *Data; 194 unsigned Length; 195 }; 196 197 struct ImmOp { 198 int64_t Val; 199 ImmTy Type; 200 bool IsFPImm; 201 Modifiers Mods; 202 }; 203 204 struct RegOp { 205 unsigned RegNo; 206 Modifiers Mods; 207 }; 208 209 union { 210 TokOp Tok; 211 ImmOp Imm; 212 RegOp Reg; 213 const MCExpr *Expr; 214 }; 215 216 public: 217 bool isToken() const override { 218 if (Kind == Token) 219 return true; 220 221 // When parsing operands, we can't always tell if something was meant to be 222 // a token, like 'gds', or an expression that references a global variable. 223 // In this case, we assume the string is an expression, and if we need to 224 // interpret is a token, then we treat the symbol name as the token. 225 return isSymbolRefExpr(); 226 } 227 228 bool isSymbolRefExpr() const { 229 return isExpr() && Expr && isa<MCSymbolRefExpr>(Expr); 230 } 231 232 bool isImm() const override { 233 return Kind == Immediate; 234 } 235 236 bool isInlinableImm(MVT type) const; 237 bool isLiteralImm(MVT type) const; 238 239 bool isRegKind() const { 240 return Kind == Register; 241 } 242 243 bool isReg() const override { 244 return isRegKind() && !hasModifiers(); 245 } 246 247 bool isRegOrImmWithInputMods(unsigned RCID, MVT type) const { 248 return isRegClass(RCID) || isInlinableImm(type) || isLiteralImm(type); 249 } 250 251 bool isRegOrImmWithInt16InputMods() const { 252 return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::i16); 253 } 254 255 bool isRegOrImmWithInt32InputMods() const { 256 return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::i32); 257 } 258 259 bool isRegOrImmWithInt64InputMods() const { 260 return isRegOrImmWithInputMods(AMDGPU::VS_64RegClassID, MVT::i64); 261 } 262 263 bool isRegOrImmWithFP16InputMods() const { 264 return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::f16); 265 } 266 267 bool isRegOrImmWithFP32InputMods() const { 268 return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::f32); 269 } 270 271 bool isRegOrImmWithFP64InputMods() const { 272 return isRegOrImmWithInputMods(AMDGPU::VS_64RegClassID, MVT::f64); 273 } 274 275 bool isVReg() const { 276 return isRegClass(AMDGPU::VGPR_32RegClassID) || 277 isRegClass(AMDGPU::VReg_64RegClassID) || 278 isRegClass(AMDGPU::VReg_96RegClassID) || 279 isRegClass(AMDGPU::VReg_128RegClassID) || 280 isRegClass(AMDGPU::VReg_160RegClassID) || 281 isRegClass(AMDGPU::VReg_192RegClassID) || 282 isRegClass(AMDGPU::VReg_256RegClassID) || 283 isRegClass(AMDGPU::VReg_512RegClassID) || 284 isRegClass(AMDGPU::VReg_1024RegClassID); 285 } 286 287 bool isVReg32() const { 288 return isRegClass(AMDGPU::VGPR_32RegClassID); 289 } 290 291 bool isVReg32OrOff() const { 292 return isOff() || isVReg32(); 293 } 294 295 bool isNull() const { 296 return isRegKind() && getReg() == AMDGPU::SGPR_NULL; 297 } 298 299 bool isSDWAOperand(MVT type) const; 300 bool isSDWAFP16Operand() const; 301 bool isSDWAFP32Operand() const; 302 bool isSDWAInt16Operand() const; 303 bool isSDWAInt32Operand() const; 304 305 bool isImmTy(ImmTy ImmT) const { 306 return isImm() && Imm.Type == ImmT; 307 } 308 309 bool isImmModifier() const { 310 return isImm() && Imm.Type != ImmTyNone; 311 } 312 313 bool isClampSI() const { return isImmTy(ImmTyClampSI); } 314 bool isOModSI() const { return isImmTy(ImmTyOModSI); } 315 bool isDMask() const { return isImmTy(ImmTyDMask); } 316 bool isDim() const { return isImmTy(ImmTyDim); } 317 bool isUNorm() const { return isImmTy(ImmTyUNorm); } 318 bool isDA() const { return isImmTy(ImmTyDA); } 319 bool isR128A16() const { return isImmTy(ImmTyR128A16); } 320 bool isGFX10A16() const { return isImmTy(ImmTyA16); } 321 bool isLWE() const { return isImmTy(ImmTyLWE); } 322 bool isOff() const { return isImmTy(ImmTyOff); } 323 bool isExpTgt() const { return isImmTy(ImmTyExpTgt); } 324 bool isExpVM() const { return isImmTy(ImmTyExpVM); } 325 bool isExpCompr() const { return isImmTy(ImmTyExpCompr); } 326 bool isOffen() const { return isImmTy(ImmTyOffen); } 327 bool isIdxen() const { return isImmTy(ImmTyIdxen); } 328 bool isAddr64() const { return isImmTy(ImmTyAddr64); } 329 bool isOffset() const { return isImmTy(ImmTyOffset) && isUInt<16>(getImm()); } 330 bool isOffset0() const { return isImmTy(ImmTyOffset0) && isUInt<8>(getImm()); } 331 bool isOffset1() const { return isImmTy(ImmTyOffset1) && isUInt<8>(getImm()); } 332 333 bool isFlatOffset() const { return isImmTy(ImmTyOffset) || isImmTy(ImmTyInstOffset); } 334 bool isGDS() const { return isImmTy(ImmTyGDS); } 335 bool isLDS() const { return isImmTy(ImmTyLDS); } 336 bool isDLC() const { return isImmTy(ImmTyDLC); } 337 bool isGLC() const { return isImmTy(ImmTyGLC); } 338 bool isSLC() const { return isImmTy(ImmTySLC); } 339 bool isSWZ() const { return isImmTy(ImmTySWZ); } 340 bool isTFE() const { return isImmTy(ImmTyTFE); } 341 bool isD16() const { return isImmTy(ImmTyD16); } 342 bool isFORMAT() const { return isImmTy(ImmTyFORMAT) && isUInt<7>(getImm()); } 343 bool isBankMask() const { return isImmTy(ImmTyDppBankMask); } 344 bool isRowMask() const { return isImmTy(ImmTyDppRowMask); } 345 bool isBoundCtrl() const { return isImmTy(ImmTyDppBoundCtrl); } 346 bool isFI() const { return isImmTy(ImmTyDppFi); } 347 bool isSDWADstSel() const { return isImmTy(ImmTySdwaDstSel); } 348 bool isSDWASrc0Sel() const { return isImmTy(ImmTySdwaSrc0Sel); } 349 bool isSDWASrc1Sel() const { return isImmTy(ImmTySdwaSrc1Sel); } 350 bool isSDWADstUnused() const { return isImmTy(ImmTySdwaDstUnused); } 351 bool isInterpSlot() const { return isImmTy(ImmTyInterpSlot); } 352 bool isInterpAttr() const { return isImmTy(ImmTyInterpAttr); } 353 bool isAttrChan() const { return isImmTy(ImmTyAttrChan); } 354 bool isOpSel() const { return isImmTy(ImmTyOpSel); } 355 bool isOpSelHi() const { return isImmTy(ImmTyOpSelHi); } 356 bool isNegLo() const { return isImmTy(ImmTyNegLo); } 357 bool isNegHi() const { return isImmTy(ImmTyNegHi); } 358 bool isHigh() const { return isImmTy(ImmTyHigh); } 359 360 bool isMod() const { 361 return isClampSI() || isOModSI(); 362 } 363 364 bool isRegOrImm() const { 365 return isReg() || isImm(); 366 } 367 368 bool isRegClass(unsigned RCID) const; 369 370 bool isInlineValue() const; 371 372 bool isRegOrInlineNoMods(unsigned RCID, MVT type) const { 373 return (isRegClass(RCID) || isInlinableImm(type)) && !hasModifiers(); 374 } 375 376 bool isSCSrcB16() const { 377 return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::i16); 378 } 379 380 bool isSCSrcV2B16() const { 381 return isSCSrcB16(); 382 } 383 384 bool isSCSrcB32() const { 385 return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::i32); 386 } 387 388 bool isSCSrcB64() const { 389 return isRegOrInlineNoMods(AMDGPU::SReg_64RegClassID, MVT::i64); 390 } 391 392 bool isBoolReg() const; 393 394 bool isSCSrcF16() const { 395 return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::f16); 396 } 397 398 bool isSCSrcV2F16() const { 399 return isSCSrcF16(); 400 } 401 402 bool isSCSrcF32() const { 403 return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::f32); 404 } 405 406 bool isSCSrcF64() const { 407 return isRegOrInlineNoMods(AMDGPU::SReg_64RegClassID, MVT::f64); 408 } 409 410 bool isSSrcB32() const { 411 return isSCSrcB32() || isLiteralImm(MVT::i32) || isExpr(); 412 } 413 414 bool isSSrcB16() const { 415 return isSCSrcB16() || isLiteralImm(MVT::i16); 416 } 417 418 bool isSSrcV2B16() const { 419 llvm_unreachable("cannot happen"); 420 return isSSrcB16(); 421 } 422 423 bool isSSrcB64() const { 424 // TODO: Find out how SALU supports extension of 32-bit literals to 64 bits. 425 // See isVSrc64(). 426 return isSCSrcB64() || isLiteralImm(MVT::i64); 427 } 428 429 bool isSSrcF32() const { 430 return isSCSrcB32() || isLiteralImm(MVT::f32) || isExpr(); 431 } 432 433 bool isSSrcF64() const { 434 return isSCSrcB64() || isLiteralImm(MVT::f64); 435 } 436 437 bool isSSrcF16() const { 438 return isSCSrcB16() || isLiteralImm(MVT::f16); 439 } 440 441 bool isSSrcV2F16() const { 442 llvm_unreachable("cannot happen"); 443 return isSSrcF16(); 444 } 445 446 bool isSSrcOrLdsB32() const { 447 return isRegOrInlineNoMods(AMDGPU::SRegOrLds_32RegClassID, MVT::i32) || 448 isLiteralImm(MVT::i32) || isExpr(); 449 } 450 451 bool isVCSrcB32() const { 452 return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::i32); 453 } 454 455 bool isVCSrcB64() const { 456 return isRegOrInlineNoMods(AMDGPU::VS_64RegClassID, MVT::i64); 457 } 458 459 bool isVCSrcB16() const { 460 return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::i16); 461 } 462 463 bool isVCSrcV2B16() const { 464 return isVCSrcB16(); 465 } 466 467 bool isVCSrcF32() const { 468 return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::f32); 469 } 470 471 bool isVCSrcF64() const { 472 return isRegOrInlineNoMods(AMDGPU::VS_64RegClassID, MVT::f64); 473 } 474 475 bool isVCSrcF16() const { 476 return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::f16); 477 } 478 479 bool isVCSrcV2F16() const { 480 return isVCSrcF16(); 481 } 482 483 bool isVSrcB32() const { 484 return isVCSrcF32() || isLiteralImm(MVT::i32) || isExpr(); 485 } 486 487 bool isVSrcB64() const { 488 return isVCSrcF64() || isLiteralImm(MVT::i64); 489 } 490 491 bool isVSrcB16() const { 492 return isVCSrcB16() || isLiteralImm(MVT::i16); 493 } 494 495 bool isVSrcV2B16() const { 496 return isVSrcB16() || isLiteralImm(MVT::v2i16); 497 } 498 499 bool isVSrcF32() const { 500 return isVCSrcF32() || isLiteralImm(MVT::f32) || isExpr(); 501 } 502 503 bool isVSrcF64() const { 504 return isVCSrcF64() || isLiteralImm(MVT::f64); 505 } 506 507 bool isVSrcF16() const { 508 return isVCSrcF16() || isLiteralImm(MVT::f16); 509 } 510 511 bool isVSrcV2F16() const { 512 return isVSrcF16() || isLiteralImm(MVT::v2f16); 513 } 514 515 bool isVISrcB32() const { 516 return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::i32); 517 } 518 519 bool isVISrcB16() const { 520 return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::i16); 521 } 522 523 bool isVISrcV2B16() const { 524 return isVISrcB16(); 525 } 526 527 bool isVISrcF32() const { 528 return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::f32); 529 } 530 531 bool isVISrcF16() const { 532 return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::f16); 533 } 534 535 bool isVISrcV2F16() const { 536 return isVISrcF16() || isVISrcB32(); 537 } 538 539 bool isAISrcB32() const { 540 return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::i32); 541 } 542 543 bool isAISrcB16() const { 544 return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::i16); 545 } 546 547 bool isAISrcV2B16() const { 548 return isAISrcB16(); 549 } 550 551 bool isAISrcF32() const { 552 return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::f32); 553 } 554 555 bool isAISrcF16() const { 556 return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::f16); 557 } 558 559 bool isAISrcV2F16() const { 560 return isAISrcF16() || isAISrcB32(); 561 } 562 563 bool isAISrc_128B32() const { 564 return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::i32); 565 } 566 567 bool isAISrc_128B16() const { 568 return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::i16); 569 } 570 571 bool isAISrc_128V2B16() const { 572 return isAISrc_128B16(); 573 } 574 575 bool isAISrc_128F32() const { 576 return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::f32); 577 } 578 579 bool isAISrc_128F16() const { 580 return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::f16); 581 } 582 583 bool isAISrc_128V2F16() const { 584 return isAISrc_128F16() || isAISrc_128B32(); 585 } 586 587 bool isAISrc_512B32() const { 588 return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::i32); 589 } 590 591 bool isAISrc_512B16() const { 592 return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::i16); 593 } 594 595 bool isAISrc_512V2B16() const { 596 return isAISrc_512B16(); 597 } 598 599 bool isAISrc_512F32() const { 600 return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::f32); 601 } 602 603 bool isAISrc_512F16() const { 604 return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::f16); 605 } 606 607 bool isAISrc_512V2F16() const { 608 return isAISrc_512F16() || isAISrc_512B32(); 609 } 610 611 bool isAISrc_1024B32() const { 612 return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::i32); 613 } 614 615 bool isAISrc_1024B16() const { 616 return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::i16); 617 } 618 619 bool isAISrc_1024V2B16() const { 620 return isAISrc_1024B16(); 621 } 622 623 bool isAISrc_1024F32() const { 624 return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::f32); 625 } 626 627 bool isAISrc_1024F16() const { 628 return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::f16); 629 } 630 631 bool isAISrc_1024V2F16() const { 632 return isAISrc_1024F16() || isAISrc_1024B32(); 633 } 634 635 bool isKImmFP32() const { 636 return isLiteralImm(MVT::f32); 637 } 638 639 bool isKImmFP16() const { 640 return isLiteralImm(MVT::f16); 641 } 642 643 bool isMem() const override { 644 return false; 645 } 646 647 bool isExpr() const { 648 return Kind == Expression; 649 } 650 651 bool isSoppBrTarget() const { 652 return isExpr() || isImm(); 653 } 654 655 bool isSWaitCnt() const; 656 bool isHwreg() const; 657 bool isSendMsg() const; 658 bool isSwizzle() const; 659 bool isSMRDOffset8() const; 660 bool isSMEMOffset() const; 661 bool isSMRDLiteralOffset() const; 662 bool isDPP8() const; 663 bool isDPPCtrl() const; 664 bool isBLGP() const; 665 bool isCBSZ() const; 666 bool isABID() const; 667 bool isGPRIdxMode() const; 668 bool isS16Imm() const; 669 bool isU16Imm() const; 670 bool isEndpgm() const; 671 672 StringRef getExpressionAsToken() const { 673 assert(isExpr()); 674 const MCSymbolRefExpr *S = cast<MCSymbolRefExpr>(Expr); 675 return S->getSymbol().getName(); 676 } 677 678 StringRef getToken() const { 679 assert(isToken()); 680 681 if (Kind == Expression) 682 return getExpressionAsToken(); 683 684 return StringRef(Tok.Data, Tok.Length); 685 } 686 687 int64_t getImm() const { 688 assert(isImm()); 689 return Imm.Val; 690 } 691 692 void setImm(int64_t Val) { 693 assert(isImm()); 694 Imm.Val = Val; 695 } 696 697 ImmTy getImmTy() const { 698 assert(isImm()); 699 return Imm.Type; 700 } 701 702 unsigned getReg() const override { 703 assert(isRegKind()); 704 return Reg.RegNo; 705 } 706 707 SMLoc getStartLoc() const override { 708 return StartLoc; 709 } 710 711 SMLoc getEndLoc() const override { 712 return EndLoc; 713 } 714 715 SMRange getLocRange() const { 716 return SMRange(StartLoc, EndLoc); 717 } 718 719 Modifiers getModifiers() const { 720 assert(isRegKind() || isImmTy(ImmTyNone)); 721 return isRegKind() ? Reg.Mods : Imm.Mods; 722 } 723 724 void setModifiers(Modifiers Mods) { 725 assert(isRegKind() || isImmTy(ImmTyNone)); 726 if (isRegKind()) 727 Reg.Mods = Mods; 728 else 729 Imm.Mods = Mods; 730 } 731 732 bool hasModifiers() const { 733 return getModifiers().hasModifiers(); 734 } 735 736 bool hasFPModifiers() const { 737 return getModifiers().hasFPModifiers(); 738 } 739 740 bool hasIntModifiers() const { 741 return getModifiers().hasIntModifiers(); 742 } 743 744 uint64_t applyInputFPModifiers(uint64_t Val, unsigned Size) const; 745 746 void addImmOperands(MCInst &Inst, unsigned N, bool ApplyModifiers = true) const; 747 748 void addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyModifiers) const; 749 750 template <unsigned Bitwidth> 751 void addKImmFPOperands(MCInst &Inst, unsigned N) const; 752 753 void addKImmFP16Operands(MCInst &Inst, unsigned N) const { 754 addKImmFPOperands<16>(Inst, N); 755 } 756 757 void addKImmFP32Operands(MCInst &Inst, unsigned N) const { 758 addKImmFPOperands<32>(Inst, N); 759 } 760 761 void addRegOperands(MCInst &Inst, unsigned N) const; 762 763 void addBoolRegOperands(MCInst &Inst, unsigned N) const { 764 addRegOperands(Inst, N); 765 } 766 767 void addRegOrImmOperands(MCInst &Inst, unsigned N) const { 768 if (isRegKind()) 769 addRegOperands(Inst, N); 770 else if (isExpr()) 771 Inst.addOperand(MCOperand::createExpr(Expr)); 772 else 773 addImmOperands(Inst, N); 774 } 775 776 void addRegOrImmWithInputModsOperands(MCInst &Inst, unsigned N) const { 777 Modifiers Mods = getModifiers(); 778 Inst.addOperand(MCOperand::createImm(Mods.getModifiersOperand())); 779 if (isRegKind()) { 780 addRegOperands(Inst, N); 781 } else { 782 addImmOperands(Inst, N, false); 783 } 784 } 785 786 void addRegOrImmWithFPInputModsOperands(MCInst &Inst, unsigned N) const { 787 assert(!hasIntModifiers()); 788 addRegOrImmWithInputModsOperands(Inst, N); 789 } 790 791 void addRegOrImmWithIntInputModsOperands(MCInst &Inst, unsigned N) const { 792 assert(!hasFPModifiers()); 793 addRegOrImmWithInputModsOperands(Inst, N); 794 } 795 796 void addRegWithInputModsOperands(MCInst &Inst, unsigned N) const { 797 Modifiers Mods = getModifiers(); 798 Inst.addOperand(MCOperand::createImm(Mods.getModifiersOperand())); 799 assert(isRegKind()); 800 addRegOperands(Inst, N); 801 } 802 803 void addRegWithFPInputModsOperands(MCInst &Inst, unsigned N) const { 804 assert(!hasIntModifiers()); 805 addRegWithInputModsOperands(Inst, N); 806 } 807 808 void addRegWithIntInputModsOperands(MCInst &Inst, unsigned N) const { 809 assert(!hasFPModifiers()); 810 addRegWithInputModsOperands(Inst, N); 811 } 812 813 void addSoppBrTargetOperands(MCInst &Inst, unsigned N) const { 814 if (isImm()) 815 addImmOperands(Inst, N); 816 else { 817 assert(isExpr()); 818 Inst.addOperand(MCOperand::createExpr(Expr)); 819 } 820 } 821 822 static void printImmTy(raw_ostream& OS, ImmTy Type) { 823 switch (Type) { 824 case ImmTyNone: OS << "None"; break; 825 case ImmTyGDS: OS << "GDS"; break; 826 case ImmTyLDS: OS << "LDS"; break; 827 case ImmTyOffen: OS << "Offen"; break; 828 case ImmTyIdxen: OS << "Idxen"; break; 829 case ImmTyAddr64: OS << "Addr64"; break; 830 case ImmTyOffset: OS << "Offset"; break; 831 case ImmTyInstOffset: OS << "InstOffset"; break; 832 case ImmTyOffset0: OS << "Offset0"; break; 833 case ImmTyOffset1: OS << "Offset1"; break; 834 case ImmTyDLC: OS << "DLC"; break; 835 case ImmTyGLC: OS << "GLC"; break; 836 case ImmTySLC: OS << "SLC"; break; 837 case ImmTySWZ: OS << "SWZ"; break; 838 case ImmTyTFE: OS << "TFE"; break; 839 case ImmTyD16: OS << "D16"; break; 840 case ImmTyFORMAT: OS << "FORMAT"; break; 841 case ImmTyClampSI: OS << "ClampSI"; break; 842 case ImmTyOModSI: OS << "OModSI"; break; 843 case ImmTyDPP8: OS << "DPP8"; break; 844 case ImmTyDppCtrl: OS << "DppCtrl"; break; 845 case ImmTyDppRowMask: OS << "DppRowMask"; break; 846 case ImmTyDppBankMask: OS << "DppBankMask"; break; 847 case ImmTyDppBoundCtrl: OS << "DppBoundCtrl"; break; 848 case ImmTyDppFi: OS << "FI"; break; 849 case ImmTySdwaDstSel: OS << "SdwaDstSel"; break; 850 case ImmTySdwaSrc0Sel: OS << "SdwaSrc0Sel"; break; 851 case ImmTySdwaSrc1Sel: OS << "SdwaSrc1Sel"; break; 852 case ImmTySdwaDstUnused: OS << "SdwaDstUnused"; break; 853 case ImmTyDMask: OS << "DMask"; break; 854 case ImmTyDim: OS << "Dim"; break; 855 case ImmTyUNorm: OS << "UNorm"; break; 856 case ImmTyDA: OS << "DA"; break; 857 case ImmTyR128A16: OS << "R128A16"; break; 858 case ImmTyA16: OS << "A16"; break; 859 case ImmTyLWE: OS << "LWE"; break; 860 case ImmTyOff: OS << "Off"; break; 861 case ImmTyExpTgt: OS << "ExpTgt"; break; 862 case ImmTyExpCompr: OS << "ExpCompr"; break; 863 case ImmTyExpVM: OS << "ExpVM"; break; 864 case ImmTyHwreg: OS << "Hwreg"; break; 865 case ImmTySendMsg: OS << "SendMsg"; break; 866 case ImmTyInterpSlot: OS << "InterpSlot"; break; 867 case ImmTyInterpAttr: OS << "InterpAttr"; break; 868 case ImmTyAttrChan: OS << "AttrChan"; break; 869 case ImmTyOpSel: OS << "OpSel"; break; 870 case ImmTyOpSelHi: OS << "OpSelHi"; break; 871 case ImmTyNegLo: OS << "NegLo"; break; 872 case ImmTyNegHi: OS << "NegHi"; break; 873 case ImmTySwizzle: OS << "Swizzle"; break; 874 case ImmTyGprIdxMode: OS << "GprIdxMode"; break; 875 case ImmTyHigh: OS << "High"; break; 876 case ImmTyBLGP: OS << "BLGP"; break; 877 case ImmTyCBSZ: OS << "CBSZ"; break; 878 case ImmTyABID: OS << "ABID"; break; 879 case ImmTyEndpgm: OS << "Endpgm"; break; 880 } 881 } 882 883 void print(raw_ostream &OS) const override { 884 switch (Kind) { 885 case Register: 886 OS << "<register " << getReg() << " mods: " << Reg.Mods << '>'; 887 break; 888 case Immediate: 889 OS << '<' << getImm(); 890 if (getImmTy() != ImmTyNone) { 891 OS << " type: "; printImmTy(OS, getImmTy()); 892 } 893 OS << " mods: " << Imm.Mods << '>'; 894 break; 895 case Token: 896 OS << '\'' << getToken() << '\''; 897 break; 898 case Expression: 899 OS << "<expr " << *Expr << '>'; 900 break; 901 } 902 } 903 904 static AMDGPUOperand::Ptr CreateImm(const AMDGPUAsmParser *AsmParser, 905 int64_t Val, SMLoc Loc, 906 ImmTy Type = ImmTyNone, 907 bool IsFPImm = false) { 908 auto Op = std::make_unique<AMDGPUOperand>(Immediate, AsmParser); 909 Op->Imm.Val = Val; 910 Op->Imm.IsFPImm = IsFPImm; 911 Op->Imm.Type = Type; 912 Op->Imm.Mods = Modifiers(); 913 Op->StartLoc = Loc; 914 Op->EndLoc = Loc; 915 return Op; 916 } 917 918 static AMDGPUOperand::Ptr CreateToken(const AMDGPUAsmParser *AsmParser, 919 StringRef Str, SMLoc Loc, 920 bool HasExplicitEncodingSize = true) { 921 auto Res = std::make_unique<AMDGPUOperand>(Token, AsmParser); 922 Res->Tok.Data = Str.data(); 923 Res->Tok.Length = Str.size(); 924 Res->StartLoc = Loc; 925 Res->EndLoc = Loc; 926 return Res; 927 } 928 929 static AMDGPUOperand::Ptr CreateReg(const AMDGPUAsmParser *AsmParser, 930 unsigned RegNo, SMLoc S, 931 SMLoc E) { 932 auto Op = std::make_unique<AMDGPUOperand>(Register, AsmParser); 933 Op->Reg.RegNo = RegNo; 934 Op->Reg.Mods = Modifiers(); 935 Op->StartLoc = S; 936 Op->EndLoc = E; 937 return Op; 938 } 939 940 static AMDGPUOperand::Ptr CreateExpr(const AMDGPUAsmParser *AsmParser, 941 const class MCExpr *Expr, SMLoc S) { 942 auto Op = std::make_unique<AMDGPUOperand>(Expression, AsmParser); 943 Op->Expr = Expr; 944 Op->StartLoc = S; 945 Op->EndLoc = S; 946 return Op; 947 } 948 }; 949 950 raw_ostream &operator <<(raw_ostream &OS, AMDGPUOperand::Modifiers Mods) { 951 OS << "abs:" << Mods.Abs << " neg: " << Mods.Neg << " sext:" << Mods.Sext; 952 return OS; 953 } 954 955 //===----------------------------------------------------------------------===// 956 // AsmParser 957 //===----------------------------------------------------------------------===// 958 959 // Holds info related to the current kernel, e.g. count of SGPRs used. 960 // Kernel scope begins at .amdgpu_hsa_kernel directive, ends at next 961 // .amdgpu_hsa_kernel or at EOF. 962 class KernelScopeInfo { 963 int SgprIndexUnusedMin = -1; 964 int VgprIndexUnusedMin = -1; 965 MCContext *Ctx = nullptr; 966 967 void usesSgprAt(int i) { 968 if (i >= SgprIndexUnusedMin) { 969 SgprIndexUnusedMin = ++i; 970 if (Ctx) { 971 MCSymbol * const Sym = Ctx->getOrCreateSymbol(Twine(".kernel.sgpr_count")); 972 Sym->setVariableValue(MCConstantExpr::create(SgprIndexUnusedMin, *Ctx)); 973 } 974 } 975 } 976 977 void usesVgprAt(int i) { 978 if (i >= VgprIndexUnusedMin) { 979 VgprIndexUnusedMin = ++i; 980 if (Ctx) { 981 MCSymbol * const Sym = Ctx->getOrCreateSymbol(Twine(".kernel.vgpr_count")); 982 Sym->setVariableValue(MCConstantExpr::create(VgprIndexUnusedMin, *Ctx)); 983 } 984 } 985 } 986 987 public: 988 KernelScopeInfo() = default; 989 990 void initialize(MCContext &Context) { 991 Ctx = &Context; 992 usesSgprAt(SgprIndexUnusedMin = -1); 993 usesVgprAt(VgprIndexUnusedMin = -1); 994 } 995 996 void usesRegister(RegisterKind RegKind, unsigned DwordRegIndex, unsigned RegWidth) { 997 switch (RegKind) { 998 case IS_SGPR: usesSgprAt(DwordRegIndex + RegWidth - 1); break; 999 case IS_AGPR: // fall through 1000 case IS_VGPR: usesVgprAt(DwordRegIndex + RegWidth - 1); break; 1001 default: break; 1002 } 1003 } 1004 }; 1005 1006 class AMDGPUAsmParser : public MCTargetAsmParser { 1007 MCAsmParser &Parser; 1008 1009 // Number of extra operands parsed after the first optional operand. 1010 // This may be necessary to skip hardcoded mandatory operands. 1011 static const unsigned MAX_OPR_LOOKAHEAD = 8; 1012 1013 unsigned ForcedEncodingSize = 0; 1014 bool ForcedDPP = false; 1015 bool ForcedSDWA = false; 1016 KernelScopeInfo KernelScope; 1017 1018 /// @name Auto-generated Match Functions 1019 /// { 1020 1021 #define GET_ASSEMBLER_HEADER 1022 #include "AMDGPUGenAsmMatcher.inc" 1023 1024 /// } 1025 1026 private: 1027 bool ParseAsAbsoluteExpression(uint32_t &Ret); 1028 bool OutOfRangeError(SMRange Range); 1029 /// Calculate VGPR/SGPR blocks required for given target, reserved 1030 /// registers, and user-specified NextFreeXGPR values. 1031 /// 1032 /// \param Features [in] Target features, used for bug corrections. 1033 /// \param VCCUsed [in] Whether VCC special SGPR is reserved. 1034 /// \param FlatScrUsed [in] Whether FLAT_SCRATCH special SGPR is reserved. 1035 /// \param XNACKUsed [in] Whether XNACK_MASK special SGPR is reserved. 1036 /// \param EnableWavefrontSize32 [in] Value of ENABLE_WAVEFRONT_SIZE32 kernel 1037 /// descriptor field, if valid. 1038 /// \param NextFreeVGPR [in] Max VGPR number referenced, plus one. 1039 /// \param VGPRRange [in] Token range, used for VGPR diagnostics. 1040 /// \param NextFreeSGPR [in] Max SGPR number referenced, plus one. 1041 /// \param SGPRRange [in] Token range, used for SGPR diagnostics. 1042 /// \param VGPRBlocks [out] Result VGPR block count. 1043 /// \param SGPRBlocks [out] Result SGPR block count. 1044 bool calculateGPRBlocks(const FeatureBitset &Features, bool VCCUsed, 1045 bool FlatScrUsed, bool XNACKUsed, 1046 Optional<bool> EnableWavefrontSize32, unsigned NextFreeVGPR, 1047 SMRange VGPRRange, unsigned NextFreeSGPR, 1048 SMRange SGPRRange, unsigned &VGPRBlocks, 1049 unsigned &SGPRBlocks); 1050 bool ParseDirectiveAMDGCNTarget(); 1051 bool ParseDirectiveAMDHSAKernel(); 1052 bool ParseDirectiveMajorMinor(uint32_t &Major, uint32_t &Minor); 1053 bool ParseDirectiveHSACodeObjectVersion(); 1054 bool ParseDirectiveHSACodeObjectISA(); 1055 bool ParseAMDKernelCodeTValue(StringRef ID, amd_kernel_code_t &Header); 1056 bool ParseDirectiveAMDKernelCodeT(); 1057 bool subtargetHasRegister(const MCRegisterInfo &MRI, unsigned RegNo) const; 1058 bool ParseDirectiveAMDGPUHsaKernel(); 1059 1060 bool ParseDirectiveISAVersion(); 1061 bool ParseDirectiveHSAMetadata(); 1062 bool ParseDirectivePALMetadataBegin(); 1063 bool ParseDirectivePALMetadata(); 1064 bool ParseDirectiveAMDGPULDS(); 1065 1066 /// Common code to parse out a block of text (typically YAML) between start and 1067 /// end directives. 1068 bool ParseToEndDirective(const char *AssemblerDirectiveBegin, 1069 const char *AssemblerDirectiveEnd, 1070 std::string &CollectString); 1071 1072 bool AddNextRegisterToList(unsigned& Reg, unsigned& RegWidth, 1073 RegisterKind RegKind, unsigned Reg1, SMLoc Loc); 1074 bool ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg, 1075 unsigned &RegNum, unsigned &RegWidth, 1076 bool RestoreOnFailure = false); 1077 bool ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg, 1078 unsigned &RegNum, unsigned &RegWidth, 1079 SmallVectorImpl<AsmToken> &Tokens); 1080 unsigned ParseRegularReg(RegisterKind &RegKind, unsigned &RegNum, 1081 unsigned &RegWidth, 1082 SmallVectorImpl<AsmToken> &Tokens); 1083 unsigned ParseSpecialReg(RegisterKind &RegKind, unsigned &RegNum, 1084 unsigned &RegWidth, 1085 SmallVectorImpl<AsmToken> &Tokens); 1086 unsigned ParseRegList(RegisterKind &RegKind, unsigned &RegNum, 1087 unsigned &RegWidth, SmallVectorImpl<AsmToken> &Tokens); 1088 bool ParseRegRange(unsigned& Num, unsigned& Width); 1089 unsigned getRegularReg(RegisterKind RegKind, 1090 unsigned RegNum, 1091 unsigned RegWidth, 1092 SMLoc Loc); 1093 1094 bool isRegister(); 1095 bool isRegister(const AsmToken &Token, const AsmToken &NextToken) const; 1096 Optional<StringRef> getGprCountSymbolName(RegisterKind RegKind); 1097 void initializeGprCountSymbol(RegisterKind RegKind); 1098 bool updateGprCountSymbols(RegisterKind RegKind, unsigned DwordRegIndex, 1099 unsigned RegWidth); 1100 void cvtMubufImpl(MCInst &Inst, const OperandVector &Operands, 1101 bool IsAtomic, bool IsAtomicReturn, bool IsLds = false); 1102 void cvtDSImpl(MCInst &Inst, const OperandVector &Operands, 1103 bool IsGdsHardcoded); 1104 1105 public: 1106 enum AMDGPUMatchResultTy { 1107 Match_PreferE32 = FIRST_TARGET_MATCH_RESULT_TY 1108 }; 1109 enum OperandMode { 1110 OperandMode_Default, 1111 OperandMode_NSA, 1112 }; 1113 1114 using OptionalImmIndexMap = std::map<AMDGPUOperand::ImmTy, unsigned>; 1115 1116 AMDGPUAsmParser(const MCSubtargetInfo &STI, MCAsmParser &_Parser, 1117 const MCInstrInfo &MII, 1118 const MCTargetOptions &Options) 1119 : MCTargetAsmParser(Options, STI, MII), Parser(_Parser) { 1120 MCAsmParserExtension::Initialize(Parser); 1121 1122 if (getFeatureBits().none()) { 1123 // Set default features. 1124 copySTI().ToggleFeature("southern-islands"); 1125 } 1126 1127 setAvailableFeatures(ComputeAvailableFeatures(getFeatureBits())); 1128 1129 { 1130 // TODO: make those pre-defined variables read-only. 1131 // Currently there is none suitable machinery in the core llvm-mc for this. 1132 // MCSymbol::isRedefinable is intended for another purpose, and 1133 // AsmParser::parseDirectiveSet() cannot be specialized for specific target. 1134 AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU()); 1135 MCContext &Ctx = getContext(); 1136 if (ISA.Major >= 6 && AMDGPU::IsaInfo::hasCodeObjectV3(&getSTI())) { 1137 MCSymbol *Sym = 1138 Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_number")); 1139 Sym->setVariableValue(MCConstantExpr::create(ISA.Major, Ctx)); 1140 Sym = Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_minor")); 1141 Sym->setVariableValue(MCConstantExpr::create(ISA.Minor, Ctx)); 1142 Sym = Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_stepping")); 1143 Sym->setVariableValue(MCConstantExpr::create(ISA.Stepping, Ctx)); 1144 } else { 1145 MCSymbol *Sym = 1146 Ctx.getOrCreateSymbol(Twine(".option.machine_version_major")); 1147 Sym->setVariableValue(MCConstantExpr::create(ISA.Major, Ctx)); 1148 Sym = Ctx.getOrCreateSymbol(Twine(".option.machine_version_minor")); 1149 Sym->setVariableValue(MCConstantExpr::create(ISA.Minor, Ctx)); 1150 Sym = Ctx.getOrCreateSymbol(Twine(".option.machine_version_stepping")); 1151 Sym->setVariableValue(MCConstantExpr::create(ISA.Stepping, Ctx)); 1152 } 1153 if (ISA.Major >= 6 && AMDGPU::IsaInfo::hasCodeObjectV3(&getSTI())) { 1154 initializeGprCountSymbol(IS_VGPR); 1155 initializeGprCountSymbol(IS_SGPR); 1156 } else 1157 KernelScope.initialize(getContext()); 1158 } 1159 } 1160 1161 bool hasXNACK() const { 1162 return AMDGPU::hasXNACK(getSTI()); 1163 } 1164 1165 bool hasMIMG_R128() const { 1166 return AMDGPU::hasMIMG_R128(getSTI()); 1167 } 1168 1169 bool hasPackedD16() const { 1170 return AMDGPU::hasPackedD16(getSTI()); 1171 } 1172 1173 bool hasGFX10A16() const { 1174 return AMDGPU::hasGFX10A16(getSTI()); 1175 } 1176 1177 bool isSI() const { 1178 return AMDGPU::isSI(getSTI()); 1179 } 1180 1181 bool isCI() const { 1182 return AMDGPU::isCI(getSTI()); 1183 } 1184 1185 bool isVI() const { 1186 return AMDGPU::isVI(getSTI()); 1187 } 1188 1189 bool isGFX9() const { 1190 return AMDGPU::isGFX9(getSTI()); 1191 } 1192 1193 bool isGFX10() const { 1194 return AMDGPU::isGFX10(getSTI()); 1195 } 1196 1197 bool isGFX10_BEncoding() const { 1198 return AMDGPU::isGFX10_BEncoding(getSTI()); 1199 } 1200 1201 bool hasInv2PiInlineImm() const { 1202 return getFeatureBits()[AMDGPU::FeatureInv2PiInlineImm]; 1203 } 1204 1205 bool hasFlatOffsets() const { 1206 return getFeatureBits()[AMDGPU::FeatureFlatInstOffsets]; 1207 } 1208 1209 bool hasSGPR102_SGPR103() const { 1210 return !isVI() && !isGFX9(); 1211 } 1212 1213 bool hasSGPR104_SGPR105() const { 1214 return isGFX10(); 1215 } 1216 1217 bool hasIntClamp() const { 1218 return getFeatureBits()[AMDGPU::FeatureIntClamp]; 1219 } 1220 1221 AMDGPUTargetStreamer &getTargetStreamer() { 1222 MCTargetStreamer &TS = *getParser().getStreamer().getTargetStreamer(); 1223 return static_cast<AMDGPUTargetStreamer &>(TS); 1224 } 1225 1226 const MCRegisterInfo *getMRI() const { 1227 // We need this const_cast because for some reason getContext() is not const 1228 // in MCAsmParser. 1229 return const_cast<AMDGPUAsmParser*>(this)->getContext().getRegisterInfo(); 1230 } 1231 1232 const MCInstrInfo *getMII() const { 1233 return &MII; 1234 } 1235 1236 const FeatureBitset &getFeatureBits() const { 1237 return getSTI().getFeatureBits(); 1238 } 1239 1240 void setForcedEncodingSize(unsigned Size) { ForcedEncodingSize = Size; } 1241 void setForcedDPP(bool ForceDPP_) { ForcedDPP = ForceDPP_; } 1242 void setForcedSDWA(bool ForceSDWA_) { ForcedSDWA = ForceSDWA_; } 1243 1244 unsigned getForcedEncodingSize() const { return ForcedEncodingSize; } 1245 bool isForcedVOP3() const { return ForcedEncodingSize == 64; } 1246 bool isForcedDPP() const { return ForcedDPP; } 1247 bool isForcedSDWA() const { return ForcedSDWA; } 1248 ArrayRef<unsigned> getMatchedVariants() const; 1249 1250 std::unique_ptr<AMDGPUOperand> parseRegister(bool RestoreOnFailure = false); 1251 bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc, 1252 bool RestoreOnFailure); 1253 bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc) override; 1254 OperandMatchResultTy tryParseRegister(unsigned &RegNo, SMLoc &StartLoc, 1255 SMLoc &EndLoc) override; 1256 unsigned checkTargetMatchPredicate(MCInst &Inst) override; 1257 unsigned validateTargetOperandClass(MCParsedAsmOperand &Op, 1258 unsigned Kind) override; 1259 bool MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode, 1260 OperandVector &Operands, MCStreamer &Out, 1261 uint64_t &ErrorInfo, 1262 bool MatchingInlineAsm) override; 1263 bool ParseDirective(AsmToken DirectiveID) override; 1264 OperandMatchResultTy parseOperand(OperandVector &Operands, StringRef Mnemonic, 1265 OperandMode Mode = OperandMode_Default); 1266 StringRef parseMnemonicSuffix(StringRef Name); 1267 bool ParseInstruction(ParseInstructionInfo &Info, StringRef Name, 1268 SMLoc NameLoc, OperandVector &Operands) override; 1269 //bool ProcessInstruction(MCInst &Inst); 1270 1271 OperandMatchResultTy parseIntWithPrefix(const char *Prefix, int64_t &Int); 1272 1273 OperandMatchResultTy 1274 parseIntWithPrefix(const char *Prefix, OperandVector &Operands, 1275 AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone, 1276 bool (*ConvertResult)(int64_t &) = nullptr); 1277 1278 OperandMatchResultTy 1279 parseOperandArrayWithPrefix(const char *Prefix, 1280 OperandVector &Operands, 1281 AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone, 1282 bool (*ConvertResult)(int64_t&) = nullptr); 1283 1284 OperandMatchResultTy 1285 parseNamedBit(const char *Name, OperandVector &Operands, 1286 AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone); 1287 OperandMatchResultTy parseStringWithPrefix(StringRef Prefix, 1288 StringRef &Value); 1289 1290 bool isModifier(); 1291 bool isOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const; 1292 bool isRegOrOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const; 1293 bool isNamedOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const; 1294 bool isOpcodeModifierWithVal(const AsmToken &Token, const AsmToken &NextToken) const; 1295 bool parseSP3NegModifier(); 1296 OperandMatchResultTy parseImm(OperandVector &Operands, bool HasSP3AbsModifier = false); 1297 OperandMatchResultTy parseReg(OperandVector &Operands); 1298 OperandMatchResultTy parseRegOrImm(OperandVector &Operands, bool HasSP3AbsMod = false); 1299 OperandMatchResultTy parseRegOrImmWithFPInputMods(OperandVector &Operands, bool AllowImm = true); 1300 OperandMatchResultTy parseRegOrImmWithIntInputMods(OperandVector &Operands, bool AllowImm = true); 1301 OperandMatchResultTy parseRegWithFPInputMods(OperandVector &Operands); 1302 OperandMatchResultTy parseRegWithIntInputMods(OperandVector &Operands); 1303 OperandMatchResultTy parseVReg32OrOff(OperandVector &Operands); 1304 OperandMatchResultTy parseDfmtNfmt(int64_t &Format); 1305 OperandMatchResultTy parseUfmt(int64_t &Format); 1306 OperandMatchResultTy parseSymbolicSplitFormat(StringRef FormatStr, SMLoc Loc, int64_t &Format); 1307 OperandMatchResultTy parseSymbolicUnifiedFormat(StringRef FormatStr, SMLoc Loc, int64_t &Format); 1308 OperandMatchResultTy parseFORMAT(OperandVector &Operands); 1309 OperandMatchResultTy parseSymbolicOrNumericFormat(int64_t &Format); 1310 OperandMatchResultTy parseNumericFormat(int64_t &Format); 1311 bool tryParseFmt(const char *Pref, int64_t MaxVal, int64_t &Val); 1312 bool matchDfmtNfmt(int64_t &Dfmt, int64_t &Nfmt, StringRef FormatStr, SMLoc Loc); 1313 1314 void cvtDSOffset01(MCInst &Inst, const OperandVector &Operands); 1315 void cvtDS(MCInst &Inst, const OperandVector &Operands) { cvtDSImpl(Inst, Operands, false); } 1316 void cvtDSGds(MCInst &Inst, const OperandVector &Operands) { cvtDSImpl(Inst, Operands, true); } 1317 void cvtExp(MCInst &Inst, const OperandVector &Operands); 1318 1319 bool parseCnt(int64_t &IntVal); 1320 OperandMatchResultTy parseSWaitCntOps(OperandVector &Operands); 1321 OperandMatchResultTy parseHwreg(OperandVector &Operands); 1322 1323 private: 1324 struct OperandInfoTy { 1325 int64_t Id; 1326 bool IsSymbolic = false; 1327 bool IsDefined = false; 1328 1329 OperandInfoTy(int64_t Id_) : Id(Id_) {} 1330 }; 1331 1332 bool parseSendMsgBody(OperandInfoTy &Msg, OperandInfoTy &Op, OperandInfoTy &Stream); 1333 bool validateSendMsg(const OperandInfoTy &Msg, 1334 const OperandInfoTy &Op, 1335 const OperandInfoTy &Stream, 1336 const SMLoc Loc); 1337 1338 bool parseHwregBody(OperandInfoTy &HwReg, int64_t &Offset, int64_t &Width); 1339 bool validateHwreg(const OperandInfoTy &HwReg, 1340 const int64_t Offset, 1341 const int64_t Width, 1342 const SMLoc Loc); 1343 1344 OperandMatchResultTy parseExpTgtImpl(StringRef Str, uint8_t &Val); 1345 SMLoc getFlatOffsetLoc(const OperandVector &Operands) const; 1346 SMLoc getSMEMOffsetLoc(const OperandVector &Operands) const; 1347 1348 bool validateInstruction(const MCInst &Inst, const SMLoc &IDLoc, const OperandVector &Operands); 1349 bool validateFlatOffset(const MCInst &Inst, const OperandVector &Operands); 1350 bool validateSMEMOffset(const MCInst &Inst, const OperandVector &Operands); 1351 bool validateSOPLiteral(const MCInst &Inst) const; 1352 bool validateConstantBusLimitations(const MCInst &Inst); 1353 bool validateEarlyClobberLimitations(const MCInst &Inst); 1354 bool validateIntClampSupported(const MCInst &Inst); 1355 bool validateMIMGAtomicDMask(const MCInst &Inst); 1356 bool validateMIMGGatherDMask(const MCInst &Inst); 1357 bool validateMovrels(const MCInst &Inst); 1358 bool validateMIMGDataSize(const MCInst &Inst); 1359 bool validateMIMGAddrSize(const MCInst &Inst); 1360 bool validateMIMGD16(const MCInst &Inst); 1361 bool validateMIMGDim(const MCInst &Inst); 1362 bool validateLdsDirect(const MCInst &Inst); 1363 bool validateOpSel(const MCInst &Inst); 1364 bool validateVccOperand(unsigned Reg) const; 1365 bool validateVOP3Literal(const MCInst &Inst) const; 1366 bool validateMAIAccWrite(const MCInst &Inst); 1367 unsigned getConstantBusLimit(unsigned Opcode) const; 1368 bool usesConstantBus(const MCInst &Inst, unsigned OpIdx); 1369 bool isInlineConstant(const MCInst &Inst, unsigned OpIdx) const; 1370 unsigned findImplicitSGPRReadInVOP(const MCInst &Inst) const; 1371 1372 bool isId(const StringRef Id) const; 1373 bool isId(const AsmToken &Token, const StringRef Id) const; 1374 bool isToken(const AsmToken::TokenKind Kind) const; 1375 bool trySkipId(const StringRef Id); 1376 bool trySkipId(const StringRef Id, const AsmToken::TokenKind Kind); 1377 bool trySkipToken(const AsmToken::TokenKind Kind); 1378 bool skipToken(const AsmToken::TokenKind Kind, const StringRef ErrMsg); 1379 bool parseString(StringRef &Val, const StringRef ErrMsg = "expected a string"); 1380 bool parseId(StringRef &Val, const StringRef ErrMsg); 1381 1382 void peekTokens(MutableArrayRef<AsmToken> Tokens); 1383 AsmToken::TokenKind getTokenKind() const; 1384 bool parseExpr(int64_t &Imm); 1385 bool parseExpr(OperandVector &Operands); 1386 StringRef getTokenStr() const; 1387 AsmToken peekToken(); 1388 AsmToken getToken() const; 1389 SMLoc getLoc() const; 1390 void lex(); 1391 1392 public: 1393 OperandMatchResultTy parseOptionalOperand(OperandVector &Operands); 1394 OperandMatchResultTy parseOptionalOpr(OperandVector &Operands); 1395 1396 OperandMatchResultTy parseExpTgt(OperandVector &Operands); 1397 OperandMatchResultTy parseSendMsgOp(OperandVector &Operands); 1398 OperandMatchResultTy parseInterpSlot(OperandVector &Operands); 1399 OperandMatchResultTy parseInterpAttr(OperandVector &Operands); 1400 OperandMatchResultTy parseSOppBrTarget(OperandVector &Operands); 1401 OperandMatchResultTy parseBoolReg(OperandVector &Operands); 1402 1403 bool parseSwizzleOperands(const unsigned OpNum, int64_t* Op, 1404 const unsigned MinVal, 1405 const unsigned MaxVal, 1406 const StringRef ErrMsg); 1407 OperandMatchResultTy parseSwizzleOp(OperandVector &Operands); 1408 bool parseSwizzleOffset(int64_t &Imm); 1409 bool parseSwizzleMacro(int64_t &Imm); 1410 bool parseSwizzleQuadPerm(int64_t &Imm); 1411 bool parseSwizzleBitmaskPerm(int64_t &Imm); 1412 bool parseSwizzleBroadcast(int64_t &Imm); 1413 bool parseSwizzleSwap(int64_t &Imm); 1414 bool parseSwizzleReverse(int64_t &Imm); 1415 1416 OperandMatchResultTy parseGPRIdxMode(OperandVector &Operands); 1417 int64_t parseGPRIdxMacro(); 1418 1419 void cvtMubuf(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, false, false); } 1420 void cvtMubufAtomic(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, true, false); } 1421 void cvtMubufAtomicReturn(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, true, true); } 1422 void cvtMubufLds(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, false, false, true); } 1423 void cvtMtbuf(MCInst &Inst, const OperandVector &Operands); 1424 1425 AMDGPUOperand::Ptr defaultDLC() const; 1426 AMDGPUOperand::Ptr defaultGLC() const; 1427 AMDGPUOperand::Ptr defaultSLC() const; 1428 1429 AMDGPUOperand::Ptr defaultSMRDOffset8() const; 1430 AMDGPUOperand::Ptr defaultSMEMOffset() const; 1431 AMDGPUOperand::Ptr defaultSMRDLiteralOffset() const; 1432 AMDGPUOperand::Ptr defaultFlatOffset() const; 1433 1434 OperandMatchResultTy parseOModOperand(OperandVector &Operands); 1435 1436 void cvtVOP3(MCInst &Inst, const OperandVector &Operands, 1437 OptionalImmIndexMap &OptionalIdx); 1438 void cvtVOP3OpSel(MCInst &Inst, const OperandVector &Operands); 1439 void cvtVOP3(MCInst &Inst, const OperandVector &Operands); 1440 void cvtVOP3P(MCInst &Inst, const OperandVector &Operands); 1441 1442 void cvtVOP3Interp(MCInst &Inst, const OperandVector &Operands); 1443 1444 void cvtMIMG(MCInst &Inst, const OperandVector &Operands, 1445 bool IsAtomic = false); 1446 void cvtMIMGAtomic(MCInst &Inst, const OperandVector &Operands); 1447 void cvtIntersectRay(MCInst &Inst, const OperandVector &Operands); 1448 1449 OperandMatchResultTy parseDim(OperandVector &Operands); 1450 OperandMatchResultTy parseDPP8(OperandVector &Operands); 1451 OperandMatchResultTy parseDPPCtrl(OperandVector &Operands); 1452 AMDGPUOperand::Ptr defaultRowMask() const; 1453 AMDGPUOperand::Ptr defaultBankMask() const; 1454 AMDGPUOperand::Ptr defaultBoundCtrl() const; 1455 AMDGPUOperand::Ptr defaultFI() const; 1456 void cvtDPP(MCInst &Inst, const OperandVector &Operands, bool IsDPP8 = false); 1457 void cvtDPP8(MCInst &Inst, const OperandVector &Operands) { cvtDPP(Inst, Operands, true); } 1458 1459 OperandMatchResultTy parseSDWASel(OperandVector &Operands, StringRef Prefix, 1460 AMDGPUOperand::ImmTy Type); 1461 OperandMatchResultTy parseSDWADstUnused(OperandVector &Operands); 1462 void cvtSdwaVOP1(MCInst &Inst, const OperandVector &Operands); 1463 void cvtSdwaVOP2(MCInst &Inst, const OperandVector &Operands); 1464 void cvtSdwaVOP2b(MCInst &Inst, const OperandVector &Operands); 1465 void cvtSdwaVOP2e(MCInst &Inst, const OperandVector &Operands); 1466 void cvtSdwaVOPC(MCInst &Inst, const OperandVector &Operands); 1467 void cvtSDWA(MCInst &Inst, const OperandVector &Operands, 1468 uint64_t BasicInstType, 1469 bool SkipDstVcc = false, 1470 bool SkipSrcVcc = false); 1471 1472 AMDGPUOperand::Ptr defaultBLGP() const; 1473 AMDGPUOperand::Ptr defaultCBSZ() const; 1474 AMDGPUOperand::Ptr defaultABID() const; 1475 1476 OperandMatchResultTy parseEndpgmOp(OperandVector &Operands); 1477 AMDGPUOperand::Ptr defaultEndpgmImmOperands() const; 1478 }; 1479 1480 struct OptionalOperand { 1481 const char *Name; 1482 AMDGPUOperand::ImmTy Type; 1483 bool IsBit; 1484 bool (*ConvertResult)(int64_t&); 1485 }; 1486 1487 } // end anonymous namespace 1488 1489 // May be called with integer type with equivalent bitwidth. 1490 static const fltSemantics *getFltSemantics(unsigned Size) { 1491 switch (Size) { 1492 case 4: 1493 return &APFloat::IEEEsingle(); 1494 case 8: 1495 return &APFloat::IEEEdouble(); 1496 case 2: 1497 return &APFloat::IEEEhalf(); 1498 default: 1499 llvm_unreachable("unsupported fp type"); 1500 } 1501 } 1502 1503 static const fltSemantics *getFltSemantics(MVT VT) { 1504 return getFltSemantics(VT.getSizeInBits() / 8); 1505 } 1506 1507 static const fltSemantics *getOpFltSemantics(uint8_t OperandType) { 1508 switch (OperandType) { 1509 case AMDGPU::OPERAND_REG_IMM_INT32: 1510 case AMDGPU::OPERAND_REG_IMM_FP32: 1511 case AMDGPU::OPERAND_REG_INLINE_C_INT32: 1512 case AMDGPU::OPERAND_REG_INLINE_C_FP32: 1513 case AMDGPU::OPERAND_REG_INLINE_AC_INT32: 1514 case AMDGPU::OPERAND_REG_INLINE_AC_FP32: 1515 return &APFloat::IEEEsingle(); 1516 case AMDGPU::OPERAND_REG_IMM_INT64: 1517 case AMDGPU::OPERAND_REG_IMM_FP64: 1518 case AMDGPU::OPERAND_REG_INLINE_C_INT64: 1519 case AMDGPU::OPERAND_REG_INLINE_C_FP64: 1520 return &APFloat::IEEEdouble(); 1521 case AMDGPU::OPERAND_REG_IMM_INT16: 1522 case AMDGPU::OPERAND_REG_IMM_FP16: 1523 case AMDGPU::OPERAND_REG_INLINE_C_INT16: 1524 case AMDGPU::OPERAND_REG_INLINE_C_FP16: 1525 case AMDGPU::OPERAND_REG_INLINE_C_V2INT16: 1526 case AMDGPU::OPERAND_REG_INLINE_C_V2FP16: 1527 case AMDGPU::OPERAND_REG_INLINE_AC_INT16: 1528 case AMDGPU::OPERAND_REG_INLINE_AC_FP16: 1529 case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16: 1530 case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16: 1531 case AMDGPU::OPERAND_REG_IMM_V2INT16: 1532 case AMDGPU::OPERAND_REG_IMM_V2FP16: 1533 return &APFloat::IEEEhalf(); 1534 default: 1535 llvm_unreachable("unsupported fp type"); 1536 } 1537 } 1538 1539 //===----------------------------------------------------------------------===// 1540 // Operand 1541 //===----------------------------------------------------------------------===// 1542 1543 static bool canLosslesslyConvertToFPType(APFloat &FPLiteral, MVT VT) { 1544 bool Lost; 1545 1546 // Convert literal to single precision 1547 APFloat::opStatus Status = FPLiteral.convert(*getFltSemantics(VT), 1548 APFloat::rmNearestTiesToEven, 1549 &Lost); 1550 // We allow precision lost but not overflow or underflow 1551 if (Status != APFloat::opOK && 1552 Lost && 1553 ((Status & APFloat::opOverflow) != 0 || 1554 (Status & APFloat::opUnderflow) != 0)) { 1555 return false; 1556 } 1557 1558 return true; 1559 } 1560 1561 static bool isSafeTruncation(int64_t Val, unsigned Size) { 1562 return isUIntN(Size, Val) || isIntN(Size, Val); 1563 } 1564 1565 static bool isInlineableLiteralOp16(int64_t Val, MVT VT, bool HasInv2Pi) { 1566 if (VT.getScalarType() == MVT::i16) { 1567 // FP immediate values are broken. 1568 return isInlinableIntLiteral(Val); 1569 } 1570 1571 // f16/v2f16 operands work correctly for all values. 1572 return AMDGPU::isInlinableLiteral16(Val, HasInv2Pi); 1573 } 1574 1575 bool AMDGPUOperand::isInlinableImm(MVT type) const { 1576 1577 // This is a hack to enable named inline values like 1578 // shared_base with both 32-bit and 64-bit operands. 1579 // Note that these values are defined as 1580 // 32-bit operands only. 1581 if (isInlineValue()) { 1582 return true; 1583 } 1584 1585 if (!isImmTy(ImmTyNone)) { 1586 // Only plain immediates are inlinable (e.g. "clamp" attribute is not) 1587 return false; 1588 } 1589 // TODO: We should avoid using host float here. It would be better to 1590 // check the float bit values which is what a few other places do. 1591 // We've had bot failures before due to weird NaN support on mips hosts. 1592 1593 APInt Literal(64, Imm.Val); 1594 1595 if (Imm.IsFPImm) { // We got fp literal token 1596 if (type == MVT::f64 || type == MVT::i64) { // Expected 64-bit operand 1597 return AMDGPU::isInlinableLiteral64(Imm.Val, 1598 AsmParser->hasInv2PiInlineImm()); 1599 } 1600 1601 APFloat FPLiteral(APFloat::IEEEdouble(), APInt(64, Imm.Val)); 1602 if (!canLosslesslyConvertToFPType(FPLiteral, type)) 1603 return false; 1604 1605 if (type.getScalarSizeInBits() == 16) { 1606 return isInlineableLiteralOp16( 1607 static_cast<int16_t>(FPLiteral.bitcastToAPInt().getZExtValue()), 1608 type, AsmParser->hasInv2PiInlineImm()); 1609 } 1610 1611 // Check if single precision literal is inlinable 1612 return AMDGPU::isInlinableLiteral32( 1613 static_cast<int32_t>(FPLiteral.bitcastToAPInt().getZExtValue()), 1614 AsmParser->hasInv2PiInlineImm()); 1615 } 1616 1617 // We got int literal token. 1618 if (type == MVT::f64 || type == MVT::i64) { // Expected 64-bit operand 1619 return AMDGPU::isInlinableLiteral64(Imm.Val, 1620 AsmParser->hasInv2PiInlineImm()); 1621 } 1622 1623 if (!isSafeTruncation(Imm.Val, type.getScalarSizeInBits())) { 1624 return false; 1625 } 1626 1627 if (type.getScalarSizeInBits() == 16) { 1628 return isInlineableLiteralOp16( 1629 static_cast<int16_t>(Literal.getLoBits(16).getSExtValue()), 1630 type, AsmParser->hasInv2PiInlineImm()); 1631 } 1632 1633 return AMDGPU::isInlinableLiteral32( 1634 static_cast<int32_t>(Literal.getLoBits(32).getZExtValue()), 1635 AsmParser->hasInv2PiInlineImm()); 1636 } 1637 1638 bool AMDGPUOperand::isLiteralImm(MVT type) const { 1639 // Check that this immediate can be added as literal 1640 if (!isImmTy(ImmTyNone)) { 1641 return false; 1642 } 1643 1644 if (!Imm.IsFPImm) { 1645 // We got int literal token. 1646 1647 if (type == MVT::f64 && hasFPModifiers()) { 1648 // Cannot apply fp modifiers to int literals preserving the same semantics 1649 // for VOP1/2/C and VOP3 because of integer truncation. To avoid ambiguity, 1650 // disable these cases. 1651 return false; 1652 } 1653 1654 unsigned Size = type.getSizeInBits(); 1655 if (Size == 64) 1656 Size = 32; 1657 1658 // FIXME: 64-bit operands can zero extend, sign extend, or pad zeroes for FP 1659 // types. 1660 return isSafeTruncation(Imm.Val, Size); 1661 } 1662 1663 // We got fp literal token 1664 if (type == MVT::f64) { // Expected 64-bit fp operand 1665 // We would set low 64-bits of literal to zeroes but we accept this literals 1666 return true; 1667 } 1668 1669 if (type == MVT::i64) { // Expected 64-bit int operand 1670 // We don't allow fp literals in 64-bit integer instructions. It is 1671 // unclear how we should encode them. 1672 return false; 1673 } 1674 1675 // We allow fp literals with f16x2 operands assuming that the specified 1676 // literal goes into the lower half and the upper half is zero. We also 1677 // require that the literal may be losslesly converted to f16. 1678 MVT ExpectedType = (type == MVT::v2f16)? MVT::f16 : 1679 (type == MVT::v2i16)? MVT::i16 : type; 1680 1681 APFloat FPLiteral(APFloat::IEEEdouble(), APInt(64, Imm.Val)); 1682 return canLosslesslyConvertToFPType(FPLiteral, ExpectedType); 1683 } 1684 1685 bool AMDGPUOperand::isRegClass(unsigned RCID) const { 1686 return isRegKind() && AsmParser->getMRI()->getRegClass(RCID).contains(getReg()); 1687 } 1688 1689 bool AMDGPUOperand::isSDWAOperand(MVT type) const { 1690 if (AsmParser->isVI()) 1691 return isVReg32(); 1692 else if (AsmParser->isGFX9() || AsmParser->isGFX10()) 1693 return isRegClass(AMDGPU::VS_32RegClassID) || isInlinableImm(type); 1694 else 1695 return false; 1696 } 1697 1698 bool AMDGPUOperand::isSDWAFP16Operand() const { 1699 return isSDWAOperand(MVT::f16); 1700 } 1701 1702 bool AMDGPUOperand::isSDWAFP32Operand() const { 1703 return isSDWAOperand(MVT::f32); 1704 } 1705 1706 bool AMDGPUOperand::isSDWAInt16Operand() const { 1707 return isSDWAOperand(MVT::i16); 1708 } 1709 1710 bool AMDGPUOperand::isSDWAInt32Operand() const { 1711 return isSDWAOperand(MVT::i32); 1712 } 1713 1714 bool AMDGPUOperand::isBoolReg() const { 1715 return (AsmParser->getFeatureBits()[AMDGPU::FeatureWavefrontSize64] && isSCSrcB64()) || 1716 (AsmParser->getFeatureBits()[AMDGPU::FeatureWavefrontSize32] && isSCSrcB32()); 1717 } 1718 1719 uint64_t AMDGPUOperand::applyInputFPModifiers(uint64_t Val, unsigned Size) const 1720 { 1721 assert(isImmTy(ImmTyNone) && Imm.Mods.hasFPModifiers()); 1722 assert(Size == 2 || Size == 4 || Size == 8); 1723 1724 const uint64_t FpSignMask = (1ULL << (Size * 8 - 1)); 1725 1726 if (Imm.Mods.Abs) { 1727 Val &= ~FpSignMask; 1728 } 1729 if (Imm.Mods.Neg) { 1730 Val ^= FpSignMask; 1731 } 1732 1733 return Val; 1734 } 1735 1736 void AMDGPUOperand::addImmOperands(MCInst &Inst, unsigned N, bool ApplyModifiers) const { 1737 if (AMDGPU::isSISrcOperand(AsmParser->getMII()->get(Inst.getOpcode()), 1738 Inst.getNumOperands())) { 1739 addLiteralImmOperand(Inst, Imm.Val, 1740 ApplyModifiers & 1741 isImmTy(ImmTyNone) && Imm.Mods.hasFPModifiers()); 1742 } else { 1743 assert(!isImmTy(ImmTyNone) || !hasModifiers()); 1744 Inst.addOperand(MCOperand::createImm(Imm.Val)); 1745 } 1746 } 1747 1748 void AMDGPUOperand::addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyModifiers) const { 1749 const auto& InstDesc = AsmParser->getMII()->get(Inst.getOpcode()); 1750 auto OpNum = Inst.getNumOperands(); 1751 // Check that this operand accepts literals 1752 assert(AMDGPU::isSISrcOperand(InstDesc, OpNum)); 1753 1754 if (ApplyModifiers) { 1755 assert(AMDGPU::isSISrcFPOperand(InstDesc, OpNum)); 1756 const unsigned Size = Imm.IsFPImm ? sizeof(double) : getOperandSize(InstDesc, OpNum); 1757 Val = applyInputFPModifiers(Val, Size); 1758 } 1759 1760 APInt Literal(64, Val); 1761 uint8_t OpTy = InstDesc.OpInfo[OpNum].OperandType; 1762 1763 if (Imm.IsFPImm) { // We got fp literal token 1764 switch (OpTy) { 1765 case AMDGPU::OPERAND_REG_IMM_INT64: 1766 case AMDGPU::OPERAND_REG_IMM_FP64: 1767 case AMDGPU::OPERAND_REG_INLINE_C_INT64: 1768 case AMDGPU::OPERAND_REG_INLINE_C_FP64: 1769 if (AMDGPU::isInlinableLiteral64(Literal.getZExtValue(), 1770 AsmParser->hasInv2PiInlineImm())) { 1771 Inst.addOperand(MCOperand::createImm(Literal.getZExtValue())); 1772 return; 1773 } 1774 1775 // Non-inlineable 1776 if (AMDGPU::isSISrcFPOperand(InstDesc, OpNum)) { // Expected 64-bit fp operand 1777 // For fp operands we check if low 32 bits are zeros 1778 if (Literal.getLoBits(32) != 0) { 1779 const_cast<AMDGPUAsmParser *>(AsmParser)->Warning(Inst.getLoc(), 1780 "Can't encode literal as exact 64-bit floating-point operand. " 1781 "Low 32-bits will be set to zero"); 1782 } 1783 1784 Inst.addOperand(MCOperand::createImm(Literal.lshr(32).getZExtValue())); 1785 return; 1786 } 1787 1788 // We don't allow fp literals in 64-bit integer instructions. It is 1789 // unclear how we should encode them. This case should be checked earlier 1790 // in predicate methods (isLiteralImm()) 1791 llvm_unreachable("fp literal in 64-bit integer instruction."); 1792 1793 case AMDGPU::OPERAND_REG_IMM_INT32: 1794 case AMDGPU::OPERAND_REG_IMM_FP32: 1795 case AMDGPU::OPERAND_REG_INLINE_C_INT32: 1796 case AMDGPU::OPERAND_REG_INLINE_C_FP32: 1797 case AMDGPU::OPERAND_REG_INLINE_AC_INT32: 1798 case AMDGPU::OPERAND_REG_INLINE_AC_FP32: 1799 case AMDGPU::OPERAND_REG_IMM_INT16: 1800 case AMDGPU::OPERAND_REG_IMM_FP16: 1801 case AMDGPU::OPERAND_REG_INLINE_C_INT16: 1802 case AMDGPU::OPERAND_REG_INLINE_C_FP16: 1803 case AMDGPU::OPERAND_REG_INLINE_C_V2INT16: 1804 case AMDGPU::OPERAND_REG_INLINE_C_V2FP16: 1805 case AMDGPU::OPERAND_REG_INLINE_AC_INT16: 1806 case AMDGPU::OPERAND_REG_INLINE_AC_FP16: 1807 case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16: 1808 case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16: 1809 case AMDGPU::OPERAND_REG_IMM_V2INT16: 1810 case AMDGPU::OPERAND_REG_IMM_V2FP16: { 1811 bool lost; 1812 APFloat FPLiteral(APFloat::IEEEdouble(), Literal); 1813 // Convert literal to single precision 1814 FPLiteral.convert(*getOpFltSemantics(OpTy), 1815 APFloat::rmNearestTiesToEven, &lost); 1816 // We allow precision lost but not overflow or underflow. This should be 1817 // checked earlier in isLiteralImm() 1818 1819 uint64_t ImmVal = FPLiteral.bitcastToAPInt().getZExtValue(); 1820 Inst.addOperand(MCOperand::createImm(ImmVal)); 1821 return; 1822 } 1823 default: 1824 llvm_unreachable("invalid operand size"); 1825 } 1826 1827 return; 1828 } 1829 1830 // We got int literal token. 1831 // Only sign extend inline immediates. 1832 switch (OpTy) { 1833 case AMDGPU::OPERAND_REG_IMM_INT32: 1834 case AMDGPU::OPERAND_REG_IMM_FP32: 1835 case AMDGPU::OPERAND_REG_INLINE_C_INT32: 1836 case AMDGPU::OPERAND_REG_INLINE_C_FP32: 1837 case AMDGPU::OPERAND_REG_INLINE_AC_INT32: 1838 case AMDGPU::OPERAND_REG_INLINE_AC_FP32: 1839 case AMDGPU::OPERAND_REG_IMM_V2INT16: 1840 case AMDGPU::OPERAND_REG_IMM_V2FP16: 1841 if (isSafeTruncation(Val, 32) && 1842 AMDGPU::isInlinableLiteral32(static_cast<int32_t>(Val), 1843 AsmParser->hasInv2PiInlineImm())) { 1844 Inst.addOperand(MCOperand::createImm(Val)); 1845 return; 1846 } 1847 1848 Inst.addOperand(MCOperand::createImm(Val & 0xffffffff)); 1849 return; 1850 1851 case AMDGPU::OPERAND_REG_IMM_INT64: 1852 case AMDGPU::OPERAND_REG_IMM_FP64: 1853 case AMDGPU::OPERAND_REG_INLINE_C_INT64: 1854 case AMDGPU::OPERAND_REG_INLINE_C_FP64: 1855 if (AMDGPU::isInlinableLiteral64(Val, AsmParser->hasInv2PiInlineImm())) { 1856 Inst.addOperand(MCOperand::createImm(Val)); 1857 return; 1858 } 1859 1860 Inst.addOperand(MCOperand::createImm(Lo_32(Val))); 1861 return; 1862 1863 case AMDGPU::OPERAND_REG_IMM_INT16: 1864 case AMDGPU::OPERAND_REG_IMM_FP16: 1865 case AMDGPU::OPERAND_REG_INLINE_C_INT16: 1866 case AMDGPU::OPERAND_REG_INLINE_C_FP16: 1867 case AMDGPU::OPERAND_REG_INLINE_AC_INT16: 1868 case AMDGPU::OPERAND_REG_INLINE_AC_FP16: 1869 if (isSafeTruncation(Val, 16) && 1870 AMDGPU::isInlinableLiteral16(static_cast<int16_t>(Val), 1871 AsmParser->hasInv2PiInlineImm())) { 1872 Inst.addOperand(MCOperand::createImm(Val)); 1873 return; 1874 } 1875 1876 Inst.addOperand(MCOperand::createImm(Val & 0xffff)); 1877 return; 1878 1879 case AMDGPU::OPERAND_REG_INLINE_C_V2INT16: 1880 case AMDGPU::OPERAND_REG_INLINE_C_V2FP16: 1881 case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16: 1882 case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16: { 1883 assert(isSafeTruncation(Val, 16)); 1884 assert(AMDGPU::isInlinableLiteral16(static_cast<int16_t>(Val), 1885 AsmParser->hasInv2PiInlineImm())); 1886 1887 Inst.addOperand(MCOperand::createImm(Val)); 1888 return; 1889 } 1890 default: 1891 llvm_unreachable("invalid operand size"); 1892 } 1893 } 1894 1895 template <unsigned Bitwidth> 1896 void AMDGPUOperand::addKImmFPOperands(MCInst &Inst, unsigned N) const { 1897 APInt Literal(64, Imm.Val); 1898 1899 if (!Imm.IsFPImm) { 1900 // We got int literal token. 1901 Inst.addOperand(MCOperand::createImm(Literal.getLoBits(Bitwidth).getZExtValue())); 1902 return; 1903 } 1904 1905 bool Lost; 1906 APFloat FPLiteral(APFloat::IEEEdouble(), Literal); 1907 FPLiteral.convert(*getFltSemantics(Bitwidth / 8), 1908 APFloat::rmNearestTiesToEven, &Lost); 1909 Inst.addOperand(MCOperand::createImm(FPLiteral.bitcastToAPInt().getZExtValue())); 1910 } 1911 1912 void AMDGPUOperand::addRegOperands(MCInst &Inst, unsigned N) const { 1913 Inst.addOperand(MCOperand::createReg(AMDGPU::getMCReg(getReg(), AsmParser->getSTI()))); 1914 } 1915 1916 static bool isInlineValue(unsigned Reg) { 1917 switch (Reg) { 1918 case AMDGPU::SRC_SHARED_BASE: 1919 case AMDGPU::SRC_SHARED_LIMIT: 1920 case AMDGPU::SRC_PRIVATE_BASE: 1921 case AMDGPU::SRC_PRIVATE_LIMIT: 1922 case AMDGPU::SRC_POPS_EXITING_WAVE_ID: 1923 return true; 1924 case AMDGPU::SRC_VCCZ: 1925 case AMDGPU::SRC_EXECZ: 1926 case AMDGPU::SRC_SCC: 1927 return true; 1928 case AMDGPU::SGPR_NULL: 1929 return true; 1930 default: 1931 return false; 1932 } 1933 } 1934 1935 bool AMDGPUOperand::isInlineValue() const { 1936 return isRegKind() && ::isInlineValue(getReg()); 1937 } 1938 1939 //===----------------------------------------------------------------------===// 1940 // AsmParser 1941 //===----------------------------------------------------------------------===// 1942 1943 static int getRegClass(RegisterKind Is, unsigned RegWidth) { 1944 if (Is == IS_VGPR) { 1945 switch (RegWidth) { 1946 default: return -1; 1947 case 1: return AMDGPU::VGPR_32RegClassID; 1948 case 2: return AMDGPU::VReg_64RegClassID; 1949 case 3: return AMDGPU::VReg_96RegClassID; 1950 case 4: return AMDGPU::VReg_128RegClassID; 1951 case 5: return AMDGPU::VReg_160RegClassID; 1952 case 6: return AMDGPU::VReg_192RegClassID; 1953 case 8: return AMDGPU::VReg_256RegClassID; 1954 case 16: return AMDGPU::VReg_512RegClassID; 1955 case 32: return AMDGPU::VReg_1024RegClassID; 1956 } 1957 } else if (Is == IS_TTMP) { 1958 switch (RegWidth) { 1959 default: return -1; 1960 case 1: return AMDGPU::TTMP_32RegClassID; 1961 case 2: return AMDGPU::TTMP_64RegClassID; 1962 case 4: return AMDGPU::TTMP_128RegClassID; 1963 case 8: return AMDGPU::TTMP_256RegClassID; 1964 case 16: return AMDGPU::TTMP_512RegClassID; 1965 } 1966 } else if (Is == IS_SGPR) { 1967 switch (RegWidth) { 1968 default: return -1; 1969 case 1: return AMDGPU::SGPR_32RegClassID; 1970 case 2: return AMDGPU::SGPR_64RegClassID; 1971 case 3: return AMDGPU::SGPR_96RegClassID; 1972 case 4: return AMDGPU::SGPR_128RegClassID; 1973 case 5: return AMDGPU::SGPR_160RegClassID; 1974 case 6: return AMDGPU::SGPR_192RegClassID; 1975 case 8: return AMDGPU::SGPR_256RegClassID; 1976 case 16: return AMDGPU::SGPR_512RegClassID; 1977 } 1978 } else if (Is == IS_AGPR) { 1979 switch (RegWidth) { 1980 default: return -1; 1981 case 1: return AMDGPU::AGPR_32RegClassID; 1982 case 2: return AMDGPU::AReg_64RegClassID; 1983 case 3: return AMDGPU::AReg_96RegClassID; 1984 case 4: return AMDGPU::AReg_128RegClassID; 1985 case 5: return AMDGPU::AReg_160RegClassID; 1986 case 6: return AMDGPU::AReg_192RegClassID; 1987 case 8: return AMDGPU::AReg_256RegClassID; 1988 case 16: return AMDGPU::AReg_512RegClassID; 1989 case 32: return AMDGPU::AReg_1024RegClassID; 1990 } 1991 } 1992 return -1; 1993 } 1994 1995 static unsigned getSpecialRegForName(StringRef RegName) { 1996 return StringSwitch<unsigned>(RegName) 1997 .Case("exec", AMDGPU::EXEC) 1998 .Case("vcc", AMDGPU::VCC) 1999 .Case("flat_scratch", AMDGPU::FLAT_SCR) 2000 .Case("xnack_mask", AMDGPU::XNACK_MASK) 2001 .Case("shared_base", AMDGPU::SRC_SHARED_BASE) 2002 .Case("src_shared_base", AMDGPU::SRC_SHARED_BASE) 2003 .Case("shared_limit", AMDGPU::SRC_SHARED_LIMIT) 2004 .Case("src_shared_limit", AMDGPU::SRC_SHARED_LIMIT) 2005 .Case("private_base", AMDGPU::SRC_PRIVATE_BASE) 2006 .Case("src_private_base", AMDGPU::SRC_PRIVATE_BASE) 2007 .Case("private_limit", AMDGPU::SRC_PRIVATE_LIMIT) 2008 .Case("src_private_limit", AMDGPU::SRC_PRIVATE_LIMIT) 2009 .Case("pops_exiting_wave_id", AMDGPU::SRC_POPS_EXITING_WAVE_ID) 2010 .Case("src_pops_exiting_wave_id", AMDGPU::SRC_POPS_EXITING_WAVE_ID) 2011 .Case("lds_direct", AMDGPU::LDS_DIRECT) 2012 .Case("src_lds_direct", AMDGPU::LDS_DIRECT) 2013 .Case("m0", AMDGPU::M0) 2014 .Case("vccz", AMDGPU::SRC_VCCZ) 2015 .Case("src_vccz", AMDGPU::SRC_VCCZ) 2016 .Case("execz", AMDGPU::SRC_EXECZ) 2017 .Case("src_execz", AMDGPU::SRC_EXECZ) 2018 .Case("scc", AMDGPU::SRC_SCC) 2019 .Case("src_scc", AMDGPU::SRC_SCC) 2020 .Case("tba", AMDGPU::TBA) 2021 .Case("tma", AMDGPU::TMA) 2022 .Case("flat_scratch_lo", AMDGPU::FLAT_SCR_LO) 2023 .Case("flat_scratch_hi", AMDGPU::FLAT_SCR_HI) 2024 .Case("xnack_mask_lo", AMDGPU::XNACK_MASK_LO) 2025 .Case("xnack_mask_hi", AMDGPU::XNACK_MASK_HI) 2026 .Case("vcc_lo", AMDGPU::VCC_LO) 2027 .Case("vcc_hi", AMDGPU::VCC_HI) 2028 .Case("exec_lo", AMDGPU::EXEC_LO) 2029 .Case("exec_hi", AMDGPU::EXEC_HI) 2030 .Case("tma_lo", AMDGPU::TMA_LO) 2031 .Case("tma_hi", AMDGPU::TMA_HI) 2032 .Case("tba_lo", AMDGPU::TBA_LO) 2033 .Case("tba_hi", AMDGPU::TBA_HI) 2034 .Case("pc", AMDGPU::PC_REG) 2035 .Case("null", AMDGPU::SGPR_NULL) 2036 .Default(AMDGPU::NoRegister); 2037 } 2038 2039 bool AMDGPUAsmParser::ParseRegister(unsigned &RegNo, SMLoc &StartLoc, 2040 SMLoc &EndLoc, bool RestoreOnFailure) { 2041 auto R = parseRegister(); 2042 if (!R) return true; 2043 assert(R->isReg()); 2044 RegNo = R->getReg(); 2045 StartLoc = R->getStartLoc(); 2046 EndLoc = R->getEndLoc(); 2047 return false; 2048 } 2049 2050 bool AMDGPUAsmParser::ParseRegister(unsigned &RegNo, SMLoc &StartLoc, 2051 SMLoc &EndLoc) { 2052 return ParseRegister(RegNo, StartLoc, EndLoc, /*RestoreOnFailure=*/false); 2053 } 2054 2055 OperandMatchResultTy AMDGPUAsmParser::tryParseRegister(unsigned &RegNo, 2056 SMLoc &StartLoc, 2057 SMLoc &EndLoc) { 2058 bool Result = 2059 ParseRegister(RegNo, StartLoc, EndLoc, /*RestoreOnFailure=*/true); 2060 bool PendingErrors = getParser().hasPendingError(); 2061 getParser().clearPendingErrors(); 2062 if (PendingErrors) 2063 return MatchOperand_ParseFail; 2064 if (Result) 2065 return MatchOperand_NoMatch; 2066 return MatchOperand_Success; 2067 } 2068 2069 bool AMDGPUAsmParser::AddNextRegisterToList(unsigned &Reg, unsigned &RegWidth, 2070 RegisterKind RegKind, unsigned Reg1, 2071 SMLoc Loc) { 2072 switch (RegKind) { 2073 case IS_SPECIAL: 2074 if (Reg == AMDGPU::EXEC_LO && Reg1 == AMDGPU::EXEC_HI) { 2075 Reg = AMDGPU::EXEC; 2076 RegWidth = 2; 2077 return true; 2078 } 2079 if (Reg == AMDGPU::FLAT_SCR_LO && Reg1 == AMDGPU::FLAT_SCR_HI) { 2080 Reg = AMDGPU::FLAT_SCR; 2081 RegWidth = 2; 2082 return true; 2083 } 2084 if (Reg == AMDGPU::XNACK_MASK_LO && Reg1 == AMDGPU::XNACK_MASK_HI) { 2085 Reg = AMDGPU::XNACK_MASK; 2086 RegWidth = 2; 2087 return true; 2088 } 2089 if (Reg == AMDGPU::VCC_LO && Reg1 == AMDGPU::VCC_HI) { 2090 Reg = AMDGPU::VCC; 2091 RegWidth = 2; 2092 return true; 2093 } 2094 if (Reg == AMDGPU::TBA_LO && Reg1 == AMDGPU::TBA_HI) { 2095 Reg = AMDGPU::TBA; 2096 RegWidth = 2; 2097 return true; 2098 } 2099 if (Reg == AMDGPU::TMA_LO && Reg1 == AMDGPU::TMA_HI) { 2100 Reg = AMDGPU::TMA; 2101 RegWidth = 2; 2102 return true; 2103 } 2104 Error(Loc, "register does not fit in the list"); 2105 return false; 2106 case IS_VGPR: 2107 case IS_SGPR: 2108 case IS_AGPR: 2109 case IS_TTMP: 2110 if (Reg1 != Reg + RegWidth) { 2111 Error(Loc, "registers in a list must have consecutive indices"); 2112 return false; 2113 } 2114 RegWidth++; 2115 return true; 2116 default: 2117 llvm_unreachable("unexpected register kind"); 2118 } 2119 } 2120 2121 struct RegInfo { 2122 StringLiteral Name; 2123 RegisterKind Kind; 2124 }; 2125 2126 static constexpr RegInfo RegularRegisters[] = { 2127 {{"v"}, IS_VGPR}, 2128 {{"s"}, IS_SGPR}, 2129 {{"ttmp"}, IS_TTMP}, 2130 {{"acc"}, IS_AGPR}, 2131 {{"a"}, IS_AGPR}, 2132 }; 2133 2134 static bool isRegularReg(RegisterKind Kind) { 2135 return Kind == IS_VGPR || 2136 Kind == IS_SGPR || 2137 Kind == IS_TTMP || 2138 Kind == IS_AGPR; 2139 } 2140 2141 static const RegInfo* getRegularRegInfo(StringRef Str) { 2142 for (const RegInfo &Reg : RegularRegisters) 2143 if (Str.startswith(Reg.Name)) 2144 return &Reg; 2145 return nullptr; 2146 } 2147 2148 static bool getRegNum(StringRef Str, unsigned& Num) { 2149 return !Str.getAsInteger(10, Num); 2150 } 2151 2152 bool 2153 AMDGPUAsmParser::isRegister(const AsmToken &Token, 2154 const AsmToken &NextToken) const { 2155 2156 // A list of consecutive registers: [s0,s1,s2,s3] 2157 if (Token.is(AsmToken::LBrac)) 2158 return true; 2159 2160 if (!Token.is(AsmToken::Identifier)) 2161 return false; 2162 2163 // A single register like s0 or a range of registers like s[0:1] 2164 2165 StringRef Str = Token.getString(); 2166 const RegInfo *Reg = getRegularRegInfo(Str); 2167 if (Reg) { 2168 StringRef RegName = Reg->Name; 2169 StringRef RegSuffix = Str.substr(RegName.size()); 2170 if (!RegSuffix.empty()) { 2171 unsigned Num; 2172 // A single register with an index: rXX 2173 if (getRegNum(RegSuffix, Num)) 2174 return true; 2175 } else { 2176 // A range of registers: r[XX:YY]. 2177 if (NextToken.is(AsmToken::LBrac)) 2178 return true; 2179 } 2180 } 2181 2182 return getSpecialRegForName(Str) != AMDGPU::NoRegister; 2183 } 2184 2185 bool 2186 AMDGPUAsmParser::isRegister() 2187 { 2188 return isRegister(getToken(), peekToken()); 2189 } 2190 2191 unsigned 2192 AMDGPUAsmParser::getRegularReg(RegisterKind RegKind, 2193 unsigned RegNum, 2194 unsigned RegWidth, 2195 SMLoc Loc) { 2196 2197 assert(isRegularReg(RegKind)); 2198 2199 unsigned AlignSize = 1; 2200 if (RegKind == IS_SGPR || RegKind == IS_TTMP) { 2201 // SGPR and TTMP registers must be aligned. 2202 // Max required alignment is 4 dwords. 2203 AlignSize = std::min(RegWidth, 4u); 2204 } 2205 2206 if (RegNum % AlignSize != 0) { 2207 Error(Loc, "invalid register alignment"); 2208 return AMDGPU::NoRegister; 2209 } 2210 2211 unsigned RegIdx = RegNum / AlignSize; 2212 int RCID = getRegClass(RegKind, RegWidth); 2213 if (RCID == -1) { 2214 Error(Loc, "invalid or unsupported register size"); 2215 return AMDGPU::NoRegister; 2216 } 2217 2218 const MCRegisterInfo *TRI = getContext().getRegisterInfo(); 2219 const MCRegisterClass RC = TRI->getRegClass(RCID); 2220 if (RegIdx >= RC.getNumRegs()) { 2221 Error(Loc, "register index is out of range"); 2222 return AMDGPU::NoRegister; 2223 } 2224 2225 return RC.getRegister(RegIdx); 2226 } 2227 2228 bool 2229 AMDGPUAsmParser::ParseRegRange(unsigned& Num, unsigned& Width) { 2230 int64_t RegLo, RegHi; 2231 if (!skipToken(AsmToken::LBrac, "missing register index")) 2232 return false; 2233 2234 SMLoc FirstIdxLoc = getLoc(); 2235 SMLoc SecondIdxLoc; 2236 2237 if (!parseExpr(RegLo)) 2238 return false; 2239 2240 if (trySkipToken(AsmToken::Colon)) { 2241 SecondIdxLoc = getLoc(); 2242 if (!parseExpr(RegHi)) 2243 return false; 2244 } else { 2245 RegHi = RegLo; 2246 } 2247 2248 if (!skipToken(AsmToken::RBrac, "expected a closing square bracket")) 2249 return false; 2250 2251 if (!isUInt<32>(RegLo)) { 2252 Error(FirstIdxLoc, "invalid register index"); 2253 return false; 2254 } 2255 2256 if (!isUInt<32>(RegHi)) { 2257 Error(SecondIdxLoc, "invalid register index"); 2258 return false; 2259 } 2260 2261 if (RegLo > RegHi) { 2262 Error(FirstIdxLoc, "first register index should not exceed second index"); 2263 return false; 2264 } 2265 2266 Num = static_cast<unsigned>(RegLo); 2267 Width = (RegHi - RegLo) + 1; 2268 return true; 2269 } 2270 2271 unsigned AMDGPUAsmParser::ParseSpecialReg(RegisterKind &RegKind, 2272 unsigned &RegNum, unsigned &RegWidth, 2273 SmallVectorImpl<AsmToken> &Tokens) { 2274 assert(isToken(AsmToken::Identifier)); 2275 unsigned Reg = getSpecialRegForName(getTokenStr()); 2276 if (Reg) { 2277 RegNum = 0; 2278 RegWidth = 1; 2279 RegKind = IS_SPECIAL; 2280 Tokens.push_back(getToken()); 2281 lex(); // skip register name 2282 } 2283 return Reg; 2284 } 2285 2286 unsigned AMDGPUAsmParser::ParseRegularReg(RegisterKind &RegKind, 2287 unsigned &RegNum, unsigned &RegWidth, 2288 SmallVectorImpl<AsmToken> &Tokens) { 2289 assert(isToken(AsmToken::Identifier)); 2290 StringRef RegName = getTokenStr(); 2291 auto Loc = getLoc(); 2292 2293 const RegInfo *RI = getRegularRegInfo(RegName); 2294 if (!RI) { 2295 Error(Loc, "invalid register name"); 2296 return AMDGPU::NoRegister; 2297 } 2298 2299 Tokens.push_back(getToken()); 2300 lex(); // skip register name 2301 2302 RegKind = RI->Kind; 2303 StringRef RegSuffix = RegName.substr(RI->Name.size()); 2304 if (!RegSuffix.empty()) { 2305 // Single 32-bit register: vXX. 2306 if (!getRegNum(RegSuffix, RegNum)) { 2307 Error(Loc, "invalid register index"); 2308 return AMDGPU::NoRegister; 2309 } 2310 RegWidth = 1; 2311 } else { 2312 // Range of registers: v[XX:YY]. ":YY" is optional. 2313 if (!ParseRegRange(RegNum, RegWidth)) 2314 return AMDGPU::NoRegister; 2315 } 2316 2317 return getRegularReg(RegKind, RegNum, RegWidth, Loc); 2318 } 2319 2320 unsigned AMDGPUAsmParser::ParseRegList(RegisterKind &RegKind, unsigned &RegNum, 2321 unsigned &RegWidth, 2322 SmallVectorImpl<AsmToken> &Tokens) { 2323 unsigned Reg = AMDGPU::NoRegister; 2324 auto ListLoc = getLoc(); 2325 2326 if (!skipToken(AsmToken::LBrac, 2327 "expected a register or a list of registers")) { 2328 return AMDGPU::NoRegister; 2329 } 2330 2331 // List of consecutive registers, e.g.: [s0,s1,s2,s3] 2332 2333 auto Loc = getLoc(); 2334 if (!ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth)) 2335 return AMDGPU::NoRegister; 2336 if (RegWidth != 1) { 2337 Error(Loc, "expected a single 32-bit register"); 2338 return AMDGPU::NoRegister; 2339 } 2340 2341 for (; trySkipToken(AsmToken::Comma); ) { 2342 RegisterKind NextRegKind; 2343 unsigned NextReg, NextRegNum, NextRegWidth; 2344 Loc = getLoc(); 2345 2346 if (!ParseAMDGPURegister(NextRegKind, NextReg, 2347 NextRegNum, NextRegWidth, 2348 Tokens)) { 2349 return AMDGPU::NoRegister; 2350 } 2351 if (NextRegWidth != 1) { 2352 Error(Loc, "expected a single 32-bit register"); 2353 return AMDGPU::NoRegister; 2354 } 2355 if (NextRegKind != RegKind) { 2356 Error(Loc, "registers in a list must be of the same kind"); 2357 return AMDGPU::NoRegister; 2358 } 2359 if (!AddNextRegisterToList(Reg, RegWidth, RegKind, NextReg, Loc)) 2360 return AMDGPU::NoRegister; 2361 } 2362 2363 if (!skipToken(AsmToken::RBrac, 2364 "expected a comma or a closing square bracket")) { 2365 return AMDGPU::NoRegister; 2366 } 2367 2368 if (isRegularReg(RegKind)) 2369 Reg = getRegularReg(RegKind, RegNum, RegWidth, ListLoc); 2370 2371 return Reg; 2372 } 2373 2374 bool AMDGPUAsmParser::ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg, 2375 unsigned &RegNum, unsigned &RegWidth, 2376 SmallVectorImpl<AsmToken> &Tokens) { 2377 auto Loc = getLoc(); 2378 Reg = AMDGPU::NoRegister; 2379 2380 if (isToken(AsmToken::Identifier)) { 2381 Reg = ParseSpecialReg(RegKind, RegNum, RegWidth, Tokens); 2382 if (Reg == AMDGPU::NoRegister) 2383 Reg = ParseRegularReg(RegKind, RegNum, RegWidth, Tokens); 2384 } else { 2385 Reg = ParseRegList(RegKind, RegNum, RegWidth, Tokens); 2386 } 2387 2388 const MCRegisterInfo *TRI = getContext().getRegisterInfo(); 2389 if (Reg == AMDGPU::NoRegister) { 2390 assert(Parser.hasPendingError()); 2391 return false; 2392 } 2393 2394 if (!subtargetHasRegister(*TRI, Reg)) { 2395 if (Reg == AMDGPU::SGPR_NULL) { 2396 Error(Loc, "'null' operand is not supported on this GPU"); 2397 } else { 2398 Error(Loc, "register not available on this GPU"); 2399 } 2400 return false; 2401 } 2402 2403 return true; 2404 } 2405 2406 bool AMDGPUAsmParser::ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg, 2407 unsigned &RegNum, unsigned &RegWidth, 2408 bool RestoreOnFailure /*=false*/) { 2409 Reg = AMDGPU::NoRegister; 2410 2411 SmallVector<AsmToken, 1> Tokens; 2412 if (ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth, Tokens)) { 2413 if (RestoreOnFailure) { 2414 while (!Tokens.empty()) { 2415 getLexer().UnLex(Tokens.pop_back_val()); 2416 } 2417 } 2418 return true; 2419 } 2420 return false; 2421 } 2422 2423 Optional<StringRef> 2424 AMDGPUAsmParser::getGprCountSymbolName(RegisterKind RegKind) { 2425 switch (RegKind) { 2426 case IS_VGPR: 2427 return StringRef(".amdgcn.next_free_vgpr"); 2428 case IS_SGPR: 2429 return StringRef(".amdgcn.next_free_sgpr"); 2430 default: 2431 return None; 2432 } 2433 } 2434 2435 void AMDGPUAsmParser::initializeGprCountSymbol(RegisterKind RegKind) { 2436 auto SymbolName = getGprCountSymbolName(RegKind); 2437 assert(SymbolName && "initializing invalid register kind"); 2438 MCSymbol *Sym = getContext().getOrCreateSymbol(*SymbolName); 2439 Sym->setVariableValue(MCConstantExpr::create(0, getContext())); 2440 } 2441 2442 bool AMDGPUAsmParser::updateGprCountSymbols(RegisterKind RegKind, 2443 unsigned DwordRegIndex, 2444 unsigned RegWidth) { 2445 // Symbols are only defined for GCN targets 2446 if (AMDGPU::getIsaVersion(getSTI().getCPU()).Major < 6) 2447 return true; 2448 2449 auto SymbolName = getGprCountSymbolName(RegKind); 2450 if (!SymbolName) 2451 return true; 2452 MCSymbol *Sym = getContext().getOrCreateSymbol(*SymbolName); 2453 2454 int64_t NewMax = DwordRegIndex + RegWidth - 1; 2455 int64_t OldCount; 2456 2457 if (!Sym->isVariable()) 2458 return !Error(getParser().getTok().getLoc(), 2459 ".amdgcn.next_free_{v,s}gpr symbols must be variable"); 2460 if (!Sym->getVariableValue(false)->evaluateAsAbsolute(OldCount)) 2461 return !Error( 2462 getParser().getTok().getLoc(), 2463 ".amdgcn.next_free_{v,s}gpr symbols must be absolute expressions"); 2464 2465 if (OldCount <= NewMax) 2466 Sym->setVariableValue(MCConstantExpr::create(NewMax + 1, getContext())); 2467 2468 return true; 2469 } 2470 2471 std::unique_ptr<AMDGPUOperand> 2472 AMDGPUAsmParser::parseRegister(bool RestoreOnFailure) { 2473 const auto &Tok = Parser.getTok(); 2474 SMLoc StartLoc = Tok.getLoc(); 2475 SMLoc EndLoc = Tok.getEndLoc(); 2476 RegisterKind RegKind; 2477 unsigned Reg, RegNum, RegWidth; 2478 2479 if (!ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth)) { 2480 return nullptr; 2481 } 2482 if (AMDGPU::IsaInfo::hasCodeObjectV3(&getSTI())) { 2483 if (!updateGprCountSymbols(RegKind, RegNum, RegWidth)) 2484 return nullptr; 2485 } else 2486 KernelScope.usesRegister(RegKind, RegNum, RegWidth); 2487 return AMDGPUOperand::CreateReg(this, Reg, StartLoc, EndLoc); 2488 } 2489 2490 OperandMatchResultTy 2491 AMDGPUAsmParser::parseImm(OperandVector &Operands, bool HasSP3AbsModifier) { 2492 // TODO: add syntactic sugar for 1/(2*PI) 2493 2494 assert(!isRegister()); 2495 assert(!isModifier()); 2496 2497 const auto& Tok = getToken(); 2498 const auto& NextTok = peekToken(); 2499 bool IsReal = Tok.is(AsmToken::Real); 2500 SMLoc S = getLoc(); 2501 bool Negate = false; 2502 2503 if (!IsReal && Tok.is(AsmToken::Minus) && NextTok.is(AsmToken::Real)) { 2504 lex(); 2505 IsReal = true; 2506 Negate = true; 2507 } 2508 2509 if (IsReal) { 2510 // Floating-point expressions are not supported. 2511 // Can only allow floating-point literals with an 2512 // optional sign. 2513 2514 StringRef Num = getTokenStr(); 2515 lex(); 2516 2517 APFloat RealVal(APFloat::IEEEdouble()); 2518 auto roundMode = APFloat::rmNearestTiesToEven; 2519 if (errorToBool(RealVal.convertFromString(Num, roundMode).takeError())) { 2520 return MatchOperand_ParseFail; 2521 } 2522 if (Negate) 2523 RealVal.changeSign(); 2524 2525 Operands.push_back( 2526 AMDGPUOperand::CreateImm(this, RealVal.bitcastToAPInt().getZExtValue(), S, 2527 AMDGPUOperand::ImmTyNone, true)); 2528 2529 return MatchOperand_Success; 2530 2531 } else { 2532 int64_t IntVal; 2533 const MCExpr *Expr; 2534 SMLoc S = getLoc(); 2535 2536 if (HasSP3AbsModifier) { 2537 // This is a workaround for handling expressions 2538 // as arguments of SP3 'abs' modifier, for example: 2539 // |1.0| 2540 // |-1| 2541 // |1+x| 2542 // This syntax is not compatible with syntax of standard 2543 // MC expressions (due to the trailing '|'). 2544 SMLoc EndLoc; 2545 if (getParser().parsePrimaryExpr(Expr, EndLoc, nullptr)) 2546 return MatchOperand_ParseFail; 2547 } else { 2548 if (Parser.parseExpression(Expr)) 2549 return MatchOperand_ParseFail; 2550 } 2551 2552 if (Expr->evaluateAsAbsolute(IntVal)) { 2553 Operands.push_back(AMDGPUOperand::CreateImm(this, IntVal, S)); 2554 } else { 2555 Operands.push_back(AMDGPUOperand::CreateExpr(this, Expr, S)); 2556 } 2557 2558 return MatchOperand_Success; 2559 } 2560 2561 return MatchOperand_NoMatch; 2562 } 2563 2564 OperandMatchResultTy 2565 AMDGPUAsmParser::parseReg(OperandVector &Operands) { 2566 if (!isRegister()) 2567 return MatchOperand_NoMatch; 2568 2569 if (auto R = parseRegister()) { 2570 assert(R->isReg()); 2571 Operands.push_back(std::move(R)); 2572 return MatchOperand_Success; 2573 } 2574 return MatchOperand_ParseFail; 2575 } 2576 2577 OperandMatchResultTy 2578 AMDGPUAsmParser::parseRegOrImm(OperandVector &Operands, bool HasSP3AbsMod) { 2579 auto res = parseReg(Operands); 2580 if (res != MatchOperand_NoMatch) { 2581 return res; 2582 } else if (isModifier()) { 2583 return MatchOperand_NoMatch; 2584 } else { 2585 return parseImm(Operands, HasSP3AbsMod); 2586 } 2587 } 2588 2589 bool 2590 AMDGPUAsmParser::isNamedOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const { 2591 if (Token.is(AsmToken::Identifier) && NextToken.is(AsmToken::LParen)) { 2592 const auto &str = Token.getString(); 2593 return str == "abs" || str == "neg" || str == "sext"; 2594 } 2595 return false; 2596 } 2597 2598 bool 2599 AMDGPUAsmParser::isOpcodeModifierWithVal(const AsmToken &Token, const AsmToken &NextToken) const { 2600 return Token.is(AsmToken::Identifier) && NextToken.is(AsmToken::Colon); 2601 } 2602 2603 bool 2604 AMDGPUAsmParser::isOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const { 2605 return isNamedOperandModifier(Token, NextToken) || Token.is(AsmToken::Pipe); 2606 } 2607 2608 bool 2609 AMDGPUAsmParser::isRegOrOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const { 2610 return isRegister(Token, NextToken) || isOperandModifier(Token, NextToken); 2611 } 2612 2613 // Check if this is an operand modifier or an opcode modifier 2614 // which may look like an expression but it is not. We should 2615 // avoid parsing these modifiers as expressions. Currently 2616 // recognized sequences are: 2617 // |...| 2618 // abs(...) 2619 // neg(...) 2620 // sext(...) 2621 // -reg 2622 // -|...| 2623 // -abs(...) 2624 // name:... 2625 // Note that simple opcode modifiers like 'gds' may be parsed as 2626 // expressions; this is a special case. See getExpressionAsToken. 2627 // 2628 bool 2629 AMDGPUAsmParser::isModifier() { 2630 2631 AsmToken Tok = getToken(); 2632 AsmToken NextToken[2]; 2633 peekTokens(NextToken); 2634 2635 return isOperandModifier(Tok, NextToken[0]) || 2636 (Tok.is(AsmToken::Minus) && isRegOrOperandModifier(NextToken[0], NextToken[1])) || 2637 isOpcodeModifierWithVal(Tok, NextToken[0]); 2638 } 2639 2640 // Check if the current token is an SP3 'neg' modifier. 2641 // Currently this modifier is allowed in the following context: 2642 // 2643 // 1. Before a register, e.g. "-v0", "-v[...]" or "-[v0,v1]". 2644 // 2. Before an 'abs' modifier: -abs(...) 2645 // 3. Before an SP3 'abs' modifier: -|...| 2646 // 2647 // In all other cases "-" is handled as a part 2648 // of an expression that follows the sign. 2649 // 2650 // Note: When "-" is followed by an integer literal, 2651 // this is interpreted as integer negation rather 2652 // than a floating-point NEG modifier applied to N. 2653 // Beside being contr-intuitive, such use of floating-point 2654 // NEG modifier would have resulted in different meaning 2655 // of integer literals used with VOP1/2/C and VOP3, 2656 // for example: 2657 // v_exp_f32_e32 v5, -1 // VOP1: src0 = 0xFFFFFFFF 2658 // v_exp_f32_e64 v5, -1 // VOP3: src0 = 0x80000001 2659 // Negative fp literals with preceding "-" are 2660 // handled likewise for unifomtity 2661 // 2662 bool 2663 AMDGPUAsmParser::parseSP3NegModifier() { 2664 2665 AsmToken NextToken[2]; 2666 peekTokens(NextToken); 2667 2668 if (isToken(AsmToken::Minus) && 2669 (isRegister(NextToken[0], NextToken[1]) || 2670 NextToken[0].is(AsmToken::Pipe) || 2671 isId(NextToken[0], "abs"))) { 2672 lex(); 2673 return true; 2674 } 2675 2676 return false; 2677 } 2678 2679 OperandMatchResultTy 2680 AMDGPUAsmParser::parseRegOrImmWithFPInputMods(OperandVector &Operands, 2681 bool AllowImm) { 2682 bool Neg, SP3Neg; 2683 bool Abs, SP3Abs; 2684 SMLoc Loc; 2685 2686 // Disable ambiguous constructs like '--1' etc. Should use neg(-1) instead. 2687 if (isToken(AsmToken::Minus) && peekToken().is(AsmToken::Minus)) { 2688 Error(getLoc(), "invalid syntax, expected 'neg' modifier"); 2689 return MatchOperand_ParseFail; 2690 } 2691 2692 SP3Neg = parseSP3NegModifier(); 2693 2694 Loc = getLoc(); 2695 Neg = trySkipId("neg"); 2696 if (Neg && SP3Neg) { 2697 Error(Loc, "expected register or immediate"); 2698 return MatchOperand_ParseFail; 2699 } 2700 if (Neg && !skipToken(AsmToken::LParen, "expected left paren after neg")) 2701 return MatchOperand_ParseFail; 2702 2703 Abs = trySkipId("abs"); 2704 if (Abs && !skipToken(AsmToken::LParen, "expected left paren after abs")) 2705 return MatchOperand_ParseFail; 2706 2707 Loc = getLoc(); 2708 SP3Abs = trySkipToken(AsmToken::Pipe); 2709 if (Abs && SP3Abs) { 2710 Error(Loc, "expected register or immediate"); 2711 return MatchOperand_ParseFail; 2712 } 2713 2714 OperandMatchResultTy Res; 2715 if (AllowImm) { 2716 Res = parseRegOrImm(Operands, SP3Abs); 2717 } else { 2718 Res = parseReg(Operands); 2719 } 2720 if (Res != MatchOperand_Success) { 2721 return (SP3Neg || Neg || SP3Abs || Abs)? MatchOperand_ParseFail : Res; 2722 } 2723 2724 if (SP3Abs && !skipToken(AsmToken::Pipe, "expected vertical bar")) 2725 return MatchOperand_ParseFail; 2726 if (Abs && !skipToken(AsmToken::RParen, "expected closing parentheses")) 2727 return MatchOperand_ParseFail; 2728 if (Neg && !skipToken(AsmToken::RParen, "expected closing parentheses")) 2729 return MatchOperand_ParseFail; 2730 2731 AMDGPUOperand::Modifiers Mods; 2732 Mods.Abs = Abs || SP3Abs; 2733 Mods.Neg = Neg || SP3Neg; 2734 2735 if (Mods.hasFPModifiers()) { 2736 AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back()); 2737 if (Op.isExpr()) { 2738 Error(Op.getStartLoc(), "expected an absolute expression"); 2739 return MatchOperand_ParseFail; 2740 } 2741 Op.setModifiers(Mods); 2742 } 2743 return MatchOperand_Success; 2744 } 2745 2746 OperandMatchResultTy 2747 AMDGPUAsmParser::parseRegOrImmWithIntInputMods(OperandVector &Operands, 2748 bool AllowImm) { 2749 bool Sext = trySkipId("sext"); 2750 if (Sext && !skipToken(AsmToken::LParen, "expected left paren after sext")) 2751 return MatchOperand_ParseFail; 2752 2753 OperandMatchResultTy Res; 2754 if (AllowImm) { 2755 Res = parseRegOrImm(Operands); 2756 } else { 2757 Res = parseReg(Operands); 2758 } 2759 if (Res != MatchOperand_Success) { 2760 return Sext? MatchOperand_ParseFail : Res; 2761 } 2762 2763 if (Sext && !skipToken(AsmToken::RParen, "expected closing parentheses")) 2764 return MatchOperand_ParseFail; 2765 2766 AMDGPUOperand::Modifiers Mods; 2767 Mods.Sext = Sext; 2768 2769 if (Mods.hasIntModifiers()) { 2770 AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back()); 2771 if (Op.isExpr()) { 2772 Error(Op.getStartLoc(), "expected an absolute expression"); 2773 return MatchOperand_ParseFail; 2774 } 2775 Op.setModifiers(Mods); 2776 } 2777 2778 return MatchOperand_Success; 2779 } 2780 2781 OperandMatchResultTy 2782 AMDGPUAsmParser::parseRegWithFPInputMods(OperandVector &Operands) { 2783 return parseRegOrImmWithFPInputMods(Operands, false); 2784 } 2785 2786 OperandMatchResultTy 2787 AMDGPUAsmParser::parseRegWithIntInputMods(OperandVector &Operands) { 2788 return parseRegOrImmWithIntInputMods(Operands, false); 2789 } 2790 2791 OperandMatchResultTy AMDGPUAsmParser::parseVReg32OrOff(OperandVector &Operands) { 2792 auto Loc = getLoc(); 2793 if (trySkipId("off")) { 2794 Operands.push_back(AMDGPUOperand::CreateImm(this, 0, Loc, 2795 AMDGPUOperand::ImmTyOff, false)); 2796 return MatchOperand_Success; 2797 } 2798 2799 if (!isRegister()) 2800 return MatchOperand_NoMatch; 2801 2802 std::unique_ptr<AMDGPUOperand> Reg = parseRegister(); 2803 if (Reg) { 2804 Operands.push_back(std::move(Reg)); 2805 return MatchOperand_Success; 2806 } 2807 2808 return MatchOperand_ParseFail; 2809 2810 } 2811 2812 unsigned AMDGPUAsmParser::checkTargetMatchPredicate(MCInst &Inst) { 2813 uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags; 2814 2815 if ((getForcedEncodingSize() == 32 && (TSFlags & SIInstrFlags::VOP3)) || 2816 (getForcedEncodingSize() == 64 && !(TSFlags & SIInstrFlags::VOP3)) || 2817 (isForcedDPP() && !(TSFlags & SIInstrFlags::DPP)) || 2818 (isForcedSDWA() && !(TSFlags & SIInstrFlags::SDWA)) ) 2819 return Match_InvalidOperand; 2820 2821 if ((TSFlags & SIInstrFlags::VOP3) && 2822 (TSFlags & SIInstrFlags::VOPAsmPrefer32Bit) && 2823 getForcedEncodingSize() != 64) 2824 return Match_PreferE32; 2825 2826 if (Inst.getOpcode() == AMDGPU::V_MAC_F32_sdwa_vi || 2827 Inst.getOpcode() == AMDGPU::V_MAC_F16_sdwa_vi) { 2828 // v_mac_f32/16 allow only dst_sel == DWORD; 2829 auto OpNum = 2830 AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::dst_sel); 2831 const auto &Op = Inst.getOperand(OpNum); 2832 if (!Op.isImm() || Op.getImm() != AMDGPU::SDWA::SdwaSel::DWORD) { 2833 return Match_InvalidOperand; 2834 } 2835 } 2836 2837 return Match_Success; 2838 } 2839 2840 // What asm variants we should check 2841 ArrayRef<unsigned> AMDGPUAsmParser::getMatchedVariants() const { 2842 if (getForcedEncodingSize() == 32) { 2843 static const unsigned Variants[] = {AMDGPUAsmVariants::DEFAULT}; 2844 return makeArrayRef(Variants); 2845 } 2846 2847 if (isForcedVOP3()) { 2848 static const unsigned Variants[] = {AMDGPUAsmVariants::VOP3}; 2849 return makeArrayRef(Variants); 2850 } 2851 2852 if (isForcedSDWA()) { 2853 static const unsigned Variants[] = {AMDGPUAsmVariants::SDWA, 2854 AMDGPUAsmVariants::SDWA9}; 2855 return makeArrayRef(Variants); 2856 } 2857 2858 if (isForcedDPP()) { 2859 static const unsigned Variants[] = {AMDGPUAsmVariants::DPP}; 2860 return makeArrayRef(Variants); 2861 } 2862 2863 static const unsigned Variants[] = { 2864 AMDGPUAsmVariants::DEFAULT, AMDGPUAsmVariants::VOP3, 2865 AMDGPUAsmVariants::SDWA, AMDGPUAsmVariants::SDWA9, AMDGPUAsmVariants::DPP 2866 }; 2867 2868 return makeArrayRef(Variants); 2869 } 2870 2871 unsigned AMDGPUAsmParser::findImplicitSGPRReadInVOP(const MCInst &Inst) const { 2872 const MCInstrDesc &Desc = MII.get(Inst.getOpcode()); 2873 const unsigned Num = Desc.getNumImplicitUses(); 2874 for (unsigned i = 0; i < Num; ++i) { 2875 unsigned Reg = Desc.ImplicitUses[i]; 2876 switch (Reg) { 2877 case AMDGPU::FLAT_SCR: 2878 case AMDGPU::VCC: 2879 case AMDGPU::VCC_LO: 2880 case AMDGPU::VCC_HI: 2881 case AMDGPU::M0: 2882 return Reg; 2883 default: 2884 break; 2885 } 2886 } 2887 return AMDGPU::NoRegister; 2888 } 2889 2890 // NB: This code is correct only when used to check constant 2891 // bus limitations because GFX7 support no f16 inline constants. 2892 // Note that there are no cases when a GFX7 opcode violates 2893 // constant bus limitations due to the use of an f16 constant. 2894 bool AMDGPUAsmParser::isInlineConstant(const MCInst &Inst, 2895 unsigned OpIdx) const { 2896 const MCInstrDesc &Desc = MII.get(Inst.getOpcode()); 2897 2898 if (!AMDGPU::isSISrcOperand(Desc, OpIdx)) { 2899 return false; 2900 } 2901 2902 const MCOperand &MO = Inst.getOperand(OpIdx); 2903 2904 int64_t Val = MO.getImm(); 2905 auto OpSize = AMDGPU::getOperandSize(Desc, OpIdx); 2906 2907 switch (OpSize) { // expected operand size 2908 case 8: 2909 return AMDGPU::isInlinableLiteral64(Val, hasInv2PiInlineImm()); 2910 case 4: 2911 return AMDGPU::isInlinableLiteral32(Val, hasInv2PiInlineImm()); 2912 case 2: { 2913 const unsigned OperandType = Desc.OpInfo[OpIdx].OperandType; 2914 if (OperandType == AMDGPU::OPERAND_REG_IMM_INT16 || 2915 OperandType == AMDGPU::OPERAND_REG_INLINE_C_INT16 || 2916 OperandType == AMDGPU::OPERAND_REG_INLINE_AC_INT16) 2917 return AMDGPU::isInlinableIntLiteral(Val); 2918 2919 if (OperandType == AMDGPU::OPERAND_REG_INLINE_C_V2INT16 || 2920 OperandType == AMDGPU::OPERAND_REG_INLINE_AC_V2INT16 || 2921 OperandType == AMDGPU::OPERAND_REG_IMM_V2INT16) 2922 return AMDGPU::isInlinableIntLiteralV216(Val); 2923 2924 if (OperandType == AMDGPU::OPERAND_REG_INLINE_C_V2FP16 || 2925 OperandType == AMDGPU::OPERAND_REG_INLINE_AC_V2FP16 || 2926 OperandType == AMDGPU::OPERAND_REG_IMM_V2FP16) 2927 return AMDGPU::isInlinableLiteralV216(Val, hasInv2PiInlineImm()); 2928 2929 return AMDGPU::isInlinableLiteral16(Val, hasInv2PiInlineImm()); 2930 } 2931 default: 2932 llvm_unreachable("invalid operand size"); 2933 } 2934 } 2935 2936 unsigned AMDGPUAsmParser::getConstantBusLimit(unsigned Opcode) const { 2937 if (!isGFX10()) 2938 return 1; 2939 2940 switch (Opcode) { 2941 // 64-bit shift instructions can use only one scalar value input 2942 case AMDGPU::V_LSHLREV_B64: 2943 case AMDGPU::V_LSHLREV_B64_gfx10: 2944 case AMDGPU::V_LSHL_B64: 2945 case AMDGPU::V_LSHRREV_B64: 2946 case AMDGPU::V_LSHRREV_B64_gfx10: 2947 case AMDGPU::V_LSHR_B64: 2948 case AMDGPU::V_ASHRREV_I64: 2949 case AMDGPU::V_ASHRREV_I64_gfx10: 2950 case AMDGPU::V_ASHR_I64: 2951 return 1; 2952 default: 2953 return 2; 2954 } 2955 } 2956 2957 bool AMDGPUAsmParser::usesConstantBus(const MCInst &Inst, unsigned OpIdx) { 2958 const MCOperand &MO = Inst.getOperand(OpIdx); 2959 if (MO.isImm()) { 2960 return !isInlineConstant(Inst, OpIdx); 2961 } else if (MO.isReg()) { 2962 auto Reg = MO.getReg(); 2963 const MCRegisterInfo *TRI = getContext().getRegisterInfo(); 2964 return isSGPR(mc2PseudoReg(Reg), TRI) && Reg != SGPR_NULL; 2965 } else { 2966 return true; 2967 } 2968 } 2969 2970 bool AMDGPUAsmParser::validateConstantBusLimitations(const MCInst &Inst) { 2971 const unsigned Opcode = Inst.getOpcode(); 2972 const MCInstrDesc &Desc = MII.get(Opcode); 2973 unsigned ConstantBusUseCount = 0; 2974 unsigned NumLiterals = 0; 2975 unsigned LiteralSize; 2976 2977 if (Desc.TSFlags & 2978 (SIInstrFlags::VOPC | 2979 SIInstrFlags::VOP1 | SIInstrFlags::VOP2 | 2980 SIInstrFlags::VOP3 | SIInstrFlags::VOP3P | 2981 SIInstrFlags::SDWA)) { 2982 // Check special imm operands (used by madmk, etc) 2983 if (AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::imm) != -1) { 2984 ++ConstantBusUseCount; 2985 } 2986 2987 SmallDenseSet<unsigned> SGPRsUsed; 2988 unsigned SGPRUsed = findImplicitSGPRReadInVOP(Inst); 2989 if (SGPRUsed != AMDGPU::NoRegister) { 2990 SGPRsUsed.insert(SGPRUsed); 2991 ++ConstantBusUseCount; 2992 } 2993 2994 const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0); 2995 const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1); 2996 const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2); 2997 2998 const int OpIndices[] = { Src0Idx, Src1Idx, Src2Idx }; 2999 3000 for (int OpIdx : OpIndices) { 3001 if (OpIdx == -1) break; 3002 3003 const MCOperand &MO = Inst.getOperand(OpIdx); 3004 if (usesConstantBus(Inst, OpIdx)) { 3005 if (MO.isReg()) { 3006 const unsigned Reg = mc2PseudoReg(MO.getReg()); 3007 // Pairs of registers with a partial intersections like these 3008 // s0, s[0:1] 3009 // flat_scratch_lo, flat_scratch 3010 // flat_scratch_lo, flat_scratch_hi 3011 // are theoretically valid but they are disabled anyway. 3012 // Note that this code mimics SIInstrInfo::verifyInstruction 3013 if (!SGPRsUsed.count(Reg)) { 3014 SGPRsUsed.insert(Reg); 3015 ++ConstantBusUseCount; 3016 } 3017 } else { // Expression or a literal 3018 3019 if (Desc.OpInfo[OpIdx].OperandType == MCOI::OPERAND_IMMEDIATE) 3020 continue; // special operand like VINTERP attr_chan 3021 3022 // An instruction may use only one literal. 3023 // This has been validated on the previous step. 3024 // See validateVOP3Literal. 3025 // This literal may be used as more than one operand. 3026 // If all these operands are of the same size, 3027 // this literal counts as one scalar value. 3028 // Otherwise it counts as 2 scalar values. 3029 // See "GFX10 Shader Programming", section 3.6.2.3. 3030 3031 unsigned Size = AMDGPU::getOperandSize(Desc, OpIdx); 3032 if (Size < 4) Size = 4; 3033 3034 if (NumLiterals == 0) { 3035 NumLiterals = 1; 3036 LiteralSize = Size; 3037 } else if (LiteralSize != Size) { 3038 NumLiterals = 2; 3039 } 3040 } 3041 } 3042 } 3043 } 3044 ConstantBusUseCount += NumLiterals; 3045 3046 return ConstantBusUseCount <= getConstantBusLimit(Opcode); 3047 } 3048 3049 bool AMDGPUAsmParser::validateEarlyClobberLimitations(const MCInst &Inst) { 3050 const unsigned Opcode = Inst.getOpcode(); 3051 const MCInstrDesc &Desc = MII.get(Opcode); 3052 3053 const int DstIdx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::vdst); 3054 if (DstIdx == -1 || 3055 Desc.getOperandConstraint(DstIdx, MCOI::EARLY_CLOBBER) == -1) { 3056 return true; 3057 } 3058 3059 const MCRegisterInfo *TRI = getContext().getRegisterInfo(); 3060 3061 const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0); 3062 const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1); 3063 const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2); 3064 3065 assert(DstIdx != -1); 3066 const MCOperand &Dst = Inst.getOperand(DstIdx); 3067 assert(Dst.isReg()); 3068 const unsigned DstReg = mc2PseudoReg(Dst.getReg()); 3069 3070 const int SrcIndices[] = { Src0Idx, Src1Idx, Src2Idx }; 3071 3072 for (int SrcIdx : SrcIndices) { 3073 if (SrcIdx == -1) break; 3074 const MCOperand &Src = Inst.getOperand(SrcIdx); 3075 if (Src.isReg()) { 3076 const unsigned SrcReg = mc2PseudoReg(Src.getReg()); 3077 if (isRegIntersect(DstReg, SrcReg, TRI)) { 3078 return false; 3079 } 3080 } 3081 } 3082 3083 return true; 3084 } 3085 3086 bool AMDGPUAsmParser::validateIntClampSupported(const MCInst &Inst) { 3087 3088 const unsigned Opc = Inst.getOpcode(); 3089 const MCInstrDesc &Desc = MII.get(Opc); 3090 3091 if ((Desc.TSFlags & SIInstrFlags::IntClamp) != 0 && !hasIntClamp()) { 3092 int ClampIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp); 3093 assert(ClampIdx != -1); 3094 return Inst.getOperand(ClampIdx).getImm() == 0; 3095 } 3096 3097 return true; 3098 } 3099 3100 bool AMDGPUAsmParser::validateMIMGDataSize(const MCInst &Inst) { 3101 3102 const unsigned Opc = Inst.getOpcode(); 3103 const MCInstrDesc &Desc = MII.get(Opc); 3104 3105 if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0) 3106 return true; 3107 3108 int VDataIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdata); 3109 int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask); 3110 int TFEIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::tfe); 3111 3112 assert(VDataIdx != -1); 3113 3114 if (DMaskIdx == -1 || TFEIdx == -1) // intersect_ray 3115 return true; 3116 3117 unsigned VDataSize = AMDGPU::getRegOperandSize(getMRI(), Desc, VDataIdx); 3118 unsigned TFESize = Inst.getOperand(TFEIdx).getImm()? 1 : 0; 3119 unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf; 3120 if (DMask == 0) 3121 DMask = 1; 3122 3123 unsigned DataSize = 3124 (Desc.TSFlags & SIInstrFlags::Gather4) ? 4 : countPopulation(DMask); 3125 if (hasPackedD16()) { 3126 int D16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::d16); 3127 if (D16Idx >= 0 && Inst.getOperand(D16Idx).getImm()) 3128 DataSize = (DataSize + 1) / 2; 3129 } 3130 3131 return (VDataSize / 4) == DataSize + TFESize; 3132 } 3133 3134 bool AMDGPUAsmParser::validateMIMGAddrSize(const MCInst &Inst) { 3135 const unsigned Opc = Inst.getOpcode(); 3136 const MCInstrDesc &Desc = MII.get(Opc); 3137 3138 if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0 || !isGFX10()) 3139 return true; 3140 3141 const AMDGPU::MIMGInfo *Info = AMDGPU::getMIMGInfo(Opc); 3142 3143 const AMDGPU::MIMGBaseOpcodeInfo *BaseOpcode = 3144 AMDGPU::getMIMGBaseOpcodeInfo(Info->BaseOpcode); 3145 int VAddr0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vaddr0); 3146 int SrsrcIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::srsrc); 3147 int DimIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dim); 3148 3149 assert(VAddr0Idx != -1); 3150 assert(SrsrcIdx != -1); 3151 assert(SrsrcIdx > VAddr0Idx); 3152 3153 if (DimIdx == -1) 3154 return true; // intersect_ray 3155 3156 unsigned Dim = Inst.getOperand(DimIdx).getImm(); 3157 const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfoByEncoding(Dim); 3158 bool IsNSA = SrsrcIdx - VAddr0Idx > 1; 3159 unsigned VAddrSize = 3160 IsNSA ? SrsrcIdx - VAddr0Idx 3161 : AMDGPU::getRegOperandSize(getMRI(), Desc, VAddr0Idx) / 4; 3162 3163 unsigned AddrSize = BaseOpcode->NumExtraArgs + 3164 (BaseOpcode->Gradients ? DimInfo->NumGradients : 0) + 3165 (BaseOpcode->Coordinates ? DimInfo->NumCoords : 0) + 3166 (BaseOpcode->LodOrClampOrMip ? 1 : 0); 3167 if (!IsNSA) { 3168 if (AddrSize > 8) 3169 AddrSize = 16; 3170 else if (AddrSize > 4) 3171 AddrSize = 8; 3172 } 3173 3174 return VAddrSize == AddrSize; 3175 } 3176 3177 bool AMDGPUAsmParser::validateMIMGAtomicDMask(const MCInst &Inst) { 3178 3179 const unsigned Opc = Inst.getOpcode(); 3180 const MCInstrDesc &Desc = MII.get(Opc); 3181 3182 if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0) 3183 return true; 3184 if (!Desc.mayLoad() || !Desc.mayStore()) 3185 return true; // Not atomic 3186 3187 int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask); 3188 unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf; 3189 3190 // This is an incomplete check because image_atomic_cmpswap 3191 // may only use 0x3 and 0xf while other atomic operations 3192 // may use 0x1 and 0x3. However these limitations are 3193 // verified when we check that dmask matches dst size. 3194 return DMask == 0x1 || DMask == 0x3 || DMask == 0xf; 3195 } 3196 3197 bool AMDGPUAsmParser::validateMIMGGatherDMask(const MCInst &Inst) { 3198 3199 const unsigned Opc = Inst.getOpcode(); 3200 const MCInstrDesc &Desc = MII.get(Opc); 3201 3202 if ((Desc.TSFlags & SIInstrFlags::Gather4) == 0) 3203 return true; 3204 3205 int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask); 3206 unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf; 3207 3208 // GATHER4 instructions use dmask in a different fashion compared to 3209 // other MIMG instructions. The only useful DMASK values are 3210 // 1=red, 2=green, 4=blue, 8=alpha. (e.g. 1 returns 3211 // (red,red,red,red) etc.) The ISA document doesn't mention 3212 // this. 3213 return DMask == 0x1 || DMask == 0x2 || DMask == 0x4 || DMask == 0x8; 3214 } 3215 3216 static bool IsMovrelsSDWAOpcode(const unsigned Opcode) 3217 { 3218 switch (Opcode) { 3219 case AMDGPU::V_MOVRELS_B32_sdwa_gfx10: 3220 case AMDGPU::V_MOVRELSD_B32_sdwa_gfx10: 3221 case AMDGPU::V_MOVRELSD_2_B32_sdwa_gfx10: 3222 return true; 3223 default: 3224 return false; 3225 } 3226 } 3227 3228 // movrels* opcodes should only allow VGPRS as src0. 3229 // This is specified in .td description for vop1/vop3, 3230 // but sdwa is handled differently. See isSDWAOperand. 3231 bool AMDGPUAsmParser::validateMovrels(const MCInst &Inst) { 3232 3233 const unsigned Opc = Inst.getOpcode(); 3234 const MCInstrDesc &Desc = MII.get(Opc); 3235 3236 if ((Desc.TSFlags & SIInstrFlags::SDWA) == 0 || !IsMovrelsSDWAOpcode(Opc)) 3237 return true; 3238 3239 const int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0); 3240 assert(Src0Idx != -1); 3241 3242 const MCOperand &Src0 = Inst.getOperand(Src0Idx); 3243 if (!Src0.isReg()) 3244 return false; 3245 3246 auto Reg = Src0.getReg(); 3247 const MCRegisterInfo *TRI = getContext().getRegisterInfo(); 3248 return !isSGPR(mc2PseudoReg(Reg), TRI); 3249 } 3250 3251 bool AMDGPUAsmParser::validateMAIAccWrite(const MCInst &Inst) { 3252 3253 const unsigned Opc = Inst.getOpcode(); 3254 3255 if (Opc != AMDGPU::V_ACCVGPR_WRITE_B32_vi) 3256 return true; 3257 3258 const int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0); 3259 assert(Src0Idx != -1); 3260 3261 const MCOperand &Src0 = Inst.getOperand(Src0Idx); 3262 if (!Src0.isReg()) 3263 return true; 3264 3265 auto Reg = Src0.getReg(); 3266 const MCRegisterInfo *TRI = getContext().getRegisterInfo(); 3267 if (isSGPR(mc2PseudoReg(Reg), TRI)) { 3268 Error(getLoc(), "source operand must be either a VGPR or an inline constant"); 3269 return false; 3270 } 3271 3272 return true; 3273 } 3274 3275 bool AMDGPUAsmParser::validateMIMGD16(const MCInst &Inst) { 3276 3277 const unsigned Opc = Inst.getOpcode(); 3278 const MCInstrDesc &Desc = MII.get(Opc); 3279 3280 if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0) 3281 return true; 3282 3283 int D16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::d16); 3284 if (D16Idx >= 0 && Inst.getOperand(D16Idx).getImm()) { 3285 if (isCI() || isSI()) 3286 return false; 3287 } 3288 3289 return true; 3290 } 3291 3292 bool AMDGPUAsmParser::validateMIMGDim(const MCInst &Inst) { 3293 const unsigned Opc = Inst.getOpcode(); 3294 const MCInstrDesc &Desc = MII.get(Opc); 3295 3296 if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0) 3297 return true; 3298 3299 int DimIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dim); 3300 if (DimIdx < 0) 3301 return true; 3302 3303 long Imm = Inst.getOperand(DimIdx).getImm(); 3304 if (Imm < 0 || Imm >= 8) 3305 return false; 3306 3307 return true; 3308 } 3309 3310 static bool IsRevOpcode(const unsigned Opcode) 3311 { 3312 switch (Opcode) { 3313 case AMDGPU::V_SUBREV_F32_e32: 3314 case AMDGPU::V_SUBREV_F32_e64: 3315 case AMDGPU::V_SUBREV_F32_e32_gfx10: 3316 case AMDGPU::V_SUBREV_F32_e32_gfx6_gfx7: 3317 case AMDGPU::V_SUBREV_F32_e32_vi: 3318 case AMDGPU::V_SUBREV_F32_e64_gfx10: 3319 case AMDGPU::V_SUBREV_F32_e64_gfx6_gfx7: 3320 case AMDGPU::V_SUBREV_F32_e64_vi: 3321 3322 case AMDGPU::V_SUBREV_CO_U32_e32: 3323 case AMDGPU::V_SUBREV_CO_U32_e64: 3324 case AMDGPU::V_SUBREV_I32_e32_gfx6_gfx7: 3325 case AMDGPU::V_SUBREV_I32_e64_gfx6_gfx7: 3326 3327 case AMDGPU::V_SUBBREV_U32_e32: 3328 case AMDGPU::V_SUBBREV_U32_e64: 3329 case AMDGPU::V_SUBBREV_U32_e32_gfx6_gfx7: 3330 case AMDGPU::V_SUBBREV_U32_e32_vi: 3331 case AMDGPU::V_SUBBREV_U32_e64_gfx6_gfx7: 3332 case AMDGPU::V_SUBBREV_U32_e64_vi: 3333 3334 case AMDGPU::V_SUBREV_U32_e32: 3335 case AMDGPU::V_SUBREV_U32_e64: 3336 case AMDGPU::V_SUBREV_U32_e32_gfx9: 3337 case AMDGPU::V_SUBREV_U32_e32_vi: 3338 case AMDGPU::V_SUBREV_U32_e64_gfx9: 3339 case AMDGPU::V_SUBREV_U32_e64_vi: 3340 3341 case AMDGPU::V_SUBREV_F16_e32: 3342 case AMDGPU::V_SUBREV_F16_e64: 3343 case AMDGPU::V_SUBREV_F16_e32_gfx10: 3344 case AMDGPU::V_SUBREV_F16_e32_vi: 3345 case AMDGPU::V_SUBREV_F16_e64_gfx10: 3346 case AMDGPU::V_SUBREV_F16_e64_vi: 3347 3348 case AMDGPU::V_SUBREV_U16_e32: 3349 case AMDGPU::V_SUBREV_U16_e64: 3350 case AMDGPU::V_SUBREV_U16_e32_vi: 3351 case AMDGPU::V_SUBREV_U16_e64_vi: 3352 3353 case AMDGPU::V_SUBREV_CO_U32_e32_gfx9: 3354 case AMDGPU::V_SUBREV_CO_U32_e64_gfx10: 3355 case AMDGPU::V_SUBREV_CO_U32_e64_gfx9: 3356 3357 case AMDGPU::V_SUBBREV_CO_U32_e32_gfx9: 3358 case AMDGPU::V_SUBBREV_CO_U32_e64_gfx9: 3359 3360 case AMDGPU::V_SUBREV_NC_U32_e32_gfx10: 3361 case AMDGPU::V_SUBREV_NC_U32_e64_gfx10: 3362 3363 case AMDGPU::V_SUBREV_CO_CI_U32_e32_gfx10: 3364 case AMDGPU::V_SUBREV_CO_CI_U32_e64_gfx10: 3365 3366 case AMDGPU::V_LSHRREV_B32_e32: 3367 case AMDGPU::V_LSHRREV_B32_e64: 3368 case AMDGPU::V_LSHRREV_B32_e32_gfx6_gfx7: 3369 case AMDGPU::V_LSHRREV_B32_e64_gfx6_gfx7: 3370 case AMDGPU::V_LSHRREV_B32_e32_vi: 3371 case AMDGPU::V_LSHRREV_B32_e64_vi: 3372 case AMDGPU::V_LSHRREV_B32_e32_gfx10: 3373 case AMDGPU::V_LSHRREV_B32_e64_gfx10: 3374 3375 case AMDGPU::V_ASHRREV_I32_e32: 3376 case AMDGPU::V_ASHRREV_I32_e64: 3377 case AMDGPU::V_ASHRREV_I32_e32_gfx10: 3378 case AMDGPU::V_ASHRREV_I32_e32_gfx6_gfx7: 3379 case AMDGPU::V_ASHRREV_I32_e32_vi: 3380 case AMDGPU::V_ASHRREV_I32_e64_gfx10: 3381 case AMDGPU::V_ASHRREV_I32_e64_gfx6_gfx7: 3382 case AMDGPU::V_ASHRREV_I32_e64_vi: 3383 3384 case AMDGPU::V_LSHLREV_B32_e32: 3385 case AMDGPU::V_LSHLREV_B32_e64: 3386 case AMDGPU::V_LSHLREV_B32_e32_gfx10: 3387 case AMDGPU::V_LSHLREV_B32_e32_gfx6_gfx7: 3388 case AMDGPU::V_LSHLREV_B32_e32_vi: 3389 case AMDGPU::V_LSHLREV_B32_e64_gfx10: 3390 case AMDGPU::V_LSHLREV_B32_e64_gfx6_gfx7: 3391 case AMDGPU::V_LSHLREV_B32_e64_vi: 3392 3393 case AMDGPU::V_LSHLREV_B16_e32: 3394 case AMDGPU::V_LSHLREV_B16_e64: 3395 case AMDGPU::V_LSHLREV_B16_e32_vi: 3396 case AMDGPU::V_LSHLREV_B16_e64_vi: 3397 case AMDGPU::V_LSHLREV_B16_gfx10: 3398 3399 case AMDGPU::V_LSHRREV_B16_e32: 3400 case AMDGPU::V_LSHRREV_B16_e64: 3401 case AMDGPU::V_LSHRREV_B16_e32_vi: 3402 case AMDGPU::V_LSHRREV_B16_e64_vi: 3403 case AMDGPU::V_LSHRREV_B16_gfx10: 3404 3405 case AMDGPU::V_ASHRREV_I16_e32: 3406 case AMDGPU::V_ASHRREV_I16_e64: 3407 case AMDGPU::V_ASHRREV_I16_e32_vi: 3408 case AMDGPU::V_ASHRREV_I16_e64_vi: 3409 case AMDGPU::V_ASHRREV_I16_gfx10: 3410 3411 case AMDGPU::V_LSHLREV_B64: 3412 case AMDGPU::V_LSHLREV_B64_gfx10: 3413 case AMDGPU::V_LSHLREV_B64_vi: 3414 3415 case AMDGPU::V_LSHRREV_B64: 3416 case AMDGPU::V_LSHRREV_B64_gfx10: 3417 case AMDGPU::V_LSHRREV_B64_vi: 3418 3419 case AMDGPU::V_ASHRREV_I64: 3420 case AMDGPU::V_ASHRREV_I64_gfx10: 3421 case AMDGPU::V_ASHRREV_I64_vi: 3422 3423 case AMDGPU::V_PK_LSHLREV_B16: 3424 case AMDGPU::V_PK_LSHLREV_B16_gfx10: 3425 case AMDGPU::V_PK_LSHLREV_B16_vi: 3426 3427 case AMDGPU::V_PK_LSHRREV_B16: 3428 case AMDGPU::V_PK_LSHRREV_B16_gfx10: 3429 case AMDGPU::V_PK_LSHRREV_B16_vi: 3430 case AMDGPU::V_PK_ASHRREV_I16: 3431 case AMDGPU::V_PK_ASHRREV_I16_gfx10: 3432 case AMDGPU::V_PK_ASHRREV_I16_vi: 3433 return true; 3434 default: 3435 return false; 3436 } 3437 } 3438 3439 bool AMDGPUAsmParser::validateLdsDirect(const MCInst &Inst) { 3440 3441 using namespace SIInstrFlags; 3442 const unsigned Opcode = Inst.getOpcode(); 3443 const MCInstrDesc &Desc = MII.get(Opcode); 3444 3445 // lds_direct register is defined so that it can be used 3446 // with 9-bit operands only. Ignore encodings which do not accept these. 3447 if ((Desc.TSFlags & (VOP1 | VOP2 | VOP3 | VOPC | VOP3P | SIInstrFlags::SDWA)) == 0) 3448 return true; 3449 3450 const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0); 3451 const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1); 3452 const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2); 3453 3454 const int SrcIndices[] = { Src1Idx, Src2Idx }; 3455 3456 // lds_direct cannot be specified as either src1 or src2. 3457 for (int SrcIdx : SrcIndices) { 3458 if (SrcIdx == -1) break; 3459 const MCOperand &Src = Inst.getOperand(SrcIdx); 3460 if (Src.isReg() && Src.getReg() == LDS_DIRECT) { 3461 return false; 3462 } 3463 } 3464 3465 if (Src0Idx == -1) 3466 return true; 3467 3468 const MCOperand &Src = Inst.getOperand(Src0Idx); 3469 if (!Src.isReg() || Src.getReg() != LDS_DIRECT) 3470 return true; 3471 3472 // lds_direct is specified as src0. Check additional limitations. 3473 return (Desc.TSFlags & SIInstrFlags::SDWA) == 0 && !IsRevOpcode(Opcode); 3474 } 3475 3476 SMLoc AMDGPUAsmParser::getFlatOffsetLoc(const OperandVector &Operands) const { 3477 for (unsigned i = 1, e = Operands.size(); i != e; ++i) { 3478 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 3479 if (Op.isFlatOffset()) 3480 return Op.getStartLoc(); 3481 } 3482 return getLoc(); 3483 } 3484 3485 bool AMDGPUAsmParser::validateFlatOffset(const MCInst &Inst, 3486 const OperandVector &Operands) { 3487 uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags; 3488 if ((TSFlags & SIInstrFlags::FLAT) == 0) 3489 return true; 3490 3491 auto Opcode = Inst.getOpcode(); 3492 auto OpNum = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::offset); 3493 assert(OpNum != -1); 3494 3495 const auto &Op = Inst.getOperand(OpNum); 3496 if (!hasFlatOffsets() && Op.getImm() != 0) { 3497 Error(getFlatOffsetLoc(Operands), 3498 "flat offset modifier is not supported on this GPU"); 3499 return false; 3500 } 3501 3502 // Address offset is 12-bit signed for GFX10, 13-bit for GFX9. 3503 // For FLAT segment the offset must be positive; 3504 // MSB is ignored and forced to zero. 3505 unsigned OffsetSize = isGFX9() ? 13 : 12; 3506 if (TSFlags & SIInstrFlags::IsNonFlatSeg) { 3507 if (!isIntN(OffsetSize, Op.getImm())) { 3508 Error(getFlatOffsetLoc(Operands), 3509 isGFX9() ? "expected a 13-bit signed offset" : 3510 "expected a 12-bit signed offset"); 3511 return false; 3512 } 3513 } else { 3514 if (!isUIntN(OffsetSize - 1, Op.getImm())) { 3515 Error(getFlatOffsetLoc(Operands), 3516 isGFX9() ? "expected a 12-bit unsigned offset" : 3517 "expected an 11-bit unsigned offset"); 3518 return false; 3519 } 3520 } 3521 3522 return true; 3523 } 3524 3525 SMLoc AMDGPUAsmParser::getSMEMOffsetLoc(const OperandVector &Operands) const { 3526 for (unsigned i = 1, e = Operands.size(); i != e; ++i) { 3527 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 3528 if (Op.isSMEMOffset()) 3529 return Op.getStartLoc(); 3530 } 3531 return getLoc(); 3532 } 3533 3534 bool AMDGPUAsmParser::validateSMEMOffset(const MCInst &Inst, 3535 const OperandVector &Operands) { 3536 if (isCI() || isSI()) 3537 return true; 3538 3539 uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags; 3540 if ((TSFlags & SIInstrFlags::SMRD) == 0) 3541 return true; 3542 3543 auto Opcode = Inst.getOpcode(); 3544 auto OpNum = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::offset); 3545 if (OpNum == -1) 3546 return true; 3547 3548 const auto &Op = Inst.getOperand(OpNum); 3549 if (!Op.isImm()) 3550 return true; 3551 3552 uint64_t Offset = Op.getImm(); 3553 bool IsBuffer = AMDGPU::getSMEMIsBuffer(Opcode); 3554 if (AMDGPU::isLegalSMRDEncodedUnsignedOffset(getSTI(), Offset) || 3555 AMDGPU::isLegalSMRDEncodedSignedOffset(getSTI(), Offset, IsBuffer)) 3556 return true; 3557 3558 Error(getSMEMOffsetLoc(Operands), 3559 (isVI() || IsBuffer) ? "expected a 20-bit unsigned offset" : 3560 "expected a 21-bit signed offset"); 3561 3562 return false; 3563 } 3564 3565 bool AMDGPUAsmParser::validateSOPLiteral(const MCInst &Inst) const { 3566 unsigned Opcode = Inst.getOpcode(); 3567 const MCInstrDesc &Desc = MII.get(Opcode); 3568 if (!(Desc.TSFlags & (SIInstrFlags::SOP2 | SIInstrFlags::SOPC))) 3569 return true; 3570 3571 const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0); 3572 const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1); 3573 3574 const int OpIndices[] = { Src0Idx, Src1Idx }; 3575 3576 unsigned NumExprs = 0; 3577 unsigned NumLiterals = 0; 3578 uint32_t LiteralValue; 3579 3580 for (int OpIdx : OpIndices) { 3581 if (OpIdx == -1) break; 3582 3583 const MCOperand &MO = Inst.getOperand(OpIdx); 3584 // Exclude special imm operands (like that used by s_set_gpr_idx_on) 3585 if (AMDGPU::isSISrcOperand(Desc, OpIdx)) { 3586 if (MO.isImm() && !isInlineConstant(Inst, OpIdx)) { 3587 uint32_t Value = static_cast<uint32_t>(MO.getImm()); 3588 if (NumLiterals == 0 || LiteralValue != Value) { 3589 LiteralValue = Value; 3590 ++NumLiterals; 3591 } 3592 } else if (MO.isExpr()) { 3593 ++NumExprs; 3594 } 3595 } 3596 } 3597 3598 return NumLiterals + NumExprs <= 1; 3599 } 3600 3601 bool AMDGPUAsmParser::validateOpSel(const MCInst &Inst) { 3602 const unsigned Opc = Inst.getOpcode(); 3603 if (Opc == AMDGPU::V_PERMLANE16_B32_gfx10 || 3604 Opc == AMDGPU::V_PERMLANEX16_B32_gfx10) { 3605 int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel); 3606 unsigned OpSel = Inst.getOperand(OpSelIdx).getImm(); 3607 3608 if (OpSel & ~3) 3609 return false; 3610 } 3611 return true; 3612 } 3613 3614 // Check if VCC register matches wavefront size 3615 bool AMDGPUAsmParser::validateVccOperand(unsigned Reg) const { 3616 auto FB = getFeatureBits(); 3617 return (FB[AMDGPU::FeatureWavefrontSize64] && Reg == AMDGPU::VCC) || 3618 (FB[AMDGPU::FeatureWavefrontSize32] && Reg == AMDGPU::VCC_LO); 3619 } 3620 3621 // VOP3 literal is only allowed in GFX10+ and only one can be used 3622 bool AMDGPUAsmParser::validateVOP3Literal(const MCInst &Inst) const { 3623 unsigned Opcode = Inst.getOpcode(); 3624 const MCInstrDesc &Desc = MII.get(Opcode); 3625 if (!(Desc.TSFlags & (SIInstrFlags::VOP3 | SIInstrFlags::VOP3P))) 3626 return true; 3627 3628 const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0); 3629 const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1); 3630 const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2); 3631 3632 const int OpIndices[] = { Src0Idx, Src1Idx, Src2Idx }; 3633 3634 unsigned NumExprs = 0; 3635 unsigned NumLiterals = 0; 3636 uint32_t LiteralValue; 3637 3638 for (int OpIdx : OpIndices) { 3639 if (OpIdx == -1) break; 3640 3641 const MCOperand &MO = Inst.getOperand(OpIdx); 3642 if (!MO.isImm() && !MO.isExpr()) 3643 continue; 3644 if (!AMDGPU::isSISrcOperand(Desc, OpIdx)) 3645 continue; 3646 3647 if (OpIdx == Src2Idx && (Desc.TSFlags & SIInstrFlags::IsMAI) && 3648 getFeatureBits()[AMDGPU::FeatureMFMAInlineLiteralBug]) 3649 return false; 3650 3651 if (MO.isImm() && !isInlineConstant(Inst, OpIdx)) { 3652 uint32_t Value = static_cast<uint32_t>(MO.getImm()); 3653 if (NumLiterals == 0 || LiteralValue != Value) { 3654 LiteralValue = Value; 3655 ++NumLiterals; 3656 } 3657 } else if (MO.isExpr()) { 3658 ++NumExprs; 3659 } 3660 } 3661 NumLiterals += NumExprs; 3662 3663 return !NumLiterals || 3664 (NumLiterals == 1 && getFeatureBits()[AMDGPU::FeatureVOP3Literal]); 3665 } 3666 3667 bool AMDGPUAsmParser::validateInstruction(const MCInst &Inst, 3668 const SMLoc &IDLoc, 3669 const OperandVector &Operands) { 3670 if (!validateLdsDirect(Inst)) { 3671 Error(IDLoc, 3672 "invalid use of lds_direct"); 3673 return false; 3674 } 3675 if (!validateSOPLiteral(Inst)) { 3676 Error(IDLoc, 3677 "only one literal operand is allowed"); 3678 return false; 3679 } 3680 if (!validateVOP3Literal(Inst)) { 3681 Error(IDLoc, 3682 "invalid literal operand"); 3683 return false; 3684 } 3685 if (!validateConstantBusLimitations(Inst)) { 3686 Error(IDLoc, 3687 "invalid operand (violates constant bus restrictions)"); 3688 return false; 3689 } 3690 if (!validateEarlyClobberLimitations(Inst)) { 3691 Error(IDLoc, 3692 "destination must be different than all sources"); 3693 return false; 3694 } 3695 if (!validateIntClampSupported(Inst)) { 3696 Error(IDLoc, 3697 "integer clamping is not supported on this GPU"); 3698 return false; 3699 } 3700 if (!validateOpSel(Inst)) { 3701 Error(IDLoc, 3702 "invalid op_sel operand"); 3703 return false; 3704 } 3705 // For MUBUF/MTBUF d16 is a part of opcode, so there is nothing to validate. 3706 if (!validateMIMGD16(Inst)) { 3707 Error(IDLoc, 3708 "d16 modifier is not supported on this GPU"); 3709 return false; 3710 } 3711 if (!validateMIMGDim(Inst)) { 3712 Error(IDLoc, "dim modifier is required on this GPU"); 3713 return false; 3714 } 3715 if (!validateMIMGDataSize(Inst)) { 3716 Error(IDLoc, 3717 "image data size does not match dmask and tfe"); 3718 return false; 3719 } 3720 if (!validateMIMGAddrSize(Inst)) { 3721 Error(IDLoc, 3722 "image address size does not match dim and a16"); 3723 return false; 3724 } 3725 if (!validateMIMGAtomicDMask(Inst)) { 3726 Error(IDLoc, 3727 "invalid atomic image dmask"); 3728 return false; 3729 } 3730 if (!validateMIMGGatherDMask(Inst)) { 3731 Error(IDLoc, 3732 "invalid image_gather dmask: only one bit must be set"); 3733 return false; 3734 } 3735 if (!validateMovrels(Inst)) { 3736 Error(IDLoc, "source operand must be a VGPR"); 3737 return false; 3738 } 3739 if (!validateFlatOffset(Inst, Operands)) { 3740 return false; 3741 } 3742 if (!validateSMEMOffset(Inst, Operands)) { 3743 return false; 3744 } 3745 if (!validateMAIAccWrite(Inst)) { 3746 return false; 3747 } 3748 3749 return true; 3750 } 3751 3752 static std::string AMDGPUMnemonicSpellCheck(StringRef S, 3753 const FeatureBitset &FBS, 3754 unsigned VariantID = 0); 3755 3756 bool AMDGPUAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode, 3757 OperandVector &Operands, 3758 MCStreamer &Out, 3759 uint64_t &ErrorInfo, 3760 bool MatchingInlineAsm) { 3761 MCInst Inst; 3762 unsigned Result = Match_Success; 3763 for (auto Variant : getMatchedVariants()) { 3764 uint64_t EI; 3765 auto R = MatchInstructionImpl(Operands, Inst, EI, MatchingInlineAsm, 3766 Variant); 3767 // We order match statuses from least to most specific. We use most specific 3768 // status as resulting 3769 // Match_MnemonicFail < Match_InvalidOperand < Match_MissingFeature < Match_PreferE32 3770 if ((R == Match_Success) || 3771 (R == Match_PreferE32) || 3772 (R == Match_MissingFeature && Result != Match_PreferE32) || 3773 (R == Match_InvalidOperand && Result != Match_MissingFeature 3774 && Result != Match_PreferE32) || 3775 (R == Match_MnemonicFail && Result != Match_InvalidOperand 3776 && Result != Match_MissingFeature 3777 && Result != Match_PreferE32)) { 3778 Result = R; 3779 ErrorInfo = EI; 3780 } 3781 if (R == Match_Success) 3782 break; 3783 } 3784 3785 switch (Result) { 3786 default: break; 3787 case Match_Success: 3788 if (!validateInstruction(Inst, IDLoc, Operands)) { 3789 return true; 3790 } 3791 Inst.setLoc(IDLoc); 3792 Out.emitInstruction(Inst, getSTI()); 3793 return false; 3794 3795 case Match_MissingFeature: 3796 return Error(IDLoc, "instruction not supported on this GPU"); 3797 3798 case Match_MnemonicFail: { 3799 FeatureBitset FBS = ComputeAvailableFeatures(getSTI().getFeatureBits()); 3800 std::string Suggestion = AMDGPUMnemonicSpellCheck( 3801 ((AMDGPUOperand &)*Operands[0]).getToken(), FBS); 3802 return Error(IDLoc, "invalid instruction" + Suggestion, 3803 ((AMDGPUOperand &)*Operands[0]).getLocRange()); 3804 } 3805 3806 case Match_InvalidOperand: { 3807 SMLoc ErrorLoc = IDLoc; 3808 if (ErrorInfo != ~0ULL) { 3809 if (ErrorInfo >= Operands.size()) { 3810 return Error(IDLoc, "too few operands for instruction"); 3811 } 3812 ErrorLoc = ((AMDGPUOperand &)*Operands[ErrorInfo]).getStartLoc(); 3813 if (ErrorLoc == SMLoc()) 3814 ErrorLoc = IDLoc; 3815 } 3816 return Error(ErrorLoc, "invalid operand for instruction"); 3817 } 3818 3819 case Match_PreferE32: 3820 return Error(IDLoc, "internal error: instruction without _e64 suffix " 3821 "should be encoded as e32"); 3822 } 3823 llvm_unreachable("Implement any new match types added!"); 3824 } 3825 3826 bool AMDGPUAsmParser::ParseAsAbsoluteExpression(uint32_t &Ret) { 3827 int64_t Tmp = -1; 3828 if (getLexer().isNot(AsmToken::Integer) && getLexer().isNot(AsmToken::Identifier)) { 3829 return true; 3830 } 3831 if (getParser().parseAbsoluteExpression(Tmp)) { 3832 return true; 3833 } 3834 Ret = static_cast<uint32_t>(Tmp); 3835 return false; 3836 } 3837 3838 bool AMDGPUAsmParser::ParseDirectiveMajorMinor(uint32_t &Major, 3839 uint32_t &Minor) { 3840 if (ParseAsAbsoluteExpression(Major)) 3841 return TokError("invalid major version"); 3842 3843 if (getLexer().isNot(AsmToken::Comma)) 3844 return TokError("minor version number required, comma expected"); 3845 Lex(); 3846 3847 if (ParseAsAbsoluteExpression(Minor)) 3848 return TokError("invalid minor version"); 3849 3850 return false; 3851 } 3852 3853 bool AMDGPUAsmParser::ParseDirectiveAMDGCNTarget() { 3854 if (getSTI().getTargetTriple().getArch() != Triple::amdgcn) 3855 return TokError("directive only supported for amdgcn architecture"); 3856 3857 std::string Target; 3858 3859 SMLoc TargetStart = getTok().getLoc(); 3860 if (getParser().parseEscapedString(Target)) 3861 return true; 3862 SMRange TargetRange = SMRange(TargetStart, getTok().getLoc()); 3863 3864 std::string ExpectedTarget; 3865 raw_string_ostream ExpectedTargetOS(ExpectedTarget); 3866 IsaInfo::streamIsaVersion(&getSTI(), ExpectedTargetOS); 3867 3868 if (Target != ExpectedTargetOS.str()) 3869 return getParser().Error(TargetRange.Start, "target must match options", 3870 TargetRange); 3871 3872 getTargetStreamer().EmitDirectiveAMDGCNTarget(Target); 3873 return false; 3874 } 3875 3876 bool AMDGPUAsmParser::OutOfRangeError(SMRange Range) { 3877 return getParser().Error(Range.Start, "value out of range", Range); 3878 } 3879 3880 bool AMDGPUAsmParser::calculateGPRBlocks( 3881 const FeatureBitset &Features, bool VCCUsed, bool FlatScrUsed, 3882 bool XNACKUsed, Optional<bool> EnableWavefrontSize32, unsigned NextFreeVGPR, 3883 SMRange VGPRRange, unsigned NextFreeSGPR, SMRange SGPRRange, 3884 unsigned &VGPRBlocks, unsigned &SGPRBlocks) { 3885 // TODO(scott.linder): These calculations are duplicated from 3886 // AMDGPUAsmPrinter::getSIProgramInfo and could be unified. 3887 IsaVersion Version = getIsaVersion(getSTI().getCPU()); 3888 3889 unsigned NumVGPRs = NextFreeVGPR; 3890 unsigned NumSGPRs = NextFreeSGPR; 3891 3892 if (Version.Major >= 10) 3893 NumSGPRs = 0; 3894 else { 3895 unsigned MaxAddressableNumSGPRs = 3896 IsaInfo::getAddressableNumSGPRs(&getSTI()); 3897 3898 if (Version.Major >= 8 && !Features.test(FeatureSGPRInitBug) && 3899 NumSGPRs > MaxAddressableNumSGPRs) 3900 return OutOfRangeError(SGPRRange); 3901 3902 NumSGPRs += 3903 IsaInfo::getNumExtraSGPRs(&getSTI(), VCCUsed, FlatScrUsed, XNACKUsed); 3904 3905 if ((Version.Major <= 7 || Features.test(FeatureSGPRInitBug)) && 3906 NumSGPRs > MaxAddressableNumSGPRs) 3907 return OutOfRangeError(SGPRRange); 3908 3909 if (Features.test(FeatureSGPRInitBug)) 3910 NumSGPRs = IsaInfo::FIXED_NUM_SGPRS_FOR_INIT_BUG; 3911 } 3912 3913 VGPRBlocks = 3914 IsaInfo::getNumVGPRBlocks(&getSTI(), NumVGPRs, EnableWavefrontSize32); 3915 SGPRBlocks = IsaInfo::getNumSGPRBlocks(&getSTI(), NumSGPRs); 3916 3917 return false; 3918 } 3919 3920 bool AMDGPUAsmParser::ParseDirectiveAMDHSAKernel() { 3921 if (getSTI().getTargetTriple().getArch() != Triple::amdgcn) 3922 return TokError("directive only supported for amdgcn architecture"); 3923 3924 if (getSTI().getTargetTriple().getOS() != Triple::AMDHSA) 3925 return TokError("directive only supported for amdhsa OS"); 3926 3927 StringRef KernelName; 3928 if (getParser().parseIdentifier(KernelName)) 3929 return true; 3930 3931 kernel_descriptor_t KD = getDefaultAmdhsaKernelDescriptor(&getSTI()); 3932 3933 StringSet<> Seen; 3934 3935 IsaVersion IVersion = getIsaVersion(getSTI().getCPU()); 3936 3937 SMRange VGPRRange; 3938 uint64_t NextFreeVGPR = 0; 3939 SMRange SGPRRange; 3940 uint64_t NextFreeSGPR = 0; 3941 unsigned UserSGPRCount = 0; 3942 bool ReserveVCC = true; 3943 bool ReserveFlatScr = true; 3944 bool ReserveXNACK = hasXNACK(); 3945 Optional<bool> EnableWavefrontSize32; 3946 3947 while (true) { 3948 while (getLexer().is(AsmToken::EndOfStatement)) 3949 Lex(); 3950 3951 if (getLexer().isNot(AsmToken::Identifier)) 3952 return TokError("expected .amdhsa_ directive or .end_amdhsa_kernel"); 3953 3954 StringRef ID = getTok().getIdentifier(); 3955 SMRange IDRange = getTok().getLocRange(); 3956 Lex(); 3957 3958 if (ID == ".end_amdhsa_kernel") 3959 break; 3960 3961 if (Seen.find(ID) != Seen.end()) 3962 return TokError(".amdhsa_ directives cannot be repeated"); 3963 Seen.insert(ID); 3964 3965 SMLoc ValStart = getTok().getLoc(); 3966 int64_t IVal; 3967 if (getParser().parseAbsoluteExpression(IVal)) 3968 return true; 3969 SMLoc ValEnd = getTok().getLoc(); 3970 SMRange ValRange = SMRange(ValStart, ValEnd); 3971 3972 if (IVal < 0) 3973 return OutOfRangeError(ValRange); 3974 3975 uint64_t Val = IVal; 3976 3977 #define PARSE_BITS_ENTRY(FIELD, ENTRY, VALUE, RANGE) \ 3978 if (!isUInt<ENTRY##_WIDTH>(VALUE)) \ 3979 return OutOfRangeError(RANGE); \ 3980 AMDHSA_BITS_SET(FIELD, ENTRY, VALUE); 3981 3982 if (ID == ".amdhsa_group_segment_fixed_size") { 3983 if (!isUInt<sizeof(KD.group_segment_fixed_size) * CHAR_BIT>(Val)) 3984 return OutOfRangeError(ValRange); 3985 KD.group_segment_fixed_size = Val; 3986 } else if (ID == ".amdhsa_private_segment_fixed_size") { 3987 if (!isUInt<sizeof(KD.private_segment_fixed_size) * CHAR_BIT>(Val)) 3988 return OutOfRangeError(ValRange); 3989 KD.private_segment_fixed_size = Val; 3990 } else if (ID == ".amdhsa_user_sgpr_private_segment_buffer") { 3991 PARSE_BITS_ENTRY(KD.kernel_code_properties, 3992 KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_BUFFER, 3993 Val, ValRange); 3994 if (Val) 3995 UserSGPRCount += 4; 3996 } else if (ID == ".amdhsa_user_sgpr_dispatch_ptr") { 3997 PARSE_BITS_ENTRY(KD.kernel_code_properties, 3998 KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR, Val, 3999 ValRange); 4000 if (Val) 4001 UserSGPRCount += 2; 4002 } else if (ID == ".amdhsa_user_sgpr_queue_ptr") { 4003 PARSE_BITS_ENTRY(KD.kernel_code_properties, 4004 KERNEL_CODE_PROPERTY_ENABLE_SGPR_QUEUE_PTR, Val, 4005 ValRange); 4006 if (Val) 4007 UserSGPRCount += 2; 4008 } else if (ID == ".amdhsa_user_sgpr_kernarg_segment_ptr") { 4009 PARSE_BITS_ENTRY(KD.kernel_code_properties, 4010 KERNEL_CODE_PROPERTY_ENABLE_SGPR_KERNARG_SEGMENT_PTR, 4011 Val, ValRange); 4012 if (Val) 4013 UserSGPRCount += 2; 4014 } else if (ID == ".amdhsa_user_sgpr_dispatch_id") { 4015 PARSE_BITS_ENTRY(KD.kernel_code_properties, 4016 KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_ID, Val, 4017 ValRange); 4018 if (Val) 4019 UserSGPRCount += 2; 4020 } else if (ID == ".amdhsa_user_sgpr_flat_scratch_init") { 4021 PARSE_BITS_ENTRY(KD.kernel_code_properties, 4022 KERNEL_CODE_PROPERTY_ENABLE_SGPR_FLAT_SCRATCH_INIT, Val, 4023 ValRange); 4024 if (Val) 4025 UserSGPRCount += 2; 4026 } else if (ID == ".amdhsa_user_sgpr_private_segment_size") { 4027 PARSE_BITS_ENTRY(KD.kernel_code_properties, 4028 KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_SIZE, 4029 Val, ValRange); 4030 if (Val) 4031 UserSGPRCount += 1; 4032 } else if (ID == ".amdhsa_wavefront_size32") { 4033 if (IVersion.Major < 10) 4034 return getParser().Error(IDRange.Start, "directive requires gfx10+", 4035 IDRange); 4036 EnableWavefrontSize32 = Val; 4037 PARSE_BITS_ENTRY(KD.kernel_code_properties, 4038 KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32, 4039 Val, ValRange); 4040 } else if (ID == ".amdhsa_system_sgpr_private_segment_wavefront_offset") { 4041 PARSE_BITS_ENTRY( 4042 KD.compute_pgm_rsrc2, 4043 COMPUTE_PGM_RSRC2_ENABLE_SGPR_PRIVATE_SEGMENT_WAVEFRONT_OFFSET, Val, 4044 ValRange); 4045 } else if (ID == ".amdhsa_system_sgpr_workgroup_id_x") { 4046 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 4047 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_X, Val, 4048 ValRange); 4049 } else if (ID == ".amdhsa_system_sgpr_workgroup_id_y") { 4050 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 4051 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Y, Val, 4052 ValRange); 4053 } else if (ID == ".amdhsa_system_sgpr_workgroup_id_z") { 4054 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 4055 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Z, Val, 4056 ValRange); 4057 } else if (ID == ".amdhsa_system_sgpr_workgroup_info") { 4058 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 4059 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_INFO, Val, 4060 ValRange); 4061 } else if (ID == ".amdhsa_system_vgpr_workitem_id") { 4062 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 4063 COMPUTE_PGM_RSRC2_ENABLE_VGPR_WORKITEM_ID, Val, 4064 ValRange); 4065 } else if (ID == ".amdhsa_next_free_vgpr") { 4066 VGPRRange = ValRange; 4067 NextFreeVGPR = Val; 4068 } else if (ID == ".amdhsa_next_free_sgpr") { 4069 SGPRRange = ValRange; 4070 NextFreeSGPR = Val; 4071 } else if (ID == ".amdhsa_reserve_vcc") { 4072 if (!isUInt<1>(Val)) 4073 return OutOfRangeError(ValRange); 4074 ReserveVCC = Val; 4075 } else if (ID == ".amdhsa_reserve_flat_scratch") { 4076 if (IVersion.Major < 7) 4077 return getParser().Error(IDRange.Start, "directive requires gfx7+", 4078 IDRange); 4079 if (!isUInt<1>(Val)) 4080 return OutOfRangeError(ValRange); 4081 ReserveFlatScr = Val; 4082 } else if (ID == ".amdhsa_reserve_xnack_mask") { 4083 if (IVersion.Major < 8) 4084 return getParser().Error(IDRange.Start, "directive requires gfx8+", 4085 IDRange); 4086 if (!isUInt<1>(Val)) 4087 return OutOfRangeError(ValRange); 4088 ReserveXNACK = Val; 4089 } else if (ID == ".amdhsa_float_round_mode_32") { 4090 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, 4091 COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_32, Val, ValRange); 4092 } else if (ID == ".amdhsa_float_round_mode_16_64") { 4093 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, 4094 COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_16_64, Val, ValRange); 4095 } else if (ID == ".amdhsa_float_denorm_mode_32") { 4096 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, 4097 COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_32, Val, ValRange); 4098 } else if (ID == ".amdhsa_float_denorm_mode_16_64") { 4099 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, 4100 COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_16_64, Val, 4101 ValRange); 4102 } else if (ID == ".amdhsa_dx10_clamp") { 4103 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, 4104 COMPUTE_PGM_RSRC1_ENABLE_DX10_CLAMP, Val, ValRange); 4105 } else if (ID == ".amdhsa_ieee_mode") { 4106 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_ENABLE_IEEE_MODE, 4107 Val, ValRange); 4108 } else if (ID == ".amdhsa_fp16_overflow") { 4109 if (IVersion.Major < 9) 4110 return getParser().Error(IDRange.Start, "directive requires gfx9+", 4111 IDRange); 4112 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_FP16_OVFL, Val, 4113 ValRange); 4114 } else if (ID == ".amdhsa_workgroup_processor_mode") { 4115 if (IVersion.Major < 10) 4116 return getParser().Error(IDRange.Start, "directive requires gfx10+", 4117 IDRange); 4118 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_WGP_MODE, Val, 4119 ValRange); 4120 } else if (ID == ".amdhsa_memory_ordered") { 4121 if (IVersion.Major < 10) 4122 return getParser().Error(IDRange.Start, "directive requires gfx10+", 4123 IDRange); 4124 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_MEM_ORDERED, Val, 4125 ValRange); 4126 } else if (ID == ".amdhsa_forward_progress") { 4127 if (IVersion.Major < 10) 4128 return getParser().Error(IDRange.Start, "directive requires gfx10+", 4129 IDRange); 4130 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_FWD_PROGRESS, Val, 4131 ValRange); 4132 } else if (ID == ".amdhsa_exception_fp_ieee_invalid_op") { 4133 PARSE_BITS_ENTRY( 4134 KD.compute_pgm_rsrc2, 4135 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INVALID_OPERATION, Val, 4136 ValRange); 4137 } else if (ID == ".amdhsa_exception_fp_denorm_src") { 4138 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 4139 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_FP_DENORMAL_SOURCE, 4140 Val, ValRange); 4141 } else if (ID == ".amdhsa_exception_fp_ieee_div_zero") { 4142 PARSE_BITS_ENTRY( 4143 KD.compute_pgm_rsrc2, 4144 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_DIVISION_BY_ZERO, Val, 4145 ValRange); 4146 } else if (ID == ".amdhsa_exception_fp_ieee_overflow") { 4147 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 4148 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_OVERFLOW, 4149 Val, ValRange); 4150 } else if (ID == ".amdhsa_exception_fp_ieee_underflow") { 4151 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 4152 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_UNDERFLOW, 4153 Val, ValRange); 4154 } else if (ID == ".amdhsa_exception_fp_ieee_inexact") { 4155 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 4156 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INEXACT, 4157 Val, ValRange); 4158 } else if (ID == ".amdhsa_exception_int_div_zero") { 4159 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 4160 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_INT_DIVIDE_BY_ZERO, 4161 Val, ValRange); 4162 } else { 4163 return getParser().Error(IDRange.Start, 4164 "unknown .amdhsa_kernel directive", IDRange); 4165 } 4166 4167 #undef PARSE_BITS_ENTRY 4168 } 4169 4170 if (Seen.find(".amdhsa_next_free_vgpr") == Seen.end()) 4171 return TokError(".amdhsa_next_free_vgpr directive is required"); 4172 4173 if (Seen.find(".amdhsa_next_free_sgpr") == Seen.end()) 4174 return TokError(".amdhsa_next_free_sgpr directive is required"); 4175 4176 unsigned VGPRBlocks; 4177 unsigned SGPRBlocks; 4178 if (calculateGPRBlocks(getFeatureBits(), ReserveVCC, ReserveFlatScr, 4179 ReserveXNACK, EnableWavefrontSize32, NextFreeVGPR, 4180 VGPRRange, NextFreeSGPR, SGPRRange, VGPRBlocks, 4181 SGPRBlocks)) 4182 return true; 4183 4184 if (!isUInt<COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT_WIDTH>( 4185 VGPRBlocks)) 4186 return OutOfRangeError(VGPRRange); 4187 AMDHSA_BITS_SET(KD.compute_pgm_rsrc1, 4188 COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT, VGPRBlocks); 4189 4190 if (!isUInt<COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT_WIDTH>( 4191 SGPRBlocks)) 4192 return OutOfRangeError(SGPRRange); 4193 AMDHSA_BITS_SET(KD.compute_pgm_rsrc1, 4194 COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT, 4195 SGPRBlocks); 4196 4197 if (!isUInt<COMPUTE_PGM_RSRC2_USER_SGPR_COUNT_WIDTH>(UserSGPRCount)) 4198 return TokError("too many user SGPRs enabled"); 4199 AMDHSA_BITS_SET(KD.compute_pgm_rsrc2, COMPUTE_PGM_RSRC2_USER_SGPR_COUNT, 4200 UserSGPRCount); 4201 4202 getTargetStreamer().EmitAmdhsaKernelDescriptor( 4203 getSTI(), KernelName, KD, NextFreeVGPR, NextFreeSGPR, ReserveVCC, 4204 ReserveFlatScr, ReserveXNACK); 4205 return false; 4206 } 4207 4208 bool AMDGPUAsmParser::ParseDirectiveHSACodeObjectVersion() { 4209 uint32_t Major; 4210 uint32_t Minor; 4211 4212 if (ParseDirectiveMajorMinor(Major, Minor)) 4213 return true; 4214 4215 getTargetStreamer().EmitDirectiveHSACodeObjectVersion(Major, Minor); 4216 return false; 4217 } 4218 4219 bool AMDGPUAsmParser::ParseDirectiveHSACodeObjectISA() { 4220 uint32_t Major; 4221 uint32_t Minor; 4222 uint32_t Stepping; 4223 StringRef VendorName; 4224 StringRef ArchName; 4225 4226 // If this directive has no arguments, then use the ISA version for the 4227 // targeted GPU. 4228 if (getLexer().is(AsmToken::EndOfStatement)) { 4229 AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU()); 4230 getTargetStreamer().EmitDirectiveHSACodeObjectISA(ISA.Major, ISA.Minor, 4231 ISA.Stepping, 4232 "AMD", "AMDGPU"); 4233 return false; 4234 } 4235 4236 if (ParseDirectiveMajorMinor(Major, Minor)) 4237 return true; 4238 4239 if (getLexer().isNot(AsmToken::Comma)) 4240 return TokError("stepping version number required, comma expected"); 4241 Lex(); 4242 4243 if (ParseAsAbsoluteExpression(Stepping)) 4244 return TokError("invalid stepping version"); 4245 4246 if (getLexer().isNot(AsmToken::Comma)) 4247 return TokError("vendor name required, comma expected"); 4248 Lex(); 4249 4250 if (getLexer().isNot(AsmToken::String)) 4251 return TokError("invalid vendor name"); 4252 4253 VendorName = getLexer().getTok().getStringContents(); 4254 Lex(); 4255 4256 if (getLexer().isNot(AsmToken::Comma)) 4257 return TokError("arch name required, comma expected"); 4258 Lex(); 4259 4260 if (getLexer().isNot(AsmToken::String)) 4261 return TokError("invalid arch name"); 4262 4263 ArchName = getLexer().getTok().getStringContents(); 4264 Lex(); 4265 4266 getTargetStreamer().EmitDirectiveHSACodeObjectISA(Major, Minor, Stepping, 4267 VendorName, ArchName); 4268 return false; 4269 } 4270 4271 bool AMDGPUAsmParser::ParseAMDKernelCodeTValue(StringRef ID, 4272 amd_kernel_code_t &Header) { 4273 // max_scratch_backing_memory_byte_size is deprecated. Ignore it while parsing 4274 // assembly for backwards compatibility. 4275 if (ID == "max_scratch_backing_memory_byte_size") { 4276 Parser.eatToEndOfStatement(); 4277 return false; 4278 } 4279 4280 SmallString<40> ErrStr; 4281 raw_svector_ostream Err(ErrStr); 4282 if (!parseAmdKernelCodeField(ID, getParser(), Header, Err)) { 4283 return TokError(Err.str()); 4284 } 4285 Lex(); 4286 4287 if (ID == "enable_wavefront_size32") { 4288 if (Header.code_properties & AMD_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32) { 4289 if (!isGFX10()) 4290 return TokError("enable_wavefront_size32=1 is only allowed on GFX10+"); 4291 if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize32]) 4292 return TokError("enable_wavefront_size32=1 requires +WavefrontSize32"); 4293 } else { 4294 if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize64]) 4295 return TokError("enable_wavefront_size32=0 requires +WavefrontSize64"); 4296 } 4297 } 4298 4299 if (ID == "wavefront_size") { 4300 if (Header.wavefront_size == 5) { 4301 if (!isGFX10()) 4302 return TokError("wavefront_size=5 is only allowed on GFX10+"); 4303 if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize32]) 4304 return TokError("wavefront_size=5 requires +WavefrontSize32"); 4305 } else if (Header.wavefront_size == 6) { 4306 if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize64]) 4307 return TokError("wavefront_size=6 requires +WavefrontSize64"); 4308 } 4309 } 4310 4311 if (ID == "enable_wgp_mode") { 4312 if (G_00B848_WGP_MODE(Header.compute_pgm_resource_registers) && !isGFX10()) 4313 return TokError("enable_wgp_mode=1 is only allowed on GFX10+"); 4314 } 4315 4316 if (ID == "enable_mem_ordered") { 4317 if (G_00B848_MEM_ORDERED(Header.compute_pgm_resource_registers) && !isGFX10()) 4318 return TokError("enable_mem_ordered=1 is only allowed on GFX10+"); 4319 } 4320 4321 if (ID == "enable_fwd_progress") { 4322 if (G_00B848_FWD_PROGRESS(Header.compute_pgm_resource_registers) && !isGFX10()) 4323 return TokError("enable_fwd_progress=1 is only allowed on GFX10+"); 4324 } 4325 4326 return false; 4327 } 4328 4329 bool AMDGPUAsmParser::ParseDirectiveAMDKernelCodeT() { 4330 amd_kernel_code_t Header; 4331 AMDGPU::initDefaultAMDKernelCodeT(Header, &getSTI()); 4332 4333 while (true) { 4334 // Lex EndOfStatement. This is in a while loop, because lexing a comment 4335 // will set the current token to EndOfStatement. 4336 while(getLexer().is(AsmToken::EndOfStatement)) 4337 Lex(); 4338 4339 if (getLexer().isNot(AsmToken::Identifier)) 4340 return TokError("expected value identifier or .end_amd_kernel_code_t"); 4341 4342 StringRef ID = getLexer().getTok().getIdentifier(); 4343 Lex(); 4344 4345 if (ID == ".end_amd_kernel_code_t") 4346 break; 4347 4348 if (ParseAMDKernelCodeTValue(ID, Header)) 4349 return true; 4350 } 4351 4352 getTargetStreamer().EmitAMDKernelCodeT(Header); 4353 4354 return false; 4355 } 4356 4357 bool AMDGPUAsmParser::ParseDirectiveAMDGPUHsaKernel() { 4358 if (getLexer().isNot(AsmToken::Identifier)) 4359 return TokError("expected symbol name"); 4360 4361 StringRef KernelName = Parser.getTok().getString(); 4362 4363 getTargetStreamer().EmitAMDGPUSymbolType(KernelName, 4364 ELF::STT_AMDGPU_HSA_KERNEL); 4365 Lex(); 4366 if (!AMDGPU::IsaInfo::hasCodeObjectV3(&getSTI())) 4367 KernelScope.initialize(getContext()); 4368 return false; 4369 } 4370 4371 bool AMDGPUAsmParser::ParseDirectiveISAVersion() { 4372 if (getSTI().getTargetTriple().getArch() != Triple::amdgcn) { 4373 return Error(getParser().getTok().getLoc(), 4374 ".amd_amdgpu_isa directive is not available on non-amdgcn " 4375 "architectures"); 4376 } 4377 4378 auto ISAVersionStringFromASM = getLexer().getTok().getStringContents(); 4379 4380 std::string ISAVersionStringFromSTI; 4381 raw_string_ostream ISAVersionStreamFromSTI(ISAVersionStringFromSTI); 4382 IsaInfo::streamIsaVersion(&getSTI(), ISAVersionStreamFromSTI); 4383 4384 if (ISAVersionStringFromASM != ISAVersionStreamFromSTI.str()) { 4385 return Error(getParser().getTok().getLoc(), 4386 ".amd_amdgpu_isa directive does not match triple and/or mcpu " 4387 "arguments specified through the command line"); 4388 } 4389 4390 getTargetStreamer().EmitISAVersion(ISAVersionStreamFromSTI.str()); 4391 Lex(); 4392 4393 return false; 4394 } 4395 4396 bool AMDGPUAsmParser::ParseDirectiveHSAMetadata() { 4397 const char *AssemblerDirectiveBegin; 4398 const char *AssemblerDirectiveEnd; 4399 std::tie(AssemblerDirectiveBegin, AssemblerDirectiveEnd) = 4400 AMDGPU::IsaInfo::hasCodeObjectV3(&getSTI()) 4401 ? std::make_tuple(HSAMD::V3::AssemblerDirectiveBegin, 4402 HSAMD::V3::AssemblerDirectiveEnd) 4403 : std::make_tuple(HSAMD::AssemblerDirectiveBegin, 4404 HSAMD::AssemblerDirectiveEnd); 4405 4406 if (getSTI().getTargetTriple().getOS() != Triple::AMDHSA) { 4407 return Error(getParser().getTok().getLoc(), 4408 (Twine(AssemblerDirectiveBegin) + Twine(" directive is " 4409 "not available on non-amdhsa OSes")).str()); 4410 } 4411 4412 std::string HSAMetadataString; 4413 if (ParseToEndDirective(AssemblerDirectiveBegin, AssemblerDirectiveEnd, 4414 HSAMetadataString)) 4415 return true; 4416 4417 if (IsaInfo::hasCodeObjectV3(&getSTI())) { 4418 if (!getTargetStreamer().EmitHSAMetadataV3(HSAMetadataString)) 4419 return Error(getParser().getTok().getLoc(), "invalid HSA metadata"); 4420 } else { 4421 if (!getTargetStreamer().EmitHSAMetadataV2(HSAMetadataString)) 4422 return Error(getParser().getTok().getLoc(), "invalid HSA metadata"); 4423 } 4424 4425 return false; 4426 } 4427 4428 /// Common code to parse out a block of text (typically YAML) between start and 4429 /// end directives. 4430 bool AMDGPUAsmParser::ParseToEndDirective(const char *AssemblerDirectiveBegin, 4431 const char *AssemblerDirectiveEnd, 4432 std::string &CollectString) { 4433 4434 raw_string_ostream CollectStream(CollectString); 4435 4436 getLexer().setSkipSpace(false); 4437 4438 bool FoundEnd = false; 4439 while (!getLexer().is(AsmToken::Eof)) { 4440 while (getLexer().is(AsmToken::Space)) { 4441 CollectStream << getLexer().getTok().getString(); 4442 Lex(); 4443 } 4444 4445 if (getLexer().is(AsmToken::Identifier)) { 4446 StringRef ID = getLexer().getTok().getIdentifier(); 4447 if (ID == AssemblerDirectiveEnd) { 4448 Lex(); 4449 FoundEnd = true; 4450 break; 4451 } 4452 } 4453 4454 CollectStream << Parser.parseStringToEndOfStatement() 4455 << getContext().getAsmInfo()->getSeparatorString(); 4456 4457 Parser.eatToEndOfStatement(); 4458 } 4459 4460 getLexer().setSkipSpace(true); 4461 4462 if (getLexer().is(AsmToken::Eof) && !FoundEnd) { 4463 return TokError(Twine("expected directive ") + 4464 Twine(AssemblerDirectiveEnd) + Twine(" not found")); 4465 } 4466 4467 CollectStream.flush(); 4468 return false; 4469 } 4470 4471 /// Parse the assembler directive for new MsgPack-format PAL metadata. 4472 bool AMDGPUAsmParser::ParseDirectivePALMetadataBegin() { 4473 std::string String; 4474 if (ParseToEndDirective(AMDGPU::PALMD::AssemblerDirectiveBegin, 4475 AMDGPU::PALMD::AssemblerDirectiveEnd, String)) 4476 return true; 4477 4478 auto PALMetadata = getTargetStreamer().getPALMetadata(); 4479 if (!PALMetadata->setFromString(String)) 4480 return Error(getParser().getTok().getLoc(), "invalid PAL metadata"); 4481 return false; 4482 } 4483 4484 /// Parse the assembler directive for old linear-format PAL metadata. 4485 bool AMDGPUAsmParser::ParseDirectivePALMetadata() { 4486 if (getSTI().getTargetTriple().getOS() != Triple::AMDPAL) { 4487 return Error(getParser().getTok().getLoc(), 4488 (Twine(PALMD::AssemblerDirective) + Twine(" directive is " 4489 "not available on non-amdpal OSes")).str()); 4490 } 4491 4492 auto PALMetadata = getTargetStreamer().getPALMetadata(); 4493 PALMetadata->setLegacy(); 4494 for (;;) { 4495 uint32_t Key, Value; 4496 if (ParseAsAbsoluteExpression(Key)) { 4497 return TokError(Twine("invalid value in ") + 4498 Twine(PALMD::AssemblerDirective)); 4499 } 4500 if (getLexer().isNot(AsmToken::Comma)) { 4501 return TokError(Twine("expected an even number of values in ") + 4502 Twine(PALMD::AssemblerDirective)); 4503 } 4504 Lex(); 4505 if (ParseAsAbsoluteExpression(Value)) { 4506 return TokError(Twine("invalid value in ") + 4507 Twine(PALMD::AssemblerDirective)); 4508 } 4509 PALMetadata->setRegister(Key, Value); 4510 if (getLexer().isNot(AsmToken::Comma)) 4511 break; 4512 Lex(); 4513 } 4514 return false; 4515 } 4516 4517 /// ParseDirectiveAMDGPULDS 4518 /// ::= .amdgpu_lds identifier ',' size_expression [',' align_expression] 4519 bool AMDGPUAsmParser::ParseDirectiveAMDGPULDS() { 4520 if (getParser().checkForValidSection()) 4521 return true; 4522 4523 StringRef Name; 4524 SMLoc NameLoc = getLexer().getLoc(); 4525 if (getParser().parseIdentifier(Name)) 4526 return TokError("expected identifier in directive"); 4527 4528 MCSymbol *Symbol = getContext().getOrCreateSymbol(Name); 4529 if (parseToken(AsmToken::Comma, "expected ','")) 4530 return true; 4531 4532 unsigned LocalMemorySize = AMDGPU::IsaInfo::getLocalMemorySize(&getSTI()); 4533 4534 int64_t Size; 4535 SMLoc SizeLoc = getLexer().getLoc(); 4536 if (getParser().parseAbsoluteExpression(Size)) 4537 return true; 4538 if (Size < 0) 4539 return Error(SizeLoc, "size must be non-negative"); 4540 if (Size > LocalMemorySize) 4541 return Error(SizeLoc, "size is too large"); 4542 4543 int64_t Alignment = 4; 4544 if (getLexer().is(AsmToken::Comma)) { 4545 Lex(); 4546 SMLoc AlignLoc = getLexer().getLoc(); 4547 if (getParser().parseAbsoluteExpression(Alignment)) 4548 return true; 4549 if (Alignment < 0 || !isPowerOf2_64(Alignment)) 4550 return Error(AlignLoc, "alignment must be a power of two"); 4551 4552 // Alignment larger than the size of LDS is possible in theory, as long 4553 // as the linker manages to place to symbol at address 0, but we do want 4554 // to make sure the alignment fits nicely into a 32-bit integer. 4555 if (Alignment >= 1u << 31) 4556 return Error(AlignLoc, "alignment is too large"); 4557 } 4558 4559 if (parseToken(AsmToken::EndOfStatement, 4560 "unexpected token in '.amdgpu_lds' directive")) 4561 return true; 4562 4563 Symbol->redefineIfPossible(); 4564 if (!Symbol->isUndefined()) 4565 return Error(NameLoc, "invalid symbol redefinition"); 4566 4567 getTargetStreamer().emitAMDGPULDS(Symbol, Size, Align(Alignment)); 4568 return false; 4569 } 4570 4571 bool AMDGPUAsmParser::ParseDirective(AsmToken DirectiveID) { 4572 StringRef IDVal = DirectiveID.getString(); 4573 4574 if (AMDGPU::IsaInfo::hasCodeObjectV3(&getSTI())) { 4575 if (IDVal == ".amdgcn_target") 4576 return ParseDirectiveAMDGCNTarget(); 4577 4578 if (IDVal == ".amdhsa_kernel") 4579 return ParseDirectiveAMDHSAKernel(); 4580 4581 // TODO: Restructure/combine with PAL metadata directive. 4582 if (IDVal == AMDGPU::HSAMD::V3::AssemblerDirectiveBegin) 4583 return ParseDirectiveHSAMetadata(); 4584 } else { 4585 if (IDVal == ".hsa_code_object_version") 4586 return ParseDirectiveHSACodeObjectVersion(); 4587 4588 if (IDVal == ".hsa_code_object_isa") 4589 return ParseDirectiveHSACodeObjectISA(); 4590 4591 if (IDVal == ".amd_kernel_code_t") 4592 return ParseDirectiveAMDKernelCodeT(); 4593 4594 if (IDVal == ".amdgpu_hsa_kernel") 4595 return ParseDirectiveAMDGPUHsaKernel(); 4596 4597 if (IDVal == ".amd_amdgpu_isa") 4598 return ParseDirectiveISAVersion(); 4599 4600 if (IDVal == AMDGPU::HSAMD::AssemblerDirectiveBegin) 4601 return ParseDirectiveHSAMetadata(); 4602 } 4603 4604 if (IDVal == ".amdgpu_lds") 4605 return ParseDirectiveAMDGPULDS(); 4606 4607 if (IDVal == PALMD::AssemblerDirectiveBegin) 4608 return ParseDirectivePALMetadataBegin(); 4609 4610 if (IDVal == PALMD::AssemblerDirective) 4611 return ParseDirectivePALMetadata(); 4612 4613 return true; 4614 } 4615 4616 bool AMDGPUAsmParser::subtargetHasRegister(const MCRegisterInfo &MRI, 4617 unsigned RegNo) const { 4618 4619 for (MCRegAliasIterator R(AMDGPU::TTMP12_TTMP13_TTMP14_TTMP15, &MRI, true); 4620 R.isValid(); ++R) { 4621 if (*R == RegNo) 4622 return isGFX9() || isGFX10(); 4623 } 4624 4625 // GFX10 has 2 more SGPRs 104 and 105. 4626 for (MCRegAliasIterator R(AMDGPU::SGPR104_SGPR105, &MRI, true); 4627 R.isValid(); ++R) { 4628 if (*R == RegNo) 4629 return hasSGPR104_SGPR105(); 4630 } 4631 4632 switch (RegNo) { 4633 case AMDGPU::SRC_SHARED_BASE: 4634 case AMDGPU::SRC_SHARED_LIMIT: 4635 case AMDGPU::SRC_PRIVATE_BASE: 4636 case AMDGPU::SRC_PRIVATE_LIMIT: 4637 case AMDGPU::SRC_POPS_EXITING_WAVE_ID: 4638 return !isCI() && !isSI() && !isVI(); 4639 case AMDGPU::TBA: 4640 case AMDGPU::TBA_LO: 4641 case AMDGPU::TBA_HI: 4642 case AMDGPU::TMA: 4643 case AMDGPU::TMA_LO: 4644 case AMDGPU::TMA_HI: 4645 return !isGFX9() && !isGFX10(); 4646 case AMDGPU::XNACK_MASK: 4647 case AMDGPU::XNACK_MASK_LO: 4648 case AMDGPU::XNACK_MASK_HI: 4649 return !isCI() && !isSI() && !isGFX10() && hasXNACK(); 4650 case AMDGPU::SGPR_NULL: 4651 return isGFX10(); 4652 default: 4653 break; 4654 } 4655 4656 if (isCI()) 4657 return true; 4658 4659 if (isSI() || isGFX10()) { 4660 // No flat_scr on SI. 4661 // On GFX10 flat scratch is not a valid register operand and can only be 4662 // accessed with s_setreg/s_getreg. 4663 switch (RegNo) { 4664 case AMDGPU::FLAT_SCR: 4665 case AMDGPU::FLAT_SCR_LO: 4666 case AMDGPU::FLAT_SCR_HI: 4667 return false; 4668 default: 4669 return true; 4670 } 4671 } 4672 4673 // VI only has 102 SGPRs, so make sure we aren't trying to use the 2 more that 4674 // SI/CI have. 4675 for (MCRegAliasIterator R(AMDGPU::SGPR102_SGPR103, &MRI, true); 4676 R.isValid(); ++R) { 4677 if (*R == RegNo) 4678 return hasSGPR102_SGPR103(); 4679 } 4680 4681 return true; 4682 } 4683 4684 OperandMatchResultTy 4685 AMDGPUAsmParser::parseOperand(OperandVector &Operands, StringRef Mnemonic, 4686 OperandMode Mode) { 4687 // Try to parse with a custom parser 4688 OperandMatchResultTy ResTy = MatchOperandParserImpl(Operands, Mnemonic); 4689 4690 // If we successfully parsed the operand or if there as an error parsing, 4691 // we are done. 4692 // 4693 // If we are parsing after we reach EndOfStatement then this means we 4694 // are appending default values to the Operands list. This is only done 4695 // by custom parser, so we shouldn't continue on to the generic parsing. 4696 if (ResTy == MatchOperand_Success || ResTy == MatchOperand_ParseFail || 4697 getLexer().is(AsmToken::EndOfStatement)) 4698 return ResTy; 4699 4700 if (Mode == OperandMode_NSA && getLexer().is(AsmToken::LBrac)) { 4701 unsigned Prefix = Operands.size(); 4702 SMLoc LBraceLoc = getTok().getLoc(); 4703 Parser.Lex(); // eat the '[' 4704 4705 for (;;) { 4706 ResTy = parseReg(Operands); 4707 if (ResTy != MatchOperand_Success) 4708 return ResTy; 4709 4710 if (getLexer().is(AsmToken::RBrac)) 4711 break; 4712 4713 if (getLexer().isNot(AsmToken::Comma)) 4714 return MatchOperand_ParseFail; 4715 Parser.Lex(); 4716 } 4717 4718 if (Operands.size() - Prefix > 1) { 4719 Operands.insert(Operands.begin() + Prefix, 4720 AMDGPUOperand::CreateToken(this, "[", LBraceLoc)); 4721 Operands.push_back(AMDGPUOperand::CreateToken(this, "]", 4722 getTok().getLoc())); 4723 } 4724 4725 Parser.Lex(); // eat the ']' 4726 return MatchOperand_Success; 4727 } 4728 4729 return parseRegOrImm(Operands); 4730 } 4731 4732 StringRef AMDGPUAsmParser::parseMnemonicSuffix(StringRef Name) { 4733 // Clear any forced encodings from the previous instruction. 4734 setForcedEncodingSize(0); 4735 setForcedDPP(false); 4736 setForcedSDWA(false); 4737 4738 if (Name.endswith("_e64")) { 4739 setForcedEncodingSize(64); 4740 return Name.substr(0, Name.size() - 4); 4741 } else if (Name.endswith("_e32")) { 4742 setForcedEncodingSize(32); 4743 return Name.substr(0, Name.size() - 4); 4744 } else if (Name.endswith("_dpp")) { 4745 setForcedDPP(true); 4746 return Name.substr(0, Name.size() - 4); 4747 } else if (Name.endswith("_sdwa")) { 4748 setForcedSDWA(true); 4749 return Name.substr(0, Name.size() - 5); 4750 } 4751 return Name; 4752 } 4753 4754 bool AMDGPUAsmParser::ParseInstruction(ParseInstructionInfo &Info, 4755 StringRef Name, 4756 SMLoc NameLoc, OperandVector &Operands) { 4757 // Add the instruction mnemonic 4758 Name = parseMnemonicSuffix(Name); 4759 Operands.push_back(AMDGPUOperand::CreateToken(this, Name, NameLoc)); 4760 4761 bool IsMIMG = Name.startswith("image_"); 4762 4763 while (!getLexer().is(AsmToken::EndOfStatement)) { 4764 OperandMode Mode = OperandMode_Default; 4765 if (IsMIMG && isGFX10() && Operands.size() == 2) 4766 Mode = OperandMode_NSA; 4767 OperandMatchResultTy Res = parseOperand(Operands, Name, Mode); 4768 4769 // Eat the comma or space if there is one. 4770 if (getLexer().is(AsmToken::Comma)) 4771 Parser.Lex(); 4772 4773 if (Res != MatchOperand_Success) { 4774 if (!Parser.hasPendingError()) { 4775 // FIXME: use real operand location rather than the current location. 4776 StringRef Msg = 4777 (Res == MatchOperand_ParseFail) ? "failed parsing operand." : 4778 "not a valid operand."; 4779 Error(getLexer().getLoc(), Msg); 4780 } 4781 while (!getLexer().is(AsmToken::EndOfStatement)) { 4782 Parser.Lex(); 4783 } 4784 return true; 4785 } 4786 } 4787 4788 return false; 4789 } 4790 4791 //===----------------------------------------------------------------------===// 4792 // Utility functions 4793 //===----------------------------------------------------------------------===// 4794 4795 OperandMatchResultTy 4796 AMDGPUAsmParser::parseIntWithPrefix(const char *Prefix, int64_t &IntVal) { 4797 4798 if (!trySkipId(Prefix, AsmToken::Colon)) 4799 return MatchOperand_NoMatch; 4800 4801 return parseExpr(IntVal) ? MatchOperand_Success : MatchOperand_ParseFail; 4802 } 4803 4804 OperandMatchResultTy 4805 AMDGPUAsmParser::parseIntWithPrefix(const char *Prefix, OperandVector &Operands, 4806 AMDGPUOperand::ImmTy ImmTy, 4807 bool (*ConvertResult)(int64_t&)) { 4808 SMLoc S = getLoc(); 4809 int64_t Value = 0; 4810 4811 OperandMatchResultTy Res = parseIntWithPrefix(Prefix, Value); 4812 if (Res != MatchOperand_Success) 4813 return Res; 4814 4815 if (ConvertResult && !ConvertResult(Value)) { 4816 Error(S, "invalid " + StringRef(Prefix) + " value."); 4817 } 4818 4819 Operands.push_back(AMDGPUOperand::CreateImm(this, Value, S, ImmTy)); 4820 return MatchOperand_Success; 4821 } 4822 4823 OperandMatchResultTy 4824 AMDGPUAsmParser::parseOperandArrayWithPrefix(const char *Prefix, 4825 OperandVector &Operands, 4826 AMDGPUOperand::ImmTy ImmTy, 4827 bool (*ConvertResult)(int64_t&)) { 4828 SMLoc S = getLoc(); 4829 if (!trySkipId(Prefix, AsmToken::Colon)) 4830 return MatchOperand_NoMatch; 4831 4832 if (!skipToken(AsmToken::LBrac, "expected a left square bracket")) 4833 return MatchOperand_ParseFail; 4834 4835 unsigned Val = 0; 4836 const unsigned MaxSize = 4; 4837 4838 // FIXME: How to verify the number of elements matches the number of src 4839 // operands? 4840 for (int I = 0; ; ++I) { 4841 int64_t Op; 4842 SMLoc Loc = getLoc(); 4843 if (!parseExpr(Op)) 4844 return MatchOperand_ParseFail; 4845 4846 if (Op != 0 && Op != 1) { 4847 Error(Loc, "invalid " + StringRef(Prefix) + " value."); 4848 return MatchOperand_ParseFail; 4849 } 4850 4851 Val |= (Op << I); 4852 4853 if (trySkipToken(AsmToken::RBrac)) 4854 break; 4855 4856 if (I + 1 == MaxSize) { 4857 Error(getLoc(), "expected a closing square bracket"); 4858 return MatchOperand_ParseFail; 4859 } 4860 4861 if (!skipToken(AsmToken::Comma, "expected a comma")) 4862 return MatchOperand_ParseFail; 4863 } 4864 4865 Operands.push_back(AMDGPUOperand::CreateImm(this, Val, S, ImmTy)); 4866 return MatchOperand_Success; 4867 } 4868 4869 OperandMatchResultTy 4870 AMDGPUAsmParser::parseNamedBit(const char *Name, OperandVector &Operands, 4871 AMDGPUOperand::ImmTy ImmTy) { 4872 int64_t Bit = 0; 4873 SMLoc S = Parser.getTok().getLoc(); 4874 4875 // We are at the end of the statement, and this is a default argument, so 4876 // use a default value. 4877 if (getLexer().isNot(AsmToken::EndOfStatement)) { 4878 switch(getLexer().getKind()) { 4879 case AsmToken::Identifier: { 4880 StringRef Tok = Parser.getTok().getString(); 4881 if (Tok == Name) { 4882 if (Tok == "r128" && !hasMIMG_R128()) 4883 Error(S, "r128 modifier is not supported on this GPU"); 4884 if (Tok == "a16" && !isGFX9() && !hasGFX10A16()) 4885 Error(S, "a16 modifier is not supported on this GPU"); 4886 Bit = 1; 4887 Parser.Lex(); 4888 } else if (Tok.startswith("no") && Tok.endswith(Name)) { 4889 Bit = 0; 4890 Parser.Lex(); 4891 } else { 4892 return MatchOperand_NoMatch; 4893 } 4894 break; 4895 } 4896 default: 4897 return MatchOperand_NoMatch; 4898 } 4899 } 4900 4901 if (!isGFX10() && ImmTy == AMDGPUOperand::ImmTyDLC) 4902 return MatchOperand_ParseFail; 4903 4904 if (isGFX9() && ImmTy == AMDGPUOperand::ImmTyA16) 4905 ImmTy = AMDGPUOperand::ImmTyR128A16; 4906 4907 Operands.push_back(AMDGPUOperand::CreateImm(this, Bit, S, ImmTy)); 4908 return MatchOperand_Success; 4909 } 4910 4911 static void addOptionalImmOperand( 4912 MCInst& Inst, const OperandVector& Operands, 4913 AMDGPUAsmParser::OptionalImmIndexMap& OptionalIdx, 4914 AMDGPUOperand::ImmTy ImmT, 4915 int64_t Default = 0) { 4916 auto i = OptionalIdx.find(ImmT); 4917 if (i != OptionalIdx.end()) { 4918 unsigned Idx = i->second; 4919 ((AMDGPUOperand &)*Operands[Idx]).addImmOperands(Inst, 1); 4920 } else { 4921 Inst.addOperand(MCOperand::createImm(Default)); 4922 } 4923 } 4924 4925 OperandMatchResultTy 4926 AMDGPUAsmParser::parseStringWithPrefix(StringRef Prefix, StringRef &Value) { 4927 if (getLexer().isNot(AsmToken::Identifier)) { 4928 return MatchOperand_NoMatch; 4929 } 4930 StringRef Tok = Parser.getTok().getString(); 4931 if (Tok != Prefix) { 4932 return MatchOperand_NoMatch; 4933 } 4934 4935 Parser.Lex(); 4936 if (getLexer().isNot(AsmToken::Colon)) { 4937 return MatchOperand_ParseFail; 4938 } 4939 4940 Parser.Lex(); 4941 if (getLexer().isNot(AsmToken::Identifier)) { 4942 return MatchOperand_ParseFail; 4943 } 4944 4945 Value = Parser.getTok().getString(); 4946 return MatchOperand_Success; 4947 } 4948 4949 //===----------------------------------------------------------------------===// 4950 // MTBUF format 4951 //===----------------------------------------------------------------------===// 4952 4953 bool AMDGPUAsmParser::tryParseFmt(const char *Pref, 4954 int64_t MaxVal, 4955 int64_t &Fmt) { 4956 int64_t Val; 4957 SMLoc Loc = getLoc(); 4958 4959 auto Res = parseIntWithPrefix(Pref, Val); 4960 if (Res == MatchOperand_ParseFail) 4961 return false; 4962 if (Res == MatchOperand_NoMatch) 4963 return true; 4964 4965 if (Val < 0 || Val > MaxVal) { 4966 Error(Loc, Twine("out of range ", StringRef(Pref))); 4967 return false; 4968 } 4969 4970 Fmt = Val; 4971 return true; 4972 } 4973 4974 // dfmt and nfmt (in a tbuffer instruction) are parsed as one to allow their 4975 // values to live in a joint format operand in the MCInst encoding. 4976 OperandMatchResultTy 4977 AMDGPUAsmParser::parseDfmtNfmt(int64_t &Format) { 4978 using namespace llvm::AMDGPU::MTBUFFormat; 4979 4980 int64_t Dfmt = DFMT_UNDEF; 4981 int64_t Nfmt = NFMT_UNDEF; 4982 4983 // dfmt and nfmt can appear in either order, and each is optional. 4984 for (int I = 0; I < 2; ++I) { 4985 if (Dfmt == DFMT_UNDEF && !tryParseFmt("dfmt", DFMT_MAX, Dfmt)) 4986 return MatchOperand_ParseFail; 4987 4988 if (Nfmt == NFMT_UNDEF && !tryParseFmt("nfmt", NFMT_MAX, Nfmt)) { 4989 return MatchOperand_ParseFail; 4990 } 4991 // Skip optional comma between dfmt/nfmt 4992 // but guard against 2 commas following each other. 4993 if ((Dfmt == DFMT_UNDEF) != (Nfmt == NFMT_UNDEF) && 4994 !peekToken().is(AsmToken::Comma)) { 4995 trySkipToken(AsmToken::Comma); 4996 } 4997 } 4998 4999 if (Dfmt == DFMT_UNDEF && Nfmt == NFMT_UNDEF) 5000 return MatchOperand_NoMatch; 5001 5002 Dfmt = (Dfmt == DFMT_UNDEF) ? DFMT_DEFAULT : Dfmt; 5003 Nfmt = (Nfmt == NFMT_UNDEF) ? NFMT_DEFAULT : Nfmt; 5004 5005 Format = encodeDfmtNfmt(Dfmt, Nfmt); 5006 return MatchOperand_Success; 5007 } 5008 5009 OperandMatchResultTy 5010 AMDGPUAsmParser::parseUfmt(int64_t &Format) { 5011 using namespace llvm::AMDGPU::MTBUFFormat; 5012 5013 int64_t Fmt = UFMT_UNDEF; 5014 5015 if (!tryParseFmt("format", UFMT_MAX, Fmt)) 5016 return MatchOperand_ParseFail; 5017 5018 if (Fmt == UFMT_UNDEF) 5019 return MatchOperand_NoMatch; 5020 5021 Format = Fmt; 5022 return MatchOperand_Success; 5023 } 5024 5025 bool AMDGPUAsmParser::matchDfmtNfmt(int64_t &Dfmt, 5026 int64_t &Nfmt, 5027 StringRef FormatStr, 5028 SMLoc Loc) { 5029 using namespace llvm::AMDGPU::MTBUFFormat; 5030 int64_t Format; 5031 5032 Format = getDfmt(FormatStr); 5033 if (Format != DFMT_UNDEF) { 5034 Dfmt = Format; 5035 return true; 5036 } 5037 5038 Format = getNfmt(FormatStr, getSTI()); 5039 if (Format != NFMT_UNDEF) { 5040 Nfmt = Format; 5041 return true; 5042 } 5043 5044 Error(Loc, "unsupported format"); 5045 return false; 5046 } 5047 5048 OperandMatchResultTy 5049 AMDGPUAsmParser::parseSymbolicSplitFormat(StringRef FormatStr, 5050 SMLoc FormatLoc, 5051 int64_t &Format) { 5052 using namespace llvm::AMDGPU::MTBUFFormat; 5053 5054 int64_t Dfmt = DFMT_UNDEF; 5055 int64_t Nfmt = NFMT_UNDEF; 5056 if (!matchDfmtNfmt(Dfmt, Nfmt, FormatStr, FormatLoc)) 5057 return MatchOperand_ParseFail; 5058 5059 if (trySkipToken(AsmToken::Comma)) { 5060 StringRef Str; 5061 SMLoc Loc = getLoc(); 5062 if (!parseId(Str, "expected a format string") || 5063 !matchDfmtNfmt(Dfmt, Nfmt, Str, Loc)) { 5064 return MatchOperand_ParseFail; 5065 } 5066 if (Dfmt == DFMT_UNDEF) { 5067 Error(Loc, "duplicate numeric format"); 5068 return MatchOperand_ParseFail; 5069 } else if (Nfmt == NFMT_UNDEF) { 5070 Error(Loc, "duplicate data format"); 5071 return MatchOperand_ParseFail; 5072 } 5073 } 5074 5075 Dfmt = (Dfmt == DFMT_UNDEF) ? DFMT_DEFAULT : Dfmt; 5076 Nfmt = (Nfmt == NFMT_UNDEF) ? NFMT_DEFAULT : Nfmt; 5077 5078 if (isGFX10()) { 5079 auto Ufmt = convertDfmtNfmt2Ufmt(Dfmt, Nfmt); 5080 if (Ufmt == UFMT_UNDEF) { 5081 Error(FormatLoc, "unsupported format"); 5082 return MatchOperand_ParseFail; 5083 } 5084 Format = Ufmt; 5085 } else { 5086 Format = encodeDfmtNfmt(Dfmt, Nfmt); 5087 } 5088 5089 return MatchOperand_Success; 5090 } 5091 5092 OperandMatchResultTy 5093 AMDGPUAsmParser::parseSymbolicUnifiedFormat(StringRef FormatStr, 5094 SMLoc Loc, 5095 int64_t &Format) { 5096 using namespace llvm::AMDGPU::MTBUFFormat; 5097 5098 auto Id = getUnifiedFormat(FormatStr); 5099 if (Id == UFMT_UNDEF) 5100 return MatchOperand_NoMatch; 5101 5102 if (!isGFX10()) { 5103 Error(Loc, "unified format is not supported on this GPU"); 5104 return MatchOperand_ParseFail; 5105 } 5106 5107 Format = Id; 5108 return MatchOperand_Success; 5109 } 5110 5111 OperandMatchResultTy 5112 AMDGPUAsmParser::parseNumericFormat(int64_t &Format) { 5113 using namespace llvm::AMDGPU::MTBUFFormat; 5114 SMLoc Loc = getLoc(); 5115 5116 if (!parseExpr(Format)) 5117 return MatchOperand_ParseFail; 5118 if (!isValidFormatEncoding(Format, getSTI())) { 5119 Error(Loc, "out of range format"); 5120 return MatchOperand_ParseFail; 5121 } 5122 5123 return MatchOperand_Success; 5124 } 5125 5126 OperandMatchResultTy 5127 AMDGPUAsmParser::parseSymbolicOrNumericFormat(int64_t &Format) { 5128 using namespace llvm::AMDGPU::MTBUFFormat; 5129 5130 if (!trySkipId("format", AsmToken::Colon)) 5131 return MatchOperand_NoMatch; 5132 5133 if (trySkipToken(AsmToken::LBrac)) { 5134 StringRef FormatStr; 5135 SMLoc Loc = getLoc(); 5136 if (!parseId(FormatStr, "expected a format string")) 5137 return MatchOperand_ParseFail; 5138 5139 auto Res = parseSymbolicUnifiedFormat(FormatStr, Loc, Format); 5140 if (Res == MatchOperand_NoMatch) 5141 Res = parseSymbolicSplitFormat(FormatStr, Loc, Format); 5142 if (Res != MatchOperand_Success) 5143 return Res; 5144 5145 if (!skipToken(AsmToken::RBrac, "expected a closing square bracket")) 5146 return MatchOperand_ParseFail; 5147 5148 return MatchOperand_Success; 5149 } 5150 5151 return parseNumericFormat(Format); 5152 } 5153 5154 OperandMatchResultTy 5155 AMDGPUAsmParser::parseFORMAT(OperandVector &Operands) { 5156 using namespace llvm::AMDGPU::MTBUFFormat; 5157 5158 int64_t Format = getDefaultFormatEncoding(getSTI()); 5159 OperandMatchResultTy Res; 5160 SMLoc Loc = getLoc(); 5161 5162 // Parse legacy format syntax. 5163 Res = isGFX10() ? parseUfmt(Format) : parseDfmtNfmt(Format); 5164 if (Res == MatchOperand_ParseFail) 5165 return Res; 5166 5167 bool FormatFound = (Res == MatchOperand_Success); 5168 5169 Operands.push_back( 5170 AMDGPUOperand::CreateImm(this, Format, Loc, AMDGPUOperand::ImmTyFORMAT)); 5171 5172 if (FormatFound) 5173 trySkipToken(AsmToken::Comma); 5174 5175 if (isToken(AsmToken::EndOfStatement)) { 5176 // We are expecting an soffset operand, 5177 // but let matcher handle the error. 5178 return MatchOperand_Success; 5179 } 5180 5181 // Parse soffset. 5182 Res = parseRegOrImm(Operands); 5183 if (Res != MatchOperand_Success) 5184 return Res; 5185 5186 trySkipToken(AsmToken::Comma); 5187 5188 if (!FormatFound) { 5189 Res = parseSymbolicOrNumericFormat(Format); 5190 if (Res == MatchOperand_ParseFail) 5191 return Res; 5192 if (Res == MatchOperand_Success) { 5193 auto Size = Operands.size(); 5194 AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands[Size - 2]); 5195 assert(Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyFORMAT); 5196 Op.setImm(Format); 5197 } 5198 return MatchOperand_Success; 5199 } 5200 5201 if (isId("format") && peekToken().is(AsmToken::Colon)) { 5202 Error(getLoc(), "duplicate format"); 5203 return MatchOperand_ParseFail; 5204 } 5205 return MatchOperand_Success; 5206 } 5207 5208 //===----------------------------------------------------------------------===// 5209 // ds 5210 //===----------------------------------------------------------------------===// 5211 5212 void AMDGPUAsmParser::cvtDSOffset01(MCInst &Inst, 5213 const OperandVector &Operands) { 5214 OptionalImmIndexMap OptionalIdx; 5215 5216 for (unsigned i = 1, e = Operands.size(); i != e; ++i) { 5217 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 5218 5219 // Add the register arguments 5220 if (Op.isReg()) { 5221 Op.addRegOperands(Inst, 1); 5222 continue; 5223 } 5224 5225 // Handle optional arguments 5226 OptionalIdx[Op.getImmTy()] = i; 5227 } 5228 5229 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset0); 5230 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset1); 5231 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGDS); 5232 5233 Inst.addOperand(MCOperand::createReg(AMDGPU::M0)); // m0 5234 } 5235 5236 void AMDGPUAsmParser::cvtDSImpl(MCInst &Inst, const OperandVector &Operands, 5237 bool IsGdsHardcoded) { 5238 OptionalImmIndexMap OptionalIdx; 5239 5240 for (unsigned i = 1, e = Operands.size(); i != e; ++i) { 5241 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 5242 5243 // Add the register arguments 5244 if (Op.isReg()) { 5245 Op.addRegOperands(Inst, 1); 5246 continue; 5247 } 5248 5249 if (Op.isToken() && Op.getToken() == "gds") { 5250 IsGdsHardcoded = true; 5251 continue; 5252 } 5253 5254 // Handle optional arguments 5255 OptionalIdx[Op.getImmTy()] = i; 5256 } 5257 5258 AMDGPUOperand::ImmTy OffsetType = 5259 (Inst.getOpcode() == AMDGPU::DS_SWIZZLE_B32_gfx10 || 5260 Inst.getOpcode() == AMDGPU::DS_SWIZZLE_B32_gfx6_gfx7 || 5261 Inst.getOpcode() == AMDGPU::DS_SWIZZLE_B32_vi) ? AMDGPUOperand::ImmTySwizzle : 5262 AMDGPUOperand::ImmTyOffset; 5263 5264 addOptionalImmOperand(Inst, Operands, OptionalIdx, OffsetType); 5265 5266 if (!IsGdsHardcoded) { 5267 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGDS); 5268 } 5269 Inst.addOperand(MCOperand::createReg(AMDGPU::M0)); // m0 5270 } 5271 5272 void AMDGPUAsmParser::cvtExp(MCInst &Inst, const OperandVector &Operands) { 5273 OptionalImmIndexMap OptionalIdx; 5274 5275 unsigned OperandIdx[4]; 5276 unsigned EnMask = 0; 5277 int SrcIdx = 0; 5278 5279 for (unsigned i = 1, e = Operands.size(); i != e; ++i) { 5280 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 5281 5282 // Add the register arguments 5283 if (Op.isReg()) { 5284 assert(SrcIdx < 4); 5285 OperandIdx[SrcIdx] = Inst.size(); 5286 Op.addRegOperands(Inst, 1); 5287 ++SrcIdx; 5288 continue; 5289 } 5290 5291 if (Op.isOff()) { 5292 assert(SrcIdx < 4); 5293 OperandIdx[SrcIdx] = Inst.size(); 5294 Inst.addOperand(MCOperand::createReg(AMDGPU::NoRegister)); 5295 ++SrcIdx; 5296 continue; 5297 } 5298 5299 if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyExpTgt) { 5300 Op.addImmOperands(Inst, 1); 5301 continue; 5302 } 5303 5304 if (Op.isToken() && Op.getToken() == "done") 5305 continue; 5306 5307 // Handle optional arguments 5308 OptionalIdx[Op.getImmTy()] = i; 5309 } 5310 5311 assert(SrcIdx == 4); 5312 5313 bool Compr = false; 5314 if (OptionalIdx.find(AMDGPUOperand::ImmTyExpCompr) != OptionalIdx.end()) { 5315 Compr = true; 5316 Inst.getOperand(OperandIdx[1]) = Inst.getOperand(OperandIdx[2]); 5317 Inst.getOperand(OperandIdx[2]).setReg(AMDGPU::NoRegister); 5318 Inst.getOperand(OperandIdx[3]).setReg(AMDGPU::NoRegister); 5319 } 5320 5321 for (auto i = 0; i < SrcIdx; ++i) { 5322 if (Inst.getOperand(OperandIdx[i]).getReg() != AMDGPU::NoRegister) { 5323 EnMask |= Compr? (0x3 << i * 2) : (0x1 << i); 5324 } 5325 } 5326 5327 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyExpVM); 5328 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyExpCompr); 5329 5330 Inst.addOperand(MCOperand::createImm(EnMask)); 5331 } 5332 5333 //===----------------------------------------------------------------------===// 5334 // s_waitcnt 5335 //===----------------------------------------------------------------------===// 5336 5337 static bool 5338 encodeCnt( 5339 const AMDGPU::IsaVersion ISA, 5340 int64_t &IntVal, 5341 int64_t CntVal, 5342 bool Saturate, 5343 unsigned (*encode)(const IsaVersion &Version, unsigned, unsigned), 5344 unsigned (*decode)(const IsaVersion &Version, unsigned)) 5345 { 5346 bool Failed = false; 5347 5348 IntVal = encode(ISA, IntVal, CntVal); 5349 if (CntVal != decode(ISA, IntVal)) { 5350 if (Saturate) { 5351 IntVal = encode(ISA, IntVal, -1); 5352 } else { 5353 Failed = true; 5354 } 5355 } 5356 return Failed; 5357 } 5358 5359 bool AMDGPUAsmParser::parseCnt(int64_t &IntVal) { 5360 5361 SMLoc CntLoc = getLoc(); 5362 StringRef CntName = getTokenStr(); 5363 5364 if (!skipToken(AsmToken::Identifier, "expected a counter name") || 5365 !skipToken(AsmToken::LParen, "expected a left parenthesis")) 5366 return false; 5367 5368 int64_t CntVal; 5369 SMLoc ValLoc = getLoc(); 5370 if (!parseExpr(CntVal)) 5371 return false; 5372 5373 AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU()); 5374 5375 bool Failed = true; 5376 bool Sat = CntName.endswith("_sat"); 5377 5378 if (CntName == "vmcnt" || CntName == "vmcnt_sat") { 5379 Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeVmcnt, decodeVmcnt); 5380 } else if (CntName == "expcnt" || CntName == "expcnt_sat") { 5381 Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeExpcnt, decodeExpcnt); 5382 } else if (CntName == "lgkmcnt" || CntName == "lgkmcnt_sat") { 5383 Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeLgkmcnt, decodeLgkmcnt); 5384 } else { 5385 Error(CntLoc, "invalid counter name " + CntName); 5386 return false; 5387 } 5388 5389 if (Failed) { 5390 Error(ValLoc, "too large value for " + CntName); 5391 return false; 5392 } 5393 5394 if (!skipToken(AsmToken::RParen, "expected a closing parenthesis")) 5395 return false; 5396 5397 if (trySkipToken(AsmToken::Amp) || trySkipToken(AsmToken::Comma)) { 5398 if (isToken(AsmToken::EndOfStatement)) { 5399 Error(getLoc(), "expected a counter name"); 5400 return false; 5401 } 5402 } 5403 5404 return true; 5405 } 5406 5407 OperandMatchResultTy 5408 AMDGPUAsmParser::parseSWaitCntOps(OperandVector &Operands) { 5409 AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU()); 5410 int64_t Waitcnt = getWaitcntBitMask(ISA); 5411 SMLoc S = getLoc(); 5412 5413 if (isToken(AsmToken::Identifier) && peekToken().is(AsmToken::LParen)) { 5414 while (!isToken(AsmToken::EndOfStatement)) { 5415 if (!parseCnt(Waitcnt)) 5416 return MatchOperand_ParseFail; 5417 } 5418 } else { 5419 if (!parseExpr(Waitcnt)) 5420 return MatchOperand_ParseFail; 5421 } 5422 5423 Operands.push_back(AMDGPUOperand::CreateImm(this, Waitcnt, S)); 5424 return MatchOperand_Success; 5425 } 5426 5427 bool 5428 AMDGPUOperand::isSWaitCnt() const { 5429 return isImm(); 5430 } 5431 5432 //===----------------------------------------------------------------------===// 5433 // hwreg 5434 //===----------------------------------------------------------------------===// 5435 5436 bool 5437 AMDGPUAsmParser::parseHwregBody(OperandInfoTy &HwReg, 5438 int64_t &Offset, 5439 int64_t &Width) { 5440 using namespace llvm::AMDGPU::Hwreg; 5441 5442 // The register may be specified by name or using a numeric code 5443 if (isToken(AsmToken::Identifier) && 5444 (HwReg.Id = getHwregId(getTokenStr())) >= 0) { 5445 HwReg.IsSymbolic = true; 5446 lex(); // skip message name 5447 } else if (!parseExpr(HwReg.Id)) { 5448 return false; 5449 } 5450 5451 if (trySkipToken(AsmToken::RParen)) 5452 return true; 5453 5454 // parse optional params 5455 return 5456 skipToken(AsmToken::Comma, "expected a comma or a closing parenthesis") && 5457 parseExpr(Offset) && 5458 skipToken(AsmToken::Comma, "expected a comma") && 5459 parseExpr(Width) && 5460 skipToken(AsmToken::RParen, "expected a closing parenthesis"); 5461 } 5462 5463 bool 5464 AMDGPUAsmParser::validateHwreg(const OperandInfoTy &HwReg, 5465 const int64_t Offset, 5466 const int64_t Width, 5467 const SMLoc Loc) { 5468 5469 using namespace llvm::AMDGPU::Hwreg; 5470 5471 if (HwReg.IsSymbolic && !isValidHwreg(HwReg.Id, getSTI())) { 5472 Error(Loc, "specified hardware register is not supported on this GPU"); 5473 return false; 5474 } else if (!isValidHwreg(HwReg.Id)) { 5475 Error(Loc, "invalid code of hardware register: only 6-bit values are legal"); 5476 return false; 5477 } else if (!isValidHwregOffset(Offset)) { 5478 Error(Loc, "invalid bit offset: only 5-bit values are legal"); 5479 return false; 5480 } else if (!isValidHwregWidth(Width)) { 5481 Error(Loc, "invalid bitfield width: only values from 1 to 32 are legal"); 5482 return false; 5483 } 5484 return true; 5485 } 5486 5487 OperandMatchResultTy 5488 AMDGPUAsmParser::parseHwreg(OperandVector &Operands) { 5489 using namespace llvm::AMDGPU::Hwreg; 5490 5491 int64_t ImmVal = 0; 5492 SMLoc Loc = getLoc(); 5493 5494 if (trySkipId("hwreg", AsmToken::LParen)) { 5495 OperandInfoTy HwReg(ID_UNKNOWN_); 5496 int64_t Offset = OFFSET_DEFAULT_; 5497 int64_t Width = WIDTH_DEFAULT_; 5498 if (parseHwregBody(HwReg, Offset, Width) && 5499 validateHwreg(HwReg, Offset, Width, Loc)) { 5500 ImmVal = encodeHwreg(HwReg.Id, Offset, Width); 5501 } else { 5502 return MatchOperand_ParseFail; 5503 } 5504 } else if (parseExpr(ImmVal)) { 5505 if (ImmVal < 0 || !isUInt<16>(ImmVal)) { 5506 Error(Loc, "invalid immediate: only 16-bit values are legal"); 5507 return MatchOperand_ParseFail; 5508 } 5509 } else { 5510 return MatchOperand_ParseFail; 5511 } 5512 5513 Operands.push_back(AMDGPUOperand::CreateImm(this, ImmVal, Loc, AMDGPUOperand::ImmTyHwreg)); 5514 return MatchOperand_Success; 5515 } 5516 5517 bool AMDGPUOperand::isHwreg() const { 5518 return isImmTy(ImmTyHwreg); 5519 } 5520 5521 //===----------------------------------------------------------------------===// 5522 // sendmsg 5523 //===----------------------------------------------------------------------===// 5524 5525 bool 5526 AMDGPUAsmParser::parseSendMsgBody(OperandInfoTy &Msg, 5527 OperandInfoTy &Op, 5528 OperandInfoTy &Stream) { 5529 using namespace llvm::AMDGPU::SendMsg; 5530 5531 if (isToken(AsmToken::Identifier) && (Msg.Id = getMsgId(getTokenStr())) >= 0) { 5532 Msg.IsSymbolic = true; 5533 lex(); // skip message name 5534 } else if (!parseExpr(Msg.Id)) { 5535 return false; 5536 } 5537 5538 if (trySkipToken(AsmToken::Comma)) { 5539 Op.IsDefined = true; 5540 if (isToken(AsmToken::Identifier) && 5541 (Op.Id = getMsgOpId(Msg.Id, getTokenStr())) >= 0) { 5542 lex(); // skip operation name 5543 } else if (!parseExpr(Op.Id)) { 5544 return false; 5545 } 5546 5547 if (trySkipToken(AsmToken::Comma)) { 5548 Stream.IsDefined = true; 5549 if (!parseExpr(Stream.Id)) 5550 return false; 5551 } 5552 } 5553 5554 return skipToken(AsmToken::RParen, "expected a closing parenthesis"); 5555 } 5556 5557 bool 5558 AMDGPUAsmParser::validateSendMsg(const OperandInfoTy &Msg, 5559 const OperandInfoTy &Op, 5560 const OperandInfoTy &Stream, 5561 const SMLoc S) { 5562 using namespace llvm::AMDGPU::SendMsg; 5563 5564 // Validation strictness depends on whether message is specified 5565 // in a symbolc or in a numeric form. In the latter case 5566 // only encoding possibility is checked. 5567 bool Strict = Msg.IsSymbolic; 5568 5569 if (!isValidMsgId(Msg.Id, getSTI(), Strict)) { 5570 Error(S, "invalid message id"); 5571 return false; 5572 } else if (Strict && (msgRequiresOp(Msg.Id) != Op.IsDefined)) { 5573 Error(S, Op.IsDefined ? 5574 "message does not support operations" : 5575 "missing message operation"); 5576 return false; 5577 } else if (!isValidMsgOp(Msg.Id, Op.Id, Strict)) { 5578 Error(S, "invalid operation id"); 5579 return false; 5580 } else if (Strict && !msgSupportsStream(Msg.Id, Op.Id) && Stream.IsDefined) { 5581 Error(S, "message operation does not support streams"); 5582 return false; 5583 } else if (!isValidMsgStream(Msg.Id, Op.Id, Stream.Id, Strict)) { 5584 Error(S, "invalid message stream id"); 5585 return false; 5586 } 5587 return true; 5588 } 5589 5590 OperandMatchResultTy 5591 AMDGPUAsmParser::parseSendMsgOp(OperandVector &Operands) { 5592 using namespace llvm::AMDGPU::SendMsg; 5593 5594 int64_t ImmVal = 0; 5595 SMLoc Loc = getLoc(); 5596 5597 if (trySkipId("sendmsg", AsmToken::LParen)) { 5598 OperandInfoTy Msg(ID_UNKNOWN_); 5599 OperandInfoTy Op(OP_NONE_); 5600 OperandInfoTy Stream(STREAM_ID_NONE_); 5601 if (parseSendMsgBody(Msg, Op, Stream) && 5602 validateSendMsg(Msg, Op, Stream, Loc)) { 5603 ImmVal = encodeMsg(Msg.Id, Op.Id, Stream.Id); 5604 } else { 5605 return MatchOperand_ParseFail; 5606 } 5607 } else if (parseExpr(ImmVal)) { 5608 if (ImmVal < 0 || !isUInt<16>(ImmVal)) { 5609 Error(Loc, "invalid immediate: only 16-bit values are legal"); 5610 return MatchOperand_ParseFail; 5611 } 5612 } else { 5613 return MatchOperand_ParseFail; 5614 } 5615 5616 Operands.push_back(AMDGPUOperand::CreateImm(this, ImmVal, Loc, AMDGPUOperand::ImmTySendMsg)); 5617 return MatchOperand_Success; 5618 } 5619 5620 bool AMDGPUOperand::isSendMsg() const { 5621 return isImmTy(ImmTySendMsg); 5622 } 5623 5624 //===----------------------------------------------------------------------===// 5625 // v_interp 5626 //===----------------------------------------------------------------------===// 5627 5628 OperandMatchResultTy AMDGPUAsmParser::parseInterpSlot(OperandVector &Operands) { 5629 if (getLexer().getKind() != AsmToken::Identifier) 5630 return MatchOperand_NoMatch; 5631 5632 StringRef Str = Parser.getTok().getString(); 5633 int Slot = StringSwitch<int>(Str) 5634 .Case("p10", 0) 5635 .Case("p20", 1) 5636 .Case("p0", 2) 5637 .Default(-1); 5638 5639 SMLoc S = Parser.getTok().getLoc(); 5640 if (Slot == -1) 5641 return MatchOperand_ParseFail; 5642 5643 Parser.Lex(); 5644 Operands.push_back(AMDGPUOperand::CreateImm(this, Slot, S, 5645 AMDGPUOperand::ImmTyInterpSlot)); 5646 return MatchOperand_Success; 5647 } 5648 5649 OperandMatchResultTy AMDGPUAsmParser::parseInterpAttr(OperandVector &Operands) { 5650 if (getLexer().getKind() != AsmToken::Identifier) 5651 return MatchOperand_NoMatch; 5652 5653 StringRef Str = Parser.getTok().getString(); 5654 if (!Str.startswith("attr")) 5655 return MatchOperand_NoMatch; 5656 5657 StringRef Chan = Str.take_back(2); 5658 int AttrChan = StringSwitch<int>(Chan) 5659 .Case(".x", 0) 5660 .Case(".y", 1) 5661 .Case(".z", 2) 5662 .Case(".w", 3) 5663 .Default(-1); 5664 if (AttrChan == -1) 5665 return MatchOperand_ParseFail; 5666 5667 Str = Str.drop_back(2).drop_front(4); 5668 5669 uint8_t Attr; 5670 if (Str.getAsInteger(10, Attr)) 5671 return MatchOperand_ParseFail; 5672 5673 SMLoc S = Parser.getTok().getLoc(); 5674 Parser.Lex(); 5675 if (Attr > 63) { 5676 Error(S, "out of bounds attr"); 5677 return MatchOperand_ParseFail; 5678 } 5679 5680 SMLoc SChan = SMLoc::getFromPointer(Chan.data()); 5681 5682 Operands.push_back(AMDGPUOperand::CreateImm(this, Attr, S, 5683 AMDGPUOperand::ImmTyInterpAttr)); 5684 Operands.push_back(AMDGPUOperand::CreateImm(this, AttrChan, SChan, 5685 AMDGPUOperand::ImmTyAttrChan)); 5686 return MatchOperand_Success; 5687 } 5688 5689 //===----------------------------------------------------------------------===// 5690 // exp 5691 //===----------------------------------------------------------------------===// 5692 5693 OperandMatchResultTy AMDGPUAsmParser::parseExpTgtImpl(StringRef Str, 5694 uint8_t &Val) { 5695 if (Str == "null") { 5696 Val = 9; 5697 return MatchOperand_Success; 5698 } 5699 5700 if (Str.startswith("mrt")) { 5701 Str = Str.drop_front(3); 5702 if (Str == "z") { // == mrtz 5703 Val = 8; 5704 return MatchOperand_Success; 5705 } 5706 5707 if (Str.getAsInteger(10, Val)) 5708 return MatchOperand_ParseFail; 5709 5710 if (Val > 7) { 5711 Error(getLoc(), "invalid exp target"); 5712 return MatchOperand_ParseFail; 5713 } 5714 5715 return MatchOperand_Success; 5716 } 5717 5718 if (Str.startswith("pos")) { 5719 Str = Str.drop_front(3); 5720 if (Str.getAsInteger(10, Val)) 5721 return MatchOperand_ParseFail; 5722 5723 if (Val > 4 || (Val == 4 && !isGFX10())) { 5724 Error(getLoc(), "invalid exp target"); 5725 return MatchOperand_ParseFail; 5726 } 5727 5728 Val += 12; 5729 return MatchOperand_Success; 5730 } 5731 5732 if (isGFX10() && Str == "prim") { 5733 Val = 20; 5734 return MatchOperand_Success; 5735 } 5736 5737 if (Str.startswith("param")) { 5738 Str = Str.drop_front(5); 5739 if (Str.getAsInteger(10, Val)) 5740 return MatchOperand_ParseFail; 5741 5742 if (Val >= 32) { 5743 Error(getLoc(), "invalid exp target"); 5744 return MatchOperand_ParseFail; 5745 } 5746 5747 Val += 32; 5748 return MatchOperand_Success; 5749 } 5750 5751 if (Str.startswith("invalid_target_")) { 5752 Str = Str.drop_front(15); 5753 if (Str.getAsInteger(10, Val)) 5754 return MatchOperand_ParseFail; 5755 5756 Error(getLoc(), "invalid exp target"); 5757 return MatchOperand_ParseFail; 5758 } 5759 5760 return MatchOperand_NoMatch; 5761 } 5762 5763 OperandMatchResultTy AMDGPUAsmParser::parseExpTgt(OperandVector &Operands) { 5764 uint8_t Val; 5765 StringRef Str = Parser.getTok().getString(); 5766 5767 auto Res = parseExpTgtImpl(Str, Val); 5768 if (Res != MatchOperand_Success) 5769 return Res; 5770 5771 SMLoc S = Parser.getTok().getLoc(); 5772 Parser.Lex(); 5773 5774 Operands.push_back(AMDGPUOperand::CreateImm(this, Val, S, 5775 AMDGPUOperand::ImmTyExpTgt)); 5776 return MatchOperand_Success; 5777 } 5778 5779 //===----------------------------------------------------------------------===// 5780 // parser helpers 5781 //===----------------------------------------------------------------------===// 5782 5783 bool 5784 AMDGPUAsmParser::isId(const AsmToken &Token, const StringRef Id) const { 5785 return Token.is(AsmToken::Identifier) && Token.getString() == Id; 5786 } 5787 5788 bool 5789 AMDGPUAsmParser::isId(const StringRef Id) const { 5790 return isId(getToken(), Id); 5791 } 5792 5793 bool 5794 AMDGPUAsmParser::isToken(const AsmToken::TokenKind Kind) const { 5795 return getTokenKind() == Kind; 5796 } 5797 5798 bool 5799 AMDGPUAsmParser::trySkipId(const StringRef Id) { 5800 if (isId(Id)) { 5801 lex(); 5802 return true; 5803 } 5804 return false; 5805 } 5806 5807 bool 5808 AMDGPUAsmParser::trySkipId(const StringRef Id, const AsmToken::TokenKind Kind) { 5809 if (isId(Id) && peekToken().is(Kind)) { 5810 lex(); 5811 lex(); 5812 return true; 5813 } 5814 return false; 5815 } 5816 5817 bool 5818 AMDGPUAsmParser::trySkipToken(const AsmToken::TokenKind Kind) { 5819 if (isToken(Kind)) { 5820 lex(); 5821 return true; 5822 } 5823 return false; 5824 } 5825 5826 bool 5827 AMDGPUAsmParser::skipToken(const AsmToken::TokenKind Kind, 5828 const StringRef ErrMsg) { 5829 if (!trySkipToken(Kind)) { 5830 Error(getLoc(), ErrMsg); 5831 return false; 5832 } 5833 return true; 5834 } 5835 5836 bool 5837 AMDGPUAsmParser::parseExpr(int64_t &Imm) { 5838 return !getParser().parseAbsoluteExpression(Imm); 5839 } 5840 5841 bool 5842 AMDGPUAsmParser::parseExpr(OperandVector &Operands) { 5843 SMLoc S = getLoc(); 5844 5845 const MCExpr *Expr; 5846 if (Parser.parseExpression(Expr)) 5847 return false; 5848 5849 int64_t IntVal; 5850 if (Expr->evaluateAsAbsolute(IntVal)) { 5851 Operands.push_back(AMDGPUOperand::CreateImm(this, IntVal, S)); 5852 } else { 5853 Operands.push_back(AMDGPUOperand::CreateExpr(this, Expr, S)); 5854 } 5855 return true; 5856 } 5857 5858 bool 5859 AMDGPUAsmParser::parseString(StringRef &Val, const StringRef ErrMsg) { 5860 if (isToken(AsmToken::String)) { 5861 Val = getToken().getStringContents(); 5862 lex(); 5863 return true; 5864 } else { 5865 Error(getLoc(), ErrMsg); 5866 return false; 5867 } 5868 } 5869 5870 bool 5871 AMDGPUAsmParser::parseId(StringRef &Val, const StringRef ErrMsg) { 5872 if (isToken(AsmToken::Identifier)) { 5873 Val = getTokenStr(); 5874 lex(); 5875 return true; 5876 } else { 5877 Error(getLoc(), ErrMsg); 5878 return false; 5879 } 5880 } 5881 5882 AsmToken 5883 AMDGPUAsmParser::getToken() const { 5884 return Parser.getTok(); 5885 } 5886 5887 AsmToken 5888 AMDGPUAsmParser::peekToken() { 5889 return isToken(AsmToken::EndOfStatement) ? getToken() : getLexer().peekTok(); 5890 } 5891 5892 void 5893 AMDGPUAsmParser::peekTokens(MutableArrayRef<AsmToken> Tokens) { 5894 auto TokCount = getLexer().peekTokens(Tokens); 5895 5896 for (auto Idx = TokCount; Idx < Tokens.size(); ++Idx) 5897 Tokens[Idx] = AsmToken(AsmToken::Error, ""); 5898 } 5899 5900 AsmToken::TokenKind 5901 AMDGPUAsmParser::getTokenKind() const { 5902 return getLexer().getKind(); 5903 } 5904 5905 SMLoc 5906 AMDGPUAsmParser::getLoc() const { 5907 return getToken().getLoc(); 5908 } 5909 5910 StringRef 5911 AMDGPUAsmParser::getTokenStr() const { 5912 return getToken().getString(); 5913 } 5914 5915 void 5916 AMDGPUAsmParser::lex() { 5917 Parser.Lex(); 5918 } 5919 5920 //===----------------------------------------------------------------------===// 5921 // swizzle 5922 //===----------------------------------------------------------------------===// 5923 5924 LLVM_READNONE 5925 static unsigned 5926 encodeBitmaskPerm(const unsigned AndMask, 5927 const unsigned OrMask, 5928 const unsigned XorMask) { 5929 using namespace llvm::AMDGPU::Swizzle; 5930 5931 return BITMASK_PERM_ENC | 5932 (AndMask << BITMASK_AND_SHIFT) | 5933 (OrMask << BITMASK_OR_SHIFT) | 5934 (XorMask << BITMASK_XOR_SHIFT); 5935 } 5936 5937 bool 5938 AMDGPUAsmParser::parseSwizzleOperands(const unsigned OpNum, int64_t* Op, 5939 const unsigned MinVal, 5940 const unsigned MaxVal, 5941 const StringRef ErrMsg) { 5942 for (unsigned i = 0; i < OpNum; ++i) { 5943 if (!skipToken(AsmToken::Comma, "expected a comma")){ 5944 return false; 5945 } 5946 SMLoc ExprLoc = Parser.getTok().getLoc(); 5947 if (!parseExpr(Op[i])) { 5948 return false; 5949 } 5950 if (Op[i] < MinVal || Op[i] > MaxVal) { 5951 Error(ExprLoc, ErrMsg); 5952 return false; 5953 } 5954 } 5955 5956 return true; 5957 } 5958 5959 bool 5960 AMDGPUAsmParser::parseSwizzleQuadPerm(int64_t &Imm) { 5961 using namespace llvm::AMDGPU::Swizzle; 5962 5963 int64_t Lane[LANE_NUM]; 5964 if (parseSwizzleOperands(LANE_NUM, Lane, 0, LANE_MAX, 5965 "expected a 2-bit lane id")) { 5966 Imm = QUAD_PERM_ENC; 5967 for (unsigned I = 0; I < LANE_NUM; ++I) { 5968 Imm |= Lane[I] << (LANE_SHIFT * I); 5969 } 5970 return true; 5971 } 5972 return false; 5973 } 5974 5975 bool 5976 AMDGPUAsmParser::parseSwizzleBroadcast(int64_t &Imm) { 5977 using namespace llvm::AMDGPU::Swizzle; 5978 5979 SMLoc S = Parser.getTok().getLoc(); 5980 int64_t GroupSize; 5981 int64_t LaneIdx; 5982 5983 if (!parseSwizzleOperands(1, &GroupSize, 5984 2, 32, 5985 "group size must be in the interval [2,32]")) { 5986 return false; 5987 } 5988 if (!isPowerOf2_64(GroupSize)) { 5989 Error(S, "group size must be a power of two"); 5990 return false; 5991 } 5992 if (parseSwizzleOperands(1, &LaneIdx, 5993 0, GroupSize - 1, 5994 "lane id must be in the interval [0,group size - 1]")) { 5995 Imm = encodeBitmaskPerm(BITMASK_MAX - GroupSize + 1, LaneIdx, 0); 5996 return true; 5997 } 5998 return false; 5999 } 6000 6001 bool 6002 AMDGPUAsmParser::parseSwizzleReverse(int64_t &Imm) { 6003 using namespace llvm::AMDGPU::Swizzle; 6004 6005 SMLoc S = Parser.getTok().getLoc(); 6006 int64_t GroupSize; 6007 6008 if (!parseSwizzleOperands(1, &GroupSize, 6009 2, 32, "group size must be in the interval [2,32]")) { 6010 return false; 6011 } 6012 if (!isPowerOf2_64(GroupSize)) { 6013 Error(S, "group size must be a power of two"); 6014 return false; 6015 } 6016 6017 Imm = encodeBitmaskPerm(BITMASK_MAX, 0, GroupSize - 1); 6018 return true; 6019 } 6020 6021 bool 6022 AMDGPUAsmParser::parseSwizzleSwap(int64_t &Imm) { 6023 using namespace llvm::AMDGPU::Swizzle; 6024 6025 SMLoc S = Parser.getTok().getLoc(); 6026 int64_t GroupSize; 6027 6028 if (!parseSwizzleOperands(1, &GroupSize, 6029 1, 16, "group size must be in the interval [1,16]")) { 6030 return false; 6031 } 6032 if (!isPowerOf2_64(GroupSize)) { 6033 Error(S, "group size must be a power of two"); 6034 return false; 6035 } 6036 6037 Imm = encodeBitmaskPerm(BITMASK_MAX, 0, GroupSize); 6038 return true; 6039 } 6040 6041 bool 6042 AMDGPUAsmParser::parseSwizzleBitmaskPerm(int64_t &Imm) { 6043 using namespace llvm::AMDGPU::Swizzle; 6044 6045 if (!skipToken(AsmToken::Comma, "expected a comma")) { 6046 return false; 6047 } 6048 6049 StringRef Ctl; 6050 SMLoc StrLoc = Parser.getTok().getLoc(); 6051 if (!parseString(Ctl)) { 6052 return false; 6053 } 6054 if (Ctl.size() != BITMASK_WIDTH) { 6055 Error(StrLoc, "expected a 5-character mask"); 6056 return false; 6057 } 6058 6059 unsigned AndMask = 0; 6060 unsigned OrMask = 0; 6061 unsigned XorMask = 0; 6062 6063 for (size_t i = 0; i < Ctl.size(); ++i) { 6064 unsigned Mask = 1 << (BITMASK_WIDTH - 1 - i); 6065 switch(Ctl[i]) { 6066 default: 6067 Error(StrLoc, "invalid mask"); 6068 return false; 6069 case '0': 6070 break; 6071 case '1': 6072 OrMask |= Mask; 6073 break; 6074 case 'p': 6075 AndMask |= Mask; 6076 break; 6077 case 'i': 6078 AndMask |= Mask; 6079 XorMask |= Mask; 6080 break; 6081 } 6082 } 6083 6084 Imm = encodeBitmaskPerm(AndMask, OrMask, XorMask); 6085 return true; 6086 } 6087 6088 bool 6089 AMDGPUAsmParser::parseSwizzleOffset(int64_t &Imm) { 6090 6091 SMLoc OffsetLoc = Parser.getTok().getLoc(); 6092 6093 if (!parseExpr(Imm)) { 6094 return false; 6095 } 6096 if (!isUInt<16>(Imm)) { 6097 Error(OffsetLoc, "expected a 16-bit offset"); 6098 return false; 6099 } 6100 return true; 6101 } 6102 6103 bool 6104 AMDGPUAsmParser::parseSwizzleMacro(int64_t &Imm) { 6105 using namespace llvm::AMDGPU::Swizzle; 6106 6107 if (skipToken(AsmToken::LParen, "expected a left parentheses")) { 6108 6109 SMLoc ModeLoc = Parser.getTok().getLoc(); 6110 bool Ok = false; 6111 6112 if (trySkipId(IdSymbolic[ID_QUAD_PERM])) { 6113 Ok = parseSwizzleQuadPerm(Imm); 6114 } else if (trySkipId(IdSymbolic[ID_BITMASK_PERM])) { 6115 Ok = parseSwizzleBitmaskPerm(Imm); 6116 } else if (trySkipId(IdSymbolic[ID_BROADCAST])) { 6117 Ok = parseSwizzleBroadcast(Imm); 6118 } else if (trySkipId(IdSymbolic[ID_SWAP])) { 6119 Ok = parseSwizzleSwap(Imm); 6120 } else if (trySkipId(IdSymbolic[ID_REVERSE])) { 6121 Ok = parseSwizzleReverse(Imm); 6122 } else { 6123 Error(ModeLoc, "expected a swizzle mode"); 6124 } 6125 6126 return Ok && skipToken(AsmToken::RParen, "expected a closing parentheses"); 6127 } 6128 6129 return false; 6130 } 6131 6132 OperandMatchResultTy 6133 AMDGPUAsmParser::parseSwizzleOp(OperandVector &Operands) { 6134 SMLoc S = Parser.getTok().getLoc(); 6135 int64_t Imm = 0; 6136 6137 if (trySkipId("offset")) { 6138 6139 bool Ok = false; 6140 if (skipToken(AsmToken::Colon, "expected a colon")) { 6141 if (trySkipId("swizzle")) { 6142 Ok = parseSwizzleMacro(Imm); 6143 } else { 6144 Ok = parseSwizzleOffset(Imm); 6145 } 6146 } 6147 6148 Operands.push_back(AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTySwizzle)); 6149 6150 return Ok? MatchOperand_Success : MatchOperand_ParseFail; 6151 } else { 6152 // Swizzle "offset" operand is optional. 6153 // If it is omitted, try parsing other optional operands. 6154 return parseOptionalOpr(Operands); 6155 } 6156 } 6157 6158 bool 6159 AMDGPUOperand::isSwizzle() const { 6160 return isImmTy(ImmTySwizzle); 6161 } 6162 6163 //===----------------------------------------------------------------------===// 6164 // VGPR Index Mode 6165 //===----------------------------------------------------------------------===// 6166 6167 int64_t AMDGPUAsmParser::parseGPRIdxMacro() { 6168 6169 using namespace llvm::AMDGPU::VGPRIndexMode; 6170 6171 if (trySkipToken(AsmToken::RParen)) { 6172 return OFF; 6173 } 6174 6175 int64_t Imm = 0; 6176 6177 while (true) { 6178 unsigned Mode = 0; 6179 SMLoc S = Parser.getTok().getLoc(); 6180 6181 for (unsigned ModeId = ID_MIN; ModeId <= ID_MAX; ++ModeId) { 6182 if (trySkipId(IdSymbolic[ModeId])) { 6183 Mode = 1 << ModeId; 6184 break; 6185 } 6186 } 6187 6188 if (Mode == 0) { 6189 Error(S, (Imm == 0)? 6190 "expected a VGPR index mode or a closing parenthesis" : 6191 "expected a VGPR index mode"); 6192 return UNDEF; 6193 } 6194 6195 if (Imm & Mode) { 6196 Error(S, "duplicate VGPR index mode"); 6197 return UNDEF; 6198 } 6199 Imm |= Mode; 6200 6201 if (trySkipToken(AsmToken::RParen)) 6202 break; 6203 if (!skipToken(AsmToken::Comma, 6204 "expected a comma or a closing parenthesis")) 6205 return UNDEF; 6206 } 6207 6208 return Imm; 6209 } 6210 6211 OperandMatchResultTy 6212 AMDGPUAsmParser::parseGPRIdxMode(OperandVector &Operands) { 6213 6214 using namespace llvm::AMDGPU::VGPRIndexMode; 6215 6216 int64_t Imm = 0; 6217 SMLoc S = Parser.getTok().getLoc(); 6218 6219 if (getLexer().getKind() == AsmToken::Identifier && 6220 Parser.getTok().getString() == "gpr_idx" && 6221 getLexer().peekTok().is(AsmToken::LParen)) { 6222 6223 Parser.Lex(); 6224 Parser.Lex(); 6225 6226 Imm = parseGPRIdxMacro(); 6227 if (Imm == UNDEF) 6228 return MatchOperand_ParseFail; 6229 6230 } else { 6231 if (getParser().parseAbsoluteExpression(Imm)) 6232 return MatchOperand_ParseFail; 6233 if (Imm < 0 || !isUInt<4>(Imm)) { 6234 Error(S, "invalid immediate: only 4-bit values are legal"); 6235 return MatchOperand_ParseFail; 6236 } 6237 } 6238 6239 Operands.push_back( 6240 AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTyGprIdxMode)); 6241 return MatchOperand_Success; 6242 } 6243 6244 bool AMDGPUOperand::isGPRIdxMode() const { 6245 return isImmTy(ImmTyGprIdxMode); 6246 } 6247 6248 //===----------------------------------------------------------------------===// 6249 // sopp branch targets 6250 //===----------------------------------------------------------------------===// 6251 6252 OperandMatchResultTy 6253 AMDGPUAsmParser::parseSOppBrTarget(OperandVector &Operands) { 6254 6255 // Make sure we are not parsing something 6256 // that looks like a label or an expression but is not. 6257 // This will improve error messages. 6258 if (isRegister() || isModifier()) 6259 return MatchOperand_NoMatch; 6260 6261 if (!parseExpr(Operands)) 6262 return MatchOperand_ParseFail; 6263 6264 AMDGPUOperand &Opr = ((AMDGPUOperand &)*Operands[Operands.size() - 1]); 6265 assert(Opr.isImm() || Opr.isExpr()); 6266 SMLoc Loc = Opr.getStartLoc(); 6267 6268 // Currently we do not support arbitrary expressions as branch targets. 6269 // Only labels and absolute expressions are accepted. 6270 if (Opr.isExpr() && !Opr.isSymbolRefExpr()) { 6271 Error(Loc, "expected an absolute expression or a label"); 6272 } else if (Opr.isImm() && !Opr.isS16Imm()) { 6273 Error(Loc, "expected a 16-bit signed jump offset"); 6274 } 6275 6276 return MatchOperand_Success; 6277 } 6278 6279 //===----------------------------------------------------------------------===// 6280 // Boolean holding registers 6281 //===----------------------------------------------------------------------===// 6282 6283 OperandMatchResultTy 6284 AMDGPUAsmParser::parseBoolReg(OperandVector &Operands) { 6285 return parseReg(Operands); 6286 } 6287 6288 //===----------------------------------------------------------------------===// 6289 // mubuf 6290 //===----------------------------------------------------------------------===// 6291 6292 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultDLC() const { 6293 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyDLC); 6294 } 6295 6296 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultGLC() const { 6297 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyGLC); 6298 } 6299 6300 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSLC() const { 6301 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTySLC); 6302 } 6303 6304 void AMDGPUAsmParser::cvtMubufImpl(MCInst &Inst, 6305 const OperandVector &Operands, 6306 bool IsAtomic, 6307 bool IsAtomicReturn, 6308 bool IsLds) { 6309 bool IsLdsOpcode = IsLds; 6310 bool HasLdsModifier = false; 6311 OptionalImmIndexMap OptionalIdx; 6312 assert(IsAtomicReturn ? IsAtomic : true); 6313 unsigned FirstOperandIdx = 1; 6314 6315 for (unsigned i = FirstOperandIdx, e = Operands.size(); i != e; ++i) { 6316 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 6317 6318 // Add the register arguments 6319 if (Op.isReg()) { 6320 Op.addRegOperands(Inst, 1); 6321 // Insert a tied src for atomic return dst. 6322 // This cannot be postponed as subsequent calls to 6323 // addImmOperands rely on correct number of MC operands. 6324 if (IsAtomicReturn && i == FirstOperandIdx) 6325 Op.addRegOperands(Inst, 1); 6326 continue; 6327 } 6328 6329 // Handle the case where soffset is an immediate 6330 if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyNone) { 6331 Op.addImmOperands(Inst, 1); 6332 continue; 6333 } 6334 6335 HasLdsModifier |= Op.isLDS(); 6336 6337 // Handle tokens like 'offen' which are sometimes hard-coded into the 6338 // asm string. There are no MCInst operands for these. 6339 if (Op.isToken()) { 6340 continue; 6341 } 6342 assert(Op.isImm()); 6343 6344 // Handle optional arguments 6345 OptionalIdx[Op.getImmTy()] = i; 6346 } 6347 6348 // This is a workaround for an llvm quirk which may result in an 6349 // incorrect instruction selection. Lds and non-lds versions of 6350 // MUBUF instructions are identical except that lds versions 6351 // have mandatory 'lds' modifier. However this modifier follows 6352 // optional modifiers and llvm asm matcher regards this 'lds' 6353 // modifier as an optional one. As a result, an lds version 6354 // of opcode may be selected even if it has no 'lds' modifier. 6355 if (IsLdsOpcode && !HasLdsModifier) { 6356 int NoLdsOpcode = AMDGPU::getMUBUFNoLdsInst(Inst.getOpcode()); 6357 if (NoLdsOpcode != -1) { // Got lds version - correct it. 6358 Inst.setOpcode(NoLdsOpcode); 6359 IsLdsOpcode = false; 6360 } 6361 } 6362 6363 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset); 6364 if (!IsAtomic) { // glc is hard-coded. 6365 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGLC); 6366 } 6367 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySLC); 6368 6369 if (!IsLdsOpcode) { // tfe is not legal with lds opcodes 6370 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE); 6371 } 6372 6373 if (isGFX10()) 6374 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDLC); 6375 } 6376 6377 void AMDGPUAsmParser::cvtMtbuf(MCInst &Inst, const OperandVector &Operands) { 6378 OptionalImmIndexMap OptionalIdx; 6379 6380 for (unsigned i = 1, e = Operands.size(); i != e; ++i) { 6381 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 6382 6383 // Add the register arguments 6384 if (Op.isReg()) { 6385 Op.addRegOperands(Inst, 1); 6386 continue; 6387 } 6388 6389 // Handle the case where soffset is an immediate 6390 if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyNone) { 6391 Op.addImmOperands(Inst, 1); 6392 continue; 6393 } 6394 6395 // Handle tokens like 'offen' which are sometimes hard-coded into the 6396 // asm string. There are no MCInst operands for these. 6397 if (Op.isToken()) { 6398 continue; 6399 } 6400 assert(Op.isImm()); 6401 6402 // Handle optional arguments 6403 OptionalIdx[Op.getImmTy()] = i; 6404 } 6405 6406 addOptionalImmOperand(Inst, Operands, OptionalIdx, 6407 AMDGPUOperand::ImmTyOffset); 6408 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyFORMAT); 6409 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGLC); 6410 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySLC); 6411 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE); 6412 6413 if (isGFX10()) 6414 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDLC); 6415 } 6416 6417 //===----------------------------------------------------------------------===// 6418 // mimg 6419 //===----------------------------------------------------------------------===// 6420 6421 void AMDGPUAsmParser::cvtMIMG(MCInst &Inst, const OperandVector &Operands, 6422 bool IsAtomic) { 6423 unsigned I = 1; 6424 const MCInstrDesc &Desc = MII.get(Inst.getOpcode()); 6425 for (unsigned J = 0; J < Desc.getNumDefs(); ++J) { 6426 ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1); 6427 } 6428 6429 if (IsAtomic) { 6430 // Add src, same as dst 6431 assert(Desc.getNumDefs() == 1); 6432 ((AMDGPUOperand &)*Operands[I - 1]).addRegOperands(Inst, 1); 6433 } 6434 6435 OptionalImmIndexMap OptionalIdx; 6436 6437 for (unsigned E = Operands.size(); I != E; ++I) { 6438 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]); 6439 6440 // Add the register arguments 6441 if (Op.isReg()) { 6442 Op.addRegOperands(Inst, 1); 6443 } else if (Op.isImmModifier()) { 6444 OptionalIdx[Op.getImmTy()] = I; 6445 } else if (!Op.isToken()) { 6446 llvm_unreachable("unexpected operand type"); 6447 } 6448 } 6449 6450 bool IsGFX10 = isGFX10(); 6451 6452 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDMask); 6453 if (IsGFX10) 6454 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDim, -1); 6455 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyUNorm); 6456 if (IsGFX10) 6457 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDLC); 6458 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGLC); 6459 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySLC); 6460 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyR128A16); 6461 if (IsGFX10) 6462 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyA16); 6463 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE); 6464 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyLWE); 6465 if (!IsGFX10) 6466 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDA); 6467 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyD16); 6468 } 6469 6470 void AMDGPUAsmParser::cvtMIMGAtomic(MCInst &Inst, const OperandVector &Operands) { 6471 cvtMIMG(Inst, Operands, true); 6472 } 6473 6474 void AMDGPUAsmParser::cvtIntersectRay(MCInst &Inst, 6475 const OperandVector &Operands) { 6476 for (unsigned I = 1; I < Operands.size(); ++I) { 6477 auto &Operand = (AMDGPUOperand &)*Operands[I]; 6478 if (Operand.isReg()) 6479 Operand.addRegOperands(Inst, 1); 6480 } 6481 6482 Inst.addOperand(MCOperand::createImm(1)); // a16 6483 } 6484 6485 //===----------------------------------------------------------------------===// 6486 // smrd 6487 //===----------------------------------------------------------------------===// 6488 6489 bool AMDGPUOperand::isSMRDOffset8() const { 6490 return isImm() && isUInt<8>(getImm()); 6491 } 6492 6493 bool AMDGPUOperand::isSMEMOffset() const { 6494 return isImm(); // Offset range is checked later by validator. 6495 } 6496 6497 bool AMDGPUOperand::isSMRDLiteralOffset() const { 6498 // 32-bit literals are only supported on CI and we only want to use them 6499 // when the offset is > 8-bits. 6500 return isImm() && !isUInt<8>(getImm()) && isUInt<32>(getImm()); 6501 } 6502 6503 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSMRDOffset8() const { 6504 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset); 6505 } 6506 6507 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSMEMOffset() const { 6508 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset); 6509 } 6510 6511 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSMRDLiteralOffset() const { 6512 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset); 6513 } 6514 6515 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultFlatOffset() const { 6516 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset); 6517 } 6518 6519 //===----------------------------------------------------------------------===// 6520 // vop3 6521 //===----------------------------------------------------------------------===// 6522 6523 static bool ConvertOmodMul(int64_t &Mul) { 6524 if (Mul != 1 && Mul != 2 && Mul != 4) 6525 return false; 6526 6527 Mul >>= 1; 6528 return true; 6529 } 6530 6531 static bool ConvertOmodDiv(int64_t &Div) { 6532 if (Div == 1) { 6533 Div = 0; 6534 return true; 6535 } 6536 6537 if (Div == 2) { 6538 Div = 3; 6539 return true; 6540 } 6541 6542 return false; 6543 } 6544 6545 static bool ConvertBoundCtrl(int64_t &BoundCtrl) { 6546 if (BoundCtrl == 0) { 6547 BoundCtrl = 1; 6548 return true; 6549 } 6550 6551 if (BoundCtrl == -1) { 6552 BoundCtrl = 0; 6553 return true; 6554 } 6555 6556 return false; 6557 } 6558 6559 // Note: the order in this table matches the order of operands in AsmString. 6560 static const OptionalOperand AMDGPUOptionalOperandTable[] = { 6561 {"offen", AMDGPUOperand::ImmTyOffen, true, nullptr}, 6562 {"idxen", AMDGPUOperand::ImmTyIdxen, true, nullptr}, 6563 {"addr64", AMDGPUOperand::ImmTyAddr64, true, nullptr}, 6564 {"offset0", AMDGPUOperand::ImmTyOffset0, false, nullptr}, 6565 {"offset1", AMDGPUOperand::ImmTyOffset1, false, nullptr}, 6566 {"gds", AMDGPUOperand::ImmTyGDS, true, nullptr}, 6567 {"lds", AMDGPUOperand::ImmTyLDS, true, nullptr}, 6568 {"offset", AMDGPUOperand::ImmTyOffset, false, nullptr}, 6569 {"inst_offset", AMDGPUOperand::ImmTyInstOffset, false, nullptr}, 6570 {"dlc", AMDGPUOperand::ImmTyDLC, true, nullptr}, 6571 {"glc", AMDGPUOperand::ImmTyGLC, true, nullptr}, 6572 {"slc", AMDGPUOperand::ImmTySLC, true, nullptr}, 6573 {"swz", AMDGPUOperand::ImmTySWZ, true, nullptr}, 6574 {"tfe", AMDGPUOperand::ImmTyTFE, true, nullptr}, 6575 {"d16", AMDGPUOperand::ImmTyD16, true, nullptr}, 6576 {"high", AMDGPUOperand::ImmTyHigh, true, nullptr}, 6577 {"clamp", AMDGPUOperand::ImmTyClampSI, true, nullptr}, 6578 {"omod", AMDGPUOperand::ImmTyOModSI, false, ConvertOmodMul}, 6579 {"unorm", AMDGPUOperand::ImmTyUNorm, true, nullptr}, 6580 {"da", AMDGPUOperand::ImmTyDA, true, nullptr}, 6581 {"r128", AMDGPUOperand::ImmTyR128A16, true, nullptr}, 6582 {"a16", AMDGPUOperand::ImmTyA16, true, nullptr}, 6583 {"lwe", AMDGPUOperand::ImmTyLWE, true, nullptr}, 6584 {"d16", AMDGPUOperand::ImmTyD16, true, nullptr}, 6585 {"dmask", AMDGPUOperand::ImmTyDMask, false, nullptr}, 6586 {"dim", AMDGPUOperand::ImmTyDim, false, nullptr}, 6587 {"row_mask", AMDGPUOperand::ImmTyDppRowMask, false, nullptr}, 6588 {"bank_mask", AMDGPUOperand::ImmTyDppBankMask, false, nullptr}, 6589 {"bound_ctrl", AMDGPUOperand::ImmTyDppBoundCtrl, false, ConvertBoundCtrl}, 6590 {"fi", AMDGPUOperand::ImmTyDppFi, false, nullptr}, 6591 {"dst_sel", AMDGPUOperand::ImmTySdwaDstSel, false, nullptr}, 6592 {"src0_sel", AMDGPUOperand::ImmTySdwaSrc0Sel, false, nullptr}, 6593 {"src1_sel", AMDGPUOperand::ImmTySdwaSrc1Sel, false, nullptr}, 6594 {"dst_unused", AMDGPUOperand::ImmTySdwaDstUnused, false, nullptr}, 6595 {"compr", AMDGPUOperand::ImmTyExpCompr, true, nullptr }, 6596 {"vm", AMDGPUOperand::ImmTyExpVM, true, nullptr}, 6597 {"op_sel", AMDGPUOperand::ImmTyOpSel, false, nullptr}, 6598 {"op_sel_hi", AMDGPUOperand::ImmTyOpSelHi, false, nullptr}, 6599 {"neg_lo", AMDGPUOperand::ImmTyNegLo, false, nullptr}, 6600 {"neg_hi", AMDGPUOperand::ImmTyNegHi, false, nullptr}, 6601 {"blgp", AMDGPUOperand::ImmTyBLGP, false, nullptr}, 6602 {"cbsz", AMDGPUOperand::ImmTyCBSZ, false, nullptr}, 6603 {"abid", AMDGPUOperand::ImmTyABID, false, nullptr} 6604 }; 6605 6606 OperandMatchResultTy AMDGPUAsmParser::parseOptionalOperand(OperandVector &Operands) { 6607 6608 OperandMatchResultTy res = parseOptionalOpr(Operands); 6609 6610 // This is a hack to enable hardcoded mandatory operands which follow 6611 // optional operands. 6612 // 6613 // Current design assumes that all operands after the first optional operand 6614 // are also optional. However implementation of some instructions violates 6615 // this rule (see e.g. flat/global atomic which have hardcoded 'glc' operands). 6616 // 6617 // To alleviate this problem, we have to (implicitly) parse extra operands 6618 // to make sure autogenerated parser of custom operands never hit hardcoded 6619 // mandatory operands. 6620 6621 for (unsigned i = 0; i < MAX_OPR_LOOKAHEAD; ++i) { 6622 if (res != MatchOperand_Success || 6623 isToken(AsmToken::EndOfStatement)) 6624 break; 6625 6626 trySkipToken(AsmToken::Comma); 6627 res = parseOptionalOpr(Operands); 6628 } 6629 6630 return res; 6631 } 6632 6633 OperandMatchResultTy AMDGPUAsmParser::parseOptionalOpr(OperandVector &Operands) { 6634 OperandMatchResultTy res; 6635 for (const OptionalOperand &Op : AMDGPUOptionalOperandTable) { 6636 // try to parse any optional operand here 6637 if (Op.IsBit) { 6638 res = parseNamedBit(Op.Name, Operands, Op.Type); 6639 } else if (Op.Type == AMDGPUOperand::ImmTyOModSI) { 6640 res = parseOModOperand(Operands); 6641 } else if (Op.Type == AMDGPUOperand::ImmTySdwaDstSel || 6642 Op.Type == AMDGPUOperand::ImmTySdwaSrc0Sel || 6643 Op.Type == AMDGPUOperand::ImmTySdwaSrc1Sel) { 6644 res = parseSDWASel(Operands, Op.Name, Op.Type); 6645 } else if (Op.Type == AMDGPUOperand::ImmTySdwaDstUnused) { 6646 res = parseSDWADstUnused(Operands); 6647 } else if (Op.Type == AMDGPUOperand::ImmTyOpSel || 6648 Op.Type == AMDGPUOperand::ImmTyOpSelHi || 6649 Op.Type == AMDGPUOperand::ImmTyNegLo || 6650 Op.Type == AMDGPUOperand::ImmTyNegHi) { 6651 res = parseOperandArrayWithPrefix(Op.Name, Operands, Op.Type, 6652 Op.ConvertResult); 6653 } else if (Op.Type == AMDGPUOperand::ImmTyDim) { 6654 res = parseDim(Operands); 6655 } else { 6656 res = parseIntWithPrefix(Op.Name, Operands, Op.Type, Op.ConvertResult); 6657 } 6658 if (res != MatchOperand_NoMatch) { 6659 return res; 6660 } 6661 } 6662 return MatchOperand_NoMatch; 6663 } 6664 6665 OperandMatchResultTy AMDGPUAsmParser::parseOModOperand(OperandVector &Operands) { 6666 StringRef Name = Parser.getTok().getString(); 6667 if (Name == "mul") { 6668 return parseIntWithPrefix("mul", Operands, 6669 AMDGPUOperand::ImmTyOModSI, ConvertOmodMul); 6670 } 6671 6672 if (Name == "div") { 6673 return parseIntWithPrefix("div", Operands, 6674 AMDGPUOperand::ImmTyOModSI, ConvertOmodDiv); 6675 } 6676 6677 return MatchOperand_NoMatch; 6678 } 6679 6680 void AMDGPUAsmParser::cvtVOP3OpSel(MCInst &Inst, const OperandVector &Operands) { 6681 cvtVOP3P(Inst, Operands); 6682 6683 int Opc = Inst.getOpcode(); 6684 6685 int SrcNum; 6686 const int Ops[] = { AMDGPU::OpName::src0, 6687 AMDGPU::OpName::src1, 6688 AMDGPU::OpName::src2 }; 6689 for (SrcNum = 0; 6690 SrcNum < 3 && AMDGPU::getNamedOperandIdx(Opc, Ops[SrcNum]) != -1; 6691 ++SrcNum); 6692 assert(SrcNum > 0); 6693 6694 int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel); 6695 unsigned OpSel = Inst.getOperand(OpSelIdx).getImm(); 6696 6697 if ((OpSel & (1 << SrcNum)) != 0) { 6698 int ModIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0_modifiers); 6699 uint32_t ModVal = Inst.getOperand(ModIdx).getImm(); 6700 Inst.getOperand(ModIdx).setImm(ModVal | SISrcMods::DST_OP_SEL); 6701 } 6702 } 6703 6704 static bool isRegOrImmWithInputMods(const MCInstrDesc &Desc, unsigned OpNum) { 6705 // 1. This operand is input modifiers 6706 return Desc.OpInfo[OpNum].OperandType == AMDGPU::OPERAND_INPUT_MODS 6707 // 2. This is not last operand 6708 && Desc.NumOperands > (OpNum + 1) 6709 // 3. Next operand is register class 6710 && Desc.OpInfo[OpNum + 1].RegClass != -1 6711 // 4. Next register is not tied to any other operand 6712 && Desc.getOperandConstraint(OpNum + 1, MCOI::OperandConstraint::TIED_TO) == -1; 6713 } 6714 6715 void AMDGPUAsmParser::cvtVOP3Interp(MCInst &Inst, const OperandVector &Operands) 6716 { 6717 OptionalImmIndexMap OptionalIdx; 6718 unsigned Opc = Inst.getOpcode(); 6719 6720 unsigned I = 1; 6721 const MCInstrDesc &Desc = MII.get(Inst.getOpcode()); 6722 for (unsigned J = 0; J < Desc.getNumDefs(); ++J) { 6723 ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1); 6724 } 6725 6726 for (unsigned E = Operands.size(); I != E; ++I) { 6727 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]); 6728 if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) { 6729 Op.addRegOrImmWithFPInputModsOperands(Inst, 2); 6730 } else if (Op.isInterpSlot() || 6731 Op.isInterpAttr() || 6732 Op.isAttrChan()) { 6733 Inst.addOperand(MCOperand::createImm(Op.getImm())); 6734 } else if (Op.isImmModifier()) { 6735 OptionalIdx[Op.getImmTy()] = I; 6736 } else { 6737 llvm_unreachable("unhandled operand type"); 6738 } 6739 } 6740 6741 if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::high) != -1) { 6742 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyHigh); 6743 } 6744 6745 if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp) != -1) { 6746 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI); 6747 } 6748 6749 if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::omod) != -1) { 6750 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI); 6751 } 6752 } 6753 6754 void AMDGPUAsmParser::cvtVOP3(MCInst &Inst, const OperandVector &Operands, 6755 OptionalImmIndexMap &OptionalIdx) { 6756 unsigned Opc = Inst.getOpcode(); 6757 6758 unsigned I = 1; 6759 const MCInstrDesc &Desc = MII.get(Inst.getOpcode()); 6760 for (unsigned J = 0; J < Desc.getNumDefs(); ++J) { 6761 ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1); 6762 } 6763 6764 if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0_modifiers) != -1) { 6765 // This instruction has src modifiers 6766 for (unsigned E = Operands.size(); I != E; ++I) { 6767 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]); 6768 if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) { 6769 Op.addRegOrImmWithFPInputModsOperands(Inst, 2); 6770 } else if (Op.isImmModifier()) { 6771 OptionalIdx[Op.getImmTy()] = I; 6772 } else if (Op.isRegOrImm()) { 6773 Op.addRegOrImmOperands(Inst, 1); 6774 } else { 6775 llvm_unreachable("unhandled operand type"); 6776 } 6777 } 6778 } else { 6779 // No src modifiers 6780 for (unsigned E = Operands.size(); I != E; ++I) { 6781 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]); 6782 if (Op.isMod()) { 6783 OptionalIdx[Op.getImmTy()] = I; 6784 } else { 6785 Op.addRegOrImmOperands(Inst, 1); 6786 } 6787 } 6788 } 6789 6790 if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp) != -1) { 6791 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI); 6792 } 6793 6794 if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::omod) != -1) { 6795 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI); 6796 } 6797 6798 // Special case v_mac_{f16, f32} and v_fmac_{f16, f32} (gfx906/gfx10+): 6799 // it has src2 register operand that is tied to dst operand 6800 // we don't allow modifiers for this operand in assembler so src2_modifiers 6801 // should be 0. 6802 if (Opc == AMDGPU::V_MAC_F32_e64_gfx6_gfx7 || 6803 Opc == AMDGPU::V_MAC_F32_e64_gfx10 || 6804 Opc == AMDGPU::V_MAC_F32_e64_vi || 6805 Opc == AMDGPU::V_MAC_F16_e64_vi || 6806 Opc == AMDGPU::V_FMAC_F32_e64_gfx10 || 6807 Opc == AMDGPU::V_FMAC_F32_e64_vi || 6808 Opc == AMDGPU::V_FMAC_F16_e64_gfx10) { 6809 auto it = Inst.begin(); 6810 std::advance(it, AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2_modifiers)); 6811 it = Inst.insert(it, MCOperand::createImm(0)); // no modifiers for src2 6812 ++it; 6813 Inst.insert(it, Inst.getOperand(0)); // src2 = dst 6814 } 6815 } 6816 6817 void AMDGPUAsmParser::cvtVOP3(MCInst &Inst, const OperandVector &Operands) { 6818 OptionalImmIndexMap OptionalIdx; 6819 cvtVOP3(Inst, Operands, OptionalIdx); 6820 } 6821 6822 void AMDGPUAsmParser::cvtVOP3P(MCInst &Inst, 6823 const OperandVector &Operands) { 6824 OptionalImmIndexMap OptIdx; 6825 const int Opc = Inst.getOpcode(); 6826 const MCInstrDesc &Desc = MII.get(Opc); 6827 6828 const bool IsPacked = (Desc.TSFlags & SIInstrFlags::IsPacked) != 0; 6829 6830 cvtVOP3(Inst, Operands, OptIdx); 6831 6832 if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdst_in) != -1) { 6833 assert(!IsPacked); 6834 Inst.addOperand(Inst.getOperand(0)); 6835 } 6836 6837 // FIXME: This is messy. Parse the modifiers as if it was a normal VOP3 6838 // instruction, and then figure out where to actually put the modifiers 6839 6840 addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyOpSel); 6841 6842 int OpSelHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel_hi); 6843 if (OpSelHiIdx != -1) { 6844 int DefaultVal = IsPacked ? -1 : 0; 6845 addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyOpSelHi, 6846 DefaultVal); 6847 } 6848 6849 int NegLoIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::neg_lo); 6850 if (NegLoIdx != -1) { 6851 assert(IsPacked); 6852 addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyNegLo); 6853 addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyNegHi); 6854 } 6855 6856 const int Ops[] = { AMDGPU::OpName::src0, 6857 AMDGPU::OpName::src1, 6858 AMDGPU::OpName::src2 }; 6859 const int ModOps[] = { AMDGPU::OpName::src0_modifiers, 6860 AMDGPU::OpName::src1_modifiers, 6861 AMDGPU::OpName::src2_modifiers }; 6862 6863 int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel); 6864 6865 unsigned OpSel = Inst.getOperand(OpSelIdx).getImm(); 6866 unsigned OpSelHi = 0; 6867 unsigned NegLo = 0; 6868 unsigned NegHi = 0; 6869 6870 if (OpSelHiIdx != -1) { 6871 OpSelHi = Inst.getOperand(OpSelHiIdx).getImm(); 6872 } 6873 6874 if (NegLoIdx != -1) { 6875 int NegHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::neg_hi); 6876 NegLo = Inst.getOperand(NegLoIdx).getImm(); 6877 NegHi = Inst.getOperand(NegHiIdx).getImm(); 6878 } 6879 6880 for (int J = 0; J < 3; ++J) { 6881 int OpIdx = AMDGPU::getNamedOperandIdx(Opc, Ops[J]); 6882 if (OpIdx == -1) 6883 break; 6884 6885 uint32_t ModVal = 0; 6886 6887 if ((OpSel & (1 << J)) != 0) 6888 ModVal |= SISrcMods::OP_SEL_0; 6889 6890 if ((OpSelHi & (1 << J)) != 0) 6891 ModVal |= SISrcMods::OP_SEL_1; 6892 6893 if ((NegLo & (1 << J)) != 0) 6894 ModVal |= SISrcMods::NEG; 6895 6896 if ((NegHi & (1 << J)) != 0) 6897 ModVal |= SISrcMods::NEG_HI; 6898 6899 int ModIdx = AMDGPU::getNamedOperandIdx(Opc, ModOps[J]); 6900 6901 Inst.getOperand(ModIdx).setImm(Inst.getOperand(ModIdx).getImm() | ModVal); 6902 } 6903 } 6904 6905 //===----------------------------------------------------------------------===// 6906 // dpp 6907 //===----------------------------------------------------------------------===// 6908 6909 bool AMDGPUOperand::isDPP8() const { 6910 return isImmTy(ImmTyDPP8); 6911 } 6912 6913 bool AMDGPUOperand::isDPPCtrl() const { 6914 using namespace AMDGPU::DPP; 6915 6916 bool result = isImm() && getImmTy() == ImmTyDppCtrl && isUInt<9>(getImm()); 6917 if (result) { 6918 int64_t Imm = getImm(); 6919 return (Imm >= DppCtrl::QUAD_PERM_FIRST && Imm <= DppCtrl::QUAD_PERM_LAST) || 6920 (Imm >= DppCtrl::ROW_SHL_FIRST && Imm <= DppCtrl::ROW_SHL_LAST) || 6921 (Imm >= DppCtrl::ROW_SHR_FIRST && Imm <= DppCtrl::ROW_SHR_LAST) || 6922 (Imm >= DppCtrl::ROW_ROR_FIRST && Imm <= DppCtrl::ROW_ROR_LAST) || 6923 (Imm == DppCtrl::WAVE_SHL1) || 6924 (Imm == DppCtrl::WAVE_ROL1) || 6925 (Imm == DppCtrl::WAVE_SHR1) || 6926 (Imm == DppCtrl::WAVE_ROR1) || 6927 (Imm == DppCtrl::ROW_MIRROR) || 6928 (Imm == DppCtrl::ROW_HALF_MIRROR) || 6929 (Imm == DppCtrl::BCAST15) || 6930 (Imm == DppCtrl::BCAST31) || 6931 (Imm >= DppCtrl::ROW_SHARE_FIRST && Imm <= DppCtrl::ROW_SHARE_LAST) || 6932 (Imm >= DppCtrl::ROW_XMASK_FIRST && Imm <= DppCtrl::ROW_XMASK_LAST); 6933 } 6934 return false; 6935 } 6936 6937 //===----------------------------------------------------------------------===// 6938 // mAI 6939 //===----------------------------------------------------------------------===// 6940 6941 bool AMDGPUOperand::isBLGP() const { 6942 return isImm() && getImmTy() == ImmTyBLGP && isUInt<3>(getImm()); 6943 } 6944 6945 bool AMDGPUOperand::isCBSZ() const { 6946 return isImm() && getImmTy() == ImmTyCBSZ && isUInt<3>(getImm()); 6947 } 6948 6949 bool AMDGPUOperand::isABID() const { 6950 return isImm() && getImmTy() == ImmTyABID && isUInt<4>(getImm()); 6951 } 6952 6953 bool AMDGPUOperand::isS16Imm() const { 6954 return isImm() && (isInt<16>(getImm()) || isUInt<16>(getImm())); 6955 } 6956 6957 bool AMDGPUOperand::isU16Imm() const { 6958 return isImm() && isUInt<16>(getImm()); 6959 } 6960 6961 OperandMatchResultTy AMDGPUAsmParser::parseDim(OperandVector &Operands) { 6962 if (!isGFX10()) 6963 return MatchOperand_NoMatch; 6964 6965 SMLoc S = Parser.getTok().getLoc(); 6966 6967 if (getLexer().isNot(AsmToken::Identifier)) 6968 return MatchOperand_NoMatch; 6969 if (getLexer().getTok().getString() != "dim") 6970 return MatchOperand_NoMatch; 6971 6972 Parser.Lex(); 6973 if (getLexer().isNot(AsmToken::Colon)) 6974 return MatchOperand_ParseFail; 6975 6976 Parser.Lex(); 6977 6978 // We want to allow "dim:1D" etc., but the initial 1 is tokenized as an 6979 // integer. 6980 std::string Token; 6981 if (getLexer().is(AsmToken::Integer)) { 6982 SMLoc Loc = getLexer().getTok().getEndLoc(); 6983 Token = std::string(getLexer().getTok().getString()); 6984 Parser.Lex(); 6985 if (getLexer().getTok().getLoc() != Loc) 6986 return MatchOperand_ParseFail; 6987 } 6988 if (getLexer().isNot(AsmToken::Identifier)) 6989 return MatchOperand_ParseFail; 6990 Token += getLexer().getTok().getString(); 6991 6992 StringRef DimId = Token; 6993 if (DimId.startswith("SQ_RSRC_IMG_")) 6994 DimId = DimId.substr(12); 6995 6996 const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfoByAsmSuffix(DimId); 6997 if (!DimInfo) 6998 return MatchOperand_ParseFail; 6999 7000 Parser.Lex(); 7001 7002 Operands.push_back(AMDGPUOperand::CreateImm(this, DimInfo->Encoding, S, 7003 AMDGPUOperand::ImmTyDim)); 7004 return MatchOperand_Success; 7005 } 7006 7007 OperandMatchResultTy AMDGPUAsmParser::parseDPP8(OperandVector &Operands) { 7008 SMLoc S = Parser.getTok().getLoc(); 7009 StringRef Prefix; 7010 7011 if (getLexer().getKind() == AsmToken::Identifier) { 7012 Prefix = Parser.getTok().getString(); 7013 } else { 7014 return MatchOperand_NoMatch; 7015 } 7016 7017 if (Prefix != "dpp8") 7018 return parseDPPCtrl(Operands); 7019 if (!isGFX10()) 7020 return MatchOperand_NoMatch; 7021 7022 // dpp8:[%d,%d,%d,%d,%d,%d,%d,%d] 7023 7024 int64_t Sels[8]; 7025 7026 Parser.Lex(); 7027 if (getLexer().isNot(AsmToken::Colon)) 7028 return MatchOperand_ParseFail; 7029 7030 Parser.Lex(); 7031 if (getLexer().isNot(AsmToken::LBrac)) 7032 return MatchOperand_ParseFail; 7033 7034 Parser.Lex(); 7035 if (getParser().parseAbsoluteExpression(Sels[0])) 7036 return MatchOperand_ParseFail; 7037 if (0 > Sels[0] || 7 < Sels[0]) 7038 return MatchOperand_ParseFail; 7039 7040 for (size_t i = 1; i < 8; ++i) { 7041 if (getLexer().isNot(AsmToken::Comma)) 7042 return MatchOperand_ParseFail; 7043 7044 Parser.Lex(); 7045 if (getParser().parseAbsoluteExpression(Sels[i])) 7046 return MatchOperand_ParseFail; 7047 if (0 > Sels[i] || 7 < Sels[i]) 7048 return MatchOperand_ParseFail; 7049 } 7050 7051 if (getLexer().isNot(AsmToken::RBrac)) 7052 return MatchOperand_ParseFail; 7053 Parser.Lex(); 7054 7055 unsigned DPP8 = 0; 7056 for (size_t i = 0; i < 8; ++i) 7057 DPP8 |= (Sels[i] << (i * 3)); 7058 7059 Operands.push_back(AMDGPUOperand::CreateImm(this, DPP8, S, AMDGPUOperand::ImmTyDPP8)); 7060 return MatchOperand_Success; 7061 } 7062 7063 OperandMatchResultTy 7064 AMDGPUAsmParser::parseDPPCtrl(OperandVector &Operands) { 7065 using namespace AMDGPU::DPP; 7066 7067 SMLoc S = Parser.getTok().getLoc(); 7068 StringRef Prefix; 7069 int64_t Int; 7070 7071 if (getLexer().getKind() == AsmToken::Identifier) { 7072 Prefix = Parser.getTok().getString(); 7073 } else { 7074 return MatchOperand_NoMatch; 7075 } 7076 7077 if (Prefix == "row_mirror") { 7078 Int = DppCtrl::ROW_MIRROR; 7079 Parser.Lex(); 7080 } else if (Prefix == "row_half_mirror") { 7081 Int = DppCtrl::ROW_HALF_MIRROR; 7082 Parser.Lex(); 7083 } else { 7084 // Check to prevent parseDPPCtrlOps from eating invalid tokens 7085 if (Prefix != "quad_perm" 7086 && Prefix != "row_shl" 7087 && Prefix != "row_shr" 7088 && Prefix != "row_ror" 7089 && Prefix != "wave_shl" 7090 && Prefix != "wave_rol" 7091 && Prefix != "wave_shr" 7092 && Prefix != "wave_ror" 7093 && Prefix != "row_bcast" 7094 && Prefix != "row_share" 7095 && Prefix != "row_xmask") { 7096 return MatchOperand_NoMatch; 7097 } 7098 7099 if (!isGFX10() && (Prefix == "row_share" || Prefix == "row_xmask")) 7100 return MatchOperand_NoMatch; 7101 7102 if (!isVI() && !isGFX9() && 7103 (Prefix == "wave_shl" || Prefix == "wave_shr" || 7104 Prefix == "wave_rol" || Prefix == "wave_ror" || 7105 Prefix == "row_bcast")) 7106 return MatchOperand_NoMatch; 7107 7108 Parser.Lex(); 7109 if (getLexer().isNot(AsmToken::Colon)) 7110 return MatchOperand_ParseFail; 7111 7112 if (Prefix == "quad_perm") { 7113 // quad_perm:[%d,%d,%d,%d] 7114 Parser.Lex(); 7115 if (getLexer().isNot(AsmToken::LBrac)) 7116 return MatchOperand_ParseFail; 7117 Parser.Lex(); 7118 7119 if (getParser().parseAbsoluteExpression(Int) || !(0 <= Int && Int <=3)) 7120 return MatchOperand_ParseFail; 7121 7122 for (int i = 0; i < 3; ++i) { 7123 if (getLexer().isNot(AsmToken::Comma)) 7124 return MatchOperand_ParseFail; 7125 Parser.Lex(); 7126 7127 int64_t Temp; 7128 if (getParser().parseAbsoluteExpression(Temp) || !(0 <= Temp && Temp <=3)) 7129 return MatchOperand_ParseFail; 7130 const int shift = i*2 + 2; 7131 Int += (Temp << shift); 7132 } 7133 7134 if (getLexer().isNot(AsmToken::RBrac)) 7135 return MatchOperand_ParseFail; 7136 Parser.Lex(); 7137 } else { 7138 // sel:%d 7139 Parser.Lex(); 7140 if (getParser().parseAbsoluteExpression(Int)) 7141 return MatchOperand_ParseFail; 7142 7143 if (Prefix == "row_shl" && 1 <= Int && Int <= 15) { 7144 Int |= DppCtrl::ROW_SHL0; 7145 } else if (Prefix == "row_shr" && 1 <= Int && Int <= 15) { 7146 Int |= DppCtrl::ROW_SHR0; 7147 } else if (Prefix == "row_ror" && 1 <= Int && Int <= 15) { 7148 Int |= DppCtrl::ROW_ROR0; 7149 } else if (Prefix == "wave_shl" && 1 == Int) { 7150 Int = DppCtrl::WAVE_SHL1; 7151 } else if (Prefix == "wave_rol" && 1 == Int) { 7152 Int = DppCtrl::WAVE_ROL1; 7153 } else if (Prefix == "wave_shr" && 1 == Int) { 7154 Int = DppCtrl::WAVE_SHR1; 7155 } else if (Prefix == "wave_ror" && 1 == Int) { 7156 Int = DppCtrl::WAVE_ROR1; 7157 } else if (Prefix == "row_bcast") { 7158 if (Int == 15) { 7159 Int = DppCtrl::BCAST15; 7160 } else if (Int == 31) { 7161 Int = DppCtrl::BCAST31; 7162 } else { 7163 return MatchOperand_ParseFail; 7164 } 7165 } else if (Prefix == "row_share" && 0 <= Int && Int <= 15) { 7166 Int |= DppCtrl::ROW_SHARE_FIRST; 7167 } else if (Prefix == "row_xmask" && 0 <= Int && Int <= 15) { 7168 Int |= DppCtrl::ROW_XMASK_FIRST; 7169 } else { 7170 return MatchOperand_ParseFail; 7171 } 7172 } 7173 } 7174 7175 Operands.push_back(AMDGPUOperand::CreateImm(this, Int, S, AMDGPUOperand::ImmTyDppCtrl)); 7176 return MatchOperand_Success; 7177 } 7178 7179 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultRowMask() const { 7180 return AMDGPUOperand::CreateImm(this, 0xf, SMLoc(), AMDGPUOperand::ImmTyDppRowMask); 7181 } 7182 7183 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultEndpgmImmOperands() const { 7184 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyEndpgm); 7185 } 7186 7187 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultBankMask() const { 7188 return AMDGPUOperand::CreateImm(this, 0xf, SMLoc(), AMDGPUOperand::ImmTyDppBankMask); 7189 } 7190 7191 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultBoundCtrl() const { 7192 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyDppBoundCtrl); 7193 } 7194 7195 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultFI() const { 7196 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyDppFi); 7197 } 7198 7199 void AMDGPUAsmParser::cvtDPP(MCInst &Inst, const OperandVector &Operands, bool IsDPP8) { 7200 OptionalImmIndexMap OptionalIdx; 7201 7202 unsigned I = 1; 7203 const MCInstrDesc &Desc = MII.get(Inst.getOpcode()); 7204 for (unsigned J = 0; J < Desc.getNumDefs(); ++J) { 7205 ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1); 7206 } 7207 7208 int Fi = 0; 7209 for (unsigned E = Operands.size(); I != E; ++I) { 7210 auto TiedTo = Desc.getOperandConstraint(Inst.getNumOperands(), 7211 MCOI::TIED_TO); 7212 if (TiedTo != -1) { 7213 assert((unsigned)TiedTo < Inst.getNumOperands()); 7214 // handle tied old or src2 for MAC instructions 7215 Inst.addOperand(Inst.getOperand(TiedTo)); 7216 } 7217 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]); 7218 // Add the register arguments 7219 if (Op.isReg() && validateVccOperand(Op.getReg())) { 7220 // VOP2b (v_add_u32, v_sub_u32 ...) dpp use "vcc" token. 7221 // Skip it. 7222 continue; 7223 } 7224 7225 if (IsDPP8) { 7226 if (Op.isDPP8()) { 7227 Op.addImmOperands(Inst, 1); 7228 } else if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) { 7229 Op.addRegWithFPInputModsOperands(Inst, 2); 7230 } else if (Op.isFI()) { 7231 Fi = Op.getImm(); 7232 } else if (Op.isReg()) { 7233 Op.addRegOperands(Inst, 1); 7234 } else { 7235 llvm_unreachable("Invalid operand type"); 7236 } 7237 } else { 7238 if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) { 7239 Op.addRegWithFPInputModsOperands(Inst, 2); 7240 } else if (Op.isDPPCtrl()) { 7241 Op.addImmOperands(Inst, 1); 7242 } else if (Op.isImm()) { 7243 // Handle optional arguments 7244 OptionalIdx[Op.getImmTy()] = I; 7245 } else { 7246 llvm_unreachable("Invalid operand type"); 7247 } 7248 } 7249 } 7250 7251 if (IsDPP8) { 7252 using namespace llvm::AMDGPU::DPP; 7253 Inst.addOperand(MCOperand::createImm(Fi? DPP8_FI_1 : DPP8_FI_0)); 7254 } else { 7255 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppRowMask, 0xf); 7256 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBankMask, 0xf); 7257 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBoundCtrl); 7258 if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::fi) != -1) { 7259 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppFi); 7260 } 7261 } 7262 } 7263 7264 //===----------------------------------------------------------------------===// 7265 // sdwa 7266 //===----------------------------------------------------------------------===// 7267 7268 OperandMatchResultTy 7269 AMDGPUAsmParser::parseSDWASel(OperandVector &Operands, StringRef Prefix, 7270 AMDGPUOperand::ImmTy Type) { 7271 using namespace llvm::AMDGPU::SDWA; 7272 7273 SMLoc S = Parser.getTok().getLoc(); 7274 StringRef Value; 7275 OperandMatchResultTy res; 7276 7277 res = parseStringWithPrefix(Prefix, Value); 7278 if (res != MatchOperand_Success) { 7279 return res; 7280 } 7281 7282 int64_t Int; 7283 Int = StringSwitch<int64_t>(Value) 7284 .Case("BYTE_0", SdwaSel::BYTE_0) 7285 .Case("BYTE_1", SdwaSel::BYTE_1) 7286 .Case("BYTE_2", SdwaSel::BYTE_2) 7287 .Case("BYTE_3", SdwaSel::BYTE_3) 7288 .Case("WORD_0", SdwaSel::WORD_0) 7289 .Case("WORD_1", SdwaSel::WORD_1) 7290 .Case("DWORD", SdwaSel::DWORD) 7291 .Default(0xffffffff); 7292 Parser.Lex(); // eat last token 7293 7294 if (Int == 0xffffffff) { 7295 return MatchOperand_ParseFail; 7296 } 7297 7298 Operands.push_back(AMDGPUOperand::CreateImm(this, Int, S, Type)); 7299 return MatchOperand_Success; 7300 } 7301 7302 OperandMatchResultTy 7303 AMDGPUAsmParser::parseSDWADstUnused(OperandVector &Operands) { 7304 using namespace llvm::AMDGPU::SDWA; 7305 7306 SMLoc S = Parser.getTok().getLoc(); 7307 StringRef Value; 7308 OperandMatchResultTy res; 7309 7310 res = parseStringWithPrefix("dst_unused", Value); 7311 if (res != MatchOperand_Success) { 7312 return res; 7313 } 7314 7315 int64_t Int; 7316 Int = StringSwitch<int64_t>(Value) 7317 .Case("UNUSED_PAD", DstUnused::UNUSED_PAD) 7318 .Case("UNUSED_SEXT", DstUnused::UNUSED_SEXT) 7319 .Case("UNUSED_PRESERVE", DstUnused::UNUSED_PRESERVE) 7320 .Default(0xffffffff); 7321 Parser.Lex(); // eat last token 7322 7323 if (Int == 0xffffffff) { 7324 return MatchOperand_ParseFail; 7325 } 7326 7327 Operands.push_back(AMDGPUOperand::CreateImm(this, Int, S, AMDGPUOperand::ImmTySdwaDstUnused)); 7328 return MatchOperand_Success; 7329 } 7330 7331 void AMDGPUAsmParser::cvtSdwaVOP1(MCInst &Inst, const OperandVector &Operands) { 7332 cvtSDWA(Inst, Operands, SIInstrFlags::VOP1); 7333 } 7334 7335 void AMDGPUAsmParser::cvtSdwaVOP2(MCInst &Inst, const OperandVector &Operands) { 7336 cvtSDWA(Inst, Operands, SIInstrFlags::VOP2); 7337 } 7338 7339 void AMDGPUAsmParser::cvtSdwaVOP2b(MCInst &Inst, const OperandVector &Operands) { 7340 cvtSDWA(Inst, Operands, SIInstrFlags::VOP2, true, true); 7341 } 7342 7343 void AMDGPUAsmParser::cvtSdwaVOP2e(MCInst &Inst, const OperandVector &Operands) { 7344 cvtSDWA(Inst, Operands, SIInstrFlags::VOP2, false, true); 7345 } 7346 7347 void AMDGPUAsmParser::cvtSdwaVOPC(MCInst &Inst, const OperandVector &Operands) { 7348 cvtSDWA(Inst, Operands, SIInstrFlags::VOPC, isVI()); 7349 } 7350 7351 void AMDGPUAsmParser::cvtSDWA(MCInst &Inst, const OperandVector &Operands, 7352 uint64_t BasicInstType, 7353 bool SkipDstVcc, 7354 bool SkipSrcVcc) { 7355 using namespace llvm::AMDGPU::SDWA; 7356 7357 OptionalImmIndexMap OptionalIdx; 7358 bool SkipVcc = SkipDstVcc || SkipSrcVcc; 7359 bool SkippedVcc = false; 7360 7361 unsigned I = 1; 7362 const MCInstrDesc &Desc = MII.get(Inst.getOpcode()); 7363 for (unsigned J = 0; J < Desc.getNumDefs(); ++J) { 7364 ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1); 7365 } 7366 7367 for (unsigned E = Operands.size(); I != E; ++I) { 7368 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]); 7369 if (SkipVcc && !SkippedVcc && Op.isReg() && 7370 (Op.getReg() == AMDGPU::VCC || Op.getReg() == AMDGPU::VCC_LO)) { 7371 // VOP2b (v_add_u32, v_sub_u32 ...) sdwa use "vcc" token as dst. 7372 // Skip it if it's 2nd (e.g. v_add_i32_sdwa v1, vcc, v2, v3) 7373 // or 4th (v_addc_u32_sdwa v1, vcc, v2, v3, vcc) operand. 7374 // Skip VCC only if we didn't skip it on previous iteration. 7375 // Note that src0 and src1 occupy 2 slots each because of modifiers. 7376 if (BasicInstType == SIInstrFlags::VOP2 && 7377 ((SkipDstVcc && Inst.getNumOperands() == 1) || 7378 (SkipSrcVcc && Inst.getNumOperands() == 5))) { 7379 SkippedVcc = true; 7380 continue; 7381 } else if (BasicInstType == SIInstrFlags::VOPC && 7382 Inst.getNumOperands() == 0) { 7383 SkippedVcc = true; 7384 continue; 7385 } 7386 } 7387 if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) { 7388 Op.addRegOrImmWithInputModsOperands(Inst, 2); 7389 } else if (Op.isImm()) { 7390 // Handle optional arguments 7391 OptionalIdx[Op.getImmTy()] = I; 7392 } else { 7393 llvm_unreachable("Invalid operand type"); 7394 } 7395 SkippedVcc = false; 7396 } 7397 7398 if (Inst.getOpcode() != AMDGPU::V_NOP_sdwa_gfx10 && 7399 Inst.getOpcode() != AMDGPU::V_NOP_sdwa_gfx9 && 7400 Inst.getOpcode() != AMDGPU::V_NOP_sdwa_vi) { 7401 // v_nop_sdwa_sdwa_vi/gfx9 has no optional sdwa arguments 7402 switch (BasicInstType) { 7403 case SIInstrFlags::VOP1: 7404 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0); 7405 if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::omod) != -1) { 7406 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI, 0); 7407 } 7408 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstSel, SdwaSel::DWORD); 7409 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstUnused, DstUnused::UNUSED_PRESERVE); 7410 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, SdwaSel::DWORD); 7411 break; 7412 7413 case SIInstrFlags::VOP2: 7414 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0); 7415 if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::omod) != -1) { 7416 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI, 0); 7417 } 7418 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstSel, SdwaSel::DWORD); 7419 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstUnused, DstUnused::UNUSED_PRESERVE); 7420 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, SdwaSel::DWORD); 7421 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc1Sel, SdwaSel::DWORD); 7422 break; 7423 7424 case SIInstrFlags::VOPC: 7425 if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::clamp) != -1) 7426 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0); 7427 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, SdwaSel::DWORD); 7428 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc1Sel, SdwaSel::DWORD); 7429 break; 7430 7431 default: 7432 llvm_unreachable("Invalid instruction type. Only VOP1, VOP2 and VOPC allowed"); 7433 } 7434 } 7435 7436 // special case v_mac_{f16, f32}: 7437 // it has src2 register operand that is tied to dst operand 7438 if (Inst.getOpcode() == AMDGPU::V_MAC_F32_sdwa_vi || 7439 Inst.getOpcode() == AMDGPU::V_MAC_F16_sdwa_vi) { 7440 auto it = Inst.begin(); 7441 std::advance( 7442 it, AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::src2)); 7443 Inst.insert(it, Inst.getOperand(0)); // src2 = dst 7444 } 7445 } 7446 7447 //===----------------------------------------------------------------------===// 7448 // mAI 7449 //===----------------------------------------------------------------------===// 7450 7451 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultBLGP() const { 7452 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyBLGP); 7453 } 7454 7455 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultCBSZ() const { 7456 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyCBSZ); 7457 } 7458 7459 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultABID() const { 7460 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyABID); 7461 } 7462 7463 /// Force static initialization. 7464 extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeAMDGPUAsmParser() { 7465 RegisterMCAsmParser<AMDGPUAsmParser> A(getTheAMDGPUTarget()); 7466 RegisterMCAsmParser<AMDGPUAsmParser> B(getTheGCNTarget()); 7467 } 7468 7469 #define GET_REGISTER_MATCHER 7470 #define GET_MATCHER_IMPLEMENTATION 7471 #define GET_MNEMONIC_SPELL_CHECKER 7472 #include "AMDGPUGenAsmMatcher.inc" 7473 7474 // This fuction should be defined after auto-generated include so that we have 7475 // MatchClassKind enum defined 7476 unsigned AMDGPUAsmParser::validateTargetOperandClass(MCParsedAsmOperand &Op, 7477 unsigned Kind) { 7478 // Tokens like "glc" would be parsed as immediate operands in ParseOperand(). 7479 // But MatchInstructionImpl() expects to meet token and fails to validate 7480 // operand. This method checks if we are given immediate operand but expect to 7481 // get corresponding token. 7482 AMDGPUOperand &Operand = (AMDGPUOperand&)Op; 7483 switch (Kind) { 7484 case MCK_addr64: 7485 return Operand.isAddr64() ? Match_Success : Match_InvalidOperand; 7486 case MCK_gds: 7487 return Operand.isGDS() ? Match_Success : Match_InvalidOperand; 7488 case MCK_lds: 7489 return Operand.isLDS() ? Match_Success : Match_InvalidOperand; 7490 case MCK_glc: 7491 return Operand.isGLC() ? Match_Success : Match_InvalidOperand; 7492 case MCK_idxen: 7493 return Operand.isIdxen() ? Match_Success : Match_InvalidOperand; 7494 case MCK_offen: 7495 return Operand.isOffen() ? Match_Success : Match_InvalidOperand; 7496 case MCK_SSrcB32: 7497 // When operands have expression values, they will return true for isToken, 7498 // because it is not possible to distinguish between a token and an 7499 // expression at parse time. MatchInstructionImpl() will always try to 7500 // match an operand as a token, when isToken returns true, and when the 7501 // name of the expression is not a valid token, the match will fail, 7502 // so we need to handle it here. 7503 return Operand.isSSrcB32() ? Match_Success : Match_InvalidOperand; 7504 case MCK_SSrcF32: 7505 return Operand.isSSrcF32() ? Match_Success : Match_InvalidOperand; 7506 case MCK_SoppBrTarget: 7507 return Operand.isSoppBrTarget() ? Match_Success : Match_InvalidOperand; 7508 case MCK_VReg32OrOff: 7509 return Operand.isVReg32OrOff() ? Match_Success : Match_InvalidOperand; 7510 case MCK_InterpSlot: 7511 return Operand.isInterpSlot() ? Match_Success : Match_InvalidOperand; 7512 case MCK_Attr: 7513 return Operand.isInterpAttr() ? Match_Success : Match_InvalidOperand; 7514 case MCK_AttrChan: 7515 return Operand.isAttrChan() ? Match_Success : Match_InvalidOperand; 7516 case MCK_ImmSMEMOffset: 7517 return Operand.isSMEMOffset() ? Match_Success : Match_InvalidOperand; 7518 case MCK_SReg_64: 7519 case MCK_SReg_64_XEXEC: 7520 // Null is defined as a 32-bit register but 7521 // it should also be enabled with 64-bit operands. 7522 // The following code enables it for SReg_64 operands 7523 // used as source and destination. Remaining source 7524 // operands are handled in isInlinableImm. 7525 return Operand.isNull() ? Match_Success : Match_InvalidOperand; 7526 default: 7527 return Match_InvalidOperand; 7528 } 7529 } 7530 7531 //===----------------------------------------------------------------------===// 7532 // endpgm 7533 //===----------------------------------------------------------------------===// 7534 7535 OperandMatchResultTy AMDGPUAsmParser::parseEndpgmOp(OperandVector &Operands) { 7536 SMLoc S = Parser.getTok().getLoc(); 7537 int64_t Imm = 0; 7538 7539 if (!parseExpr(Imm)) { 7540 // The operand is optional, if not present default to 0 7541 Imm = 0; 7542 } 7543 7544 if (!isUInt<16>(Imm)) { 7545 Error(S, "expected a 16-bit value"); 7546 return MatchOperand_ParseFail; 7547 } 7548 7549 Operands.push_back( 7550 AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTyEndpgm)); 7551 return MatchOperand_Success; 7552 } 7553 7554 bool AMDGPUOperand::isEndpgm() const { return isImmTy(ImmTyEndpgm); } 7555