1 //===- AMDGPUAsmParser.cpp - Parse SI asm to MCInst instructions ----------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 9 #include "AMDGPU.h" 10 #include "AMDKernelCodeT.h" 11 #include "MCTargetDesc/AMDGPUMCTargetDesc.h" 12 #include "MCTargetDesc/AMDGPUTargetStreamer.h" 13 #include "SIDefines.h" 14 #include "SIInstrInfo.h" 15 #include "TargetInfo/AMDGPUTargetInfo.h" 16 #include "Utils/AMDGPUAsmUtils.h" 17 #include "Utils/AMDGPUBaseInfo.h" 18 #include "Utils/AMDKernelCodeTUtils.h" 19 #include "llvm/ADT/APFloat.h" 20 #include "llvm/ADT/APInt.h" 21 #include "llvm/ADT/ArrayRef.h" 22 #include "llvm/ADT/STLExtras.h" 23 #include "llvm/ADT/SmallBitVector.h" 24 #include "llvm/ADT/SmallString.h" 25 #include "llvm/ADT/StringRef.h" 26 #include "llvm/ADT/StringSwitch.h" 27 #include "llvm/ADT/Twine.h" 28 #include "llvm/BinaryFormat/ELF.h" 29 #include "llvm/MC/MCAsmInfo.h" 30 #include "llvm/MC/MCContext.h" 31 #include "llvm/MC/MCExpr.h" 32 #include "llvm/MC/MCInst.h" 33 #include "llvm/MC/MCInstrDesc.h" 34 #include "llvm/MC/MCInstrInfo.h" 35 #include "llvm/MC/MCParser/MCAsmLexer.h" 36 #include "llvm/MC/MCParser/MCAsmParser.h" 37 #include "llvm/MC/MCParser/MCAsmParserExtension.h" 38 #include "llvm/MC/MCParser/MCParsedAsmOperand.h" 39 #include "llvm/MC/MCParser/MCTargetAsmParser.h" 40 #include "llvm/MC/MCRegisterInfo.h" 41 #include "llvm/MC/MCStreamer.h" 42 #include "llvm/MC/MCSubtargetInfo.h" 43 #include "llvm/MC/MCSymbol.h" 44 #include "llvm/Support/AMDGPUMetadata.h" 45 #include "llvm/Support/AMDHSAKernelDescriptor.h" 46 #include "llvm/Support/Casting.h" 47 #include "llvm/Support/Compiler.h" 48 #include "llvm/Support/ErrorHandling.h" 49 #include "llvm/Support/MachineValueType.h" 50 #include "llvm/Support/MathExtras.h" 51 #include "llvm/Support/SMLoc.h" 52 #include "llvm/Support/TargetParser.h" 53 #include "llvm/Support/TargetRegistry.h" 54 #include "llvm/Support/raw_ostream.h" 55 #include <algorithm> 56 #include <cassert> 57 #include <cstdint> 58 #include <cstring> 59 #include <iterator> 60 #include <map> 61 #include <memory> 62 #include <string> 63 64 using namespace llvm; 65 using namespace llvm::AMDGPU; 66 using namespace llvm::amdhsa; 67 68 namespace { 69 70 class AMDGPUAsmParser; 71 72 enum RegisterKind { IS_UNKNOWN, IS_VGPR, IS_SGPR, IS_AGPR, IS_TTMP, IS_SPECIAL }; 73 74 //===----------------------------------------------------------------------===// 75 // Operand 76 //===----------------------------------------------------------------------===// 77 78 class AMDGPUOperand : public MCParsedAsmOperand { 79 enum KindTy { 80 Token, 81 Immediate, 82 Register, 83 Expression 84 } Kind; 85 86 SMLoc StartLoc, EndLoc; 87 const AMDGPUAsmParser *AsmParser; 88 89 public: 90 AMDGPUOperand(KindTy Kind_, const AMDGPUAsmParser *AsmParser_) 91 : MCParsedAsmOperand(), Kind(Kind_), AsmParser(AsmParser_) {} 92 93 using Ptr = std::unique_ptr<AMDGPUOperand>; 94 95 struct Modifiers { 96 bool Abs = false; 97 bool Neg = false; 98 bool Sext = false; 99 100 bool hasFPModifiers() const { return Abs || Neg; } 101 bool hasIntModifiers() const { return Sext; } 102 bool hasModifiers() const { return hasFPModifiers() || hasIntModifiers(); } 103 104 int64_t getFPModifiersOperand() const { 105 int64_t Operand = 0; 106 Operand |= Abs ? 
SISrcMods::ABS : 0u; 107 Operand |= Neg ? SISrcMods::NEG : 0u; 108 return Operand; 109 } 110 111 int64_t getIntModifiersOperand() const { 112 int64_t Operand = 0; 113 Operand |= Sext ? SISrcMods::SEXT : 0u; 114 return Operand; 115 } 116 117 int64_t getModifiersOperand() const { 118 assert(!(hasFPModifiers() && hasIntModifiers()) 119 && "fp and int modifiers should not be used simultaneously"); 120 if (hasFPModifiers()) { 121 return getFPModifiersOperand(); 122 } else if (hasIntModifiers()) { 123 return getIntModifiersOperand(); 124 } else { 125 return 0; 126 } 127 } 128 129 friend raw_ostream &operator <<(raw_ostream &OS, AMDGPUOperand::Modifiers Mods); 130 }; 131 132 enum ImmTy { 133 ImmTyNone, 134 ImmTyGDS, 135 ImmTyLDS, 136 ImmTyOffen, 137 ImmTyIdxen, 138 ImmTyAddr64, 139 ImmTyOffset, 140 ImmTyInstOffset, 141 ImmTyOffset0, 142 ImmTyOffset1, 143 ImmTyDLC, 144 ImmTyGLC, 145 ImmTySLC, 146 ImmTySWZ, 147 ImmTyTFE, 148 ImmTyD16, 149 ImmTyClampSI, 150 ImmTyOModSI, 151 ImmTyDPP8, 152 ImmTyDppCtrl, 153 ImmTyDppRowMask, 154 ImmTyDppBankMask, 155 ImmTyDppBoundCtrl, 156 ImmTyDppFi, 157 ImmTySdwaDstSel, 158 ImmTySdwaSrc0Sel, 159 ImmTySdwaSrc1Sel, 160 ImmTySdwaDstUnused, 161 ImmTyDMask, 162 ImmTyDim, 163 ImmTyUNorm, 164 ImmTyDA, 165 ImmTyR128A16, 166 ImmTyLWE, 167 ImmTyExpTgt, 168 ImmTyExpCompr, 169 ImmTyExpVM, 170 ImmTyFORMAT, 171 ImmTyHwreg, 172 ImmTyOff, 173 ImmTySendMsg, 174 ImmTyInterpSlot, 175 ImmTyInterpAttr, 176 ImmTyAttrChan, 177 ImmTyOpSel, 178 ImmTyOpSelHi, 179 ImmTyNegLo, 180 ImmTyNegHi, 181 ImmTySwizzle, 182 ImmTyGprIdxMode, 183 ImmTyHigh, 184 ImmTyBLGP, 185 ImmTyCBSZ, 186 ImmTyABID, 187 ImmTyEndpgm, 188 }; 189 190 private: 191 struct TokOp { 192 const char *Data; 193 unsigned Length; 194 }; 195 196 struct ImmOp { 197 int64_t Val; 198 ImmTy Type; 199 bool IsFPImm; 200 Modifiers Mods; 201 }; 202 203 struct RegOp { 204 unsigned RegNo; 205 Modifiers Mods; 206 }; 207 208 union { 209 TokOp Tok; 210 ImmOp Imm; 211 RegOp Reg; 212 const MCExpr *Expr; 213 }; 214 215 public: 216 bool isToken() const override { 217 if (Kind == Token) 218 return true; 219 220 // When parsing operands, we can't always tell if something was meant to be 221 // a token, like 'gds', or an expression that references a global variable. 222 // In this case, we assume the string is an expression, and if we need to 223 // interpret is a token, then we treat the symbol name as the token. 
224 return isSymbolRefExpr(); 225 } 226 227 bool isSymbolRefExpr() const { 228 return isExpr() && Expr && isa<MCSymbolRefExpr>(Expr); 229 } 230 231 bool isImm() const override { 232 return Kind == Immediate; 233 } 234 235 bool isInlinableImm(MVT type) const; 236 bool isLiteralImm(MVT type) const; 237 238 bool isRegKind() const { 239 return Kind == Register; 240 } 241 242 bool isReg() const override { 243 return isRegKind() && !hasModifiers(); 244 } 245 246 bool isRegOrImmWithInputMods(unsigned RCID, MVT type) const { 247 return isRegClass(RCID) || isInlinableImm(type) || isLiteralImm(type); 248 } 249 250 bool isRegOrImmWithInt16InputMods() const { 251 return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::i16); 252 } 253 254 bool isRegOrImmWithInt32InputMods() const { 255 return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::i32); 256 } 257 258 bool isRegOrImmWithInt64InputMods() const { 259 return isRegOrImmWithInputMods(AMDGPU::VS_64RegClassID, MVT::i64); 260 } 261 262 bool isRegOrImmWithFP16InputMods() const { 263 return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::f16); 264 } 265 266 bool isRegOrImmWithFP32InputMods() const { 267 return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::f32); 268 } 269 270 bool isRegOrImmWithFP64InputMods() const { 271 return isRegOrImmWithInputMods(AMDGPU::VS_64RegClassID, MVT::f64); 272 } 273 274 bool isVReg() const { 275 return isRegClass(AMDGPU::VGPR_32RegClassID) || 276 isRegClass(AMDGPU::VReg_64RegClassID) || 277 isRegClass(AMDGPU::VReg_96RegClassID) || 278 isRegClass(AMDGPU::VReg_128RegClassID) || 279 isRegClass(AMDGPU::VReg_160RegClassID) || 280 isRegClass(AMDGPU::VReg_256RegClassID) || 281 isRegClass(AMDGPU::VReg_512RegClassID) || 282 isRegClass(AMDGPU::VReg_1024RegClassID); 283 } 284 285 bool isVReg32() const { 286 return isRegClass(AMDGPU::VGPR_32RegClassID); 287 } 288 289 bool isVReg32OrOff() const { 290 return isOff() || isVReg32(); 291 } 292 293 bool isSDWAOperand(MVT type) const; 294 bool isSDWAFP16Operand() const; 295 bool isSDWAFP32Operand() const; 296 bool isSDWAInt16Operand() const; 297 bool isSDWAInt32Operand() const; 298 299 bool isImmTy(ImmTy ImmT) const { 300 return isImm() && Imm.Type == ImmT; 301 } 302 303 bool isImmModifier() const { 304 return isImm() && Imm.Type != ImmTyNone; 305 } 306 307 bool isClampSI() const { return isImmTy(ImmTyClampSI); } 308 bool isOModSI() const { return isImmTy(ImmTyOModSI); } 309 bool isDMask() const { return isImmTy(ImmTyDMask); } 310 bool isDim() const { return isImmTy(ImmTyDim); } 311 bool isUNorm() const { return isImmTy(ImmTyUNorm); } 312 bool isDA() const { return isImmTy(ImmTyDA); } 313 bool isR128A16() const { return isImmTy(ImmTyR128A16); } 314 bool isLWE() const { return isImmTy(ImmTyLWE); } 315 bool isOff() const { return isImmTy(ImmTyOff); } 316 bool isExpTgt() const { return isImmTy(ImmTyExpTgt); } 317 bool isExpVM() const { return isImmTy(ImmTyExpVM); } 318 bool isExpCompr() const { return isImmTy(ImmTyExpCompr); } 319 bool isOffen() const { return isImmTy(ImmTyOffen); } 320 bool isIdxen() const { return isImmTy(ImmTyIdxen); } 321 bool isAddr64() const { return isImmTy(ImmTyAddr64); } 322 bool isOffset() const { return isImmTy(ImmTyOffset) && isUInt<16>(getImm()); } 323 bool isOffset0() const { return isImmTy(ImmTyOffset0) && isUInt<8>(getImm()); } 324 bool isOffset1() const { return isImmTy(ImmTyOffset1) && isUInt<8>(getImm()); } 325 326 bool isFlatOffset() const { return isImmTy(ImmTyOffset) || isImmTy(ImmTyInstOffset); } 327 bool isGDS() const { return 
isImmTy(ImmTyGDS); } 328 bool isLDS() const { return isImmTy(ImmTyLDS); } 329 bool isDLC() const { return isImmTy(ImmTyDLC); } 330 bool isGLC() const { return isImmTy(ImmTyGLC); } 331 bool isSLC() const { return isImmTy(ImmTySLC); } 332 bool isSWZ() const { return isImmTy(ImmTySWZ); } 333 bool isTFE() const { return isImmTy(ImmTyTFE); } 334 bool isD16() const { return isImmTy(ImmTyD16); } 335 bool isFORMAT() const { return isImmTy(ImmTyFORMAT) && isUInt<8>(getImm()); } 336 bool isBankMask() const { return isImmTy(ImmTyDppBankMask); } 337 bool isRowMask() const { return isImmTy(ImmTyDppRowMask); } 338 bool isBoundCtrl() const { return isImmTy(ImmTyDppBoundCtrl); } 339 bool isFI() const { return isImmTy(ImmTyDppFi); } 340 bool isSDWADstSel() const { return isImmTy(ImmTySdwaDstSel); } 341 bool isSDWASrc0Sel() const { return isImmTy(ImmTySdwaSrc0Sel); } 342 bool isSDWASrc1Sel() const { return isImmTy(ImmTySdwaSrc1Sel); } 343 bool isSDWADstUnused() const { return isImmTy(ImmTySdwaDstUnused); } 344 bool isInterpSlot() const { return isImmTy(ImmTyInterpSlot); } 345 bool isInterpAttr() const { return isImmTy(ImmTyInterpAttr); } 346 bool isAttrChan() const { return isImmTy(ImmTyAttrChan); } 347 bool isOpSel() const { return isImmTy(ImmTyOpSel); } 348 bool isOpSelHi() const { return isImmTy(ImmTyOpSelHi); } 349 bool isNegLo() const { return isImmTy(ImmTyNegLo); } 350 bool isNegHi() const { return isImmTy(ImmTyNegHi); } 351 bool isHigh() const { return isImmTy(ImmTyHigh); } 352 353 bool isMod() const { 354 return isClampSI() || isOModSI(); 355 } 356 357 bool isRegOrImm() const { 358 return isReg() || isImm(); 359 } 360 361 bool isRegClass(unsigned RCID) const; 362 363 bool isInlineValue() const; 364 365 bool isRegOrInlineNoMods(unsigned RCID, MVT type) const { 366 return (isRegClass(RCID) || isInlinableImm(type)) && !hasModifiers(); 367 } 368 369 bool isSCSrcB16() const { 370 return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::i16); 371 } 372 373 bool isSCSrcV2B16() const { 374 return isSCSrcB16(); 375 } 376 377 bool isSCSrcB32() const { 378 return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::i32); 379 } 380 381 bool isSCSrcB64() const { 382 return isRegOrInlineNoMods(AMDGPU::SReg_64RegClassID, MVT::i64); 383 } 384 385 bool isBoolReg() const; 386 387 bool isSCSrcF16() const { 388 return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::f16); 389 } 390 391 bool isSCSrcV2F16() const { 392 return isSCSrcF16(); 393 } 394 395 bool isSCSrcF32() const { 396 return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::f32); 397 } 398 399 bool isSCSrcF64() const { 400 return isRegOrInlineNoMods(AMDGPU::SReg_64RegClassID, MVT::f64); 401 } 402 403 bool isSSrcB32() const { 404 return isSCSrcB32() || isLiteralImm(MVT::i32) || isExpr(); 405 } 406 407 bool isSSrcB16() const { 408 return isSCSrcB16() || isLiteralImm(MVT::i16); 409 } 410 411 bool isSSrcV2B16() const { 412 llvm_unreachable("cannot happen"); 413 return isSSrcB16(); 414 } 415 416 bool isSSrcB64() const { 417 // TODO: Find out how SALU supports extension of 32-bit literals to 64 bits. 418 // See isVSrc64(). 
419 return isSCSrcB64() || isLiteralImm(MVT::i64); 420 } 421 422 bool isSSrcF32() const { 423 return isSCSrcB32() || isLiteralImm(MVT::f32) || isExpr(); 424 } 425 426 bool isSSrcF64() const { 427 return isSCSrcB64() || isLiteralImm(MVT::f64); 428 } 429 430 bool isSSrcF16() const { 431 return isSCSrcB16() || isLiteralImm(MVT::f16); 432 } 433 434 bool isSSrcV2F16() const { 435 llvm_unreachable("cannot happen"); 436 return isSSrcF16(); 437 } 438 439 bool isSSrcOrLdsB32() const { 440 return isRegOrInlineNoMods(AMDGPU::SRegOrLds_32RegClassID, MVT::i32) || 441 isLiteralImm(MVT::i32) || isExpr(); 442 } 443 444 bool isVCSrcB32() const { 445 return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::i32); 446 } 447 448 bool isVCSrcB64() const { 449 return isRegOrInlineNoMods(AMDGPU::VS_64RegClassID, MVT::i64); 450 } 451 452 bool isVCSrcB16() const { 453 return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::i16); 454 } 455 456 bool isVCSrcV2B16() const { 457 return isVCSrcB16(); 458 } 459 460 bool isVCSrcF32() const { 461 return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::f32); 462 } 463 464 bool isVCSrcF64() const { 465 return isRegOrInlineNoMods(AMDGPU::VS_64RegClassID, MVT::f64); 466 } 467 468 bool isVCSrcF16() const { 469 return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::f16); 470 } 471 472 bool isVCSrcV2F16() const { 473 return isVCSrcF16(); 474 } 475 476 bool isVSrcB32() const { 477 return isVCSrcF32() || isLiteralImm(MVT::i32) || isExpr(); 478 } 479 480 bool isVSrcB64() const { 481 return isVCSrcF64() || isLiteralImm(MVT::i64); 482 } 483 484 bool isVSrcB16() const { 485 return isVCSrcF16() || isLiteralImm(MVT::i16); 486 } 487 488 bool isVSrcV2B16() const { 489 return isVSrcB16() || isLiteralImm(MVT::v2i16); 490 } 491 492 bool isVSrcF32() const { 493 return isVCSrcF32() || isLiteralImm(MVT::f32) || isExpr(); 494 } 495 496 bool isVSrcF64() const { 497 return isVCSrcF64() || isLiteralImm(MVT::f64); 498 } 499 500 bool isVSrcF16() const { 501 return isVCSrcF16() || isLiteralImm(MVT::f16); 502 } 503 504 bool isVSrcV2F16() const { 505 return isVSrcF16() || isLiteralImm(MVT::v2f16); 506 } 507 508 bool isVISrcB32() const { 509 return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::i32); 510 } 511 512 bool isVISrcB16() const { 513 return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::i16); 514 } 515 516 bool isVISrcV2B16() const { 517 return isVISrcB16(); 518 } 519 520 bool isVISrcF32() const { 521 return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::f32); 522 } 523 524 bool isVISrcF16() const { 525 return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::f16); 526 } 527 528 bool isVISrcV2F16() const { 529 return isVISrcF16() || isVISrcB32(); 530 } 531 532 bool isAISrcB32() const { 533 return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::i32); 534 } 535 536 bool isAISrcB16() const { 537 return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::i16); 538 } 539 540 bool isAISrcV2B16() const { 541 return isAISrcB16(); 542 } 543 544 bool isAISrcF32() const { 545 return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::f32); 546 } 547 548 bool isAISrcF16() const { 549 return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::f16); 550 } 551 552 bool isAISrcV2F16() const { 553 return isAISrcF16() || isAISrcB32(); 554 } 555 556 bool isAISrc_128B32() const { 557 return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::i32); 558 } 559 560 bool isAISrc_128B16() const { 561 return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::i16); 562 } 563 564 bool 
isAISrc_128V2B16() const { 565 return isAISrc_128B16(); 566 } 567 568 bool isAISrc_128F32() const { 569 return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::f32); 570 } 571 572 bool isAISrc_128F16() const { 573 return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::f16); 574 } 575 576 bool isAISrc_128V2F16() const { 577 return isAISrc_128F16() || isAISrc_128B32(); 578 } 579 580 bool isAISrc_512B32() const { 581 return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::i32); 582 } 583 584 bool isAISrc_512B16() const { 585 return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::i16); 586 } 587 588 bool isAISrc_512V2B16() const { 589 return isAISrc_512B16(); 590 } 591 592 bool isAISrc_512F32() const { 593 return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::f32); 594 } 595 596 bool isAISrc_512F16() const { 597 return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::f16); 598 } 599 600 bool isAISrc_512V2F16() const { 601 return isAISrc_512F16() || isAISrc_512B32(); 602 } 603 604 bool isAISrc_1024B32() const { 605 return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::i32); 606 } 607 608 bool isAISrc_1024B16() const { 609 return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::i16); 610 } 611 612 bool isAISrc_1024V2B16() const { 613 return isAISrc_1024B16(); 614 } 615 616 bool isAISrc_1024F32() const { 617 return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::f32); 618 } 619 620 bool isAISrc_1024F16() const { 621 return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::f16); 622 } 623 624 bool isAISrc_1024V2F16() const { 625 return isAISrc_1024F16() || isAISrc_1024B32(); 626 } 627 628 bool isKImmFP32() const { 629 return isLiteralImm(MVT::f32); 630 } 631 632 bool isKImmFP16() const { 633 return isLiteralImm(MVT::f16); 634 } 635 636 bool isMem() const override { 637 return false; 638 } 639 640 bool isExpr() const { 641 return Kind == Expression; 642 } 643 644 bool isSoppBrTarget() const { 645 return isExpr() || isImm(); 646 } 647 648 bool isSWaitCnt() const; 649 bool isHwreg() const; 650 bool isSendMsg() const; 651 bool isSwizzle() const; 652 bool isSMRDOffset8() const; 653 bool isSMRDOffset20() const; 654 bool isSMRDLiteralOffset() const; 655 bool isDPP8() const; 656 bool isDPPCtrl() const; 657 bool isBLGP() const; 658 bool isCBSZ() const; 659 bool isABID() const; 660 bool isGPRIdxMode() const; 661 bool isS16Imm() const; 662 bool isU16Imm() const; 663 bool isEndpgm() const; 664 665 StringRef getExpressionAsToken() const { 666 assert(isExpr()); 667 const MCSymbolRefExpr *S = cast<MCSymbolRefExpr>(Expr); 668 return S->getSymbol().getName(); 669 } 670 671 StringRef getToken() const { 672 assert(isToken()); 673 674 if (Kind == Expression) 675 return getExpressionAsToken(); 676 677 return StringRef(Tok.Data, Tok.Length); 678 } 679 680 int64_t getImm() const { 681 assert(isImm()); 682 return Imm.Val; 683 } 684 685 ImmTy getImmTy() const { 686 assert(isImm()); 687 return Imm.Type; 688 } 689 690 unsigned getReg() const override { 691 assert(isRegKind()); 692 return Reg.RegNo; 693 } 694 695 SMLoc getStartLoc() const override { 696 return StartLoc; 697 } 698 699 SMLoc getEndLoc() const override { 700 return EndLoc; 701 } 702 703 SMRange getLocRange() const { 704 return SMRange(StartLoc, EndLoc); 705 } 706 707 Modifiers getModifiers() const { 708 assert(isRegKind() || isImmTy(ImmTyNone)); 709 return isRegKind() ? 
Reg.Mods : Imm.Mods; 710 } 711 712 void setModifiers(Modifiers Mods) { 713 assert(isRegKind() || isImmTy(ImmTyNone)); 714 if (isRegKind()) 715 Reg.Mods = Mods; 716 else 717 Imm.Mods = Mods; 718 } 719 720 bool hasModifiers() const { 721 return getModifiers().hasModifiers(); 722 } 723 724 bool hasFPModifiers() const { 725 return getModifiers().hasFPModifiers(); 726 } 727 728 bool hasIntModifiers() const { 729 return getModifiers().hasIntModifiers(); 730 } 731 732 uint64_t applyInputFPModifiers(uint64_t Val, unsigned Size) const; 733 734 void addImmOperands(MCInst &Inst, unsigned N, bool ApplyModifiers = true) const; 735 736 void addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyModifiers) const; 737 738 template <unsigned Bitwidth> 739 void addKImmFPOperands(MCInst &Inst, unsigned N) const; 740 741 void addKImmFP16Operands(MCInst &Inst, unsigned N) const { 742 addKImmFPOperands<16>(Inst, N); 743 } 744 745 void addKImmFP32Operands(MCInst &Inst, unsigned N) const { 746 addKImmFPOperands<32>(Inst, N); 747 } 748 749 void addRegOperands(MCInst &Inst, unsigned N) const; 750 751 void addBoolRegOperands(MCInst &Inst, unsigned N) const { 752 addRegOperands(Inst, N); 753 } 754 755 void addRegOrImmOperands(MCInst &Inst, unsigned N) const { 756 if (isRegKind()) 757 addRegOperands(Inst, N); 758 else if (isExpr()) 759 Inst.addOperand(MCOperand::createExpr(Expr)); 760 else 761 addImmOperands(Inst, N); 762 } 763 764 void addRegOrImmWithInputModsOperands(MCInst &Inst, unsigned N) const { 765 Modifiers Mods = getModifiers(); 766 Inst.addOperand(MCOperand::createImm(Mods.getModifiersOperand())); 767 if (isRegKind()) { 768 addRegOperands(Inst, N); 769 } else { 770 addImmOperands(Inst, N, false); 771 } 772 } 773 774 void addRegOrImmWithFPInputModsOperands(MCInst &Inst, unsigned N) const { 775 assert(!hasIntModifiers()); 776 addRegOrImmWithInputModsOperands(Inst, N); 777 } 778 779 void addRegOrImmWithIntInputModsOperands(MCInst &Inst, unsigned N) const { 780 assert(!hasFPModifiers()); 781 addRegOrImmWithInputModsOperands(Inst, N); 782 } 783 784 void addRegWithInputModsOperands(MCInst &Inst, unsigned N) const { 785 Modifiers Mods = getModifiers(); 786 Inst.addOperand(MCOperand::createImm(Mods.getModifiersOperand())); 787 assert(isRegKind()); 788 addRegOperands(Inst, N); 789 } 790 791 void addRegWithFPInputModsOperands(MCInst &Inst, unsigned N) const { 792 assert(!hasIntModifiers()); 793 addRegWithInputModsOperands(Inst, N); 794 } 795 796 void addRegWithIntInputModsOperands(MCInst &Inst, unsigned N) const { 797 assert(!hasFPModifiers()); 798 addRegWithInputModsOperands(Inst, N); 799 } 800 801 void addSoppBrTargetOperands(MCInst &Inst, unsigned N) const { 802 if (isImm()) 803 addImmOperands(Inst, N); 804 else { 805 assert(isExpr()); 806 Inst.addOperand(MCOperand::createExpr(Expr)); 807 } 808 } 809 810 static void printImmTy(raw_ostream& OS, ImmTy Type) { 811 switch (Type) { 812 case ImmTyNone: OS << "None"; break; 813 case ImmTyGDS: OS << "GDS"; break; 814 case ImmTyLDS: OS << "LDS"; break; 815 case ImmTyOffen: OS << "Offen"; break; 816 case ImmTyIdxen: OS << "Idxen"; break; 817 case ImmTyAddr64: OS << "Addr64"; break; 818 case ImmTyOffset: OS << "Offset"; break; 819 case ImmTyInstOffset: OS << "InstOffset"; break; 820 case ImmTyOffset0: OS << "Offset0"; break; 821 case ImmTyOffset1: OS << "Offset1"; break; 822 case ImmTyDLC: OS << "DLC"; break; 823 case ImmTyGLC: OS << "GLC"; break; 824 case ImmTySLC: OS << "SLC"; break; 825 case ImmTySWZ: OS << "SWZ"; break; 826 case ImmTyTFE: OS << "TFE"; break; 827 case 
ImmTyD16: OS << "D16"; break; 828 case ImmTyFORMAT: OS << "FORMAT"; break; 829 case ImmTyClampSI: OS << "ClampSI"; break; 830 case ImmTyOModSI: OS << "OModSI"; break; 831 case ImmTyDPP8: OS << "DPP8"; break; 832 case ImmTyDppCtrl: OS << "DppCtrl"; break; 833 case ImmTyDppRowMask: OS << "DppRowMask"; break; 834 case ImmTyDppBankMask: OS << "DppBankMask"; break; 835 case ImmTyDppBoundCtrl: OS << "DppBoundCtrl"; break; 836 case ImmTyDppFi: OS << "FI"; break; 837 case ImmTySdwaDstSel: OS << "SdwaDstSel"; break; 838 case ImmTySdwaSrc0Sel: OS << "SdwaSrc0Sel"; break; 839 case ImmTySdwaSrc1Sel: OS << "SdwaSrc1Sel"; break; 840 case ImmTySdwaDstUnused: OS << "SdwaDstUnused"; break; 841 case ImmTyDMask: OS << "DMask"; break; 842 case ImmTyDim: OS << "Dim"; break; 843 case ImmTyUNorm: OS << "UNorm"; break; 844 case ImmTyDA: OS << "DA"; break; 845 case ImmTyR128A16: OS << "R128A16"; break; 846 case ImmTyLWE: OS << "LWE"; break; 847 case ImmTyOff: OS << "Off"; break; 848 case ImmTyExpTgt: OS << "ExpTgt"; break; 849 case ImmTyExpCompr: OS << "ExpCompr"; break; 850 case ImmTyExpVM: OS << "ExpVM"; break; 851 case ImmTyHwreg: OS << "Hwreg"; break; 852 case ImmTySendMsg: OS << "SendMsg"; break; 853 case ImmTyInterpSlot: OS << "InterpSlot"; break; 854 case ImmTyInterpAttr: OS << "InterpAttr"; break; 855 case ImmTyAttrChan: OS << "AttrChan"; break; 856 case ImmTyOpSel: OS << "OpSel"; break; 857 case ImmTyOpSelHi: OS << "OpSelHi"; break; 858 case ImmTyNegLo: OS << "NegLo"; break; 859 case ImmTyNegHi: OS << "NegHi"; break; 860 case ImmTySwizzle: OS << "Swizzle"; break; 861 case ImmTyGprIdxMode: OS << "GprIdxMode"; break; 862 case ImmTyHigh: OS << "High"; break; 863 case ImmTyBLGP: OS << "BLGP"; break; 864 case ImmTyCBSZ: OS << "CBSZ"; break; 865 case ImmTyABID: OS << "ABID"; break; 866 case ImmTyEndpgm: OS << "Endpgm"; break; 867 } 868 } 869 870 void print(raw_ostream &OS) const override { 871 switch (Kind) { 872 case Register: 873 OS << "<register " << getReg() << " mods: " << Reg.Mods << '>'; 874 break; 875 case Immediate: 876 OS << '<' << getImm(); 877 if (getImmTy() != ImmTyNone) { 878 OS << " type: "; printImmTy(OS, getImmTy()); 879 } 880 OS << " mods: " << Imm.Mods << '>'; 881 break; 882 case Token: 883 OS << '\'' << getToken() << '\''; 884 break; 885 case Expression: 886 OS << "<expr " << *Expr << '>'; 887 break; 888 } 889 } 890 891 static AMDGPUOperand::Ptr CreateImm(const AMDGPUAsmParser *AsmParser, 892 int64_t Val, SMLoc Loc, 893 ImmTy Type = ImmTyNone, 894 bool IsFPImm = false) { 895 auto Op = std::make_unique<AMDGPUOperand>(Immediate, AsmParser); 896 Op->Imm.Val = Val; 897 Op->Imm.IsFPImm = IsFPImm; 898 Op->Imm.Type = Type; 899 Op->Imm.Mods = Modifiers(); 900 Op->StartLoc = Loc; 901 Op->EndLoc = Loc; 902 return Op; 903 } 904 905 static AMDGPUOperand::Ptr CreateToken(const AMDGPUAsmParser *AsmParser, 906 StringRef Str, SMLoc Loc, 907 bool HasExplicitEncodingSize = true) { 908 auto Res = std::make_unique<AMDGPUOperand>(Token, AsmParser); 909 Res->Tok.Data = Str.data(); 910 Res->Tok.Length = Str.size(); 911 Res->StartLoc = Loc; 912 Res->EndLoc = Loc; 913 return Res; 914 } 915 916 static AMDGPUOperand::Ptr CreateReg(const AMDGPUAsmParser *AsmParser, 917 unsigned RegNo, SMLoc S, 918 SMLoc E) { 919 auto Op = std::make_unique<AMDGPUOperand>(Register, AsmParser); 920 Op->Reg.RegNo = RegNo; 921 Op->Reg.Mods = Modifiers(); 922 Op->StartLoc = S; 923 Op->EndLoc = E; 924 return Op; 925 } 926 927 static AMDGPUOperand::Ptr CreateExpr(const AMDGPUAsmParser *AsmParser, 928 const class MCExpr *Expr, SMLoc S) { 929 
auto Op = std::make_unique<AMDGPUOperand>(Expression, AsmParser); 930 Op->Expr = Expr; 931 Op->StartLoc = S; 932 Op->EndLoc = S; 933 return Op; 934 } 935 }; 936 937 raw_ostream &operator <<(raw_ostream &OS, AMDGPUOperand::Modifiers Mods) { 938 OS << "abs:" << Mods.Abs << " neg: " << Mods.Neg << " sext:" << Mods.Sext; 939 return OS; 940 } 941 942 //===----------------------------------------------------------------------===// 943 // AsmParser 944 //===----------------------------------------------------------------------===// 945 946 // Holds info related to the current kernel, e.g. count of SGPRs used. 947 // Kernel scope begins at .amdgpu_hsa_kernel directive, ends at next 948 // .amdgpu_hsa_kernel or at EOF. 949 class KernelScopeInfo { 950 int SgprIndexUnusedMin = -1; 951 int VgprIndexUnusedMin = -1; 952 MCContext *Ctx = nullptr; 953 954 void usesSgprAt(int i) { 955 if (i >= SgprIndexUnusedMin) { 956 SgprIndexUnusedMin = ++i; 957 if (Ctx) { 958 MCSymbol * const Sym = Ctx->getOrCreateSymbol(Twine(".kernel.sgpr_count")); 959 Sym->setVariableValue(MCConstantExpr::create(SgprIndexUnusedMin, *Ctx)); 960 } 961 } 962 } 963 964 void usesVgprAt(int i) { 965 if (i >= VgprIndexUnusedMin) { 966 VgprIndexUnusedMin = ++i; 967 if (Ctx) { 968 MCSymbol * const Sym = Ctx->getOrCreateSymbol(Twine(".kernel.vgpr_count")); 969 Sym->setVariableValue(MCConstantExpr::create(VgprIndexUnusedMin, *Ctx)); 970 } 971 } 972 } 973 974 public: 975 KernelScopeInfo() = default; 976 977 void initialize(MCContext &Context) { 978 Ctx = &Context; 979 usesSgprAt(SgprIndexUnusedMin = -1); 980 usesVgprAt(VgprIndexUnusedMin = -1); 981 } 982 983 void usesRegister(RegisterKind RegKind, unsigned DwordRegIndex, unsigned RegWidth) { 984 switch (RegKind) { 985 case IS_SGPR: usesSgprAt(DwordRegIndex + RegWidth - 1); break; 986 case IS_AGPR: // fall through 987 case IS_VGPR: usesVgprAt(DwordRegIndex + RegWidth - 1); break; 988 default: break; 989 } 990 } 991 }; 992 993 class AMDGPUAsmParser : public MCTargetAsmParser { 994 MCAsmParser &Parser; 995 996 // Number of extra operands parsed after the first optional operand. 997 // This may be necessary to skip hardcoded mandatory operands. 998 static const unsigned MAX_OPR_LOOKAHEAD = 8; 999 1000 unsigned ForcedEncodingSize = 0; 1001 bool ForcedDPP = false; 1002 bool ForcedSDWA = false; 1003 KernelScopeInfo KernelScope; 1004 1005 /// @name Auto-generated Match Functions 1006 /// { 1007 1008 #define GET_ASSEMBLER_HEADER 1009 #include "AMDGPUGenAsmMatcher.inc" 1010 1011 /// } 1012 1013 private: 1014 bool ParseAsAbsoluteExpression(uint32_t &Ret); 1015 bool OutOfRangeError(SMRange Range); 1016 /// Calculate VGPR/SGPR blocks required for given target, reserved 1017 /// registers, and user-specified NextFreeXGPR values. 1018 /// 1019 /// \param Features [in] Target features, used for bug corrections. 1020 /// \param VCCUsed [in] Whether VCC special SGPR is reserved. 1021 /// \param FlatScrUsed [in] Whether FLAT_SCRATCH special SGPR is reserved. 1022 /// \param XNACKUsed [in] Whether XNACK_MASK special SGPR is reserved. 1023 /// \param EnableWavefrontSize32 [in] Value of ENABLE_WAVEFRONT_SIZE32 kernel 1024 /// descriptor field, if valid. 1025 /// \param NextFreeVGPR [in] Max VGPR number referenced, plus one. 1026 /// \param VGPRRange [in] Token range, used for VGPR diagnostics. 1027 /// \param NextFreeSGPR [in] Max SGPR number referenced, plus one. 1028 /// \param SGPRRange [in] Token range, used for SGPR diagnostics. 1029 /// \param VGPRBlocks [out] Result VGPR block count. 
1030 /// \param SGPRBlocks [out] Result SGPR block count. 1031 bool calculateGPRBlocks(const FeatureBitset &Features, bool VCCUsed, 1032 bool FlatScrUsed, bool XNACKUsed, 1033 Optional<bool> EnableWavefrontSize32, unsigned NextFreeVGPR, 1034 SMRange VGPRRange, unsigned NextFreeSGPR, 1035 SMRange SGPRRange, unsigned &VGPRBlocks, 1036 unsigned &SGPRBlocks); 1037 bool ParseDirectiveAMDGCNTarget(); 1038 bool ParseDirectiveAMDHSAKernel(); 1039 bool ParseDirectiveMajorMinor(uint32_t &Major, uint32_t &Minor); 1040 bool ParseDirectiveHSACodeObjectVersion(); 1041 bool ParseDirectiveHSACodeObjectISA(); 1042 bool ParseAMDKernelCodeTValue(StringRef ID, amd_kernel_code_t &Header); 1043 bool ParseDirectiveAMDKernelCodeT(); 1044 bool subtargetHasRegister(const MCRegisterInfo &MRI, unsigned RegNo) const; 1045 bool ParseDirectiveAMDGPUHsaKernel(); 1046 1047 bool ParseDirectiveISAVersion(); 1048 bool ParseDirectiveHSAMetadata(); 1049 bool ParseDirectivePALMetadataBegin(); 1050 bool ParseDirectivePALMetadata(); 1051 bool ParseDirectiveAMDGPULDS(); 1052 1053 /// Common code to parse out a block of text (typically YAML) between start and 1054 /// end directives. 1055 bool ParseToEndDirective(const char *AssemblerDirectiveBegin, 1056 const char *AssemblerDirectiveEnd, 1057 std::string &CollectString); 1058 1059 bool AddNextRegisterToList(unsigned& Reg, unsigned& RegWidth, 1060 RegisterKind RegKind, unsigned Reg1); 1061 bool ParseAMDGPURegister(RegisterKind& RegKind, unsigned& Reg, 1062 unsigned& RegNum, unsigned& RegWidth); 1063 unsigned ParseRegularReg(RegisterKind &RegKind, 1064 unsigned &RegNum, 1065 unsigned &RegWidth); 1066 unsigned ParseSpecialReg(RegisterKind &RegKind, 1067 unsigned &RegNum, 1068 unsigned &RegWidth); 1069 unsigned ParseRegList(RegisterKind &RegKind, 1070 unsigned &RegNum, 1071 unsigned &RegWidth); 1072 bool ParseRegRange(unsigned& Num, unsigned& Width); 1073 unsigned getRegularReg(RegisterKind RegKind, 1074 unsigned RegNum, 1075 unsigned RegWidth); 1076 1077 bool isRegister(); 1078 bool isRegister(const AsmToken &Token, const AsmToken &NextToken) const; 1079 Optional<StringRef> getGprCountSymbolName(RegisterKind RegKind); 1080 void initializeGprCountSymbol(RegisterKind RegKind); 1081 bool updateGprCountSymbols(RegisterKind RegKind, unsigned DwordRegIndex, 1082 unsigned RegWidth); 1083 void cvtMubufImpl(MCInst &Inst, const OperandVector &Operands, 1084 bool IsAtomic, bool IsAtomicReturn, bool IsLds = false); 1085 void cvtDSImpl(MCInst &Inst, const OperandVector &Operands, 1086 bool IsGdsHardcoded); 1087 1088 public: 1089 enum AMDGPUMatchResultTy { 1090 Match_PreferE32 = FIRST_TARGET_MATCH_RESULT_TY 1091 }; 1092 enum OperandMode { 1093 OperandMode_Default, 1094 OperandMode_NSA, 1095 }; 1096 1097 using OptionalImmIndexMap = std::map<AMDGPUOperand::ImmTy, unsigned>; 1098 1099 AMDGPUAsmParser(const MCSubtargetInfo &STI, MCAsmParser &_Parser, 1100 const MCInstrInfo &MII, 1101 const MCTargetOptions &Options) 1102 : MCTargetAsmParser(Options, STI, MII), Parser(_Parser) { 1103 MCAsmParserExtension::Initialize(Parser); 1104 1105 if (getFeatureBits().none()) { 1106 // Set default features. 1107 copySTI().ToggleFeature("southern-islands"); 1108 } 1109 1110 setAvailableFeatures(ComputeAvailableFeatures(getFeatureBits())); 1111 1112 { 1113 // TODO: make those pre-defined variables read-only. 1114 // Currently there is none suitable machinery in the core llvm-mc for this. 
1115 // MCSymbol::isRedefinable is intended for another purpose, and 1116 // AsmParser::parseDirectiveSet() cannot be specialized for specific target. 1117 AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU()); 1118 MCContext &Ctx = getContext(); 1119 if (ISA.Major >= 6 && AMDGPU::IsaInfo::hasCodeObjectV3(&getSTI())) { 1120 MCSymbol *Sym = 1121 Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_number")); 1122 Sym->setVariableValue(MCConstantExpr::create(ISA.Major, Ctx)); 1123 Sym = Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_minor")); 1124 Sym->setVariableValue(MCConstantExpr::create(ISA.Minor, Ctx)); 1125 Sym = Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_stepping")); 1126 Sym->setVariableValue(MCConstantExpr::create(ISA.Stepping, Ctx)); 1127 } else { 1128 MCSymbol *Sym = 1129 Ctx.getOrCreateSymbol(Twine(".option.machine_version_major")); 1130 Sym->setVariableValue(MCConstantExpr::create(ISA.Major, Ctx)); 1131 Sym = Ctx.getOrCreateSymbol(Twine(".option.machine_version_minor")); 1132 Sym->setVariableValue(MCConstantExpr::create(ISA.Minor, Ctx)); 1133 Sym = Ctx.getOrCreateSymbol(Twine(".option.machine_version_stepping")); 1134 Sym->setVariableValue(MCConstantExpr::create(ISA.Stepping, Ctx)); 1135 } 1136 if (ISA.Major >= 6 && AMDGPU::IsaInfo::hasCodeObjectV3(&getSTI())) { 1137 initializeGprCountSymbol(IS_VGPR); 1138 initializeGprCountSymbol(IS_SGPR); 1139 } else 1140 KernelScope.initialize(getContext()); 1141 } 1142 } 1143 1144 bool hasXNACK() const { 1145 return AMDGPU::hasXNACK(getSTI()); 1146 } 1147 1148 bool hasMIMG_R128() const { 1149 return AMDGPU::hasMIMG_R128(getSTI()); 1150 } 1151 1152 bool hasPackedD16() const { 1153 return AMDGPU::hasPackedD16(getSTI()); 1154 } 1155 1156 bool isSI() const { 1157 return AMDGPU::isSI(getSTI()); 1158 } 1159 1160 bool isCI() const { 1161 return AMDGPU::isCI(getSTI()); 1162 } 1163 1164 bool isVI() const { 1165 return AMDGPU::isVI(getSTI()); 1166 } 1167 1168 bool isGFX9() const { 1169 return AMDGPU::isGFX9(getSTI()); 1170 } 1171 1172 bool isGFX10() const { 1173 return AMDGPU::isGFX10(getSTI()); 1174 } 1175 1176 bool hasInv2PiInlineImm() const { 1177 return getFeatureBits()[AMDGPU::FeatureInv2PiInlineImm]; 1178 } 1179 1180 bool hasFlatOffsets() const { 1181 return getFeatureBits()[AMDGPU::FeatureFlatInstOffsets]; 1182 } 1183 1184 bool hasSGPR102_SGPR103() const { 1185 return !isVI() && !isGFX9(); 1186 } 1187 1188 bool hasSGPR104_SGPR105() const { 1189 return isGFX10(); 1190 } 1191 1192 bool hasIntClamp() const { 1193 return getFeatureBits()[AMDGPU::FeatureIntClamp]; 1194 } 1195 1196 AMDGPUTargetStreamer &getTargetStreamer() { 1197 MCTargetStreamer &TS = *getParser().getStreamer().getTargetStreamer(); 1198 return static_cast<AMDGPUTargetStreamer &>(TS); 1199 } 1200 1201 const MCRegisterInfo *getMRI() const { 1202 // We need this const_cast because for some reason getContext() is not const 1203 // in MCAsmParser. 
1204 return const_cast<AMDGPUAsmParser*>(this)->getContext().getRegisterInfo(); 1205 } 1206 1207 const MCInstrInfo *getMII() const { 1208 return &MII; 1209 } 1210 1211 const FeatureBitset &getFeatureBits() const { 1212 return getSTI().getFeatureBits(); 1213 } 1214 1215 void setForcedEncodingSize(unsigned Size) { ForcedEncodingSize = Size; } 1216 void setForcedDPP(bool ForceDPP_) { ForcedDPP = ForceDPP_; } 1217 void setForcedSDWA(bool ForceSDWA_) { ForcedSDWA = ForceSDWA_; } 1218 1219 unsigned getForcedEncodingSize() const { return ForcedEncodingSize; } 1220 bool isForcedVOP3() const { return ForcedEncodingSize == 64; } 1221 bool isForcedDPP() const { return ForcedDPP; } 1222 bool isForcedSDWA() const { return ForcedSDWA; } 1223 ArrayRef<unsigned> getMatchedVariants() const; 1224 1225 std::unique_ptr<AMDGPUOperand> parseRegister(); 1226 bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc) override; 1227 unsigned checkTargetMatchPredicate(MCInst &Inst) override; 1228 unsigned validateTargetOperandClass(MCParsedAsmOperand &Op, 1229 unsigned Kind) override; 1230 bool MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode, 1231 OperandVector &Operands, MCStreamer &Out, 1232 uint64_t &ErrorInfo, 1233 bool MatchingInlineAsm) override; 1234 bool ParseDirective(AsmToken DirectiveID) override; 1235 OperandMatchResultTy parseOperand(OperandVector &Operands, StringRef Mnemonic, 1236 OperandMode Mode = OperandMode_Default); 1237 StringRef parseMnemonicSuffix(StringRef Name); 1238 bool ParseInstruction(ParseInstructionInfo &Info, StringRef Name, 1239 SMLoc NameLoc, OperandVector &Operands) override; 1240 //bool ProcessInstruction(MCInst &Inst); 1241 1242 OperandMatchResultTy parseIntWithPrefix(const char *Prefix, int64_t &Int); 1243 1244 OperandMatchResultTy 1245 parseIntWithPrefix(const char *Prefix, OperandVector &Operands, 1246 AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone, 1247 bool (*ConvertResult)(int64_t &) = nullptr); 1248 1249 OperandMatchResultTy 1250 parseOperandArrayWithPrefix(const char *Prefix, 1251 OperandVector &Operands, 1252 AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone, 1253 bool (*ConvertResult)(int64_t&) = nullptr); 1254 1255 OperandMatchResultTy 1256 parseNamedBit(const char *Name, OperandVector &Operands, 1257 AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone); 1258 OperandMatchResultTy parseStringWithPrefix(StringRef Prefix, 1259 StringRef &Value); 1260 1261 bool isModifier(); 1262 bool isOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const; 1263 bool isRegOrOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const; 1264 bool isNamedOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const; 1265 bool isOpcodeModifierWithVal(const AsmToken &Token, const AsmToken &NextToken) const; 1266 bool parseSP3NegModifier(); 1267 OperandMatchResultTy parseImm(OperandVector &Operands, bool HasSP3AbsModifier = false); 1268 OperandMatchResultTy parseReg(OperandVector &Operands); 1269 OperandMatchResultTy parseRegOrImm(OperandVector &Operands, bool HasSP3AbsMod = false); 1270 OperandMatchResultTy parseRegOrImmWithFPInputMods(OperandVector &Operands, bool AllowImm = true); 1271 OperandMatchResultTy parseRegOrImmWithIntInputMods(OperandVector &Operands, bool AllowImm = true); 1272 OperandMatchResultTy parseRegWithFPInputMods(OperandVector &Operands); 1273 OperandMatchResultTy parseRegWithIntInputMods(OperandVector &Operands); 1274 OperandMatchResultTy parseVReg32OrOff(OperandVector &Operands); 1275 OperandMatchResultTy 
parseDfmtNfmt(OperandVector &Operands); 1276 1277 void cvtDSOffset01(MCInst &Inst, const OperandVector &Operands); 1278 void cvtDS(MCInst &Inst, const OperandVector &Operands) { cvtDSImpl(Inst, Operands, false); } 1279 void cvtDSGds(MCInst &Inst, const OperandVector &Operands) { cvtDSImpl(Inst, Operands, true); } 1280 void cvtExp(MCInst &Inst, const OperandVector &Operands); 1281 1282 bool parseCnt(int64_t &IntVal); 1283 OperandMatchResultTy parseSWaitCntOps(OperandVector &Operands); 1284 OperandMatchResultTy parseHwreg(OperandVector &Operands); 1285 1286 private: 1287 struct OperandInfoTy { 1288 int64_t Id; 1289 bool IsSymbolic = false; 1290 bool IsDefined = false; 1291 1292 OperandInfoTy(int64_t Id_) : Id(Id_) {} 1293 }; 1294 1295 bool parseSendMsgBody(OperandInfoTy &Msg, OperandInfoTy &Op, OperandInfoTy &Stream); 1296 bool validateSendMsg(const OperandInfoTy &Msg, 1297 const OperandInfoTy &Op, 1298 const OperandInfoTy &Stream, 1299 const SMLoc Loc); 1300 1301 bool parseHwregBody(OperandInfoTy &HwReg, int64_t &Offset, int64_t &Width); 1302 bool validateHwreg(const OperandInfoTy &HwReg, 1303 const int64_t Offset, 1304 const int64_t Width, 1305 const SMLoc Loc); 1306 1307 void errorExpTgt(); 1308 OperandMatchResultTy parseExpTgtImpl(StringRef Str, uint8_t &Val); 1309 SMLoc getFlatOffsetLoc(const OperandVector &Operands) const; 1310 1311 bool validateInstruction(const MCInst &Inst, const SMLoc &IDLoc, const OperandVector &Operands); 1312 bool validateFlatOffset(const MCInst &Inst, const OperandVector &Operands); 1313 bool validateSOPLiteral(const MCInst &Inst) const; 1314 bool validateConstantBusLimitations(const MCInst &Inst); 1315 bool validateEarlyClobberLimitations(const MCInst &Inst); 1316 bool validateIntClampSupported(const MCInst &Inst); 1317 bool validateMIMGAtomicDMask(const MCInst &Inst); 1318 bool validateMIMGGatherDMask(const MCInst &Inst); 1319 bool validateMIMGDataSize(const MCInst &Inst); 1320 bool validateMIMGAddrSize(const MCInst &Inst); 1321 bool validateMIMGD16(const MCInst &Inst); 1322 bool validateMIMGDim(const MCInst &Inst); 1323 bool validateLdsDirect(const MCInst &Inst); 1324 bool validateOpSel(const MCInst &Inst); 1325 bool validateVccOperand(unsigned Reg) const; 1326 bool validateVOP3Literal(const MCInst &Inst) const; 1327 unsigned getConstantBusLimit(unsigned Opcode) const; 1328 bool usesConstantBus(const MCInst &Inst, unsigned OpIdx); 1329 bool isInlineConstant(const MCInst &Inst, unsigned OpIdx) const; 1330 unsigned findImplicitSGPRReadInVOP(const MCInst &Inst) const; 1331 1332 bool isId(const StringRef Id) const; 1333 bool isId(const AsmToken &Token, const StringRef Id) const; 1334 bool isToken(const AsmToken::TokenKind Kind) const; 1335 bool trySkipId(const StringRef Id); 1336 bool trySkipId(const StringRef Id, const AsmToken::TokenKind Kind); 1337 bool trySkipToken(const AsmToken::TokenKind Kind); 1338 bool skipToken(const AsmToken::TokenKind Kind, const StringRef ErrMsg); 1339 bool parseString(StringRef &Val, const StringRef ErrMsg = "expected a string"); 1340 void peekTokens(MutableArrayRef<AsmToken> Tokens); 1341 AsmToken::TokenKind getTokenKind() const; 1342 bool parseExpr(int64_t &Imm); 1343 bool parseExpr(OperandVector &Operands); 1344 StringRef getTokenStr() const; 1345 AsmToken peekToken(); 1346 AsmToken getToken() const; 1347 SMLoc getLoc() const; 1348 void lex(); 1349 1350 public: 1351 OperandMatchResultTy parseOptionalOperand(OperandVector &Operands); 1352 OperandMatchResultTy parseOptionalOpr(OperandVector &Operands); 1353 1354 
OperandMatchResultTy parseExpTgt(OperandVector &Operands); 1355 OperandMatchResultTy parseSendMsgOp(OperandVector &Operands); 1356 OperandMatchResultTy parseInterpSlot(OperandVector &Operands); 1357 OperandMatchResultTy parseInterpAttr(OperandVector &Operands); 1358 OperandMatchResultTy parseSOppBrTarget(OperandVector &Operands); 1359 OperandMatchResultTy parseBoolReg(OperandVector &Operands); 1360 1361 bool parseSwizzleOperands(const unsigned OpNum, int64_t* Op, 1362 const unsigned MinVal, 1363 const unsigned MaxVal, 1364 const StringRef ErrMsg); 1365 OperandMatchResultTy parseSwizzleOp(OperandVector &Operands); 1366 bool parseSwizzleOffset(int64_t &Imm); 1367 bool parseSwizzleMacro(int64_t &Imm); 1368 bool parseSwizzleQuadPerm(int64_t &Imm); 1369 bool parseSwizzleBitmaskPerm(int64_t &Imm); 1370 bool parseSwizzleBroadcast(int64_t &Imm); 1371 bool parseSwizzleSwap(int64_t &Imm); 1372 bool parseSwizzleReverse(int64_t &Imm); 1373 1374 OperandMatchResultTy parseGPRIdxMode(OperandVector &Operands); 1375 int64_t parseGPRIdxMacro(); 1376 1377 void cvtMubuf(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, false, false); } 1378 void cvtMubufAtomic(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, true, false); } 1379 void cvtMubufAtomicReturn(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, true, true); } 1380 void cvtMubufLds(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, false, false, true); } 1381 void cvtMtbuf(MCInst &Inst, const OperandVector &Operands); 1382 1383 AMDGPUOperand::Ptr defaultDLC() const; 1384 AMDGPUOperand::Ptr defaultGLC() const; 1385 AMDGPUOperand::Ptr defaultSLC() const; 1386 1387 AMDGPUOperand::Ptr defaultSMRDOffset8() const; 1388 AMDGPUOperand::Ptr defaultSMRDOffset20() const; 1389 AMDGPUOperand::Ptr defaultSMRDLiteralOffset() const; 1390 AMDGPUOperand::Ptr defaultFlatOffset() const; 1391 1392 OperandMatchResultTy parseOModOperand(OperandVector &Operands); 1393 1394 void cvtVOP3(MCInst &Inst, const OperandVector &Operands, 1395 OptionalImmIndexMap &OptionalIdx); 1396 void cvtVOP3OpSel(MCInst &Inst, const OperandVector &Operands); 1397 void cvtVOP3(MCInst &Inst, const OperandVector &Operands); 1398 void cvtVOP3P(MCInst &Inst, const OperandVector &Operands); 1399 1400 void cvtVOP3Interp(MCInst &Inst, const OperandVector &Operands); 1401 1402 void cvtMIMG(MCInst &Inst, const OperandVector &Operands, 1403 bool IsAtomic = false); 1404 void cvtMIMGAtomic(MCInst &Inst, const OperandVector &Operands); 1405 1406 OperandMatchResultTy parseDim(OperandVector &Operands); 1407 OperandMatchResultTy parseDPP8(OperandVector &Operands); 1408 OperandMatchResultTy parseDPPCtrl(OperandVector &Operands); 1409 AMDGPUOperand::Ptr defaultRowMask() const; 1410 AMDGPUOperand::Ptr defaultBankMask() const; 1411 AMDGPUOperand::Ptr defaultBoundCtrl() const; 1412 AMDGPUOperand::Ptr defaultFI() const; 1413 void cvtDPP(MCInst &Inst, const OperandVector &Operands, bool IsDPP8 = false); 1414 void cvtDPP8(MCInst &Inst, const OperandVector &Operands) { cvtDPP(Inst, Operands, true); } 1415 1416 OperandMatchResultTy parseSDWASel(OperandVector &Operands, StringRef Prefix, 1417 AMDGPUOperand::ImmTy Type); 1418 OperandMatchResultTy parseSDWADstUnused(OperandVector &Operands); 1419 void cvtSdwaVOP1(MCInst &Inst, const OperandVector &Operands); 1420 void cvtSdwaVOP2(MCInst &Inst, const OperandVector &Operands); 1421 void cvtSdwaVOP2b(MCInst &Inst, const OperandVector &Operands); 1422 void cvtSdwaVOPC(MCInst 
&Inst, const OperandVector &Operands); 1423 void cvtSDWA(MCInst &Inst, const OperandVector &Operands, 1424 uint64_t BasicInstType, bool skipVcc = false); 1425 1426 AMDGPUOperand::Ptr defaultBLGP() const; 1427 AMDGPUOperand::Ptr defaultCBSZ() const; 1428 AMDGPUOperand::Ptr defaultABID() const; 1429 1430 OperandMatchResultTy parseEndpgmOp(OperandVector &Operands); 1431 AMDGPUOperand::Ptr defaultEndpgmImmOperands() const; 1432 }; 1433 1434 struct OptionalOperand { 1435 const char *Name; 1436 AMDGPUOperand::ImmTy Type; 1437 bool IsBit; 1438 bool (*ConvertResult)(int64_t&); 1439 }; 1440 1441 } // end anonymous namespace 1442 1443 // May be called with integer type with equivalent bitwidth. 1444 static const fltSemantics *getFltSemantics(unsigned Size) { 1445 switch (Size) { 1446 case 4: 1447 return &APFloat::IEEEsingle(); 1448 case 8: 1449 return &APFloat::IEEEdouble(); 1450 case 2: 1451 return &APFloat::IEEEhalf(); 1452 default: 1453 llvm_unreachable("unsupported fp type"); 1454 } 1455 } 1456 1457 static const fltSemantics *getFltSemantics(MVT VT) { 1458 return getFltSemantics(VT.getSizeInBits() / 8); 1459 } 1460 1461 static const fltSemantics *getOpFltSemantics(uint8_t OperandType) { 1462 switch (OperandType) { 1463 case AMDGPU::OPERAND_REG_IMM_INT32: 1464 case AMDGPU::OPERAND_REG_IMM_FP32: 1465 case AMDGPU::OPERAND_REG_INLINE_C_INT32: 1466 case AMDGPU::OPERAND_REG_INLINE_C_FP32: 1467 case AMDGPU::OPERAND_REG_INLINE_AC_INT32: 1468 case AMDGPU::OPERAND_REG_INLINE_AC_FP32: 1469 return &APFloat::IEEEsingle(); 1470 case AMDGPU::OPERAND_REG_IMM_INT64: 1471 case AMDGPU::OPERAND_REG_IMM_FP64: 1472 case AMDGPU::OPERAND_REG_INLINE_C_INT64: 1473 case AMDGPU::OPERAND_REG_INLINE_C_FP64: 1474 return &APFloat::IEEEdouble(); 1475 case AMDGPU::OPERAND_REG_IMM_INT16: 1476 case AMDGPU::OPERAND_REG_IMM_FP16: 1477 case AMDGPU::OPERAND_REG_INLINE_C_INT16: 1478 case AMDGPU::OPERAND_REG_INLINE_C_FP16: 1479 case AMDGPU::OPERAND_REG_INLINE_C_V2INT16: 1480 case AMDGPU::OPERAND_REG_INLINE_C_V2FP16: 1481 case AMDGPU::OPERAND_REG_INLINE_AC_INT16: 1482 case AMDGPU::OPERAND_REG_INLINE_AC_FP16: 1483 case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16: 1484 case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16: 1485 case AMDGPU::OPERAND_REG_IMM_V2INT16: 1486 case AMDGPU::OPERAND_REG_IMM_V2FP16: 1487 return &APFloat::IEEEhalf(); 1488 default: 1489 llvm_unreachable("unsupported fp type"); 1490 } 1491 } 1492 1493 //===----------------------------------------------------------------------===// 1494 // Operand 1495 //===----------------------------------------------------------------------===// 1496 1497 static bool canLosslesslyConvertToFPType(APFloat &FPLiteral, MVT VT) { 1498 bool Lost; 1499 1500 // Convert literal to single precision 1501 APFloat::opStatus Status = FPLiteral.convert(*getFltSemantics(VT), 1502 APFloat::rmNearestTiesToEven, 1503 &Lost); 1504 // We allow precision lost but not overflow or underflow 1505 if (Status != APFloat::opOK && 1506 Lost && 1507 ((Status & APFloat::opOverflow) != 0 || 1508 (Status & APFloat::opUnderflow) != 0)) { 1509 return false; 1510 } 1511 1512 return true; 1513 } 1514 1515 static bool isSafeTruncation(int64_t Val, unsigned Size) { 1516 return isUIntN(Size, Val) || isIntN(Size, Val); 1517 } 1518 1519 bool AMDGPUOperand::isInlinableImm(MVT type) const { 1520 1521 // This is a hack to enable named inline values like 1522 // shared_base with both 32-bit and 64-bit operands. 1523 // Note that these values are defined as 1524 // 32-bit operands only. 
1525 if (isInlineValue()) { 1526 return true; 1527 } 1528 1529 if (!isImmTy(ImmTyNone)) { 1530 // Only plain immediates are inlinable (e.g. "clamp" attribute is not) 1531 return false; 1532 } 1533 // TODO: We should avoid using host float here. It would be better to 1534 // check the float bit values which is what a few other places do. 1535 // We've had bot failures before due to weird NaN support on mips hosts. 1536 1537 APInt Literal(64, Imm.Val); 1538 1539 if (Imm.IsFPImm) { // We got fp literal token 1540 if (type == MVT::f64 || type == MVT::i64) { // Expected 64-bit operand 1541 return AMDGPU::isInlinableLiteral64(Imm.Val, 1542 AsmParser->hasInv2PiInlineImm()); 1543 } 1544 1545 APFloat FPLiteral(APFloat::IEEEdouble(), APInt(64, Imm.Val)); 1546 if (!canLosslesslyConvertToFPType(FPLiteral, type)) 1547 return false; 1548 1549 if (type.getScalarSizeInBits() == 16) { 1550 return AMDGPU::isInlinableLiteral16( 1551 static_cast<int16_t>(FPLiteral.bitcastToAPInt().getZExtValue()), 1552 AsmParser->hasInv2PiInlineImm()); 1553 } 1554 1555 // Check if single precision literal is inlinable 1556 return AMDGPU::isInlinableLiteral32( 1557 static_cast<int32_t>(FPLiteral.bitcastToAPInt().getZExtValue()), 1558 AsmParser->hasInv2PiInlineImm()); 1559 } 1560 1561 // We got int literal token. 1562 if (type == MVT::f64 || type == MVT::i64) { // Expected 64-bit operand 1563 return AMDGPU::isInlinableLiteral64(Imm.Val, 1564 AsmParser->hasInv2PiInlineImm()); 1565 } 1566 1567 if (!isSafeTruncation(Imm.Val, type.getScalarSizeInBits())) { 1568 return false; 1569 } 1570 1571 if (type.getScalarSizeInBits() == 16) { 1572 return AMDGPU::isInlinableLiteral16( 1573 static_cast<int16_t>(Literal.getLoBits(16).getSExtValue()), 1574 AsmParser->hasInv2PiInlineImm()); 1575 } 1576 1577 return AMDGPU::isInlinableLiteral32( 1578 static_cast<int32_t>(Literal.getLoBits(32).getZExtValue()), 1579 AsmParser->hasInv2PiInlineImm()); 1580 } 1581 1582 bool AMDGPUOperand::isLiteralImm(MVT type) const { 1583 // Check that this immediate can be added as literal 1584 if (!isImmTy(ImmTyNone)) { 1585 return false; 1586 } 1587 1588 if (!Imm.IsFPImm) { 1589 // We got int literal token. 1590 1591 if (type == MVT::f64 && hasFPModifiers()) { 1592 // Cannot apply fp modifiers to int literals preserving the same semantics 1593 // for VOP1/2/C and VOP3 because of integer truncation. To avoid ambiguity, 1594 // disable these cases. 1595 return false; 1596 } 1597 1598 unsigned Size = type.getSizeInBits(); 1599 if (Size == 64) 1600 Size = 32; 1601 1602 // FIXME: 64-bit operands can zero extend, sign extend, or pad zeroes for FP 1603 // types. 1604 return isSafeTruncation(Imm.Val, Size); 1605 } 1606 1607 // We got fp literal token 1608 if (type == MVT::f64) { // Expected 64-bit fp operand 1609 // We would set low 64-bits of literal to zeroes but we accept this literals 1610 return true; 1611 } 1612 1613 if (type == MVT::i64) { // Expected 64-bit int operand 1614 // We don't allow fp literals in 64-bit integer instructions. It is 1615 // unclear how we should encode them. 1616 return false; 1617 } 1618 1619 // We allow fp literals with f16x2 operands assuming that the specified 1620 // literal goes into the lower half and the upper half is zero. We also 1621 // require that the literal may be losslesly converted to f16. 1622 MVT ExpectedType = (type == MVT::v2f16)? MVT::f16 : 1623 (type == MVT::v2i16)? 
MVT::i16 : type; 1624 1625 APFloat FPLiteral(APFloat::IEEEdouble(), APInt(64, Imm.Val)); 1626 return canLosslesslyConvertToFPType(FPLiteral, ExpectedType); 1627 } 1628 1629 bool AMDGPUOperand::isRegClass(unsigned RCID) const { 1630 return isRegKind() && AsmParser->getMRI()->getRegClass(RCID).contains(getReg()); 1631 } 1632 1633 bool AMDGPUOperand::isSDWAOperand(MVT type) const { 1634 if (AsmParser->isVI()) 1635 return isVReg32(); 1636 else if (AsmParser->isGFX9() || AsmParser->isGFX10()) 1637 return isRegClass(AMDGPU::VS_32RegClassID) || isInlinableImm(type); 1638 else 1639 return false; 1640 } 1641 1642 bool AMDGPUOperand::isSDWAFP16Operand() const { 1643 return isSDWAOperand(MVT::f16); 1644 } 1645 1646 bool AMDGPUOperand::isSDWAFP32Operand() const { 1647 return isSDWAOperand(MVT::f32); 1648 } 1649 1650 bool AMDGPUOperand::isSDWAInt16Operand() const { 1651 return isSDWAOperand(MVT::i16); 1652 } 1653 1654 bool AMDGPUOperand::isSDWAInt32Operand() const { 1655 return isSDWAOperand(MVT::i32); 1656 } 1657 1658 bool AMDGPUOperand::isBoolReg() const { 1659 return (AsmParser->getFeatureBits()[AMDGPU::FeatureWavefrontSize64] && isSCSrcB64()) || 1660 (AsmParser->getFeatureBits()[AMDGPU::FeatureWavefrontSize32] && isSCSrcB32()); 1661 } 1662 1663 uint64_t AMDGPUOperand::applyInputFPModifiers(uint64_t Val, unsigned Size) const 1664 { 1665 assert(isImmTy(ImmTyNone) && Imm.Mods.hasFPModifiers()); 1666 assert(Size == 2 || Size == 4 || Size == 8); 1667 1668 const uint64_t FpSignMask = (1ULL << (Size * 8 - 1)); 1669 1670 if (Imm.Mods.Abs) { 1671 Val &= ~FpSignMask; 1672 } 1673 if (Imm.Mods.Neg) { 1674 Val ^= FpSignMask; 1675 } 1676 1677 return Val; 1678 } 1679 1680 void AMDGPUOperand::addImmOperands(MCInst &Inst, unsigned N, bool ApplyModifiers) const { 1681 if (AMDGPU::isSISrcOperand(AsmParser->getMII()->get(Inst.getOpcode()), 1682 Inst.getNumOperands())) { 1683 addLiteralImmOperand(Inst, Imm.Val, 1684 ApplyModifiers & 1685 isImmTy(ImmTyNone) && Imm.Mods.hasFPModifiers()); 1686 } else { 1687 assert(!isImmTy(ImmTyNone) || !hasModifiers()); 1688 Inst.addOperand(MCOperand::createImm(Imm.Val)); 1689 } 1690 } 1691 1692 void AMDGPUOperand::addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyModifiers) const { 1693 const auto& InstDesc = AsmParser->getMII()->get(Inst.getOpcode()); 1694 auto OpNum = Inst.getNumOperands(); 1695 // Check that this operand accepts literals 1696 assert(AMDGPU::isSISrcOperand(InstDesc, OpNum)); 1697 1698 if (ApplyModifiers) { 1699 assert(AMDGPU::isSISrcFPOperand(InstDesc, OpNum)); 1700 const unsigned Size = Imm.IsFPImm ? 
sizeof(double) : getOperandSize(InstDesc, OpNum); 1701 Val = applyInputFPModifiers(Val, Size); 1702 } 1703 1704 APInt Literal(64, Val); 1705 uint8_t OpTy = InstDesc.OpInfo[OpNum].OperandType; 1706 1707 if (Imm.IsFPImm) { // We got fp literal token 1708 switch (OpTy) { 1709 case AMDGPU::OPERAND_REG_IMM_INT64: 1710 case AMDGPU::OPERAND_REG_IMM_FP64: 1711 case AMDGPU::OPERAND_REG_INLINE_C_INT64: 1712 case AMDGPU::OPERAND_REG_INLINE_C_FP64: 1713 if (AMDGPU::isInlinableLiteral64(Literal.getZExtValue(), 1714 AsmParser->hasInv2PiInlineImm())) { 1715 Inst.addOperand(MCOperand::createImm(Literal.getZExtValue())); 1716 return; 1717 } 1718 1719 // Non-inlineable 1720 if (AMDGPU::isSISrcFPOperand(InstDesc, OpNum)) { // Expected 64-bit fp operand 1721 // For fp operands we check if low 32 bits are zeros 1722 if (Literal.getLoBits(32) != 0) { 1723 const_cast<AMDGPUAsmParser *>(AsmParser)->Warning(Inst.getLoc(), 1724 "Can't encode literal as exact 64-bit floating-point operand. " 1725 "Low 32-bits will be set to zero"); 1726 } 1727 1728 Inst.addOperand(MCOperand::createImm(Literal.lshr(32).getZExtValue())); 1729 return; 1730 } 1731 1732 // We don't allow fp literals in 64-bit integer instructions. It is 1733 // unclear how we should encode them. This case should be checked earlier 1734 // in predicate methods (isLiteralImm()) 1735 llvm_unreachable("fp literal in 64-bit integer instruction."); 1736 1737 case AMDGPU::OPERAND_REG_IMM_INT32: 1738 case AMDGPU::OPERAND_REG_IMM_FP32: 1739 case AMDGPU::OPERAND_REG_INLINE_C_INT32: 1740 case AMDGPU::OPERAND_REG_INLINE_C_FP32: 1741 case AMDGPU::OPERAND_REG_INLINE_AC_INT32: 1742 case AMDGPU::OPERAND_REG_INLINE_AC_FP32: 1743 case AMDGPU::OPERAND_REG_IMM_INT16: 1744 case AMDGPU::OPERAND_REG_IMM_FP16: 1745 case AMDGPU::OPERAND_REG_INLINE_C_INT16: 1746 case AMDGPU::OPERAND_REG_INLINE_C_FP16: 1747 case AMDGPU::OPERAND_REG_INLINE_C_V2INT16: 1748 case AMDGPU::OPERAND_REG_INLINE_C_V2FP16: 1749 case AMDGPU::OPERAND_REG_INLINE_AC_INT16: 1750 case AMDGPU::OPERAND_REG_INLINE_AC_FP16: 1751 case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16: 1752 case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16: 1753 case AMDGPU::OPERAND_REG_IMM_V2INT16: 1754 case AMDGPU::OPERAND_REG_IMM_V2FP16: { 1755 bool lost; 1756 APFloat FPLiteral(APFloat::IEEEdouble(), Literal); 1757 // Convert literal to single precision 1758 FPLiteral.convert(*getOpFltSemantics(OpTy), 1759 APFloat::rmNearestTiesToEven, &lost); 1760 // We allow precision lost but not overflow or underflow. This should be 1761 // checked earlier in isLiteralImm() 1762 1763 uint64_t ImmVal = FPLiteral.bitcastToAPInt().getZExtValue(); 1764 Inst.addOperand(MCOperand::createImm(ImmVal)); 1765 return; 1766 } 1767 default: 1768 llvm_unreachable("invalid operand size"); 1769 } 1770 1771 return; 1772 } 1773 1774 // We got int literal token. 1775 // Only sign extend inline immediates. 
1776 switch (OpTy) { 1777 case AMDGPU::OPERAND_REG_IMM_INT32: 1778 case AMDGPU::OPERAND_REG_IMM_FP32: 1779 case AMDGPU::OPERAND_REG_INLINE_C_INT32: 1780 case AMDGPU::OPERAND_REG_INLINE_C_FP32: 1781 case AMDGPU::OPERAND_REG_INLINE_AC_INT32: 1782 case AMDGPU::OPERAND_REG_INLINE_AC_FP32: 1783 case AMDGPU::OPERAND_REG_IMM_V2INT16: 1784 case AMDGPU::OPERAND_REG_IMM_V2FP16: 1785 if (isSafeTruncation(Val, 32) && 1786 AMDGPU::isInlinableLiteral32(static_cast<int32_t>(Val), 1787 AsmParser->hasInv2PiInlineImm())) { 1788 Inst.addOperand(MCOperand::createImm(Val)); 1789 return; 1790 } 1791 1792 Inst.addOperand(MCOperand::createImm(Val & 0xffffffff)); 1793 return; 1794 1795 case AMDGPU::OPERAND_REG_IMM_INT64: 1796 case AMDGPU::OPERAND_REG_IMM_FP64: 1797 case AMDGPU::OPERAND_REG_INLINE_C_INT64: 1798 case AMDGPU::OPERAND_REG_INLINE_C_FP64: 1799 if (AMDGPU::isInlinableLiteral64(Val, AsmParser->hasInv2PiInlineImm())) { 1800 Inst.addOperand(MCOperand::createImm(Val)); 1801 return; 1802 } 1803 1804 Inst.addOperand(MCOperand::createImm(Lo_32(Val))); 1805 return; 1806 1807 case AMDGPU::OPERAND_REG_IMM_INT16: 1808 case AMDGPU::OPERAND_REG_IMM_FP16: 1809 case AMDGPU::OPERAND_REG_INLINE_C_INT16: 1810 case AMDGPU::OPERAND_REG_INLINE_C_FP16: 1811 case AMDGPU::OPERAND_REG_INLINE_AC_INT16: 1812 case AMDGPU::OPERAND_REG_INLINE_AC_FP16: 1813 if (isSafeTruncation(Val, 16) && 1814 AMDGPU::isInlinableLiteral16(static_cast<int16_t>(Val), 1815 AsmParser->hasInv2PiInlineImm())) { 1816 Inst.addOperand(MCOperand::createImm(Val)); 1817 return; 1818 } 1819 1820 Inst.addOperand(MCOperand::createImm(Val & 0xffff)); 1821 return; 1822 1823 case AMDGPU::OPERAND_REG_INLINE_C_V2INT16: 1824 case AMDGPU::OPERAND_REG_INLINE_C_V2FP16: 1825 case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16: 1826 case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16: { 1827 assert(isSafeTruncation(Val, 16)); 1828 assert(AMDGPU::isInlinableLiteral16(static_cast<int16_t>(Val), 1829 AsmParser->hasInv2PiInlineImm())); 1830 1831 Inst.addOperand(MCOperand::createImm(Val)); 1832 return; 1833 } 1834 default: 1835 llvm_unreachable("invalid operand size"); 1836 } 1837 } 1838 1839 template <unsigned Bitwidth> 1840 void AMDGPUOperand::addKImmFPOperands(MCInst &Inst, unsigned N) const { 1841 APInt Literal(64, Imm.Val); 1842 1843 if (!Imm.IsFPImm) { 1844 // We got int literal token. 
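// (Illustrative) e.g. for a 32-bit KImm operand a literal like 0x123456789
// is encoded as its low 32 bits, 0x23456789.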
1845 Inst.addOperand(MCOperand::createImm(Literal.getLoBits(Bitwidth).getZExtValue())); 1846 return; 1847 } 1848 1849 bool Lost; 1850 APFloat FPLiteral(APFloat::IEEEdouble(), Literal); 1851 FPLiteral.convert(*getFltSemantics(Bitwidth / 8), 1852 APFloat::rmNearestTiesToEven, &Lost); 1853 Inst.addOperand(MCOperand::createImm(FPLiteral.bitcastToAPInt().getZExtValue())); 1854 } 1855 1856 void AMDGPUOperand::addRegOperands(MCInst &Inst, unsigned N) const { 1857 Inst.addOperand(MCOperand::createReg(AMDGPU::getMCReg(getReg(), AsmParser->getSTI()))); 1858 } 1859 1860 static bool isInlineValue(unsigned Reg) { 1861 switch (Reg) { 1862 case AMDGPU::SRC_SHARED_BASE: 1863 case AMDGPU::SRC_SHARED_LIMIT: 1864 case AMDGPU::SRC_PRIVATE_BASE: 1865 case AMDGPU::SRC_PRIVATE_LIMIT: 1866 case AMDGPU::SRC_POPS_EXITING_WAVE_ID: 1867 return true; 1868 case AMDGPU::SRC_VCCZ: 1869 case AMDGPU::SRC_EXECZ: 1870 case AMDGPU::SRC_SCC: 1871 return true; 1872 case AMDGPU::SGPR_NULL: 1873 return true; 1874 default: 1875 return false; 1876 } 1877 } 1878 1879 bool AMDGPUOperand::isInlineValue() const { 1880 return isRegKind() && ::isInlineValue(getReg()); 1881 } 1882 1883 //===----------------------------------------------------------------------===// 1884 // AsmParser 1885 //===----------------------------------------------------------------------===// 1886 1887 static int getRegClass(RegisterKind Is, unsigned RegWidth) { 1888 if (Is == IS_VGPR) { 1889 switch (RegWidth) { 1890 default: return -1; 1891 case 1: return AMDGPU::VGPR_32RegClassID; 1892 case 2: return AMDGPU::VReg_64RegClassID; 1893 case 3: return AMDGPU::VReg_96RegClassID; 1894 case 4: return AMDGPU::VReg_128RegClassID; 1895 case 5: return AMDGPU::VReg_160RegClassID; 1896 case 8: return AMDGPU::VReg_256RegClassID; 1897 case 16: return AMDGPU::VReg_512RegClassID; 1898 case 32: return AMDGPU::VReg_1024RegClassID; 1899 } 1900 } else if (Is == IS_TTMP) { 1901 switch (RegWidth) { 1902 default: return -1; 1903 case 1: return AMDGPU::TTMP_32RegClassID; 1904 case 2: return AMDGPU::TTMP_64RegClassID; 1905 case 4: return AMDGPU::TTMP_128RegClassID; 1906 case 8: return AMDGPU::TTMP_256RegClassID; 1907 case 16: return AMDGPU::TTMP_512RegClassID; 1908 } 1909 } else if (Is == IS_SGPR) { 1910 switch (RegWidth) { 1911 default: return -1; 1912 case 1: return AMDGPU::SGPR_32RegClassID; 1913 case 2: return AMDGPU::SGPR_64RegClassID; 1914 case 4: return AMDGPU::SGPR_128RegClassID; 1915 case 8: return AMDGPU::SGPR_256RegClassID; 1916 case 16: return AMDGPU::SGPR_512RegClassID; 1917 } 1918 } else if (Is == IS_AGPR) { 1919 switch (RegWidth) { 1920 default: return -1; 1921 case 1: return AMDGPU::AGPR_32RegClassID; 1922 case 2: return AMDGPU::AReg_64RegClassID; 1923 case 4: return AMDGPU::AReg_128RegClassID; 1924 case 16: return AMDGPU::AReg_512RegClassID; 1925 case 32: return AMDGPU::AReg_1024RegClassID; 1926 } 1927 } 1928 return -1; 1929 } 1930 1931 static unsigned getSpecialRegForName(StringRef RegName) { 1932 return StringSwitch<unsigned>(RegName) 1933 .Case("exec", AMDGPU::EXEC) 1934 .Case("vcc", AMDGPU::VCC) 1935 .Case("flat_scratch", AMDGPU::FLAT_SCR) 1936 .Case("xnack_mask", AMDGPU::XNACK_MASK) 1937 .Case("shared_base", AMDGPU::SRC_SHARED_BASE) 1938 .Case("src_shared_base", AMDGPU::SRC_SHARED_BASE) 1939 .Case("shared_limit", AMDGPU::SRC_SHARED_LIMIT) 1940 .Case("src_shared_limit", AMDGPU::SRC_SHARED_LIMIT) 1941 .Case("private_base", AMDGPU::SRC_PRIVATE_BASE) 1942 .Case("src_private_base", AMDGPU::SRC_PRIVATE_BASE) 1943 .Case("private_limit", AMDGPU::SRC_PRIVATE_LIMIT) 1944 
.Case("src_private_limit", AMDGPU::SRC_PRIVATE_LIMIT) 1945 .Case("pops_exiting_wave_id", AMDGPU::SRC_POPS_EXITING_WAVE_ID) 1946 .Case("src_pops_exiting_wave_id", AMDGPU::SRC_POPS_EXITING_WAVE_ID) 1947 .Case("lds_direct", AMDGPU::LDS_DIRECT) 1948 .Case("src_lds_direct", AMDGPU::LDS_DIRECT) 1949 .Case("m0", AMDGPU::M0) 1950 .Case("vccz", AMDGPU::SRC_VCCZ) 1951 .Case("src_vccz", AMDGPU::SRC_VCCZ) 1952 .Case("execz", AMDGPU::SRC_EXECZ) 1953 .Case("src_execz", AMDGPU::SRC_EXECZ) 1954 .Case("scc", AMDGPU::SRC_SCC) 1955 .Case("src_scc", AMDGPU::SRC_SCC) 1956 .Case("tba", AMDGPU::TBA) 1957 .Case("tma", AMDGPU::TMA) 1958 .Case("flat_scratch_lo", AMDGPU::FLAT_SCR_LO) 1959 .Case("flat_scratch_hi", AMDGPU::FLAT_SCR_HI) 1960 .Case("xnack_mask_lo", AMDGPU::XNACK_MASK_LO) 1961 .Case("xnack_mask_hi", AMDGPU::XNACK_MASK_HI) 1962 .Case("vcc_lo", AMDGPU::VCC_LO) 1963 .Case("vcc_hi", AMDGPU::VCC_HI) 1964 .Case("exec_lo", AMDGPU::EXEC_LO) 1965 .Case("exec_hi", AMDGPU::EXEC_HI) 1966 .Case("tma_lo", AMDGPU::TMA_LO) 1967 .Case("tma_hi", AMDGPU::TMA_HI) 1968 .Case("tba_lo", AMDGPU::TBA_LO) 1969 .Case("tba_hi", AMDGPU::TBA_HI) 1970 .Case("null", AMDGPU::SGPR_NULL) 1971 .Default(AMDGPU::NoRegister); 1972 } 1973 1974 bool AMDGPUAsmParser::ParseRegister(unsigned &RegNo, SMLoc &StartLoc, 1975 SMLoc &EndLoc) { 1976 auto R = parseRegister(); 1977 if (!R) return true; 1978 assert(R->isReg()); 1979 RegNo = R->getReg(); 1980 StartLoc = R->getStartLoc(); 1981 EndLoc = R->getEndLoc(); 1982 return false; 1983 } 1984 1985 bool AMDGPUAsmParser::AddNextRegisterToList(unsigned &Reg, unsigned &RegWidth, 1986 RegisterKind RegKind, unsigned Reg1) { 1987 switch (RegKind) { 1988 case IS_SPECIAL: 1989 if (Reg == AMDGPU::EXEC_LO && Reg1 == AMDGPU::EXEC_HI) { 1990 Reg = AMDGPU::EXEC; 1991 RegWidth = 2; 1992 return true; 1993 } 1994 if (Reg == AMDGPU::FLAT_SCR_LO && Reg1 == AMDGPU::FLAT_SCR_HI) { 1995 Reg = AMDGPU::FLAT_SCR; 1996 RegWidth = 2; 1997 return true; 1998 } 1999 if (Reg == AMDGPU::XNACK_MASK_LO && Reg1 == AMDGPU::XNACK_MASK_HI) { 2000 Reg = AMDGPU::XNACK_MASK; 2001 RegWidth = 2; 2002 return true; 2003 } 2004 if (Reg == AMDGPU::VCC_LO && Reg1 == AMDGPU::VCC_HI) { 2005 Reg = AMDGPU::VCC; 2006 RegWidth = 2; 2007 return true; 2008 } 2009 if (Reg == AMDGPU::TBA_LO && Reg1 == AMDGPU::TBA_HI) { 2010 Reg = AMDGPU::TBA; 2011 RegWidth = 2; 2012 return true; 2013 } 2014 if (Reg == AMDGPU::TMA_LO && Reg1 == AMDGPU::TMA_HI) { 2015 Reg = AMDGPU::TMA; 2016 RegWidth = 2; 2017 return true; 2018 } 2019 return false; 2020 case IS_VGPR: 2021 case IS_SGPR: 2022 case IS_AGPR: 2023 case IS_TTMP: 2024 if (Reg1 != Reg + RegWidth) { 2025 return false; 2026 } 2027 RegWidth++; 2028 return true; 2029 default: 2030 llvm_unreachable("unexpected register kind"); 2031 } 2032 } 2033 2034 struct RegInfo { 2035 StringLiteral Name; 2036 RegisterKind Kind; 2037 }; 2038 2039 static constexpr RegInfo RegularRegisters[] = { 2040 {{"v"}, IS_VGPR}, 2041 {{"s"}, IS_SGPR}, 2042 {{"ttmp"}, IS_TTMP}, 2043 {{"acc"}, IS_AGPR}, 2044 {{"a"}, IS_AGPR}, 2045 }; 2046 2047 static bool isRegularReg(RegisterKind Kind) { 2048 return Kind == IS_VGPR || 2049 Kind == IS_SGPR || 2050 Kind == IS_TTMP || 2051 Kind == IS_AGPR; 2052 } 2053 2054 static const RegInfo* getRegularRegInfo(StringRef Str) { 2055 for (const RegInfo &Reg : RegularRegisters) 2056 if (Str.startswith(Reg.Name)) 2057 return &Reg; 2058 return nullptr; 2059 } 2060 2061 static bool getRegNum(StringRef Str, unsigned& Num) { 2062 return !Str.getAsInteger(10, Num); 2063 } 2064 2065 bool 2066 AMDGPUAsmParser::isRegister(const 
AsmToken &Token, 2067 const AsmToken &NextToken) const { 2068 2069 // A list of consecutive registers: [s0,s1,s2,s3] 2070 if (Token.is(AsmToken::LBrac)) 2071 return true; 2072 2073 if (!Token.is(AsmToken::Identifier)) 2074 return false; 2075 2076 // A single register like s0 or a range of registers like s[0:1] 2077 2078 StringRef Str = Token.getString(); 2079 const RegInfo *Reg = getRegularRegInfo(Str); 2080 if (Reg) { 2081 StringRef RegName = Reg->Name; 2082 StringRef RegSuffix = Str.substr(RegName.size()); 2083 if (!RegSuffix.empty()) { 2084 unsigned Num; 2085 // A single register with an index: rXX 2086 if (getRegNum(RegSuffix, Num)) 2087 return true; 2088 } else { 2089 // A range of registers: r[XX:YY]. 2090 if (NextToken.is(AsmToken::LBrac)) 2091 return true; 2092 } 2093 } 2094 2095 return getSpecialRegForName(Str) != AMDGPU::NoRegister; 2096 } 2097 2098 bool 2099 AMDGPUAsmParser::isRegister() 2100 { 2101 return isRegister(getToken(), peekToken()); 2102 } 2103 2104 unsigned 2105 AMDGPUAsmParser::getRegularReg(RegisterKind RegKind, 2106 unsigned RegNum, 2107 unsigned RegWidth) { 2108 2109 assert(isRegularReg(RegKind)); 2110 2111 unsigned AlignSize = 1; 2112 if (RegKind == IS_SGPR || RegKind == IS_TTMP) { 2113 // SGPR and TTMP registers must be aligned. 2114 // Max required alignment is 4 dwords. 2115 AlignSize = std::min(RegWidth, 4u); 2116 } 2117 2118 if (RegNum % AlignSize != 0) 2119 return AMDGPU::NoRegister; 2120 2121 unsigned RegIdx = RegNum / AlignSize; 2122 int RCID = getRegClass(RegKind, RegWidth); 2123 if (RCID == -1) 2124 return AMDGPU::NoRegister; 2125 2126 const MCRegisterInfo *TRI = getContext().getRegisterInfo(); 2127 const MCRegisterClass RC = TRI->getRegClass(RCID); 2128 if (RegIdx >= RC.getNumRegs()) 2129 return AMDGPU::NoRegister; 2130 2131 return RC.getRegister(RegIdx); 2132 } 2133 2134 bool 2135 AMDGPUAsmParser::ParseRegRange(unsigned& Num, unsigned& Width) { 2136 int64_t RegLo, RegHi; 2137 if (!trySkipToken(AsmToken::LBrac)) 2138 return false; 2139 2140 if (!parseExpr(RegLo)) 2141 return false; 2142 2143 if (trySkipToken(AsmToken::Colon)) { 2144 if (!parseExpr(RegHi)) 2145 return false; 2146 } else { 2147 RegHi = RegLo; 2148 } 2149 2150 if (!trySkipToken(AsmToken::RBrac)) 2151 return false; 2152 2153 if (!isUInt<32>(RegLo) || !isUInt<32>(RegHi) || RegLo > RegHi) 2154 return false; 2155 2156 Num = static_cast<unsigned>(RegLo); 2157 Width = (RegHi - RegLo) + 1; 2158 return true; 2159 } 2160 2161 unsigned 2162 AMDGPUAsmParser::ParseSpecialReg(RegisterKind &RegKind, 2163 unsigned &RegNum, 2164 unsigned &RegWidth) { 2165 assert(isToken(AsmToken::Identifier)); 2166 unsigned Reg = getSpecialRegForName(getTokenStr()); 2167 if (Reg) { 2168 RegNum = 0; 2169 RegWidth = 1; 2170 RegKind = IS_SPECIAL; 2171 lex(); // skip register name 2172 } 2173 return Reg; 2174 } 2175 2176 unsigned 2177 AMDGPUAsmParser::ParseRegularReg(RegisterKind &RegKind, 2178 unsigned &RegNum, 2179 unsigned &RegWidth) { 2180 assert(isToken(AsmToken::Identifier)); 2181 StringRef RegName = getTokenStr(); 2182 2183 const RegInfo *RI = getRegularRegInfo(RegName); 2184 if (!RI) 2185 return AMDGPU::NoRegister; 2186 lex(); // skip register name 2187 2188 RegKind = RI->Kind; 2189 StringRef RegSuffix = RegName.substr(RI->Name.size()); 2190 if (!RegSuffix.empty()) { 2191 // Single 32-bit register: vXX. 2192 if (!getRegNum(RegSuffix, RegNum)) 2193 return AMDGPU::NoRegister; 2194 RegWidth = 1; 2195 } else { 2196 // Range of registers: v[XX:YY]. ":YY" is optional. 
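// Illustrative examples: "v[0:3]" yields RegNum = 0 and RegWidth = 4,
// while "v[5]" (":YY" omitted) yields RegNum = 5 and RegWidth = 1.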
2197 if (!ParseRegRange(RegNum, RegWidth)) 2198 return AMDGPU::NoRegister; 2199 } 2200 2201 return getRegularReg(RegKind, RegNum, RegWidth); 2202 } 2203 2204 unsigned 2205 AMDGPUAsmParser::ParseRegList(RegisterKind &RegKind, 2206 unsigned &RegNum, 2207 unsigned &RegWidth) { 2208 unsigned Reg = AMDGPU::NoRegister; 2209 2210 if (!trySkipToken(AsmToken::LBrac)) 2211 return AMDGPU::NoRegister; 2212 2213 // List of consecutive registers, e.g.: [s0,s1,s2,s3] 2214 2215 if (!ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth)) 2216 return AMDGPU::NoRegister; 2217 if (RegWidth != 1) 2218 return AMDGPU::NoRegister; 2219 2220 for (; trySkipToken(AsmToken::Comma); ) { 2221 RegisterKind NextRegKind; 2222 unsigned NextReg, NextRegNum, NextRegWidth; 2223 2224 if (!ParseAMDGPURegister(NextRegKind, NextReg, NextRegNum, NextRegWidth)) 2225 return AMDGPU::NoRegister; 2226 if (NextRegWidth != 1) 2227 return AMDGPU::NoRegister; 2228 if (NextRegKind != RegKind) 2229 return AMDGPU::NoRegister; 2230 if (!AddNextRegisterToList(Reg, RegWidth, RegKind, NextReg)) 2231 return AMDGPU::NoRegister; 2232 } 2233 2234 if (!trySkipToken(AsmToken::RBrac)) 2235 return AMDGPU::NoRegister; 2236 2237 if (isRegularReg(RegKind)) 2238 Reg = getRegularReg(RegKind, RegNum, RegWidth); 2239 2240 return Reg; 2241 } 2242 2243 bool AMDGPUAsmParser::ParseAMDGPURegister(RegisterKind &RegKind, 2244 unsigned &Reg, 2245 unsigned &RegNum, 2246 unsigned &RegWidth) { 2247 Reg = AMDGPU::NoRegister; 2248 2249 if (isToken(AsmToken::Identifier)) { 2250 Reg = ParseSpecialReg(RegKind, RegNum, RegWidth); 2251 if (Reg == AMDGPU::NoRegister) 2252 Reg = ParseRegularReg(RegKind, RegNum, RegWidth); 2253 } else { 2254 Reg = ParseRegList(RegKind, RegNum, RegWidth); 2255 } 2256 2257 const MCRegisterInfo *TRI = getContext().getRegisterInfo(); 2258 return Reg != AMDGPU::NoRegister && subtargetHasRegister(*TRI, Reg); 2259 } 2260 2261 Optional<StringRef> 2262 AMDGPUAsmParser::getGprCountSymbolName(RegisterKind RegKind) { 2263 switch (RegKind) { 2264 case IS_VGPR: 2265 return StringRef(".amdgcn.next_free_vgpr"); 2266 case IS_SGPR: 2267 return StringRef(".amdgcn.next_free_sgpr"); 2268 default: 2269 return None; 2270 } 2271 } 2272 2273 void AMDGPUAsmParser::initializeGprCountSymbol(RegisterKind RegKind) { 2274 auto SymbolName = getGprCountSymbolName(RegKind); 2275 assert(SymbolName && "initializing invalid register kind"); 2276 MCSymbol *Sym = getContext().getOrCreateSymbol(*SymbolName); 2277 Sym->setVariableValue(MCConstantExpr::create(0, getContext())); 2278 } 2279 2280 bool AMDGPUAsmParser::updateGprCountSymbols(RegisterKind RegKind, 2281 unsigned DwordRegIndex, 2282 unsigned RegWidth) { 2283 // Symbols are only defined for GCN targets 2284 if (AMDGPU::getIsaVersion(getSTI().getCPU()).Major < 6) 2285 return true; 2286 2287 auto SymbolName = getGprCountSymbolName(RegKind); 2288 if (!SymbolName) 2289 return true; 2290 MCSymbol *Sym = getContext().getOrCreateSymbol(*SymbolName); 2291 2292 int64_t NewMax = DwordRegIndex + RegWidth - 1; 2293 int64_t OldCount; 2294 2295 if (!Sym->isVariable()) 2296 return !Error(getParser().getTok().getLoc(), 2297 ".amdgcn.next_free_{v,s}gpr symbols must be variable"); 2298 if (!Sym->getVariableValue(false)->evaluateAsAbsolute(OldCount)) 2299 return !Error( 2300 getParser().getTok().getLoc(), 2301 ".amdgcn.next_free_{v,s}gpr symbols must be absolute expressions"); 2302 2303 if (OldCount <= NewMax) 2304 Sym->setVariableValue(MCConstantExpr::create(NewMax + 1, getContext())); 2305 2306 return true; 2307 } 2308 2309 
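// Example for updateGprCountSymbols() above (illustrative): after parsing
// v[8:11] it is called with DwordRegIndex = 8 and RegWidth = 4, so NewMax = 11
// and .amdgcn.next_free_vgpr is raised to 12 unless it is already larger.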
std::unique_ptr<AMDGPUOperand> AMDGPUAsmParser::parseRegister() { 2310 const auto &Tok = Parser.getTok(); 2311 SMLoc StartLoc = Tok.getLoc(); 2312 SMLoc EndLoc = Tok.getEndLoc(); 2313 RegisterKind RegKind; 2314 unsigned Reg, RegNum, RegWidth; 2315 2316 if (!ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth)) { 2317 //FIXME: improve error messages (bug 41303). 2318 Error(StartLoc, "not a valid operand."); 2319 return nullptr; 2320 } 2321 if (AMDGPU::IsaInfo::hasCodeObjectV3(&getSTI())) { 2322 if (!updateGprCountSymbols(RegKind, RegNum, RegWidth)) 2323 return nullptr; 2324 } else 2325 KernelScope.usesRegister(RegKind, RegNum, RegWidth); 2326 return AMDGPUOperand::CreateReg(this, Reg, StartLoc, EndLoc); 2327 } 2328 2329 OperandMatchResultTy 2330 AMDGPUAsmParser::parseImm(OperandVector &Operands, bool HasSP3AbsModifier) { 2331 // TODO: add syntactic sugar for 1/(2*PI) 2332 2333 assert(!isRegister()); 2334 assert(!isModifier()); 2335 2336 const auto& Tok = getToken(); 2337 const auto& NextTok = peekToken(); 2338 bool IsReal = Tok.is(AsmToken::Real); 2339 SMLoc S = getLoc(); 2340 bool Negate = false; 2341 2342 if (!IsReal && Tok.is(AsmToken::Minus) && NextTok.is(AsmToken::Real)) { 2343 lex(); 2344 IsReal = true; 2345 Negate = true; 2346 } 2347 2348 if (IsReal) { 2349 // Floating-point expressions are not supported. 2350 // Can only allow floating-point literals with an 2351 // optional sign. 2352 2353 StringRef Num = getTokenStr(); 2354 lex(); 2355 2356 APFloat RealVal(APFloat::IEEEdouble()); 2357 auto roundMode = APFloat::rmNearestTiesToEven; 2358 if (RealVal.convertFromString(Num, roundMode) == APFloat::opInvalidOp) { 2359 return MatchOperand_ParseFail; 2360 } 2361 if (Negate) 2362 RealVal.changeSign(); 2363 2364 Operands.push_back( 2365 AMDGPUOperand::CreateImm(this, RealVal.bitcastToAPInt().getZExtValue(), S, 2366 AMDGPUOperand::ImmTyNone, true)); 2367 2368 return MatchOperand_Success; 2369 2370 } else { 2371 int64_t IntVal; 2372 const MCExpr *Expr; 2373 SMLoc S = getLoc(); 2374 2375 if (HasSP3AbsModifier) { 2376 // This is a workaround for handling expressions 2377 // as arguments of SP3 'abs' modifier, for example: 2378 // |1.0| 2379 // |-1| 2380 // |1+x| 2381 // This syntax is not compatible with syntax of standard 2382 // MC expressions (due to the trailing '|'). 
2383 SMLoc EndLoc; 2384 if (getParser().parsePrimaryExpr(Expr, EndLoc)) 2385 return MatchOperand_ParseFail; 2386 } else { 2387 if (Parser.parseExpression(Expr)) 2388 return MatchOperand_ParseFail; 2389 } 2390 2391 if (Expr->evaluateAsAbsolute(IntVal)) { 2392 Operands.push_back(AMDGPUOperand::CreateImm(this, IntVal, S)); 2393 } else { 2394 Operands.push_back(AMDGPUOperand::CreateExpr(this, Expr, S)); 2395 } 2396 2397 return MatchOperand_Success; 2398 } 2399 2400 return MatchOperand_NoMatch; 2401 } 2402 2403 OperandMatchResultTy 2404 AMDGPUAsmParser::parseReg(OperandVector &Operands) { 2405 if (!isRegister()) 2406 return MatchOperand_NoMatch; 2407 2408 if (auto R = parseRegister()) { 2409 assert(R->isReg()); 2410 Operands.push_back(std::move(R)); 2411 return MatchOperand_Success; 2412 } 2413 return MatchOperand_ParseFail; 2414 } 2415 2416 OperandMatchResultTy 2417 AMDGPUAsmParser::parseRegOrImm(OperandVector &Operands, bool HasSP3AbsMod) { 2418 auto res = parseReg(Operands); 2419 if (res != MatchOperand_NoMatch) { 2420 return res; 2421 } else if (isModifier()) { 2422 return MatchOperand_NoMatch; 2423 } else { 2424 return parseImm(Operands, HasSP3AbsMod); 2425 } 2426 } 2427 2428 bool 2429 AMDGPUAsmParser::isNamedOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const { 2430 if (Token.is(AsmToken::Identifier) && NextToken.is(AsmToken::LParen)) { 2431 const auto &str = Token.getString(); 2432 return str == "abs" || str == "neg" || str == "sext"; 2433 } 2434 return false; 2435 } 2436 2437 bool 2438 AMDGPUAsmParser::isOpcodeModifierWithVal(const AsmToken &Token, const AsmToken &NextToken) const { 2439 return Token.is(AsmToken::Identifier) && NextToken.is(AsmToken::Colon); 2440 } 2441 2442 bool 2443 AMDGPUAsmParser::isOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const { 2444 return isNamedOperandModifier(Token, NextToken) || Token.is(AsmToken::Pipe); 2445 } 2446 2447 bool 2448 AMDGPUAsmParser::isRegOrOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const { 2449 return isRegister(Token, NextToken) || isOperandModifier(Token, NextToken); 2450 } 2451 2452 // Check if this is an operand modifier or an opcode modifier 2453 // which may look like an expression but it is not. We should 2454 // avoid parsing these modifiers as expressions. Currently 2455 // recognized sequences are: 2456 // |...| 2457 // abs(...) 2458 // neg(...) 2459 // sext(...) 2460 // -reg 2461 // -|...| 2462 // -abs(...) 2463 // name:... 2464 // Note that simple opcode modifiers like 'gds' may be parsed as 2465 // expressions; this is a special case. See getExpressionAsToken. 2466 // 2467 bool 2468 AMDGPUAsmParser::isModifier() { 2469 2470 AsmToken Tok = getToken(); 2471 AsmToken NextToken[2]; 2472 peekTokens(NextToken); 2473 2474 return isOperandModifier(Tok, NextToken[0]) || 2475 (Tok.is(AsmToken::Minus) && isRegOrOperandModifier(NextToken[0], NextToken[1])) || 2476 isOpcodeModifierWithVal(Tok, NextToken[0]); 2477 } 2478 2479 // Check if the current token is an SP3 'neg' modifier. 2480 // Currently this modifier is allowed in the following context: 2481 // 2482 // 1. Before a register, e.g. "-v0", "-v[...]" or "-[v0,v1]". 2483 // 2. Before an 'abs' modifier: -abs(...) 2484 // 3. Before an SP3 'abs' modifier: -|...| 2485 // 2486 // In all other cases "-" is handled as a part 2487 // of an expression that follows the sign. 
2488 // 2489 // Note: When "-" is followed by an integer literal, 2490 // this is interpreted as integer negation rather 2491 // than a floating-point NEG modifier applied to N. 2492 // Besides being counter-intuitive, such use of the floating-point 2493 // NEG modifier would have resulted in a different meaning 2494 // of integer literals used with VOP1/2/C and VOP3, 2495 // for example: 2496 // v_exp_f32_e32 v5, -1 // VOP1: src0 = 0xFFFFFFFF 2497 // v_exp_f32_e64 v5, -1 // VOP3: src0 = 0x80000001 2498 // Negative fp literals with preceding "-" are 2499 // handled likewise for uniformity 2500 // 2501 bool 2502 AMDGPUAsmParser::parseSP3NegModifier() { 2503 2504 AsmToken NextToken[2]; 2505 peekTokens(NextToken); 2506 2507 if (isToken(AsmToken::Minus) && 2508 (isRegister(NextToken[0], NextToken[1]) || 2509 NextToken[0].is(AsmToken::Pipe) || 2510 isId(NextToken[0], "abs"))) { 2511 lex(); 2512 return true; 2513 } 2514 2515 return false; 2516 } 2517 2518 OperandMatchResultTy 2519 AMDGPUAsmParser::parseRegOrImmWithFPInputMods(OperandVector &Operands, 2520 bool AllowImm) { 2521 bool Neg, SP3Neg; 2522 bool Abs, SP3Abs; 2523 SMLoc Loc; 2524 2525 // Disable ambiguous constructs like '--1' etc. Should use neg(-1) instead. 2526 if (isToken(AsmToken::Minus) && peekToken().is(AsmToken::Minus)) { 2527 Error(getLoc(), "invalid syntax, expected 'neg' modifier"); 2528 return MatchOperand_ParseFail; 2529 } 2530 2531 SP3Neg = parseSP3NegModifier(); 2532 2533 Loc = getLoc(); 2534 Neg = trySkipId("neg"); 2535 if (Neg && SP3Neg) { 2536 Error(Loc, "expected register or immediate"); 2537 return MatchOperand_ParseFail; 2538 } 2539 if (Neg && !skipToken(AsmToken::LParen, "expected left paren after neg")) 2540 return MatchOperand_ParseFail; 2541 2542 Abs = trySkipId("abs"); 2543 if (Abs && !skipToken(AsmToken::LParen, "expected left paren after abs")) 2544 return MatchOperand_ParseFail; 2545 2546 Loc = getLoc(); 2547 SP3Abs = trySkipToken(AsmToken::Pipe); 2548 if (Abs && SP3Abs) { 2549 Error(Loc, "expected register or immediate"); 2550 return MatchOperand_ParseFail; 2551 } 2552 2553 OperandMatchResultTy Res; 2554 if (AllowImm) { 2555 Res = parseRegOrImm(Operands, SP3Abs); 2556 } else { 2557 Res = parseReg(Operands); 2558 } 2559 if (Res != MatchOperand_Success) { 2560 return (SP3Neg || Neg || SP3Abs || Abs)?
MatchOperand_ParseFail : Res; 2561 } 2562 2563 if (SP3Abs && !skipToken(AsmToken::Pipe, "expected vertical bar")) 2564 return MatchOperand_ParseFail; 2565 if (Abs && !skipToken(AsmToken::RParen, "expected closing parentheses")) 2566 return MatchOperand_ParseFail; 2567 if (Neg && !skipToken(AsmToken::RParen, "expected closing parentheses")) 2568 return MatchOperand_ParseFail; 2569 2570 AMDGPUOperand::Modifiers Mods; 2571 Mods.Abs = Abs || SP3Abs; 2572 Mods.Neg = Neg || SP3Neg; 2573 2574 if (Mods.hasFPModifiers()) { 2575 AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back()); 2576 if (Op.isExpr()) { 2577 Error(Op.getStartLoc(), "expected an absolute expression"); 2578 return MatchOperand_ParseFail; 2579 } 2580 Op.setModifiers(Mods); 2581 } 2582 return MatchOperand_Success; 2583 } 2584 2585 OperandMatchResultTy 2586 AMDGPUAsmParser::parseRegOrImmWithIntInputMods(OperandVector &Operands, 2587 bool AllowImm) { 2588 bool Sext = trySkipId("sext"); 2589 if (Sext && !skipToken(AsmToken::LParen, "expected left paren after sext")) 2590 return MatchOperand_ParseFail; 2591 2592 OperandMatchResultTy Res; 2593 if (AllowImm) { 2594 Res = parseRegOrImm(Operands); 2595 } else { 2596 Res = parseReg(Operands); 2597 } 2598 if (Res != MatchOperand_Success) { 2599 return Sext? MatchOperand_ParseFail : Res; 2600 } 2601 2602 if (Sext && !skipToken(AsmToken::RParen, "expected closing parentheses")) 2603 return MatchOperand_ParseFail; 2604 2605 AMDGPUOperand::Modifiers Mods; 2606 Mods.Sext = Sext; 2607 2608 if (Mods.hasIntModifiers()) { 2609 AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back()); 2610 if (Op.isExpr()) { 2611 Error(Op.getStartLoc(), "expected an absolute expression"); 2612 return MatchOperand_ParseFail; 2613 } 2614 Op.setModifiers(Mods); 2615 } 2616 2617 return MatchOperand_Success; 2618 } 2619 2620 OperandMatchResultTy 2621 AMDGPUAsmParser::parseRegWithFPInputMods(OperandVector &Operands) { 2622 return parseRegOrImmWithFPInputMods(Operands, false); 2623 } 2624 2625 OperandMatchResultTy 2626 AMDGPUAsmParser::parseRegWithIntInputMods(OperandVector &Operands) { 2627 return parseRegOrImmWithIntInputMods(Operands, false); 2628 } 2629 2630 OperandMatchResultTy AMDGPUAsmParser::parseVReg32OrOff(OperandVector &Operands) { 2631 auto Loc = getLoc(); 2632 if (trySkipId("off")) { 2633 Operands.push_back(AMDGPUOperand::CreateImm(this, 0, Loc, 2634 AMDGPUOperand::ImmTyOff, false)); 2635 return MatchOperand_Success; 2636 } 2637 2638 if (!isRegister()) 2639 return MatchOperand_NoMatch; 2640 2641 std::unique_ptr<AMDGPUOperand> Reg = parseRegister(); 2642 if (Reg) { 2643 Operands.push_back(std::move(Reg)); 2644 return MatchOperand_Success; 2645 } 2646 2647 return MatchOperand_ParseFail; 2648 2649 } 2650 2651 unsigned AMDGPUAsmParser::checkTargetMatchPredicate(MCInst &Inst) { 2652 uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags; 2653 2654 if ((getForcedEncodingSize() == 32 && (TSFlags & SIInstrFlags::VOP3)) || 2655 (getForcedEncodingSize() == 64 && !(TSFlags & SIInstrFlags::VOP3)) || 2656 (isForcedDPP() && !(TSFlags & SIInstrFlags::DPP)) || 2657 (isForcedSDWA() && !(TSFlags & SIInstrFlags::SDWA)) ) 2658 return Match_InvalidOperand; 2659 2660 if ((TSFlags & SIInstrFlags::VOP3) && 2661 (TSFlags & SIInstrFlags::VOPAsmPrefer32Bit) && 2662 getForcedEncodingSize() != 64) 2663 return Match_PreferE32; 2664 2665 if (Inst.getOpcode() == AMDGPU::V_MAC_F32_sdwa_vi || 2666 Inst.getOpcode() == AMDGPU::V_MAC_F16_sdwa_vi) { 2667 // v_mac_f32/16 allow only dst_sel == DWORD; 2668 auto OpNum = 2669 
AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::dst_sel); 2670 const auto &Op = Inst.getOperand(OpNum); 2671 if (!Op.isImm() || Op.getImm() != AMDGPU::SDWA::SdwaSel::DWORD) { 2672 return Match_InvalidOperand; 2673 } 2674 } 2675 2676 return Match_Success; 2677 } 2678 2679 // What asm variants we should check 2680 ArrayRef<unsigned> AMDGPUAsmParser::getMatchedVariants() const { 2681 if (getForcedEncodingSize() == 32) { 2682 static const unsigned Variants[] = {AMDGPUAsmVariants::DEFAULT}; 2683 return makeArrayRef(Variants); 2684 } 2685 2686 if (isForcedVOP3()) { 2687 static const unsigned Variants[] = {AMDGPUAsmVariants::VOP3}; 2688 return makeArrayRef(Variants); 2689 } 2690 2691 if (isForcedSDWA()) { 2692 static const unsigned Variants[] = {AMDGPUAsmVariants::SDWA, 2693 AMDGPUAsmVariants::SDWA9}; 2694 return makeArrayRef(Variants); 2695 } 2696 2697 if (isForcedDPP()) { 2698 static const unsigned Variants[] = {AMDGPUAsmVariants::DPP}; 2699 return makeArrayRef(Variants); 2700 } 2701 2702 static const unsigned Variants[] = { 2703 AMDGPUAsmVariants::DEFAULT, AMDGPUAsmVariants::VOP3, 2704 AMDGPUAsmVariants::SDWA, AMDGPUAsmVariants::SDWA9, AMDGPUAsmVariants::DPP 2705 }; 2706 2707 return makeArrayRef(Variants); 2708 } 2709 2710 unsigned AMDGPUAsmParser::findImplicitSGPRReadInVOP(const MCInst &Inst) const { 2711 const MCInstrDesc &Desc = MII.get(Inst.getOpcode()); 2712 const unsigned Num = Desc.getNumImplicitUses(); 2713 for (unsigned i = 0; i < Num; ++i) { 2714 unsigned Reg = Desc.ImplicitUses[i]; 2715 switch (Reg) { 2716 case AMDGPU::FLAT_SCR: 2717 case AMDGPU::VCC: 2718 case AMDGPU::VCC_LO: 2719 case AMDGPU::VCC_HI: 2720 case AMDGPU::M0: 2721 return Reg; 2722 default: 2723 break; 2724 } 2725 } 2726 return AMDGPU::NoRegister; 2727 } 2728 2729 // NB: This code is correct only when used to check constant 2730 // bus limitations because GFX7 supports no f16 inline constants. 2731 // Note that there are no cases when a GFX7 opcode violates 2732 // constant bus limitations due to the use of an f16 constant.
2733 bool AMDGPUAsmParser::isInlineConstant(const MCInst &Inst, 2734 unsigned OpIdx) const { 2735 const MCInstrDesc &Desc = MII.get(Inst.getOpcode()); 2736 2737 if (!AMDGPU::isSISrcOperand(Desc, OpIdx)) { 2738 return false; 2739 } 2740 2741 const MCOperand &MO = Inst.getOperand(OpIdx); 2742 2743 int64_t Val = MO.getImm(); 2744 auto OpSize = AMDGPU::getOperandSize(Desc, OpIdx); 2745 2746 switch (OpSize) { // expected operand size 2747 case 8: 2748 return AMDGPU::isInlinableLiteral64(Val, hasInv2PiInlineImm()); 2749 case 4: 2750 return AMDGPU::isInlinableLiteral32(Val, hasInv2PiInlineImm()); 2751 case 2: { 2752 const unsigned OperandType = Desc.OpInfo[OpIdx].OperandType; 2753 if (OperandType == AMDGPU::OPERAND_REG_INLINE_C_V2INT16 || 2754 OperandType == AMDGPU::OPERAND_REG_INLINE_C_V2FP16 || 2755 OperandType == AMDGPU::OPERAND_REG_INLINE_AC_V2INT16 || 2756 OperandType == AMDGPU::OPERAND_REG_INLINE_AC_V2FP16 || 2757 OperandType == AMDGPU::OPERAND_REG_IMM_V2INT16 || 2758 OperandType == AMDGPU::OPERAND_REG_IMM_V2FP16) { 2759 return AMDGPU::isInlinableLiteralV216(Val, hasInv2PiInlineImm()); 2760 } else { 2761 return AMDGPU::isInlinableLiteral16(Val, hasInv2PiInlineImm()); 2762 } 2763 } 2764 default: 2765 llvm_unreachable("invalid operand size"); 2766 } 2767 } 2768 2769 unsigned AMDGPUAsmParser::getConstantBusLimit(unsigned Opcode) const { 2770 if (!isGFX10()) 2771 return 1; 2772 2773 switch (Opcode) { 2774 // 64-bit shift instructions can use only one scalar value input 2775 case AMDGPU::V_LSHLREV_B64: 2776 case AMDGPU::V_LSHLREV_B64_gfx10: 2777 case AMDGPU::V_LSHL_B64: 2778 case AMDGPU::V_LSHRREV_B64: 2779 case AMDGPU::V_LSHRREV_B64_gfx10: 2780 case AMDGPU::V_LSHR_B64: 2781 case AMDGPU::V_ASHRREV_I64: 2782 case AMDGPU::V_ASHRREV_I64_gfx10: 2783 case AMDGPU::V_ASHR_I64: 2784 return 1; 2785 default: 2786 return 2; 2787 } 2788 } 2789 2790 bool AMDGPUAsmParser::usesConstantBus(const MCInst &Inst, unsigned OpIdx) { 2791 const MCOperand &MO = Inst.getOperand(OpIdx); 2792 if (MO.isImm()) { 2793 return !isInlineConstant(Inst, OpIdx); 2794 } else if (MO.isReg()) { 2795 auto Reg = MO.getReg(); 2796 const MCRegisterInfo *TRI = getContext().getRegisterInfo(); 2797 return isSGPR(mc2PseudoReg(Reg), TRI) && Reg != SGPR_NULL; 2798 } else { 2799 return true; 2800 } 2801 } 2802 2803 bool AMDGPUAsmParser::validateConstantBusLimitations(const MCInst &Inst) { 2804 const unsigned Opcode = Inst.getOpcode(); 2805 const MCInstrDesc &Desc = MII.get(Opcode); 2806 unsigned ConstantBusUseCount = 0; 2807 unsigned NumLiterals = 0; 2808 unsigned LiteralSize; 2809 2810 if (Desc.TSFlags & 2811 (SIInstrFlags::VOPC | 2812 SIInstrFlags::VOP1 | SIInstrFlags::VOP2 | 2813 SIInstrFlags::VOP3 | SIInstrFlags::VOP3P | 2814 SIInstrFlags::SDWA)) { 2815 // Check special imm operands (used by madmk, etc) 2816 if (AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::imm) != -1) { 2817 ++ConstantBusUseCount; 2818 } 2819 2820 SmallDenseSet<unsigned> SGPRsUsed; 2821 unsigned SGPRUsed = findImplicitSGPRReadInVOP(Inst); 2822 if (SGPRUsed != AMDGPU::NoRegister) { 2823 SGPRsUsed.insert(SGPRUsed); 2824 ++ConstantBusUseCount; 2825 } 2826 2827 const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0); 2828 const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1); 2829 const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2); 2830 2831 const int OpIndices[] = { Src0Idx, Src1Idx, Src2Idx }; 2832 2833 for (int OpIdx : OpIndices) { 2834 if (OpIdx == -1) break; 2835 2836 const MCOperand &MO = 
Inst.getOperand(OpIdx); 2837 if (usesConstantBus(Inst, OpIdx)) { 2838 if (MO.isReg()) { 2839 const unsigned Reg = mc2PseudoReg(MO.getReg()); 2840 // Pairs of registers with a partial intersection like these 2841 // s0, s[0:1] 2842 // flat_scratch_lo, flat_scratch 2843 // flat_scratch_lo, flat_scratch_hi 2844 // are theoretically valid but they are disabled anyway. 2845 // Note that this code mimics SIInstrInfo::verifyInstruction 2846 if (!SGPRsUsed.count(Reg)) { 2847 SGPRsUsed.insert(Reg); 2848 ++ConstantBusUseCount; 2849 } 2850 } else { // Expression or a literal 2851 2852 if (Desc.OpInfo[OpIdx].OperandType == MCOI::OPERAND_IMMEDIATE) 2853 continue; // special operand like VINTERP attr_chan 2854 2855 // An instruction may use only one literal. 2856 // This has been validated in the previous step. 2857 // See validateVOP3Literal. 2858 // This literal may be used as more than one operand. 2859 // If all these operands are of the same size, 2860 // this literal counts as one scalar value. 2861 // Otherwise it counts as 2 scalar values. 2862 // See "GFX10 Shader Programming", section 3.6.2.3. 2863 2864 unsigned Size = AMDGPU::getOperandSize(Desc, OpIdx); 2865 if (Size < 4) Size = 4; 2866 2867 if (NumLiterals == 0) { 2868 NumLiterals = 1; 2869 LiteralSize = Size; 2870 } else if (LiteralSize != Size) { 2871 NumLiterals = 2; 2872 } 2873 } 2874 } 2875 } 2876 } 2877 ConstantBusUseCount += NumLiterals; 2878 2879 return ConstantBusUseCount <= getConstantBusLimit(Opcode); 2880 } 2881 2882 bool AMDGPUAsmParser::validateEarlyClobberLimitations(const MCInst &Inst) { 2883 const unsigned Opcode = Inst.getOpcode(); 2884 const MCInstrDesc &Desc = MII.get(Opcode); 2885 2886 const int DstIdx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::vdst); 2887 if (DstIdx == -1 || 2888 Desc.getOperandConstraint(DstIdx, MCOI::EARLY_CLOBBER) == -1) { 2889 return true; 2890 } 2891 2892 const MCRegisterInfo *TRI = getContext().getRegisterInfo(); 2893 2894 const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0); 2895 const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1); 2896 const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2); 2897 2898 assert(DstIdx != -1); 2899 const MCOperand &Dst = Inst.getOperand(DstIdx); 2900 assert(Dst.isReg()); 2901 const unsigned DstReg = mc2PseudoReg(Dst.getReg()); 2902 2903 const int SrcIndices[] = { Src0Idx, Src1Idx, Src2Idx }; 2904 2905 for (int SrcIdx : SrcIndices) { 2906 if (SrcIdx == -1) break; 2907 const MCOperand &Src = Inst.getOperand(SrcIdx); 2908 if (Src.isReg()) { 2909 const unsigned SrcReg = mc2PseudoReg(Src.getReg()); 2910 if (isRegIntersect(DstReg, SrcReg, TRI)) { 2911 return false; 2912 } 2913 } 2914 } 2915 2916 return true; 2917 } 2918 2919 bool AMDGPUAsmParser::validateIntClampSupported(const MCInst &Inst) { 2920 2921 const unsigned Opc = Inst.getOpcode(); 2922 const MCInstrDesc &Desc = MII.get(Opc); 2923 2924 if ((Desc.TSFlags & SIInstrFlags::IntClamp) != 0 && !hasIntClamp()) { 2925 int ClampIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp); 2926 assert(ClampIdx != -1); 2927 return Inst.getOperand(ClampIdx).getImm() == 0; 2928 } 2929 2930 return true; 2931 } 2932 2933 bool AMDGPUAsmParser::validateMIMGDataSize(const MCInst &Inst) { 2934 2935 const unsigned Opc = Inst.getOpcode(); 2936 const MCInstrDesc &Desc = MII.get(Opc); 2937 2938 if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0) 2939 return true; 2940 2941 int VDataIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdata); 2942 int DMaskIdx
= AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask); 2943 int TFEIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::tfe); 2944 2945 assert(VDataIdx != -1); 2946 assert(DMaskIdx != -1); 2947 assert(TFEIdx != -1); 2948 2949 unsigned VDataSize = AMDGPU::getRegOperandSize(getMRI(), Desc, VDataIdx); 2950 unsigned TFESize = Inst.getOperand(TFEIdx).getImm()? 1 : 0; 2951 unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf; 2952 if (DMask == 0) 2953 DMask = 1; 2954 2955 unsigned DataSize = 2956 (Desc.TSFlags & SIInstrFlags::Gather4) ? 4 : countPopulation(DMask); 2957 if (hasPackedD16()) { 2958 int D16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::d16); 2959 if (D16Idx >= 0 && Inst.getOperand(D16Idx).getImm()) 2960 DataSize = (DataSize + 1) / 2; 2961 } 2962 2963 return (VDataSize / 4) == DataSize + TFESize; 2964 } 2965 2966 bool AMDGPUAsmParser::validateMIMGAddrSize(const MCInst &Inst) { 2967 const unsigned Opc = Inst.getOpcode(); 2968 const MCInstrDesc &Desc = MII.get(Opc); 2969 2970 if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0 || !isGFX10()) 2971 return true; 2972 2973 const AMDGPU::MIMGInfo *Info = AMDGPU::getMIMGInfo(Opc); 2974 const AMDGPU::MIMGBaseOpcodeInfo *BaseOpcode = 2975 AMDGPU::getMIMGBaseOpcodeInfo(Info->BaseOpcode); 2976 int VAddr0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vaddr0); 2977 int SrsrcIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::srsrc); 2978 int DimIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dim); 2979 2980 assert(VAddr0Idx != -1); 2981 assert(SrsrcIdx != -1); 2982 assert(DimIdx != -1); 2983 assert(SrsrcIdx > VAddr0Idx); 2984 2985 unsigned Dim = Inst.getOperand(DimIdx).getImm(); 2986 const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfoByEncoding(Dim); 2987 bool IsNSA = SrsrcIdx - VAddr0Idx > 1; 2988 unsigned VAddrSize = 2989 IsNSA ? SrsrcIdx - VAddr0Idx 2990 : AMDGPU::getRegOperandSize(getMRI(), Desc, VAddr0Idx) / 4; 2991 2992 unsigned AddrSize = BaseOpcode->NumExtraArgs + 2993 (BaseOpcode->Gradients ? DimInfo->NumGradients : 0) + 2994 (BaseOpcode->Coordinates ? DimInfo->NumCoords : 0) + 2995 (BaseOpcode->LodOrClampOrMip ? 1 : 0); 2996 if (!IsNSA) { 2997 if (AddrSize > 8) 2998 AddrSize = 16; 2999 else if (AddrSize > 4) 3000 AddrSize = 8; 3001 } 3002 3003 return VAddrSize == AddrSize; 3004 } 3005 3006 bool AMDGPUAsmParser::validateMIMGAtomicDMask(const MCInst &Inst) { 3007 3008 const unsigned Opc = Inst.getOpcode(); 3009 const MCInstrDesc &Desc = MII.get(Opc); 3010 3011 if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0) 3012 return true; 3013 if (!Desc.mayLoad() || !Desc.mayStore()) 3014 return true; // Not atomic 3015 3016 int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask); 3017 unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf; 3018 3019 // This is an incomplete check because image_atomic_cmpswap 3020 // may only use 0x3 and 0xf while other atomic operations 3021 // may use 0x1 and 0x3. However these limitations are 3022 // verified when we check that dmask matches dst size. 
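// For example (an illustrative reading of the rule above): a 32-bit
// image_atomic_add uses dmask 0x1 and a 32-bit image_atomic_cmpswap uses 0x3,
// since cmpswap carries both the data and the compare value in vdata.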
3023 return DMask == 0x1 || DMask == 0x3 || DMask == 0xf; 3024 } 3025 3026 bool AMDGPUAsmParser::validateMIMGGatherDMask(const MCInst &Inst) { 3027 3028 const unsigned Opc = Inst.getOpcode(); 3029 const MCInstrDesc &Desc = MII.get(Opc); 3030 3031 if ((Desc.TSFlags & SIInstrFlags::Gather4) == 0) 3032 return true; 3033 3034 int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask); 3035 unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf; 3036 3037 // GATHER4 instructions use dmask in a different fashion compared to 3038 // other MIMG instructions. The only useful DMASK values are 3039 // 1=red, 2=green, 4=blue, 8=alpha. (e.g. 1 returns 3040 // (red,red,red,red) etc.) The ISA document doesn't mention 3041 // this. 3042 return DMask == 0x1 || DMask == 0x2 || DMask == 0x4 || DMask == 0x8; 3043 } 3044 3045 bool AMDGPUAsmParser::validateMIMGD16(const MCInst &Inst) { 3046 3047 const unsigned Opc = Inst.getOpcode(); 3048 const MCInstrDesc &Desc = MII.get(Opc); 3049 3050 if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0) 3051 return true; 3052 3053 int D16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::d16); 3054 if (D16Idx >= 0 && Inst.getOperand(D16Idx).getImm()) { 3055 if (isCI() || isSI()) 3056 return false; 3057 } 3058 3059 return true; 3060 } 3061 3062 bool AMDGPUAsmParser::validateMIMGDim(const MCInst &Inst) { 3063 const unsigned Opc = Inst.getOpcode(); 3064 const MCInstrDesc &Desc = MII.get(Opc); 3065 3066 if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0) 3067 return true; 3068 3069 int DimIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dim); 3070 if (DimIdx < 0) 3071 return true; 3072 3073 long Imm = Inst.getOperand(DimIdx).getImm(); 3074 if (Imm < 0 || Imm >= 8) 3075 return false; 3076 3077 return true; 3078 } 3079 3080 static bool IsRevOpcode(const unsigned Opcode) 3081 { 3082 switch (Opcode) { 3083 case AMDGPU::V_SUBREV_F32_e32: 3084 case AMDGPU::V_SUBREV_F32_e64: 3085 case AMDGPU::V_SUBREV_F32_e32_gfx10: 3086 case AMDGPU::V_SUBREV_F32_e32_gfx6_gfx7: 3087 case AMDGPU::V_SUBREV_F32_e32_vi: 3088 case AMDGPU::V_SUBREV_F32_e64_gfx10: 3089 case AMDGPU::V_SUBREV_F32_e64_gfx6_gfx7: 3090 case AMDGPU::V_SUBREV_F32_e64_vi: 3091 3092 case AMDGPU::V_SUBREV_I32_e32: 3093 case AMDGPU::V_SUBREV_I32_e64: 3094 case AMDGPU::V_SUBREV_I32_e32_gfx6_gfx7: 3095 case AMDGPU::V_SUBREV_I32_e64_gfx6_gfx7: 3096 3097 case AMDGPU::V_SUBBREV_U32_e32: 3098 case AMDGPU::V_SUBBREV_U32_e64: 3099 case AMDGPU::V_SUBBREV_U32_e32_gfx6_gfx7: 3100 case AMDGPU::V_SUBBREV_U32_e32_vi: 3101 case AMDGPU::V_SUBBREV_U32_e64_gfx6_gfx7: 3102 case AMDGPU::V_SUBBREV_U32_e64_vi: 3103 3104 case AMDGPU::V_SUBREV_U32_e32: 3105 case AMDGPU::V_SUBREV_U32_e64: 3106 case AMDGPU::V_SUBREV_U32_e32_gfx9: 3107 case AMDGPU::V_SUBREV_U32_e32_vi: 3108 case AMDGPU::V_SUBREV_U32_e64_gfx9: 3109 case AMDGPU::V_SUBREV_U32_e64_vi: 3110 3111 case AMDGPU::V_SUBREV_F16_e32: 3112 case AMDGPU::V_SUBREV_F16_e64: 3113 case AMDGPU::V_SUBREV_F16_e32_gfx10: 3114 case AMDGPU::V_SUBREV_F16_e32_vi: 3115 case AMDGPU::V_SUBREV_F16_e64_gfx10: 3116 case AMDGPU::V_SUBREV_F16_e64_vi: 3117 3118 case AMDGPU::V_SUBREV_U16_e32: 3119 case AMDGPU::V_SUBREV_U16_e64: 3120 case AMDGPU::V_SUBREV_U16_e32_vi: 3121 case AMDGPU::V_SUBREV_U16_e64_vi: 3122 3123 case AMDGPU::V_SUBREV_CO_U32_e32_gfx9: 3124 case AMDGPU::V_SUBREV_CO_U32_e64_gfx10: 3125 case AMDGPU::V_SUBREV_CO_U32_e64_gfx9: 3126 3127 case AMDGPU::V_SUBBREV_CO_U32_e32_gfx9: 3128 case AMDGPU::V_SUBBREV_CO_U32_e64_gfx9: 3129 3130 case AMDGPU::V_SUBREV_NC_U32_e32_gfx10: 3131 case 
AMDGPU::V_SUBREV_NC_U32_e64_gfx10: 3132 3133 case AMDGPU::V_SUBREV_CO_CI_U32_e32_gfx10: 3134 case AMDGPU::V_SUBREV_CO_CI_U32_e64_gfx10: 3135 3136 case AMDGPU::V_LSHRREV_B32_e32: 3137 case AMDGPU::V_LSHRREV_B32_e64: 3138 case AMDGPU::V_LSHRREV_B32_e32_gfx6_gfx7: 3139 case AMDGPU::V_LSHRREV_B32_e64_gfx6_gfx7: 3140 case AMDGPU::V_LSHRREV_B32_e32_vi: 3141 case AMDGPU::V_LSHRREV_B32_e64_vi: 3142 case AMDGPU::V_LSHRREV_B32_e32_gfx10: 3143 case AMDGPU::V_LSHRREV_B32_e64_gfx10: 3144 3145 case AMDGPU::V_ASHRREV_I32_e32: 3146 case AMDGPU::V_ASHRREV_I32_e64: 3147 case AMDGPU::V_ASHRREV_I32_e32_gfx10: 3148 case AMDGPU::V_ASHRREV_I32_e32_gfx6_gfx7: 3149 case AMDGPU::V_ASHRREV_I32_e32_vi: 3150 case AMDGPU::V_ASHRREV_I32_e64_gfx10: 3151 case AMDGPU::V_ASHRREV_I32_e64_gfx6_gfx7: 3152 case AMDGPU::V_ASHRREV_I32_e64_vi: 3153 3154 case AMDGPU::V_LSHLREV_B32_e32: 3155 case AMDGPU::V_LSHLREV_B32_e64: 3156 case AMDGPU::V_LSHLREV_B32_e32_gfx10: 3157 case AMDGPU::V_LSHLREV_B32_e32_gfx6_gfx7: 3158 case AMDGPU::V_LSHLREV_B32_e32_vi: 3159 case AMDGPU::V_LSHLREV_B32_e64_gfx10: 3160 case AMDGPU::V_LSHLREV_B32_e64_gfx6_gfx7: 3161 case AMDGPU::V_LSHLREV_B32_e64_vi: 3162 3163 case AMDGPU::V_LSHLREV_B16_e32: 3164 case AMDGPU::V_LSHLREV_B16_e64: 3165 case AMDGPU::V_LSHLREV_B16_e32_vi: 3166 case AMDGPU::V_LSHLREV_B16_e64_vi: 3167 case AMDGPU::V_LSHLREV_B16_gfx10: 3168 3169 case AMDGPU::V_LSHRREV_B16_e32: 3170 case AMDGPU::V_LSHRREV_B16_e64: 3171 case AMDGPU::V_LSHRREV_B16_e32_vi: 3172 case AMDGPU::V_LSHRREV_B16_e64_vi: 3173 case AMDGPU::V_LSHRREV_B16_gfx10: 3174 3175 case AMDGPU::V_ASHRREV_I16_e32: 3176 case AMDGPU::V_ASHRREV_I16_e64: 3177 case AMDGPU::V_ASHRREV_I16_e32_vi: 3178 case AMDGPU::V_ASHRREV_I16_e64_vi: 3179 case AMDGPU::V_ASHRREV_I16_gfx10: 3180 3181 case AMDGPU::V_LSHLREV_B64: 3182 case AMDGPU::V_LSHLREV_B64_gfx10: 3183 case AMDGPU::V_LSHLREV_B64_vi: 3184 3185 case AMDGPU::V_LSHRREV_B64: 3186 case AMDGPU::V_LSHRREV_B64_gfx10: 3187 case AMDGPU::V_LSHRREV_B64_vi: 3188 3189 case AMDGPU::V_ASHRREV_I64: 3190 case AMDGPU::V_ASHRREV_I64_gfx10: 3191 case AMDGPU::V_ASHRREV_I64_vi: 3192 3193 case AMDGPU::V_PK_LSHLREV_B16: 3194 case AMDGPU::V_PK_LSHLREV_B16_gfx10: 3195 case AMDGPU::V_PK_LSHLREV_B16_vi: 3196 3197 case AMDGPU::V_PK_LSHRREV_B16: 3198 case AMDGPU::V_PK_LSHRREV_B16_gfx10: 3199 case AMDGPU::V_PK_LSHRREV_B16_vi: 3200 case AMDGPU::V_PK_ASHRREV_I16: 3201 case AMDGPU::V_PK_ASHRREV_I16_gfx10: 3202 case AMDGPU::V_PK_ASHRREV_I16_vi: 3203 return true; 3204 default: 3205 return false; 3206 } 3207 } 3208 3209 bool AMDGPUAsmParser::validateLdsDirect(const MCInst &Inst) { 3210 3211 using namespace SIInstrFlags; 3212 const unsigned Opcode = Inst.getOpcode(); 3213 const MCInstrDesc &Desc = MII.get(Opcode); 3214 3215 // lds_direct register is defined so that it can be used 3216 // with 9-bit operands only. Ignore encodings which do not accept these. 3217 if ((Desc.TSFlags & (VOP1 | VOP2 | VOP3 | VOPC | VOP3P | SIInstrFlags::SDWA)) == 0) 3218 return true; 3219 3220 const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0); 3221 const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1); 3222 const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2); 3223 3224 const int SrcIndices[] = { Src1Idx, Src2Idx }; 3225 3226 // lds_direct cannot be specified as either src1 or src2. 
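// Illustrative examples: "v_add_f32 v0, v1, lds_direct" is rejected by the
// loop below because lds_direct appears as src1, while
// "v_ashrrev_i32 v0, lds_direct, v1" passes the loop but is rejected by the
// final check because the opcode is a "rev" opcode.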
3227 for (int SrcIdx : SrcIndices) { 3228 if (SrcIdx == -1) break; 3229 const MCOperand &Src = Inst.getOperand(SrcIdx); 3230 if (Src.isReg() && Src.getReg() == LDS_DIRECT) { 3231 return false; 3232 } 3233 } 3234 3235 if (Src0Idx == -1) 3236 return true; 3237 3238 const MCOperand &Src = Inst.getOperand(Src0Idx); 3239 if (!Src.isReg() || Src.getReg() != LDS_DIRECT) 3240 return true; 3241 3242 // lds_direct is specified as src0. Check additional limitations. 3243 return (Desc.TSFlags & SIInstrFlags::SDWA) == 0 && !IsRevOpcode(Opcode); 3244 } 3245 3246 SMLoc AMDGPUAsmParser::getFlatOffsetLoc(const OperandVector &Operands) const { 3247 for (unsigned i = 1, e = Operands.size(); i != e; ++i) { 3248 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 3249 if (Op.isFlatOffset()) 3250 return Op.getStartLoc(); 3251 } 3252 return getLoc(); 3253 } 3254 3255 bool AMDGPUAsmParser::validateFlatOffset(const MCInst &Inst, 3256 const OperandVector &Operands) { 3257 uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags; 3258 if ((TSFlags & SIInstrFlags::FLAT) == 0) 3259 return true; 3260 3261 auto Opcode = Inst.getOpcode(); 3262 auto OpNum = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::offset); 3263 assert(OpNum != -1); 3264 3265 const auto &Op = Inst.getOperand(OpNum); 3266 if (!hasFlatOffsets() && Op.getImm() != 0) { 3267 Error(getFlatOffsetLoc(Operands), 3268 "flat offset modifier is not supported on this GPU"); 3269 return false; 3270 } 3271 3272 // Address offset is 12-bit signed for GFX10, 13-bit for GFX9. 3273 // For FLAT segment the offset must be positive; 3274 // MSB is ignored and forced to zero. 3275 unsigned OffsetSize = isGFX9() ? 13 : 12; 3276 if (TSFlags & SIInstrFlags::IsNonFlatSeg) { 3277 if (!isIntN(OffsetSize, Op.getImm())) { 3278 Error(getFlatOffsetLoc(Operands), 3279 isGFX9() ? "expected a 13-bit signed offset" : 3280 "expected a 12-bit signed offset"); 3281 return false; 3282 } 3283 } else { 3284 if (!isUIntN(OffsetSize - 1, Op.getImm())) { 3285 Error(getFlatOffsetLoc(Operands), 3286 isGFX9() ? 
"expected a 12-bit unsigned offset" : 3287 "expected an 11-bit unsigned offset"); 3288 return false; 3289 } 3290 } 3291 3292 return true; 3293 } 3294 3295 bool AMDGPUAsmParser::validateSOPLiteral(const MCInst &Inst) const { 3296 unsigned Opcode = Inst.getOpcode(); 3297 const MCInstrDesc &Desc = MII.get(Opcode); 3298 if (!(Desc.TSFlags & (SIInstrFlags::SOP2 | SIInstrFlags::SOPC))) 3299 return true; 3300 3301 const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0); 3302 const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1); 3303 3304 const int OpIndices[] = { Src0Idx, Src1Idx }; 3305 3306 unsigned NumExprs = 0; 3307 unsigned NumLiterals = 0; 3308 uint32_t LiteralValue; 3309 3310 for (int OpIdx : OpIndices) { 3311 if (OpIdx == -1) break; 3312 3313 const MCOperand &MO = Inst.getOperand(OpIdx); 3314 // Exclude special imm operands (like that used by s_set_gpr_idx_on) 3315 if (AMDGPU::isSISrcOperand(Desc, OpIdx)) { 3316 if (MO.isImm() && !isInlineConstant(Inst, OpIdx)) { 3317 uint32_t Value = static_cast<uint32_t>(MO.getImm()); 3318 if (NumLiterals == 0 || LiteralValue != Value) { 3319 LiteralValue = Value; 3320 ++NumLiterals; 3321 } 3322 } else if (MO.isExpr()) { 3323 ++NumExprs; 3324 } 3325 } 3326 } 3327 3328 return NumLiterals + NumExprs <= 1; 3329 } 3330 3331 bool AMDGPUAsmParser::validateOpSel(const MCInst &Inst) { 3332 const unsigned Opc = Inst.getOpcode(); 3333 if (Opc == AMDGPU::V_PERMLANE16_B32_gfx10 || 3334 Opc == AMDGPU::V_PERMLANEX16_B32_gfx10) { 3335 int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel); 3336 unsigned OpSel = Inst.getOperand(OpSelIdx).getImm(); 3337 3338 if (OpSel & ~3) 3339 return false; 3340 } 3341 return true; 3342 } 3343 3344 // Check if VCC register matches wavefront size 3345 bool AMDGPUAsmParser::validateVccOperand(unsigned Reg) const { 3346 auto FB = getFeatureBits(); 3347 return (FB[AMDGPU::FeatureWavefrontSize64] && Reg == AMDGPU::VCC) || 3348 (FB[AMDGPU::FeatureWavefrontSize32] && Reg == AMDGPU::VCC_LO); 3349 } 3350 3351 // VOP3 literal is only allowed in GFX10+ and only one can be used 3352 bool AMDGPUAsmParser::validateVOP3Literal(const MCInst &Inst) const { 3353 unsigned Opcode = Inst.getOpcode(); 3354 const MCInstrDesc &Desc = MII.get(Opcode); 3355 if (!(Desc.TSFlags & (SIInstrFlags::VOP3 | SIInstrFlags::VOP3P))) 3356 return true; 3357 3358 const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0); 3359 const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1); 3360 const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2); 3361 3362 const int OpIndices[] = { Src0Idx, Src1Idx, Src2Idx }; 3363 3364 unsigned NumExprs = 0; 3365 unsigned NumLiterals = 0; 3366 uint32_t LiteralValue; 3367 3368 for (int OpIdx : OpIndices) { 3369 if (OpIdx == -1) break; 3370 3371 const MCOperand &MO = Inst.getOperand(OpIdx); 3372 if (!MO.isImm() && !MO.isExpr()) 3373 continue; 3374 if (!AMDGPU::isSISrcOperand(Desc, OpIdx)) 3375 continue; 3376 3377 if (OpIdx == Src2Idx && (Desc.TSFlags & SIInstrFlags::IsMAI) && 3378 getFeatureBits()[AMDGPU::FeatureMFMAInlineLiteralBug]) 3379 return false; 3380 3381 if (MO.isImm() && !isInlineConstant(Inst, OpIdx)) { 3382 uint32_t Value = static_cast<uint32_t>(MO.getImm()); 3383 if (NumLiterals == 0 || LiteralValue != Value) { 3384 LiteralValue = Value; 3385 ++NumLiterals; 3386 } 3387 } else if (MO.isExpr()) { 3388 ++NumExprs; 3389 } 3390 } 3391 NumLiterals += NumExprs; 3392 3393 return !NumLiterals || 3394 (NumLiterals == 1 && 
getFeatureBits()[AMDGPU::FeatureVOP3Literal]); 3395 } 3396 3397 bool AMDGPUAsmParser::validateInstruction(const MCInst &Inst, 3398 const SMLoc &IDLoc, 3399 const OperandVector &Operands) { 3400 if (!validateLdsDirect(Inst)) { 3401 Error(IDLoc, 3402 "invalid use of lds_direct"); 3403 return false; 3404 } 3405 if (!validateSOPLiteral(Inst)) { 3406 Error(IDLoc, 3407 "only one literal operand is allowed"); 3408 return false; 3409 } 3410 if (!validateVOP3Literal(Inst)) { 3411 Error(IDLoc, 3412 "invalid literal operand"); 3413 return false; 3414 } 3415 if (!validateConstantBusLimitations(Inst)) { 3416 Error(IDLoc, 3417 "invalid operand (violates constant bus restrictions)"); 3418 return false; 3419 } 3420 if (!validateEarlyClobberLimitations(Inst)) { 3421 Error(IDLoc, 3422 "destination must be different than all sources"); 3423 return false; 3424 } 3425 if (!validateIntClampSupported(Inst)) { 3426 Error(IDLoc, 3427 "integer clamping is not supported on this GPU"); 3428 return false; 3429 } 3430 if (!validateOpSel(Inst)) { 3431 Error(IDLoc, 3432 "invalid op_sel operand"); 3433 return false; 3434 } 3435 // For MUBUF/MTBUF d16 is a part of opcode, so there is nothing to validate. 3436 if (!validateMIMGD16(Inst)) { 3437 Error(IDLoc, 3438 "d16 modifier is not supported on this GPU"); 3439 return false; 3440 } 3441 if (!validateMIMGDim(Inst)) { 3442 Error(IDLoc, "dim modifier is required on this GPU"); 3443 return false; 3444 } 3445 if (!validateMIMGDataSize(Inst)) { 3446 Error(IDLoc, 3447 "image data size does not match dmask and tfe"); 3448 return false; 3449 } 3450 if (!validateMIMGAddrSize(Inst)) { 3451 Error(IDLoc, 3452 "image address size does not match dim and a16"); 3453 return false; 3454 } 3455 if (!validateMIMGAtomicDMask(Inst)) { 3456 Error(IDLoc, 3457 "invalid atomic image dmask"); 3458 return false; 3459 } 3460 if (!validateMIMGGatherDMask(Inst)) { 3461 Error(IDLoc, 3462 "invalid image_gather dmask: only one bit must be set"); 3463 return false; 3464 } 3465 if (!validateFlatOffset(Inst, Operands)) { 3466 return false; 3467 } 3468 3469 return true; 3470 } 3471 3472 static std::string AMDGPUMnemonicSpellCheck(StringRef S, 3473 const FeatureBitset &FBS, 3474 unsigned VariantID = 0); 3475 3476 bool AMDGPUAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode, 3477 OperandVector &Operands, 3478 MCStreamer &Out, 3479 uint64_t &ErrorInfo, 3480 bool MatchingInlineAsm) { 3481 MCInst Inst; 3482 unsigned Result = Match_Success; 3483 for (auto Variant : getMatchedVariants()) { 3484 uint64_t EI; 3485 auto R = MatchInstructionImpl(Operands, Inst, EI, MatchingInlineAsm, 3486 Variant); 3487 // We order match statuses from least to most specific. 
We use most specific 3488 // status as resulting 3489 // Match_MnemonicFail < Match_InvalidOperand < Match_MissingFeature < Match_PreferE32 3490 if ((R == Match_Success) || 3491 (R == Match_PreferE32) || 3492 (R == Match_MissingFeature && Result != Match_PreferE32) || 3493 (R == Match_InvalidOperand && Result != Match_MissingFeature 3494 && Result != Match_PreferE32) || 3495 (R == Match_MnemonicFail && Result != Match_InvalidOperand 3496 && Result != Match_MissingFeature 3497 && Result != Match_PreferE32)) { 3498 Result = R; 3499 ErrorInfo = EI; 3500 } 3501 if (R == Match_Success) 3502 break; 3503 } 3504 3505 switch (Result) { 3506 default: break; 3507 case Match_Success: 3508 if (!validateInstruction(Inst, IDLoc, Operands)) { 3509 return true; 3510 } 3511 Inst.setLoc(IDLoc); 3512 Out.EmitInstruction(Inst, getSTI()); 3513 return false; 3514 3515 case Match_MissingFeature: 3516 return Error(IDLoc, "instruction not supported on this GPU"); 3517 3518 case Match_MnemonicFail: { 3519 FeatureBitset FBS = ComputeAvailableFeatures(getSTI().getFeatureBits()); 3520 std::string Suggestion = AMDGPUMnemonicSpellCheck( 3521 ((AMDGPUOperand &)*Operands[0]).getToken(), FBS); 3522 return Error(IDLoc, "invalid instruction" + Suggestion, 3523 ((AMDGPUOperand &)*Operands[0]).getLocRange()); 3524 } 3525 3526 case Match_InvalidOperand: { 3527 SMLoc ErrorLoc = IDLoc; 3528 if (ErrorInfo != ~0ULL) { 3529 if (ErrorInfo >= Operands.size()) { 3530 return Error(IDLoc, "too few operands for instruction"); 3531 } 3532 ErrorLoc = ((AMDGPUOperand &)*Operands[ErrorInfo]).getStartLoc(); 3533 if (ErrorLoc == SMLoc()) 3534 ErrorLoc = IDLoc; 3535 } 3536 return Error(ErrorLoc, "invalid operand for instruction"); 3537 } 3538 3539 case Match_PreferE32: 3540 return Error(IDLoc, "internal error: instruction without _e64 suffix " 3541 "should be encoded as e32"); 3542 } 3543 llvm_unreachable("Implement any new match types added!"); 3544 } 3545 3546 bool AMDGPUAsmParser::ParseAsAbsoluteExpression(uint32_t &Ret) { 3547 int64_t Tmp = -1; 3548 if (getLexer().isNot(AsmToken::Integer) && getLexer().isNot(AsmToken::Identifier)) { 3549 return true; 3550 } 3551 if (getParser().parseAbsoluteExpression(Tmp)) { 3552 return true; 3553 } 3554 Ret = static_cast<uint32_t>(Tmp); 3555 return false; 3556 } 3557 3558 bool AMDGPUAsmParser::ParseDirectiveMajorMinor(uint32_t &Major, 3559 uint32_t &Minor) { 3560 if (ParseAsAbsoluteExpression(Major)) 3561 return TokError("invalid major version"); 3562 3563 if (getLexer().isNot(AsmToken::Comma)) 3564 return TokError("minor version number required, comma expected"); 3565 Lex(); 3566 3567 if (ParseAsAbsoluteExpression(Minor)) 3568 return TokError("invalid minor version"); 3569 3570 return false; 3571 } 3572 3573 bool AMDGPUAsmParser::ParseDirectiveAMDGCNTarget() { 3574 if (getSTI().getTargetTriple().getArch() != Triple::amdgcn) 3575 return TokError("directive only supported for amdgcn architecture"); 3576 3577 std::string Target; 3578 3579 SMLoc TargetStart = getTok().getLoc(); 3580 if (getParser().parseEscapedString(Target)) 3581 return true; 3582 SMRange TargetRange = SMRange(TargetStart, getTok().getLoc()); 3583 3584 std::string ExpectedTarget; 3585 raw_string_ostream ExpectedTargetOS(ExpectedTarget); 3586 IsaInfo::streamIsaVersion(&getSTI(), ExpectedTargetOS); 3587 3588 if (Target != ExpectedTargetOS.str()) 3589 return getParser().Error(TargetRange.Start, "target must match options", 3590 TargetRange); 3591 3592 getTargetStreamer().EmitDirectiveAMDGCNTarget(Target); 3593 return false; 3594 } 3595 3596 bool 
AMDGPUAsmParser::OutOfRangeError(SMRange Range) { 3597 return getParser().Error(Range.Start, "value out of range", Range); 3598 } 3599 3600 bool AMDGPUAsmParser::calculateGPRBlocks( 3601 const FeatureBitset &Features, bool VCCUsed, bool FlatScrUsed, 3602 bool XNACKUsed, Optional<bool> EnableWavefrontSize32, unsigned NextFreeVGPR, 3603 SMRange VGPRRange, unsigned NextFreeSGPR, SMRange SGPRRange, 3604 unsigned &VGPRBlocks, unsigned &SGPRBlocks) { 3605 // TODO(scott.linder): These calculations are duplicated from 3606 // AMDGPUAsmPrinter::getSIProgramInfo and could be unified. 3607 IsaVersion Version = getIsaVersion(getSTI().getCPU()); 3608 3609 unsigned NumVGPRs = NextFreeVGPR; 3610 unsigned NumSGPRs = NextFreeSGPR; 3611 3612 if (Version.Major >= 10) 3613 NumSGPRs = 0; 3614 else { 3615 unsigned MaxAddressableNumSGPRs = 3616 IsaInfo::getAddressableNumSGPRs(&getSTI()); 3617 3618 if (Version.Major >= 8 && !Features.test(FeatureSGPRInitBug) && 3619 NumSGPRs > MaxAddressableNumSGPRs) 3620 return OutOfRangeError(SGPRRange); 3621 3622 NumSGPRs += 3623 IsaInfo::getNumExtraSGPRs(&getSTI(), VCCUsed, FlatScrUsed, XNACKUsed); 3624 3625 if ((Version.Major <= 7 || Features.test(FeatureSGPRInitBug)) && 3626 NumSGPRs > MaxAddressableNumSGPRs) 3627 return OutOfRangeError(SGPRRange); 3628 3629 if (Features.test(FeatureSGPRInitBug)) 3630 NumSGPRs = IsaInfo::FIXED_NUM_SGPRS_FOR_INIT_BUG; 3631 } 3632 3633 VGPRBlocks = 3634 IsaInfo::getNumVGPRBlocks(&getSTI(), NumVGPRs, EnableWavefrontSize32); 3635 SGPRBlocks = IsaInfo::getNumSGPRBlocks(&getSTI(), NumSGPRs); 3636 3637 return false; 3638 } 3639 3640 bool AMDGPUAsmParser::ParseDirectiveAMDHSAKernel() { 3641 if (getSTI().getTargetTriple().getArch() != Triple::amdgcn) 3642 return TokError("directive only supported for amdgcn architecture"); 3643 3644 if (getSTI().getTargetTriple().getOS() != Triple::AMDHSA) 3645 return TokError("directive only supported for amdhsa OS"); 3646 3647 StringRef KernelName; 3648 if (getParser().parseIdentifier(KernelName)) 3649 return true; 3650 3651 kernel_descriptor_t KD = getDefaultAmdhsaKernelDescriptor(&getSTI()); 3652 3653 StringSet<> Seen; 3654 3655 IsaVersion IVersion = getIsaVersion(getSTI().getCPU()); 3656 3657 SMRange VGPRRange; 3658 uint64_t NextFreeVGPR = 0; 3659 SMRange SGPRRange; 3660 uint64_t NextFreeSGPR = 0; 3661 unsigned UserSGPRCount = 0; 3662 bool ReserveVCC = true; 3663 bool ReserveFlatScr = true; 3664 bool ReserveXNACK = hasXNACK(); 3665 Optional<bool> EnableWavefrontSize32; 3666 3667 while (true) { 3668 while (getLexer().is(AsmToken::EndOfStatement)) 3669 Lex(); 3670 3671 if (getLexer().isNot(AsmToken::Identifier)) 3672 return TokError("expected .amdhsa_ directive or .end_amdhsa_kernel"); 3673 3674 StringRef ID = getTok().getIdentifier(); 3675 SMRange IDRange = getTok().getLocRange(); 3676 Lex(); 3677 3678 if (ID == ".end_amdhsa_kernel") 3679 break; 3680 3681 if (Seen.find(ID) != Seen.end()) 3682 return TokError(".amdhsa_ directives cannot be repeated"); 3683 Seen.insert(ID); 3684 3685 SMLoc ValStart = getTok().getLoc(); 3686 int64_t IVal; 3687 if (getParser().parseAbsoluteExpression(IVal)) 3688 return true; 3689 SMLoc ValEnd = getTok().getLoc(); 3690 SMRange ValRange = SMRange(ValStart, ValEnd); 3691 3692 if (IVal < 0) 3693 return OutOfRangeError(ValRange); 3694 3695 uint64_t Val = IVal; 3696 3697 #define PARSE_BITS_ENTRY(FIELD, ENTRY, VALUE, RANGE) \ 3698 if (!isUInt<ENTRY##_WIDTH>(VALUE)) \ 3699 return OutOfRangeError(RANGE); \ 3700 AMDHSA_BITS_SET(FIELD, ENTRY, VALUE); 3701 3702 if (ID == 
".amdhsa_group_segment_fixed_size") { 3703 if (!isUInt<sizeof(KD.group_segment_fixed_size) * CHAR_BIT>(Val)) 3704 return OutOfRangeError(ValRange); 3705 KD.group_segment_fixed_size = Val; 3706 } else if (ID == ".amdhsa_private_segment_fixed_size") { 3707 if (!isUInt<sizeof(KD.private_segment_fixed_size) * CHAR_BIT>(Val)) 3708 return OutOfRangeError(ValRange); 3709 KD.private_segment_fixed_size = Val; 3710 } else if (ID == ".amdhsa_user_sgpr_private_segment_buffer") { 3711 PARSE_BITS_ENTRY(KD.kernel_code_properties, 3712 KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_BUFFER, 3713 Val, ValRange); 3714 if (Val) 3715 UserSGPRCount += 4; 3716 } else if (ID == ".amdhsa_user_sgpr_dispatch_ptr") { 3717 PARSE_BITS_ENTRY(KD.kernel_code_properties, 3718 KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR, Val, 3719 ValRange); 3720 if (Val) 3721 UserSGPRCount += 2; 3722 } else if (ID == ".amdhsa_user_sgpr_queue_ptr") { 3723 PARSE_BITS_ENTRY(KD.kernel_code_properties, 3724 KERNEL_CODE_PROPERTY_ENABLE_SGPR_QUEUE_PTR, Val, 3725 ValRange); 3726 if (Val) 3727 UserSGPRCount += 2; 3728 } else if (ID == ".amdhsa_user_sgpr_kernarg_segment_ptr") { 3729 PARSE_BITS_ENTRY(KD.kernel_code_properties, 3730 KERNEL_CODE_PROPERTY_ENABLE_SGPR_KERNARG_SEGMENT_PTR, 3731 Val, ValRange); 3732 if (Val) 3733 UserSGPRCount += 2; 3734 } else if (ID == ".amdhsa_user_sgpr_dispatch_id") { 3735 PARSE_BITS_ENTRY(KD.kernel_code_properties, 3736 KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_ID, Val, 3737 ValRange); 3738 if (Val) 3739 UserSGPRCount += 2; 3740 } else if (ID == ".amdhsa_user_sgpr_flat_scratch_init") { 3741 PARSE_BITS_ENTRY(KD.kernel_code_properties, 3742 KERNEL_CODE_PROPERTY_ENABLE_SGPR_FLAT_SCRATCH_INIT, Val, 3743 ValRange); 3744 if (Val) 3745 UserSGPRCount += 2; 3746 } else if (ID == ".amdhsa_user_sgpr_private_segment_size") { 3747 PARSE_BITS_ENTRY(KD.kernel_code_properties, 3748 KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_SIZE, 3749 Val, ValRange); 3750 if (Val) 3751 UserSGPRCount += 1; 3752 } else if (ID == ".amdhsa_wavefront_size32") { 3753 if (IVersion.Major < 10) 3754 return getParser().Error(IDRange.Start, "directive requires gfx10+", 3755 IDRange); 3756 EnableWavefrontSize32 = Val; 3757 PARSE_BITS_ENTRY(KD.kernel_code_properties, 3758 KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32, 3759 Val, ValRange); 3760 } else if (ID == ".amdhsa_system_sgpr_private_segment_wavefront_offset") { 3761 PARSE_BITS_ENTRY( 3762 KD.compute_pgm_rsrc2, 3763 COMPUTE_PGM_RSRC2_ENABLE_SGPR_PRIVATE_SEGMENT_WAVEFRONT_OFFSET, Val, 3764 ValRange); 3765 } else if (ID == ".amdhsa_system_sgpr_workgroup_id_x") { 3766 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 3767 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_X, Val, 3768 ValRange); 3769 } else if (ID == ".amdhsa_system_sgpr_workgroup_id_y") { 3770 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 3771 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Y, Val, 3772 ValRange); 3773 } else if (ID == ".amdhsa_system_sgpr_workgroup_id_z") { 3774 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 3775 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Z, Val, 3776 ValRange); 3777 } else if (ID == ".amdhsa_system_sgpr_workgroup_info") { 3778 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 3779 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_INFO, Val, 3780 ValRange); 3781 } else if (ID == ".amdhsa_system_vgpr_workitem_id") { 3782 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 3783 COMPUTE_PGM_RSRC2_ENABLE_VGPR_WORKITEM_ID, Val, 3784 ValRange); 3785 } else if (ID == ".amdhsa_next_free_vgpr") { 3786 VGPRRange = ValRange; 3787 NextFreeVGPR = Val; 3788 } else if (ID == 
".amdhsa_next_free_sgpr") { 3789 SGPRRange = ValRange; 3790 NextFreeSGPR = Val; 3791 } else if (ID == ".amdhsa_reserve_vcc") { 3792 if (!isUInt<1>(Val)) 3793 return OutOfRangeError(ValRange); 3794 ReserveVCC = Val; 3795 } else if (ID == ".amdhsa_reserve_flat_scratch") { 3796 if (IVersion.Major < 7) 3797 return getParser().Error(IDRange.Start, "directive requires gfx7+", 3798 IDRange); 3799 if (!isUInt<1>(Val)) 3800 return OutOfRangeError(ValRange); 3801 ReserveFlatScr = Val; 3802 } else if (ID == ".amdhsa_reserve_xnack_mask") { 3803 if (IVersion.Major < 8) 3804 return getParser().Error(IDRange.Start, "directive requires gfx8+", 3805 IDRange); 3806 if (!isUInt<1>(Val)) 3807 return OutOfRangeError(ValRange); 3808 ReserveXNACK = Val; 3809 } else if (ID == ".amdhsa_float_round_mode_32") { 3810 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, 3811 COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_32, Val, ValRange); 3812 } else if (ID == ".amdhsa_float_round_mode_16_64") { 3813 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, 3814 COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_16_64, Val, ValRange); 3815 } else if (ID == ".amdhsa_float_denorm_mode_32") { 3816 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, 3817 COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_32, Val, ValRange); 3818 } else if (ID == ".amdhsa_float_denorm_mode_16_64") { 3819 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, 3820 COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_16_64, Val, 3821 ValRange); 3822 } else if (ID == ".amdhsa_dx10_clamp") { 3823 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, 3824 COMPUTE_PGM_RSRC1_ENABLE_DX10_CLAMP, Val, ValRange); 3825 } else if (ID == ".amdhsa_ieee_mode") { 3826 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_ENABLE_IEEE_MODE, 3827 Val, ValRange); 3828 } else if (ID == ".amdhsa_fp16_overflow") { 3829 if (IVersion.Major < 9) 3830 return getParser().Error(IDRange.Start, "directive requires gfx9+", 3831 IDRange); 3832 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_FP16_OVFL, Val, 3833 ValRange); 3834 } else if (ID == ".amdhsa_workgroup_processor_mode") { 3835 if (IVersion.Major < 10) 3836 return getParser().Error(IDRange.Start, "directive requires gfx10+", 3837 IDRange); 3838 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_WGP_MODE, Val, 3839 ValRange); 3840 } else if (ID == ".amdhsa_memory_ordered") { 3841 if (IVersion.Major < 10) 3842 return getParser().Error(IDRange.Start, "directive requires gfx10+", 3843 IDRange); 3844 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_MEM_ORDERED, Val, 3845 ValRange); 3846 } else if (ID == ".amdhsa_forward_progress") { 3847 if (IVersion.Major < 10) 3848 return getParser().Error(IDRange.Start, "directive requires gfx10+", 3849 IDRange); 3850 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_FWD_PROGRESS, Val, 3851 ValRange); 3852 } else if (ID == ".amdhsa_exception_fp_ieee_invalid_op") { 3853 PARSE_BITS_ENTRY( 3854 KD.compute_pgm_rsrc2, 3855 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INVALID_OPERATION, Val, 3856 ValRange); 3857 } else if (ID == ".amdhsa_exception_fp_denorm_src") { 3858 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 3859 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_FP_DENORMAL_SOURCE, 3860 Val, ValRange); 3861 } else if (ID == ".amdhsa_exception_fp_ieee_div_zero") { 3862 PARSE_BITS_ENTRY( 3863 KD.compute_pgm_rsrc2, 3864 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_DIVISION_BY_ZERO, Val, 3865 ValRange); 3866 } else if (ID == ".amdhsa_exception_fp_ieee_overflow") { 3867 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 3868 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_OVERFLOW, 3869 Val, ValRange); 3870 } else if 
(ID == ".amdhsa_exception_fp_ieee_underflow") { 3871 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 3872 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_UNDERFLOW, 3873 Val, ValRange); 3874 } else if (ID == ".amdhsa_exception_fp_ieee_inexact") { 3875 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 3876 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INEXACT, 3877 Val, ValRange); 3878 } else if (ID == ".amdhsa_exception_int_div_zero") { 3879 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 3880 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_INT_DIVIDE_BY_ZERO, 3881 Val, ValRange); 3882 } else { 3883 return getParser().Error(IDRange.Start, 3884 "unknown .amdhsa_kernel directive", IDRange); 3885 } 3886 3887 #undef PARSE_BITS_ENTRY 3888 } 3889 3890 if (Seen.find(".amdhsa_next_free_vgpr") == Seen.end()) 3891 return TokError(".amdhsa_next_free_vgpr directive is required"); 3892 3893 if (Seen.find(".amdhsa_next_free_sgpr") == Seen.end()) 3894 return TokError(".amdhsa_next_free_sgpr directive is required"); 3895 3896 unsigned VGPRBlocks; 3897 unsigned SGPRBlocks; 3898 if (calculateGPRBlocks(getFeatureBits(), ReserveVCC, ReserveFlatScr, 3899 ReserveXNACK, EnableWavefrontSize32, NextFreeVGPR, 3900 VGPRRange, NextFreeSGPR, SGPRRange, VGPRBlocks, 3901 SGPRBlocks)) 3902 return true; 3903 3904 if (!isUInt<COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT_WIDTH>( 3905 VGPRBlocks)) 3906 return OutOfRangeError(VGPRRange); 3907 AMDHSA_BITS_SET(KD.compute_pgm_rsrc1, 3908 COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT, VGPRBlocks); 3909 3910 if (!isUInt<COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT_WIDTH>( 3911 SGPRBlocks)) 3912 return OutOfRangeError(SGPRRange); 3913 AMDHSA_BITS_SET(KD.compute_pgm_rsrc1, 3914 COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT, 3915 SGPRBlocks); 3916 3917 if (!isUInt<COMPUTE_PGM_RSRC2_USER_SGPR_COUNT_WIDTH>(UserSGPRCount)) 3918 return TokError("too many user SGPRs enabled"); 3919 AMDHSA_BITS_SET(KD.compute_pgm_rsrc2, COMPUTE_PGM_RSRC2_USER_SGPR_COUNT, 3920 UserSGPRCount); 3921 3922 getTargetStreamer().EmitAmdhsaKernelDescriptor( 3923 getSTI(), KernelName, KD, NextFreeVGPR, NextFreeSGPR, ReserveVCC, 3924 ReserveFlatScr, ReserveXNACK); 3925 return false; 3926 } 3927 3928 bool AMDGPUAsmParser::ParseDirectiveHSACodeObjectVersion() { 3929 uint32_t Major; 3930 uint32_t Minor; 3931 3932 if (ParseDirectiveMajorMinor(Major, Minor)) 3933 return true; 3934 3935 getTargetStreamer().EmitDirectiveHSACodeObjectVersion(Major, Minor); 3936 return false; 3937 } 3938 3939 bool AMDGPUAsmParser::ParseDirectiveHSACodeObjectISA() { 3940 uint32_t Major; 3941 uint32_t Minor; 3942 uint32_t Stepping; 3943 StringRef VendorName; 3944 StringRef ArchName; 3945 3946 // If this directive has no arguments, then use the ISA version for the 3947 // targeted GPU. 
3948 if (getLexer().is(AsmToken::EndOfStatement)) { 3949 AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU()); 3950 getTargetStreamer().EmitDirectiveHSACodeObjectISA(ISA.Major, ISA.Minor, 3951 ISA.Stepping, 3952 "AMD", "AMDGPU"); 3953 return false; 3954 } 3955 3956 if (ParseDirectiveMajorMinor(Major, Minor)) 3957 return true; 3958 3959 if (getLexer().isNot(AsmToken::Comma)) 3960 return TokError("stepping version number required, comma expected"); 3961 Lex(); 3962 3963 if (ParseAsAbsoluteExpression(Stepping)) 3964 return TokError("invalid stepping version"); 3965 3966 if (getLexer().isNot(AsmToken::Comma)) 3967 return TokError("vendor name required, comma expected"); 3968 Lex(); 3969 3970 if (getLexer().isNot(AsmToken::String)) 3971 return TokError("invalid vendor name"); 3972 3973 VendorName = getLexer().getTok().getStringContents(); 3974 Lex(); 3975 3976 if (getLexer().isNot(AsmToken::Comma)) 3977 return TokError("arch name required, comma expected"); 3978 Lex(); 3979 3980 if (getLexer().isNot(AsmToken::String)) 3981 return TokError("invalid arch name"); 3982 3983 ArchName = getLexer().getTok().getStringContents(); 3984 Lex(); 3985 3986 getTargetStreamer().EmitDirectiveHSACodeObjectISA(Major, Minor, Stepping, 3987 VendorName, ArchName); 3988 return false; 3989 } 3990 3991 bool AMDGPUAsmParser::ParseAMDKernelCodeTValue(StringRef ID, 3992 amd_kernel_code_t &Header) { 3993 // max_scratch_backing_memory_byte_size is deprecated. Ignore it while parsing 3994 // assembly for backwards compatibility. 3995 if (ID == "max_scratch_backing_memory_byte_size") { 3996 Parser.eatToEndOfStatement(); 3997 return false; 3998 } 3999 4000 SmallString<40> ErrStr; 4001 raw_svector_ostream Err(ErrStr); 4002 if (!parseAmdKernelCodeField(ID, getParser(), Header, Err)) { 4003 return TokError(Err.str()); 4004 } 4005 Lex(); 4006 4007 if (ID == "enable_wavefront_size32") { 4008 if (Header.code_properties & AMD_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32) { 4009 if (!isGFX10()) 4010 return TokError("enable_wavefront_size32=1 is only allowed on GFX10+"); 4011 if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize32]) 4012 return TokError("enable_wavefront_size32=1 requires +WavefrontSize32"); 4013 } else { 4014 if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize64]) 4015 return TokError("enable_wavefront_size32=0 requires +WavefrontSize64"); 4016 } 4017 } 4018 4019 if (ID == "wavefront_size") { 4020 if (Header.wavefront_size == 5) { 4021 if (!isGFX10()) 4022 return TokError("wavefront_size=5 is only allowed on GFX10+"); 4023 if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize32]) 4024 return TokError("wavefront_size=5 requires +WavefrontSize32"); 4025 } else if (Header.wavefront_size == 6) { 4026 if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize64]) 4027 return TokError("wavefront_size=6 requires +WavefrontSize64"); 4028 } 4029 } 4030 4031 if (ID == "enable_wgp_mode") { 4032 if (G_00B848_WGP_MODE(Header.compute_pgm_resource_registers) && !isGFX10()) 4033 return TokError("enable_wgp_mode=1 is only allowed on GFX10+"); 4034 } 4035 4036 if (ID == "enable_mem_ordered") { 4037 if (G_00B848_MEM_ORDERED(Header.compute_pgm_resource_registers) && !isGFX10()) 4038 return TokError("enable_mem_ordered=1 is only allowed on GFX10+"); 4039 } 4040 4041 if (ID == "enable_fwd_progress") { 4042 if (G_00B848_FWD_PROGRESS(Header.compute_pgm_resource_registers) && !isGFX10()) 4043 return TokError("enable_fwd_progress=1 is only allowed on GFX10+"); 4044 } 4045 4046 return false; 4047 } 4048 4049 bool 
AMDGPUAsmParser::ParseDirectiveAMDKernelCodeT() { 4050 amd_kernel_code_t Header; 4051 AMDGPU::initDefaultAMDKernelCodeT(Header, &getSTI()); 4052 4053 while (true) { 4054 // Lex EndOfStatement. This is in a while loop, because lexing a comment 4055 // will set the current token to EndOfStatement. 4056 while(getLexer().is(AsmToken::EndOfStatement)) 4057 Lex(); 4058 4059 if (getLexer().isNot(AsmToken::Identifier)) 4060 return TokError("expected value identifier or .end_amd_kernel_code_t"); 4061 4062 StringRef ID = getLexer().getTok().getIdentifier(); 4063 Lex(); 4064 4065 if (ID == ".end_amd_kernel_code_t") 4066 break; 4067 4068 if (ParseAMDKernelCodeTValue(ID, Header)) 4069 return true; 4070 } 4071 4072 getTargetStreamer().EmitAMDKernelCodeT(Header); 4073 4074 return false; 4075 } 4076 4077 bool AMDGPUAsmParser::ParseDirectiveAMDGPUHsaKernel() { 4078 if (getLexer().isNot(AsmToken::Identifier)) 4079 return TokError("expected symbol name"); 4080 4081 StringRef KernelName = Parser.getTok().getString(); 4082 4083 getTargetStreamer().EmitAMDGPUSymbolType(KernelName, 4084 ELF::STT_AMDGPU_HSA_KERNEL); 4085 Lex(); 4086 if (!AMDGPU::IsaInfo::hasCodeObjectV3(&getSTI())) 4087 KernelScope.initialize(getContext()); 4088 return false; 4089 } 4090 4091 bool AMDGPUAsmParser::ParseDirectiveISAVersion() { 4092 if (getSTI().getTargetTriple().getArch() != Triple::amdgcn) { 4093 return Error(getParser().getTok().getLoc(), 4094 ".amd_amdgpu_isa directive is not available on non-amdgcn " 4095 "architectures"); 4096 } 4097 4098 auto ISAVersionStringFromASM = getLexer().getTok().getStringContents(); 4099 4100 std::string ISAVersionStringFromSTI; 4101 raw_string_ostream ISAVersionStreamFromSTI(ISAVersionStringFromSTI); 4102 IsaInfo::streamIsaVersion(&getSTI(), ISAVersionStreamFromSTI); 4103 4104 if (ISAVersionStringFromASM != ISAVersionStreamFromSTI.str()) { 4105 return Error(getParser().getTok().getLoc(), 4106 ".amd_amdgpu_isa directive does not match triple and/or mcpu " 4107 "arguments specified through the command line"); 4108 } 4109 4110 getTargetStreamer().EmitISAVersion(ISAVersionStreamFromSTI.str()); 4111 Lex(); 4112 4113 return false; 4114 } 4115 4116 bool AMDGPUAsmParser::ParseDirectiveHSAMetadata() { 4117 const char *AssemblerDirectiveBegin; 4118 const char *AssemblerDirectiveEnd; 4119 std::tie(AssemblerDirectiveBegin, AssemblerDirectiveEnd) = 4120 AMDGPU::IsaInfo::hasCodeObjectV3(&getSTI()) 4121 ? std::make_tuple(HSAMD::V3::AssemblerDirectiveBegin, 4122 HSAMD::V3::AssemblerDirectiveEnd) 4123 : std::make_tuple(HSAMD::AssemblerDirectiveBegin, 4124 HSAMD::AssemblerDirectiveEnd); 4125 4126 if (getSTI().getTargetTriple().getOS() != Triple::AMDHSA) { 4127 return Error(getParser().getTok().getLoc(), 4128 (Twine(AssemblerDirectiveBegin) + Twine(" directive is " 4129 "not available on non-amdhsa OSes")).str()); 4130 } 4131 4132 std::string HSAMetadataString; 4133 if (ParseToEndDirective(AssemblerDirectiveBegin, AssemblerDirectiveEnd, 4134 HSAMetadataString)) 4135 return true; 4136 4137 if (IsaInfo::hasCodeObjectV3(&getSTI())) { 4138 if (!getTargetStreamer().EmitHSAMetadataV3(HSAMetadataString)) 4139 return Error(getParser().getTok().getLoc(), "invalid HSA metadata"); 4140 } else { 4141 if (!getTargetStreamer().EmitHSAMetadataV2(HSAMetadataString)) 4142 return Error(getParser().getTok().getLoc(), "invalid HSA metadata"); 4143 } 4144 4145 return false; 4146 } 4147 4148 /// Common code to parse out a block of text (typically YAML) between start and 4149 /// end directives. 
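/// Text is collected verbatim into \p CollectString (with the target's
/// statement separator between lines) until \p AssemblerDirectiveEnd is seen;
/// reaching end-of-file first is reported as an error.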
4150 bool AMDGPUAsmParser::ParseToEndDirective(const char *AssemblerDirectiveBegin, 4151 const char *AssemblerDirectiveEnd, 4152 std::string &CollectString) { 4153 4154 raw_string_ostream CollectStream(CollectString); 4155 4156 getLexer().setSkipSpace(false); 4157 4158 bool FoundEnd = false; 4159 while (!getLexer().is(AsmToken::Eof)) { 4160 while (getLexer().is(AsmToken::Space)) { 4161 CollectStream << getLexer().getTok().getString(); 4162 Lex(); 4163 } 4164 4165 if (getLexer().is(AsmToken::Identifier)) { 4166 StringRef ID = getLexer().getTok().getIdentifier(); 4167 if (ID == AssemblerDirectiveEnd) { 4168 Lex(); 4169 FoundEnd = true; 4170 break; 4171 } 4172 } 4173 4174 CollectStream << Parser.parseStringToEndOfStatement() 4175 << getContext().getAsmInfo()->getSeparatorString(); 4176 4177 Parser.eatToEndOfStatement(); 4178 } 4179 4180 getLexer().setSkipSpace(true); 4181 4182 if (getLexer().is(AsmToken::Eof) && !FoundEnd) { 4183 return TokError(Twine("expected directive ") + 4184 Twine(AssemblerDirectiveEnd) + Twine(" not found")); 4185 } 4186 4187 CollectStream.flush(); 4188 return false; 4189 } 4190 4191 /// Parse the assembler directive for new MsgPack-format PAL metadata. 4192 bool AMDGPUAsmParser::ParseDirectivePALMetadataBegin() { 4193 std::string String; 4194 if (ParseToEndDirective(AMDGPU::PALMD::AssemblerDirectiveBegin, 4195 AMDGPU::PALMD::AssemblerDirectiveEnd, String)) 4196 return true; 4197 4198 auto PALMetadata = getTargetStreamer().getPALMetadata(); 4199 if (!PALMetadata->setFromString(String)) 4200 return Error(getParser().getTok().getLoc(), "invalid PAL metadata"); 4201 return false; 4202 } 4203 4204 /// Parse the assembler directive for old linear-format PAL metadata. 4205 bool AMDGPUAsmParser::ParseDirectivePALMetadata() { 4206 if (getSTI().getTargetTriple().getOS() != Triple::AMDPAL) { 4207 return Error(getParser().getTok().getLoc(), 4208 (Twine(PALMD::AssemblerDirective) + Twine(" directive is " 4209 "not available on non-amdpal OSes")).str()); 4210 } 4211 4212 auto PALMetadata = getTargetStreamer().getPALMetadata(); 4213 PALMetadata->setLegacy(); 4214 for (;;) { 4215 uint32_t Key, Value; 4216 if (ParseAsAbsoluteExpression(Key)) { 4217 return TokError(Twine("invalid value in ") + 4218 Twine(PALMD::AssemblerDirective)); 4219 } 4220 if (getLexer().isNot(AsmToken::Comma)) { 4221 return TokError(Twine("expected an even number of values in ") + 4222 Twine(PALMD::AssemblerDirective)); 4223 } 4224 Lex(); 4225 if (ParseAsAbsoluteExpression(Value)) { 4226 return TokError(Twine("invalid value in ") + 4227 Twine(PALMD::AssemblerDirective)); 4228 } 4229 PALMetadata->setRegister(Key, Value); 4230 if (getLexer().isNot(AsmToken::Comma)) 4231 break; 4232 Lex(); 4233 } 4234 return false; 4235 } 4236 4237 /// ParseDirectiveAMDGPULDS 4238 /// ::= .amdgpu_lds identifier ',' size_expression [',' align_expression] 4239 bool AMDGPUAsmParser::ParseDirectiveAMDGPULDS() { 4240 if (getParser().checkForValidSection()) 4241 return true; 4242 4243 StringRef Name; 4244 SMLoc NameLoc = getLexer().getLoc(); 4245 if (getParser().parseIdentifier(Name)) 4246 return TokError("expected identifier in directive"); 4247 4248 MCSymbol *Symbol = getContext().getOrCreateSymbol(Name); 4249 if (parseToken(AsmToken::Comma, "expected ','")) 4250 return true; 4251 4252 unsigned LocalMemorySize = AMDGPU::IsaInfo::getLocalMemorySize(&getSTI()); 4253 4254 int64_t Size; 4255 SMLoc SizeLoc = getLexer().getLoc(); 4256 if (getParser().parseAbsoluteExpression(Size)) 4257 return true; 4258 if (Size < 0) 4259 return 
Error(SizeLoc, "size must be non-negative");
4260 if (Size > LocalMemorySize)
4261 return Error(SizeLoc, "size is too large");
4262
4263 int64_t Align = 4;
4264 if (getLexer().is(AsmToken::Comma)) {
4265 Lex();
4266 SMLoc AlignLoc = getLexer().getLoc();
4267 if (getParser().parseAbsoluteExpression(Align))
4268 return true;
4269 if (Align < 0 || !isPowerOf2_64(Align))
4270 return Error(AlignLoc, "alignment must be a power of two");
4271
4272 // Alignment larger than the size of LDS is possible in theory, as long
4273 // as the linker manages to place the symbol at address 0, but we do want
4274 // to make sure the alignment fits nicely into a 32-bit integer.
4275 if (Align >= 1u << 31)
4276 return Error(AlignLoc, "alignment is too large");
4277 }
4278
4279 if (parseToken(AsmToken::EndOfStatement,
4280 "unexpected token in '.amdgpu_lds' directive"))
4281 return true;
4282
4283 Symbol->redefineIfPossible();
4284 if (!Symbol->isUndefined())
4285 return Error(NameLoc, "invalid symbol redefinition");
4286
4287 getTargetStreamer().emitAMDGPULDS(Symbol, Size, Align);
4288 return false;
4289 }
4290
4291 bool AMDGPUAsmParser::ParseDirective(AsmToken DirectiveID) {
4292 StringRef IDVal = DirectiveID.getString();
4293
4294 if (AMDGPU::IsaInfo::hasCodeObjectV3(&getSTI())) {
4295 if (IDVal == ".amdgcn_target")
4296 return ParseDirectiveAMDGCNTarget();
4297
4298 if (IDVal == ".amdhsa_kernel")
4299 return ParseDirectiveAMDHSAKernel();
4300
4301 // TODO: Restructure/combine with PAL metadata directive.
4302 if (IDVal == AMDGPU::HSAMD::V3::AssemblerDirectiveBegin)
4303 return ParseDirectiveHSAMetadata();
4304 } else {
4305 if (IDVal == ".hsa_code_object_version")
4306 return ParseDirectiveHSACodeObjectVersion();
4307
4308 if (IDVal == ".hsa_code_object_isa")
4309 return ParseDirectiveHSACodeObjectISA();
4310
4311 if (IDVal == ".amd_kernel_code_t")
4312 return ParseDirectiveAMDKernelCodeT();
4313
4314 if (IDVal == ".amdgpu_hsa_kernel")
4315 return ParseDirectiveAMDGPUHsaKernel();
4316
4317 if (IDVal == ".amd_amdgpu_isa")
4318 return ParseDirectiveISAVersion();
4319
4320 if (IDVal == AMDGPU::HSAMD::AssemblerDirectiveBegin)
4321 return ParseDirectiveHSAMetadata();
4322 }
4323
4324 if (IDVal == ".amdgpu_lds")
4325 return ParseDirectiveAMDGPULDS();
4326
4327 if (IDVal == PALMD::AssemblerDirectiveBegin)
4328 return ParseDirectivePALMetadataBegin();
4329
4330 if (IDVal == PALMD::AssemblerDirective)
4331 return ParseDirectivePALMetadata();
4332
4333 return true;
4334 }
4335
4336 bool AMDGPUAsmParser::subtargetHasRegister(const MCRegisterInfo &MRI,
4337 unsigned RegNo) const {
4338
4339 for (MCRegAliasIterator R(AMDGPU::TTMP12_TTMP13_TTMP14_TTMP15, &MRI, true);
4340 R.isValid(); ++R) {
4341 if (*R == RegNo)
4342 return isGFX9() || isGFX10();
4343 }
4344
4345 // GFX10 has 2 more SGPRs 104 and 105.
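// Registers aliasing s104/s105 are therefore valid operands only on subtargets
// that actually expose them (see hasSGPR104_SGPR105() below).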
4346 for (MCRegAliasIterator R(AMDGPU::SGPR104_SGPR105, &MRI, true);
4347 R.isValid(); ++R) {
4348 if (*R == RegNo)
4349 return hasSGPR104_SGPR105();
4350 }
4351
4352 switch (RegNo) {
4353 case AMDGPU::SRC_SHARED_BASE:
4354 case AMDGPU::SRC_SHARED_LIMIT:
4355 case AMDGPU::SRC_PRIVATE_BASE:
4356 case AMDGPU::SRC_PRIVATE_LIMIT:
4357 case AMDGPU::SRC_POPS_EXITING_WAVE_ID:
4358 return !isCI() && !isSI() && !isVI();
4359 case AMDGPU::TBA:
4360 case AMDGPU::TBA_LO:
4361 case AMDGPU::TBA_HI:
4362 case AMDGPU::TMA:
4363 case AMDGPU::TMA_LO:
4364 case AMDGPU::TMA_HI:
4365 return !isGFX9() && !isGFX10();
4366 case AMDGPU::XNACK_MASK:
4367 case AMDGPU::XNACK_MASK_LO:
4368 case AMDGPU::XNACK_MASK_HI:
4369 return !isCI() && !isSI() && !isGFX10() && hasXNACK();
4370 case AMDGPU::SGPR_NULL:
4371 return isGFX10();
4372 default:
4373 break;
4374 }
4375
4376 if (isCI())
4377 return true;
4378
4379 if (isSI() || isGFX10()) {
4380 // No flat_scr on SI.
4381 // On GFX10 flat scratch is not a valid register operand and can only be
4382 // accessed with s_setreg/s_getreg.
4383 switch (RegNo) {
4384 case AMDGPU::FLAT_SCR:
4385 case AMDGPU::FLAT_SCR_LO:
4386 case AMDGPU::FLAT_SCR_HI:
4387 return false;
4388 default:
4389 return true;
4390 }
4391 }
4392
4393 // VI only has 102 SGPRs, so make sure we aren't trying to use the 2 more that
4394 // SI/CI have.
4395 for (MCRegAliasIterator R(AMDGPU::SGPR102_SGPR103, &MRI, true);
4396 R.isValid(); ++R) {
4397 if (*R == RegNo)
4398 return hasSGPR102_SGPR103();
4399 }
4400
4401 return true;
4402 }
4403
4404 OperandMatchResultTy
4405 AMDGPUAsmParser::parseOperand(OperandVector &Operands, StringRef Mnemonic,
4406 OperandMode Mode) {
4407 // Try to parse with a custom parser
4408 OperandMatchResultTy ResTy = MatchOperandParserImpl(Operands, Mnemonic);
4409
4410 // If we successfully parsed the operand or if there was an error parsing,
4411 // we are done.
4412 //
4413 // If we are parsing after we reach EndOfStatement then this means we
4414 // are appending default values to the Operands list. This is only done
4415 // by a custom parser, so we shouldn't continue on to the generic parsing.
4416 if (ResTy == MatchOperand_Success || ResTy == MatchOperand_ParseFail ||
4417 getLexer().is(AsmToken::EndOfStatement))
4418 return ResTy;
4419
4420 if (Mode == OperandMode_NSA && getLexer().is(AsmToken::LBrac)) {
4421 unsigned Prefix = Operands.size();
4422 SMLoc LBraceLoc = getTok().getLoc();
4423 Parser.Lex(); // eat the '['
4424
4425 for (;;) {
4426 ResTy = parseReg(Operands);
4427 if (ResTy != MatchOperand_Success)
4428 return ResTy;
4429
4430 if (getLexer().is(AsmToken::RBrac))
4431 break;
4432
4433 if (getLexer().isNot(AsmToken::Comma))
4434 return MatchOperand_ParseFail;
4435 Parser.Lex();
4436 }
4437
4438 if (Operands.size() - Prefix > 1) {
4439 Operands.insert(Operands.begin() + Prefix,
4440 AMDGPUOperand::CreateToken(this, "[", LBraceLoc));
4441 Operands.push_back(AMDGPUOperand::CreateToken(this, "]",
4442 getTok().getLoc()));
4443 }
4444
4445 Parser.Lex(); // eat the ']'
4446 return MatchOperand_Success;
4447 }
4448
4449 return parseRegOrImm(Operands);
4450 }
4451
4452 StringRef AMDGPUAsmParser::parseMnemonicSuffix(StringRef Name) {
4453 // Clear any forced encodings from the previous instruction.
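// A mnemonic suffix, e.g. the "_e64" in "v_add_f32_e64", selects a particular
// encoding: the suffix is stripped here and the corresponding forced-encoding
// flag is recorded for the matcher.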
4454 setForcedEncodingSize(0); 4455 setForcedDPP(false); 4456 setForcedSDWA(false); 4457 4458 if (Name.endswith("_e64")) { 4459 setForcedEncodingSize(64); 4460 return Name.substr(0, Name.size() - 4); 4461 } else if (Name.endswith("_e32")) { 4462 setForcedEncodingSize(32); 4463 return Name.substr(0, Name.size() - 4); 4464 } else if (Name.endswith("_dpp")) { 4465 setForcedDPP(true); 4466 return Name.substr(0, Name.size() - 4); 4467 } else if (Name.endswith("_sdwa")) { 4468 setForcedSDWA(true); 4469 return Name.substr(0, Name.size() - 5); 4470 } 4471 return Name; 4472 } 4473 4474 bool AMDGPUAsmParser::ParseInstruction(ParseInstructionInfo &Info, 4475 StringRef Name, 4476 SMLoc NameLoc, OperandVector &Operands) { 4477 // Add the instruction mnemonic 4478 Name = parseMnemonicSuffix(Name); 4479 Operands.push_back(AMDGPUOperand::CreateToken(this, Name, NameLoc)); 4480 4481 bool IsMIMG = Name.startswith("image_"); 4482 4483 while (!getLexer().is(AsmToken::EndOfStatement)) { 4484 OperandMode Mode = OperandMode_Default; 4485 if (IsMIMG && isGFX10() && Operands.size() == 2) 4486 Mode = OperandMode_NSA; 4487 OperandMatchResultTy Res = parseOperand(Operands, Name, Mode); 4488 4489 // Eat the comma or space if there is one. 4490 if (getLexer().is(AsmToken::Comma)) 4491 Parser.Lex(); 4492 4493 switch (Res) { 4494 case MatchOperand_Success: break; 4495 case MatchOperand_ParseFail: 4496 // FIXME: use real operand location rather than the current location. 4497 Error(getLexer().getLoc(), "failed parsing operand."); 4498 while (!getLexer().is(AsmToken::EndOfStatement)) { 4499 Parser.Lex(); 4500 } 4501 return true; 4502 case MatchOperand_NoMatch: 4503 // FIXME: use real operand location rather than the current location. 4504 Error(getLexer().getLoc(), "not a valid operand."); 4505 while (!getLexer().is(AsmToken::EndOfStatement)) { 4506 Parser.Lex(); 4507 } 4508 return true; 4509 } 4510 } 4511 4512 return false; 4513 } 4514 4515 //===----------------------------------------------------------------------===// 4516 // Utility functions 4517 //===----------------------------------------------------------------------===// 4518 4519 OperandMatchResultTy 4520 AMDGPUAsmParser::parseIntWithPrefix(const char *Prefix, int64_t &IntVal) { 4521 4522 if (!trySkipId(Prefix, AsmToken::Colon)) 4523 return MatchOperand_NoMatch; 4524 4525 return parseExpr(IntVal) ? 
MatchOperand_Success : MatchOperand_ParseFail; 4526 } 4527 4528 OperandMatchResultTy 4529 AMDGPUAsmParser::parseIntWithPrefix(const char *Prefix, OperandVector &Operands, 4530 AMDGPUOperand::ImmTy ImmTy, 4531 bool (*ConvertResult)(int64_t&)) { 4532 SMLoc S = getLoc(); 4533 int64_t Value = 0; 4534 4535 OperandMatchResultTy Res = parseIntWithPrefix(Prefix, Value); 4536 if (Res != MatchOperand_Success) 4537 return Res; 4538 4539 if (ConvertResult && !ConvertResult(Value)) { 4540 Error(S, "invalid " + StringRef(Prefix) + " value."); 4541 } 4542 4543 Operands.push_back(AMDGPUOperand::CreateImm(this, Value, S, ImmTy)); 4544 return MatchOperand_Success; 4545 } 4546 4547 OperandMatchResultTy 4548 AMDGPUAsmParser::parseOperandArrayWithPrefix(const char *Prefix, 4549 OperandVector &Operands, 4550 AMDGPUOperand::ImmTy ImmTy, 4551 bool (*ConvertResult)(int64_t&)) { 4552 SMLoc S = getLoc(); 4553 if (!trySkipId(Prefix, AsmToken::Colon)) 4554 return MatchOperand_NoMatch; 4555 4556 if (!skipToken(AsmToken::LBrac, "expected a left square bracket")) 4557 return MatchOperand_ParseFail; 4558 4559 unsigned Val = 0; 4560 const unsigned MaxSize = 4; 4561 4562 // FIXME: How to verify the number of elements matches the number of src 4563 // operands? 4564 for (int I = 0; ; ++I) { 4565 int64_t Op; 4566 SMLoc Loc = getLoc(); 4567 if (!parseExpr(Op)) 4568 return MatchOperand_ParseFail; 4569 4570 if (Op != 0 && Op != 1) { 4571 Error(Loc, "invalid " + StringRef(Prefix) + " value."); 4572 return MatchOperand_ParseFail; 4573 } 4574 4575 Val |= (Op << I); 4576 4577 if (trySkipToken(AsmToken::RBrac)) 4578 break; 4579 4580 if (I + 1 == MaxSize) { 4581 Error(getLoc(), "expected a closing square bracket"); 4582 return MatchOperand_ParseFail; 4583 } 4584 4585 if (!skipToken(AsmToken::Comma, "expected a comma")) 4586 return MatchOperand_ParseFail; 4587 } 4588 4589 Operands.push_back(AMDGPUOperand::CreateImm(this, Val, S, ImmTy)); 4590 return MatchOperand_Success; 4591 } 4592 4593 OperandMatchResultTy 4594 AMDGPUAsmParser::parseNamedBit(const char *Name, OperandVector &Operands, 4595 AMDGPUOperand::ImmTy ImmTy) { 4596 int64_t Bit = 0; 4597 SMLoc S = Parser.getTok().getLoc(); 4598 4599 // We are at the end of the statement, and this is a default argument, so 4600 // use a default value. 
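// E.g. for a named bit "glc": "glc" sets the bit to 1, "noglc" clears it, and
// omitting it keeps the default of 0.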
4601 if (getLexer().isNot(AsmToken::EndOfStatement)) { 4602 switch(getLexer().getKind()) { 4603 case AsmToken::Identifier: { 4604 StringRef Tok = Parser.getTok().getString(); 4605 if (Tok == Name) { 4606 if (Tok == "r128" && isGFX9()) 4607 Error(S, "r128 modifier is not supported on this GPU"); 4608 if (Tok == "a16" && !isGFX9() && !isGFX10()) 4609 Error(S, "a16 modifier is not supported on this GPU"); 4610 Bit = 1; 4611 Parser.Lex(); 4612 } else if (Tok.startswith("no") && Tok.endswith(Name)) { 4613 Bit = 0; 4614 Parser.Lex(); 4615 } else { 4616 return MatchOperand_NoMatch; 4617 } 4618 break; 4619 } 4620 default: 4621 return MatchOperand_NoMatch; 4622 } 4623 } 4624 4625 if (!isGFX10() && ImmTy == AMDGPUOperand::ImmTyDLC) 4626 return MatchOperand_ParseFail; 4627 4628 Operands.push_back(AMDGPUOperand::CreateImm(this, Bit, S, ImmTy)); 4629 return MatchOperand_Success; 4630 } 4631 4632 static void addOptionalImmOperand( 4633 MCInst& Inst, const OperandVector& Operands, 4634 AMDGPUAsmParser::OptionalImmIndexMap& OptionalIdx, 4635 AMDGPUOperand::ImmTy ImmT, 4636 int64_t Default = 0) { 4637 auto i = OptionalIdx.find(ImmT); 4638 if (i != OptionalIdx.end()) { 4639 unsigned Idx = i->second; 4640 ((AMDGPUOperand &)*Operands[Idx]).addImmOperands(Inst, 1); 4641 } else { 4642 Inst.addOperand(MCOperand::createImm(Default)); 4643 } 4644 } 4645 4646 OperandMatchResultTy 4647 AMDGPUAsmParser::parseStringWithPrefix(StringRef Prefix, StringRef &Value) { 4648 if (getLexer().isNot(AsmToken::Identifier)) { 4649 return MatchOperand_NoMatch; 4650 } 4651 StringRef Tok = Parser.getTok().getString(); 4652 if (Tok != Prefix) { 4653 return MatchOperand_NoMatch; 4654 } 4655 4656 Parser.Lex(); 4657 if (getLexer().isNot(AsmToken::Colon)) { 4658 return MatchOperand_ParseFail; 4659 } 4660 4661 Parser.Lex(); 4662 if (getLexer().isNot(AsmToken::Identifier)) { 4663 return MatchOperand_ParseFail; 4664 } 4665 4666 Value = Parser.getTok().getString(); 4667 return MatchOperand_Success; 4668 } 4669 4670 // dfmt and nfmt (in a tbuffer instruction) are parsed as one to allow their 4671 // values to live in a joint format operand in the MCInst encoding. 4672 OperandMatchResultTy 4673 AMDGPUAsmParser::parseDfmtNfmt(OperandVector &Operands) { 4674 SMLoc S = Parser.getTok().getLoc(); 4675 int64_t Dfmt = 0, Nfmt = 0; 4676 // dfmt and nfmt can appear in either order, and each is optional. 
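// E.g. "dfmt:4, nfmt:2" and "nfmt:2, dfmt:4" are equivalent; the two values
// are packed into the single format operand as (Dfmt | Nfmt << 4) below.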
4677 bool GotDfmt = false, GotNfmt = false; 4678 while (!GotDfmt || !GotNfmt) { 4679 if (!GotDfmt) { 4680 auto Res = parseIntWithPrefix("dfmt", Dfmt); 4681 if (Res != MatchOperand_NoMatch) { 4682 if (Res != MatchOperand_Success) 4683 return Res; 4684 if (Dfmt >= 16) { 4685 Error(Parser.getTok().getLoc(), "out of range dfmt"); 4686 return MatchOperand_ParseFail; 4687 } 4688 GotDfmt = true; 4689 Parser.Lex(); 4690 continue; 4691 } 4692 } 4693 if (!GotNfmt) { 4694 auto Res = parseIntWithPrefix("nfmt", Nfmt); 4695 if (Res != MatchOperand_NoMatch) { 4696 if (Res != MatchOperand_Success) 4697 return Res; 4698 if (Nfmt >= 8) { 4699 Error(Parser.getTok().getLoc(), "out of range nfmt"); 4700 return MatchOperand_ParseFail; 4701 } 4702 GotNfmt = true; 4703 Parser.Lex(); 4704 continue; 4705 } 4706 } 4707 break; 4708 } 4709 if (!GotDfmt && !GotNfmt) 4710 return MatchOperand_NoMatch; 4711 auto Format = Dfmt | Nfmt << 4; 4712 Operands.push_back( 4713 AMDGPUOperand::CreateImm(this, Format, S, AMDGPUOperand::ImmTyFORMAT)); 4714 return MatchOperand_Success; 4715 } 4716 4717 //===----------------------------------------------------------------------===// 4718 // ds 4719 //===----------------------------------------------------------------------===// 4720 4721 void AMDGPUAsmParser::cvtDSOffset01(MCInst &Inst, 4722 const OperandVector &Operands) { 4723 OptionalImmIndexMap OptionalIdx; 4724 4725 for (unsigned i = 1, e = Operands.size(); i != e; ++i) { 4726 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 4727 4728 // Add the register arguments 4729 if (Op.isReg()) { 4730 Op.addRegOperands(Inst, 1); 4731 continue; 4732 } 4733 4734 // Handle optional arguments 4735 OptionalIdx[Op.getImmTy()] = i; 4736 } 4737 4738 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset0); 4739 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset1); 4740 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGDS); 4741 4742 Inst.addOperand(MCOperand::createReg(AMDGPU::M0)); // m0 4743 } 4744 4745 void AMDGPUAsmParser::cvtDSImpl(MCInst &Inst, const OperandVector &Operands, 4746 bool IsGdsHardcoded) { 4747 OptionalImmIndexMap OptionalIdx; 4748 4749 for (unsigned i = 1, e = Operands.size(); i != e; ++i) { 4750 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 4751 4752 // Add the register arguments 4753 if (Op.isReg()) { 4754 Op.addRegOperands(Inst, 1); 4755 continue; 4756 } 4757 4758 if (Op.isToken() && Op.getToken() == "gds") { 4759 IsGdsHardcoded = true; 4760 continue; 4761 } 4762 4763 // Handle optional arguments 4764 OptionalIdx[Op.getImmTy()] = i; 4765 } 4766 4767 AMDGPUOperand::ImmTy OffsetType = 4768 (Inst.getOpcode() == AMDGPU::DS_SWIZZLE_B32_gfx10 || 4769 Inst.getOpcode() == AMDGPU::DS_SWIZZLE_B32_gfx6_gfx7 || 4770 Inst.getOpcode() == AMDGPU::DS_SWIZZLE_B32_vi) ? 
AMDGPUOperand::ImmTySwizzle : 4771 AMDGPUOperand::ImmTyOffset; 4772 4773 addOptionalImmOperand(Inst, Operands, OptionalIdx, OffsetType); 4774 4775 if (!IsGdsHardcoded) { 4776 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGDS); 4777 } 4778 Inst.addOperand(MCOperand::createReg(AMDGPU::M0)); // m0 4779 } 4780 4781 void AMDGPUAsmParser::cvtExp(MCInst &Inst, const OperandVector &Operands) { 4782 OptionalImmIndexMap OptionalIdx; 4783 4784 unsigned OperandIdx[4]; 4785 unsigned EnMask = 0; 4786 int SrcIdx = 0; 4787 4788 for (unsigned i = 1, e = Operands.size(); i != e; ++i) { 4789 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 4790 4791 // Add the register arguments 4792 if (Op.isReg()) { 4793 assert(SrcIdx < 4); 4794 OperandIdx[SrcIdx] = Inst.size(); 4795 Op.addRegOperands(Inst, 1); 4796 ++SrcIdx; 4797 continue; 4798 } 4799 4800 if (Op.isOff()) { 4801 assert(SrcIdx < 4); 4802 OperandIdx[SrcIdx] = Inst.size(); 4803 Inst.addOperand(MCOperand::createReg(AMDGPU::NoRegister)); 4804 ++SrcIdx; 4805 continue; 4806 } 4807 4808 if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyExpTgt) { 4809 Op.addImmOperands(Inst, 1); 4810 continue; 4811 } 4812 4813 if (Op.isToken() && Op.getToken() == "done") 4814 continue; 4815 4816 // Handle optional arguments 4817 OptionalIdx[Op.getImmTy()] = i; 4818 } 4819 4820 assert(SrcIdx == 4); 4821 4822 bool Compr = false; 4823 if (OptionalIdx.find(AMDGPUOperand::ImmTyExpCompr) != OptionalIdx.end()) { 4824 Compr = true; 4825 Inst.getOperand(OperandIdx[1]) = Inst.getOperand(OperandIdx[2]); 4826 Inst.getOperand(OperandIdx[2]).setReg(AMDGPU::NoRegister); 4827 Inst.getOperand(OperandIdx[3]).setReg(AMDGPU::NoRegister); 4828 } 4829 4830 for (auto i = 0; i < SrcIdx; ++i) { 4831 if (Inst.getOperand(OperandIdx[i]).getReg() != AMDGPU::NoRegister) { 4832 EnMask |= Compr? 
(0x3 << i * 2) : (0x1 << i); 4833 } 4834 } 4835 4836 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyExpVM); 4837 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyExpCompr); 4838 4839 Inst.addOperand(MCOperand::createImm(EnMask)); 4840 } 4841 4842 //===----------------------------------------------------------------------===// 4843 // s_waitcnt 4844 //===----------------------------------------------------------------------===// 4845 4846 static bool 4847 encodeCnt( 4848 const AMDGPU::IsaVersion ISA, 4849 int64_t &IntVal, 4850 int64_t CntVal, 4851 bool Saturate, 4852 unsigned (*encode)(const IsaVersion &Version, unsigned, unsigned), 4853 unsigned (*decode)(const IsaVersion &Version, unsigned)) 4854 { 4855 bool Failed = false; 4856 4857 IntVal = encode(ISA, IntVal, CntVal); 4858 if (CntVal != decode(ISA, IntVal)) { 4859 if (Saturate) { 4860 IntVal = encode(ISA, IntVal, -1); 4861 } else { 4862 Failed = true; 4863 } 4864 } 4865 return Failed; 4866 } 4867 4868 bool AMDGPUAsmParser::parseCnt(int64_t &IntVal) { 4869 4870 SMLoc CntLoc = getLoc(); 4871 StringRef CntName = getTokenStr(); 4872 4873 if (!skipToken(AsmToken::Identifier, "expected a counter name") || 4874 !skipToken(AsmToken::LParen, "expected a left parenthesis")) 4875 return false; 4876 4877 int64_t CntVal; 4878 SMLoc ValLoc = getLoc(); 4879 if (!parseExpr(CntVal)) 4880 return false; 4881 4882 AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU()); 4883 4884 bool Failed = true; 4885 bool Sat = CntName.endswith("_sat"); 4886 4887 if (CntName == "vmcnt" || CntName == "vmcnt_sat") { 4888 Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeVmcnt, decodeVmcnt); 4889 } else if (CntName == "expcnt" || CntName == "expcnt_sat") { 4890 Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeExpcnt, decodeExpcnt); 4891 } else if (CntName == "lgkmcnt" || CntName == "lgkmcnt_sat") { 4892 Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeLgkmcnt, decodeLgkmcnt); 4893 } else { 4894 Error(CntLoc, "invalid counter name " + CntName); 4895 return false; 4896 } 4897 4898 if (Failed) { 4899 Error(ValLoc, "too large value for " + CntName); 4900 return false; 4901 } 4902 4903 if (!skipToken(AsmToken::RParen, "expected a closing parenthesis")) 4904 return false; 4905 4906 if (trySkipToken(AsmToken::Amp) || trySkipToken(AsmToken::Comma)) { 4907 if (isToken(AsmToken::EndOfStatement)) { 4908 Error(getLoc(), "expected a counter name"); 4909 return false; 4910 } 4911 } 4912 4913 return true; 4914 } 4915 4916 OperandMatchResultTy 4917 AMDGPUAsmParser::parseSWaitCntOps(OperandVector &Operands) { 4918 AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU()); 4919 int64_t Waitcnt = getWaitcntBitMask(ISA); 4920 SMLoc S = getLoc(); 4921 4922 // If parse failed, do not return error code 4923 // to avoid excessive error messages. 
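// Accepts either named counters, e.g. "vmcnt(0) expcnt(0) lgkmcnt(0)", or a
// raw integer expression giving the complete waitcnt mask.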
4924 if (isToken(AsmToken::Identifier) && peekToken().is(AsmToken::LParen)) { 4925 while (parseCnt(Waitcnt) && !isToken(AsmToken::EndOfStatement)); 4926 } else { 4927 parseExpr(Waitcnt); 4928 } 4929 4930 Operands.push_back(AMDGPUOperand::CreateImm(this, Waitcnt, S)); 4931 return MatchOperand_Success; 4932 } 4933 4934 bool 4935 AMDGPUOperand::isSWaitCnt() const { 4936 return isImm(); 4937 } 4938 4939 //===----------------------------------------------------------------------===// 4940 // hwreg 4941 //===----------------------------------------------------------------------===// 4942 4943 bool 4944 AMDGPUAsmParser::parseHwregBody(OperandInfoTy &HwReg, 4945 int64_t &Offset, 4946 int64_t &Width) { 4947 using namespace llvm::AMDGPU::Hwreg; 4948 4949 // The register may be specified by name or using a numeric code 4950 if (isToken(AsmToken::Identifier) && 4951 (HwReg.Id = getHwregId(getTokenStr())) >= 0) { 4952 HwReg.IsSymbolic = true; 4953 lex(); // skip message name 4954 } else if (!parseExpr(HwReg.Id)) { 4955 return false; 4956 } 4957 4958 if (trySkipToken(AsmToken::RParen)) 4959 return true; 4960 4961 // parse optional params 4962 return 4963 skipToken(AsmToken::Comma, "expected a comma or a closing parenthesis") && 4964 parseExpr(Offset) && 4965 skipToken(AsmToken::Comma, "expected a comma") && 4966 parseExpr(Width) && 4967 skipToken(AsmToken::RParen, "expected a closing parenthesis"); 4968 } 4969 4970 bool 4971 AMDGPUAsmParser::validateHwreg(const OperandInfoTy &HwReg, 4972 const int64_t Offset, 4973 const int64_t Width, 4974 const SMLoc Loc) { 4975 4976 using namespace llvm::AMDGPU::Hwreg; 4977 4978 if (HwReg.IsSymbolic && !isValidHwreg(HwReg.Id, getSTI())) { 4979 Error(Loc, "specified hardware register is not supported on this GPU"); 4980 return false; 4981 } else if (!isValidHwreg(HwReg.Id)) { 4982 Error(Loc, "invalid code of hardware register: only 6-bit values are legal"); 4983 return false; 4984 } else if (!isValidHwregOffset(Offset)) { 4985 Error(Loc, "invalid bit offset: only 5-bit values are legal"); 4986 return false; 4987 } else if (!isValidHwregWidth(Width)) { 4988 Error(Loc, "invalid bitfield width: only values from 1 to 32 are legal"); 4989 return false; 4990 } 4991 return true; 4992 } 4993 4994 OperandMatchResultTy 4995 AMDGPUAsmParser::parseHwreg(OperandVector &Operands) { 4996 using namespace llvm::AMDGPU::Hwreg; 4997 4998 int64_t ImmVal = 0; 4999 SMLoc Loc = getLoc(); 5000 5001 // If parse failed, do not return error code 5002 // to avoid excessive error messages. 
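// Accepts "hwreg(<name or id>[, <bit offset>, <bit width>])", for example
// "hwreg(HW_REG_MODE, 0, 4)", or a raw 16-bit immediate.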
5003 if (trySkipId("hwreg", AsmToken::LParen)) {
5004 OperandInfoTy HwReg(ID_UNKNOWN_);
5005 int64_t Offset = OFFSET_DEFAULT_;
5006 int64_t Width = WIDTH_DEFAULT_;
5007 if (parseHwregBody(HwReg, Offset, Width) &&
5008 validateHwreg(HwReg, Offset, Width, Loc)) {
5009 ImmVal = encodeHwreg(HwReg.Id, Offset, Width);
5010 }
5011 } else if (parseExpr(ImmVal)) {
5012 if (ImmVal < 0 || !isUInt<16>(ImmVal))
5013 Error(Loc, "invalid immediate: only 16-bit values are legal");
5014 }
5015
5016 Operands.push_back(AMDGPUOperand::CreateImm(this, ImmVal, Loc, AMDGPUOperand::ImmTyHwreg));
5017 return MatchOperand_Success;
5018 }
5019
5020 bool AMDGPUOperand::isHwreg() const {
5021 return isImmTy(ImmTyHwreg);
5022 }
5023
5024 //===----------------------------------------------------------------------===//
5025 // sendmsg
5026 //===----------------------------------------------------------------------===//
5027
5028 bool
5029 AMDGPUAsmParser::parseSendMsgBody(OperandInfoTy &Msg,
5030 OperandInfoTy &Op,
5031 OperandInfoTy &Stream) {
5032 using namespace llvm::AMDGPU::SendMsg;
5033
5034 if (isToken(AsmToken::Identifier) && (Msg.Id = getMsgId(getTokenStr())) >= 0) {
5035 Msg.IsSymbolic = true;
5036 lex(); // skip message name
5037 } else if (!parseExpr(Msg.Id)) {
5038 return false;
5039 }
5040
5041 if (trySkipToken(AsmToken::Comma)) {
5042 Op.IsDefined = true;
5043 if (isToken(AsmToken::Identifier) &&
5044 (Op.Id = getMsgOpId(Msg.Id, getTokenStr())) >= 0) {
5045 lex(); // skip operation name
5046 } else if (!parseExpr(Op.Id)) {
5047 return false;
5048 }
5049
5050 if (trySkipToken(AsmToken::Comma)) {
5051 Stream.IsDefined = true;
5052 if (!parseExpr(Stream.Id))
5053 return false;
5054 }
5055 }
5056
5057 return skipToken(AsmToken::RParen, "expected a closing parenthesis");
5058 }
5059
5060 bool
5061 AMDGPUAsmParser::validateSendMsg(const OperandInfoTy &Msg,
5062 const OperandInfoTy &Op,
5063 const OperandInfoTy &Stream,
5064 const SMLoc S) {
5065 using namespace llvm::AMDGPU::SendMsg;
5066
5067 // Validation strictness depends on whether the message is specified
5068 // in a symbolic or in a numeric form. In the latter case
5069 // only the encoding possibility is checked.
5070 bool Strict = Msg.IsSymbolic;
5071
5072 if (!isValidMsgId(Msg.Id, getSTI(), Strict)) {
5073 Error(S, "invalid message id");
5074 return false;
5075 } else if (Strict && (msgRequiresOp(Msg.Id) != Op.IsDefined)) {
5076 Error(S, Op.IsDefined ?
5077 "message does not support operations" :
5078 "missing message operation");
5079 return false;
5080 } else if (!isValidMsgOp(Msg.Id, Op.Id, Strict)) {
5081 Error(S, "invalid operation id");
5082 return false;
5083 } else if (Strict && !msgSupportsStream(Msg.Id, Op.Id) && Stream.IsDefined) {
5084 Error(S, "message operation does not support streams");
5085 return false;
5086 } else if (!isValidMsgStream(Msg.Id, Op.Id, Stream.Id, Strict)) {
5087 Error(S, "invalid message stream id");
5088 return false;
5089 }
5090 return true;
5091 }
5092
5093 OperandMatchResultTy
5094 AMDGPUAsmParser::parseSendMsgOp(OperandVector &Operands) {
5095 using namespace llvm::AMDGPU::SendMsg;
5096
5097 int64_t ImmVal = 0;
5098 SMLoc Loc = getLoc();
5099
5100 // If parse failed, do not return error code
5101 // to avoid excessive error messages.
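// Accepts "sendmsg(<msg>[, <operation>[, <stream id>]])", for example
// "sendmsg(MSG_GS, GS_OP_EMIT, 0)", or a raw 16-bit immediate.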
5102 if (trySkipId("sendmsg", AsmToken::LParen)) { 5103 OperandInfoTy Msg(ID_UNKNOWN_); 5104 OperandInfoTy Op(OP_NONE_); 5105 OperandInfoTy Stream(STREAM_ID_NONE_); 5106 if (parseSendMsgBody(Msg, Op, Stream) && 5107 validateSendMsg(Msg, Op, Stream, Loc)) { 5108 ImmVal = encodeMsg(Msg.Id, Op.Id, Stream.Id); 5109 } 5110 } else if (parseExpr(ImmVal)) { 5111 if (ImmVal < 0 || !isUInt<16>(ImmVal)) 5112 Error(Loc, "invalid immediate: only 16-bit values are legal"); 5113 } 5114 5115 Operands.push_back(AMDGPUOperand::CreateImm(this, ImmVal, Loc, AMDGPUOperand::ImmTySendMsg)); 5116 return MatchOperand_Success; 5117 } 5118 5119 bool AMDGPUOperand::isSendMsg() const { 5120 return isImmTy(ImmTySendMsg); 5121 } 5122 5123 //===----------------------------------------------------------------------===// 5124 // v_interp 5125 //===----------------------------------------------------------------------===// 5126 5127 OperandMatchResultTy AMDGPUAsmParser::parseInterpSlot(OperandVector &Operands) { 5128 if (getLexer().getKind() != AsmToken::Identifier) 5129 return MatchOperand_NoMatch; 5130 5131 StringRef Str = Parser.getTok().getString(); 5132 int Slot = StringSwitch<int>(Str) 5133 .Case("p10", 0) 5134 .Case("p20", 1) 5135 .Case("p0", 2) 5136 .Default(-1); 5137 5138 SMLoc S = Parser.getTok().getLoc(); 5139 if (Slot == -1) 5140 return MatchOperand_ParseFail; 5141 5142 Parser.Lex(); 5143 Operands.push_back(AMDGPUOperand::CreateImm(this, Slot, S, 5144 AMDGPUOperand::ImmTyInterpSlot)); 5145 return MatchOperand_Success; 5146 } 5147 5148 OperandMatchResultTy AMDGPUAsmParser::parseInterpAttr(OperandVector &Operands) { 5149 if (getLexer().getKind() != AsmToken::Identifier) 5150 return MatchOperand_NoMatch; 5151 5152 StringRef Str = Parser.getTok().getString(); 5153 if (!Str.startswith("attr")) 5154 return MatchOperand_NoMatch; 5155 5156 StringRef Chan = Str.take_back(2); 5157 int AttrChan = StringSwitch<int>(Chan) 5158 .Case(".x", 0) 5159 .Case(".y", 1) 5160 .Case(".z", 2) 5161 .Case(".w", 3) 5162 .Default(-1); 5163 if (AttrChan == -1) 5164 return MatchOperand_ParseFail; 5165 5166 Str = Str.drop_back(2).drop_front(4); 5167 5168 uint8_t Attr; 5169 if (Str.getAsInteger(10, Attr)) 5170 return MatchOperand_ParseFail; 5171 5172 SMLoc S = Parser.getTok().getLoc(); 5173 Parser.Lex(); 5174 if (Attr > 63) { 5175 Error(S, "out of bounds attr"); 5176 return MatchOperand_Success; 5177 } 5178 5179 SMLoc SChan = SMLoc::getFromPointer(Chan.data()); 5180 5181 Operands.push_back(AMDGPUOperand::CreateImm(this, Attr, S, 5182 AMDGPUOperand::ImmTyInterpAttr)); 5183 Operands.push_back(AMDGPUOperand::CreateImm(this, AttrChan, SChan, 5184 AMDGPUOperand::ImmTyAttrChan)); 5185 return MatchOperand_Success; 5186 } 5187 5188 //===----------------------------------------------------------------------===// 5189 // exp 5190 //===----------------------------------------------------------------------===// 5191 5192 void AMDGPUAsmParser::errorExpTgt() { 5193 Error(Parser.getTok().getLoc(), "invalid exp target"); 5194 } 5195 5196 OperandMatchResultTy AMDGPUAsmParser::parseExpTgtImpl(StringRef Str, 5197 uint8_t &Val) { 5198 if (Str == "null") { 5199 Val = 9; 5200 return MatchOperand_Success; 5201 } 5202 5203 if (Str.startswith("mrt")) { 5204 Str = Str.drop_front(3); 5205 if (Str == "z") { // == mrtz 5206 Val = 8; 5207 return MatchOperand_Success; 5208 } 5209 5210 if (Str.getAsInteger(10, Val)) 5211 return MatchOperand_ParseFail; 5212 5213 if (Val > 7) 5214 errorExpTgt(); 5215 5216 return MatchOperand_Success; 5217 } 5218 5219 if (Str.startswith("pos")) 
{ 5220 Str = Str.drop_front(3); 5221 if (Str.getAsInteger(10, Val)) 5222 return MatchOperand_ParseFail; 5223 5224 if (Val > 4 || (Val == 4 && !isGFX10())) 5225 errorExpTgt(); 5226 5227 Val += 12; 5228 return MatchOperand_Success; 5229 } 5230 5231 if (isGFX10() && Str == "prim") { 5232 Val = 20; 5233 return MatchOperand_Success; 5234 } 5235 5236 if (Str.startswith("param")) { 5237 Str = Str.drop_front(5); 5238 if (Str.getAsInteger(10, Val)) 5239 return MatchOperand_ParseFail; 5240 5241 if (Val >= 32) 5242 errorExpTgt(); 5243 5244 Val += 32; 5245 return MatchOperand_Success; 5246 } 5247 5248 if (Str.startswith("invalid_target_")) { 5249 Str = Str.drop_front(15); 5250 if (Str.getAsInteger(10, Val)) 5251 return MatchOperand_ParseFail; 5252 5253 errorExpTgt(); 5254 return MatchOperand_Success; 5255 } 5256 5257 return MatchOperand_NoMatch; 5258 } 5259 5260 OperandMatchResultTy AMDGPUAsmParser::parseExpTgt(OperandVector &Operands) { 5261 uint8_t Val; 5262 StringRef Str = Parser.getTok().getString(); 5263 5264 auto Res = parseExpTgtImpl(Str, Val); 5265 if (Res != MatchOperand_Success) 5266 return Res; 5267 5268 SMLoc S = Parser.getTok().getLoc(); 5269 Parser.Lex(); 5270 5271 Operands.push_back(AMDGPUOperand::CreateImm(this, Val, S, 5272 AMDGPUOperand::ImmTyExpTgt)); 5273 return MatchOperand_Success; 5274 } 5275 5276 //===----------------------------------------------------------------------===// 5277 // parser helpers 5278 //===----------------------------------------------------------------------===// 5279 5280 bool 5281 AMDGPUAsmParser::isId(const AsmToken &Token, const StringRef Id) const { 5282 return Token.is(AsmToken::Identifier) && Token.getString() == Id; 5283 } 5284 5285 bool 5286 AMDGPUAsmParser::isId(const StringRef Id) const { 5287 return isId(getToken(), Id); 5288 } 5289 5290 bool 5291 AMDGPUAsmParser::isToken(const AsmToken::TokenKind Kind) const { 5292 return getTokenKind() == Kind; 5293 } 5294 5295 bool 5296 AMDGPUAsmParser::trySkipId(const StringRef Id) { 5297 if (isId(Id)) { 5298 lex(); 5299 return true; 5300 } 5301 return false; 5302 } 5303 5304 bool 5305 AMDGPUAsmParser::trySkipId(const StringRef Id, const AsmToken::TokenKind Kind) { 5306 if (isId(Id) && peekToken().is(Kind)) { 5307 lex(); 5308 lex(); 5309 return true; 5310 } 5311 return false; 5312 } 5313 5314 bool 5315 AMDGPUAsmParser::trySkipToken(const AsmToken::TokenKind Kind) { 5316 if (isToken(Kind)) { 5317 lex(); 5318 return true; 5319 } 5320 return false; 5321 } 5322 5323 bool 5324 AMDGPUAsmParser::skipToken(const AsmToken::TokenKind Kind, 5325 const StringRef ErrMsg) { 5326 if (!trySkipToken(Kind)) { 5327 Error(getLoc(), ErrMsg); 5328 return false; 5329 } 5330 return true; 5331 } 5332 5333 bool 5334 AMDGPUAsmParser::parseExpr(int64_t &Imm) { 5335 return !getParser().parseAbsoluteExpression(Imm); 5336 } 5337 5338 bool 5339 AMDGPUAsmParser::parseExpr(OperandVector &Operands) { 5340 SMLoc S = getLoc(); 5341 5342 const MCExpr *Expr; 5343 if (Parser.parseExpression(Expr)) 5344 return false; 5345 5346 int64_t IntVal; 5347 if (Expr->evaluateAsAbsolute(IntVal)) { 5348 Operands.push_back(AMDGPUOperand::CreateImm(this, IntVal, S)); 5349 } else { 5350 Operands.push_back(AMDGPUOperand::CreateExpr(this, Expr, S)); 5351 } 5352 return true; 5353 } 5354 5355 bool 5356 AMDGPUAsmParser::parseString(StringRef &Val, const StringRef ErrMsg) { 5357 if (isToken(AsmToken::String)) { 5358 Val = getToken().getStringContents(); 5359 lex(); 5360 return true; 5361 } else { 5362 Error(getLoc(), ErrMsg); 5363 return false; 5364 } 5365 } 5366 5367 
AsmToken 5368 AMDGPUAsmParser::getToken() const { 5369 return Parser.getTok(); 5370 } 5371 5372 AsmToken 5373 AMDGPUAsmParser::peekToken() { 5374 return getLexer().peekTok(); 5375 } 5376 5377 void 5378 AMDGPUAsmParser::peekTokens(MutableArrayRef<AsmToken> Tokens) { 5379 auto TokCount = getLexer().peekTokens(Tokens); 5380 5381 for (auto Idx = TokCount; Idx < Tokens.size(); ++Idx) 5382 Tokens[Idx] = AsmToken(AsmToken::Error, ""); 5383 } 5384 5385 AsmToken::TokenKind 5386 AMDGPUAsmParser::getTokenKind() const { 5387 return getLexer().getKind(); 5388 } 5389 5390 SMLoc 5391 AMDGPUAsmParser::getLoc() const { 5392 return getToken().getLoc(); 5393 } 5394 5395 StringRef 5396 AMDGPUAsmParser::getTokenStr() const { 5397 return getToken().getString(); 5398 } 5399 5400 void 5401 AMDGPUAsmParser::lex() { 5402 Parser.Lex(); 5403 } 5404 5405 //===----------------------------------------------------------------------===// 5406 // swizzle 5407 //===----------------------------------------------------------------------===// 5408 5409 LLVM_READNONE 5410 static unsigned 5411 encodeBitmaskPerm(const unsigned AndMask, 5412 const unsigned OrMask, 5413 const unsigned XorMask) { 5414 using namespace llvm::AMDGPU::Swizzle; 5415 5416 return BITMASK_PERM_ENC | 5417 (AndMask << BITMASK_AND_SHIFT) | 5418 (OrMask << BITMASK_OR_SHIFT) | 5419 (XorMask << BITMASK_XOR_SHIFT); 5420 } 5421 5422 bool 5423 AMDGPUAsmParser::parseSwizzleOperands(const unsigned OpNum, int64_t* Op, 5424 const unsigned MinVal, 5425 const unsigned MaxVal, 5426 const StringRef ErrMsg) { 5427 for (unsigned i = 0; i < OpNum; ++i) { 5428 if (!skipToken(AsmToken::Comma, "expected a comma")){ 5429 return false; 5430 } 5431 SMLoc ExprLoc = Parser.getTok().getLoc(); 5432 if (!parseExpr(Op[i])) { 5433 return false; 5434 } 5435 if (Op[i] < MinVal || Op[i] > MaxVal) { 5436 Error(ExprLoc, ErrMsg); 5437 return false; 5438 } 5439 } 5440 5441 return true; 5442 } 5443 5444 bool 5445 AMDGPUAsmParser::parseSwizzleQuadPerm(int64_t &Imm) { 5446 using namespace llvm::AMDGPU::Swizzle; 5447 5448 int64_t Lane[LANE_NUM]; 5449 if (parseSwizzleOperands(LANE_NUM, Lane, 0, LANE_MAX, 5450 "expected a 2-bit lane id")) { 5451 Imm = QUAD_PERM_ENC; 5452 for (unsigned I = 0; I < LANE_NUM; ++I) { 5453 Imm |= Lane[I] << (LANE_SHIFT * I); 5454 } 5455 return true; 5456 } 5457 return false; 5458 } 5459 5460 bool 5461 AMDGPUAsmParser::parseSwizzleBroadcast(int64_t &Imm) { 5462 using namespace llvm::AMDGPU::Swizzle; 5463 5464 SMLoc S = Parser.getTok().getLoc(); 5465 int64_t GroupSize; 5466 int64_t LaneIdx; 5467 5468 if (!parseSwizzleOperands(1, &GroupSize, 5469 2, 32, 5470 "group size must be in the interval [2,32]")) { 5471 return false; 5472 } 5473 if (!isPowerOf2_64(GroupSize)) { 5474 Error(S, "group size must be a power of two"); 5475 return false; 5476 } 5477 if (parseSwizzleOperands(1, &LaneIdx, 5478 0, GroupSize - 1, 5479 "lane id must be in the interval [0,group size - 1]")) { 5480 Imm = encodeBitmaskPerm(BITMASK_MAX - GroupSize + 1, LaneIdx, 0); 5481 return true; 5482 } 5483 return false; 5484 } 5485 5486 bool 5487 AMDGPUAsmParser::parseSwizzleReverse(int64_t &Imm) { 5488 using namespace llvm::AMDGPU::Swizzle; 5489 5490 SMLoc S = Parser.getTok().getLoc(); 5491 int64_t GroupSize; 5492 5493 if (!parseSwizzleOperands(1, &GroupSize, 5494 2, 32, "group size must be in the interval [2,32]")) { 5495 return false; 5496 } 5497 if (!isPowerOf2_64(GroupSize)) { 5498 Error(S, "group size must be a power of two"); 5499 return false; 5500 } 5501 5502 Imm = encodeBitmaskPerm(BITMASK_MAX, 0, 
GroupSize - 1); 5503 return true; 5504 } 5505 5506 bool 5507 AMDGPUAsmParser::parseSwizzleSwap(int64_t &Imm) { 5508 using namespace llvm::AMDGPU::Swizzle; 5509 5510 SMLoc S = Parser.getTok().getLoc(); 5511 int64_t GroupSize; 5512 5513 if (!parseSwizzleOperands(1, &GroupSize, 5514 1, 16, "group size must be in the interval [1,16]")) { 5515 return false; 5516 } 5517 if (!isPowerOf2_64(GroupSize)) { 5518 Error(S, "group size must be a power of two"); 5519 return false; 5520 } 5521 5522 Imm = encodeBitmaskPerm(BITMASK_MAX, 0, GroupSize); 5523 return true; 5524 } 5525 5526 bool 5527 AMDGPUAsmParser::parseSwizzleBitmaskPerm(int64_t &Imm) { 5528 using namespace llvm::AMDGPU::Swizzle; 5529 5530 if (!skipToken(AsmToken::Comma, "expected a comma")) { 5531 return false; 5532 } 5533 5534 StringRef Ctl; 5535 SMLoc StrLoc = Parser.getTok().getLoc(); 5536 if (!parseString(Ctl)) { 5537 return false; 5538 } 5539 if (Ctl.size() != BITMASK_WIDTH) { 5540 Error(StrLoc, "expected a 5-character mask"); 5541 return false; 5542 } 5543 5544 unsigned AndMask = 0; 5545 unsigned OrMask = 0; 5546 unsigned XorMask = 0; 5547 5548 for (size_t i = 0; i < Ctl.size(); ++i) { 5549 unsigned Mask = 1 << (BITMASK_WIDTH - 1 - i); 5550 switch(Ctl[i]) { 5551 default: 5552 Error(StrLoc, "invalid mask"); 5553 return false; 5554 case '0': 5555 break; 5556 case '1': 5557 OrMask |= Mask; 5558 break; 5559 case 'p': 5560 AndMask |= Mask; 5561 break; 5562 case 'i': 5563 AndMask |= Mask; 5564 XorMask |= Mask; 5565 break; 5566 } 5567 } 5568 5569 Imm = encodeBitmaskPerm(AndMask, OrMask, XorMask); 5570 return true; 5571 } 5572 5573 bool 5574 AMDGPUAsmParser::parseSwizzleOffset(int64_t &Imm) { 5575 5576 SMLoc OffsetLoc = Parser.getTok().getLoc(); 5577 5578 if (!parseExpr(Imm)) { 5579 return false; 5580 } 5581 if (!isUInt<16>(Imm)) { 5582 Error(OffsetLoc, "expected a 16-bit offset"); 5583 return false; 5584 } 5585 return true; 5586 } 5587 5588 bool 5589 AMDGPUAsmParser::parseSwizzleMacro(int64_t &Imm) { 5590 using namespace llvm::AMDGPU::Swizzle; 5591 5592 if (skipToken(AsmToken::LParen, "expected a left parentheses")) { 5593 5594 SMLoc ModeLoc = Parser.getTok().getLoc(); 5595 bool Ok = false; 5596 5597 if (trySkipId(IdSymbolic[ID_QUAD_PERM])) { 5598 Ok = parseSwizzleQuadPerm(Imm); 5599 } else if (trySkipId(IdSymbolic[ID_BITMASK_PERM])) { 5600 Ok = parseSwizzleBitmaskPerm(Imm); 5601 } else if (trySkipId(IdSymbolic[ID_BROADCAST])) { 5602 Ok = parseSwizzleBroadcast(Imm); 5603 } else if (trySkipId(IdSymbolic[ID_SWAP])) { 5604 Ok = parseSwizzleSwap(Imm); 5605 } else if (trySkipId(IdSymbolic[ID_REVERSE])) { 5606 Ok = parseSwizzleReverse(Imm); 5607 } else { 5608 Error(ModeLoc, "expected a swizzle mode"); 5609 } 5610 5611 return Ok && skipToken(AsmToken::RParen, "expected a closing parentheses"); 5612 } 5613 5614 return false; 5615 } 5616 5617 OperandMatchResultTy 5618 AMDGPUAsmParser::parseSwizzleOp(OperandVector &Operands) { 5619 SMLoc S = Parser.getTok().getLoc(); 5620 int64_t Imm = 0; 5621 5622 if (trySkipId("offset")) { 5623 5624 bool Ok = false; 5625 if (skipToken(AsmToken::Colon, "expected a colon")) { 5626 if (trySkipId("swizzle")) { 5627 Ok = parseSwizzleMacro(Imm); 5628 } else { 5629 Ok = parseSwizzleOffset(Imm); 5630 } 5631 } 5632 5633 Operands.push_back(AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTySwizzle)); 5634 5635 return Ok? MatchOperand_Success : MatchOperand_ParseFail; 5636 } else { 5637 // Swizzle "offset" operand is optional. 5638 // If it is omitted, try parsing other optional operands. 
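// (When the operand is present it is handled in the branch above, either as
// a raw 16-bit value, e.g. offset:0xab23, or as a macro such as
// offset:swizzle(QUAD_PERM, 0, 1, 2, 3) or
// offset:swizzle(BITMASK_PERM, "00p11").)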
5639 return parseOptionalOpr(Operands); 5640 } 5641 } 5642 5643 bool 5644 AMDGPUOperand::isSwizzle() const { 5645 return isImmTy(ImmTySwizzle); 5646 } 5647 5648 //===----------------------------------------------------------------------===// 5649 // VGPR Index Mode 5650 //===----------------------------------------------------------------------===// 5651 5652 int64_t AMDGPUAsmParser::parseGPRIdxMacro() { 5653 5654 using namespace llvm::AMDGPU::VGPRIndexMode; 5655 5656 if (trySkipToken(AsmToken::RParen)) { 5657 return OFF; 5658 } 5659 5660 int64_t Imm = 0; 5661 5662 while (true) { 5663 unsigned Mode = 0; 5664 SMLoc S = Parser.getTok().getLoc(); 5665 5666 for (unsigned ModeId = ID_MIN; ModeId <= ID_MAX; ++ModeId) { 5667 if (trySkipId(IdSymbolic[ModeId])) { 5668 Mode = 1 << ModeId; 5669 break; 5670 } 5671 } 5672 5673 if (Mode == 0) { 5674 Error(S, (Imm == 0)? 5675 "expected a VGPR index mode or a closing parenthesis" : 5676 "expected a VGPR index mode"); 5677 break; 5678 } 5679 5680 if (Imm & Mode) { 5681 Error(S, "duplicate VGPR index mode"); 5682 break; 5683 } 5684 Imm |= Mode; 5685 5686 if (trySkipToken(AsmToken::RParen)) 5687 break; 5688 if (!skipToken(AsmToken::Comma, 5689 "expected a comma or a closing parenthesis")) 5690 break; 5691 } 5692 5693 return Imm; 5694 } 5695 5696 OperandMatchResultTy 5697 AMDGPUAsmParser::parseGPRIdxMode(OperandVector &Operands) { 5698 5699 int64_t Imm = 0; 5700 SMLoc S = Parser.getTok().getLoc(); 5701 5702 if (getLexer().getKind() == AsmToken::Identifier && 5703 Parser.getTok().getString() == "gpr_idx" && 5704 getLexer().peekTok().is(AsmToken::LParen)) { 5705 5706 Parser.Lex(); 5707 Parser.Lex(); 5708 5709 // If parse failed, trigger an error but do not return error code 5710 // to avoid excessive error messages. 5711 Imm = parseGPRIdxMacro(); 5712 5713 } else { 5714 if (getParser().parseAbsoluteExpression(Imm)) 5715 return MatchOperand_NoMatch; 5716 if (Imm < 0 || !isUInt<4>(Imm)) { 5717 Error(S, "invalid immediate: only 4-bit values are legal"); 5718 } 5719 } 5720 5721 Operands.push_back( 5722 AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTyGprIdxMode)); 5723 return MatchOperand_Success; 5724 } 5725 5726 bool AMDGPUOperand::isGPRIdxMode() const { 5727 return isImmTy(ImmTyGprIdxMode); 5728 } 5729 5730 //===----------------------------------------------------------------------===// 5731 // sopp branch targets 5732 //===----------------------------------------------------------------------===// 5733 5734 OperandMatchResultTy 5735 AMDGPUAsmParser::parseSOppBrTarget(OperandVector &Operands) { 5736 5737 // Make sure we are not parsing something 5738 // that looks like a label or an expression but is not. 5739 // This will improve error messages. 5740 if (isRegister() || isModifier()) 5741 return MatchOperand_NoMatch; 5742 5743 if (parseExpr(Operands)) { 5744 5745 AMDGPUOperand &Opr = ((AMDGPUOperand &)*Operands[Operands.size() - 1]); 5746 assert(Opr.isImm() || Opr.isExpr()); 5747 SMLoc Loc = Opr.getStartLoc(); 5748 5749 // Currently we do not support arbitrary expressions as branch targets. 5750 // Only labels and absolute expressions are accepted. 
5751 if (Opr.isExpr() && !Opr.isSymbolRefExpr()) { 5752 Error(Loc, "expected an absolute expression or a label"); 5753 } else if (Opr.isImm() && !Opr.isS16Imm()) { 5754 Error(Loc, "expected a 16-bit signed jump offset"); 5755 } 5756 } 5757 5758 return MatchOperand_Success; // avoid excessive error messages 5759 } 5760 5761 //===----------------------------------------------------------------------===// 5762 // Boolean holding registers 5763 //===----------------------------------------------------------------------===// 5764 5765 OperandMatchResultTy 5766 AMDGPUAsmParser::parseBoolReg(OperandVector &Operands) { 5767 return parseReg(Operands); 5768 } 5769 5770 //===----------------------------------------------------------------------===// 5771 // mubuf 5772 //===----------------------------------------------------------------------===// 5773 5774 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultDLC() const { 5775 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyDLC); 5776 } 5777 5778 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultGLC() const { 5779 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyGLC); 5780 } 5781 5782 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSLC() const { 5783 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTySLC); 5784 } 5785 5786 void AMDGPUAsmParser::cvtMubufImpl(MCInst &Inst, 5787 const OperandVector &Operands, 5788 bool IsAtomic, 5789 bool IsAtomicReturn, 5790 bool IsLds) { 5791 bool IsLdsOpcode = IsLds; 5792 bool HasLdsModifier = false; 5793 OptionalImmIndexMap OptionalIdx; 5794 assert(IsAtomicReturn ? IsAtomic : true); 5795 unsigned FirstOperandIdx = 1; 5796 5797 for (unsigned i = FirstOperandIdx, e = Operands.size(); i != e; ++i) { 5798 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 5799 5800 // Add the register arguments 5801 if (Op.isReg()) { 5802 Op.addRegOperands(Inst, 1); 5803 // Insert a tied src for atomic return dst. 5804 // This cannot be postponed as subsequent calls to 5805 // addImmOperands rely on correct number of MC operands. 5806 if (IsAtomicReturn && i == FirstOperandIdx) 5807 Op.addRegOperands(Inst, 1); 5808 continue; 5809 } 5810 5811 // Handle the case where soffset is an immediate 5812 if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyNone) { 5813 Op.addImmOperands(Inst, 1); 5814 continue; 5815 } 5816 5817 HasLdsModifier |= Op.isLDS(); 5818 5819 // Handle tokens like 'offen' which are sometimes hard-coded into the 5820 // asm string. There are no MCInst operands for these. 5821 if (Op.isToken()) { 5822 continue; 5823 } 5824 assert(Op.isImm()); 5825 5826 // Handle optional arguments 5827 OptionalIdx[Op.getImmTy()] = i; 5828 } 5829 5830 // This is a workaround for an llvm quirk which may result in an 5831 // incorrect instruction selection. Lds and non-lds versions of 5832 // MUBUF instructions are identical except that lds versions 5833 // have mandatory 'lds' modifier. However this modifier follows 5834 // optional modifiers and llvm asm matcher regards this 'lds' 5835 // modifier as an optional one. As a result, an lds version 5836 // of opcode may be selected even if it has no 'lds' modifier. 5837 if (IsLdsOpcode && !HasLdsModifier) { 5838 int NoLdsOpcode = AMDGPU::getMUBUFNoLdsInst(Inst.getOpcode()); 5839 if (NoLdsOpcode != -1) { // Got lds version - correct it. 5840 Inst.setOpcode(NoLdsOpcode); 5841 IsLdsOpcode = false; 5842 } 5843 } 5844 5845 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset); 5846 if (!IsAtomic) { // glc is hard-coded. 
5847 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGLC); 5848 } 5849 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySLC); 5850 5851 if (!IsLdsOpcode) { // tfe is not legal with lds opcodes 5852 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE); 5853 } 5854 5855 if (isGFX10()) 5856 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDLC); 5857 } 5858 5859 void AMDGPUAsmParser::cvtMtbuf(MCInst &Inst, const OperandVector &Operands) { 5860 OptionalImmIndexMap OptionalIdx; 5861 5862 for (unsigned i = 1, e = Operands.size(); i != e; ++i) { 5863 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 5864 5865 // Add the register arguments 5866 if (Op.isReg()) { 5867 Op.addRegOperands(Inst, 1); 5868 continue; 5869 } 5870 5871 // Handle the case where soffset is an immediate 5872 if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyNone) { 5873 Op.addImmOperands(Inst, 1); 5874 continue; 5875 } 5876 5877 // Handle tokens like 'offen' which are sometimes hard-coded into the 5878 // asm string. There are no MCInst operands for these. 5879 if (Op.isToken()) { 5880 continue; 5881 } 5882 assert(Op.isImm()); 5883 5884 // Handle optional arguments 5885 OptionalIdx[Op.getImmTy()] = i; 5886 } 5887 5888 addOptionalImmOperand(Inst, Operands, OptionalIdx, 5889 AMDGPUOperand::ImmTyOffset); 5890 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyFORMAT); 5891 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGLC); 5892 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySLC); 5893 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE); 5894 5895 if (isGFX10()) 5896 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDLC); 5897 } 5898 5899 //===----------------------------------------------------------------------===// 5900 // mimg 5901 //===----------------------------------------------------------------------===// 5902 5903 void AMDGPUAsmParser::cvtMIMG(MCInst &Inst, const OperandVector &Operands, 5904 bool IsAtomic) { 5905 unsigned I = 1; 5906 const MCInstrDesc &Desc = MII.get(Inst.getOpcode()); 5907 for (unsigned J = 0; J < Desc.getNumDefs(); ++J) { 5908 ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1); 5909 } 5910 5911 if (IsAtomic) { 5912 // Add src, same as dst 5913 assert(Desc.getNumDefs() == 1); 5914 ((AMDGPUOperand &)*Operands[I - 1]).addRegOperands(Inst, 1); 5915 } 5916 5917 OptionalImmIndexMap OptionalIdx; 5918 5919 for (unsigned E = Operands.size(); I != E; ++I) { 5920 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]); 5921 5922 // Add the register arguments 5923 if (Op.isReg()) { 5924 Op.addRegOperands(Inst, 1); 5925 } else if (Op.isImmModifier()) { 5926 OptionalIdx[Op.getImmTy()] = I; 5927 } else if (!Op.isToken()) { 5928 llvm_unreachable("unexpected operand type"); 5929 } 5930 } 5931 5932 bool IsGFX10 = isGFX10(); 5933 5934 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDMask); 5935 if (IsGFX10) 5936 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDim, -1); 5937 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyUNorm); 5938 if (IsGFX10) 5939 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDLC); 5940 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGLC); 5941 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySLC); 5942 addOptionalImmOperand(Inst, Operands, 
OptionalIdx, AMDGPUOperand::ImmTyR128A16); 5943 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE); 5944 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyLWE); 5945 if (!IsGFX10) 5946 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDA); 5947 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyD16); 5948 } 5949 5950 void AMDGPUAsmParser::cvtMIMGAtomic(MCInst &Inst, const OperandVector &Operands) { 5951 cvtMIMG(Inst, Operands, true); 5952 } 5953 5954 //===----------------------------------------------------------------------===// 5955 // smrd 5956 //===----------------------------------------------------------------------===// 5957 5958 bool AMDGPUOperand::isSMRDOffset8() const { 5959 return isImm() && isUInt<8>(getImm()); 5960 } 5961 5962 bool AMDGPUOperand::isSMRDOffset20() const { 5963 return isImm() && isUInt<20>(getImm()); 5964 } 5965 5966 bool AMDGPUOperand::isSMRDLiteralOffset() const { 5967 // 32-bit literals are only supported on CI and we only want to use them 5968 // when the offset is > 8-bits. 5969 return isImm() && !isUInt<8>(getImm()) && isUInt<32>(getImm()); 5970 } 5971 5972 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSMRDOffset8() const { 5973 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset); 5974 } 5975 5976 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSMRDOffset20() const { 5977 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset); 5978 } 5979 5980 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSMRDLiteralOffset() const { 5981 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset); 5982 } 5983 5984 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultFlatOffset() const { 5985 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset); 5986 } 5987 5988 //===----------------------------------------------------------------------===// 5989 // vop3 5990 //===----------------------------------------------------------------------===// 5991 5992 static bool ConvertOmodMul(int64_t &Mul) { 5993 if (Mul != 1 && Mul != 2 && Mul != 4) 5994 return false; 5995 5996 Mul >>= 1; 5997 return true; 5998 } 5999 6000 static bool ConvertOmodDiv(int64_t &Div) { 6001 if (Div == 1) { 6002 Div = 0; 6003 return true; 6004 } 6005 6006 if (Div == 2) { 6007 Div = 3; 6008 return true; 6009 } 6010 6011 return false; 6012 } 6013 6014 static bool ConvertBoundCtrl(int64_t &BoundCtrl) { 6015 if (BoundCtrl == 0) { 6016 BoundCtrl = 1; 6017 return true; 6018 } 6019 6020 if (BoundCtrl == -1) { 6021 BoundCtrl = 0; 6022 return true; 6023 } 6024 6025 return false; 6026 } 6027 6028 // Note: the order in this table matches the order of operands in AsmString. 
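// Each entry is {asm name, operand type, is-a-bare-bit flag, optional
// converter}. Bare-bit entries (e.g. 'glc') are parsed as standalone tokens
// via parseNamedBit(), while most of the others expect a "name:value" form;
// see parseOptionalOpr() below for how each kind is dispatched.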
6029 static const OptionalOperand AMDGPUOptionalOperandTable[] = { 6030 {"offen", AMDGPUOperand::ImmTyOffen, true, nullptr}, 6031 {"idxen", AMDGPUOperand::ImmTyIdxen, true, nullptr}, 6032 {"addr64", AMDGPUOperand::ImmTyAddr64, true, nullptr}, 6033 {"offset0", AMDGPUOperand::ImmTyOffset0, false, nullptr}, 6034 {"offset1", AMDGPUOperand::ImmTyOffset1, false, nullptr}, 6035 {"gds", AMDGPUOperand::ImmTyGDS, true, nullptr}, 6036 {"lds", AMDGPUOperand::ImmTyLDS, true, nullptr}, 6037 {"offset", AMDGPUOperand::ImmTyOffset, false, nullptr}, 6038 {"inst_offset", AMDGPUOperand::ImmTyInstOffset, false, nullptr}, 6039 {"dlc", AMDGPUOperand::ImmTyDLC, true, nullptr}, 6040 {"format", AMDGPUOperand::ImmTyFORMAT, false, nullptr}, 6041 {"glc", AMDGPUOperand::ImmTyGLC, true, nullptr}, 6042 {"slc", AMDGPUOperand::ImmTySLC, true, nullptr}, 6043 {"swz", AMDGPUOperand::ImmTySWZ, true, nullptr}, 6044 {"tfe", AMDGPUOperand::ImmTyTFE, true, nullptr}, 6045 {"d16", AMDGPUOperand::ImmTyD16, true, nullptr}, 6046 {"high", AMDGPUOperand::ImmTyHigh, true, nullptr}, 6047 {"clamp", AMDGPUOperand::ImmTyClampSI, true, nullptr}, 6048 {"omod", AMDGPUOperand::ImmTyOModSI, false, ConvertOmodMul}, 6049 {"unorm", AMDGPUOperand::ImmTyUNorm, true, nullptr}, 6050 {"da", AMDGPUOperand::ImmTyDA, true, nullptr}, 6051 {"r128", AMDGPUOperand::ImmTyR128A16, true, nullptr}, 6052 {"a16", AMDGPUOperand::ImmTyR128A16, true, nullptr}, 6053 {"lwe", AMDGPUOperand::ImmTyLWE, true, nullptr}, 6054 {"d16", AMDGPUOperand::ImmTyD16, true, nullptr}, 6055 {"dmask", AMDGPUOperand::ImmTyDMask, false, nullptr}, 6056 {"dim", AMDGPUOperand::ImmTyDim, false, nullptr}, 6057 {"row_mask", AMDGPUOperand::ImmTyDppRowMask, false, nullptr}, 6058 {"bank_mask", AMDGPUOperand::ImmTyDppBankMask, false, nullptr}, 6059 {"bound_ctrl", AMDGPUOperand::ImmTyDppBoundCtrl, false, ConvertBoundCtrl}, 6060 {"fi", AMDGPUOperand::ImmTyDppFi, false, nullptr}, 6061 {"dst_sel", AMDGPUOperand::ImmTySdwaDstSel, false, nullptr}, 6062 {"src0_sel", AMDGPUOperand::ImmTySdwaSrc0Sel, false, nullptr}, 6063 {"src1_sel", AMDGPUOperand::ImmTySdwaSrc1Sel, false, nullptr}, 6064 {"dst_unused", AMDGPUOperand::ImmTySdwaDstUnused, false, nullptr}, 6065 {"compr", AMDGPUOperand::ImmTyExpCompr, true, nullptr }, 6066 {"vm", AMDGPUOperand::ImmTyExpVM, true, nullptr}, 6067 {"op_sel", AMDGPUOperand::ImmTyOpSel, false, nullptr}, 6068 {"op_sel_hi", AMDGPUOperand::ImmTyOpSelHi, false, nullptr}, 6069 {"neg_lo", AMDGPUOperand::ImmTyNegLo, false, nullptr}, 6070 {"neg_hi", AMDGPUOperand::ImmTyNegHi, false, nullptr}, 6071 {"blgp", AMDGPUOperand::ImmTyBLGP, false, nullptr}, 6072 {"cbsz", AMDGPUOperand::ImmTyCBSZ, false, nullptr}, 6073 {"abid", AMDGPUOperand::ImmTyABID, false, nullptr} 6074 }; 6075 6076 OperandMatchResultTy AMDGPUAsmParser::parseOptionalOperand(OperandVector &Operands) { 6077 unsigned size = Operands.size(); 6078 assert(size > 0); 6079 6080 OperandMatchResultTy res = parseOptionalOpr(Operands); 6081 6082 // This is a hack to enable hardcoded mandatory operands which follow 6083 // optional operands. 6084 // 6085 // Current design assumes that all operands after the first optional operand 6086 // are also optional. However implementation of some instructions violates 6087 // this rule (see e.g. flat/global atomic which have hardcoded 'glc' operands). 6088 // 6089 // To alleviate this problem, we have to (implicitly) parse extra operands 6090 // to make sure autogenerated parser of custom operands never hit hardcoded 6091 // mandatory operands. 
6092 6093 if (size == 1 || ((AMDGPUOperand &)*Operands[size - 1]).isRegKind()) { 6094 6095 // We have parsed the first optional operand. 6096 // Parse as many operands as necessary to skip all mandatory operands. 6097 6098 for (unsigned i = 0; i < MAX_OPR_LOOKAHEAD; ++i) { 6099 if (res != MatchOperand_Success || 6100 getLexer().is(AsmToken::EndOfStatement)) break; 6101 if (getLexer().is(AsmToken::Comma)) Parser.Lex(); 6102 res = parseOptionalOpr(Operands); 6103 } 6104 } 6105 6106 return res; 6107 } 6108 6109 OperandMatchResultTy AMDGPUAsmParser::parseOptionalOpr(OperandVector &Operands) { 6110 OperandMatchResultTy res; 6111 for (const OptionalOperand &Op : AMDGPUOptionalOperandTable) { 6112 // try to parse any optional operand here 6113 if (Op.IsBit) { 6114 res = parseNamedBit(Op.Name, Operands, Op.Type); 6115 } else if (Op.Type == AMDGPUOperand::ImmTyOModSI) { 6116 res = parseOModOperand(Operands); 6117 } else if (Op.Type == AMDGPUOperand::ImmTySdwaDstSel || 6118 Op.Type == AMDGPUOperand::ImmTySdwaSrc0Sel || 6119 Op.Type == AMDGPUOperand::ImmTySdwaSrc1Sel) { 6120 res = parseSDWASel(Operands, Op.Name, Op.Type); 6121 } else if (Op.Type == AMDGPUOperand::ImmTySdwaDstUnused) { 6122 res = parseSDWADstUnused(Operands); 6123 } else if (Op.Type == AMDGPUOperand::ImmTyOpSel || 6124 Op.Type == AMDGPUOperand::ImmTyOpSelHi || 6125 Op.Type == AMDGPUOperand::ImmTyNegLo || 6126 Op.Type == AMDGPUOperand::ImmTyNegHi) { 6127 res = parseOperandArrayWithPrefix(Op.Name, Operands, Op.Type, 6128 Op.ConvertResult); 6129 } else if (Op.Type == AMDGPUOperand::ImmTyDim) { 6130 res = parseDim(Operands); 6131 } else if (Op.Type == AMDGPUOperand::ImmTyFORMAT && !isGFX10()) { 6132 res = parseDfmtNfmt(Operands); 6133 } else { 6134 res = parseIntWithPrefix(Op.Name, Operands, Op.Type, Op.ConvertResult); 6135 } 6136 if (res != MatchOperand_NoMatch) { 6137 return res; 6138 } 6139 } 6140 return MatchOperand_NoMatch; 6141 } 6142 6143 OperandMatchResultTy AMDGPUAsmParser::parseOModOperand(OperandVector &Operands) { 6144 StringRef Name = Parser.getTok().getString(); 6145 if (Name == "mul") { 6146 return parseIntWithPrefix("mul", Operands, 6147 AMDGPUOperand::ImmTyOModSI, ConvertOmodMul); 6148 } 6149 6150 if (Name == "div") { 6151 return parseIntWithPrefix("div", Operands, 6152 AMDGPUOperand::ImmTyOModSI, ConvertOmodDiv); 6153 } 6154 6155 return MatchOperand_NoMatch; 6156 } 6157 6158 void AMDGPUAsmParser::cvtVOP3OpSel(MCInst &Inst, const OperandVector &Operands) { 6159 cvtVOP3P(Inst, Operands); 6160 6161 int Opc = Inst.getOpcode(); 6162 6163 int SrcNum; 6164 const int Ops[] = { AMDGPU::OpName::src0, 6165 AMDGPU::OpName::src1, 6166 AMDGPU::OpName::src2 }; 6167 for (SrcNum = 0; 6168 SrcNum < 3 && AMDGPU::getNamedOperandIdx(Opc, Ops[SrcNum]) != -1; 6169 ++SrcNum); 6170 assert(SrcNum > 0); 6171 6172 int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel); 6173 unsigned OpSel = Inst.getOperand(OpSelIdx).getImm(); 6174 6175 if ((OpSel & (1 << SrcNum)) != 0) { 6176 int ModIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0_modifiers); 6177 uint32_t ModVal = Inst.getOperand(ModIdx).getImm(); 6178 Inst.getOperand(ModIdx).setImm(ModVal | SISrcMods::DST_OP_SEL); 6179 } 6180 } 6181 6182 static bool isRegOrImmWithInputMods(const MCInstrDesc &Desc, unsigned OpNum) { 6183 // 1. This operand is input modifiers 6184 return Desc.OpInfo[OpNum].OperandType == AMDGPU::OPERAND_INPUT_MODS 6185 // 2. This is not last operand 6186 && Desc.NumOperands > (OpNum + 1) 6187 // 3. 
Next operand is register class 6188 && Desc.OpInfo[OpNum + 1].RegClass != -1 6189 // 4. Next register is not tied to any other operand 6190 && Desc.getOperandConstraint(OpNum + 1, MCOI::OperandConstraint::TIED_TO) == -1; 6191 } 6192 6193 void AMDGPUAsmParser::cvtVOP3Interp(MCInst &Inst, const OperandVector &Operands) 6194 { 6195 OptionalImmIndexMap OptionalIdx; 6196 unsigned Opc = Inst.getOpcode(); 6197 6198 unsigned I = 1; 6199 const MCInstrDesc &Desc = MII.get(Inst.getOpcode()); 6200 for (unsigned J = 0; J < Desc.getNumDefs(); ++J) { 6201 ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1); 6202 } 6203 6204 for (unsigned E = Operands.size(); I != E; ++I) { 6205 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]); 6206 if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) { 6207 Op.addRegOrImmWithFPInputModsOperands(Inst, 2); 6208 } else if (Op.isInterpSlot() || 6209 Op.isInterpAttr() || 6210 Op.isAttrChan()) { 6211 Inst.addOperand(MCOperand::createImm(Op.getImm())); 6212 } else if (Op.isImmModifier()) { 6213 OptionalIdx[Op.getImmTy()] = I; 6214 } else { 6215 llvm_unreachable("unhandled operand type"); 6216 } 6217 } 6218 6219 if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::high) != -1) { 6220 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyHigh); 6221 } 6222 6223 if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp) != -1) { 6224 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI); 6225 } 6226 6227 if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::omod) != -1) { 6228 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI); 6229 } 6230 } 6231 6232 void AMDGPUAsmParser::cvtVOP3(MCInst &Inst, const OperandVector &Operands, 6233 OptionalImmIndexMap &OptionalIdx) { 6234 unsigned Opc = Inst.getOpcode(); 6235 6236 unsigned I = 1; 6237 const MCInstrDesc &Desc = MII.get(Inst.getOpcode()); 6238 for (unsigned J = 0; J < Desc.getNumDefs(); ++J) { 6239 ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1); 6240 } 6241 6242 if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0_modifiers) != -1) { 6243 // This instruction has src modifiers 6244 for (unsigned E = Operands.size(); I != E; ++I) { 6245 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]); 6246 if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) { 6247 Op.addRegOrImmWithFPInputModsOperands(Inst, 2); 6248 } else if (Op.isImmModifier()) { 6249 OptionalIdx[Op.getImmTy()] = I; 6250 } else if (Op.isRegOrImm()) { 6251 Op.addRegOrImmOperands(Inst, 1); 6252 } else { 6253 llvm_unreachable("unhandled operand type"); 6254 } 6255 } 6256 } else { 6257 // No src modifiers 6258 for (unsigned E = Operands.size(); I != E; ++I) { 6259 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]); 6260 if (Op.isMod()) { 6261 OptionalIdx[Op.getImmTy()] = I; 6262 } else { 6263 Op.addRegOrImmOperands(Inst, 1); 6264 } 6265 } 6266 } 6267 6268 if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp) != -1) { 6269 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI); 6270 } 6271 6272 if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::omod) != -1) { 6273 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI); 6274 } 6275 6276 // Special case v_mac_{f16, f32} and v_fmac_{f16, f32} (gfx906/gfx10+): 6277 // it has src2 register operand that is tied to dst operand 6278 // we don't allow modifiers for this operand in assembler so src2_modifiers 6279 // should be 0. 
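// E.g. for "v_mac_f32_e64 v0, v1, v2" only vdst, src0 and src1 come from the
// assembly; the code below inserts an explicit src2_modifiers value of 0 and
// a copy of the vdst operand as src2 at their named operand positions.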
6280 if (Opc == AMDGPU::V_MAC_F32_e64_gfx6_gfx7 || 6281 Opc == AMDGPU::V_MAC_F32_e64_gfx10 || 6282 Opc == AMDGPU::V_MAC_F32_e64_vi || 6283 Opc == AMDGPU::V_MAC_F16_e64_vi || 6284 Opc == AMDGPU::V_FMAC_F32_e64_gfx10 || 6285 Opc == AMDGPU::V_FMAC_F32_e64_vi || 6286 Opc == AMDGPU::V_FMAC_F16_e64_gfx10) { 6287 auto it = Inst.begin(); 6288 std::advance(it, AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2_modifiers)); 6289 it = Inst.insert(it, MCOperand::createImm(0)); // no modifiers for src2 6290 ++it; 6291 Inst.insert(it, Inst.getOperand(0)); // src2 = dst 6292 } 6293 } 6294 6295 void AMDGPUAsmParser::cvtVOP3(MCInst &Inst, const OperandVector &Operands) { 6296 OptionalImmIndexMap OptionalIdx; 6297 cvtVOP3(Inst, Operands, OptionalIdx); 6298 } 6299 6300 void AMDGPUAsmParser::cvtVOP3P(MCInst &Inst, 6301 const OperandVector &Operands) { 6302 OptionalImmIndexMap OptIdx; 6303 const int Opc = Inst.getOpcode(); 6304 const MCInstrDesc &Desc = MII.get(Opc); 6305 6306 const bool IsPacked = (Desc.TSFlags & SIInstrFlags::IsPacked) != 0; 6307 6308 cvtVOP3(Inst, Operands, OptIdx); 6309 6310 if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdst_in) != -1) { 6311 assert(!IsPacked); 6312 Inst.addOperand(Inst.getOperand(0)); 6313 } 6314 6315 // FIXME: This is messy. Parse the modifiers as if it was a normal VOP3 6316 // instruction, and then figure out where to actually put the modifiers 6317 6318 addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyOpSel); 6319 6320 int OpSelHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel_hi); 6321 if (OpSelHiIdx != -1) { 6322 int DefaultVal = IsPacked ? -1 : 0; 6323 addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyOpSelHi, 6324 DefaultVal); 6325 } 6326 6327 int NegLoIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::neg_lo); 6328 if (NegLoIdx != -1) { 6329 assert(IsPacked); 6330 addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyNegLo); 6331 addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyNegHi); 6332 } 6333 6334 const int Ops[] = { AMDGPU::OpName::src0, 6335 AMDGPU::OpName::src1, 6336 AMDGPU::OpName::src2 }; 6337 const int ModOps[] = { AMDGPU::OpName::src0_modifiers, 6338 AMDGPU::OpName::src1_modifiers, 6339 AMDGPU::OpName::src2_modifiers }; 6340 6341 int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel); 6342 6343 unsigned OpSel = Inst.getOperand(OpSelIdx).getImm(); 6344 unsigned OpSelHi = 0; 6345 unsigned NegLo = 0; 6346 unsigned NegHi = 0; 6347 6348 if (OpSelHiIdx != -1) { 6349 OpSelHi = Inst.getOperand(OpSelHiIdx).getImm(); 6350 } 6351 6352 if (NegLoIdx != -1) { 6353 int NegHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::neg_hi); 6354 NegLo = Inst.getOperand(NegLoIdx).getImm(); 6355 NegHi = Inst.getOperand(NegHiIdx).getImm(); 6356 } 6357 6358 for (int J = 0; J < 3; ++J) { 6359 int OpIdx = AMDGPU::getNamedOperandIdx(Opc, Ops[J]); 6360 if (OpIdx == -1) 6361 break; 6362 6363 uint32_t ModVal = 0; 6364 6365 if ((OpSel & (1 << J)) != 0) 6366 ModVal |= SISrcMods::OP_SEL_0; 6367 6368 if ((OpSelHi & (1 << J)) != 0) 6369 ModVal |= SISrcMods::OP_SEL_1; 6370 6371 if ((NegLo & (1 << J)) != 0) 6372 ModVal |= SISrcMods::NEG; 6373 6374 if ((NegHi & (1 << J)) != 0) 6375 ModVal |= SISrcMods::NEG_HI; 6376 6377 int ModIdx = AMDGPU::getNamedOperandIdx(Opc, ModOps[J]); 6378 6379 Inst.getOperand(ModIdx).setImm(Inst.getOperand(ModIdx).getImm() | ModVal); 6380 } 6381 } 6382 6383 //===----------------------------------------------------------------------===// 6384 // dpp 6385 
//===----------------------------------------------------------------------===// 6386 6387 bool AMDGPUOperand::isDPP8() const { 6388 return isImmTy(ImmTyDPP8); 6389 } 6390 6391 bool AMDGPUOperand::isDPPCtrl() const { 6392 using namespace AMDGPU::DPP; 6393 6394 bool result = isImm() && getImmTy() == ImmTyDppCtrl && isUInt<9>(getImm()); 6395 if (result) { 6396 int64_t Imm = getImm(); 6397 return (Imm >= DppCtrl::QUAD_PERM_FIRST && Imm <= DppCtrl::QUAD_PERM_LAST) || 6398 (Imm >= DppCtrl::ROW_SHL_FIRST && Imm <= DppCtrl::ROW_SHL_LAST) || 6399 (Imm >= DppCtrl::ROW_SHR_FIRST && Imm <= DppCtrl::ROW_SHR_LAST) || 6400 (Imm >= DppCtrl::ROW_ROR_FIRST && Imm <= DppCtrl::ROW_ROR_LAST) || 6401 (Imm == DppCtrl::WAVE_SHL1) || 6402 (Imm == DppCtrl::WAVE_ROL1) || 6403 (Imm == DppCtrl::WAVE_SHR1) || 6404 (Imm == DppCtrl::WAVE_ROR1) || 6405 (Imm == DppCtrl::ROW_MIRROR) || 6406 (Imm == DppCtrl::ROW_HALF_MIRROR) || 6407 (Imm == DppCtrl::BCAST15) || 6408 (Imm == DppCtrl::BCAST31) || 6409 (Imm >= DppCtrl::ROW_SHARE_FIRST && Imm <= DppCtrl::ROW_SHARE_LAST) || 6410 (Imm >= DppCtrl::ROW_XMASK_FIRST && Imm <= DppCtrl::ROW_XMASK_LAST); 6411 } 6412 return false; 6413 } 6414 6415 //===----------------------------------------------------------------------===// 6416 // mAI 6417 //===----------------------------------------------------------------------===// 6418 6419 bool AMDGPUOperand::isBLGP() const { 6420 return isImm() && getImmTy() == ImmTyBLGP && isUInt<3>(getImm()); 6421 } 6422 6423 bool AMDGPUOperand::isCBSZ() const { 6424 return isImm() && getImmTy() == ImmTyCBSZ && isUInt<3>(getImm()); 6425 } 6426 6427 bool AMDGPUOperand::isABID() const { 6428 return isImm() && getImmTy() == ImmTyABID && isUInt<4>(getImm()); 6429 } 6430 6431 bool AMDGPUOperand::isS16Imm() const { 6432 return isImm() && (isInt<16>(getImm()) || isUInt<16>(getImm())); 6433 } 6434 6435 bool AMDGPUOperand::isU16Imm() const { 6436 return isImm() && isUInt<16>(getImm()); 6437 } 6438 6439 OperandMatchResultTy AMDGPUAsmParser::parseDim(OperandVector &Operands) { 6440 if (!isGFX10()) 6441 return MatchOperand_NoMatch; 6442 6443 SMLoc S = Parser.getTok().getLoc(); 6444 6445 if (getLexer().isNot(AsmToken::Identifier)) 6446 return MatchOperand_NoMatch; 6447 if (getLexer().getTok().getString() != "dim") 6448 return MatchOperand_NoMatch; 6449 6450 Parser.Lex(); 6451 if (getLexer().isNot(AsmToken::Colon)) 6452 return MatchOperand_ParseFail; 6453 6454 Parser.Lex(); 6455 6456 // We want to allow "dim:1D" etc., but the initial 1 is tokenized as an 6457 // integer. 
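// That is, "dim:1D" arrives as the integer token "1" immediately followed by
// the identifier "D"; the two pieces are glued back together below. The long
// form with an "SQ_RSRC_IMG_" prefix (e.g. "dim:SQ_RSRC_IMG_2D_ARRAY") is
// accepted as well.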
6458 std::string Token; 6459 if (getLexer().is(AsmToken::Integer)) { 6460 SMLoc Loc = getLexer().getTok().getEndLoc(); 6461 Token = getLexer().getTok().getString(); 6462 Parser.Lex(); 6463 if (getLexer().getTok().getLoc() != Loc) 6464 return MatchOperand_ParseFail; 6465 } 6466 if (getLexer().isNot(AsmToken::Identifier)) 6467 return MatchOperand_ParseFail; 6468 Token += getLexer().getTok().getString(); 6469 6470 StringRef DimId = Token; 6471 if (DimId.startswith("SQ_RSRC_IMG_")) 6472 DimId = DimId.substr(12); 6473 6474 const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfoByAsmSuffix(DimId); 6475 if (!DimInfo) 6476 return MatchOperand_ParseFail; 6477 6478 Parser.Lex(); 6479 6480 Operands.push_back(AMDGPUOperand::CreateImm(this, DimInfo->Encoding, S, 6481 AMDGPUOperand::ImmTyDim)); 6482 return MatchOperand_Success; 6483 } 6484 6485 OperandMatchResultTy AMDGPUAsmParser::parseDPP8(OperandVector &Operands) { 6486 SMLoc S = Parser.getTok().getLoc(); 6487 StringRef Prefix; 6488 6489 if (getLexer().getKind() == AsmToken::Identifier) { 6490 Prefix = Parser.getTok().getString(); 6491 } else { 6492 return MatchOperand_NoMatch; 6493 } 6494 6495 if (Prefix != "dpp8") 6496 return parseDPPCtrl(Operands); 6497 if (!isGFX10()) 6498 return MatchOperand_NoMatch; 6499 6500 // dpp8:[%d,%d,%d,%d,%d,%d,%d,%d] 6501 6502 int64_t Sels[8]; 6503 6504 Parser.Lex(); 6505 if (getLexer().isNot(AsmToken::Colon)) 6506 return MatchOperand_ParseFail; 6507 6508 Parser.Lex(); 6509 if (getLexer().isNot(AsmToken::LBrac)) 6510 return MatchOperand_ParseFail; 6511 6512 Parser.Lex(); 6513 if (getParser().parseAbsoluteExpression(Sels[0])) 6514 return MatchOperand_ParseFail; 6515 if (0 > Sels[0] || 7 < Sels[0]) 6516 return MatchOperand_ParseFail; 6517 6518 for (size_t i = 1; i < 8; ++i) { 6519 if (getLexer().isNot(AsmToken::Comma)) 6520 return MatchOperand_ParseFail; 6521 6522 Parser.Lex(); 6523 if (getParser().parseAbsoluteExpression(Sels[i])) 6524 return MatchOperand_ParseFail; 6525 if (0 > Sels[i] || 7 < Sels[i]) 6526 return MatchOperand_ParseFail; 6527 } 6528 6529 if (getLexer().isNot(AsmToken::RBrac)) 6530 return MatchOperand_ParseFail; 6531 Parser.Lex(); 6532 6533 unsigned DPP8 = 0; 6534 for (size_t i = 0; i < 8; ++i) 6535 DPP8 |= (Sels[i] << (i * 3)); 6536 6537 Operands.push_back(AMDGPUOperand::CreateImm(this, DPP8, S, AMDGPUOperand::ImmTyDPP8)); 6538 return MatchOperand_Success; 6539 } 6540 6541 OperandMatchResultTy 6542 AMDGPUAsmParser::parseDPPCtrl(OperandVector &Operands) { 6543 using namespace AMDGPU::DPP; 6544 6545 SMLoc S = Parser.getTok().getLoc(); 6546 StringRef Prefix; 6547 int64_t Int; 6548 6549 if (getLexer().getKind() == AsmToken::Identifier) { 6550 Prefix = Parser.getTok().getString(); 6551 } else { 6552 return MatchOperand_NoMatch; 6553 } 6554 6555 if (Prefix == "row_mirror") { 6556 Int = DppCtrl::ROW_MIRROR; 6557 Parser.Lex(); 6558 } else if (Prefix == "row_half_mirror") { 6559 Int = DppCtrl::ROW_HALF_MIRROR; 6560 Parser.Lex(); 6561 } else { 6562 // Check to prevent parseDPPCtrlOps from eating invalid tokens 6563 if (Prefix != "quad_perm" 6564 && Prefix != "row_shl" 6565 && Prefix != "row_shr" 6566 && Prefix != "row_ror" 6567 && Prefix != "wave_shl" 6568 && Prefix != "wave_rol" 6569 && Prefix != "wave_shr" 6570 && Prefix != "wave_ror" 6571 && Prefix != "row_bcast" 6572 && Prefix != "row_share" 6573 && Prefix != "row_xmask") { 6574 return MatchOperand_NoMatch; 6575 } 6576 6577 if (!isGFX10() && (Prefix == "row_share" || Prefix == "row_xmask")) 6578 return MatchOperand_NoMatch; 6579 6580 if (!isVI() && 
!isGFX9() && 6581 (Prefix == "wave_shl" || Prefix == "wave_shr" || 6582 Prefix == "wave_rol" || Prefix == "wave_ror" || 6583 Prefix == "row_bcast")) 6584 return MatchOperand_NoMatch; 6585 6586 Parser.Lex(); 6587 if (getLexer().isNot(AsmToken::Colon)) 6588 return MatchOperand_ParseFail; 6589 6590 if (Prefix == "quad_perm") { 6591 // quad_perm:[%d,%d,%d,%d] 6592 Parser.Lex(); 6593 if (getLexer().isNot(AsmToken::LBrac)) 6594 return MatchOperand_ParseFail; 6595 Parser.Lex(); 6596 6597 if (getParser().parseAbsoluteExpression(Int) || !(0 <= Int && Int <=3)) 6598 return MatchOperand_ParseFail; 6599 6600 for (int i = 0; i < 3; ++i) { 6601 if (getLexer().isNot(AsmToken::Comma)) 6602 return MatchOperand_ParseFail; 6603 Parser.Lex(); 6604 6605 int64_t Temp; 6606 if (getParser().parseAbsoluteExpression(Temp) || !(0 <= Temp && Temp <=3)) 6607 return MatchOperand_ParseFail; 6608 const int shift = i*2 + 2; 6609 Int += (Temp << shift); 6610 } 6611 6612 if (getLexer().isNot(AsmToken::RBrac)) 6613 return MatchOperand_ParseFail; 6614 Parser.Lex(); 6615 } else { 6616 // sel:%d 6617 Parser.Lex(); 6618 if (getParser().parseAbsoluteExpression(Int)) 6619 return MatchOperand_ParseFail; 6620 6621 if (Prefix == "row_shl" && 1 <= Int && Int <= 15) { 6622 Int |= DppCtrl::ROW_SHL0; 6623 } else if (Prefix == "row_shr" && 1 <= Int && Int <= 15) { 6624 Int |= DppCtrl::ROW_SHR0; 6625 } else if (Prefix == "row_ror" && 1 <= Int && Int <= 15) { 6626 Int |= DppCtrl::ROW_ROR0; 6627 } else if (Prefix == "wave_shl" && 1 == Int) { 6628 Int = DppCtrl::WAVE_SHL1; 6629 } else if (Prefix == "wave_rol" && 1 == Int) { 6630 Int = DppCtrl::WAVE_ROL1; 6631 } else if (Prefix == "wave_shr" && 1 == Int) { 6632 Int = DppCtrl::WAVE_SHR1; 6633 } else if (Prefix == "wave_ror" && 1 == Int) { 6634 Int = DppCtrl::WAVE_ROR1; 6635 } else if (Prefix == "row_bcast") { 6636 if (Int == 15) { 6637 Int = DppCtrl::BCAST15; 6638 } else if (Int == 31) { 6639 Int = DppCtrl::BCAST31; 6640 } else { 6641 return MatchOperand_ParseFail; 6642 } 6643 } else if (Prefix == "row_share" && 0 <= Int && Int <= 15) { 6644 Int |= DppCtrl::ROW_SHARE_FIRST; 6645 } else if (Prefix == "row_xmask" && 0 <= Int && Int <= 15) { 6646 Int |= DppCtrl::ROW_XMASK_FIRST; 6647 } else { 6648 return MatchOperand_ParseFail; 6649 } 6650 } 6651 } 6652 6653 Operands.push_back(AMDGPUOperand::CreateImm(this, Int, S, AMDGPUOperand::ImmTyDppCtrl)); 6654 return MatchOperand_Success; 6655 } 6656 6657 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultRowMask() const { 6658 return AMDGPUOperand::CreateImm(this, 0xf, SMLoc(), AMDGPUOperand::ImmTyDppRowMask); 6659 } 6660 6661 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultEndpgmImmOperands() const { 6662 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyEndpgm); 6663 } 6664 6665 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultBankMask() const { 6666 return AMDGPUOperand::CreateImm(this, 0xf, SMLoc(), AMDGPUOperand::ImmTyDppBankMask); 6667 } 6668 6669 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultBoundCtrl() const { 6670 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyDppBoundCtrl); 6671 } 6672 6673 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultFI() const { 6674 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyDppFi); 6675 } 6676 6677 void AMDGPUAsmParser::cvtDPP(MCInst &Inst, const OperandVector &Operands, bool IsDPP8) { 6678 OptionalImmIndexMap OptionalIdx; 6679 6680 unsigned I = 1; 6681 const MCInstrDesc &Desc = MII.get(Inst.getOpcode()); 6682 for (unsigned J = 0; J < Desc.getNumDefs(); ++J) { 6683 ((AMDGPUOperand 
&)*Operands[I++]).addRegOperands(Inst, 1); 6684 } 6685 6686 int Fi = 0; 6687 for (unsigned E = Operands.size(); I != E; ++I) { 6688 auto TiedTo = Desc.getOperandConstraint(Inst.getNumOperands(), 6689 MCOI::TIED_TO); 6690 if (TiedTo != -1) { 6691 assert((unsigned)TiedTo < Inst.getNumOperands()); 6692 // handle tied old or src2 for MAC instructions 6693 Inst.addOperand(Inst.getOperand(TiedTo)); 6694 } 6695 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]); 6696 // Add the register arguments 6697 if (Op.isReg() && validateVccOperand(Op.getReg())) { 6698 // VOP2b (v_add_u32, v_sub_u32 ...) dpp use "vcc" token. 6699 // Skip it. 6700 continue; 6701 } 6702 6703 if (IsDPP8) { 6704 if (Op.isDPP8()) { 6705 Op.addImmOperands(Inst, 1); 6706 } else if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) { 6707 Op.addRegWithFPInputModsOperands(Inst, 2); 6708 } else if (Op.isFI()) { 6709 Fi = Op.getImm(); 6710 } else if (Op.isReg()) { 6711 Op.addRegOperands(Inst, 1); 6712 } else { 6713 llvm_unreachable("Invalid operand type"); 6714 } 6715 } else { 6716 if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) { 6717 Op.addRegWithFPInputModsOperands(Inst, 2); 6718 } else if (Op.isDPPCtrl()) { 6719 Op.addImmOperands(Inst, 1); 6720 } else if (Op.isImm()) { 6721 // Handle optional arguments 6722 OptionalIdx[Op.getImmTy()] = I; 6723 } else { 6724 llvm_unreachable("Invalid operand type"); 6725 } 6726 } 6727 } 6728 6729 if (IsDPP8) { 6730 using namespace llvm::AMDGPU::DPP; 6731 Inst.addOperand(MCOperand::createImm(Fi? DPP8_FI_1 : DPP8_FI_0)); 6732 } else { 6733 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppRowMask, 0xf); 6734 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBankMask, 0xf); 6735 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBoundCtrl); 6736 if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::fi) != -1) { 6737 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppFi); 6738 } 6739 } 6740 } 6741 6742 //===----------------------------------------------------------------------===// 6743 // sdwa 6744 //===----------------------------------------------------------------------===// 6745 6746 OperandMatchResultTy 6747 AMDGPUAsmParser::parseSDWASel(OperandVector &Operands, StringRef Prefix, 6748 AMDGPUOperand::ImmTy Type) { 6749 using namespace llvm::AMDGPU::SDWA; 6750 6751 SMLoc S = Parser.getTok().getLoc(); 6752 StringRef Value; 6753 OperandMatchResultTy res; 6754 6755 res = parseStringWithPrefix(Prefix, Value); 6756 if (res != MatchOperand_Success) { 6757 return res; 6758 } 6759 6760 int64_t Int; 6761 Int = StringSwitch<int64_t>(Value) 6762 .Case("BYTE_0", SdwaSel::BYTE_0) 6763 .Case("BYTE_1", SdwaSel::BYTE_1) 6764 .Case("BYTE_2", SdwaSel::BYTE_2) 6765 .Case("BYTE_3", SdwaSel::BYTE_3) 6766 .Case("WORD_0", SdwaSel::WORD_0) 6767 .Case("WORD_1", SdwaSel::WORD_1) 6768 .Case("DWORD", SdwaSel::DWORD) 6769 .Default(0xffffffff); 6770 Parser.Lex(); // eat last token 6771 6772 if (Int == 0xffffffff) { 6773 return MatchOperand_ParseFail; 6774 } 6775 6776 Operands.push_back(AMDGPUOperand::CreateImm(this, Int, S, Type)); 6777 return MatchOperand_Success; 6778 } 6779 6780 OperandMatchResultTy 6781 AMDGPUAsmParser::parseSDWADstUnused(OperandVector &Operands) { 6782 using namespace llvm::AMDGPU::SDWA; 6783 6784 SMLoc S = Parser.getTok().getLoc(); 6785 StringRef Value; 6786 OperandMatchResultTy res; 6787 6788 res = parseStringWithPrefix("dst_unused", Value); 6789 if (res != MatchOperand_Success) { 6790 
return res; 6791 } 6792 6793 int64_t Int; 6794 Int = StringSwitch<int64_t>(Value) 6795 .Case("UNUSED_PAD", DstUnused::UNUSED_PAD) 6796 .Case("UNUSED_SEXT", DstUnused::UNUSED_SEXT) 6797 .Case("UNUSED_PRESERVE", DstUnused::UNUSED_PRESERVE) 6798 .Default(0xffffffff); 6799 Parser.Lex(); // eat last token 6800 6801 if (Int == 0xffffffff) { 6802 return MatchOperand_ParseFail; 6803 } 6804 6805 Operands.push_back(AMDGPUOperand::CreateImm(this, Int, S, AMDGPUOperand::ImmTySdwaDstUnused)); 6806 return MatchOperand_Success; 6807 } 6808 6809 void AMDGPUAsmParser::cvtSdwaVOP1(MCInst &Inst, const OperandVector &Operands) { 6810 cvtSDWA(Inst, Operands, SIInstrFlags::VOP1); 6811 } 6812 6813 void AMDGPUAsmParser::cvtSdwaVOP2(MCInst &Inst, const OperandVector &Operands) { 6814 cvtSDWA(Inst, Operands, SIInstrFlags::VOP2); 6815 } 6816 6817 void AMDGPUAsmParser::cvtSdwaVOP2b(MCInst &Inst, const OperandVector &Operands) { 6818 cvtSDWA(Inst, Operands, SIInstrFlags::VOP2, true); 6819 } 6820 6821 void AMDGPUAsmParser::cvtSdwaVOPC(MCInst &Inst, const OperandVector &Operands) { 6822 cvtSDWA(Inst, Operands, SIInstrFlags::VOPC, isVI()); 6823 } 6824 6825 void AMDGPUAsmParser::cvtSDWA(MCInst &Inst, const OperandVector &Operands, 6826 uint64_t BasicInstType, bool skipVcc) { 6827 using namespace llvm::AMDGPU::SDWA; 6828 6829 OptionalImmIndexMap OptionalIdx; 6830 bool skippedVcc = false; 6831 6832 unsigned I = 1; 6833 const MCInstrDesc &Desc = MII.get(Inst.getOpcode()); 6834 for (unsigned J = 0; J < Desc.getNumDefs(); ++J) { 6835 ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1); 6836 } 6837 6838 for (unsigned E = Operands.size(); I != E; ++I) { 6839 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]); 6840 if (skipVcc && !skippedVcc && Op.isReg() && 6841 (Op.getReg() == AMDGPU::VCC || Op.getReg() == AMDGPU::VCC_LO)) { 6842 // VOP2b (v_add_u32, v_sub_u32 ...) sdwa use "vcc" token as dst. 6843 // Skip it if it's 2nd (e.g. v_add_i32_sdwa v1, vcc, v2, v3) 6844 // or 4th (v_addc_u32_sdwa v1, vcc, v2, v3, vcc) operand. 6845 // Skip VCC only if we didn't skip it on previous iteration. 
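// (At this point an MCInst operand count of 1 means only vdst has been
// emitted, i.e. this "vcc" directly follows the destination; a count of 5
// means vdst, src0 and src1 - each source with its modifiers operand - have
// been emitted, i.e. this is the trailing "vcc".)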
6846 if (BasicInstType == SIInstrFlags::VOP2 && 6847 (Inst.getNumOperands() == 1 || Inst.getNumOperands() == 5)) { 6848 skippedVcc = true; 6849 continue; 6850 } else if (BasicInstType == SIInstrFlags::VOPC && 6851 Inst.getNumOperands() == 0) { 6852 skippedVcc = true; 6853 continue; 6854 } 6855 } 6856 if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) { 6857 Op.addRegOrImmWithInputModsOperands(Inst, 2); 6858 } else if (Op.isImm()) { 6859 // Handle optional arguments 6860 OptionalIdx[Op.getImmTy()] = I; 6861 } else { 6862 llvm_unreachable("Invalid operand type"); 6863 } 6864 skippedVcc = false; 6865 } 6866 6867 if (Inst.getOpcode() != AMDGPU::V_NOP_sdwa_gfx10 && 6868 Inst.getOpcode() != AMDGPU::V_NOP_sdwa_gfx9 && 6869 Inst.getOpcode() != AMDGPU::V_NOP_sdwa_vi) { 6870 // v_nop_sdwa_sdwa_vi/gfx9 has no optional sdwa arguments 6871 switch (BasicInstType) { 6872 case SIInstrFlags::VOP1: 6873 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0); 6874 if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::omod) != -1) { 6875 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI, 0); 6876 } 6877 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstSel, SdwaSel::DWORD); 6878 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstUnused, DstUnused::UNUSED_PRESERVE); 6879 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, SdwaSel::DWORD); 6880 break; 6881 6882 case SIInstrFlags::VOP2: 6883 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0); 6884 if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::omod) != -1) { 6885 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI, 0); 6886 } 6887 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstSel, SdwaSel::DWORD); 6888 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstUnused, DstUnused::UNUSED_PRESERVE); 6889 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, SdwaSel::DWORD); 6890 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc1Sel, SdwaSel::DWORD); 6891 break; 6892 6893 case SIInstrFlags::VOPC: 6894 if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::clamp) != -1) 6895 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0); 6896 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, SdwaSel::DWORD); 6897 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc1Sel, SdwaSel::DWORD); 6898 break; 6899 6900 default: 6901 llvm_unreachable("Invalid instruction type. 
Only VOP1, VOP2 and VOPC allowed"); 6902 } 6903 } 6904
6905 // Special case v_mac_{f16, f32}:
6906 // it has a src2 register operand that is tied to the dst operand
6907 if (Inst.getOpcode() == AMDGPU::V_MAC_F32_sdwa_vi ||
6908 Inst.getOpcode() == AMDGPU::V_MAC_F16_sdwa_vi) {
6909 auto it = Inst.begin();
6910 std::advance(
6911 it, AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::src2));
6912 Inst.insert(it, Inst.getOperand(0)); // src2 = dst
6913 } 6914 } 6915
6916 //===----------------------------------------------------------------------===//
6917 // mAI
6918 //===----------------------------------------------------------------------===//
6919
6920 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultBLGP() const {
6921 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyBLGP);
6922 } 6923
6924 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultCBSZ() const {
6925 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyCBSZ);
6926 } 6927
6928 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultABID() const {
6929 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyABID);
6930 } 6931
6932 /// Force static initialization.
6933 extern "C" void LLVMInitializeAMDGPUAsmParser() {
6934 RegisterMCAsmParser<AMDGPUAsmParser> A(getTheAMDGPUTarget());
6935 RegisterMCAsmParser<AMDGPUAsmParser> B(getTheGCNTarget());
6936 } 6937
6938 #define GET_REGISTER_MATCHER
6939 #define GET_MATCHER_IMPLEMENTATION
6940 #define GET_MNEMONIC_SPELL_CHECKER
6941 #include "AMDGPUGenAsmMatcher.inc"
6942
6943 // This function must be defined after the auto-generated include so that
6944 // the MatchClassKind enum is defined.
6945 unsigned AMDGPUAsmParser::validateTargetOperandClass(MCParsedAsmOperand &Op,
6946 unsigned Kind) {
6947 // Tokens like "glc" would be parsed as immediate operands in ParseOperand().
6948 // But MatchInstructionImpl() expects a token and fails to validate the
6949 // operand. This method handles the case where an immediate operand was
6950 // parsed but the matcher expects the corresponding token.
6951 AMDGPUOperand &Operand = (AMDGPUOperand&)Op;
6952 switch (Kind) {
6953 case MCK_addr64:
6954 return Operand.isAddr64() ? Match_Success : Match_InvalidOperand;
6955 case MCK_gds:
6956 return Operand.isGDS() ? Match_Success : Match_InvalidOperand;
6957 case MCK_lds:
6958 return Operand.isLDS() ? Match_Success : Match_InvalidOperand;
6959 case MCK_glc:
6960 return Operand.isGLC() ? Match_Success : Match_InvalidOperand;
6961 case MCK_idxen:
6962 return Operand.isIdxen() ? Match_Success : Match_InvalidOperand;
6963 case MCK_offen:
6964 return Operand.isOffen() ? Match_Success : Match_InvalidOperand;
6965 case MCK_SSrcB32:
6966 // When operands have expression values, they will return true for isToken,
6967 // because it is not possible to distinguish between a token and an
6968 // expression at parse time. When isToken returns true, MatchInstructionImpl()
6969 // will always try to match the operand as a token; if the name of the
6970 // expression is not a valid token, the match fails, so we need to handle
6971 // it here.
6972 return Operand.isSSrcB32() ? Match_Success : Match_InvalidOperand;
6973 case MCK_SSrcF32:
6974 return Operand.isSSrcF32() ? Match_Success : Match_InvalidOperand;
6975 case MCK_SoppBrTarget:
6976 return Operand.isSoppBrTarget() ? Match_Success : Match_InvalidOperand;
6977 case MCK_VReg32OrOff:
6978 return Operand.isVReg32OrOff() ? Match_Success : Match_InvalidOperand;
6979 case MCK_InterpSlot:
6980 return Operand.isInterpSlot() ?
Match_Success : Match_InvalidOperand; 6981 case MCK_Attr: 6982 return Operand.isInterpAttr() ? Match_Success : Match_InvalidOperand; 6983 case MCK_AttrChan: 6984 return Operand.isAttrChan() ? Match_Success : Match_InvalidOperand; 6985 default: 6986 return Match_InvalidOperand; 6987 } 6988 } 6989 6990 //===----------------------------------------------------------------------===// 6991 // endpgm 6992 //===----------------------------------------------------------------------===// 6993 6994 OperandMatchResultTy AMDGPUAsmParser::parseEndpgmOp(OperandVector &Operands) { 6995 SMLoc S = Parser.getTok().getLoc(); 6996 int64_t Imm = 0; 6997 6998 if (!parseExpr(Imm)) { 6999 // The operand is optional, if not present default to 0 7000 Imm = 0; 7001 } 7002 7003 if (!isUInt<16>(Imm)) { 7004 Error(S, "expected a 16-bit value"); 7005 return MatchOperand_ParseFail; 7006 } 7007 7008 Operands.push_back( 7009 AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTyEndpgm)); 7010 return MatchOperand_Success; 7011 } 7012 7013 bool AMDGPUOperand::isEndpgm() const { return isImmTy(ImmTyEndpgm); } 7014
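// Illustrative sketch (guarded out of the build, not part of this file's
// interface): the registration performed by LLVMInitializeAMDGPUAsmParser()
// above only becomes usable together with the target-info and MC-layer
// registrations, roughly as follows.
#if 0
#include "llvm/Support/TargetSelect.h"

static void initAMDGPUAsmParsing() {
  // Register the target, its MC components, and this assembly parser so the
  // target registry can construct an AMDGPUAsmParser for an amdgcn triple.
  LLVMInitializeAMDGPUTargetInfo();
  LLVMInitializeAMDGPUTargetMC();
  LLVMInitializeAMDGPUAsmParser();
}
#endif
//
// From the command line the same path can be exercised with llvm-mc, e.g.:
//   echo 's_endpgm' | llvm-mc -triple=amdgcn -mcpu=gfx900 -show-encoding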