//===- AMDGPUAsmParser.cpp - Parse SI asm to MCInst instructions ----------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#include "AMDGPU.h"
#include "AMDKernelCodeT.h"
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
#include "MCTargetDesc/AMDGPUTargetStreamer.h"
#include "SIDefines.h"
#include "SIInstrInfo.h"
#include "TargetInfo/AMDGPUTargetInfo.h"
#include "Utils/AMDGPUAsmUtils.h"
#include "Utils/AMDGPUBaseInfo.h"
#include "Utils/AMDKernelCodeTUtils.h"
#include "llvm/ADT/APFloat.h"
#include "llvm/ADT/APInt.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallBitVector.h"
#include "llvm/ADT/SmallString.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/StringSwitch.h"
#include "llvm/ADT/Twine.h"
#include "llvm/BinaryFormat/ELF.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCInst.h"
#include "llvm/MC/MCInstrDesc.h"
#include "llvm/MC/MCInstrInfo.h"
#include "llvm/MC/MCParser/MCAsmLexer.h"
#include "llvm/MC/MCParser/MCAsmParser.h"
#include "llvm/MC/MCParser/MCAsmParserExtension.h"
#include "llvm/MC/MCParser/MCParsedAsmOperand.h"
#include "llvm/MC/MCParser/MCTargetAsmParser.h"
#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/MC/MCStreamer.h"
#include "llvm/MC/MCSubtargetInfo.h"
#include "llvm/MC/MCSymbol.h"
#include "llvm/Support/AMDGPUMetadata.h"
#include "llvm/Support/AMDHSAKernelDescriptor.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MachineValueType.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/SMLoc.h"
#include "llvm/Support/TargetParser.h"
#include "llvm/Support/TargetRegistry.h"
#include "llvm/Support/raw_ostream.h"
#include <algorithm>
#include <cassert>
#include <cstdint>
#include <cstring>
#include <iterator>
#include <map>
#include <memory>
#include <string>

using namespace llvm;
using namespace llvm::AMDGPU;
using namespace llvm::amdhsa;

namespace {

class AMDGPUAsmParser;

enum RegisterKind { IS_UNKNOWN, IS_VGPR, IS_SGPR, IS_AGPR, IS_TTMP, IS_SPECIAL };

//===----------------------------------------------------------------------===//
// Operand
//===----------------------------------------------------------------------===//

class AMDGPUOperand : public MCParsedAsmOperand {
  enum KindTy {
    Token,
    Immediate,
    Register,
    Expression
  } Kind;

  SMLoc StartLoc, EndLoc;
  const AMDGPUAsmParser *AsmParser;

public:
  AMDGPUOperand(KindTy Kind_, const AMDGPUAsmParser *AsmParser_)
      : MCParsedAsmOperand(), Kind(Kind_), AsmParser(AsmParser_) {}

  using Ptr = std::unique_ptr<AMDGPUOperand>;

  struct Modifiers {
    bool Abs = false;
    bool Neg = false;
    bool Sext = false;

    bool hasFPModifiers() const { return Abs || Neg; }
    bool hasIntModifiers() const { return Sext; }
    bool hasModifiers() const { return hasFPModifiers() || hasIntModifiers(); }

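    // The helpers below pack the parsed source modifiers into the SISrcMods
    // bit layout (ABS/NEG for FP sources, SEXT for integer sources) that the
    // src-modifier operands of the instruction encodings expect.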
    int64_t getFPModifiersOperand() const {
      int64_t Operand = 0;
      Operand |= Abs ? SISrcMods::ABS : 0u;
      Operand |= Neg ? SISrcMods::NEG : 0u;
      return Operand;
    }

    int64_t getIntModifiersOperand() const {
      int64_t Operand = 0;
      Operand |= Sext ? SISrcMods::SEXT : 0u;
      return Operand;
    }

    int64_t getModifiersOperand() const {
      assert(!(hasFPModifiers() && hasIntModifiers())
             && "fp and int modifiers should not be used simultaneously");
      if (hasFPModifiers()) {
        return getFPModifiersOperand();
      } else if (hasIntModifiers()) {
        return getIntModifiersOperand();
      } else {
        return 0;
      }
    }

    friend raw_ostream &operator <<(raw_ostream &OS, AMDGPUOperand::Modifiers Mods);
  };

  enum ImmTy {
    ImmTyNone,
    ImmTyGDS,
    ImmTyLDS,
    ImmTyOffen,
    ImmTyIdxen,
    ImmTyAddr64,
    ImmTyOffset,
    ImmTyInstOffset,
    ImmTyOffset0,
    ImmTyOffset1,
    ImmTyDLC,
    ImmTyGLC,
    ImmTySLC,
    ImmTyTFE,
    ImmTyD16,
    ImmTyClampSI,
    ImmTyOModSI,
    ImmTyDPP8,
    ImmTyDppCtrl,
    ImmTyDppRowMask,
    ImmTyDppBankMask,
    ImmTyDppBoundCtrl,
    ImmTyDppFi,
    ImmTySdwaDstSel,
    ImmTySdwaSrc0Sel,
    ImmTySdwaSrc1Sel,
    ImmTySdwaDstUnused,
    ImmTyDMask,
    ImmTyDim,
    ImmTyUNorm,
    ImmTyDA,
    ImmTyR128A16,
    ImmTyLWE,
    ImmTyExpTgt,
    ImmTyExpCompr,
    ImmTyExpVM,
    ImmTyFORMAT,
    ImmTyHwreg,
    ImmTyOff,
    ImmTySendMsg,
    ImmTyInterpSlot,
    ImmTyInterpAttr,
    ImmTyAttrChan,
    ImmTyOpSel,
    ImmTyOpSelHi,
    ImmTyNegLo,
    ImmTyNegHi,
    ImmTySwizzle,
    ImmTyGprIdxMode,
    ImmTyHigh,
    ImmTyBLGP,
    ImmTyCBSZ,
    ImmTyABID,
    ImmTyEndpgm,
  };

private:
  struct TokOp {
    const char *Data;
    unsigned Length;
  };

  struct ImmOp {
    int64_t Val;
    ImmTy Type;
    bool IsFPImm;
    Modifiers Mods;
  };

  struct RegOp {
    unsigned RegNo;
    Modifiers Mods;
  };

  union {
    TokOp Tok;
    ImmOp Imm;
    RegOp Reg;
    const MCExpr *Expr;
  };

public:
  bool isToken() const override {
    if (Kind == Token)
      return true;

    // When parsing operands, we can't always tell if something was meant to be
    // a token, like 'gds', or an expression that references a global variable.
    // In this case, we assume the string is an expression, and if we need to
    // interpret it as a token, then we treat the symbol name as the token.
    return isSymbolRefExpr();
  }

  bool isSymbolRefExpr() const {
    return isExpr() && Expr && isa<MCSymbolRefExpr>(Expr);
  }

  bool isImm() const override {
    return Kind == Immediate;
  }

  bool isInlinableImm(MVT type) const;
  bool isLiteralImm(MVT type) const;

  bool isRegKind() const {
    return Kind == Register;
  }

  bool isReg() const override {
    return isRegKind() && !hasModifiers();
  }

  bool isRegOrImmWithInputMods(unsigned RCID, MVT type) const {
    return isRegClass(RCID) || isInlinableImm(type) || isLiteralImm(type);
  }

  bool isRegOrImmWithInt16InputMods() const {
    return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::i16);
  }

  bool isRegOrImmWithInt32InputMods() const {
    return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::i32);
  }

  bool isRegOrImmWithInt64InputMods() const {
    return isRegOrImmWithInputMods(AMDGPU::VS_64RegClassID, MVT::i64);
  }

  bool isRegOrImmWithFP16InputMods() const {
    return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::f16);
  }

  bool isRegOrImmWithFP32InputMods() const {
    return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::f32);
  }

  bool isRegOrImmWithFP64InputMods() const {
    return isRegOrImmWithInputMods(AMDGPU::VS_64RegClassID, MVT::f64);
  }

  bool isVReg() const {
    return isRegClass(AMDGPU::VGPR_32RegClassID) ||
           isRegClass(AMDGPU::VReg_64RegClassID) ||
           isRegClass(AMDGPU::VReg_96RegClassID) ||
           isRegClass(AMDGPU::VReg_128RegClassID) ||
           isRegClass(AMDGPU::VReg_160RegClassID) ||
           isRegClass(AMDGPU::VReg_256RegClassID) ||
           isRegClass(AMDGPU::VReg_512RegClassID) ||
           isRegClass(AMDGPU::VReg_1024RegClassID);
  }

  bool isVReg32() const {
    return isRegClass(AMDGPU::VGPR_32RegClassID);
  }

  bool isVReg32OrOff() const {
    return isOff() || isVReg32();
  }

  bool isSDWAOperand(MVT type) const;
  bool isSDWAFP16Operand() const;
  bool isSDWAFP32Operand() const;
  bool isSDWAInt16Operand() const;
  bool isSDWAInt32Operand() const;

  bool isImmTy(ImmTy ImmT) const {
    return isImm() && Imm.Type == ImmT;
  }

  bool isImmModifier() const {
    return isImm() && Imm.Type != ImmTyNone;
  }

  bool isClampSI() const { return isImmTy(ImmTyClampSI); }
  bool isOModSI() const { return isImmTy(ImmTyOModSI); }
  bool isDMask() const { return isImmTy(ImmTyDMask); }
  bool isDim() const { return isImmTy(ImmTyDim); }
  bool isUNorm() const { return isImmTy(ImmTyUNorm); }
  bool isDA() const { return isImmTy(ImmTyDA); }
  bool isR128A16() const { return isImmTy(ImmTyR128A16); }
  bool isLWE() const { return isImmTy(ImmTyLWE); }
  bool isOff() const { return isImmTy(ImmTyOff); }
  bool isExpTgt() const { return isImmTy(ImmTyExpTgt); }
  bool isExpVM() const { return isImmTy(ImmTyExpVM); }
  bool isExpCompr() const { return isImmTy(ImmTyExpCompr); }
  bool isOffen() const { return isImmTy(ImmTyOffen); }
  bool isIdxen() const { return isImmTy(ImmTyIdxen); }
  bool isAddr64() const { return isImmTy(ImmTyAddr64); }
  bool isOffset() const { return isImmTy(ImmTyOffset) && isUInt<16>(getImm()); }
  bool isOffset0() const { return isImmTy(ImmTyOffset0) && isUInt<8>(getImm()); }
  bool isOffset1() const { return isImmTy(ImmTyOffset1) && isUInt<8>(getImm()); }

  bool isFlatOffset() const { return isImmTy(ImmTyOffset) || isImmTy(ImmTyInstOffset); }
  bool isGDS() const { return isImmTy(ImmTyGDS); }
  bool isLDS() const { return isImmTy(ImmTyLDS); }
  bool isDLC() const { return isImmTy(ImmTyDLC); }
  bool isGLC() const { return isImmTy(ImmTyGLC); }
  bool isSLC() const { return isImmTy(ImmTySLC); }
  bool isTFE() const { return isImmTy(ImmTyTFE); }
  bool isD16() const { return isImmTy(ImmTyD16); }
  bool isFORMAT() const { return isImmTy(ImmTyFORMAT) && isUInt<8>(getImm()); }
  bool isBankMask() const { return isImmTy(ImmTyDppBankMask); }
  bool isRowMask() const { return isImmTy(ImmTyDppRowMask); }
  bool isBoundCtrl() const { return isImmTy(ImmTyDppBoundCtrl); }
  bool isFI() const { return isImmTy(ImmTyDppFi); }
  bool isSDWADstSel() const { return isImmTy(ImmTySdwaDstSel); }
  bool isSDWASrc0Sel() const { return isImmTy(ImmTySdwaSrc0Sel); }
  bool isSDWASrc1Sel() const { return isImmTy(ImmTySdwaSrc1Sel); }
  bool isSDWADstUnused() const { return isImmTy(ImmTySdwaDstUnused); }
  bool isInterpSlot() const { return isImmTy(ImmTyInterpSlot); }
  bool isInterpAttr() const { return isImmTy(ImmTyInterpAttr); }
  bool isAttrChan() const { return isImmTy(ImmTyAttrChan); }
  bool isOpSel() const { return isImmTy(ImmTyOpSel); }
  bool isOpSelHi() const { return isImmTy(ImmTyOpSelHi); }
  bool isNegLo() const { return isImmTy(ImmTyNegLo); }
  bool isNegHi() const { return isImmTy(ImmTyNegHi); }
  bool isHigh() const { return isImmTy(ImmTyHigh); }

  bool isMod() const {
    return isClampSI() || isOModSI();
  }

  bool isRegOrImm() const {
    return isReg() || isImm();
  }

  bool isRegClass(unsigned RCID) const;

  bool isInlineValue() const;

  bool isRegOrInlineNoMods(unsigned RCID, MVT type) const {
    return (isRegClass(RCID) || isInlinableImm(type)) && !hasModifiers();
  }

  bool isSCSrcB16() const {
    return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::i16);
  }

  bool isSCSrcV2B16() const {
    return isSCSrcB16();
  }

  bool isSCSrcB32() const {
    return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::i32);
  }

  bool isSCSrcB64() const {
    return isRegOrInlineNoMods(AMDGPU::SReg_64RegClassID, MVT::i64);
  }

  bool isBoolReg() const;

  bool isSCSrcF16() const {
    return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::f16);
  }

  bool isSCSrcV2F16() const {
    return isSCSrcF16();
  }

  bool isSCSrcF32() const {
    return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::f32);
  }

  bool isSCSrcF64() const {
    return isRegOrInlineNoMods(AMDGPU::SReg_64RegClassID, MVT::f64);
  }

  bool isSSrcB32() const {
    return isSCSrcB32() || isLiteralImm(MVT::i32) || isExpr();
  }

  bool isSSrcB16() const {
    return isSCSrcB16() || isLiteralImm(MVT::i16);
  }

  bool isSSrcV2B16() const {
    llvm_unreachable("cannot happen");
    return isSSrcB16();
  }

  bool isSSrcB64() const {
    // TODO: Find out how SALU supports extension of 32-bit literals to 64 bits.
    // See isVSrc64().
    return isSCSrcB64() || isLiteralImm(MVT::i64);
  }

  bool isSSrcF32() const {
    return isSCSrcB32() || isLiteralImm(MVT::f32) || isExpr();
  }

  bool isSSrcF64() const {
    return isSCSrcB64() || isLiteralImm(MVT::f64);
  }

  bool isSSrcF16() const {
    return isSCSrcB16() || isLiteralImm(MVT::f16);
  }

  bool isSSrcV2F16() const {
    llvm_unreachable("cannot happen");
    return isSSrcF16();
  }

  bool isSSrcOrLdsB32() const {
    return isRegOrInlineNoMods(AMDGPU::SRegOrLds_32RegClassID, MVT::i32) ||
           isLiteralImm(MVT::i32) || isExpr();
  }

  bool isVCSrcB32() const {
    return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::i32);
  }

  bool isVCSrcB64() const {
    return isRegOrInlineNoMods(AMDGPU::VS_64RegClassID, MVT::i64);
  }

  bool isVCSrcB16() const {
    return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::i16);
  }

  bool isVCSrcV2B16() const {
    return isVCSrcB16();
  }

  bool isVCSrcF32() const {
    return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::f32);
  }

  bool isVCSrcF64() const {
    return isRegOrInlineNoMods(AMDGPU::VS_64RegClassID, MVT::f64);
  }

  bool isVCSrcF16() const {
    return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::f16);
  }

  bool isVCSrcV2F16() const {
    return isVCSrcF16();
  }

  bool isVSrcB32() const {
    return isVCSrcF32() || isLiteralImm(MVT::i32) || isExpr();
  }

  bool isVSrcB64() const {
    return isVCSrcF64() || isLiteralImm(MVT::i64);
  }

  bool isVSrcB16() const {
    return isVCSrcF16() || isLiteralImm(MVT::i16);
  }

  bool isVSrcV2B16() const {
    return isVSrcB16() || isLiteralImm(MVT::v2i16);
  }

  bool isVSrcF32() const {
    return isVCSrcF32() || isLiteralImm(MVT::f32) || isExpr();
  }

  bool isVSrcF64() const {
    return isVCSrcF64() || isLiteralImm(MVT::f64);
  }

  bool isVSrcF16() const {
    return isVCSrcF16() || isLiteralImm(MVT::f16);
  }

  bool isVSrcV2F16() const {
    return isVSrcF16() || isLiteralImm(MVT::v2f16);
  }

  bool isVISrcB32() const {
    return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::i32);
  }

  bool isVISrcB16() const {
    return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::i16);
  }

  bool isVISrcV2B16() const {
    return isVISrcB16();
  }

  bool isVISrcF32() const {
    return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::f32);
  }

  bool isVISrcF16() const {
    return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::f16);
  }

  bool isVISrcV2F16() const {
    return isVISrcF16() || isVISrcB32();
  }

  bool isAISrcB32() const {
    return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::i32);
  }

  bool isAISrcB16() const {
    return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::i16);
  }

  bool isAISrcV2B16() const {
    return isAISrcB16();
  }

  bool isAISrcF32() const {
    return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::f32);
  }

  bool isAISrcF16() const {
    return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::f16);
  }

  bool isAISrcV2F16() const {
    return isAISrcF16() || isAISrcB32();
  }

  bool isAISrc_128B32() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::i32);
  }

  bool isAISrc_128B16() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::i16);
  }

  bool isAISrc_128V2B16() const {
    return isAISrc_128B16();
  }

  bool isAISrc_128F32() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::f32);
  }

  bool isAISrc_128F16() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::f16);
  }

  bool isAISrc_128V2F16() const {
    return isAISrc_128F16() || isAISrc_128B32();
  }

  bool isAISrc_512B32() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::i32);
  }

  bool isAISrc_512B16() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::i16);
  }

  bool isAISrc_512V2B16() const {
    return isAISrc_512B16();
  }

  bool isAISrc_512F32() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::f32);
  }

  bool isAISrc_512F16() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::f16);
  }

  bool isAISrc_512V2F16() const {
    return isAISrc_512F16() || isAISrc_512B32();
  }

  bool isAISrc_1024B32() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::i32);
  }

  bool isAISrc_1024B16() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::i16);
  }

  bool isAISrc_1024V2B16() const {
    return isAISrc_1024B16();
  }

  bool isAISrc_1024F32() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::f32);
  }

  bool isAISrc_1024F16() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::f16);
  }

  bool isAISrc_1024V2F16() const {
    return isAISrc_1024F16() || isAISrc_1024B32();
  }

  bool isKImmFP32() const {
    return isLiteralImm(MVT::f32);
  }

  bool isKImmFP16() const {
    return isLiteralImm(MVT::f16);
  }

  bool isMem() const override {
    return false;
  }

  bool isExpr() const {
    return Kind == Expression;
  }

  bool isSoppBrTarget() const {
    return isExpr() || isImm();
  }

  bool isSWaitCnt() const;
  bool isHwreg() const;
  bool isSendMsg() const;
  bool isSwizzle() const;
  bool isSMRDOffset8() const;
  bool isSMRDOffset20() const;
  bool isSMRDLiteralOffset() const;
  bool isDPP8() const;
  bool isDPPCtrl() const;
  bool isBLGP() const;
  bool isCBSZ() const;
  bool isABID() const;
  bool isGPRIdxMode() const;
  bool isS16Imm() const;
  bool isU16Imm() const;
  bool isEndpgm() const;

  StringRef getExpressionAsToken() const {
    assert(isExpr());
    const MCSymbolRefExpr *S = cast<MCSymbolRefExpr>(Expr);
    return S->getSymbol().getName();
  }

  StringRef getToken() const {
    assert(isToken());

    if (Kind == Expression)
      return getExpressionAsToken();

    return StringRef(Tok.Data, Tok.Length);
  }

  int64_t getImm() const {
    assert(isImm());
    return Imm.Val;
  }

  ImmTy getImmTy() const {
    assert(isImm());
    return Imm.Type;
  }

  unsigned getReg() const override {
    assert(isRegKind());
    return Reg.RegNo;
  }

  SMLoc getStartLoc() const override {
    return StartLoc;
  }

  SMLoc getEndLoc() const override {
    return EndLoc;
  }

  SMRange getLocRange() const {
    return SMRange(StartLoc, EndLoc);
  }

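  // Source modifiers live on whichever payload is active: Reg.Mods for
  // register operands and Imm.Mods for plain immediates. The accessors below
  // pick the right copy based on the operand kind.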
  Modifiers getModifiers() const {
    assert(isRegKind() || isImmTy(ImmTyNone));
    return isRegKind() ? Reg.Mods : Imm.Mods;
  }

  void setModifiers(Modifiers Mods) {
    assert(isRegKind() || isImmTy(ImmTyNone));
    if (isRegKind())
      Reg.Mods = Mods;
    else
      Imm.Mods = Mods;
  }

  bool hasModifiers() const {
    return getModifiers().hasModifiers();
  }

  bool hasFPModifiers() const {
    return getModifiers().hasFPModifiers();
  }

  bool hasIntModifiers() const {
    return getModifiers().hasIntModifiers();
  }

  uint64_t applyInputFPModifiers(uint64_t Val, unsigned Size) const;

  void addImmOperands(MCInst &Inst, unsigned N, bool ApplyModifiers = true) const;

  void addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyModifiers) const;

  template <unsigned Bitwidth>
  void addKImmFPOperands(MCInst &Inst, unsigned N) const;

  void addKImmFP16Operands(MCInst &Inst, unsigned N) const {
    addKImmFPOperands<16>(Inst, N);
  }

  void addKImmFP32Operands(MCInst &Inst, unsigned N) const {
    addKImmFPOperands<32>(Inst, N);
  }

  void addRegOperands(MCInst &Inst, unsigned N) const;

  void addBoolRegOperands(MCInst &Inst, unsigned N) const {
    addRegOperands(Inst, N);
  }

  void addRegOrImmOperands(MCInst &Inst, unsigned N) const {
    if (isRegKind())
      addRegOperands(Inst, N);
    else if (isExpr())
      Inst.addOperand(MCOperand::createExpr(Expr));
    else
      addImmOperands(Inst, N);
  }

  void addRegOrImmWithInputModsOperands(MCInst &Inst, unsigned N) const {
    Modifiers Mods = getModifiers();
    Inst.addOperand(MCOperand::createImm(Mods.getModifiersOperand()));
    if (isRegKind()) {
      addRegOperands(Inst, N);
    } else {
      addImmOperands(Inst, N, false);
    }
  }

  void addRegOrImmWithFPInputModsOperands(MCInst &Inst, unsigned N) const {
    assert(!hasIntModifiers());
    addRegOrImmWithInputModsOperands(Inst, N);
  }

  void addRegOrImmWithIntInputModsOperands(MCInst &Inst, unsigned N) const {
    assert(!hasFPModifiers());
    addRegOrImmWithInputModsOperands(Inst, N);
  }

  void addRegWithInputModsOperands(MCInst &Inst, unsigned N) const {
    Modifiers Mods = getModifiers();
    Inst.addOperand(MCOperand::createImm(Mods.getModifiersOperand()));
    assert(isRegKind());
    addRegOperands(Inst, N);
  }

  void addRegWithFPInputModsOperands(MCInst &Inst, unsigned N) const {
    assert(!hasIntModifiers());
    addRegWithInputModsOperands(Inst, N);
  }

  void addRegWithIntInputModsOperands(MCInst &Inst, unsigned N) const {
    assert(!hasFPModifiers());
    addRegWithInputModsOperands(Inst, N);
  }

  void addSoppBrTargetOperands(MCInst &Inst, unsigned N) const {
    if (isImm())
      addImmOperands(Inst, N);
    else {
      assert(isExpr());
      Inst.addOperand(MCOperand::createExpr(Expr));
    }
  }

  static void printImmTy(raw_ostream& OS, ImmTy Type) {
    switch (Type) {
    case ImmTyNone: OS << "None"; break;
    case ImmTyGDS: OS << "GDS"; break;
    case ImmTyLDS: OS << "LDS"; break;
    case ImmTyOffen: OS << "Offen"; break;
    case ImmTyIdxen: OS << "Idxen"; break;
    case ImmTyAddr64: OS << "Addr64"; break;
    case ImmTyOffset: OS << "Offset"; break;
    case ImmTyInstOffset: OS << "InstOffset"; break;
    case ImmTyOffset0: OS << "Offset0"; break;
    case ImmTyOffset1: OS << "Offset1"; break;
    case ImmTyDLC: OS << "DLC"; break;
    case ImmTyGLC: OS << "GLC"; break;
    case ImmTySLC: OS << "SLC"; break;
    case ImmTyTFE: OS << "TFE"; break;
    case ImmTyD16: OS << "D16"; break;
    case ImmTyFORMAT: OS << "FORMAT"; break;
    case ImmTyClampSI: OS << "ClampSI"; break;
    case ImmTyOModSI: OS << "OModSI"; break;
    case ImmTyDPP8: OS << "DPP8"; break;
    case ImmTyDppCtrl: OS << "DppCtrl"; break;
    case ImmTyDppRowMask: OS << "DppRowMask"; break;
    case ImmTyDppBankMask: OS << "DppBankMask"; break;
    case ImmTyDppBoundCtrl: OS << "DppBoundCtrl"; break;
    case ImmTyDppFi: OS << "FI"; break;
    case ImmTySdwaDstSel: OS << "SdwaDstSel"; break;
    case ImmTySdwaSrc0Sel: OS << "SdwaSrc0Sel"; break;
    case ImmTySdwaSrc1Sel: OS << "SdwaSrc1Sel"; break;
    case ImmTySdwaDstUnused: OS << "SdwaDstUnused"; break;
    case ImmTyDMask: OS << "DMask"; break;
    case ImmTyDim: OS << "Dim"; break;
    case ImmTyUNorm: OS << "UNorm"; break;
    case ImmTyDA: OS << "DA"; break;
    case ImmTyR128A16: OS << "R128A16"; break;
    case ImmTyLWE: OS << "LWE"; break;
    case ImmTyOff: OS << "Off"; break;
    case ImmTyExpTgt: OS << "ExpTgt"; break;
    case ImmTyExpCompr: OS << "ExpCompr"; break;
    case ImmTyExpVM: OS << "ExpVM"; break;
    case ImmTyHwreg: OS << "Hwreg"; break;
    case ImmTySendMsg: OS << "SendMsg"; break;
    case ImmTyInterpSlot: OS << "InterpSlot"; break;
    case ImmTyInterpAttr: OS << "InterpAttr"; break;
    case ImmTyAttrChan: OS << "AttrChan"; break;
    case ImmTyOpSel: OS << "OpSel"; break;
    case ImmTyOpSelHi: OS << "OpSelHi"; break;
    case ImmTyNegLo: OS << "NegLo"; break;
    case ImmTyNegHi: OS << "NegHi"; break;
    case ImmTySwizzle: OS << "Swizzle"; break;
    case ImmTyGprIdxMode: OS << "GprIdxMode"; break;
    case ImmTyHigh: OS << "High"; break;
    case ImmTyBLGP: OS << "BLGP"; break;
    case ImmTyCBSZ: OS << "CBSZ"; break;
    case ImmTyABID: OS << "ABID"; break;
    case ImmTyEndpgm: OS << "Endpgm"; break;
    }
  }

  void print(raw_ostream &OS) const override {
    switch (Kind) {
    case Register:
      OS << "<register " << getReg() << " mods: " << Reg.Mods << '>';
      break;
    case Immediate:
      OS << '<' << getImm();
      if (getImmTy() != ImmTyNone) {
        OS << " type: "; printImmTy(OS, getImmTy());
      }
      OS << " mods: " << Imm.Mods << '>';
      break;
    case Token:
      OS << '\'' << getToken() << '\'';
      break;
    case Expression:
      OS << "<expr " << *Expr << '>';
      break;
    }
  }

  static AMDGPUOperand::Ptr CreateImm(const AMDGPUAsmParser *AsmParser,
                                      int64_t Val, SMLoc Loc,
                                      ImmTy Type = ImmTyNone,
                                      bool IsFPImm = false) {
    auto Op = std::make_unique<AMDGPUOperand>(Immediate, AsmParser);
    Op->Imm.Val = Val;
    Op->Imm.IsFPImm = IsFPImm;
    Op->Imm.Type = Type;
    Op->Imm.Mods = Modifiers();
    Op->StartLoc = Loc;
    Op->EndLoc = Loc;
    return Op;
  }

  static AMDGPUOperand::Ptr CreateToken(const AMDGPUAsmParser *AsmParser,
                                        StringRef Str, SMLoc Loc,
                                        bool HasExplicitEncodingSize = true) {
    auto Res = std::make_unique<AMDGPUOperand>(Token, AsmParser);
    Res->Tok.Data = Str.data();
    Res->Tok.Length = Str.size();
    Res->StartLoc = Loc;
    Res->EndLoc = Loc;
    return Res;
  }

  static AMDGPUOperand::Ptr CreateReg(const AMDGPUAsmParser *AsmParser,
                                      unsigned RegNo, SMLoc S,
                                      SMLoc E) {
    auto Op = std::make_unique<AMDGPUOperand>(Register, AsmParser);
    Op->Reg.RegNo = RegNo;
    Op->Reg.Mods = Modifiers();
    Op->StartLoc = S;
    Op->EndLoc = E;
    return Op;
  }

  static AMDGPUOperand::Ptr CreateExpr(const AMDGPUAsmParser *AsmParser,
                                       const class MCExpr *Expr, SMLoc S) {
    auto Op = std::make_unique<AMDGPUOperand>(Expression, AsmParser);
    Op->Expr = Expr;
    Op->StartLoc = S;
    Op->EndLoc = S;
    return Op;
  }
};

raw_ostream &operator <<(raw_ostream &OS, AMDGPUOperand::Modifiers Mods) {
  OS << "abs:" << Mods.Abs << " neg: " << Mods.Neg << " sext:" << Mods.Sext;
  return OS;
}

//===----------------------------------------------------------------------===//
// AsmParser
//===----------------------------------------------------------------------===//

// Holds info related to the current kernel, e.g. count of SGPRs used.
// Kernel scope begins at .amdgpu_hsa_kernel directive, ends at next
// .amdgpu_hsa_kernel or at EOF.
class KernelScopeInfo {
  int SgprIndexUnusedMin = -1;
  int VgprIndexUnusedMin = -1;
  MCContext *Ctx = nullptr;

  void usesSgprAt(int i) {
    if (i >= SgprIndexUnusedMin) {
      SgprIndexUnusedMin = ++i;
      if (Ctx) {
        MCSymbol * const Sym = Ctx->getOrCreateSymbol(Twine(".kernel.sgpr_count"));
        Sym->setVariableValue(MCConstantExpr::create(SgprIndexUnusedMin, *Ctx));
      }
    }
  }

  void usesVgprAt(int i) {
    if (i >= VgprIndexUnusedMin) {
      VgprIndexUnusedMin = ++i;
      if (Ctx) {
        MCSymbol * const Sym = Ctx->getOrCreateSymbol(Twine(".kernel.vgpr_count"));
        Sym->setVariableValue(MCConstantExpr::create(VgprIndexUnusedMin, *Ctx));
      }
    }
  }

public:
  KernelScopeInfo() = default;

  void initialize(MCContext &Context) {
    Ctx = &Context;
    usesSgprAt(SgprIndexUnusedMin = -1);
    usesVgprAt(VgprIndexUnusedMin = -1);
  }

  void usesRegister(RegisterKind RegKind, unsigned DwordRegIndex, unsigned RegWidth) {
    switch (RegKind) {
    case IS_SGPR: usesSgprAt(DwordRegIndex + RegWidth - 1); break;
    case IS_AGPR: // fall through
    case IS_VGPR: usesVgprAt(DwordRegIndex + RegWidth - 1); break;
    default: break;
    }
  }
};

class AMDGPUAsmParser : public MCTargetAsmParser {
  MCAsmParser &Parser;

  // Number of extra operands parsed after the first optional operand.
  // This may be necessary to skip hardcoded mandatory operands.
  static const unsigned MAX_OPR_LOOKAHEAD = 8;

  unsigned ForcedEncodingSize = 0;
  bool ForcedDPP = false;
  bool ForcedSDWA = false;
  KernelScopeInfo KernelScope;

  /// @name Auto-generated Match Functions
  /// {

#define GET_ASSEMBLER_HEADER
#include "AMDGPUGenAsmMatcher.inc"

  /// }

private:
  bool ParseAsAbsoluteExpression(uint32_t &Ret);
  bool OutOfRangeError(SMRange Range);
  /// Calculate VGPR/SGPR blocks required for given target, reserved
  /// registers, and user-specified NextFreeXGPR values.
  ///
  /// \param Features [in] Target features, used for bug corrections.
  /// \param VCCUsed [in] Whether VCC special SGPR is reserved.
  /// \param FlatScrUsed [in] Whether FLAT_SCRATCH special SGPR is reserved.
  /// \param XNACKUsed [in] Whether XNACK_MASK special SGPR is reserved.
  /// \param EnableWavefrontSize32 [in] Value of ENABLE_WAVEFRONT_SIZE32 kernel
  /// descriptor field, if valid.
  /// \param NextFreeVGPR [in] Max VGPR number referenced, plus one.
  /// \param VGPRRange [in] Token range, used for VGPR diagnostics.
  /// \param NextFreeSGPR [in] Max SGPR number referenced, plus one.
  /// \param SGPRRange [in] Token range, used for SGPR diagnostics.
  /// \param VGPRBlocks [out] Result VGPR block count.
  /// \param SGPRBlocks [out] Result SGPR block count.
  bool calculateGPRBlocks(const FeatureBitset &Features, bool VCCUsed,
                          bool FlatScrUsed, bool XNACKUsed,
                          Optional<bool> EnableWavefrontSize32,
                          unsigned NextFreeVGPR, SMRange VGPRRange,
                          unsigned NextFreeSGPR, SMRange SGPRRange,
                          unsigned &VGPRBlocks, unsigned &SGPRBlocks);
  bool ParseDirectiveAMDGCNTarget();
  bool ParseDirectiveAMDHSAKernel();
  bool ParseDirectiveMajorMinor(uint32_t &Major, uint32_t &Minor);
  bool ParseDirectiveHSACodeObjectVersion();
  bool ParseDirectiveHSACodeObjectISA();
  bool ParseAMDKernelCodeTValue(StringRef ID, amd_kernel_code_t &Header);
  bool ParseDirectiveAMDKernelCodeT();
  bool subtargetHasRegister(const MCRegisterInfo &MRI, unsigned RegNo) const;
  bool ParseDirectiveAMDGPUHsaKernel();

  bool ParseDirectiveISAVersion();
  bool ParseDirectiveHSAMetadata();
  bool ParseDirectivePALMetadataBegin();
  bool ParseDirectivePALMetadata();
  bool ParseDirectiveAMDGPULDS();

  /// Common code to parse out a block of text (typically YAML) between start
  /// and end directives.
  bool ParseToEndDirective(const char *AssemblerDirectiveBegin,
                           const char *AssemblerDirectiveEnd,
                           std::string &CollectString);

  bool AddNextRegisterToList(unsigned& Reg, unsigned& RegWidth,
                             RegisterKind RegKind, unsigned Reg1,
                             unsigned RegNum);
  bool ParseAMDGPURegister(RegisterKind& RegKind, unsigned& Reg,
                           unsigned& RegNum, unsigned& RegWidth,
                           unsigned *DwordRegIndex);
  bool isRegister();
  bool isRegister(const AsmToken &Token, const AsmToken &NextToken) const;
  Optional<StringRef> getGprCountSymbolName(RegisterKind RegKind);
  void initializeGprCountSymbol(RegisterKind RegKind);
  bool updateGprCountSymbols(RegisterKind RegKind, unsigned DwordRegIndex,
                             unsigned RegWidth);
  void cvtMubufImpl(MCInst &Inst, const OperandVector &Operands,
                    bool IsAtomic, bool IsAtomicReturn, bool IsLds = false);
  void cvtDSImpl(MCInst &Inst, const OperandVector &Operands,
                 bool IsGdsHardcoded);

public:
  enum AMDGPUMatchResultTy {
    Match_PreferE32 = FIRST_TARGET_MATCH_RESULT_TY
  };
  enum OperandMode {
    OperandMode_Default,
    OperandMode_NSA,
  };

  using OptionalImmIndexMap = std::map<AMDGPUOperand::ImmTy, unsigned>;

  AMDGPUAsmParser(const MCSubtargetInfo &STI, MCAsmParser &_Parser,
                  const MCInstrInfo &MII,
                  const MCTargetOptions &Options)
      : MCTargetAsmParser(Options, STI, MII), Parser(_Parser) {
    MCAsmParserExtension::Initialize(Parser);

    if (getFeatureBits().none()) {
      // Set default features.
      copySTI().ToggleFeature("southern-islands");
    }

    setAvailableFeatures(ComputeAvailableFeatures(getFeatureBits()));

    {
      // TODO: make these pre-defined variables read-only.
      // Currently there is no suitable machinery in core llvm-mc for this.
      // MCSymbol::isRedefinable is intended for another purpose, and
      // AsmParser::parseDirectiveSet() cannot be specialized for a specific target.
      AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
      MCContext &Ctx = getContext();
      if (ISA.Major >= 6 && AMDGPU::IsaInfo::hasCodeObjectV3(&getSTI())) {
        MCSymbol *Sym =
            Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_number"));
        Sym->setVariableValue(MCConstantExpr::create(ISA.Major, Ctx));
        Sym = Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_minor"));
        Sym->setVariableValue(MCConstantExpr::create(ISA.Minor, Ctx));
        Sym = Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_stepping"));
        Sym->setVariableValue(MCConstantExpr::create(ISA.Stepping, Ctx));
      } else {
        MCSymbol *Sym =
            Ctx.getOrCreateSymbol(Twine(".option.machine_version_major"));
        Sym->setVariableValue(MCConstantExpr::create(ISA.Major, Ctx));
        Sym = Ctx.getOrCreateSymbol(Twine(".option.machine_version_minor"));
        Sym->setVariableValue(MCConstantExpr::create(ISA.Minor, Ctx));
        Sym = Ctx.getOrCreateSymbol(Twine(".option.machine_version_stepping"));
        Sym->setVariableValue(MCConstantExpr::create(ISA.Stepping, Ctx));
      }
      if (ISA.Major >= 6 && AMDGPU::IsaInfo::hasCodeObjectV3(&getSTI())) {
        initializeGprCountSymbol(IS_VGPR);
        initializeGprCountSymbol(IS_SGPR);
      } else
        KernelScope.initialize(getContext());
    }
  }

  bool hasXNACK() const {
    return AMDGPU::hasXNACK(getSTI());
  }

  bool hasMIMG_R128() const {
    return AMDGPU::hasMIMG_R128(getSTI());
  }

  bool hasPackedD16() const {
    return AMDGPU::hasPackedD16(getSTI());
  }

  bool isSI() const {
    return AMDGPU::isSI(getSTI());
  }

  bool isCI() const {
    return AMDGPU::isCI(getSTI());
  }

  bool isVI() const {
    return AMDGPU::isVI(getSTI());
  }

  bool isGFX9() const {
    return AMDGPU::isGFX9(getSTI());
  }

  bool isGFX10() const {
    return AMDGPU::isGFX10(getSTI());
  }

  bool hasInv2PiInlineImm() const {
    return getFeatureBits()[AMDGPU::FeatureInv2PiInlineImm];
  }

  bool hasFlatOffsets() const {
    return getFeatureBits()[AMDGPU::FeatureFlatInstOffsets];
  }

  bool hasSGPR102_SGPR103() const {
    return !isVI() && !isGFX9();
  }

  bool hasSGPR104_SGPR105() const {
    return isGFX10();
  }

  bool hasIntClamp() const {
    return getFeatureBits()[AMDGPU::FeatureIntClamp];
  }

  AMDGPUTargetStreamer &getTargetStreamer() {
    MCTargetStreamer &TS = *getParser().getStreamer().getTargetStreamer();
    return static_cast<AMDGPUTargetStreamer &>(TS);
  }

  const MCRegisterInfo *getMRI() const {
    // We need this const_cast because for some reason getContext() is not const
    // in MCAsmParser.
    return const_cast<AMDGPUAsmParser*>(this)->getContext().getRegisterInfo();
  }

  const MCInstrInfo *getMII() const {
    return &MII;
  }

  const FeatureBitset &getFeatureBits() const {
    return getSTI().getFeatureBits();
  }

  void setForcedEncodingSize(unsigned Size) { ForcedEncodingSize = Size; }
  void setForcedDPP(bool ForceDPP_) { ForcedDPP = ForceDPP_; }
  void setForcedSDWA(bool ForceSDWA_) { ForcedSDWA = ForceSDWA_; }

  unsigned getForcedEncodingSize() const { return ForcedEncodingSize; }
  bool isForcedVOP3() const { return ForcedEncodingSize == 64; }
  bool isForcedDPP() const { return ForcedDPP; }
  bool isForcedSDWA() const { return ForcedSDWA; }
  ArrayRef<unsigned> getMatchedVariants() const;

  std::unique_ptr<AMDGPUOperand> parseRegister();
  bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc) override;
  unsigned checkTargetMatchPredicate(MCInst &Inst) override;
  unsigned validateTargetOperandClass(MCParsedAsmOperand &Op,
                                      unsigned Kind) override;
  bool MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
                               OperandVector &Operands, MCStreamer &Out,
                               uint64_t &ErrorInfo,
                               bool MatchingInlineAsm) override;
  bool ParseDirective(AsmToken DirectiveID) override;
  OperandMatchResultTy parseOperand(OperandVector &Operands, StringRef Mnemonic,
                                    OperandMode Mode = OperandMode_Default);
  StringRef parseMnemonicSuffix(StringRef Name);
  bool ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
                        SMLoc NameLoc, OperandVector &Operands) override;
  //bool ProcessInstruction(MCInst &Inst);

  OperandMatchResultTy parseIntWithPrefix(const char *Prefix, int64_t &Int);

  OperandMatchResultTy
  parseIntWithPrefix(const char *Prefix, OperandVector &Operands,
                     AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone,
                     bool (*ConvertResult)(int64_t &) = nullptr);

  OperandMatchResultTy
  parseOperandArrayWithPrefix(const char *Prefix,
                              OperandVector &Operands,
                              AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone,
                              bool (*ConvertResult)(int64_t&) = nullptr);

  OperandMatchResultTy
  parseNamedBit(const char *Name, OperandVector &Operands,
                AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone);
  OperandMatchResultTy parseStringWithPrefix(StringRef Prefix,
                                             StringRef &Value);

  bool isModifier();
  bool isOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const;
  bool isRegOrOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const;
  bool isNamedOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const;
  bool isOpcodeModifierWithVal(const AsmToken &Token, const AsmToken &NextToken) const;
  bool parseSP3NegModifier();
  OperandMatchResultTy parseImm(OperandVector &Operands, bool HasSP3AbsModifier = false);
  OperandMatchResultTy parseReg(OperandVector &Operands);
  OperandMatchResultTy parseRegOrImm(OperandVector &Operands, bool HasSP3AbsMod = false);
  OperandMatchResultTy parseRegOrImmWithFPInputMods(OperandVector &Operands, bool AllowImm = true);
  OperandMatchResultTy parseRegOrImmWithIntInputMods(OperandVector &Operands, bool AllowImm = true);
  OperandMatchResultTy parseRegWithFPInputMods(OperandVector &Operands);
  OperandMatchResultTy parseRegWithIntInputMods(OperandVector &Operands);
  OperandMatchResultTy parseVReg32OrOff(OperandVector &Operands);
  OperandMatchResultTy parseDfmtNfmt(OperandVector &Operands);

  void cvtDSOffset01(MCInst &Inst, const OperandVector &Operands);
  void cvtDS(MCInst &Inst, const OperandVector &Operands) { cvtDSImpl(Inst, Operands, false); }
  void cvtDSGds(MCInst &Inst, const OperandVector &Operands) { cvtDSImpl(Inst, Operands, true); }
  void cvtExp(MCInst &Inst, const OperandVector &Operands);

  bool parseCnt(int64_t &IntVal);
  OperandMatchResultTy parseSWaitCntOps(OperandVector &Operands);
  OperandMatchResultTy parseHwreg(OperandVector &Operands);

private:
  struct OperandInfoTy {
    int64_t Id;
    bool IsSymbolic = false;
    bool IsDefined = false;

    OperandInfoTy(int64_t Id_) : Id(Id_) {}
  };

  bool parseSendMsgBody(OperandInfoTy &Msg, OperandInfoTy &Op, OperandInfoTy &Stream);
  bool validateSendMsg(const OperandInfoTy &Msg,
                       const OperandInfoTy &Op,
                       const OperandInfoTy &Stream,
                       const SMLoc Loc);

  bool parseHwregBody(OperandInfoTy &HwReg, int64_t &Offset, int64_t &Width);
  bool validateHwreg(const OperandInfoTy &HwReg,
                     const int64_t Offset,
                     const int64_t Width,
                     const SMLoc Loc);

  void errorExpTgt();
  OperandMatchResultTy parseExpTgtImpl(StringRef Str, uint8_t &Val);
  SMLoc getFlatOffsetLoc(const OperandVector &Operands) const;

  bool validateInstruction(const MCInst &Inst, const SMLoc &IDLoc, const OperandVector &Operands);
  bool validateFlatOffset(const MCInst &Inst, const OperandVector &Operands);
  bool validateSOPLiteral(const MCInst &Inst) const;
  bool validateConstantBusLimitations(const MCInst &Inst);
  bool validateEarlyClobberLimitations(const MCInst &Inst);
  bool validateIntClampSupported(const MCInst &Inst);
  bool validateMIMGAtomicDMask(const MCInst &Inst);
  bool validateMIMGGatherDMask(const MCInst &Inst);
  bool validateMIMGDataSize(const MCInst &Inst);
  bool validateMIMGAddrSize(const MCInst &Inst);
  bool validateMIMGD16(const MCInst &Inst);
  bool validateMIMGDim(const MCInst &Inst);
  bool validateLdsDirect(const MCInst &Inst);
  bool validateOpSel(const MCInst &Inst);
  bool validateVccOperand(unsigned Reg) const;
  bool validateVOP3Literal(const MCInst &Inst) const;
  unsigned getConstantBusLimit(unsigned Opcode) const;
  bool usesConstantBus(const MCInst &Inst, unsigned OpIdx);
  bool isInlineConstant(const MCInst &Inst, unsigned OpIdx) const;
  unsigned findImplicitSGPRReadInVOP(const MCInst &Inst) const;

  bool isId(const StringRef Id) const;
  bool isId(const AsmToken &Token, const StringRef Id) const;
  bool isToken(const AsmToken::TokenKind Kind) const;
  bool trySkipId(const StringRef Id);
  bool trySkipId(const StringRef Id, const AsmToken::TokenKind Kind);
  bool trySkipToken(const AsmToken::TokenKind Kind);
  bool skipToken(const AsmToken::TokenKind Kind, const StringRef ErrMsg);
  bool parseString(StringRef &Val, const StringRef ErrMsg = "expected a string");
  void peekTokens(MutableArrayRef<AsmToken> Tokens);
  AsmToken::TokenKind getTokenKind() const;
  bool parseExpr(int64_t &Imm);
  bool parseExpr(OperandVector &Operands);
  StringRef getTokenStr() const;
  AsmToken peekToken();
  AsmToken getToken() const;
  SMLoc getLoc() const;
  void lex();

public:
  OperandMatchResultTy parseOptionalOperand(OperandVector &Operands);
  OperandMatchResultTy parseOptionalOpr(OperandVector &Operands);

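  // Parsers for operands with dedicated syntax: export targets, sendmsg
  // operands, interpolation slots/attributes, branch targets, and boolean
  // registers.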
  OperandMatchResultTy parseExpTgt(OperandVector &Operands);
  OperandMatchResultTy parseSendMsgOp(OperandVector &Operands);
  OperandMatchResultTy parseInterpSlot(OperandVector &Operands);
  OperandMatchResultTy parseInterpAttr(OperandVector &Operands);
  OperandMatchResultTy parseSOppBrTarget(OperandVector &Operands);
  OperandMatchResultTy parseBoolReg(OperandVector &Operands);

  bool parseSwizzleOperands(const unsigned OpNum, int64_t* Op,
                            const unsigned MinVal,
                            const unsigned MaxVal,
                            const StringRef ErrMsg);
  OperandMatchResultTy parseSwizzleOp(OperandVector &Operands);
  bool parseSwizzleOffset(int64_t &Imm);
  bool parseSwizzleMacro(int64_t &Imm);
  bool parseSwizzleQuadPerm(int64_t &Imm);
  bool parseSwizzleBitmaskPerm(int64_t &Imm);
  bool parseSwizzleBroadcast(int64_t &Imm);
  bool parseSwizzleSwap(int64_t &Imm);
  bool parseSwizzleReverse(int64_t &Imm);

  OperandMatchResultTy parseGPRIdxMode(OperandVector &Operands);
  int64_t parseGPRIdxMacro();

  void cvtMubuf(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, false, false); }
  void cvtMubufAtomic(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, true, false); }
  void cvtMubufAtomicReturn(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, true, true); }
  void cvtMubufLds(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, false, false, true); }
  void cvtMtbuf(MCInst &Inst, const OperandVector &Operands);

  AMDGPUOperand::Ptr defaultDLC() const;
  AMDGPUOperand::Ptr defaultGLC() const;
  AMDGPUOperand::Ptr defaultSLC() const;

  AMDGPUOperand::Ptr defaultSMRDOffset8() const;
  AMDGPUOperand::Ptr defaultSMRDOffset20() const;
  AMDGPUOperand::Ptr defaultSMRDLiteralOffset() const;
  AMDGPUOperand::Ptr defaultFlatOffset() const;

  OperandMatchResultTy parseOModOperand(OperandVector &Operands);

  void cvtVOP3(MCInst &Inst, const OperandVector &Operands,
               OptionalImmIndexMap &OptionalIdx);
  void cvtVOP3OpSel(MCInst &Inst, const OperandVector &Operands);
  void cvtVOP3(MCInst &Inst, const OperandVector &Operands);
  void cvtVOP3P(MCInst &Inst, const OperandVector &Operands);

  void cvtVOP3Interp(MCInst &Inst, const OperandVector &Operands);

  void cvtMIMG(MCInst &Inst, const OperandVector &Operands,
               bool IsAtomic = false);
  void cvtMIMGAtomic(MCInst &Inst, const OperandVector &Operands);

  OperandMatchResultTy parseDim(OperandVector &Operands);
  OperandMatchResultTy parseDPP8(OperandVector &Operands);
  OperandMatchResultTy parseDPPCtrl(OperandVector &Operands);
  AMDGPUOperand::Ptr defaultRowMask() const;
  AMDGPUOperand::Ptr defaultBankMask() const;
  AMDGPUOperand::Ptr defaultBoundCtrl() const;
  AMDGPUOperand::Ptr defaultFI() const;
  void cvtDPP(MCInst &Inst, const OperandVector &Operands, bool IsDPP8 = false);
  void cvtDPP8(MCInst &Inst, const OperandVector &Operands) { cvtDPP(Inst, Operands, true); }

  OperandMatchResultTy parseSDWASel(OperandVector &Operands, StringRef Prefix,
                                    AMDGPUOperand::ImmTy Type);
  OperandMatchResultTy parseSDWADstUnused(OperandVector &Operands);
  void cvtSdwaVOP1(MCInst &Inst, const OperandVector &Operands);
  void cvtSdwaVOP2(MCInst &Inst, const OperandVector &Operands);
  void cvtSdwaVOP2b(MCInst &Inst, const OperandVector &Operands);
  void cvtSdwaVOPC(MCInst &Inst, const OperandVector &Operands);
  void cvtSDWA(MCInst &Inst, const OperandVector &Operands,
               uint64_t BasicInstType, bool skipVcc = false);

  AMDGPUOperand::Ptr defaultBLGP() const;
  AMDGPUOperand::Ptr defaultCBSZ() const;
  AMDGPUOperand::Ptr defaultABID() const;

  OperandMatchResultTy parseEndpgmOp(OperandVector &Operands);
  AMDGPUOperand::Ptr defaultEndpgmImmOperands() const;
};

struct OptionalOperand {
  const char *Name;
  AMDGPUOperand::ImmTy Type;
  bool IsBit;
  bool (*ConvertResult)(int64_t&);
};

} // end anonymous namespace

// May be called with integer type with equivalent bitwidth.
static const fltSemantics *getFltSemantics(unsigned Size) {
  switch (Size) {
  case 4:
    return &APFloat::IEEEsingle();
  case 8:
    return &APFloat::IEEEdouble();
  case 2:
    return &APFloat::IEEEhalf();
  default:
    llvm_unreachable("unsupported fp type");
  }
}

static const fltSemantics *getFltSemantics(MVT VT) {
  return getFltSemantics(VT.getSizeInBits() / 8);
}

static const fltSemantics *getOpFltSemantics(uint8_t OperandType) {
  switch (OperandType) {
  case AMDGPU::OPERAND_REG_IMM_INT32:
  case AMDGPU::OPERAND_REG_IMM_FP32:
  case AMDGPU::OPERAND_REG_INLINE_C_INT32:
  case AMDGPU::OPERAND_REG_INLINE_C_FP32:
  case AMDGPU::OPERAND_REG_INLINE_AC_INT32:
  case AMDGPU::OPERAND_REG_INLINE_AC_FP32:
    return &APFloat::IEEEsingle();
  case AMDGPU::OPERAND_REG_IMM_INT64:
  case AMDGPU::OPERAND_REG_IMM_FP64:
  case AMDGPU::OPERAND_REG_INLINE_C_INT64:
  case AMDGPU::OPERAND_REG_INLINE_C_FP64:
    return &APFloat::IEEEdouble();
  case AMDGPU::OPERAND_REG_IMM_INT16:
  case AMDGPU::OPERAND_REG_IMM_FP16:
  case AMDGPU::OPERAND_REG_INLINE_C_INT16:
  case AMDGPU::OPERAND_REG_INLINE_C_FP16:
  case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
  case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
  case AMDGPU::OPERAND_REG_INLINE_AC_INT16:
  case AMDGPU::OPERAND_REG_INLINE_AC_FP16:
  case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16:
  case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16:
  case AMDGPU::OPERAND_REG_IMM_V2INT16:
  case AMDGPU::OPERAND_REG_IMM_V2FP16:
    return &APFloat::IEEEhalf();
  default:
    llvm_unreachable("unsupported fp type");
  }
}

//===----------------------------------------------------------------------===//
// Operand
//===----------------------------------------------------------------------===//

static bool canLosslesslyConvertToFPType(APFloat &FPLiteral, MVT VT) {
  bool Lost;

  // Convert literal to single precision
  APFloat::opStatus Status = FPLiteral.convert(*getFltSemantics(VT),
                                               APFloat::rmNearestTiesToEven,
                                               &Lost);
  // We allow precision loss but not overflow or underflow
  if (Status != APFloat::opOK &&
      Lost &&
      ((Status & APFloat::opOverflow)  != 0 ||
       (Status & APFloat::opUnderflow) != 0)) {
    return false;
  }

  return true;
}

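// Returns true when Val keeps its value after truncation to Size bits,
// interpreted as either a signed or an unsigned integer.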
static bool isSafeTruncation(int64_t Val, unsigned Size) {
  return isUIntN(Size, Val) || isIntN(Size, Val);
}

bool AMDGPUOperand::isInlinableImm(MVT type) const {

  // This is a hack to enable named inline values like
  // shared_base with both 32-bit and 64-bit operands.
  // Note that these values are defined as
  // 32-bit operands only.
  if (isInlineValue()) {
    return true;
  }

  if (!isImmTy(ImmTyNone)) {
    // Only plain immediates are inlinable (e.g. "clamp" attribute is not)
    return false;
  }
  // TODO: We should avoid using host float here. It would be better to
  // check the float bit values, which is what a few other places do.
  // We've had bot failures before due to weird NaN support on mips hosts.

  APInt Literal(64, Imm.Val);

  if (Imm.IsFPImm) { // We got fp literal token
    if (type == MVT::f64 || type == MVT::i64) { // Expected 64-bit operand
      return AMDGPU::isInlinableLiteral64(Imm.Val,
                                          AsmParser->hasInv2PiInlineImm());
    }

    APFloat FPLiteral(APFloat::IEEEdouble(), APInt(64, Imm.Val));
    if (!canLosslesslyConvertToFPType(FPLiteral, type))
      return false;

    if (type.getScalarSizeInBits() == 16) {
      return AMDGPU::isInlinableLiteral16(
        static_cast<int16_t>(FPLiteral.bitcastToAPInt().getZExtValue()),
        AsmParser->hasInv2PiInlineImm());
    }

    // Check if single precision literal is inlinable
    return AMDGPU::isInlinableLiteral32(
      static_cast<int32_t>(FPLiteral.bitcastToAPInt().getZExtValue()),
      AsmParser->hasInv2PiInlineImm());
  }

  // We got int literal token.
  if (type == MVT::f64 || type == MVT::i64) { // Expected 64-bit operand
    return AMDGPU::isInlinableLiteral64(Imm.Val,
                                        AsmParser->hasInv2PiInlineImm());
  }

  if (!isSafeTruncation(Imm.Val, type.getScalarSizeInBits())) {
    return false;
  }

  if (type.getScalarSizeInBits() == 16) {
    return AMDGPU::isInlinableLiteral16(
      static_cast<int16_t>(Literal.getLoBits(16).getSExtValue()),
      AsmParser->hasInv2PiInlineImm());
  }

  return AMDGPU::isInlinableLiteral32(
    static_cast<int32_t>(Literal.getLoBits(32).getZExtValue()),
    AsmParser->hasInv2PiInlineImm());
}

bool AMDGPUOperand::isLiteralImm(MVT type) const {
  // Check that this immediate can be added as literal
  if (!isImmTy(ImmTyNone)) {
    return false;
  }

  if (!Imm.IsFPImm) {
    // We got int literal token.

    if (type == MVT::f64 && hasFPModifiers()) {
      // FP modifiers cannot be applied to int literals while preserving the
      // same semantics for VOP1/2/C and VOP3, because of integer truncation.
      // To avoid ambiguity, disable these cases.
      return false;
    }

    unsigned Size = type.getSizeInBits();
    if (Size == 64)
      Size = 32;

    // FIXME: 64-bit operands can zero extend, sign extend, or pad zeroes for FP
    // types.
    return isSafeTruncation(Imm.Val, Size);
  }

  // We got fp literal token
  if (type == MVT::f64) { // Expected 64-bit fp operand
    // We would set the low 64 bits of the literal to zeroes, but we accept
    // such literals.
    return true;
  }

  if (type == MVT::i64) { // Expected 64-bit int operand
    // We don't allow fp literals in 64-bit integer instructions. It is
    // unclear how we should encode them.
    return false;
  }

  // We allow fp literals with f16x2 operands assuming that the specified
  // literal goes into the lower half and the upper half is zero. We also
  // require that the literal may be losslessly converted to f16.
  MVT ExpectedType = (type == MVT::v2f16)? MVT::f16 :
                     (type == MVT::v2i16)? MVT::i16 : type;

  APFloat FPLiteral(APFloat::IEEEdouble(), APInt(64, Imm.Val));
  return canLosslesslyConvertToFPType(FPLiteral, ExpectedType);
}

bool AMDGPUOperand::isRegClass(unsigned RCID) const {
  return isRegKind() && AsmParser->getMRI()->getRegClass(RCID).contains(getReg());
}

bool AMDGPUOperand::isSDWAOperand(MVT type) const {
  if (AsmParser->isVI())
    return isVReg32();
  else if (AsmParser->isGFX9() || AsmParser->isGFX10())
    return isRegClass(AMDGPU::VS_32RegClassID) || isInlinableImm(type);
  else
    return false;
}

bool AMDGPUOperand::isSDWAFP16Operand() const {
  return isSDWAOperand(MVT::f16);
}

bool AMDGPUOperand::isSDWAFP32Operand() const {
  return isSDWAOperand(MVT::f32);
}

bool AMDGPUOperand::isSDWAInt16Operand() const {
  return isSDWAOperand(MVT::i16);
}

bool AMDGPUOperand::isSDWAInt32Operand() const {
  return isSDWAOperand(MVT::i32);
}

bool AMDGPUOperand::isBoolReg() const {
  return (AsmParser->getFeatureBits()[AMDGPU::FeatureWavefrontSize64] && isSCSrcB64()) ||
         (AsmParser->getFeatureBits()[AMDGPU::FeatureWavefrontSize32] && isSCSrcB32());
}

uint64_t AMDGPUOperand::applyInputFPModifiers(uint64_t Val, unsigned Size) const
{
  assert(isImmTy(ImmTyNone) && Imm.Mods.hasFPModifiers());
  assert(Size == 2 || Size == 4 || Size == 8);

  const uint64_t FpSignMask = (1ULL << (Size * 8 - 1));

  if (Imm.Mods.Abs) {
    Val &= ~FpSignMask;
  }
  if (Imm.Mods.Neg) {
    Val ^= FpSignMask;
  }

  return Val;
}

void AMDGPUOperand::addImmOperands(MCInst &Inst, unsigned N, bool ApplyModifiers) const {
  if (AMDGPU::isSISrcOperand(AsmParser->getMII()->get(Inst.getOpcode()),
                             Inst.getNumOperands())) {
    addLiteralImmOperand(Inst, Imm.Val,
                         ApplyModifiers &
                         isImmTy(ImmTyNone) && Imm.Mods.hasFPModifiers());
  } else {
    assert(!isImmTy(ImmTyNone) || !hasModifiers());
    Inst.addOperand(MCOperand::createImm(Imm.Val));
  }
}

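// Encode a literal operand according to the operand type expected by the
// instruction: inline constants are emitted as-is, other values are converted
// or truncated to the operand's width.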
void AMDGPUOperand::addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyModifiers) const {
  const auto& InstDesc = AsmParser->getMII()->get(Inst.getOpcode());
  auto OpNum = Inst.getNumOperands();
  // Check that this operand accepts literals
  assert(AMDGPU::isSISrcOperand(InstDesc, OpNum));

  if (ApplyModifiers) {
    assert(AMDGPU::isSISrcFPOperand(InstDesc, OpNum));
    const unsigned Size = Imm.IsFPImm ? sizeof(double) : getOperandSize(InstDesc, OpNum);
    Val = applyInputFPModifiers(Val, Size);
  }

  APInt Literal(64, Val);
  uint8_t OpTy = InstDesc.OpInfo[OpNum].OperandType;

  if (Imm.IsFPImm) { // We got fp literal token
    switch (OpTy) {
    case AMDGPU::OPERAND_REG_IMM_INT64:
    case AMDGPU::OPERAND_REG_IMM_FP64:
    case AMDGPU::OPERAND_REG_INLINE_C_INT64:
    case AMDGPU::OPERAND_REG_INLINE_C_FP64:
      if (AMDGPU::isInlinableLiteral64(Literal.getZExtValue(),
                                       AsmParser->hasInv2PiInlineImm())) {
        Inst.addOperand(MCOperand::createImm(Literal.getZExtValue()));
        return;
      }

      // Non-inlineable
      if (AMDGPU::isSISrcFPOperand(InstDesc, OpNum)) { // Expected 64-bit fp operand
        // For fp operands we check if low 32 bits are zeros
        if (Literal.getLoBits(32) != 0) {
          const_cast<AMDGPUAsmParser *>(AsmParser)->Warning(Inst.getLoc(),
          "Can't encode literal as exact 64-bit floating-point operand. "
          "Low 32-bits will be set to zero");
        }

        Inst.addOperand(MCOperand::createImm(Literal.lshr(32).getZExtValue()));
        return;
      }

      // We don't allow fp literals in 64-bit integer instructions. It is
      // unclear how we should encode them. This case should be checked earlier
      // in predicate methods (isLiteralImm())
      llvm_unreachable("fp literal in 64-bit integer instruction.");

    case AMDGPU::OPERAND_REG_IMM_INT32:
    case AMDGPU::OPERAND_REG_IMM_FP32:
    case AMDGPU::OPERAND_REG_INLINE_C_INT32:
    case AMDGPU::OPERAND_REG_INLINE_C_FP32:
    case AMDGPU::OPERAND_REG_INLINE_AC_INT32:
    case AMDGPU::OPERAND_REG_INLINE_AC_FP32:
    case AMDGPU::OPERAND_REG_IMM_INT16:
    case AMDGPU::OPERAND_REG_IMM_FP16:
    case AMDGPU::OPERAND_REG_INLINE_C_INT16:
    case AMDGPU::OPERAND_REG_INLINE_C_FP16:
    case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
    case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
    case AMDGPU::OPERAND_REG_INLINE_AC_INT16:
    case AMDGPU::OPERAND_REG_INLINE_AC_FP16:
    case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16:
    case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16:
    case AMDGPU::OPERAND_REG_IMM_V2INT16:
    case AMDGPU::OPERAND_REG_IMM_V2FP16: {
      bool lost;
      APFloat FPLiteral(APFloat::IEEEdouble(), Literal);
      // Convert literal to single precision
      FPLiteral.convert(*getOpFltSemantics(OpTy),
                        APFloat::rmNearestTiesToEven, &lost);
      // We allow precision loss but not overflow or underflow. This should be
      // checked earlier in isLiteralImm()

      uint64_t ImmVal = FPLiteral.bitcastToAPInt().getZExtValue();
      Inst.addOperand(MCOperand::createImm(ImmVal));
      return;
    }
    default:
      llvm_unreachable("invalid operand size");
    }

    return;
  }

  // We got int literal token.
  // Only sign extend inline immediates.
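  // Inlinable values are emitted directly; anything else is truncated to the
  // operand width and encoded as a literal constant.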
1761 switch (OpTy) { 1762 case AMDGPU::OPERAND_REG_IMM_INT32: 1763 case AMDGPU::OPERAND_REG_IMM_FP32: 1764 case AMDGPU::OPERAND_REG_INLINE_C_INT32: 1765 case AMDGPU::OPERAND_REG_INLINE_C_FP32: 1766 case AMDGPU::OPERAND_REG_INLINE_AC_INT32: 1767 case AMDGPU::OPERAND_REG_INLINE_AC_FP32: 1768 case AMDGPU::OPERAND_REG_IMM_V2INT16: 1769 case AMDGPU::OPERAND_REG_IMM_V2FP16: 1770 if (isSafeTruncation(Val, 32) && 1771 AMDGPU::isInlinableLiteral32(static_cast<int32_t>(Val), 1772 AsmParser->hasInv2PiInlineImm())) { 1773 Inst.addOperand(MCOperand::createImm(Val)); 1774 return; 1775 } 1776 1777 Inst.addOperand(MCOperand::createImm(Val & 0xffffffff)); 1778 return; 1779 1780 case AMDGPU::OPERAND_REG_IMM_INT64: 1781 case AMDGPU::OPERAND_REG_IMM_FP64: 1782 case AMDGPU::OPERAND_REG_INLINE_C_INT64: 1783 case AMDGPU::OPERAND_REG_INLINE_C_FP64: 1784 if (AMDGPU::isInlinableLiteral64(Val, AsmParser->hasInv2PiInlineImm())) { 1785 Inst.addOperand(MCOperand::createImm(Val)); 1786 return; 1787 } 1788 1789 Inst.addOperand(MCOperand::createImm(Lo_32(Val))); 1790 return; 1791 1792 case AMDGPU::OPERAND_REG_IMM_INT16: 1793 case AMDGPU::OPERAND_REG_IMM_FP16: 1794 case AMDGPU::OPERAND_REG_INLINE_C_INT16: 1795 case AMDGPU::OPERAND_REG_INLINE_C_FP16: 1796 case AMDGPU::OPERAND_REG_INLINE_AC_INT16: 1797 case AMDGPU::OPERAND_REG_INLINE_AC_FP16: 1798 if (isSafeTruncation(Val, 16) && 1799 AMDGPU::isInlinableLiteral16(static_cast<int16_t>(Val), 1800 AsmParser->hasInv2PiInlineImm())) { 1801 Inst.addOperand(MCOperand::createImm(Val)); 1802 return; 1803 } 1804 1805 Inst.addOperand(MCOperand::createImm(Val & 0xffff)); 1806 return; 1807 1808 case AMDGPU::OPERAND_REG_INLINE_C_V2INT16: 1809 case AMDGPU::OPERAND_REG_INLINE_C_V2FP16: 1810 case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16: 1811 case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16: { 1812 assert(isSafeTruncation(Val, 16)); 1813 assert(AMDGPU::isInlinableLiteral16(static_cast<int16_t>(Val), 1814 AsmParser->hasInv2PiInlineImm())); 1815 1816 Inst.addOperand(MCOperand::createImm(Val)); 1817 return; 1818 } 1819 default: 1820 llvm_unreachable("invalid operand size"); 1821 } 1822 } 1823 1824 template <unsigned Bitwidth> 1825 void AMDGPUOperand::addKImmFPOperands(MCInst &Inst, unsigned N) const { 1826 APInt Literal(64, Imm.Val); 1827 1828 if (!Imm.IsFPImm) { 1829 // We got int literal token. 
1830 Inst.addOperand(MCOperand::createImm(Literal.getLoBits(Bitwidth).getZExtValue())); 1831 return; 1832 } 1833 1834 bool Lost; 1835 APFloat FPLiteral(APFloat::IEEEdouble(), Literal); 1836 FPLiteral.convert(*getFltSemantics(Bitwidth / 8), 1837 APFloat::rmNearestTiesToEven, &Lost); 1838 Inst.addOperand(MCOperand::createImm(FPLiteral.bitcastToAPInt().getZExtValue())); 1839 } 1840 1841 void AMDGPUOperand::addRegOperands(MCInst &Inst, unsigned N) const { 1842 Inst.addOperand(MCOperand::createReg(AMDGPU::getMCReg(getReg(), AsmParser->getSTI()))); 1843 } 1844 1845 static bool isInlineValue(unsigned Reg) { 1846 switch (Reg) { 1847 case AMDGPU::SRC_SHARED_BASE: 1848 case AMDGPU::SRC_SHARED_LIMIT: 1849 case AMDGPU::SRC_PRIVATE_BASE: 1850 case AMDGPU::SRC_PRIVATE_LIMIT: 1851 case AMDGPU::SRC_POPS_EXITING_WAVE_ID: 1852 return true; 1853 case AMDGPU::SRC_VCCZ: 1854 case AMDGPU::SRC_EXECZ: 1855 case AMDGPU::SRC_SCC: 1856 return true; 1857 case AMDGPU::SGPR_NULL: 1858 return true; 1859 default: 1860 return false; 1861 } 1862 } 1863 1864 bool AMDGPUOperand::isInlineValue() const { 1865 return isRegKind() && ::isInlineValue(getReg()); 1866 } 1867 1868 //===----------------------------------------------------------------------===// 1869 // AsmParser 1870 //===----------------------------------------------------------------------===// 1871 1872 static int getRegClass(RegisterKind Is, unsigned RegWidth) { 1873 if (Is == IS_VGPR) { 1874 switch (RegWidth) { 1875 default: return -1; 1876 case 1: return AMDGPU::VGPR_32RegClassID; 1877 case 2: return AMDGPU::VReg_64RegClassID; 1878 case 3: return AMDGPU::VReg_96RegClassID; 1879 case 4: return AMDGPU::VReg_128RegClassID; 1880 case 5: return AMDGPU::VReg_160RegClassID; 1881 case 8: return AMDGPU::VReg_256RegClassID; 1882 case 16: return AMDGPU::VReg_512RegClassID; 1883 case 32: return AMDGPU::VReg_1024RegClassID; 1884 } 1885 } else if (Is == IS_TTMP) { 1886 switch (RegWidth) { 1887 default: return -1; 1888 case 1: return AMDGPU::TTMP_32RegClassID; 1889 case 2: return AMDGPU::TTMP_64RegClassID; 1890 case 4: return AMDGPU::TTMP_128RegClassID; 1891 case 8: return AMDGPU::TTMP_256RegClassID; 1892 case 16: return AMDGPU::TTMP_512RegClassID; 1893 } 1894 } else if (Is == IS_SGPR) { 1895 switch (RegWidth) { 1896 default: return -1; 1897 case 1: return AMDGPU::SGPR_32RegClassID; 1898 case 2: return AMDGPU::SGPR_64RegClassID; 1899 case 4: return AMDGPU::SGPR_128RegClassID; 1900 case 8: return AMDGPU::SGPR_256RegClassID; 1901 case 16: return AMDGPU::SGPR_512RegClassID; 1902 } 1903 } else if (Is == IS_AGPR) { 1904 switch (RegWidth) { 1905 default: return -1; 1906 case 1: return AMDGPU::AGPR_32RegClassID; 1907 case 2: return AMDGPU::AReg_64RegClassID; 1908 case 4: return AMDGPU::AReg_128RegClassID; 1909 case 16: return AMDGPU::AReg_512RegClassID; 1910 case 32: return AMDGPU::AReg_1024RegClassID; 1911 } 1912 } 1913 return -1; 1914 } 1915 1916 static unsigned getSpecialRegForName(StringRef RegName) { 1917 return StringSwitch<unsigned>(RegName) 1918 .Case("exec", AMDGPU::EXEC) 1919 .Case("vcc", AMDGPU::VCC) 1920 .Case("flat_scratch", AMDGPU::FLAT_SCR) 1921 .Case("xnack_mask", AMDGPU::XNACK_MASK) 1922 .Case("shared_base", AMDGPU::SRC_SHARED_BASE) 1923 .Case("src_shared_base", AMDGPU::SRC_SHARED_BASE) 1924 .Case("shared_limit", AMDGPU::SRC_SHARED_LIMIT) 1925 .Case("src_shared_limit", AMDGPU::SRC_SHARED_LIMIT) 1926 .Case("private_base", AMDGPU::SRC_PRIVATE_BASE) 1927 .Case("src_private_base", AMDGPU::SRC_PRIVATE_BASE) 1928 .Case("private_limit", AMDGPU::SRC_PRIVATE_LIMIT) 1929 
.Case("src_private_limit", AMDGPU::SRC_PRIVATE_LIMIT) 1930 .Case("pops_exiting_wave_id", AMDGPU::SRC_POPS_EXITING_WAVE_ID) 1931 .Case("src_pops_exiting_wave_id", AMDGPU::SRC_POPS_EXITING_WAVE_ID) 1932 .Case("lds_direct", AMDGPU::LDS_DIRECT) 1933 .Case("src_lds_direct", AMDGPU::LDS_DIRECT) 1934 .Case("m0", AMDGPU::M0) 1935 .Case("vccz", AMDGPU::SRC_VCCZ) 1936 .Case("src_vccz", AMDGPU::SRC_VCCZ) 1937 .Case("execz", AMDGPU::SRC_EXECZ) 1938 .Case("src_execz", AMDGPU::SRC_EXECZ) 1939 .Case("scc", AMDGPU::SRC_SCC) 1940 .Case("src_scc", AMDGPU::SRC_SCC) 1941 .Case("tba", AMDGPU::TBA) 1942 .Case("tma", AMDGPU::TMA) 1943 .Case("flat_scratch_lo", AMDGPU::FLAT_SCR_LO) 1944 .Case("flat_scratch_hi", AMDGPU::FLAT_SCR_HI) 1945 .Case("xnack_mask_lo", AMDGPU::XNACK_MASK_LO) 1946 .Case("xnack_mask_hi", AMDGPU::XNACK_MASK_HI) 1947 .Case("vcc_lo", AMDGPU::VCC_LO) 1948 .Case("vcc_hi", AMDGPU::VCC_HI) 1949 .Case("exec_lo", AMDGPU::EXEC_LO) 1950 .Case("exec_hi", AMDGPU::EXEC_HI) 1951 .Case("tma_lo", AMDGPU::TMA_LO) 1952 .Case("tma_hi", AMDGPU::TMA_HI) 1953 .Case("tba_lo", AMDGPU::TBA_LO) 1954 .Case("tba_hi", AMDGPU::TBA_HI) 1955 .Case("null", AMDGPU::SGPR_NULL) 1956 .Default(0); 1957 } 1958 1959 bool AMDGPUAsmParser::ParseRegister(unsigned &RegNo, SMLoc &StartLoc, 1960 SMLoc &EndLoc) { 1961 auto R = parseRegister(); 1962 if (!R) return true; 1963 assert(R->isReg()); 1964 RegNo = R->getReg(); 1965 StartLoc = R->getStartLoc(); 1966 EndLoc = R->getEndLoc(); 1967 return false; 1968 } 1969 1970 bool AMDGPUAsmParser::AddNextRegisterToList(unsigned &Reg, unsigned &RegWidth, 1971 RegisterKind RegKind, unsigned Reg1, 1972 unsigned RegNum) { 1973 switch (RegKind) { 1974 case IS_SPECIAL: 1975 if (Reg == AMDGPU::EXEC_LO && Reg1 == AMDGPU::EXEC_HI) { 1976 Reg = AMDGPU::EXEC; 1977 RegWidth = 2; 1978 return true; 1979 } 1980 if (Reg == AMDGPU::FLAT_SCR_LO && Reg1 == AMDGPU::FLAT_SCR_HI) { 1981 Reg = AMDGPU::FLAT_SCR; 1982 RegWidth = 2; 1983 return true; 1984 } 1985 if (Reg == AMDGPU::XNACK_MASK_LO && Reg1 == AMDGPU::XNACK_MASK_HI) { 1986 Reg = AMDGPU::XNACK_MASK; 1987 RegWidth = 2; 1988 return true; 1989 } 1990 if (Reg == AMDGPU::VCC_LO && Reg1 == AMDGPU::VCC_HI) { 1991 Reg = AMDGPU::VCC; 1992 RegWidth = 2; 1993 return true; 1994 } 1995 if (Reg == AMDGPU::TBA_LO && Reg1 == AMDGPU::TBA_HI) { 1996 Reg = AMDGPU::TBA; 1997 RegWidth = 2; 1998 return true; 1999 } 2000 if (Reg == AMDGPU::TMA_LO && Reg1 == AMDGPU::TMA_HI) { 2001 Reg = AMDGPU::TMA; 2002 RegWidth = 2; 2003 return true; 2004 } 2005 return false; 2006 case IS_VGPR: 2007 case IS_SGPR: 2008 case IS_AGPR: 2009 case IS_TTMP: 2010 if (Reg1 != Reg + RegWidth) { 2011 return false; 2012 } 2013 RegWidth++; 2014 return true; 2015 default: 2016 llvm_unreachable("unexpected register kind"); 2017 } 2018 } 2019 2020 static const StringRef Registers[] = { 2021 { "v" }, 2022 { "s" }, 2023 { "ttmp" }, 2024 { "acc" }, 2025 { "a" }, 2026 }; 2027 2028 bool 2029 AMDGPUAsmParser::isRegister(const AsmToken &Token, 2030 const AsmToken &NextToken) const { 2031 2032 // A list of consecutive registers: [s0,s1,s2,s3] 2033 if (Token.is(AsmToken::LBrac)) 2034 return true; 2035 2036 if (!Token.is(AsmToken::Identifier)) 2037 return false; 2038 2039 // A single register like s0 or a range of registers like s[0:1] 2040 2041 StringRef RegName = Token.getString(); 2042 2043 for (StringRef Reg : Registers) { 2044 if (RegName.startswith(Reg)) { 2045 if (Reg.size() < RegName.size()) { 2046 unsigned RegNum; 2047 // A single register with an index: rXX 2048 if (!RegName.substr(Reg.size()).getAsInteger(10, 
RegNum)) 2049 return true; 2050 } else { 2051 // A range of registers: r[XX:YY]. 2052 if (NextToken.is(AsmToken::LBrac)) 2053 return true; 2054 } 2055 } 2056 } 2057 2058 return getSpecialRegForName(RegName); 2059 } 2060 2061 bool 2062 AMDGPUAsmParser::isRegister() 2063 { 2064 return isRegister(getToken(), peekToken()); 2065 } 2066 2067 bool AMDGPUAsmParser::ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg, 2068 unsigned &RegNum, unsigned &RegWidth, 2069 unsigned *DwordRegIndex) { 2070 if (DwordRegIndex) { *DwordRegIndex = 0; } 2071 const MCRegisterInfo *TRI = getContext().getRegisterInfo(); 2072 if (getLexer().is(AsmToken::Identifier)) { 2073 StringRef RegName = Parser.getTok().getString(); 2074 if ((Reg = getSpecialRegForName(RegName))) { 2075 Parser.Lex(); 2076 RegKind = IS_SPECIAL; 2077 } else { 2078 unsigned RegNumIndex = 0; 2079 if (RegName[0] == 'v') { 2080 RegNumIndex = 1; 2081 RegKind = IS_VGPR; 2082 } else if (RegName[0] == 's') { 2083 RegNumIndex = 1; 2084 RegKind = IS_SGPR; 2085 } else if (RegName[0] == 'a') { 2086 RegNumIndex = RegName.startswith("acc") ? 3 : 1; 2087 RegKind = IS_AGPR; 2088 } else if (RegName.startswith("ttmp")) { 2089 RegNumIndex = strlen("ttmp"); 2090 RegKind = IS_TTMP; 2091 } else { 2092 return false; 2093 } 2094 if (RegName.size() > RegNumIndex) { 2095 // Single 32-bit register: vXX. 2096 if (RegName.substr(RegNumIndex).getAsInteger(10, RegNum)) 2097 return false; 2098 Parser.Lex(); 2099 RegWidth = 1; 2100 } else { 2101 // Range of registers: v[XX:YY]. ":YY" is optional. 2102 Parser.Lex(); 2103 int64_t RegLo, RegHi; 2104 if (getLexer().isNot(AsmToken::LBrac)) 2105 return false; 2106 Parser.Lex(); 2107 2108 if (getParser().parseAbsoluteExpression(RegLo)) 2109 return false; 2110 2111 const bool isRBrace = getLexer().is(AsmToken::RBrac); 2112 if (!isRBrace && getLexer().isNot(AsmToken::Colon)) 2113 return false; 2114 Parser.Lex(); 2115 2116 if (isRBrace) { 2117 RegHi = RegLo; 2118 } else { 2119 if (getParser().parseAbsoluteExpression(RegHi)) 2120 return false; 2121 2122 if (getLexer().isNot(AsmToken::RBrac)) 2123 return false; 2124 Parser.Lex(); 2125 } 2126 RegNum = (unsigned) RegLo; 2127 RegWidth = (RegHi - RegLo) + 1; 2128 } 2129 } 2130 } else if (getLexer().is(AsmToken::LBrac)) { 2131 // List of consecutive registers: [s0,s1,s2,s3] 2132 Parser.Lex(); 2133 if (!ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth, nullptr)) 2134 return false; 2135 if (RegWidth != 1) 2136 return false; 2137 RegisterKind RegKind1; 2138 unsigned Reg1, RegNum1, RegWidth1; 2139 do { 2140 if (getLexer().is(AsmToken::Comma)) { 2141 Parser.Lex(); 2142 } else if (getLexer().is(AsmToken::RBrac)) { 2143 Parser.Lex(); 2144 break; 2145 } else if (ParseAMDGPURegister(RegKind1, Reg1, RegNum1, RegWidth1, nullptr)) { 2146 if (RegWidth1 != 1) { 2147 return false; 2148 } 2149 if (RegKind1 != RegKind) { 2150 return false; 2151 } 2152 if (!AddNextRegisterToList(Reg, RegWidth, RegKind1, Reg1, RegNum1)) { 2153 return false; 2154 } 2155 } else { 2156 return false; 2157 } 2158 } while (true); 2159 } else { 2160 return false; 2161 } 2162 switch (RegKind) { 2163 case IS_SPECIAL: 2164 RegNum = 0; 2165 RegWidth = 1; 2166 break; 2167 case IS_VGPR: 2168 case IS_SGPR: 2169 case IS_AGPR: 2170 case IS_TTMP: 2171 { 2172 unsigned Size = 1; 2173 if (RegKind == IS_SGPR || RegKind == IS_TTMP) { 2174 // SGPR and TTMP registers must be aligned. Max required alignment is 4 dwords. 
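// For example, s[2:3] is accepted because its starting index is a multiple
// of the pair size, while s[1:2] is rejected by the alignment check below.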
2175 Size = std::min(RegWidth, 4u); 2176 } 2177 if (RegNum % Size != 0) 2178 return false; 2179 if (DwordRegIndex) { *DwordRegIndex = RegNum; } 2180 RegNum = RegNum / Size; 2181 int RCID = getRegClass(RegKind, RegWidth); 2182 if (RCID == -1) 2183 return false; 2184 const MCRegisterClass RC = TRI->getRegClass(RCID); 2185 if (RegNum >= RC.getNumRegs()) 2186 return false; 2187 Reg = RC.getRegister(RegNum); 2188 break; 2189 } 2190 2191 default: 2192 llvm_unreachable("unexpected register kind"); 2193 } 2194 2195 if (!subtargetHasRegister(*TRI, Reg)) 2196 return false; 2197 return true; 2198 } 2199 2200 Optional<StringRef> 2201 AMDGPUAsmParser::getGprCountSymbolName(RegisterKind RegKind) { 2202 switch (RegKind) { 2203 case IS_VGPR: 2204 return StringRef(".amdgcn.next_free_vgpr"); 2205 case IS_SGPR: 2206 return StringRef(".amdgcn.next_free_sgpr"); 2207 default: 2208 return None; 2209 } 2210 } 2211 2212 void AMDGPUAsmParser::initializeGprCountSymbol(RegisterKind RegKind) { 2213 auto SymbolName = getGprCountSymbolName(RegKind); 2214 assert(SymbolName && "initializing invalid register kind"); 2215 MCSymbol *Sym = getContext().getOrCreateSymbol(*SymbolName); 2216 Sym->setVariableValue(MCConstantExpr::create(0, getContext())); 2217 } 2218 2219 bool AMDGPUAsmParser::updateGprCountSymbols(RegisterKind RegKind, 2220 unsigned DwordRegIndex, 2221 unsigned RegWidth) { 2222 // Symbols are only defined for GCN targets 2223 if (AMDGPU::getIsaVersion(getSTI().getCPU()).Major < 6) 2224 return true; 2225 2226 auto SymbolName = getGprCountSymbolName(RegKind); 2227 if (!SymbolName) 2228 return true; 2229 MCSymbol *Sym = getContext().getOrCreateSymbol(*SymbolName); 2230 2231 int64_t NewMax = DwordRegIndex + RegWidth - 1; 2232 int64_t OldCount; 2233 2234 if (!Sym->isVariable()) 2235 return !Error(getParser().getTok().getLoc(), 2236 ".amdgcn.next_free_{v,s}gpr symbols must be variable"); 2237 if (!Sym->getVariableValue(false)->evaluateAsAbsolute(OldCount)) 2238 return !Error( 2239 getParser().getTok().getLoc(), 2240 ".amdgcn.next_free_{v,s}gpr symbols must be absolute expressions"); 2241 2242 if (OldCount <= NewMax) 2243 Sym->setVariableValue(MCConstantExpr::create(NewMax + 1, getContext())); 2244 2245 return true; 2246 } 2247 2248 std::unique_ptr<AMDGPUOperand> AMDGPUAsmParser::parseRegister() { 2249 const auto &Tok = Parser.getTok(); 2250 SMLoc StartLoc = Tok.getLoc(); 2251 SMLoc EndLoc = Tok.getEndLoc(); 2252 RegisterKind RegKind; 2253 unsigned Reg, RegNum, RegWidth, DwordRegIndex; 2254 2255 if (!ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth, &DwordRegIndex)) { 2256 //FIXME: improve error messages (bug 41303). 
2257 Error(StartLoc, "not a valid operand."); 2258 return nullptr; 2259 } 2260 if (AMDGPU::IsaInfo::hasCodeObjectV3(&getSTI())) { 2261 if (!updateGprCountSymbols(RegKind, DwordRegIndex, RegWidth)) 2262 return nullptr; 2263 } else 2264 KernelScope.usesRegister(RegKind, DwordRegIndex, RegWidth); 2265 return AMDGPUOperand::CreateReg(this, Reg, StartLoc, EndLoc); 2266 } 2267 2268 OperandMatchResultTy 2269 AMDGPUAsmParser::parseImm(OperandVector &Operands, bool HasSP3AbsModifier) { 2270 // TODO: add syntactic sugar for 1/(2*PI) 2271 2272 assert(!isRegister()); 2273 assert(!isModifier()); 2274 2275 const auto& Tok = getToken(); 2276 const auto& NextTok = peekToken(); 2277 bool IsReal = Tok.is(AsmToken::Real); 2278 SMLoc S = getLoc(); 2279 bool Negate = false; 2280 2281 if (!IsReal && Tok.is(AsmToken::Minus) && NextTok.is(AsmToken::Real)) { 2282 lex(); 2283 IsReal = true; 2284 Negate = true; 2285 } 2286 2287 if (IsReal) { 2288 // Floating-point expressions are not supported. 2289 // Can only allow floating-point literals with an 2290 // optional sign. 2291 2292 StringRef Num = getTokenStr(); 2293 lex(); 2294 2295 APFloat RealVal(APFloat::IEEEdouble()); 2296 auto roundMode = APFloat::rmNearestTiesToEven; 2297 if (RealVal.convertFromString(Num, roundMode) == APFloat::opInvalidOp) { 2298 return MatchOperand_ParseFail; 2299 } 2300 if (Negate) 2301 RealVal.changeSign(); 2302 2303 Operands.push_back( 2304 AMDGPUOperand::CreateImm(this, RealVal.bitcastToAPInt().getZExtValue(), S, 2305 AMDGPUOperand::ImmTyNone, true)); 2306 2307 return MatchOperand_Success; 2308 2309 } else { 2310 int64_t IntVal; 2311 const MCExpr *Expr; 2312 SMLoc S = getLoc(); 2313 2314 if (HasSP3AbsModifier) { 2315 // This is a workaround for handling expressions 2316 // as arguments of SP3 'abs' modifier, for example: 2317 // |1.0| 2318 // |-1| 2319 // |1+x| 2320 // This syntax is not compatible with syntax of standard 2321 // MC expressions (due to the trailing '|'). 
2322 SMLoc EndLoc; 2323 if (getParser().parsePrimaryExpr(Expr, EndLoc)) 2324 return MatchOperand_ParseFail; 2325 } else { 2326 if (Parser.parseExpression(Expr)) 2327 return MatchOperand_ParseFail; 2328 } 2329 2330 if (Expr->evaluateAsAbsolute(IntVal)) { 2331 Operands.push_back(AMDGPUOperand::CreateImm(this, IntVal, S)); 2332 } else { 2333 Operands.push_back(AMDGPUOperand::CreateExpr(this, Expr, S)); 2334 } 2335 2336 return MatchOperand_Success; 2337 } 2338 2339 return MatchOperand_NoMatch; 2340 } 2341 2342 OperandMatchResultTy 2343 AMDGPUAsmParser::parseReg(OperandVector &Operands) { 2344 if (!isRegister()) 2345 return MatchOperand_NoMatch; 2346 2347 if (auto R = parseRegister()) { 2348 assert(R->isReg()); 2349 Operands.push_back(std::move(R)); 2350 return MatchOperand_Success; 2351 } 2352 return MatchOperand_ParseFail; 2353 } 2354 2355 OperandMatchResultTy 2356 AMDGPUAsmParser::parseRegOrImm(OperandVector &Operands, bool HasSP3AbsMod) { 2357 auto res = parseReg(Operands); 2358 if (res != MatchOperand_NoMatch) { 2359 return res; 2360 } else if (isModifier()) { 2361 return MatchOperand_NoMatch; 2362 } else { 2363 return parseImm(Operands, HasSP3AbsMod); 2364 } 2365 } 2366 2367 bool 2368 AMDGPUAsmParser::isNamedOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const { 2369 if (Token.is(AsmToken::Identifier) && NextToken.is(AsmToken::LParen)) { 2370 const auto &str = Token.getString(); 2371 return str == "abs" || str == "neg" || str == "sext"; 2372 } 2373 return false; 2374 } 2375 2376 bool 2377 AMDGPUAsmParser::isOpcodeModifierWithVal(const AsmToken &Token, const AsmToken &NextToken) const { 2378 return Token.is(AsmToken::Identifier) && NextToken.is(AsmToken::Colon); 2379 } 2380 2381 bool 2382 AMDGPUAsmParser::isOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const { 2383 return isNamedOperandModifier(Token, NextToken) || Token.is(AsmToken::Pipe); 2384 } 2385 2386 bool 2387 AMDGPUAsmParser::isRegOrOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const { 2388 return isRegister(Token, NextToken) || isOperandModifier(Token, NextToken); 2389 } 2390 2391 // Check if this is an operand modifier or an opcode modifier 2392 // which may look like an expression but it is not. We should 2393 // avoid parsing these modifiers as expressions. Currently 2394 // recognized sequences are: 2395 // |...| 2396 // abs(...) 2397 // neg(...) 2398 // sext(...) 2399 // -reg 2400 // -|...| 2401 // -abs(...) 2402 // name:... 2403 // Note that simple opcode modifiers like 'gds' may be parsed as 2404 // expressions; this is a special case. See getExpressionAsToken. 2405 // 2406 bool 2407 AMDGPUAsmParser::isModifier() { 2408 2409 AsmToken Tok = getToken(); 2410 AsmToken NextToken[2]; 2411 peekTokens(NextToken); 2412 2413 return isOperandModifier(Tok, NextToken[0]) || 2414 (Tok.is(AsmToken::Minus) && isRegOrOperandModifier(NextToken[0], NextToken[1])) || 2415 isOpcodeModifierWithVal(Tok, NextToken[0]); 2416 } 2417 2418 // Check if the current token is an SP3 'neg' modifier. 2419 // Currently this modifier is allowed in the following context: 2420 // 2421 // 1. Before a register, e.g. "-v0", "-v[...]" or "-[v0,v1]". 2422 // 2. Before an 'abs' modifier: -abs(...) 2423 // 3. Before an SP3 'abs' modifier: -|...| 2424 // 2425 // In all other cases "-" is handled as a part 2426 // of an expression that follows the sign. 
2427 // 2428 // Note: When "-" is followed by an integer literal, 2429 // this is interpreted as integer negation rather 2430 // than a floating-point NEG modifier applied to N. 2431 // Beside being contr-intuitive, such use of floating-point 2432 // NEG modifier would have resulted in different meaning 2433 // of integer literals used with VOP1/2/C and VOP3, 2434 // for example: 2435 // v_exp_f32_e32 v5, -1 // VOP1: src0 = 0xFFFFFFFF 2436 // v_exp_f32_e64 v5, -1 // VOP3: src0 = 0x80000001 2437 // Negative fp literals with preceding "-" are 2438 // handled likewise for unifomtity 2439 // 2440 bool 2441 AMDGPUAsmParser::parseSP3NegModifier() { 2442 2443 AsmToken NextToken[2]; 2444 peekTokens(NextToken); 2445 2446 if (isToken(AsmToken::Minus) && 2447 (isRegister(NextToken[0], NextToken[1]) || 2448 NextToken[0].is(AsmToken::Pipe) || 2449 isId(NextToken[0], "abs"))) { 2450 lex(); 2451 return true; 2452 } 2453 2454 return false; 2455 } 2456 2457 OperandMatchResultTy 2458 AMDGPUAsmParser::parseRegOrImmWithFPInputMods(OperandVector &Operands, 2459 bool AllowImm) { 2460 bool Neg, SP3Neg; 2461 bool Abs, SP3Abs; 2462 SMLoc Loc; 2463 2464 // Disable ambiguous constructs like '--1' etc. Should use neg(-1) instead. 2465 if (isToken(AsmToken::Minus) && peekToken().is(AsmToken::Minus)) { 2466 Error(getLoc(), "invalid syntax, expected 'neg' modifier"); 2467 return MatchOperand_ParseFail; 2468 } 2469 2470 SP3Neg = parseSP3NegModifier(); 2471 2472 Loc = getLoc(); 2473 Neg = trySkipId("neg"); 2474 if (Neg && SP3Neg) { 2475 Error(Loc, "expected register or immediate"); 2476 return MatchOperand_ParseFail; 2477 } 2478 if (Neg && !skipToken(AsmToken::LParen, "expected left paren after neg")) 2479 return MatchOperand_ParseFail; 2480 2481 Abs = trySkipId("abs"); 2482 if (Abs && !skipToken(AsmToken::LParen, "expected left paren after abs")) 2483 return MatchOperand_ParseFail; 2484 2485 Loc = getLoc(); 2486 SP3Abs = trySkipToken(AsmToken::Pipe); 2487 if (Abs && SP3Abs) { 2488 Error(Loc, "expected register or immediate"); 2489 return MatchOperand_ParseFail; 2490 } 2491 2492 OperandMatchResultTy Res; 2493 if (AllowImm) { 2494 Res = parseRegOrImm(Operands, SP3Abs); 2495 } else { 2496 Res = parseReg(Operands); 2497 } 2498 if (Res != MatchOperand_Success) { 2499 return (SP3Neg || Neg || SP3Abs || Abs)? 
MatchOperand_ParseFail : Res; 2500 } 2501 2502 if (SP3Abs && !skipToken(AsmToken::Pipe, "expected vertical bar")) 2503 return MatchOperand_ParseFail; 2504 if (Abs && !skipToken(AsmToken::RParen, "expected closing parentheses")) 2505 return MatchOperand_ParseFail; 2506 if (Neg && !skipToken(AsmToken::RParen, "expected closing parentheses")) 2507 return MatchOperand_ParseFail; 2508 2509 AMDGPUOperand::Modifiers Mods; 2510 Mods.Abs = Abs || SP3Abs; 2511 Mods.Neg = Neg || SP3Neg; 2512 2513 if (Mods.hasFPModifiers()) { 2514 AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back()); 2515 if (Op.isExpr()) { 2516 Error(Op.getStartLoc(), "expected an absolute expression"); 2517 return MatchOperand_ParseFail; 2518 } 2519 Op.setModifiers(Mods); 2520 } 2521 return MatchOperand_Success; 2522 } 2523 2524 OperandMatchResultTy 2525 AMDGPUAsmParser::parseRegOrImmWithIntInputMods(OperandVector &Operands, 2526 bool AllowImm) { 2527 bool Sext = trySkipId("sext"); 2528 if (Sext && !skipToken(AsmToken::LParen, "expected left paren after sext")) 2529 return MatchOperand_ParseFail; 2530 2531 OperandMatchResultTy Res; 2532 if (AllowImm) { 2533 Res = parseRegOrImm(Operands); 2534 } else { 2535 Res = parseReg(Operands); 2536 } 2537 if (Res != MatchOperand_Success) { 2538 return Sext? MatchOperand_ParseFail : Res; 2539 } 2540 2541 if (Sext && !skipToken(AsmToken::RParen, "expected closing parentheses")) 2542 return MatchOperand_ParseFail; 2543 2544 AMDGPUOperand::Modifiers Mods; 2545 Mods.Sext = Sext; 2546 2547 if (Mods.hasIntModifiers()) { 2548 AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back()); 2549 if (Op.isExpr()) { 2550 Error(Op.getStartLoc(), "expected an absolute expression"); 2551 return MatchOperand_ParseFail; 2552 } 2553 Op.setModifiers(Mods); 2554 } 2555 2556 return MatchOperand_Success; 2557 } 2558 2559 OperandMatchResultTy 2560 AMDGPUAsmParser::parseRegWithFPInputMods(OperandVector &Operands) { 2561 return parseRegOrImmWithFPInputMods(Operands, false); 2562 } 2563 2564 OperandMatchResultTy 2565 AMDGPUAsmParser::parseRegWithIntInputMods(OperandVector &Operands) { 2566 return parseRegOrImmWithIntInputMods(Operands, false); 2567 } 2568 2569 OperandMatchResultTy AMDGPUAsmParser::parseVReg32OrOff(OperandVector &Operands) { 2570 auto Loc = getLoc(); 2571 if (trySkipId("off")) { 2572 Operands.push_back(AMDGPUOperand::CreateImm(this, 0, Loc, 2573 AMDGPUOperand::ImmTyOff, false)); 2574 return MatchOperand_Success; 2575 } 2576 2577 if (!isRegister()) 2578 return MatchOperand_NoMatch; 2579 2580 std::unique_ptr<AMDGPUOperand> Reg = parseRegister(); 2581 if (Reg) { 2582 Operands.push_back(std::move(Reg)); 2583 return MatchOperand_Success; 2584 } 2585 2586 return MatchOperand_ParseFail; 2587 2588 } 2589 2590 unsigned AMDGPUAsmParser::checkTargetMatchPredicate(MCInst &Inst) { 2591 uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags; 2592 2593 if ((getForcedEncodingSize() == 32 && (TSFlags & SIInstrFlags::VOP3)) || 2594 (getForcedEncodingSize() == 64 && !(TSFlags & SIInstrFlags::VOP3)) || 2595 (isForcedDPP() && !(TSFlags & SIInstrFlags::DPP)) || 2596 (isForcedSDWA() && !(TSFlags & SIInstrFlags::SDWA)) ) 2597 return Match_InvalidOperand; 2598 2599 if ((TSFlags & SIInstrFlags::VOP3) && 2600 (TSFlags & SIInstrFlags::VOPAsmPrefer32Bit) && 2601 getForcedEncodingSize() != 64) 2602 return Match_PreferE32; 2603 2604 if (Inst.getOpcode() == AMDGPU::V_MAC_F32_sdwa_vi || 2605 Inst.getOpcode() == AMDGPU::V_MAC_F16_sdwa_vi) { 2606 // v_mac_f32/16 allow only dst_sel == DWORD; 2607 auto OpNum = 2608 
AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::dst_sel); 2609 const auto &Op = Inst.getOperand(OpNum); 2610 if (!Op.isImm() || Op.getImm() != AMDGPU::SDWA::SdwaSel::DWORD) { 2611 return Match_InvalidOperand; 2612 } 2613 } 2614 2615 return Match_Success; 2616 } 2617 2618 // What asm variants we should check 2619 ArrayRef<unsigned> AMDGPUAsmParser::getMatchedVariants() const { 2620 if (getForcedEncodingSize() == 32) { 2621 static const unsigned Variants[] = {AMDGPUAsmVariants::DEFAULT}; 2622 return makeArrayRef(Variants); 2623 } 2624 2625 if (isForcedVOP3()) { 2626 static const unsigned Variants[] = {AMDGPUAsmVariants::VOP3}; 2627 return makeArrayRef(Variants); 2628 } 2629 2630 if (isForcedSDWA()) { 2631 static const unsigned Variants[] = {AMDGPUAsmVariants::SDWA, 2632 AMDGPUAsmVariants::SDWA9}; 2633 return makeArrayRef(Variants); 2634 } 2635 2636 if (isForcedDPP()) { 2637 static const unsigned Variants[] = {AMDGPUAsmVariants::DPP}; 2638 return makeArrayRef(Variants); 2639 } 2640 2641 static const unsigned Variants[] = { 2642 AMDGPUAsmVariants::DEFAULT, AMDGPUAsmVariants::VOP3, 2643 AMDGPUAsmVariants::SDWA, AMDGPUAsmVariants::SDWA9, AMDGPUAsmVariants::DPP 2644 }; 2645 2646 return makeArrayRef(Variants); 2647 } 2648 2649 unsigned AMDGPUAsmParser::findImplicitSGPRReadInVOP(const MCInst &Inst) const { 2650 const MCInstrDesc &Desc = MII.get(Inst.getOpcode()); 2651 const unsigned Num = Desc.getNumImplicitUses(); 2652 for (unsigned i = 0; i < Num; ++i) { 2653 unsigned Reg = Desc.ImplicitUses[i]; 2654 switch (Reg) { 2655 case AMDGPU::FLAT_SCR: 2656 case AMDGPU::VCC: 2657 case AMDGPU::VCC_LO: 2658 case AMDGPU::VCC_HI: 2659 case AMDGPU::M0: 2660 return Reg; 2661 default: 2662 break; 2663 } 2664 } 2665 return AMDGPU::NoRegister; 2666 } 2667 2668 // NB: This code is correct only when used to check constant 2669 // bus limitations because GFX7 supports no f16 inline constants. 2670 // Note that there are no cases when a GFX7 opcode violates 2671 // constant bus limitations due to the use of an f16 constant.
2672 bool AMDGPUAsmParser::isInlineConstant(const MCInst &Inst, 2673 unsigned OpIdx) const { 2674 const MCInstrDesc &Desc = MII.get(Inst.getOpcode()); 2675 2676 if (!AMDGPU::isSISrcOperand(Desc, OpIdx)) { 2677 return false; 2678 } 2679 2680 const MCOperand &MO = Inst.getOperand(OpIdx); 2681 2682 int64_t Val = MO.getImm(); 2683 auto OpSize = AMDGPU::getOperandSize(Desc, OpIdx); 2684 2685 switch (OpSize) { // expected operand size 2686 case 8: 2687 return AMDGPU::isInlinableLiteral64(Val, hasInv2PiInlineImm()); 2688 case 4: 2689 return AMDGPU::isInlinableLiteral32(Val, hasInv2PiInlineImm()); 2690 case 2: { 2691 const unsigned OperandType = Desc.OpInfo[OpIdx].OperandType; 2692 if (OperandType == AMDGPU::OPERAND_REG_INLINE_C_V2INT16 || 2693 OperandType == AMDGPU::OPERAND_REG_INLINE_C_V2FP16 || 2694 OperandType == AMDGPU::OPERAND_REG_INLINE_AC_V2INT16 || 2695 OperandType == AMDGPU::OPERAND_REG_INLINE_AC_V2FP16 || 2696 OperandType == AMDGPU::OPERAND_REG_IMM_V2INT16 || 2697 OperandType == AMDGPU::OPERAND_REG_IMM_V2FP16) { 2698 return AMDGPU::isInlinableLiteralV216(Val, hasInv2PiInlineImm()); 2699 } else { 2700 return AMDGPU::isInlinableLiteral16(Val, hasInv2PiInlineImm()); 2701 } 2702 } 2703 default: 2704 llvm_unreachable("invalid operand size"); 2705 } 2706 } 2707 2708 unsigned AMDGPUAsmParser::getConstantBusLimit(unsigned Opcode) const { 2709 if (!isGFX10()) 2710 return 1; 2711 2712 switch (Opcode) { 2713 // 64-bit shift instructions can use only one scalar value input 2714 case AMDGPU::V_LSHLREV_B64: 2715 case AMDGPU::V_LSHLREV_B64_gfx10: 2716 case AMDGPU::V_LSHL_B64: 2717 case AMDGPU::V_LSHRREV_B64: 2718 case AMDGPU::V_LSHRREV_B64_gfx10: 2719 case AMDGPU::V_LSHR_B64: 2720 case AMDGPU::V_ASHRREV_I64: 2721 case AMDGPU::V_ASHRREV_I64_gfx10: 2722 case AMDGPU::V_ASHR_I64: 2723 return 1; 2724 default: 2725 return 2; 2726 } 2727 } 2728 2729 bool AMDGPUAsmParser::usesConstantBus(const MCInst &Inst, unsigned OpIdx) { 2730 const MCOperand &MO = Inst.getOperand(OpIdx); 2731 if (MO.isImm()) { 2732 return !isInlineConstant(Inst, OpIdx); 2733 } else if (MO.isReg()) { 2734 auto Reg = MO.getReg(); 2735 const MCRegisterInfo *TRI = getContext().getRegisterInfo(); 2736 return isSGPR(mc2PseudoReg(Reg), TRI) && Reg != SGPR_NULL; 2737 } else { 2738 return true; 2739 } 2740 } 2741 2742 bool AMDGPUAsmParser::validateConstantBusLimitations(const MCInst &Inst) { 2743 const unsigned Opcode = Inst.getOpcode(); 2744 const MCInstrDesc &Desc = MII.get(Opcode); 2745 unsigned ConstantBusUseCount = 0; 2746 unsigned NumLiterals = 0; 2747 unsigned LiteralSize; 2748 2749 if (Desc.TSFlags & 2750 (SIInstrFlags::VOPC | 2751 SIInstrFlags::VOP1 | SIInstrFlags::VOP2 | 2752 SIInstrFlags::VOP3 | SIInstrFlags::VOP3P | 2753 SIInstrFlags::SDWA)) { 2754 // Check special imm operands (used by madmk, etc) 2755 if (AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::imm) != -1) { 2756 ++ConstantBusUseCount; 2757 } 2758 2759 SmallDenseSet<unsigned> SGPRsUsed; 2760 unsigned SGPRUsed = findImplicitSGPRReadInVOP(Inst); 2761 if (SGPRUsed != AMDGPU::NoRegister) { 2762 SGPRsUsed.insert(SGPRUsed); 2763 ++ConstantBusUseCount; 2764 } 2765 2766 const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0); 2767 const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1); 2768 const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2); 2769 2770 const int OpIndices[] = { Src0Idx, Src1Idx, Src2Idx }; 2771 2772 for (int OpIdx : OpIndices) { 2773 if (OpIdx == -1) break; 2774 2775 const MCOperand &MO = 
Inst.getOperand(OpIdx); 2776 if (usesConstantBus(Inst, OpIdx)) { 2777 if (MO.isReg()) { 2778 const unsigned Reg = mc2PseudoReg(MO.getReg()); 2779 // Pairs of registers with a partial intersections like these 2780 // s0, s[0:1] 2781 // flat_scratch_lo, flat_scratch 2782 // flat_scratch_lo, flat_scratch_hi 2783 // are theoretically valid but they are disabled anyway. 2784 // Note that this code mimics SIInstrInfo::verifyInstruction 2785 if (!SGPRsUsed.count(Reg)) { 2786 SGPRsUsed.insert(Reg); 2787 ++ConstantBusUseCount; 2788 } 2789 } else { // Expression or a literal 2790 2791 if (Desc.OpInfo[OpIdx].OperandType == MCOI::OPERAND_IMMEDIATE) 2792 continue; // special operand like VINTERP attr_chan 2793 2794 // An instruction may use only one literal. 2795 // This has been validated on the previous step. 2796 // See validateVOP3Literal. 2797 // This literal may be used as more than one operand. 2798 // If all these operands are of the same size, 2799 // this literal counts as one scalar value. 2800 // Otherwise it counts as 2 scalar values. 2801 // See "GFX10 Shader Programming", section 3.6.2.3. 2802 2803 unsigned Size = AMDGPU::getOperandSize(Desc, OpIdx); 2804 if (Size < 4) Size = 4; 2805 2806 if (NumLiterals == 0) { 2807 NumLiterals = 1; 2808 LiteralSize = Size; 2809 } else if (LiteralSize != Size) { 2810 NumLiterals = 2; 2811 } 2812 } 2813 } 2814 } 2815 } 2816 ConstantBusUseCount += NumLiterals; 2817 2818 return ConstantBusUseCount <= getConstantBusLimit(Opcode); 2819 } 2820 2821 bool AMDGPUAsmParser::validateEarlyClobberLimitations(const MCInst &Inst) { 2822 const unsigned Opcode = Inst.getOpcode(); 2823 const MCInstrDesc &Desc = MII.get(Opcode); 2824 2825 const int DstIdx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::vdst); 2826 if (DstIdx == -1 || 2827 Desc.getOperandConstraint(DstIdx, MCOI::EARLY_CLOBBER) == -1) { 2828 return true; 2829 } 2830 2831 const MCRegisterInfo *TRI = getContext().getRegisterInfo(); 2832 2833 const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0); 2834 const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1); 2835 const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2); 2836 2837 assert(DstIdx != -1); 2838 const MCOperand &Dst = Inst.getOperand(DstIdx); 2839 assert(Dst.isReg()); 2840 const unsigned DstReg = mc2PseudoReg(Dst.getReg()); 2841 2842 const int SrcIndices[] = { Src0Idx, Src1Idx, Src2Idx }; 2843 2844 for (int SrcIdx : SrcIndices) { 2845 if (SrcIdx == -1) break; 2846 const MCOperand &Src = Inst.getOperand(SrcIdx); 2847 if (Src.isReg()) { 2848 const unsigned SrcReg = mc2PseudoReg(Src.getReg()); 2849 if (isRegIntersect(DstReg, SrcReg, TRI)) { 2850 return false; 2851 } 2852 } 2853 } 2854 2855 return true; 2856 } 2857 2858 bool AMDGPUAsmParser::validateIntClampSupported(const MCInst &Inst) { 2859 2860 const unsigned Opc = Inst.getOpcode(); 2861 const MCInstrDesc &Desc = MII.get(Opc); 2862 2863 if ((Desc.TSFlags & SIInstrFlags::IntClamp) != 0 && !hasIntClamp()) { 2864 int ClampIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp); 2865 assert(ClampIdx != -1); 2866 return Inst.getOperand(ClampIdx).getImm() == 0; 2867 } 2868 2869 return true; 2870 } 2871 2872 bool AMDGPUAsmParser::validateMIMGDataSize(const MCInst &Inst) { 2873 2874 const unsigned Opc = Inst.getOpcode(); 2875 const MCInstrDesc &Desc = MII.get(Opc); 2876 2877 if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0) 2878 return true; 2879 2880 int VDataIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdata); 2881 int DMaskIdx 
= AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask); 2882 int TFEIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::tfe); 2883 2884 assert(VDataIdx != -1); 2885 assert(DMaskIdx != -1); 2886 assert(TFEIdx != -1); 2887 2888 unsigned VDataSize = AMDGPU::getRegOperandSize(getMRI(), Desc, VDataIdx); 2889 unsigned TFESize = Inst.getOperand(TFEIdx).getImm()? 1 : 0; 2890 unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf; 2891 if (DMask == 0) 2892 DMask = 1; 2893 2894 unsigned DataSize = 2895 (Desc.TSFlags & SIInstrFlags::Gather4) ? 4 : countPopulation(DMask); 2896 if (hasPackedD16()) { 2897 int D16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::d16); 2898 if (D16Idx >= 0 && Inst.getOperand(D16Idx).getImm()) 2899 DataSize = (DataSize + 1) / 2; 2900 } 2901 2902 return (VDataSize / 4) == DataSize + TFESize; 2903 } 2904 2905 bool AMDGPUAsmParser::validateMIMGAddrSize(const MCInst &Inst) { 2906 const unsigned Opc = Inst.getOpcode(); 2907 const MCInstrDesc &Desc = MII.get(Opc); 2908 2909 if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0 || !isGFX10()) 2910 return true; 2911 2912 const AMDGPU::MIMGInfo *Info = AMDGPU::getMIMGInfo(Opc); 2913 const AMDGPU::MIMGBaseOpcodeInfo *BaseOpcode = 2914 AMDGPU::getMIMGBaseOpcodeInfo(Info->BaseOpcode); 2915 int VAddr0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vaddr0); 2916 int SrsrcIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::srsrc); 2917 int DimIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dim); 2918 2919 assert(VAddr0Idx != -1); 2920 assert(SrsrcIdx != -1); 2921 assert(DimIdx != -1); 2922 assert(SrsrcIdx > VAddr0Idx); 2923 2924 unsigned Dim = Inst.getOperand(DimIdx).getImm(); 2925 const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfoByEncoding(Dim); 2926 bool IsNSA = SrsrcIdx - VAddr0Idx > 1; 2927 unsigned VAddrSize = 2928 IsNSA ? SrsrcIdx - VAddr0Idx 2929 : AMDGPU::getRegOperandSize(getMRI(), Desc, VAddr0Idx) / 4; 2930 2931 unsigned AddrSize = BaseOpcode->NumExtraArgs + 2932 (BaseOpcode->Gradients ? DimInfo->NumGradients : 0) + 2933 (BaseOpcode->Coordinates ? DimInfo->NumCoords : 0) + 2934 (BaseOpcode->LodOrClampOrMip ? 1 : 0); 2935 if (!IsNSA) { 2936 if (AddrSize > 8) 2937 AddrSize = 16; 2938 else if (AddrSize > 4) 2939 AddrSize = 8; 2940 } 2941 2942 return VAddrSize == AddrSize; 2943 } 2944 2945 bool AMDGPUAsmParser::validateMIMGAtomicDMask(const MCInst &Inst) { 2946 2947 const unsigned Opc = Inst.getOpcode(); 2948 const MCInstrDesc &Desc = MII.get(Opc); 2949 2950 if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0) 2951 return true; 2952 if (!Desc.mayLoad() || !Desc.mayStore()) 2953 return true; // Not atomic 2954 2955 int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask); 2956 unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf; 2957 2958 // This is an incomplete check because image_atomic_cmpswap 2959 // may only use 0x3 and 0xf while other atomic operations 2960 // may use 0x1 and 0x3. However these limitations are 2961 // verified when we check that dmask matches dst size. 
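// For example, a 32-bit image_atomic_add uses dmask 0x1 (0x3 for the
// 64-bit form), and image_atomic_cmpswap uses 0x3 (32-bit) or 0xf (64-bit).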
2962 return DMask == 0x1 || DMask == 0x3 || DMask == 0xf; 2963 } 2964 2965 bool AMDGPUAsmParser::validateMIMGGatherDMask(const MCInst &Inst) { 2966 2967 const unsigned Opc = Inst.getOpcode(); 2968 const MCInstrDesc &Desc = MII.get(Opc); 2969 2970 if ((Desc.TSFlags & SIInstrFlags::Gather4) == 0) 2971 return true; 2972 2973 int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask); 2974 unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf; 2975 2976 // GATHER4 instructions use dmask in a different fashion compared to 2977 // other MIMG instructions. The only useful DMASK values are 2978 // 1=red, 2=green, 4=blue, 8=alpha. (e.g. 1 returns 2979 // (red,red,red,red) etc.) The ISA document doesn't mention 2980 // this. 2981 return DMask == 0x1 || DMask == 0x2 || DMask == 0x4 || DMask == 0x8; 2982 } 2983 2984 bool AMDGPUAsmParser::validateMIMGD16(const MCInst &Inst) { 2985 2986 const unsigned Opc = Inst.getOpcode(); 2987 const MCInstrDesc &Desc = MII.get(Opc); 2988 2989 if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0) 2990 return true; 2991 2992 int D16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::d16); 2993 if (D16Idx >= 0 && Inst.getOperand(D16Idx).getImm()) { 2994 if (isCI() || isSI()) 2995 return false; 2996 } 2997 2998 return true; 2999 } 3000 3001 bool AMDGPUAsmParser::validateMIMGDim(const MCInst &Inst) { 3002 const unsigned Opc = Inst.getOpcode(); 3003 const MCInstrDesc &Desc = MII.get(Opc); 3004 3005 if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0) 3006 return true; 3007 3008 int DimIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dim); 3009 if (DimIdx < 0) 3010 return true; 3011 3012 long Imm = Inst.getOperand(DimIdx).getImm(); 3013 if (Imm < 0 || Imm >= 8) 3014 return false; 3015 3016 return true; 3017 } 3018 3019 static bool IsRevOpcode(const unsigned Opcode) 3020 { 3021 switch (Opcode) { 3022 case AMDGPU::V_SUBREV_F32_e32: 3023 case AMDGPU::V_SUBREV_F32_e64: 3024 case AMDGPU::V_SUBREV_F32_e32_gfx10: 3025 case AMDGPU::V_SUBREV_F32_e32_gfx6_gfx7: 3026 case AMDGPU::V_SUBREV_F32_e32_vi: 3027 case AMDGPU::V_SUBREV_F32_e64_gfx10: 3028 case AMDGPU::V_SUBREV_F32_e64_gfx6_gfx7: 3029 case AMDGPU::V_SUBREV_F32_e64_vi: 3030 3031 case AMDGPU::V_SUBREV_I32_e32: 3032 case AMDGPU::V_SUBREV_I32_e64: 3033 case AMDGPU::V_SUBREV_I32_e32_gfx6_gfx7: 3034 case AMDGPU::V_SUBREV_I32_e64_gfx6_gfx7: 3035 3036 case AMDGPU::V_SUBBREV_U32_e32: 3037 case AMDGPU::V_SUBBREV_U32_e64: 3038 case AMDGPU::V_SUBBREV_U32_e32_gfx6_gfx7: 3039 case AMDGPU::V_SUBBREV_U32_e32_vi: 3040 case AMDGPU::V_SUBBREV_U32_e64_gfx6_gfx7: 3041 case AMDGPU::V_SUBBREV_U32_e64_vi: 3042 3043 case AMDGPU::V_SUBREV_U32_e32: 3044 case AMDGPU::V_SUBREV_U32_e64: 3045 case AMDGPU::V_SUBREV_U32_e32_gfx9: 3046 case AMDGPU::V_SUBREV_U32_e32_vi: 3047 case AMDGPU::V_SUBREV_U32_e64_gfx9: 3048 case AMDGPU::V_SUBREV_U32_e64_vi: 3049 3050 case AMDGPU::V_SUBREV_F16_e32: 3051 case AMDGPU::V_SUBREV_F16_e64: 3052 case AMDGPU::V_SUBREV_F16_e32_gfx10: 3053 case AMDGPU::V_SUBREV_F16_e32_vi: 3054 case AMDGPU::V_SUBREV_F16_e64_gfx10: 3055 case AMDGPU::V_SUBREV_F16_e64_vi: 3056 3057 case AMDGPU::V_SUBREV_U16_e32: 3058 case AMDGPU::V_SUBREV_U16_e64: 3059 case AMDGPU::V_SUBREV_U16_e32_vi: 3060 case AMDGPU::V_SUBREV_U16_e64_vi: 3061 3062 case AMDGPU::V_SUBREV_CO_U32_e32_gfx9: 3063 case AMDGPU::V_SUBREV_CO_U32_e64_gfx10: 3064 case AMDGPU::V_SUBREV_CO_U32_e64_gfx9: 3065 3066 case AMDGPU::V_SUBBREV_CO_U32_e32_gfx9: 3067 case AMDGPU::V_SUBBREV_CO_U32_e64_gfx9: 3068 3069 case AMDGPU::V_SUBREV_NC_U32_e32_gfx10: 3070 case 
AMDGPU::V_SUBREV_NC_U32_e64_gfx10: 3071 3072 case AMDGPU::V_SUBREV_CO_CI_U32_e32_gfx10: 3073 case AMDGPU::V_SUBREV_CO_CI_U32_e64_gfx10: 3074 3075 case AMDGPU::V_LSHRREV_B32_e32: 3076 case AMDGPU::V_LSHRREV_B32_e64: 3077 case AMDGPU::V_LSHRREV_B32_e32_gfx6_gfx7: 3078 case AMDGPU::V_LSHRREV_B32_e64_gfx6_gfx7: 3079 case AMDGPU::V_LSHRREV_B32_e32_vi: 3080 case AMDGPU::V_LSHRREV_B32_e64_vi: 3081 case AMDGPU::V_LSHRREV_B32_e32_gfx10: 3082 case AMDGPU::V_LSHRREV_B32_e64_gfx10: 3083 3084 case AMDGPU::V_ASHRREV_I32_e32: 3085 case AMDGPU::V_ASHRREV_I32_e64: 3086 case AMDGPU::V_ASHRREV_I32_e32_gfx10: 3087 case AMDGPU::V_ASHRREV_I32_e32_gfx6_gfx7: 3088 case AMDGPU::V_ASHRREV_I32_e32_vi: 3089 case AMDGPU::V_ASHRREV_I32_e64_gfx10: 3090 case AMDGPU::V_ASHRREV_I32_e64_gfx6_gfx7: 3091 case AMDGPU::V_ASHRREV_I32_e64_vi: 3092 3093 case AMDGPU::V_LSHLREV_B32_e32: 3094 case AMDGPU::V_LSHLREV_B32_e64: 3095 case AMDGPU::V_LSHLREV_B32_e32_gfx10: 3096 case AMDGPU::V_LSHLREV_B32_e32_gfx6_gfx7: 3097 case AMDGPU::V_LSHLREV_B32_e32_vi: 3098 case AMDGPU::V_LSHLREV_B32_e64_gfx10: 3099 case AMDGPU::V_LSHLREV_B32_e64_gfx6_gfx7: 3100 case AMDGPU::V_LSHLREV_B32_e64_vi: 3101 3102 case AMDGPU::V_LSHLREV_B16_e32: 3103 case AMDGPU::V_LSHLREV_B16_e64: 3104 case AMDGPU::V_LSHLREV_B16_e32_vi: 3105 case AMDGPU::V_LSHLREV_B16_e64_vi: 3106 case AMDGPU::V_LSHLREV_B16_gfx10: 3107 3108 case AMDGPU::V_LSHRREV_B16_e32: 3109 case AMDGPU::V_LSHRREV_B16_e64: 3110 case AMDGPU::V_LSHRREV_B16_e32_vi: 3111 case AMDGPU::V_LSHRREV_B16_e64_vi: 3112 case AMDGPU::V_LSHRREV_B16_gfx10: 3113 3114 case AMDGPU::V_ASHRREV_I16_e32: 3115 case AMDGPU::V_ASHRREV_I16_e64: 3116 case AMDGPU::V_ASHRREV_I16_e32_vi: 3117 case AMDGPU::V_ASHRREV_I16_e64_vi: 3118 case AMDGPU::V_ASHRREV_I16_gfx10: 3119 3120 case AMDGPU::V_LSHLREV_B64: 3121 case AMDGPU::V_LSHLREV_B64_gfx10: 3122 case AMDGPU::V_LSHLREV_B64_vi: 3123 3124 case AMDGPU::V_LSHRREV_B64: 3125 case AMDGPU::V_LSHRREV_B64_gfx10: 3126 case AMDGPU::V_LSHRREV_B64_vi: 3127 3128 case AMDGPU::V_ASHRREV_I64: 3129 case AMDGPU::V_ASHRREV_I64_gfx10: 3130 case AMDGPU::V_ASHRREV_I64_vi: 3131 3132 case AMDGPU::V_PK_LSHLREV_B16: 3133 case AMDGPU::V_PK_LSHLREV_B16_gfx10: 3134 case AMDGPU::V_PK_LSHLREV_B16_vi: 3135 3136 case AMDGPU::V_PK_LSHRREV_B16: 3137 case AMDGPU::V_PK_LSHRREV_B16_gfx10: 3138 case AMDGPU::V_PK_LSHRREV_B16_vi: 3139 case AMDGPU::V_PK_ASHRREV_I16: 3140 case AMDGPU::V_PK_ASHRREV_I16_gfx10: 3141 case AMDGPU::V_PK_ASHRREV_I16_vi: 3142 return true; 3143 default: 3144 return false; 3145 } 3146 } 3147 3148 bool AMDGPUAsmParser::validateLdsDirect(const MCInst &Inst) { 3149 3150 using namespace SIInstrFlags; 3151 const unsigned Opcode = Inst.getOpcode(); 3152 const MCInstrDesc &Desc = MII.get(Opcode); 3153 3154 // lds_direct register is defined so that it can be used 3155 // with 9-bit operands only. Ignore encodings which do not accept these. 3156 if ((Desc.TSFlags & (VOP1 | VOP2 | VOP3 | VOPC | VOP3P | SIInstrFlags::SDWA)) == 0) 3157 return true; 3158 3159 const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0); 3160 const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1); 3161 const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2); 3162 3163 const int SrcIndices[] = { Src1Idx, Src2Idx }; 3164 3165 // lds_direct cannot be specified as either src1 or src2. 
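// For example, "v_add_f32 v0, v1, lds_direct" is rejected here (lds_direct
// in src1), while lds_direct used as src0 is subject only to the additional
// checks below.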
3166 for (int SrcIdx : SrcIndices) { 3167 if (SrcIdx == -1) break; 3168 const MCOperand &Src = Inst.getOperand(SrcIdx); 3169 if (Src.isReg() && Src.getReg() == LDS_DIRECT) { 3170 return false; 3171 } 3172 } 3173 3174 if (Src0Idx == -1) 3175 return true; 3176 3177 const MCOperand &Src = Inst.getOperand(Src0Idx); 3178 if (!Src.isReg() || Src.getReg() != LDS_DIRECT) 3179 return true; 3180 3181 // lds_direct is specified as src0. Check additional limitations. 3182 return (Desc.TSFlags & SIInstrFlags::SDWA) == 0 && !IsRevOpcode(Opcode); 3183 } 3184 3185 SMLoc AMDGPUAsmParser::getFlatOffsetLoc(const OperandVector &Operands) const { 3186 for (unsigned i = 1, e = Operands.size(); i != e; ++i) { 3187 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 3188 if (Op.isFlatOffset()) 3189 return Op.getStartLoc(); 3190 } 3191 return getLoc(); 3192 } 3193 3194 bool AMDGPUAsmParser::validateFlatOffset(const MCInst &Inst, 3195 const OperandVector &Operands) { 3196 uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags; 3197 if ((TSFlags & SIInstrFlags::FLAT) == 0) 3198 return true; 3199 3200 auto Opcode = Inst.getOpcode(); 3201 auto OpNum = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::offset); 3202 assert(OpNum != -1); 3203 3204 const auto &Op = Inst.getOperand(OpNum); 3205 if (!hasFlatOffsets() && Op.getImm() != 0) { 3206 Error(getFlatOffsetLoc(Operands), 3207 "flat offset modifier is not supported on this GPU"); 3208 return false; 3209 } 3210 3211 // Address offset is 12-bit signed for GFX10, 13-bit for GFX9. 3212 // For FLAT segment the offset must be positive; 3213 // MSB is ignored and forced to zero. 3214 unsigned OffsetSize = isGFX9() ? 13 : 12; 3215 if (TSFlags & SIInstrFlags::IsNonFlatSeg) { 3216 if (!isIntN(OffsetSize, Op.getImm())) { 3217 Error(getFlatOffsetLoc(Operands), 3218 isGFX9() ? "expected a 13-bit signed offset" : 3219 "expected a 12-bit signed offset"); 3220 return false; 3221 } 3222 } else { 3223 if (!isUIntN(OffsetSize - 1, Op.getImm())) { 3224 Error(getFlatOffsetLoc(Operands), 3225 isGFX9() ? 
"expected a 12-bit unsigned offset" : 3226 "expected an 11-bit unsigned offset"); 3227 return false; 3228 } 3229 } 3230 3231 return true; 3232 } 3233 3234 bool AMDGPUAsmParser::validateSOPLiteral(const MCInst &Inst) const { 3235 unsigned Opcode = Inst.getOpcode(); 3236 const MCInstrDesc &Desc = MII.get(Opcode); 3237 if (!(Desc.TSFlags & (SIInstrFlags::SOP2 | SIInstrFlags::SOPC))) 3238 return true; 3239 3240 const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0); 3241 const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1); 3242 3243 const int OpIndices[] = { Src0Idx, Src1Idx }; 3244 3245 unsigned NumLiterals = 0; 3246 uint32_t LiteralValue; 3247 3248 for (int OpIdx : OpIndices) { 3249 if (OpIdx == -1) break; 3250 3251 const MCOperand &MO = Inst.getOperand(OpIdx); 3252 if (MO.isImm() && 3253 // Exclude special imm operands (like that used by s_set_gpr_idx_on) 3254 AMDGPU::isSISrcOperand(Desc, OpIdx) && 3255 !isInlineConstant(Inst, OpIdx)) { 3256 uint32_t Value = static_cast<uint32_t>(MO.getImm()); 3257 if (NumLiterals == 0 || LiteralValue != Value) { 3258 LiteralValue = Value; 3259 ++NumLiterals; 3260 } 3261 } 3262 } 3263 3264 return NumLiterals <= 1; 3265 } 3266 3267 bool AMDGPUAsmParser::validateOpSel(const MCInst &Inst) { 3268 const unsigned Opc = Inst.getOpcode(); 3269 if (Opc == AMDGPU::V_PERMLANE16_B32_gfx10 || 3270 Opc == AMDGPU::V_PERMLANEX16_B32_gfx10) { 3271 int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel); 3272 unsigned OpSel = Inst.getOperand(OpSelIdx).getImm(); 3273 3274 if (OpSel & ~3) 3275 return false; 3276 } 3277 return true; 3278 } 3279 3280 // Check if VCC register matches wavefront size 3281 bool AMDGPUAsmParser::validateVccOperand(unsigned Reg) const { 3282 auto FB = getFeatureBits(); 3283 return (FB[AMDGPU::FeatureWavefrontSize64] && Reg == AMDGPU::VCC) || 3284 (FB[AMDGPU::FeatureWavefrontSize32] && Reg == AMDGPU::VCC_LO); 3285 } 3286 3287 // VOP3 literal is only allowed in GFX10+ and only one can be used 3288 bool AMDGPUAsmParser::validateVOP3Literal(const MCInst &Inst) const { 3289 unsigned Opcode = Inst.getOpcode(); 3290 const MCInstrDesc &Desc = MII.get(Opcode); 3291 if (!(Desc.TSFlags & (SIInstrFlags::VOP3 | SIInstrFlags::VOP3P))) 3292 return true; 3293 3294 const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0); 3295 const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1); 3296 const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2); 3297 3298 const int OpIndices[] = { Src0Idx, Src1Idx, Src2Idx }; 3299 3300 unsigned NumLiterals = 0; 3301 uint32_t LiteralValue; 3302 3303 for (int OpIdx : OpIndices) { 3304 if (OpIdx == -1) break; 3305 3306 const MCOperand &MO = Inst.getOperand(OpIdx); 3307 if (!MO.isImm() || !AMDGPU::isSISrcOperand(Desc, OpIdx)) 3308 continue; 3309 3310 if (OpIdx == Src2Idx && (Desc.TSFlags & SIInstrFlags::IsMAI) && 3311 getFeatureBits()[AMDGPU::FeatureMFMAInlineLiteralBug]) 3312 return false; 3313 3314 if (!isInlineConstant(Inst, OpIdx)) { 3315 uint32_t Value = static_cast<uint32_t>(MO.getImm()); 3316 if (NumLiterals == 0 || LiteralValue != Value) { 3317 LiteralValue = Value; 3318 ++NumLiterals; 3319 } 3320 } 3321 } 3322 3323 return !NumLiterals || 3324 (NumLiterals == 1 && getFeatureBits()[AMDGPU::FeatureVOP3Literal]); 3325 } 3326 3327 bool AMDGPUAsmParser::validateInstruction(const MCInst &Inst, 3328 const SMLoc &IDLoc, 3329 const OperandVector &Operands) { 3330 if (!validateLdsDirect(Inst)) { 3331 Error(IDLoc, 3332 
"invalid use of lds_direct"); 3333 return false; 3334 } 3335 if (!validateSOPLiteral(Inst)) { 3336 Error(IDLoc, 3337 "only one literal operand is allowed"); 3338 return false; 3339 } 3340 if (!validateVOP3Literal(Inst)) { 3341 Error(IDLoc, 3342 "invalid literal operand"); 3343 return false; 3344 } 3345 if (!validateConstantBusLimitations(Inst)) { 3346 Error(IDLoc, 3347 "invalid operand (violates constant bus restrictions)"); 3348 return false; 3349 } 3350 if (!validateEarlyClobberLimitations(Inst)) { 3351 Error(IDLoc, 3352 "destination must be different than all sources"); 3353 return false; 3354 } 3355 if (!validateIntClampSupported(Inst)) { 3356 Error(IDLoc, 3357 "integer clamping is not supported on this GPU"); 3358 return false; 3359 } 3360 if (!validateOpSel(Inst)) { 3361 Error(IDLoc, 3362 "invalid op_sel operand"); 3363 return false; 3364 } 3365 // For MUBUF/MTBUF d16 is a part of opcode, so there is nothing to validate. 3366 if (!validateMIMGD16(Inst)) { 3367 Error(IDLoc, 3368 "d16 modifier is not supported on this GPU"); 3369 return false; 3370 } 3371 if (!validateMIMGDim(Inst)) { 3372 Error(IDLoc, "dim modifier is required on this GPU"); 3373 return false; 3374 } 3375 if (!validateMIMGDataSize(Inst)) { 3376 Error(IDLoc, 3377 "image data size does not match dmask and tfe"); 3378 return false; 3379 } 3380 if (!validateMIMGAddrSize(Inst)) { 3381 Error(IDLoc, 3382 "image address size does not match dim and a16"); 3383 return false; 3384 } 3385 if (!validateMIMGAtomicDMask(Inst)) { 3386 Error(IDLoc, 3387 "invalid atomic image dmask"); 3388 return false; 3389 } 3390 if (!validateMIMGGatherDMask(Inst)) { 3391 Error(IDLoc, 3392 "invalid image_gather dmask: only one bit must be set"); 3393 return false; 3394 } 3395 if (!validateFlatOffset(Inst, Operands)) { 3396 return false; 3397 } 3398 3399 return true; 3400 } 3401 3402 static std::string AMDGPUMnemonicSpellCheck(StringRef S, 3403 const FeatureBitset &FBS, 3404 unsigned VariantID = 0); 3405 3406 bool AMDGPUAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode, 3407 OperandVector &Operands, 3408 MCStreamer &Out, 3409 uint64_t &ErrorInfo, 3410 bool MatchingInlineAsm) { 3411 MCInst Inst; 3412 unsigned Result = Match_Success; 3413 for (auto Variant : getMatchedVariants()) { 3414 uint64_t EI; 3415 auto R = MatchInstructionImpl(Operands, Inst, EI, MatchingInlineAsm, 3416 Variant); 3417 // We order match statuses from least to most specific. 
We use most specific 3418 // status as resulting 3419 // Match_MnemonicFail < Match_InvalidOperand < Match_MissingFeature < Match_PreferE32 3420 if ((R == Match_Success) || 3421 (R == Match_PreferE32) || 3422 (R == Match_MissingFeature && Result != Match_PreferE32) || 3423 (R == Match_InvalidOperand && Result != Match_MissingFeature 3424 && Result != Match_PreferE32) || 3425 (R == Match_MnemonicFail && Result != Match_InvalidOperand 3426 && Result != Match_MissingFeature 3427 && Result != Match_PreferE32)) { 3428 Result = R; 3429 ErrorInfo = EI; 3430 } 3431 if (R == Match_Success) 3432 break; 3433 } 3434 3435 switch (Result) { 3436 default: break; 3437 case Match_Success: 3438 if (!validateInstruction(Inst, IDLoc, Operands)) { 3439 return true; 3440 } 3441 Inst.setLoc(IDLoc); 3442 Out.EmitInstruction(Inst, getSTI()); 3443 return false; 3444 3445 case Match_MissingFeature: 3446 return Error(IDLoc, "instruction not supported on this GPU"); 3447 3448 case Match_MnemonicFail: { 3449 FeatureBitset FBS = ComputeAvailableFeatures(getSTI().getFeatureBits()); 3450 std::string Suggestion = AMDGPUMnemonicSpellCheck( 3451 ((AMDGPUOperand &)*Operands[0]).getToken(), FBS); 3452 return Error(IDLoc, "invalid instruction" + Suggestion, 3453 ((AMDGPUOperand &)*Operands[0]).getLocRange()); 3454 } 3455 3456 case Match_InvalidOperand: { 3457 SMLoc ErrorLoc = IDLoc; 3458 if (ErrorInfo != ~0ULL) { 3459 if (ErrorInfo >= Operands.size()) { 3460 return Error(IDLoc, "too few operands for instruction"); 3461 } 3462 ErrorLoc = ((AMDGPUOperand &)*Operands[ErrorInfo]).getStartLoc(); 3463 if (ErrorLoc == SMLoc()) 3464 ErrorLoc = IDLoc; 3465 } 3466 return Error(ErrorLoc, "invalid operand for instruction"); 3467 } 3468 3469 case Match_PreferE32: 3470 return Error(IDLoc, "internal error: instruction without _e64 suffix " 3471 "should be encoded as e32"); 3472 } 3473 llvm_unreachable("Implement any new match types added!"); 3474 } 3475 3476 bool AMDGPUAsmParser::ParseAsAbsoluteExpression(uint32_t &Ret) { 3477 int64_t Tmp = -1; 3478 if (getLexer().isNot(AsmToken::Integer) && getLexer().isNot(AsmToken::Identifier)) { 3479 return true; 3480 } 3481 if (getParser().parseAbsoluteExpression(Tmp)) { 3482 return true; 3483 } 3484 Ret = static_cast<uint32_t>(Tmp); 3485 return false; 3486 } 3487 3488 bool AMDGPUAsmParser::ParseDirectiveMajorMinor(uint32_t &Major, 3489 uint32_t &Minor) { 3490 if (ParseAsAbsoluteExpression(Major)) 3491 return TokError("invalid major version"); 3492 3493 if (getLexer().isNot(AsmToken::Comma)) 3494 return TokError("minor version number required, comma expected"); 3495 Lex(); 3496 3497 if (ParseAsAbsoluteExpression(Minor)) 3498 return TokError("invalid minor version"); 3499 3500 return false; 3501 } 3502 3503 bool AMDGPUAsmParser::ParseDirectiveAMDGCNTarget() { 3504 if (getSTI().getTargetTriple().getArch() != Triple::amdgcn) 3505 return TokError("directive only supported for amdgcn architecture"); 3506 3507 std::string Target; 3508 3509 SMLoc TargetStart = getTok().getLoc(); 3510 if (getParser().parseEscapedString(Target)) 3511 return true; 3512 SMRange TargetRange = SMRange(TargetStart, getTok().getLoc()); 3513 3514 std::string ExpectedTarget; 3515 raw_string_ostream ExpectedTargetOS(ExpectedTarget); 3516 IsaInfo::streamIsaVersion(&getSTI(), ExpectedTargetOS); 3517 3518 if (Target != ExpectedTargetOS.str()) 3519 return getParser().Error(TargetRange.Start, "target must match options", 3520 TargetRange); 3521 3522 getTargetStreamer().EmitDirectiveAMDGCNTarget(Target); 3523 return false; 3524 } 3525 3526 bool 
AMDGPUAsmParser::OutOfRangeError(SMRange Range) { 3527 return getParser().Error(Range.Start, "value out of range", Range); 3528 } 3529 3530 bool AMDGPUAsmParser::calculateGPRBlocks( 3531 const FeatureBitset &Features, bool VCCUsed, bool FlatScrUsed, 3532 bool XNACKUsed, Optional<bool> EnableWavefrontSize32, unsigned NextFreeVGPR, 3533 SMRange VGPRRange, unsigned NextFreeSGPR, SMRange SGPRRange, 3534 unsigned &VGPRBlocks, unsigned &SGPRBlocks) { 3535 // TODO(scott.linder): These calculations are duplicated from 3536 // AMDGPUAsmPrinter::getSIProgramInfo and could be unified. 3537 IsaVersion Version = getIsaVersion(getSTI().getCPU()); 3538 3539 unsigned NumVGPRs = NextFreeVGPR; 3540 unsigned NumSGPRs = NextFreeSGPR; 3541 3542 if (Version.Major >= 10) 3543 NumSGPRs = 0; 3544 else { 3545 unsigned MaxAddressableNumSGPRs = 3546 IsaInfo::getAddressableNumSGPRs(&getSTI()); 3547 3548 if (Version.Major >= 8 && !Features.test(FeatureSGPRInitBug) && 3549 NumSGPRs > MaxAddressableNumSGPRs) 3550 return OutOfRangeError(SGPRRange); 3551 3552 NumSGPRs += 3553 IsaInfo::getNumExtraSGPRs(&getSTI(), VCCUsed, FlatScrUsed, XNACKUsed); 3554 3555 if ((Version.Major <= 7 || Features.test(FeatureSGPRInitBug)) && 3556 NumSGPRs > MaxAddressableNumSGPRs) 3557 return OutOfRangeError(SGPRRange); 3558 3559 if (Features.test(FeatureSGPRInitBug)) 3560 NumSGPRs = IsaInfo::FIXED_NUM_SGPRS_FOR_INIT_BUG; 3561 } 3562 3563 VGPRBlocks = 3564 IsaInfo::getNumVGPRBlocks(&getSTI(), NumVGPRs, EnableWavefrontSize32); 3565 SGPRBlocks = IsaInfo::getNumSGPRBlocks(&getSTI(), NumSGPRs); 3566 3567 return false; 3568 } 3569 3570 bool AMDGPUAsmParser::ParseDirectiveAMDHSAKernel() { 3571 if (getSTI().getTargetTriple().getArch() != Triple::amdgcn) 3572 return TokError("directive only supported for amdgcn architecture"); 3573 3574 if (getSTI().getTargetTriple().getOS() != Triple::AMDHSA) 3575 return TokError("directive only supported for amdhsa OS"); 3576 3577 StringRef KernelName; 3578 if (getParser().parseIdentifier(KernelName)) 3579 return true; 3580 3581 kernel_descriptor_t KD = getDefaultAmdhsaKernelDescriptor(&getSTI()); 3582 3583 StringSet<> Seen; 3584 3585 IsaVersion IVersion = getIsaVersion(getSTI().getCPU()); 3586 3587 SMRange VGPRRange; 3588 uint64_t NextFreeVGPR = 0; 3589 SMRange SGPRRange; 3590 uint64_t NextFreeSGPR = 0; 3591 unsigned UserSGPRCount = 0; 3592 bool ReserveVCC = true; 3593 bool ReserveFlatScr = true; 3594 bool ReserveXNACK = hasXNACK(); 3595 Optional<bool> EnableWavefrontSize32; 3596 3597 while (true) { 3598 while (getLexer().is(AsmToken::EndOfStatement)) 3599 Lex(); 3600 3601 if (getLexer().isNot(AsmToken::Identifier)) 3602 return TokError("expected .amdhsa_ directive or .end_amdhsa_kernel"); 3603 3604 StringRef ID = getTok().getIdentifier(); 3605 SMRange IDRange = getTok().getLocRange(); 3606 Lex(); 3607 3608 if (ID == ".end_amdhsa_kernel") 3609 break; 3610 3611 if (Seen.find(ID) != Seen.end()) 3612 return TokError(".amdhsa_ directives cannot be repeated"); 3613 Seen.insert(ID); 3614 3615 SMLoc ValStart = getTok().getLoc(); 3616 int64_t IVal; 3617 if (getParser().parseAbsoluteExpression(IVal)) 3618 return true; 3619 SMLoc ValEnd = getTok().getLoc(); 3620 SMRange ValRange = SMRange(ValStart, ValEnd); 3621 3622 if (IVal < 0) 3623 return OutOfRangeError(ValRange); 3624 3625 uint64_t Val = IVal; 3626 3627 #define PARSE_BITS_ENTRY(FIELD, ENTRY, VALUE, RANGE) \ 3628 if (!isUInt<ENTRY##_WIDTH>(VALUE)) \ 3629 return OutOfRangeError(RANGE); \ 3630 AMDHSA_BITS_SET(FIELD, ENTRY, VALUE); 3631 3632 if (ID == 
".amdhsa_group_segment_fixed_size") { 3633 if (!isUInt<sizeof(KD.group_segment_fixed_size) * CHAR_BIT>(Val)) 3634 return OutOfRangeError(ValRange); 3635 KD.group_segment_fixed_size = Val; 3636 } else if (ID == ".amdhsa_private_segment_fixed_size") { 3637 if (!isUInt<sizeof(KD.private_segment_fixed_size) * CHAR_BIT>(Val)) 3638 return OutOfRangeError(ValRange); 3639 KD.private_segment_fixed_size = Val; 3640 } else if (ID == ".amdhsa_user_sgpr_private_segment_buffer") { 3641 PARSE_BITS_ENTRY(KD.kernel_code_properties, 3642 KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_BUFFER, 3643 Val, ValRange); 3644 if (Val) 3645 UserSGPRCount += 4; 3646 } else if (ID == ".amdhsa_user_sgpr_dispatch_ptr") { 3647 PARSE_BITS_ENTRY(KD.kernel_code_properties, 3648 KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR, Val, 3649 ValRange); 3650 if (Val) 3651 UserSGPRCount += 2; 3652 } else if (ID == ".amdhsa_user_sgpr_queue_ptr") { 3653 PARSE_BITS_ENTRY(KD.kernel_code_properties, 3654 KERNEL_CODE_PROPERTY_ENABLE_SGPR_QUEUE_PTR, Val, 3655 ValRange); 3656 if (Val) 3657 UserSGPRCount += 2; 3658 } else if (ID == ".amdhsa_user_sgpr_kernarg_segment_ptr") { 3659 PARSE_BITS_ENTRY(KD.kernel_code_properties, 3660 KERNEL_CODE_PROPERTY_ENABLE_SGPR_KERNARG_SEGMENT_PTR, 3661 Val, ValRange); 3662 if (Val) 3663 UserSGPRCount += 2; 3664 } else if (ID == ".amdhsa_user_sgpr_dispatch_id") { 3665 PARSE_BITS_ENTRY(KD.kernel_code_properties, 3666 KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_ID, Val, 3667 ValRange); 3668 if (Val) 3669 UserSGPRCount += 2; 3670 } else if (ID == ".amdhsa_user_sgpr_flat_scratch_init") { 3671 PARSE_BITS_ENTRY(KD.kernel_code_properties, 3672 KERNEL_CODE_PROPERTY_ENABLE_SGPR_FLAT_SCRATCH_INIT, Val, 3673 ValRange); 3674 if (Val) 3675 UserSGPRCount += 2; 3676 } else if (ID == ".amdhsa_user_sgpr_private_segment_size") { 3677 PARSE_BITS_ENTRY(KD.kernel_code_properties, 3678 KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_SIZE, 3679 Val, ValRange); 3680 if (Val) 3681 UserSGPRCount += 1; 3682 } else if (ID == ".amdhsa_wavefront_size32") { 3683 if (IVersion.Major < 10) 3684 return getParser().Error(IDRange.Start, "directive requires gfx10+", 3685 IDRange); 3686 EnableWavefrontSize32 = Val; 3687 PARSE_BITS_ENTRY(KD.kernel_code_properties, 3688 KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32, 3689 Val, ValRange); 3690 } else if (ID == ".amdhsa_system_sgpr_private_segment_wavefront_offset") { 3691 PARSE_BITS_ENTRY( 3692 KD.compute_pgm_rsrc2, 3693 COMPUTE_PGM_RSRC2_ENABLE_SGPR_PRIVATE_SEGMENT_WAVEFRONT_OFFSET, Val, 3694 ValRange); 3695 } else if (ID == ".amdhsa_system_sgpr_workgroup_id_x") { 3696 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 3697 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_X, Val, 3698 ValRange); 3699 } else if (ID == ".amdhsa_system_sgpr_workgroup_id_y") { 3700 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 3701 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Y, Val, 3702 ValRange); 3703 } else if (ID == ".amdhsa_system_sgpr_workgroup_id_z") { 3704 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 3705 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Z, Val, 3706 ValRange); 3707 } else if (ID == ".amdhsa_system_sgpr_workgroup_info") { 3708 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 3709 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_INFO, Val, 3710 ValRange); 3711 } else if (ID == ".amdhsa_system_vgpr_workitem_id") { 3712 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 3713 COMPUTE_PGM_RSRC2_ENABLE_VGPR_WORKITEM_ID, Val, 3714 ValRange); 3715 } else if (ID == ".amdhsa_next_free_vgpr") { 3716 VGPRRange = ValRange; 3717 NextFreeVGPR = Val; 3718 } else if (ID == 
".amdhsa_next_free_sgpr") { 3719 SGPRRange = ValRange; 3720 NextFreeSGPR = Val; 3721 } else if (ID == ".amdhsa_reserve_vcc") { 3722 if (!isUInt<1>(Val)) 3723 return OutOfRangeError(ValRange); 3724 ReserveVCC = Val; 3725 } else if (ID == ".amdhsa_reserve_flat_scratch") { 3726 if (IVersion.Major < 7) 3727 return getParser().Error(IDRange.Start, "directive requires gfx7+", 3728 IDRange); 3729 if (!isUInt<1>(Val)) 3730 return OutOfRangeError(ValRange); 3731 ReserveFlatScr = Val; 3732 } else if (ID == ".amdhsa_reserve_xnack_mask") { 3733 if (IVersion.Major < 8) 3734 return getParser().Error(IDRange.Start, "directive requires gfx8+", 3735 IDRange); 3736 if (!isUInt<1>(Val)) 3737 return OutOfRangeError(ValRange); 3738 ReserveXNACK = Val; 3739 } else if (ID == ".amdhsa_float_round_mode_32") { 3740 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, 3741 COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_32, Val, ValRange); 3742 } else if (ID == ".amdhsa_float_round_mode_16_64") { 3743 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, 3744 COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_16_64, Val, ValRange); 3745 } else if (ID == ".amdhsa_float_denorm_mode_32") { 3746 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, 3747 COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_32, Val, ValRange); 3748 } else if (ID == ".amdhsa_float_denorm_mode_16_64") { 3749 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, 3750 COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_16_64, Val, 3751 ValRange); 3752 } else if (ID == ".amdhsa_dx10_clamp") { 3753 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, 3754 COMPUTE_PGM_RSRC1_ENABLE_DX10_CLAMP, Val, ValRange); 3755 } else if (ID == ".amdhsa_ieee_mode") { 3756 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_ENABLE_IEEE_MODE, 3757 Val, ValRange); 3758 } else if (ID == ".amdhsa_fp16_overflow") { 3759 if (IVersion.Major < 9) 3760 return getParser().Error(IDRange.Start, "directive requires gfx9+", 3761 IDRange); 3762 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_FP16_OVFL, Val, 3763 ValRange); 3764 } else if (ID == ".amdhsa_workgroup_processor_mode") { 3765 if (IVersion.Major < 10) 3766 return getParser().Error(IDRange.Start, "directive requires gfx10+", 3767 IDRange); 3768 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_WGP_MODE, Val, 3769 ValRange); 3770 } else if (ID == ".amdhsa_memory_ordered") { 3771 if (IVersion.Major < 10) 3772 return getParser().Error(IDRange.Start, "directive requires gfx10+", 3773 IDRange); 3774 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_MEM_ORDERED, Val, 3775 ValRange); 3776 } else if (ID == ".amdhsa_forward_progress") { 3777 if (IVersion.Major < 10) 3778 return getParser().Error(IDRange.Start, "directive requires gfx10+", 3779 IDRange); 3780 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_FWD_PROGRESS, Val, 3781 ValRange); 3782 } else if (ID == ".amdhsa_exception_fp_ieee_invalid_op") { 3783 PARSE_BITS_ENTRY( 3784 KD.compute_pgm_rsrc2, 3785 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INVALID_OPERATION, Val, 3786 ValRange); 3787 } else if (ID == ".amdhsa_exception_fp_denorm_src") { 3788 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 3789 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_FP_DENORMAL_SOURCE, 3790 Val, ValRange); 3791 } else if (ID == ".amdhsa_exception_fp_ieee_div_zero") { 3792 PARSE_BITS_ENTRY( 3793 KD.compute_pgm_rsrc2, 3794 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_DIVISION_BY_ZERO, Val, 3795 ValRange); 3796 } else if (ID == ".amdhsa_exception_fp_ieee_overflow") { 3797 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 3798 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_OVERFLOW, 3799 Val, ValRange); 3800 } else if 
(ID == ".amdhsa_exception_fp_ieee_underflow") { 3801 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 3802 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_UNDERFLOW, 3803 Val, ValRange); 3804 } else if (ID == ".amdhsa_exception_fp_ieee_inexact") { 3805 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 3806 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INEXACT, 3807 Val, ValRange); 3808 } else if (ID == ".amdhsa_exception_int_div_zero") { 3809 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 3810 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_INT_DIVIDE_BY_ZERO, 3811 Val, ValRange); 3812 } else { 3813 return getParser().Error(IDRange.Start, 3814 "unknown .amdhsa_kernel directive", IDRange); 3815 } 3816 3817 #undef PARSE_BITS_ENTRY 3818 } 3819 3820 if (Seen.find(".amdhsa_next_free_vgpr") == Seen.end()) 3821 return TokError(".amdhsa_next_free_vgpr directive is required"); 3822 3823 if (Seen.find(".amdhsa_next_free_sgpr") == Seen.end()) 3824 return TokError(".amdhsa_next_free_sgpr directive is required"); 3825 3826 unsigned VGPRBlocks; 3827 unsigned SGPRBlocks; 3828 if (calculateGPRBlocks(getFeatureBits(), ReserveVCC, ReserveFlatScr, 3829 ReserveXNACK, EnableWavefrontSize32, NextFreeVGPR, 3830 VGPRRange, NextFreeSGPR, SGPRRange, VGPRBlocks, 3831 SGPRBlocks)) 3832 return true; 3833 3834 if (!isUInt<COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT_WIDTH>( 3835 VGPRBlocks)) 3836 return OutOfRangeError(VGPRRange); 3837 AMDHSA_BITS_SET(KD.compute_pgm_rsrc1, 3838 COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT, VGPRBlocks); 3839 3840 if (!isUInt<COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT_WIDTH>( 3841 SGPRBlocks)) 3842 return OutOfRangeError(SGPRRange); 3843 AMDHSA_BITS_SET(KD.compute_pgm_rsrc1, 3844 COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT, 3845 SGPRBlocks); 3846 3847 if (!isUInt<COMPUTE_PGM_RSRC2_USER_SGPR_COUNT_WIDTH>(UserSGPRCount)) 3848 return TokError("too many user SGPRs enabled"); 3849 AMDHSA_BITS_SET(KD.compute_pgm_rsrc2, COMPUTE_PGM_RSRC2_USER_SGPR_COUNT, 3850 UserSGPRCount); 3851 3852 getTargetStreamer().EmitAmdhsaKernelDescriptor( 3853 getSTI(), KernelName, KD, NextFreeVGPR, NextFreeSGPR, ReserveVCC, 3854 ReserveFlatScr, ReserveXNACK); 3855 return false; 3856 } 3857 3858 bool AMDGPUAsmParser::ParseDirectiveHSACodeObjectVersion() { 3859 uint32_t Major; 3860 uint32_t Minor; 3861 3862 if (ParseDirectiveMajorMinor(Major, Minor)) 3863 return true; 3864 3865 getTargetStreamer().EmitDirectiveHSACodeObjectVersion(Major, Minor); 3866 return false; 3867 } 3868 3869 bool AMDGPUAsmParser::ParseDirectiveHSACodeObjectISA() { 3870 uint32_t Major; 3871 uint32_t Minor; 3872 uint32_t Stepping; 3873 StringRef VendorName; 3874 StringRef ArchName; 3875 3876 // If this directive has no arguments, then use the ISA version for the 3877 // targeted GPU. 
3878 if (getLexer().is(AsmToken::EndOfStatement)) { 3879 AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU()); 3880 getTargetStreamer().EmitDirectiveHSACodeObjectISA(ISA.Major, ISA.Minor, 3881 ISA.Stepping, 3882 "AMD", "AMDGPU"); 3883 return false; 3884 } 3885 3886 if (ParseDirectiveMajorMinor(Major, Minor)) 3887 return true; 3888 3889 if (getLexer().isNot(AsmToken::Comma)) 3890 return TokError("stepping version number required, comma expected"); 3891 Lex(); 3892 3893 if (ParseAsAbsoluteExpression(Stepping)) 3894 return TokError("invalid stepping version"); 3895 3896 if (getLexer().isNot(AsmToken::Comma)) 3897 return TokError("vendor name required, comma expected"); 3898 Lex(); 3899 3900 if (getLexer().isNot(AsmToken::String)) 3901 return TokError("invalid vendor name"); 3902 3903 VendorName = getLexer().getTok().getStringContents(); 3904 Lex(); 3905 3906 if (getLexer().isNot(AsmToken::Comma)) 3907 return TokError("arch name required, comma expected"); 3908 Lex(); 3909 3910 if (getLexer().isNot(AsmToken::String)) 3911 return TokError("invalid arch name"); 3912 3913 ArchName = getLexer().getTok().getStringContents(); 3914 Lex(); 3915 3916 getTargetStreamer().EmitDirectiveHSACodeObjectISA(Major, Minor, Stepping, 3917 VendorName, ArchName); 3918 return false; 3919 } 3920 3921 bool AMDGPUAsmParser::ParseAMDKernelCodeTValue(StringRef ID, 3922 amd_kernel_code_t &Header) { 3923 // max_scratch_backing_memory_byte_size is deprecated. Ignore it while parsing 3924 // assembly for backwards compatibility. 3925 if (ID == "max_scratch_backing_memory_byte_size") { 3926 Parser.eatToEndOfStatement(); 3927 return false; 3928 } 3929 3930 SmallString<40> ErrStr; 3931 raw_svector_ostream Err(ErrStr); 3932 if (!parseAmdKernelCodeField(ID, getParser(), Header, Err)) { 3933 return TokError(Err.str()); 3934 } 3935 Lex(); 3936 3937 if (ID == "enable_wavefront_size32") { 3938 if (Header.code_properties & AMD_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32) { 3939 if (!isGFX10()) 3940 return TokError("enable_wavefront_size32=1 is only allowed on GFX10+"); 3941 if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize32]) 3942 return TokError("enable_wavefront_size32=1 requires +WavefrontSize32"); 3943 } else { 3944 if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize64]) 3945 return TokError("enable_wavefront_size32=0 requires +WavefrontSize64"); 3946 } 3947 } 3948 3949 if (ID == "wavefront_size") { 3950 if (Header.wavefront_size == 5) { 3951 if (!isGFX10()) 3952 return TokError("wavefront_size=5 is only allowed on GFX10+"); 3953 if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize32]) 3954 return TokError("wavefront_size=5 requires +WavefrontSize32"); 3955 } else if (Header.wavefront_size == 6) { 3956 if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize64]) 3957 return TokError("wavefront_size=6 requires +WavefrontSize64"); 3958 } 3959 } 3960 3961 if (ID == "enable_wgp_mode") { 3962 if (G_00B848_WGP_MODE(Header.compute_pgm_resource_registers) && !isGFX10()) 3963 return TokError("enable_wgp_mode=1 is only allowed on GFX10+"); 3964 } 3965 3966 if (ID == "enable_mem_ordered") { 3967 if (G_00B848_MEM_ORDERED(Header.compute_pgm_resource_registers) && !isGFX10()) 3968 return TokError("enable_mem_ordered=1 is only allowed on GFX10+"); 3969 } 3970 3971 if (ID == "enable_fwd_progress") { 3972 if (G_00B848_FWD_PROGRESS(Header.compute_pgm_resource_registers) && !isGFX10()) 3973 return TokError("enable_fwd_progress=1 is only allowed on GFX10+"); 3974 } 3975 3976 return false; 3977 } 3978 3979 bool 
AMDGPUAsmParser::ParseDirectiveAMDKernelCodeT() { 3980 amd_kernel_code_t Header; 3981 AMDGPU::initDefaultAMDKernelCodeT(Header, &getSTI()); 3982 3983 while (true) { 3984 // Lex EndOfStatement. This is in a while loop, because lexing a comment 3985 // will set the current token to EndOfStatement. 3986 while(getLexer().is(AsmToken::EndOfStatement)) 3987 Lex(); 3988 3989 if (getLexer().isNot(AsmToken::Identifier)) 3990 return TokError("expected value identifier or .end_amd_kernel_code_t"); 3991 3992 StringRef ID = getLexer().getTok().getIdentifier(); 3993 Lex(); 3994 3995 if (ID == ".end_amd_kernel_code_t") 3996 break; 3997 3998 if (ParseAMDKernelCodeTValue(ID, Header)) 3999 return true; 4000 } 4001 4002 getTargetStreamer().EmitAMDKernelCodeT(Header); 4003 4004 return false; 4005 } 4006 4007 bool AMDGPUAsmParser::ParseDirectiveAMDGPUHsaKernel() { 4008 if (getLexer().isNot(AsmToken::Identifier)) 4009 return TokError("expected symbol name"); 4010 4011 StringRef KernelName = Parser.getTok().getString(); 4012 4013 getTargetStreamer().EmitAMDGPUSymbolType(KernelName, 4014 ELF::STT_AMDGPU_HSA_KERNEL); 4015 Lex(); 4016 if (!AMDGPU::IsaInfo::hasCodeObjectV3(&getSTI())) 4017 KernelScope.initialize(getContext()); 4018 return false; 4019 } 4020 4021 bool AMDGPUAsmParser::ParseDirectiveISAVersion() { 4022 if (getSTI().getTargetTriple().getArch() != Triple::amdgcn) { 4023 return Error(getParser().getTok().getLoc(), 4024 ".amd_amdgpu_isa directive is not available on non-amdgcn " 4025 "architectures"); 4026 } 4027 4028 auto ISAVersionStringFromASM = getLexer().getTok().getStringContents(); 4029 4030 std::string ISAVersionStringFromSTI; 4031 raw_string_ostream ISAVersionStreamFromSTI(ISAVersionStringFromSTI); 4032 IsaInfo::streamIsaVersion(&getSTI(), ISAVersionStreamFromSTI); 4033 4034 if (ISAVersionStringFromASM != ISAVersionStreamFromSTI.str()) { 4035 return Error(getParser().getTok().getLoc(), 4036 ".amd_amdgpu_isa directive does not match triple and/or mcpu " 4037 "arguments specified through the command line"); 4038 } 4039 4040 getTargetStreamer().EmitISAVersion(ISAVersionStreamFromSTI.str()); 4041 Lex(); 4042 4043 return false; 4044 } 4045 4046 bool AMDGPUAsmParser::ParseDirectiveHSAMetadata() { 4047 const char *AssemblerDirectiveBegin; 4048 const char *AssemblerDirectiveEnd; 4049 std::tie(AssemblerDirectiveBegin, AssemblerDirectiveEnd) = 4050 AMDGPU::IsaInfo::hasCodeObjectV3(&getSTI()) 4051 ? std::make_tuple(HSAMD::V3::AssemblerDirectiveBegin, 4052 HSAMD::V3::AssemblerDirectiveEnd) 4053 : std::make_tuple(HSAMD::AssemblerDirectiveBegin, 4054 HSAMD::AssemblerDirectiveEnd); 4055 4056 if (getSTI().getTargetTriple().getOS() != Triple::AMDHSA) { 4057 return Error(getParser().getTok().getLoc(), 4058 (Twine(AssemblerDirectiveBegin) + Twine(" directive is " 4059 "not available on non-amdhsa OSes")).str()); 4060 } 4061 4062 std::string HSAMetadataString; 4063 if (ParseToEndDirective(AssemblerDirectiveBegin, AssemblerDirectiveEnd, 4064 HSAMetadataString)) 4065 return true; 4066 4067 if (IsaInfo::hasCodeObjectV3(&getSTI())) { 4068 if (!getTargetStreamer().EmitHSAMetadataV3(HSAMetadataString)) 4069 return Error(getParser().getTok().getLoc(), "invalid HSA metadata"); 4070 } else { 4071 if (!getTargetStreamer().EmitHSAMetadataV2(HSAMetadataString)) 4072 return Error(getParser().getTok().getLoc(), "invalid HSA metadata"); 4073 } 4074 4075 return false; 4076 } 4077 4078 /// Common code to parse out a block of text (typically YAML) between start and 4079 /// end directives. 
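/// Everything up to (but not including) AssemblerDirectiveEnd is accumulated
/// into CollectString; returns true on error, e.g. if EOF is reached before
/// the end directive is found.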
4080 bool AMDGPUAsmParser::ParseToEndDirective(const char *AssemblerDirectiveBegin, 4081 const char *AssemblerDirectiveEnd, 4082 std::string &CollectString) { 4083 4084 raw_string_ostream CollectStream(CollectString); 4085 4086 getLexer().setSkipSpace(false); 4087 4088 bool FoundEnd = false; 4089 while (!getLexer().is(AsmToken::Eof)) { 4090 while (getLexer().is(AsmToken::Space)) { 4091 CollectStream << getLexer().getTok().getString(); 4092 Lex(); 4093 } 4094 4095 if (getLexer().is(AsmToken::Identifier)) { 4096 StringRef ID = getLexer().getTok().getIdentifier(); 4097 if (ID == AssemblerDirectiveEnd) { 4098 Lex(); 4099 FoundEnd = true; 4100 break; 4101 } 4102 } 4103 4104 CollectStream << Parser.parseStringToEndOfStatement() 4105 << getContext().getAsmInfo()->getSeparatorString(); 4106 4107 Parser.eatToEndOfStatement(); 4108 } 4109 4110 getLexer().setSkipSpace(true); 4111 4112 if (getLexer().is(AsmToken::Eof) && !FoundEnd) { 4113 return TokError(Twine("expected directive ") + 4114 Twine(AssemblerDirectiveEnd) + Twine(" not found")); 4115 } 4116 4117 CollectStream.flush(); 4118 return false; 4119 } 4120 4121 /// Parse the assembler directive for new MsgPack-format PAL metadata. 4122 bool AMDGPUAsmParser::ParseDirectivePALMetadataBegin() { 4123 std::string String; 4124 if (ParseToEndDirective(AMDGPU::PALMD::AssemblerDirectiveBegin, 4125 AMDGPU::PALMD::AssemblerDirectiveEnd, String)) 4126 return true; 4127 4128 auto PALMetadata = getTargetStreamer().getPALMetadata(); 4129 if (!PALMetadata->setFromString(String)) 4130 return Error(getParser().getTok().getLoc(), "invalid PAL metadata"); 4131 return false; 4132 } 4133 4134 /// Parse the assembler directive for old linear-format PAL metadata. 4135 bool AMDGPUAsmParser::ParseDirectivePALMetadata() { 4136 if (getSTI().getTargetTriple().getOS() != Triple::AMDPAL) { 4137 return Error(getParser().getTok().getLoc(), 4138 (Twine(PALMD::AssemblerDirective) + Twine(" directive is " 4139 "not available on non-amdpal OSes")).str()); 4140 } 4141 4142 auto PALMetadata = getTargetStreamer().getPALMetadata(); 4143 PALMetadata->setLegacy(); 4144 for (;;) { 4145 uint32_t Key, Value; 4146 if (ParseAsAbsoluteExpression(Key)) { 4147 return TokError(Twine("invalid value in ") + 4148 Twine(PALMD::AssemblerDirective)); 4149 } 4150 if (getLexer().isNot(AsmToken::Comma)) { 4151 return TokError(Twine("expected an even number of values in ") + 4152 Twine(PALMD::AssemblerDirective)); 4153 } 4154 Lex(); 4155 if (ParseAsAbsoluteExpression(Value)) { 4156 return TokError(Twine("invalid value in ") + 4157 Twine(PALMD::AssemblerDirective)); 4158 } 4159 PALMetadata->setRegister(Key, Value); 4160 if (getLexer().isNot(AsmToken::Comma)) 4161 break; 4162 Lex(); 4163 } 4164 return false; 4165 } 4166 4167 /// ParseDirectiveAMDGPULDS 4168 /// ::= .amdgpu_lds identifier ',' size_expression [',' align_expression] 4169 bool AMDGPUAsmParser::ParseDirectiveAMDGPULDS() { 4170 if (getParser().checkForValidSection()) 4171 return true; 4172 4173 StringRef Name; 4174 SMLoc NameLoc = getLexer().getLoc(); 4175 if (getParser().parseIdentifier(Name)) 4176 return TokError("expected identifier in directive"); 4177 4178 MCSymbol *Symbol = getContext().getOrCreateSymbol(Name); 4179 if (parseToken(AsmToken::Comma, "expected ','")) 4180 return true; 4181 4182 unsigned LocalMemorySize = AMDGPU::IsaInfo::getLocalMemorySize(&getSTI()); 4183 4184 int64_t Size; 4185 SMLoc SizeLoc = getLexer().getLoc(); 4186 if (getParser().parseAbsoluteExpression(Size)) 4187 return true; 4188 if (Size < 0) 4189 return 
Error(SizeLoc, "size must be non-negative"); 4190 if (Size > LocalMemorySize) 4191 return Error(SizeLoc, "size is too large"); 4192 4193 int64_t Align = 4; 4194 if (getLexer().is(AsmToken::Comma)) { 4195 Lex(); 4196 SMLoc AlignLoc = getLexer().getLoc(); 4197 if (getParser().parseAbsoluteExpression(Align)) 4198 return true; 4199 if (Align < 0 || !isPowerOf2_64(Align)) 4200 return Error(AlignLoc, "alignment must be a power of two"); 4201 4202 // Alignment larger than the size of LDS is possible in theory, as long 4203 // as the linker manages to place to symbol at address 0, but we do want 4204 // to make sure the alignment fits nicely into a 32-bit integer. 4205 if (Align >= 1u << 31) 4206 return Error(AlignLoc, "alignment is too large"); 4207 } 4208 4209 if (parseToken(AsmToken::EndOfStatement, 4210 "unexpected token in '.amdgpu_lds' directive")) 4211 return true; 4212 4213 Symbol->redefineIfPossible(); 4214 if (!Symbol->isUndefined()) 4215 return Error(NameLoc, "invalid symbol redefinition"); 4216 4217 getTargetStreamer().emitAMDGPULDS(Symbol, Size, Align); 4218 return false; 4219 } 4220 4221 bool AMDGPUAsmParser::ParseDirective(AsmToken DirectiveID) { 4222 StringRef IDVal = DirectiveID.getString(); 4223 4224 if (AMDGPU::IsaInfo::hasCodeObjectV3(&getSTI())) { 4225 if (IDVal == ".amdgcn_target") 4226 return ParseDirectiveAMDGCNTarget(); 4227 4228 if (IDVal == ".amdhsa_kernel") 4229 return ParseDirectiveAMDHSAKernel(); 4230 4231 // TODO: Restructure/combine with PAL metadata directive. 4232 if (IDVal == AMDGPU::HSAMD::V3::AssemblerDirectiveBegin) 4233 return ParseDirectiveHSAMetadata(); 4234 } else { 4235 if (IDVal == ".hsa_code_object_version") 4236 return ParseDirectiveHSACodeObjectVersion(); 4237 4238 if (IDVal == ".hsa_code_object_isa") 4239 return ParseDirectiveHSACodeObjectISA(); 4240 4241 if (IDVal == ".amd_kernel_code_t") 4242 return ParseDirectiveAMDKernelCodeT(); 4243 4244 if (IDVal == ".amdgpu_hsa_kernel") 4245 return ParseDirectiveAMDGPUHsaKernel(); 4246 4247 if (IDVal == ".amd_amdgpu_isa") 4248 return ParseDirectiveISAVersion(); 4249 4250 if (IDVal == AMDGPU::HSAMD::AssemblerDirectiveBegin) 4251 return ParseDirectiveHSAMetadata(); 4252 } 4253 4254 if (IDVal == ".amdgpu_lds") 4255 return ParseDirectiveAMDGPULDS(); 4256 4257 if (IDVal == PALMD::AssemblerDirectiveBegin) 4258 return ParseDirectivePALMetadataBegin(); 4259 4260 if (IDVal == PALMD::AssemblerDirective) 4261 return ParseDirectivePALMetadata(); 4262 4263 return true; 4264 } 4265 4266 bool AMDGPUAsmParser::subtargetHasRegister(const MCRegisterInfo &MRI, 4267 unsigned RegNo) const { 4268 4269 for (MCRegAliasIterator R(AMDGPU::TTMP12_TTMP13_TTMP14_TTMP15, &MRI, true); 4270 R.isValid(); ++R) { 4271 if (*R == RegNo) 4272 return isGFX9() || isGFX10(); 4273 } 4274 4275 // GFX10 has 2 more SGPRs 104 and 105. 
4276 for (MCRegAliasIterator R(AMDGPU::SGPR104_SGPR105, &MRI, true); 4277 R.isValid(); ++R) { 4278 if (*R == RegNo) 4279 return hasSGPR104_SGPR105(); 4280 } 4281 4282 switch (RegNo) { 4283 case AMDGPU::SRC_SHARED_BASE: 4284 case AMDGPU::SRC_SHARED_LIMIT: 4285 case AMDGPU::SRC_PRIVATE_BASE: 4286 case AMDGPU::SRC_PRIVATE_LIMIT: 4287 case AMDGPU::SRC_POPS_EXITING_WAVE_ID: 4288 return !isCI() && !isSI() && !isVI(); 4289 case AMDGPU::TBA: 4290 case AMDGPU::TBA_LO: 4291 case AMDGPU::TBA_HI: 4292 case AMDGPU::TMA: 4293 case AMDGPU::TMA_LO: 4294 case AMDGPU::TMA_HI: 4295 return !isGFX9() && !isGFX10(); 4296 case AMDGPU::XNACK_MASK: 4297 case AMDGPU::XNACK_MASK_LO: 4298 case AMDGPU::XNACK_MASK_HI: 4299 return !isCI() && !isSI() && !isGFX10() && hasXNACK(); 4300 case AMDGPU::SGPR_NULL: 4301 return isGFX10(); 4302 default: 4303 break; 4304 } 4305 4306 if (isCI()) 4307 return true; 4308 4309 if (isSI() || isGFX10()) { 4310 // No flat_scr on SI. 4311 // On GFX10 flat scratch is not a valid register operand and can only be 4312 // accessed with s_setreg/s_getreg. 4313 switch (RegNo) { 4314 case AMDGPU::FLAT_SCR: 4315 case AMDGPU::FLAT_SCR_LO: 4316 case AMDGPU::FLAT_SCR_HI: 4317 return false; 4318 default: 4319 return true; 4320 } 4321 } 4322 4323 // VI only has 102 SGPRs, so make sure we aren't trying to use the 2 more that 4324 // SI/CI have. 4325 for (MCRegAliasIterator R(AMDGPU::SGPR102_SGPR103, &MRI, true); 4326 R.isValid(); ++R) { 4327 if (*R == RegNo) 4328 return hasSGPR102_SGPR103(); 4329 } 4330 4331 return true; 4332 } 4333 4334 OperandMatchResultTy 4335 AMDGPUAsmParser::parseOperand(OperandVector &Operands, StringRef Mnemonic, 4336 OperandMode Mode) { 4337 // Try to parse with a custom parser 4338 OperandMatchResultTy ResTy = MatchOperandParserImpl(Operands, Mnemonic); 4339 4340 // If we successfully parsed the operand or if there as an error parsing, 4341 // we are done. 4342 // 4343 // If we are parsing after we reach EndOfStatement then this means we 4344 // are appending default values to the Operands list. This is only done 4345 // by custom parser, so we shouldn't continue on to the generic parsing. 4346 if (ResTy == MatchOperand_Success || ResTy == MatchOperand_ParseFail || 4347 getLexer().is(AsmToken::EndOfStatement)) 4348 return ResTy; 4349 4350 if (Mode == OperandMode_NSA && getLexer().is(AsmToken::LBrac)) { 4351 unsigned Prefix = Operands.size(); 4352 SMLoc LBraceLoc = getTok().getLoc(); 4353 Parser.Lex(); // eat the '[' 4354 4355 for (;;) { 4356 ResTy = parseReg(Operands); 4357 if (ResTy != MatchOperand_Success) 4358 return ResTy; 4359 4360 if (getLexer().is(AsmToken::RBrac)) 4361 break; 4362 4363 if (getLexer().isNot(AsmToken::Comma)) 4364 return MatchOperand_ParseFail; 4365 Parser.Lex(); 4366 } 4367 4368 if (Operands.size() - Prefix > 1) { 4369 Operands.insert(Operands.begin() + Prefix, 4370 AMDGPUOperand::CreateToken(this, "[", LBraceLoc)); 4371 Operands.push_back(AMDGPUOperand::CreateToken(this, "]", 4372 getTok().getLoc())); 4373 } 4374 4375 Parser.Lex(); // eat the ']' 4376 return MatchOperand_Success; 4377 } 4378 4379 return parseRegOrImm(Operands); 4380 } 4381 4382 StringRef AMDGPUAsmParser::parseMnemonicSuffix(StringRef Name) { 4383 // Clear any forced encodings from the previous instruction. 
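  // A "_e32", "_e64", "_dpp" or "_sdwa" suffix on the mnemonic forces the
  // corresponding encoding and is stripped from the returned name, e.g.
  // "v_add_f32_e64" is matched as "v_add_f32" with a forced 64-bit encoding.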
4384 setForcedEncodingSize(0); 4385 setForcedDPP(false); 4386 setForcedSDWA(false); 4387 4388 if (Name.endswith("_e64")) { 4389 setForcedEncodingSize(64); 4390 return Name.substr(0, Name.size() - 4); 4391 } else if (Name.endswith("_e32")) { 4392 setForcedEncodingSize(32); 4393 return Name.substr(0, Name.size() - 4); 4394 } else if (Name.endswith("_dpp")) { 4395 setForcedDPP(true); 4396 return Name.substr(0, Name.size() - 4); 4397 } else if (Name.endswith("_sdwa")) { 4398 setForcedSDWA(true); 4399 return Name.substr(0, Name.size() - 5); 4400 } 4401 return Name; 4402 } 4403 4404 bool AMDGPUAsmParser::ParseInstruction(ParseInstructionInfo &Info, 4405 StringRef Name, 4406 SMLoc NameLoc, OperandVector &Operands) { 4407 // Add the instruction mnemonic 4408 Name = parseMnemonicSuffix(Name); 4409 Operands.push_back(AMDGPUOperand::CreateToken(this, Name, NameLoc)); 4410 4411 bool IsMIMG = Name.startswith("image_"); 4412 4413 while (!getLexer().is(AsmToken::EndOfStatement)) { 4414 OperandMode Mode = OperandMode_Default; 4415 if (IsMIMG && isGFX10() && Operands.size() == 2) 4416 Mode = OperandMode_NSA; 4417 OperandMatchResultTy Res = parseOperand(Operands, Name, Mode); 4418 4419 // Eat the comma or space if there is one. 4420 if (getLexer().is(AsmToken::Comma)) 4421 Parser.Lex(); 4422 4423 switch (Res) { 4424 case MatchOperand_Success: break; 4425 case MatchOperand_ParseFail: 4426 // FIXME: use real operand location rather than the current location. 4427 Error(getLexer().getLoc(), "failed parsing operand."); 4428 while (!getLexer().is(AsmToken::EndOfStatement)) { 4429 Parser.Lex(); 4430 } 4431 return true; 4432 case MatchOperand_NoMatch: 4433 // FIXME: use real operand location rather than the current location. 4434 Error(getLexer().getLoc(), "not a valid operand."); 4435 while (!getLexer().is(AsmToken::EndOfStatement)) { 4436 Parser.Lex(); 4437 } 4438 return true; 4439 } 4440 } 4441 4442 return false; 4443 } 4444 4445 //===----------------------------------------------------------------------===// 4446 // Utility functions 4447 //===----------------------------------------------------------------------===// 4448 4449 OperandMatchResultTy 4450 AMDGPUAsmParser::parseIntWithPrefix(const char *Prefix, int64_t &IntVal) { 4451 4452 if (!trySkipId(Prefix, AsmToken::Colon)) 4453 return MatchOperand_NoMatch; 4454 4455 return parseExpr(IntVal) ? 
MatchOperand_Success : MatchOperand_ParseFail; 4456 } 4457 4458 OperandMatchResultTy 4459 AMDGPUAsmParser::parseIntWithPrefix(const char *Prefix, OperandVector &Operands, 4460 AMDGPUOperand::ImmTy ImmTy, 4461 bool (*ConvertResult)(int64_t&)) { 4462 SMLoc S = getLoc(); 4463 int64_t Value = 0; 4464 4465 OperandMatchResultTy Res = parseIntWithPrefix(Prefix, Value); 4466 if (Res != MatchOperand_Success) 4467 return Res; 4468 4469 if (ConvertResult && !ConvertResult(Value)) { 4470 Error(S, "invalid " + StringRef(Prefix) + " value."); 4471 } 4472 4473 Operands.push_back(AMDGPUOperand::CreateImm(this, Value, S, ImmTy)); 4474 return MatchOperand_Success; 4475 } 4476 4477 OperandMatchResultTy 4478 AMDGPUAsmParser::parseOperandArrayWithPrefix(const char *Prefix, 4479 OperandVector &Operands, 4480 AMDGPUOperand::ImmTy ImmTy, 4481 bool (*ConvertResult)(int64_t&)) { 4482 SMLoc S = getLoc(); 4483 if (!trySkipId(Prefix, AsmToken::Colon)) 4484 return MatchOperand_NoMatch; 4485 4486 if (!skipToken(AsmToken::LBrac, "expected a left square bracket")) 4487 return MatchOperand_ParseFail; 4488 4489 unsigned Val = 0; 4490 const unsigned MaxSize = 4; 4491 4492 // FIXME: How to verify the number of elements matches the number of src 4493 // operands? 4494 for (int I = 0; ; ++I) { 4495 int64_t Op; 4496 SMLoc Loc = getLoc(); 4497 if (!parseExpr(Op)) 4498 return MatchOperand_ParseFail; 4499 4500 if (Op != 0 && Op != 1) { 4501 Error(Loc, "invalid " + StringRef(Prefix) + " value."); 4502 return MatchOperand_ParseFail; 4503 } 4504 4505 Val |= (Op << I); 4506 4507 if (trySkipToken(AsmToken::RBrac)) 4508 break; 4509 4510 if (I + 1 == MaxSize) { 4511 Error(getLoc(), "expected a closing square bracket"); 4512 return MatchOperand_ParseFail; 4513 } 4514 4515 if (!skipToken(AsmToken::Comma, "expected a comma")) 4516 return MatchOperand_ParseFail; 4517 } 4518 4519 Operands.push_back(AMDGPUOperand::CreateImm(this, Val, S, ImmTy)); 4520 return MatchOperand_Success; 4521 } 4522 4523 OperandMatchResultTy 4524 AMDGPUAsmParser::parseNamedBit(const char *Name, OperandVector &Operands, 4525 AMDGPUOperand::ImmTy ImmTy) { 4526 int64_t Bit = 0; 4527 SMLoc S = Parser.getTok().getLoc(); 4528 4529 // We are at the end of the statement, and this is a default argument, so 4530 // use a default value. 
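  // Otherwise the bit is set by naming it and cleared by its "no"-prefixed
  // form, e.g. "glc" vs "noglc".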
4531 if (getLexer().isNot(AsmToken::EndOfStatement)) { 4532 switch(getLexer().getKind()) { 4533 case AsmToken::Identifier: { 4534 StringRef Tok = Parser.getTok().getString(); 4535 if (Tok == Name) { 4536 if (Tok == "r128" && isGFX9()) 4537 Error(S, "r128 modifier is not supported on this GPU"); 4538 if (Tok == "a16" && !isGFX9() && !isGFX10()) 4539 Error(S, "a16 modifier is not supported on this GPU"); 4540 Bit = 1; 4541 Parser.Lex(); 4542 } else if (Tok.startswith("no") && Tok.endswith(Name)) { 4543 Bit = 0; 4544 Parser.Lex(); 4545 } else { 4546 return MatchOperand_NoMatch; 4547 } 4548 break; 4549 } 4550 default: 4551 return MatchOperand_NoMatch; 4552 } 4553 } 4554 4555 if (!isGFX10() && ImmTy == AMDGPUOperand::ImmTyDLC) 4556 return MatchOperand_ParseFail; 4557 4558 Operands.push_back(AMDGPUOperand::CreateImm(this, Bit, S, ImmTy)); 4559 return MatchOperand_Success; 4560 } 4561 4562 static void addOptionalImmOperand( 4563 MCInst& Inst, const OperandVector& Operands, 4564 AMDGPUAsmParser::OptionalImmIndexMap& OptionalIdx, 4565 AMDGPUOperand::ImmTy ImmT, 4566 int64_t Default = 0) { 4567 auto i = OptionalIdx.find(ImmT); 4568 if (i != OptionalIdx.end()) { 4569 unsigned Idx = i->second; 4570 ((AMDGPUOperand &)*Operands[Idx]).addImmOperands(Inst, 1); 4571 } else { 4572 Inst.addOperand(MCOperand::createImm(Default)); 4573 } 4574 } 4575 4576 OperandMatchResultTy 4577 AMDGPUAsmParser::parseStringWithPrefix(StringRef Prefix, StringRef &Value) { 4578 if (getLexer().isNot(AsmToken::Identifier)) { 4579 return MatchOperand_NoMatch; 4580 } 4581 StringRef Tok = Parser.getTok().getString(); 4582 if (Tok != Prefix) { 4583 return MatchOperand_NoMatch; 4584 } 4585 4586 Parser.Lex(); 4587 if (getLexer().isNot(AsmToken::Colon)) { 4588 return MatchOperand_ParseFail; 4589 } 4590 4591 Parser.Lex(); 4592 if (getLexer().isNot(AsmToken::Identifier)) { 4593 return MatchOperand_ParseFail; 4594 } 4595 4596 Value = Parser.getTok().getString(); 4597 return MatchOperand_Success; 4598 } 4599 4600 // dfmt and nfmt (in a tbuffer instruction) are parsed as one to allow their 4601 // values to live in a joint format operand in the MCInst encoding. 4602 OperandMatchResultTy 4603 AMDGPUAsmParser::parseDfmtNfmt(OperandVector &Operands) { 4604 SMLoc S = Parser.getTok().getLoc(); 4605 int64_t Dfmt = 0, Nfmt = 0; 4606 // dfmt and nfmt can appear in either order, and each is optional. 
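  // For example "dfmt:15, nfmt:2" and "nfmt:2, dfmt:15" are both accepted,
  // as is either key on its own.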
4607 bool GotDfmt = false, GotNfmt = false; 4608 while (!GotDfmt || !GotNfmt) { 4609 if (!GotDfmt) { 4610 auto Res = parseIntWithPrefix("dfmt", Dfmt); 4611 if (Res != MatchOperand_NoMatch) { 4612 if (Res != MatchOperand_Success) 4613 return Res; 4614 if (Dfmt >= 16) { 4615 Error(Parser.getTok().getLoc(), "out of range dfmt"); 4616 return MatchOperand_ParseFail; 4617 } 4618 GotDfmt = true; 4619 Parser.Lex(); 4620 continue; 4621 } 4622 } 4623 if (!GotNfmt) { 4624 auto Res = parseIntWithPrefix("nfmt", Nfmt); 4625 if (Res != MatchOperand_NoMatch) { 4626 if (Res != MatchOperand_Success) 4627 return Res; 4628 if (Nfmt >= 8) { 4629 Error(Parser.getTok().getLoc(), "out of range nfmt"); 4630 return MatchOperand_ParseFail; 4631 } 4632 GotNfmt = true; 4633 Parser.Lex(); 4634 continue; 4635 } 4636 } 4637 break; 4638 } 4639 if (!GotDfmt && !GotNfmt) 4640 return MatchOperand_NoMatch; 4641 auto Format = Dfmt | Nfmt << 4; 4642 Operands.push_back( 4643 AMDGPUOperand::CreateImm(this, Format, S, AMDGPUOperand::ImmTyFORMAT)); 4644 return MatchOperand_Success; 4645 } 4646 4647 //===----------------------------------------------------------------------===// 4648 // ds 4649 //===----------------------------------------------------------------------===// 4650 4651 void AMDGPUAsmParser::cvtDSOffset01(MCInst &Inst, 4652 const OperandVector &Operands) { 4653 OptionalImmIndexMap OptionalIdx; 4654 4655 for (unsigned i = 1, e = Operands.size(); i != e; ++i) { 4656 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 4657 4658 // Add the register arguments 4659 if (Op.isReg()) { 4660 Op.addRegOperands(Inst, 1); 4661 continue; 4662 } 4663 4664 // Handle optional arguments 4665 OptionalIdx[Op.getImmTy()] = i; 4666 } 4667 4668 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset0); 4669 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset1); 4670 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGDS); 4671 4672 Inst.addOperand(MCOperand::createReg(AMDGPU::M0)); // m0 4673 } 4674 4675 void AMDGPUAsmParser::cvtDSImpl(MCInst &Inst, const OperandVector &Operands, 4676 bool IsGdsHardcoded) { 4677 OptionalImmIndexMap OptionalIdx; 4678 4679 for (unsigned i = 1, e = Operands.size(); i != e; ++i) { 4680 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 4681 4682 // Add the register arguments 4683 if (Op.isReg()) { 4684 Op.addRegOperands(Inst, 1); 4685 continue; 4686 } 4687 4688 if (Op.isToken() && Op.getToken() == "gds") { 4689 IsGdsHardcoded = true; 4690 continue; 4691 } 4692 4693 // Handle optional arguments 4694 OptionalIdx[Op.getImmTy()] = i; 4695 } 4696 4697 AMDGPUOperand::ImmTy OffsetType = 4698 (Inst.getOpcode() == AMDGPU::DS_SWIZZLE_B32_gfx10 || 4699 Inst.getOpcode() == AMDGPU::DS_SWIZZLE_B32_gfx6_gfx7 || 4700 Inst.getOpcode() == AMDGPU::DS_SWIZZLE_B32_vi) ? 
AMDGPUOperand::ImmTySwizzle : 4701 AMDGPUOperand::ImmTyOffset; 4702 4703 addOptionalImmOperand(Inst, Operands, OptionalIdx, OffsetType); 4704 4705 if (!IsGdsHardcoded) { 4706 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGDS); 4707 } 4708 Inst.addOperand(MCOperand::createReg(AMDGPU::M0)); // m0 4709 } 4710 4711 void AMDGPUAsmParser::cvtExp(MCInst &Inst, const OperandVector &Operands) { 4712 OptionalImmIndexMap OptionalIdx; 4713 4714 unsigned OperandIdx[4]; 4715 unsigned EnMask = 0; 4716 int SrcIdx = 0; 4717 4718 for (unsigned i = 1, e = Operands.size(); i != e; ++i) { 4719 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 4720 4721 // Add the register arguments 4722 if (Op.isReg()) { 4723 assert(SrcIdx < 4); 4724 OperandIdx[SrcIdx] = Inst.size(); 4725 Op.addRegOperands(Inst, 1); 4726 ++SrcIdx; 4727 continue; 4728 } 4729 4730 if (Op.isOff()) { 4731 assert(SrcIdx < 4); 4732 OperandIdx[SrcIdx] = Inst.size(); 4733 Inst.addOperand(MCOperand::createReg(AMDGPU::NoRegister)); 4734 ++SrcIdx; 4735 continue; 4736 } 4737 4738 if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyExpTgt) { 4739 Op.addImmOperands(Inst, 1); 4740 continue; 4741 } 4742 4743 if (Op.isToken() && Op.getToken() == "done") 4744 continue; 4745 4746 // Handle optional arguments 4747 OptionalIdx[Op.getImmTy()] = i; 4748 } 4749 4750 assert(SrcIdx == 4); 4751 4752 bool Compr = false; 4753 if (OptionalIdx.find(AMDGPUOperand::ImmTyExpCompr) != OptionalIdx.end()) { 4754 Compr = true; 4755 Inst.getOperand(OperandIdx[1]) = Inst.getOperand(OperandIdx[2]); 4756 Inst.getOperand(OperandIdx[2]).setReg(AMDGPU::NoRegister); 4757 Inst.getOperand(OperandIdx[3]).setReg(AMDGPU::NoRegister); 4758 } 4759 4760 for (auto i = 0; i < SrcIdx; ++i) { 4761 if (Inst.getOperand(OperandIdx[i]).getReg() != AMDGPU::NoRegister) { 4762 EnMask |= Compr? 
(0x3 << i * 2) : (0x1 << i); 4763 } 4764 } 4765 4766 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyExpVM); 4767 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyExpCompr); 4768 4769 Inst.addOperand(MCOperand::createImm(EnMask)); 4770 } 4771 4772 //===----------------------------------------------------------------------===// 4773 // s_waitcnt 4774 //===----------------------------------------------------------------------===// 4775 4776 static bool 4777 encodeCnt( 4778 const AMDGPU::IsaVersion ISA, 4779 int64_t &IntVal, 4780 int64_t CntVal, 4781 bool Saturate, 4782 unsigned (*encode)(const IsaVersion &Version, unsigned, unsigned), 4783 unsigned (*decode)(const IsaVersion &Version, unsigned)) 4784 { 4785 bool Failed = false; 4786 4787 IntVal = encode(ISA, IntVal, CntVal); 4788 if (CntVal != decode(ISA, IntVal)) { 4789 if (Saturate) { 4790 IntVal = encode(ISA, IntVal, -1); 4791 } else { 4792 Failed = true; 4793 } 4794 } 4795 return Failed; 4796 } 4797 4798 bool AMDGPUAsmParser::parseCnt(int64_t &IntVal) { 4799 4800 SMLoc CntLoc = getLoc(); 4801 StringRef CntName = getTokenStr(); 4802 4803 if (!skipToken(AsmToken::Identifier, "expected a counter name") || 4804 !skipToken(AsmToken::LParen, "expected a left parenthesis")) 4805 return false; 4806 4807 int64_t CntVal; 4808 SMLoc ValLoc = getLoc(); 4809 if (!parseExpr(CntVal)) 4810 return false; 4811 4812 AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU()); 4813 4814 bool Failed = true; 4815 bool Sat = CntName.endswith("_sat"); 4816 4817 if (CntName == "vmcnt" || CntName == "vmcnt_sat") { 4818 Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeVmcnt, decodeVmcnt); 4819 } else if (CntName == "expcnt" || CntName == "expcnt_sat") { 4820 Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeExpcnt, decodeExpcnt); 4821 } else if (CntName == "lgkmcnt" || CntName == "lgkmcnt_sat") { 4822 Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeLgkmcnt, decodeLgkmcnt); 4823 } else { 4824 Error(CntLoc, "invalid counter name " + CntName); 4825 return false; 4826 } 4827 4828 if (Failed) { 4829 Error(ValLoc, "too large value for " + CntName); 4830 return false; 4831 } 4832 4833 if (!skipToken(AsmToken::RParen, "expected a closing parenthesis")) 4834 return false; 4835 4836 if (trySkipToken(AsmToken::Amp) || trySkipToken(AsmToken::Comma)) { 4837 if (isToken(AsmToken::EndOfStatement)) { 4838 Error(getLoc(), "expected a counter name"); 4839 return false; 4840 } 4841 } 4842 4843 return true; 4844 } 4845 4846 OperandMatchResultTy 4847 AMDGPUAsmParser::parseSWaitCntOps(OperandVector &Operands) { 4848 AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU()); 4849 int64_t Waitcnt = getWaitcntBitMask(ISA); 4850 SMLoc S = getLoc(); 4851 4852 // If parse failed, do not return error code 4853 // to avoid excessive error messages. 
4854 if (isToken(AsmToken::Identifier) && peekToken().is(AsmToken::LParen)) { 4855 while (parseCnt(Waitcnt) && !isToken(AsmToken::EndOfStatement)); 4856 } else { 4857 parseExpr(Waitcnt); 4858 } 4859 4860 Operands.push_back(AMDGPUOperand::CreateImm(this, Waitcnt, S)); 4861 return MatchOperand_Success; 4862 } 4863 4864 bool 4865 AMDGPUOperand::isSWaitCnt() const { 4866 return isImm(); 4867 } 4868 4869 //===----------------------------------------------------------------------===// 4870 // hwreg 4871 //===----------------------------------------------------------------------===// 4872 4873 bool 4874 AMDGPUAsmParser::parseHwregBody(OperandInfoTy &HwReg, 4875 int64_t &Offset, 4876 int64_t &Width) { 4877 using namespace llvm::AMDGPU::Hwreg; 4878 4879 // The register may be specified by name or using a numeric code 4880 if (isToken(AsmToken::Identifier) && 4881 (HwReg.Id = getHwregId(getTokenStr())) >= 0) { 4882 HwReg.IsSymbolic = true; 4883 lex(); // skip message name 4884 } else if (!parseExpr(HwReg.Id)) { 4885 return false; 4886 } 4887 4888 if (trySkipToken(AsmToken::RParen)) 4889 return true; 4890 4891 // parse optional params 4892 return 4893 skipToken(AsmToken::Comma, "expected a comma or a closing parenthesis") && 4894 parseExpr(Offset) && 4895 skipToken(AsmToken::Comma, "expected a comma") && 4896 parseExpr(Width) && 4897 skipToken(AsmToken::RParen, "expected a closing parenthesis"); 4898 } 4899 4900 bool 4901 AMDGPUAsmParser::validateHwreg(const OperandInfoTy &HwReg, 4902 const int64_t Offset, 4903 const int64_t Width, 4904 const SMLoc Loc) { 4905 4906 using namespace llvm::AMDGPU::Hwreg; 4907 4908 if (HwReg.IsSymbolic && !isValidHwreg(HwReg.Id, getSTI())) { 4909 Error(Loc, "specified hardware register is not supported on this GPU"); 4910 return false; 4911 } else if (!isValidHwreg(HwReg.Id)) { 4912 Error(Loc, "invalid code of hardware register: only 6-bit values are legal"); 4913 return false; 4914 } else if (!isValidHwregOffset(Offset)) { 4915 Error(Loc, "invalid bit offset: only 5-bit values are legal"); 4916 return false; 4917 } else if (!isValidHwregWidth(Width)) { 4918 Error(Loc, "invalid bitfield width: only values from 1 to 32 are legal"); 4919 return false; 4920 } 4921 return true; 4922 } 4923 4924 OperandMatchResultTy 4925 AMDGPUAsmParser::parseHwreg(OperandVector &Operands) { 4926 using namespace llvm::AMDGPU::Hwreg; 4927 4928 int64_t ImmVal = 0; 4929 SMLoc Loc = getLoc(); 4930 4931 // If parse failed, do not return error code 4932 // to avoid excessive error messages. 
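  // Accepts "hwreg(<id>[, <offset>, <width>])" with a symbolic or numeric
  // register id, e.g. "hwreg(HW_REG_MODE, 0, 32)", or a raw 16-bit immediate.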
4933 if (trySkipId("hwreg", AsmToken::LParen)) { 4934 OperandInfoTy HwReg(ID_UNKNOWN_); 4935 int64_t Offset = OFFSET_DEFAULT_; 4936 int64_t Width = WIDTH_DEFAULT_; 4937 if (parseHwregBody(HwReg, Offset, Width) && 4938 validateHwreg(HwReg, Offset, Width, Loc)) { 4939 ImmVal = encodeHwreg(HwReg.Id, Offset, Width); 4940 } 4941 } else if (parseExpr(ImmVal)) { 4942 if (ImmVal < 0 || !isUInt<16>(ImmVal)) 4943 Error(Loc, "invalid immediate: only 16-bit values are legal"); 4944 } 4945 4946 Operands.push_back(AMDGPUOperand::CreateImm(this, ImmVal, Loc, AMDGPUOperand::ImmTyHwreg)); 4947 return MatchOperand_Success; 4948 } 4949 4950 bool AMDGPUOperand::isHwreg() const { 4951 return isImmTy(ImmTyHwreg); 4952 } 4953 4954 //===----------------------------------------------------------------------===// 4955 // sendmsg 4956 //===----------------------------------------------------------------------===// 4957 4958 bool 4959 AMDGPUAsmParser::parseSendMsgBody(OperandInfoTy &Msg, 4960 OperandInfoTy &Op, 4961 OperandInfoTy &Stream) { 4962 using namespace llvm::AMDGPU::SendMsg; 4963 4964 if (isToken(AsmToken::Identifier) && (Msg.Id = getMsgId(getTokenStr())) >= 0) { 4965 Msg.IsSymbolic = true; 4966 lex(); // skip message name 4967 } else if (!parseExpr(Msg.Id)) { 4968 return false; 4969 } 4970 4971 if (trySkipToken(AsmToken::Comma)) { 4972 Op.IsDefined = true; 4973 if (isToken(AsmToken::Identifier) && 4974 (Op.Id = getMsgOpId(Msg.Id, getTokenStr())) >= 0) { 4975 lex(); // skip operation name 4976 } else if (!parseExpr(Op.Id)) { 4977 return false; 4978 } 4979 4980 if (trySkipToken(AsmToken::Comma)) { 4981 Stream.IsDefined = true; 4982 if (!parseExpr(Stream.Id)) 4983 return false; 4984 } 4985 } 4986 4987 return skipToken(AsmToken::RParen, "expected a closing parenthesis"); 4988 } 4989 4990 bool 4991 AMDGPUAsmParser::validateSendMsg(const OperandInfoTy &Msg, 4992 const OperandInfoTy &Op, 4993 const OperandInfoTy &Stream, 4994 const SMLoc S) { 4995 using namespace llvm::AMDGPU::SendMsg; 4996 4997 // Validation strictness depends on whether message is specified 4998 // in a symbolc or in a numeric form. In the latter case 4999 // only encoding possibility is checked. 5000 bool Strict = Msg.IsSymbolic; 5001 5002 if (!isValidMsgId(Msg.Id, getSTI(), Strict)) { 5003 Error(S, "invalid message id"); 5004 return false; 5005 } else if (Strict && (msgRequiresOp(Msg.Id) != Op.IsDefined)) { 5006 Error(S, Op.IsDefined ? 5007 "message does not support operations" : 5008 "missing message operation"); 5009 return false; 5010 } else if (!isValidMsgOp(Msg.Id, Op.Id, Strict)) { 5011 Error(S, "invalid operation id"); 5012 return false; 5013 } else if (Strict && !msgSupportsStream(Msg.Id, Op.Id) && Stream.IsDefined) { 5014 Error(S, "message operation does not support streams"); 5015 return false; 5016 } else if (!isValidMsgStream(Msg.Id, Op.Id, Stream.Id, Strict)) { 5017 Error(S, "invalid message stream id"); 5018 return false; 5019 } 5020 return true; 5021 } 5022 5023 OperandMatchResultTy 5024 AMDGPUAsmParser::parseSendMsgOp(OperandVector &Operands) { 5025 using namespace llvm::AMDGPU::SendMsg; 5026 5027 int64_t ImmVal = 0; 5028 SMLoc Loc = getLoc(); 5029 5030 // If parse failed, do not return error code 5031 // to avoid excessive error messages. 
5032 if (trySkipId("sendmsg", AsmToken::LParen)) { 5033 OperandInfoTy Msg(ID_UNKNOWN_); 5034 OperandInfoTy Op(OP_NONE_); 5035 OperandInfoTy Stream(STREAM_ID_NONE_); 5036 if (parseSendMsgBody(Msg, Op, Stream) && 5037 validateSendMsg(Msg, Op, Stream, Loc)) { 5038 ImmVal = encodeMsg(Msg.Id, Op.Id, Stream.Id); 5039 } 5040 } else if (parseExpr(ImmVal)) { 5041 if (ImmVal < 0 || !isUInt<16>(ImmVal)) 5042 Error(Loc, "invalid immediate: only 16-bit values are legal"); 5043 } 5044 5045 Operands.push_back(AMDGPUOperand::CreateImm(this, ImmVal, Loc, AMDGPUOperand::ImmTySendMsg)); 5046 return MatchOperand_Success; 5047 } 5048 5049 bool AMDGPUOperand::isSendMsg() const { 5050 return isImmTy(ImmTySendMsg); 5051 } 5052 5053 //===----------------------------------------------------------------------===// 5054 // v_interp 5055 //===----------------------------------------------------------------------===// 5056 5057 OperandMatchResultTy AMDGPUAsmParser::parseInterpSlot(OperandVector &Operands) { 5058 if (getLexer().getKind() != AsmToken::Identifier) 5059 return MatchOperand_NoMatch; 5060 5061 StringRef Str = Parser.getTok().getString(); 5062 int Slot = StringSwitch<int>(Str) 5063 .Case("p10", 0) 5064 .Case("p20", 1) 5065 .Case("p0", 2) 5066 .Default(-1); 5067 5068 SMLoc S = Parser.getTok().getLoc(); 5069 if (Slot == -1) 5070 return MatchOperand_ParseFail; 5071 5072 Parser.Lex(); 5073 Operands.push_back(AMDGPUOperand::CreateImm(this, Slot, S, 5074 AMDGPUOperand::ImmTyInterpSlot)); 5075 return MatchOperand_Success; 5076 } 5077 5078 OperandMatchResultTy AMDGPUAsmParser::parseInterpAttr(OperandVector &Operands) { 5079 if (getLexer().getKind() != AsmToken::Identifier) 5080 return MatchOperand_NoMatch; 5081 5082 StringRef Str = Parser.getTok().getString(); 5083 if (!Str.startswith("attr")) 5084 return MatchOperand_NoMatch; 5085 5086 StringRef Chan = Str.take_back(2); 5087 int AttrChan = StringSwitch<int>(Chan) 5088 .Case(".x", 0) 5089 .Case(".y", 1) 5090 .Case(".z", 2) 5091 .Case(".w", 3) 5092 .Default(-1); 5093 if (AttrChan == -1) 5094 return MatchOperand_ParseFail; 5095 5096 Str = Str.drop_back(2).drop_front(4); 5097 5098 uint8_t Attr; 5099 if (Str.getAsInteger(10, Attr)) 5100 return MatchOperand_ParseFail; 5101 5102 SMLoc S = Parser.getTok().getLoc(); 5103 Parser.Lex(); 5104 if (Attr > 63) { 5105 Error(S, "out of bounds attr"); 5106 return MatchOperand_Success; 5107 } 5108 5109 SMLoc SChan = SMLoc::getFromPointer(Chan.data()); 5110 5111 Operands.push_back(AMDGPUOperand::CreateImm(this, Attr, S, 5112 AMDGPUOperand::ImmTyInterpAttr)); 5113 Operands.push_back(AMDGPUOperand::CreateImm(this, AttrChan, SChan, 5114 AMDGPUOperand::ImmTyAttrChan)); 5115 return MatchOperand_Success; 5116 } 5117 5118 //===----------------------------------------------------------------------===// 5119 // exp 5120 //===----------------------------------------------------------------------===// 5121 5122 void AMDGPUAsmParser::errorExpTgt() { 5123 Error(Parser.getTok().getLoc(), "invalid exp target"); 5124 } 5125 5126 OperandMatchResultTy AMDGPUAsmParser::parseExpTgtImpl(StringRef Str, 5127 uint8_t &Val) { 5128 if (Str == "null") { 5129 Val = 9; 5130 return MatchOperand_Success; 5131 } 5132 5133 if (Str.startswith("mrt")) { 5134 Str = Str.drop_front(3); 5135 if (Str == "z") { // == mrtz 5136 Val = 8; 5137 return MatchOperand_Success; 5138 } 5139 5140 if (Str.getAsInteger(10, Val)) 5141 return MatchOperand_ParseFail; 5142 5143 if (Val > 7) 5144 errorExpTgt(); 5145 5146 return MatchOperand_Success; 5147 } 5148 5149 if (Str.startswith("pos")) 
{ 5150 Str = Str.drop_front(3); 5151 if (Str.getAsInteger(10, Val)) 5152 return MatchOperand_ParseFail; 5153 5154 if (Val > 4 || (Val == 4 && !isGFX10())) 5155 errorExpTgt(); 5156 5157 Val += 12; 5158 return MatchOperand_Success; 5159 } 5160 5161 if (isGFX10() && Str == "prim") { 5162 Val = 20; 5163 return MatchOperand_Success; 5164 } 5165 5166 if (Str.startswith("param")) { 5167 Str = Str.drop_front(5); 5168 if (Str.getAsInteger(10, Val)) 5169 return MatchOperand_ParseFail; 5170 5171 if (Val >= 32) 5172 errorExpTgt(); 5173 5174 Val += 32; 5175 return MatchOperand_Success; 5176 } 5177 5178 if (Str.startswith("invalid_target_")) { 5179 Str = Str.drop_front(15); 5180 if (Str.getAsInteger(10, Val)) 5181 return MatchOperand_ParseFail; 5182 5183 errorExpTgt(); 5184 return MatchOperand_Success; 5185 } 5186 5187 return MatchOperand_NoMatch; 5188 } 5189 5190 OperandMatchResultTy AMDGPUAsmParser::parseExpTgt(OperandVector &Operands) { 5191 uint8_t Val; 5192 StringRef Str = Parser.getTok().getString(); 5193 5194 auto Res = parseExpTgtImpl(Str, Val); 5195 if (Res != MatchOperand_Success) 5196 return Res; 5197 5198 SMLoc S = Parser.getTok().getLoc(); 5199 Parser.Lex(); 5200 5201 Operands.push_back(AMDGPUOperand::CreateImm(this, Val, S, 5202 AMDGPUOperand::ImmTyExpTgt)); 5203 return MatchOperand_Success; 5204 } 5205 5206 //===----------------------------------------------------------------------===// 5207 // parser helpers 5208 //===----------------------------------------------------------------------===// 5209 5210 bool 5211 AMDGPUAsmParser::isId(const AsmToken &Token, const StringRef Id) const { 5212 return Token.is(AsmToken::Identifier) && Token.getString() == Id; 5213 } 5214 5215 bool 5216 AMDGPUAsmParser::isId(const StringRef Id) const { 5217 return isId(getToken(), Id); 5218 } 5219 5220 bool 5221 AMDGPUAsmParser::isToken(const AsmToken::TokenKind Kind) const { 5222 return getTokenKind() == Kind; 5223 } 5224 5225 bool 5226 AMDGPUAsmParser::trySkipId(const StringRef Id) { 5227 if (isId(Id)) { 5228 lex(); 5229 return true; 5230 } 5231 return false; 5232 } 5233 5234 bool 5235 AMDGPUAsmParser::trySkipId(const StringRef Id, const AsmToken::TokenKind Kind) { 5236 if (isId(Id) && peekToken().is(Kind)) { 5237 lex(); 5238 lex(); 5239 return true; 5240 } 5241 return false; 5242 } 5243 5244 bool 5245 AMDGPUAsmParser::trySkipToken(const AsmToken::TokenKind Kind) { 5246 if (isToken(Kind)) { 5247 lex(); 5248 return true; 5249 } 5250 return false; 5251 } 5252 5253 bool 5254 AMDGPUAsmParser::skipToken(const AsmToken::TokenKind Kind, 5255 const StringRef ErrMsg) { 5256 if (!trySkipToken(Kind)) { 5257 Error(getLoc(), ErrMsg); 5258 return false; 5259 } 5260 return true; 5261 } 5262 5263 bool 5264 AMDGPUAsmParser::parseExpr(int64_t &Imm) { 5265 return !getParser().parseAbsoluteExpression(Imm); 5266 } 5267 5268 bool 5269 AMDGPUAsmParser::parseExpr(OperandVector &Operands) { 5270 SMLoc S = getLoc(); 5271 5272 const MCExpr *Expr; 5273 if (Parser.parseExpression(Expr)) 5274 return false; 5275 5276 int64_t IntVal; 5277 if (Expr->evaluateAsAbsolute(IntVal)) { 5278 Operands.push_back(AMDGPUOperand::CreateImm(this, IntVal, S)); 5279 } else { 5280 Operands.push_back(AMDGPUOperand::CreateExpr(this, Expr, S)); 5281 } 5282 return true; 5283 } 5284 5285 bool 5286 AMDGPUAsmParser::parseString(StringRef &Val, const StringRef ErrMsg) { 5287 if (isToken(AsmToken::String)) { 5288 Val = getToken().getStringContents(); 5289 lex(); 5290 return true; 5291 } else { 5292 Error(getLoc(), ErrMsg); 5293 return false; 5294 } 5295 } 5296 5297 
AsmToken 5298 AMDGPUAsmParser::getToken() const { 5299 return Parser.getTok(); 5300 } 5301 5302 AsmToken 5303 AMDGPUAsmParser::peekToken() { 5304 return getLexer().peekTok(); 5305 } 5306 5307 void 5308 AMDGPUAsmParser::peekTokens(MutableArrayRef<AsmToken> Tokens) { 5309 auto TokCount = getLexer().peekTokens(Tokens); 5310 5311 for (auto Idx = TokCount; Idx < Tokens.size(); ++Idx) 5312 Tokens[Idx] = AsmToken(AsmToken::Error, ""); 5313 } 5314 5315 AsmToken::TokenKind 5316 AMDGPUAsmParser::getTokenKind() const { 5317 return getLexer().getKind(); 5318 } 5319 5320 SMLoc 5321 AMDGPUAsmParser::getLoc() const { 5322 return getToken().getLoc(); 5323 } 5324 5325 StringRef 5326 AMDGPUAsmParser::getTokenStr() const { 5327 return getToken().getString(); 5328 } 5329 5330 void 5331 AMDGPUAsmParser::lex() { 5332 Parser.Lex(); 5333 } 5334 5335 //===----------------------------------------------------------------------===// 5336 // swizzle 5337 //===----------------------------------------------------------------------===// 5338 5339 LLVM_READNONE 5340 static unsigned 5341 encodeBitmaskPerm(const unsigned AndMask, 5342 const unsigned OrMask, 5343 const unsigned XorMask) { 5344 using namespace llvm::AMDGPU::Swizzle; 5345 5346 return BITMASK_PERM_ENC | 5347 (AndMask << BITMASK_AND_SHIFT) | 5348 (OrMask << BITMASK_OR_SHIFT) | 5349 (XorMask << BITMASK_XOR_SHIFT); 5350 } 5351 5352 bool 5353 AMDGPUAsmParser::parseSwizzleOperands(const unsigned OpNum, int64_t* Op, 5354 const unsigned MinVal, 5355 const unsigned MaxVal, 5356 const StringRef ErrMsg) { 5357 for (unsigned i = 0; i < OpNum; ++i) { 5358 if (!skipToken(AsmToken::Comma, "expected a comma")){ 5359 return false; 5360 } 5361 SMLoc ExprLoc = Parser.getTok().getLoc(); 5362 if (!parseExpr(Op[i])) { 5363 return false; 5364 } 5365 if (Op[i] < MinVal || Op[i] > MaxVal) { 5366 Error(ExprLoc, ErrMsg); 5367 return false; 5368 } 5369 } 5370 5371 return true; 5372 } 5373 5374 bool 5375 AMDGPUAsmParser::parseSwizzleQuadPerm(int64_t &Imm) { 5376 using namespace llvm::AMDGPU::Swizzle; 5377 5378 int64_t Lane[LANE_NUM]; 5379 if (parseSwizzleOperands(LANE_NUM, Lane, 0, LANE_MAX, 5380 "expected a 2-bit lane id")) { 5381 Imm = QUAD_PERM_ENC; 5382 for (unsigned I = 0; I < LANE_NUM; ++I) { 5383 Imm |= Lane[I] << (LANE_SHIFT * I); 5384 } 5385 return true; 5386 } 5387 return false; 5388 } 5389 5390 bool 5391 AMDGPUAsmParser::parseSwizzleBroadcast(int64_t &Imm) { 5392 using namespace llvm::AMDGPU::Swizzle; 5393 5394 SMLoc S = Parser.getTok().getLoc(); 5395 int64_t GroupSize; 5396 int64_t LaneIdx; 5397 5398 if (!parseSwizzleOperands(1, &GroupSize, 5399 2, 32, 5400 "group size must be in the interval [2,32]")) { 5401 return false; 5402 } 5403 if (!isPowerOf2_64(GroupSize)) { 5404 Error(S, "group size must be a power of two"); 5405 return false; 5406 } 5407 if (parseSwizzleOperands(1, &LaneIdx, 5408 0, GroupSize - 1, 5409 "lane id must be in the interval [0,group size - 1]")) { 5410 Imm = encodeBitmaskPerm(BITMASK_MAX - GroupSize + 1, LaneIdx, 0); 5411 return true; 5412 } 5413 return false; 5414 } 5415 5416 bool 5417 AMDGPUAsmParser::parseSwizzleReverse(int64_t &Imm) { 5418 using namespace llvm::AMDGPU::Swizzle; 5419 5420 SMLoc S = Parser.getTok().getLoc(); 5421 int64_t GroupSize; 5422 5423 if (!parseSwizzleOperands(1, &GroupSize, 5424 2, 32, "group size must be in the interval [2,32]")) { 5425 return false; 5426 } 5427 if (!isPowerOf2_64(GroupSize)) { 5428 Error(S, "group size must be a power of two"); 5429 return false; 5430 } 5431 5432 Imm = encodeBitmaskPerm(BITMASK_MAX, 0, 
GroupSize - 1); 5433 return true; 5434 } 5435 5436 bool 5437 AMDGPUAsmParser::parseSwizzleSwap(int64_t &Imm) { 5438 using namespace llvm::AMDGPU::Swizzle; 5439 5440 SMLoc S = Parser.getTok().getLoc(); 5441 int64_t GroupSize; 5442 5443 if (!parseSwizzleOperands(1, &GroupSize, 5444 1, 16, "group size must be in the interval [1,16]")) { 5445 return false; 5446 } 5447 if (!isPowerOf2_64(GroupSize)) { 5448 Error(S, "group size must be a power of two"); 5449 return false; 5450 } 5451 5452 Imm = encodeBitmaskPerm(BITMASK_MAX, 0, GroupSize); 5453 return true; 5454 } 5455 5456 bool 5457 AMDGPUAsmParser::parseSwizzleBitmaskPerm(int64_t &Imm) { 5458 using namespace llvm::AMDGPU::Swizzle; 5459 5460 if (!skipToken(AsmToken::Comma, "expected a comma")) { 5461 return false; 5462 } 5463 5464 StringRef Ctl; 5465 SMLoc StrLoc = Parser.getTok().getLoc(); 5466 if (!parseString(Ctl)) { 5467 return false; 5468 } 5469 if (Ctl.size() != BITMASK_WIDTH) { 5470 Error(StrLoc, "expected a 5-character mask"); 5471 return false; 5472 } 5473 5474 unsigned AndMask = 0; 5475 unsigned OrMask = 0; 5476 unsigned XorMask = 0; 5477 5478 for (size_t i = 0; i < Ctl.size(); ++i) { 5479 unsigned Mask = 1 << (BITMASK_WIDTH - 1 - i); 5480 switch(Ctl[i]) { 5481 default: 5482 Error(StrLoc, "invalid mask"); 5483 return false; 5484 case '0': 5485 break; 5486 case '1': 5487 OrMask |= Mask; 5488 break; 5489 case 'p': 5490 AndMask |= Mask; 5491 break; 5492 case 'i': 5493 AndMask |= Mask; 5494 XorMask |= Mask; 5495 break; 5496 } 5497 } 5498 5499 Imm = encodeBitmaskPerm(AndMask, OrMask, XorMask); 5500 return true; 5501 } 5502 5503 bool 5504 AMDGPUAsmParser::parseSwizzleOffset(int64_t &Imm) { 5505 5506 SMLoc OffsetLoc = Parser.getTok().getLoc(); 5507 5508 if (!parseExpr(Imm)) { 5509 return false; 5510 } 5511 if (!isUInt<16>(Imm)) { 5512 Error(OffsetLoc, "expected a 16-bit offset"); 5513 return false; 5514 } 5515 return true; 5516 } 5517 5518 bool 5519 AMDGPUAsmParser::parseSwizzleMacro(int64_t &Imm) { 5520 using namespace llvm::AMDGPU::Swizzle; 5521 5522 if (skipToken(AsmToken::LParen, "expected a left parenthesis")) { 5523 5524 SMLoc ModeLoc = Parser.getTok().getLoc(); 5525 bool Ok = false; 5526 5527 if (trySkipId(IdSymbolic[ID_QUAD_PERM])) { 5528 Ok = parseSwizzleQuadPerm(Imm); 5529 } else if (trySkipId(IdSymbolic[ID_BITMASK_PERM])) { 5530 Ok = parseSwizzleBitmaskPerm(Imm); 5531 } else if (trySkipId(IdSymbolic[ID_BROADCAST])) { 5532 Ok = parseSwizzleBroadcast(Imm); 5533 } else if (trySkipId(IdSymbolic[ID_SWAP])) { 5534 Ok = parseSwizzleSwap(Imm); 5535 } else if (trySkipId(IdSymbolic[ID_REVERSE])) { 5536 Ok = parseSwizzleReverse(Imm); 5537 } else { 5538 Error(ModeLoc, "expected a swizzle mode"); 5539 } 5540 5541 return Ok && skipToken(AsmToken::RParen, "expected a closing parenthesis"); 5542 } 5543 5544 return false; 5545 } 5546 5547 OperandMatchResultTy 5548 AMDGPUAsmParser::parseSwizzleOp(OperandVector &Operands) { 5549 SMLoc S = Parser.getTok().getLoc(); 5550 int64_t Imm = 0; 5551 5552 if (trySkipId("offset")) { 5553 5554 bool Ok = false; 5555 if (skipToken(AsmToken::Colon, "expected a colon")) { 5556 if (trySkipId("swizzle")) { 5557 Ok = parseSwizzleMacro(Imm); 5558 } else { 5559 Ok = parseSwizzleOffset(Imm); 5560 } 5561 } 5562 5563 Operands.push_back(AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTySwizzle)); 5564 5565 return Ok? MatchOperand_Success : MatchOperand_ParseFail; 5566 } else { 5567 // Swizzle "offset" operand is optional. 5568 // If it is omitted, try parsing other optional operands.
5569 return parseOptionalOpr(Operands); 5570 } 5571 } 5572 5573 bool 5574 AMDGPUOperand::isSwizzle() const { 5575 return isImmTy(ImmTySwizzle); 5576 } 5577 5578 //===----------------------------------------------------------------------===// 5579 // VGPR Index Mode 5580 //===----------------------------------------------------------------------===// 5581 5582 int64_t AMDGPUAsmParser::parseGPRIdxMacro() { 5583 5584 using namespace llvm::AMDGPU::VGPRIndexMode; 5585 5586 if (trySkipToken(AsmToken::RParen)) { 5587 return OFF; 5588 } 5589 5590 int64_t Imm = 0; 5591 5592 while (true) { 5593 unsigned Mode = 0; 5594 SMLoc S = Parser.getTok().getLoc(); 5595 5596 for (unsigned ModeId = ID_MIN; ModeId <= ID_MAX; ++ModeId) { 5597 if (trySkipId(IdSymbolic[ModeId])) { 5598 Mode = 1 << ModeId; 5599 break; 5600 } 5601 } 5602 5603 if (Mode == 0) { 5604 Error(S, (Imm == 0)? 5605 "expected a VGPR index mode or a closing parenthesis" : 5606 "expected a VGPR index mode"); 5607 break; 5608 } 5609 5610 if (Imm & Mode) { 5611 Error(S, "duplicate VGPR index mode"); 5612 break; 5613 } 5614 Imm |= Mode; 5615 5616 if (trySkipToken(AsmToken::RParen)) 5617 break; 5618 if (!skipToken(AsmToken::Comma, 5619 "expected a comma or a closing parenthesis")) 5620 break; 5621 } 5622 5623 return Imm; 5624 } 5625 5626 OperandMatchResultTy 5627 AMDGPUAsmParser::parseGPRIdxMode(OperandVector &Operands) { 5628 5629 int64_t Imm = 0; 5630 SMLoc S = Parser.getTok().getLoc(); 5631 5632 if (getLexer().getKind() == AsmToken::Identifier && 5633 Parser.getTok().getString() == "gpr_idx" && 5634 getLexer().peekTok().is(AsmToken::LParen)) { 5635 5636 Parser.Lex(); 5637 Parser.Lex(); 5638 5639 // If parse failed, trigger an error but do not return error code 5640 // to avoid excessive error messages. 5641 Imm = parseGPRIdxMacro(); 5642 5643 } else { 5644 if (getParser().parseAbsoluteExpression(Imm)) 5645 return MatchOperand_NoMatch; 5646 if (Imm < 0 || !isUInt<4>(Imm)) { 5647 Error(S, "invalid immediate: only 4-bit values are legal"); 5648 } 5649 } 5650 5651 Operands.push_back( 5652 AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTyGprIdxMode)); 5653 return MatchOperand_Success; 5654 } 5655 5656 bool AMDGPUOperand::isGPRIdxMode() const { 5657 return isImmTy(ImmTyGprIdxMode); 5658 } 5659 5660 //===----------------------------------------------------------------------===// 5661 // sopp branch targets 5662 //===----------------------------------------------------------------------===// 5663 5664 OperandMatchResultTy 5665 AMDGPUAsmParser::parseSOppBrTarget(OperandVector &Operands) { 5666 5667 // Make sure we are not parsing something 5668 // that looks like a label or an expression but is not. 5669 // This will improve error messages. 5670 if (isRegister() || isModifier()) 5671 return MatchOperand_NoMatch; 5672 5673 if (parseExpr(Operands)) { 5674 5675 AMDGPUOperand &Opr = ((AMDGPUOperand &)*Operands[Operands.size() - 1]); 5676 assert(Opr.isImm() || Opr.isExpr()); 5677 SMLoc Loc = Opr.getStartLoc(); 5678 5679 // Currently we do not support arbitrary expressions as branch targets. 5680 // Only labels and absolute expressions are accepted. 
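// For example (illustrative): 's_branch loop_end' and 's_branch 4' are
// accepted, while 's_branch loop_end + 4' is rejected below because it is
// neither a plain label reference nor an absolute expression.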
5681 if (Opr.isExpr() && !Opr.isSymbolRefExpr()) { 5682 Error(Loc, "expected an absolute expression or a label"); 5683 } else if (Opr.isImm() && !Opr.isS16Imm()) { 5684 Error(Loc, "expected a 16-bit signed jump offset"); 5685 } 5686 } 5687 5688 return MatchOperand_Success; // avoid excessive error messages 5689 } 5690 5691 //===----------------------------------------------------------------------===// 5692 // Boolean holding registers 5693 //===----------------------------------------------------------------------===// 5694 5695 OperandMatchResultTy 5696 AMDGPUAsmParser::parseBoolReg(OperandVector &Operands) { 5697 return parseReg(Operands); 5698 } 5699 5700 //===----------------------------------------------------------------------===// 5701 // mubuf 5702 //===----------------------------------------------------------------------===// 5703 5704 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultDLC() const { 5705 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyDLC); 5706 } 5707 5708 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultGLC() const { 5709 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyGLC); 5710 } 5711 5712 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSLC() const { 5713 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTySLC); 5714 } 5715 5716 void AMDGPUAsmParser::cvtMubufImpl(MCInst &Inst, 5717 const OperandVector &Operands, 5718 bool IsAtomic, 5719 bool IsAtomicReturn, 5720 bool IsLds) { 5721 bool IsLdsOpcode = IsLds; 5722 bool HasLdsModifier = false; 5723 OptionalImmIndexMap OptionalIdx; 5724 assert(IsAtomicReturn ? IsAtomic : true); 5725 unsigned FirstOperandIdx = 1; 5726 5727 for (unsigned i = FirstOperandIdx, e = Operands.size(); i != e; ++i) { 5728 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 5729 5730 // Add the register arguments 5731 if (Op.isReg()) { 5732 Op.addRegOperands(Inst, 1); 5733 // Insert a tied src for atomic return dst. 5734 // This cannot be postponed as subsequent calls to 5735 // addImmOperands rely on correct number of MC operands. 5736 if (IsAtomicReturn && i == FirstOperandIdx) 5737 Op.addRegOperands(Inst, 1); 5738 continue; 5739 } 5740 5741 // Handle the case where soffset is an immediate 5742 if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyNone) { 5743 Op.addImmOperands(Inst, 1); 5744 continue; 5745 } 5746 5747 HasLdsModifier |= Op.isLDS(); 5748 5749 // Handle tokens like 'offen' which are sometimes hard-coded into the 5750 // asm string. There are no MCInst operands for these. 5751 if (Op.isToken()) { 5752 continue; 5753 } 5754 assert(Op.isImm()); 5755 5756 // Handle optional arguments 5757 OptionalIdx[Op.getImmTy()] = i; 5758 } 5759 5760 // This is a workaround for an llvm quirk which may result in an 5761 // incorrect instruction selection. Lds and non-lds versions of 5762 // MUBUF instructions are identical except that lds versions 5763 // have mandatory 'lds' modifier. However this modifier follows 5764 // optional modifiers and llvm asm matcher regards this 'lds' 5765 // modifier as an optional one. As a result, an lds version 5766 // of opcode may be selected even if it has no 'lds' modifier. 5767 if (IsLdsOpcode && !HasLdsModifier) { 5768 int NoLdsOpcode = AMDGPU::getMUBUFNoLdsInst(Inst.getOpcode()); 5769 if (NoLdsOpcode != -1) { // Got lds version - correct it. 5770 Inst.setOpcode(NoLdsOpcode); 5771 IsLdsOpcode = false; 5772 } 5773 } 5774 5775 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset); 5776 if (!IsAtomic) { // glc is hard-coded. 
5777 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGLC); 5778 } 5779 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySLC); 5780 5781 if (!IsLdsOpcode) { // tfe is not legal with lds opcodes 5782 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE); 5783 } 5784 5785 if (isGFX10()) 5786 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDLC); 5787 } 5788 5789 void AMDGPUAsmParser::cvtMtbuf(MCInst &Inst, const OperandVector &Operands) { 5790 OptionalImmIndexMap OptionalIdx; 5791 5792 for (unsigned i = 1, e = Operands.size(); i != e; ++i) { 5793 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 5794 5795 // Add the register arguments 5796 if (Op.isReg()) { 5797 Op.addRegOperands(Inst, 1); 5798 continue; 5799 } 5800 5801 // Handle the case where soffset is an immediate 5802 if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyNone) { 5803 Op.addImmOperands(Inst, 1); 5804 continue; 5805 } 5806 5807 // Handle tokens like 'offen' which are sometimes hard-coded into the 5808 // asm string. There are no MCInst operands for these. 5809 if (Op.isToken()) { 5810 continue; 5811 } 5812 assert(Op.isImm()); 5813 5814 // Handle optional arguments 5815 OptionalIdx[Op.getImmTy()] = i; 5816 } 5817 5818 addOptionalImmOperand(Inst, Operands, OptionalIdx, 5819 AMDGPUOperand::ImmTyOffset); 5820 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyFORMAT); 5821 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGLC); 5822 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySLC); 5823 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE); 5824 5825 if (isGFX10()) 5826 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDLC); 5827 } 5828 5829 //===----------------------------------------------------------------------===// 5830 // mimg 5831 //===----------------------------------------------------------------------===// 5832 5833 void AMDGPUAsmParser::cvtMIMG(MCInst &Inst, const OperandVector &Operands, 5834 bool IsAtomic) { 5835 unsigned I = 1; 5836 const MCInstrDesc &Desc = MII.get(Inst.getOpcode()); 5837 for (unsigned J = 0; J < Desc.getNumDefs(); ++J) { 5838 ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1); 5839 } 5840 5841 if (IsAtomic) { 5842 // Add src, same as dst 5843 assert(Desc.getNumDefs() == 1); 5844 ((AMDGPUOperand &)*Operands[I - 1]).addRegOperands(Inst, 1); 5845 } 5846 5847 OptionalImmIndexMap OptionalIdx; 5848 5849 for (unsigned E = Operands.size(); I != E; ++I) { 5850 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]); 5851 5852 // Add the register arguments 5853 if (Op.isReg()) { 5854 Op.addRegOperands(Inst, 1); 5855 } else if (Op.isImmModifier()) { 5856 OptionalIdx[Op.getImmTy()] = I; 5857 } else if (!Op.isToken()) { 5858 llvm_unreachable("unexpected operand type"); 5859 } 5860 } 5861 5862 bool IsGFX10 = isGFX10(); 5863 5864 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDMask); 5865 if (IsGFX10) 5866 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDim, -1); 5867 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyUNorm); 5868 if (IsGFX10) 5869 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDLC); 5870 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGLC); 5871 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySLC); 5872 addOptionalImmOperand(Inst, Operands, 
OptionalIdx, AMDGPUOperand::ImmTyR128A16); 5873 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE); 5874 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyLWE); 5875 if (!IsGFX10) 5876 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDA); 5877 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyD16); 5878 } 5879 5880 void AMDGPUAsmParser::cvtMIMGAtomic(MCInst &Inst, const OperandVector &Operands) { 5881 cvtMIMG(Inst, Operands, true); 5882 } 5883 5884 //===----------------------------------------------------------------------===// 5885 // smrd 5886 //===----------------------------------------------------------------------===// 5887 5888 bool AMDGPUOperand::isSMRDOffset8() const { 5889 return isImm() && isUInt<8>(getImm()); 5890 } 5891 5892 bool AMDGPUOperand::isSMRDOffset20() const { 5893 return isImm() && isUInt<20>(getImm()); 5894 } 5895 5896 bool AMDGPUOperand::isSMRDLiteralOffset() const { 5897 // 32-bit literals are only supported on CI and we only want to use them 5898 // when the offset is > 8-bits. 5899 return isImm() && !isUInt<8>(getImm()) && isUInt<32>(getImm()); 5900 } 5901 5902 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSMRDOffset8() const { 5903 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset); 5904 } 5905 5906 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSMRDOffset20() const { 5907 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset); 5908 } 5909 5910 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSMRDLiteralOffset() const { 5911 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset); 5912 } 5913 5914 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultFlatOffset() const { 5915 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset); 5916 } 5917 5918 //===----------------------------------------------------------------------===// 5919 // vop3 5920 //===----------------------------------------------------------------------===// 5921 5922 static bool ConvertOmodMul(int64_t &Mul) { 5923 if (Mul != 1 && Mul != 2 && Mul != 4) 5924 return false; 5925 5926 Mul >>= 1; 5927 return true; 5928 } 5929 5930 static bool ConvertOmodDiv(int64_t &Div) { 5931 if (Div == 1) { 5932 Div = 0; 5933 return true; 5934 } 5935 5936 if (Div == 2) { 5937 Div = 3; 5938 return true; 5939 } 5940 5941 return false; 5942 } 5943 5944 static bool ConvertBoundCtrl(int64_t &BoundCtrl) { 5945 if (BoundCtrl == 0) { 5946 BoundCtrl = 1; 5947 return true; 5948 } 5949 5950 if (BoundCtrl == -1) { 5951 BoundCtrl = 0; 5952 return true; 5953 } 5954 5955 return false; 5956 } 5957 5958 // Note: the order in this table matches the order of operands in AsmString. 
5959 static const OptionalOperand AMDGPUOptionalOperandTable[] = { 5960 {"offen", AMDGPUOperand::ImmTyOffen, true, nullptr}, 5961 {"idxen", AMDGPUOperand::ImmTyIdxen, true, nullptr}, 5962 {"addr64", AMDGPUOperand::ImmTyAddr64, true, nullptr}, 5963 {"offset0", AMDGPUOperand::ImmTyOffset0, false, nullptr}, 5964 {"offset1", AMDGPUOperand::ImmTyOffset1, false, nullptr}, 5965 {"gds", AMDGPUOperand::ImmTyGDS, true, nullptr}, 5966 {"lds", AMDGPUOperand::ImmTyLDS, true, nullptr}, 5967 {"offset", AMDGPUOperand::ImmTyOffset, false, nullptr}, 5968 {"inst_offset", AMDGPUOperand::ImmTyInstOffset, false, nullptr}, 5969 {"dlc", AMDGPUOperand::ImmTyDLC, true, nullptr}, 5970 {"format", AMDGPUOperand::ImmTyFORMAT, false, nullptr}, 5971 {"glc", AMDGPUOperand::ImmTyGLC, true, nullptr}, 5972 {"slc", AMDGPUOperand::ImmTySLC, true, nullptr}, 5973 {"tfe", AMDGPUOperand::ImmTyTFE, true, nullptr}, 5974 {"d16", AMDGPUOperand::ImmTyD16, true, nullptr}, 5975 {"high", AMDGPUOperand::ImmTyHigh, true, nullptr}, 5976 {"clamp", AMDGPUOperand::ImmTyClampSI, true, nullptr}, 5977 {"omod", AMDGPUOperand::ImmTyOModSI, false, ConvertOmodMul}, 5978 {"unorm", AMDGPUOperand::ImmTyUNorm, true, nullptr}, 5979 {"da", AMDGPUOperand::ImmTyDA, true, nullptr}, 5980 {"r128", AMDGPUOperand::ImmTyR128A16, true, nullptr}, 5981 {"a16", AMDGPUOperand::ImmTyR128A16, true, nullptr}, 5982 {"lwe", AMDGPUOperand::ImmTyLWE, true, nullptr}, 5983 {"d16", AMDGPUOperand::ImmTyD16, true, nullptr}, 5984 {"dmask", AMDGPUOperand::ImmTyDMask, false, nullptr}, 5985 {"dim", AMDGPUOperand::ImmTyDim, false, nullptr}, 5986 {"row_mask", AMDGPUOperand::ImmTyDppRowMask, false, nullptr}, 5987 {"bank_mask", AMDGPUOperand::ImmTyDppBankMask, false, nullptr}, 5988 {"bound_ctrl", AMDGPUOperand::ImmTyDppBoundCtrl, false, ConvertBoundCtrl}, 5989 {"fi", AMDGPUOperand::ImmTyDppFi, false, nullptr}, 5990 {"dst_sel", AMDGPUOperand::ImmTySdwaDstSel, false, nullptr}, 5991 {"src0_sel", AMDGPUOperand::ImmTySdwaSrc0Sel, false, nullptr}, 5992 {"src1_sel", AMDGPUOperand::ImmTySdwaSrc1Sel, false, nullptr}, 5993 {"dst_unused", AMDGPUOperand::ImmTySdwaDstUnused, false, nullptr}, 5994 {"compr", AMDGPUOperand::ImmTyExpCompr, true, nullptr }, 5995 {"vm", AMDGPUOperand::ImmTyExpVM, true, nullptr}, 5996 {"op_sel", AMDGPUOperand::ImmTyOpSel, false, nullptr}, 5997 {"op_sel_hi", AMDGPUOperand::ImmTyOpSelHi, false, nullptr}, 5998 {"neg_lo", AMDGPUOperand::ImmTyNegLo, false, nullptr}, 5999 {"neg_hi", AMDGPUOperand::ImmTyNegHi, false, nullptr}, 6000 {"blgp", AMDGPUOperand::ImmTyBLGP, false, nullptr}, 6001 {"cbsz", AMDGPUOperand::ImmTyCBSZ, false, nullptr}, 6002 {"abid", AMDGPUOperand::ImmTyABID, false, nullptr} 6003 }; 6004 6005 OperandMatchResultTy AMDGPUAsmParser::parseOptionalOperand(OperandVector &Operands) { 6006 unsigned size = Operands.size(); 6007 assert(size > 0); 6008 6009 OperandMatchResultTy res = parseOptionalOpr(Operands); 6010 6011 // This is a hack to enable hardcoded mandatory operands which follow 6012 // optional operands. 6013 // 6014 // Current design assumes that all operands after the first optional operand 6015 // are also optional. However implementation of some instructions violates 6016 // this rule (see e.g. flat/global atomic which have hardcoded 'glc' operands). 6017 // 6018 // To alleviate this problem, we have to (implicitly) parse extra operands 6019 // to make sure autogenerated parser of custom operands never hit hardcoded 6020 // mandatory operands. 
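// For example (illustrative): in 'flat_atomic_add v0, v[1:2], v2 glc' the
// trailing 'glc' is mandatory for the returning atomic form, yet it appears
// after operands that the matcher treats as optional.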
6021 6022 if (size == 1 || ((AMDGPUOperand &)*Operands[size - 1]).isRegKind()) { 6023 6024 // We have parsed the first optional operand. 6025 // Parse as many operands as necessary to skip all mandatory operands. 6026 6027 for (unsigned i = 0; i < MAX_OPR_LOOKAHEAD; ++i) { 6028 if (res != MatchOperand_Success || 6029 getLexer().is(AsmToken::EndOfStatement)) break; 6030 if (getLexer().is(AsmToken::Comma)) Parser.Lex(); 6031 res = parseOptionalOpr(Operands); 6032 } 6033 } 6034 6035 return res; 6036 } 6037 6038 OperandMatchResultTy AMDGPUAsmParser::parseOptionalOpr(OperandVector &Operands) { 6039 OperandMatchResultTy res; 6040 for (const OptionalOperand &Op : AMDGPUOptionalOperandTable) { 6041 // try to parse any optional operand here 6042 if (Op.IsBit) { 6043 res = parseNamedBit(Op.Name, Operands, Op.Type); 6044 } else if (Op.Type == AMDGPUOperand::ImmTyOModSI) { 6045 res = parseOModOperand(Operands); 6046 } else if (Op.Type == AMDGPUOperand::ImmTySdwaDstSel || 6047 Op.Type == AMDGPUOperand::ImmTySdwaSrc0Sel || 6048 Op.Type == AMDGPUOperand::ImmTySdwaSrc1Sel) { 6049 res = parseSDWASel(Operands, Op.Name, Op.Type); 6050 } else if (Op.Type == AMDGPUOperand::ImmTySdwaDstUnused) { 6051 res = parseSDWADstUnused(Operands); 6052 } else if (Op.Type == AMDGPUOperand::ImmTyOpSel || 6053 Op.Type == AMDGPUOperand::ImmTyOpSelHi || 6054 Op.Type == AMDGPUOperand::ImmTyNegLo || 6055 Op.Type == AMDGPUOperand::ImmTyNegHi) { 6056 res = parseOperandArrayWithPrefix(Op.Name, Operands, Op.Type, 6057 Op.ConvertResult); 6058 } else if (Op.Type == AMDGPUOperand::ImmTyDim) { 6059 res = parseDim(Operands); 6060 } else if (Op.Type == AMDGPUOperand::ImmTyFORMAT && !isGFX10()) { 6061 res = parseDfmtNfmt(Operands); 6062 } else { 6063 res = parseIntWithPrefix(Op.Name, Operands, Op.Type, Op.ConvertResult); 6064 } 6065 if (res != MatchOperand_NoMatch) { 6066 return res; 6067 } 6068 } 6069 return MatchOperand_NoMatch; 6070 } 6071 6072 OperandMatchResultTy AMDGPUAsmParser::parseOModOperand(OperandVector &Operands) { 6073 StringRef Name = Parser.getTok().getString(); 6074 if (Name == "mul") { 6075 return parseIntWithPrefix("mul", Operands, 6076 AMDGPUOperand::ImmTyOModSI, ConvertOmodMul); 6077 } 6078 6079 if (Name == "div") { 6080 return parseIntWithPrefix("div", Operands, 6081 AMDGPUOperand::ImmTyOModSI, ConvertOmodDiv); 6082 } 6083 6084 return MatchOperand_NoMatch; 6085 } 6086 6087 void AMDGPUAsmParser::cvtVOP3OpSel(MCInst &Inst, const OperandVector &Operands) { 6088 cvtVOP3P(Inst, Operands); 6089 6090 int Opc = Inst.getOpcode(); 6091 6092 int SrcNum; 6093 const int Ops[] = { AMDGPU::OpName::src0, 6094 AMDGPU::OpName::src1, 6095 AMDGPU::OpName::src2 }; 6096 for (SrcNum = 0; 6097 SrcNum < 3 && AMDGPU::getNamedOperandIdx(Opc, Ops[SrcNum]) != -1; 6098 ++SrcNum); 6099 assert(SrcNum > 0); 6100 6101 int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel); 6102 unsigned OpSel = Inst.getOperand(OpSelIdx).getImm(); 6103 6104 if ((OpSel & (1 << SrcNum)) != 0) { 6105 int ModIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0_modifiers); 6106 uint32_t ModVal = Inst.getOperand(ModIdx).getImm(); 6107 Inst.getOperand(ModIdx).setImm(ModVal | SISrcMods::DST_OP_SEL); 6108 } 6109 } 6110 6111 static bool isRegOrImmWithInputMods(const MCInstrDesc &Desc, unsigned OpNum) { 6112 // 1. This operand is input modifiers 6113 return Desc.OpInfo[OpNum].OperandType == AMDGPU::OPERAND_INPUT_MODS 6114 // 2. This is not last operand 6115 && Desc.NumOperands > (OpNum + 1) 6116 // 3. 
Next operand is register class 6117 && Desc.OpInfo[OpNum + 1].RegClass != -1 6118 // 4. Next register is not tied to any other operand 6119 && Desc.getOperandConstraint(OpNum + 1, MCOI::OperandConstraint::TIED_TO) == -1; 6120 } 6121 6122 void AMDGPUAsmParser::cvtVOP3Interp(MCInst &Inst, const OperandVector &Operands) 6123 { 6124 OptionalImmIndexMap OptionalIdx; 6125 unsigned Opc = Inst.getOpcode(); 6126 6127 unsigned I = 1; 6128 const MCInstrDesc &Desc = MII.get(Inst.getOpcode()); 6129 for (unsigned J = 0; J < Desc.getNumDefs(); ++J) { 6130 ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1); 6131 } 6132 6133 for (unsigned E = Operands.size(); I != E; ++I) { 6134 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]); 6135 if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) { 6136 Op.addRegOrImmWithFPInputModsOperands(Inst, 2); 6137 } else if (Op.isInterpSlot() || 6138 Op.isInterpAttr() || 6139 Op.isAttrChan()) { 6140 Inst.addOperand(MCOperand::createImm(Op.getImm())); 6141 } else if (Op.isImmModifier()) { 6142 OptionalIdx[Op.getImmTy()] = I; 6143 } else { 6144 llvm_unreachable("unhandled operand type"); 6145 } 6146 } 6147 6148 if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::high) != -1) { 6149 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyHigh); 6150 } 6151 6152 if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp) != -1) { 6153 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI); 6154 } 6155 6156 if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::omod) != -1) { 6157 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI); 6158 } 6159 } 6160 6161 void AMDGPUAsmParser::cvtVOP3(MCInst &Inst, const OperandVector &Operands, 6162 OptionalImmIndexMap &OptionalIdx) { 6163 unsigned Opc = Inst.getOpcode(); 6164 6165 unsigned I = 1; 6166 const MCInstrDesc &Desc = MII.get(Inst.getOpcode()); 6167 for (unsigned J = 0; J < Desc.getNumDefs(); ++J) { 6168 ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1); 6169 } 6170 6171 if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0_modifiers) != -1) { 6172 // This instruction has src modifiers 6173 for (unsigned E = Operands.size(); I != E; ++I) { 6174 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]); 6175 if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) { 6176 Op.addRegOrImmWithFPInputModsOperands(Inst, 2); 6177 } else if (Op.isImmModifier()) { 6178 OptionalIdx[Op.getImmTy()] = I; 6179 } else if (Op.isRegOrImm()) { 6180 Op.addRegOrImmOperands(Inst, 1); 6181 } else { 6182 llvm_unreachable("unhandled operand type"); 6183 } 6184 } 6185 } else { 6186 // No src modifiers 6187 for (unsigned E = Operands.size(); I != E; ++I) { 6188 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]); 6189 if (Op.isMod()) { 6190 OptionalIdx[Op.getImmTy()] = I; 6191 } else { 6192 Op.addRegOrImmOperands(Inst, 1); 6193 } 6194 } 6195 } 6196 6197 if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp) != -1) { 6198 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI); 6199 } 6200 6201 if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::omod) != -1) { 6202 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI); 6203 } 6204 6205 // Special case v_mac_{f16, f32} and v_fmac_{f16, f32} (gfx906/gfx10+): 6206 // it has src2 register operand that is tied to dst operand 6207 // we don't allow modifiers for this operand in assembler so src2_modifiers 6208 // should be 0. 
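// For example, with 'v_mac_f32_e64 v0, v1, v2' only dst, src0 and src1 are
// parsed; the code below inserts a zero src2_modifiers operand and reuses the
// dst register (operand 0) as the tied src2.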
6209 if (Opc == AMDGPU::V_MAC_F32_e64_gfx6_gfx7 || 6210 Opc == AMDGPU::V_MAC_F32_e64_gfx10 || 6211 Opc == AMDGPU::V_MAC_F32_e64_vi || 6212 Opc == AMDGPU::V_MAC_F16_e64_vi || 6213 Opc == AMDGPU::V_FMAC_F32_e64_gfx10 || 6214 Opc == AMDGPU::V_FMAC_F32_e64_vi || 6215 Opc == AMDGPU::V_FMAC_F16_e64_gfx10) { 6216 auto it = Inst.begin(); 6217 std::advance(it, AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2_modifiers)); 6218 it = Inst.insert(it, MCOperand::createImm(0)); // no modifiers for src2 6219 ++it; 6220 Inst.insert(it, Inst.getOperand(0)); // src2 = dst 6221 } 6222 } 6223 6224 void AMDGPUAsmParser::cvtVOP3(MCInst &Inst, const OperandVector &Operands) { 6225 OptionalImmIndexMap OptionalIdx; 6226 cvtVOP3(Inst, Operands, OptionalIdx); 6227 } 6228 6229 void AMDGPUAsmParser::cvtVOP3P(MCInst &Inst, 6230 const OperandVector &Operands) { 6231 OptionalImmIndexMap OptIdx; 6232 const int Opc = Inst.getOpcode(); 6233 const MCInstrDesc &Desc = MII.get(Opc); 6234 6235 const bool IsPacked = (Desc.TSFlags & SIInstrFlags::IsPacked) != 0; 6236 6237 cvtVOP3(Inst, Operands, OptIdx); 6238 6239 if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdst_in) != -1) { 6240 assert(!IsPacked); 6241 Inst.addOperand(Inst.getOperand(0)); 6242 } 6243 6244 // FIXME: This is messy. Parse the modifiers as if it was a normal VOP3 6245 // instruction, and then figure out where to actually put the modifiers 6246 6247 addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyOpSel); 6248 6249 int OpSelHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel_hi); 6250 if (OpSelHiIdx != -1) { 6251 int DefaultVal = IsPacked ? -1 : 0; 6252 addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyOpSelHi, 6253 DefaultVal); 6254 } 6255 6256 int NegLoIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::neg_lo); 6257 if (NegLoIdx != -1) { 6258 assert(IsPacked); 6259 addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyNegLo); 6260 addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyNegHi); 6261 } 6262 6263 const int Ops[] = { AMDGPU::OpName::src0, 6264 AMDGPU::OpName::src1, 6265 AMDGPU::OpName::src2 }; 6266 const int ModOps[] = { AMDGPU::OpName::src0_modifiers, 6267 AMDGPU::OpName::src1_modifiers, 6268 AMDGPU::OpName::src2_modifiers }; 6269 6270 int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel); 6271 6272 unsigned OpSel = Inst.getOperand(OpSelIdx).getImm(); 6273 unsigned OpSelHi = 0; 6274 unsigned NegLo = 0; 6275 unsigned NegHi = 0; 6276 6277 if (OpSelHiIdx != -1) { 6278 OpSelHi = Inst.getOperand(OpSelHiIdx).getImm(); 6279 } 6280 6281 if (NegLoIdx != -1) { 6282 int NegHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::neg_hi); 6283 NegLo = Inst.getOperand(NegLoIdx).getImm(); 6284 NegHi = Inst.getOperand(NegHiIdx).getImm(); 6285 } 6286 6287 for (int J = 0; J < 3; ++J) { 6288 int OpIdx = AMDGPU::getNamedOperandIdx(Opc, Ops[J]); 6289 if (OpIdx == -1) 6290 break; 6291 6292 uint32_t ModVal = 0; 6293 6294 if ((OpSel & (1 << J)) != 0) 6295 ModVal |= SISrcMods::OP_SEL_0; 6296 6297 if ((OpSelHi & (1 << J)) != 0) 6298 ModVal |= SISrcMods::OP_SEL_1; 6299 6300 if ((NegLo & (1 << J)) != 0) 6301 ModVal |= SISrcMods::NEG; 6302 6303 if ((NegHi & (1 << J)) != 0) 6304 ModVal |= SISrcMods::NEG_HI; 6305 6306 int ModIdx = AMDGPU::getNamedOperandIdx(Opc, ModOps[J]); 6307 6308 Inst.getOperand(ModIdx).setImm(Inst.getOperand(ModIdx).getImm() | ModVal); 6309 } 6310 } 6311 6312 //===----------------------------------------------------------------------===// 6313 // dpp 6314 
//===----------------------------------------------------------------------===// 6315 6316 bool AMDGPUOperand::isDPP8() const { 6317 return isImmTy(ImmTyDPP8); 6318 } 6319 6320 bool AMDGPUOperand::isDPPCtrl() const { 6321 using namespace AMDGPU::DPP; 6322 6323 bool result = isImm() && getImmTy() == ImmTyDppCtrl && isUInt<9>(getImm()); 6324 if (result) { 6325 int64_t Imm = getImm(); 6326 return (Imm >= DppCtrl::QUAD_PERM_FIRST && Imm <= DppCtrl::QUAD_PERM_LAST) || 6327 (Imm >= DppCtrl::ROW_SHL_FIRST && Imm <= DppCtrl::ROW_SHL_LAST) || 6328 (Imm >= DppCtrl::ROW_SHR_FIRST && Imm <= DppCtrl::ROW_SHR_LAST) || 6329 (Imm >= DppCtrl::ROW_ROR_FIRST && Imm <= DppCtrl::ROW_ROR_LAST) || 6330 (Imm == DppCtrl::WAVE_SHL1) || 6331 (Imm == DppCtrl::WAVE_ROL1) || 6332 (Imm == DppCtrl::WAVE_SHR1) || 6333 (Imm == DppCtrl::WAVE_ROR1) || 6334 (Imm == DppCtrl::ROW_MIRROR) || 6335 (Imm == DppCtrl::ROW_HALF_MIRROR) || 6336 (Imm == DppCtrl::BCAST15) || 6337 (Imm == DppCtrl::BCAST31) || 6338 (Imm >= DppCtrl::ROW_SHARE_FIRST && Imm <= DppCtrl::ROW_SHARE_LAST) || 6339 (Imm >= DppCtrl::ROW_XMASK_FIRST && Imm <= DppCtrl::ROW_XMASK_LAST); 6340 } 6341 return false; 6342 } 6343 6344 //===----------------------------------------------------------------------===// 6345 // mAI 6346 //===----------------------------------------------------------------------===// 6347 6348 bool AMDGPUOperand::isBLGP() const { 6349 return isImm() && getImmTy() == ImmTyBLGP && isUInt<3>(getImm()); 6350 } 6351 6352 bool AMDGPUOperand::isCBSZ() const { 6353 return isImm() && getImmTy() == ImmTyCBSZ && isUInt<3>(getImm()); 6354 } 6355 6356 bool AMDGPUOperand::isABID() const { 6357 return isImm() && getImmTy() == ImmTyABID && isUInt<4>(getImm()); 6358 } 6359 6360 bool AMDGPUOperand::isS16Imm() const { 6361 return isImm() && (isInt<16>(getImm()) || isUInt<16>(getImm())); 6362 } 6363 6364 bool AMDGPUOperand::isU16Imm() const { 6365 return isImm() && isUInt<16>(getImm()); 6366 } 6367 6368 OperandMatchResultTy AMDGPUAsmParser::parseDim(OperandVector &Operands) { 6369 if (!isGFX10()) 6370 return MatchOperand_NoMatch; 6371 6372 SMLoc S = Parser.getTok().getLoc(); 6373 6374 if (getLexer().isNot(AsmToken::Identifier)) 6375 return MatchOperand_NoMatch; 6376 if (getLexer().getTok().getString() != "dim") 6377 return MatchOperand_NoMatch; 6378 6379 Parser.Lex(); 6380 if (getLexer().isNot(AsmToken::Colon)) 6381 return MatchOperand_ParseFail; 6382 6383 Parser.Lex(); 6384 6385 // We want to allow "dim:1D" etc., but the initial 1 is tokenized as an 6386 // integer. 
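// To recover the full suffix, an adjacent integer token (if any) is glued to
// the identifier that follows it below, e.g. '2' + 'D_ARRAY' -> "2D_ARRAY"
// (illustrative).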
6387 std::string Token; 6388 if (getLexer().is(AsmToken::Integer)) { 6389 SMLoc Loc = getLexer().getTok().getEndLoc(); 6390 Token = getLexer().getTok().getString(); 6391 Parser.Lex(); 6392 if (getLexer().getTok().getLoc() != Loc) 6393 return MatchOperand_ParseFail; 6394 } 6395 if (getLexer().isNot(AsmToken::Identifier)) 6396 return MatchOperand_ParseFail; 6397 Token += getLexer().getTok().getString(); 6398 6399 StringRef DimId = Token; 6400 if (DimId.startswith("SQ_RSRC_IMG_")) 6401 DimId = DimId.substr(12); 6402 6403 const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfoByAsmSuffix(DimId); 6404 if (!DimInfo) 6405 return MatchOperand_ParseFail; 6406 6407 Parser.Lex(); 6408 6409 Operands.push_back(AMDGPUOperand::CreateImm(this, DimInfo->Encoding, S, 6410 AMDGPUOperand::ImmTyDim)); 6411 return MatchOperand_Success; 6412 } 6413 6414 OperandMatchResultTy AMDGPUAsmParser::parseDPP8(OperandVector &Operands) { 6415 SMLoc S = Parser.getTok().getLoc(); 6416 StringRef Prefix; 6417 6418 if (getLexer().getKind() == AsmToken::Identifier) { 6419 Prefix = Parser.getTok().getString(); 6420 } else { 6421 return MatchOperand_NoMatch; 6422 } 6423 6424 if (Prefix != "dpp8") 6425 return parseDPPCtrl(Operands); 6426 if (!isGFX10()) 6427 return MatchOperand_NoMatch; 6428 6429 // dpp8:[%d,%d,%d,%d,%d,%d,%d,%d] 6430 6431 int64_t Sels[8]; 6432 6433 Parser.Lex(); 6434 if (getLexer().isNot(AsmToken::Colon)) 6435 return MatchOperand_ParseFail; 6436 6437 Parser.Lex(); 6438 if (getLexer().isNot(AsmToken::LBrac)) 6439 return MatchOperand_ParseFail; 6440 6441 Parser.Lex(); 6442 if (getParser().parseAbsoluteExpression(Sels[0])) 6443 return MatchOperand_ParseFail; 6444 if (0 > Sels[0] || 7 < Sels[0]) 6445 return MatchOperand_ParseFail; 6446 6447 for (size_t i = 1; i < 8; ++i) { 6448 if (getLexer().isNot(AsmToken::Comma)) 6449 return MatchOperand_ParseFail; 6450 6451 Parser.Lex(); 6452 if (getParser().parseAbsoluteExpression(Sels[i])) 6453 return MatchOperand_ParseFail; 6454 if (0 > Sels[i] || 7 < Sels[i]) 6455 return MatchOperand_ParseFail; 6456 } 6457 6458 if (getLexer().isNot(AsmToken::RBrac)) 6459 return MatchOperand_ParseFail; 6460 Parser.Lex(); 6461 6462 unsigned DPP8 = 0; 6463 for (size_t i = 0; i < 8; ++i) 6464 DPP8 |= (Sels[i] << (i * 3)); 6465 6466 Operands.push_back(AMDGPUOperand::CreateImm(this, DPP8, S, AMDGPUOperand::ImmTyDPP8)); 6467 return MatchOperand_Success; 6468 } 6469 6470 OperandMatchResultTy 6471 AMDGPUAsmParser::parseDPPCtrl(OperandVector &Operands) { 6472 using namespace AMDGPU::DPP; 6473 6474 SMLoc S = Parser.getTok().getLoc(); 6475 StringRef Prefix; 6476 int64_t Int; 6477 6478 if (getLexer().getKind() == AsmToken::Identifier) { 6479 Prefix = Parser.getTok().getString(); 6480 } else { 6481 return MatchOperand_NoMatch; 6482 } 6483 6484 if (Prefix == "row_mirror") { 6485 Int = DppCtrl::ROW_MIRROR; 6486 Parser.Lex(); 6487 } else if (Prefix == "row_half_mirror") { 6488 Int = DppCtrl::ROW_HALF_MIRROR; 6489 Parser.Lex(); 6490 } else { 6491 // Check to prevent parseDPPCtrlOps from eating invalid tokens 6492 if (Prefix != "quad_perm" 6493 && Prefix != "row_shl" 6494 && Prefix != "row_shr" 6495 && Prefix != "row_ror" 6496 && Prefix != "wave_shl" 6497 && Prefix != "wave_rol" 6498 && Prefix != "wave_shr" 6499 && Prefix != "wave_ror" 6500 && Prefix != "row_bcast" 6501 && Prefix != "row_share" 6502 && Prefix != "row_xmask") { 6503 return MatchOperand_NoMatch; 6504 } 6505 6506 if (!isGFX10() && (Prefix == "row_share" || Prefix == "row_xmask")) 6507 return MatchOperand_NoMatch; 6508 6509 if (!isVI() && 
!isGFX9() && 6510 (Prefix == "wave_shl" || Prefix == "wave_shr" || 6511 Prefix == "wave_rol" || Prefix == "wave_ror" || 6512 Prefix == "row_bcast")) 6513 return MatchOperand_NoMatch; 6514 6515 Parser.Lex(); 6516 if (getLexer().isNot(AsmToken::Colon)) 6517 return MatchOperand_ParseFail; 6518 6519 if (Prefix == "quad_perm") { 6520 // quad_perm:[%d,%d,%d,%d] 6521 Parser.Lex(); 6522 if (getLexer().isNot(AsmToken::LBrac)) 6523 return MatchOperand_ParseFail; 6524 Parser.Lex(); 6525 6526 if (getParser().parseAbsoluteExpression(Int) || !(0 <= Int && Int <=3)) 6527 return MatchOperand_ParseFail; 6528 6529 for (int i = 0; i < 3; ++i) { 6530 if (getLexer().isNot(AsmToken::Comma)) 6531 return MatchOperand_ParseFail; 6532 Parser.Lex(); 6533 6534 int64_t Temp; 6535 if (getParser().parseAbsoluteExpression(Temp) || !(0 <= Temp && Temp <=3)) 6536 return MatchOperand_ParseFail; 6537 const int shift = i*2 + 2; 6538 Int += (Temp << shift); 6539 } 6540 6541 if (getLexer().isNot(AsmToken::RBrac)) 6542 return MatchOperand_ParseFail; 6543 Parser.Lex(); 6544 } else { 6545 // sel:%d 6546 Parser.Lex(); 6547 if (getParser().parseAbsoluteExpression(Int)) 6548 return MatchOperand_ParseFail; 6549 6550 if (Prefix == "row_shl" && 1 <= Int && Int <= 15) { 6551 Int |= DppCtrl::ROW_SHL0; 6552 } else if (Prefix == "row_shr" && 1 <= Int && Int <= 15) { 6553 Int |= DppCtrl::ROW_SHR0; 6554 } else if (Prefix == "row_ror" && 1 <= Int && Int <= 15) { 6555 Int |= DppCtrl::ROW_ROR0; 6556 } else if (Prefix == "wave_shl" && 1 == Int) { 6557 Int = DppCtrl::WAVE_SHL1; 6558 } else if (Prefix == "wave_rol" && 1 == Int) { 6559 Int = DppCtrl::WAVE_ROL1; 6560 } else if (Prefix == "wave_shr" && 1 == Int) { 6561 Int = DppCtrl::WAVE_SHR1; 6562 } else if (Prefix == "wave_ror" && 1 == Int) { 6563 Int = DppCtrl::WAVE_ROR1; 6564 } else if (Prefix == "row_bcast") { 6565 if (Int == 15) { 6566 Int = DppCtrl::BCAST15; 6567 } else if (Int == 31) { 6568 Int = DppCtrl::BCAST31; 6569 } else { 6570 return MatchOperand_ParseFail; 6571 } 6572 } else if (Prefix == "row_share" && 0 <= Int && Int <= 15) { 6573 Int |= DppCtrl::ROW_SHARE_FIRST; 6574 } else if (Prefix == "row_xmask" && 0 <= Int && Int <= 15) { 6575 Int |= DppCtrl::ROW_XMASK_FIRST; 6576 } else { 6577 return MatchOperand_ParseFail; 6578 } 6579 } 6580 } 6581 6582 Operands.push_back(AMDGPUOperand::CreateImm(this, Int, S, AMDGPUOperand::ImmTyDppCtrl)); 6583 return MatchOperand_Success; 6584 } 6585 6586 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultRowMask() const { 6587 return AMDGPUOperand::CreateImm(this, 0xf, SMLoc(), AMDGPUOperand::ImmTyDppRowMask); 6588 } 6589 6590 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultEndpgmImmOperands() const { 6591 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyEndpgm); 6592 } 6593 6594 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultBankMask() const { 6595 return AMDGPUOperand::CreateImm(this, 0xf, SMLoc(), AMDGPUOperand::ImmTyDppBankMask); 6596 } 6597 6598 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultBoundCtrl() const { 6599 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyDppBoundCtrl); 6600 } 6601 6602 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultFI() const { 6603 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyDppFi); 6604 } 6605 6606 void AMDGPUAsmParser::cvtDPP(MCInst &Inst, const OperandVector &Operands, bool IsDPP8) { 6607 OptionalImmIndexMap OptionalIdx; 6608 6609 unsigned I = 1; 6610 const MCInstrDesc &Desc = MII.get(Inst.getOpcode()); 6611 for (unsigned J = 0; J < Desc.getNumDefs(); ++J) { 6612 ((AMDGPUOperand 
&)*Operands[I++]).addRegOperands(Inst, 1); 6613 } 6614 6615 int Fi = 0; 6616 for (unsigned E = Operands.size(); I != E; ++I) { 6617 auto TiedTo = Desc.getOperandConstraint(Inst.getNumOperands(), 6618 MCOI::TIED_TO); 6619 if (TiedTo != -1) { 6620 assert((unsigned)TiedTo < Inst.getNumOperands()); 6621 // handle tied old or src2 for MAC instructions 6622 Inst.addOperand(Inst.getOperand(TiedTo)); 6623 } 6624 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]); 6625 // Add the register arguments 6626 if (Op.isReg() && validateVccOperand(Op.getReg())) { 6627 // VOP2b (v_add_u32, v_sub_u32 ...) dpp use "vcc" token. 6628 // Skip it. 6629 continue; 6630 } 6631 6632 if (IsDPP8) { 6633 if (Op.isDPP8()) { 6634 Op.addImmOperands(Inst, 1); 6635 } else if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) { 6636 Op.addRegWithFPInputModsOperands(Inst, 2); 6637 } else if (Op.isFI()) { 6638 Fi = Op.getImm(); 6639 } else if (Op.isReg()) { 6640 Op.addRegOperands(Inst, 1); 6641 } else { 6642 llvm_unreachable("Invalid operand type"); 6643 } 6644 } else { 6645 if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) { 6646 Op.addRegWithFPInputModsOperands(Inst, 2); 6647 } else if (Op.isDPPCtrl()) { 6648 Op.addImmOperands(Inst, 1); 6649 } else if (Op.isImm()) { 6650 // Handle optional arguments 6651 OptionalIdx[Op.getImmTy()] = I; 6652 } else { 6653 llvm_unreachable("Invalid operand type"); 6654 } 6655 } 6656 } 6657 6658 if (IsDPP8) { 6659 using namespace llvm::AMDGPU::DPP; 6660 Inst.addOperand(MCOperand::createImm(Fi? DPP8_FI_1 : DPP8_FI_0)); 6661 } else { 6662 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppRowMask, 0xf); 6663 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBankMask, 0xf); 6664 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBoundCtrl); 6665 if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::fi) != -1) { 6666 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppFi); 6667 } 6668 } 6669 } 6670 6671 //===----------------------------------------------------------------------===// 6672 // sdwa 6673 //===----------------------------------------------------------------------===// 6674 6675 OperandMatchResultTy 6676 AMDGPUAsmParser::parseSDWASel(OperandVector &Operands, StringRef Prefix, 6677 AMDGPUOperand::ImmTy Type) { 6678 using namespace llvm::AMDGPU::SDWA; 6679 6680 SMLoc S = Parser.getTok().getLoc(); 6681 StringRef Value; 6682 OperandMatchResultTy res; 6683 6684 res = parseStringWithPrefix(Prefix, Value); 6685 if (res != MatchOperand_Success) { 6686 return res; 6687 } 6688 6689 int64_t Int; 6690 Int = StringSwitch<int64_t>(Value) 6691 .Case("BYTE_0", SdwaSel::BYTE_0) 6692 .Case("BYTE_1", SdwaSel::BYTE_1) 6693 .Case("BYTE_2", SdwaSel::BYTE_2) 6694 .Case("BYTE_3", SdwaSel::BYTE_3) 6695 .Case("WORD_0", SdwaSel::WORD_0) 6696 .Case("WORD_1", SdwaSel::WORD_1) 6697 .Case("DWORD", SdwaSel::DWORD) 6698 .Default(0xffffffff); 6699 Parser.Lex(); // eat last token 6700 6701 if (Int == 0xffffffff) { 6702 return MatchOperand_ParseFail; 6703 } 6704 6705 Operands.push_back(AMDGPUOperand::CreateImm(this, Int, S, Type)); 6706 return MatchOperand_Success; 6707 } 6708 6709 OperandMatchResultTy 6710 AMDGPUAsmParser::parseSDWADstUnused(OperandVector &Operands) { 6711 using namespace llvm::AMDGPU::SDWA; 6712 6713 SMLoc S = Parser.getTok().getLoc(); 6714 StringRef Value; 6715 OperandMatchResultTy res; 6716 6717 res = parseStringWithPrefix("dst_unused", Value); 6718 if (res != MatchOperand_Success) { 6719 
return res; 6720 } 6721 6722 int64_t Int; 6723 Int = StringSwitch<int64_t>(Value) 6724 .Case("UNUSED_PAD", DstUnused::UNUSED_PAD) 6725 .Case("UNUSED_SEXT", DstUnused::UNUSED_SEXT) 6726 .Case("UNUSED_PRESERVE", DstUnused::UNUSED_PRESERVE) 6727 .Default(0xffffffff); 6728 Parser.Lex(); // eat last token 6729 6730 if (Int == 0xffffffff) { 6731 return MatchOperand_ParseFail; 6732 } 6733 6734 Operands.push_back(AMDGPUOperand::CreateImm(this, Int, S, AMDGPUOperand::ImmTySdwaDstUnused)); 6735 return MatchOperand_Success; 6736 } 6737 6738 void AMDGPUAsmParser::cvtSdwaVOP1(MCInst &Inst, const OperandVector &Operands) { 6739 cvtSDWA(Inst, Operands, SIInstrFlags::VOP1); 6740 } 6741 6742 void AMDGPUAsmParser::cvtSdwaVOP2(MCInst &Inst, const OperandVector &Operands) { 6743 cvtSDWA(Inst, Operands, SIInstrFlags::VOP2); 6744 } 6745 6746 void AMDGPUAsmParser::cvtSdwaVOP2b(MCInst &Inst, const OperandVector &Operands) { 6747 cvtSDWA(Inst, Operands, SIInstrFlags::VOP2, true); 6748 } 6749 6750 void AMDGPUAsmParser::cvtSdwaVOPC(MCInst &Inst, const OperandVector &Operands) { 6751 cvtSDWA(Inst, Operands, SIInstrFlags::VOPC, isVI()); 6752 } 6753 6754 void AMDGPUAsmParser::cvtSDWA(MCInst &Inst, const OperandVector &Operands, 6755 uint64_t BasicInstType, bool skipVcc) { 6756 using namespace llvm::AMDGPU::SDWA; 6757 6758 OptionalImmIndexMap OptionalIdx; 6759 bool skippedVcc = false; 6760 6761 unsigned I = 1; 6762 const MCInstrDesc &Desc = MII.get(Inst.getOpcode()); 6763 for (unsigned J = 0; J < Desc.getNumDefs(); ++J) { 6764 ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1); 6765 } 6766 6767 for (unsigned E = Operands.size(); I != E; ++I) { 6768 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]); 6769 if (skipVcc && !skippedVcc && Op.isReg() && 6770 (Op.getReg() == AMDGPU::VCC || Op.getReg() == AMDGPU::VCC_LO)) { 6771 // VOP2b (v_add_u32, v_sub_u32 ...) sdwa use "vcc" token as dst. 6772 // Skip it if it's 2nd (e.g. v_add_i32_sdwa v1, vcc, v2, v3) 6773 // or 4th (v_addc_u32_sdwa v1, vcc, v2, v3, vcc) operand. 6774 // Skip VCC only if we didn't skip it on previous iteration. 
6775 if (BasicInstType == SIInstrFlags::VOP2 && 6776 (Inst.getNumOperands() == 1 || Inst.getNumOperands() == 5)) { 6777 skippedVcc = true; 6778 continue; 6779 } else if (BasicInstType == SIInstrFlags::VOPC && 6780 Inst.getNumOperands() == 0) { 6781 skippedVcc = true; 6782 continue; 6783 } 6784 } 6785 if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) { 6786 Op.addRegOrImmWithInputModsOperands(Inst, 2); 6787 } else if (Op.isImm()) { 6788 // Handle optional arguments 6789 OptionalIdx[Op.getImmTy()] = I; 6790 } else { 6791 llvm_unreachable("Invalid operand type"); 6792 } 6793 skippedVcc = false; 6794 } 6795 6796 if (Inst.getOpcode() != AMDGPU::V_NOP_sdwa_gfx10 && 6797 Inst.getOpcode() != AMDGPU::V_NOP_sdwa_gfx9 && 6798 Inst.getOpcode() != AMDGPU::V_NOP_sdwa_vi) { 6799 // v_nop_sdwa on vi/gfx9/gfx10 has no optional sdwa arguments 6800 switch (BasicInstType) { 6801 case SIInstrFlags::VOP1: 6802 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0); 6803 if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::omod) != -1) { 6804 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI, 0); 6805 } 6806 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstSel, SdwaSel::DWORD); 6807 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstUnused, DstUnused::UNUSED_PRESERVE); 6808 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, SdwaSel::DWORD); 6809 break; 6810 6811 case SIInstrFlags::VOP2: 6812 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0); 6813 if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::omod) != -1) { 6814 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI, 0); 6815 } 6816 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstSel, SdwaSel::DWORD); 6817 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstUnused, DstUnused::UNUSED_PRESERVE); 6818 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, SdwaSel::DWORD); 6819 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc1Sel, SdwaSel::DWORD); 6820 break; 6821 6822 case SIInstrFlags::VOPC: 6823 if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::clamp) != -1) 6824 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0); 6825 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, SdwaSel::DWORD); 6826 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc1Sel, SdwaSel::DWORD); 6827 break; 6828 6829 default: 6830 llvm_unreachable("Invalid instruction type.
Only VOP1, VOP2 and VOPC allowed"); 6831 } 6832 } 6833 6834 // Special case v_mac_{f16, f32}: 6835 // they have a src2 register operand that is tied to the dst operand 6836 if (Inst.getOpcode() == AMDGPU::V_MAC_F32_sdwa_vi || 6837 Inst.getOpcode() == AMDGPU::V_MAC_F16_sdwa_vi) { 6838 auto it = Inst.begin(); 6839 std::advance( 6840 it, AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::src2)); 6841 Inst.insert(it, Inst.getOperand(0)); // src2 = dst 6842 } 6843 } 6844 6845 //===----------------------------------------------------------------------===// 6846 // mAI 6847 //===----------------------------------------------------------------------===// 6848 6849 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultBLGP() const { 6850 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyBLGP); 6851 } 6852 6853 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultCBSZ() const { 6854 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyCBSZ); 6855 } 6856 6857 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultABID() const { 6858 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyABID); 6859 } 6860 6861 /// Force static initialization. 6862 extern "C" void LLVMInitializeAMDGPUAsmParser() { 6863 RegisterMCAsmParser<AMDGPUAsmParser> A(getTheAMDGPUTarget()); 6864 RegisterMCAsmParser<AMDGPUAsmParser> B(getTheGCNTarget()); 6865 } 6866 6867 #define GET_REGISTER_MATCHER 6868 #define GET_MATCHER_IMPLEMENTATION 6869 #define GET_MNEMONIC_SPELL_CHECKER 6870 #include "AMDGPUGenAsmMatcher.inc" 6871 6872 // This function should be defined after the auto-generated include so that we 6873 // have the MatchClassKind enum defined. 6874 unsigned AMDGPUAsmParser::validateTargetOperandClass(MCParsedAsmOperand &Op, 6875 unsigned Kind) { 6876 // Tokens like "glc" would be parsed as immediate operands in ParseOperand(). 6877 // But MatchInstructionImpl() expects a token and fails to validate the 6878 // operand. This method checks whether we were given an immediate operand but 6879 // expected the corresponding token. 6880 AMDGPUOperand &Operand = (AMDGPUOperand&)Op; 6881 switch (Kind) { 6882 case MCK_addr64: 6883 return Operand.isAddr64() ? Match_Success : Match_InvalidOperand; 6884 case MCK_gds: 6885 return Operand.isGDS() ? Match_Success : Match_InvalidOperand; 6886 case MCK_lds: 6887 return Operand.isLDS() ? Match_Success : Match_InvalidOperand; 6888 case MCK_glc: 6889 return Operand.isGLC() ? Match_Success : Match_InvalidOperand; 6890 case MCK_idxen: 6891 return Operand.isIdxen() ? Match_Success : Match_InvalidOperand; 6892 case MCK_offen: 6893 return Operand.isOffen() ? Match_Success : Match_InvalidOperand; 6894 case MCK_SSrcB32: 6895 // When operands have expression values, they will return true for isToken, 6896 // because it is not possible to distinguish between a token and an 6897 // expression at parse time. MatchInstructionImpl() will always try to 6898 // match an operand as a token, when isToken returns true, and when the 6899 // name of the expression is not a valid token, the match will fail, 6900 // so we need to handle it here. 6901 return Operand.isSSrcB32() ? Match_Success : Match_InvalidOperand; 6902 case MCK_SSrcF32: 6903 return Operand.isSSrcF32() ? Match_Success : Match_InvalidOperand; 6904 case MCK_SoppBrTarget: 6905 return Operand.isSoppBrTarget() ? Match_Success : Match_InvalidOperand; 6906 case MCK_VReg32OrOff: 6907 return Operand.isVReg32OrOff() ? Match_Success : Match_InvalidOperand; 6908 case MCK_InterpSlot: 6909 return Operand.isInterpSlot() ?
Match_Success : Match_InvalidOperand; 6910 case MCK_Attr: 6911 return Operand.isInterpAttr() ? Match_Success : Match_InvalidOperand; 6912 case MCK_AttrChan: 6913 return Operand.isAttrChan() ? Match_Success : Match_InvalidOperand; 6914 default: 6915 return Match_InvalidOperand; 6916 } 6917 } 6918 6919 //===----------------------------------------------------------------------===// 6920 // endpgm 6921 //===----------------------------------------------------------------------===// 6922 6923 OperandMatchResultTy AMDGPUAsmParser::parseEndpgmOp(OperandVector &Operands) { 6924 SMLoc S = Parser.getTok().getLoc(); 6925 int64_t Imm = 0; 6926 6927 if (!parseExpr(Imm)) { 6928 // The operand is optional, if not present default to 0 6929 Imm = 0; 6930 } 6931 6932 if (!isUInt<16>(Imm)) { 6933 Error(S, "expected a 16-bit value"); 6934 return MatchOperand_ParseFail; 6935 } 6936 6937 Operands.push_back( 6938 AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTyEndpgm)); 6939 return MatchOperand_Success; 6940 } 6941 6942 bool AMDGPUOperand::isEndpgm() const { return isImmTy(ImmTyEndpgm); } 6943