//===- AMDGPUAsmParser.cpp - Parse SI asm to MCInst instructions ----------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#include "AMDGPU.h"
#include "AMDKernelCodeT.h"
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
#include "MCTargetDesc/AMDGPUTargetStreamer.h"
#include "SIDefines.h"
#include "SIInstrInfo.h"
#include "TargetInfo/AMDGPUTargetInfo.h"
#include "Utils/AMDGPUAsmUtils.h"
#include "Utils/AMDGPUBaseInfo.h"
#include "Utils/AMDKernelCodeTUtils.h"
#include "llvm/ADT/APFloat.h"
#include "llvm/ADT/APInt.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallBitVector.h"
#include "llvm/ADT/SmallString.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/StringSwitch.h"
#include "llvm/ADT/Twine.h"
#include "llvm/BinaryFormat/ELF.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCInst.h"
#include "llvm/MC/MCInstrDesc.h"
#include "llvm/MC/MCInstrInfo.h"
#include "llvm/MC/MCParser/MCAsmLexer.h"
#include "llvm/MC/MCParser/MCAsmParser.h"
#include "llvm/MC/MCParser/MCAsmParserExtension.h"
#include "llvm/MC/MCParser/MCParsedAsmOperand.h"
#include "llvm/MC/MCParser/MCTargetAsmParser.h"
#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/MC/MCStreamer.h"
#include "llvm/MC/MCSubtargetInfo.h"
#include "llvm/MC/MCSymbol.h"
#include "llvm/Support/AMDGPUMetadata.h"
#include "llvm/Support/AMDHSAKernelDescriptor.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MachineValueType.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/SMLoc.h"
#include "llvm/Support/TargetParser.h"
#include "llvm/Support/TargetRegistry.h"
#include "llvm/Support/raw_ostream.h"
#include <algorithm>
#include <cassert>
#include <cstdint>
#include <cstring>
#include <iterator>
#include <map>
#include <memory>
#include <string>

using namespace llvm;
using namespace llvm::AMDGPU;
using namespace llvm::amdhsa;

namespace {

class AMDGPUAsmParser;

enum RegisterKind { IS_UNKNOWN, IS_VGPR, IS_SGPR, IS_AGPR, IS_TTMP, IS_SPECIAL };

//===----------------------------------------------------------------------===//
// Operand
//===----------------------------------------------------------------------===//

class AMDGPUOperand : public MCParsedAsmOperand {
  enum KindTy {
    Token,
    Immediate,
    Register,
    Expression
  } Kind;

  SMLoc StartLoc, EndLoc;
  const AMDGPUAsmParser *AsmParser;

public:
  AMDGPUOperand(KindTy Kind_, const AMDGPUAsmParser *AsmParser_)
      : MCParsedAsmOperand(), Kind(Kind_), AsmParser(AsmParser_) {}

  using Ptr = std::unique_ptr<AMDGPUOperand>;

  struct Modifiers {
    bool Abs = false;
    bool Neg = false;
    bool Sext = false;

    bool hasFPModifiers() const { return Abs || Neg; }
    bool hasIntModifiers() const { return Sext; }
    bool hasModifiers() const { return hasFPModifiers() || hasIntModifiers(); }

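    // Fold the parsed abs/neg/sext flags into the numeric src_modifiers
    // operand value (SISrcMods bits) that is attached to the MCInst
    // alongside the register or immediate.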
    int64_t getFPModifiersOperand() const {
      int64_t Operand = 0;
      Operand |= Abs ? SISrcMods::ABS : 0u;
      Operand |= Neg ? SISrcMods::NEG : 0u;
      return Operand;
    }

    int64_t getIntModifiersOperand() const {
      int64_t Operand = 0;
      Operand |= Sext ? SISrcMods::SEXT : 0u;
      return Operand;
    }

    int64_t getModifiersOperand() const {
      assert(!(hasFPModifiers() && hasIntModifiers())
           && "fp and int modifiers should not be used simultaneously");
      if (hasFPModifiers()) {
        return getFPModifiersOperand();
      } else if (hasIntModifiers()) {
        return getIntModifiersOperand();
      } else {
        return 0;
      }
    }

    friend raw_ostream &operator <<(raw_ostream &OS, AMDGPUOperand::Modifiers Mods);
  };

  enum ImmTy {
    ImmTyNone,
    ImmTyGDS,
    ImmTyLDS,
    ImmTyOffen,
    ImmTyIdxen,
    ImmTyAddr64,
    ImmTyOffset,
    ImmTyInstOffset,
    ImmTyOffset0,
    ImmTyOffset1,
    ImmTyDLC,
    ImmTyGLC,
    ImmTySLC,
    ImmTyTFE,
    ImmTyD16,
    ImmTyClampSI,
    ImmTyOModSI,
    ImmTyDPP8,
    ImmTyDppCtrl,
    ImmTyDppRowMask,
    ImmTyDppBankMask,
    ImmTyDppBoundCtrl,
    ImmTyDppFi,
    ImmTySdwaDstSel,
    ImmTySdwaSrc0Sel,
    ImmTySdwaSrc1Sel,
    ImmTySdwaDstUnused,
    ImmTyDMask,
    ImmTyDim,
    ImmTyUNorm,
    ImmTyDA,
    ImmTyR128A16,
    ImmTyLWE,
    ImmTyExpTgt,
    ImmTyExpCompr,
    ImmTyExpVM,
    ImmTyFORMAT,
    ImmTyHwreg,
    ImmTyOff,
    ImmTySendMsg,
    ImmTyInterpSlot,
    ImmTyInterpAttr,
    ImmTyAttrChan,
    ImmTyOpSel,
    ImmTyOpSelHi,
    ImmTyNegLo,
    ImmTyNegHi,
    ImmTySwizzle,
    ImmTyGprIdxMode,
    ImmTyHigh,
    ImmTyBLGP,
    ImmTyCBSZ,
    ImmTyABID,
    ImmTyEndpgm,
  };

private:
  struct TokOp {
    const char *Data;
    unsigned Length;
  };

  struct ImmOp {
    int64_t Val;
    ImmTy Type;
    bool IsFPImm;
    Modifiers Mods;
  };

  struct RegOp {
    unsigned RegNo;
    Modifiers Mods;
  };

  union {
    TokOp Tok;
    ImmOp Imm;
    RegOp Reg;
    const MCExpr *Expr;
  };

public:
  bool isToken() const override {
    if (Kind == Token)
      return true;

    // When parsing operands, we can't always tell if something was meant to be
    // a token, like 'gds', or an expression that references a global variable.
    // In this case, we assume the string is an expression, and if we need to
    // interpret it as a token, then we treat the symbol name as the token.
    return isSymbolRefExpr();
  }

  bool isSymbolRefExpr() const {
    return isExpr() && Expr && isa<MCSymbolRefExpr>(Expr);
  }

  bool isImm() const override {
    return Kind == Immediate;
  }

  bool isInlinableImm(MVT type) const;
  bool isLiteralImm(MVT type) const;

  bool isRegKind() const {
    return Kind == Register;
  }

  bool isReg() const override {
    return isRegKind() && !hasModifiers();
  }

  bool isRegOrImmWithInputMods(unsigned RCID, MVT type) const {
    return isRegClass(RCID) || isInlinableImm(type) || isLiteralImm(type);
  }

  bool isRegOrImmWithInt16InputMods() const {
    return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::i16);
  }

  bool isRegOrImmWithInt32InputMods() const {
    return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::i32);
  }

  bool isRegOrImmWithInt64InputMods() const {
    return isRegOrImmWithInputMods(AMDGPU::VS_64RegClassID, MVT::i64);
  }

  bool isRegOrImmWithFP16InputMods() const {
    return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::f16);
  }

  bool isRegOrImmWithFP32InputMods() const {
    return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::f32);
  }

  bool isRegOrImmWithFP64InputMods() const {
    return isRegOrImmWithInputMods(AMDGPU::VS_64RegClassID, MVT::f64);
  }

  bool isVReg() const {
    return isRegClass(AMDGPU::VGPR_32RegClassID) ||
           isRegClass(AMDGPU::VReg_64RegClassID) ||
           isRegClass(AMDGPU::VReg_96RegClassID) ||
           isRegClass(AMDGPU::VReg_128RegClassID) ||
           isRegClass(AMDGPU::VReg_160RegClassID) ||
           isRegClass(AMDGPU::VReg_256RegClassID) ||
           isRegClass(AMDGPU::VReg_512RegClassID) ||
           isRegClass(AMDGPU::VReg_1024RegClassID);
  }

  bool isVReg32() const {
    return isRegClass(AMDGPU::VGPR_32RegClassID);
  }

  bool isVReg32OrOff() const {
    return isOff() || isVReg32();
  }

  bool isSDWAOperand(MVT type) const;
  bool isSDWAFP16Operand() const;
  bool isSDWAFP32Operand() const;
  bool isSDWAInt16Operand() const;
  bool isSDWAInt32Operand() const;

  bool isImmTy(ImmTy ImmT) const {
    return isImm() && Imm.Type == ImmT;
  }

  bool isImmModifier() const {
    return isImm() && Imm.Type != ImmTyNone;
  }

  bool isClampSI() const { return isImmTy(ImmTyClampSI); }
  bool isOModSI() const { return isImmTy(ImmTyOModSI); }
  bool isDMask() const { return isImmTy(ImmTyDMask); }
  bool isDim() const { return isImmTy(ImmTyDim); }
  bool isUNorm() const { return isImmTy(ImmTyUNorm); }
  bool isDA() const { return isImmTy(ImmTyDA); }
  bool isR128A16() const { return isImmTy(ImmTyR128A16); }
  bool isLWE() const { return isImmTy(ImmTyLWE); }
  bool isOff() const { return isImmTy(ImmTyOff); }
  bool isExpTgt() const { return isImmTy(ImmTyExpTgt); }
  bool isExpVM() const { return isImmTy(ImmTyExpVM); }
  bool isExpCompr() const { return isImmTy(ImmTyExpCompr); }
  bool isOffen() const { return isImmTy(ImmTyOffen); }
  bool isIdxen() const { return isImmTy(ImmTyIdxen); }
  bool isAddr64() const { return isImmTy(ImmTyAddr64); }
  bool isOffset() const { return isImmTy(ImmTyOffset) && isUInt<16>(getImm()); }
  bool isOffset0() const { return isImmTy(ImmTyOffset0) && isUInt<8>(getImm()); }
  bool isOffset1() const { return isImmTy(ImmTyOffset1) && isUInt<8>(getImm()); }

  bool isFlatOffset() const { return isImmTy(ImmTyOffset) || isImmTy(ImmTyInstOffset); }
  bool isGDS() const { return isImmTy(ImmTyGDS); }
  bool isLDS() const { return isImmTy(ImmTyLDS); }
  bool isDLC() const { return isImmTy(ImmTyDLC); }
  bool isGLC() const { return isImmTy(ImmTyGLC); }
  bool isSLC() const { return isImmTy(ImmTySLC); }
  bool isTFE() const { return isImmTy(ImmTyTFE); }
  bool isD16() const { return isImmTy(ImmTyD16); }
  bool isFORMAT() const { return isImmTy(ImmTyFORMAT) && isUInt<8>(getImm()); }
  bool isBankMask() const { return isImmTy(ImmTyDppBankMask); }
  bool isRowMask() const { return isImmTy(ImmTyDppRowMask); }
  bool isBoundCtrl() const { return isImmTy(ImmTyDppBoundCtrl); }
  bool isFI() const { return isImmTy(ImmTyDppFi); }
  bool isSDWADstSel() const { return isImmTy(ImmTySdwaDstSel); }
  bool isSDWASrc0Sel() const { return isImmTy(ImmTySdwaSrc0Sel); }
  bool isSDWASrc1Sel() const { return isImmTy(ImmTySdwaSrc1Sel); }
  bool isSDWADstUnused() const { return isImmTy(ImmTySdwaDstUnused); }
  bool isInterpSlot() const { return isImmTy(ImmTyInterpSlot); }
  bool isInterpAttr() const { return isImmTy(ImmTyInterpAttr); }
  bool isAttrChan() const { return isImmTy(ImmTyAttrChan); }
  bool isOpSel() const { return isImmTy(ImmTyOpSel); }
  bool isOpSelHi() const { return isImmTy(ImmTyOpSelHi); }
  bool isNegLo() const { return isImmTy(ImmTyNegLo); }
  bool isNegHi() const { return isImmTy(ImmTyNegHi); }
  bool isHigh() const { return isImmTy(ImmTyHigh); }

  bool isMod() const {
    return isClampSI() || isOModSI();
  }

  bool isRegOrImm() const {
    return isReg() || isImm();
  }

  bool isRegClass(unsigned RCID) const;

  bool isInlineValue() const;

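  // Note: the source-operand predicates below follow a rough naming scheme:
  // "SCSrc" accepts an SGPR or an inline constant, "VCSrc" accepts a
  // VGPR/SGPR or an inline constant, and the "SSrc"/"VSrc" variants
  // additionally accept a 32-bit literal. "VISrc"/"AISrc" restrict the
  // register to a VGPR or AGPR, respectively.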
  bool isRegOrInlineNoMods(unsigned RCID, MVT type) const {
    return (isRegClass(RCID) || isInlinableImm(type)) && !hasModifiers();
  }

  bool isSCSrcB16() const {
    return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::i16);
  }

  bool isSCSrcV2B16() const {
    return isSCSrcB16();
  }

  bool isSCSrcB32() const {
    return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::i32);
  }

  bool isSCSrcB64() const {
    return isRegOrInlineNoMods(AMDGPU::SReg_64RegClassID, MVT::i64);
  }

  bool isBoolReg() const;

  bool isSCSrcF16() const {
    return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::f16);
  }

  bool isSCSrcV2F16() const {
    return isSCSrcF16();
  }

  bool isSCSrcF32() const {
    return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::f32);
  }

  bool isSCSrcF64() const {
    return isRegOrInlineNoMods(AMDGPU::SReg_64RegClassID, MVT::f64);
  }

  bool isSSrcB32() const {
    return isSCSrcB32() || isLiteralImm(MVT::i32) || isExpr();
  }

  bool isSSrcB16() const {
    return isSCSrcB16() || isLiteralImm(MVT::i16);
  }

  bool isSSrcV2B16() const {
    llvm_unreachable("cannot happen");
    return isSSrcB16();
  }

  bool isSSrcB64() const {
    // TODO: Find out how SALU supports extension of 32-bit literals to 64 bits.
    // See isVSrc64().
    return isSCSrcB64() || isLiteralImm(MVT::i64);
  }

  bool isSSrcF32() const {
    return isSCSrcB32() || isLiteralImm(MVT::f32) || isExpr();
  }

  bool isSSrcF64() const {
    return isSCSrcB64() || isLiteralImm(MVT::f64);
  }

  bool isSSrcF16() const {
    return isSCSrcB16() || isLiteralImm(MVT::f16);
  }

  bool isSSrcV2F16() const {
    llvm_unreachable("cannot happen");
    return isSSrcF16();
  }

  bool isSSrcOrLdsB32() const {
    return isRegOrInlineNoMods(AMDGPU::SRegOrLds_32RegClassID, MVT::i32) ||
           isLiteralImm(MVT::i32) || isExpr();
  }

  bool isVCSrcB32() const {
    return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::i32);
  }

  bool isVCSrcB64() const {
    return isRegOrInlineNoMods(AMDGPU::VS_64RegClassID, MVT::i64);
  }

  bool isVCSrcB16() const {
    return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::i16);
  }

  bool isVCSrcV2B16() const {
    return isVCSrcB16();
  }

  bool isVCSrcF32() const {
    return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::f32);
  }

  bool isVCSrcF64() const {
    return isRegOrInlineNoMods(AMDGPU::VS_64RegClassID, MVT::f64);
  }

  bool isVCSrcF16() const {
    return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::f16);
  }

  bool isVCSrcV2F16() const {
    return isVCSrcF16();
  }

  bool isVSrcB32() const {
    return isVCSrcF32() || isLiteralImm(MVT::i32) || isExpr();
  }

  bool isVSrcB64() const {
    return isVCSrcF64() || isLiteralImm(MVT::i64);
  }

  bool isVSrcB16() const {
    return isVCSrcF16() || isLiteralImm(MVT::i16);
  }

  bool isVSrcV2B16() const {
    return isVSrcB16() || isLiteralImm(MVT::v2i16);
  }

  bool isVSrcF32() const {
    return isVCSrcF32() || isLiteralImm(MVT::f32) || isExpr();
  }

  bool isVSrcF64() const {
    return isVCSrcF64() || isLiteralImm(MVT::f64);
  }

  bool isVSrcF16() const {
    return isVCSrcF16() || isLiteralImm(MVT::f16);
  }

  bool isVSrcV2F16() const {
    return isVSrcF16() || isLiteralImm(MVT::v2f16);
  }

  bool isVISrcB32() const {
    return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::i32);
  }

  bool isVISrcB16() const {
    return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::i16);
  }

  bool isVISrcV2B16() const {
    return isVISrcB16();
  }

  bool isVISrcF32() const {
    return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::f32);
  }

  bool isVISrcF16() const {
    return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::f16);
  }

  bool isVISrcV2F16() const {
    return isVISrcF16() || isVISrcB32();
  }

  bool isAISrcB32() const {
    return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::i32);
  }

  bool isAISrcB16() const {
    return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::i16);
  }

  bool isAISrcV2B16() const {
    return isAISrcB16();
  }

  bool isAISrcF32() const {
    return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::f32);
  }

  bool isAISrcF16() const {
    return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::f16);
  }

  bool isAISrcV2F16() const {
    return isAISrcF16() || isAISrcB32();
  }

  bool isAISrc_128B32() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::i32);
  }

  bool isAISrc_128B16() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::i16);
  }

  bool isAISrc_128V2B16() const {
    return isAISrc_128B16();
  }

  bool isAISrc_128F32() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::f32);
  }

  bool isAISrc_128F16() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::f16);
  }

  bool isAISrc_128V2F16() const {
    return isAISrc_128F16() || isAISrc_128B32();
  }

  bool isAISrc_512B32() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::i32);
  }

  bool isAISrc_512B16() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::i16);
  }

  bool isAISrc_512V2B16() const {
    return isAISrc_512B16();
  }

  bool isAISrc_512F32() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::f32);
  }

  bool isAISrc_512F16() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::f16);
  }

  bool isAISrc_512V2F16() const {
    return isAISrc_512F16() || isAISrc_512B32();
  }

  bool isAISrc_1024B32() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::i32);
  }

  bool isAISrc_1024B16() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::i16);
  }

  bool isAISrc_1024V2B16() const {
    return isAISrc_1024B16();
  }

  bool isAISrc_1024F32() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::f32);
  }

  bool isAISrc_1024F16() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::f16);
  }

  bool isAISrc_1024V2F16() const {
    return isAISrc_1024F16() || isAISrc_1024B32();
  }

  bool isKImmFP32() const {
    return isLiteralImm(MVT::f32);
  }

  bool isKImmFP16() const {
    return isLiteralImm(MVT::f16);
  }

  bool isMem() const override {
    return false;
  }

  bool isExpr() const {
    return Kind == Expression;
  }

  bool isSoppBrTarget() const {
    return isExpr() || isImm();
  }

  bool isSWaitCnt() const;
  bool isHwreg() const;
  bool isSendMsg() const;
  bool isSwizzle() const;
  bool isSMRDOffset8() const;
  bool isSMRDOffset20() const;
  bool isSMRDLiteralOffset() const;
  bool isDPP8() const;
  bool isDPPCtrl() const;
  bool isBLGP() const;
  bool isCBSZ() const;
  bool isABID() const;
  bool isGPRIdxMode() const;
  bool isS16Imm() const;
  bool isU16Imm() const;
  bool isEndpgm() const;

  StringRef getExpressionAsToken() const {
    assert(isExpr());
    const MCSymbolRefExpr *S = cast<MCSymbolRefExpr>(Expr);
    return S->getSymbol().getName();
  }

  StringRef getToken() const {
    assert(isToken());

    if (Kind == Expression)
      return getExpressionAsToken();

    return StringRef(Tok.Data, Tok.Length);
  }

  int64_t getImm() const {
    assert(isImm());
    return Imm.Val;
  }

  ImmTy getImmTy() const {
    assert(isImm());
    return Imm.Type;
  }

  unsigned getReg() const override {
    assert(isRegKind());
    return Reg.RegNo;
  }

  SMLoc getStartLoc() const override {
    return StartLoc;
  }

  SMLoc getEndLoc() const override {
    return EndLoc;
  }

  SMRange getLocRange() const {
    return SMRange(StartLoc, EndLoc);
  }

  Modifiers getModifiers() const {
    assert(isRegKind() || isImmTy(ImmTyNone));
    return isRegKind() ? Reg.Mods : Imm.Mods;
  }

  void setModifiers(Modifiers Mods) {
    assert(isRegKind() || isImmTy(ImmTyNone));
    if (isRegKind())
      Reg.Mods = Mods;
    else
      Imm.Mods = Mods;
  }

  bool hasModifiers() const {
    return getModifiers().hasModifiers();
  }

  bool hasFPModifiers() const {
    return getModifiers().hasFPModifiers();
  }

  bool hasIntModifiers() const {
    return getModifiers().hasIntModifiers();
  }

  uint64_t applyInputFPModifiers(uint64_t Val, unsigned Size) const;

  void addImmOperands(MCInst &Inst, unsigned N, bool ApplyModifiers = true) const;

  void addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyModifiers) const;

  template <unsigned Bitwidth>
  void addKImmFPOperands(MCInst &Inst, unsigned N) const;

  void addKImmFP16Operands(MCInst &Inst, unsigned N) const {
    addKImmFPOperands<16>(Inst, N);
  }

  void addKImmFP32Operands(MCInst &Inst, unsigned N) const {
    addKImmFPOperands<32>(Inst, N);
  }

  void addRegOperands(MCInst &Inst, unsigned N) const;

  void addBoolRegOperands(MCInst &Inst, unsigned N) const {
    addRegOperands(Inst, N);
  }

  void addRegOrImmOperands(MCInst &Inst, unsigned N) const {
    if (isRegKind())
      addRegOperands(Inst, N);
    else if (isExpr())
      Inst.addOperand(MCOperand::createExpr(Expr));
    else
      addImmOperands(Inst, N);
  }

  void addRegOrImmWithInputModsOperands(MCInst &Inst, unsigned N) const {
    Modifiers Mods = getModifiers();
    Inst.addOperand(MCOperand::createImm(Mods.getModifiersOperand()));
    if (isRegKind()) {
      addRegOperands(Inst, N);
    } else {
      addImmOperands(Inst, N, false);
    }
  }

  void addRegOrImmWithFPInputModsOperands(MCInst &Inst, unsigned N) const {
    assert(!hasIntModifiers());
    addRegOrImmWithInputModsOperands(Inst, N);
  }

  void addRegOrImmWithIntInputModsOperands(MCInst &Inst, unsigned N) const {
    assert(!hasFPModifiers());
    addRegOrImmWithInputModsOperands(Inst, N);
  }

  void addRegWithInputModsOperands(MCInst &Inst, unsigned N) const {
    Modifiers Mods = getModifiers();
    Inst.addOperand(MCOperand::createImm(Mods.getModifiersOperand()));
    assert(isRegKind());
    addRegOperands(Inst, N);
  }

  void addRegWithFPInputModsOperands(MCInst &Inst, unsigned N) const {
    assert(!hasIntModifiers());
    addRegWithInputModsOperands(Inst, N);
  }

  void addRegWithIntInputModsOperands(MCInst &Inst, unsigned N) const {
    assert(!hasFPModifiers());
    addRegWithInputModsOperands(Inst, N);
  }

  void addSoppBrTargetOperands(MCInst &Inst, unsigned N) const {
    if (isImm())
      addImmOperands(Inst, N);
    else {
      assert(isExpr());
      Inst.addOperand(MCOperand::createExpr(Expr));
    }
  }

  static void printImmTy(raw_ostream& OS, ImmTy Type) {
    switch (Type) {
    case ImmTyNone: OS << "None"; break;
    case ImmTyGDS: OS << "GDS"; break;
    case ImmTyLDS: OS << "LDS"; break;
    case ImmTyOffen: OS << "Offen"; break;
    case ImmTyIdxen: OS << "Idxen"; break;
    case ImmTyAddr64: OS << "Addr64"; break;
    case ImmTyOffset: OS << "Offset"; break;
    case ImmTyInstOffset: OS << "InstOffset"; break;
    case ImmTyOffset0: OS << "Offset0"; break;
    case ImmTyOffset1: OS << "Offset1"; break;
    case ImmTyDLC: OS << "DLC"; break;
    case ImmTyGLC: OS << "GLC"; break;
    case ImmTySLC: OS << "SLC"; break;
    case ImmTyTFE: OS << "TFE"; break;
    case ImmTyD16: OS << "D16"; break;
    case ImmTyFORMAT: OS << "FORMAT"; break;
    case ImmTyClampSI: OS << "ClampSI"; break;
    case ImmTyOModSI: OS << "OModSI"; break;
    case ImmTyDPP8: OS << "DPP8"; break;
    case ImmTyDppCtrl: OS << "DppCtrl"; break;
    case ImmTyDppRowMask: OS << "DppRowMask"; break;
    case ImmTyDppBankMask: OS << "DppBankMask"; break;
    case ImmTyDppBoundCtrl: OS << "DppBoundCtrl"; break;
    case ImmTyDppFi: OS << "FI"; break;
    case ImmTySdwaDstSel: OS << "SdwaDstSel"; break;
    case ImmTySdwaSrc0Sel: OS << "SdwaSrc0Sel"; break;
    case ImmTySdwaSrc1Sel: OS << "SdwaSrc1Sel"; break;
    case ImmTySdwaDstUnused: OS << "SdwaDstUnused"; break;
    case ImmTyDMask: OS << "DMask"; break;
    case ImmTyDim: OS << "Dim"; break;
    case ImmTyUNorm: OS << "UNorm"; break;
    case ImmTyDA: OS << "DA"; break;
    case ImmTyR128A16: OS << "R128A16"; break;
    case ImmTyLWE: OS << "LWE"; break;
    case ImmTyOff: OS << "Off"; break;
    case ImmTyExpTgt: OS << "ExpTgt"; break;
    case ImmTyExpCompr: OS << "ExpCompr"; break;
    case ImmTyExpVM: OS << "ExpVM"; break;
    case ImmTyHwreg: OS << "Hwreg"; break;
    case ImmTySendMsg: OS << "SendMsg"; break;
    case ImmTyInterpSlot: OS << "InterpSlot"; break;
    case ImmTyInterpAttr: OS << "InterpAttr"; break;
    case ImmTyAttrChan: OS << "AttrChan"; break;
    case ImmTyOpSel: OS << "OpSel"; break;
    case ImmTyOpSelHi: OS << "OpSelHi"; break;
    case ImmTyNegLo: OS << "NegLo"; break;
    case ImmTyNegHi: OS << "NegHi"; break;
    case ImmTySwizzle: OS << "Swizzle"; break;
    case ImmTyGprIdxMode: OS << "GprIdxMode"; break;
    case ImmTyHigh: OS << "High"; break;
    case ImmTyBLGP: OS << "BLGP"; break;
    case ImmTyCBSZ: OS << "CBSZ"; break;
    case ImmTyABID: OS << "ABID"; break;
    case ImmTyEndpgm: OS << "Endpgm"; break;
    }
  }

  void print(raw_ostream &OS) const override {
    switch (Kind) {
    case Register:
      OS << "<register " << getReg() << " mods: " << Reg.Mods << '>';
      break;
    case Immediate:
      OS << '<' << getImm();
      if (getImmTy() != ImmTyNone) {
        OS << " type: "; printImmTy(OS, getImmTy());
      }
      OS << " mods: " << Imm.Mods << '>';
      break;
    case Token:
      OS << '\'' << getToken() << '\'';
      break;
    case Expression:
      OS << "<expr " << *Expr << '>';
      break;
    }
  }

  static AMDGPUOperand::Ptr CreateImm(const AMDGPUAsmParser *AsmParser,
                                      int64_t Val, SMLoc Loc,
                                      ImmTy Type = ImmTyNone,
                                      bool IsFPImm = false) {
    auto Op = std::make_unique<AMDGPUOperand>(Immediate, AsmParser);
    Op->Imm.Val = Val;
    Op->Imm.IsFPImm = IsFPImm;
    Op->Imm.Type = Type;
    Op->Imm.Mods = Modifiers();
    Op->StartLoc = Loc;
    Op->EndLoc = Loc;
    return Op;
  }

  static AMDGPUOperand::Ptr CreateToken(const AMDGPUAsmParser *AsmParser,
                                        StringRef Str, SMLoc Loc,
                                        bool HasExplicitEncodingSize = true) {
    auto Res = std::make_unique<AMDGPUOperand>(Token, AsmParser);
    Res->Tok.Data = Str.data();
    Res->Tok.Length = Str.size();
    Res->StartLoc = Loc;
    Res->EndLoc = Loc;
    return Res;
  }

  static AMDGPUOperand::Ptr CreateReg(const AMDGPUAsmParser *AsmParser,
                                      unsigned RegNo, SMLoc S,
                                      SMLoc E) {
    auto Op = std::make_unique<AMDGPUOperand>(Register, AsmParser);
    Op->Reg.RegNo = RegNo;
    Op->Reg.Mods = Modifiers();
    Op->StartLoc = S;
    Op->EndLoc = E;
    return Op;
  }

  static AMDGPUOperand::Ptr CreateExpr(const AMDGPUAsmParser *AsmParser,
                                       const class MCExpr *Expr, SMLoc S) {
    auto Op = std::make_unique<AMDGPUOperand>(Expression, AsmParser);
    Op->Expr = Expr;
    Op->StartLoc = S;
    Op->EndLoc = S;
    return Op;
  }
};

raw_ostream &operator <<(raw_ostream &OS, AMDGPUOperand::Modifiers Mods) {
  OS << "abs:" << Mods.Abs << " neg: " << Mods.Neg << " sext:" << Mods.Sext;
  return OS;
}

//===----------------------------------------------------------------------===//
// AsmParser
//===----------------------------------------------------------------------===//

// Holds info related to the current kernel, e.g. count of SGPRs used.
// Kernel scope begins at .amdgpu_hsa_kernel directive, ends at next
// .amdgpu_hsa_kernel or at EOF.
class KernelScopeInfo {
  int SgprIndexUnusedMin = -1;
  int VgprIndexUnusedMin = -1;
  MCContext *Ctx = nullptr;

  void usesSgprAt(int i) {
    if (i >= SgprIndexUnusedMin) {
      SgprIndexUnusedMin = ++i;
      if (Ctx) {
        MCSymbol * const Sym = Ctx->getOrCreateSymbol(Twine(".kernel.sgpr_count"));
        Sym->setVariableValue(MCConstantExpr::create(SgprIndexUnusedMin, *Ctx));
      }
    }
  }

  void usesVgprAt(int i) {
    if (i >= VgprIndexUnusedMin) {
      VgprIndexUnusedMin = ++i;
      if (Ctx) {
        MCSymbol * const Sym = Ctx->getOrCreateSymbol(Twine(".kernel.vgpr_count"));
        Sym->setVariableValue(MCConstantExpr::create(VgprIndexUnusedMin, *Ctx));
      }
    }
  }

public:
  KernelScopeInfo() = default;

  void initialize(MCContext &Context) {
    Ctx = &Context;
    usesSgprAt(SgprIndexUnusedMin = -1);
    usesVgprAt(VgprIndexUnusedMin = -1);
  }

  void usesRegister(RegisterKind RegKind, unsigned DwordRegIndex, unsigned RegWidth) {
    switch (RegKind) {
    case IS_SGPR: usesSgprAt(DwordRegIndex + RegWidth - 1); break;
    case IS_AGPR: // fall through
    case IS_VGPR: usesVgprAt(DwordRegIndex + RegWidth - 1); break;
    default: break;
    }
  }
};

class AMDGPUAsmParser : public MCTargetAsmParser {
  MCAsmParser &Parser;

  // Number of extra operands parsed after the first optional operand.
  // This may be necessary to skip hardcoded mandatory operands.
  static const unsigned MAX_OPR_LOOKAHEAD = 8;

  unsigned ForcedEncodingSize = 0;
  bool ForcedDPP = false;
  bool ForcedSDWA = false;
  KernelScopeInfo KernelScope;

  /// @name Auto-generated Match Functions
  /// {

#define GET_ASSEMBLER_HEADER
#include "AMDGPUGenAsmMatcher.inc"

  /// }

private:
  bool ParseAsAbsoluteExpression(uint32_t &Ret);
  bool OutOfRangeError(SMRange Range);
  /// Calculate VGPR/SGPR blocks required for given target, reserved
  /// registers, and user-specified NextFreeXGPR values.
  ///
  /// \param Features [in] Target features, used for bug corrections.
  /// \param VCCUsed [in] Whether VCC special SGPR is reserved.
  /// \param FlatScrUsed [in] Whether FLAT_SCRATCH special SGPR is reserved.
  /// \param XNACKUsed [in] Whether XNACK_MASK special SGPR is reserved.
  /// \param EnableWavefrontSize32 [in] Value of ENABLE_WAVEFRONT_SIZE32 kernel
  /// descriptor field, if valid.
  /// \param NextFreeVGPR [in] Max VGPR number referenced, plus one.
  /// \param VGPRRange [in] Token range, used for VGPR diagnostics.
  /// \param NextFreeSGPR [in] Max SGPR number referenced, plus one.
  /// \param SGPRRange [in] Token range, used for SGPR diagnostics.
  /// \param VGPRBlocks [out] Result VGPR block count.
  /// \param SGPRBlocks [out] Result SGPR block count.
  bool calculateGPRBlocks(const FeatureBitset &Features, bool VCCUsed,
                          bool FlatScrUsed, bool XNACKUsed,
                          Optional<bool> EnableWavefrontSize32, unsigned NextFreeVGPR,
                          SMRange VGPRRange, unsigned NextFreeSGPR,
                          SMRange SGPRRange, unsigned &VGPRBlocks,
                          unsigned &SGPRBlocks);
  bool ParseDirectiveAMDGCNTarget();
  bool ParseDirectiveAMDHSAKernel();
  bool ParseDirectiveMajorMinor(uint32_t &Major, uint32_t &Minor);
  bool ParseDirectiveHSACodeObjectVersion();
  bool ParseDirectiveHSACodeObjectISA();
  bool ParseAMDKernelCodeTValue(StringRef ID, amd_kernel_code_t &Header);
  bool ParseDirectiveAMDKernelCodeT();
  bool subtargetHasRegister(const MCRegisterInfo &MRI, unsigned RegNo) const;
  bool ParseDirectiveAMDGPUHsaKernel();

  bool ParseDirectiveISAVersion();
  bool ParseDirectiveHSAMetadata();
  bool ParseDirectivePALMetadataBegin();
  bool ParseDirectivePALMetadata();
  bool ParseDirectiveAMDGPULDS();

  /// Common code to parse out a block of text (typically YAML) between start and
  /// end directives.
  bool ParseToEndDirective(const char *AssemblerDirectiveBegin,
                           const char *AssemblerDirectiveEnd,
                           std::string &CollectString);

  bool AddNextRegisterToList(unsigned& Reg, unsigned& RegWidth,
                             RegisterKind RegKind, unsigned Reg1);
  bool ParseAMDGPURegister(RegisterKind& RegKind, unsigned& Reg,
                           unsigned& RegNum, unsigned& RegWidth);
  unsigned ParseRegularReg(RegisterKind &RegKind,
                           unsigned &RegNum,
                           unsigned &RegWidth);
  unsigned ParseSpecialReg(RegisterKind &RegKind,
                           unsigned &RegNum,
                           unsigned &RegWidth);
  unsigned ParseRegList(RegisterKind &RegKind,
                        unsigned &RegNum,
                        unsigned &RegWidth);
  bool ParseRegRange(unsigned& Num, unsigned& Width);
  unsigned getRegularReg(RegisterKind RegKind,
                         unsigned RegNum,
                         unsigned RegWidth);

  bool isRegister();
  bool isRegister(const AsmToken &Token, const AsmToken &NextToken) const;
  Optional<StringRef> getGprCountSymbolName(RegisterKind RegKind);
  void initializeGprCountSymbol(RegisterKind RegKind);
  bool updateGprCountSymbols(RegisterKind RegKind, unsigned DwordRegIndex,
                             unsigned RegWidth);
  void cvtMubufImpl(MCInst &Inst, const OperandVector &Operands,
                    bool IsAtomic, bool IsAtomicReturn, bool IsLds = false);
  void cvtDSImpl(MCInst &Inst, const OperandVector &Operands,
                 bool IsGdsHardcoded);

public:
  enum AMDGPUMatchResultTy {
    Match_PreferE32 = FIRST_TARGET_MATCH_RESULT_TY
  };
  enum OperandMode {
    OperandMode_Default,
    OperandMode_NSA,
  };

  using OptionalImmIndexMap = std::map<AMDGPUOperand::ImmTy, unsigned>;

  AMDGPUAsmParser(const MCSubtargetInfo &STI, MCAsmParser &_Parser,
                  const MCInstrInfo &MII,
                  const MCTargetOptions &Options)
      : MCTargetAsmParser(Options, STI, MII), Parser(_Parser) {
    MCAsmParserExtension::Initialize(Parser);

    if (getFeatureBits().none()) {
      // Set default features.
      copySTI().ToggleFeature("southern-islands");
    }

    setAvailableFeatures(ComputeAvailableFeatures(getFeatureBits()));

    {
      // TODO: make those pre-defined variables read-only.
      // Currently there is no suitable machinery in core llvm-mc for this.
      // MCSymbol::isRedefinable is intended for another purpose, and
      // AsmParser::parseDirectiveSet() cannot be specialized for a specific target.
      AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
      MCContext &Ctx = getContext();
      if (ISA.Major >= 6 && AMDGPU::IsaInfo::hasCodeObjectV3(&getSTI())) {
        MCSymbol *Sym =
            Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_number"));
        Sym->setVariableValue(MCConstantExpr::create(ISA.Major, Ctx));
        Sym = Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_minor"));
        Sym->setVariableValue(MCConstantExpr::create(ISA.Minor, Ctx));
        Sym = Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_stepping"));
        Sym->setVariableValue(MCConstantExpr::create(ISA.Stepping, Ctx));
      } else {
        MCSymbol *Sym =
            Ctx.getOrCreateSymbol(Twine(".option.machine_version_major"));
        Sym->setVariableValue(MCConstantExpr::create(ISA.Major, Ctx));
        Sym = Ctx.getOrCreateSymbol(Twine(".option.machine_version_minor"));
        Sym->setVariableValue(MCConstantExpr::create(ISA.Minor, Ctx));
        Sym = Ctx.getOrCreateSymbol(Twine(".option.machine_version_stepping"));
        Sym->setVariableValue(MCConstantExpr::create(ISA.Stepping, Ctx));
      }
      if (ISA.Major >= 6 && AMDGPU::IsaInfo::hasCodeObjectV3(&getSTI())) {
        initializeGprCountSymbol(IS_VGPR);
        initializeGprCountSymbol(IS_SGPR);
      } else
        KernelScope.initialize(getContext());
    }
  }

  bool hasXNACK() const {
    return AMDGPU::hasXNACK(getSTI());
  }

  bool hasMIMG_R128() const {
    return AMDGPU::hasMIMG_R128(getSTI());
  }

  bool hasPackedD16() const {
    return AMDGPU::hasPackedD16(getSTI());
  }

  bool isSI() const {
    return AMDGPU::isSI(getSTI());
  }

  bool isCI() const {
    return AMDGPU::isCI(getSTI());
  }

  bool isVI() const {
    return AMDGPU::isVI(getSTI());
  }

  bool isGFX9() const {
    return AMDGPU::isGFX9(getSTI());
  }

  bool isGFX10() const {
    return AMDGPU::isGFX10(getSTI());
  }

  bool hasInv2PiInlineImm() const {
    return getFeatureBits()[AMDGPU::FeatureInv2PiInlineImm];
  }

  bool hasFlatOffsets() const {
    return getFeatureBits()[AMDGPU::FeatureFlatInstOffsets];
  }

  bool hasSGPR102_SGPR103() const {
    return !isVI() && !isGFX9();
  }

  bool hasSGPR104_SGPR105() const {
    return isGFX10();
  }

  bool hasIntClamp() const {
    return getFeatureBits()[AMDGPU::FeatureIntClamp];
  }

  AMDGPUTargetStreamer &getTargetStreamer() {
    MCTargetStreamer &TS = *getParser().getStreamer().getTargetStreamer();
    return static_cast<AMDGPUTargetStreamer &>(TS);
  }

  const MCRegisterInfo *getMRI() const {
    // We need this const_cast because for some reason getContext() is not const
    // in MCAsmParser.
    return const_cast<AMDGPUAsmParser*>(this)->getContext().getRegisterInfo();
  }

  const MCInstrInfo *getMII() const {
    return &MII;
  }

  const FeatureBitset &getFeatureBits() const {
    return getSTI().getFeatureBits();
  }

  void setForcedEncodingSize(unsigned Size) { ForcedEncodingSize = Size; }
  void setForcedDPP(bool ForceDPP_) { ForcedDPP = ForceDPP_; }
  void setForcedSDWA(bool ForceSDWA_) { ForcedSDWA = ForceSDWA_; }

  unsigned getForcedEncodingSize() const { return ForcedEncodingSize; }
  bool isForcedVOP3() const { return ForcedEncodingSize == 64; }
  bool isForcedDPP() const { return ForcedDPP; }
  bool isForcedSDWA() const { return ForcedSDWA; }
  ArrayRef<unsigned> getMatchedVariants() const;

  std::unique_ptr<AMDGPUOperand> parseRegister();
  bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc) override;
  unsigned checkTargetMatchPredicate(MCInst &Inst) override;
  unsigned validateTargetOperandClass(MCParsedAsmOperand &Op,
                                      unsigned Kind) override;
  bool MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
                               OperandVector &Operands, MCStreamer &Out,
                               uint64_t &ErrorInfo,
                               bool MatchingInlineAsm) override;
  bool ParseDirective(AsmToken DirectiveID) override;
  OperandMatchResultTy parseOperand(OperandVector &Operands, StringRef Mnemonic,
                                    OperandMode Mode = OperandMode_Default);
  StringRef parseMnemonicSuffix(StringRef Name);
  bool ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
                        SMLoc NameLoc, OperandVector &Operands) override;
  //bool ProcessInstruction(MCInst &Inst);

  OperandMatchResultTy parseIntWithPrefix(const char *Prefix, int64_t &Int);

  OperandMatchResultTy
  parseIntWithPrefix(const char *Prefix, OperandVector &Operands,
                     AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone,
                     bool (*ConvertResult)(int64_t &) = nullptr);

  OperandMatchResultTy
  parseOperandArrayWithPrefix(const char *Prefix,
                              OperandVector &Operands,
                              AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone,
                              bool (*ConvertResult)(int64_t&) = nullptr);

  OperandMatchResultTy
  parseNamedBit(const char *Name, OperandVector &Operands,
                AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone);
  OperandMatchResultTy parseStringWithPrefix(StringRef Prefix,
                                             StringRef &Value);

  bool isModifier();
  bool isOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const;
  bool isRegOrOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const;
  bool isNamedOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const;
  bool isOpcodeModifierWithVal(const AsmToken &Token, const AsmToken &NextToken) const;
  bool parseSP3NegModifier();
  OperandMatchResultTy parseImm(OperandVector &Operands, bool HasSP3AbsModifier = false);
  OperandMatchResultTy parseReg(OperandVector &Operands);
  OperandMatchResultTy parseRegOrImm(OperandVector &Operands, bool HasSP3AbsMod = false);
  OperandMatchResultTy parseRegOrImmWithFPInputMods(OperandVector &Operands, bool AllowImm = true);
  OperandMatchResultTy parseRegOrImmWithIntInputMods(OperandVector &Operands, bool AllowImm = true);
  OperandMatchResultTy parseRegWithFPInputMods(OperandVector &Operands);
  OperandMatchResultTy parseRegWithIntInputMods(OperandVector &Operands);
  OperandMatchResultTy parseVReg32OrOff(OperandVector &Operands);
  OperandMatchResultTy parseDfmtNfmt(OperandVector &Operands);

  void cvtDSOffset01(MCInst &Inst, const OperandVector &Operands);
  void cvtDS(MCInst &Inst, const OperandVector &Operands) { cvtDSImpl(Inst, Operands, false); }
  void cvtDSGds(MCInst &Inst, const OperandVector &Operands) { cvtDSImpl(Inst, Operands, true); }
  void cvtExp(MCInst &Inst, const OperandVector &Operands);

  bool parseCnt(int64_t &IntVal);
  OperandMatchResultTy parseSWaitCntOps(OperandVector &Operands);
  OperandMatchResultTy parseHwreg(OperandVector &Operands);

private:
  struct OperandInfoTy {
    int64_t Id;
    bool IsSymbolic = false;
    bool IsDefined = false;

    OperandInfoTy(int64_t Id_) : Id(Id_) {}
  };

  bool parseSendMsgBody(OperandInfoTy &Msg, OperandInfoTy &Op, OperandInfoTy &Stream);
  bool validateSendMsg(const OperandInfoTy &Msg,
                       const OperandInfoTy &Op,
                       const OperandInfoTy &Stream,
                       const SMLoc Loc);

  bool parseHwregBody(OperandInfoTy &HwReg, int64_t &Offset, int64_t &Width);
  bool validateHwreg(const OperandInfoTy &HwReg,
                     const int64_t Offset,
                     const int64_t Width,
                     const SMLoc Loc);

  void errorExpTgt();
  OperandMatchResultTy parseExpTgtImpl(StringRef Str, uint8_t &Val);
  SMLoc getFlatOffsetLoc(const OperandVector &Operands) const;

  bool validateInstruction(const MCInst &Inst, const SMLoc &IDLoc, const OperandVector &Operands);
  bool validateFlatOffset(const MCInst &Inst, const OperandVector &Operands);
  bool validateSOPLiteral(const MCInst &Inst) const;
  bool validateConstantBusLimitations(const MCInst &Inst);
  bool validateEarlyClobberLimitations(const MCInst &Inst);
  bool validateIntClampSupported(const MCInst &Inst);
  bool validateMIMGAtomicDMask(const MCInst &Inst);
  bool validateMIMGGatherDMask(const MCInst &Inst);
  bool validateMIMGDataSize(const MCInst &Inst);
  bool validateMIMGAddrSize(const MCInst &Inst);
  bool validateMIMGD16(const MCInst &Inst);
  bool validateMIMGDim(const MCInst &Inst);
  bool validateLdsDirect(const MCInst &Inst);
  bool validateOpSel(const MCInst &Inst);
  bool validateVccOperand(unsigned Reg) const;
  bool validateVOP3Literal(const MCInst &Inst) const;
  unsigned getConstantBusLimit(unsigned Opcode) const;
  bool usesConstantBus(const MCInst &Inst, unsigned OpIdx);
  bool isInlineConstant(const MCInst &Inst, unsigned OpIdx) const;
  unsigned findImplicitSGPRReadInVOP(const MCInst &Inst) const;

  bool isId(const StringRef Id) const;
  bool isId(const AsmToken &Token, const StringRef Id) const;
  bool isToken(const AsmToken::TokenKind Kind) const;
  bool trySkipId(const StringRef Id);
  bool trySkipId(const StringRef Id, const AsmToken::TokenKind Kind);
  bool trySkipToken(const AsmToken::TokenKind Kind);
  bool skipToken(const AsmToken::TokenKind Kind, const StringRef ErrMsg);
  bool parseString(StringRef &Val, const StringRef ErrMsg = "expected a string");
  void peekTokens(MutableArrayRef<AsmToken> Tokens);
  AsmToken::TokenKind getTokenKind() const;
  bool parseExpr(int64_t &Imm);
  bool parseExpr(OperandVector &Operands);
  StringRef getTokenStr() const;
  AsmToken peekToken();
  AsmToken getToken() const;
  SMLoc getLoc() const;
  void lex();

public:
  OperandMatchResultTy parseOptionalOperand(OperandVector &Operands);
  OperandMatchResultTy parseOptionalOpr(OperandVector &Operands);

  OperandMatchResultTy parseExpTgt(OperandVector &Operands);
  OperandMatchResultTy parseSendMsgOp(OperandVector &Operands);
  OperandMatchResultTy parseInterpSlot(OperandVector &Operands);
  OperandMatchResultTy parseInterpAttr(OperandVector &Operands);
  OperandMatchResultTy parseSOppBrTarget(OperandVector &Operands);
  OperandMatchResultTy parseBoolReg(OperandVector &Operands);

  bool parseSwizzleOperands(const unsigned OpNum, int64_t* Op,
                            const unsigned MinVal,
                            const unsigned MaxVal,
                            const StringRef ErrMsg);
  OperandMatchResultTy parseSwizzleOp(OperandVector &Operands);
  bool parseSwizzleOffset(int64_t &Imm);
  bool parseSwizzleMacro(int64_t &Imm);
  bool parseSwizzleQuadPerm(int64_t &Imm);
  bool parseSwizzleBitmaskPerm(int64_t &Imm);
  bool parseSwizzleBroadcast(int64_t &Imm);
  bool parseSwizzleSwap(int64_t &Imm);
  bool parseSwizzleReverse(int64_t &Imm);

  OperandMatchResultTy parseGPRIdxMode(OperandVector &Operands);
  int64_t parseGPRIdxMacro();

  void cvtMubuf(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, false, false); }
  void cvtMubufAtomic(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, true, false); }
  void cvtMubufAtomicReturn(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, true, true); }
  void cvtMubufLds(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, false, false, true); }
  void cvtMtbuf(MCInst &Inst, const OperandVector &Operands);

  AMDGPUOperand::Ptr defaultDLC() const;
  AMDGPUOperand::Ptr defaultGLC() const;
  AMDGPUOperand::Ptr defaultSLC() const;

  AMDGPUOperand::Ptr defaultSMRDOffset8() const;
  AMDGPUOperand::Ptr defaultSMRDOffset20() const;
  AMDGPUOperand::Ptr defaultSMRDLiteralOffset() const;
  AMDGPUOperand::Ptr defaultFlatOffset() const;

  OperandMatchResultTy parseOModOperand(OperandVector &Operands);

  void cvtVOP3(MCInst &Inst, const OperandVector &Operands,
               OptionalImmIndexMap &OptionalIdx);
  void cvtVOP3OpSel(MCInst &Inst, const OperandVector &Operands);
  void cvtVOP3(MCInst &Inst, const OperandVector &Operands);
  void cvtVOP3P(MCInst &Inst, const OperandVector &Operands);

  void cvtVOP3Interp(MCInst &Inst, const OperandVector &Operands);

  void cvtMIMG(MCInst &Inst, const OperandVector &Operands,
               bool IsAtomic = false);
  void cvtMIMGAtomic(MCInst &Inst, const OperandVector &Operands);

  OperandMatchResultTy parseDim(OperandVector &Operands);
  OperandMatchResultTy parseDPP8(OperandVector &Operands);
  OperandMatchResultTy parseDPPCtrl(OperandVector &Operands);
  AMDGPUOperand::Ptr defaultRowMask() const;
  AMDGPUOperand::Ptr defaultBankMask() const;
  AMDGPUOperand::Ptr defaultBoundCtrl() const;
  AMDGPUOperand::Ptr defaultFI() const;
  void cvtDPP(MCInst &Inst, const OperandVector &Operands, bool IsDPP8 = false);
  void cvtDPP8(MCInst &Inst, const OperandVector &Operands) { cvtDPP(Inst, Operands, true); }

  OperandMatchResultTy parseSDWASel(OperandVector &Operands, StringRef Prefix,
                                    AMDGPUOperand::ImmTy Type);
  OperandMatchResultTy parseSDWADstUnused(OperandVector &Operands);
  void cvtSdwaVOP1(MCInst &Inst, const OperandVector &Operands);
  void cvtSdwaVOP2(MCInst &Inst, const OperandVector &Operands);
  void cvtSdwaVOP2b(MCInst &Inst, const OperandVector &Operands);
  void cvtSdwaVOPC(MCInst &Inst, const OperandVector &Operands);
  void cvtSDWA(MCInst &Inst, const OperandVector &Operands,
               uint64_t BasicInstType, bool skipVcc = false);

  AMDGPUOperand::Ptr defaultBLGP() const;
  AMDGPUOperand::Ptr defaultCBSZ() const;
  AMDGPUOperand::Ptr defaultABID() const;

  OperandMatchResultTy parseEndpgmOp(OperandVector &Operands);
  AMDGPUOperand::Ptr defaultEndpgmImmOperands() const;
};

struct OptionalOperand {
  const char *Name;
  AMDGPUOperand::ImmTy Type;
  bool IsBit;
  bool (*ConvertResult)(int64_t&);
};

} // end anonymous namespace

// May be called with integer type with equivalent bitwidth.
static const fltSemantics *getFltSemantics(unsigned Size) {
  switch (Size) {
  case 4:
    return &APFloat::IEEEsingle();
  case 8:
    return &APFloat::IEEEdouble();
  case 2:
    return &APFloat::IEEEhalf();
  default:
    llvm_unreachable("unsupported fp type");
  }
}

static const fltSemantics *getFltSemantics(MVT VT) {
  return getFltSemantics(VT.getSizeInBits() / 8);
}

static const fltSemantics *getOpFltSemantics(uint8_t OperandType) {
  switch (OperandType) {
  case AMDGPU::OPERAND_REG_IMM_INT32:
  case AMDGPU::OPERAND_REG_IMM_FP32:
  case AMDGPU::OPERAND_REG_INLINE_C_INT32:
  case AMDGPU::OPERAND_REG_INLINE_C_FP32:
  case AMDGPU::OPERAND_REG_INLINE_AC_INT32:
  case AMDGPU::OPERAND_REG_INLINE_AC_FP32:
    return &APFloat::IEEEsingle();
  case AMDGPU::OPERAND_REG_IMM_INT64:
  case AMDGPU::OPERAND_REG_IMM_FP64:
  case AMDGPU::OPERAND_REG_INLINE_C_INT64:
  case AMDGPU::OPERAND_REG_INLINE_C_FP64:
    return &APFloat::IEEEdouble();
  case AMDGPU::OPERAND_REG_IMM_INT16:
  case AMDGPU::OPERAND_REG_IMM_FP16:
  case AMDGPU::OPERAND_REG_INLINE_C_INT16:
  case AMDGPU::OPERAND_REG_INLINE_C_FP16:
  case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
  case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
  case AMDGPU::OPERAND_REG_INLINE_AC_INT16:
  case AMDGPU::OPERAND_REG_INLINE_AC_FP16:
  case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16:
  case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16:
  case AMDGPU::OPERAND_REG_IMM_V2INT16:
  case AMDGPU::OPERAND_REG_IMM_V2FP16:
    return &APFloat::IEEEhalf();
  default:
    llvm_unreachable("unsupported fp type");
  }
}

//===----------------------------------------------------------------------===//
// Operand
//===----------------------------------------------------------------------===//

static bool canLosslesslyConvertToFPType(APFloat &FPLiteral, MVT VT) {
  bool Lost;

  // Convert literal to single precision
  APFloat::opStatus Status = FPLiteral.convert(*getFltSemantics(VT),
                                               APFloat::rmNearestTiesToEven,
                                               &Lost);
  // We allow precision lost but not overflow or underflow
  if (Status != APFloat::opOK &&
      Lost &&
      ((Status & APFloat::opOverflow)  != 0 ||
       (Status & APFloat::opUnderflow) != 0)) {
    return false;
  }

  return true;
}

// Returns true if Val fits in Size bits either as an unsigned value or as a
// sign-extended signed value.
static bool isSafeTruncation(int64_t Val, unsigned Size) {
  return isUIntN(Size, Val) || isIntN(Size, Val);
}

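// A parsed immediate may be encoded either as an inline constant (a small set
// of values the hardware encodes directly in the source field) or as a
// literal constant that follows the instruction; isInlinableImm() and
// isLiteralImm() check these two cases respectively.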
bool AMDGPUOperand::isInlinableImm(MVT type) const {

  // This is a hack to enable named inline values like
  // shared_base with both 32-bit and 64-bit operands.
  // Note that these values are defined as
  // 32-bit operands only.
  if (isInlineValue()) {
    return true;
  }

  if (!isImmTy(ImmTyNone)) {
    // Only plain immediates are inlinable (e.g. "clamp" attribute is not)
    return false;
  }
  // TODO: We should avoid using host float here. It would be better to
  // check the float bit values which is what a few other places do.
  // We've had bot failures before due to weird NaN support on mips hosts.

  APInt Literal(64, Imm.Val);

  if (Imm.IsFPImm) { // We got fp literal token
    if (type == MVT::f64 || type == MVT::i64) { // Expected 64-bit operand
      return AMDGPU::isInlinableLiteral64(Imm.Val,
                                          AsmParser->hasInv2PiInlineImm());
    }

    APFloat FPLiteral(APFloat::IEEEdouble(), APInt(64, Imm.Val));
    if (!canLosslesslyConvertToFPType(FPLiteral, type))
      return false;

    if (type.getScalarSizeInBits() == 16) {
      return AMDGPU::isInlinableLiteral16(
        static_cast<int16_t>(FPLiteral.bitcastToAPInt().getZExtValue()),
        AsmParser->hasInv2PiInlineImm());
    }

    // Check if single precision literal is inlinable
    return AMDGPU::isInlinableLiteral32(
      static_cast<int32_t>(FPLiteral.bitcastToAPInt().getZExtValue()),
      AsmParser->hasInv2PiInlineImm());
  }

  // We got int literal token.
  if (type == MVT::f64 || type == MVT::i64) { // Expected 64-bit operand
    return AMDGPU::isInlinableLiteral64(Imm.Val,
                                        AsmParser->hasInv2PiInlineImm());
  }

  if (!isSafeTruncation(Imm.Val, type.getScalarSizeInBits())) {
    return false;
  }

  if (type.getScalarSizeInBits() == 16) {
    return AMDGPU::isInlinableLiteral16(
      static_cast<int16_t>(Literal.getLoBits(16).getSExtValue()),
      AsmParser->hasInv2PiInlineImm());
  }

  return AMDGPU::isInlinableLiteral32(
    static_cast<int32_t>(Literal.getLoBits(32).getZExtValue()),
    AsmParser->hasInv2PiInlineImm());
}

bool AMDGPUOperand::isLiteralImm(MVT type) const {
  // Check that this immediate can be added as literal
  if (!isImmTy(ImmTyNone)) {
    return false;
  }

  if (!Imm.IsFPImm) {
    // We got int literal token.

    if (type == MVT::f64 && hasFPModifiers()) {
      // Cannot apply fp modifiers to int literals preserving the same semantics
      // for VOP1/2/C and VOP3 because of integer truncation. To avoid ambiguity,
      // disable these cases.
      return false;
    }

    unsigned Size = type.getSizeInBits();
    if (Size == 64)
      Size = 32;

    // FIXME: 64-bit operands can zero extend, sign extend, or pad zeroes for FP
    // types.
    return isSafeTruncation(Imm.Val, Size);
  }

  // We got fp literal token
  if (type == MVT::f64) { // Expected 64-bit fp operand
    // We would set the low 64 bits of the literal to zeroes, but we accept
    // such literals.
    return true;
  }

  if (type == MVT::i64) { // Expected 64-bit int operand
    // We don't allow fp literals in 64-bit integer instructions. It is
    // unclear how we should encode them.
    return false;
  }

  // We allow fp literals with f16x2 operands assuming that the specified
  // literal goes into the lower half and the upper half is zero. We also
  // require that the literal may be losslessly converted to f16.
  MVT ExpectedType = (type == MVT::v2f16) ? MVT::f16 :
                     (type == MVT::v2i16) ? MVT::i16 : type;

  APFloat FPLiteral(APFloat::IEEEdouble(), APInt(64, Imm.Val));
  return canLosslesslyConvertToFPType(FPLiteral, ExpectedType);
}

bool AMDGPUOperand::isRegClass(unsigned RCID) const {
  return isRegKind() && AsmParser->getMRI()->getRegClass(RCID).contains(getReg());
}

bool AMDGPUOperand::isSDWAOperand(MVT type) const {
  if (AsmParser->isVI())
    return isVReg32();
  else if (AsmParser->isGFX9() || AsmParser->isGFX10())
    return isRegClass(AMDGPU::VS_32RegClassID) || isInlinableImm(type);
  else
    return false;
}

bool AMDGPUOperand::isSDWAFP16Operand() const {
  return isSDWAOperand(MVT::f16);
}

bool AMDGPUOperand::isSDWAFP32Operand() const {
  return isSDWAOperand(MVT::f32);
}

bool AMDGPUOperand::isSDWAInt16Operand() const {
  return isSDWAOperand(MVT::i16);
}

bool AMDGPUOperand::isSDWAInt32Operand() const {
  return isSDWAOperand(MVT::i32);
}

bool AMDGPUOperand::isBoolReg() const {
  return (AsmParser->getFeatureBits()[AMDGPU::FeatureWavefrontSize64] && isSCSrcB64()) ||
         (AsmParser->getFeatureBits()[AMDGPU::FeatureWavefrontSize32] && isSCSrcB32());
}

uint64_t AMDGPUOperand::applyInputFPModifiers(uint64_t Val, unsigned Size) const
{
  assert(isImmTy(ImmTyNone) && Imm.Mods.hasFPModifiers());
  assert(Size == 2 || Size == 4 || Size == 8);

  const uint64_t FpSignMask = (1ULL << (Size * 8 - 1));

  if (Imm.Mods.Abs) {
    Val &= ~FpSignMask;
  }
  if (Imm.Mods.Neg) {
    Val ^= FpSignMask;
  }

  return Val;
}

void AMDGPUOperand::addImmOperands(MCInst &Inst, unsigned N, bool ApplyModifiers) const {
  if (AMDGPU::isSISrcOperand(AsmParser->getMII()->get(Inst.getOpcode()),
                             Inst.getNumOperands())) {
    addLiteralImmOperand(Inst, Imm.Val,
                         ApplyModifiers &
                         isImmTy(ImmTyNone) && Imm.Mods.hasFPModifiers());
  } else {
    assert(!isImmTy(ImmTyNone) || !hasModifiers());
    Inst.addOperand(MCOperand::createImm(Imm.Val));
  }
}

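// Encode a parsed literal into Inst for a source operand: values that qualify
// as inline constants are emitted directly, while other values are converted
// or truncated to the width expected by the operand type.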
void AMDGPUOperand::addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyModifiers) const {
  const auto& InstDesc = AsmParser->getMII()->get(Inst.getOpcode());
  auto OpNum = Inst.getNumOperands();
  // Check that this operand accepts literals
  assert(AMDGPU::isSISrcOperand(InstDesc, OpNum));

  if (ApplyModifiers) {
    assert(AMDGPU::isSISrcFPOperand(InstDesc, OpNum));
    const unsigned Size = Imm.IsFPImm ? sizeof(double) : getOperandSize(InstDesc, OpNum);
    Val = applyInputFPModifiers(Val, Size);
  }

  APInt Literal(64, Val);
  uint8_t OpTy = InstDesc.OpInfo[OpNum].OperandType;

  if (Imm.IsFPImm) { // We got fp literal token
    switch (OpTy) {
    case AMDGPU::OPERAND_REG_IMM_INT64:
    case AMDGPU::OPERAND_REG_IMM_FP64:
    case AMDGPU::OPERAND_REG_INLINE_C_INT64:
    case AMDGPU::OPERAND_REG_INLINE_C_FP64:
      if (AMDGPU::isInlinableLiteral64(Literal.getZExtValue(),
                                       AsmParser->hasInv2PiInlineImm())) {
        Inst.addOperand(MCOperand::createImm(Literal.getZExtValue()));
        return;
      }

      // Non-inlineable
      if (AMDGPU::isSISrcFPOperand(InstDesc, OpNum)) { // Expected 64-bit fp operand
        // For fp operands we check if low 32 bits are zeros
        if (Literal.getLoBits(32) != 0) {
          const_cast<AMDGPUAsmParser *>(AsmParser)->Warning(Inst.getLoc(),
          "Can't encode literal as exact 64-bit floating-point operand. "
          "Low 32-bits will be set to zero");
        }

        Inst.addOperand(MCOperand::createImm(Literal.lshr(32).getZExtValue()));
        return;
      }

      // We don't allow fp literals in 64-bit integer instructions. It is
      // unclear how we should encode them. This case should be checked earlier
      // in predicate methods (isLiteralImm())
      llvm_unreachable("fp literal in 64-bit integer instruction.");

    case AMDGPU::OPERAND_REG_IMM_INT32:
    case AMDGPU::OPERAND_REG_IMM_FP32:
    case AMDGPU::OPERAND_REG_INLINE_C_INT32:
    case AMDGPU::OPERAND_REG_INLINE_C_FP32:
    case AMDGPU::OPERAND_REG_INLINE_AC_INT32:
    case AMDGPU::OPERAND_REG_INLINE_AC_FP32:
    case AMDGPU::OPERAND_REG_IMM_INT16:
    case AMDGPU::OPERAND_REG_IMM_FP16:
    case AMDGPU::OPERAND_REG_INLINE_C_INT16:
    case AMDGPU::OPERAND_REG_INLINE_C_FP16:
    case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
    case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
    case AMDGPU::OPERAND_REG_INLINE_AC_INT16:
    case AMDGPU::OPERAND_REG_INLINE_AC_FP16:
    case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16:
    case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16:
    case AMDGPU::OPERAND_REG_IMM_V2INT16:
    case AMDGPU::OPERAND_REG_IMM_V2FP16: {
      bool lost;
      APFloat FPLiteral(APFloat::IEEEdouble(), Literal);
      // Convert literal to single precision
      FPLiteral.convert(*getOpFltSemantics(OpTy),
                        APFloat::rmNearestTiesToEven, &lost);
      // We allow precision lost but not overflow or underflow. This should be
      // checked earlier in isLiteralImm()

      uint64_t ImmVal = FPLiteral.bitcastToAPInt().getZExtValue();
      Inst.addOperand(MCOperand::createImm(ImmVal));
      return;
    }
    default:
      llvm_unreachable("invalid operand size");
    }

    return;
  }

  // We got int literal token.
  // Only sign extend inline immediates.
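  // Non-inlinable integer values are kept as a literal, truncated to the
  // width of the operand's literal field.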
1773 switch (OpTy) { 1774 case AMDGPU::OPERAND_REG_IMM_INT32: 1775 case AMDGPU::OPERAND_REG_IMM_FP32: 1776 case AMDGPU::OPERAND_REG_INLINE_C_INT32: 1777 case AMDGPU::OPERAND_REG_INLINE_C_FP32: 1778 case AMDGPU::OPERAND_REG_INLINE_AC_INT32: 1779 case AMDGPU::OPERAND_REG_INLINE_AC_FP32: 1780 case AMDGPU::OPERAND_REG_IMM_V2INT16: 1781 case AMDGPU::OPERAND_REG_IMM_V2FP16: 1782 if (isSafeTruncation(Val, 32) && 1783 AMDGPU::isInlinableLiteral32(static_cast<int32_t>(Val), 1784 AsmParser->hasInv2PiInlineImm())) { 1785 Inst.addOperand(MCOperand::createImm(Val)); 1786 return; 1787 } 1788 1789 Inst.addOperand(MCOperand::createImm(Val & 0xffffffff)); 1790 return; 1791 1792 case AMDGPU::OPERAND_REG_IMM_INT64: 1793 case AMDGPU::OPERAND_REG_IMM_FP64: 1794 case AMDGPU::OPERAND_REG_INLINE_C_INT64: 1795 case AMDGPU::OPERAND_REG_INLINE_C_FP64: 1796 if (AMDGPU::isInlinableLiteral64(Val, AsmParser->hasInv2PiInlineImm())) { 1797 Inst.addOperand(MCOperand::createImm(Val)); 1798 return; 1799 } 1800 1801 Inst.addOperand(MCOperand::createImm(Lo_32(Val))); 1802 return; 1803 1804 case AMDGPU::OPERAND_REG_IMM_INT16: 1805 case AMDGPU::OPERAND_REG_IMM_FP16: 1806 case AMDGPU::OPERAND_REG_INLINE_C_INT16: 1807 case AMDGPU::OPERAND_REG_INLINE_C_FP16: 1808 case AMDGPU::OPERAND_REG_INLINE_AC_INT16: 1809 case AMDGPU::OPERAND_REG_INLINE_AC_FP16: 1810 if (isSafeTruncation(Val, 16) && 1811 AMDGPU::isInlinableLiteral16(static_cast<int16_t>(Val), 1812 AsmParser->hasInv2PiInlineImm())) { 1813 Inst.addOperand(MCOperand::createImm(Val)); 1814 return; 1815 } 1816 1817 Inst.addOperand(MCOperand::createImm(Val & 0xffff)); 1818 return; 1819 1820 case AMDGPU::OPERAND_REG_INLINE_C_V2INT16: 1821 case AMDGPU::OPERAND_REG_INLINE_C_V2FP16: 1822 case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16: 1823 case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16: { 1824 assert(isSafeTruncation(Val, 16)); 1825 assert(AMDGPU::isInlinableLiteral16(static_cast<int16_t>(Val), 1826 AsmParser->hasInv2PiInlineImm())); 1827 1828 Inst.addOperand(MCOperand::createImm(Val)); 1829 return; 1830 } 1831 default: 1832 llvm_unreachable("invalid operand size"); 1833 } 1834 } 1835 1836 template <unsigned Bitwidth> 1837 void AMDGPUOperand::addKImmFPOperands(MCInst &Inst, unsigned N) const { 1838 APInt Literal(64, Imm.Val); 1839 1840 if (!Imm.IsFPImm) { 1841 // We got int literal token. 
1842 Inst.addOperand(MCOperand::createImm(Literal.getLoBits(Bitwidth).getZExtValue())); 1843 return; 1844 } 1845 1846 bool Lost; 1847 APFloat FPLiteral(APFloat::IEEEdouble(), Literal); 1848 FPLiteral.convert(*getFltSemantics(Bitwidth / 8), 1849 APFloat::rmNearestTiesToEven, &Lost); 1850 Inst.addOperand(MCOperand::createImm(FPLiteral.bitcastToAPInt().getZExtValue())); 1851 } 1852 1853 void AMDGPUOperand::addRegOperands(MCInst &Inst, unsigned N) const { 1854 Inst.addOperand(MCOperand::createReg(AMDGPU::getMCReg(getReg(), AsmParser->getSTI()))); 1855 } 1856 1857 static bool isInlineValue(unsigned Reg) { 1858 switch (Reg) { 1859 case AMDGPU::SRC_SHARED_BASE: 1860 case AMDGPU::SRC_SHARED_LIMIT: 1861 case AMDGPU::SRC_PRIVATE_BASE: 1862 case AMDGPU::SRC_PRIVATE_LIMIT: 1863 case AMDGPU::SRC_POPS_EXITING_WAVE_ID: 1864 return true; 1865 case AMDGPU::SRC_VCCZ: 1866 case AMDGPU::SRC_EXECZ: 1867 case AMDGPU::SRC_SCC: 1868 return true; 1869 case AMDGPU::SGPR_NULL: 1870 return true; 1871 default: 1872 return false; 1873 } 1874 } 1875 1876 bool AMDGPUOperand::isInlineValue() const { 1877 return isRegKind() && ::isInlineValue(getReg()); 1878 } 1879 1880 //===----------------------------------------------------------------------===// 1881 // AsmParser 1882 //===----------------------------------------------------------------------===// 1883 1884 static int getRegClass(RegisterKind Is, unsigned RegWidth) { 1885 if (Is == IS_VGPR) { 1886 switch (RegWidth) { 1887 default: return -1; 1888 case 1: return AMDGPU::VGPR_32RegClassID; 1889 case 2: return AMDGPU::VReg_64RegClassID; 1890 case 3: return AMDGPU::VReg_96RegClassID; 1891 case 4: return AMDGPU::VReg_128RegClassID; 1892 case 5: return AMDGPU::VReg_160RegClassID; 1893 case 8: return AMDGPU::VReg_256RegClassID; 1894 case 16: return AMDGPU::VReg_512RegClassID; 1895 case 32: return AMDGPU::VReg_1024RegClassID; 1896 } 1897 } else if (Is == IS_TTMP) { 1898 switch (RegWidth) { 1899 default: return -1; 1900 case 1: return AMDGPU::TTMP_32RegClassID; 1901 case 2: return AMDGPU::TTMP_64RegClassID; 1902 case 4: return AMDGPU::TTMP_128RegClassID; 1903 case 8: return AMDGPU::TTMP_256RegClassID; 1904 case 16: return AMDGPU::TTMP_512RegClassID; 1905 } 1906 } else if (Is == IS_SGPR) { 1907 switch (RegWidth) { 1908 default: return -1; 1909 case 1: return AMDGPU::SGPR_32RegClassID; 1910 case 2: return AMDGPU::SGPR_64RegClassID; 1911 case 4: return AMDGPU::SGPR_128RegClassID; 1912 case 8: return AMDGPU::SGPR_256RegClassID; 1913 case 16: return AMDGPU::SGPR_512RegClassID; 1914 } 1915 } else if (Is == IS_AGPR) { 1916 switch (RegWidth) { 1917 default: return -1; 1918 case 1: return AMDGPU::AGPR_32RegClassID; 1919 case 2: return AMDGPU::AReg_64RegClassID; 1920 case 4: return AMDGPU::AReg_128RegClassID; 1921 case 16: return AMDGPU::AReg_512RegClassID; 1922 case 32: return AMDGPU::AReg_1024RegClassID; 1923 } 1924 } 1925 return -1; 1926 } 1927 1928 static unsigned getSpecialRegForName(StringRef RegName) { 1929 return StringSwitch<unsigned>(RegName) 1930 .Case("exec", AMDGPU::EXEC) 1931 .Case("vcc", AMDGPU::VCC) 1932 .Case("flat_scratch", AMDGPU::FLAT_SCR) 1933 .Case("xnack_mask", AMDGPU::XNACK_MASK) 1934 .Case("shared_base", AMDGPU::SRC_SHARED_BASE) 1935 .Case("src_shared_base", AMDGPU::SRC_SHARED_BASE) 1936 .Case("shared_limit", AMDGPU::SRC_SHARED_LIMIT) 1937 .Case("src_shared_limit", AMDGPU::SRC_SHARED_LIMIT) 1938 .Case("private_base", AMDGPU::SRC_PRIVATE_BASE) 1939 .Case("src_private_base", AMDGPU::SRC_PRIVATE_BASE) 1940 .Case("private_limit", AMDGPU::SRC_PRIVATE_LIMIT) 1941 
.Case("src_private_limit", AMDGPU::SRC_PRIVATE_LIMIT) 1942 .Case("pops_exiting_wave_id", AMDGPU::SRC_POPS_EXITING_WAVE_ID) 1943 .Case("src_pops_exiting_wave_id", AMDGPU::SRC_POPS_EXITING_WAVE_ID) 1944 .Case("lds_direct", AMDGPU::LDS_DIRECT) 1945 .Case("src_lds_direct", AMDGPU::LDS_DIRECT) 1946 .Case("m0", AMDGPU::M0) 1947 .Case("vccz", AMDGPU::SRC_VCCZ) 1948 .Case("src_vccz", AMDGPU::SRC_VCCZ) 1949 .Case("execz", AMDGPU::SRC_EXECZ) 1950 .Case("src_execz", AMDGPU::SRC_EXECZ) 1951 .Case("scc", AMDGPU::SRC_SCC) 1952 .Case("src_scc", AMDGPU::SRC_SCC) 1953 .Case("tba", AMDGPU::TBA) 1954 .Case("tma", AMDGPU::TMA) 1955 .Case("flat_scratch_lo", AMDGPU::FLAT_SCR_LO) 1956 .Case("flat_scratch_hi", AMDGPU::FLAT_SCR_HI) 1957 .Case("xnack_mask_lo", AMDGPU::XNACK_MASK_LO) 1958 .Case("xnack_mask_hi", AMDGPU::XNACK_MASK_HI) 1959 .Case("vcc_lo", AMDGPU::VCC_LO) 1960 .Case("vcc_hi", AMDGPU::VCC_HI) 1961 .Case("exec_lo", AMDGPU::EXEC_LO) 1962 .Case("exec_hi", AMDGPU::EXEC_HI) 1963 .Case("tma_lo", AMDGPU::TMA_LO) 1964 .Case("tma_hi", AMDGPU::TMA_HI) 1965 .Case("tba_lo", AMDGPU::TBA_LO) 1966 .Case("tba_hi", AMDGPU::TBA_HI) 1967 .Case("null", AMDGPU::SGPR_NULL) 1968 .Default(AMDGPU::NoRegister); 1969 } 1970 1971 bool AMDGPUAsmParser::ParseRegister(unsigned &RegNo, SMLoc &StartLoc, 1972 SMLoc &EndLoc) { 1973 auto R = parseRegister(); 1974 if (!R) return true; 1975 assert(R->isReg()); 1976 RegNo = R->getReg(); 1977 StartLoc = R->getStartLoc(); 1978 EndLoc = R->getEndLoc(); 1979 return false; 1980 } 1981 1982 bool AMDGPUAsmParser::AddNextRegisterToList(unsigned &Reg, unsigned &RegWidth, 1983 RegisterKind RegKind, unsigned Reg1) { 1984 switch (RegKind) { 1985 case IS_SPECIAL: 1986 if (Reg == AMDGPU::EXEC_LO && Reg1 == AMDGPU::EXEC_HI) { 1987 Reg = AMDGPU::EXEC; 1988 RegWidth = 2; 1989 return true; 1990 } 1991 if (Reg == AMDGPU::FLAT_SCR_LO && Reg1 == AMDGPU::FLAT_SCR_HI) { 1992 Reg = AMDGPU::FLAT_SCR; 1993 RegWidth = 2; 1994 return true; 1995 } 1996 if (Reg == AMDGPU::XNACK_MASK_LO && Reg1 == AMDGPU::XNACK_MASK_HI) { 1997 Reg = AMDGPU::XNACK_MASK; 1998 RegWidth = 2; 1999 return true; 2000 } 2001 if (Reg == AMDGPU::VCC_LO && Reg1 == AMDGPU::VCC_HI) { 2002 Reg = AMDGPU::VCC; 2003 RegWidth = 2; 2004 return true; 2005 } 2006 if (Reg == AMDGPU::TBA_LO && Reg1 == AMDGPU::TBA_HI) { 2007 Reg = AMDGPU::TBA; 2008 RegWidth = 2; 2009 return true; 2010 } 2011 if (Reg == AMDGPU::TMA_LO && Reg1 == AMDGPU::TMA_HI) { 2012 Reg = AMDGPU::TMA; 2013 RegWidth = 2; 2014 return true; 2015 } 2016 return false; 2017 case IS_VGPR: 2018 case IS_SGPR: 2019 case IS_AGPR: 2020 case IS_TTMP: 2021 if (Reg1 != Reg + RegWidth) { 2022 return false; 2023 } 2024 RegWidth++; 2025 return true; 2026 default: 2027 llvm_unreachable("unexpected register kind"); 2028 } 2029 } 2030 2031 struct RegInfo { 2032 StringLiteral Name; 2033 RegisterKind Kind; 2034 }; 2035 2036 static constexpr RegInfo RegularRegisters[] = { 2037 {{"v"}, IS_VGPR}, 2038 {{"s"}, IS_SGPR}, 2039 {{"ttmp"}, IS_TTMP}, 2040 {{"acc"}, IS_AGPR}, 2041 {{"a"}, IS_AGPR}, 2042 }; 2043 2044 static bool isRegularReg(RegisterKind Kind) { 2045 return Kind == IS_VGPR || 2046 Kind == IS_SGPR || 2047 Kind == IS_TTMP || 2048 Kind == IS_AGPR; 2049 } 2050 2051 static const RegInfo* getRegularRegInfo(StringRef Str) { 2052 for (const RegInfo &Reg : RegularRegisters) 2053 if (Str.startswith(Reg.Name)) 2054 return &Reg; 2055 return nullptr; 2056 } 2057 2058 static bool getRegNum(StringRef Str, unsigned& Num) { 2059 return !Str.getAsInteger(10, Num); 2060 } 2061 2062 bool 2063 AMDGPUAsmParser::isRegister(const 
AsmToken &Token, 2064 const AsmToken &NextToken) const { 2065 2066 // A list of consecutive registers: [s0,s1,s2,s3] 2067 if (Token.is(AsmToken::LBrac)) 2068 return true; 2069 2070 if (!Token.is(AsmToken::Identifier)) 2071 return false; 2072 2073 // A single register like s0 or a range of registers like s[0:1] 2074 2075 StringRef Str = Token.getString(); 2076 const RegInfo *Reg = getRegularRegInfo(Str); 2077 if (Reg) { 2078 StringRef RegName = Reg->Name; 2079 StringRef RegSuffix = Str.substr(RegName.size()); 2080 if (!RegSuffix.empty()) { 2081 unsigned Num; 2082 // A single register with an index: rXX 2083 if (getRegNum(RegSuffix, Num)) 2084 return true; 2085 } else { 2086 // A range of registers: r[XX:YY]. 2087 if (NextToken.is(AsmToken::LBrac)) 2088 return true; 2089 } 2090 } 2091 2092 return getSpecialRegForName(Str) != AMDGPU::NoRegister; 2093 } 2094 2095 bool 2096 AMDGPUAsmParser::isRegister() 2097 { 2098 return isRegister(getToken(), peekToken()); 2099 } 2100 2101 unsigned 2102 AMDGPUAsmParser::getRegularReg(RegisterKind RegKind, 2103 unsigned RegNum, 2104 unsigned RegWidth) { 2105 2106 assert(isRegularReg(RegKind)); 2107 2108 unsigned AlignSize = 1; 2109 if (RegKind == IS_SGPR || RegKind == IS_TTMP) { 2110 // SGPR and TTMP registers must be aligned. 2111 // Max required alignment is 4 dwords. 2112 AlignSize = std::min(RegWidth, 4u); 2113 } 2114 2115 if (RegNum % AlignSize != 0) 2116 return AMDGPU::NoRegister; 2117 2118 unsigned RegIdx = RegNum / AlignSize; 2119 int RCID = getRegClass(RegKind, RegWidth); 2120 if (RCID == -1) 2121 return AMDGPU::NoRegister; 2122 2123 const MCRegisterInfo *TRI = getContext().getRegisterInfo(); 2124 const MCRegisterClass RC = TRI->getRegClass(RCID); 2125 if (RegIdx >= RC.getNumRegs()) 2126 return AMDGPU::NoRegister; 2127 2128 return RC.getRegister(RegIdx); 2129 } 2130 2131 bool 2132 AMDGPUAsmParser::ParseRegRange(unsigned& Num, unsigned& Width) { 2133 int64_t RegLo, RegHi; 2134 if (!trySkipToken(AsmToken::LBrac)) 2135 return false; 2136 2137 if (!parseExpr(RegLo)) 2138 return false; 2139 2140 if (trySkipToken(AsmToken::Colon)) { 2141 if (!parseExpr(RegHi)) 2142 return false; 2143 } else { 2144 RegHi = RegLo; 2145 } 2146 2147 if (!trySkipToken(AsmToken::RBrac)) 2148 return false; 2149 2150 if (!isUInt<32>(RegLo) || !isUInt<32>(RegHi) || RegLo > RegHi) 2151 return false; 2152 2153 Num = static_cast<unsigned>(RegLo); 2154 Width = (RegHi - RegLo) + 1; 2155 return true; 2156 } 2157 2158 unsigned 2159 AMDGPUAsmParser::ParseSpecialReg(RegisterKind &RegKind, 2160 unsigned &RegNum, 2161 unsigned &RegWidth) { 2162 assert(isToken(AsmToken::Identifier)); 2163 unsigned Reg = getSpecialRegForName(getTokenStr()); 2164 if (Reg) { 2165 RegNum = 0; 2166 RegWidth = 1; 2167 RegKind = IS_SPECIAL; 2168 lex(); // skip register name 2169 } 2170 return Reg; 2171 } 2172 2173 unsigned 2174 AMDGPUAsmParser::ParseRegularReg(RegisterKind &RegKind, 2175 unsigned &RegNum, 2176 unsigned &RegWidth) { 2177 assert(isToken(AsmToken::Identifier)); 2178 StringRef RegName = getTokenStr(); 2179 2180 const RegInfo *RI = getRegularRegInfo(RegName); 2181 if (!RI) 2182 return AMDGPU::NoRegister; 2183 lex(); // skip register name 2184 2185 RegKind = RI->Kind; 2186 StringRef RegSuffix = RegName.substr(RI->Name.size()); 2187 if (!RegSuffix.empty()) { 2188 // Single 32-bit register: vXX. 2189 if (!getRegNum(RegSuffix, RegNum)) 2190 return AMDGPU::NoRegister; 2191 RegWidth = 1; 2192 } else { 2193 // Range of registers: v[XX:YY]. ":YY" is optional. 
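// Added illustration (not in the original source): "v[4:7]" yields
// RegNum = 4 and RegWidth = 4, while "v[5]" (with the optional ":YY"
// omitted) yields RegNum = 5 and RegWidth = 1, matching what
// ParseRegRange() computes below.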
2194 if (!ParseRegRange(RegNum, RegWidth)) 2195 return AMDGPU::NoRegister; 2196 } 2197 2198 return getRegularReg(RegKind, RegNum, RegWidth); 2199 } 2200 2201 unsigned 2202 AMDGPUAsmParser::ParseRegList(RegisterKind &RegKind, 2203 unsigned &RegNum, 2204 unsigned &RegWidth) { 2205 unsigned Reg = AMDGPU::NoRegister; 2206 2207 if (!trySkipToken(AsmToken::LBrac)) 2208 return AMDGPU::NoRegister; 2209 2210 // List of consecutive registers, e.g.: [s0,s1,s2,s3] 2211 2212 if (!ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth)) 2213 return AMDGPU::NoRegister; 2214 if (RegWidth != 1) 2215 return AMDGPU::NoRegister; 2216 2217 for (; trySkipToken(AsmToken::Comma); ) { 2218 RegisterKind NextRegKind; 2219 unsigned NextReg, NextRegNum, NextRegWidth; 2220 2221 if (!ParseAMDGPURegister(NextRegKind, NextReg, NextRegNum, NextRegWidth)) 2222 return AMDGPU::NoRegister; 2223 if (NextRegWidth != 1) 2224 return AMDGPU::NoRegister; 2225 if (NextRegKind != RegKind) 2226 return AMDGPU::NoRegister; 2227 if (!AddNextRegisterToList(Reg, RegWidth, RegKind, NextReg)) 2228 return AMDGPU::NoRegister; 2229 } 2230 2231 if (!trySkipToken(AsmToken::RBrac)) 2232 return AMDGPU::NoRegister; 2233 2234 if (isRegularReg(RegKind)) 2235 Reg = getRegularReg(RegKind, RegNum, RegWidth); 2236 2237 return Reg; 2238 } 2239 2240 bool AMDGPUAsmParser::ParseAMDGPURegister(RegisterKind &RegKind, 2241 unsigned &Reg, 2242 unsigned &RegNum, 2243 unsigned &RegWidth) { 2244 Reg = AMDGPU::NoRegister; 2245 2246 if (isToken(AsmToken::Identifier)) { 2247 Reg = ParseSpecialReg(RegKind, RegNum, RegWidth); 2248 if (Reg == AMDGPU::NoRegister) 2249 Reg = ParseRegularReg(RegKind, RegNum, RegWidth); 2250 } else { 2251 Reg = ParseRegList(RegKind, RegNum, RegWidth); 2252 } 2253 2254 const MCRegisterInfo *TRI = getContext().getRegisterInfo(); 2255 return Reg != AMDGPU::NoRegister && subtargetHasRegister(*TRI, Reg); 2256 } 2257 2258 Optional<StringRef> 2259 AMDGPUAsmParser::getGprCountSymbolName(RegisterKind RegKind) { 2260 switch (RegKind) { 2261 case IS_VGPR: 2262 return StringRef(".amdgcn.next_free_vgpr"); 2263 case IS_SGPR: 2264 return StringRef(".amdgcn.next_free_sgpr"); 2265 default: 2266 return None; 2267 } 2268 } 2269 2270 void AMDGPUAsmParser::initializeGprCountSymbol(RegisterKind RegKind) { 2271 auto SymbolName = getGprCountSymbolName(RegKind); 2272 assert(SymbolName && "initializing invalid register kind"); 2273 MCSymbol *Sym = getContext().getOrCreateSymbol(*SymbolName); 2274 Sym->setVariableValue(MCConstantExpr::create(0, getContext())); 2275 } 2276 2277 bool AMDGPUAsmParser::updateGprCountSymbols(RegisterKind RegKind, 2278 unsigned DwordRegIndex, 2279 unsigned RegWidth) { 2280 // Symbols are only defined for GCN targets 2281 if (AMDGPU::getIsaVersion(getSTI().getCPU()).Major < 6) 2282 return true; 2283 2284 auto SymbolName = getGprCountSymbolName(RegKind); 2285 if (!SymbolName) 2286 return true; 2287 MCSymbol *Sym = getContext().getOrCreateSymbol(*SymbolName); 2288 2289 int64_t NewMax = DwordRegIndex + RegWidth - 1; 2290 int64_t OldCount; 2291 2292 if (!Sym->isVariable()) 2293 return !Error(getParser().getTok().getLoc(), 2294 ".amdgcn.next_free_{v,s}gpr symbols must be variable"); 2295 if (!Sym->getVariableValue(false)->evaluateAsAbsolute(OldCount)) 2296 return !Error( 2297 getParser().getTok().getLoc(), 2298 ".amdgcn.next_free_{v,s}gpr symbols must be absolute expressions"); 2299 2300 if (OldCount <= NewMax) 2301 Sym->setVariableValue(MCConstantExpr::create(NewMax + 1, getContext())); 2302 2303 return true; 2304 } 2305 2306 
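// Usage sketch (added annotation; the example directive usage is assumed):
// once parseRegister() below has consumed e.g. "v[8:11]",
// updateGprCountSymbols() above raises .amdgcn.next_free_vgpr to at least 12,
// so an assembly file can feed the running maximum back into its kernel
// descriptor with
//   .amdhsa_next_free_vgpr .amdgcn.next_free_vgpr
// (the directive itself is handled by ParseDirectiveAMDHSAKernel() later in
// this file).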
std::unique_ptr<AMDGPUOperand> AMDGPUAsmParser::parseRegister() { 2307 const auto &Tok = Parser.getTok(); 2308 SMLoc StartLoc = Tok.getLoc(); 2309 SMLoc EndLoc = Tok.getEndLoc(); 2310 RegisterKind RegKind; 2311 unsigned Reg, RegNum, RegWidth; 2312 2313 if (!ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth)) { 2314 //FIXME: improve error messages (bug 41303). 2315 Error(StartLoc, "not a valid operand."); 2316 return nullptr; 2317 } 2318 if (AMDGPU::IsaInfo::hasCodeObjectV3(&getSTI())) { 2319 if (!updateGprCountSymbols(RegKind, RegNum, RegWidth)) 2320 return nullptr; 2321 } else 2322 KernelScope.usesRegister(RegKind, RegNum, RegWidth); 2323 return AMDGPUOperand::CreateReg(this, Reg, StartLoc, EndLoc); 2324 } 2325 2326 OperandMatchResultTy 2327 AMDGPUAsmParser::parseImm(OperandVector &Operands, bool HasSP3AbsModifier) { 2328 // TODO: add syntactic sugar for 1/(2*PI) 2329 2330 assert(!isRegister()); 2331 assert(!isModifier()); 2332 2333 const auto& Tok = getToken(); 2334 const auto& NextTok = peekToken(); 2335 bool IsReal = Tok.is(AsmToken::Real); 2336 SMLoc S = getLoc(); 2337 bool Negate = false; 2338 2339 if (!IsReal && Tok.is(AsmToken::Minus) && NextTok.is(AsmToken::Real)) { 2340 lex(); 2341 IsReal = true; 2342 Negate = true; 2343 } 2344 2345 if (IsReal) { 2346 // Floating-point expressions are not supported. 2347 // Can only allow floating-point literals with an 2348 // optional sign. 2349 2350 StringRef Num = getTokenStr(); 2351 lex(); 2352 2353 APFloat RealVal(APFloat::IEEEdouble()); 2354 auto roundMode = APFloat::rmNearestTiesToEven; 2355 if (RealVal.convertFromString(Num, roundMode) == APFloat::opInvalidOp) { 2356 return MatchOperand_ParseFail; 2357 } 2358 if (Negate) 2359 RealVal.changeSign(); 2360 2361 Operands.push_back( 2362 AMDGPUOperand::CreateImm(this, RealVal.bitcastToAPInt().getZExtValue(), S, 2363 AMDGPUOperand::ImmTyNone, true)); 2364 2365 return MatchOperand_Success; 2366 2367 } else { 2368 int64_t IntVal; 2369 const MCExpr *Expr; 2370 SMLoc S = getLoc(); 2371 2372 if (HasSP3AbsModifier) { 2373 // This is a workaround for handling expressions 2374 // as arguments of SP3 'abs' modifier, for example: 2375 // |1.0| 2376 // |-1| 2377 // |1+x| 2378 // This syntax is not compatible with syntax of standard 2379 // MC expressions (due to the trailing '|'). 
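// Added note (an assumption drawn from the comment above): parsePrimaryExpr()
// stops before the trailing '|', whereas parseExpression() would try to
// consume it as a bitwise-OR operator and misparse the SP3 abs construct.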
2380 SMLoc EndLoc; 2381 if (getParser().parsePrimaryExpr(Expr, EndLoc)) 2382 return MatchOperand_ParseFail; 2383 } else { 2384 if (Parser.parseExpression(Expr)) 2385 return MatchOperand_ParseFail; 2386 } 2387 2388 if (Expr->evaluateAsAbsolute(IntVal)) { 2389 Operands.push_back(AMDGPUOperand::CreateImm(this, IntVal, S)); 2390 } else { 2391 Operands.push_back(AMDGPUOperand::CreateExpr(this, Expr, S)); 2392 } 2393 2394 return MatchOperand_Success; 2395 } 2396 2397 return MatchOperand_NoMatch; 2398 } 2399 2400 OperandMatchResultTy 2401 AMDGPUAsmParser::parseReg(OperandVector &Operands) { 2402 if (!isRegister()) 2403 return MatchOperand_NoMatch; 2404 2405 if (auto R = parseRegister()) { 2406 assert(R->isReg()); 2407 Operands.push_back(std::move(R)); 2408 return MatchOperand_Success; 2409 } 2410 return MatchOperand_ParseFail; 2411 } 2412 2413 OperandMatchResultTy 2414 AMDGPUAsmParser::parseRegOrImm(OperandVector &Operands, bool HasSP3AbsMod) { 2415 auto res = parseReg(Operands); 2416 if (res != MatchOperand_NoMatch) { 2417 return res; 2418 } else if (isModifier()) { 2419 return MatchOperand_NoMatch; 2420 } else { 2421 return parseImm(Operands, HasSP3AbsMod); 2422 } 2423 } 2424 2425 bool 2426 AMDGPUAsmParser::isNamedOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const { 2427 if (Token.is(AsmToken::Identifier) && NextToken.is(AsmToken::LParen)) { 2428 const auto &str = Token.getString(); 2429 return str == "abs" || str == "neg" || str == "sext"; 2430 } 2431 return false; 2432 } 2433 2434 bool 2435 AMDGPUAsmParser::isOpcodeModifierWithVal(const AsmToken &Token, const AsmToken &NextToken) const { 2436 return Token.is(AsmToken::Identifier) && NextToken.is(AsmToken::Colon); 2437 } 2438 2439 bool 2440 AMDGPUAsmParser::isOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const { 2441 return isNamedOperandModifier(Token, NextToken) || Token.is(AsmToken::Pipe); 2442 } 2443 2444 bool 2445 AMDGPUAsmParser::isRegOrOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const { 2446 return isRegister(Token, NextToken) || isOperandModifier(Token, NextToken); 2447 } 2448 2449 // Check if this is an operand modifier or an opcode modifier 2450 // which may look like an expression but it is not. We should 2451 // avoid parsing these modifiers as expressions. Currently 2452 // recognized sequences are: 2453 // |...| 2454 // abs(...) 2455 // neg(...) 2456 // sext(...) 2457 // -reg 2458 // -|...| 2459 // -abs(...) 2460 // name:... 2461 // Note that simple opcode modifiers like 'gds' may be parsed as 2462 // expressions; this is a special case. See getExpressionAsToken. 2463 // 2464 bool 2465 AMDGPUAsmParser::isModifier() { 2466 2467 AsmToken Tok = getToken(); 2468 AsmToken NextToken[2]; 2469 peekTokens(NextToken); 2470 2471 return isOperandModifier(Tok, NextToken[0]) || 2472 (Tok.is(AsmToken::Minus) && isRegOrOperandModifier(NextToken[0], NextToken[1])) || 2473 isOpcodeModifierWithVal(Tok, NextToken[0]); 2474 } 2475 2476 // Check if the current token is an SP3 'neg' modifier. 2477 // Currently this modifier is allowed in the following context: 2478 // 2479 // 1. Before a register, e.g. "-v0", "-v[...]" or "-[v0,v1]". 2480 // 2. Before an 'abs' modifier: -abs(...) 2481 // 3. Before an SP3 'abs' modifier: -|...| 2482 // 2483 // In all other cases "-" is handled as a part 2484 // of an expression that follows the sign. 
2485 //
2486 // Note: When "-" is followed by an integer literal,
2487 // this is interpreted as integer negation rather
2488 // than a floating-point NEG modifier applied to N.
2489 // Besides being counter-intuitive, such use of a floating-point
2490 // NEG modifier would have resulted in a different meaning
2491 // of integer literals used with VOP1/2/C and VOP3,
2492 // for example:
2493 //   v_exp_f32_e32 v5, -1 // VOP1: src0 = 0xFFFFFFFF
2494 //   v_exp_f32_e64 v5, -1 // VOP3: src0 = 0x80000001
2495 // Negative fp literals with a preceding "-" are
2496 // handled likewise for uniformity.
2497 //
2498 bool
2499 AMDGPUAsmParser::parseSP3NegModifier() {
2500
2501 AsmToken NextToken[2];
2502 peekTokens(NextToken);
2503
2504 if (isToken(AsmToken::Minus) &&
2505 (isRegister(NextToken[0], NextToken[1]) ||
2506 NextToken[0].is(AsmToken::Pipe) ||
2507 isId(NextToken[0], "abs"))) {
2508 lex();
2509 return true;
2510 }
2511
2512 return false;
2513 }
2514
2515 OperandMatchResultTy
2516 AMDGPUAsmParser::parseRegOrImmWithFPInputMods(OperandVector &Operands,
2517 bool AllowImm) {
2518 bool Neg, SP3Neg;
2519 bool Abs, SP3Abs;
2520 SMLoc Loc;
2521
2522 // Disable ambiguous constructs like '--1' etc. Should use neg(-1) instead.
2523 if (isToken(AsmToken::Minus) && peekToken().is(AsmToken::Minus)) {
2524 Error(getLoc(), "invalid syntax, expected 'neg' modifier");
2525 return MatchOperand_ParseFail;
2526 }
2527
2528 SP3Neg = parseSP3NegModifier();
2529
2530 Loc = getLoc();
2531 Neg = trySkipId("neg");
2532 if (Neg && SP3Neg) {
2533 Error(Loc, "expected register or immediate");
2534 return MatchOperand_ParseFail;
2535 }
2536 if (Neg && !skipToken(AsmToken::LParen, "expected left paren after neg"))
2537 return MatchOperand_ParseFail;
2538
2539 Abs = trySkipId("abs");
2540 if (Abs && !skipToken(AsmToken::LParen, "expected left paren after abs"))
2541 return MatchOperand_ParseFail;
2542
2543 Loc = getLoc();
2544 SP3Abs = trySkipToken(AsmToken::Pipe);
2545 if (Abs && SP3Abs) {
2546 Error(Loc, "expected register or immediate");
2547 return MatchOperand_ParseFail;
2548 }
2549
2550 OperandMatchResultTy Res;
2551 if (AllowImm) {
2552 Res = parseRegOrImm(Operands, SP3Abs);
2553 } else {
2554 Res = parseReg(Operands);
2555 }
2556 if (Res != MatchOperand_Success) {
2557 return (SP3Neg || Neg || SP3Abs || Abs)?
MatchOperand_ParseFail : Res; 2558 } 2559 2560 if (SP3Abs && !skipToken(AsmToken::Pipe, "expected vertical bar")) 2561 return MatchOperand_ParseFail; 2562 if (Abs && !skipToken(AsmToken::RParen, "expected closing parentheses")) 2563 return MatchOperand_ParseFail; 2564 if (Neg && !skipToken(AsmToken::RParen, "expected closing parentheses")) 2565 return MatchOperand_ParseFail; 2566 2567 AMDGPUOperand::Modifiers Mods; 2568 Mods.Abs = Abs || SP3Abs; 2569 Mods.Neg = Neg || SP3Neg; 2570 2571 if (Mods.hasFPModifiers()) { 2572 AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back()); 2573 if (Op.isExpr()) { 2574 Error(Op.getStartLoc(), "expected an absolute expression"); 2575 return MatchOperand_ParseFail; 2576 } 2577 Op.setModifiers(Mods); 2578 } 2579 return MatchOperand_Success; 2580 } 2581 2582 OperandMatchResultTy 2583 AMDGPUAsmParser::parseRegOrImmWithIntInputMods(OperandVector &Operands, 2584 bool AllowImm) { 2585 bool Sext = trySkipId("sext"); 2586 if (Sext && !skipToken(AsmToken::LParen, "expected left paren after sext")) 2587 return MatchOperand_ParseFail; 2588 2589 OperandMatchResultTy Res; 2590 if (AllowImm) { 2591 Res = parseRegOrImm(Operands); 2592 } else { 2593 Res = parseReg(Operands); 2594 } 2595 if (Res != MatchOperand_Success) { 2596 return Sext? MatchOperand_ParseFail : Res; 2597 } 2598 2599 if (Sext && !skipToken(AsmToken::RParen, "expected closing parentheses")) 2600 return MatchOperand_ParseFail; 2601 2602 AMDGPUOperand::Modifiers Mods; 2603 Mods.Sext = Sext; 2604 2605 if (Mods.hasIntModifiers()) { 2606 AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back()); 2607 if (Op.isExpr()) { 2608 Error(Op.getStartLoc(), "expected an absolute expression"); 2609 return MatchOperand_ParseFail; 2610 } 2611 Op.setModifiers(Mods); 2612 } 2613 2614 return MatchOperand_Success; 2615 } 2616 2617 OperandMatchResultTy 2618 AMDGPUAsmParser::parseRegWithFPInputMods(OperandVector &Operands) { 2619 return parseRegOrImmWithFPInputMods(Operands, false); 2620 } 2621 2622 OperandMatchResultTy 2623 AMDGPUAsmParser::parseRegWithIntInputMods(OperandVector &Operands) { 2624 return parseRegOrImmWithIntInputMods(Operands, false); 2625 } 2626 2627 OperandMatchResultTy AMDGPUAsmParser::parseVReg32OrOff(OperandVector &Operands) { 2628 auto Loc = getLoc(); 2629 if (trySkipId("off")) { 2630 Operands.push_back(AMDGPUOperand::CreateImm(this, 0, Loc, 2631 AMDGPUOperand::ImmTyOff, false)); 2632 return MatchOperand_Success; 2633 } 2634 2635 if (!isRegister()) 2636 return MatchOperand_NoMatch; 2637 2638 std::unique_ptr<AMDGPUOperand> Reg = parseRegister(); 2639 if (Reg) { 2640 Operands.push_back(std::move(Reg)); 2641 return MatchOperand_Success; 2642 } 2643 2644 return MatchOperand_ParseFail; 2645 2646 } 2647 2648 unsigned AMDGPUAsmParser::checkTargetMatchPredicate(MCInst &Inst) { 2649 uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags; 2650 2651 if ((getForcedEncodingSize() == 32 && (TSFlags & SIInstrFlags::VOP3)) || 2652 (getForcedEncodingSize() == 64 && !(TSFlags & SIInstrFlags::VOP3)) || 2653 (isForcedDPP() && !(TSFlags & SIInstrFlags::DPP)) || 2654 (isForcedSDWA() && !(TSFlags & SIInstrFlags::SDWA)) ) 2655 return Match_InvalidOperand; 2656 2657 if ((TSFlags & SIInstrFlags::VOP3) && 2658 (TSFlags & SIInstrFlags::VOPAsmPrefer32Bit) && 2659 getForcedEncodingSize() != 64) 2660 return Match_PreferE32; 2661 2662 if (Inst.getOpcode() == AMDGPU::V_MAC_F32_sdwa_vi || 2663 Inst.getOpcode() == AMDGPU::V_MAC_F16_sdwa_vi) { 2664 // v_mac_f32/16 allow only dst_sel == DWORD; 2665 auto OpNum = 2666 
AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::dst_sel);
2667 const auto &Op = Inst.getOperand(OpNum);
2668 if (!Op.isImm() || Op.getImm() != AMDGPU::SDWA::SdwaSel::DWORD) {
2669 return Match_InvalidOperand;
2670 }
2671 }
2672
2673 return Match_Success;
2674 }
2675
2676 // What asm variants we should check
2677 ArrayRef<unsigned> AMDGPUAsmParser::getMatchedVariants() const {
2678 if (getForcedEncodingSize() == 32) {
2679 static const unsigned Variants[] = {AMDGPUAsmVariants::DEFAULT};
2680 return makeArrayRef(Variants);
2681 }
2682
2683 if (isForcedVOP3()) {
2684 static const unsigned Variants[] = {AMDGPUAsmVariants::VOP3};
2685 return makeArrayRef(Variants);
2686 }
2687
2688 if (isForcedSDWA()) {
2689 static const unsigned Variants[] = {AMDGPUAsmVariants::SDWA,
2690 AMDGPUAsmVariants::SDWA9};
2691 return makeArrayRef(Variants);
2692 }
2693
2694 if (isForcedDPP()) {
2695 static const unsigned Variants[] = {AMDGPUAsmVariants::DPP};
2696 return makeArrayRef(Variants);
2697 }
2698
2699 static const unsigned Variants[] = {
2700 AMDGPUAsmVariants::DEFAULT, AMDGPUAsmVariants::VOP3,
2701 AMDGPUAsmVariants::SDWA, AMDGPUAsmVariants::SDWA9, AMDGPUAsmVariants::DPP
2702 };
2703
2704 return makeArrayRef(Variants);
2705 }
2706
2707 unsigned AMDGPUAsmParser::findImplicitSGPRReadInVOP(const MCInst &Inst) const {
2708 const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
2709 const unsigned Num = Desc.getNumImplicitUses();
2710 for (unsigned i = 0; i < Num; ++i) {
2711 unsigned Reg = Desc.ImplicitUses[i];
2712 switch (Reg) {
2713 case AMDGPU::FLAT_SCR:
2714 case AMDGPU::VCC:
2715 case AMDGPU::VCC_LO:
2716 case AMDGPU::VCC_HI:
2717 case AMDGPU::M0:
2718 return Reg;
2719 default:
2720 break;
2721 }
2722 }
2723 return AMDGPU::NoRegister;
2724 }
2725
2726 // NB: This code is correct only when used to check constant
2727 // bus limitations because GFX7 supports no f16 inline constants.
2728 // Note that there are no cases when a GFX7 opcode violates
2729 // constant bus limitations due to the use of an f16 constant.
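// Worked example (added annotation): for a 4-byte FP operand the literal
// 0x3F000000 (0.5f) is an inline constant and does not occupy the constant
// bus, while 0x3F000001 is not inlinable and is counted as a 32-bit literal
// by the constant bus checks below.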
2730 bool AMDGPUAsmParser::isInlineConstant(const MCInst &Inst, 2731 unsigned OpIdx) const { 2732 const MCInstrDesc &Desc = MII.get(Inst.getOpcode()); 2733 2734 if (!AMDGPU::isSISrcOperand(Desc, OpIdx)) { 2735 return false; 2736 } 2737 2738 const MCOperand &MO = Inst.getOperand(OpIdx); 2739 2740 int64_t Val = MO.getImm(); 2741 auto OpSize = AMDGPU::getOperandSize(Desc, OpIdx); 2742 2743 switch (OpSize) { // expected operand size 2744 case 8: 2745 return AMDGPU::isInlinableLiteral64(Val, hasInv2PiInlineImm()); 2746 case 4: 2747 return AMDGPU::isInlinableLiteral32(Val, hasInv2PiInlineImm()); 2748 case 2: { 2749 const unsigned OperandType = Desc.OpInfo[OpIdx].OperandType; 2750 if (OperandType == AMDGPU::OPERAND_REG_INLINE_C_V2INT16 || 2751 OperandType == AMDGPU::OPERAND_REG_INLINE_C_V2FP16 || 2752 OperandType == AMDGPU::OPERAND_REG_INLINE_AC_V2INT16 || 2753 OperandType == AMDGPU::OPERAND_REG_INLINE_AC_V2FP16 || 2754 OperandType == AMDGPU::OPERAND_REG_IMM_V2INT16 || 2755 OperandType == AMDGPU::OPERAND_REG_IMM_V2FP16) { 2756 return AMDGPU::isInlinableLiteralV216(Val, hasInv2PiInlineImm()); 2757 } else { 2758 return AMDGPU::isInlinableLiteral16(Val, hasInv2PiInlineImm()); 2759 } 2760 } 2761 default: 2762 llvm_unreachable("invalid operand size"); 2763 } 2764 } 2765 2766 unsigned AMDGPUAsmParser::getConstantBusLimit(unsigned Opcode) const { 2767 if (!isGFX10()) 2768 return 1; 2769 2770 switch (Opcode) { 2771 // 64-bit shift instructions can use only one scalar value input 2772 case AMDGPU::V_LSHLREV_B64: 2773 case AMDGPU::V_LSHLREV_B64_gfx10: 2774 case AMDGPU::V_LSHL_B64: 2775 case AMDGPU::V_LSHRREV_B64: 2776 case AMDGPU::V_LSHRREV_B64_gfx10: 2777 case AMDGPU::V_LSHR_B64: 2778 case AMDGPU::V_ASHRREV_I64: 2779 case AMDGPU::V_ASHRREV_I64_gfx10: 2780 case AMDGPU::V_ASHR_I64: 2781 return 1; 2782 default: 2783 return 2; 2784 } 2785 } 2786 2787 bool AMDGPUAsmParser::usesConstantBus(const MCInst &Inst, unsigned OpIdx) { 2788 const MCOperand &MO = Inst.getOperand(OpIdx); 2789 if (MO.isImm()) { 2790 return !isInlineConstant(Inst, OpIdx); 2791 } else if (MO.isReg()) { 2792 auto Reg = MO.getReg(); 2793 const MCRegisterInfo *TRI = getContext().getRegisterInfo(); 2794 return isSGPR(mc2PseudoReg(Reg), TRI) && Reg != SGPR_NULL; 2795 } else { 2796 return true; 2797 } 2798 } 2799 2800 bool AMDGPUAsmParser::validateConstantBusLimitations(const MCInst &Inst) { 2801 const unsigned Opcode = Inst.getOpcode(); 2802 const MCInstrDesc &Desc = MII.get(Opcode); 2803 unsigned ConstantBusUseCount = 0; 2804 unsigned NumLiterals = 0; 2805 unsigned LiteralSize; 2806 2807 if (Desc.TSFlags & 2808 (SIInstrFlags::VOPC | 2809 SIInstrFlags::VOP1 | SIInstrFlags::VOP2 | 2810 SIInstrFlags::VOP3 | SIInstrFlags::VOP3P | 2811 SIInstrFlags::SDWA)) { 2812 // Check special imm operands (used by madmk, etc) 2813 if (AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::imm) != -1) { 2814 ++ConstantBusUseCount; 2815 } 2816 2817 SmallDenseSet<unsigned> SGPRsUsed; 2818 unsigned SGPRUsed = findImplicitSGPRReadInVOP(Inst); 2819 if (SGPRUsed != AMDGPU::NoRegister) { 2820 SGPRsUsed.insert(SGPRUsed); 2821 ++ConstantBusUseCount; 2822 } 2823 2824 const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0); 2825 const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1); 2826 const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2); 2827 2828 const int OpIndices[] = { Src0Idx, Src1Idx, Src2Idx }; 2829 2830 for (int OpIdx : OpIndices) { 2831 if (OpIdx == -1) break; 2832 2833 const MCOperand &MO = 
Inst.getOperand(OpIdx); 2834 if (usesConstantBus(Inst, OpIdx)) { 2835 if (MO.isReg()) { 2836 const unsigned Reg = mc2PseudoReg(MO.getReg()); 2837 // Pairs of registers with a partial intersections like these 2838 // s0, s[0:1] 2839 // flat_scratch_lo, flat_scratch 2840 // flat_scratch_lo, flat_scratch_hi 2841 // are theoretically valid but they are disabled anyway. 2842 // Note that this code mimics SIInstrInfo::verifyInstruction 2843 if (!SGPRsUsed.count(Reg)) { 2844 SGPRsUsed.insert(Reg); 2845 ++ConstantBusUseCount; 2846 } 2847 } else { // Expression or a literal 2848 2849 if (Desc.OpInfo[OpIdx].OperandType == MCOI::OPERAND_IMMEDIATE) 2850 continue; // special operand like VINTERP attr_chan 2851 2852 // An instruction may use only one literal. 2853 // This has been validated on the previous step. 2854 // See validateVOP3Literal. 2855 // This literal may be used as more than one operand. 2856 // If all these operands are of the same size, 2857 // this literal counts as one scalar value. 2858 // Otherwise it counts as 2 scalar values. 2859 // See "GFX10 Shader Programming", section 3.6.2.3. 2860 2861 unsigned Size = AMDGPU::getOperandSize(Desc, OpIdx); 2862 if (Size < 4) Size = 4; 2863 2864 if (NumLiterals == 0) { 2865 NumLiterals = 1; 2866 LiteralSize = Size; 2867 } else if (LiteralSize != Size) { 2868 NumLiterals = 2; 2869 } 2870 } 2871 } 2872 } 2873 } 2874 ConstantBusUseCount += NumLiterals; 2875 2876 return ConstantBusUseCount <= getConstantBusLimit(Opcode); 2877 } 2878 2879 bool AMDGPUAsmParser::validateEarlyClobberLimitations(const MCInst &Inst) { 2880 const unsigned Opcode = Inst.getOpcode(); 2881 const MCInstrDesc &Desc = MII.get(Opcode); 2882 2883 const int DstIdx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::vdst); 2884 if (DstIdx == -1 || 2885 Desc.getOperandConstraint(DstIdx, MCOI::EARLY_CLOBBER) == -1) { 2886 return true; 2887 } 2888 2889 const MCRegisterInfo *TRI = getContext().getRegisterInfo(); 2890 2891 const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0); 2892 const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1); 2893 const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2); 2894 2895 assert(DstIdx != -1); 2896 const MCOperand &Dst = Inst.getOperand(DstIdx); 2897 assert(Dst.isReg()); 2898 const unsigned DstReg = mc2PseudoReg(Dst.getReg()); 2899 2900 const int SrcIndices[] = { Src0Idx, Src1Idx, Src2Idx }; 2901 2902 for (int SrcIdx : SrcIndices) { 2903 if (SrcIdx == -1) break; 2904 const MCOperand &Src = Inst.getOperand(SrcIdx); 2905 if (Src.isReg()) { 2906 const unsigned SrcReg = mc2PseudoReg(Src.getReg()); 2907 if (isRegIntersect(DstReg, SrcReg, TRI)) { 2908 return false; 2909 } 2910 } 2911 } 2912 2913 return true; 2914 } 2915 2916 bool AMDGPUAsmParser::validateIntClampSupported(const MCInst &Inst) { 2917 2918 const unsigned Opc = Inst.getOpcode(); 2919 const MCInstrDesc &Desc = MII.get(Opc); 2920 2921 if ((Desc.TSFlags & SIInstrFlags::IntClamp) != 0 && !hasIntClamp()) { 2922 int ClampIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp); 2923 assert(ClampIdx != -1); 2924 return Inst.getOperand(ClampIdx).getImm() == 0; 2925 } 2926 2927 return true; 2928 } 2929 2930 bool AMDGPUAsmParser::validateMIMGDataSize(const MCInst &Inst) { 2931 2932 const unsigned Opc = Inst.getOpcode(); 2933 const MCInstrDesc &Desc = MII.get(Opc); 2934 2935 if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0) 2936 return true; 2937 2938 int VDataIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdata); 2939 int DMaskIdx 
= AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask); 2940 int TFEIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::tfe); 2941 2942 assert(VDataIdx != -1); 2943 assert(DMaskIdx != -1); 2944 assert(TFEIdx != -1); 2945 2946 unsigned VDataSize = AMDGPU::getRegOperandSize(getMRI(), Desc, VDataIdx); 2947 unsigned TFESize = Inst.getOperand(TFEIdx).getImm()? 1 : 0; 2948 unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf; 2949 if (DMask == 0) 2950 DMask = 1; 2951 2952 unsigned DataSize = 2953 (Desc.TSFlags & SIInstrFlags::Gather4) ? 4 : countPopulation(DMask); 2954 if (hasPackedD16()) { 2955 int D16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::d16); 2956 if (D16Idx >= 0 && Inst.getOperand(D16Idx).getImm()) 2957 DataSize = (DataSize + 1) / 2; 2958 } 2959 2960 return (VDataSize / 4) == DataSize + TFESize; 2961 } 2962 2963 bool AMDGPUAsmParser::validateMIMGAddrSize(const MCInst &Inst) { 2964 const unsigned Opc = Inst.getOpcode(); 2965 const MCInstrDesc &Desc = MII.get(Opc); 2966 2967 if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0 || !isGFX10()) 2968 return true; 2969 2970 const AMDGPU::MIMGInfo *Info = AMDGPU::getMIMGInfo(Opc); 2971 const AMDGPU::MIMGBaseOpcodeInfo *BaseOpcode = 2972 AMDGPU::getMIMGBaseOpcodeInfo(Info->BaseOpcode); 2973 int VAddr0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vaddr0); 2974 int SrsrcIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::srsrc); 2975 int DimIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dim); 2976 2977 assert(VAddr0Idx != -1); 2978 assert(SrsrcIdx != -1); 2979 assert(DimIdx != -1); 2980 assert(SrsrcIdx > VAddr0Idx); 2981 2982 unsigned Dim = Inst.getOperand(DimIdx).getImm(); 2983 const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfoByEncoding(Dim); 2984 bool IsNSA = SrsrcIdx - VAddr0Idx > 1; 2985 unsigned VAddrSize = 2986 IsNSA ? SrsrcIdx - VAddr0Idx 2987 : AMDGPU::getRegOperandSize(getMRI(), Desc, VAddr0Idx) / 4; 2988 2989 unsigned AddrSize = BaseOpcode->NumExtraArgs + 2990 (BaseOpcode->Gradients ? DimInfo->NumGradients : 0) + 2991 (BaseOpcode->Coordinates ? DimInfo->NumCoords : 0) + 2992 (BaseOpcode->LodOrClampOrMip ? 1 : 0); 2993 if (!IsNSA) { 2994 if (AddrSize > 8) 2995 AddrSize = 16; 2996 else if (AddrSize > 4) 2997 AddrSize = 8; 2998 } 2999 3000 return VAddrSize == AddrSize; 3001 } 3002 3003 bool AMDGPUAsmParser::validateMIMGAtomicDMask(const MCInst &Inst) { 3004 3005 const unsigned Opc = Inst.getOpcode(); 3006 const MCInstrDesc &Desc = MII.get(Opc); 3007 3008 if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0) 3009 return true; 3010 if (!Desc.mayLoad() || !Desc.mayStore()) 3011 return true; // Not atomic 3012 3013 int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask); 3014 unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf; 3015 3016 // This is an incomplete check because image_atomic_cmpswap 3017 // may only use 0x3 and 0xf while other atomic operations 3018 // may use 0x1 and 0x3. However these limitations are 3019 // verified when we check that dmask matches dst size. 
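// Added illustration (dmask values assumed from the comment above): a 32-bit
// atomic normally uses dmask:0x1, a 64-bit one dmask:0x3, and
// image_atomic_cmpswap doubles those to 0x3/0xf; a value such as dmask:0x5
// is therefore rejected here.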
3020 return DMask == 0x1 || DMask == 0x3 || DMask == 0xf; 3021 } 3022 3023 bool AMDGPUAsmParser::validateMIMGGatherDMask(const MCInst &Inst) { 3024 3025 const unsigned Opc = Inst.getOpcode(); 3026 const MCInstrDesc &Desc = MII.get(Opc); 3027 3028 if ((Desc.TSFlags & SIInstrFlags::Gather4) == 0) 3029 return true; 3030 3031 int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask); 3032 unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf; 3033 3034 // GATHER4 instructions use dmask in a different fashion compared to 3035 // other MIMG instructions. The only useful DMASK values are 3036 // 1=red, 2=green, 4=blue, 8=alpha. (e.g. 1 returns 3037 // (red,red,red,red) etc.) The ISA document doesn't mention 3038 // this. 3039 return DMask == 0x1 || DMask == 0x2 || DMask == 0x4 || DMask == 0x8; 3040 } 3041 3042 bool AMDGPUAsmParser::validateMIMGD16(const MCInst &Inst) { 3043 3044 const unsigned Opc = Inst.getOpcode(); 3045 const MCInstrDesc &Desc = MII.get(Opc); 3046 3047 if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0) 3048 return true; 3049 3050 int D16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::d16); 3051 if (D16Idx >= 0 && Inst.getOperand(D16Idx).getImm()) { 3052 if (isCI() || isSI()) 3053 return false; 3054 } 3055 3056 return true; 3057 } 3058 3059 bool AMDGPUAsmParser::validateMIMGDim(const MCInst &Inst) { 3060 const unsigned Opc = Inst.getOpcode(); 3061 const MCInstrDesc &Desc = MII.get(Opc); 3062 3063 if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0) 3064 return true; 3065 3066 int DimIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dim); 3067 if (DimIdx < 0) 3068 return true; 3069 3070 long Imm = Inst.getOperand(DimIdx).getImm(); 3071 if (Imm < 0 || Imm >= 8) 3072 return false; 3073 3074 return true; 3075 } 3076 3077 static bool IsRevOpcode(const unsigned Opcode) 3078 { 3079 switch (Opcode) { 3080 case AMDGPU::V_SUBREV_F32_e32: 3081 case AMDGPU::V_SUBREV_F32_e64: 3082 case AMDGPU::V_SUBREV_F32_e32_gfx10: 3083 case AMDGPU::V_SUBREV_F32_e32_gfx6_gfx7: 3084 case AMDGPU::V_SUBREV_F32_e32_vi: 3085 case AMDGPU::V_SUBREV_F32_e64_gfx10: 3086 case AMDGPU::V_SUBREV_F32_e64_gfx6_gfx7: 3087 case AMDGPU::V_SUBREV_F32_e64_vi: 3088 3089 case AMDGPU::V_SUBREV_I32_e32: 3090 case AMDGPU::V_SUBREV_I32_e64: 3091 case AMDGPU::V_SUBREV_I32_e32_gfx6_gfx7: 3092 case AMDGPU::V_SUBREV_I32_e64_gfx6_gfx7: 3093 3094 case AMDGPU::V_SUBBREV_U32_e32: 3095 case AMDGPU::V_SUBBREV_U32_e64: 3096 case AMDGPU::V_SUBBREV_U32_e32_gfx6_gfx7: 3097 case AMDGPU::V_SUBBREV_U32_e32_vi: 3098 case AMDGPU::V_SUBBREV_U32_e64_gfx6_gfx7: 3099 case AMDGPU::V_SUBBREV_U32_e64_vi: 3100 3101 case AMDGPU::V_SUBREV_U32_e32: 3102 case AMDGPU::V_SUBREV_U32_e64: 3103 case AMDGPU::V_SUBREV_U32_e32_gfx9: 3104 case AMDGPU::V_SUBREV_U32_e32_vi: 3105 case AMDGPU::V_SUBREV_U32_e64_gfx9: 3106 case AMDGPU::V_SUBREV_U32_e64_vi: 3107 3108 case AMDGPU::V_SUBREV_F16_e32: 3109 case AMDGPU::V_SUBREV_F16_e64: 3110 case AMDGPU::V_SUBREV_F16_e32_gfx10: 3111 case AMDGPU::V_SUBREV_F16_e32_vi: 3112 case AMDGPU::V_SUBREV_F16_e64_gfx10: 3113 case AMDGPU::V_SUBREV_F16_e64_vi: 3114 3115 case AMDGPU::V_SUBREV_U16_e32: 3116 case AMDGPU::V_SUBREV_U16_e64: 3117 case AMDGPU::V_SUBREV_U16_e32_vi: 3118 case AMDGPU::V_SUBREV_U16_e64_vi: 3119 3120 case AMDGPU::V_SUBREV_CO_U32_e32_gfx9: 3121 case AMDGPU::V_SUBREV_CO_U32_e64_gfx10: 3122 case AMDGPU::V_SUBREV_CO_U32_e64_gfx9: 3123 3124 case AMDGPU::V_SUBBREV_CO_U32_e32_gfx9: 3125 case AMDGPU::V_SUBBREV_CO_U32_e64_gfx9: 3126 3127 case AMDGPU::V_SUBREV_NC_U32_e32_gfx10: 3128 case 
AMDGPU::V_SUBREV_NC_U32_e64_gfx10: 3129 3130 case AMDGPU::V_SUBREV_CO_CI_U32_e32_gfx10: 3131 case AMDGPU::V_SUBREV_CO_CI_U32_e64_gfx10: 3132 3133 case AMDGPU::V_LSHRREV_B32_e32: 3134 case AMDGPU::V_LSHRREV_B32_e64: 3135 case AMDGPU::V_LSHRREV_B32_e32_gfx6_gfx7: 3136 case AMDGPU::V_LSHRREV_B32_e64_gfx6_gfx7: 3137 case AMDGPU::V_LSHRREV_B32_e32_vi: 3138 case AMDGPU::V_LSHRREV_B32_e64_vi: 3139 case AMDGPU::V_LSHRREV_B32_e32_gfx10: 3140 case AMDGPU::V_LSHRREV_B32_e64_gfx10: 3141 3142 case AMDGPU::V_ASHRREV_I32_e32: 3143 case AMDGPU::V_ASHRREV_I32_e64: 3144 case AMDGPU::V_ASHRREV_I32_e32_gfx10: 3145 case AMDGPU::V_ASHRREV_I32_e32_gfx6_gfx7: 3146 case AMDGPU::V_ASHRREV_I32_e32_vi: 3147 case AMDGPU::V_ASHRREV_I32_e64_gfx10: 3148 case AMDGPU::V_ASHRREV_I32_e64_gfx6_gfx7: 3149 case AMDGPU::V_ASHRREV_I32_e64_vi: 3150 3151 case AMDGPU::V_LSHLREV_B32_e32: 3152 case AMDGPU::V_LSHLREV_B32_e64: 3153 case AMDGPU::V_LSHLREV_B32_e32_gfx10: 3154 case AMDGPU::V_LSHLREV_B32_e32_gfx6_gfx7: 3155 case AMDGPU::V_LSHLREV_B32_e32_vi: 3156 case AMDGPU::V_LSHLREV_B32_e64_gfx10: 3157 case AMDGPU::V_LSHLREV_B32_e64_gfx6_gfx7: 3158 case AMDGPU::V_LSHLREV_B32_e64_vi: 3159 3160 case AMDGPU::V_LSHLREV_B16_e32: 3161 case AMDGPU::V_LSHLREV_B16_e64: 3162 case AMDGPU::V_LSHLREV_B16_e32_vi: 3163 case AMDGPU::V_LSHLREV_B16_e64_vi: 3164 case AMDGPU::V_LSHLREV_B16_gfx10: 3165 3166 case AMDGPU::V_LSHRREV_B16_e32: 3167 case AMDGPU::V_LSHRREV_B16_e64: 3168 case AMDGPU::V_LSHRREV_B16_e32_vi: 3169 case AMDGPU::V_LSHRREV_B16_e64_vi: 3170 case AMDGPU::V_LSHRREV_B16_gfx10: 3171 3172 case AMDGPU::V_ASHRREV_I16_e32: 3173 case AMDGPU::V_ASHRREV_I16_e64: 3174 case AMDGPU::V_ASHRREV_I16_e32_vi: 3175 case AMDGPU::V_ASHRREV_I16_e64_vi: 3176 case AMDGPU::V_ASHRREV_I16_gfx10: 3177 3178 case AMDGPU::V_LSHLREV_B64: 3179 case AMDGPU::V_LSHLREV_B64_gfx10: 3180 case AMDGPU::V_LSHLREV_B64_vi: 3181 3182 case AMDGPU::V_LSHRREV_B64: 3183 case AMDGPU::V_LSHRREV_B64_gfx10: 3184 case AMDGPU::V_LSHRREV_B64_vi: 3185 3186 case AMDGPU::V_ASHRREV_I64: 3187 case AMDGPU::V_ASHRREV_I64_gfx10: 3188 case AMDGPU::V_ASHRREV_I64_vi: 3189 3190 case AMDGPU::V_PK_LSHLREV_B16: 3191 case AMDGPU::V_PK_LSHLREV_B16_gfx10: 3192 case AMDGPU::V_PK_LSHLREV_B16_vi: 3193 3194 case AMDGPU::V_PK_LSHRREV_B16: 3195 case AMDGPU::V_PK_LSHRREV_B16_gfx10: 3196 case AMDGPU::V_PK_LSHRREV_B16_vi: 3197 case AMDGPU::V_PK_ASHRREV_I16: 3198 case AMDGPU::V_PK_ASHRREV_I16_gfx10: 3199 case AMDGPU::V_PK_ASHRREV_I16_vi: 3200 return true; 3201 default: 3202 return false; 3203 } 3204 } 3205 3206 bool AMDGPUAsmParser::validateLdsDirect(const MCInst &Inst) { 3207 3208 using namespace SIInstrFlags; 3209 const unsigned Opcode = Inst.getOpcode(); 3210 const MCInstrDesc &Desc = MII.get(Opcode); 3211 3212 // lds_direct register is defined so that it can be used 3213 // with 9-bit operands only. Ignore encodings which do not accept these. 3214 if ((Desc.TSFlags & (VOP1 | VOP2 | VOP3 | VOPC | VOP3P | SIInstrFlags::SDWA)) == 0) 3215 return true; 3216 3217 const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0); 3218 const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1); 3219 const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2); 3220 3221 const int SrcIndices[] = { Src1Idx, Src2Idx }; 3222 3223 // lds_direct cannot be specified as either src1 or src2. 
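// Added examples (assembly syntax assumed, not taken from the source):
//   v_mov_b32 v0, lds_direct        ; accepted: lds_direct appears as src0
//   v_add_f32 v0, v1, lds_direct    ; rejected by the loop below: src1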
3224 for (int SrcIdx : SrcIndices) { 3225 if (SrcIdx == -1) break; 3226 const MCOperand &Src = Inst.getOperand(SrcIdx); 3227 if (Src.isReg() && Src.getReg() == LDS_DIRECT) { 3228 return false; 3229 } 3230 } 3231 3232 if (Src0Idx == -1) 3233 return true; 3234 3235 const MCOperand &Src = Inst.getOperand(Src0Idx); 3236 if (!Src.isReg() || Src.getReg() != LDS_DIRECT) 3237 return true; 3238 3239 // lds_direct is specified as src0. Check additional limitations. 3240 return (Desc.TSFlags & SIInstrFlags::SDWA) == 0 && !IsRevOpcode(Opcode); 3241 } 3242 3243 SMLoc AMDGPUAsmParser::getFlatOffsetLoc(const OperandVector &Operands) const { 3244 for (unsigned i = 1, e = Operands.size(); i != e; ++i) { 3245 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 3246 if (Op.isFlatOffset()) 3247 return Op.getStartLoc(); 3248 } 3249 return getLoc(); 3250 } 3251 3252 bool AMDGPUAsmParser::validateFlatOffset(const MCInst &Inst, 3253 const OperandVector &Operands) { 3254 uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags; 3255 if ((TSFlags & SIInstrFlags::FLAT) == 0) 3256 return true; 3257 3258 auto Opcode = Inst.getOpcode(); 3259 auto OpNum = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::offset); 3260 assert(OpNum != -1); 3261 3262 const auto &Op = Inst.getOperand(OpNum); 3263 if (!hasFlatOffsets() && Op.getImm() != 0) { 3264 Error(getFlatOffsetLoc(Operands), 3265 "flat offset modifier is not supported on this GPU"); 3266 return false; 3267 } 3268 3269 // Address offset is 12-bit signed for GFX10, 13-bit for GFX9. 3270 // For FLAT segment the offset must be positive; 3271 // MSB is ignored and forced to zero. 3272 unsigned OffsetSize = isGFX9() ? 13 : 12; 3273 if (TSFlags & SIInstrFlags::IsNonFlatSeg) { 3274 if (!isIntN(OffsetSize, Op.getImm())) { 3275 Error(getFlatOffsetLoc(Operands), 3276 isGFX9() ? "expected a 13-bit signed offset" : 3277 "expected a 12-bit signed offset"); 3278 return false; 3279 } 3280 } else { 3281 if (!isUIntN(OffsetSize - 1, Op.getImm())) { 3282 Error(getFlatOffsetLoc(Operands), 3283 isGFX9() ? 
"expected a 12-bit unsigned offset" : 3284 "expected an 11-bit unsigned offset"); 3285 return false; 3286 } 3287 } 3288 3289 return true; 3290 } 3291 3292 bool AMDGPUAsmParser::validateSOPLiteral(const MCInst &Inst) const { 3293 unsigned Opcode = Inst.getOpcode(); 3294 const MCInstrDesc &Desc = MII.get(Opcode); 3295 if (!(Desc.TSFlags & (SIInstrFlags::SOP2 | SIInstrFlags::SOPC))) 3296 return true; 3297 3298 const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0); 3299 const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1); 3300 3301 const int OpIndices[] = { Src0Idx, Src1Idx }; 3302 3303 unsigned NumExprs = 0; 3304 unsigned NumLiterals = 0; 3305 uint32_t LiteralValue; 3306 3307 for (int OpIdx : OpIndices) { 3308 if (OpIdx == -1) break; 3309 3310 const MCOperand &MO = Inst.getOperand(OpIdx); 3311 // Exclude special imm operands (like that used by s_set_gpr_idx_on) 3312 if (AMDGPU::isSISrcOperand(Desc, OpIdx)) { 3313 if (MO.isImm() && !isInlineConstant(Inst, OpIdx)) { 3314 uint32_t Value = static_cast<uint32_t>(MO.getImm()); 3315 if (NumLiterals == 0 || LiteralValue != Value) { 3316 LiteralValue = Value; 3317 ++NumLiterals; 3318 } 3319 } else if (MO.isExpr()) { 3320 ++NumExprs; 3321 } 3322 } 3323 } 3324 3325 return NumLiterals + NumExprs <= 1; 3326 } 3327 3328 bool AMDGPUAsmParser::validateOpSel(const MCInst &Inst) { 3329 const unsigned Opc = Inst.getOpcode(); 3330 if (Opc == AMDGPU::V_PERMLANE16_B32_gfx10 || 3331 Opc == AMDGPU::V_PERMLANEX16_B32_gfx10) { 3332 int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel); 3333 unsigned OpSel = Inst.getOperand(OpSelIdx).getImm(); 3334 3335 if (OpSel & ~3) 3336 return false; 3337 } 3338 return true; 3339 } 3340 3341 // Check if VCC register matches wavefront size 3342 bool AMDGPUAsmParser::validateVccOperand(unsigned Reg) const { 3343 auto FB = getFeatureBits(); 3344 return (FB[AMDGPU::FeatureWavefrontSize64] && Reg == AMDGPU::VCC) || 3345 (FB[AMDGPU::FeatureWavefrontSize32] && Reg == AMDGPU::VCC_LO); 3346 } 3347 3348 // VOP3 literal is only allowed in GFX10+ and only one can be used 3349 bool AMDGPUAsmParser::validateVOP3Literal(const MCInst &Inst) const { 3350 unsigned Opcode = Inst.getOpcode(); 3351 const MCInstrDesc &Desc = MII.get(Opcode); 3352 if (!(Desc.TSFlags & (SIInstrFlags::VOP3 | SIInstrFlags::VOP3P))) 3353 return true; 3354 3355 const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0); 3356 const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1); 3357 const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2); 3358 3359 const int OpIndices[] = { Src0Idx, Src1Idx, Src2Idx }; 3360 3361 unsigned NumExprs = 0; 3362 unsigned NumLiterals = 0; 3363 uint32_t LiteralValue; 3364 3365 for (int OpIdx : OpIndices) { 3366 if (OpIdx == -1) break; 3367 3368 const MCOperand &MO = Inst.getOperand(OpIdx); 3369 if (!MO.isImm() && !MO.isExpr()) 3370 continue; 3371 if (!AMDGPU::isSISrcOperand(Desc, OpIdx)) 3372 continue; 3373 3374 if (OpIdx == Src2Idx && (Desc.TSFlags & SIInstrFlags::IsMAI) && 3375 getFeatureBits()[AMDGPU::FeatureMFMAInlineLiteralBug]) 3376 return false; 3377 3378 if (MO.isImm() && !isInlineConstant(Inst, OpIdx)) { 3379 uint32_t Value = static_cast<uint32_t>(MO.getImm()); 3380 if (NumLiterals == 0 || LiteralValue != Value) { 3381 LiteralValue = Value; 3382 ++NumLiterals; 3383 } 3384 } else if (MO.isExpr()) { 3385 ++NumExprs; 3386 } 3387 } 3388 NumLiterals += NumExprs; 3389 3390 return !NumLiterals || 3391 (NumLiterals == 1 && 
getFeatureBits()[AMDGPU::FeatureVOP3Literal]); 3392 } 3393 3394 bool AMDGPUAsmParser::validateInstruction(const MCInst &Inst, 3395 const SMLoc &IDLoc, 3396 const OperandVector &Operands) { 3397 if (!validateLdsDirect(Inst)) { 3398 Error(IDLoc, 3399 "invalid use of lds_direct"); 3400 return false; 3401 } 3402 if (!validateSOPLiteral(Inst)) { 3403 Error(IDLoc, 3404 "only one literal operand is allowed"); 3405 return false; 3406 } 3407 if (!validateVOP3Literal(Inst)) { 3408 Error(IDLoc, 3409 "invalid literal operand"); 3410 return false; 3411 } 3412 if (!validateConstantBusLimitations(Inst)) { 3413 Error(IDLoc, 3414 "invalid operand (violates constant bus restrictions)"); 3415 return false; 3416 } 3417 if (!validateEarlyClobberLimitations(Inst)) { 3418 Error(IDLoc, 3419 "destination must be different than all sources"); 3420 return false; 3421 } 3422 if (!validateIntClampSupported(Inst)) { 3423 Error(IDLoc, 3424 "integer clamping is not supported on this GPU"); 3425 return false; 3426 } 3427 if (!validateOpSel(Inst)) { 3428 Error(IDLoc, 3429 "invalid op_sel operand"); 3430 return false; 3431 } 3432 // For MUBUF/MTBUF d16 is a part of opcode, so there is nothing to validate. 3433 if (!validateMIMGD16(Inst)) { 3434 Error(IDLoc, 3435 "d16 modifier is not supported on this GPU"); 3436 return false; 3437 } 3438 if (!validateMIMGDim(Inst)) { 3439 Error(IDLoc, "dim modifier is required on this GPU"); 3440 return false; 3441 } 3442 if (!validateMIMGDataSize(Inst)) { 3443 Error(IDLoc, 3444 "image data size does not match dmask and tfe"); 3445 return false; 3446 } 3447 if (!validateMIMGAddrSize(Inst)) { 3448 Error(IDLoc, 3449 "image address size does not match dim and a16"); 3450 return false; 3451 } 3452 if (!validateMIMGAtomicDMask(Inst)) { 3453 Error(IDLoc, 3454 "invalid atomic image dmask"); 3455 return false; 3456 } 3457 if (!validateMIMGGatherDMask(Inst)) { 3458 Error(IDLoc, 3459 "invalid image_gather dmask: only one bit must be set"); 3460 return false; 3461 } 3462 if (!validateFlatOffset(Inst, Operands)) { 3463 return false; 3464 } 3465 3466 return true; 3467 } 3468 3469 static std::string AMDGPUMnemonicSpellCheck(StringRef S, 3470 const FeatureBitset &FBS, 3471 unsigned VariantID = 0); 3472 3473 bool AMDGPUAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode, 3474 OperandVector &Operands, 3475 MCStreamer &Out, 3476 uint64_t &ErrorInfo, 3477 bool MatchingInlineAsm) { 3478 MCInst Inst; 3479 unsigned Result = Match_Success; 3480 for (auto Variant : getMatchedVariants()) { 3481 uint64_t EI; 3482 auto R = MatchInstructionImpl(Operands, Inst, EI, MatchingInlineAsm, 3483 Variant); 3484 // We order match statuses from least to most specific. 
We use most specific 3485 // status as resulting 3486 // Match_MnemonicFail < Match_InvalidOperand < Match_MissingFeature < Match_PreferE32 3487 if ((R == Match_Success) || 3488 (R == Match_PreferE32) || 3489 (R == Match_MissingFeature && Result != Match_PreferE32) || 3490 (R == Match_InvalidOperand && Result != Match_MissingFeature 3491 && Result != Match_PreferE32) || 3492 (R == Match_MnemonicFail && Result != Match_InvalidOperand 3493 && Result != Match_MissingFeature 3494 && Result != Match_PreferE32)) { 3495 Result = R; 3496 ErrorInfo = EI; 3497 } 3498 if (R == Match_Success) 3499 break; 3500 } 3501 3502 switch (Result) { 3503 default: break; 3504 case Match_Success: 3505 if (!validateInstruction(Inst, IDLoc, Operands)) { 3506 return true; 3507 } 3508 Inst.setLoc(IDLoc); 3509 Out.EmitInstruction(Inst, getSTI()); 3510 return false; 3511 3512 case Match_MissingFeature: 3513 return Error(IDLoc, "instruction not supported on this GPU"); 3514 3515 case Match_MnemonicFail: { 3516 FeatureBitset FBS = ComputeAvailableFeatures(getSTI().getFeatureBits()); 3517 std::string Suggestion = AMDGPUMnemonicSpellCheck( 3518 ((AMDGPUOperand &)*Operands[0]).getToken(), FBS); 3519 return Error(IDLoc, "invalid instruction" + Suggestion, 3520 ((AMDGPUOperand &)*Operands[0]).getLocRange()); 3521 } 3522 3523 case Match_InvalidOperand: { 3524 SMLoc ErrorLoc = IDLoc; 3525 if (ErrorInfo != ~0ULL) { 3526 if (ErrorInfo >= Operands.size()) { 3527 return Error(IDLoc, "too few operands for instruction"); 3528 } 3529 ErrorLoc = ((AMDGPUOperand &)*Operands[ErrorInfo]).getStartLoc(); 3530 if (ErrorLoc == SMLoc()) 3531 ErrorLoc = IDLoc; 3532 } 3533 return Error(ErrorLoc, "invalid operand for instruction"); 3534 } 3535 3536 case Match_PreferE32: 3537 return Error(IDLoc, "internal error: instruction without _e64 suffix " 3538 "should be encoded as e32"); 3539 } 3540 llvm_unreachable("Implement any new match types added!"); 3541 } 3542 3543 bool AMDGPUAsmParser::ParseAsAbsoluteExpression(uint32_t &Ret) { 3544 int64_t Tmp = -1; 3545 if (getLexer().isNot(AsmToken::Integer) && getLexer().isNot(AsmToken::Identifier)) { 3546 return true; 3547 } 3548 if (getParser().parseAbsoluteExpression(Tmp)) { 3549 return true; 3550 } 3551 Ret = static_cast<uint32_t>(Tmp); 3552 return false; 3553 } 3554 3555 bool AMDGPUAsmParser::ParseDirectiveMajorMinor(uint32_t &Major, 3556 uint32_t &Minor) { 3557 if (ParseAsAbsoluteExpression(Major)) 3558 return TokError("invalid major version"); 3559 3560 if (getLexer().isNot(AsmToken::Comma)) 3561 return TokError("minor version number required, comma expected"); 3562 Lex(); 3563 3564 if (ParseAsAbsoluteExpression(Minor)) 3565 return TokError("invalid minor version"); 3566 3567 return false; 3568 } 3569 3570 bool AMDGPUAsmParser::ParseDirectiveAMDGCNTarget() { 3571 if (getSTI().getTargetTriple().getArch() != Triple::amdgcn) 3572 return TokError("directive only supported for amdgcn architecture"); 3573 3574 std::string Target; 3575 3576 SMLoc TargetStart = getTok().getLoc(); 3577 if (getParser().parseEscapedString(Target)) 3578 return true; 3579 SMRange TargetRange = SMRange(TargetStart, getTok().getLoc()); 3580 3581 std::string ExpectedTarget; 3582 raw_string_ostream ExpectedTargetOS(ExpectedTarget); 3583 IsaInfo::streamIsaVersion(&getSTI(), ExpectedTargetOS); 3584 3585 if (Target != ExpectedTargetOS.str()) 3586 return getParser().Error(TargetRange.Start, "target must match options", 3587 TargetRange); 3588 3589 getTargetStreamer().EmitDirectiveAMDGCNTarget(Target); 3590 return false; 3591 } 3592 3593 bool 
AMDGPUAsmParser::OutOfRangeError(SMRange Range) { 3594 return getParser().Error(Range.Start, "value out of range", Range); 3595 } 3596 3597 bool AMDGPUAsmParser::calculateGPRBlocks( 3598 const FeatureBitset &Features, bool VCCUsed, bool FlatScrUsed, 3599 bool XNACKUsed, Optional<bool> EnableWavefrontSize32, unsigned NextFreeVGPR, 3600 SMRange VGPRRange, unsigned NextFreeSGPR, SMRange SGPRRange, 3601 unsigned &VGPRBlocks, unsigned &SGPRBlocks) { 3602 // TODO(scott.linder): These calculations are duplicated from 3603 // AMDGPUAsmPrinter::getSIProgramInfo and could be unified. 3604 IsaVersion Version = getIsaVersion(getSTI().getCPU()); 3605 3606 unsigned NumVGPRs = NextFreeVGPR; 3607 unsigned NumSGPRs = NextFreeSGPR; 3608 3609 if (Version.Major >= 10) 3610 NumSGPRs = 0; 3611 else { 3612 unsigned MaxAddressableNumSGPRs = 3613 IsaInfo::getAddressableNumSGPRs(&getSTI()); 3614 3615 if (Version.Major >= 8 && !Features.test(FeatureSGPRInitBug) && 3616 NumSGPRs > MaxAddressableNumSGPRs) 3617 return OutOfRangeError(SGPRRange); 3618 3619 NumSGPRs += 3620 IsaInfo::getNumExtraSGPRs(&getSTI(), VCCUsed, FlatScrUsed, XNACKUsed); 3621 3622 if ((Version.Major <= 7 || Features.test(FeatureSGPRInitBug)) && 3623 NumSGPRs > MaxAddressableNumSGPRs) 3624 return OutOfRangeError(SGPRRange); 3625 3626 if (Features.test(FeatureSGPRInitBug)) 3627 NumSGPRs = IsaInfo::FIXED_NUM_SGPRS_FOR_INIT_BUG; 3628 } 3629 3630 VGPRBlocks = 3631 IsaInfo::getNumVGPRBlocks(&getSTI(), NumVGPRs, EnableWavefrontSize32); 3632 SGPRBlocks = IsaInfo::getNumSGPRBlocks(&getSTI(), NumSGPRs); 3633 3634 return false; 3635 } 3636 3637 bool AMDGPUAsmParser::ParseDirectiveAMDHSAKernel() { 3638 if (getSTI().getTargetTriple().getArch() != Triple::amdgcn) 3639 return TokError("directive only supported for amdgcn architecture"); 3640 3641 if (getSTI().getTargetTriple().getOS() != Triple::AMDHSA) 3642 return TokError("directive only supported for amdhsa OS"); 3643 3644 StringRef KernelName; 3645 if (getParser().parseIdentifier(KernelName)) 3646 return true; 3647 3648 kernel_descriptor_t KD = getDefaultAmdhsaKernelDescriptor(&getSTI()); 3649 3650 StringSet<> Seen; 3651 3652 IsaVersion IVersion = getIsaVersion(getSTI().getCPU()); 3653 3654 SMRange VGPRRange; 3655 uint64_t NextFreeVGPR = 0; 3656 SMRange SGPRRange; 3657 uint64_t NextFreeSGPR = 0; 3658 unsigned UserSGPRCount = 0; 3659 bool ReserveVCC = true; 3660 bool ReserveFlatScr = true; 3661 bool ReserveXNACK = hasXNACK(); 3662 Optional<bool> EnableWavefrontSize32; 3663 3664 while (true) { 3665 while (getLexer().is(AsmToken::EndOfStatement)) 3666 Lex(); 3667 3668 if (getLexer().isNot(AsmToken::Identifier)) 3669 return TokError("expected .amdhsa_ directive or .end_amdhsa_kernel"); 3670 3671 StringRef ID = getTok().getIdentifier(); 3672 SMRange IDRange = getTok().getLocRange(); 3673 Lex(); 3674 3675 if (ID == ".end_amdhsa_kernel") 3676 break; 3677 3678 if (Seen.find(ID) != Seen.end()) 3679 return TokError(".amdhsa_ directives cannot be repeated"); 3680 Seen.insert(ID); 3681 3682 SMLoc ValStart = getTok().getLoc(); 3683 int64_t IVal; 3684 if (getParser().parseAbsoluteExpression(IVal)) 3685 return true; 3686 SMLoc ValEnd = getTok().getLoc(); 3687 SMRange ValRange = SMRange(ValStart, ValEnd); 3688 3689 if (IVal < 0) 3690 return OutOfRangeError(ValRange); 3691 3692 uint64_t Val = IVal; 3693 3694 #define PARSE_BITS_ENTRY(FIELD, ENTRY, VALUE, RANGE) \ 3695 if (!isUInt<ENTRY##_WIDTH>(VALUE)) \ 3696 return OutOfRangeError(RANGE); \ 3697 AMDHSA_BITS_SET(FIELD, ENTRY, VALUE); 3698 3699 if (ID == 
".amdhsa_group_segment_fixed_size") { 3700 if (!isUInt<sizeof(KD.group_segment_fixed_size) * CHAR_BIT>(Val)) 3701 return OutOfRangeError(ValRange); 3702 KD.group_segment_fixed_size = Val; 3703 } else if (ID == ".amdhsa_private_segment_fixed_size") { 3704 if (!isUInt<sizeof(KD.private_segment_fixed_size) * CHAR_BIT>(Val)) 3705 return OutOfRangeError(ValRange); 3706 KD.private_segment_fixed_size = Val; 3707 } else if (ID == ".amdhsa_user_sgpr_private_segment_buffer") { 3708 PARSE_BITS_ENTRY(KD.kernel_code_properties, 3709 KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_BUFFER, 3710 Val, ValRange); 3711 if (Val) 3712 UserSGPRCount += 4; 3713 } else if (ID == ".amdhsa_user_sgpr_dispatch_ptr") { 3714 PARSE_BITS_ENTRY(KD.kernel_code_properties, 3715 KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR, Val, 3716 ValRange); 3717 if (Val) 3718 UserSGPRCount += 2; 3719 } else if (ID == ".amdhsa_user_sgpr_queue_ptr") { 3720 PARSE_BITS_ENTRY(KD.kernel_code_properties, 3721 KERNEL_CODE_PROPERTY_ENABLE_SGPR_QUEUE_PTR, Val, 3722 ValRange); 3723 if (Val) 3724 UserSGPRCount += 2; 3725 } else if (ID == ".amdhsa_user_sgpr_kernarg_segment_ptr") { 3726 PARSE_BITS_ENTRY(KD.kernel_code_properties, 3727 KERNEL_CODE_PROPERTY_ENABLE_SGPR_KERNARG_SEGMENT_PTR, 3728 Val, ValRange); 3729 if (Val) 3730 UserSGPRCount += 2; 3731 } else if (ID == ".amdhsa_user_sgpr_dispatch_id") { 3732 PARSE_BITS_ENTRY(KD.kernel_code_properties, 3733 KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_ID, Val, 3734 ValRange); 3735 if (Val) 3736 UserSGPRCount += 2; 3737 } else if (ID == ".amdhsa_user_sgpr_flat_scratch_init") { 3738 PARSE_BITS_ENTRY(KD.kernel_code_properties, 3739 KERNEL_CODE_PROPERTY_ENABLE_SGPR_FLAT_SCRATCH_INIT, Val, 3740 ValRange); 3741 if (Val) 3742 UserSGPRCount += 2; 3743 } else if (ID == ".amdhsa_user_sgpr_private_segment_size") { 3744 PARSE_BITS_ENTRY(KD.kernel_code_properties, 3745 KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_SIZE, 3746 Val, ValRange); 3747 if (Val) 3748 UserSGPRCount += 1; 3749 } else if (ID == ".amdhsa_wavefront_size32") { 3750 if (IVersion.Major < 10) 3751 return getParser().Error(IDRange.Start, "directive requires gfx10+", 3752 IDRange); 3753 EnableWavefrontSize32 = Val; 3754 PARSE_BITS_ENTRY(KD.kernel_code_properties, 3755 KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32, 3756 Val, ValRange); 3757 } else if (ID == ".amdhsa_system_sgpr_private_segment_wavefront_offset") { 3758 PARSE_BITS_ENTRY( 3759 KD.compute_pgm_rsrc2, 3760 COMPUTE_PGM_RSRC2_ENABLE_SGPR_PRIVATE_SEGMENT_WAVEFRONT_OFFSET, Val, 3761 ValRange); 3762 } else if (ID == ".amdhsa_system_sgpr_workgroup_id_x") { 3763 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 3764 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_X, Val, 3765 ValRange); 3766 } else if (ID == ".amdhsa_system_sgpr_workgroup_id_y") { 3767 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 3768 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Y, Val, 3769 ValRange); 3770 } else if (ID == ".amdhsa_system_sgpr_workgroup_id_z") { 3771 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 3772 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Z, Val, 3773 ValRange); 3774 } else if (ID == ".amdhsa_system_sgpr_workgroup_info") { 3775 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 3776 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_INFO, Val, 3777 ValRange); 3778 } else if (ID == ".amdhsa_system_vgpr_workitem_id") { 3779 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 3780 COMPUTE_PGM_RSRC2_ENABLE_VGPR_WORKITEM_ID, Val, 3781 ValRange); 3782 } else if (ID == ".amdhsa_next_free_vgpr") { 3783 VGPRRange = ValRange; 3784 NextFreeVGPR = Val; 3785 } else if (ID == 
".amdhsa_next_free_sgpr") { 3786 SGPRRange = ValRange; 3787 NextFreeSGPR = Val; 3788 } else if (ID == ".amdhsa_reserve_vcc") { 3789 if (!isUInt<1>(Val)) 3790 return OutOfRangeError(ValRange); 3791 ReserveVCC = Val; 3792 } else if (ID == ".amdhsa_reserve_flat_scratch") { 3793 if (IVersion.Major < 7) 3794 return getParser().Error(IDRange.Start, "directive requires gfx7+", 3795 IDRange); 3796 if (!isUInt<1>(Val)) 3797 return OutOfRangeError(ValRange); 3798 ReserveFlatScr = Val; 3799 } else if (ID == ".amdhsa_reserve_xnack_mask") { 3800 if (IVersion.Major < 8) 3801 return getParser().Error(IDRange.Start, "directive requires gfx8+", 3802 IDRange); 3803 if (!isUInt<1>(Val)) 3804 return OutOfRangeError(ValRange); 3805 ReserveXNACK = Val; 3806 } else if (ID == ".amdhsa_float_round_mode_32") { 3807 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, 3808 COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_32, Val, ValRange); 3809 } else if (ID == ".amdhsa_float_round_mode_16_64") { 3810 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, 3811 COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_16_64, Val, ValRange); 3812 } else if (ID == ".amdhsa_float_denorm_mode_32") { 3813 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, 3814 COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_32, Val, ValRange); 3815 } else if (ID == ".amdhsa_float_denorm_mode_16_64") { 3816 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, 3817 COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_16_64, Val, 3818 ValRange); 3819 } else if (ID == ".amdhsa_dx10_clamp") { 3820 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, 3821 COMPUTE_PGM_RSRC1_ENABLE_DX10_CLAMP, Val, ValRange); 3822 } else if (ID == ".amdhsa_ieee_mode") { 3823 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_ENABLE_IEEE_MODE, 3824 Val, ValRange); 3825 } else if (ID == ".amdhsa_fp16_overflow") { 3826 if (IVersion.Major < 9) 3827 return getParser().Error(IDRange.Start, "directive requires gfx9+", 3828 IDRange); 3829 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_FP16_OVFL, Val, 3830 ValRange); 3831 } else if (ID == ".amdhsa_workgroup_processor_mode") { 3832 if (IVersion.Major < 10) 3833 return getParser().Error(IDRange.Start, "directive requires gfx10+", 3834 IDRange); 3835 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_WGP_MODE, Val, 3836 ValRange); 3837 } else if (ID == ".amdhsa_memory_ordered") { 3838 if (IVersion.Major < 10) 3839 return getParser().Error(IDRange.Start, "directive requires gfx10+", 3840 IDRange); 3841 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_MEM_ORDERED, Val, 3842 ValRange); 3843 } else if (ID == ".amdhsa_forward_progress") { 3844 if (IVersion.Major < 10) 3845 return getParser().Error(IDRange.Start, "directive requires gfx10+", 3846 IDRange); 3847 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_FWD_PROGRESS, Val, 3848 ValRange); 3849 } else if (ID == ".amdhsa_exception_fp_ieee_invalid_op") { 3850 PARSE_BITS_ENTRY( 3851 KD.compute_pgm_rsrc2, 3852 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INVALID_OPERATION, Val, 3853 ValRange); 3854 } else if (ID == ".amdhsa_exception_fp_denorm_src") { 3855 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 3856 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_FP_DENORMAL_SOURCE, 3857 Val, ValRange); 3858 } else if (ID == ".amdhsa_exception_fp_ieee_div_zero") { 3859 PARSE_BITS_ENTRY( 3860 KD.compute_pgm_rsrc2, 3861 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_DIVISION_BY_ZERO, Val, 3862 ValRange); 3863 } else if (ID == ".amdhsa_exception_fp_ieee_overflow") { 3864 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 3865 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_OVERFLOW, 3866 Val, ValRange); 3867 } else if 
(ID == ".amdhsa_exception_fp_ieee_underflow") { 3868 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 3869 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_UNDERFLOW, 3870 Val, ValRange); 3871 } else if (ID == ".amdhsa_exception_fp_ieee_inexact") { 3872 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 3873 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INEXACT, 3874 Val, ValRange); 3875 } else if (ID == ".amdhsa_exception_int_div_zero") { 3876 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 3877 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_INT_DIVIDE_BY_ZERO, 3878 Val, ValRange); 3879 } else { 3880 return getParser().Error(IDRange.Start, 3881 "unknown .amdhsa_kernel directive", IDRange); 3882 } 3883 3884 #undef PARSE_BITS_ENTRY 3885 } 3886 3887 if (Seen.find(".amdhsa_next_free_vgpr") == Seen.end()) 3888 return TokError(".amdhsa_next_free_vgpr directive is required"); 3889 3890 if (Seen.find(".amdhsa_next_free_sgpr") == Seen.end()) 3891 return TokError(".amdhsa_next_free_sgpr directive is required"); 3892 3893 unsigned VGPRBlocks; 3894 unsigned SGPRBlocks; 3895 if (calculateGPRBlocks(getFeatureBits(), ReserveVCC, ReserveFlatScr, 3896 ReserveXNACK, EnableWavefrontSize32, NextFreeVGPR, 3897 VGPRRange, NextFreeSGPR, SGPRRange, VGPRBlocks, 3898 SGPRBlocks)) 3899 return true; 3900 3901 if (!isUInt<COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT_WIDTH>( 3902 VGPRBlocks)) 3903 return OutOfRangeError(VGPRRange); 3904 AMDHSA_BITS_SET(KD.compute_pgm_rsrc1, 3905 COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT, VGPRBlocks); 3906 3907 if (!isUInt<COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT_WIDTH>( 3908 SGPRBlocks)) 3909 return OutOfRangeError(SGPRRange); 3910 AMDHSA_BITS_SET(KD.compute_pgm_rsrc1, 3911 COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT, 3912 SGPRBlocks); 3913 3914 if (!isUInt<COMPUTE_PGM_RSRC2_USER_SGPR_COUNT_WIDTH>(UserSGPRCount)) 3915 return TokError("too many user SGPRs enabled"); 3916 AMDHSA_BITS_SET(KD.compute_pgm_rsrc2, COMPUTE_PGM_RSRC2_USER_SGPR_COUNT, 3917 UserSGPRCount); 3918 3919 getTargetStreamer().EmitAmdhsaKernelDescriptor( 3920 getSTI(), KernelName, KD, NextFreeVGPR, NextFreeSGPR, ReserveVCC, 3921 ReserveFlatScr, ReserveXNACK); 3922 return false; 3923 } 3924 3925 bool AMDGPUAsmParser::ParseDirectiveHSACodeObjectVersion() { 3926 uint32_t Major; 3927 uint32_t Minor; 3928 3929 if (ParseDirectiveMajorMinor(Major, Minor)) 3930 return true; 3931 3932 getTargetStreamer().EmitDirectiveHSACodeObjectVersion(Major, Minor); 3933 return false; 3934 } 3935 3936 bool AMDGPUAsmParser::ParseDirectiveHSACodeObjectISA() { 3937 uint32_t Major; 3938 uint32_t Minor; 3939 uint32_t Stepping; 3940 StringRef VendorName; 3941 StringRef ArchName; 3942 3943 // If this directive has no arguments, then use the ISA version for the 3944 // targeted GPU. 
3945 if (getLexer().is(AsmToken::EndOfStatement)) { 3946 AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU()); 3947 getTargetStreamer().EmitDirectiveHSACodeObjectISA(ISA.Major, ISA.Minor, 3948 ISA.Stepping, 3949 "AMD", "AMDGPU"); 3950 return false; 3951 } 3952 3953 if (ParseDirectiveMajorMinor(Major, Minor)) 3954 return true; 3955 3956 if (getLexer().isNot(AsmToken::Comma)) 3957 return TokError("stepping version number required, comma expected"); 3958 Lex(); 3959 3960 if (ParseAsAbsoluteExpression(Stepping)) 3961 return TokError("invalid stepping version"); 3962 3963 if (getLexer().isNot(AsmToken::Comma)) 3964 return TokError("vendor name required, comma expected"); 3965 Lex(); 3966 3967 if (getLexer().isNot(AsmToken::String)) 3968 return TokError("invalid vendor name"); 3969 3970 VendorName = getLexer().getTok().getStringContents(); 3971 Lex(); 3972 3973 if (getLexer().isNot(AsmToken::Comma)) 3974 return TokError("arch name required, comma expected"); 3975 Lex(); 3976 3977 if (getLexer().isNot(AsmToken::String)) 3978 return TokError("invalid arch name"); 3979 3980 ArchName = getLexer().getTok().getStringContents(); 3981 Lex(); 3982 3983 getTargetStreamer().EmitDirectiveHSACodeObjectISA(Major, Minor, Stepping, 3984 VendorName, ArchName); 3985 return false; 3986 } 3987 3988 bool AMDGPUAsmParser::ParseAMDKernelCodeTValue(StringRef ID, 3989 amd_kernel_code_t &Header) { 3990 // max_scratch_backing_memory_byte_size is deprecated. Ignore it while parsing 3991 // assembly for backwards compatibility. 3992 if (ID == "max_scratch_backing_memory_byte_size") { 3993 Parser.eatToEndOfStatement(); 3994 return false; 3995 } 3996 3997 SmallString<40> ErrStr; 3998 raw_svector_ostream Err(ErrStr); 3999 if (!parseAmdKernelCodeField(ID, getParser(), Header, Err)) { 4000 return TokError(Err.str()); 4001 } 4002 Lex(); 4003 4004 if (ID == "enable_wavefront_size32") { 4005 if (Header.code_properties & AMD_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32) { 4006 if (!isGFX10()) 4007 return TokError("enable_wavefront_size32=1 is only allowed on GFX10+"); 4008 if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize32]) 4009 return TokError("enable_wavefront_size32=1 requires +WavefrontSize32"); 4010 } else { 4011 if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize64]) 4012 return TokError("enable_wavefront_size32=0 requires +WavefrontSize64"); 4013 } 4014 } 4015 4016 if (ID == "wavefront_size") { 4017 if (Header.wavefront_size == 5) { 4018 if (!isGFX10()) 4019 return TokError("wavefront_size=5 is only allowed on GFX10+"); 4020 if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize32]) 4021 return TokError("wavefront_size=5 requires +WavefrontSize32"); 4022 } else if (Header.wavefront_size == 6) { 4023 if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize64]) 4024 return TokError("wavefront_size=6 requires +WavefrontSize64"); 4025 } 4026 } 4027 4028 if (ID == "enable_wgp_mode") { 4029 if (G_00B848_WGP_MODE(Header.compute_pgm_resource_registers) && !isGFX10()) 4030 return TokError("enable_wgp_mode=1 is only allowed on GFX10+"); 4031 } 4032 4033 if (ID == "enable_mem_ordered") { 4034 if (G_00B848_MEM_ORDERED(Header.compute_pgm_resource_registers) && !isGFX10()) 4035 return TokError("enable_mem_ordered=1 is only allowed on GFX10+"); 4036 } 4037 4038 if (ID == "enable_fwd_progress") { 4039 if (G_00B848_FWD_PROGRESS(Header.compute_pgm_resource_registers) && !isGFX10()) 4040 return TokError("enable_fwd_progress=1 is only allowed on GFX10+"); 4041 } 4042 4043 return false; 4044 } 4045 4046 bool 
AMDGPUAsmParser::ParseDirectiveAMDKernelCodeT() { 4047 amd_kernel_code_t Header; 4048 AMDGPU::initDefaultAMDKernelCodeT(Header, &getSTI()); 4049 4050 while (true) { 4051 // Lex EndOfStatement. This is in a while loop, because lexing a comment 4052 // will set the current token to EndOfStatement. 4053 while(getLexer().is(AsmToken::EndOfStatement)) 4054 Lex(); 4055 4056 if (getLexer().isNot(AsmToken::Identifier)) 4057 return TokError("expected value identifier or .end_amd_kernel_code_t"); 4058 4059 StringRef ID = getLexer().getTok().getIdentifier(); 4060 Lex(); 4061 4062 if (ID == ".end_amd_kernel_code_t") 4063 break; 4064 4065 if (ParseAMDKernelCodeTValue(ID, Header)) 4066 return true; 4067 } 4068 4069 getTargetStreamer().EmitAMDKernelCodeT(Header); 4070 4071 return false; 4072 } 4073 4074 bool AMDGPUAsmParser::ParseDirectiveAMDGPUHsaKernel() { 4075 if (getLexer().isNot(AsmToken::Identifier)) 4076 return TokError("expected symbol name"); 4077 4078 StringRef KernelName = Parser.getTok().getString(); 4079 4080 getTargetStreamer().EmitAMDGPUSymbolType(KernelName, 4081 ELF::STT_AMDGPU_HSA_KERNEL); 4082 Lex(); 4083 if (!AMDGPU::IsaInfo::hasCodeObjectV3(&getSTI())) 4084 KernelScope.initialize(getContext()); 4085 return false; 4086 } 4087 4088 bool AMDGPUAsmParser::ParseDirectiveISAVersion() { 4089 if (getSTI().getTargetTriple().getArch() != Triple::amdgcn) { 4090 return Error(getParser().getTok().getLoc(), 4091 ".amd_amdgpu_isa directive is not available on non-amdgcn " 4092 "architectures"); 4093 } 4094 4095 auto ISAVersionStringFromASM = getLexer().getTok().getStringContents(); 4096 4097 std::string ISAVersionStringFromSTI; 4098 raw_string_ostream ISAVersionStreamFromSTI(ISAVersionStringFromSTI); 4099 IsaInfo::streamIsaVersion(&getSTI(), ISAVersionStreamFromSTI); 4100 4101 if (ISAVersionStringFromASM != ISAVersionStreamFromSTI.str()) { 4102 return Error(getParser().getTok().getLoc(), 4103 ".amd_amdgpu_isa directive does not match triple and/or mcpu " 4104 "arguments specified through the command line"); 4105 } 4106 4107 getTargetStreamer().EmitISAVersion(ISAVersionStreamFromSTI.str()); 4108 Lex(); 4109 4110 return false; 4111 } 4112 4113 bool AMDGPUAsmParser::ParseDirectiveHSAMetadata() { 4114 const char *AssemblerDirectiveBegin; 4115 const char *AssemblerDirectiveEnd; 4116 std::tie(AssemblerDirectiveBegin, AssemblerDirectiveEnd) = 4117 AMDGPU::IsaInfo::hasCodeObjectV3(&getSTI()) 4118 ? std::make_tuple(HSAMD::V3::AssemblerDirectiveBegin, 4119 HSAMD::V3::AssemblerDirectiveEnd) 4120 : std::make_tuple(HSAMD::AssemblerDirectiveBegin, 4121 HSAMD::AssemblerDirectiveEnd); 4122 4123 if (getSTI().getTargetTriple().getOS() != Triple::AMDHSA) { 4124 return Error(getParser().getTok().getLoc(), 4125 (Twine(AssemblerDirectiveBegin) + Twine(" directive is " 4126 "not available on non-amdhsa OSes")).str()); 4127 } 4128 4129 std::string HSAMetadataString; 4130 if (ParseToEndDirective(AssemblerDirectiveBegin, AssemblerDirectiveEnd, 4131 HSAMetadataString)) 4132 return true; 4133 4134 if (IsaInfo::hasCodeObjectV3(&getSTI())) { 4135 if (!getTargetStreamer().EmitHSAMetadataV3(HSAMetadataString)) 4136 return Error(getParser().getTok().getLoc(), "invalid HSA metadata"); 4137 } else { 4138 if (!getTargetStreamer().EmitHSAMetadataV2(HSAMetadataString)) 4139 return Error(getParser().getTok().getLoc(), "invalid HSA metadata"); 4140 } 4141 4142 return false; 4143 } 4144 4145 /// Common code to parse out a block of text (typically YAML) between start and 4146 /// end directives. 
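/// A sketch of the expected layout, with illustrative marker names (the real
/// begin/end markers are supplied by the callers above):
///   <AssemblerDirectiveBegin>
///     ... free-form text collected verbatim, line by line ...
///   <AssemblerDirectiveEnd>
/// Everything before the end marker is appended to CollectString, with the
/// assembler's statement separator inserted between lines.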
4147 bool AMDGPUAsmParser::ParseToEndDirective(const char *AssemblerDirectiveBegin, 4148 const char *AssemblerDirectiveEnd, 4149 std::string &CollectString) { 4150 4151 raw_string_ostream CollectStream(CollectString); 4152 4153 getLexer().setSkipSpace(false); 4154 4155 bool FoundEnd = false; 4156 while (!getLexer().is(AsmToken::Eof)) { 4157 while (getLexer().is(AsmToken::Space)) { 4158 CollectStream << getLexer().getTok().getString(); 4159 Lex(); 4160 } 4161 4162 if (getLexer().is(AsmToken::Identifier)) { 4163 StringRef ID = getLexer().getTok().getIdentifier(); 4164 if (ID == AssemblerDirectiveEnd) { 4165 Lex(); 4166 FoundEnd = true; 4167 break; 4168 } 4169 } 4170 4171 CollectStream << Parser.parseStringToEndOfStatement() 4172 << getContext().getAsmInfo()->getSeparatorString(); 4173 4174 Parser.eatToEndOfStatement(); 4175 } 4176 4177 getLexer().setSkipSpace(true); 4178 4179 if (getLexer().is(AsmToken::Eof) && !FoundEnd) { 4180 return TokError(Twine("expected directive ") + 4181 Twine(AssemblerDirectiveEnd) + Twine(" not found")); 4182 } 4183 4184 CollectStream.flush(); 4185 return false; 4186 } 4187 4188 /// Parse the assembler directive for new MsgPack-format PAL metadata. 4189 bool AMDGPUAsmParser::ParseDirectivePALMetadataBegin() { 4190 std::string String; 4191 if (ParseToEndDirective(AMDGPU::PALMD::AssemblerDirectiveBegin, 4192 AMDGPU::PALMD::AssemblerDirectiveEnd, String)) 4193 return true; 4194 4195 auto PALMetadata = getTargetStreamer().getPALMetadata(); 4196 if (!PALMetadata->setFromString(String)) 4197 return Error(getParser().getTok().getLoc(), "invalid PAL metadata"); 4198 return false; 4199 } 4200 4201 /// Parse the assembler directive for old linear-format PAL metadata. 4202 bool AMDGPUAsmParser::ParseDirectivePALMetadata() { 4203 if (getSTI().getTargetTriple().getOS() != Triple::AMDPAL) { 4204 return Error(getParser().getTok().getLoc(), 4205 (Twine(PALMD::AssemblerDirective) + Twine(" directive is " 4206 "not available on non-amdpal OSes")).str()); 4207 } 4208 4209 auto PALMetadata = getTargetStreamer().getPALMetadata(); 4210 PALMetadata->setLegacy(); 4211 for (;;) { 4212 uint32_t Key, Value; 4213 if (ParseAsAbsoluteExpression(Key)) { 4214 return TokError(Twine("invalid value in ") + 4215 Twine(PALMD::AssemblerDirective)); 4216 } 4217 if (getLexer().isNot(AsmToken::Comma)) { 4218 return TokError(Twine("expected an even number of values in ") + 4219 Twine(PALMD::AssemblerDirective)); 4220 } 4221 Lex(); 4222 if (ParseAsAbsoluteExpression(Value)) { 4223 return TokError(Twine("invalid value in ") + 4224 Twine(PALMD::AssemblerDirective)); 4225 } 4226 PALMetadata->setRegister(Key, Value); 4227 if (getLexer().isNot(AsmToken::Comma)) 4228 break; 4229 Lex(); 4230 } 4231 return false; 4232 } 4233 4234 /// ParseDirectiveAMDGPULDS 4235 /// ::= .amdgpu_lds identifier ',' size_expression [',' align_expression] 4236 bool AMDGPUAsmParser::ParseDirectiveAMDGPULDS() { 4237 if (getParser().checkForValidSection()) 4238 return true; 4239 4240 StringRef Name; 4241 SMLoc NameLoc = getLexer().getLoc(); 4242 if (getParser().parseIdentifier(Name)) 4243 return TokError("expected identifier in directive"); 4244 4245 MCSymbol *Symbol = getContext().getOrCreateSymbol(Name); 4246 if (parseToken(AsmToken::Comma, "expected ','")) 4247 return true; 4248 4249 unsigned LocalMemorySize = AMDGPU::IsaInfo::getLocalMemorySize(&getSTI()); 4250 4251 int64_t Size; 4252 SMLoc SizeLoc = getLexer().getLoc(); 4253 if (getParser().parseAbsoluteExpression(Size)) 4254 return true; 4255 if (Size < 0) 4256 return 
Error(SizeLoc, "size must be non-negative"); 4257 if (Size > LocalMemorySize) 4258 return Error(SizeLoc, "size is too large"); 4259 4260 int64_t Align = 4; 4261 if (getLexer().is(AsmToken::Comma)) { 4262 Lex(); 4263 SMLoc AlignLoc = getLexer().getLoc(); 4264 if (getParser().parseAbsoluteExpression(Align)) 4265 return true; 4266 if (Align < 0 || !isPowerOf2_64(Align)) 4267 return Error(AlignLoc, "alignment must be a power of two"); 4268 4269 // Alignment larger than the size of LDS is possible in theory, as long 4270 // as the linker manages to place to symbol at address 0, but we do want 4271 // to make sure the alignment fits nicely into a 32-bit integer. 4272 if (Align >= 1u << 31) 4273 return Error(AlignLoc, "alignment is too large"); 4274 } 4275 4276 if (parseToken(AsmToken::EndOfStatement, 4277 "unexpected token in '.amdgpu_lds' directive")) 4278 return true; 4279 4280 Symbol->redefineIfPossible(); 4281 if (!Symbol->isUndefined()) 4282 return Error(NameLoc, "invalid symbol redefinition"); 4283 4284 getTargetStreamer().emitAMDGPULDS(Symbol, Size, Align); 4285 return false; 4286 } 4287 4288 bool AMDGPUAsmParser::ParseDirective(AsmToken DirectiveID) { 4289 StringRef IDVal = DirectiveID.getString(); 4290 4291 if (AMDGPU::IsaInfo::hasCodeObjectV3(&getSTI())) { 4292 if (IDVal == ".amdgcn_target") 4293 return ParseDirectiveAMDGCNTarget(); 4294 4295 if (IDVal == ".amdhsa_kernel") 4296 return ParseDirectiveAMDHSAKernel(); 4297 4298 // TODO: Restructure/combine with PAL metadata directive. 4299 if (IDVal == AMDGPU::HSAMD::V3::AssemblerDirectiveBegin) 4300 return ParseDirectiveHSAMetadata(); 4301 } else { 4302 if (IDVal == ".hsa_code_object_version") 4303 return ParseDirectiveHSACodeObjectVersion(); 4304 4305 if (IDVal == ".hsa_code_object_isa") 4306 return ParseDirectiveHSACodeObjectISA(); 4307 4308 if (IDVal == ".amd_kernel_code_t") 4309 return ParseDirectiveAMDKernelCodeT(); 4310 4311 if (IDVal == ".amdgpu_hsa_kernel") 4312 return ParseDirectiveAMDGPUHsaKernel(); 4313 4314 if (IDVal == ".amd_amdgpu_isa") 4315 return ParseDirectiveISAVersion(); 4316 4317 if (IDVal == AMDGPU::HSAMD::AssemblerDirectiveBegin) 4318 return ParseDirectiveHSAMetadata(); 4319 } 4320 4321 if (IDVal == ".amdgpu_lds") 4322 return ParseDirectiveAMDGPULDS(); 4323 4324 if (IDVal == PALMD::AssemblerDirectiveBegin) 4325 return ParseDirectivePALMetadataBegin(); 4326 4327 if (IDVal == PALMD::AssemblerDirective) 4328 return ParseDirectivePALMetadata(); 4329 4330 return true; 4331 } 4332 4333 bool AMDGPUAsmParser::subtargetHasRegister(const MCRegisterInfo &MRI, 4334 unsigned RegNo) const { 4335 4336 for (MCRegAliasIterator R(AMDGPU::TTMP12_TTMP13_TTMP14_TTMP15, &MRI, true); 4337 R.isValid(); ++R) { 4338 if (*R == RegNo) 4339 return isGFX9() || isGFX10(); 4340 } 4341 4342 // GFX10 has 2 more SGPRs 104 and 105. 
4343 for (MCRegAliasIterator R(AMDGPU::SGPR104_SGPR105, &MRI, true); 4344 R.isValid(); ++R) { 4345 if (*R == RegNo) 4346 return hasSGPR104_SGPR105(); 4347 } 4348 4349 switch (RegNo) { 4350 case AMDGPU::SRC_SHARED_BASE: 4351 case AMDGPU::SRC_SHARED_LIMIT: 4352 case AMDGPU::SRC_PRIVATE_BASE: 4353 case AMDGPU::SRC_PRIVATE_LIMIT: 4354 case AMDGPU::SRC_POPS_EXITING_WAVE_ID: 4355 return !isCI() && !isSI() && !isVI(); 4356 case AMDGPU::TBA: 4357 case AMDGPU::TBA_LO: 4358 case AMDGPU::TBA_HI: 4359 case AMDGPU::TMA: 4360 case AMDGPU::TMA_LO: 4361 case AMDGPU::TMA_HI: 4362 return !isGFX9() && !isGFX10(); 4363 case AMDGPU::XNACK_MASK: 4364 case AMDGPU::XNACK_MASK_LO: 4365 case AMDGPU::XNACK_MASK_HI: 4366 return !isCI() && !isSI() && !isGFX10() && hasXNACK(); 4367 case AMDGPU::SGPR_NULL: 4368 return isGFX10(); 4369 default: 4370 break; 4371 } 4372 4373 if (isCI()) 4374 return true; 4375 4376 if (isSI() || isGFX10()) { 4377 // No flat_scr on SI. 4378 // On GFX10 flat scratch is not a valid register operand and can only be 4379 // accessed with s_setreg/s_getreg. 4380 switch (RegNo) { 4381 case AMDGPU::FLAT_SCR: 4382 case AMDGPU::FLAT_SCR_LO: 4383 case AMDGPU::FLAT_SCR_HI: 4384 return false; 4385 default: 4386 return true; 4387 } 4388 } 4389 4390 // VI only has 102 SGPRs, so make sure we aren't trying to use the 2 more that 4391 // SI/CI have. 4392 for (MCRegAliasIterator R(AMDGPU::SGPR102_SGPR103, &MRI, true); 4393 R.isValid(); ++R) { 4394 if (*R == RegNo) 4395 return hasSGPR102_SGPR103(); 4396 } 4397 4398 return true; 4399 } 4400 4401 OperandMatchResultTy 4402 AMDGPUAsmParser::parseOperand(OperandVector &Operands, StringRef Mnemonic, 4403 OperandMode Mode) { 4404 // Try to parse with a custom parser 4405 OperandMatchResultTy ResTy = MatchOperandParserImpl(Operands, Mnemonic); 4406 4407 // If we successfully parsed the operand or if there was an error parsing, 4408 // we are done. 4409 // 4410 // If we are parsing after we reach EndOfStatement then this means we 4411 // are appending default values to the Operands list. This is only done 4412 // by a custom parser, so we shouldn't continue on to the generic parsing. 4413 if (ResTy == MatchOperand_Success || ResTy == MatchOperand_ParseFail || 4414 getLexer().is(AsmToken::EndOfStatement)) 4415 return ResTy; 4416 4417 if (Mode == OperandMode_NSA && getLexer().is(AsmToken::LBrac)) { 4418 unsigned Prefix = Operands.size(); 4419 SMLoc LBraceLoc = getTok().getLoc(); 4420 Parser.Lex(); // eat the '[' 4421 4422 for (;;) { 4423 ResTy = parseReg(Operands); 4424 if (ResTy != MatchOperand_Success) 4425 return ResTy; 4426 4427 if (getLexer().is(AsmToken::RBrac)) 4428 break; 4429 4430 if (getLexer().isNot(AsmToken::Comma)) 4431 return MatchOperand_ParseFail; 4432 Parser.Lex(); 4433 } 4434 4435 if (Operands.size() - Prefix > 1) { 4436 Operands.insert(Operands.begin() + Prefix, 4437 AMDGPUOperand::CreateToken(this, "[", LBraceLoc)); 4438 Operands.push_back(AMDGPUOperand::CreateToken(this, "]", 4439 getTok().getLoc())); 4440 } 4441 4442 Parser.Lex(); // eat the ']' 4443 return MatchOperand_Success; 4444 } 4445 4446 return parseRegOrImm(Operands); 4447 } 4448 4449 StringRef AMDGPUAsmParser::parseMnemonicSuffix(StringRef Name) { 4450 // Clear any forced encodings from the previous instruction.
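// For example (illustrative, matching the suffix handling below):
// "v_add_f32_e64" forces the 64-bit encoding and the stripped mnemonic
// "v_add_f32" is returned; the "_dpp" and "_sdwa" suffixes likewise set the
// corresponding forced-encoding flags before being removed.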
4451 setForcedEncodingSize(0); 4452 setForcedDPP(false); 4453 setForcedSDWA(false); 4454 4455 if (Name.endswith("_e64")) { 4456 setForcedEncodingSize(64); 4457 return Name.substr(0, Name.size() - 4); 4458 } else if (Name.endswith("_e32")) { 4459 setForcedEncodingSize(32); 4460 return Name.substr(0, Name.size() - 4); 4461 } else if (Name.endswith("_dpp")) { 4462 setForcedDPP(true); 4463 return Name.substr(0, Name.size() - 4); 4464 } else if (Name.endswith("_sdwa")) { 4465 setForcedSDWA(true); 4466 return Name.substr(0, Name.size() - 5); 4467 } 4468 return Name; 4469 } 4470 4471 bool AMDGPUAsmParser::ParseInstruction(ParseInstructionInfo &Info, 4472 StringRef Name, 4473 SMLoc NameLoc, OperandVector &Operands) { 4474 // Add the instruction mnemonic 4475 Name = parseMnemonicSuffix(Name); 4476 Operands.push_back(AMDGPUOperand::CreateToken(this, Name, NameLoc)); 4477 4478 bool IsMIMG = Name.startswith("image_"); 4479 4480 while (!getLexer().is(AsmToken::EndOfStatement)) { 4481 OperandMode Mode = OperandMode_Default; 4482 if (IsMIMG && isGFX10() && Operands.size() == 2) 4483 Mode = OperandMode_NSA; 4484 OperandMatchResultTy Res = parseOperand(Operands, Name, Mode); 4485 4486 // Eat the comma or space if there is one. 4487 if (getLexer().is(AsmToken::Comma)) 4488 Parser.Lex(); 4489 4490 switch (Res) { 4491 case MatchOperand_Success: break; 4492 case MatchOperand_ParseFail: 4493 // FIXME: use real operand location rather than the current location. 4494 Error(getLexer().getLoc(), "failed parsing operand."); 4495 while (!getLexer().is(AsmToken::EndOfStatement)) { 4496 Parser.Lex(); 4497 } 4498 return true; 4499 case MatchOperand_NoMatch: 4500 // FIXME: use real operand location rather than the current location. 4501 Error(getLexer().getLoc(), "not a valid operand."); 4502 while (!getLexer().is(AsmToken::EndOfStatement)) { 4503 Parser.Lex(); 4504 } 4505 return true; 4506 } 4507 } 4508 4509 return false; 4510 } 4511 4512 //===----------------------------------------------------------------------===// 4513 // Utility functions 4514 //===----------------------------------------------------------------------===// 4515 4516 OperandMatchResultTy 4517 AMDGPUAsmParser::parseIntWithPrefix(const char *Prefix, int64_t &IntVal) { 4518 4519 if (!trySkipId(Prefix, AsmToken::Colon)) 4520 return MatchOperand_NoMatch; 4521 4522 return parseExpr(IntVal) ? 
MatchOperand_Success : MatchOperand_ParseFail; 4523 } 4524 4525 OperandMatchResultTy 4526 AMDGPUAsmParser::parseIntWithPrefix(const char *Prefix, OperandVector &Operands, 4527 AMDGPUOperand::ImmTy ImmTy, 4528 bool (*ConvertResult)(int64_t&)) { 4529 SMLoc S = getLoc(); 4530 int64_t Value = 0; 4531 4532 OperandMatchResultTy Res = parseIntWithPrefix(Prefix, Value); 4533 if (Res != MatchOperand_Success) 4534 return Res; 4535 4536 if (ConvertResult && !ConvertResult(Value)) { 4537 Error(S, "invalid " + StringRef(Prefix) + " value."); 4538 } 4539 4540 Operands.push_back(AMDGPUOperand::CreateImm(this, Value, S, ImmTy)); 4541 return MatchOperand_Success; 4542 } 4543 4544 OperandMatchResultTy 4545 AMDGPUAsmParser::parseOperandArrayWithPrefix(const char *Prefix, 4546 OperandVector &Operands, 4547 AMDGPUOperand::ImmTy ImmTy, 4548 bool (*ConvertResult)(int64_t&)) { 4549 SMLoc S = getLoc(); 4550 if (!trySkipId(Prefix, AsmToken::Colon)) 4551 return MatchOperand_NoMatch; 4552 4553 if (!skipToken(AsmToken::LBrac, "expected a left square bracket")) 4554 return MatchOperand_ParseFail; 4555 4556 unsigned Val = 0; 4557 const unsigned MaxSize = 4; 4558 4559 // FIXME: How to verify the number of elements matches the number of src 4560 // operands? 4561 for (int I = 0; ; ++I) { 4562 int64_t Op; 4563 SMLoc Loc = getLoc(); 4564 if (!parseExpr(Op)) 4565 return MatchOperand_ParseFail; 4566 4567 if (Op != 0 && Op != 1) { 4568 Error(Loc, "invalid " + StringRef(Prefix) + " value."); 4569 return MatchOperand_ParseFail; 4570 } 4571 4572 Val |= (Op << I); 4573 4574 if (trySkipToken(AsmToken::RBrac)) 4575 break; 4576 4577 if (I + 1 == MaxSize) { 4578 Error(getLoc(), "expected a closing square bracket"); 4579 return MatchOperand_ParseFail; 4580 } 4581 4582 if (!skipToken(AsmToken::Comma, "expected a comma")) 4583 return MatchOperand_ParseFail; 4584 } 4585 4586 Operands.push_back(AMDGPUOperand::CreateImm(this, Val, S, ImmTy)); 4587 return MatchOperand_Success; 4588 } 4589 4590 OperandMatchResultTy 4591 AMDGPUAsmParser::parseNamedBit(const char *Name, OperandVector &Operands, 4592 AMDGPUOperand::ImmTy ImmTy) { 4593 int64_t Bit = 0; 4594 SMLoc S = Parser.getTok().getLoc(); 4595 4596 // We are at the end of the statement, and this is a default argument, so 4597 // use a default value. 
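// Illustrative examples for the named-bit parsing below (the "glc" spelling
// is an assumption; any named bit behaves the same): writing "glc" yields
// Bit = 1, "noglc" yields Bit = 0, and omitting the token keeps the default
// of 0.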
4598 if (getLexer().isNot(AsmToken::EndOfStatement)) { 4599 switch(getLexer().getKind()) { 4600 case AsmToken::Identifier: { 4601 StringRef Tok = Parser.getTok().getString(); 4602 if (Tok == Name) { 4603 if (Tok == "r128" && isGFX9()) 4604 Error(S, "r128 modifier is not supported on this GPU"); 4605 if (Tok == "a16" && !isGFX9() && !isGFX10()) 4606 Error(S, "a16 modifier is not supported on this GPU"); 4607 Bit = 1; 4608 Parser.Lex(); 4609 } else if (Tok.startswith("no") && Tok.endswith(Name)) { 4610 Bit = 0; 4611 Parser.Lex(); 4612 } else { 4613 return MatchOperand_NoMatch; 4614 } 4615 break; 4616 } 4617 default: 4618 return MatchOperand_NoMatch; 4619 } 4620 } 4621 4622 if (!isGFX10() && ImmTy == AMDGPUOperand::ImmTyDLC) 4623 return MatchOperand_ParseFail; 4624 4625 Operands.push_back(AMDGPUOperand::CreateImm(this, Bit, S, ImmTy)); 4626 return MatchOperand_Success; 4627 } 4628 4629 static void addOptionalImmOperand( 4630 MCInst& Inst, const OperandVector& Operands, 4631 AMDGPUAsmParser::OptionalImmIndexMap& OptionalIdx, 4632 AMDGPUOperand::ImmTy ImmT, 4633 int64_t Default = 0) { 4634 auto i = OptionalIdx.find(ImmT); 4635 if (i != OptionalIdx.end()) { 4636 unsigned Idx = i->second; 4637 ((AMDGPUOperand &)*Operands[Idx]).addImmOperands(Inst, 1); 4638 } else { 4639 Inst.addOperand(MCOperand::createImm(Default)); 4640 } 4641 } 4642 4643 OperandMatchResultTy 4644 AMDGPUAsmParser::parseStringWithPrefix(StringRef Prefix, StringRef &Value) { 4645 if (getLexer().isNot(AsmToken::Identifier)) { 4646 return MatchOperand_NoMatch; 4647 } 4648 StringRef Tok = Parser.getTok().getString(); 4649 if (Tok != Prefix) { 4650 return MatchOperand_NoMatch; 4651 } 4652 4653 Parser.Lex(); 4654 if (getLexer().isNot(AsmToken::Colon)) { 4655 return MatchOperand_ParseFail; 4656 } 4657 4658 Parser.Lex(); 4659 if (getLexer().isNot(AsmToken::Identifier)) { 4660 return MatchOperand_ParseFail; 4661 } 4662 4663 Value = Parser.getTok().getString(); 4664 return MatchOperand_Success; 4665 } 4666 4667 // dfmt and nfmt (in a tbuffer instruction) are parsed as one to allow their 4668 // values to live in a joint format operand in the MCInst encoding. 4669 OperandMatchResultTy 4670 AMDGPUAsmParser::parseDfmtNfmt(OperandVector &Operands) { 4671 SMLoc S = Parser.getTok().getLoc(); 4672 int64_t Dfmt = 0, Nfmt = 0; 4673 // dfmt and nfmt can appear in either order, and each is optional. 
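// Illustrative spellings (values are placeholders): "dfmt:4 nfmt:2",
// "nfmt:2 dfmt:4" and "dfmt:4" alone are all accepted; the two fields are
// packed below as Dfmt | (Nfmt << 4) into the single format operand.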
4674 bool GotDfmt = false, GotNfmt = false; 4675 while (!GotDfmt || !GotNfmt) { 4676 if (!GotDfmt) { 4677 auto Res = parseIntWithPrefix("dfmt", Dfmt); 4678 if (Res != MatchOperand_NoMatch) { 4679 if (Res != MatchOperand_Success) 4680 return Res; 4681 if (Dfmt >= 16) { 4682 Error(Parser.getTok().getLoc(), "out of range dfmt"); 4683 return MatchOperand_ParseFail; 4684 } 4685 GotDfmt = true; 4686 Parser.Lex(); 4687 continue; 4688 } 4689 } 4690 if (!GotNfmt) { 4691 auto Res = parseIntWithPrefix("nfmt", Nfmt); 4692 if (Res != MatchOperand_NoMatch) { 4693 if (Res != MatchOperand_Success) 4694 return Res; 4695 if (Nfmt >= 8) { 4696 Error(Parser.getTok().getLoc(), "out of range nfmt"); 4697 return MatchOperand_ParseFail; 4698 } 4699 GotNfmt = true; 4700 Parser.Lex(); 4701 continue; 4702 } 4703 } 4704 break; 4705 } 4706 if (!GotDfmt && !GotNfmt) 4707 return MatchOperand_NoMatch; 4708 auto Format = Dfmt | Nfmt << 4; 4709 Operands.push_back( 4710 AMDGPUOperand::CreateImm(this, Format, S, AMDGPUOperand::ImmTyFORMAT)); 4711 return MatchOperand_Success; 4712 } 4713 4714 //===----------------------------------------------------------------------===// 4715 // ds 4716 //===----------------------------------------------------------------------===// 4717 4718 void AMDGPUAsmParser::cvtDSOffset01(MCInst &Inst, 4719 const OperandVector &Operands) { 4720 OptionalImmIndexMap OptionalIdx; 4721 4722 for (unsigned i = 1, e = Operands.size(); i != e; ++i) { 4723 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 4724 4725 // Add the register arguments 4726 if (Op.isReg()) { 4727 Op.addRegOperands(Inst, 1); 4728 continue; 4729 } 4730 4731 // Handle optional arguments 4732 OptionalIdx[Op.getImmTy()] = i; 4733 } 4734 4735 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset0); 4736 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset1); 4737 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGDS); 4738 4739 Inst.addOperand(MCOperand::createReg(AMDGPU::M0)); // m0 4740 } 4741 4742 void AMDGPUAsmParser::cvtDSImpl(MCInst &Inst, const OperandVector &Operands, 4743 bool IsGdsHardcoded) { 4744 OptionalImmIndexMap OptionalIdx; 4745 4746 for (unsigned i = 1, e = Operands.size(); i != e; ++i) { 4747 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 4748 4749 // Add the register arguments 4750 if (Op.isReg()) { 4751 Op.addRegOperands(Inst, 1); 4752 continue; 4753 } 4754 4755 if (Op.isToken() && Op.getToken() == "gds") { 4756 IsGdsHardcoded = true; 4757 continue; 4758 } 4759 4760 // Handle optional arguments 4761 OptionalIdx[Op.getImmTy()] = i; 4762 } 4763 4764 AMDGPUOperand::ImmTy OffsetType = 4765 (Inst.getOpcode() == AMDGPU::DS_SWIZZLE_B32_gfx10 || 4766 Inst.getOpcode() == AMDGPU::DS_SWIZZLE_B32_gfx6_gfx7 || 4767 Inst.getOpcode() == AMDGPU::DS_SWIZZLE_B32_vi) ? 
AMDGPUOperand::ImmTySwizzle : 4768 AMDGPUOperand::ImmTyOffset; 4769 4770 addOptionalImmOperand(Inst, Operands, OptionalIdx, OffsetType); 4771 4772 if (!IsGdsHardcoded) { 4773 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGDS); 4774 } 4775 Inst.addOperand(MCOperand::createReg(AMDGPU::M0)); // m0 4776 } 4777 4778 void AMDGPUAsmParser::cvtExp(MCInst &Inst, const OperandVector &Operands) { 4779 OptionalImmIndexMap OptionalIdx; 4780 4781 unsigned OperandIdx[4]; 4782 unsigned EnMask = 0; 4783 int SrcIdx = 0; 4784 4785 for (unsigned i = 1, e = Operands.size(); i != e; ++i) { 4786 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 4787 4788 // Add the register arguments 4789 if (Op.isReg()) { 4790 assert(SrcIdx < 4); 4791 OperandIdx[SrcIdx] = Inst.size(); 4792 Op.addRegOperands(Inst, 1); 4793 ++SrcIdx; 4794 continue; 4795 } 4796 4797 if (Op.isOff()) { 4798 assert(SrcIdx < 4); 4799 OperandIdx[SrcIdx] = Inst.size(); 4800 Inst.addOperand(MCOperand::createReg(AMDGPU::NoRegister)); 4801 ++SrcIdx; 4802 continue; 4803 } 4804 4805 if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyExpTgt) { 4806 Op.addImmOperands(Inst, 1); 4807 continue; 4808 } 4809 4810 if (Op.isToken() && Op.getToken() == "done") 4811 continue; 4812 4813 // Handle optional arguments 4814 OptionalIdx[Op.getImmTy()] = i; 4815 } 4816 4817 assert(SrcIdx == 4); 4818 4819 bool Compr = false; 4820 if (OptionalIdx.find(AMDGPUOperand::ImmTyExpCompr) != OptionalIdx.end()) { 4821 Compr = true; 4822 Inst.getOperand(OperandIdx[1]) = Inst.getOperand(OperandIdx[2]); 4823 Inst.getOperand(OperandIdx[2]).setReg(AMDGPU::NoRegister); 4824 Inst.getOperand(OperandIdx[3]).setReg(AMDGPU::NoRegister); 4825 } 4826 4827 for (auto i = 0; i < SrcIdx; ++i) { 4828 if (Inst.getOperand(OperandIdx[i]).getReg() != AMDGPU::NoRegister) { 4829 EnMask |= Compr? 
(0x3 << i * 2) : (0x1 << i); 4830 } 4831 } 4832 4833 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyExpVM); 4834 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyExpCompr); 4835 4836 Inst.addOperand(MCOperand::createImm(EnMask)); 4837 } 4838 4839 //===----------------------------------------------------------------------===// 4840 // s_waitcnt 4841 //===----------------------------------------------------------------------===// 4842 4843 static bool 4844 encodeCnt( 4845 const AMDGPU::IsaVersion ISA, 4846 int64_t &IntVal, 4847 int64_t CntVal, 4848 bool Saturate, 4849 unsigned (*encode)(const IsaVersion &Version, unsigned, unsigned), 4850 unsigned (*decode)(const IsaVersion &Version, unsigned)) 4851 { 4852 bool Failed = false; 4853 4854 IntVal = encode(ISA, IntVal, CntVal); 4855 if (CntVal != decode(ISA, IntVal)) { 4856 if (Saturate) { 4857 IntVal = encode(ISA, IntVal, -1); 4858 } else { 4859 Failed = true; 4860 } 4861 } 4862 return Failed; 4863 } 4864 4865 bool AMDGPUAsmParser::parseCnt(int64_t &IntVal) { 4866 4867 SMLoc CntLoc = getLoc(); 4868 StringRef CntName = getTokenStr(); 4869 4870 if (!skipToken(AsmToken::Identifier, "expected a counter name") || 4871 !skipToken(AsmToken::LParen, "expected a left parenthesis")) 4872 return false; 4873 4874 int64_t CntVal; 4875 SMLoc ValLoc = getLoc(); 4876 if (!parseExpr(CntVal)) 4877 return false; 4878 4879 AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU()); 4880 4881 bool Failed = true; 4882 bool Sat = CntName.endswith("_sat"); 4883 4884 if (CntName == "vmcnt" || CntName == "vmcnt_sat") { 4885 Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeVmcnt, decodeVmcnt); 4886 } else if (CntName == "expcnt" || CntName == "expcnt_sat") { 4887 Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeExpcnt, decodeExpcnt); 4888 } else if (CntName == "lgkmcnt" || CntName == "lgkmcnt_sat") { 4889 Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeLgkmcnt, decodeLgkmcnt); 4890 } else { 4891 Error(CntLoc, "invalid counter name " + CntName); 4892 return false; 4893 } 4894 4895 if (Failed) { 4896 Error(ValLoc, "too large value for " + CntName); 4897 return false; 4898 } 4899 4900 if (!skipToken(AsmToken::RParen, "expected a closing parenthesis")) 4901 return false; 4902 4903 if (trySkipToken(AsmToken::Amp) || trySkipToken(AsmToken::Comma)) { 4904 if (isToken(AsmToken::EndOfStatement)) { 4905 Error(getLoc(), "expected a counter name"); 4906 return false; 4907 } 4908 } 4909 4910 return true; 4911 } 4912 4913 OperandMatchResultTy 4914 AMDGPUAsmParser::parseSWaitCntOps(OperandVector &Operands) { 4915 AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU()); 4916 int64_t Waitcnt = getWaitcntBitMask(ISA); 4917 SMLoc S = getLoc(); 4918 4919 // If parse failed, do not return error code 4920 // to avoid excessive error messages. 
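// Illustrative forms accepted here (counter values are placeholders): a
// symbolic list such as "vmcnt(0) expcnt(0) lgkmcnt(0)", with counters
// optionally joined by '&' or ',', or a plain absolute expression giving the
// raw waitcnt bitmask.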
4921 if (isToken(AsmToken::Identifier) && peekToken().is(AsmToken::LParen)) { 4922 while (parseCnt(Waitcnt) && !isToken(AsmToken::EndOfStatement)); 4923 } else { 4924 parseExpr(Waitcnt); 4925 } 4926 4927 Operands.push_back(AMDGPUOperand::CreateImm(this, Waitcnt, S)); 4928 return MatchOperand_Success; 4929 } 4930 4931 bool 4932 AMDGPUOperand::isSWaitCnt() const { 4933 return isImm(); 4934 } 4935 4936 //===----------------------------------------------------------------------===// 4937 // hwreg 4938 //===----------------------------------------------------------------------===// 4939 4940 bool 4941 AMDGPUAsmParser::parseHwregBody(OperandInfoTy &HwReg, 4942 int64_t &Offset, 4943 int64_t &Width) { 4944 using namespace llvm::AMDGPU::Hwreg; 4945 4946 // The register may be specified by name or using a numeric code 4947 if (isToken(AsmToken::Identifier) && 4948 (HwReg.Id = getHwregId(getTokenStr())) >= 0) { 4949 HwReg.IsSymbolic = true; 4950 lex(); // skip message name 4951 } else if (!parseExpr(HwReg.Id)) { 4952 return false; 4953 } 4954 4955 if (trySkipToken(AsmToken::RParen)) 4956 return true; 4957 4958 // parse optional params 4959 return 4960 skipToken(AsmToken::Comma, "expected a comma or a closing parenthesis") && 4961 parseExpr(Offset) && 4962 skipToken(AsmToken::Comma, "expected a comma") && 4963 parseExpr(Width) && 4964 skipToken(AsmToken::RParen, "expected a closing parenthesis"); 4965 } 4966 4967 bool 4968 AMDGPUAsmParser::validateHwreg(const OperandInfoTy &HwReg, 4969 const int64_t Offset, 4970 const int64_t Width, 4971 const SMLoc Loc) { 4972 4973 using namespace llvm::AMDGPU::Hwreg; 4974 4975 if (HwReg.IsSymbolic && !isValidHwreg(HwReg.Id, getSTI())) { 4976 Error(Loc, "specified hardware register is not supported on this GPU"); 4977 return false; 4978 } else if (!isValidHwreg(HwReg.Id)) { 4979 Error(Loc, "invalid code of hardware register: only 6-bit values are legal"); 4980 return false; 4981 } else if (!isValidHwregOffset(Offset)) { 4982 Error(Loc, "invalid bit offset: only 5-bit values are legal"); 4983 return false; 4984 } else if (!isValidHwregWidth(Width)) { 4985 Error(Loc, "invalid bitfield width: only values from 1 to 32 are legal"); 4986 return false; 4987 } 4988 return true; 4989 } 4990 4991 OperandMatchResultTy 4992 AMDGPUAsmParser::parseHwreg(OperandVector &Operands) { 4993 using namespace llvm::AMDGPU::Hwreg; 4994 4995 int64_t ImmVal = 0; 4996 SMLoc Loc = getLoc(); 4997 4998 // If parse failed, do not return error code 4999 // to avoid excessive error messages. 
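// Illustrative forms for the hwreg operand parsed below (the register name is
// a placeholder): "hwreg(HW_REG_TRAPSTS)", "hwreg(HW_REG_TRAPSTS, 0, 32)", or
// a bare 16-bit immediate; offset and width are optional and take defaults.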
5000 if (trySkipId("hwreg", AsmToken::LParen)) { 5001 OperandInfoTy HwReg(ID_UNKNOWN_); 5002 int64_t Offset = OFFSET_DEFAULT_; 5003 int64_t Width = WIDTH_DEFAULT_; 5004 if (parseHwregBody(HwReg, Offset, Width) && 5005 validateHwreg(HwReg, Offset, Width, Loc)) { 5006 ImmVal = encodeHwreg(HwReg.Id, Offset, Width); 5007 } 5008 } else if (parseExpr(ImmVal)) { 5009 if (ImmVal < 0 || !isUInt<16>(ImmVal)) 5010 Error(Loc, "invalid immediate: only 16-bit values are legal"); 5011 } 5012 5013 Operands.push_back(AMDGPUOperand::CreateImm(this, ImmVal, Loc, AMDGPUOperand::ImmTyHwreg)); 5014 return MatchOperand_Success; 5015 } 5016 5017 bool AMDGPUOperand::isHwreg() const { 5018 return isImmTy(ImmTyHwreg); 5019 } 5020 5021 //===----------------------------------------------------------------------===// 5022 // sendmsg 5023 //===----------------------------------------------------------------------===// 5024 5025 bool 5026 AMDGPUAsmParser::parseSendMsgBody(OperandInfoTy &Msg, 5027 OperandInfoTy &Op, 5028 OperandInfoTy &Stream) { 5029 using namespace llvm::AMDGPU::SendMsg; 5030 5031 if (isToken(AsmToken::Identifier) && (Msg.Id = getMsgId(getTokenStr())) >= 0) { 5032 Msg.IsSymbolic = true; 5033 lex(); // skip message name 5034 } else if (!parseExpr(Msg.Id)) { 5035 return false; 5036 } 5037 5038 if (trySkipToken(AsmToken::Comma)) { 5039 Op.IsDefined = true; 5040 if (isToken(AsmToken::Identifier) && 5041 (Op.Id = getMsgOpId(Msg.Id, getTokenStr())) >= 0) { 5042 lex(); // skip operation name 5043 } else if (!parseExpr(Op.Id)) { 5044 return false; 5045 } 5046 5047 if (trySkipToken(AsmToken::Comma)) { 5048 Stream.IsDefined = true; 5049 if (!parseExpr(Stream.Id)) 5050 return false; 5051 } 5052 } 5053 5054 return skipToken(AsmToken::RParen, "expected a closing parenthesis"); 5055 } 5056 5057 bool 5058 AMDGPUAsmParser::validateSendMsg(const OperandInfoTy &Msg, 5059 const OperandInfoTy &Op, 5060 const OperandInfoTy &Stream, 5061 const SMLoc S) { 5062 using namespace llvm::AMDGPU::SendMsg; 5063 5064 // Validation strictness depends on whether the message is specified 5065 // in a symbolic or in a numeric form. In the latter case, 5066 // only whether the value can be encoded is checked. 5067 bool Strict = Msg.IsSymbolic; 5068 5069 if (!isValidMsgId(Msg.Id, getSTI(), Strict)) { 5070 Error(S, "invalid message id"); 5071 return false; 5072 } else if (Strict && (msgRequiresOp(Msg.Id) != Op.IsDefined)) { 5073 Error(S, Op.IsDefined ? 5074 "message does not support operations" : 5075 "missing message operation"); 5076 return false; 5077 } else if (!isValidMsgOp(Msg.Id, Op.Id, Strict)) { 5078 Error(S, "invalid operation id"); 5079 return false; 5080 } else if (Strict && !msgSupportsStream(Msg.Id, Op.Id) && Stream.IsDefined) { 5081 Error(S, "message operation does not support streams"); 5082 return false; 5083 } else if (!isValidMsgStream(Msg.Id, Op.Id, Stream.Id, Strict)) { 5084 Error(S, "invalid message stream id"); 5085 return false; 5086 } 5087 return true; 5088 } 5089 5090 OperandMatchResultTy 5091 AMDGPUAsmParser::parseSendMsgOp(OperandVector &Operands) { 5092 using namespace llvm::AMDGPU::SendMsg; 5093 5094 int64_t ImmVal = 0; 5095 SMLoc Loc = getLoc(); 5096 5097 // If parse failed, do not return error code 5098 // to avoid excessive error messages.
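// Illustrative forms for the sendmsg operand parsed below (message and
// operation names are placeholders): "sendmsg(MSG_INTERRUPT)",
// "sendmsg(MSG_GS, GS_OP_EMIT, 0)", or a bare 16-bit immediate.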
5099 if (trySkipId("sendmsg", AsmToken::LParen)) { 5100 OperandInfoTy Msg(ID_UNKNOWN_); 5101 OperandInfoTy Op(OP_NONE_); 5102 OperandInfoTy Stream(STREAM_ID_NONE_); 5103 if (parseSendMsgBody(Msg, Op, Stream) && 5104 validateSendMsg(Msg, Op, Stream, Loc)) { 5105 ImmVal = encodeMsg(Msg.Id, Op.Id, Stream.Id); 5106 } 5107 } else if (parseExpr(ImmVal)) { 5108 if (ImmVal < 0 || !isUInt<16>(ImmVal)) 5109 Error(Loc, "invalid immediate: only 16-bit values are legal"); 5110 } 5111 5112 Operands.push_back(AMDGPUOperand::CreateImm(this, ImmVal, Loc, AMDGPUOperand::ImmTySendMsg)); 5113 return MatchOperand_Success; 5114 } 5115 5116 bool AMDGPUOperand::isSendMsg() const { 5117 return isImmTy(ImmTySendMsg); 5118 } 5119 5120 //===----------------------------------------------------------------------===// 5121 // v_interp 5122 //===----------------------------------------------------------------------===// 5123 5124 OperandMatchResultTy AMDGPUAsmParser::parseInterpSlot(OperandVector &Operands) { 5125 if (getLexer().getKind() != AsmToken::Identifier) 5126 return MatchOperand_NoMatch; 5127 5128 StringRef Str = Parser.getTok().getString(); 5129 int Slot = StringSwitch<int>(Str) 5130 .Case("p10", 0) 5131 .Case("p20", 1) 5132 .Case("p0", 2) 5133 .Default(-1); 5134 5135 SMLoc S = Parser.getTok().getLoc(); 5136 if (Slot == -1) 5137 return MatchOperand_ParseFail; 5138 5139 Parser.Lex(); 5140 Operands.push_back(AMDGPUOperand::CreateImm(this, Slot, S, 5141 AMDGPUOperand::ImmTyInterpSlot)); 5142 return MatchOperand_Success; 5143 } 5144 5145 OperandMatchResultTy AMDGPUAsmParser::parseInterpAttr(OperandVector &Operands) { 5146 if (getLexer().getKind() != AsmToken::Identifier) 5147 return MatchOperand_NoMatch; 5148 5149 StringRef Str = Parser.getTok().getString(); 5150 if (!Str.startswith("attr")) 5151 return MatchOperand_NoMatch; 5152 5153 StringRef Chan = Str.take_back(2); 5154 int AttrChan = StringSwitch<int>(Chan) 5155 .Case(".x", 0) 5156 .Case(".y", 1) 5157 .Case(".z", 2) 5158 .Case(".w", 3) 5159 .Default(-1); 5160 if (AttrChan == -1) 5161 return MatchOperand_ParseFail; 5162 5163 Str = Str.drop_back(2).drop_front(4); 5164 5165 uint8_t Attr; 5166 if (Str.getAsInteger(10, Attr)) 5167 return MatchOperand_ParseFail; 5168 5169 SMLoc S = Parser.getTok().getLoc(); 5170 Parser.Lex(); 5171 if (Attr > 63) { 5172 Error(S, "out of bounds attr"); 5173 return MatchOperand_Success; 5174 } 5175 5176 SMLoc SChan = SMLoc::getFromPointer(Chan.data()); 5177 5178 Operands.push_back(AMDGPUOperand::CreateImm(this, Attr, S, 5179 AMDGPUOperand::ImmTyInterpAttr)); 5180 Operands.push_back(AMDGPUOperand::CreateImm(this, AttrChan, SChan, 5181 AMDGPUOperand::ImmTyAttrChan)); 5182 return MatchOperand_Success; 5183 } 5184 5185 //===----------------------------------------------------------------------===// 5186 // exp 5187 //===----------------------------------------------------------------------===// 5188 5189 void AMDGPUAsmParser::errorExpTgt() { 5190 Error(Parser.getTok().getLoc(), "invalid exp target"); 5191 } 5192 5193 OperandMatchResultTy AMDGPUAsmParser::parseExpTgtImpl(StringRef Str, 5194 uint8_t &Val) { 5195 if (Str == "null") { 5196 Val = 9; 5197 return MatchOperand_Success; 5198 } 5199 5200 if (Str.startswith("mrt")) { 5201 Str = Str.drop_front(3); 5202 if (Str == "z") { // == mrtz 5203 Val = 8; 5204 return MatchOperand_Success; 5205 } 5206 5207 if (Str.getAsInteger(10, Val)) 5208 return MatchOperand_ParseFail; 5209 5210 if (Val > 7) 5211 errorExpTgt(); 5212 5213 return MatchOperand_Success; 5214 } 5215 5216 if (Str.startswith("pos")) 
{ 5217 Str = Str.drop_front(3); 5218 if (Str.getAsInteger(10, Val)) 5219 return MatchOperand_ParseFail; 5220 5221 if (Val > 4 || (Val == 4 && !isGFX10())) 5222 errorExpTgt(); 5223 5224 Val += 12; 5225 return MatchOperand_Success; 5226 } 5227 5228 if (isGFX10() && Str == "prim") { 5229 Val = 20; 5230 return MatchOperand_Success; 5231 } 5232 5233 if (Str.startswith("param")) { 5234 Str = Str.drop_front(5); 5235 if (Str.getAsInteger(10, Val)) 5236 return MatchOperand_ParseFail; 5237 5238 if (Val >= 32) 5239 errorExpTgt(); 5240 5241 Val += 32; 5242 return MatchOperand_Success; 5243 } 5244 5245 if (Str.startswith("invalid_target_")) { 5246 Str = Str.drop_front(15); 5247 if (Str.getAsInteger(10, Val)) 5248 return MatchOperand_ParseFail; 5249 5250 errorExpTgt(); 5251 return MatchOperand_Success; 5252 } 5253 5254 return MatchOperand_NoMatch; 5255 } 5256 5257 OperandMatchResultTy AMDGPUAsmParser::parseExpTgt(OperandVector &Operands) { 5258 uint8_t Val; 5259 StringRef Str = Parser.getTok().getString(); 5260 5261 auto Res = parseExpTgtImpl(Str, Val); 5262 if (Res != MatchOperand_Success) 5263 return Res; 5264 5265 SMLoc S = Parser.getTok().getLoc(); 5266 Parser.Lex(); 5267 5268 Operands.push_back(AMDGPUOperand::CreateImm(this, Val, S, 5269 AMDGPUOperand::ImmTyExpTgt)); 5270 return MatchOperand_Success; 5271 } 5272 5273 //===----------------------------------------------------------------------===// 5274 // parser helpers 5275 //===----------------------------------------------------------------------===// 5276 5277 bool 5278 AMDGPUAsmParser::isId(const AsmToken &Token, const StringRef Id) const { 5279 return Token.is(AsmToken::Identifier) && Token.getString() == Id; 5280 } 5281 5282 bool 5283 AMDGPUAsmParser::isId(const StringRef Id) const { 5284 return isId(getToken(), Id); 5285 } 5286 5287 bool 5288 AMDGPUAsmParser::isToken(const AsmToken::TokenKind Kind) const { 5289 return getTokenKind() == Kind; 5290 } 5291 5292 bool 5293 AMDGPUAsmParser::trySkipId(const StringRef Id) { 5294 if (isId(Id)) { 5295 lex(); 5296 return true; 5297 } 5298 return false; 5299 } 5300 5301 bool 5302 AMDGPUAsmParser::trySkipId(const StringRef Id, const AsmToken::TokenKind Kind) { 5303 if (isId(Id) && peekToken().is(Kind)) { 5304 lex(); 5305 lex(); 5306 return true; 5307 } 5308 return false; 5309 } 5310 5311 bool 5312 AMDGPUAsmParser::trySkipToken(const AsmToken::TokenKind Kind) { 5313 if (isToken(Kind)) { 5314 lex(); 5315 return true; 5316 } 5317 return false; 5318 } 5319 5320 bool 5321 AMDGPUAsmParser::skipToken(const AsmToken::TokenKind Kind, 5322 const StringRef ErrMsg) { 5323 if (!trySkipToken(Kind)) { 5324 Error(getLoc(), ErrMsg); 5325 return false; 5326 } 5327 return true; 5328 } 5329 5330 bool 5331 AMDGPUAsmParser::parseExpr(int64_t &Imm) { 5332 return !getParser().parseAbsoluteExpression(Imm); 5333 } 5334 5335 bool 5336 AMDGPUAsmParser::parseExpr(OperandVector &Operands) { 5337 SMLoc S = getLoc(); 5338 5339 const MCExpr *Expr; 5340 if (Parser.parseExpression(Expr)) 5341 return false; 5342 5343 int64_t IntVal; 5344 if (Expr->evaluateAsAbsolute(IntVal)) { 5345 Operands.push_back(AMDGPUOperand::CreateImm(this, IntVal, S)); 5346 } else { 5347 Operands.push_back(AMDGPUOperand::CreateExpr(this, Expr, S)); 5348 } 5349 return true; 5350 } 5351 5352 bool 5353 AMDGPUAsmParser::parseString(StringRef &Val, const StringRef ErrMsg) { 5354 if (isToken(AsmToken::String)) { 5355 Val = getToken().getStringContents(); 5356 lex(); 5357 return true; 5358 } else { 5359 Error(getLoc(), ErrMsg); 5360 return false; 5361 } 5362 } 5363 5364 
AsmToken 5365 AMDGPUAsmParser::getToken() const { 5366 return Parser.getTok(); 5367 } 5368 5369 AsmToken 5370 AMDGPUAsmParser::peekToken() { 5371 return getLexer().peekTok(); 5372 } 5373 5374 void 5375 AMDGPUAsmParser::peekTokens(MutableArrayRef<AsmToken> Tokens) { 5376 auto TokCount = getLexer().peekTokens(Tokens); 5377 5378 for (auto Idx = TokCount; Idx < Tokens.size(); ++Idx) 5379 Tokens[Idx] = AsmToken(AsmToken::Error, ""); 5380 } 5381 5382 AsmToken::TokenKind 5383 AMDGPUAsmParser::getTokenKind() const { 5384 return getLexer().getKind(); 5385 } 5386 5387 SMLoc 5388 AMDGPUAsmParser::getLoc() const { 5389 return getToken().getLoc(); 5390 } 5391 5392 StringRef 5393 AMDGPUAsmParser::getTokenStr() const { 5394 return getToken().getString(); 5395 } 5396 5397 void 5398 AMDGPUAsmParser::lex() { 5399 Parser.Lex(); 5400 } 5401 5402 //===----------------------------------------------------------------------===// 5403 // swizzle 5404 //===----------------------------------------------------------------------===// 5405 5406 LLVM_READNONE 5407 static unsigned 5408 encodeBitmaskPerm(const unsigned AndMask, 5409 const unsigned OrMask, 5410 const unsigned XorMask) { 5411 using namespace llvm::AMDGPU::Swizzle; 5412 5413 return BITMASK_PERM_ENC | 5414 (AndMask << BITMASK_AND_SHIFT) | 5415 (OrMask << BITMASK_OR_SHIFT) | 5416 (XorMask << BITMASK_XOR_SHIFT); 5417 } 5418 5419 bool 5420 AMDGPUAsmParser::parseSwizzleOperands(const unsigned OpNum, int64_t* Op, 5421 const unsigned MinVal, 5422 const unsigned MaxVal, 5423 const StringRef ErrMsg) { 5424 for (unsigned i = 0; i < OpNum; ++i) { 5425 if (!skipToken(AsmToken::Comma, "expected a comma")){ 5426 return false; 5427 } 5428 SMLoc ExprLoc = Parser.getTok().getLoc(); 5429 if (!parseExpr(Op[i])) { 5430 return false; 5431 } 5432 if (Op[i] < MinVal || Op[i] > MaxVal) { 5433 Error(ExprLoc, ErrMsg); 5434 return false; 5435 } 5436 } 5437 5438 return true; 5439 } 5440 5441 bool 5442 AMDGPUAsmParser::parseSwizzleQuadPerm(int64_t &Imm) { 5443 using namespace llvm::AMDGPU::Swizzle; 5444 5445 int64_t Lane[LANE_NUM]; 5446 if (parseSwizzleOperands(LANE_NUM, Lane, 0, LANE_MAX, 5447 "expected a 2-bit lane id")) { 5448 Imm = QUAD_PERM_ENC; 5449 for (unsigned I = 0; I < LANE_NUM; ++I) { 5450 Imm |= Lane[I] << (LANE_SHIFT * I); 5451 } 5452 return true; 5453 } 5454 return false; 5455 } 5456 5457 bool 5458 AMDGPUAsmParser::parseSwizzleBroadcast(int64_t &Imm) { 5459 using namespace llvm::AMDGPU::Swizzle; 5460 5461 SMLoc S = Parser.getTok().getLoc(); 5462 int64_t GroupSize; 5463 int64_t LaneIdx; 5464 5465 if (!parseSwizzleOperands(1, &GroupSize, 5466 2, 32, 5467 "group size must be in the interval [2,32]")) { 5468 return false; 5469 } 5470 if (!isPowerOf2_64(GroupSize)) { 5471 Error(S, "group size must be a power of two"); 5472 return false; 5473 } 5474 if (parseSwizzleOperands(1, &LaneIdx, 5475 0, GroupSize - 1, 5476 "lane id must be in the interval [0,group size - 1]")) { 5477 Imm = encodeBitmaskPerm(BITMASK_MAX - GroupSize + 1, LaneIdx, 0); 5478 return true; 5479 } 5480 return false; 5481 } 5482 5483 bool 5484 AMDGPUAsmParser::parseSwizzleReverse(int64_t &Imm) { 5485 using namespace llvm::AMDGPU::Swizzle; 5486 5487 SMLoc S = Parser.getTok().getLoc(); 5488 int64_t GroupSize; 5489 5490 if (!parseSwizzleOperands(1, &GroupSize, 5491 2, 32, "group size must be in the interval [2,32]")) { 5492 return false; 5493 } 5494 if (!isPowerOf2_64(GroupSize)) { 5495 Error(S, "group size must be a power of two"); 5496 return false; 5497 } 5498 5499 Imm = encodeBitmaskPerm(BITMASK_MAX, 0, 
GroupSize - 1); 5500 return true; 5501 } 5502 5503 bool 5504 AMDGPUAsmParser::parseSwizzleSwap(int64_t &Imm) { 5505 using namespace llvm::AMDGPU::Swizzle; 5506 5507 SMLoc S = Parser.getTok().getLoc(); 5508 int64_t GroupSize; 5509 5510 if (!parseSwizzleOperands(1, &GroupSize, 5511 1, 16, "group size must be in the interval [1,16]")) { 5512 return false; 5513 } 5514 if (!isPowerOf2_64(GroupSize)) { 5515 Error(S, "group size must be a power of two"); 5516 return false; 5517 } 5518 5519 Imm = encodeBitmaskPerm(BITMASK_MAX, 0, GroupSize); 5520 return true; 5521 } 5522 5523 bool 5524 AMDGPUAsmParser::parseSwizzleBitmaskPerm(int64_t &Imm) { 5525 using namespace llvm::AMDGPU::Swizzle; 5526 5527 if (!skipToken(AsmToken::Comma, "expected a comma")) { 5528 return false; 5529 } 5530 5531 StringRef Ctl; 5532 SMLoc StrLoc = Parser.getTok().getLoc(); 5533 if (!parseString(Ctl)) { 5534 return false; 5535 } 5536 if (Ctl.size() != BITMASK_WIDTH) { 5537 Error(StrLoc, "expected a 5-character mask"); 5538 return false; 5539 } 5540 5541 unsigned AndMask = 0; 5542 unsigned OrMask = 0; 5543 unsigned XorMask = 0; 5544 5545 for (size_t i = 0; i < Ctl.size(); ++i) { 5546 unsigned Mask = 1 << (BITMASK_WIDTH - 1 - i); 5547 switch(Ctl[i]) { 5548 default: 5549 Error(StrLoc, "invalid mask"); 5550 return false; 5551 case '0': 5552 break; 5553 case '1': 5554 OrMask |= Mask; 5555 break; 5556 case 'p': 5557 AndMask |= Mask; 5558 break; 5559 case 'i': 5560 AndMask |= Mask; 5561 XorMask |= Mask; 5562 break; 5563 } 5564 } 5565 5566 Imm = encodeBitmaskPerm(AndMask, OrMask, XorMask); 5567 return true; 5568 } 5569 5570 bool 5571 AMDGPUAsmParser::parseSwizzleOffset(int64_t &Imm) { 5572 5573 SMLoc OffsetLoc = Parser.getTok().getLoc(); 5574 5575 if (!parseExpr(Imm)) { 5576 return false; 5577 } 5578 if (!isUInt<16>(Imm)) { 5579 Error(OffsetLoc, "expected a 16-bit offset"); 5580 return false; 5581 } 5582 return true; 5583 } 5584 5585 bool 5586 AMDGPUAsmParser::parseSwizzleMacro(int64_t &Imm) { 5587 using namespace llvm::AMDGPU::Swizzle; 5588 5589 if (skipToken(AsmToken::LParen, "expected a left parentheses")) { 5590 5591 SMLoc ModeLoc = Parser.getTok().getLoc(); 5592 bool Ok = false; 5593 5594 if (trySkipId(IdSymbolic[ID_QUAD_PERM])) { 5595 Ok = parseSwizzleQuadPerm(Imm); 5596 } else if (trySkipId(IdSymbolic[ID_BITMASK_PERM])) { 5597 Ok = parseSwizzleBitmaskPerm(Imm); 5598 } else if (trySkipId(IdSymbolic[ID_BROADCAST])) { 5599 Ok = parseSwizzleBroadcast(Imm); 5600 } else if (trySkipId(IdSymbolic[ID_SWAP])) { 5601 Ok = parseSwizzleSwap(Imm); 5602 } else if (trySkipId(IdSymbolic[ID_REVERSE])) { 5603 Ok = parseSwizzleReverse(Imm); 5604 } else { 5605 Error(ModeLoc, "expected a swizzle mode"); 5606 } 5607 5608 return Ok && skipToken(AsmToken::RParen, "expected a closing parentheses"); 5609 } 5610 5611 return false; 5612 } 5613 5614 OperandMatchResultTy 5615 AMDGPUAsmParser::parseSwizzleOp(OperandVector &Operands) { 5616 SMLoc S = Parser.getTok().getLoc(); 5617 int64_t Imm = 0; 5618 5619 if (trySkipId("offset")) { 5620 5621 bool Ok = false; 5622 if (skipToken(AsmToken::Colon, "expected a colon")) { 5623 if (trySkipId("swizzle")) { 5624 Ok = parseSwizzleMacro(Imm); 5625 } else { 5626 Ok = parseSwizzleOffset(Imm); 5627 } 5628 } 5629 5630 Operands.push_back(AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTySwizzle)); 5631 5632 return Ok? MatchOperand_Success : MatchOperand_ParseFail; 5633 } else { 5634 // Swizzle "offset" operand is optional. 5635 // If it is omitted, try parsing other optional operands. 
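// For reference, the swizzle operand forms accepted by this parser include,
// e.g. (illustrative asm, not an exhaustive list):
//   ds_swizzle_b32 v8, v2 offset:0xffff
//   ds_swizzle_b32 v8, v2 offset:swizzle(QUAD_PERM, 0, 1, 2, 3)
//   ds_swizzle_b32 v8, v2 offset:swizzle(BITMASK_PERM, "01pi0")
//   ds_swizzle_b32 v8, v2 offset:swizzle(BROADCAST, 8, 3)
//   ds_swizzle_b32 v8, v2 offset:swizzle(SWAP, 16)
//   ds_swizzle_b32 v8, v2 offset:swizzle(REVERSE, 4)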
5636 return parseOptionalOpr(Operands); 5637 } 5638 } 5639 5640 bool 5641 AMDGPUOperand::isSwizzle() const { 5642 return isImmTy(ImmTySwizzle); 5643 } 5644 5645 //===----------------------------------------------------------------------===// 5646 // VGPR Index Mode 5647 //===----------------------------------------------------------------------===// 5648 5649 int64_t AMDGPUAsmParser::parseGPRIdxMacro() { 5650 5651 using namespace llvm::AMDGPU::VGPRIndexMode; 5652 5653 if (trySkipToken(AsmToken::RParen)) { 5654 return OFF; 5655 } 5656 5657 int64_t Imm = 0; 5658 5659 while (true) { 5660 unsigned Mode = 0; 5661 SMLoc S = Parser.getTok().getLoc(); 5662 5663 for (unsigned ModeId = ID_MIN; ModeId <= ID_MAX; ++ModeId) { 5664 if (trySkipId(IdSymbolic[ModeId])) { 5665 Mode = 1 << ModeId; 5666 break; 5667 } 5668 } 5669 5670 if (Mode == 0) { 5671 Error(S, (Imm == 0)? 5672 "expected a VGPR index mode or a closing parenthesis" : 5673 "expected a VGPR index mode"); 5674 break; 5675 } 5676 5677 if (Imm & Mode) { 5678 Error(S, "duplicate VGPR index mode"); 5679 break; 5680 } 5681 Imm |= Mode; 5682 5683 if (trySkipToken(AsmToken::RParen)) 5684 break; 5685 if (!skipToken(AsmToken::Comma, 5686 "expected a comma or a closing parenthesis")) 5687 break; 5688 } 5689 5690 return Imm; 5691 } 5692 5693 OperandMatchResultTy 5694 AMDGPUAsmParser::parseGPRIdxMode(OperandVector &Operands) { 5695 5696 int64_t Imm = 0; 5697 SMLoc S = Parser.getTok().getLoc(); 5698 5699 if (getLexer().getKind() == AsmToken::Identifier && 5700 Parser.getTok().getString() == "gpr_idx" && 5701 getLexer().peekTok().is(AsmToken::LParen)) { 5702 5703 Parser.Lex(); 5704 Parser.Lex(); 5705 5706 // If parse failed, trigger an error but do not return error code 5707 // to avoid excessive error messages. 5708 Imm = parseGPRIdxMacro(); 5709 5710 } else { 5711 if (getParser().parseAbsoluteExpression(Imm)) 5712 return MatchOperand_NoMatch; 5713 if (Imm < 0 || !isUInt<4>(Imm)) { 5714 Error(S, "invalid immediate: only 4-bit values are legal"); 5715 } 5716 } 5717 5718 Operands.push_back( 5719 AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTyGprIdxMode)); 5720 return MatchOperand_Success; 5721 } 5722 5723 bool AMDGPUOperand::isGPRIdxMode() const { 5724 return isImmTy(ImmTyGprIdxMode); 5725 } 5726 5727 //===----------------------------------------------------------------------===// 5728 // sopp branch targets 5729 //===----------------------------------------------------------------------===// 5730 5731 OperandMatchResultTy 5732 AMDGPUAsmParser::parseSOppBrTarget(OperandVector &Operands) { 5733 5734 // Make sure we are not parsing something 5735 // that looks like a label or an expression but is not. 5736 // This will improve error messages. 5737 if (isRegister() || isModifier()) 5738 return MatchOperand_NoMatch; 5739 5740 if (parseExpr(Operands)) { 5741 5742 AMDGPUOperand &Opr = ((AMDGPUOperand &)*Operands[Operands.size() - 1]); 5743 assert(Opr.isImm() || Opr.isExpr()); 5744 SMLoc Loc = Opr.getStartLoc(); 5745 5746 // Currently we do not support arbitrary expressions as branch targets. 5747 // Only labels and absolute expressions are accepted. 
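// For instance (illustrative only):
//   s_branch loop_header      ; plain label      - accepted as a symbol ref
//   s_branch 12               ; absolute value   - accepted as a 16-bit imm
//   s_branch loop_header + 4  ; composite expr   - rejected below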
5748 if (Opr.isExpr() && !Opr.isSymbolRefExpr()) { 5749 Error(Loc, "expected an absolute expression or a label"); 5750 } else if (Opr.isImm() && !Opr.isS16Imm()) { 5751 Error(Loc, "expected a 16-bit signed jump offset"); 5752 } 5753 } 5754 5755 return MatchOperand_Success; // avoid excessive error messages 5756 } 5757 5758 //===----------------------------------------------------------------------===// 5759 // Boolean holding registers 5760 //===----------------------------------------------------------------------===// 5761 5762 OperandMatchResultTy 5763 AMDGPUAsmParser::parseBoolReg(OperandVector &Operands) { 5764 return parseReg(Operands); 5765 } 5766 5767 //===----------------------------------------------------------------------===// 5768 // mubuf 5769 //===----------------------------------------------------------------------===// 5770 5771 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultDLC() const { 5772 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyDLC); 5773 } 5774 5775 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultGLC() const { 5776 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyGLC); 5777 } 5778 5779 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSLC() const { 5780 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTySLC); 5781 } 5782 5783 void AMDGPUAsmParser::cvtMubufImpl(MCInst &Inst, 5784 const OperandVector &Operands, 5785 bool IsAtomic, 5786 bool IsAtomicReturn, 5787 bool IsLds) { 5788 bool IsLdsOpcode = IsLds; 5789 bool HasLdsModifier = false; 5790 OptionalImmIndexMap OptionalIdx; 5791 assert(IsAtomicReturn ? IsAtomic : true); 5792 unsigned FirstOperandIdx = 1; 5793 5794 for (unsigned i = FirstOperandIdx, e = Operands.size(); i != e; ++i) { 5795 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 5796 5797 // Add the register arguments 5798 if (Op.isReg()) { 5799 Op.addRegOperands(Inst, 1); 5800 // Insert a tied src for atomic return dst. 5801 // This cannot be postponed as subsequent calls to 5802 // addImmOperands rely on correct number of MC operands. 5803 if (IsAtomicReturn && i == FirstOperandIdx) 5804 Op.addRegOperands(Inst, 1); 5805 continue; 5806 } 5807 5808 // Handle the case where soffset is an immediate 5809 if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyNone) { 5810 Op.addImmOperands(Inst, 1); 5811 continue; 5812 } 5813 5814 HasLdsModifier |= Op.isLDS(); 5815 5816 // Handle tokens like 'offen' which are sometimes hard-coded into the 5817 // asm string. There are no MCInst operands for these. 5818 if (Op.isToken()) { 5819 continue; 5820 } 5821 assert(Op.isImm()); 5822 5823 // Handle optional arguments 5824 OptionalIdx[Op.getImmTy()] = i; 5825 } 5826 5827 // This is a workaround for an llvm quirk which may result in an 5828 // incorrect instruction selection. Lds and non-lds versions of 5829 // MUBUF instructions are identical except that lds versions 5830 // have mandatory 'lds' modifier. However this modifier follows 5831 // optional modifiers and llvm asm matcher regards this 'lds' 5832 // modifier as an optional one. As a result, an lds version 5833 // of opcode may be selected even if it has no 'lds' modifier. 5834 if (IsLdsOpcode && !HasLdsModifier) { 5835 int NoLdsOpcode = AMDGPU::getMUBUFNoLdsInst(Inst.getOpcode()); 5836 if (NoLdsOpcode != -1) { // Got lds version - correct it. 5837 Inst.setOpcode(NoLdsOpcode); 5838 IsLdsOpcode = false; 5839 } 5840 } 5841 5842 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset); 5843 if (!IsAtomic) { // glc is hard-coded. 
5844 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGLC); 5845 } 5846 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySLC); 5847 5848 if (!IsLdsOpcode) { // tfe is not legal with lds opcodes 5849 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE); 5850 } 5851 5852 if (isGFX10()) 5853 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDLC); 5854 } 5855 5856 void AMDGPUAsmParser::cvtMtbuf(MCInst &Inst, const OperandVector &Operands) { 5857 OptionalImmIndexMap OptionalIdx; 5858 5859 for (unsigned i = 1, e = Operands.size(); i != e; ++i) { 5860 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 5861 5862 // Add the register arguments 5863 if (Op.isReg()) { 5864 Op.addRegOperands(Inst, 1); 5865 continue; 5866 } 5867 5868 // Handle the case where soffset is an immediate 5869 if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyNone) { 5870 Op.addImmOperands(Inst, 1); 5871 continue; 5872 } 5873 5874 // Handle tokens like 'offen' which are sometimes hard-coded into the 5875 // asm string. There are no MCInst operands for these. 5876 if (Op.isToken()) { 5877 continue; 5878 } 5879 assert(Op.isImm()); 5880 5881 // Handle optional arguments 5882 OptionalIdx[Op.getImmTy()] = i; 5883 } 5884 5885 addOptionalImmOperand(Inst, Operands, OptionalIdx, 5886 AMDGPUOperand::ImmTyOffset); 5887 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyFORMAT); 5888 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGLC); 5889 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySLC); 5890 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE); 5891 5892 if (isGFX10()) 5893 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDLC); 5894 } 5895 5896 //===----------------------------------------------------------------------===// 5897 // mimg 5898 //===----------------------------------------------------------------------===// 5899 5900 void AMDGPUAsmParser::cvtMIMG(MCInst &Inst, const OperandVector &Operands, 5901 bool IsAtomic) { 5902 unsigned I = 1; 5903 const MCInstrDesc &Desc = MII.get(Inst.getOpcode()); 5904 for (unsigned J = 0; J < Desc.getNumDefs(); ++J) { 5905 ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1); 5906 } 5907 5908 if (IsAtomic) { 5909 // Add src, same as dst 5910 assert(Desc.getNumDefs() == 1); 5911 ((AMDGPUOperand &)*Operands[I - 1]).addRegOperands(Inst, 1); 5912 } 5913 5914 OptionalImmIndexMap OptionalIdx; 5915 5916 for (unsigned E = Operands.size(); I != E; ++I) { 5917 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]); 5918 5919 // Add the register arguments 5920 if (Op.isReg()) { 5921 Op.addRegOperands(Inst, 1); 5922 } else if (Op.isImmModifier()) { 5923 OptionalIdx[Op.getImmTy()] = I; 5924 } else if (!Op.isToken()) { 5925 llvm_unreachable("unexpected operand type"); 5926 } 5927 } 5928 5929 bool IsGFX10 = isGFX10(); 5930 5931 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDMask); 5932 if (IsGFX10) 5933 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDim, -1); 5934 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyUNorm); 5935 if (IsGFX10) 5936 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDLC); 5937 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGLC); 5938 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySLC); 5939 addOptionalImmOperand(Inst, Operands, 
OptionalIdx, AMDGPUOperand::ImmTyR128A16); 5940 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE); 5941 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyLWE); 5942 if (!IsGFX10) 5943 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDA); 5944 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyD16); 5945 } 5946 5947 void AMDGPUAsmParser::cvtMIMGAtomic(MCInst &Inst, const OperandVector &Operands) { 5948 cvtMIMG(Inst, Operands, true); 5949 } 5950 5951 //===----------------------------------------------------------------------===// 5952 // smrd 5953 //===----------------------------------------------------------------------===// 5954 5955 bool AMDGPUOperand::isSMRDOffset8() const { 5956 return isImm() && isUInt<8>(getImm()); 5957 } 5958 5959 bool AMDGPUOperand::isSMRDOffset20() const { 5960 return isImm() && isUInt<20>(getImm()); 5961 } 5962 5963 bool AMDGPUOperand::isSMRDLiteralOffset() const { 5964 // 32-bit literals are only supported on CI and we only want to use them 5965 // when the offset is > 8-bits. 5966 return isImm() && !isUInt<8>(getImm()) && isUInt<32>(getImm()); 5967 } 5968 5969 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSMRDOffset8() const { 5970 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset); 5971 } 5972 5973 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSMRDOffset20() const { 5974 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset); 5975 } 5976 5977 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSMRDLiteralOffset() const { 5978 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset); 5979 } 5980 5981 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultFlatOffset() const { 5982 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset); 5983 } 5984 5985 //===----------------------------------------------------------------------===// 5986 // vop3 5987 //===----------------------------------------------------------------------===// 5988 5989 static bool ConvertOmodMul(int64_t &Mul) { 5990 if (Mul != 1 && Mul != 2 && Mul != 4) 5991 return false; 5992 5993 Mul >>= 1; 5994 return true; 5995 } 5996 5997 static bool ConvertOmodDiv(int64_t &Div) { 5998 if (Div == 1) { 5999 Div = 0; 6000 return true; 6001 } 6002 6003 if (Div == 2) { 6004 Div = 3; 6005 return true; 6006 } 6007 6008 return false; 6009 } 6010 6011 static bool ConvertBoundCtrl(int64_t &BoundCtrl) { 6012 if (BoundCtrl == 0) { 6013 BoundCtrl = 1; 6014 return true; 6015 } 6016 6017 if (BoundCtrl == -1) { 6018 BoundCtrl = 0; 6019 return true; 6020 } 6021 6022 return false; 6023 } 6024 6025 // Note: the order in this table matches the order of operands in AsmString. 
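// Each entry below is {Name, Type, IsBit, ConvertResult}. IsBit entries such
// as "glc" are parsed via parseNamedBit(); valued entries such as "offset" go
// through parseIntWithPrefix(), optionally post-processed by ConvertResult
// (see parseOptionalOpr below for the exact dispatch).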
6026 static const OptionalOperand AMDGPUOptionalOperandTable[] = { 6027 {"offen", AMDGPUOperand::ImmTyOffen, true, nullptr}, 6028 {"idxen", AMDGPUOperand::ImmTyIdxen, true, nullptr}, 6029 {"addr64", AMDGPUOperand::ImmTyAddr64, true, nullptr}, 6030 {"offset0", AMDGPUOperand::ImmTyOffset0, false, nullptr}, 6031 {"offset1", AMDGPUOperand::ImmTyOffset1, false, nullptr}, 6032 {"gds", AMDGPUOperand::ImmTyGDS, true, nullptr}, 6033 {"lds", AMDGPUOperand::ImmTyLDS, true, nullptr}, 6034 {"offset", AMDGPUOperand::ImmTyOffset, false, nullptr}, 6035 {"inst_offset", AMDGPUOperand::ImmTyInstOffset, false, nullptr}, 6036 {"dlc", AMDGPUOperand::ImmTyDLC, true, nullptr}, 6037 {"format", AMDGPUOperand::ImmTyFORMAT, false, nullptr}, 6038 {"glc", AMDGPUOperand::ImmTyGLC, true, nullptr}, 6039 {"slc", AMDGPUOperand::ImmTySLC, true, nullptr}, 6040 {"tfe", AMDGPUOperand::ImmTyTFE, true, nullptr}, 6041 {"d16", AMDGPUOperand::ImmTyD16, true, nullptr}, 6042 {"high", AMDGPUOperand::ImmTyHigh, true, nullptr}, 6043 {"clamp", AMDGPUOperand::ImmTyClampSI, true, nullptr}, 6044 {"omod", AMDGPUOperand::ImmTyOModSI, false, ConvertOmodMul}, 6045 {"unorm", AMDGPUOperand::ImmTyUNorm, true, nullptr}, 6046 {"da", AMDGPUOperand::ImmTyDA, true, nullptr}, 6047 {"r128", AMDGPUOperand::ImmTyR128A16, true, nullptr}, 6048 {"a16", AMDGPUOperand::ImmTyR128A16, true, nullptr}, 6049 {"lwe", AMDGPUOperand::ImmTyLWE, true, nullptr}, 6050 {"d16", AMDGPUOperand::ImmTyD16, true, nullptr}, 6051 {"dmask", AMDGPUOperand::ImmTyDMask, false, nullptr}, 6052 {"dim", AMDGPUOperand::ImmTyDim, false, nullptr}, 6053 {"row_mask", AMDGPUOperand::ImmTyDppRowMask, false, nullptr}, 6054 {"bank_mask", AMDGPUOperand::ImmTyDppBankMask, false, nullptr}, 6055 {"bound_ctrl", AMDGPUOperand::ImmTyDppBoundCtrl, false, ConvertBoundCtrl}, 6056 {"fi", AMDGPUOperand::ImmTyDppFi, false, nullptr}, 6057 {"dst_sel", AMDGPUOperand::ImmTySdwaDstSel, false, nullptr}, 6058 {"src0_sel", AMDGPUOperand::ImmTySdwaSrc0Sel, false, nullptr}, 6059 {"src1_sel", AMDGPUOperand::ImmTySdwaSrc1Sel, false, nullptr}, 6060 {"dst_unused", AMDGPUOperand::ImmTySdwaDstUnused, false, nullptr}, 6061 {"compr", AMDGPUOperand::ImmTyExpCompr, true, nullptr }, 6062 {"vm", AMDGPUOperand::ImmTyExpVM, true, nullptr}, 6063 {"op_sel", AMDGPUOperand::ImmTyOpSel, false, nullptr}, 6064 {"op_sel_hi", AMDGPUOperand::ImmTyOpSelHi, false, nullptr}, 6065 {"neg_lo", AMDGPUOperand::ImmTyNegLo, false, nullptr}, 6066 {"neg_hi", AMDGPUOperand::ImmTyNegHi, false, nullptr}, 6067 {"blgp", AMDGPUOperand::ImmTyBLGP, false, nullptr}, 6068 {"cbsz", AMDGPUOperand::ImmTyCBSZ, false, nullptr}, 6069 {"abid", AMDGPUOperand::ImmTyABID, false, nullptr} 6070 }; 6071 6072 OperandMatchResultTy AMDGPUAsmParser::parseOptionalOperand(OperandVector &Operands) { 6073 unsigned size = Operands.size(); 6074 assert(size > 0); 6075 6076 OperandMatchResultTy res = parseOptionalOpr(Operands); 6077 6078 // This is a hack to enable hardcoded mandatory operands which follow 6079 // optional operands. 6080 // 6081 // Current design assumes that all operands after the first optional operand 6082 // are also optional. However implementation of some instructions violates 6083 // this rule (see e.g. flat/global atomic which have hardcoded 'glc' operands). 6084 // 6085 // To alleviate this problem, we have to (implicitly) parse extra operands 6086 // to make sure autogenerated parser of custom operands never hit hardcoded 6087 // mandatory operands. 
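// Illustrative example of the problem (assumed syntax for a returning atomic):
//   flat_atomic_add v0, v[1:2], v3 glc
// Here 'glc' is a mandatory part of the opcode's AsmString but follows
// operands that the matcher treats as optional, so we keep parsing ahead.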
6088 6089 if (size == 1 || ((AMDGPUOperand &)*Operands[size - 1]).isRegKind()) { 6090 6091 // We have parsed the first optional operand. 6092 // Parse as many operands as necessary to skip all mandatory operands. 6093 6094 for (unsigned i = 0; i < MAX_OPR_LOOKAHEAD; ++i) { 6095 if (res != MatchOperand_Success || 6096 getLexer().is(AsmToken::EndOfStatement)) break; 6097 if (getLexer().is(AsmToken::Comma)) Parser.Lex(); 6098 res = parseOptionalOpr(Operands); 6099 } 6100 } 6101 6102 return res; 6103 } 6104 6105 OperandMatchResultTy AMDGPUAsmParser::parseOptionalOpr(OperandVector &Operands) { 6106 OperandMatchResultTy res; 6107 for (const OptionalOperand &Op : AMDGPUOptionalOperandTable) { 6108 // try to parse any optional operand here 6109 if (Op.IsBit) { 6110 res = parseNamedBit(Op.Name, Operands, Op.Type); 6111 } else if (Op.Type == AMDGPUOperand::ImmTyOModSI) { 6112 res = parseOModOperand(Operands); 6113 } else if (Op.Type == AMDGPUOperand::ImmTySdwaDstSel || 6114 Op.Type == AMDGPUOperand::ImmTySdwaSrc0Sel || 6115 Op.Type == AMDGPUOperand::ImmTySdwaSrc1Sel) { 6116 res = parseSDWASel(Operands, Op.Name, Op.Type); 6117 } else if (Op.Type == AMDGPUOperand::ImmTySdwaDstUnused) { 6118 res = parseSDWADstUnused(Operands); 6119 } else if (Op.Type == AMDGPUOperand::ImmTyOpSel || 6120 Op.Type == AMDGPUOperand::ImmTyOpSelHi || 6121 Op.Type == AMDGPUOperand::ImmTyNegLo || 6122 Op.Type == AMDGPUOperand::ImmTyNegHi) { 6123 res = parseOperandArrayWithPrefix(Op.Name, Operands, Op.Type, 6124 Op.ConvertResult); 6125 } else if (Op.Type == AMDGPUOperand::ImmTyDim) { 6126 res = parseDim(Operands); 6127 } else if (Op.Type == AMDGPUOperand::ImmTyFORMAT && !isGFX10()) { 6128 res = parseDfmtNfmt(Operands); 6129 } else { 6130 res = parseIntWithPrefix(Op.Name, Operands, Op.Type, Op.ConvertResult); 6131 } 6132 if (res != MatchOperand_NoMatch) { 6133 return res; 6134 } 6135 } 6136 return MatchOperand_NoMatch; 6137 } 6138 6139 OperandMatchResultTy AMDGPUAsmParser::parseOModOperand(OperandVector &Operands) { 6140 StringRef Name = Parser.getTok().getString(); 6141 if (Name == "mul") { 6142 return parseIntWithPrefix("mul", Operands, 6143 AMDGPUOperand::ImmTyOModSI, ConvertOmodMul); 6144 } 6145 6146 if (Name == "div") { 6147 return parseIntWithPrefix("div", Operands, 6148 AMDGPUOperand::ImmTyOModSI, ConvertOmodDiv); 6149 } 6150 6151 return MatchOperand_NoMatch; 6152 } 6153 6154 void AMDGPUAsmParser::cvtVOP3OpSel(MCInst &Inst, const OperandVector &Operands) { 6155 cvtVOP3P(Inst, Operands); 6156 6157 int Opc = Inst.getOpcode(); 6158 6159 int SrcNum; 6160 const int Ops[] = { AMDGPU::OpName::src0, 6161 AMDGPU::OpName::src1, 6162 AMDGPU::OpName::src2 }; 6163 for (SrcNum = 0; 6164 SrcNum < 3 && AMDGPU::getNamedOperandIdx(Opc, Ops[SrcNum]) != -1; 6165 ++SrcNum); 6166 assert(SrcNum > 0); 6167 6168 int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel); 6169 unsigned OpSel = Inst.getOperand(OpSelIdx).getImm(); 6170 6171 if ((OpSel & (1 << SrcNum)) != 0) { 6172 int ModIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0_modifiers); 6173 uint32_t ModVal = Inst.getOperand(ModIdx).getImm(); 6174 Inst.getOperand(ModIdx).setImm(ModVal | SISrcMods::DST_OP_SEL); 6175 } 6176 } 6177 6178 static bool isRegOrImmWithInputMods(const MCInstrDesc &Desc, unsigned OpNum) { 6179 // 1. This operand is input modifiers 6180 return Desc.OpInfo[OpNum].OperandType == AMDGPU::OPERAND_INPUT_MODS 6181 // 2. This is not last operand 6182 && Desc.NumOperands > (OpNum + 1) 6183 // 3. 
Next operand is register class 6184 && Desc.OpInfo[OpNum + 1].RegClass != -1 6185 // 4. Next register is not tied to any other operand 6186 && Desc.getOperandConstraint(OpNum + 1, MCOI::OperandConstraint::TIED_TO) == -1; 6187 } 6188 6189 void AMDGPUAsmParser::cvtVOP3Interp(MCInst &Inst, const OperandVector &Operands) 6190 { 6191 OptionalImmIndexMap OptionalIdx; 6192 unsigned Opc = Inst.getOpcode(); 6193 6194 unsigned I = 1; 6195 const MCInstrDesc &Desc = MII.get(Inst.getOpcode()); 6196 for (unsigned J = 0; J < Desc.getNumDefs(); ++J) { 6197 ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1); 6198 } 6199 6200 for (unsigned E = Operands.size(); I != E; ++I) { 6201 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]); 6202 if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) { 6203 Op.addRegOrImmWithFPInputModsOperands(Inst, 2); 6204 } else if (Op.isInterpSlot() || 6205 Op.isInterpAttr() || 6206 Op.isAttrChan()) { 6207 Inst.addOperand(MCOperand::createImm(Op.getImm())); 6208 } else if (Op.isImmModifier()) { 6209 OptionalIdx[Op.getImmTy()] = I; 6210 } else { 6211 llvm_unreachable("unhandled operand type"); 6212 } 6213 } 6214 6215 if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::high) != -1) { 6216 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyHigh); 6217 } 6218 6219 if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp) != -1) { 6220 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI); 6221 } 6222 6223 if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::omod) != -1) { 6224 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI); 6225 } 6226 } 6227 6228 void AMDGPUAsmParser::cvtVOP3(MCInst &Inst, const OperandVector &Operands, 6229 OptionalImmIndexMap &OptionalIdx) { 6230 unsigned Opc = Inst.getOpcode(); 6231 6232 unsigned I = 1; 6233 const MCInstrDesc &Desc = MII.get(Inst.getOpcode()); 6234 for (unsigned J = 0; J < Desc.getNumDefs(); ++J) { 6235 ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1); 6236 } 6237 6238 if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0_modifiers) != -1) { 6239 // This instruction has src modifiers 6240 for (unsigned E = Operands.size(); I != E; ++I) { 6241 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]); 6242 if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) { 6243 Op.addRegOrImmWithFPInputModsOperands(Inst, 2); 6244 } else if (Op.isImmModifier()) { 6245 OptionalIdx[Op.getImmTy()] = I; 6246 } else if (Op.isRegOrImm()) { 6247 Op.addRegOrImmOperands(Inst, 1); 6248 } else { 6249 llvm_unreachable("unhandled operand type"); 6250 } 6251 } 6252 } else { 6253 // No src modifiers 6254 for (unsigned E = Operands.size(); I != E; ++I) { 6255 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]); 6256 if (Op.isMod()) { 6257 OptionalIdx[Op.getImmTy()] = I; 6258 } else { 6259 Op.addRegOrImmOperands(Inst, 1); 6260 } 6261 } 6262 } 6263 6264 if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp) != -1) { 6265 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI); 6266 } 6267 6268 if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::omod) != -1) { 6269 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI); 6270 } 6271 6272 // Special case v_mac_{f16, f32} and v_fmac_{f16, f32} (gfx906/gfx10+): 6273 // it has src2 register operand that is tied to dst operand 6274 // we don't allow modifiers for this operand in assembler so src2_modifiers 6275 // should be 0. 
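// Sketch of the fixup performed below (operand order is illustrative):
//   asm:    v_mac_f32_e64 v0, v1, v2              ; no explicit src2
//   MCInst: ..., src2_modifiers = 0, src2 = <operand 0>  ; src2 tied to dst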
6276 if (Opc == AMDGPU::V_MAC_F32_e64_gfx6_gfx7 || 6277 Opc == AMDGPU::V_MAC_F32_e64_gfx10 || 6278 Opc == AMDGPU::V_MAC_F32_e64_vi || 6279 Opc == AMDGPU::V_MAC_F16_e64_vi || 6280 Opc == AMDGPU::V_FMAC_F32_e64_gfx10 || 6281 Opc == AMDGPU::V_FMAC_F32_e64_vi || 6282 Opc == AMDGPU::V_FMAC_F16_e64_gfx10) { 6283 auto it = Inst.begin(); 6284 std::advance(it, AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2_modifiers)); 6285 it = Inst.insert(it, MCOperand::createImm(0)); // no modifiers for src2 6286 ++it; 6287 Inst.insert(it, Inst.getOperand(0)); // src2 = dst 6288 } 6289 } 6290 6291 void AMDGPUAsmParser::cvtVOP3(MCInst &Inst, const OperandVector &Operands) { 6292 OptionalImmIndexMap OptionalIdx; 6293 cvtVOP3(Inst, Operands, OptionalIdx); 6294 } 6295 6296 void AMDGPUAsmParser::cvtVOP3P(MCInst &Inst, 6297 const OperandVector &Operands) { 6298 OptionalImmIndexMap OptIdx; 6299 const int Opc = Inst.getOpcode(); 6300 const MCInstrDesc &Desc = MII.get(Opc); 6301 6302 const bool IsPacked = (Desc.TSFlags & SIInstrFlags::IsPacked) != 0; 6303 6304 cvtVOP3(Inst, Operands, OptIdx); 6305 6306 if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdst_in) != -1) { 6307 assert(!IsPacked); 6308 Inst.addOperand(Inst.getOperand(0)); 6309 } 6310 6311 // FIXME: This is messy. Parse the modifiers as if it was a normal VOP3 6312 // instruction, and then figure out where to actually put the modifiers 6313 6314 addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyOpSel); 6315 6316 int OpSelHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel_hi); 6317 if (OpSelHiIdx != -1) { 6318 int DefaultVal = IsPacked ? -1 : 0; 6319 addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyOpSelHi, 6320 DefaultVal); 6321 } 6322 6323 int NegLoIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::neg_lo); 6324 if (NegLoIdx != -1) { 6325 assert(IsPacked); 6326 addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyNegLo); 6327 addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyNegHi); 6328 } 6329 6330 const int Ops[] = { AMDGPU::OpName::src0, 6331 AMDGPU::OpName::src1, 6332 AMDGPU::OpName::src2 }; 6333 const int ModOps[] = { AMDGPU::OpName::src0_modifiers, 6334 AMDGPU::OpName::src1_modifiers, 6335 AMDGPU::OpName::src2_modifiers }; 6336 6337 int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel); 6338 6339 unsigned OpSel = Inst.getOperand(OpSelIdx).getImm(); 6340 unsigned OpSelHi = 0; 6341 unsigned NegLo = 0; 6342 unsigned NegHi = 0; 6343 6344 if (OpSelHiIdx != -1) { 6345 OpSelHi = Inst.getOperand(OpSelHiIdx).getImm(); 6346 } 6347 6348 if (NegLoIdx != -1) { 6349 int NegHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::neg_hi); 6350 NegLo = Inst.getOperand(NegLoIdx).getImm(); 6351 NegHi = Inst.getOperand(NegHiIdx).getImm(); 6352 } 6353 6354 for (int J = 0; J < 3; ++J) { 6355 int OpIdx = AMDGPU::getNamedOperandIdx(Opc, Ops[J]); 6356 if (OpIdx == -1) 6357 break; 6358 6359 uint32_t ModVal = 0; 6360 6361 if ((OpSel & (1 << J)) != 0) 6362 ModVal |= SISrcMods::OP_SEL_0; 6363 6364 if ((OpSelHi & (1 << J)) != 0) 6365 ModVal |= SISrcMods::OP_SEL_1; 6366 6367 if ((NegLo & (1 << J)) != 0) 6368 ModVal |= SISrcMods::NEG; 6369 6370 if ((NegHi & (1 << J)) != 0) 6371 ModVal |= SISrcMods::NEG_HI; 6372 6373 int ModIdx = AMDGPU::getNamedOperandIdx(Opc, ModOps[J]); 6374 6375 Inst.getOperand(ModIdx).setImm(Inst.getOperand(ModIdx).getImm() | ModVal); 6376 } 6377 } 6378 6379 //===----------------------------------------------------------------------===// 6380 // dpp 6381 
//===----------------------------------------------------------------------===// 6382 6383 bool AMDGPUOperand::isDPP8() const { 6384 return isImmTy(ImmTyDPP8); 6385 } 6386 6387 bool AMDGPUOperand::isDPPCtrl() const { 6388 using namespace AMDGPU::DPP; 6389 6390 bool result = isImm() && getImmTy() == ImmTyDppCtrl && isUInt<9>(getImm()); 6391 if (result) { 6392 int64_t Imm = getImm(); 6393 return (Imm >= DppCtrl::QUAD_PERM_FIRST && Imm <= DppCtrl::QUAD_PERM_LAST) || 6394 (Imm >= DppCtrl::ROW_SHL_FIRST && Imm <= DppCtrl::ROW_SHL_LAST) || 6395 (Imm >= DppCtrl::ROW_SHR_FIRST && Imm <= DppCtrl::ROW_SHR_LAST) || 6396 (Imm >= DppCtrl::ROW_ROR_FIRST && Imm <= DppCtrl::ROW_ROR_LAST) || 6397 (Imm == DppCtrl::WAVE_SHL1) || 6398 (Imm == DppCtrl::WAVE_ROL1) || 6399 (Imm == DppCtrl::WAVE_SHR1) || 6400 (Imm == DppCtrl::WAVE_ROR1) || 6401 (Imm == DppCtrl::ROW_MIRROR) || 6402 (Imm == DppCtrl::ROW_HALF_MIRROR) || 6403 (Imm == DppCtrl::BCAST15) || 6404 (Imm == DppCtrl::BCAST31) || 6405 (Imm >= DppCtrl::ROW_SHARE_FIRST && Imm <= DppCtrl::ROW_SHARE_LAST) || 6406 (Imm >= DppCtrl::ROW_XMASK_FIRST && Imm <= DppCtrl::ROW_XMASK_LAST); 6407 } 6408 return false; 6409 } 6410 6411 //===----------------------------------------------------------------------===// 6412 // mAI 6413 //===----------------------------------------------------------------------===// 6414 6415 bool AMDGPUOperand::isBLGP() const { 6416 return isImm() && getImmTy() == ImmTyBLGP && isUInt<3>(getImm()); 6417 } 6418 6419 bool AMDGPUOperand::isCBSZ() const { 6420 return isImm() && getImmTy() == ImmTyCBSZ && isUInt<3>(getImm()); 6421 } 6422 6423 bool AMDGPUOperand::isABID() const { 6424 return isImm() && getImmTy() == ImmTyABID && isUInt<4>(getImm()); 6425 } 6426 6427 bool AMDGPUOperand::isS16Imm() const { 6428 return isImm() && (isInt<16>(getImm()) || isUInt<16>(getImm())); 6429 } 6430 6431 bool AMDGPUOperand::isU16Imm() const { 6432 return isImm() && isUInt<16>(getImm()); 6433 } 6434 6435 OperandMatchResultTy AMDGPUAsmParser::parseDim(OperandVector &Operands) { 6436 if (!isGFX10()) 6437 return MatchOperand_NoMatch; 6438 6439 SMLoc S = Parser.getTok().getLoc(); 6440 6441 if (getLexer().isNot(AsmToken::Identifier)) 6442 return MatchOperand_NoMatch; 6443 if (getLexer().getTok().getString() != "dim") 6444 return MatchOperand_NoMatch; 6445 6446 Parser.Lex(); 6447 if (getLexer().isNot(AsmToken::Colon)) 6448 return MatchOperand_ParseFail; 6449 6450 Parser.Lex(); 6451 6452 // We want to allow "dim:1D" etc., but the initial 1 is tokenized as an 6453 // integer. 
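// For example (illustrative), "dim:1D" lexes as Integer("1") followed by
// Identifier("D"), while "dim:SQ_RSRC_IMG_2D" is a single identifier; both
// are re-assembled into one token string below before the dim-info lookup.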
6454 std::string Token; 6455 if (getLexer().is(AsmToken::Integer)) { 6456 SMLoc Loc = getLexer().getTok().getEndLoc(); 6457 Token = getLexer().getTok().getString(); 6458 Parser.Lex(); 6459 if (getLexer().getTok().getLoc() != Loc) 6460 return MatchOperand_ParseFail; 6461 } 6462 if (getLexer().isNot(AsmToken::Identifier)) 6463 return MatchOperand_ParseFail; 6464 Token += getLexer().getTok().getString(); 6465 6466 StringRef DimId = Token; 6467 if (DimId.startswith("SQ_RSRC_IMG_")) 6468 DimId = DimId.substr(12); 6469 6470 const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfoByAsmSuffix(DimId); 6471 if (!DimInfo) 6472 return MatchOperand_ParseFail; 6473 6474 Parser.Lex(); 6475 6476 Operands.push_back(AMDGPUOperand::CreateImm(this, DimInfo->Encoding, S, 6477 AMDGPUOperand::ImmTyDim)); 6478 return MatchOperand_Success; 6479 } 6480 6481 OperandMatchResultTy AMDGPUAsmParser::parseDPP8(OperandVector &Operands) { 6482 SMLoc S = Parser.getTok().getLoc(); 6483 StringRef Prefix; 6484 6485 if (getLexer().getKind() == AsmToken::Identifier) { 6486 Prefix = Parser.getTok().getString(); 6487 } else { 6488 return MatchOperand_NoMatch; 6489 } 6490 6491 if (Prefix != "dpp8") 6492 return parseDPPCtrl(Operands); 6493 if (!isGFX10()) 6494 return MatchOperand_NoMatch; 6495 6496 // dpp8:[%d,%d,%d,%d,%d,%d,%d,%d] 6497 6498 int64_t Sels[8]; 6499 6500 Parser.Lex(); 6501 if (getLexer().isNot(AsmToken::Colon)) 6502 return MatchOperand_ParseFail; 6503 6504 Parser.Lex(); 6505 if (getLexer().isNot(AsmToken::LBrac)) 6506 return MatchOperand_ParseFail; 6507 6508 Parser.Lex(); 6509 if (getParser().parseAbsoluteExpression(Sels[0])) 6510 return MatchOperand_ParseFail; 6511 if (0 > Sels[0] || 7 < Sels[0]) 6512 return MatchOperand_ParseFail; 6513 6514 for (size_t i = 1; i < 8; ++i) { 6515 if (getLexer().isNot(AsmToken::Comma)) 6516 return MatchOperand_ParseFail; 6517 6518 Parser.Lex(); 6519 if (getParser().parseAbsoluteExpression(Sels[i])) 6520 return MatchOperand_ParseFail; 6521 if (0 > Sels[i] || 7 < Sels[i]) 6522 return MatchOperand_ParseFail; 6523 } 6524 6525 if (getLexer().isNot(AsmToken::RBrac)) 6526 return MatchOperand_ParseFail; 6527 Parser.Lex(); 6528 6529 unsigned DPP8 = 0; 6530 for (size_t i = 0; i < 8; ++i) 6531 DPP8 |= (Sels[i] << (i * 3)); 6532 6533 Operands.push_back(AMDGPUOperand::CreateImm(this, DPP8, S, AMDGPUOperand::ImmTyDPP8)); 6534 return MatchOperand_Success; 6535 } 6536 6537 OperandMatchResultTy 6538 AMDGPUAsmParser::parseDPPCtrl(OperandVector &Operands) { 6539 using namespace AMDGPU::DPP; 6540 6541 SMLoc S = Parser.getTok().getLoc(); 6542 StringRef Prefix; 6543 int64_t Int; 6544 6545 if (getLexer().getKind() == AsmToken::Identifier) { 6546 Prefix = Parser.getTok().getString(); 6547 } else { 6548 return MatchOperand_NoMatch; 6549 } 6550 6551 if (Prefix == "row_mirror") { 6552 Int = DppCtrl::ROW_MIRROR; 6553 Parser.Lex(); 6554 } else if (Prefix == "row_half_mirror") { 6555 Int = DppCtrl::ROW_HALF_MIRROR; 6556 Parser.Lex(); 6557 } else { 6558 // Check to prevent parseDPPCtrlOps from eating invalid tokens 6559 if (Prefix != "quad_perm" 6560 && Prefix != "row_shl" 6561 && Prefix != "row_shr" 6562 && Prefix != "row_ror" 6563 && Prefix != "wave_shl" 6564 && Prefix != "wave_rol" 6565 && Prefix != "wave_shr" 6566 && Prefix != "wave_ror" 6567 && Prefix != "row_bcast" 6568 && Prefix != "row_share" 6569 && Prefix != "row_xmask") { 6570 return MatchOperand_NoMatch; 6571 } 6572 6573 if (!isGFX10() && (Prefix == "row_share" || Prefix == "row_xmask")) 6574 return MatchOperand_NoMatch; 6575 6576 if (!isVI() && 
!isGFX9() && 6577 (Prefix == "wave_shl" || Prefix == "wave_shr" || 6578 Prefix == "wave_rol" || Prefix == "wave_ror" || 6579 Prefix == "row_bcast")) 6580 return MatchOperand_NoMatch; 6581 6582 Parser.Lex(); 6583 if (getLexer().isNot(AsmToken::Colon)) 6584 return MatchOperand_ParseFail; 6585 6586 if (Prefix == "quad_perm") { 6587 // quad_perm:[%d,%d,%d,%d] 6588 Parser.Lex(); 6589 if (getLexer().isNot(AsmToken::LBrac)) 6590 return MatchOperand_ParseFail; 6591 Parser.Lex(); 6592 6593 if (getParser().parseAbsoluteExpression(Int) || !(0 <= Int && Int <=3)) 6594 return MatchOperand_ParseFail; 6595 6596 for (int i = 0; i < 3; ++i) { 6597 if (getLexer().isNot(AsmToken::Comma)) 6598 return MatchOperand_ParseFail; 6599 Parser.Lex(); 6600 6601 int64_t Temp; 6602 if (getParser().parseAbsoluteExpression(Temp) || !(0 <= Temp && Temp <=3)) 6603 return MatchOperand_ParseFail; 6604 const int shift = i*2 + 2; 6605 Int += (Temp << shift); 6606 } 6607 6608 if (getLexer().isNot(AsmToken::RBrac)) 6609 return MatchOperand_ParseFail; 6610 Parser.Lex(); 6611 } else { 6612 // sel:%d 6613 Parser.Lex(); 6614 if (getParser().parseAbsoluteExpression(Int)) 6615 return MatchOperand_ParseFail; 6616 6617 if (Prefix == "row_shl" && 1 <= Int && Int <= 15) { 6618 Int |= DppCtrl::ROW_SHL0; 6619 } else if (Prefix == "row_shr" && 1 <= Int && Int <= 15) { 6620 Int |= DppCtrl::ROW_SHR0; 6621 } else if (Prefix == "row_ror" && 1 <= Int && Int <= 15) { 6622 Int |= DppCtrl::ROW_ROR0; 6623 } else if (Prefix == "wave_shl" && 1 == Int) { 6624 Int = DppCtrl::WAVE_SHL1; 6625 } else if (Prefix == "wave_rol" && 1 == Int) { 6626 Int = DppCtrl::WAVE_ROL1; 6627 } else if (Prefix == "wave_shr" && 1 == Int) { 6628 Int = DppCtrl::WAVE_SHR1; 6629 } else if (Prefix == "wave_ror" && 1 == Int) { 6630 Int = DppCtrl::WAVE_ROR1; 6631 } else if (Prefix == "row_bcast") { 6632 if (Int == 15) { 6633 Int = DppCtrl::BCAST15; 6634 } else if (Int == 31) { 6635 Int = DppCtrl::BCAST31; 6636 } else { 6637 return MatchOperand_ParseFail; 6638 } 6639 } else if (Prefix == "row_share" && 0 <= Int && Int <= 15) { 6640 Int |= DppCtrl::ROW_SHARE_FIRST; 6641 } else if (Prefix == "row_xmask" && 0 <= Int && Int <= 15) { 6642 Int |= DppCtrl::ROW_XMASK_FIRST; 6643 } else { 6644 return MatchOperand_ParseFail; 6645 } 6646 } 6647 } 6648 6649 Operands.push_back(AMDGPUOperand::CreateImm(this, Int, S, AMDGPUOperand::ImmTyDppCtrl)); 6650 return MatchOperand_Success; 6651 } 6652 6653 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultRowMask() const { 6654 return AMDGPUOperand::CreateImm(this, 0xf, SMLoc(), AMDGPUOperand::ImmTyDppRowMask); 6655 } 6656 6657 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultEndpgmImmOperands() const { 6658 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyEndpgm); 6659 } 6660 6661 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultBankMask() const { 6662 return AMDGPUOperand::CreateImm(this, 0xf, SMLoc(), AMDGPUOperand::ImmTyDppBankMask); 6663 } 6664 6665 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultBoundCtrl() const { 6666 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyDppBoundCtrl); 6667 } 6668 6669 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultFI() const { 6670 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyDppFi); 6671 } 6672 6673 void AMDGPUAsmParser::cvtDPP(MCInst &Inst, const OperandVector &Operands, bool IsDPP8) { 6674 OptionalImmIndexMap OptionalIdx; 6675 6676 unsigned I = 1; 6677 const MCInstrDesc &Desc = MII.get(Inst.getOpcode()); 6678 for (unsigned J = 0; J < Desc.getNumDefs(); ++J) { 6679 ((AMDGPUOperand 
&)*Operands[I++]).addRegOperands(Inst, 1); 6680 } 6681 6682 int Fi = 0; 6683 for (unsigned E = Operands.size(); I != E; ++I) { 6684 auto TiedTo = Desc.getOperandConstraint(Inst.getNumOperands(), 6685 MCOI::TIED_TO); 6686 if (TiedTo != -1) { 6687 assert((unsigned)TiedTo < Inst.getNumOperands()); 6688 // handle tied old or src2 for MAC instructions 6689 Inst.addOperand(Inst.getOperand(TiedTo)); 6690 } 6691 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]); 6692 // Add the register arguments 6693 if (Op.isReg() && validateVccOperand(Op.getReg())) { 6694 // VOP2b (v_add_u32, v_sub_u32 ...) dpp use "vcc" token. 6695 // Skip it. 6696 continue; 6697 } 6698 6699 if (IsDPP8) { 6700 if (Op.isDPP8()) { 6701 Op.addImmOperands(Inst, 1); 6702 } else if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) { 6703 Op.addRegWithFPInputModsOperands(Inst, 2); 6704 } else if (Op.isFI()) { 6705 Fi = Op.getImm(); 6706 } else if (Op.isReg()) { 6707 Op.addRegOperands(Inst, 1); 6708 } else { 6709 llvm_unreachable("Invalid operand type"); 6710 } 6711 } else { 6712 if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) { 6713 Op.addRegWithFPInputModsOperands(Inst, 2); 6714 } else if (Op.isDPPCtrl()) { 6715 Op.addImmOperands(Inst, 1); 6716 } else if (Op.isImm()) { 6717 // Handle optional arguments 6718 OptionalIdx[Op.getImmTy()] = I; 6719 } else { 6720 llvm_unreachable("Invalid operand type"); 6721 } 6722 } 6723 } 6724 6725 if (IsDPP8) { 6726 using namespace llvm::AMDGPU::DPP; 6727 Inst.addOperand(MCOperand::createImm(Fi? DPP8_FI_1 : DPP8_FI_0)); 6728 } else { 6729 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppRowMask, 0xf); 6730 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBankMask, 0xf); 6731 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBoundCtrl); 6732 if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::fi) != -1) { 6733 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppFi); 6734 } 6735 } 6736 } 6737 6738 //===----------------------------------------------------------------------===// 6739 // sdwa 6740 //===----------------------------------------------------------------------===// 6741 6742 OperandMatchResultTy 6743 AMDGPUAsmParser::parseSDWASel(OperandVector &Operands, StringRef Prefix, 6744 AMDGPUOperand::ImmTy Type) { 6745 using namespace llvm::AMDGPU::SDWA; 6746 6747 SMLoc S = Parser.getTok().getLoc(); 6748 StringRef Value; 6749 OperandMatchResultTy res; 6750 6751 res = parseStringWithPrefix(Prefix, Value); 6752 if (res != MatchOperand_Success) { 6753 return res; 6754 } 6755 6756 int64_t Int; 6757 Int = StringSwitch<int64_t>(Value) 6758 .Case("BYTE_0", SdwaSel::BYTE_0) 6759 .Case("BYTE_1", SdwaSel::BYTE_1) 6760 .Case("BYTE_2", SdwaSel::BYTE_2) 6761 .Case("BYTE_3", SdwaSel::BYTE_3) 6762 .Case("WORD_0", SdwaSel::WORD_0) 6763 .Case("WORD_1", SdwaSel::WORD_1) 6764 .Case("DWORD", SdwaSel::DWORD) 6765 .Default(0xffffffff); 6766 Parser.Lex(); // eat last token 6767 6768 if (Int == 0xffffffff) { 6769 return MatchOperand_ParseFail; 6770 } 6771 6772 Operands.push_back(AMDGPUOperand::CreateImm(this, Int, S, Type)); 6773 return MatchOperand_Success; 6774 } 6775 6776 OperandMatchResultTy 6777 AMDGPUAsmParser::parseSDWADstUnused(OperandVector &Operands) { 6778 using namespace llvm::AMDGPU::SDWA; 6779 6780 SMLoc S = Parser.getTok().getLoc(); 6781 StringRef Value; 6782 OperandMatchResultTy res; 6783 6784 res = parseStringWithPrefix("dst_unused", Value); 6785 if (res != MatchOperand_Success) { 6786 
return res; 6787 } 6788 6789 int64_t Int; 6790 Int = StringSwitch<int64_t>(Value) 6791 .Case("UNUSED_PAD", DstUnused::UNUSED_PAD) 6792 .Case("UNUSED_SEXT", DstUnused::UNUSED_SEXT) 6793 .Case("UNUSED_PRESERVE", DstUnused::UNUSED_PRESERVE) 6794 .Default(0xffffffff); 6795 Parser.Lex(); // eat last token 6796 6797 if (Int == 0xffffffff) { 6798 return MatchOperand_ParseFail; 6799 } 6800 6801 Operands.push_back(AMDGPUOperand::CreateImm(this, Int, S, AMDGPUOperand::ImmTySdwaDstUnused)); 6802 return MatchOperand_Success; 6803 } 6804 6805 void AMDGPUAsmParser::cvtSdwaVOP1(MCInst &Inst, const OperandVector &Operands) { 6806 cvtSDWA(Inst, Operands, SIInstrFlags::VOP1); 6807 } 6808 6809 void AMDGPUAsmParser::cvtSdwaVOP2(MCInst &Inst, const OperandVector &Operands) { 6810 cvtSDWA(Inst, Operands, SIInstrFlags::VOP2); 6811 } 6812 6813 void AMDGPUAsmParser::cvtSdwaVOP2b(MCInst &Inst, const OperandVector &Operands) { 6814 cvtSDWA(Inst, Operands, SIInstrFlags::VOP2, true); 6815 } 6816 6817 void AMDGPUAsmParser::cvtSdwaVOPC(MCInst &Inst, const OperandVector &Operands) { 6818 cvtSDWA(Inst, Operands, SIInstrFlags::VOPC, isVI()); 6819 } 6820 6821 void AMDGPUAsmParser::cvtSDWA(MCInst &Inst, const OperandVector &Operands, 6822 uint64_t BasicInstType, bool skipVcc) { 6823 using namespace llvm::AMDGPU::SDWA; 6824 6825 OptionalImmIndexMap OptionalIdx; 6826 bool skippedVcc = false; 6827 6828 unsigned I = 1; 6829 const MCInstrDesc &Desc = MII.get(Inst.getOpcode()); 6830 for (unsigned J = 0; J < Desc.getNumDefs(); ++J) { 6831 ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1); 6832 } 6833 6834 for (unsigned E = Operands.size(); I != E; ++I) { 6835 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]); 6836 if (skipVcc && !skippedVcc && Op.isReg() && 6837 (Op.getReg() == AMDGPU::VCC || Op.getReg() == AMDGPU::VCC_LO)) { 6838 // VOP2b (v_add_u32, v_sub_u32 ...) sdwa use "vcc" token as dst. 6839 // Skip it if it's 2nd (e.g. v_add_i32_sdwa v1, vcc, v2, v3) 6840 // or 4th (v_addc_u32_sdwa v1, vcc, v2, v3, vcc) operand. 6841 // Skip VCC only if we didn't skip it on previous iteration. 
6842 if (BasicInstType == SIInstrFlags::VOP2 && 6843 (Inst.getNumOperands() == 1 || Inst.getNumOperands() == 5)) { 6844 skippedVcc = true; 6845 continue; 6846 } else if (BasicInstType == SIInstrFlags::VOPC && 6847 Inst.getNumOperands() == 0) { 6848 skippedVcc = true; 6849 continue; 6850 } 6851 } 6852 if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) { 6853 Op.addRegOrImmWithInputModsOperands(Inst, 2); 6854 } else if (Op.isImm()) { 6855 // Handle optional arguments 6856 OptionalIdx[Op.getImmTy()] = I; 6857 } else { 6858 llvm_unreachable("Invalid operand type"); 6859 } 6860 skippedVcc = false; 6861 } 6862 6863 if (Inst.getOpcode() != AMDGPU::V_NOP_sdwa_gfx10 && 6864 Inst.getOpcode() != AMDGPU::V_NOP_sdwa_gfx9 && 6865 Inst.getOpcode() != AMDGPU::V_NOP_sdwa_vi) { 6866 // v_nop_sdwa_sdwa_vi/gfx9 has no optional sdwa arguments 6867 switch (BasicInstType) { 6868 case SIInstrFlags::VOP1: 6869 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0); 6870 if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::omod) != -1) { 6871 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI, 0); 6872 } 6873 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstSel, SdwaSel::DWORD); 6874 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstUnused, DstUnused::UNUSED_PRESERVE); 6875 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, SdwaSel::DWORD); 6876 break; 6877 6878 case SIInstrFlags::VOP2: 6879 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0); 6880 if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::omod) != -1) { 6881 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI, 0); 6882 } 6883 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstSel, SdwaSel::DWORD); 6884 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstUnused, DstUnused::UNUSED_PRESERVE); 6885 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, SdwaSel::DWORD); 6886 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc1Sel, SdwaSel::DWORD); 6887 break; 6888 6889 case SIInstrFlags::VOPC: 6890 if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::clamp) != -1) 6891 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0); 6892 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, SdwaSel::DWORD); 6893 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc1Sel, SdwaSel::DWORD); 6894 break; 6895 6896 default: 6897 llvm_unreachable("Invalid instruction type. 
Only VOP1, VOP2 and VOPC allowed"); 6898 } 6899 } 6900 6901 // special case v_mac_{f16, f32}: 6902 // it has src2 register operand that is tied to dst operand 6903 if (Inst.getOpcode() == AMDGPU::V_MAC_F32_sdwa_vi || 6904 Inst.getOpcode() == AMDGPU::V_MAC_F16_sdwa_vi) { 6905 auto it = Inst.begin(); 6906 std::advance( 6907 it, AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::src2)); 6908 Inst.insert(it, Inst.getOperand(0)); // src2 = dst 6909 } 6910 } 6911 6912 //===----------------------------------------------------------------------===// 6913 // mAI 6914 //===----------------------------------------------------------------------===// 6915 6916 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultBLGP() const { 6917 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyBLGP); 6918 } 6919 6920 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultCBSZ() const { 6921 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyCBSZ); 6922 } 6923 6924 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultABID() const { 6925 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyABID); 6926 } 6927 6928 /// Force static initialization. 6929 extern "C" void LLVMInitializeAMDGPUAsmParser() { 6930 RegisterMCAsmParser<AMDGPUAsmParser> A(getTheAMDGPUTarget()); 6931 RegisterMCAsmParser<AMDGPUAsmParser> B(getTheGCNTarget()); 6932 } 6933 6934 #define GET_REGISTER_MATCHER 6935 #define GET_MATCHER_IMPLEMENTATION 6936 #define GET_MNEMONIC_SPELL_CHECKER 6937 #include "AMDGPUGenAsmMatcher.inc" 6938 6939 // This fuction should be defined after auto-generated include so that we have 6940 // MatchClassKind enum defined 6941 unsigned AMDGPUAsmParser::validateTargetOperandClass(MCParsedAsmOperand &Op, 6942 unsigned Kind) { 6943 // Tokens like "glc" would be parsed as immediate operands in ParseOperand(). 6944 // But MatchInstructionImpl() expects to meet token and fails to validate 6945 // operand. This method checks if we are given immediate operand but expect to 6946 // get corresponding token. 6947 AMDGPUOperand &Operand = (AMDGPUOperand&)Op; 6948 switch (Kind) { 6949 case MCK_addr64: 6950 return Operand.isAddr64() ? Match_Success : Match_InvalidOperand; 6951 case MCK_gds: 6952 return Operand.isGDS() ? Match_Success : Match_InvalidOperand; 6953 case MCK_lds: 6954 return Operand.isLDS() ? Match_Success : Match_InvalidOperand; 6955 case MCK_glc: 6956 return Operand.isGLC() ? Match_Success : Match_InvalidOperand; 6957 case MCK_idxen: 6958 return Operand.isIdxen() ? Match_Success : Match_InvalidOperand; 6959 case MCK_offen: 6960 return Operand.isOffen() ? Match_Success : Match_InvalidOperand; 6961 case MCK_SSrcB32: 6962 // When operands have expression values, they will return true for isToken, 6963 // because it is not possible to distinguish between a token and an 6964 // expression at parse time. MatchInstructionImpl() will always try to 6965 // match an operand as a token, when isToken returns true, and when the 6966 // name of the expression is not a valid token, the match will fail, 6967 // so we need to handle it here. 6968 return Operand.isSSrcB32() ? Match_Success : Match_InvalidOperand; 6969 case MCK_SSrcF32: 6970 return Operand.isSSrcF32() ? Match_Success : Match_InvalidOperand; 6971 case MCK_SoppBrTarget: 6972 return Operand.isSoppBrTarget() ? Match_Success : Match_InvalidOperand; 6973 case MCK_VReg32OrOff: 6974 return Operand.isVReg32OrOff() ? Match_Success : Match_InvalidOperand; 6975 case MCK_InterpSlot: 6976 return Operand.isInterpSlot() ? 
Match_Success : Match_InvalidOperand; 6977 case MCK_Attr: 6978 return Operand.isInterpAttr() ? Match_Success : Match_InvalidOperand; 6979 case MCK_AttrChan: 6980 return Operand.isAttrChan() ? Match_Success : Match_InvalidOperand; 6981 default: 6982 return Match_InvalidOperand; 6983 } 6984 } 6985 6986 //===----------------------------------------------------------------------===// 6987 // endpgm 6988 //===----------------------------------------------------------------------===// 6989 6990 OperandMatchResultTy AMDGPUAsmParser::parseEndpgmOp(OperandVector &Operands) { 6991 SMLoc S = Parser.getTok().getLoc(); 6992 int64_t Imm = 0; 6993 6994 if (!parseExpr(Imm)) { 6995 // The operand is optional, if not present default to 0 6996 Imm = 0; 6997 } 6998 6999 if (!isUInt<16>(Imm)) { 7000 Error(S, "expected a 16-bit value"); 7001 return MatchOperand_ParseFail; 7002 } 7003 7004 Operands.push_back( 7005 AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTyEndpgm)); 7006 return MatchOperand_Success; 7007 } 7008 7009 bool AMDGPUOperand::isEndpgm() const { return isImmTy(ImmTyEndpgm); } 7010