1 //===- AMDGPUAsmParser.cpp - Parse SI asm to MCInst instructions ----------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 9 #include "AMDGPU.h" 10 #include "AMDKernelCodeT.h" 11 #include "MCTargetDesc/AMDGPUMCTargetDesc.h" 12 #include "MCTargetDesc/AMDGPUTargetStreamer.h" 13 #include "SIDefines.h" 14 #include "SIInstrInfo.h" 15 #include "TargetInfo/AMDGPUTargetInfo.h" 16 #include "Utils/AMDGPUAsmUtils.h" 17 #include "Utils/AMDGPUBaseInfo.h" 18 #include "Utils/AMDKernelCodeTUtils.h" 19 #include "llvm/ADT/APFloat.h" 20 #include "llvm/ADT/APInt.h" 21 #include "llvm/ADT/ArrayRef.h" 22 #include "llvm/ADT/STLExtras.h" 23 #include "llvm/ADT/SmallBitVector.h" 24 #include "llvm/ADT/SmallString.h" 25 #include "llvm/ADT/StringRef.h" 26 #include "llvm/ADT/StringSwitch.h" 27 #include "llvm/ADT/Twine.h" 28 #include "llvm/BinaryFormat/ELF.h" 29 #include "llvm/MC/MCAsmInfo.h" 30 #include "llvm/MC/MCContext.h" 31 #include "llvm/MC/MCExpr.h" 32 #include "llvm/MC/MCInst.h" 33 #include "llvm/MC/MCInstrDesc.h" 34 #include "llvm/MC/MCInstrInfo.h" 35 #include "llvm/MC/MCParser/MCAsmLexer.h" 36 #include "llvm/MC/MCParser/MCAsmParser.h" 37 #include "llvm/MC/MCParser/MCAsmParserExtension.h" 38 #include "llvm/MC/MCParser/MCParsedAsmOperand.h" 39 #include "llvm/MC/MCParser/MCTargetAsmParser.h" 40 #include "llvm/MC/MCRegisterInfo.h" 41 #include "llvm/MC/MCStreamer.h" 42 #include "llvm/MC/MCSubtargetInfo.h" 43 #include "llvm/MC/MCSymbol.h" 44 #include "llvm/Support/AMDGPUMetadata.h" 45 #include "llvm/Support/AMDHSAKernelDescriptor.h" 46 #include "llvm/Support/Casting.h" 47 #include "llvm/Support/Compiler.h" 48 #include "llvm/Support/ErrorHandling.h" 49 #include "llvm/Support/MachineValueType.h" 50 #include "llvm/Support/MathExtras.h" 51 #include "llvm/Support/SMLoc.h" 52 #include "llvm/Support/TargetParser.h" 53 #include "llvm/Support/TargetRegistry.h" 54 #include "llvm/Support/raw_ostream.h" 55 #include <algorithm> 56 #include <cassert> 57 #include <cstdint> 58 #include <cstring> 59 #include <iterator> 60 #include <map> 61 #include <memory> 62 #include <string> 63 64 using namespace llvm; 65 using namespace llvm::AMDGPU; 66 using namespace llvm::amdhsa; 67 68 namespace { 69 70 class AMDGPUAsmParser; 71 72 enum RegisterKind { IS_UNKNOWN, IS_VGPR, IS_SGPR, IS_AGPR, IS_TTMP, IS_SPECIAL }; 73 74 //===----------------------------------------------------------------------===// 75 // Operand 76 //===----------------------------------------------------------------------===// 77 78 class AMDGPUOperand : public MCParsedAsmOperand { 79 enum KindTy { 80 Token, 81 Immediate, 82 Register, 83 Expression 84 } Kind; 85 86 SMLoc StartLoc, EndLoc; 87 const AMDGPUAsmParser *AsmParser; 88 89 public: 90 AMDGPUOperand(KindTy Kind_, const AMDGPUAsmParser *AsmParser_) 91 : MCParsedAsmOperand(), Kind(Kind_), AsmParser(AsmParser_) {} 92 93 using Ptr = std::unique_ptr<AMDGPUOperand>; 94 95 struct Modifiers { 96 bool Abs = false; 97 bool Neg = false; 98 bool Sext = false; 99 100 bool hasFPModifiers() const { return Abs || Neg; } 101 bool hasIntModifiers() const { return Sext; } 102 bool hasModifiers() const { return hasFPModifiers() || hasIntModifiers(); } 103 104 int64_t getFPModifiersOperand() const { 105 int64_t Operand = 0; 106 Operand |= Abs ? 
SISrcMods::ABS : 0u;
      Operand |= Neg ? SISrcMods::NEG : 0u;
      return Operand;
    }

    int64_t getIntModifiersOperand() const {
      int64_t Operand = 0;
      Operand |= Sext ? SISrcMods::SEXT : 0u;
      return Operand;
    }

    int64_t getModifiersOperand() const {
      assert(!(hasFPModifiers() && hasIntModifiers())
             && "fp and int modifiers should not be used simultaneously");
      if (hasFPModifiers()) {
        return getFPModifiersOperand();
      } else if (hasIntModifiers()) {
        return getIntModifiersOperand();
      } else {
        return 0;
      }
    }

    friend raw_ostream &operator <<(raw_ostream &OS, AMDGPUOperand::Modifiers Mods);
  };

  enum ImmTy {
    ImmTyNone,
    ImmTyGDS,
    ImmTyLDS,
    ImmTyOffen,
    ImmTyIdxen,
    ImmTyAddr64,
    ImmTyOffset,
    ImmTyInstOffset,
    ImmTyOffset0,
    ImmTyOffset1,
    ImmTyDLC,
    ImmTyGLC,
    ImmTySLC,
    ImmTySWZ,
    ImmTyTFE,
    ImmTyD16,
    ImmTyClampSI,
    ImmTyOModSI,
    ImmTyDPP8,
    ImmTyDppCtrl,
    ImmTyDppRowMask,
    ImmTyDppBankMask,
    ImmTyDppBoundCtrl,
    ImmTyDppFi,
    ImmTySdwaDstSel,
    ImmTySdwaSrc0Sel,
    ImmTySdwaSrc1Sel,
    ImmTySdwaDstUnused,
    ImmTyDMask,
    ImmTyDim,
    ImmTyUNorm,
    ImmTyDA,
    ImmTyR128A16,
    ImmTyLWE,
    ImmTyExpTgt,
    ImmTyExpCompr,
    ImmTyExpVM,
    ImmTyFORMAT,
    ImmTyHwreg,
    ImmTyOff,
    ImmTySendMsg,
    ImmTyInterpSlot,
    ImmTyInterpAttr,
    ImmTyAttrChan,
    ImmTyOpSel,
    ImmTyOpSelHi,
    ImmTyNegLo,
    ImmTyNegHi,
    ImmTySwizzle,
    ImmTyGprIdxMode,
    ImmTyHigh,
    ImmTyBLGP,
    ImmTyCBSZ,
    ImmTyABID,
    ImmTyEndpgm,
  };

private:
  struct TokOp {
    const char *Data;
    unsigned Length;
  };

  struct ImmOp {
    int64_t Val;
    ImmTy Type;
    bool IsFPImm;
    Modifiers Mods;
  };

  struct RegOp {
    unsigned RegNo;
    Modifiers Mods;
  };

  union {
    TokOp Tok;
    ImmOp Imm;
    RegOp Reg;
    const MCExpr *Expr;
  };

public:
  bool isToken() const override {
    if (Kind == Token)
      return true;

    // When parsing operands, we can't always tell if something was meant to be
    // a token, like 'gds', or an expression that references a global variable.
    // In this case, we assume the string is an expression, and if we need to
    // interpret it as a token, then we treat the symbol name as the token.
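    // For example, an operand spelled 'gds' may have been parsed as an
    // MCSymbolRefExpr; in that case getToken() falls back to the referenced
    // symbol's name, so the matcher can still treat it as the 'gds' token.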
224 return isSymbolRefExpr(); 225 } 226 227 bool isSymbolRefExpr() const { 228 return isExpr() && Expr && isa<MCSymbolRefExpr>(Expr); 229 } 230 231 bool isImm() const override { 232 return Kind == Immediate; 233 } 234 235 bool isInlinableImm(MVT type) const; 236 bool isLiteralImm(MVT type) const; 237 238 bool isRegKind() const { 239 return Kind == Register; 240 } 241 242 bool isReg() const override { 243 return isRegKind() && !hasModifiers(); 244 } 245 246 bool isRegOrImmWithInputMods(unsigned RCID, MVT type) const { 247 return isRegClass(RCID) || isInlinableImm(type) || isLiteralImm(type); 248 } 249 250 bool isRegOrImmWithInt16InputMods() const { 251 return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::i16); 252 } 253 254 bool isRegOrImmWithInt32InputMods() const { 255 return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::i32); 256 } 257 258 bool isRegOrImmWithInt64InputMods() const { 259 return isRegOrImmWithInputMods(AMDGPU::VS_64RegClassID, MVT::i64); 260 } 261 262 bool isRegOrImmWithFP16InputMods() const { 263 return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::f16); 264 } 265 266 bool isRegOrImmWithFP32InputMods() const { 267 return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::f32); 268 } 269 270 bool isRegOrImmWithFP64InputMods() const { 271 return isRegOrImmWithInputMods(AMDGPU::VS_64RegClassID, MVT::f64); 272 } 273 274 bool isVReg() const { 275 return isRegClass(AMDGPU::VGPR_32RegClassID) || 276 isRegClass(AMDGPU::VReg_64RegClassID) || 277 isRegClass(AMDGPU::VReg_96RegClassID) || 278 isRegClass(AMDGPU::VReg_128RegClassID) || 279 isRegClass(AMDGPU::VReg_160RegClassID) || 280 isRegClass(AMDGPU::VReg_256RegClassID) || 281 isRegClass(AMDGPU::VReg_512RegClassID) || 282 isRegClass(AMDGPU::VReg_1024RegClassID); 283 } 284 285 bool isVReg32() const { 286 return isRegClass(AMDGPU::VGPR_32RegClassID); 287 } 288 289 bool isVReg32OrOff() const { 290 return isOff() || isVReg32(); 291 } 292 293 bool isNull() const { 294 return isRegKind() && getReg() == AMDGPU::SGPR_NULL; 295 } 296 297 bool isSDWAOperand(MVT type) const; 298 bool isSDWAFP16Operand() const; 299 bool isSDWAFP32Operand() const; 300 bool isSDWAInt16Operand() const; 301 bool isSDWAInt32Operand() const; 302 303 bool isImmTy(ImmTy ImmT) const { 304 return isImm() && Imm.Type == ImmT; 305 } 306 307 bool isImmModifier() const { 308 return isImm() && Imm.Type != ImmTyNone; 309 } 310 311 bool isClampSI() const { return isImmTy(ImmTyClampSI); } 312 bool isOModSI() const { return isImmTy(ImmTyOModSI); } 313 bool isDMask() const { return isImmTy(ImmTyDMask); } 314 bool isDim() const { return isImmTy(ImmTyDim); } 315 bool isUNorm() const { return isImmTy(ImmTyUNorm); } 316 bool isDA() const { return isImmTy(ImmTyDA); } 317 bool isR128A16() const { return isImmTy(ImmTyR128A16); } 318 bool isLWE() const { return isImmTy(ImmTyLWE); } 319 bool isOff() const { return isImmTy(ImmTyOff); } 320 bool isExpTgt() const { return isImmTy(ImmTyExpTgt); } 321 bool isExpVM() const { return isImmTy(ImmTyExpVM); } 322 bool isExpCompr() const { return isImmTy(ImmTyExpCompr); } 323 bool isOffen() const { return isImmTy(ImmTyOffen); } 324 bool isIdxen() const { return isImmTy(ImmTyIdxen); } 325 bool isAddr64() const { return isImmTy(ImmTyAddr64); } 326 bool isOffset() const { return isImmTy(ImmTyOffset) && isUInt<16>(getImm()); } 327 bool isOffset0() const { return isImmTy(ImmTyOffset0) && isUInt<8>(getImm()); } 328 bool isOffset1() const { return isImmTy(ImmTyOffset1) && isUInt<8>(getImm()); } 329 330 bool isFlatOffset() const { 
return isImmTy(ImmTyOffset) || isImmTy(ImmTyInstOffset); } 331 bool isGDS() const { return isImmTy(ImmTyGDS); } 332 bool isLDS() const { return isImmTy(ImmTyLDS); } 333 bool isDLC() const { return isImmTy(ImmTyDLC); } 334 bool isGLC() const { return isImmTy(ImmTyGLC); } 335 bool isSLC() const { return isImmTy(ImmTySLC); } 336 bool isSWZ() const { return isImmTy(ImmTySWZ); } 337 bool isTFE() const { return isImmTy(ImmTyTFE); } 338 bool isD16() const { return isImmTy(ImmTyD16); } 339 bool isFORMAT() const { return isImmTy(ImmTyFORMAT) && isUInt<8>(getImm()); } 340 bool isBankMask() const { return isImmTy(ImmTyDppBankMask); } 341 bool isRowMask() const { return isImmTy(ImmTyDppRowMask); } 342 bool isBoundCtrl() const { return isImmTy(ImmTyDppBoundCtrl); } 343 bool isFI() const { return isImmTy(ImmTyDppFi); } 344 bool isSDWADstSel() const { return isImmTy(ImmTySdwaDstSel); } 345 bool isSDWASrc0Sel() const { return isImmTy(ImmTySdwaSrc0Sel); } 346 bool isSDWASrc1Sel() const { return isImmTy(ImmTySdwaSrc1Sel); } 347 bool isSDWADstUnused() const { return isImmTy(ImmTySdwaDstUnused); } 348 bool isInterpSlot() const { return isImmTy(ImmTyInterpSlot); } 349 bool isInterpAttr() const { return isImmTy(ImmTyInterpAttr); } 350 bool isAttrChan() const { return isImmTy(ImmTyAttrChan); } 351 bool isOpSel() const { return isImmTy(ImmTyOpSel); } 352 bool isOpSelHi() const { return isImmTy(ImmTyOpSelHi); } 353 bool isNegLo() const { return isImmTy(ImmTyNegLo); } 354 bool isNegHi() const { return isImmTy(ImmTyNegHi); } 355 bool isHigh() const { return isImmTy(ImmTyHigh); } 356 357 bool isMod() const { 358 return isClampSI() || isOModSI(); 359 } 360 361 bool isRegOrImm() const { 362 return isReg() || isImm(); 363 } 364 365 bool isRegClass(unsigned RCID) const; 366 367 bool isInlineValue() const; 368 369 bool isRegOrInlineNoMods(unsigned RCID, MVT type) const { 370 return (isRegClass(RCID) || isInlinableImm(type)) && !hasModifiers(); 371 } 372 373 bool isSCSrcB16() const { 374 return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::i16); 375 } 376 377 bool isSCSrcV2B16() const { 378 return isSCSrcB16(); 379 } 380 381 bool isSCSrcB32() const { 382 return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::i32); 383 } 384 385 bool isSCSrcB64() const { 386 return isRegOrInlineNoMods(AMDGPU::SReg_64RegClassID, MVT::i64); 387 } 388 389 bool isBoolReg() const; 390 391 bool isSCSrcF16() const { 392 return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::f16); 393 } 394 395 bool isSCSrcV2F16() const { 396 return isSCSrcF16(); 397 } 398 399 bool isSCSrcF32() const { 400 return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::f32); 401 } 402 403 bool isSCSrcF64() const { 404 return isRegOrInlineNoMods(AMDGPU::SReg_64RegClassID, MVT::f64); 405 } 406 407 bool isSSrcB32() const { 408 return isSCSrcB32() || isLiteralImm(MVT::i32) || isExpr(); 409 } 410 411 bool isSSrcB16() const { 412 return isSCSrcB16() || isLiteralImm(MVT::i16); 413 } 414 415 bool isSSrcV2B16() const { 416 llvm_unreachable("cannot happen"); 417 return isSSrcB16(); 418 } 419 420 bool isSSrcB64() const { 421 // TODO: Find out how SALU supports extension of 32-bit literals to 64 bits. 422 // See isVSrc64(). 
423 return isSCSrcB64() || isLiteralImm(MVT::i64); 424 } 425 426 bool isSSrcF32() const { 427 return isSCSrcB32() || isLiteralImm(MVT::f32) || isExpr(); 428 } 429 430 bool isSSrcF64() const { 431 return isSCSrcB64() || isLiteralImm(MVT::f64); 432 } 433 434 bool isSSrcF16() const { 435 return isSCSrcB16() || isLiteralImm(MVT::f16); 436 } 437 438 bool isSSrcV2F16() const { 439 llvm_unreachable("cannot happen"); 440 return isSSrcF16(); 441 } 442 443 bool isSSrcOrLdsB32() const { 444 return isRegOrInlineNoMods(AMDGPU::SRegOrLds_32RegClassID, MVT::i32) || 445 isLiteralImm(MVT::i32) || isExpr(); 446 } 447 448 bool isVCSrcB32() const { 449 return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::i32); 450 } 451 452 bool isVCSrcB64() const { 453 return isRegOrInlineNoMods(AMDGPU::VS_64RegClassID, MVT::i64); 454 } 455 456 bool isVCSrcB16() const { 457 return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::i16); 458 } 459 460 bool isVCSrcV2B16() const { 461 return isVCSrcB16(); 462 } 463 464 bool isVCSrcF32() const { 465 return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::f32); 466 } 467 468 bool isVCSrcF64() const { 469 return isRegOrInlineNoMods(AMDGPU::VS_64RegClassID, MVT::f64); 470 } 471 472 bool isVCSrcF16() const { 473 return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::f16); 474 } 475 476 bool isVCSrcV2F16() const { 477 return isVCSrcF16(); 478 } 479 480 bool isVSrcB32() const { 481 return isVCSrcF32() || isLiteralImm(MVT::i32) || isExpr(); 482 } 483 484 bool isVSrcB64() const { 485 return isVCSrcF64() || isLiteralImm(MVT::i64); 486 } 487 488 bool isVSrcB16() const { 489 return isVCSrcF16() || isLiteralImm(MVT::i16); 490 } 491 492 bool isVSrcV2B16() const { 493 return isVSrcB16() || isLiteralImm(MVT::v2i16); 494 } 495 496 bool isVSrcF32() const { 497 return isVCSrcF32() || isLiteralImm(MVT::f32) || isExpr(); 498 } 499 500 bool isVSrcF64() const { 501 return isVCSrcF64() || isLiteralImm(MVT::f64); 502 } 503 504 bool isVSrcF16() const { 505 return isVCSrcF16() || isLiteralImm(MVT::f16); 506 } 507 508 bool isVSrcV2F16() const { 509 return isVSrcF16() || isLiteralImm(MVT::v2f16); 510 } 511 512 bool isVISrcB32() const { 513 return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::i32); 514 } 515 516 bool isVISrcB16() const { 517 return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::i16); 518 } 519 520 bool isVISrcV2B16() const { 521 return isVISrcB16(); 522 } 523 524 bool isVISrcF32() const { 525 return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::f32); 526 } 527 528 bool isVISrcF16() const { 529 return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::f16); 530 } 531 532 bool isVISrcV2F16() const { 533 return isVISrcF16() || isVISrcB32(); 534 } 535 536 bool isAISrcB32() const { 537 return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::i32); 538 } 539 540 bool isAISrcB16() const { 541 return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::i16); 542 } 543 544 bool isAISrcV2B16() const { 545 return isAISrcB16(); 546 } 547 548 bool isAISrcF32() const { 549 return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::f32); 550 } 551 552 bool isAISrcF16() const { 553 return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::f16); 554 } 555 556 bool isAISrcV2F16() const { 557 return isAISrcF16() || isAISrcB32(); 558 } 559 560 bool isAISrc_128B32() const { 561 return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::i32); 562 } 563 564 bool isAISrc_128B16() const { 565 return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::i16); 566 } 567 568 bool 
isAISrc_128V2B16() const { 569 return isAISrc_128B16(); 570 } 571 572 bool isAISrc_128F32() const { 573 return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::f32); 574 } 575 576 bool isAISrc_128F16() const { 577 return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::f16); 578 } 579 580 bool isAISrc_128V2F16() const { 581 return isAISrc_128F16() || isAISrc_128B32(); 582 } 583 584 bool isAISrc_512B32() const { 585 return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::i32); 586 } 587 588 bool isAISrc_512B16() const { 589 return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::i16); 590 } 591 592 bool isAISrc_512V2B16() const { 593 return isAISrc_512B16(); 594 } 595 596 bool isAISrc_512F32() const { 597 return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::f32); 598 } 599 600 bool isAISrc_512F16() const { 601 return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::f16); 602 } 603 604 bool isAISrc_512V2F16() const { 605 return isAISrc_512F16() || isAISrc_512B32(); 606 } 607 608 bool isAISrc_1024B32() const { 609 return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::i32); 610 } 611 612 bool isAISrc_1024B16() const { 613 return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::i16); 614 } 615 616 bool isAISrc_1024V2B16() const { 617 return isAISrc_1024B16(); 618 } 619 620 bool isAISrc_1024F32() const { 621 return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::f32); 622 } 623 624 bool isAISrc_1024F16() const { 625 return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::f16); 626 } 627 628 bool isAISrc_1024V2F16() const { 629 return isAISrc_1024F16() || isAISrc_1024B32(); 630 } 631 632 bool isKImmFP32() const { 633 return isLiteralImm(MVT::f32); 634 } 635 636 bool isKImmFP16() const { 637 return isLiteralImm(MVT::f16); 638 } 639 640 bool isMem() const override { 641 return false; 642 } 643 644 bool isExpr() const { 645 return Kind == Expression; 646 } 647 648 bool isSoppBrTarget() const { 649 return isExpr() || isImm(); 650 } 651 652 bool isSWaitCnt() const; 653 bool isHwreg() const; 654 bool isSendMsg() const; 655 bool isSwizzle() const; 656 bool isSMRDOffset8() const; 657 bool isSMRDOffset20() const; 658 bool isSMRDLiteralOffset() const; 659 bool isDPP8() const; 660 bool isDPPCtrl() const; 661 bool isBLGP() const; 662 bool isCBSZ() const; 663 bool isABID() const; 664 bool isGPRIdxMode() const; 665 bool isS16Imm() const; 666 bool isU16Imm() const; 667 bool isEndpgm() const; 668 669 StringRef getExpressionAsToken() const { 670 assert(isExpr()); 671 const MCSymbolRefExpr *S = cast<MCSymbolRefExpr>(Expr); 672 return S->getSymbol().getName(); 673 } 674 675 StringRef getToken() const { 676 assert(isToken()); 677 678 if (Kind == Expression) 679 return getExpressionAsToken(); 680 681 return StringRef(Tok.Data, Tok.Length); 682 } 683 684 int64_t getImm() const { 685 assert(isImm()); 686 return Imm.Val; 687 } 688 689 ImmTy getImmTy() const { 690 assert(isImm()); 691 return Imm.Type; 692 } 693 694 unsigned getReg() const override { 695 assert(isRegKind()); 696 return Reg.RegNo; 697 } 698 699 SMLoc getStartLoc() const override { 700 return StartLoc; 701 } 702 703 SMLoc getEndLoc() const override { 704 return EndLoc; 705 } 706 707 SMRange getLocRange() const { 708 return SMRange(StartLoc, EndLoc); 709 } 710 711 Modifiers getModifiers() const { 712 assert(isRegKind() || isImmTy(ImmTyNone)); 713 return isRegKind() ? 
Reg.Mods : Imm.Mods; 714 } 715 716 void setModifiers(Modifiers Mods) { 717 assert(isRegKind() || isImmTy(ImmTyNone)); 718 if (isRegKind()) 719 Reg.Mods = Mods; 720 else 721 Imm.Mods = Mods; 722 } 723 724 bool hasModifiers() const { 725 return getModifiers().hasModifiers(); 726 } 727 728 bool hasFPModifiers() const { 729 return getModifiers().hasFPModifiers(); 730 } 731 732 bool hasIntModifiers() const { 733 return getModifiers().hasIntModifiers(); 734 } 735 736 uint64_t applyInputFPModifiers(uint64_t Val, unsigned Size) const; 737 738 void addImmOperands(MCInst &Inst, unsigned N, bool ApplyModifiers = true) const; 739 740 void addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyModifiers) const; 741 742 template <unsigned Bitwidth> 743 void addKImmFPOperands(MCInst &Inst, unsigned N) const; 744 745 void addKImmFP16Operands(MCInst &Inst, unsigned N) const { 746 addKImmFPOperands<16>(Inst, N); 747 } 748 749 void addKImmFP32Operands(MCInst &Inst, unsigned N) const { 750 addKImmFPOperands<32>(Inst, N); 751 } 752 753 void addRegOperands(MCInst &Inst, unsigned N) const; 754 755 void addBoolRegOperands(MCInst &Inst, unsigned N) const { 756 addRegOperands(Inst, N); 757 } 758 759 void addRegOrImmOperands(MCInst &Inst, unsigned N) const { 760 if (isRegKind()) 761 addRegOperands(Inst, N); 762 else if (isExpr()) 763 Inst.addOperand(MCOperand::createExpr(Expr)); 764 else 765 addImmOperands(Inst, N); 766 } 767 768 void addRegOrImmWithInputModsOperands(MCInst &Inst, unsigned N) const { 769 Modifiers Mods = getModifiers(); 770 Inst.addOperand(MCOperand::createImm(Mods.getModifiersOperand())); 771 if (isRegKind()) { 772 addRegOperands(Inst, N); 773 } else { 774 addImmOperands(Inst, N, false); 775 } 776 } 777 778 void addRegOrImmWithFPInputModsOperands(MCInst &Inst, unsigned N) const { 779 assert(!hasIntModifiers()); 780 addRegOrImmWithInputModsOperands(Inst, N); 781 } 782 783 void addRegOrImmWithIntInputModsOperands(MCInst &Inst, unsigned N) const { 784 assert(!hasFPModifiers()); 785 addRegOrImmWithInputModsOperands(Inst, N); 786 } 787 788 void addRegWithInputModsOperands(MCInst &Inst, unsigned N) const { 789 Modifiers Mods = getModifiers(); 790 Inst.addOperand(MCOperand::createImm(Mods.getModifiersOperand())); 791 assert(isRegKind()); 792 addRegOperands(Inst, N); 793 } 794 795 void addRegWithFPInputModsOperands(MCInst &Inst, unsigned N) const { 796 assert(!hasIntModifiers()); 797 addRegWithInputModsOperands(Inst, N); 798 } 799 800 void addRegWithIntInputModsOperands(MCInst &Inst, unsigned N) const { 801 assert(!hasFPModifiers()); 802 addRegWithInputModsOperands(Inst, N); 803 } 804 805 void addSoppBrTargetOperands(MCInst &Inst, unsigned N) const { 806 if (isImm()) 807 addImmOperands(Inst, N); 808 else { 809 assert(isExpr()); 810 Inst.addOperand(MCOperand::createExpr(Expr)); 811 } 812 } 813 814 static void printImmTy(raw_ostream& OS, ImmTy Type) { 815 switch (Type) { 816 case ImmTyNone: OS << "None"; break; 817 case ImmTyGDS: OS << "GDS"; break; 818 case ImmTyLDS: OS << "LDS"; break; 819 case ImmTyOffen: OS << "Offen"; break; 820 case ImmTyIdxen: OS << "Idxen"; break; 821 case ImmTyAddr64: OS << "Addr64"; break; 822 case ImmTyOffset: OS << "Offset"; break; 823 case ImmTyInstOffset: OS << "InstOffset"; break; 824 case ImmTyOffset0: OS << "Offset0"; break; 825 case ImmTyOffset1: OS << "Offset1"; break; 826 case ImmTyDLC: OS << "DLC"; break; 827 case ImmTyGLC: OS << "GLC"; break; 828 case ImmTySLC: OS << "SLC"; break; 829 case ImmTySWZ: OS << "SWZ"; break; 830 case ImmTyTFE: OS << "TFE"; break; 831 case 
ImmTyD16: OS << "D16"; break; 832 case ImmTyFORMAT: OS << "FORMAT"; break; 833 case ImmTyClampSI: OS << "ClampSI"; break; 834 case ImmTyOModSI: OS << "OModSI"; break; 835 case ImmTyDPP8: OS << "DPP8"; break; 836 case ImmTyDppCtrl: OS << "DppCtrl"; break; 837 case ImmTyDppRowMask: OS << "DppRowMask"; break; 838 case ImmTyDppBankMask: OS << "DppBankMask"; break; 839 case ImmTyDppBoundCtrl: OS << "DppBoundCtrl"; break; 840 case ImmTyDppFi: OS << "FI"; break; 841 case ImmTySdwaDstSel: OS << "SdwaDstSel"; break; 842 case ImmTySdwaSrc0Sel: OS << "SdwaSrc0Sel"; break; 843 case ImmTySdwaSrc1Sel: OS << "SdwaSrc1Sel"; break; 844 case ImmTySdwaDstUnused: OS << "SdwaDstUnused"; break; 845 case ImmTyDMask: OS << "DMask"; break; 846 case ImmTyDim: OS << "Dim"; break; 847 case ImmTyUNorm: OS << "UNorm"; break; 848 case ImmTyDA: OS << "DA"; break; 849 case ImmTyR128A16: OS << "R128A16"; break; 850 case ImmTyLWE: OS << "LWE"; break; 851 case ImmTyOff: OS << "Off"; break; 852 case ImmTyExpTgt: OS << "ExpTgt"; break; 853 case ImmTyExpCompr: OS << "ExpCompr"; break; 854 case ImmTyExpVM: OS << "ExpVM"; break; 855 case ImmTyHwreg: OS << "Hwreg"; break; 856 case ImmTySendMsg: OS << "SendMsg"; break; 857 case ImmTyInterpSlot: OS << "InterpSlot"; break; 858 case ImmTyInterpAttr: OS << "InterpAttr"; break; 859 case ImmTyAttrChan: OS << "AttrChan"; break; 860 case ImmTyOpSel: OS << "OpSel"; break; 861 case ImmTyOpSelHi: OS << "OpSelHi"; break; 862 case ImmTyNegLo: OS << "NegLo"; break; 863 case ImmTyNegHi: OS << "NegHi"; break; 864 case ImmTySwizzle: OS << "Swizzle"; break; 865 case ImmTyGprIdxMode: OS << "GprIdxMode"; break; 866 case ImmTyHigh: OS << "High"; break; 867 case ImmTyBLGP: OS << "BLGP"; break; 868 case ImmTyCBSZ: OS << "CBSZ"; break; 869 case ImmTyABID: OS << "ABID"; break; 870 case ImmTyEndpgm: OS << "Endpgm"; break; 871 } 872 } 873 874 void print(raw_ostream &OS) const override { 875 switch (Kind) { 876 case Register: 877 OS << "<register " << getReg() << " mods: " << Reg.Mods << '>'; 878 break; 879 case Immediate: 880 OS << '<' << getImm(); 881 if (getImmTy() != ImmTyNone) { 882 OS << " type: "; printImmTy(OS, getImmTy()); 883 } 884 OS << " mods: " << Imm.Mods << '>'; 885 break; 886 case Token: 887 OS << '\'' << getToken() << '\''; 888 break; 889 case Expression: 890 OS << "<expr " << *Expr << '>'; 891 break; 892 } 893 } 894 895 static AMDGPUOperand::Ptr CreateImm(const AMDGPUAsmParser *AsmParser, 896 int64_t Val, SMLoc Loc, 897 ImmTy Type = ImmTyNone, 898 bool IsFPImm = false) { 899 auto Op = std::make_unique<AMDGPUOperand>(Immediate, AsmParser); 900 Op->Imm.Val = Val; 901 Op->Imm.IsFPImm = IsFPImm; 902 Op->Imm.Type = Type; 903 Op->Imm.Mods = Modifiers(); 904 Op->StartLoc = Loc; 905 Op->EndLoc = Loc; 906 return Op; 907 } 908 909 static AMDGPUOperand::Ptr CreateToken(const AMDGPUAsmParser *AsmParser, 910 StringRef Str, SMLoc Loc, 911 bool HasExplicitEncodingSize = true) { 912 auto Res = std::make_unique<AMDGPUOperand>(Token, AsmParser); 913 Res->Tok.Data = Str.data(); 914 Res->Tok.Length = Str.size(); 915 Res->StartLoc = Loc; 916 Res->EndLoc = Loc; 917 return Res; 918 } 919 920 static AMDGPUOperand::Ptr CreateReg(const AMDGPUAsmParser *AsmParser, 921 unsigned RegNo, SMLoc S, 922 SMLoc E) { 923 auto Op = std::make_unique<AMDGPUOperand>(Register, AsmParser); 924 Op->Reg.RegNo = RegNo; 925 Op->Reg.Mods = Modifiers(); 926 Op->StartLoc = S; 927 Op->EndLoc = E; 928 return Op; 929 } 930 931 static AMDGPUOperand::Ptr CreateExpr(const AMDGPUAsmParser *AsmParser, 932 const class MCExpr *Expr, SMLoc S) { 933 
auto Op = std::make_unique<AMDGPUOperand>(Expression, AsmParser); 934 Op->Expr = Expr; 935 Op->StartLoc = S; 936 Op->EndLoc = S; 937 return Op; 938 } 939 }; 940 941 raw_ostream &operator <<(raw_ostream &OS, AMDGPUOperand::Modifiers Mods) { 942 OS << "abs:" << Mods.Abs << " neg: " << Mods.Neg << " sext:" << Mods.Sext; 943 return OS; 944 } 945 946 //===----------------------------------------------------------------------===// 947 // AsmParser 948 //===----------------------------------------------------------------------===// 949 950 // Holds info related to the current kernel, e.g. count of SGPRs used. 951 // Kernel scope begins at .amdgpu_hsa_kernel directive, ends at next 952 // .amdgpu_hsa_kernel or at EOF. 953 class KernelScopeInfo { 954 int SgprIndexUnusedMin = -1; 955 int VgprIndexUnusedMin = -1; 956 MCContext *Ctx = nullptr; 957 958 void usesSgprAt(int i) { 959 if (i >= SgprIndexUnusedMin) { 960 SgprIndexUnusedMin = ++i; 961 if (Ctx) { 962 MCSymbol * const Sym = Ctx->getOrCreateSymbol(Twine(".kernel.sgpr_count")); 963 Sym->setVariableValue(MCConstantExpr::create(SgprIndexUnusedMin, *Ctx)); 964 } 965 } 966 } 967 968 void usesVgprAt(int i) { 969 if (i >= VgprIndexUnusedMin) { 970 VgprIndexUnusedMin = ++i; 971 if (Ctx) { 972 MCSymbol * const Sym = Ctx->getOrCreateSymbol(Twine(".kernel.vgpr_count")); 973 Sym->setVariableValue(MCConstantExpr::create(VgprIndexUnusedMin, *Ctx)); 974 } 975 } 976 } 977 978 public: 979 KernelScopeInfo() = default; 980 981 void initialize(MCContext &Context) { 982 Ctx = &Context; 983 usesSgprAt(SgprIndexUnusedMin = -1); 984 usesVgprAt(VgprIndexUnusedMin = -1); 985 } 986 987 void usesRegister(RegisterKind RegKind, unsigned DwordRegIndex, unsigned RegWidth) { 988 switch (RegKind) { 989 case IS_SGPR: usesSgprAt(DwordRegIndex + RegWidth - 1); break; 990 case IS_AGPR: // fall through 991 case IS_VGPR: usesVgprAt(DwordRegIndex + RegWidth - 1); break; 992 default: break; 993 } 994 } 995 }; 996 997 class AMDGPUAsmParser : public MCTargetAsmParser { 998 MCAsmParser &Parser; 999 1000 // Number of extra operands parsed after the first optional operand. 1001 // This may be necessary to skip hardcoded mandatory operands. 1002 static const unsigned MAX_OPR_LOOKAHEAD = 8; 1003 1004 unsigned ForcedEncodingSize = 0; 1005 bool ForcedDPP = false; 1006 bool ForcedSDWA = false; 1007 KernelScopeInfo KernelScope; 1008 1009 /// @name Auto-generated Match Functions 1010 /// { 1011 1012 #define GET_ASSEMBLER_HEADER 1013 #include "AMDGPUGenAsmMatcher.inc" 1014 1015 /// } 1016 1017 private: 1018 bool ParseAsAbsoluteExpression(uint32_t &Ret); 1019 bool OutOfRangeError(SMRange Range); 1020 /// Calculate VGPR/SGPR blocks required for given target, reserved 1021 /// registers, and user-specified NextFreeXGPR values. 1022 /// 1023 /// \param Features [in] Target features, used for bug corrections. 1024 /// \param VCCUsed [in] Whether VCC special SGPR is reserved. 1025 /// \param FlatScrUsed [in] Whether FLAT_SCRATCH special SGPR is reserved. 1026 /// \param XNACKUsed [in] Whether XNACK_MASK special SGPR is reserved. 1027 /// \param EnableWavefrontSize32 [in] Value of ENABLE_WAVEFRONT_SIZE32 kernel 1028 /// descriptor field, if valid. 1029 /// \param NextFreeVGPR [in] Max VGPR number referenced, plus one. 1030 /// \param VGPRRange [in] Token range, used for VGPR diagnostics. 1031 /// \param NextFreeSGPR [in] Max SGPR number referenced, plus one. 1032 /// \param SGPRRange [in] Token range, used for SGPR diagnostics. 1033 /// \param VGPRBlocks [out] Result VGPR block count. 
  /// \param SGPRBlocks [out] Result SGPR block count.
  bool calculateGPRBlocks(const FeatureBitset &Features, bool VCCUsed,
                          bool FlatScrUsed, bool XNACKUsed,
                          Optional<bool> EnableWavefrontSize32,
                          unsigned NextFreeVGPR, SMRange VGPRRange,
                          unsigned NextFreeSGPR, SMRange SGPRRange,
                          unsigned &VGPRBlocks, unsigned &SGPRBlocks);
  bool ParseDirectiveAMDGCNTarget();
  bool ParseDirectiveAMDHSAKernel();
  bool ParseDirectiveMajorMinor(uint32_t &Major, uint32_t &Minor);
  bool ParseDirectiveHSACodeObjectVersion();
  bool ParseDirectiveHSACodeObjectISA();
  bool ParseAMDKernelCodeTValue(StringRef ID, amd_kernel_code_t &Header);
  bool ParseDirectiveAMDKernelCodeT();
  bool subtargetHasRegister(const MCRegisterInfo &MRI, unsigned RegNo) const;
  bool ParseDirectiveAMDGPUHsaKernel();

  bool ParseDirectiveISAVersion();
  bool ParseDirectiveHSAMetadata();
  bool ParseDirectivePALMetadataBegin();
  bool ParseDirectivePALMetadata();
  bool ParseDirectiveAMDGPULDS();

  /// Common code to parse out a block of text (typically YAML) between start
  /// and end directives.
  bool ParseToEndDirective(const char *AssemblerDirectiveBegin,
                           const char *AssemblerDirectiveEnd,
                           std::string &CollectString);

  bool AddNextRegisterToList(unsigned& Reg, unsigned& RegWidth,
                             RegisterKind RegKind, unsigned Reg1);
  bool ParseAMDGPURegister(RegisterKind& RegKind, unsigned& Reg,
                           unsigned& RegNum, unsigned& RegWidth);
  unsigned ParseRegularReg(RegisterKind &RegKind,
                           unsigned &RegNum,
                           unsigned &RegWidth);
  unsigned ParseSpecialReg(RegisterKind &RegKind,
                           unsigned &RegNum,
                           unsigned &RegWidth);
  unsigned ParseRegList(RegisterKind &RegKind,
                        unsigned &RegNum,
                        unsigned &RegWidth);
  bool ParseRegRange(unsigned& Num, unsigned& Width);
  unsigned getRegularReg(RegisterKind RegKind,
                         unsigned RegNum,
                         unsigned RegWidth);

  bool isRegister();
  bool isRegister(const AsmToken &Token, const AsmToken &NextToken) const;
  Optional<StringRef> getGprCountSymbolName(RegisterKind RegKind);
  void initializeGprCountSymbol(RegisterKind RegKind);
  bool updateGprCountSymbols(RegisterKind RegKind, unsigned DwordRegIndex,
                             unsigned RegWidth);
  void cvtMubufImpl(MCInst &Inst, const OperandVector &Operands,
                    bool IsAtomic, bool IsAtomicReturn, bool IsLds = false);
  void cvtDSImpl(MCInst &Inst, const OperandVector &Operands,
                 bool IsGdsHardcoded);

public:
  enum AMDGPUMatchResultTy {
    Match_PreferE32 = FIRST_TARGET_MATCH_RESULT_TY
  };
  enum OperandMode {
    OperandMode_Default,
    OperandMode_NSA,
  };

  using OptionalImmIndexMap = std::map<AMDGPUOperand::ImmTy, unsigned>;

  AMDGPUAsmParser(const MCSubtargetInfo &STI, MCAsmParser &_Parser,
                  const MCInstrInfo &MII,
                  const MCTargetOptions &Options)
      : MCTargetAsmParser(Options, STI, MII), Parser(_Parser) {
    MCAsmParserExtension::Initialize(Parser);

    if (getFeatureBits().none()) {
      // Set default features.
      copySTI().ToggleFeature("southern-islands");
    }

    setAvailableFeatures(ComputeAvailableFeatures(getFeatureBits()));

    {
      // TODO: make those pre-defined variables read-only.
      // Currently there is no suitable machinery in the core llvm-mc for this.
1119 // MCSymbol::isRedefinable is intended for another purpose, and 1120 // AsmParser::parseDirectiveSet() cannot be specialized for specific target. 1121 AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU()); 1122 MCContext &Ctx = getContext(); 1123 if (ISA.Major >= 6 && AMDGPU::IsaInfo::hasCodeObjectV3(&getSTI())) { 1124 MCSymbol *Sym = 1125 Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_number")); 1126 Sym->setVariableValue(MCConstantExpr::create(ISA.Major, Ctx)); 1127 Sym = Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_minor")); 1128 Sym->setVariableValue(MCConstantExpr::create(ISA.Minor, Ctx)); 1129 Sym = Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_stepping")); 1130 Sym->setVariableValue(MCConstantExpr::create(ISA.Stepping, Ctx)); 1131 } else { 1132 MCSymbol *Sym = 1133 Ctx.getOrCreateSymbol(Twine(".option.machine_version_major")); 1134 Sym->setVariableValue(MCConstantExpr::create(ISA.Major, Ctx)); 1135 Sym = Ctx.getOrCreateSymbol(Twine(".option.machine_version_minor")); 1136 Sym->setVariableValue(MCConstantExpr::create(ISA.Minor, Ctx)); 1137 Sym = Ctx.getOrCreateSymbol(Twine(".option.machine_version_stepping")); 1138 Sym->setVariableValue(MCConstantExpr::create(ISA.Stepping, Ctx)); 1139 } 1140 if (ISA.Major >= 6 && AMDGPU::IsaInfo::hasCodeObjectV3(&getSTI())) { 1141 initializeGprCountSymbol(IS_VGPR); 1142 initializeGprCountSymbol(IS_SGPR); 1143 } else 1144 KernelScope.initialize(getContext()); 1145 } 1146 } 1147 1148 bool hasXNACK() const { 1149 return AMDGPU::hasXNACK(getSTI()); 1150 } 1151 1152 bool hasMIMG_R128() const { 1153 return AMDGPU::hasMIMG_R128(getSTI()); 1154 } 1155 1156 bool hasPackedD16() const { 1157 return AMDGPU::hasPackedD16(getSTI()); 1158 } 1159 1160 bool isSI() const { 1161 return AMDGPU::isSI(getSTI()); 1162 } 1163 1164 bool isCI() const { 1165 return AMDGPU::isCI(getSTI()); 1166 } 1167 1168 bool isVI() const { 1169 return AMDGPU::isVI(getSTI()); 1170 } 1171 1172 bool isGFX9() const { 1173 return AMDGPU::isGFX9(getSTI()); 1174 } 1175 1176 bool isGFX10() const { 1177 return AMDGPU::isGFX10(getSTI()); 1178 } 1179 1180 bool hasInv2PiInlineImm() const { 1181 return getFeatureBits()[AMDGPU::FeatureInv2PiInlineImm]; 1182 } 1183 1184 bool hasFlatOffsets() const { 1185 return getFeatureBits()[AMDGPU::FeatureFlatInstOffsets]; 1186 } 1187 1188 bool hasSGPR102_SGPR103() const { 1189 return !isVI() && !isGFX9(); 1190 } 1191 1192 bool hasSGPR104_SGPR105() const { 1193 return isGFX10(); 1194 } 1195 1196 bool hasIntClamp() const { 1197 return getFeatureBits()[AMDGPU::FeatureIntClamp]; 1198 } 1199 1200 AMDGPUTargetStreamer &getTargetStreamer() { 1201 MCTargetStreamer &TS = *getParser().getStreamer().getTargetStreamer(); 1202 return static_cast<AMDGPUTargetStreamer &>(TS); 1203 } 1204 1205 const MCRegisterInfo *getMRI() const { 1206 // We need this const_cast because for some reason getContext() is not const 1207 // in MCAsmParser. 
1208 return const_cast<AMDGPUAsmParser*>(this)->getContext().getRegisterInfo(); 1209 } 1210 1211 const MCInstrInfo *getMII() const { 1212 return &MII; 1213 } 1214 1215 const FeatureBitset &getFeatureBits() const { 1216 return getSTI().getFeatureBits(); 1217 } 1218 1219 void setForcedEncodingSize(unsigned Size) { ForcedEncodingSize = Size; } 1220 void setForcedDPP(bool ForceDPP_) { ForcedDPP = ForceDPP_; } 1221 void setForcedSDWA(bool ForceSDWA_) { ForcedSDWA = ForceSDWA_; } 1222 1223 unsigned getForcedEncodingSize() const { return ForcedEncodingSize; } 1224 bool isForcedVOP3() const { return ForcedEncodingSize == 64; } 1225 bool isForcedDPP() const { return ForcedDPP; } 1226 bool isForcedSDWA() const { return ForcedSDWA; } 1227 ArrayRef<unsigned> getMatchedVariants() const; 1228 1229 std::unique_ptr<AMDGPUOperand> parseRegister(); 1230 bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc) override; 1231 unsigned checkTargetMatchPredicate(MCInst &Inst) override; 1232 unsigned validateTargetOperandClass(MCParsedAsmOperand &Op, 1233 unsigned Kind) override; 1234 bool MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode, 1235 OperandVector &Operands, MCStreamer &Out, 1236 uint64_t &ErrorInfo, 1237 bool MatchingInlineAsm) override; 1238 bool ParseDirective(AsmToken DirectiveID) override; 1239 OperandMatchResultTy parseOperand(OperandVector &Operands, StringRef Mnemonic, 1240 OperandMode Mode = OperandMode_Default); 1241 StringRef parseMnemonicSuffix(StringRef Name); 1242 bool ParseInstruction(ParseInstructionInfo &Info, StringRef Name, 1243 SMLoc NameLoc, OperandVector &Operands) override; 1244 //bool ProcessInstruction(MCInst &Inst); 1245 1246 OperandMatchResultTy parseIntWithPrefix(const char *Prefix, int64_t &Int); 1247 1248 OperandMatchResultTy 1249 parseIntWithPrefix(const char *Prefix, OperandVector &Operands, 1250 AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone, 1251 bool (*ConvertResult)(int64_t &) = nullptr); 1252 1253 OperandMatchResultTy 1254 parseOperandArrayWithPrefix(const char *Prefix, 1255 OperandVector &Operands, 1256 AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone, 1257 bool (*ConvertResult)(int64_t&) = nullptr); 1258 1259 OperandMatchResultTy 1260 parseNamedBit(const char *Name, OperandVector &Operands, 1261 AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone); 1262 OperandMatchResultTy parseStringWithPrefix(StringRef Prefix, 1263 StringRef &Value); 1264 1265 bool isModifier(); 1266 bool isOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const; 1267 bool isRegOrOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const; 1268 bool isNamedOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const; 1269 bool isOpcodeModifierWithVal(const AsmToken &Token, const AsmToken &NextToken) const; 1270 bool parseSP3NegModifier(); 1271 OperandMatchResultTy parseImm(OperandVector &Operands, bool HasSP3AbsModifier = false); 1272 OperandMatchResultTy parseReg(OperandVector &Operands); 1273 OperandMatchResultTy parseRegOrImm(OperandVector &Operands, bool HasSP3AbsMod = false); 1274 OperandMatchResultTy parseRegOrImmWithFPInputMods(OperandVector &Operands, bool AllowImm = true); 1275 OperandMatchResultTy parseRegOrImmWithIntInputMods(OperandVector &Operands, bool AllowImm = true); 1276 OperandMatchResultTy parseRegWithFPInputMods(OperandVector &Operands); 1277 OperandMatchResultTy parseRegWithIntInputMods(OperandVector &Operands); 1278 OperandMatchResultTy parseVReg32OrOff(OperandVector &Operands); 1279 OperandMatchResultTy 
parseDfmtNfmt(OperandVector &Operands); 1280 1281 void cvtDSOffset01(MCInst &Inst, const OperandVector &Operands); 1282 void cvtDS(MCInst &Inst, const OperandVector &Operands) { cvtDSImpl(Inst, Operands, false); } 1283 void cvtDSGds(MCInst &Inst, const OperandVector &Operands) { cvtDSImpl(Inst, Operands, true); } 1284 void cvtExp(MCInst &Inst, const OperandVector &Operands); 1285 1286 bool parseCnt(int64_t &IntVal); 1287 OperandMatchResultTy parseSWaitCntOps(OperandVector &Operands); 1288 OperandMatchResultTy parseHwreg(OperandVector &Operands); 1289 1290 private: 1291 struct OperandInfoTy { 1292 int64_t Id; 1293 bool IsSymbolic = false; 1294 bool IsDefined = false; 1295 1296 OperandInfoTy(int64_t Id_) : Id(Id_) {} 1297 }; 1298 1299 bool parseSendMsgBody(OperandInfoTy &Msg, OperandInfoTy &Op, OperandInfoTy &Stream); 1300 bool validateSendMsg(const OperandInfoTy &Msg, 1301 const OperandInfoTy &Op, 1302 const OperandInfoTy &Stream, 1303 const SMLoc Loc); 1304 1305 bool parseHwregBody(OperandInfoTy &HwReg, int64_t &Offset, int64_t &Width); 1306 bool validateHwreg(const OperandInfoTy &HwReg, 1307 const int64_t Offset, 1308 const int64_t Width, 1309 const SMLoc Loc); 1310 1311 void errorExpTgt(); 1312 OperandMatchResultTy parseExpTgtImpl(StringRef Str, uint8_t &Val); 1313 SMLoc getFlatOffsetLoc(const OperandVector &Operands) const; 1314 1315 bool validateInstruction(const MCInst &Inst, const SMLoc &IDLoc, const OperandVector &Operands); 1316 bool validateFlatOffset(const MCInst &Inst, const OperandVector &Operands); 1317 bool validateSOPLiteral(const MCInst &Inst) const; 1318 bool validateConstantBusLimitations(const MCInst &Inst); 1319 bool validateEarlyClobberLimitations(const MCInst &Inst); 1320 bool validateIntClampSupported(const MCInst &Inst); 1321 bool validateMIMGAtomicDMask(const MCInst &Inst); 1322 bool validateMIMGGatherDMask(const MCInst &Inst); 1323 bool validateMIMGDataSize(const MCInst &Inst); 1324 bool validateMIMGAddrSize(const MCInst &Inst); 1325 bool validateMIMGD16(const MCInst &Inst); 1326 bool validateMIMGDim(const MCInst &Inst); 1327 bool validateLdsDirect(const MCInst &Inst); 1328 bool validateOpSel(const MCInst &Inst); 1329 bool validateVccOperand(unsigned Reg) const; 1330 bool validateVOP3Literal(const MCInst &Inst) const; 1331 unsigned getConstantBusLimit(unsigned Opcode) const; 1332 bool usesConstantBus(const MCInst &Inst, unsigned OpIdx); 1333 bool isInlineConstant(const MCInst &Inst, unsigned OpIdx) const; 1334 unsigned findImplicitSGPRReadInVOP(const MCInst &Inst) const; 1335 1336 bool isId(const StringRef Id) const; 1337 bool isId(const AsmToken &Token, const StringRef Id) const; 1338 bool isToken(const AsmToken::TokenKind Kind) const; 1339 bool trySkipId(const StringRef Id); 1340 bool trySkipId(const StringRef Id, const AsmToken::TokenKind Kind); 1341 bool trySkipToken(const AsmToken::TokenKind Kind); 1342 bool skipToken(const AsmToken::TokenKind Kind, const StringRef ErrMsg); 1343 bool parseString(StringRef &Val, const StringRef ErrMsg = "expected a string"); 1344 void peekTokens(MutableArrayRef<AsmToken> Tokens); 1345 AsmToken::TokenKind getTokenKind() const; 1346 bool parseExpr(int64_t &Imm); 1347 bool parseExpr(OperandVector &Operands); 1348 StringRef getTokenStr() const; 1349 AsmToken peekToken(); 1350 AsmToken getToken() const; 1351 SMLoc getLoc() const; 1352 void lex(); 1353 1354 public: 1355 OperandMatchResultTy parseOptionalOperand(OperandVector &Operands); 1356 OperandMatchResultTy parseOptionalOpr(OperandVector &Operands); 1357 1358 
OperandMatchResultTy parseExpTgt(OperandVector &Operands); 1359 OperandMatchResultTy parseSendMsgOp(OperandVector &Operands); 1360 OperandMatchResultTy parseInterpSlot(OperandVector &Operands); 1361 OperandMatchResultTy parseInterpAttr(OperandVector &Operands); 1362 OperandMatchResultTy parseSOppBrTarget(OperandVector &Operands); 1363 OperandMatchResultTy parseBoolReg(OperandVector &Operands); 1364 1365 bool parseSwizzleOperands(const unsigned OpNum, int64_t* Op, 1366 const unsigned MinVal, 1367 const unsigned MaxVal, 1368 const StringRef ErrMsg); 1369 OperandMatchResultTy parseSwizzleOp(OperandVector &Operands); 1370 bool parseSwizzleOffset(int64_t &Imm); 1371 bool parseSwizzleMacro(int64_t &Imm); 1372 bool parseSwizzleQuadPerm(int64_t &Imm); 1373 bool parseSwizzleBitmaskPerm(int64_t &Imm); 1374 bool parseSwizzleBroadcast(int64_t &Imm); 1375 bool parseSwizzleSwap(int64_t &Imm); 1376 bool parseSwizzleReverse(int64_t &Imm); 1377 1378 OperandMatchResultTy parseGPRIdxMode(OperandVector &Operands); 1379 int64_t parseGPRIdxMacro(); 1380 1381 void cvtMubuf(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, false, false); } 1382 void cvtMubufAtomic(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, true, false); } 1383 void cvtMubufAtomicReturn(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, true, true); } 1384 void cvtMubufLds(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, false, false, true); } 1385 void cvtMtbuf(MCInst &Inst, const OperandVector &Operands); 1386 1387 AMDGPUOperand::Ptr defaultDLC() const; 1388 AMDGPUOperand::Ptr defaultGLC() const; 1389 AMDGPUOperand::Ptr defaultSLC() const; 1390 1391 AMDGPUOperand::Ptr defaultSMRDOffset8() const; 1392 AMDGPUOperand::Ptr defaultSMRDOffset20() const; 1393 AMDGPUOperand::Ptr defaultSMRDLiteralOffset() const; 1394 AMDGPUOperand::Ptr defaultFlatOffset() const; 1395 1396 OperandMatchResultTy parseOModOperand(OperandVector &Operands); 1397 1398 void cvtVOP3(MCInst &Inst, const OperandVector &Operands, 1399 OptionalImmIndexMap &OptionalIdx); 1400 void cvtVOP3OpSel(MCInst &Inst, const OperandVector &Operands); 1401 void cvtVOP3(MCInst &Inst, const OperandVector &Operands); 1402 void cvtVOP3P(MCInst &Inst, const OperandVector &Operands); 1403 1404 void cvtVOP3Interp(MCInst &Inst, const OperandVector &Operands); 1405 1406 void cvtMIMG(MCInst &Inst, const OperandVector &Operands, 1407 bool IsAtomic = false); 1408 void cvtMIMGAtomic(MCInst &Inst, const OperandVector &Operands); 1409 1410 OperandMatchResultTy parseDim(OperandVector &Operands); 1411 OperandMatchResultTy parseDPP8(OperandVector &Operands); 1412 OperandMatchResultTy parseDPPCtrl(OperandVector &Operands); 1413 AMDGPUOperand::Ptr defaultRowMask() const; 1414 AMDGPUOperand::Ptr defaultBankMask() const; 1415 AMDGPUOperand::Ptr defaultBoundCtrl() const; 1416 AMDGPUOperand::Ptr defaultFI() const; 1417 void cvtDPP(MCInst &Inst, const OperandVector &Operands, bool IsDPP8 = false); 1418 void cvtDPP8(MCInst &Inst, const OperandVector &Operands) { cvtDPP(Inst, Operands, true); } 1419 1420 OperandMatchResultTy parseSDWASel(OperandVector &Operands, StringRef Prefix, 1421 AMDGPUOperand::ImmTy Type); 1422 OperandMatchResultTy parseSDWADstUnused(OperandVector &Operands); 1423 void cvtSdwaVOP1(MCInst &Inst, const OperandVector &Operands); 1424 void cvtSdwaVOP2(MCInst &Inst, const OperandVector &Operands); 1425 void cvtSdwaVOP2b(MCInst &Inst, const OperandVector &Operands); 1426 void cvtSdwaVOPC(MCInst 
&Inst, const OperandVector &Operands);
  void cvtSDWA(MCInst &Inst, const OperandVector &Operands,
               uint64_t BasicInstType, bool skipVcc = false);

  AMDGPUOperand::Ptr defaultBLGP() const;
  AMDGPUOperand::Ptr defaultCBSZ() const;
  AMDGPUOperand::Ptr defaultABID() const;

  OperandMatchResultTy parseEndpgmOp(OperandVector &Operands);
  AMDGPUOperand::Ptr defaultEndpgmImmOperands() const;
};

struct OptionalOperand {
  const char *Name;
  AMDGPUOperand::ImmTy Type;
  bool IsBit;
  bool (*ConvertResult)(int64_t&);
};

} // end anonymous namespace

// May be called with an integer type of equivalent bitwidth.
static const fltSemantics *getFltSemantics(unsigned Size) {
  switch (Size) {
  case 4:
    return &APFloat::IEEEsingle();
  case 8:
    return &APFloat::IEEEdouble();
  case 2:
    return &APFloat::IEEEhalf();
  default:
    llvm_unreachable("unsupported fp type");
  }
}

static const fltSemantics *getFltSemantics(MVT VT) {
  return getFltSemantics(VT.getSizeInBits() / 8);
}

static const fltSemantics *getOpFltSemantics(uint8_t OperandType) {
  switch (OperandType) {
  case AMDGPU::OPERAND_REG_IMM_INT32:
  case AMDGPU::OPERAND_REG_IMM_FP32:
  case AMDGPU::OPERAND_REG_INLINE_C_INT32:
  case AMDGPU::OPERAND_REG_INLINE_C_FP32:
  case AMDGPU::OPERAND_REG_INLINE_AC_INT32:
  case AMDGPU::OPERAND_REG_INLINE_AC_FP32:
    return &APFloat::IEEEsingle();
  case AMDGPU::OPERAND_REG_IMM_INT64:
  case AMDGPU::OPERAND_REG_IMM_FP64:
  case AMDGPU::OPERAND_REG_INLINE_C_INT64:
  case AMDGPU::OPERAND_REG_INLINE_C_FP64:
    return &APFloat::IEEEdouble();
  case AMDGPU::OPERAND_REG_IMM_INT16:
  case AMDGPU::OPERAND_REG_IMM_FP16:
  case AMDGPU::OPERAND_REG_INLINE_C_INT16:
  case AMDGPU::OPERAND_REG_INLINE_C_FP16:
  case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
  case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
  case AMDGPU::OPERAND_REG_INLINE_AC_INT16:
  case AMDGPU::OPERAND_REG_INLINE_AC_FP16:
  case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16:
  case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16:
  case AMDGPU::OPERAND_REG_IMM_V2INT16:
  case AMDGPU::OPERAND_REG_IMM_V2FP16:
    return &APFloat::IEEEhalf();
  default:
    llvm_unreachable("unsupported fp type");
  }
}

//===----------------------------------------------------------------------===//
// Operand
//===----------------------------------------------------------------------===//

static bool canLosslesslyConvertToFPType(APFloat &FPLiteral, MVT VT) {
  bool Lost;

  // Convert the literal to the operand's floating-point semantics.
  APFloat::opStatus Status = FPLiteral.convert(*getFltSemantics(VT),
                                               APFloat::rmNearestTiesToEven,
                                               &Lost);
  // We allow precision loss but not overflow or underflow.
  if (Status != APFloat::opOK &&
      Lost &&
      ((Status & APFloat::opOverflow) != 0 ||
       (Status & APFloat::opUnderflow) != 0)) {
    return false;
  }

  return true;
}

static bool isSafeTruncation(int64_t Val, unsigned Size) {
  return isUIntN(Size, Val) || isIntN(Size, Val);
}

bool AMDGPUOperand::isInlinableImm(MVT type) const {

  // This is a hack to enable named inline values like
  // shared_base with both 32-bit and 64-bit operands.
  // Note that these values are defined as
  // 32-bit operands only.
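  // (The named inline values are special registers such as SRC_SHARED_BASE,
  // SRC_SHARED_LIMIT, SRC_PRIVATE_BASE, SRC_PRIVATE_LIMIT,
  // SRC_POPS_EXITING_WAVE_ID, SRC_VCCZ, SRC_EXECZ, SRC_SCC and SGPR_NULL;
  // see isInlineValue() further down.)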
  if (isInlineValue()) {
    return true;
  }

  if (!isImmTy(ImmTyNone)) {
    // Only plain immediates are inlinable (e.g. "clamp" attribute is not)
    return false;
  }
  // TODO: We should avoid using host float here. It would be better to
  // check the float bit values which is what a few other places do.
  // We've had bot failures before due to weird NaN support on mips hosts.

  APInt Literal(64, Imm.Val);

  if (Imm.IsFPImm) { // We got fp literal token
    if (type == MVT::f64 || type == MVT::i64) { // Expected 64-bit operand
      return AMDGPU::isInlinableLiteral64(Imm.Val,
                                          AsmParser->hasInv2PiInlineImm());
    }

    APFloat FPLiteral(APFloat::IEEEdouble(), APInt(64, Imm.Val));
    if (!canLosslesslyConvertToFPType(FPLiteral, type))
      return false;

    if (type.getScalarSizeInBits() == 16) {
      return AMDGPU::isInlinableLiteral16(
        static_cast<int16_t>(FPLiteral.bitcastToAPInt().getZExtValue()),
        AsmParser->hasInv2PiInlineImm());
    }

    // Check if the single-precision literal is inlinable.
    return AMDGPU::isInlinableLiteral32(
      static_cast<int32_t>(FPLiteral.bitcastToAPInt().getZExtValue()),
      AsmParser->hasInv2PiInlineImm());
  }

  // We got int literal token.
  if (type == MVT::f64 || type == MVT::i64) { // Expected 64-bit operand
    return AMDGPU::isInlinableLiteral64(Imm.Val,
                                        AsmParser->hasInv2PiInlineImm());
  }

  if (!isSafeTruncation(Imm.Val, type.getScalarSizeInBits())) {
    return false;
  }

  if (type.getScalarSizeInBits() == 16) {
    return AMDGPU::isInlinableLiteral16(
      static_cast<int16_t>(Literal.getLoBits(16).getSExtValue()),
      AsmParser->hasInv2PiInlineImm());
  }

  return AMDGPU::isInlinableLiteral32(
    static_cast<int32_t>(Literal.getLoBits(32).getZExtValue()),
    AsmParser->hasInv2PiInlineImm());
}

bool AMDGPUOperand::isLiteralImm(MVT type) const {
  // Check that this immediate can be added as a literal.
  if (!isImmTy(ImmTyNone)) {
    return false;
  }

  if (!Imm.IsFPImm) {
    // We got int literal token.

    if (type == MVT::f64 && hasFPModifiers()) {
      // Cannot apply fp modifiers to int literals preserving the same semantics
      // for VOP1/2/C and VOP3 because of integer truncation. To avoid
      // ambiguity, disable these cases.
      return false;
    }

    unsigned Size = type.getSizeInBits();
    if (Size == 64)
      Size = 32;

    // FIXME: 64-bit operands can zero extend, sign extend, or pad zeroes for
    // FP types.
    return isSafeTruncation(Imm.Val, Size);
  }

  // We got fp literal token
  if (type == MVT::f64) { // Expected 64-bit fp operand
    // We would set the low 64 bits of the literal to zeroes, but we accept
    // these literals.
    return true;
  }

  if (type == MVT::i64) { // Expected 64-bit int operand
    // We don't allow fp literals in 64-bit integer instructions. It is
    // unclear how we should encode them.
    return false;
  }

  // We allow fp literals with f16x2 operands assuming that the specified
  // literal goes into the lower half and the upper half is zero. We also
  // require that the literal may be losslessly converted to f16.
  MVT ExpectedType = (type == MVT::v2f16) ? MVT::f16 :
                     (type == MVT::v2i16) ?
                     MVT::i16 : type;

  APFloat FPLiteral(APFloat::IEEEdouble(), APInt(64, Imm.Val));
  return canLosslesslyConvertToFPType(FPLiteral, ExpectedType);
}

bool AMDGPUOperand::isRegClass(unsigned RCID) const {
  return isRegKind() && AsmParser->getMRI()->getRegClass(RCID).contains(getReg());
}

bool AMDGPUOperand::isSDWAOperand(MVT type) const {
  if (AsmParser->isVI())
    return isVReg32();
  else if (AsmParser->isGFX9() || AsmParser->isGFX10())
    return isRegClass(AMDGPU::VS_32RegClassID) || isInlinableImm(type);
  else
    return false;
}

bool AMDGPUOperand::isSDWAFP16Operand() const {
  return isSDWAOperand(MVT::f16);
}

bool AMDGPUOperand::isSDWAFP32Operand() const {
  return isSDWAOperand(MVT::f32);
}

bool AMDGPUOperand::isSDWAInt16Operand() const {
  return isSDWAOperand(MVT::i16);
}

bool AMDGPUOperand::isSDWAInt32Operand() const {
  return isSDWAOperand(MVT::i32);
}

bool AMDGPUOperand::isBoolReg() const {
  return (AsmParser->getFeatureBits()[AMDGPU::FeatureWavefrontSize64] && isSCSrcB64()) ||
         (AsmParser->getFeatureBits()[AMDGPU::FeatureWavefrontSize32] && isSCSrcB32());
}

uint64_t AMDGPUOperand::applyInputFPModifiers(uint64_t Val, unsigned Size) const
{
  assert(isImmTy(ImmTyNone) && Imm.Mods.hasFPModifiers());
  assert(Size == 2 || Size == 4 || Size == 8);

  const uint64_t FpSignMask = (1ULL << (Size * 8 - 1));

  if (Imm.Mods.Abs) {
    Val &= ~FpSignMask;
  }
  if (Imm.Mods.Neg) {
    Val ^= FpSignMask;
  }

  return Val;
}

void AMDGPUOperand::addImmOperands(MCInst &Inst, unsigned N, bool ApplyModifiers) const {
  if (AMDGPU::isSISrcOperand(AsmParser->getMII()->get(Inst.getOpcode()),
                             Inst.getNumOperands())) {
    addLiteralImmOperand(Inst, Imm.Val,
                         ApplyModifiers &
                         isImmTy(ImmTyNone) && Imm.Mods.hasFPModifiers());
  } else {
    assert(!isImmTy(ImmTyNone) || !hasModifiers());
    Inst.addOperand(MCOperand::createImm(Imm.Val));
  }
}

void AMDGPUOperand::addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyModifiers) const {
  const auto& InstDesc = AsmParser->getMII()->get(Inst.getOpcode());
  auto OpNum = Inst.getNumOperands();
  // Check that this operand accepts literals
  assert(AMDGPU::isSISrcOperand(InstDesc, OpNum));

  if (ApplyModifiers) {
    assert(AMDGPU::isSISrcFPOperand(InstDesc, OpNum));
    const unsigned Size = Imm.IsFPImm ?
                          sizeof(double) : getOperandSize(InstDesc, OpNum);
    Val = applyInputFPModifiers(Val, Size);
  }

  APInt Literal(64, Val);
  uint8_t OpTy = InstDesc.OpInfo[OpNum].OperandType;

  if (Imm.IsFPImm) { // We got fp literal token
    switch (OpTy) {
    case AMDGPU::OPERAND_REG_IMM_INT64:
    case AMDGPU::OPERAND_REG_IMM_FP64:
    case AMDGPU::OPERAND_REG_INLINE_C_INT64:
    case AMDGPU::OPERAND_REG_INLINE_C_FP64:
      if (AMDGPU::isInlinableLiteral64(Literal.getZExtValue(),
                                       AsmParser->hasInv2PiInlineImm())) {
        Inst.addOperand(MCOperand::createImm(Literal.getZExtValue()));
        return;
      }

      // Non-inlineable
      if (AMDGPU::isSISrcFPOperand(InstDesc, OpNum)) { // Expected 64-bit fp operand
        // For fp operands we check if low 32 bits are zeros
        if (Literal.getLoBits(32) != 0) {
          const_cast<AMDGPUAsmParser *>(AsmParser)->Warning(Inst.getLoc(),
          "Can't encode literal as exact 64-bit floating-point operand. "
          "Low 32-bits will be set to zero");
        }

        Inst.addOperand(MCOperand::createImm(Literal.lshr(32).getZExtValue()));
        return;
      }

      // We don't allow fp literals in 64-bit integer instructions. It is
      // unclear how we should encode them. This case should be checked earlier
      // in predicate methods (isLiteralImm())
      llvm_unreachable("fp literal in 64-bit integer instruction.");

    case AMDGPU::OPERAND_REG_IMM_INT32:
    case AMDGPU::OPERAND_REG_IMM_FP32:
    case AMDGPU::OPERAND_REG_INLINE_C_INT32:
    case AMDGPU::OPERAND_REG_INLINE_C_FP32:
    case AMDGPU::OPERAND_REG_INLINE_AC_INT32:
    case AMDGPU::OPERAND_REG_INLINE_AC_FP32:
    case AMDGPU::OPERAND_REG_IMM_INT16:
    case AMDGPU::OPERAND_REG_IMM_FP16:
    case AMDGPU::OPERAND_REG_INLINE_C_INT16:
    case AMDGPU::OPERAND_REG_INLINE_C_FP16:
    case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
    case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
    case AMDGPU::OPERAND_REG_INLINE_AC_INT16:
    case AMDGPU::OPERAND_REG_INLINE_AC_FP16:
    case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16:
    case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16:
    case AMDGPU::OPERAND_REG_IMM_V2INT16:
    case AMDGPU::OPERAND_REG_IMM_V2FP16: {
      bool lost;
      APFloat FPLiteral(APFloat::IEEEdouble(), Literal);
      // Convert the literal to the operand's floating-point semantics.
      FPLiteral.convert(*getOpFltSemantics(OpTy),
                        APFloat::rmNearestTiesToEven, &lost);
      // We allow precision loss but not overflow or underflow. This should be
      // checked earlier in isLiteralImm()

      uint64_t ImmVal = FPLiteral.bitcastToAPInt().getZExtValue();
      Inst.addOperand(MCOperand::createImm(ImmVal));
      return;
    }
    default:
      llvm_unreachable("invalid operand size");
    }

    return;
  }

  // We got int literal token.
  // Only sign extend inline immediates.
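  // For example, a 32-bit operand written as -1 is kept as-is since it is a
  // valid inline constant, while a wider value such as 0xFFFFFFFF12345678 is
  // not inlinable and is truncated to its low 32 bits below.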
1780 switch (OpTy) { 1781 case AMDGPU::OPERAND_REG_IMM_INT32: 1782 case AMDGPU::OPERAND_REG_IMM_FP32: 1783 case AMDGPU::OPERAND_REG_INLINE_C_INT32: 1784 case AMDGPU::OPERAND_REG_INLINE_C_FP32: 1785 case AMDGPU::OPERAND_REG_INLINE_AC_INT32: 1786 case AMDGPU::OPERAND_REG_INLINE_AC_FP32: 1787 case AMDGPU::OPERAND_REG_IMM_V2INT16: 1788 case AMDGPU::OPERAND_REG_IMM_V2FP16: 1789 if (isSafeTruncation(Val, 32) && 1790 AMDGPU::isInlinableLiteral32(static_cast<int32_t>(Val), 1791 AsmParser->hasInv2PiInlineImm())) { 1792 Inst.addOperand(MCOperand::createImm(Val)); 1793 return; 1794 } 1795 1796 Inst.addOperand(MCOperand::createImm(Val & 0xffffffff)); 1797 return; 1798 1799 case AMDGPU::OPERAND_REG_IMM_INT64: 1800 case AMDGPU::OPERAND_REG_IMM_FP64: 1801 case AMDGPU::OPERAND_REG_INLINE_C_INT64: 1802 case AMDGPU::OPERAND_REG_INLINE_C_FP64: 1803 if (AMDGPU::isInlinableLiteral64(Val, AsmParser->hasInv2PiInlineImm())) { 1804 Inst.addOperand(MCOperand::createImm(Val)); 1805 return; 1806 } 1807 1808 Inst.addOperand(MCOperand::createImm(Lo_32(Val))); 1809 return; 1810 1811 case AMDGPU::OPERAND_REG_IMM_INT16: 1812 case AMDGPU::OPERAND_REG_IMM_FP16: 1813 case AMDGPU::OPERAND_REG_INLINE_C_INT16: 1814 case AMDGPU::OPERAND_REG_INLINE_C_FP16: 1815 case AMDGPU::OPERAND_REG_INLINE_AC_INT16: 1816 case AMDGPU::OPERAND_REG_INLINE_AC_FP16: 1817 if (isSafeTruncation(Val, 16) && 1818 AMDGPU::isInlinableLiteral16(static_cast<int16_t>(Val), 1819 AsmParser->hasInv2PiInlineImm())) { 1820 Inst.addOperand(MCOperand::createImm(Val)); 1821 return; 1822 } 1823 1824 Inst.addOperand(MCOperand::createImm(Val & 0xffff)); 1825 return; 1826 1827 case AMDGPU::OPERAND_REG_INLINE_C_V2INT16: 1828 case AMDGPU::OPERAND_REG_INLINE_C_V2FP16: 1829 case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16: 1830 case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16: { 1831 assert(isSafeTruncation(Val, 16)); 1832 assert(AMDGPU::isInlinableLiteral16(static_cast<int16_t>(Val), 1833 AsmParser->hasInv2PiInlineImm())); 1834 1835 Inst.addOperand(MCOperand::createImm(Val)); 1836 return; 1837 } 1838 default: 1839 llvm_unreachable("invalid operand size"); 1840 } 1841 } 1842 1843 template <unsigned Bitwidth> 1844 void AMDGPUOperand::addKImmFPOperands(MCInst &Inst, unsigned N) const { 1845 APInt Literal(64, Imm.Val); 1846 1847 if (!Imm.IsFPImm) { 1848 // We got int literal token. 
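// Only the low Bitwidth bits are encoded: e.g. a 16-bit KImm operand keeps
// just the low 16 bits of the integer literal.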
1849 Inst.addOperand(MCOperand::createImm(Literal.getLoBits(Bitwidth).getZExtValue())); 1850 return; 1851 } 1852 1853 bool Lost; 1854 APFloat FPLiteral(APFloat::IEEEdouble(), Literal); 1855 FPLiteral.convert(*getFltSemantics(Bitwidth / 8), 1856 APFloat::rmNearestTiesToEven, &Lost); 1857 Inst.addOperand(MCOperand::createImm(FPLiteral.bitcastToAPInt().getZExtValue())); 1858 } 1859 1860 void AMDGPUOperand::addRegOperands(MCInst &Inst, unsigned N) const { 1861 Inst.addOperand(MCOperand::createReg(AMDGPU::getMCReg(getReg(), AsmParser->getSTI()))); 1862 } 1863 1864 static bool isInlineValue(unsigned Reg) { 1865 switch (Reg) { 1866 case AMDGPU::SRC_SHARED_BASE: 1867 case AMDGPU::SRC_SHARED_LIMIT: 1868 case AMDGPU::SRC_PRIVATE_BASE: 1869 case AMDGPU::SRC_PRIVATE_LIMIT: 1870 case AMDGPU::SRC_POPS_EXITING_WAVE_ID: 1871 return true; 1872 case AMDGPU::SRC_VCCZ: 1873 case AMDGPU::SRC_EXECZ: 1874 case AMDGPU::SRC_SCC: 1875 return true; 1876 case AMDGPU::SGPR_NULL: 1877 return true; 1878 default: 1879 return false; 1880 } 1881 } 1882 1883 bool AMDGPUOperand::isInlineValue() const { 1884 return isRegKind() && ::isInlineValue(getReg()); 1885 } 1886 1887 //===----------------------------------------------------------------------===// 1888 // AsmParser 1889 //===----------------------------------------------------------------------===// 1890 1891 static int getRegClass(RegisterKind Is, unsigned RegWidth) { 1892 if (Is == IS_VGPR) { 1893 switch (RegWidth) { 1894 default: return -1; 1895 case 1: return AMDGPU::VGPR_32RegClassID; 1896 case 2: return AMDGPU::VReg_64RegClassID; 1897 case 3: return AMDGPU::VReg_96RegClassID; 1898 case 4: return AMDGPU::VReg_128RegClassID; 1899 case 5: return AMDGPU::VReg_160RegClassID; 1900 case 8: return AMDGPU::VReg_256RegClassID; 1901 case 16: return AMDGPU::VReg_512RegClassID; 1902 case 32: return AMDGPU::VReg_1024RegClassID; 1903 } 1904 } else if (Is == IS_TTMP) { 1905 switch (RegWidth) { 1906 default: return -1; 1907 case 1: return AMDGPU::TTMP_32RegClassID; 1908 case 2: return AMDGPU::TTMP_64RegClassID; 1909 case 4: return AMDGPU::TTMP_128RegClassID; 1910 case 8: return AMDGPU::TTMP_256RegClassID; 1911 case 16: return AMDGPU::TTMP_512RegClassID; 1912 } 1913 } else if (Is == IS_SGPR) { 1914 switch (RegWidth) { 1915 default: return -1; 1916 case 1: return AMDGPU::SGPR_32RegClassID; 1917 case 2: return AMDGPU::SGPR_64RegClassID; 1918 case 4: return AMDGPU::SGPR_128RegClassID; 1919 case 8: return AMDGPU::SGPR_256RegClassID; 1920 case 16: return AMDGPU::SGPR_512RegClassID; 1921 } 1922 } else if (Is == IS_AGPR) { 1923 switch (RegWidth) { 1924 default: return -1; 1925 case 1: return AMDGPU::AGPR_32RegClassID; 1926 case 2: return AMDGPU::AReg_64RegClassID; 1927 case 4: return AMDGPU::AReg_128RegClassID; 1928 case 16: return AMDGPU::AReg_512RegClassID; 1929 case 32: return AMDGPU::AReg_1024RegClassID; 1930 } 1931 } 1932 return -1; 1933 } 1934 1935 static unsigned getSpecialRegForName(StringRef RegName) { 1936 return StringSwitch<unsigned>(RegName) 1937 .Case("exec", AMDGPU::EXEC) 1938 .Case("vcc", AMDGPU::VCC) 1939 .Case("flat_scratch", AMDGPU::FLAT_SCR) 1940 .Case("xnack_mask", AMDGPU::XNACK_MASK) 1941 .Case("shared_base", AMDGPU::SRC_SHARED_BASE) 1942 .Case("src_shared_base", AMDGPU::SRC_SHARED_BASE) 1943 .Case("shared_limit", AMDGPU::SRC_SHARED_LIMIT) 1944 .Case("src_shared_limit", AMDGPU::SRC_SHARED_LIMIT) 1945 .Case("private_base", AMDGPU::SRC_PRIVATE_BASE) 1946 .Case("src_private_base", AMDGPU::SRC_PRIVATE_BASE) 1947 .Case("private_limit", AMDGPU::SRC_PRIVATE_LIMIT) 1948 
.Case("src_private_limit", AMDGPU::SRC_PRIVATE_LIMIT) 1949 .Case("pops_exiting_wave_id", AMDGPU::SRC_POPS_EXITING_WAVE_ID) 1950 .Case("src_pops_exiting_wave_id", AMDGPU::SRC_POPS_EXITING_WAVE_ID) 1951 .Case("lds_direct", AMDGPU::LDS_DIRECT) 1952 .Case("src_lds_direct", AMDGPU::LDS_DIRECT) 1953 .Case("m0", AMDGPU::M0) 1954 .Case("vccz", AMDGPU::SRC_VCCZ) 1955 .Case("src_vccz", AMDGPU::SRC_VCCZ) 1956 .Case("execz", AMDGPU::SRC_EXECZ) 1957 .Case("src_execz", AMDGPU::SRC_EXECZ) 1958 .Case("scc", AMDGPU::SRC_SCC) 1959 .Case("src_scc", AMDGPU::SRC_SCC) 1960 .Case("tba", AMDGPU::TBA) 1961 .Case("tma", AMDGPU::TMA) 1962 .Case("flat_scratch_lo", AMDGPU::FLAT_SCR_LO) 1963 .Case("flat_scratch_hi", AMDGPU::FLAT_SCR_HI) 1964 .Case("xnack_mask_lo", AMDGPU::XNACK_MASK_LO) 1965 .Case("xnack_mask_hi", AMDGPU::XNACK_MASK_HI) 1966 .Case("vcc_lo", AMDGPU::VCC_LO) 1967 .Case("vcc_hi", AMDGPU::VCC_HI) 1968 .Case("exec_lo", AMDGPU::EXEC_LO) 1969 .Case("exec_hi", AMDGPU::EXEC_HI) 1970 .Case("tma_lo", AMDGPU::TMA_LO) 1971 .Case("tma_hi", AMDGPU::TMA_HI) 1972 .Case("tba_lo", AMDGPU::TBA_LO) 1973 .Case("tba_hi", AMDGPU::TBA_HI) 1974 .Case("null", AMDGPU::SGPR_NULL) 1975 .Default(AMDGPU::NoRegister); 1976 } 1977 1978 bool AMDGPUAsmParser::ParseRegister(unsigned &RegNo, SMLoc &StartLoc, 1979 SMLoc &EndLoc) { 1980 auto R = parseRegister(); 1981 if (!R) return true; 1982 assert(R->isReg()); 1983 RegNo = R->getReg(); 1984 StartLoc = R->getStartLoc(); 1985 EndLoc = R->getEndLoc(); 1986 return false; 1987 } 1988 1989 bool AMDGPUAsmParser::AddNextRegisterToList(unsigned &Reg, unsigned &RegWidth, 1990 RegisterKind RegKind, unsigned Reg1) { 1991 switch (RegKind) { 1992 case IS_SPECIAL: 1993 if (Reg == AMDGPU::EXEC_LO && Reg1 == AMDGPU::EXEC_HI) { 1994 Reg = AMDGPU::EXEC; 1995 RegWidth = 2; 1996 return true; 1997 } 1998 if (Reg == AMDGPU::FLAT_SCR_LO && Reg1 == AMDGPU::FLAT_SCR_HI) { 1999 Reg = AMDGPU::FLAT_SCR; 2000 RegWidth = 2; 2001 return true; 2002 } 2003 if (Reg == AMDGPU::XNACK_MASK_LO && Reg1 == AMDGPU::XNACK_MASK_HI) { 2004 Reg = AMDGPU::XNACK_MASK; 2005 RegWidth = 2; 2006 return true; 2007 } 2008 if (Reg == AMDGPU::VCC_LO && Reg1 == AMDGPU::VCC_HI) { 2009 Reg = AMDGPU::VCC; 2010 RegWidth = 2; 2011 return true; 2012 } 2013 if (Reg == AMDGPU::TBA_LO && Reg1 == AMDGPU::TBA_HI) { 2014 Reg = AMDGPU::TBA; 2015 RegWidth = 2; 2016 return true; 2017 } 2018 if (Reg == AMDGPU::TMA_LO && Reg1 == AMDGPU::TMA_HI) { 2019 Reg = AMDGPU::TMA; 2020 RegWidth = 2; 2021 return true; 2022 } 2023 return false; 2024 case IS_VGPR: 2025 case IS_SGPR: 2026 case IS_AGPR: 2027 case IS_TTMP: 2028 if (Reg1 != Reg + RegWidth) { 2029 return false; 2030 } 2031 RegWidth++; 2032 return true; 2033 default: 2034 llvm_unreachable("unexpected register kind"); 2035 } 2036 } 2037 2038 struct RegInfo { 2039 StringLiteral Name; 2040 RegisterKind Kind; 2041 }; 2042 2043 static constexpr RegInfo RegularRegisters[] = { 2044 {{"v"}, IS_VGPR}, 2045 {{"s"}, IS_SGPR}, 2046 {{"ttmp"}, IS_TTMP}, 2047 {{"acc"}, IS_AGPR}, 2048 {{"a"}, IS_AGPR}, 2049 }; 2050 2051 static bool isRegularReg(RegisterKind Kind) { 2052 return Kind == IS_VGPR || 2053 Kind == IS_SGPR || 2054 Kind == IS_TTMP || 2055 Kind == IS_AGPR; 2056 } 2057 2058 static const RegInfo* getRegularRegInfo(StringRef Str) { 2059 for (const RegInfo &Reg : RegularRegisters) 2060 if (Str.startswith(Reg.Name)) 2061 return &Reg; 2062 return nullptr; 2063 } 2064 2065 static bool getRegNum(StringRef Str, unsigned& Num) { 2066 return !Str.getAsInteger(10, Num); 2067 } 2068 2069 bool 2070 AMDGPUAsmParser::isRegister(const 
AsmToken &Token, 2071 const AsmToken &NextToken) const { 2072 2073 // A list of consecutive registers: [s0,s1,s2,s3] 2074 if (Token.is(AsmToken::LBrac)) 2075 return true; 2076 2077 if (!Token.is(AsmToken::Identifier)) 2078 return false; 2079 2080 // A single register like s0 or a range of registers like s[0:1] 2081 2082 StringRef Str = Token.getString(); 2083 const RegInfo *Reg = getRegularRegInfo(Str); 2084 if (Reg) { 2085 StringRef RegName = Reg->Name; 2086 StringRef RegSuffix = Str.substr(RegName.size()); 2087 if (!RegSuffix.empty()) { 2088 unsigned Num; 2089 // A single register with an index: rXX 2090 if (getRegNum(RegSuffix, Num)) 2091 return true; 2092 } else { 2093 // A range of registers: r[XX:YY]. 2094 if (NextToken.is(AsmToken::LBrac)) 2095 return true; 2096 } 2097 } 2098 2099 return getSpecialRegForName(Str) != AMDGPU::NoRegister; 2100 } 2101 2102 bool 2103 AMDGPUAsmParser::isRegister() 2104 { 2105 return isRegister(getToken(), peekToken()); 2106 } 2107 2108 unsigned 2109 AMDGPUAsmParser::getRegularReg(RegisterKind RegKind, 2110 unsigned RegNum, 2111 unsigned RegWidth) { 2112 2113 assert(isRegularReg(RegKind)); 2114 2115 unsigned AlignSize = 1; 2116 if (RegKind == IS_SGPR || RegKind == IS_TTMP) { 2117 // SGPR and TTMP registers must be aligned. 2118 // Max required alignment is 4 dwords. 2119 AlignSize = std::min(RegWidth, 4u); 2120 } 2121 2122 if (RegNum % AlignSize != 0) 2123 return AMDGPU::NoRegister; 2124 2125 unsigned RegIdx = RegNum / AlignSize; 2126 int RCID = getRegClass(RegKind, RegWidth); 2127 if (RCID == -1) 2128 return AMDGPU::NoRegister; 2129 2130 const MCRegisterInfo *TRI = getContext().getRegisterInfo(); 2131 const MCRegisterClass RC = TRI->getRegClass(RCID); 2132 if (RegIdx >= RC.getNumRegs()) 2133 return AMDGPU::NoRegister; 2134 2135 return RC.getRegister(RegIdx); 2136 } 2137 2138 bool 2139 AMDGPUAsmParser::ParseRegRange(unsigned& Num, unsigned& Width) { 2140 int64_t RegLo, RegHi; 2141 if (!trySkipToken(AsmToken::LBrac)) 2142 return false; 2143 2144 if (!parseExpr(RegLo)) 2145 return false; 2146 2147 if (trySkipToken(AsmToken::Colon)) { 2148 if (!parseExpr(RegHi)) 2149 return false; 2150 } else { 2151 RegHi = RegLo; 2152 } 2153 2154 if (!trySkipToken(AsmToken::RBrac)) 2155 return false; 2156 2157 if (!isUInt<32>(RegLo) || !isUInt<32>(RegHi) || RegLo > RegHi) 2158 return false; 2159 2160 Num = static_cast<unsigned>(RegLo); 2161 Width = (RegHi - RegLo) + 1; 2162 return true; 2163 } 2164 2165 unsigned 2166 AMDGPUAsmParser::ParseSpecialReg(RegisterKind &RegKind, 2167 unsigned &RegNum, 2168 unsigned &RegWidth) { 2169 assert(isToken(AsmToken::Identifier)); 2170 unsigned Reg = getSpecialRegForName(getTokenStr()); 2171 if (Reg) { 2172 RegNum = 0; 2173 RegWidth = 1; 2174 RegKind = IS_SPECIAL; 2175 lex(); // skip register name 2176 } 2177 return Reg; 2178 } 2179 2180 unsigned 2181 AMDGPUAsmParser::ParseRegularReg(RegisterKind &RegKind, 2182 unsigned &RegNum, 2183 unsigned &RegWidth) { 2184 assert(isToken(AsmToken::Identifier)); 2185 StringRef RegName = getTokenStr(); 2186 2187 const RegInfo *RI = getRegularRegInfo(RegName); 2188 if (!RI) 2189 return AMDGPU::NoRegister; 2190 lex(); // skip register name 2191 2192 RegKind = RI->Kind; 2193 StringRef RegSuffix = RegName.substr(RI->Name.size()); 2194 if (!RegSuffix.empty()) { 2195 // Single 32-bit register: vXX. 2196 if (!getRegNum(RegSuffix, RegNum)) 2197 return AMDGPU::NoRegister; 2198 RegWidth = 1; 2199 } else { 2200 // Range of registers: v[XX:YY]. ":YY" is optional. 
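// For example, "v[8:11]" yields RegNum = 8 and RegWidth = 4, while "v[5]"
// (the optional ":YY" omitted) yields RegNum = 5 and RegWidth = 1.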
2201 if (!ParseRegRange(RegNum, RegWidth)) 2202 return AMDGPU::NoRegister; 2203 } 2204 2205 return getRegularReg(RegKind, RegNum, RegWidth); 2206 } 2207 2208 unsigned 2209 AMDGPUAsmParser::ParseRegList(RegisterKind &RegKind, 2210 unsigned &RegNum, 2211 unsigned &RegWidth) { 2212 unsigned Reg = AMDGPU::NoRegister; 2213 2214 if (!trySkipToken(AsmToken::LBrac)) 2215 return AMDGPU::NoRegister; 2216 2217 // List of consecutive registers, e.g.: [s0,s1,s2,s3] 2218 2219 if (!ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth)) 2220 return AMDGPU::NoRegister; 2221 if (RegWidth != 1) 2222 return AMDGPU::NoRegister; 2223 2224 for (; trySkipToken(AsmToken::Comma); ) { 2225 RegisterKind NextRegKind; 2226 unsigned NextReg, NextRegNum, NextRegWidth; 2227 2228 if (!ParseAMDGPURegister(NextRegKind, NextReg, NextRegNum, NextRegWidth)) 2229 return AMDGPU::NoRegister; 2230 if (NextRegWidth != 1) 2231 return AMDGPU::NoRegister; 2232 if (NextRegKind != RegKind) 2233 return AMDGPU::NoRegister; 2234 if (!AddNextRegisterToList(Reg, RegWidth, RegKind, NextReg)) 2235 return AMDGPU::NoRegister; 2236 } 2237 2238 if (!trySkipToken(AsmToken::RBrac)) 2239 return AMDGPU::NoRegister; 2240 2241 if (isRegularReg(RegKind)) 2242 Reg = getRegularReg(RegKind, RegNum, RegWidth); 2243 2244 return Reg; 2245 } 2246 2247 bool AMDGPUAsmParser::ParseAMDGPURegister(RegisterKind &RegKind, 2248 unsigned &Reg, 2249 unsigned &RegNum, 2250 unsigned &RegWidth) { 2251 Reg = AMDGPU::NoRegister; 2252 2253 if (isToken(AsmToken::Identifier)) { 2254 Reg = ParseSpecialReg(RegKind, RegNum, RegWidth); 2255 if (Reg == AMDGPU::NoRegister) 2256 Reg = ParseRegularReg(RegKind, RegNum, RegWidth); 2257 } else { 2258 Reg = ParseRegList(RegKind, RegNum, RegWidth); 2259 } 2260 2261 const MCRegisterInfo *TRI = getContext().getRegisterInfo(); 2262 return Reg != AMDGPU::NoRegister && subtargetHasRegister(*TRI, Reg); 2263 } 2264 2265 Optional<StringRef> 2266 AMDGPUAsmParser::getGprCountSymbolName(RegisterKind RegKind) { 2267 switch (RegKind) { 2268 case IS_VGPR: 2269 return StringRef(".amdgcn.next_free_vgpr"); 2270 case IS_SGPR: 2271 return StringRef(".amdgcn.next_free_sgpr"); 2272 default: 2273 return None; 2274 } 2275 } 2276 2277 void AMDGPUAsmParser::initializeGprCountSymbol(RegisterKind RegKind) { 2278 auto SymbolName = getGprCountSymbolName(RegKind); 2279 assert(SymbolName && "initializing invalid register kind"); 2280 MCSymbol *Sym = getContext().getOrCreateSymbol(*SymbolName); 2281 Sym->setVariableValue(MCConstantExpr::create(0, getContext())); 2282 } 2283 2284 bool AMDGPUAsmParser::updateGprCountSymbols(RegisterKind RegKind, 2285 unsigned DwordRegIndex, 2286 unsigned RegWidth) { 2287 // Symbols are only defined for GCN targets 2288 if (AMDGPU::getIsaVersion(getSTI().getCPU()).Major < 6) 2289 return true; 2290 2291 auto SymbolName = getGprCountSymbolName(RegKind); 2292 if (!SymbolName) 2293 return true; 2294 MCSymbol *Sym = getContext().getOrCreateSymbol(*SymbolName); 2295 2296 int64_t NewMax = DwordRegIndex + RegWidth - 1; 2297 int64_t OldCount; 2298 2299 if (!Sym->isVariable()) 2300 return !Error(getParser().getTok().getLoc(), 2301 ".amdgcn.next_free_{v,s}gpr symbols must be variable"); 2302 if (!Sym->getVariableValue(false)->evaluateAsAbsolute(OldCount)) 2303 return !Error( 2304 getParser().getTok().getLoc(), 2305 ".amdgcn.next_free_{v,s}gpr symbols must be absolute expressions"); 2306 2307 if (OldCount <= NewMax) 2308 Sym->setVariableValue(MCConstantExpr::create(NewMax + 1, getContext())); 2309 2310 return true; 2311 } 2312 2313 
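// Parse a single register operand. As a side effect this keeps GPR usage
// bookkeeping up to date: e.g. successfully parsing v[8:11] raises
// .amdgcn.next_free_vgpr to at least 12 when code object v3 is in use, and
// records the usage in the kernel scope otherwise.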
std::unique_ptr<AMDGPUOperand> AMDGPUAsmParser::parseRegister() { 2314 const auto &Tok = Parser.getTok(); 2315 SMLoc StartLoc = Tok.getLoc(); 2316 SMLoc EndLoc = Tok.getEndLoc(); 2317 RegisterKind RegKind; 2318 unsigned Reg, RegNum, RegWidth; 2319 2320 if (!ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth)) { 2321 //FIXME: improve error messages (bug 41303). 2322 Error(StartLoc, "not a valid operand."); 2323 return nullptr; 2324 } 2325 if (AMDGPU::IsaInfo::hasCodeObjectV3(&getSTI())) { 2326 if (!updateGprCountSymbols(RegKind, RegNum, RegWidth)) 2327 return nullptr; 2328 } else 2329 KernelScope.usesRegister(RegKind, RegNum, RegWidth); 2330 return AMDGPUOperand::CreateReg(this, Reg, StartLoc, EndLoc); 2331 } 2332 2333 OperandMatchResultTy 2334 AMDGPUAsmParser::parseImm(OperandVector &Operands, bool HasSP3AbsModifier) { 2335 // TODO: add syntactic sugar for 1/(2*PI) 2336 2337 assert(!isRegister()); 2338 assert(!isModifier()); 2339 2340 const auto& Tok = getToken(); 2341 const auto& NextTok = peekToken(); 2342 bool IsReal = Tok.is(AsmToken::Real); 2343 SMLoc S = getLoc(); 2344 bool Negate = false; 2345 2346 if (!IsReal && Tok.is(AsmToken::Minus) && NextTok.is(AsmToken::Real)) { 2347 lex(); 2348 IsReal = true; 2349 Negate = true; 2350 } 2351 2352 if (IsReal) { 2353 // Floating-point expressions are not supported. 2354 // Can only allow floating-point literals with an 2355 // optional sign. 2356 2357 StringRef Num = getTokenStr(); 2358 lex(); 2359 2360 APFloat RealVal(APFloat::IEEEdouble()); 2361 auto roundMode = APFloat::rmNearestTiesToEven; 2362 if (RealVal.convertFromString(Num, roundMode) == APFloat::opInvalidOp) { 2363 return MatchOperand_ParseFail; 2364 } 2365 if (Negate) 2366 RealVal.changeSign(); 2367 2368 Operands.push_back( 2369 AMDGPUOperand::CreateImm(this, RealVal.bitcastToAPInt().getZExtValue(), S, 2370 AMDGPUOperand::ImmTyNone, true)); 2371 2372 return MatchOperand_Success; 2373 2374 } else { 2375 int64_t IntVal; 2376 const MCExpr *Expr; 2377 SMLoc S = getLoc(); 2378 2379 if (HasSP3AbsModifier) { 2380 // This is a workaround for handling expressions 2381 // as arguments of SP3 'abs' modifier, for example: 2382 // |1.0| 2383 // |-1| 2384 // |1+x| 2385 // This syntax is not compatible with syntax of standard 2386 // MC expressions (due to the trailing '|'). 
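// For example, given |-1| the generic expression parser would treat the
// trailing '|' as the start of a bitwise OR expression; parsing only a
// primary expression leaves that '|' for the caller to consume.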
2387 SMLoc EndLoc; 2388 if (getParser().parsePrimaryExpr(Expr, EndLoc)) 2389 return MatchOperand_ParseFail; 2390 } else { 2391 if (Parser.parseExpression(Expr)) 2392 return MatchOperand_ParseFail; 2393 } 2394 2395 if (Expr->evaluateAsAbsolute(IntVal)) { 2396 Operands.push_back(AMDGPUOperand::CreateImm(this, IntVal, S)); 2397 } else { 2398 Operands.push_back(AMDGPUOperand::CreateExpr(this, Expr, S)); 2399 } 2400 2401 return MatchOperand_Success; 2402 } 2403 2404 return MatchOperand_NoMatch; 2405 } 2406 2407 OperandMatchResultTy 2408 AMDGPUAsmParser::parseReg(OperandVector &Operands) { 2409 if (!isRegister()) 2410 return MatchOperand_NoMatch; 2411 2412 if (auto R = parseRegister()) { 2413 assert(R->isReg()); 2414 Operands.push_back(std::move(R)); 2415 return MatchOperand_Success; 2416 } 2417 return MatchOperand_ParseFail; 2418 } 2419 2420 OperandMatchResultTy 2421 AMDGPUAsmParser::parseRegOrImm(OperandVector &Operands, bool HasSP3AbsMod) { 2422 auto res = parseReg(Operands); 2423 if (res != MatchOperand_NoMatch) { 2424 return res; 2425 } else if (isModifier()) { 2426 return MatchOperand_NoMatch; 2427 } else { 2428 return parseImm(Operands, HasSP3AbsMod); 2429 } 2430 } 2431 2432 bool 2433 AMDGPUAsmParser::isNamedOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const { 2434 if (Token.is(AsmToken::Identifier) && NextToken.is(AsmToken::LParen)) { 2435 const auto &str = Token.getString(); 2436 return str == "abs" || str == "neg" || str == "sext"; 2437 } 2438 return false; 2439 } 2440 2441 bool 2442 AMDGPUAsmParser::isOpcodeModifierWithVal(const AsmToken &Token, const AsmToken &NextToken) const { 2443 return Token.is(AsmToken::Identifier) && NextToken.is(AsmToken::Colon); 2444 } 2445 2446 bool 2447 AMDGPUAsmParser::isOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const { 2448 return isNamedOperandModifier(Token, NextToken) || Token.is(AsmToken::Pipe); 2449 } 2450 2451 bool 2452 AMDGPUAsmParser::isRegOrOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const { 2453 return isRegister(Token, NextToken) || isOperandModifier(Token, NextToken); 2454 } 2455 2456 // Check if this is an operand modifier or an opcode modifier 2457 // which may look like an expression but it is not. We should 2458 // avoid parsing these modifiers as expressions. Currently 2459 // recognized sequences are: 2460 // |...| 2461 // abs(...) 2462 // neg(...) 2463 // sext(...) 2464 // -reg 2465 // -|...| 2466 // -abs(...) 2467 // name:... 2468 // Note that simple opcode modifiers like 'gds' may be parsed as 2469 // expressions; this is a special case. See getExpressionAsToken. 2470 // 2471 bool 2472 AMDGPUAsmParser::isModifier() { 2473 2474 AsmToken Tok = getToken(); 2475 AsmToken NextToken[2]; 2476 peekTokens(NextToken); 2477 2478 return isOperandModifier(Tok, NextToken[0]) || 2479 (Tok.is(AsmToken::Minus) && isRegOrOperandModifier(NextToken[0], NextToken[1])) || 2480 isOpcodeModifierWithVal(Tok, NextToken[0]); 2481 } 2482 2483 // Check if the current token is an SP3 'neg' modifier. 2484 // Currently this modifier is allowed in the following context: 2485 // 2486 // 1. Before a register, e.g. "-v0", "-v[...]" or "-[v0,v1]". 2487 // 2. Before an 'abs' modifier: -abs(...) 2488 // 3. Before an SP3 'abs' modifier: -|...| 2489 // 2490 // In all other cases "-" is handled as a part 2491 // of an expression that follows the sign. 
2492 //
2493 // Note: When "-" is followed by an integer literal,
2494 // this is interpreted as integer negation rather
2495 // than a floating-point NEG modifier applied to N.
2496 // Besides being counter-intuitive, such use of a floating-point
2497 // NEG modifier would have resulted in a different meaning
2498 // of integer literals used with VOP1/2/C and VOP3,
2499 // for example:
2500 //    v_exp_f32_e32 v5, -1 // VOP1: src0 = 0xFFFFFFFF
2501 //    v_exp_f32_e64 v5, -1 // VOP3: src0 = 0x80000001
2502 // Negative fp literals with preceding "-" are
2503 // handled likewise for uniformity.
2504 //
2505 bool
2506 AMDGPUAsmParser::parseSP3NegModifier() {
2507
2508   AsmToken NextToken[2];
2509   peekTokens(NextToken);
2510
2511   if (isToken(AsmToken::Minus) &&
2512       (isRegister(NextToken[0], NextToken[1]) ||
2513        NextToken[0].is(AsmToken::Pipe) ||
2514        isId(NextToken[0], "abs"))) {
2515     lex();
2516     return true;
2517   }
2518
2519   return false;
2520 }
2521
2522 OperandMatchResultTy
2523 AMDGPUAsmParser::parseRegOrImmWithFPInputMods(OperandVector &Operands,
2524                                               bool AllowImm) {
2525   bool Neg, SP3Neg;
2526   bool Abs, SP3Abs;
2527   SMLoc Loc;
2528
2529   // Disable ambiguous constructs like '--1' etc. Should use neg(-1) instead.
2530   if (isToken(AsmToken::Minus) && peekToken().is(AsmToken::Minus)) {
2531     Error(getLoc(), "invalid syntax, expected 'neg' modifier");
2532     return MatchOperand_ParseFail;
2533   }
2534
2535   SP3Neg = parseSP3NegModifier();
2536
2537   Loc = getLoc();
2538   Neg = trySkipId("neg");
2539   if (Neg && SP3Neg) {
2540     Error(Loc, "expected register or immediate");
2541     return MatchOperand_ParseFail;
2542   }
2543   if (Neg && !skipToken(AsmToken::LParen, "expected left paren after neg"))
2544     return MatchOperand_ParseFail;
2545
2546   Abs = trySkipId("abs");
2547   if (Abs && !skipToken(AsmToken::LParen, "expected left paren after abs"))
2548     return MatchOperand_ParseFail;
2549
2550   Loc = getLoc();
2551   SP3Abs = trySkipToken(AsmToken::Pipe);
2552   if (Abs && SP3Abs) {
2553     Error(Loc, "expected register or immediate");
2554     return MatchOperand_ParseFail;
2555   }
2556
2557   OperandMatchResultTy Res;
2558   if (AllowImm) {
2559     Res = parseRegOrImm(Operands, SP3Abs);
2560   } else {
2561     Res = parseReg(Operands);
2562   }
2563   if (Res != MatchOperand_Success) {
2564     return (SP3Neg || Neg || SP3Abs || Abs)?
MatchOperand_ParseFail : Res; 2565 } 2566 2567 if (SP3Abs && !skipToken(AsmToken::Pipe, "expected vertical bar")) 2568 return MatchOperand_ParseFail; 2569 if (Abs && !skipToken(AsmToken::RParen, "expected closing parentheses")) 2570 return MatchOperand_ParseFail; 2571 if (Neg && !skipToken(AsmToken::RParen, "expected closing parentheses")) 2572 return MatchOperand_ParseFail; 2573 2574 AMDGPUOperand::Modifiers Mods; 2575 Mods.Abs = Abs || SP3Abs; 2576 Mods.Neg = Neg || SP3Neg; 2577 2578 if (Mods.hasFPModifiers()) { 2579 AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back()); 2580 if (Op.isExpr()) { 2581 Error(Op.getStartLoc(), "expected an absolute expression"); 2582 return MatchOperand_ParseFail; 2583 } 2584 Op.setModifiers(Mods); 2585 } 2586 return MatchOperand_Success; 2587 } 2588 2589 OperandMatchResultTy 2590 AMDGPUAsmParser::parseRegOrImmWithIntInputMods(OperandVector &Operands, 2591 bool AllowImm) { 2592 bool Sext = trySkipId("sext"); 2593 if (Sext && !skipToken(AsmToken::LParen, "expected left paren after sext")) 2594 return MatchOperand_ParseFail; 2595 2596 OperandMatchResultTy Res; 2597 if (AllowImm) { 2598 Res = parseRegOrImm(Operands); 2599 } else { 2600 Res = parseReg(Operands); 2601 } 2602 if (Res != MatchOperand_Success) { 2603 return Sext? MatchOperand_ParseFail : Res; 2604 } 2605 2606 if (Sext && !skipToken(AsmToken::RParen, "expected closing parentheses")) 2607 return MatchOperand_ParseFail; 2608 2609 AMDGPUOperand::Modifiers Mods; 2610 Mods.Sext = Sext; 2611 2612 if (Mods.hasIntModifiers()) { 2613 AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back()); 2614 if (Op.isExpr()) { 2615 Error(Op.getStartLoc(), "expected an absolute expression"); 2616 return MatchOperand_ParseFail; 2617 } 2618 Op.setModifiers(Mods); 2619 } 2620 2621 return MatchOperand_Success; 2622 } 2623 2624 OperandMatchResultTy 2625 AMDGPUAsmParser::parseRegWithFPInputMods(OperandVector &Operands) { 2626 return parseRegOrImmWithFPInputMods(Operands, false); 2627 } 2628 2629 OperandMatchResultTy 2630 AMDGPUAsmParser::parseRegWithIntInputMods(OperandVector &Operands) { 2631 return parseRegOrImmWithIntInputMods(Operands, false); 2632 } 2633 2634 OperandMatchResultTy AMDGPUAsmParser::parseVReg32OrOff(OperandVector &Operands) { 2635 auto Loc = getLoc(); 2636 if (trySkipId("off")) { 2637 Operands.push_back(AMDGPUOperand::CreateImm(this, 0, Loc, 2638 AMDGPUOperand::ImmTyOff, false)); 2639 return MatchOperand_Success; 2640 } 2641 2642 if (!isRegister()) 2643 return MatchOperand_NoMatch; 2644 2645 std::unique_ptr<AMDGPUOperand> Reg = parseRegister(); 2646 if (Reg) { 2647 Operands.push_back(std::move(Reg)); 2648 return MatchOperand_Success; 2649 } 2650 2651 return MatchOperand_ParseFail; 2652 2653 } 2654 2655 unsigned AMDGPUAsmParser::checkTargetMatchPredicate(MCInst &Inst) { 2656 uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags; 2657 2658 if ((getForcedEncodingSize() == 32 && (TSFlags & SIInstrFlags::VOP3)) || 2659 (getForcedEncodingSize() == 64 && !(TSFlags & SIInstrFlags::VOP3)) || 2660 (isForcedDPP() && !(TSFlags & SIInstrFlags::DPP)) || 2661 (isForcedSDWA() && !(TSFlags & SIInstrFlags::SDWA)) ) 2662 return Match_InvalidOperand; 2663 2664 if ((TSFlags & SIInstrFlags::VOP3) && 2665 (TSFlags & SIInstrFlags::VOPAsmPrefer32Bit) && 2666 getForcedEncodingSize() != 64) 2667 return Match_PreferE32; 2668 2669 if (Inst.getOpcode() == AMDGPU::V_MAC_F32_sdwa_vi || 2670 Inst.getOpcode() == AMDGPU::V_MAC_F16_sdwa_vi) { 2671 // v_mac_f32/16 allow only dst_sel == DWORD; 2672 auto OpNum = 2673 
AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::dst_sel);
2674     const auto &Op = Inst.getOperand(OpNum);
2675     if (!Op.isImm() || Op.getImm() != AMDGPU::SDWA::SdwaSel::DWORD) {
2676       return Match_InvalidOperand;
2677     }
2678   }
2679
2680   return Match_Success;
2681 }
2682
2683 // What asm variants we should check
2684 ArrayRef<unsigned> AMDGPUAsmParser::getMatchedVariants() const {
2685   if (getForcedEncodingSize() == 32) {
2686     static const unsigned Variants[] = {AMDGPUAsmVariants::DEFAULT};
2687     return makeArrayRef(Variants);
2688   }
2689
2690   if (isForcedVOP3()) {
2691     static const unsigned Variants[] = {AMDGPUAsmVariants::VOP3};
2692     return makeArrayRef(Variants);
2693   }
2694
2695   if (isForcedSDWA()) {
2696     static const unsigned Variants[] = {AMDGPUAsmVariants::SDWA,
2697                                         AMDGPUAsmVariants::SDWA9};
2698     return makeArrayRef(Variants);
2699   }
2700
2701   if (isForcedDPP()) {
2702     static const unsigned Variants[] = {AMDGPUAsmVariants::DPP};
2703     return makeArrayRef(Variants);
2704   }
2705
2706   static const unsigned Variants[] = {
2707     AMDGPUAsmVariants::DEFAULT, AMDGPUAsmVariants::VOP3,
2708     AMDGPUAsmVariants::SDWA, AMDGPUAsmVariants::SDWA9, AMDGPUAsmVariants::DPP
2709   };
2710
2711   return makeArrayRef(Variants);
2712 }
2713
2714 unsigned AMDGPUAsmParser::findImplicitSGPRReadInVOP(const MCInst &Inst) const {
2715   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
2716   const unsigned Num = Desc.getNumImplicitUses();
2717   for (unsigned i = 0; i < Num; ++i) {
2718     unsigned Reg = Desc.ImplicitUses[i];
2719     switch (Reg) {
2720     case AMDGPU::FLAT_SCR:
2721     case AMDGPU::VCC:
2722     case AMDGPU::VCC_LO:
2723     case AMDGPU::VCC_HI:
2724     case AMDGPU::M0:
2725       return Reg;
2726     default:
2727       break;
2728     }
2729   }
2730   return AMDGPU::NoRegister;
2731 }
2732
2733 // NB: This code is correct only when used to check constant
2734 // bus limitations because GFX7 supports no f16 inline constants.
2735 // Note that there are no cases when a GFX7 opcode violates
2736 // constant bus limitations due to the use of an f16 constant.
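// For example, for a 32-bit source operand the integer 64 is an inline
// constant while 65 is not, and 0.5 is an inlinable fp value while 0.3 is not.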
2737 bool AMDGPUAsmParser::isInlineConstant(const MCInst &Inst, 2738 unsigned OpIdx) const { 2739 const MCInstrDesc &Desc = MII.get(Inst.getOpcode()); 2740 2741 if (!AMDGPU::isSISrcOperand(Desc, OpIdx)) { 2742 return false; 2743 } 2744 2745 const MCOperand &MO = Inst.getOperand(OpIdx); 2746 2747 int64_t Val = MO.getImm(); 2748 auto OpSize = AMDGPU::getOperandSize(Desc, OpIdx); 2749 2750 switch (OpSize) { // expected operand size 2751 case 8: 2752 return AMDGPU::isInlinableLiteral64(Val, hasInv2PiInlineImm()); 2753 case 4: 2754 return AMDGPU::isInlinableLiteral32(Val, hasInv2PiInlineImm()); 2755 case 2: { 2756 const unsigned OperandType = Desc.OpInfo[OpIdx].OperandType; 2757 if (OperandType == AMDGPU::OPERAND_REG_INLINE_C_V2INT16 || 2758 OperandType == AMDGPU::OPERAND_REG_INLINE_C_V2FP16 || 2759 OperandType == AMDGPU::OPERAND_REG_INLINE_AC_V2INT16 || 2760 OperandType == AMDGPU::OPERAND_REG_INLINE_AC_V2FP16 || 2761 OperandType == AMDGPU::OPERAND_REG_IMM_V2INT16 || 2762 OperandType == AMDGPU::OPERAND_REG_IMM_V2FP16) { 2763 return AMDGPU::isInlinableLiteralV216(Val, hasInv2PiInlineImm()); 2764 } else { 2765 return AMDGPU::isInlinableLiteral16(Val, hasInv2PiInlineImm()); 2766 } 2767 } 2768 default: 2769 llvm_unreachable("invalid operand size"); 2770 } 2771 } 2772 2773 unsigned AMDGPUAsmParser::getConstantBusLimit(unsigned Opcode) const { 2774 if (!isGFX10()) 2775 return 1; 2776 2777 switch (Opcode) { 2778 // 64-bit shift instructions can use only one scalar value input 2779 case AMDGPU::V_LSHLREV_B64: 2780 case AMDGPU::V_LSHLREV_B64_gfx10: 2781 case AMDGPU::V_LSHL_B64: 2782 case AMDGPU::V_LSHRREV_B64: 2783 case AMDGPU::V_LSHRREV_B64_gfx10: 2784 case AMDGPU::V_LSHR_B64: 2785 case AMDGPU::V_ASHRREV_I64: 2786 case AMDGPU::V_ASHRREV_I64_gfx10: 2787 case AMDGPU::V_ASHR_I64: 2788 return 1; 2789 default: 2790 return 2; 2791 } 2792 } 2793 2794 bool AMDGPUAsmParser::usesConstantBus(const MCInst &Inst, unsigned OpIdx) { 2795 const MCOperand &MO = Inst.getOperand(OpIdx); 2796 if (MO.isImm()) { 2797 return !isInlineConstant(Inst, OpIdx); 2798 } else if (MO.isReg()) { 2799 auto Reg = MO.getReg(); 2800 const MCRegisterInfo *TRI = getContext().getRegisterInfo(); 2801 return isSGPR(mc2PseudoReg(Reg), TRI) && Reg != SGPR_NULL; 2802 } else { 2803 return true; 2804 } 2805 } 2806 2807 bool AMDGPUAsmParser::validateConstantBusLimitations(const MCInst &Inst) { 2808 const unsigned Opcode = Inst.getOpcode(); 2809 const MCInstrDesc &Desc = MII.get(Opcode); 2810 unsigned ConstantBusUseCount = 0; 2811 unsigned NumLiterals = 0; 2812 unsigned LiteralSize; 2813 2814 if (Desc.TSFlags & 2815 (SIInstrFlags::VOPC | 2816 SIInstrFlags::VOP1 | SIInstrFlags::VOP2 | 2817 SIInstrFlags::VOP3 | SIInstrFlags::VOP3P | 2818 SIInstrFlags::SDWA)) { 2819 // Check special imm operands (used by madmk, etc) 2820 if (AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::imm) != -1) { 2821 ++ConstantBusUseCount; 2822 } 2823 2824 SmallDenseSet<unsigned> SGPRsUsed; 2825 unsigned SGPRUsed = findImplicitSGPRReadInVOP(Inst); 2826 if (SGPRUsed != AMDGPU::NoRegister) { 2827 SGPRsUsed.insert(SGPRUsed); 2828 ++ConstantBusUseCount; 2829 } 2830 2831 const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0); 2832 const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1); 2833 const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2); 2834 2835 const int OpIndices[] = { Src0Idx, Src1Idx, Src2Idx }; 2836 2837 for (int OpIdx : OpIndices) { 2838 if (OpIdx == -1) break; 2839 2840 const MCOperand &MO = 
Inst.getOperand(OpIdx); 2841 if (usesConstantBus(Inst, OpIdx)) { 2842 if (MO.isReg()) { 2843 const unsigned Reg = mc2PseudoReg(MO.getReg()); 2844 // Pairs of registers with a partial intersections like these 2845 // s0, s[0:1] 2846 // flat_scratch_lo, flat_scratch 2847 // flat_scratch_lo, flat_scratch_hi 2848 // are theoretically valid but they are disabled anyway. 2849 // Note that this code mimics SIInstrInfo::verifyInstruction 2850 if (!SGPRsUsed.count(Reg)) { 2851 SGPRsUsed.insert(Reg); 2852 ++ConstantBusUseCount; 2853 } 2854 } else { // Expression or a literal 2855 2856 if (Desc.OpInfo[OpIdx].OperandType == MCOI::OPERAND_IMMEDIATE) 2857 continue; // special operand like VINTERP attr_chan 2858 2859 // An instruction may use only one literal. 2860 // This has been validated on the previous step. 2861 // See validateVOP3Literal. 2862 // This literal may be used as more than one operand. 2863 // If all these operands are of the same size, 2864 // this literal counts as one scalar value. 2865 // Otherwise it counts as 2 scalar values. 2866 // See "GFX10 Shader Programming", section 3.6.2.3. 2867 2868 unsigned Size = AMDGPU::getOperandSize(Desc, OpIdx); 2869 if (Size < 4) Size = 4; 2870 2871 if (NumLiterals == 0) { 2872 NumLiterals = 1; 2873 LiteralSize = Size; 2874 } else if (LiteralSize != Size) { 2875 NumLiterals = 2; 2876 } 2877 } 2878 } 2879 } 2880 } 2881 ConstantBusUseCount += NumLiterals; 2882 2883 return ConstantBusUseCount <= getConstantBusLimit(Opcode); 2884 } 2885 2886 bool AMDGPUAsmParser::validateEarlyClobberLimitations(const MCInst &Inst) { 2887 const unsigned Opcode = Inst.getOpcode(); 2888 const MCInstrDesc &Desc = MII.get(Opcode); 2889 2890 const int DstIdx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::vdst); 2891 if (DstIdx == -1 || 2892 Desc.getOperandConstraint(DstIdx, MCOI::EARLY_CLOBBER) == -1) { 2893 return true; 2894 } 2895 2896 const MCRegisterInfo *TRI = getContext().getRegisterInfo(); 2897 2898 const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0); 2899 const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1); 2900 const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2); 2901 2902 assert(DstIdx != -1); 2903 const MCOperand &Dst = Inst.getOperand(DstIdx); 2904 assert(Dst.isReg()); 2905 const unsigned DstReg = mc2PseudoReg(Dst.getReg()); 2906 2907 const int SrcIndices[] = { Src0Idx, Src1Idx, Src2Idx }; 2908 2909 for (int SrcIdx : SrcIndices) { 2910 if (SrcIdx == -1) break; 2911 const MCOperand &Src = Inst.getOperand(SrcIdx); 2912 if (Src.isReg()) { 2913 const unsigned SrcReg = mc2PseudoReg(Src.getReg()); 2914 if (isRegIntersect(DstReg, SrcReg, TRI)) { 2915 return false; 2916 } 2917 } 2918 } 2919 2920 return true; 2921 } 2922 2923 bool AMDGPUAsmParser::validateIntClampSupported(const MCInst &Inst) { 2924 2925 const unsigned Opc = Inst.getOpcode(); 2926 const MCInstrDesc &Desc = MII.get(Opc); 2927 2928 if ((Desc.TSFlags & SIInstrFlags::IntClamp) != 0 && !hasIntClamp()) { 2929 int ClampIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp); 2930 assert(ClampIdx != -1); 2931 return Inst.getOperand(ClampIdx).getImm() == 0; 2932 } 2933 2934 return true; 2935 } 2936 2937 bool AMDGPUAsmParser::validateMIMGDataSize(const MCInst &Inst) { 2938 2939 const unsigned Opc = Inst.getOpcode(); 2940 const MCInstrDesc &Desc = MII.get(Opc); 2941 2942 if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0) 2943 return true; 2944 2945 int VDataIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdata); 2946 int DMaskIdx 
= AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask); 2947 int TFEIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::tfe); 2948 2949 assert(VDataIdx != -1); 2950 assert(DMaskIdx != -1); 2951 assert(TFEIdx != -1); 2952 2953 unsigned VDataSize = AMDGPU::getRegOperandSize(getMRI(), Desc, VDataIdx); 2954 unsigned TFESize = Inst.getOperand(TFEIdx).getImm()? 1 : 0; 2955 unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf; 2956 if (DMask == 0) 2957 DMask = 1; 2958 2959 unsigned DataSize = 2960 (Desc.TSFlags & SIInstrFlags::Gather4) ? 4 : countPopulation(DMask); 2961 if (hasPackedD16()) { 2962 int D16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::d16); 2963 if (D16Idx >= 0 && Inst.getOperand(D16Idx).getImm()) 2964 DataSize = (DataSize + 1) / 2; 2965 } 2966 2967 return (VDataSize / 4) == DataSize + TFESize; 2968 } 2969 2970 bool AMDGPUAsmParser::validateMIMGAddrSize(const MCInst &Inst) { 2971 const unsigned Opc = Inst.getOpcode(); 2972 const MCInstrDesc &Desc = MII.get(Opc); 2973 2974 if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0 || !isGFX10()) 2975 return true; 2976 2977 const AMDGPU::MIMGInfo *Info = AMDGPU::getMIMGInfo(Opc); 2978 const AMDGPU::MIMGBaseOpcodeInfo *BaseOpcode = 2979 AMDGPU::getMIMGBaseOpcodeInfo(Info->BaseOpcode); 2980 int VAddr0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vaddr0); 2981 int SrsrcIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::srsrc); 2982 int DimIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dim); 2983 2984 assert(VAddr0Idx != -1); 2985 assert(SrsrcIdx != -1); 2986 assert(DimIdx != -1); 2987 assert(SrsrcIdx > VAddr0Idx); 2988 2989 unsigned Dim = Inst.getOperand(DimIdx).getImm(); 2990 const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfoByEncoding(Dim); 2991 bool IsNSA = SrsrcIdx - VAddr0Idx > 1; 2992 unsigned VAddrSize = 2993 IsNSA ? SrsrcIdx - VAddr0Idx 2994 : AMDGPU::getRegOperandSize(getMRI(), Desc, VAddr0Idx) / 4; 2995 2996 unsigned AddrSize = BaseOpcode->NumExtraArgs + 2997 (BaseOpcode->Gradients ? DimInfo->NumGradients : 0) + 2998 (BaseOpcode->Coordinates ? DimInfo->NumCoords : 0) + 2999 (BaseOpcode->LodOrClampOrMip ? 1 : 0); 3000 if (!IsNSA) { 3001 if (AddrSize > 8) 3002 AddrSize = 16; 3003 else if (AddrSize > 4) 3004 AddrSize = 8; 3005 } 3006 3007 return VAddrSize == AddrSize; 3008 } 3009 3010 bool AMDGPUAsmParser::validateMIMGAtomicDMask(const MCInst &Inst) { 3011 3012 const unsigned Opc = Inst.getOpcode(); 3013 const MCInstrDesc &Desc = MII.get(Opc); 3014 3015 if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0) 3016 return true; 3017 if (!Desc.mayLoad() || !Desc.mayStore()) 3018 return true; // Not atomic 3019 3020 int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask); 3021 unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf; 3022 3023 // This is an incomplete check because image_atomic_cmpswap 3024 // may only use 0x3 and 0xf while other atomic operations 3025 // may use 0x1 and 0x3. However these limitations are 3026 // verified when we check that dmask matches dst size. 
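// For instance, a 32-bit image_atomic_add typically uses dmask 0x1 (0x3 for
// the 64-bit variant), and image_atomic_cmpswap uses 0x3 (0xf for 64-bit).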
3027 return DMask == 0x1 || DMask == 0x3 || DMask == 0xf; 3028 } 3029 3030 bool AMDGPUAsmParser::validateMIMGGatherDMask(const MCInst &Inst) { 3031 3032 const unsigned Opc = Inst.getOpcode(); 3033 const MCInstrDesc &Desc = MII.get(Opc); 3034 3035 if ((Desc.TSFlags & SIInstrFlags::Gather4) == 0) 3036 return true; 3037 3038 int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask); 3039 unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf; 3040 3041 // GATHER4 instructions use dmask in a different fashion compared to 3042 // other MIMG instructions. The only useful DMASK values are 3043 // 1=red, 2=green, 4=blue, 8=alpha. (e.g. 1 returns 3044 // (red,red,red,red) etc.) The ISA document doesn't mention 3045 // this. 3046 return DMask == 0x1 || DMask == 0x2 || DMask == 0x4 || DMask == 0x8; 3047 } 3048 3049 bool AMDGPUAsmParser::validateMIMGD16(const MCInst &Inst) { 3050 3051 const unsigned Opc = Inst.getOpcode(); 3052 const MCInstrDesc &Desc = MII.get(Opc); 3053 3054 if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0) 3055 return true; 3056 3057 int D16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::d16); 3058 if (D16Idx >= 0 && Inst.getOperand(D16Idx).getImm()) { 3059 if (isCI() || isSI()) 3060 return false; 3061 } 3062 3063 return true; 3064 } 3065 3066 bool AMDGPUAsmParser::validateMIMGDim(const MCInst &Inst) { 3067 const unsigned Opc = Inst.getOpcode(); 3068 const MCInstrDesc &Desc = MII.get(Opc); 3069 3070 if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0) 3071 return true; 3072 3073 int DimIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dim); 3074 if (DimIdx < 0) 3075 return true; 3076 3077 long Imm = Inst.getOperand(DimIdx).getImm(); 3078 if (Imm < 0 || Imm >= 8) 3079 return false; 3080 3081 return true; 3082 } 3083 3084 static bool IsRevOpcode(const unsigned Opcode) 3085 { 3086 switch (Opcode) { 3087 case AMDGPU::V_SUBREV_F32_e32: 3088 case AMDGPU::V_SUBREV_F32_e64: 3089 case AMDGPU::V_SUBREV_F32_e32_gfx10: 3090 case AMDGPU::V_SUBREV_F32_e32_gfx6_gfx7: 3091 case AMDGPU::V_SUBREV_F32_e32_vi: 3092 case AMDGPU::V_SUBREV_F32_e64_gfx10: 3093 case AMDGPU::V_SUBREV_F32_e64_gfx6_gfx7: 3094 case AMDGPU::V_SUBREV_F32_e64_vi: 3095 3096 case AMDGPU::V_SUBREV_I32_e32: 3097 case AMDGPU::V_SUBREV_I32_e64: 3098 case AMDGPU::V_SUBREV_I32_e32_gfx6_gfx7: 3099 case AMDGPU::V_SUBREV_I32_e64_gfx6_gfx7: 3100 3101 case AMDGPU::V_SUBBREV_U32_e32: 3102 case AMDGPU::V_SUBBREV_U32_e64: 3103 case AMDGPU::V_SUBBREV_U32_e32_gfx6_gfx7: 3104 case AMDGPU::V_SUBBREV_U32_e32_vi: 3105 case AMDGPU::V_SUBBREV_U32_e64_gfx6_gfx7: 3106 case AMDGPU::V_SUBBREV_U32_e64_vi: 3107 3108 case AMDGPU::V_SUBREV_U32_e32: 3109 case AMDGPU::V_SUBREV_U32_e64: 3110 case AMDGPU::V_SUBREV_U32_e32_gfx9: 3111 case AMDGPU::V_SUBREV_U32_e32_vi: 3112 case AMDGPU::V_SUBREV_U32_e64_gfx9: 3113 case AMDGPU::V_SUBREV_U32_e64_vi: 3114 3115 case AMDGPU::V_SUBREV_F16_e32: 3116 case AMDGPU::V_SUBREV_F16_e64: 3117 case AMDGPU::V_SUBREV_F16_e32_gfx10: 3118 case AMDGPU::V_SUBREV_F16_e32_vi: 3119 case AMDGPU::V_SUBREV_F16_e64_gfx10: 3120 case AMDGPU::V_SUBREV_F16_e64_vi: 3121 3122 case AMDGPU::V_SUBREV_U16_e32: 3123 case AMDGPU::V_SUBREV_U16_e64: 3124 case AMDGPU::V_SUBREV_U16_e32_vi: 3125 case AMDGPU::V_SUBREV_U16_e64_vi: 3126 3127 case AMDGPU::V_SUBREV_CO_U32_e32_gfx9: 3128 case AMDGPU::V_SUBREV_CO_U32_e64_gfx10: 3129 case AMDGPU::V_SUBREV_CO_U32_e64_gfx9: 3130 3131 case AMDGPU::V_SUBBREV_CO_U32_e32_gfx9: 3132 case AMDGPU::V_SUBBREV_CO_U32_e64_gfx9: 3133 3134 case AMDGPU::V_SUBREV_NC_U32_e32_gfx10: 3135 case 
AMDGPU::V_SUBREV_NC_U32_e64_gfx10: 3136 3137 case AMDGPU::V_SUBREV_CO_CI_U32_e32_gfx10: 3138 case AMDGPU::V_SUBREV_CO_CI_U32_e64_gfx10: 3139 3140 case AMDGPU::V_LSHRREV_B32_e32: 3141 case AMDGPU::V_LSHRREV_B32_e64: 3142 case AMDGPU::V_LSHRREV_B32_e32_gfx6_gfx7: 3143 case AMDGPU::V_LSHRREV_B32_e64_gfx6_gfx7: 3144 case AMDGPU::V_LSHRREV_B32_e32_vi: 3145 case AMDGPU::V_LSHRREV_B32_e64_vi: 3146 case AMDGPU::V_LSHRREV_B32_e32_gfx10: 3147 case AMDGPU::V_LSHRREV_B32_e64_gfx10: 3148 3149 case AMDGPU::V_ASHRREV_I32_e32: 3150 case AMDGPU::V_ASHRREV_I32_e64: 3151 case AMDGPU::V_ASHRREV_I32_e32_gfx10: 3152 case AMDGPU::V_ASHRREV_I32_e32_gfx6_gfx7: 3153 case AMDGPU::V_ASHRREV_I32_e32_vi: 3154 case AMDGPU::V_ASHRREV_I32_e64_gfx10: 3155 case AMDGPU::V_ASHRREV_I32_e64_gfx6_gfx7: 3156 case AMDGPU::V_ASHRREV_I32_e64_vi: 3157 3158 case AMDGPU::V_LSHLREV_B32_e32: 3159 case AMDGPU::V_LSHLREV_B32_e64: 3160 case AMDGPU::V_LSHLREV_B32_e32_gfx10: 3161 case AMDGPU::V_LSHLREV_B32_e32_gfx6_gfx7: 3162 case AMDGPU::V_LSHLREV_B32_e32_vi: 3163 case AMDGPU::V_LSHLREV_B32_e64_gfx10: 3164 case AMDGPU::V_LSHLREV_B32_e64_gfx6_gfx7: 3165 case AMDGPU::V_LSHLREV_B32_e64_vi: 3166 3167 case AMDGPU::V_LSHLREV_B16_e32: 3168 case AMDGPU::V_LSHLREV_B16_e64: 3169 case AMDGPU::V_LSHLREV_B16_e32_vi: 3170 case AMDGPU::V_LSHLREV_B16_e64_vi: 3171 case AMDGPU::V_LSHLREV_B16_gfx10: 3172 3173 case AMDGPU::V_LSHRREV_B16_e32: 3174 case AMDGPU::V_LSHRREV_B16_e64: 3175 case AMDGPU::V_LSHRREV_B16_e32_vi: 3176 case AMDGPU::V_LSHRREV_B16_e64_vi: 3177 case AMDGPU::V_LSHRREV_B16_gfx10: 3178 3179 case AMDGPU::V_ASHRREV_I16_e32: 3180 case AMDGPU::V_ASHRREV_I16_e64: 3181 case AMDGPU::V_ASHRREV_I16_e32_vi: 3182 case AMDGPU::V_ASHRREV_I16_e64_vi: 3183 case AMDGPU::V_ASHRREV_I16_gfx10: 3184 3185 case AMDGPU::V_LSHLREV_B64: 3186 case AMDGPU::V_LSHLREV_B64_gfx10: 3187 case AMDGPU::V_LSHLREV_B64_vi: 3188 3189 case AMDGPU::V_LSHRREV_B64: 3190 case AMDGPU::V_LSHRREV_B64_gfx10: 3191 case AMDGPU::V_LSHRREV_B64_vi: 3192 3193 case AMDGPU::V_ASHRREV_I64: 3194 case AMDGPU::V_ASHRREV_I64_gfx10: 3195 case AMDGPU::V_ASHRREV_I64_vi: 3196 3197 case AMDGPU::V_PK_LSHLREV_B16: 3198 case AMDGPU::V_PK_LSHLREV_B16_gfx10: 3199 case AMDGPU::V_PK_LSHLREV_B16_vi: 3200 3201 case AMDGPU::V_PK_LSHRREV_B16: 3202 case AMDGPU::V_PK_LSHRREV_B16_gfx10: 3203 case AMDGPU::V_PK_LSHRREV_B16_vi: 3204 case AMDGPU::V_PK_ASHRREV_I16: 3205 case AMDGPU::V_PK_ASHRREV_I16_gfx10: 3206 case AMDGPU::V_PK_ASHRREV_I16_vi: 3207 return true; 3208 default: 3209 return false; 3210 } 3211 } 3212 3213 bool AMDGPUAsmParser::validateLdsDirect(const MCInst &Inst) { 3214 3215 using namespace SIInstrFlags; 3216 const unsigned Opcode = Inst.getOpcode(); 3217 const MCInstrDesc &Desc = MII.get(Opcode); 3218 3219 // lds_direct register is defined so that it can be used 3220 // with 9-bit operands only. Ignore encodings which do not accept these. 3221 if ((Desc.TSFlags & (VOP1 | VOP2 | VOP3 | VOPC | VOP3P | SIInstrFlags::SDWA)) == 0) 3222 return true; 3223 3224 const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0); 3225 const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1); 3226 const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2); 3227 3228 const int SrcIndices[] = { Src1Idx, Src2Idx }; 3229 3230 // lds_direct cannot be specified as either src1 or src2. 
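// E.g. "v_add_f32 v0, lds_direct, v1" places lds_direct in src0 and passes
// this check, whereas "v_add_f32 v0, v1, lds_direct" is rejected here.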
3231 for (int SrcIdx : SrcIndices) { 3232 if (SrcIdx == -1) break; 3233 const MCOperand &Src = Inst.getOperand(SrcIdx); 3234 if (Src.isReg() && Src.getReg() == LDS_DIRECT) { 3235 return false; 3236 } 3237 } 3238 3239 if (Src0Idx == -1) 3240 return true; 3241 3242 const MCOperand &Src = Inst.getOperand(Src0Idx); 3243 if (!Src.isReg() || Src.getReg() != LDS_DIRECT) 3244 return true; 3245 3246 // lds_direct is specified as src0. Check additional limitations. 3247 return (Desc.TSFlags & SIInstrFlags::SDWA) == 0 && !IsRevOpcode(Opcode); 3248 } 3249 3250 SMLoc AMDGPUAsmParser::getFlatOffsetLoc(const OperandVector &Operands) const { 3251 for (unsigned i = 1, e = Operands.size(); i != e; ++i) { 3252 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 3253 if (Op.isFlatOffset()) 3254 return Op.getStartLoc(); 3255 } 3256 return getLoc(); 3257 } 3258 3259 bool AMDGPUAsmParser::validateFlatOffset(const MCInst &Inst, 3260 const OperandVector &Operands) { 3261 uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags; 3262 if ((TSFlags & SIInstrFlags::FLAT) == 0) 3263 return true; 3264 3265 auto Opcode = Inst.getOpcode(); 3266 auto OpNum = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::offset); 3267 assert(OpNum != -1); 3268 3269 const auto &Op = Inst.getOperand(OpNum); 3270 if (!hasFlatOffsets() && Op.getImm() != 0) { 3271 Error(getFlatOffsetLoc(Operands), 3272 "flat offset modifier is not supported on this GPU"); 3273 return false; 3274 } 3275 3276 // Address offset is 12-bit signed for GFX10, 13-bit for GFX9. 3277 // For FLAT segment the offset must be positive; 3278 // MSB is ignored and forced to zero. 3279 unsigned OffsetSize = isGFX9() ? 13 : 12; 3280 if (TSFlags & SIInstrFlags::IsNonFlatSeg) { 3281 if (!isIntN(OffsetSize, Op.getImm())) { 3282 Error(getFlatOffsetLoc(Operands), 3283 isGFX9() ? "expected a 13-bit signed offset" : 3284 "expected a 12-bit signed offset"); 3285 return false; 3286 } 3287 } else { 3288 if (!isUIntN(OffsetSize - 1, Op.getImm())) { 3289 Error(getFlatOffsetLoc(Operands), 3290 isGFX9() ? 
"expected a 12-bit unsigned offset" : 3291 "expected an 11-bit unsigned offset"); 3292 return false; 3293 } 3294 } 3295 3296 return true; 3297 } 3298 3299 bool AMDGPUAsmParser::validateSOPLiteral(const MCInst &Inst) const { 3300 unsigned Opcode = Inst.getOpcode(); 3301 const MCInstrDesc &Desc = MII.get(Opcode); 3302 if (!(Desc.TSFlags & (SIInstrFlags::SOP2 | SIInstrFlags::SOPC))) 3303 return true; 3304 3305 const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0); 3306 const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1); 3307 3308 const int OpIndices[] = { Src0Idx, Src1Idx }; 3309 3310 unsigned NumExprs = 0; 3311 unsigned NumLiterals = 0; 3312 uint32_t LiteralValue; 3313 3314 for (int OpIdx : OpIndices) { 3315 if (OpIdx == -1) break; 3316 3317 const MCOperand &MO = Inst.getOperand(OpIdx); 3318 // Exclude special imm operands (like that used by s_set_gpr_idx_on) 3319 if (AMDGPU::isSISrcOperand(Desc, OpIdx)) { 3320 if (MO.isImm() && !isInlineConstant(Inst, OpIdx)) { 3321 uint32_t Value = static_cast<uint32_t>(MO.getImm()); 3322 if (NumLiterals == 0 || LiteralValue != Value) { 3323 LiteralValue = Value; 3324 ++NumLiterals; 3325 } 3326 } else if (MO.isExpr()) { 3327 ++NumExprs; 3328 } 3329 } 3330 } 3331 3332 return NumLiterals + NumExprs <= 1; 3333 } 3334 3335 bool AMDGPUAsmParser::validateOpSel(const MCInst &Inst) { 3336 const unsigned Opc = Inst.getOpcode(); 3337 if (Opc == AMDGPU::V_PERMLANE16_B32_gfx10 || 3338 Opc == AMDGPU::V_PERMLANEX16_B32_gfx10) { 3339 int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel); 3340 unsigned OpSel = Inst.getOperand(OpSelIdx).getImm(); 3341 3342 if (OpSel & ~3) 3343 return false; 3344 } 3345 return true; 3346 } 3347 3348 // Check if VCC register matches wavefront size 3349 bool AMDGPUAsmParser::validateVccOperand(unsigned Reg) const { 3350 auto FB = getFeatureBits(); 3351 return (FB[AMDGPU::FeatureWavefrontSize64] && Reg == AMDGPU::VCC) || 3352 (FB[AMDGPU::FeatureWavefrontSize32] && Reg == AMDGPU::VCC_LO); 3353 } 3354 3355 // VOP3 literal is only allowed in GFX10+ and only one can be used 3356 bool AMDGPUAsmParser::validateVOP3Literal(const MCInst &Inst) const { 3357 unsigned Opcode = Inst.getOpcode(); 3358 const MCInstrDesc &Desc = MII.get(Opcode); 3359 if (!(Desc.TSFlags & (SIInstrFlags::VOP3 | SIInstrFlags::VOP3P))) 3360 return true; 3361 3362 const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0); 3363 const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1); 3364 const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2); 3365 3366 const int OpIndices[] = { Src0Idx, Src1Idx, Src2Idx }; 3367 3368 unsigned NumExprs = 0; 3369 unsigned NumLiterals = 0; 3370 uint32_t LiteralValue; 3371 3372 for (int OpIdx : OpIndices) { 3373 if (OpIdx == -1) break; 3374 3375 const MCOperand &MO = Inst.getOperand(OpIdx); 3376 if (!MO.isImm() && !MO.isExpr()) 3377 continue; 3378 if (!AMDGPU::isSISrcOperand(Desc, OpIdx)) 3379 continue; 3380 3381 if (OpIdx == Src2Idx && (Desc.TSFlags & SIInstrFlags::IsMAI) && 3382 getFeatureBits()[AMDGPU::FeatureMFMAInlineLiteralBug]) 3383 return false; 3384 3385 if (MO.isImm() && !isInlineConstant(Inst, OpIdx)) { 3386 uint32_t Value = static_cast<uint32_t>(MO.getImm()); 3387 if (NumLiterals == 0 || LiteralValue != Value) { 3388 LiteralValue = Value; 3389 ++NumLiterals; 3390 } 3391 } else if (MO.isExpr()) { 3392 ++NumExprs; 3393 } 3394 } 3395 NumLiterals += NumExprs; 3396 3397 return !NumLiterals || 3398 (NumLiterals == 1 && 
getFeatureBits()[AMDGPU::FeatureVOP3Literal]); 3399 } 3400 3401 bool AMDGPUAsmParser::validateInstruction(const MCInst &Inst, 3402 const SMLoc &IDLoc, 3403 const OperandVector &Operands) { 3404 if (!validateLdsDirect(Inst)) { 3405 Error(IDLoc, 3406 "invalid use of lds_direct"); 3407 return false; 3408 } 3409 if (!validateSOPLiteral(Inst)) { 3410 Error(IDLoc, 3411 "only one literal operand is allowed"); 3412 return false; 3413 } 3414 if (!validateVOP3Literal(Inst)) { 3415 Error(IDLoc, 3416 "invalid literal operand"); 3417 return false; 3418 } 3419 if (!validateConstantBusLimitations(Inst)) { 3420 Error(IDLoc, 3421 "invalid operand (violates constant bus restrictions)"); 3422 return false; 3423 } 3424 if (!validateEarlyClobberLimitations(Inst)) { 3425 Error(IDLoc, 3426 "destination must be different than all sources"); 3427 return false; 3428 } 3429 if (!validateIntClampSupported(Inst)) { 3430 Error(IDLoc, 3431 "integer clamping is not supported on this GPU"); 3432 return false; 3433 } 3434 if (!validateOpSel(Inst)) { 3435 Error(IDLoc, 3436 "invalid op_sel operand"); 3437 return false; 3438 } 3439 // For MUBUF/MTBUF d16 is a part of opcode, so there is nothing to validate. 3440 if (!validateMIMGD16(Inst)) { 3441 Error(IDLoc, 3442 "d16 modifier is not supported on this GPU"); 3443 return false; 3444 } 3445 if (!validateMIMGDim(Inst)) { 3446 Error(IDLoc, "dim modifier is required on this GPU"); 3447 return false; 3448 } 3449 if (!validateMIMGDataSize(Inst)) { 3450 Error(IDLoc, 3451 "image data size does not match dmask and tfe"); 3452 return false; 3453 } 3454 if (!validateMIMGAddrSize(Inst)) { 3455 Error(IDLoc, 3456 "image address size does not match dim and a16"); 3457 return false; 3458 } 3459 if (!validateMIMGAtomicDMask(Inst)) { 3460 Error(IDLoc, 3461 "invalid atomic image dmask"); 3462 return false; 3463 } 3464 if (!validateMIMGGatherDMask(Inst)) { 3465 Error(IDLoc, 3466 "invalid image_gather dmask: only one bit must be set"); 3467 return false; 3468 } 3469 if (!validateFlatOffset(Inst, Operands)) { 3470 return false; 3471 } 3472 3473 return true; 3474 } 3475 3476 static std::string AMDGPUMnemonicSpellCheck(StringRef S, 3477 const FeatureBitset &FBS, 3478 unsigned VariantID = 0); 3479 3480 bool AMDGPUAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode, 3481 OperandVector &Operands, 3482 MCStreamer &Out, 3483 uint64_t &ErrorInfo, 3484 bool MatchingInlineAsm) { 3485 MCInst Inst; 3486 unsigned Result = Match_Success; 3487 for (auto Variant : getMatchedVariants()) { 3488 uint64_t EI; 3489 auto R = MatchInstructionImpl(Operands, Inst, EI, MatchingInlineAsm, 3490 Variant); 3491 // We order match statuses from least to most specific. 
We use most specific 3492 // status as resulting 3493 // Match_MnemonicFail < Match_InvalidOperand < Match_MissingFeature < Match_PreferE32 3494 if ((R == Match_Success) || 3495 (R == Match_PreferE32) || 3496 (R == Match_MissingFeature && Result != Match_PreferE32) || 3497 (R == Match_InvalidOperand && Result != Match_MissingFeature 3498 && Result != Match_PreferE32) || 3499 (R == Match_MnemonicFail && Result != Match_InvalidOperand 3500 && Result != Match_MissingFeature 3501 && Result != Match_PreferE32)) { 3502 Result = R; 3503 ErrorInfo = EI; 3504 } 3505 if (R == Match_Success) 3506 break; 3507 } 3508 3509 switch (Result) { 3510 default: break; 3511 case Match_Success: 3512 if (!validateInstruction(Inst, IDLoc, Operands)) { 3513 return true; 3514 } 3515 Inst.setLoc(IDLoc); 3516 Out.EmitInstruction(Inst, getSTI()); 3517 return false; 3518 3519 case Match_MissingFeature: 3520 return Error(IDLoc, "instruction not supported on this GPU"); 3521 3522 case Match_MnemonicFail: { 3523 FeatureBitset FBS = ComputeAvailableFeatures(getSTI().getFeatureBits()); 3524 std::string Suggestion = AMDGPUMnemonicSpellCheck( 3525 ((AMDGPUOperand &)*Operands[0]).getToken(), FBS); 3526 return Error(IDLoc, "invalid instruction" + Suggestion, 3527 ((AMDGPUOperand &)*Operands[0]).getLocRange()); 3528 } 3529 3530 case Match_InvalidOperand: { 3531 SMLoc ErrorLoc = IDLoc; 3532 if (ErrorInfo != ~0ULL) { 3533 if (ErrorInfo >= Operands.size()) { 3534 return Error(IDLoc, "too few operands for instruction"); 3535 } 3536 ErrorLoc = ((AMDGPUOperand &)*Operands[ErrorInfo]).getStartLoc(); 3537 if (ErrorLoc == SMLoc()) 3538 ErrorLoc = IDLoc; 3539 } 3540 return Error(ErrorLoc, "invalid operand for instruction"); 3541 } 3542 3543 case Match_PreferE32: 3544 return Error(IDLoc, "internal error: instruction without _e64 suffix " 3545 "should be encoded as e32"); 3546 } 3547 llvm_unreachable("Implement any new match types added!"); 3548 } 3549 3550 bool AMDGPUAsmParser::ParseAsAbsoluteExpression(uint32_t &Ret) { 3551 int64_t Tmp = -1; 3552 if (getLexer().isNot(AsmToken::Integer) && getLexer().isNot(AsmToken::Identifier)) { 3553 return true; 3554 } 3555 if (getParser().parseAbsoluteExpression(Tmp)) { 3556 return true; 3557 } 3558 Ret = static_cast<uint32_t>(Tmp); 3559 return false; 3560 } 3561 3562 bool AMDGPUAsmParser::ParseDirectiveMajorMinor(uint32_t &Major, 3563 uint32_t &Minor) { 3564 if (ParseAsAbsoluteExpression(Major)) 3565 return TokError("invalid major version"); 3566 3567 if (getLexer().isNot(AsmToken::Comma)) 3568 return TokError("minor version number required, comma expected"); 3569 Lex(); 3570 3571 if (ParseAsAbsoluteExpression(Minor)) 3572 return TokError("invalid minor version"); 3573 3574 return false; 3575 } 3576 3577 bool AMDGPUAsmParser::ParseDirectiveAMDGCNTarget() { 3578 if (getSTI().getTargetTriple().getArch() != Triple::amdgcn) 3579 return TokError("directive only supported for amdgcn architecture"); 3580 3581 std::string Target; 3582 3583 SMLoc TargetStart = getTok().getLoc(); 3584 if (getParser().parseEscapedString(Target)) 3585 return true; 3586 SMRange TargetRange = SMRange(TargetStart, getTok().getLoc()); 3587 3588 std::string ExpectedTarget; 3589 raw_string_ostream ExpectedTargetOS(ExpectedTarget); 3590 IsaInfo::streamIsaVersion(&getSTI(), ExpectedTargetOS); 3591 3592 if (Target != ExpectedTargetOS.str()) 3593 return getParser().Error(TargetRange.Start, "target must match options", 3594 TargetRange); 3595 3596 getTargetStreamer().EmitDirectiveAMDGCNTarget(Target); 3597 return false; 3598 } 3599 3600 bool 
AMDGPUAsmParser::OutOfRangeError(SMRange Range) { 3601 return getParser().Error(Range.Start, "value out of range", Range); 3602 } 3603 3604 bool AMDGPUAsmParser::calculateGPRBlocks( 3605 const FeatureBitset &Features, bool VCCUsed, bool FlatScrUsed, 3606 bool XNACKUsed, Optional<bool> EnableWavefrontSize32, unsigned NextFreeVGPR, 3607 SMRange VGPRRange, unsigned NextFreeSGPR, SMRange SGPRRange, 3608 unsigned &VGPRBlocks, unsigned &SGPRBlocks) { 3609 // TODO(scott.linder): These calculations are duplicated from 3610 // AMDGPUAsmPrinter::getSIProgramInfo and could be unified. 3611 IsaVersion Version = getIsaVersion(getSTI().getCPU()); 3612 3613 unsigned NumVGPRs = NextFreeVGPR; 3614 unsigned NumSGPRs = NextFreeSGPR; 3615 3616 if (Version.Major >= 10) 3617 NumSGPRs = 0; 3618 else { 3619 unsigned MaxAddressableNumSGPRs = 3620 IsaInfo::getAddressableNumSGPRs(&getSTI()); 3621 3622 if (Version.Major >= 8 && !Features.test(FeatureSGPRInitBug) && 3623 NumSGPRs > MaxAddressableNumSGPRs) 3624 return OutOfRangeError(SGPRRange); 3625 3626 NumSGPRs += 3627 IsaInfo::getNumExtraSGPRs(&getSTI(), VCCUsed, FlatScrUsed, XNACKUsed); 3628 3629 if ((Version.Major <= 7 || Features.test(FeatureSGPRInitBug)) && 3630 NumSGPRs > MaxAddressableNumSGPRs) 3631 return OutOfRangeError(SGPRRange); 3632 3633 if (Features.test(FeatureSGPRInitBug)) 3634 NumSGPRs = IsaInfo::FIXED_NUM_SGPRS_FOR_INIT_BUG; 3635 } 3636 3637 VGPRBlocks = 3638 IsaInfo::getNumVGPRBlocks(&getSTI(), NumVGPRs, EnableWavefrontSize32); 3639 SGPRBlocks = IsaInfo::getNumSGPRBlocks(&getSTI(), NumSGPRs); 3640 3641 return false; 3642 } 3643 3644 bool AMDGPUAsmParser::ParseDirectiveAMDHSAKernel() { 3645 if (getSTI().getTargetTriple().getArch() != Triple::amdgcn) 3646 return TokError("directive only supported for amdgcn architecture"); 3647 3648 if (getSTI().getTargetTriple().getOS() != Triple::AMDHSA) 3649 return TokError("directive only supported for amdhsa OS"); 3650 3651 StringRef KernelName; 3652 if (getParser().parseIdentifier(KernelName)) 3653 return true; 3654 3655 kernel_descriptor_t KD = getDefaultAmdhsaKernelDescriptor(&getSTI()); 3656 3657 StringSet<> Seen; 3658 3659 IsaVersion IVersion = getIsaVersion(getSTI().getCPU()); 3660 3661 SMRange VGPRRange; 3662 uint64_t NextFreeVGPR = 0; 3663 SMRange SGPRRange; 3664 uint64_t NextFreeSGPR = 0; 3665 unsigned UserSGPRCount = 0; 3666 bool ReserveVCC = true; 3667 bool ReserveFlatScr = true; 3668 bool ReserveXNACK = hasXNACK(); 3669 Optional<bool> EnableWavefrontSize32; 3670 3671 while (true) { 3672 while (getLexer().is(AsmToken::EndOfStatement)) 3673 Lex(); 3674 3675 if (getLexer().isNot(AsmToken::Identifier)) 3676 return TokError("expected .amdhsa_ directive or .end_amdhsa_kernel"); 3677 3678 StringRef ID = getTok().getIdentifier(); 3679 SMRange IDRange = getTok().getLocRange(); 3680 Lex(); 3681 3682 if (ID == ".end_amdhsa_kernel") 3683 break; 3684 3685 if (Seen.find(ID) != Seen.end()) 3686 return TokError(".amdhsa_ directives cannot be repeated"); 3687 Seen.insert(ID); 3688 3689 SMLoc ValStart = getTok().getLoc(); 3690 int64_t IVal; 3691 if (getParser().parseAbsoluteExpression(IVal)) 3692 return true; 3693 SMLoc ValEnd = getTok().getLoc(); 3694 SMRange ValRange = SMRange(ValStart, ValEnd); 3695 3696 if (IVal < 0) 3697 return OutOfRangeError(ValRange); 3698 3699 uint64_t Val = IVal; 3700 3701 #define PARSE_BITS_ENTRY(FIELD, ENTRY, VALUE, RANGE) \ 3702 if (!isUInt<ENTRY##_WIDTH>(VALUE)) \ 3703 return OutOfRangeError(RANGE); \ 3704 AMDHSA_BITS_SET(FIELD, ENTRY, VALUE); 3705 3706 if (ID == 
".amdhsa_group_segment_fixed_size") { 3707 if (!isUInt<sizeof(KD.group_segment_fixed_size) * CHAR_BIT>(Val)) 3708 return OutOfRangeError(ValRange); 3709 KD.group_segment_fixed_size = Val; 3710 } else if (ID == ".amdhsa_private_segment_fixed_size") { 3711 if (!isUInt<sizeof(KD.private_segment_fixed_size) * CHAR_BIT>(Val)) 3712 return OutOfRangeError(ValRange); 3713 KD.private_segment_fixed_size = Val; 3714 } else if (ID == ".amdhsa_user_sgpr_private_segment_buffer") { 3715 PARSE_BITS_ENTRY(KD.kernel_code_properties, 3716 KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_BUFFER, 3717 Val, ValRange); 3718 if (Val) 3719 UserSGPRCount += 4; 3720 } else if (ID == ".amdhsa_user_sgpr_dispatch_ptr") { 3721 PARSE_BITS_ENTRY(KD.kernel_code_properties, 3722 KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR, Val, 3723 ValRange); 3724 if (Val) 3725 UserSGPRCount += 2; 3726 } else if (ID == ".amdhsa_user_sgpr_queue_ptr") { 3727 PARSE_BITS_ENTRY(KD.kernel_code_properties, 3728 KERNEL_CODE_PROPERTY_ENABLE_SGPR_QUEUE_PTR, Val, 3729 ValRange); 3730 if (Val) 3731 UserSGPRCount += 2; 3732 } else if (ID == ".amdhsa_user_sgpr_kernarg_segment_ptr") { 3733 PARSE_BITS_ENTRY(KD.kernel_code_properties, 3734 KERNEL_CODE_PROPERTY_ENABLE_SGPR_KERNARG_SEGMENT_PTR, 3735 Val, ValRange); 3736 if (Val) 3737 UserSGPRCount += 2; 3738 } else if (ID == ".amdhsa_user_sgpr_dispatch_id") { 3739 PARSE_BITS_ENTRY(KD.kernel_code_properties, 3740 KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_ID, Val, 3741 ValRange); 3742 if (Val) 3743 UserSGPRCount += 2; 3744 } else if (ID == ".amdhsa_user_sgpr_flat_scratch_init") { 3745 PARSE_BITS_ENTRY(KD.kernel_code_properties, 3746 KERNEL_CODE_PROPERTY_ENABLE_SGPR_FLAT_SCRATCH_INIT, Val, 3747 ValRange); 3748 if (Val) 3749 UserSGPRCount += 2; 3750 } else if (ID == ".amdhsa_user_sgpr_private_segment_size") { 3751 PARSE_BITS_ENTRY(KD.kernel_code_properties, 3752 KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_SIZE, 3753 Val, ValRange); 3754 if (Val) 3755 UserSGPRCount += 1; 3756 } else if (ID == ".amdhsa_wavefront_size32") { 3757 if (IVersion.Major < 10) 3758 return getParser().Error(IDRange.Start, "directive requires gfx10+", 3759 IDRange); 3760 EnableWavefrontSize32 = Val; 3761 PARSE_BITS_ENTRY(KD.kernel_code_properties, 3762 KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32, 3763 Val, ValRange); 3764 } else if (ID == ".amdhsa_system_sgpr_private_segment_wavefront_offset") { 3765 PARSE_BITS_ENTRY( 3766 KD.compute_pgm_rsrc2, 3767 COMPUTE_PGM_RSRC2_ENABLE_SGPR_PRIVATE_SEGMENT_WAVEFRONT_OFFSET, Val, 3768 ValRange); 3769 } else if (ID == ".amdhsa_system_sgpr_workgroup_id_x") { 3770 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 3771 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_X, Val, 3772 ValRange); 3773 } else if (ID == ".amdhsa_system_sgpr_workgroup_id_y") { 3774 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 3775 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Y, Val, 3776 ValRange); 3777 } else if (ID == ".amdhsa_system_sgpr_workgroup_id_z") { 3778 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 3779 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Z, Val, 3780 ValRange); 3781 } else if (ID == ".amdhsa_system_sgpr_workgroup_info") { 3782 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 3783 COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_INFO, Val, 3784 ValRange); 3785 } else if (ID == ".amdhsa_system_vgpr_workitem_id") { 3786 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 3787 COMPUTE_PGM_RSRC2_ENABLE_VGPR_WORKITEM_ID, Val, 3788 ValRange); 3789 } else if (ID == ".amdhsa_next_free_vgpr") { 3790 VGPRRange = ValRange; 3791 NextFreeVGPR = Val; 3792 } else if (ID == 
".amdhsa_next_free_sgpr") { 3793 SGPRRange = ValRange; 3794 NextFreeSGPR = Val; 3795 } else if (ID == ".amdhsa_reserve_vcc") { 3796 if (!isUInt<1>(Val)) 3797 return OutOfRangeError(ValRange); 3798 ReserveVCC = Val; 3799 } else if (ID == ".amdhsa_reserve_flat_scratch") { 3800 if (IVersion.Major < 7) 3801 return getParser().Error(IDRange.Start, "directive requires gfx7+", 3802 IDRange); 3803 if (!isUInt<1>(Val)) 3804 return OutOfRangeError(ValRange); 3805 ReserveFlatScr = Val; 3806 } else if (ID == ".amdhsa_reserve_xnack_mask") { 3807 if (IVersion.Major < 8) 3808 return getParser().Error(IDRange.Start, "directive requires gfx8+", 3809 IDRange); 3810 if (!isUInt<1>(Val)) 3811 return OutOfRangeError(ValRange); 3812 ReserveXNACK = Val; 3813 } else if (ID == ".amdhsa_float_round_mode_32") { 3814 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, 3815 COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_32, Val, ValRange); 3816 } else if (ID == ".amdhsa_float_round_mode_16_64") { 3817 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, 3818 COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_16_64, Val, ValRange); 3819 } else if (ID == ".amdhsa_float_denorm_mode_32") { 3820 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, 3821 COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_32, Val, ValRange); 3822 } else if (ID == ".amdhsa_float_denorm_mode_16_64") { 3823 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, 3824 COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_16_64, Val, 3825 ValRange); 3826 } else if (ID == ".amdhsa_dx10_clamp") { 3827 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, 3828 COMPUTE_PGM_RSRC1_ENABLE_DX10_CLAMP, Val, ValRange); 3829 } else if (ID == ".amdhsa_ieee_mode") { 3830 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_ENABLE_IEEE_MODE, 3831 Val, ValRange); 3832 } else if (ID == ".amdhsa_fp16_overflow") { 3833 if (IVersion.Major < 9) 3834 return getParser().Error(IDRange.Start, "directive requires gfx9+", 3835 IDRange); 3836 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_FP16_OVFL, Val, 3837 ValRange); 3838 } else if (ID == ".amdhsa_workgroup_processor_mode") { 3839 if (IVersion.Major < 10) 3840 return getParser().Error(IDRange.Start, "directive requires gfx10+", 3841 IDRange); 3842 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_WGP_MODE, Val, 3843 ValRange); 3844 } else if (ID == ".amdhsa_memory_ordered") { 3845 if (IVersion.Major < 10) 3846 return getParser().Error(IDRange.Start, "directive requires gfx10+", 3847 IDRange); 3848 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_MEM_ORDERED, Val, 3849 ValRange); 3850 } else if (ID == ".amdhsa_forward_progress") { 3851 if (IVersion.Major < 10) 3852 return getParser().Error(IDRange.Start, "directive requires gfx10+", 3853 IDRange); 3854 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_FWD_PROGRESS, Val, 3855 ValRange); 3856 } else if (ID == ".amdhsa_exception_fp_ieee_invalid_op") { 3857 PARSE_BITS_ENTRY( 3858 KD.compute_pgm_rsrc2, 3859 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INVALID_OPERATION, Val, 3860 ValRange); 3861 } else if (ID == ".amdhsa_exception_fp_denorm_src") { 3862 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 3863 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_FP_DENORMAL_SOURCE, 3864 Val, ValRange); 3865 } else if (ID == ".amdhsa_exception_fp_ieee_div_zero") { 3866 PARSE_BITS_ENTRY( 3867 KD.compute_pgm_rsrc2, 3868 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_DIVISION_BY_ZERO, Val, 3869 ValRange); 3870 } else if (ID == ".amdhsa_exception_fp_ieee_overflow") { 3871 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 3872 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_OVERFLOW, 3873 Val, ValRange); 3874 } else if 
(ID == ".amdhsa_exception_fp_ieee_underflow") { 3875 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 3876 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_UNDERFLOW, 3877 Val, ValRange); 3878 } else if (ID == ".amdhsa_exception_fp_ieee_inexact") { 3879 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 3880 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INEXACT, 3881 Val, ValRange); 3882 } else if (ID == ".amdhsa_exception_int_div_zero") { 3883 PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2, 3884 COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_INT_DIVIDE_BY_ZERO, 3885 Val, ValRange); 3886 } else { 3887 return getParser().Error(IDRange.Start, 3888 "unknown .amdhsa_kernel directive", IDRange); 3889 } 3890 3891 #undef PARSE_BITS_ENTRY 3892 } 3893 3894 if (Seen.find(".amdhsa_next_free_vgpr") == Seen.end()) 3895 return TokError(".amdhsa_next_free_vgpr directive is required"); 3896 3897 if (Seen.find(".amdhsa_next_free_sgpr") == Seen.end()) 3898 return TokError(".amdhsa_next_free_sgpr directive is required"); 3899 3900 unsigned VGPRBlocks; 3901 unsigned SGPRBlocks; 3902 if (calculateGPRBlocks(getFeatureBits(), ReserveVCC, ReserveFlatScr, 3903 ReserveXNACK, EnableWavefrontSize32, NextFreeVGPR, 3904 VGPRRange, NextFreeSGPR, SGPRRange, VGPRBlocks, 3905 SGPRBlocks)) 3906 return true; 3907 3908 if (!isUInt<COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT_WIDTH>( 3909 VGPRBlocks)) 3910 return OutOfRangeError(VGPRRange); 3911 AMDHSA_BITS_SET(KD.compute_pgm_rsrc1, 3912 COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT, VGPRBlocks); 3913 3914 if (!isUInt<COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT_WIDTH>( 3915 SGPRBlocks)) 3916 return OutOfRangeError(SGPRRange); 3917 AMDHSA_BITS_SET(KD.compute_pgm_rsrc1, 3918 COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT, 3919 SGPRBlocks); 3920 3921 if (!isUInt<COMPUTE_PGM_RSRC2_USER_SGPR_COUNT_WIDTH>(UserSGPRCount)) 3922 return TokError("too many user SGPRs enabled"); 3923 AMDHSA_BITS_SET(KD.compute_pgm_rsrc2, COMPUTE_PGM_RSRC2_USER_SGPR_COUNT, 3924 UserSGPRCount); 3925 3926 getTargetStreamer().EmitAmdhsaKernelDescriptor( 3927 getSTI(), KernelName, KD, NextFreeVGPR, NextFreeSGPR, ReserveVCC, 3928 ReserveFlatScr, ReserveXNACK); 3929 return false; 3930 } 3931 3932 bool AMDGPUAsmParser::ParseDirectiveHSACodeObjectVersion() { 3933 uint32_t Major; 3934 uint32_t Minor; 3935 3936 if (ParseDirectiveMajorMinor(Major, Minor)) 3937 return true; 3938 3939 getTargetStreamer().EmitDirectiveHSACodeObjectVersion(Major, Minor); 3940 return false; 3941 } 3942 3943 bool AMDGPUAsmParser::ParseDirectiveHSACodeObjectISA() { 3944 uint32_t Major; 3945 uint32_t Minor; 3946 uint32_t Stepping; 3947 StringRef VendorName; 3948 StringRef ArchName; 3949 3950 // If this directive has no arguments, then use the ISA version for the 3951 // targeted GPU. 
3952 if (getLexer().is(AsmToken::EndOfStatement)) { 3953 AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU()); 3954 getTargetStreamer().EmitDirectiveHSACodeObjectISA(ISA.Major, ISA.Minor, 3955 ISA.Stepping, 3956 "AMD", "AMDGPU"); 3957 return false; 3958 } 3959 3960 if (ParseDirectiveMajorMinor(Major, Minor)) 3961 return true; 3962 3963 if (getLexer().isNot(AsmToken::Comma)) 3964 return TokError("stepping version number required, comma expected"); 3965 Lex(); 3966 3967 if (ParseAsAbsoluteExpression(Stepping)) 3968 return TokError("invalid stepping version"); 3969 3970 if (getLexer().isNot(AsmToken::Comma)) 3971 return TokError("vendor name required, comma expected"); 3972 Lex(); 3973 3974 if (getLexer().isNot(AsmToken::String)) 3975 return TokError("invalid vendor name"); 3976 3977 VendorName = getLexer().getTok().getStringContents(); 3978 Lex(); 3979 3980 if (getLexer().isNot(AsmToken::Comma)) 3981 return TokError("arch name required, comma expected"); 3982 Lex(); 3983 3984 if (getLexer().isNot(AsmToken::String)) 3985 return TokError("invalid arch name"); 3986 3987 ArchName = getLexer().getTok().getStringContents(); 3988 Lex(); 3989 3990 getTargetStreamer().EmitDirectiveHSACodeObjectISA(Major, Minor, Stepping, 3991 VendorName, ArchName); 3992 return false; 3993 } 3994 3995 bool AMDGPUAsmParser::ParseAMDKernelCodeTValue(StringRef ID, 3996 amd_kernel_code_t &Header) { 3997 // max_scratch_backing_memory_byte_size is deprecated. Ignore it while parsing 3998 // assembly for backwards compatibility. 3999 if (ID == "max_scratch_backing_memory_byte_size") { 4000 Parser.eatToEndOfStatement(); 4001 return false; 4002 } 4003 4004 SmallString<40> ErrStr; 4005 raw_svector_ostream Err(ErrStr); 4006 if (!parseAmdKernelCodeField(ID, getParser(), Header, Err)) { 4007 return TokError(Err.str()); 4008 } 4009 Lex(); 4010 4011 if (ID == "enable_wavefront_size32") { 4012 if (Header.code_properties & AMD_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32) { 4013 if (!isGFX10()) 4014 return TokError("enable_wavefront_size32=1 is only allowed on GFX10+"); 4015 if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize32]) 4016 return TokError("enable_wavefront_size32=1 requires +WavefrontSize32"); 4017 } else { 4018 if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize64]) 4019 return TokError("enable_wavefront_size32=0 requires +WavefrontSize64"); 4020 } 4021 } 4022 4023 if (ID == "wavefront_size") { 4024 if (Header.wavefront_size == 5) { 4025 if (!isGFX10()) 4026 return TokError("wavefront_size=5 is only allowed on GFX10+"); 4027 if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize32]) 4028 return TokError("wavefront_size=5 requires +WavefrontSize32"); 4029 } else if (Header.wavefront_size == 6) { 4030 if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize64]) 4031 return TokError("wavefront_size=6 requires +WavefrontSize64"); 4032 } 4033 } 4034 4035 if (ID == "enable_wgp_mode") { 4036 if (G_00B848_WGP_MODE(Header.compute_pgm_resource_registers) && !isGFX10()) 4037 return TokError("enable_wgp_mode=1 is only allowed on GFX10+"); 4038 } 4039 4040 if (ID == "enable_mem_ordered") { 4041 if (G_00B848_MEM_ORDERED(Header.compute_pgm_resource_registers) && !isGFX10()) 4042 return TokError("enable_mem_ordered=1 is only allowed on GFX10+"); 4043 } 4044 4045 if (ID == "enable_fwd_progress") { 4046 if (G_00B848_FWD_PROGRESS(Header.compute_pgm_resource_registers) && !isGFX10()) 4047 return TokError("enable_fwd_progress=1 is only allowed on GFX10+"); 4048 } 4049 4050 return false; 4051 } 4052 4053 bool 
AMDGPUAsmParser::ParseDirectiveAMDKernelCodeT() { 4054 amd_kernel_code_t Header; 4055 AMDGPU::initDefaultAMDKernelCodeT(Header, &getSTI()); 4056 4057 while (true) { 4058 // Lex EndOfStatement. This is in a while loop, because lexing a comment 4059 // will set the current token to EndOfStatement. 4060 while(getLexer().is(AsmToken::EndOfStatement)) 4061 Lex(); 4062 4063 if (getLexer().isNot(AsmToken::Identifier)) 4064 return TokError("expected value identifier or .end_amd_kernel_code_t"); 4065 4066 StringRef ID = getLexer().getTok().getIdentifier(); 4067 Lex(); 4068 4069 if (ID == ".end_amd_kernel_code_t") 4070 break; 4071 4072 if (ParseAMDKernelCodeTValue(ID, Header)) 4073 return true; 4074 } 4075 4076 getTargetStreamer().EmitAMDKernelCodeT(Header); 4077 4078 return false; 4079 } 4080 4081 bool AMDGPUAsmParser::ParseDirectiveAMDGPUHsaKernel() { 4082 if (getLexer().isNot(AsmToken::Identifier)) 4083 return TokError("expected symbol name"); 4084 4085 StringRef KernelName = Parser.getTok().getString(); 4086 4087 getTargetStreamer().EmitAMDGPUSymbolType(KernelName, 4088 ELF::STT_AMDGPU_HSA_KERNEL); 4089 Lex(); 4090 if (!AMDGPU::IsaInfo::hasCodeObjectV3(&getSTI())) 4091 KernelScope.initialize(getContext()); 4092 return false; 4093 } 4094 4095 bool AMDGPUAsmParser::ParseDirectiveISAVersion() { 4096 if (getSTI().getTargetTriple().getArch() != Triple::amdgcn) { 4097 return Error(getParser().getTok().getLoc(), 4098 ".amd_amdgpu_isa directive is not available on non-amdgcn " 4099 "architectures"); 4100 } 4101 4102 auto ISAVersionStringFromASM = getLexer().getTok().getStringContents(); 4103 4104 std::string ISAVersionStringFromSTI; 4105 raw_string_ostream ISAVersionStreamFromSTI(ISAVersionStringFromSTI); 4106 IsaInfo::streamIsaVersion(&getSTI(), ISAVersionStreamFromSTI); 4107 4108 if (ISAVersionStringFromASM != ISAVersionStreamFromSTI.str()) { 4109 return Error(getParser().getTok().getLoc(), 4110 ".amd_amdgpu_isa directive does not match triple and/or mcpu " 4111 "arguments specified through the command line"); 4112 } 4113 4114 getTargetStreamer().EmitISAVersion(ISAVersionStreamFromSTI.str()); 4115 Lex(); 4116 4117 return false; 4118 } 4119 4120 bool AMDGPUAsmParser::ParseDirectiveHSAMetadata() { 4121 const char *AssemblerDirectiveBegin; 4122 const char *AssemblerDirectiveEnd; 4123 std::tie(AssemblerDirectiveBegin, AssemblerDirectiveEnd) = 4124 AMDGPU::IsaInfo::hasCodeObjectV3(&getSTI()) 4125 ? std::make_tuple(HSAMD::V3::AssemblerDirectiveBegin, 4126 HSAMD::V3::AssemblerDirectiveEnd) 4127 : std::make_tuple(HSAMD::AssemblerDirectiveBegin, 4128 HSAMD::AssemblerDirectiveEnd); 4129 4130 if (getSTI().getTargetTriple().getOS() != Triple::AMDHSA) { 4131 return Error(getParser().getTok().getLoc(), 4132 (Twine(AssemblerDirectiveBegin) + Twine(" directive is " 4133 "not available on non-amdhsa OSes")).str()); 4134 } 4135 4136 std::string HSAMetadataString; 4137 if (ParseToEndDirective(AssemblerDirectiveBegin, AssemblerDirectiveEnd, 4138 HSAMetadataString)) 4139 return true; 4140 4141 if (IsaInfo::hasCodeObjectV3(&getSTI())) { 4142 if (!getTargetStreamer().EmitHSAMetadataV3(HSAMetadataString)) 4143 return Error(getParser().getTok().getLoc(), "invalid HSA metadata"); 4144 } else { 4145 if (!getTargetStreamer().EmitHSAMetadataV2(HSAMetadataString)) 4146 return Error(getParser().getTok().getLoc(), "invalid HSA metadata"); 4147 } 4148 4149 return false; 4150 } 4151 4152 /// Common code to parse out a block of text (typically YAML) between start and 4153 /// end directives. 
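/// For example (illustrative), the HSA metadata directives delimit such a block:
///   .amdgpu_metadata
///     <YAML document>
///   .end_amdgpu_metadata
/// The begin directive is expected to have been consumed already; everything up
/// to the matching end directive is collected into CollectString.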
4154 bool AMDGPUAsmParser::ParseToEndDirective(const char *AssemblerDirectiveBegin, 4155 const char *AssemblerDirectiveEnd, 4156 std::string &CollectString) { 4157 4158 raw_string_ostream CollectStream(CollectString); 4159 4160 getLexer().setSkipSpace(false); 4161 4162 bool FoundEnd = false; 4163 while (!getLexer().is(AsmToken::Eof)) { 4164 while (getLexer().is(AsmToken::Space)) { 4165 CollectStream << getLexer().getTok().getString(); 4166 Lex(); 4167 } 4168 4169 if (getLexer().is(AsmToken::Identifier)) { 4170 StringRef ID = getLexer().getTok().getIdentifier(); 4171 if (ID == AssemblerDirectiveEnd) { 4172 Lex(); 4173 FoundEnd = true; 4174 break; 4175 } 4176 } 4177 4178 CollectStream << Parser.parseStringToEndOfStatement() 4179 << getContext().getAsmInfo()->getSeparatorString(); 4180 4181 Parser.eatToEndOfStatement(); 4182 } 4183 4184 getLexer().setSkipSpace(true); 4185 4186 if (getLexer().is(AsmToken::Eof) && !FoundEnd) { 4187 return TokError(Twine("expected directive ") + 4188 Twine(AssemblerDirectiveEnd) + Twine(" not found")); 4189 } 4190 4191 CollectStream.flush(); 4192 return false; 4193 } 4194 4195 /// Parse the assembler directive for new MsgPack-format PAL metadata. 4196 bool AMDGPUAsmParser::ParseDirectivePALMetadataBegin() { 4197 std::string String; 4198 if (ParseToEndDirective(AMDGPU::PALMD::AssemblerDirectiveBegin, 4199 AMDGPU::PALMD::AssemblerDirectiveEnd, String)) 4200 return true; 4201 4202 auto PALMetadata = getTargetStreamer().getPALMetadata(); 4203 if (!PALMetadata->setFromString(String)) 4204 return Error(getParser().getTok().getLoc(), "invalid PAL metadata"); 4205 return false; 4206 } 4207 4208 /// Parse the assembler directive for old linear-format PAL metadata. 4209 bool AMDGPUAsmParser::ParseDirectivePALMetadata() { 4210 if (getSTI().getTargetTriple().getOS() != Triple::AMDPAL) { 4211 return Error(getParser().getTok().getLoc(), 4212 (Twine(PALMD::AssemblerDirective) + Twine(" directive is " 4213 "not available on non-amdpal OSes")).str()); 4214 } 4215 4216 auto PALMetadata = getTargetStreamer().getPALMetadata(); 4217 PALMetadata->setLegacy(); 4218 for (;;) { 4219 uint32_t Key, Value; 4220 if (ParseAsAbsoluteExpression(Key)) { 4221 return TokError(Twine("invalid value in ") + 4222 Twine(PALMD::AssemblerDirective)); 4223 } 4224 if (getLexer().isNot(AsmToken::Comma)) { 4225 return TokError(Twine("expected an even number of values in ") + 4226 Twine(PALMD::AssemblerDirective)); 4227 } 4228 Lex(); 4229 if (ParseAsAbsoluteExpression(Value)) { 4230 return TokError(Twine("invalid value in ") + 4231 Twine(PALMD::AssemblerDirective)); 4232 } 4233 PALMetadata->setRegister(Key, Value); 4234 if (getLexer().isNot(AsmToken::Comma)) 4235 break; 4236 Lex(); 4237 } 4238 return false; 4239 } 4240 4241 /// ParseDirectiveAMDGPULDS 4242 /// ::= .amdgpu_lds identifier ',' size_expression [',' align_expression] 4243 bool AMDGPUAsmParser::ParseDirectiveAMDGPULDS() { 4244 if (getParser().checkForValidSection()) 4245 return true; 4246 4247 StringRef Name; 4248 SMLoc NameLoc = getLexer().getLoc(); 4249 if (getParser().parseIdentifier(Name)) 4250 return TokError("expected identifier in directive"); 4251 4252 MCSymbol *Symbol = getContext().getOrCreateSymbol(Name); 4253 if (parseToken(AsmToken::Comma, "expected ','")) 4254 return true; 4255 4256 unsigned LocalMemorySize = AMDGPU::IsaInfo::getLocalMemorySize(&getSTI()); 4257 4258 int64_t Size; 4259 SMLoc SizeLoc = getLexer().getLoc(); 4260 if (getParser().parseAbsoluteExpression(Size)) 4261 return true; 4262 if (Size < 0) 4263 return 
Error(SizeLoc, "size must be non-negative"); 4264 if (Size > LocalMemorySize) 4265 return Error(SizeLoc, "size is too large"); 4266 4267 int64_t Align = 4; 4268 if (getLexer().is(AsmToken::Comma)) { 4269 Lex(); 4270 SMLoc AlignLoc = getLexer().getLoc(); 4271 if (getParser().parseAbsoluteExpression(Align)) 4272 return true; 4273 if (Align < 0 || !isPowerOf2_64(Align)) 4274 return Error(AlignLoc, "alignment must be a power of two"); 4275 4276 // Alignment larger than the size of LDS is possible in theory, as long 4277 // as the linker manages to place to symbol at address 0, but we do want 4278 // to make sure the alignment fits nicely into a 32-bit integer. 4279 if (Align >= 1u << 31) 4280 return Error(AlignLoc, "alignment is too large"); 4281 } 4282 4283 if (parseToken(AsmToken::EndOfStatement, 4284 "unexpected token in '.amdgpu_lds' directive")) 4285 return true; 4286 4287 Symbol->redefineIfPossible(); 4288 if (!Symbol->isUndefined()) 4289 return Error(NameLoc, "invalid symbol redefinition"); 4290 4291 getTargetStreamer().emitAMDGPULDS(Symbol, Size, Align); 4292 return false; 4293 } 4294 4295 bool AMDGPUAsmParser::ParseDirective(AsmToken DirectiveID) { 4296 StringRef IDVal = DirectiveID.getString(); 4297 4298 if (AMDGPU::IsaInfo::hasCodeObjectV3(&getSTI())) { 4299 if (IDVal == ".amdgcn_target") 4300 return ParseDirectiveAMDGCNTarget(); 4301 4302 if (IDVal == ".amdhsa_kernel") 4303 return ParseDirectiveAMDHSAKernel(); 4304 4305 // TODO: Restructure/combine with PAL metadata directive. 4306 if (IDVal == AMDGPU::HSAMD::V3::AssemblerDirectiveBegin) 4307 return ParseDirectiveHSAMetadata(); 4308 } else { 4309 if (IDVal == ".hsa_code_object_version") 4310 return ParseDirectiveHSACodeObjectVersion(); 4311 4312 if (IDVal == ".hsa_code_object_isa") 4313 return ParseDirectiveHSACodeObjectISA(); 4314 4315 if (IDVal == ".amd_kernel_code_t") 4316 return ParseDirectiveAMDKernelCodeT(); 4317 4318 if (IDVal == ".amdgpu_hsa_kernel") 4319 return ParseDirectiveAMDGPUHsaKernel(); 4320 4321 if (IDVal == ".amd_amdgpu_isa") 4322 return ParseDirectiveISAVersion(); 4323 4324 if (IDVal == AMDGPU::HSAMD::AssemblerDirectiveBegin) 4325 return ParseDirectiveHSAMetadata(); 4326 } 4327 4328 if (IDVal == ".amdgpu_lds") 4329 return ParseDirectiveAMDGPULDS(); 4330 4331 if (IDVal == PALMD::AssemblerDirectiveBegin) 4332 return ParseDirectivePALMetadataBegin(); 4333 4334 if (IDVal == PALMD::AssemblerDirective) 4335 return ParseDirectivePALMetadata(); 4336 4337 return true; 4338 } 4339 4340 bool AMDGPUAsmParser::subtargetHasRegister(const MCRegisterInfo &MRI, 4341 unsigned RegNo) const { 4342 4343 for (MCRegAliasIterator R(AMDGPU::TTMP12_TTMP13_TTMP14_TTMP15, &MRI, true); 4344 R.isValid(); ++R) { 4345 if (*R == RegNo) 4346 return isGFX9() || isGFX10(); 4347 } 4348 4349 // GFX10 has 2 more SGPRs 104 and 105. 
4350 for (MCRegAliasIterator R(AMDGPU::SGPR104_SGPR105, &MRI, true); 4351 R.isValid(); ++R) { 4352 if (*R == RegNo) 4353 return hasSGPR104_SGPR105(); 4354 } 4355 4356 switch (RegNo) { 4357 case AMDGPU::SRC_SHARED_BASE: 4358 case AMDGPU::SRC_SHARED_LIMIT: 4359 case AMDGPU::SRC_PRIVATE_BASE: 4360 case AMDGPU::SRC_PRIVATE_LIMIT: 4361 case AMDGPU::SRC_POPS_EXITING_WAVE_ID: 4362 return !isCI() && !isSI() && !isVI(); 4363 case AMDGPU::TBA: 4364 case AMDGPU::TBA_LO: 4365 case AMDGPU::TBA_HI: 4366 case AMDGPU::TMA: 4367 case AMDGPU::TMA_LO: 4368 case AMDGPU::TMA_HI: 4369 return !isGFX9() && !isGFX10(); 4370 case AMDGPU::XNACK_MASK: 4371 case AMDGPU::XNACK_MASK_LO: 4372 case AMDGPU::XNACK_MASK_HI: 4373 return !isCI() && !isSI() && !isGFX10() && hasXNACK(); 4374 case AMDGPU::SGPR_NULL: 4375 return isGFX10(); 4376 default: 4377 break; 4378 } 4379 4380 if (isCI()) 4381 return true; 4382 4383 if (isSI() || isGFX10()) { 4384 // No flat_scr on SI. 4385 // On GFX10 flat scratch is not a valid register operand and can only be 4386 // accessed with s_setreg/s_getreg. 4387 switch (RegNo) { 4388 case AMDGPU::FLAT_SCR: 4389 case AMDGPU::FLAT_SCR_LO: 4390 case AMDGPU::FLAT_SCR_HI: 4391 return false; 4392 default: 4393 return true; 4394 } 4395 } 4396 4397 // VI only has 102 SGPRs, so make sure we aren't trying to use the 2 more that 4398 // SI/CI have. 4399 for (MCRegAliasIterator R(AMDGPU::SGPR102_SGPR103, &MRI, true); 4400 R.isValid(); ++R) { 4401 if (*R == RegNo) 4402 return hasSGPR102_SGPR103(); 4403 } 4404 4405 return true; 4406 } 4407 4408 OperandMatchResultTy 4409 AMDGPUAsmParser::parseOperand(OperandVector &Operands, StringRef Mnemonic, 4410 OperandMode Mode) { 4411 // Try to parse with a custom parser 4412 OperandMatchResultTy ResTy = MatchOperandParserImpl(Operands, Mnemonic); 4413 4414 // If we successfully parsed the operand or if there as an error parsing, 4415 // we are done. 4416 // 4417 // If we are parsing after we reach EndOfStatement then this means we 4418 // are appending default values to the Operands list. This is only done 4419 // by custom parser, so we shouldn't continue on to the generic parsing. 4420 if (ResTy == MatchOperand_Success || ResTy == MatchOperand_ParseFail || 4421 getLexer().is(AsmToken::EndOfStatement)) 4422 return ResTy; 4423 4424 if (Mode == OperandMode_NSA && getLexer().is(AsmToken::LBrac)) { 4425 unsigned Prefix = Operands.size(); 4426 SMLoc LBraceLoc = getTok().getLoc(); 4427 Parser.Lex(); // eat the '[' 4428 4429 for (;;) { 4430 ResTy = parseReg(Operands); 4431 if (ResTy != MatchOperand_Success) 4432 return ResTy; 4433 4434 if (getLexer().is(AsmToken::RBrac)) 4435 break; 4436 4437 if (getLexer().isNot(AsmToken::Comma)) 4438 return MatchOperand_ParseFail; 4439 Parser.Lex(); 4440 } 4441 4442 if (Operands.size() - Prefix > 1) { 4443 Operands.insert(Operands.begin() + Prefix, 4444 AMDGPUOperand::CreateToken(this, "[", LBraceLoc)); 4445 Operands.push_back(AMDGPUOperand::CreateToken(this, "]", 4446 getTok().getLoc())); 4447 } 4448 4449 Parser.Lex(); // eat the ']' 4450 return MatchOperand_Success; 4451 } 4452 4453 return parseRegOrImm(Operands); 4454 } 4455 4456 StringRef AMDGPUAsmParser::parseMnemonicSuffix(StringRef Name) { 4457 // Clear any forced encodings from the previous instruction. 
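// For example, "v_add_f32_e64" (illustrative mnemonic) forces the 64-bit (VOP3)
// encoding and the bare mnemonic "v_add_f32" is returned; the "_e32", "_dpp" and
// "_sdwa" suffixes are handled analogously below.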
4458 setForcedEncodingSize(0); 4459 setForcedDPP(false); 4460 setForcedSDWA(false); 4461 4462 if (Name.endswith("_e64")) { 4463 setForcedEncodingSize(64); 4464 return Name.substr(0, Name.size() - 4); 4465 } else if (Name.endswith("_e32")) { 4466 setForcedEncodingSize(32); 4467 return Name.substr(0, Name.size() - 4); 4468 } else if (Name.endswith("_dpp")) { 4469 setForcedDPP(true); 4470 return Name.substr(0, Name.size() - 4); 4471 } else if (Name.endswith("_sdwa")) { 4472 setForcedSDWA(true); 4473 return Name.substr(0, Name.size() - 5); 4474 } 4475 return Name; 4476 } 4477 4478 bool AMDGPUAsmParser::ParseInstruction(ParseInstructionInfo &Info, 4479 StringRef Name, 4480 SMLoc NameLoc, OperandVector &Operands) { 4481 // Add the instruction mnemonic 4482 Name = parseMnemonicSuffix(Name); 4483 Operands.push_back(AMDGPUOperand::CreateToken(this, Name, NameLoc)); 4484 4485 bool IsMIMG = Name.startswith("image_"); 4486 4487 while (!getLexer().is(AsmToken::EndOfStatement)) { 4488 OperandMode Mode = OperandMode_Default; 4489 if (IsMIMG && isGFX10() && Operands.size() == 2) 4490 Mode = OperandMode_NSA; 4491 OperandMatchResultTy Res = parseOperand(Operands, Name, Mode); 4492 4493 // Eat the comma or space if there is one. 4494 if (getLexer().is(AsmToken::Comma)) 4495 Parser.Lex(); 4496 4497 switch (Res) { 4498 case MatchOperand_Success: break; 4499 case MatchOperand_ParseFail: 4500 // FIXME: use real operand location rather than the current location. 4501 Error(getLexer().getLoc(), "failed parsing operand."); 4502 while (!getLexer().is(AsmToken::EndOfStatement)) { 4503 Parser.Lex(); 4504 } 4505 return true; 4506 case MatchOperand_NoMatch: 4507 // FIXME: use real operand location rather than the current location. 4508 Error(getLexer().getLoc(), "not a valid operand."); 4509 while (!getLexer().is(AsmToken::EndOfStatement)) { 4510 Parser.Lex(); 4511 } 4512 return true; 4513 } 4514 } 4515 4516 return false; 4517 } 4518 4519 //===----------------------------------------------------------------------===// 4520 // Utility functions 4521 //===----------------------------------------------------------------------===// 4522 4523 OperandMatchResultTy 4524 AMDGPUAsmParser::parseIntWithPrefix(const char *Prefix, int64_t &IntVal) { 4525 4526 if (!trySkipId(Prefix, AsmToken::Colon)) 4527 return MatchOperand_NoMatch; 4528 4529 return parseExpr(IntVal) ? 
MatchOperand_Success : MatchOperand_ParseFail; 4530 } 4531 4532 OperandMatchResultTy 4533 AMDGPUAsmParser::parseIntWithPrefix(const char *Prefix, OperandVector &Operands, 4534 AMDGPUOperand::ImmTy ImmTy, 4535 bool (*ConvertResult)(int64_t&)) { 4536 SMLoc S = getLoc(); 4537 int64_t Value = 0; 4538 4539 OperandMatchResultTy Res = parseIntWithPrefix(Prefix, Value); 4540 if (Res != MatchOperand_Success) 4541 return Res; 4542 4543 if (ConvertResult && !ConvertResult(Value)) { 4544 Error(S, "invalid " + StringRef(Prefix) + " value."); 4545 } 4546 4547 Operands.push_back(AMDGPUOperand::CreateImm(this, Value, S, ImmTy)); 4548 return MatchOperand_Success; 4549 } 4550 4551 OperandMatchResultTy 4552 AMDGPUAsmParser::parseOperandArrayWithPrefix(const char *Prefix, 4553 OperandVector &Operands, 4554 AMDGPUOperand::ImmTy ImmTy, 4555 bool (*ConvertResult)(int64_t&)) { 4556 SMLoc S = getLoc(); 4557 if (!trySkipId(Prefix, AsmToken::Colon)) 4558 return MatchOperand_NoMatch; 4559 4560 if (!skipToken(AsmToken::LBrac, "expected a left square bracket")) 4561 return MatchOperand_ParseFail; 4562 4563 unsigned Val = 0; 4564 const unsigned MaxSize = 4; 4565 4566 // FIXME: How to verify the number of elements matches the number of src 4567 // operands? 4568 for (int I = 0; ; ++I) { 4569 int64_t Op; 4570 SMLoc Loc = getLoc(); 4571 if (!parseExpr(Op)) 4572 return MatchOperand_ParseFail; 4573 4574 if (Op != 0 && Op != 1) { 4575 Error(Loc, "invalid " + StringRef(Prefix) + " value."); 4576 return MatchOperand_ParseFail; 4577 } 4578 4579 Val |= (Op << I); 4580 4581 if (trySkipToken(AsmToken::RBrac)) 4582 break; 4583 4584 if (I + 1 == MaxSize) { 4585 Error(getLoc(), "expected a closing square bracket"); 4586 return MatchOperand_ParseFail; 4587 } 4588 4589 if (!skipToken(AsmToken::Comma, "expected a comma")) 4590 return MatchOperand_ParseFail; 4591 } 4592 4593 Operands.push_back(AMDGPUOperand::CreateImm(this, Val, S, ImmTy)); 4594 return MatchOperand_Success; 4595 } 4596 4597 OperandMatchResultTy 4598 AMDGPUAsmParser::parseNamedBit(const char *Name, OperandVector &Operands, 4599 AMDGPUOperand::ImmTy ImmTy) { 4600 int64_t Bit = 0; 4601 SMLoc S = Parser.getTok().getLoc(); 4602 4603 // We are at the end of the statement, and this is a default argument, so 4604 // use a default value. 
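// For example (illustrative), "glc" sets the bit to 1, while the negated form
// "noglc" is also accepted below and sets it to 0.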
4605 if (getLexer().isNot(AsmToken::EndOfStatement)) { 4606 switch(getLexer().getKind()) { 4607 case AsmToken::Identifier: { 4608 StringRef Tok = Parser.getTok().getString(); 4609 if (Tok == Name) { 4610 if (Tok == "r128" && isGFX9()) 4611 Error(S, "r128 modifier is not supported on this GPU"); 4612 if (Tok == "a16" && !isGFX9() && !isGFX10()) 4613 Error(S, "a16 modifier is not supported on this GPU"); 4614 Bit = 1; 4615 Parser.Lex(); 4616 } else if (Tok.startswith("no") && Tok.endswith(Name)) { 4617 Bit = 0; 4618 Parser.Lex(); 4619 } else { 4620 return MatchOperand_NoMatch; 4621 } 4622 break; 4623 } 4624 default: 4625 return MatchOperand_NoMatch; 4626 } 4627 } 4628 4629 if (!isGFX10() && ImmTy == AMDGPUOperand::ImmTyDLC) 4630 return MatchOperand_ParseFail; 4631 4632 Operands.push_back(AMDGPUOperand::CreateImm(this, Bit, S, ImmTy)); 4633 return MatchOperand_Success; 4634 } 4635 4636 static void addOptionalImmOperand( 4637 MCInst& Inst, const OperandVector& Operands, 4638 AMDGPUAsmParser::OptionalImmIndexMap& OptionalIdx, 4639 AMDGPUOperand::ImmTy ImmT, 4640 int64_t Default = 0) { 4641 auto i = OptionalIdx.find(ImmT); 4642 if (i != OptionalIdx.end()) { 4643 unsigned Idx = i->second; 4644 ((AMDGPUOperand &)*Operands[Idx]).addImmOperands(Inst, 1); 4645 } else { 4646 Inst.addOperand(MCOperand::createImm(Default)); 4647 } 4648 } 4649 4650 OperandMatchResultTy 4651 AMDGPUAsmParser::parseStringWithPrefix(StringRef Prefix, StringRef &Value) { 4652 if (getLexer().isNot(AsmToken::Identifier)) { 4653 return MatchOperand_NoMatch; 4654 } 4655 StringRef Tok = Parser.getTok().getString(); 4656 if (Tok != Prefix) { 4657 return MatchOperand_NoMatch; 4658 } 4659 4660 Parser.Lex(); 4661 if (getLexer().isNot(AsmToken::Colon)) { 4662 return MatchOperand_ParseFail; 4663 } 4664 4665 Parser.Lex(); 4666 if (getLexer().isNot(AsmToken::Identifier)) { 4667 return MatchOperand_ParseFail; 4668 } 4669 4670 Value = Parser.getTok().getString(); 4671 return MatchOperand_Success; 4672 } 4673 4674 // dfmt and nfmt (in a tbuffer instruction) are parsed as one to allow their 4675 // values to live in a joint format operand in the MCInst encoding. 4676 OperandMatchResultTy 4677 AMDGPUAsmParser::parseDfmtNfmt(OperandVector &Operands) { 4678 SMLoc S = Parser.getTok().getLoc(); 4679 int64_t Dfmt = 0, Nfmt = 0; 4680 // dfmt and nfmt can appear in either order, and each is optional. 
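// For example (illustrative), "dfmt:14, nfmt:7" and "nfmt:7, dfmt:14" are both
// accepted and yield the same joint value, Dfmt | (Nfmt << 4).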
4681 bool GotDfmt = false, GotNfmt = false; 4682 while (!GotDfmt || !GotNfmt) { 4683 if (!GotDfmt) { 4684 auto Res = parseIntWithPrefix("dfmt", Dfmt); 4685 if (Res != MatchOperand_NoMatch) { 4686 if (Res != MatchOperand_Success) 4687 return Res; 4688 if (Dfmt >= 16) { 4689 Error(Parser.getTok().getLoc(), "out of range dfmt"); 4690 return MatchOperand_ParseFail; 4691 } 4692 GotDfmt = true; 4693 Parser.Lex(); 4694 continue; 4695 } 4696 } 4697 if (!GotNfmt) { 4698 auto Res = parseIntWithPrefix("nfmt", Nfmt); 4699 if (Res != MatchOperand_NoMatch) { 4700 if (Res != MatchOperand_Success) 4701 return Res; 4702 if (Nfmt >= 8) { 4703 Error(Parser.getTok().getLoc(), "out of range nfmt"); 4704 return MatchOperand_ParseFail; 4705 } 4706 GotNfmt = true; 4707 Parser.Lex(); 4708 continue; 4709 } 4710 } 4711 break; 4712 } 4713 if (!GotDfmt && !GotNfmt) 4714 return MatchOperand_NoMatch; 4715 auto Format = Dfmt | Nfmt << 4; 4716 Operands.push_back( 4717 AMDGPUOperand::CreateImm(this, Format, S, AMDGPUOperand::ImmTyFORMAT)); 4718 return MatchOperand_Success; 4719 } 4720 4721 //===----------------------------------------------------------------------===// 4722 // ds 4723 //===----------------------------------------------------------------------===// 4724 4725 void AMDGPUAsmParser::cvtDSOffset01(MCInst &Inst, 4726 const OperandVector &Operands) { 4727 OptionalImmIndexMap OptionalIdx; 4728 4729 for (unsigned i = 1, e = Operands.size(); i != e; ++i) { 4730 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 4731 4732 // Add the register arguments 4733 if (Op.isReg()) { 4734 Op.addRegOperands(Inst, 1); 4735 continue; 4736 } 4737 4738 // Handle optional arguments 4739 OptionalIdx[Op.getImmTy()] = i; 4740 } 4741 4742 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset0); 4743 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset1); 4744 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGDS); 4745 4746 Inst.addOperand(MCOperand::createReg(AMDGPU::M0)); // m0 4747 } 4748 4749 void AMDGPUAsmParser::cvtDSImpl(MCInst &Inst, const OperandVector &Operands, 4750 bool IsGdsHardcoded) { 4751 OptionalImmIndexMap OptionalIdx; 4752 4753 for (unsigned i = 1, e = Operands.size(); i != e; ++i) { 4754 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 4755 4756 // Add the register arguments 4757 if (Op.isReg()) { 4758 Op.addRegOperands(Inst, 1); 4759 continue; 4760 } 4761 4762 if (Op.isToken() && Op.getToken() == "gds") { 4763 IsGdsHardcoded = true; 4764 continue; 4765 } 4766 4767 // Handle optional arguments 4768 OptionalIdx[Op.getImmTy()] = i; 4769 } 4770 4771 AMDGPUOperand::ImmTy OffsetType = 4772 (Inst.getOpcode() == AMDGPU::DS_SWIZZLE_B32_gfx10 || 4773 Inst.getOpcode() == AMDGPU::DS_SWIZZLE_B32_gfx6_gfx7 || 4774 Inst.getOpcode() == AMDGPU::DS_SWIZZLE_B32_vi) ? 
AMDGPUOperand::ImmTySwizzle : 4775 AMDGPUOperand::ImmTyOffset; 4776 4777 addOptionalImmOperand(Inst, Operands, OptionalIdx, OffsetType); 4778 4779 if (!IsGdsHardcoded) { 4780 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGDS); 4781 } 4782 Inst.addOperand(MCOperand::createReg(AMDGPU::M0)); // m0 4783 } 4784 4785 void AMDGPUAsmParser::cvtExp(MCInst &Inst, const OperandVector &Operands) { 4786 OptionalImmIndexMap OptionalIdx; 4787 4788 unsigned OperandIdx[4]; 4789 unsigned EnMask = 0; 4790 int SrcIdx = 0; 4791 4792 for (unsigned i = 1, e = Operands.size(); i != e; ++i) { 4793 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 4794 4795 // Add the register arguments 4796 if (Op.isReg()) { 4797 assert(SrcIdx < 4); 4798 OperandIdx[SrcIdx] = Inst.size(); 4799 Op.addRegOperands(Inst, 1); 4800 ++SrcIdx; 4801 continue; 4802 } 4803 4804 if (Op.isOff()) { 4805 assert(SrcIdx < 4); 4806 OperandIdx[SrcIdx] = Inst.size(); 4807 Inst.addOperand(MCOperand::createReg(AMDGPU::NoRegister)); 4808 ++SrcIdx; 4809 continue; 4810 } 4811 4812 if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyExpTgt) { 4813 Op.addImmOperands(Inst, 1); 4814 continue; 4815 } 4816 4817 if (Op.isToken() && Op.getToken() == "done") 4818 continue; 4819 4820 // Handle optional arguments 4821 OptionalIdx[Op.getImmTy()] = i; 4822 } 4823 4824 assert(SrcIdx == 4); 4825 4826 bool Compr = false; 4827 if (OptionalIdx.find(AMDGPUOperand::ImmTyExpCompr) != OptionalIdx.end()) { 4828 Compr = true; 4829 Inst.getOperand(OperandIdx[1]) = Inst.getOperand(OperandIdx[2]); 4830 Inst.getOperand(OperandIdx[2]).setReg(AMDGPU::NoRegister); 4831 Inst.getOperand(OperandIdx[3]).setReg(AMDGPU::NoRegister); 4832 } 4833 4834 for (auto i = 0; i < SrcIdx; ++i) { 4835 if (Inst.getOperand(OperandIdx[i]).getReg() != AMDGPU::NoRegister) { 4836 EnMask |= Compr? 
(0x3 << i * 2) : (0x1 << i); 4837 } 4838 } 4839 4840 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyExpVM); 4841 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyExpCompr); 4842 4843 Inst.addOperand(MCOperand::createImm(EnMask)); 4844 } 4845 4846 //===----------------------------------------------------------------------===// 4847 // s_waitcnt 4848 //===----------------------------------------------------------------------===// 4849 4850 static bool 4851 encodeCnt( 4852 const AMDGPU::IsaVersion ISA, 4853 int64_t &IntVal, 4854 int64_t CntVal, 4855 bool Saturate, 4856 unsigned (*encode)(const IsaVersion &Version, unsigned, unsigned), 4857 unsigned (*decode)(const IsaVersion &Version, unsigned)) 4858 { 4859 bool Failed = false; 4860 4861 IntVal = encode(ISA, IntVal, CntVal); 4862 if (CntVal != decode(ISA, IntVal)) { 4863 if (Saturate) { 4864 IntVal = encode(ISA, IntVal, -1); 4865 } else { 4866 Failed = true; 4867 } 4868 } 4869 return Failed; 4870 } 4871 4872 bool AMDGPUAsmParser::parseCnt(int64_t &IntVal) { 4873 4874 SMLoc CntLoc = getLoc(); 4875 StringRef CntName = getTokenStr(); 4876 4877 if (!skipToken(AsmToken::Identifier, "expected a counter name") || 4878 !skipToken(AsmToken::LParen, "expected a left parenthesis")) 4879 return false; 4880 4881 int64_t CntVal; 4882 SMLoc ValLoc = getLoc(); 4883 if (!parseExpr(CntVal)) 4884 return false; 4885 4886 AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU()); 4887 4888 bool Failed = true; 4889 bool Sat = CntName.endswith("_sat"); 4890 4891 if (CntName == "vmcnt" || CntName == "vmcnt_sat") { 4892 Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeVmcnt, decodeVmcnt); 4893 } else if (CntName == "expcnt" || CntName == "expcnt_sat") { 4894 Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeExpcnt, decodeExpcnt); 4895 } else if (CntName == "lgkmcnt" || CntName == "lgkmcnt_sat") { 4896 Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeLgkmcnt, decodeLgkmcnt); 4897 } else { 4898 Error(CntLoc, "invalid counter name " + CntName); 4899 return false; 4900 } 4901 4902 if (Failed) { 4903 Error(ValLoc, "too large value for " + CntName); 4904 return false; 4905 } 4906 4907 if (!skipToken(AsmToken::RParen, "expected a closing parenthesis")) 4908 return false; 4909 4910 if (trySkipToken(AsmToken::Amp) || trySkipToken(AsmToken::Comma)) { 4911 if (isToken(AsmToken::EndOfStatement)) { 4912 Error(getLoc(), "expected a counter name"); 4913 return false; 4914 } 4915 } 4916 4917 return true; 4918 } 4919 4920 OperandMatchResultTy 4921 AMDGPUAsmParser::parseSWaitCntOps(OperandVector &Operands) { 4922 AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU()); 4923 int64_t Waitcnt = getWaitcntBitMask(ISA); 4924 SMLoc S = getLoc(); 4925 4926 // If parse failed, do not return error code 4927 // to avoid excessive error messages. 
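// A typical symbolic form is, e.g. (illustrative),
//   s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
// but a plain integer expression encoding all counters is accepted as well.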
4928 if (isToken(AsmToken::Identifier) && peekToken().is(AsmToken::LParen)) { 4929 while (parseCnt(Waitcnt) && !isToken(AsmToken::EndOfStatement)); 4930 } else { 4931 parseExpr(Waitcnt); 4932 } 4933 4934 Operands.push_back(AMDGPUOperand::CreateImm(this, Waitcnt, S)); 4935 return MatchOperand_Success; 4936 } 4937 4938 bool 4939 AMDGPUOperand::isSWaitCnt() const { 4940 return isImm(); 4941 } 4942 4943 //===----------------------------------------------------------------------===// 4944 // hwreg 4945 //===----------------------------------------------------------------------===// 4946 4947 bool 4948 AMDGPUAsmParser::parseHwregBody(OperandInfoTy &HwReg, 4949 int64_t &Offset, 4950 int64_t &Width) { 4951 using namespace llvm::AMDGPU::Hwreg; 4952 4953 // The register may be specified by name or using a numeric code 4954 if (isToken(AsmToken::Identifier) && 4955 (HwReg.Id = getHwregId(getTokenStr())) >= 0) { 4956 HwReg.IsSymbolic = true; 4957 lex(); // skip message name 4958 } else if (!parseExpr(HwReg.Id)) { 4959 return false; 4960 } 4961 4962 if (trySkipToken(AsmToken::RParen)) 4963 return true; 4964 4965 // parse optional params 4966 return 4967 skipToken(AsmToken::Comma, "expected a comma or a closing parenthesis") && 4968 parseExpr(Offset) && 4969 skipToken(AsmToken::Comma, "expected a comma") && 4970 parseExpr(Width) && 4971 skipToken(AsmToken::RParen, "expected a closing parenthesis"); 4972 } 4973 4974 bool 4975 AMDGPUAsmParser::validateHwreg(const OperandInfoTy &HwReg, 4976 const int64_t Offset, 4977 const int64_t Width, 4978 const SMLoc Loc) { 4979 4980 using namespace llvm::AMDGPU::Hwreg; 4981 4982 if (HwReg.IsSymbolic && !isValidHwreg(HwReg.Id, getSTI())) { 4983 Error(Loc, "specified hardware register is not supported on this GPU"); 4984 return false; 4985 } else if (!isValidHwreg(HwReg.Id)) { 4986 Error(Loc, "invalid code of hardware register: only 6-bit values are legal"); 4987 return false; 4988 } else if (!isValidHwregOffset(Offset)) { 4989 Error(Loc, "invalid bit offset: only 5-bit values are legal"); 4990 return false; 4991 } else if (!isValidHwregWidth(Width)) { 4992 Error(Loc, "invalid bitfield width: only values from 1 to 32 are legal"); 4993 return false; 4994 } 4995 return true; 4996 } 4997 4998 OperandMatchResultTy 4999 AMDGPUAsmParser::parseHwreg(OperandVector &Operands) { 5000 using namespace llvm::AMDGPU::Hwreg; 5001 5002 int64_t ImmVal = 0; 5003 SMLoc Loc = getLoc(); 5004 5005 // If parse failed, do not return error code 5006 // to avoid excessive error messages. 
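// Accepted forms are, e.g. (illustrative),
//   hwreg(HW_REG_TRAPSTS)      a symbolic register name
//   hwreg(5, 0, 32)            a numeric id with bit offset and width
//   <integer expression>       the raw 16-bit encoding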
5007 if (trySkipId("hwreg", AsmToken::LParen)) { 5008 OperandInfoTy HwReg(ID_UNKNOWN_); 5009 int64_t Offset = OFFSET_DEFAULT_; 5010 int64_t Width = WIDTH_DEFAULT_; 5011 if (parseHwregBody(HwReg, Offset, Width) && 5012 validateHwreg(HwReg, Offset, Width, Loc)) { 5013 ImmVal = encodeHwreg(HwReg.Id, Offset, Width); 5014 } 5015 } else if (parseExpr(ImmVal)) { 5016 if (ImmVal < 0 || !isUInt<16>(ImmVal)) 5017 Error(Loc, "invalid immediate: only 16-bit values are legal"); 5018 } 5019 5020 Operands.push_back(AMDGPUOperand::CreateImm(this, ImmVal, Loc, AMDGPUOperand::ImmTyHwreg)); 5021 return MatchOperand_Success; 5022 } 5023 5024 bool AMDGPUOperand::isHwreg() const { 5025 return isImmTy(ImmTyHwreg); 5026 } 5027 5028 //===----------------------------------------------------------------------===// 5029 // sendmsg 5030 //===----------------------------------------------------------------------===// 5031 5032 bool 5033 AMDGPUAsmParser::parseSendMsgBody(OperandInfoTy &Msg, 5034 OperandInfoTy &Op, 5035 OperandInfoTy &Stream) { 5036 using namespace llvm::AMDGPU::SendMsg; 5037 5038 if (isToken(AsmToken::Identifier) && (Msg.Id = getMsgId(getTokenStr())) >= 0) { 5039 Msg.IsSymbolic = true; 5040 lex(); // skip message name 5041 } else if (!parseExpr(Msg.Id)) { 5042 return false; 5043 } 5044 5045 if (trySkipToken(AsmToken::Comma)) { 5046 Op.IsDefined = true; 5047 if (isToken(AsmToken::Identifier) && 5048 (Op.Id = getMsgOpId(Msg.Id, getTokenStr())) >= 0) { 5049 lex(); // skip operation name 5050 } else if (!parseExpr(Op.Id)) { 5051 return false; 5052 } 5053 5054 if (trySkipToken(AsmToken::Comma)) { 5055 Stream.IsDefined = true; 5056 if (!parseExpr(Stream.Id)) 5057 return false; 5058 } 5059 } 5060 5061 return skipToken(AsmToken::RParen, "expected a closing parenthesis"); 5062 } 5063 5064 bool 5065 AMDGPUAsmParser::validateSendMsg(const OperandInfoTy &Msg, 5066 const OperandInfoTy &Op, 5067 const OperandInfoTy &Stream, 5068 const SMLoc S) { 5069 using namespace llvm::AMDGPU::SendMsg; 5070 5071 // Validation strictness depends on whether message is specified 5072 // in a symbolc or in a numeric form. In the latter case 5073 // only encoding possibility is checked. 5074 bool Strict = Msg.IsSymbolic; 5075 5076 if (!isValidMsgId(Msg.Id, getSTI(), Strict)) { 5077 Error(S, "invalid message id"); 5078 return false; 5079 } else if (Strict && (msgRequiresOp(Msg.Id) != Op.IsDefined)) { 5080 Error(S, Op.IsDefined ? 5081 "message does not support operations" : 5082 "missing message operation"); 5083 return false; 5084 } else if (!isValidMsgOp(Msg.Id, Op.Id, Strict)) { 5085 Error(S, "invalid operation id"); 5086 return false; 5087 } else if (Strict && !msgSupportsStream(Msg.Id, Op.Id) && Stream.IsDefined) { 5088 Error(S, "message operation does not support streams"); 5089 return false; 5090 } else if (!isValidMsgStream(Msg.Id, Op.Id, Stream.Id, Strict)) { 5091 Error(S, "invalid message stream id"); 5092 return false; 5093 } 5094 return true; 5095 } 5096 5097 OperandMatchResultTy 5098 AMDGPUAsmParser::parseSendMsgOp(OperandVector &Operands) { 5099 using namespace llvm::AMDGPU::SendMsg; 5100 5101 int64_t ImmVal = 0; 5102 SMLoc Loc = getLoc(); 5103 5104 // If parse failed, do not return error code 5105 // to avoid excessive error messages. 
5106 if (trySkipId("sendmsg", AsmToken::LParen)) { 5107 OperandInfoTy Msg(ID_UNKNOWN_); 5108 OperandInfoTy Op(OP_NONE_); 5109 OperandInfoTy Stream(STREAM_ID_NONE_); 5110 if (parseSendMsgBody(Msg, Op, Stream) && 5111 validateSendMsg(Msg, Op, Stream, Loc)) { 5112 ImmVal = encodeMsg(Msg.Id, Op.Id, Stream.Id); 5113 } 5114 } else if (parseExpr(ImmVal)) { 5115 if (ImmVal < 0 || !isUInt<16>(ImmVal)) 5116 Error(Loc, "invalid immediate: only 16-bit values are legal"); 5117 } 5118 5119 Operands.push_back(AMDGPUOperand::CreateImm(this, ImmVal, Loc, AMDGPUOperand::ImmTySendMsg)); 5120 return MatchOperand_Success; 5121 } 5122 5123 bool AMDGPUOperand::isSendMsg() const { 5124 return isImmTy(ImmTySendMsg); 5125 } 5126 5127 //===----------------------------------------------------------------------===// 5128 // v_interp 5129 //===----------------------------------------------------------------------===// 5130 5131 OperandMatchResultTy AMDGPUAsmParser::parseInterpSlot(OperandVector &Operands) { 5132 if (getLexer().getKind() != AsmToken::Identifier) 5133 return MatchOperand_NoMatch; 5134 5135 StringRef Str = Parser.getTok().getString(); 5136 int Slot = StringSwitch<int>(Str) 5137 .Case("p10", 0) 5138 .Case("p20", 1) 5139 .Case("p0", 2) 5140 .Default(-1); 5141 5142 SMLoc S = Parser.getTok().getLoc(); 5143 if (Slot == -1) 5144 return MatchOperand_ParseFail; 5145 5146 Parser.Lex(); 5147 Operands.push_back(AMDGPUOperand::CreateImm(this, Slot, S, 5148 AMDGPUOperand::ImmTyInterpSlot)); 5149 return MatchOperand_Success; 5150 } 5151 5152 OperandMatchResultTy AMDGPUAsmParser::parseInterpAttr(OperandVector &Operands) { 5153 if (getLexer().getKind() != AsmToken::Identifier) 5154 return MatchOperand_NoMatch; 5155 5156 StringRef Str = Parser.getTok().getString(); 5157 if (!Str.startswith("attr")) 5158 return MatchOperand_NoMatch; 5159 5160 StringRef Chan = Str.take_back(2); 5161 int AttrChan = StringSwitch<int>(Chan) 5162 .Case(".x", 0) 5163 .Case(".y", 1) 5164 .Case(".z", 2) 5165 .Case(".w", 3) 5166 .Default(-1); 5167 if (AttrChan == -1) 5168 return MatchOperand_ParseFail; 5169 5170 Str = Str.drop_back(2).drop_front(4); 5171 5172 uint8_t Attr; 5173 if (Str.getAsInteger(10, Attr)) 5174 return MatchOperand_ParseFail; 5175 5176 SMLoc S = Parser.getTok().getLoc(); 5177 Parser.Lex(); 5178 if (Attr > 63) { 5179 Error(S, "out of bounds attr"); 5180 return MatchOperand_Success; 5181 } 5182 5183 SMLoc SChan = SMLoc::getFromPointer(Chan.data()); 5184 5185 Operands.push_back(AMDGPUOperand::CreateImm(this, Attr, S, 5186 AMDGPUOperand::ImmTyInterpAttr)); 5187 Operands.push_back(AMDGPUOperand::CreateImm(this, AttrChan, SChan, 5188 AMDGPUOperand::ImmTyAttrChan)); 5189 return MatchOperand_Success; 5190 } 5191 5192 //===----------------------------------------------------------------------===// 5193 // exp 5194 //===----------------------------------------------------------------------===// 5195 5196 void AMDGPUAsmParser::errorExpTgt() { 5197 Error(Parser.getTok().getLoc(), "invalid exp target"); 5198 } 5199 5200 OperandMatchResultTy AMDGPUAsmParser::parseExpTgtImpl(StringRef Str, 5201 uint8_t &Val) { 5202 if (Str == "null") { 5203 Val = 9; 5204 return MatchOperand_Success; 5205 } 5206 5207 if (Str.startswith("mrt")) { 5208 Str = Str.drop_front(3); 5209 if (Str == "z") { // == mrtz 5210 Val = 8; 5211 return MatchOperand_Success; 5212 } 5213 5214 if (Str.getAsInteger(10, Val)) 5215 return MatchOperand_ParseFail; 5216 5217 if (Val > 7) 5218 errorExpTgt(); 5219 5220 return MatchOperand_Success; 5221 } 5222 5223 if (Str.startswith("pos")) 
{ 5224 Str = Str.drop_front(3); 5225 if (Str.getAsInteger(10, Val)) 5226 return MatchOperand_ParseFail; 5227 5228 if (Val > 4 || (Val == 4 && !isGFX10())) 5229 errorExpTgt(); 5230 5231 Val += 12; 5232 return MatchOperand_Success; 5233 } 5234 5235 if (isGFX10() && Str == "prim") { 5236 Val = 20; 5237 return MatchOperand_Success; 5238 } 5239 5240 if (Str.startswith("param")) { 5241 Str = Str.drop_front(5); 5242 if (Str.getAsInteger(10, Val)) 5243 return MatchOperand_ParseFail; 5244 5245 if (Val >= 32) 5246 errorExpTgt(); 5247 5248 Val += 32; 5249 return MatchOperand_Success; 5250 } 5251 5252 if (Str.startswith("invalid_target_")) { 5253 Str = Str.drop_front(15); 5254 if (Str.getAsInteger(10, Val)) 5255 return MatchOperand_ParseFail; 5256 5257 errorExpTgt(); 5258 return MatchOperand_Success; 5259 } 5260 5261 return MatchOperand_NoMatch; 5262 } 5263 5264 OperandMatchResultTy AMDGPUAsmParser::parseExpTgt(OperandVector &Operands) { 5265 uint8_t Val; 5266 StringRef Str = Parser.getTok().getString(); 5267 5268 auto Res = parseExpTgtImpl(Str, Val); 5269 if (Res != MatchOperand_Success) 5270 return Res; 5271 5272 SMLoc S = Parser.getTok().getLoc(); 5273 Parser.Lex(); 5274 5275 Operands.push_back(AMDGPUOperand::CreateImm(this, Val, S, 5276 AMDGPUOperand::ImmTyExpTgt)); 5277 return MatchOperand_Success; 5278 } 5279 5280 //===----------------------------------------------------------------------===// 5281 // parser helpers 5282 //===----------------------------------------------------------------------===// 5283 5284 bool 5285 AMDGPUAsmParser::isId(const AsmToken &Token, const StringRef Id) const { 5286 return Token.is(AsmToken::Identifier) && Token.getString() == Id; 5287 } 5288 5289 bool 5290 AMDGPUAsmParser::isId(const StringRef Id) const { 5291 return isId(getToken(), Id); 5292 } 5293 5294 bool 5295 AMDGPUAsmParser::isToken(const AsmToken::TokenKind Kind) const { 5296 return getTokenKind() == Kind; 5297 } 5298 5299 bool 5300 AMDGPUAsmParser::trySkipId(const StringRef Id) { 5301 if (isId(Id)) { 5302 lex(); 5303 return true; 5304 } 5305 return false; 5306 } 5307 5308 bool 5309 AMDGPUAsmParser::trySkipId(const StringRef Id, const AsmToken::TokenKind Kind) { 5310 if (isId(Id) && peekToken().is(Kind)) { 5311 lex(); 5312 lex(); 5313 return true; 5314 } 5315 return false; 5316 } 5317 5318 bool 5319 AMDGPUAsmParser::trySkipToken(const AsmToken::TokenKind Kind) { 5320 if (isToken(Kind)) { 5321 lex(); 5322 return true; 5323 } 5324 return false; 5325 } 5326 5327 bool 5328 AMDGPUAsmParser::skipToken(const AsmToken::TokenKind Kind, 5329 const StringRef ErrMsg) { 5330 if (!trySkipToken(Kind)) { 5331 Error(getLoc(), ErrMsg); 5332 return false; 5333 } 5334 return true; 5335 } 5336 5337 bool 5338 AMDGPUAsmParser::parseExpr(int64_t &Imm) { 5339 return !getParser().parseAbsoluteExpression(Imm); 5340 } 5341 5342 bool 5343 AMDGPUAsmParser::parseExpr(OperandVector &Operands) { 5344 SMLoc S = getLoc(); 5345 5346 const MCExpr *Expr; 5347 if (Parser.parseExpression(Expr)) 5348 return false; 5349 5350 int64_t IntVal; 5351 if (Expr->evaluateAsAbsolute(IntVal)) { 5352 Operands.push_back(AMDGPUOperand::CreateImm(this, IntVal, S)); 5353 } else { 5354 Operands.push_back(AMDGPUOperand::CreateExpr(this, Expr, S)); 5355 } 5356 return true; 5357 } 5358 5359 bool 5360 AMDGPUAsmParser::parseString(StringRef &Val, const StringRef ErrMsg) { 5361 if (isToken(AsmToken::String)) { 5362 Val = getToken().getStringContents(); 5363 lex(); 5364 return true; 5365 } else { 5366 Error(getLoc(), ErrMsg); 5367 return false; 5368 } 5369 } 5370 5371 
AsmToken 5372 AMDGPUAsmParser::getToken() const { 5373 return Parser.getTok(); 5374 } 5375 5376 AsmToken 5377 AMDGPUAsmParser::peekToken() { 5378 return getLexer().peekTok(); 5379 } 5380 5381 void 5382 AMDGPUAsmParser::peekTokens(MutableArrayRef<AsmToken> Tokens) { 5383 auto TokCount = getLexer().peekTokens(Tokens); 5384 5385 for (auto Idx = TokCount; Idx < Tokens.size(); ++Idx) 5386 Tokens[Idx] = AsmToken(AsmToken::Error, ""); 5387 } 5388 5389 AsmToken::TokenKind 5390 AMDGPUAsmParser::getTokenKind() const { 5391 return getLexer().getKind(); 5392 } 5393 5394 SMLoc 5395 AMDGPUAsmParser::getLoc() const { 5396 return getToken().getLoc(); 5397 } 5398 5399 StringRef 5400 AMDGPUAsmParser::getTokenStr() const { 5401 return getToken().getString(); 5402 } 5403 5404 void 5405 AMDGPUAsmParser::lex() { 5406 Parser.Lex(); 5407 } 5408 5409 //===----------------------------------------------------------------------===// 5410 // swizzle 5411 //===----------------------------------------------------------------------===// 5412 5413 LLVM_READNONE 5414 static unsigned 5415 encodeBitmaskPerm(const unsigned AndMask, 5416 const unsigned OrMask, 5417 const unsigned XorMask) { 5418 using namespace llvm::AMDGPU::Swizzle; 5419 5420 return BITMASK_PERM_ENC | 5421 (AndMask << BITMASK_AND_SHIFT) | 5422 (OrMask << BITMASK_OR_SHIFT) | 5423 (XorMask << BITMASK_XOR_SHIFT); 5424 } 5425 5426 bool 5427 AMDGPUAsmParser::parseSwizzleOperands(const unsigned OpNum, int64_t* Op, 5428 const unsigned MinVal, 5429 const unsigned MaxVal, 5430 const StringRef ErrMsg) { 5431 for (unsigned i = 0; i < OpNum; ++i) { 5432 if (!skipToken(AsmToken::Comma, "expected a comma")){ 5433 return false; 5434 } 5435 SMLoc ExprLoc = Parser.getTok().getLoc(); 5436 if (!parseExpr(Op[i])) { 5437 return false; 5438 } 5439 if (Op[i] < MinVal || Op[i] > MaxVal) { 5440 Error(ExprLoc, ErrMsg); 5441 return false; 5442 } 5443 } 5444 5445 return true; 5446 } 5447 5448 bool 5449 AMDGPUAsmParser::parseSwizzleQuadPerm(int64_t &Imm) { 5450 using namespace llvm::AMDGPU::Swizzle; 5451 5452 int64_t Lane[LANE_NUM]; 5453 if (parseSwizzleOperands(LANE_NUM, Lane, 0, LANE_MAX, 5454 "expected a 2-bit lane id")) { 5455 Imm = QUAD_PERM_ENC; 5456 for (unsigned I = 0; I < LANE_NUM; ++I) { 5457 Imm |= Lane[I] << (LANE_SHIFT * I); 5458 } 5459 return true; 5460 } 5461 return false; 5462 } 5463 5464 bool 5465 AMDGPUAsmParser::parseSwizzleBroadcast(int64_t &Imm) { 5466 using namespace llvm::AMDGPU::Swizzle; 5467 5468 SMLoc S = Parser.getTok().getLoc(); 5469 int64_t GroupSize; 5470 int64_t LaneIdx; 5471 5472 if (!parseSwizzleOperands(1, &GroupSize, 5473 2, 32, 5474 "group size must be in the interval [2,32]")) { 5475 return false; 5476 } 5477 if (!isPowerOf2_64(GroupSize)) { 5478 Error(S, "group size must be a power of two"); 5479 return false; 5480 } 5481 if (parseSwizzleOperands(1, &LaneIdx, 5482 0, GroupSize - 1, 5483 "lane id must be in the interval [0,group size - 1]")) { 5484 Imm = encodeBitmaskPerm(BITMASK_MAX - GroupSize + 1, LaneIdx, 0); 5485 return true; 5486 } 5487 return false; 5488 } 5489 5490 bool 5491 AMDGPUAsmParser::parseSwizzleReverse(int64_t &Imm) { 5492 using namespace llvm::AMDGPU::Swizzle; 5493 5494 SMLoc S = Parser.getTok().getLoc(); 5495 int64_t GroupSize; 5496 5497 if (!parseSwizzleOperands(1, &GroupSize, 5498 2, 32, "group size must be in the interval [2,32]")) { 5499 return false; 5500 } 5501 if (!isPowerOf2_64(GroupSize)) { 5502 Error(S, "group size must be a power of two"); 5503 return false; 5504 } 5505 5506 Imm = encodeBitmaskPerm(BITMASK_MAX, 0, 
GroupSize - 1); 5507 return true; 5508 } 5509 5510 bool 5511 AMDGPUAsmParser::parseSwizzleSwap(int64_t &Imm) { 5512 using namespace llvm::AMDGPU::Swizzle; 5513 5514 SMLoc S = Parser.getTok().getLoc(); 5515 int64_t GroupSize; 5516 5517 if (!parseSwizzleOperands(1, &GroupSize, 5518 1, 16, "group size must be in the interval [1,16]")) { 5519 return false; 5520 } 5521 if (!isPowerOf2_64(GroupSize)) { 5522 Error(S, "group size must be a power of two"); 5523 return false; 5524 } 5525 5526 Imm = encodeBitmaskPerm(BITMASK_MAX, 0, GroupSize); 5527 return true; 5528 } 5529 5530 bool 5531 AMDGPUAsmParser::parseSwizzleBitmaskPerm(int64_t &Imm) { 5532 using namespace llvm::AMDGPU::Swizzle; 5533 5534 if (!skipToken(AsmToken::Comma, "expected a comma")) { 5535 return false; 5536 } 5537 5538 StringRef Ctl; 5539 SMLoc StrLoc = Parser.getTok().getLoc(); 5540 if (!parseString(Ctl)) { 5541 return false; 5542 } 5543 if (Ctl.size() != BITMASK_WIDTH) { 5544 Error(StrLoc, "expected a 5-character mask"); 5545 return false; 5546 } 5547 5548 unsigned AndMask = 0; 5549 unsigned OrMask = 0; 5550 unsigned XorMask = 0; 5551 5552 for (size_t i = 0; i < Ctl.size(); ++i) { 5553 unsigned Mask = 1 << (BITMASK_WIDTH - 1 - i); 5554 switch(Ctl[i]) { 5555 default: 5556 Error(StrLoc, "invalid mask"); 5557 return false; 5558 case '0': 5559 break; 5560 case '1': 5561 OrMask |= Mask; 5562 break; 5563 case 'p': 5564 AndMask |= Mask; 5565 break; 5566 case 'i': 5567 AndMask |= Mask; 5568 XorMask |= Mask; 5569 break; 5570 } 5571 } 5572 5573 Imm = encodeBitmaskPerm(AndMask, OrMask, XorMask); 5574 return true; 5575 } 5576 5577 bool 5578 AMDGPUAsmParser::parseSwizzleOffset(int64_t &Imm) { 5579 5580 SMLoc OffsetLoc = Parser.getTok().getLoc(); 5581 5582 if (!parseExpr(Imm)) { 5583 return false; 5584 } 5585 if (!isUInt<16>(Imm)) { 5586 Error(OffsetLoc, "expected a 16-bit offset"); 5587 return false; 5588 } 5589 return true; 5590 } 5591 5592 bool 5593 AMDGPUAsmParser::parseSwizzleMacro(int64_t &Imm) { 5594 using namespace llvm::AMDGPU::Swizzle; 5595 5596 if (skipToken(AsmToken::LParen, "expected a left parentheses")) { 5597 5598 SMLoc ModeLoc = Parser.getTok().getLoc(); 5599 bool Ok = false; 5600 5601 if (trySkipId(IdSymbolic[ID_QUAD_PERM])) { 5602 Ok = parseSwizzleQuadPerm(Imm); 5603 } else if (trySkipId(IdSymbolic[ID_BITMASK_PERM])) { 5604 Ok = parseSwizzleBitmaskPerm(Imm); 5605 } else if (trySkipId(IdSymbolic[ID_BROADCAST])) { 5606 Ok = parseSwizzleBroadcast(Imm); 5607 } else if (trySkipId(IdSymbolic[ID_SWAP])) { 5608 Ok = parseSwizzleSwap(Imm); 5609 } else if (trySkipId(IdSymbolic[ID_REVERSE])) { 5610 Ok = parseSwizzleReverse(Imm); 5611 } else { 5612 Error(ModeLoc, "expected a swizzle mode"); 5613 } 5614 5615 return Ok && skipToken(AsmToken::RParen, "expected a closing parentheses"); 5616 } 5617 5618 return false; 5619 } 5620 5621 OperandMatchResultTy 5622 AMDGPUAsmParser::parseSwizzleOp(OperandVector &Operands) { 5623 SMLoc S = Parser.getTok().getLoc(); 5624 int64_t Imm = 0; 5625 5626 if (trySkipId("offset")) { 5627 5628 bool Ok = false; 5629 if (skipToken(AsmToken::Colon, "expected a colon")) { 5630 if (trySkipId("swizzle")) { 5631 Ok = parseSwizzleMacro(Imm); 5632 } else { 5633 Ok = parseSwizzleOffset(Imm); 5634 } 5635 } 5636 5637 Operands.push_back(AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTySwizzle)); 5638 5639 return Ok? MatchOperand_Success : MatchOperand_ParseFail; 5640 } else { 5641 // Swizzle "offset" operand is optional. 5642 // If it is omitted, try parsing other optional operands. 
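// For reference, the forms handled above look roughly like this (an
// illustrative sketch; the symbolic mode names are assumed to come from
// IdSymbolic in AMDGPUAsmUtils, e.g. QUAD_PERM):
//   offset:0xFFFF                           - a raw 16-bit swizzle offset
//   offset:swizzle(QUAD_PERM, 0, 1, 2, 3)   - a symbolic swizzle macro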
5643 return parseOptionalOpr(Operands); 5644 } 5645 } 5646 5647 bool 5648 AMDGPUOperand::isSwizzle() const { 5649 return isImmTy(ImmTySwizzle); 5650 } 5651 5652 //===----------------------------------------------------------------------===// 5653 // VGPR Index Mode 5654 //===----------------------------------------------------------------------===// 5655 5656 int64_t AMDGPUAsmParser::parseGPRIdxMacro() { 5657 5658 using namespace llvm::AMDGPU::VGPRIndexMode; 5659 5660 if (trySkipToken(AsmToken::RParen)) { 5661 return OFF; 5662 } 5663 5664 int64_t Imm = 0; 5665 5666 while (true) { 5667 unsigned Mode = 0; 5668 SMLoc S = Parser.getTok().getLoc(); 5669 5670 for (unsigned ModeId = ID_MIN; ModeId <= ID_MAX; ++ModeId) { 5671 if (trySkipId(IdSymbolic[ModeId])) { 5672 Mode = 1 << ModeId; 5673 break; 5674 } 5675 } 5676 5677 if (Mode == 0) { 5678 Error(S, (Imm == 0)? 5679 "expected a VGPR index mode or a closing parenthesis" : 5680 "expected a VGPR index mode"); 5681 break; 5682 } 5683 5684 if (Imm & Mode) { 5685 Error(S, "duplicate VGPR index mode"); 5686 break; 5687 } 5688 Imm |= Mode; 5689 5690 if (trySkipToken(AsmToken::RParen)) 5691 break; 5692 if (!skipToken(AsmToken::Comma, 5693 "expected a comma or a closing parenthesis")) 5694 break; 5695 } 5696 5697 return Imm; 5698 } 5699 5700 OperandMatchResultTy 5701 AMDGPUAsmParser::parseGPRIdxMode(OperandVector &Operands) { 5702 5703 int64_t Imm = 0; 5704 SMLoc S = Parser.getTok().getLoc(); 5705 5706 if (getLexer().getKind() == AsmToken::Identifier && 5707 Parser.getTok().getString() == "gpr_idx" && 5708 getLexer().peekTok().is(AsmToken::LParen)) { 5709 5710 Parser.Lex(); 5711 Parser.Lex(); 5712 5713 // If parse failed, trigger an error but do not return error code 5714 // to avoid excessive error messages. 5715 Imm = parseGPRIdxMacro(); 5716 5717 } else { 5718 if (getParser().parseAbsoluteExpression(Imm)) 5719 return MatchOperand_NoMatch; 5720 if (Imm < 0 || !isUInt<4>(Imm)) { 5721 Error(S, "invalid immediate: only 4-bit values are legal"); 5722 } 5723 } 5724 5725 Operands.push_back( 5726 AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTyGprIdxMode)); 5727 return MatchOperand_Success; 5728 } 5729 5730 bool AMDGPUOperand::isGPRIdxMode() const { 5731 return isImmTy(ImmTyGprIdxMode); 5732 } 5733 5734 //===----------------------------------------------------------------------===// 5735 // sopp branch targets 5736 //===----------------------------------------------------------------------===// 5737 5738 OperandMatchResultTy 5739 AMDGPUAsmParser::parseSOppBrTarget(OperandVector &Operands) { 5740 5741 // Make sure we are not parsing something 5742 // that looks like a label or an expression but is not. 5743 // This will improve error messages. 5744 if (isRegister() || isModifier()) 5745 return MatchOperand_NoMatch; 5746 5747 if (parseExpr(Operands)) { 5748 5749 AMDGPUOperand &Opr = ((AMDGPUOperand &)*Operands[Operands.size() - 1]); 5750 assert(Opr.isImm() || Opr.isExpr()); 5751 SMLoc Loc = Opr.getStartLoc(); 5752 5753 // Currently we do not support arbitrary expressions as branch targets. 5754 // Only labels and absolute expressions are accepted. 
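// Illustrative examples (derived from the checks below):
//   s_branch label       - accepted: a symbol reference
//   s_branch 4           - accepted: an absolute 16-bit signed offset
//   s_branch label + 4   - rejected: an arbitrary (relocatable) expression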
5755 if (Opr.isExpr() && !Opr.isSymbolRefExpr()) { 5756 Error(Loc, "expected an absolute expression or a label"); 5757 } else if (Opr.isImm() && !Opr.isS16Imm()) { 5758 Error(Loc, "expected a 16-bit signed jump offset"); 5759 } 5760 } 5761 5762 return MatchOperand_Success; // avoid excessive error messages 5763 } 5764 5765 //===----------------------------------------------------------------------===// 5766 // Boolean holding registers 5767 //===----------------------------------------------------------------------===// 5768 5769 OperandMatchResultTy 5770 AMDGPUAsmParser::parseBoolReg(OperandVector &Operands) { 5771 return parseReg(Operands); 5772 } 5773 5774 //===----------------------------------------------------------------------===// 5775 // mubuf 5776 //===----------------------------------------------------------------------===// 5777 5778 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultDLC() const { 5779 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyDLC); 5780 } 5781 5782 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultGLC() const { 5783 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyGLC); 5784 } 5785 5786 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSLC() const { 5787 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTySLC); 5788 } 5789 5790 void AMDGPUAsmParser::cvtMubufImpl(MCInst &Inst, 5791 const OperandVector &Operands, 5792 bool IsAtomic, 5793 bool IsAtomicReturn, 5794 bool IsLds) { 5795 bool IsLdsOpcode = IsLds; 5796 bool HasLdsModifier = false; 5797 OptionalImmIndexMap OptionalIdx; 5798 assert(IsAtomicReturn ? IsAtomic : true); 5799 unsigned FirstOperandIdx = 1; 5800 5801 for (unsigned i = FirstOperandIdx, e = Operands.size(); i != e; ++i) { 5802 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 5803 5804 // Add the register arguments 5805 if (Op.isReg()) { 5806 Op.addRegOperands(Inst, 1); 5807 // Insert a tied src for atomic return dst. 5808 // This cannot be postponed as subsequent calls to 5809 // addImmOperands rely on correct number of MC operands. 5810 if (IsAtomicReturn && i == FirstOperandIdx) 5811 Op.addRegOperands(Inst, 1); 5812 continue; 5813 } 5814 5815 // Handle the case where soffset is an immediate 5816 if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyNone) { 5817 Op.addImmOperands(Inst, 1); 5818 continue; 5819 } 5820 5821 HasLdsModifier |= Op.isLDS(); 5822 5823 // Handle tokens like 'offen' which are sometimes hard-coded into the 5824 // asm string. There are no MCInst operands for these. 5825 if (Op.isToken()) { 5826 continue; 5827 } 5828 assert(Op.isImm()); 5829 5830 // Handle optional arguments 5831 OptionalIdx[Op.getImmTy()] = i; 5832 } 5833 5834 // This is a workaround for an llvm quirk which may result in an 5835 // incorrect instruction selection. Lds and non-lds versions of 5836 // MUBUF instructions are identical except that lds versions 5837 // have mandatory 'lds' modifier. However this modifier follows 5838 // optional modifiers and llvm asm matcher regards this 'lds' 5839 // modifier as an optional one. As a result, an lds version 5840 // of opcode may be selected even if it has no 'lds' modifier. 5841 if (IsLdsOpcode && !HasLdsModifier) { 5842 int NoLdsOpcode = AMDGPU::getMUBUFNoLdsInst(Inst.getOpcode()); 5843 if (NoLdsOpcode != -1) { // Got lds version - correct it. 5844 Inst.setOpcode(NoLdsOpcode); 5845 IsLdsOpcode = false; 5846 } 5847 } 5848 5849 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset); 5850 if (!IsAtomic) { // glc is hard-coded. 
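// (For atomics the glc bit is effectively part of the selected opcode:
// it is hard-coded in the AsmString and selects the returning variant,
// so it must not be re-emitted here as an optional operand.)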
5851 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGLC); 5852 } 5853 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySLC); 5854 5855 if (!IsLdsOpcode) { // tfe is not legal with lds opcodes 5856 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE); 5857 } 5858 5859 if (isGFX10()) 5860 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDLC); 5861 } 5862 5863 void AMDGPUAsmParser::cvtMtbuf(MCInst &Inst, const OperandVector &Operands) { 5864 OptionalImmIndexMap OptionalIdx; 5865 5866 for (unsigned i = 1, e = Operands.size(); i != e; ++i) { 5867 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]); 5868 5869 // Add the register arguments 5870 if (Op.isReg()) { 5871 Op.addRegOperands(Inst, 1); 5872 continue; 5873 } 5874 5875 // Handle the case where soffset is an immediate 5876 if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyNone) { 5877 Op.addImmOperands(Inst, 1); 5878 continue; 5879 } 5880 5881 // Handle tokens like 'offen' which are sometimes hard-coded into the 5882 // asm string. There are no MCInst operands for these. 5883 if (Op.isToken()) { 5884 continue; 5885 } 5886 assert(Op.isImm()); 5887 5888 // Handle optional arguments 5889 OptionalIdx[Op.getImmTy()] = i; 5890 } 5891 5892 addOptionalImmOperand(Inst, Operands, OptionalIdx, 5893 AMDGPUOperand::ImmTyOffset); 5894 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyFORMAT); 5895 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGLC); 5896 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySLC); 5897 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE); 5898 5899 if (isGFX10()) 5900 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDLC); 5901 } 5902 5903 //===----------------------------------------------------------------------===// 5904 // mimg 5905 //===----------------------------------------------------------------------===// 5906 5907 void AMDGPUAsmParser::cvtMIMG(MCInst &Inst, const OperandVector &Operands, 5908 bool IsAtomic) { 5909 unsigned I = 1; 5910 const MCInstrDesc &Desc = MII.get(Inst.getOpcode()); 5911 for (unsigned J = 0; J < Desc.getNumDefs(); ++J) { 5912 ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1); 5913 } 5914 5915 if (IsAtomic) { 5916 // Add src, same as dst 5917 assert(Desc.getNumDefs() == 1); 5918 ((AMDGPUOperand &)*Operands[I - 1]).addRegOperands(Inst, 1); 5919 } 5920 5921 OptionalImmIndexMap OptionalIdx; 5922 5923 for (unsigned E = Operands.size(); I != E; ++I) { 5924 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]); 5925 5926 // Add the register arguments 5927 if (Op.isReg()) { 5928 Op.addRegOperands(Inst, 1); 5929 } else if (Op.isImmModifier()) { 5930 OptionalIdx[Op.getImmTy()] = I; 5931 } else if (!Op.isToken()) { 5932 llvm_unreachable("unexpected operand type"); 5933 } 5934 } 5935 5936 bool IsGFX10 = isGFX10(); 5937 5938 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDMask); 5939 if (IsGFX10) 5940 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDim, -1); 5941 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyUNorm); 5942 if (IsGFX10) 5943 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDLC); 5944 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGLC); 5945 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySLC); 5946 addOptionalImmOperand(Inst, Operands, 
OptionalIdx, AMDGPUOperand::ImmTyR128A16); 5947 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE); 5948 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyLWE); 5949 if (!IsGFX10) 5950 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDA); 5951 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyD16); 5952 } 5953 5954 void AMDGPUAsmParser::cvtMIMGAtomic(MCInst &Inst, const OperandVector &Operands) { 5955 cvtMIMG(Inst, Operands, true); 5956 } 5957 5958 //===----------------------------------------------------------------------===// 5959 // smrd 5960 //===----------------------------------------------------------------------===// 5961 5962 bool AMDGPUOperand::isSMRDOffset8() const { 5963 return isImm() && isUInt<8>(getImm()); 5964 } 5965 5966 bool AMDGPUOperand::isSMRDOffset20() const { 5967 return isImm() && isUInt<20>(getImm()); 5968 } 5969 5970 bool AMDGPUOperand::isSMRDLiteralOffset() const { 5971 // 32-bit literals are only supported on CI and we only want to use them 5972 // when the offset is > 8-bits. 5973 return isImm() && !isUInt<8>(getImm()) && isUInt<32>(getImm()); 5974 } 5975 5976 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSMRDOffset8() const { 5977 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset); 5978 } 5979 5980 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSMRDOffset20() const { 5981 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset); 5982 } 5983 5984 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSMRDLiteralOffset() const { 5985 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset); 5986 } 5987 5988 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultFlatOffset() const { 5989 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset); 5990 } 5991 5992 //===----------------------------------------------------------------------===// 5993 // vop3 5994 //===----------------------------------------------------------------------===// 5995 5996 static bool ConvertOmodMul(int64_t &Mul) { 5997 if (Mul != 1 && Mul != 2 && Mul != 4) 5998 return false; 5999 6000 Mul >>= 1; 6001 return true; 6002 } 6003 6004 static bool ConvertOmodDiv(int64_t &Div) { 6005 if (Div == 1) { 6006 Div = 0; 6007 return true; 6008 } 6009 6010 if (Div == 2) { 6011 Div = 3; 6012 return true; 6013 } 6014 6015 return false; 6016 } 6017 6018 static bool ConvertBoundCtrl(int64_t &BoundCtrl) { 6019 if (BoundCtrl == 0) { 6020 BoundCtrl = 1; 6021 return true; 6022 } 6023 6024 if (BoundCtrl == -1) { 6025 BoundCtrl = 0; 6026 return true; 6027 } 6028 6029 return false; 6030 } 6031 6032 // Note: the order in this table matches the order of operands in AsmString. 
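// Each entry is {Name, Type, IsBit, ConvertResult}. IsBit operands are parsed
// as bare flags (e.g. "glc"), non-bit operands take a value (e.g. "offset:16"),
// and a non-null ConvertResult rewrites the parsed value into its encoded form
// (see ConvertOmodMul and ConvertBoundCtrl above).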
6033 static const OptionalOperand AMDGPUOptionalOperandTable[] = { 6034 {"offen", AMDGPUOperand::ImmTyOffen, true, nullptr}, 6035 {"idxen", AMDGPUOperand::ImmTyIdxen, true, nullptr}, 6036 {"addr64", AMDGPUOperand::ImmTyAddr64, true, nullptr}, 6037 {"offset0", AMDGPUOperand::ImmTyOffset0, false, nullptr}, 6038 {"offset1", AMDGPUOperand::ImmTyOffset1, false, nullptr}, 6039 {"gds", AMDGPUOperand::ImmTyGDS, true, nullptr}, 6040 {"lds", AMDGPUOperand::ImmTyLDS, true, nullptr}, 6041 {"offset", AMDGPUOperand::ImmTyOffset, false, nullptr}, 6042 {"inst_offset", AMDGPUOperand::ImmTyInstOffset, false, nullptr}, 6043 {"dlc", AMDGPUOperand::ImmTyDLC, true, nullptr}, 6044 {"format", AMDGPUOperand::ImmTyFORMAT, false, nullptr}, 6045 {"glc", AMDGPUOperand::ImmTyGLC, true, nullptr}, 6046 {"slc", AMDGPUOperand::ImmTySLC, true, nullptr}, 6047 {"swz", AMDGPUOperand::ImmTySWZ, true, nullptr}, 6048 {"tfe", AMDGPUOperand::ImmTyTFE, true, nullptr}, 6049 {"d16", AMDGPUOperand::ImmTyD16, true, nullptr}, 6050 {"high", AMDGPUOperand::ImmTyHigh, true, nullptr}, 6051 {"clamp", AMDGPUOperand::ImmTyClampSI, true, nullptr}, 6052 {"omod", AMDGPUOperand::ImmTyOModSI, false, ConvertOmodMul}, 6053 {"unorm", AMDGPUOperand::ImmTyUNorm, true, nullptr}, 6054 {"da", AMDGPUOperand::ImmTyDA, true, nullptr}, 6055 {"r128", AMDGPUOperand::ImmTyR128A16, true, nullptr}, 6056 {"a16", AMDGPUOperand::ImmTyR128A16, true, nullptr}, 6057 {"lwe", AMDGPUOperand::ImmTyLWE, true, nullptr}, 6058 {"d16", AMDGPUOperand::ImmTyD16, true, nullptr}, 6059 {"dmask", AMDGPUOperand::ImmTyDMask, false, nullptr}, 6060 {"dim", AMDGPUOperand::ImmTyDim, false, nullptr}, 6061 {"row_mask", AMDGPUOperand::ImmTyDppRowMask, false, nullptr}, 6062 {"bank_mask", AMDGPUOperand::ImmTyDppBankMask, false, nullptr}, 6063 {"bound_ctrl", AMDGPUOperand::ImmTyDppBoundCtrl, false, ConvertBoundCtrl}, 6064 {"fi", AMDGPUOperand::ImmTyDppFi, false, nullptr}, 6065 {"dst_sel", AMDGPUOperand::ImmTySdwaDstSel, false, nullptr}, 6066 {"src0_sel", AMDGPUOperand::ImmTySdwaSrc0Sel, false, nullptr}, 6067 {"src1_sel", AMDGPUOperand::ImmTySdwaSrc1Sel, false, nullptr}, 6068 {"dst_unused", AMDGPUOperand::ImmTySdwaDstUnused, false, nullptr}, 6069 {"compr", AMDGPUOperand::ImmTyExpCompr, true, nullptr }, 6070 {"vm", AMDGPUOperand::ImmTyExpVM, true, nullptr}, 6071 {"op_sel", AMDGPUOperand::ImmTyOpSel, false, nullptr}, 6072 {"op_sel_hi", AMDGPUOperand::ImmTyOpSelHi, false, nullptr}, 6073 {"neg_lo", AMDGPUOperand::ImmTyNegLo, false, nullptr}, 6074 {"neg_hi", AMDGPUOperand::ImmTyNegHi, false, nullptr}, 6075 {"blgp", AMDGPUOperand::ImmTyBLGP, false, nullptr}, 6076 {"cbsz", AMDGPUOperand::ImmTyCBSZ, false, nullptr}, 6077 {"abid", AMDGPUOperand::ImmTyABID, false, nullptr} 6078 }; 6079 6080 OperandMatchResultTy AMDGPUAsmParser::parseOptionalOperand(OperandVector &Operands) { 6081 6082 OperandMatchResultTy res = parseOptionalOpr(Operands); 6083 6084 // This is a hack to enable hardcoded mandatory operands which follow 6085 // optional operands. 6086 // 6087 // Current design assumes that all operands after the first optional operand 6088 // are also optional. However implementation of some instructions violates 6089 // this rule (see e.g. flat/global atomic which have hardcoded 'glc' operands). 6090 // 6091 // To alleviate this problem, we have to (implicitly) parse extra operands 6092 // to make sure autogenerated parser of custom operands never hit hardcoded 6093 // mandatory operands. 
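// Hedged example of the problem: returning flat/global atomics end their
// AsmString with a hard-coded "glc" that follows the optional operands, so
// without this extra lookahead the auto-generated matcher for custom operands
// could stop before that mandatory token is consumed.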
6094 6095 for (unsigned i = 0; i < MAX_OPR_LOOKAHEAD; ++i) { 6096 if (res != MatchOperand_Success || 6097 isToken(AsmToken::EndOfStatement)) 6098 break; 6099 6100 trySkipToken(AsmToken::Comma); 6101 res = parseOptionalOpr(Operands); 6102 } 6103 6104 return res; 6105 } 6106 6107 OperandMatchResultTy AMDGPUAsmParser::parseOptionalOpr(OperandVector &Operands) { 6108 OperandMatchResultTy res; 6109 for (const OptionalOperand &Op : AMDGPUOptionalOperandTable) { 6110 // try to parse any optional operand here 6111 if (Op.IsBit) { 6112 res = parseNamedBit(Op.Name, Operands, Op.Type); 6113 } else if (Op.Type == AMDGPUOperand::ImmTyOModSI) { 6114 res = parseOModOperand(Operands); 6115 } else if (Op.Type == AMDGPUOperand::ImmTySdwaDstSel || 6116 Op.Type == AMDGPUOperand::ImmTySdwaSrc0Sel || 6117 Op.Type == AMDGPUOperand::ImmTySdwaSrc1Sel) { 6118 res = parseSDWASel(Operands, Op.Name, Op.Type); 6119 } else if (Op.Type == AMDGPUOperand::ImmTySdwaDstUnused) { 6120 res = parseSDWADstUnused(Operands); 6121 } else if (Op.Type == AMDGPUOperand::ImmTyOpSel || 6122 Op.Type == AMDGPUOperand::ImmTyOpSelHi || 6123 Op.Type == AMDGPUOperand::ImmTyNegLo || 6124 Op.Type == AMDGPUOperand::ImmTyNegHi) { 6125 res = parseOperandArrayWithPrefix(Op.Name, Operands, Op.Type, 6126 Op.ConvertResult); 6127 } else if (Op.Type == AMDGPUOperand::ImmTyDim) { 6128 res = parseDim(Operands); 6129 } else if (Op.Type == AMDGPUOperand::ImmTyFORMAT && !isGFX10()) { 6130 res = parseDfmtNfmt(Operands); 6131 } else { 6132 res = parseIntWithPrefix(Op.Name, Operands, Op.Type, Op.ConvertResult); 6133 } 6134 if (res != MatchOperand_NoMatch) { 6135 return res; 6136 } 6137 } 6138 return MatchOperand_NoMatch; 6139 } 6140 6141 OperandMatchResultTy AMDGPUAsmParser::parseOModOperand(OperandVector &Operands) { 6142 StringRef Name = Parser.getTok().getString(); 6143 if (Name == "mul") { 6144 return parseIntWithPrefix("mul", Operands, 6145 AMDGPUOperand::ImmTyOModSI, ConvertOmodMul); 6146 } 6147 6148 if (Name == "div") { 6149 return parseIntWithPrefix("div", Operands, 6150 AMDGPUOperand::ImmTyOModSI, ConvertOmodDiv); 6151 } 6152 6153 return MatchOperand_NoMatch; 6154 } 6155 6156 void AMDGPUAsmParser::cvtVOP3OpSel(MCInst &Inst, const OperandVector &Operands) { 6157 cvtVOP3P(Inst, Operands); 6158 6159 int Opc = Inst.getOpcode(); 6160 6161 int SrcNum; 6162 const int Ops[] = { AMDGPU::OpName::src0, 6163 AMDGPU::OpName::src1, 6164 AMDGPU::OpName::src2 }; 6165 for (SrcNum = 0; 6166 SrcNum < 3 && AMDGPU::getNamedOperandIdx(Opc, Ops[SrcNum]) != -1; 6167 ++SrcNum); 6168 assert(SrcNum > 0); 6169 6170 int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel); 6171 unsigned OpSel = Inst.getOperand(OpSelIdx).getImm(); 6172 6173 if ((OpSel & (1 << SrcNum)) != 0) { 6174 int ModIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0_modifiers); 6175 uint32_t ModVal = Inst.getOperand(ModIdx).getImm(); 6176 Inst.getOperand(ModIdx).setImm(ModVal | SISrcMods::DST_OP_SEL); 6177 } 6178 } 6179 6180 static bool isRegOrImmWithInputMods(const MCInstrDesc &Desc, unsigned OpNum) { 6181 // 1. This operand is input modifiers 6182 return Desc.OpInfo[OpNum].OperandType == AMDGPU::OPERAND_INPUT_MODS 6183 // 2. This is not last operand 6184 && Desc.NumOperands > (OpNum + 1) 6185 // 3. Next operand is register class 6186 && Desc.OpInfo[OpNum + 1].RegClass != -1 6187 // 4. 
Next register is not tied to any other operand 6188 && Desc.getOperandConstraint(OpNum + 1, MCOI::OperandConstraint::TIED_TO) == -1; 6189 } 6190 6191 void AMDGPUAsmParser::cvtVOP3Interp(MCInst &Inst, const OperandVector &Operands) 6192 { 6193 OptionalImmIndexMap OptionalIdx; 6194 unsigned Opc = Inst.getOpcode(); 6195 6196 unsigned I = 1; 6197 const MCInstrDesc &Desc = MII.get(Inst.getOpcode()); 6198 for (unsigned J = 0; J < Desc.getNumDefs(); ++J) { 6199 ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1); 6200 } 6201 6202 for (unsigned E = Operands.size(); I != E; ++I) { 6203 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]); 6204 if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) { 6205 Op.addRegOrImmWithFPInputModsOperands(Inst, 2); 6206 } else if (Op.isInterpSlot() || 6207 Op.isInterpAttr() || 6208 Op.isAttrChan()) { 6209 Inst.addOperand(MCOperand::createImm(Op.getImm())); 6210 } else if (Op.isImmModifier()) { 6211 OptionalIdx[Op.getImmTy()] = I; 6212 } else { 6213 llvm_unreachable("unhandled operand type"); 6214 } 6215 } 6216 6217 if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::high) != -1) { 6218 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyHigh); 6219 } 6220 6221 if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp) != -1) { 6222 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI); 6223 } 6224 6225 if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::omod) != -1) { 6226 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI); 6227 } 6228 } 6229 6230 void AMDGPUAsmParser::cvtVOP3(MCInst &Inst, const OperandVector &Operands, 6231 OptionalImmIndexMap &OptionalIdx) { 6232 unsigned Opc = Inst.getOpcode(); 6233 6234 unsigned I = 1; 6235 const MCInstrDesc &Desc = MII.get(Inst.getOpcode()); 6236 for (unsigned J = 0; J < Desc.getNumDefs(); ++J) { 6237 ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1); 6238 } 6239 6240 if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0_modifiers) != -1) { 6241 // This instruction has src modifiers 6242 for (unsigned E = Operands.size(); I != E; ++I) { 6243 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]); 6244 if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) { 6245 Op.addRegOrImmWithFPInputModsOperands(Inst, 2); 6246 } else if (Op.isImmModifier()) { 6247 OptionalIdx[Op.getImmTy()] = I; 6248 } else if (Op.isRegOrImm()) { 6249 Op.addRegOrImmOperands(Inst, 1); 6250 } else { 6251 llvm_unreachable("unhandled operand type"); 6252 } 6253 } 6254 } else { 6255 // No src modifiers 6256 for (unsigned E = Operands.size(); I != E; ++I) { 6257 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]); 6258 if (Op.isMod()) { 6259 OptionalIdx[Op.getImmTy()] = I; 6260 } else { 6261 Op.addRegOrImmOperands(Inst, 1); 6262 } 6263 } 6264 } 6265 6266 if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp) != -1) { 6267 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI); 6268 } 6269 6270 if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::omod) != -1) { 6271 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI); 6272 } 6273 6274 // Special case v_mac_{f16, f32} and v_fmac_{f16, f32} (gfx906/gfx10+): 6275 // it has src2 register operand that is tied to dst operand 6276 // we don't allow modifiers for this operand in assembler so src2_modifiers 6277 // should be 0. 
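// Concretely (illustrative): for "v_mac_f32 v0, v1, v2" the block below
// synthesizes the trailing source operands the user never writes, an all-zero
// src2_modifiers immediate followed by a copy of the dst register, because
// src2 is tied to dst.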
6278 if (Opc == AMDGPU::V_MAC_F32_e64_gfx6_gfx7 || 6279 Opc == AMDGPU::V_MAC_F32_e64_gfx10 || 6280 Opc == AMDGPU::V_MAC_F32_e64_vi || 6281 Opc == AMDGPU::V_MAC_F16_e64_vi || 6282 Opc == AMDGPU::V_FMAC_F32_e64_gfx10 || 6283 Opc == AMDGPU::V_FMAC_F32_e64_vi || 6284 Opc == AMDGPU::V_FMAC_F16_e64_gfx10) { 6285 auto it = Inst.begin(); 6286 std::advance(it, AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2_modifiers)); 6287 it = Inst.insert(it, MCOperand::createImm(0)); // no modifiers for src2 6288 ++it; 6289 Inst.insert(it, Inst.getOperand(0)); // src2 = dst 6290 } 6291 } 6292 6293 void AMDGPUAsmParser::cvtVOP3(MCInst &Inst, const OperandVector &Operands) { 6294 OptionalImmIndexMap OptionalIdx; 6295 cvtVOP3(Inst, Operands, OptionalIdx); 6296 } 6297 6298 void AMDGPUAsmParser::cvtVOP3P(MCInst &Inst, 6299 const OperandVector &Operands) { 6300 OptionalImmIndexMap OptIdx; 6301 const int Opc = Inst.getOpcode(); 6302 const MCInstrDesc &Desc = MII.get(Opc); 6303 6304 const bool IsPacked = (Desc.TSFlags & SIInstrFlags::IsPacked) != 0; 6305 6306 cvtVOP3(Inst, Operands, OptIdx); 6307 6308 if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdst_in) != -1) { 6309 assert(!IsPacked); 6310 Inst.addOperand(Inst.getOperand(0)); 6311 } 6312 6313 // FIXME: This is messy. Parse the modifiers as if it was a normal VOP3 6314 // instruction, and then figure out where to actually put the modifiers 6315 6316 addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyOpSel); 6317 6318 int OpSelHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel_hi); 6319 if (OpSelHiIdx != -1) { 6320 int DefaultVal = IsPacked ? -1 : 0; 6321 addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyOpSelHi, 6322 DefaultVal); 6323 } 6324 6325 int NegLoIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::neg_lo); 6326 if (NegLoIdx != -1) { 6327 assert(IsPacked); 6328 addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyNegLo); 6329 addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyNegHi); 6330 } 6331 6332 const int Ops[] = { AMDGPU::OpName::src0, 6333 AMDGPU::OpName::src1, 6334 AMDGPU::OpName::src2 }; 6335 const int ModOps[] = { AMDGPU::OpName::src0_modifiers, 6336 AMDGPU::OpName::src1_modifiers, 6337 AMDGPU::OpName::src2_modifiers }; 6338 6339 int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel); 6340 6341 unsigned OpSel = Inst.getOperand(OpSelIdx).getImm(); 6342 unsigned OpSelHi = 0; 6343 unsigned NegLo = 0; 6344 unsigned NegHi = 0; 6345 6346 if (OpSelHiIdx != -1) { 6347 OpSelHi = Inst.getOperand(OpSelHiIdx).getImm(); 6348 } 6349 6350 if (NegLoIdx != -1) { 6351 int NegHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::neg_hi); 6352 NegLo = Inst.getOperand(NegLoIdx).getImm(); 6353 NegHi = Inst.getOperand(NegHiIdx).getImm(); 6354 } 6355 6356 for (int J = 0; J < 3; ++J) { 6357 int OpIdx = AMDGPU::getNamedOperandIdx(Opc, Ops[J]); 6358 if (OpIdx == -1) 6359 break; 6360 6361 uint32_t ModVal = 0; 6362 6363 if ((OpSel & (1 << J)) != 0) 6364 ModVal |= SISrcMods::OP_SEL_0; 6365 6366 if ((OpSelHi & (1 << J)) != 0) 6367 ModVal |= SISrcMods::OP_SEL_1; 6368 6369 if ((NegLo & (1 << J)) != 0) 6370 ModVal |= SISrcMods::NEG; 6371 6372 if ((NegHi & (1 << J)) != 0) 6373 ModVal |= SISrcMods::NEG_HI; 6374 6375 int ModIdx = AMDGPU::getNamedOperandIdx(Opc, ModOps[J]); 6376 6377 Inst.getOperand(ModIdx).setImm(Inst.getOperand(ModIdx).getImm() | ModVal); 6378 } 6379 } 6380 6381 //===----------------------------------------------------------------------===// 6382 // dpp 6383 
//===----------------------------------------------------------------------===// 6384 6385 bool AMDGPUOperand::isDPP8() const { 6386 return isImmTy(ImmTyDPP8); 6387 } 6388 6389 bool AMDGPUOperand::isDPPCtrl() const { 6390 using namespace AMDGPU::DPP; 6391 6392 bool result = isImm() && getImmTy() == ImmTyDppCtrl && isUInt<9>(getImm()); 6393 if (result) { 6394 int64_t Imm = getImm(); 6395 return (Imm >= DppCtrl::QUAD_PERM_FIRST && Imm <= DppCtrl::QUAD_PERM_LAST) || 6396 (Imm >= DppCtrl::ROW_SHL_FIRST && Imm <= DppCtrl::ROW_SHL_LAST) || 6397 (Imm >= DppCtrl::ROW_SHR_FIRST && Imm <= DppCtrl::ROW_SHR_LAST) || 6398 (Imm >= DppCtrl::ROW_ROR_FIRST && Imm <= DppCtrl::ROW_ROR_LAST) || 6399 (Imm == DppCtrl::WAVE_SHL1) || 6400 (Imm == DppCtrl::WAVE_ROL1) || 6401 (Imm == DppCtrl::WAVE_SHR1) || 6402 (Imm == DppCtrl::WAVE_ROR1) || 6403 (Imm == DppCtrl::ROW_MIRROR) || 6404 (Imm == DppCtrl::ROW_HALF_MIRROR) || 6405 (Imm == DppCtrl::BCAST15) || 6406 (Imm == DppCtrl::BCAST31) || 6407 (Imm >= DppCtrl::ROW_SHARE_FIRST && Imm <= DppCtrl::ROW_SHARE_LAST) || 6408 (Imm >= DppCtrl::ROW_XMASK_FIRST && Imm <= DppCtrl::ROW_XMASK_LAST); 6409 } 6410 return false; 6411 } 6412 6413 //===----------------------------------------------------------------------===// 6414 // mAI 6415 //===----------------------------------------------------------------------===// 6416 6417 bool AMDGPUOperand::isBLGP() const { 6418 return isImm() && getImmTy() == ImmTyBLGP && isUInt<3>(getImm()); 6419 } 6420 6421 bool AMDGPUOperand::isCBSZ() const { 6422 return isImm() && getImmTy() == ImmTyCBSZ && isUInt<3>(getImm()); 6423 } 6424 6425 bool AMDGPUOperand::isABID() const { 6426 return isImm() && getImmTy() == ImmTyABID && isUInt<4>(getImm()); 6427 } 6428 6429 bool AMDGPUOperand::isS16Imm() const { 6430 return isImm() && (isInt<16>(getImm()) || isUInt<16>(getImm())); 6431 } 6432 6433 bool AMDGPUOperand::isU16Imm() const { 6434 return isImm() && isUInt<16>(getImm()); 6435 } 6436 6437 OperandMatchResultTy AMDGPUAsmParser::parseDim(OperandVector &Operands) { 6438 if (!isGFX10()) 6439 return MatchOperand_NoMatch; 6440 6441 SMLoc S = Parser.getTok().getLoc(); 6442 6443 if (getLexer().isNot(AsmToken::Identifier)) 6444 return MatchOperand_NoMatch; 6445 if (getLexer().getTok().getString() != "dim") 6446 return MatchOperand_NoMatch; 6447 6448 Parser.Lex(); 6449 if (getLexer().isNot(AsmToken::Colon)) 6450 return MatchOperand_ParseFail; 6451 6452 Parser.Lex(); 6453 6454 // We want to allow "dim:1D" etc., but the initial 1 is tokenized as an 6455 // integer. 
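// So "dim:1D" reaches us as the integer token "1" immediately followed by the
// identifier "D"; the two pieces are glued back together below and are only
// accepted when they are adjacent (the end/start source locations must match).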
6456 std::string Token; 6457 if (getLexer().is(AsmToken::Integer)) { 6458 SMLoc Loc = getLexer().getTok().getEndLoc(); 6459 Token = getLexer().getTok().getString(); 6460 Parser.Lex(); 6461 if (getLexer().getTok().getLoc() != Loc) 6462 return MatchOperand_ParseFail; 6463 } 6464 if (getLexer().isNot(AsmToken::Identifier)) 6465 return MatchOperand_ParseFail; 6466 Token += getLexer().getTok().getString(); 6467 6468 StringRef DimId = Token; 6469 if (DimId.startswith("SQ_RSRC_IMG_")) 6470 DimId = DimId.substr(12); 6471 6472 const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfoByAsmSuffix(DimId); 6473 if (!DimInfo) 6474 return MatchOperand_ParseFail; 6475 6476 Parser.Lex(); 6477 6478 Operands.push_back(AMDGPUOperand::CreateImm(this, DimInfo->Encoding, S, 6479 AMDGPUOperand::ImmTyDim)); 6480 return MatchOperand_Success; 6481 } 6482 6483 OperandMatchResultTy AMDGPUAsmParser::parseDPP8(OperandVector &Operands) { 6484 SMLoc S = Parser.getTok().getLoc(); 6485 StringRef Prefix; 6486 6487 if (getLexer().getKind() == AsmToken::Identifier) { 6488 Prefix = Parser.getTok().getString(); 6489 } else { 6490 return MatchOperand_NoMatch; 6491 } 6492 6493 if (Prefix != "dpp8") 6494 return parseDPPCtrl(Operands); 6495 if (!isGFX10()) 6496 return MatchOperand_NoMatch; 6497 6498 // dpp8:[%d,%d,%d,%d,%d,%d,%d,%d] 6499 6500 int64_t Sels[8]; 6501 6502 Parser.Lex(); 6503 if (getLexer().isNot(AsmToken::Colon)) 6504 return MatchOperand_ParseFail; 6505 6506 Parser.Lex(); 6507 if (getLexer().isNot(AsmToken::LBrac)) 6508 return MatchOperand_ParseFail; 6509 6510 Parser.Lex(); 6511 if (getParser().parseAbsoluteExpression(Sels[0])) 6512 return MatchOperand_ParseFail; 6513 if (0 > Sels[0] || 7 < Sels[0]) 6514 return MatchOperand_ParseFail; 6515 6516 for (size_t i = 1; i < 8; ++i) { 6517 if (getLexer().isNot(AsmToken::Comma)) 6518 return MatchOperand_ParseFail; 6519 6520 Parser.Lex(); 6521 if (getParser().parseAbsoluteExpression(Sels[i])) 6522 return MatchOperand_ParseFail; 6523 if (0 > Sels[i] || 7 < Sels[i]) 6524 return MatchOperand_ParseFail; 6525 } 6526 6527 if (getLexer().isNot(AsmToken::RBrac)) 6528 return MatchOperand_ParseFail; 6529 Parser.Lex(); 6530 6531 unsigned DPP8 = 0; 6532 for (size_t i = 0; i < 8; ++i) 6533 DPP8 |= (Sels[i] << (i * 3)); 6534 6535 Operands.push_back(AMDGPUOperand::CreateImm(this, DPP8, S, AMDGPUOperand::ImmTyDPP8)); 6536 return MatchOperand_Success; 6537 } 6538 6539 OperandMatchResultTy 6540 AMDGPUAsmParser::parseDPPCtrl(OperandVector &Operands) { 6541 using namespace AMDGPU::DPP; 6542 6543 SMLoc S = Parser.getTok().getLoc(); 6544 StringRef Prefix; 6545 int64_t Int; 6546 6547 if (getLexer().getKind() == AsmToken::Identifier) { 6548 Prefix = Parser.getTok().getString(); 6549 } else { 6550 return MatchOperand_NoMatch; 6551 } 6552 6553 if (Prefix == "row_mirror") { 6554 Int = DppCtrl::ROW_MIRROR; 6555 Parser.Lex(); 6556 } else if (Prefix == "row_half_mirror") { 6557 Int = DppCtrl::ROW_HALF_MIRROR; 6558 Parser.Lex(); 6559 } else { 6560 // Check to prevent parseDPPCtrlOps from eating invalid tokens 6561 if (Prefix != "quad_perm" 6562 && Prefix != "row_shl" 6563 && Prefix != "row_shr" 6564 && Prefix != "row_ror" 6565 && Prefix != "wave_shl" 6566 && Prefix != "wave_rol" 6567 && Prefix != "wave_shr" 6568 && Prefix != "wave_ror" 6569 && Prefix != "row_bcast" 6570 && Prefix != "row_share" 6571 && Prefix != "row_xmask") { 6572 return MatchOperand_NoMatch; 6573 } 6574 6575 if (!isGFX10() && (Prefix == "row_share" || Prefix == "row_xmask")) 6576 return MatchOperand_NoMatch; 6577 6578 if (!isVI() && 
!isGFX9() && 6579 (Prefix == "wave_shl" || Prefix == "wave_shr" || 6580 Prefix == "wave_rol" || Prefix == "wave_ror" || 6581 Prefix == "row_bcast")) 6582 return MatchOperand_NoMatch; 6583 6584 Parser.Lex(); 6585 if (getLexer().isNot(AsmToken::Colon)) 6586 return MatchOperand_ParseFail; 6587 6588 if (Prefix == "quad_perm") { 6589 // quad_perm:[%d,%d,%d,%d] 6590 Parser.Lex(); 6591 if (getLexer().isNot(AsmToken::LBrac)) 6592 return MatchOperand_ParseFail; 6593 Parser.Lex(); 6594 6595 if (getParser().parseAbsoluteExpression(Int) || !(0 <= Int && Int <=3)) 6596 return MatchOperand_ParseFail; 6597 6598 for (int i = 0; i < 3; ++i) { 6599 if (getLexer().isNot(AsmToken::Comma)) 6600 return MatchOperand_ParseFail; 6601 Parser.Lex(); 6602 6603 int64_t Temp; 6604 if (getParser().parseAbsoluteExpression(Temp) || !(0 <= Temp && Temp <=3)) 6605 return MatchOperand_ParseFail; 6606 const int shift = i*2 + 2; 6607 Int += (Temp << shift); 6608 } 6609 6610 if (getLexer().isNot(AsmToken::RBrac)) 6611 return MatchOperand_ParseFail; 6612 Parser.Lex(); 6613 } else { 6614 // sel:%d 6615 Parser.Lex(); 6616 if (getParser().parseAbsoluteExpression(Int)) 6617 return MatchOperand_ParseFail; 6618 6619 if (Prefix == "row_shl" && 1 <= Int && Int <= 15) { 6620 Int |= DppCtrl::ROW_SHL0; 6621 } else if (Prefix == "row_shr" && 1 <= Int && Int <= 15) { 6622 Int |= DppCtrl::ROW_SHR0; 6623 } else if (Prefix == "row_ror" && 1 <= Int && Int <= 15) { 6624 Int |= DppCtrl::ROW_ROR0; 6625 } else if (Prefix == "wave_shl" && 1 == Int) { 6626 Int = DppCtrl::WAVE_SHL1; 6627 } else if (Prefix == "wave_rol" && 1 == Int) { 6628 Int = DppCtrl::WAVE_ROL1; 6629 } else if (Prefix == "wave_shr" && 1 == Int) { 6630 Int = DppCtrl::WAVE_SHR1; 6631 } else if (Prefix == "wave_ror" && 1 == Int) { 6632 Int = DppCtrl::WAVE_ROR1; 6633 } else if (Prefix == "row_bcast") { 6634 if (Int == 15) { 6635 Int = DppCtrl::BCAST15; 6636 } else if (Int == 31) { 6637 Int = DppCtrl::BCAST31; 6638 } else { 6639 return MatchOperand_ParseFail; 6640 } 6641 } else if (Prefix == "row_share" && 0 <= Int && Int <= 15) { 6642 Int |= DppCtrl::ROW_SHARE_FIRST; 6643 } else if (Prefix == "row_xmask" && 0 <= Int && Int <= 15) { 6644 Int |= DppCtrl::ROW_XMASK_FIRST; 6645 } else { 6646 return MatchOperand_ParseFail; 6647 } 6648 } 6649 } 6650 6651 Operands.push_back(AMDGPUOperand::CreateImm(this, Int, S, AMDGPUOperand::ImmTyDppCtrl)); 6652 return MatchOperand_Success; 6653 } 6654 6655 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultRowMask() const { 6656 return AMDGPUOperand::CreateImm(this, 0xf, SMLoc(), AMDGPUOperand::ImmTyDppRowMask); 6657 } 6658 6659 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultEndpgmImmOperands() const { 6660 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyEndpgm); 6661 } 6662 6663 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultBankMask() const { 6664 return AMDGPUOperand::CreateImm(this, 0xf, SMLoc(), AMDGPUOperand::ImmTyDppBankMask); 6665 } 6666 6667 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultBoundCtrl() const { 6668 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyDppBoundCtrl); 6669 } 6670 6671 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultFI() const { 6672 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyDppFi); 6673 } 6674 6675 void AMDGPUAsmParser::cvtDPP(MCInst &Inst, const OperandVector &Operands, bool IsDPP8) { 6676 OptionalImmIndexMap OptionalIdx; 6677 6678 unsigned I = 1; 6679 const MCInstrDesc &Desc = MII.get(Inst.getOpcode()); 6680 for (unsigned J = 0; J < Desc.getNumDefs(); ++J) { 6681 ((AMDGPUOperand 
&)*Operands[I++]).addRegOperands(Inst, 1); 6682 } 6683 6684 int Fi = 0; 6685 for (unsigned E = Operands.size(); I != E; ++I) { 6686 auto TiedTo = Desc.getOperandConstraint(Inst.getNumOperands(), 6687 MCOI::TIED_TO); 6688 if (TiedTo != -1) { 6689 assert((unsigned)TiedTo < Inst.getNumOperands()); 6690 // handle tied old or src2 for MAC instructions 6691 Inst.addOperand(Inst.getOperand(TiedTo)); 6692 } 6693 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]); 6694 // Add the register arguments 6695 if (Op.isReg() && validateVccOperand(Op.getReg())) { 6696 // VOP2b (v_add_u32, v_sub_u32 ...) dpp use "vcc" token. 6697 // Skip it. 6698 continue; 6699 } 6700 6701 if (IsDPP8) { 6702 if (Op.isDPP8()) { 6703 Op.addImmOperands(Inst, 1); 6704 } else if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) { 6705 Op.addRegWithFPInputModsOperands(Inst, 2); 6706 } else if (Op.isFI()) { 6707 Fi = Op.getImm(); 6708 } else if (Op.isReg()) { 6709 Op.addRegOperands(Inst, 1); 6710 } else { 6711 llvm_unreachable("Invalid operand type"); 6712 } 6713 } else { 6714 if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) { 6715 Op.addRegWithFPInputModsOperands(Inst, 2); 6716 } else if (Op.isDPPCtrl()) { 6717 Op.addImmOperands(Inst, 1); 6718 } else if (Op.isImm()) { 6719 // Handle optional arguments 6720 OptionalIdx[Op.getImmTy()] = I; 6721 } else { 6722 llvm_unreachable("Invalid operand type"); 6723 } 6724 } 6725 } 6726 6727 if (IsDPP8) { 6728 using namespace llvm::AMDGPU::DPP; 6729 Inst.addOperand(MCOperand::createImm(Fi? DPP8_FI_1 : DPP8_FI_0)); 6730 } else { 6731 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppRowMask, 0xf); 6732 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBankMask, 0xf); 6733 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBoundCtrl); 6734 if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::fi) != -1) { 6735 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppFi); 6736 } 6737 } 6738 } 6739 6740 //===----------------------------------------------------------------------===// 6741 // sdwa 6742 //===----------------------------------------------------------------------===// 6743 6744 OperandMatchResultTy 6745 AMDGPUAsmParser::parseSDWASel(OperandVector &Operands, StringRef Prefix, 6746 AMDGPUOperand::ImmTy Type) { 6747 using namespace llvm::AMDGPU::SDWA; 6748 6749 SMLoc S = Parser.getTok().getLoc(); 6750 StringRef Value; 6751 OperandMatchResultTy res; 6752 6753 res = parseStringWithPrefix(Prefix, Value); 6754 if (res != MatchOperand_Success) { 6755 return res; 6756 } 6757 6758 int64_t Int; 6759 Int = StringSwitch<int64_t>(Value) 6760 .Case("BYTE_0", SdwaSel::BYTE_0) 6761 .Case("BYTE_1", SdwaSel::BYTE_1) 6762 .Case("BYTE_2", SdwaSel::BYTE_2) 6763 .Case("BYTE_3", SdwaSel::BYTE_3) 6764 .Case("WORD_0", SdwaSel::WORD_0) 6765 .Case("WORD_1", SdwaSel::WORD_1) 6766 .Case("DWORD", SdwaSel::DWORD) 6767 .Default(0xffffffff); 6768 Parser.Lex(); // eat last token 6769 6770 if (Int == 0xffffffff) { 6771 return MatchOperand_ParseFail; 6772 } 6773 6774 Operands.push_back(AMDGPUOperand::CreateImm(this, Int, S, Type)); 6775 return MatchOperand_Success; 6776 } 6777 6778 OperandMatchResultTy 6779 AMDGPUAsmParser::parseSDWADstUnused(OperandVector &Operands) { 6780 using namespace llvm::AMDGPU::SDWA; 6781 6782 SMLoc S = Parser.getTok().getLoc(); 6783 StringRef Value; 6784 OperandMatchResultTy res; 6785 6786 res = parseStringWithPrefix("dst_unused", Value); 6787 if (res != MatchOperand_Success) { 6788 
return res; 6789 } 6790 6791 int64_t Int; 6792 Int = StringSwitch<int64_t>(Value) 6793 .Case("UNUSED_PAD", DstUnused::UNUSED_PAD) 6794 .Case("UNUSED_SEXT", DstUnused::UNUSED_SEXT) 6795 .Case("UNUSED_PRESERVE", DstUnused::UNUSED_PRESERVE) 6796 .Default(0xffffffff); 6797 Parser.Lex(); // eat last token 6798 6799 if (Int == 0xffffffff) { 6800 return MatchOperand_ParseFail; 6801 } 6802 6803 Operands.push_back(AMDGPUOperand::CreateImm(this, Int, S, AMDGPUOperand::ImmTySdwaDstUnused)); 6804 return MatchOperand_Success; 6805 } 6806 6807 void AMDGPUAsmParser::cvtSdwaVOP1(MCInst &Inst, const OperandVector &Operands) { 6808 cvtSDWA(Inst, Operands, SIInstrFlags::VOP1); 6809 } 6810 6811 void AMDGPUAsmParser::cvtSdwaVOP2(MCInst &Inst, const OperandVector &Operands) { 6812 cvtSDWA(Inst, Operands, SIInstrFlags::VOP2); 6813 } 6814 6815 void AMDGPUAsmParser::cvtSdwaVOP2b(MCInst &Inst, const OperandVector &Operands) { 6816 cvtSDWA(Inst, Operands, SIInstrFlags::VOP2, true); 6817 } 6818 6819 void AMDGPUAsmParser::cvtSdwaVOPC(MCInst &Inst, const OperandVector &Operands) { 6820 cvtSDWA(Inst, Operands, SIInstrFlags::VOPC, isVI()); 6821 } 6822 6823 void AMDGPUAsmParser::cvtSDWA(MCInst &Inst, const OperandVector &Operands, 6824 uint64_t BasicInstType, bool skipVcc) { 6825 using namespace llvm::AMDGPU::SDWA; 6826 6827 OptionalImmIndexMap OptionalIdx; 6828 bool skippedVcc = false; 6829 6830 unsigned I = 1; 6831 const MCInstrDesc &Desc = MII.get(Inst.getOpcode()); 6832 for (unsigned J = 0; J < Desc.getNumDefs(); ++J) { 6833 ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1); 6834 } 6835 6836 for (unsigned E = Operands.size(); I != E; ++I) { 6837 AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]); 6838 if (skipVcc && !skippedVcc && Op.isReg() && 6839 (Op.getReg() == AMDGPU::VCC || Op.getReg() == AMDGPU::VCC_LO)) { 6840 // VOP2b (v_add_u32, v_sub_u32 ...) sdwa use "vcc" token as dst. 6841 // Skip it if it's 2nd (e.g. v_add_i32_sdwa v1, vcc, v2, v3) 6842 // or 4th (v_addc_u32_sdwa v1, vcc, v2, v3, vcc) operand. 6843 // Skip VCC only if we didn't skip it on previous iteration. 
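// (At this point the MCInst holds either just the dst, i.e. 1 operand, or
// dst plus src0_modifiers, src0, src1_modifiers and src1, i.e. 5 operands;
// that is what the operand-count check below distinguishes.)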
6844 if (BasicInstType == SIInstrFlags::VOP2 && 6845 (Inst.getNumOperands() == 1 || Inst.getNumOperands() == 5)) { 6846 skippedVcc = true; 6847 continue; 6848 } else if (BasicInstType == SIInstrFlags::VOPC && 6849 Inst.getNumOperands() == 0) { 6850 skippedVcc = true; 6851 continue; 6852 } 6853 } 6854 if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) { 6855 Op.addRegOrImmWithInputModsOperands(Inst, 2); 6856 } else if (Op.isImm()) { 6857 // Handle optional arguments 6858 OptionalIdx[Op.getImmTy()] = I; 6859 } else { 6860 llvm_unreachable("Invalid operand type"); 6861 } 6862 skippedVcc = false; 6863 } 6864 6865 if (Inst.getOpcode() != AMDGPU::V_NOP_sdwa_gfx10 && 6866 Inst.getOpcode() != AMDGPU::V_NOP_sdwa_gfx9 && 6867 Inst.getOpcode() != AMDGPU::V_NOP_sdwa_vi) { 6868 // v_nop_sdwa_sdwa_vi/gfx9 has no optional sdwa arguments 6869 switch (BasicInstType) { 6870 case SIInstrFlags::VOP1: 6871 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0); 6872 if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::omod) != -1) { 6873 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI, 0); 6874 } 6875 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstSel, SdwaSel::DWORD); 6876 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstUnused, DstUnused::UNUSED_PRESERVE); 6877 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, SdwaSel::DWORD); 6878 break; 6879 6880 case SIInstrFlags::VOP2: 6881 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0); 6882 if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::omod) != -1) { 6883 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI, 0); 6884 } 6885 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstSel, SdwaSel::DWORD); 6886 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstUnused, DstUnused::UNUSED_PRESERVE); 6887 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, SdwaSel::DWORD); 6888 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc1Sel, SdwaSel::DWORD); 6889 break; 6890 6891 case SIInstrFlags::VOPC: 6892 if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::clamp) != -1) 6893 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0); 6894 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, SdwaSel::DWORD); 6895 addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc1Sel, SdwaSel::DWORD); 6896 break; 6897 6898 default: 6899 llvm_unreachable("Invalid instruction type. 
Only VOP1, VOP2 and VOPC allowed"); 6900 } 6901 } 6902 6903 // Special case v_mac_{f16, f32}: 6904 // it has src2 register operand that is tied to dst operand 6905 if (Inst.getOpcode() == AMDGPU::V_MAC_F32_sdwa_vi || 6906 Inst.getOpcode() == AMDGPU::V_MAC_F16_sdwa_vi) { 6907 auto it = Inst.begin(); 6908 std::advance( 6909 it, AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::src2)); 6910 Inst.insert(it, Inst.getOperand(0)); // src2 = dst 6911 } 6912 } 6913 6914 //===----------------------------------------------------------------------===// 6915 // mAI 6916 //===----------------------------------------------------------------------===// 6917 6918 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultBLGP() const { 6919 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyBLGP); 6920 } 6921 6922 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultCBSZ() const { 6923 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyCBSZ); 6924 } 6925 6926 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultABID() const { 6927 return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyABID); 6928 } 6929 6930 /// Force static initialization. 6931 extern "C" void LLVMInitializeAMDGPUAsmParser() { 6932 RegisterMCAsmParser<AMDGPUAsmParser> A(getTheAMDGPUTarget()); 6933 RegisterMCAsmParser<AMDGPUAsmParser> B(getTheGCNTarget()); 6934 } 6935 6936 #define GET_REGISTER_MATCHER 6937 #define GET_MATCHER_IMPLEMENTATION 6938 #define GET_MNEMONIC_SPELL_CHECKER 6939 #include "AMDGPUGenAsmMatcher.inc" 6940 6941 // This function should be defined after the auto-generated include so that we 6942 // have the MatchClassKind enum defined. 6943 unsigned AMDGPUAsmParser::validateTargetOperandClass(MCParsedAsmOperand &Op, 6944 unsigned Kind) { 6945 // Tokens like "glc" would be parsed as immediate operands in ParseOperand(). 6946 // But MatchInstructionImpl() expects to meet a token and fails to validate the 6947 // operand. This method checks if we are given an immediate operand but expect 6948 // to get the corresponding token. 6949 AMDGPUOperand &Operand = (AMDGPUOperand&)Op; 6950 switch (Kind) { 6951 case MCK_addr64: 6952 return Operand.isAddr64() ? Match_Success : Match_InvalidOperand; 6953 case MCK_gds: 6954 return Operand.isGDS() ? Match_Success : Match_InvalidOperand; 6955 case MCK_lds: 6956 return Operand.isLDS() ? Match_Success : Match_InvalidOperand; 6957 case MCK_glc: 6958 return Operand.isGLC() ? Match_Success : Match_InvalidOperand; 6959 case MCK_idxen: 6960 return Operand.isIdxen() ? Match_Success : Match_InvalidOperand; 6961 case MCK_offen: 6962 return Operand.isOffen() ? Match_Success : Match_InvalidOperand; 6963 case MCK_SSrcB32: 6964 // When operands have expression values, they will return true for isToken, 6965 // because it is not possible to distinguish between a token and an 6966 // expression at parse time. MatchInstructionImpl() will always try to 6967 // match an operand as a token, when isToken returns true, and when the 6968 // name of the expression is not a valid token, the match will fail, 6969 // so we need to handle it here. 6970 return Operand.isSSrcB32() ? Match_Success : Match_InvalidOperand; 6971 case MCK_SSrcF32: 6972 return Operand.isSSrcF32() ? Match_Success : Match_InvalidOperand; 6973 case MCK_SoppBrTarget: 6974 return Operand.isSoppBrTarget() ? Match_Success : Match_InvalidOperand; 6975 case MCK_VReg32OrOff: 6976 return Operand.isVReg32OrOff() ? Match_Success : Match_InvalidOperand; 6977 case MCK_InterpSlot: 6978 return Operand.isInterpSlot() ?
Match_Success : Match_InvalidOperand; 6979 case MCK_Attr: 6980 return Operand.isInterpAttr() ? Match_Success : Match_InvalidOperand; 6981 case MCK_AttrChan: 6982 return Operand.isAttrChan() ? Match_Success : Match_InvalidOperand; 6983 case MCK_SReg_64: 6984 case MCK_SReg_64_XEXEC: 6985 // Null is defined as a 32-bit register but 6986 // it should also be enabled with 64-bit operands. 6987 // The following code enables it for SReg_64 operands 6988 // used as source and destination. Remaining source 6989 // operands are handled in isInlinableImm. 6990 return Operand.isNull() ? Match_Success : Match_InvalidOperand; 6991 default: 6992 return Match_InvalidOperand; 6993 } 6994 } 6995 6996 //===----------------------------------------------------------------------===// 6997 // endpgm 6998 //===----------------------------------------------------------------------===// 6999 7000 OperandMatchResultTy AMDGPUAsmParser::parseEndpgmOp(OperandVector &Operands) { 7001 SMLoc S = Parser.getTok().getLoc(); 7002 int64_t Imm = 0; 7003 7004 if (!parseExpr(Imm)) { 7005 // The operand is optional, if not present default to 0 7006 Imm = 0; 7007 } 7008 7009 if (!isUInt<16>(Imm)) { 7010 Error(S, "expected a 16-bit value"); 7011 return MatchOperand_ParseFail; 7012 } 7013 7014 Operands.push_back( 7015 AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTyEndpgm)); 7016 return MatchOperand_Success; 7017 } 7018 7019 bool AMDGPUOperand::isEndpgm() const { return isImmTy(ImmTyEndpgm); } 7020