//===- AMDGPUBaseInfo.cpp - AMDGPU Base encoding information --------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#include "AMDGPUBaseInfo.h"
#include "AMDGPU.h"
#include "AMDGPUAsmUtils.h"
#include "AMDKernelCodeT.h"
#include "GCNSubtarget.h"
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
#include "llvm/BinaryFormat/ELF.h"
#include "llvm/IR/Attributes.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/GlobalValue.h"
#include "llvm/IR/IntrinsicsAMDGPU.h"
#include "llvm/IR/IntrinsicsR600.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/MC/MCSubtargetInfo.h"
#include "llvm/Support/AMDHSAKernelDescriptor.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/TargetParser.h"

#define GET_INSTRINFO_NAMED_OPS
#define GET_INSTRMAP_INFO
#include "AMDGPUGenInstrInfo.inc"

static llvm::cl::opt<unsigned> AmdhsaCodeObjectVersion(
    "amdhsa-code-object-version", llvm::cl::Hidden,
    llvm::cl::desc("AMDHSA Code Object Version"), llvm::cl::init(4),
    llvm::cl::ZeroOrMore);

namespace {

/// \returns Bit mask for given bit \p Shift and bit \p Width.
unsigned getBitMask(unsigned Shift, unsigned Width) {
  return ((1 << Width) - 1) << Shift;
}

/// Packs \p Src into \p Dst for given bit \p Shift and bit \p Width.
///
/// \returns Packed \p Dst.
unsigned packBits(unsigned Src, unsigned Dst, unsigned Shift, unsigned Width) {
  Dst &= ~(1 << Shift) & ~getBitMask(Shift, Width);
  Dst |= (Src << Shift) & getBitMask(Shift, Width);
  return Dst;
}

/// Unpacks bits from \p Src for given bit \p Shift and bit \p Width.
///
/// \returns Unpacked bits.
unsigned unpackBits(unsigned Src, unsigned Shift, unsigned Width) {
  return (Src & getBitMask(Shift, Width)) >> Shift;
}

/// \returns Vmcnt bit shift (lower bits).
unsigned getVmcntBitShiftLo() { return 0; }

/// \returns Vmcnt bit width (lower bits).
unsigned getVmcntBitWidthLo() { return 4; }

/// \returns Expcnt bit shift.
unsigned getExpcntBitShift() { return 4; }

/// \returns Expcnt bit width.
unsigned getExpcntBitWidth() { return 3; }

/// \returns Lgkmcnt bit shift.
unsigned getLgkmcntBitShift() { return 8; }

/// \returns Lgkmcnt bit width.
unsigned getLgkmcntBitWidth(unsigned VersionMajor) {
  return (VersionMajor >= 10) ? 6 : 4;
}

/// \returns Vmcnt bit shift (higher bits).
unsigned getVmcntBitShiftHi() { return 14; }

/// \returns Vmcnt bit width (higher bits).
unsigned getVmcntBitWidthHi() { return 2; }
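
// Taken together, the shift/width helpers above describe the s_waitcnt simm16
// layout (a summary derived from this file, not an ISA reference):
//   pre-gfx9:  vmcnt[3:0]        expcnt[6:4]  lgkmcnt[11:8]
//   gfx9:      vmcnt[3:0,15:14]  expcnt[6:4]  lgkmcnt[11:8]
//   gfx10+:    vmcnt[3:0,15:14]  expcnt[6:4]  lgkmcnt[13:8]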

} // end namespace anonymous

namespace llvm {

namespace AMDGPU {

Optional<uint8_t> getHsaAbiVersion(const MCSubtargetInfo *STI) {
  if (STI && STI->getTargetTriple().getOS() != Triple::AMDHSA)
    return None;

  switch (AmdhsaCodeObjectVersion) {
  case 2:
    return ELF::ELFABIVERSION_AMDGPU_HSA_V2;
  case 3:
    return ELF::ELFABIVERSION_AMDGPU_HSA_V3;
  case 4:
    return ELF::ELFABIVERSION_AMDGPU_HSA_V4;
  default:
    report_fatal_error(Twine("Unsupported AMDHSA Code Object Version ") +
                       Twine(AmdhsaCodeObjectVersion));
  }
}

bool isHsaAbiVersion2(const MCSubtargetInfo *STI) {
  if (Optional<uint8_t> HsaAbiVer = getHsaAbiVersion(STI))
    return *HsaAbiVer == ELF::ELFABIVERSION_AMDGPU_HSA_V2;
  return false;
}

bool isHsaAbiVersion3(const MCSubtargetInfo *STI) {
  if (Optional<uint8_t> HsaAbiVer = getHsaAbiVersion(STI))
    return *HsaAbiVer == ELF::ELFABIVERSION_AMDGPU_HSA_V3;
  return false;
}

bool isHsaAbiVersion4(const MCSubtargetInfo *STI) {
  if (Optional<uint8_t> HsaAbiVer = getHsaAbiVersion(STI))
    return *HsaAbiVer == ELF::ELFABIVERSION_AMDGPU_HSA_V4;
  return false;
}

bool isHsaAbiVersion3Or4(const MCSubtargetInfo *STI) {
  return isHsaAbiVersion3(STI) || isHsaAbiVersion4(STI);
}

#define GET_MIMGBaseOpcodesTable_IMPL
#define GET_MIMGDimInfoTable_IMPL
#define GET_MIMGInfoTable_IMPL
#define GET_MIMGLZMappingTable_IMPL
#define GET_MIMGMIPMappingTable_IMPL
#define GET_MIMGG16MappingTable_IMPL
#include "AMDGPUGenSearchableTables.inc"

int getMIMGOpcode(unsigned BaseOpcode, unsigned MIMGEncoding,
                  unsigned VDataDwords, unsigned VAddrDwords) {
  const MIMGInfo *Info = getMIMGOpcodeHelper(BaseOpcode, MIMGEncoding,
                                             VDataDwords, VAddrDwords);
  return Info ? Info->Opcode : -1;
}

const MIMGBaseOpcodeInfo *getMIMGBaseOpcode(unsigned Opc) {
  const MIMGInfo *Info = getMIMGInfo(Opc);
  return Info ? getMIMGBaseOpcodeInfo(Info->BaseOpcode) : nullptr;
}

int getMaskedMIMGOp(unsigned Opc, unsigned NewChannels) {
  const MIMGInfo *OrigInfo = getMIMGInfo(Opc);
  const MIMGInfo *NewInfo =
      getMIMGOpcodeHelper(OrigInfo->BaseOpcode, OrigInfo->MIMGEncoding,
                          NewChannels, OrigInfo->VAddrDwords);
  return NewInfo ? NewInfo->Opcode : -1;
}

struct MUBUFInfo {
  uint16_t Opcode;
  uint16_t BaseOpcode;
  uint8_t elements;
  bool has_vaddr;
  bool has_srsrc;
  bool has_soffset;
};

struct MTBUFInfo {
  uint16_t Opcode;
  uint16_t BaseOpcode;
  uint8_t elements;
  bool has_vaddr;
  bool has_srsrc;
  bool has_soffset;
};

struct SMInfo {
  uint16_t Opcode;
  bool IsBuffer;
};

#define GET_MTBUFInfoTable_DECL
#define GET_MTBUFInfoTable_IMPL
#define GET_MUBUFInfoTable_DECL
#define GET_MUBUFInfoTable_IMPL
#define GET_SMInfoTable_DECL
#define GET_SMInfoTable_IMPL
#include "AMDGPUGenSearchableTables.inc"

int getMTBUFBaseOpcode(unsigned Opc) {
  const MTBUFInfo *Info = getMTBUFInfoFromOpcode(Opc);
  return Info ? Info->BaseOpcode : -1;
}

int getMTBUFOpcode(unsigned BaseOpc, unsigned Elements) {
  const MTBUFInfo *Info = getMTBUFInfoFromBaseOpcodeAndElements(BaseOpc, Elements);
  return Info ? Info->Opcode : -1;
}

int getMTBUFElements(unsigned Opc) {
  const MTBUFInfo *Info = getMTBUFOpcodeHelper(Opc);
  return Info ? Info->elements : 0;
}

bool getMTBUFHasVAddr(unsigned Opc) {
  const MTBUFInfo *Info = getMTBUFOpcodeHelper(Opc);
  return Info ? Info->has_vaddr : false;
}

bool getMTBUFHasSrsrc(unsigned Opc) {
  const MTBUFInfo *Info = getMTBUFOpcodeHelper(Opc);
  return Info ? Info->has_srsrc : false;
}

bool getMTBUFHasSoffset(unsigned Opc) {
  const MTBUFInfo *Info = getMTBUFOpcodeHelper(Opc);
  return Info ? Info->has_soffset : false;
}

int getMUBUFBaseOpcode(unsigned Opc) {
  const MUBUFInfo *Info = getMUBUFInfoFromOpcode(Opc);
  return Info ? Info->BaseOpcode : -1;
}

int getMUBUFOpcode(unsigned BaseOpc, unsigned Elements) {
  const MUBUFInfo *Info = getMUBUFInfoFromBaseOpcodeAndElements(BaseOpc, Elements);
  return Info ? Info->Opcode : -1;
}

int getMUBUFElements(unsigned Opc) {
  const MUBUFInfo *Info = getMUBUFOpcodeHelper(Opc);
  return Info ? Info->elements : 0;
}

bool getMUBUFHasVAddr(unsigned Opc) {
  const MUBUFInfo *Info = getMUBUFOpcodeHelper(Opc);
  return Info ? Info->has_vaddr : false;
}

bool getMUBUFHasSrsrc(unsigned Opc) {
  const MUBUFInfo *Info = getMUBUFOpcodeHelper(Opc);
  return Info ? Info->has_srsrc : false;
}

bool getMUBUFHasSoffset(unsigned Opc) {
  const MUBUFInfo *Info = getMUBUFOpcodeHelper(Opc);
  return Info ? Info->has_soffset : false;
}

bool getSMEMIsBuffer(unsigned Opc) {
  const SMInfo *Info = getSMEMOpcodeHelper(Opc);
  return Info ? Info->IsBuffer : false;
}

// Wrapper for Tablegen'd function. enum Subtarget is not defined in any
// header files, so we need to wrap it in a function that takes unsigned
// instead.
int getMCOpcode(uint16_t Opcode, unsigned Gen) {
  return getMCOpcodeGen(Opcode, static_cast<Subtarget>(Gen));
}

namespace IsaInfo {

AMDGPUTargetID::AMDGPUTargetID(const MCSubtargetInfo &STI)
    : STI(STI), XnackSetting(TargetIDSetting::Any),
      SramEccSetting(TargetIDSetting::Any) {
  if (!STI.getFeatureBits().test(FeatureSupportsXNACK))
    XnackSetting = TargetIDSetting::Unsupported;
  if (!STI.getFeatureBits().test(FeatureSupportsSRAMECC))
    SramEccSetting = TargetIDSetting::Unsupported;
}

void AMDGPUTargetID::setTargetIDFromFeaturesString(StringRef FS) {
  // Check if xnack or sramecc is explicitly enabled or disabled. In the
  // absence of the target features we assume we must generate code that can
  // run in any environment.
  SubtargetFeatures Features(FS);
  Optional<bool> XnackRequested;
  Optional<bool> SramEccRequested;

  for (const std::string &Feature : Features.getFeatures()) {
    if (Feature == "+xnack")
      XnackRequested = true;
    else if (Feature == "-xnack")
      XnackRequested = false;
    else if (Feature == "+sramecc")
      SramEccRequested = true;
    else if (Feature == "-sramecc")
      SramEccRequested = false;
  }

  bool XnackSupported = isXnackSupported();
  bool SramEccSupported = isSramEccSupported();

  if (XnackRequested) {
    if (XnackSupported) {
      XnackSetting =
          *XnackRequested ? TargetIDSetting::On : TargetIDSetting::Off;
    } else {
      // If a specific xnack setting was requested and this GPU does not
      // support xnack emit a warning. Setting will remain set to
      // "Unsupported".
      if (*XnackRequested) {
        errs() << "warning: xnack 'On' was requested for a processor that does "
                  "not support it!\n";
      } else {
        errs() << "warning: xnack 'Off' was requested for a processor that "
                  "does not support it!\n";
      }
    }
  }

  if (SramEccRequested) {
    if (SramEccSupported) {
      SramEccSetting =
          *SramEccRequested ? TargetIDSetting::On : TargetIDSetting::Off;
    } else {
      // If a specific sramecc setting was requested and this GPU does not
      // support sramecc emit a warning. Setting will remain set to
      // "Unsupported".
      if (*SramEccRequested) {
        errs() << "warning: sramecc 'On' was requested for a processor that "
                  "does not support it!\n";
      } else {
        errs() << "warning: sramecc 'Off' was requested for a processor that "
                  "does not support it!\n";
      }
    }
  }
}

static TargetIDSetting
getTargetIDSettingFromFeatureString(StringRef FeatureString) {
  if (FeatureString.endswith("-"))
    return TargetIDSetting::Off;
  if (FeatureString.endswith("+"))
    return TargetIDSetting::On;

  llvm_unreachable("Malformed feature string");
}

void AMDGPUTargetID::setTargetIDFromTargetIDStream(StringRef TargetID) {
  SmallVector<StringRef, 3> TargetIDSplit;
  TargetID.split(TargetIDSplit, ':');

  for (const auto &FeatureString : TargetIDSplit) {
    if (FeatureString.startswith("xnack"))
      XnackSetting = getTargetIDSettingFromFeatureString(FeatureString);
    if (FeatureString.startswith("sramecc"))
      SramEccSetting = getTargetIDSettingFromFeatureString(FeatureString);
  }
}

std::string AMDGPUTargetID::toString() const {
  std::string StringRep = "";
  raw_string_ostream StreamRep(StringRep);

  auto TargetTriple = STI.getTargetTriple();
  auto Version = getIsaVersion(STI.getCPU());

  StreamRep << TargetTriple.getArchName() << '-'
            << TargetTriple.getVendorName() << '-'
            << TargetTriple.getOSName() << '-'
            << TargetTriple.getEnvironmentName() << '-';

  std::string Processor = "";
  // TODO: Following else statement is present here because we used various
  // alias names for GPUs up until GFX9 (e.g. 'fiji' is same as 'gfx803').
  // Remove once all aliases are removed from GCNProcessors.td.
  if (Version.Major >= 9)
    Processor = STI.getCPU().str();
  else
    Processor = (Twine("gfx") + Twine(Version.Major) + Twine(Version.Minor) +
                 Twine(Version.Stepping))
                    .str();

  std::string Features = "";
  if (Optional<uint8_t> HsaAbiVersion = getHsaAbiVersion(&STI)) {
    switch (*HsaAbiVersion) {
    case ELF::ELFABIVERSION_AMDGPU_HSA_V2:
      // Code object V2 only supported specific processors and had fixed
      // settings for the XNACK.
      if (Processor == "gfx600") {
      } else if (Processor == "gfx601") {
      } else if (Processor == "gfx602") {
      } else if (Processor == "gfx700") {
      } else if (Processor == "gfx701") {
      } else if (Processor == "gfx702") {
      } else if (Processor == "gfx703") {
      } else if (Processor == "gfx704") {
      } else if (Processor == "gfx705") {
      } else if (Processor == "gfx801") {
        if (!isXnackOnOrAny())
          report_fatal_error(
              "AMD GPU code object V2 does not support processor " +
              Processor + " without XNACK");
      } else if (Processor == "gfx802") {
      } else if (Processor == "gfx803") {
      } else if (Processor == "gfx805") {
      } else if (Processor == "gfx810") {
        if (!isXnackOnOrAny())
          report_fatal_error(
              "AMD GPU code object V2 does not support processor " +
              Processor + " without XNACK");
      } else if (Processor == "gfx900") {
        if (isXnackOnOrAny())
          Processor = "gfx901";
      } else if (Processor == "gfx902") {
        if (isXnackOnOrAny())
          Processor = "gfx903";
      } else if (Processor == "gfx904") {
        if (isXnackOnOrAny())
          Processor = "gfx905";
      } else if (Processor == "gfx906") {
        if (isXnackOnOrAny())
          Processor = "gfx907";
      } else {
        report_fatal_error(
            "AMD GPU code object V2 does not support processor " + Processor);
      }
      break;
    case ELF::ELFABIVERSION_AMDGPU_HSA_V3:
      // xnack.
      if (isXnackOnOrAny())
        Features += ":xnack+";
      // In code object v2 and v3, "sramecc" feature was spelled with a
      // hyphen ("sram-ecc").
      if (isSramEccOnOrAny())
        Features += ":sram-ecc+";
      break;
    case ELF::ELFABIVERSION_AMDGPU_HSA_V4:
      // sramecc.
      if (getSramEccSetting() == TargetIDSetting::Off)
        Features += ":sramecc-";
      else if (getSramEccSetting() == TargetIDSetting::On)
        Features += ":sramecc+";
      // xnack.
      if (getXnackSetting() == TargetIDSetting::Off)
        Features += ":xnack-";
      else if (getXnackSetting() == TargetIDSetting::On)
        Features += ":xnack+";
      break;
    default:
      break;
    }
  }

  StreamRep << Processor << Features;

  StreamRep.flush();
  return StringRep;
}
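
// For reference, a sketch of what toString() produces (assuming an
// amdgcn-amd-amdhsa triple, a gfx906 processor and code object v4):
//   "amdgcn-amd-amdhsa--gfx906:sramecc+:xnack-"
// where a feature suffix is omitted entirely when its setting is "Any".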

unsigned getWavefrontSize(const MCSubtargetInfo *STI) {
  if (STI->getFeatureBits().test(FeatureWavefrontSize16))
    return 16;
  if (STI->getFeatureBits().test(FeatureWavefrontSize32))
    return 32;

  return 64;
}

unsigned getLocalMemorySize(const MCSubtargetInfo *STI) {
  if (STI->getFeatureBits().test(FeatureLocalMemorySize32768))
    return 32768;
  if (STI->getFeatureBits().test(FeatureLocalMemorySize65536))
    return 65536;

  return 0;
}

unsigned getEUsPerCU(const MCSubtargetInfo *STI) {
  // "Per CU" really means "per whatever functional block the waves of a
  // workgroup must share". For gfx10 in CU mode this is the CU, which contains
  // two SIMDs.
  if (isGFX10Plus(*STI) && STI->getFeatureBits().test(FeatureCuMode))
    return 2;
  // Pre-gfx10 a CU contains four SIMDs. For gfx10 in WGP mode the WGP contains
  // two CUs, so a total of four SIMDs.
  return 4;
}

unsigned getMaxWorkGroupsPerCU(const MCSubtargetInfo *STI,
                               unsigned FlatWorkGroupSize) {
  assert(FlatWorkGroupSize != 0);
  if (STI->getTargetTriple().getArch() != Triple::amdgcn)
    return 8;
  unsigned N = getWavesPerWorkGroup(STI, FlatWorkGroupSize);
  if (N == 1)
    return 40;
  N = 40 / N;
  return std::min(N, 16u);
}

unsigned getMinWavesPerEU(const MCSubtargetInfo *STI) {
  return 1;
}

unsigned getMaxWavesPerEU(const MCSubtargetInfo *STI) {
  // FIXME: Need to take scratch memory into account.
  if (isGFX90A(*STI))
    return 8;
  if (!isGFX10Plus(*STI))
    return 10;
  return hasGFX10_3Insts(*STI) ? 16 : 20;
}

unsigned getWavesPerEUForWorkGroup(const MCSubtargetInfo *STI,
                                   unsigned FlatWorkGroupSize) {
  return divideCeil(getWavesPerWorkGroup(STI, FlatWorkGroupSize),
                    getEUsPerCU(STI));
}

unsigned getMinFlatWorkGroupSize(const MCSubtargetInfo *STI) {
  return 1;
}

unsigned getMaxFlatWorkGroupSize(const MCSubtargetInfo *STI) {
  // Some subtargets allow encoding 2048, but this isn't tested or supported.
  return 1024;
}

unsigned getWavesPerWorkGroup(const MCSubtargetInfo *STI,
                              unsigned FlatWorkGroupSize) {
  return divideCeil(FlatWorkGroupSize, getWavefrontSize(STI));
}

unsigned getSGPRAllocGranule(const MCSubtargetInfo *STI) {
  IsaVersion Version = getIsaVersion(STI->getCPU());
  if (Version.Major >= 10)
    return getAddressableNumSGPRs(STI);
  if (Version.Major >= 8)
    return 16;
  return 8;
}

unsigned getSGPREncodingGranule(const MCSubtargetInfo *STI) {
  return 8;
}

unsigned getTotalNumSGPRs(const MCSubtargetInfo *STI) {
  IsaVersion Version = getIsaVersion(STI->getCPU());
  if (Version.Major >= 8)
    return 800;
  return 512;
}

unsigned getAddressableNumSGPRs(const MCSubtargetInfo *STI) {
  if (STI->getFeatureBits().test(FeatureSGPRInitBug))
    return FIXED_NUM_SGPRS_FOR_INIT_BUG;

  IsaVersion Version = getIsaVersion(STI->getCPU());
  if (Version.Major >= 10)
    return 106;
  if (Version.Major >= 8)
    return 102;
  return 104;
}

unsigned getMinNumSGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU) {
  assert(WavesPerEU != 0);

  IsaVersion Version = getIsaVersion(STI->getCPU());
  if (Version.Major >= 10)
    return 0;

  if (WavesPerEU >= getMaxWavesPerEU(STI))
    return 0;

  unsigned MinNumSGPRs = getTotalNumSGPRs(STI) / (WavesPerEU + 1);
  if (STI->getFeatureBits().test(FeatureTrapHandler))
    MinNumSGPRs -= std::min(MinNumSGPRs, (unsigned)TRAP_NUM_SGPRS);
  MinNumSGPRs = alignDown(MinNumSGPRs, getSGPRAllocGranule(STI)) + 1;
  return std::min(MinNumSGPRs, getAddressableNumSGPRs(STI));
}

unsigned getMaxNumSGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU,
                        bool Addressable) {
  assert(WavesPerEU != 0);

  unsigned AddressableNumSGPRs = getAddressableNumSGPRs(STI);
  IsaVersion Version = getIsaVersion(STI->getCPU());
  if (Version.Major >= 10)
    return Addressable ? AddressableNumSGPRs : 108;
  if (Version.Major >= 8 && !Addressable)
    AddressableNumSGPRs = 112;
  unsigned MaxNumSGPRs = getTotalNumSGPRs(STI) / WavesPerEU;
  if (STI->getFeatureBits().test(FeatureTrapHandler))
    MaxNumSGPRs -= std::min(MaxNumSGPRs, (unsigned)TRAP_NUM_SGPRS);
  MaxNumSGPRs = alignDown(MaxNumSGPRs, getSGPRAllocGranule(STI));
  return std::min(MaxNumSGPRs, AddressableNumSGPRs);
}
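
// Illustrative arithmetic only (derived from the helpers above, assuming a
// gfx8-style target without the trap handler feature): getTotalNumSGPRs is 800
// and getSGPRAllocGranule is 16, so for WavesPerEU = 8 the budget is
// alignDown(800 / 8, 16) = 96 SGPRs, which is below the addressable limit of
// 102 and is therefore returned unchanged by getMaxNumSGPRs.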

unsigned getNumExtraSGPRs(const MCSubtargetInfo *STI, bool VCCUsed,
                          bool FlatScrUsed, bool XNACKUsed) {
  unsigned ExtraSGPRs = 0;
  if (VCCUsed)
    ExtraSGPRs = 2;

  IsaVersion Version = getIsaVersion(STI->getCPU());
  if (Version.Major >= 10)
    return ExtraSGPRs;

  if (Version.Major < 8) {
    if (FlatScrUsed)
      ExtraSGPRs = 4;
  } else {
    if (XNACKUsed)
      ExtraSGPRs = 4;

    if (FlatScrUsed)
      ExtraSGPRs = 6;
  }

  return ExtraSGPRs;
}

unsigned getNumExtraSGPRs(const MCSubtargetInfo *STI, bool VCCUsed,
                          bool FlatScrUsed) {
  return getNumExtraSGPRs(STI, VCCUsed, FlatScrUsed,
                          STI->getFeatureBits().test(AMDGPU::FeatureXNACK));
}

unsigned getNumSGPRBlocks(const MCSubtargetInfo *STI, unsigned NumSGPRs) {
  NumSGPRs = alignTo(std::max(1u, NumSGPRs), getSGPREncodingGranule(STI));
  // SGPRBlocks is actual number of SGPR blocks minus 1.
  return NumSGPRs / getSGPREncodingGranule(STI) - 1;
}

unsigned getVGPRAllocGranule(const MCSubtargetInfo *STI,
                             Optional<bool> EnableWavefrontSize32) {
  if (STI->getFeatureBits().test(FeatureGFX90AInsts))
    return 8;

  bool IsWave32 = EnableWavefrontSize32 ?
      *EnableWavefrontSize32 :
      STI->getFeatureBits().test(FeatureWavefrontSize32);

  if (hasGFX10_3Insts(*STI))
    return IsWave32 ? 16 : 8;

  return IsWave32 ? 8 : 4;
}

unsigned getVGPREncodingGranule(const MCSubtargetInfo *STI,
                                Optional<bool> EnableWavefrontSize32) {
  if (STI->getFeatureBits().test(FeatureGFX90AInsts))
    return 8;

  bool IsWave32 = EnableWavefrontSize32 ?
      *EnableWavefrontSize32 :
      STI->getFeatureBits().test(FeatureWavefrontSize32);

  return IsWave32 ? 8 : 4;
}

unsigned getTotalNumVGPRs(const MCSubtargetInfo *STI) {
  if (STI->getFeatureBits().test(FeatureGFX90AInsts))
    return 512;
  if (!isGFX10Plus(*STI))
    return 256;
  return STI->getFeatureBits().test(FeatureWavefrontSize32) ? 1024 : 512;
}

unsigned getAddressableNumVGPRs(const MCSubtargetInfo *STI) {
  if (STI->getFeatureBits().test(FeatureGFX90AInsts))
    return 512;
  return 256;
}

unsigned getMinNumVGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU) {
  assert(WavesPerEU != 0);

  if (WavesPerEU >= getMaxWavesPerEU(STI))
    return 0;
  unsigned MinNumVGPRs =
      alignDown(getTotalNumVGPRs(STI) / (WavesPerEU + 1),
                getVGPRAllocGranule(STI)) + 1;
  return std::min(MinNumVGPRs, getAddressableNumVGPRs(STI));
}

unsigned getMaxNumVGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU) {
  assert(WavesPerEU != 0);

  unsigned MaxNumVGPRs = alignDown(getTotalNumVGPRs(STI) / WavesPerEU,
                                   getVGPRAllocGranule(STI));
  unsigned AddressableNumVGPRs = getAddressableNumVGPRs(STI);
  return std::min(MaxNumVGPRs, AddressableNumVGPRs);
}

unsigned getNumVGPRBlocks(const MCSubtargetInfo *STI, unsigned NumVGPRs,
                          Optional<bool> EnableWavefrontSize32) {
  NumVGPRs = alignTo(std::max(1u, NumVGPRs),
                     getVGPREncodingGranule(STI, EnableWavefrontSize32));
  // VGPRBlocks is actual number of VGPR blocks minus 1.
  return NumVGPRs / getVGPREncodingGranule(STI, EnableWavefrontSize32) - 1;
}

} // end namespace IsaInfo

void initDefaultAMDKernelCodeT(amd_kernel_code_t &Header,
                               const MCSubtargetInfo *STI) {
  IsaVersion Version = getIsaVersion(STI->getCPU());

  memset(&Header, 0, sizeof(Header));

  Header.amd_kernel_code_version_major = 1;
  Header.amd_kernel_code_version_minor = 2;
  Header.amd_machine_kind = 1; // AMD_MACHINE_KIND_AMDGPU
  Header.amd_machine_version_major = Version.Major;
  Header.amd_machine_version_minor = Version.Minor;
  Header.amd_machine_version_stepping = Version.Stepping;
  Header.kernel_code_entry_byte_offset = sizeof(Header);
  Header.wavefront_size = 6;

  // If the code object does not support indirect functions, then the value
  // must be 0xffffffff.
  Header.call_convention = -1;

  // These alignment values are specified in powers of two, so alignment =
  // 2^n. The minimum alignment is 2^4 = 16.
  Header.kernarg_segment_alignment = 4;
  Header.group_segment_alignment = 4;
  Header.private_segment_alignment = 4;

  if (Version.Major >= 10) {
    if (STI->getFeatureBits().test(FeatureWavefrontSize32)) {
      Header.wavefront_size = 5;
      Header.code_properties |= AMD_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32;
    }
    Header.compute_pgm_resource_registers |=
        S_00B848_WGP_MODE(STI->getFeatureBits().test(FeatureCuMode) ? 0 : 1) |
        S_00B848_MEM_ORDERED(1);
  }
}

amdhsa::kernel_descriptor_t getDefaultAmdhsaKernelDescriptor(
    const MCSubtargetInfo *STI) {
  IsaVersion Version = getIsaVersion(STI->getCPU());

  amdhsa::kernel_descriptor_t KD;
  memset(&KD, 0, sizeof(KD));

  AMDHSA_BITS_SET(KD.compute_pgm_rsrc1,
                  amdhsa::COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_16_64,
                  amdhsa::FLOAT_DENORM_MODE_FLUSH_NONE);
  AMDHSA_BITS_SET(KD.compute_pgm_rsrc1,
                  amdhsa::COMPUTE_PGM_RSRC1_ENABLE_DX10_CLAMP, 1);
  AMDHSA_BITS_SET(KD.compute_pgm_rsrc1,
                  amdhsa::COMPUTE_PGM_RSRC1_ENABLE_IEEE_MODE, 1);
  AMDHSA_BITS_SET(KD.compute_pgm_rsrc2,
                  amdhsa::COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_X, 1);
  if (Version.Major >= 10) {
    AMDHSA_BITS_SET(KD.kernel_code_properties,
                    amdhsa::KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32,
                    STI->getFeatureBits().test(FeatureWavefrontSize32) ? 1 : 0);
    AMDHSA_BITS_SET(KD.compute_pgm_rsrc1,
                    amdhsa::COMPUTE_PGM_RSRC1_WGP_MODE,
                    STI->getFeatureBits().test(FeatureCuMode) ? 0 : 1);
    AMDHSA_BITS_SET(KD.compute_pgm_rsrc1,
                    amdhsa::COMPUTE_PGM_RSRC1_MEM_ORDERED, 1);
  }
  if (AMDGPU::isGFX90A(*STI)) {
    AMDHSA_BITS_SET(KD.compute_pgm_rsrc3,
                    amdhsa::COMPUTE_PGM_RSRC3_GFX90A_TG_SPLIT,
                    STI->getFeatureBits().test(FeatureTgSplit) ? 1 : 0);
  }
  return KD;
}

bool isGroupSegment(const GlobalValue *GV) {
  return GV->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS;
}

bool isGlobalSegment(const GlobalValue *GV) {
  return GV->getAddressSpace() == AMDGPUAS::GLOBAL_ADDRESS;
}

bool isReadOnlySegment(const GlobalValue *GV) {
  unsigned AS = GV->getAddressSpace();
  return AS == AMDGPUAS::CONSTANT_ADDRESS ||
         AS == AMDGPUAS::CONSTANT_ADDRESS_32BIT;
}

bool shouldEmitConstantsToTextSection(const Triple &TT) {
  return TT.getArch() == Triple::r600;
}

int getIntegerAttribute(const Function &F, StringRef Name, int Default) {
  Attribute A = F.getFnAttribute(Name);
  int Result = Default;

  if (A.isStringAttribute()) {
    StringRef Str = A.getValueAsString();
    if (Str.getAsInteger(0, Result)) {
      LLVMContext &Ctx = F.getContext();
      Ctx.emitError("can't parse integer attribute " + Name);
    }
  }

  return Result;
}

std::pair<int, int> getIntegerPairAttribute(const Function &F,
                                            StringRef Name,
                                            std::pair<int, int> Default,
                                            bool OnlyFirstRequired) {
  Attribute A = F.getFnAttribute(Name);
  if (!A.isStringAttribute())
    return Default;

  LLVMContext &Ctx = F.getContext();
  std::pair<int, int> Ints = Default;
  std::pair<StringRef, StringRef> Strs = A.getValueAsString().split(',');
  if (Strs.first.trim().getAsInteger(0, Ints.first)) {
    Ctx.emitError("can't parse first integer attribute " + Name);
    return Default;
  }
  if (Strs.second.trim().getAsInteger(0, Ints.second)) {
    if (!OnlyFirstRequired || !Strs.second.trim().empty()) {
      Ctx.emitError("can't parse second integer attribute " + Name);
      return Default;
    }
  }

  return Ints;
}

unsigned getVmcntBitMask(const IsaVersion &Version) {
  unsigned VmcntLo = (1 << getVmcntBitWidthLo()) - 1;
  if (Version.Major < 9)
    return VmcntLo;

  unsigned VmcntHi = ((1 << getVmcntBitWidthHi()) - 1) << getVmcntBitWidthLo();
  return VmcntLo | VmcntHi;
}

unsigned getExpcntBitMask(const IsaVersion &Version) {
  return (1 << getExpcntBitWidth()) - 1;
}

unsigned getLgkmcntBitMask(const IsaVersion &Version) {
  return (1 << getLgkmcntBitWidth(Version.Major)) - 1;
}

unsigned getWaitcntBitMask(const IsaVersion &Version) {
  unsigned VmcntLo = getBitMask(getVmcntBitShiftLo(), getVmcntBitWidthLo());
  unsigned Expcnt = getBitMask(getExpcntBitShift(), getExpcntBitWidth());
  unsigned Lgkmcnt = getBitMask(getLgkmcntBitShift(),
                                getLgkmcntBitWidth(Version.Major));
  unsigned Waitcnt = VmcntLo | Expcnt | Lgkmcnt;
  if (Version.Major < 9)
    return Waitcnt;

  unsigned VmcntHi = getBitMask(getVmcntBitShiftHi(), getVmcntBitWidthHi());
  return Waitcnt | VmcntHi;
}

unsigned decodeVmcnt(const IsaVersion &Version, unsigned Waitcnt) {
  unsigned VmcntLo =
      unpackBits(Waitcnt, getVmcntBitShiftLo(), getVmcntBitWidthLo());
  if (Version.Major < 9)
    return VmcntLo;

  unsigned VmcntHi =
      unpackBits(Waitcnt, getVmcntBitShiftHi(), getVmcntBitWidthHi());
  VmcntHi <<= getVmcntBitWidthLo();
  return VmcntLo | VmcntHi;
}

unsigned decodeExpcnt(const IsaVersion &Version, unsigned Waitcnt) {
  return unpackBits(Waitcnt, getExpcntBitShift(), getExpcntBitWidth());
}

unsigned decodeLgkmcnt(const IsaVersion &Version, unsigned Waitcnt) {
  return unpackBits(Waitcnt, getLgkmcntBitShift(),
                    getLgkmcntBitWidth(Version.Major));
}

void decodeWaitcnt(const IsaVersion &Version, unsigned Waitcnt,
                   unsigned &Vmcnt, unsigned &Expcnt, unsigned &Lgkmcnt) {
  Vmcnt = decodeVmcnt(Version, Waitcnt);
  Expcnt = decodeExpcnt(Version, Waitcnt);
  Lgkmcnt = decodeLgkmcnt(Version, Waitcnt);
}

Waitcnt decodeWaitcnt(const IsaVersion &Version, unsigned Encoded) {
  Waitcnt Decoded;
  Decoded.VmCnt = decodeVmcnt(Version, Encoded);
  Decoded.ExpCnt = decodeExpcnt(Version, Encoded);
  Decoded.LgkmCnt = decodeLgkmcnt(Version, Encoded);
  return Decoded;
}

unsigned encodeVmcnt(const IsaVersion &Version, unsigned Waitcnt,
                     unsigned Vmcnt) {
  Waitcnt =
      packBits(Vmcnt, Waitcnt, getVmcntBitShiftLo(), getVmcntBitWidthLo());
  if (Version.Major < 9)
    return Waitcnt;

  Vmcnt >>= getVmcntBitWidthLo();
  return packBits(Vmcnt, Waitcnt, getVmcntBitShiftHi(), getVmcntBitWidthHi());
}

unsigned encodeExpcnt(const IsaVersion &Version, unsigned Waitcnt,
                      unsigned Expcnt) {
  return packBits(Expcnt, Waitcnt, getExpcntBitShift(), getExpcntBitWidth());
}

unsigned encodeLgkmcnt(const IsaVersion &Version, unsigned Waitcnt,
                       unsigned Lgkmcnt) {
  return packBits(Lgkmcnt, Waitcnt, getLgkmcntBitShift(),
                  getLgkmcntBitWidth(Version.Major));
}

unsigned encodeWaitcnt(const IsaVersion &Version,
                       unsigned Vmcnt, unsigned Expcnt, unsigned Lgkmcnt) {
  unsigned Waitcnt = getWaitcntBitMask(Version);
  Waitcnt = encodeVmcnt(Version, Waitcnt, Vmcnt);
  Waitcnt = encodeExpcnt(Version, Waitcnt, Expcnt);
  Waitcnt = encodeLgkmcnt(Version, Waitcnt, Lgkmcnt);
  return Waitcnt;
}

unsigned encodeWaitcnt(const IsaVersion &Version, const Waitcnt &Decoded) {
  return encodeWaitcnt(Version, Decoded.VmCnt, Decoded.ExpCnt, Decoded.LgkmCnt);
}
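
// A sketch of how these are meant to be used (the counter values here are
// arbitrary): encodeWaitcnt(Version, /*Vmcnt=*/0, /*Expcnt=*/7, /*Lgkmcnt=*/15)
// starts from the all-ones mask returned by getWaitcntBitMask and packs each
// counter into its field, and decodeWaitcnt on the result returns the same
// three values, provided each fits the field width for that ISA version.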

//===----------------------------------------------------------------------===//
// hwreg
//===----------------------------------------------------------------------===//

namespace Hwreg {

int64_t getHwregId(const StringRef Name) {
  for (int Id = ID_SYMBOLIC_FIRST_; Id < ID_SYMBOLIC_LAST_; ++Id) {
    if (IdSymbolic[Id] && Name == IdSymbolic[Id])
      return Id;
  }
  return ID_UNKNOWN_;
}

static unsigned getLastSymbolicHwreg(const MCSubtargetInfo &STI) {
  if (isSI(STI) || isCI(STI) || isVI(STI))
    return ID_SYMBOLIC_FIRST_GFX9_;
  else if (isGFX9(STI))
    return ID_SYMBOLIC_FIRST_GFX10_;
  else if (isGFX10(STI) && !isGFX10_BEncoding(STI))
    return ID_SYMBOLIC_FIRST_GFX1030_;
  else
    return ID_SYMBOLIC_LAST_;
}

bool isValidHwreg(int64_t Id, const MCSubtargetInfo &STI) {
  return
    ID_SYMBOLIC_FIRST_ <= Id && Id < getLastSymbolicHwreg(STI) &&
    IdSymbolic[Id] && (Id != ID_XNACK_MASK || !AMDGPU::isGFX10_BEncoding(STI));
}

bool isValidHwreg(int64_t Id) {
  return 0 <= Id && isUInt<ID_WIDTH_>(Id);
}

bool isValidHwregOffset(int64_t Offset) {
  return 0 <= Offset && isUInt<OFFSET_WIDTH_>(Offset);
}

bool isValidHwregWidth(int64_t Width) {
  return 0 <= (Width - 1) && isUInt<WIDTH_M1_WIDTH_>(Width - 1);
}

uint64_t encodeHwreg(uint64_t Id, uint64_t Offset, uint64_t Width) {
  return (Id << ID_SHIFT_) |
         (Offset << OFFSET_SHIFT_) |
         ((Width - 1) << WIDTH_M1_SHIFT_);
}

StringRef getHwreg(unsigned Id, const MCSubtargetInfo &STI) {
  return isValidHwreg(Id, STI) ? IdSymbolic[Id] : "";
}

void decodeHwreg(unsigned Val, unsigned &Id, unsigned &Offset, unsigned &Width) {
  Id = (Val & ID_MASK_) >> ID_SHIFT_;
  Offset = (Val & OFFSET_MASK_) >> OFFSET_SHIFT_;
  Width = ((Val & WIDTH_M1_MASK_) >> WIDTH_M1_SHIFT_) + 1;
}
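
// Sketch of the hwreg(...) operand handling above: encodeHwreg packs the
// register id, the starting bit offset and (width - 1) into a single
// immediate, with the exact bit positions coming from ID_SHIFT_,
// OFFSET_SHIFT_ and WIDTH_M1_SHIFT_ in SIDefines.h, and decodeHwreg is its
// exact inverse.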

} // namespace Hwreg

//===----------------------------------------------------------------------===//
// exp tgt
//===----------------------------------------------------------------------===//

namespace Exp {

struct ExpTgt {
  StringLiteral Name;
  unsigned Tgt;
  unsigned MaxIndex;
};

static constexpr ExpTgt ExpTgtInfo[] = {
  {{"null"},  ET_NULL,   ET_NULL_MAX_IDX},
  {{"mrtz"},  ET_MRTZ,   ET_MRTZ_MAX_IDX},
  {{"prim"},  ET_PRIM,   ET_PRIM_MAX_IDX},
  {{"mrt"},   ET_MRT0,   ET_MRT_MAX_IDX},
  {{"pos"},   ET_POS0,   ET_POS_MAX_IDX},
  {{"param"}, ET_PARAM0, ET_PARAM_MAX_IDX},
};

bool getTgtName(unsigned Id, StringRef &Name, int &Index) {
  for (const ExpTgt &Val : ExpTgtInfo) {
    if (Val.Tgt <= Id && Id <= Val.Tgt + Val.MaxIndex) {
      Index = (Val.MaxIndex == 0) ? -1 : (Id - Val.Tgt);
      Name = Val.Name;
      return true;
    }
  }
  return false;
}

unsigned getTgtId(const StringRef Name) {

  for (const ExpTgt &Val : ExpTgtInfo) {
    if (Val.MaxIndex == 0 && Name == Val.Name)
      return Val.Tgt;

    if (Val.MaxIndex > 0 && Name.startswith(Val.Name)) {
      StringRef Suffix = Name.drop_front(Val.Name.size());

      unsigned Id;
      if (Suffix.getAsInteger(10, Id) || Id > Val.MaxIndex)
        return ET_INVALID;

      // Disable leading zeroes
      if (Suffix.size() > 1 && Suffix[0] == '0')
        return ET_INVALID;

      return Val.Tgt + Id;
    }
  }
  return ET_INVALID;
}

bool isSupportedTgtId(unsigned Id, const MCSubtargetInfo &STI) {
  return (Id != ET_POS4 && Id != ET_PRIM) || isGFX10Plus(STI);
}

} // namespace Exp

//===----------------------------------------------------------------------===//
// MTBUF Format
//===----------------------------------------------------------------------===//

namespace MTBUFFormat {

int64_t getDfmt(const StringRef Name) {
  for (int Id = DFMT_MIN; Id <= DFMT_MAX; ++Id) {
    if (Name == DfmtSymbolic[Id])
      return Id;
  }
  return DFMT_UNDEF;
}

StringRef getDfmtName(unsigned Id) {
  assert(Id <= DFMT_MAX);
  return DfmtSymbolic[Id];
}

static StringLiteral const *getNfmtLookupTable(const MCSubtargetInfo &STI) {
  if (isSI(STI) || isCI(STI))
    return NfmtSymbolicSICI;
  if (isVI(STI) || isGFX9(STI))
    return NfmtSymbolicVI;
  return NfmtSymbolicGFX10;
}

int64_t getNfmt(const StringRef Name, const MCSubtargetInfo &STI) {
  auto lookupTable = getNfmtLookupTable(STI);
  for (int Id = NFMT_MIN; Id <= NFMT_MAX; ++Id) {
    if (Name == lookupTable[Id])
      return Id;
  }
  return NFMT_UNDEF;
}

StringRef getNfmtName(unsigned Id, const MCSubtargetInfo &STI) {
  assert(Id <= NFMT_MAX);
  return getNfmtLookupTable(STI)[Id];
}

bool isValidDfmtNfmt(unsigned Id, const MCSubtargetInfo &STI) {
  unsigned Dfmt;
  unsigned Nfmt;
  decodeDfmtNfmt(Id, Dfmt, Nfmt);
  return isValidNfmt(Nfmt, STI);
}

bool isValidNfmt(unsigned Id, const MCSubtargetInfo &STI) {
  return !getNfmtName(Id, STI).empty();
}

int64_t encodeDfmtNfmt(unsigned Dfmt, unsigned Nfmt) {
  return (Dfmt << DFMT_SHIFT) | (Nfmt << NFMT_SHIFT);
}

void decodeDfmtNfmt(unsigned Format, unsigned &Dfmt, unsigned &Nfmt) {
  Dfmt = (Format >> DFMT_SHIFT) & DFMT_MASK;
  Nfmt = (Format >> NFMT_SHIFT) & NFMT_MASK;
}
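
// Note: decodeDfmtNfmt(encodeDfmtNfmt(Dfmt, Nfmt), ...) recovers the original
// pair as long as both values fit within DFMT_MASK and NFMT_MASK; callers are
// expected to check them with isValidDfmtNfmt()/isValidNfmt() for the
// subtarget at hand.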

int64_t getUnifiedFormat(const StringRef Name) {
  for (int Id = UFMT_FIRST; Id <= UFMT_LAST; ++Id) {
    if (Name == UfmtSymbolic[Id])
      return Id;
  }
  return UFMT_UNDEF;
}

StringRef getUnifiedFormatName(unsigned Id) {
  return isValidUnifiedFormat(Id) ? UfmtSymbolic[Id] : "";
}

bool isValidUnifiedFormat(unsigned Id) {
  return Id <= UFMT_LAST;
}

int64_t convertDfmtNfmt2Ufmt(unsigned Dfmt, unsigned Nfmt) {
  int64_t Fmt = encodeDfmtNfmt(Dfmt, Nfmt);
  for (int Id = UFMT_FIRST; Id <= UFMT_LAST; ++Id) {
    if (Fmt == DfmtNfmt2UFmt[Id])
      return Id;
  }
  return UFMT_UNDEF;
}

bool isValidFormatEncoding(unsigned Val, const MCSubtargetInfo &STI) {
  return isGFX10Plus(STI) ? (Val <= UFMT_MAX) : (Val <= DFMT_NFMT_MAX);
}

unsigned getDefaultFormatEncoding(const MCSubtargetInfo &STI) {
  if (isGFX10Plus(STI))
    return UFMT_DEFAULT;
  return DFMT_NFMT_DEFAULT;
}

} // namespace MTBUFFormat

//===----------------------------------------------------------------------===//
// SendMsg
//===----------------------------------------------------------------------===//

namespace SendMsg {

int64_t getMsgId(const StringRef Name) {
  for (int i = ID_GAPS_FIRST_; i < ID_GAPS_LAST_; ++i) {
    if (IdSymbolic[i] && Name == IdSymbolic[i])
      return i;
  }
  return ID_UNKNOWN_;
}

bool isValidMsgId(int64_t MsgId, const MCSubtargetInfo &STI, bool Strict) {
  if (Strict) {
    switch (MsgId) {
    case ID_SAVEWAVE:
      return isVI(STI) || isGFX9Plus(STI);
    case ID_STALL_WAVE_GEN:
    case ID_HALT_WAVES:
    case ID_ORDERED_PS_DONE:
    case ID_GS_ALLOC_REQ:
    case ID_GET_DOORBELL:
      return isGFX9Plus(STI);
    case ID_EARLY_PRIM_DEALLOC:
      return isGFX9(STI);
    case ID_GET_DDID:
      return isGFX10Plus(STI);
    default:
      return 0 <= MsgId && MsgId < ID_GAPS_LAST_ && IdSymbolic[MsgId];
    }
  } else {
    return 0 <= MsgId && isUInt<ID_WIDTH_>(MsgId);
  }
}

StringRef getMsgName(int64_t MsgId) {
  assert(0 <= MsgId && MsgId < ID_GAPS_LAST_);
  return IdSymbolic[MsgId];
}

int64_t getMsgOpId(int64_t MsgId, const StringRef Name) {
  const char* const *S = (MsgId == ID_SYSMSG) ? OpSysSymbolic : OpGsSymbolic;
  const int F = (MsgId == ID_SYSMSG) ? OP_SYS_FIRST_ : OP_GS_FIRST_;
  const int L = (MsgId == ID_SYSMSG) ? OP_SYS_LAST_ : OP_GS_LAST_;
  for (int i = F; i < L; ++i) {
    if (Name == S[i]) {
      return i;
    }
  }
  return OP_UNKNOWN_;
}

bool isValidMsgOp(int64_t MsgId, int64_t OpId, const MCSubtargetInfo &STI,
                  bool Strict) {
  assert(isValidMsgId(MsgId, STI, Strict));

  if (!Strict)
    return 0 <= OpId && isUInt<OP_WIDTH_>(OpId);

  switch(MsgId)
  {
  case ID_GS:
    return (OP_GS_FIRST_ <= OpId && OpId < OP_GS_LAST_) && OpId != OP_GS_NOP;
  case ID_GS_DONE:
    return OP_GS_FIRST_ <= OpId && OpId < OP_GS_LAST_;
  case ID_SYSMSG:
    return OP_SYS_FIRST_ <= OpId && OpId < OP_SYS_LAST_;
  default:
    return OpId == OP_NONE_;
  }
}

StringRef getMsgOpName(int64_t MsgId, int64_t OpId) {
  assert(msgRequiresOp(MsgId));
  return (MsgId == ID_SYSMSG)? OpSysSymbolic[OpId] : OpGsSymbolic[OpId];
}

bool isValidMsgStream(int64_t MsgId, int64_t OpId, int64_t StreamId,
                      const MCSubtargetInfo &STI, bool Strict) {
  assert(isValidMsgOp(MsgId, OpId, STI, Strict));

  if (!Strict)
    return 0 <= StreamId && isUInt<STREAM_ID_WIDTH_>(StreamId);

  switch(MsgId)
  {
  case ID_GS:
    return STREAM_ID_FIRST_ <= StreamId && StreamId < STREAM_ID_LAST_;
  case ID_GS_DONE:
    return (OpId == OP_GS_NOP)?
        (StreamId == STREAM_ID_NONE_) :
        (STREAM_ID_FIRST_ <= StreamId && StreamId < STREAM_ID_LAST_);
  default:
    return StreamId == STREAM_ID_NONE_;
  }
}

bool msgRequiresOp(int64_t MsgId) {
  return MsgId == ID_GS || MsgId == ID_GS_DONE || MsgId == ID_SYSMSG;
}

bool msgSupportsStream(int64_t MsgId, int64_t OpId) {
  return (MsgId == ID_GS || MsgId == ID_GS_DONE) && OpId != OP_GS_NOP;
}

void decodeMsg(unsigned Val,
               uint16_t &MsgId,
               uint16_t &OpId,
               uint16_t &StreamId) {
  MsgId = Val & ID_MASK_;
  OpId = (Val & OP_MASK_) >> OP_SHIFT_;
  StreamId = (Val & STREAM_ID_MASK_) >> STREAM_ID_SHIFT_;
}

uint64_t encodeMsg(uint64_t MsgId,
                   uint64_t OpId,
                   uint64_t StreamId) {
  return (MsgId << ID_SHIFT_) |
         (OpId << OP_SHIFT_) |
         (StreamId << STREAM_ID_SHIFT_);
}
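
// Rough usage sketch: an assembler resolves a sendmsg(...) operand by mapping
// the symbolic names through getMsgId()/getMsgOpId(), checking them with the
// isValidMsg*() predicates for the subtarget, and then calling encodeMsg() to
// build the immediate; decodeMsg() is the inverse used when disassembling.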

} // namespace SendMsg

//===----------------------------------------------------------------------===//
//
//===----------------------------------------------------------------------===//

unsigned getInitialPSInputAddr(const Function &F) {
  return getIntegerAttribute(F, "InitialPSInputAddr", 0);
}

bool isShader(CallingConv::ID cc) {
  switch(cc) {
    case CallingConv::AMDGPU_VS:
    case CallingConv::AMDGPU_LS:
    case CallingConv::AMDGPU_HS:
    case CallingConv::AMDGPU_ES:
    case CallingConv::AMDGPU_GS:
    case CallingConv::AMDGPU_PS:
    case CallingConv::AMDGPU_CS:
      return true;
    default:
      return false;
  }
}

bool isGraphics(CallingConv::ID cc) {
  return isShader(cc) || cc == CallingConv::AMDGPU_Gfx;
}

bool isCompute(CallingConv::ID cc) {
  return !isGraphics(cc) || cc == CallingConv::AMDGPU_CS;
}

bool isEntryFunctionCC(CallingConv::ID CC) {
  switch (CC) {
  case CallingConv::AMDGPU_KERNEL:
  case CallingConv::SPIR_KERNEL:
  case CallingConv::AMDGPU_VS:
  case CallingConv::AMDGPU_GS:
  case CallingConv::AMDGPU_PS:
  case CallingConv::AMDGPU_CS:
  case CallingConv::AMDGPU_ES:
  case CallingConv::AMDGPU_HS:
  case CallingConv::AMDGPU_LS:
    return true;
  default:
    return false;
  }
}

bool isModuleEntryFunctionCC(CallingConv::ID CC) {
  switch (CC) {
  case CallingConv::AMDGPU_Gfx:
    return true;
  default:
    return isEntryFunctionCC(CC);
  }
}

bool hasXNACK(const MCSubtargetInfo &STI) {
  return STI.getFeatureBits()[AMDGPU::FeatureXNACK];
}

bool hasSRAMECC(const MCSubtargetInfo &STI) {
  return STI.getFeatureBits()[AMDGPU::FeatureSRAMECC];
}

bool hasMIMG_R128(const MCSubtargetInfo &STI) {
  return STI.getFeatureBits()[AMDGPU::FeatureMIMG_R128] &&
         !STI.getFeatureBits()[AMDGPU::FeatureR128A16];
}

bool hasGFX10A16(const MCSubtargetInfo &STI) {
  return STI.getFeatureBits()[AMDGPU::FeatureGFX10A16];
}

bool hasG16(const MCSubtargetInfo &STI) {
  return STI.getFeatureBits()[AMDGPU::FeatureG16];
}

bool hasPackedD16(const MCSubtargetInfo &STI) {
  return !STI.getFeatureBits()[AMDGPU::FeatureUnpackedD16VMem];
}

bool isSI(const MCSubtargetInfo &STI) {
  return STI.getFeatureBits()[AMDGPU::FeatureSouthernIslands];
}

bool isCI(const MCSubtargetInfo &STI) {
  return STI.getFeatureBits()[AMDGPU::FeatureSeaIslands];
}

bool isVI(const MCSubtargetInfo &STI) {
  return STI.getFeatureBits()[AMDGPU::FeatureVolcanicIslands];
}

bool isGFX9(const MCSubtargetInfo &STI) {
  return STI.getFeatureBits()[AMDGPU::FeatureGFX9];
}

bool isGFX9Plus(const MCSubtargetInfo &STI) {
  return isGFX9(STI) || isGFX10Plus(STI);
}

bool isGFX10(const MCSubtargetInfo &STI) {
  return STI.getFeatureBits()[AMDGPU::FeatureGFX10];
}

bool isGFX10Plus(const MCSubtargetInfo &STI) { return isGFX10(STI); }

bool isGCN3Encoding(const MCSubtargetInfo &STI) {
  return STI.getFeatureBits()[AMDGPU::FeatureGCN3Encoding];
}

bool isGFX10_BEncoding(const MCSubtargetInfo &STI) {
  return STI.getFeatureBits()[AMDGPU::FeatureGFX10_BEncoding];
}

bool hasGFX10_3Insts(const MCSubtargetInfo &STI) {
  return STI.getFeatureBits()[AMDGPU::FeatureGFX10_3Insts];
}

bool isGFX90A(const MCSubtargetInfo &STI) {
  return STI.getFeatureBits()[AMDGPU::FeatureGFX90AInsts];
}

bool isSGPR(unsigned Reg, const MCRegisterInfo* TRI) {
  const MCRegisterClass SGPRClass = TRI->getRegClass(AMDGPU::SReg_32RegClassID);
  const unsigned FirstSubReg = TRI->getSubReg(Reg, AMDGPU::sub0);
  return SGPRClass.contains(FirstSubReg != 0 ? FirstSubReg : Reg) ||
    Reg == AMDGPU::SCC;
}

bool isRegIntersect(unsigned Reg0, unsigned Reg1, const MCRegisterInfo* TRI) {
  for (MCRegAliasIterator R(Reg0, TRI, true); R.isValid(); ++R) {
    if (*R == Reg1) return true;
  }
  return false;
}

#define MAP_REG2REG \
  using namespace AMDGPU; \
  switch(Reg) { \
  default: return Reg; \
  CASE_CI_VI(FLAT_SCR) \
  CASE_CI_VI(FLAT_SCR_LO) \
  CASE_CI_VI(FLAT_SCR_HI) \
  CASE_VI_GFX9PLUS(TTMP0) \
  CASE_VI_GFX9PLUS(TTMP1) \
  CASE_VI_GFX9PLUS(TTMP2) \
  CASE_VI_GFX9PLUS(TTMP3) \
  CASE_VI_GFX9PLUS(TTMP4) \
  CASE_VI_GFX9PLUS(TTMP5) \
  CASE_VI_GFX9PLUS(TTMP6) \
  CASE_VI_GFX9PLUS(TTMP7) \
  CASE_VI_GFX9PLUS(TTMP8) \
  CASE_VI_GFX9PLUS(TTMP9) \
  CASE_VI_GFX9PLUS(TTMP10) \
  CASE_VI_GFX9PLUS(TTMP11) \
  CASE_VI_GFX9PLUS(TTMP12) \
  CASE_VI_GFX9PLUS(TTMP13) \
  CASE_VI_GFX9PLUS(TTMP14) \
  CASE_VI_GFX9PLUS(TTMP15) \
  CASE_VI_GFX9PLUS(TTMP0_TTMP1) \
  CASE_VI_GFX9PLUS(TTMP2_TTMP3) \
  CASE_VI_GFX9PLUS(TTMP4_TTMP5) \
  CASE_VI_GFX9PLUS(TTMP6_TTMP7) \
  CASE_VI_GFX9PLUS(TTMP8_TTMP9) \
  CASE_VI_GFX9PLUS(TTMP10_TTMP11) \
  CASE_VI_GFX9PLUS(TTMP12_TTMP13) \
  CASE_VI_GFX9PLUS(TTMP14_TTMP15) \
  CASE_VI_GFX9PLUS(TTMP0_TTMP1_TTMP2_TTMP3) \
  CASE_VI_GFX9PLUS(TTMP4_TTMP5_TTMP6_TTMP7) \
  CASE_VI_GFX9PLUS(TTMP8_TTMP9_TTMP10_TTMP11) \
  CASE_VI_GFX9PLUS(TTMP12_TTMP13_TTMP14_TTMP15) \
  CASE_VI_GFX9PLUS(TTMP0_TTMP1_TTMP2_TTMP3_TTMP4_TTMP5_TTMP6_TTMP7) \
  CASE_VI_GFX9PLUS(TTMP4_TTMP5_TTMP6_TTMP7_TTMP8_TTMP9_TTMP10_TTMP11) \
  CASE_VI_GFX9PLUS(TTMP8_TTMP9_TTMP10_TTMP11_TTMP12_TTMP13_TTMP14_TTMP15) \
  CASE_VI_GFX9PLUS(TTMP0_TTMP1_TTMP2_TTMP3_TTMP4_TTMP5_TTMP6_TTMP7_TTMP8_TTMP9_TTMP10_TTMP11_TTMP12_TTMP13_TTMP14_TTMP15) \
  }

#define CASE_CI_VI(node) \
  assert(!isSI(STI)); \
  case node: return isCI(STI) ? node##_ci : node##_vi;

#define CASE_VI_GFX9PLUS(node) \
  case node: return isGFX9Plus(STI) ? node##_gfx9plus : node##_vi;

unsigned getMCReg(unsigned Reg, const MCSubtargetInfo &STI) {
  if (STI.getTargetTriple().getArch() == Triple::r600)
    return Reg;
  MAP_REG2REG
}

#undef CASE_CI_VI
#undef CASE_VI_GFX9PLUS

#define CASE_CI_VI(node) case node##_ci: case node##_vi: return node;
#define CASE_VI_GFX9PLUS(node) case node##_vi: case node##_gfx9plus: return node;

unsigned mc2PseudoReg(unsigned Reg) {
  MAP_REG2REG
}

#undef CASE_CI_VI
#undef CASE_VI_GFX9PLUS
#undef MAP_REG2REG

bool isSISrcOperand(const MCInstrDesc &Desc, unsigned OpNo) {
  assert(OpNo < Desc.NumOperands);
  unsigned OpType = Desc.OpInfo[OpNo].OperandType;
  return OpType >= AMDGPU::OPERAND_SRC_FIRST &&
         OpType <= AMDGPU::OPERAND_SRC_LAST;
}

bool isSISrcFPOperand(const MCInstrDesc &Desc, unsigned OpNo) {
  assert(OpNo < Desc.NumOperands);
  unsigned OpType = Desc.OpInfo[OpNo].OperandType;
  switch (OpType) {
  case AMDGPU::OPERAND_REG_IMM_FP32:
  case AMDGPU::OPERAND_REG_IMM_FP64:
  case AMDGPU::OPERAND_REG_IMM_FP16:
  case AMDGPU::OPERAND_REG_IMM_V2FP16:
  case AMDGPU::OPERAND_REG_IMM_V2INT16:
  case AMDGPU::OPERAND_REG_INLINE_C_FP32:
  case AMDGPU::OPERAND_REG_INLINE_C_FP64:
  case AMDGPU::OPERAND_REG_INLINE_C_FP16:
  case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
  case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
  case AMDGPU::OPERAND_REG_INLINE_AC_FP32:
  case AMDGPU::OPERAND_REG_INLINE_AC_FP16:
  case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16:
  case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16:
  case AMDGPU::OPERAND_REG_IMM_V2FP32:
  case AMDGPU::OPERAND_REG_INLINE_C_V2FP32:
  case AMDGPU::OPERAND_REG_INLINE_AC_FP64:
    return true;
  default:
    return false;
  }
}

bool isSISrcInlinableOperand(const MCInstrDesc &Desc, unsigned OpNo) {
  assert(OpNo < Desc.NumOperands);
  unsigned OpType = Desc.OpInfo[OpNo].OperandType;
  return OpType >= AMDGPU::OPERAND_REG_INLINE_C_FIRST &&
         OpType <= AMDGPU::OPERAND_REG_INLINE_C_LAST;
}

// Avoid using MCRegisterClass::getSize, since that function will go away
// (move from MC* level to Target* level). Return size in bits.
unsigned getRegBitWidth(unsigned RCID) {
  switch (RCID) {
  case AMDGPU::VGPR_LO16RegClassID:
  case AMDGPU::VGPR_HI16RegClassID:
  case AMDGPU::SGPR_LO16RegClassID:
  case AMDGPU::AGPR_LO16RegClassID:
    return 16;
  case AMDGPU::SGPR_32RegClassID:
  case AMDGPU::VGPR_32RegClassID:
  case AMDGPU::VRegOrLds_32RegClassID:
  case AMDGPU::AGPR_32RegClassID:
  case AMDGPU::VS_32RegClassID:
  case AMDGPU::AV_32RegClassID:
  case AMDGPU::SReg_32RegClassID:
  case AMDGPU::SReg_32_XM0RegClassID:
  case AMDGPU::SRegOrLds_32RegClassID:
    return 32;
  case AMDGPU::SGPR_64RegClassID:
  case AMDGPU::VS_64RegClassID:
  case AMDGPU::AV_64RegClassID:
  case AMDGPU::SReg_64RegClassID:
  case AMDGPU::VReg_64RegClassID:
  case AMDGPU::AReg_64RegClassID:
  case AMDGPU::SReg_64_XEXECRegClassID:
  case AMDGPU::VReg_64_Align2RegClassID:
  case AMDGPU::AReg_64_Align2RegClassID:
    return 64;
  case AMDGPU::SGPR_96RegClassID:
  case AMDGPU::SReg_96RegClassID:
  case AMDGPU::VReg_96RegClassID:
  case AMDGPU::AReg_96RegClassID:
  case AMDGPU::VReg_96_Align2RegClassID:
  case AMDGPU::AReg_96_Align2RegClassID:
  case AMDGPU::AV_96RegClassID:
    return 96;
  case AMDGPU::SGPR_128RegClassID:
  case AMDGPU::SReg_128RegClassID:
  case AMDGPU::VReg_128RegClassID:
  case AMDGPU::AReg_128RegClassID:
  case AMDGPU::VReg_128_Align2RegClassID:
  case AMDGPU::AReg_128_Align2RegClassID:
  case AMDGPU::AV_128RegClassID:
    return 128;
  case AMDGPU::SGPR_160RegClassID:
  case AMDGPU::SReg_160RegClassID:
  case AMDGPU::VReg_160RegClassID:
  case AMDGPU::AReg_160RegClassID:
  case AMDGPU::VReg_160_Align2RegClassID:
  case AMDGPU::AReg_160_Align2RegClassID:
  case AMDGPU::AV_160RegClassID:
    return 160;
  case AMDGPU::SGPR_192RegClassID:
  case AMDGPU::SReg_192RegClassID:
  case AMDGPU::VReg_192RegClassID:
  case AMDGPU::AReg_192RegClassID:
  case AMDGPU::VReg_192_Align2RegClassID:
  case AMDGPU::AReg_192_Align2RegClassID:
    return 192;
  case AMDGPU::SGPR_256RegClassID:
  case AMDGPU::SReg_256RegClassID:
  case AMDGPU::VReg_256RegClassID:
  case AMDGPU::AReg_256RegClassID:
  case AMDGPU::VReg_256_Align2RegClassID:
  case AMDGPU::AReg_256_Align2RegClassID:
    return 256;
  case AMDGPU::SGPR_512RegClassID:
  case AMDGPU::SReg_512RegClassID:
  case AMDGPU::VReg_512RegClassID:
  case AMDGPU::AReg_512RegClassID:
  case AMDGPU::VReg_512_Align2RegClassID:
  case AMDGPU::AReg_512_Align2RegClassID:
    return 512;
  case AMDGPU::SGPR_1024RegClassID:
  case AMDGPU::SReg_1024RegClassID:
  case AMDGPU::VReg_1024RegClassID:
  case AMDGPU::AReg_1024RegClassID:
  case AMDGPU::VReg_1024_Align2RegClassID:
  case AMDGPU::AReg_1024_Align2RegClassID:
    return 1024;
  default:
    llvm_unreachable("Unexpected register class");
  }
}

unsigned getRegBitWidth(const MCRegisterClass &RC) {
  return getRegBitWidth(RC.getID());
}

unsigned getRegOperandSize(const MCRegisterInfo *MRI, const MCInstrDesc &Desc,
                           unsigned OpNo) {
  assert(OpNo < Desc.NumOperands);
  unsigned RCID = Desc.OpInfo[OpNo].RegClass;
  return getRegBitWidth(MRI->getRegClass(RCID)) / 8;
}

bool isInlinableLiteral64(int64_t Literal, bool HasInv2Pi) {
  if (isInlinableIntLiteral(Literal))
    return true;

  uint64_t Val = static_cast<uint64_t>(Literal);
  return (Val == DoubleToBits(0.0)) ||
         (Val == DoubleToBits(1.0)) ||
         (Val == DoubleToBits(-1.0)) ||
         (Val == DoubleToBits(0.5)) ||
         (Val == DoubleToBits(-0.5)) ||
         (Val == DoubleToBits(2.0)) ||
         (Val == DoubleToBits(-2.0)) ||
         (Val == DoubleToBits(4.0)) ||
         (Val == DoubleToBits(-4.0)) ||
         (Val == 0x3fc45f306dc9c882 && HasInv2Pi);
}

bool isInlinableLiteral32(int32_t Literal, bool HasInv2Pi) {
  if (isInlinableIntLiteral(Literal))
    return true;

  // The actual type of the operand does not seem to matter as long
  // as the bits match one of the inline immediate values. For example:
  //
  // -nan has the hexadecimal encoding of 0xfffffffe which is -2 in decimal,
  // so it is a legal inline immediate.
  //
  // 1065353216 has the hexadecimal encoding 0x3f800000 which is 1.0f in
  // floating-point, so it is a legal inline immediate.

  uint32_t Val = static_cast<uint32_t>(Literal);
  return (Val == FloatToBits(0.0f)) ||
         (Val == FloatToBits(1.0f)) ||
         (Val == FloatToBits(-1.0f)) ||
         (Val == FloatToBits(0.5f)) ||
         (Val == FloatToBits(-0.5f)) ||
         (Val == FloatToBits(2.0f)) ||
         (Val == FloatToBits(-2.0f)) ||
         (Val == FloatToBits(4.0f)) ||
         (Val == FloatToBits(-4.0f)) ||
         (Val == 0x3e22f983 && HasInv2Pi);
}

bool isInlinableLiteral16(int16_t Literal, bool HasInv2Pi) {
  if (!HasInv2Pi)
    return false;

  if (isInlinableIntLiteral(Literal))
    return true;

  uint16_t Val = static_cast<uint16_t>(Literal);
  return Val == 0x3C00 || // 1.0
         Val == 0xBC00 || // -1.0
         Val == 0x3800 || // 0.5
         Val == 0xB800 || // -0.5
         Val == 0x4000 || // 2.0
         Val == 0xC000 || // -2.0
         Val == 0x4400 || // 4.0
         Val == 0xC400 || // -4.0
         Val == 0x3118;   // 1/2pi
}

bool isInlinableLiteralV216(int32_t Literal, bool HasInv2Pi) {
  assert(HasInv2Pi);

  if (isInt<16>(Literal) || isUInt<16>(Literal)) {
    int16_t Trunc = static_cast<int16_t>(Literal);
    return AMDGPU::isInlinableLiteral16(Trunc, HasInv2Pi);
  }
  if (!(Literal & 0xffff))
    return AMDGPU::isInlinableLiteral16(Literal >> 16, HasInv2Pi);

  int16_t Lo16 = static_cast<int16_t>(Literal);
  int16_t Hi16 = static_cast<int16_t>(Literal >> 16);
  return Lo16 == Hi16 && isInlinableLiteral16(Lo16, HasInv2Pi);
}
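
// Worked example (values taken from the half-precision table above): the
// packed v2f16 literal 0x3C003C00, i.e. <1.0, 1.0>, has Lo16 == Hi16 == 0x3C00
// and is reported inlinable by isInlinableLiteralV216, whereas 0x3C004000,
// which packs 1.0 and 2.0 in its two halves, is not.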

bool isInlinableIntLiteralV216(int32_t Literal) {
  int16_t Lo16 = static_cast<int16_t>(Literal);
  if (isInt<16>(Literal) || isUInt<16>(Literal))
    return isInlinableIntLiteral(Lo16);

  int16_t Hi16 = static_cast<int16_t>(Literal >> 16);
  if (!(Literal & 0xffff))
    return isInlinableIntLiteral(Hi16);
  return Lo16 == Hi16 && isInlinableIntLiteral(Lo16);
}

bool isFoldableLiteralV216(int32_t Literal, bool HasInv2Pi) {
  assert(HasInv2Pi);

  int16_t Lo16 = static_cast<int16_t>(Literal);
  if (isInt<16>(Literal) || isUInt<16>(Literal))
    return true;

  int16_t Hi16 = static_cast<int16_t>(Literal >> 16);
  if (!(Literal & 0xffff))
    return true;
  return Lo16 == Hi16;
}

bool isArgPassedInSGPR(const Argument *A) {
  const Function *F = A->getParent();

  // Arguments to compute shaders are never a source of divergence.
  CallingConv::ID CC = F->getCallingConv();
  switch (CC) {
  case CallingConv::AMDGPU_KERNEL:
  case CallingConv::SPIR_KERNEL:
    return true;
  case CallingConv::AMDGPU_VS:
  case CallingConv::AMDGPU_LS:
  case CallingConv::AMDGPU_HS:
  case CallingConv::AMDGPU_ES:
  case CallingConv::AMDGPU_GS:
  case CallingConv::AMDGPU_PS:
  case CallingConv::AMDGPU_CS:
  case CallingConv::AMDGPU_Gfx:
    // For non-compute shaders, SGPR inputs are marked with either inreg or
    // byval. Everything else is in VGPRs.
    return F->getAttributes().hasParamAttribute(A->getArgNo(), Attribute::InReg) ||
           F->getAttributes().hasParamAttribute(A->getArgNo(), Attribute::ByVal);
  default:
    // TODO: Should calls support inreg for SGPR inputs?
    return false;
  }
}

static bool hasSMEMByteOffset(const MCSubtargetInfo &ST) {
  return isGCN3Encoding(ST) || isGFX10Plus(ST);
}

static bool hasSMRDSignedImmOffset(const MCSubtargetInfo &ST) {
  return isGFX9Plus(ST);
}

bool isLegalSMRDEncodedUnsignedOffset(const MCSubtargetInfo &ST,
                                      int64_t EncodedOffset) {
  return hasSMEMByteOffset(ST) ? isUInt<20>(EncodedOffset)
                               : isUInt<8>(EncodedOffset);
}

bool isLegalSMRDEncodedSignedOffset(const MCSubtargetInfo &ST,
                                    int64_t EncodedOffset,
                                    bool IsBuffer) {
  return !IsBuffer &&
         hasSMRDSignedImmOffset(ST) &&
         isInt<21>(EncodedOffset);
}

static bool isDwordAligned(uint64_t ByteOffset) {
  return (ByteOffset & 3) == 0;
}

uint64_t convertSMRDOffsetUnits(const MCSubtargetInfo &ST,
                                uint64_t ByteOffset) {
  if (hasSMEMByteOffset(ST))
    return ByteOffset;

  assert(isDwordAligned(ByteOffset));
  return ByteOffset >> 2;
}

Optional<int64_t> getSMRDEncodedOffset(const MCSubtargetInfo &ST,
                                       int64_t ByteOffset, bool IsBuffer) {
  // The signed version is always a byte offset.
  if (!IsBuffer && hasSMRDSignedImmOffset(ST)) {
    assert(hasSMEMByteOffset(ST));
    return isInt<20>(ByteOffset) ? Optional<int64_t>(ByteOffset) : None;
  }

  if (!isDwordAligned(ByteOffset) && !hasSMEMByteOffset(ST))
    return None;

  int64_t EncodedOffset = convertSMRDOffsetUnits(ST, ByteOffset);
  return isLegalSMRDEncodedUnsignedOffset(ST, EncodedOffset)
             ? Optional<int64_t>(EncodedOffset)
             : None;
}

Optional<int64_t> getSMRDEncodedLiteralOffset32(const MCSubtargetInfo &ST,
                                                int64_t ByteOffset) {
  if (!isCI(ST) || !isDwordAligned(ByteOffset))
    return None;

  int64_t EncodedOffset = convertSMRDOffsetUnits(ST, ByteOffset);
  return isUInt<32>(EncodedOffset) ? Optional<int64_t>(EncodedOffset) : None;
}
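
// To make the unit conversion concrete (an illustration of the helpers above,
// not extra hardware documentation): on a subtarget without SMEM byte offsets
// (pre-GCN3 encoding, non-gfx10), a byte offset of 16 is encoded as 4 dwords
// by convertSMRDOffsetUnits, whereas gfx9+ keeps the raw signed byte offset.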

unsigned getNumFlatOffsetBits(const MCSubtargetInfo &ST, bool Signed) {
  // Address offset is 12-bit signed for GFX10, 13-bit for GFX9.
  if (AMDGPU::isGFX10(ST))
    return Signed ? 12 : 11;

  return Signed ? 13 : 12;
}

// Given Imm, split it into the values to put into the SOffset and ImmOffset
// fields in an MUBUF instruction. Return false if it is not possible (due to a
// hardware bug needing a workaround).
//
// The required alignment ensures that individual address components remain
// aligned if they are aligned to begin with. It also ensures that additional
// offsets within the given alignment can be added to the resulting ImmOffset.
bool splitMUBUFOffset(uint32_t Imm, uint32_t &SOffset, uint32_t &ImmOffset,
                      const GCNSubtarget *Subtarget, Align Alignment) {
  const uint32_t MaxImm = alignDown(4095, Alignment.value());
  uint32_t Overflow = 0;

  if (Imm > MaxImm) {
    if (Imm <= MaxImm + 64) {
      // Use an SOffset inline constant for 4..64
      Overflow = Imm - MaxImm;
      Imm = MaxImm;
    } else {
      // Try to keep the same value in SOffset for adjacent loads, so that
      // the corresponding register contents can be re-used.
      //
      // Load values with all low-bits (except for alignment bits) set into
      // SOffset, so that a larger range of values can be covered using
      // s_movk_i32.
      //
      // Atomic operations fail to work correctly when individual address
      // components are unaligned, even if their sum is aligned.
      uint32_t High = (Imm + Alignment.value()) & ~4095;
      uint32_t Low = (Imm + Alignment.value()) & 4095;
      Imm = Low;
      Overflow = High - Alignment.value();
    }
  }

  // There is a hardware bug in SI and CI which prevents address clamping in
  // MUBUF instructions from working correctly with SOffsets. The immediate
  // offset is unaffected.
  if (Overflow > 0 &&
      Subtarget->getGeneration() <= AMDGPUSubtarget::SEA_ISLANDS)
    return false;

  ImmOffset = Imm;
  SOffset = Overflow;
  return true;
}
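
// Worked example for the arithmetic above (illustration only): with an
// Alignment of 4, MaxImm is alignDown(4095, 4) = 4092. An Imm of 4100 falls in
// the (MaxImm, MaxImm + 64] range, so the split is ImmOffset = 4092 and
// SOffset = 8, and the call succeeds unless the SI/CI SOffset workaround
// applies.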

SIModeRegisterDefaults::SIModeRegisterDefaults(const Function &F) {
  *this = getDefaultForCallingConv(F.getCallingConv());

  StringRef IEEEAttr = F.getFnAttribute("amdgpu-ieee").getValueAsString();
  if (!IEEEAttr.empty())
    IEEE = IEEEAttr == "true";

  StringRef DX10ClampAttr
    = F.getFnAttribute("amdgpu-dx10-clamp").getValueAsString();
  if (!DX10ClampAttr.empty())
    DX10Clamp = DX10ClampAttr == "true";

  StringRef DenormF32Attr =
      F.getFnAttribute("denormal-fp-math-f32").getValueAsString();
  if (!DenormF32Attr.empty()) {
    DenormalMode DenormMode = parseDenormalFPAttribute(DenormF32Attr);
    FP32InputDenormals = DenormMode.Input == DenormalMode::IEEE;
    FP32OutputDenormals = DenormMode.Output == DenormalMode::IEEE;
  }

  StringRef DenormAttr =
      F.getFnAttribute("denormal-fp-math").getValueAsString();
  if (!DenormAttr.empty()) {
    DenormalMode DenormMode = parseDenormalFPAttribute(DenormAttr);

    if (DenormF32Attr.empty()) {
      FP32InputDenormals = DenormMode.Input == DenormalMode::IEEE;
      FP32OutputDenormals = DenormMode.Output == DenormalMode::IEEE;
    }

    FP64FP16InputDenormals = DenormMode.Input == DenormalMode::IEEE;
    FP64FP16OutputDenormals = DenormMode.Output == DenormalMode::IEEE;
  }
}

namespace {

struct SourceOfDivergence {
  unsigned Intr;
};
const SourceOfDivergence *lookupSourceOfDivergence(unsigned Intr);

#define GET_SourcesOfDivergence_IMPL
#define GET_Gfx9BufferFormat_IMPL
#define GET_Gfx10PlusBufferFormat_IMPL
#include "AMDGPUGenSearchableTables.inc"

} // end anonymous namespace

bool isIntrinsicSourceOfDivergence(unsigned IntrID) {
  return lookupSourceOfDivergence(IntrID);
}

const GcnBufferFormatInfo *getGcnBufferFormatInfo(uint8_t BitsPerComp,
                                                  uint8_t NumComponents,
                                                  uint8_t NumFormat,
                                                  const MCSubtargetInfo &STI) {
  return isGFX10Plus(STI)
             ? getGfx10PlusBufferFormatInfo(BitsPerComp, NumComponents,
                                            NumFormat)
             : getGfx9BufferFormatInfo(BitsPerComp, NumComponents, NumFormat);
}

const GcnBufferFormatInfo *getGcnBufferFormatInfo(uint8_t Format,
                                                  const MCSubtargetInfo &STI) {
  return isGFX10Plus(STI) ? getGfx10PlusBufferFormatInfo(Format)
                          : getGfx9BufferFormatInfo(Format);
}

} // namespace AMDGPU

raw_ostream &operator<<(raw_ostream &OS,
                        const AMDGPU::IsaInfo::TargetIDSetting S) {
  switch (S) {
  case (AMDGPU::IsaInfo::TargetIDSetting::Unsupported):
    OS << "Unsupported";
    break;
  case (AMDGPU::IsaInfo::TargetIDSetting::Any):
    OS << "Any";
    break;
  case (AMDGPU::IsaInfo::TargetIDSetting::Off):
    OS << "Off";
    break;
  case (AMDGPU::IsaInfo::TargetIDSetting::On):
    OS << "On";
    break;
  }
  return OS;
}

} // namespace llvm