1 //===-- AMDGPUBaseInfo.cpp - AMDGPU Base encoding information--------------===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 #include "AMDGPUBaseInfo.h" 10 #include "AMDGPU.h" 11 #include "SIDefines.h" 12 #include "llvm/CodeGen/MachineMemOperand.h" 13 #include "llvm/IR/Constants.h" 14 #include "llvm/IR/Function.h" 15 #include "llvm/IR/GlobalValue.h" 16 #include "llvm/IR/LLVMContext.h" 17 #include "llvm/MC/MCContext.h" 18 #include "llvm/MC/MCInstrInfo.h" 19 #include "llvm/MC/MCRegisterInfo.h" 20 #include "llvm/MC/MCSectionELF.h" 21 #include "llvm/MC/MCSubtargetInfo.h" 22 #include "llvm/MC/SubtargetFeature.h" 23 24 #define GET_SUBTARGETINFO_ENUM 25 #include "AMDGPUGenSubtargetInfo.inc" 26 #undef GET_SUBTARGETINFO_ENUM 27 28 #define GET_REGINFO_ENUM 29 #include "AMDGPUGenRegisterInfo.inc" 30 #undef GET_REGINFO_ENUM 31 32 #define GET_INSTRINFO_NAMED_OPS 33 #define GET_INSTRINFO_ENUM 34 #include "AMDGPUGenInstrInfo.inc" 35 #undef GET_INSTRINFO_NAMED_OPS 36 #undef GET_INSTRINFO_ENUM 37 38 namespace { 39 40 /// \returns Bit mask for given bit \p Shift and bit \p Width. 41 unsigned getBitMask(unsigned Shift, unsigned Width) { 42 return ((1 << Width) - 1) << Shift; 43 } 44 45 /// \brief Packs \p Src into \p Dst for given bit \p Shift and bit \p Width. 46 /// 47 /// \returns Packed \p Dst. 48 unsigned packBits(unsigned Src, unsigned Dst, unsigned Shift, unsigned Width) { 49 Dst &= ~(1 << Shift) & ~getBitMask(Shift, Width); 50 Dst |= (Src << Shift) & getBitMask(Shift, Width); 51 return Dst; 52 } 53 54 /// \brief Unpacks bits from \p Src for given bit \p Shift and bit \p Width. 55 /// 56 /// \returns Unpacked bits. 
unsigned unpackBits(unsigned Src, unsigned Shift, unsigned Width) {
  // Isolate the field in place, then move it down so that it starts at bit 0.
  return (Src & getBitMask(Shift, Width)) >> Shift;
}

// The shift/width pairs below describe three fields of one packed word:
// Vmcnt occupies bits [3:0], Expcnt bits [6:4] and Lgkmcnt bits [11:8].

/// \returns Vmcnt bit shift.
unsigned getVmcntBitShift() { return 0; }

/// \returns Vmcnt bit width.
unsigned getVmcntBitWidth() { return 4; }

/// \returns Expcnt bit shift.
unsigned getExpcntBitShift() { return 4; }

/// \returns Expcnt bit width.
unsigned getExpcntBitWidth() { return 3; }

/// \returns Lgkmcnt bit shift.
unsigned getLgkmcntBitShift() { return 8; }

/// \returns Lgkmcnt bit width.
unsigned getLgkmcntBitWidth() { return 4; }

} // namespace anonymous

namespace llvm {
namespace AMDGPU {

namespace IsaInfo {

/// \returns The ISA version selected by \p Features.
///
/// The first FeatureISAVersion* bit found (tested in the order below) decides
/// the result. When none is set, {0, 0, 0} is returned if FeatureGCN is clear
/// or FeatureSouthernIslands is set, and {7, 0, 0} otherwise. The three
/// values are presumably {Major, Minor, Stepping}; confirm against the
/// IsaVersion declaration.
IsaVersion getIsaVersion(const FeatureBitset &Features) {
  // CI.
  if (Features.test(FeatureISAVersion7_0_0))
    return {7, 0, 0};
  if (Features.test(FeatureISAVersion7_0_1))
    return {7, 0, 1};
  if (Features.test(FeatureISAVersion7_0_2))
    return {7, 0, 2};

  // VI.
  if (Features.test(FeatureISAVersion8_0_0))
    return {8, 0, 0};
  if (Features.test(FeatureISAVersion8_0_1))
    return {8, 0, 1};
  if (Features.test(FeatureISAVersion8_0_2))
    return {8, 0, 2};
  if (Features.test(FeatureISAVersion8_0_3))
    return {8, 0, 3};
  if (Features.test(FeatureISAVersion8_0_4))
    return {8, 0, 4};
  if (Features.test(FeatureISAVersion8_1_0))
    return {8, 1, 0};

  // No explicit ISA version feature: non-GCN and Southern Islands targets
  // report an all-zero version, every other target is reported as 7.0.0.
  if (!Features.test(FeatureGCN) || Features.test(FeatureSouthernIslands))
    return {0, 0, 0};
  return {7, 0, 0};
}

/// \returns 16 or 32 when the matching FeatureWavefrontSize* bit is set in
/// \p Features, and 64 otherwise.
unsigned getWavefrontSize(const FeatureBitset &Features) {
  if (Features.test(FeatureWavefrontSize16))
    return 16;
  if (Features.test(FeatureWavefrontSize32))
    return 32;

  return 64;
}

/// \returns 32768 or 65536 when the matching FeatureLocalMemorySize* bit is
/// set in \p Features, and 0 when neither is set.
unsigned getLocalMemorySize(const FeatureBitset &Features) {
  if (Features.test(FeatureLocalMemorySize32768))
    return 32768;
  if (Features.test(FeatureLocalMemorySize65536))
    return 65536;

  return 0;
}

/// \returns Number of execution units per compute unit. This is always 4;
/// \p Features is not consulted.
unsigned getEUsPerCU(const FeatureBitset &Features) {
  return 4;
}

/// \returns Maximum number of work groups per compute unit for work groups of
/// \p FlatWorkGroupSize items: 8 without FeatureGCN; with it, 40 when a work
/// group fits into a single wave and 16 otherwise.
unsigned getMaxWorkGroupsPerCU(const FeatureBitset &Features,
                               unsigned FlatWorkGroupSize) {
  if (!Features.test(FeatureGCN))
    return 8;
  return getWavesPerWorkGroup(Features, FlatWorkGroupSize) == 1 ? 40 : 16;
}

/// \returns Maximum number of waves per compute unit, computed as the maximum
/// number of waves per execution unit times the execution units per compute
/// unit.
unsigned getMaxWavesPerCU(const FeatureBitset &Features) {
  return getMaxWavesPerEU(Features) * getEUsPerCU(Features);
}

/// \returns The number of waves needed by one work group of
/// \p FlatWorkGroupSize items (this overload simply forwards to
/// getWavesPerWorkGroup).
unsigned getMaxWavesPerCU(const FeatureBitset &Features,
                          unsigned FlatWorkGroupSize) {
  return getWavesPerWorkGroup(Features, FlatWorkGroupSize);
}

/// \returns Minimum number of waves per execution unit. This is always 1;
/// \p Features is not consulted.
unsigned getMinWavesPerEU(const FeatureBitset &Features) {
  return 1;
}

/// \returns Maximum number of waves per execution unit: 8 without FeatureGCN
/// and 10 with it.
unsigned getMaxWavesPerEU(const FeatureBitset &Features) {
  if (!Features.test(FeatureGCN))
    return 8;
  // FIXME: Need to take scratch memory into account.
  return 10;
}

/// \returns The per-compute-unit wave count for \p FlatWorkGroupSize divided
/// by the number of execution units per compute unit, rounded up.
unsigned getMaxWavesPerEU(const FeatureBitset &Features,
                          unsigned FlatWorkGroupSize) {
  // alignTo() first rounds the dividend up to a multiple of the divisor, so
  // the integer division below acts as a ceiling division.
  return alignTo(getMaxWavesPerCU(Features, FlatWorkGroupSize),
                 getEUsPerCU(Features)) / getEUsPerCU(Features);
}

/// \returns Minimum flat work group size. This is always 1; \p Features is
/// not consulted.
unsigned getMinFlatWorkGroupSize(const FeatureBitset &Features) {
  return 1;
}

/// \returns Maximum flat work group size. This is always 2048; \p Features is
/// not consulted.
unsigned getMaxFlatWorkGroupSize(const FeatureBitset &Features) {
  return 2048;
}

/// \returns Number of waves needed to hold \p FlatWorkGroupSize work items,
/// i.e. \p FlatWorkGroupSize divided by the wavefront size, rounded up.
unsigned getWavesPerWorkGroup(const FeatureBitset &Features,
                              unsigned FlatWorkGroupSize) {
  return alignTo(FlatWorkGroupSize, getWavefrontSize(Features)) /
                 getWavefrontSize(Features);
}

/// \returns SGPR allocation granule: 16 when the ISA major version is at
/// least 8, and 8 otherwise.
unsigned getSGPRAllocGranule(const FeatureBitset &Features) {
  IsaVersion Version = getIsaVersion(Features);
  if (Version.Major >= 8)
    return 16;
  return 8;
}

/// \returns SGPR encoding granule. This is always 8; \p Features is not
/// consulted.
unsigned getSGPREncodingGranule(const FeatureBitset &Features) {
  return 8;
}

/// \returns Total number of SGPRs: 800 when the ISA major version is at least
/// 8, and 512 otherwise.
unsigned getTotalNumSGPRs(const FeatureBitset &Features) {
  IsaVersion Version = getIsaVersion(Features);
  if (Version.Major >= 8)
    return 800;
  return 512;
}

/// \returns Number of addressable SGPRs: FIXED_NUM_SGPRS_FOR_INIT_BUG when
/// FeatureSGPRInitBug is set; otherwise 102 when the ISA major version is at
/// least 8, and 104 for older versions.
unsigned getAddressableNumSGPRs(const FeatureBitset &Features) {
  // The init-bug workaround takes precedence over the per-version limits.
  if (Features.test(FeatureSGPRInitBug))
    return FIXED_NUM_SGPRS_FOR_INIT_BUG;

  IsaVersion Version = getIsaVersion(Features);
  if (Version.Major >= 8)
    return 102;
  return 104;
}

/// \returns Minimum number of SGPRs that corresponds to \p WavesPerEU waves
/// per execution unit, capped at the addressable SGPR count; 0 when
/// \p WavesPerEU already reaches the maximum number of waves per execution
/// unit. \p WavesPerEU must not be 0.
unsigned getMinNumSGPRs(const FeatureBitset &Features, unsigned WavesPerEU) {
  assert(WavesPerEU != 0);

  if (WavesPerEU >= getMaxWavesPerEU(Features))
    return 0;
  // One register more than the granule-aligned budget that would still allow
  // (WavesPerEU + 1) waves.
  unsigned MinNumSGPRs =
      alignDown(getTotalNumSGPRs(Features) / (WavesPerEU + 1),
                getSGPRAllocGranule(Features)) + 1;
  return std::min(MinNumSGPRs, getAddressableNumSGPRs(Features));
}

/// \returns Maximum number of SGPRs available with \p WavesPerEU waves per
/// execution unit: the total SGPR count divided by \p WavesPerEU, rounded down
/// to the allocation granule and capped. The cap is the addressable SGPR
/// count, except that it is 112 when the ISA major version is at least 8 and
/// \p Addressable is false. \p WavesPerEU must not be 0.
unsigned getMaxNumSGPRs(const FeatureBitset &Features, unsigned WavesPerEU,
                        bool Addressable) {
  assert(WavesPerEU != 0);

  IsaVersion Version = getIsaVersion(Features);
  unsigned MaxNumSGPRs = alignDown(getTotalNumSGPRs(Features) / WavesPerEU,
                                   getSGPRAllocGranule(Features));
  unsigned AddressableNumSGPRs = getAddressableNumSGPRs(Features);
  // Callers that do not need every register to be addressable get the larger
  // limit of 112 on ISA major version 8 and above.
  if (Version.Major >= 8 && !Addressable)
    AddressableNumSGPRs = 112;
  return std::min(MaxNumSGPRs, AddressableNumSGPRs);
}

/// \returns VGPR allocation granule. This is always 4; \p Features is not
/// consulted.
unsigned getVGPRAllocGranule(const FeatureBitset &Features) {
  return 4;
}

/// \returns VGPR encoding granule, which equals the VGPR allocation granule.
unsigned getVGPREncodingGranule(const FeatureBitset &Features) {
  return getVGPRAllocGranule(Features);
}

/// \returns Total number of VGPRs. This is always 256; \p Features is not
/// consulted.
unsigned getTotalNumVGPRs(const FeatureBitset &Features) {
  return 256;
}

/// \returns Number of addressable VGPRs, which equals the total number of
/// VGPRs.
unsigned getAddressableNumVGPRs(const FeatureBitset &Features) {
  return getTotalNumVGPRs(Features);
}

/// \returns Minimum number of VGPRs that corresponds to \p WavesPerEU waves
/// per execution unit, capped at the addressable VGPR count; 0 when
/// \p WavesPerEU already reaches the maximum number of waves per execution
/// unit. \p WavesPerEU must not be 0.
unsigned getMinNumVGPRs(const FeatureBitset &Features, unsigned WavesPerEU) {
  assert(WavesPerEU != 0);

  if (WavesPerEU >= getMaxWavesPerEU(Features))
    return 0;
  // One register more than the granule-aligned budget that would still allow
  // (WavesPerEU + 1) waves.
  unsigned MinNumVGPRs =
      alignDown(getTotalNumVGPRs(Features) / (WavesPerEU + 1),
                getVGPRAllocGranule(Features)) + 1;
  return std::min(MinNumVGPRs, getAddressableNumVGPRs(Features));
}

/// \returns Maximum number of VGPRs available with \p WavesPerEU waves per
/// execution unit: the total VGPR count divided by \p WavesPerEU, rounded down
/// to the allocation granule and capped at the addressable VGPR count.
/// \p WavesPerEU must not be 0.
unsigned getMaxNumVGPRs(const FeatureBitset &Features, unsigned WavesPerEU) {
  assert(WavesPerEU != 0);

  unsigned MaxNumVGPRs = alignDown(getTotalNumVGPRs(Features) / WavesPerEU,
                                   getVGPRAllocGranule(Features));
  unsigned AddressableNumVGPRs = getAddressableNumVGPRs(Features);
  return std::min(MaxNumVGPRs, AddressableNumVGPRs);
}

} // namespace IsaInfo

/// Resets \p Header to all zero bytes and then fills in the default values:
/// code version 1.0, the machine kind, the ISA version derived from
/// \p Features, the entry offset, the wavefront size, the call convention and
/// the segment alignments.
void initDefaultAMDKernelCodeT(amd_kernel_code_t &Header,
                               const FeatureBitset &Features) {
  IsaInfo::IsaVersion ISA = IsaInfo::getIsaVersion(Features);

  // Every field that is not assigned explicitly below stays zero.
  memset(&Header, 0, sizeof(Header));

  Header.amd_kernel_code_version_major = 1;
  Header.amd_kernel_code_version_minor = 0;
  Header.amd_machine_kind = 1; // AMD_MACHINE_KIND_AMDGPU
  Header.amd_machine_version_major = ISA.Major;
  Header.amd_machine_version_minor = ISA.Minor;
  Header.amd_machine_version_stepping = ISA.Stepping;
  // The entry offset is set to the size of the header itself.
  Header.kernel_code_entry_byte_offset = sizeof(Header);
  // wavefront_size is specified as a power of 2: 2^6 = 64 threads.
  Header.wavefront_size = 6;

  // If the code object does not support indirect functions, then the value must
  // be 0xffffffff.
  Header.call_convention = -1;

  // These alignment values are specified in powers of two, so alignment =
  // 2^n.  The minimum alignment is 2^4 = 16.
  Header.kernarg_segment_alignment = 4;
  Header.group_segment_alignment = 4;
  Header.private_segment_alignment = 4;
}

/// \returns The ".hsatext" PROGBITS section from \p Ctx, requested with the
/// alloc, write, execinstr, HSA agent and HSA code flags.
MCSection *getHSATextSection(MCContext &Ctx) {
  return Ctx.getELFSection(".hsatext", ELF::SHT_PROGBITS,
                           ELF::SHF_ALLOC | ELF::SHF_WRITE |
                           ELF::SHF_EXECINSTR |
                           ELF::SHF_AMDGPU_HSA_AGENT |
                           ELF::SHF_AMDGPU_HSA_CODE);
}

/// \returns The ".hsadata_global_agent" PROGBITS section from \p Ctx,
/// requested with the alloc, write, HSA global and HSA agent flags.
MCSection *getHSADataGlobalAgentSection(MCContext &Ctx) {
  return Ctx.getELFSection(".hsadata_global_agent", ELF::SHT_PROGBITS,
                           ELF::SHF_ALLOC | ELF::SHF_WRITE |
                           ELF::SHF_AMDGPU_HSA_GLOBAL |
                           ELF::SHF_AMDGPU_HSA_AGENT);
}

/// \returns The ".hsadata_global_program" PROGBITS section from \p Ctx,
/// requested with the alloc, write and HSA global flags.
MCSection *getHSADataGlobalProgramSection(MCContext &Ctx) {
  return  Ctx.getELFSection(".hsadata_global_program", ELF::SHT_PROGBITS,
                            ELF::SHF_ALLOC | ELF::SHF_WRITE |
                            ELF::SHF_AMDGPU_HSA_GLOBAL);
}

/// \returns The ".hsarodata_readonly_agent" PROGBITS section from \p Ctx,
/// requested with the alloc, HSA readonly and HSA agent flags.
MCSection *getHSARodataReadonlyAgentSection(MCContext &Ctx) {
  return Ctx.getELFSection(".hsarodata_readonly_agent", ELF::SHT_PROGBITS,
                           ELF::SHF_ALLOC | ELF::SHF_AMDGPU_HSA_READONLY |
                           ELF::SHF_AMDGPU_HSA_AGENT);
}

/// \returns True if \p GV lives in the local address space.
bool isGroupSegment(const GlobalValue *GV) {
  return GV->getType()->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS;
}

/// \returns True if \p GV lives in the global address space.
bool isGlobalSegment(const GlobalValue *GV) {
  return GV->getType()->getAddressSpace() == AMDGPUAS::GLOBAL_ADDRESS;
}

/// \returns True if \p GV lives in the constant address space.
bool isReadOnlySegment(const GlobalValue *GV) {
  return GV->getType()->getAddressSpace() == AMDGPUAS::CONSTANT_ADDRESS;
}

/// \returns True for every target triple \p TT whose OS is not AMDHSA.
bool shouldEmitConstantsToTextSection(const Triple &TT) {
  return TT.getOS() != Triple::AMDHSA;
}

/// Reads the function attribute \p Name of \p F as an integer.
///
/// \returns \p Default when the attribute is not a string attribute;
/// otherwise the parsed value. A value that cannot be parsed is reported
/// through the LLVMContext of \p F.
int getIntegerAttribute(const Function &F, StringRef Name, int Default) {
  Attribute A = F.getFnAttribute(Name);
  int Result = Default;

  if (A.isStringAttribute()) {
    StringRef Str = A.getValueAsString();
    // Radix 0 leaves the choice of base to getAsInteger(), which returns true
    // on failure.
    if (Str.getAsInteger(0, Result)) {
      LLVMContext &Ctx = F.getContext();
      Ctx.emitError("can't parse integer attribute " + Name);
    }
  }

  return Result;
}

/// Reads the function attribute \p Name of \p F as a comma separated pair of
/// integers.
///
/// \returns \p Default when the attribute is not a string attribute or when a
/// parse error was reported; otherwise the parsed pair. If
/// \p OnlyFirstRequired is true and the text after the comma is empty (after
/// trimming), the second value is taken from \p Default.
std::pair<int, int> getIntegerPairAttribute(const Function &F,
                                            StringRef Name,
                                            std::pair<int, int> Default,
                                            bool OnlyFirstRequired) {
  Attribute A = F.getFnAttribute(Name);
  if (!A.isStringAttribute())
    return Default;

  LLVMContext &Ctx = F.getContext();
  std::pair<int, int> Ints = Default;
  std::pair<StringRef, StringRef> Strs = A.getValueAsString().split(',');
  if (Strs.first.trim().getAsInteger(0, Ints.first)) {
    Ctx.emitError("can't parse first integer attribute " + Name);
    return Default;
  }
  if (Strs.second.trim().getAsInteger(0, Ints.second)) {
    // A missing second value is only an error when it is required; a second
    // value that is present but malformed is always an error.
    if (!OnlyFirstRequired || Strs.second.trim().size()) {
      Ctx.emitError("can't parse second integer attribute " + Name);
      return Default;
    }
  }

  return Ints;
}

/// \returns Mask covering the Vmcnt field width, not shifted into position.
/// \p Version is not consulted.
unsigned getVmcntBitMask(const IsaInfo::IsaVersion &Version) {
  return (1 << getVmcntBitWidth()) - 1;
}

/// \returns Mask covering the Expcnt field width, not shifted into position.
/// \p Version is not consulted.
unsigned getExpcntBitMask(const IsaInfo::IsaVersion &Version) {
  return (1 << getExpcntBitWidth()) - 1;
}

/// \returns Mask covering the Lgkmcnt field width, not shifted into position.
/// \p Version is not consulted.
unsigned getLgkmcntBitMask(const IsaInfo::IsaVersion &Version) {
  return (1 << getLgkmcntBitWidth()) - 1;
}

/// \returns Mask with the bits of all three counter fields set, each at its
/// position inside the packed Waitcnt value. \p Version is not consulted.
unsigned getWaitcntBitMask(const IsaInfo::IsaVersion &Version) {
  unsigned Vmcnt = getBitMask(getVmcntBitShift(), getVmcntBitWidth());
  unsigned Expcnt = getBitMask(getExpcntBitShift(), getExpcntBitWidth());
  unsigned Lgkmcnt = getBitMask(getLgkmcntBitShift(), getLgkmcntBitWidth());
  return Vmcnt | Expcnt | Lgkmcnt;
}

/// \returns The Vmcnt field extracted from the packed \p Waitcnt.
unsigned decodeVmcnt(const IsaInfo::IsaVersion &Version, unsigned Waitcnt) {
  return unpackBits(Waitcnt, getVmcntBitShift(), getVmcntBitWidth());
}

/// \returns The Expcnt field extracted from the packed \p Waitcnt.
unsigned decodeExpcnt(const IsaInfo::IsaVersion &Version, unsigned Waitcnt) {
  return unpackBits(Waitcnt, getExpcntBitShift(), getExpcntBitWidth());
}

/// \returns The Lgkmcnt field extracted from the packed \p Waitcnt.
unsigned decodeLgkmcnt(const IsaInfo::IsaVersion &Version, unsigned Waitcnt) {
  return unpackBits(Waitcnt, getLgkmcntBitShift(), getLgkmcntBitWidth());
}

/// Splits the packed \p Waitcnt into its three counters and stores them in
/// \p Vmcnt, \p Expcnt and \p Lgkmcnt.
void decodeWaitcnt(const IsaInfo::IsaVersion &Version, unsigned Waitcnt,
                   unsigned &Vmcnt, unsigned &Expcnt, unsigned &Lgkmcnt) {
  Vmcnt = decodeVmcnt(Version, Waitcnt);
  Expcnt = decodeExpcnt(Version, Waitcnt);
  Lgkmcnt = decodeLgkmcnt(Version, Waitcnt);
}

/// \returns \p Waitcnt with its Vmcnt field replaced by \p Vmcnt.
unsigned encodeVmcnt(const IsaInfo::IsaVersion &Version, unsigned Waitcnt,
                     unsigned Vmcnt) {
  return packBits(Vmcnt, Waitcnt, getVmcntBitShift(), getVmcntBitWidth());
}

/// \returns \p Waitcnt with its Expcnt field replaced by \p Expcnt.
unsigned encodeExpcnt(const IsaInfo::IsaVersion &Version, unsigned Waitcnt,
                      unsigned Expcnt) {
  return packBits(Expcnt, Waitcnt, getExpcntBitShift(), getExpcntBitWidth());
}

/// \returns \p Waitcnt with its Lgkmcnt field replaced by \p Lgkmcnt.
unsigned encodeLgkmcnt(const IsaInfo::IsaVersion &Version, unsigned Waitcnt,
                       unsigned Lgkmcnt) {
  return packBits(Lgkmcnt, Waitcnt, getLgkmcntBitShift(), getLgkmcntBitWidth());
}

/// \returns The packed Waitcnt value built from \p Vmcnt, \p Expcnt and
/// \p Lgkmcnt.
unsigned encodeWaitcnt(const IsaInfo::IsaVersion &Version,
                       unsigned Vmcnt, unsigned Expcnt, unsigned Lgkmcnt) {
  // Start with every counter bit set; each encode call below then overwrites
  // exactly one field.
  unsigned Waitcnt = getWaitcntBitMask(Version);
  Waitcnt = encodeVmcnt(Version, Waitcnt, Vmcnt);
  Waitcnt = encodeExpcnt(Version, Waitcnt, Expcnt);
  Waitcnt = encodeLgkmcnt(Version, Waitcnt, Lgkmcnt);
  return Waitcnt;
}

/// \returns The value of the "InitialPSInputAddr" attribute of \p F, or 0 if
/// it is not present as a string attribute.
unsigned getInitialPSInputAddr(const Function &F) {
  return getIntegerAttribute(F, "InitialPSInputAddr", 0);
}

/// \returns True if \p cc is one of the AMDGPU_VS, AMDGPU_GS, AMDGPU_PS or
/// AMDGPU_CS calling conventions.
bool isShader(CallingConv::ID cc) {
  switch(cc) {
    case CallingConv::AMDGPU_VS:
    case CallingConv::AMDGPU_GS:
    case CallingConv::AMDGPU_PS:
    case CallingConv::AMDGPU_CS:
      return true;
    default:
      return false;
  }
}

/// \returns True if \p cc is AMDGPU_CS or is not a shader calling convention
/// at all.
bool isCompute(CallingConv::ID cc) {
  return !isShader(cc) || cc == CallingConv::AMDGPU_CS;
}

/// \returns True if \p STI has the FeatureSouthernIslands bit set.
bool isSI(const MCSubtargetInfo &STI) {
  return STI.getFeatureBits()[AMDGPU::FeatureSouthernIslands];
}

/// \returns True if \p STI has the FeatureSeaIslands bit set.
bool isCI(const MCSubtargetInfo &STI) {
  return STI.getFeatureBits()[AMDGPU::FeatureSeaIslands];
}

/// \returns True if \p STI has the FeatureVolcanicIslands bit set.
bool isVI(const MCSubtargetInfo &STI) {
  return STI.getFeatureBits()[AMDGPU::FeatureVolcanicIslands];
}

/// Maps the FLAT_SCR, FLAT_SCR_LO and FLAT_SCR_HI registers to their
/// subtarget specific variants.
///
/// \returns The "_ci" variant when \p STI is CI and the "_vi" variant
/// otherwise; any other \p Reg is returned unchanged. The three registers
/// must not be requested for an SI subtarget (asserted).
unsigned getMCReg(unsigned Reg, const MCSubtargetInfo &STI) {

  switch(Reg) {
  default: break;
  case AMDGPU::FLAT_SCR:
    assert(!isSI(STI));
    return isCI(STI) ? AMDGPU::FLAT_SCR_ci : AMDGPU::FLAT_SCR_vi;

  case AMDGPU::FLAT_SCR_LO:
    assert(!isSI(STI));
    return isCI(STI) ? AMDGPU::FLAT_SCR_LO_ci : AMDGPU::FLAT_SCR_LO_vi;

  case AMDGPU::FLAT_SCR_HI:
    assert(!isSI(STI));
    return isCI(STI) ? AMDGPU::FLAT_SCR_HI_ci : AMDGPU::FLAT_SCR_HI_vi;
  }
  return Reg;
}

/// \returns True if the type of operand \p OpNo of \p Desc lies in the range
/// [OPERAND_SRC_FIRST, OPERAND_SRC_LAST]. \p OpNo must be a valid operand
/// index (asserted).
bool isSISrcOperand(const MCInstrDesc &Desc, unsigned OpNo) {
  assert(OpNo < Desc.NumOperands);
  unsigned OpType = Desc.OpInfo[OpNo].OperandType;
  return OpType >= AMDGPU::OPERAND_SRC_FIRST &&
         OpType <= AMDGPU::OPERAND_SRC_LAST;
}

/// \returns True if the type of operand \p OpNo of \p Desc is one of the
/// OPERAND_REG_IMM_FP* or OPERAND_REG_INLINE_C_FP* types listed below.
/// \p OpNo must be a valid operand index (asserted).
bool isSISrcFPOperand(const MCInstrDesc &Desc, unsigned OpNo) {
  assert(OpNo < Desc.NumOperands);
  unsigned OpType = Desc.OpInfo[OpNo].OperandType;
  switch (OpType) {
  case AMDGPU::OPERAND_REG_IMM_FP32:
  case AMDGPU::OPERAND_REG_IMM_FP64:
  case AMDGPU::OPERAND_REG_IMM_FP16:
  case AMDGPU::OPERAND_REG_INLINE_C_FP32:
  case AMDGPU::OPERAND_REG_INLINE_C_FP64:
  case AMDGPU::OPERAND_REG_INLINE_C_FP16:
    return true;
  default:
    return false;
  }
}

/// \returns True if the type of operand \p OpNo of \p Desc lies in the range
/// [OPERAND_REG_INLINE_C_FIRST, OPERAND_REG_INLINE_C_LAST]. \p OpNo must be a
/// valid operand index (asserted).
bool isSISrcInlinableOperand(const MCInstrDesc &Desc, unsigned OpNo) {
  assert(OpNo < Desc.NumOperands);
  unsigned OpType = Desc.OpInfo[OpNo].OperandType;
  return OpType >= AMDGPU::OPERAND_REG_INLINE_C_FIRST &&
         OpType <= AMDGPU::OPERAND_REG_INLINE_C_LAST;
}

// Avoid using MCRegisterClass::getSize, since that function will go away
// (move from MC* level to Target* level). Return size in bits.
//
// Only the register class IDs listed in the switch are supported; any other
// ID reaches llvm_unreachable.
unsigned getRegBitWidth(unsigned RCID) {
  switch (RCID) {
  case AMDGPU::SGPR_32RegClassID:
  case AMDGPU::VGPR_32RegClassID:
  case AMDGPU::VS_32RegClassID:
  case AMDGPU::SReg_32RegClassID:
  case AMDGPU::SReg_32_XM0RegClassID:
    return 32;
  case AMDGPU::SGPR_64RegClassID:
  case AMDGPU::VS_64RegClassID:
  case AMDGPU::SReg_64RegClassID:
  case AMDGPU::VReg_64RegClassID:
    return 64;
  case AMDGPU::VReg_96RegClassID:
    return 96;
  case AMDGPU::SGPR_128RegClassID:
  case AMDGPU::SReg_128RegClassID:
  case AMDGPU::VReg_128RegClassID:
    return 128;
  case AMDGPU::SReg_256RegClassID:
  case AMDGPU::VReg_256RegClassID:
    return 256;
  case AMDGPU::SReg_512RegClassID:
  case AMDGPU::VReg_512RegClassID:
    return 512;
  default:
    llvm_unreachable("Unexpected register class");
  }
}

/// \returns The size in bits of register class \p RC, looked up by its ID.
unsigned getRegBitWidth(const MCRegisterClass &RC) {
  return getRegBitWidth(RC.getID());
}

/// \returns The size in bytes of the register class of operand \p OpNo of
/// \p Desc. \p OpNo must be a valid operand index (asserted).
unsigned getRegOperandSize(const MCRegisterInfo *MRI, const MCInstrDesc &Desc,
                           unsigned OpNo) {
  assert(OpNo < Desc.NumOperands);
  unsigned RCID = Desc.OpInfo[OpNo].RegClass;
  // getRegBitWidth() reports bits; convert to bytes.
  return getRegBitWidth(MRI->getRegClass(RCID)) / 8;
}

/// \returns True if the 64-bit \p Literal is an integer in [-16, 64], or if
/// its bit pattern equals that of the double 0.0, +-0.5, +-1.0, +-2.0 or
/// +-4.0, or equals 0x3fc45f306dc9c882 while \p HasInv2Pi is true.
bool isInlinableLiteral64(int64_t Literal, bool HasInv2Pi) {
  if (Literal >= -16 && Literal <= 64)
    return true;

  // From here on only the raw bit pattern matters.
  uint64_t Val = static_cast<uint64_t>(Literal);
  return (Val == DoubleToBits(0.0)) ||
         (Val == DoubleToBits(1.0)) ||
         (Val == DoubleToBits(-1.0)) ||
         (Val == DoubleToBits(0.5)) ||
         (Val == DoubleToBits(-0.5)) ||
         (Val == DoubleToBits(2.0)) ||
         (Val == DoubleToBits(-2.0)) ||
         (Val == DoubleToBits(4.0)) ||
         (Val == DoubleToBits(-4.0)) ||
         (Val == 0x3fc45f306dc9c882 && HasInv2Pi);
}

/// \returns True if the 32-bit \p Literal is an integer in [-16, 64], or if
/// its bit pattern equals that of the float 0.0, +-0.5, +-1.0, +-2.0 or
/// +-4.0, or equals 0x3e22f983 while \p HasInv2Pi is true.
bool isInlinableLiteral32(int32_t Literal, bool HasInv2Pi) {
  if (Literal >= -16 && Literal <= 64)
    return true;

  // The actual type of the operand does not seem to matter as long
  // as the bits match one of the inline immediate values.  For example:
  //
  // -nan has the hexadecimal encoding of 0xfffffffe which is -2 in decimal,
  // so it is a legal inline immediate.
  //
  // 1065353216 has the hexadecimal encoding 0x3f800000 which is 1.0f in
  // floating-point, so it is a legal inline immediate.

  uint32_t Val = static_cast<uint32_t>(Literal);
  return (Val == FloatToBits(0.0f)) ||
         (Val == FloatToBits(1.0f)) ||
         (Val == FloatToBits(-1.0f)) ||
         (Val == FloatToBits(0.5f)) ||
         (Val == FloatToBits(-0.5f)) ||
         (Val == FloatToBits(2.0f)) ||
         (Val == FloatToBits(-2.0f)) ||
         (Val == FloatToBits(4.0f)) ||
         (Val == FloatToBits(-4.0f)) ||
         (Val == 0x3e22f983 && HasInv2Pi);
}

/// \returns False whenever \p HasInv2Pi is false. Otherwise true if the
/// 16-bit \p Literal is an integer in [-16, 64] or its bit pattern is one of
/// the half-precision constants listed below.
///
/// NOTE(review): unlike the 32- and 64-bit variants, nothing is inlinable
/// here without \p HasInv2Pi, not even small integers; presumably the flag
/// doubles as "subtarget has 16-bit inline constants" - confirm.
bool isInlinableLiteral16(int16_t Literal, bool HasInv2Pi) {
  if (!HasInv2Pi)
    return false;

  if (Literal >= -16 && Literal <= 64)
    return true;

  uint16_t Val = static_cast<uint16_t>(Literal);
  return Val == 0x3C00 || // 1.0
         Val == 0xBC00 || // -1.0
         Val == 0x3800 || // 0.5
         Val == 0xB800 || // -0.5
         Val == 0x4000 || // 2.0
         Val == 0xC000 || // -2.0
         Val == 0x4400 || // 4.0
         Val == 0xC400 || // -4.0
         Val == 0x3118;   // 1/2pi
}

/// \returns True if the memory operand \p MMO is treated as uniform: its IR
/// value is null, an undef value, an argument, a constant or a global value,
/// or it is an instruction that carries "amdgpu.uniform" metadata.
bool isUniformMMO(const MachineMemOperand *MMO) {
  const Value *Ptr = MMO->getValue();
  // UndefValue means this is a load of a kernel input.  These are uniform.
  // Sometimes LDS instructions have constant pointers.
  // If Ptr is null, then that means this mem operand contains a
  // PseudoSourceValue like GOT.
  if (!Ptr || isa<UndefValue>(Ptr) || isa<Argument>(Ptr) ||
      isa<Constant>(Ptr) || isa<GlobalValue>(Ptr))
    return true;

  // Any other value is uniform only if it is an instruction that has been
  // annotated with the metadata.
  const Instruction *I = dyn_cast<Instruction>(Ptr);
  return I && I->getMetadata("amdgpu.uniform");
}

/// \returns The encoded form of \p ByteOffset for subtarget \p ST:
/// \p ByteOffset shifted right by two on SI and CI (presumably a dword
/// count), and \p ByteOffset itself on every other subtarget.
int64_t getSMRDEncodedOffset(const MCSubtargetInfo &ST, int64_t ByteOffset) {
  if (isSI(ST) || isCI(ST))
    return ByteOffset >> 2;

  return ByteOffset;
}

/// \returns True if the encoded form of \p ByteOffset fits the immediate
/// offset field: an unsigned 8-bit value on SI and CI, an unsigned 20-bit
/// value on every other subtarget.
///
/// NOTE(review): on SI/CI the low two bits of \p ByteOffset are dropped by
/// the encoding before the range check, so an unaligned offset is not
/// rejected here - confirm that callers guarantee 4-byte alignment.
bool isLegalSMRDImmOffset(const MCSubtargetInfo &ST, int64_t ByteOffset) {
  int64_t EncodedOffset = getSMRDEncodedOffset(ST, ByteOffset);
  // '||' binds tighter than '?:', so the condition is (isSI || isCI).
  return isSI(ST) || isCI(ST) ? isUInt<8>(EncodedOffset) :
                                isUInt<20>(EncodedOffset);
}

} // End namespace AMDGPU
} // End namespace llvm