//===- AMDGPUBaseInfo.cpp - AMDGPU Base encoding information --------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//

#include "AMDGPUBaseInfo.h"
#include "AMDGPU.h"
#include "SIDefines.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/Triple.h"
#include "llvm/BinaryFormat/ELF.h"
#include "llvm/CodeGen/MachineMemOperand.h"
#include "llvm/IR/Attributes.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/GlobalValue.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Module.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCInstrDesc.h"
#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/MC/MCSectionELF.h"
#include "llvm/MC/MCSubtargetInfo.h"
#include "llvm/MC/SubtargetFeature.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Target/TargetMachine.h"
#include <algorithm>
#include <cassert>
#include <cstdint>
#include <cstring>
#include <utility>

#include "MCTargetDesc/AMDGPUMCTargetDesc.h"

#define GET_INSTRINFO_NAMED_OPS
#include "AMDGPUGenInstrInfo.inc"
#undef GET_INSTRINFO_NAMED_OPS

namespace {

/// \returns Bit mask for given bit \p Shift and bit \p Width.
unsigned getBitMask(unsigned Shift, unsigned Width) {
  return ((1 << Width) - 1) << Shift;
}

/// \brief Packs \p Src into \p Dst for given bit \p Shift and bit \p Width.
///
/// \returns Packed \p Dst.
unsigned packBits(unsigned Src, unsigned Dst, unsigned Shift, unsigned Width) {
  Dst &= ~getBitMask(Shift, Width);
  Dst |= (Src << Shift) & getBitMask(Shift, Width);
  return Dst;
}

/// \brief Unpacks bits from \p Src for given bit \p Shift and bit \p Width.
///
/// \returns Unpacked bits.
unsigned unpackBits(unsigned Src, unsigned Shift, unsigned Width) {
  return (Src & getBitMask(Shift, Width)) >> Shift;
}

/// \returns Vmcnt bit shift (lower bits).
unsigned getVmcntBitShiftLo() { return 0; }

/// \returns Vmcnt bit width (lower bits).
unsigned getVmcntBitWidthLo() { return 4; }

/// \returns Expcnt bit shift.
unsigned getExpcntBitShift() { return 4; }

/// \returns Expcnt bit width.
unsigned getExpcntBitWidth() { return 3; }

/// \returns Lgkmcnt bit shift.
unsigned getLgkmcntBitShift() { return 8; }

/// \returns Lgkmcnt bit width.
unsigned getLgkmcntBitWidth() { return 4; }

/// \returns Vmcnt bit shift (higher bits).
unsigned getVmcntBitShiftHi() { return 14; }

/// \returns Vmcnt bit width (higher bits).
unsigned getVmcntBitWidthHi() { return 2; }
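
// The getters above describe the s_waitcnt immediate layout. On GFX9 this
// gives: vmcnt in bits [3:0] (lo) and [15:14] (hi), expcnt in bits [6:4],
// and lgkmcnt in bits [11:8]; e.g. getBitMask(getLgkmcntBitShift(),
// getLgkmcntBitWidth()) == 0x0f00.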

} // end anonymous namespace

namespace llvm {

static cl::opt<bool> EnablePackedInlinableLiterals(
    "enable-packed-inlinable-literals",
    cl::desc("Enable packed inlinable literals (v2f16, v2i16)"),
    cl::init(false));

namespace AMDGPU {

namespace IsaInfo {

IsaVersion getIsaVersion(const FeatureBitset &Features) {
  // CI.
  if (Features.test(FeatureISAVersion7_0_0))
    return {7, 0, 0};
  if (Features.test(FeatureISAVersion7_0_1))
    return {7, 0, 1};
  if (Features.test(FeatureISAVersion7_0_2))
    return {7, 0, 2};

  // VI.
  if (Features.test(FeatureISAVersion8_0_0))
    return {8, 0, 0};
  if (Features.test(FeatureISAVersion8_0_1))
    return {8, 0, 1};
  if (Features.test(FeatureISAVersion8_0_2))
    return {8, 0, 2};
  if (Features.test(FeatureISAVersion8_0_3))
    return {8, 0, 3};
  if (Features.test(FeatureISAVersion8_0_4))
    return {8, 0, 4};
  if (Features.test(FeatureISAVersion8_1_0))
    return {8, 1, 0};

  // GFX9.
  if (Features.test(FeatureISAVersion9_0_0))
    return {9, 0, 0};
  if (Features.test(FeatureISAVersion9_0_1))
    return {9, 0, 1};

  if (!Features.test(FeatureGCN) || Features.test(FeatureSouthernIslands))
    return {0, 0, 0};
  return {7, 0, 0};
}

unsigned getWavefrontSize(const FeatureBitset &Features) {
  if (Features.test(FeatureWavefrontSize16))
    return 16;
  if (Features.test(FeatureWavefrontSize32))
    return 32;

  return 64;
}

unsigned getLocalMemorySize(const FeatureBitset &Features) {
  if (Features.test(FeatureLocalMemorySize32768))
    return 32768;
  if (Features.test(FeatureLocalMemorySize65536))
    return 65536;

  return 0;
}

unsigned getEUsPerCU(const FeatureBitset &Features) {
  return 4;
}

unsigned getMaxWorkGroupsPerCU(const FeatureBitset &Features,
                               unsigned FlatWorkGroupSize) {
  if (!Features.test(FeatureGCN))
    return 8;
  unsigned N = getWavesPerWorkGroup(Features, FlatWorkGroupSize);
  if (N == 1)
    return 40;
  N = 40 / N;
  return std::min(N, 16u);
}

unsigned getMaxWavesPerCU(const FeatureBitset &Features) {
  return getMaxWavesPerEU(Features) * getEUsPerCU(Features);
}

unsigned getMaxWavesPerCU(const FeatureBitset &Features,
                          unsigned FlatWorkGroupSize) {
  return getWavesPerWorkGroup(Features, FlatWorkGroupSize);
}

unsigned getMinWavesPerEU(const FeatureBitset &Features) {
  return 1;
}

unsigned getMaxWavesPerEU(const FeatureBitset &Features) {
  if (!Features.test(FeatureGCN))
    return 8;
  // FIXME: Need to take scratch memory into account.
  return 10;
}

unsigned getMaxWavesPerEU(const FeatureBitset &Features,
                          unsigned FlatWorkGroupSize) {
  return alignTo(getMaxWavesPerCU(Features, FlatWorkGroupSize),
                 getEUsPerCU(Features)) / getEUsPerCU(Features);
}

unsigned getMinFlatWorkGroupSize(const FeatureBitset &Features) {
  return 1;
}

unsigned getMaxFlatWorkGroupSize(const FeatureBitset &Features) {
  return 2048;
}

unsigned getWavesPerWorkGroup(const FeatureBitset &Features,
                              unsigned FlatWorkGroupSize) {
  return alignTo(FlatWorkGroupSize, getWavefrontSize(Features)) /
         getWavefrontSize(Features);
}
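
// For example, FlatWorkGroupSize = 256 on a wave64 target yields
// getWavesPerWorkGroup == 4, and getMaxWorkGroupsPerCU then returns
// std::min(40 / 4, 16u) == 10 work groups per CU.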

unsigned getSGPRAllocGranule(const FeatureBitset &Features) {
  IsaVersion Version = getIsaVersion(Features);
  if (Version.Major >= 8)
    return 16;
  return 8;
}

unsigned getSGPREncodingGranule(const FeatureBitset &Features) {
  return 8;
}

unsigned getTotalNumSGPRs(const FeatureBitset &Features) {
  IsaVersion Version = getIsaVersion(Features);
  if (Version.Major >= 8)
    return 800;
  return 512;
}

unsigned getAddressableNumSGPRs(const FeatureBitset &Features) {
  if (Features.test(FeatureSGPRInitBug))
    return FIXED_NUM_SGPRS_FOR_INIT_BUG;

  IsaVersion Version = getIsaVersion(Features);
  if (Version.Major >= 8)
    return 102;
  return 104;
}

unsigned getMinNumSGPRs(const FeatureBitset &Features, unsigned WavesPerEU) {
  assert(WavesPerEU != 0);

  if (WavesPerEU >= getMaxWavesPerEU(Features))
    return 0;
  unsigned MinNumSGPRs =
      alignDown(getTotalNumSGPRs(Features) / (WavesPerEU + 1),
                getSGPRAllocGranule(Features)) + 1;
  return std::min(MinNumSGPRs, getAddressableNumSGPRs(Features));
}

unsigned getMaxNumSGPRs(const FeatureBitset &Features, unsigned WavesPerEU,
                        bool Addressable) {
  assert(WavesPerEU != 0);

  IsaVersion Version = getIsaVersion(Features);
  unsigned MaxNumSGPRs = alignDown(getTotalNumSGPRs(Features) / WavesPerEU,
                                   getSGPRAllocGranule(Features));
  unsigned AddressableNumSGPRs = getAddressableNumSGPRs(Features);
  if (Version.Major >= 8 && !Addressable)
    AddressableNumSGPRs = 112;
  return std::min(MaxNumSGPRs, AddressableNumSGPRs);
}

unsigned getVGPRAllocGranule(const FeatureBitset &Features) {
  return 4;
}

unsigned getVGPREncodingGranule(const FeatureBitset &Features) {
  return getVGPRAllocGranule(Features);
}

unsigned getTotalNumVGPRs(const FeatureBitset &Features) {
  return 256;
}

unsigned getAddressableNumVGPRs(const FeatureBitset &Features) {
  return getTotalNumVGPRs(Features);
}

unsigned getMinNumVGPRs(const FeatureBitset &Features, unsigned WavesPerEU) {
  assert(WavesPerEU != 0);

  if (WavesPerEU >= getMaxWavesPerEU(Features))
    return 0;
  unsigned MinNumVGPRs =
      alignDown(getTotalNumVGPRs(Features) / (WavesPerEU + 1),
                getVGPRAllocGranule(Features)) + 1;
  return std::min(MinNumVGPRs, getAddressableNumVGPRs(Features));
}

unsigned getMaxNumVGPRs(const FeatureBitset &Features, unsigned WavesPerEU) {
  assert(WavesPerEU != 0);

  unsigned MaxNumVGPRs = alignDown(getTotalNumVGPRs(Features) / WavesPerEU,
                                   getVGPRAllocGranule(Features));
  unsigned AddressableNumVGPRs = getAddressableNumVGPRs(Features);
  return std::min(MaxNumVGPRs, AddressableNumVGPRs);
}
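
// For example, on a GFX8 target with WavesPerEU = 8 and Addressable = true,
// getMaxNumSGPRs returns std::min(alignDown(800 / 8, 16), 102u) == 96, and
// getMaxNumVGPRs returns alignDown(256 / 8, 4) == 32.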

} // end namespace IsaInfo

void initDefaultAMDKernelCodeT(amd_kernel_code_t &Header,
                               const FeatureBitset &Features) {
  IsaInfo::IsaVersion ISA = IsaInfo::getIsaVersion(Features);

  memset(&Header, 0, sizeof(Header));

  Header.amd_kernel_code_version_major = 1;
  Header.amd_kernel_code_version_minor = 1;
  Header.amd_machine_kind = 1; // AMD_MACHINE_KIND_AMDGPU
  Header.amd_machine_version_major = ISA.Major;
  Header.amd_machine_version_minor = ISA.Minor;
  Header.amd_machine_version_stepping = ISA.Stepping;
  Header.kernel_code_entry_byte_offset = sizeof(Header);
  // wavefront_size is specified as a power of 2: 2^6 = 64 threads.
  Header.wavefront_size = 6;

  // If the code object does not support indirect functions, then the value
  // must be 0xffffffff.
  Header.call_convention = -1;

  // These alignment values are specified in powers of two, so alignment =
  // 2^n. The minimum alignment is 2^4 = 16.
  Header.kernarg_segment_alignment = 4;
  Header.group_segment_alignment = 4;
  Header.private_segment_alignment = 4;
}

bool isGroupSegment(const GlobalValue *GV, AMDGPUAS AS) {
  return GV->getType()->getAddressSpace() == AS.LOCAL_ADDRESS;
}

bool isGlobalSegment(const GlobalValue *GV, AMDGPUAS AS) {
  return GV->getType()->getAddressSpace() == AS.GLOBAL_ADDRESS;
}

bool isReadOnlySegment(const GlobalValue *GV, AMDGPUAS AS) {
  return GV->getType()->getAddressSpace() == AS.CONSTANT_ADDRESS;
}

bool shouldEmitConstantsToTextSection(const Triple &TT) {
  return TT.getOS() != Triple::AMDHSA;
}

int getIntegerAttribute(const Function &F, StringRef Name, int Default) {
  Attribute A = F.getFnAttribute(Name);
  int Result = Default;

  if (A.isStringAttribute()) {
    StringRef Str = A.getValueAsString();
    if (Str.getAsInteger(0, Result)) {
      LLVMContext &Ctx = F.getContext();
      Ctx.emitError("can't parse integer attribute " + Name);
    }
  }

  return Result;
}

std::pair<int, int> getIntegerPairAttribute(const Function &F,
                                            StringRef Name,
                                            std::pair<int, int> Default,
                                            bool OnlyFirstRequired) {
  Attribute A = F.getFnAttribute(Name);
  if (!A.isStringAttribute())
    return Default;

  LLVMContext &Ctx = F.getContext();
  std::pair<int, int> Ints = Default;
  std::pair<StringRef, StringRef> Strs = A.getValueAsString().split(',');
  if (Strs.first.trim().getAsInteger(0, Ints.first)) {
    Ctx.emitError("can't parse first integer attribute " + Name);
    return Default;
  }
  if (Strs.second.trim().getAsInteger(0, Ints.second)) {
    if (!OnlyFirstRequired || !Strs.second.trim().empty()) {
      Ctx.emitError("can't parse second integer attribute " + Name);
      return Default;
    }
  }

  return Ints;
}
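
// For example, a string attribute of "128,256" parses to {128, 256}. With
// OnlyFirstRequired set, "128" alone is accepted and the second element
// keeps its default.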

unsigned getVmcntBitMask(const IsaInfo::IsaVersion &Version) {
  unsigned VmcntLo = (1 << getVmcntBitWidthLo()) - 1;
  if (Version.Major < 9)
    return VmcntLo;

  unsigned VmcntHi = ((1 << getVmcntBitWidthHi()) - 1) << getVmcntBitWidthLo();
  return VmcntLo | VmcntHi;
}

unsigned getExpcntBitMask(const IsaInfo::IsaVersion &Version) {
  return (1 << getExpcntBitWidth()) - 1;
}

unsigned getLgkmcntBitMask(const IsaInfo::IsaVersion &Version) {
  return (1 << getLgkmcntBitWidth()) - 1;
}

unsigned getWaitcntBitMask(const IsaInfo::IsaVersion &Version) {
  unsigned VmcntLo = getBitMask(getVmcntBitShiftLo(), getVmcntBitWidthLo());
  unsigned Expcnt = getBitMask(getExpcntBitShift(), getExpcntBitWidth());
  unsigned Lgkmcnt = getBitMask(getLgkmcntBitShift(), getLgkmcntBitWidth());
  unsigned Waitcnt = VmcntLo | Expcnt | Lgkmcnt;
  if (Version.Major < 9)
    return Waitcnt;

  unsigned VmcntHi = getBitMask(getVmcntBitShiftHi(), getVmcntBitWidthHi());
  return Waitcnt | VmcntHi;
}

unsigned decodeVmcnt(const IsaInfo::IsaVersion &Version, unsigned Waitcnt) {
  unsigned VmcntLo =
      unpackBits(Waitcnt, getVmcntBitShiftLo(), getVmcntBitWidthLo());
  if (Version.Major < 9)
    return VmcntLo;

  unsigned VmcntHi =
      unpackBits(Waitcnt, getVmcntBitShiftHi(), getVmcntBitWidthHi());
  VmcntHi <<= getVmcntBitWidthLo();
  return VmcntLo | VmcntHi;
}

unsigned decodeExpcnt(const IsaInfo::IsaVersion &Version, unsigned Waitcnt) {
  return unpackBits(Waitcnt, getExpcntBitShift(), getExpcntBitWidth());
}

unsigned decodeLgkmcnt(const IsaInfo::IsaVersion &Version, unsigned Waitcnt) {
  return unpackBits(Waitcnt, getLgkmcntBitShift(), getLgkmcntBitWidth());
}

void decodeWaitcnt(const IsaInfo::IsaVersion &Version, unsigned Waitcnt,
                   unsigned &Vmcnt, unsigned &Expcnt, unsigned &Lgkmcnt) {
  Vmcnt = decodeVmcnt(Version, Waitcnt);
  Expcnt = decodeExpcnt(Version, Waitcnt);
  Lgkmcnt = decodeLgkmcnt(Version, Waitcnt);
}

unsigned encodeVmcnt(const IsaInfo::IsaVersion &Version, unsigned Waitcnt,
                     unsigned Vmcnt) {
  Waitcnt =
      packBits(Vmcnt, Waitcnt, getVmcntBitShiftLo(), getVmcntBitWidthLo());
  if (Version.Major < 9)
    return Waitcnt;

  Vmcnt >>= getVmcntBitWidthLo();
  return packBits(Vmcnt, Waitcnt, getVmcntBitShiftHi(), getVmcntBitWidthHi());
}

unsigned encodeExpcnt(const IsaInfo::IsaVersion &Version, unsigned Waitcnt,
                      unsigned Expcnt) {
  return packBits(Expcnt, Waitcnt, getExpcntBitShift(), getExpcntBitWidth());
}

unsigned encodeLgkmcnt(const IsaInfo::IsaVersion &Version, unsigned Waitcnt,
                       unsigned Lgkmcnt) {
  return packBits(Lgkmcnt, Waitcnt, getLgkmcntBitShift(), getLgkmcntBitWidth());
}

unsigned encodeWaitcnt(const IsaInfo::IsaVersion &Version,
                       unsigned Vmcnt, unsigned Expcnt, unsigned Lgkmcnt) {
  unsigned Waitcnt = getWaitcntBitMask(Version);
  Waitcnt = encodeVmcnt(Version, Waitcnt, Vmcnt);
  Waitcnt = encodeExpcnt(Version, Waitcnt, Expcnt);
  Waitcnt = encodeLgkmcnt(Version, Waitcnt, Lgkmcnt);
  return Waitcnt;
}
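
// For example, with a GFX9 IsaVersion, encodeWaitcnt(Version, 3, 7, 15)
// produces 0x0f73 (vmcnt lo = 3, vmcnt hi = 0, expcnt = 7, lgkmcnt = 15),
// and decodeWaitcnt recovers the three fields from that value.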

unsigned getInitialPSInputAddr(const Function &F) {
  return getIntegerAttribute(F, "InitialPSInputAddr", 0);
}

bool isShader(CallingConv::ID cc) {
  switch (cc) {
  case CallingConv::AMDGPU_VS:
  case CallingConv::AMDGPU_HS:
  case CallingConv::AMDGPU_GS:
  case CallingConv::AMDGPU_PS:
  case CallingConv::AMDGPU_CS:
    return true;
  default:
    return false;
  }
}

bool isCompute(CallingConv::ID cc) {
  return !isShader(cc) || cc == CallingConv::AMDGPU_CS;
}

bool isEntryFunctionCC(CallingConv::ID CC) {
  switch (CC) {
  case CallingConv::AMDGPU_KERNEL:
  case CallingConv::SPIR_KERNEL:
  case CallingConv::AMDGPU_VS:
  case CallingConv::AMDGPU_GS:
  case CallingConv::AMDGPU_PS:
  case CallingConv::AMDGPU_CS:
  case CallingConv::AMDGPU_HS:
    return true;
  default:
    return false;
  }
}

bool isSI(const MCSubtargetInfo &STI) {
  return STI.getFeatureBits()[AMDGPU::FeatureSouthernIslands];
}

bool isCI(const MCSubtargetInfo &STI) {
  return STI.getFeatureBits()[AMDGPU::FeatureSeaIslands];
}

bool isVI(const MCSubtargetInfo &STI) {
  return STI.getFeatureBits()[AMDGPU::FeatureVolcanicIslands];
}

bool isGFX9(const MCSubtargetInfo &STI) {
  return STI.getFeatureBits()[AMDGPU::FeatureGFX9];
}

bool isSGPR(unsigned Reg, const MCRegisterInfo *TRI) {
  const MCRegisterClass SGPRClass = TRI->getRegClass(AMDGPU::SReg_32RegClassID);
  const unsigned FirstSubReg = TRI->getSubReg(Reg, 1);
  return SGPRClass.contains(FirstSubReg != 0 ? FirstSubReg : Reg) ||
         Reg == AMDGPU::SCC;
}

unsigned getMCReg(unsigned Reg, const MCSubtargetInfo &STI) {
  switch (Reg) {
  default: break;
  case AMDGPU::FLAT_SCR:
    assert(!isSI(STI));
    return isCI(STI) ? AMDGPU::FLAT_SCR_ci : AMDGPU::FLAT_SCR_vi;

  case AMDGPU::FLAT_SCR_LO:
    assert(!isSI(STI));
    return isCI(STI) ? AMDGPU::FLAT_SCR_LO_ci : AMDGPU::FLAT_SCR_LO_vi;

  case AMDGPU::FLAT_SCR_HI:
    assert(!isSI(STI));
    return isCI(STI) ? AMDGPU::FLAT_SCR_HI_ci : AMDGPU::FLAT_SCR_HI_vi;
  }
  return Reg;
}

unsigned mc2PseudoReg(unsigned Reg) {
  switch (Reg) {
  case AMDGPU::FLAT_SCR_ci:
  case AMDGPU::FLAT_SCR_vi:
    return AMDGPU::FLAT_SCR;

  case AMDGPU::FLAT_SCR_LO_ci:
  case AMDGPU::FLAT_SCR_LO_vi:
    return AMDGPU::FLAT_SCR_LO;

  case AMDGPU::FLAT_SCR_HI_ci:
  case AMDGPU::FLAT_SCR_HI_vi:
    return AMDGPU::FLAT_SCR_HI;

  default:
    return Reg;
  }
}
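
// mc2PseudoReg inverts getMCReg for the FLAT_SCR family: for instance,
// mc2PseudoReg(getMCReg(AMDGPU::FLAT_SCR_LO, STI)) == AMDGPU::FLAT_SCR_LO
// on any CI or VI subtarget.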

bool isSISrcOperand(const MCInstrDesc &Desc, unsigned OpNo) {
  assert(OpNo < Desc.NumOperands);
  unsigned OpType = Desc.OpInfo[OpNo].OperandType;
  return OpType >= AMDGPU::OPERAND_SRC_FIRST &&
         OpType <= AMDGPU::OPERAND_SRC_LAST;
}

bool isSISrcFPOperand(const MCInstrDesc &Desc, unsigned OpNo) {
  assert(OpNo < Desc.NumOperands);
  unsigned OpType = Desc.OpInfo[OpNo].OperandType;
  switch (OpType) {
  case AMDGPU::OPERAND_REG_IMM_FP32:
  case AMDGPU::OPERAND_REG_IMM_FP64:
  case AMDGPU::OPERAND_REG_IMM_FP16:
  case AMDGPU::OPERAND_REG_INLINE_C_FP32:
  case AMDGPU::OPERAND_REG_INLINE_C_FP64:
  case AMDGPU::OPERAND_REG_INLINE_C_FP16:
  case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
    return true;
  default:
    return false;
  }
}

bool isSISrcInlinableOperand(const MCInstrDesc &Desc, unsigned OpNo) {
  assert(OpNo < Desc.NumOperands);
  unsigned OpType = Desc.OpInfo[OpNo].OperandType;
  return OpType >= AMDGPU::OPERAND_REG_INLINE_C_FIRST &&
         OpType <= AMDGPU::OPERAND_REG_INLINE_C_LAST;
}

// Avoid using MCRegisterClass::getSize, since that function will go away
// (move from MC* level to Target* level). Return size in bits.
unsigned getRegBitWidth(unsigned RCID) {
  switch (RCID) {
  case AMDGPU::SGPR_32RegClassID:
  case AMDGPU::VGPR_32RegClassID:
  case AMDGPU::VS_32RegClassID:
  case AMDGPU::SReg_32RegClassID:
  case AMDGPU::SReg_32_XM0RegClassID:
    return 32;
  case AMDGPU::SGPR_64RegClassID:
  case AMDGPU::VS_64RegClassID:
  case AMDGPU::SReg_64RegClassID:
  case AMDGPU::VReg_64RegClassID:
    return 64;
  case AMDGPU::VReg_96RegClassID:
    return 96;
  case AMDGPU::SGPR_128RegClassID:
  case AMDGPU::SReg_128RegClassID:
  case AMDGPU::VReg_128RegClassID:
    return 128;
  case AMDGPU::SReg_256RegClassID:
  case AMDGPU::VReg_256RegClassID:
    return 256;
  case AMDGPU::SReg_512RegClassID:
  case AMDGPU::VReg_512RegClassID:
    return 512;
  default:
    llvm_unreachable("Unexpected register class");
  }
}

unsigned getRegBitWidth(const MCRegisterClass &RC) {
  return getRegBitWidth(RC.getID());
}

unsigned getRegOperandSize(const MCRegisterInfo *MRI, const MCInstrDesc &Desc,
                           unsigned OpNo) {
  assert(OpNo < Desc.NumOperands);
  unsigned RCID = Desc.OpInfo[OpNo].RegClass;
  return getRegBitWidth(MRI->getRegClass(RCID)) / 8;
}

bool isInlinableLiteral64(int64_t Literal, bool HasInv2Pi) {
  if (Literal >= -16 && Literal <= 64)
    return true;

  uint64_t Val = static_cast<uint64_t>(Literal);
  return (Val == DoubleToBits(0.0)) ||
         (Val == DoubleToBits(1.0)) ||
         (Val == DoubleToBits(-1.0)) ||
         (Val == DoubleToBits(0.5)) ||
         (Val == DoubleToBits(-0.5)) ||
         (Val == DoubleToBits(2.0)) ||
         (Val == DoubleToBits(-2.0)) ||
         (Val == DoubleToBits(4.0)) ||
         (Val == DoubleToBits(-4.0)) ||
         (Val == 0x3fc45f306dc9c882 && HasInv2Pi);
}

bool isInlinableLiteral32(int32_t Literal, bool HasInv2Pi) {
  if (Literal >= -16 && Literal <= 64)
    return true;

  // The actual type of the operand does not seem to matter as long
  // as the bits match one of the inline immediate values. For example:
  //
  // -nan has the hexadecimal encoding of 0xfffffffe which is -2 in decimal,
  // so it is a legal inline immediate.
  //
  // 1065353216 has the hexadecimal encoding 0x3f800000 which is 1.0f in
  // floating-point, so it is a legal inline immediate.

  uint32_t Val = static_cast<uint32_t>(Literal);
  return (Val == FloatToBits(0.0f)) ||
         (Val == FloatToBits(1.0f)) ||
         (Val == FloatToBits(-1.0f)) ||
         (Val == FloatToBits(0.5f)) ||
         (Val == FloatToBits(-0.5f)) ||
         (Val == FloatToBits(2.0f)) ||
         (Val == FloatToBits(-2.0f)) ||
         (Val == FloatToBits(4.0f)) ||
         (Val == FloatToBits(-4.0f)) ||
         (Val == 0x3e22f983 && HasInv2Pi);
}

bool isInlinableLiteral16(int16_t Literal, bool HasInv2Pi) {
  if (!HasInv2Pi)
    return false;

  if (Literal >= -16 && Literal <= 64)
    return true;

  uint16_t Val = static_cast<uint16_t>(Literal);
  return Val == 0x3C00 || // 1.0
         Val == 0xBC00 || // -1.0
         Val == 0x3800 || // 0.5
         Val == 0xB800 || // -0.5
         Val == 0x4000 || // 2.0
         Val == 0xC000 || // -2.0
         Val == 0x4400 || // 4.0
         Val == 0xC400 || // -4.0
         Val == 0x3118;   // 1/2pi
}

bool isInlinableLiteralV216(int32_t Literal, bool HasInv2Pi) {
  assert(HasInv2Pi);

  if (!EnablePackedInlinableLiterals)
    return false;

  int16_t Lo16 = static_cast<int16_t>(Literal);
  int16_t Hi16 = static_cast<int16_t>(Literal >> 16);
  return Lo16 == Hi16 && isInlinableLiteral16(Lo16, HasInv2Pi);
}
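
// A v2f16 literal is inlinable only when both halves hold the same inlinable
// value: 0x3C003C00 packs 1.0 into both halves and qualifies, while
// 0x40003C00 (1.0 in the low half, 2.0 in the high half) does not.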

bool isUniformMMO(const MachineMemOperand *MMO) {
  const Value *Ptr = MMO->getValue();
  // UndefValue means this is a load of a kernel input. These are uniform.
  // Sometimes LDS instructions have constant pointers.
  // If Ptr is null, then that means this mem operand contains a
  // PseudoSourceValue like GOT.
  if (!Ptr || isa<UndefValue>(Ptr) || isa<Argument>(Ptr) ||
      isa<Constant>(Ptr) || isa<GlobalValue>(Ptr))
    return true;

  const Instruction *I = dyn_cast<Instruction>(Ptr);
  return I && I->getMetadata("amdgpu.uniform");
}

int64_t getSMRDEncodedOffset(const MCSubtargetInfo &ST, int64_t ByteOffset) {
  if (isSI(ST) || isCI(ST))
    return ByteOffset >> 2;

  return ByteOffset;
}

bool isLegalSMRDImmOffset(const MCSubtargetInfo &ST, int64_t ByteOffset) {
  int64_t EncodedOffset = getSMRDEncodedOffset(ST, ByteOffset);
  return isSI(ST) || isCI(ST) ? isUInt<8>(EncodedOffset)
                              : isUInt<20>(EncodedOffset);
}
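
// On SI/CI the SMRD immediate is in dwords, so a ByteOffset of 1020 encodes
// as 255 and is legal (it fits in 8 bits), while 1024 encodes as 256 and is
// not. On VI and later the 20-bit immediate is a byte offset.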

} // end namespace AMDGPU

} // end namespace llvm

const unsigned AMDGPUAS::MAX_COMMON_ADDRESS;
const unsigned AMDGPUAS::GLOBAL_ADDRESS;
const unsigned AMDGPUAS::LOCAL_ADDRESS;
const unsigned AMDGPUAS::PARAM_D_ADDRESS;
const unsigned AMDGPUAS::PARAM_I_ADDRESS;
const unsigned AMDGPUAS::CONSTANT_BUFFER_0;
const unsigned AMDGPUAS::CONSTANT_BUFFER_1;
const unsigned AMDGPUAS::CONSTANT_BUFFER_2;
const unsigned AMDGPUAS::CONSTANT_BUFFER_3;
const unsigned AMDGPUAS::CONSTANT_BUFFER_4;
const unsigned AMDGPUAS::CONSTANT_BUFFER_5;
const unsigned AMDGPUAS::CONSTANT_BUFFER_6;
const unsigned AMDGPUAS::CONSTANT_BUFFER_7;
const unsigned AMDGPUAS::CONSTANT_BUFFER_8;
const unsigned AMDGPUAS::CONSTANT_BUFFER_9;
const unsigned AMDGPUAS::CONSTANT_BUFFER_10;
const unsigned AMDGPUAS::CONSTANT_BUFFER_11;
const unsigned AMDGPUAS::CONSTANT_BUFFER_12;
const unsigned AMDGPUAS::CONSTANT_BUFFER_13;
const unsigned AMDGPUAS::CONSTANT_BUFFER_14;
const unsigned AMDGPUAS::CONSTANT_BUFFER_15;
const unsigned AMDGPUAS::UNKNOWN_ADDRESS_SPACE;

namespace llvm {
namespace AMDGPU {

AMDGPUAS getAMDGPUAS(Triple T) {
  auto Env = T.getEnvironmentName();
  AMDGPUAS AS;
  if (Env == "amdgiz" || Env == "amdgizcl") {
    AS.FLAT_ADDRESS = 0;
    AS.PRIVATE_ADDRESS = 5;
    AS.REGION_ADDRESS = 4;
  } else {
    AS.FLAT_ADDRESS = 4;
    AS.PRIVATE_ADDRESS = 0;
    AS.REGION_ADDRESS = 5;
  }
  return AS;
}

AMDGPUAS getAMDGPUAS(const TargetMachine &M) {
  return getAMDGPUAS(M.getTargetTriple());
}

AMDGPUAS getAMDGPUAS(const Module &M) {
  return getAMDGPUAS(Triple(M.getTargetTriple()));
}

} // end namespace AMDGPU
} // end namespace llvm