//===-- AMDGPUBaseInfo.cpp - AMDGPU Base encoding information--------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//

#include "AMDGPUBaseInfo.h"
#include "AMDGPU.h"
#include "SIDefines.h"
#include "llvm/CodeGen/MachineMemOperand.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/GlobalValue.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCInstrInfo.h"
#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/MC/MCSectionELF.h"
#include "llvm/MC/MCSubtargetInfo.h"
#include "llvm/MC/SubtargetFeature.h"
#include "llvm/Support/MathExtras.h"
#include <cstring>

#define GET_SUBTARGETINFO_ENUM
#include "AMDGPUGenSubtargetInfo.inc"
#undef GET_SUBTARGETINFO_ENUM

#define GET_REGINFO_ENUM
#include "AMDGPUGenRegisterInfo.inc"
#undef GET_REGINFO_ENUM

#define GET_INSTRINFO_NAMED_OPS
#define GET_INSTRINFO_ENUM
#include "AMDGPUGenInstrInfo.inc"
#undef GET_INSTRINFO_NAMED_OPS
#undef GET_INSTRINFO_ENUM

namespace {

/// \returns Bit mask for given bit \p Shift and bit \p Width.
unsigned getBitMask(unsigned Shift, unsigned Width) {
  return ((1 << Width) - 1) << Shift;
}

/// \brief Packs \p Src into \p Dst for given bit \p Shift and bit \p Width.
///
/// \returns Packed \p Dst.
unsigned packBits(unsigned Src, unsigned Dst, unsigned Shift, unsigned Width) {
  Dst &= ~(1 << Shift) & ~getBitMask(Shift, Width);
  Dst |= (Src << Shift) & getBitMask(Shift, Width);
  return Dst;
}

/// \brief Unpacks bits from \p Src for given bit \p Shift and bit \p Width.
///
/// \returns Unpacked bits.
unsigned unpackBits(unsigned Src, unsigned Shift, unsigned Width) {
  return (Src & getBitMask(Shift, Width)) >> Shift;
}

/// \returns Vmcnt bit shift.
unsigned getVmcntBitShift() { return 0; }

/// \returns Vmcnt bit width.
unsigned getVmcntBitWidth() { return 4; }

/// \returns Expcnt bit shift.
unsigned getExpcntBitShift() { return 4; }

/// \returns Expcnt bit width.
unsigned getExpcntBitWidth() { return 3; }

/// \returns Lgkmcnt bit shift.
unsigned getLgkmcntBitShift() { return 8; }

/// \returns Lgkmcnt bit width.
unsigned getLgkmcntBitWidth() { return 4; }

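// Note: taken together, the shift/width helpers above imply the following
// s_waitcnt simm16 layout for the targets handled in this file (this is only
// a summary of the values returned above, not an independent description of
// the hardware encoding):
//
//   bits  3:0  vmcnt
//   bits  6:4  expcnt
//   bits 11:8  lgkmcnt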

} // anonymous namespace

namespace llvm {
namespace AMDGPU {

IsaVersion getIsaVersion(const FeatureBitset &Features) {
  if (Features.test(FeatureISAVersion7_0_0))
    return {7, 0, 0};

  if (Features.test(FeatureISAVersion7_0_1))
    return {7, 0, 1};

  if (Features.test(FeatureISAVersion7_0_2))
    return {7, 0, 2};

  if (Features.test(FeatureISAVersion8_0_0))
    return {8, 0, 0};

  if (Features.test(FeatureISAVersion8_0_1))
    return {8, 0, 1};

  if (Features.test(FeatureISAVersion8_0_2))
    return {8, 0, 2};

  if (Features.test(FeatureISAVersion8_0_3))
    return {8, 0, 3};

  if (Features.test(FeatureISAVersion8_0_4))
    return {8, 0, 4};

  if (Features.test(FeatureISAVersion8_1_0))
    return {8, 1, 0};

  return {0, 0, 0};
}

void initDefaultAMDKernelCodeT(amd_kernel_code_t &Header,
                               const FeatureBitset &Features) {
  IsaVersion ISA = getIsaVersion(Features);

  memset(&Header, 0, sizeof(Header));

  Header.amd_kernel_code_version_major = 1;
  Header.amd_kernel_code_version_minor = 0;
  Header.amd_machine_kind = 1; // AMD_MACHINE_KIND_AMDGPU
  Header.amd_machine_version_major = ISA.Major;
  Header.amd_machine_version_minor = ISA.Minor;
  Header.amd_machine_version_stepping = ISA.Stepping;
  Header.kernel_code_entry_byte_offset = sizeof(Header);
  // wavefront_size is specified as a power of 2: 2^6 = 64 threads.
  Header.wavefront_size = 6;

  // If the code object does not support indirect functions, then the value
  // must be 0xffffffff.
  Header.call_convention = -1;

  // These alignment values are specified in powers of two, so alignment =
  // 2^n. The minimum alignment is 2^4 = 16.
  Header.kernarg_segment_alignment = 4;
  Header.group_segment_alignment = 4;
  Header.private_segment_alignment = 4;
}

MCSection *getHSATextSection(MCContext &Ctx) {
  return Ctx.getELFSection(".hsatext", ELF::SHT_PROGBITS,
                           ELF::SHF_ALLOC | ELF::SHF_WRITE |
                           ELF::SHF_EXECINSTR |
                           ELF::SHF_AMDGPU_HSA_AGENT |
                           ELF::SHF_AMDGPU_HSA_CODE);
}

MCSection *getHSADataGlobalAgentSection(MCContext &Ctx) {
  return Ctx.getELFSection(".hsadata_global_agent", ELF::SHT_PROGBITS,
                           ELF::SHF_ALLOC | ELF::SHF_WRITE |
                           ELF::SHF_AMDGPU_HSA_GLOBAL |
                           ELF::SHF_AMDGPU_HSA_AGENT);
}

MCSection *getHSADataGlobalProgramSection(MCContext &Ctx) {
  return Ctx.getELFSection(".hsadata_global_program", ELF::SHT_PROGBITS,
                           ELF::SHF_ALLOC | ELF::SHF_WRITE |
                           ELF::SHF_AMDGPU_HSA_GLOBAL);
}

MCSection *getHSARodataReadonlyAgentSection(MCContext &Ctx) {
  return Ctx.getELFSection(".hsarodata_readonly_agent", ELF::SHT_PROGBITS,
                           ELF::SHF_ALLOC | ELF::SHF_AMDGPU_HSA_READONLY |
                           ELF::SHF_AMDGPU_HSA_AGENT);
}

bool isGroupSegment(const GlobalValue *GV) {
  return GV->getType()->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS;
}

bool isGlobalSegment(const GlobalValue *GV) {
  return GV->getType()->getAddressSpace() == AMDGPUAS::GLOBAL_ADDRESS;
}

bool isReadOnlySegment(const GlobalValue *GV) {
  return GV->getType()->getAddressSpace() == AMDGPUAS::CONSTANT_ADDRESS;
}

bool shouldEmitConstantsToTextSection(const Triple &TT) {
  return TT.getOS() != Triple::AMDHSA;
}

int getIntegerAttribute(const Function &F, StringRef Name, int Default) {
  Attribute A = F.getFnAttribute(Name);
  int Result = Default;

  if (A.isStringAttribute()) {
    StringRef Str = A.getValueAsString();
    if (Str.getAsInteger(0, Result)) {
      LLVMContext &Ctx = F.getContext();
      Ctx.emitError("can't parse integer attribute " + Name);
    }
  }

  return Result;
}

std::pair<int, int> getIntegerPairAttribute(const Function &F,
                                            StringRef Name,
                                            std::pair<int, int> Default,
                                            bool OnlyFirstRequired) {
  Attribute A = F.getFnAttribute(Name);
  if (!A.isStringAttribute())
    return Default;

  LLVMContext &Ctx = F.getContext();
  std::pair<int, int> Ints = Default;
  std::pair<StringRef, StringRef> Strs = A.getValueAsString().split(',');
  if (Strs.first.trim().getAsInteger(0, Ints.first)) {
    Ctx.emitError("can't parse first integer attribute " + Name);
    return Default;
  }
  if (Strs.second.trim().getAsInteger(0, Ints.second)) {
    if (!OnlyFirstRequired || Strs.second.trim().size()) {
      Ctx.emitError("can't parse second integer attribute " + Name);
      return Default;
    }
  }

  return Ints;
}

unsigned getWaitcntBitMask(IsaVersion Version) {
  unsigned Vmcnt = getBitMask(getVmcntBitShift(), getVmcntBitWidth());
  unsigned Expcnt = getBitMask(getExpcntBitShift(), getExpcntBitWidth());
  unsigned Lgkmcnt = getBitMask(getLgkmcntBitShift(), getLgkmcntBitWidth());
  return Vmcnt | Expcnt | Lgkmcnt;
}

unsigned getVmcntBitMask(IsaVersion Version) {
  return (1 << getVmcntBitWidth()) - 1;
}

unsigned getExpcntBitMask(IsaVersion Version) {
  return (1 << getExpcntBitWidth()) - 1;
}

unsigned getLgkmcntBitMask(IsaVersion Version) {
  return (1 << getLgkmcntBitWidth()) - 1;
}

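// A minimal usage sketch for the encode/decode helpers below (illustrative
// only; the counter values are made up, and STI is assumed to be an
// MCSubtargetInfo for the target rather than something taken from this
// file's callers):
//
//   IsaVersion ISA = getIsaVersion(STI.getFeatureBits());
//   // Encode "wait for all counters to reach 0".
//   unsigned Waitcnt = encodeWaitcnt(ISA, /*Vmcnt=*/0, /*Expcnt=*/0,
//                                    /*Lgkmcnt=*/0);
//   unsigned Vmcnt = decodeVmcnt(ISA, Waitcnt); // == 0
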
unsigned decodeVmcnt(IsaVersion Version, unsigned Waitcnt) {
  return unpackBits(Waitcnt, getVmcntBitShift(), getVmcntBitWidth());
}

unsigned decodeExpcnt(IsaVersion Version, unsigned Waitcnt) {
  return unpackBits(Waitcnt, getExpcntBitShift(), getExpcntBitWidth());
}

unsigned decodeLgkmcnt(IsaVersion Version, unsigned Waitcnt) {
  return unpackBits(Waitcnt, getLgkmcntBitShift(), getLgkmcntBitWidth());
}

void decodeWaitcnt(IsaVersion Version, unsigned Waitcnt,
                   unsigned &Vmcnt, unsigned &Expcnt, unsigned &Lgkmcnt) {
  Vmcnt = decodeVmcnt(Version, Waitcnt);
  Expcnt = decodeExpcnt(Version, Waitcnt);
  Lgkmcnt = decodeLgkmcnt(Version, Waitcnt);
}

unsigned encodeVmcnt(IsaVersion Version, unsigned Waitcnt, unsigned Vmcnt) {
  return packBits(Vmcnt, Waitcnt, getVmcntBitShift(), getVmcntBitWidth());
}

unsigned encodeExpcnt(IsaVersion Version, unsigned Waitcnt, unsigned Expcnt) {
  return packBits(Expcnt, Waitcnt, getExpcntBitShift(), getExpcntBitWidth());
}

unsigned encodeLgkmcnt(IsaVersion Version, unsigned Waitcnt, unsigned Lgkmcnt) {
  return packBits(Lgkmcnt, Waitcnt, getLgkmcntBitShift(), getLgkmcntBitWidth());
}

unsigned encodeWaitcnt(IsaVersion Version,
                       unsigned Vmcnt, unsigned Expcnt, unsigned Lgkmcnt) {
  unsigned Waitcnt = getWaitcntBitMask(Version);
  Waitcnt = encodeVmcnt(Version, Waitcnt, Vmcnt);
  Waitcnt = encodeExpcnt(Version, Waitcnt, Expcnt);
  Waitcnt = encodeLgkmcnt(Version, Waitcnt, Lgkmcnt);
  return Waitcnt;
}

unsigned getInitialPSInputAddr(const Function &F) {
  return getIntegerAttribute(F, "InitialPSInputAddr", 0);
}

bool isShader(CallingConv::ID cc) {
  switch(cc) {
  case CallingConv::AMDGPU_VS:
  case CallingConv::AMDGPU_GS:
  case CallingConv::AMDGPU_PS:
  case CallingConv::AMDGPU_CS:
    return true;
  default:
    return false;
  }
}

bool isCompute(CallingConv::ID cc) {
  return !isShader(cc) || cc == CallingConv::AMDGPU_CS;
}

bool isSI(const MCSubtargetInfo &STI) {
  return STI.getFeatureBits()[AMDGPU::FeatureSouthernIslands];
}

bool isCI(const MCSubtargetInfo &STI) {
  return STI.getFeatureBits()[AMDGPU::FeatureSeaIslands];
}

bool isVI(const MCSubtargetInfo &STI) {
  return STI.getFeatureBits()[AMDGPU::FeatureVolcanicIslands];
}

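// getMCReg below maps the generic FLAT_SCR register definitions onto the
// subtarget-specific CI or VI registers. For example (a sketch; STI is
// assumed to be an MCSubtargetInfo for a CI or VI target):
//
//   unsigned HWReg = getMCReg(AMDGPU::FLAT_SCR, STI);
//   // HWReg is AMDGPU::FLAT_SCR_ci on CI and AMDGPU::FLAT_SCR_vi on VI.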
unsigned getMCReg(unsigned Reg, const MCSubtargetInfo &STI) {
  switch(Reg) {
  default: break;
  case AMDGPU::FLAT_SCR:
    assert(!isSI(STI));
    return isCI(STI) ? AMDGPU::FLAT_SCR_ci : AMDGPU::FLAT_SCR_vi;

  case AMDGPU::FLAT_SCR_LO:
    assert(!isSI(STI));
    return isCI(STI) ? AMDGPU::FLAT_SCR_LO_ci : AMDGPU::FLAT_SCR_LO_vi;

  case AMDGPU::FLAT_SCR_HI:
    assert(!isSI(STI));
    return isCI(STI) ? AMDGPU::FLAT_SCR_HI_ci : AMDGPU::FLAT_SCR_HI_vi;
  }
  return Reg;
}

bool isSISrcOperand(const MCInstrDesc &Desc, unsigned OpNo) {
  assert(OpNo < Desc.NumOperands);
  unsigned OpType = Desc.OpInfo[OpNo].OperandType;
  return OpType >= AMDGPU::OPERAND_SRC_FIRST &&
         OpType <= AMDGPU::OPERAND_SRC_LAST;
}

bool isSISrcFPOperand(const MCInstrDesc &Desc, unsigned OpNo) {
  assert(OpNo < Desc.NumOperands);
  unsigned OpType = Desc.OpInfo[OpNo].OperandType;
  switch (OpType) {
  case AMDGPU::OPERAND_REG_IMM_FP32:
  case AMDGPU::OPERAND_REG_IMM_FP64:
  case AMDGPU::OPERAND_REG_IMM_FP16:
  case AMDGPU::OPERAND_REG_INLINE_C_FP32:
  case AMDGPU::OPERAND_REG_INLINE_C_FP64:
  case AMDGPU::OPERAND_REG_INLINE_C_FP16:
    return true;
  default:
    return false;
  }
}

bool isSISrcInlinableOperand(const MCInstrDesc &Desc, unsigned OpNo) {
  assert(OpNo < Desc.NumOperands);
  unsigned OpType = Desc.OpInfo[OpNo].OperandType;
  return OpType >= AMDGPU::OPERAND_REG_INLINE_C_FIRST &&
         OpType <= AMDGPU::OPERAND_REG_INLINE_C_LAST;
}

// Avoid using MCRegisterClass::getSize, since that function will go away
// (move from MC* level to Target* level). Return size in bits.
unsigned getRegBitWidth(unsigned RCID) {
  switch (RCID) {
  case AMDGPU::SGPR_32RegClassID:
  case AMDGPU::VGPR_32RegClassID:
  case AMDGPU::VS_32RegClassID:
  case AMDGPU::SReg_32RegClassID:
  case AMDGPU::SReg_32_XM0RegClassID:
    return 32;
  case AMDGPU::SGPR_64RegClassID:
  case AMDGPU::VS_64RegClassID:
  case AMDGPU::SReg_64RegClassID:
  case AMDGPU::VReg_64RegClassID:
    return 64;
  case AMDGPU::VReg_96RegClassID:
    return 96;
  case AMDGPU::SGPR_128RegClassID:
  case AMDGPU::SReg_128RegClassID:
  case AMDGPU::VReg_128RegClassID:
    return 128;
  case AMDGPU::SReg_256RegClassID:
  case AMDGPU::VReg_256RegClassID:
    return 256;
  case AMDGPU::SReg_512RegClassID:
  case AMDGPU::VReg_512RegClassID:
    return 512;
  default:
    llvm_unreachable("Unexpected register class");
  }
}

unsigned getRegBitWidth(const MCRegisterClass &RC) {
  return getRegBitWidth(RC.getID());
}

unsigned getRegOperandSize(const MCRegisterInfo *MRI, const MCInstrDesc &Desc,
                           unsigned OpNo) {
  assert(OpNo < Desc.NumOperands);
  unsigned RCID = Desc.OpInfo[OpNo].RegClass;
  return getRegBitWidth(MRI->getRegClass(RCID)) / 8;
}

bool isInlinableLiteral64(int64_t Literal, bool HasInv2Pi) {
  if (Literal >= -16 && Literal <= 64)
    return true;

  uint64_t Val = static_cast<uint64_t>(Literal);
  return (Val == DoubleToBits(0.0)) ||
         (Val == DoubleToBits(1.0)) ||
         (Val == DoubleToBits(-1.0)) ||
         (Val == DoubleToBits(0.5)) ||
         (Val == DoubleToBits(-0.5)) ||
         (Val == DoubleToBits(2.0)) ||
         (Val == DoubleToBits(-2.0)) ||
         (Val == DoubleToBits(4.0)) ||
         (Val == DoubleToBits(-4.0)) ||
         (Val == 0x3fc45f306dc9c882 && HasInv2Pi);
}

bool isInlinableLiteral32(int32_t Literal, bool HasInv2Pi) {
  if (Literal >= -16 && Literal <= 64)
    return true;

  // The actual type of the operand does not seem to matter as long
  // as the bits match one of the inline immediate values. For example:
  //
  // -nan has the hexadecimal encoding of 0xfffffffe which is -2 in decimal,
  // so it is a legal inline immediate.
  //
  // 1065353216 has the hexadecimal encoding 0x3f800000 which is 1.0f in
  // floating-point, so it is a legal inline immediate.

  uint32_t Val = static_cast<uint32_t>(Literal);
  return (Val == FloatToBits(0.0f)) ||
         (Val == FloatToBits(1.0f)) ||
         (Val == FloatToBits(-1.0f)) ||
         (Val == FloatToBits(0.5f)) ||
         (Val == FloatToBits(-0.5f)) ||
         (Val == FloatToBits(2.0f)) ||
         (Val == FloatToBits(-2.0f)) ||
         (Val == FloatToBits(4.0f)) ||
         (Val == FloatToBits(-4.0f)) ||
         (Val == 0x3e22f983 && HasInv2Pi);
}

bool isInlinableLiteral16(int16_t Literal, bool HasInv2Pi) {
  if (!HasInv2Pi)
    return false;

  if (Literal >= -16 && Literal <= 64)
    return true;

  uint16_t Val = static_cast<uint16_t>(Literal);
  return Val == 0x3C00 || // 1.0
         Val == 0xBC00 || // -1.0
         Val == 0x3800 || // 0.5
         Val == 0xB800 || // -0.5
         Val == 0x4000 || // 2.0
         Val == 0xC000 || // -2.0
         Val == 0x4400 || // 4.0
         Val == 0xC400 || // -4.0
         Val == 0x3118;   // 1/2pi
}

bool isUniformMMO(const MachineMemOperand *MMO) {
  const Value *Ptr = MMO->getValue();
  // UndefValue means this is a load of a kernel input. These are uniform.
  // Sometimes LDS instructions have constant pointers.
  // If Ptr is null, then that means this mem operand contains a
  // PseudoSourceValue like GOT.
  if (!Ptr || isa<UndefValue>(Ptr) || isa<Argument>(Ptr) ||
      isa<Constant>(Ptr) || isa<GlobalValue>(Ptr))
    return true;

  const Instruction *I = dyn_cast<Instruction>(Ptr);
  return I && I->getMetadata("amdgpu.uniform");
}

int64_t getSMRDEncodedOffset(const MCSubtargetInfo &ST, int64_t ByteOffset) {
  if (isSI(ST) || isCI(ST))
    return ByteOffset >> 2;

  return ByteOffset;
}

bool isLegalSMRDImmOffset(const MCSubtargetInfo &ST, int64_t ByteOffset) {
  int64_t EncodedOffset = getSMRDEncodedOffset(ST, ByteOffset);
  return isSI(ST) || isCI(ST) ? isUInt<8>(EncodedOffset) :
                                isUInt<20>(EncodedOffset);
}

} // End namespace AMDGPU
} // End namespace llvm