//===-- AMDGPUBaseInfo.cpp - AMDGPU Base encoding information ------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//

#include "AMDGPUBaseInfo.h"
#include "AMDGPU.h"
#include "SIDefines.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/GlobalValue.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCInstrInfo.h"
#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/MC/MCSectionELF.h"
#include "llvm/MC/MCSubtargetInfo.h"
#include "llvm/MC/SubtargetFeature.h"
#include <cstring>

#define GET_SUBTARGETINFO_ENUM
#include "AMDGPUGenSubtargetInfo.inc"
#undef GET_SUBTARGETINFO_ENUM

#define GET_REGINFO_ENUM
#include "AMDGPUGenRegisterInfo.inc"
#undef GET_REGINFO_ENUM

#define GET_INSTRINFO_NAMED_OPS
#define GET_INSTRINFO_ENUM
#include "AMDGPUGenInstrInfo.inc"
#undef GET_INSTRINFO_NAMED_OPS
#undef GET_INSTRINFO_ENUM
namespace {

/// \returns Bit mask for given bit \p Shift and bit \p Width.
unsigned getBitMask(unsigned Shift, unsigned Width) {
  return ((1 << Width) - 1) << Shift;
}

/// \brief Packs \p Src into \p Dst for given bit \p Shift and bit \p Width.
///
/// \returns Packed \p Dst.
unsigned packBits(unsigned Src, unsigned Dst, unsigned Shift, unsigned Width) {
  Dst &= ~getBitMask(Shift, Width);
  Dst |= (Src << Shift) & getBitMask(Shift, Width);
  return Dst;
}

/// \brief Unpacks bits from \p Src for given bit \p Shift and bit \p Width.
///
/// \returns Unpacked bits.
unsigned unpackBits(unsigned Src, unsigned Shift, unsigned Width) {
  return (Src & getBitMask(Shift, Width)) >> Shift;
}
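
// A quick sanity check of the helpers above (values worked out by hand):
//   getBitMask(4, 3)       == 0x70
//   packBits(5, 0, 4, 3)   == 0x50
//   unpackBits(0x50, 4, 3) == 5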

/// \returns Vmcnt bit shift.
unsigned getVmcntBitShift() { return 0; }

/// \returns Vmcnt bit width.
unsigned getVmcntBitWidth() { return 4; }

/// \returns Expcnt bit shift.
unsigned getExpcntBitShift() { return 4; }

/// \returns Expcnt bit width.
unsigned getExpcntBitWidth() { return 3; }

/// \returns Lgkmcnt bit shift.
unsigned getLgkmcntBitShift() { return 8; }

/// \returns Lgkmcnt bit width.
unsigned getLgkmcntBitWidth() { return 4; }
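
// Together, these describe the s_waitcnt simm16 layout used here:
//   bits  3:0  vmcnt
//   bits  6:4  expcnt
//   bits 11:8  lgkmcnt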

} // end anonymous namespace

namespace llvm {
namespace AMDGPU {

IsaVersion getIsaVersion(const FeatureBitset &Features) {
  if (Features.test(FeatureISAVersion7_0_0))
    return {7, 0, 0};

  if (Features.test(FeatureISAVersion7_0_1))
    return {7, 0, 1};

  if (Features.test(FeatureISAVersion7_0_2))
    return {7, 0, 2};

  if (Features.test(FeatureISAVersion8_0_0))
    return {8, 0, 0};

  if (Features.test(FeatureISAVersion8_0_1))
    return {8, 0, 1};

  if (Features.test(FeatureISAVersion8_0_2))
    return {8, 0, 2};

  if (Features.test(FeatureISAVersion8_0_3))
    return {8, 0, 3};

  if (Features.test(FeatureISAVersion8_0_4))
    return {8, 0, 4};

  if (Features.test(FeatureISAVersion8_1_0))
    return {8, 1, 0};

  // No recognized ISA version feature: report 0.0.0.
  return {0, 0, 0};
}

void initDefaultAMDKernelCodeT(amd_kernel_code_t &Header,
                               const FeatureBitset &Features) {
  IsaVersion ISA = getIsaVersion(Features);

  memset(&Header, 0, sizeof(Header));

  Header.amd_kernel_code_version_major = 1;
  Header.amd_kernel_code_version_minor = 0;
  Header.amd_machine_kind = 1; // AMD_MACHINE_KIND_AMDGPU
  Header.amd_machine_version_major = ISA.Major;
  Header.amd_machine_version_minor = ISA.Minor;
  Header.amd_machine_version_stepping = ISA.Stepping;
  Header.kernel_code_entry_byte_offset = sizeof(Header);
  // wavefront_size is specified as a power of 2: 2^6 = 64 threads.
  Header.wavefront_size = 6;
  // These alignment values are specified in powers of two, so alignment =
  // 2^n.  The minimum alignment is 2^4 = 16.
  Header.kernarg_segment_alignment = 4;
  Header.group_segment_alignment = 4;
  Header.private_segment_alignment = 4;
}
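
// A minimal usage sketch (hypothetical caller, for illustration only):
//   amd_kernel_code_t Header;
//   initDefaultAMDKernelCodeT(Header, STI.getFeatureBits());
//   // Header now encodes a 2^6 = 64-lane wavefront and 2^4 = 16-byte
//   // minimum segment alignments.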

MCSection *getHSATextSection(MCContext &Ctx) {
  return Ctx.getELFSection(".hsatext", ELF::SHT_PROGBITS,
                           ELF::SHF_ALLOC | ELF::SHF_WRITE |
                           ELF::SHF_EXECINSTR |
                           ELF::SHF_AMDGPU_HSA_AGENT |
                           ELF::SHF_AMDGPU_HSA_CODE);
}

MCSection *getHSADataGlobalAgentSection(MCContext &Ctx) {
  return Ctx.getELFSection(".hsadata_global_agent", ELF::SHT_PROGBITS,
                           ELF::SHF_ALLOC | ELF::SHF_WRITE |
                           ELF::SHF_AMDGPU_HSA_GLOBAL |
                           ELF::SHF_AMDGPU_HSA_AGENT);
}

MCSection *getHSADataGlobalProgramSection(MCContext &Ctx) {
  return Ctx.getELFSection(".hsadata_global_program", ELF::SHT_PROGBITS,
                           ELF::SHF_ALLOC | ELF::SHF_WRITE |
                           ELF::SHF_AMDGPU_HSA_GLOBAL);
}

MCSection *getHSARodataReadonlyAgentSection(MCContext &Ctx) {
  return Ctx.getELFSection(".hsarodata_readonly_agent", ELF::SHT_PROGBITS,
                           ELF::SHF_ALLOC | ELF::SHF_AMDGPU_HSA_READONLY |
                           ELF::SHF_AMDGPU_HSA_AGENT);
}

bool isGroupSegment(const GlobalValue *GV) {
  return GV->getType()->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS;
}

bool isGlobalSegment(const GlobalValue *GV) {
  return GV->getType()->getAddressSpace() == AMDGPUAS::GLOBAL_ADDRESS;
}

bool isReadOnlySegment(const GlobalValue *GV) {
  return GV->getType()->getAddressSpace() == AMDGPUAS::CONSTANT_ADDRESS;
}

bool shouldEmitConstantsToTextSection(const Triple &TT) {
  return TT.getOS() != Triple::AMDHSA;
}

int getIntegerAttribute(const Function &F, StringRef Name, int Default) {
  Attribute A = F.getFnAttribute(Name);
  int Result = Default;

  if (A.isStringAttribute()) {
    StringRef Str = A.getValueAsString();
    if (Str.getAsInteger(0, Result)) {
      LLVMContext &Ctx = F.getContext();
      Ctx.emitError("can't parse integer attribute " + Name);
    }
  }

  return Result;
}
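
// For example, an IR function carrying
//   attributes #0 = { "InitialPSInputAddr"="36" }
// yields getIntegerAttribute(F, "InitialPSInputAddr", 0) == 36.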

std::pair<int, int> getIntegerPairAttribute(const Function &F,
                                            StringRef Name,
                                            std::pair<int, int> Default,
                                            bool OnlyFirstRequired) {
  Attribute A = F.getFnAttribute(Name);
  if (!A.isStringAttribute())
    return Default;

  LLVMContext &Ctx = F.getContext();
  std::pair<int, int> Ints = Default;
  std::pair<StringRef, StringRef> Strs = A.getValueAsString().split(',');
  if (Strs.first.trim().getAsInteger(0, Ints.first)) {
    Ctx.emitError("can't parse first integer attribute " + Name);
    return Default;
  }
  if (Strs.second.trim().getAsInteger(0, Ints.second)) {
    if (!OnlyFirstRequired || Strs.second.trim().size()) {
      Ctx.emitError("can't parse second integer attribute " + Name);
      return Default;
    }
  }

  return Ints;
}
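
// The attribute value is "<first>[,<second>]"; e.g. "128,256" parses to
// {128, 256}, and with OnlyFirstRequired set, a bare "128" parses to
// {128, Default.second} without raising an error.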

// The IsaVersion parameter is accepted for interface uniformity; the
// waitcnt layout is the same on all ISAs handled here.
unsigned getWaitcntBitMask(IsaVersion Version) {
  unsigned Vmcnt = getBitMask(getVmcntBitShift(), getVmcntBitWidth());
  unsigned Expcnt = getBitMask(getExpcntBitShift(), getExpcntBitWidth());
  unsigned Lgkmcnt = getBitMask(getLgkmcntBitShift(), getLgkmcntBitWidth());
  return Vmcnt | Expcnt | Lgkmcnt;
}

unsigned getVmcntBitMask(IsaVersion Version) {
  return (1 << getVmcntBitWidth()) - 1;
}

unsigned getExpcntBitMask(IsaVersion Version) {
  return (1 << getExpcntBitWidth()) - 1;
}

unsigned getLgkmcntBitMask(IsaVersion Version) {
  return (1 << getLgkmcntBitWidth()) - 1;
}

unsigned decodeVmcnt(IsaVersion Version, unsigned Waitcnt) {
  return unpackBits(Waitcnt, getVmcntBitShift(), getVmcntBitWidth());
}

unsigned decodeExpcnt(IsaVersion Version, unsigned Waitcnt) {
  return unpackBits(Waitcnt, getExpcntBitShift(), getExpcntBitWidth());
}

unsigned decodeLgkmcnt(IsaVersion Version, unsigned Waitcnt) {
  return unpackBits(Waitcnt, getLgkmcntBitShift(), getLgkmcntBitWidth());
}

void decodeWaitcnt(IsaVersion Version, unsigned Waitcnt,
                   unsigned &Vmcnt, unsigned &Expcnt, unsigned &Lgkmcnt) {
  Vmcnt = decodeVmcnt(Version, Waitcnt);
  Expcnt = decodeExpcnt(Version, Waitcnt);
  Lgkmcnt = decodeLgkmcnt(Version, Waitcnt);
}

unsigned encodeVmcnt(IsaVersion Version, unsigned Waitcnt, unsigned Vmcnt) {
  return packBits(Vmcnt, Waitcnt, getVmcntBitShift(), getVmcntBitWidth());
}

unsigned encodeExpcnt(IsaVersion Version, unsigned Waitcnt, unsigned Expcnt) {
  return packBits(Expcnt, Waitcnt, getExpcntBitShift(), getExpcntBitWidth());
}

unsigned encodeLgkmcnt(IsaVersion Version, unsigned Waitcnt, unsigned Lgkmcnt) {
  return packBits(Lgkmcnt, Waitcnt, getLgkmcntBitShift(), getLgkmcntBitWidth());
}

unsigned encodeWaitcnt(IsaVersion Version,
                       unsigned Vmcnt, unsigned Expcnt, unsigned Lgkmcnt) {
  // Start from the "wait on nothing" value (all counters saturated), then
  // pack each field in turn.
  unsigned Waitcnt = getWaitcntBitMask(Version);
  Waitcnt = encodeVmcnt(Version, Waitcnt, Vmcnt);
  Waitcnt = encodeExpcnt(Version, Waitcnt, Expcnt);
  Waitcnt = encodeLgkmcnt(Version, Waitcnt, Lgkmcnt);
  return Waitcnt;
}
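
// Worked example (layout as defined above):
//   encodeWaitcnt(V, /*Vmcnt=*/3, /*Expcnt=*/7, /*Lgkmcnt=*/15) == 0xf73
//   decodeWaitcnt(V, 0xf73, ...) recovers Vmcnt=3, Expcnt=7, Lgkmcnt=15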

unsigned getInitialPSInputAddr(const Function &F) {
  return getIntegerAttribute(F, "InitialPSInputAddr", 0);
}

bool isShader(CallingConv::ID cc) {
  switch (cc) {
  case CallingConv::AMDGPU_VS:
  case CallingConv::AMDGPU_GS:
  case CallingConv::AMDGPU_PS:
  case CallingConv::AMDGPU_CS:
    return true;
  default:
    return false;
  }
}

bool isCompute(CallingConv::ID cc) {
  // Everything that is not a graphics shader counts as compute, and so do
  // compute shaders themselves.
  return !isShader(cc) || cc == CallingConv::AMDGPU_CS;
}

bool isSI(const MCSubtargetInfo &STI) {
  return STI.getFeatureBits()[AMDGPU::FeatureSouthernIslands];
}

bool isCI(const MCSubtargetInfo &STI) {
  return STI.getFeatureBits()[AMDGPU::FeatureSeaIslands];
}

bool isVI(const MCSubtargetInfo &STI) {
  return STI.getFeatureBits()[AMDGPU::FeatureVolcanicIslands];
}

unsigned getMCReg(unsigned Reg, const MCSubtargetInfo &STI) {
  // The flat scratch registers have different hardware encodings on CI and
  // VI, so map the generic register to the subtarget-specific one. SI has
  // no flat scratch registers.
  switch (Reg) {
  default: break;
  case AMDGPU::FLAT_SCR:
    assert(!isSI(STI));
    return isCI(STI) ? AMDGPU::FLAT_SCR_ci : AMDGPU::FLAT_SCR_vi;

  case AMDGPU::FLAT_SCR_LO:
    assert(!isSI(STI));
    return isCI(STI) ? AMDGPU::FLAT_SCR_LO_ci : AMDGPU::FLAT_SCR_LO_vi;

  case AMDGPU::FLAT_SCR_HI:
    assert(!isSI(STI));
    return isCI(STI) ? AMDGPU::FLAT_SCR_HI_ci : AMDGPU::FLAT_SCR_HI_vi;
  }
  return Reg;
}
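
// For instance, on a VI subtarget getMCReg(AMDGPU::FLAT_SCR, STI) returns
// AMDGPU::FLAT_SCR_vi; every other register is returned unchanged.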

bool isSISrcOperand(const MCInstrDesc &Desc, unsigned OpNo) {
  unsigned OpType = Desc.OpInfo[OpNo].OperandType;

  return OpType == AMDGPU::OPERAND_REG_IMM32_INT ||
         OpType == AMDGPU::OPERAND_REG_IMM32_FP ||
         OpType == AMDGPU::OPERAND_REG_INLINE_C_INT ||
         OpType == AMDGPU::OPERAND_REG_INLINE_C_FP;
}

bool isSISrcFPOperand(const MCInstrDesc &Desc, unsigned OpNo) {
  unsigned OpType = Desc.OpInfo[OpNo].OperandType;

  return OpType == AMDGPU::OPERAND_REG_IMM32_FP ||
         OpType == AMDGPU::OPERAND_REG_INLINE_C_FP;
}

bool isSISrcInlinableOperand(const MCInstrDesc &Desc, unsigned OpNo) {
  unsigned OpType = Desc.OpInfo[OpNo].OperandType;

  return OpType == AMDGPU::OPERAND_REG_INLINE_C_INT ||
         OpType == AMDGPU::OPERAND_REG_INLINE_C_FP;
}

// Avoid using MCRegisterClass::getSize, since that function will go away
// (move from MC* level to Target* level). Return size in bits.
unsigned getRegBitWidth(unsigned RCID) {
  switch (RCID) {
  case AMDGPU::SGPR_32RegClassID:
  case AMDGPU::VGPR_32RegClassID:
  case AMDGPU::VS_32RegClassID:
  case AMDGPU::SReg_32RegClassID:
  case AMDGPU::SReg_32_XM0RegClassID:
    return 32;
  case AMDGPU::SGPR_64RegClassID:
  case AMDGPU::VS_64RegClassID:
  case AMDGPU::SReg_64RegClassID:
  case AMDGPU::VReg_64RegClassID:
    return 64;
  case AMDGPU::VReg_96RegClassID:
    return 96;
  case AMDGPU::SGPR_128RegClassID:
  case AMDGPU::SReg_128RegClassID:
  case AMDGPU::VReg_128RegClassID:
    return 128;
  case AMDGPU::SReg_256RegClassID:
  case AMDGPU::VReg_256RegClassID:
    return 256;
  case AMDGPU::SReg_512RegClassID:
  case AMDGPU::VReg_512RegClassID:
    return 512;
  default:
    llvm_unreachable("Unexpected register class");
  }
}

unsigned getRegBitWidth(const MCRegisterClass &RC) {
  return getRegBitWidth(RC.getID());
}

unsigned getRegOperandSize(const MCRegisterInfo *MRI, const MCInstrDesc &Desc,
                           unsigned OpNo) {
  unsigned RCID = Desc.OpInfo[OpNo].RegClass;
  // getRegBitWidth returns bits; the operand size is reported in bytes.
  return getRegBitWidth(MRI->getRegClass(RCID)) / 8;
}

bool isInlinableLiteral64(int64_t Literal, bool IsVI) {
  // Inline integer immediates.
  if (Literal >= -16 && Literal <= 64)
    return true;

  // Inline floating-point immediates.
  double D = BitsToDouble(Literal);

  if (D == 0.5 || D == -0.5 ||
      D == 1.0 || D == -1.0 ||
      D == 2.0 || D == -2.0 ||
      D == 4.0 || D == -4.0)
    return true;

  // VI adds 1/(2*pi) as an inline immediate; this is its double bit pattern.
  if (IsVI && Literal == 0x3fc45f306dc9c882)
    return true;

  return false;
}

bool isInlinableLiteral32(int32_t Literal, bool IsVI) {
  // Inline integer immediates.
  if (Literal >= -16 && Literal <= 64)
    return true;

  // Inline floating-point immediates.
  float F = BitsToFloat(Literal);

  if (F == 0.5 || F == -0.5 ||
      F == 1.0 || F == -1.0 ||
      F == 2.0 || F == -2.0 ||
      F == 4.0 || F == -4.0)
    return true;

  // VI adds 1/(2*pi) as an inline immediate; this is its float bit pattern.
  if (IsVI && Literal == 0x3e22f983)
    return true;

  return false;
}
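
// Illustration: isInlinableLiteral32(FloatToBits(0.5f), false) is true,
// while the bits of 0.1f are not inlinable and would need a 32-bit literal.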

} // End namespace AMDGPU
} // End namespace llvm