1 //===- AMDGPUBaseInfo.cpp - AMDGPU Base encoding information --------------===//
2 //
3 //                     The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 
10 #include "AMDGPUBaseInfo.h"
11 #include "AMDGPUTargetTransformInfo.h"
12 #include "AMDGPU.h"
13 #include "SIDefines.h"
14 #include "llvm/ADT/StringRef.h"
15 #include "llvm/ADT/Triple.h"
16 #include "llvm/BinaryFormat/ELF.h"
17 #include "llvm/CodeGen/MachineMemOperand.h"
18 #include "llvm/IR/Attributes.h"
19 #include "llvm/IR/Constants.h"
20 #include "llvm/IR/Function.h"
21 #include "llvm/IR/GlobalValue.h"
22 #include "llvm/IR/Instruction.h"
23 #include "llvm/IR/LLVMContext.h"
24 #include "llvm/IR/Module.h"
25 #include "llvm/MC/MCContext.h"
26 #include "llvm/MC/MCInstrDesc.h"
27 #include "llvm/MC/MCInstrInfo.h"
28 #include "llvm/MC/MCRegisterInfo.h"
29 #include "llvm/MC/MCSectionELF.h"
30 #include "llvm/MC/MCSubtargetInfo.h"
31 #include "llvm/MC/SubtargetFeature.h"
32 #include "llvm/Support/Casting.h"
33 #include "llvm/Support/ErrorHandling.h"
34 #include "llvm/Support/MathExtras.h"
35 #include <algorithm>
36 #include <cassert>
37 #include <cstdint>
38 #include <cstring>
39 #include <utility>
40 
41 #include "MCTargetDesc/AMDGPUMCTargetDesc.h"
42 
43 #define GET_INSTRINFO_NAMED_OPS
44 #define GET_INSTRMAP_INFO
45 #include "AMDGPUGenInstrInfo.inc"
46 #undef GET_INSTRMAP_INFO
47 #undef GET_INSTRINFO_NAMED_OPS
48 
49 namespace {
50 
51 /// \returns Bit mask for given bit \p Shift and bit \p Width.
/// \returns A mask of \p Width consecutive one bits starting at bit position
/// \p Shift.
unsigned getBitMask(unsigned Shift, unsigned Width) {
  const unsigned LowMask = (1 << Width) - 1;
  return LowMask << Shift;
}
55 
56 /// Packs \p Src into \p Dst for given bit \p Shift and bit \p Width.
57 ///
58 /// \returns Packed \p Dst.
59 unsigned packBits(unsigned Src, unsigned Dst, unsigned Shift, unsigned Width) {
60   Dst &= ~(1 << Shift) & ~getBitMask(Shift, Width);
61   Dst |= (Src << Shift) & getBitMask(Shift, Width);
62   return Dst;
63 }
64 
65 /// Unpacks bits from \p Src for given bit \p Shift and bit \p Width.
66 ///
67 /// \returns Unpacked bits.
/// Unpacks bits from \p Src for given bit \p Shift and bit \p Width.
///
/// \returns Unpacked bits (the field value, shifted down to bit 0).
unsigned unpackBits(unsigned Src, unsigned Shift, unsigned Width) {
  const unsigned LowMask = (1 << Width) - 1;
  return (Src >> Shift) & LowMask;
}
71 
/// \returns Bit position of the low part of the Vmcnt field.
unsigned getVmcntBitShiftLo() {
  return 0;
}

/// \returns Width in bits of the low part of the Vmcnt field.
unsigned getVmcntBitWidthLo() {
  return 4;
}

/// \returns Bit position of the Expcnt field.
unsigned getExpcntBitShift() {
  return 4;
}

/// \returns Width in bits of the Expcnt field.
unsigned getExpcntBitWidth() {
  return 3;
}

/// \returns Bit position of the Lgkmcnt field.
unsigned getLgkmcntBitShift() {
  return 8;
}

/// \returns Width in bits of the Lgkmcnt field.
unsigned getLgkmcntBitWidth() {
  return 4;
}

/// \returns Bit position of the high part of the Vmcnt field.
unsigned getVmcntBitShiftHi() {
  return 14;
}

/// \returns Width in bits of the high part of the Vmcnt field.
unsigned getVmcntBitWidthHi() {
  return 2;
}
95 
96 } // end namespace anonymous
97 
98 namespace llvm {
99 
100 namespace AMDGPU {
101 
// Row layout of the tablegen-generated MIMG opcode table (see the
// GET_MIMGInfoTable_IMPL include below).  Field order and widths must match
// the initializers emitted into AMDGPUGenSearchableTables.inc; do not reorder
// or resize these members.
struct MIMGInfo {
  uint16_t Opcode;       // Target opcode of this MIMG variant.
  uint16_t BaseOpcode;   // Encoding-independent base opcode.
  uint8_t MIMGEncoding;  // MIMG encoding family this opcode belongs to.
  uint8_t VDataDwords;   // Dword count of the vdata operand.
  uint8_t VAddrDwords;   // Dword count of the vaddr operand.
};
109 
110 #define GET_MIMGBaseOpcodesTable_IMPL
111 #define GET_MIMGDimInfoTable_IMPL
112 #define GET_MIMGInfoTable_IMPL
113 #include "AMDGPUGenSearchableTables.inc"
114 
115 int getMIMGOpcode(unsigned BaseOpcode, unsigned MIMGEncoding,
116                   unsigned VDataDwords, unsigned VAddrDwords) {
117   const MIMGInfo *Info = getMIMGOpcodeHelper(BaseOpcode, MIMGEncoding,
118                                              VDataDwords, VAddrDwords);
119   return Info ? Info->Opcode : -1;
120 }
121 
122 int getMaskedMIMGOp(unsigned Opc, unsigned NewChannels) {
123   const MIMGInfo *OrigInfo = getMIMGInfo(Opc);
124   const MIMGInfo *NewInfo =
125       getMIMGOpcodeHelper(OrigInfo->BaseOpcode, OrigInfo->MIMGEncoding,
126                           NewChannels, OrigInfo->VAddrDwords);
127   return NewInfo ? NewInfo->Opcode : -1;
128 }
129 
130 // Wrapper for Tablegen'd function.  enum Subtarget is not defined in any
131 // header files, so we need to wrap it in a function that takes unsigned
132 // instead.
133 int getMCOpcode(uint16_t Opcode, unsigned Gen) {
134   return getMCOpcodeGen(Opcode, static_cast<Subtarget>(Gen));
135 }
136 
137 namespace IsaInfo {
138 
139 IsaVersion getIsaVersion(const FeatureBitset &Features) {
140   // GCN GFX6 (Southern Islands (SI)).
141   if (Features.test(FeatureISAVersion6_0_0))
142     return {6, 0, 0};
143   if (Features.test(FeatureISAVersion6_0_1))
144     return {6, 0, 1};
145 
146   // GCN GFX7 (Sea Islands (CI)).
147   if (Features.test(FeatureISAVersion7_0_0))
148     return {7, 0, 0};
149   if (Features.test(FeatureISAVersion7_0_1))
150     return {7, 0, 1};
151   if (Features.test(FeatureISAVersion7_0_2))
152     return {7, 0, 2};
153   if (Features.test(FeatureISAVersion7_0_3))
154     return {7, 0, 3};
155   if (Features.test(FeatureISAVersion7_0_4))
156     return {7, 0, 4};
157   if (Features.test(FeatureSeaIslands))
158     return {7, 0, 0};
159 
160   // GCN GFX8 (Volcanic Islands (VI)).
161   if (Features.test(FeatureISAVersion8_0_1))
162     return {8, 0, 1};
163   if (Features.test(FeatureISAVersion8_0_2))
164     return {8, 0, 2};
165   if (Features.test(FeatureISAVersion8_0_3))
166     return {8, 0, 3};
167   if (Features.test(FeatureISAVersion8_1_0))
168     return {8, 1, 0};
169   if (Features.test(FeatureVolcanicIslands))
170     return {8, 0, 0};
171 
172   // GCN GFX9.
173   if (Features.test(FeatureISAVersion9_0_0))
174     return {9, 0, 0};
175   if (Features.test(FeatureISAVersion9_0_2))
176     return {9, 0, 2};
177   if (Features.test(FeatureISAVersion9_0_4))
178     return {9, 0, 4};
179   if (Features.test(FeatureISAVersion9_0_6))
180     return {9, 0, 6};
181   if (Features.test(FeatureGFX9))
182     return {9, 0, 0};
183 
184   if (!Features.test(FeatureGCN) || Features.test(FeatureSouthernIslands))
185     return {0, 0, 0};
186   return {7, 0, 0};
187 }
188 
189 void streamIsaVersion(const MCSubtargetInfo *STI, raw_ostream &Stream) {
190   auto TargetTriple = STI->getTargetTriple();
191   auto ISAVersion = IsaInfo::getIsaVersion(STI->getFeatureBits());
192 
193   Stream << TargetTriple.getArchName() << '-'
194          << TargetTriple.getVendorName() << '-'
195          << TargetTriple.getOSName() << '-'
196          << TargetTriple.getEnvironmentName() << '-'
197          << "gfx"
198          << ISAVersion.Major
199          << ISAVersion.Minor
200          << ISAVersion.Stepping;
201 
202   if (hasXNACK(*STI))
203     Stream << "+xnack";
204 
205   Stream.flush();
206 }
207 
208 bool hasCodeObjectV3(const MCSubtargetInfo *STI) {
209   return STI->getFeatureBits().test(FeatureCodeObjectV3);
210 }
211 
212 unsigned getWavefrontSize(const FeatureBitset &Features) {
213   if (Features.test(FeatureWavefrontSize16))
214     return 16;
215   if (Features.test(FeatureWavefrontSize32))
216     return 32;
217 
218   return 64;
219 }
220 
221 unsigned getLocalMemorySize(const FeatureBitset &Features) {
222   if (Features.test(FeatureLocalMemorySize32768))
223     return 32768;
224   if (Features.test(FeatureLocalMemorySize65536))
225     return 65536;
226 
227   return 0;
228 }
229 
230 unsigned getEUsPerCU(const FeatureBitset &Features) {
231   return 4;
232 }
233 
234 unsigned getMaxWorkGroupsPerCU(const FeatureBitset &Features,
235                                unsigned FlatWorkGroupSize) {
236   if (!Features.test(FeatureGCN))
237     return 8;
238   unsigned N = getWavesPerWorkGroup(Features, FlatWorkGroupSize);
239   if (N == 1)
240     return 40;
241   N = 40 / N;
242   return std::min(N, 16u);
243 }
244 
245 unsigned getMaxWavesPerCU(const FeatureBitset &Features) {
246   return getMaxWavesPerEU(Features) * getEUsPerCU(Features);
247 }
248 
249 unsigned getMaxWavesPerCU(const FeatureBitset &Features,
250                           unsigned FlatWorkGroupSize) {
251   return getWavesPerWorkGroup(Features, FlatWorkGroupSize);
252 }
253 
254 unsigned getMinWavesPerEU(const FeatureBitset &Features) {
255   return 1;
256 }
257 
258 unsigned getMaxWavesPerEU(const FeatureBitset &Features) {
259   if (!Features.test(FeatureGCN))
260     return 8;
261   // FIXME: Need to take scratch memory into account.
262   return 10;
263 }
264 
265 unsigned getMaxWavesPerEU(const FeatureBitset &Features,
266                           unsigned FlatWorkGroupSize) {
267   return alignTo(getMaxWavesPerCU(Features, FlatWorkGroupSize),
268                  getEUsPerCU(Features)) / getEUsPerCU(Features);
269 }
270 
271 unsigned getMinFlatWorkGroupSize(const FeatureBitset &Features) {
272   return 1;
273 }
274 
275 unsigned getMaxFlatWorkGroupSize(const FeatureBitset &Features) {
276   return 2048;
277 }
278 
279 unsigned getWavesPerWorkGroup(const FeatureBitset &Features,
280                               unsigned FlatWorkGroupSize) {
281   return alignTo(FlatWorkGroupSize, getWavefrontSize(Features)) /
282                  getWavefrontSize(Features);
283 }
284 
285 unsigned getSGPRAllocGranule(const FeatureBitset &Features) {
286   IsaVersion Version = getIsaVersion(Features);
287   if (Version.Major >= 8)
288     return 16;
289   return 8;
290 }
291 
292 unsigned getSGPREncodingGranule(const FeatureBitset &Features) {
293   return 8;
294 }
295 
296 unsigned getTotalNumSGPRs(const FeatureBitset &Features) {
297   IsaVersion Version = getIsaVersion(Features);
298   if (Version.Major >= 8)
299     return 800;
300   return 512;
301 }
302 
303 unsigned getAddressableNumSGPRs(const FeatureBitset &Features) {
304   if (Features.test(FeatureSGPRInitBug))
305     return FIXED_NUM_SGPRS_FOR_INIT_BUG;
306 
307   IsaVersion Version = getIsaVersion(Features);
308   if (Version.Major >= 8)
309     return 102;
310   return 104;
311 }
312 
313 unsigned getMinNumSGPRs(const FeatureBitset &Features, unsigned WavesPerEU) {
314   assert(WavesPerEU != 0);
315 
316   if (WavesPerEU >= getMaxWavesPerEU(Features))
317     return 0;
318 
319   unsigned MinNumSGPRs = getTotalNumSGPRs(Features) / (WavesPerEU + 1);
320   if (Features.test(FeatureTrapHandler))
321     MinNumSGPRs -= std::min(MinNumSGPRs, (unsigned)TRAP_NUM_SGPRS);
322   MinNumSGPRs = alignDown(MinNumSGPRs, getSGPRAllocGranule(Features)) + 1;
323   return std::min(MinNumSGPRs, getAddressableNumSGPRs(Features));
324 }
325 
326 unsigned getMaxNumSGPRs(const FeatureBitset &Features, unsigned WavesPerEU,
327                         bool Addressable) {
328   assert(WavesPerEU != 0);
329 
330   IsaVersion Version = getIsaVersion(Features);
331   unsigned AddressableNumSGPRs = getAddressableNumSGPRs(Features);
332   if (Version.Major >= 8 && !Addressable)
333     AddressableNumSGPRs = 112;
334   unsigned MaxNumSGPRs = getTotalNumSGPRs(Features) / WavesPerEU;
335   if (Features.test(FeatureTrapHandler))
336     MaxNumSGPRs -= std::min(MaxNumSGPRs, (unsigned)TRAP_NUM_SGPRS);
337   MaxNumSGPRs = alignDown(MaxNumSGPRs, getSGPRAllocGranule(Features));
338   return std::min(MaxNumSGPRs, AddressableNumSGPRs);
339 }
340 
341 unsigned getNumExtraSGPRs(const FeatureBitset &Features, bool VCCUsed,
342                           bool FlatScrUsed, bool XNACKUsed) {
343   unsigned ExtraSGPRs = 0;
344   if (VCCUsed)
345     ExtraSGPRs = 2;
346 
347   IsaVersion Version = getIsaVersion(Features);
348   if (Version.Major < 8) {
349     if (FlatScrUsed)
350       ExtraSGPRs = 4;
351   } else {
352     if (XNACKUsed)
353       ExtraSGPRs = 4;
354 
355     if (FlatScrUsed)
356       ExtraSGPRs = 6;
357   }
358 
359   return ExtraSGPRs;
360 }
361 
362 unsigned getNumExtraSGPRs(const FeatureBitset &Features, bool VCCUsed,
363                           bool FlatScrUsed) {
364   return getNumExtraSGPRs(Features, VCCUsed, FlatScrUsed,
365                           Features[AMDGPU::FeatureXNACK]);
366 }
367 
368 unsigned getNumSGPRBlocks(const FeatureBitset &Features, unsigned NumSGPRs) {
369   NumSGPRs = alignTo(std::max(1u, NumSGPRs), getSGPREncodingGranule(Features));
370   // SGPRBlocks is actual number of SGPR blocks minus 1.
371   return NumSGPRs / getSGPREncodingGranule(Features) - 1;
372 }
373 
374 unsigned getVGPRAllocGranule(const FeatureBitset &Features) {
375   return 4;
376 }
377 
378 unsigned getVGPREncodingGranule(const FeatureBitset &Features) {
379   return getVGPRAllocGranule(Features);
380 }
381 
382 unsigned getTotalNumVGPRs(const FeatureBitset &Features) {
383   return 256;
384 }
385 
386 unsigned getAddressableNumVGPRs(const FeatureBitset &Features) {
387   return getTotalNumVGPRs(Features);
388 }
389 
390 unsigned getMinNumVGPRs(const FeatureBitset &Features, unsigned WavesPerEU) {
391   assert(WavesPerEU != 0);
392 
393   if (WavesPerEU >= getMaxWavesPerEU(Features))
394     return 0;
395   unsigned MinNumVGPRs =
396       alignDown(getTotalNumVGPRs(Features) / (WavesPerEU + 1),
397                 getVGPRAllocGranule(Features)) + 1;
398   return std::min(MinNumVGPRs, getAddressableNumVGPRs(Features));
399 }
400 
401 unsigned getMaxNumVGPRs(const FeatureBitset &Features, unsigned WavesPerEU) {
402   assert(WavesPerEU != 0);
403 
404   unsigned MaxNumVGPRs = alignDown(getTotalNumVGPRs(Features) / WavesPerEU,
405                                    getVGPRAllocGranule(Features));
406   unsigned AddressableNumVGPRs = getAddressableNumVGPRs(Features);
407   return std::min(MaxNumVGPRs, AddressableNumVGPRs);
408 }
409 
410 unsigned getNumVGPRBlocks(const FeatureBitset &Features, unsigned NumVGPRs) {
411   NumVGPRs = alignTo(std::max(1u, NumVGPRs), getVGPREncodingGranule(Features));
412   // VGPRBlocks is actual number of VGPR blocks minus 1.
413   return NumVGPRs / getVGPREncodingGranule(Features) - 1;
414 }
415 
416 } // end namespace IsaInfo
417 
/// Fills \p Header with the default amd_kernel_code_t values for the target
/// described by \p Features: zeroes everything, then sets the code-object
/// version, machine/ISA version, entry offset, wavefront size, call
/// convention sentinel and segment alignments.
void initDefaultAMDKernelCodeT(amd_kernel_code_t &Header,
                               const FeatureBitset &Features) {
  IsaInfo::IsaVersion ISA = IsaInfo::getIsaVersion(Features);

  // Start from all-zero so every field not set below has a defined value.
  memset(&Header, 0, sizeof(Header));

  Header.amd_kernel_code_version_major = 1;
  Header.amd_kernel_code_version_minor = 2;
  Header.amd_machine_kind = 1; // AMD_MACHINE_KIND_AMDGPU
  Header.amd_machine_version_major = ISA.Major;
  Header.amd_machine_version_minor = ISA.Minor;
  Header.amd_machine_version_stepping = ISA.Stepping;
  // Machine code is laid out immediately after this header.
  Header.kernel_code_entry_byte_offset = sizeof(Header);
  // wavefront_size is specified as a power of 2: 2^6 = 64 threads.
  Header.wavefront_size = 6;

  // If the code object does not support indirect functions, then the value must
  // be 0xffffffff.
  Header.call_convention = -1;

  // These alignment values are specified in powers of two, so alignment =
  // 2^n.  The minimum alignment is 2^4 = 16.
  Header.kernarg_segment_alignment = 4;
  Header.group_segment_alignment = 4;
  Header.private_segment_alignment = 4;
}
444 
/// \returns A zero-initialized AMDHSA kernel descriptor with the default
/// bits set: flush-none FP denorm mode for 16/64-bit ops, DX10 clamp, IEEE
/// mode, and the workgroup-id X SGPR enabled.  All other fields remain zero.
amdhsa::kernel_descriptor_t getDefaultAmdhsaKernelDescriptor() {
  amdhsa::kernel_descriptor_t KD;
  memset(&KD, 0, sizeof(KD));
  AMDHSA_BITS_SET(KD.compute_pgm_rsrc1,
                  amdhsa::COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_16_64,
                  amdhsa::FLOAT_DENORM_MODE_FLUSH_NONE);
  AMDHSA_BITS_SET(KD.compute_pgm_rsrc1,
                  amdhsa::COMPUTE_PGM_RSRC1_ENABLE_DX10_CLAMP, 1);
  AMDHSA_BITS_SET(KD.compute_pgm_rsrc1,
                  amdhsa::COMPUTE_PGM_RSRC1_ENABLE_IEEE_MODE, 1);
  AMDHSA_BITS_SET(KD.compute_pgm_rsrc2,
                  amdhsa::COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_X, 1);
  return KD;
}
459 
460 bool isGroupSegment(const GlobalValue *GV) {
461   return GV->getType()->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS;
462 }
463 
464 bool isGlobalSegment(const GlobalValue *GV) {
465   return GV->getType()->getAddressSpace() == AMDGPUAS::GLOBAL_ADDRESS;
466 }
467 
468 bool isReadOnlySegment(const GlobalValue *GV) {
469   return GV->getType()->getAddressSpace() == AMDGPUAS::CONSTANT_ADDRESS ||
470          GV->getType()->getAddressSpace() == AMDGPUAS::CONSTANT_ADDRESS_32BIT;
471 }
472 
473 bool shouldEmitConstantsToTextSection(const Triple &TT) {
474   return TT.getOS() != Triple::AMDHSA;
475 }
476 
477 int getIntegerAttribute(const Function &F, StringRef Name, int Default) {
478   Attribute A = F.getFnAttribute(Name);
479   int Result = Default;
480 
481   if (A.isStringAttribute()) {
482     StringRef Str = A.getValueAsString();
483     if (Str.getAsInteger(0, Result)) {
484       LLVMContext &Ctx = F.getContext();
485       Ctx.emitError("can't parse integer attribute " + Name);
486     }
487   }
488 
489   return Result;
490 }
491 
/// Parses the string function attribute \p Name as a comma-separated pair of
/// integers (e.g. "1,2").
///
/// \returns The parsed pair.  Returns \p Default when the attribute is
/// absent, and emits an error and returns \p Default when a required value
/// fails to parse.  With \p OnlyFirstRequired set, an empty second field is
/// accepted and \p Default's second element is kept.
std::pair<int, int> getIntegerPairAttribute(const Function &F,
                                            StringRef Name,
                                            std::pair<int, int> Default,
                                            bool OnlyFirstRequired) {
  Attribute A = F.getFnAttribute(Name);
  if (!A.isStringAttribute())
    return Default;

  LLVMContext &Ctx = F.getContext();
  std::pair<int, int> Ints = Default;
  std::pair<StringRef, StringRef> Strs = A.getValueAsString().split(',');
  if (Strs.first.trim().getAsInteger(0, Ints.first)) {
    Ctx.emitError("can't parse first integer attribute " + Name);
    return Default;
  }
  if (Strs.second.trim().getAsInteger(0, Ints.second)) {
    // Only an error when a second value is required or one was supplied but
    // is malformed.
    if (!OnlyFirstRequired || !Strs.second.trim().empty()) {
      Ctx.emitError("can't parse second integer attribute " + Name);
      return Default;
    }
  }

  return Ints;
}
516 
517 unsigned getVmcntBitMask(const IsaInfo::IsaVersion &Version) {
518   unsigned VmcntLo = (1 << getVmcntBitWidthLo()) - 1;
519   if (Version.Major < 9)
520     return VmcntLo;
521 
522   unsigned VmcntHi = ((1 << getVmcntBitWidthHi()) - 1) << getVmcntBitWidthLo();
523   return VmcntLo | VmcntHi;
524 }
525 
526 unsigned getExpcntBitMask(const IsaInfo::IsaVersion &Version) {
527   return (1 << getExpcntBitWidth()) - 1;
528 }
529 
530 unsigned getLgkmcntBitMask(const IsaInfo::IsaVersion &Version) {
531   return (1 << getLgkmcntBitWidth()) - 1;
532 }
533 
534 unsigned getWaitcntBitMask(const IsaInfo::IsaVersion &Version) {
535   unsigned VmcntLo = getBitMask(getVmcntBitShiftLo(), getVmcntBitWidthLo());
536   unsigned Expcnt = getBitMask(getExpcntBitShift(), getExpcntBitWidth());
537   unsigned Lgkmcnt = getBitMask(getLgkmcntBitShift(), getLgkmcntBitWidth());
538   unsigned Waitcnt = VmcntLo | Expcnt | Lgkmcnt;
539   if (Version.Major < 9)
540     return Waitcnt;
541 
542   unsigned VmcntHi = getBitMask(getVmcntBitShiftHi(), getVmcntBitWidthHi());
543   return Waitcnt | VmcntHi;
544 }
545 
546 unsigned decodeVmcnt(const IsaInfo::IsaVersion &Version, unsigned Waitcnt) {
547   unsigned VmcntLo =
548       unpackBits(Waitcnt, getVmcntBitShiftLo(), getVmcntBitWidthLo());
549   if (Version.Major < 9)
550     return VmcntLo;
551 
552   unsigned VmcntHi =
553       unpackBits(Waitcnt, getVmcntBitShiftHi(), getVmcntBitWidthHi());
554   VmcntHi <<= getVmcntBitWidthLo();
555   return VmcntLo | VmcntHi;
556 }
557 
558 unsigned decodeExpcnt(const IsaInfo::IsaVersion &Version, unsigned Waitcnt) {
559   return unpackBits(Waitcnt, getExpcntBitShift(), getExpcntBitWidth());
560 }
561 
562 unsigned decodeLgkmcnt(const IsaInfo::IsaVersion &Version, unsigned Waitcnt) {
563   return unpackBits(Waitcnt, getLgkmcntBitShift(), getLgkmcntBitWidth());
564 }
565 
566 void decodeWaitcnt(const IsaInfo::IsaVersion &Version, unsigned Waitcnt,
567                    unsigned &Vmcnt, unsigned &Expcnt, unsigned &Lgkmcnt) {
568   Vmcnt = decodeVmcnt(Version, Waitcnt);
569   Expcnt = decodeExpcnt(Version, Waitcnt);
570   Lgkmcnt = decodeLgkmcnt(Version, Waitcnt);
571 }
572 
573 unsigned encodeVmcnt(const IsaInfo::IsaVersion &Version, unsigned Waitcnt,
574                      unsigned Vmcnt) {
575   Waitcnt =
576       packBits(Vmcnt, Waitcnt, getVmcntBitShiftLo(), getVmcntBitWidthLo());
577   if (Version.Major < 9)
578     return Waitcnt;
579 
580   Vmcnt >>= getVmcntBitWidthLo();
581   return packBits(Vmcnt, Waitcnt, getVmcntBitShiftHi(), getVmcntBitWidthHi());
582 }
583 
584 unsigned encodeExpcnt(const IsaInfo::IsaVersion &Version, unsigned Waitcnt,
585                       unsigned Expcnt) {
586   return packBits(Expcnt, Waitcnt, getExpcntBitShift(), getExpcntBitWidth());
587 }
588 
589 unsigned encodeLgkmcnt(const IsaInfo::IsaVersion &Version, unsigned Waitcnt,
590                        unsigned Lgkmcnt) {
591   return packBits(Lgkmcnt, Waitcnt, getLgkmcntBitShift(), getLgkmcntBitWidth());
592 }
593 
594 unsigned encodeWaitcnt(const IsaInfo::IsaVersion &Version,
595                        unsigned Vmcnt, unsigned Expcnt, unsigned Lgkmcnt) {
596   unsigned Waitcnt = getWaitcntBitMask(Version);
597   Waitcnt = encodeVmcnt(Version, Waitcnt, Vmcnt);
598   Waitcnt = encodeExpcnt(Version, Waitcnt, Expcnt);
599   Waitcnt = encodeLgkmcnt(Version, Waitcnt, Lgkmcnt);
600   return Waitcnt;
601 }
602 
603 unsigned getInitialPSInputAddr(const Function &F) {
604   return getIntegerAttribute(F, "InitialPSInputAddr", 0);
605 }
606 
607 bool isShader(CallingConv::ID cc) {
608   switch(cc) {
609     case CallingConv::AMDGPU_VS:
610     case CallingConv::AMDGPU_LS:
611     case CallingConv::AMDGPU_HS:
612     case CallingConv::AMDGPU_ES:
613     case CallingConv::AMDGPU_GS:
614     case CallingConv::AMDGPU_PS:
615     case CallingConv::AMDGPU_CS:
616       return true;
617     default:
618       return false;
619   }
620 }
621 
622 bool isCompute(CallingConv::ID cc) {
623   return !isShader(cc) || cc == CallingConv::AMDGPU_CS;
624 }
625 
626 bool isEntryFunctionCC(CallingConv::ID CC) {
627   switch (CC) {
628   case CallingConv::AMDGPU_KERNEL:
629   case CallingConv::SPIR_KERNEL:
630   case CallingConv::AMDGPU_VS:
631   case CallingConv::AMDGPU_GS:
632   case CallingConv::AMDGPU_PS:
633   case CallingConv::AMDGPU_CS:
634   case CallingConv::AMDGPU_ES:
635   case CallingConv::AMDGPU_HS:
636   case CallingConv::AMDGPU_LS:
637     return true;
638   default:
639     return false;
640   }
641 }
642 
643 bool hasXNACK(const MCSubtargetInfo &STI) {
644   return STI.getFeatureBits()[AMDGPU::FeatureXNACK];
645 }
646 
647 bool hasMIMG_R128(const MCSubtargetInfo &STI) {
648   return STI.getFeatureBits()[AMDGPU::FeatureMIMG_R128];
649 }
650 
651 bool hasPackedD16(const MCSubtargetInfo &STI) {
652   return !STI.getFeatureBits()[AMDGPU::FeatureUnpackedD16VMem];
653 }
654 
655 bool isSI(const MCSubtargetInfo &STI) {
656   return STI.getFeatureBits()[AMDGPU::FeatureSouthernIslands];
657 }
658 
659 bool isCI(const MCSubtargetInfo &STI) {
660   return STI.getFeatureBits()[AMDGPU::FeatureSeaIslands];
661 }
662 
663 bool isVI(const MCSubtargetInfo &STI) {
664   return STI.getFeatureBits()[AMDGPU::FeatureVolcanicIslands];
665 }
666 
667 bool isGFX9(const MCSubtargetInfo &STI) {
668   return STI.getFeatureBits()[AMDGPU::FeatureGFX9];
669 }
670 
671 bool isGCN3Encoding(const MCSubtargetInfo &STI) {
672   return STI.getFeatureBits()[AMDGPU::FeatureGCN3Encoding];
673 }
674 
675 bool isSGPR(unsigned Reg, const MCRegisterInfo* TRI) {
676   const MCRegisterClass SGPRClass = TRI->getRegClass(AMDGPU::SReg_32RegClassID);
677   const unsigned FirstSubReg = TRI->getSubReg(Reg, 1);
678   return SGPRClass.contains(FirstSubReg != 0 ? FirstSubReg : Reg) ||
679     Reg == AMDGPU::SCC;
680 }
681 
682 bool isRegIntersect(unsigned Reg0, unsigned Reg1, const MCRegisterInfo* TRI) {
683   for (MCRegAliasIterator R(Reg0, TRI, true); R.isValid(); ++R) {
684     if (*R == Reg1) return true;
685   }
686   return false;
687 }
688 
// Expands to a switch over `Reg` covering every register whose MC encoding is
// subtarget-specific.  The CASE_CI_VI / CASE_VI_GFX9 macros are (re)defined
// before each expansion, so this single table serves both mapping directions
// used by getMCReg() and mc2PseudoReg() below.
#define MAP_REG2REG \
  using namespace AMDGPU; \
  switch(Reg) { \
  default: return Reg; \
  CASE_CI_VI(FLAT_SCR) \
  CASE_CI_VI(FLAT_SCR_LO) \
  CASE_CI_VI(FLAT_SCR_HI) \
  CASE_VI_GFX9(TTMP0) \
  CASE_VI_GFX9(TTMP1) \
  CASE_VI_GFX9(TTMP2) \
  CASE_VI_GFX9(TTMP3) \
  CASE_VI_GFX9(TTMP4) \
  CASE_VI_GFX9(TTMP5) \
  CASE_VI_GFX9(TTMP6) \
  CASE_VI_GFX9(TTMP7) \
  CASE_VI_GFX9(TTMP8) \
  CASE_VI_GFX9(TTMP9) \
  CASE_VI_GFX9(TTMP10) \
  CASE_VI_GFX9(TTMP11) \
  CASE_VI_GFX9(TTMP12) \
  CASE_VI_GFX9(TTMP13) \
  CASE_VI_GFX9(TTMP14) \
  CASE_VI_GFX9(TTMP15) \
  CASE_VI_GFX9(TTMP0_TTMP1) \
  CASE_VI_GFX9(TTMP2_TTMP3) \
  CASE_VI_GFX9(TTMP4_TTMP5) \
  CASE_VI_GFX9(TTMP6_TTMP7) \
  CASE_VI_GFX9(TTMP8_TTMP9) \
  CASE_VI_GFX9(TTMP10_TTMP11) \
  CASE_VI_GFX9(TTMP12_TTMP13) \
  CASE_VI_GFX9(TTMP14_TTMP15) \
  CASE_VI_GFX9(TTMP0_TTMP1_TTMP2_TTMP3) \
  CASE_VI_GFX9(TTMP4_TTMP5_TTMP6_TTMP7) \
  CASE_VI_GFX9(TTMP8_TTMP9_TTMP10_TTMP11) \
  CASE_VI_GFX9(TTMP12_TTMP13_TTMP14_TTMP15) \
  CASE_VI_GFX9(TTMP0_TTMP1_TTMP2_TTMP3_TTMP4_TTMP5_TTMP6_TTMP7) \
  CASE_VI_GFX9(TTMP4_TTMP5_TTMP6_TTMP7_TTMP8_TTMP9_TTMP10_TTMP11) \
  CASE_VI_GFX9(TTMP8_TTMP9_TTMP10_TTMP11_TTMP12_TTMP13_TTMP14_TTMP15) \
  CASE_VI_GFX9(TTMP0_TTMP1_TTMP2_TTMP3_TTMP4_TTMP5_TTMP6_TTMP7_TTMP8_TTMP9_TTMP10_TTMP11_TTMP12_TTMP13_TTMP14_TTMP15) \
  }

// Pseudo -> MC direction: select the _ci/_vi/_gfx9 variant for the subtarget.
#define CASE_CI_VI(node) \
  assert(!isSI(STI)); \
  case node: return isCI(STI) ? node##_ci : node##_vi;

#define CASE_VI_GFX9(node) \
  case node: return isGFX9(STI) ? node##_gfx9 : node##_vi;

/// \returns The subtarget-specific MC register for pseudo register \p Reg.
unsigned getMCReg(unsigned Reg, const MCSubtargetInfo &STI) {
  MAP_REG2REG
}

#undef CASE_CI_VI
#undef CASE_VI_GFX9

// MC -> pseudo direction: any subtarget-specific variant maps back to the
// generic register.
#define CASE_CI_VI(node)   case node##_ci: case node##_vi:   return node;
#define CASE_VI_GFX9(node) case node##_vi: case node##_gfx9: return node;

/// \returns The generic pseudo register for MC register \p Reg.
unsigned mc2PseudoReg(unsigned Reg) {
  MAP_REG2REG
}

#undef CASE_CI_VI
#undef CASE_VI_GFX9
#undef MAP_REG2REG
754 
755 bool isSISrcOperand(const MCInstrDesc &Desc, unsigned OpNo) {
756   assert(OpNo < Desc.NumOperands);
757   unsigned OpType = Desc.OpInfo[OpNo].OperandType;
758   return OpType >= AMDGPU::OPERAND_SRC_FIRST &&
759          OpType <= AMDGPU::OPERAND_SRC_LAST;
760 }
761 
762 bool isSISrcFPOperand(const MCInstrDesc &Desc, unsigned OpNo) {
763   assert(OpNo < Desc.NumOperands);
764   unsigned OpType = Desc.OpInfo[OpNo].OperandType;
765   switch (OpType) {
766   case AMDGPU::OPERAND_REG_IMM_FP32:
767   case AMDGPU::OPERAND_REG_IMM_FP64:
768   case AMDGPU::OPERAND_REG_IMM_FP16:
769   case AMDGPU::OPERAND_REG_INLINE_C_FP32:
770   case AMDGPU::OPERAND_REG_INLINE_C_FP64:
771   case AMDGPU::OPERAND_REG_INLINE_C_FP16:
772   case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
773     return true;
774   default:
775     return false;
776   }
777 }
778 
779 bool isSISrcInlinableOperand(const MCInstrDesc &Desc, unsigned OpNo) {
780   assert(OpNo < Desc.NumOperands);
781   unsigned OpType = Desc.OpInfo[OpNo].OperandType;
782   return OpType >= AMDGPU::OPERAND_REG_INLINE_C_FIRST &&
783          OpType <= AMDGPU::OPERAND_REG_INLINE_C_LAST;
784 }
785 
// Avoid using MCRegisterClass::getSize, since that function will go away
// (move from MC* level to Target* level). Return size in bits.
/// \returns Width in bits of registers in register class \p RCID.  Aborts on
/// register classes not listed here.
unsigned getRegBitWidth(unsigned RCID) {
  switch (RCID) {
  case AMDGPU::SGPR_32RegClassID:
  case AMDGPU::VGPR_32RegClassID:
  case AMDGPU::VS_32RegClassID:
  case AMDGPU::SReg_32RegClassID:
  case AMDGPU::SReg_32_XM0RegClassID:
    return 32;
  case AMDGPU::SGPR_64RegClassID:
  case AMDGPU::VS_64RegClassID:
  case AMDGPU::SReg_64RegClassID:
  case AMDGPU::VReg_64RegClassID:
    return 64;
  case AMDGPU::VReg_96RegClassID:
    return 96;
  case AMDGPU::SGPR_128RegClassID:
  case AMDGPU::SReg_128RegClassID:
  case AMDGPU::VReg_128RegClassID:
    return 128;
  case AMDGPU::SReg_256RegClassID:
  case AMDGPU::VReg_256RegClassID:
    return 256;
  case AMDGPU::SReg_512RegClassID:
  case AMDGPU::VReg_512RegClassID:
    return 512;
  default:
    llvm_unreachable("Unexpected register class");
  }
}

/// \returns Width in bits of registers in register class \p RC.
unsigned getRegBitWidth(const MCRegisterClass &RC) {
  return getRegBitWidth(RC.getID());
}

/// \returns Size in bytes of the register operand \p OpNo of \p Desc.
unsigned getRegOperandSize(const MCRegisterInfo *MRI, const MCInstrDesc &Desc,
                           unsigned OpNo) {
  assert(OpNo < Desc.NumOperands);
  unsigned RCID = Desc.OpInfo[OpNo].RegClass;
  return getRegBitWidth(MRI->getRegClass(RCID)) / 8;
}
828 
829 bool isInlinableLiteral64(int64_t Literal, bool HasInv2Pi) {
830   if (Literal >= -16 && Literal <= 64)
831     return true;
832 
833   uint64_t Val = static_cast<uint64_t>(Literal);
834   return (Val == DoubleToBits(0.0)) ||
835          (Val == DoubleToBits(1.0)) ||
836          (Val == DoubleToBits(-1.0)) ||
837          (Val == DoubleToBits(0.5)) ||
838          (Val == DoubleToBits(-0.5)) ||
839          (Val == DoubleToBits(2.0)) ||
840          (Val == DoubleToBits(-2.0)) ||
841          (Val == DoubleToBits(4.0)) ||
842          (Val == DoubleToBits(-4.0)) ||
843          (Val == 0x3fc45f306dc9c882 && HasInv2Pi);
844 }
845 
846 bool isInlinableLiteral32(int32_t Literal, bool HasInv2Pi) {
847   if (Literal >= -16 && Literal <= 64)
848     return true;
849 
850   // The actual type of the operand does not seem to matter as long
851   // as the bits match one of the inline immediate values.  For example:
852   //
853   // -nan has the hexadecimal encoding of 0xfffffffe which is -2 in decimal,
854   // so it is a legal inline immediate.
855   //
856   // 1065353216 has the hexadecimal encoding 0x3f800000 which is 1.0f in
857   // floating-point, so it is a legal inline immediate.
858 
859   uint32_t Val = static_cast<uint32_t>(Literal);
860   return (Val == FloatToBits(0.0f)) ||
861          (Val == FloatToBits(1.0f)) ||
862          (Val == FloatToBits(-1.0f)) ||
863          (Val == FloatToBits(0.5f)) ||
864          (Val == FloatToBits(-0.5f)) ||
865          (Val == FloatToBits(2.0f)) ||
866          (Val == FloatToBits(-2.0f)) ||
867          (Val == FloatToBits(4.0f)) ||
868          (Val == FloatToBits(-4.0f)) ||
869          (Val == 0x3e22f983 && HasInv2Pi);
870 }
871 
/// \returns True if the 16-bit value \p Literal can be encoded as an inline
/// constant.  Always false without \p HasInv2Pi (16-bit inline constants
/// require that subtarget capability).
bool isInlinableLiteral16(int16_t Literal, bool HasInv2Pi) {
  if (!HasInv2Pi)
    return false;

  // Integer inline constants.
  if (Literal >= -16 && Literal <= 64)
    return true;

  // Half-precision bit patterns for +/-{1.0, 0.5, 2.0, 4.0} and 1/(2*pi).
  switch (static_cast<uint16_t>(Literal)) {
  case 0x3C00: // 1.0
  case 0xBC00: // -1.0
  case 0x3800: // 0.5
  case 0xB800: // -0.5
  case 0x4000: // 2.0
  case 0xC000: // -2.0
  case 0x4400: // 4.0
  case 0xC400: // -4.0
  case 0x3118: // 1/2pi
    return true;
  default:
    return false;
  }
}
890 
891 bool isInlinableLiteralV216(int32_t Literal, bool HasInv2Pi) {
892   assert(HasInv2Pi);
893 
894   int16_t Lo16 = static_cast<int16_t>(Literal);
895   int16_t Hi16 = static_cast<int16_t>(Literal >> 16);
896   return Lo16 == Hi16 && isInlinableLiteral16(Lo16, HasInv2Pi);
897 }
898 
899 bool isArgPassedInSGPR(const Argument *A) {
900   const Function *F = A->getParent();
901 
902   // Arguments to compute shaders are never a source of divergence.
903   CallingConv::ID CC = F->getCallingConv();
904   switch (CC) {
905   case CallingConv::AMDGPU_KERNEL:
906   case CallingConv::SPIR_KERNEL:
907     return true;
908   case CallingConv::AMDGPU_VS:
909   case CallingConv::AMDGPU_LS:
910   case CallingConv::AMDGPU_HS:
911   case CallingConv::AMDGPU_ES:
912   case CallingConv::AMDGPU_GS:
913   case CallingConv::AMDGPU_PS:
914   case CallingConv::AMDGPU_CS:
915     // For non-compute shaders, SGPR inputs are marked with either inreg or byval.
916     // Everything else is in VGPRs.
917     return F->getAttributes().hasParamAttribute(A->getArgNo(), Attribute::InReg) ||
918            F->getAttributes().hasParamAttribute(A->getArgNo(), Attribute::ByVal);
919   default:
920     // TODO: Should calls support inreg for SGPR inputs?
921     return false;
922   }
923 }
924 
925 int64_t getSMRDEncodedOffset(const MCSubtargetInfo &ST, int64_t ByteOffset) {
926   if (isGCN3Encoding(ST))
927     return ByteOffset;
928   return ByteOffset >> 2;
929 }
930 
931 bool isLegalSMRDImmOffset(const MCSubtargetInfo &ST, int64_t ByteOffset) {
932   int64_t EncodedOffset = getSMRDEncodedOffset(ST, ByteOffset);
933   return isGCN3Encoding(ST) ?
934     isUInt<20>(EncodedOffset) : isUInt<8>(EncodedOffset);
935 }
936 
937 } // end namespace AMDGPU
938 
939 } // end namespace llvm
940 
941 namespace llvm {
942 namespace AMDGPU {
943 
944 AMDGPUAS getAMDGPUAS(Triple T) {
945   AMDGPUAS AS;
946   AS.FLAT_ADDRESS = 0;
947   AS.PRIVATE_ADDRESS = 5;
948   AS.REGION_ADDRESS = 2;
949   return AS;
950 }
951 
952 AMDGPUAS getAMDGPUAS(const TargetMachine &M) {
953   return getAMDGPUAS(M.getTargetTriple());
954 }
955 
956 AMDGPUAS getAMDGPUAS(const Module &M) {
957   return getAMDGPUAS(Triple(M.getTargetTriple()));
958 }
959 
960 namespace {
961 
962 struct SourceOfDivergence {
963   unsigned Intr;
964 };
965 const SourceOfDivergence *lookupSourceOfDivergence(unsigned Intr);
966 
967 #define GET_SourcesOfDivergence_IMPL
968 #include "AMDGPUGenSearchableTables.inc"
969 
970 } // end anonymous namespace
971 
972 bool isIntrinsicSourceOfDivergence(unsigned IntrID) {
973   return lookupSourceOfDivergence(IntrID);
974 }
975 } // namespace AMDGPU
976 } // namespace llvm
977