//===- AMDGPUBaseInfo.cpp - AMDGPU Base encoding information --------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//

#include "AMDGPUBaseInfo.h"
#include "AMDGPUTargetTransformInfo.h"
#include "AMDGPU.h"
#include "SIDefines.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/Triple.h"
#include "llvm/BinaryFormat/ELF.h"
#include "llvm/CodeGen/MachineMemOperand.h"
#include "llvm/IR/Attributes.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/GlobalValue.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Module.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCInstrDesc.h"
#include "llvm/MC/MCInstrInfo.h"
#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/MC/MCSectionELF.h"
#include "llvm/MC/MCSubtargetInfo.h"
#include "llvm/MC/SubtargetFeature.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MathExtras.h"
#include <algorithm>
#include <cassert>
#include <cstdint>
#include <cstring>
#include <utility>

#include "MCTargetDesc/AMDGPUMCTargetDesc.h"

#define GET_INSTRINFO_NAMED_OPS
#define GET_INSTRMAP_INFO
#include "AMDGPUGenInstrInfo.inc"
#undef GET_INSTRMAP_INFO
#undef GET_INSTRINFO_NAMED_OPS

namespace {

/// \returns Bit mask for given bit \p Shift and bit \p Width.
unsigned getBitMask(unsigned Shift, unsigned Width) {
  return ((1 << Width) - 1) << Shift;
}

/// Packs \p Src into \p Dst for given bit \p Shift and bit \p Width.
///
/// \returns Packed \p Dst.
unsigned packBits(unsigned Src, unsigned Dst, unsigned Shift, unsigned Width) {
  Dst &= ~(1 << Shift) & ~getBitMask(Shift, Width);
  Dst |= (Src << Shift) & getBitMask(Shift, Width);
  return Dst;
}

/// Unpacks bits from \p Src for given bit \p Shift and bit \p Width.
///
/// \returns Unpacked bits.
unsigned unpackBits(unsigned Src, unsigned Shift, unsigned Width) {
  return (Src & getBitMask(Shift, Width)) >> Shift;
}

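// The helpers below describe the bit layout of the s_waitcnt immediate:
//   vmcnt (low bits)  : [3:0]
//   expcnt            : [6:4]
//   lgkmcnt           : [11:8]
//   vmcnt (high bits) : [15:14] (GFX9+ only)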
/// \returns Vmcnt bit shift (lower bits).
unsigned getVmcntBitShiftLo() { return 0; }

/// \returns Vmcnt bit width (lower bits).
unsigned getVmcntBitWidthLo() { return 4; }

/// \returns Expcnt bit shift.
unsigned getExpcntBitShift() { return 4; }

/// \returns Expcnt bit width.
unsigned getExpcntBitWidth() { return 3; }

/// \returns Lgkmcnt bit shift.
unsigned getLgkmcntBitShift() { return 8; }

/// \returns Lgkmcnt bit width.
unsigned getLgkmcntBitWidth() { return 4; }

/// \returns Vmcnt bit shift (higher bits).
unsigned getVmcntBitShiftHi() { return 14; }

/// \returns Vmcnt bit width (higher bits).
unsigned getVmcntBitWidthHi() { return 2; }

} // end anonymous namespace

namespace llvm {

namespace AMDGPU {

LLVM_READNONE
static inline Channels indexToChannel(unsigned Channel) {
  switch (Channel) {
  case 1:
    return AMDGPU::Channels_1;
  case 2:
    return AMDGPU::Channels_2;
  case 3:
    return AMDGPU::Channels_3;
  case 4:
    return AMDGPU::Channels_4;
  default:
    llvm_unreachable("invalid MIMG channel");
  }
}

// FIXME: Need to handle d16 images correctly.
static unsigned rcToChannels(unsigned RCID) {
  switch (RCID) {
  case AMDGPU::VGPR_32RegClassID:
    return 1;
  case AMDGPU::VReg_64RegClassID:
    return 2;
  case AMDGPU::VReg_96RegClassID:
    return 3;
  case AMDGPU::VReg_128RegClassID:
    return 4;
  default:
    llvm_unreachable("invalid MIMG register class");
  }
}

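/// \returns the MIMG opcode matching \p Opc but using \p NewChannels data
/// channels, or \p Opc itself if the channel count already matches.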
int getMaskedMIMGOp(const MCInstrInfo &MII, unsigned Opc, unsigned NewChannels) {
  AMDGPU::Channels Channel = AMDGPU::indexToChannel(NewChannels);
  unsigned OrigChannels = rcToChannels(MII.get(Opc).OpInfo[0].RegClass);
  if (NewChannels == OrigChannels)
    return Opc;

  switch (OrigChannels) {
  case 1:
    return AMDGPU::getMaskedMIMGOp1(Opc, Channel);
  case 2:
    return AMDGPU::getMaskedMIMGOp2(Opc, Channel);
  case 3:
    return AMDGPU::getMaskedMIMGOp3(Opc, Channel);
  case 4:
    return AMDGPU::getMaskedMIMGOp4(Opc, Channel);
  default:
    llvm_unreachable("invalid MIMG channel");
  }
}

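/// Same as getMaskedMIMGOp, but for MIMG atomics, which only come in 1-, 2-,
/// and 4-channel variants (ordinary atomics vs. atomic_cmpswap).
/// \returns -1 for an unsupported OrigChannels/NewChannels combination.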
int getMaskedMIMGAtomicOp(const MCInstrInfo &MII, unsigned Opc, unsigned NewChannels) {
  assert(AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdst) != -1);
  assert(NewChannels == 1 || NewChannels == 2 || NewChannels == 4);

  unsigned OrigChannels = rcToChannels(MII.get(Opc).OpInfo[0].RegClass);
  assert(OrigChannels == 1 || OrigChannels == 2 || OrigChannels == 4);

  if (NewChannels == OrigChannels) return Opc;

  if (OrigChannels <= 2 && NewChannels <= 2) {
    // This is an ordinary atomic (not an atomic_cmpswap)
    return (OrigChannels == 1)?
      AMDGPU::getMIMGAtomicOp1(Opc) : AMDGPU::getMIMGAtomicOp2(Opc);
  } else if (OrigChannels >= 2 && NewChannels >= 2) {
    // This is an atomic_cmpswap
    return (OrigChannels == 2)?
      AMDGPU::getMIMGAtomicOp1(Opc) : AMDGPU::getMIMGAtomicOp2(Opc);
  } else { // invalid OrigChannels/NewChannels value
    return -1;
  }
}

// Wrapper for Tablegen'd function.  enum Subtarget is not defined in any
// header files, so we need to wrap it in a function that takes unsigned
// instead.
int getMCOpcode(uint16_t Opcode, unsigned Gen) {
  return getMCOpcodeGen(Opcode, static_cast<Subtarget>(Gen));
}

namespace IsaInfo {

IsaVersion getIsaVersion(const FeatureBitset &Features) {
  // GCN GFX6 (Southern Islands (SI)).
  if (Features.test(FeatureISAVersion6_0_0))
    return {6, 0, 0};
  if (Features.test(FeatureISAVersion6_0_1))
    return {6, 0, 1};

  // GCN GFX7 (Sea Islands (CI)).
  if (Features.test(FeatureISAVersion7_0_0))
    return {7, 0, 0};
  if (Features.test(FeatureISAVersion7_0_1))
    return {7, 0, 1};
  if (Features.test(FeatureISAVersion7_0_2))
    return {7, 0, 2};
  if (Features.test(FeatureISAVersion7_0_3))
    return {7, 0, 3};
  if (Features.test(FeatureISAVersion7_0_4))
    return {7, 0, 4};
  if (Features.test(FeatureSeaIslands))
    return {7, 0, 0};

  // GCN GFX8 (Volcanic Islands (VI)).
  if (Features.test(FeatureISAVersion8_0_1))
    return {8, 0, 1};
  if (Features.test(FeatureISAVersion8_0_2))
    return {8, 0, 2};
  if (Features.test(FeatureISAVersion8_0_3))
    return {8, 0, 3};
  if (Features.test(FeatureISAVersion8_1_0))
    return {8, 1, 0};
  if (Features.test(FeatureVolcanicIslands))
    return {8, 0, 0};

  // GCN GFX9.
  if (Features.test(FeatureISAVersion9_0_0))
    return {9, 0, 0};
  if (Features.test(FeatureISAVersion9_0_2))
    return {9, 0, 2};
  if (Features.test(FeatureISAVersion9_0_4))
    return {9, 0, 4};
  if (Features.test(FeatureISAVersion9_0_6))
    return {9, 0, 6};
  if (Features.test(FeatureGFX9))
    return {9, 0, 0};

  if (!Features.test(FeatureGCN) || Features.test(FeatureSouthernIslands))
    return {0, 0, 0};
  return {7, 0, 0};
}

void streamIsaVersion(const MCSubtargetInfo *STI, raw_ostream &Stream) {
  auto TargetTriple = STI->getTargetTriple();
  auto ISAVersion = IsaInfo::getIsaVersion(STI->getFeatureBits());

  Stream << TargetTriple.getArchName() << '-'
         << TargetTriple.getVendorName() << '-'
         << TargetTriple.getOSName() << '-'
         << TargetTriple.getEnvironmentName() << '-'
         << "gfx"
         << ISAVersion.Major
         << ISAVersion.Minor
         << ISAVersion.Stepping;
  Stream.flush();
}

bool hasCodeObjectV3(const FeatureBitset &Features) {
  return Features.test(FeatureCodeObjectV3);
}

unsigned getWavefrontSize(const FeatureBitset &Features) {
  if (Features.test(FeatureWavefrontSize16))
    return 16;
  if (Features.test(FeatureWavefrontSize32))
    return 32;

  return 64;
}

unsigned getLocalMemorySize(const FeatureBitset &Features) {
  if (Features.test(FeatureLocalMemorySize32768))
    return 32768;
  if (Features.test(FeatureLocalMemorySize65536))
    return 65536;

  return 0;
}

unsigned getEUsPerCU(const FeatureBitset &Features) {
  return 4;
}

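// A GCN compute unit can hold at most 40 waves in flight (10 per EU across 4
// EUs) and at most 16 work-groups; the non-GCN path is limited to 8.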
unsigned getMaxWorkGroupsPerCU(const FeatureBitset &Features,
                               unsigned FlatWorkGroupSize) {
  if (!Features.test(FeatureGCN))
    return 8;
  unsigned N = getWavesPerWorkGroup(Features, FlatWorkGroupSize);
  if (N == 1)
    return 40;
  N = 40 / N;
  return std::min(N, 16u);
}

unsigned getMaxWavesPerCU(const FeatureBitset &Features) {
  return getMaxWavesPerEU(Features) * getEUsPerCU(Features);
}

unsigned getMaxWavesPerCU(const FeatureBitset &Features,
                          unsigned FlatWorkGroupSize) {
  return getWavesPerWorkGroup(Features, FlatWorkGroupSize);
}

unsigned getMinWavesPerEU(const FeatureBitset &Features) {
  return 1;
}

unsigned getMaxWavesPerEU(const FeatureBitset &Features) {
  if (!Features.test(FeatureGCN))
    return 8;
  // FIXME: Need to take scratch memory into account.
  return 10;
}

unsigned getMaxWavesPerEU(const FeatureBitset &Features,
                          unsigned FlatWorkGroupSize) {
  return alignTo(getMaxWavesPerCU(Features, FlatWorkGroupSize),
                 getEUsPerCU(Features)) / getEUsPerCU(Features);
}

unsigned getMinFlatWorkGroupSize(const FeatureBitset &Features) {
  return 1;
}

unsigned getMaxFlatWorkGroupSize(const FeatureBitset &Features) {
  return 2048;
}

unsigned getWavesPerWorkGroup(const FeatureBitset &Features,
                              unsigned FlatWorkGroupSize) {
  return alignTo(FlatWorkGroupSize, getWavefrontSize(Features)) /
                 getWavefrontSize(Features);
}

unsigned getSGPRAllocGranule(const FeatureBitset &Features) {
  IsaVersion Version = getIsaVersion(Features);
  if (Version.Major >= 8)
    return 16;
  return 8;
}

unsigned getSGPREncodingGranule(const FeatureBitset &Features) {
  return 8;
}

unsigned getTotalNumSGPRs(const FeatureBitset &Features) {
  IsaVersion Version = getIsaVersion(Features);
  if (Version.Major >= 8)
    return 800;
  return 512;
}

unsigned getAddressableNumSGPRs(const FeatureBitset &Features) {
  if (Features.test(FeatureSGPRInitBug))
    return FIXED_NUM_SGPRS_FOR_INIT_BUG;

  IsaVersion Version = getIsaVersion(Features);
  if (Version.Major >= 8)
    return 102;
  return 104;
}

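// The SGPR bounds below follow from the per-EU SGPR pool and the allocation
// granule. For example, on GFX8 (800 SGPRs, granule 16) a target of 8 waves
// per EU gives getMinNumSGPRs = alignDown(800 / 9, 16) + 1 = 81 and
// getMaxNumSGPRs = min(alignDown(800 / 8, 16), 102) = 96.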
unsigned getMinNumSGPRs(const FeatureBitset &Features, unsigned WavesPerEU) {
  assert(WavesPerEU != 0);

  if (WavesPerEU >= getMaxWavesPerEU(Features))
    return 0;
  unsigned MinNumSGPRs =
      alignDown(getTotalNumSGPRs(Features) / (WavesPerEU + 1),
                getSGPRAllocGranule(Features)) + 1;
  return std::min(MinNumSGPRs, getAddressableNumSGPRs(Features));
}

unsigned getMaxNumSGPRs(const FeatureBitset &Features, unsigned WavesPerEU,
                        bool Addressable) {
  assert(WavesPerEU != 0);

  IsaVersion Version = getIsaVersion(Features);
  unsigned MaxNumSGPRs = alignDown(getTotalNumSGPRs(Features) / WavesPerEU,
                                   getSGPRAllocGranule(Features));
  unsigned AddressableNumSGPRs = getAddressableNumSGPRs(Features);
  if (Version.Major >= 8 && !Addressable)
    AddressableNumSGPRs = 112;
  return std::min(MaxNumSGPRs, AddressableNumSGPRs);
}

unsigned getVGPRAllocGranule(const FeatureBitset &Features) {
  return 4;
}

unsigned getVGPREncodingGranule(const FeatureBitset &Features) {
  return getVGPRAllocGranule(Features);
}

unsigned getTotalNumVGPRs(const FeatureBitset &Features) {
  return 256;
}

unsigned getAddressableNumVGPRs(const FeatureBitset &Features) {
  return getTotalNumVGPRs(Features);
}

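// The VGPR bounds use the same scheme with a 256-register file and a granule
// of 4. For example, at 8 waves per EU:
// getMinNumVGPRs = alignDown(256 / 9, 4) + 1 = 29 and
// getMaxNumVGPRs = alignDown(256 / 8, 4) = 32.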
unsigned getMinNumVGPRs(const FeatureBitset &Features, unsigned WavesPerEU) {
  assert(WavesPerEU != 0);

  if (WavesPerEU >= getMaxWavesPerEU(Features))
    return 0;
  unsigned MinNumVGPRs =
      alignDown(getTotalNumVGPRs(Features) / (WavesPerEU + 1),
                getVGPRAllocGranule(Features)) + 1;
  return std::min(MinNumVGPRs, getAddressableNumVGPRs(Features));
}

unsigned getMaxNumVGPRs(const FeatureBitset &Features, unsigned WavesPerEU) {
  assert(WavesPerEU != 0);

  unsigned MaxNumVGPRs = alignDown(getTotalNumVGPRs(Features) / WavesPerEU,
                                   getVGPRAllocGranule(Features));
  unsigned AddressableNumVGPRs = getAddressableNumVGPRs(Features);
  return std::min(MaxNumVGPRs, AddressableNumVGPRs);
}

} // end namespace IsaInfo

void initDefaultAMDKernelCodeT(amd_kernel_code_t &Header,
                               const FeatureBitset &Features) {
  IsaInfo::IsaVersion ISA = IsaInfo::getIsaVersion(Features);

  memset(&Header, 0, sizeof(Header));

  Header.amd_kernel_code_version_major = 1;
  Header.amd_kernel_code_version_minor = 2;
  Header.amd_machine_kind = 1; // AMD_MACHINE_KIND_AMDGPU
  Header.amd_machine_version_major = ISA.Major;
  Header.amd_machine_version_minor = ISA.Minor;
  Header.amd_machine_version_stepping = ISA.Stepping;
  Header.kernel_code_entry_byte_offset = sizeof(Header);
  // wavefront_size is specified as a power of 2: 2^6 = 64 threads.
  Header.wavefront_size = 6;

  // If the code object does not support indirect functions, then the value must
  // be 0xffffffff.
  Header.call_convention = -1;

  // These alignment values are specified in powers of two, so alignment =
  // 2^n.  The minimum alignment is 2^4 = 16.
  Header.kernarg_segment_alignment = 4;
  Header.group_segment_alignment = 4;
  Header.private_segment_alignment = 4;
}

bool isGroupSegment(const GlobalValue *GV) {
  return GV->getType()->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS;
}

bool isGlobalSegment(const GlobalValue *GV) {
  return GV->getType()->getAddressSpace() == AMDGPUAS::GLOBAL_ADDRESS;
}

bool isReadOnlySegment(const GlobalValue *GV) {
  return GV->getType()->getAddressSpace() == AMDGPUAS::CONSTANT_ADDRESS ||
         GV->getType()->getAddressSpace() == AMDGPUAS::CONSTANT_ADDRESS_32BIT;
}

bool shouldEmitConstantsToTextSection(const Triple &TT) {
  return TT.getOS() != Triple::AMDHSA;
}

int getIntegerAttribute(const Function &F, StringRef Name, int Default) {
  Attribute A = F.getFnAttribute(Name);
  int Result = Default;

  if (A.isStringAttribute()) {
    StringRef Str = A.getValueAsString();
    if (Str.getAsInteger(0, Result)) {
      LLVMContext &Ctx = F.getContext();
      Ctx.emitError("can't parse integer attribute " + Name);
    }
  }

  return Result;
}

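/// Parses a function attribute whose string value has the form
/// "<first>[,<second>]", e.g. "amdgpu-flat-work-group-size"="128,256".
/// Emits an error and \returns \p Default if parsing fails; the second value
/// is optional when \p OnlyFirstRequired is set.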
std::pair<int, int> getIntegerPairAttribute(const Function &F,
                                            StringRef Name,
                                            std::pair<int, int> Default,
                                            bool OnlyFirstRequired) {
  Attribute A = F.getFnAttribute(Name);
  if (!A.isStringAttribute())
    return Default;

  LLVMContext &Ctx = F.getContext();
  std::pair<int, int> Ints = Default;
  std::pair<StringRef, StringRef> Strs = A.getValueAsString().split(',');
  if (Strs.first.trim().getAsInteger(0, Ints.first)) {
    Ctx.emitError("can't parse first integer attribute " + Name);
    return Default;
  }
  if (Strs.second.trim().getAsInteger(0, Ints.second)) {
    if (!OnlyFirstRequired || !Strs.second.trim().empty()) {
      Ctx.emitError("can't parse second integer attribute " + Name);
      return Default;
    }
  }

  return Ints;
}

unsigned getVmcntBitMask(const IsaInfo::IsaVersion &Version) {
  unsigned VmcntLo = (1 << getVmcntBitWidthLo()) - 1;
  if (Version.Major < 9)
    return VmcntLo;

  unsigned VmcntHi = ((1 << getVmcntBitWidthHi()) - 1) << getVmcntBitWidthLo();
  return VmcntLo | VmcntHi;
}

unsigned getExpcntBitMask(const IsaInfo::IsaVersion &Version) {
  return (1 << getExpcntBitWidth()) - 1;
}

unsigned getLgkmcntBitMask(const IsaInfo::IsaVersion &Version) {
  return (1 << getLgkmcntBitWidth()) - 1;
}

unsigned getWaitcntBitMask(const IsaInfo::IsaVersion &Version) {
  unsigned VmcntLo = getBitMask(getVmcntBitShiftLo(), getVmcntBitWidthLo());
  unsigned Expcnt = getBitMask(getExpcntBitShift(), getExpcntBitWidth());
  unsigned Lgkmcnt = getBitMask(getLgkmcntBitShift(), getLgkmcntBitWidth());
  unsigned Waitcnt = VmcntLo | Expcnt | Lgkmcnt;
  if (Version.Major < 9)
    return Waitcnt;

  unsigned VmcntHi = getBitMask(getVmcntBitShiftHi(), getVmcntBitWidthHi());
  return Waitcnt | VmcntHi;
}

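// On GFX9+ the vmcnt value is split across the s_waitcnt immediate: the low
// 4 bits live at [3:0] and the high 2 bits at [15:14]. The decode/encode
// helpers below stitch the two halves together; earlier targets use only the
// low 4 bits.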
unsigned decodeVmcnt(const IsaInfo::IsaVersion &Version, unsigned Waitcnt) {
  unsigned VmcntLo =
      unpackBits(Waitcnt, getVmcntBitShiftLo(), getVmcntBitWidthLo());
  if (Version.Major < 9)
    return VmcntLo;

  unsigned VmcntHi =
      unpackBits(Waitcnt, getVmcntBitShiftHi(), getVmcntBitWidthHi());
  VmcntHi <<= getVmcntBitWidthLo();
  return VmcntLo | VmcntHi;
}

unsigned decodeExpcnt(const IsaInfo::IsaVersion &Version, unsigned Waitcnt) {
  return unpackBits(Waitcnt, getExpcntBitShift(), getExpcntBitWidth());
}

unsigned decodeLgkmcnt(const IsaInfo::IsaVersion &Version, unsigned Waitcnt) {
  return unpackBits(Waitcnt, getLgkmcntBitShift(), getLgkmcntBitWidth());
}

void decodeWaitcnt(const IsaInfo::IsaVersion &Version, unsigned Waitcnt,
                   unsigned &Vmcnt, unsigned &Expcnt, unsigned &Lgkmcnt) {
  Vmcnt = decodeVmcnt(Version, Waitcnt);
  Expcnt = decodeExpcnt(Version, Waitcnt);
  Lgkmcnt = decodeLgkmcnt(Version, Waitcnt);
}

unsigned encodeVmcnt(const IsaInfo::IsaVersion &Version, unsigned Waitcnt,
                     unsigned Vmcnt) {
  Waitcnt =
      packBits(Vmcnt, Waitcnt, getVmcntBitShiftLo(), getVmcntBitWidthLo());
  if (Version.Major < 9)
    return Waitcnt;

  Vmcnt >>= getVmcntBitWidthLo();
  return packBits(Vmcnt, Waitcnt, getVmcntBitShiftHi(), getVmcntBitWidthHi());
}

unsigned encodeExpcnt(const IsaInfo::IsaVersion &Version, unsigned Waitcnt,
                      unsigned Expcnt) {
  return packBits(Expcnt, Waitcnt, getExpcntBitShift(), getExpcntBitWidth());
}

unsigned encodeLgkmcnt(const IsaInfo::IsaVersion &Version, unsigned Waitcnt,
                       unsigned Lgkmcnt) {
  return packBits(Lgkmcnt, Waitcnt, getLgkmcntBitShift(), getLgkmcntBitWidth());
}

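// For example, on a pre-GFX9 target, encodeWaitcnt(Version, /*Vmcnt=*/1,
// /*Expcnt=*/2, /*Lgkmcnt=*/3) packs the fields as [3:0] = 1, [6:4] = 2 and
// [11:8] = 3, giving an immediate of 0x321.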
unsigned encodeWaitcnt(const IsaInfo::IsaVersion &Version,
                       unsigned Vmcnt, unsigned Expcnt, unsigned Lgkmcnt) {
  unsigned Waitcnt = getWaitcntBitMask(Version);
  Waitcnt = encodeVmcnt(Version, Waitcnt, Vmcnt);
  Waitcnt = encodeExpcnt(Version, Waitcnt, Expcnt);
  Waitcnt = encodeLgkmcnt(Version, Waitcnt, Lgkmcnt);
  return Waitcnt;
}

unsigned getInitialPSInputAddr(const Function &F) {
  return getIntegerAttribute(F, "InitialPSInputAddr", 0);
}

bool isShader(CallingConv::ID cc) {
  switch(cc) {
    case CallingConv::AMDGPU_VS:
    case CallingConv::AMDGPU_LS:
    case CallingConv::AMDGPU_HS:
    case CallingConv::AMDGPU_ES:
    case CallingConv::AMDGPU_GS:
    case CallingConv::AMDGPU_PS:
    case CallingConv::AMDGPU_CS:
      return true;
    default:
      return false;
  }
}

bool isCompute(CallingConv::ID cc) {
  return !isShader(cc) || cc == CallingConv::AMDGPU_CS;
}

bool isEntryFunctionCC(CallingConv::ID CC) {
  switch (CC) {
  case CallingConv::AMDGPU_KERNEL:
  case CallingConv::SPIR_KERNEL:
  case CallingConv::AMDGPU_VS:
  case CallingConv::AMDGPU_GS:
  case CallingConv::AMDGPU_PS:
  case CallingConv::AMDGPU_CS:
  case CallingConv::AMDGPU_ES:
  case CallingConv::AMDGPU_HS:
  case CallingConv::AMDGPU_LS:
    return true;
  default:
    return false;
  }
}

bool hasXNACK(const MCSubtargetInfo &STI) {
  return STI.getFeatureBits()[AMDGPU::FeatureXNACK];
}

bool hasMIMG_R128(const MCSubtargetInfo &STI) {
  return STI.getFeatureBits()[AMDGPU::FeatureMIMG_R128];
}

bool hasPackedD16(const MCSubtargetInfo &STI) {
  return !STI.getFeatureBits()[AMDGPU::FeatureUnpackedD16VMem];
}

bool isSI(const MCSubtargetInfo &STI) {
  return STI.getFeatureBits()[AMDGPU::FeatureSouthernIslands];
}

bool isCI(const MCSubtargetInfo &STI) {
  return STI.getFeatureBits()[AMDGPU::FeatureSeaIslands];
}

bool isVI(const MCSubtargetInfo &STI) {
  return STI.getFeatureBits()[AMDGPU::FeatureVolcanicIslands];
}

bool isGFX9(const MCSubtargetInfo &STI) {
  return STI.getFeatureBits()[AMDGPU::FeatureGFX9];
}

bool isGCN3Encoding(const MCSubtargetInfo &STI) {
  return STI.getFeatureBits()[AMDGPU::FeatureGCN3Encoding];
}

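// Note: for register tuples this checks the first subregister (falling back
// to the register itself for single registers); SCC also counts as an SGPR.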
bool isSGPR(unsigned Reg, const MCRegisterInfo* TRI) {
  const MCRegisterClass SGPRClass = TRI->getRegClass(AMDGPU::SReg_32RegClassID);
  const unsigned FirstSubReg = TRI->getSubReg(Reg, 1);
  return SGPRClass.contains(FirstSubReg != 0 ? FirstSubReg : Reg) ||
    Reg == AMDGPU::SCC;
}

bool isRegIntersect(unsigned Reg0, unsigned Reg1, const MCRegisterInfo* TRI) {
  for (MCRegAliasIterator R(Reg0, TRI, true); R.isValid(); ++R) {
    if (*R == Reg1) return true;
  }
  return false;
}

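// MAP_REG2REG maps between pseudo registers and their subtarget-specific MC
// counterparts: FLAT_SCR differs between CI and VI, and the TTMP registers
// differ between VI and GFX9. getMCReg resolves a pseudo register for the
// given subtarget; mc2PseudoReg performs the reverse mapping.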
#define MAP_REG2REG \
  using namespace AMDGPU; \
  switch(Reg) { \
  default: return Reg; \
  CASE_CI_VI(FLAT_SCR) \
  CASE_CI_VI(FLAT_SCR_LO) \
  CASE_CI_VI(FLAT_SCR_HI) \
  CASE_VI_GFX9(TTMP0) \
  CASE_VI_GFX9(TTMP1) \
  CASE_VI_GFX9(TTMP2) \
  CASE_VI_GFX9(TTMP3) \
  CASE_VI_GFX9(TTMP4) \
  CASE_VI_GFX9(TTMP5) \
  CASE_VI_GFX9(TTMP6) \
  CASE_VI_GFX9(TTMP7) \
  CASE_VI_GFX9(TTMP8) \
  CASE_VI_GFX9(TTMP9) \
  CASE_VI_GFX9(TTMP10) \
  CASE_VI_GFX9(TTMP11) \
  CASE_VI_GFX9(TTMP12) \
  CASE_VI_GFX9(TTMP13) \
  CASE_VI_GFX9(TTMP14) \
  CASE_VI_GFX9(TTMP15) \
  CASE_VI_GFX9(TTMP0_TTMP1) \
  CASE_VI_GFX9(TTMP2_TTMP3) \
  CASE_VI_GFX9(TTMP4_TTMP5) \
  CASE_VI_GFX9(TTMP6_TTMP7) \
  CASE_VI_GFX9(TTMP8_TTMP9) \
  CASE_VI_GFX9(TTMP10_TTMP11) \
  CASE_VI_GFX9(TTMP12_TTMP13) \
  CASE_VI_GFX9(TTMP14_TTMP15) \
  CASE_VI_GFX9(TTMP0_TTMP1_TTMP2_TTMP3) \
  CASE_VI_GFX9(TTMP4_TTMP5_TTMP6_TTMP7) \
  CASE_VI_GFX9(TTMP8_TTMP9_TTMP10_TTMP11) \
  CASE_VI_GFX9(TTMP12_TTMP13_TTMP14_TTMP15) \
  CASE_VI_GFX9(TTMP0_TTMP1_TTMP2_TTMP3_TTMP4_TTMP5_TTMP6_TTMP7) \
  CASE_VI_GFX9(TTMP4_TTMP5_TTMP6_TTMP7_TTMP8_TTMP9_TTMP10_TTMP11) \
  CASE_VI_GFX9(TTMP8_TTMP9_TTMP10_TTMP11_TTMP12_TTMP13_TTMP14_TTMP15) \
  CASE_VI_GFX9(TTMP0_TTMP1_TTMP2_TTMP3_TTMP4_TTMP5_TTMP6_TTMP7_TTMP8_TTMP9_TTMP10_TTMP11_TTMP12_TTMP13_TTMP14_TTMP15) \
  }

#define CASE_CI_VI(node) \
  assert(!isSI(STI)); \
  case node: return isCI(STI) ? node##_ci : node##_vi;

#define CASE_VI_GFX9(node) \
  case node: return isGFX9(STI) ? node##_gfx9 : node##_vi;

unsigned getMCReg(unsigned Reg, const MCSubtargetInfo &STI) {
  MAP_REG2REG
}

#undef CASE_CI_VI
#undef CASE_VI_GFX9

#define CASE_CI_VI(node)   case node##_ci: case node##_vi:   return node;
#define CASE_VI_GFX9(node) case node##_vi: case node##_gfx9: return node;

unsigned mc2PseudoReg(unsigned Reg) {
  MAP_REG2REG
}

#undef CASE_CI_VI
#undef CASE_VI_GFX9
#undef MAP_REG2REG

bool isSISrcOperand(const MCInstrDesc &Desc, unsigned OpNo) {
  assert(OpNo < Desc.NumOperands);
  unsigned OpType = Desc.OpInfo[OpNo].OperandType;
  return OpType >= AMDGPU::OPERAND_SRC_FIRST &&
         OpType <= AMDGPU::OPERAND_SRC_LAST;
}

bool isSISrcFPOperand(const MCInstrDesc &Desc, unsigned OpNo) {
  assert(OpNo < Desc.NumOperands);
  unsigned OpType = Desc.OpInfo[OpNo].OperandType;
  switch (OpType) {
  case AMDGPU::OPERAND_REG_IMM_FP32:
  case AMDGPU::OPERAND_REG_IMM_FP64:
  case AMDGPU::OPERAND_REG_IMM_FP16:
  case AMDGPU::OPERAND_REG_INLINE_C_FP32:
  case AMDGPU::OPERAND_REG_INLINE_C_FP64:
  case AMDGPU::OPERAND_REG_INLINE_C_FP16:
  case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
    return true;
  default:
    return false;
  }
}

bool isSISrcInlinableOperand(const MCInstrDesc &Desc, unsigned OpNo) {
  assert(OpNo < Desc.NumOperands);
  unsigned OpType = Desc.OpInfo[OpNo].OperandType;
  return OpType >= AMDGPU::OPERAND_REG_INLINE_C_FIRST &&
         OpType <= AMDGPU::OPERAND_REG_INLINE_C_LAST;
}

// Avoid using MCRegisterClass::getSize, since that function will go away
// (move from MC* level to Target* level). Return size in bits.
unsigned getRegBitWidth(unsigned RCID) {
  switch (RCID) {
  case AMDGPU::SGPR_32RegClassID:
  case AMDGPU::VGPR_32RegClassID:
  case AMDGPU::VS_32RegClassID:
  case AMDGPU::SReg_32RegClassID:
  case AMDGPU::SReg_32_XM0RegClassID:
    return 32;
  case AMDGPU::SGPR_64RegClassID:
  case AMDGPU::VS_64RegClassID:
  case AMDGPU::SReg_64RegClassID:
  case AMDGPU::VReg_64RegClassID:
    return 64;
  case AMDGPU::VReg_96RegClassID:
    return 96;
  case AMDGPU::SGPR_128RegClassID:
  case AMDGPU::SReg_128RegClassID:
  case AMDGPU::VReg_128RegClassID:
    return 128;
  case AMDGPU::SReg_256RegClassID:
  case AMDGPU::VReg_256RegClassID:
    return 256;
  case AMDGPU::SReg_512RegClassID:
  case AMDGPU::VReg_512RegClassID:
    return 512;
  default:
    llvm_unreachable("Unexpected register class");
  }
}

unsigned getRegBitWidth(const MCRegisterClass &RC) {
  return getRegBitWidth(RC.getID());
}

unsigned getRegOperandSize(const MCRegisterInfo *MRI, const MCInstrDesc &Desc,
                           unsigned OpNo) {
  assert(OpNo < Desc.NumOperands);
  unsigned RCID = Desc.OpInfo[OpNo].RegClass;
  return getRegBitWidth(MRI->getRegClass(RCID)) / 8;
}

bool isInlinableLiteral64(int64_t Literal, bool HasInv2Pi) {
  if (Literal >= -16 && Literal <= 64)
    return true;

  uint64_t Val = static_cast<uint64_t>(Literal);
  return (Val == DoubleToBits(0.0)) ||
         (Val == DoubleToBits(1.0)) ||
         (Val == DoubleToBits(-1.0)) ||
         (Val == DoubleToBits(0.5)) ||
         (Val == DoubleToBits(-0.5)) ||
         (Val == DoubleToBits(2.0)) ||
         (Val == DoubleToBits(-2.0)) ||
         (Val == DoubleToBits(4.0)) ||
         (Val == DoubleToBits(-4.0)) ||
         (Val == 0x3fc45f306dc9c882 && HasInv2Pi); // 1/(2*pi)
}

bool isInlinableLiteral32(int32_t Literal, bool HasInv2Pi) {
  if (Literal >= -16 && Literal <= 64)
    return true;

  // The actual type of the operand does not seem to matter as long
  // as the bits match one of the inline immediate values.  For example:
  //
  // -nan has the hexadecimal encoding of 0xfffffffe which is -2 in decimal,
  // so it is a legal inline immediate.
  //
  // 1065353216 has the hexadecimal encoding 0x3f800000 which is 1.0f in
  // floating-point, so it is a legal inline immediate.

  uint32_t Val = static_cast<uint32_t>(Literal);
  return (Val == FloatToBits(0.0f)) ||
         (Val == FloatToBits(1.0f)) ||
         (Val == FloatToBits(-1.0f)) ||
         (Val == FloatToBits(0.5f)) ||
         (Val == FloatToBits(-0.5f)) ||
         (Val == FloatToBits(2.0f)) ||
         (Val == FloatToBits(-2.0f)) ||
         (Val == FloatToBits(4.0f)) ||
         (Val == FloatToBits(-4.0f)) ||
         (Val == 0x3e22f983 && HasInv2Pi); // 1/(2*pi)
}

bool isInlinableLiteral16(int16_t Literal, bool HasInv2Pi) {
  if (!HasInv2Pi)
    return false;

  if (Literal >= -16 && Literal <= 64)
    return true;

  uint16_t Val = static_cast<uint16_t>(Literal);
  return Val == 0x3C00 || // 1.0
         Val == 0xBC00 || // -1.0
         Val == 0x3800 || // 0.5
         Val == 0xB800 || // -0.5
         Val == 0x4000 || // 2.0
         Val == 0xC000 || // -2.0
         Val == 0x4400 || // 4.0
         Val == 0xC400 || // -4.0
         Val == 0x3118;   // 1/2pi
}

bool isInlinableLiteralV216(int32_t Literal, bool HasInv2Pi) {
  assert(HasInv2Pi);

  int16_t Lo16 = static_cast<int16_t>(Literal);
  int16_t Hi16 = static_cast<int16_t>(Literal >> 16);
  return Lo16 == Hi16 && isInlinableLiteral16(Lo16, HasInv2Pi);
}

bool isArgPassedInSGPR(const Argument *A) {
  const Function *F = A->getParent();

  // Arguments to compute shaders are never a source of divergence.
  CallingConv::ID CC = F->getCallingConv();
  switch (CC) {
  case CallingConv::AMDGPU_KERNEL:
  case CallingConv::SPIR_KERNEL:
    return true;
  case CallingConv::AMDGPU_VS:
  case CallingConv::AMDGPU_LS:
  case CallingConv::AMDGPU_HS:
  case CallingConv::AMDGPU_ES:
  case CallingConv::AMDGPU_GS:
  case CallingConv::AMDGPU_PS:
  case CallingConv::AMDGPU_CS:
    // For non-compute shaders, SGPR inputs are marked with either inreg or byval.
    // Everything else is in VGPRs.
    return F->getAttributes().hasParamAttribute(A->getArgNo(), Attribute::InReg) ||
           F->getAttributes().hasParamAttribute(A->getArgNo(), Attribute::ByVal);
  default:
    // TODO: Should calls support inreg for SGPR inputs?
    return false;
  }
}

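// SMRD/SMEM immediate offsets: with the GCN3 encoding the offset field holds
// a 20-bit byte offset, while earlier encodings hold an 8-bit dword offset
// (ByteOffset >> 2).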
int64_t getSMRDEncodedOffset(const MCSubtargetInfo &ST, int64_t ByteOffset) {
  if (isGCN3Encoding(ST))
    return ByteOffset;
  return ByteOffset >> 2;
}

bool isLegalSMRDImmOffset(const MCSubtargetInfo &ST, int64_t ByteOffset) {
  int64_t EncodedOffset = getSMRDEncodedOffset(ST, ByteOffset);
  return isGCN3Encoding(ST) ?
    isUInt<20>(EncodedOffset) : isUInt<8>(EncodedOffset);
}

} // end namespace AMDGPU

} // end namespace llvm

namespace llvm {
namespace AMDGPU {

AMDGPUAS getAMDGPUAS(Triple T) {
  AMDGPUAS AS;
  AS.FLAT_ADDRESS = 0;
  AS.PRIVATE_ADDRESS = 5;
  AS.REGION_ADDRESS = 2;
  return AS;
}

AMDGPUAS getAMDGPUAS(const TargetMachine &M) {
  return getAMDGPUAS(M.getTargetTriple());
}

AMDGPUAS getAMDGPUAS(const Module &M) {
  return getAMDGPUAS(Triple(M.getTargetTriple()));
}

namespace {

struct SourceOfDivergence {
  unsigned Intr;
};
const SourceOfDivergence *lookupSourceOfDivergenceByIntr(unsigned Intr);

#define GET_SOURCEOFDIVERGENCE_IMPL
#include "AMDGPUGenSearchableTables.inc"

} // end anonymous namespace

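// lookupSourceOfDivergenceByIntr is generated from the searchable table
// included above; it returns a non-null entry iff the intrinsic is a known
// source of divergence.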
bool isIntrinsicSourceOfDivergence(unsigned IntrID) {
  return lookupSourceOfDivergenceByIntr(IntrID);
}
} // end namespace AMDGPU
} // end namespace llvm