1 //===- AMDGPUBaseInfo.cpp - AMDGPU Base encoding information --------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #include "AMDGPUBaseInfo.h"
10 #include "AMDGPU.h"
11 #include "AMDGPUAsmUtils.h"
12 #include "AMDKernelCodeT.h"
13 #include "GCNSubtarget.h"
14 #include "MCTargetDesc/AMDGPUMCTargetDesc.h"
15 #include "llvm/BinaryFormat/ELF.h"
16 #include "llvm/IR/Attributes.h"
17 #include "llvm/IR/Function.h"
18 #include "llvm/IR/GlobalValue.h"
19 #include "llvm/IR/IntrinsicsAMDGPU.h"
20 #include "llvm/IR/IntrinsicsR600.h"
21 #include "llvm/IR/LLVMContext.h"
22 #include "llvm/MC/MCSubtargetInfo.h"
23 #include "llvm/Support/AMDHSAKernelDescriptor.h"
24 #include "llvm/Support/CommandLine.h"
25 #include "llvm/Support/TargetParser.h"
26 
27 #define GET_INSTRINFO_NAMED_OPS
28 #define GET_INSTRMAP_INFO
29 #include "AMDGPUGenInstrInfo.inc"
30 
31 static llvm::cl::opt<unsigned>
32     AmdhsaCodeObjectVersion("amdhsa-code-object-version", llvm::cl::Hidden,
33                             llvm::cl::desc("AMDHSA Code Object Version"),
34                             llvm::cl::init(4));
35 
36 // TODO-GFX11: Remove this when full 16-bit codegen is implemented.
37 static llvm::cl::opt<bool>
38     LimitTo128VGPRs("amdgpu-limit-to-128-vgprs", llvm::cl::Hidden,
39                     llvm::cl::desc("Never use more than 128 VGPRs"));
40 
41 namespace {
42 
43 /// \returns Bit mask for given bit \p Shift and bit \p Width.
44 unsigned getBitMask(unsigned Shift, unsigned Width) {
45   return ((1 << Width) - 1) << Shift;
46 }
47 
48 /// Packs \p Src into \p Dst for given bit \p Shift and bit \p Width.
49 ///
50 /// \returns Packed \p Dst.
51 unsigned packBits(unsigned Src, unsigned Dst, unsigned Shift, unsigned Width) {
52   unsigned Mask = getBitMask(Shift, Width);
53   return ((Src << Shift) & Mask) | (Dst & ~Mask);
54 }
55 
56 /// Unpacks bits from \p Src for given bit \p Shift and bit \p Width.
57 ///
58 /// \returns Unpacked bits.
59 unsigned unpackBits(unsigned Src, unsigned Shift, unsigned Width) {
60   return (Src & getBitMask(Shift, Width)) >> Shift;
61 }
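
// Worked example for the helpers above: getBitMask(4, 3) is 0x70,
// packBits(5, 0, 4, 3) places the value 5 into that field giving 0x50, and
// unpackBits(0x50, 4, 3) recovers 5.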
62 
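// The helpers below describe where each counter lives in the s_waitcnt
// immediate. The resulting layout is:
//   pre-GFX9:  vmcnt[3:0]           expcnt[6:4]  lgkmcnt[11:8]
//   GFX9:      vmcnt[3:0],[15:14]   expcnt[6:4]  lgkmcnt[11:8]
//   GFX10:     vmcnt[3:0],[15:14]   expcnt[6:4]  lgkmcnt[13:8]
//   GFX11:     vmcnt[15:10]         expcnt[2:0]  lgkmcnt[9:4]
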
63 /// \returns Vmcnt bit shift (lower bits).
64 unsigned getVmcntBitShiftLo(unsigned VersionMajor) {
65   return VersionMajor >= 11 ? 10 : 0;
66 }
67 
68 /// \returns Vmcnt bit width (lower bits).
69 unsigned getVmcntBitWidthLo(unsigned VersionMajor) {
70   return VersionMajor >= 11 ? 6 : 4;
71 }
72 
73 /// \returns Expcnt bit shift.
74 unsigned getExpcntBitShift(unsigned VersionMajor) {
75   return VersionMajor >= 11 ? 0 : 4;
76 }
77 
78 /// \returns Expcnt bit width.
79 unsigned getExpcntBitWidth(unsigned VersionMajor) { return 3; }
80 
81 /// \returns Lgkmcnt bit shift.
82 unsigned getLgkmcntBitShift(unsigned VersionMajor) {
83   return VersionMajor >= 11 ? 4 : 8;
84 }
85 
86 /// \returns Lgkmcnt bit width.
87 unsigned getLgkmcntBitWidth(unsigned VersionMajor) {
88   return VersionMajor >= 10 ? 6 : 4;
89 }
90 
91 /// \returns Vmcnt bit shift (higher bits).
92 unsigned getVmcntBitShiftHi(unsigned VersionMajor) { return 14; }
93 
94 /// \returns Vmcnt bit width (higher bits).
95 unsigned getVmcntBitWidthHi(unsigned VersionMajor) {
96   return (VersionMajor == 9 || VersionMajor == 10) ? 2 : 0;
97 }
98 
99 } // end namespace anonymous
100 
101 namespace llvm {
102 
103 namespace AMDGPU {
104 
105 Optional<uint8_t> getHsaAbiVersion(const MCSubtargetInfo *STI) {
106   if (STI && STI->getTargetTriple().getOS() != Triple::AMDHSA)
107     return None;
108 
109   switch (AmdhsaCodeObjectVersion) {
110   case 2:
111     return ELF::ELFABIVERSION_AMDGPU_HSA_V2;
112   case 3:
113     return ELF::ELFABIVERSION_AMDGPU_HSA_V3;
114   case 4:
115     return ELF::ELFABIVERSION_AMDGPU_HSA_V4;
116   case 5:
117     return ELF::ELFABIVERSION_AMDGPU_HSA_V5;
118   default:
119     report_fatal_error(Twine("Unsupported AMDHSA Code Object Version ") +
120                        Twine(AmdhsaCodeObjectVersion));
121   }
122 }
123 
124 bool isHsaAbiVersion2(const MCSubtargetInfo *STI) {
125   if (Optional<uint8_t> HsaAbiVer = getHsaAbiVersion(STI))
126     return *HsaAbiVer == ELF::ELFABIVERSION_AMDGPU_HSA_V2;
127   return false;
128 }
129 
130 bool isHsaAbiVersion3(const MCSubtargetInfo *STI) {
131   if (Optional<uint8_t> HsaAbiVer = getHsaAbiVersion(STI))
132     return *HsaAbiVer == ELF::ELFABIVERSION_AMDGPU_HSA_V3;
133   return false;
134 }
135 
136 bool isHsaAbiVersion4(const MCSubtargetInfo *STI) {
137   if (Optional<uint8_t> HsaAbiVer = getHsaAbiVersion(STI))
138     return *HsaAbiVer == ELF::ELFABIVERSION_AMDGPU_HSA_V4;
139   return false;
140 }
141 
142 bool isHsaAbiVersion5(const MCSubtargetInfo *STI) {
143   if (Optional<uint8_t> HsaAbiVer = getHsaAbiVersion(STI))
144     return *HsaAbiVer == ELF::ELFABIVERSION_AMDGPU_HSA_V5;
145   return false;
146 }
147 
148 bool isHsaAbiVersion3AndAbove(const MCSubtargetInfo *STI) {
149   return isHsaAbiVersion3(STI) || isHsaAbiVersion4(STI) ||
150          isHsaAbiVersion5(STI);
151 }
152 
153 unsigned getAmdhsaCodeObjectVersion() {
154   return AmdhsaCodeObjectVersion;
155 }
156 
157 unsigned getMultigridSyncArgImplicitArgPosition() {
158   switch (AmdhsaCodeObjectVersion) {
159   case 2:
160   case 3:
161   case 4:
162     return 48;
163   case 5:
164     return AMDGPU::ImplicitArg::MULTIGRID_SYNC_ARG_OFFSET;
165   default:
166     llvm_unreachable("Unexpected code object version");
167     return 0;
168   }
169 }
170 
172 // FIXME: All such magic numbers about the ABI should be in a
173 // central TD file.
174 unsigned getHostcallImplicitArgPosition() {
175   switch (AmdhsaCodeObjectVersion) {
176   case 2:
177   case 3:
178   case 4:
179     return 24;
180   case 5:
181     return AMDGPU::ImplicitArg::HOSTCALL_PTR_OFFSET;
182   default:
183     llvm_unreachable("Unexpected code object version");
184     return 0;
185   }
186 }
187 
188 #define GET_MIMGBaseOpcodesTable_IMPL
189 #define GET_MIMGDimInfoTable_IMPL
190 #define GET_MIMGInfoTable_IMPL
191 #define GET_MIMGLZMappingTable_IMPL
192 #define GET_MIMGMIPMappingTable_IMPL
193 #define GET_MIMGBiasMappingTable_IMPL
194 #define GET_MIMGOffsetMappingTable_IMPL
195 #define GET_MIMGG16MappingTable_IMPL
196 #define GET_MAIInstInfoTable_IMPL
197 #include "AMDGPUGenSearchableTables.inc"
198 
199 int getMIMGOpcode(unsigned BaseOpcode, unsigned MIMGEncoding,
200                   unsigned VDataDwords, unsigned VAddrDwords) {
201   const MIMGInfo *Info = getMIMGOpcodeHelper(BaseOpcode, MIMGEncoding,
202                                              VDataDwords, VAddrDwords);
203   return Info ? Info->Opcode : -1;
204 }
205 
206 const MIMGBaseOpcodeInfo *getMIMGBaseOpcode(unsigned Opc) {
207   const MIMGInfo *Info = getMIMGInfo(Opc);
208   return Info ? getMIMGBaseOpcodeInfo(Info->BaseOpcode) : nullptr;
209 }
210 
211 int getMaskedMIMGOp(unsigned Opc, unsigned NewChannels) {
212   const MIMGInfo *OrigInfo = getMIMGInfo(Opc);
213   const MIMGInfo *NewInfo =
214       getMIMGOpcodeHelper(OrigInfo->BaseOpcode, OrigInfo->MIMGEncoding,
215                           NewChannels, OrigInfo->VAddrDwords);
216   return NewInfo ? NewInfo->Opcode : -1;
217 }
218 
219 unsigned getAddrSizeMIMGOp(const MIMGBaseOpcodeInfo *BaseOpcode,
220                            const MIMGDimInfo *Dim, bool IsA16,
221                            bool IsG16Supported) {
222   unsigned AddrWords = BaseOpcode->NumExtraArgs;
223   unsigned AddrComponents = (BaseOpcode->Coordinates ? Dim->NumCoords : 0) +
224                             (BaseOpcode->LodOrClampOrMip ? 1 : 0);
225   if (IsA16)
226     AddrWords += divideCeil(AddrComponents, 2);
227   else
228     AddrWords += AddrComponents;
229 
230   // Note: For subtargets that support A16 but not G16, enabling A16 also
231   // enables 16 bit gradients.
232   // For subtargets that support A16 (operand) and G16 (done with a different
233   // instruction encoding), they are independent.
234 
235   if (BaseOpcode->Gradients) {
236     if ((IsA16 && !IsG16Supported) || BaseOpcode->G16)
      // There are two gradients per coordinate; we pack them separately.
      // For the 3d case, we get (dy/du, dx/du) (-, dz/du) (dy/dv, dx/dv)
      // (-, dz/dv).
240       AddrWords += alignTo<2>(Dim->NumGradients / 2);
241     else
242       AddrWords += Dim->NumGradients;
243   }
244   return AddrWords;
245 }
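
// Example (a sketch, assuming a 2-D sample_d: 2 coordinates, 4 gradient
// values, no extra arguments): with 32-bit operands this takes 2 + 4 = 6
// address dwords; with 16-bit gradients the 4 gradients pack into
// alignTo<2>(4 / 2) = 2 dwords, and with A16 the 2 coordinates pack into 1.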
246 
247 struct MUBUFInfo {
248   uint16_t Opcode;
249   uint16_t BaseOpcode;
250   uint8_t elements;
251   bool has_vaddr;
252   bool has_srsrc;
253   bool has_soffset;
254   bool IsBufferInv;
255 };
256 
257 struct MTBUFInfo {
258   uint16_t Opcode;
259   uint16_t BaseOpcode;
260   uint8_t elements;
261   bool has_vaddr;
262   bool has_srsrc;
263   bool has_soffset;
264 };
265 
266 struct SMInfo {
267   uint16_t Opcode;
268   bool IsBuffer;
269 };
270 
271 struct VOPInfo {
272   uint16_t Opcode;
273   bool IsSingle;
274 };
275 
276 #define GET_MTBUFInfoTable_DECL
277 #define GET_MTBUFInfoTable_IMPL
278 #define GET_MUBUFInfoTable_DECL
279 #define GET_MUBUFInfoTable_IMPL
280 #define GET_SMInfoTable_DECL
281 #define GET_SMInfoTable_IMPL
282 #define GET_VOP1InfoTable_DECL
283 #define GET_VOP1InfoTable_IMPL
284 #define GET_VOP2InfoTable_DECL
285 #define GET_VOP2InfoTable_IMPL
286 #define GET_VOP3InfoTable_DECL
287 #define GET_VOP3InfoTable_IMPL
288 #include "AMDGPUGenSearchableTables.inc"
289 
290 int getMTBUFBaseOpcode(unsigned Opc) {
291   const MTBUFInfo *Info = getMTBUFInfoFromOpcode(Opc);
292   return Info ? Info->BaseOpcode : -1;
293 }
294 
295 int getMTBUFOpcode(unsigned BaseOpc, unsigned Elements) {
296   const MTBUFInfo *Info = getMTBUFInfoFromBaseOpcodeAndElements(BaseOpc, Elements);
297   return Info ? Info->Opcode : -1;
298 }
299 
300 int getMTBUFElements(unsigned Opc) {
301   const MTBUFInfo *Info = getMTBUFOpcodeHelper(Opc);
302   return Info ? Info->elements : 0;
303 }
304 
305 bool getMTBUFHasVAddr(unsigned Opc) {
306   const MTBUFInfo *Info = getMTBUFOpcodeHelper(Opc);
307   return Info ? Info->has_vaddr : false;
308 }
309 
310 bool getMTBUFHasSrsrc(unsigned Opc) {
311   const MTBUFInfo *Info = getMTBUFOpcodeHelper(Opc);
312   return Info ? Info->has_srsrc : false;
313 }
314 
315 bool getMTBUFHasSoffset(unsigned Opc) {
316   const MTBUFInfo *Info = getMTBUFOpcodeHelper(Opc);
317   return Info ? Info->has_soffset : false;
318 }
319 
320 int getMUBUFBaseOpcode(unsigned Opc) {
321   const MUBUFInfo *Info = getMUBUFInfoFromOpcode(Opc);
322   return Info ? Info->BaseOpcode : -1;
323 }
324 
325 int getMUBUFOpcode(unsigned BaseOpc, unsigned Elements) {
326   const MUBUFInfo *Info = getMUBUFInfoFromBaseOpcodeAndElements(BaseOpc, Elements);
327   return Info ? Info->Opcode : -1;
328 }
329 
330 int getMUBUFElements(unsigned Opc) {
331   const MUBUFInfo *Info = getMUBUFOpcodeHelper(Opc);
332   return Info ? Info->elements : 0;
333 }
334 
335 bool getMUBUFHasVAddr(unsigned Opc) {
336   const MUBUFInfo *Info = getMUBUFOpcodeHelper(Opc);
337   return Info ? Info->has_vaddr : false;
338 }
339 
340 bool getMUBUFHasSrsrc(unsigned Opc) {
341   const MUBUFInfo *Info = getMUBUFOpcodeHelper(Opc);
342   return Info ? Info->has_srsrc : false;
343 }
344 
345 bool getMUBUFHasSoffset(unsigned Opc) {
346   const MUBUFInfo *Info = getMUBUFOpcodeHelper(Opc);
347   return Info ? Info->has_soffset : false;
348 }
349 
350 bool getMUBUFIsBufferInv(unsigned Opc) {
351   const MUBUFInfo *Info = getMUBUFOpcodeHelper(Opc);
352   return Info ? Info->IsBufferInv : false;
353 }
354 
355 bool getSMEMIsBuffer(unsigned Opc) {
356   const SMInfo *Info = getSMEMOpcodeHelper(Opc);
357   return Info ? Info->IsBuffer : false;
358 }
359 
360 bool getVOP1IsSingle(unsigned Opc) {
361   const VOPInfo *Info = getVOP1OpcodeHelper(Opc);
362   return Info ? Info->IsSingle : false;
363 }
364 
365 bool getVOP2IsSingle(unsigned Opc) {
366   const VOPInfo *Info = getVOP2OpcodeHelper(Opc);
367   return Info ? Info->IsSingle : false;
368 }
369 
370 bool getVOP3IsSingle(unsigned Opc) {
371   const VOPInfo *Info = getVOP3OpcodeHelper(Opc);
372   return Info ? Info->IsSingle : false;
373 }
374 
375 bool getMAIIsDGEMM(unsigned Opc) {
376   const MAIInstInfo *Info = getMAIInstInfoHelper(Opc);
377   return Info ? Info->is_dgemm : false;
378 }
379 
380 bool getMAIIsGFX940XDL(unsigned Opc) {
381   const MAIInstInfo *Info = getMAIInstInfoHelper(Opc);
382   return Info ? Info->is_gfx940_xdl : false;
383 }
384 
385 // Wrapper for Tablegen'd function.  enum Subtarget is not defined in any
386 // header files, so we need to wrap it in a function that takes unsigned
387 // instead.
388 int getMCOpcode(uint16_t Opcode, unsigned Gen) {
389   return getMCOpcodeGen(Opcode, static_cast<Subtarget>(Gen));
390 }
391 
392 namespace IsaInfo {
393 
394 AMDGPUTargetID::AMDGPUTargetID(const MCSubtargetInfo &STI)
395     : STI(STI), XnackSetting(TargetIDSetting::Any),
396       SramEccSetting(TargetIDSetting::Any) {
397   if (!STI.getFeatureBits().test(FeatureSupportsXNACK))
398     XnackSetting = TargetIDSetting::Unsupported;
399   if (!STI.getFeatureBits().test(FeatureSupportsSRAMECC))
400     SramEccSetting = TargetIDSetting::Unsupported;
401 }
402 
403 void AMDGPUTargetID::setTargetIDFromFeaturesString(StringRef FS) {
404   // Check if xnack or sramecc is explicitly enabled or disabled.  In the
405   // absence of the target features we assume we must generate code that can run
406   // in any environment.
407   SubtargetFeatures Features(FS);
408   Optional<bool> XnackRequested;
409   Optional<bool> SramEccRequested;
410 
411   for (const std::string &Feature : Features.getFeatures()) {
412     if (Feature == "+xnack")
413       XnackRequested = true;
414     else if (Feature == "-xnack")
415       XnackRequested = false;
416     else if (Feature == "+sramecc")
417       SramEccRequested = true;
418     else if (Feature == "-sramecc")
419       SramEccRequested = false;
420   }
421 
422   bool XnackSupported = isXnackSupported();
423   bool SramEccSupported = isSramEccSupported();
424 
425   if (XnackRequested) {
426     if (XnackSupported) {
427       XnackSetting =
428           *XnackRequested ? TargetIDSetting::On : TargetIDSetting::Off;
429     } else {
      // If a specific xnack setting was requested and this GPU does not
      // support xnack, emit a warning. The setting will remain "Unsupported".
432       if (*XnackRequested) {
433         errs() << "warning: xnack 'On' was requested for a processor that does "
434                   "not support it!\n";
435       } else {
436         errs() << "warning: xnack 'Off' was requested for a processor that "
437                   "does not support it!\n";
438       }
439     }
440   }
441 
442   if (SramEccRequested) {
443     if (SramEccSupported) {
444       SramEccSetting =
445           *SramEccRequested ? TargetIDSetting::On : TargetIDSetting::Off;
446     } else {
      // If a specific sramecc setting was requested and this GPU does not
      // support sramecc, emit a warning. The setting will remain
      // "Unsupported".
450       if (*SramEccRequested) {
451         errs() << "warning: sramecc 'On' was requested for a processor that "
452                   "does not support it!\n";
453       } else {
454         errs() << "warning: sramecc 'Off' was requested for a processor that "
455                   "does not support it!\n";
456       }
457     }
458   }
459 }
460 
461 static TargetIDSetting
462 getTargetIDSettingFromFeatureString(StringRef FeatureString) {
463   if (FeatureString.endswith("-"))
464     return TargetIDSetting::Off;
465   if (FeatureString.endswith("+"))
466     return TargetIDSetting::On;
467 
468   llvm_unreachable("Malformed feature string");
469 }
470 
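// A target ID stream is a sequence of ':'-separated components in which each
// feature carries its setting as a trailing '+' or '-', for example
// "gfx90a:sramecc+:xnack-". A leading processor component, if present, is
// skipped by the loop below since it matches neither feature prefix.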
471 void AMDGPUTargetID::setTargetIDFromTargetIDStream(StringRef TargetID) {
472   SmallVector<StringRef, 3> TargetIDSplit;
473   TargetID.split(TargetIDSplit, ':');
474 
475   for (const auto &FeatureString : TargetIDSplit) {
476     if (FeatureString.startswith("xnack"))
477       XnackSetting = getTargetIDSettingFromFeatureString(FeatureString);
478     if (FeatureString.startswith("sramecc"))
479       SramEccSetting = getTargetIDSettingFromFeatureString(FeatureString);
480   }
481 }
482 
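// Builds the full target ID string, e.g.
// "amdgcn-amd-amdhsa--gfx90a:sramecc+:xnack-" for a code object V4/V5 target
// with both settings explicit.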
483 std::string AMDGPUTargetID::toString() const {
484   std::string StringRep;
485   raw_string_ostream StreamRep(StringRep);
486 
487   auto TargetTriple = STI.getTargetTriple();
488   auto Version = getIsaVersion(STI.getCPU());
489 
490   StreamRep << TargetTriple.getArchName() << '-'
491             << TargetTriple.getVendorName() << '-'
492             << TargetTriple.getOSName() << '-'
493             << TargetTriple.getEnvironmentName() << '-';
494 
495   std::string Processor;
  // TODO: The following else statement is present here because we used
  // various alias names for GPUs up until GFX9 (e.g. 'fiji' is the same as
  // 'gfx803'). Remove once all aliases are removed from GCNProcessors.td.
499   if (Version.Major >= 9)
500     Processor = STI.getCPU().str();
501   else
502     Processor = (Twine("gfx") + Twine(Version.Major) + Twine(Version.Minor) +
503                  Twine(Version.Stepping))
504                     .str();
505 
506   std::string Features;
507   if (Optional<uint8_t> HsaAbiVersion = getHsaAbiVersion(&STI)) {
508     switch (*HsaAbiVersion) {
509     case ELF::ELFABIVERSION_AMDGPU_HSA_V2:
      // Code object V2 only supported specific processors and had fixed
      // settings for XNACK.
512       if (Processor == "gfx600") {
513       } else if (Processor == "gfx601") {
514       } else if (Processor == "gfx602") {
515       } else if (Processor == "gfx700") {
516       } else if (Processor == "gfx701") {
517       } else if (Processor == "gfx702") {
518       } else if (Processor == "gfx703") {
519       } else if (Processor == "gfx704") {
520       } else if (Processor == "gfx705") {
521       } else if (Processor == "gfx801") {
522         if (!isXnackOnOrAny())
523           report_fatal_error(
524               "AMD GPU code object V2 does not support processor " +
525               Twine(Processor) + " without XNACK");
526       } else if (Processor == "gfx802") {
527       } else if (Processor == "gfx803") {
528       } else if (Processor == "gfx805") {
529       } else if (Processor == "gfx810") {
530         if (!isXnackOnOrAny())
531           report_fatal_error(
532               "AMD GPU code object V2 does not support processor " +
533               Twine(Processor) + " without XNACK");
534       } else if (Processor == "gfx900") {
535         if (isXnackOnOrAny())
536           Processor = "gfx901";
537       } else if (Processor == "gfx902") {
538         if (isXnackOnOrAny())
539           Processor = "gfx903";
540       } else if (Processor == "gfx904") {
541         if (isXnackOnOrAny())
542           Processor = "gfx905";
543       } else if (Processor == "gfx906") {
544         if (isXnackOnOrAny())
545           Processor = "gfx907";
546       } else if (Processor == "gfx90c") {
547         if (isXnackOnOrAny())
548           report_fatal_error(
549               "AMD GPU code object V2 does not support processor " +
550               Twine(Processor) + " with XNACK being ON or ANY");
551       } else {
552         report_fatal_error(
553             "AMD GPU code object V2 does not support processor " +
554             Twine(Processor));
555       }
556       break;
557     case ELF::ELFABIVERSION_AMDGPU_HSA_V3:
558       // xnack.
559       if (isXnackOnOrAny())
560         Features += "+xnack";
      // In code object v2 and v3, the "sramecc" feature was spelled with a
      // hyphen ("sram-ecc").
563       if (isSramEccOnOrAny())
564         Features += "+sram-ecc";
565       break;
566     case ELF::ELFABIVERSION_AMDGPU_HSA_V4:
567     case ELF::ELFABIVERSION_AMDGPU_HSA_V5:
568       // sramecc.
569       if (getSramEccSetting() == TargetIDSetting::Off)
570         Features += ":sramecc-";
571       else if (getSramEccSetting() == TargetIDSetting::On)
572         Features += ":sramecc+";
573       // xnack.
574       if (getXnackSetting() == TargetIDSetting::Off)
575         Features += ":xnack-";
576       else if (getXnackSetting() == TargetIDSetting::On)
577         Features += ":xnack+";
578       break;
579     default:
580       break;
581     }
582   }
583 
584   StreamRep << Processor << Features;
585 
586   StreamRep.flush();
587   return StringRep;
588 }
589 
590 unsigned getWavefrontSize(const MCSubtargetInfo *STI) {
591   if (STI->getFeatureBits().test(FeatureWavefrontSize16))
592     return 16;
593   if (STI->getFeatureBits().test(FeatureWavefrontSize32))
594     return 32;
595 
596   return 64;
597 }
598 
599 unsigned getLocalMemorySize(const MCSubtargetInfo *STI) {
600   if (STI->getFeatureBits().test(FeatureLocalMemorySize32768))
601     return 32768;
602   if (STI->getFeatureBits().test(FeatureLocalMemorySize65536))
603     return 65536;
604 
605   return 0;
606 }
607 
608 unsigned getEUsPerCU(const MCSubtargetInfo *STI) {
609   // "Per CU" really means "per whatever functional block the waves of a
610   // workgroup must share". For gfx10 in CU mode this is the CU, which contains
611   // two SIMDs.
612   if (isGFX10Plus(*STI) && STI->getFeatureBits().test(FeatureCuMode))
613     return 2;
614   // Pre-gfx10 a CU contains four SIMDs. For gfx10 in WGP mode the WGP contains
615   // two CUs, so a total of four SIMDs.
616   return 4;
617 }
618 
619 unsigned getMaxWorkGroupsPerCU(const MCSubtargetInfo *STI,
620                                unsigned FlatWorkGroupSize) {
621   assert(FlatWorkGroupSize != 0);
622   if (STI->getTargetTriple().getArch() != Triple::amdgcn)
623     return 8;
624   unsigned N = getWavesPerWorkGroup(STI, FlatWorkGroupSize);
625   if (N == 1)
626     return 40;
627   N = 40 / N;
628   return std::min(N, 16u);
629 }
630 
631 unsigned getMinWavesPerEU(const MCSubtargetInfo *STI) {
632   return 1;
633 }
634 
635 unsigned getMaxWavesPerEU(const MCSubtargetInfo *STI) {
636   // FIXME: Need to take scratch memory into account.
637   if (isGFX90A(*STI))
638     return 8;
639   if (!isGFX10Plus(*STI))
640     return 10;
641   return hasGFX10_3Insts(*STI) ? 16 : 20;
642 }
643 
644 unsigned getWavesPerEUForWorkGroup(const MCSubtargetInfo *STI,
645                                    unsigned FlatWorkGroupSize) {
646   return divideCeil(getWavesPerWorkGroup(STI, FlatWorkGroupSize),
647                     getEUsPerCU(STI));
648 }
649 
650 unsigned getMinFlatWorkGroupSize(const MCSubtargetInfo *STI) {
651   return 1;
652 }
653 
654 unsigned getMaxFlatWorkGroupSize(const MCSubtargetInfo *STI) {
655   // Some subtargets allow encoding 2048, but this isn't tested or supported.
656   return 1024;
657 }
658 
659 unsigned getWavesPerWorkGroup(const MCSubtargetInfo *STI,
660                               unsigned FlatWorkGroupSize) {
661   return divideCeil(FlatWorkGroupSize, getWavefrontSize(STI));
662 }
663 
664 unsigned getSGPRAllocGranule(const MCSubtargetInfo *STI) {
665   IsaVersion Version = getIsaVersion(STI->getCPU());
666   if (Version.Major >= 10)
667     return getAddressableNumSGPRs(STI);
668   if (Version.Major >= 8)
669     return 16;
670   return 8;
671 }
672 
673 unsigned getSGPREncodingGranule(const MCSubtargetInfo *STI) {
674   return 8;
675 }
676 
677 unsigned getTotalNumSGPRs(const MCSubtargetInfo *STI) {
678   IsaVersion Version = getIsaVersion(STI->getCPU());
679   if (Version.Major >= 8)
680     return 800;
681   return 512;
682 }
683 
684 unsigned getAddressableNumSGPRs(const MCSubtargetInfo *STI) {
685   if (STI->getFeatureBits().test(FeatureSGPRInitBug))
686     return FIXED_NUM_SGPRS_FOR_INIT_BUG;
687 
688   IsaVersion Version = getIsaVersion(STI->getCPU());
689   if (Version.Major >= 10)
690     return 106;
691   if (Version.Major >= 8)
692     return 102;
693   return 104;
694 }
695 
696 unsigned getMinNumSGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU) {
697   assert(WavesPerEU != 0);
698 
699   IsaVersion Version = getIsaVersion(STI->getCPU());
700   if (Version.Major >= 10)
701     return 0;
702 
703   if (WavesPerEU >= getMaxWavesPerEU(STI))
704     return 0;
705 
706   unsigned MinNumSGPRs = getTotalNumSGPRs(STI) / (WavesPerEU + 1);
707   if (STI->getFeatureBits().test(FeatureTrapHandler))
708     MinNumSGPRs -= std::min(MinNumSGPRs, (unsigned)TRAP_NUM_SGPRS);
709   MinNumSGPRs = alignDown(MinNumSGPRs, getSGPRAllocGranule(STI)) + 1;
710   return std::min(MinNumSGPRs, getAddressableNumSGPRs(STI));
711 }
712 
713 unsigned getMaxNumSGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU,
714                         bool Addressable) {
715   assert(WavesPerEU != 0);
716 
717   unsigned AddressableNumSGPRs = getAddressableNumSGPRs(STI);
718   IsaVersion Version = getIsaVersion(STI->getCPU());
719   if (Version.Major >= 10)
720     return Addressable ? AddressableNumSGPRs : 108;
721   if (Version.Major >= 8 && !Addressable)
722     AddressableNumSGPRs = 112;
723   unsigned MaxNumSGPRs = getTotalNumSGPRs(STI) / WavesPerEU;
724   if (STI->getFeatureBits().test(FeatureTrapHandler))
725     MaxNumSGPRs -= std::min(MaxNumSGPRs, (unsigned)TRAP_NUM_SGPRS);
726   MaxNumSGPRs = alignDown(MaxNumSGPRs, getSGPRAllocGranule(STI));
727   return std::min(MaxNumSGPRs, AddressableNumSGPRs);
728 }
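
// Worked example (ignoring the trap-handler reservation): on a GFX9 target
// getTotalNumSGPRs is 800 and the allocation granule is 16, so at 8 waves
// per EU this returns min(alignDown(800 / 8, 16), 102) = 96.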
729 
730 unsigned getNumExtraSGPRs(const MCSubtargetInfo *STI, bool VCCUsed,
731                           bool FlatScrUsed, bool XNACKUsed) {
732   unsigned ExtraSGPRs = 0;
733   if (VCCUsed)
734     ExtraSGPRs = 2;
735 
736   IsaVersion Version = getIsaVersion(STI->getCPU());
737   if (Version.Major >= 10)
738     return ExtraSGPRs;
739 
740   if (Version.Major < 8) {
741     if (FlatScrUsed)
742       ExtraSGPRs = 4;
743   } else {
744     if (XNACKUsed)
745       ExtraSGPRs = 4;
746 
747     if (FlatScrUsed ||
748         STI->getFeatureBits().test(AMDGPU::FeatureArchitectedFlatScratch))
749       ExtraSGPRs = 6;
750   }
751 
752   return ExtraSGPRs;
753 }
754 
755 unsigned getNumExtraSGPRs(const MCSubtargetInfo *STI, bool VCCUsed,
756                           bool FlatScrUsed) {
757   return getNumExtraSGPRs(STI, VCCUsed, FlatScrUsed,
758                           STI->getFeatureBits().test(AMDGPU::FeatureXNACK));
759 }
760 
761 unsigned getNumSGPRBlocks(const MCSubtargetInfo *STI, unsigned NumSGPRs) {
762   NumSGPRs = alignTo(std::max(1u, NumSGPRs), getSGPREncodingGranule(STI));
763   // SGPRBlocks is actual number of SGPR blocks minus 1.
764   return NumSGPRs / getSGPREncodingGranule(STI) - 1;
765 }
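
// Example: with the 8-SGPR encoding granule, NumSGPRs = 20 is rounded up to
// 24 and encoded as 24 / 8 - 1 = 2.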
766 
767 unsigned getVGPRAllocGranule(const MCSubtargetInfo *STI,
768                              Optional<bool> EnableWavefrontSize32) {
769   if (STI->getFeatureBits().test(FeatureGFX90AInsts))
770     return 8;
771 
772   bool IsWave32 = EnableWavefrontSize32 ?
773       *EnableWavefrontSize32 :
774       STI->getFeatureBits().test(FeatureWavefrontSize32);
775 
776   if (hasGFX10_3Insts(*STI))
777     return IsWave32 ? 16 : 8;
778 
779   return IsWave32 ? 8 : 4;
780 }
781 
782 unsigned getVGPREncodingGranule(const MCSubtargetInfo *STI,
783                                 Optional<bool> EnableWavefrontSize32) {
784   if (STI->getFeatureBits().test(FeatureGFX90AInsts))
785     return 8;
786 
787   bool IsWave32 = EnableWavefrontSize32 ?
788       *EnableWavefrontSize32 :
789       STI->getFeatureBits().test(FeatureWavefrontSize32);
790 
791   return IsWave32 ? 8 : 4;
792 }
793 
794 unsigned getTotalNumVGPRs(const MCSubtargetInfo *STI) {
795   if (STI->getFeatureBits().test(FeatureGFX90AInsts))
796     return 512;
797   if (!isGFX10Plus(*STI))
798     return 256;
799   return STI->getFeatureBits().test(FeatureWavefrontSize32) ? 1024 : 512;
800 }
801 
802 unsigned getAddressableNumVGPRs(const MCSubtargetInfo *STI) {
803   if (LimitTo128VGPRs.getNumOccurrences() ? LimitTo128VGPRs
804                                           : isGFX11Plus(*STI)) {
805     // GFX11 changes the encoding of 16-bit operands in VOP1/2/C instructions
806     // such that values 128..255 no longer mean v128..v255, they mean
807     // v0.hi..v127.hi instead. Until the compiler understands this, it is not
808     // safe to use v128..v255.
809     // TODO-GFX11: Remove this when full 16-bit codegen is implemented.
810     return 128;
811   }
812   if (STI->getFeatureBits().test(FeatureGFX90AInsts))
813     return 512;
814   return 256;
815 }
816 
817 unsigned getMinNumVGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU) {
818   assert(WavesPerEU != 0);
819 
820   if (WavesPerEU >= getMaxWavesPerEU(STI))
821     return 0;
822   unsigned MinNumVGPRs =
823       alignDown(getTotalNumVGPRs(STI) / (WavesPerEU + 1),
824                 getVGPRAllocGranule(STI)) + 1;
825   return std::min(MinNumVGPRs, getAddressableNumVGPRs(STI));
826 }
827 
828 unsigned getMaxNumVGPRs(const MCSubtargetInfo *STI, unsigned WavesPerEU) {
829   assert(WavesPerEU != 0);
830 
831   unsigned MaxNumVGPRs = alignDown(getTotalNumVGPRs(STI) / WavesPerEU,
832                                    getVGPRAllocGranule(STI));
833   unsigned AddressableNumVGPRs = getAddressableNumVGPRs(STI);
834   return std::min(MaxNumVGPRs, AddressableNumVGPRs);
835 }
836 
837 unsigned getNumVGPRBlocks(const MCSubtargetInfo *STI, unsigned NumVGPRs,
838                           Optional<bool> EnableWavefrontSize32) {
839   NumVGPRs = alignTo(std::max(1u, NumVGPRs),
840                      getVGPREncodingGranule(STI, EnableWavefrontSize32));
841   // VGPRBlocks is actual number of VGPR blocks minus 1.
842   return NumVGPRs / getVGPREncodingGranule(STI, EnableWavefrontSize32) - 1;
843 }
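
// Example (assuming a wave64 target without the gfx90a granule): the
// encoding granule is 4, so NumVGPRs = 9 is rounded up to 12 and encoded as
// 12 / 4 - 1 = 2.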
844 
845 } // end namespace IsaInfo
846 
847 void initDefaultAMDKernelCodeT(amd_kernel_code_t &Header,
848                                const MCSubtargetInfo *STI) {
849   IsaVersion Version = getIsaVersion(STI->getCPU());
850 
851   memset(&Header, 0, sizeof(Header));
852 
853   Header.amd_kernel_code_version_major = 1;
854   Header.amd_kernel_code_version_minor = 2;
855   Header.amd_machine_kind = 1; // AMD_MACHINE_KIND_AMDGPU
856   Header.amd_machine_version_major = Version.Major;
857   Header.amd_machine_version_minor = Version.Minor;
858   Header.amd_machine_version_stepping = Version.Stepping;
859   Header.kernel_code_entry_byte_offset = sizeof(Header);
  Header.wavefront_size = 6; // Log2 of the wave size, i.e. wave64 by default.
861 
862   // If the code object does not support indirect functions, then the value must
863   // be 0xffffffff.
864   Header.call_convention = -1;
865 
866   // These alignment values are specified in powers of two, so alignment =
867   // 2^n.  The minimum alignment is 2^4 = 16.
868   Header.kernarg_segment_alignment = 4;
869   Header.group_segment_alignment = 4;
870   Header.private_segment_alignment = 4;
871 
872   if (Version.Major >= 10) {
873     if (STI->getFeatureBits().test(FeatureWavefrontSize32)) {
874       Header.wavefront_size = 5;
875       Header.code_properties |= AMD_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32;
876     }
877     Header.compute_pgm_resource_registers |=
878       S_00B848_WGP_MODE(STI->getFeatureBits().test(FeatureCuMode) ? 0 : 1) |
879       S_00B848_MEM_ORDERED(1);
880   }
881 }
882 
883 amdhsa::kernel_descriptor_t getDefaultAmdhsaKernelDescriptor(
884     const MCSubtargetInfo *STI) {
885   IsaVersion Version = getIsaVersion(STI->getCPU());
886 
887   amdhsa::kernel_descriptor_t KD;
888   memset(&KD, 0, sizeof(KD));
889 
890   AMDHSA_BITS_SET(KD.compute_pgm_rsrc1,
891                   amdhsa::COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_16_64,
892                   amdhsa::FLOAT_DENORM_MODE_FLUSH_NONE);
893   AMDHSA_BITS_SET(KD.compute_pgm_rsrc1,
894                   amdhsa::COMPUTE_PGM_RSRC1_ENABLE_DX10_CLAMP, 1);
895   AMDHSA_BITS_SET(KD.compute_pgm_rsrc1,
896                   amdhsa::COMPUTE_PGM_RSRC1_ENABLE_IEEE_MODE, 1);
897   AMDHSA_BITS_SET(KD.compute_pgm_rsrc2,
898                   amdhsa::COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_X, 1);
899   if (Version.Major >= 10) {
900     AMDHSA_BITS_SET(KD.kernel_code_properties,
901                     amdhsa::KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32,
902                     STI->getFeatureBits().test(FeatureWavefrontSize32) ? 1 : 0);
903     AMDHSA_BITS_SET(KD.compute_pgm_rsrc1,
904                     amdhsa::COMPUTE_PGM_RSRC1_WGP_MODE,
905                     STI->getFeatureBits().test(FeatureCuMode) ? 0 : 1);
906     AMDHSA_BITS_SET(KD.compute_pgm_rsrc1,
907                     amdhsa::COMPUTE_PGM_RSRC1_MEM_ORDERED, 1);
908   }
909   if (AMDGPU::isGFX90A(*STI)) {
910     AMDHSA_BITS_SET(KD.compute_pgm_rsrc3,
911                     amdhsa::COMPUTE_PGM_RSRC3_GFX90A_TG_SPLIT,
912                     STI->getFeatureBits().test(FeatureTgSplit) ? 1 : 0);
913   }
914   return KD;
915 }
916 
917 bool isGroupSegment(const GlobalValue *GV) {
918   return GV->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS;
919 }
920 
921 bool isGlobalSegment(const GlobalValue *GV) {
922   return GV->getAddressSpace() == AMDGPUAS::GLOBAL_ADDRESS;
923 }
924 
925 bool isReadOnlySegment(const GlobalValue *GV) {
926   unsigned AS = GV->getAddressSpace();
927   return AS == AMDGPUAS::CONSTANT_ADDRESS ||
928          AS == AMDGPUAS::CONSTANT_ADDRESS_32BIT;
929 }
930 
931 bool shouldEmitConstantsToTextSection(const Triple &TT) {
932   return TT.getArch() == Triple::r600;
933 }
934 
935 int getIntegerAttribute(const Function &F, StringRef Name, int Default) {
936   Attribute A = F.getFnAttribute(Name);
937   int Result = Default;
938 
939   if (A.isStringAttribute()) {
940     StringRef Str = A.getValueAsString();
941     if (Str.getAsInteger(0, Result)) {
942       LLVMContext &Ctx = F.getContext();
943       Ctx.emitError("can't parse integer attribute " + Name);
944     }
945   }
946 
947   return Result;
948 }
949 
950 std::pair<int, int> getIntegerPairAttribute(const Function &F,
951                                             StringRef Name,
952                                             std::pair<int, int> Default,
953                                             bool OnlyFirstRequired) {
954   Attribute A = F.getFnAttribute(Name);
955   if (!A.isStringAttribute())
956     return Default;
957 
958   LLVMContext &Ctx = F.getContext();
959   std::pair<int, int> Ints = Default;
960   std::pair<StringRef, StringRef> Strs = A.getValueAsString().split(',');
961   if (Strs.first.trim().getAsInteger(0, Ints.first)) {
962     Ctx.emitError("can't parse first integer attribute " + Name);
963     return Default;
964   }
965   if (Strs.second.trim().getAsInteger(0, Ints.second)) {
966     if (!OnlyFirstRequired || !Strs.second.trim().empty()) {
967       Ctx.emitError("can't parse second integer attribute " + Name);
968       return Default;
969     }
970   }
971 
972   return Ints;
973 }
974 
975 unsigned getVmcntBitMask(const IsaVersion &Version) {
976   return (1 << (getVmcntBitWidthLo(Version.Major) +
977                 getVmcntBitWidthHi(Version.Major))) -
978          1;
979 }
980 
981 unsigned getExpcntBitMask(const IsaVersion &Version) {
982   return (1 << getExpcntBitWidth(Version.Major)) - 1;
983 }
984 
985 unsigned getLgkmcntBitMask(const IsaVersion &Version) {
986   return (1 << getLgkmcntBitWidth(Version.Major)) - 1;
987 }
988 
989 unsigned getWaitcntBitMask(const IsaVersion &Version) {
990   unsigned VmcntLo = getBitMask(getVmcntBitShiftLo(Version.Major),
991                                 getVmcntBitWidthLo(Version.Major));
992   unsigned Expcnt = getBitMask(getExpcntBitShift(Version.Major),
993                                getExpcntBitWidth(Version.Major));
994   unsigned Lgkmcnt = getBitMask(getLgkmcntBitShift(Version.Major),
995                                 getLgkmcntBitWidth(Version.Major));
996   unsigned VmcntHi = getBitMask(getVmcntBitShiftHi(Version.Major),
997                                 getVmcntBitWidthHi(Version.Major));
998   return VmcntLo | Expcnt | Lgkmcnt | VmcntHi;
999 }
1000 
1001 unsigned decodeVmcnt(const IsaVersion &Version, unsigned Waitcnt) {
1002   unsigned VmcntLo = unpackBits(Waitcnt, getVmcntBitShiftLo(Version.Major),
1003                                 getVmcntBitWidthLo(Version.Major));
1004   unsigned VmcntHi = unpackBits(Waitcnt, getVmcntBitShiftHi(Version.Major),
1005                                 getVmcntBitWidthHi(Version.Major));
1006   return VmcntLo | VmcntHi << getVmcntBitWidthLo(Version.Major);
1007 }
1008 
1009 unsigned decodeExpcnt(const IsaVersion &Version, unsigned Waitcnt) {
1010   return unpackBits(Waitcnt, getExpcntBitShift(Version.Major),
1011                     getExpcntBitWidth(Version.Major));
1012 }
1013 
1014 unsigned decodeLgkmcnt(const IsaVersion &Version, unsigned Waitcnt) {
1015   return unpackBits(Waitcnt, getLgkmcntBitShift(Version.Major),
1016                     getLgkmcntBitWidth(Version.Major));
1017 }
1018 
1019 void decodeWaitcnt(const IsaVersion &Version, unsigned Waitcnt,
1020                    unsigned &Vmcnt, unsigned &Expcnt, unsigned &Lgkmcnt) {
1021   Vmcnt = decodeVmcnt(Version, Waitcnt);
1022   Expcnt = decodeExpcnt(Version, Waitcnt);
1023   Lgkmcnt = decodeLgkmcnt(Version, Waitcnt);
1024 }
1025 
1026 Waitcnt decodeWaitcnt(const IsaVersion &Version, unsigned Encoded) {
1027   Waitcnt Decoded;
1028   Decoded.VmCnt = decodeVmcnt(Version, Encoded);
1029   Decoded.ExpCnt = decodeExpcnt(Version, Encoded);
1030   Decoded.LgkmCnt = decodeLgkmcnt(Version, Encoded);
1031   return Decoded;
1032 }
1033 
1034 unsigned encodeVmcnt(const IsaVersion &Version, unsigned Waitcnt,
1035                      unsigned Vmcnt) {
1036   Waitcnt = packBits(Vmcnt, Waitcnt, getVmcntBitShiftLo(Version.Major),
1037                      getVmcntBitWidthLo(Version.Major));
1038   return packBits(Vmcnt >> getVmcntBitWidthLo(Version.Major), Waitcnt,
1039                   getVmcntBitShiftHi(Version.Major),
1040                   getVmcntBitWidthHi(Version.Major));
1041 }
1042 
1043 unsigned encodeExpcnt(const IsaVersion &Version, unsigned Waitcnt,
1044                       unsigned Expcnt) {
1045   return packBits(Expcnt, Waitcnt, getExpcntBitShift(Version.Major),
1046                   getExpcntBitWidth(Version.Major));
1047 }
1048 
1049 unsigned encodeLgkmcnt(const IsaVersion &Version, unsigned Waitcnt,
1050                        unsigned Lgkmcnt) {
1051   return packBits(Lgkmcnt, Waitcnt, getLgkmcntBitShift(Version.Major),
1052                   getLgkmcntBitWidth(Version.Major));
1053 }
1054 
1055 unsigned encodeWaitcnt(const IsaVersion &Version,
1056                        unsigned Vmcnt, unsigned Expcnt, unsigned Lgkmcnt) {
1057   unsigned Waitcnt = getWaitcntBitMask(Version);
1058   Waitcnt = encodeVmcnt(Version, Waitcnt, Vmcnt);
1059   Waitcnt = encodeExpcnt(Version, Waitcnt, Expcnt);
1060   Waitcnt = encodeLgkmcnt(Version, Waitcnt, Lgkmcnt);
1061   return Waitcnt;
1062 }
1063 
1064 unsigned encodeWaitcnt(const IsaVersion &Version, const Waitcnt &Decoded) {
1065   return encodeWaitcnt(Version, Decoded.VmCnt, Decoded.ExpCnt, Decoded.LgkmCnt);
1066 }
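
// Worked example for GFX10 (Version.Major == 10): getWaitcntBitMask is
// 0xFF7F, so every counter starts out at its "no wait" maximum, and
// encodeWaitcnt(Version, /*Vmcnt=*/1, /*Expcnt=*/7, /*Lgkmcnt=*/63)
// produces 0x3F71; decodeVmcnt(Version, 0x3F71) then recovers 1.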
1067 
1068 //===----------------------------------------------------------------------===//
1069 // Custom Operands.
1070 //
// A table of custom operands shall describe "primary" operand names
// first, followed by aliases if any. It is not required but recommended
// to arrange operands so that operand encoding matches operand position
// in the table. This will make disassembly a bit more efficient.
// Unused slots in the table shall have an empty name.
1076 //
1077 //===----------------------------------------------------------------------===//
1078 
1079 template <class T>
1080 static bool isValidOpr(int Idx, const CustomOperand<T> OpInfo[], int OpInfoSize,
1081                        T Context) {
1082   return 0 <= Idx && Idx < OpInfoSize && !OpInfo[Idx].Name.empty() &&
1083          (!OpInfo[Idx].Cond || OpInfo[Idx].Cond(Context));
1084 }
1085 
1086 template <class T>
1087 static int getOprIdx(std::function<bool(const CustomOperand<T> &)> Test,
1088                      const CustomOperand<T> OpInfo[], int OpInfoSize,
1089                      T Context) {
1090   int InvalidIdx = OPR_ID_UNKNOWN;
1091   for (int Idx = 0; Idx < OpInfoSize; ++Idx) {
1092     if (Test(OpInfo[Idx])) {
1093       if (!OpInfo[Idx].Cond || OpInfo[Idx].Cond(Context))
1094         return Idx;
1095       InvalidIdx = OPR_ID_UNSUPPORTED;
1096     }
1097   }
1098   return InvalidIdx;
1099 }
1100 
1101 template <class T>
1102 static int getOprIdx(const StringRef Name, const CustomOperand<T> OpInfo[],
1103                      int OpInfoSize, T Context) {
1104   auto Test = [=](const CustomOperand<T> &Op) { return Op.Name == Name; };
1105   return getOprIdx<T>(Test, OpInfo, OpInfoSize, Context);
1106 }
1107 
1108 template <class T>
1109 static int getOprIdx(int Id, const CustomOperand<T> OpInfo[], int OpInfoSize,
1110                      T Context, bool QuickCheck = true) {
1111   auto Test = [=](const CustomOperand<T> &Op) {
1112     return Op.Encoding == Id && !Op.Name.empty();
1113   };
1114   // This is an optimization that should work in most cases.
1115   // As a side effect, it may cause selection of an alias
1116   // instead of a primary operand name in case of sparse tables.
1117   if (QuickCheck && isValidOpr<T>(Id, OpInfo, OpInfoSize, Context) &&
1118       OpInfo[Id].Encoding == Id) {
1119     return Id;
1120   }
1121   return getOprIdx<T>(Test, OpInfo, OpInfoSize, Context);
1122 }
1123 
1124 //===----------------------------------------------------------------------===//
1125 // Custom Operand Values
1126 //===----------------------------------------------------------------------===//
1127 
1128 static unsigned getDefaultCustomOperandEncoding(const CustomOperandVal *Opr,
1129                                                 int Size,
1130                                                 const MCSubtargetInfo &STI) {
1131   unsigned Enc = 0;
1132   for (int Idx = 0; Idx < Size; ++Idx) {
1133     const auto &Op = Opr[Idx];
1134     if (Op.isSupported(STI))
1135       Enc |= Op.encode(Op.Default);
1136   }
1137   return Enc;
1138 }
1139 
1140 static bool isSymbolicCustomOperandEncoding(const CustomOperandVal *Opr,
1141                                             int Size, unsigned Code,
1142                                             bool &HasNonDefaultVal,
1143                                             const MCSubtargetInfo &STI) {
1144   unsigned UsedOprMask = 0;
1145   HasNonDefaultVal = false;
1146   for (int Idx = 0; Idx < Size; ++Idx) {
1147     const auto &Op = Opr[Idx];
1148     if (!Op.isSupported(STI))
1149       continue;
1150     UsedOprMask |= Op.getMask();
1151     unsigned Val = Op.decode(Code);
1152     if (!Op.isValid(Val))
1153       return false;
1154     HasNonDefaultVal |= (Val != Op.Default);
1155   }
1156   return (Code & ~UsedOprMask) == 0;
1157 }
1158 
1159 static bool decodeCustomOperand(const CustomOperandVal *Opr, int Size,
1160                                 unsigned Code, int &Idx, StringRef &Name,
1161                                 unsigned &Val, bool &IsDefault,
1162                                 const MCSubtargetInfo &STI) {
1163   while (Idx < Size) {
1164     const auto &Op = Opr[Idx++];
1165     if (Op.isSupported(STI)) {
1166       Name = Op.Name;
1167       Val = Op.decode(Code);
1168       IsDefault = (Val == Op.Default);
1169       return true;
1170     }
1171   }
1172 
1173   return false;
1174 }
1175 
1176 static int encodeCustomOperandVal(const CustomOperandVal &Op,
1177                                   int64_t InputVal) {
1178   if (InputVal < 0 || InputVal > Op.Max)
1179     return OPR_VAL_INVALID;
1180   return Op.encode(InputVal);
1181 }
1182 
1183 static int encodeCustomOperand(const CustomOperandVal *Opr, int Size,
1184                                const StringRef Name, int64_t InputVal,
1185                                unsigned &UsedOprMask,
1186                                const MCSubtargetInfo &STI) {
1187   int InvalidId = OPR_ID_UNKNOWN;
1188   for (int Idx = 0; Idx < Size; ++Idx) {
1189     const auto &Op = Opr[Idx];
1190     if (Op.Name == Name) {
1191       if (!Op.isSupported(STI)) {
1192         InvalidId = OPR_ID_UNSUPPORTED;
1193         continue;
1194       }
1195       auto OprMask = Op.getMask();
1196       if (OprMask & UsedOprMask)
1197         return OPR_ID_DUPLICATE;
1198       UsedOprMask |= OprMask;
1199       return encodeCustomOperandVal(Op, InputVal);
1200     }
1201   }
1202   return InvalidId;
1203 }
1204 
1205 //===----------------------------------------------------------------------===//
1206 // DepCtr
1207 //===----------------------------------------------------------------------===//
1208 
1209 namespace DepCtr {
1210 
1211 int getDefaultDepCtrEncoding(const MCSubtargetInfo &STI) {
1212   static int Default = -1;
1213   if (Default == -1)
1214     Default = getDefaultCustomOperandEncoding(DepCtrInfo, DEP_CTR_SIZE, STI);
1215   return Default;
1216 }
1217 
1218 bool isSymbolicDepCtrEncoding(unsigned Code, bool &HasNonDefaultVal,
1219                               const MCSubtargetInfo &STI) {
1220   return isSymbolicCustomOperandEncoding(DepCtrInfo, DEP_CTR_SIZE, Code,
1221                                          HasNonDefaultVal, STI);
1222 }
1223 
1224 bool decodeDepCtr(unsigned Code, int &Id, StringRef &Name, unsigned &Val,
1225                   bool &IsDefault, const MCSubtargetInfo &STI) {
1226   return decodeCustomOperand(DepCtrInfo, DEP_CTR_SIZE, Code, Id, Name, Val,
1227                              IsDefault, STI);
1228 }
1229 
1230 int encodeDepCtr(const StringRef Name, int64_t Val, unsigned &UsedOprMask,
1231                  const MCSubtargetInfo &STI) {
1232   return encodeCustomOperand(DepCtrInfo, DEP_CTR_SIZE, Name, Val, UsedOprMask,
1233                              STI);
1234 }
1235 
1236 } // namespace DepCtr
1237 
1238 //===----------------------------------------------------------------------===//
1239 // hwreg
1240 //===----------------------------------------------------------------------===//
1241 
1242 namespace Hwreg {
1243 
1244 int64_t getHwregId(const StringRef Name, const MCSubtargetInfo &STI) {
1245   int Idx = getOprIdx<const MCSubtargetInfo &>(Name, Opr, OPR_SIZE, STI);
1246   return (Idx < 0) ? Idx : Opr[Idx].Encoding;
1247 }
1248 
1249 bool isValidHwreg(int64_t Id) {
1250   return 0 <= Id && isUInt<ID_WIDTH_>(Id);
1251 }
1252 
1253 bool isValidHwregOffset(int64_t Offset) {
1254   return 0 <= Offset && isUInt<OFFSET_WIDTH_>(Offset);
1255 }
1256 
1257 bool isValidHwregWidth(int64_t Width) {
1258   return 0 <= (Width - 1) && isUInt<WIDTH_M1_WIDTH_>(Width - 1);
1259 }
1260 
1261 uint64_t encodeHwreg(uint64_t Id, uint64_t Offset, uint64_t Width) {
1262   return (Id << ID_SHIFT_) |
1263          (Offset << OFFSET_SHIFT_) |
1264          ((Width - 1) << WIDTH_M1_SHIFT_);
1265 }
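
// The encoding follows the s_getreg/s_setreg simm16 layout: hwreg id in
// bits [5:0], offset in bits [10:6] and (width - 1) in bits [15:11], so
// e.g. encodeHwreg(/*Id=*/1, /*Offset=*/0, /*Width=*/32) == 0xF801.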
1266 
1267 StringRef getHwreg(unsigned Id, const MCSubtargetInfo &STI) {
1268   int Idx = getOprIdx<const MCSubtargetInfo &>(Id, Opr, OPR_SIZE, STI);
1269   return (Idx < 0) ? "" : Opr[Idx].Name;
1270 }
1271 
1272 void decodeHwreg(unsigned Val, unsigned &Id, unsigned &Offset, unsigned &Width) {
1273   Id = (Val & ID_MASK_) >> ID_SHIFT_;
1274   Offset = (Val & OFFSET_MASK_) >> OFFSET_SHIFT_;
1275   Width = ((Val & WIDTH_M1_MASK_) >> WIDTH_M1_SHIFT_) + 1;
1276 }
1277 
1278 } // namespace Hwreg
1279 
1280 //===----------------------------------------------------------------------===//
1281 // exp tgt
1282 //===----------------------------------------------------------------------===//
1283 
1284 namespace Exp {
1285 
1286 struct ExpTgt {
1287   StringLiteral Name;
1288   unsigned Tgt;
1289   unsigned MaxIndex;
1290 };
1291 
1292 static constexpr ExpTgt ExpTgtInfo[] = {
1293   {{"null"},           ET_NULL,            ET_NULL_MAX_IDX},
1294   {{"mrtz"},           ET_MRTZ,            ET_MRTZ_MAX_IDX},
1295   {{"prim"},           ET_PRIM,            ET_PRIM_MAX_IDX},
1296   {{"mrt"},            ET_MRT0,            ET_MRT_MAX_IDX},
1297   {{"pos"},            ET_POS0,            ET_POS_MAX_IDX},
1298   {{"dual_src_blend"}, ET_DUAL_SRC_BLEND0, ET_DUAL_SRC_BLEND_MAX_IDX},
1299   {{"param"},          ET_PARAM0,          ET_PARAM_MAX_IDX},
1300 };
1301 
1302 bool getTgtName(unsigned Id, StringRef &Name, int &Index) {
1303   for (const ExpTgt &Val : ExpTgtInfo) {
1304     if (Val.Tgt <= Id && Id <= Val.Tgt + Val.MaxIndex) {
1305       Index = (Val.MaxIndex == 0) ? -1 : (Id - Val.Tgt);
1306       Name = Val.Name;
1307       return true;
1308     }
1309   }
1310   return false;
1311 }
1312 
1313 unsigned getTgtId(const StringRef Name) {
1314 
1315   for (const ExpTgt &Val : ExpTgtInfo) {
1316     if (Val.MaxIndex == 0 && Name == Val.Name)
1317       return Val.Tgt;
1318 
1319     if (Val.MaxIndex > 0 && Name.startswith(Val.Name)) {
1320       StringRef Suffix = Name.drop_front(Val.Name.size());
1321 
1322       unsigned Id;
1323       if (Suffix.getAsInteger(10, Id) || Id > Val.MaxIndex)
1324         return ET_INVALID;
1325 
      // Disallow leading zeroes.
1327       if (Suffix.size() > 1 && Suffix[0] == '0')
1328         return ET_INVALID;
1329 
1330       return Val.Tgt + Id;
1331     }
1332   }
1333   return ET_INVALID;
1334 }
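
// Examples: getTgtId("pos3") resolves to ET_POS0 + 3 and getTgtId("prim")
// to ET_PRIM, while "mrt07" (leading zero) and "param32" (index above
// ET_PARAM_MAX_IDX) both return ET_INVALID.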
1335 
1336 bool isSupportedTgtId(unsigned Id, const MCSubtargetInfo &STI) {
1337   switch (Id) {
1338   case ET_NULL:
1339     return !isGFX11Plus(STI);
1340   case ET_POS4:
1341   case ET_PRIM:
1342     return isGFX10Plus(STI);
1343   case ET_DUAL_SRC_BLEND0:
1344   case ET_DUAL_SRC_BLEND1:
1345     return isGFX11Plus(STI);
1346   default:
1347     if (Id >= ET_PARAM0 && Id <= ET_PARAM31)
1348       return !isGFX11Plus(STI);
1349     return true;
1350   }
1351 }
1352 
1353 } // namespace Exp
1354 
1355 //===----------------------------------------------------------------------===//
1356 // MTBUF Format
1357 //===----------------------------------------------------------------------===//
1358 
1359 namespace MTBUFFormat {
1360 
1361 int64_t getDfmt(const StringRef Name) {
1362   for (int Id = DFMT_MIN; Id <= DFMT_MAX; ++Id) {
1363     if (Name == DfmtSymbolic[Id])
1364       return Id;
1365   }
1366   return DFMT_UNDEF;
1367 }
1368 
1369 StringRef getDfmtName(unsigned Id) {
1370   assert(Id <= DFMT_MAX);
1371   return DfmtSymbolic[Id];
1372 }
1373 
1374 static StringLiteral const *getNfmtLookupTable(const MCSubtargetInfo &STI) {
1375   if (isSI(STI) || isCI(STI))
1376     return NfmtSymbolicSICI;
1377   if (isVI(STI) || isGFX9(STI))
1378     return NfmtSymbolicVI;
1379   return NfmtSymbolicGFX10;
1380 }
1381 
1382 int64_t getNfmt(const StringRef Name, const MCSubtargetInfo &STI) {
1383   auto lookupTable = getNfmtLookupTable(STI);
1384   for (int Id = NFMT_MIN; Id <= NFMT_MAX; ++Id) {
1385     if (Name == lookupTable[Id])
1386       return Id;
1387   }
1388   return NFMT_UNDEF;
1389 }
1390 
1391 StringRef getNfmtName(unsigned Id, const MCSubtargetInfo &STI) {
1392   assert(Id <= NFMT_MAX);
1393   return getNfmtLookupTable(STI)[Id];
1394 }
1395 
1396 bool isValidDfmtNfmt(unsigned Id, const MCSubtargetInfo &STI) {
1397   unsigned Dfmt;
1398   unsigned Nfmt;
1399   decodeDfmtNfmt(Id, Dfmt, Nfmt);
1400   return isValidNfmt(Nfmt, STI);
1401 }
1402 
1403 bool isValidNfmt(unsigned Id, const MCSubtargetInfo &STI) {
1404   return !getNfmtName(Id, STI).empty();
1405 }
1406 
1407 int64_t encodeDfmtNfmt(unsigned Dfmt, unsigned Nfmt) {
1408   return (Dfmt << DFMT_SHIFT) | (Nfmt << NFMT_SHIFT);
1409 }
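
// Per the shifts defined in SIDefines.h, dfmt occupies the low 4 bits of
// the combined format and nfmt the 3 bits above it, so e.g.
// encodeDfmtNfmt(14, 4) == 0x4E; decodeDfmtNfmt inverts this.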
1410 
1411 void decodeDfmtNfmt(unsigned Format, unsigned &Dfmt, unsigned &Nfmt) {
1412   Dfmt = (Format >> DFMT_SHIFT) & DFMT_MASK;
1413   Nfmt = (Format >> NFMT_SHIFT) & NFMT_MASK;
1414 }
1415 
1416 int64_t getUnifiedFormat(const StringRef Name, const MCSubtargetInfo &STI) {
1417   if (isGFX11Plus(STI)) {
1418     for (int Id = UfmtGFX11::UFMT_FIRST; Id <= UfmtGFX11::UFMT_LAST; ++Id) {
1419       if (Name == UfmtSymbolicGFX11[Id])
1420         return Id;
1421     }
1422   } else {
1423     for (int Id = UfmtGFX10::UFMT_FIRST; Id <= UfmtGFX10::UFMT_LAST; ++Id) {
1424       if (Name == UfmtSymbolicGFX10[Id])
1425         return Id;
1426     }
1427   }
1428   return UFMT_UNDEF;
1429 }
1430 
1431 StringRef getUnifiedFormatName(unsigned Id, const MCSubtargetInfo &STI) {
1432   if(isValidUnifiedFormat(Id, STI))
1433     return isGFX10(STI) ? UfmtSymbolicGFX10[Id] : UfmtSymbolicGFX11[Id];
1434   return "";
1435 }
1436 
1437 bool isValidUnifiedFormat(unsigned Id, const MCSubtargetInfo &STI) {
1438   return isGFX10(STI) ? Id <= UfmtGFX10::UFMT_LAST : Id <= UfmtGFX11::UFMT_LAST;
1439 }
1440 
1441 int64_t convertDfmtNfmt2Ufmt(unsigned Dfmt, unsigned Nfmt,
1442                              const MCSubtargetInfo &STI) {
1443   int64_t Fmt = encodeDfmtNfmt(Dfmt, Nfmt);
1444   if (isGFX11Plus(STI)) {
1445     for (int Id = UfmtGFX11::UFMT_FIRST; Id <= UfmtGFX11::UFMT_LAST; ++Id) {
1446       if (Fmt == DfmtNfmt2UFmtGFX11[Id])
1447         return Id;
1448     }
1449   } else {
1450     for (int Id = UfmtGFX10::UFMT_FIRST; Id <= UfmtGFX10::UFMT_LAST; ++Id) {
1451       if (Fmt == DfmtNfmt2UFmtGFX10[Id])
1452         return Id;
1453     }
1454   }
1455   return UFMT_UNDEF;
1456 }
1457 
1458 bool isValidFormatEncoding(unsigned Val, const MCSubtargetInfo &STI) {
1459   return isGFX10Plus(STI) ? (Val <= UFMT_MAX) : (Val <= DFMT_NFMT_MAX);
1460 }
1461 
1462 unsigned getDefaultFormatEncoding(const MCSubtargetInfo &STI) {
1463   if (isGFX10Plus(STI))
1464     return UFMT_DEFAULT;
1465   return DFMT_NFMT_DEFAULT;
1466 }
1467 
1468 } // namespace MTBUFFormat
1469 
1470 //===----------------------------------------------------------------------===//
1471 // SendMsg
1472 //===----------------------------------------------------------------------===//
1473 
1474 namespace SendMsg {
1475 
1476 static uint64_t getMsgIdMask(const MCSubtargetInfo &STI) {
1477   return isGFX11Plus(STI) ? ID_MASK_GFX11Plus_ : ID_MASK_PreGFX11_;
1478 }
1479 
1480 int64_t getMsgId(const StringRef Name, const MCSubtargetInfo &STI) {
1481   int Idx = getOprIdx<const MCSubtargetInfo &>(Name, Msg, MSG_SIZE, STI);
1482   return (Idx < 0) ? Idx : Msg[Idx].Encoding;
1483 }
1484 
1485 bool isValidMsgId(int64_t MsgId, const MCSubtargetInfo &STI) {
1486   return (MsgId & ~(getMsgIdMask(STI))) == 0;
1487 }
1488 
1489 StringRef getMsgName(int64_t MsgId, const MCSubtargetInfo &STI) {
1490   int Idx = getOprIdx<const MCSubtargetInfo &>(MsgId, Msg, MSG_SIZE, STI);
1491   return (Idx < 0) ? "" : Msg[Idx].Name;
1492 }
1493 
1494 int64_t getMsgOpId(int64_t MsgId, const StringRef Name) {
1495   const char* const *S = (MsgId == ID_SYSMSG) ? OpSysSymbolic : OpGsSymbolic;
1496   const int F = (MsgId == ID_SYSMSG) ? OP_SYS_FIRST_ : OP_GS_FIRST_;
1497   const int L = (MsgId == ID_SYSMSG) ? OP_SYS_LAST_ : OP_GS_LAST_;
1498   for (int i = F; i < L; ++i) {
1499     if (Name == S[i]) {
1500       return i;
1501     }
1502   }
1503   return OP_UNKNOWN_;
1504 }
1505 
1506 bool isValidMsgOp(int64_t MsgId, int64_t OpId, const MCSubtargetInfo &STI,
1507                   bool Strict) {
1508   assert(isValidMsgId(MsgId, STI));
1509 
1510   if (!Strict)
1511     return 0 <= OpId && isUInt<OP_WIDTH_>(OpId);
1512 
1513   if (MsgId == ID_SYSMSG)
1514     return OP_SYS_FIRST_ <= OpId && OpId < OP_SYS_LAST_;
1515   if (!isGFX11Plus(STI)) {
1516     switch (MsgId) {
1517     case ID_GS_PreGFX11:
1518       return (OP_GS_FIRST_ <= OpId && OpId < OP_GS_LAST_) && OpId != OP_GS_NOP;
1519     case ID_GS_DONE_PreGFX11:
1520       return OP_GS_FIRST_ <= OpId && OpId < OP_GS_LAST_;
1521     }
1522   }
1523   return OpId == OP_NONE_;
1524 }
1525 
1526 StringRef getMsgOpName(int64_t MsgId, int64_t OpId,
1527                        const MCSubtargetInfo &STI) {
1528   assert(msgRequiresOp(MsgId, STI));
1529   return (MsgId == ID_SYSMSG)? OpSysSymbolic[OpId] : OpGsSymbolic[OpId];
1530 }
1531 
1532 bool isValidMsgStream(int64_t MsgId, int64_t OpId, int64_t StreamId,
1533                       const MCSubtargetInfo &STI, bool Strict) {
1534   assert(isValidMsgOp(MsgId, OpId, STI, Strict));
1535 
1536   if (!Strict)
1537     return 0 <= StreamId && isUInt<STREAM_ID_WIDTH_>(StreamId);
1538 
1539   if (!isGFX11Plus(STI)) {
1540     switch (MsgId) {
1541     case ID_GS_PreGFX11:
1542       return STREAM_ID_FIRST_ <= StreamId && StreamId < STREAM_ID_LAST_;
1543     case ID_GS_DONE_PreGFX11:
1544       return (OpId == OP_GS_NOP) ?
1545           (StreamId == STREAM_ID_NONE_) :
1546           (STREAM_ID_FIRST_ <= StreamId && StreamId < STREAM_ID_LAST_);
1547     }
1548   }
1549   return StreamId == STREAM_ID_NONE_;
1550 }
1551 
1552 bool msgRequiresOp(int64_t MsgId, const MCSubtargetInfo &STI) {
1553   return MsgId == ID_SYSMSG ||
1554       (!isGFX11Plus(STI) &&
1555        (MsgId == ID_GS_PreGFX11 || MsgId == ID_GS_DONE_PreGFX11));
1556 }
1557 
1558 bool msgSupportsStream(int64_t MsgId, int64_t OpId,
1559                        const MCSubtargetInfo &STI) {
1560   return !isGFX11Plus(STI) &&
1561       (MsgId == ID_GS_PreGFX11 || MsgId == ID_GS_DONE_PreGFX11) &&
1562       OpId != OP_GS_NOP;
1563 }
1564 
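/// Decodes the s_sendmsg immediate \p Val into \p MsgId, \p OpId and
/// \p StreamId. GFX11+ dropped the operation and stream ID fields, so both
/// decode to 0 there.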
1565 void decodeMsg(unsigned Val, uint16_t &MsgId, uint16_t &OpId,
1566                uint16_t &StreamId, const MCSubtargetInfo &STI) {
1567   MsgId = Val & getMsgIdMask(STI);
1568   if (isGFX11Plus(STI)) {
1569     OpId = 0;
1570     StreamId = 0;
1571   } else {
1572     OpId = (Val & OP_MASK_) >> OP_SHIFT_;
1573     StreamId = (Val & STREAM_ID_MASK_) >> STREAM_ID_SHIFT_;
1574   }
1575 }
1576 
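/// Packs \p MsgId, \p OpId and \p StreamId into an s_sendmsg immediate.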
1577 uint64_t encodeMsg(uint64_t MsgId,
1578                    uint64_t OpId,
1579                    uint64_t StreamId) {
1580   return MsgId | (OpId << OP_SHIFT_) | (StreamId << STREAM_ID_SHIFT_);
1581 }
1582 
1583 } // namespace SendMsg
1584 
1585 //===----------------------------------------------------------------------===//
1586 //
1587 //===----------------------------------------------------------------------===//
1588 
1589 unsigned getInitialPSInputAddr(const Function &F) {
1590   return getIntegerAttribute(F, "InitialPSInputAddr", 0);
1591 }
1592 
1593 bool getHasColorExport(const Function &F) {
  // As a safe default, always respond as if PS has color exports.
1595   return getIntegerAttribute(
1596              F, "amdgpu-color-export",
1597              F.getCallingConv() == CallingConv::AMDGPU_PS ? 1 : 0) != 0;
1598 }
1599 
1600 bool getHasDepthExport(const Function &F) {
1601   return getIntegerAttribute(F, "amdgpu-depth-export", 0) != 0;
1602 }
1603 
1604 bool isShader(CallingConv::ID cc) {
  switch (cc) {
  case CallingConv::AMDGPU_VS:
  case CallingConv::AMDGPU_LS:
  case CallingConv::AMDGPU_HS:
  case CallingConv::AMDGPU_ES:
  case CallingConv::AMDGPU_GS:
  case CallingConv::AMDGPU_PS:
  case CallingConv::AMDGPU_CS:
    return true;
  default:
    return false;
  }
1617 }
1618 
1619 bool isGraphics(CallingConv::ID cc) {
1620   return isShader(cc) || cc == CallingConv::AMDGPU_Gfx;
1621 }
1622 
1623 bool isCompute(CallingConv::ID cc) {
1624   return !isGraphics(cc) || cc == CallingConv::AMDGPU_CS;
1625 }
1626 
1627 bool isEntryFunctionCC(CallingConv::ID CC) {
1628   switch (CC) {
1629   case CallingConv::AMDGPU_KERNEL:
1630   case CallingConv::SPIR_KERNEL:
1631   case CallingConv::AMDGPU_VS:
1632   case CallingConv::AMDGPU_GS:
1633   case CallingConv::AMDGPU_PS:
1634   case CallingConv::AMDGPU_CS:
1635   case CallingConv::AMDGPU_ES:
1636   case CallingConv::AMDGPU_HS:
1637   case CallingConv::AMDGPU_LS:
1638     return true;
1639   default:
1640     return false;
1641   }
1642 }
1643 
1644 bool isModuleEntryFunctionCC(CallingConv::ID CC) {
1645   switch (CC) {
1646   case CallingConv::AMDGPU_Gfx:
1647     return true;
1648   default:
1649     return isEntryFunctionCC(CC);
1650   }
1651 }
1652 
1653 bool isKernelCC(const Function *Func) {
1654   return AMDGPU::isModuleEntryFunctionCC(Func->getCallingConv());
1655 }
1656 
1657 bool hasXNACK(const MCSubtargetInfo &STI) {
1658   return STI.getFeatureBits()[AMDGPU::FeatureXNACK];
1659 }
1660 
1661 bool hasSRAMECC(const MCSubtargetInfo &STI) {
1662   return STI.getFeatureBits()[AMDGPU::FeatureSRAMECC];
1663 }
1664 
1665 bool hasMIMG_R128(const MCSubtargetInfo &STI) {
1666   return STI.getFeatureBits()[AMDGPU::FeatureMIMG_R128] && !STI.getFeatureBits()[AMDGPU::FeatureR128A16];
1667 }
1668 
1669 bool hasGFX10A16(const MCSubtargetInfo &STI) {
1670   return STI.getFeatureBits()[AMDGPU::FeatureGFX10A16];
1671 }
1672 
1673 bool hasG16(const MCSubtargetInfo &STI) {
1674   return STI.getFeatureBits()[AMDGPU::FeatureG16];
1675 }
1676 
1677 bool hasPackedD16(const MCSubtargetInfo &STI) {
1678   return !STI.getFeatureBits()[AMDGPU::FeatureUnpackedD16VMem] && !isCI(STI) &&
1679          !isSI(STI);
1680 }
1681 
1682 bool isSI(const MCSubtargetInfo &STI) {
1683   return STI.getFeatureBits()[AMDGPU::FeatureSouthernIslands];
1684 }
1685 
1686 bool isCI(const MCSubtargetInfo &STI) {
1687   return STI.getFeatureBits()[AMDGPU::FeatureSeaIslands];
1688 }
1689 
1690 bool isVI(const MCSubtargetInfo &STI) {
1691   return STI.getFeatureBits()[AMDGPU::FeatureVolcanicIslands];
1692 }
1693 
1694 bool isGFX9(const MCSubtargetInfo &STI) {
1695   return STI.getFeatureBits()[AMDGPU::FeatureGFX9];
1696 }
1697 
1698 bool isGFX9_GFX10(const MCSubtargetInfo &STI) {
1699   return isGFX9(STI) || isGFX10(STI);
1700 }
1701 
1702 bool isGFX8_GFX9_GFX10(const MCSubtargetInfo &STI) {
1703   return isVI(STI) || isGFX9(STI) || isGFX10(STI);
1704 }
1705 
1706 bool isGFX8Plus(const MCSubtargetInfo &STI) {
1707   return isVI(STI) || isGFX9Plus(STI);
1708 }
1709 
1710 bool isGFX9Plus(const MCSubtargetInfo &STI) {
1711   return isGFX9(STI) || isGFX10Plus(STI);
1712 }
1713 
1714 bool isGFX10(const MCSubtargetInfo &STI) {
1715   return STI.getFeatureBits()[AMDGPU::FeatureGFX10];
1716 }
1717 
1718 bool isGFX10Plus(const MCSubtargetInfo &STI) {
1719   return isGFX10(STI) || isGFX11Plus(STI);
1720 }
1721 
1722 bool isGFX11(const MCSubtargetInfo &STI) {
1723   return STI.getFeatureBits()[AMDGPU::FeatureGFX11];
1724 }
1725 
1726 bool isGFX11Plus(const MCSubtargetInfo &STI) {
1727   return isGFX11(STI);
1728 }
1729 
1730 bool isNotGFX11Plus(const MCSubtargetInfo &STI) {
1731   return !isGFX11Plus(STI);
1732 }
1733 
1734 bool isNotGFX10Plus(const MCSubtargetInfo &STI) {
1735   return isSI(STI) || isCI(STI) || isVI(STI) || isGFX9(STI);
1736 }
1737 
1738 bool isGFX10Before1030(const MCSubtargetInfo &STI) {
1739   return isGFX10(STI) && !AMDGPU::isGFX10_BEncoding(STI);
1740 }
1741 
1742 bool isGCN3Encoding(const MCSubtargetInfo &STI) {
1743   return STI.getFeatureBits()[AMDGPU::FeatureGCN3Encoding];
1744 }
1745 
1746 bool isGFX10_AEncoding(const MCSubtargetInfo &STI) {
1747   return STI.getFeatureBits()[AMDGPU::FeatureGFX10_AEncoding];
1748 }
1749 
1750 bool isGFX10_BEncoding(const MCSubtargetInfo &STI) {
1751   return STI.getFeatureBits()[AMDGPU::FeatureGFX10_BEncoding];
1752 }
1753 
1754 bool hasGFX10_3Insts(const MCSubtargetInfo &STI) {
1755   return STI.getFeatureBits()[AMDGPU::FeatureGFX10_3Insts];
1756 }
1757 
1758 bool isGFX90A(const MCSubtargetInfo &STI) {
1759   return STI.getFeatureBits()[AMDGPU::FeatureGFX90AInsts];
1760 }
1761 
1762 bool isGFX940(const MCSubtargetInfo &STI) {
1763   return STI.getFeatureBits()[AMDGPU::FeatureGFX940Insts];
1764 }
1765 
1766 bool hasArchitectedFlatScratch(const MCSubtargetInfo &STI) {
1767   return STI.getFeatureBits()[AMDGPU::FeatureArchitectedFlatScratch];
1768 }
1769 
1770 bool hasMAIInsts(const MCSubtargetInfo &STI) {
1771   return STI.getFeatureBits()[AMDGPU::FeatureMAIInsts];
1772 }
1773 
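/// \returns Total VGPR budget for \p ArgNumVGPR VGPRs and \p ArgNumAGPR
/// AGPRs. When gfx90a instructions are available and AGPRs are used, the
/// AGPRs are allocated above the VGPRs with the VGPR count rounded up to a
/// multiple of 4; otherwise the two register files are separate and the
/// larger of the two counts is returned.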
1774 int32_t getTotalNumVGPRs(bool has90AInsts, int32_t ArgNumAGPR,
1775                          int32_t ArgNumVGPR) {
1776   if (has90AInsts && ArgNumAGPR)
1777     return alignTo(ArgNumVGPR, 4) + ArgNumAGPR;
1778   return std::max(ArgNumVGPR, ArgNumAGPR);
1779 }
1780 
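/// \returns true if \p Reg (or, for multi-dword registers, its first 32-bit
/// sub-register) belongs to the SReg_32 register class, or is SCC.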
1781 bool isSGPR(unsigned Reg, const MCRegisterInfo* TRI) {
1782   const MCRegisterClass SGPRClass = TRI->getRegClass(AMDGPU::SReg_32RegClassID);
1783   const unsigned FirstSubReg = TRI->getSubReg(Reg, AMDGPU::sub0);
1784   return SGPRClass.contains(FirstSubReg != 0 ? FirstSubReg : Reg) ||
1785     Reg == AMDGPU::SCC;
1786 }
1787 
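// MAP_REG2REG expands to a switch that maps between generic pseudo registers
// and their subtarget-specific MC counterparts; the direction of the mapping
// depends on how the CASE_* macros are defined before expansion.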
1788 #define MAP_REG2REG \
1789   using namespace AMDGPU; \
1790   switch(Reg) { \
1791   default: return Reg; \
1792   CASE_CI_VI(FLAT_SCR) \
1793   CASE_CI_VI(FLAT_SCR_LO) \
1794   CASE_CI_VI(FLAT_SCR_HI) \
1795   CASE_VI_GFX9PLUS(TTMP0) \
1796   CASE_VI_GFX9PLUS(TTMP1) \
1797   CASE_VI_GFX9PLUS(TTMP2) \
1798   CASE_VI_GFX9PLUS(TTMP3) \
1799   CASE_VI_GFX9PLUS(TTMP4) \
1800   CASE_VI_GFX9PLUS(TTMP5) \
1801   CASE_VI_GFX9PLUS(TTMP6) \
1802   CASE_VI_GFX9PLUS(TTMP7) \
1803   CASE_VI_GFX9PLUS(TTMP8) \
1804   CASE_VI_GFX9PLUS(TTMP9) \
1805   CASE_VI_GFX9PLUS(TTMP10) \
1806   CASE_VI_GFX9PLUS(TTMP11) \
1807   CASE_VI_GFX9PLUS(TTMP12) \
1808   CASE_VI_GFX9PLUS(TTMP13) \
1809   CASE_VI_GFX9PLUS(TTMP14) \
1810   CASE_VI_GFX9PLUS(TTMP15) \
1811   CASE_VI_GFX9PLUS(TTMP0_TTMP1) \
1812   CASE_VI_GFX9PLUS(TTMP2_TTMP3) \
1813   CASE_VI_GFX9PLUS(TTMP4_TTMP5) \
1814   CASE_VI_GFX9PLUS(TTMP6_TTMP7) \
1815   CASE_VI_GFX9PLUS(TTMP8_TTMP9) \
1816   CASE_VI_GFX9PLUS(TTMP10_TTMP11) \
1817   CASE_VI_GFX9PLUS(TTMP12_TTMP13) \
1818   CASE_VI_GFX9PLUS(TTMP14_TTMP15) \
1819   CASE_VI_GFX9PLUS(TTMP0_TTMP1_TTMP2_TTMP3) \
1820   CASE_VI_GFX9PLUS(TTMP4_TTMP5_TTMP6_TTMP7) \
1821   CASE_VI_GFX9PLUS(TTMP8_TTMP9_TTMP10_TTMP11) \
1822   CASE_VI_GFX9PLUS(TTMP12_TTMP13_TTMP14_TTMP15) \
1823   CASE_VI_GFX9PLUS(TTMP0_TTMP1_TTMP2_TTMP3_TTMP4_TTMP5_TTMP6_TTMP7) \
1824   CASE_VI_GFX9PLUS(TTMP4_TTMP5_TTMP6_TTMP7_TTMP8_TTMP9_TTMP10_TTMP11) \
1825   CASE_VI_GFX9PLUS(TTMP8_TTMP9_TTMP10_TTMP11_TTMP12_TTMP13_TTMP14_TTMP15) \
1826   CASE_VI_GFX9PLUS(TTMP0_TTMP1_TTMP2_TTMP3_TTMP4_TTMP5_TTMP6_TTMP7_TTMP8_TTMP9_TTMP10_TTMP11_TTMP12_TTMP13_TTMP14_TTMP15) \
1827   CASE_GFXPRE11_GFX11PLUS(M0) \
1828   CASE_GFXPRE11_GFX11PLUS(SGPR_NULL) \
1829   CASE_GFXPRE11_GFX11PLUS_TO(SGPR_NULL64, SGPR_NULL) \
1830   }
1831 
1832 #define CASE_CI_VI(node) \
1833   assert(!isSI(STI)); \
1834   case node: return isCI(STI) ? node##_ci : node##_vi;
1835 
1836 #define CASE_VI_GFX9PLUS(node) \
1837   case node: return isGFX9Plus(STI) ? node##_gfx9plus : node##_vi;
1838 
1839 #define CASE_GFXPRE11_GFX11PLUS(node) \
1840   case node: return isGFX11Plus(STI) ? node##_gfx11plus : node##_gfxpre11;
1841 
1842 #define CASE_GFXPRE11_GFX11PLUS_TO(node, result) \
1843   case node: return isGFX11Plus(STI) ? result##_gfx11plus : result##_gfxpre11;
1844 
1845 unsigned getMCReg(unsigned Reg, const MCSubtargetInfo &STI) {
1846   if (STI.getTargetTriple().getArch() == Triple::r600)
1847     return Reg;
1848   MAP_REG2REG
1849 }
1850 
1851 #undef CASE_CI_VI
1852 #undef CASE_VI_GFX9PLUS
1853 #undef CASE_GFXPRE11_GFX11PLUS
1854 #undef CASE_GFXPRE11_GFX11PLUS_TO
1855 
1856 #define CASE_CI_VI(node)   case node##_ci: case node##_vi:   return node;
1857 #define CASE_VI_GFX9PLUS(node) case node##_vi: case node##_gfx9plus: return node;
1858 #define CASE_GFXPRE11_GFX11PLUS(node) case node##_gfx11plus: case node##_gfxpre11: return node;
1859 #define CASE_GFXPRE11_GFX11PLUS_TO(node, result)
1860 
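/// Maps a subtarget-specific MC register back to its generic pseudo register.
/// Registers without a mapping are returned unchanged.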
1861 unsigned mc2PseudoReg(unsigned Reg) {
1862   MAP_REG2REG
1863 }
1864 
1865 #undef CASE_CI_VI
1866 #undef CASE_VI_GFX9PLUS
1867 #undef CASE_GFXPRE11_GFX11PLUS
1868 #undef CASE_GFXPRE11_GFX11PLUS_TO
1869 #undef MAP_REG2REG
1870 
1871 bool isSISrcOperand(const MCInstrDesc &Desc, unsigned OpNo) {
1872   assert(OpNo < Desc.NumOperands);
1873   unsigned OpType = Desc.OpInfo[OpNo].OperandType;
1874   return OpType >= AMDGPU::OPERAND_SRC_FIRST &&
1875          OpType <= AMDGPU::OPERAND_SRC_LAST;
1876 }
1877 
1878 bool isSISrcFPOperand(const MCInstrDesc &Desc, unsigned OpNo) {
1879   assert(OpNo < Desc.NumOperands);
1880   unsigned OpType = Desc.OpInfo[OpNo].OperandType;
1881   switch (OpType) {
1882   case AMDGPU::OPERAND_REG_IMM_FP32:
1883   case AMDGPU::OPERAND_REG_IMM_FP32_DEFERRED:
1884   case AMDGPU::OPERAND_REG_IMM_FP64:
1885   case AMDGPU::OPERAND_REG_IMM_FP16:
1886   case AMDGPU::OPERAND_REG_IMM_FP16_DEFERRED:
1887   case AMDGPU::OPERAND_REG_IMM_V2FP16:
1888   case AMDGPU::OPERAND_REG_IMM_V2INT16:
1889   case AMDGPU::OPERAND_REG_INLINE_C_FP32:
1890   case AMDGPU::OPERAND_REG_INLINE_C_FP64:
1891   case AMDGPU::OPERAND_REG_INLINE_C_FP16:
1892   case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
1893   case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
1894   case AMDGPU::OPERAND_REG_INLINE_AC_FP32:
1895   case AMDGPU::OPERAND_REG_INLINE_AC_FP16:
1896   case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16:
1897   case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16:
1898   case AMDGPU::OPERAND_REG_IMM_V2FP32:
1899   case AMDGPU::OPERAND_REG_INLINE_C_V2FP32:
1900   case AMDGPU::OPERAND_REG_INLINE_AC_FP64:
1901     return true;
1902   default:
1903     return false;
1904   }
1905 }
1906 
1907 bool isSISrcInlinableOperand(const MCInstrDesc &Desc, unsigned OpNo) {
1908   assert(OpNo < Desc.NumOperands);
1909   unsigned OpType = Desc.OpInfo[OpNo].OperandType;
1910   return OpType >= AMDGPU::OPERAND_REG_INLINE_C_FIRST &&
1911          OpType <= AMDGPU::OPERAND_REG_INLINE_C_LAST;
1912 }
1913 
1914 // Avoid using MCRegisterClass::getSize, since that function will go away
1915 // (move from MC* level to Target* level). Return size in bits.
1916 unsigned getRegBitWidth(unsigned RCID) {
1917   switch (RCID) {
1918   case AMDGPU::VGPR_LO16RegClassID:
1919   case AMDGPU::VGPR_HI16RegClassID:
1920   case AMDGPU::SGPR_LO16RegClassID:
1921   case AMDGPU::AGPR_LO16RegClassID:
1922     return 16;
1923   case AMDGPU::SGPR_32RegClassID:
1924   case AMDGPU::VGPR_32RegClassID:
1925   case AMDGPU::VRegOrLds_32RegClassID:
1926   case AMDGPU::AGPR_32RegClassID:
1927   case AMDGPU::VS_32RegClassID:
1928   case AMDGPU::AV_32RegClassID:
1929   case AMDGPU::SReg_32RegClassID:
1930   case AMDGPU::SReg_32_XM0RegClassID:
1931   case AMDGPU::SRegOrLds_32RegClassID:
1932     return 32;
1933   case AMDGPU::SGPR_64RegClassID:
1934   case AMDGPU::VS_64RegClassID:
1935   case AMDGPU::SReg_64RegClassID:
1936   case AMDGPU::VReg_64RegClassID:
1937   case AMDGPU::AReg_64RegClassID:
1938   case AMDGPU::SReg_64_XEXECRegClassID:
1939   case AMDGPU::VReg_64_Align2RegClassID:
1940   case AMDGPU::AReg_64_Align2RegClassID:
1941   case AMDGPU::AV_64RegClassID:
1942   case AMDGPU::AV_64_Align2RegClassID:
1943     return 64;
1944   case AMDGPU::SGPR_96RegClassID:
1945   case AMDGPU::SReg_96RegClassID:
1946   case AMDGPU::VReg_96RegClassID:
1947   case AMDGPU::AReg_96RegClassID:
1948   case AMDGPU::VReg_96_Align2RegClassID:
1949   case AMDGPU::AReg_96_Align2RegClassID:
1950   case AMDGPU::AV_96RegClassID:
1951   case AMDGPU::AV_96_Align2RegClassID:
1952     return 96;
1953   case AMDGPU::SGPR_128RegClassID:
1954   case AMDGPU::SReg_128RegClassID:
1955   case AMDGPU::VReg_128RegClassID:
1956   case AMDGPU::AReg_128RegClassID:
1957   case AMDGPU::VReg_128_Align2RegClassID:
1958   case AMDGPU::AReg_128_Align2RegClassID:
1959   case AMDGPU::AV_128RegClassID:
1960   case AMDGPU::AV_128_Align2RegClassID:
1961     return 128;
1962   case AMDGPU::SGPR_160RegClassID:
1963   case AMDGPU::SReg_160RegClassID:
1964   case AMDGPU::VReg_160RegClassID:
1965   case AMDGPU::AReg_160RegClassID:
1966   case AMDGPU::VReg_160_Align2RegClassID:
1967   case AMDGPU::AReg_160_Align2RegClassID:
1968   case AMDGPU::AV_160RegClassID:
1969   case AMDGPU::AV_160_Align2RegClassID:
1970     return 160;
1971   case AMDGPU::SGPR_192RegClassID:
1972   case AMDGPU::SReg_192RegClassID:
1973   case AMDGPU::VReg_192RegClassID:
1974   case AMDGPU::AReg_192RegClassID:
1975   case AMDGPU::VReg_192_Align2RegClassID:
1976   case AMDGPU::AReg_192_Align2RegClassID:
1977   case AMDGPU::AV_192RegClassID:
1978   case AMDGPU::AV_192_Align2RegClassID:
1979     return 192;
1980   case AMDGPU::SGPR_224RegClassID:
1981   case AMDGPU::SReg_224RegClassID:
1982   case AMDGPU::VReg_224RegClassID:
1983   case AMDGPU::AReg_224RegClassID:
1984   case AMDGPU::VReg_224_Align2RegClassID:
1985   case AMDGPU::AReg_224_Align2RegClassID:
1986   case AMDGPU::AV_224RegClassID:
1987   case AMDGPU::AV_224_Align2RegClassID:
1988     return 224;
1989   case AMDGPU::SGPR_256RegClassID:
1990   case AMDGPU::SReg_256RegClassID:
1991   case AMDGPU::VReg_256RegClassID:
1992   case AMDGPU::AReg_256RegClassID:
1993   case AMDGPU::VReg_256_Align2RegClassID:
1994   case AMDGPU::AReg_256_Align2RegClassID:
1995   case AMDGPU::AV_256RegClassID:
1996   case AMDGPU::AV_256_Align2RegClassID:
1997     return 256;
1998   case AMDGPU::SGPR_512RegClassID:
1999   case AMDGPU::SReg_512RegClassID:
2000   case AMDGPU::VReg_512RegClassID:
2001   case AMDGPU::AReg_512RegClassID:
2002   case AMDGPU::VReg_512_Align2RegClassID:
2003   case AMDGPU::AReg_512_Align2RegClassID:
2004   case AMDGPU::AV_512RegClassID:
2005   case AMDGPU::AV_512_Align2RegClassID:
2006     return 512;
2007   case AMDGPU::SGPR_1024RegClassID:
2008   case AMDGPU::SReg_1024RegClassID:
2009   case AMDGPU::VReg_1024RegClassID:
2010   case AMDGPU::AReg_1024RegClassID:
2011   case AMDGPU::VReg_1024_Align2RegClassID:
2012   case AMDGPU::AReg_1024_Align2RegClassID:
2013   case AMDGPU::AV_1024RegClassID:
2014   case AMDGPU::AV_1024_Align2RegClassID:
2015     return 1024;
2016   default:
2017     llvm_unreachable("Unexpected register class");
2018   }
2019 }
2020 
2021 unsigned getRegBitWidth(const MCRegisterClass &RC) {
2022   return getRegBitWidth(RC.getID());
2023 }
2024 
2025 unsigned getRegOperandSize(const MCRegisterInfo *MRI, const MCInstrDesc &Desc,
2026                            unsigned OpNo) {
2027   assert(OpNo < Desc.NumOperands);
2028   unsigned RCID = Desc.OpInfo[OpNo].RegClass;
2029   return getRegBitWidth(MRI->getRegClass(RCID)) / 8;
2030 }
2031 
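/// \returns true if the 64-bit value \p Literal can be encoded as an inline
/// constant: an integer in [-16, 64], one of the hardware FP64 constants
/// listed below, or 1/(2*pi) when \p HasInv2Pi is set.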
2032 bool isInlinableLiteral64(int64_t Literal, bool HasInv2Pi) {
2033   if (isInlinableIntLiteral(Literal))
2034     return true;
2035 
2036   uint64_t Val = static_cast<uint64_t>(Literal);
2037   return (Val == DoubleToBits(0.0)) ||
2038          (Val == DoubleToBits(1.0)) ||
2039          (Val == DoubleToBits(-1.0)) ||
2040          (Val == DoubleToBits(0.5)) ||
2041          (Val == DoubleToBits(-0.5)) ||
2042          (Val == DoubleToBits(2.0)) ||
2043          (Val == DoubleToBits(-2.0)) ||
2044          (Val == DoubleToBits(4.0)) ||
2045          (Val == DoubleToBits(-4.0)) ||
2046          (Val == 0x3fc45f306dc9c882 && HasInv2Pi);
2047 }
2048 
2049 bool isInlinableLiteral32(int32_t Literal, bool HasInv2Pi) {
2050   if (isInlinableIntLiteral(Literal))
2051     return true;
2052 
2053   // The actual type of the operand does not seem to matter as long
2054   // as the bits match one of the inline immediate values.  For example:
2055   //
2056   // -nan has the hexadecimal encoding of 0xfffffffe which is -2 in decimal,
2057   // so it is a legal inline immediate.
2058   //
2059   // 1065353216 has the hexadecimal encoding 0x3f800000 which is 1.0f in
2060   // floating-point, so it is a legal inline immediate.
2061 
2062   uint32_t Val = static_cast<uint32_t>(Literal);
2063   return (Val == FloatToBits(0.0f)) ||
2064          (Val == FloatToBits(1.0f)) ||
2065          (Val == FloatToBits(-1.0f)) ||
2066          (Val == FloatToBits(0.5f)) ||
2067          (Val == FloatToBits(-0.5f)) ||
2068          (Val == FloatToBits(2.0f)) ||
2069          (Val == FloatToBits(-2.0f)) ||
2070          (Val == FloatToBits(4.0f)) ||
2071          (Val == FloatToBits(-4.0f)) ||
2072          (Val == 0x3e22f983 && HasInv2Pi);
2073 }
2074 
2075 bool isInlinableLiteral16(int16_t Literal, bool HasInv2Pi) {
2076   if (!HasInv2Pi)
2077     return false;
2078 
2079   if (isInlinableIntLiteral(Literal))
2080     return true;
2081 
2082   uint16_t Val = static_cast<uint16_t>(Literal);
2083   return Val == 0x3C00 || // 1.0
2084          Val == 0xBC00 || // -1.0
2085          Val == 0x3800 || // 0.5
2086          Val == 0xB800 || // -0.5
2087          Val == 0x4000 || // 2.0
2088          Val == 0xC000 || // -2.0
2089          Val == 0x4400 || // 4.0
2090          Val == 0xC400 || // -4.0
2091          Val == 0x3118;   // 1/2pi
2092 }
2093 
2094 bool isInlinableLiteralV216(int32_t Literal, bool HasInv2Pi) {
2095   assert(HasInv2Pi);
2096 
2097   if (isInt<16>(Literal) || isUInt<16>(Literal)) {
2098     int16_t Trunc = static_cast<int16_t>(Literal);
2099     return AMDGPU::isInlinableLiteral16(Trunc, HasInv2Pi);
2100   }
2101   if (!(Literal & 0xffff))
2102     return AMDGPU::isInlinableLiteral16(Literal >> 16, HasInv2Pi);
2103 
2104   int16_t Lo16 = static_cast<int16_t>(Literal);
2105   int16_t Hi16 = static_cast<int16_t>(Literal >> 16);
2106   return Lo16 == Hi16 && isInlinableLiteral16(Lo16, HasInv2Pi);
2107 }
2108 
2109 bool isInlinableIntLiteralV216(int32_t Literal) {
2110   int16_t Lo16 = static_cast<int16_t>(Literal);
2111   if (isInt<16>(Literal) || isUInt<16>(Literal))
2112     return isInlinableIntLiteral(Lo16);
2113 
2114   int16_t Hi16 = static_cast<int16_t>(Literal >> 16);
2115   if (!(Literal & 0xffff))
2116     return isInlinableIntLiteral(Hi16);
2117   return Lo16 == Hi16 && isInlinableIntLiteral(Lo16);
2118 }
2119 
2120 bool isFoldableLiteralV216(int32_t Literal, bool HasInv2Pi) {
2121   assert(HasInv2Pi);
2122 
2123   int16_t Lo16 = static_cast<int16_t>(Literal);
2124   if (isInt<16>(Literal) || isUInt<16>(Literal))
2125     return true;
2126 
2127   int16_t Hi16 = static_cast<int16_t>(Literal >> 16);
2128   if (!(Literal & 0xffff))
2129     return true;
2130   return Lo16 == Hi16;
2131 }
2132 
2133 bool isArgPassedInSGPR(const Argument *A) {
2134   const Function *F = A->getParent();
2135 
2136   // Arguments to compute shaders are never a source of divergence.
2137   CallingConv::ID CC = F->getCallingConv();
2138   switch (CC) {
2139   case CallingConv::AMDGPU_KERNEL:
2140   case CallingConv::SPIR_KERNEL:
2141     return true;
2142   case CallingConv::AMDGPU_VS:
2143   case CallingConv::AMDGPU_LS:
2144   case CallingConv::AMDGPU_HS:
2145   case CallingConv::AMDGPU_ES:
2146   case CallingConv::AMDGPU_GS:
2147   case CallingConv::AMDGPU_PS:
2148   case CallingConv::AMDGPU_CS:
2149   case CallingConv::AMDGPU_Gfx:
    // For non-compute shaders, SGPR inputs are marked with either inreg or
    // byval. Everything else is in VGPRs.
2152     return F->getAttributes().hasParamAttr(A->getArgNo(), Attribute::InReg) ||
2153            F->getAttributes().hasParamAttr(A->getArgNo(), Attribute::ByVal);
2154   default:
2155     // TODO: Should calls support inreg for SGPR inputs?
2156     return false;
2157   }
2158 }
2159 
2160 static bool hasSMEMByteOffset(const MCSubtargetInfo &ST) {
2161   return isGCN3Encoding(ST) || isGFX10Plus(ST);
2162 }
2163 
2164 static bool hasSMRDSignedImmOffset(const MCSubtargetInfo &ST) {
2165   return isGFX9Plus(ST);
2166 }
2167 
2168 bool isLegalSMRDEncodedUnsignedOffset(const MCSubtargetInfo &ST,
2169                                       int64_t EncodedOffset) {
2170   return hasSMEMByteOffset(ST) ? isUInt<20>(EncodedOffset)
2171                                : isUInt<8>(EncodedOffset);
2172 }
2173 
2174 bool isLegalSMRDEncodedSignedOffset(const MCSubtargetInfo &ST,
2175                                     int64_t EncodedOffset,
2176                                     bool IsBuffer) {
2177   return !IsBuffer &&
2178          hasSMRDSignedImmOffset(ST) &&
2179          isInt<21>(EncodedOffset);
2180 }
2181 
2182 static bool isDwordAligned(uint64_t ByteOffset) {
2183   return (ByteOffset & 3) == 0;
2184 }
2185 
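/// Converts \p ByteOffset into the units used by the SMRD/SMEM offset
/// encoding: bytes on targets with byte offsets, dwords otherwise.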
2186 uint64_t convertSMRDOffsetUnits(const MCSubtargetInfo &ST,
2187                                 uint64_t ByteOffset) {
2188   if (hasSMEMByteOffset(ST))
2189     return ByteOffset;
2190 
2191   assert(isDwordAligned(ByteOffset));
2192   return ByteOffset >> 2;
2193 }
2194 
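/// \returns The encoded immediate offset for an SMRD/SMEM access of
/// \p ByteOffset bytes, or None if the offset is not representable on \p ST.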
2195 Optional<int64_t> getSMRDEncodedOffset(const MCSubtargetInfo &ST,
2196                                        int64_t ByteOffset, bool IsBuffer) {
2197   // The signed version is always a byte offset.
2198   if (!IsBuffer && hasSMRDSignedImmOffset(ST)) {
2199     assert(hasSMEMByteOffset(ST));
2200     return isInt<20>(ByteOffset) ? Optional<int64_t>(ByteOffset) : None;
2201   }
2202 
2203   if (!isDwordAligned(ByteOffset) && !hasSMEMByteOffset(ST))
2204     return None;
2205 
2206   int64_t EncodedOffset = convertSMRDOffsetUnits(ST, ByteOffset);
2207   return isLegalSMRDEncodedUnsignedOffset(ST, EncodedOffset)
2208              ? Optional<int64_t>(EncodedOffset)
2209              : None;
2210 }
2211 
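/// \returns The encoding of \p ByteOffset for the CI-only 32-bit literal SMRD
/// offset form, or None if it cannot be used.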
2212 Optional<int64_t> getSMRDEncodedLiteralOffset32(const MCSubtargetInfo &ST,
2213                                                 int64_t ByteOffset) {
2214   if (!isCI(ST) || !isDwordAligned(ByteOffset))
2215     return None;
2216 
2217   int64_t EncodedOffset = convertSMRDOffsetUnits(ST, ByteOffset);
2218   return isUInt<32>(EncodedOffset) ? Optional<int64_t>(EncodedOffset) : None;
2219 }
2220 
2221 unsigned getNumFlatOffsetBits(const MCSubtargetInfo &ST, bool Signed) {
2222   // Address offset is 12-bit signed for GFX10, 13-bit for GFX9 and GFX11+.
2223   if (AMDGPU::isGFX10(ST))
2224     return Signed ? 12 : 11;
2225 
2226   return Signed ? 13 : 12;
2227 }
2228 
2229 // Given Imm, split it into the values to put into the SOffset and ImmOffset
2230 // fields in an MUBUF instruction. Return false if it is not possible (due to a
2231 // hardware bug needing a workaround).
2232 //
2233 // The required alignment ensures that individual address components remain
2234 // aligned if they are aligned to begin with. It also ensures that additional
2235 // offsets within the given alignment can be added to the resulting ImmOffset.
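// For example, with 4-byte alignment an immediate of 8192 is split into
// ImmOffset = 4 and SOffset = 8188.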
2236 bool splitMUBUFOffset(uint32_t Imm, uint32_t &SOffset, uint32_t &ImmOffset,
2237                       const GCNSubtarget *Subtarget, Align Alignment) {
2238   const uint32_t MaxImm = alignDown(4095, Alignment.value());
2239   uint32_t Overflow = 0;
2240 
2241   if (Imm > MaxImm) {
2242     if (Imm <= MaxImm + 64) {
2243       // Use an SOffset inline constant for 4..64
2244       Overflow = Imm - MaxImm;
2245       Imm = MaxImm;
2246     } else {
2247       // Try to keep the same value in SOffset for adjacent loads, so that
2248       // the corresponding register contents can be re-used.
2249       //
2250       // Load values with all low-bits (except for alignment bits) set into
2251       // SOffset, so that a larger range of values can be covered using
2252       // s_movk_i32.
2253       //
2254       // Atomic operations fail to work correctly when individual address
2255       // components are unaligned, even if their sum is aligned.
2256       uint32_t High = (Imm + Alignment.value()) & ~4095;
2257       uint32_t Low = (Imm + Alignment.value()) & 4095;
2258       Imm = Low;
2259       Overflow = High - Alignment.value();
2260     }
2261   }
2262 
2263   // There is a hardware bug in SI and CI which prevents address clamping in
2264   // MUBUF instructions from working correctly with SOffsets. The immediate
2265   // offset is unaffected.
2266   if (Overflow > 0 &&
2267       Subtarget->getGeneration() <= AMDGPUSubtarget::SEA_ISLANDS)
2268     return false;
2269 
2270   ImmOffset = Imm;
2271   SOffset = Overflow;
2272   return true;
2273 }
2274 
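// Start from the calling convention defaults, then let explicit function
// attributes override individual fields.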
2275 SIModeRegisterDefaults::SIModeRegisterDefaults(const Function &F) {
2276   *this = getDefaultForCallingConv(F.getCallingConv());
2277 
2278   StringRef IEEEAttr = F.getFnAttribute("amdgpu-ieee").getValueAsString();
2279   if (!IEEEAttr.empty())
2280     IEEE = IEEEAttr == "true";
2281 
2282   StringRef DX10ClampAttr
2283     = F.getFnAttribute("amdgpu-dx10-clamp").getValueAsString();
2284   if (!DX10ClampAttr.empty())
2285     DX10Clamp = DX10ClampAttr == "true";
2286 
2287   StringRef DenormF32Attr = F.getFnAttribute("denormal-fp-math-f32").getValueAsString();
2288   if (!DenormF32Attr.empty()) {
2289     DenormalMode DenormMode = parseDenormalFPAttribute(DenormF32Attr);
2290     FP32InputDenormals = DenormMode.Input == DenormalMode::IEEE;
2291     FP32OutputDenormals = DenormMode.Output == DenormalMode::IEEE;
2292   }
2293 
2294   StringRef DenormAttr = F.getFnAttribute("denormal-fp-math").getValueAsString();
2295   if (!DenormAttr.empty()) {
2296     DenormalMode DenormMode = parseDenormalFPAttribute(DenormAttr);
2297 
2298     if (DenormF32Attr.empty()) {
2299       FP32InputDenormals = DenormMode.Input == DenormalMode::IEEE;
2300       FP32OutputDenormals = DenormMode.Output == DenormalMode::IEEE;
2301     }
2302 
2303     FP64FP16InputDenormals = DenormMode.Input == DenormalMode::IEEE;
2304     FP64FP16OutputDenormals = DenormMode.Output == DenormalMode::IEEE;
2305   }
2306 }
2307 
2308 namespace {
2309 
2310 struct SourceOfDivergence {
2311   unsigned Intr;
2312 };
2313 const SourceOfDivergence *lookupSourceOfDivergence(unsigned Intr);
2314 
2315 #define GET_SourcesOfDivergence_IMPL
2316 #define GET_Gfx9BufferFormat_IMPL
2317 #define GET_Gfx10BufferFormat_IMPL
2318 #define GET_Gfx11PlusBufferFormat_IMPL
2319 #include "AMDGPUGenSearchableTables.inc"
2320 
2321 } // end anonymous namespace
2322 
2323 bool isIntrinsicSourceOfDivergence(unsigned IntrID) {
2324   return lookupSourceOfDivergence(IntrID);
2325 }
2326 
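/// Looks up buffer format information in the table for the generation of
/// \p STI (GFX9, GFX10, or GFX11+).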
2327 const GcnBufferFormatInfo *getGcnBufferFormatInfo(uint8_t BitsPerComp,
2328                                                   uint8_t NumComponents,
2329                                                   uint8_t NumFormat,
2330                                                   const MCSubtargetInfo &STI) {
2331   return isGFX11Plus(STI)
2332              ? getGfx11PlusBufferFormatInfo(BitsPerComp, NumComponents,
2333                                             NumFormat)
2334              : isGFX10(STI) ? getGfx10BufferFormatInfo(BitsPerComp,
2335                                                        NumComponents, NumFormat)
2336                             : getGfx9BufferFormatInfo(BitsPerComp,
2337                                                       NumComponents, NumFormat);
2338 }
2339 
2340 const GcnBufferFormatInfo *getGcnBufferFormatInfo(uint8_t Format,
2341                                                   const MCSubtargetInfo &STI) {
2342   return isGFX11Plus(STI) ? getGfx11PlusBufferFormatInfo(Format)
2343                           : isGFX10(STI) ? getGfx10BufferFormatInfo(Format)
2344                                          : getGfx9BufferFormatInfo(Format);
2345 }
2346 
2347 } // namespace AMDGPU
2348 
2349 raw_ostream &operator<<(raw_ostream &OS,
2350                         const AMDGPU::IsaInfo::TargetIDSetting S) {
2351   switch (S) {
2352   case (AMDGPU::IsaInfo::TargetIDSetting::Unsupported):
2353     OS << "Unsupported";
2354     break;
2355   case (AMDGPU::IsaInfo::TargetIDSetting::Any):
2356     OS << "Any";
2357     break;
2358   case (AMDGPU::IsaInfo::TargetIDSetting::Off):
2359     OS << "Off";
2360     break;
2361   case (AMDGPU::IsaInfo::TargetIDSetting::On):
2362     OS << "On";
2363     break;
2364   }
2365   return OS;
2366 }
2367 
2368 } // namespace llvm
2369