//===-- AMDGPUInstrInfo.cpp - Base class for AMD GPU InstrInfo ------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
/// \file
/// \brief Implementation of the TargetInstrInfo class that is common to all
/// AMD GPUs.
//
//===----------------------------------------------------------------------===//

#include "AMDGPUInstrInfo.h"
#include "AMDGPURegisterInfo.h"
#include "AMDGPUTargetMachine.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"

using namespace llvm;

#define GET_INSTRINFO_CTOR_DTOR
#include "AMDGPUGenInstrInfo.inc"

namespace llvm {
namespace AMDGPU {
#define GET_RSRCINTRINSIC_IMPL
#include "AMDGPUGenSearchableTables.inc"
}
}

// Pin the vtable to this file.
void AMDGPUInstrInfo::anchor() {}

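// The two opcodes forwarded to the generated base class are the call frame
// setup (ADJCALLSTACKUP) and destroy (ADJCALLSTACKDOWN) pseudo instructions.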
AMDGPUInstrInfo::AMDGPUInstrInfo(const AMDGPUSubtarget &ST)
  : AMDGPUGenInstrInfo(AMDGPU::ADJCALLSTACKUP, AMDGPU::ADJCALLSTACKDOWN),
    ST(ST),
    AMDGPUASI(ST.getAMDGPUAS()) {}

// FIXME: This behaves strangely. If, for example, you have 32 loads + stores,
// the first 16 loads will be interleaved with the stores, and the next 16
// will be clustered as expected. It should really split them into two
// batches of 16.
//
// Loads are clustered until this returns false, rather than trying to
// schedule groups of stores. This also means we have to decide whether loads
// from different address spaces should be clustered, and how to handle loads
// which might cause bank conflicts.
//
// This might be deprecated, so it might not be worth that much effort to fix.
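// This overrides TargetInstrInfo::shouldScheduleLoadsNear, which the
// SelectionDAG scheduler consults when deciding whether to cluster
// neighboring loads.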
bool AMDGPUInstrInfo::shouldScheduleLoadsNear(SDNode *Load0, SDNode *Load1,
                                              int64_t Offset0, int64_t Offset1,
                                              unsigned NumLoads) const {
  assert(Offset1 > Offset0 &&
         "Second offset should be larger than first offset!");
  // If we have 16 or fewer loads in a row, and the offsets are within 64
  // bytes, then schedule together.

  // A cacheline is 64 bytes (for global memory).
  return (NumLoads <= 16 && (Offset1 - Offset0) < 64);
}

// This must be kept in sync with the SIEncodingFamily class in SIInstrInfo.td
enum SIEncodingFamily {
  SI = 0,
  VI = 1,
  SDWA = 2,
  SDWA9 = 3,
  GFX80 = 4,
  GFX9 = 5
};

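// Map a subtarget generation to the default encoding family used when looking
// up hardware opcodes in the generated tables.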
static SIEncodingFamily subtargetEncodingFamily(const AMDGPUSubtarget &ST) {
  switch (ST.getGeneration()) {
  case AMDGPUSubtarget::SOUTHERN_ISLANDS:
  case AMDGPUSubtarget::SEA_ISLANDS:
    return SIEncodingFamily::SI;
  case AMDGPUSubtarget::VOLCANIC_ISLANDS:
  case AMDGPUSubtarget::GFX9:
    return SIEncodingFamily::VI;

  // FIXME: This should never be called for r600 GPUs.
  case AMDGPUSubtarget::R600:
  case AMDGPUSubtarget::R700:
  case AMDGPUSubtarget::EVERGREEN:
  case AMDGPUSubtarget::NORTHERN_ISLANDS:
    return SIEncodingFamily::SI;
  }

  llvm_unreachable("Unknown subtarget generation!");
}

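// Map a pseudo opcode to the real, subtarget-specific MC opcode via the
// TableGen-generated getMCOpcode table for the encoding family selected
// below. Returns the opcode unchanged if it is already a native instruction,
// or -1 if the pseudo has no encoding on this subtarget.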
int AMDGPUInstrInfo::pseudoToMCOpcode(int Opcode) const {
  SIEncodingFamily Gen = subtargetEncodingFamily(ST);

  if ((get(Opcode).TSFlags & SIInstrFlags::renamedInGFX9) != 0 &&
      ST.getGeneration() >= AMDGPUSubtarget::GFX9)
    Gen = SIEncodingFamily::GFX9;

  if (get(Opcode).TSFlags & SIInstrFlags::SDWA)
    Gen = ST.getGeneration() == AMDGPUSubtarget::GFX9 ? SIEncodingFamily::SDWA9
                                                      : SIEncodingFamily::SDWA;

  // Adjust the encoding family to GFX80 for D16 buffer instructions when the
  // subtarget has the UnpackedD16VMem feature.
  // TODO: remove this when we discard GFX80 encoding.
  if (ST.hasUnpackedD16VMem() && (get(Opcode).TSFlags & SIInstrFlags::D16) &&
      !(get(Opcode).TSFlags & SIInstrFlags::MIMG))
    Gen = SIEncodingFamily::GFX80;

  int MCOp = AMDGPU::getMCOpcode(Opcode, Gen);

  // -1 means that Opcode is already a native instruction.
  if (MCOp == -1)
    return Opcode;

  // (uint16_t)-1 means that Opcode is a pseudo instruction that has
  // no encoding in the given subtarget generation.
  if (MCOp == (uint16_t)-1)
    return -1;

  return MCOp;
}
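
// Illustrative use of pseudoToMCOpcode (hypothetical caller; the real MC
// lowering code lives elsewhere):
//
//   int MCOp = TII->pseudoToMCOpcode(MI.getOpcode());
//   if (MCOp == -1)
//     report_fatal_error("pseudo instruction has no hardware encoding");
//   OutMI.setOpcode(MCOp);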

// TODO: Should largely merge with AMDGPUTTIImpl::isSourceOfDivergence.
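// Return true if the access described by MMO is known to be uniform across
// the lanes of a wave: kernel input loads, PseudoSourceValue accesses,
// constant or global pointers, 32-bit constant address space accesses,
// arguments passed in SGPRs, and pointers annotated with !amdgpu.uniform
// metadata.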
bool AMDGPUInstrInfo::isUniformMMO(const MachineMemOperand *MMO) {
  const Value *Ptr = MMO->getValue();
  // UndefValue means this is a load of a kernel input.  These are uniform.
  // Sometimes LDS instructions have constant pointers.
  // If Ptr is null, then that means this mem operand contains a
  // PseudoSourceValue like GOT.
  if (!Ptr || isa<UndefValue>(Ptr) ||
      isa<Constant>(Ptr) || isa<GlobalValue>(Ptr))
    return true;

  if (MMO->getAddrSpace() == AMDGPUAS::CONSTANT_ADDRESS_32BIT)
    return true;

  if (const Argument *Arg = dyn_cast<Argument>(Ptr))
    return AMDGPU::isArgPassedInSGPR(Arg);

  const Instruction *I = dyn_cast<Instruction>(Ptr);
  return I && I->getMetadata("amdgpu.uniform");
}