//===-- AMDGPUInstrInfo.cpp - Base class for AMD GPU InstrInfo ------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
/// \file
/// \brief Implementation of the TargetInstrInfo class that is common to all
/// AMD GPUs.
//
//===----------------------------------------------------------------------===//

#include "AMDGPUInstrInfo.h"
#include "AMDGPURegisterInfo.h"
#include "AMDGPUTargetMachine.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"

using namespace llvm;

#define GET_INSTRINFO_CTOR_DTOR
#define GET_INSTRINFO_NAMED_OPS
#define GET_INSTRMAP_INFO
#include "AMDGPUGenInstrInfo.inc"

// Pin the vtable to this file.
void AMDGPUInstrInfo::anchor() {}

AMDGPUInstrInfo::AMDGPUInstrInfo(const AMDGPUSubtarget &st)
    : AMDGPUGenInstrInfo(-1, -1), ST(st) {}

const AMDGPURegisterInfo &AMDGPUInstrInfo::getRegisterInfo() const {
  return RI;
}

bool AMDGPUInstrInfo::enableClusterLoads() const {
  return true;
}

// FIXME: This behaves strangely. If, for example, you have 32 loads + stores,
// the first 16 loads will be interleaved with the stores, and the next 16 will
// be clustered as expected. It should really split them into two batches of
// 16 stores each.
//
// Loads are clustered until this returns false, rather than trying to schedule
// groups of stores. This also means we have to decide whether loads from
// different address spaces should be clustered, and whether loads that might
// cause bank conflicts should be.
//
// This hook might be deprecated, so it may not be worth much effort to fix.
bool AMDGPUInstrInfo::shouldScheduleLoadsNear(SDNode *Load0, SDNode *Load1,
                                              int64_t Offset0, int64_t Offset1,
                                              unsigned NumLoads) const {
  assert(Offset1 > Offset0 &&
         "Second offset should be larger than first offset!");
  // If there are no more than 16 loads in a row and the offsets are within 64
  // bytes, then schedule them together.

  // A cacheline is 64 bytes (for global memory).
  return (NumLoads <= 16 && (Offset1 - Offset0) < 64);
}

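// Return the first register index available for indirect addressing: one past
// the highest live-in register that belongs to the indirect address register
// class. Returns -1 if the function has no stack objects, and 0 if there are
// no live-ins to skip over.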
int AMDGPUInstrInfo::getIndirectIndexBegin(const MachineFunction &MF) const {
  const MachineRegisterInfo &MRI = MF.getRegInfo();
  const MachineFrameInfo *MFI = MF.getFrameInfo();
  int Offset = -1;

  if (MFI->getNumObjects() == 0) {
    return -1;
  }

  if (MRI.livein_empty()) {
    return 0;
  }

  const TargetRegisterClass *IndirectRC = getIndirectAddrRegClass();
  for (MachineRegisterInfo::livein_iterator LI = MRI.livein_begin(),
                                            LE = MRI.livein_end();
                                            LI != LE; ++LI) {
    unsigned Reg = LI->first;
    if (TargetRegisterInfo::isVirtualRegister(Reg) ||
        !IndirectRC->contains(Reg))
      continue;

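    // Linear search for the position of Reg within the indirect register
    // class.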
    unsigned RegIndex;
    unsigned RegEnd;
    for (RegIndex = 0, RegEnd = IndirectRC->getNumRegs(); RegIndex != RegEnd;
                                                          ++RegIndex) {
      if (IndirectRC->getRegister(RegIndex) == Reg)
        break;
    }
    Offset = std::max(Offset, (int)RegIndex);
  }

  return Offset + 1;
}

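// Return the end of the register index range used for indirect addressing:
// the begin index plus the frame offset reported by the target frame
// lowering. Returns -1 if the function has no stack objects.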
int AMDGPUInstrInfo::getIndirectIndexEnd(const MachineFunction &MF) const {
  int Offset = 0;
  const MachineFrameInfo *MFI = MF.getFrameInfo();

  // Variable sized objects are not supported.
  assert(!MFI->hasVarSizedObjects());

  if (MFI->getNumObjects() == 0) {
    return -1;
  }

  unsigned IgnoredFrameReg;
  Offset = MF.getSubtarget().getFrameLowering()->getFrameIndexReference(
      MF, -1, IgnoredFrameReg);

  return getIndirectIndexBegin(MF) + Offset;
}

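// Map a MIMG opcode to the variant that writes the requested number of
// channels (1, 2, or 3). Any other channel count returns the opcode
// unchanged.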
int AMDGPUInstrInfo::getMaskedMIMGOp(uint16_t Opcode, unsigned Channels) const {
  switch (Channels) {
  default: return Opcode;
  case 1: return AMDGPU::getMaskedMIMGOp(Opcode, AMDGPU::Channels_1);
  case 2: return AMDGPU::getMaskedMIMGOp(Opcode, AMDGPU::Channels_2);
  case 3: return AMDGPU::getMaskedMIMGOp(Opcode, AMDGPU::Channels_3);
  }
}

// Wrapper for Tablegen'd function.  enum Subtarget is not defined in any
// header files, so we need to wrap it in a function that takes unsigned
// instead.
namespace llvm {
namespace AMDGPU {
static int getMCOpcode(uint16_t Opcode, unsigned Gen) {
  return getMCOpcodeGen(Opcode, (enum Subtarget)Gen);
}
} // end namespace AMDGPU
} // end namespace llvm

// This must be kept in sync with the SISubtarget class in SIInstrInfo.td
enum SISubtarget {
  SI = 0,
  VI = 1
};

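// Map an AMDGPUSubtarget generation to the SISubtarget index used by the
// TableGen-generated opcode tables. Any generation other than Volcanic
// Islands maps to SI.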
static enum SISubtarget AMDGPUSubtargetToSISubtarget(unsigned Gen) {
  switch (Gen) {
  default:
    return SI;
  case AMDGPUSubtarget::VOLCANIC_ISLANDS:
    return VI;
  }
}

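// Resolve a pseudo opcode to the real MC opcode for the current subtarget.
// Returns the opcode itself if it is already a native instruction, or -1 if
// the pseudo has no encoding for this generation.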
int AMDGPUInstrInfo::pseudoToMCOpcode(int Opcode) const {
  int MCOp = AMDGPU::getMCOpcode(
      Opcode, AMDGPUSubtargetToSISubtarget(ST.getGeneration()));

  // -1 means that Opcode is already a native instruction.
  if (MCOp == -1)
    return Opcode;

  // (uint16_t)-1 means that Opcode is a pseudo instruction that has
  // no encoding in the given subtarget generation.
  if (MCOp == (uint16_t)-1)
    return -1;

  return MCOp;
}