1 //===-- AMDGPUInstrInfo.cpp - Base class for AMD GPU InstrInfo ------------===//
2 //
3 //                     The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 /// \file
11 /// \brief Implementation of the TargetInstrInfo class that is common to all
12 /// AMD GPUs.
13 //
14 //===----------------------------------------------------------------------===//
15 
16 #include "AMDGPUInstrInfo.h"
17 #include "AMDGPURegisterInfo.h"
18 #include "AMDGPUTargetMachine.h"
19 #include "llvm/CodeGen/MachineFrameInfo.h"
20 #include "llvm/CodeGen/MachineInstrBuilder.h"
21 #include "llvm/CodeGen/MachineRegisterInfo.h"
22 
23 using namespace llvm;
24 
25 #define GET_INSTRINFO_CTOR_DTOR
26 #include "AMDGPUGenInstrInfo.inc"
27 
28 // Pin the vtable to this file.
29 void AMDGPUInstrInfo::anchor() {}
30 
31 AMDGPUInstrInfo::AMDGPUInstrInfo(const AMDGPUSubtarget &ST)
32   : AMDGPUGenInstrInfo(AMDGPU::ADJCALLSTACKUP, AMDGPU::ADJCALLSTACKDOWN),
33     ST(ST),
34     AMDGPUASI(ST.getAMDGPUAS()) {}
35 
36 // FIXME: This behaves strangely. If, for example, you have 32 load + stores,
37 // the first 16 loads will be interleaved with the stores, and the next 16 will
38 // be clustered as expected. It should really split into 2 16 store batches.
39 //
40 // Loads are clustered until this returns false, rather than trying to schedule
41 // groups of stores. This also means we have to deal with saying different
42 // address space loads should be clustered, and ones which might cause bank
43 // conflicts.
44 //
45 // This might be deprecated so it might not be worth that much effort to fix.
46 bool AMDGPUInstrInfo::shouldScheduleLoadsNear(SDNode *Load0, SDNode *Load1,
47                                               int64_t Offset0, int64_t Offset1,
48                                               unsigned NumLoads) const {
49   assert(Offset1 > Offset0 &&
50          "Second offset should be larger than first offset!");
51   // If we have less than 16 loads in a row, and the offsets are within 64
52   // bytes, then schedule together.
53 
54   // A cacheline is 64 bytes (for global memory).
55   return (NumLoads <= 16 && (Offset1 - Offset0) < 64);
56 }
57 
// Encoding families used to select the real (MC) opcode for a pseudo
// instruction.
//
// The numeric values must be kept in sync with the SIEncodingFamily class in
// SIInstrInfo.td, so do not renumber or reorder them here without updating
// that file as well.
enum SIEncodingFamily {
  SI = 0,    // Default for SOUTHERN_ISLANDS / SEA_ISLANDS subtargets.
  VI = 1,    // Default for VOLCANIC_ISLANDS / GFX9 subtargets.
  SDWA = 2,  // SDWA-flagged instructions on subtargets other than GFX9.
  SDWA9 = 3, // SDWA-flagged instructions on GFX9.
  GFX80 = 4, // D16 non-MIMG instructions when UnpackedD16VMem is present.
  GFX9 = 5   // Instructions flagged renamedInGFX9, on GFX9 and newer.
};
67 
68 static SIEncodingFamily subtargetEncodingFamily(const AMDGPUSubtarget &ST) {
69   switch (ST.getGeneration()) {
70   case AMDGPUSubtarget::SOUTHERN_ISLANDS:
71   case AMDGPUSubtarget::SEA_ISLANDS:
72     return SIEncodingFamily::SI;
73   case AMDGPUSubtarget::VOLCANIC_ISLANDS:
74   case AMDGPUSubtarget::GFX9:
75     return SIEncodingFamily::VI;
76 
77   // FIXME: This should never be called for r600 GPUs.
78   case AMDGPUSubtarget::R600:
79   case AMDGPUSubtarget::R700:
80   case AMDGPUSubtarget::EVERGREEN:
81   case AMDGPUSubtarget::NORTHERN_ISLANDS:
82     return SIEncodingFamily::SI;
83   }
84 
85   llvm_unreachable("Unknown subtarget generation!");
86 }
87 
88 int AMDGPUInstrInfo::pseudoToMCOpcode(int Opcode) const {
89   SIEncodingFamily Gen = subtargetEncodingFamily(ST);
90 
91   if ((get(Opcode).TSFlags & SIInstrFlags::renamedInGFX9) != 0 &&
92     ST.getGeneration() >= AMDGPUSubtarget::GFX9)
93     Gen = SIEncodingFamily::GFX9;
94 
95   if (get(Opcode).TSFlags & SIInstrFlags::SDWA)
96     Gen = ST.getGeneration() == AMDGPUSubtarget::GFX9 ? SIEncodingFamily::SDWA9
97                                                       : SIEncodingFamily::SDWA;
98   // Adjust the encoding family to GFX80 for D16 buffer instructions when the
99   // subtarget has UnpackedD16VMem feature.
100   // TODO: remove this when we discard GFX80 encoding.
101   if (ST.hasUnpackedD16VMem() && (get(Opcode).TSFlags & SIInstrFlags::D16)
102                               && !(get(Opcode).TSFlags & SIInstrFlags::MIMG))
103     Gen = SIEncodingFamily::GFX80;
104 
105   int MCOp = AMDGPU::getMCOpcode(Opcode, Gen);
106 
107   // -1 means that Opcode is already a native instruction.
108   if (MCOp == -1)
109     return Opcode;
110 
111   // (uint16_t)-1 means that Opcode is a pseudo instruction that has
112   // no encoding in the given subtarget generation.
113   if (MCOp == (uint16_t)-1)
114     return -1;
115 
116   return MCOp;
117 }
118 
119 // TODO: Should largely merge with AMDGPUTTIImpl::isSourceOfDivergence.
120 bool AMDGPUInstrInfo::isUniformMMO(const MachineMemOperand *MMO) {
121   const Value *Ptr = MMO->getValue();
122   // UndefValue means this is a load of a kernel input.  These are uniform.
123   // Sometimes LDS instructions have constant pointers.
124   // If Ptr is null, then that means this mem operand contains a
125   // PseudoSourceValue like GOT.
126   if (!Ptr || isa<UndefValue>(Ptr) ||
127       isa<Constant>(Ptr) || isa<GlobalValue>(Ptr))
128     return true;
129 
130   if (const Argument *Arg = dyn_cast<Argument>(Ptr))
131     return AMDGPU::isArgPassedInSGPR(Arg);
132 
133   const Instruction *I = dyn_cast<Instruction>(Ptr);
134   return I && I->getMetadata("amdgpu.uniform");
135 }
136