1 //===-- AArch64Subtarget.cpp - AArch64 Subtarget Information ----*- C++ -*-===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 // 10 // This file implements the AArch64 specific subclass of TargetSubtarget. 11 // 12 //===----------------------------------------------------------------------===// 13 14 #include "AArch64Subtarget.h" 15 16 #include "AArch64.h" 17 #include "AArch64InstrInfo.h" 18 #include "AArch64PBQPRegAlloc.h" 19 #include "AArch64TargetMachine.h" 20 21 #ifdef LLVM_BUILD_GLOBAL_ISEL 22 #include "AArch64CallLowering.h" 23 #include "AArch64LegalizerInfo.h" 24 #include "AArch64RegisterBankInfo.h" 25 #include "llvm/CodeGen/GlobalISel/GISelAccessor.h" 26 #include "llvm/CodeGen/GlobalISel/IRTranslator.h" 27 #include "llvm/CodeGen/GlobalISel/InstructionSelect.h" 28 #include "llvm/CodeGen/GlobalISel/Legalizer.h" 29 #include "llvm/CodeGen/GlobalISel/RegBankSelect.h" 30 #endif 31 #include "llvm/CodeGen/MachineScheduler.h" 32 #include "llvm/IR/GlobalValue.h" 33 #include "llvm/Support/TargetRegistry.h" 34 35 using namespace llvm; 36 37 #define DEBUG_TYPE "aarch64-subtarget" 38 39 #define GET_SUBTARGETINFO_CTOR 40 #define GET_SUBTARGETINFO_TARGET_DESC 41 #include "AArch64GenSubtargetInfo.inc" 42 43 static cl::opt<bool> 44 EnableEarlyIfConvert("aarch64-early-ifcvt", cl::desc("Enable the early if " 45 "converter pass"), cl::init(true), cl::Hidden); 46 47 // If OS supports TBI, use this flag to enable it. 48 static cl::opt<bool> 49 UseAddressTopByteIgnored("aarch64-use-tbi", cl::desc("Assume that top byte of " 50 "an address is ignored"), cl::init(false), cl::Hidden); 51 52 static cl::opt<bool> 53 UseNonLazyBind("aarch64-enable-nonlazybind", 54 cl::desc("Call nonlazybind functions via direct GOT load"), 55 cl::init(false), cl::Hidden); 56 57 AArch64Subtarget & 58 AArch64Subtarget::initializeSubtargetDependencies(StringRef FS, 59 StringRef CPUString) { 60 // Determine default and user-specified characteristics 61 62 if (CPUString.empty()) 63 CPUString = "generic"; 64 65 ParseSubtargetFeatures(CPUString, FS); 66 initializeProperties(); 67 68 return *this; 69 } 70 71 void AArch64Subtarget::initializeProperties() { 72 // Initialize CPU specific properties. We should add a tablegen feature for 73 // this in the future so we can specify it together with the subtarget 74 // features. 75 switch (ARMProcFamily) { 76 case Cyclone: 77 CacheLineSize = 64; 78 PrefetchDistance = 280; 79 MinPrefetchStride = 2048; 80 MaxPrefetchIterationsAhead = 3; 81 break; 82 case CortexA57: 83 MaxInterleaveFactor = 4; 84 break; 85 case ExynosM1: 86 MaxInterleaveFactor = 4; 87 MaxJumpTableSize = 8; 88 PrefFunctionAlignment = 4; 89 PrefLoopAlignment = 3; 90 break; 91 case Falkor: 92 MaxInterleaveFactor = 4; 93 // FIXME: remove this to enable 64-bit SLP if performance looks good. 94 MinVectorRegisterBitWidth = 128; 95 CacheLineSize = 128; 96 PrefetchDistance = 820; 97 MinPrefetchStride = 2048; 98 MaxPrefetchIterationsAhead = 8; 99 break; 100 case Kryo: 101 MaxInterleaveFactor = 4; 102 VectorInsertExtractBaseCost = 2; 103 CacheLineSize = 128; 104 PrefetchDistance = 740; 105 MinPrefetchStride = 1024; 106 MaxPrefetchIterationsAhead = 11; 107 // FIXME: remove this to enable 64-bit SLP if performance looks good. 108 MinVectorRegisterBitWidth = 128; 109 break; 110 case ThunderX2T99: 111 CacheLineSize = 64; 112 PrefFunctionAlignment = 3; 113 PrefLoopAlignment = 2; 114 MaxInterleaveFactor = 4; 115 PrefetchDistance = 128; 116 MinPrefetchStride = 1024; 117 MaxPrefetchIterationsAhead = 4; 118 // FIXME: remove this to enable 64-bit SLP if performance looks good. 119 MinVectorRegisterBitWidth = 128; 120 break; 121 case ThunderX: 122 case ThunderXT88: 123 case ThunderXT81: 124 case ThunderXT83: 125 CacheLineSize = 128; 126 PrefFunctionAlignment = 3; 127 PrefLoopAlignment = 2; 128 // FIXME: remove this to enable 64-bit SLP if performance looks good. 129 MinVectorRegisterBitWidth = 128; 130 break; 131 case CortexA35: break; 132 case CortexA53: break; 133 case CortexA72: break; 134 case CortexA73: break; 135 case Others: break; 136 } 137 } 138 139 #ifdef LLVM_BUILD_GLOBAL_ISEL 140 namespace { 141 142 struct AArch64GISelActualAccessor : public GISelAccessor { 143 std::unique_ptr<CallLowering> CallLoweringInfo; 144 std::unique_ptr<InstructionSelector> InstSelector; 145 std::unique_ptr<LegalizerInfo> Legalizer; 146 std::unique_ptr<RegisterBankInfo> RegBankInfo; 147 148 const CallLowering *getCallLowering() const override { 149 return CallLoweringInfo.get(); 150 } 151 152 const InstructionSelector *getInstructionSelector() const override { 153 return InstSelector.get(); 154 } 155 156 const LegalizerInfo *getLegalizerInfo() const override { 157 return Legalizer.get(); 158 } 159 160 const RegisterBankInfo *getRegBankInfo() const override { 161 return RegBankInfo.get(); 162 } 163 }; 164 165 } // end anonymous namespace 166 #endif 167 168 AArch64Subtarget::AArch64Subtarget(const Triple &TT, const std::string &CPU, 169 const std::string &FS, 170 const TargetMachine &TM, bool LittleEndian) 171 : AArch64GenSubtargetInfo(TT, CPU, FS), ReserveX18(TT.isOSDarwin()), 172 IsLittle(LittleEndian), TargetTriple(TT), FrameLowering(), 173 InstrInfo(initializeSubtargetDependencies(FS, CPU)), TSInfo(), 174 TLInfo(TM, *this), GISel() { 175 #ifndef LLVM_BUILD_GLOBAL_ISEL 176 GISelAccessor *AArch64GISel = new GISelAccessor(); 177 #else 178 AArch64GISelActualAccessor *AArch64GISel = new AArch64GISelActualAccessor(); 179 AArch64GISel->CallLoweringInfo.reset( 180 new AArch64CallLowering(*getTargetLowering())); 181 AArch64GISel->Legalizer.reset(new AArch64LegalizerInfo()); 182 183 auto *RBI = new AArch64RegisterBankInfo(*getRegisterInfo()); 184 185 // FIXME: At this point, we can't rely on Subtarget having RBI. 186 // It's awkward to mix passing RBI and the Subtarget; should we pass 187 // TII/TRI as well? 188 AArch64GISel->InstSelector.reset(createAArch64InstructionSelector( 189 *static_cast<const AArch64TargetMachine *>(&TM), *this, *RBI)); 190 191 AArch64GISel->RegBankInfo.reset(RBI); 192 #endif 193 setGISelAccessor(*AArch64GISel); 194 } 195 196 const CallLowering *AArch64Subtarget::getCallLowering() const { 197 assert(GISel && "Access to GlobalISel APIs not set"); 198 return GISel->getCallLowering(); 199 } 200 201 const InstructionSelector *AArch64Subtarget::getInstructionSelector() const { 202 assert(GISel && "Access to GlobalISel APIs not set"); 203 return GISel->getInstructionSelector(); 204 } 205 206 const LegalizerInfo *AArch64Subtarget::getLegalizerInfo() const { 207 assert(GISel && "Access to GlobalISel APIs not set"); 208 return GISel->getLegalizerInfo(); 209 } 210 211 const RegisterBankInfo *AArch64Subtarget::getRegBankInfo() const { 212 assert(GISel && "Access to GlobalISel APIs not set"); 213 return GISel->getRegBankInfo(); 214 } 215 216 /// Find the target operand flags that describe how a global value should be 217 /// referenced for the current subtarget. 218 unsigned char 219 AArch64Subtarget::ClassifyGlobalReference(const GlobalValue *GV, 220 const TargetMachine &TM) const { 221 // MachO large model always goes via a GOT, simply to get a single 8-byte 222 // absolute relocation on all global addresses. 223 if (TM.getCodeModel() == CodeModel::Large && isTargetMachO()) 224 return AArch64II::MO_GOT; 225 226 if (!TM.shouldAssumeDSOLocal(*GV->getParent(), GV)) 227 return AArch64II::MO_GOT; 228 229 // The small code model's direct accesses use ADRP, which cannot 230 // necessarily produce the value 0 (if the code is above 4GB). 231 if (useSmallAddressing() && GV->hasExternalWeakLinkage()) 232 return AArch64II::MO_GOT; 233 234 return AArch64II::MO_NO_FLAG; 235 } 236 237 unsigned char AArch64Subtarget::classifyGlobalFunctionReference( 238 const GlobalValue *GV, const TargetMachine &TM) const { 239 // MachO large model always goes via a GOT, because we don't have the 240 // relocations available to do anything else.. 241 if (TM.getCodeModel() == CodeModel::Large && isTargetMachO() && 242 !GV->hasInternalLinkage()) 243 return AArch64II::MO_GOT; 244 245 // NonLazyBind goes via GOT unless we know it's available locally. 246 auto *F = dyn_cast<Function>(GV); 247 if (UseNonLazyBind && F && F->hasFnAttribute(Attribute::NonLazyBind) && 248 !TM.shouldAssumeDSOLocal(*GV->getParent(), GV)) 249 return AArch64II::MO_GOT; 250 251 return AArch64II::MO_NO_FLAG; 252 } 253 254 /// This function returns the name of a function which has an interface 255 /// like the non-standard bzero function, if such a function exists on 256 /// the current subtarget and it is considered prefereable over 257 /// memset with zero passed as the second argument. Otherwise it 258 /// returns null. 259 const char *AArch64Subtarget::getBZeroEntry() const { 260 // Prefer bzero on Darwin only. 261 if(isTargetDarwin()) 262 return "bzero"; 263 264 return nullptr; 265 } 266 267 void AArch64Subtarget::overrideSchedPolicy(MachineSchedPolicy &Policy, 268 unsigned NumRegionInstrs) const { 269 // LNT run (at least on Cyclone) showed reasonably significant gains for 270 // bi-directional scheduling. 253.perlbmk. 271 Policy.OnlyTopDown = false; 272 Policy.OnlyBottomUp = false; 273 // Enabling or Disabling the latency heuristic is a close call: It seems to 274 // help nearly no benchmark on out-of-order architectures, on the other hand 275 // it regresses register pressure on a few benchmarking. 276 Policy.DisableLatencyHeuristic = DisableLatencySchedHeuristic; 277 } 278 279 bool AArch64Subtarget::enableEarlyIfConversion() const { 280 return EnableEarlyIfConvert; 281 } 282 283 bool AArch64Subtarget::supportsAddressTopByteIgnored() const { 284 if (!UseAddressTopByteIgnored) 285 return false; 286 287 if (TargetTriple.isiOS()) { 288 unsigned Major, Minor, Micro; 289 TargetTriple.getiOSVersion(Major, Minor, Micro); 290 return Major >= 8; 291 } 292 293 return false; 294 } 295 296 std::unique_ptr<PBQPRAConstraint> 297 AArch64Subtarget::getCustomPBQPConstraints() const { 298 return balanceFPOps() ? llvm::make_unique<A57ChainingConstraint>() : nullptr; 299 } 300