//===-- AArch64TargetMachine.cpp - Define TargetMachine for AArch64 -------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file implements the AArch64-specific subclass of TargetMachine.
//
//===----------------------------------------------------------------------===//

#include "AArch64.h"
#include "AArch64TargetMachine.h"
#include "AArch64TargetObjectFile.h"
#include "AArch64TargetTransformInfo.h"
#ifdef LLVM_BUILD_GLOBAL_ISEL
#include "llvm/CodeGen/GlobalISel/IRTranslator.h"
#endif
#include "llvm/CodeGen/Passes.h"
#include "llvm/CodeGen/RegAllocRegistry.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/LegacyPassManager.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/TargetRegistry.h"
#include "llvm/Target/TargetOptions.h"
#include "llvm/Transforms/Scalar.h"
using namespace llvm;

static cl::opt<bool> EnableCCMP("aarch64-ccmp",
                                cl::desc("Enable the CCMP formation pass"),
                                cl::init(true), cl::Hidden);

static cl::opt<bool> EnableMCR("aarch64-mcr",
                               cl::desc("Enable the machine combiner pass"),
                               cl::init(true), cl::Hidden);

static cl::opt<bool> EnableStPairSuppress("aarch64-stp-suppress",
                                          cl::desc("Suppress STP for AArch64"),
                                          cl::init(true), cl::Hidden);

static cl::opt<bool> EnableAdvSIMDScalar(
    "aarch64-simd-scalar",
    cl::desc("Enable use of AdvSIMD scalar integer instructions"),
    cl::init(false), cl::Hidden);

static cl::opt<bool>
    EnablePromoteConstant("aarch64-promote-const",
                          cl::desc("Enable the promote constant pass"),
                          cl::init(true), cl::Hidden);

static cl::opt<bool> EnableCollectLOH(
    "aarch64-collect-loh",
    cl::desc("Enable the pass that emits the linker optimization hints (LOH)"),
    cl::init(true), cl::Hidden);

static cl::opt<bool>
    EnableDeadRegisterElimination("aarch64-dead-def-elimination", cl::Hidden,
                                  cl::desc("Enable the pass that removes dead"
                                           " definitions and replaces stores"
                                           " to them with stores to the zero"
                                           " register"),
                                  cl::init(true));

static cl::opt<bool> EnableRedundantCopyElimination(
    "aarch64-redundant-copy-elim",
    cl::desc("Enable the redundant copy elimination pass"), cl::init(true),
    cl::Hidden);

static cl::opt<bool> EnableLoadStoreOpt(
    "aarch64-load-store-opt",
    cl::desc("Enable the load/store pair optimization pass"), cl::init(true),
    cl::Hidden);

static cl::opt<bool> EnableAtomicTidy(
    "aarch64-atomic-cfg-tidy", cl::Hidden,
    cl::desc("Run SimplifyCFG after expanding atomic operations"
             " to make use of cmpxchg flow-based information"),
    cl::init(true));

static cl::opt<bool>
    EnableEarlyIfConversion("aarch64-enable-early-ifcvt", cl::Hidden,
                            cl::desc("Run early if-conversion"),
                            cl::init(true));

static cl::opt<bool>
    EnableCondOpt("aarch64-condopt",
                  cl::desc("Enable the condition optimizer pass"),
                  cl::init(true), cl::Hidden);

static cl::opt<bool>
    EnableA53Fix835769("aarch64-fix-cortex-a53-835769", cl::Hidden,
                       cl::desc("Work around Cortex-A53 erratum 835769"),
                       cl::init(false));

static cl::opt<bool>
    EnableGEPOpt("aarch64-gep-opt", cl::Hidden,
                 cl::desc("Enable optimizations on complex GEPs"),
                 cl::init(false));
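
// All of the flags above are cl::Hidden, so they are not listed by -help;
// they exist for testing and triage. A typical invocation toggling one of
// them (assuming an input file foo.ll) might look like:
//   llc -mtriple=aarch64-- -aarch64-ccmp=false foo.ll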

// FIXME: Unify control over GlobalMerge.
static cl::opt<cl::boolOrDefault>
    EnableGlobalMerge("aarch64-global-merge", cl::Hidden,
                      cl::desc("Enable the global merge pass"));

extern "C" void LLVMInitializeAArch64Target() {
  // Register the target.
  RegisterTargetMachine<AArch64leTargetMachine> X(TheAArch64leTarget);
  RegisterTargetMachine<AArch64beTargetMachine> Y(TheAArch64beTarget);
  RegisterTargetMachine<AArch64leTargetMachine> Z(TheARM64Target);
}

//===----------------------------------------------------------------------===//
// AArch64 Lowering public interface.
//===----------------------------------------------------------------------===//
static std::unique_ptr<TargetLoweringObjectFile> createTLOF(const Triple &TT) {
  if (TT.isOSBinFormatMachO())
    return make_unique<AArch64_MachoTargetObjectFile>();

  return make_unique<AArch64_ELFTargetObjectFile>();
}

// Helper function to build a DataLayout string.
static std::string computeDataLayout(const Triple &TT, bool LittleEndian) {
  if (TT.isOSBinFormatMachO())
    return "e-m:o-i64:64-i128:128-n32:64-S128";
  if (LittleEndian)
    return "e-m:e-i64:64-i128:128-n32:64-S128";
  return "E-m:e-i64:64-i128:128-n32:64-S128";
}

/// TargetMachine ctor - Create an AArch64 architecture model.
///
AArch64TargetMachine::AArch64TargetMachine(const Target &T, const Triple &TT,
                                           StringRef CPU, StringRef FS,
                                           const TargetOptions &Options,
                                           Reloc::Model RM, CodeModel::Model CM,
                                           CodeGenOpt::Level OL,
                                           bool LittleEndian)
    // The DataLayout has to be computed before TLInfo is constructed, so it
    // is built here and handed straight to the LLVMTargetMachine base class.
    : LLVMTargetMachine(T, computeDataLayout(TT, LittleEndian), TT, CPU, FS,
                        Options, RM, CM, OL),
      TLOF(createTLOF(getTargetTriple())), isLittle(LittleEndian) {
  initAsmInfo();
}

AArch64TargetMachine::~AArch64TargetMachine() {}

const AArch64Subtarget *
AArch64TargetMachine::getSubtargetImpl(const Function &F) const {
  Attribute CPUAttr = F.getFnAttribute("target-cpu");
  Attribute FSAttr = F.getFnAttribute("target-features");

  std::string CPU = !CPUAttr.hasAttribute(Attribute::None)
                        ? CPUAttr.getValueAsString().str()
                        : TargetCPU;
  std::string FS = !FSAttr.hasAttribute(Attribute::None)
                       ? FSAttr.getValueAsString().str()
                       : TargetFS;

  auto &I = SubtargetMap[CPU + FS];
  if (!I) {
    // This needs to be done before we create a new subtarget since any
    // creation will depend on the TM and the code generation flags on the
    // function that reside in TargetOptions.
    resetTargetOptions(F);
    I = llvm::make_unique<AArch64Subtarget>(TargetTriple, CPU, FS, *this,
                                            isLittle);
  }
  return I.get();
}

void AArch64leTargetMachine::anchor() { }

AArch64leTargetMachine::AArch64leTargetMachine(
    const Target &T, const Triple &TT, StringRef CPU, StringRef FS,
    const TargetOptions &Options, Reloc::Model RM, CodeModel::Model CM,
    CodeGenOpt::Level OL)
    : AArch64TargetMachine(T, TT, CPU, FS, Options, RM, CM, OL, true) {}

void AArch64beTargetMachine::anchor() { }

AArch64beTargetMachine::AArch64beTargetMachine(
    const Target &T, const Triple &TT, StringRef CPU, StringRef FS,
    const TargetOptions &Options, Reloc::Model RM, CodeModel::Model CM,
    CodeGenOpt::Level OL)
    : AArch64TargetMachine(T, TT, CPU, FS, Options, RM, CM, OL, false) {}

namespace {
/// AArch64 Code Generator Pass Configuration Options.
class AArch64PassConfig : public TargetPassConfig {
public:
  AArch64PassConfig(AArch64TargetMachine *TM, PassManagerBase &PM)
      : TargetPassConfig(TM, PM) {
    if (TM->getOptLevel() != CodeGenOpt::None)
      substitutePass(&PostRASchedulerID, &PostMachineSchedulerID);
  }

  AArch64TargetMachine &getAArch64TargetMachine() const {
    return getTM<AArch64TargetMachine>();
  }

  void addIRPasses() override;
  bool addPreISel() override;
  bool addInstSelector() override;
#ifdef LLVM_BUILD_GLOBAL_ISEL
  bool addIRTranslator() override;
#endif
  bool addILPOpts() override;
  void addPreRegAlloc() override;
  void addPostRegAlloc() override;
  void addPreSched2() override;
  void addPreEmitPass() override;
};
} // namespace

TargetIRAnalysis AArch64TargetMachine::getTargetIRAnalysis() {
  return TargetIRAnalysis([this](const Function &F) {
    return TargetTransformInfo(AArch64TTIImpl(this, F));
  });
}

TargetPassConfig *AArch64TargetMachine::createPassConfig(PassManagerBase &PM) {
  return new AArch64PassConfig(this, PM);
}

void AArch64PassConfig::addIRPasses() {
  // Always expand atomic operations; we don't deal with atomicrmw or cmpxchg
  // ourselves.
  addPass(createAtomicExpandPass(TM));

  // Cmpxchg instructions are often used with a subsequent comparison to
  // determine whether it succeeded. We can exploit existing control-flow in
  // ldrex/strex loops to simplify this, but it needs tidying up.
  if (TM->getOptLevel() != CodeGenOpt::None && EnableAtomicTidy)
    addPass(createCFGSimplificationPass());

  TargetPassConfig::addIRPasses();

  // Match interleaved memory accesses to ldN/stN intrinsics.
  if (TM->getOptLevel() != CodeGenOpt::None)
    addPass(createInterleavedAccessPass(TM));

  if (TM->getOptLevel() == CodeGenOpt::Aggressive && EnableGEPOpt) {
    // Run the SeparateConstOffsetFromGEP pass to extract constants within
    // indices and lower a GEP with multiple indices to either arithmetic
    // operations or multiple GEPs with a single index.
    addPass(createSeparateConstOffsetFromGEPPass(TM, true));
    // Run the EarlyCSE pass to find and remove common subexpressions in the
    // lowered result.
    addPass(createEarlyCSEPass());
    // Do loop-invariant code motion in case part of the lowered result is
    // invariant.
    addPass(createLICMPass());
  }
}

// Pass Pipeline Configuration
bool AArch64PassConfig::addPreISel() {
  // Run promote constant before global merge, so that the promoted constants
  // get a chance to be merged.
  if (TM->getOptLevel() != CodeGenOpt::None && EnablePromoteConstant)
    addPass(createAArch64PromoteConstantPass());
  // FIXME: On AArch64, this depends on the type.
  // Basically, the addressable offsets are up to 4095 * Ty.getSizeInBytes(),
  // and the offset has to be a multiple of the related size in bytes.
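  // Roughly, GlobalMerge rewrites several small globals, e.g.
  //   static int A; static int B;
  // into a single merged global so that both can be addressed from one base
  // register, saving repeated base-address materializations.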
  if ((TM->getOptLevel() != CodeGenOpt::None &&
       EnableGlobalMerge == cl::BOU_UNSET) ||
      EnableGlobalMerge == cl::BOU_TRUE) {
    bool OnlyOptimizeForSize = (TM->getOptLevel() < CodeGenOpt::Aggressive) &&
                               (EnableGlobalMerge == cl::BOU_UNSET);
    addPass(createGlobalMergePass(TM, 4095, OnlyOptimizeForSize));
  }

  if (TM->getOptLevel() != CodeGenOpt::None)
    addPass(createAArch64AddressTypePromotionPass());

  return false;
}

bool AArch64PassConfig::addInstSelector() {
  addPass(createAArch64ISelDag(getAArch64TargetMachine(), getOptLevel()));

  // For ELF, clean up any local-dynamic TLS accesses (i.e. combine as many
  // references to _TLS_MODULE_BASE_ as possible).
  if (TM->getTargetTriple().isOSBinFormatELF() &&
      getOptLevel() != CodeGenOpt::None)
    addPass(createAArch64CleanupLocalDynamicTLSPass());

  return false;
}

#ifdef LLVM_BUILD_GLOBAL_ISEL
bool AArch64PassConfig::addIRTranslator() {
  addPass(new IRTranslator());
  return false;
}
#endif

bool AArch64PassConfig::addILPOpts() {
  if (EnableCondOpt)
    addPass(createAArch64ConditionOptimizerPass());
  if (EnableCCMP)
    addPass(createAArch64ConditionalCompares());
  if (EnableMCR)
    addPass(&MachineCombinerID);
  if (EnableEarlyIfConversion)
    addPass(&EarlyIfConverterID);
  if (EnableStPairSuppress)
    addPass(createAArch64StorePairSuppressPass());
  return true;
}

void AArch64PassConfig::addPreRegAlloc() {
  // Use AdvSIMD scalar instructions whenever profitable.
  if (TM->getOptLevel() != CodeGenOpt::None && EnableAdvSIMDScalar) {
    addPass(createAArch64AdvSIMDScalar());
    // The AdvSIMD pass may produce copies that can be rewritten to
    // be register coalescer friendly.
    addPass(&PeepholeOptimizerID);
  }
}

void AArch64PassConfig::addPostRegAlloc() {
  // Remove redundant copy instructions.
  if (TM->getOptLevel() != CodeGenOpt::None && EnableRedundantCopyElimination)
    addPass(createAArch64RedundantCopyEliminationPass());

  // Change dead register definitions to refer to the zero register.
  if (TM->getOptLevel() != CodeGenOpt::None && EnableDeadRegisterElimination)
    addPass(createAArch64DeadRegisterDefinitions());
  if (TM->getOptLevel() != CodeGenOpt::None && usingDefaultRegAlloc())
    // Improve performance for some FP/SIMD code for A57.
    addPass(createAArch64A57FPLoadBalancing());
}

void AArch64PassConfig::addPreSched2() {
  // Expand some pseudo instructions to allow proper scheduling.
  addPass(createAArch64ExpandPseudoPass());
  // Use load/store pair instructions when possible.
  if (TM->getOptLevel() != CodeGenOpt::None && EnableLoadStoreOpt)
    addPass(createAArch64LoadStoreOptimizationPass());
}

void AArch64PassConfig::addPreEmitPass() {
  if (EnableA53Fix835769)
    addPass(createAArch64A53Fix835769());
  // Relax conditional branch instructions if they're otherwise out of
  // range of their destination.
  addPass(createAArch64BranchRelaxation());
  if (TM->getOptLevel() != CodeGenOpt::None && EnableCollectLOH &&
      TM->getTargetTriple().isOSBinFormatMachO())
    addPass(createAArch64CollectLOHPass());
}