1 //===-- AArch64TargetMachine.cpp - Define TargetMachine for AArch64 -------===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 // 10 // 11 //===----------------------------------------------------------------------===// 12 13 #include "AArch64.h" 14 #include "AArch64TargetMachine.h" 15 #include "AArch64TargetObjectFile.h" 16 #include "AArch64TargetTransformInfo.h" 17 #ifdef LLVM_BUILD_GLOBAL_ISEL 18 # include "llvm/CodeGen/GlobalISel/IRTranslator.h" 19 #endif 20 #include "llvm/CodeGen/Passes.h" 21 #include "llvm/CodeGen/RegAllocRegistry.h" 22 #include "llvm/IR/Function.h" 23 #include "llvm/IR/LegacyPassManager.h" 24 #include "llvm/InitializePasses.h" 25 #include "llvm/Support/CommandLine.h" 26 #include "llvm/Support/TargetRegistry.h" 27 #include "llvm/Target/TargetOptions.h" 28 #include "llvm/Transforms/Scalar.h" 29 using namespace llvm; 30 31 static cl::opt<bool> 32 EnableCCMP("aarch64-ccmp", cl::desc("Enable the CCMP formation pass"), 33 cl::init(true), cl::Hidden); 34 35 static cl::opt<bool> EnableMCR("aarch64-mcr", 36 cl::desc("Enable the machine combiner pass"), 37 cl::init(true), cl::Hidden); 38 39 static cl::opt<bool> 40 EnableStPairSuppress("aarch64-stp-suppress", cl::desc("Suppress STP for AArch64"), 41 cl::init(true), cl::Hidden); 42 43 static cl::opt<bool> 44 EnableAdvSIMDScalar("aarch64-simd-scalar", cl::desc("Enable use of AdvSIMD scalar" 45 " integer instructions"), cl::init(false), cl::Hidden); 46 47 static cl::opt<bool> 48 EnablePromoteConstant("aarch64-promote-const", cl::desc("Enable the promote " 49 "constant pass"), cl::init(true), cl::Hidden); 50 51 static cl::opt<bool> 52 EnableCollectLOH("aarch64-collect-loh", cl::desc("Enable the pass that emits the" 53 " linker optimization hints (LOH)"), cl::init(true), 54 cl::Hidden); 55 56 static cl::opt<bool> 57 EnableDeadRegisterElimination("aarch64-dead-def-elimination", cl::Hidden, 58 cl::desc("Enable the pass that removes dead" 59 " definitons and replaces stores to" 60 " them with stores to the zero" 61 " register"), 62 cl::init(true)); 63 64 static cl::opt<bool> 65 EnableRedundantCopyElimination("aarch64-redundant-copy-elim", 66 cl::desc("Enable the redundant copy elimination pass"), 67 cl::init(true), cl::Hidden); 68 69 static cl::opt<bool> 70 EnableLoadStoreOpt("aarch64-load-store-opt", cl::desc("Enable the load/store pair" 71 " optimization pass"), cl::init(true), cl::Hidden); 72 73 static cl::opt<bool> 74 EnableAtomicTidy("aarch64-atomic-cfg-tidy", cl::Hidden, 75 cl::desc("Run SimplifyCFG after expanding atomic operations" 76 " to make use of cmpxchg flow-based information"), 77 cl::init(true)); 78 79 static cl::opt<bool> 80 EnableEarlyIfConversion("aarch64-enable-early-ifcvt", cl::Hidden, 81 cl::desc("Run early if-conversion"), 82 cl::init(true)); 83 84 static cl::opt<bool> 85 EnableCondOpt("aarch64-condopt", 86 cl::desc("Enable the condition optimizer pass"), 87 cl::init(true), cl::Hidden); 88 89 static cl::opt<bool> 90 EnableA53Fix835769("aarch64-fix-cortex-a53-835769", cl::Hidden, 91 cl::desc("Work around Cortex-A53 erratum 835769"), 92 cl::init(false)); 93 94 static cl::opt<bool> 95 EnableGEPOpt("aarch64-gep-opt", cl::Hidden, 96 cl::desc("Enable optimizations on complex GEPs"), 97 cl::init(false)); 98 99 // FIXME: Unify control over GlobalMerge. 100 static cl::opt<cl::boolOrDefault> 101 EnableGlobalMerge("aarch64-global-merge", cl::Hidden, 102 cl::desc("Enable the global merge pass")); 103 104 extern "C" void LLVMInitializeAArch64Target() { 105 // Register the target. 106 RegisterTargetMachine<AArch64leTargetMachine> X(TheAArch64leTarget); 107 RegisterTargetMachine<AArch64beTargetMachine> Y(TheAArch64beTarget); 108 RegisterTargetMachine<AArch64leTargetMachine> Z(TheARM64Target); 109 initializeGlobalISel(*PassRegistry::getPassRegistry()); 110 } 111 112 //===----------------------------------------------------------------------===// 113 // AArch64 Lowering public interface. 114 //===----------------------------------------------------------------------===// 115 static std::unique_ptr<TargetLoweringObjectFile> createTLOF(const Triple &TT) { 116 if (TT.isOSBinFormatMachO()) 117 return make_unique<AArch64_MachoTargetObjectFile>(); 118 119 return make_unique<AArch64_ELFTargetObjectFile>(); 120 } 121 122 // Helper function to build a DataLayout string 123 static std::string computeDataLayout(const Triple &TT, bool LittleEndian) { 124 if (TT.isOSBinFormatMachO()) 125 return "e-m:o-i64:64-i128:128-n32:64-S128"; 126 if (LittleEndian) 127 return "e-m:e-i64:64-i128:128-n32:64-S128"; 128 return "E-m:e-i64:64-i128:128-n32:64-S128"; 129 } 130 131 /// TargetMachine ctor - Create an AArch64 architecture model. 132 /// 133 AArch64TargetMachine::AArch64TargetMachine(const Target &T, const Triple &TT, 134 StringRef CPU, StringRef FS, 135 const TargetOptions &Options, 136 Reloc::Model RM, CodeModel::Model CM, 137 CodeGenOpt::Level OL, 138 bool LittleEndian) 139 // This nested ternary is horrible, but DL needs to be properly 140 // initialized before TLInfo is constructed. 141 : LLVMTargetMachine(T, computeDataLayout(TT, LittleEndian), TT, CPU, FS, 142 Options, RM, CM, OL), 143 TLOF(createTLOF(getTargetTriple())), 144 isLittle(LittleEndian) { 145 initAsmInfo(); 146 } 147 148 AArch64TargetMachine::~AArch64TargetMachine() {} 149 150 const AArch64Subtarget * 151 AArch64TargetMachine::getSubtargetImpl(const Function &F) const { 152 Attribute CPUAttr = F.getFnAttribute("target-cpu"); 153 Attribute FSAttr = F.getFnAttribute("target-features"); 154 155 std::string CPU = !CPUAttr.hasAttribute(Attribute::None) 156 ? CPUAttr.getValueAsString().str() 157 : TargetCPU; 158 std::string FS = !FSAttr.hasAttribute(Attribute::None) 159 ? FSAttr.getValueAsString().str() 160 : TargetFS; 161 162 auto &I = SubtargetMap[CPU + FS]; 163 if (!I) { 164 // This needs to be done before we create a new subtarget since any 165 // creation will depend on the TM and the code generation flags on the 166 // function that reside in TargetOptions. 167 resetTargetOptions(F); 168 I = llvm::make_unique<AArch64Subtarget>(TargetTriple, CPU, FS, *this, 169 isLittle); 170 } 171 return I.get(); 172 } 173 174 void AArch64leTargetMachine::anchor() { } 175 176 AArch64leTargetMachine::AArch64leTargetMachine( 177 const Target &T, const Triple &TT, StringRef CPU, StringRef FS, 178 const TargetOptions &Options, Reloc::Model RM, CodeModel::Model CM, 179 CodeGenOpt::Level OL) 180 : AArch64TargetMachine(T, TT, CPU, FS, Options, RM, CM, OL, true) {} 181 182 void AArch64beTargetMachine::anchor() { } 183 184 AArch64beTargetMachine::AArch64beTargetMachine( 185 const Target &T, const Triple &TT, StringRef CPU, StringRef FS, 186 const TargetOptions &Options, Reloc::Model RM, CodeModel::Model CM, 187 CodeGenOpt::Level OL) 188 : AArch64TargetMachine(T, TT, CPU, FS, Options, RM, CM, OL, false) {} 189 190 namespace { 191 /// AArch64 Code Generator Pass Configuration Options. 192 class AArch64PassConfig : public TargetPassConfig { 193 public: 194 AArch64PassConfig(AArch64TargetMachine *TM, PassManagerBase &PM) 195 : TargetPassConfig(TM, PM) { 196 if (TM->getOptLevel() != CodeGenOpt::None) 197 substitutePass(&PostRASchedulerID, &PostMachineSchedulerID); 198 } 199 200 AArch64TargetMachine &getAArch64TargetMachine() const { 201 return getTM<AArch64TargetMachine>(); 202 } 203 204 void addIRPasses() override; 205 bool addPreISel() override; 206 bool addInstSelector() override; 207 #ifdef LLVM_BUILD_GLOBAL_ISEL 208 bool addIRTranslator() override; 209 #endif 210 bool addILPOpts() override; 211 void addPreRegAlloc() override; 212 void addPostRegAlloc() override; 213 void addPreSched2() override; 214 void addPreEmitPass() override; 215 }; 216 } // namespace 217 218 TargetIRAnalysis AArch64TargetMachine::getTargetIRAnalysis() { 219 return TargetIRAnalysis([this](const Function &F) { 220 return TargetTransformInfo(AArch64TTIImpl(this, F)); 221 }); 222 } 223 224 TargetPassConfig *AArch64TargetMachine::createPassConfig(PassManagerBase &PM) { 225 return new AArch64PassConfig(this, PM); 226 } 227 228 void AArch64PassConfig::addIRPasses() { 229 // Always expand atomic operations, we don't deal with atomicrmw or cmpxchg 230 // ourselves. 231 addPass(createAtomicExpandPass(TM)); 232 233 // Cmpxchg instructions are often used with a subsequent comparison to 234 // determine whether it succeeded. We can exploit existing control-flow in 235 // ldrex/strex loops to simplify this, but it needs tidying up. 236 if (TM->getOptLevel() != CodeGenOpt::None && EnableAtomicTidy) 237 addPass(createCFGSimplificationPass()); 238 239 TargetPassConfig::addIRPasses(); 240 241 // Match interleaved memory accesses to ldN/stN intrinsics. 242 if (TM->getOptLevel() != CodeGenOpt::None) 243 addPass(createInterleavedAccessPass(TM)); 244 245 if (TM->getOptLevel() == CodeGenOpt::Aggressive && EnableGEPOpt) { 246 // Call SeparateConstOffsetFromGEP pass to extract constants within indices 247 // and lower a GEP with multiple indices to either arithmetic operations or 248 // multiple GEPs with single index. 249 addPass(createSeparateConstOffsetFromGEPPass(TM, true)); 250 // Call EarlyCSE pass to find and remove subexpressions in the lowered 251 // result. 252 addPass(createEarlyCSEPass()); 253 // Do loop invariant code motion in case part of the lowered result is 254 // invariant. 255 addPass(createLICMPass()); 256 } 257 } 258 259 // Pass Pipeline Configuration 260 bool AArch64PassConfig::addPreISel() { 261 // Run promote constant before global merge, so that the promoted constants 262 // get a chance to be merged 263 if (TM->getOptLevel() != CodeGenOpt::None && EnablePromoteConstant) 264 addPass(createAArch64PromoteConstantPass()); 265 // FIXME: On AArch64, this depends on the type. 266 // Basically, the addressable offsets are up to 4095 * Ty.getSizeInBytes(). 267 // and the offset has to be a multiple of the related size in bytes. 268 if ((TM->getOptLevel() != CodeGenOpt::None && 269 EnableGlobalMerge == cl::BOU_UNSET) || 270 EnableGlobalMerge == cl::BOU_TRUE) { 271 bool OnlyOptimizeForSize = (TM->getOptLevel() < CodeGenOpt::Aggressive) && 272 (EnableGlobalMerge == cl::BOU_UNSET); 273 addPass(createGlobalMergePass(TM, 4095, OnlyOptimizeForSize)); 274 } 275 276 if (TM->getOptLevel() != CodeGenOpt::None) 277 addPass(createAArch64AddressTypePromotionPass()); 278 279 return false; 280 } 281 282 bool AArch64PassConfig::addInstSelector() { 283 addPass(createAArch64ISelDag(getAArch64TargetMachine(), getOptLevel())); 284 285 // For ELF, cleanup any local-dynamic TLS accesses (i.e. combine as many 286 // references to _TLS_MODULE_BASE_ as possible. 287 if (TM->getTargetTriple().isOSBinFormatELF() && 288 getOptLevel() != CodeGenOpt::None) 289 addPass(createAArch64CleanupLocalDynamicTLSPass()); 290 291 return false; 292 } 293 294 #ifdef LLVM_BUILD_GLOBAL_ISEL 295 bool AArch64PassConfig::addIRTranslator() { 296 addPass(new IRTranslator()); 297 return false; 298 } 299 #endif 300 301 bool AArch64PassConfig::addILPOpts() { 302 if (EnableCondOpt) 303 addPass(createAArch64ConditionOptimizerPass()); 304 if (EnableCCMP) 305 addPass(createAArch64ConditionalCompares()); 306 if (EnableMCR) 307 addPass(&MachineCombinerID); 308 if (EnableEarlyIfConversion) 309 addPass(&EarlyIfConverterID); 310 if (EnableStPairSuppress) 311 addPass(createAArch64StorePairSuppressPass()); 312 return true; 313 } 314 315 void AArch64PassConfig::addPreRegAlloc() { 316 // Use AdvSIMD scalar instructions whenever profitable. 317 if (TM->getOptLevel() != CodeGenOpt::None && EnableAdvSIMDScalar) { 318 addPass(createAArch64AdvSIMDScalar()); 319 // The AdvSIMD pass may produce copies that can be rewritten to 320 // be register coaleascer friendly. 321 addPass(&PeepholeOptimizerID); 322 } 323 } 324 325 void AArch64PassConfig::addPostRegAlloc() { 326 // Remove redundant copy instructions. 327 if (TM->getOptLevel() != CodeGenOpt::None && EnableRedundantCopyElimination) 328 addPass(createAArch64RedundantCopyEliminationPass()); 329 330 // Change dead register definitions to refer to the zero register. 331 if (TM->getOptLevel() != CodeGenOpt::None && EnableDeadRegisterElimination) 332 addPass(createAArch64DeadRegisterDefinitions()); 333 if (TM->getOptLevel() != CodeGenOpt::None && usingDefaultRegAlloc()) 334 // Improve performance for some FP/SIMD code for A57. 335 addPass(createAArch64A57FPLoadBalancing()); 336 } 337 338 void AArch64PassConfig::addPreSched2() { 339 // Expand some pseudo instructions to allow proper scheduling. 340 addPass(createAArch64ExpandPseudoPass()); 341 // Use load/store pair instructions when possible. 342 if (TM->getOptLevel() != CodeGenOpt::None && EnableLoadStoreOpt) 343 addPass(createAArch64LoadStoreOptimizationPass()); 344 } 345 346 void AArch64PassConfig::addPreEmitPass() { 347 if (EnableA53Fix835769) 348 addPass(createAArch64A53Fix835769()); 349 // Relax conditional branch instructions if they're otherwise out of 350 // range of their destination. 351 addPass(createAArch64BranchRelaxation()); 352 if (TM->getOptLevel() != CodeGenOpt::None && EnableCollectLOH && 353 TM->getTargetTriple().isOSBinFormatMachO()) 354 addPass(createAArch64CollectLOHPass()); 355 } 356