1 //===-- AArch64TargetMachine.cpp - Define TargetMachine for AArch64 -------===//
2 //
3 //                     The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 //
11 //===----------------------------------------------------------------------===//
12 
13 #include "AArch64.h"
14 #include "AArch64MacroFusion.h"
15 #include "AArch64Subtarget.h"
16 #include "AArch64TargetMachine.h"
17 #include "AArch64TargetObjectFile.h"
18 #include "AArch64TargetTransformInfo.h"
19 #include "MCTargetDesc/AArch64MCTargetDesc.h"
20 #include "llvm/ADT/STLExtras.h"
21 #include "llvm/ADT/Triple.h"
22 #include "llvm/Analysis/TargetTransformInfo.h"
23 #include "llvm/CodeGen/GlobalISel/IRTranslator.h"
24 #include "llvm/CodeGen/GlobalISel/InstructionSelect.h"
25 #include "llvm/CodeGen/GlobalISel/Legalizer.h"
26 #include "llvm/CodeGen/GlobalISel/RegBankSelect.h"
27 #include "llvm/CodeGen/MachineScheduler.h"
28 #include "llvm/CodeGen/Passes.h"
29 #include "llvm/CodeGen/TargetPassConfig.h"
30 #include "llvm/IR/Attributes.h"
31 #include "llvm/IR/Function.h"
32 #include "llvm/MC/MCTargetOptions.h"
33 #include "llvm/Pass.h"
34 #include "llvm/Support/CodeGen.h"
35 #include "llvm/Support/CommandLine.h"
36 #include "llvm/Support/TargetRegistry.h"
37 #include "llvm/Target/TargetLoweringObjectFile.h"
38 #include "llvm/Target/TargetOptions.h"
39 #include "llvm/Transforms/Scalar.h"
40 #include <memory>
41 #include <string>
42 
43 using namespace llvm;
44 
// Command-line knobs for enabling/disabling individual AArch64 backend
// passes. All are hidden (developer-facing) options; the defaults reflect
// the production pipeline.
static cl::opt<bool> EnableCCMP("aarch64-enable-ccmp",
                                cl::desc("Enable the CCMP formation pass"),
                                cl::init(true), cl::Hidden);

static cl::opt<bool> EnableMCR("aarch64-enable-mcr",
                               cl::desc("Enable the machine combiner pass"),
                               cl::init(true), cl::Hidden);

static cl::opt<bool> EnableStPairSuppress("aarch64-enable-stp-suppress",
                                          cl::desc("Suppress STP for AArch64"),
                                          cl::init(true), cl::Hidden);

static cl::opt<bool> EnableAdvSIMDScalar(
    "aarch64-enable-simd-scalar",
    cl::desc("Enable use of AdvSIMD scalar integer instructions"),
    cl::init(false), cl::Hidden);

static cl::opt<bool>
    EnablePromoteConstant("aarch64-enable-promote-const",
                          cl::desc("Enable the promote constant pass"),
                          cl::init(true), cl::Hidden);

static cl::opt<bool> EnableCollectLOH(
    "aarch64-enable-collect-loh",
    cl::desc("Enable the pass that emits the linker optimization hints (LOH)"),
    cl::init(true), cl::Hidden);

static cl::opt<bool>
    EnableDeadRegisterElimination("aarch64-enable-dead-defs", cl::Hidden,
                                  cl::desc("Enable the pass that removes dead"
                                           " definitons and replaces stores to"
                                           " them with stores to the zero"
                                           " register"),
                                  cl::init(true));

static cl::opt<bool> EnableRedundantCopyElimination(
    "aarch64-enable-copyelim",
    cl::desc("Enable the redundant copy elimination pass"), cl::init(true),
    cl::Hidden);

static cl::opt<bool> EnableLoadStoreOpt("aarch64-enable-ldst-opt",
                                        cl::desc("Enable the load/store pair"
                                                 " optimization pass"),
                                        cl::init(true), cl::Hidden);

static cl::opt<bool> EnableAtomicTidy(
    "aarch64-enable-atomic-cfg-tidy", cl::Hidden,
    cl::desc("Run SimplifyCFG after expanding atomic operations"
             " to make use of cmpxchg flow-based information"),
    cl::init(true));

static cl::opt<bool>
EnableEarlyIfConversion("aarch64-enable-early-ifcvt", cl::Hidden,
                        cl::desc("Run early if-conversion"),
                        cl::init(true));

static cl::opt<bool>
    EnableCondOpt("aarch64-enable-condopt",
                  cl::desc("Enable the condition optimizer pass"),
                  cl::init(true), cl::Hidden);

static cl::opt<bool>
EnableA53Fix835769("aarch64-fix-cortex-a53-835769", cl::Hidden,
                cl::desc("Work around Cortex-A53 erratum 835769"),
                cl::init(false));

static cl::opt<bool>
    EnableAddressTypePromotion("aarch64-enable-type-promotion", cl::Hidden,
                               cl::desc("Enable the type promotion pass"),
                               cl::init(false));

static cl::opt<bool>
    EnableGEPOpt("aarch64-enable-gep-opt", cl::Hidden,
                 cl::desc("Enable optimizations on complex GEPs"),
                 cl::init(false));

static cl::opt<bool>
    BranchRelaxation("aarch64-enable-branch-relax", cl::Hidden, cl::init(true),
                     cl::desc("Relax out of range conditional branches"));

// FIXME: Unify control over GlobalMerge.
// Tri-state: unset means "use the opt-level-dependent default" (see
// AArch64PassConfig::addPreISel).
static cl::opt<cl::boolOrDefault>
    EnableGlobalMerge("aarch64-enable-global-merge", cl::Hidden,
                      cl::desc("Enable the global merge pass"));

static cl::opt<bool>
    EnableLoopDataPrefetch("aarch64-enable-loop-data-prefetch", cl::Hidden,
                           cl::desc("Enable the loop data prefetch pass"),
                           cl::init(true));

// Opt-level threshold for GlobalISel; -1 (the default) disables it entirely.
static cl::opt<int> EnableGlobalISelAtO(
    "aarch64-enable-global-isel-at-O", cl::Hidden,
    cl::desc("Enable GlobalISel at or below an opt level (-1 to disable)"),
    cl::init(-1));
139 
140 extern "C" void LLVMInitializeAArch64Target() {
141   // Register the target.
142   RegisterTargetMachine<AArch64leTargetMachine> X(getTheAArch64leTarget());
143   RegisterTargetMachine<AArch64beTargetMachine> Y(getTheAArch64beTarget());
144   RegisterTargetMachine<AArch64leTargetMachine> Z(getTheARM64Target());
145   auto PR = PassRegistry::getPassRegistry();
146   initializeGlobalISel(*PR);
147   initializeAArch64A53Fix835769Pass(*PR);
148   initializeAArch64A57FPLoadBalancingPass(*PR);
149   initializeAArch64AddressTypePromotionPass(*PR);
150   initializeAArch64AdvSIMDScalarPass(*PR);
151   initializeAArch64CollectLOHPass(*PR);
152   initializeAArch64ConditionalComparesPass(*PR);
153   initializeAArch64ConditionOptimizerPass(*PR);
154   initializeAArch64DeadRegisterDefinitionsPass(*PR);
155   initializeAArch64ExpandPseudoPass(*PR);
156   initializeAArch64LoadStoreOptPass(*PR);
157   initializeAArch64VectorByElementOptPass(*PR);
158   initializeAArch64PromoteConstantPass(*PR);
159   initializeAArch64RedundantCopyEliminationPass(*PR);
160   initializeAArch64StorePairSuppressPass(*PR);
161   initializeLDTLSCleanupPass(*PR);
162 }
163 
164 //===----------------------------------------------------------------------===//
165 // AArch64 Lowering public interface.
166 //===----------------------------------------------------------------------===//
167 static std::unique_ptr<TargetLoweringObjectFile> createTLOF(const Triple &TT) {
168   if (TT.isOSBinFormatMachO())
169     return llvm::make_unique<AArch64_MachoTargetObjectFile>();
170 
171   return llvm::make_unique<AArch64_ELFTargetObjectFile>();
172 }
173 
174 // Helper function to build a DataLayout string
175 static std::string computeDataLayout(const Triple &TT,
176                                      const MCTargetOptions &Options,
177                                      bool LittleEndian) {
178   if (Options.getABIName() == "ilp32")
179     return "e-m:e-p:32:32-i8:8-i16:16-i64:64-S128";
180   if (TT.isOSBinFormatMachO())
181     return "e-m:o-i64:64-i128:128-n32:64-S128";
182   if (LittleEndian)
183     return "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128";
184   return "E-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128";
185 }
186 
187 static Reloc::Model getEffectiveRelocModel(const Triple &TT,
188                                            Optional<Reloc::Model> RM) {
189   // AArch64 Darwin is always PIC.
190   if (TT.isOSDarwin())
191     return Reloc::PIC_;
192   // On ELF platforms the default static relocation model has a smart enough
193   // linker to cope with referencing external symbols defined in a shared
194   // library. Hence DynamicNoPIC doesn't need to be promoted to PIC.
195   if (!RM.hasValue() || *RM == Reloc::DynamicNoPIC)
196     return Reloc::Static;
197   return *RM;
198 }
199 
200 /// Create an AArch64 architecture model.
201 ///
202 AArch64TargetMachine::AArch64TargetMachine(
203     const Target &T, const Triple &TT, StringRef CPU, StringRef FS,
204     const TargetOptions &Options, Optional<Reloc::Model> RM,
205     CodeModel::Model CM, CodeGenOpt::Level OL, bool LittleEndian)
206     // This nested ternary is horrible, but DL needs to be properly
207     // initialized before TLInfo is constructed.
208     : LLVMTargetMachine(T, computeDataLayout(TT, Options.MCOptions,
209                                              LittleEndian),
210                         TT, CPU, FS, Options,
211 			getEffectiveRelocModel(TT, RM), CM, OL),
212       TLOF(createTLOF(getTargetTriple())),
213       isLittle(LittleEndian) {
214   initAsmInfo();
215 }
216 
217 AArch64TargetMachine::~AArch64TargetMachine() = default;
218 
/// Return (and lazily create) the subtarget for function \p F, honoring the
/// per-function "target-cpu" / "target-features" attributes and the optsize
/// setting. Subtargets are cached and keyed on CPU + features + optsize.
const AArch64Subtarget *
AArch64TargetMachine::getSubtargetImpl(const Function &F) const {
  Attribute CPUAttr = F.getFnAttribute("target-cpu");
  Attribute FSAttr = F.getFnAttribute("target-features");
  bool ForCodeSize = F.optForSize();

  // Fall back to the module-level TargetCPU/TargetFS strings when the
  // function carries no attribute of its own (hasAttribute(Attribute::None)
  // is true for an unset attribute).
  std::string CPU = !CPUAttr.hasAttribute(Attribute::None)
                        ? CPUAttr.getValueAsString().str()
                        : TargetCPU;
  std::string FS = !FSAttr.hasAttribute(Attribute::None)
                       ? FSAttr.getValueAsString().str()
                       : TargetFS;
  // Encode the optsize bit into the cache key so optsize and non-optsize
  // functions get distinct subtargets.
  std::string ForCodeSizeStr =
      std::string(ForCodeSize ? "+" : "-") + "forcodesize";

  auto &I = SubtargetMap[CPU + FS + ForCodeSizeStr];
  if (!I) {
    // This needs to be done before we create a new subtarget since any
    // creation will depend on the TM and the code generation flags on the
    // function that reside in TargetOptions.
    resetTargetOptions(F);
    I = llvm::make_unique<AArch64Subtarget>(TargetTriple, CPU, FS, *this,
                                            isLittle, ForCodeSize);
  }
  return I.get();
}
245 
// Out-of-line anchor to pin the vtable to this translation unit.
void AArch64leTargetMachine::anchor() { }

/// Little-endian AArch64 target machine: delegates to the common
/// constructor with LittleEndian = true.
AArch64leTargetMachine::AArch64leTargetMachine(
    const Target &T, const Triple &TT, StringRef CPU, StringRef FS,
    const TargetOptions &Options, Optional<Reloc::Model> RM,
    CodeModel::Model CM, CodeGenOpt::Level OL)
    : AArch64TargetMachine(T, TT, CPU, FS, Options, RM, CM, OL, true) {}
253 
// Out-of-line anchor to pin the vtable to this translation unit.
void AArch64beTargetMachine::anchor() { }

/// Big-endian AArch64 target machine: delegates to the common constructor
/// with LittleEndian = false.
AArch64beTargetMachine::AArch64beTargetMachine(
    const Target &T, const Triple &TT, StringRef CPU, StringRef FS,
    const TargetOptions &Options, Optional<Reloc::Model> RM,
    CodeModel::Model CM, CodeGenOpt::Level OL)
    : AArch64TargetMachine(T, TT, CPU, FS, Options, RM, CM, OL, false) {}
261 
namespace {

/// AArch64 Code Generator Pass Configuration Options.
///
/// Hooks into TargetPassConfig to insert the AArch64-specific passes at the
/// appropriate points of the generic codegen pipeline; the overrides are
/// defined out-of-line below.
class AArch64PassConfig : public TargetPassConfig {
public:
  AArch64PassConfig(AArch64TargetMachine *TM, PassManagerBase &PM)
      : TargetPassConfig(TM, PM) {
    // When optimizing, use the MachineScheduler-based post-RA scheduler
    // instead of the legacy post-RA list scheduler.
    if (TM->getOptLevel() != CodeGenOpt::None)
      substitutePass(&PostRASchedulerID, &PostMachineSchedulerID);
  }

  AArch64TargetMachine &getAArch64TargetMachine() const {
    return getTM<AArch64TargetMachine>();
  }

  // Pre-RA scheduler: generic scheduling plus load/store clustering and
  // AArch64 macro fusion mutations.
  ScheduleDAGInstrs *
  createMachineScheduler(MachineSchedContext *C) const override {
    ScheduleDAGMILive *DAG = createGenericSchedLive(C);
    DAG->addMutation(createLoadClusterDAGMutation(DAG->TII, DAG->TRI));
    DAG->addMutation(createStoreClusterDAGMutation(DAG->TII, DAG->TRI));
    DAG->addMutation(createAArch64MacroFusionDAGMutation());
    return DAG;
  }

  // Post-RA scheduler: only customized (macro fusion again) on subtargets
  // that fuse literal formation; otherwise return nullptr to get the
  // default behavior.
  ScheduleDAGInstrs *
  createPostMachineScheduler(MachineSchedContext *C) const override {
    const AArch64Subtarget &ST = C->MF->getSubtarget<AArch64Subtarget>();
    if (ST.hasFuseLiterals()) {
      // Run the Macro Fusion after RA again since literals are expanded from
      // pseudos then (v. addPreSched2()).
      ScheduleDAGMI *DAG = createGenericSchedPostRA(C);
      DAG->addMutation(createAArch64MacroFusionDAGMutation());
      return DAG;
    }

    return nullptr;
  }

  void addIRPasses()  override;
  bool addPreISel() override;
  bool addInstSelector() override;
#ifdef LLVM_BUILD_GLOBAL_ISEL
  bool addIRTranslator() override;
  bool addLegalizeMachineIR() override;
  bool addRegBankSelect() override;
  bool addGlobalInstructionSelect() override;
#endif
  bool addILPOpts() override;
  void addPreRegAlloc() override;
  void addPostRegAlloc() override;
  void addPreSched2() override;
  void addPreEmitPass() override;

  bool isGlobalISelEnabled() const override;
};

} // end anonymous namespace
319 
320 TargetIRAnalysis AArch64TargetMachine::getTargetIRAnalysis() {
321   return TargetIRAnalysis([this](const Function &F) {
322     return TargetTransformInfo(AArch64TTIImpl(this, F));
323   });
324 }
325 
/// Create the pass-pipeline configuration object; the caller takes
/// ownership of the returned pointer.
TargetPassConfig *AArch64TargetMachine::createPassConfig(PassManagerBase &PM) {
  return new AArch64PassConfig(this, PM);
}
329 
330 void AArch64PassConfig::addIRPasses() {
331   // Always expand atomic operations, we don't deal with atomicrmw or cmpxchg
332   // ourselves.
333   addPass(createAtomicExpandPass(TM));
334 
335   // Cmpxchg instructions are often used with a subsequent comparison to
336   // determine whether it succeeded. We can exploit existing control-flow in
337   // ldrex/strex loops to simplify this, but it needs tidying up.
338   if (TM->getOptLevel() != CodeGenOpt::None && EnableAtomicTidy)
339     addPass(createCFGSimplificationPass());
340 
341   // Run LoopDataPrefetch
342   //
343   // Run this before LSR to remove the multiplies involved in computing the
344   // pointer values N iterations ahead.
345   if (TM->getOptLevel() != CodeGenOpt::None && EnableLoopDataPrefetch)
346     addPass(createLoopDataPrefetchPass());
347 
348   TargetPassConfig::addIRPasses();
349 
350   // Match interleaved memory accesses to ldN/stN intrinsics.
351   if (TM->getOptLevel() != CodeGenOpt::None)
352     addPass(createInterleavedAccessPass(TM));
353 
354   if (TM->getOptLevel() == CodeGenOpt::Aggressive && EnableGEPOpt) {
355     // Call SeparateConstOffsetFromGEP pass to extract constants within indices
356     // and lower a GEP with multiple indices to either arithmetic operations or
357     // multiple GEPs with single index.
358     addPass(createSeparateConstOffsetFromGEPPass(TM, true));
359     // Call EarlyCSE pass to find and remove subexpressions in the lowered
360     // result.
361     addPass(createEarlyCSEPass());
362     // Do loop invariant code motion in case part of the lowered result is
363     // invariant.
364     addPass(createLICMPass());
365   }
366 }
367 
368 // Pass Pipeline Configuration
369 bool AArch64PassConfig::addPreISel() {
370   // Run promote constant before global merge, so that the promoted constants
371   // get a chance to be merged
372   if (TM->getOptLevel() != CodeGenOpt::None && EnablePromoteConstant)
373     addPass(createAArch64PromoteConstantPass());
374   // FIXME: On AArch64, this depends on the type.
375   // Basically, the addressable offsets are up to 4095 * Ty.getSizeInBytes().
376   // and the offset has to be a multiple of the related size in bytes.
377   if ((TM->getOptLevel() != CodeGenOpt::None &&
378        EnableGlobalMerge == cl::BOU_UNSET) ||
379       EnableGlobalMerge == cl::BOU_TRUE) {
380     bool OnlyOptimizeForSize = (TM->getOptLevel() < CodeGenOpt::Aggressive) &&
381                                (EnableGlobalMerge == cl::BOU_UNSET);
382     addPass(createGlobalMergePass(TM, 4095, OnlyOptimizeForSize));
383   }
384 
385   if (TM->getOptLevel() != CodeGenOpt::None && EnableAddressTypePromotion)
386     addPass(createAArch64AddressTypePromotionPass());
387 
388   return false;
389 }
390 
391 bool AArch64PassConfig::addInstSelector() {
392   addPass(createAArch64ISelDag(getAArch64TargetMachine(), getOptLevel()));
393 
394   // For ELF, cleanup any local-dynamic TLS accesses (i.e. combine as many
395   // references to _TLS_MODULE_BASE_ as possible.
396   if (TM->getTargetTriple().isOSBinFormatELF() &&
397       getOptLevel() != CodeGenOpt::None)
398     addPass(createAArch64CleanupLocalDynamicTLSPass());
399 
400   return false;
401 }
402 
#ifdef LLVM_BUILD_GLOBAL_ISEL
// GlobalISel pipeline hooks: IR -> generic MI translation, legalization,
// register-bank assignment, and final instruction selection.
bool AArch64PassConfig::addIRTranslator() {
  addPass(new IRTranslator());
  return false;
}

bool AArch64PassConfig::addLegalizeMachineIR() {
  addPass(new Legalizer());
  return false;
}

bool AArch64PassConfig::addRegBankSelect() {
  addPass(new RegBankSelect());
  return false;
}

bool AArch64PassConfig::addGlobalInstructionSelect() {
  addPass(new InstructionSelect());
  return false;
}
#endif
424 
425 bool AArch64PassConfig::isGlobalISelEnabled() const {
426   return TM->getOptLevel() <= EnableGlobalISelAtO;
427 }
428 
/// Add ILP-improving passes; each is individually toggleable from the
/// command line. Returning true requests a MachineInstr printer afterwards.
bool AArch64PassConfig::addILPOpts() {
  if (EnableCondOpt)
    addPass(createAArch64ConditionOptimizerPass());
  if (EnableCCMP)
    addPass(createAArch64ConditionalCompares());
  if (EnableMCR)
    addPass(&MachineCombinerID);
  if (EnableEarlyIfConversion)
    addPass(&EarlyIfConverterID);
  if (EnableStPairSuppress)
    addPass(createAArch64StorePairSuppressPass());
  addPass(createAArch64VectorByElementOptPass());
  return true;
}
443 
444 void AArch64PassConfig::addPreRegAlloc() {
445   // Change dead register definitions to refer to the zero register.
446   if (TM->getOptLevel() != CodeGenOpt::None && EnableDeadRegisterElimination)
447     addPass(createAArch64DeadRegisterDefinitions());
448 
449   // Use AdvSIMD scalar instructions whenever profitable.
450   if (TM->getOptLevel() != CodeGenOpt::None && EnableAdvSIMDScalar) {
451     addPass(createAArch64AdvSIMDScalar());
452     // The AdvSIMD pass may produce copies that can be rewritten to
453     // be register coaleascer friendly.
454     addPass(&PeepholeOptimizerID);
455   }
456 }
457 
458 void AArch64PassConfig::addPostRegAlloc() {
459   // Remove redundant copy instructions.
460   if (TM->getOptLevel() != CodeGenOpt::None && EnableRedundantCopyElimination)
461     addPass(createAArch64RedundantCopyEliminationPass());
462 
463   if (TM->getOptLevel() != CodeGenOpt::None && usingDefaultRegAlloc())
464     // Improve performance for some FP/SIMD code for A57.
465     addPass(createAArch64A57FPLoadBalancing());
466 }
467 
/// Runs before the second (post-RA) scheduling pass.
void AArch64PassConfig::addPreSched2() {
  // Expand some pseudo instructions to allow proper scheduling.
  addPass(createAArch64ExpandPseudoPass());
  // Use load/store pair instructions when possible.
  if (TM->getOptLevel() != CodeGenOpt::None && EnableLoadStoreOpt)
    addPass(createAArch64LoadStoreOptimizationPass());
}
475 
/// Last-minute passes before machine code emission.
void AArch64PassConfig::addPreEmitPass() {
  // Workaround for Cortex-A53 erratum 835769 (off by default).
  if (EnableA53Fix835769)
    addPass(createAArch64A53Fix835769());
  // Relax conditional branch instructions if they're otherwise out of
  // range of their destination.
  if (BranchRelaxation)
    addPass(&BranchRelaxationPassID);

  // Linker optimization hints are only meaningful for MachO output.
  if (TM->getOptLevel() != CodeGenOpt::None && EnableCollectLOH &&
      TM->getTargetTriple().isOSBinFormatMachO())
    addPass(createAArch64CollectLOHPass());
}
488