1 //===-- PPCTargetMachine.cpp - Define TargetMachine for PowerPC -----------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // Top-level implementation for the PowerPC target.
10 //
11 //===----------------------------------------------------------------------===//
12 
13 #include "PPCTargetMachine.h"
14 #include "MCTargetDesc/PPCMCTargetDesc.h"
15 #include "PPC.h"
16 #include "PPCMachineScheduler.h"
17 #include "PPCMacroFusion.h"
18 #include "PPCSubtarget.h"
19 #include "PPCTargetObjectFile.h"
20 #include "PPCTargetTransformInfo.h"
21 #include "TargetInfo/PowerPCTargetInfo.h"
22 #include "llvm/ADT/Optional.h"
23 #include "llvm/ADT/STLExtras.h"
24 #include "llvm/ADT/StringRef.h"
25 #include "llvm/ADT/Triple.h"
26 #include "llvm/Analysis/TargetTransformInfo.h"
27 #include "llvm/CodeGen/GlobalISel/IRTranslator.h"
28 #include "llvm/CodeGen/GlobalISel/InstructionSelect.h"
29 #include "llvm/CodeGen/GlobalISel/Legalizer.h"
30 #include "llvm/CodeGen/GlobalISel/Localizer.h"
31 #include "llvm/CodeGen/GlobalISel/RegBankSelect.h"
32 #include "llvm/CodeGen/MachineScheduler.h"
33 #include "llvm/CodeGen/Passes.h"
34 #include "llvm/CodeGen/TargetPassConfig.h"
35 #include "llvm/IR/Attributes.h"
36 #include "llvm/IR/DataLayout.h"
37 #include "llvm/IR/Function.h"
38 #include "llvm/InitializePasses.h"
39 #include "llvm/Pass.h"
40 #include "llvm/Support/CodeGen.h"
41 #include "llvm/Support/CommandLine.h"
42 #include "llvm/Support/TargetRegistry.h"
43 #include "llvm/Target/TargetLoweringObjectFile.h"
44 #include "llvm/Target/TargetOptions.h"
45 #include "llvm/Transforms/Scalar.h"
46 #include <cassert>
47 #include <memory>
48 #include <string>
49 
50 using namespace llvm;
51 
52 
53 static cl::opt<bool>
54     EnableBranchCoalescing("enable-ppc-branch-coalesce", cl::Hidden,
55                            cl::desc("enable coalescing of duplicate branches for PPC"));
56 static cl::
57 opt<bool> DisableCTRLoops("disable-ppc-ctrloops", cl::Hidden,
58                         cl::desc("Disable CTR loops for PPC"));
59 
60 static cl::
61 opt<bool> DisableInstrFormPrep("disable-ppc-instr-form-prep", cl::Hidden,
62                             cl::desc("Disable PPC loop instr form prep"));
63 
64 static cl::opt<bool>
65 VSXFMAMutateEarly("schedule-ppc-vsx-fma-mutation-early",
66   cl::Hidden, cl::desc("Schedule VSX FMA instruction mutation early"));
67 
68 static cl::
69 opt<bool> DisableVSXSwapRemoval("disable-ppc-vsx-swap-removal", cl::Hidden,
70                                 cl::desc("Disable VSX Swap Removal for PPC"));
71 
72 static cl::
73 opt<bool> DisableMIPeephole("disable-ppc-peephole", cl::Hidden,
74                             cl::desc("Disable machine peepholes for PPC"));
75 
76 static cl::opt<bool>
77 EnableGEPOpt("ppc-gep-opt", cl::Hidden,
78              cl::desc("Enable optimizations on complex GEPs"),
79              cl::init(true));
80 
81 static cl::opt<bool>
82 EnablePrefetch("enable-ppc-prefetching",
83                   cl::desc("enable software prefetching on PPC"),
84                   cl::init(false), cl::Hidden);
85 
86 static cl::opt<bool>
87 EnableExtraTOCRegDeps("enable-ppc-extra-toc-reg-deps",
88                       cl::desc("Add extra TOC register dependencies"),
89                       cl::init(true), cl::Hidden);
90 
91 static cl::opt<bool>
92 EnableMachineCombinerPass("ppc-machine-combiner",
93                           cl::desc("Enable the machine combiner pass"),
94                           cl::init(true), cl::Hidden);
95 
96 static cl::opt<bool>
97   ReduceCRLogical("ppc-reduce-cr-logicals",
98                   cl::desc("Expand eligible cr-logical binary ops to branches"),
99                   cl::init(true), cl::Hidden);
100 extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializePowerPCTarget() {
101   // Register the targets
102   RegisterTargetMachine<PPCTargetMachine> A(getThePPC32Target());
103   RegisterTargetMachine<PPCTargetMachine> B(getThePPC32LETarget());
104   RegisterTargetMachine<PPCTargetMachine> C(getThePPC64Target());
105   RegisterTargetMachine<PPCTargetMachine> D(getThePPC64LETarget());
106 
107   PassRegistry &PR = *PassRegistry::getPassRegistry();
108 #ifndef NDEBUG
109   initializePPCCTRLoopsVerifyPass(PR);
110 #endif
111   initializePPCLoopInstrFormPrepPass(PR);
112   initializePPCTOCRegDepsPass(PR);
113   initializePPCEarlyReturnPass(PR);
114   initializePPCVSXCopyPass(PR);
115   initializePPCVSXFMAMutatePass(PR);
116   initializePPCVSXSwapRemovalPass(PR);
117   initializePPCReduceCRLogicalsPass(PR);
118   initializePPCBSelPass(PR);
119   initializePPCBranchCoalescingPass(PR);
120   initializePPCBoolRetToIntPass(PR);
121   initializePPCExpandISELPass(PR);
122   initializePPCPreEmitPeepholePass(PR);
123   initializePPCTLSDynamicCallPass(PR);
124   initializePPCMIPeepholePass(PR);
125   initializePPCLowerMASSVEntriesPass(PR);
126   initializeGlobalISel(PR);
127 }
128 
129 /// Return the datalayout string of a subtarget.
130 static std::string getDataLayoutString(const Triple &T) {
131   bool is64Bit = T.getArch() == Triple::ppc64 || T.getArch() == Triple::ppc64le;
132   std::string Ret;
133 
134   // Most PPC* platforms are big endian, PPC(64)LE is little endian.
135   if (T.getArch() == Triple::ppc64le || T.getArch() == Triple::ppcle)
136     Ret = "e";
137   else
138     Ret = "E";
139 
140   Ret += DataLayout::getManglingComponent(T);
141 
142   // PPC32 has 32 bit pointers. The PS3 (OS Lv2) is a PPC64 machine with 32 bit
143   // pointers.
144   if (!is64Bit || T.getOS() == Triple::Lv2)
145     Ret += "-p:32:32";
146 
147   // Note, the alignment values for f64 and i64 on ppc64 in Darwin
148   // documentation are wrong; these are correct (i.e. "what gcc does").
149   if (is64Bit || !T.isOSDarwin())
150     Ret += "-i64:64";
151   else
152     Ret += "-f64:32:64";
153 
154   // PPC64 has 32 and 64 bit registers, PPC32 has only 32 bit ones.
155   if (is64Bit)
156     Ret += "-n32:64";
157   else
158     Ret += "-n32";
159 
160   // Specify the vector alignment explicitly. For v256i1 and v512i1, the
161   // calculated alignment would be 256*alignment(i1) and 512*alignment(i1),
162   // which is 256 and 512 bytes - way over aligned.
163   if ((T.getArch() == Triple::ppc64le || T.getArch() == Triple::ppc64) &&
164       (T.isOSAIX() || T.isOSLinux()))
165     Ret += "-v256:256:256-v512:512:512";
166 
167   return Ret;
168 }
169 
170 static std::string computeFSAdditions(StringRef FS, CodeGenOpt::Level OL,
171                                       const Triple &TT) {
172   std::string FullFS = std::string(FS);
173 
174   // Make sure 64-bit features are available when CPUname is generic
175   if (TT.getArch() == Triple::ppc64 || TT.getArch() == Triple::ppc64le) {
176     if (!FullFS.empty())
177       FullFS = "+64bit," + FullFS;
178     else
179       FullFS = "+64bit";
180   }
181 
182   if (OL >= CodeGenOpt::Default) {
183     if (!FullFS.empty())
184       FullFS = "+crbits," + FullFS;
185     else
186       FullFS = "+crbits";
187   }
188 
189   if (OL != CodeGenOpt::None) {
190     if (!FullFS.empty())
191       FullFS = "+invariant-function-descriptors," + FullFS;
192     else
193       FullFS = "+invariant-function-descriptors";
194   }
195 
196   return FullFS;
197 }
198 
199 static std::unique_ptr<TargetLoweringObjectFile> createTLOF(const Triple &TT) {
200   if (TT.isOSDarwin())
201     return std::make_unique<TargetLoweringObjectFileMachO>();
202 
203   if (TT.isOSAIX())
204     return std::make_unique<TargetLoweringObjectFileXCOFF>();
205 
206   return std::make_unique<PPC64LinuxTargetObjectFile>();
207 }
208 
209 static PPCTargetMachine::PPCABI computeTargetABI(const Triple &TT,
210                                                  const TargetOptions &Options) {
211   if (TT.isOSDarwin())
212     report_fatal_error("Darwin is no longer supported for PowerPC");
213 
214   if (Options.MCOptions.getABIName().startswith("elfv1"))
215     return PPCTargetMachine::PPC_ABI_ELFv1;
216   else if (Options.MCOptions.getABIName().startswith("elfv2"))
217     return PPCTargetMachine::PPC_ABI_ELFv2;
218 
219   assert(Options.MCOptions.getABIName().empty() &&
220          "Unknown target-abi option!");
221 
222   if (TT.isMacOSX())
223     return PPCTargetMachine::PPC_ABI_UNKNOWN;
224 
225   switch (TT.getArch()) {
226   case Triple::ppc64le:
227     return PPCTargetMachine::PPC_ABI_ELFv2;
228   case Triple::ppc64:
229     return PPCTargetMachine::PPC_ABI_ELFv1;
230   default:
231     return PPCTargetMachine::PPC_ABI_UNKNOWN;
232   }
233 }
234 
235 static Reloc::Model getEffectiveRelocModel(const Triple &TT,
236                                            Optional<Reloc::Model> RM) {
237   assert((!TT.isOSAIX() || !RM.hasValue() || *RM == Reloc::PIC_) &&
238          "Invalid relocation model for AIX.");
239 
240   if (RM.hasValue())
241     return *RM;
242 
243   // Darwin defaults to dynamic-no-pic.
244   if (TT.isOSDarwin())
245     return Reloc::DynamicNoPIC;
246 
247   // Big Endian PPC and AIX default to PIC.
248   if (TT.getArch() == Triple::ppc64 || TT.isOSAIX())
249     return Reloc::PIC_;
250 
251   // Rest are static by default.
252   return Reloc::Static;
253 }
254 
255 static CodeModel::Model getEffectivePPCCodeModel(const Triple &TT,
256                                                  Optional<CodeModel::Model> CM,
257                                                  bool JIT) {
258   if (CM) {
259     if (*CM == CodeModel::Tiny)
260       report_fatal_error("Target does not support the tiny CodeModel", false);
261     if (*CM == CodeModel::Kernel)
262       report_fatal_error("Target does not support the kernel CodeModel", false);
263     return *CM;
264   }
265 
266   if (JIT)
267     return CodeModel::Small;
268   if (TT.isOSAIX())
269     return CodeModel::Small;
270 
271   assert(TT.isOSBinFormatELF() && "All remaining PPC OSes are ELF based.");
272 
273   if (TT.isArch32Bit())
274     return CodeModel::Small;
275 
276   assert(TT.isArch64Bit() && "Unsupported PPC architecture.");
277   return CodeModel::Medium;
278 }
279 
280 
281 static ScheduleDAGInstrs *createPPCMachineScheduler(MachineSchedContext *C) {
282   const PPCSubtarget &ST = C->MF->getSubtarget<PPCSubtarget>();
283   ScheduleDAGMILive *DAG =
284     new ScheduleDAGMILive(C, ST.usePPCPreRASchedStrategy() ?
285                           std::make_unique<PPCPreRASchedStrategy>(C) :
286                           std::make_unique<GenericScheduler>(C));
287   // add DAG Mutations here.
288   DAG->addMutation(createCopyConstrainDAGMutation(DAG->TII, DAG->TRI));
289   if (ST.hasStoreFusion())
290     DAG->addMutation(createStoreClusterDAGMutation(DAG->TII, DAG->TRI));
291   if (ST.hasFusion())
292     DAG->addMutation(createPowerPCMacroFusionDAGMutation());
293 
294   return DAG;
295 }
296 
297 static ScheduleDAGInstrs *createPPCPostMachineScheduler(
298   MachineSchedContext *C) {
299   const PPCSubtarget &ST = C->MF->getSubtarget<PPCSubtarget>();
300   ScheduleDAGMI *DAG =
301     new ScheduleDAGMI(C, ST.usePPCPostRASchedStrategy() ?
302                       std::make_unique<PPCPostRASchedStrategy>(C) :
303                       std::make_unique<PostGenericScheduler>(C), true);
304   // add DAG Mutations here.
305   if (ST.hasStoreFusion())
306     DAG->addMutation(createStoreClusterDAGMutation(DAG->TII, DAG->TRI));
307   if (ST.hasFusion())
308     DAG->addMutation(createPowerPCMacroFusionDAGMutation());
309   return DAG;
310 }
311 
// Construct the PowerPC target machine.
//
// The FeatureString here is a little subtle. We are modifying the feature
// string with what are (currently) non-function specific overrides as it goes
// into the LLVMTargetMachine constructor, so the value stored in the base
// class already contains the additions made by computeFSAdditions().
//
// The data layout, relocation model and code model handed to the base class
// are all derived from the triple (and the optional explicit RM / CM / JIT
// arguments) by the static helpers in this file. After the base class is set
// up, the object-file lowering and the target ABI are chosen, and finally the
// assembly info is initialized.
PPCTargetMachine::PPCTargetMachine(const Target &T, const Triple &TT,
                                   StringRef CPU, StringRef FS,
                                   const TargetOptions &Options,
                                   Optional<Reloc::Model> RM,
                                   Optional<CodeModel::Model> CM,
                                   CodeGenOpt::Level OL, bool JIT)
    : LLVMTargetMachine(T, getDataLayoutString(TT), TT, CPU,
                        computeFSAdditions(FS, OL, TT), Options,
                        getEffectiveRelocModel(TT, RM),
                        getEffectivePPCCodeModel(TT, CM, JIT), OL),
      TLOF(createTLOF(getTargetTriple())),
      TargetABI(computeTargetABI(TT, Options)) {
  initAsmInfo();
}
330 
// Out-of-line defaulted destructor; the members clean up after themselves.
PPCTargetMachine::~PPCTargetMachine() = default;
332 
333 const PPCSubtarget *
334 PPCTargetMachine::getSubtargetImpl(const Function &F) const {
335   Attribute CPUAttr = F.getFnAttribute("target-cpu");
336   Attribute FSAttr = F.getFnAttribute("target-features");
337 
338   std::string CPU =
339       CPUAttr.isValid() ? CPUAttr.getValueAsString().str() : TargetCPU;
340   std::string FS =
341       FSAttr.isValid() ? FSAttr.getValueAsString().str() : TargetFS;
342 
343   // FIXME: This is related to the code below to reset the target options,
344   // we need to know whether or not the soft float flag is set on the
345   // function before we can generate a subtarget. We also need to use
346   // it as a key for the subtarget since that can be the only difference
347   // between two functions.
348   bool SoftFloat =
349       F.getFnAttribute("use-soft-float").getValueAsString() == "true";
350   // If the soft float attribute is set on the function turn on the soft float
351   // subtarget feature.
352   if (SoftFloat)
353     FS += FS.empty() ? "-hard-float" : ",-hard-float";
354 
355   auto &I = SubtargetMap[CPU + FS];
356   if (!I) {
357     // This needs to be done before we create a new subtarget since any
358     // creation will depend on the TM and the code generation flags on the
359     // function that reside in TargetOptions.
360     resetTargetOptions(F);
361     I = std::make_unique<PPCSubtarget>(
362         TargetTriple, CPU,
363         // FIXME: It would be good to have the subtarget additions here
364         // not necessary. Anything that turns them on/off (overrides) ends
365         // up being put at the end of the feature string, but the defaults
366         // shouldn't require adding them. Fixing this means pulling Feature64Bit
367         // out of most of the target cpus in the .td file and making it set only
368         // as part of initialization via the TargetTriple.
369         computeFSAdditions(FS, getOptLevel(), getTargetTriple()), *this);
370   }
371   return I.get();
372 }
373 
374 //===----------------------------------------------------------------------===//
375 // Pass Pipeline Configuration
376 //===----------------------------------------------------------------------===//
377 
378 namespace {
379 
380 /// PPC Code Generator Pass Configuration Options.
381 class PPCPassConfig : public TargetPassConfig {
382 public:
383   PPCPassConfig(PPCTargetMachine &TM, PassManagerBase &PM)
384     : TargetPassConfig(TM, PM) {
385     // At any optimization level above -O0 we use the Machine Scheduler and not
386     // the default Post RA List Scheduler.
387     if (TM.getOptLevel() != CodeGenOpt::None)
388       substitutePass(&PostRASchedulerID, &PostMachineSchedulerID);
389   }
390 
391   PPCTargetMachine &getPPCTargetMachine() const {
392     return getTM<PPCTargetMachine>();
393   }
394 
395   void addIRPasses() override;
396   bool addPreISel() override;
397   bool addILPOpts() override;
398   bool addInstSelector() override;
399   void addMachineSSAOptimization() override;
400   void addPreRegAlloc() override;
401   void addPreSched2() override;
402   void addPreEmitPass() override;
403   // GlobalISEL
404   bool addIRTranslator() override;
405   bool addLegalizeMachineIR() override;
406   bool addRegBankSelect() override;
407   bool addGlobalInstructionSelect() override;
408 
409   ScheduleDAGInstrs *
410   createMachineScheduler(MachineSchedContext *C) const override {
411     return createPPCMachineScheduler(C);
412   }
413   ScheduleDAGInstrs *
414   createPostMachineScheduler(MachineSchedContext *C) const override {
415     return createPPCPostMachineScheduler(C);
416   }
417 };
418 
419 } // end anonymous namespace
420 
/// Create the PowerPC pass configuration for the pass manager \p PM. The
/// returned object is heap-allocated with new.
TargetPassConfig *PPCTargetMachine::createPassConfig(PassManagerBase &PM) {
  return new PPCPassConfig(*this, PM);
}
424 
425 void PPCPassConfig::addIRPasses() {
426   if (TM->getOptLevel() != CodeGenOpt::None)
427     addPass(createPPCBoolRetToIntPass());
428   addPass(createAtomicExpandPass());
429 
430   // Lower generic MASSV routines to PowerPC subtarget-specific entries.
431   addPass(createPPCLowerMASSVEntriesPass());
432 
433   // If explicitly requested, add explicit data prefetch intrinsics.
434   if (EnablePrefetch.getNumOccurrences() > 0)
435     addPass(createLoopDataPrefetchPass());
436 
437   if (TM->getOptLevel() >= CodeGenOpt::Default && EnableGEPOpt) {
438     // Call SeparateConstOffsetFromGEP pass to extract constants within indices
439     // and lower a GEP with multiple indices to either arithmetic operations or
440     // multiple GEPs with single index.
441     addPass(createSeparateConstOffsetFromGEPPass(true));
442     // Call EarlyCSE pass to find and remove subexpressions in the lowered
443     // result.
444     addPass(createEarlyCSEPass());
445     // Do loop invariant code motion in case part of the lowered result is
446     // invariant.
447     addPass(createLICMPass());
448   }
449 
450   TargetPassConfig::addIRPasses();
451 }
452 
453 bool PPCPassConfig::addPreISel() {
454   if (!DisableInstrFormPrep && getOptLevel() != CodeGenOpt::None)
455     addPass(createPPCLoopInstrFormPrepPass(getPPCTargetMachine()));
456 
457   if (!DisableCTRLoops && getOptLevel() != CodeGenOpt::None)
458     addPass(createHardwareLoopsPass());
459 
460   return false;
461 }
462 
463 bool PPCPassConfig::addILPOpts() {
464   addPass(&EarlyIfConverterID);
465 
466   if (EnableMachineCombinerPass)
467     addPass(&MachineCombinerID);
468 
469   return true;
470 }
471 
472 bool PPCPassConfig::addInstSelector() {
473   // Install an instruction selector.
474   addPass(createPPCISelDag(getPPCTargetMachine(), getOptLevel()));
475 
476 #ifndef NDEBUG
477   if (!DisableCTRLoops && getOptLevel() != CodeGenOpt::None)
478     addPass(createPPCCTRLoopsVerify());
479 #endif
480 
481   addPass(createPPCVSXCopyPass());
482   return false;
483 }
484 
485 void PPCPassConfig::addMachineSSAOptimization() {
486   // PPCBranchCoalescingPass need to be done before machine sinking
487   // since it merges empty blocks.
488   if (EnableBranchCoalescing && getOptLevel() != CodeGenOpt::None)
489     addPass(createPPCBranchCoalescingPass());
490   TargetPassConfig::addMachineSSAOptimization();
491   // For little endian, remove where possible the vector swap instructions
492   // introduced at code generation to normalize vector element order.
493   if (TM->getTargetTriple().getArch() == Triple::ppc64le &&
494       !DisableVSXSwapRemoval)
495     addPass(createPPCVSXSwapRemovalPass());
496   // Reduce the number of cr-logical ops.
497   if (ReduceCRLogical && getOptLevel() != CodeGenOpt::None)
498     addPass(createPPCReduceCRLogicalsPass());
499   // Target-specific peephole cleanups performed after instruction
500   // selection.
501   if (!DisableMIPeephole) {
502     addPass(createPPCMIPeepholePass());
503     addPass(&DeadMachineInstructionElimID);
504   }
505 }
506 
507 void PPCPassConfig::addPreRegAlloc() {
508   if (getOptLevel() != CodeGenOpt::None) {
509     initializePPCVSXFMAMutatePass(*PassRegistry::getPassRegistry());
510     insertPass(VSXFMAMutateEarly ? &RegisterCoalescerID : &MachineSchedulerID,
511                &PPCVSXFMAMutateID);
512   }
513 
514   // FIXME: We probably don't need to run these for -fPIE.
515   if (getPPCTargetMachine().isPositionIndependent()) {
516     // FIXME: LiveVariables should not be necessary here!
517     // PPCTLSDynamicCallPass uses LiveIntervals which previously dependent on
518     // LiveVariables. This (unnecessary) dependency has been removed now,
519     // however a stage-2 clang build fails without LiveVariables computed here.
520     addPass(&LiveVariablesID);
521     addPass(createPPCTLSDynamicCallPass());
522   }
523   if (EnableExtraTOCRegDeps)
524     addPass(createPPCTOCRegDepsPass());
525 
526   if (getOptLevel() != CodeGenOpt::None)
527     addPass(&MachinePipelinerID);
528 }
529 
530 void PPCPassConfig::addPreSched2() {
531   if (getOptLevel() != CodeGenOpt::None)
532     addPass(&IfConverterID);
533 }
534 
535 void PPCPassConfig::addPreEmitPass() {
536   addPass(createPPCPreEmitPeepholePass());
537   addPass(createPPCExpandISELPass());
538 
539   if (getOptLevel() != CodeGenOpt::None)
540     addPass(createPPCEarlyReturnPass());
541   // Must run branch selection immediately preceding the asm printer.
542   addPass(createPPCBranchSelectionPass());
543 }
544 
/// Return the TargetTransformInfo for function \p F, backed by a PPCTTIImpl
/// built from this target machine and \p F.
TargetTransformInfo
PPCTargetMachine::getTargetTransformInfo(const Function &F) {
  return TargetTransformInfo(PPCTTIImpl(this, F));
}
549 
// Register the PowerPC pre-RA scheduler factory under the name "ppc-prera".
static MachineSchedRegistry
PPCPreRASchedRegistry("ppc-prera",
                      "Run PowerPC PreRA specific scheduler",
                      createPPCMachineScheduler);

// Register the PowerPC post-RA scheduler factory under the name "ppc-postra".
static MachineSchedRegistry
PPCPostRASchedRegistry("ppc-postra",
                       "Run PowerPC PostRA specific scheduler",
                       createPPCPostMachineScheduler);
559 
// Global ISEL
/// GlobalISel hook: add the IR translator pass. Always returns false.
bool PPCPassConfig::addIRTranslator() {
  addPass(new IRTranslator());
  return false;
}
565 
/// GlobalISel hook: add the legalizer pass. Always returns false.
bool PPCPassConfig::addLegalizeMachineIR() {
  addPass(new Legalizer());
  return false;
}
570 
/// GlobalISel hook: add the register bank selection pass. Always returns
/// false.
bool PPCPassConfig::addRegBankSelect() {
  addPass(new RegBankSelect());
  return false;
}
575 
/// GlobalISel hook: add the generic instruction selection pass. Always
/// returns false.
bool PPCPassConfig::addGlobalInstructionSelect() {
  addPass(new InstructionSelect());
  return false;
}
580