1 //===-- PPCTargetMachine.cpp - Define TargetMachine for PowerPC -----------===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 // 10 // Top-level implementation for the PowerPC target. 11 // 12 //===----------------------------------------------------------------------===// 13 14 #include "PPCTargetMachine.h" 15 #include "PPC.h" 16 #include "PPCTargetObjectFile.h" 17 #include "PPCTargetTransformInfo.h" 18 #include "llvm/CodeGen/Passes.h" 19 #include "llvm/IR/Function.h" 20 #include "llvm/IR/LegacyPassManager.h" 21 #include "llvm/MC/MCStreamer.h" 22 #include "llvm/Support/CommandLine.h" 23 #include "llvm/Support/FormattedStream.h" 24 #include "llvm/Support/TargetRegistry.h" 25 #include "llvm/Target/TargetOptions.h" 26 #include "llvm/Transforms/Scalar.h" 27 using namespace llvm; 28 29 static cl:: 30 opt<bool> DisableCTRLoops("disable-ppc-ctrloops", cl::Hidden, 31 cl::desc("Disable CTR loops for PPC")); 32 33 static cl:: 34 opt<bool> DisablePreIncPrep("disable-ppc-preinc-prep", cl::Hidden, 35 cl::desc("Disable PPC loop preinc prep")); 36 37 static cl::opt<bool> 38 VSXFMAMutateEarly("schedule-ppc-vsx-fma-mutation-early", 39 cl::Hidden, cl::desc("Schedule VSX FMA instruction mutation early")); 40 41 static cl:: 42 opt<bool> DisableVSXSwapRemoval("disable-ppc-vsx-swap-removal", cl::Hidden, 43 cl::desc("Disable VSX Swap Removal for PPC")); 44 45 static cl:: 46 opt<bool> DisableQPXLoadSplat("disable-ppc-qpx-load-splat", cl::Hidden, 47 cl::desc("Disable QPX load splat simplification")); 48 49 static cl:: 50 opt<bool> DisableMIPeephole("disable-ppc-peephole", cl::Hidden, 51 cl::desc("Disable machine peepholes for PPC")); 52 53 static cl::opt<bool> 54 EnableGEPOpt("ppc-gep-opt", cl::Hidden, 55 cl::desc("Enable optimizations on complex GEPs"), 56 cl::init(true)); 57 58 static cl::opt<bool> 59 EnablePrefetch("enable-ppc-prefetching", 60 cl::desc("disable software prefetching on PPC"), 61 cl::init(false), cl::Hidden); 62 63 static cl::opt<bool> 64 EnableExtraTOCRegDeps("enable-ppc-extra-toc-reg-deps", 65 cl::desc("Add extra TOC register dependencies"), 66 cl::init(true), cl::Hidden); 67 68 static cl::opt<bool> 69 EnableMachineCombinerPass("ppc-machine-combiner", 70 cl::desc("Enable the machine combiner pass"), 71 cl::init(true), cl::Hidden); 72 73 extern "C" void LLVMInitializePowerPCTarget() { 74 // Register the targets 75 RegisterTargetMachine<PPC32TargetMachine> A(ThePPC32Target); 76 RegisterTargetMachine<PPC64TargetMachine> B(ThePPC64Target); 77 RegisterTargetMachine<PPC64TargetMachine> C(ThePPC64LETarget); 78 79 PassRegistry &PR = *PassRegistry::getPassRegistry(); 80 initializePPCBoolRetToIntPass(PR); 81 } 82 83 /// Return the datalayout string of a subtarget. 84 static std::string getDataLayoutString(const Triple &T) { 85 bool is64Bit = T.getArch() == Triple::ppc64 || T.getArch() == Triple::ppc64le; 86 std::string Ret; 87 88 // Most PPC* platforms are big endian, PPC64LE is little endian. 89 if (T.getArch() == Triple::ppc64le) 90 Ret = "e"; 91 else 92 Ret = "E"; 93 94 Ret += DataLayout::getManglingComponent(T); 95 96 // PPC32 has 32 bit pointers. The PS3 (OS Lv2) is a PPC64 machine with 32 bit 97 // pointers. 98 if (!is64Bit || T.getOS() == Triple::Lv2) 99 Ret += "-p:32:32"; 100 101 // Note, the alignment values for f64 and i64 on ppc64 in Darwin 102 // documentation are wrong; these are correct (i.e. "what gcc does"). 103 if (is64Bit || !T.isOSDarwin()) 104 Ret += "-i64:64"; 105 else 106 Ret += "-f64:32:64"; 107 108 // PPC64 has 32 and 64 bit registers, PPC32 has only 32 bit ones. 109 if (is64Bit) 110 Ret += "-n32:64"; 111 else 112 Ret += "-n32"; 113 114 return Ret; 115 } 116 117 static std::string computeFSAdditions(StringRef FS, CodeGenOpt::Level OL, 118 const Triple &TT) { 119 std::string FullFS = FS; 120 121 // Make sure 64-bit features are available when CPUname is generic 122 if (TT.getArch() == Triple::ppc64 || TT.getArch() == Triple::ppc64le) { 123 if (!FullFS.empty()) 124 FullFS = "+64bit," + FullFS; 125 else 126 FullFS = "+64bit"; 127 } 128 129 if (OL >= CodeGenOpt::Default) { 130 if (!FullFS.empty()) 131 FullFS = "+crbits," + FullFS; 132 else 133 FullFS = "+crbits"; 134 } 135 136 if (OL != CodeGenOpt::None) { 137 if (!FullFS.empty()) 138 FullFS = "+invariant-function-descriptors," + FullFS; 139 else 140 FullFS = "+invariant-function-descriptors"; 141 } 142 143 return FullFS; 144 } 145 146 static std::unique_ptr<TargetLoweringObjectFile> createTLOF(const Triple &TT) { 147 // If it isn't a Mach-O file then it's going to be a linux ELF 148 // object file. 149 if (TT.isOSDarwin()) 150 return make_unique<TargetLoweringObjectFileMachO>(); 151 152 return make_unique<PPC64LinuxTargetObjectFile>(); 153 } 154 155 static PPCTargetMachine::PPCABI computeTargetABI(const Triple &TT, 156 const TargetOptions &Options) { 157 if (Options.MCOptions.getABIName().startswith("elfv1")) 158 return PPCTargetMachine::PPC_ABI_ELFv1; 159 else if (Options.MCOptions.getABIName().startswith("elfv2")) 160 return PPCTargetMachine::PPC_ABI_ELFv2; 161 162 assert(Options.MCOptions.getABIName().empty() && 163 "Unknown target-abi option!"); 164 165 if (!TT.isMacOSX()) { 166 switch (TT.getArch()) { 167 case Triple::ppc64le: 168 return PPCTargetMachine::PPC_ABI_ELFv2; 169 case Triple::ppc64: 170 return PPCTargetMachine::PPC_ABI_ELFv1; 171 default: 172 // Fallthrough. 173 ; 174 } 175 } 176 return PPCTargetMachine::PPC_ABI_UNKNOWN; 177 } 178 179 // The FeatureString here is a little subtle. We are modifying the feature 180 // string with what are (currently) non-function specific overrides as it goes 181 // into the LLVMTargetMachine constructor and then using the stored value in the 182 // Subtarget constructor below it. 183 PPCTargetMachine::PPCTargetMachine(const Target &T, const Triple &TT, 184 StringRef CPU, StringRef FS, 185 const TargetOptions &Options, 186 Reloc::Model RM, CodeModel::Model CM, 187 CodeGenOpt::Level OL) 188 : LLVMTargetMachine(T, getDataLayoutString(TT), TT, CPU, 189 computeFSAdditions(FS, OL, TT), Options, RM, CM, OL), 190 TLOF(createTLOF(getTargetTriple())), 191 TargetABI(computeTargetABI(TT, Options)), 192 Subtarget(TargetTriple, CPU, computeFSAdditions(FS, OL, TT), *this) { 193 194 // For the estimates, convergence is quadratic, so we essentially double the 195 // number of digits correct after every iteration. For both FRE and FRSQRTE, 196 // the minimum architected relative accuracy is 2^-5. When hasRecipPrec(), 197 // this is 2^-14. IEEE float has 23 digits and double has 52 digits. 198 unsigned RefinementSteps = Subtarget.hasRecipPrec() ? 1 : 3, 199 RefinementSteps64 = RefinementSteps + 1; 200 201 this->Options.Reciprocals.setDefaults("sqrtf", true, RefinementSteps); 202 this->Options.Reciprocals.setDefaults("vec-sqrtf", true, RefinementSteps); 203 this->Options.Reciprocals.setDefaults("divf", true, RefinementSteps); 204 this->Options.Reciprocals.setDefaults("vec-divf", true, RefinementSteps); 205 206 this->Options.Reciprocals.setDefaults("sqrtd", true, RefinementSteps64); 207 this->Options.Reciprocals.setDefaults("vec-sqrtd", true, RefinementSteps64); 208 this->Options.Reciprocals.setDefaults("divd", true, RefinementSteps64); 209 this->Options.Reciprocals.setDefaults("vec-divd", true, RefinementSteps64); 210 211 initAsmInfo(); 212 } 213 214 PPCTargetMachine::~PPCTargetMachine() {} 215 216 void PPC32TargetMachine::anchor() { } 217 218 PPC32TargetMachine::PPC32TargetMachine(const Target &T, const Triple &TT, 219 StringRef CPU, StringRef FS, 220 const TargetOptions &Options, 221 Reloc::Model RM, CodeModel::Model CM, 222 CodeGenOpt::Level OL) 223 : PPCTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL) {} 224 225 void PPC64TargetMachine::anchor() { } 226 227 PPC64TargetMachine::PPC64TargetMachine(const Target &T, const Triple &TT, 228 StringRef CPU, StringRef FS, 229 const TargetOptions &Options, 230 Reloc::Model RM, CodeModel::Model CM, 231 CodeGenOpt::Level OL) 232 : PPCTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL) {} 233 234 const PPCSubtarget * 235 PPCTargetMachine::getSubtargetImpl(const Function &F) const { 236 Attribute CPUAttr = F.getFnAttribute("target-cpu"); 237 Attribute FSAttr = F.getFnAttribute("target-features"); 238 239 std::string CPU = !CPUAttr.hasAttribute(Attribute::None) 240 ? CPUAttr.getValueAsString().str() 241 : TargetCPU; 242 std::string FS = !FSAttr.hasAttribute(Attribute::None) 243 ? FSAttr.getValueAsString().str() 244 : TargetFS; 245 246 // FIXME: This is related to the code below to reset the target options, 247 // we need to know whether or not the soft float flag is set on the 248 // function before we can generate a subtarget. We also need to use 249 // it as a key for the subtarget since that can be the only difference 250 // between two functions. 251 bool SoftFloat = 252 F.getFnAttribute("use-soft-float").getValueAsString() == "true"; 253 // If the soft float attribute is set on the function turn on the soft float 254 // subtarget feature. 255 if (SoftFloat) 256 FS += FS.empty() ? "+soft-float" : ",+soft-float"; 257 258 auto &I = SubtargetMap[CPU + FS]; 259 if (!I) { 260 // This needs to be done before we create a new subtarget since any 261 // creation will depend on the TM and the code generation flags on the 262 // function that reside in TargetOptions. 263 resetTargetOptions(F); 264 I = llvm::make_unique<PPCSubtarget>( 265 TargetTriple, CPU, 266 // FIXME: It would be good to have the subtarget additions here 267 // not necessary. Anything that turns them on/off (overrides) ends 268 // up being put at the end of the feature string, but the defaults 269 // shouldn't require adding them. Fixing this means pulling Feature64Bit 270 // out of most of the target cpus in the .td file and making it set only 271 // as part of initialization via the TargetTriple. 272 computeFSAdditions(FS, getOptLevel(), getTargetTriple()), *this); 273 } 274 return I.get(); 275 } 276 277 //===----------------------------------------------------------------------===// 278 // Pass Pipeline Configuration 279 //===----------------------------------------------------------------------===// 280 281 namespace { 282 /// PPC Code Generator Pass Configuration Options. 283 class PPCPassConfig : public TargetPassConfig { 284 public: 285 PPCPassConfig(PPCTargetMachine *TM, PassManagerBase &PM) 286 : TargetPassConfig(TM, PM) {} 287 288 PPCTargetMachine &getPPCTargetMachine() const { 289 return getTM<PPCTargetMachine>(); 290 } 291 292 void addIRPasses() override; 293 bool addPreISel() override; 294 bool addILPOpts() override; 295 bool addInstSelector() override; 296 void addMachineSSAOptimization() override; 297 void addPreRegAlloc() override; 298 void addPreSched2() override; 299 void addPreEmitPass() override; 300 }; 301 } // namespace 302 303 TargetPassConfig *PPCTargetMachine::createPassConfig(PassManagerBase &PM) { 304 return new PPCPassConfig(this, PM); 305 } 306 307 void PPCPassConfig::addIRPasses() { 308 if (TM->getOptLevel() != CodeGenOpt::None) 309 addPass(createPPCBoolRetToIntPass()); 310 addPass(createAtomicExpandPass(&getPPCTargetMachine())); 311 312 // For the BG/Q (or if explicitly requested), add explicit data prefetch 313 // intrinsics. 314 bool UsePrefetching = TM->getTargetTriple().getVendor() == Triple::BGQ && 315 getOptLevel() != CodeGenOpt::None; 316 if (EnablePrefetch.getNumOccurrences() > 0) 317 UsePrefetching = EnablePrefetch; 318 if (UsePrefetching) 319 addPass(createLoopDataPrefetchPass()); 320 321 if (TM->getOptLevel() >= CodeGenOpt::Default && EnableGEPOpt) { 322 // Call SeparateConstOffsetFromGEP pass to extract constants within indices 323 // and lower a GEP with multiple indices to either arithmetic operations or 324 // multiple GEPs with single index. 325 addPass(createSeparateConstOffsetFromGEPPass(TM, true)); 326 // Call EarlyCSE pass to find and remove subexpressions in the lowered 327 // result. 328 addPass(createEarlyCSEPass()); 329 // Do loop invariant code motion in case part of the lowered result is 330 // invariant. 331 addPass(createLICMPass()); 332 } 333 334 TargetPassConfig::addIRPasses(); 335 } 336 337 bool PPCPassConfig::addPreISel() { 338 if (!DisablePreIncPrep && getOptLevel() != CodeGenOpt::None) 339 addPass(createPPCLoopPreIncPrepPass(getPPCTargetMachine())); 340 341 if (!DisableCTRLoops && getOptLevel() != CodeGenOpt::None) 342 addPass(createPPCCTRLoops(getPPCTargetMachine())); 343 344 return false; 345 } 346 347 bool PPCPassConfig::addILPOpts() { 348 addPass(&EarlyIfConverterID); 349 350 if (EnableMachineCombinerPass) 351 addPass(&MachineCombinerID); 352 353 return true; 354 } 355 356 bool PPCPassConfig::addInstSelector() { 357 // Install an instruction selector. 358 addPass(createPPCISelDag(getPPCTargetMachine())); 359 360 #ifndef NDEBUG 361 if (!DisableCTRLoops && getOptLevel() != CodeGenOpt::None) 362 addPass(createPPCCTRLoopsVerify()); 363 #endif 364 365 addPass(createPPCVSXCopyPass()); 366 return false; 367 } 368 369 void PPCPassConfig::addMachineSSAOptimization() { 370 TargetPassConfig::addMachineSSAOptimization(); 371 // For little endian, remove where possible the vector swap instructions 372 // introduced at code generation to normalize vector element order. 373 if (TM->getTargetTriple().getArch() == Triple::ppc64le && 374 !DisableVSXSwapRemoval) 375 addPass(createPPCVSXSwapRemovalPass()); 376 // Target-specific peephole cleanups performed after instruction 377 // selection. 378 if (!DisableMIPeephole) { 379 addPass(createPPCMIPeepholePass()); 380 addPass(&DeadMachineInstructionElimID); 381 } 382 } 383 384 void PPCPassConfig::addPreRegAlloc() { 385 initializePPCVSXFMAMutatePass(*PassRegistry::getPassRegistry()); 386 insertPass(VSXFMAMutateEarly ? &RegisterCoalescerID : &MachineSchedulerID, 387 &PPCVSXFMAMutateID); 388 if (getPPCTargetMachine().getRelocationModel() == Reloc::PIC_) 389 addPass(createPPCTLSDynamicCallPass()); 390 if (EnableExtraTOCRegDeps) 391 addPass(createPPCTOCRegDepsPass()); 392 } 393 394 void PPCPassConfig::addPreSched2() { 395 if (getOptLevel() != CodeGenOpt::None) { 396 addPass(&IfConverterID); 397 398 // This optimization must happen after anything that might do store-to-load 399 // forwarding. Here we're after RA (and, thus, when spills are inserted) 400 // but before post-RA scheduling. 401 if (!DisableQPXLoadSplat) 402 addPass(createPPCQPXLoadSplatPass()); 403 } 404 } 405 406 void PPCPassConfig::addPreEmitPass() { 407 if (getOptLevel() != CodeGenOpt::None) 408 addPass(createPPCEarlyReturnPass(), false); 409 // Must run branch selection immediately preceding the asm printer. 410 addPass(createPPCBranchSelectionPass(), false); 411 } 412 413 TargetIRAnalysis PPCTargetMachine::getTargetIRAnalysis() { 414 return TargetIRAnalysis([this](const Function &F) { 415 return TargetTransformInfo(PPCTTIImpl(this, F)); 416 }); 417 } 418