1 //===-- PPCTargetMachine.cpp - Define TargetMachine for PowerPC -----------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // Top-level implementation for the PowerPC target. 10 // 11 //===----------------------------------------------------------------------===// 12 13 #include "PPCTargetMachine.h" 14 #include "MCTargetDesc/PPCMCTargetDesc.h" 15 #include "PPC.h" 16 #include "PPCMachineScheduler.h" 17 #include "PPCMacroFusion.h" 18 #include "PPCSubtarget.h" 19 #include "PPCTargetObjectFile.h" 20 #include "PPCTargetTransformInfo.h" 21 #include "TargetInfo/PowerPCTargetInfo.h" 22 #include "llvm/ADT/Optional.h" 23 #include "llvm/ADT/STLExtras.h" 24 #include "llvm/ADT/StringRef.h" 25 #include "llvm/ADT/Triple.h" 26 #include "llvm/Analysis/TargetTransformInfo.h" 27 #include "llvm/CodeGen/Passes.h" 28 #include "llvm/CodeGen/TargetPassConfig.h" 29 #include "llvm/CodeGen/MachineScheduler.h" 30 #include "llvm/IR/Attributes.h" 31 #include "llvm/IR/DataLayout.h" 32 #include "llvm/IR/Function.h" 33 #include "llvm/Pass.h" 34 #include "llvm/Support/CodeGen.h" 35 #include "llvm/Support/CommandLine.h" 36 #include "llvm/Support/TargetRegistry.h" 37 #include "llvm/Target/TargetLoweringObjectFile.h" 38 #include "llvm/Target/TargetOptions.h" 39 #include "llvm/Transforms/Scalar.h" 40 #include <cassert> 41 #include <memory> 42 #include <string> 43 44 using namespace llvm; 45 46 47 static cl::opt<bool> 48 EnableBranchCoalescing("enable-ppc-branch-coalesce", cl::Hidden, 49 cl::desc("enable coalescing of duplicate branches for PPC")); 50 static cl:: 51 opt<bool> DisableCTRLoops("disable-ppc-ctrloops", cl::Hidden, 52 cl::desc("Disable CTR loops for PPC")); 53 54 static cl:: 55 opt<bool> DisableInstrFormPrep("disable-ppc-instr-form-prep", cl::Hidden, 56 cl::desc("Disable PPC loop instr form prep")); 57 58 static cl::opt<bool> 59 VSXFMAMutateEarly("schedule-ppc-vsx-fma-mutation-early", 60 cl::Hidden, cl::desc("Schedule VSX FMA instruction mutation early")); 61 62 static cl:: 63 opt<bool> DisableVSXSwapRemoval("disable-ppc-vsx-swap-removal", cl::Hidden, 64 cl::desc("Disable VSX Swap Removal for PPC")); 65 66 static cl:: 67 opt<bool> DisableMIPeephole("disable-ppc-peephole", cl::Hidden, 68 cl::desc("Disable machine peepholes for PPC")); 69 70 static cl::opt<bool> 71 EnableGEPOpt("ppc-gep-opt", cl::Hidden, 72 cl::desc("Enable optimizations on complex GEPs"), 73 cl::init(true)); 74 75 static cl::opt<bool> 76 EnablePrefetch("enable-ppc-prefetching", 77 cl::desc("enable software prefetching on PPC"), 78 cl::init(false), cl::Hidden); 79 80 static cl::opt<bool> 81 EnableExtraTOCRegDeps("enable-ppc-extra-toc-reg-deps", 82 cl::desc("Add extra TOC register dependencies"), 83 cl::init(true), cl::Hidden); 84 85 static cl::opt<bool> 86 EnableMachineCombinerPass("ppc-machine-combiner", 87 cl::desc("Enable the machine combiner pass"), 88 cl::init(true), cl::Hidden); 89 90 static cl::opt<bool> 91 ReduceCRLogical("ppc-reduce-cr-logicals", 92 cl::desc("Expand eligible cr-logical binary ops to branches"), 93 cl::init(true), cl::Hidden); 94 extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializePowerPCTarget() { 95 // Register the targets 96 RegisterTargetMachine<PPCTargetMachine> A(getThePPC32Target()); 97 RegisterTargetMachine<PPCTargetMachine> B(getThePPC64Target()); 98 RegisterTargetMachine<PPCTargetMachine> C(getThePPC64LETarget()); 99 100 PassRegistry &PR = *PassRegistry::getPassRegistry(); 101 #ifndef NDEBUG 102 initializePPCCTRLoopsVerifyPass(PR); 103 #endif 104 initializePPCLoopInstrFormPrepPass(PR); 105 initializePPCTOCRegDepsPass(PR); 106 initializePPCEarlyReturnPass(PR); 107 initializePPCVSXCopyPass(PR); 108 initializePPCVSXFMAMutatePass(PR); 109 initializePPCVSXSwapRemovalPass(PR); 110 initializePPCReduceCRLogicalsPass(PR); 111 initializePPCBSelPass(PR); 112 initializePPCBranchCoalescingPass(PR); 113 initializePPCBoolRetToIntPass(PR); 114 initializePPCExpandISELPass(PR); 115 initializePPCPreEmitPeepholePass(PR); 116 initializePPCTLSDynamicCallPass(PR); 117 initializePPCMIPeepholePass(PR); 118 initializePPCLowerMASSVEntriesPass(PR); 119 } 120 121 /// Return the datalayout string of a subtarget. 122 static std::string getDataLayoutString(const Triple &T) { 123 bool is64Bit = T.getArch() == Triple::ppc64 || T.getArch() == Triple::ppc64le; 124 std::string Ret; 125 126 // Most PPC* platforms are big endian, PPC64LE is little endian. 127 if (T.getArch() == Triple::ppc64le) 128 Ret = "e"; 129 else 130 Ret = "E"; 131 132 Ret += DataLayout::getManglingComponent(T); 133 134 // PPC32 has 32 bit pointers. The PS3 (OS Lv2) is a PPC64 machine with 32 bit 135 // pointers. 136 if (!is64Bit || T.getOS() == Triple::Lv2) 137 Ret += "-p:32:32"; 138 139 // Note, the alignment values for f64 and i64 on ppc64 in Darwin 140 // documentation are wrong; these are correct (i.e. "what gcc does"). 141 if (is64Bit || !T.isOSDarwin()) 142 Ret += "-i64:64"; 143 else 144 Ret += "-f64:32:64"; 145 146 // PPC64 has 32 and 64 bit registers, PPC32 has only 32 bit ones. 147 if (is64Bit) 148 Ret += "-n32:64"; 149 else 150 Ret += "-n32"; 151 152 return Ret; 153 } 154 155 static std::string computeFSAdditions(StringRef FS, CodeGenOpt::Level OL, 156 const Triple &TT) { 157 std::string FullFS = std::string(FS); 158 159 // Make sure 64-bit features are available when CPUname is generic 160 if (TT.getArch() == Triple::ppc64 || TT.getArch() == Triple::ppc64le) { 161 if (!FullFS.empty()) 162 FullFS = "+64bit," + FullFS; 163 else 164 FullFS = "+64bit"; 165 } 166 167 if (OL >= CodeGenOpt::Default) { 168 if (!FullFS.empty()) 169 FullFS = "+crbits," + FullFS; 170 else 171 FullFS = "+crbits"; 172 } 173 174 if (OL != CodeGenOpt::None) { 175 if (!FullFS.empty()) 176 FullFS = "+invariant-function-descriptors," + FullFS; 177 else 178 FullFS = "+invariant-function-descriptors"; 179 } 180 181 return FullFS; 182 } 183 184 static std::unique_ptr<TargetLoweringObjectFile> createTLOF(const Triple &TT) { 185 if (TT.isOSDarwin()) 186 return std::make_unique<TargetLoweringObjectFileMachO>(); 187 188 if (TT.isOSAIX()) 189 return std::make_unique<TargetLoweringObjectFileXCOFF>(); 190 191 return std::make_unique<PPC64LinuxTargetObjectFile>(); 192 } 193 194 static PPCTargetMachine::PPCABI computeTargetABI(const Triple &TT, 195 const TargetOptions &Options) { 196 if (TT.isOSDarwin()) 197 report_fatal_error("Darwin is no longer supported for PowerPC"); 198 199 if (Options.MCOptions.getABIName().startswith("elfv1")) 200 return PPCTargetMachine::PPC_ABI_ELFv1; 201 else if (Options.MCOptions.getABIName().startswith("elfv2")) 202 return PPCTargetMachine::PPC_ABI_ELFv2; 203 204 assert(Options.MCOptions.getABIName().empty() && 205 "Unknown target-abi option!"); 206 207 if (TT.isMacOSX()) 208 return PPCTargetMachine::PPC_ABI_UNKNOWN; 209 210 switch (TT.getArch()) { 211 case Triple::ppc64le: 212 return PPCTargetMachine::PPC_ABI_ELFv2; 213 case Triple::ppc64: 214 return PPCTargetMachine::PPC_ABI_ELFv1; 215 default: 216 return PPCTargetMachine::PPC_ABI_UNKNOWN; 217 } 218 } 219 220 static Reloc::Model getEffectiveRelocModel(const Triple &TT, 221 Optional<Reloc::Model> RM) { 222 assert((!TT.isOSAIX() || !RM.hasValue() || *RM == Reloc::PIC_) && 223 "Invalid relocation model for AIX."); 224 225 if (RM.hasValue()) 226 return *RM; 227 228 // Darwin defaults to dynamic-no-pic. 229 if (TT.isOSDarwin()) 230 return Reloc::DynamicNoPIC; 231 232 // Big Endian PPC and AIX default to PIC. 233 if (TT.getArch() == Triple::ppc64 || TT.isOSAIX()) 234 return Reloc::PIC_; 235 236 // Rest are static by default. 237 return Reloc::Static; 238 } 239 240 static CodeModel::Model getEffectivePPCCodeModel(const Triple &TT, 241 Optional<CodeModel::Model> CM, 242 bool JIT) { 243 if (CM) { 244 if (*CM == CodeModel::Tiny) 245 report_fatal_error("Target does not support the tiny CodeModel", false); 246 if (*CM == CodeModel::Kernel) 247 report_fatal_error("Target does not support the kernel CodeModel", false); 248 return *CM; 249 } 250 251 if (JIT) 252 return CodeModel::Small; 253 if (TT.isOSAIX()) 254 return CodeModel::Small; 255 256 assert(TT.isOSBinFormatELF() && "All remaining PPC OSes are ELF based."); 257 258 if (TT.isArch32Bit()) 259 return CodeModel::Small; 260 261 assert(TT.isArch64Bit() && "Unsupported PPC architecture."); 262 return CodeModel::Medium; 263 } 264 265 266 static ScheduleDAGInstrs *createPPCMachineScheduler(MachineSchedContext *C) { 267 const PPCSubtarget &ST = C->MF->getSubtarget<PPCSubtarget>(); 268 ScheduleDAGMILive *DAG = 269 new ScheduleDAGMILive(C, ST.usePPCPreRASchedStrategy() ? 270 std::make_unique<PPCPreRASchedStrategy>(C) : 271 std::make_unique<GenericScheduler>(C)); 272 // add DAG Mutations here. 273 DAG->addMutation(createCopyConstrainDAGMutation(DAG->TII, DAG->TRI)); 274 if (ST.hasFusion()) 275 DAG->addMutation(createPowerPCMacroFusionDAGMutation()); 276 277 return DAG; 278 } 279 280 static ScheduleDAGInstrs *createPPCPostMachineScheduler( 281 MachineSchedContext *C) { 282 const PPCSubtarget &ST = C->MF->getSubtarget<PPCSubtarget>(); 283 ScheduleDAGMI *DAG = 284 new ScheduleDAGMI(C, ST.usePPCPostRASchedStrategy() ? 285 std::make_unique<PPCPostRASchedStrategy>(C) : 286 std::make_unique<PostGenericScheduler>(C), true); 287 // add DAG Mutations here. 288 if (ST.hasFusion()) 289 DAG->addMutation(createPowerPCMacroFusionDAGMutation()); 290 return DAG; 291 } 292 293 // The FeatureString here is a little subtle. We are modifying the feature 294 // string with what are (currently) non-function specific overrides as it goes 295 // into the LLVMTargetMachine constructor and then using the stored value in the 296 // Subtarget constructor below it. 297 PPCTargetMachine::PPCTargetMachine(const Target &T, const Triple &TT, 298 StringRef CPU, StringRef FS, 299 const TargetOptions &Options, 300 Optional<Reloc::Model> RM, 301 Optional<CodeModel::Model> CM, 302 CodeGenOpt::Level OL, bool JIT) 303 : LLVMTargetMachine(T, getDataLayoutString(TT), TT, CPU, 304 computeFSAdditions(FS, OL, TT), Options, 305 getEffectiveRelocModel(TT, RM), 306 getEffectivePPCCodeModel(TT, CM, JIT), OL), 307 TLOF(createTLOF(getTargetTriple())), 308 TargetABI(computeTargetABI(TT, Options)) { 309 initAsmInfo(); 310 } 311 312 PPCTargetMachine::~PPCTargetMachine() = default; 313 314 const PPCSubtarget * 315 PPCTargetMachine::getSubtargetImpl(const Function &F) const { 316 Attribute CPUAttr = F.getFnAttribute("target-cpu"); 317 Attribute FSAttr = F.getFnAttribute("target-features"); 318 319 std::string CPU = !CPUAttr.hasAttribute(Attribute::None) 320 ? CPUAttr.getValueAsString().str() 321 : TargetCPU; 322 std::string FS = !FSAttr.hasAttribute(Attribute::None) 323 ? FSAttr.getValueAsString().str() 324 : TargetFS; 325 326 // FIXME: This is related to the code below to reset the target options, 327 // we need to know whether or not the soft float flag is set on the 328 // function before we can generate a subtarget. We also need to use 329 // it as a key for the subtarget since that can be the only difference 330 // between two functions. 331 bool SoftFloat = 332 F.getFnAttribute("use-soft-float").getValueAsString() == "true"; 333 // If the soft float attribute is set on the function turn on the soft float 334 // subtarget feature. 335 if (SoftFloat) 336 FS += FS.empty() ? "-hard-float" : ",-hard-float"; 337 338 auto &I = SubtargetMap[CPU + FS]; 339 if (!I) { 340 // This needs to be done before we create a new subtarget since any 341 // creation will depend on the TM and the code generation flags on the 342 // function that reside in TargetOptions. 343 resetTargetOptions(F); 344 I = std::make_unique<PPCSubtarget>( 345 TargetTriple, CPU, 346 // FIXME: It would be good to have the subtarget additions here 347 // not necessary. Anything that turns them on/off (overrides) ends 348 // up being put at the end of the feature string, but the defaults 349 // shouldn't require adding them. Fixing this means pulling Feature64Bit 350 // out of most of the target cpus in the .td file and making it set only 351 // as part of initialization via the TargetTriple. 352 computeFSAdditions(FS, getOptLevel(), getTargetTriple()), *this); 353 } 354 return I.get(); 355 } 356 357 //===----------------------------------------------------------------------===// 358 // Pass Pipeline Configuration 359 //===----------------------------------------------------------------------===// 360 361 namespace { 362 363 /// PPC Code Generator Pass Configuration Options. 364 class PPCPassConfig : public TargetPassConfig { 365 public: 366 PPCPassConfig(PPCTargetMachine &TM, PassManagerBase &PM) 367 : TargetPassConfig(TM, PM) { 368 // At any optimization level above -O0 we use the Machine Scheduler and not 369 // the default Post RA List Scheduler. 370 if (TM.getOptLevel() != CodeGenOpt::None) 371 substitutePass(&PostRASchedulerID, &PostMachineSchedulerID); 372 } 373 374 PPCTargetMachine &getPPCTargetMachine() const { 375 return getTM<PPCTargetMachine>(); 376 } 377 378 void addIRPasses() override; 379 bool addPreISel() override; 380 bool addILPOpts() override; 381 bool addInstSelector() override; 382 void addMachineSSAOptimization() override; 383 void addPreRegAlloc() override; 384 void addPreSched2() override; 385 void addPreEmitPass() override; 386 ScheduleDAGInstrs * 387 createMachineScheduler(MachineSchedContext *C) const override { 388 return createPPCMachineScheduler(C); 389 } 390 ScheduleDAGInstrs * 391 createPostMachineScheduler(MachineSchedContext *C) const override { 392 return createPPCPostMachineScheduler(C); 393 } 394 }; 395 396 } // end anonymous namespace 397 398 TargetPassConfig *PPCTargetMachine::createPassConfig(PassManagerBase &PM) { 399 return new PPCPassConfig(*this, PM); 400 } 401 402 void PPCPassConfig::addIRPasses() { 403 if (TM->getOptLevel() != CodeGenOpt::None) 404 addPass(createPPCBoolRetToIntPass()); 405 addPass(createAtomicExpandPass()); 406 407 // Lower generic MASSV routines to PowerPC subtarget-specific entries. 408 addPass(createPPCLowerMASSVEntriesPass()); 409 410 // If explicitly requested, add explicit data prefetch intrinsics. 411 if (EnablePrefetch.getNumOccurrences() > 0) 412 addPass(createLoopDataPrefetchPass()); 413 414 if (TM->getOptLevel() >= CodeGenOpt::Default && EnableGEPOpt) { 415 // Call SeparateConstOffsetFromGEP pass to extract constants within indices 416 // and lower a GEP with multiple indices to either arithmetic operations or 417 // multiple GEPs with single index. 418 addPass(createSeparateConstOffsetFromGEPPass(true)); 419 // Call EarlyCSE pass to find and remove subexpressions in the lowered 420 // result. 421 addPass(createEarlyCSEPass()); 422 // Do loop invariant code motion in case part of the lowered result is 423 // invariant. 424 addPass(createLICMPass()); 425 } 426 427 TargetPassConfig::addIRPasses(); 428 } 429 430 bool PPCPassConfig::addPreISel() { 431 if (!DisableInstrFormPrep && getOptLevel() != CodeGenOpt::None) 432 addPass(createPPCLoopInstrFormPrepPass(getPPCTargetMachine())); 433 434 if (!DisableCTRLoops && getOptLevel() != CodeGenOpt::None) 435 addPass(createHardwareLoopsPass()); 436 437 return false; 438 } 439 440 bool PPCPassConfig::addILPOpts() { 441 addPass(&EarlyIfConverterID); 442 443 if (EnableMachineCombinerPass) 444 addPass(&MachineCombinerID); 445 446 return true; 447 } 448 449 bool PPCPassConfig::addInstSelector() { 450 // Install an instruction selector. 451 addPass(createPPCISelDag(getPPCTargetMachine(), getOptLevel())); 452 453 #ifndef NDEBUG 454 if (!DisableCTRLoops && getOptLevel() != CodeGenOpt::None) 455 addPass(createPPCCTRLoopsVerify()); 456 #endif 457 458 addPass(createPPCVSXCopyPass()); 459 return false; 460 } 461 462 void PPCPassConfig::addMachineSSAOptimization() { 463 // PPCBranchCoalescingPass need to be done before machine sinking 464 // since it merges empty blocks. 465 if (EnableBranchCoalescing && getOptLevel() != CodeGenOpt::None) 466 addPass(createPPCBranchCoalescingPass()); 467 TargetPassConfig::addMachineSSAOptimization(); 468 // For little endian, remove where possible the vector swap instructions 469 // introduced at code generation to normalize vector element order. 470 if (TM->getTargetTriple().getArch() == Triple::ppc64le && 471 !DisableVSXSwapRemoval) 472 addPass(createPPCVSXSwapRemovalPass()); 473 // Reduce the number of cr-logical ops. 474 if (ReduceCRLogical && getOptLevel() != CodeGenOpt::None) 475 addPass(createPPCReduceCRLogicalsPass()); 476 // Target-specific peephole cleanups performed after instruction 477 // selection. 478 if (!DisableMIPeephole) { 479 addPass(createPPCMIPeepholePass()); 480 addPass(&DeadMachineInstructionElimID); 481 } 482 } 483 484 void PPCPassConfig::addPreRegAlloc() { 485 if (getOptLevel() != CodeGenOpt::None) { 486 initializePPCVSXFMAMutatePass(*PassRegistry::getPassRegistry()); 487 insertPass(VSXFMAMutateEarly ? &RegisterCoalescerID : &MachineSchedulerID, 488 &PPCVSXFMAMutateID); 489 } 490 491 // FIXME: We probably don't need to run these for -fPIE. 492 if (getPPCTargetMachine().isPositionIndependent()) { 493 // FIXME: LiveVariables should not be necessary here! 494 // PPCTLSDynamicCallPass uses LiveIntervals which previously dependent on 495 // LiveVariables. This (unnecessary) dependency has been removed now, 496 // however a stage-2 clang build fails without LiveVariables computed here. 497 addPass(&LiveVariablesID); 498 addPass(createPPCTLSDynamicCallPass()); 499 } 500 if (EnableExtraTOCRegDeps) 501 addPass(createPPCTOCRegDepsPass()); 502 503 if (getOptLevel() != CodeGenOpt::None) 504 addPass(&MachinePipelinerID); 505 } 506 507 void PPCPassConfig::addPreSched2() { 508 if (getOptLevel() != CodeGenOpt::None) 509 addPass(&IfConverterID); 510 } 511 512 void PPCPassConfig::addPreEmitPass() { 513 addPass(createPPCPreEmitPeepholePass()); 514 addPass(createPPCExpandISELPass()); 515 516 if (getOptLevel() != CodeGenOpt::None) 517 addPass(createPPCEarlyReturnPass()); 518 // Must run branch selection immediately preceding the asm printer. 519 addPass(createPPCBranchSelectionPass()); 520 } 521 522 TargetTransformInfo 523 PPCTargetMachine::getTargetTransformInfo(const Function &F) { 524 return TargetTransformInfo(PPCTTIImpl(this, F)); 525 } 526 527 static MachineSchedRegistry 528 PPCPreRASchedRegistry("ppc-prera", 529 "Run PowerPC PreRA specific scheduler", 530 createPPCMachineScheduler); 531 532 static MachineSchedRegistry 533 PPCPostRASchedRegistry("ppc-postra", 534 "Run PowerPC PostRA specific scheduler", 535 createPPCPostMachineScheduler); 536