//===-- AArch64TargetMachine.cpp - Define TargetMachine for AArch64 -------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
//
//===----------------------------------------------------------------------===//

#include "AArch64TargetMachine.h"
#include "AArch64.h"
#include "AArch64MachineFunctionInfo.h"
#include "AArch64MacroFusion.h"
#include "AArch64Subtarget.h"
#include "AArch64TargetObjectFile.h"
#include "AArch64TargetTransformInfo.h"
#include "MCTargetDesc/AArch64MCTargetDesc.h"
#include "TargetInfo/AArch64TargetInfo.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/Triple.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/CodeGen/CFIFixup.h"
#include "llvm/CodeGen/CSEConfigBase.h"
#include "llvm/CodeGen/GlobalISel/CSEInfo.h"
#include "llvm/CodeGen/GlobalISel/IRTranslator.h"
#include "llvm/CodeGen/GlobalISel/InstructionSelect.h"
#include "llvm/CodeGen/GlobalISel/Legalizer.h"
#include "llvm/CodeGen/GlobalISel/LoadStoreOpt.h"
#include "llvm/CodeGen/GlobalISel/Localizer.h"
#include "llvm/CodeGen/GlobalISel/RegBankSelect.h"
#include "llvm/CodeGen/MIRParser/MIParser.h"
#include "llvm/CodeGen/MachineScheduler.h"
#include "llvm/CodeGen/Passes.h"
#include "llvm/CodeGen/TargetPassConfig.h"
#include "llvm/IR/Attributes.h"
#include "llvm/IR/Function.h"
#include "llvm/InitializePasses.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCTargetOptions.h"
#include "llvm/MC/TargetRegistry.h"
#include "llvm/Pass.h"
#include "llvm/Support/CodeGen.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Target/TargetLoweringObjectFile.h"
#include "llvm/Target/TargetOptions.h"
#include "llvm/Transforms/CFGuard.h"
#include "llvm/Transforms/Scalar.h"
#include <memory>
#include <string>

using namespace llvm;

static cl::opt<bool> EnableCCMP("aarch64-enable-ccmp",
                                cl::desc("Enable the CCMP formation pass"),
                                cl::init(true), cl::Hidden);

static cl::opt<bool>
    EnableCondBrTuning("aarch64-enable-cond-br-tune",
                       cl::desc("Enable the conditional branch tuning pass"),
                       cl::init(true), cl::Hidden);

static cl::opt<bool> EnableMCR("aarch64-enable-mcr",
                               cl::desc("Enable the machine combiner pass"),
                               cl::init(true), cl::Hidden);

static cl::opt<bool> EnableStPairSuppress("aarch64-enable-stp-suppress",
                                          cl::desc("Suppress STP for AArch64"),
                                          cl::init(true), cl::Hidden);

static cl::opt<bool> EnableAdvSIMDScalar(
    "aarch64-enable-simd-scalar",
    cl::desc("Enable use of AdvSIMD scalar integer instructions"),
    cl::init(false), cl::Hidden);

static cl::opt<bool>
    EnablePromoteConstant("aarch64-enable-promote-const",
                          cl::desc("Enable the promote constant pass"),
                          cl::init(true), cl::Hidden);

static cl::opt<bool> EnableCollectLOH(
    "aarch64-enable-collect-loh",
    cl::desc("Enable the pass that emits the linker optimization hints (LOH)"),
    cl::init(true), cl::Hidden);

static cl::opt<bool>
    EnableDeadRegisterElimination("aarch64-enable-dead-defs", cl::Hidden,
                                  cl::desc("Enable the pass that removes dead"
                                           " definitions and replaces stores to"
                                           " them with stores to the zero"
                                           " register"),
                                  cl::init(true));
static cl::opt<bool> EnableRedundantCopyElimination(
    "aarch64-enable-copyelim",
    cl::desc("Enable the redundant copy elimination pass"), cl::init(true),
    cl::Hidden);

static cl::opt<bool> EnableLoadStoreOpt("aarch64-enable-ldst-opt",
                                        cl::desc("Enable the load/store pair"
                                                 " optimization pass"),
                                        cl::init(true), cl::Hidden);

static cl::opt<bool> EnableAtomicTidy(
    "aarch64-enable-atomic-cfg-tidy", cl::Hidden,
    cl::desc("Run SimplifyCFG after expanding atomic operations"
             " to make use of cmpxchg flow-based information"),
    cl::init(true));

static cl::opt<bool>
    EnableEarlyIfConversion("aarch64-enable-early-ifcvt", cl::Hidden,
                            cl::desc("Run early if-conversion"),
                            cl::init(true));

static cl::opt<bool>
    EnableCondOpt("aarch64-enable-condopt",
                  cl::desc("Enable the condition optimizer pass"),
                  cl::init(true), cl::Hidden);

static cl::opt<bool>
    EnableGEPOpt("aarch64-enable-gep-opt", cl::Hidden,
                 cl::desc("Enable optimizations on complex GEPs"),
                 cl::init(false));

static cl::opt<bool>
    BranchRelaxation("aarch64-enable-branch-relax", cl::Hidden, cl::init(true),
                     cl::desc("Relax out of range conditional branches"));

static cl::opt<bool> EnableCompressJumpTables(
    "aarch64-enable-compress-jump-tables", cl::Hidden, cl::init(true),
    cl::desc("Use smallest entry possible for jump tables"));

// FIXME: Unify control over GlobalMerge.
static cl::opt<cl::boolOrDefault>
    EnableGlobalMerge("aarch64-enable-global-merge", cl::Hidden,
                      cl::desc("Enable the global merge pass"));

static cl::opt<bool>
    EnableLoopDataPrefetch("aarch64-enable-loop-data-prefetch", cl::Hidden,
                           cl::desc("Enable the loop data prefetch pass"),
                           cl::init(true));

static cl::opt<int> EnableGlobalISelAtO(
    "aarch64-enable-global-isel-at-O", cl::Hidden,
    cl::desc("Enable GlobalISel at or below an opt level (-1 to disable)"),
    cl::init(0));

static cl::opt<bool>
    EnableSVEIntrinsicOpts("aarch64-enable-sve-intrinsic-opts", cl::Hidden,
                           cl::desc("Enable SVE intrinsic opts"),
                           cl::init(true));

static cl::opt<bool> EnableFalkorHWPFFix("aarch64-enable-falkor-hwpf-fix",
                                         cl::init(true), cl::Hidden);

static cl::opt<bool>
    EnableBranchTargets("aarch64-enable-branch-targets", cl::Hidden,
                        cl::desc("Enable the AArch64 branch target pass"),
                        cl::init(true));

static cl::opt<unsigned> SVEVectorBitsMaxOpt(
    "aarch64-sve-vector-bits-max",
    cl::desc("Assume SVE vector registers are at most this big, "
             "with zero meaning no maximum size is assumed."),
    cl::init(0), cl::Hidden);

static cl::opt<unsigned> SVEVectorBitsMinOpt(
    "aarch64-sve-vector-bits-min",
    cl::desc("Assume SVE vector registers are at least this big, "
             "with zero meaning no minimum size is assumed."),
    cl::init(0), cl::Hidden);

extern cl::opt<bool> EnableHomogeneousPrologEpilog;

static cl::opt<bool> EnableGISelLoadStoreOptPreLegal(
    "aarch64-enable-gisel-ldst-prelegal",
    cl::desc("Enable GlobalISel's pre-legalizer load/store optimization pass"),
    cl::init(true), cl::Hidden);

static cl::opt<bool> EnableGISelLoadStoreOptPostLegal(
    "aarch64-enable-gisel-ldst-postlegal",
    cl::desc("Enable GlobalISel's post-legalizer load/store optimization pass"),
    cl::init(false), cl::Hidden);
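// Illustrative note (not part of the original file): each cl::opt above is a
// hidden backend flag, so an individual pass can be toggled from the llc
// command line when debugging the pipeline, for example:
//   llc -mtriple=aarch64 -aarch64-enable-ccmp=false -aarch64-enable-gep-opt=true foo.ll
// The exact invocation and input file name are only an assumed example; the
// flag names match the options declared above.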
extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeAArch64Target() {
  // Register the target.
  RegisterTargetMachine<AArch64leTargetMachine> X(getTheAArch64leTarget());
  RegisterTargetMachine<AArch64beTargetMachine> Y(getTheAArch64beTarget());
  RegisterTargetMachine<AArch64leTargetMachine> Z(getTheARM64Target());
  RegisterTargetMachine<AArch64leTargetMachine> W(getTheARM64_32Target());
  RegisterTargetMachine<AArch64leTargetMachine> V(getTheAArch64_32Target());
  auto PR = PassRegistry::getPassRegistry();
  initializeGlobalISel(*PR);
  initializeAArch64A53Fix835769Pass(*PR);
  initializeAArch64A57FPLoadBalancingPass(*PR);
  initializeAArch64AdvSIMDScalarPass(*PR);
  initializeAArch64BranchTargetsPass(*PR);
  initializeAArch64CollectLOHPass(*PR);
  initializeAArch64CompressJumpTablesPass(*PR);
  initializeAArch64ConditionalComparesPass(*PR);
  initializeAArch64ConditionOptimizerPass(*PR);
  initializeAArch64DeadRegisterDefinitionsPass(*PR);
  initializeAArch64ExpandPseudoPass(*PR);
  initializeAArch64LoadStoreOptPass(*PR);
  initializeAArch64MIPeepholeOptPass(*PR);
  initializeAArch64SIMDInstrOptPass(*PR);
  initializeAArch64O0PreLegalizerCombinerPass(*PR);
  initializeAArch64PreLegalizerCombinerPass(*PR);
  initializeAArch64PostLegalizerCombinerPass(*PR);
  initializeAArch64PostLegalizerLoweringPass(*PR);
  initializeAArch64PostSelectOptimizePass(*PR);
  initializeAArch64PromoteConstantPass(*PR);
  initializeAArch64RedundantCopyEliminationPass(*PR);
  initializeAArch64StorePairSuppressPass(*PR);
  initializeFalkorHWPFFixPass(*PR);
  initializeFalkorMarkStridedAccessesLegacyPass(*PR);
  initializeLDTLSCleanupPass(*PR);
  initializeSVEIntrinsicOptsPass(*PR);
  initializeAArch64SpeculationHardeningPass(*PR);
  initializeAArch64SLSHardeningPass(*PR);
  initializeAArch64StackTaggingPass(*PR);
  initializeAArch64StackTaggingPreRAPass(*PR);
  initializeAArch64LowerHomogeneousPrologEpilogPass(*PR);
}

//===----------------------------------------------------------------------===//
// AArch64 Lowering public interface.
//===----------------------------------------------------------------------===//
static std::unique_ptr<TargetLoweringObjectFile> createTLOF(const Triple &TT) {
  if (TT.isOSBinFormatMachO())
    return std::make_unique<AArch64_MachoTargetObjectFile>();
  if (TT.isOSBinFormatCOFF())
    return std::make_unique<AArch64_COFFTargetObjectFile>();

  return std::make_unique<AArch64_ELFTargetObjectFile>();
}

// Helper function to build a DataLayout string
static std::string computeDataLayout(const Triple &TT,
                                     const MCTargetOptions &Options,
                                     bool LittleEndian) {
  if (TT.isOSBinFormatMachO()) {
    if (TT.getArch() == Triple::aarch64_32)
      return "e-m:o-p:32:32-i64:64-i128:128-n32:64-S128";
    return "e-m:o-i64:64-i128:128-n32:64-S128";
  }
  if (TT.isOSBinFormatCOFF())
    return "e-m:w-p:64:64-i32:32-i64:64-i128:128-n32:64-S128";
  std::string Endian = LittleEndian ? "e" : "E";
  std::string Ptr32 = TT.getEnvironment() == Triple::GNUILP32 ? "-p:32:32" : "";
  return Endian + "-m:e" + Ptr32 +
         "-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128";
}
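// For example, following the branches above, a little-endian ELF triple such
// as aarch64-unknown-linux-gnu (no ILP32 environment) yields the string
// "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128"; the triple chosen
// here is only an illustration.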
static StringRef computeDefaultCPU(const Triple &TT, StringRef CPU) {
  if (CPU.empty() && TT.isArm64e())
    return "apple-a12";
  return CPU;
}

static Reloc::Model getEffectiveRelocModel(const Triple &TT,
                                           Optional<Reloc::Model> RM) {
  // AArch64 Darwin and Windows are always PIC.
  if (TT.isOSDarwin() || TT.isOSWindows())
    return Reloc::PIC_;
  // On ELF platforms the default static relocation model has a smart enough
  // linker to cope with referencing external symbols defined in a shared
  // library. Hence DynamicNoPIC doesn't need to be promoted to PIC.
  if (!RM.hasValue() || *RM == Reloc::DynamicNoPIC)
    return Reloc::Static;
  return *RM;
}

static CodeModel::Model
getEffectiveAArch64CodeModel(const Triple &TT, Optional<CodeModel::Model> CM,
                             bool JIT) {
  if (CM) {
    if (*CM != CodeModel::Small && *CM != CodeModel::Tiny &&
        *CM != CodeModel::Large) {
      report_fatal_error(
          "Only small, tiny and large code models are allowed on AArch64");
    } else if (*CM == CodeModel::Tiny && !TT.isOSBinFormatELF())
      report_fatal_error("tiny code model is only supported on ELF");
    return *CM;
  }
  // The default MCJIT memory managers make no guarantees about where they can
  // find an executable page; JITed code needs to be able to refer to globals
  // no matter how far away they are.
  // We should set CodeModel::Small for Windows ARM64 in JIT mode, since with
  // the large code model LLVM generates 4 MOV instructions, and Windows
  // doesn't support relocating these long branches (4 MOVs).
  if (JIT && !TT.isOSWindows())
    return CodeModel::Large;
  return CodeModel::Small;
}

/// Create an AArch64 architecture model.
///
AArch64TargetMachine::AArch64TargetMachine(const Target &T, const Triple &TT,
                                           StringRef CPU, StringRef FS,
                                           const TargetOptions &Options,
                                           Optional<Reloc::Model> RM,
                                           Optional<CodeModel::Model> CM,
                                           CodeGenOpt::Level OL, bool JIT,
                                           bool LittleEndian)
    : LLVMTargetMachine(T,
                        computeDataLayout(TT, Options.MCOptions, LittleEndian),
                        TT, computeDefaultCPU(TT, CPU), FS, Options,
                        getEffectiveRelocModel(TT, RM),
                        getEffectiveAArch64CodeModel(TT, CM, JIT), OL),
      TLOF(createTLOF(getTargetTriple())), isLittle(LittleEndian) {
  initAsmInfo();

  if (TT.isOSBinFormatMachO()) {
    this->Options.TrapUnreachable = true;
    this->Options.NoTrapAfterNoreturn = true;
  }

  if (getMCAsmInfo()->usesWindowsCFI()) {
    // Unwinding can get confused if the last instruction in an
    // exception-handling region (function, funclet, try block, etc.)
    // is a call.
    //
    // FIXME: We could elide the trap if the next instruction would be in
    // the same region anyway.
    this->Options.TrapUnreachable = true;
  }

  if (this->Options.TLSSize == 0) // default
    this->Options.TLSSize = 24;
  if ((getCodeModel() == CodeModel::Small ||
       getCodeModel() == CodeModel::Kernel) &&
      this->Options.TLSSize > 32)
    // for the small (and kernel) code model, the maximum TLS size is 4GiB
    this->Options.TLSSize = 32;
  else if (getCodeModel() == CodeModel::Tiny && this->Options.TLSSize > 24)
    // for the tiny code model, the maximum TLS size is 1MiB (< 16MiB)
    this->Options.TLSSize = 24;

  // Enable GlobalISel at or below EnableGlobalISelAtO, unless this is
  // MachO/CodeModel::Large, which GlobalISel does not support.
  if (getOptLevel() <= EnableGlobalISelAtO &&
      TT.getArch() != Triple::aarch64_32 &&
      TT.getEnvironment() != Triple::GNUILP32 &&
      !(getCodeModel() == CodeModel::Large && TT.isOSBinFormatMachO())) {
    setGlobalISel(true);
    setGlobalISelAbort(GlobalISelAbortMode::Disable);
  }

  // AArch64 supports the MachineOutliner.
  setMachineOutliner(true);

  // AArch64 supports default outlining behaviour.
  setSupportsDefaultOutlining(true);

  // AArch64 supports the debug entry values.
  setSupportsDebugEntryValues(true);

  // AArch64 supports fixing up the DWARF unwind information.
  if (!getMCAsmInfo()->usesWindowsCFI())
    setCFIFixup(true);
}

AArch64TargetMachine::~AArch64TargetMachine() = default;

const AArch64Subtarget *
AArch64TargetMachine::getSubtargetImpl(const Function &F) const {
  Attribute CPUAttr = F.getFnAttribute("target-cpu");
  Attribute TuneAttr = F.getFnAttribute("tune-cpu");
  Attribute FSAttr = F.getFnAttribute("target-features");

  std::string CPU =
      CPUAttr.isValid() ? CPUAttr.getValueAsString().str() : TargetCPU;
  std::string TuneCPU =
      TuneAttr.isValid() ? TuneAttr.getValueAsString().str() : CPU;
  std::string FS =
      FSAttr.isValid() ? FSAttr.getValueAsString().str() : TargetFS;

  SmallString<512> Key;

  unsigned MinSVEVectorSize = 0;
  unsigned MaxSVEVectorSize = 0;
  Attribute VScaleRangeAttr = F.getFnAttribute(Attribute::VScaleRange);
  if (VScaleRangeAttr.isValid()) {
    Optional<unsigned> VScaleMax = VScaleRangeAttr.getVScaleRangeMax();
    MinSVEVectorSize = VScaleRangeAttr.getVScaleRangeMin() * 128;
    MaxSVEVectorSize = VScaleMax ? VScaleMax.getValue() * 128 : 0;
  } else {
    MinSVEVectorSize = SVEVectorBitsMinOpt;
    MaxSVEVectorSize = SVEVectorBitsMaxOpt;
  }

  assert(MinSVEVectorSize % 128 == 0 &&
         "SVE requires vector length in multiples of 128!");
  assert(MaxSVEVectorSize % 128 == 0 &&
         "SVE requires vector length in multiples of 128!");
  assert((MaxSVEVectorSize >= MinSVEVectorSize || MaxSVEVectorSize == 0) &&
         "Minimum SVE vector size should not be larger than its maximum!");

  // Sanitize user input in case of no asserts
  if (MaxSVEVectorSize == 0)
    MinSVEVectorSize = (MinSVEVectorSize / 128) * 128;
  else {
    MinSVEVectorSize =
        (std::min(MinSVEVectorSize, MaxSVEVectorSize) / 128) * 128;
    MaxSVEVectorSize =
        (std::max(MinSVEVectorSize, MaxSVEVectorSize) / 128) * 128;
  }

  Key += "SVEMin";
  Key += std::to_string(MinSVEVectorSize);
  Key += "SVEMax";
  Key += std::to_string(MaxSVEVectorSize);
  Key += CPU;
  Key += TuneCPU;
  Key += FS;

  auto &I = SubtargetMap[Key];
  if (!I) {
    // This needs to be done before we create a new subtarget since any
    // creation will depend on the TM and the code generation flags on the
    // function that reside in TargetOptions.
    resetTargetOptions(F);
    I = std::make_unique<AArch64Subtarget>(TargetTriple, CPU, TuneCPU, FS,
                                           *this, isLittle, MinSVEVectorSize,
                                           MaxSVEVectorSize);
  }
  return I.get();
}

void AArch64leTargetMachine::anchor() { }

AArch64leTargetMachine::AArch64leTargetMachine(
    const Target &T, const Triple &TT, StringRef CPU, StringRef FS,
    const TargetOptions &Options, Optional<Reloc::Model> RM,
    Optional<CodeModel::Model> CM, CodeGenOpt::Level OL, bool JIT)
    : AArch64TargetMachine(T, TT, CPU, FS, Options, RM, CM, OL, JIT, true) {}

void AArch64beTargetMachine::anchor() { }

AArch64beTargetMachine::AArch64beTargetMachine(
    const Target &T, const Triple &TT, StringRef CPU, StringRef FS,
    const TargetOptions &Options, Optional<Reloc::Model> RM,
    Optional<CodeModel::Model> CM, CodeGenOpt::Level OL, bool JIT)
    : AArch64TargetMachine(T, TT, CPU, FS, Options, RM, CM, OL, JIT, false) {}

namespace {

/// AArch64 Code Generator Pass Configuration Options.
class AArch64PassConfig : public TargetPassConfig {
public:
  AArch64PassConfig(AArch64TargetMachine &TM, PassManagerBase &PM)
      : TargetPassConfig(TM, PM) {
    if (TM.getOptLevel() != CodeGenOpt::None)
      substitutePass(&PostRASchedulerID, &PostMachineSchedulerID);
  }

  AArch64TargetMachine &getAArch64TargetMachine() const {
    return getTM<AArch64TargetMachine>();
  }

  ScheduleDAGInstrs *
  createMachineScheduler(MachineSchedContext *C) const override {
    const AArch64Subtarget &ST = C->MF->getSubtarget<AArch64Subtarget>();
    ScheduleDAGMILive *DAG = createGenericSchedLive(C);
    DAG->addMutation(createLoadClusterDAGMutation(DAG->TII, DAG->TRI));
    DAG->addMutation(createStoreClusterDAGMutation(DAG->TII, DAG->TRI));
    if (ST.hasFusion())
      DAG->addMutation(createAArch64MacroFusionDAGMutation());
    return DAG;
  }

  ScheduleDAGInstrs *
  createPostMachineScheduler(MachineSchedContext *C) const override {
    const AArch64Subtarget &ST = C->MF->getSubtarget<AArch64Subtarget>();
    if (ST.hasFusion()) {
      // Run the Macro Fusion after RA again since literals are expanded from
      // pseudos then (see addPreSched2()).
      ScheduleDAGMI *DAG = createGenericSchedPostRA(C);
      DAG->addMutation(createAArch64MacroFusionDAGMutation());
      return DAG;
    }

    return nullptr;
  }

  void addIRPasses() override;
  bool addPreISel() override;
  void addCodeGenPrepare() override;
  bool addInstSelector() override;
  bool addIRTranslator() override;
  void addPreLegalizeMachineIR() override;
  bool addLegalizeMachineIR() override;
  void addPreRegBankSelect() override;
  bool addRegBankSelect() override;
  void addPreGlobalInstructionSelect() override;
  bool addGlobalInstructionSelect() override;
  void addMachineSSAOptimization() override;
  bool addILPOpts() override;
  void addPreRegAlloc() override;
  void addPostRegAlloc() override;
  void addPreSched2() override;
  void addPreEmitPass() override;
  void addPreEmitPass2() override;

  std::unique_ptr<CSEConfigBase> getCSEConfig() const override;
};

} // end anonymous namespace
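// Illustrative note (not part of the original file): the pipeline assembled by
// this TargetPassConfig can be inspected with the legacy pass manager's
// structure dump, e.g. "llc -mtriple=aarch64 -debug-pass=Structure foo.ll";
// the input file name here is only a placeholder.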
TargetTransformInfo
AArch64TargetMachine::getTargetTransformInfo(const Function &F) const {
  return TargetTransformInfo(AArch64TTIImpl(this, F));
}

TargetPassConfig *AArch64TargetMachine::createPassConfig(PassManagerBase &PM) {
  return new AArch64PassConfig(*this, PM);
}

std::unique_ptr<CSEConfigBase> AArch64PassConfig::getCSEConfig() const {
  return getStandardCSEConfigForOpt(TM->getOptLevel());
}

void AArch64PassConfig::addIRPasses() {
  // Always expand atomic operations; we don't deal with atomicrmw or cmpxchg
  // ourselves.
  addPass(createAtomicExpandPass());

  // Expand any SVE vector library calls that we can't code generate directly.
  if (EnableSVEIntrinsicOpts && TM->getOptLevel() == CodeGenOpt::Aggressive)
    addPass(createSVEIntrinsicOptsPass());

  // Cmpxchg instructions are often used with a subsequent comparison to
  // determine whether it succeeded. We can exploit existing control-flow in
  // ldrex/strex loops to simplify this, but it needs tidying up.
  if (TM->getOptLevel() != CodeGenOpt::None && EnableAtomicTidy)
    addPass(createCFGSimplificationPass(SimplifyCFGOptions()
                                            .forwardSwitchCondToPhi(true)
                                            .convertSwitchRangeToICmp(true)
                                            .convertSwitchToLookupTable(true)
                                            .needCanonicalLoops(false)
                                            .hoistCommonInsts(true)
                                            .sinkCommonInsts(true)));

  // Run LoopDataPrefetch
  //
  // Run this before LSR to remove the multiplies involved in computing the
  // pointer values N iterations ahead.
  if (TM->getOptLevel() != CodeGenOpt::None) {
    if (EnableLoopDataPrefetch)
      addPass(createLoopDataPrefetchPass());
    if (EnableFalkorHWPFFix)
      addPass(createFalkorMarkStridedAccessesPass());
  }

  TargetPassConfig::addIRPasses();

  addPass(createAArch64StackTaggingPass(
      /*IsOptNone=*/TM->getOptLevel() == CodeGenOpt::None));

  // Match interleaved memory accesses to ldN/stN intrinsics.
  if (TM->getOptLevel() != CodeGenOpt::None) {
    addPass(createInterleavedLoadCombinePass());
    addPass(createInterleavedAccessPass());
  }

  if (TM->getOptLevel() == CodeGenOpt::Aggressive && EnableGEPOpt) {
    // Call the SeparateConstOffsetFromGEP pass to extract constants within
    // indices and lower a GEP with multiple indices to either arithmetic
    // operations or multiple GEPs with a single index.
    addPass(createSeparateConstOffsetFromGEPPass(true));
    // Call the EarlyCSE pass to find and remove subexpressions in the lowered
    // result.
    addPass(createEarlyCSEPass());
    // Do loop invariant code motion in case part of the lowered result is
    // invariant.
    addPass(createLICMPass());
  }

  // Add Control Flow Guard checks.
  if (TM->getTargetTriple().isOSWindows())
    addPass(createCFGuardCheckPass());

  if (TM->Options.JMCInstrument)
    addPass(createJMCInstrumenterPass());
}

// Pass Pipeline Configuration
bool AArch64PassConfig::addPreISel() {
  // Run promote constant before global merge, so that the promoted constants
  // get a chance to be merged.
  if (TM->getOptLevel() != CodeGenOpt::None && EnablePromoteConstant)
    addPass(createAArch64PromoteConstantPass());
  // FIXME: On AArch64, this depends on the type.
  // Basically, the addressable offsets are up to 4095 * Ty.getSizeInBytes(),
  // and the offset has to be a multiple of the related size in bytes.
  if ((TM->getOptLevel() != CodeGenOpt::None &&
       EnableGlobalMerge == cl::BOU_UNSET) ||
      EnableGlobalMerge == cl::BOU_TRUE) {
    bool OnlyOptimizeForSize = (TM->getOptLevel() < CodeGenOpt::Aggressive) &&
                               (EnableGlobalMerge == cl::BOU_UNSET);

    // Merging of extern globals is enabled by default on non-Mach-O as we
    // expect it to be generally either beneficial or harmless. On Mach-O it
    // is disabled as we emit the .subsections_via_symbols directive which
    // means that merging extern globals is not safe.
    bool MergeExternalByDefault = !TM->getTargetTriple().isOSBinFormatMachO();

    // FIXME: extern global merging is only enabled when we optimise for size
    // because there are some regressions with it also enabled for performance.
    if (!OnlyOptimizeForSize)
      MergeExternalByDefault = false;

    addPass(createGlobalMergePass(TM, 4095, OnlyOptimizeForSize,
                                  MergeExternalByDefault));
  }

  return false;
}

void AArch64PassConfig::addCodeGenPrepare() {
  if (getOptLevel() != CodeGenOpt::None)
    addPass(createTypePromotionPass());
  TargetPassConfig::addCodeGenPrepare();
}

bool AArch64PassConfig::addInstSelector() {
  addPass(createAArch64ISelDag(getAArch64TargetMachine(), getOptLevel()));

  // For ELF, clean up any local-dynamic TLS accesses (i.e. combine as many
  // references to _TLS_MODULE_BASE_ as possible).
  if (TM->getTargetTriple().isOSBinFormatELF() &&
      getOptLevel() != CodeGenOpt::None)
    addPass(createAArch64CleanupLocalDynamicTLSPass());

  return false;
}

bool AArch64PassConfig::addIRTranslator() {
  addPass(new IRTranslator(getOptLevel()));
  return false;
}

void AArch64PassConfig::addPreLegalizeMachineIR() {
  if (getOptLevel() == CodeGenOpt::None)
    addPass(createAArch64O0PreLegalizerCombiner());
  else {
    addPass(createAArch64PreLegalizerCombiner());
    if (EnableGISelLoadStoreOptPreLegal)
      addPass(new LoadStoreOpt());
  }
}

bool AArch64PassConfig::addLegalizeMachineIR() {
  addPass(new Legalizer());
  return false;
}

void AArch64PassConfig::addPreRegBankSelect() {
  bool IsOptNone = getOptLevel() == CodeGenOpt::None;
  if (!IsOptNone) {
    addPass(createAArch64PostLegalizerCombiner(IsOptNone));
    if (EnableGISelLoadStoreOptPostLegal)
      addPass(new LoadStoreOpt());
  }
  addPass(createAArch64PostLegalizerLowering());
}

bool AArch64PassConfig::addRegBankSelect() {
  addPass(new RegBankSelect());
  return false;
}

void AArch64PassConfig::addPreGlobalInstructionSelect() {
  addPass(new Localizer());
}

bool AArch64PassConfig::addGlobalInstructionSelect() {
  addPass(new InstructionSelect(getOptLevel()));
  if (getOptLevel() != CodeGenOpt::None)
    addPass(createAArch64PostSelectOptimize());
  return false;
}

void AArch64PassConfig::addMachineSSAOptimization() {
  // Run default MachineSSAOptimization first.
  TargetPassConfig::addMachineSSAOptimization();

  if (TM->getOptLevel() != CodeGenOpt::None)
    addPass(createAArch64MIPeepholeOptPass());
}

bool AArch64PassConfig::addILPOpts() {
  if (EnableCondOpt)
    addPass(createAArch64ConditionOptimizerPass());
  if (EnableCCMP)
    addPass(createAArch64ConditionalCompares());
  if (EnableMCR)
    addPass(&MachineCombinerID);
  if (EnableCondBrTuning)
    addPass(createAArch64CondBrTuning());
  if (EnableEarlyIfConversion)
    addPass(&EarlyIfConverterID);
  if (EnableStPairSuppress)
    addPass(createAArch64StorePairSuppressPass());
  addPass(createAArch64SIMDInstrOptPass());
  if (TM->getOptLevel() != CodeGenOpt::None)
    addPass(createAArch64StackTaggingPreRAPass());
  return true;
}

void AArch64PassConfig::addPreRegAlloc() {
  // Change dead register definitions to refer to the zero register.
  if (TM->getOptLevel() != CodeGenOpt::None && EnableDeadRegisterElimination)
    addPass(createAArch64DeadRegisterDefinitions());

  // Use AdvSIMD scalar instructions whenever profitable.
  if (TM->getOptLevel() != CodeGenOpt::None && EnableAdvSIMDScalar) {
    addPass(createAArch64AdvSIMDScalar());
    // The AdvSIMD pass may produce copies that can be rewritten to
    // be register coalescer friendly.
    addPass(&PeepholeOptimizerID);
  }
}

void AArch64PassConfig::addPostRegAlloc() {
  // Remove redundant copy instructions.
  if (TM->getOptLevel() != CodeGenOpt::None && EnableRedundantCopyElimination)
    addPass(createAArch64RedundantCopyEliminationPass());

  if (TM->getOptLevel() != CodeGenOpt::None && usingDefaultRegAlloc())
    // Improve performance for some FP/SIMD code for A57.
    addPass(createAArch64A57FPLoadBalancing());
}

void AArch64PassConfig::addPreSched2() {
  // Lower homogeneous frame instructions.
  if (EnableHomogeneousPrologEpilog)
    addPass(createAArch64LowerHomogeneousPrologEpilogPass());
  // Expand some pseudo instructions to allow proper scheduling.
  addPass(createAArch64ExpandPseudoPass());
  // Use load/store pair instructions when possible.
  if (TM->getOptLevel() != CodeGenOpt::None) {
    if (EnableLoadStoreOpt)
      addPass(createAArch64LoadStoreOptimizationPass());
  }

  // The AArch64SpeculationHardeningPass destroys dominator tree and natural
  // loop info, which is needed for the FalkorHWPFFixPass and also later on.
  // Therefore, run the AArch64SpeculationHardeningPass before the
  // FalkorHWPFFixPass to avoid recomputing dominator tree and natural loop
  // info.
  addPass(createAArch64SpeculationHardeningPass());

  addPass(createAArch64IndirectThunks());
  addPass(createAArch64SLSHardeningPass());

  if (TM->getOptLevel() != CodeGenOpt::None) {
    if (EnableFalkorHWPFFix)
      addPass(createFalkorHWPFFixPass());
  }
}

void AArch64PassConfig::addPreEmitPass() {
  // Machine Block Placement might have created new opportunities when run
  // at O3, where the Tail Duplication Threshold is set to 4 instructions.
  // Run the load/store optimizer once more.
  if (TM->getOptLevel() >= CodeGenOpt::Aggressive && EnableLoadStoreOpt)
    addPass(createAArch64LoadStoreOptimizationPass());

  addPass(createAArch64A53Fix835769());

  if (EnableBranchTargets)
    addPass(createAArch64BranchTargetsPass());

  // Relax conditional branch instructions if they're otherwise out of
  // range of their destination.
  if (BranchRelaxation)
    addPass(&BranchRelaxationPassID);

  if (TM->getTargetTriple().isOSWindows()) {
    // Identify valid longjmp targets for Windows Control Flow Guard.
    addPass(createCFGuardLongjmpPass());
    // Identify valid eh continuation targets for Windows EHCont Guard.
    addPass(createEHContGuardCatchretPass());
  }

  if (TM->getOptLevel() != CodeGenOpt::None && EnableCompressJumpTables)
    addPass(createAArch64CompressJumpTablesPass());

  if (TM->getOptLevel() != CodeGenOpt::None && EnableCollectLOH &&
      TM->getTargetTriple().isOSBinFormatMachO())
    addPass(createAArch64CollectLOHPass());
}

void AArch64PassConfig::addPreEmitPass2() {
  // SVE bundles move prefixes with destructive operations. BLR_RVMARKER pseudo
  // instructions are lowered to bundles as well.
  addPass(createUnpackMachineBundles(nullptr));
}

yaml::MachineFunctionInfo *
AArch64TargetMachine::createDefaultFuncInfoYAML() const {
  return new yaml::AArch64FunctionInfo();
}

yaml::MachineFunctionInfo *
AArch64TargetMachine::convertFuncInfoToYAML(const MachineFunction &MF) const {
  const auto *MFI = MF.getInfo<AArch64FunctionInfo>();
  return new yaml::AArch64FunctionInfo(*MFI);
}

bool AArch64TargetMachine::parseMachineFunctionInfo(
    const yaml::MachineFunctionInfo &MFI, PerFunctionMIParsingState &PFS,
    SMDiagnostic &Error, SMRange &SourceRange) const {
  const auto &YamlMFI = static_cast<const yaml::AArch64FunctionInfo &>(MFI);
  MachineFunction &MF = PFS.MF;
  MF.getInfo<AArch64FunctionInfo>()->initializeBaseYamlFields(YamlMFI);
  return false;
}