194faadacSSnehasish Kumar //===-- MachineFunctionSplitter.cpp - Split machine functions //-----------===// 294faadacSSnehasish Kumar // 394faadacSSnehasish Kumar // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 494faadacSSnehasish Kumar // See https://llvm.org/LICENSE.txt for license information. 594faadacSSnehasish Kumar // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 694faadacSSnehasish Kumar // 794faadacSSnehasish Kumar //===----------------------------------------------------------------------===// 894faadacSSnehasish Kumar // 994faadacSSnehasish Kumar // \file 1094faadacSSnehasish Kumar // Uses profile information to split out cold blocks. 1194faadacSSnehasish Kumar // 1294faadacSSnehasish Kumar // This pass splits out cold machine basic blocks from the parent function. This 1394faadacSSnehasish Kumar // implementation leverages the basic block section framework. Blocks marked 1494faadacSSnehasish Kumar // cold by this pass are grouped together in a separate section prefixed with 1594faadacSSnehasish Kumar // ".text.unlikely.*". The linker can then group these together as a cold 1694faadacSSnehasish Kumar // section. The split part of the function is a contiguous region identified by 1794faadacSSnehasish Kumar // the symbol "foo.cold". Grouping all cold blocks across functions together 1894faadacSSnehasish Kumar // decreases fragmentation and improves icache and itlb utilization. Note that 1994faadacSSnehasish Kumar // the overall changes to the binary size are negligible; only a small number of 2094faadacSSnehasish Kumar // additional jump instructions may be introduced. 2194faadacSSnehasish Kumar // 2294faadacSSnehasish Kumar // For the original RFC of this pass please see 2394faadacSSnehasish Kumar // https://groups.google.com/d/msg/llvm-dev/RUegaMg-iqc/wFAVxa6fCgAJ 2494faadacSSnehasish Kumar //===----------------------------------------------------------------------===// 2594faadacSSnehasish Kumar 2694faadacSSnehasish Kumar #include "llvm/ADT/Statistic.h" 2794faadacSSnehasish Kumar #include "llvm/Analysis/ProfileSummaryInfo.h" 2894faadacSSnehasish Kumar #include "llvm/CodeGen/BasicBlockSectionUtils.h" 2994faadacSSnehasish Kumar #include "llvm/CodeGen/MachineBasicBlock.h" 3094faadacSSnehasish Kumar #include "llvm/CodeGen/MachineBlockFrequencyInfo.h" 3194faadacSSnehasish Kumar #include "llvm/CodeGen/MachineFunction.h" 3294faadacSSnehasish Kumar #include "llvm/CodeGen/MachineFunctionPass.h" 3394faadacSSnehasish Kumar #include "llvm/CodeGen/MachineModuleInfo.h" 3494faadacSSnehasish Kumar #include "llvm/CodeGen/Passes.h" 3594faadacSSnehasish Kumar #include "llvm/IR/Function.h" 3694faadacSSnehasish Kumar #include "llvm/IR/Module.h" 3794faadacSSnehasish Kumar #include "llvm/InitializePasses.h" 3894faadacSSnehasish Kumar #include "llvm/Support/CommandLine.h" 3994faadacSSnehasish Kumar 4094faadacSSnehasish Kumar using namespace llvm; 4194faadacSSnehasish Kumar 42*24bf6ff4SSnehasish Kumar // FIXME: This cutoff value is CPU dependent and should be moved to 43*24bf6ff4SSnehasish Kumar // TargetTransformInfo once we consider enabling this on other platforms. 44*24bf6ff4SSnehasish Kumar // The value is expressed as a ProfileSummaryInfo integer percentile cutoff. 45*24bf6ff4SSnehasish Kumar // Defaults to 999950, i.e. all blocks colder than 99.995 percentile are split. 46*24bf6ff4SSnehasish Kumar // The default was empirically determined to be optimal when considering cutoff 47*24bf6ff4SSnehasish Kumar // values between 99%-ile to 100%-ile with respect to iTLB and icache metrics on 48*24bf6ff4SSnehasish Kumar // Intel CPUs. 4994faadacSSnehasish Kumar static cl::opt<unsigned> 5094faadacSSnehasish Kumar PercentileCutoff("mfs-psi-cutoff", 5194faadacSSnehasish Kumar cl::desc("Percentile profile summary cutoff used to " 5294faadacSSnehasish Kumar "determine cold blocks. Unused if set to zero."), 53*24bf6ff4SSnehasish Kumar cl::init(999950), cl::Hidden); 5494faadacSSnehasish Kumar 5594faadacSSnehasish Kumar static cl::opt<unsigned> ColdCountThreshold( 5694faadacSSnehasish Kumar "mfs-count-threshold", 5794faadacSSnehasish Kumar cl::desc( 5894faadacSSnehasish Kumar "Minimum number of times a block must be executed to be retained."), 5994faadacSSnehasish Kumar cl::init(1), cl::Hidden); 6094faadacSSnehasish Kumar 6194faadacSSnehasish Kumar namespace { 6294faadacSSnehasish Kumar 6394faadacSSnehasish Kumar class MachineFunctionSplitter : public MachineFunctionPass { 6494faadacSSnehasish Kumar public: 6594faadacSSnehasish Kumar static char ID; 6694faadacSSnehasish Kumar MachineFunctionSplitter() : MachineFunctionPass(ID) { 6794faadacSSnehasish Kumar initializeMachineFunctionSplitterPass(*PassRegistry::getPassRegistry()); 6894faadacSSnehasish Kumar } 6994faadacSSnehasish Kumar 7094faadacSSnehasish Kumar StringRef getPassName() const override { 7194faadacSSnehasish Kumar return "Machine Function Splitter Transformation"; 7294faadacSSnehasish Kumar } 7394faadacSSnehasish Kumar 7494faadacSSnehasish Kumar void getAnalysisUsage(AnalysisUsage &AU) const override; 7594faadacSSnehasish Kumar 7694faadacSSnehasish Kumar bool runOnMachineFunction(MachineFunction &F) override; 7794faadacSSnehasish Kumar }; 7894faadacSSnehasish Kumar } // end anonymous namespace 7994faadacSSnehasish Kumar 8094faadacSSnehasish Kumar static bool isColdBlock(MachineBasicBlock &MBB, 8194faadacSSnehasish Kumar const MachineBlockFrequencyInfo *MBFI, 8294faadacSSnehasish Kumar ProfileSummaryInfo *PSI) { 8394faadacSSnehasish Kumar Optional<uint64_t> Count = MBFI->getBlockProfileCount(&MBB); 8494faadacSSnehasish Kumar if (!Count.hasValue()) 8594faadacSSnehasish Kumar return true; 8694faadacSSnehasish Kumar 8794faadacSSnehasish Kumar if (PercentileCutoff > 0) { 8894faadacSSnehasish Kumar return PSI->isColdCountNthPercentile(PercentileCutoff, *Count); 8994faadacSSnehasish Kumar } 9094faadacSSnehasish Kumar return (*Count < ColdCountThreshold); 9194faadacSSnehasish Kumar } 9294faadacSSnehasish Kumar 9394faadacSSnehasish Kumar bool MachineFunctionSplitter::runOnMachineFunction(MachineFunction &MF) { 9494faadacSSnehasish Kumar // TODO: We only target functions with profile data. Static information may 9594faadacSSnehasish Kumar // also be considered but we don't see performance improvements yet. 9694faadacSSnehasish Kumar if (!MF.getFunction().hasProfileData()) 9794faadacSSnehasish Kumar return false; 9894faadacSSnehasish Kumar 9994faadacSSnehasish Kumar // TODO: We don't split functions where a section attribute has been set 10094faadacSSnehasish Kumar // since the split part may not be placed in a contiguous region. It may also 10194faadacSSnehasish Kumar // be more beneficial to augment the linker to ensure contiguous layout of 10294faadacSSnehasish Kumar // split functions within the same section as specified by the attribute. 10394faadacSSnehasish Kumar if (!MF.getFunction().getSection().empty()) 10494faadacSSnehasish Kumar return false; 10594faadacSSnehasish Kumar 10694faadacSSnehasish Kumar // We don't want to proceed further for cold functions 10794faadacSSnehasish Kumar // or functions of unknown hotness. Lukewarm functions have no prefix. 10894faadacSSnehasish Kumar Optional<StringRef> SectionPrefix = MF.getFunction().getSectionPrefix(); 10994faadacSSnehasish Kumar if (SectionPrefix.hasValue() && 11094faadacSSnehasish Kumar (SectionPrefix.getValue().equals(".unlikely") || 11194faadacSSnehasish Kumar SectionPrefix.getValue().equals(".unknown"))) { 11294faadacSSnehasish Kumar return false; 11394faadacSSnehasish Kumar } 11494faadacSSnehasish Kumar 11594faadacSSnehasish Kumar // Renumbering blocks here preserves the order of the blocks as 11694faadacSSnehasish Kumar // sortBasicBlocksAndUpdateBranches uses the numeric identifier to sort 11794faadacSSnehasish Kumar // blocks. Preserving the order of blocks is essential to retaining decisions 11894faadacSSnehasish Kumar // made by prior passes such as MachineBlockPlacement. 11994faadacSSnehasish Kumar MF.RenumberBlocks(); 12094faadacSSnehasish Kumar MF.setBBSectionsType(BasicBlockSection::Preset); 12194faadacSSnehasish Kumar auto *MBFI = &getAnalysis<MachineBlockFrequencyInfo>(); 12294faadacSSnehasish Kumar auto *PSI = &getAnalysis<ProfileSummaryInfoWrapperPass>().getPSI(); 12394faadacSSnehasish Kumar 12494faadacSSnehasish Kumar for (auto &MBB : MF) { 12594faadacSSnehasish Kumar // FIXME: We retain the entry block and conservatively keep all landing pad 12694faadacSSnehasish Kumar // blocks as part of the original function. Once D73739 is submitted, we can 12794faadacSSnehasish Kumar // improve the handling of ehpads. 12894faadacSSnehasish Kumar if ((MBB.pred_empty() || MBB.isEHPad())) 12994faadacSSnehasish Kumar continue; 13094faadacSSnehasish Kumar if (isColdBlock(MBB, MBFI, PSI)) 13194faadacSSnehasish Kumar MBB.setSectionID(MBBSectionID::ColdSectionID); 13294faadacSSnehasish Kumar } 13394faadacSSnehasish Kumar 13494faadacSSnehasish Kumar auto Comparator = [](const MachineBasicBlock &X, const MachineBasicBlock &Y) { 13594faadacSSnehasish Kumar return X.getSectionID().Type < Y.getSectionID().Type; 13694faadacSSnehasish Kumar }; 13794faadacSSnehasish Kumar llvm::sortBasicBlocksAndUpdateBranches(MF, Comparator); 13894faadacSSnehasish Kumar 13994faadacSSnehasish Kumar return true; 14094faadacSSnehasish Kumar } 14194faadacSSnehasish Kumar 14294faadacSSnehasish Kumar void MachineFunctionSplitter::getAnalysisUsage(AnalysisUsage &AU) const { 14394faadacSSnehasish Kumar AU.addRequired<MachineModuleInfoWrapperPass>(); 14494faadacSSnehasish Kumar AU.addRequired<MachineBlockFrequencyInfo>(); 14594faadacSSnehasish Kumar AU.addRequired<ProfileSummaryInfoWrapperPass>(); 14694faadacSSnehasish Kumar } 14794faadacSSnehasish Kumar 14894faadacSSnehasish Kumar char MachineFunctionSplitter::ID = 0; 14994faadacSSnehasish Kumar INITIALIZE_PASS(MachineFunctionSplitter, "machine-function-splitter", 15094faadacSSnehasish Kumar "Split machine functions using profile information", false, 15194faadacSSnehasish Kumar false) 15294faadacSSnehasish Kumar 15394faadacSSnehasish Kumar MachineFunctionPass *llvm::createMachineFunctionSplitterPass() { 15494faadacSSnehasish Kumar return new MachineFunctionSplitter(); 15594faadacSSnehasish Kumar } 156