1*94faadacSSnehasish Kumar //===-- MachineFunctionSplitter.cpp - Split machine functions //-----------===// 2*94faadacSSnehasish Kumar // 3*94faadacSSnehasish Kumar // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4*94faadacSSnehasish Kumar // See https://llvm.org/LICENSE.txt for license information. 5*94faadacSSnehasish Kumar // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6*94faadacSSnehasish Kumar // 7*94faadacSSnehasish Kumar //===----------------------------------------------------------------------===// 8*94faadacSSnehasish Kumar // 9*94faadacSSnehasish Kumar // \file 10*94faadacSSnehasish Kumar // Uses profile information to split out cold blocks. 11*94faadacSSnehasish Kumar // 12*94faadacSSnehasish Kumar // This pass splits out cold machine basic blocks from the parent function. This 13*94faadacSSnehasish Kumar // implementation leverages the basic block section framework. Blocks marked 14*94faadacSSnehasish Kumar // cold by this pass are grouped together in a separate section prefixed with 15*94faadacSSnehasish Kumar // ".text.unlikely.*". The linker can then group these together as a cold 16*94faadacSSnehasish Kumar // section. The split part of the function is a contiguous region identified by 17*94faadacSSnehasish Kumar // the symbol "foo.cold". Grouping all cold blocks across functions together 18*94faadacSSnehasish Kumar // decreases fragmentation and improves icache and itlb utilization. Note that 19*94faadacSSnehasish Kumar // the overall changes to the binary size are negligible; only a small number of 20*94faadacSSnehasish Kumar // additional jump instructions may be introduced. 21*94faadacSSnehasish Kumar // 22*94faadacSSnehasish Kumar // For the original RFC of this pass please see 23*94faadacSSnehasish Kumar // https://groups.google.com/d/msg/llvm-dev/RUegaMg-iqc/wFAVxa6fCgAJ 24*94faadacSSnehasish Kumar //===----------------------------------------------------------------------===// 25*94faadacSSnehasish Kumar 26*94faadacSSnehasish Kumar #include "llvm/ADT/Statistic.h" 27*94faadacSSnehasish Kumar #include "llvm/Analysis/ProfileSummaryInfo.h" 28*94faadacSSnehasish Kumar #include "llvm/CodeGen/BasicBlockSectionUtils.h" 29*94faadacSSnehasish Kumar #include "llvm/CodeGen/MachineBasicBlock.h" 30*94faadacSSnehasish Kumar #include "llvm/CodeGen/MachineBlockFrequencyInfo.h" 31*94faadacSSnehasish Kumar #include "llvm/CodeGen/MachineFunction.h" 32*94faadacSSnehasish Kumar #include "llvm/CodeGen/MachineFunctionPass.h" 33*94faadacSSnehasish Kumar #include "llvm/CodeGen/MachineModuleInfo.h" 34*94faadacSSnehasish Kumar #include "llvm/CodeGen/Passes.h" 35*94faadacSSnehasish Kumar #include "llvm/IR/Function.h" 36*94faadacSSnehasish Kumar #include "llvm/IR/Module.h" 37*94faadacSSnehasish Kumar #include "llvm/InitializePasses.h" 38*94faadacSSnehasish Kumar #include "llvm/Support/CommandLine.h" 39*94faadacSSnehasish Kumar 40*94faadacSSnehasish Kumar using namespace llvm; 41*94faadacSSnehasish Kumar 42*94faadacSSnehasish Kumar static cl::opt<unsigned> 43*94faadacSSnehasish Kumar PercentileCutoff("mfs-psi-cutoff", 44*94faadacSSnehasish Kumar cl::desc("Percentile profile summary cutoff used to " 45*94faadacSSnehasish Kumar "determine cold blocks. Unused if set to zero."), 46*94faadacSSnehasish Kumar cl::init(0), cl::Hidden); 47*94faadacSSnehasish Kumar 48*94faadacSSnehasish Kumar static cl::opt<unsigned> ColdCountThreshold( 49*94faadacSSnehasish Kumar "mfs-count-threshold", 50*94faadacSSnehasish Kumar cl::desc( 51*94faadacSSnehasish Kumar "Minimum number of times a block must be executed to be retained."), 52*94faadacSSnehasish Kumar cl::init(1), cl::Hidden); 53*94faadacSSnehasish Kumar 54*94faadacSSnehasish Kumar namespace { 55*94faadacSSnehasish Kumar 56*94faadacSSnehasish Kumar class MachineFunctionSplitter : public MachineFunctionPass { 57*94faadacSSnehasish Kumar public: 58*94faadacSSnehasish Kumar static char ID; 59*94faadacSSnehasish Kumar MachineFunctionSplitter() : MachineFunctionPass(ID) { 60*94faadacSSnehasish Kumar initializeMachineFunctionSplitterPass(*PassRegistry::getPassRegistry()); 61*94faadacSSnehasish Kumar } 62*94faadacSSnehasish Kumar 63*94faadacSSnehasish Kumar StringRef getPassName() const override { 64*94faadacSSnehasish Kumar return "Machine Function Splitter Transformation"; 65*94faadacSSnehasish Kumar } 66*94faadacSSnehasish Kumar 67*94faadacSSnehasish Kumar void getAnalysisUsage(AnalysisUsage &AU) const override; 68*94faadacSSnehasish Kumar 69*94faadacSSnehasish Kumar bool runOnMachineFunction(MachineFunction &F) override; 70*94faadacSSnehasish Kumar }; 71*94faadacSSnehasish Kumar } // end anonymous namespace 72*94faadacSSnehasish Kumar 73*94faadacSSnehasish Kumar static bool isColdBlock(MachineBasicBlock &MBB, 74*94faadacSSnehasish Kumar const MachineBlockFrequencyInfo *MBFI, 75*94faadacSSnehasish Kumar ProfileSummaryInfo *PSI) { 76*94faadacSSnehasish Kumar Optional<uint64_t> Count = MBFI->getBlockProfileCount(&MBB); 77*94faadacSSnehasish Kumar if (!Count.hasValue()) 78*94faadacSSnehasish Kumar return true; 79*94faadacSSnehasish Kumar 80*94faadacSSnehasish Kumar if (PercentileCutoff > 0) { 81*94faadacSSnehasish Kumar return PSI->isColdCountNthPercentile(PercentileCutoff, *Count); 82*94faadacSSnehasish Kumar } 83*94faadacSSnehasish Kumar return (*Count < ColdCountThreshold); 84*94faadacSSnehasish Kumar } 85*94faadacSSnehasish Kumar 86*94faadacSSnehasish Kumar bool MachineFunctionSplitter::runOnMachineFunction(MachineFunction &MF) { 87*94faadacSSnehasish Kumar // TODO: We only target functions with profile data. Static information may 88*94faadacSSnehasish Kumar // also be considered but we don't see performance improvements yet. 89*94faadacSSnehasish Kumar if (!MF.getFunction().hasProfileData()) 90*94faadacSSnehasish Kumar return false; 91*94faadacSSnehasish Kumar 92*94faadacSSnehasish Kumar // TODO: We don't split functions where a section attribute has been set 93*94faadacSSnehasish Kumar // since the split part may not be placed in a contiguous region. It may also 94*94faadacSSnehasish Kumar // be more beneficial to augment the linker to ensure contiguous layout of 95*94faadacSSnehasish Kumar // split functions within the same section as specified by the attribute. 96*94faadacSSnehasish Kumar if (!MF.getFunction().getSection().empty()) 97*94faadacSSnehasish Kumar return false; 98*94faadacSSnehasish Kumar 99*94faadacSSnehasish Kumar // We don't want to proceed further for cold functions 100*94faadacSSnehasish Kumar // or functions of unknown hotness. Lukewarm functions have no prefix. 101*94faadacSSnehasish Kumar Optional<StringRef> SectionPrefix = MF.getFunction().getSectionPrefix(); 102*94faadacSSnehasish Kumar if (SectionPrefix.hasValue() && 103*94faadacSSnehasish Kumar (SectionPrefix.getValue().equals(".unlikely") || 104*94faadacSSnehasish Kumar SectionPrefix.getValue().equals(".unknown"))) { 105*94faadacSSnehasish Kumar return false; 106*94faadacSSnehasish Kumar } 107*94faadacSSnehasish Kumar 108*94faadacSSnehasish Kumar // Renumbering blocks here preserves the order of the blocks as 109*94faadacSSnehasish Kumar // sortBasicBlocksAndUpdateBranches uses the numeric identifier to sort 110*94faadacSSnehasish Kumar // blocks. Preserving the order of blocks is essential to retaining decisions 111*94faadacSSnehasish Kumar // made by prior passes such as MachineBlockPlacement. 112*94faadacSSnehasish Kumar MF.RenumberBlocks(); 113*94faadacSSnehasish Kumar MF.setBBSectionsType(BasicBlockSection::Preset); 114*94faadacSSnehasish Kumar auto *MBFI = &getAnalysis<MachineBlockFrequencyInfo>(); 115*94faadacSSnehasish Kumar auto *PSI = &getAnalysis<ProfileSummaryInfoWrapperPass>().getPSI(); 116*94faadacSSnehasish Kumar 117*94faadacSSnehasish Kumar for (auto &MBB : MF) { 118*94faadacSSnehasish Kumar // FIXME: We retain the entry block and conservatively keep all landing pad 119*94faadacSSnehasish Kumar // blocks as part of the original function. Once D73739 is submitted, we can 120*94faadacSSnehasish Kumar // improve the handling of ehpads. 121*94faadacSSnehasish Kumar if ((MBB.pred_empty() || MBB.isEHPad())) 122*94faadacSSnehasish Kumar continue; 123*94faadacSSnehasish Kumar if (isColdBlock(MBB, MBFI, PSI)) 124*94faadacSSnehasish Kumar MBB.setSectionID(MBBSectionID::ColdSectionID); 125*94faadacSSnehasish Kumar } 126*94faadacSSnehasish Kumar 127*94faadacSSnehasish Kumar auto Comparator = [](const MachineBasicBlock &X, const MachineBasicBlock &Y) { 128*94faadacSSnehasish Kumar return X.getSectionID().Type < Y.getSectionID().Type; 129*94faadacSSnehasish Kumar }; 130*94faadacSSnehasish Kumar llvm::sortBasicBlocksAndUpdateBranches(MF, Comparator); 131*94faadacSSnehasish Kumar 132*94faadacSSnehasish Kumar return true; 133*94faadacSSnehasish Kumar } 134*94faadacSSnehasish Kumar 135*94faadacSSnehasish Kumar void MachineFunctionSplitter::getAnalysisUsage(AnalysisUsage &AU) const { 136*94faadacSSnehasish Kumar AU.addRequired<MachineModuleInfoWrapperPass>(); 137*94faadacSSnehasish Kumar AU.addRequired<MachineBlockFrequencyInfo>(); 138*94faadacSSnehasish Kumar AU.addRequired<ProfileSummaryInfoWrapperPass>(); 139*94faadacSSnehasish Kumar } 140*94faadacSSnehasish Kumar 141*94faadacSSnehasish Kumar char MachineFunctionSplitter::ID = 0; 142*94faadacSSnehasish Kumar INITIALIZE_PASS(MachineFunctionSplitter, "machine-function-splitter", 143*94faadacSSnehasish Kumar "Split machine functions using profile information", false, 144*94faadacSSnehasish Kumar false) 145*94faadacSSnehasish Kumar 146*94faadacSSnehasish Kumar MachineFunctionPass *llvm::createMachineFunctionSplitterPass() { 147*94faadacSSnehasish Kumar return new MachineFunctionSplitter(); 148*94faadacSSnehasish Kumar } 149