194faadacSSnehasish Kumar //===-- MachineFunctionSplitter.cpp - Split machine functions //-----------===// 294faadacSSnehasish Kumar // 394faadacSSnehasish Kumar // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 494faadacSSnehasish Kumar // See https://llvm.org/LICENSE.txt for license information. 594faadacSSnehasish Kumar // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 694faadacSSnehasish Kumar // 794faadacSSnehasish Kumar //===----------------------------------------------------------------------===// 894faadacSSnehasish Kumar // 994faadacSSnehasish Kumar // \file 1094faadacSSnehasish Kumar // Uses profile information to split out cold blocks. 1194faadacSSnehasish Kumar // 1294faadacSSnehasish Kumar // This pass splits out cold machine basic blocks from the parent function. This 1394faadacSSnehasish Kumar // implementation leverages the basic block section framework. Blocks marked 1494faadacSSnehasish Kumar // cold by this pass are grouped together in a separate section prefixed with 1594faadacSSnehasish Kumar // ".text.unlikely.*". The linker can then group these together as a cold 1694faadacSSnehasish Kumar // section. The split part of the function is a contiguous region identified by 1794faadacSSnehasish Kumar // the symbol "foo.cold". Grouping all cold blocks across functions together 1894faadacSSnehasish Kumar // decreases fragmentation and improves icache and itlb utilization. Note that 1994faadacSSnehasish Kumar // the overall changes to the binary size are negligible; only a small number of 2094faadacSSnehasish Kumar // additional jump instructions may be introduced. 2194faadacSSnehasish Kumar // 2294faadacSSnehasish Kumar // For the original RFC of this pass please see 2394faadacSSnehasish Kumar // https://groups.google.com/d/msg/llvm-dev/RUegaMg-iqc/wFAVxa6fCgAJ 2494faadacSSnehasish Kumar //===----------------------------------------------------------------------===// 2594faadacSSnehasish Kumar 262c7077e6SSnehasish Kumar #include "llvm/ADT/SmallVector.h" 2794faadacSSnehasish Kumar #include "llvm/ADT/Statistic.h" 2894faadacSSnehasish Kumar #include "llvm/Analysis/ProfileSummaryInfo.h" 2994faadacSSnehasish Kumar #include "llvm/CodeGen/BasicBlockSectionUtils.h" 3094faadacSSnehasish Kumar #include "llvm/CodeGen/MachineBasicBlock.h" 3194faadacSSnehasish Kumar #include "llvm/CodeGen/MachineBlockFrequencyInfo.h" 3294faadacSSnehasish Kumar #include "llvm/CodeGen/MachineFunction.h" 3394faadacSSnehasish Kumar #include "llvm/CodeGen/MachineFunctionPass.h" 3494faadacSSnehasish Kumar #include "llvm/CodeGen/MachineModuleInfo.h" 3594faadacSSnehasish Kumar #include "llvm/CodeGen/Passes.h" 3694faadacSSnehasish Kumar #include "llvm/IR/Function.h" 3794faadacSSnehasish Kumar #include "llvm/IR/Module.h" 3894faadacSSnehasish Kumar #include "llvm/InitializePasses.h" 3994faadacSSnehasish Kumar #include "llvm/Support/CommandLine.h" 4094faadacSSnehasish Kumar 4194faadacSSnehasish Kumar using namespace llvm; 4294faadacSSnehasish Kumar 4324bf6ff4SSnehasish Kumar // FIXME: This cutoff value is CPU dependent and should be moved to 4424bf6ff4SSnehasish Kumar // TargetTransformInfo once we consider enabling this on other platforms. 4524bf6ff4SSnehasish Kumar // The value is expressed as a ProfileSummaryInfo integer percentile cutoff. 4624bf6ff4SSnehasish Kumar // Defaults to 999950, i.e. all blocks colder than 99.995 percentile are split. 4724bf6ff4SSnehasish Kumar // The default was empirically determined to be optimal when considering cutoff 4824bf6ff4SSnehasish Kumar // values between 99%-ile to 100%-ile with respect to iTLB and icache metrics on 4924bf6ff4SSnehasish Kumar // Intel CPUs. 5094faadacSSnehasish Kumar static cl::opt<unsigned> 5194faadacSSnehasish Kumar PercentileCutoff("mfs-psi-cutoff", 5294faadacSSnehasish Kumar cl::desc("Percentile profile summary cutoff used to " 5394faadacSSnehasish Kumar "determine cold blocks. Unused if set to zero."), 5424bf6ff4SSnehasish Kumar cl::init(999950), cl::Hidden); 5594faadacSSnehasish Kumar 5694faadacSSnehasish Kumar static cl::opt<unsigned> ColdCountThreshold( 5794faadacSSnehasish Kumar "mfs-count-threshold", 5894faadacSSnehasish Kumar cl::desc( 5994faadacSSnehasish Kumar "Minimum number of times a block must be executed to be retained."), 6094faadacSSnehasish Kumar cl::init(1), cl::Hidden); 6194faadacSSnehasish Kumar 6294faadacSSnehasish Kumar namespace { 6394faadacSSnehasish Kumar 6494faadacSSnehasish Kumar class MachineFunctionSplitter : public MachineFunctionPass { 6594faadacSSnehasish Kumar public: 6694faadacSSnehasish Kumar static char ID; 6794faadacSSnehasish Kumar MachineFunctionSplitter() : MachineFunctionPass(ID) { 6894faadacSSnehasish Kumar initializeMachineFunctionSplitterPass(*PassRegistry::getPassRegistry()); 6994faadacSSnehasish Kumar } 7094faadacSSnehasish Kumar 7194faadacSSnehasish Kumar StringRef getPassName() const override { 7294faadacSSnehasish Kumar return "Machine Function Splitter Transformation"; 7394faadacSSnehasish Kumar } 7494faadacSSnehasish Kumar 7594faadacSSnehasish Kumar void getAnalysisUsage(AnalysisUsage &AU) const override; 7694faadacSSnehasish Kumar 7794faadacSSnehasish Kumar bool runOnMachineFunction(MachineFunction &F) override; 7894faadacSSnehasish Kumar }; 7994faadacSSnehasish Kumar } // end anonymous namespace 8094faadacSSnehasish Kumar 812c7077e6SSnehasish Kumar static bool isColdBlock(const MachineBasicBlock &MBB, 8294faadacSSnehasish Kumar const MachineBlockFrequencyInfo *MBFI, 8394faadacSSnehasish Kumar ProfileSummaryInfo *PSI) { 8494faadacSSnehasish Kumar Optional<uint64_t> Count = MBFI->getBlockProfileCount(&MBB); 8594faadacSSnehasish Kumar if (!Count.hasValue()) 8694faadacSSnehasish Kumar return true; 8794faadacSSnehasish Kumar 8894faadacSSnehasish Kumar if (PercentileCutoff > 0) { 8994faadacSSnehasish Kumar return PSI->isColdCountNthPercentile(PercentileCutoff, *Count); 9094faadacSSnehasish Kumar } 9194faadacSSnehasish Kumar return (*Count < ColdCountThreshold); 9294faadacSSnehasish Kumar } 9394faadacSSnehasish Kumar 9494faadacSSnehasish Kumar bool MachineFunctionSplitter::runOnMachineFunction(MachineFunction &MF) { 9594faadacSSnehasish Kumar // TODO: We only target functions with profile data. Static information may 9694faadacSSnehasish Kumar // also be considered but we don't see performance improvements yet. 9794faadacSSnehasish Kumar if (!MF.getFunction().hasProfileData()) 9894faadacSSnehasish Kumar return false; 9994faadacSSnehasish Kumar 10094faadacSSnehasish Kumar // TODO: We don't split functions where a section attribute has been set 10194faadacSSnehasish Kumar // since the split part may not be placed in a contiguous region. It may also 10294faadacSSnehasish Kumar // be more beneficial to augment the linker to ensure contiguous layout of 10394faadacSSnehasish Kumar // split functions within the same section as specified by the attribute. 104*3da0aeeaSSnehasish Kumar if (MF.getFunction().hasSection() || 1058077d0ffSSnehasish Kumar MF.getFunction().hasFnAttribute("implicit-section-name")) 10694faadacSSnehasish Kumar return false; 10794faadacSSnehasish Kumar 10894faadacSSnehasish Kumar // We don't want to proceed further for cold functions 10994faadacSSnehasish Kumar // or functions of unknown hotness. Lukewarm functions have no prefix. 11094faadacSSnehasish Kumar Optional<StringRef> SectionPrefix = MF.getFunction().getSectionPrefix(); 11194faadacSSnehasish Kumar if (SectionPrefix.hasValue() && 1127af80299SPan, Tao (SectionPrefix.getValue().equals("unlikely") || 1137af80299SPan, Tao SectionPrefix.getValue().equals("unknown"))) { 11494faadacSSnehasish Kumar return false; 11594faadacSSnehasish Kumar } 11694faadacSSnehasish Kumar 11794faadacSSnehasish Kumar // Renumbering blocks here preserves the order of the blocks as 11894faadacSSnehasish Kumar // sortBasicBlocksAndUpdateBranches uses the numeric identifier to sort 11994faadacSSnehasish Kumar // blocks. Preserving the order of blocks is essential to retaining decisions 12094faadacSSnehasish Kumar // made by prior passes such as MachineBlockPlacement. 12194faadacSSnehasish Kumar MF.RenumberBlocks(); 12294faadacSSnehasish Kumar MF.setBBSectionsType(BasicBlockSection::Preset); 12394faadacSSnehasish Kumar auto *MBFI = &getAnalysis<MachineBlockFrequencyInfo>(); 12494faadacSSnehasish Kumar auto *PSI = &getAnalysis<ProfileSummaryInfoWrapperPass>().getPSI(); 12594faadacSSnehasish Kumar 1262c7077e6SSnehasish Kumar SmallVector<MachineBasicBlock *, 2> LandingPads; 12794faadacSSnehasish Kumar for (auto &MBB : MF) { 1282c7077e6SSnehasish Kumar if (MBB.isEntryBlock()) 12994faadacSSnehasish Kumar continue; 1302c7077e6SSnehasish Kumar 1312c7077e6SSnehasish Kumar if (MBB.isEHPad()) 1322c7077e6SSnehasish Kumar LandingPads.push_back(&MBB); 1332c7077e6SSnehasish Kumar else if (isColdBlock(MBB, MBFI, PSI)) 13494faadacSSnehasish Kumar MBB.setSectionID(MBBSectionID::ColdSectionID); 13594faadacSSnehasish Kumar } 13694faadacSSnehasish Kumar 1372c7077e6SSnehasish Kumar // We only split out eh pads if all of them are cold. 1382c7077e6SSnehasish Kumar bool HasHotLandingPads = false; 1392c7077e6SSnehasish Kumar for (const MachineBasicBlock *LP : LandingPads) { 1402c7077e6SSnehasish Kumar if (!isColdBlock(*LP, MBFI, PSI)) 1412c7077e6SSnehasish Kumar HasHotLandingPads = true; 1422c7077e6SSnehasish Kumar } 1432c7077e6SSnehasish Kumar if (!HasHotLandingPads) { 1442c7077e6SSnehasish Kumar for (MachineBasicBlock *LP : LandingPads) 1452c7077e6SSnehasish Kumar LP->setSectionID(MBBSectionID::ColdSectionID); 1462c7077e6SSnehasish Kumar } 1472c7077e6SSnehasish Kumar 14894faadacSSnehasish Kumar auto Comparator = [](const MachineBasicBlock &X, const MachineBasicBlock &Y) { 14994faadacSSnehasish Kumar return X.getSectionID().Type < Y.getSectionID().Type; 15094faadacSSnehasish Kumar }; 15194faadacSSnehasish Kumar llvm::sortBasicBlocksAndUpdateBranches(MF, Comparator); 15294faadacSSnehasish Kumar 15394faadacSSnehasish Kumar return true; 15494faadacSSnehasish Kumar } 15594faadacSSnehasish Kumar 15694faadacSSnehasish Kumar void MachineFunctionSplitter::getAnalysisUsage(AnalysisUsage &AU) const { 15794faadacSSnehasish Kumar AU.addRequired<MachineModuleInfoWrapperPass>(); 15894faadacSSnehasish Kumar AU.addRequired<MachineBlockFrequencyInfo>(); 15994faadacSSnehasish Kumar AU.addRequired<ProfileSummaryInfoWrapperPass>(); 16094faadacSSnehasish Kumar } 16194faadacSSnehasish Kumar 16294faadacSSnehasish Kumar char MachineFunctionSplitter::ID = 0; 16394faadacSSnehasish Kumar INITIALIZE_PASS(MachineFunctionSplitter, "machine-function-splitter", 16494faadacSSnehasish Kumar "Split machine functions using profile information", false, 16594faadacSSnehasish Kumar false) 16694faadacSSnehasish Kumar 16794faadacSSnehasish Kumar MachineFunctionPass *llvm::createMachineFunctionSplitterPass() { 16894faadacSSnehasish Kumar return new MachineFunctionSplitter(); 16994faadacSSnehasish Kumar } 170