//===-- MachineFunctionSplitter.cpp - Split machine functions //-----------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// \file
// Uses profile information to split out cold blocks.
//
// This pass splits out cold machine basic blocks from the parent function. This
// implementation leverages the basic block section framework. Blocks marked
// cold by this pass are grouped together in a separate section prefixed with
// ".text.unlikely.*". The linker can then group these together as a cold
// section. The split part of the function is a contiguous region identified by
// the symbol "foo.cold". Grouping all cold blocks across functions together
// decreases fragmentation and improves icache and itlb utilization. Note that
// the overall changes to the binary size are negligible; only a small number of
// additional jump instructions may be introduced.
//
// For the original RFC of this pass please see
// https://groups.google.com/d/msg/llvm-dev/RUegaMg-iqc/wFAVxa6fCgAJ
//===----------------------------------------------------------------------===//

#include "llvm/ADT/SmallVector.h"
#include "llvm/Analysis/ProfileSummaryInfo.h"
#include "llvm/CodeGen/BasicBlockSectionUtils.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineBlockFrequencyInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineModuleInfo.h"
#include "llvm/CodeGen/Passes.h"
#include "llvm/IR/Function.h"
#include "llvm/InitializePasses.h"
#include "llvm/Support/CommandLine.h"

using namespace llvm;

// FIXME: This cutoff value is CPU dependent and should be moved to
// TargetTransformInfo once we consider enabling this on other platforms.
// The value is expressed as a ProfileSummaryInfo integer percentile cutoff.
// Defaults to 999950, i.e. all blocks colder than 99.995 percentile are split.
// The default was empirically determined to be optimal when considering cutoff
// values between 99%-ile to 100%-ile with respect to iTLB and icache metrics on
// Intel CPUs.
4894faadacSSnehasish Kumar static cl::opt<unsigned> 4994faadacSSnehasish Kumar PercentileCutoff("mfs-psi-cutoff", 5094faadacSSnehasish Kumar cl::desc("Percentile profile summary cutoff used to " 5194faadacSSnehasish Kumar "determine cold blocks. Unused if set to zero."), 5224bf6ff4SSnehasish Kumar cl::init(999950), cl::Hidden); 5394faadacSSnehasish Kumar 5494faadacSSnehasish Kumar static cl::opt<unsigned> ColdCountThreshold( 5594faadacSSnehasish Kumar "mfs-count-threshold", 5694faadacSSnehasish Kumar cl::desc( 5794faadacSSnehasish Kumar "Minimum number of times a block must be executed to be retained."), 5894faadacSSnehasish Kumar cl::init(1), cl::Hidden); 5994faadacSSnehasish Kumar 6094faadacSSnehasish Kumar namespace { 6194faadacSSnehasish Kumar 6294faadacSSnehasish Kumar class MachineFunctionSplitter : public MachineFunctionPass { 6394faadacSSnehasish Kumar public: 6494faadacSSnehasish Kumar static char ID; 6594faadacSSnehasish Kumar MachineFunctionSplitter() : MachineFunctionPass(ID) { 6694faadacSSnehasish Kumar initializeMachineFunctionSplitterPass(*PassRegistry::getPassRegistry()); 6794faadacSSnehasish Kumar } 6894faadacSSnehasish Kumar 6994faadacSSnehasish Kumar StringRef getPassName() const override { 7094faadacSSnehasish Kumar return "Machine Function Splitter Transformation"; 7194faadacSSnehasish Kumar } 7294faadacSSnehasish Kumar 7394faadacSSnehasish Kumar void getAnalysisUsage(AnalysisUsage &AU) const override; 7494faadacSSnehasish Kumar 7594faadacSSnehasish Kumar bool runOnMachineFunction(MachineFunction &F) override; 7694faadacSSnehasish Kumar }; 7794faadacSSnehasish Kumar } // end anonymous namespace 7894faadacSSnehasish Kumar 792c7077e6SSnehasish Kumar static bool isColdBlock(const MachineBasicBlock &MBB, 8094faadacSSnehasish Kumar const MachineBlockFrequencyInfo *MBFI, 8194faadacSSnehasish Kumar ProfileSummaryInfo *PSI) { 8294faadacSSnehasish Kumar Optional<uint64_t> Count = MBFI->getBlockProfileCount(&MBB); 83e0e687a6SKazu Hirata if (!Count) 
8494faadacSSnehasish Kumar return true; 8594faadacSSnehasish Kumar 8694faadacSSnehasish Kumar if (PercentileCutoff > 0) { 8794faadacSSnehasish Kumar return PSI->isColdCountNthPercentile(PercentileCutoff, *Count); 8894faadacSSnehasish Kumar } 8994faadacSSnehasish Kumar return (*Count < ColdCountThreshold); 9094faadacSSnehasish Kumar } 9194faadacSSnehasish Kumar 9294faadacSSnehasish Kumar bool MachineFunctionSplitter::runOnMachineFunction(MachineFunction &MF) { 9394faadacSSnehasish Kumar // TODO: We only target functions with profile data. Static information may 9494faadacSSnehasish Kumar // also be considered but we don't see performance improvements yet. 9594faadacSSnehasish Kumar if (!MF.getFunction().hasProfileData()) 9694faadacSSnehasish Kumar return false; 9794faadacSSnehasish Kumar 9894faadacSSnehasish Kumar // TODO: We don't split functions where a section attribute has been set 9994faadacSSnehasish Kumar // since the split part may not be placed in a contiguous region. It may also 10094faadacSSnehasish Kumar // be more beneficial to augment the linker to ensure contiguous layout of 10194faadacSSnehasish Kumar // split functions within the same section as specified by the attribute. 1023da0aeeaSSnehasish Kumar if (MF.getFunction().hasSection() || 1038077d0ffSSnehasish Kumar MF.getFunction().hasFnAttribute("implicit-section-name")) 10494faadacSSnehasish Kumar return false; 10594faadacSSnehasish Kumar 10694faadacSSnehasish Kumar // We don't want to proceed further for cold functions 10794faadacSSnehasish Kumar // or functions of unknown hotness. Lukewarm functions have no prefix. 
10894faadacSSnehasish Kumar Optional<StringRef> SectionPrefix = MF.getFunction().getSectionPrefix(); 109*611ffcf4SKazu Hirata if (SectionPrefix && (SectionPrefix.value().equals("unlikely") || 110*611ffcf4SKazu Hirata SectionPrefix.value().equals("unknown"))) { 11194faadacSSnehasish Kumar return false; 11294faadacSSnehasish Kumar } 11394faadacSSnehasish Kumar 11494faadacSSnehasish Kumar // Renumbering blocks here preserves the order of the blocks as 11594faadacSSnehasish Kumar // sortBasicBlocksAndUpdateBranches uses the numeric identifier to sort 11694faadacSSnehasish Kumar // blocks. Preserving the order of blocks is essential to retaining decisions 11794faadacSSnehasish Kumar // made by prior passes such as MachineBlockPlacement. 11894faadacSSnehasish Kumar MF.RenumberBlocks(); 11994faadacSSnehasish Kumar MF.setBBSectionsType(BasicBlockSection::Preset); 12094faadacSSnehasish Kumar auto *MBFI = &getAnalysis<MachineBlockFrequencyInfo>(); 12194faadacSSnehasish Kumar auto *PSI = &getAnalysis<ProfileSummaryInfoWrapperPass>().getPSI(); 12294faadacSSnehasish Kumar 1232c7077e6SSnehasish Kumar SmallVector<MachineBasicBlock *, 2> LandingPads; 12494faadacSSnehasish Kumar for (auto &MBB : MF) { 1252c7077e6SSnehasish Kumar if (MBB.isEntryBlock()) 12694faadacSSnehasish Kumar continue; 1272c7077e6SSnehasish Kumar 1282c7077e6SSnehasish Kumar if (MBB.isEHPad()) 1292c7077e6SSnehasish Kumar LandingPads.push_back(&MBB); 1302c7077e6SSnehasish Kumar else if (isColdBlock(MBB, MBFI, PSI)) 13194faadacSSnehasish Kumar MBB.setSectionID(MBBSectionID::ColdSectionID); 13294faadacSSnehasish Kumar } 13394faadacSSnehasish Kumar 1342c7077e6SSnehasish Kumar // We only split out eh pads if all of them are cold. 
1352c7077e6SSnehasish Kumar bool HasHotLandingPads = false; 1362c7077e6SSnehasish Kumar for (const MachineBasicBlock *LP : LandingPads) { 1372c7077e6SSnehasish Kumar if (!isColdBlock(*LP, MBFI, PSI)) 1382c7077e6SSnehasish Kumar HasHotLandingPads = true; 1392c7077e6SSnehasish Kumar } 1402c7077e6SSnehasish Kumar if (!HasHotLandingPads) { 1412c7077e6SSnehasish Kumar for (MachineBasicBlock *LP : LandingPads) 1422c7077e6SSnehasish Kumar LP->setSectionID(MBBSectionID::ColdSectionID); 1432c7077e6SSnehasish Kumar } 1442c7077e6SSnehasish Kumar 14594faadacSSnehasish Kumar auto Comparator = [](const MachineBasicBlock &X, const MachineBasicBlock &Y) { 14694faadacSSnehasish Kumar return X.getSectionID().Type < Y.getSectionID().Type; 14794faadacSSnehasish Kumar }; 14894faadacSSnehasish Kumar llvm::sortBasicBlocksAndUpdateBranches(MF, Comparator); 14994faadacSSnehasish Kumar 15094faadacSSnehasish Kumar return true; 15194faadacSSnehasish Kumar } 15294faadacSSnehasish Kumar 15394faadacSSnehasish Kumar void MachineFunctionSplitter::getAnalysisUsage(AnalysisUsage &AU) const { 15494faadacSSnehasish Kumar AU.addRequired<MachineModuleInfoWrapperPass>(); 15594faadacSSnehasish Kumar AU.addRequired<MachineBlockFrequencyInfo>(); 15694faadacSSnehasish Kumar AU.addRequired<ProfileSummaryInfoWrapperPass>(); 15794faadacSSnehasish Kumar } 15894faadacSSnehasish Kumar 15994faadacSSnehasish Kumar char MachineFunctionSplitter::ID = 0; 16094faadacSSnehasish Kumar INITIALIZE_PASS(MachineFunctionSplitter, "machine-function-splitter", 16194faadacSSnehasish Kumar "Split machine functions using profile information", false, 16294faadacSSnehasish Kumar false) 16394faadacSSnehasish Kumar 16494faadacSSnehasish Kumar MachineFunctionPass *llvm::createMachineFunctionSplitterPass() { 16594faadacSSnehasish Kumar return new MachineFunctionSplitter(); 16694faadacSSnehasish Kumar } 167