194faadacSSnehasish Kumar //===-- MachineFunctionSplitter.cpp - Split machine functions //-----------===//
294faadacSSnehasish Kumar //
394faadacSSnehasish Kumar // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
494faadacSSnehasish Kumar // See https://llvm.org/LICENSE.txt for license information.
594faadacSSnehasish Kumar // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
694faadacSSnehasish Kumar //
794faadacSSnehasish Kumar //===----------------------------------------------------------------------===//
894faadacSSnehasish Kumar //
994faadacSSnehasish Kumar // \file
1094faadacSSnehasish Kumar // Uses profile information to split out cold blocks.
1194faadacSSnehasish Kumar //
1294faadacSSnehasish Kumar // This pass splits out cold machine basic blocks from the parent function. This
1394faadacSSnehasish Kumar // implementation leverages the basic block section framework. Blocks marked
1494faadacSSnehasish Kumar // cold by this pass are grouped together in a separate section prefixed with
1594faadacSSnehasish Kumar // ".text.unlikely.*". The linker can then group these together as a cold
1694faadacSSnehasish Kumar // section. The split part of the function is a contiguous region identified by
1794faadacSSnehasish Kumar // the symbol "foo.cold". Grouping all cold blocks across functions together
1894faadacSSnehasish Kumar // decreases fragmentation and improves icache and itlb utilization. Note that
1994faadacSSnehasish Kumar // the overall changes to the binary size are negligible; only a small number of
2094faadacSSnehasish Kumar // additional jump instructions may be introduced.
2194faadacSSnehasish Kumar //
2294faadacSSnehasish Kumar // For the original RFC of this pass please see
2394faadacSSnehasish Kumar // https://groups.google.com/d/msg/llvm-dev/RUegaMg-iqc/wFAVxa6fCgAJ
2494faadacSSnehasish Kumar //===----------------------------------------------------------------------===//
2594faadacSSnehasish Kumar 
2694faadacSSnehasish Kumar #include "llvm/ADT/Statistic.h"
2794faadacSSnehasish Kumar #include "llvm/Analysis/ProfileSummaryInfo.h"
2894faadacSSnehasish Kumar #include "llvm/CodeGen/BasicBlockSectionUtils.h"
2994faadacSSnehasish Kumar #include "llvm/CodeGen/MachineBasicBlock.h"
3094faadacSSnehasish Kumar #include "llvm/CodeGen/MachineBlockFrequencyInfo.h"
3194faadacSSnehasish Kumar #include "llvm/CodeGen/MachineFunction.h"
3294faadacSSnehasish Kumar #include "llvm/CodeGen/MachineFunctionPass.h"
3394faadacSSnehasish Kumar #include "llvm/CodeGen/MachineModuleInfo.h"
3494faadacSSnehasish Kumar #include "llvm/CodeGen/Passes.h"
3594faadacSSnehasish Kumar #include "llvm/IR/Function.h"
3694faadacSSnehasish Kumar #include "llvm/IR/Module.h"
3794faadacSSnehasish Kumar #include "llvm/InitializePasses.h"
3894faadacSSnehasish Kumar #include "llvm/Support/CommandLine.h"
3994faadacSSnehasish Kumar 
4094faadacSSnehasish Kumar using namespace llvm;
4194faadacSSnehasish Kumar 
4224bf6ff4SSnehasish Kumar // FIXME: This cutoff value is CPU dependent and should be moved to
4324bf6ff4SSnehasish Kumar // TargetTransformInfo once we consider enabling this on other platforms.
4424bf6ff4SSnehasish Kumar // The value is expressed as a ProfileSummaryInfo integer percentile cutoff.
4524bf6ff4SSnehasish Kumar // Defaults to 999950, i.e. all blocks colder than 99.995 percentile are split.
4624bf6ff4SSnehasish Kumar // The default was empirically determined to be optimal when considering cutoff
4724bf6ff4SSnehasish Kumar // values between 99%-ile to 100%-ile with respect to iTLB and icache metrics on
4824bf6ff4SSnehasish Kumar // Intel CPUs.
4994faadacSSnehasish Kumar static cl::opt<unsigned>
5094faadacSSnehasish Kumar     PercentileCutoff("mfs-psi-cutoff",
5194faadacSSnehasish Kumar                      cl::desc("Percentile profile summary cutoff used to "
5294faadacSSnehasish Kumar                               "determine cold blocks. Unused if set to zero."),
5324bf6ff4SSnehasish Kumar                      cl::init(999950), cl::Hidden);
5494faadacSSnehasish Kumar 
5594faadacSSnehasish Kumar static cl::opt<unsigned> ColdCountThreshold(
5694faadacSSnehasish Kumar     "mfs-count-threshold",
5794faadacSSnehasish Kumar     cl::desc(
5894faadacSSnehasish Kumar         "Minimum number of times a block must be executed to be retained."),
5994faadacSSnehasish Kumar     cl::init(1), cl::Hidden);
6094faadacSSnehasish Kumar 
6194faadacSSnehasish Kumar namespace {
6294faadacSSnehasish Kumar 
6394faadacSSnehasish Kumar class MachineFunctionSplitter : public MachineFunctionPass {
6494faadacSSnehasish Kumar public:
6594faadacSSnehasish Kumar   static char ID;
6694faadacSSnehasish Kumar   MachineFunctionSplitter() : MachineFunctionPass(ID) {
6794faadacSSnehasish Kumar     initializeMachineFunctionSplitterPass(*PassRegistry::getPassRegistry());
6894faadacSSnehasish Kumar   }
6994faadacSSnehasish Kumar 
7094faadacSSnehasish Kumar   StringRef getPassName() const override {
7194faadacSSnehasish Kumar     return "Machine Function Splitter Transformation";
7294faadacSSnehasish Kumar   }
7394faadacSSnehasish Kumar 
7494faadacSSnehasish Kumar   void getAnalysisUsage(AnalysisUsage &AU) const override;
7594faadacSSnehasish Kumar 
7694faadacSSnehasish Kumar   bool runOnMachineFunction(MachineFunction &F) override;
7794faadacSSnehasish Kumar };
7894faadacSSnehasish Kumar } // end anonymous namespace
7994faadacSSnehasish Kumar 
8094faadacSSnehasish Kumar static bool isColdBlock(MachineBasicBlock &MBB,
8194faadacSSnehasish Kumar                         const MachineBlockFrequencyInfo *MBFI,
8294faadacSSnehasish Kumar                         ProfileSummaryInfo *PSI) {
8394faadacSSnehasish Kumar   Optional<uint64_t> Count = MBFI->getBlockProfileCount(&MBB);
8494faadacSSnehasish Kumar   if (!Count.hasValue())
8594faadacSSnehasish Kumar     return true;
8694faadacSSnehasish Kumar 
8794faadacSSnehasish Kumar   if (PercentileCutoff > 0) {
8894faadacSSnehasish Kumar     return PSI->isColdCountNthPercentile(PercentileCutoff, *Count);
8994faadacSSnehasish Kumar   }
9094faadacSSnehasish Kumar   return (*Count < ColdCountThreshold);
9194faadacSSnehasish Kumar }
9294faadacSSnehasish Kumar 
9394faadacSSnehasish Kumar bool MachineFunctionSplitter::runOnMachineFunction(MachineFunction &MF) {
9494faadacSSnehasish Kumar   // TODO: We only target functions with profile data. Static information may
9594faadacSSnehasish Kumar   // also be considered but we don't see performance improvements yet.
9694faadacSSnehasish Kumar   if (!MF.getFunction().hasProfileData())
9794faadacSSnehasish Kumar     return false;
9894faadacSSnehasish Kumar 
9994faadacSSnehasish Kumar   // TODO: We don't split functions where a section attribute has been set
10094faadacSSnehasish Kumar   // since the split part may not be placed in a contiguous region. It may also
10194faadacSSnehasish Kumar   // be more beneficial to augment the linker to ensure contiguous layout of
10294faadacSSnehasish Kumar   // split functions within the same section as specified by the attribute.
10394faadacSSnehasish Kumar   if (!MF.getFunction().getSection().empty())
10494faadacSSnehasish Kumar     return false;
10594faadacSSnehasish Kumar 
10694faadacSSnehasish Kumar   // We don't want to proceed further for cold functions
10794faadacSSnehasish Kumar   // or functions of unknown hotness. Lukewarm functions have no prefix.
10894faadacSSnehasish Kumar   Optional<StringRef> SectionPrefix = MF.getFunction().getSectionPrefix();
10994faadacSSnehasish Kumar   if (SectionPrefix.hasValue() &&
110*7af80299SPan, Tao       (SectionPrefix.getValue().equals("unlikely") ||
111*7af80299SPan, Tao        SectionPrefix.getValue().equals("unknown"))) {
11294faadacSSnehasish Kumar     return false;
11394faadacSSnehasish Kumar   }
11494faadacSSnehasish Kumar 
11594faadacSSnehasish Kumar   // Renumbering blocks here preserves the order of the blocks as
11694faadacSSnehasish Kumar   // sortBasicBlocksAndUpdateBranches uses the numeric identifier to sort
11794faadacSSnehasish Kumar   // blocks. Preserving the order of blocks is essential to retaining decisions
11894faadacSSnehasish Kumar   // made by prior passes such as MachineBlockPlacement.
11994faadacSSnehasish Kumar   MF.RenumberBlocks();
12094faadacSSnehasish Kumar   MF.setBBSectionsType(BasicBlockSection::Preset);
12194faadacSSnehasish Kumar   auto *MBFI = &getAnalysis<MachineBlockFrequencyInfo>();
12294faadacSSnehasish Kumar   auto *PSI = &getAnalysis<ProfileSummaryInfoWrapperPass>().getPSI();
12394faadacSSnehasish Kumar 
12494faadacSSnehasish Kumar   for (auto &MBB : MF) {
12594faadacSSnehasish Kumar     // FIXME: We retain the entry block and conservatively keep all landing pad
12694faadacSSnehasish Kumar     // blocks as part of the original function. Once D73739 is submitted, we can
12794faadacSSnehasish Kumar     // improve the handling of ehpads.
12894faadacSSnehasish Kumar     if ((MBB.pred_empty() || MBB.isEHPad()))
12994faadacSSnehasish Kumar       continue;
13094faadacSSnehasish Kumar     if (isColdBlock(MBB, MBFI, PSI))
13194faadacSSnehasish Kumar       MBB.setSectionID(MBBSectionID::ColdSectionID);
13294faadacSSnehasish Kumar   }
13394faadacSSnehasish Kumar 
13494faadacSSnehasish Kumar   auto Comparator = [](const MachineBasicBlock &X, const MachineBasicBlock &Y) {
13594faadacSSnehasish Kumar     return X.getSectionID().Type < Y.getSectionID().Type;
13694faadacSSnehasish Kumar   };
13794faadacSSnehasish Kumar   llvm::sortBasicBlocksAndUpdateBranches(MF, Comparator);
13894faadacSSnehasish Kumar 
13994faadacSSnehasish Kumar   return true;
14094faadacSSnehasish Kumar }
14194faadacSSnehasish Kumar 
14294faadacSSnehasish Kumar void MachineFunctionSplitter::getAnalysisUsage(AnalysisUsage &AU) const {
14394faadacSSnehasish Kumar   AU.addRequired<MachineModuleInfoWrapperPass>();
14494faadacSSnehasish Kumar   AU.addRequired<MachineBlockFrequencyInfo>();
14594faadacSSnehasish Kumar   AU.addRequired<ProfileSummaryInfoWrapperPass>();
14694faadacSSnehasish Kumar }
14794faadacSSnehasish Kumar 
14894faadacSSnehasish Kumar char MachineFunctionSplitter::ID = 0;
14994faadacSSnehasish Kumar INITIALIZE_PASS(MachineFunctionSplitter, "machine-function-splitter",
15094faadacSSnehasish Kumar                 "Split machine functions using profile information", false,
15194faadacSSnehasish Kumar                 false)
15294faadacSSnehasish Kumar 
15394faadacSSnehasish Kumar MachineFunctionPass *llvm::createMachineFunctionSplitterPass() {
15494faadacSSnehasish Kumar   return new MachineFunctionSplitter();
15594faadacSSnehasish Kumar }
156