//===-- MachineFunctionSplitter.cpp - Split machine functions -------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// \file
// Uses profile information to split out cold blocks.
//
// This pass splits out cold machine basic blocks from the parent function. This
// implementation leverages the basic block section framework. Blocks marked
// cold by this pass are grouped together in a separate section prefixed with
// ".text.unlikely.*". The linker can then group these together as a cold
// section. The split part of the function is a contiguous region identified by
// the symbol "foo.cold". Grouping all cold blocks across functions together
// decreases fragmentation and improves icache and itlb utilization. Note that
// the overall changes to the binary size are negligible; only a small number of
// additional jump instructions may be introduced.
//
// For the original RFC of this pass please see
// https://groups.google.com/d/msg/llvm-dev/RUegaMg-iqc/wFAVxa6fCgAJ
//===----------------------------------------------------------------------===//
2594faadacSSnehasish Kumar 
262c7077e6SSnehasish Kumar #include "llvm/ADT/SmallVector.h"
2794faadacSSnehasish Kumar #include "llvm/ADT/Statistic.h"
2894faadacSSnehasish Kumar #include "llvm/Analysis/ProfileSummaryInfo.h"
2994faadacSSnehasish Kumar #include "llvm/CodeGen/BasicBlockSectionUtils.h"
3094faadacSSnehasish Kumar #include "llvm/CodeGen/MachineBasicBlock.h"
3194faadacSSnehasish Kumar #include "llvm/CodeGen/MachineBlockFrequencyInfo.h"
3294faadacSSnehasish Kumar #include "llvm/CodeGen/MachineFunction.h"
3394faadacSSnehasish Kumar #include "llvm/CodeGen/MachineFunctionPass.h"
3494faadacSSnehasish Kumar #include "llvm/CodeGen/MachineModuleInfo.h"
3594faadacSSnehasish Kumar #include "llvm/CodeGen/Passes.h"
3694faadacSSnehasish Kumar #include "llvm/IR/Function.h"
3794faadacSSnehasish Kumar #include "llvm/IR/Module.h"
3894faadacSSnehasish Kumar #include "llvm/InitializePasses.h"
3994faadacSSnehasish Kumar #include "llvm/Support/CommandLine.h"
4094faadacSSnehasish Kumar 
4194faadacSSnehasish Kumar using namespace llvm;
4294faadacSSnehasish Kumar 
4324bf6ff4SSnehasish Kumar // FIXME: This cutoff value is CPU dependent and should be moved to
4424bf6ff4SSnehasish Kumar // TargetTransformInfo once we consider enabling this on other platforms.
4524bf6ff4SSnehasish Kumar // The value is expressed as a ProfileSummaryInfo integer percentile cutoff.
4624bf6ff4SSnehasish Kumar // Defaults to 999950, i.e. all blocks colder than 99.995 percentile are split.
4724bf6ff4SSnehasish Kumar // The default was empirically determined to be optimal when considering cutoff
4824bf6ff4SSnehasish Kumar // values between 99%-ile to 100%-ile with respect to iTLB and icache metrics on
4924bf6ff4SSnehasish Kumar // Intel CPUs.
5094faadacSSnehasish Kumar static cl::opt<unsigned>
5194faadacSSnehasish Kumar     PercentileCutoff("mfs-psi-cutoff",
5294faadacSSnehasish Kumar                      cl::desc("Percentile profile summary cutoff used to "
5394faadacSSnehasish Kumar                               "determine cold blocks. Unused if set to zero."),
5424bf6ff4SSnehasish Kumar                      cl::init(999950), cl::Hidden);
5594faadacSSnehasish Kumar 
5694faadacSSnehasish Kumar static cl::opt<unsigned> ColdCountThreshold(
5794faadacSSnehasish Kumar     "mfs-count-threshold",
5894faadacSSnehasish Kumar     cl::desc(
5994faadacSSnehasish Kumar         "Minimum number of times a block must be executed to be retained."),
6094faadacSSnehasish Kumar     cl::init(1), cl::Hidden);
6194faadacSSnehasish Kumar 
6294faadacSSnehasish Kumar namespace {
6394faadacSSnehasish Kumar 
6494faadacSSnehasish Kumar class MachineFunctionSplitter : public MachineFunctionPass {
6594faadacSSnehasish Kumar public:
6694faadacSSnehasish Kumar   static char ID;
6794faadacSSnehasish Kumar   MachineFunctionSplitter() : MachineFunctionPass(ID) {
6894faadacSSnehasish Kumar     initializeMachineFunctionSplitterPass(*PassRegistry::getPassRegistry());
6994faadacSSnehasish Kumar   }
7094faadacSSnehasish Kumar 
7194faadacSSnehasish Kumar   StringRef getPassName() const override {
7294faadacSSnehasish Kumar     return "Machine Function Splitter Transformation";
7394faadacSSnehasish Kumar   }
7494faadacSSnehasish Kumar 
7594faadacSSnehasish Kumar   void getAnalysisUsage(AnalysisUsage &AU) const override;
7694faadacSSnehasish Kumar 
7794faadacSSnehasish Kumar   bool runOnMachineFunction(MachineFunction &F) override;
7894faadacSSnehasish Kumar };
7994faadacSSnehasish Kumar } // end anonymous namespace
8094faadacSSnehasish Kumar 
812c7077e6SSnehasish Kumar static bool isColdBlock(const MachineBasicBlock &MBB,
8294faadacSSnehasish Kumar                         const MachineBlockFrequencyInfo *MBFI,
8394faadacSSnehasish Kumar                         ProfileSummaryInfo *PSI) {
8494faadacSSnehasish Kumar   Optional<uint64_t> Count = MBFI->getBlockProfileCount(&MBB);
8594faadacSSnehasish Kumar   if (!Count.hasValue())
8694faadacSSnehasish Kumar     return true;
8794faadacSSnehasish Kumar 
8894faadacSSnehasish Kumar   if (PercentileCutoff > 0) {
8994faadacSSnehasish Kumar     return PSI->isColdCountNthPercentile(PercentileCutoff, *Count);
9094faadacSSnehasish Kumar   }
9194faadacSSnehasish Kumar   return (*Count < ColdCountThreshold);
9294faadacSSnehasish Kumar }
9394faadacSSnehasish Kumar 
9494faadacSSnehasish Kumar bool MachineFunctionSplitter::runOnMachineFunction(MachineFunction &MF) {
9594faadacSSnehasish Kumar   // TODO: We only target functions with profile data. Static information may
9694faadacSSnehasish Kumar   // also be considered but we don't see performance improvements yet.
9794faadacSSnehasish Kumar   if (!MF.getFunction().hasProfileData())
9894faadacSSnehasish Kumar     return false;
9994faadacSSnehasish Kumar 
10094faadacSSnehasish Kumar   // TODO: We don't split functions where a section attribute has been set
10194faadacSSnehasish Kumar   // since the split part may not be placed in a contiguous region. It may also
10294faadacSSnehasish Kumar   // be more beneficial to augment the linker to ensure contiguous layout of
10394faadacSSnehasish Kumar   // split functions within the same section as specified by the attribute.
104*3da0aeeaSSnehasish Kumar   if (MF.getFunction().hasSection() ||
1058077d0ffSSnehasish Kumar       MF.getFunction().hasFnAttribute("implicit-section-name"))
10694faadacSSnehasish Kumar     return false;
10794faadacSSnehasish Kumar 
10894faadacSSnehasish Kumar   // We don't want to proceed further for cold functions
10994faadacSSnehasish Kumar   // or functions of unknown hotness. Lukewarm functions have no prefix.
11094faadacSSnehasish Kumar   Optional<StringRef> SectionPrefix = MF.getFunction().getSectionPrefix();
11194faadacSSnehasish Kumar   if (SectionPrefix.hasValue() &&
1127af80299SPan, Tao       (SectionPrefix.getValue().equals("unlikely") ||
1137af80299SPan, Tao        SectionPrefix.getValue().equals("unknown"))) {
11494faadacSSnehasish Kumar     return false;
11594faadacSSnehasish Kumar   }
11694faadacSSnehasish Kumar 
11794faadacSSnehasish Kumar   // Renumbering blocks here preserves the order of the blocks as
11894faadacSSnehasish Kumar   // sortBasicBlocksAndUpdateBranches uses the numeric identifier to sort
11994faadacSSnehasish Kumar   // blocks. Preserving the order of blocks is essential to retaining decisions
12094faadacSSnehasish Kumar   // made by prior passes such as MachineBlockPlacement.
12194faadacSSnehasish Kumar   MF.RenumberBlocks();
12294faadacSSnehasish Kumar   MF.setBBSectionsType(BasicBlockSection::Preset);
12394faadacSSnehasish Kumar   auto *MBFI = &getAnalysis<MachineBlockFrequencyInfo>();
12494faadacSSnehasish Kumar   auto *PSI = &getAnalysis<ProfileSummaryInfoWrapperPass>().getPSI();
12594faadacSSnehasish Kumar 
1262c7077e6SSnehasish Kumar   SmallVector<MachineBasicBlock *, 2> LandingPads;
12794faadacSSnehasish Kumar   for (auto &MBB : MF) {
1282c7077e6SSnehasish Kumar     if (MBB.isEntryBlock())
12994faadacSSnehasish Kumar       continue;
1302c7077e6SSnehasish Kumar 
1312c7077e6SSnehasish Kumar     if (MBB.isEHPad())
1322c7077e6SSnehasish Kumar       LandingPads.push_back(&MBB);
1332c7077e6SSnehasish Kumar     else if (isColdBlock(MBB, MBFI, PSI))
13494faadacSSnehasish Kumar       MBB.setSectionID(MBBSectionID::ColdSectionID);
13594faadacSSnehasish Kumar   }
13694faadacSSnehasish Kumar 
1372c7077e6SSnehasish Kumar   // We only split out eh pads if all of them are cold.
1382c7077e6SSnehasish Kumar   bool HasHotLandingPads = false;
1392c7077e6SSnehasish Kumar   for (const MachineBasicBlock *LP : LandingPads) {
1402c7077e6SSnehasish Kumar     if (!isColdBlock(*LP, MBFI, PSI))
1412c7077e6SSnehasish Kumar       HasHotLandingPads = true;
1422c7077e6SSnehasish Kumar   }
1432c7077e6SSnehasish Kumar   if (!HasHotLandingPads) {
1442c7077e6SSnehasish Kumar     for (MachineBasicBlock *LP : LandingPads)
1452c7077e6SSnehasish Kumar       LP->setSectionID(MBBSectionID::ColdSectionID);
1462c7077e6SSnehasish Kumar   }
1472c7077e6SSnehasish Kumar 
14894faadacSSnehasish Kumar   auto Comparator = [](const MachineBasicBlock &X, const MachineBasicBlock &Y) {
14994faadacSSnehasish Kumar     return X.getSectionID().Type < Y.getSectionID().Type;
15094faadacSSnehasish Kumar   };
15194faadacSSnehasish Kumar   llvm::sortBasicBlocksAndUpdateBranches(MF, Comparator);
15294faadacSSnehasish Kumar 
15394faadacSSnehasish Kumar   return true;
15494faadacSSnehasish Kumar }
15594faadacSSnehasish Kumar 
15694faadacSSnehasish Kumar void MachineFunctionSplitter::getAnalysisUsage(AnalysisUsage &AU) const {
15794faadacSSnehasish Kumar   AU.addRequired<MachineModuleInfoWrapperPass>();
15894faadacSSnehasish Kumar   AU.addRequired<MachineBlockFrequencyInfo>();
15994faadacSSnehasish Kumar   AU.addRequired<ProfileSummaryInfoWrapperPass>();
16094faadacSSnehasish Kumar }
16194faadacSSnehasish Kumar 
16294faadacSSnehasish Kumar char MachineFunctionSplitter::ID = 0;
16394faadacSSnehasish Kumar INITIALIZE_PASS(MachineFunctionSplitter, "machine-function-splitter",
16494faadacSSnehasish Kumar                 "Split machine functions using profile information", false,
16594faadacSSnehasish Kumar                 false)
16694faadacSSnehasish Kumar 
16794faadacSSnehasish Kumar MachineFunctionPass *llvm::createMachineFunctionSplitterPass() {
16894faadacSSnehasish Kumar   return new MachineFunctionSplitter();
16994faadacSSnehasish Kumar }
170