//===-- MachineFunctionSplitter.cpp - Split machine functions //-----------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// \file
// Uses profile information to split out cold blocks.
//
// This pass splits out cold machine basic blocks from the parent function. This
// implementation leverages the basic block section framework. Blocks marked
// cold by this pass are grouped together in a separate section prefixed with
// ".text.unlikely.*". The linker can then group these together as a cold
// section. The split part of the function is a contiguous region identified by
// the symbol "foo.cold". Grouping all cold blocks across functions together
// decreases fragmentation and improves icache and itlb utilization. Note that
// the overall changes to the binary size are negligible; only a small number of
// additional jump instructions may be introduced.
//
// For the original RFC of this pass please see
// https://groups.google.com/d/msg/llvm-dev/RUegaMg-iqc/wFAVxa6fCgAJ
//===----------------------------------------------------------------------===//
2594faadacSSnehasish Kumar 
262c7077e6SSnehasish Kumar #include "llvm/ADT/SmallVector.h"
2794faadacSSnehasish Kumar #include "llvm/Analysis/ProfileSummaryInfo.h"
2894faadacSSnehasish Kumar #include "llvm/CodeGen/BasicBlockSectionUtils.h"
2994faadacSSnehasish Kumar #include "llvm/CodeGen/MachineBasicBlock.h"
3094faadacSSnehasish Kumar #include "llvm/CodeGen/MachineBlockFrequencyInfo.h"
3194faadacSSnehasish Kumar #include "llvm/CodeGen/MachineFunction.h"
3294faadacSSnehasish Kumar #include "llvm/CodeGen/MachineFunctionPass.h"
3394faadacSSnehasish Kumar #include "llvm/CodeGen/MachineModuleInfo.h"
3494faadacSSnehasish Kumar #include "llvm/CodeGen/Passes.h"
3594faadacSSnehasish Kumar #include "llvm/IR/Function.h"
3694faadacSSnehasish Kumar #include "llvm/InitializePasses.h"
3794faadacSSnehasish Kumar #include "llvm/Support/CommandLine.h"
3894faadacSSnehasish Kumar 
3994faadacSSnehasish Kumar using namespace llvm;
4094faadacSSnehasish Kumar 
4124bf6ff4SSnehasish Kumar // FIXME: This cutoff value is CPU dependent and should be moved to
4224bf6ff4SSnehasish Kumar // TargetTransformInfo once we consider enabling this on other platforms.
4324bf6ff4SSnehasish Kumar // The value is expressed as a ProfileSummaryInfo integer percentile cutoff.
4424bf6ff4SSnehasish Kumar // Defaults to 999950, i.e. all blocks colder than 99.995 percentile are split.
4524bf6ff4SSnehasish Kumar // The default was empirically determined to be optimal when considering cutoff
4624bf6ff4SSnehasish Kumar // values between 99%-ile to 100%-ile with respect to iTLB and icache metrics on
4724bf6ff4SSnehasish Kumar // Intel CPUs.
4894faadacSSnehasish Kumar static cl::opt<unsigned>
4994faadacSSnehasish Kumar     PercentileCutoff("mfs-psi-cutoff",
5094faadacSSnehasish Kumar                      cl::desc("Percentile profile summary cutoff used to "
5194faadacSSnehasish Kumar                               "determine cold blocks. Unused if set to zero."),
5224bf6ff4SSnehasish Kumar                      cl::init(999950), cl::Hidden);
5394faadacSSnehasish Kumar 
5494faadacSSnehasish Kumar static cl::opt<unsigned> ColdCountThreshold(
5594faadacSSnehasish Kumar     "mfs-count-threshold",
5694faadacSSnehasish Kumar     cl::desc(
5794faadacSSnehasish Kumar         "Minimum number of times a block must be executed to be retained."),
5894faadacSSnehasish Kumar     cl::init(1), cl::Hidden);
5994faadacSSnehasish Kumar 
6094faadacSSnehasish Kumar namespace {
6194faadacSSnehasish Kumar 
6294faadacSSnehasish Kumar class MachineFunctionSplitter : public MachineFunctionPass {
6394faadacSSnehasish Kumar public:
6494faadacSSnehasish Kumar   static char ID;
MachineFunctionSplitter()6594faadacSSnehasish Kumar   MachineFunctionSplitter() : MachineFunctionPass(ID) {
6694faadacSSnehasish Kumar     initializeMachineFunctionSplitterPass(*PassRegistry::getPassRegistry());
6794faadacSSnehasish Kumar   }
6894faadacSSnehasish Kumar 
getPassName() const6994faadacSSnehasish Kumar   StringRef getPassName() const override {
7094faadacSSnehasish Kumar     return "Machine Function Splitter Transformation";
7194faadacSSnehasish Kumar   }
7294faadacSSnehasish Kumar 
7394faadacSSnehasish Kumar   void getAnalysisUsage(AnalysisUsage &AU) const override;
7494faadacSSnehasish Kumar 
7594faadacSSnehasish Kumar   bool runOnMachineFunction(MachineFunction &F) override;
7694faadacSSnehasish Kumar };
7794faadacSSnehasish Kumar } // end anonymous namespace
7894faadacSSnehasish Kumar 
isColdBlock(const MachineBasicBlock & MBB,const MachineBlockFrequencyInfo * MBFI,ProfileSummaryInfo * PSI)792c7077e6SSnehasish Kumar static bool isColdBlock(const MachineBasicBlock &MBB,
8094faadacSSnehasish Kumar                         const MachineBlockFrequencyInfo *MBFI,
8194faadacSSnehasish Kumar                         ProfileSummaryInfo *PSI) {
8294faadacSSnehasish Kumar   Optional<uint64_t> Count = MBFI->getBlockProfileCount(&MBB);
83e0e687a6SKazu Hirata   if (!Count)
8494faadacSSnehasish Kumar     return true;
8594faadacSSnehasish Kumar 
8694faadacSSnehasish Kumar   if (PercentileCutoff > 0) {
8794faadacSSnehasish Kumar     return PSI->isColdCountNthPercentile(PercentileCutoff, *Count);
8894faadacSSnehasish Kumar   }
8994faadacSSnehasish Kumar   return (*Count < ColdCountThreshold);
9094faadacSSnehasish Kumar }
9194faadacSSnehasish Kumar 
runOnMachineFunction(MachineFunction & MF)9294faadacSSnehasish Kumar bool MachineFunctionSplitter::runOnMachineFunction(MachineFunction &MF) {
9394faadacSSnehasish Kumar   // TODO: We only target functions with profile data. Static information may
9494faadacSSnehasish Kumar   // also be considered but we don't see performance improvements yet.
9594faadacSSnehasish Kumar   if (!MF.getFunction().hasProfileData())
9694faadacSSnehasish Kumar     return false;
9794faadacSSnehasish Kumar 
9894faadacSSnehasish Kumar   // TODO: We don't split functions where a section attribute has been set
9994faadacSSnehasish Kumar   // since the split part may not be placed in a contiguous region. It may also
10094faadacSSnehasish Kumar   // be more beneficial to augment the linker to ensure contiguous layout of
10194faadacSSnehasish Kumar   // split functions within the same section as specified by the attribute.
1023da0aeeaSSnehasish Kumar   if (MF.getFunction().hasSection() ||
1038077d0ffSSnehasish Kumar       MF.getFunction().hasFnAttribute("implicit-section-name"))
10494faadacSSnehasish Kumar     return false;
10594faadacSSnehasish Kumar 
10694faadacSSnehasish Kumar   // We don't want to proceed further for cold functions
10794faadacSSnehasish Kumar   // or functions of unknown hotness. Lukewarm functions have no prefix.
10894faadacSSnehasish Kumar   Optional<StringRef> SectionPrefix = MF.getFunction().getSectionPrefix();
109611ffcf4SKazu Hirata   if (SectionPrefix && (SectionPrefix.value().equals("unlikely") ||
110611ffcf4SKazu Hirata                         SectionPrefix.value().equals("unknown"))) {
11194faadacSSnehasish Kumar     return false;
11294faadacSSnehasish Kumar   }
11394faadacSSnehasish Kumar 
11494faadacSSnehasish Kumar   // Renumbering blocks here preserves the order of the blocks as
11594faadacSSnehasish Kumar   // sortBasicBlocksAndUpdateBranches uses the numeric identifier to sort
11694faadacSSnehasish Kumar   // blocks. Preserving the order of blocks is essential to retaining decisions
11794faadacSSnehasish Kumar   // made by prior passes such as MachineBlockPlacement.
11894faadacSSnehasish Kumar   MF.RenumberBlocks();
11994faadacSSnehasish Kumar   MF.setBBSectionsType(BasicBlockSection::Preset);
12094faadacSSnehasish Kumar   auto *MBFI = &getAnalysis<MachineBlockFrequencyInfo>();
12194faadacSSnehasish Kumar   auto *PSI = &getAnalysis<ProfileSummaryInfoWrapperPass>().getPSI();
12294faadacSSnehasish Kumar 
1232c7077e6SSnehasish Kumar   SmallVector<MachineBasicBlock *, 2> LandingPads;
12494faadacSSnehasish Kumar   for (auto &MBB : MF) {
1252c7077e6SSnehasish Kumar     if (MBB.isEntryBlock())
12694faadacSSnehasish Kumar       continue;
1272c7077e6SSnehasish Kumar 
1282c7077e6SSnehasish Kumar     if (MBB.isEHPad())
1292c7077e6SSnehasish Kumar       LandingPads.push_back(&MBB);
1302c7077e6SSnehasish Kumar     else if (isColdBlock(MBB, MBFI, PSI))
13194faadacSSnehasish Kumar       MBB.setSectionID(MBBSectionID::ColdSectionID);
13294faadacSSnehasish Kumar   }
13394faadacSSnehasish Kumar 
1342c7077e6SSnehasish Kumar   // We only split out eh pads if all of them are cold.
1352c7077e6SSnehasish Kumar   bool HasHotLandingPads = false;
1362c7077e6SSnehasish Kumar   for (const MachineBasicBlock *LP : LandingPads) {
1372c7077e6SSnehasish Kumar     if (!isColdBlock(*LP, MBFI, PSI))
1382c7077e6SSnehasish Kumar       HasHotLandingPads = true;
1392c7077e6SSnehasish Kumar   }
1402c7077e6SSnehasish Kumar   if (!HasHotLandingPads) {
1412c7077e6SSnehasish Kumar     for (MachineBasicBlock *LP : LandingPads)
1422c7077e6SSnehasish Kumar       LP->setSectionID(MBBSectionID::ColdSectionID);
1432c7077e6SSnehasish Kumar   }
1442c7077e6SSnehasish Kumar 
14594faadacSSnehasish Kumar   auto Comparator = [](const MachineBasicBlock &X, const MachineBasicBlock &Y) {
14694faadacSSnehasish Kumar     return X.getSectionID().Type < Y.getSectionID().Type;
14794faadacSSnehasish Kumar   };
14894faadacSSnehasish Kumar   llvm::sortBasicBlocksAndUpdateBranches(MF, Comparator);
149*3bb1ce23SARCHIT SAXENA   llvm::avoidZeroOffsetLandingPad(MF);
15094faadacSSnehasish Kumar   return true;
15194faadacSSnehasish Kumar }
15294faadacSSnehasish Kumar 
getAnalysisUsage(AnalysisUsage & AU) const15394faadacSSnehasish Kumar void MachineFunctionSplitter::getAnalysisUsage(AnalysisUsage &AU) const {
15494faadacSSnehasish Kumar   AU.addRequired<MachineModuleInfoWrapperPass>();
15594faadacSSnehasish Kumar   AU.addRequired<MachineBlockFrequencyInfo>();
15694faadacSSnehasish Kumar   AU.addRequired<ProfileSummaryInfoWrapperPass>();
15794faadacSSnehasish Kumar }
15894faadacSSnehasish Kumar 
15994faadacSSnehasish Kumar char MachineFunctionSplitter::ID = 0;
16094faadacSSnehasish Kumar INITIALIZE_PASS(MachineFunctionSplitter, "machine-function-splitter",
16194faadacSSnehasish Kumar                 "Split machine functions using profile information", false,
16294faadacSSnehasish Kumar                 false)
16394faadacSSnehasish Kumar 
createMachineFunctionSplitterPass()16494faadacSSnehasish Kumar MachineFunctionPass *llvm::createMachineFunctionSplitterPass() {
16594faadacSSnehasish Kumar   return new MachineFunctionSplitter();
16694faadacSSnehasish Kumar }
167