//===-- MachineFunctionSplitter.cpp - Split machine functions -------------===//
294faadacSSnehasish Kumar //
394faadacSSnehasish Kumar // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
494faadacSSnehasish Kumar // See https://llvm.org/LICENSE.txt for license information.
594faadacSSnehasish Kumar // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
694faadacSSnehasish Kumar //
794faadacSSnehasish Kumar //===----------------------------------------------------------------------===//
894faadacSSnehasish Kumar //
994faadacSSnehasish Kumar // \file
1094faadacSSnehasish Kumar // Uses profile information to split out cold blocks.
1194faadacSSnehasish Kumar //
1294faadacSSnehasish Kumar // This pass splits out cold machine basic blocks from the parent function. This
1394faadacSSnehasish Kumar // implementation leverages the basic block section framework. Blocks marked
1494faadacSSnehasish Kumar // cold by this pass are grouped together in a separate section prefixed with
1594faadacSSnehasish Kumar // ".text.unlikely.*". The linker can then group these together as a cold
1694faadacSSnehasish Kumar // section. The split part of the function is a contiguous region identified by
1794faadacSSnehasish Kumar // the symbol "foo.cold". Grouping all cold blocks across functions together
1894faadacSSnehasish Kumar // decreases fragmentation and improves icache and itlb utilization. Note that
1994faadacSSnehasish Kumar // the overall changes to the binary size are negligible; only a small number of
2094faadacSSnehasish Kumar // additional jump instructions may be introduced.
2194faadacSSnehasish Kumar //
2294faadacSSnehasish Kumar // For the original RFC of this pass please see
2394faadacSSnehasish Kumar // https://groups.google.com/d/msg/llvm-dev/RUegaMg-iqc/wFAVxa6fCgAJ
2494faadacSSnehasish Kumar //===----------------------------------------------------------------------===//
2594faadacSSnehasish Kumar
262c7077e6SSnehasish Kumar #include "llvm/ADT/SmallVector.h"
2794faadacSSnehasish Kumar #include "llvm/Analysis/ProfileSummaryInfo.h"
2894faadacSSnehasish Kumar #include "llvm/CodeGen/BasicBlockSectionUtils.h"
2994faadacSSnehasish Kumar #include "llvm/CodeGen/MachineBasicBlock.h"
3094faadacSSnehasish Kumar #include "llvm/CodeGen/MachineBlockFrequencyInfo.h"
3194faadacSSnehasish Kumar #include "llvm/CodeGen/MachineFunction.h"
3294faadacSSnehasish Kumar #include "llvm/CodeGen/MachineFunctionPass.h"
3394faadacSSnehasish Kumar #include "llvm/CodeGen/MachineModuleInfo.h"
3494faadacSSnehasish Kumar #include "llvm/CodeGen/Passes.h"
3594faadacSSnehasish Kumar #include "llvm/IR/Function.h"
3694faadacSSnehasish Kumar #include "llvm/InitializePasses.h"
3794faadacSSnehasish Kumar #include "llvm/Support/CommandLine.h"
3894faadacSSnehasish Kumar
3994faadacSSnehasish Kumar using namespace llvm;
4094faadacSSnehasish Kumar
4124bf6ff4SSnehasish Kumar // FIXME: This cutoff value is CPU dependent and should be moved to
4224bf6ff4SSnehasish Kumar // TargetTransformInfo once we consider enabling this on other platforms.
4324bf6ff4SSnehasish Kumar // The value is expressed as a ProfileSummaryInfo integer percentile cutoff.
4424bf6ff4SSnehasish Kumar // Defaults to 999950, i.e. all blocks colder than 99.995 percentile are split.
4524bf6ff4SSnehasish Kumar // The default was empirically determined to be optimal when considering cutoff
4624bf6ff4SSnehasish Kumar // values between 99%-ile to 100%-ile with respect to iTLB and icache metrics on
4724bf6ff4SSnehasish Kumar // Intel CPUs.
4894faadacSSnehasish Kumar static cl::opt<unsigned>
4994faadacSSnehasish Kumar PercentileCutoff("mfs-psi-cutoff",
5094faadacSSnehasish Kumar cl::desc("Percentile profile summary cutoff used to "
5194faadacSSnehasish Kumar "determine cold blocks. Unused if set to zero."),
5224bf6ff4SSnehasish Kumar cl::init(999950), cl::Hidden);
5394faadacSSnehasish Kumar
5494faadacSSnehasish Kumar static cl::opt<unsigned> ColdCountThreshold(
5594faadacSSnehasish Kumar "mfs-count-threshold",
5694faadacSSnehasish Kumar cl::desc(
5794faadacSSnehasish Kumar "Minimum number of times a block must be executed to be retained."),
5894faadacSSnehasish Kumar cl::init(1), cl::Hidden);
5994faadacSSnehasish Kumar
6094faadacSSnehasish Kumar namespace {
6194faadacSSnehasish Kumar
6294faadacSSnehasish Kumar class MachineFunctionSplitter : public MachineFunctionPass {
6394faadacSSnehasish Kumar public:
6494faadacSSnehasish Kumar static char ID;
MachineFunctionSplitter()6594faadacSSnehasish Kumar MachineFunctionSplitter() : MachineFunctionPass(ID) {
6694faadacSSnehasish Kumar initializeMachineFunctionSplitterPass(*PassRegistry::getPassRegistry());
6794faadacSSnehasish Kumar }
6894faadacSSnehasish Kumar
getPassName() const6994faadacSSnehasish Kumar StringRef getPassName() const override {
7094faadacSSnehasish Kumar return "Machine Function Splitter Transformation";
7194faadacSSnehasish Kumar }
7294faadacSSnehasish Kumar
7394faadacSSnehasish Kumar void getAnalysisUsage(AnalysisUsage &AU) const override;
7494faadacSSnehasish Kumar
7594faadacSSnehasish Kumar bool runOnMachineFunction(MachineFunction &F) override;
7694faadacSSnehasish Kumar };
7794faadacSSnehasish Kumar } // end anonymous namespace
7894faadacSSnehasish Kumar
isColdBlock(const MachineBasicBlock & MBB,const MachineBlockFrequencyInfo * MBFI,ProfileSummaryInfo * PSI)792c7077e6SSnehasish Kumar static bool isColdBlock(const MachineBasicBlock &MBB,
8094faadacSSnehasish Kumar const MachineBlockFrequencyInfo *MBFI,
8194faadacSSnehasish Kumar ProfileSummaryInfo *PSI) {
8294faadacSSnehasish Kumar Optional<uint64_t> Count = MBFI->getBlockProfileCount(&MBB);
83e0e687a6SKazu Hirata if (!Count)
8494faadacSSnehasish Kumar return true;
8594faadacSSnehasish Kumar
8694faadacSSnehasish Kumar if (PercentileCutoff > 0) {
8794faadacSSnehasish Kumar return PSI->isColdCountNthPercentile(PercentileCutoff, *Count);
8894faadacSSnehasish Kumar }
8994faadacSSnehasish Kumar return (*Count < ColdCountThreshold);
9094faadacSSnehasish Kumar }
9194faadacSSnehasish Kumar
runOnMachineFunction(MachineFunction & MF)9294faadacSSnehasish Kumar bool MachineFunctionSplitter::runOnMachineFunction(MachineFunction &MF) {
9394faadacSSnehasish Kumar // TODO: We only target functions with profile data. Static information may
9494faadacSSnehasish Kumar // also be considered but we don't see performance improvements yet.
9594faadacSSnehasish Kumar if (!MF.getFunction().hasProfileData())
9694faadacSSnehasish Kumar return false;
9794faadacSSnehasish Kumar
9894faadacSSnehasish Kumar // TODO: We don't split functions where a section attribute has been set
9994faadacSSnehasish Kumar // since the split part may not be placed in a contiguous region. It may also
10094faadacSSnehasish Kumar // be more beneficial to augment the linker to ensure contiguous layout of
10194faadacSSnehasish Kumar // split functions within the same section as specified by the attribute.
1023da0aeeaSSnehasish Kumar if (MF.getFunction().hasSection() ||
1038077d0ffSSnehasish Kumar MF.getFunction().hasFnAttribute("implicit-section-name"))
10494faadacSSnehasish Kumar return false;
10594faadacSSnehasish Kumar
10694faadacSSnehasish Kumar // We don't want to proceed further for cold functions
10794faadacSSnehasish Kumar // or functions of unknown hotness. Lukewarm functions have no prefix.
10894faadacSSnehasish Kumar Optional<StringRef> SectionPrefix = MF.getFunction().getSectionPrefix();
109611ffcf4SKazu Hirata if (SectionPrefix && (SectionPrefix.value().equals("unlikely") ||
110611ffcf4SKazu Hirata SectionPrefix.value().equals("unknown"))) {
11194faadacSSnehasish Kumar return false;
11294faadacSSnehasish Kumar }
11394faadacSSnehasish Kumar
11494faadacSSnehasish Kumar // Renumbering blocks here preserves the order of the blocks as
11594faadacSSnehasish Kumar // sortBasicBlocksAndUpdateBranches uses the numeric identifier to sort
11694faadacSSnehasish Kumar // blocks. Preserving the order of blocks is essential to retaining decisions
11794faadacSSnehasish Kumar // made by prior passes such as MachineBlockPlacement.
11894faadacSSnehasish Kumar MF.RenumberBlocks();
11994faadacSSnehasish Kumar MF.setBBSectionsType(BasicBlockSection::Preset);
12094faadacSSnehasish Kumar auto *MBFI = &getAnalysis<MachineBlockFrequencyInfo>();
12194faadacSSnehasish Kumar auto *PSI = &getAnalysis<ProfileSummaryInfoWrapperPass>().getPSI();
12294faadacSSnehasish Kumar
1232c7077e6SSnehasish Kumar SmallVector<MachineBasicBlock *, 2> LandingPads;
12494faadacSSnehasish Kumar for (auto &MBB : MF) {
1252c7077e6SSnehasish Kumar if (MBB.isEntryBlock())
12694faadacSSnehasish Kumar continue;
1272c7077e6SSnehasish Kumar
1282c7077e6SSnehasish Kumar if (MBB.isEHPad())
1292c7077e6SSnehasish Kumar LandingPads.push_back(&MBB);
1302c7077e6SSnehasish Kumar else if (isColdBlock(MBB, MBFI, PSI))
13194faadacSSnehasish Kumar MBB.setSectionID(MBBSectionID::ColdSectionID);
13294faadacSSnehasish Kumar }
13394faadacSSnehasish Kumar
1342c7077e6SSnehasish Kumar // We only split out eh pads if all of them are cold.
1352c7077e6SSnehasish Kumar bool HasHotLandingPads = false;
1362c7077e6SSnehasish Kumar for (const MachineBasicBlock *LP : LandingPads) {
1372c7077e6SSnehasish Kumar if (!isColdBlock(*LP, MBFI, PSI))
1382c7077e6SSnehasish Kumar HasHotLandingPads = true;
1392c7077e6SSnehasish Kumar }
1402c7077e6SSnehasish Kumar if (!HasHotLandingPads) {
1412c7077e6SSnehasish Kumar for (MachineBasicBlock *LP : LandingPads)
1422c7077e6SSnehasish Kumar LP->setSectionID(MBBSectionID::ColdSectionID);
1432c7077e6SSnehasish Kumar }
1442c7077e6SSnehasish Kumar
14594faadacSSnehasish Kumar auto Comparator = [](const MachineBasicBlock &X, const MachineBasicBlock &Y) {
14694faadacSSnehasish Kumar return X.getSectionID().Type < Y.getSectionID().Type;
14794faadacSSnehasish Kumar };
14894faadacSSnehasish Kumar llvm::sortBasicBlocksAndUpdateBranches(MF, Comparator);
149*3bb1ce23SARCHIT SAXENA llvm::avoidZeroOffsetLandingPad(MF);
15094faadacSSnehasish Kumar return true;
15194faadacSSnehasish Kumar }
15294faadacSSnehasish Kumar
getAnalysisUsage(AnalysisUsage & AU) const15394faadacSSnehasish Kumar void MachineFunctionSplitter::getAnalysisUsage(AnalysisUsage &AU) const {
15494faadacSSnehasish Kumar AU.addRequired<MachineModuleInfoWrapperPass>();
15594faadacSSnehasish Kumar AU.addRequired<MachineBlockFrequencyInfo>();
15694faadacSSnehasish Kumar AU.addRequired<ProfileSummaryInfoWrapperPass>();
15794faadacSSnehasish Kumar }
15894faadacSSnehasish Kumar
15994faadacSSnehasish Kumar char MachineFunctionSplitter::ID = 0;
16094faadacSSnehasish Kumar INITIALIZE_PASS(MachineFunctionSplitter, "machine-function-splitter",
16194faadacSSnehasish Kumar "Split machine functions using profile information", false,
16294faadacSSnehasish Kumar false)
16394faadacSSnehasish Kumar
createMachineFunctionSplitterPass()16494faadacSSnehasish Kumar MachineFunctionPass *llvm::createMachineFunctionSplitterPass() {
16594faadacSSnehasish Kumar return new MachineFunctionSplitter();
16694faadacSSnehasish Kumar }
167