1af732203SDimitry Andric //===-- MachineFunctionSplitter.cpp - Split machine functions //-----------===//
2af732203SDimitry Andric //
3af732203SDimitry Andric // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4af732203SDimitry Andric // See https://llvm.org/LICENSE.txt for license information.
5af732203SDimitry Andric // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6af732203SDimitry Andric //
7af732203SDimitry Andric //===----------------------------------------------------------------------===//
8af732203SDimitry Andric //
9af732203SDimitry Andric // \file
10af732203SDimitry Andric // Uses profile information to split out cold blocks.
11af732203SDimitry Andric //
12af732203SDimitry Andric // This pass splits out cold machine basic blocks from the parent function. This
13af732203SDimitry Andric // implementation leverages the basic block section framework. Blocks marked
14af732203SDimitry Andric // cold by this pass are grouped together in a separate section prefixed with
15af732203SDimitry Andric // ".text.unlikely.*". The linker can then group these together as a cold
16af732203SDimitry Andric // section. The split part of the function is a contiguous region identified by
17af732203SDimitry Andric // the symbol "foo.cold". Grouping all cold blocks across functions together
18af732203SDimitry Andric // decreases fragmentation and improves icache and itlb utilization. Note that
19af732203SDimitry Andric // the overall changes to the binary size are negligible; only a small number of
20af732203SDimitry Andric // additional jump instructions may be introduced.
21af732203SDimitry Andric //
22af732203SDimitry Andric // For the original RFC of this pass please see
23af732203SDimitry Andric // https://groups.google.com/d/msg/llvm-dev/RUegaMg-iqc/wFAVxa6fCgAJ
24af732203SDimitry Andric //===----------------------------------------------------------------------===//
25af732203SDimitry Andric
26*5f7ddb14SDimitry Andric #include "llvm/ADT/SmallVector.h"
27af732203SDimitry Andric #include "llvm/ADT/Statistic.h"
28af732203SDimitry Andric #include "llvm/Analysis/ProfileSummaryInfo.h"
29af732203SDimitry Andric #include "llvm/CodeGen/BasicBlockSectionUtils.h"
30af732203SDimitry Andric #include "llvm/CodeGen/MachineBasicBlock.h"
31af732203SDimitry Andric #include "llvm/CodeGen/MachineBlockFrequencyInfo.h"
32af732203SDimitry Andric #include "llvm/CodeGen/MachineFunction.h"
33af732203SDimitry Andric #include "llvm/CodeGen/MachineFunctionPass.h"
34af732203SDimitry Andric #include "llvm/CodeGen/MachineModuleInfo.h"
35af732203SDimitry Andric #include "llvm/CodeGen/Passes.h"
36af732203SDimitry Andric #include "llvm/IR/Function.h"
37af732203SDimitry Andric #include "llvm/IR/Module.h"
38af732203SDimitry Andric #include "llvm/InitializePasses.h"
39af732203SDimitry Andric #include "llvm/Support/CommandLine.h"
40af732203SDimitry Andric
41af732203SDimitry Andric using namespace llvm;
42af732203SDimitry Andric
43af732203SDimitry Andric // FIXME: This cutoff value is CPU dependent and should be moved to
44af732203SDimitry Andric // TargetTransformInfo once we consider enabling this on other platforms.
45af732203SDimitry Andric // The value is expressed as a ProfileSummaryInfo integer percentile cutoff.
46af732203SDimitry Andric // Defaults to 999950, i.e. all blocks colder than 99.995 percentile are split.
47af732203SDimitry Andric // The default was empirically determined to be optimal when considering cutoff
48af732203SDimitry Andric // values between 99%-ile to 100%-ile with respect to iTLB and icache metrics on
49af732203SDimitry Andric // Intel CPUs.
50af732203SDimitry Andric static cl::opt<unsigned>
51af732203SDimitry Andric PercentileCutoff("mfs-psi-cutoff",
52af732203SDimitry Andric cl::desc("Percentile profile summary cutoff used to "
53af732203SDimitry Andric "determine cold blocks. Unused if set to zero."),
54af732203SDimitry Andric cl::init(999950), cl::Hidden);
55af732203SDimitry Andric
56af732203SDimitry Andric static cl::opt<unsigned> ColdCountThreshold(
57af732203SDimitry Andric "mfs-count-threshold",
58af732203SDimitry Andric cl::desc(
59af732203SDimitry Andric "Minimum number of times a block must be executed to be retained."),
60af732203SDimitry Andric cl::init(1), cl::Hidden);
61af732203SDimitry Andric
62af732203SDimitry Andric namespace {
63af732203SDimitry Andric
64af732203SDimitry Andric class MachineFunctionSplitter : public MachineFunctionPass {
65af732203SDimitry Andric public:
66af732203SDimitry Andric static char ID;
MachineFunctionSplitter()67af732203SDimitry Andric MachineFunctionSplitter() : MachineFunctionPass(ID) {
68af732203SDimitry Andric initializeMachineFunctionSplitterPass(*PassRegistry::getPassRegistry());
69af732203SDimitry Andric }
70af732203SDimitry Andric
getPassName() const71af732203SDimitry Andric StringRef getPassName() const override {
72af732203SDimitry Andric return "Machine Function Splitter Transformation";
73af732203SDimitry Andric }
74af732203SDimitry Andric
75af732203SDimitry Andric void getAnalysisUsage(AnalysisUsage &AU) const override;
76af732203SDimitry Andric
77af732203SDimitry Andric bool runOnMachineFunction(MachineFunction &F) override;
78af732203SDimitry Andric };
79af732203SDimitry Andric } // end anonymous namespace
80af732203SDimitry Andric
isColdBlock(const MachineBasicBlock & MBB,const MachineBlockFrequencyInfo * MBFI,ProfileSummaryInfo * PSI)81*5f7ddb14SDimitry Andric static bool isColdBlock(const MachineBasicBlock &MBB,
82af732203SDimitry Andric const MachineBlockFrequencyInfo *MBFI,
83af732203SDimitry Andric ProfileSummaryInfo *PSI) {
84af732203SDimitry Andric Optional<uint64_t> Count = MBFI->getBlockProfileCount(&MBB);
85af732203SDimitry Andric if (!Count.hasValue())
86af732203SDimitry Andric return true;
87af732203SDimitry Andric
88af732203SDimitry Andric if (PercentileCutoff > 0) {
89af732203SDimitry Andric return PSI->isColdCountNthPercentile(PercentileCutoff, *Count);
90af732203SDimitry Andric }
91af732203SDimitry Andric return (*Count < ColdCountThreshold);
92af732203SDimitry Andric }
93af732203SDimitry Andric
runOnMachineFunction(MachineFunction & MF)94af732203SDimitry Andric bool MachineFunctionSplitter::runOnMachineFunction(MachineFunction &MF) {
95af732203SDimitry Andric // TODO: We only target functions with profile data. Static information may
96af732203SDimitry Andric // also be considered but we don't see performance improvements yet.
97af732203SDimitry Andric if (!MF.getFunction().hasProfileData())
98af732203SDimitry Andric return false;
99af732203SDimitry Andric
100af732203SDimitry Andric // TODO: We don't split functions where a section attribute has been set
101af732203SDimitry Andric // since the split part may not be placed in a contiguous region. It may also
102af732203SDimitry Andric // be more beneficial to augment the linker to ensure contiguous layout of
103af732203SDimitry Andric // split functions within the same section as specified by the attribute.
104*5f7ddb14SDimitry Andric if (MF.getFunction().hasSection() ||
105*5f7ddb14SDimitry Andric MF.getFunction().hasFnAttribute("implicit-section-name"))
106af732203SDimitry Andric return false;
107af732203SDimitry Andric
108af732203SDimitry Andric // We don't want to proceed further for cold functions
109af732203SDimitry Andric // or functions of unknown hotness. Lukewarm functions have no prefix.
110af732203SDimitry Andric Optional<StringRef> SectionPrefix = MF.getFunction().getSectionPrefix();
111af732203SDimitry Andric if (SectionPrefix.hasValue() &&
112af732203SDimitry Andric (SectionPrefix.getValue().equals("unlikely") ||
113af732203SDimitry Andric SectionPrefix.getValue().equals("unknown"))) {
114af732203SDimitry Andric return false;
115af732203SDimitry Andric }
116af732203SDimitry Andric
117af732203SDimitry Andric // Renumbering blocks here preserves the order of the blocks as
118af732203SDimitry Andric // sortBasicBlocksAndUpdateBranches uses the numeric identifier to sort
119af732203SDimitry Andric // blocks. Preserving the order of blocks is essential to retaining decisions
120af732203SDimitry Andric // made by prior passes such as MachineBlockPlacement.
121af732203SDimitry Andric MF.RenumberBlocks();
122af732203SDimitry Andric MF.setBBSectionsType(BasicBlockSection::Preset);
123af732203SDimitry Andric auto *MBFI = &getAnalysis<MachineBlockFrequencyInfo>();
124af732203SDimitry Andric auto *PSI = &getAnalysis<ProfileSummaryInfoWrapperPass>().getPSI();
125af732203SDimitry Andric
126*5f7ddb14SDimitry Andric SmallVector<MachineBasicBlock *, 2> LandingPads;
127af732203SDimitry Andric for (auto &MBB : MF) {
128*5f7ddb14SDimitry Andric if (MBB.isEntryBlock())
129af732203SDimitry Andric continue;
130*5f7ddb14SDimitry Andric
131*5f7ddb14SDimitry Andric if (MBB.isEHPad())
132*5f7ddb14SDimitry Andric LandingPads.push_back(&MBB);
133*5f7ddb14SDimitry Andric else if (isColdBlock(MBB, MBFI, PSI))
134af732203SDimitry Andric MBB.setSectionID(MBBSectionID::ColdSectionID);
135af732203SDimitry Andric }
136af732203SDimitry Andric
137*5f7ddb14SDimitry Andric // We only split out eh pads if all of them are cold.
138*5f7ddb14SDimitry Andric bool HasHotLandingPads = false;
139*5f7ddb14SDimitry Andric for (const MachineBasicBlock *LP : LandingPads) {
140*5f7ddb14SDimitry Andric if (!isColdBlock(*LP, MBFI, PSI))
141*5f7ddb14SDimitry Andric HasHotLandingPads = true;
142*5f7ddb14SDimitry Andric }
143*5f7ddb14SDimitry Andric if (!HasHotLandingPads) {
144*5f7ddb14SDimitry Andric for (MachineBasicBlock *LP : LandingPads)
145*5f7ddb14SDimitry Andric LP->setSectionID(MBBSectionID::ColdSectionID);
146*5f7ddb14SDimitry Andric }
147*5f7ddb14SDimitry Andric
148af732203SDimitry Andric auto Comparator = [](const MachineBasicBlock &X, const MachineBasicBlock &Y) {
149af732203SDimitry Andric return X.getSectionID().Type < Y.getSectionID().Type;
150af732203SDimitry Andric };
151af732203SDimitry Andric llvm::sortBasicBlocksAndUpdateBranches(MF, Comparator);
152af732203SDimitry Andric
153af732203SDimitry Andric return true;
154af732203SDimitry Andric }
155af732203SDimitry Andric
getAnalysisUsage(AnalysisUsage & AU) const156af732203SDimitry Andric void MachineFunctionSplitter::getAnalysisUsage(AnalysisUsage &AU) const {
157af732203SDimitry Andric AU.addRequired<MachineModuleInfoWrapperPass>();
158af732203SDimitry Andric AU.addRequired<MachineBlockFrequencyInfo>();
159af732203SDimitry Andric AU.addRequired<ProfileSummaryInfoWrapperPass>();
160af732203SDimitry Andric }
161af732203SDimitry Andric
162af732203SDimitry Andric char MachineFunctionSplitter::ID = 0;
163af732203SDimitry Andric INITIALIZE_PASS(MachineFunctionSplitter, "machine-function-splitter",
164af732203SDimitry Andric "Split machine functions using profile information", false,
165af732203SDimitry Andric false)
166af732203SDimitry Andric
createMachineFunctionSplitterPass()167af732203SDimitry Andric MachineFunctionPass *llvm::createMachineFunctionSplitterPass() {
168af732203SDimitry Andric return new MachineFunctionSplitter();
169af732203SDimitry Andric }
170