//===- BlockExtractor.cpp - Extracts blocks into their own functions ------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This pass extracts the specified basic blocks from the module into their
// own functions.
//
//===----------------------------------------------------------------------===//
13dc40be75SVolkan Keles
148d9466a3SArthur Eubanks #include "llvm/Transforms/IPO/BlockExtractor.h"
15dc40be75SVolkan Keles #include "llvm/ADT/STLExtras.h"
16dc40be75SVolkan Keles #include "llvm/ADT/Statistic.h"
17dc40be75SVolkan Keles #include "llvm/IR/Instructions.h"
18dc40be75SVolkan Keles #include "llvm/IR/Module.h"
198d9466a3SArthur Eubanks #include "llvm/IR/PassManager.h"
2005da2fe5SReid Kleckner #include "llvm/InitializePasses.h"
21dc40be75SVolkan Keles #include "llvm/Pass.h"
22dc40be75SVolkan Keles #include "llvm/Support/CommandLine.h"
23dc40be75SVolkan Keles #include "llvm/Support/Debug.h"
24dc40be75SVolkan Keles #include "llvm/Support/MemoryBuffer.h"
25dc40be75SVolkan Keles #include "llvm/Transforms/IPO.h"
26dc40be75SVolkan Keles #include "llvm/Transforms/Utils/BasicBlockUtils.h"
27dc40be75SVolkan Keles #include "llvm/Transforms/Utils/CodeExtractor.h"
28ea3364bfSQuentin Colombet
29dc40be75SVolkan Keles using namespace llvm;
30dc40be75SVolkan Keles
31dc40be75SVolkan Keles #define DEBUG_TYPE "block-extractor"
32dc40be75SVolkan Keles
33dc40be75SVolkan Keles STATISTIC(NumExtracted, "Number of basic blocks extracted");
34dc40be75SVolkan Keles
35dc40be75SVolkan Keles static cl::opt<std::string> BlockExtractorFile(
36dc40be75SVolkan Keles "extract-blocks-file", cl::value_desc("filename"),
37dc40be75SVolkan Keles cl::desc("A file containing list of basic blocks to extract"), cl::Hidden);
38dc40be75SVolkan Keles
39d8aba75aSFangrui Song static cl::opt<bool>
40d8aba75aSFangrui Song BlockExtractorEraseFuncs("extract-blocks-erase-funcs",
41dc40be75SVolkan Keles cl::desc("Erase the existing functions"),
42dc40be75SVolkan Keles cl::Hidden);
43dc40be75SVolkan Keles namespace {
448d9466a3SArthur Eubanks class BlockExtractor {
458d9466a3SArthur Eubanks public:
BlockExtractor(bool EraseFunctions)468d9466a3SArthur Eubanks BlockExtractor(bool EraseFunctions) : EraseFunctions(EraseFunctions) {}
478d9466a3SArthur Eubanks bool runOnModule(Module &M);
init(const SmallVectorImpl<SmallVector<BasicBlock *,16>> & GroupsOfBlocksToExtract)4831ce2742SQuentin Colombet void init(const SmallVectorImpl<SmallVector<BasicBlock *, 16>>
4931ce2742SQuentin Colombet &GroupsOfBlocksToExtract) {
5031ce2742SQuentin Colombet for (const SmallVectorImpl<BasicBlock *> &GroupOfBlocks :
5131ce2742SQuentin Colombet GroupsOfBlocksToExtract) {
5231ce2742SQuentin Colombet SmallVector<BasicBlock *, 16> NewGroup;
5331ce2742SQuentin Colombet NewGroup.append(GroupOfBlocks.begin(), GroupOfBlocks.end());
5431ce2742SQuentin Colombet GroupsOfBlocks.emplace_back(NewGroup);
5531ce2742SQuentin Colombet }
5631ce2742SQuentin Colombet if (!BlockExtractorFile.empty())
5731ce2742SQuentin Colombet loadFile();
5831ce2742SQuentin Colombet }
5931ce2742SQuentin Colombet
608d9466a3SArthur Eubanks private:
618d9466a3SArthur Eubanks SmallVector<SmallVector<BasicBlock *, 16>, 4> GroupsOfBlocks;
628d9466a3SArthur Eubanks bool EraseFunctions;
638d9466a3SArthur Eubanks /// Map a function name to groups of blocks.
648d9466a3SArthur Eubanks SmallVector<std::pair<std::string, SmallVector<std::string, 4>>, 4>
658d9466a3SArthur Eubanks BlocksByName;
668d9466a3SArthur Eubanks
678d9466a3SArthur Eubanks void loadFile();
688d9466a3SArthur Eubanks void splitLandingPadPreds(Function &F);
698d9466a3SArthur Eubanks };
708d9466a3SArthur Eubanks
718d9466a3SArthur Eubanks class BlockExtractorLegacyPass : public ModulePass {
728d9466a3SArthur Eubanks BlockExtractor BE;
738d9466a3SArthur Eubanks bool runOnModule(Module &M) override;
748d9466a3SArthur Eubanks
75dc40be75SVolkan Keles public:
76dc40be75SVolkan Keles static char ID;
BlockExtractorLegacyPass(const SmallVectorImpl<BasicBlock * > & BlocksToExtract,bool EraseFunctions)778d9466a3SArthur Eubanks BlockExtractorLegacyPass(const SmallVectorImpl<BasicBlock *> &BlocksToExtract,
78dc40be75SVolkan Keles bool EraseFunctions)
798d9466a3SArthur Eubanks : ModulePass(ID), BE(EraseFunctions) {
80ea3364bfSQuentin Colombet // We want one group per element of the input list.
8131ce2742SQuentin Colombet SmallVector<SmallVector<BasicBlock *, 16>, 4> MassagedGroupsOfBlocks;
82ea3364bfSQuentin Colombet for (BasicBlock *BB : BlocksToExtract) {
83ea3364bfSQuentin Colombet SmallVector<BasicBlock *, 16> NewGroup;
84ea3364bfSQuentin Colombet NewGroup.push_back(BB);
8531ce2742SQuentin Colombet MassagedGroupsOfBlocks.push_back(NewGroup);
86ea3364bfSQuentin Colombet }
878d9466a3SArthur Eubanks BE.init(MassagedGroupsOfBlocks);
88dc40be75SVolkan Keles }
8931ce2742SQuentin Colombet
BlockExtractorLegacyPass(const SmallVectorImpl<SmallVector<BasicBlock *,16>> & GroupsOfBlocksToExtract,bool EraseFunctions)908d9466a3SArthur Eubanks BlockExtractorLegacyPass(const SmallVectorImpl<SmallVector<BasicBlock *, 16>>
9131ce2742SQuentin Colombet &GroupsOfBlocksToExtract,
9231ce2742SQuentin Colombet bool EraseFunctions)
938d9466a3SArthur Eubanks : ModulePass(ID), BE(EraseFunctions) {
948d9466a3SArthur Eubanks BE.init(GroupsOfBlocksToExtract);
9531ce2742SQuentin Colombet }
9631ce2742SQuentin Colombet
BlockExtractorLegacyPass()978d9466a3SArthur Eubanks BlockExtractorLegacyPass()
988d9466a3SArthur Eubanks : BlockExtractorLegacyPass(SmallVector<BasicBlock *, 0>(), false) {}
99dc40be75SVolkan Keles };
1008d9466a3SArthur Eubanks
101dc40be75SVolkan Keles } // end anonymous namespace
102dc40be75SVolkan Keles
1038d9466a3SArthur Eubanks char BlockExtractorLegacyPass::ID = 0;
1048d9466a3SArthur Eubanks INITIALIZE_PASS(BlockExtractorLegacyPass, "extract-blocks",
105dc40be75SVolkan Keles "Extract basic blocks from module", false, false)
106dc40be75SVolkan Keles
createBlockExtractorPass()1078d9466a3SArthur Eubanks ModulePass *llvm::createBlockExtractorPass() {
1088d9466a3SArthur Eubanks return new BlockExtractorLegacyPass();
1098d9466a3SArthur Eubanks }
createBlockExtractorPass(const SmallVectorImpl<BasicBlock * > & BlocksToExtract,bool EraseFunctions)110dc40be75SVolkan Keles ModulePass *llvm::createBlockExtractorPass(
111dc40be75SVolkan Keles const SmallVectorImpl<BasicBlock *> &BlocksToExtract, bool EraseFunctions) {
1128d9466a3SArthur Eubanks return new BlockExtractorLegacyPass(BlocksToExtract, EraseFunctions);
113dc40be75SVolkan Keles }
createBlockExtractorPass(const SmallVectorImpl<SmallVector<BasicBlock *,16>> & GroupsOfBlocksToExtract,bool EraseFunctions)11431ce2742SQuentin Colombet ModulePass *llvm::createBlockExtractorPass(
11531ce2742SQuentin Colombet const SmallVectorImpl<SmallVector<BasicBlock *, 16>>
11631ce2742SQuentin Colombet &GroupsOfBlocksToExtract,
11731ce2742SQuentin Colombet bool EraseFunctions) {
1188d9466a3SArthur Eubanks return new BlockExtractorLegacyPass(GroupsOfBlocksToExtract, EraseFunctions);
11931ce2742SQuentin Colombet }
120dc40be75SVolkan Keles
121dc40be75SVolkan Keles /// Gets all of the blocks specified in the input file.
loadFile()122dc40be75SVolkan Keles void BlockExtractor::loadFile() {
123dc40be75SVolkan Keles auto ErrOrBuf = MemoryBuffer::getFile(BlockExtractorFile);
124ebf34ea3SVolkan Keles if (ErrOrBuf.getError())
125dc40be75SVolkan Keles report_fatal_error("BlockExtractor couldn't load the file.");
126dc40be75SVolkan Keles // Read the file.
127dc40be75SVolkan Keles auto &Buf = *ErrOrBuf;
128dc40be75SVolkan Keles SmallVector<StringRef, 16> Lines;
129dc40be75SVolkan Keles Buf->getBuffer().split(Lines, '\n', /*MaxSplit=*/-1,
130dc40be75SVolkan Keles /*KeepEmpty=*/false);
131dc40be75SVolkan Keles for (const auto &Line : Lines) {
132ea3364bfSQuentin Colombet SmallVector<StringRef, 4> LineSplit;
133ea3364bfSQuentin Colombet Line.split(LineSplit, ' ', /*MaxSplit=*/-1,
134ea3364bfSQuentin Colombet /*KeepEmpty=*/false);
135ea3364bfSQuentin Colombet if (LineSplit.empty())
136ea3364bfSQuentin Colombet continue;
137cda334baSJinsong Ji if (LineSplit.size()!=2)
138*80b3dcc0SNuno Lopes report_fatal_error("Invalid line format, expecting lines like: 'funcname bb1[;bb2..]'",
139*80b3dcc0SNuno Lopes /*GenCrashDiag=*/false);
140ea3364bfSQuentin Colombet SmallVector<StringRef, 4> BBNames;
141ae2cbb34SQuentin Colombet LineSplit[1].split(BBNames, ';', /*MaxSplit=*/-1,
142ea3364bfSQuentin Colombet /*KeepEmpty=*/false);
143ea3364bfSQuentin Colombet if (BBNames.empty())
144ea3364bfSQuentin Colombet report_fatal_error("Missing bbs name");
145adcd0268SBenjamin Kramer BlocksByName.push_back(
146adcd0268SBenjamin Kramer {std::string(LineSplit[0]), {BBNames.begin(), BBNames.end()}});
147dc40be75SVolkan Keles }
148dc40be75SVolkan Keles }
149dc40be75SVolkan Keles
150dc40be75SVolkan Keles /// Extracts the landing pads to make sure all of them have only one
151dc40be75SVolkan Keles /// predecessor.
splitLandingPadPreds(Function & F)152dc40be75SVolkan Keles void BlockExtractor::splitLandingPadPreds(Function &F) {
153dc40be75SVolkan Keles for (BasicBlock &BB : F) {
154dc40be75SVolkan Keles for (Instruction &I : BB) {
155dc40be75SVolkan Keles if (!isa<InvokeInst>(&I))
156dc40be75SVolkan Keles continue;
157dc40be75SVolkan Keles InvokeInst *II = cast<InvokeInst>(&I);
158dc40be75SVolkan Keles BasicBlock *Parent = II->getParent();
159dc40be75SVolkan Keles BasicBlock *LPad = II->getUnwindDest();
160dc40be75SVolkan Keles
161dc40be75SVolkan Keles // Look through the landing pad's predecessors. If one of them ends in an
162dc40be75SVolkan Keles // 'invoke', then we want to split the landing pad.
163dc40be75SVolkan Keles bool Split = false;
164dc40be75SVolkan Keles for (auto PredBB : predecessors(LPad)) {
165dc40be75SVolkan Keles if (PredBB->isLandingPad() && PredBB != Parent &&
166dc40be75SVolkan Keles isa<InvokeInst>(Parent->getTerminator())) {
167dc40be75SVolkan Keles Split = true;
168dc40be75SVolkan Keles break;
169dc40be75SVolkan Keles }
170dc40be75SVolkan Keles }
171dc40be75SVolkan Keles
172dc40be75SVolkan Keles if (!Split)
173dc40be75SVolkan Keles continue;
174dc40be75SVolkan Keles
175dc40be75SVolkan Keles SmallVector<BasicBlock *, 2> NewBBs;
176dc40be75SVolkan Keles SplitLandingPadPredecessors(LPad, Parent, ".1", ".2", NewBBs);
177dc40be75SVolkan Keles }
178dc40be75SVolkan Keles }
179dc40be75SVolkan Keles }
180dc40be75SVolkan Keles
runOnModule(Module & M)181dc40be75SVolkan Keles bool BlockExtractor::runOnModule(Module &M) {
182dc40be75SVolkan Keles
183dc40be75SVolkan Keles bool Changed = false;
184dc40be75SVolkan Keles
185dc40be75SVolkan Keles // Get all the functions.
186dc40be75SVolkan Keles SmallVector<Function *, 4> Functions;
187dc40be75SVolkan Keles for (Function &F : M) {
188dc40be75SVolkan Keles splitLandingPadPreds(F);
189dc40be75SVolkan Keles Functions.push_back(&F);
190dc40be75SVolkan Keles }
191dc40be75SVolkan Keles
192dc40be75SVolkan Keles // Get all the blocks specified in the input file.
193ea3364bfSQuentin Colombet unsigned NextGroupIdx = GroupsOfBlocks.size();
194ea3364bfSQuentin Colombet GroupsOfBlocks.resize(NextGroupIdx + BlocksByName.size());
195dc40be75SVolkan Keles for (const auto &BInfo : BlocksByName) {
196dc40be75SVolkan Keles Function *F = M.getFunction(BInfo.first);
197dc40be75SVolkan Keles if (!F)
198*80b3dcc0SNuno Lopes report_fatal_error("Invalid function name specified in the input file",
199*80b3dcc0SNuno Lopes /*GenCrashDiag=*/false);
200ea3364bfSQuentin Colombet for (const auto &BBInfo : BInfo.second) {
201dc40be75SVolkan Keles auto Res = llvm::find_if(*F, [&](const BasicBlock &BB) {
202ea3364bfSQuentin Colombet return BB.getName().equals(BBInfo);
203dc40be75SVolkan Keles });
204dc40be75SVolkan Keles if (Res == F->end())
205*80b3dcc0SNuno Lopes report_fatal_error("Invalid block name specified in the input file",
206*80b3dcc0SNuno Lopes /*GenCrashDiag=*/false);
207ea3364bfSQuentin Colombet GroupsOfBlocks[NextGroupIdx].push_back(&*Res);
208ea3364bfSQuentin Colombet }
209ea3364bfSQuentin Colombet ++NextGroupIdx;
210dc40be75SVolkan Keles }
211dc40be75SVolkan Keles
212ea3364bfSQuentin Colombet // Extract each group of basic blocks.
213ea3364bfSQuentin Colombet for (auto &BBs : GroupsOfBlocks) {
214ea3364bfSQuentin Colombet SmallVector<BasicBlock *, 32> BlocksToExtractVec;
215ea3364bfSQuentin Colombet for (BasicBlock *BB : BBs) {
216dc40be75SVolkan Keles // Check if the module contains BB.
217dc40be75SVolkan Keles if (BB->getParent()->getParent() != &M)
218*80b3dcc0SNuno Lopes report_fatal_error("Invalid basic block", /*GenCrashDiag=*/false);
219d34e60caSNicola Zaghen LLVM_DEBUG(dbgs() << "BlockExtractor: Extracting "
220d34e60caSNicola Zaghen << BB->getParent()->getName() << ":" << BB->getName()
221d34e60caSNicola Zaghen << "\n");
222dc40be75SVolkan Keles BlocksToExtractVec.push_back(BB);
223dc40be75SVolkan Keles if (const InvokeInst *II = dyn_cast<InvokeInst>(BB->getTerminator()))
224dc40be75SVolkan Keles BlocksToExtractVec.push_back(II->getUnwindDest());
225dc40be75SVolkan Keles ++NumExtracted;
226dc40be75SVolkan Keles Changed = true;
227dc40be75SVolkan Keles }
2289852699dSVedant Kumar CodeExtractorAnalysisCache CEAC(*BBs[0]->getParent());
2299852699dSVedant Kumar Function *F = CodeExtractor(BlocksToExtractVec).extractCodeRegion(CEAC);
230ea3364bfSQuentin Colombet if (F)
231ea3364bfSQuentin Colombet LLVM_DEBUG(dbgs() << "Extracted group '" << (*BBs.begin())->getName()
232ea3364bfSQuentin Colombet << "' in: " << F->getName() << '\n');
233ea3364bfSQuentin Colombet else
234ea3364bfSQuentin Colombet LLVM_DEBUG(dbgs() << "Failed to extract for group '"
235ea3364bfSQuentin Colombet << (*BBs.begin())->getName() << "'\n");
236ea3364bfSQuentin Colombet }
237dc40be75SVolkan Keles
238dc40be75SVolkan Keles // Erase the functions.
239dc40be75SVolkan Keles if (EraseFunctions || BlockExtractorEraseFuncs) {
240dc40be75SVolkan Keles for (Function *F : Functions) {
241d34e60caSNicola Zaghen LLVM_DEBUG(dbgs() << "BlockExtractor: Trying to delete " << F->getName()
2424ecdb44aSVolkan Keles << "\n");
2434ecdb44aSVolkan Keles F->deleteBody();
244dc40be75SVolkan Keles }
245dc40be75SVolkan Keles // Set linkage as ExternalLinkage to avoid erasing unreachable functions.
246dc40be75SVolkan Keles for (Function &F : M)
247dc40be75SVolkan Keles F.setLinkage(GlobalValue::ExternalLinkage);
248dc40be75SVolkan Keles Changed = true;
249dc40be75SVolkan Keles }
250dc40be75SVolkan Keles
251dc40be75SVolkan Keles return Changed;
252dc40be75SVolkan Keles }
2538d9466a3SArthur Eubanks
runOnModule(Module & M)2548d9466a3SArthur Eubanks bool BlockExtractorLegacyPass::runOnModule(Module &M) {
2558d9466a3SArthur Eubanks return BE.runOnModule(M);
2568d9466a3SArthur Eubanks }
2578d9466a3SArthur Eubanks
run(Module & M,ModuleAnalysisManager & AM)2588d9466a3SArthur Eubanks PreservedAnalyses BlockExtractorPass::run(Module &M,
2598d9466a3SArthur Eubanks ModuleAnalysisManager &AM) {
2608d9466a3SArthur Eubanks BlockExtractor BE(false);
2618d9466a3SArthur Eubanks BE.init(SmallVector<SmallVector<BasicBlock *, 16>, 0>());
2628d9466a3SArthur Eubanks return BE.runOnModule(M) ? PreservedAnalyses::none()
2638d9466a3SArthur Eubanks : PreservedAnalyses::all();
2648d9466a3SArthur Eubanks }
265