1dc40be75SVolkan Keles //===- BlockExtractor.cpp - Extracts blocks into their own functions ------===//
2dc40be75SVolkan Keles //
32946cd70SChandler Carruth // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
42946cd70SChandler Carruth // See https://llvm.org/LICENSE.txt for license information.
52946cd70SChandler Carruth // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6dc40be75SVolkan Keles //
7dc40be75SVolkan Keles //===----------------------------------------------------------------------===//
8dc40be75SVolkan Keles //
9dc40be75SVolkan Keles // This pass extracts the specified basic blocks from the module into their
10dc40be75SVolkan Keles // own functions.
11dc40be75SVolkan Keles //
12dc40be75SVolkan Keles //===----------------------------------------------------------------------===//
13dc40be75SVolkan Keles 
14dc40be75SVolkan Keles #include "llvm/ADT/STLExtras.h"
15dc40be75SVolkan Keles #include "llvm/ADT/Statistic.h"
16dc40be75SVolkan Keles #include "llvm/IR/Instructions.h"
17dc40be75SVolkan Keles #include "llvm/IR/Module.h"
18*05da2fe5SReid Kleckner #include "llvm/InitializePasses.h"
19dc40be75SVolkan Keles #include "llvm/Pass.h"
20dc40be75SVolkan Keles #include "llvm/Support/CommandLine.h"
21dc40be75SVolkan Keles #include "llvm/Support/Debug.h"
22dc40be75SVolkan Keles #include "llvm/Support/MemoryBuffer.h"
23dc40be75SVolkan Keles #include "llvm/Transforms/IPO.h"
24dc40be75SVolkan Keles #include "llvm/Transforms/Utils/BasicBlockUtils.h"
25dc40be75SVolkan Keles #include "llvm/Transforms/Utils/CodeExtractor.h"
26ea3364bfSQuentin Colombet 
27dc40be75SVolkan Keles using namespace llvm;
28dc40be75SVolkan Keles 
29dc40be75SVolkan Keles #define DEBUG_TYPE "block-extractor"
30dc40be75SVolkan Keles 
31dc40be75SVolkan Keles STATISTIC(NumExtracted, "Number of basic blocks extracted");
32dc40be75SVolkan Keles 
33dc40be75SVolkan Keles static cl::opt<std::string> BlockExtractorFile(
34dc40be75SVolkan Keles     "extract-blocks-file", cl::value_desc("filename"),
35dc40be75SVolkan Keles     cl::desc("A file containing list of basic blocks to extract"), cl::Hidden);
36dc40be75SVolkan Keles 
37dc40be75SVolkan Keles cl::opt<bool> BlockExtractorEraseFuncs("extract-blocks-erase-funcs",
38dc40be75SVolkan Keles                                        cl::desc("Erase the existing functions"),
39dc40be75SVolkan Keles                                        cl::Hidden);
40dc40be75SVolkan Keles namespace {
41dc40be75SVolkan Keles class BlockExtractor : public ModulePass {
42ea3364bfSQuentin Colombet   SmallVector<SmallVector<BasicBlock *, 16>, 4> GroupsOfBlocks;
43dc40be75SVolkan Keles   bool EraseFunctions;
44ea3364bfSQuentin Colombet   /// Map a function name to groups of blocks.
45ea3364bfSQuentin Colombet   SmallVector<std::pair<std::string, SmallVector<std::string, 4>>, 4>
46ea3364bfSQuentin Colombet       BlocksByName;
47dc40be75SVolkan Keles 
4831ce2742SQuentin Colombet   void init(const SmallVectorImpl<SmallVector<BasicBlock *, 16>>
4931ce2742SQuentin Colombet                 &GroupsOfBlocksToExtract) {
5031ce2742SQuentin Colombet     for (const SmallVectorImpl<BasicBlock *> &GroupOfBlocks :
5131ce2742SQuentin Colombet          GroupsOfBlocksToExtract) {
5231ce2742SQuentin Colombet       SmallVector<BasicBlock *, 16> NewGroup;
5331ce2742SQuentin Colombet       NewGroup.append(GroupOfBlocks.begin(), GroupOfBlocks.end());
5431ce2742SQuentin Colombet       GroupsOfBlocks.emplace_back(NewGroup);
5531ce2742SQuentin Colombet     }
5631ce2742SQuentin Colombet     if (!BlockExtractorFile.empty())
5731ce2742SQuentin Colombet       loadFile();
5831ce2742SQuentin Colombet   }
5931ce2742SQuentin Colombet 
60dc40be75SVolkan Keles public:
61dc40be75SVolkan Keles   static char ID;
62dc40be75SVolkan Keles   BlockExtractor(const SmallVectorImpl<BasicBlock *> &BlocksToExtract,
63dc40be75SVolkan Keles                  bool EraseFunctions)
64ea3364bfSQuentin Colombet       : ModulePass(ID), EraseFunctions(EraseFunctions) {
65ea3364bfSQuentin Colombet     // We want one group per element of the input list.
6631ce2742SQuentin Colombet     SmallVector<SmallVector<BasicBlock *, 16>, 4> MassagedGroupsOfBlocks;
67ea3364bfSQuentin Colombet     for (BasicBlock *BB : BlocksToExtract) {
68ea3364bfSQuentin Colombet       SmallVector<BasicBlock *, 16> NewGroup;
69ea3364bfSQuentin Colombet       NewGroup.push_back(BB);
7031ce2742SQuentin Colombet       MassagedGroupsOfBlocks.push_back(NewGroup);
71ea3364bfSQuentin Colombet     }
7231ce2742SQuentin Colombet     init(MassagedGroupsOfBlocks);
73dc40be75SVolkan Keles   }
7431ce2742SQuentin Colombet 
7531ce2742SQuentin Colombet   BlockExtractor(const SmallVectorImpl<SmallVector<BasicBlock *, 16>>
7631ce2742SQuentin Colombet                      &GroupsOfBlocksToExtract,
7731ce2742SQuentin Colombet                  bool EraseFunctions)
7831ce2742SQuentin Colombet       : ModulePass(ID), EraseFunctions(EraseFunctions) {
7931ce2742SQuentin Colombet     init(GroupsOfBlocksToExtract);
8031ce2742SQuentin Colombet   }
8131ce2742SQuentin Colombet 
82dc40be75SVolkan Keles   BlockExtractor() : BlockExtractor(SmallVector<BasicBlock *, 0>(), false) {}
83dc40be75SVolkan Keles   bool runOnModule(Module &M) override;
84dc40be75SVolkan Keles 
85dc40be75SVolkan Keles private:
86dc40be75SVolkan Keles   void loadFile();
87dc40be75SVolkan Keles   void splitLandingPadPreds(Function &F);
88dc40be75SVolkan Keles };
89dc40be75SVolkan Keles } // end anonymous namespace
90dc40be75SVolkan Keles 
// Definition of the static pass identifier declared in the class; every
// constructor passes it to ModulePass.
char BlockExtractor::ID = 0;
// Pass registration: "extract-blocks" is the pass argument and the second
// string its description. NOTE(review): the two trailing 'false' values are
// presumably the CFG-only and is-analysis flags of INITIALIZE_PASS -- confirm
// against PassSupport.h.
INITIALIZE_PASS(BlockExtractor, "extract-blocks",
                "Extract basic blocks from module", false, false)
94dc40be75SVolkan Keles 
95dc40be75SVolkan Keles ModulePass *llvm::createBlockExtractorPass() { return new BlockExtractor(); }
96dc40be75SVolkan Keles ModulePass *llvm::createBlockExtractorPass(
97dc40be75SVolkan Keles     const SmallVectorImpl<BasicBlock *> &BlocksToExtract, bool EraseFunctions) {
98dc40be75SVolkan Keles   return new BlockExtractor(BlocksToExtract, EraseFunctions);
99dc40be75SVolkan Keles }
10031ce2742SQuentin Colombet ModulePass *llvm::createBlockExtractorPass(
10131ce2742SQuentin Colombet     const SmallVectorImpl<SmallVector<BasicBlock *, 16>>
10231ce2742SQuentin Colombet         &GroupsOfBlocksToExtract,
10331ce2742SQuentin Colombet     bool EraseFunctions) {
10431ce2742SQuentin Colombet   return new BlockExtractor(GroupsOfBlocksToExtract, EraseFunctions);
10531ce2742SQuentin Colombet }
106dc40be75SVolkan Keles 
107dc40be75SVolkan Keles /// Gets all of the blocks specified in the input file.
108dc40be75SVolkan Keles void BlockExtractor::loadFile() {
109dc40be75SVolkan Keles   auto ErrOrBuf = MemoryBuffer::getFile(BlockExtractorFile);
110ebf34ea3SVolkan Keles   if (ErrOrBuf.getError())
111dc40be75SVolkan Keles     report_fatal_error("BlockExtractor couldn't load the file.");
112dc40be75SVolkan Keles   // Read the file.
113dc40be75SVolkan Keles   auto &Buf = *ErrOrBuf;
114dc40be75SVolkan Keles   SmallVector<StringRef, 16> Lines;
115dc40be75SVolkan Keles   Buf->getBuffer().split(Lines, '\n', /*MaxSplit=*/-1,
116dc40be75SVolkan Keles                          /*KeepEmpty=*/false);
117dc40be75SVolkan Keles   for (const auto &Line : Lines) {
118ea3364bfSQuentin Colombet     SmallVector<StringRef, 4> LineSplit;
119ea3364bfSQuentin Colombet     Line.split(LineSplit, ' ', /*MaxSplit=*/-1,
120ea3364bfSQuentin Colombet                /*KeepEmpty=*/false);
121ea3364bfSQuentin Colombet     if (LineSplit.empty())
122ea3364bfSQuentin Colombet       continue;
123cda334baSJinsong Ji     if (LineSplit.size()!=2)
124cda334baSJinsong Ji       report_fatal_error("Invalid line format, expecting lines like: 'funcname bb1[;bb2..]'");
125ea3364bfSQuentin Colombet     SmallVector<StringRef, 4> BBNames;
126ae2cbb34SQuentin Colombet     LineSplit[1].split(BBNames, ';', /*MaxSplit=*/-1,
127ea3364bfSQuentin Colombet                        /*KeepEmpty=*/false);
128ea3364bfSQuentin Colombet     if (BBNames.empty())
129ea3364bfSQuentin Colombet       report_fatal_error("Missing bbs name");
130ea3364bfSQuentin Colombet     BlocksByName.push_back({LineSplit[0], {BBNames.begin(), BBNames.end()}});
131dc40be75SVolkan Keles   }
132dc40be75SVolkan Keles }
133dc40be75SVolkan Keles 
134dc40be75SVolkan Keles /// Extracts the landing pads to make sure all of them have only one
135dc40be75SVolkan Keles /// predecessor.
136dc40be75SVolkan Keles void BlockExtractor::splitLandingPadPreds(Function &F) {
137dc40be75SVolkan Keles   for (BasicBlock &BB : F) {
138dc40be75SVolkan Keles     for (Instruction &I : BB) {
139dc40be75SVolkan Keles       if (!isa<InvokeInst>(&I))
140dc40be75SVolkan Keles         continue;
141dc40be75SVolkan Keles       InvokeInst *II = cast<InvokeInst>(&I);
142dc40be75SVolkan Keles       BasicBlock *Parent = II->getParent();
143dc40be75SVolkan Keles       BasicBlock *LPad = II->getUnwindDest();
144dc40be75SVolkan Keles 
145dc40be75SVolkan Keles       // Look through the landing pad's predecessors. If one of them ends in an
146dc40be75SVolkan Keles       // 'invoke', then we want to split the landing pad.
147dc40be75SVolkan Keles       bool Split = false;
148dc40be75SVolkan Keles       for (auto PredBB : predecessors(LPad)) {
149dc40be75SVolkan Keles         if (PredBB->isLandingPad() && PredBB != Parent &&
150dc40be75SVolkan Keles             isa<InvokeInst>(Parent->getTerminator())) {
151dc40be75SVolkan Keles           Split = true;
152dc40be75SVolkan Keles           break;
153dc40be75SVolkan Keles         }
154dc40be75SVolkan Keles       }
155dc40be75SVolkan Keles 
156dc40be75SVolkan Keles       if (!Split)
157dc40be75SVolkan Keles         continue;
158dc40be75SVolkan Keles 
159dc40be75SVolkan Keles       SmallVector<BasicBlock *, 2> NewBBs;
160dc40be75SVolkan Keles       SplitLandingPadPredecessors(LPad, Parent, ".1", ".2", NewBBs);
161dc40be75SVolkan Keles     }
162dc40be75SVolkan Keles   }
163dc40be75SVolkan Keles }
164dc40be75SVolkan Keles 
165dc40be75SVolkan Keles bool BlockExtractor::runOnModule(Module &M) {
166dc40be75SVolkan Keles 
167dc40be75SVolkan Keles   bool Changed = false;
168dc40be75SVolkan Keles 
169dc40be75SVolkan Keles   // Get all the functions.
170dc40be75SVolkan Keles   SmallVector<Function *, 4> Functions;
171dc40be75SVolkan Keles   for (Function &F : M) {
172dc40be75SVolkan Keles     splitLandingPadPreds(F);
173dc40be75SVolkan Keles     Functions.push_back(&F);
174dc40be75SVolkan Keles   }
175dc40be75SVolkan Keles 
176dc40be75SVolkan Keles   // Get all the blocks specified in the input file.
177ea3364bfSQuentin Colombet   unsigned NextGroupIdx = GroupsOfBlocks.size();
178ea3364bfSQuentin Colombet   GroupsOfBlocks.resize(NextGroupIdx + BlocksByName.size());
179dc40be75SVolkan Keles   for (const auto &BInfo : BlocksByName) {
180dc40be75SVolkan Keles     Function *F = M.getFunction(BInfo.first);
181dc40be75SVolkan Keles     if (!F)
182dc40be75SVolkan Keles       report_fatal_error("Invalid function name specified in the input file");
183ea3364bfSQuentin Colombet     for (const auto &BBInfo : BInfo.second) {
184dc40be75SVolkan Keles       auto Res = llvm::find_if(*F, [&](const BasicBlock &BB) {
185ea3364bfSQuentin Colombet         return BB.getName().equals(BBInfo);
186dc40be75SVolkan Keles       });
187dc40be75SVolkan Keles       if (Res == F->end())
188dc40be75SVolkan Keles         report_fatal_error("Invalid block name specified in the input file");
189ea3364bfSQuentin Colombet       GroupsOfBlocks[NextGroupIdx].push_back(&*Res);
190ea3364bfSQuentin Colombet     }
191ea3364bfSQuentin Colombet     ++NextGroupIdx;
192dc40be75SVolkan Keles   }
193dc40be75SVolkan Keles 
194ea3364bfSQuentin Colombet   // Extract each group of basic blocks.
195ea3364bfSQuentin Colombet   for (auto &BBs : GroupsOfBlocks) {
196ea3364bfSQuentin Colombet     SmallVector<BasicBlock *, 32> BlocksToExtractVec;
197ea3364bfSQuentin Colombet     for (BasicBlock *BB : BBs) {
198dc40be75SVolkan Keles       // Check if the module contains BB.
199dc40be75SVolkan Keles       if (BB->getParent()->getParent() != &M)
200dc40be75SVolkan Keles         report_fatal_error("Invalid basic block");
201d34e60caSNicola Zaghen       LLVM_DEBUG(dbgs() << "BlockExtractor: Extracting "
202d34e60caSNicola Zaghen                         << BB->getParent()->getName() << ":" << BB->getName()
203d34e60caSNicola Zaghen                         << "\n");
204dc40be75SVolkan Keles       BlocksToExtractVec.push_back(BB);
205dc40be75SVolkan Keles       if (const InvokeInst *II = dyn_cast<InvokeInst>(BB->getTerminator()))
206dc40be75SVolkan Keles         BlocksToExtractVec.push_back(II->getUnwindDest());
207dc40be75SVolkan Keles       ++NumExtracted;
208dc40be75SVolkan Keles       Changed = true;
209dc40be75SVolkan Keles     }
2109852699dSVedant Kumar     CodeExtractorAnalysisCache CEAC(*BBs[0]->getParent());
2119852699dSVedant Kumar     Function *F = CodeExtractor(BlocksToExtractVec).extractCodeRegion(CEAC);
212ea3364bfSQuentin Colombet     if (F)
213ea3364bfSQuentin Colombet       LLVM_DEBUG(dbgs() << "Extracted group '" << (*BBs.begin())->getName()
214ea3364bfSQuentin Colombet                         << "' in: " << F->getName() << '\n');
215ea3364bfSQuentin Colombet     else
216ea3364bfSQuentin Colombet       LLVM_DEBUG(dbgs() << "Failed to extract for group '"
217ea3364bfSQuentin Colombet                         << (*BBs.begin())->getName() << "'\n");
218ea3364bfSQuentin Colombet   }
219dc40be75SVolkan Keles 
220dc40be75SVolkan Keles   // Erase the functions.
221dc40be75SVolkan Keles   if (EraseFunctions || BlockExtractorEraseFuncs) {
222dc40be75SVolkan Keles     for (Function *F : Functions) {
223d34e60caSNicola Zaghen       LLVM_DEBUG(dbgs() << "BlockExtractor: Trying to delete " << F->getName()
2244ecdb44aSVolkan Keles                         << "\n");
2254ecdb44aSVolkan Keles       F->deleteBody();
226dc40be75SVolkan Keles     }
227dc40be75SVolkan Keles     // Set linkage as ExternalLinkage to avoid erasing unreachable functions.
228dc40be75SVolkan Keles     for (Function &F : M)
229dc40be75SVolkan Keles       F.setLinkage(GlobalValue::ExternalLinkage);
230dc40be75SVolkan Keles     Changed = true;
231dc40be75SVolkan Keles   }
232dc40be75SVolkan Keles 
233dc40be75SVolkan Keles   return Changed;
234dc40be75SVolkan Keles }
235