1*2f09f445SMaksim Panchenko //===- bolt/Core/ParallelUtilities.cpp - Parallel utilities ---------------===//
2a34c753fSRafael Auler //
3a34c753fSRafael Auler // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4a34c753fSRafael Auler // See https://llvm.org/LICENSE.txt for license information.
5a34c753fSRafael Auler // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6a34c753fSRafael Auler //
7a34c753fSRafael Auler //===----------------------------------------------------------------------===//
8a34c753fSRafael Auler //
9*2f09f445SMaksim Panchenko // Implementation of the class that manages parallel work on BinaryFunctions.
10*2f09f445SMaksim Panchenko //
11a34c753fSRafael Auler //===----------------------------------------------------------------------===//
12a34c753fSRafael Auler 
13a34c753fSRafael Auler #include "bolt/Core/ParallelUtilities.h"
14a34c753fSRafael Auler #include "bolt/Core/BinaryContext.h"
15a34c753fSRafael Auler #include "bolt/Core/BinaryFunction.h"
16a34c753fSRafael Auler #include "llvm/Support/ThreadPool.h"
17a34c753fSRafael Auler #include "llvm/Support/Timer.h"
18a34c753fSRafael Auler #include <mutex>
19a34c753fSRafael Auler #include <shared_mutex>
20a34c753fSRafael Auler 
21a34c753fSRafael Auler #define DEBUG_TYPE "par-utils"
22a34c753fSRafael Auler 
23a34c753fSRafael Auler namespace opts {
24a34c753fSRafael Auler extern cl::OptionCategory BoltCategory;
25a34c753fSRafael Auler 
26a34c753fSRafael Auler cl::opt<unsigned>
27a34c753fSRafael Auler ThreadCount("thread-count",
28a34c753fSRafael Auler   cl::desc("number of threads"),
29a34c753fSRafael Auler   cl::init(hardware_concurrency().compute_thread_count()),
30a34c753fSRafael Auler   cl::cat(BoltCategory));
31a34c753fSRafael Auler 
32a34c753fSRafael Auler cl::opt<bool>
33a34c753fSRafael Auler NoThreads("no-threads",
34a34c753fSRafael Auler   cl::desc("disable multithreading"),
35a34c753fSRafael Auler   cl::init(false),
36a34c753fSRafael Auler   cl::cat(BoltCategory));
37a34c753fSRafael Auler 
38a34c753fSRafael Auler cl::opt<unsigned>
39a34c753fSRafael Auler TaskCount("tasks-per-thread",
40a34c753fSRafael Auler   cl::desc("number of tasks to be created per thread"),
41a34c753fSRafael Auler   cl::init(20),
42a34c753fSRafael Auler   cl::cat(BoltCategory));
43a34c753fSRafael Auler 
44a34c753fSRafael Auler } // namespace opts
45a34c753fSRafael Auler 
46a34c753fSRafael Auler namespace llvm {
47a34c753fSRafael Auler namespace bolt {
48a34c753fSRafael Auler namespace ParallelUtilities {
49a34c753fSRafael Auler 
50a34c753fSRafael Auler namespace {
51a34c753fSRafael Auler /// A single thread pool that is used to run parallel tasks
52a34c753fSRafael Auler std::unique_ptr<ThreadPool> ThreadPoolPtr;
53a34c753fSRafael Auler 
computeCostFor(const BinaryFunction & BF,const PredicateTy & SkipPredicate,const SchedulingPolicy & SchedPolicy)54a34c753fSRafael Auler unsigned computeCostFor(const BinaryFunction &BF,
55a34c753fSRafael Auler                         const PredicateTy &SkipPredicate,
56a34c753fSRafael Auler                         const SchedulingPolicy &SchedPolicy) {
57a34c753fSRafael Auler   if (SchedPolicy == SchedulingPolicy::SP_TRIVIAL)
58a34c753fSRafael Auler     return 1;
59a34c753fSRafael Auler 
60a34c753fSRafael Auler   if (SkipPredicate && SkipPredicate(BF))
61a34c753fSRafael Auler     return 0;
62a34c753fSRafael Auler 
63a34c753fSRafael Auler   switch (SchedPolicy) {
64a34c753fSRafael Auler   case SchedulingPolicy::SP_CONSTANT:
65a34c753fSRafael Auler     return 1;
66a34c753fSRafael Auler   case SchedulingPolicy::SP_INST_LINEAR:
67a34c753fSRafael Auler     return BF.getSize();
68a34c753fSRafael Auler   case SchedulingPolicy::SP_INST_QUADRATIC:
69a34c753fSRafael Auler     return BF.getSize() * BF.getSize();
70a34c753fSRafael Auler   case SchedulingPolicy::SP_BB_LINEAR:
71a34c753fSRafael Auler     return BF.size();
72a34c753fSRafael Auler   case SchedulingPolicy::SP_BB_QUADRATIC:
73a34c753fSRafael Auler     return BF.size() * BF.size();
74a34c753fSRafael Auler   default:
75a34c753fSRafael Auler     llvm_unreachable("unsupported scheduling policy");
76a34c753fSRafael Auler   }
77a34c753fSRafael Auler }
78a34c753fSRafael Auler 
estimateTotalCost(const BinaryContext & BC,const PredicateTy & SkipPredicate,SchedulingPolicy & SchedPolicy)79a34c753fSRafael Auler inline unsigned estimateTotalCost(const BinaryContext &BC,
80a34c753fSRafael Auler                                   const PredicateTy &SkipPredicate,
81a34c753fSRafael Auler                                   SchedulingPolicy &SchedPolicy) {
82a34c753fSRafael Auler   if (SchedPolicy == SchedulingPolicy::SP_TRIVIAL)
83a34c753fSRafael Auler     return BC.getBinaryFunctions().size();
84a34c753fSRafael Auler 
85a34c753fSRafael Auler   unsigned TotalCost = 0;
86a34c753fSRafael Auler   for (auto &BFI : BC.getBinaryFunctions()) {
87a34c753fSRafael Auler     const BinaryFunction &BF = BFI.second;
88a34c753fSRafael Auler     TotalCost += computeCostFor(BF, SkipPredicate, SchedPolicy);
89a34c753fSRafael Auler   }
90a34c753fSRafael Auler 
91a34c753fSRafael Auler   // Switch to trivial scheduling if total estimated work is zero
92a34c753fSRafael Auler   if (TotalCost == 0) {
93a34c753fSRafael Auler     outs() << "BOLT-WARNING: Running parallel work of 0 estimated cost, will "
94a34c753fSRafael Auler               "switch to  trivial scheduling.\n";
95a34c753fSRafael Auler 
96a34c753fSRafael Auler     SchedPolicy = SP_TRIVIAL;
97a34c753fSRafael Auler     TotalCost = BC.getBinaryFunctions().size();
98a34c753fSRafael Auler   }
99a34c753fSRafael Auler   return TotalCost;
100a34c753fSRafael Auler }
101a34c753fSRafael Auler 
102a34c753fSRafael Auler } // namespace
103a34c753fSRafael Auler 
getThreadPool()104a34c753fSRafael Auler ThreadPool &getThreadPool() {
105a34c753fSRafael Auler   if (ThreadPoolPtr.get())
106a34c753fSRafael Auler     return *ThreadPoolPtr;
107a34c753fSRafael Auler 
108a34c753fSRafael Auler   ThreadPoolPtr = std::make_unique<ThreadPool>(
109a34c753fSRafael Auler       llvm::hardware_concurrency(opts::ThreadCount));
110a34c753fSRafael Auler   return *ThreadPoolPtr;
111a34c753fSRafael Auler }
112a34c753fSRafael Auler 
runOnEachFunction(BinaryContext & BC,SchedulingPolicy SchedPolicy,WorkFuncTy WorkFunction,PredicateTy SkipPredicate,std::string LogName,bool ForceSequential,unsigned TasksPerThread)113a34c753fSRafael Auler void runOnEachFunction(BinaryContext &BC, SchedulingPolicy SchedPolicy,
114a34c753fSRafael Auler                        WorkFuncTy WorkFunction, PredicateTy SkipPredicate,
115a34c753fSRafael Auler                        std::string LogName, bool ForceSequential,
116a34c753fSRafael Auler                        unsigned TasksPerThread) {
117a34c753fSRafael Auler   if (BC.getBinaryFunctions().size() == 0)
118a34c753fSRafael Auler     return;
119a34c753fSRafael Auler 
120a34c753fSRafael Auler   auto runBlock = [&](std::map<uint64_t, BinaryFunction>::iterator BlockBegin,
121a34c753fSRafael Auler                       std::map<uint64_t, BinaryFunction>::iterator BlockEnd) {
122a34c753fSRafael Auler     Timer T(LogName, LogName);
123a34c753fSRafael Auler     LLVM_DEBUG(T.startTimer());
124a34c753fSRafael Auler 
125a34c753fSRafael Auler     for (auto It = BlockBegin; It != BlockEnd; ++It) {
126a34c753fSRafael Auler       BinaryFunction &BF = It->second;
127a34c753fSRafael Auler       if (SkipPredicate && SkipPredicate(BF))
128a34c753fSRafael Auler         continue;
129a34c753fSRafael Auler 
130a34c753fSRafael Auler       WorkFunction(BF);
131a34c753fSRafael Auler     }
132a34c753fSRafael Auler     LLVM_DEBUG(T.stopTimer());
133a34c753fSRafael Auler   };
134a34c753fSRafael Auler 
135a34c753fSRafael Auler   if (opts::NoThreads || ForceSequential) {
136a34c753fSRafael Auler     runBlock(BC.getBinaryFunctions().begin(), BC.getBinaryFunctions().end());
137a34c753fSRafael Auler     return;
138a34c753fSRafael Auler   }
139a34c753fSRafael Auler 
140a34c753fSRafael Auler   // Estimate the overall runtime cost using the scheduling policy
141a34c753fSRafael Auler   const unsigned TotalCost = estimateTotalCost(BC, SkipPredicate, SchedPolicy);
142a34c753fSRafael Auler   const unsigned BlocksCount = TasksPerThread * opts::ThreadCount;
143a34c753fSRafael Auler   const unsigned BlockCost =
144a34c753fSRafael Auler       TotalCost > BlocksCount ? TotalCost / BlocksCount : 1;
145a34c753fSRafael Auler 
146a34c753fSRafael Auler   // Divide work into blocks of equal cost
147a34c753fSRafael Auler   ThreadPool &Pool = getThreadPool();
148a34c753fSRafael Auler   auto BlockBegin = BC.getBinaryFunctions().begin();
149a34c753fSRafael Auler   unsigned CurrentCost = 0;
150a34c753fSRafael Auler 
151a34c753fSRafael Auler   for (auto It = BC.getBinaryFunctions().begin();
152a34c753fSRafael Auler        It != BC.getBinaryFunctions().end(); ++It) {
153a34c753fSRafael Auler     BinaryFunction &BF = It->second;
154a34c753fSRafael Auler     CurrentCost += computeCostFor(BF, SkipPredicate, SchedPolicy);
155a34c753fSRafael Auler 
156a34c753fSRafael Auler     if (CurrentCost >= BlockCost) {
157a34c753fSRafael Auler       Pool.async(runBlock, BlockBegin, std::next(It));
158a34c753fSRafael Auler       BlockBegin = std::next(It);
159a34c753fSRafael Auler       CurrentCost = 0;
160a34c753fSRafael Auler     }
161a34c753fSRafael Auler   }
162a34c753fSRafael Auler   Pool.async(runBlock, BlockBegin, BC.getBinaryFunctions().end());
163a34c753fSRafael Auler   Pool.wait();
164a34c753fSRafael Auler }
165a34c753fSRafael Auler 
runOnEachFunctionWithUniqueAllocId(BinaryContext & BC,SchedulingPolicy SchedPolicy,WorkFuncWithAllocTy WorkFunction,PredicateTy SkipPredicate,std::string LogName,bool ForceSequential,unsigned TasksPerThread)166a34c753fSRafael Auler void runOnEachFunctionWithUniqueAllocId(
167a34c753fSRafael Auler     BinaryContext &BC, SchedulingPolicy SchedPolicy,
168a34c753fSRafael Auler     WorkFuncWithAllocTy WorkFunction, PredicateTy SkipPredicate,
169a34c753fSRafael Auler     std::string LogName, bool ForceSequential, unsigned TasksPerThread) {
170a34c753fSRafael Auler   if (BC.getBinaryFunctions().size() == 0)
171a34c753fSRafael Auler     return;
172a34c753fSRafael Auler 
173a34c753fSRafael Auler   std::shared_timed_mutex MainLock;
174a34c753fSRafael Auler   auto runBlock = [&](std::map<uint64_t, BinaryFunction>::iterator BlockBegin,
175a34c753fSRafael Auler                       std::map<uint64_t, BinaryFunction>::iterator BlockEnd,
176a34c753fSRafael Auler                       MCPlusBuilder::AllocatorIdTy AllocId) {
177a34c753fSRafael Auler     Timer T(LogName, LogName);
178a34c753fSRafael Auler     LLVM_DEBUG(T.startTimer());
179a34c753fSRafael Auler     std::shared_lock<std::shared_timed_mutex> Lock(MainLock);
180a34c753fSRafael Auler     for (auto It = BlockBegin; It != BlockEnd; ++It) {
181a34c753fSRafael Auler       BinaryFunction &BF = It->second;
182a34c753fSRafael Auler       if (SkipPredicate && SkipPredicate(BF))
183a34c753fSRafael Auler         continue;
184a34c753fSRafael Auler 
185a34c753fSRafael Auler       WorkFunction(BF, AllocId);
186a34c753fSRafael Auler     }
187a34c753fSRafael Auler     LLVM_DEBUG(T.stopTimer());
188a34c753fSRafael Auler   };
189a34c753fSRafael Auler 
190a34c753fSRafael Auler   if (opts::NoThreads || ForceSequential) {
191a34c753fSRafael Auler     runBlock(BC.getBinaryFunctions().begin(), BC.getBinaryFunctions().end(), 0);
192a34c753fSRafael Auler     return;
193a34c753fSRafael Auler   }
194a34c753fSRafael Auler   // This lock is used to postpone task execution
195a34c753fSRafael Auler   std::unique_lock<std::shared_timed_mutex> Lock(MainLock);
196a34c753fSRafael Auler 
197a34c753fSRafael Auler   // Estimate the overall runtime cost using the scheduling policy
198a34c753fSRafael Auler   const unsigned TotalCost = estimateTotalCost(BC, SkipPredicate, SchedPolicy);
199a34c753fSRafael Auler   const unsigned BlocksCount = TasksPerThread * opts::ThreadCount;
200a34c753fSRafael Auler   const unsigned BlockCost =
201a34c753fSRafael Auler       TotalCost > BlocksCount ? TotalCost / BlocksCount : 1;
202a34c753fSRafael Auler 
203a34c753fSRafael Auler   // Divide work into blocks of equal cost
204a34c753fSRafael Auler   ThreadPool &Pool = getThreadPool();
205a34c753fSRafael Auler   auto BlockBegin = BC.getBinaryFunctions().begin();
206a34c753fSRafael Auler   unsigned CurrentCost = 0;
207a34c753fSRafael Auler   unsigned AllocId = 1;
208a34c753fSRafael Auler   for (auto It = BC.getBinaryFunctions().begin();
209a34c753fSRafael Auler        It != BC.getBinaryFunctions().end(); ++It) {
210a34c753fSRafael Auler     BinaryFunction &BF = It->second;
211a34c753fSRafael Auler     CurrentCost += computeCostFor(BF, SkipPredicate, SchedPolicy);
212a34c753fSRafael Auler 
213a34c753fSRafael Auler     if (CurrentCost >= BlockCost) {
214a34c753fSRafael Auler       if (!BC.MIB->checkAllocatorExists(AllocId)) {
215a34c753fSRafael Auler         MCPlusBuilder::AllocatorIdTy Id =
216a34c753fSRafael Auler             BC.MIB->initializeNewAnnotationAllocator();
217a34c753fSRafael Auler         (void)Id;
218a34c753fSRafael Auler         assert(AllocId == Id && "unexpected allocator id created");
219a34c753fSRafael Auler       }
220a34c753fSRafael Auler       Pool.async(runBlock, BlockBegin, std::next(It), AllocId);
221a34c753fSRafael Auler       AllocId++;
222a34c753fSRafael Auler       BlockBegin = std::next(It);
223a34c753fSRafael Auler       CurrentCost = 0;
224a34c753fSRafael Auler     }
225a34c753fSRafael Auler   }
226a34c753fSRafael Auler 
227a34c753fSRafael Auler   if (!BC.MIB->checkAllocatorExists(AllocId)) {
228a34c753fSRafael Auler     MCPlusBuilder::AllocatorIdTy Id =
229a34c753fSRafael Auler         BC.MIB->initializeNewAnnotationAllocator();
230a34c753fSRafael Auler     (void)Id;
231a34c753fSRafael Auler     assert(AllocId == Id && "unexpected allocator id created");
232a34c753fSRafael Auler   }
233a34c753fSRafael Auler 
234a34c753fSRafael Auler   Pool.async(runBlock, BlockBegin, BC.getBinaryFunctions().end(), AllocId);
235a34c753fSRafael Auler   Lock.unlock();
236a34c753fSRafael Auler   Pool.wait();
237a34c753fSRafael Auler }
238a34c753fSRafael Auler 
239a34c753fSRafael Auler } // namespace ParallelUtilities
240a34c753fSRafael Auler } // namespace bolt
241a34c753fSRafael Auler } // namespace llvm
242