1*2f09f445SMaksim Panchenko //===- bolt/Core/ParallelUtilities.cpp - Parallel utilities ---------------===//
2a34c753fSRafael Auler //
3a34c753fSRafael Auler // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4a34c753fSRafael Auler // See https://llvm.org/LICENSE.txt for license information.
5a34c753fSRafael Auler // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6a34c753fSRafael Auler //
7a34c753fSRafael Auler //===----------------------------------------------------------------------===//
8a34c753fSRafael Auler //
9*2f09f445SMaksim Panchenko // Implementation of the class that manages parallel work on BinaryFunctions.
10*2f09f445SMaksim Panchenko //
11a34c753fSRafael Auler //===----------------------------------------------------------------------===//
12a34c753fSRafael Auler
13a34c753fSRafael Auler #include "bolt/Core/ParallelUtilities.h"
14a34c753fSRafael Auler #include "bolt/Core/BinaryContext.h"
15a34c753fSRafael Auler #include "bolt/Core/BinaryFunction.h"
16a34c753fSRafael Auler #include "llvm/Support/ThreadPool.h"
17a34c753fSRafael Auler #include "llvm/Support/Timer.h"
18a34c753fSRafael Auler #include <mutex>
19a34c753fSRafael Auler #include <shared_mutex>
20a34c753fSRafael Auler
21a34c753fSRafael Auler #define DEBUG_TYPE "par-utils"
22a34c753fSRafael Auler
23a34c753fSRafael Auler namespace opts {
24a34c753fSRafael Auler extern cl::OptionCategory BoltCategory;
25a34c753fSRafael Auler
26a34c753fSRafael Auler cl::opt<unsigned>
27a34c753fSRafael Auler ThreadCount("thread-count",
28a34c753fSRafael Auler cl::desc("number of threads"),
29a34c753fSRafael Auler cl::init(hardware_concurrency().compute_thread_count()),
30a34c753fSRafael Auler cl::cat(BoltCategory));
31a34c753fSRafael Auler
32a34c753fSRafael Auler cl::opt<bool>
33a34c753fSRafael Auler NoThreads("no-threads",
34a34c753fSRafael Auler cl::desc("disable multithreading"),
35a34c753fSRafael Auler cl::init(false),
36a34c753fSRafael Auler cl::cat(BoltCategory));
37a34c753fSRafael Auler
38a34c753fSRafael Auler cl::opt<unsigned>
39a34c753fSRafael Auler TaskCount("tasks-per-thread",
40a34c753fSRafael Auler cl::desc("number of tasks to be created per thread"),
41a34c753fSRafael Auler cl::init(20),
42a34c753fSRafael Auler cl::cat(BoltCategory));
43a34c753fSRafael Auler
44a34c753fSRafael Auler } // namespace opts
45a34c753fSRafael Auler
46a34c753fSRafael Auler namespace llvm {
47a34c753fSRafael Auler namespace bolt {
48a34c753fSRafael Auler namespace ParallelUtilities {
49a34c753fSRafael Auler
50a34c753fSRafael Auler namespace {
51a34c753fSRafael Auler /// A single thread pool that is used to run parallel tasks
52a34c753fSRafael Auler std::unique_ptr<ThreadPool> ThreadPoolPtr;
53a34c753fSRafael Auler
computeCostFor(const BinaryFunction & BF,const PredicateTy & SkipPredicate,const SchedulingPolicy & SchedPolicy)54a34c753fSRafael Auler unsigned computeCostFor(const BinaryFunction &BF,
55a34c753fSRafael Auler const PredicateTy &SkipPredicate,
56a34c753fSRafael Auler const SchedulingPolicy &SchedPolicy) {
57a34c753fSRafael Auler if (SchedPolicy == SchedulingPolicy::SP_TRIVIAL)
58a34c753fSRafael Auler return 1;
59a34c753fSRafael Auler
60a34c753fSRafael Auler if (SkipPredicate && SkipPredicate(BF))
61a34c753fSRafael Auler return 0;
62a34c753fSRafael Auler
63a34c753fSRafael Auler switch (SchedPolicy) {
64a34c753fSRafael Auler case SchedulingPolicy::SP_CONSTANT:
65a34c753fSRafael Auler return 1;
66a34c753fSRafael Auler case SchedulingPolicy::SP_INST_LINEAR:
67a34c753fSRafael Auler return BF.getSize();
68a34c753fSRafael Auler case SchedulingPolicy::SP_INST_QUADRATIC:
69a34c753fSRafael Auler return BF.getSize() * BF.getSize();
70a34c753fSRafael Auler case SchedulingPolicy::SP_BB_LINEAR:
71a34c753fSRafael Auler return BF.size();
72a34c753fSRafael Auler case SchedulingPolicy::SP_BB_QUADRATIC:
73a34c753fSRafael Auler return BF.size() * BF.size();
74a34c753fSRafael Auler default:
75a34c753fSRafael Auler llvm_unreachable("unsupported scheduling policy");
76a34c753fSRafael Auler }
77a34c753fSRafael Auler }
78a34c753fSRafael Auler
estimateTotalCost(const BinaryContext & BC,const PredicateTy & SkipPredicate,SchedulingPolicy & SchedPolicy)79a34c753fSRafael Auler inline unsigned estimateTotalCost(const BinaryContext &BC,
80a34c753fSRafael Auler const PredicateTy &SkipPredicate,
81a34c753fSRafael Auler SchedulingPolicy &SchedPolicy) {
82a34c753fSRafael Auler if (SchedPolicy == SchedulingPolicy::SP_TRIVIAL)
83a34c753fSRafael Auler return BC.getBinaryFunctions().size();
84a34c753fSRafael Auler
85a34c753fSRafael Auler unsigned TotalCost = 0;
86a34c753fSRafael Auler for (auto &BFI : BC.getBinaryFunctions()) {
87a34c753fSRafael Auler const BinaryFunction &BF = BFI.second;
88a34c753fSRafael Auler TotalCost += computeCostFor(BF, SkipPredicate, SchedPolicy);
89a34c753fSRafael Auler }
90a34c753fSRafael Auler
91a34c753fSRafael Auler // Switch to trivial scheduling if total estimated work is zero
92a34c753fSRafael Auler if (TotalCost == 0) {
93a34c753fSRafael Auler outs() << "BOLT-WARNING: Running parallel work of 0 estimated cost, will "
94a34c753fSRafael Auler "switch to trivial scheduling.\n";
95a34c753fSRafael Auler
96a34c753fSRafael Auler SchedPolicy = SP_TRIVIAL;
97a34c753fSRafael Auler TotalCost = BC.getBinaryFunctions().size();
98a34c753fSRafael Auler }
99a34c753fSRafael Auler return TotalCost;
100a34c753fSRafael Auler }
101a34c753fSRafael Auler
102a34c753fSRafael Auler } // namespace
103a34c753fSRafael Auler
getThreadPool()104a34c753fSRafael Auler ThreadPool &getThreadPool() {
105a34c753fSRafael Auler if (ThreadPoolPtr.get())
106a34c753fSRafael Auler return *ThreadPoolPtr;
107a34c753fSRafael Auler
108a34c753fSRafael Auler ThreadPoolPtr = std::make_unique<ThreadPool>(
109a34c753fSRafael Auler llvm::hardware_concurrency(opts::ThreadCount));
110a34c753fSRafael Auler return *ThreadPoolPtr;
111a34c753fSRafael Auler }
112a34c753fSRafael Auler
runOnEachFunction(BinaryContext & BC,SchedulingPolicy SchedPolicy,WorkFuncTy WorkFunction,PredicateTy SkipPredicate,std::string LogName,bool ForceSequential,unsigned TasksPerThread)113a34c753fSRafael Auler void runOnEachFunction(BinaryContext &BC, SchedulingPolicy SchedPolicy,
114a34c753fSRafael Auler WorkFuncTy WorkFunction, PredicateTy SkipPredicate,
115a34c753fSRafael Auler std::string LogName, bool ForceSequential,
116a34c753fSRafael Auler unsigned TasksPerThread) {
117a34c753fSRafael Auler if (BC.getBinaryFunctions().size() == 0)
118a34c753fSRafael Auler return;
119a34c753fSRafael Auler
120a34c753fSRafael Auler auto runBlock = [&](std::map<uint64_t, BinaryFunction>::iterator BlockBegin,
121a34c753fSRafael Auler std::map<uint64_t, BinaryFunction>::iterator BlockEnd) {
122a34c753fSRafael Auler Timer T(LogName, LogName);
123a34c753fSRafael Auler LLVM_DEBUG(T.startTimer());
124a34c753fSRafael Auler
125a34c753fSRafael Auler for (auto It = BlockBegin; It != BlockEnd; ++It) {
126a34c753fSRafael Auler BinaryFunction &BF = It->second;
127a34c753fSRafael Auler if (SkipPredicate && SkipPredicate(BF))
128a34c753fSRafael Auler continue;
129a34c753fSRafael Auler
130a34c753fSRafael Auler WorkFunction(BF);
131a34c753fSRafael Auler }
132a34c753fSRafael Auler LLVM_DEBUG(T.stopTimer());
133a34c753fSRafael Auler };
134a34c753fSRafael Auler
135a34c753fSRafael Auler if (opts::NoThreads || ForceSequential) {
136a34c753fSRafael Auler runBlock(BC.getBinaryFunctions().begin(), BC.getBinaryFunctions().end());
137a34c753fSRafael Auler return;
138a34c753fSRafael Auler }
139a34c753fSRafael Auler
140a34c753fSRafael Auler // Estimate the overall runtime cost using the scheduling policy
141a34c753fSRafael Auler const unsigned TotalCost = estimateTotalCost(BC, SkipPredicate, SchedPolicy);
142a34c753fSRafael Auler const unsigned BlocksCount = TasksPerThread * opts::ThreadCount;
143a34c753fSRafael Auler const unsigned BlockCost =
144a34c753fSRafael Auler TotalCost > BlocksCount ? TotalCost / BlocksCount : 1;
145a34c753fSRafael Auler
146a34c753fSRafael Auler // Divide work into blocks of equal cost
147a34c753fSRafael Auler ThreadPool &Pool = getThreadPool();
148a34c753fSRafael Auler auto BlockBegin = BC.getBinaryFunctions().begin();
149a34c753fSRafael Auler unsigned CurrentCost = 0;
150a34c753fSRafael Auler
151a34c753fSRafael Auler for (auto It = BC.getBinaryFunctions().begin();
152a34c753fSRafael Auler It != BC.getBinaryFunctions().end(); ++It) {
153a34c753fSRafael Auler BinaryFunction &BF = It->second;
154a34c753fSRafael Auler CurrentCost += computeCostFor(BF, SkipPredicate, SchedPolicy);
155a34c753fSRafael Auler
156a34c753fSRafael Auler if (CurrentCost >= BlockCost) {
157a34c753fSRafael Auler Pool.async(runBlock, BlockBegin, std::next(It));
158a34c753fSRafael Auler BlockBegin = std::next(It);
159a34c753fSRafael Auler CurrentCost = 0;
160a34c753fSRafael Auler }
161a34c753fSRafael Auler }
162a34c753fSRafael Auler Pool.async(runBlock, BlockBegin, BC.getBinaryFunctions().end());
163a34c753fSRafael Auler Pool.wait();
164a34c753fSRafael Auler }
165a34c753fSRafael Auler
runOnEachFunctionWithUniqueAllocId(BinaryContext & BC,SchedulingPolicy SchedPolicy,WorkFuncWithAllocTy WorkFunction,PredicateTy SkipPredicate,std::string LogName,bool ForceSequential,unsigned TasksPerThread)166a34c753fSRafael Auler void runOnEachFunctionWithUniqueAllocId(
167a34c753fSRafael Auler BinaryContext &BC, SchedulingPolicy SchedPolicy,
168a34c753fSRafael Auler WorkFuncWithAllocTy WorkFunction, PredicateTy SkipPredicate,
169a34c753fSRafael Auler std::string LogName, bool ForceSequential, unsigned TasksPerThread) {
170a34c753fSRafael Auler if (BC.getBinaryFunctions().size() == 0)
171a34c753fSRafael Auler return;
172a34c753fSRafael Auler
173a34c753fSRafael Auler std::shared_timed_mutex MainLock;
174a34c753fSRafael Auler auto runBlock = [&](std::map<uint64_t, BinaryFunction>::iterator BlockBegin,
175a34c753fSRafael Auler std::map<uint64_t, BinaryFunction>::iterator BlockEnd,
176a34c753fSRafael Auler MCPlusBuilder::AllocatorIdTy AllocId) {
177a34c753fSRafael Auler Timer T(LogName, LogName);
178a34c753fSRafael Auler LLVM_DEBUG(T.startTimer());
179a34c753fSRafael Auler std::shared_lock<std::shared_timed_mutex> Lock(MainLock);
180a34c753fSRafael Auler for (auto It = BlockBegin; It != BlockEnd; ++It) {
181a34c753fSRafael Auler BinaryFunction &BF = It->second;
182a34c753fSRafael Auler if (SkipPredicate && SkipPredicate(BF))
183a34c753fSRafael Auler continue;
184a34c753fSRafael Auler
185a34c753fSRafael Auler WorkFunction(BF, AllocId);
186a34c753fSRafael Auler }
187a34c753fSRafael Auler LLVM_DEBUG(T.stopTimer());
188a34c753fSRafael Auler };
189a34c753fSRafael Auler
190a34c753fSRafael Auler if (opts::NoThreads || ForceSequential) {
191a34c753fSRafael Auler runBlock(BC.getBinaryFunctions().begin(), BC.getBinaryFunctions().end(), 0);
192a34c753fSRafael Auler return;
193a34c753fSRafael Auler }
194a34c753fSRafael Auler // This lock is used to postpone task execution
195a34c753fSRafael Auler std::unique_lock<std::shared_timed_mutex> Lock(MainLock);
196a34c753fSRafael Auler
197a34c753fSRafael Auler // Estimate the overall runtime cost using the scheduling policy
198a34c753fSRafael Auler const unsigned TotalCost = estimateTotalCost(BC, SkipPredicate, SchedPolicy);
199a34c753fSRafael Auler const unsigned BlocksCount = TasksPerThread * opts::ThreadCount;
200a34c753fSRafael Auler const unsigned BlockCost =
201a34c753fSRafael Auler TotalCost > BlocksCount ? TotalCost / BlocksCount : 1;
202a34c753fSRafael Auler
203a34c753fSRafael Auler // Divide work into blocks of equal cost
204a34c753fSRafael Auler ThreadPool &Pool = getThreadPool();
205a34c753fSRafael Auler auto BlockBegin = BC.getBinaryFunctions().begin();
206a34c753fSRafael Auler unsigned CurrentCost = 0;
207a34c753fSRafael Auler unsigned AllocId = 1;
208a34c753fSRafael Auler for (auto It = BC.getBinaryFunctions().begin();
209a34c753fSRafael Auler It != BC.getBinaryFunctions().end(); ++It) {
210a34c753fSRafael Auler BinaryFunction &BF = It->second;
211a34c753fSRafael Auler CurrentCost += computeCostFor(BF, SkipPredicate, SchedPolicy);
212a34c753fSRafael Auler
213a34c753fSRafael Auler if (CurrentCost >= BlockCost) {
214a34c753fSRafael Auler if (!BC.MIB->checkAllocatorExists(AllocId)) {
215a34c753fSRafael Auler MCPlusBuilder::AllocatorIdTy Id =
216a34c753fSRafael Auler BC.MIB->initializeNewAnnotationAllocator();
217a34c753fSRafael Auler (void)Id;
218a34c753fSRafael Auler assert(AllocId == Id && "unexpected allocator id created");
219a34c753fSRafael Auler }
220a34c753fSRafael Auler Pool.async(runBlock, BlockBegin, std::next(It), AllocId);
221a34c753fSRafael Auler AllocId++;
222a34c753fSRafael Auler BlockBegin = std::next(It);
223a34c753fSRafael Auler CurrentCost = 0;
224a34c753fSRafael Auler }
225a34c753fSRafael Auler }
226a34c753fSRafael Auler
227a34c753fSRafael Auler if (!BC.MIB->checkAllocatorExists(AllocId)) {
228a34c753fSRafael Auler MCPlusBuilder::AllocatorIdTy Id =
229a34c753fSRafael Auler BC.MIB->initializeNewAnnotationAllocator();
230a34c753fSRafael Auler (void)Id;
231a34c753fSRafael Auler assert(AllocId == Id && "unexpected allocator id created");
232a34c753fSRafael Auler }
233a34c753fSRafael Auler
234a34c753fSRafael Auler Pool.async(runBlock, BlockBegin, BC.getBinaryFunctions().end(), AllocId);
235a34c753fSRafael Auler Lock.unlock();
236a34c753fSRafael Auler Pool.wait();
237a34c753fSRafael Auler }
238a34c753fSRafael Auler
239a34c753fSRafael Auler } // namespace ParallelUtilities
240a34c753fSRafael Auler } // namespace bolt
241a34c753fSRafael Auler } // namespace llvm
242