1 /*
2 Copyright (c) 2005-2021 Intel Corporation
3
4 Licensed under the Apache License, Version 2.0 (the "License");
5 you may not use this file except in compliance with the License.
6 You may obtain a copy of the License at
7
8 http://www.apache.org/licenses/LICENSE-2.0
9
10 Unless required by applicable law or agreed to in writing, software
11 distributed under the License is distributed on an "AS IS" BASIS,
12 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 See the License for the specific language governing permissions and
14 limitations under the License.
15 */
16
17 /*
18 This file contains the TBB-based implementation of convex hull algorithm.
19 It corresponds to the following settings in convex_hull_bench.cpp:
20 - USETBB defined to 1
21 - USECONCVEC defined to 1
22 - INIT_ONCE defined to 0
23 - only buffered version is used
24 */
25
26 #include "oneapi/tbb/parallel_for.h"
27 #include "oneapi/tbb/parallel_reduce.h"
28 #include "oneapi/tbb/blocked_range.h"
29 #include "oneapi/tbb/tick_count.h"
30 #include "oneapi/tbb/concurrent_vector.h"
31
32 #include "convex_hull.hpp"
33
34 typedef util::point<double> point_t;
35 typedef oneapi::tbb::concurrent_vector<point_t> pointVec_t;
36 typedef oneapi::tbb::blocked_range<std::size_t> range_t;
37
appendVector(const point_t * src,std::size_t srcSize,pointVec_t & dest)38 void appendVector(const point_t *src, std::size_t srcSize, pointVec_t &dest) {
39 std::copy(src, src + srcSize, dest.grow_by(srcSize));
40 }
41
appendVector(const pointVec_t & src,pointVec_t & dest)42 void appendVector(const pointVec_t &src, pointVec_t &dest) {
43 std::copy(src.begin(), src.end(), dest.grow_by(src.size()));
44 }
45 class FillRNDPointsVector_buf {
46 pointVec_t &points;
47
48 public:
49 static const std::size_t grainSize = cfg::generateGrainSize;
50
FillRNDPointsVector_buf(pointVec_t & _points)51 explicit FillRNDPointsVector_buf(pointVec_t &_points) : points(_points) {}
52
operator ()(const range_t & range) const53 void operator()(const range_t &range) const {
54 util::rng the_rng(range.begin());
55 const std::size_t i_end = range.end();
56 std::size_t count = 0, j = 0;
57 point_t tmp_vec[grainSize];
58
59 for (std::size_t i = range.begin(); i != i_end; ++i) {
60 tmp_vec[j++] = util::GenerateRNDPoint<double>(count, the_rng, util::rng::max_rand);
61 }
62 //Here we have race condition. Elements being written to may be still under construction.
63 //For C++ 2003 it is workarounded by vector element type which default constructor does not touch memory,
64 //it being constructed on. See comments near default ctor of point class for more details.
65 //Strictly speaking it is UB.
66 //TODO: need to find more reliable/correct way
67 points.grow_to_at_least(range.end());
68 std::copy(tmp_vec, tmp_vec + j, points.begin() + range.begin());
69 }
70 };
71
initialize(pointVec_t & points)72 void initialize(pointVec_t &points) {
73 //This function generate the same series of point on every call.
74 //Reproducibility is needed for benchmarking to produce reliable results.
75 //It is achieved through the following points:
76 // - FillRNDPointsVector_buf instance has its own local instance
77 // of random number generator, which in turn does not use any global data
78 // - oneapi::tbb::simple_partitioner produce the same set of ranges on every call to
79 // oneapi::tbb::parallel_for
80 // - local RNG instances are seeded by the starting indexes of corresponding ranges
81 // - grow_to_at_least() enables putting points into the resulting vector in deterministic order
82 // (unlike concurrent push_back or grow_by).
83
84 // In the buffered version, a temporary storage for as much as grainSize elements
85 // is allocated inside the body. Since auto_partitioner may increase effective
86 // range size which would cause a crash, simple partitioner has to be used.
87 oneapi::tbb::parallel_for(range_t(0, cfg::numberOfPoints, FillRNDPointsVector_buf::grainSize),
88 FillRNDPointsVector_buf(points),
89 oneapi::tbb::simple_partitioner());
90 }
91
92 class FindXExtremum {
93 public:
94 typedef enum { minX, maxX } extremumType;
95
96 static const std::size_t grainSize = cfg::findExtremumGrainSize;
97
FindXExtremum(const pointVec_t & points_,extremumType exType_)98 FindXExtremum(const pointVec_t &points_, extremumType exType_)
99 : points(points_),
100 exType(exType_),
101 extrXPoint(points[0]) {}
102
FindXExtremum(const FindXExtremum & fxex,oneapi::tbb::split)103 FindXExtremum(const FindXExtremum &fxex, oneapi::tbb::split)
104 // Can run in parallel with fxex.operator()() or fxex.join().
105 // The data race reported by tools is harmless.
106 : points(fxex.points),
107 exType(fxex.exType),
108 extrXPoint(fxex.extrXPoint) {}
109
operator ()(const range_t & range)110 void operator()(const range_t &range) {
111 const std::size_t i_end = range.end();
112 if (!range.empty()) {
113 for (std::size_t i = range.begin(); i != i_end; ++i) {
114 if (closerToExtremum(points[i])) {
115 extrXPoint = points[i];
116 }
117 }
118 }
119 }
120
join(const FindXExtremum & rhs)121 void join(const FindXExtremum &rhs) {
122 if (closerToExtremum(rhs.extrXPoint)) {
123 extrXPoint = rhs.extrXPoint;
124 }
125 }
126
extremeXPoint()127 point_t extremeXPoint() {
128 return extrXPoint;
129 }
130
131 private:
132 const pointVec_t &points;
133 const extremumType exType;
134 point_t extrXPoint;
closerToExtremum(const point_t & p) const135 bool closerToExtremum(const point_t &p) const {
136 switch (exType) {
137 case minX: return p.x < extrXPoint.x; break;
138 case maxX: return p.x > extrXPoint.x; break;
139 }
140 return false; // avoid warning
141 }
142 };
143
144 template <FindXExtremum::extremumType type>
extremum(const pointVec_t & P)145 point_t extremum(const pointVec_t &P) {
146 FindXExtremum fxBody(P, type);
147 oneapi::tbb::parallel_reduce(range_t(0, P.size(), FindXExtremum::grainSize), fxBody);
148 return fxBody.extremeXPoint();
149 }
150
151 class SplitByCP_buf {
152 const pointVec_t &initialSet;
153 pointVec_t &reducedSet;
154 point_t p1, p2;
155 point_t farPoint;
156 double howFar;
157
158 public:
159 static const std::size_t grainSize = cfg::divideGrainSize;
160
SplitByCP_buf(point_t _p1,point_t _p2,const pointVec_t & _initialSet,pointVec_t & _reducedSet)161 SplitByCP_buf(point_t _p1, point_t _p2, const pointVec_t &_initialSet, pointVec_t &_reducedSet)
162 : p1(_p1),
163 p2(_p2),
164 initialSet(_initialSet),
165 reducedSet(_reducedSet),
166 howFar(0),
167 farPoint(p1) {}
168
SplitByCP_buf(SplitByCP_buf & sbcp,oneapi::tbb::split)169 SplitByCP_buf(SplitByCP_buf &sbcp, oneapi::tbb::split)
170 : p1(sbcp.p1),
171 p2(sbcp.p2),
172 initialSet(sbcp.initialSet),
173 reducedSet(sbcp.reducedSet),
174 howFar(0),
175 farPoint(p1) {}
176
operator ()(const range_t & range)177 void operator()(const range_t &range) {
178 const std::size_t i_end = range.end();
179 std::size_t j = 0;
180 double cp;
181 point_t tmp_vec[grainSize];
182 for (std::size_t i = range.begin(); i != i_end; ++i) {
183 if ((initialSet[i] != p1) && (initialSet[i] != p2)) {
184 cp = util::cross_product(p1, p2, initialSet[i]);
185 if (cp > 0) {
186 tmp_vec[j++] = initialSet[i];
187 if (cp > howFar) {
188 farPoint = initialSet[i];
189 howFar = cp;
190 }
191 }
192 }
193 }
194
195 appendVector(tmp_vec, j, reducedSet);
196 }
197
join(const SplitByCP_buf & rhs)198 void join(const SplitByCP_buf &rhs) {
199 if (rhs.howFar > howFar) {
200 howFar = rhs.howFar;
201 farPoint = rhs.farPoint;
202 }
203 }
204
farthestPoint() const205 point_t farthestPoint() const {
206 return farPoint;
207 }
208 };
209
divide(const pointVec_t & P,pointVec_t & P_reduced,const point_t & p1,const point_t & p2)210 point_t divide(const pointVec_t &P, pointVec_t &P_reduced, const point_t &p1, const point_t &p2) {
211 SplitByCP_buf sbcpb(p1, p2, P, P_reduced);
212 // Must use simple_partitioner (see the comment in initialize() above)
213 oneapi::tbb::parallel_reduce(
214 range_t(0, P.size(), SplitByCP_buf::grainSize), sbcpb, oneapi::tbb::simple_partitioner());
215
216 if (util::verbose) {
217 std::stringstream ss;
218 ss << P.size() << " nodes in bucket"
219 << ", "
220 << "dividing by: [ " << p1 << ", " << p2 << " ], "
221 << "farthest node: " << sbcpb.farthestPoint();
222 util::OUTPUT.push_back(ss.str());
223 }
224
225 return sbcpb.farthestPoint();
226 }
227
divide_and_conquer(const pointVec_t & P,pointVec_t & H,point_t p1,point_t p2)228 void divide_and_conquer(const pointVec_t &P, pointVec_t &H, point_t p1, point_t p2) {
229 assert(P.size() >= 2);
230 pointVec_t P_reduced;
231 pointVec_t H1, H2;
232 point_t p_far = divide(P, P_reduced, p1, p2);
233 if (P_reduced.size() < 2) {
234 H.push_back(p1);
235 appendVector(P_reduced, H);
236 }
237 else {
238 divide_and_conquer(P_reduced, H1, p1, p_far);
239 divide_and_conquer(P_reduced, H2, p_far, p2);
240
241 appendVector(H1, H);
242 appendVector(H2, H);
243 }
244 }
245
quickhull(const pointVec_t & points,pointVec_t & hull)246 void quickhull(const pointVec_t &points, pointVec_t &hull) {
247 if (points.size() < 2) {
248 appendVector(points, hull);
249 return;
250 }
251
252 point_t p_maxx = extremum<FindXExtremum::maxX>(points);
253 point_t p_minx = extremum<FindXExtremum::minX>(points);
254
255 pointVec_t H;
256
257 divide_and_conquer(points, hull, p_maxx, p_minx);
258 divide_and_conquer(points, H, p_minx, p_maxx);
259
260 appendVector(H, hull);
261 }
262
main(int argc,char * argv[])263 int main(int argc, char *argv[]) {
264 utility::thread_number_range threads(utility::get_default_num_threads);
265 util::my_time_t tm_main_begin = util::gettime();
266
267 util::ParseInputArgs(argc, argv, threads);
268
269 pointVec_t points;
270 pointVec_t hull;
271 int nthreads;
272
273 points.reserve(cfg::numberOfPoints);
274
275 if (!util::silent) {
276 std::cout << "Starting TBB-buffered version of QUICK HULL algorithm"
277 << "\n";
278 }
279
280 for (nthreads = threads.first; nthreads <= threads.last; nthreads = threads.step(nthreads)) {
281 oneapi::tbb::global_control c(oneapi::tbb::global_control::max_allowed_parallelism,
282 nthreads);
283
284 points.clear();
285 util::my_time_t tm_init = util::gettime();
286 initialize(points);
287 util::my_time_t tm_start = util::gettime();
288 if (!util::silent) {
289 std::cout << "Init time on " << nthreads
290 << " threads: " << util::time_diff(tm_init, tm_start)
291 << " Points in input: " << points.size() << "\n";
292 }
293
294 tm_start = util::gettime();
295 quickhull(points, hull);
296 util::my_time_t tm_end = util::gettime();
297 if (!util::silent) {
298 std::cout << "Time on " << nthreads << " threads: " << util::time_diff(tm_start, tm_end)
299 << " Points in hull: " << hull.size() << "\n";
300 }
301 hull.clear();
302 }
303 utility::report_elapsed_time(util::time_diff(tm_main_begin, util::gettime()));
304 return 0;
305 }
306