1 /*
2     Copyright (c) 2005-2021 Intel Corporation
3 
4     Licensed under the Apache License, Version 2.0 (the "License");
5     you may not use this file except in compliance with the License.
6     You may obtain a copy of the License at
7 
8         http://www.apache.org/licenses/LICENSE-2.0
9 
10     Unless required by applicable law or agreed to in writing, software
11     distributed under the License is distributed on an "AS IS" BASIS,
12     WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13     See the License for the specific language governing permissions and
14     limitations under the License.
15 */
16 
17 /*
18     This file contains the TBB-based implementation of convex hull algorithm.
19     It corresponds to the following settings in convex_hull_bench.cpp:
20     - USETBB defined to 1
21     - USECONCVEC defined to 1
22     - INIT_ONCE defined to 0
23     - only buffered version is used
24 */
25 
26 #include "oneapi/tbb/parallel_for.h"
27 #include "oneapi/tbb/parallel_reduce.h"
28 #include "oneapi/tbb/blocked_range.h"
29 #include "oneapi/tbb/tick_count.h"
30 #include "oneapi/tbb/concurrent_vector.h"
31 
32 #include "convex_hull.hpp"
33 
34 typedef util::point<double> point_t;
35 typedef oneapi::tbb::concurrent_vector<point_t> pointVec_t;
36 typedef oneapi::tbb::blocked_range<std::size_t> range_t;
37 
appendVector(const point_t * src,std::size_t srcSize,pointVec_t & dest)38 void appendVector(const point_t *src, std::size_t srcSize, pointVec_t &dest) {
39     std::copy(src, src + srcSize, dest.grow_by(srcSize));
40 }
41 
appendVector(const pointVec_t & src,pointVec_t & dest)42 void appendVector(const pointVec_t &src, pointVec_t &dest) {
43     std::copy(src.begin(), src.end(), dest.grow_by(src.size()));
44 }
45 class FillRNDPointsVector_buf {
46     pointVec_t &points;
47 
48 public:
49     static const std::size_t grainSize = cfg::generateGrainSize;
50 
FillRNDPointsVector_buf(pointVec_t & _points)51     explicit FillRNDPointsVector_buf(pointVec_t &_points) : points(_points) {}
52 
operator ()(const range_t & range) const53     void operator()(const range_t &range) const {
54         util::rng the_rng(range.begin());
55         const std::size_t i_end = range.end();
56         std::size_t count = 0, j = 0;
57         point_t tmp_vec[grainSize];
58 
59         for (std::size_t i = range.begin(); i != i_end; ++i) {
60             tmp_vec[j++] = util::GenerateRNDPoint<double>(count, the_rng, util::rng::max_rand);
61         }
62         //Here we have race condition. Elements being written to may be still under construction.
63         //For C++ 2003 it is workarounded by vector element type which default constructor does not touch memory,
64         //it being constructed on. See comments near default ctor of point class for more details.
65         //Strictly speaking it is UB.
66         //TODO: need to find more reliable/correct way
67         points.grow_to_at_least(range.end());
68         std::copy(tmp_vec, tmp_vec + j, points.begin() + range.begin());
69     }
70 };
71 
initialize(pointVec_t & points)72 void initialize(pointVec_t &points) {
73     //This function generate the same series of point on every call.
74     //Reproducibility is needed for benchmarking to produce reliable results.
75     //It is achieved through the following points:
76     //      - FillRNDPointsVector_buf instance has its own local instance
77     //        of random number generator, which in turn does not use any global data
78     //      - oneapi::tbb::simple_partitioner produce the same set of ranges on every call to
79     //        oneapi::tbb::parallel_for
80     //      - local RNG instances are seeded by the starting indexes of corresponding ranges
81     //      - grow_to_at_least() enables putting points into the resulting vector in deterministic order
82     //        (unlike concurrent push_back or grow_by).
83 
84     // In the buffered version, a temporary storage for as much as grainSize elements
85     // is allocated inside the body. Since auto_partitioner may increase effective
86     // range size which would cause a crash, simple partitioner has to be used.
87     oneapi::tbb::parallel_for(range_t(0, cfg::numberOfPoints, FillRNDPointsVector_buf::grainSize),
88                               FillRNDPointsVector_buf(points),
89                               oneapi::tbb::simple_partitioner());
90 }
91 
92 class FindXExtremum {
93 public:
94     typedef enum { minX, maxX } extremumType;
95 
96     static const std::size_t grainSize = cfg::findExtremumGrainSize;
97 
FindXExtremum(const pointVec_t & points_,extremumType exType_)98     FindXExtremum(const pointVec_t &points_, extremumType exType_)
99             : points(points_),
100               exType(exType_),
101               extrXPoint(points[0]) {}
102 
FindXExtremum(const FindXExtremum & fxex,oneapi::tbb::split)103     FindXExtremum(const FindXExtremum &fxex, oneapi::tbb::split)
104             // Can run in parallel with fxex.operator()() or fxex.join().
105             // The data race reported by tools is harmless.
106             : points(fxex.points),
107               exType(fxex.exType),
108               extrXPoint(fxex.extrXPoint) {}
109 
operator ()(const range_t & range)110     void operator()(const range_t &range) {
111         const std::size_t i_end = range.end();
112         if (!range.empty()) {
113             for (std::size_t i = range.begin(); i != i_end; ++i) {
114                 if (closerToExtremum(points[i])) {
115                     extrXPoint = points[i];
116                 }
117             }
118         }
119     }
120 
join(const FindXExtremum & rhs)121     void join(const FindXExtremum &rhs) {
122         if (closerToExtremum(rhs.extrXPoint)) {
123             extrXPoint = rhs.extrXPoint;
124         }
125     }
126 
extremeXPoint()127     point_t extremeXPoint() {
128         return extrXPoint;
129     }
130 
131 private:
132     const pointVec_t &points;
133     const extremumType exType;
134     point_t extrXPoint;
closerToExtremum(const point_t & p) const135     bool closerToExtremum(const point_t &p) const {
136         switch (exType) {
137             case minX: return p.x < extrXPoint.x; break;
138             case maxX: return p.x > extrXPoint.x; break;
139         }
140         return false; // avoid warning
141     }
142 };
143 
144 template <FindXExtremum::extremumType type>
extremum(const pointVec_t & P)145 point_t extremum(const pointVec_t &P) {
146     FindXExtremum fxBody(P, type);
147     oneapi::tbb::parallel_reduce(range_t(0, P.size(), FindXExtremum::grainSize), fxBody);
148     return fxBody.extremeXPoint();
149 }
150 
151 class SplitByCP_buf {
152     const pointVec_t &initialSet;
153     pointVec_t &reducedSet;
154     point_t p1, p2;
155     point_t farPoint;
156     double howFar;
157 
158 public:
159     static const std::size_t grainSize = cfg::divideGrainSize;
160 
SplitByCP_buf(point_t _p1,point_t _p2,const pointVec_t & _initialSet,pointVec_t & _reducedSet)161     SplitByCP_buf(point_t _p1, point_t _p2, const pointVec_t &_initialSet, pointVec_t &_reducedSet)
162             : p1(_p1),
163               p2(_p2),
164               initialSet(_initialSet),
165               reducedSet(_reducedSet),
166               howFar(0),
167               farPoint(p1) {}
168 
SplitByCP_buf(SplitByCP_buf & sbcp,oneapi::tbb::split)169     SplitByCP_buf(SplitByCP_buf &sbcp, oneapi::tbb::split)
170             : p1(sbcp.p1),
171               p2(sbcp.p2),
172               initialSet(sbcp.initialSet),
173               reducedSet(sbcp.reducedSet),
174               howFar(0),
175               farPoint(p1) {}
176 
operator ()(const range_t & range)177     void operator()(const range_t &range) {
178         const std::size_t i_end = range.end();
179         std::size_t j = 0;
180         double cp;
181         point_t tmp_vec[grainSize];
182         for (std::size_t i = range.begin(); i != i_end; ++i) {
183             if ((initialSet[i] != p1) && (initialSet[i] != p2)) {
184                 cp = util::cross_product(p1, p2, initialSet[i]);
185                 if (cp > 0) {
186                     tmp_vec[j++] = initialSet[i];
187                     if (cp > howFar) {
188                         farPoint = initialSet[i];
189                         howFar = cp;
190                     }
191                 }
192             }
193         }
194 
195         appendVector(tmp_vec, j, reducedSet);
196     }
197 
join(const SplitByCP_buf & rhs)198     void join(const SplitByCP_buf &rhs) {
199         if (rhs.howFar > howFar) {
200             howFar = rhs.howFar;
201             farPoint = rhs.farPoint;
202         }
203     }
204 
farthestPoint() const205     point_t farthestPoint() const {
206         return farPoint;
207     }
208 };
209 
divide(const pointVec_t & P,pointVec_t & P_reduced,const point_t & p1,const point_t & p2)210 point_t divide(const pointVec_t &P, pointVec_t &P_reduced, const point_t &p1, const point_t &p2) {
211     SplitByCP_buf sbcpb(p1, p2, P, P_reduced);
212     // Must use simple_partitioner (see the comment in initialize() above)
213     oneapi::tbb::parallel_reduce(
214         range_t(0, P.size(), SplitByCP_buf::grainSize), sbcpb, oneapi::tbb::simple_partitioner());
215 
216     if (util::verbose) {
217         std::stringstream ss;
218         ss << P.size() << " nodes in bucket"
219            << ", "
220            << "dividing by: [ " << p1 << ", " << p2 << " ], "
221            << "farthest node: " << sbcpb.farthestPoint();
222         util::OUTPUT.push_back(ss.str());
223     }
224 
225     return sbcpb.farthestPoint();
226 }
227 
divide_and_conquer(const pointVec_t & P,pointVec_t & H,point_t p1,point_t p2)228 void divide_and_conquer(const pointVec_t &P, pointVec_t &H, point_t p1, point_t p2) {
229     assert(P.size() >= 2);
230     pointVec_t P_reduced;
231     pointVec_t H1, H2;
232     point_t p_far = divide(P, P_reduced, p1, p2);
233     if (P_reduced.size() < 2) {
234         H.push_back(p1);
235         appendVector(P_reduced, H);
236     }
237     else {
238         divide_and_conquer(P_reduced, H1, p1, p_far);
239         divide_and_conquer(P_reduced, H2, p_far, p2);
240 
241         appendVector(H1, H);
242         appendVector(H2, H);
243     }
244 }
245 
quickhull(const pointVec_t & points,pointVec_t & hull)246 void quickhull(const pointVec_t &points, pointVec_t &hull) {
247     if (points.size() < 2) {
248         appendVector(points, hull);
249         return;
250     }
251 
252     point_t p_maxx = extremum<FindXExtremum::maxX>(points);
253     point_t p_minx = extremum<FindXExtremum::minX>(points);
254 
255     pointVec_t H;
256 
257     divide_and_conquer(points, hull, p_maxx, p_minx);
258     divide_and_conquer(points, H, p_minx, p_maxx);
259 
260     appendVector(H, hull);
261 }
262 
main(int argc,char * argv[])263 int main(int argc, char *argv[]) {
264     utility::thread_number_range threads(utility::get_default_num_threads);
265     util::my_time_t tm_main_begin = util::gettime();
266 
267     util::ParseInputArgs(argc, argv, threads);
268 
269     pointVec_t points;
270     pointVec_t hull;
271     int nthreads;
272 
273     points.reserve(cfg::numberOfPoints);
274 
275     if (!util::silent) {
276         std::cout << "Starting TBB-buffered version of QUICK HULL algorithm"
277                   << "\n";
278     }
279 
280     for (nthreads = threads.first; nthreads <= threads.last; nthreads = threads.step(nthreads)) {
281         oneapi::tbb::global_control c(oneapi::tbb::global_control::max_allowed_parallelism,
282                                       nthreads);
283 
284         points.clear();
285         util::my_time_t tm_init = util::gettime();
286         initialize(points);
287         util::my_time_t tm_start = util::gettime();
288         if (!util::silent) {
289             std::cout << "Init time on " << nthreads
290                       << " threads: " << util::time_diff(tm_init, tm_start)
291                       << "  Points in input: " << points.size() << "\n";
292         }
293 
294         tm_start = util::gettime();
295         quickhull(points, hull);
296         util::my_time_t tm_end = util::gettime();
297         if (!util::silent) {
298             std::cout << "Time on " << nthreads << " threads: " << util::time_diff(tm_start, tm_end)
299                       << "  Points in hull: " << hull.size() << "\n";
300         }
301         hull.clear();
302     }
303     utility::report_elapsed_time(util::time_diff(tm_main_begin, util::gettime()));
304     return 0;
305 }
306