1 /* 2 Copyright (c) 2005-2021 Intel Corporation 3 4 Licensed under the Apache License, Version 2.0 (the "License"); 5 you may not use this file except in compliance with the License. 6 You may obtain a copy of the License at 7 8 http://www.apache.org/licenses/LICENSE-2.0 9 10 Unless required by applicable law or agreed to in writing, software 11 distributed under the License is distributed on an "AS IS" BASIS, 12 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 See the License for the specific language governing permissions and 14 limitations under the License. 15 */ 16 17 /* 18 This file contains the TBB-based implementation of convex hull algorithm. 19 It corresponds to the following settings in convex_hull_bench.cpp: 20 - USETBB defined to 1 21 - USECONCVEC defined to 1 22 - INIT_ONCE defined to 0 23 - only buffered version is used 24 */ 25 26 #include "oneapi/tbb/parallel_for.h" 27 #include "oneapi/tbb/parallel_reduce.h" 28 #include "oneapi/tbb/blocked_range.h" 29 #include "oneapi/tbb/tick_count.h" 30 #include "oneapi/tbb/concurrent_vector.h" 31 32 #include "convex_hull.hpp" 33 34 typedef util::point<double> point_t; 35 typedef oneapi::tbb::concurrent_vector<point_t> pointVec_t; 36 typedef oneapi::tbb::blocked_range<std::size_t> range_t; 37 38 void appendVector(const point_t *src, std::size_t srcSize, pointVec_t &dest) { 39 std::copy(src, src + srcSize, dest.grow_by(srcSize)); 40 } 41 42 void appendVector(const pointVec_t &src, pointVec_t &dest) { 43 std::copy(src.begin(), src.end(), dest.grow_by(src.size())); 44 } 45 class FillRNDPointsVector_buf { 46 pointVec_t &points; 47 48 public: 49 static const std::size_t grainSize = cfg::generateGrainSize; 50 51 explicit FillRNDPointsVector_buf(pointVec_t &_points) : points(_points) {} 52 53 void operator()(const range_t &range) const { 54 util::rng the_rng(range.begin()); 55 const std::size_t i_end = range.end(); 56 std::size_t count = 0, j = 0; 57 point_t tmp_vec[grainSize]; 58 59 for (std::size_t i = range.begin(); i != i_end; ++i) { 60 tmp_vec[j++] = util::GenerateRNDPoint<double>(count, the_rng, util::rng::max_rand); 61 } 62 //Here we have race condition. Elements being written to may be still under construction. 63 //For C++ 2003 it is workarounded by vector element type which default constructor does not touch memory, 64 //it being constructed on. See comments near default ctor of point class for more details. 65 //Strictly speaking it is UB. 66 //TODO: need to find more reliable/correct way 67 points.grow_to_at_least(range.end()); 68 std::copy(tmp_vec, tmp_vec + j, points.begin() + range.begin()); 69 } 70 }; 71 72 void initialize(pointVec_t &points) { 73 //This function generate the same series of point on every call. 74 //Reproducibility is needed for benchmarking to produce reliable results. 75 //It is achieved through the following points: 76 // - FillRNDPointsVector_buf instance has its own local instance 77 // of random number generator, which in turn does not use any global data 78 // - oneapi::tbb::simple_partitioner produce the same set of ranges on every call to 79 // oneapi::tbb::parallel_for 80 // - local RNG instances are seeded by the starting indexes of corresponding ranges 81 // - grow_to_at_least() enables putting points into the resulting vector in deterministic order 82 // (unlike concurrent push_back or grow_by). 83 84 // In the buffered version, a temporary storage for as much as grainSize elements 85 // is allocated inside the body. Since auto_partitioner may increase effective 86 // range size which would cause a crash, simple partitioner has to be used. 87 oneapi::tbb::parallel_for(range_t(0, cfg::numberOfPoints, FillRNDPointsVector_buf::grainSize), 88 FillRNDPointsVector_buf(points), 89 oneapi::tbb::simple_partitioner()); 90 } 91 92 class FindXExtremum { 93 public: 94 typedef enum { minX, maxX } extremumType; 95 96 static const std::size_t grainSize = cfg::findExtremumGrainSize; 97 98 FindXExtremum(const pointVec_t &points_, extremumType exType_) 99 : points(points_), 100 exType(exType_), 101 extrXPoint(points[0]) {} 102 103 FindXExtremum(const FindXExtremum &fxex, oneapi::tbb::split) 104 // Can run in parallel with fxex.operator()() or fxex.join(). 105 // The data race reported by tools is harmless. 106 : points(fxex.points), 107 exType(fxex.exType), 108 extrXPoint(fxex.extrXPoint) {} 109 110 void operator()(const range_t &range) { 111 const std::size_t i_end = range.end(); 112 if (!range.empty()) { 113 for (std::size_t i = range.begin(); i != i_end; ++i) { 114 if (closerToExtremum(points[i])) { 115 extrXPoint = points[i]; 116 } 117 } 118 } 119 } 120 121 void join(const FindXExtremum &rhs) { 122 if (closerToExtremum(rhs.extrXPoint)) { 123 extrXPoint = rhs.extrXPoint; 124 } 125 } 126 127 point_t extremeXPoint() { 128 return extrXPoint; 129 } 130 131 private: 132 const pointVec_t &points; 133 const extremumType exType; 134 point_t extrXPoint; 135 bool closerToExtremum(const point_t &p) const { 136 switch (exType) { 137 case minX: return p.x < extrXPoint.x; break; 138 case maxX: return p.x > extrXPoint.x; break; 139 } 140 return false; // avoid warning 141 } 142 }; 143 144 template <FindXExtremum::extremumType type> 145 point_t extremum(const pointVec_t &P) { 146 FindXExtremum fxBody(P, type); 147 oneapi::tbb::parallel_reduce(range_t(0, P.size(), FindXExtremum::grainSize), fxBody); 148 return fxBody.extremeXPoint(); 149 } 150 151 class SplitByCP_buf { 152 const pointVec_t &initialSet; 153 pointVec_t &reducedSet; 154 point_t p1, p2; 155 point_t farPoint; 156 double howFar; 157 158 public: 159 static const std::size_t grainSize = cfg::divideGrainSize; 160 161 SplitByCP_buf(point_t _p1, point_t _p2, const pointVec_t &_initialSet, pointVec_t &_reducedSet) 162 : p1(_p1), 163 p2(_p2), 164 initialSet(_initialSet), 165 reducedSet(_reducedSet), 166 howFar(0), 167 farPoint(p1) {} 168 169 SplitByCP_buf(SplitByCP_buf &sbcp, oneapi::tbb::split) 170 : p1(sbcp.p1), 171 p2(sbcp.p2), 172 initialSet(sbcp.initialSet), 173 reducedSet(sbcp.reducedSet), 174 howFar(0), 175 farPoint(p1) {} 176 177 void operator()(const range_t &range) { 178 const std::size_t i_end = range.end(); 179 std::size_t j = 0; 180 double cp; 181 point_t tmp_vec[grainSize]; 182 for (std::size_t i = range.begin(); i != i_end; ++i) { 183 if ((initialSet[i] != p1) && (initialSet[i] != p2)) { 184 cp = util::cross_product(p1, p2, initialSet[i]); 185 if (cp > 0) { 186 tmp_vec[j++] = initialSet[i]; 187 if (cp > howFar) { 188 farPoint = initialSet[i]; 189 howFar = cp; 190 } 191 } 192 } 193 } 194 195 appendVector(tmp_vec, j, reducedSet); 196 } 197 198 void join(const SplitByCP_buf &rhs) { 199 if (rhs.howFar > howFar) { 200 howFar = rhs.howFar; 201 farPoint = rhs.farPoint; 202 } 203 } 204 205 point_t farthestPoint() const { 206 return farPoint; 207 } 208 }; 209 210 point_t divide(const pointVec_t &P, pointVec_t &P_reduced, const point_t &p1, const point_t &p2) { 211 SplitByCP_buf sbcpb(p1, p2, P, P_reduced); 212 // Must use simple_partitioner (see the comment in initialize() above) 213 oneapi::tbb::parallel_reduce( 214 range_t(0, P.size(), SplitByCP_buf::grainSize), sbcpb, oneapi::tbb::simple_partitioner()); 215 216 if (util::verbose) { 217 std::stringstream ss; 218 ss << P.size() << " nodes in bucket" 219 << ", " 220 << "dividing by: [ " << p1 << ", " << p2 << " ], " 221 << "farthest node: " << sbcpb.farthestPoint(); 222 util::OUTPUT.push_back(ss.str()); 223 } 224 225 return sbcpb.farthestPoint(); 226 } 227 228 void divide_and_conquer(const pointVec_t &P, pointVec_t &H, point_t p1, point_t p2) { 229 assert(P.size() >= 2); 230 pointVec_t P_reduced; 231 pointVec_t H1, H2; 232 point_t p_far = divide(P, P_reduced, p1, p2); 233 if (P_reduced.size() < 2) { 234 H.push_back(p1); 235 appendVector(P_reduced, H); 236 } 237 else { 238 divide_and_conquer(P_reduced, H1, p1, p_far); 239 divide_and_conquer(P_reduced, H2, p_far, p2); 240 241 appendVector(H1, H); 242 appendVector(H2, H); 243 } 244 } 245 246 void quickhull(const pointVec_t &points, pointVec_t &hull) { 247 if (points.size() < 2) { 248 appendVector(points, hull); 249 return; 250 } 251 252 point_t p_maxx = extremum<FindXExtremum::maxX>(points); 253 point_t p_minx = extremum<FindXExtremum::minX>(points); 254 255 pointVec_t H; 256 257 divide_and_conquer(points, hull, p_maxx, p_minx); 258 divide_and_conquer(points, H, p_minx, p_maxx); 259 260 appendVector(H, hull); 261 } 262 263 int main(int argc, char *argv[]) { 264 utility::thread_number_range threads(utility::get_default_num_threads); 265 util::my_time_t tm_main_begin = util::gettime(); 266 267 util::ParseInputArgs(argc, argv, threads); 268 269 pointVec_t points; 270 pointVec_t hull; 271 int nthreads; 272 273 points.reserve(cfg::numberOfPoints); 274 275 if (!util::silent) { 276 std::cout << "Starting TBB-buffered version of QUICK HULL algorithm" 277 << "\n"; 278 } 279 280 for (nthreads = threads.first; nthreads <= threads.last; nthreads = threads.step(nthreads)) { 281 oneapi::tbb::global_control c(oneapi::tbb::global_control::max_allowed_parallelism, 282 nthreads); 283 284 points.clear(); 285 util::my_time_t tm_init = util::gettime(); 286 initialize(points); 287 util::my_time_t tm_start = util::gettime(); 288 if (!util::silent) { 289 std::cout << "Init time on " << nthreads 290 << " threads: " << util::time_diff(tm_init, tm_start) 291 << " Points in input: " << points.size() << "\n"; 292 } 293 294 tm_start = util::gettime(); 295 quickhull(points, hull); 296 util::my_time_t tm_end = util::gettime(); 297 if (!util::silent) { 298 std::cout << "Time on " << nthreads << " threads: " << util::time_diff(tm_start, tm_end) 299 << " Points in hull: " << hull.size() << "\n"; 300 } 301 hull.clear(); 302 } 303 utility::report_elapsed_time(util::time_diff(tm_main_begin, util::gettime())); 304 return 0; 305 } 306