1 /* 2 Copyright (c) 2005-2023 Intel Corporation 3 4 Licensed under the Apache License, Version 2.0 (the "License"); 5 you may not use this file except in compliance with the License. 6 You may obtain a copy of the License at 7 8 http://www.apache.org/licenses/LICENSE-2.0 9 10 Unless required by applicable law or agreed to in writing, software 11 distributed under the License is distributed on an "AS IS" BASIS, 12 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 See the License for the specific language governing permissions and 14 limitations under the License. 15 */ 16 17 #define DOCTEST_CONFIG_SUPER_FAST_ASSERTS 18 #include "common/test.h" 19 #include "common/utils.h" 20 #include "common/utils_report.h" 21 #include "common/test_invoke.h" 22 23 #include "oneapi/tbb/parallel_for.h" 24 #include "oneapi/tbb/tick_count.h" 25 26 #include "../tbb/test_partitioner.h" 27 28 #include <atomic> 29 30 //! \file conformance_parallel_for.cpp 31 //! \brief Test for [algorithms.parallel_for algorithms.auto_partitioner algorithms.simple_partitioner algorithms.static_partitioner algorithms.affinity_partitioner] specification 32 33 static const int N = 500; 34 static std::atomic<int> Array[N]; 35 36 struct parallel_tag {}; 37 struct empty_partitioner_tag {}; 38 39 // Testing parallel_for with step support 40 const std::size_t PFOR_BUFFER_TEST_SIZE = 1024; 41 // test_buffer has some extra items beyond its right bound 42 const std::size_t PFOR_BUFFER_ACTUAL_SIZE = PFOR_BUFFER_TEST_SIZE + 1024; 43 size_t pfor_buffer[PFOR_BUFFER_ACTUAL_SIZE]; 44 45 template<typename T> 46 class TestFunctor{ 47 public: 48 void operator ()(T index) const { 49 pfor_buffer[index]++; 50 } 51 }; 52 53 static std::atomic<int> FooBodyCount; 54 55 // A range object whose only public members are those required by the Range concept. 56 template<size_t Pad> 57 class FooRange { 58 // Start of range 59 int start; 60 61 // Size of range 62 int size; 63 FooRange( int start_, int size_ ) : start(start_), size(size_) { 64 utils::zero_fill<char>(pad, Pad); 65 pad[Pad-1] = 'x'; 66 } 67 template<typename Flavor_, std::size_t Pad_> friend void Flog( ); 68 template<size_t Pad_> friend class FooBody; 69 void operator&(); 70 71 char pad[Pad]; 72 public: 73 bool empty() const {return size==0;} 74 bool is_divisible() const {return size>1;} 75 FooRange( FooRange& original, oneapi::tbb::split ) : size(original.size/2) { 76 original.size -= size; 77 start = original.start+original.size; 78 CHECK_FAST( original.pad[Pad-1]=='x'); 79 pad[Pad-1] = 'x'; 80 } 81 }; 82 83 // A range object whose only public members are those required by the parallel_for.h body concept. 84 template<size_t Pad> 85 class FooBody { 86 public: 87 ~FooBody() { 88 --FooBodyCount; 89 for( std::size_t i=0; i<sizeof(*this); ++i ) 90 reinterpret_cast<char*>(this)[i] = -1; 91 } 92 // Copy constructor 93 FooBody( const FooBody& other ) : array(other.array), state(other.state) { 94 ++FooBodyCount; 95 CHECK_FAST(state == LIVE); 96 } 97 void operator()( FooRange<Pad>& r ) const { 98 for (int k = r.start; k < r.start + r.size; ++k) { 99 CHECK_FAST(array[k].load(std::memory_order_relaxed) == 0); 100 array[k].store(1, std::memory_order_relaxed); 101 } 102 } 103 private: 104 const int LIVE = 0x1234; 105 std::atomic<int>* array; 106 int state; 107 friend class FooRange<Pad>; 108 template<typename Flavor_, std::size_t Pad_> friend void Flog( ); 109 FooBody( std::atomic<int>* array_ ) : array(array_), state(LIVE) {} 110 }; 111 112 template <typename Flavor, typename Partitioner, typename Range, typename Body> 113 struct Invoker; 114 115 template <typename Range, typename Body> 116 struct Invoker<parallel_tag, empty_partitioner_tag, Range, Body> { 117 void operator()( const Range& r, const Body& body, empty_partitioner_tag& ) { 118 oneapi::tbb::parallel_for( r, body ); 119 } 120 }; 121 122 template <typename Partitioner, typename Range, typename Body> 123 struct Invoker<parallel_tag, Partitioner, Range, Body> { 124 void operator()( const Range& r, const Body& body, Partitioner& p ) { 125 oneapi::tbb::parallel_for( r, body, p ); 126 } 127 }; 128 129 template <typename Flavor, typename Partitioner, typename T, typename Body> 130 struct InvokerStep; 131 132 template <typename T, typename Body> 133 struct InvokerStep<parallel_tag, empty_partitioner_tag, T, Body> { 134 void operator()( const T& first, const T& last, const Body& f, empty_partitioner_tag& ) { 135 oneapi::tbb::parallel_for( first, last, f ); 136 } 137 void operator()( const T& first, const T& last, const T& step, const Body& f, empty_partitioner_tag& ) { 138 oneapi::tbb::parallel_for( first, last, step, f ); 139 } 140 }; 141 142 template <typename Partitioner, typename T, typename Body> 143 struct InvokerStep<parallel_tag, Partitioner, T, Body> { 144 void operator()( const T& first, const T& last, const Body& f, Partitioner& p ) { 145 oneapi::tbb::parallel_for( first, last, f, p ); 146 } 147 void operator()( const T& first, const T& last, const T& step, const Body& f, Partitioner& p ) { 148 oneapi::tbb::parallel_for( first, last, step, f, p ); 149 } 150 }; 151 152 template<typename Flavor, std::size_t Pad> 153 void Flog() { 154 for ( int i=0; i<N; ++i ) { 155 for ( int mode = 0; mode < 4; ++mode) { 156 FooRange<Pad> r( 0, i ); 157 const FooRange<Pad> rc = r; 158 FooBody<Pad> f( Array ); 159 const FooBody<Pad> fc = f; 160 for (int a_i = 0; a_i < N; a_i++) { 161 Array[a_i].store(0, std::memory_order_relaxed); 162 } 163 FooBodyCount = 1; 164 switch (mode) { 165 case 0: { 166 empty_partitioner_tag p; 167 Invoker< Flavor, empty_partitioner_tag, FooRange<Pad>, FooBody<Pad> > invoke_for; 168 invoke_for( rc, fc, p ); 169 } 170 break; 171 case 1: { 172 Invoker< Flavor, const oneapi::tbb::simple_partitioner, FooRange<Pad>, FooBody<Pad> > invoke_for; 173 invoke_for( rc, fc, oneapi::tbb::simple_partitioner() ); 174 } 175 break; 176 case 2: { 177 Invoker< Flavor, const oneapi::tbb::auto_partitioner, FooRange<Pad>, FooBody<Pad> > invoke_for; 178 invoke_for( rc, fc, oneapi::tbb::auto_partitioner() ); 179 } 180 break; 181 case 3: { 182 static oneapi::tbb::affinity_partitioner affinity; 183 Invoker< Flavor, oneapi::tbb::affinity_partitioner, FooRange<Pad>, FooBody<Pad> > invoke_for; 184 invoke_for( rc, fc, affinity ); 185 } 186 break; 187 } 188 CHECK(std::find_if_not(Array, Array + i, [](const std::atomic<int>& v) { return v.load(std::memory_order_relaxed) == 1; }) == Array + i); 189 CHECK(std::find_if_not(Array + i, Array + N, [](const std::atomic<int>& v) { return v.load(std::memory_order_relaxed) == 0; }) == Array + N); 190 CHECK(FooBodyCount == 1); 191 } 192 } 193 } 194 195 #include <stdexcept> // std::invalid_argument 196 197 template <typename Flavor, typename T, typename Partitioner> 198 void TestParallelForWithStepSupportHelper(Partitioner& p) { 199 const T pfor_buffer_test_size = static_cast<T>(PFOR_BUFFER_TEST_SIZE); 200 const T pfor_buffer_actual_size = static_cast<T>(PFOR_BUFFER_ACTUAL_SIZE); 201 // Testing parallel_for with different step values 202 InvokerStep< Flavor, Partitioner, T, TestFunctor<T> > invoke_for; 203 for (T begin = 0; begin < pfor_buffer_test_size - 1; begin += pfor_buffer_test_size / 10 + 1) { 204 T step; 205 for (step = 1; step < pfor_buffer_test_size; step++) { 206 std::memset(pfor_buffer, 0, pfor_buffer_actual_size * sizeof(std::size_t)); 207 if (step == 1){ 208 invoke_for(begin, pfor_buffer_test_size, TestFunctor<T>(), p); 209 } else { 210 invoke_for(begin, pfor_buffer_test_size, step, TestFunctor<T>(), p); 211 } 212 // Verifying that parallel_for processed all items it should 213 for (T i = begin; i < pfor_buffer_test_size; i = i + step) { 214 if (pfor_buffer[i] != 1) { 215 CHECK_MESSAGE(false, "parallel_for didn't process all required elements"); 216 } 217 pfor_buffer[i] = 0; 218 } 219 // Verifying that no extra items were processed and right bound of array wasn't crossed 220 for (T i = 0; i < pfor_buffer_actual_size; i++) { 221 if (pfor_buffer[i] != 0) { 222 CHECK_MESSAGE(false, "parallel_for processed an extra element"); 223 } 224 } 225 } 226 } 227 } 228 229 template <typename Flavor, typename T> 230 void TestParallelForWithStepSupport() { 231 static oneapi::tbb::affinity_partitioner affinity_p; 232 oneapi::tbb::auto_partitioner auto_p; 233 oneapi::tbb::simple_partitioner simple_p; 234 oneapi::tbb::static_partitioner static_p; 235 empty_partitioner_tag p; 236 237 // Try out all partitioner combinations 238 TestParallelForWithStepSupportHelper< Flavor,T,empty_partitioner_tag >(p); 239 TestParallelForWithStepSupportHelper< Flavor,T,const oneapi::tbb::auto_partitioner >(auto_p); 240 TestParallelForWithStepSupportHelper< Flavor,T,const oneapi::tbb::simple_partitioner >(simple_p); 241 TestParallelForWithStepSupportHelper< Flavor,T,oneapi::tbb::affinity_partitioner >(affinity_p); 242 TestParallelForWithStepSupportHelper< Flavor,T,oneapi::tbb::static_partitioner >(static_p); 243 244 // Testing some corner cases 245 oneapi::tbb::parallel_for(static_cast<T>(2), static_cast<T>(1), static_cast<T>(1), TestFunctor<T>()); 246 } 247 248 #if __TBB_CPP17_INVOKE_PRESENT 249 class SmartIndex { 250 public: 251 SmartIndex(std::size_t ri) : real_index(ri), change_vector(nullptr) {} 252 SmartIndex(std::size_t ri, std::vector<std::size_t>& cv) 253 : real_index(ri), change_vector(&cv) {} 254 SmartIndex(const SmartIndex& other) : real_index(other.real_index), 255 change_vector(other.change_vector) {} 256 ~SmartIndex() = default; 257 258 SmartIndex& operator=(const SmartIndex& other) { 259 real_index = other.real_index; 260 change_vector = other.change_vector; 261 return *this; 262 } 263 264 bool operator<(const SmartIndex& other) const { 265 return real_index < other.real_index; 266 } 267 268 bool operator<=(const SmartIndex& other) const { 269 return real_index <= other.real_index; 270 } 271 272 SmartIndex operator/(const SmartIndex& other) const { 273 return {real_index / other.real_index, *change_vector}; 274 } 275 276 SmartIndex operator*(const SmartIndex& other) const { 277 return {real_index * other.real_index, *change_vector}; 278 } 279 280 SmartIndex operator+(const SmartIndex& other) const { 281 return {real_index + other.real_index, *change_vector}; 282 } 283 284 SmartIndex& operator+=(const SmartIndex& other) { 285 real_index += other.real_index; 286 return *this; 287 } 288 289 SmartIndex& operator++() { ++real_index; return *this; } 290 291 std::size_t operator-(const SmartIndex& other) const { 292 return real_index - other.real_index; 293 } 294 295 SmartIndex operator+(std::size_t k) { 296 return {real_index + k, *change_vector}; 297 } 298 299 void increase() const { 300 CHECK(change_vector); 301 ++(*change_vector)[real_index]; 302 } 303 private: 304 std::size_t real_index; 305 std::vector<std::size_t>* change_vector; 306 }; 307 308 void test_pfor_body_invoke() { 309 const std::size_t number_of_overloads = 5; 310 const std::size_t iterations = 100000; 311 312 using range_type = test_invoke::SmartRange<std::size_t>; 313 std::vector<std::size_t> change_vector(iterations, 0); 314 range_type range{0, iterations, change_vector}; 315 316 oneapi::tbb::parallel_for(range, &range_type::increase); 317 oneapi::tbb::parallel_for(range, &range_type::increase, oneapi::tbb::simple_partitioner()); 318 oneapi::tbb::parallel_for(range, &range_type::increase, oneapi::tbb::auto_partitioner()); 319 oneapi::tbb::parallel_for(range, &range_type::increase, oneapi::tbb::static_partitioner()); 320 oneapi::tbb::affinity_partitioner aff; 321 oneapi::tbb::parallel_for(range, &range_type::increase, aff); 322 323 for (std::size_t item : change_vector) { 324 CHECK(item == number_of_overloads); 325 } 326 } 327 328 329 void test_pfor_func_invoke() { 330 const std::size_t number_of_overloads = 5; 331 const std::size_t iterations = 100000; 332 333 std::vector<std::size_t> change_vector(iterations, 0); 334 SmartIndex first{0, change_vector}; 335 SmartIndex last{iterations, change_vector}; 336 SmartIndex stride{2}; 337 338 oneapi::tbb::parallel_for(first, last, &SmartIndex::increase); 339 oneapi::tbb::parallel_for(first, last, &SmartIndex::increase, oneapi::tbb::simple_partitioner()); 340 oneapi::tbb::parallel_for(first, last, &SmartIndex::increase, oneapi::tbb::auto_partitioner()); 341 oneapi::tbb::parallel_for(first, last, &SmartIndex::increase, oneapi::tbb::static_partitioner()); 342 oneapi::tbb::affinity_partitioner aff; 343 oneapi::tbb::parallel_for(first, last, &SmartIndex::increase, aff); 344 345 for (std::size_t& item : change_vector) { 346 CHECK(item == number_of_overloads); 347 item = 0; 348 } 349 350 oneapi::tbb::parallel_for(first, last, stride, &SmartIndex::increase); 351 oneapi::tbb::parallel_for(first, last, stride, &SmartIndex::increase, oneapi::tbb::simple_partitioner()); 352 oneapi::tbb::parallel_for(first, last, stride, &SmartIndex::increase, oneapi::tbb::auto_partitioner()); 353 oneapi::tbb::parallel_for(first, last, stride, &SmartIndex::increase, oneapi::tbb::static_partitioner()); 354 oneapi::tbb::parallel_for(first, last, stride, &SmartIndex::increase, aff); 355 356 CHECK(change_vector[0] == number_of_overloads); 357 for (std::size_t i = 1; i < iterations; ++i) { 358 std::size_t expected = change_vector[i - 1] == 0 ? number_of_overloads : 0; 359 CHECK(change_vector[i] == expected); 360 } 361 } 362 #endif // __TBB_CPP17_INVOKE_PRESENT 363 364 //! Test simple parallel_for with different partitioners 365 //! \brief \ref interface \ref requirement 366 TEST_CASE("Basic parallel_for") { 367 std::atomic<unsigned long> counter{}; 368 const std::size_t number_of_partitioners = 5; 369 const std::size_t iterations = 100000; 370 371 oneapi::tbb::parallel_for(std::size_t(0), iterations, [&](std::size_t) { 372 counter++; 373 }); 374 375 oneapi::tbb::parallel_for(std::size_t(0), iterations, [&](std::size_t) { 376 counter++; 377 }, oneapi::tbb::simple_partitioner()); 378 379 oneapi::tbb::parallel_for(std::size_t(0), iterations, [&](std::size_t) { 380 counter++; 381 }, oneapi::tbb::auto_partitioner()); 382 383 oneapi::tbb::parallel_for(std::size_t(0), iterations, [&](std::size_t) { 384 counter++; 385 }, oneapi::tbb::static_partitioner()); 386 387 oneapi::tbb::affinity_partitioner aff; 388 oneapi::tbb::parallel_for(std::size_t(0), iterations, [&](std::size_t) { 389 counter++; 390 }, aff); 391 392 CHECK_EQ(counter.load(std::memory_order_relaxed), iterations * number_of_partitioners); 393 } 394 395 //! Testing parallel for with different partitioners and ranges ranges 396 //! \brief \ref interface \ref requirement \ref stress 397 TEST_CASE("Flog test") { 398 Flog<parallel_tag, 1>(); 399 Flog<parallel_tag, 10>(); 400 Flog<parallel_tag, 100>(); 401 Flog<parallel_tag, 1000>(); 402 Flog<parallel_tag, 10000>(); 403 } 404 405 //! Testing parallel for with different types and step 406 //! \brief \ref interface \ref requirement 407 TEST_CASE_TEMPLATE("parallel_for with step support", T, short, unsigned short, int, unsigned int, 408 long, unsigned long, long long, unsigned long long, std::size_t) { 409 // Testing with different integer types 410 TestParallelForWithStepSupport<parallel_tag, T>(); 411 } 412 413 //! Testing with different types of ranges and partitioners 414 //! \brief \ref interface \ref requirement 415 TEST_CASE("Testing parallel_for with partitioners") { 416 using namespace test_partitioner_utils::interaction_with_range_and_partitioner; 417 418 test_partitioner_utils::SimpleBody b; 419 oneapi::tbb::affinity_partitioner ap; 420 421 parallel_for(Range1(true, false), b, ap); 422 parallel_for(Range6(false, true), b, ap); 423 424 parallel_for(Range1(false, true), b, oneapi::tbb::simple_partitioner()); 425 parallel_for(Range6(false, true), b, oneapi::tbb::simple_partitioner()); 426 427 parallel_for(Range1(false, true), b, oneapi::tbb::auto_partitioner()); 428 parallel_for(Range6(false, true), b, oneapi::tbb::auto_partitioner()); 429 430 parallel_for(Range1(true, false), b, oneapi::tbb::static_partitioner()); 431 parallel_for(Range6(false, true), b, oneapi::tbb::static_partitioner()); 432 } 433 434 #if __TBB_CPP17_INVOKE_PRESENT 435 //! Test that parallel_for uses std::invoke to run body and function 436 //! \brief \ref interface \ref requirement 437 TEST_CASE("parallel_for and std::invoke") { 438 test_pfor_body_invoke(); 439 test_pfor_func_invoke(); 440 } 441 #endif 442