1 /* 2 Copyright (c) 2005-2021 Intel Corporation 3 4 Licensed under the Apache License, Version 2.0 (the "License"); 5 you may not use this file except in compliance with the License. 6 You may obtain a copy of the License at 7 8 http://www.apache.org/licenses/LICENSE-2.0 9 10 Unless required by applicable law or agreed to in writing, software 11 distributed under the License is distributed on an "AS IS" BASIS, 12 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 See the License for the specific language governing permissions and 14 limitations under the License. 15 */ 16 17 #define DOCTEST_CONFIG_SUPER_FAST_ASSERTS 18 #include "common/test.h" 19 #include "common/utils.h" 20 #include "common/utils_report.h" 21 22 #include "oneapi/tbb/parallel_for.h" 23 #include "oneapi/tbb/tick_count.h" 24 25 #include "../tbb/test_partitioner.h" 26 27 #include <atomic> 28 29 //! \file conformance_parallel_for.cpp 30 //! \brief Test for [algorithms.parallel_for algorithms.auto_partitioner algorithms.simple_partitioner algorithms.static_partitioner algorithms.affinity_partitioner] specification 31 32 static const int N = 500; 33 static std::atomic<int> Array[N]; 34 35 struct parallel_tag {}; 36 struct empty_partitioner_tag {}; 37 38 // Testing parallel_for with step support 39 const std::size_t PFOR_BUFFER_TEST_SIZE = 1024; 40 // test_buffer has some extra items beyond its right bound 41 const std::size_t PFOR_BUFFER_ACTUAL_SIZE = PFOR_BUFFER_TEST_SIZE + 1024; 42 size_t pfor_buffer[PFOR_BUFFER_ACTUAL_SIZE]; 43 44 template<typename T> 45 class TestFunctor{ 46 public: 47 void operator ()(T index) const { 48 pfor_buffer[index]++; 49 } 50 }; 51 52 static std::atomic<int> FooBodyCount; 53 54 // A range object whose only public members are those required by the Range concept. 55 template<size_t Pad> 56 class FooRange { 57 // Start of range 58 int start; 59 60 // Size of range 61 int size; 62 FooRange( int start_, int size_ ) : start(start_), size(size_) { 63 utils::zero_fill<char>(pad, Pad); 64 pad[Pad-1] = 'x'; 65 } 66 template<typename Flavor_, std::size_t Pad_> friend void Flog( ); 67 template<size_t Pad_> friend class FooBody; 68 void operator&(); 69 70 char pad[Pad]; 71 public: 72 bool empty() const {return size==0;} 73 bool is_divisible() const {return size>1;} 74 FooRange( FooRange& original, oneapi::tbb::split ) : size(original.size/2) { 75 original.size -= size; 76 start = original.start+original.size; 77 CHECK_FAST( original.pad[Pad-1]=='x'); 78 pad[Pad-1] = 'x'; 79 } 80 }; 81 82 // A range object whose only public members are those required by the parallel_for.h body concept. 83 template<size_t Pad> 84 class FooBody { 85 public: 86 ~FooBody() { 87 --FooBodyCount; 88 for( std::size_t i=0; i<sizeof(*this); ++i ) 89 reinterpret_cast<char*>(this)[i] = -1; 90 } 91 // Copy constructor 92 FooBody( const FooBody& other ) : array(other.array), state(other.state) { 93 ++FooBodyCount; 94 CHECK_FAST(state == LIVE); 95 } 96 void operator()( FooRange<Pad>& r ) const { 97 for (int k = r.start; k < r.start + r.size; ++k) { 98 CHECK_FAST(array[k].load(std::memory_order_relaxed) == 0); 99 array[k].store(1, std::memory_order_relaxed); 100 } 101 } 102 private: 103 const int LIVE = 0x1234; 104 std::atomic<int>* array; 105 int state; 106 friend class FooRange<Pad>; 107 template<typename Flavor_, std::size_t Pad_> friend void Flog( ); 108 FooBody( std::atomic<int>* array_ ) : array(array_), state(LIVE) {} 109 }; 110 111 template <typename Flavor, typename Partitioner, typename Range, typename Body> 112 struct Invoker; 113 114 template <typename Range, typename Body> 115 struct Invoker<parallel_tag, empty_partitioner_tag, Range, Body> { 116 void operator()( const Range& r, const Body& body, empty_partitioner_tag& ) { 117 oneapi::tbb::parallel_for( r, body ); 118 } 119 }; 120 121 template <typename Partitioner, typename Range, typename Body> 122 struct Invoker<parallel_tag, Partitioner, Range, Body> { 123 void operator()( const Range& r, const Body& body, Partitioner& p ) { 124 oneapi::tbb::parallel_for( r, body, p ); 125 } 126 }; 127 128 template <typename Flavor, typename Partitioner, typename T, typename Body> 129 struct InvokerStep; 130 131 template <typename T, typename Body> 132 struct InvokerStep<parallel_tag, empty_partitioner_tag, T, Body> { 133 void operator()( const T& first, const T& last, const Body& f, empty_partitioner_tag& ) { 134 oneapi::tbb::parallel_for( first, last, f ); 135 } 136 void operator()( const T& first, const T& last, const T& step, const Body& f, empty_partitioner_tag& ) { 137 oneapi::tbb::parallel_for( first, last, step, f ); 138 } 139 }; 140 141 template <typename Partitioner, typename T, typename Body> 142 struct InvokerStep<parallel_tag, Partitioner, T, Body> { 143 void operator()( const T& first, const T& last, const Body& f, Partitioner& p ) { 144 oneapi::tbb::parallel_for( first, last, f, p ); 145 } 146 void operator()( const T& first, const T& last, const T& step, const Body& f, Partitioner& p ) { 147 oneapi::tbb::parallel_for( first, last, step, f, p ); 148 } 149 }; 150 151 template<typename Flavor, std::size_t Pad> 152 void Flog() { 153 for ( int i=0; i<N; ++i ) { 154 for ( int mode = 0; mode < 4; ++mode) { 155 FooRange<Pad> r( 0, i ); 156 const FooRange<Pad> rc = r; 157 FooBody<Pad> f( Array ); 158 const FooBody<Pad> fc = f; 159 for (int a_i = 0; a_i < N; a_i++) { 160 Array[a_i].store(0, std::memory_order_relaxed); 161 } 162 FooBodyCount = 1; 163 switch (mode) { 164 case 0: { 165 empty_partitioner_tag p; 166 Invoker< Flavor, empty_partitioner_tag, FooRange<Pad>, FooBody<Pad> > invoke_for; 167 invoke_for( rc, fc, p ); 168 } 169 break; 170 case 1: { 171 Invoker< Flavor, const oneapi::tbb::simple_partitioner, FooRange<Pad>, FooBody<Pad> > invoke_for; 172 invoke_for( rc, fc, oneapi::tbb::simple_partitioner() ); 173 } 174 break; 175 case 2: { 176 Invoker< Flavor, const oneapi::tbb::auto_partitioner, FooRange<Pad>, FooBody<Pad> > invoke_for; 177 invoke_for( rc, fc, oneapi::tbb::auto_partitioner() ); 178 } 179 break; 180 case 3: { 181 static oneapi::tbb::affinity_partitioner affinity; 182 Invoker< Flavor, oneapi::tbb::affinity_partitioner, FooRange<Pad>, FooBody<Pad> > invoke_for; 183 invoke_for( rc, fc, affinity ); 184 } 185 break; 186 } 187 CHECK(std::find_if_not(Array, Array + i, [](const std::atomic<int>& v) { return v.load(std::memory_order_relaxed) == 1; }) == Array + i); 188 CHECK(std::find_if_not(Array + i, Array + N, [](const std::atomic<int>& v) { return v.load(std::memory_order_relaxed) == 0; }) == Array + N); 189 CHECK(FooBodyCount == 1); 190 } 191 } 192 } 193 194 #include <stdexcept> // std::invalid_argument 195 196 template <typename Flavor, typename T, typename Partitioner> 197 void TestParallelForWithStepSupportHelper(Partitioner& p) { 198 const T pfor_buffer_test_size = static_cast<T>(PFOR_BUFFER_TEST_SIZE); 199 const T pfor_buffer_actual_size = static_cast<T>(PFOR_BUFFER_ACTUAL_SIZE); 200 // Testing parallel_for with different step values 201 InvokerStep< Flavor, Partitioner, T, TestFunctor<T> > invoke_for; 202 for (T begin = 0; begin < pfor_buffer_test_size - 1; begin += pfor_buffer_test_size / 10 + 1) { 203 T step; 204 for (step = 1; step < pfor_buffer_test_size; step++) { 205 std::memset(pfor_buffer, 0, pfor_buffer_actual_size * sizeof(std::size_t)); 206 if (step == 1){ 207 invoke_for(begin, pfor_buffer_test_size, TestFunctor<T>(), p); 208 } else { 209 invoke_for(begin, pfor_buffer_test_size, step, TestFunctor<T>(), p); 210 } 211 // Verifying that parallel_for processed all items it should 212 for (T i = begin; i < pfor_buffer_test_size; i = i + step) { 213 if (pfor_buffer[i] != 1) { 214 CHECK_MESSAGE(false, "parallel_for didn't process all required elements"); 215 } 216 pfor_buffer[i] = 0; 217 } 218 // Verifying that no extra items were processed and right bound of array wasn't crossed 219 for (T i = 0; i < pfor_buffer_actual_size; i++) { 220 if (pfor_buffer[i] != 0) { 221 CHECK_MESSAGE(false, "parallel_for processed an extra element"); 222 } 223 } 224 } 225 } 226 } 227 228 template <typename Flavor, typename T> 229 void TestParallelForWithStepSupport() { 230 static oneapi::tbb::affinity_partitioner affinity_p; 231 oneapi::tbb::auto_partitioner auto_p; 232 oneapi::tbb::simple_partitioner simple_p; 233 oneapi::tbb::static_partitioner static_p; 234 empty_partitioner_tag p; 235 236 // Try out all partitioner combinations 237 TestParallelForWithStepSupportHelper< Flavor,T,empty_partitioner_tag >(p); 238 TestParallelForWithStepSupportHelper< Flavor,T,const oneapi::tbb::auto_partitioner >(auto_p); 239 TestParallelForWithStepSupportHelper< Flavor,T,const oneapi::tbb::simple_partitioner >(simple_p); 240 TestParallelForWithStepSupportHelper< Flavor,T,oneapi::tbb::affinity_partitioner >(affinity_p); 241 TestParallelForWithStepSupportHelper< Flavor,T,oneapi::tbb::static_partitioner >(static_p); 242 243 // Testing some corner cases 244 oneapi::tbb::parallel_for(static_cast<T>(2), static_cast<T>(1), static_cast<T>(1), TestFunctor<T>()); 245 } 246 247 //! Test simple parallel_for with different partitioners 248 //! \brief \ref interface \ref requirement 249 TEST_CASE("Basic parallel_for") { 250 std::atomic<unsigned long> counter{}; 251 const std::size_t number_of_partitioners = 5; 252 const std::size_t iterations = 100000; 253 254 oneapi::tbb::parallel_for(std::size_t(0), iterations, [&](std::size_t) { 255 counter++; 256 }); 257 258 oneapi::tbb::parallel_for(std::size_t(0), iterations, [&](std::size_t) { 259 counter++; 260 }, oneapi::tbb::simple_partitioner()); 261 262 oneapi::tbb::parallel_for(std::size_t(0), iterations, [&](std::size_t) { 263 counter++; 264 }, oneapi::tbb::auto_partitioner()); 265 266 oneapi::tbb::parallel_for(std::size_t(0), iterations, [&](std::size_t) { 267 counter++; 268 }, oneapi::tbb::static_partitioner()); 269 270 oneapi::tbb::affinity_partitioner aff; 271 oneapi::tbb::parallel_for(std::size_t(0), iterations, [&](std::size_t) { 272 counter++; 273 }, aff); 274 275 CHECK_EQ(counter.load(std::memory_order_relaxed), iterations * number_of_partitioners); 276 } 277 278 //! Testing parallel for with different partitioners and ranges ranges 279 //! \brief \ref interface \ref requirement \ref stress 280 TEST_CASE("Flog test") { 281 Flog<parallel_tag, 1>(); 282 Flog<parallel_tag, 10>(); 283 Flog<parallel_tag, 100>(); 284 Flog<parallel_tag, 1000>(); 285 Flog<parallel_tag, 10000>(); 286 } 287 288 //! Testing parallel for with different types and step 289 //! \brief \ref interface \ref requirement 290 TEST_CASE_TEMPLATE("parallel_for with step support", T, short, unsigned short, int, unsigned int, 291 long, unsigned long, long long, unsigned long long, std::size_t) { 292 // Testing with different integer types 293 TestParallelForWithStepSupport<parallel_tag, T>(); 294 } 295 296 //! Testing with different types of ranges and partitioners 297 //! \brief \ref interface \ref requirement 298 TEST_CASE("Testing parallel_for with partitioners") { 299 using namespace test_partitioner_utils::interaction_with_range_and_partitioner; 300 301 test_partitioner_utils::SimpleBody b; 302 oneapi::tbb::affinity_partitioner ap; 303 304 parallel_for(Range1(true, false), b, ap); 305 parallel_for(Range6(false, true), b, ap); 306 307 parallel_for(Range1(false, true), b, oneapi::tbb::simple_partitioner()); 308 parallel_for(Range6(false, true), b, oneapi::tbb::simple_partitioner()); 309 310 parallel_for(Range1(false, true), b, oneapi::tbb::auto_partitioner()); 311 parallel_for(Range6(false, true), b, oneapi::tbb::auto_partitioner()); 312 313 parallel_for(Range1(true, false), b, oneapi::tbb::static_partitioner()); 314 parallel_for(Range6(false, true), b, oneapi::tbb::static_partitioner()); 315 } 316