1 /* 2 Copyright (c) 2005-2021 Intel Corporation 3 4 Licensed under the Apache License, Version 2.0 (the "License"); 5 you may not use this file except in compliance with the License. 6 You may obtain a copy of the License at 7 8 http://www.apache.org/licenses/LICENSE-2.0 9 10 Unless required by applicable law or agreed to in writing, software 11 distributed under the License is distributed on an "AS IS" BASIS, 12 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 See the License for the specific language governing permissions and 14 limitations under the License. 15 */ 16 17 #define DOCTEST_CONFIG_SUPER_FAST_ASSERTS 18 #include "common/test.h" 19 #include "common/utils.h" 20 #include "common/utils_report.h" 21 22 #include "oneapi/tbb/parallel_for.h" 23 #include "oneapi/tbb/tick_count.h" 24 25 #include "../tbb/test_partitioner.h" 26 27 #include <atomic> 28 29 //! \file conformance_parallel_for.cpp 30 //! \brief Test for [algorithms.parallel_for algorithms.auto_partitioner algorithms.simple_partitioner algorithms.static_partitioner algorithms.affinity_partitioner] specification 31 32 static const int N = 500; 33 static std::atomic<int> Array[N]; 34 35 struct parallel_tag {}; 36 struct empty_partitioner_tag {}; 37 38 // Testing parallel_for with step support 39 const std::size_t PFOR_BUFFER_TEST_SIZE = 1024; 40 // test_buffer has some extra items beyond its right bound 41 const std::size_t PFOR_BUFFER_ACTUAL_SIZE = PFOR_BUFFER_TEST_SIZE + 1024; 42 size_t pfor_buffer[PFOR_BUFFER_ACTUAL_SIZE]; 43 44 template<typename T> 45 class TestFunctor{ 46 public: 47 void operator ()(T index) const { 48 pfor_buffer[index]++; 49 } 50 }; 51 52 static std::atomic<int> FooBodyCount; 53 54 // A range object whose only public members are those required by the Range concept. 55 template<size_t Pad> 56 class FooRange { 57 // Start of range 58 int start; 59 60 // Size of range 61 int size; 62 FooRange( int start_, int size_ ) : start(start_), size(size_) { 63 utils::zero_fill<char>(pad, Pad); 64 pad[Pad-1] = 'x'; 65 } 66 template<typename Flavor_, std::size_t Pad_> friend void Flog( ); 67 template<size_t Pad_> friend class FooBody; 68 void operator&(); 69 70 char pad[Pad]; 71 public: 72 bool empty() const {return size==0;} 73 bool is_divisible() const {return size>1;} 74 FooRange( FooRange& original, oneapi::tbb::split ) : size(original.size/2) { 75 original.size -= size; 76 start = original.start+original.size; 77 CHECK( original.pad[Pad-1]=='x'); 78 pad[Pad-1] = 'x'; 79 } 80 }; 81 82 // A range object whose only public members are those required by the parallel_for.h body concept. 83 template<size_t Pad> 84 class FooBody { 85 public: 86 ~FooBody() { 87 --FooBodyCount; 88 for( std::size_t i=0; i<sizeof(*this); ++i ) 89 reinterpret_cast<char*>(this)[i] = -1; 90 } 91 // Copy constructor 92 FooBody( const FooBody& other ) : array(other.array), state(other.state) { 93 ++FooBodyCount; 94 CHECK(state == LIVE); 95 } 96 void operator()( FooRange<Pad>& r ) const { 97 for( int k=0; k<r.size; ++k ) { 98 const int i = array[r.start+k]++; 99 CHECK( i==0 ); 100 } 101 } 102 private: 103 const int LIVE = 0x1234; 104 std::atomic<int>* array; 105 int state; 106 friend class FooRange<Pad>; 107 template<typename Flavor_, std::size_t Pad_> friend void Flog( ); 108 FooBody( std::atomic<int>* array_ ) : array(array_), state(LIVE) {} 109 }; 110 111 template <typename Flavor, typename Partitioner, typename Range, typename Body> 112 struct Invoker; 113 114 template <typename Range, typename Body> 115 struct Invoker<parallel_tag, empty_partitioner_tag, Range, Body> { 116 void operator()( const Range& r, const Body& body, empty_partitioner_tag& ) { 117 oneapi::tbb::parallel_for( r, body ); 118 } 119 }; 120 121 template <typename Partitioner, typename Range, typename Body> 122 struct Invoker<parallel_tag, Partitioner, Range, Body> { 123 void operator()( const Range& r, const Body& body, Partitioner& p ) { 124 oneapi::tbb::parallel_for( r, body, p ); 125 } 126 }; 127 128 template <typename Flavor, typename Partitioner, typename T, typename Body> 129 struct InvokerStep; 130 131 template <typename T, typename Body> 132 struct InvokerStep<parallel_tag, empty_partitioner_tag, T, Body> { 133 void operator()( const T& first, const T& last, const Body& f, empty_partitioner_tag& ) { 134 oneapi::tbb::parallel_for( first, last, f ); 135 } 136 void operator()( const T& first, const T& last, const T& step, const Body& f, empty_partitioner_tag& ) { 137 oneapi::tbb::parallel_for( first, last, step, f ); 138 } 139 }; 140 141 template <typename Partitioner, typename T, typename Body> 142 struct InvokerStep<parallel_tag, Partitioner, T, Body> { 143 void operator()( const T& first, const T& last, const Body& f, Partitioner& p ) { 144 oneapi::tbb::parallel_for( first, last, f, p ); 145 } 146 void operator()( const T& first, const T& last, const T& step, const Body& f, Partitioner& p ) { 147 oneapi::tbb::parallel_for( first, last, step, f, p ); 148 } 149 }; 150 151 template<typename Flavor, std::size_t Pad> 152 void Flog() { 153 for ( int i=0; i<N; ++i ) { 154 for ( int mode = 0; mode < 4; ++mode) { 155 FooRange<Pad> r( 0, i ); 156 const FooRange<Pad> rc = r; 157 FooBody<Pad> f( Array ); 158 const FooBody<Pad> fc = f; 159 for (int a_i = 0; a_i < N; a_i++) { 160 Array[a_i].store(0, std::memory_order_relaxed); 161 } 162 FooBodyCount = 1; 163 switch (mode) { 164 case 0: { 165 empty_partitioner_tag p; 166 Invoker< Flavor, empty_partitioner_tag, FooRange<Pad>, FooBody<Pad> > invoke_for; 167 invoke_for( rc, fc, p ); 168 } 169 break; 170 case 1: { 171 Invoker< Flavor, const oneapi::tbb::simple_partitioner, FooRange<Pad>, FooBody<Pad> > invoke_for; 172 invoke_for( rc, fc, oneapi::tbb::simple_partitioner() ); 173 } 174 break; 175 case 2: { 176 Invoker< Flavor, const oneapi::tbb::auto_partitioner, FooRange<Pad>, FooBody<Pad> > invoke_for; 177 invoke_for( rc, fc, oneapi::tbb::auto_partitioner() ); 178 } 179 break; 180 case 3: { 181 static oneapi::tbb::affinity_partitioner affinity; 182 Invoker< Flavor, oneapi::tbb::affinity_partitioner, FooRange<Pad>, FooBody<Pad> > invoke_for; 183 invoke_for( rc, fc, affinity ); 184 } 185 break; 186 } 187 for( int j=0; j<i; ++j ) 188 CHECK( Array[j]==1); 189 for( int j=i; j<N; ++j ) 190 CHECK( Array[j]==0); 191 CHECK( FooBodyCount==1); 192 } 193 } 194 } 195 196 #include <stdexcept> // std::invalid_argument 197 198 template <typename Flavor, typename T, typename Partitioner> 199 void TestParallelForWithStepSupportHelper(Partitioner& p) { 200 const T pfor_buffer_test_size = static_cast<T>(PFOR_BUFFER_TEST_SIZE); 201 const T pfor_buffer_actual_size = static_cast<T>(PFOR_BUFFER_ACTUAL_SIZE); 202 // Testing parallel_for with different step values 203 InvokerStep< Flavor, Partitioner, T, TestFunctor<T> > invoke_for; 204 for (T begin = 0; begin < pfor_buffer_test_size - 1; begin += pfor_buffer_test_size / 10 + 1) { 205 T step; 206 for (step = 1; step < pfor_buffer_test_size; step++) { 207 std::memset(pfor_buffer, 0, pfor_buffer_actual_size * sizeof(std::size_t)); 208 if (step == 1){ 209 invoke_for(begin, pfor_buffer_test_size, TestFunctor<T>(), p); 210 } else { 211 invoke_for(begin, pfor_buffer_test_size, step, TestFunctor<T>(), p); 212 } 213 // Verifying that parallel_for processed all items it should 214 for (T i = begin; i < pfor_buffer_test_size; i = i + step) { 215 if (pfor_buffer[i] != 1) { 216 CHECK_MESSAGE(false, "parallel_for didn't process all required elements"); 217 } 218 pfor_buffer[i] = 0; 219 } 220 // Verifying that no extra items were processed and right bound of array wasn't crossed 221 for (T i = 0; i < pfor_buffer_actual_size; i++) { 222 if (pfor_buffer[i] != 0) { 223 CHECK_MESSAGE(false, "parallel_for processed an extra element"); 224 } 225 } 226 } 227 } 228 } 229 230 template <typename Flavor, typename T> 231 void TestParallelForWithStepSupport() { 232 static oneapi::tbb::affinity_partitioner affinity_p; 233 oneapi::tbb::auto_partitioner auto_p; 234 oneapi::tbb::simple_partitioner simple_p; 235 oneapi::tbb::static_partitioner static_p; 236 empty_partitioner_tag p; 237 238 // Try out all partitioner combinations 239 TestParallelForWithStepSupportHelper< Flavor,T,empty_partitioner_tag >(p); 240 TestParallelForWithStepSupportHelper< Flavor,T,const oneapi::tbb::auto_partitioner >(auto_p); 241 TestParallelForWithStepSupportHelper< Flavor,T,const oneapi::tbb::simple_partitioner >(simple_p); 242 TestParallelForWithStepSupportHelper< Flavor,T,oneapi::tbb::affinity_partitioner >(affinity_p); 243 TestParallelForWithStepSupportHelper< Flavor,T,oneapi::tbb::static_partitioner >(static_p); 244 245 // Testing some corner cases 246 oneapi::tbb::parallel_for(static_cast<T>(2), static_cast<T>(1), static_cast<T>(1), TestFunctor<T>()); 247 } 248 249 //! Test simple parallel_for with different partitioners 250 //! \brief \ref interface \ref requirement 251 TEST_CASE("Basic parallel_for") { 252 std::atomic<unsigned long> counter{}; 253 const std::size_t number_of_partitioners = 5; 254 const std::size_t iterations = 100000; 255 256 oneapi::tbb::parallel_for(std::size_t(0), iterations, [&](std::size_t) { 257 counter++; 258 }); 259 260 oneapi::tbb::parallel_for(std::size_t(0), iterations, [&](std::size_t) { 261 counter++; 262 }, oneapi::tbb::simple_partitioner()); 263 264 oneapi::tbb::parallel_for(std::size_t(0), iterations, [&](std::size_t) { 265 counter++; 266 }, oneapi::tbb::auto_partitioner()); 267 268 oneapi::tbb::parallel_for(std::size_t(0), iterations, [&](std::size_t) { 269 counter++; 270 }, oneapi::tbb::static_partitioner()); 271 272 oneapi::tbb::affinity_partitioner aff; 273 oneapi::tbb::parallel_for(std::size_t(0), iterations, [&](std::size_t) { 274 counter++; 275 }, aff); 276 277 CHECK_EQ(counter.load(std::memory_order_relaxed), iterations * number_of_partitioners); 278 } 279 280 //! Testing parallel for with different partitioners and ranges ranges 281 //! \brief \ref interface \ref requirement \ref stress 282 TEST_CASE("Flog test") { 283 Flog<parallel_tag, 1>(); 284 Flog<parallel_tag, 10>(); 285 Flog<parallel_tag, 100>(); 286 Flog<parallel_tag, 1000>(); 287 Flog<parallel_tag, 10000>(); 288 } 289 290 //! Testing parallel for with different types and step 291 //! \brief \ref interface \ref requirement 292 TEST_CASE_TEMPLATE("parallel_for with step support", T, short, unsigned short, int, unsigned int, 293 long, unsigned long, long long, unsigned long long, std::size_t) { 294 // Testing with different integer types 295 TestParallelForWithStepSupport<parallel_tag, T>(); 296 } 297 298 //! Testing with different types of ranges and partitioners 299 //! \brief \ref interface \ref requirement 300 TEST_CASE("Testing parallel_for with partitioners") { 301 using namespace test_partitioner_utils::interaction_with_range_and_partitioner; 302 303 test_partitioner_utils::SimpleBody b; 304 oneapi::tbb::affinity_partitioner ap; 305 306 parallel_for(Range1(true, false), b, ap); 307 parallel_for(Range6(false, true), b, ap); 308 309 parallel_for(Range1(false, true), b, oneapi::tbb::simple_partitioner()); 310 parallel_for(Range6(false, true), b, oneapi::tbb::simple_partitioner()); 311 312 parallel_for(Range1(false, true), b, oneapi::tbb::auto_partitioner()); 313 parallel_for(Range6(false, true), b, oneapi::tbb::auto_partitioner()); 314 315 parallel_for(Range1(true, false), b, oneapi::tbb::static_partitioner()); 316 parallel_for(Range6(false, true), b, oneapi::tbb::static_partitioner()); 317 } 318