1 /* 2 Copyright (c) 2005-2021 Intel Corporation 3 4 Licensed under the Apache License, Version 2.0 (the "License"); 5 you may not use this file except in compliance with the License. 6 You may obtain a copy of the License at 7 8 http://www.apache.org/licenses/LICENSE-2.0 9 10 Unless required by applicable law or agreed to in writing, software 11 distributed under the License is distributed on an "AS IS" BASIS, 12 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 See the License for the specific language governing permissions and 14 limitations under the License. 15 */ 16 17 #ifndef _TBB_tbb_misc_H 18 #define _TBB_tbb_misc_H 19 20 #include "oneapi/tbb/detail/_config.h" 21 #include "oneapi/tbb/detail/_assert.h" 22 #include "oneapi/tbb/detail/_utils.h" 23 24 #if __TBB_ARENA_BINDING 25 #include "oneapi/tbb/info.h" 26 #endif /*__TBB_ARENA_BINDING*/ 27 28 #if __unix__ 29 #include <sys/param.h> // __FreeBSD_version 30 #if __FreeBSD_version >= 701000 31 #include <sys/cpuset.h> 32 #endif 33 #endif 34 35 #include <atomic> 36 37 // Does the operating system have a system call to pin a thread to a set of OS processors? 38 #define __TBB_OS_AFFINITY_SYSCALL_PRESENT ((__linux__ && !__ANDROID__) || (__FreeBSD_version >= 701000)) 39 // On IBM* Blue Gene* CNK nodes, the affinity API has restrictions that prevent its usability for TBB, 40 // and also sysconf(_SC_NPROCESSORS_ONLN) already takes process affinity into account. 41 #define __TBB_USE_OS_AFFINITY_SYSCALL (__TBB_OS_AFFINITY_SYSCALL_PRESENT && !__bg__) 42 43 namespace tbb { 44 namespace detail { 45 namespace r1 { 46 47 void runtime_warning(const char* format, ... ); 48 49 #if __TBB_ARENA_BINDING 50 class task_arena; 51 class task_scheduler_observer; 52 #endif /*__TBB_ARENA_BINDING*/ 53 54 const std::size_t MByte = 1024*1024; 55 56 #if __TBB_WIN8UI_SUPPORT && (_WIN32_WINNT < 0x0A00) 57 // In Win8UI mode (Windows 8 Store* applications), TBB uses a thread creation API 58 // that does not allow to specify the stack size. 59 // Still, the thread stack size value, either explicit or default, is used by the scheduler. 60 // So here we set the default value to match the platform's default of 1MB. 61 const std::size_t ThreadStackSize = 1*MByte; 62 #else 63 const std::size_t ThreadStackSize = (sizeof(uintptr_t) <= 4 ? 2 : 4 )*MByte; 64 #endif 65 66 #ifndef __TBB_HardwareConcurrency 67 68 //! Returns maximal parallelism level supported by the current OS configuration. 69 int AvailableHwConcurrency(); 70 71 #else 72 73 inline int AvailableHwConcurrency() { 74 int n = __TBB_HardwareConcurrency(); 75 return n > 0 ? n : 1; // Fail safety strap 76 } 77 #endif /* __TBB_HardwareConcurrency */ 78 79 //! Returns OS regular memory page size 80 size_t DefaultSystemPageSize(); 81 82 //! Returns number of processor groups in the current OS configuration. 83 /** AvailableHwConcurrency must be called at least once before calling this method. **/ 84 int NumberOfProcessorGroups(); 85 86 #if _WIN32||_WIN64 87 88 //! Retrieves index of processor group containing processor with the given index 89 int FindProcessorGroupIndex ( int processorIndex ); 90 91 //! Affinitizes the thread to the specified processor group 92 void MoveThreadIntoProcessorGroup( void* hThread, int groupIndex ); 93 94 #endif /* _WIN32||_WIN64 */ 95 96 //! Prints TBB version information on stderr 97 void PrintVersion(); 98 99 //! Prints arbitrary extra TBB version information on stderr 100 void PrintExtraVersionInfo( const char* category, const char* format, ... ); 101 102 //! A callback routine to print RML version information on stderr 103 void PrintRMLVersionInfo( void* arg, const char* server_info ); 104 105 // For TBB compilation only; not to be used in public headers 106 #if defined(min) || defined(max) 107 #undef min 108 #undef max 109 #endif 110 111 //! Utility template function returning lesser of the two values. 112 /** Provided here to avoid including not strict safe <algorithm>.\n 113 In case operands cause signed/unsigned or size mismatch warnings it is caller's 114 responsibility to do the appropriate cast before calling the function. **/ 115 template<typename T> 116 T min ( const T& val1, const T& val2 ) { 117 return val1 < val2 ? val1 : val2; 118 } 119 120 //! Utility template function returning greater of the two values. 121 /** Provided here to avoid including not strict safe <algorithm>.\n 122 In case operands cause signed/unsigned or size mismatch warnings it is caller's 123 responsibility to do the appropriate cast before calling the function. **/ 124 template<typename T> 125 T max ( const T& val1, const T& val2 ) { 126 return val1 < val2 ? val2 : val1; 127 } 128 129 //! Utility helper structure to ease overload resolution 130 template<int > struct int_to_type {}; 131 132 //------------------------------------------------------------------------ 133 // FastRandom 134 //------------------------------------------------------------------------ 135 136 //! A fast random number generator. 137 /** Uses linear congruential method. */ 138 class FastRandom { 139 private: 140 unsigned x, c; 141 static const unsigned a = 0x9e3779b1; // a big prime number 142 public: 143 //! Get a random number. 144 unsigned short get() { 145 return get(x); 146 } 147 //! Get a random number for the given seed; update the seed for next use. 148 unsigned short get( unsigned& seed ) { 149 unsigned short r = (unsigned short)(seed>>16); 150 __TBB_ASSERT(c&1, "c must be odd for big rng period"); 151 seed = seed*a+c; 152 return r; 153 } 154 //! Construct a random number generator. 155 FastRandom( void* unique_ptr ) { init(uintptr_t(unique_ptr)); } 156 157 template <typename T> 158 void init( T seed ) { 159 init(seed,int_to_type<sizeof(seed)>()); 160 } 161 void init( uint64_t seed , int_to_type<8> ) { 162 init(uint32_t((seed>>32)+seed), int_to_type<4>()); 163 } 164 void init( uint32_t seed, int_to_type<4> ) { 165 // threads use different seeds for unique sequences 166 c = (seed|1)*0xba5703f5; // c must be odd, shuffle by a prime number 167 x = c^(seed>>1); // also shuffle x for the first get() invocation 168 } 169 }; 170 171 //------------------------------------------------------------------------ 172 // Atomic extensions 173 //------------------------------------------------------------------------ 174 175 //! Atomically replaces value of dst with newValue if they satisfy condition of compare predicate 176 /** Return value semantics is the same as for CAS. **/ 177 template<typename T1, class Pred> 178 T1 atomic_update(std::atomic<T1>& dst, T1 newValue, Pred compare) { 179 T1 oldValue = dst.load(std::memory_order_acquire); 180 while ( compare(oldValue, newValue) ) { 181 if ( dst.compare_exchange_strong(oldValue, newValue) ) 182 break; 183 } 184 return oldValue; 185 } 186 187 #if __TBB_USE_OS_AFFINITY_SYSCALL 188 #if __linux__ 189 typedef cpu_set_t basic_mask_t; 190 #elif __FreeBSD_version >= 701000 191 typedef cpuset_t basic_mask_t; 192 #else 193 #error affinity_helper is not implemented in this OS 194 #endif 195 class affinity_helper : no_copy { 196 basic_mask_t* threadMask; 197 int is_changed; 198 public: 199 affinity_helper() : threadMask(NULL), is_changed(0) {} 200 ~affinity_helper(); 201 void protect_affinity_mask( bool restore_process_mask ); 202 void dismiss(); 203 }; 204 void destroy_process_mask(); 205 #else 206 class affinity_helper : no_copy { 207 public: 208 void protect_affinity_mask( bool ) {} 209 }; 210 inline void destroy_process_mask(){} 211 #endif /* __TBB_USE_OS_AFFINITY_SYSCALL */ 212 213 struct cpu_features_type { 214 bool rtm_enabled{false}; 215 bool waitpkg_enabled{false}; 216 }; 217 218 void detect_cpu_features(cpu_features_type& cpu_features); 219 220 #if __TBB_ARENA_BINDING 221 class binding_handler; 222 223 binding_handler* construct_binding_handler(int slot_num, int numa_id, int core_type_id, int max_threads_per_core); 224 void destroy_binding_handler(binding_handler* handler_ptr); 225 void apply_affinity_mask(binding_handler* handler_ptr, int slot_num); 226 void restore_affinity_mask(binding_handler* handler_ptr, int slot_num); 227 228 #endif /*__TBB_ARENA_BINDING*/ 229 230 // RTM specific section 231 // abort code for mutexes that detect a conflict with another thread. 232 enum { 233 speculation_not_supported = 0x00, 234 speculation_transaction_aborted = 0x01, 235 speculation_can_retry = 0x02, 236 speculation_memadd_conflict = 0x04, 237 speculation_buffer_overflow = 0x08, 238 speculation_breakpoint_hit = 0x10, 239 speculation_nested_abort = 0x20, 240 speculation_xabort_mask = 0xFF000000, 241 speculation_xabort_shift = 24, 242 speculation_xabort_not_free = 0xFF, // The value (0xFF) below comes from the Intel(R) 64 and IA-32 Architectures Optimization Reference Manual 12.4.5 lock not free 243 speculation_successful_begin = 0xFFFFFFFF, 244 speculation_retry = speculation_transaction_aborted 245 | speculation_can_retry 246 | speculation_memadd_conflict 247 }; 248 249 // We suppose that successful transactions are sequentially ordered and 250 // do not require additional memory fences around them. 251 // Technically it can be achieved only if xbegin has implicit 252 // acquire memory semantics an xend/xabort has release memory semantics on compiler and hardware level. 253 // See the article: https://arxiv.org/pdf/1710.04839.pdf 254 static inline unsigned int begin_transaction() { 255 #if __TBB_TSX_INTRINSICS_PRESENT 256 return _xbegin(); 257 #else 258 return speculation_not_supported; // return unsuccessful code 259 #endif 260 } 261 262 static inline void end_transaction() { 263 #if __TBB_TSX_INTRINSICS_PRESENT 264 _xend(); 265 #endif 266 } 267 268 static inline void abort_transaction() { 269 #if __TBB_TSX_INTRINSICS_PRESENT 270 _xabort(speculation_xabort_not_free); 271 #endif 272 } 273 274 #if TBB_USE_ASSERT 275 static inline unsigned char is_in_transaction() { 276 #if __TBB_TSX_INTRINSICS_PRESENT 277 return _xtest(); 278 #else 279 return 0; 280 #endif 281 } 282 #endif // TBB_USE_ASSERT 283 284 } // namespace r1 285 } // namespace detail 286 } // namespace tbb 287 288 #endif /* _TBB_tbb_misc_H */ 289