1 /* 2 Copyright (c) 2005-2022 Intel Corporation 3 4 Licensed under the Apache License, Version 2.0 (the "License"); 5 you may not use this file except in compliance with the License. 6 You may obtain a copy of the License at 7 8 http://www.apache.org/licenses/LICENSE-2.0 9 10 Unless required by applicable law or agreed to in writing, software 11 distributed under the License is distributed on an "AS IS" BASIS, 12 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 See the License for the specific language governing permissions and 14 limitations under the License. 15 */ 16 17 #ifndef _TBB_tbb_misc_H 18 #define _TBB_tbb_misc_H 19 20 #include "oneapi/tbb/detail/_config.h" 21 #include "oneapi/tbb/detail/_assert.h" 22 #include "oneapi/tbb/detail/_utils.h" 23 24 #if __TBB_ARENA_BINDING 25 #include "oneapi/tbb/info.h" 26 #endif /*__TBB_ARENA_BINDING*/ 27 28 #if __unix__ 29 #include <sys/param.h> // __FreeBSD_version 30 #if __FreeBSD_version >= 701000 31 #include <sys/cpuset.h> 32 #endif 33 #endif 34 35 #include <atomic> 36 37 // Does the operating system have a system call to pin a thread to a set of OS processors? 38 #define __TBB_OS_AFFINITY_SYSCALL_PRESENT ((__linux__ && !__ANDROID__) || (__FreeBSD_version >= 701000)) 39 // On IBM* Blue Gene* CNK nodes, the affinity API has restrictions that prevent its usability for TBB, 40 // and also sysconf(_SC_NPROCESSORS_ONLN) already takes process affinity into account. 41 #define __TBB_USE_OS_AFFINITY_SYSCALL (__TBB_OS_AFFINITY_SYSCALL_PRESENT && !__bg__) 42 43 namespace tbb { 44 namespace detail { 45 namespace r1 { 46 47 void runtime_warning(const char* format, ... ); 48 49 #if __TBB_ARENA_BINDING 50 class task_arena; 51 class task_scheduler_observer; 52 #endif /*__TBB_ARENA_BINDING*/ 53 54 const std::size_t MByte = 1024*1024; 55 56 #if __TBB_USE_WINAPI 57 // The Microsoft Documentation about Thread Stack Size states that 58 // "The default stack reservation size used by the linker is 1 MB" 59 const std::size_t ThreadStackSize = 1*MByte; 60 #else 61 const std::size_t ThreadStackSize = (sizeof(uintptr_t) <= 4 ? 2 : 4 )*MByte; 62 #endif 63 64 #ifndef __TBB_HardwareConcurrency 65 66 //! Returns maximal parallelism level supported by the current OS configuration. 67 int AvailableHwConcurrency(); 68 69 #else 70 71 inline int AvailableHwConcurrency() { 72 int n = __TBB_HardwareConcurrency(); 73 return n > 0 ? n : 1; // Fail safety strap 74 } 75 #endif /* __TBB_HardwareConcurrency */ 76 77 //! Returns OS regular memory page size 78 size_t DefaultSystemPageSize(); 79 80 //! Returns number of processor groups in the current OS configuration. 81 /** AvailableHwConcurrency must be called at least once before calling this method. **/ 82 int NumberOfProcessorGroups(); 83 84 #if _WIN32||_WIN64 85 86 //! Retrieves index of processor group containing processor with the given index 87 int FindProcessorGroupIndex ( int processorIndex ); 88 89 //! Affinitizes the thread to the specified processor group 90 void MoveThreadIntoProcessorGroup( void* hThread, int groupIndex ); 91 92 #endif /* _WIN32||_WIN64 */ 93 94 //! Prints TBB version information on stderr 95 void PrintVersion(); 96 97 //! Prints arbitrary extra TBB version information on stderr 98 void PrintExtraVersionInfo( const char* category, const char* format, ... ); 99 100 //! A callback routine to print RML version information on stderr 101 void PrintRMLVersionInfo( void* arg, const char* server_info ); 102 103 // For TBB compilation only; not to be used in public headers 104 #if defined(min) || defined(max) 105 #undef min 106 #undef max 107 #endif 108 109 //! Utility template function returning lesser of the two values. 110 /** Provided here to avoid including not strict safe <algorithm>.\n 111 In case operands cause signed/unsigned or size mismatch warnings it is caller's 112 responsibility to do the appropriate cast before calling the function. **/ 113 template<typename T> 114 T min ( const T& val1, const T& val2 ) { 115 return val1 < val2 ? val1 : val2; 116 } 117 118 //! Utility template function returning greater of the two values. 119 /** Provided here to avoid including not strict safe <algorithm>.\n 120 In case operands cause signed/unsigned or size mismatch warnings it is caller's 121 responsibility to do the appropriate cast before calling the function. **/ 122 template<typename T> 123 T max ( const T& val1, const T& val2 ) { 124 return val1 < val2 ? val2 : val1; 125 } 126 127 //! Utility helper structure to ease overload resolution 128 template<int > struct int_to_type {}; 129 130 //------------------------------------------------------------------------ 131 // FastRandom 132 //------------------------------------------------------------------------ 133 134 //! A fast random number generator. 135 /** Uses linear congruential method. */ 136 class FastRandom { 137 private: 138 unsigned x, c; 139 static const unsigned a = 0x9e3779b1; // a big prime number 140 public: 141 //! Get a random number. 142 unsigned short get() { 143 return get(x); 144 } 145 //! Get a random number for the given seed; update the seed for next use. 146 unsigned short get( unsigned& seed ) { 147 unsigned short r = (unsigned short)(seed>>16); 148 __TBB_ASSERT(c&1, "c must be odd for big rng period"); 149 seed = seed*a+c; 150 return r; 151 } 152 //! Construct a random number generator. 153 FastRandom( void* unique_ptr ) { init(uintptr_t(unique_ptr)); } 154 155 template <typename T> 156 void init( T seed ) { 157 init(seed,int_to_type<sizeof(seed)>()); 158 } 159 void init( uint64_t seed , int_to_type<8> ) { 160 init(uint32_t((seed>>32)+seed), int_to_type<4>()); 161 } 162 void init( uint32_t seed, int_to_type<4> ) { 163 // threads use different seeds for unique sequences 164 c = (seed|1)*0xba5703f5; // c must be odd, shuffle by a prime number 165 x = c^(seed>>1); // also shuffle x for the first get() invocation 166 } 167 }; 168 169 //------------------------------------------------------------------------ 170 // Atomic extensions 171 //------------------------------------------------------------------------ 172 173 //! Atomically replaces value of dst with newValue if they satisfy condition of compare predicate 174 /** Return value semantics is the same as for CAS. **/ 175 template<typename T1, class Pred> 176 T1 atomic_update(std::atomic<T1>& dst, T1 newValue, Pred compare) { 177 T1 oldValue = dst.load(std::memory_order_acquire); 178 while ( compare(oldValue, newValue) ) { 179 if ( dst.compare_exchange_strong(oldValue, newValue) ) 180 break; 181 } 182 return oldValue; 183 } 184 185 #if __TBB_USE_OS_AFFINITY_SYSCALL 186 #if __linux__ 187 typedef cpu_set_t basic_mask_t; 188 #elif __FreeBSD_version >= 701000 189 typedef cpuset_t basic_mask_t; 190 #else 191 #error affinity_helper is not implemented in this OS 192 #endif 193 class affinity_helper : no_copy { 194 basic_mask_t* threadMask; 195 int is_changed; 196 public: 197 affinity_helper() : threadMask(nullptr), is_changed(0) {} 198 ~affinity_helper(); 199 void protect_affinity_mask( bool restore_process_mask ); 200 void dismiss(); 201 }; 202 void destroy_process_mask(); 203 #else 204 class affinity_helper : no_copy { 205 public: 206 void protect_affinity_mask( bool ) {} 207 }; 208 inline void destroy_process_mask(){} 209 #endif /* __TBB_USE_OS_AFFINITY_SYSCALL */ 210 211 struct cpu_features_type { 212 bool rtm_enabled{false}; 213 bool waitpkg_enabled{false}; 214 }; 215 216 void detect_cpu_features(cpu_features_type& cpu_features); 217 218 #if __TBB_ARENA_BINDING 219 class binding_handler; 220 221 binding_handler* construct_binding_handler(int slot_num, int numa_id, int core_type_id, int max_threads_per_core); 222 void destroy_binding_handler(binding_handler* handler_ptr); 223 void apply_affinity_mask(binding_handler* handler_ptr, int slot_num); 224 void restore_affinity_mask(binding_handler* handler_ptr, int slot_num); 225 226 #endif /*__TBB_ARENA_BINDING*/ 227 228 // RTM specific section 229 // abort code for mutexes that detect a conflict with another thread. 230 enum { 231 speculation_not_supported = 0x00, 232 speculation_transaction_aborted = 0x01, 233 speculation_can_retry = 0x02, 234 speculation_memadd_conflict = 0x04, 235 speculation_buffer_overflow = 0x08, 236 speculation_breakpoint_hit = 0x10, 237 speculation_nested_abort = 0x20, 238 speculation_xabort_mask = 0xFF000000, 239 speculation_xabort_shift = 24, 240 speculation_xabort_not_free = 0xFF, // The value (0xFF) below comes from the Intel(R) 64 and IA-32 Architectures Optimization Reference Manual 12.4.5 lock not free 241 speculation_successful_begin = 0xFFFFFFFF, 242 speculation_retry = speculation_transaction_aborted 243 | speculation_can_retry 244 | speculation_memadd_conflict 245 }; 246 247 // We suppose that successful transactions are sequentially ordered and 248 // do not require additional memory fences around them. 249 // Technically it can be achieved only if xbegin has implicit 250 // acquire memory semantics an xend/xabort has release memory semantics on compiler and hardware level. 251 // See the article: https://arxiv.org/pdf/1710.04839.pdf 252 static inline unsigned int begin_transaction() { 253 #if __TBB_TSX_INTRINSICS_PRESENT 254 return _xbegin(); 255 #else 256 return speculation_not_supported; // return unsuccessful code 257 #endif 258 } 259 260 static inline void end_transaction() { 261 #if __TBB_TSX_INTRINSICS_PRESENT 262 _xend(); 263 #endif 264 } 265 266 static inline void abort_transaction() { 267 #if __TBB_TSX_INTRINSICS_PRESENT 268 _xabort(speculation_xabort_not_free); 269 #endif 270 } 271 272 #if TBB_USE_ASSERT 273 static inline unsigned char is_in_transaction() { 274 #if __TBB_TSX_INTRINSICS_PRESENT 275 return _xtest(); 276 #else 277 return 0; 278 #endif 279 } 280 #endif // TBB_USE_ASSERT 281 282 } // namespace r1 283 } // namespace detail 284 } // namespace tbb 285 286 #endif /* _TBB_tbb_misc_H */ 287