xref: /oneTBB/src/tbb/misc.h (revision 0a2b3987)
1 /*
2     Copyright (c) 2005-2021 Intel Corporation
3 
4     Licensed under the Apache License, Version 2.0 (the "License");
5     you may not use this file except in compliance with the License.
6     You may obtain a copy of the License at
7 
8         http://www.apache.org/licenses/LICENSE-2.0
9 
10     Unless required by applicable law or agreed to in writing, software
11     distributed under the License is distributed on an "AS IS" BASIS,
12     WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13     See the License for the specific language governing permissions and
14     limitations under the License.
15 */
16 
17 #ifndef _TBB_tbb_misc_H
18 #define _TBB_tbb_misc_H
19 
20 #include "oneapi/tbb/detail/_config.h"
21 #include "oneapi/tbb/detail/_assert.h"
22 #include "oneapi/tbb/detail/_utils.h"
23 
24 #if __TBB_ARENA_BINDING
25 #include "oneapi/tbb/info.h"
26 #endif /*__TBB_ARENA_BINDING*/
27 
28 #if __unix__
29 #include <sys/param.h>  // __FreeBSD_version
30 #if __FreeBSD_version >= 701000
31 #include <sys/cpuset.h>
32 #endif
33 #endif
34 
35 #include <atomic>
36 
// Non-zero when the operating system provides a system call for pinning a thread
// to a set of OS processors: Linux other than Android, or FreeBSD with
// __FreeBSD_version >= 701000.
#define __TBB_OS_AFFINITY_SYSCALL_PRESENT ((__linux__ && !__ANDROID__) || (__FreeBSD_version >= 701000))
// Non-zero when TBB should actually use that system call.
// On IBM* Blue Gene* CNK nodes, the affinity API has restrictions that prevent its usability for TBB,
// and sysconf(_SC_NPROCESSORS_ONLN) already takes process affinity into account there.
#define __TBB_USE_OS_AFFINITY_SYSCALL (__TBB_OS_AFFINITY_SYSCALL_PRESENT && !__bg__)
42 
43 namespace tbb {
44 namespace detail {
45 namespace r1 {
46 
//! Reports a runtime warning built from a format string and a variable argument list.
/** Only declared here; the definition lives in another translation unit.
    NOTE(review): the format string is presumably printf-style - confirm against the definition. **/
void runtime_warning( const char* format, ... );
48 
#if __TBB_ARENA_BINDING
// Forward declarations, made visible only when arena binding support is compiled in.
class task_arena;
class task_scheduler_observer;
#endif /* __TBB_ARENA_BINDING */
53 
//! Number of bytes in one megabyte (2^20).
const std::size_t MByte = std::size_t(1) << 20;

#if __TBB_WIN8UI_SUPPORT && (_WIN32_WINNT < 0x0A00)
// In Win8UI mode (Windows 8 Store* applications), TBB uses a thread creation API
// that does not allow specifying the stack size.
// The thread stack size value, explicit or default, is still used by the scheduler,
// so the default is set to match the platform's default of 1MB.
const std::size_t ThreadStackSize = MByte;
#else
//! Default thread stack size: 2MB when uintptr_t is at most 4 bytes wide, 4MB otherwise.
const std::size_t ThreadStackSize = MByte * (sizeof(uintptr_t) > 4 ? 4 : 2);
#endif
65 
#ifdef __TBB_HardwareConcurrency

//! Returns the parallelism level reported by __TBB_HardwareConcurrency().
/** A non-positive report is replaced by 1, so the result is always at least 1. **/
inline int AvailableHwConcurrency() {
    const int reported = __TBB_HardwareConcurrency();
    if ( reported > 0 ) {
        return reported;
    }
    return 1; // Fail safety strap
}

#else

//! Returns maximal parallelism level supported by the current OS configuration.
int AvailableHwConcurrency();

#endif /* __TBB_HardwareConcurrency */
78 
//! Returns the regular memory page size of the OS.
size_t DefaultSystemPageSize();

//! Returns the number of processor groups in the current OS configuration.
/** Precondition: AvailableHwConcurrency must have been called at least once
    before this function is called. **/
int NumberOfProcessorGroups();
85 
#if _WIN32||_WIN64

//! Returns the index of the processor group that contains the processor with the given index.
int FindProcessorGroupIndex( int processorIndex );

//! Affinitizes the thread identified by hThread to the processor group groupIndex.
void MoveThreadIntoProcessorGroup( void* hThread, int groupIndex );

#endif /* _WIN32||_WIN64 */
95 
//! Prints TBB version information on stderr.
void PrintVersion();

//! Prints arbitrary extra TBB version information on stderr.
/** The text is described by a category label plus a format string with a
    variable argument list. **/
void PrintExtraVersionInfo( const char* category, const char* format, ... );

//! Callback routine that prints RML version information on stderr.
void PrintRMLVersionInfo( void* arg, const char* server_info );
104 
// For TBB compilation only; not to be used in public headers.
// Removes any min/max macros so that functions with those names can be declared.
// #undef of a name that is not currently a macro is a no-op, so no guard is needed.
#undef min
#undef max
110 
//! Utility function template returning the lesser of two values.
/** Provided here to avoid including <algorithm>, which is not strict-safe.\n
    Only operator< is used; when val1 is not less than val2, a copy of val2 is returned.
    If the operands would cause signed/unsigned or size mismatch warnings, it is the
    caller's responsibility to cast them appropriately before the call. **/
template<typename T>
T min ( const T& val1, const T& val2 ) {
    if ( val1 < val2 ) {
        return val1;
    }
    return val2;
}
119 
//! Utility function template returning the greater of two values.
/** Provided here to avoid including <algorithm>, which is not strict-safe.\n
    Only operator< is used; when val1 is not less than val2, a copy of val1 is returned.
    If the operands would cause signed/unsigned or size mismatch warnings, it is the
    caller's responsibility to cast them appropriately before the call. **/
template<typename T>
T max ( const T& val1, const T& val2 ) {
    if ( val1 < val2 ) {
        return val2;
    }
    return val1;
}
128 
//! Empty tag type that turns an integer constant into a distinct type.
/** Used to ease overload resolution: overloads can be selected by passing
    int_to_type<N>() for different values of N. **/
template<int N>
struct int_to_type {
};
131 
132 //------------------------------------------------------------------------
133 // FastRandom
134 //------------------------------------------------------------------------
135 
136 //! A fast random number generator.
137 /** Uses linear congruential method. */
138 class FastRandom {
139 private:
140     unsigned x, c;
141     static const unsigned a = 0x9e3779b1; // a big prime number
142 public:
143     //! Get a random number.
144     unsigned short get() {
145         return get(x);
146     }
147     //! Get a random number for the given seed; update the seed for next use.
148     unsigned short get( unsigned& seed ) {
149         unsigned short r = (unsigned short)(seed>>16);
150         __TBB_ASSERT(c&1, "c must be odd for big rng period");
151         seed = seed*a+c;
152         return r;
153     }
154     //! Construct a random number generator.
155     FastRandom( void* unique_ptr ) { init(uintptr_t(unique_ptr)); }
156 
157     template <typename T>
158     void init( T seed ) {
159         init(seed,int_to_type<sizeof(seed)>());
160     }
161     void init( uint64_t seed , int_to_type<8> ) {
162         init(uint32_t((seed>>32)+seed), int_to_type<4>());
163     }
164     void init( uint32_t seed, int_to_type<4> ) {
165         // threads use different seeds for unique sequences
166         c = (seed|1)*0xba5703f5; // c must be odd, shuffle by a prime number
167         x = c^(seed>>1); // also shuffle x for the first get() invocation
168     }
169 };
170 
171 //------------------------------------------------------------------------
172 // Atomic extensions
173 //------------------------------------------------------------------------
174 
//! Atomically replaces the value of dst with newValue while compare(current, newValue) holds.
/** The current value is read with acquire ordering. As long as the predicate
    accepts the pair (current value, newValue), a strong compare-and-swap is
    attempted; a failed attempt refreshes the current value and the predicate is
    evaluated again.
    Return value semantics is the same as for CAS: the value of dst observed just
    before a successful swap, or the value for which the predicate returned false. **/
template<typename T1, class Pred>
T1 atomic_update(std::atomic<T1>& dst, T1 newValue, Pred compare) {
    T1 observed = dst.load(std::memory_order_acquire);
    for (;;) {
        if ( !compare(observed, newValue) ) {
            return observed; // predicate rejected the update; dst is left untouched
        }
        if ( dst.compare_exchange_strong(observed, newValue) ) {
            return observed; // swap succeeded; observed still holds the previous value
        }
        // CAS failure stored the fresh value of dst into observed; try again.
    }
}
186 
187 #if __TBB_USE_OS_AFFINITY_SYSCALL
188   #if __linux__
189     typedef cpu_set_t basic_mask_t;
190   #elif __FreeBSD_version >= 701000
191     typedef cpuset_t basic_mask_t;
192   #else
193     #error affinity_helper is not implemented in this OS
194   #endif
195     class affinity_helper : no_copy {
196         basic_mask_t* threadMask;
197         int is_changed;
198     public:
199         affinity_helper() : threadMask(NULL), is_changed(0) {}
200         ~affinity_helper();
201         void protect_affinity_mask( bool restore_process_mask  );
202         void dismiss();
203     };
204     void destroy_process_mask();
205 #else
206     class affinity_helper : no_copy {
207     public:
208         void protect_affinity_mask( bool ) {}
209     };
210     inline void destroy_process_mask(){}
211 #endif /* __TBB_USE_OS_AFFINITY_SYSCALL */
212 
//! Set of CPU feature flags; every flag is false until set otherwise.
struct cpu_features_type {
    bool rtm_enabled = false;
    bool waitpkg_enabled = false;
};

//! Detects CPU features and reports them through cpu_features.
/** Only declared here; the definition lives in another translation unit. **/
void detect_cpu_features(cpu_features_type& cpu_features);
219 
#if __TBB_ARENA_BINDING
// Opaque handler type used by the arena binding functions declared below;
// its definition is not part of this header.
class binding_handler;

//! Creates a binding handler for the given slot count, NUMA node, core type and per-core thread limit.
/** NOTE(review): exact parameter semantics are defined by the implementation - confirm there. **/
binding_handler* construct_binding_handler(int slot_num, int numa_id, int core_type_id, int max_threads_per_core);

//! Destroys a handler obtained from construct_binding_handler().
void destroy_binding_handler(binding_handler* handler_ptr);

//! Applies the handler's affinity mask for the given slot.
void apply_affinity_mask(binding_handler* handler_ptr, int slot_num);

//! Restores the affinity mask for the given slot.
void restore_affinity_mask(binding_handler* handler_ptr, int slot_num);

#endif /* __TBB_ARENA_BINDING */
229 
// RTM specific section
// Abort codes for mutexes that detect a conflict with another thread.
enum {
    speculation_not_supported       = 0,
    // Single-bit status flags.
    speculation_transaction_aborted = 1 << 0,
    speculation_can_retry           = 1 << 1,
    speculation_memadd_conflict     = 1 << 2,
    speculation_buffer_overflow     = 1 << 3,
    speculation_breakpoint_hit      = 1 << 4,
    speculation_nested_abort        = 1 << 5,
    // Mask and shift for the 8-bit field in the top byte of the status word.
    speculation_xabort_mask         = 0xFF000000,
    speculation_xabort_shift        = 24,
    // The value 0xFF comes from the Intel(R) 64 and IA-32 Architectures
    // Optimization Reference Manual, 12.4.5 (lock not free).
    speculation_xabort_not_free     = 0xFF,
    speculation_successful_begin    = 0xFFFFFFFF,
    // Union of the "aborted", "can retry" and "memory address conflict" flags.
    speculation_retry               = speculation_transaction_aborted
                                      | speculation_can_retry
                                      | speculation_memadd_conflict
};
248 
249 // We suppose that successful transactions are sequentially ordered and
250 // do not require additional memory fences around them.
251 // Technically it can be achieved only if xbegin has implicit
252 // acquire memory semantics an xend/xabort has release memory semantics on compiler and hardware level.
253 // See the article: https://arxiv.org/pdf/1710.04839.pdf
254 static inline unsigned int begin_transaction() {
255 #if __TBB_TSX_INTRINSICS_PRESENT
256     return _xbegin();
257 #else
258     return speculation_not_supported; // return unsuccessful code
259 #endif
260 }
261 
//! Ends the current transaction via _xend(); does nothing when TSX intrinsics are not available.
static inline void end_transaction()
{
#if __TBB_TSX_INTRINSICS_PRESENT
    _xend();
#endif /* __TBB_TSX_INTRINSICS_PRESENT */
}
267 
//! Aborts the current transaction via _xabort() with code speculation_xabort_not_free.
/** Does nothing when TSX intrinsics are not available. **/
static inline void abort_transaction()
{
#if __TBB_TSX_INTRINSICS_PRESENT
    _xabort(speculation_xabort_not_free);
#endif /* __TBB_TSX_INTRINSICS_PRESENT */
}
273 
#if TBB_USE_ASSERT
//! Returns the result of _xtest(), or 0 when TSX intrinsics are not available.
/** Compiled only when TBB_USE_ASSERT is enabled. **/
static inline unsigned char is_in_transaction() {
#if !__TBB_TSX_INTRINSICS_PRESENT
    return 0;
#else
    return _xtest();
#endif
}
#endif // TBB_USE_ASSERT
283 
284 } // namespace r1
285 } // namespace detail
286 } // namespace tbb
287 
288 #endif /* _TBB_tbb_misc_H */
289