xref: /oneTBB/src/tbb/misc.h (revision c21e688a)
1 /*
2     Copyright (c) 2005-2022 Intel Corporation
3 
4     Licensed under the Apache License, Version 2.0 (the "License");
5     you may not use this file except in compliance with the License.
6     You may obtain a copy of the License at
7 
8         http://www.apache.org/licenses/LICENSE-2.0
9 
10     Unless required by applicable law or agreed to in writing, software
11     distributed under the License is distributed on an "AS IS" BASIS,
12     WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13     See the License for the specific language governing permissions and
14     limitations under the License.
15 */
16 
17 #ifndef _TBB_tbb_misc_H
18 #define _TBB_tbb_misc_H
19 
20 #include "oneapi/tbb/detail/_config.h"
21 #include "oneapi/tbb/detail/_assert.h"
22 #include "oneapi/tbb/detail/_utils.h"
23 
24 #if __TBB_ARENA_BINDING
25 #include "oneapi/tbb/info.h"
26 #endif /*__TBB_ARENA_BINDING*/
27 
28 #if __unix__
29 #include <sys/param.h>  // __FreeBSD_version
30 #if __FreeBSD_version >= 701000
31 #include <sys/cpuset.h>
32 #endif
33 #endif
34 
35 #include <atomic>
36 
37 // Does the operating system have a system call to pin a thread to a set of OS processors?
38 #define __TBB_OS_AFFINITY_SYSCALL_PRESENT ((__linux__ && !__ANDROID__) || (__FreeBSD_version >= 701000))
39 // On IBM* Blue Gene* CNK nodes, the affinity API has restrictions that prevent its usability for TBB,
40 // and also sysconf(_SC_NPROCESSORS_ONLN) already takes process affinity into account.
41 #define __TBB_USE_OS_AFFINITY_SYSCALL (__TBB_OS_AFFINITY_SYSCALL_PRESENT && !__bg__)
42 
43 namespace tbb {
44 namespace detail {
45 namespace r1 {
46 
47 void runtime_warning(const char* format, ... );
48 
49 #if __TBB_ARENA_BINDING
50 class task_arena;
51 class task_scheduler_observer;
52 #endif /*__TBB_ARENA_BINDING*/
53 
//! Number of bytes in one megabyte (2^20).
const std::size_t MByte = std::size_t(1) << 20;

//! Stack size constant, selected per platform at compile time.
#if !__TBB_USE_WINAPI
// 2 MB when uintptr_t occupies at most 4 bytes, 4 MB otherwise.
const std::size_t ThreadStackSize = MByte * (sizeof(uintptr_t) > 4 ? 4 : 2);
#else
// The Microsoft Documentation about Thread Stack Size states that
// "The default stack reservation size used by the linker is 1 MB"
const std::size_t ThreadStackSize = MByte;
#endif
63 
64 #ifndef __TBB_HardwareConcurrency
65 
66 //! Returns maximal parallelism level supported by the current OS configuration.
67 int AvailableHwConcurrency();
68 
69 #else
70 
AvailableHwConcurrency()71 inline int AvailableHwConcurrency() {
72     int n = __TBB_HardwareConcurrency();
73     return n > 0 ? n : 1; // Fail safety strap
74 }
75 #endif /* __TBB_HardwareConcurrency */
76 
77 //! Returns OS regular memory page size
78 size_t DefaultSystemPageSize();
79 
80 //! Returns number of processor groups in the current OS configuration.
81 /** AvailableHwConcurrency must be called at least once before calling this method. **/
82 int NumberOfProcessorGroups();
83 
84 #if _WIN32||_WIN64
85 
86 //! Retrieves index of processor group containing processor with the given index
87 int FindProcessorGroupIndex ( int processorIndex );
88 
89 //! Affinitizes the thread to the specified processor group
90 void MoveThreadIntoProcessorGroup( void* hThread, int groupIndex );
91 
92 #endif /* _WIN32||_WIN64 */
93 
94 //! Prints TBB version information on stderr
95 void PrintVersion();
96 
97 //! Prints arbitrary extra TBB version information on stderr
98 void PrintExtraVersionInfo( const char* category, const char* format, ... );
99 
100 //! A callback routine to print RML version information on stderr
101 void PrintRMLVersionInfo( void* arg, const char* server_info );
102 
103 // For TBB compilation only; not to be used in public headers
104 #if defined(min) || defined(max)
105 #undef min
106 #undef max
107 #endif
108 
//! Returns the lesser of two values of the same type.
/** Defined locally so that <algorithm> does not have to be included.\n
    When val1 is not less than val2 (including the case of equal values), val2 is returned.
    If the operands would trigger signed/unsigned or size mismatch warnings, the caller
    is responsible for casting them to a common type before the call. **/
template<typename T>
T min ( const T& val1, const T& val2 ) {
    if ( val1 < val2 )
        return val1;
    return val2;
}
117 
//! Returns the greater of two values of the same type.
/** Defined locally so that <algorithm> does not have to be included.\n
    When val1 is not less than val2 (including the case of equal values), val1 is returned.
    If the operands would trigger signed/unsigned or size mismatch warnings, the caller
    is responsible for casting them to a common type before the call. **/
template<typename T>
T max ( const T& val1, const T& val2 ) {
    if ( val1 < val2 )
        return val2;
    return val1;
}
126 
//! Empty tag type parameterized by an integer.
/** Each distinct integer yields a distinct type, which lets overload resolution
    pick a function based on a compile-time constant. **/
template<int>
struct int_to_type {
};
129 
130 //------------------------------------------------------------------------
131 // FastRandom
132 //------------------------------------------------------------------------
133 
134 //! A fast random number generator.
135 /** Uses linear congruential method. */
136 class FastRandom {
137 private:
138     unsigned x, c;
139     static const unsigned a = 0x9e3779b1; // a big prime number
140 public:
141     //! Get a random number.
get()142     unsigned short get() {
143         return get(x);
144     }
145     //! Get a random number for the given seed; update the seed for next use.
get(unsigned & seed)146     unsigned short get( unsigned& seed ) {
147         unsigned short r = (unsigned short)(seed>>16);
148         __TBB_ASSERT(c&1, "c must be odd for big rng period");
149         seed = seed*a+c;
150         return r;
151     }
152     //! Construct a random number generator.
FastRandom(void * unique_ptr)153     FastRandom( void* unique_ptr ) { init(uintptr_t(unique_ptr)); }
154 
155     template <typename T>
init(T seed)156     void init( T seed ) {
157         init(seed,int_to_type<sizeof(seed)>());
158     }
init(uint64_t seed,int_to_type<8>)159     void init( uint64_t seed , int_to_type<8> ) {
160         init(uint32_t((seed>>32)+seed), int_to_type<4>());
161     }
init(uint32_t seed,int_to_type<4>)162     void init( uint32_t seed, int_to_type<4> ) {
163         // threads use different seeds for unique sequences
164         c = (seed|1)*0xba5703f5; // c must be odd, shuffle by a prime number
165         x = c^(seed>>1); // also shuffle x for the first get() invocation
166     }
167 };
168 
169 //------------------------------------------------------------------------
170 // Atomic extensions
171 //------------------------------------------------------------------------
172 
//! Atomically stores newValue into dst as long as compare(current value, newValue) holds.
/** The predicate is re-evaluated against the freshly observed value after every failed
    compare-exchange. Return value semantics is the same as for CAS: the result is the last
    value observed in dst, i.e. the value that was replaced on success, or the value for
    which the predicate returned false. **/
template<typename T1, class Pred>
T1 atomic_update(std::atomic<T1>& dst, T1 newValue, Pred compare) {
    T1 observed = dst.load(std::memory_order_acquire);
    bool finished = false;
    while ( !finished ) {
        // Stop when the predicate rejects the update or when the exchange succeeds;
        // a failed exchange refreshes `observed` with the current content of dst.
        finished = !compare(observed, newValue)
                   || dst.compare_exchange_strong(observed, newValue);
    }
    return observed;
}
184 
185 #if __TBB_USE_OS_AFFINITY_SYSCALL
186   #if __linux__
187     typedef cpu_set_t basic_mask_t;
188   #elif __FreeBSD_version >= 701000
189     typedef cpuset_t basic_mask_t;
190   #else
191     #error affinity_helper is not implemented in this OS
192   #endif
193     class affinity_helper : no_copy {
194         basic_mask_t* threadMask;
195         int is_changed;
196     public:
affinity_helper()197         affinity_helper() : threadMask(nullptr), is_changed(0) {}
198         ~affinity_helper();
199         void protect_affinity_mask( bool restore_process_mask  );
200         void dismiss();
201     };
202     void destroy_process_mask();
203 #else
204     class affinity_helper : no_copy {
205     public:
protect_affinity_mask(bool)206         void protect_affinity_mask( bool ) {}
207     };
destroy_process_mask()208     inline void destroy_process_mask(){}
209 #endif /* __TBB_USE_OS_AFFINITY_SYSCALL */
210 
//! Set of CPU feature flags; every flag starts out false.
/** NOTE(review): presumably filled in by detect_cpu_features() — confirm against its definition. **/
struct cpu_features_type {
    bool rtm_enabled = false;
    bool waitpkg_enabled = false;
};
215 
216 void detect_cpu_features(cpu_features_type& cpu_features);
217 
218 #if __TBB_ARENA_BINDING
219 class binding_handler;
220 
221 binding_handler* construct_binding_handler(int slot_num, int numa_id, int core_type_id, int max_threads_per_core);
222 void destroy_binding_handler(binding_handler* handler_ptr);
223 void apply_affinity_mask(binding_handler* handler_ptr, int slot_num);
224 void restore_affinity_mask(binding_handler* handler_ptr, int slot_num);
225 
226 #endif /*__TBB_ARENA_BINDING*/
227 
228 // RTM specific section
229 // abort code for mutexes that detect a conflict with another thread.
enum {
    // Returned by begin_transaction() when TSX intrinsics are not available.
    speculation_not_supported       = 0,
    // Individual status flags, one bit each.
    speculation_transaction_aborted = 1 << 0,
    speculation_can_retry           = 1 << 1,
    speculation_memadd_conflict     = 1 << 2,
    speculation_buffer_overflow     = 1 << 3,
    speculation_breakpoint_hit      = 1 << 4,
    speculation_nested_abort        = 1 << 5,
    // Mask and shift selecting bits 24..31.
    speculation_xabort_mask         = 0xFFu << 24,
    speculation_xabort_shift        = 24,
    // Code passed to _xabort by abort_transaction(). The value (0xFF) comes from the
    // Intel(R) 64 and IA-32 Architectures Optimization Reference Manual 12.4.5 lock not free
    speculation_xabort_not_free     = 0xFF,
    speculation_successful_begin    = 0xFFFFFFFF,
    // Union of the aborted, can-retry and memory-conflict flags.
    speculation_retry               = speculation_transaction_aborted
                                      | speculation_can_retry
                                      | speculation_memadd_conflict
};
246 
// We assume that successful transactions are sequentially ordered and
// do not require additional memory fences around them.
// Technically, this can be achieved only if xbegin has implicit acquire memory
// semantics and xend/xabort have release memory semantics at the compiler and hardware level.
// See the article: https://arxiv.org/pdf/1710.04839.pdf
begin_transaction()252 static inline unsigned int begin_transaction() {
253 #if __TBB_TSX_INTRINSICS_PRESENT
254     return _xbegin();
255 #else
256     return speculation_not_supported; // return unsuccessful code
257 #endif
258 }
259 
//! Calls _xend() when TSX intrinsics are present; otherwise does nothing.
static inline void end_transaction() {
#if __TBB_TSX_INTRINSICS_PRESENT
    _xend();
#endif /* __TBB_TSX_INTRINSICS_PRESENT */
}
265 
//! Calls _xabort() with the speculation_xabort_not_free code when TSX intrinsics are present.
/** Does nothing when the intrinsics are not available. **/
static inline void abort_transaction() {
#if __TBB_TSX_INTRINSICS_PRESENT
    _xabort(speculation_xabort_not_free);
#endif /* __TBB_TSX_INTRINSICS_PRESENT */
}
271 
272 #if TBB_USE_ASSERT
//! Returns the result of _xtest(), or 0 when TSX intrinsics are not present.
static inline unsigned char is_in_transaction() {
#if !__TBB_TSX_INTRINSICS_PRESENT
    return 0;
#else
    return _xtest();
#endif
}
280 #endif // TBB_USE_ASSERT
281 
282 } // namespace r1
283 } // namespace detail
284 } // namespace tbb
285 
286 #endif /* _TBB_tbb_misc_H */
287