1 /*
2 Copyright (c) 2005-2022 Intel Corporation
3
4 Licensed under the Apache License, Version 2.0 (the "License");
5 you may not use this file except in compliance with the License.
6 You may obtain a copy of the License at
7
8 http://www.apache.org/licenses/LICENSE-2.0
9
10 Unless required by applicable law or agreed to in writing, software
11 distributed under the License is distributed on an "AS IS" BASIS,
12 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 See the License for the specific language governing permissions and
14 limitations under the License.
15 */
16
17 #ifndef _TBB_tbb_misc_H
18 #define _TBB_tbb_misc_H
19
20 #include "oneapi/tbb/detail/_config.h"
21 #include "oneapi/tbb/detail/_assert.h"
22 #include "oneapi/tbb/detail/_utils.h"
23
24 #if __TBB_ARENA_BINDING
25 #include "oneapi/tbb/info.h"
26 #endif /*__TBB_ARENA_BINDING*/
27
28 #if __unix__
29 #include <sys/param.h> // __FreeBSD_version
30 #if __FreeBSD_version >= 701000
31 #include <sys/cpuset.h>
32 #endif
33 #endif
34
35 #include <atomic>
36
37 // Does the operating system have a system call to pin a thread to a set of OS processors?
38 #define __TBB_OS_AFFINITY_SYSCALL_PRESENT ((__linux__ && !__ANDROID__) || (__FreeBSD_version >= 701000))
39 // On IBM* Blue Gene* CNK nodes, the affinity API has restrictions that prevent its usability for TBB,
40 // and also sysconf(_SC_NPROCESSORS_ONLN) already takes process affinity into account.
41 #define __TBB_USE_OS_AFFINITY_SYSCALL (__TBB_OS_AFFINITY_SYSCALL_PRESENT && !__bg__)
42
43 namespace tbb {
44 namespace detail {
45 namespace r1 {
46
47 void runtime_warning(const char* format, ... );
48
49 #if __TBB_ARENA_BINDING
50 class task_arena;
51 class task_scheduler_observer;
52 #endif /*__TBB_ARENA_BINDING*/
53
54 const std::size_t MByte = 1024*1024;
55
56 #if __TBB_USE_WINAPI
57 // The Microsoft Documentation about Thread Stack Size states that
58 // "The default stack reservation size used by the linker is 1 MB"
59 const std::size_t ThreadStackSize = 1*MByte;
60 #else
61 const std::size_t ThreadStackSize = (sizeof(uintptr_t) <= 4 ? 2 : 4 )*MByte;
62 #endif
63
#ifndef __TBB_HardwareConcurrency

//! Returns maximal parallelism level supported by the current OS configuration.
int AvailableHwConcurrency();

#else

//! Returns the parallelism level supplied by the __TBB_HardwareConcurrency override macro.
/** The result is clamped to a minimum of 1 so callers can always rely on a
    positive concurrency level, even if the macro misbehaves. **/
inline int AvailableHwConcurrency() {
    int n = __TBB_HardwareConcurrency();
    return n > 0 ? n : 1; // Fail safety strap: never report a non-positive level
}
#endif /* __TBB_HardwareConcurrency */
76
77 //! Returns OS regular memory page size
78 size_t DefaultSystemPageSize();
79
80 //! Returns number of processor groups in the current OS configuration.
81 /** AvailableHwConcurrency must be called at least once before calling this method. **/
82 int NumberOfProcessorGroups();
83
84 #if _WIN32||_WIN64
85
86 //! Retrieves index of processor group containing processor with the given index
87 int FindProcessorGroupIndex ( int processorIndex );
88
89 //! Affinitizes the thread to the specified processor group
90 void MoveThreadIntoProcessorGroup( void* hThread, int groupIndex );
91
92 #endif /* _WIN32||_WIN64 */
93
94 //! Prints TBB version information on stderr
95 void PrintVersion();
96
97 //! Prints arbitrary extra TBB version information on stderr
98 void PrintExtraVersionInfo( const char* category, const char* format, ... );
99
100 //! A callback routine to print RML version information on stderr
101 void PrintRMLVersionInfo( void* arg, const char* server_info );
102
103 // For TBB compilation only; not to be used in public headers
104 #if defined(min) || defined(max)
105 #undef min
106 #undef max
107 #endif
108
//! Utility template function returning lesser of the two values.
/** Kept here so TBB headers need not pull in the not strict safe <algorithm>.\n
    When the operands would trigger signed/unsigned or size mismatch warnings,
    the caller is responsible for performing the appropriate cast first. **/
template<typename T>
T min ( const T& val1, const T& val2 ) {
    if ( val1 < val2 )
        return val1;
    return val2;
}
117
//! Utility template function returning greater of the two values.
/** Kept here so TBB headers need not pull in the not strict safe <algorithm>.\n
    When the operands would trigger signed/unsigned or size mismatch warnings,
    the caller is responsible for performing the appropriate cast first. **/
template<typename T>
T max ( const T& val1, const T& val2 ) {
    if ( val1 < val2 )
        return val2;
    return val1;
}
126
//! Utility helper structure to ease overload resolution
/** Empty tag type carrying a compile-time integer (e.g. int_to_type<sizeof(x)>)
    so overloads can be selected by an integral value rather than by a type. **/
template<int > struct int_to_type {};
129
130 //------------------------------------------------------------------------
131 // FastRandom
132 //------------------------------------------------------------------------
133
134 //! A fast random number generator.
135 /** Uses linear congruential method. */
136 class FastRandom {
137 private:
138 unsigned x, c;
139 static const unsigned a = 0x9e3779b1; // a big prime number
140 public:
141 //! Get a random number.
get()142 unsigned short get() {
143 return get(x);
144 }
145 //! Get a random number for the given seed; update the seed for next use.
get(unsigned & seed)146 unsigned short get( unsigned& seed ) {
147 unsigned short r = (unsigned short)(seed>>16);
148 __TBB_ASSERT(c&1, "c must be odd for big rng period");
149 seed = seed*a+c;
150 return r;
151 }
152 //! Construct a random number generator.
FastRandom(void * unique_ptr)153 FastRandom( void* unique_ptr ) { init(uintptr_t(unique_ptr)); }
154
155 template <typename T>
init(T seed)156 void init( T seed ) {
157 init(seed,int_to_type<sizeof(seed)>());
158 }
init(uint64_t seed,int_to_type<8>)159 void init( uint64_t seed , int_to_type<8> ) {
160 init(uint32_t((seed>>32)+seed), int_to_type<4>());
161 }
init(uint32_t seed,int_to_type<4>)162 void init( uint32_t seed, int_to_type<4> ) {
163 // threads use different seeds for unique sequences
164 c = (seed|1)*0xba5703f5; // c must be odd, shuffle by a prime number
165 x = c^(seed>>1); // also shuffle x for the first get() invocation
166 }
167 };
168
169 //------------------------------------------------------------------------
170 // Atomic extensions
171 //------------------------------------------------------------------------
172
//! Atomically replaces value of dst with newValue if they satisfy condition of compare predicate
/** Retries the CAS until either the predicate rejects the update or the
    exchange succeeds. Return value semantics is the same as for CAS: the
    last value observed in dst before the (attempted) replacement. **/
template<typename T1, class Pred>
T1 atomic_update(std::atomic<T1>& dst, T1 newValue, Pred compare) {
    T1 snapshot = dst.load(std::memory_order_acquire);
    for (;;) {
        // Stop once the current value no longer satisfies the predicate.
        if ( !compare(snapshot, newValue) )
            break;
        // On failure compare_exchange_strong refreshes snapshot and we retry.
        if ( dst.compare_exchange_strong(snapshot, newValue) )
            break;
    }
    return snapshot;
}
184
#if __TBB_USE_OS_AFFINITY_SYSCALL
#if __linux__
typedef cpu_set_t basic_mask_t;
#elif __FreeBSD_version >= 701000
typedef cpuset_t basic_mask_t;
#else
#error affinity_helper is not implemented in this OS
#endif
//! Scoped helper for manipulating the calling thread's CPU affinity mask.
/** The method bodies (including the destructor) live in the .cpp file; from
    this header it appears the destructor undoes protect_affinity_mask()
    unless dismiss() was called — confirm against the definitions. **/
class affinity_helper : no_copy {
    basic_mask_t* threadMask; // saved mask storage; nullptr until set by the implementation
    int is_changed;           // nonzero presumably means the mask was altered and needs restoring
public:
    affinity_helper() : threadMask(nullptr), is_changed(0) {}
    ~affinity_helper();
    void protect_affinity_mask( bool restore_process_mask );
    void dismiss();
};
void destroy_process_mask();
#else
//! No-op stand-in used on platforms without an OS affinity syscall.
class affinity_helper : no_copy {
public:
    void protect_affinity_mask( bool ) {}
};
inline void destroy_process_mask(){}
#endif /* __TBB_USE_OS_AFFINITY_SYSCALL */
210
//! CPU feature flags, passed by reference to detect_cpu_features() below to be filled in.
struct cpu_features_type {
    bool rtm_enabled{false};     // Intel RTM (TSX) transactional-memory support; used by the RTM section below
    bool waitpkg_enabled{false}; // WAITPKG (umonitor/umwait/tpause) support — presumed from the name; confirm in detect_cpu_features
};
215
216 void detect_cpu_features(cpu_features_type& cpu_features);
217
218 #if __TBB_ARENA_BINDING
219 class binding_handler;
220
221 binding_handler* construct_binding_handler(int slot_num, int numa_id, int core_type_id, int max_threads_per_core);
222 void destroy_binding_handler(binding_handler* handler_ptr);
223 void apply_affinity_mask(binding_handler* handler_ptr, int slot_num);
224 void restore_affinity_mask(binding_handler* handler_ptr, int slot_num);
225
226 #endif /*__TBB_ARENA_BINDING*/
227
// RTM specific section
// abort code for mutexes that detect a conflict with another thread.
enum {
    speculation_not_supported = 0x00,
    speculation_transaction_aborted = 0x01,
    speculation_can_retry = 0x02,
    speculation_memadd_conflict = 0x04,
    speculation_buffer_overflow = 0x08,
    speculation_breakpoint_hit = 0x10,
    speculation_nested_abort = 0x20,
    // Bits 24-31 of the abort status carry the code passed to _xabort
    speculation_xabort_mask = 0xFF000000,
    speculation_xabort_shift = 24,
    // The value (0xFF) comes from the Intel(R) 64 and IA-32 Architectures
    // Optimization Reference Manual 12.4.5 "lock not free"
    speculation_xabort_not_free = 0xFF,
    // Status value denoting a successfully started transaction (see begin_transaction)
    speculation_successful_begin = 0xFFFFFFFF,
    // Abort causes after which retrying the transaction may succeed
    speculation_retry = speculation_transaction_aborted
                      | speculation_can_retry
                      | speculation_memadd_conflict
};
246
// We suppose that successful transactions are sequentially ordered and
// do not require additional memory fences around them.
// Technically it can be achieved only if xbegin has implicit
// acquire memory semantics and xend/xabort has release memory semantics on compiler and hardware level.
// See the article: https://arxiv.org/pdf/1710.04839.pdf

//! Starts a hardware transaction (Intel TSX/RTM) and returns its status code.
/** Without TSX intrinsics this returns speculation_not_supported so callers
    take the non-speculative fallback path. **/
static inline unsigned int begin_transaction() {
#if __TBB_TSX_INTRINSICS_PRESENT
    return _xbegin();
#else
    return speculation_not_supported; // return unsuccessful code
#endif
}
259
//! Commits the current hardware transaction; no-op when TSX intrinsics are absent.
static inline void end_transaction() {
#if __TBB_TSX_INTRINSICS_PRESENT
    _xend();
#endif
}
265
//! Aborts the current hardware transaction with the "lock not free" code;
//! no-op when TSX intrinsics are absent.
static inline void abort_transaction() {
#if __TBB_TSX_INTRINSICS_PRESENT
    _xabort(speculation_xabort_not_free);
#endif
}
271
#if TBB_USE_ASSERT
//! Returns nonzero if the calling thread is currently inside a hardware transaction.
/** Used only in assertions; always 0 when TSX intrinsics are absent. **/
static inline unsigned char is_in_transaction() {
#if __TBB_TSX_INTRINSICS_PRESENT
    return _xtest();
#else
    return 0;
#endif
}
#endif // TBB_USE_ASSERT
281
282 } // namespace r1
283 } // namespace detail
284 } // namespace tbb
285
286 #endif /* _TBB_tbb_misc_H */
287