/*-
 * SPDX-License-Identifier: BSD-2-Clause
 *
 * Copyright (c) 2013 Ed Schouten <[email protected]>
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#include <sys/cdefs.h>
#include <sys/param.h>
#include <sys/stdatomic.h>
#include <sys/types.h>

#include <machine/atomic.h>
#include <machine/cpufunc.h>
#include <machine/sysarch.h>

/*
 * Executing statements with interrupts disabled.
 *
 * WITHOUT_INTERRUPTS(s) stores the value returned by intr_disable(),
 * executes the statement s once, and then passes the stored value to
 * intr_restore().  The macro only exists when _KERNEL is defined and
 * SMP is not.
 */

#if defined(_KERNEL) && !defined(SMP)
#define WITHOUT_INTERRUPTS(s) do { \
	register_t regs; \
 \
	regs = intr_disable(); \
	do s while (0); \
	intr_restore(regs); \
} while (0)
#endif /* _KERNEL && !SMP */

/*
 * Memory barriers.
 *
 * It turns out __sync_synchronize() does not emit any code when used
 * with GCC 4.2.  Implement our own version that does work reliably.
 *
 * Although __sync_lock_test_and_set() should only perform an acquire
 * barrier, make it do a full barrier like the other functions.  This
 * should make <stdatomic.h>'s atomic_exchange_explicit() work reliably.
 */

#if defined(_KERNEL) && !defined(SMP)
/*
 * Kernel built without SMP: emit no instruction, only forbid the
 * compiler from moving memory accesses across this point.
 */
static inline void
do_sync(void)
{

	__asm volatile ("" : : : "memory");
}
#else
/* All other builds: delegate to dmb(). */
static inline void
do_sync(void)
{

	dmb();
}
#endif


80 #if defined(__SYNC_ATOMICS) || defined(EMIT_SYNC_ATOMICS)
81
82 #ifdef __clang__
83 #pragma redefine_extname __sync_lock_test_and_set_1_c __sync_lock_test_and_set_1
84 #pragma redefine_extname __sync_lock_test_and_set_2_c __sync_lock_test_and_set_2
85 #pragma redefine_extname __sync_lock_test_and_set_4_c __sync_lock_test_and_set_4
86 #pragma redefine_extname __sync_val_compare_and_swap_1_c __sync_val_compare_and_swap_1
87 #pragma redefine_extname __sync_val_compare_and_swap_2_c __sync_val_compare_and_swap_2
88 #pragma redefine_extname __sync_val_compare_and_swap_4_c __sync_val_compare_and_swap_4
89 #pragma redefine_extname __sync_fetch_and_add_1_c __sync_fetch_and_add_1
90 #pragma redefine_extname __sync_fetch_and_add_2_c __sync_fetch_and_add_2
91 #pragma redefine_extname __sync_fetch_and_add_4_c __sync_fetch_and_add_4
92 #pragma redefine_extname __sync_fetch_and_and_1_c __sync_fetch_and_and_1
93 #pragma redefine_extname __sync_fetch_and_and_2_c __sync_fetch_and_and_2
94 #pragma redefine_extname __sync_fetch_and_and_4_c __sync_fetch_and_and_4
95 #pragma redefine_extname __sync_fetch_and_or_1_c __sync_fetch_and_or_1
96 #pragma redefine_extname __sync_fetch_and_or_2_c __sync_fetch_and_or_2
97 #pragma redefine_extname __sync_fetch_and_or_4_c __sync_fetch_and_or_4
98 #pragma redefine_extname __sync_fetch_and_xor_1_c __sync_fetch_and_xor_1
99 #pragma redefine_extname __sync_fetch_and_xor_2_c __sync_fetch_and_xor_2
100 #pragma redefine_extname __sync_fetch_and_xor_4_c __sync_fetch_and_xor_4
101 #pragma redefine_extname __sync_fetch_and_sub_1_c __sync_fetch_and_sub_1
102 #pragma redefine_extname __sync_fetch_and_sub_2_c __sync_fetch_and_sub_2
103 #pragma redefine_extname __sync_fetch_and_sub_4_c __sync_fetch_and_sub_4
104 #endif
105
/*
 * Old __sync_* API.
 */


/* Implementations for old GCC versions, lacking support for atomics. */

/*
 * One 32-bit word that can also be accessed as its four individual
 * bytes, in memory order.
 */
typedef union {
	uint8_t		v8[4];
	uint32_t	v32;
} reg_t;

/*
 * Given a memory address pointing to an 8-bit or 16-bit integer, return
 * the address of the 32-bit word containing it.
 *
 * The two low address bits are cleared; the masking is done on an
 * unsigned integer so no bitwise operation touches a signed value.
 */

static inline uint32_t *
round_to_word(void *ptr)
{

	return ((uint32_t *)((uintptr_t)ptr & ~(uintptr_t)3));
}

/*
 * Utility functions for loading and storing 8-bit and 16-bit integers
 * in 32-bit words at an offset corresponding with the location of the
 * atomic variable.
 */

136 static inline void
put_1(reg_t * r,const uint8_t * offset_ptr,uint8_t val)137 put_1(reg_t *r, const uint8_t *offset_ptr, uint8_t val)
138 {
139 size_t offset;
140
141 offset = (intptr_t)offset_ptr & 3;
142 r->v8[offset] = val;
143 }
144
145 static inline uint8_t
get_1(const reg_t * r,const uint8_t * offset_ptr)146 get_1(const reg_t *r, const uint8_t *offset_ptr)
147 {
148 size_t offset;
149
150 offset = (intptr_t)offset_ptr & 3;
151 return (r->v8[offset]);
152 }
153
154 static inline void
put_2(reg_t * r,const uint16_t * offset_ptr,uint16_t val)155 put_2(reg_t *r, const uint16_t *offset_ptr, uint16_t val)
156 {
157 size_t offset;
158 union {
159 uint16_t in;
160 uint8_t out[2];
161 } bytes;
162
163 offset = (intptr_t)offset_ptr & 3;
164 bytes.in = val;
165 r->v8[offset] = bytes.out[0];
166 r->v8[offset + 1] = bytes.out[1];
167 }
168
169 static inline uint16_t
get_2(const reg_t * r,const uint16_t * offset_ptr)170 get_2(const reg_t *r, const uint16_t *offset_ptr)
171 {
172 size_t offset;
173 union {
174 uint8_t in[2];
175 uint16_t out;
176 } bytes;
177
178 offset = (intptr_t)offset_ptr & 3;
179 bytes.in[0] = r->v8[offset];
180 bytes.in[1] = r->v8[offset + 1];
181 return (bytes.out);
182 }
183
/*
 * 8-bit and 16-bit routines.
 *
 * These operations are not natively supported by the CPU, so we use
 * some shifting and bitmasking on top of the 32-bit instructions.
 */

191 #define EMIT_LOCK_TEST_AND_SET_N(N, uintN_t) \
192 uintN_t \
193 __sync_lock_test_and_set_##N##_c(uintN_t *mem, uintN_t val) \
194 { \
195 uint32_t *mem32; \
196 reg_t val32, negmask, old; \
197 uint32_t temp1, temp2; \
198 \
199 mem32 = round_to_word(mem); \
200 val32.v32 = 0x00000000; \
201 put_##N(&val32, mem, val); \
202 negmask.v32 = 0xffffffff; \
203 put_##N(&negmask, mem, 0); \
204 \
205 do_sync(); \
206 __asm volatile ( \
207 "1:" \
208 "\tldrex %0, %6\n" /* Load old value. */ \
209 "\tand %2, %5, %0\n" /* Remove the old value. */ \
210 "\torr %2, %2, %4\n" /* Put in the new value. */ \
211 "\tstrex %3, %2, %1\n" /* Attempt to store. */ \
212 "\tcmp %3, #0\n" /* Did it succeed? */ \
213 "\tbne 1b\n" /* Spin if failed. */ \
214 : "=&r" (old.v32), "=m" (*mem32), "=&r" (temp1), \
215 "=&r" (temp2) \
216 : "r" (val32.v32), "r" (negmask.v32), "m" (*mem32)); \
217 return (get_##N(&old, mem)); \
218 }
219
220 EMIT_LOCK_TEST_AND_SET_N(1, uint8_t)
221 EMIT_LOCK_TEST_AND_SET_N(2, uint16_t)
222
223 #define EMIT_VAL_COMPARE_AND_SWAP_N(N, uintN_t) \
224 uintN_t \
225 __sync_val_compare_and_swap_##N##_c(uintN_t *mem, uintN_t expected, \
226 uintN_t desired) \
227 { \
228 uint32_t *mem32; \
229 reg_t expected32, desired32, posmask, old; \
230 uint32_t negmask, temp1, temp2; \
231 \
232 mem32 = round_to_word(mem); \
233 expected32.v32 = 0x00000000; \
234 put_##N(&expected32, mem, expected); \
235 desired32.v32 = 0x00000000; \
236 put_##N(&desired32, mem, desired); \
237 posmask.v32 = 0x00000000; \
238 put_##N(&posmask, mem, ~0); \
239 negmask = ~posmask.v32; \
240 \
241 do_sync(); \
242 __asm volatile ( \
243 "1:" \
244 "\tldrex %0, %8\n" /* Load old value. */ \
245 "\tand %2, %6, %0\n" /* Isolate the old value. */ \
246 "\tcmp %2, %4\n" /* Compare to expected value. */\
247 "\tbne 2f\n" /* Values are unequal. */ \
248 "\tand %2, %7, %0\n" /* Remove the old value. */ \
249 "\torr %2, %5\n" /* Put in the new value. */ \
250 "\tstrex %3, %2, %1\n" /* Attempt to store. */ \
251 "\tcmp %3, #0\n" /* Did it succeed? */ \
252 "\tbne 1b\n" /* Spin if failed. */ \
253 "2:" \
254 : "=&r" (old), "=m" (*mem32), "=&r" (temp1), \
255 "=&r" (temp2) \
256 : "r" (expected32.v32), "r" (desired32.v32), \
257 "r" (posmask.v32), "r" (negmask), "m" (*mem32)); \
258 return (get_##N(&old, mem)); \
259 }
260
261 EMIT_VAL_COMPARE_AND_SWAP_N(1, uint8_t)
262 EMIT_VAL_COMPARE_AND_SWAP_N(2, uint16_t)
263
264 #define EMIT_ARITHMETIC_FETCH_AND_OP_N(N, uintN_t, name, op) \
265 uintN_t \
266 __sync_##name##_##N##_c(uintN_t *mem, uintN_t val) \
267 { \
268 uint32_t *mem32; \
269 reg_t val32, posmask, old; \
270 uint32_t negmask, temp1, temp2; \
271 \
272 mem32 = round_to_word(mem); \
273 val32.v32 = 0x00000000; \
274 put_##N(&val32, mem, val); \
275 posmask.v32 = 0x00000000; \
276 put_##N(&posmask, mem, ~0); \
277 negmask = ~posmask.v32; \
278 \
279 do_sync(); \
280 __asm volatile ( \
281 "1:" \
282 "\tldrex %0, %7\n" /* Load old value. */ \
283 "\t"op" %2, %0, %4\n" /* Calculate new value. */ \
284 "\tand %2, %5\n" /* Isolate the new value. */ \
285 "\tand %3, %6, %0\n" /* Remove the old value. */ \
286 "\torr %2, %2, %3\n" /* Put in the new value. */ \
287 "\tstrex %3, %2, %1\n" /* Attempt to store. */ \
288 "\tcmp %3, #0\n" /* Did it succeed? */ \
289 "\tbne 1b\n" /* Spin if failed. */ \
290 : "=&r" (old.v32), "=m" (*mem32), "=&r" (temp1), \
291 "=&r" (temp2) \
292 : "r" (val32.v32), "r" (posmask.v32), "r" (negmask), \
293 "m" (*mem32)); \
294 return (get_##N(&old, mem)); \
295 }
296
297 EMIT_ARITHMETIC_FETCH_AND_OP_N(1, uint8_t, fetch_and_add, "add")
298 EMIT_ARITHMETIC_FETCH_AND_OP_N(1, uint8_t, fetch_and_sub, "sub")
299 EMIT_ARITHMETIC_FETCH_AND_OP_N(2, uint16_t, fetch_and_add, "add")
300 EMIT_ARITHMETIC_FETCH_AND_OP_N(2, uint16_t, fetch_and_sub, "sub")
301
302 #define EMIT_BITWISE_FETCH_AND_OP_N(N, uintN_t, name, op, idempotence) \
303 uintN_t \
304 __sync_##name##_##N##_c(uintN_t *mem, uintN_t val) \
305 { \
306 uint32_t *mem32; \
307 reg_t val32, old; \
308 uint32_t temp1, temp2; \
309 \
310 mem32 = round_to_word(mem); \
311 val32.v32 = idempotence ? 0xffffffff : 0x00000000; \
312 put_##N(&val32, mem, val); \
313 \
314 do_sync(); \
315 __asm volatile ( \
316 "1:" \
317 "\tldrex %0, %5\n" /* Load old value. */ \
318 "\t"op" %2, %4, %0\n" /* Calculate new value. */ \
319 "\tstrex %3, %2, %1\n" /* Attempt to store. */ \
320 "\tcmp %3, #0\n" /* Did it succeed? */ \
321 "\tbne 1b\n" /* Spin if failed. */ \
322 : "=&r" (old.v32), "=m" (*mem32), "=&r" (temp1), \
323 "=&r" (temp2) \
324 : "r" (val32.v32), "m" (*mem32)); \
325 return (get_##N(&old, mem)); \
326 }
327
328 EMIT_BITWISE_FETCH_AND_OP_N(1, uint8_t, fetch_and_and, "and", 1)
329 EMIT_BITWISE_FETCH_AND_OP_N(1, uint8_t, fetch_and_or, "orr", 0)
330 EMIT_BITWISE_FETCH_AND_OP_N(1, uint8_t, fetch_and_xor, "eor", 0)
331 EMIT_BITWISE_FETCH_AND_OP_N(2, uint16_t, fetch_and_and, "and", 1)
332 EMIT_BITWISE_FETCH_AND_OP_N(2, uint16_t, fetch_and_or, "orr", 0)
333 EMIT_BITWISE_FETCH_AND_OP_N(2, uint16_t, fetch_and_xor, "eor", 0)
334
/*
 * 32-bit routines.
 */

/*
 * Store val into *mem and return the value *mem held before, using an
 * ldrex/strex loop that retries until the exclusive store succeeds.
 *
 * do_sync() is called before the loop only.  The asm executes cmp, so
 * the condition flags are declared as clobbered.
 */
uint32_t
__sync_lock_test_and_set_4_c(uint32_t *mem, uint32_t val)
{
	uint32_t old, temp;

	do_sync();
	__asm volatile (
		"1:"
		"\tldrex %0, %4\n"	/* Load old value. */
		"\tstrex %2, %3, %1\n"	/* Attempt to store. */
		"\tcmp %2, #0\n"	/* Did it succeed? */
		"\tbne 1b\n"		/* Spin if failed. */
		: "=&r" (old), "=m" (*mem), "=&r" (temp)
		: "r" (val), "m" (*mem)
		: "cc");
	return (old);
}

/*
 * If *mem equals expected, replace it with desired.  The value loaded
 * from *mem is returned in either case, so the caller can tell whether
 * the swap happened by comparing the result with expected.
 *
 * When the comparison fails the loop is left through label 2 without
 * attempting a store; otherwise the exclusive store is retried until it
 * succeeds.  do_sync() is called before the loop only.  The asm
 * executes cmp, so the condition flags are declared as clobbered.
 */
uint32_t
__sync_val_compare_and_swap_4_c(uint32_t *mem, uint32_t expected,
    uint32_t desired)
{
	uint32_t old, temp;

	do_sync();
	__asm volatile (
		"1:"
		"\tldrex %0, %5\n"	/* Load old value. */
		"\tcmp %0, %3\n"	/* Compare to expected value. */
		"\tbne 2f\n"		/* Values are unequal. */
		"\tstrex %2, %4, %1\n"	/* Attempt to store. */
		"\tcmp %2, #0\n"	/* Did it succeed? */
		"\tbne 1b\n"		/* Spin if failed. */
		"2:"
		: "=&r" (old), "=m" (*mem), "=&r" (temp)
		: "r" (expected), "r" (desired), "m" (*mem)
		: "cc");
	return (old);
}

/*
 * Emit __sync_<name>_4_c(): apply the instruction op to *mem and val,
 * store the result back into *mem and return the value *mem held
 * before.  The exclusive store is retried until it succeeds.
 *
 * do_sync() is called before the loop only.  The asm executes cmp, so
 * the condition flags are declared as clobbered.
 */
#define EMIT_FETCH_AND_OP_4(name, op) \
uint32_t \
__sync_##name##_4##_c(uint32_t *mem, uint32_t val) \
{ \
	uint32_t old, temp1, temp2; \
 \
	do_sync(); \
	__asm volatile ( \
		"1:" \
		"\tldrex %0, %5\n"	/* Load old value. */ \
		"\t"op" %2, %0, %4\n"	/* Calculate new value. */ \
		"\tstrex %3, %2, %1\n"	/* Attempt to store. */ \
		"\tcmp %3, #0\n"	/* Did it succeed? */ \
		"\tbne 1b\n"		/* Spin if failed. */ \
		: "=&r" (old), "=m" (*mem), "=&r" (temp1), \
		  "=&r" (temp2) \
		: "r" (val), "m" (*mem) \
		: "cc"); \
	return (old); \
}

EMIT_FETCH_AND_OP_4(fetch_and_add, "add")
EMIT_FETCH_AND_OP_4(fetch_and_and, "and")
EMIT_FETCH_AND_OP_4(fetch_and_or, "orr")
EMIT_FETCH_AND_OP_4(fetch_and_sub, "sub")
EMIT_FETCH_AND_OP_4(fetch_and_xor, "eor")

403 #ifndef __clang__
404 __strong_reference(__sync_lock_test_and_set_1_c, __sync_lock_test_and_set_1);
405 __strong_reference(__sync_lock_test_and_set_2_c, __sync_lock_test_and_set_2);
406 __strong_reference(__sync_lock_test_and_set_4_c, __sync_lock_test_and_set_4);
407 __strong_reference(__sync_val_compare_and_swap_1_c, __sync_val_compare_and_swap_1);
408 __strong_reference(__sync_val_compare_and_swap_2_c, __sync_val_compare_and_swap_2);
409 __strong_reference(__sync_val_compare_and_swap_4_c, __sync_val_compare_and_swap_4);
410 __strong_reference(__sync_fetch_and_add_1_c, __sync_fetch_and_add_1);
411 __strong_reference(__sync_fetch_and_add_2_c, __sync_fetch_and_add_2);
412 __strong_reference(__sync_fetch_and_add_4_c, __sync_fetch_and_add_4);
413 __strong_reference(__sync_fetch_and_and_1_c, __sync_fetch_and_and_1);
414 __strong_reference(__sync_fetch_and_and_2_c, __sync_fetch_and_and_2);
415 __strong_reference(__sync_fetch_and_and_4_c, __sync_fetch_and_and_4);
416 __strong_reference(__sync_fetch_and_sub_1_c, __sync_fetch_and_sub_1);
417 __strong_reference(__sync_fetch_and_sub_2_c, __sync_fetch_and_sub_2);
418 __strong_reference(__sync_fetch_and_sub_4_c, __sync_fetch_and_sub_4);
419 __strong_reference(__sync_fetch_and_or_1_c, __sync_fetch_and_or_1);
420 __strong_reference(__sync_fetch_and_or_2_c, __sync_fetch_and_or_2);
421 __strong_reference(__sync_fetch_and_or_4_c, __sync_fetch_and_or_4);
422 __strong_reference(__sync_fetch_and_xor_1_c, __sync_fetch_and_xor_1);
423 __strong_reference(__sync_fetch_and_xor_2_c, __sync_fetch_and_xor_2);
424 __strong_reference(__sync_fetch_and_xor_4_c, __sync_fetch_and_xor_4);
425 #endif
426
427 #endif /* __SYNC_ATOMICS */
428