/*-
 * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
 *
 * Copyright (c) 1998 Doug Rabson
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * $FreeBSD$
 */
#ifndef _MACHINE_ATOMIC_H_
#define	_MACHINE_ATOMIC_H_

#ifndef _SYS_CDEFS_H_
#error this file needs sys/cdefs.h as a prerequisite
#endif
36a9643ea8Slogwang
/*
 * To express interprocessor (as opposed to processor and device) memory
 * ordering constraints, use the atomic_*() functions with acquire and release
 * semantics rather than the *mb() functions.  An architecture's memory
 * ordering (or memory consistency) model governs the order in which a
 * program's accesses to different locations may be performed by an
 * implementation of that architecture.  In general, for memory regions
 * defined as writeback cacheable, the memory ordering implemented by amd64
 * processors preserves the program ordering of a load followed by a load, a
 * load followed by a store, and a store followed by a store.  Only a store
 * followed by a load to a different memory location may be reordered.
 * Therefore, except for special cases, like non-temporal memory accesses or
 * memory regions defined as write combining, the memory ordering effects
 * provided by the sfence instruction in the wmb() function and the lfence
 * instruction in the rmb() function are redundant.  In contrast, the
 * atomic_*() functions with acquire and release semantics do not perform
 * redundant instructions for ordinary cases of interprocessor memory
 * ordering on any architecture.
 *
 * Each macro below issues a single fence instruction.  The "memory"
 * clobber additionally keeps the compiler from moving memory accesses
 * across the statement.
 *
 *	mb()	mfence
 *	wmb()	sfence
 *	rmb()	lfence
 */
#define	mb()	__asm __volatile("mfence;" : : : "memory")
#define	wmb()	__asm __volatile("sfence;" : : : "memory")
#define	rmb()	__asm __volatile("lfence;" : : : "memory")
59a9643ea8Slogwang
#ifdef _KERNEL
/*
 * OFFSETOF_MONITORBUF == __pcpu_offset(pc_monitorbuf).
 *
 * The open-coded number is used instead of the symbolic expression to
 * avoid a dependency on sys/pcpu.h in machine/atomic.h consumers.
 * An assertion in amd64/vm_machdep.c ensures that the value is correct.
 *
 * The constant is only visible to kernel code (_KERNEL).
 */
#define	OFFSETOF_MONITORBUF	0x100
#endif
70*22ce4affSfengbojiang
#if defined(KCSAN) && !defined(KCSAN_RUNTIME)
#include <sys/_cscan_atomic.h>
#else
#include <sys/atomic_common.h>
75*22ce4affSfengbojiang
/*
 * Various simple operations on memory, each of which is atomic in the
 * presence of interrupts and multiple processors.
 *
 * atomic_set_char(P, V)	(*(u_char *)(P) |= (V))
 * atomic_clear_char(P, V)	(*(u_char *)(P) &= ~(V))
 * atomic_add_char(P, V)	(*(u_char *)(P) += (V))
 * atomic_subtract_char(P, V)	(*(u_char *)(P) -= (V))
 *
 * atomic_set_short(P, V)	(*(u_short *)(P) |= (V))
 * atomic_clear_short(P, V)	(*(u_short *)(P) &= ~(V))
 * atomic_add_short(P, V)	(*(u_short *)(P) += (V))
 * atomic_subtract_short(P, V)	(*(u_short *)(P) -= (V))
 *
 * atomic_set_int(P, V)		(*(u_int *)(P) |= (V))
 * atomic_clear_int(P, V)	(*(u_int *)(P) &= ~(V))
 * atomic_add_int(P, V)		(*(u_int *)(P) += (V))
 * atomic_subtract_int(P, V)	(*(u_int *)(P) -= (V))
 * atomic_swap_int(P, V)	(return (*(u_int *)(P)); *(u_int *)(P) = (V);)
 * atomic_readandclear_int(P)	(return (*(u_int *)(P)); *(u_int *)(P) = 0;)
 *
 * atomic_set_long(P, V)	(*(u_long *)(P) |= (V))
 * atomic_clear_long(P, V)	(*(u_long *)(P) &= ~(V))
 * atomic_add_long(P, V)	(*(u_long *)(P) += (V))
 * atomic_subtract_long(P, V)	(*(u_long *)(P) -= (V))
 * atomic_swap_long(P, V)	(return (*(u_long *)(P)); *(u_long *)(P) = (V);)
 * atomic_readandclear_long(P)	(return (*(u_long *)(P)); *(u_long *)(P) = 0;)
 */
104a9643ea8Slogwang
/*
 * When the compiler supports GNU-style inline assembly (__GNUCLIKE_ASM),
 * the above functions are expanded inline.  Lock prefixes are generated
 * for SMP kernels, for kernel modules and for userland, so that modules
 * and userland binaries are portable between UP and SMP systems.
 *
 * Otherwise only prototypes are declared here and the functions must be
 * provided out of line.
 */
#if !defined(__GNUCLIKE_ASM)
/*
 * Prototype-only form of ATOMIC_ASM, used when GNU-style inline assembly
 * is unavailable.  It declares atomic_NAME_TYPE() and
 * atomic_NAME_barr_TYPE(); the OP, CONS and V arguments are accepted for
 * signature compatibility with the inline form and are ignored here.
 * The user of the macro supplies the terminating semicolon.
 */
#define	ATOMIC_ASM(NAME, TYPE, OP, CONS, V)			\
void atomic_##NAME##_##TYPE(volatile u_##TYPE *p, u_##TYPE v);	\
void atomic_##NAME##_barr_##TYPE(volatile u_##TYPE *p, u_##TYPE v)
117a9643ea8Slogwang
/*
 * Out-of-line declarations used when GNU-style inline assembly is
 * unavailable.
 *
 * cmpset/fcmpset return 0 on failure and non-zero on success; fcmpset
 * takes the expected value by pointer.  fetchadd returns the previous
 * value of *p.  testandset/testandclear take the bit number in v (always
 * a u_int, even for the long variants).
 */
int	atomic_cmpset_char(volatile u_char *dst, u_char expect, u_char src);
int	atomic_cmpset_short(volatile u_short *dst, u_short expect, u_short src);
int	atomic_cmpset_int(volatile u_int *dst, u_int expect, u_int src);
int	atomic_cmpset_long(volatile u_long *dst, u_long expect, u_long src);
int	atomic_fcmpset_char(volatile u_char *dst, u_char *expect, u_char src);
int	atomic_fcmpset_short(volatile u_short *dst, u_short *expect,
	    u_short src);
int	atomic_fcmpset_int(volatile u_int *dst, u_int *expect, u_int src);
int	atomic_fcmpset_long(volatile u_long *dst, u_long *expect, u_long src);
u_int	atomic_fetchadd_int(volatile u_int *p, u_int v);
u_long	atomic_fetchadd_long(volatile u_long *p, u_long v);
int	atomic_testandset_int(volatile u_int *p, u_int v);
int	atomic_testandset_long(volatile u_long *p, u_int v);
int	atomic_testandclear_int(volatile u_int *p, u_int v);
int	atomic_testandclear_long(volatile u_long *p, u_int v);
void	atomic_thread_fence_acq(void);
void	atomic_thread_fence_acq_rel(void);
void	atomic_thread_fence_rel(void);
void	atomic_thread_fence_seq_cst(void);
137a9643ea8Slogwang
/*
 * Prototype-only forms of ATOMIC_LOAD/ATOMIC_STORE, used when GNU-style
 * inline assembly is unavailable.  They declare atomic_load_acq_TYPE()
 * and atomic_store_rel_TYPE(); the user supplies the semicolon.
 */
#define	ATOMIC_LOAD(TYPE)					\
u_##TYPE	atomic_load_acq_##TYPE(volatile u_##TYPE *p)
#define	ATOMIC_STORE(TYPE)					\
void		atomic_store_rel_##TYPE(volatile u_##TYPE *p, u_##TYPE v)
142a9643ea8Slogwang
#else /* __GNUCLIKE_ASM */
144a9643ea8Slogwang
/*
 * For userland, always use lock prefixes so that the binaries will run
 * on both SMP and !SMP systems.  Kernel modules (KLD_MODULE) and SMP
 * kernels get the prefix as well; only a non-module UP kernel build
 * omits it.
 *
 * MPLOCKED is a string literal fragment meant to be pasted in front of
 * an instruction inside an asm template.
 */
#if defined(SMP) || !defined(_KERNEL) || defined(KLD_MODULE)
#define	MPLOCKED	"lock ; "
#else
#define	MPLOCKED
#endif
154a9643ea8Slogwang
/*
 * The assembly is volatilized to avoid code chunk removal by the compiler.
 * GCC aggressively reorders operations and memory clobbering is necessary
 * in order to avoid that for memory barriers.
 *
 * ATOMIC_ASM(NAME, TYPE, OP, CONS, V) defines two inline functions:
 *
 *	atomic_NAME_TYPE(p, v)		applies OP to *p
 *	atomic_NAME_barr_TYPE(p, v)	same, with an added "memory" clobber
 *
 * OP is the instruction template, in which %0 is the memory operand *p
 * and %1 is the expression V passed under constraint CONS.  V may use
 * the function parameter v (for example "~v").  MPLOCKED supplies the
 * optional lock prefix.  The trailing "struct __hack" absorbs the
 * semicolon written after each use of the macro.
 */
#define	ATOMIC_ASM(NAME, TYPE, OP, CONS, V)		\
static __inline void					\
atomic_##NAME##_##TYPE(volatile u_##TYPE *p, u_##TYPE v)\
{							\
	__asm __volatile(MPLOCKED OP			\
	: "+m" (*p)					\
	: CONS (V)					\
	: "cc");					\
}							\
							\
static __inline void					\
atomic_##NAME##_barr_##TYPE(volatile u_##TYPE *p, u_##TYPE v)\
{							\
	__asm __volatile(MPLOCKED OP			\
	: "+m" (*p)					\
	: CONS (V)					\
	: "memory", "cc");				\
}							\
struct __hack
179a9643ea8Slogwang
180a9643ea8Slogwang /*
181*22ce4affSfengbojiang * Atomic compare and set, used by the mutex functions.
182a9643ea8Slogwang *
183*22ce4affSfengbojiang * cmpset:
184*22ce4affSfengbojiang * if (*dst == expect)
185*22ce4affSfengbojiang * *dst = src
186a9643ea8Slogwang *
187*22ce4affSfengbojiang * fcmpset:
188*22ce4affSfengbojiang * if (*dst == *expect)
189*22ce4affSfengbojiang * *dst = src
190*22ce4affSfengbojiang * else
191*22ce4affSfengbojiang * *expect = *dst
192*22ce4affSfengbojiang *
193*22ce4affSfengbojiang * Returns 0 on failure, non-zero on success.
194a9643ea8Slogwang */
195*22ce4affSfengbojiang #define ATOMIC_CMPSET(TYPE) \
196*22ce4affSfengbojiang static __inline int \
197*22ce4affSfengbojiang atomic_cmpset_##TYPE(volatile u_##TYPE *dst, u_##TYPE expect, u_##TYPE src) \
198*22ce4affSfengbojiang { \
199*22ce4affSfengbojiang u_char res; \
200*22ce4affSfengbojiang \
201*22ce4affSfengbojiang __asm __volatile( \
202*22ce4affSfengbojiang " " MPLOCKED " " \
203*22ce4affSfengbojiang " cmpxchg %3,%1 ; " \
204*22ce4affSfengbojiang "# atomic_cmpset_" #TYPE " " \
205*22ce4affSfengbojiang : "=@cce" (res), /* 0 */ \
206*22ce4affSfengbojiang "+m" (*dst), /* 1 */ \
207*22ce4affSfengbojiang "+a" (expect) /* 2 */ \
208*22ce4affSfengbojiang : "r" (src) /* 3 */ \
209*22ce4affSfengbojiang : "memory", "cc"); \
210*22ce4affSfengbojiang return (res); \
211*22ce4affSfengbojiang } \
212*22ce4affSfengbojiang \
213*22ce4affSfengbojiang static __inline int \
214*22ce4affSfengbojiang atomic_fcmpset_##TYPE(volatile u_##TYPE *dst, u_##TYPE *expect, u_##TYPE src) \
215*22ce4affSfengbojiang { \
216*22ce4affSfengbojiang u_char res; \
217*22ce4affSfengbojiang \
218*22ce4affSfengbojiang __asm __volatile( \
219*22ce4affSfengbojiang " " MPLOCKED " " \
220*22ce4affSfengbojiang " cmpxchg %3,%1 ; " \
221*22ce4affSfengbojiang "# atomic_fcmpset_" #TYPE " " \
222*22ce4affSfengbojiang : "=@cce" (res), /* 0 */ \
223*22ce4affSfengbojiang "+m" (*dst), /* 1 */ \
224*22ce4affSfengbojiang "+a" (*expect) /* 2 */ \
225*22ce4affSfengbojiang : "r" (src) /* 3 */ \
226*22ce4affSfengbojiang : "memory", "cc"); \
227*22ce4affSfengbojiang return (res); \
228a9643ea8Slogwang }
229a9643ea8Slogwang
230*22ce4affSfengbojiang ATOMIC_CMPSET(char);
231*22ce4affSfengbojiang ATOMIC_CMPSET(short);
232*22ce4affSfengbojiang ATOMIC_CMPSET(int);
233*22ce4affSfengbojiang ATOMIC_CMPSET(long);
234a9643ea8Slogwang
235a9643ea8Slogwang /*
236a9643ea8Slogwang * Atomically add the value of v to the integer pointed to by p and return
237a9643ea8Slogwang * the previous value of *p.
238a9643ea8Slogwang */
239a9643ea8Slogwang static __inline u_int
atomic_fetchadd_int(volatile u_int * p,u_int v)240a9643ea8Slogwang atomic_fetchadd_int(volatile u_int *p, u_int v)
241a9643ea8Slogwang {
242a9643ea8Slogwang
243a9643ea8Slogwang __asm __volatile(
244a9643ea8Slogwang " " MPLOCKED " "
245a9643ea8Slogwang " xaddl %0,%1 ; "
246a9643ea8Slogwang "# atomic_fetchadd_int"
247a9643ea8Slogwang : "+r" (v), /* 0 */
248a9643ea8Slogwang "+m" (*p) /* 1 */
249a9643ea8Slogwang : : "cc");
250a9643ea8Slogwang return (v);
251a9643ea8Slogwang }
252a9643ea8Slogwang
253a9643ea8Slogwang /*
254a9643ea8Slogwang * Atomically add the value of v to the long integer pointed to by p and return
255a9643ea8Slogwang * the previous value of *p.
256a9643ea8Slogwang */
257a9643ea8Slogwang static __inline u_long
atomic_fetchadd_long(volatile u_long * p,u_long v)258a9643ea8Slogwang atomic_fetchadd_long(volatile u_long *p, u_long v)
259a9643ea8Slogwang {
260a9643ea8Slogwang
261a9643ea8Slogwang __asm __volatile(
262a9643ea8Slogwang " " MPLOCKED " "
263a9643ea8Slogwang " xaddq %0,%1 ; "
264a9643ea8Slogwang "# atomic_fetchadd_long"
265a9643ea8Slogwang : "+r" (v), /* 0 */
266a9643ea8Slogwang "+m" (*p) /* 1 */
267a9643ea8Slogwang : : "cc");
268a9643ea8Slogwang return (v);
269a9643ea8Slogwang }
270a9643ea8Slogwang
271a9643ea8Slogwang static __inline int
atomic_testandset_int(volatile u_int * p,u_int v)272a9643ea8Slogwang atomic_testandset_int(volatile u_int *p, u_int v)
273a9643ea8Slogwang {
274a9643ea8Slogwang u_char res;
275a9643ea8Slogwang
276a9643ea8Slogwang __asm __volatile(
277a9643ea8Slogwang " " MPLOCKED " "
278a9643ea8Slogwang " btsl %2,%1 ; "
279a9643ea8Slogwang "# atomic_testandset_int"
280*22ce4affSfengbojiang : "=@ccc" (res), /* 0 */
281a9643ea8Slogwang "+m" (*p) /* 1 */
282a9643ea8Slogwang : "Ir" (v & 0x1f) /* 2 */
283a9643ea8Slogwang : "cc");
284a9643ea8Slogwang return (res);
285a9643ea8Slogwang }
286a9643ea8Slogwang
287a9643ea8Slogwang static __inline int
atomic_testandset_long(volatile u_long * p,u_int v)288a9643ea8Slogwang atomic_testandset_long(volatile u_long *p, u_int v)
289a9643ea8Slogwang {
290a9643ea8Slogwang u_char res;
291a9643ea8Slogwang
292a9643ea8Slogwang __asm __volatile(
293a9643ea8Slogwang " " MPLOCKED " "
294a9643ea8Slogwang " btsq %2,%1 ; "
295a9643ea8Slogwang "# atomic_testandset_long"
296*22ce4affSfengbojiang : "=@ccc" (res), /* 0 */
297a9643ea8Slogwang "+m" (*p) /* 1 */
298a9643ea8Slogwang : "Jr" ((u_long)(v & 0x3f)) /* 2 */
299a9643ea8Slogwang : "cc");
300a9643ea8Slogwang return (res);
301a9643ea8Slogwang }
302a9643ea8Slogwang
303a9643ea8Slogwang static __inline int
atomic_testandclear_int(volatile u_int * p,u_int v)304a9643ea8Slogwang atomic_testandclear_int(volatile u_int *p, u_int v)
305a9643ea8Slogwang {
306a9643ea8Slogwang u_char res;
307a9643ea8Slogwang
308a9643ea8Slogwang __asm __volatile(
309a9643ea8Slogwang " " MPLOCKED " "
310a9643ea8Slogwang " btrl %2,%1 ; "
311a9643ea8Slogwang "# atomic_testandclear_int"
312*22ce4affSfengbojiang : "=@ccc" (res), /* 0 */
313a9643ea8Slogwang "+m" (*p) /* 1 */
314a9643ea8Slogwang : "Ir" (v & 0x1f) /* 2 */
315a9643ea8Slogwang : "cc");
316a9643ea8Slogwang return (res);
317a9643ea8Slogwang }
318a9643ea8Slogwang
319a9643ea8Slogwang static __inline int
atomic_testandclear_long(volatile u_long * p,u_int v)320a9643ea8Slogwang atomic_testandclear_long(volatile u_long *p, u_int v)
321a9643ea8Slogwang {
322a9643ea8Slogwang u_char res;
323a9643ea8Slogwang
324a9643ea8Slogwang __asm __volatile(
325a9643ea8Slogwang " " MPLOCKED " "
326a9643ea8Slogwang " btrq %2,%1 ; "
327a9643ea8Slogwang "# atomic_testandclear_long"
328*22ce4affSfengbojiang : "=@ccc" (res), /* 0 */
329a9643ea8Slogwang "+m" (*p) /* 1 */
330a9643ea8Slogwang : "Jr" ((u_long)(v & 0x3f)) /* 2 */
331a9643ea8Slogwang : "cc");
332a9643ea8Slogwang return (res);
333a9643ea8Slogwang }
334a9643ea8Slogwang
/*
 * We assume that a = b will do atomic loads and stores.  Due to the
 * IA32 memory model, a simple store guarantees release semantics.
 *
 * However, a load may pass a store if they are performed on distinct
 * addresses, so we need a Store/Load barrier for sequentially
 * consistent fences in SMP kernels.  We use "lock addl $0,mem" for a
 * Store/Load barrier, as recommended by the AMD Software Optimization
 * Guide, and not mfence.  To avoid false data dependencies, we use a
 * special address for "mem".  In the kernel, we use a private per-cpu
 * cache line.  In user space, we use a word in the stack's red zone
 * (-8(%rsp)).
 *
 * For UP kernels, however, the memory of the single processor is
 * always consistent, so we only need to stop the compiler from
 * reordering accesses in a way that violates the semantics of acquire
 * and release.
 */

#if defined(_KERNEL)

#if defined(SMP) || defined(KLD_MODULE)
/*
 * SMP kernel or kernel module: locked no-op add on the per-CPU word at
 * %gs:OFFSETOF_MONITORBUF.
 */
static __inline void
__storeload_barrier(void)
{

	__asm __volatile("lock; addl $0,%%gs:%0"
	    : "+m" (*(u_int *)OFFSETOF_MONITORBUF) : : "memory", "cc");
}
#else /* _KERNEL && UP */
/* UP kernel: a compiler barrier is sufficient. */
static __inline void
__storeload_barrier(void)
{

	__compiler_membar();
}
#endif /* SMP */
#else /* !_KERNEL */
/* Userland: locked no-op add on a word in the stack's red zone. */
static __inline void
__storeload_barrier(void)
{

	__asm __volatile("lock; addl $0,-8(%%rsp)" : : : "memory", "cc");
}
#endif /* _KERNEL*/
380a9643ea8Slogwang
/*
 * ATOMIC_LOAD(TYPE) defines atomic_load_acq_TYPE(p): a plain load of *p
 * followed by a compiler barrier, so that later memory accesses are not
 * hoisted above the load by the compiler.  No fence instruction is
 * emitted.  The trailing "struct __hack" absorbs the semicolon written
 * after each use of the macro.
 */
#define	ATOMIC_LOAD(TYPE)				\
static __inline u_##TYPE				\
atomic_load_acq_##TYPE(volatile u_##TYPE *p)		\
{							\
	u_##TYPE res;					\
							\
	res = *p;					\
	__compiler_membar();				\
	return (res);					\
}							\
struct __hack
392a9643ea8Slogwang
/*
 * ATOMIC_STORE(TYPE) defines atomic_store_rel_TYPE(p, v): a compiler
 * barrier followed by a plain store to *p, so that earlier memory
 * accesses are not sunk below the store by the compiler.  No fence
 * instruction is emitted.  The trailing "struct __hack" absorbs the
 * semicolon written after each use of the macro.
 */
#define	ATOMIC_STORE(TYPE)				\
static __inline void					\
atomic_store_rel_##TYPE(volatile u_##TYPE *p, u_##TYPE v)	\
{							\
							\
	__compiler_membar();				\
	*p = v;						\
}							\
struct __hack
402a9643ea8Slogwang
/*
 * Acquire fence.  Implemented as a compiler barrier only; no fence
 * instruction is emitted.
 */
static __inline void
atomic_thread_fence_acq(void)
{

	__compiler_membar();
}
409a9643ea8Slogwang
/*
 * Release fence.  Implemented as a compiler barrier only; no fence
 * instruction is emitted.
 */
static __inline void
atomic_thread_fence_rel(void)
{

	__compiler_membar();
}
416a9643ea8Slogwang
/*
 * Combined acquire/release fence.  Implemented as a compiler barrier
 * only; no fence instruction is emitted.
 */
static __inline void
atomic_thread_fence_acq_rel(void)
{

	__compiler_membar();
}
423a9643ea8Slogwang
/*
 * Sequentially consistent fence.  Unlike the acquire/release fences this
 * must also order a store followed by a load, so it delegates to
 * __storeload_barrier().
 */
static __inline void
atomic_thread_fence_seq_cst(void)
{

	__storeload_barrier();
}
430a9643ea8Slogwang
#endif /* !__GNUCLIKE_ASM */
432a9643ea8Slogwang
433a9643ea8Slogwang ATOMIC_ASM(set, char, "orb %b1,%0", "iq", v);
434a9643ea8Slogwang ATOMIC_ASM(clear, char, "andb %b1,%0", "iq", ~v);
435a9643ea8Slogwang ATOMIC_ASM(add, char, "addb %b1,%0", "iq", v);
436a9643ea8Slogwang ATOMIC_ASM(subtract, char, "subb %b1,%0", "iq", v);
437a9643ea8Slogwang
438a9643ea8Slogwang ATOMIC_ASM(set, short, "orw %w1,%0", "ir", v);
439a9643ea8Slogwang ATOMIC_ASM(clear, short, "andw %w1,%0", "ir", ~v);
440a9643ea8Slogwang ATOMIC_ASM(add, short, "addw %w1,%0", "ir", v);
441a9643ea8Slogwang ATOMIC_ASM(subtract, short, "subw %w1,%0", "ir", v);
442a9643ea8Slogwang
443a9643ea8Slogwang ATOMIC_ASM(set, int, "orl %1,%0", "ir", v);
444a9643ea8Slogwang ATOMIC_ASM(clear, int, "andl %1,%0", "ir", ~v);
445a9643ea8Slogwang ATOMIC_ASM(add, int, "addl %1,%0", "ir", v);
446a9643ea8Slogwang ATOMIC_ASM(subtract, int, "subl %1,%0", "ir", v);
447a9643ea8Slogwang
448*22ce4affSfengbojiang ATOMIC_ASM(set, long, "orq %1,%0", "er", v);
449*22ce4affSfengbojiang ATOMIC_ASM(clear, long, "andq %1,%0", "er", ~v);
450*22ce4affSfengbojiang ATOMIC_ASM(add, long, "addq %1,%0", "er", v);
451*22ce4affSfengbojiang ATOMIC_ASM(subtract, long, "subq %1,%0", "er", v);
452a9643ea8Slogwang
453a9643ea8Slogwang #define ATOMIC_LOADSTORE(TYPE) \
454a9643ea8Slogwang ATOMIC_LOAD(TYPE); \
455a9643ea8Slogwang ATOMIC_STORE(TYPE)
456a9643ea8Slogwang
457a9643ea8Slogwang ATOMIC_LOADSTORE(char);
458a9643ea8Slogwang ATOMIC_LOADSTORE(short);
459a9643ea8Slogwang ATOMIC_LOADSTORE(int);
460a9643ea8Slogwang ATOMIC_LOADSTORE(long);
461a9643ea8Slogwang
462a9643ea8Slogwang #undef ATOMIC_ASM
463a9643ea8Slogwang #undef ATOMIC_LOAD
464a9643ea8Slogwang #undef ATOMIC_STORE
465a9643ea8Slogwang #undef ATOMIC_LOADSTORE
#ifndef WANT_FUNCTIONS
467a9643ea8Slogwang
/* Read the current value and store a new value in the destination. */
#ifdef __GNUCLIKE_ASM
470a9643ea8Slogwang
/*
 * Atomically store v in *p and return the previous value of *p.
 *
 * xchg with a memory operand is implicitly locked by the processor, so
 * no MPLOCKED prefix is needed.  After the exchange the register that
 * held v contains the old contents of *p.
 */
static __inline u_int
atomic_swap_int(volatile u_int *p, u_int v)
{

	__asm __volatile(
	"	xchgl	%1,%0 ;		"
	"# atomic_swap_int"
	: "+r" (v),			/* 0 */
	  "+m" (*p));			/* 1 */
	return (v);
}
482a9643ea8Slogwang
/*
 * Atomically store v in *p and return the previous value of *p.
 *
 * xchg with a memory operand is implicitly locked by the processor, so
 * no MPLOCKED prefix is needed.  After the exchange the register that
 * held v contains the old contents of *p.
 */
static __inline u_long
atomic_swap_long(volatile u_long *p, u_long v)
{

	__asm __volatile(
	"	xchgq	%1,%0 ;		"
	"# atomic_swap_long"
	: "+r" (v),			/* 0 */
	  "+m" (*p));			/* 1 */
	return (v);
}
494a9643ea8Slogwang
#else /* !__GNUCLIKE_ASM */
496a9643ea8Slogwang
/*
 * Out-of-line declarations used when GNU-style inline assembly is
 * unavailable.  Both store v in *p and return the previous value.
 */
u_int	atomic_swap_int(volatile u_int *p, u_int v);
u_long	atomic_swap_long(volatile u_long *p, u_long v);
499a9643ea8Slogwang
#endif /* __GNUCLIKE_ASM */
501a9643ea8Slogwang
/*
 * Acquire/release aliases.
 *
 * For set/clear/add/subtract, both the _acq_ and _rel_ names map to the
 * _barr_ variant, i.e. the one whose asm carries a "memory" clobber.
 * For cmpset/fcmpset they map to the plain function, which already
 * carries that clobber.
 */
#define	atomic_set_acq_char		atomic_set_barr_char
#define	atomic_set_rel_char		atomic_set_barr_char
#define	atomic_clear_acq_char		atomic_clear_barr_char
#define	atomic_clear_rel_char		atomic_clear_barr_char
#define	atomic_add_acq_char		atomic_add_barr_char
#define	atomic_add_rel_char		atomic_add_barr_char
#define	atomic_subtract_acq_char	atomic_subtract_barr_char
#define	atomic_subtract_rel_char	atomic_subtract_barr_char
#define	atomic_cmpset_acq_char		atomic_cmpset_char
#define	atomic_cmpset_rel_char		atomic_cmpset_char
#define	atomic_fcmpset_acq_char		atomic_fcmpset_char
#define	atomic_fcmpset_rel_char		atomic_fcmpset_char

#define	atomic_set_acq_short		atomic_set_barr_short
#define	atomic_set_rel_short		atomic_set_barr_short
#define	atomic_clear_acq_short		atomic_clear_barr_short
#define	atomic_clear_rel_short		atomic_clear_barr_short
#define	atomic_add_acq_short		atomic_add_barr_short
#define	atomic_add_rel_short		atomic_add_barr_short
#define	atomic_subtract_acq_short	atomic_subtract_barr_short
#define	atomic_subtract_rel_short	atomic_subtract_barr_short
#define	atomic_cmpset_acq_short		atomic_cmpset_short
#define	atomic_cmpset_rel_short		atomic_cmpset_short
#define	atomic_fcmpset_acq_short	atomic_fcmpset_short
#define	atomic_fcmpset_rel_short	atomic_fcmpset_short

#define	atomic_set_acq_int		atomic_set_barr_int
#define	atomic_set_rel_int		atomic_set_barr_int
#define	atomic_clear_acq_int		atomic_clear_barr_int
#define	atomic_clear_rel_int		atomic_clear_barr_int
#define	atomic_add_acq_int		atomic_add_barr_int
#define	atomic_add_rel_int		atomic_add_barr_int
#define	atomic_subtract_acq_int		atomic_subtract_barr_int
#define	atomic_subtract_rel_int		atomic_subtract_barr_int
#define	atomic_cmpset_acq_int		atomic_cmpset_int
#define	atomic_cmpset_rel_int		atomic_cmpset_int
#define	atomic_fcmpset_acq_int		atomic_fcmpset_int
#define	atomic_fcmpset_rel_int		atomic_fcmpset_int

#define	atomic_set_acq_long		atomic_set_barr_long
#define	atomic_set_rel_long		atomic_set_barr_long
#define	atomic_clear_acq_long		atomic_clear_barr_long
#define	atomic_clear_rel_long		atomic_clear_barr_long
#define	atomic_add_acq_long		atomic_add_barr_long
#define	atomic_add_rel_long		atomic_add_barr_long
#define	atomic_subtract_acq_long	atomic_subtract_barr_long
#define	atomic_subtract_rel_long	atomic_subtract_barr_long
#define	atomic_cmpset_acq_long		atomic_cmpset_long
#define	atomic_cmpset_rel_long		atomic_cmpset_long
#define	atomic_fcmpset_acq_long		atomic_fcmpset_long
#define	atomic_fcmpset_rel_long		atomic_fcmpset_long

/* Read-and-clear is a swap with zero. */
#define	atomic_readandclear_int(p)	atomic_swap_int(p, 0)
#define	atomic_readandclear_long(p)	atomic_swap_long(p, 0)
#define	atomic_testandset_acq_long	atomic_testandset_long
557a9643ea8Slogwang
/* Operations on 8-bit bytes: fixed-width names mapped to the char variants. */
#define	atomic_set_8		atomic_set_char
#define	atomic_set_acq_8	atomic_set_acq_char
#define	atomic_set_rel_8	atomic_set_rel_char
#define	atomic_clear_8		atomic_clear_char
#define	atomic_clear_acq_8	atomic_clear_acq_char
#define	atomic_clear_rel_8	atomic_clear_rel_char
#define	atomic_add_8		atomic_add_char
#define	atomic_add_acq_8	atomic_add_acq_char
#define	atomic_add_rel_8	atomic_add_rel_char
#define	atomic_subtract_8	atomic_subtract_char
#define	atomic_subtract_acq_8	atomic_subtract_acq_char
#define	atomic_subtract_rel_8	atomic_subtract_rel_char
#define	atomic_load_acq_8	atomic_load_acq_char
#define	atomic_store_rel_8	atomic_store_rel_char
#define	atomic_cmpset_8		atomic_cmpset_char
#define	atomic_cmpset_acq_8	atomic_cmpset_acq_char
#define	atomic_cmpset_rel_8	atomic_cmpset_rel_char
#define	atomic_fcmpset_8	atomic_fcmpset_char
#define	atomic_fcmpset_acq_8	atomic_fcmpset_acq_char
#define	atomic_fcmpset_rel_8	atomic_fcmpset_rel_char
579a9643ea8Slogwang
/* Operations on 16-bit words: fixed-width names mapped to the short variants. */
#define	atomic_set_16		atomic_set_short
#define	atomic_set_acq_16	atomic_set_acq_short
#define	atomic_set_rel_16	atomic_set_rel_short
#define	atomic_clear_16		atomic_clear_short
#define	atomic_clear_acq_16	atomic_clear_acq_short
#define	atomic_clear_rel_16	atomic_clear_rel_short
#define	atomic_add_16		atomic_add_short
#define	atomic_add_acq_16	atomic_add_acq_short
#define	atomic_add_rel_16	atomic_add_rel_short
#define	atomic_subtract_16	atomic_subtract_short
#define	atomic_subtract_acq_16	atomic_subtract_acq_short
#define	atomic_subtract_rel_16	atomic_subtract_rel_short
#define	atomic_load_acq_16	atomic_load_acq_short
#define	atomic_store_rel_16	atomic_store_rel_short
#define	atomic_cmpset_16	atomic_cmpset_short
#define	atomic_cmpset_acq_16	atomic_cmpset_acq_short
#define	atomic_cmpset_rel_16	atomic_cmpset_rel_short
#define	atomic_fcmpset_16	atomic_fcmpset_short
#define	atomic_fcmpset_acq_16	atomic_fcmpset_acq_short
#define	atomic_fcmpset_rel_16	atomic_fcmpset_rel_short
601a9643ea8Slogwang
/* Operations on 32-bit double words: fixed-width names mapped to the int variants. */
#define	atomic_set_32		atomic_set_int
#define	atomic_set_acq_32	atomic_set_acq_int
#define	atomic_set_rel_32	atomic_set_rel_int
#define	atomic_clear_32		atomic_clear_int
#define	atomic_clear_acq_32	atomic_clear_acq_int
#define	atomic_clear_rel_32	atomic_clear_rel_int
#define	atomic_add_32		atomic_add_int
#define	atomic_add_acq_32	atomic_add_acq_int
#define	atomic_add_rel_32	atomic_add_rel_int
#define	atomic_subtract_32	atomic_subtract_int
#define	atomic_subtract_acq_32	atomic_subtract_acq_int
#define	atomic_subtract_rel_32	atomic_subtract_rel_int
#define	atomic_load_acq_32	atomic_load_acq_int
#define	atomic_store_rel_32	atomic_store_rel_int
#define	atomic_cmpset_32	atomic_cmpset_int
#define	atomic_cmpset_acq_32	atomic_cmpset_acq_int
#define	atomic_cmpset_rel_32	atomic_cmpset_rel_int
#define	atomic_fcmpset_32	atomic_fcmpset_int
#define	atomic_fcmpset_acq_32	atomic_fcmpset_acq_int
#define	atomic_fcmpset_rel_32	atomic_fcmpset_rel_int
#define	atomic_swap_32		atomic_swap_int
#define	atomic_readandclear_32	atomic_readandclear_int
#define	atomic_fetchadd_32	atomic_fetchadd_int
#define	atomic_testandset_32	atomic_testandset_int
#define	atomic_testandclear_32	atomic_testandclear_int
628a9643ea8Slogwang
629a9643ea8Slogwang /* Operations on 64-bit quad words. */
/*
 * Each atomic_<op>_64 name below is an object-like macro that expands to
 * the atomic_<op>_long name with the same operation and _acq/_rel suffix.
 * NOTE(review): presumes long is 64 bits wide on this target (LP64); the
 * type definitions are outside this excerpt -- confirm.
 */
630a9643ea8Slogwang #define atomic_set_64 atomic_set_long
631a9643ea8Slogwang #define atomic_set_acq_64 atomic_set_acq_long
632a9643ea8Slogwang #define atomic_set_rel_64 atomic_set_rel_long
633a9643ea8Slogwang #define atomic_clear_64 atomic_clear_long
634a9643ea8Slogwang #define atomic_clear_acq_64 atomic_clear_acq_long
635a9643ea8Slogwang #define atomic_clear_rel_64 atomic_clear_rel_long
636a9643ea8Slogwang #define atomic_add_64 atomic_add_long
637a9643ea8Slogwang #define atomic_add_acq_64 atomic_add_acq_long
638a9643ea8Slogwang #define atomic_add_rel_64 atomic_add_rel_long
639a9643ea8Slogwang #define atomic_subtract_64 atomic_subtract_long
640a9643ea8Slogwang #define atomic_subtract_acq_64 atomic_subtract_acq_long
641a9643ea8Slogwang #define atomic_subtract_rel_64 atomic_subtract_rel_long
642a9643ea8Slogwang #define atomic_load_acq_64 atomic_load_acq_long
643a9643ea8Slogwang #define atomic_store_rel_64 atomic_store_rel_long
644a9643ea8Slogwang #define atomic_cmpset_64 atomic_cmpset_long
645a9643ea8Slogwang #define atomic_cmpset_acq_64 atomic_cmpset_acq_long
646a9643ea8Slogwang #define atomic_cmpset_rel_64 atomic_cmpset_rel_long
647*22ce4affSfengbojiang #define atomic_fcmpset_64 atomic_fcmpset_long
648*22ce4affSfengbojiang #define atomic_fcmpset_acq_64 atomic_fcmpset_acq_long
649*22ce4affSfengbojiang #define atomic_fcmpset_rel_64 atomic_fcmpset_rel_long
/* The remaining 64-bit aliases have no _acq/_rel variants in this list. */
650a9643ea8Slogwang #define atomic_swap_64 atomic_swap_long
651a9643ea8Slogwang #define atomic_readandclear_64 atomic_readandclear_long
652a9643ea8Slogwang #define atomic_fetchadd_64 atomic_fetchadd_long
653a9643ea8Slogwang #define atomic_testandset_64 atomic_testandset_long
654a9643ea8Slogwang #define atomic_testandclear_64 atomic_testandclear_long
655a9643ea8Slogwang
656a9643ea8Slogwang /* Operations on pointers. */
/*
 * Each atomic_<op>_ptr name below is an object-like macro that expands to
 * the atomic_<op>_long name with the same operation and _acq/_rel suffix;
 * the pointer spellings therefore take whatever argument types the _long
 * operations take.
 * NOTE(review): presumes a pointer and a long have the same width on this
 * target -- confirm against the type definitions outside this excerpt.
 */
657a9643ea8Slogwang #define atomic_set_ptr atomic_set_long
658a9643ea8Slogwang #define atomic_set_acq_ptr atomic_set_acq_long
659a9643ea8Slogwang #define atomic_set_rel_ptr atomic_set_rel_long
660a9643ea8Slogwang #define atomic_clear_ptr atomic_clear_long
661a9643ea8Slogwang #define atomic_clear_acq_ptr atomic_clear_acq_long
662a9643ea8Slogwang #define atomic_clear_rel_ptr atomic_clear_rel_long
663a9643ea8Slogwang #define atomic_add_ptr atomic_add_long
664a9643ea8Slogwang #define atomic_add_acq_ptr atomic_add_acq_long
665a9643ea8Slogwang #define atomic_add_rel_ptr atomic_add_rel_long
666a9643ea8Slogwang #define atomic_subtract_ptr atomic_subtract_long
667a9643ea8Slogwang #define atomic_subtract_acq_ptr atomic_subtract_acq_long
668a9643ea8Slogwang #define atomic_subtract_rel_ptr atomic_subtract_rel_long
669a9643ea8Slogwang #define atomic_load_acq_ptr atomic_load_acq_long
670a9643ea8Slogwang #define atomic_store_rel_ptr atomic_store_rel_long
671a9643ea8Slogwang #define atomic_cmpset_ptr atomic_cmpset_long
672a9643ea8Slogwang #define atomic_cmpset_acq_ptr atomic_cmpset_acq_long
673a9643ea8Slogwang #define atomic_cmpset_rel_ptr atomic_cmpset_rel_long
674*22ce4affSfengbojiang #define atomic_fcmpset_ptr atomic_fcmpset_long
675*22ce4affSfengbojiang #define atomic_fcmpset_acq_ptr atomic_fcmpset_acq_long
676*22ce4affSfengbojiang #define atomic_fcmpset_rel_ptr atomic_fcmpset_rel_long
/*
 * Only swap and readandclear get pointer aliases here; unlike the 32- and
 * 64-bit groups, no fetchadd, testandset or testandclear _ptr alias is
 * defined in this list.
 */
677a9643ea8Slogwang #define atomic_swap_ptr atomic_swap_long
678a9643ea8Slogwang #define atomic_readandclear_ptr atomic_readandclear_long
679a9643ea8Slogwang
680a9643ea8Slogwang #endif /* !WANT_FUNCTIONS */
681a9643ea8Slogwang
682*22ce4affSfengbojiang #endif /* KCSAN && !KCSAN_RUNTIME */
683*22ce4affSfengbojiang
684a9643ea8Slogwang #endif /* !_MACHINE_ATOMIC_H_ */
685