xref: /linux-6.15/include/linux/seqlock.h (revision 55f3560d)
1 /* SPDX-License-Identifier: GPL-2.0 */
2 #ifndef __LINUX_SEQLOCK_H
3 #define __LINUX_SEQLOCK_H
4 
5 /*
6  * seqcount_t / seqlock_t - a reader-writer consistency mechanism with
7  * lockless readers (read-only retry loops), and no writer starvation.
8  *
9  * See Documentation/locking/seqlock.rst
10  *
11  * Copyrights:
12  * - Based on x86_64 vsyscall gettimeofday: Keith Owens, Andrea Arcangeli
13  * - Sequence counters with associated locks, (C) 2020 Linutronix GmbH
14  */
15 
16 #include <linux/compiler.h>
17 #include <linux/kcsan-checks.h>
18 #include <linux/lockdep.h>
19 #include <linux/mutex.h>
20 #include <linux/preempt.h>
21 #include <linux/spinlock.h>
22 #include <linux/ww_mutex.h>
23 
24 #include <asm/processor.h>
25 
26 /*
27  * The seqlock seqcount_t interface does not prescribe a precise sequence of
28  * read begin/retry/end. For readers, typically there is a call to
29  * read_seqcount_begin() and read_seqcount_retry(), however, there are more
30  * esoteric cases which do not follow this pattern.
31  *
32  * As a consequence, we take the following best-effort approach for raw usage
33  * via seqcount_t under KCSAN: upon beginning a seq-reader critical section,
34  * pessimistically mark the next KCSAN_SEQLOCK_REGION_MAX memory accesses as
35  * atomics; if there is a matching read_seqcount_retry() call, no following
36  * memory operations are considered atomic. Usage of the seqlock_t interface
37  * is not affected.
38  */
39 #define KCSAN_SEQLOCK_REGION_MAX 1000	/* upper bound of accesses treated as atomic after a raw read begin */
40 
41 /*
42  * Sequence counters (seqcount_t)
43  *
44  * This is the raw counting mechanism, without any writer protection.
45  *
46  * Write side critical sections must be serialized and non-preemptible.
47  *
48  * If readers can be invoked from hardirq or softirq contexts,
49  * interrupts or bottom halves must also be respectively disabled before
50  * entering the write section.
51  *
52  * This mechanism can't be used if the protected data contains pointers,
53  * as the writer can invalidate a pointer that a reader is following.
54  *
55  * If the write serialization mechanism is one of the common kernel
56  * locking primitives, use a sequence counter with associated lock
57  * (seqcount_LOCKTYPE_t) instead.
58  *
59  * If it's desired to automatically handle the sequence counter writer
60  * serialization and non-preemptibility requirements, use a sequential
61  * lock (seqlock_t) instead.
62  *
63  * See Documentation/locking/seqlock.rst
64  */
65 typedef struct seqcount {
66 	unsigned sequence;	/* even: no writer active; odd: write section in progress */
67 #ifdef CONFIG_DEBUG_LOCK_ALLOC
68 	struct lockdep_map dep_map;
69 #endif
70 } seqcount_t;
71 
72 static inline void __seqcount_init(seqcount_t *s, const char *name,
73 					  struct lock_class_key *key)
74 {
75 	/*
76 	 * Make sure we are not reinitializing a held lock:
77 	 */
78 	lockdep_init_map(&s->dep_map, name, key, 0);
79 	s->sequence = 0;
80 }
81 
82 #ifdef CONFIG_DEBUG_LOCK_ALLOC
83 # define SEQCOUNT_DEP_MAP_INIT(lockname) \
84 		.dep_map = { .name = #lockname } \
85 
86 /**
87  * seqcount_init() - runtime initializer for seqcount_t
88  * @s: Pointer to the seqcount_t instance
89  */
90 # define seqcount_init(s)				\
91 	do {						\
92 		static struct lock_class_key __key;	\
93 		__seqcount_init((s), #s, &__key);	\
94 	} while (0)
95 
96 static inline void seqcount_lockdep_reader_access(const seqcount_t *s)
97 {
98 	seqcount_t *l = (seqcount_t *)s;	/* cast away const: lockdep bookkeeping only, no data change */
99 	unsigned long flags;
100 
101 	local_irq_save(flags);
102 	seqcount_acquire_read(&l->dep_map, 0, 0, _RET_IP_);
103 	seqcount_release(&l->dep_map, _RET_IP_);	/* acquire+release simulates a reader section for lockdep */
104 	local_irq_restore(flags);
105 }
106 
107 #else
108 # define SEQCOUNT_DEP_MAP_INIT(lockname)
109 # define seqcount_init(s) __seqcount_init(s, NULL, NULL)
110 # define seqcount_lockdep_reader_access(x)
111 #endif
112 
113 /**
114  * SEQCNT_ZERO() - static initializer for seqcount_t
115  * @name: Name of the seqcount_t instance
116  */
117 #define SEQCNT_ZERO(name) { .sequence = 0, SEQCOUNT_DEP_MAP_INIT(name) }
118 
119 /*
120  * Sequence counters with associated locks (seqcount_LOCKTYPE_t)
121  *
122  * A sequence counter which associates the lock used for writer
123  * serialization at initialization time. This enables lockdep to validate
124  * that the write side critical section is properly serialized.
125  *
126  * For associated locks which do not implicitly disable preemption,
127  * preemption protection is enforced in the write side function.
128  *
129  * Lockdep is never used for any of the raw write variants.
130  *
131  * See Documentation/locking/seqlock.rst
132  */
133 
134 #ifdef CONFIG_LOCKDEP
135 #define __SEQ_LOCKDEP(expr)	expr	/* emit lockdep-only members/statements */
136 #else
137 #define __SEQ_LOCKDEP(expr)
138 #endif
139 
140 #define SEQCOUNT_LOCKTYPE_ZERO(seq_name, assoc_lock) {			\
141 	.seqcount		= SEQCNT_ZERO(seq_name.seqcount),	\
142 	__SEQ_LOCKDEP(.lock	= (assoc_lock))				\
143 }
144 
145 #define seqcount_locktype_init(s, assoc_lock)				\
146 do {									\
147 	seqcount_init(&(s)->seqcount);					\
148 	__SEQ_LOCKDEP((s)->lock = (assoc_lock));			\
149 } while (0)
150 
151 /**
152  * typedef seqcount_spinlock_t - sequence counter with spinlock associated
153  * @seqcount:	The real sequence counter
154  * @lock:	Pointer to the associated spinlock
155  *
156  * A plain sequence counter with external writer synchronization by a
157  * spinlock. The spinlock is associated to the sequence count in the
158  * static initializer or init function. This enables lockdep to validate
159  * that the write side critical section is properly serialized.
160  */
161 typedef struct seqcount_spinlock {
162 	seqcount_t	seqcount;
163 	__SEQ_LOCKDEP(spinlock_t	*lock);	/* associated lock; member exists only under CONFIG_LOCKDEP */
164 } seqcount_spinlock_t;
165 
166 /**
167  * SEQCNT_SPINLOCK_ZERO - static initializer for seqcount_spinlock_t
168  * @name:	Name of the seqcount_spinlock_t instance
169  * @lock:	Pointer to the associated spinlock
170  */
171 #define SEQCNT_SPINLOCK_ZERO(name, lock)				\
172 	SEQCOUNT_LOCKTYPE_ZERO(name, lock)
173 
174 /**
175  * seqcount_spinlock_init - runtime initializer for seqcount_spinlock_t
176  * @s:		Pointer to the seqcount_spinlock_t instance
177  * @lock:	Pointer to the associated spinlock
178  */
179 #define seqcount_spinlock_init(s, lock)					\
180 	seqcount_locktype_init(s, lock)
181 
182 /**
183  * typedef seqcount_raw_spinlock_t - sequence count with raw spinlock associated
184  * @seqcount:	The real sequence counter
185  * @lock:	Pointer to the associated raw spinlock
186  *
187  * A plain sequence counter with external writer synchronization by a
188  * raw spinlock. The raw spinlock is associated to the sequence count in
189  * the static initializer or init function. This enables lockdep to
190  * validate that the write side critical section is properly serialized.
191  */
192 typedef struct seqcount_raw_spinlock {
193 	seqcount_t      seqcount;
194 	__SEQ_LOCKDEP(raw_spinlock_t	*lock);	/* associated lock; member exists only under CONFIG_LOCKDEP */
195 } seqcount_raw_spinlock_t;
196 
197 /**
198  * SEQCNT_RAW_SPINLOCK_ZERO - static initializer for seqcount_raw_spinlock_t
199  * @name:	Name of the seqcount_raw_spinlock_t instance
200  * @lock:	Pointer to the associated raw_spinlock
201  */
202 #define SEQCNT_RAW_SPINLOCK_ZERO(name, lock)				\
203 	SEQCOUNT_LOCKTYPE_ZERO(name, lock)
204 
205 /**
206  * seqcount_raw_spinlock_init - runtime initializer for seqcount_raw_spinlock_t
207  * @s:		Pointer to the seqcount_raw_spinlock_t instance
208  * @lock:	Pointer to the associated raw_spinlock
209  */
210 #define seqcount_raw_spinlock_init(s, lock)				\
211 	seqcount_locktype_init(s, lock)
212 
213 /**
214  * typedef seqcount_rwlock_t - sequence count with rwlock associated
215  * @seqcount:	The real sequence counter
216  * @lock:	Pointer to the associated rwlock
217  *
218  * A plain sequence counter with external writer synchronization by a
219  * rwlock. The rwlock is associated to the sequence count in the static
220  * initializer or init function. This enables lockdep to validate that
221  * the write side critical section is properly serialized.
222  */
223 typedef struct seqcount_rwlock {
224 	seqcount_t      seqcount;
225 	__SEQ_LOCKDEP(rwlock_t		*lock);	/* associated lock; member exists only under CONFIG_LOCKDEP */
226 } seqcount_rwlock_t;
227 
228 /**
229  * SEQCNT_RWLOCK_ZERO - static initializer for seqcount_rwlock_t
230  * @name:	Name of the seqcount_rwlock_t instance
231  * @lock:	Pointer to the associated rwlock
232  */
233 #define SEQCNT_RWLOCK_ZERO(name, lock)					\
234 	SEQCOUNT_LOCKTYPE_ZERO(name, lock)
235 
236 /**
237  * seqcount_rwlock_init - runtime initializer for seqcount_rwlock_t
238  * @s:		Pointer to the seqcount_rwlock_t instance
239  * @lock:	Pointer to the associated rwlock
240  */
241 #define seqcount_rwlock_init(s, lock)					\
242 	seqcount_locktype_init(s, lock)
243 
244 /**
245  * typedef seqcount_mutex_t - sequence count with mutex associated
246  * @seqcount:	The real sequence counter
247  * @lock:	Pointer to the associated mutex
248  *
249  * A plain sequence counter with external writer synchronization by a
250  * mutex. The mutex is associated to the sequence counter in the static
251  * initializer or init function. This enables lockdep to validate that
252  * the write side critical section is properly serialized.
253  *
254  * The write side API functions write_seqcount_begin()/end() automatically
255  * disable and enable preemption when used with seqcount_mutex_t.
256  */
257 typedef struct seqcount_mutex {
258 	seqcount_t      seqcount;
259 	__SEQ_LOCKDEP(struct mutex	*lock);	/* associated lock; member exists only under CONFIG_LOCKDEP */
260 } seqcount_mutex_t;
261 
262 /**
263  * SEQCNT_MUTEX_ZERO - static initializer for seqcount_mutex_t
264  * @name:	Name of the seqcount_mutex_t instance
265  * @lock:	Pointer to the associated mutex
266  */
267 #define SEQCNT_MUTEX_ZERO(name, lock)					\
268 	SEQCOUNT_LOCKTYPE_ZERO(name, lock)
269 
270 /**
271  * seqcount_mutex_init - runtime initializer for seqcount_mutex_t
272  * @s:		Pointer to the seqcount_mutex_t instance
273  * @lock:	Pointer to the associated mutex
274  */
275 #define seqcount_mutex_init(s, lock)					\
276 	seqcount_locktype_init(s, lock)
277 
278 /**
279  * typedef seqcount_ww_mutex_t - sequence count with ww_mutex associated
280  * @seqcount:	The real sequence counter
281  * @lock:	Pointer to the associated ww_mutex
282  *
283  * A plain sequence counter with external writer synchronization by a
284  * ww_mutex. The ww_mutex is associated to the sequence counter in the static
285  * initializer or init function. This enables lockdep to validate that
286  * the write side critical section is properly serialized.
287  *
288  * The write side API functions write_seqcount_begin()/end() automatically
289  * disable and enable preemption when used with seqcount_ww_mutex_t.
290  */
291 typedef struct seqcount_ww_mutex {
292 	seqcount_t      seqcount;
293 	__SEQ_LOCKDEP(struct ww_mutex	*lock);	/* associated lock; member exists only under CONFIG_LOCKDEP */
294 } seqcount_ww_mutex_t;
295 
296 /**
297  * SEQCNT_WW_MUTEX_ZERO - static initializer for seqcount_ww_mutex_t
298  * @name:	Name of the seqcount_ww_mutex_t instance
299  * @lock:	Pointer to the associated ww_mutex
300  */
301 #define SEQCNT_WW_MUTEX_ZERO(name, lock)				\
302 	SEQCOUNT_LOCKTYPE_ZERO(name, lock)
303 
304 /**
305  * seqcount_ww_mutex_init - runtime initializer for seqcount_ww_mutex_t
306  * @s:		Pointer to the seqcount_ww_mutex_t instance
307  * @lock:	Pointer to the associated ww_mutex
308  */
309 #define seqcount_ww_mutex_init(s, lock)					\
310 	seqcount_locktype_init(s, lock)
311 
312 /*
313  * @preempt: Is the associated write serialization lock preemptible?
314  */
315 #define SEQCOUNT_LOCKTYPE(locktype, preempt, lockmember)		\
316 static inline seqcount_t *						\
317 __seqcount_##locktype##_ptr(seqcount_##locktype##_t *s)			\
318 {									\
319 	return &s->seqcount;						\
320 }									\
321 									\
322 static inline bool							\
323 __seqcount_##locktype##_preemptible(seqcount_##locktype##_t *s)		\
324 {									\
325 	return preempt;							\
326 }									\
327 									\
328 static inline void							\
329 __seqcount_##locktype##_assert(seqcount_##locktype##_t *s)		\
330 {									\
331 	__SEQ_LOCKDEP(lockdep_assert_held(lockmember));			\
332 }
333 
334 /*
335  * Similar hooks, but for plain seqcount_t
336  */
337 
338 static inline seqcount_t *__seqcount_ptr(seqcount_t *s)
339 {
340 	return s;
341 }
342 
343 static inline bool __seqcount_preemptible(seqcount_t *s)
344 {
345 	return false;	/* plain seqcount_t: writers must already be non-preemptible */
346 }
347 
348 static inline void __seqcount_assert(seqcount_t *s)
349 {
350 	lockdep_assert_preemption_disabled();
351 }
352 
353 /*
354  * @s: Pointer to seqcount_locktype_t, generated hooks first parameter.
355  */
356 SEQCOUNT_LOCKTYPE(raw_spinlock,	false,	s->lock)
357 SEQCOUNT_LOCKTYPE(spinlock,	false,	s->lock)
358 SEQCOUNT_LOCKTYPE(rwlock,	false,	s->lock)
359 SEQCOUNT_LOCKTYPE(mutex,	true,	s->lock)
360 SEQCOUNT_LOCKTYPE(ww_mutex,	true,	&s->lock->base)
361 
362 #define __seqprop_case(s, locktype, prop)				\
363 	seqcount_##locktype##_t: __seqcount_##locktype##_##prop((void *)(s))	/* one _Generic association per flavor */
364 
365 #define __seqprop(s, prop) _Generic(*(s),				\
366 	seqcount_t:		__seqcount_##prop((void *)(s)),		\
367 	__seqprop_case((s),	raw_spinlock,	prop),			\
368 	__seqprop_case((s),	spinlock,	prop),			\
369 	__seqprop_case((s),	rwlock,		prop),			\
370 	__seqprop_case((s),	mutex,		prop),			\
371 	__seqprop_case((s),	ww_mutex,	prop))
372 
373 #define __to_seqcount_t(s)				__seqprop(s, ptr)	/* -> plain seqcount_t pointer */
374 #define __associated_lock_exists_and_is_preemptible(s)	__seqprop(s, preemptible)
375 #define __assert_write_section_is_protected(s)		__seqprop(s, assert)
376 
377 /**
378  * __read_seqcount_begin() - begin a seqcount_t read section w/o barrier
379  * @s: Pointer to seqcount_t or any of the seqcount_locktype_t variants
380  *
381  * __read_seqcount_begin is like read_seqcount_begin, but has no smp_rmb()
382  * barrier. Callers should ensure that smp_rmb() or equivalent ordering is
383  * provided before actually loading any of the variables that are to be
384  * protected in this critical section.
385  *
386  * Use carefully, only in critical code, and comment how the barrier is
387  * provided.
388  *
389  * Return: count to be passed to read_seqcount_retry()
390  */
391 #define __read_seqcount_begin(s)					\
392 	__read_seqcount_t_begin(__to_seqcount_t(s))
393 
394 static inline unsigned __read_seqcount_t_begin(const seqcount_t *s)
395 {
396 	unsigned ret;
397 
398 repeat:
399 	ret = READ_ONCE(s->sequence);
400 	if (unlikely(ret & 1)) {
401 		cpu_relax();	/* odd count: a writer is active, spin until it finishes */
402 		goto repeat;
403 	}
404 	kcsan_atomic_next(KCSAN_SEQLOCK_REGION_MAX);	/* mark the upcoming reads as atomic for KCSAN */
405 	return ret;
406 }
407 
408 /**
409  * raw_read_seqcount_begin() - begin a seqcount_t read section w/o lockdep
410  * @s: Pointer to seqcount_t or any of the seqcount_locktype_t variants
411  *
412  * Return: count to be passed to read_seqcount_retry()
413  */
414 #define raw_read_seqcount_begin(s)					\
415 	raw_read_seqcount_t_begin(__to_seqcount_t(s))
416 
417 static inline unsigned raw_read_seqcount_t_begin(const seqcount_t *s)
418 {
419 	unsigned ret = __read_seqcount_t_begin(s);
420 	smp_rmb();	/* order the counter load before the protected data loads */
421 	return ret;
422 }
423 
424 /**
425  * read_seqcount_begin() - begin a seqcount_t read critical section
426  * @s: Pointer to seqcount_t or any of the seqcount_locktype_t variants
427  *
428  * Return: count to be passed to read_seqcount_retry()
429  */
430 #define read_seqcount_begin(s)						\
431 	read_seqcount_t_begin(__to_seqcount_t(s))
432 
433 static inline unsigned read_seqcount_t_begin(const seqcount_t *s)
434 {
435 	seqcount_lockdep_reader_access(s);	/* lockdep: note the reader (no-op w/o CONFIG_DEBUG_LOCK_ALLOC) */
436 	return raw_read_seqcount_t_begin(s);
437 }
438 
439 /**
440  * raw_read_seqcount() - read the raw seqcount_t counter value
441  * @s: Pointer to seqcount_t or any of the seqcount_locktype_t variants
442  *
443  * raw_read_seqcount opens a read critical section of the given
444  * seqcount_t, without any lockdep checking, and without checking or
445  * masking the sequence counter LSB. Calling code is responsible for
446  * handling that.
447  *
448  * Return: count to be passed to read_seqcount_retry()
449  */
450 #define raw_read_seqcount(s)						\
451 	raw_read_seqcount_t(__to_seqcount_t(s))
452 
453 static inline unsigned raw_read_seqcount_t(const seqcount_t *s)
454 {
455 	unsigned ret = READ_ONCE(s->sequence);
456 	smp_rmb();	/* order the counter load before the protected data loads */
457 	kcsan_atomic_next(KCSAN_SEQLOCK_REGION_MAX);
458 	return ret;
459 }
460 
461 /**
462  * raw_seqcount_begin() - begin a seqcount_t read critical section w/o
463  *                        lockdep and w/o counter stabilization
464  * @s: Pointer to seqcount_t or any of the seqcount_locktype_t variants
465  *
466  * raw_seqcount_begin opens a read critical section of the given
467  * seqcount_t. Unlike read_seqcount_begin(), this function will not wait
468  * for the count to stabilize. If a writer is active when it begins, it
469  * will fail the read_seqcount_retry() at the end of the read critical
470  * section instead of stabilizing at the beginning of it.
471  *
472  * Use this only in special kernel hot paths where the read section is
473  * small and has a high probability of success through other external
474  * means. It will save a single branching instruction.
475  *
476  * Return: count to be passed to read_seqcount_retry()
477  */
478 #define raw_seqcount_begin(s)						\
479 	raw_seqcount_t_begin(__to_seqcount_t(s))
480 
481 static inline unsigned raw_seqcount_t_begin(const seqcount_t *s)
482 {
483 	/*
484 	 * If the counter is odd, let read_seqcount_retry() fail
485 	 * by decrementing the counter.
486 	 */
487 	return raw_read_seqcount_t(s) & ~1;	/* LSB cleared: an active writer's odd count can never match */
488 }
489 
490 /**
491  * __read_seqcount_retry() - end a seqcount_t read section w/o barrier
492  * @s: Pointer to seqcount_t or any of the seqcount_locktype_t variants
493  * @start: count, from read_seqcount_begin()
494  *
495  * __read_seqcount_retry is like read_seqcount_retry, but has no smp_rmb()
496  * barrier. Callers should ensure that smp_rmb() or equivalent ordering is
497  * provided before actually loading any of the variables that are to be
498  * protected in this critical section.
499  *
500  * Use carefully, only in critical code, and comment how the barrier is
501  * provided.
502  *
503  * Return: true if a read section retry is required, else false
504  */
505 #define __read_seqcount_retry(s, start)					\
506 	__read_seqcount_t_retry(__to_seqcount_t(s), start)
507 
508 static inline int __read_seqcount_t_retry(const seqcount_t *s, unsigned start)
509 {
510 	kcsan_atomic_next(0);	/* close the KCSAN atomic region opened at read begin */
511 	return unlikely(READ_ONCE(s->sequence) != start);
512 }
513 
514 /**
515  * read_seqcount_retry() - end a seqcount_t read critical section
516  * @s: Pointer to seqcount_t or any of the seqcount_locktype_t variants
517  * @start: count, from read_seqcount_begin()
518  *
519  * read_seqcount_retry closes the read critical section of given
520  * seqcount_t.  If the critical section was invalid, it must be ignored
521  * (and typically retried).
522  *
523  * Return: true if a read section retry is required, else false
524  */
525 #define read_seqcount_retry(s, start)					\
526 	read_seqcount_t_retry(__to_seqcount_t(s), start)
527 
528 static inline int read_seqcount_t_retry(const seqcount_t *s, unsigned start)
529 {
530 	smp_rmb();	/* order the protected data loads before re-checking the counter */
531 	return __read_seqcount_t_retry(s, start);
532 }
533 
534 /**
535  * raw_write_seqcount_begin() - start a seqcount_t write section w/o lockdep
536  * @s: Pointer to seqcount_t or any of the seqcount_locktype_t variants
537  */
538 #define raw_write_seqcount_begin(s)					\
539 do {									\
540 	if (__associated_lock_exists_and_is_preemptible(s))		\
541 		preempt_disable();					\
542 									\
543 	raw_write_seqcount_t_begin(__to_seqcount_t(s));			\
544 } while (0)
545 
546 static inline void raw_write_seqcount_t_begin(seqcount_t *s)
547 {
548 	kcsan_nestable_atomic_begin();
549 	s->sequence++;	/* odd now: readers will spin in begin / fail retry */
550 	smp_wmb();	/* order the counter update before the protected data writes */
551 }
552 
553 /**
554  * raw_write_seqcount_end() - end a seqcount_t write section w/o lockdep
555  * @s: Pointer to seqcount_t or any of the seqcount_locktype_t variants
556  */
557 #define raw_write_seqcount_end(s)					\
558 do {									\
559 	raw_write_seqcount_t_end(__to_seqcount_t(s));			\
560 									\
561 	if (__associated_lock_exists_and_is_preemptible(s))		\
562 		preempt_enable();					\
563 } while (0)
564 
565 static inline void raw_write_seqcount_t_end(seqcount_t *s)
566 {
567 	smp_wmb();	/* order the protected data writes before the counter update */
568 	s->sequence++;	/* even again: release the readers */
569 	kcsan_nestable_atomic_end();
570 }
571 
572 /**
573  * write_seqcount_begin_nested() - start a seqcount_t write section with
574  *                                 custom lockdep nesting level
575  * @s: Pointer to seqcount_t or any of the seqcount_locktype_t variants
576  * @subclass: lockdep nesting level
577  *
578  * See Documentation/locking/lockdep-design.rst
579  */
580 #define write_seqcount_begin_nested(s, subclass)			\
581 do {									\
582 	__assert_write_section_is_protected(s);				\
583 									\
584 	if (__associated_lock_exists_and_is_preemptible(s))		\
585 		preempt_disable();					\
586 									\
587 	write_seqcount_t_begin_nested(__to_seqcount_t(s), subclass);	\
588 } while (0)
589 
590 static inline void write_seqcount_t_begin_nested(seqcount_t *s, int subclass)
591 {
592 	raw_write_seqcount_t_begin(s);
593 	seqcount_acquire(&s->dep_map, subclass, 0, _RET_IP_);	/* lockdep write acquire at given nesting depth */
594 }
595 
596 /**
597  * write_seqcount_begin() - start a seqcount_t write side critical section
598  * @s: Pointer to seqcount_t or any of the seqcount_locktype_t variants
599  *
600  * write_seqcount_begin opens a write side critical section of the given
601  * seqcount_t.
602  *
603  * Context: seqcount_t write side critical sections must be serialized and
604  * non-preemptible. If readers can be invoked from hardirq or softirq
605  * context, interrupts or bottom halves must be respectively disabled.
606  */
607 #define write_seqcount_begin(s)						\
608 do {									\
609 	__assert_write_section_is_protected(s);				\
610 									\
611 	if (__associated_lock_exists_and_is_preemptible(s))		\
612 		preempt_disable();					\
613 									\
614 	write_seqcount_t_begin(__to_seqcount_t(s));			\
615 } while (0)
616 
617 static inline void write_seqcount_t_begin(seqcount_t *s)
618 {
619 	write_seqcount_t_begin_nested(s, 0);	/* default lockdep nesting level 0 */
620 }
621 
622 /**
623  * write_seqcount_end() - end a seqcount_t write side critical section
624  * @s: Pointer to seqcount_t or any of the seqcount_locktype_t variants
625  *
626  * The write section must've been opened with write_seqcount_begin().
627  */
628 #define write_seqcount_end(s)						\
629 do {									\
630 	write_seqcount_t_end(__to_seqcount_t(s));			\
631 									\
632 	if (__associated_lock_exists_and_is_preemptible(s))		\
633 		preempt_enable();					\
634 } while (0)
635 
636 static inline void write_seqcount_t_end(seqcount_t *s)
637 {
638 	seqcount_release(&s->dep_map, _RET_IP_);	/* lockdep release, then bump the counter back to even */
639 	raw_write_seqcount_t_end(s);
640 }
641 
642 /**
643  * raw_write_seqcount_barrier() - do a seqcount_t write barrier
644  * @s: Pointer to seqcount_t or any of the seqcount_locktype_t variants
645  *
646  * This can be used to provide an ordering guarantee instead of the usual
647  * consistency guarantee. It is one wmb cheaper, because it can collapse
648  * the two back-to-back wmb()s.
649  *
650  * Note that writes surrounding the barrier should be declared atomic (e.g.
651  * via WRITE_ONCE): a) to ensure the writes become visible to other threads
652  * atomically, avoiding compiler optimizations; b) to document which writes are
653  * meant to propagate to the reader critical section. This is necessary because
654  * neither writes before and after the barrier are enclosed in a seq-writer
655  * critical section that would ensure readers are aware of ongoing writes::
656  *
657  *	seqcount_t seq;
658  *	bool X = true, Y = false;
659  *
660  *	void read(void)
661  *	{
662  *		bool x, y;
663  *
664  *		do {
665  *			int s = read_seqcount_begin(&seq);
666  *
667  *			x = X; y = Y;
668  *
669  *		} while (read_seqcount_retry(&seq, s));
670  *
671  *		BUG_ON(!x && !y);
672  *      }
673  *
674  *      void write(void)
675  *      {
676  *		WRITE_ONCE(Y, true);
677  *
678  *		raw_write_seqcount_barrier(seq);
679  *
680  *		WRITE_ONCE(X, false);
681  *      }
682  */
683 #define raw_write_seqcount_barrier(s)					\
684 	raw_write_seqcount_t_barrier(__to_seqcount_t(s))
685 
686 static inline void raw_write_seqcount_t_barrier(seqcount_t *s)
687 {
688 	kcsan_nestable_atomic_begin();
689 	s->sequence++;	/* first bump: invalidate any concurrent read section */
690 	smp_wmb();
691 	s->sequence++;	/* second bump: counter is even again */
692 	kcsan_nestable_atomic_end();
693 }
694 
695 /**
696  * write_seqcount_invalidate() - invalidate in-progress seqcount_t read
697  *                               side operations
698  * @s: Pointer to seqcount_t or any of the seqcount_locktype_t variants
699  *
700  * After write_seqcount_invalidate, no seqcount_t read side operations
701  * will complete successfully and see data older than this.
702  */
703 #define write_seqcount_invalidate(s)					\
704 	write_seqcount_t_invalidate(__to_seqcount_t(s))
705 
706 static inline void write_seqcount_t_invalidate(seqcount_t *s)
707 {
708 	smp_wmb();	/* order prior writes before the counter update */
709 	kcsan_nestable_atomic_begin();
710 	s->sequence+=2;	/* +2 keeps the count even, yet forces in-flight readers to retry */
711 	kcsan_nestable_atomic_end();
712 }
713 
714 /**
715  * raw_read_seqcount_latch() - pick even/odd seqcount_t latch data copy
716  * @s: Pointer to seqcount_t or any of the seqcount_locktype_t variants
717  *
718  * Use seqcount_t latching to switch between two storage places protected
719  * by a sequence counter. Doing so allows having interruptible, preemptible,
720  * seqcount_t write side critical sections.
721  *
722  * Check raw_write_seqcount_latch() for more details and a full reader and
723  * writer usage example.
724  *
725  * Return: sequence counter raw value. Use the lowest bit as an index for
726  * picking which data copy to read. The full counter value must then be
727  * checked with read_seqcount_retry().
728  */
729 #define raw_read_seqcount_latch(s)					\
730 	raw_read_seqcount_t_latch(__to_seqcount_t(s))
731 
732 static inline int raw_read_seqcount_t_latch(seqcount_t *s)
733 {
734 	/* Pairs with the first smp_wmb() in raw_write_seqcount_latch() */
735 	int seq = READ_ONCE(s->sequence); /* ^^^ */
736 	return seq;	/* LSB picks the data copy; full value is re-checked at retry */
737 }
738 
739 /**
740  * raw_write_seqcount_latch() - redirect readers to even/odd copy
741  * @s: Pointer to seqcount_t or any of the seqcount_locktype_t variants
742  *
743  * The latch technique is a multiversion concurrency control method that allows
744  * queries during non-atomic modifications. If you can guarantee queries never
745  * interrupt the modification -- e.g. the concurrency is strictly between CPUs
746  * -- you most likely do not need this.
747  *
748  * Where the traditional RCU/lockless data structures rely on atomic
749  * modifications to ensure queries observe either the old or the new state the
750  * latch allows the same for non-atomic updates. The trade-off is doubling the
751  * cost of storage; we have to maintain two copies of the entire data
752  * structure.
753  *
754  * Very simply put: we first modify one copy and then the other. This ensures
755  * there is always one copy in a stable state, ready to give us an answer.
756  *
757  * The basic form is a data structure like::
758  *
759  *	struct latch_struct {
760  *		seqcount_t		seq;
761  *		struct data_struct	data[2];
762  *	};
763  *
764  * Where a modification, which is assumed to be externally serialized, does the
765  * following::
766  *
767  *	void latch_modify(struct latch_struct *latch, ...)
768  *	{
769  *		smp_wmb();	// Ensure that the last data[1] update is visible
770  *		latch->seq++;
771  *		smp_wmb();	// Ensure that the seqcount update is visible
772  *
773  *		modify(latch->data[0], ...);
774  *
775  *		smp_wmb();	// Ensure that the data[0] update is visible
776  *		latch->seq++;
777  *		smp_wmb();	// Ensure that the seqcount update is visible
778  *
779  *		modify(latch->data[1], ...);
780  *	}
781  *
782  * The query will have a form like::
783  *
784  *	struct entry *latch_query(struct latch_struct *latch, ...)
785  *	{
786  *		struct entry *entry;
787  *		unsigned seq, idx;
788  *
789  *		do {
790  *			seq = raw_read_seqcount_latch(&latch->seq);
791  *
792  *			idx = seq & 0x01;
793  *			entry = data_query(latch->data[idx], ...);
794  *
795  *		// read_seqcount_retry() includes needed smp_rmb()
796  *		} while (read_seqcount_retry(&latch->seq, seq));
797  *
798  *		return entry;
799  *	}
800  *
801  * So during the modification, queries are first redirected to data[1]. Then we
802  * modify data[0]. When that is complete, we redirect queries back to data[0]
803  * and we can modify data[1].
804  *
805  * NOTE:
806  *
807  *	The non-requirement for atomic modifications does _NOT_ include
808  *	the publishing of new entries in the case where data is a dynamic
809  *	data structure.
810  *
811  *	An iteration might start in data[0] and get suspended long enough
812  *	to miss an entire modification sequence, once it resumes it might
813  *	observe the new entry.
814  *
815  * NOTE:
816  *
817  *	When data is a dynamic data structure; one should use regular RCU
818  *	patterns to manage the lifetimes of the objects within.
819  */
820 #define raw_write_seqcount_latch(s)					\
821 	raw_write_seqcount_t_latch(__to_seqcount_t(s))
822 
823 static inline void raw_write_seqcount_t_latch(seqcount_t *s)
824 {
825        smp_wmb();      /* prior stores before incrementing "sequence" */
826        s->sequence++;	/* flip the LSB: redirect readers to the other data copy */
827        smp_wmb();      /* increment "sequence" before following stores */
828 }
829 
830 /*
831  * Sequential locks (seqlock_t)
832  *
833  * Sequence counters with an embedded spinlock for writer serialization
834  * and non-preemptibility.
835  *
836  * For more info, see:
837  *    - Comments on top of seqcount_t
838  *    - Documentation/locking/seqlock.rst
839  */
840 typedef struct {
841 	struct seqcount seqcount;
842 	spinlock_t lock;	/* serializes writers and provides non-preemptibility */
843 } seqlock_t;
844 
845 #define __SEQLOCK_UNLOCKED(lockname)			\
846 	{						\
847 		.seqcount = SEQCNT_ZERO(lockname),	\
848 		.lock =	__SPIN_LOCK_UNLOCKED(lockname)	\
849 	}
850 
851 /**
852  * seqlock_init() - dynamic initializer for seqlock_t
853  * @sl: Pointer to the seqlock_t instance
854  */
855 #define seqlock_init(sl)				\
856 	do {						\
857 		seqcount_init(&(sl)->seqcount);		\
858 		spin_lock_init(&(sl)->lock);		\
859 	} while (0)
860 
861 /**
862  * DEFINE_SEQLOCK() - Define a statically allocated seqlock_t
863  * @sl: Name of the seqlock_t instance
864  */
865 #define DEFINE_SEQLOCK(sl) \
866 		seqlock_t sl = __SEQLOCK_UNLOCKED(sl)
867 
868 /**
869  * read_seqbegin() - start a seqlock_t read side critical section
870  * @sl: Pointer to seqlock_t
871  *
872  * Return: count, to be passed to read_seqretry()
873  */
874 static inline unsigned read_seqbegin(const seqlock_t *sl)
875 {
876 	unsigned ret = read_seqcount_begin(&sl->seqcount);
877 
878 	kcsan_atomic_next(0);  /* non-raw usage, assume closing read_seqretry() */
879 	kcsan_flat_atomic_begin();	/* treat the section's reads as atomic until read_seqretry() */
880 	return ret;
881 }
882 
883 /**
884  * read_seqretry() - end a seqlock_t read side section
885  * @sl: Pointer to seqlock_t
886  * @start: count, from read_seqbegin()
887  *
888  * read_seqretry closes the read side critical section of given seqlock_t.
889  * If the critical section was invalid, it must be ignored (and typically
890  * retried).
891  *
892  * Return: true if a read section retry is required, else false
893  */
894 static inline unsigned read_seqretry(const seqlock_t *sl, unsigned start)
895 {
896 	/*
897 	 * Assume not nested: read_seqretry() may be called multiple times when
898 	 * completing read critical section.
899 	 */
900 	kcsan_flat_atomic_end();
901 
902 	return read_seqcount_retry(&sl->seqcount, start);	/* includes the smp_rmb() needed before re-check */
903 }
904 
/**
 * write_seqlock() - start a seqlock_t write side critical section
 * @sl: Pointer to seqlock_t
 *
 * write_seqlock opens a write side critical section for the given
 * seqlock_t.  It also implicitly acquires the spinlock_t embedded inside
 * that sequential lock. All seqlock_t write side sections are thus
 * automatically serialized and non-preemptible.
 *
 * Context: if the seqlock_t read section, or other write side critical
 * sections, can be invoked from hardirq or softirq contexts, use the
 * _irqsave or _bh variants of this function instead.
 */
static inline void write_seqlock(seqlock_t *sl)
{
	/* Serialize writers first, only then open the seqcount write section. */
	spin_lock(&sl->lock);
	write_seqcount_t_begin(&sl->seqcount);
}
923 
/**
 * write_sequnlock() - end a seqlock_t write side critical section
 * @sl: Pointer to seqlock_t
 *
 * write_sequnlock closes the (serialized and non-preemptible) write side
 * critical section of given seqlock_t.
 */
static inline void write_sequnlock(seqlock_t *sl)
{
	/* Close the seqcount write section before dropping the serializing lock. */
	write_seqcount_t_end(&sl->seqcount);
	spin_unlock(&sl->lock);
}
936 
/**
 * write_seqlock_bh() - start a softirqs-disabled seqlock_t write section
 * @sl: Pointer to seqlock_t
 *
 * _bh variant of write_seqlock(). Use only if the read side section, or
 * other write side sections, can be invoked from softirq contexts.
 */
static inline void write_seqlock_bh(seqlock_t *sl)
{
	/* Disable softirqs and serialize writers, then open the write section. */
	spin_lock_bh(&sl->lock);
	write_seqcount_t_begin(&sl->seqcount);
}
949 
/**
 * write_sequnlock_bh() - end a softirqs-disabled seqlock_t write section
 * @sl: Pointer to seqlock_t
 *
 * write_sequnlock_bh closes the serialized, non-preemptible, and
 * softirqs-disabled, seqlock_t write side critical section opened with
 * write_seqlock_bh().
 */
static inline void write_sequnlock_bh(seqlock_t *sl)
{
	/* Close the write section, then drop the lock and re-enable softirqs. */
	write_seqcount_t_end(&sl->seqcount);
	spin_unlock_bh(&sl->lock);
}
963 
/**
 * write_seqlock_irq() - start a non-interruptible seqlock_t write section
 * @sl: Pointer to seqlock_t
 *
 * _irq variant of write_seqlock(). Use only if the read side section, or
 * other write sections, can be invoked from hardirq contexts.
 */
static inline void write_seqlock_irq(seqlock_t *sl)
{
	/* Disable local interrupts and serialize writers before opening the section. */
	spin_lock_irq(&sl->lock);
	write_seqcount_t_begin(&sl->seqcount);
}
976 
/**
 * write_sequnlock_irq() - end a non-interruptible seqlock_t write section
 * @sl: Pointer to seqlock_t
 *
 * write_sequnlock_irq closes the serialized and non-interruptible
 * seqlock_t write side section opened with write_seqlock_irq().
 */
static inline void write_sequnlock_irq(seqlock_t *sl)
{
	/* Close the write section, then drop the lock and re-enable interrupts. */
	write_seqcount_t_end(&sl->seqcount);
	spin_unlock_irq(&sl->lock);
}
989 
/*
 * Internal helper for write_seqlock_irqsave(): acquires the embedded lock
 * with interrupts disabled, opens the write section, and returns the
 * caller's saved local interrupt state.
 */
static inline unsigned long __write_seqlock_irqsave(seqlock_t *sl)
{
	unsigned long flags;

	spin_lock_irqsave(&sl->lock, flags);
	write_seqcount_t_begin(&sl->seqcount);
	return flags;
}
998 
/**
 * write_seqlock_irqsave() - start a non-interruptible seqlock_t write
 *                           section
 * @lock:  Pointer to seqlock_t
 * @flags: Stack-allocated storage for saving caller's local interrupt
 *         state, to be passed to write_sequnlock_irqrestore().
 *
 * _irqsave variant of write_seqlock(). Use it only if the read side
 * section, or other write sections, can be invoked from hardirq context.
 *
 * Implemented as a macro so that @flags is assigned in the caller's scope.
 */
#define write_seqlock_irqsave(lock, flags)				\
	do { flags = __write_seqlock_irqsave(lock); } while (0)
1011 
/**
 * write_sequnlock_irqrestore() - end non-interruptible seqlock_t write
 *                                section
 * @sl:    Pointer to seqlock_t
 * @flags: Caller's saved interrupt state, from write_seqlock_irqsave()
 *
 * write_sequnlock_irqrestore closes the serialized and non-interruptible
 * seqlock_t write section previously opened with write_seqlock_irqsave().
 */
static inline void
write_sequnlock_irqrestore(seqlock_t *sl, unsigned long flags)
{
	/* Close the write section before dropping the lock and restoring IRQ state. */
	write_seqcount_t_end(&sl->seqcount);
	spin_unlock_irqrestore(&sl->lock, flags);
}
1027 
/**
 * read_seqlock_excl() - begin a seqlock_t locking reader section
 * @sl:	Pointer to seqlock_t
 *
 * read_seqlock_excl opens a seqlock_t locking reader critical section.  A
 * locking reader exclusively locks out *both* other writers *and* other
 * locking readers, but it does not update the embedded sequence number.
 *
 * Locking readers act like a normal spin_lock()/spin_unlock().
 *
 * Context: if the seqlock_t write section, *or other read sections*, can
 * be invoked from hardirq or softirq contexts, use the _irqsave or _bh
 * variant of this function instead.
 *
 * The opened read section must be closed with read_sequnlock_excl().
 */
static inline void read_seqlock_excl(seqlock_t *sl)
{
	/* Only the embedded spinlock is taken; the sequence count is untouched. */
	spin_lock(&sl->lock);
}
1048 
/**
 * read_sequnlock_excl() - end a seqlock_t locking reader critical section
 * @sl: Pointer to seqlock_t
 *
 * Closes the section opened with read_seqlock_excl().
 */
static inline void read_sequnlock_excl(seqlock_t *sl)
{
	spin_unlock(&sl->lock);
}
1057 
/**
 * read_seqlock_excl_bh() - start a seqlock_t locking reader section with
 *			    softirqs disabled
 * @sl: Pointer to seqlock_t
 *
 * _bh variant of read_seqlock_excl(). Use this variant only if the
 * seqlock_t write side section, *or other read sections*, can be invoked
 * from softirq contexts.
 */
static inline void read_seqlock_excl_bh(seqlock_t *sl)
{
	/* Disable softirqs while holding the embedded spinlock. */
	spin_lock_bh(&sl->lock);
}
1071 
/**
 * read_sequnlock_excl_bh() - stop a seqlock_t softirq-disabled locking
 *			      reader section
 * @sl: Pointer to seqlock_t
 *
 * Closes the section opened with read_seqlock_excl_bh().
 */
static inline void read_sequnlock_excl_bh(seqlock_t *sl)
{
	spin_unlock_bh(&sl->lock);
}
1081 
/**
 * read_seqlock_excl_irq() - start a non-interruptible seqlock_t locking
 *			     reader section
 * @sl: Pointer to seqlock_t
 *
 * _irq variant of read_seqlock_excl(). Use this only if the seqlock_t
 * write side section, *or other read sections*, can be invoked from a
 * hardirq context.
 */
static inline void read_seqlock_excl_irq(seqlock_t *sl)
{
	/* Disable local interrupts while holding the embedded spinlock. */
	spin_lock_irq(&sl->lock);
}
1095 
/**
 * read_sequnlock_excl_irq() - end an interrupts-disabled seqlock_t
 *                             locking reader section
 * @sl: Pointer to seqlock_t
 *
 * Closes the section opened with read_seqlock_excl_irq().
 */
static inline void read_sequnlock_excl_irq(seqlock_t *sl)
{
	spin_unlock_irq(&sl->lock);
}
1105 
/*
 * Internal helper for read_seqlock_excl_irqsave(): acquires the embedded
 * lock with interrupts disabled and returns the caller's saved local
 * interrupt state.
 */
static inline unsigned long __read_seqlock_excl_irqsave(seqlock_t *sl)
{
	unsigned long flags;

	spin_lock_irqsave(&sl->lock, flags);
	return flags;
}
1113 
/**
 * read_seqlock_excl_irqsave() - start a non-interruptible seqlock_t
 *				 locking reader section
 * @lock:  Pointer to seqlock_t
 * @flags: Stack-allocated storage for saving caller's local interrupt
 *         state, to be passed to read_sequnlock_excl_irqrestore().
 *
 * _irqsave variant of read_seqlock_excl(). Use this only if the seqlock_t
 * write side section, *or other read sections*, can be invoked from a
 * hardirq context.
 *
 * Implemented as a macro so that @flags is assigned in the caller's scope.
 */
#define read_seqlock_excl_irqsave(lock, flags)				\
	do { flags = __read_seqlock_excl_irqsave(lock); } while (0)
1127 
/**
 * read_sequnlock_excl_irqrestore() - end non-interruptible seqlock_t
 *				      locking reader section
 * @sl:    Pointer to seqlock_t
 * @flags: Caller saved interrupt state, from read_seqlock_excl_irqsave()
 *
 * Closes the section opened with read_seqlock_excl_irqsave().
 */
static inline void
read_sequnlock_excl_irqrestore(seqlock_t *sl, unsigned long flags)
{
	spin_unlock_irqrestore(&sl->lock, flags);
}
1139 
1140 /**
1141  * read_seqbegin_or_lock() - begin a seqlock_t lockless or locking reader
1142  * @lock: Pointer to seqlock_t
1143  * @seq : Marker and return parameter. If the passed value is even, the
1144  * reader will become a *lockless* seqlock_t reader as in read_seqbegin().
1145  * If the passed value is odd, the reader will become a *locking* reader
1146  * as in read_seqlock_excl().  In the first call to this function, the
1147  * caller *must* initialize and pass an even value to @seq; this way, a
1148  * lockless read can be optimistically tried first.
1149  *
1150  * read_seqbegin_or_lock is an API designed to optimistically try a normal
1151  * lockless seqlock_t read section first.  If an odd counter is found, the
1152  * lockless read trial has failed, and the next read iteration transforms
1153  * itself into a full seqlock_t locking reader.
1154  *
1155  * This is typically used to avoid seqlock_t lockless readers starvation
1156  * (too much retry loops) in the case of a sharp spike in write side
1157  * activity.
1158  *
1159  * Context: if the seqlock_t write section, *or other read sections*, can
1160  * be invoked from hardirq or softirq contexts, use the _irqsave or _bh
1161  * variant of this function instead.
1162  *
1163  * Check Documentation/locking/seqlock.rst for template example code.
1164  *
1165  * Return: the encountered sequence counter value, through the @seq
1166  * parameter, which is overloaded as a return parameter. This returned
1167  * value must be checked with need_seqretry(). If the read section need to
1168  * be retried, this returned value must also be passed as the @seq
1169  * parameter of the next read_seqbegin_or_lock() iteration.
1170  */
1171 static inline void read_seqbegin_or_lock(seqlock_t *lock, int *seq)
1172 {
1173 	if (!(*seq & 1))	/* Even */
1174 		*seq = read_seqbegin(lock);
1175 	else			/* Odd */
1176 		read_seqlock_excl(lock);
1177 }
1178 
1179 /**
1180  * need_seqretry() - validate seqlock_t "locking or lockless" read section
1181  * @lock: Pointer to seqlock_t
1182  * @seq: sequence count, from read_seqbegin_or_lock()
1183  *
1184  * Return: true if a read section retry is required, false otherwise
1185  */
1186 static inline int need_seqretry(seqlock_t *lock, int seq)
1187 {
1188 	return !(seq & 1) && read_seqretry(lock, seq);
1189 }
1190 
1191 /**
1192  * done_seqretry() - end seqlock_t "locking or lockless" reader section
1193  * @lock: Pointer to seqlock_t
1194  * @seq: count, from read_seqbegin_or_lock()
1195  *
1196  * done_seqretry finishes the seqlock_t read side critical section started
1197  * with read_seqbegin_or_lock() and validated by need_seqretry().
1198  */
1199 static inline void done_seqretry(seqlock_t *lock, int seq)
1200 {
1201 	if (seq & 1)
1202 		read_sequnlock_excl(lock);
1203 }
1204 
1205 /**
1206  * read_seqbegin_or_lock_irqsave() - begin a seqlock_t lockless reader, or
1207  *                                   a non-interruptible locking reader
1208  * @lock: Pointer to seqlock_t
1209  * @seq:  Marker and return parameter. Check read_seqbegin_or_lock().
1210  *
1211  * This is the _irqsave variant of read_seqbegin_or_lock(). Use it only if
1212  * the seqlock_t write section, *or other read sections*, can be invoked
1213  * from hardirq context.
1214  *
1215  * Note: Interrupts will be disabled only for "locking reader" mode.
1216  *
1217  * Return:
1218  *
1219  *   1. The saved local interrupts state in case of a locking reader, to
1220  *      be passed to done_seqretry_irqrestore().
1221  *
1222  *   2. The encountered sequence counter value, returned through @seq
1223  *      overloaded as a return parameter. Check read_seqbegin_or_lock().
1224  */
1225 static inline unsigned long
1226 read_seqbegin_or_lock_irqsave(seqlock_t *lock, int *seq)
1227 {
1228 	unsigned long flags = 0;
1229 
1230 	if (!(*seq & 1))	/* Even */
1231 		*seq = read_seqbegin(lock);
1232 	else			/* Odd */
1233 		read_seqlock_excl_irqsave(lock, flags);
1234 
1235 	return flags;
1236 }
1237 
1238 /**
1239  * done_seqretry_irqrestore() - end a seqlock_t lockless reader, or a
1240  *				non-interruptible locking reader section
1241  * @lock:  Pointer to seqlock_t
1242  * @seq:   Count, from read_seqbegin_or_lock_irqsave()
1243  * @flags: Caller's saved local interrupt state in case of a locking
1244  *	   reader, also from read_seqbegin_or_lock_irqsave()
1245  *
1246  * This is the _irqrestore variant of done_seqretry(). The read section
1247  * must've been opened with read_seqbegin_or_lock_irqsave(), and validated
1248  * by need_seqretry().
1249  */
1250 static inline void
1251 done_seqretry_irqrestore(seqlock_t *lock, int seq, unsigned long flags)
1252 {
1253 	if (seq & 1)
1254 		read_sequnlock_excl_irqrestore(lock, flags);
1255 }
1256 #endif /* __LINUX_SEQLOCK_H */
1257