xref: /linux-6.15/include/linux/seqlock.h (revision 0d24f65e)
1 /* SPDX-License-Identifier: GPL-2.0 */
2 #ifndef __LINUX_SEQLOCK_H
3 #define __LINUX_SEQLOCK_H
4 
5 /*
6  * seqcount_t / seqlock_t - a reader-writer consistency mechanism with
7  * lockless readers (read-only retry loops), and no writer starvation.
8  *
9  * See Documentation/locking/seqlock.rst
10  *
11  * Copyrights:
12  * - Based on x86_64 vsyscall gettimeofday: Keith Owens, Andrea Arcangeli
13  */
14 
15 #include <linux/spinlock.h>
16 #include <linux/preempt.h>
17 #include <linux/lockdep.h>
18 #include <linux/compiler.h>
19 #include <linux/kcsan-checks.h>
20 #include <asm/processor.h>
21 
22 /*
23  * The seqlock seqcount_t interface does not prescribe a precise sequence of
24  * read begin/retry/end. For readers, typically there is a call to
25  * read_seqcount_begin() and read_seqcount_retry(), however, there are more
26  * esoteric cases which do not follow this pattern.
27  *
28  * As a consequence, we take the following best-effort approach for raw usage
29  * via seqcount_t under KCSAN: upon beginning a seq-reader critical section,
30  * pessimistically mark the next KCSAN_SEQLOCK_REGION_MAX memory accesses as
31  * atomics; if there is a matching read_seqcount_retry() call, no following
32  * memory operations are considered atomic. Usage of the seqlock_t interface
33  * is not affected.
34  */
35 #define KCSAN_SEQLOCK_REGION_MAX 1000
36 
37 /*
38  * Sequence counters (seqcount_t)
39  *
40  * This is the raw counting mechanism, without any writer protection.
41  *
42  * Write side critical sections must be serialized and non-preemptible.
43  *
44  * If readers can be invoked from hardirq or softirq contexts,
45  * interrupts or bottom halves must also be respectively disabled before
46  * entering the write section.
47  *
48  * This mechanism can't be used if the protected data contains pointers,
49  * as the writer can invalidate a pointer that a reader is following.
50  *
51  * If it's desired to automatically handle the sequence counter writer
52  * serialization and non-preemptibility requirements, use a sequential
53  * lock (seqlock_t) instead.
54  *
55  * See Documentation/locking/seqlock.rst
56  */
typedef struct seqcount {
	unsigned sequence;	/* even: no writer active; odd: write in progress */
#ifdef CONFIG_DEBUG_LOCK_ALLOC
	struct lockdep_map dep_map;	/* lockdep tracking, debug builds only */
#endif
} seqcount_t;
63 
/*
 * __seqcount_init() - zero @s and register it with lockdep.
 * @name and @key only matter under CONFIG_DEBUG_LOCK_ALLOC; normal code
 * should use the seqcount_init() wrapper instead of calling this directly.
 */
static inline void __seqcount_init(seqcount_t *s, const char *name,
					  struct lock_class_key *key)
{
	/*
	 * Make sure we are not reinitializing a held lock:
	 */
	lockdep_init_map(&s->dep_map, name, key, 0);
	s->sequence = 0;
}
73 
#ifdef CONFIG_DEBUG_LOCK_ALLOC

/* Designated initializer fragment for seqcount_t.dep_map (debug builds). */
# define SEQCOUNT_DEP_MAP_INIT(lockname) \
		.dep_map = { .name = #lockname } \

/* Runtime init: gives each seqcount_init() site its own lockdep class key. */
# define seqcount_init(s)				\
	do {						\
		static struct lock_class_key __key;	\
		__seqcount_init((s), #s, &__key);	\
	} while (0)

/*
 * Tell lockdep a reader touched @s (acquire+release of the read side),
 * with IRQs disabled so the annotation itself cannot be interrupted.
 */
static inline void seqcount_lockdep_reader_access(const seqcount_t *s)
{
	/* Cast away const: lockdep annotations need a mutable dep_map. */
	seqcount_t *l = (seqcount_t *)s;
	unsigned long flags;

	local_irq_save(flags);
	seqcount_acquire_read(&l->dep_map, 0, 0, _RET_IP_);
	seqcount_release(&l->dep_map, _RET_IP_);
	local_irq_restore(flags);
}

#else
# define SEQCOUNT_DEP_MAP_INIT(lockname)
# define seqcount_init(s) __seqcount_init(s, NULL, NULL)
# define seqcount_lockdep_reader_access(x)
#endif
100 
/* Static initializer for a seqcount_t; pairs with seqcount_init(). */
#define SEQCNT_ZERO(lockname) { .sequence = 0, SEQCOUNT_DEP_MAP_INIT(lockname)}
102 
103 
104 /**
105  * __read_seqcount_begin - begin a seq-read critical section (without barrier)
106  * @s: pointer to seqcount_t
107  * Returns: count to be passed to read_seqcount_retry
108  *
109  * __read_seqcount_begin is like read_seqcount_begin, but has no smp_rmb()
110  * barrier. Callers should ensure that smp_rmb() or equivalent ordering is
111  * provided before actually loading any of the variables that are to be
112  * protected in this critical section.
113  *
114  * Use carefully, only in critical code, and comment how the barrier is
115  * provided.
116  */
117 static inline unsigned __read_seqcount_begin(const seqcount_t *s)
118 {
119 	unsigned ret;
120 
121 repeat:
122 	ret = READ_ONCE(s->sequence);
123 	if (unlikely(ret & 1)) {
124 		cpu_relax();
125 		goto repeat;
126 	}
127 	kcsan_atomic_next(KCSAN_SEQLOCK_REGION_MAX);
128 	return ret;
129 }
130 
/**
 * raw_read_seqcount - Read the raw seqcount
 * @s: pointer to seqcount_t
 * Returns: count to be passed to read_seqcount_retry
 *
 * raw_read_seqcount opens a read critical section of the given
 * seqcount without any lockdep checking and without checking or
 * masking the LSB. Calling code is responsible for handling that.
 */
static inline unsigned raw_read_seqcount(const seqcount_t *s)
{
	unsigned ret = READ_ONCE(s->sequence);
	/* Order the count load before the protected data loads that follow. */
	smp_rmb();
	kcsan_atomic_next(KCSAN_SEQLOCK_REGION_MAX);
	return ret;
}
147 
/**
 * raw_read_seqcount_begin - start seq-read critical section w/o lockdep
 * @s: pointer to seqcount_t
 * Returns: count to be passed to read_seqcount_retry
 *
 * raw_read_seqcount_begin opens a read critical section of the given
 * seqcount, but without any lockdep checking. Validity of the critical
 * section is tested by checking read_seqcount_retry function.
 */
static inline unsigned raw_read_seqcount_begin(const seqcount_t *s)
{
	/* Spin-waits until the count is even (no writer active). */
	unsigned ret = __read_seqcount_begin(s);
	/* Pairs with the writer's smp_wmb() in raw_write_seqcount_begin(). */
	smp_rmb();
	return ret;
}
163 
/**
 * read_seqcount_begin - begin a seq-read critical section
 * @s: pointer to seqcount_t
 * Returns: count to be passed to read_seqcount_retry
 *
 * read_seqcount_begin opens a read critical section of the given seqcount.
 * Validity of the critical section is tested by checking read_seqcount_retry
 * function.
 */
static inline unsigned read_seqcount_begin(const seqcount_t *s)
{
	/* Record the reader access for lockdep, then do the raw begin. */
	seqcount_lockdep_reader_access(s);
	return raw_read_seqcount_begin(s);
}
178 
/**
 * raw_seqcount_begin - begin a seq-read critical section
 * @s: pointer to seqcount_t
 * Returns: count to be passed to read_seqcount_retry
 *
 * raw_seqcount_begin opens a read critical section of the given seqcount.
 * Validity of the critical section is tested by checking read_seqcount_retry
 * function.
 *
 * Unlike read_seqcount_begin(), this function will not wait for the count
 * to stabilize. If a writer is active when we begin, we will fail the
 * read_seqcount_retry() instead of stabilizing at the beginning of the
 * critical section.
 */
static inline unsigned raw_seqcount_begin(const seqcount_t *s)
{
	unsigned ret = READ_ONCE(s->sequence);
	smp_rmb();
	kcsan_atomic_next(KCSAN_SEQLOCK_REGION_MAX);
	/*
	 * Clear the LSB: if a writer was active (odd count), the returned
	 * even value cannot match the live count, so the retry must fail.
	 */
	return ret & ~1;
}
200 
/**
 * __read_seqcount_retry - end a seq-read critical section (without barrier)
 * @s: pointer to seqcount_t
 * @start: count, from read_seqcount_begin
 * Returns: 1 if retry is required, else 0
 *
 * __read_seqcount_retry is like read_seqcount_retry, but has no smp_rmb()
 * barrier. Callers should ensure that smp_rmb() or equivalent ordering is
 * provided before actually loading any of the variables that are to be
 * protected in this critical section.
 *
 * Use carefully, only in critical code, and comment how the barrier is
 * provided.
 */
static inline int __read_seqcount_retry(const seqcount_t *s, unsigned start)
{
	/* Close the pessimistic KCSAN atomic region opened at begin time. */
	kcsan_atomic_next(0);
	/* Any writer activity since begin changed the count: retry needed. */
	return unlikely(READ_ONCE(s->sequence) != start);
}
220 
/**
 * read_seqcount_retry - end a seq-read critical section
 * @s: pointer to seqcount_t
 * @start: count, from read_seqcount_begin
 * Returns: 1 if retry is required, else 0
 *
 * read_seqcount_retry closes a read critical section of the given seqcount.
 * If the critical section was invalid, it must be ignored (and typically
 * retried).
 */
static inline int read_seqcount_retry(const seqcount_t *s, unsigned start)
{
	/* Order the protected data loads before re-reading the count. */
	smp_rmb();
	return __read_seqcount_retry(s, start);
}
236 
237 
238 
/*
 * raw_write_seqcount_begin - start a write section without lockdep.
 * Makes the count odd (writer active). The caller must already provide
 * writer serialization and non-preemptibility.
 */
static inline void raw_write_seqcount_begin(seqcount_t *s)
{
	kcsan_nestable_atomic_begin();
	s->sequence++;
	/* Order the count update before the data stores that follow. */
	smp_wmb();
}
245 
/*
 * raw_write_seqcount_end - end a write section without lockdep.
 * Makes the count even again so readers can complete.
 */
static inline void raw_write_seqcount_end(seqcount_t *s)
{
	/* Order the data stores before publishing the even count. */
	smp_wmb();
	s->sequence++;
	kcsan_nestable_atomic_end();
}
252 
/**
 * raw_write_seqcount_barrier - do a seq write barrier
 * @s: pointer to seqcount_t
 *
 * This can be used to provide an ordering guarantee instead of the
 * usual consistency guarantee. It is one wmb cheaper, because we can
 * collapse the two back-to-back wmb()s.
 *
 * Note that writes surrounding the barrier should be declared atomic (e.g.
 * via WRITE_ONCE): a) to ensure the writes become visible to other threads
 * atomically, avoiding compiler optimizations; b) to document which writes are
 * meant to propagate to the reader critical section. This is necessary because
 * neither writes before and after the barrier are enclosed in a seq-writer
 * critical section that would ensure readers are aware of ongoing writes.
 *
 *      seqcount_t seq;
 *      bool X = true, Y = false;
 *
 *      void read(void)
 *      {
 *              bool x, y;
 *              int s;
 *
 *              do {
 *                      s = read_seqcount_begin(&seq);
 *
 *                      x = X; y = Y;
 *
 *              } while (read_seqcount_retry(&seq, s));
 *
 *              BUG_ON(!x && !y);
 *      }
 *
 *      void write(void)
 *      {
 *              WRITE_ONCE(Y, true);
 *
 *              raw_write_seqcount_barrier(&seq);
 *
 *              WRITE_ONCE(X, false);
 *      }
 */
static inline void raw_write_seqcount_barrier(seqcount_t *s)
{
	kcsan_nestable_atomic_begin();
	/* Two increments around one wmb: count stays even afterwards. */
	s->sequence++;
	smp_wmb();
	s->sequence++;
	kcsan_nestable_atomic_end();
}
302 
/*
 * raw_read_seqcount_latch - pick up the latch sequence number.
 * Returns the raw count; the caller uses its LSB to select data[0]/data[1]
 * (see raw_write_seqcount_latch() below for the full pattern).
 */
static inline int raw_read_seqcount_latch(seqcount_t *s)
{
	/* Pairs with the first smp_wmb() in raw_write_seqcount_latch() */
	int seq = READ_ONCE(s->sequence); /* ^^^ */
	return seq;
}
309 
310 /**
311  * raw_write_seqcount_latch - redirect readers to even/odd copy
312  * @s: pointer to seqcount_t
313  *
314  * The latch technique is a multiversion concurrency control method that allows
315  * queries during non-atomic modifications. If you can guarantee queries never
316  * interrupt the modification -- e.g. the concurrency is strictly between CPUs
317  * -- you most likely do not need this.
318  *
319  * Where the traditional RCU/lockless data structures rely on atomic
320  * modifications to ensure queries observe either the old or the new state the
321  * latch allows the same for non-atomic updates. The trade-off is doubling the
322  * cost of storage; we have to maintain two copies of the entire data
323  * structure.
324  *
325  * Very simply put: we first modify one copy and then the other. This ensures
326  * there is always one copy in a stable state, ready to give us an answer.
327  *
328  * The basic form is a data structure like:
329  *
330  * struct latch_struct {
331  *	seqcount_t		seq;
332  *	struct data_struct	data[2];
333  * };
334  *
335  * Where a modification, which is assumed to be externally serialized, does the
336  * following:
337  *
338  * void latch_modify(struct latch_struct *latch, ...)
339  * {
340  *	smp_wmb();	<- Ensure that the last data[1] update is visible
341  *	latch->seq++;
342  *	smp_wmb();	<- Ensure that the seqcount update is visible
343  *
344  *	modify(latch->data[0], ...);
345  *
346  *	smp_wmb();	<- Ensure that the data[0] update is visible
347  *	latch->seq++;
348  *	smp_wmb();	<- Ensure that the seqcount update is visible
349  *
350  *	modify(latch->data[1], ...);
351  * }
352  *
353  * The query will have a form like:
354  *
355  * struct entry *latch_query(struct latch_struct *latch, ...)
356  * {
357  *	struct entry *entry;
358  *	unsigned seq, idx;
359  *
360  *	do {
361  *		seq = raw_read_seqcount_latch(&latch->seq);
362  *
363  *		idx = seq & 0x01;
364  *		entry = data_query(latch->data[idx], ...);
365  *
366  *		smp_rmb();
367  *	} while (seq != latch->seq);
368  *
369  *	return entry;
370  * }
371  *
372  * So during the modification, queries are first redirected to data[1]. Then we
373  * modify data[0]. When that is complete, we redirect queries back to data[0]
374  * and we can modify data[1].
375  *
376  * NOTE: The non-requirement for atomic modifications does _NOT_ include
377  *       the publishing of new entries in the case where data is a dynamic
378  *       data structure.
379  *
380  *       An iteration might start in data[0] and get suspended long enough
381  *       to miss an entire modification sequence, once it resumes it might
382  *       observe the new entry.
383  *
384  * NOTE: When data is a dynamic data structure; one should use regular RCU
385  *       patterns to manage the lifetimes of the objects within.
386  */
387 static inline void raw_write_seqcount_latch(seqcount_t *s)
388 {
389        smp_wmb();      /* prior stores before incrementing "sequence" */
390        s->sequence++;
391        smp_wmb();      /* increment "sequence" before following stores */
392 }
393 
/*
 * write_seqcount_begin_nested - start a write section with a lockdep
 * subclass, for nesting two seqcounts of the same lock class.
 */
static inline void write_seqcount_begin_nested(seqcount_t *s, int subclass)
{
	raw_write_seqcount_begin(s);
	seqcount_acquire(&s->dep_map, subclass, 0, _RET_IP_);
}
399 
/* write_seqcount_begin - start a write section (lockdep subclass 0). */
static inline void write_seqcount_begin(seqcount_t *s)
{
	write_seqcount_begin_nested(s, 0);
}
404 
/* write_seqcount_end - release lockdep, then close the write section. */
static inline void write_seqcount_end(seqcount_t *s)
{
	seqcount_release(&s->dep_map, _RET_IP_);
	raw_write_seqcount_end(s);
}
410 
411 /**
412  * write_seqcount_invalidate - invalidate in-progress read-side seq operations
413  * @s: pointer to seqcount_t
414  *
415  * After write_seqcount_invalidate, no read-side seq operations will complete
416  * successfully and see data older than this.
417  */
418 static inline void write_seqcount_invalidate(seqcount_t *s)
419 {
420 	smp_wmb();
421 	kcsan_nestable_atomic_begin();
422 	s->sequence+=2;
423 	kcsan_nestable_atomic_end();
424 }
425 
426 /*
427  * Sequential locks (seqlock_t)
428  *
429  * Sequence counters with an embedded spinlock for writer serialization
430  * and non-preemptibility.
431  *
432  * For more info, see:
433  *    - Comments on top of seqcount_t
434  *    - Documentation/locking/seqlock.rst
435  */
typedef struct {
	struct seqcount seqcount;	/* reader validity counter */
	spinlock_t lock;		/* serializes writers, disables preemption */
} seqlock_t;
440 
/* Static initializer for a seqlock_t. */
#define __SEQLOCK_UNLOCKED(lockname)			\
	{						\
		.seqcount = SEQCNT_ZERO(lockname),	\
		.lock =	__SPIN_LOCK_UNLOCKED(lockname)	\
	}

/* Runtime initializer: sets up both the seqcount and the spinlock. */
#define seqlock_init(x)					\
	do {						\
		seqcount_init(&(x)->seqcount);		\
		spin_lock_init(&(x)->lock);		\
	} while (0)

/* Define and statically initialize a seqlock_t in one step. */
#define DEFINE_SEQLOCK(x) \
		seqlock_t x = __SEQLOCK_UNLOCKED(x)
455 
/*
 * Read side functions for starting and finalizing a read side section.
 */
static inline unsigned read_seqbegin(const seqlock_t *sl)
{
	unsigned ret = read_seqcount_begin(&sl->seqcount);

	kcsan_atomic_next(0);  /* non-raw usage, assume closing read_seqretry() */
	/* Mark the whole section atomic for KCSAN until read_seqretry(). */
	kcsan_flat_atomic_begin();
	return ret;
}
467 
/* read_seqretry - returns non-zero if the read section must be retried. */
static inline unsigned read_seqretry(const seqlock_t *sl, unsigned start)
{
	/*
	 * Assume not nested: read_seqretry() may be called multiple times when
	 * completing read critical section.
	 */
	kcsan_flat_atomic_end();

	return read_seqcount_retry(&sl->seqcount, start);
}
478 
/*
 * Lock out other writers and update the count.
 * Acts like a normal spin_lock/unlock.
 * Don't need preempt_disable() because that is in the spin_lock already.
 */
static inline void write_seqlock(seqlock_t *sl)
{
	spin_lock(&sl->lock);
	write_seqcount_begin(&sl->seqcount);
}
489 
/* Release in reverse order: end the count section, then drop the lock. */
static inline void write_sequnlock(seqlock_t *sl)
{
	write_seqcount_end(&sl->seqcount);
	spin_unlock(&sl->lock);
}
495 
/* As write_seqlock()/write_sequnlock(), but also disables bottom halves. */
static inline void write_seqlock_bh(seqlock_t *sl)
{
	spin_lock_bh(&sl->lock);
	write_seqcount_begin(&sl->seqcount);
}

static inline void write_sequnlock_bh(seqlock_t *sl)
{
	write_seqcount_end(&sl->seqcount);
	spin_unlock_bh(&sl->lock);
}
507 
/* As write_seqlock()/write_sequnlock(), but also disables hard interrupts. */
static inline void write_seqlock_irq(seqlock_t *sl)
{
	spin_lock_irq(&sl->lock);
	write_seqcount_begin(&sl->seqcount);
}

static inline void write_sequnlock_irq(seqlock_t *sl)
{
	write_seqcount_end(&sl->seqcount);
	spin_unlock_irq(&sl->lock);
}
519 
/* Internal helper for write_seqlock_irqsave(); returns the saved IRQ flags. */
static inline unsigned long __write_seqlock_irqsave(seqlock_t *sl)
{
	unsigned long flags;

	spin_lock_irqsave(&sl->lock, flags);
	write_seqcount_begin(&sl->seqcount);
	return flags;
}

/* Macro wrapper so @flags can be assigned by name, spin_lock_irqsave-style. */
#define write_seqlock_irqsave(lock, flags)				\
	do { flags = __write_seqlock_irqsave(lock); } while (0)

static inline void
write_sequnlock_irqrestore(seqlock_t *sl, unsigned long flags)
{
	write_seqcount_end(&sl->seqcount);
	spin_unlock_irqrestore(&sl->lock, flags);
}
538 
/*
 * A locking reader exclusively locks out other writers and locking readers,
 * but doesn't update the sequence number. Acts like a normal spin_lock/unlock.
 * Don't need preempt_disable() because that is in the spin_lock already.
 */
static inline void read_seqlock_excl(seqlock_t *sl)
{
	spin_lock(&sl->lock);
}

static inline void read_sequnlock_excl(seqlock_t *sl)
{
	spin_unlock(&sl->lock);
}
553 
554 /**
555  * read_seqbegin_or_lock - begin a sequence number check or locking block
556  * @lock: sequence lock
557  * @seq : sequence number to be checked
558  *
559  * First try it once optimistically without taking the lock. If that fails,
560  * take the lock. The sequence number is also used as a marker for deciding
561  * whether to be a reader (even) or writer (odd).
562  * N.B. seq must be initialized to an even number to begin with.
563  */
564 static inline void read_seqbegin_or_lock(seqlock_t *lock, int *seq)
565 {
566 	if (!(*seq & 1))	/* Even */
567 		*seq = read_seqbegin(lock);
568 	else			/* Odd */
569 		read_seqlock_excl(lock);
570 }
571 
572 static inline int need_seqretry(seqlock_t *lock, int seq)
573 {
574 	return !(seq & 1) && read_seqretry(lock, seq);
575 }
576 
/* done_seqretry - finish the block; drops the lock only for locking readers. */
static inline void done_seqretry(seqlock_t *lock, int seq)
{
	if (seq & 1)
		read_sequnlock_excl(lock);
}
582 
/* Locking-reader variants that also disable bottom halves. */
static inline void read_seqlock_excl_bh(seqlock_t *sl)
{
	spin_lock_bh(&sl->lock);
}

static inline void read_sequnlock_excl_bh(seqlock_t *sl)
{
	spin_unlock_bh(&sl->lock);
}
592 
/* Locking-reader variants that also disable hard interrupts. */
static inline void read_seqlock_excl_irq(seqlock_t *sl)
{
	spin_lock_irq(&sl->lock);
}

static inline void read_sequnlock_excl_irq(seqlock_t *sl)
{
	spin_unlock_irq(&sl->lock);
}
602 
/* Internal helper for read_seqlock_excl_irqsave(); returns saved IRQ flags. */
static inline unsigned long __read_seqlock_excl_irqsave(seqlock_t *sl)
{
	unsigned long flags;

	spin_lock_irqsave(&sl->lock, flags);
	return flags;
}

/* Macro wrapper so @flags can be assigned by name, spin_lock_irqsave-style. */
#define read_seqlock_excl_irqsave(lock, flags)				\
	do { flags = __read_seqlock_excl_irqsave(lock); } while (0)
613 
/* Drop the locking-reader lock and restore the saved IRQ flags. */
static inline void
read_sequnlock_excl_irqrestore(seqlock_t *sl, unsigned long flags)
{
	spin_unlock_irqrestore(&sl->lock, flags);
}
619 
620 static inline unsigned long
621 read_seqbegin_or_lock_irqsave(seqlock_t *lock, int *seq)
622 {
623 	unsigned long flags = 0;
624 
625 	if (!(*seq & 1))	/* Even */
626 		*seq = read_seqbegin(lock);
627 	else			/* Odd */
628 		read_seqlock_excl_irqsave(lock, flags);
629 
630 	return flags;
631 }
632 
/* Finish an irqsave block; restores IRQs only for locking readers (odd seq). */
static inline void
done_seqretry_irqrestore(seqlock_t *lock, int seq, unsigned long flags)
{
	if (seq & 1)
		read_sequnlock_excl_irqrestore(lock, flags);
}
639 #endif /* __LINUX_SEQLOCK_H */
640