/* SPDX-License-Identifier: GPL-2.0 */
#ifndef __LINUX_SEQLOCK_H
#define __LINUX_SEQLOCK_H

/*
 * seqcount_t / seqlock_t - a reader-writer consistency mechanism with
 * lockless readers (read-only retry loops), and no writer starvation.
 *
 * See Documentation/locking/seqlock.rst
 *
 * Copyrights:
 * - Based on x86_64 vsyscall gettimeofday: Keith Owens, Andrea Arcangeli
 * - Sequence counters with associated locks, (C) 2020 Linutronix GmbH
 */

#include <linux/compiler.h>
#include <linux/kcsan-checks.h>
#include <linux/lockdep.h>
#include <linux/mutex.h>
#include <linux/preempt.h>
#include <linux/seqlock_types.h>
#include <linux/spinlock.h>

#include <asm/processor.h>

/*
 * The seqlock seqcount_t interface does not prescribe a precise sequence of
 * read begin/retry/end. For readers, typically there is a call to
 * read_seqcount_begin() and read_seqcount_retry(), however, there are more
 * esoteric cases which do not follow this pattern.
 *
 * As a consequence, we take the following best-effort approach for raw usage
 * via seqcount_t under KCSAN: upon beginning a seq-reader critical section,
 * pessimistically mark the next KCSAN_SEQLOCK_REGION_MAX memory accesses as
 * atomics; if there is a matching read_seqcount_retry() call, no following
 * memory operations are considered atomic. Usage of the seqlock_t interface
 * is not affected.
 */
#define KCSAN_SEQLOCK_REGION_MAX 1000

static inline void __seqcount_init(seqcount_t *s, const char *name,
				   struct lock_class_key *key)
{
	/*
	 * Make sure we are not reinitializing a held lock:
	 */
	lockdep_init_map(&s->dep_map, name, key, 0);
	s->sequence = 0;
}

#ifdef CONFIG_DEBUG_LOCK_ALLOC

# define SEQCOUNT_DEP_MAP_INIT(lockname)				\
		.dep_map = { .name = #lockname }

/**
 * seqcount_init() - runtime initializer for seqcount_t
 * @s: Pointer to the seqcount_t instance
 */
# define seqcount_init(s)						\
	do {								\
		static struct lock_class_key __key;			\
		__seqcount_init((s), #s, &__key);			\
	} while (0)

static inline void seqcount_lockdep_reader_access(const seqcount_t *s)
{
	seqcount_t *l = (seqcount_t *)s;
	unsigned long flags;

	local_irq_save(flags);
	seqcount_acquire_read(&l->dep_map, 0, 0, _RET_IP_);
	seqcount_release(&l->dep_map, _RET_IP_);
	local_irq_restore(flags);
}

#else
# define SEQCOUNT_DEP_MAP_INIT(lockname)
# define seqcount_init(s) __seqcount_init(s, NULL, NULL)
# define seqcount_lockdep_reader_access(x)
#endif

/**
 * SEQCNT_ZERO() - static initializer for seqcount_t
 * @name: Name of the seqcount_t instance
 */
#define SEQCNT_ZERO(name) { .sequence = 0, SEQCOUNT_DEP_MAP_INIT(name) }

/*
 * Sequence counters with associated locks (seqcount_LOCKNAME_t)
 *
 * A sequence counter which associates the lock used for writer
 * serialization at initialization time. This enables lockdep to validate
 * that the write side critical section is properly serialized.
 *
 * For associated locks which do not implicitly disable preemption,
 * preemption protection is enforced in the write side function.
 *
 * Lockdep is never used in any of the raw write variants.
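 *
 * For illustration only (the "foo" structure below is hypothetical and not
 * part of this API), associating a seqcount with its write-serialization
 * lock might look like::
 *
 *	struct foo {
 *		spinlock_t		lock;
 *		seqcount_spinlock_t	seq;
 *		int			a, b;
 *	};
 *
 *	void foo_init(struct foo *f)
 *	{
 *		spin_lock_init(&f->lock);
 *		seqcount_spinlock_init(&f->seq, &f->lock);
 *	}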
 *
 * See Documentation/locking/seqlock.rst
 */

/*
 * typedef seqcount_LOCKNAME_t - sequence counter with LOCKNAME associated
 * @seqcount:	The real sequence counter
 * @lock:	Pointer to the associated lock
 *
 * A plain sequence counter with external writer synchronization by
 * LOCKNAME @lock. The lock is associated with the sequence counter in the
 * static initializer or init function. This enables lockdep to validate
 * that the write side critical section is properly serialized.
 *
 * LOCKNAME:	raw_spinlock, spinlock, rwlock or mutex
 */

/*
 * seqcount_LOCKNAME_init() - runtime initializer for seqcount_LOCKNAME_t
 * @s:		Pointer to the seqcount_LOCKNAME_t instance
 * @lock:	Pointer to the associated lock
 */

#define seqcount_LOCKNAME_init(s, _lock, lockname)			\
	do {								\
		seqcount_##lockname##_t *____s = (s);			\
		seqcount_init(&____s->seqcount);			\
		__SEQ_LOCK(____s->lock = (_lock));			\
	} while (0)

#define seqcount_raw_spinlock_init(s, lock)	seqcount_LOCKNAME_init(s, lock, raw_spinlock)
#define seqcount_spinlock_init(s, lock)		seqcount_LOCKNAME_init(s, lock, spinlock)
#define seqcount_rwlock_init(s, lock)		seqcount_LOCKNAME_init(s, lock, rwlock)
#define seqcount_mutex_init(s, lock)		seqcount_LOCKNAME_init(s, lock, mutex)

/*
 * SEQCOUNT_LOCKNAME()	- Instantiate seqcount_LOCKNAME_t and helpers
 * seqprop_LOCKNAME_*()	- Property accessors for seqcount_LOCKNAME_t
 *
 * @lockname:		"LOCKNAME" part of seqcount_LOCKNAME_t
 * @locktype:		LOCKNAME canonical C data type
 * @preemptible:	preemptibility of above locktype
 * @lockbase:		prefix for associated lock/unlock
 */
#define SEQCOUNT_LOCKNAME(lockname, locktype, preemptible, lockbase)	\
static __always_inline seqcount_t *					\
__seqprop_##lockname##_ptr(seqcount_##lockname##_t *s)			\
{									\
	return &s->seqcount;						\
}									\
									\
static __always_inline const seqcount_t *				\
__seqprop_##lockname##_const_ptr(const seqcount_##lockname##_t *s)	\
{									\
	return &s->seqcount;						\
}									\
									\
static __always_inline unsigned						\
__seqprop_##lockname##_sequence(const seqcount_##lockname##_t *s)	\
{									\
	unsigned seq = smp_load_acquire(&s->seqcount.sequence);	\
									\
	if (!IS_ENABLED(CONFIG_PREEMPT_RT))				\
		return seq;						\
									\
	if (preemptible && unlikely(seq & 1)) {				\
		__SEQ_LOCK(lockbase##_lock(s->lock));			\
		__SEQ_LOCK(lockbase##_unlock(s->lock));			\
									\
		/*							\
		 * Re-read the sequence counter since the (possibly	\
		 * preempted) writer made progress.			\
		 */							\
		seq = smp_load_acquire(&s->seqcount.sequence);		\
	}								\
									\
	return seq;							\
}									\
									\
static __always_inline bool						\
__seqprop_##lockname##_preemptible(const seqcount_##lockname##_t *s)	\
{									\
	if (!IS_ENABLED(CONFIG_PREEMPT_RT))				\
		return preemptible;					\
									\
	/* PREEMPT_RT relies on the above LOCK+UNLOCK */		\
	return false;							\
}									\
									\
static __always_inline void						\
__seqprop_##lockname##_assert(const seqcount_##lockname##_t *s)	\
{									\
	__SEQ_LOCK(lockdep_assert_held(s->lock));			\
}

/*
 * __seqprop() for seqcount_t
 */

static inline seqcount_t *__seqprop_ptr(seqcount_t *s)
{
	return s;
}

static inline const seqcount_t *__seqprop_const_ptr(const seqcount_t *s)
{
	return s;
}

static inline unsigned __seqprop_sequence(const seqcount_t *s)
{
	return smp_load_acquire(&s->sequence);
}

static inline bool __seqprop_preemptible(const seqcount_t *s)
{
	return false;
}

static inline void __seqprop_assert(const seqcount_t *s)
{
	lockdep_assert_preemption_disabled();
}

#define __SEQ_RT	IS_ENABLED(CONFIG_PREEMPT_RT)

SEQCOUNT_LOCKNAME(raw_spinlock,	raw_spinlock_t,	false,		raw_spin)
SEQCOUNT_LOCKNAME(spinlock,	spinlock_t,	__SEQ_RT,	spin)
SEQCOUNT_LOCKNAME(rwlock,	rwlock_t,	__SEQ_RT,	read)
SEQCOUNT_LOCKNAME(mutex,	struct mutex,	true,		mutex)
#undef SEQCOUNT_LOCKNAME

/*
 * SEQCOUNT_LOCKNAME_ZERO - static initializer for seqcount_LOCKNAME_t
 * @name:	Name of the seqcount_LOCKNAME_t instance
 * @lock:	Pointer to the associated LOCKNAME
 */

#define SEQCOUNT_LOCKNAME_ZERO(seq_name, assoc_lock) {			\
	.seqcount		= SEQCNT_ZERO(seq_name.seqcount),	\
	__SEQ_LOCK(.lock	= (assoc_lock))				\
}

#define SEQCNT_RAW_SPINLOCK_ZERO(name, lock)	SEQCOUNT_LOCKNAME_ZERO(name, lock)
#define SEQCNT_SPINLOCK_ZERO(name, lock)	SEQCOUNT_LOCKNAME_ZERO(name, lock)
#define SEQCNT_RWLOCK_ZERO(name, lock)		SEQCOUNT_LOCKNAME_ZERO(name, lock)
#define SEQCNT_MUTEX_ZERO(name, lock)		SEQCOUNT_LOCKNAME_ZERO(name, lock)
#define SEQCNT_WW_MUTEX_ZERO(name, lock)	SEQCOUNT_LOCKNAME_ZERO(name, lock)

#define __seqprop_case(s, lockname, prop)				\
	seqcount_##lockname##_t: __seqprop_##lockname##_##prop

#define __seqprop(s, prop) _Generic(*(s),				\
	seqcount_t:		__seqprop_##prop,			\
	__seqprop_case((s),	raw_spinlock,	prop),			\
	__seqprop_case((s),	spinlock,	prop),			\
	__seqprop_case((s),	rwlock,		prop),			\
	__seqprop_case((s),	mutex,		prop))

#define seqprop_ptr(s)			__seqprop(s, ptr)(s)
#define seqprop_const_ptr(s)		__seqprop(s, const_ptr)(s)
#define seqprop_sequence(s)		__seqprop(s, sequence)(s)
#define seqprop_preemptible(s)		__seqprop(s, preemptible)(s)
#define seqprop_assert(s)		__seqprop(s, assert)(s)

/**
 * __read_seqcount_begin() - begin a seqcount_t read section
 * @s: Pointer to seqcount_t or any of the seqcount_LOCKNAME_t variants
 *
 * Return: count to be passed to read_seqcount_retry()
 */
#define __read_seqcount_begin(s)					\
({									\
	unsigned __seq;							\
									\
	while (unlikely((__seq = seqprop_sequence(s)) & 1))		\
		cpu_relax();						\
									\
	kcsan_atomic_next(KCSAN_SEQLOCK_REGION_MAX);			\
	__seq;								\
})

/**
 * raw_read_seqcount_begin() - begin a seqcount_t read section w/o lockdep
 * @s: Pointer to seqcount_t or any of the seqcount_LOCKNAME_t variants
 *
 * Return: count to be passed to read_seqcount_retry()
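 *
 * For illustration only (reusing the hypothetical "foo" structure from the
 * associated-locks comment above), a typical lockless read loop might look
 * like::
 *
 *	unsigned int seq;
 *	int a, b;
 *
 *	do {
 *		seq = raw_read_seqcount_begin(&foo->seq);
 *		a = foo->a;
 *		b = foo->b;
 *	} while (read_seqcount_retry(&foo->seq, seq));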
 */
#define raw_read_seqcount_begin(s)	__read_seqcount_begin(s)

/**
 * read_seqcount_begin() - begin a seqcount_t read critical section
 * @s: Pointer to seqcount_t or any of the seqcount_LOCKNAME_t variants
 *
 * Return: count to be passed to read_seqcount_retry()
 */
#define read_seqcount_begin(s)						\
({									\
	seqcount_lockdep_reader_access(seqprop_const_ptr(s));		\
	raw_read_seqcount_begin(s);					\
})

/**
 * raw_read_seqcount() - read the raw seqcount_t counter value
 * @s: Pointer to seqcount_t or any of the seqcount_LOCKNAME_t variants
 *
 * raw_read_seqcount opens a read critical section of the given
 * seqcount_t, without any lockdep checking, and without checking or
 * masking the sequence counter LSB. Calling code is responsible for
 * handling that.
 *
 * Return: count to be passed to read_seqcount_retry()
 */
#define raw_read_seqcount(s)						\
({									\
	unsigned __seq = seqprop_sequence(s);				\
									\
	kcsan_atomic_next(KCSAN_SEQLOCK_REGION_MAX);			\
	__seq;								\
})

/**
 * raw_seqcount_try_begin() - begin a seqcount_t read critical section
 *                            w/o lockdep and w/o counter stabilization
 * @s: Pointer to seqcount_t or any of the seqcount_LOCKNAME_t variants
 *
 * Similar to raw_seqcount_begin(), except it enables eliding the critical
 * section entirely if odd, instead of doing the speculation knowing it will
 * fail.
 *
 * Useful when counter stabilization is more or less equivalent to taking
 * the lock and there is a slowpath that does that.
 *
 * If true, start will be set to the (even) sequence count read.
 *
 * Return: true when a read critical section is started.
 */
#define raw_seqcount_try_begin(s, start)				\
({									\
	start = raw_read_seqcount(s);					\
	!(start & 1);							\
})

/**
 * raw_seqcount_begin() - begin a seqcount_t read critical section w/o
 *                        lockdep and w/o counter stabilization
 * @s: Pointer to seqcount_t or any of the seqcount_LOCKNAME_t variants
 *
 * raw_seqcount_begin opens a read critical section of the given
 * seqcount_t. Unlike read_seqcount_begin(), this function will not wait
 * for the count to stabilize. If a writer is active when it begins, it
 * will fail the read_seqcount_retry() at the end of the read critical
 * section instead of stabilizing at the beginning of it.
 *
 * Use this only in special kernel hot paths where the read section is
 * small and has a high probability of success through other external
 * means. It will save a single branching instruction.
 *
 * Return: count to be passed to read_seqcount_retry()
 */
#define raw_seqcount_begin(s)						\
({									\
	/*								\
	 * If the counter is odd, let read_seqcount_retry() fail	\
	 * by decrementing the counter.					\
	 */								\
	raw_read_seqcount(s) & ~1;					\
})

/**
 * __read_seqcount_retry() - end a seqcount_t read section w/o barrier
 * @s: Pointer to seqcount_t or any of the seqcount_LOCKNAME_t variants
 * @start: count, from read_seqcount_begin()
 *
 * __read_seqcount_retry is like read_seqcount_retry, but has no smp_rmb()
 * barrier. Callers should ensure that smp_rmb() or equivalent ordering is
 * provided before actually loading any of the variables that are to be
 * protected in this critical section.
 *
 * Use carefully, only in critical code, and comment how the barrier is
 * provided.
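 *
 * For illustration only (hypothetical "foo" data), a caller providing the
 * barrier itself might look like::
 *
 *	unsigned int seq;
 *
 *	do {
 *		seq = read_seqcount_begin(&foo->seq);
 *		a = foo->a;
 *		smp_rmb();	// ordering read_seqcount_retry() would provide
 *	} while (__read_seqcount_retry(&foo->seq, seq));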
 *
 * Return: true if a read section retry is required, else false
 */
#define __read_seqcount_retry(s, start)					\
	do___read_seqcount_retry(seqprop_const_ptr(s), start)

static inline int do___read_seqcount_retry(const seqcount_t *s, unsigned start)
{
	kcsan_atomic_next(0);
	return unlikely(READ_ONCE(s->sequence) != start);
}

/**
 * read_seqcount_retry() - end a seqcount_t read critical section
 * @s: Pointer to seqcount_t or any of the seqcount_LOCKNAME_t variants
 * @start: count, from read_seqcount_begin()
 *
 * read_seqcount_retry closes the read critical section of given
 * seqcount_t. If the critical section was invalid, it must be ignored
 * (and typically retried).
 *
 * Return: true if a read section retry is required, else false
 */
#define read_seqcount_retry(s, start)					\
	do_read_seqcount_retry(seqprop_const_ptr(s), start)

static inline int do_read_seqcount_retry(const seqcount_t *s, unsigned start)
{
	smp_rmb();
	return do___read_seqcount_retry(s, start);
}

/**
 * raw_write_seqcount_begin() - start a seqcount_t write section w/o lockdep
 * @s: Pointer to seqcount_t or any of the seqcount_LOCKNAME_t variants
 *
 * Context: check write_seqcount_begin()
 */
#define raw_write_seqcount_begin(s)					\
do {									\
	if (seqprop_preemptible(s))					\
		preempt_disable();					\
									\
	do_raw_write_seqcount_begin(seqprop_ptr(s));			\
} while (0)

static inline void do_raw_write_seqcount_begin(seqcount_t *s)
{
	kcsan_nestable_atomic_begin();
	s->sequence++;
	smp_wmb();
}

/**
 * raw_write_seqcount_end() - end a seqcount_t write section w/o lockdep
 * @s: Pointer to seqcount_t or any of the seqcount_LOCKNAME_t variants
 *
 * Context: check write_seqcount_end()
 */
#define raw_write_seqcount_end(s)					\
do {									\
	do_raw_write_seqcount_end(seqprop_ptr(s));			\
									\
	if (seqprop_preemptible(s))					\
		preempt_enable();					\
} while (0)

static inline void do_raw_write_seqcount_end(seqcount_t *s)
{
	smp_wmb();
	s->sequence++;
	kcsan_nestable_atomic_end();
}

/**
 * write_seqcount_begin_nested() - start a seqcount_t write section with
 *                                 custom lockdep nesting level
 * @s: Pointer to seqcount_t or any of the seqcount_LOCKNAME_t variants
 * @subclass: lockdep nesting level
 *
 * See Documentation/locking/lockdep-design.rst
 * Context: check write_seqcount_begin()
 */
#define write_seqcount_begin_nested(s, subclass)			\
do {									\
	seqprop_assert(s);						\
									\
	if (seqprop_preemptible(s))					\
		preempt_disable();					\
									\
	do_write_seqcount_begin_nested(seqprop_ptr(s), subclass);	\
} while (0)

static inline void do_write_seqcount_begin_nested(seqcount_t *s, int subclass)
{
	seqcount_acquire(&s->dep_map, subclass, 0, _RET_IP_);
	do_raw_write_seqcount_begin(s);
}

/**
 * write_seqcount_begin() - start a seqcount_t write side critical section
 * @s: Pointer to seqcount_t or any of the seqcount_LOCKNAME_t variants
 *
 * Context: sequence counter write side sections must be serialized and
 * non-preemptible. Preemption will be automatically disabled if and
 * only if the seqcount write serialization lock is associated, and
 * preemptible. If readers can be invoked from hardirq or softirq
 * context, interrupts or bottom halves must be respectively disabled.
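 *
 * For illustration only (reusing the hypothetical "foo" structure from the
 * associated-locks comment above), a write side section might look like::
 *
 *	spin_lock(&foo->lock);
 *	write_seqcount_begin(&foo->seq);
 *	foo->a = new_a;
 *	foo->b = new_b;
 *	write_seqcount_end(&foo->seq);
 *	spin_unlock(&foo->lock);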
 */
#define write_seqcount_begin(s)						\
do {									\
	seqprop_assert(s);						\
									\
	if (seqprop_preemptible(s))					\
		preempt_disable();					\
									\
	do_write_seqcount_begin(seqprop_ptr(s));			\
} while (0)

static inline void do_write_seqcount_begin(seqcount_t *s)
{
	do_write_seqcount_begin_nested(s, 0);
}

/**
 * write_seqcount_end() - end a seqcount_t write side critical section
 * @s: Pointer to seqcount_t or any of the seqcount_LOCKNAME_t variants
 *
 * Context: Preemption will be automatically re-enabled if and only if
 * the seqcount write serialization lock is associated, and preemptible.
 */
#define write_seqcount_end(s)						\
do {									\
	do_write_seqcount_end(seqprop_ptr(s));				\
									\
	if (seqprop_preemptible(s))					\
		preempt_enable();					\
} while (0)

static inline void do_write_seqcount_end(seqcount_t *s)
{
	seqcount_release(&s->dep_map, _RET_IP_);
	do_raw_write_seqcount_end(s);
}

/**
 * raw_write_seqcount_barrier() - do a seqcount_t write barrier
 * @s: Pointer to seqcount_t or any of the seqcount_LOCKNAME_t variants
 *
 * This can be used to provide an ordering guarantee instead of the usual
 * consistency guarantee. It is one wmb cheaper, because it can collapse
 * the two back-to-back wmb()s.
 *
 * Note that writes surrounding the barrier should be declared atomic (e.g.
 * via WRITE_ONCE): a) to ensure the writes become visible to other threads
 * atomically, avoiding compiler optimizations; b) to document which writes are
 * meant to propagate to the reader critical section. This is necessary because
 * neither writes before nor after the barrier are enclosed in a seq-writer
 * critical section that would ensure readers are aware of ongoing writes::
 *
 *	seqcount_t seq;
 *	bool X = true, Y = false;
 *
 *	void read(void)
 *	{
 *		bool x, y;
 *
 *		do {
 *			int s = read_seqcount_begin(&seq);
 *
 *			x = X; y = Y;
 *
 *		} while (read_seqcount_retry(&seq, s));
 *
 *		BUG_ON(!x && !y);
 *	}
 *
 *	void write(void)
 *	{
 *		WRITE_ONCE(Y, true);
 *
 *		raw_write_seqcount_barrier(&seq);
 *
 *		WRITE_ONCE(X, false);
 *	}
 */
#define raw_write_seqcount_barrier(s)					\
	do_raw_write_seqcount_barrier(seqprop_ptr(s))

static inline void do_raw_write_seqcount_barrier(seqcount_t *s)
{
	kcsan_nestable_atomic_begin();
	s->sequence++;
	smp_wmb();
	s->sequence++;
	kcsan_nestable_atomic_end();
}

/**
 * write_seqcount_invalidate() - invalidate in-progress seqcount_t read
 *                               side operations
 * @s: Pointer to seqcount_t or any of the seqcount_LOCKNAME_t variants
 *
 * After write_seqcount_invalidate, no seqcount_t read side operations
 * will complete successfully and see data older than this.
 */
#define write_seqcount_invalidate(s)					\
	do_write_seqcount_invalidate(seqprop_ptr(s))

static inline void do_write_seqcount_invalidate(seqcount_t *s)
{
	smp_wmb();
	kcsan_nestable_atomic_begin();
	s->sequence += 2;
	kcsan_nestable_atomic_end();
}

/*
 * Latch sequence counters (seqcount_latch_t)
 *
 * A sequence counter variant where the counter even/odd value is used to
 * switch between two copies of protected data. This allows the read path,
 * typically NMIs, to safely interrupt the write side critical section.
 *
 * As the write sections are fully preemptible, no special handling for
 * PREEMPT_RT is needed.
 */
typedef struct {
	seqcount_t seqcount;
} seqcount_latch_t;

/**
 * SEQCNT_LATCH_ZERO() - static initializer for seqcount_latch_t
 * @seq_name: Name of the seqcount_latch_t instance
 */
#define SEQCNT_LATCH_ZERO(seq_name) {					\
	.seqcount		= SEQCNT_ZERO(seq_name.seqcount),	\
}

/**
 * seqcount_latch_init() - runtime initializer for seqcount_latch_t
 * @s: Pointer to the seqcount_latch_t instance
 */
#define seqcount_latch_init(s) seqcount_init(&(s)->seqcount)

/**
 * raw_read_seqcount_latch() - pick even/odd latch data copy
 * @s: Pointer to seqcount_latch_t
 *
 * See write_seqcount_latch_begin() for details and a full reader/writer
 * usage example.
 *
 * Return: sequence counter raw value. Use the lowest bit as an index for
 * picking which data copy to read. The full counter must then be checked
 * with raw_read_seqcount_latch_retry().
 */
static __always_inline unsigned raw_read_seqcount_latch(const seqcount_latch_t *s)
{
	/*
	 * Pairs with the first smp_wmb() in raw_write_seqcount_latch().
	 * Due to the dependent load, a full smp_rmb() is not needed.
	 */
	return READ_ONCE(s->seqcount.sequence);
}

/**
 * read_seqcount_latch() - pick even/odd latch data copy
 * @s: Pointer to seqcount_latch_t
 *
 * See write_seqcount_latch_begin() for details and a full reader/writer
 * usage example.
 *
 * Return: sequence counter raw value. Use the lowest bit as an index for
 * picking which data copy to read. The full counter must then be checked
 * with read_seqcount_latch_retry().
 */
static __always_inline unsigned read_seqcount_latch(const seqcount_latch_t *s)
{
	kcsan_atomic_next(KCSAN_SEQLOCK_REGION_MAX);
	return raw_read_seqcount_latch(s);
}

/**
 * raw_read_seqcount_latch_retry() - end a seqcount_latch_t read section
 * @s:		Pointer to seqcount_latch_t
 * @start:	count, from raw_read_seqcount_latch()
 *
 * Return: true if a read section retry is required, else false
 */
static __always_inline int
raw_read_seqcount_latch_retry(const seqcount_latch_t *s, unsigned start)
{
	smp_rmb();
	return unlikely(READ_ONCE(s->seqcount.sequence) != start);
}

/**
 * read_seqcount_latch_retry() - end a seqcount_latch_t read section
 * @s:		Pointer to seqcount_latch_t
 * @start:	count, from read_seqcount_latch()
 *
 * Return: true if a read section retry is required, else false
 */
static __always_inline int
read_seqcount_latch_retry(const seqcount_latch_t *s, unsigned start)
{
	kcsan_atomic_next(0);
	return raw_read_seqcount_latch_retry(s, start);
}

/**
 * raw_write_seqcount_latch() - redirect latch readers to even/odd copy
 * @s: Pointer to seqcount_latch_t
 */
static __always_inline void raw_write_seqcount_latch(seqcount_latch_t *s)
{
	smp_wmb();	/* prior stores before incrementing "sequence" */
	s->seqcount.sequence++;
	smp_wmb();	/* increment "sequence" before following stores */
}

/**
 * write_seqcount_latch_begin() - redirect latch readers to odd copy
 * @s: Pointer to seqcount_latch_t
 *
 * The latch technique is a multiversion concurrency control method that allows
 * queries during non-atomic modifications. If you can guarantee queries never
 * interrupt the modification -- e.g. the concurrency is strictly between CPUs
 * -- you most likely do not need this.
 *
 * Where the traditional RCU/lockless data structures rely on atomic
 * modifications to ensure queries observe either the old or the new state the
 * latch allows the same for non-atomic updates. The trade-off is doubling the
 * cost of storage; we have to maintain two copies of the entire data
 * structure.
 *
 * Very simply put: we first modify one copy and then the other. This ensures
 * there is always one copy in a stable state, ready to give us an answer.
 *
 * The basic form is a data structure like::
 *
 *	struct latch_struct {
 *		seqcount_latch_t	seq;
 *		struct data_struct	data[2];
 *	};
 *
 * Where a modification, which is assumed to be externally serialized, does the
 * following::
 *
 *	void latch_modify(struct latch_struct *latch, ...)
 *	{
 *		write_seqcount_latch_begin(&latch->seq);
 *		modify(latch->data[0], ...);
 *		write_seqcount_latch(&latch->seq);
 *		modify(latch->data[1], ...);
 *		write_seqcount_latch_end(&latch->seq);
 *	}
 *
 * The query will have a form like::
 *
 *	struct entry *latch_query(struct latch_struct *latch, ...)
 *	{
 *		struct entry *entry;
 *		unsigned seq, idx;
 *
 *		do {
 *			seq = read_seqcount_latch(&latch->seq);
 *
 *			idx = seq & 0x01;
 *			entry = data_query(latch->data[idx], ...);
 *
 *		// This includes needed smp_rmb()
 *		} while (read_seqcount_latch_retry(&latch->seq, seq));
 *
 *		return entry;
 *	}
 *
 * So during the modification, queries are first redirected to data[1]. Then we
 * modify data[0]. When that is complete, we redirect queries back to data[0]
 * and we can modify data[1].
 *
 * NOTE:
 *
 *	The non-requirement for atomic modifications does _NOT_ include
 *	the publishing of new entries in the case where data is a dynamic
 *	data structure.
 *
 *	An iteration might start in data[0] and get suspended long enough
 *	to miss an entire modification sequence; once it resumes, it might
 *	observe the new entry.
 *
 * NOTE2:
 *
 *	When data is a dynamic data structure, one should use regular RCU
 *	patterns to manage the lifetimes of the objects within.
 */
static __always_inline void write_seqcount_latch_begin(seqcount_latch_t *s)
{
	kcsan_nestable_atomic_begin();
	raw_write_seqcount_latch(s);
}

/**
 * write_seqcount_latch() - redirect latch readers to even copy
 * @s: Pointer to seqcount_latch_t
 */
static __always_inline void write_seqcount_latch(seqcount_latch_t *s)
{
	raw_write_seqcount_latch(s);
}

/**
 * write_seqcount_latch_end() - end a seqcount_latch_t write section
 * @s: Pointer to seqcount_latch_t
 *
 * Marks the end of a seqcount_latch_t writer section, after all copies of the
 * latch-protected data have been updated.
 */
static __always_inline void write_seqcount_latch_end(seqcount_latch_t *s)
{
	kcsan_nestable_atomic_end();
}

#define __SEQLOCK_UNLOCKED(lockname)					\
	{								\
		.seqcount = SEQCNT_SPINLOCK_ZERO(lockname, &(lockname).lock), \
		.lock =	__SPIN_LOCK_UNLOCKED(lockname)			\
	}

/**
 * seqlock_init() - dynamic initializer for seqlock_t
 * @sl: Pointer to the seqlock_t instance
 */
#define seqlock_init(sl)						\
	do {								\
		spin_lock_init(&(sl)->lock);				\
		seqcount_spinlock_init(&(sl)->seqcount, &(sl)->lock);	\
	} while (0)

/**
 * DEFINE_SEQLOCK(sl) - Define a statically allocated seqlock_t
 * @sl: Name of the seqlock_t instance
 */
#define DEFINE_SEQLOCK(sl) \
		seqlock_t sl = __SEQLOCK_UNLOCKED(sl)

/**
 * read_seqbegin() - start a seqlock_t read side critical section
 * @sl: Pointer to seqlock_t
 *
 * Return: count, to be passed to read_seqretry()
 */
static inline unsigned read_seqbegin(const seqlock_t *sl)
{
	return read_seqcount_begin(&sl->seqcount);
}

/**
 * read_seqretry() - end a seqlock_t read side section
 * @sl: Pointer to seqlock_t
 * @start: count, from read_seqbegin()
 *
 * read_seqretry closes the read side critical section of given seqlock_t.
 * If the critical section was invalid, it must be ignored (and typically
 * retried).
 *
 * Return: true if a read section retry is required, else false
 */
static inline unsigned read_seqretry(const seqlock_t *sl, unsigned start)
{
	return read_seqcount_retry(&sl->seqcount, start);
}

/*
 * For all seqlock_t write side functions, use the internal
 * do_write_seqcount_begin() instead of generic write_seqcount_begin().
 * This way, no redundant lockdep_assert_held() checks are added.
 */

/**
 * write_seqlock() - start a seqlock_t write side critical section
 * @sl: Pointer to seqlock_t
 *
 * write_seqlock opens a write side critical section for the given
 * seqlock_t. It also implicitly acquires the spinlock_t embedded inside
 * that sequential lock. All seqlock_t write side sections are thus
 * automatically serialized and non-preemptible.
 *
 * Context: if the seqlock_t read section, or other write side critical
 * sections, can be invoked from hardirq or softirq contexts, use the
 * _irqsave or _bh variants of this function instead.
 */
static inline void write_seqlock(seqlock_t *sl)
{
	spin_lock(&sl->lock);
	do_write_seqcount_begin(&sl->seqcount.seqcount);
}

/**
 * write_sequnlock() - end a seqlock_t write side critical section
 * @sl: Pointer to seqlock_t
 *
 * write_sequnlock closes the (serialized and non-preemptible) write side
 * critical section of given seqlock_t.
 */
static inline void write_sequnlock(seqlock_t *sl)
{
	do_write_seqcount_end(&sl->seqcount.seqcount);
	spin_unlock(&sl->lock);
}

/**
 * write_seqlock_bh() - start a softirqs-disabled seqlock_t write section
 * @sl: Pointer to seqlock_t
 *
 * _bh variant of write_seqlock(). Use only if the read side section, or
 * other write side sections, can be invoked from softirq contexts.
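 *
 * For illustration only (hypothetical "stats" seqlock_t and counters), a
 * process context writer whose readers may run in softirq context might
 * look like::
 *
 *	write_seqlock_bh(&stats_lock);
 *	stats_packets++;
 *	stats_bytes += len;
 *	write_sequnlock_bh(&stats_lock);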
 */
static inline void write_seqlock_bh(seqlock_t *sl)
{
	spin_lock_bh(&sl->lock);
	do_write_seqcount_begin(&sl->seqcount.seqcount);
}

/**
 * write_sequnlock_bh() - end a softirqs-disabled seqlock_t write section
 * @sl: Pointer to seqlock_t
 *
 * write_sequnlock_bh closes the serialized, non-preemptible, and
 * softirqs-disabled seqlock_t write side critical section opened with
 * write_seqlock_bh().
 */
static inline void write_sequnlock_bh(seqlock_t *sl)
{
	do_write_seqcount_end(&sl->seqcount.seqcount);
	spin_unlock_bh(&sl->lock);
}

/**
 * write_seqlock_irq() - start a non-interruptible seqlock_t write section
 * @sl: Pointer to seqlock_t
 *
 * _irq variant of write_seqlock(). Use only if the read side section, or
 * other write sections, can be invoked from hardirq contexts.
 */
static inline void write_seqlock_irq(seqlock_t *sl)
{
	spin_lock_irq(&sl->lock);
	do_write_seqcount_begin(&sl->seqcount.seqcount);
}

/**
 * write_sequnlock_irq() - end a non-interruptible seqlock_t write section
 * @sl: Pointer to seqlock_t
 *
 * write_sequnlock_irq closes the serialized and non-interruptible
 * seqlock_t write side section opened with write_seqlock_irq().
 */
static inline void write_sequnlock_irq(seqlock_t *sl)
{
	do_write_seqcount_end(&sl->seqcount.seqcount);
	spin_unlock_irq(&sl->lock);
}

static inline unsigned long __write_seqlock_irqsave(seqlock_t *sl)
{
	unsigned long flags;

	spin_lock_irqsave(&sl->lock, flags);
	do_write_seqcount_begin(&sl->seqcount.seqcount);
	return flags;
}

/**
 * write_seqlock_irqsave() - start a non-interruptible seqlock_t write
 *                           section
 * @lock:  Pointer to seqlock_t
 * @flags: Stack-allocated storage for saving caller's local interrupt
 *         state, to be passed to write_sequnlock_irqrestore().
 *
 * _irqsave variant of write_seqlock(). Use it only if the read side
 * section, or other write sections, can be invoked from hardirq context.
 */
#define write_seqlock_irqsave(lock, flags)				\
	do { flags = __write_seqlock_irqsave(lock); } while (0)

/**
 * write_sequnlock_irqrestore() - end non-interruptible seqlock_t write
 *                                section
 * @sl:    Pointer to seqlock_t
 * @flags: Caller's saved interrupt state, from write_seqlock_irqsave()
 *
 * write_sequnlock_irqrestore closes the serialized and non-interruptible
 * seqlock_t write section previously opened with write_seqlock_irqsave().
 */
static inline void
write_sequnlock_irqrestore(seqlock_t *sl, unsigned long flags)
{
	do_write_seqcount_end(&sl->seqcount.seqcount);
	spin_unlock_irqrestore(&sl->lock, flags);
}

/**
 * read_seqlock_excl() - begin a seqlock_t locking reader section
 * @sl: Pointer to seqlock_t
 *
 * read_seqlock_excl opens a seqlock_t locking reader critical section. A
 * locking reader exclusively locks out *both* other writers *and* other
 * locking readers, but it does not update the embedded sequence number.
 *
 * Locking readers act like a normal spin_lock()/spin_unlock().
 *
 * Context: if the seqlock_t write section, *or other read sections*, can
 * be invoked from hardirq or softirq contexts, use the _irqsave or _bh
 * variant of this function instead.
 *
 * The opened read section must be closed with read_sequnlock_excl().
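 *
 * For illustration only (hypothetical "foo" seqlock_t and data), a locking
 * reader might look like::
 *
 *	read_seqlock_excl(&foo_lock);
 *	a = foo_a;	// no retry loop is needed
 *	b = foo_b;
 *	read_sequnlock_excl(&foo_lock);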
 */
static inline void read_seqlock_excl(seqlock_t *sl)
{
	spin_lock(&sl->lock);
}

/**
 * read_sequnlock_excl() - end a seqlock_t locking reader critical section
 * @sl: Pointer to seqlock_t
 */
static inline void read_sequnlock_excl(seqlock_t *sl)
{
	spin_unlock(&sl->lock);
}

/**
 * read_seqlock_excl_bh() - start a seqlock_t locking reader section with
 *			    softirqs disabled
 * @sl: Pointer to seqlock_t
 *
 * _bh variant of read_seqlock_excl(). Use this variant only if the
 * seqlock_t write side section, *or other read sections*, can be invoked
 * from softirq contexts.
 */
static inline void read_seqlock_excl_bh(seqlock_t *sl)
{
	spin_lock_bh(&sl->lock);
}

/**
 * read_sequnlock_excl_bh() - stop a seqlock_t softirq-disabled locking
 *			      reader section
 * @sl: Pointer to seqlock_t
 */
static inline void read_sequnlock_excl_bh(seqlock_t *sl)
{
	spin_unlock_bh(&sl->lock);
}

/**
 * read_seqlock_excl_irq() - start a non-interruptible seqlock_t locking
 *			     reader section
 * @sl: Pointer to seqlock_t
 *
 * _irq variant of read_seqlock_excl(). Use this only if the seqlock_t
 * write side section, *or other read sections*, can be invoked from a
 * hardirq context.
 */
static inline void read_seqlock_excl_irq(seqlock_t *sl)
{
	spin_lock_irq(&sl->lock);
}

/**
 * read_sequnlock_excl_irq() - end an interrupts-disabled seqlock_t
 *			       locking reader section
 * @sl: Pointer to seqlock_t
 */
static inline void read_sequnlock_excl_irq(seqlock_t *sl)
{
	spin_unlock_irq(&sl->lock);
}

static inline unsigned long __read_seqlock_excl_irqsave(seqlock_t *sl)
{
	unsigned long flags;

	spin_lock_irqsave(&sl->lock, flags);
	return flags;
}

/**
 * read_seqlock_excl_irqsave() - start a non-interruptible seqlock_t
 *				 locking reader section
 * @lock:  Pointer to seqlock_t
 * @flags: Stack-allocated storage for saving caller's local interrupt
 *         state, to be passed to read_sequnlock_excl_irqrestore().
 *
 * _irqsave variant of read_seqlock_excl(). Use this only if the seqlock_t
 * write side section, *or other read sections*, can be invoked from a
 * hardirq context.
 */
#define read_seqlock_excl_irqsave(lock, flags)				\
	do { flags = __read_seqlock_excl_irqsave(lock); } while (0)

/**
 * read_sequnlock_excl_irqrestore() - end non-interruptible seqlock_t
 *				      locking reader section
 * @sl:    Pointer to seqlock_t
 * @flags: Caller saved interrupt state, from read_seqlock_excl_irqsave()
 */
static inline void
read_sequnlock_excl_irqrestore(seqlock_t *sl, unsigned long flags)
{
	spin_unlock_irqrestore(&sl->lock, flags);
}

/**
 * read_seqbegin_or_lock() - begin a seqlock_t lockless or locking reader
 * @lock: Pointer to seqlock_t
 * @seq:  Marker and return parameter. If the passed value is even, the
 * reader will become a *lockless* seqlock_t reader as in read_seqbegin().
 * If the passed value is odd, the reader will become a *locking* reader
 * as in read_seqlock_excl(). In the first call to this function, the
 * caller *must* initialize and pass an even value to @seq; this way, a
 * lockless read can be optimistically tried first.
 *
 * read_seqbegin_or_lock is an API designed to optimistically try a normal
 * lockless seqlock_t read section first. If an odd counter is found, the
 * lockless read trial has failed, and the next read iteration transforms
 * itself into a full seqlock_t locking reader.
 *
 * This is typically used to avoid seqlock_t lockless readers starvation
 * (too many retry loops) in the case of a sharp spike in write side
 * activity.
 *
 * Context: if the seqlock_t write section, *or other read sections*, can
 * be invoked from hardirq or softirq contexts, use the _irqsave or _bh
 * variant of this function instead.
 *
 * Check Documentation/locking/seqlock.rst for template example code.
 *
 * Return: the encountered sequence counter value, through the @seq
 * parameter, which is overloaded as a return parameter. This returned
 * value must be checked with need_seqretry(). If the read section needs to
 * be retried, this returned value must also be passed as the @seq
 * parameter of the next read_seqbegin_or_lock() iteration.
 */
static inline void read_seqbegin_or_lock(seqlock_t *lock, int *seq)
{
	if (!(*seq & 1))	/* Even */
		*seq = read_seqbegin(lock);
	else			/* Odd */
		read_seqlock_excl(lock);
}

/**
 * need_seqretry() - validate seqlock_t "locking or lockless" read section
 * @lock: Pointer to seqlock_t
 * @seq: sequence count, from read_seqbegin_or_lock()
 *
 * Return: true if a read section retry is required, false otherwise
 */
static inline int need_seqretry(seqlock_t *lock, int seq)
{
	return !(seq & 1) && read_seqretry(lock, seq);
}

/**
 * done_seqretry() - end seqlock_t "locking or lockless" reader section
 * @lock: Pointer to seqlock_t
 * @seq: count, from read_seqbegin_or_lock()
 *
 * done_seqretry finishes the seqlock_t read side critical section started
 * with read_seqbegin_or_lock() and validated by need_seqretry().
 */
static inline void done_seqretry(seqlock_t *lock, int seq)
{
	if (seq & 1)
		read_sequnlock_excl(lock);
}

/**
 * read_seqbegin_or_lock_irqsave() - begin a seqlock_t lockless reader, or
 *                                   a non-interruptible locking reader
 * @lock: Pointer to seqlock_t
 * @seq:  Marker and return parameter. Check read_seqbegin_or_lock().
 *
 * This is the _irqsave variant of read_seqbegin_or_lock(). Use it only if
 * the seqlock_t write section, *or other read sections*, can be invoked
 * from hardirq context.
 *
 * Note: Interrupts will be disabled only for "locking reader" mode.
 *
 * Return:
 *
 *   1. The saved local interrupts state in case of a locking reader, to
 *      be passed to done_seqretry_irqrestore().
 *
 *   2. The encountered sequence counter value, returned through @seq
 *      overloaded as a return parameter. Check read_seqbegin_or_lock().
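 *
 * For illustration only (hypothetical "foo" seqlock_t and data; see
 * Documentation/locking/seqlock.rst for the canonical template), one way
 * to escalate to a locking reader on retry::
 *
 *	int seq = 0;
 *	unsigned long flags;
 *
 *	do {
 *		flags = read_seqbegin_or_lock_irqsave(&foo_lock, &seq);
 *		a = foo_a;
 *		b = foo_b;
 *		if (!need_seqretry(&foo_lock, seq))
 *			break;
 *		seq = 1;	// odd: retry as a locking reader
 *	} while (1);
 *	done_seqretry_irqrestore(&foo_lock, seq, flags);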
 */
static inline unsigned long
read_seqbegin_or_lock_irqsave(seqlock_t *lock, int *seq)
{
	unsigned long flags = 0;

	if (!(*seq & 1))	/* Even */
		*seq = read_seqbegin(lock);
	else			/* Odd */
		read_seqlock_excl_irqsave(lock, flags);

	return flags;
}

/**
 * done_seqretry_irqrestore() - end a seqlock_t lockless reader, or a
 *                              non-interruptible locking reader section
 * @lock:  Pointer to seqlock_t
 * @seq:   Count, from read_seqbegin_or_lock_irqsave()
 * @flags: Caller's saved local interrupt state in case of a locking
 *	   reader, also from read_seqbegin_or_lock_irqsave()
 *
 * This is the _irqrestore variant of done_seqretry(). The read section
 * must've been opened with read_seqbegin_or_lock_irqsave(), and validated
 * by need_seqretry().
 */
static inline void
done_seqretry_irqrestore(seqlock_t *lock, int seq, unsigned long flags)
{
	if (seq & 1)
		read_sequnlock_excl_irqrestore(lock, flags);
}
#endif /* __LINUX_SEQLOCK_H */