/* SPDX-License-Identifier: GPL-2.0 */
#ifndef __LINUX_SEQLOCK_H
#define __LINUX_SEQLOCK_H

/*
 * seqcount_t / seqlock_t - a reader-writer consistency mechanism with
 * lockless readers (read-only retry loops), and no writer starvation.
 *
 * See Documentation/locking/seqlock.rst
 *
 * Copyrights:
 * - Based on x86_64 vsyscall gettimeofday: Keith Owens, Andrea Arcangeli
 */

#include <linux/spinlock.h>
#include <linux/preempt.h>
#include <linux/lockdep.h>
#include <linux/compiler.h>
#include <linux/kcsan-checks.h>
#include <asm/processor.h>

/*
 * The seqlock seqcount_t interface does not prescribe a precise sequence of
 * read begin/retry/end. For readers, typically there is a call to
 * read_seqcount_begin() and read_seqcount_retry(), however, there are more
 * esoteric cases which do not follow this pattern.
 *
 * As a consequence, we take the following best-effort approach for raw usage
 * via seqcount_t under KCSAN: upon beginning a seq-reader critical section,
 * pessimistically mark the next KCSAN_SEQLOCK_REGION_MAX memory accesses as
 * atomics; if there is a matching read_seqcount_retry() call, no following
 * memory operations are considered atomic. Usage of the seqlock_t interface
 * is not affected.
 */
#define KCSAN_SEQLOCK_REGION_MAX 1000

/*
 * Sequence counters (seqcount_t)
 *
 * This is the raw counting mechanism, without any writer protection.
 *
 * Write side critical sections must be serialized and non-preemptible.
 *
 * If readers can be invoked from hardirq or softirq contexts,
 * interrupts or bottom halves must also be respectively disabled before
 * entering the write section.
 *
 * This mechanism can't be used if the protected data contains pointers,
 * as the writer can invalidate a pointer that a reader is following.
 *
 * If it's desired to automatically handle the sequence counter writer
 * serialization and non-preemptibility requirements, use a sequential
 * lock (seqlock_t) instead.
 *
 * See Documentation/locking/seqlock.rst
 */
typedef struct seqcount {
	unsigned sequence;
#ifdef CONFIG_DEBUG_LOCK_ALLOC
	struct lockdep_map dep_map;
#endif
} seqcount_t;
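
/*
 * Example (illustrative only, not part of this header; "struct foo" and its
 * members are hypothetical): a seqcount_t is typically embedded next to the
 * data it protects, together with whatever lock serializes the writers, and
 * is initialized with seqcount_init(), or SEQCNT_ZERO() for static objects.
 * Both helpers are defined below.
 *
 *	struct foo {
 *		spinlock_t	lock;	<- serializes the writers
 *		seqcount_t	seq;
 *		u64		a, b;	<- data protected by "seq"
 *	};
 *
 *	static void foo_init(struct foo *f)
 *	{
 *		spin_lock_init(&f->lock);
 *		seqcount_init(&f->seq);
 *		f->a = f->b = 0;
 *	}
 *
 *	static seqcount_t foo_static_seq = SEQCNT_ZERO(foo_static_seq);
 */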

static inline void __seqcount_init(seqcount_t *s, const char *name,
				   struct lock_class_key *key)
{
	/*
	 * Make sure we are not reinitializing a held lock:
	 */
	lockdep_init_map(&s->dep_map, name, key, 0);
	s->sequence = 0;
}

#ifdef CONFIG_DEBUG_LOCK_ALLOC
# define SEQCOUNT_DEP_MAP_INIT(lockname) \
		.dep_map = { .name = #lockname } \

# define seqcount_init(s)				\
	do {						\
		static struct lock_class_key __key;	\
		__seqcount_init((s), #s, &__key);	\
	} while (0)

static inline void seqcount_lockdep_reader_access(const seqcount_t *s)
{
	seqcount_t *l = (seqcount_t *)s;
	unsigned long flags;

	local_irq_save(flags);
	seqcount_acquire_read(&l->dep_map, 0, 0, _RET_IP_);
	seqcount_release(&l->dep_map, _RET_IP_);
	local_irq_restore(flags);
}

#else
# define SEQCOUNT_DEP_MAP_INIT(lockname)
# define seqcount_init(s) __seqcount_init(s, NULL, NULL)
# define seqcount_lockdep_reader_access(x)
#endif

#define SEQCNT_ZERO(lockname) { .sequence = 0, SEQCOUNT_DEP_MAP_INIT(lockname)}

/**
 * __read_seqcount_begin - begin a seq-read critical section (without barrier)
 * @s: pointer to seqcount_t
 * Returns: count to be passed to read_seqcount_retry
 *
 * __read_seqcount_begin is like read_seqcount_begin, but has no smp_rmb()
 * barrier. Callers should ensure that smp_rmb() or equivalent ordering is
 * provided before actually loading any of the variables that are to be
 * protected in this critical section.
 *
 * Use carefully, only in critical code, and comment how the barrier is
 * provided.
 */
static inline unsigned __read_seqcount_begin(const seqcount_t *s)
{
	unsigned ret;

repeat:
	ret = READ_ONCE(s->sequence);
	if (unlikely(ret & 1)) {
		cpu_relax();
		goto repeat;
	}
	kcsan_atomic_next(KCSAN_SEQLOCK_REGION_MAX);
	return ret;
}

/**
 * raw_read_seqcount - Read the raw seqcount
 * @s: pointer to seqcount_t
 * Returns: count to be passed to read_seqcount_retry
 *
 * raw_read_seqcount opens a read critical section of the given
 * seqcount without any lockdep checking and without checking or
 * masking the LSB. Calling code is responsible for handling that.
 */
static inline unsigned raw_read_seqcount(const seqcount_t *s)
{
	unsigned ret = READ_ONCE(s->sequence);
	smp_rmb();
	kcsan_atomic_next(KCSAN_SEQLOCK_REGION_MAX);
	return ret;
}

/**
 * raw_read_seqcount_begin - start seq-read critical section w/o lockdep
 * @s: pointer to seqcount_t
 * Returns: count to be passed to read_seqcount_retry
 *
 * raw_read_seqcount_begin opens a read critical section of the given
 * seqcount, but without any lockdep checking. Validity of the critical
 * section is tested by checking read_seqcount_retry().
 */
static inline unsigned raw_read_seqcount_begin(const seqcount_t *s)
{
	unsigned ret = __read_seqcount_begin(s);
	smp_rmb();
	return ret;
}

/**
 * read_seqcount_begin - begin a seq-read critical section
 * @s: pointer to seqcount_t
 * Returns: count to be passed to read_seqcount_retry
 *
 * read_seqcount_begin opens a read critical section of the given seqcount.
 * Validity of the critical section is tested by checking
 * read_seqcount_retry().
 */
static inline unsigned read_seqcount_begin(const seqcount_t *s)
{
	seqcount_lockdep_reader_access(s);
	return raw_read_seqcount_begin(s);
}
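
/*
 * Example (illustrative only): using the hypothetical "struct foo" sketched
 * near the top of this file, a typical reader copies the protected fields
 * inside a read_seqcount_begin()/read_seqcount_retry() loop
 * (read_seqcount_retry() is defined further below):
 *
 *	static void foo_read(struct foo *f, u64 *a, u64 *b)
 *	{
 *		unsigned seq;
 *
 *		do {
 *			seq = read_seqcount_begin(&f->seq);
 *			*a = f->a;
 *			*b = f->b;
 *		} while (read_seqcount_retry(&f->seq, seq));
 *	}
 */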

/**
 * raw_seqcount_begin - begin a seq-read critical section
 * @s: pointer to seqcount_t
 * Returns: count to be passed to read_seqcount_retry
 *
 * raw_seqcount_begin opens a read critical section of the given seqcount.
 * Validity of the critical section is tested by checking
 * read_seqcount_retry().
 *
 * Unlike read_seqcount_begin(), this function will not wait for the count
 * to stabilize. If a writer is active when we begin, we will fail the
 * read_seqcount_retry() instead of stabilizing at the beginning of the
 * critical section.
 */
static inline unsigned raw_seqcount_begin(const seqcount_t *s)
{
	unsigned ret = READ_ONCE(s->sequence);
	smp_rmb();
	kcsan_atomic_next(KCSAN_SEQLOCK_REGION_MAX);
	return ret & ~1;
}

/**
 * __read_seqcount_retry - end a seq-read critical section (without barrier)
 * @s: pointer to seqcount_t
 * @start: count, from read_seqcount_begin
 * Returns: 1 if retry is required, else 0
 *
 * __read_seqcount_retry is like read_seqcount_retry, but has no smp_rmb()
 * barrier. Callers should ensure that smp_rmb() or equivalent ordering is
 * provided between the loads of the protected variables and the call to
 * this function.
 *
 * Use carefully, only in critical code, and comment how the barrier is
 * provided.
 */
static inline int __read_seqcount_retry(const seqcount_t *s, unsigned start)
{
	kcsan_atomic_next(0);
	return unlikely(READ_ONCE(s->sequence) != start);
}

/**
 * read_seqcount_retry - end a seq-read critical section
 * @s: pointer to seqcount_t
 * @start: count, from read_seqcount_begin
 * Returns: 1 if retry is required, else 0
 *
 * read_seqcount_retry closes a read critical section of the given seqcount.
 * If the critical section was invalid, it must be ignored (and typically
 * retried).
 */
static inline int read_seqcount_retry(const seqcount_t *s, unsigned start)
{
	smp_rmb();
	return __read_seqcount_retry(s, start);
}

static inline void raw_write_seqcount_begin(seqcount_t *s)
{
	kcsan_nestable_atomic_begin();
	s->sequence++;
	smp_wmb();
}

static inline void raw_write_seqcount_end(seqcount_t *s)
{
	smp_wmb();
	s->sequence++;
	kcsan_nestable_atomic_end();
}
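
/*
 * Example (illustrative only): raw_write_seqcount_begin()/end() carry no
 * lockdep annotation, so the caller alone must keep the write side
 * serialized and non-preemptible. In this sketch, spin_lock() on the
 * hypothetical foo->lock from the earlier example provides both:
 *
 *	static void foo_update(struct foo *f, u64 a, u64 b)
 *	{
 *		spin_lock(&f->lock);
 *		raw_write_seqcount_begin(&f->seq);
 *		f->a = a;
 *		f->b = b;
 *		raw_write_seqcount_end(&f->seq);
 *		spin_unlock(&f->lock);
 *	}
 */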

/**
 * raw_write_seqcount_barrier - do a seq write barrier
 * @s: pointer to seqcount_t
 *
 * This can be used to provide an ordering guarantee instead of the
 * usual consistency guarantee. It is one wmb cheaper, because we can
 * collapse the two back-to-back wmb()s.
 *
 * Note that writes surrounding the barrier should be declared atomic (e.g.
 * via WRITE_ONCE): a) to ensure the writes become visible to other threads
 * atomically, avoiding compiler optimizations; b) to document which writes are
 * meant to propagate to the reader critical section. This is necessary because
 * neither the writes before nor the writes after the barrier are enclosed in
 * a seq-writer critical section that would ensure readers are aware of
 * ongoing writes:
 *
 *	seqcount_t seq;
 *	bool X = true, Y = false;
 *
 *	void read(void)
 *	{
 *		bool x, y;
 *
 *		do {
 *			int s = read_seqcount_begin(&seq);
 *
 *			x = X; y = Y;
 *
 *		} while (read_seqcount_retry(&seq, s));
 *
 *		BUG_ON(!x && !y);
 *	}
 *
 *	void write(void)
 *	{
 *		WRITE_ONCE(Y, true);
 *
 *		raw_write_seqcount_barrier(&seq);
 *
 *		WRITE_ONCE(X, false);
 *	}
 */
static inline void raw_write_seqcount_barrier(seqcount_t *s)
{
	kcsan_nestable_atomic_begin();
	s->sequence++;
	smp_wmb();
	s->sequence++;
	kcsan_nestable_atomic_end();
}

static inline int raw_read_seqcount_latch(seqcount_t *s)
{
	/* Pairs with the first smp_wmb() in raw_write_seqcount_latch() */
	int seq = READ_ONCE(s->sequence); /* ^^^ */
	return seq;
}

/**
 * raw_write_seqcount_latch - redirect readers to even/odd copy
 * @s: pointer to seqcount_t
 *
 * The latch technique is a multiversion concurrency control method that allows
 * queries during non-atomic modifications. If you can guarantee queries never
 * interrupt the modification -- e.g. the concurrency is strictly between CPUs
 * -- you most likely do not need this.
 *
 * Where the traditional RCU/lockless data structures rely on atomic
 * modifications to ensure queries observe either the old or the new state the
 * latch allows the same for non-atomic updates. The trade-off is doubling the
 * cost of storage; we have to maintain two copies of the entire data
 * structure.
 *
 * Very simply put: we first modify one copy and then the other. This ensures
 * there is always one copy in a stable state, ready to give us an answer.
 *
 * The basic form is a data structure like:
 *
 *	struct latch_struct {
 *		seqcount_t		seq;
 *		struct data_struct	data[2];
 *	};
 *
 * Where a modification, which is assumed to be externally serialized, does the
 * following:
 *
 *	void latch_modify(struct latch_struct *latch, ...)
 *	{
 *		smp_wmb();	<- Ensure that the last data[1] update is visible
 *		latch->seq++;
 *		smp_wmb();	<- Ensure that the seqcount update is visible
 *
 *		modify(latch->data[0], ...);
 *
 *		smp_wmb();	<- Ensure that the data[0] update is visible
 *		latch->seq++;
 *		smp_wmb();	<- Ensure that the seqcount update is visible
 *
 *		modify(latch->data[1], ...);
 *	}
 *
 * The query will have a form like:
 *
 *	struct entry *latch_query(struct latch_struct *latch, ...)
 *	{
 *		struct entry *entry;
 *		unsigned seq, idx;
 *
 *		do {
 *			seq = raw_read_seqcount_latch(&latch->seq);
 *
 *			idx = seq & 0x01;
 *			entry = data_query(latch->data[idx], ...);
 *
 *			smp_rmb();
 *		} while (seq != latch->seq);
 *
 *		return entry;
 *	}
 *
 * So during the modification, queries are first redirected to data[1]. Then we
 * modify data[0]. When that is complete, we redirect queries back to data[0]
 * and we can modify data[1].
 *
 * NOTE: The non-requirement for atomic modifications does _NOT_ include
 *       the publishing of new entries in the case where data is a dynamic
 *       data structure.
 *
 *       An iteration might start in data[0] and get suspended long enough
 *       to miss an entire modification sequence; once it resumes it might
 *       observe the new entry.
 *
 * NOTE: When data is a dynamic data structure, one should use regular RCU
 *       patterns to manage the lifetimes of the objects within.
 */
static inline void raw_write_seqcount_latch(seqcount_t *s)
{
	smp_wmb();	/* prior stores before incrementing "sequence" */
	s->sequence++;
	smp_wmb();	/* increment "sequence" before following stores */
}

static inline void write_seqcount_begin_nested(seqcount_t *s, int subclass)
{
	raw_write_seqcount_begin(s);
	seqcount_acquire(&s->dep_map, subclass, 0, _RET_IP_);
}

static inline void write_seqcount_begin(seqcount_t *s)
{
	write_seqcount_begin_nested(s, 0);
}

static inline void write_seqcount_end(seqcount_t *s)
{
	seqcount_release(&s->dep_map, _RET_IP_);
	raw_write_seqcount_end(s);
}
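
/*
 * Example (illustrative only): write_seqcount_begin()/end() are the
 * lockdep-annotated counterparts of the raw_*() helpers above. As noted in
 * the seqcount_t description, if readers of the hypothetical "struct foo"
 * may also run from hardirq context, the write side must disable interrupts
 * as well:
 *
 *	static void foo_update_irq(struct foo *f, u64 a, u64 b)
 *	{
 *		unsigned long flags;
 *
 *		spin_lock_irqsave(&f->lock, flags);
 *		write_seqcount_begin(&f->seq);
 *		f->a = a;
 *		f->b = b;
 *		write_seqcount_end(&f->seq);
 *		spin_unlock_irqrestore(&f->lock, flags);
 *	}
 */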

/**
 * write_seqcount_invalidate - invalidate in-progress read-side seq operations
 * @s: pointer to seqcount_t
 *
 * After write_seqcount_invalidate, no read-side seq operations will complete
 * successfully and see data older than this.
 */
static inline void write_seqcount_invalidate(seqcount_t *s)
{
	smp_wmb();
	kcsan_nestable_atomic_begin();
	s->sequence += 2;
	kcsan_nestable_atomic_end();
}

/*
 * Sequential locks (seqlock_t)
 *
 * Sequence counters with an embedded spinlock for writer serialization
 * and non-preemptibility.
 *
 * For more info, see:
 *    - Comments on top of seqcount_t
 *    - Documentation/locking/seqlock.rst
 */
typedef struct {
	struct seqcount seqcount;
	spinlock_t lock;
} seqlock_t;

#define __SEQLOCK_UNLOCKED(lockname)			\
	{						\
		.seqcount = SEQCNT_ZERO(lockname),	\
		.lock = __SPIN_LOCK_UNLOCKED(lockname)	\
	}

#define seqlock_init(x)					\
	do {						\
		seqcount_init(&(x)->seqcount);		\
		spin_lock_init(&(x)->lock);		\
	} while (0)

#define DEFINE_SEQLOCK(x) \
		seqlock_t x = __SEQLOCK_UNLOCKED(x)

/*
 * Read side functions for starting and finalizing a read side section.
 */
static inline unsigned read_seqbegin(const seqlock_t *sl)
{
	unsigned ret = read_seqcount_begin(&sl->seqcount);

	kcsan_atomic_next(0);  /* non-raw usage, assume closing read_seqretry() */
	kcsan_flat_atomic_begin();
	return ret;
}

static inline unsigned read_seqretry(const seqlock_t *sl, unsigned start)
{
	/*
	 * Assume not nested: read_seqretry() may be called multiple times when
	 * completing read critical section.
	 */
	kcsan_flat_atomic_end();

	return read_seqcount_retry(&sl->seqcount, start);
}
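
/*
 * Example (illustrative only; names are hypothetical): with seqlock_t the
 * writer serialization is built in, so a reader only needs the
 * read_seqbegin()/read_seqretry() loop:
 *
 *	static DEFINE_SEQLOCK(bar_lock);
 *	static u64 bar_a, bar_b;	<- protected by bar_lock
 *
 *	static void bar_read(u64 *a, u64 *b)
 *	{
 *		unsigned seq;
 *
 *		do {
 *			seq = read_seqbegin(&bar_lock);
 *			*a = bar_a;
 *			*b = bar_b;
 *		} while (read_seqretry(&bar_lock, seq));
 *	}
 */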

/*
 * Lock out other writers and update the count.
 * Acts like a normal spin_lock/unlock.
 * Don't need preempt_disable() because that is in the spin_lock already.
 */
static inline void write_seqlock(seqlock_t *sl)
{
	spin_lock(&sl->lock);
	write_seqcount_begin(&sl->seqcount);
}

static inline void write_sequnlock(seqlock_t *sl)
{
	write_seqcount_end(&sl->seqcount);
	spin_unlock(&sl->lock);
}

static inline void write_seqlock_bh(seqlock_t *sl)
{
	spin_lock_bh(&sl->lock);
	write_seqcount_begin(&sl->seqcount);
}

static inline void write_sequnlock_bh(seqlock_t *sl)
{
	write_seqcount_end(&sl->seqcount);
	spin_unlock_bh(&sl->lock);
}

static inline void write_seqlock_irq(seqlock_t *sl)
{
	spin_lock_irq(&sl->lock);
	write_seqcount_begin(&sl->seqcount);
}

static inline void write_sequnlock_irq(seqlock_t *sl)
{
	write_seqcount_end(&sl->seqcount);
	spin_unlock_irq(&sl->lock);
}

static inline unsigned long __write_seqlock_irqsave(seqlock_t *sl)
{
	unsigned long flags;

	spin_lock_irqsave(&sl->lock, flags);
	write_seqcount_begin(&sl->seqcount);
	return flags;
}

#define write_seqlock_irqsave(lock, flags)			\
	do { flags = __write_seqlock_irqsave(lock); } while (0)

static inline void
write_sequnlock_irqrestore(seqlock_t *sl, unsigned long flags)
{
	write_seqcount_end(&sl->seqcount);
	spin_unlock_irqrestore(&sl->lock, flags);
}

/*
 * A locking reader exclusively locks out other writers and locking readers,
 * but doesn't update the sequence number. Acts like a normal spin_lock/unlock.
 * Don't need preempt_disable() because that is in the spin_lock already.
 */
static inline void read_seqlock_excl(seqlock_t *sl)
{
	spin_lock(&sl->lock);
}

static inline void read_sequnlock_excl(seqlock_t *sl)
{
	spin_unlock(&sl->lock);
}

/**
 * read_seqbegin_or_lock - begin a sequence number check or locking block
 * @lock: sequence lock
 * @seq: sequence number to be checked
 *
 * First try it once optimistically without taking the lock. If that fails,
 * take the lock. The sequence number is also used as a marker for deciding
 * whether to be a lockless reader (even) or a locking reader (odd).
 * N.B. seq must be initialized to an even number to begin with.
 */
static inline void read_seqbegin_or_lock(seqlock_t *lock, int *seq)
{
	if (!(*seq & 1))	/* Even */
		*seq = read_seqbegin(lock);
	else			/* Odd */
		read_seqlock_excl(lock);
}

static inline int need_seqretry(seqlock_t *lock, int seq)
{
	return !(seq & 1) && read_seqretry(lock, seq);
}

static inline void done_seqretry(seqlock_t *lock, int seq)
{
	if (seq & 1)
		read_sequnlock_excl(lock);
}
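
/*
 * Example (illustrative only): the lockless-or-locking reader pattern built
 * from read_seqbegin_or_lock(), need_seqretry() and done_seqretry(). The
 * first pass runs locklessly (seq even); if it has to be retried, the caller
 * forces the next pass to take the lock by making seq odd, which also
 * guarantees forward progress against a stream of writers:
 *
 *	static void bar_walk(seqlock_t *lock)
 *	{
 *		int seq, nextseq = 0;
 *
 *		do {
 *			seq = nextseq;
 *			read_seqbegin_or_lock(lock, &seq);
 *
 *			... read the protected data ...
 *
 *			nextseq = 1;	<- any retry will be a locking pass
 *		} while (need_seqretry(lock, seq));
 *		done_seqretry(lock, seq);
 *	}
 */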

static inline void read_seqlock_excl_bh(seqlock_t *sl)
{
	spin_lock_bh(&sl->lock);
}

static inline void read_sequnlock_excl_bh(seqlock_t *sl)
{
	spin_unlock_bh(&sl->lock);
}

static inline void read_seqlock_excl_irq(seqlock_t *sl)
{
	spin_lock_irq(&sl->lock);
}

static inline void read_sequnlock_excl_irq(seqlock_t *sl)
{
	spin_unlock_irq(&sl->lock);
}

static inline unsigned long __read_seqlock_excl_irqsave(seqlock_t *sl)
{
	unsigned long flags;

	spin_lock_irqsave(&sl->lock, flags);
	return flags;
}

#define read_seqlock_excl_irqsave(lock, flags)			\
	do { flags = __read_seqlock_excl_irqsave(lock); } while (0)

static inline void
read_sequnlock_excl_irqrestore(seqlock_t *sl, unsigned long flags)
{
	spin_unlock_irqrestore(&sl->lock, flags);
}

static inline unsigned long
read_seqbegin_or_lock_irqsave(seqlock_t *lock, int *seq)
{
	unsigned long flags = 0;

	if (!(*seq & 1))	/* Even */
		*seq = read_seqbegin(lock);
	else			/* Odd */
		read_seqlock_excl_irqsave(lock, flags);

	return flags;
}

static inline void
done_seqretry_irqrestore(seqlock_t *lock, int seq, unsigned long flags)
{
	if (seq & 1)
		read_sequnlock_excl_irqrestore(lock, flags);
}
#endif /* __LINUX_SEQLOCK_H */