xref: /linux-6.15/kernel/locking/rwsem.c (revision 894d1b3d)
1b2441318SGreg Kroah-Hartman // SPDX-License-Identifier: GPL-2.0
2ed428bfcSPeter Zijlstra /* kernel/rwsem.c: R/W semaphores, public implementation
3ed428bfcSPeter Zijlstra  *
4ed428bfcSPeter Zijlstra  * Written by David Howells ([email protected]).
5ed428bfcSPeter Zijlstra  * Derived from asm-i386/semaphore.h
65dec94d4SWaiman Long  *
75dec94d4SWaiman Long  * Writer lock-stealing by Alex Shi <[email protected]>
85dec94d4SWaiman Long  * and Michel Lespinasse <[email protected]>
95dec94d4SWaiman Long  *
105dec94d4SWaiman Long  * Optimistic spinning by Tim Chen <[email protected]>
115dec94d4SWaiman Long  * and Davidlohr Bueso <[email protected]>. Based on mutexes.
125dec94d4SWaiman Long  *
134f23dbc1SWaiman Long  * Rwsem count bit fields re-definition and rwsem rearchitecture by
144f23dbc1SWaiman Long  * Waiman Long <[email protected]> and
154f23dbc1SWaiman Long  * Peter Zijlstra <[email protected]>.
16ed428bfcSPeter Zijlstra  */
17ed428bfcSPeter Zijlstra 
18ed428bfcSPeter Zijlstra #include <linux/types.h>
19ed428bfcSPeter Zijlstra #include <linux/kernel.h>
20ed428bfcSPeter Zijlstra #include <linux/sched.h>
215dec94d4SWaiman Long #include <linux/sched/rt.h>
225dec94d4SWaiman Long #include <linux/sched/task.h>
23b17b0153SIngo Molnar #include <linux/sched/debug.h>
245dec94d4SWaiman Long #include <linux/sched/wake_q.h>
255dec94d4SWaiman Long #include <linux/sched/signal.h>
267d43f1ceSWaiman Long #include <linux/sched/clock.h>
27ed428bfcSPeter Zijlstra #include <linux/export.h>
28ed428bfcSPeter Zijlstra #include <linux/rwsem.h>
29ed428bfcSPeter Zijlstra #include <linux/atomic.h>
30ee042be1SNamhyung Kim #include <trace/events/lock.h>
31ed428bfcSPeter Zijlstra 
3242254105SThomas Gleixner #ifndef CONFIG_PREEMPT_RT
335dec94d4SWaiman Long #include "lock_events.h"
345dec94d4SWaiman Long 
355dec94d4SWaiman Long /*
36617f3ef9SWaiman Long  * The least significant 2 bits of the owner value have the following
375dec94d4SWaiman Long  * meanings when set.
38d566c786SWaiman Long  *  - Bit 0: RWSEM_READER_OWNED - rwsem may be owned by readers (just a hint)
39617f3ef9SWaiman Long  *  - Bit 1: RWSEM_NONSPINNABLE - Cannot spin on a reader-owned lock
405dec94d4SWaiman Long  *
41617f3ef9SWaiman Long  * When the rwsem is reader-owned and a spinning writer has timed out,
42617f3ef9SWaiman Long  * the nonspinnable bit will be set to disable optimistic spinning.
437d43f1ceSWaiman Long  *
445dec94d4SWaiman Long  * When a writer acquires a rwsem, it puts its task_struct pointer
455dec94d4SWaiman Long  * into the owner field. It is cleared after an unlock.
465dec94d4SWaiman Long  *
475dec94d4SWaiman Long  * When a reader acquires a rwsem, it will also put its task_struct
487d43f1ceSWaiman Long  * pointer into the owner field with the RWSEM_READER_OWNED bit set.
497d43f1ceSWaiman Long  * On unlock, the owner field will largely be left untouched. So
507d43f1ceSWaiman Long  * for a free or reader-owned rwsem, the owner value may contain
517d43f1ceSWaiman Long  * information about the last reader that acquired the rwsem.
525dec94d4SWaiman Long  *
535dec94d4SWaiman Long  * That information may be helpful in debugging cases where the system
545dec94d4SWaiman Long  * seems to hang on a reader-owned rwsem, especially if only one reader
555dec94d4SWaiman Long  * is involved. Ideally we would like to track all the readers that own
565dec94d4SWaiman Long  * a rwsem, but the overhead is simply too big.
575cfd92e1SWaiman Long  *
58617f3ef9SWaiman Long  * Fast-path reader optimistic lock stealing is supported when the rwsem
59617f3ef9SWaiman Long  * was previously owned by a writer and the following conditions are met:
60617f3ef9SWaiman Long  *  - the rwsem is not currently writer owned
61617f3ef9SWaiman Long  *  - the handoff bit isn't set.
625dec94d4SWaiman Long  */
635dec94d4SWaiman Long #define RWSEM_READER_OWNED	(1UL << 0)
64617f3ef9SWaiman Long #define RWSEM_NONSPINNABLE	(1UL << 1)
6502f1082bSWaiman Long #define RWSEM_OWNER_FLAGS_MASK	(RWSEM_READER_OWNED | RWSEM_NONSPINNABLE)
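
/*
 * Illustrative sketch (an editor's aid, not part of the kernel sources):
 * decoding an owner word built from the flags above, assuming a hypothetical
 * task_struct pointer some_task stored by a reader:
 *
 *	unsigned long owner = (unsigned long)some_task | RWSEM_READER_OWNED;
 *
 *	struct task_struct *task = (struct task_struct *)
 *				   (owner & ~RWSEM_OWNER_FLAGS_MASK);
 *	bool reader_hint  = owner & RWSEM_READER_OWNED;	-> true
 *	bool nonspinnable = owner & RWSEM_NONSPINNABLE;		-> false
 */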
665dec94d4SWaiman Long 
675dec94d4SWaiman Long #ifdef CONFIG_DEBUG_RWSEMS
685dec94d4SWaiman Long # define DEBUG_RWSEMS_WARN_ON(c, sem)	do {			\
695dec94d4SWaiman Long 	if (!debug_locks_silent &&				\
70fce45cd4SDavidlohr Bueso 	    WARN_ONCE(c, "DEBUG_RWSEMS_WARN_ON(%s): count = 0x%lx, magic = 0x%lx, owner = 0x%lx, curr 0x%lx, list %sempty\n",\
715dec94d4SWaiman Long 		#c, atomic_long_read(&(sem)->count),		\
72fce45cd4SDavidlohr Bueso 		(unsigned long) sem->magic,			\
7394a9717bSWaiman Long 		atomic_long_read(&(sem)->owner), (long)current,	\
745dec94d4SWaiman Long 		list_empty(&(sem)->wait_list) ? "" : "not "))	\
755dec94d4SWaiman Long 			debug_locks_off();			\
765dec94d4SWaiman Long 	} while (0)
775dec94d4SWaiman Long #else
785dec94d4SWaiman Long # define DEBUG_RWSEMS_WARN_ON(c, sem)
795dec94d4SWaiman Long #endif
805dec94d4SWaiman Long 
815dec94d4SWaiman Long /*
82a15ea1a3SWaiman Long  * On 64-bit architectures, the bit definitions of the count are:
835dec94d4SWaiman Long  *
845dec94d4SWaiman Long  * Bit  0    - writer locked bit
855dec94d4SWaiman Long  * Bit  1    - waiters present bit
864f23dbc1SWaiman Long  * Bit  2    - lock handoff bit
874f23dbc1SWaiman Long  * Bits 3-7  - reserved
88a15ea1a3SWaiman Long  * Bits 8-62 - 55-bit reader count
89a15ea1a3SWaiman Long  * Bit  63   - read fail bit
90a15ea1a3SWaiman Long  *
91a15ea1a3SWaiman Long  * On 32-bit architectures, the bit definitions of the count are:
92a15ea1a3SWaiman Long  *
93a15ea1a3SWaiman Long  * Bit  0    - writer locked bit
94a15ea1a3SWaiman Long  * Bit  1    - waiters present bit
95a15ea1a3SWaiman Long  * Bit  2    - lock handoff bit
96a15ea1a3SWaiman Long  * Bits 3-7  - reserved
97a15ea1a3SWaiman Long  * Bits 8-30 - 23-bit reader count
98a15ea1a3SWaiman Long  * Bit  31   - read fail bit
99a15ea1a3SWaiman Long  *
100a15ea1a3SWaiman Long  * It is not likely that the most significant bit (read fail bit) will ever
101a15ea1a3SWaiman Long  * be set. This guard bit is still checked anyway in the down_read() fastpath
102a15ea1a3SWaiman Long  * just in case we need to use up more of the reader bits for other purposes
103a15ea1a3SWaiman Long  * in the future.
1045dec94d4SWaiman Long  *
1055dec94d4SWaiman Long  * atomic_long_fetch_add() is used to obtain the reader lock, whereas
1065dec94d4SWaiman Long  * atomic_long_cmpxchg() will be used to obtain the writer lock.
1074f23dbc1SWaiman Long  *
1084f23dbc1SWaiman Long  * There are three places where the lock handoff bit may be set or cleared.
109d257cc8cSWaiman Long  * 1) rwsem_mark_wake() for readers		-- set, clear
110d257cc8cSWaiman Long  * 2) rwsem_try_write_lock() for writers	-- set, clear
111d257cc8cSWaiman Long  * 3) rwsem_del_waiter()			-- clear
1124f23dbc1SWaiman Long  *
1134f23dbc1SWaiman Long  * For all the above cases, wait_lock will be held. A writer must also
1144f23dbc1SWaiman Long  * be the first one in the wait_list to be eligible for setting the handoff
1154f23dbc1SWaiman Long  * bit. So concurrent setting/clearing of handoff bit is not possible.
1165dec94d4SWaiman Long  */
1175dec94d4SWaiman Long #define RWSEM_WRITER_LOCKED	(1UL << 0)
1185dec94d4SWaiman Long #define RWSEM_FLAG_WAITERS	(1UL << 1)
1194f23dbc1SWaiman Long #define RWSEM_FLAG_HANDOFF	(1UL << 2)
120a15ea1a3SWaiman Long #define RWSEM_FLAG_READFAIL	(1UL << (BITS_PER_LONG - 1))
1214f23dbc1SWaiman Long 
1225dec94d4SWaiman Long #define RWSEM_READER_SHIFT	8
1235dec94d4SWaiman Long #define RWSEM_READER_BIAS	(1UL << RWSEM_READER_SHIFT)
1245dec94d4SWaiman Long #define RWSEM_READER_MASK	(~(RWSEM_READER_BIAS - 1))
1255dec94d4SWaiman Long #define RWSEM_WRITER_MASK	RWSEM_WRITER_LOCKED
1265dec94d4SWaiman Long #define RWSEM_LOCK_MASK		(RWSEM_WRITER_MASK|RWSEM_READER_MASK)
1274f23dbc1SWaiman Long #define RWSEM_READ_FAILED_MASK	(RWSEM_WRITER_MASK|RWSEM_FLAG_WAITERS|\
128a15ea1a3SWaiman Long 				 RWSEM_FLAG_HANDOFF|RWSEM_FLAG_READFAIL)
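
/*
 * Worked example of the count encoding above (an editor's illustration, not
 * kernel code): with RWSEM_READER_SHIFT = 8, three readers holding the lock
 * while a waiter is queued gives
 *
 *	count = 3 * RWSEM_READER_BIAS + RWSEM_FLAG_WAITERS = 0x302
 *
 * while a write-locked rwsem whose first waiter has requested a handoff is
 *
 *	count = RWSEM_WRITER_LOCKED | RWSEM_FLAG_HANDOFF = 0x5
 */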
1295dec94d4SWaiman Long 
1305dec94d4SWaiman Long /*
1315dec94d4SWaiman Long  * All writes to owner are protected by WRITE_ONCE() to make sure that
1325dec94d4SWaiman Long  * store tearing can't happen as optimistic spinners may read and use
1335dec94d4SWaiman Long  * the owner value concurrently without lock. Read from owner, however,
1345dec94d4SWaiman Long  * may not need READ_ONCE() as long as the pointer value is only used
1355dec94d4SWaiman Long  * for comparison and isn't being dereferenced.
13648dfb5d2SGokul krishna Krishnakumar  *
13748dfb5d2SGokul krishna Krishnakumar  * Both rwsem_{set,clear}_owner() functions should be in the same
13848dfb5d2SGokul krishna Krishnakumar  * preempt disable section as the atomic op that changes sem->count.
1395dec94d4SWaiman Long  */
1405dec94d4SWaiman Long static inline void rwsem_set_owner(struct rw_semaphore *sem)
1415dec94d4SWaiman Long {
14248dfb5d2SGokul krishna Krishnakumar 	lockdep_assert_preemption_disabled();
14394a9717bSWaiman Long 	atomic_long_set(&sem->owner, (long)current);
1445dec94d4SWaiman Long }
1455dec94d4SWaiman Long 
1465dec94d4SWaiman Long static inline void rwsem_clear_owner(struct rw_semaphore *sem)
1475dec94d4SWaiman Long {
14848dfb5d2SGokul krishna Krishnakumar 	lockdep_assert_preemption_disabled();
14994a9717bSWaiman Long 	atomic_long_set(&sem->owner, 0);
15094a9717bSWaiman Long }
15194a9717bSWaiman Long 
15294a9717bSWaiman Long /*
15394a9717bSWaiman Long  * Test the flags in the owner field.
15494a9717bSWaiman Long  */
15594a9717bSWaiman Long static inline bool rwsem_test_oflags(struct rw_semaphore *sem, long flags)
15694a9717bSWaiman Long {
15794a9717bSWaiman Long 	return atomic_long_read(&sem->owner) & flags;
1585dec94d4SWaiman Long }
1595dec94d4SWaiman Long 
1605dec94d4SWaiman Long /*
1615dec94d4SWaiman Long  * The task_struct pointer of the last owning reader will be left in
1625dec94d4SWaiman Long  * the owner field.
1635dec94d4SWaiman Long  *
1645dec94d4SWaiman Long  * Note that the owner value just indicates the task has owned the rwsem
1655dec94d4SWaiman Long  * previously; it may not be the real owner or one of the real owners
1665dec94d4SWaiman Long  * anymore when that field is examined, so take it with a grain of salt.
1675cfd92e1SWaiman Long  *
1685cfd92e1SWaiman Long  * The reader non-spinnable bit is preserved.
1695dec94d4SWaiman Long  */
1705dec94d4SWaiman Long static inline void __rwsem_set_reader_owned(struct rw_semaphore *sem,
1715dec94d4SWaiman Long 					    struct task_struct *owner)
1725dec94d4SWaiman Long {
1735cfd92e1SWaiman Long 	unsigned long val = (unsigned long)owner | RWSEM_READER_OWNED |
174617f3ef9SWaiman Long 		(atomic_long_read(&sem->owner) & RWSEM_NONSPINNABLE);
1755dec94d4SWaiman Long 
17694a9717bSWaiman Long 	atomic_long_set(&sem->owner, val);
1775dec94d4SWaiman Long }
1785dec94d4SWaiman Long 
1795dec94d4SWaiman Long static inline void rwsem_set_reader_owned(struct rw_semaphore *sem)
1805dec94d4SWaiman Long {
1815dec94d4SWaiman Long 	__rwsem_set_reader_owned(sem, current);
1825dec94d4SWaiman Long }
1835dec94d4SWaiman Long 
184d00b83d4SWaiman Long #ifdef CONFIG_DEBUG_RWSEMS
185d00b83d4SWaiman Long /*
186d00b83d4SWaiman Long  * Return just the real task structure pointer of the owner
187d00b83d4SWaiman Long  */
188d00b83d4SWaiman Long static inline struct task_struct *rwsem_owner(struct rw_semaphore *sem)
189d00b83d4SWaiman Long {
190d00b83d4SWaiman Long 	return (struct task_struct *)
191d00b83d4SWaiman Long 		(atomic_long_read(&sem->owner) & ~RWSEM_OWNER_FLAGS_MASK);
192d00b83d4SWaiman Long }
193d00b83d4SWaiman Long 
1945dec94d4SWaiman Long /*
19594a9717bSWaiman Long  * Return true if the rwsem is owned by a reader.
1965dec94d4SWaiman Long  */
19794a9717bSWaiman Long static inline bool is_rwsem_reader_owned(struct rw_semaphore *sem)
1985dec94d4SWaiman Long {
19994a9717bSWaiman Long 	/*
20094a9717bSWaiman Long 	 * Check the count to see if it is write-locked.
20194a9717bSWaiman Long 	 */
20294a9717bSWaiman Long 	long count = atomic_long_read(&sem->count);
20394a9717bSWaiman Long 
20494a9717bSWaiman Long 	if (count & RWSEM_WRITER_MASK)
20594a9717bSWaiman Long 		return false;
20694a9717bSWaiman Long 	return rwsem_test_oflags(sem, RWSEM_READER_OWNED);
2075dec94d4SWaiman Long }
2085dec94d4SWaiman Long 
2095dec94d4SWaiman Long /*
2105dec94d4SWaiman Long  * With CONFIG_DEBUG_RWSEMS configured, it will make sure that if there
2115dec94d4SWaiman Long  * is a task pointer in the owner field of a reader-owned rwsem, it will be the
2125dec94d4SWaiman Long  * real owner or one of the real owners. The only exception is when the
2135dec94d4SWaiman Long  * unlock is done by up_read_non_owner().
2145dec94d4SWaiman Long  */
2155dec94d4SWaiman Long static inline void rwsem_clear_reader_owned(struct rw_semaphore *sem)
2165dec94d4SWaiman Long {
21794a9717bSWaiman Long 	unsigned long val = atomic_long_read(&sem->owner);
21894a9717bSWaiman Long 
21994a9717bSWaiman Long 	while ((val & ~RWSEM_OWNER_FLAGS_MASK) == (unsigned long)current) {
22094a9717bSWaiman Long 		if (atomic_long_try_cmpxchg(&sem->owner, &val,
22194a9717bSWaiman Long 					    val & RWSEM_OWNER_FLAGS_MASK))
22294a9717bSWaiman Long 			return;
22394a9717bSWaiman Long 	}
2245dec94d4SWaiman Long }
2255dec94d4SWaiman Long #else
2265dec94d4SWaiman Long static inline void rwsem_clear_reader_owned(struct rw_semaphore *sem)
2275dec94d4SWaiman Long {
2285dec94d4SWaiman Long }
2295dec94d4SWaiman Long #endif
2305dec94d4SWaiman Long 
2315dec94d4SWaiman Long /*
2327d43f1ceSWaiman Long  * Set the RWSEM_NONSPINNABLE bit if the RWSEM_READER_OWNED flag
2337d43f1ceSWaiman Long  * remains set. Otherwise, the operation will be aborted.
2347d43f1ceSWaiman Long  */
2357d43f1ceSWaiman Long static inline void rwsem_set_nonspinnable(struct rw_semaphore *sem)
2367d43f1ceSWaiman Long {
2377d43f1ceSWaiman Long 	unsigned long owner = atomic_long_read(&sem->owner);
2387d43f1ceSWaiman Long 
2397d43f1ceSWaiman Long 	do {
2407d43f1ceSWaiman Long 		if (!(owner & RWSEM_READER_OWNED))
2417d43f1ceSWaiman Long 			break;
2427d43f1ceSWaiman Long 		if (owner & RWSEM_NONSPINNABLE)
2437d43f1ceSWaiman Long 			break;
2447d43f1ceSWaiman Long 	} while (!atomic_long_try_cmpxchg(&sem->owner, &owner,
2457d43f1ceSWaiman Long 					  owner | RWSEM_NONSPINNABLE));
2467d43f1ceSWaiman Long }
2477d43f1ceSWaiman Long 
248c8fe8b05SWaiman Long static inline bool rwsem_read_trylock(struct rw_semaphore *sem, long *cntp)
249a15ea1a3SWaiman Long {
250c8fe8b05SWaiman Long 	*cntp = atomic_long_add_return_acquire(RWSEM_READER_BIAS, &sem->count);
2513379116aSPeter Zijlstra 
252c8fe8b05SWaiman Long 	if (WARN_ON_ONCE(*cntp < 0))
253a15ea1a3SWaiman Long 		rwsem_set_nonspinnable(sem);
2543379116aSPeter Zijlstra 
255c8fe8b05SWaiman Long 	if (!(*cntp & RWSEM_READ_FAILED_MASK)) {
2563379116aSPeter Zijlstra 		rwsem_set_reader_owned(sem);
2573379116aSPeter Zijlstra 		return true;
2583379116aSPeter Zijlstra 	}
2593379116aSPeter Zijlstra 
2603379116aSPeter Zijlstra 	return false;
261a15ea1a3SWaiman Long }
262a15ea1a3SWaiman Long 
263285c61aeSPeter Zijlstra static inline bool rwsem_write_trylock(struct rw_semaphore *sem)
264285c61aeSPeter Zijlstra {
265285c61aeSPeter Zijlstra 	long tmp = RWSEM_UNLOCKED_VALUE;
266285c61aeSPeter Zijlstra 
267285c61aeSPeter Zijlstra 	if (atomic_long_try_cmpxchg_acquire(&sem->count, &tmp, RWSEM_WRITER_LOCKED)) {
268285c61aeSPeter Zijlstra 		rwsem_set_owner(sem);
2691d61659cSWaiman Long 		return true;
270285c61aeSPeter Zijlstra 	}
271285c61aeSPeter Zijlstra 
2721d61659cSWaiman Long 	return false;
273285c61aeSPeter Zijlstra }
274285c61aeSPeter Zijlstra 
2757d43f1ceSWaiman Long /*
27694a9717bSWaiman Long  * Return the real task structure pointer of the owner and the embedded
27794a9717bSWaiman Long  * flags in the owner. pflags must be non-NULL.
27894a9717bSWaiman Long  */
27994a9717bSWaiman Long static inline struct task_struct *
28094a9717bSWaiman Long rwsem_owner_flags(struct rw_semaphore *sem, unsigned long *pflags)
28194a9717bSWaiman Long {
28294a9717bSWaiman Long 	unsigned long owner = atomic_long_read(&sem->owner);
28394a9717bSWaiman Long 
28494a9717bSWaiman Long 	*pflags = owner & RWSEM_OWNER_FLAGS_MASK;
28594a9717bSWaiman Long 	return (struct task_struct *)(owner & ~RWSEM_OWNER_FLAGS_MASK);
28694a9717bSWaiman Long }
28794a9717bSWaiman Long 
28894a9717bSWaiman Long /*
2895dec94d4SWaiman Long  * Guide to the rw_semaphore's count field.
2905dec94d4SWaiman Long  *
2915dec94d4SWaiman Long  * When the RWSEM_WRITER_LOCKED bit in count is set, the lock is owned
2925dec94d4SWaiman Long  * by a writer.
2935dec94d4SWaiman Long  *
2945dec94d4SWaiman Long  * The lock is owned by readers when
2955dec94d4SWaiman Long  * (1) the RWSEM_WRITER_LOCKED bit isn't set in count,
2965dec94d4SWaiman Long  * (2) some of the reader bits are set in count, and
2975dec94d4SWaiman Long  * (3) the owner field has the RWSEM_READER_OWNED bit set.
2985dec94d4SWaiman Long  *
2995dec94d4SWaiman Long  * Having some reader bits set is not enough to guarantee a reader-owned
3005dec94d4SWaiman Long  * lock as the readers may be in the process of backing out from the count
3015dec94d4SWaiman Long  * and a writer has just released the lock. So another writer may steal
3025dec94d4SWaiman Long  * the lock immediately after that.
3035dec94d4SWaiman Long  */
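
/*
 * Illustrative sketch of the reader-owned test described above (an editor's
 * aid that mirrors is_rwsem_reader_owned() in the CONFIG_DEBUG_RWSEMS block;
 * it is not a new API):
 *
 *	long count = atomic_long_read(&sem->count);
 *	bool reader_held = !(count & RWSEM_WRITER_MASK) &&
 *			   (count & RWSEM_READER_MASK) &&
 *			   rwsem_test_oflags(sem, RWSEM_READER_OWNED);
 */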
3045dec94d4SWaiman Long 
3055dec94d4SWaiman Long /*
3065dec94d4SWaiman Long  * Initialize an rwsem:
3075dec94d4SWaiman Long  */
3085dec94d4SWaiman Long void __init_rwsem(struct rw_semaphore *sem, const char *name,
3095dec94d4SWaiman Long 		  struct lock_class_key *key)
3105dec94d4SWaiman Long {
3115dec94d4SWaiman Long #ifdef CONFIG_DEBUG_LOCK_ALLOC
3125dec94d4SWaiman Long 	/*
3135dec94d4SWaiman Long 	 * Make sure we are not reinitializing a held semaphore:
3145dec94d4SWaiman Long 	 */
3155dec94d4SWaiman Long 	debug_check_no_locks_freed((void *)sem, sizeof(*sem));
316de8f5e4fSPeter Zijlstra 	lockdep_init_map_wait(&sem->dep_map, name, key, 0, LD_WAIT_SLEEP);
3175dec94d4SWaiman Long #endif
318fce45cd4SDavidlohr Bueso #ifdef CONFIG_DEBUG_RWSEMS
319fce45cd4SDavidlohr Bueso 	sem->magic = sem;
320fce45cd4SDavidlohr Bueso #endif
3215dec94d4SWaiman Long 	atomic_long_set(&sem->count, RWSEM_UNLOCKED_VALUE);
3225dec94d4SWaiman Long 	raw_spin_lock_init(&sem->wait_lock);
3235dec94d4SWaiman Long 	INIT_LIST_HEAD(&sem->wait_list);
32494a9717bSWaiman Long 	atomic_long_set(&sem->owner, 0L);
3255dec94d4SWaiman Long #ifdef CONFIG_RWSEM_SPIN_ON_OWNER
3265dec94d4SWaiman Long 	osq_lock_init(&sem->osq);
3275dec94d4SWaiman Long #endif
3285dec94d4SWaiman Long }
3295dec94d4SWaiman Long EXPORT_SYMBOL(__init_rwsem);
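
/*
 * Typical usage from a caller's point of view (an editor's sketch; the
 * DECLARE_RWSEM(), down_read()/up_read() and down_write()/up_write() APIs
 * live in include/linux/rwsem.h and later in this file, not here):
 *
 *	static DECLARE_RWSEM(my_rwsem);
 *
 *	down_read(&my_rwsem);
 *	... read-side critical section, may run concurrently with readers ...
 *	up_read(&my_rwsem);
 *
 *	down_write(&my_rwsem);
 *	... write-side critical section, fully exclusive ...
 *	up_write(&my_rwsem);
 */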
3305dec94d4SWaiman Long 
3315dec94d4SWaiman Long enum rwsem_waiter_type {
3325dec94d4SWaiman Long 	RWSEM_WAITING_FOR_WRITE,
3335dec94d4SWaiman Long 	RWSEM_WAITING_FOR_READ
3345dec94d4SWaiman Long };
3355dec94d4SWaiman Long 
3365dec94d4SWaiman Long struct rwsem_waiter {
3375dec94d4SWaiman Long 	struct list_head list;
3385dec94d4SWaiman Long 	struct task_struct *task;
3395dec94d4SWaiman Long 	enum rwsem_waiter_type type;
3404f23dbc1SWaiman Long 	unsigned long timeout;
341d257cc8cSWaiman Long 	bool handoff_set;
3425dec94d4SWaiman Long };
3434f23dbc1SWaiman Long #define rwsem_first_waiter(sem) \
3444f23dbc1SWaiman Long 	list_first_entry(&sem->wait_list, struct rwsem_waiter, list)
3455dec94d4SWaiman Long 
3465dec94d4SWaiman Long enum rwsem_wake_type {
3475dec94d4SWaiman Long 	RWSEM_WAKE_ANY,		/* Wake whatever's at head of wait list */
3485dec94d4SWaiman Long 	RWSEM_WAKE_READERS,	/* Wake readers only */
3495dec94d4SWaiman Long 	RWSEM_WAKE_READ_OWNED	/* Waker thread holds the read lock */
3505dec94d4SWaiman Long };
3515dec94d4SWaiman Long 
3524f23dbc1SWaiman Long /*
3534f23dbc1SWaiman Long  * The typical HZ value is either 250 or 1000. So set the minimum waiting
3544f23dbc1SWaiman Long  * time to at least 4ms or 1 jiffy (if it is higher than 4ms) in the wait
3554f23dbc1SWaiman Long  * queue before initiating the handoff protocol.
3564f23dbc1SWaiman Long  */
3574f23dbc1SWaiman Long #define RWSEM_WAIT_TIMEOUT	DIV_ROUND_UP(HZ, 250)
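
/*
 * Worked example (editor's note): with HZ = 1000, DIV_ROUND_UP(1000, 250) is
 * 4 jiffies = 4ms; with HZ = 250 it is 1 jiffy = 4ms; with HZ = 100 it rounds
 * up to 1 jiffy = 10ms. The handoff timeout is therefore never shorter than
 * 4ms and never less than one jiffy.
 */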
3584f23dbc1SWaiman Long 
3595dec94d4SWaiman Long /*
360d3681e26SWaiman Long  * Magic number to batch-wakeup waiting readers, even when writers are
361d3681e26SWaiman Long  * also present in the queue. This both limits the amount of work the
362d3681e26SWaiman Long  * waking thread must do and also prevents any potential counter overflow,
363d3681e26SWaiman Long  * however unlikely.
364d3681e26SWaiman Long  */
365d3681e26SWaiman Long #define MAX_READERS_WAKEUP	0x100
366d3681e26SWaiman Long 
367d257cc8cSWaiman Long static inline void
368d257cc8cSWaiman Long rwsem_add_waiter(struct rw_semaphore *sem, struct rwsem_waiter *waiter)
369d257cc8cSWaiman Long {
370d257cc8cSWaiman Long 	lockdep_assert_held(&sem->wait_lock);
371d257cc8cSWaiman Long 	list_add_tail(&waiter->list, &sem->wait_list);
372d257cc8cSWaiman Long 	/* caller will set RWSEM_FLAG_WAITERS */
373d257cc8cSWaiman Long }
374d257cc8cSWaiman Long 
375d257cc8cSWaiman Long /*
376d257cc8cSWaiman Long  * Remove a waiter from the wait_list and clear flags.
377d257cc8cSWaiman Long  *
378d257cc8cSWaiman Long  * Both rwsem_mark_wake() and rwsem_try_write_lock() contain a full 'copy' of
379d257cc8cSWaiman Long  * this function. Modify with care.
3801ee32619SWaiman Long  *
3811ee32619SWaiman Long  * Return: true if wait_list isn't empty and false otherwise
382d257cc8cSWaiman Long  */
3831ee32619SWaiman Long static inline bool
384d257cc8cSWaiman Long rwsem_del_waiter(struct rw_semaphore *sem, struct rwsem_waiter *waiter)
385d257cc8cSWaiman Long {
386d257cc8cSWaiman Long 	lockdep_assert_held(&sem->wait_lock);
387d257cc8cSWaiman Long 	list_del(&waiter->list);
388d257cc8cSWaiman Long 	if (likely(!list_empty(&sem->wait_list)))
3891ee32619SWaiman Long 		return true;
390d257cc8cSWaiman Long 
391d257cc8cSWaiman Long 	atomic_long_andnot(RWSEM_FLAG_HANDOFF | RWSEM_FLAG_WAITERS, &sem->count);
3921ee32619SWaiman Long 	return false;
393d257cc8cSWaiman Long }
394d257cc8cSWaiman Long 
395d3681e26SWaiman Long /*
3965dec94d4SWaiman Long  * handle the lock release when processes blocked on it can now run
3975dec94d4SWaiman Long  * - if we come here from up_xxxx(), then the RWSEM_FLAG_WAITERS bit must
3985dec94d4SWaiman Long  *   have been set.
3995dec94d4SWaiman Long  * - there must be someone on the queue
4005dec94d4SWaiman Long  * - the wait_lock must be held by the caller
4015dec94d4SWaiman Long  * - tasks are marked for wakeup, the caller must later invoke wake_up_q()
4025dec94d4SWaiman Long  *   to actually wakeup the blocked task(s) and drop the reference count,
4035dec94d4SWaiman Long  *   preferably when the wait_lock is released
4045dec94d4SWaiman Long  * - woken process blocks are discarded from the list after having their task zeroed
4055dec94d4SWaiman Long  * - writers are only marked woken if downgrading is false
406d257cc8cSWaiman Long  *
407d257cc8cSWaiman Long  * Implies rwsem_del_waiter() for all woken readers.
4085dec94d4SWaiman Long  */
4096cef7ff6SWaiman Long static void rwsem_mark_wake(struct rw_semaphore *sem,
4105dec94d4SWaiman Long 			    enum rwsem_wake_type wake_type,
4115dec94d4SWaiman Long 			    struct wake_q_head *wake_q)
4125dec94d4SWaiman Long {
4135dec94d4SWaiman Long 	struct rwsem_waiter *waiter, *tmp;
4145dec94d4SWaiman Long 	long oldcount, woken = 0, adjustment = 0;
4155dec94d4SWaiman Long 	struct list_head wlist;
4165dec94d4SWaiman Long 
4174f23dbc1SWaiman Long 	lockdep_assert_held(&sem->wait_lock);
4184f23dbc1SWaiman Long 
4195dec94d4SWaiman Long 	/*
4205dec94d4SWaiman Long 	 * Take a peek at the queue head waiter such that we can determine
4215dec94d4SWaiman Long 	 * the wakeup(s) to perform.
4225dec94d4SWaiman Long 	 */
4234f23dbc1SWaiman Long 	waiter = rwsem_first_waiter(sem);
4245dec94d4SWaiman Long 
4255dec94d4SWaiman Long 	if (waiter->type == RWSEM_WAITING_FOR_WRITE) {
4265dec94d4SWaiman Long 		if (wake_type == RWSEM_WAKE_ANY) {
4275dec94d4SWaiman Long 			/*
4285dec94d4SWaiman Long 			 * Mark writer at the front of the queue for wakeup.
4295dec94d4SWaiman Long 			 * Until the task is actually awoken later by
4305dec94d4SWaiman Long 			 * the caller, other writers are able to steal it.
4315dec94d4SWaiman Long 			 * Readers, on the other hand, will block as they
4325dec94d4SWaiman Long 			 * will notice the queued writer.
4335dec94d4SWaiman Long 			 */
4345dec94d4SWaiman Long 			wake_q_add(wake_q, waiter->task);
4355dec94d4SWaiman Long 			lockevent_inc(rwsem_wake_writer);
4365dec94d4SWaiman Long 		}
4375dec94d4SWaiman Long 
4385dec94d4SWaiman Long 		return;
4395dec94d4SWaiman Long 	}
4405dec94d4SWaiman Long 
4415dec94d4SWaiman Long 	/*
442a15ea1a3SWaiman Long 	 * No reader wakeup if there are too many of them already.
443a15ea1a3SWaiman Long 	 */
444a15ea1a3SWaiman Long 	if (unlikely(atomic_long_read(&sem->count) < 0))
445a15ea1a3SWaiman Long 		return;
446a15ea1a3SWaiman Long 
447a15ea1a3SWaiman Long 	/*
4485dec94d4SWaiman Long 	 * Writers might steal the lock before we grant it to the next reader.
4495dec94d4SWaiman Long 	 * We prefer to do the first reader grant before counting readers
4505dec94d4SWaiman Long 	 * so we can bail out early if a writer stole the lock.
4515dec94d4SWaiman Long 	 */
4525dec94d4SWaiman Long 	if (wake_type != RWSEM_WAKE_READ_OWNED) {
4535cfd92e1SWaiman Long 		struct task_struct *owner;
4545cfd92e1SWaiman Long 
4555dec94d4SWaiman Long 		adjustment = RWSEM_READER_BIAS;
4565dec94d4SWaiman Long 		oldcount = atomic_long_fetch_add(adjustment, &sem->count);
4575dec94d4SWaiman Long 		if (unlikely(oldcount & RWSEM_WRITER_MASK)) {
4584f23dbc1SWaiman Long 			/*
4594f23dbc1SWaiman Long 			 * When we've been waiting "too" long (for writers
4604f23dbc1SWaiman Long 			 * to give up the lock), request a HANDOFF to
4614f23dbc1SWaiman Long 			 * force the issue.
4624f23dbc1SWaiman Long 			 */
4636eebd5fbSWaiman Long 			if (time_after(jiffies, waiter->timeout)) {
4646eebd5fbSWaiman Long 				if (!(oldcount & RWSEM_FLAG_HANDOFF)) {
4654f23dbc1SWaiman Long 					adjustment -= RWSEM_FLAG_HANDOFF;
4664f23dbc1SWaiman Long 					lockevent_inc(rwsem_rlock_handoff);
4674f23dbc1SWaiman Long 				}
4686eebd5fbSWaiman Long 				waiter->handoff_set = true;
4696eebd5fbSWaiman Long 			}
4704f23dbc1SWaiman Long 
4714f23dbc1SWaiman Long 			atomic_long_add(-adjustment, &sem->count);
4725dec94d4SWaiman Long 			return;
4735dec94d4SWaiman Long 		}
4745dec94d4SWaiman Long 		/*
4755dec94d4SWaiman Long 		 * Set it to reader-owned to give spinners an early
4765dec94d4SWaiman Long 		 * indication that readers now have the lock.
4775cfd92e1SWaiman Long 		 * The reader nonspinnable bit seen at slowpath entry of
4785cfd92e1SWaiman Long 		 * the reader is copied over.
4795dec94d4SWaiman Long 		 */
4805cfd92e1SWaiman Long 		owner = waiter->task;
4815cfd92e1SWaiman Long 		__rwsem_set_reader_owned(sem, owner);
4825dec94d4SWaiman Long 	}
4835dec94d4SWaiman Long 
4845dec94d4SWaiman Long 	/*
485d3681e26SWaiman Long 	 * Grant up to MAX_READERS_WAKEUP read locks to all the readers in the
486d3681e26SWaiman Long 	 * queue. We know that at least one reader will be woken, as we accounted
4875dec94d4SWaiman Long 	 * for above. Note we increment the 'active part' of the count by the
4885dec94d4SWaiman Long 	 * number of readers before waking any processes up.
4895dec94d4SWaiman Long 	 *
490d3681e26SWaiman Long 	 * This is an adaptation of the phase-fair R/W locks where at the
491d3681e26SWaiman Long 	 * reader phase (first waiter is a reader), all readers are eligible
492d3681e26SWaiman Long 	 * to acquire the lock at the same time irrespective of their order
493d3681e26SWaiman Long 	 * in the queue. The writers acquire the lock according to their
494d3681e26SWaiman Long 	 * order in the queue.
495d3681e26SWaiman Long 	 *
4965dec94d4SWaiman Long 	 * We have to do wakeup in 2 passes to prevent the possibility that
4975dec94d4SWaiman Long 	 * the reader count may be decremented before it is incremented. It
4985dec94d4SWaiman Long 	 * is because the to-be-woken waiter may not have slept yet. So it
4995dec94d4SWaiman Long 	 * may see waiter->task cleared, finish its critical section and
5005dec94d4SWaiman Long 	 * do an unlock before the reader count increment.
5015dec94d4SWaiman Long 	 *
5025dec94d4SWaiman Long 	 * 1) Collect the read-waiters in a separate list, count them and
5035dec94d4SWaiman Long 	 *    fully increment the reader count in rwsem.
5045dec94d4SWaiman Long 	 * 2) For each waiter in the new list, clear waiter->task and
5055dec94d4SWaiman Long 	 *    put them into wake_q to be woken up later.
5065dec94d4SWaiman Long 	 */
507d3681e26SWaiman Long 	INIT_LIST_HEAD(&wlist);
508d3681e26SWaiman Long 	list_for_each_entry_safe(waiter, tmp, &sem->wait_list, list) {
5095dec94d4SWaiman Long 		if (waiter->type == RWSEM_WAITING_FOR_WRITE)
510d3681e26SWaiman Long 			continue;
5115dec94d4SWaiman Long 
5125dec94d4SWaiman Long 		woken++;
513d3681e26SWaiman Long 		list_move_tail(&waiter->list, &wlist);
514d3681e26SWaiman Long 
515d3681e26SWaiman Long 		/*
516d3681e26SWaiman Long 		 * Limit # of readers that can be woken up per wakeup call.
517d3681e26SWaiman Long 		 */
5185197fcd0SYanfei Xu 		if (unlikely(woken >= MAX_READERS_WAKEUP))
519d3681e26SWaiman Long 			break;
5205dec94d4SWaiman Long 	}
5215dec94d4SWaiman Long 
5225dec94d4SWaiman Long 	adjustment = woken * RWSEM_READER_BIAS - adjustment;
5235dec94d4SWaiman Long 	lockevent_cond_inc(rwsem_wake_reader, woken);
5245dec94d4SWaiman Long 
525d257cc8cSWaiman Long 	oldcount = atomic_long_read(&sem->count);
526d257cc8cSWaiman Long 	if (list_empty(&sem->wait_list)) {
5274f23dbc1SWaiman Long 		/*
528d257cc8cSWaiman Long 		 * Combined with list_move_tail() above, this implies
529d257cc8cSWaiman Long 		 * rwsem_del_waiter().
5304f23dbc1SWaiman Long 		 */
531d257cc8cSWaiman Long 		adjustment -= RWSEM_FLAG_WAITERS;
532d257cc8cSWaiman Long 		if (oldcount & RWSEM_FLAG_HANDOFF)
5334f23dbc1SWaiman Long 			adjustment -= RWSEM_FLAG_HANDOFF;
534d257cc8cSWaiman Long 	} else if (woken) {
535d257cc8cSWaiman Long 		/*
536d257cc8cSWaiman Long 		 * When we've woken a reader, we no longer need to force
537d257cc8cSWaiman Long 		 * writers to give up the lock and we can clear HANDOFF.
538d257cc8cSWaiman Long 		 */
539d257cc8cSWaiman Long 		if (oldcount & RWSEM_FLAG_HANDOFF)
540d257cc8cSWaiman Long 			adjustment -= RWSEM_FLAG_HANDOFF;
541d257cc8cSWaiman Long 	}
5424f23dbc1SWaiman Long 
5435dec94d4SWaiman Long 	if (adjustment)
5445dec94d4SWaiman Long 		atomic_long_add(adjustment, &sem->count);
5455dec94d4SWaiman Long 
5465dec94d4SWaiman Long 	/* 2nd pass */
5475dec94d4SWaiman Long 	list_for_each_entry_safe(waiter, tmp, &wlist, list) {
5485dec94d4SWaiman Long 		struct task_struct *tsk;
5495dec94d4SWaiman Long 
5505dec94d4SWaiman Long 		tsk = waiter->task;
5515dec94d4SWaiman Long 		get_task_struct(tsk);
5525dec94d4SWaiman Long 
5535dec94d4SWaiman Long 		/*
5545dec94d4SWaiman Long 		 * Ensure calling get_task_struct() before setting the reader
5556cef7ff6SWaiman Long 		 * waiter to nil such that rwsem_down_read_slowpath() cannot
5565dec94d4SWaiman Long 		 * race with do_exit() by always holding a reference count
5575dec94d4SWaiman Long 		 * to the task to wakeup.
5585dec94d4SWaiman Long 		 */
5595dec94d4SWaiman Long 		smp_store_release(&waiter->task, NULL);
5605dec94d4SWaiman Long 		/*
5615dec94d4SWaiman Long 		 * Ensure issuing the wakeup (either by us or someone else)
5625dec94d4SWaiman Long 		 * after setting the reader waiter to nil.
5635dec94d4SWaiman Long 		 */
5645dec94d4SWaiman Long 		wake_q_add_safe(wake_q, tsk);
5655dec94d4SWaiman Long 	}
5665dec94d4SWaiman Long }
5675dec94d4SWaiman Long 
5685dec94d4SWaiman Long /*
5691ee32619SWaiman Long  * Remove a waiter and try to wake up other waiters in the wait queue
5701ee32619SWaiman Long  * This function is called from the out_nolock path of both the reader and
5711ee32619SWaiman Long  * writer slowpaths with wait_lock held. It releases the wait_lock and
5721ee32619SWaiman Long  * optionally wake up waiters before it returns.
5731ee32619SWaiman Long  */
5741ee32619SWaiman Long static inline void
5751ee32619SWaiman Long rwsem_del_wake_waiter(struct rw_semaphore *sem, struct rwsem_waiter *waiter,
5761ee32619SWaiman Long 		      struct wake_q_head *wake_q)
5771ee32619SWaiman Long 		      __releases(&sem->wait_lock)
5781ee32619SWaiman Long {
5791ee32619SWaiman Long 	bool first = rwsem_first_waiter(sem) == waiter;
5801ee32619SWaiman Long 
5811ee32619SWaiman Long 	wake_q_init(wake_q);
5821ee32619SWaiman Long 
5831ee32619SWaiman Long 	/*
5841ee32619SWaiman Long 	 * If the wait_list isn't empty and the waiter to be deleted is
5851ee32619SWaiman Long 	 * the first waiter, we wake up the remaining waiters as they may
5861ee32619SWaiman Long 	 * be eligible to acquire or spin on the lock.
5871ee32619SWaiman Long 	 */
5881ee32619SWaiman Long 	if (rwsem_del_waiter(sem, waiter) && first)
5891ee32619SWaiman Long 		rwsem_mark_wake(sem, RWSEM_WAKE_ANY, wake_q);
5901ee32619SWaiman Long 	raw_spin_unlock_irq(&sem->wait_lock);
5911ee32619SWaiman Long 	if (!wake_q_empty(wake_q))
5921ee32619SWaiman Long 		wake_up_q(wake_q);
5931ee32619SWaiman Long }
5941ee32619SWaiman Long 
5951ee32619SWaiman Long /*
5965dec94d4SWaiman Long  * This function must be called with the sem->wait_lock held to prevent
5975dec94d4SWaiman Long  * race conditions between checking the rwsem wait list and setting the
5985dec94d4SWaiman Long  * sem->count accordingly.
5994f23dbc1SWaiman Long  *
600d257cc8cSWaiman Long  * Implies rwsem_del_waiter() on success.
6015dec94d4SWaiman Long  */
60200f3c5a3SWaiman Long static inline bool rwsem_try_write_lock(struct rw_semaphore *sem,
603d257cc8cSWaiman Long 					struct rwsem_waiter *waiter)
6045dec94d4SWaiman Long {
6056eebd5fbSWaiman Long 	struct rwsem_waiter *first = rwsem_first_waiter(sem);
60600f3c5a3SWaiman Long 	long count, new;
6075dec94d4SWaiman Long 
6084f23dbc1SWaiman Long 	lockdep_assert_held(&sem->wait_lock);
6094f23dbc1SWaiman Long 
61000f3c5a3SWaiman Long 	count = atomic_long_read(&sem->count);
6114f23dbc1SWaiman Long 	do {
6124f23dbc1SWaiman Long 		bool has_handoff = !!(count & RWSEM_FLAG_HANDOFF);
6134f23dbc1SWaiman Long 
614d257cc8cSWaiman Long 		if (has_handoff) {
6156eebd5fbSWaiman Long 			/*
6166eebd5fbSWaiman Long 			 * Honor handoff bit and yield only when the first
6176eebd5fbSWaiman Long 			 * waiter is the one that set it. Otherwise, we
6186eebd5fbSWaiman Long 			 * still try to acquire the rwsem.
6196eebd5fbSWaiman Long 			 */
6206eebd5fbSWaiman Long 			if (first->handoff_set && (waiter != first))
6215dec94d4SWaiman Long 				return false;
622d257cc8cSWaiman Long 		}
623d257cc8cSWaiman Long 
6244f23dbc1SWaiman Long 		new = count;
6255dec94d4SWaiman Long 
6264f23dbc1SWaiman Long 		if (count & RWSEM_LOCK_MASK) {
627b613c7f3SWaiman Long 			/*
628b613c7f3SWaiman Long 			 * A waiter (first or not) can set the handoff bit
629b613c7f3SWaiman Long 			 * if it is an RT task or has waited in the wait queue
630b613c7f3SWaiman Long 			 * for too long.
631b613c7f3SWaiman Long 			 */
632ae04f69dSQais Yousef 			if (has_handoff || (!rt_or_dl_task(waiter->task) &&
633d257cc8cSWaiman Long 					    !time_after(jiffies, waiter->timeout)))
6344f23dbc1SWaiman Long 				return false;
6354f23dbc1SWaiman Long 
6364f23dbc1SWaiman Long 			new |= RWSEM_FLAG_HANDOFF;
6374f23dbc1SWaiman Long 		} else {
6384f23dbc1SWaiman Long 			new |= RWSEM_WRITER_LOCKED;
6394f23dbc1SWaiman Long 			new &= ~RWSEM_FLAG_HANDOFF;
6404f23dbc1SWaiman Long 
6414f23dbc1SWaiman Long 			if (list_is_singular(&sem->wait_list))
6424f23dbc1SWaiman Long 				new &= ~RWSEM_FLAG_WAITERS;
6434f23dbc1SWaiman Long 		}
6444f23dbc1SWaiman Long 	} while (!atomic_long_try_cmpxchg_acquire(&sem->count, &count, new));
6454f23dbc1SWaiman Long 
6464f23dbc1SWaiman Long 	/*
647b613c7f3SWaiman Long 	 * We have either acquired the lock with handoff bit cleared or set
648b613c7f3SWaiman Long 	 * the handoff bit. Only the first waiter can have its handoff_set
649b613c7f3SWaiman Long 	 * set here to enable optimistic spinning in slowpath loop.
6504f23dbc1SWaiman Long 	 */
651d257cc8cSWaiman Long 	if (new & RWSEM_FLAG_HANDOFF) {
652b613c7f3SWaiman Long 		first->handoff_set = true;
653d257cc8cSWaiman Long 		lockevent_inc(rwsem_wlock_handoff);
6544f23dbc1SWaiman Long 		return false;
655d257cc8cSWaiman Long 	}
6564f23dbc1SWaiman Long 
657d257cc8cSWaiman Long 	/*
658d257cc8cSWaiman Long 	 * Have rwsem_try_write_lock() fully imply rwsem_del_waiter() on
659d257cc8cSWaiman Long 	 * success.
660d257cc8cSWaiman Long 	 */
661d257cc8cSWaiman Long 	list_del(&waiter->list);
6625dec94d4SWaiman Long 	rwsem_set_owner(sem);
6635dec94d4SWaiman Long 	return true;
6645dec94d4SWaiman Long }
6655dec94d4SWaiman Long 
6667cdacc5fSYanfei Xu /*
6677cdacc5fSYanfei Xu  * The rwsem_spin_on_owner() function returns the following 4 values
6687cdacc5fSYanfei Xu  * depending on the lock owner state.
6697cdacc5fSYanfei Xu  *   OWNER_NULL  : owner is currently NULL
6707cdacc5fSYanfei Xu  *   OWNER_WRITER: when owner changes and is a writer
6717cdacc5fSYanfei Xu  *   OWNER_READER: when owner changes and the new owner may be a reader.
6727cdacc5fSYanfei Xu  *   OWNER_NONSPINNABLE:
6737cdacc5fSYanfei Xu  *		   when optimistic spinning has to stop because either the
6747cdacc5fSYanfei Xu  *		   owner stops running, is unknown, or its timeslice has
6757cdacc5fSYanfei Xu  *		   been used up.
6767cdacc5fSYanfei Xu  */
6777cdacc5fSYanfei Xu enum owner_state {
6787cdacc5fSYanfei Xu 	OWNER_NULL		= 1 << 0,
6797cdacc5fSYanfei Xu 	OWNER_WRITER		= 1 << 1,
6807cdacc5fSYanfei Xu 	OWNER_READER		= 1 << 2,
6817cdacc5fSYanfei Xu 	OWNER_NONSPINNABLE	= 1 << 3,
6827cdacc5fSYanfei Xu };
6837cdacc5fSYanfei Xu 
6845dec94d4SWaiman Long #ifdef CONFIG_RWSEM_SPIN_ON_OWNER
6855dec94d4SWaiman Long /*
6865dec94d4SWaiman Long  * Try to acquire write lock before the writer has been put on wait queue.
6875dec94d4SWaiman Long  */
6885dec94d4SWaiman Long static inline bool rwsem_try_write_lock_unqueued(struct rw_semaphore *sem)
6895dec94d4SWaiman Long {
6905dec94d4SWaiman Long 	long count = atomic_long_read(&sem->count);
6915dec94d4SWaiman Long 
6924f23dbc1SWaiman Long 	while (!(count & (RWSEM_LOCK_MASK|RWSEM_FLAG_HANDOFF))) {
6935dec94d4SWaiman Long 		if (atomic_long_try_cmpxchg_acquire(&sem->count, &count,
6944f23dbc1SWaiman Long 					count | RWSEM_WRITER_LOCKED)) {
6955dec94d4SWaiman Long 			rwsem_set_owner(sem);
696617f3ef9SWaiman Long 			lockevent_inc(rwsem_opt_lock);
6975dec94d4SWaiman Long 			return true;
6985dec94d4SWaiman Long 		}
6995dec94d4SWaiman Long 	}
7005dec94d4SWaiman Long 	return false;
7015dec94d4SWaiman Long }
7025dec94d4SWaiman Long 
703617f3ef9SWaiman Long static inline bool rwsem_can_spin_on_owner(struct rw_semaphore *sem)
7045dec94d4SWaiman Long {
7055dec94d4SWaiman Long 	struct task_struct *owner;
70694a9717bSWaiman Long 	unsigned long flags;
7075dec94d4SWaiman Long 	bool ret = true;
7085dec94d4SWaiman Long 
709cf69482dSWaiman Long 	if (need_resched()) {
710cf69482dSWaiman Long 		lockevent_inc(rwsem_opt_fail);
7115dec94d4SWaiman Long 		return false;
712cf69482dSWaiman Long 	}
7135dec94d4SWaiman Long 
7146c2787f2SYanfei Xu 	/*
7156c2787f2SYanfei Xu 	 * Disable preemption is equal to the RCU read-side crital section,
7166c2787f2SYanfei Xu 	 * Disabling preemption is equivalent to an RCU read-side critical
7176c2787f2SYanfei Xu 	 * section, thus the task_struct structure won't go away.
71894a9717bSWaiman Long 	owner = rwsem_owner_flags(sem, &flags);
71978134300SWaiman Long 	/*
72078134300SWaiman Long 	 * Don't check the read-owner as the entry may be stale.
72178134300SWaiman Long 	 */
722617f3ef9SWaiman Long 	if ((flags & RWSEM_NONSPINNABLE) ||
72378134300SWaiman Long 	    (owner && !(flags & RWSEM_READER_OWNED) && !owner_on_cpu(owner)))
72494a9717bSWaiman Long 		ret = false;
725cf69482dSWaiman Long 
726cf69482dSWaiman Long 	lockevent_cond_inc(rwsem_opt_fail, !ret);
7275dec94d4SWaiman Long 	return ret;
7285dec94d4SWaiman Long }
7295dec94d4SWaiman Long 
7307d43f1ceSWaiman Long #define OWNER_SPINNABLE		(OWNER_NULL | OWNER_WRITER | OWNER_READER)
7315dec94d4SWaiman Long 
73294a9717bSWaiman Long static inline enum owner_state
733617f3ef9SWaiman Long rwsem_owner_state(struct task_struct *owner, unsigned long flags)
7343f6d517aSWaiman Long {
735617f3ef9SWaiman Long 	if (flags & RWSEM_NONSPINNABLE)
7363f6d517aSWaiman Long 		return OWNER_NONSPINNABLE;
7373f6d517aSWaiman Long 
73894a9717bSWaiman Long 	if (flags & RWSEM_READER_OWNED)
7393f6d517aSWaiman Long 		return OWNER_READER;
7403f6d517aSWaiman Long 
74194a9717bSWaiman Long 	return owner ? OWNER_WRITER : OWNER_NULL;
7423f6d517aSWaiman Long }
7433f6d517aSWaiman Long 
7447d43f1ceSWaiman Long static noinline enum owner_state
745617f3ef9SWaiman Long rwsem_spin_on_owner(struct rw_semaphore *sem)
7463f6d517aSWaiman Long {
74794a9717bSWaiman Long 	struct task_struct *new, *owner;
74894a9717bSWaiman Long 	unsigned long flags, new_flags;
74994a9717bSWaiman Long 	enum owner_state state;
7503f6d517aSWaiman Long 
7516c2787f2SYanfei Xu 	lockdep_assert_preemption_disabled();
7526c2787f2SYanfei Xu 
75394a9717bSWaiman Long 	owner = rwsem_owner_flags(sem, &flags);
754617f3ef9SWaiman Long 	state = rwsem_owner_state(owner, flags);
7553f6d517aSWaiman Long 	if (state != OWNER_WRITER)
7563f6d517aSWaiman Long 		return state;
7575dec94d4SWaiman Long 
7583f6d517aSWaiman Long 	for (;;) {
75991d2a812SWaiman Long 		/*
76091d2a812SWaiman Long 		 * When a waiting writer sets the handoff flag, it may spin
76191d2a812SWaiman Long 		 * on the owner as well. Once that writer acquires the lock,
76291d2a812SWaiman Long 		 * we can spin on it. So we don't need to quit even when the
76391d2a812SWaiman Long 		 * handoff bit is set.
76491d2a812SWaiman Long 		 */
76594a9717bSWaiman Long 		new = rwsem_owner_flags(sem, &new_flags);
76694a9717bSWaiman Long 		if ((new != owner) || (new_flags != flags)) {
767617f3ef9SWaiman Long 			state = rwsem_owner_state(new, new_flags);
7683f6d517aSWaiman Long 			break;
7693f6d517aSWaiman Long 		}
7703f6d517aSWaiman Long 
7715dec94d4SWaiman Long 		/*
7725dec94d4SWaiman Long 		 * Ensure we emit the owner->on_cpu, dereference _after_
7735dec94d4SWaiman Long 		 * checking sem->owner still matches owner, if that fails,
7745dec94d4SWaiman Long 		 * owner might point to free()d memory, if it still matches,
7756c2787f2SYanfei Xu 		 * our spinning context has already disabled preemption, which is
7766c2787f2SYanfei Xu 		 * equivalent to an RCU read-side critical section and ensures the
7776c2787f2SYanfei Xu 		 * memory stays valid.
7785dec94d4SWaiman Long 		 */
7795dec94d4SWaiman Long 		barrier();
7805dec94d4SWaiman Long 
7815dec94d4SWaiman Long 		if (need_resched() || !owner_on_cpu(owner)) {
7823f6d517aSWaiman Long 			state = OWNER_NONSPINNABLE;
7833f6d517aSWaiman Long 			break;
7845dec94d4SWaiman Long 		}
7855dec94d4SWaiman Long 
7865dec94d4SWaiman Long 		cpu_relax();
7875dec94d4SWaiman Long 	}
7885dec94d4SWaiman Long 
7893f6d517aSWaiman Long 	return state;
7905dec94d4SWaiman Long }
7915dec94d4SWaiman Long 
7927d43f1ceSWaiman Long /*
7937d43f1ceSWaiman Long  * Calculate reader-owned rwsem spinning threshold for writer
7947d43f1ceSWaiman Long  *
7957d43f1ceSWaiman Long  * The more readers own the rwsem, the longer it will take for them to
7967d43f1ceSWaiman Long  * wind down and free the rwsem. So the empirical formula used to
7977d43f1ceSWaiman Long  * determine the actual spinning time limit here is:
7987d43f1ceSWaiman Long  *
7997d43f1ceSWaiman Long  *   Spinning threshold = (10 + nr_readers/2)us
8007d43f1ceSWaiman Long  *
8017d43f1ceSWaiman Long  * The limit is capped to a maximum of 25us (30 readers). This is just
8027d43f1ceSWaiman Long  * a heuristic and is subjected to change in the future.
8037d43f1ceSWaiman Long  * a heuristic and is subject to change in the future.
8047d43f1ceSWaiman Long static inline u64 rwsem_rspin_threshold(struct rw_semaphore *sem)
8057d43f1ceSWaiman Long {
8067d43f1ceSWaiman Long 	long count = atomic_long_read(&sem->count);
8077d43f1ceSWaiman Long 	int readers = count >> RWSEM_READER_SHIFT;
8087d43f1ceSWaiman Long 	u64 delta;
8097d43f1ceSWaiman Long 
8107d43f1ceSWaiman Long 	if (readers > 30)
8117d43f1ceSWaiman Long 		readers = 30;
8127d43f1ceSWaiman Long 	delta = (20 + readers) * NSEC_PER_USEC / 2;
8137d43f1ceSWaiman Long 
8147d43f1ceSWaiman Long 	return sched_clock() + delta;
8157d43f1ceSWaiman Long }
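
/*
 * Worked example of the threshold above (editor's note): with 10 readers,
 * delta = (20 + 10) * NSEC_PER_USEC / 2 = 15000ns, matching the
 * (10 + 10/2) = 15us of the formula; with 30 or more readers the cap applies
 * and delta = (20 + 30) * NSEC_PER_USEC / 2 = 25000ns = 25us.
 */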
8167d43f1ceSWaiman Long 
817617f3ef9SWaiman Long static bool rwsem_optimistic_spin(struct rw_semaphore *sem)
8185dec94d4SWaiman Long {
8195dec94d4SWaiman Long 	bool taken = false;
820990fa738SWaiman Long 	int prev_owner_state = OWNER_NULL;
8217d43f1ceSWaiman Long 	int loop = 0;
8227d43f1ceSWaiman Long 	u64 rspin_threshold = 0;
8235dec94d4SWaiman Long 
8245dec94d4SWaiman Long 	/* sem->wait_lock should not be held when doing optimistic spinning */
8255dec94d4SWaiman Long 	if (!osq_lock(&sem->osq))
8265dec94d4SWaiman Long 		goto done;
8275dec94d4SWaiman Long 
8285dec94d4SWaiman Long 	/*
8295dec94d4SWaiman Long 	 * Optimistically spin on the owner field and attempt to acquire the
8305dec94d4SWaiman Long 	 * lock whenever the owner changes. Spinning will be stopped when:
8315dec94d4SWaiman Long 	 *  1) the owning writer isn't running; or
8327d43f1ceSWaiman Long 	 *  2) readers own the lock and spinning time has exceeded the limit.
8335dec94d4SWaiman Long 	 */
834990fa738SWaiman Long 	for (;;) {
8357d43f1ceSWaiman Long 		enum owner_state owner_state;
836990fa738SWaiman Long 
837617f3ef9SWaiman Long 		owner_state = rwsem_spin_on_owner(sem);
838990fa738SWaiman Long 		if (!(owner_state & OWNER_SPINNABLE))
839990fa738SWaiman Long 			break;
840990fa738SWaiman Long 
8415dec94d4SWaiman Long 		/*
8425dec94d4SWaiman Long 		 * Try to acquire the lock
8435dec94d4SWaiman Long 		 */
844617f3ef9SWaiman Long 		taken = rwsem_try_write_lock_unqueued(sem);
845cf69482dSWaiman Long 
846cf69482dSWaiman Long 		if (taken)
8475dec94d4SWaiman Long 			break;
8485dec94d4SWaiman Long 
8495dec94d4SWaiman Long 		/*
8507d43f1ceSWaiman Long 		 * Time-based reader-owned rwsem optimistic spinning
8517d43f1ceSWaiman Long 		 */
852617f3ef9SWaiman Long 		if (owner_state == OWNER_READER) {
8537d43f1ceSWaiman Long 			/*
8547d43f1ceSWaiman Long 			 * Re-initialize rspin_threshold every time when
8557d43f1ceSWaiman Long 			 * the owner state changes from non-reader to reader.
8567d43f1ceSWaiman Long 			 * This allows a writer to steal the lock in between
8577d43f1ceSWaiman Long 			 * 2 reader phases and have the threshold reset at
8587d43f1ceSWaiman Long 			 * the beginning of the 2nd reader phase.
8597d43f1ceSWaiman Long 			 */
8607d43f1ceSWaiman Long 			if (prev_owner_state != OWNER_READER) {
861617f3ef9SWaiman Long 				if (rwsem_test_oflags(sem, RWSEM_NONSPINNABLE))
8627d43f1ceSWaiman Long 					break;
8637d43f1ceSWaiman Long 				rspin_threshold = rwsem_rspin_threshold(sem);
8647d43f1ceSWaiman Long 				loop = 0;
8657d43f1ceSWaiman Long 			}
8667d43f1ceSWaiman Long 
8677d43f1ceSWaiman Long 			/*
8687d43f1ceSWaiman Long 			 * Check time threshold once every 16 iterations to
8697d43f1ceSWaiman Long 			 * avoid calling sched_clock() too frequently so
8707d43f1ceSWaiman Long 			 * as to reduce the average latency between the times
8717d43f1ceSWaiman Long 			 * when the lock becomes free and when the spinner
8727d43f1ceSWaiman Long 			 * is ready to do a trylock.
8737d43f1ceSWaiman Long 			 */
8747d43f1ceSWaiman Long 			else if (!(++loop & 0xf) && (sched_clock() > rspin_threshold)) {
8757d43f1ceSWaiman Long 				rwsem_set_nonspinnable(sem);
8767d43f1ceSWaiman Long 				lockevent_inc(rwsem_opt_nospin);
8777d43f1ceSWaiman Long 				break;
8787d43f1ceSWaiman Long 			}
8797d43f1ceSWaiman Long 		}
8807d43f1ceSWaiman Long 
8817d43f1ceSWaiman Long 		/*
882990fa738SWaiman Long 		 * An RT task cannot do optimistic spinning if it cannot
883990fa738SWaiman Long 		 * be sure the lock holder is running or live-lock may
884990fa738SWaiman Long 		 * happen if the current task and the lock holder happen
885990fa738SWaiman Long 		 * to run on the same CPU. However, aborting optimistic
886990fa738SWaiman Long 		 * spinning while a NULL owner is detected may miss some
887990fa738SWaiman Long 		 * opportunity where spinning can continue without causing
888990fa738SWaiman Long 		 * problem.
889990fa738SWaiman Long 		 *
890990fa738SWaiman Long 		 * There are 2 possible cases where an RT task may be able
891990fa738SWaiman Long 		 * to continue spinning.
892990fa738SWaiman Long 		 *
893990fa738SWaiman Long 		 * 1) The lock owner is in the process of releasing the
894990fa738SWaiman Long 		 *    lock, sem->owner is cleared but the lock has not
895990fa738SWaiman Long 		 *    been released yet.
896990fa738SWaiman Long 		 * 2) The lock was free and owner cleared, but another
897990fa738SWaiman Long 		 *    task just comes in and acquires the lock before
898990fa738SWaiman Long 		 *    we try to get it. The new owner may be a spinnable
899990fa738SWaiman Long 		 *    writer.
900990fa738SWaiman Long 		 *
901e2db7592SIngo Molnar 		 * To take advantage of the two scenarios listed above, the RT
902990fa738SWaiman Long 		 * task is made to retry one more time to see if it can
903990fa738SWaiman Long 		 * acquire the lock or continue spinning on the new owning
904990fa738SWaiman Long 		 * writer. Of course, if the time lag is long enough or the
905990fa738SWaiman Long 		 * new owner is not a writer or spinnable, the RT task will
906990fa738SWaiman Long 		 * quit spinning.
907990fa738SWaiman Long 		 *
908990fa738SWaiman Long 		 * If the owner is a writer, the need_resched() check is
909990fa738SWaiman Long 		 * done inside rwsem_spin_on_owner(). If the owner is not
910990fa738SWaiman Long 		 * a writer, need_resched() check needs to be done here.
9115dec94d4SWaiman Long 		 */
912990fa738SWaiman Long 		if (owner_state != OWNER_WRITER) {
913990fa738SWaiman Long 			if (need_resched())
9145dec94d4SWaiman Long 				break;
915ae04f69dSQais Yousef 			if (rt_or_dl_task(current) &&
916990fa738SWaiman Long 			   (prev_owner_state != OWNER_WRITER))
917990fa738SWaiman Long 				break;
918990fa738SWaiman Long 		}
919990fa738SWaiman Long 		prev_owner_state = owner_state;
9205dec94d4SWaiman Long 
9215dec94d4SWaiman Long 		/*
9225dec94d4SWaiman Long 		 * The cpu_relax() call is a compiler barrier which forces
9235dec94d4SWaiman Long 		 * everything in this loop to be re-loaded. We don't need
9245dec94d4SWaiman Long 		 * memory barriers as we'll eventually observe the right
9255dec94d4SWaiman Long 		 * values at the cost of a few extra spins.
9265dec94d4SWaiman Long 		 */
9275dec94d4SWaiman Long 		cpu_relax();
9285dec94d4SWaiman Long 	}
9295dec94d4SWaiman Long 	osq_unlock(&sem->osq);
9305dec94d4SWaiman Long done:
9315dec94d4SWaiman Long 	lockevent_cond_inc(rwsem_opt_fail, !taken);
9325dec94d4SWaiman Long 	return taken;
9335dec94d4SWaiman Long }
9347d43f1ceSWaiman Long 
9357d43f1ceSWaiman Long /*
936617f3ef9SWaiman Long  * Clear the owner's RWSEM_NONSPINNABLE bit if it is set. This should
9377d43f1ceSWaiman Long  * only be called when the reader count reaches 0.
9387d43f1ceSWaiman Long  */
939617f3ef9SWaiman Long static inline void clear_nonspinnable(struct rw_semaphore *sem)
9407d43f1ceSWaiman Long {
94154c1ee4dSWaiman Long 	if (unlikely(rwsem_test_oflags(sem, RWSEM_NONSPINNABLE)))
942617f3ef9SWaiman Long 		atomic_long_andnot(RWSEM_NONSPINNABLE, &sem->owner);
9431a728dffSWaiman Long }
9441a728dffSWaiman Long 
9455dec94d4SWaiman Long #else
946617f3ef9SWaiman Long static inline bool rwsem_can_spin_on_owner(struct rw_semaphore *sem)
947cf69482dSWaiman Long {
948cf69482dSWaiman Long 	return false;
949cf69482dSWaiman Long }
950cf69482dSWaiman Long 
951617f3ef9SWaiman Long static inline bool rwsem_optimistic_spin(struct rw_semaphore *sem)
9525dec94d4SWaiman Long {
9535dec94d4SWaiman Long 	return false;
9545dec94d4SWaiman Long }
9557d43f1ceSWaiman Long 
956617f3ef9SWaiman Long static inline void clear_nonspinnable(struct rw_semaphore *sem) { }
9571a728dffSWaiman Long 
9587cdacc5fSYanfei Xu static inline enum owner_state
959617f3ef9SWaiman Long rwsem_spin_on_owner(struct rw_semaphore *sem)
96091d2a812SWaiman Long {
9617cdacc5fSYanfei Xu 	return OWNER_NONSPINNABLE;
96291d2a812SWaiman Long }
9635dec94d4SWaiman Long #endif
9645dec94d4SWaiman Long 
9655dec94d4SWaiman Long /*
96654c1ee4dSWaiman Long  * Prepare to wake up waiter(s) in the wait queue by putting them into the
96754c1ee4dSWaiman Long  * given wake_q if the rwsem lock owner isn't a writer. If rwsem is likely
96854c1ee4dSWaiman Long  * reader-owned, wake up the read lock waiters at the front of the queue;
96954c1ee4dSWaiman Long  * otherwise wake up any front waiter.
97054c1ee4dSWaiman Long  *
97154c1ee4dSWaiman Long  * This is being called from both reader and writer slow paths.
97254c1ee4dSWaiman Long  */
97354c1ee4dSWaiman Long static inline void rwsem_cond_wake_waiter(struct rw_semaphore *sem, long count,
97454c1ee4dSWaiman Long 					  struct wake_q_head *wake_q)
97554c1ee4dSWaiman Long {
97654c1ee4dSWaiman Long 	enum rwsem_wake_type wake_type;
97754c1ee4dSWaiman Long 
97854c1ee4dSWaiman Long 	if (count & RWSEM_WRITER_MASK)
97954c1ee4dSWaiman Long 		return;
98054c1ee4dSWaiman Long 
98154c1ee4dSWaiman Long 	if (count & RWSEM_READER_MASK) {
98254c1ee4dSWaiman Long 		wake_type = RWSEM_WAKE_READERS;
98354c1ee4dSWaiman Long 	} else {
98454c1ee4dSWaiman Long 		wake_type = RWSEM_WAKE_ANY;
98554c1ee4dSWaiman Long 		clear_nonspinnable(sem);
98654c1ee4dSWaiman Long 	}
98754c1ee4dSWaiman Long 	rwsem_mark_wake(sem, wake_type, wake_q);
98854c1ee4dSWaiman Long }
98954c1ee4dSWaiman Long 
99054c1ee4dSWaiman Long /*
9915dec94d4SWaiman Long  * Wait for the read lock to be granted
9925dec94d4SWaiman Long  */
9936cef7ff6SWaiman Long static struct rw_semaphore __sched *
9942f064a59SPeter Zijlstra rwsem_down_read_slowpath(struct rw_semaphore *sem, long count, unsigned int state)
9955dec94d4SWaiman Long {
996617f3ef9SWaiman Long 	long adjustment = -RWSEM_READER_BIAS;
9972f06f702SWaiman Long 	long rcnt = (count >> RWSEM_READER_SHIFT);
9985dec94d4SWaiman Long 	struct rwsem_waiter waiter;
9995dec94d4SWaiman Long 	DEFINE_WAKE_Q(wake_q);
10005dec94d4SWaiman Long 
10015cfd92e1SWaiman Long 	/*
10022f06f702SWaiman Long 	 * To prevent a constant stream of readers from starving a sleeping
1003d566c786SWaiman Long 	 * writer, don't attempt optimistic lock stealing if the lock is
1004d566c786SWaiman Long 	 * very likely owned by readers.
10052f06f702SWaiman Long 	 */
1006617f3ef9SWaiman Long 	if ((atomic_long_read(&sem->owner) & RWSEM_READER_OWNED) &&
1007617f3ef9SWaiman Long 	    (rcnt > 1) && !(count & RWSEM_WRITER_LOCKED))
10082f06f702SWaiman Long 		goto queue;
10092f06f702SWaiman Long 
10102f06f702SWaiman Long 	/*
1011617f3ef9SWaiman Long 	 * Reader optimistic lock stealing.
10121a728dffSWaiman Long 	 */
1013617f3ef9SWaiman Long 	if (!(count & (RWSEM_WRITER_LOCKED | RWSEM_FLAG_HANDOFF))) {
10141a728dffSWaiman Long 		rwsem_set_reader_owned(sem);
10151a728dffSWaiman Long 		lockevent_inc(rwsem_rlock_steal);
10161a728dffSWaiman Long 
10171a728dffSWaiman Long 		/*
1018617f3ef9SWaiman Long 		 * Wake up other readers in the wait queue if it is
1019617f3ef9SWaiman Long 		 * the first reader.
10205cfd92e1SWaiman Long 		 */
1021617f3ef9SWaiman Long 		if ((rcnt == 1) && (count & RWSEM_FLAG_WAITERS)) {
1022cf69482dSWaiman Long 			raw_spin_lock_irq(&sem->wait_lock);
1023cf69482dSWaiman Long 			if (!list_empty(&sem->wait_list))
1024cf69482dSWaiman Long 				rwsem_mark_wake(sem, RWSEM_WAKE_READ_OWNED,
1025cf69482dSWaiman Long 						&wake_q);
1026cf69482dSWaiman Long 			raw_spin_unlock_irq(&sem->wait_lock);
1027cf69482dSWaiman Long 			wake_up_q(&wake_q);
1028cf69482dSWaiman Long 		}
1029cf69482dSWaiman Long 		return sem;
1030cf69482dSWaiman Long 	}
1031cf69482dSWaiman Long 
1032cf69482dSWaiman Long queue:
10335dec94d4SWaiman Long 	waiter.task = current;
10345dec94d4SWaiman Long 	waiter.type = RWSEM_WAITING_FOR_READ;
10354f23dbc1SWaiman Long 	waiter.timeout = jiffies + RWSEM_WAIT_TIMEOUT;
10366eebd5fbSWaiman Long 	waiter.handoff_set = false;
10375dec94d4SWaiman Long 
10385dec94d4SWaiman Long 	raw_spin_lock_irq(&sem->wait_lock);
10395dec94d4SWaiman Long 	if (list_empty(&sem->wait_list)) {
10405dec94d4SWaiman Long 		/*
10415dec94d4SWaiman Long 		 * In case the wait queue is empty and the lock isn't owned
1042f9e21aa9SWaiman Long 		 * by a writer, this reader can exit the slowpath and return
1043f9e21aa9SWaiman Long 		 * immediately as its RWSEM_READER_BIAS has already been set
1044f9e21aa9SWaiman Long 		 * in the count.
10455dec94d4SWaiman Long 		 */
1046f9e21aa9SWaiman Long 		if (!(atomic_long_read(&sem->count) & RWSEM_WRITER_MASK)) {
1047e1b98fa3SJan Stancek 			/* Provide lock ACQUIRE */
1048e1b98fa3SJan Stancek 			smp_acquire__after_ctrl_dep();
10495dec94d4SWaiman Long 			raw_spin_unlock_irq(&sem->wait_lock);
10505dec94d4SWaiman Long 			rwsem_set_reader_owned(sem);
10515dec94d4SWaiman Long 			lockevent_inc(rwsem_rlock_fast);
10525dec94d4SWaiman Long 			return sem;
10535dec94d4SWaiman Long 		}
10545dec94d4SWaiman Long 		adjustment += RWSEM_FLAG_WAITERS;
10555dec94d4SWaiman Long 	}
1056d257cc8cSWaiman Long 	rwsem_add_waiter(sem, &waiter);
10575dec94d4SWaiman Long 
10585dec94d4SWaiman Long 	/* we're now waiting on the lock, but no longer actively locking */
10595dec94d4SWaiman Long 	count = atomic_long_add_return(adjustment, &sem->count);
10605dec94d4SWaiman Long 
106154c1ee4dSWaiman Long 	rwsem_cond_wake_waiter(sem, count, &wake_q);
10625dec94d4SWaiman Long 	raw_spin_unlock_irq(&sem->wait_lock);
106354c1ee4dSWaiman Long 
106454c1ee4dSWaiman Long 	if (!wake_q_empty(&wake_q))
10655dec94d4SWaiman Long 		wake_up_q(&wake_q);
10665dec94d4SWaiman Long 
1067ee042be1SNamhyung Kim 	trace_contention_begin(sem, LCB_F_READ);
1068ee042be1SNamhyung Kim 
10695dec94d4SWaiman Long 	/* wait to be given the lock */
10706ffddfb9SPeter Zijlstra 	for (;;) {
10715dec94d4SWaiman Long 		set_current_state(state);
107299143f82SPeter Zijlstra 		if (!smp_load_acquire(&waiter.task)) {
10736ffddfb9SPeter Zijlstra 			/* Matches rwsem_mark_wake()'s smp_store_release(). */
10745dec94d4SWaiman Long 			break;
107599143f82SPeter Zijlstra 		}
10765dec94d4SWaiman Long 		if (signal_pending_state(state, current)) {
10775dec94d4SWaiman Long 			raw_spin_lock_irq(&sem->wait_lock);
10785dec94d4SWaiman Long 			if (waiter.task)
10795dec94d4SWaiman Long 				goto out_nolock;
10805dec94d4SWaiman Long 			raw_spin_unlock_irq(&sem->wait_lock);
10816ffddfb9SPeter Zijlstra 			/* Ordered by sem->wait_lock against rwsem_mark_wake(). */
10825dec94d4SWaiman Long 			break;
10835dec94d4SWaiman Long 		}
10843f524553SWaiman Long 		schedule_preempt_disabled();
10855dec94d4SWaiman Long 		lockevent_inc(rwsem_sleep_reader);
10865dec94d4SWaiman Long 	}
10875dec94d4SWaiman Long 
10885dec94d4SWaiman Long 	__set_current_state(TASK_RUNNING);
10895dec94d4SWaiman Long 	lockevent_inc(rwsem_rlock);
1090ee042be1SNamhyung Kim 	trace_contention_end(sem, 0);
10915dec94d4SWaiman Long 	return sem;
10926ffddfb9SPeter Zijlstra 
10935dec94d4SWaiman Long out_nolock:
10941ee32619SWaiman Long 	rwsem_del_wake_waiter(sem, &waiter, &wake_q);
10955dec94d4SWaiman Long 	__set_current_state(TASK_RUNNING);
10965dec94d4SWaiman Long 	lockevent_inc(rwsem_rlock_fail);
1097ee042be1SNamhyung Kim 	trace_contention_end(sem, -EINTR);
10985dec94d4SWaiman Long 	return ERR_PTR(-EINTR);
10995dec94d4SWaiman Long }
11005dec94d4SWaiman Long 
11015dec94d4SWaiman Long /*
11025dec94d4SWaiman Long  * Wait until we successfully acquire the write lock
11035dec94d4SWaiman Long  */
1104c441e934SMinchan Kim static struct rw_semaphore __sched *
11056cef7ff6SWaiman Long rwsem_down_write_slowpath(struct rw_semaphore *sem, int state)
11065dec94d4SWaiman Long {
11075dec94d4SWaiman Long 	struct rwsem_waiter waiter;
11085dec94d4SWaiman Long 	DEFINE_WAKE_Q(wake_q);
11095dec94d4SWaiman Long 
11105dec94d4SWaiman Long 	/* do optimistic spinning and steal lock if possible */
1111617f3ef9SWaiman Long 	if (rwsem_can_spin_on_owner(sem) && rwsem_optimistic_spin(sem)) {
11126ffddfb9SPeter Zijlstra 		/* rwsem_optimistic_spin() implies ACQUIRE on success */
11135dec94d4SWaiman Long 		return sem;
11146ffddfb9SPeter Zijlstra 	}
11155dec94d4SWaiman Long 
11165dec94d4SWaiman Long 	/*
11175dec94d4SWaiman Long 	 * Optimistic spinning failed, proceed to the slowpath
11185dec94d4SWaiman Long 	 * and block until we can acquire the sem.
11195dec94d4SWaiman Long 	 */
11205dec94d4SWaiman Long 	waiter.task = current;
11215dec94d4SWaiman Long 	waiter.type = RWSEM_WAITING_FOR_WRITE;
11224f23dbc1SWaiman Long 	waiter.timeout = jiffies + RWSEM_WAIT_TIMEOUT;
1123d257cc8cSWaiman Long 	waiter.handoff_set = false;
11245dec94d4SWaiman Long 
11255dec94d4SWaiman Long 	raw_spin_lock_irq(&sem->wait_lock);
1126d257cc8cSWaiman Long 	rwsem_add_waiter(sem, &waiter);
11275dec94d4SWaiman Long 
11285dec94d4SWaiman Long 	/* we're now waiting on the lock */
1129d257cc8cSWaiman Long 	if (rwsem_first_waiter(sem) != &waiter) {
113054c1ee4dSWaiman Long 		rwsem_cond_wake_waiter(sem, atomic_long_read(&sem->count),
113154c1ee4dSWaiman Long 				       &wake_q);
113200f3c5a3SWaiman Long 		if (!wake_q_empty(&wake_q)) {
11335dec94d4SWaiman Long 			/*
113400f3c5a3SWaiman Long 			 * We want to minimize wait_lock hold time especially
113500f3c5a3SWaiman Long 			 * when a large number of readers are to be woken up.
11365dec94d4SWaiman Long 			 */
113700f3c5a3SWaiman Long 			raw_spin_unlock_irq(&sem->wait_lock);
11385dec94d4SWaiman Long 			wake_up_q(&wake_q);
113900f3c5a3SWaiman Long 			raw_spin_lock_irq(&sem->wait_lock);
114000f3c5a3SWaiman Long 		}
11415dec94d4SWaiman Long 	} else {
114200f3c5a3SWaiman Long 		atomic_long_or(RWSEM_FLAG_WAITERS, &sem->count);
11435dec94d4SWaiman Long 	}
11445dec94d4SWaiman Long 
11455dec94d4SWaiman Long 	/* wait until we successfully acquire the lock */
11465dec94d4SWaiman Long 	set_current_state(state);
1147ee042be1SNamhyung Kim 	trace_contention_begin(sem, LCB_F_WRITE);
1148ee042be1SNamhyung Kim 
11496ffddfb9SPeter Zijlstra 	for (;;) {
1150d257cc8cSWaiman Long 		if (rwsem_try_write_lock(sem, &waiter)) {
11516ffddfb9SPeter Zijlstra 			/* rwsem_try_write_lock() implies ACQUIRE on success */
11525dec94d4SWaiman Long 			break;
11536ffddfb9SPeter Zijlstra 		}
11544f23dbc1SWaiman Long 
11555dec94d4SWaiman Long 		raw_spin_unlock_irq(&sem->wait_lock);
11565dec94d4SWaiman Long 
1157d257cc8cSWaiman Long 		if (signal_pending_state(state, current))
1158d257cc8cSWaiman Long 			goto out_nolock;
1159d257cc8cSWaiman Long 
116091d2a812SWaiman Long 		/*
116191d2a812SWaiman Long 		 * After setting the handoff bit and failing to acquire
116291d2a812SWaiman Long 		 * the lock, attempt to spin on owner to accelerate lock
116391d2a812SWaiman Long 		 * transfer. If the previous owner is an on-cpu writer and it
116491d2a812SWaiman Long 		 * has just released the lock, OWNER_NULL will be returned.
116591d2a812SWaiman Long 		 * In this case, we attempt to acquire the lock again
116691d2a812SWaiman Long 		 * without sleeping.
116791d2a812SWaiman Long 		 */
1168d257cc8cSWaiman Long 		if (waiter.handoff_set) {
11697cdacc5fSYanfei Xu 			enum owner_state owner_state;
11707cdacc5fSYanfei Xu 
11717cdacc5fSYanfei Xu 			owner_state = rwsem_spin_on_owner(sem);
11727cdacc5fSYanfei Xu 			if (owner_state == OWNER_NULL)
117391d2a812SWaiman Long 				goto trylock_again;
11747cdacc5fSYanfei Xu 		}
117591d2a812SWaiman Long 
11761d61659cSWaiman Long 		schedule_preempt_disabled();
11775dec94d4SWaiman Long 		lockevent_inc(rwsem_sleep_writer);
11785dec94d4SWaiman Long 		set_current_state(state);
117991d2a812SWaiman Long trylock_again:
11805dec94d4SWaiman Long 		raw_spin_lock_irq(&sem->wait_lock);
11815dec94d4SWaiman Long 	}
11825dec94d4SWaiman Long 	__set_current_state(TASK_RUNNING);
11835dec94d4SWaiman Long 	raw_spin_unlock_irq(&sem->wait_lock);
11845dec94d4SWaiman Long 	lockevent_inc(rwsem_wlock);
1185ee042be1SNamhyung Kim 	trace_contention_end(sem, 0);
1186d257cc8cSWaiman Long 	return sem;
11875dec94d4SWaiman Long 
11885dec94d4SWaiman Long out_nolock:
11895dec94d4SWaiman Long 	__set_current_state(TASK_RUNNING);
11905dec94d4SWaiman Long 	raw_spin_lock_irq(&sem->wait_lock);
11911ee32619SWaiman Long 	rwsem_del_wake_waiter(sem, &waiter, &wake_q);
11925dec94d4SWaiman Long 	lockevent_inc(rwsem_wlock_fail);
1193ee042be1SNamhyung Kim 	trace_contention_end(sem, -EINTR);
11945dec94d4SWaiman Long 	return ERR_PTR(-EINTR);
11955dec94d4SWaiman Long }
11965dec94d4SWaiman Long 
11975dec94d4SWaiman Long /*
11985dec94d4SWaiman Long  * handle waking up a waiter on the semaphore
11995dec94d4SWaiman Long  * - up_read/up_write have decremented the active part of count if we come here
12005dec94d4SWaiman Long  */
1201d4e5076cSxuyehan static struct rw_semaphore *rwsem_wake(struct rw_semaphore *sem)
12025dec94d4SWaiman Long {
12035dec94d4SWaiman Long 	unsigned long flags;
12045dec94d4SWaiman Long 	DEFINE_WAKE_Q(wake_q);
12055dec94d4SWaiman Long 
12065dec94d4SWaiman Long 	raw_spin_lock_irqsave(&sem->wait_lock, flags);
12075dec94d4SWaiman Long 
12085dec94d4SWaiman Long 	if (!list_empty(&sem->wait_list))
12096cef7ff6SWaiman Long 		rwsem_mark_wake(sem, RWSEM_WAKE_ANY, &wake_q);
12105dec94d4SWaiman Long 
12115dec94d4SWaiman Long 	raw_spin_unlock_irqrestore(&sem->wait_lock, flags);
12125dec94d4SWaiman Long 	wake_up_q(&wake_q);
12135dec94d4SWaiman Long 
12145dec94d4SWaiman Long 	return sem;
12155dec94d4SWaiman Long }
12165dec94d4SWaiman Long 
12175dec94d4SWaiman Long /*
12185dec94d4SWaiman Long  * downgrade a write lock into a read lock
12195dec94d4SWaiman Long  * - caller incremented waiting part of count and discovered it still negative
12205dec94d4SWaiman Long  * - just wake up any readers at the front of the queue
12215dec94d4SWaiman Long  */
12226cef7ff6SWaiman Long static struct rw_semaphore *rwsem_downgrade_wake(struct rw_semaphore *sem)
12235dec94d4SWaiman Long {
12245dec94d4SWaiman Long 	unsigned long flags;
12255dec94d4SWaiman Long 	DEFINE_WAKE_Q(wake_q);
12265dec94d4SWaiman Long 
12275dec94d4SWaiman Long 	raw_spin_lock_irqsave(&sem->wait_lock, flags);
12285dec94d4SWaiman Long 
12295dec94d4SWaiman Long 	if (!list_empty(&sem->wait_list))
12306cef7ff6SWaiman Long 		rwsem_mark_wake(sem, RWSEM_WAKE_READ_OWNED, &wake_q);
12315dec94d4SWaiman Long 
12325dec94d4SWaiman Long 	raw_spin_unlock_irqrestore(&sem->wait_lock, flags);
12335dec94d4SWaiman Long 	wake_up_q(&wake_q);
12345dec94d4SWaiman Long 
12355dec94d4SWaiman Long 	return sem;
12365dec94d4SWaiman Long }
12375dec94d4SWaiman Long 
12385dec94d4SWaiman Long /*
12395dec94d4SWaiman Long  * lock for reading
12405dec94d4SWaiman Long  */
124192cc5d00SJohn Stultz static __always_inline int __down_read_common(struct rw_semaphore *sem, int state)
12425dec94d4SWaiman Long {
12433f524553SWaiman Long 	int ret = 0;
1244c8fe8b05SWaiman Long 	long count;
1245c8fe8b05SWaiman Long 
12463f524553SWaiman Long 	preempt_disable();
1247c8fe8b05SWaiman Long 	if (!rwsem_read_trylock(sem, &count)) {
12483f524553SWaiman Long 		if (IS_ERR(rwsem_down_read_slowpath(sem, count, state))) {
12493f524553SWaiman Long 			ret = -EINTR;
12503f524553SWaiman Long 			goto out;
12513f524553SWaiman Long 		}
125294a9717bSWaiman Long 		DEBUG_RWSEMS_WARN_ON(!is_rwsem_reader_owned(sem), sem);
12535dec94d4SWaiman Long 	}
12543f524553SWaiman Long out:
12553f524553SWaiman Long 	preempt_enable();
12563f524553SWaiman Long 	return ret;
1257c995e638SPeter Zijlstra }
1258c995e638SPeter Zijlstra 
125992cc5d00SJohn Stultz static __always_inline void __down_read(struct rw_semaphore *sem)
1260c995e638SPeter Zijlstra {
1261c995e638SPeter Zijlstra 	__down_read_common(sem, TASK_UNINTERRUPTIBLE);
12625dec94d4SWaiman Long }
12635dec94d4SWaiman Long 
126492cc5d00SJohn Stultz static __always_inline int __down_read_interruptible(struct rw_semaphore *sem)
126531784cffSEric W. Biederman {
1266c995e638SPeter Zijlstra 	return __down_read_common(sem, TASK_INTERRUPTIBLE);
126731784cffSEric W. Biederman }
126831784cffSEric W. Biederman 
126992cc5d00SJohn Stultz static __always_inline int __down_read_killable(struct rw_semaphore *sem)
12705dec94d4SWaiman Long {
1271c995e638SPeter Zijlstra 	return __down_read_common(sem, TASK_KILLABLE);
12725dec94d4SWaiman Long }
12735dec94d4SWaiman Long 
12745dec94d4SWaiman Long static inline int __down_read_trylock(struct rw_semaphore *sem)
12755dec94d4SWaiman Long {
12763f524553SWaiman Long 	int ret = 0;
1277fce45cd4SDavidlohr Bueso 	long tmp;
1278fce45cd4SDavidlohr Bueso 
1279fce45cd4SDavidlohr Bueso 	DEBUG_RWSEMS_WARN_ON(sem->magic != sem, sem);
1280fce45cd4SDavidlohr Bueso 
12813f524553SWaiman Long 	preempt_disable();
128214c24048SMuchun Song 	tmp = atomic_long_read(&sem->count);
128314c24048SMuchun Song 	while (!(tmp & RWSEM_READ_FAILED_MASK)) {
12845dec94d4SWaiman Long 		if (atomic_long_try_cmpxchg_acquire(&sem->count, &tmp,
12855dec94d4SWaiman Long 						    tmp + RWSEM_READER_BIAS)) {
12865dec94d4SWaiman Long 			rwsem_set_reader_owned(sem);
12873f524553SWaiman Long 			ret = 1;
12883f524553SWaiman Long 			break;
12895dec94d4SWaiman Long 		}
129014c24048SMuchun Song 	}
12913f524553SWaiman Long 	preempt_enable();
12923f524553SWaiman Long 	return ret;
12935dec94d4SWaiman Long }
12945dec94d4SWaiman Long 
12955dec94d4SWaiman Long /*
12965dec94d4SWaiman Long  * lock for writing
12975dec94d4SWaiman Long  */
1298e81859feSJohn Stultz static __always_inline int __down_write_common(struct rw_semaphore *sem, int state)
12995dec94d4SWaiman Long {
13001d61659cSWaiman Long 	int ret = 0;
13011d61659cSWaiman Long 
13021d61659cSWaiman Long 	preempt_disable();
1303285c61aeSPeter Zijlstra 	if (unlikely(!rwsem_write_trylock(sem))) {
1304c995e638SPeter Zijlstra 		if (IS_ERR(rwsem_down_write_slowpath(sem, state)))
13051d61659cSWaiman Long 			ret = -EINTR;
13065cfd92e1SWaiman Long 	}
13071d61659cSWaiman Long 	preempt_enable();
13081d61659cSWaiman Long 	return ret;
13095dec94d4SWaiman Long }
13105dec94d4SWaiman Long 
1311e81859feSJohn Stultz static __always_inline void __down_write(struct rw_semaphore *sem)
1312c995e638SPeter Zijlstra {
1313c995e638SPeter Zijlstra 	__down_write_common(sem, TASK_UNINTERRUPTIBLE);
1314c995e638SPeter Zijlstra }
1315c995e638SPeter Zijlstra 
1316e81859feSJohn Stultz static __always_inline int __down_write_killable(struct rw_semaphore *sem)
1317c995e638SPeter Zijlstra {
1318c995e638SPeter Zijlstra 	return __down_write_common(sem, TASK_KILLABLE);
1319c995e638SPeter Zijlstra }
1320c995e638SPeter Zijlstra 
13215dec94d4SWaiman Long static inline int __down_write_trylock(struct rw_semaphore *sem)
13225dec94d4SWaiman Long {
13231d61659cSWaiman Long 	int ret;
13241d61659cSWaiman Long 
13251d61659cSWaiman Long 	preempt_disable();
1326fce45cd4SDavidlohr Bueso 	DEBUG_RWSEMS_WARN_ON(sem->magic != sem, sem);
13271d61659cSWaiman Long 	ret = rwsem_write_trylock(sem);
13281d61659cSWaiman Long 	preempt_enable();
13291d61659cSWaiman Long 
13301d61659cSWaiman Long 	return ret;
13315dec94d4SWaiman Long }
13325dec94d4SWaiman Long 
13335dec94d4SWaiman Long /*
13345dec94d4SWaiman Long  * unlock after reading
13355dec94d4SWaiman Long  */
13367f26482aSPeter Zijlstra static inline void __up_read(struct rw_semaphore *sem)
13375dec94d4SWaiman Long {
13385dec94d4SWaiman Long 	long tmp;
13395dec94d4SWaiman Long 
1340fce45cd4SDavidlohr Bueso 	DEBUG_RWSEMS_WARN_ON(sem->magic != sem, sem);
134194a9717bSWaiman Long 	DEBUG_RWSEMS_WARN_ON(!is_rwsem_reader_owned(sem), sem);
1342fce45cd4SDavidlohr Bueso 
13433f524553SWaiman Long 	preempt_disable();
13445dec94d4SWaiman Long 	rwsem_clear_reader_owned(sem);
13455dec94d4SWaiman Long 	tmp = atomic_long_add_return_release(-RWSEM_READER_BIAS, &sem->count);
1346a15ea1a3SWaiman Long 	DEBUG_RWSEMS_WARN_ON(tmp < 0, sem);
13476cef7ff6SWaiman Long 	if (unlikely((tmp & (RWSEM_LOCK_MASK|RWSEM_FLAG_WAITERS)) ==
13487d43f1ceSWaiman Long 		      RWSEM_FLAG_WAITERS)) {
1349617f3ef9SWaiman Long 		clear_nonspinnable(sem);
1350d4e5076cSxuyehan 		rwsem_wake(sem);
13515dec94d4SWaiman Long 	}
13523f524553SWaiman Long 	preempt_enable();
13537d43f1ceSWaiman Long }
13545dec94d4SWaiman Long 
13555dec94d4SWaiman Long /*
13565dec94d4SWaiman Long  * unlock after writing
13575dec94d4SWaiman Long  */
13587f26482aSPeter Zijlstra static inline void __up_write(struct rw_semaphore *sem)
13595dec94d4SWaiman Long {
13606cef7ff6SWaiman Long 	long tmp;
13616cef7ff6SWaiman Long 
1362fce45cd4SDavidlohr Bueso 	DEBUG_RWSEMS_WARN_ON(sem->magic != sem, sem);
136302f1082bSWaiman Long 	/*
136402f1082bSWaiman Long 	 * sem->owner may differ from current if the ownership is transferred
136502f1082bSWaiman Long 	 * to an anonymous writer by setting the RWSEM_NONSPINNABLE bits.
136602f1082bSWaiman Long 	 */
136794a9717bSWaiman Long 	DEBUG_RWSEMS_WARN_ON((rwsem_owner(sem) != current) &&
136894a9717bSWaiman Long 			    !rwsem_test_oflags(sem, RWSEM_NONSPINNABLE), sem);
1369fce45cd4SDavidlohr Bueso 
137048dfb5d2SGokul krishna Krishnakumar 	preempt_disable();
13715dec94d4SWaiman Long 	rwsem_clear_owner(sem);
13726cef7ff6SWaiman Long 	tmp = atomic_long_fetch_add_release(-RWSEM_WRITER_LOCKED, &sem->count);
13736cef7ff6SWaiman Long 	if (unlikely(tmp & RWSEM_FLAG_WAITERS))
1374d4e5076cSxuyehan 		rwsem_wake(sem);
13751d61659cSWaiman Long 	preempt_enable();
13765dec94d4SWaiman Long }
13775dec94d4SWaiman Long 
13785dec94d4SWaiman Long /*
13795dec94d4SWaiman Long  * downgrade write lock to read lock
13805dec94d4SWaiman Long  */
13815dec94d4SWaiman Long static inline void __downgrade_write(struct rw_semaphore *sem)
13825dec94d4SWaiman Long {
13835dec94d4SWaiman Long 	long tmp;
13845dec94d4SWaiman Long 
13855dec94d4SWaiman Long 	/*
13865dec94d4SWaiman Long 	 * When downgrading from exclusive to shared ownership,
13875dec94d4SWaiman Long 	 * anything inside the write-locked region cannot leak
13885dec94d4SWaiman Long 	 * into the read side. In contrast, anything in the
13895dec94d4SWaiman Long 	 * read-locked region is ok to be re-ordered into the
13905dec94d4SWaiman Long 	 * write side. As such, rely on RELEASE semantics.
13915dec94d4SWaiman Long 	 */
139294a9717bSWaiman Long 	DEBUG_RWSEMS_WARN_ON(rwsem_owner(sem) != current, sem);
13931d61659cSWaiman Long 	preempt_disable();
13945dec94d4SWaiman Long 	tmp = atomic_long_fetch_add_release(
13955dec94d4SWaiman Long 		-RWSEM_WRITER_LOCKED+RWSEM_READER_BIAS, &sem->count);
13965dec94d4SWaiman Long 	rwsem_set_reader_owned(sem);
13975dec94d4SWaiman Long 	if (tmp & RWSEM_FLAG_WAITERS)
13985dec94d4SWaiman Long 		rwsem_downgrade_wake(sem);
13991d61659cSWaiman Long 	preempt_enable();
14005dec94d4SWaiman Long }
14014fc828e2SDavidlohr Bueso 
140242254105SThomas Gleixner #else /* !CONFIG_PREEMPT_RT */
140342254105SThomas Gleixner 
1404e17ba59bSThomas Gleixner #define RT_MUTEX_BUILD_MUTEX
140542254105SThomas Gleixner #include "rtmutex.c"
140642254105SThomas Gleixner 
140742254105SThomas Gleixner #define rwbase_set_and_save_current_state(state)	\
140842254105SThomas Gleixner 	set_current_state(state)
140942254105SThomas Gleixner 
141042254105SThomas Gleixner #define rwbase_restore_current_state()			\
141142254105SThomas Gleixner 	__set_current_state(TASK_RUNNING)
141242254105SThomas Gleixner 
141342254105SThomas Gleixner #define rwbase_rtmutex_lock_state(rtm, state)		\
141442254105SThomas Gleixner 	__rt_mutex_lock(rtm, state)
141542254105SThomas Gleixner 
1416*894d1b3dSPeter Zijlstra #define rwbase_rtmutex_slowlock_locked(rtm, state, wq)	\
1417*894d1b3dSPeter Zijlstra 	__rt_mutex_slowlock_locked(rtm, NULL, state, wq)
141842254105SThomas Gleixner 
141942254105SThomas Gleixner #define rwbase_rtmutex_unlock(rtm)			\
142042254105SThomas Gleixner 	__rt_mutex_unlock(rtm)
142142254105SThomas Gleixner 
142242254105SThomas Gleixner #define rwbase_rtmutex_trylock(rtm)			\
142342254105SThomas Gleixner 	__rt_mutex_trylock(rtm)
142442254105SThomas Gleixner 
142542254105SThomas Gleixner #define rwbase_signal_pending_state(state, current)	\
142642254105SThomas Gleixner 	signal_pending_state(state, current)
142742254105SThomas Gleixner 
1428d14f9e93SSebastian Andrzej Siewior #define rwbase_pre_schedule()				\
1429d14f9e93SSebastian Andrzej Siewior 	rt_mutex_pre_schedule()
1430d14f9e93SSebastian Andrzej Siewior 
143142254105SThomas Gleixner #define rwbase_schedule()				\
1432d14f9e93SSebastian Andrzej Siewior 	rt_mutex_schedule()
1433d14f9e93SSebastian Andrzej Siewior 
1434d14f9e93SSebastian Andrzej Siewior #define rwbase_post_schedule()				\
1435d14f9e93SSebastian Andrzej Siewior 	rt_mutex_post_schedule()
143642254105SThomas Gleixner 
143742254105SThomas Gleixner #include "rwbase_rt.c"
143842254105SThomas Gleixner 
143915eb7c88SMike Galbraith void __init_rwsem(struct rw_semaphore *sem, const char *name,
144042254105SThomas Gleixner 		  struct lock_class_key *key)
144142254105SThomas Gleixner {
144215eb7c88SMike Galbraith 	init_rwbase_rt(&(sem)->rwbase);
144315eb7c88SMike Galbraith 
144415eb7c88SMike Galbraith #ifdef CONFIG_DEBUG_LOCK_ALLOC
144542254105SThomas Gleixner 	debug_check_no_locks_freed((void *)sem, sizeof(*sem));
144642254105SThomas Gleixner 	lockdep_init_map_wait(&sem->dep_map, name, key, 0, LD_WAIT_SLEEP);
144742254105SThomas Gleixner #endif
144815eb7c88SMike Galbraith }
144915eb7c88SMike Galbraith EXPORT_SYMBOL(__init_rwsem);
145042254105SThomas Gleixner 
145142254105SThomas Gleixner static inline void __down_read(struct rw_semaphore *sem)
145242254105SThomas Gleixner {
145342254105SThomas Gleixner 	rwbase_read_lock(&sem->rwbase, TASK_UNINTERRUPTIBLE);
145442254105SThomas Gleixner }
145542254105SThomas Gleixner 
145642254105SThomas Gleixner static inline int __down_read_interruptible(struct rw_semaphore *sem)
145742254105SThomas Gleixner {
145842254105SThomas Gleixner 	return rwbase_read_lock(&sem->rwbase, TASK_INTERRUPTIBLE);
145942254105SThomas Gleixner }
146042254105SThomas Gleixner 
146142254105SThomas Gleixner static inline int __down_read_killable(struct rw_semaphore *sem)
146242254105SThomas Gleixner {
146342254105SThomas Gleixner 	return rwbase_read_lock(&sem->rwbase, TASK_KILLABLE);
146442254105SThomas Gleixner }
146542254105SThomas Gleixner 
146642254105SThomas Gleixner static inline int __down_read_trylock(struct rw_semaphore *sem)
146742254105SThomas Gleixner {
146842254105SThomas Gleixner 	return rwbase_read_trylock(&sem->rwbase);
146942254105SThomas Gleixner }
147042254105SThomas Gleixner 
147142254105SThomas Gleixner static inline void __up_read(struct rw_semaphore *sem)
147242254105SThomas Gleixner {
147342254105SThomas Gleixner 	rwbase_read_unlock(&sem->rwbase, TASK_NORMAL);
147442254105SThomas Gleixner }
147542254105SThomas Gleixner 
147642254105SThomas Gleixner static inline void __sched __down_write(struct rw_semaphore *sem)
147742254105SThomas Gleixner {
147842254105SThomas Gleixner 	rwbase_write_lock(&sem->rwbase, TASK_UNINTERRUPTIBLE);
147942254105SThomas Gleixner }
148042254105SThomas Gleixner 
148142254105SThomas Gleixner static inline int __sched __down_write_killable(struct rw_semaphore *sem)
148242254105SThomas Gleixner {
148342254105SThomas Gleixner 	return rwbase_write_lock(&sem->rwbase, TASK_KILLABLE);
148442254105SThomas Gleixner }
148542254105SThomas Gleixner 
148642254105SThomas Gleixner static inline int __down_write_trylock(struct rw_semaphore *sem)
148742254105SThomas Gleixner {
148842254105SThomas Gleixner 	return rwbase_write_trylock(&sem->rwbase);
148942254105SThomas Gleixner }
149042254105SThomas Gleixner 
149142254105SThomas Gleixner static inline void __up_write(struct rw_semaphore *sem)
149242254105SThomas Gleixner {
149342254105SThomas Gleixner 	rwbase_write_unlock(&sem->rwbase);
149442254105SThomas Gleixner }
149542254105SThomas Gleixner 
149642254105SThomas Gleixner static inline void __downgrade_write(struct rw_semaphore *sem)
149742254105SThomas Gleixner {
149842254105SThomas Gleixner 	rwbase_write_downgrade(&sem->rwbase);
149942254105SThomas Gleixner }
150042254105SThomas Gleixner 
150142254105SThomas Gleixner /* Debug stubs for the common API */
150242254105SThomas Gleixner #define DEBUG_RWSEMS_WARN_ON(c, sem)
150342254105SThomas Gleixner 
150442254105SThomas Gleixner static inline void __rwsem_set_reader_owned(struct rw_semaphore *sem,
150542254105SThomas Gleixner 					    struct task_struct *owner)
150642254105SThomas Gleixner {
150742254105SThomas Gleixner }
150842254105SThomas Gleixner 
150942254105SThomas Gleixner static inline bool is_rwsem_reader_owned(struct rw_semaphore *sem)
151042254105SThomas Gleixner {
151142254105SThomas Gleixner 	int count = atomic_read(&sem->rwbase.readers);
151242254105SThomas Gleixner 
151342254105SThomas Gleixner 	return count < 0 && count != READER_BIAS;
151442254105SThomas Gleixner }
151542254105SThomas Gleixner 
151642254105SThomas Gleixner #endif /* CONFIG_PREEMPT_RT */
151742254105SThomas Gleixner 
1518ed428bfcSPeter Zijlstra /*
1519ed428bfcSPeter Zijlstra  * lock for reading
1520ed428bfcSPeter Zijlstra  */
1521ed428bfcSPeter Zijlstra void __sched down_read(struct rw_semaphore *sem)
1522ed428bfcSPeter Zijlstra {
1523ed428bfcSPeter Zijlstra 	might_sleep();
1524ed428bfcSPeter Zijlstra 	rwsem_acquire_read(&sem->dep_map, 0, 0, _RET_IP_);
1525ed428bfcSPeter Zijlstra 
1526ed428bfcSPeter Zijlstra 	LOCK_CONTENDED(sem, __down_read_trylock, __down_read);
1527ed428bfcSPeter Zijlstra }
1528ed428bfcSPeter Zijlstra EXPORT_SYMBOL(down_read);
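/*
 * Editor's note: an illustrative usage sketch, not part of the kernel source.
 * It shows the basic blocking reader pattern built on the down_read()/up_read()
 * pair defined in this file; the semaphore and the protected variable
 * (example_sem, example_value) are hypothetical.
 *
 *	#include <linux/rwsem.h>
 *
 *	static DECLARE_RWSEM(example_sem);
 *	static int example_value;
 *
 *	static int read_example_value(void)
 *	{
 *		int v;
 *
 *		down_read(&example_sem);
 *		v = example_value;
 *		up_read(&example_sem);
 *		return v;
 *	}
 */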
1529ed428bfcSPeter Zijlstra 
153031784cffSEric W. Biederman int __sched down_read_interruptible(struct rw_semaphore *sem)
153131784cffSEric W. Biederman {
153231784cffSEric W. Biederman 	might_sleep();
153331784cffSEric W. Biederman 	rwsem_acquire_read(&sem->dep_map, 0, 0, _RET_IP_);
153431784cffSEric W. Biederman 
153531784cffSEric W. Biederman 	if (LOCK_CONTENDED_RETURN(sem, __down_read_trylock, __down_read_interruptible)) {
153631784cffSEric W. Biederman 		rwsem_release(&sem->dep_map, _RET_IP_);
153731784cffSEric W. Biederman 		return -EINTR;
153831784cffSEric W. Biederman 	}
153931784cffSEric W. Biederman 
154031784cffSEric W. Biederman 	return 0;
154131784cffSEric W. Biederman }
154231784cffSEric W. Biederman EXPORT_SYMBOL(down_read_interruptible);
154331784cffSEric W. Biederman 
154476f8507fSKirill Tkhai int __sched down_read_killable(struct rw_semaphore *sem)
154576f8507fSKirill Tkhai {
154676f8507fSKirill Tkhai 	might_sleep();
154776f8507fSKirill Tkhai 	rwsem_acquire_read(&sem->dep_map, 0, 0, _RET_IP_);
154876f8507fSKirill Tkhai 
154976f8507fSKirill Tkhai 	if (LOCK_CONTENDED_RETURN(sem, __down_read_trylock, __down_read_killable)) {
15505facae4fSQian Cai 		rwsem_release(&sem->dep_map, _RET_IP_);
155176f8507fSKirill Tkhai 		return -EINTR;
155276f8507fSKirill Tkhai 	}
155376f8507fSKirill Tkhai 
155476f8507fSKirill Tkhai 	return 0;
155576f8507fSKirill Tkhai }
155676f8507fSKirill Tkhai EXPORT_SYMBOL(down_read_killable);
155776f8507fSKirill Tkhai 
1558ed428bfcSPeter Zijlstra /*
1559ed428bfcSPeter Zijlstra  * trylock for reading -- returns 1 if successful, 0 if contention
1560ed428bfcSPeter Zijlstra  */
1561ed428bfcSPeter Zijlstra int down_read_trylock(struct rw_semaphore *sem)
1562ed428bfcSPeter Zijlstra {
1563ed428bfcSPeter Zijlstra 	int ret = __down_read_trylock(sem);
1564ed428bfcSPeter Zijlstra 
1565c7580c1eSWaiman Long 	if (ret == 1)
1566ed428bfcSPeter Zijlstra 		rwsem_acquire_read(&sem->dep_map, 0, 1, _RET_IP_);
1567ed428bfcSPeter Zijlstra 	return ret;
1568ed428bfcSPeter Zijlstra }
1569ed428bfcSPeter Zijlstra EXPORT_SYMBOL(down_read_trylock);
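/*
 * Editor's note: an illustrative, hypothetical sketch (not part of the kernel
 * source) of the trylock convention above: down_read_trylock() returns 1 on
 * success and 0 on contention, so a caller that must not block can fall back
 * to a deferred path instead of sleeping. example_sem/example_value are the
 * hypothetical names from the earlier sketch.
 *
 *	static bool example_try_read(int *out)
 *	{
 *		if (!down_read_trylock(&example_sem))
 *			return false;
 *		*out = example_value;
 *		up_read(&example_sem);
 *		return true;
 *	}
 */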
1570ed428bfcSPeter Zijlstra 
1571ed428bfcSPeter Zijlstra /*
1572ed428bfcSPeter Zijlstra  * lock for writing
1573ed428bfcSPeter Zijlstra  */
1574ed428bfcSPeter Zijlstra void __sched down_write(struct rw_semaphore *sem)
1575ed428bfcSPeter Zijlstra {
1576ed428bfcSPeter Zijlstra 	might_sleep();
1577ed428bfcSPeter Zijlstra 	rwsem_acquire(&sem->dep_map, 0, 0, _RET_IP_);
1578ed428bfcSPeter Zijlstra 	LOCK_CONTENDED(sem, __down_write_trylock, __down_write);
1579ed428bfcSPeter Zijlstra }
1580ed428bfcSPeter Zijlstra EXPORT_SYMBOL(down_write);
1581ed428bfcSPeter Zijlstra 
1582ed428bfcSPeter Zijlstra /*
1583916633a4SMichal Hocko  * lock for writing
1584916633a4SMichal Hocko  */
1585916633a4SMichal Hocko int __sched down_write_killable(struct rw_semaphore *sem)
1586916633a4SMichal Hocko {
1587916633a4SMichal Hocko 	might_sleep();
1588916633a4SMichal Hocko 	rwsem_acquire(&sem->dep_map, 0, 0, _RET_IP_);
1589916633a4SMichal Hocko 
15906cef7ff6SWaiman Long 	if (LOCK_CONTENDED_RETURN(sem, __down_write_trylock,
15916cef7ff6SWaiman Long 				  __down_write_killable)) {
15925facae4fSQian Cai 		rwsem_release(&sem->dep_map, _RET_IP_);
1593916633a4SMichal Hocko 		return -EINTR;
1594916633a4SMichal Hocko 	}
1595916633a4SMichal Hocko 
1596916633a4SMichal Hocko 	return 0;
1597916633a4SMichal Hocko }
1598916633a4SMichal Hocko EXPORT_SYMBOL(down_write_killable);
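/*
 * Editor's note: an illustrative, hypothetical sketch (not part of the kernel
 * source) of the killable writer pattern: down_write_killable() returns 0 on
 * success and -EINTR if a fatal signal is received while waiting, so the
 * caller must be able to back out without holding the lock. Names reuse the
 * hypothetical example_sem/example_value from the earlier sketches.
 *
 *	static int example_update_value(int new_value)
 *	{
 *		if (down_write_killable(&example_sem))
 *			return -EINTR;
 *		example_value = new_value;
 *		up_write(&example_sem);
 *		return 0;
 *	}
 */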
1599916633a4SMichal Hocko 
1600916633a4SMichal Hocko /*
1601ed428bfcSPeter Zijlstra  * trylock for writing -- returns 1 if successful, 0 if contention
1602ed428bfcSPeter Zijlstra  */
1603ed428bfcSPeter Zijlstra int down_write_trylock(struct rw_semaphore *sem)
1604ed428bfcSPeter Zijlstra {
1605ed428bfcSPeter Zijlstra 	int ret = __down_write_trylock(sem);
1606ed428bfcSPeter Zijlstra 
1607c7580c1eSWaiman Long 	if (ret == 1)
1608ed428bfcSPeter Zijlstra 		rwsem_acquire(&sem->dep_map, 0, 1, _RET_IP_);
16094fc828e2SDavidlohr Bueso 
1610ed428bfcSPeter Zijlstra 	return ret;
1611ed428bfcSPeter Zijlstra }
1612ed428bfcSPeter Zijlstra EXPORT_SYMBOL(down_write_trylock);
1613ed428bfcSPeter Zijlstra 
1614ed428bfcSPeter Zijlstra /*
1615ed428bfcSPeter Zijlstra  * release a read lock
1616ed428bfcSPeter Zijlstra  */
1617ed428bfcSPeter Zijlstra void up_read(struct rw_semaphore *sem)
1618ed428bfcSPeter Zijlstra {
16195facae4fSQian Cai 	rwsem_release(&sem->dep_map, _RET_IP_);
1620ed428bfcSPeter Zijlstra 	__up_read(sem);
1621ed428bfcSPeter Zijlstra }
1622ed428bfcSPeter Zijlstra EXPORT_SYMBOL(up_read);
1623ed428bfcSPeter Zijlstra 
1624ed428bfcSPeter Zijlstra /*
1625ed428bfcSPeter Zijlstra  * release a write lock
1626ed428bfcSPeter Zijlstra  */
1627ed428bfcSPeter Zijlstra void up_write(struct rw_semaphore *sem)
1628ed428bfcSPeter Zijlstra {
16295facae4fSQian Cai 	rwsem_release(&sem->dep_map, _RET_IP_);
1630ed428bfcSPeter Zijlstra 	__up_write(sem);
1631ed428bfcSPeter Zijlstra }
1632ed428bfcSPeter Zijlstra EXPORT_SYMBOL(up_write);
1633ed428bfcSPeter Zijlstra 
1634ed428bfcSPeter Zijlstra /*
1635ed428bfcSPeter Zijlstra  * downgrade write lock to read lock
1636ed428bfcSPeter Zijlstra  */
1637ed428bfcSPeter Zijlstra void downgrade_write(struct rw_semaphore *sem)
1638ed428bfcSPeter Zijlstra {
16396419c4afSJ. R. Okajima 	lock_downgrade(&sem->dep_map, _RET_IP_);
1640ed428bfcSPeter Zijlstra 	__downgrade_write(sem);
1641ed428bfcSPeter Zijlstra }
1642ed428bfcSPeter Zijlstra EXPORT_SYMBOL(downgrade_write);
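/*
 * Editor's note: an illustrative, hypothetical sketch (not part of the kernel
 * source) of the downgrade pattern: publish an update under the write lock,
 * then downgrade to a read lock so other readers may proceed while this
 * thread keeps using data that is now guaranteed not to change under it.
 * Names reuse the hypothetical example_sem/example_value from earlier.
 *
 *	static void example_publish_and_use(int new_value)
 *	{
 *		down_write(&example_sem);
 *		example_value = new_value;
 *		downgrade_write(&example_sem);
 *		pr_info("published %d\n", example_value);
 *		up_read(&example_sem);
 *	}
 */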
1643ed428bfcSPeter Zijlstra 
1644ed428bfcSPeter Zijlstra #ifdef CONFIG_DEBUG_LOCK_ALLOC
1645ed428bfcSPeter Zijlstra 
1646ed428bfcSPeter Zijlstra void down_read_nested(struct rw_semaphore *sem, int subclass)
1647ed428bfcSPeter Zijlstra {
1648ed428bfcSPeter Zijlstra 	might_sleep();
1649ed428bfcSPeter Zijlstra 	rwsem_acquire_read(&sem->dep_map, subclass, 0, _RET_IP_);
1650ed428bfcSPeter Zijlstra 	LOCK_CONTENDED(sem, __down_read_trylock, __down_read);
1651ed428bfcSPeter Zijlstra }
1652ed428bfcSPeter Zijlstra EXPORT_SYMBOL(down_read_nested);
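/*
 * Editor's note: an illustrative, hypothetical sketch (not part of the kernel
 * source). down_read_nested() exists for lockdep: when two rwsems of the same
 * lock class must be held at once (for example a parent and a child object of
 * the same type), the second acquisition is annotated with a subclass so that
 * lockdep does not report a false self-deadlock. The struct and function
 * below are made up for illustration.
 *
 *	struct example_node {
 *		struct rw_semaphore sem;
 *	};
 *
 *	static void example_lock_pair(struct example_node *parent,
 *				      struct example_node *child)
 *	{
 *		down_read(&parent->sem);
 *		down_read_nested(&child->sem, SINGLE_DEPTH_NESTING);
 *		up_read(&child->sem);
 *		up_read(&parent->sem);
 *	}
 */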
1653ed428bfcSPeter Zijlstra 
16540f9368b5SEric W. Biederman int down_read_killable_nested(struct rw_semaphore *sem, int subclass)
16550f9368b5SEric W. Biederman {
16560f9368b5SEric W. Biederman 	might_sleep();
16570f9368b5SEric W. Biederman 	rwsem_acquire_read(&sem->dep_map, subclass, 0, _RET_IP_);
16580f9368b5SEric W. Biederman 
16590f9368b5SEric W. Biederman 	if (LOCK_CONTENDED_RETURN(sem, __down_read_trylock, __down_read_killable)) {
16600f9368b5SEric W. Biederman 		rwsem_release(&sem->dep_map, _RET_IP_);
16610f9368b5SEric W. Biederman 		return -EINTR;
16620f9368b5SEric W. Biederman 	}
16630f9368b5SEric W. Biederman 
16640f9368b5SEric W. Biederman 	return 0;
16650f9368b5SEric W. Biederman }
16660f9368b5SEric W. Biederman EXPORT_SYMBOL(down_read_killable_nested);
16670f9368b5SEric W. Biederman 
1668ed428bfcSPeter Zijlstra void _down_write_nest_lock(struct rw_semaphore *sem, struct lockdep_map *nest)
1669ed428bfcSPeter Zijlstra {
1670ed428bfcSPeter Zijlstra 	might_sleep();
1671ed428bfcSPeter Zijlstra 	rwsem_acquire_nest(&sem->dep_map, 0, 0, nest, _RET_IP_);
1672ed428bfcSPeter Zijlstra 	LOCK_CONTENDED(sem, __down_write_trylock, __down_write);
1673ed428bfcSPeter Zijlstra }
1674ed428bfcSPeter Zijlstra EXPORT_SYMBOL(_down_write_nest_lock);
1675ed428bfcSPeter Zijlstra 
1676ed428bfcSPeter Zijlstra void down_read_non_owner(struct rw_semaphore *sem)
1677ed428bfcSPeter Zijlstra {
1678ed428bfcSPeter Zijlstra 	might_sleep();
1679ed428bfcSPeter Zijlstra 	__down_read(sem);
16803f524553SWaiman Long 	/*
16813f524553SWaiman Long 	 * The owner value for a reader-owned lock is mostly for debugging
16823f524553SWaiman Long 	 * purposes only and is not critical to the correct functioning of
16833f524553SWaiman Long 	 * rwsem. So it is perfectly fine to set it in a preempt-enabled
16843f524553SWaiman Long 	 * context here.
16853f524553SWaiman Long 	 */
1686925b9cd1SWaiman Long 	__rwsem_set_reader_owned(sem, NULL);
1687ed428bfcSPeter Zijlstra }
1688ed428bfcSPeter Zijlstra EXPORT_SYMBOL(down_read_non_owner);
1689ed428bfcSPeter Zijlstra 
1690ed428bfcSPeter Zijlstra void down_write_nested(struct rw_semaphore *sem, int subclass)
1691ed428bfcSPeter Zijlstra {
1692ed428bfcSPeter Zijlstra 	might_sleep();
1693ed428bfcSPeter Zijlstra 	rwsem_acquire(&sem->dep_map, subclass, 0, _RET_IP_);
1694ed428bfcSPeter Zijlstra 	LOCK_CONTENDED(sem, __down_write_trylock, __down_write);
1695ed428bfcSPeter Zijlstra }
1696ed428bfcSPeter Zijlstra EXPORT_SYMBOL(down_write_nested);
1697ed428bfcSPeter Zijlstra 
1698887bddfaSAl Viro int __sched down_write_killable_nested(struct rw_semaphore *sem, int subclass)
1699887bddfaSAl Viro {
1700887bddfaSAl Viro 	might_sleep();
1701887bddfaSAl Viro 	rwsem_acquire(&sem->dep_map, subclass, 0, _RET_IP_);
1702887bddfaSAl Viro 
17036cef7ff6SWaiman Long 	if (LOCK_CONTENDED_RETURN(sem, __down_write_trylock,
17046cef7ff6SWaiman Long 				  __down_write_killable)) {
17055facae4fSQian Cai 		rwsem_release(&sem->dep_map, _RET_IP_);
1706887bddfaSAl Viro 		return -EINTR;
1707887bddfaSAl Viro 	}
1708887bddfaSAl Viro 
1709887bddfaSAl Viro 	return 0;
1710887bddfaSAl Viro }
1711887bddfaSAl Viro EXPORT_SYMBOL(down_write_killable_nested);
1712887bddfaSAl Viro 
1713ed428bfcSPeter Zijlstra void up_read_non_owner(struct rw_semaphore *sem)
1714ed428bfcSPeter Zijlstra {
171594a9717bSWaiman Long 	DEBUG_RWSEMS_WARN_ON(!is_rwsem_reader_owned(sem), sem);
1716ed428bfcSPeter Zijlstra 	__up_read(sem);
1717ed428bfcSPeter Zijlstra }
1718ed428bfcSPeter Zijlstra EXPORT_SYMBOL(up_read_non_owner);
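/*
 * Editor's note: an illustrative, hypothetical sketch (not part of the kernel
 * source) of the non-owner variants above: the read lock is acquired in one
 * context and released from another (here, a workqueue callback), which is
 * why the usual lockdep owner checks are bypassed. All names below are made
 * up; example_sem is the hypothetical rwsem from the earlier sketches.
 *
 *	static void example_async_done(struct work_struct *work)
 *	{
 *		up_read_non_owner(&example_sem);
 *	}
 *	static DECLARE_WORK(example_work, example_async_done);
 *
 *	static void example_start_async_read(void)
 *	{
 *		down_read_non_owner(&example_sem);
 *		schedule_work(&example_work);
 *	}
 */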
1719ed428bfcSPeter Zijlstra 
1720ed428bfcSPeter Zijlstra #endif
1721