// SPDX-License-Identifier: GPL-2.0
/* kernel/rwsem.c: R/W semaphores, public implementation
 *
 * Written by David Howells (dhowells@redhat.com).
 * Derived from asm-i386/semaphore.h
 *
 * Writer lock-stealing by Alex Shi <alex.shi@intel.com>
 * and Michel Lespinasse <walken@google.com>
 *
 * Optimistic spinning by Tim Chen <tim.c.chen@intel.com>
 * and Davidlohr Bueso <davidlohr@hp.com>. Based on mutexes.
 *
 * Rwsem count bit fields re-definition and rwsem rearchitecture by
 * Waiman Long <longman@redhat.com> and
 * Peter Zijlstra <peterz@infradead.org>.
 */

#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/sched.h>
#include <linux/sched/rt.h>
#include <linux/sched/task.h>
#include <linux/sched/debug.h>
#include <linux/sched/wake_q.h>
#include <linux/sched/signal.h>
#include <linux/sched/clock.h>
#include <linux/export.h>
#include <linux/rwsem.h>
#include <linux/atomic.h>
#include <trace/events/lock.h>

#ifndef CONFIG_PREEMPT_RT
#include "lock_events.h"

/*
 * The least significant 2 bits of the owner value have the following
 * meanings when set.
 *  - Bit 0: RWSEM_READER_OWNED - The rwsem is owned by readers
 *  - Bit 1: RWSEM_NONSPINNABLE - Cannot spin on a reader-owned lock
 *
 * When the rwsem is reader-owned and a spinning writer has timed out,
 * the nonspinnable bit will be set to disable optimistic spinning.
 *
 * When a writer acquires a rwsem, it puts its task_struct pointer
 * into the owner field. It is cleared after an unlock.
 *
 * When a reader acquires a rwsem, it will also put its task_struct
 * pointer into the owner field with the RWSEM_READER_OWNED bit set.
 * On unlock, the owner field will largely be left untouched. So
 * for a free or reader-owned rwsem, the owner value may contain
 * information about the last reader that acquired the rwsem.
 *
 * That information may be helpful in debugging cases where the system
 * seems to hang on a reader-owned rwsem, especially if only one reader
 * is involved. Ideally we would like to track all the readers that own
 * a rwsem, but the overhead is simply too big.
 *
 * Fast-path reader optimistic lock stealing is supported when the rwsem
 * was previously owned by a writer and the following conditions are met:
 *  - rwsem is not currently writer owned
 *  - the handoff bit isn't set.
 */
#define RWSEM_READER_OWNED	(1UL << 0)
#define RWSEM_NONSPINNABLE	(1UL << 1)
#define RWSEM_OWNER_FLAGS_MASK	(RWSEM_READER_OWNED | RWSEM_NONSPINNABLE)
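
/*
 * Illustrative sketch (editor's note, not part of the upstream file): with
 * the flag bits above, the owner word can be read roughly as follows,
 * assuming a hypothetical task_struct pointer of 0xffff888100a2c000:
 *
 *   writer-owned:  owner = 0xffff888100a2c000	(no flags)
 *   reader-owned:  owner = 0xffff888100a2c001	(RWSEM_READER_OWNED set,
 *						 the pointer is only a hint)
 *   reader-owned,
 *   nonspinnable:  owner = 0xffff888100a2c003	(both flag bits set)
 *
 * The pointer value is made up; only the low two flag bits and the
 * RWSEM_OWNER_FLAGS_MASK masking behaviour are meaningful here.
 */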

#ifdef CONFIG_DEBUG_RWSEMS
# define DEBUG_RWSEMS_WARN_ON(c, sem)	do {			\
	if (!debug_locks_silent &&				\
	    WARN_ONCE(c, "DEBUG_RWSEMS_WARN_ON(%s): count = 0x%lx, magic = 0x%lx, owner = 0x%lx, curr 0x%lx, list %sempty\n",\
		#c, atomic_long_read(&(sem)->count),		\
		(unsigned long) sem->magic,			\
		atomic_long_read(&(sem)->owner), (long)current,	\
		list_empty(&(sem)->wait_list) ? "" : "not "))	\
			debug_locks_off();			\
	} while (0)
#else
# define DEBUG_RWSEMS_WARN_ON(c, sem)
#endif

/*
 * On 64-bit architectures, the bit definitions of the count are:
 *
 * Bit  0    - writer locked bit
 * Bit  1    - waiters present bit
 * Bit  2    - lock handoff bit
 * Bits 3-7  - reserved
 * Bits 8-62 - 55-bit reader count
 * Bit  63   - read fail bit
 *
 * On 32-bit architectures, the bit definitions of the count are:
 *
 * Bit  0    - writer locked bit
 * Bit  1    - waiters present bit
 * Bit  2    - lock handoff bit
 * Bits 3-7  - reserved
 * Bits 8-30 - 23-bit reader count
 * Bit  31   - read fail bit
 *
 * It is not likely that the most significant bit (read fail bit) will ever
 * be set. This guard bit is still checked anyway in the down_read() fastpath
 * just in case we need to use up more of the reader bits for other purposes
 * in the future.
 *
 * atomic_long_fetch_add() is used to obtain the reader lock, whereas
 * atomic_long_cmpxchg() will be used to obtain the writer lock.
 *
 * There are three places where the lock handoff bit may be set or cleared.
 * 1) rwsem_mark_wake() for readers		-- set, clear
 * 2) rwsem_try_write_lock() for writers	-- set, clear
 * 3) rwsem_del_waiter()			-- clear
 *
 * For all the above cases, wait_lock will be held. A writer must also
 * be the first one in the wait_list to be eligible for setting the handoff
 * bit. So concurrent setting/clearing of the handoff bit is not possible.
 */
#define RWSEM_WRITER_LOCKED	(1UL << 0)
#define RWSEM_FLAG_WAITERS	(1UL << 1)
#define RWSEM_FLAG_HANDOFF	(1UL << 2)
#define RWSEM_FLAG_READFAIL	(1UL << (BITS_PER_LONG - 1))

#define RWSEM_READER_SHIFT	8
#define RWSEM_READER_BIAS	(1UL << RWSEM_READER_SHIFT)
#define RWSEM_READER_MASK	(~(RWSEM_READER_BIAS - 1))
#define RWSEM_WRITER_MASK	RWSEM_WRITER_LOCKED
#define RWSEM_LOCK_MASK		(RWSEM_WRITER_MASK|RWSEM_READER_MASK)
#define RWSEM_READ_FAILED_MASK	(RWSEM_WRITER_MASK|RWSEM_FLAG_WAITERS|\
				 RWSEM_FLAG_HANDOFF|RWSEM_FLAG_READFAIL)

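/*
 * Worked example (editor's illustration, not from the upstream file),
 * using the 64-bit layout above:
 *
 *   count == 0x0000000000000000	free
 *   count == 0x0000000000000001	write-locked (RWSEM_WRITER_LOCKED)
 *   count == 0x0000000000000300	3 readers hold the lock
 *   count == 0x0000000000000302	3 readers + waiters queued
 *   count == 0x0000000000000007	write-locked, waiters queued and
 *					handoff requested
 *
 * Each down_read() fast path adds RWSEM_READER_BIAS (0x100), so the
 * reader count lives in bits 8 and above, clear of the flag bits.
 */
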
/*
 * All writes to owner are protected by WRITE_ONCE() to make sure that
 * store tearing can't happen as optimistic spinners may read and use
 * the owner value concurrently without lock. Reads from owner, however,
 * may not need READ_ONCE() as long as the pointer value is only used
 * for comparison and isn't being dereferenced.
 *
 * Both rwsem_{set,clear}_owner() functions should be in the same
 * preempt disable section as the atomic op that changes sem->count.
 */
static inline void rwsem_set_owner(struct rw_semaphore *sem)
{
	lockdep_assert_preemption_disabled();
	atomic_long_set(&sem->owner, (long)current);
}

static inline void rwsem_clear_owner(struct rw_semaphore *sem)
{
	lockdep_assert_preemption_disabled();
	atomic_long_set(&sem->owner, 0);
}

/*
 * Test the flags in the owner field.
 */
static inline bool rwsem_test_oflags(struct rw_semaphore *sem, long flags)
{
	return atomic_long_read(&sem->owner) & flags;
}

/*
 * The task_struct pointer of the last owning reader will be left in
 * the owner field.
 *
 * Note that the owner value just indicates the task has owned the rwsem
 * previously; it may not be the real owner or one of the real owners
 * anymore when that field is examined, so take it with a grain of salt.
 *
 * The reader non-spinnable bit is preserved.
 */
static inline void __rwsem_set_reader_owned(struct rw_semaphore *sem,
					    struct task_struct *owner)
{
	unsigned long val = (unsigned long)owner | RWSEM_READER_OWNED |
		(atomic_long_read(&sem->owner) & RWSEM_NONSPINNABLE);

	atomic_long_set(&sem->owner, val);
}

static inline void rwsem_set_reader_owned(struct rw_semaphore *sem)
{
	__rwsem_set_reader_owned(sem, current);
}

/*
 * Return true if the rwsem is owned by a reader.
 */
static inline bool is_rwsem_reader_owned(struct rw_semaphore *sem)
{
#ifdef CONFIG_DEBUG_RWSEMS
	/*
	 * Check the count to see if it is write-locked.
	 */
	long count = atomic_long_read(&sem->count);

	if (count & RWSEM_WRITER_MASK)
		return false;
#endif
	return rwsem_test_oflags(sem, RWSEM_READER_OWNED);
}

#ifdef CONFIG_DEBUG_RWSEMS
/*
 * With CONFIG_DEBUG_RWSEMS configured, it will make sure that if there
 * is a task pointer in the owner field of a reader-owned rwsem, it will be
 * the real owner or one of the real owners. The only exception is when the
 * unlock is done by up_read_non_owner().
 */
static inline void rwsem_clear_reader_owned(struct rw_semaphore *sem)
{
	unsigned long val = atomic_long_read(&sem->owner);

	while ((val & ~RWSEM_OWNER_FLAGS_MASK) == (unsigned long)current) {
		if (atomic_long_try_cmpxchg(&sem->owner, &val,
					    val & RWSEM_OWNER_FLAGS_MASK))
			return;
	}
}
#else
static inline void rwsem_clear_reader_owned(struct rw_semaphore *sem)
{
}
#endif

/*
 * Set the RWSEM_NONSPINNABLE bit if the RWSEM_READER_OWNED flag
 * remains set. Otherwise, the operation will be aborted.
 */
static inline void rwsem_set_nonspinnable(struct rw_semaphore *sem)
{
	unsigned long owner = atomic_long_read(&sem->owner);

	do {
		if (!(owner & RWSEM_READER_OWNED))
			break;
		if (owner & RWSEM_NONSPINNABLE)
			break;
	} while (!atomic_long_try_cmpxchg(&sem->owner, &owner,
					  owner | RWSEM_NONSPINNABLE));
}

static inline bool rwsem_read_trylock(struct rw_semaphore *sem, long *cntp)
{
	*cntp = atomic_long_add_return_acquire(RWSEM_READER_BIAS, &sem->count);

	if (WARN_ON_ONCE(*cntp < 0))
		rwsem_set_nonspinnable(sem);

	if (!(*cntp & RWSEM_READ_FAILED_MASK)) {
		rwsem_set_reader_owned(sem);
		return true;
	}

	return false;
}

static inline bool rwsem_write_trylock(struct rw_semaphore *sem)
{
	long tmp = RWSEM_UNLOCKED_VALUE;

	if (atomic_long_try_cmpxchg_acquire(&sem->count, &tmp, RWSEM_WRITER_LOCKED)) {
		rwsem_set_owner(sem);
		return true;
	}

	return false;
}

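/*
 * Illustration (editor's note): rwsem_read_trylock() always adds
 * RWSEM_READER_BIAS first and only then checks the result. If a writer
 * already holds the lock (say count was 0x1), the reader sees 0x101,
 * the RWSEM_READ_FAILED_MASK check rejects it, and the caller must fall
 * back to rwsem_down_read_slowpath(), which either keeps the bias (if it
 * can still get the read lock) or backs it out before sleeping.
 */
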
/*
 * Return just the real task structure pointer of the owner
 */
static inline struct task_struct *rwsem_owner(struct rw_semaphore *sem)
{
	return (struct task_struct *)
		(atomic_long_read(&sem->owner) & ~RWSEM_OWNER_FLAGS_MASK);
}

/*
 * Return the real task structure pointer of the owner and the embedded
 * flags in the owner. pflags must be non-NULL.
 */
static inline struct task_struct *
rwsem_owner_flags(struct rw_semaphore *sem, unsigned long *pflags)
{
	unsigned long owner = atomic_long_read(&sem->owner);

	*pflags = owner & RWSEM_OWNER_FLAGS_MASK;
	return (struct task_struct *)(owner & ~RWSEM_OWNER_FLAGS_MASK);
}

/*
 * Guide to the rw_semaphore's count field.
 *
 * When the RWSEM_WRITER_LOCKED bit in count is set, the lock is owned
 * by a writer.
 *
 * The lock is owned by readers when
 * (1) the RWSEM_WRITER_LOCKED bit isn't set in count,
 * (2) some of the reader bits are set in count, and
 * (3) the owner field has the RWSEM_READER_OWNED bit set.
 *
 * Having some reader bits set is not enough to guarantee a reader-owned
 * lock as the readers may be in the process of backing out from the count
 * and a writer may have just released the lock. So another writer may steal
 * the lock immediately after that.
 */

/*
 * Initialize an rwsem:
 */
void __init_rwsem(struct rw_semaphore *sem, const char *name,
		  struct lock_class_key *key)
{
#ifdef CONFIG_DEBUG_LOCK_ALLOC
	/*
	 * Make sure we are not reinitializing a held semaphore:
	 */
	debug_check_no_locks_freed((void *)sem, sizeof(*sem));
	lockdep_init_map_wait(&sem->dep_map, name, key, 0, LD_WAIT_SLEEP);
#endif
#ifdef CONFIG_DEBUG_RWSEMS
	sem->magic = sem;
#endif
	atomic_long_set(&sem->count, RWSEM_UNLOCKED_VALUE);
	raw_spin_lock_init(&sem->wait_lock);
	INIT_LIST_HEAD(&sem->wait_list);
	atomic_long_set(&sem->owner, 0L);
#ifdef CONFIG_RWSEM_SPIN_ON_OWNER
	osq_lock_init(&sem->osq);
#endif
}
EXPORT_SYMBOL(__init_rwsem);

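/*
 * Usage sketch (editor's illustration, not part of this file): callers
 * normally go through the generic rwsem API rather than the helpers here.
 *
 *	static DECLARE_RWSEM(my_rwsem);		// or init_rwsem() at runtime
 *
 *	down_read(&my_rwsem);			// shared (reader) section
 *	...
 *	up_read(&my_rwsem);
 *
 *	down_write(&my_rwsem);			// exclusive (writer) section
 *	...
 *	up_write(&my_rwsem);
 *
 * "my_rwsem" is a made-up name used for illustration only.
 */
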
enum rwsem_waiter_type {
	RWSEM_WAITING_FOR_WRITE,
	RWSEM_WAITING_FOR_READ
};

struct rwsem_waiter {
	struct list_head list;
	struct task_struct *task;
	enum rwsem_waiter_type type;
	unsigned long timeout;
	bool handoff_set;
};
#define rwsem_first_waiter(sem) \
	list_first_entry(&sem->wait_list, struct rwsem_waiter, list)

enum rwsem_wake_type {
	RWSEM_WAKE_ANY,		/* Wake whatever's at head of wait list */
	RWSEM_WAKE_READERS,	/* Wake readers only */
	RWSEM_WAKE_READ_OWNED	/* Waker thread holds the read lock */
};

/*
 * The typical HZ value is either 250 or 1000. So set the minimum waiting
 * time to at least 4ms or 1 jiffy (if it is higher than 4ms) in the wait
 * queue before initiating the handoff protocol.
 */
#define RWSEM_WAIT_TIMEOUT	DIV_ROUND_UP(HZ, 250)

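/*
 * Worked example (editor's note): DIV_ROUND_UP(HZ, 250) evaluates to
 * 4 jiffies when HZ=1000 (4ms), 1 jiffy when HZ=250 (4ms) and 1 jiffy
 * when HZ=100 (10ms), so the handoff protocol never kicks in before a
 * waiter has been queued for roughly 4ms or one timer tick.
 */
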
/*
 * Magic number to batch-wakeup waiting readers, even when writers are
 * also present in the queue. This both limits the amount of work the
 * waking thread must do and also prevents any potential counter overflow,
 * however unlikely.
 */
#define MAX_READERS_WAKEUP	0x100

static inline void
rwsem_add_waiter(struct rw_semaphore *sem, struct rwsem_waiter *waiter)
{
	lockdep_assert_held(&sem->wait_lock);
	list_add_tail(&waiter->list, &sem->wait_list);
	/* caller will set RWSEM_FLAG_WAITERS */
}

/*
 * Remove a waiter from the wait_list and clear flags.
 *
 * Both rwsem_mark_wake() and rwsem_try_write_lock() contain a full 'copy' of
 * this function. Modify with care.
 *
 * Return: true if wait_list isn't empty and false otherwise
 */
static inline bool
rwsem_del_waiter(struct rw_semaphore *sem, struct rwsem_waiter *waiter)
{
	lockdep_assert_held(&sem->wait_lock);
	list_del(&waiter->list);
	if (likely(!list_empty(&sem->wait_list)))
		return true;

	atomic_long_andnot(RWSEM_FLAG_HANDOFF | RWSEM_FLAG_WAITERS, &sem->count);
	return false;
}

/*
 * Handle the lock release when processes blocked on it can now run
 * - if we come here from up_xxxx(), then the RWSEM_FLAG_WAITERS bit must
 *   have been set.
 * - there must be someone on the queue
 * - the wait_lock must be held by the caller
 * - tasks are marked for wakeup, the caller must later invoke wake_up_q()
 *   to actually wakeup the blocked task(s) and drop the reference count,
 *   preferably when the wait_lock is released
 * - woken process blocks are discarded from the list after having task zeroed
 * - writers are only marked woken if downgrading is false
 *
 * Implies rwsem_del_waiter() for all woken readers.
 */
static void rwsem_mark_wake(struct rw_semaphore *sem,
			    enum rwsem_wake_type wake_type,
			    struct wake_q_head *wake_q)
{
	struct rwsem_waiter *waiter, *tmp;
	long oldcount, woken = 0, adjustment = 0;
	struct list_head wlist;

	lockdep_assert_held(&sem->wait_lock);

	/*
	 * Take a peek at the queue head waiter such that we can determine
	 * the wakeup(s) to perform.
	 */
	waiter = rwsem_first_waiter(sem);

	if (waiter->type == RWSEM_WAITING_FOR_WRITE) {
		if (wake_type == RWSEM_WAKE_ANY) {
			/*
			 * Mark the writer at the front of the queue for wakeup.
			 * Until the task is actually awoken later by the
			 * caller, other writers are able to steal it.
			 * Readers, on the other hand, will block as they
			 * will notice the queued writer.
			 */
			wake_q_add(wake_q, waiter->task);
			lockevent_inc(rwsem_wake_writer);
		}

		return;
	}

4435dec94d4SWaiman Long 	/*
444a15ea1a3SWaiman Long 	 * No reader wakeup if there are too many of them already.
445a15ea1a3SWaiman Long 	 */
446a15ea1a3SWaiman Long 	if (unlikely(atomic_long_read(&sem->count) < 0))
447a15ea1a3SWaiman Long 		return;
448a15ea1a3SWaiman Long 
449a15ea1a3SWaiman Long 	/*
4505dec94d4SWaiman Long 	 * Writers might steal the lock before we grant it to the next reader.
4515dec94d4SWaiman Long 	 * We prefer to do the first reader grant before counting readers
4525dec94d4SWaiman Long 	 * so we can bail out early if a writer stole the lock.
4535dec94d4SWaiman Long 	 */
4545dec94d4SWaiman Long 	if (wake_type != RWSEM_WAKE_READ_OWNED) {
4555cfd92e1SWaiman Long 		struct task_struct *owner;
4565cfd92e1SWaiman Long 
4575dec94d4SWaiman Long 		adjustment = RWSEM_READER_BIAS;
4585dec94d4SWaiman Long 		oldcount = atomic_long_fetch_add(adjustment, &sem->count);
4595dec94d4SWaiman Long 		if (unlikely(oldcount & RWSEM_WRITER_MASK)) {
4604f23dbc1SWaiman Long 			/*
4614f23dbc1SWaiman Long 			 * When we've been waiting "too" long (for writers
4624f23dbc1SWaiman Long 			 * to give up the lock), request a HANDOFF to
4634f23dbc1SWaiman Long 			 * force the issue.
4644f23dbc1SWaiman Long 			 */
4656eebd5fbSWaiman Long 			if (time_after(jiffies, waiter->timeout)) {
4666eebd5fbSWaiman Long 				if (!(oldcount & RWSEM_FLAG_HANDOFF)) {
4674f23dbc1SWaiman Long 					adjustment -= RWSEM_FLAG_HANDOFF;
4684f23dbc1SWaiman Long 					lockevent_inc(rwsem_rlock_handoff);
4694f23dbc1SWaiman Long 				}
4706eebd5fbSWaiman Long 				waiter->handoff_set = true;
4716eebd5fbSWaiman Long 			}
4724f23dbc1SWaiman Long 
4734f23dbc1SWaiman Long 			atomic_long_add(-adjustment, &sem->count);
4745dec94d4SWaiman Long 			return;
4755dec94d4SWaiman Long 		}
4765dec94d4SWaiman Long 		/*
4775dec94d4SWaiman Long 		 * Set it to reader-owned to give spinners an early
4785dec94d4SWaiman Long 		 * indication that readers now have the lock.
4795cfd92e1SWaiman Long 		 * The reader nonspinnable bit seen at slowpath entry of
4805cfd92e1SWaiman Long 		 * the reader is copied over.
4815dec94d4SWaiman Long 		 */
4825cfd92e1SWaiman Long 		owner = waiter->task;
4835cfd92e1SWaiman Long 		__rwsem_set_reader_owned(sem, owner);
4845dec94d4SWaiman Long 	}
4855dec94d4SWaiman Long 
	/*
	 * Grant up to MAX_READERS_WAKEUP read locks to all the readers in the
	 * queue. We know that the number woken will be at least 1 as we
	 * accounted for above. Note we increment the 'active part' of the
	 * count by the number of readers before waking any processes up.
	 *
	 * This is an adaptation of the phase-fair R/W locks where at the
	 * reader phase (first waiter is a reader), all readers are eligible
	 * to acquire the lock at the same time irrespective of their order
	 * in the queue. The writers acquire the lock according to their
	 * order in the queue.
	 *
	 * We have to do the wakeup in 2 passes to prevent the possibility that
	 * the reader count may be decremented before it is incremented. It
	 * is because the to-be-woken waiter may not have slept yet. So it
	 * may see waiter->task got cleared, finish its critical section and
	 * do an unlock before the reader count increment.
	 *
	 * 1) Collect the read-waiters in a separate list, count them and
	 *    fully increment the reader count in rwsem.
	 * 2) For each waiter in the new list, clear waiter->task and
	 *    put them into wake_q to be woken up later.
	 */
	INIT_LIST_HEAD(&wlist);
	list_for_each_entry_safe(waiter, tmp, &sem->wait_list, list) {
		if (waiter->type == RWSEM_WAITING_FOR_WRITE)
			continue;

		woken++;
		list_move_tail(&waiter->list, &wlist);

		/*
		 * Limit # of readers that can be woken up per wakeup call.
		 */
		if (unlikely(woken >= MAX_READERS_WAKEUP))
			break;
	}

	adjustment = woken * RWSEM_READER_BIAS - adjustment;
	lockevent_cond_inc(rwsem_wake_reader, woken);

	oldcount = atomic_long_read(&sem->count);
	if (list_empty(&sem->wait_list)) {
		/*
		 * Combined with list_move_tail() above, this implies
		 * rwsem_del_waiter().
		 */
		adjustment -= RWSEM_FLAG_WAITERS;
		if (oldcount & RWSEM_FLAG_HANDOFF)
			adjustment -= RWSEM_FLAG_HANDOFF;
	} else if (woken) {
		/*
		 * When we've woken a reader, we no longer need to force
		 * writers to give up the lock and we can clear HANDOFF.
		 */
		if (oldcount & RWSEM_FLAG_HANDOFF)
			adjustment -= RWSEM_FLAG_HANDOFF;
	}

	if (adjustment)
		atomic_long_add(adjustment, &sem->count);

	/* 2nd pass */
	list_for_each_entry_safe(waiter, tmp, &wlist, list) {
		struct task_struct *tsk;

		tsk = waiter->task;
		get_task_struct(tsk);

		/*
		 * Ensure calling get_task_struct() before setting the reader
		 * waiter to nil such that rwsem_down_read_slowpath() cannot
		 * race with do_exit() by always holding a reference count
		 * to the task to wakeup.
		 */
		smp_store_release(&waiter->task, NULL);
		/*
		 * Ensure issuing the wakeup (either by us or someone else)
		 * after setting the reader waiter to nil.
		 */
		wake_q_add_safe(wake_q, tsk);
	}
}

/*
 * Remove a waiter and try to wake up other waiters in the wait queue.
 * This function is called from the out_nolock path of both the reader and
 * writer slowpaths with wait_lock held. It releases the wait_lock and
 * optionally wakes up waiters before it returns.
 */
static inline void
rwsem_del_wake_waiter(struct rw_semaphore *sem, struct rwsem_waiter *waiter,
		      struct wake_q_head *wake_q)
		      __releases(&sem->wait_lock)
{
	bool first = rwsem_first_waiter(sem) == waiter;

	wake_q_init(wake_q);

	/*
	 * If the wait_list isn't empty and the waiter to be deleted is
	 * the first waiter, we wake up the remaining waiters as they may
	 * be eligible to acquire or spin on the lock.
	 */
	if (rwsem_del_waiter(sem, waiter) && first)
		rwsem_mark_wake(sem, RWSEM_WAKE_ANY, wake_q);
	raw_spin_unlock_irq(&sem->wait_lock);
	if (!wake_q_empty(wake_q))
		wake_up_q(wake_q);
}

/*
 * This function must be called with the sem->wait_lock held to prevent
 * race conditions between checking the rwsem wait list and setting the
 * sem->count accordingly.
 *
 * Implies rwsem_del_waiter() on success.
 */
static inline bool rwsem_try_write_lock(struct rw_semaphore *sem,
					struct rwsem_waiter *waiter)
{
	struct rwsem_waiter *first = rwsem_first_waiter(sem);
	long count, new;

	lockdep_assert_held(&sem->wait_lock);

	count = atomic_long_read(&sem->count);
	do {
		bool has_handoff = !!(count & RWSEM_FLAG_HANDOFF);

		if (has_handoff) {
			/*
			 * Honor the handoff bit and yield only when the first
			 * waiter is the one that set it. Otherwise, we
			 * still try to acquire the rwsem.
			 */
			if (first->handoff_set && (waiter != first))
				return false;
		}

		new = count;

		if (count & RWSEM_LOCK_MASK) {
			/*
			 * A waiter (first or not) can set the handoff bit
			 * if it is an RT task or has waited in the wait
			 * queue for too long.
			 */
			if (has_handoff || (!rt_task(waiter->task) &&
					    !time_after(jiffies, waiter->timeout)))
				return false;

			new |= RWSEM_FLAG_HANDOFF;
		} else {
			new |= RWSEM_WRITER_LOCKED;
			new &= ~RWSEM_FLAG_HANDOFF;

			if (list_is_singular(&sem->wait_list))
				new &= ~RWSEM_FLAG_WAITERS;
		}
	} while (!atomic_long_try_cmpxchg_acquire(&sem->count, &count, new));

	/*
	 * We have either acquired the lock with the handoff bit cleared or
	 * set the handoff bit. Only the first waiter can have its handoff_set
	 * set here to enable optimistic spinning in the slowpath loop.
	 */
	if (new & RWSEM_FLAG_HANDOFF) {
		first->handoff_set = true;
		lockevent_inc(rwsem_wlock_handoff);
		return false;
	}

	/*
	 * Have rwsem_try_write_lock() fully imply rwsem_del_waiter() on
	 * success.
	 */
	list_del(&waiter->list);
	rwsem_set_owner(sem);
	return true;
}

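/*
 * Worked illustration (editor's note, values are examples only): with two
 * readers holding the lock and a timed-out first write-waiter queued,
 * count may go 0x202 -> 0x206 (handoff bit set, the trylock still fails).
 * Once the readers drain, count is 0x006, and the next attempt installs
 * 0x003 (write-locked, waiters still queued; 0x001 if this was the only
 * waiter), clearing the handoff bit and taking write ownership.
 */
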
/*
 * The rwsem_spin_on_owner() function returns the following 4 values
 * depending on the lock owner state.
 *   OWNER_NULL  : owner is currently NULL
 *   OWNER_WRITER: when owner changes and is a writer
 *   OWNER_READER: when owner changes and the new owner may be a reader.
 *   OWNER_NONSPINNABLE:
 *		   when optimistic spinning has to stop because either the
 *		   owner stops running, is unknown, or its timeslice has
 *		   been used up.
 */
enum owner_state {
	OWNER_NULL		= 1 << 0,
	OWNER_WRITER		= 1 << 1,
	OWNER_READER		= 1 << 2,
	OWNER_NONSPINNABLE	= 1 << 3,
};

#ifdef CONFIG_RWSEM_SPIN_ON_OWNER
/*
 * Try to acquire the write lock before the writer has been put on the
 * wait queue.
 */
static inline bool rwsem_try_write_lock_unqueued(struct rw_semaphore *sem)
{
	long count = atomic_long_read(&sem->count);

	while (!(count & (RWSEM_LOCK_MASK|RWSEM_FLAG_HANDOFF))) {
		if (atomic_long_try_cmpxchg_acquire(&sem->count, &count,
					count | RWSEM_WRITER_LOCKED)) {
			rwsem_set_owner(sem);
			lockevent_inc(rwsem_opt_lock);
			return true;
		}
	}
	return false;
}

static inline bool rwsem_can_spin_on_owner(struct rw_semaphore *sem)
{
	struct task_struct *owner;
	unsigned long flags;
	bool ret = true;

	if (need_resched()) {
		lockevent_inc(rwsem_opt_fail);
		return false;
	}

	/*
	 * Disabling preemption is equivalent to an RCU read-side critical
	 * section, thus the task_struct won't go away.
	 */
	owner = rwsem_owner_flags(sem, &flags);
	/*
	 * Don't check the read-owner as the entry may be stale.
	 */
	if ((flags & RWSEM_NONSPINNABLE) ||
	    (owner && !(flags & RWSEM_READER_OWNED) && !owner_on_cpu(owner)))
		ret = false;

	lockevent_cond_inc(rwsem_opt_fail, !ret);
	return ret;
}

#define OWNER_SPINNABLE		(OWNER_NULL | OWNER_WRITER | OWNER_READER)

static inline enum owner_state
rwsem_owner_state(struct task_struct *owner, unsigned long flags)
{
	if (flags & RWSEM_NONSPINNABLE)
		return OWNER_NONSPINNABLE;

	if (flags & RWSEM_READER_OWNED)
		return OWNER_READER;

	return owner ? OWNER_WRITER : OWNER_NULL;
}

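/*
 * Illustration (editor's note) of how rwsem_owner_state() classifies a
 * sampled owner word (flags are checked before the pointer):
 *
 *   flags have RWSEM_NONSPINNABLE set	-> OWNER_NONSPINNABLE
 *   flags have RWSEM_READER_OWNED set	-> OWNER_READER
 *   owner == task pointer, no flags	-> OWNER_WRITER
 *   owner == NULL, no flags		-> OWNER_NULL
 */
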
static noinline enum owner_state
rwsem_spin_on_owner(struct rw_semaphore *sem)
{
	struct task_struct *new, *owner;
	unsigned long flags, new_flags;
	enum owner_state state;

	lockdep_assert_preemption_disabled();

	owner = rwsem_owner_flags(sem, &flags);
	state = rwsem_owner_state(owner, flags);
	if (state != OWNER_WRITER)
		return state;

	for (;;) {
		/*
		 * When a waiting writer sets the handoff flag, it may spin
		 * on the owner as well. Once that writer acquires the lock,
		 * we can spin on it. So we don't need to quit even when the
		 * handoff bit is set.
		 */
		new = rwsem_owner_flags(sem, &new_flags);
		if ((new != owner) || (new_flags != flags)) {
			state = rwsem_owner_state(new, new_flags);
			break;
		}

		/*
		 * Ensure we emit the owner->on_cpu dereference _after_
		 * checking that sem->owner still matches owner. If that
		 * fails, owner might point to free()d memory. If it still
		 * matches, our spinning context has already disabled
		 * preemption, which is equivalent to an RCU read-side
		 * critical section and ensures the memory stays valid.
		 */
		barrier();

		if (need_resched() || !owner_on_cpu(owner)) {
			state = OWNER_NONSPINNABLE;
			break;
		}

		cpu_relax();
	}

	return state;
}

/*
 * Calculate the reader-owned rwsem spinning threshold for a writer.
 *
 * The more readers own the rwsem, the longer it will take for them to
 * wind down and free the rwsem. So the empirical formula used to
 * determine the actual spinning time limit here is:
 *
 *   Spinning threshold = (10 + nr_readers/2)us
 *
 * The limit is capped to a maximum of 25us (30 readers). This is just
 * a heuristic and is subject to change in the future.
 */
static inline u64 rwsem_rspin_threshold(struct rw_semaphore *sem)
{
	long count = atomic_long_read(&sem->count);
	int readers = count >> RWSEM_READER_SHIFT;
	u64 delta;

	if (readers > 30)
		readers = 30;
	delta = (20 + readers) * NSEC_PER_USEC / 2;

	return sched_clock() + delta;
}

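/*
 * Worked example (editor's note): with 8 readers holding the lock,
 * delta = (20 + 8) * NSEC_PER_USEC / 2 = 14000ns, i.e. the
 * (10 + 8/2)us = 14us from the formula above; with 30 or more readers
 * the cap gives (20 + 30) * 1000 / 2 = 25000ns = 25us.
 */
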
static bool rwsem_optimistic_spin(struct rw_semaphore *sem)
{
	bool taken = false;
	int prev_owner_state = OWNER_NULL;
	int loop = 0;
	u64 rspin_threshold = 0;

	/* sem->wait_lock should not be held when doing optimistic spinning */
	if (!osq_lock(&sem->osq))
		goto done;

	/*
	 * Optimistically spin on the owner field and attempt to acquire the
	 * lock whenever the owner changes. Spinning will be stopped when:
	 *  1) the owning writer isn't running; or
	 *  2) readers own the lock and spinning time has exceeded limit.
	 */
	for (;;) {
		enum owner_state owner_state;

		owner_state = rwsem_spin_on_owner(sem);
		if (!(owner_state & OWNER_SPINNABLE))
			break;

		/*
		 * Try to acquire the lock
		 */
		taken = rwsem_try_write_lock_unqueued(sem);

		if (taken)
			break;

		/*
		 * Time-based reader-owned rwsem optimistic spinning
		 */
		if (owner_state == OWNER_READER) {
			/*
			 * Re-initialize rspin_threshold every time when
			 * the owner state changes from non-reader to reader.
			 * This allows a writer to steal the lock in between
			 * 2 reader phases and have the threshold reset at
			 * the beginning of the 2nd reader phase.
			 */
			if (prev_owner_state != OWNER_READER) {
				if (rwsem_test_oflags(sem, RWSEM_NONSPINNABLE))
					break;
				rspin_threshold = rwsem_rspin_threshold(sem);
				loop = 0;
			}

			/*
			 * Check the time threshold once every 16 iterations to
			 * avoid calling sched_clock() too frequently so
			 * as to reduce the average latency between the times
			 * when the lock becomes free and when the spinner
			 * is ready to do a trylock.
			 */
			else if (!(++loop & 0xf) && (sched_clock() > rspin_threshold)) {
				rwsem_set_nonspinnable(sem);
				lockevent_inc(rwsem_opt_nospin);
				break;
			}
		}

		/*
		 * An RT task cannot do optimistic spinning if it cannot
		 * be sure the lock holder is running or live-lock may
		 * happen if the current task and the lock holder happen
		 * to run on the same CPU. However, aborting optimistic
		 * spinning while a NULL owner is detected may miss some
		 * opportunity where spinning can continue without causing
		 * problem.
		 *
		 * There are 2 possible cases where an RT task may be able
		 * to continue spinning.
		 *
		 * 1) The lock owner is in the process of releasing the
		 *    lock, sem->owner is cleared but the lock has not
		 *    been released yet.
		 * 2) The lock was free and owner cleared, but another
		 *    task just comes in and acquires the lock before
		 *    we try to get it. The new owner may be a spinnable
		 *    writer.
		 *
		 * To take advantage of the two scenarios listed above, the RT
		 * task is made to retry one more time to see if it can
		 * acquire the lock or continue spinning on the new owning
		 * writer. Of course, if the time lag is long enough or the
		 * new owner is not a writer or spinnable, the RT task will
		 * quit spinning.
		 *
		 * If the owner is a writer, the need_resched() check is
		 * done inside rwsem_spin_on_owner(). If the owner is not
		 * a writer, the need_resched() check needs to be done here.
		 */
		if (owner_state != OWNER_WRITER) {
			if (need_resched())
				break;
			if (rt_task(current) &&
			   (prev_owner_state != OWNER_WRITER))
				break;
		}
		prev_owner_state = owner_state;

		/*
		 * The cpu_relax() call is a compiler barrier which forces
		 * everything in this loop to be re-loaded. We don't need
		 * memory barriers as we'll eventually observe the right
		 * values at the cost of a few extra spins.
		 */
		cpu_relax();
	}
	osq_unlock(&sem->osq);
done:
	lockevent_cond_inc(rwsem_opt_fail, !taken);
	return taken;
}

/*
 * Clear the owner's RWSEM_NONSPINNABLE bit if it is set. This should
 * only be called when the reader count reaches 0.
 */
static inline void clear_nonspinnable(struct rw_semaphore *sem)
{
	if (unlikely(rwsem_test_oflags(sem, RWSEM_NONSPINNABLE)))
		atomic_long_andnot(RWSEM_NONSPINNABLE, &sem->owner);
}

#else
static inline bool rwsem_can_spin_on_owner(struct rw_semaphore *sem)
{
	return false;
}

static inline bool rwsem_optimistic_spin(struct rw_semaphore *sem)
{
	return false;
}

static inline void clear_nonspinnable(struct rw_semaphore *sem) { }

static inline enum owner_state
rwsem_spin_on_owner(struct rw_semaphore *sem)
{
	return OWNER_NONSPINNABLE;
}
#endif

/*
 * Prepare to wake up waiter(s) in the wait queue by putting them into the
 * given wake_q if the rwsem lock owner isn't a writer. If the rwsem is likely
 * reader-owned, wake up the read lock waiters at the front of the queue,
 * or wake up any front waiter otherwise.
 *
 * This is being called from both reader and writer slow paths.
 */
static inline void rwsem_cond_wake_waiter(struct rw_semaphore *sem, long count,
					  struct wake_q_head *wake_q)
{
	enum rwsem_wake_type wake_type;

	if (count & RWSEM_WRITER_MASK)
		return;

	if (count & RWSEM_READER_MASK) {
		wake_type = RWSEM_WAKE_READERS;
	} else {
		wake_type = RWSEM_WAKE_ANY;
		clear_nonspinnable(sem);
	}
	rwsem_mark_wake(sem, wake_type, wake_q);
}

/*
 * Wait for the read lock to be granted
 */
static struct rw_semaphore __sched *
rwsem_down_read_slowpath(struct rw_semaphore *sem, long count, unsigned int state)
{
	long adjustment = -RWSEM_READER_BIAS;
	long rcnt = (count >> RWSEM_READER_SHIFT);
	struct rwsem_waiter waiter;
	DEFINE_WAKE_Q(wake_q);

	/*
	 * To prevent a constant stream of readers from starving a sleeping
	 * waiter, don't attempt optimistic lock stealing if the lock is
	 * currently owned by readers.
	 */
	if ((atomic_long_read(&sem->owner) & RWSEM_READER_OWNED) &&
	    (rcnt > 1) && !(count & RWSEM_WRITER_LOCKED))
		goto queue;

	/*
	 * Reader optimistic lock stealing.
	 */
	if (!(count & (RWSEM_WRITER_LOCKED | RWSEM_FLAG_HANDOFF))) {
		rwsem_set_reader_owned(sem);
		lockevent_inc(rwsem_rlock_steal);

		/*
		 * Wake up other readers in the wait queue if it is
		 * the first reader.
		 */
		if ((rcnt == 1) && (count & RWSEM_FLAG_WAITERS)) {
			raw_spin_lock_irq(&sem->wait_lock);
			if (!list_empty(&sem->wait_list))
				rwsem_mark_wake(sem, RWSEM_WAKE_READ_OWNED,
						&wake_q);
			raw_spin_unlock_irq(&sem->wait_lock);
			wake_up_q(&wake_q);
		}
		return sem;
	}

queue:
	waiter.task = current;
	waiter.type = RWSEM_WAITING_FOR_READ;
	waiter.timeout = jiffies + RWSEM_WAIT_TIMEOUT;
	waiter.handoff_set = false;

	raw_spin_lock_irq(&sem->wait_lock);
	if (list_empty(&sem->wait_list)) {
		/*
		 * In case the wait queue is empty and the lock isn't owned
		 * by a writer, this reader can exit the slowpath and return
		 * immediately as its RWSEM_READER_BIAS has already been set
		 * in the count.
		 */
		if (!(atomic_long_read(&sem->count) & RWSEM_WRITER_MASK)) {
			/* Provide lock ACQUIRE */
			smp_acquire__after_ctrl_dep();
			raw_spin_unlock_irq(&sem->wait_lock);
			rwsem_set_reader_owned(sem);
			lockevent_inc(rwsem_rlock_fast);
			return sem;
		}
		adjustment += RWSEM_FLAG_WAITERS;
	}
	rwsem_add_waiter(sem, &waiter);

	/* we're now waiting on the lock, but no longer actively locking */
	count = atomic_long_add_return(adjustment, &sem->count);

	rwsem_cond_wake_waiter(sem, count, &wake_q);
	raw_spin_unlock_irq(&sem->wait_lock);

	if (!wake_q_empty(&wake_q))
		wake_up_q(&wake_q);

	trace_contention_begin(sem, LCB_F_READ);

	/* wait to be given the lock */
	for (;;) {
		set_current_state(state);
		if (!smp_load_acquire(&waiter.task)) {
			/* Matches rwsem_mark_wake()'s smp_store_release(). */
			break;
		}
		if (signal_pending_state(state, current)) {
			raw_spin_lock_irq(&sem->wait_lock);
			if (waiter.task)
				goto out_nolock;
			raw_spin_unlock_irq(&sem->wait_lock);
			/* Ordered by sem->wait_lock against rwsem_mark_wake(). */
			break;
		}
		schedule_preempt_disabled();
		lockevent_inc(rwsem_sleep_reader);
	}

	__set_current_state(TASK_RUNNING);
10915dec94d4SWaiman Long 	lockevent_inc(rwsem_rlock);
1092ee042be1SNamhyung Kim 	trace_contention_end(sem, 0);
10935dec94d4SWaiman Long 	return sem;
10946ffddfb9SPeter Zijlstra 
10955dec94d4SWaiman Long out_nolock:
10961ee32619SWaiman Long 	rwsem_del_wake_waiter(sem, &waiter, &wake_q);
10975dec94d4SWaiman Long 	__set_current_state(TASK_RUNNING);
10985dec94d4SWaiman Long 	lockevent_inc(rwsem_rlock_fail);
1099ee042be1SNamhyung Kim 	trace_contention_end(sem, -EINTR);
11005dec94d4SWaiman Long 	return ERR_PTR(-EINTR);
11015dec94d4SWaiman Long }
11025dec94d4SWaiman Long 
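/*
 * Editor's note: a minimal, self-contained sketch of the wake handshake
 * used by the "wait to be given the lock" loop at the end of
 * rwsem_down_read_slowpath() above.  The waker clears waiter.task with a
 * store-release (done in rwsem_mark_wake()); the sleeper polls it with a
 * load-acquire so that everything written before the release is visible
 * once the pointer reads back as NULL.  The names below (my_waiter,
 * grant, wait_for_grant) are hypothetical and not part of rwsem.c.
 */
struct my_waiter {
	struct task_struct *task;		/* NULL once the lock is granted */
};

static void grant(struct my_waiter *w)		/* waker side */
{
	struct task_struct *t = w->task;

	get_task_struct(t);			/* keep the task alive across the wakeup */
	smp_store_release(&w->task, NULL);	/* pairs with smp_load_acquire() below */
	wake_up_process(t);
	put_task_struct(t);
}

static void wait_for_grant(struct my_waiter *w)	/* sleeper side */
{
	for (;;) {
		set_current_state(TASK_UNINTERRUPTIBLE);
		if (!smp_load_acquire(&w->task))
			break;			/* granted */
		schedule();
	}
	__set_current_state(TASK_RUNNING);
}
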
11035dec94d4SWaiman Long /*
11045dec94d4SWaiman Long  * Wait until we successfully acquire the write lock
11055dec94d4SWaiman Long  */
1106c441e934SMinchan Kim static struct rw_semaphore __sched *
11076cef7ff6SWaiman Long rwsem_down_write_slowpath(struct rw_semaphore *sem, int state)
11085dec94d4SWaiman Long {
11095dec94d4SWaiman Long 	struct rwsem_waiter waiter;
11105dec94d4SWaiman Long 	DEFINE_WAKE_Q(wake_q);
11115dec94d4SWaiman Long 
11125dec94d4SWaiman Long 	/* do optimistic spinning and steal lock if possible */
1113617f3ef9SWaiman Long 	if (rwsem_can_spin_on_owner(sem) && rwsem_optimistic_spin(sem)) {
11146ffddfb9SPeter Zijlstra 		/* rwsem_optimistic_spin() implies ACQUIRE on success */
11155dec94d4SWaiman Long 		return sem;
11166ffddfb9SPeter Zijlstra 	}
11175dec94d4SWaiman Long 
11185dec94d4SWaiman Long 	/*
11195dec94d4SWaiman Long 	 * Optimistic spinning failed, proceed to the slowpath
11205dec94d4SWaiman Long 	 * and block until we can acquire the sem.
11215dec94d4SWaiman Long 	 */
11225dec94d4SWaiman Long 	waiter.task = current;
11235dec94d4SWaiman Long 	waiter.type = RWSEM_WAITING_FOR_WRITE;
11244f23dbc1SWaiman Long 	waiter.timeout = jiffies + RWSEM_WAIT_TIMEOUT;
1125d257cc8cSWaiman Long 	waiter.handoff_set = false;
11265dec94d4SWaiman Long 
11275dec94d4SWaiman Long 	raw_spin_lock_irq(&sem->wait_lock);
1128d257cc8cSWaiman Long 	rwsem_add_waiter(sem, &waiter);
11295dec94d4SWaiman Long 
11305dec94d4SWaiman Long 	/* we're now waiting on the lock */
1131d257cc8cSWaiman Long 	if (rwsem_first_waiter(sem) != &waiter) {
113254c1ee4dSWaiman Long 		rwsem_cond_wake_waiter(sem, atomic_long_read(&sem->count),
113354c1ee4dSWaiman Long 				       &wake_q);
113400f3c5a3SWaiman Long 		if (!wake_q_empty(&wake_q)) {
11355dec94d4SWaiman Long 			/*
113600f3c5a3SWaiman Long 			 * We want to minimize wait_lock hold time especially
113700f3c5a3SWaiman Long 			 * when a large number of readers are to be woken up.
11385dec94d4SWaiman Long 			 */
113900f3c5a3SWaiman Long 			raw_spin_unlock_irq(&sem->wait_lock);
11405dec94d4SWaiman Long 			wake_up_q(&wake_q);
114100f3c5a3SWaiman Long 			raw_spin_lock_irq(&sem->wait_lock);
114200f3c5a3SWaiman Long 		}
11435dec94d4SWaiman Long 	} else {
114400f3c5a3SWaiman Long 		atomic_long_or(RWSEM_FLAG_WAITERS, &sem->count);
11455dec94d4SWaiman Long 	}
11465dec94d4SWaiman Long 
11475dec94d4SWaiman Long 	/* wait until we successfully acquire the lock */
11485dec94d4SWaiman Long 	set_current_state(state);
1149ee042be1SNamhyung Kim 	trace_contention_begin(sem, LCB_F_WRITE);
1150ee042be1SNamhyung Kim 
11516ffddfb9SPeter Zijlstra 	for (;;) {
1152d257cc8cSWaiman Long 		if (rwsem_try_write_lock(sem, &waiter)) {
11536ffddfb9SPeter Zijlstra 			/* rwsem_try_write_lock() implies ACQUIRE on success */
11545dec94d4SWaiman Long 			break;
11556ffddfb9SPeter Zijlstra 		}
11564f23dbc1SWaiman Long 
11575dec94d4SWaiman Long 		raw_spin_unlock_irq(&sem->wait_lock);
11585dec94d4SWaiman Long 
1159d257cc8cSWaiman Long 		if (signal_pending_state(state, current))
1160d257cc8cSWaiman Long 			goto out_nolock;
1161d257cc8cSWaiman Long 
116291d2a812SWaiman Long 		/*
116391d2a812SWaiman Long 		 * After setting the handoff bit and failing to acquire
116491d2a812SWaiman Long 		 * the lock, attempt to spin on owner to accelerate lock
116491d2a812SWaiman Long 		 * transfer. If the previous owner is an on-cpu writer and it
116691d2a812SWaiman Long 		 * has just released the lock, OWNER_NULL will be returned.
116791d2a812SWaiman Long 		 * In this case, we attempt to acquire the lock again
116891d2a812SWaiman Long 		 * without sleeping.
116991d2a812SWaiman Long 		 */
1170d257cc8cSWaiman Long 		if (waiter.handoff_set) {
11717cdacc5fSYanfei Xu 			enum owner_state owner_state;
11727cdacc5fSYanfei Xu 
11737cdacc5fSYanfei Xu 			owner_state = rwsem_spin_on_owner(sem);
11747cdacc5fSYanfei Xu 			if (owner_state == OWNER_NULL)
117591d2a812SWaiman Long 				goto trylock_again;
11767cdacc5fSYanfei Xu 		}
117791d2a812SWaiman Long 
11781d61659cSWaiman Long 		schedule_preempt_disabled();
11795dec94d4SWaiman Long 		lockevent_inc(rwsem_sleep_writer);
11805dec94d4SWaiman Long 		set_current_state(state);
118191d2a812SWaiman Long trylock_again:
11825dec94d4SWaiman Long 		raw_spin_lock_irq(&sem->wait_lock);
11835dec94d4SWaiman Long 	}
11845dec94d4SWaiman Long 	__set_current_state(TASK_RUNNING);
11855dec94d4SWaiman Long 	raw_spin_unlock_irq(&sem->wait_lock);
11865dec94d4SWaiman Long 	lockevent_inc(rwsem_wlock);
1187ee042be1SNamhyung Kim 	trace_contention_end(sem, 0);
1188d257cc8cSWaiman Long 	return sem;
11895dec94d4SWaiman Long 
11905dec94d4SWaiman Long out_nolock:
11915dec94d4SWaiman Long 	__set_current_state(TASK_RUNNING);
11925dec94d4SWaiman Long 	raw_spin_lock_irq(&sem->wait_lock);
11931ee32619SWaiman Long 	rwsem_del_wake_waiter(sem, &waiter, &wake_q);
11945dec94d4SWaiman Long 	lockevent_inc(rwsem_wlock_fail);
1195ee042be1SNamhyung Kim 	trace_contention_end(sem, -EINTR);
11965dec94d4SWaiman Long 	return ERR_PTR(-EINTR);
11975dec94d4SWaiman Long }
11985dec94d4SWaiman Long 
11995dec94d4SWaiman Long /*
12005dec94d4SWaiman Long  * handle waking up a waiter on the semaphore
12015dec94d4SWaiman Long  * - up_read/up_write has decremented the active part of count if we come here
12025dec94d4SWaiman Long  */
1203d4e5076cSxuyehan static struct rw_semaphore *rwsem_wake(struct rw_semaphore *sem)
12045dec94d4SWaiman Long {
12055dec94d4SWaiman Long 	unsigned long flags;
12065dec94d4SWaiman Long 	DEFINE_WAKE_Q(wake_q);
12075dec94d4SWaiman Long 
12085dec94d4SWaiman Long 	raw_spin_lock_irqsave(&sem->wait_lock, flags);
12095dec94d4SWaiman Long 
12105dec94d4SWaiman Long 	if (!list_empty(&sem->wait_list))
12116cef7ff6SWaiman Long 		rwsem_mark_wake(sem, RWSEM_WAKE_ANY, &wake_q);
12125dec94d4SWaiman Long 
12135dec94d4SWaiman Long 	raw_spin_unlock_irqrestore(&sem->wait_lock, flags);
12145dec94d4SWaiman Long 	wake_up_q(&wake_q);
12155dec94d4SWaiman Long 
12165dec94d4SWaiman Long 	return sem;
12175dec94d4SWaiman Long }
12185dec94d4SWaiman Long 
12195dec94d4SWaiman Long /*
12205dec94d4SWaiman Long  * downgrade a write lock into a read lock
12215dec94d4SWaiman Long  * - caller incremented waiting part of count and discovered it still negative
12225dec94d4SWaiman Long  * - just wake up any readers at the front of the queue
12235dec94d4SWaiman Long  */
12246cef7ff6SWaiman Long static struct rw_semaphore *rwsem_downgrade_wake(struct rw_semaphore *sem)
12255dec94d4SWaiman Long {
12265dec94d4SWaiman Long 	unsigned long flags;
12275dec94d4SWaiman Long 	DEFINE_WAKE_Q(wake_q);
12285dec94d4SWaiman Long 
12295dec94d4SWaiman Long 	raw_spin_lock_irqsave(&sem->wait_lock, flags);
12305dec94d4SWaiman Long 
12315dec94d4SWaiman Long 	if (!list_empty(&sem->wait_list))
12326cef7ff6SWaiman Long 		rwsem_mark_wake(sem, RWSEM_WAKE_READ_OWNED, &wake_q);
12335dec94d4SWaiman Long 
12345dec94d4SWaiman Long 	raw_spin_unlock_irqrestore(&sem->wait_lock, flags);
12355dec94d4SWaiman Long 	wake_up_q(&wake_q);
12365dec94d4SWaiman Long 
12375dec94d4SWaiman Long 	return sem;
12385dec94d4SWaiman Long }
12395dec94d4SWaiman Long 
12405dec94d4SWaiman Long /*
12415dec94d4SWaiman Long  * lock for reading
12425dec94d4SWaiman Long  */
1243*92cc5d00SJohn Stultz static __always_inline int __down_read_common(struct rw_semaphore *sem, int state)
12445dec94d4SWaiman Long {
12453f524553SWaiman Long 	int ret = 0;
1246c8fe8b05SWaiman Long 	long count;
1247c8fe8b05SWaiman Long 
12483f524553SWaiman Long 	preempt_disable();
1249c8fe8b05SWaiman Long 	if (!rwsem_read_trylock(sem, &count)) {
12503f524553SWaiman Long 		if (IS_ERR(rwsem_down_read_slowpath(sem, count, state))) {
12513f524553SWaiman Long 			ret = -EINTR;
12523f524553SWaiman Long 			goto out;
12533f524553SWaiman Long 		}
125494a9717bSWaiman Long 		DEBUG_RWSEMS_WARN_ON(!is_rwsem_reader_owned(sem), sem);
12555dec94d4SWaiman Long 	}
12563f524553SWaiman Long out:
12573f524553SWaiman Long 	preempt_enable();
12583f524553SWaiman Long 	return ret;
1259c995e638SPeter Zijlstra }
1260c995e638SPeter Zijlstra 
1261*92cc5d00SJohn Stultz static __always_inline void __down_read(struct rw_semaphore *sem)
1262c995e638SPeter Zijlstra {
1263c995e638SPeter Zijlstra 	__down_read_common(sem, TASK_UNINTERRUPTIBLE);
12645dec94d4SWaiman Long }
12655dec94d4SWaiman Long 
1266*92cc5d00SJohn Stultz static __always_inline int __down_read_interruptible(struct rw_semaphore *sem)
126731784cffSEric W. Biederman {
1268c995e638SPeter Zijlstra 	return __down_read_common(sem, TASK_INTERRUPTIBLE);
126931784cffSEric W. Biederman }
127031784cffSEric W. Biederman 
1271*92cc5d00SJohn Stultz static __always_inline int __down_read_killable(struct rw_semaphore *sem)
12725dec94d4SWaiman Long {
1273c995e638SPeter Zijlstra 	return __down_read_common(sem, TASK_KILLABLE);
12745dec94d4SWaiman Long }
12755dec94d4SWaiman Long 
12765dec94d4SWaiman Long static inline int __down_read_trylock(struct rw_semaphore *sem)
12775dec94d4SWaiman Long {
12783f524553SWaiman Long 	int ret = 0;
1279fce45cd4SDavidlohr Bueso 	long tmp;
1280fce45cd4SDavidlohr Bueso 
1281fce45cd4SDavidlohr Bueso 	DEBUG_RWSEMS_WARN_ON(sem->magic != sem, sem);
1282fce45cd4SDavidlohr Bueso 
12833f524553SWaiman Long 	preempt_disable();
128414c24048SMuchun Song 	tmp = atomic_long_read(&sem->count);
128514c24048SMuchun Song 	while (!(tmp & RWSEM_READ_FAILED_MASK)) {
12865dec94d4SWaiman Long 		if (atomic_long_try_cmpxchg_acquire(&sem->count, &tmp,
12875dec94d4SWaiman Long 						    tmp + RWSEM_READER_BIAS)) {
12885dec94d4SWaiman Long 			rwsem_set_reader_owned(sem);
12893f524553SWaiman Long 			ret = 1;
12903f524553SWaiman Long 			break;
12915dec94d4SWaiman Long 		}
129214c24048SMuchun Song 	}
12933f524553SWaiman Long 	preempt_enable();
12943f524553SWaiman Long 	return ret;
12955dec94d4SWaiman Long }
12965dec94d4SWaiman Long 
12975dec94d4SWaiman Long /*
12985dec94d4SWaiman Long  * lock for writing
12995dec94d4SWaiman Long  */
1300c995e638SPeter Zijlstra static inline int __down_write_common(struct rw_semaphore *sem, int state)
13015dec94d4SWaiman Long {
13021d61659cSWaiman Long 	int ret = 0;
13031d61659cSWaiman Long 
13041d61659cSWaiman Long 	preempt_disable();
1305285c61aeSPeter Zijlstra 	if (unlikely(!rwsem_write_trylock(sem))) {
1306c995e638SPeter Zijlstra 		if (IS_ERR(rwsem_down_write_slowpath(sem, state)))
13071d61659cSWaiman Long 			ret = -EINTR;
13085cfd92e1SWaiman Long 	}
13091d61659cSWaiman Long 	preempt_enable();
13101d61659cSWaiman Long 	return ret;
13115dec94d4SWaiman Long }
13125dec94d4SWaiman Long 
1313c995e638SPeter Zijlstra static inline void __down_write(struct rw_semaphore *sem)
1314c995e638SPeter Zijlstra {
1315c995e638SPeter Zijlstra 	__down_write_common(sem, TASK_UNINTERRUPTIBLE);
1316c995e638SPeter Zijlstra }
1317c995e638SPeter Zijlstra 
1318c995e638SPeter Zijlstra static inline int __down_write_killable(struct rw_semaphore *sem)
1319c995e638SPeter Zijlstra {
1320c995e638SPeter Zijlstra 	return __down_write_common(sem, TASK_KILLABLE);
1321c995e638SPeter Zijlstra }
1322c995e638SPeter Zijlstra 
13235dec94d4SWaiman Long static inline int __down_write_trylock(struct rw_semaphore *sem)
13245dec94d4SWaiman Long {
13251d61659cSWaiman Long 	int ret;
13261d61659cSWaiman Long 
13271d61659cSWaiman Long 	preempt_disable();
1328fce45cd4SDavidlohr Bueso 	DEBUG_RWSEMS_WARN_ON(sem->magic != sem, sem);
13291d61659cSWaiman Long 	ret = rwsem_write_trylock(sem);
13301d61659cSWaiman Long 	preempt_enable();
13311d61659cSWaiman Long 
13321d61659cSWaiman Long 	return ret;
13335dec94d4SWaiman Long }
13345dec94d4SWaiman Long 
13355dec94d4SWaiman Long /*
13365dec94d4SWaiman Long  * unlock after reading
13375dec94d4SWaiman Long  */
13387f26482aSPeter Zijlstra static inline void __up_read(struct rw_semaphore *sem)
13395dec94d4SWaiman Long {
13405dec94d4SWaiman Long 	long tmp;
13415dec94d4SWaiman Long 
1342fce45cd4SDavidlohr Bueso 	DEBUG_RWSEMS_WARN_ON(sem->magic != sem, sem);
134394a9717bSWaiman Long 	DEBUG_RWSEMS_WARN_ON(!is_rwsem_reader_owned(sem), sem);
1344fce45cd4SDavidlohr Bueso 
13453f524553SWaiman Long 	preempt_disable();
13465dec94d4SWaiman Long 	rwsem_clear_reader_owned(sem);
13475dec94d4SWaiman Long 	tmp = atomic_long_add_return_release(-RWSEM_READER_BIAS, &sem->count);
1348a15ea1a3SWaiman Long 	DEBUG_RWSEMS_WARN_ON(tmp < 0, sem);
13496cef7ff6SWaiman Long 	if (unlikely((tmp & (RWSEM_LOCK_MASK|RWSEM_FLAG_WAITERS)) ==
13507d43f1ceSWaiman Long 		      RWSEM_FLAG_WAITERS)) {
1351617f3ef9SWaiman Long 		clear_nonspinnable(sem);
1352d4e5076cSxuyehan 		rwsem_wake(sem);
13535dec94d4SWaiman Long 	}
13543f524553SWaiman Long 	preempt_enable();
13557d43f1ceSWaiman Long }
13565dec94d4SWaiman Long 
13575dec94d4SWaiman Long /*
13585dec94d4SWaiman Long  * unlock after writing
13595dec94d4SWaiman Long  */
13607f26482aSPeter Zijlstra static inline void __up_write(struct rw_semaphore *sem)
13615dec94d4SWaiman Long {
13626cef7ff6SWaiman Long 	long tmp;
13636cef7ff6SWaiman Long 
1364fce45cd4SDavidlohr Bueso 	DEBUG_RWSEMS_WARN_ON(sem->magic != sem, sem);
136502f1082bSWaiman Long 	/*
136602f1082bSWaiman Long 	 * sem->owner may differ from current if the ownership is transferred
136702f1082bSWaiman Long 	 * to an anonymous writer by setting the RWSEM_NONSPINNABLE bits.
136802f1082bSWaiman Long 	 */
136994a9717bSWaiman Long 	DEBUG_RWSEMS_WARN_ON((rwsem_owner(sem) != current) &&
137094a9717bSWaiman Long 			    !rwsem_test_oflags(sem, RWSEM_NONSPINNABLE), sem);
1371fce45cd4SDavidlohr Bueso 
137248dfb5d2SGokul krishna Krishnakumar 	preempt_disable();
13735dec94d4SWaiman Long 	rwsem_clear_owner(sem);
13746cef7ff6SWaiman Long 	tmp = atomic_long_fetch_add_release(-RWSEM_WRITER_LOCKED, &sem->count);
13756cef7ff6SWaiman Long 	if (unlikely(tmp & RWSEM_FLAG_WAITERS))
1376d4e5076cSxuyehan 		rwsem_wake(sem);
13771d61659cSWaiman Long 	preempt_enable();
13785dec94d4SWaiman Long }
13795dec94d4SWaiman Long 
13805dec94d4SWaiman Long /*
13815dec94d4SWaiman Long  * downgrade write lock to read lock
13825dec94d4SWaiman Long  */
13835dec94d4SWaiman Long static inline void __downgrade_write(struct rw_semaphore *sem)
13845dec94d4SWaiman Long {
13855dec94d4SWaiman Long 	long tmp;
13865dec94d4SWaiman Long 
13875dec94d4SWaiman Long 	/*
13885dec94d4SWaiman Long 	 * When downgrading from exclusive to shared ownership,
13895dec94d4SWaiman Long 	 * anything inside the write-locked region cannot leak
13905dec94d4SWaiman Long 	 * into the read side. In contrast, anything in the
13915dec94d4SWaiman Long 	 * read-locked region is ok to be re-ordered into the
13925dec94d4SWaiman Long 	 * write side. As such, rely on RELEASE semantics.
13935dec94d4SWaiman Long 	 */
139494a9717bSWaiman Long 	DEBUG_RWSEMS_WARN_ON(rwsem_owner(sem) != current, sem);
13951d61659cSWaiman Long 	preempt_disable();
13965dec94d4SWaiman Long 	tmp = atomic_long_fetch_add_release(
13975dec94d4SWaiman Long 		-RWSEM_WRITER_LOCKED+RWSEM_READER_BIAS, &sem->count);
13985dec94d4SWaiman Long 	rwsem_set_reader_owned(sem);
13995dec94d4SWaiman Long 	if (tmp & RWSEM_FLAG_WAITERS)
14005dec94d4SWaiman Long 		rwsem_downgrade_wake(sem);
14011d61659cSWaiman Long 	preempt_enable();
14025dec94d4SWaiman Long }
14034fc828e2SDavidlohr Bueso 
140442254105SThomas Gleixner #else /* !CONFIG_PREEMPT_RT */
140542254105SThomas Gleixner 
1406e17ba59bSThomas Gleixner #define RT_MUTEX_BUILD_MUTEX
140742254105SThomas Gleixner #include "rtmutex.c"
140842254105SThomas Gleixner 
140942254105SThomas Gleixner #define rwbase_set_and_save_current_state(state)	\
141042254105SThomas Gleixner 	set_current_state(state)
141142254105SThomas Gleixner 
141242254105SThomas Gleixner #define rwbase_restore_current_state()			\
141342254105SThomas Gleixner 	__set_current_state(TASK_RUNNING)
141442254105SThomas Gleixner 
141542254105SThomas Gleixner #define rwbase_rtmutex_lock_state(rtm, state)		\
141642254105SThomas Gleixner 	__rt_mutex_lock(rtm, state)
141742254105SThomas Gleixner 
141842254105SThomas Gleixner #define rwbase_rtmutex_slowlock_locked(rtm, state)	\
1419add46132SPeter Zijlstra 	__rt_mutex_slowlock_locked(rtm, NULL, state)
142042254105SThomas Gleixner 
142142254105SThomas Gleixner #define rwbase_rtmutex_unlock(rtm)			\
142242254105SThomas Gleixner 	__rt_mutex_unlock(rtm)
142342254105SThomas Gleixner 
142442254105SThomas Gleixner #define rwbase_rtmutex_trylock(rtm)			\
142542254105SThomas Gleixner 	__rt_mutex_trylock(rtm)
142642254105SThomas Gleixner 
142742254105SThomas Gleixner #define rwbase_signal_pending_state(state, current)	\
142842254105SThomas Gleixner 	signal_pending_state(state, current)
142942254105SThomas Gleixner 
143042254105SThomas Gleixner #define rwbase_schedule()				\
143142254105SThomas Gleixner 	schedule()
143242254105SThomas Gleixner 
143342254105SThomas Gleixner #include "rwbase_rt.c"
143442254105SThomas Gleixner 
143515eb7c88SMike Galbraith void __init_rwsem(struct rw_semaphore *sem, const char *name,
143642254105SThomas Gleixner 		  struct lock_class_key *key)
143742254105SThomas Gleixner {
143815eb7c88SMike Galbraith 	init_rwbase_rt(&(sem)->rwbase);
143915eb7c88SMike Galbraith 
144015eb7c88SMike Galbraith #ifdef CONFIG_DEBUG_LOCK_ALLOC
144142254105SThomas Gleixner 	debug_check_no_locks_freed((void *)sem, sizeof(*sem));
144242254105SThomas Gleixner 	lockdep_init_map_wait(&sem->dep_map, name, key, 0, LD_WAIT_SLEEP);
144342254105SThomas Gleixner #endif
144415eb7c88SMike Galbraith }
144515eb7c88SMike Galbraith EXPORT_SYMBOL(__init_rwsem);
144642254105SThomas Gleixner 
144742254105SThomas Gleixner static inline void __down_read(struct rw_semaphore *sem)
144842254105SThomas Gleixner {
144942254105SThomas Gleixner 	rwbase_read_lock(&sem->rwbase, TASK_UNINTERRUPTIBLE);
145042254105SThomas Gleixner }
145142254105SThomas Gleixner 
145242254105SThomas Gleixner static inline int __down_read_interruptible(struct rw_semaphore *sem)
145342254105SThomas Gleixner {
145442254105SThomas Gleixner 	return rwbase_read_lock(&sem->rwbase, TASK_INTERRUPTIBLE);
145542254105SThomas Gleixner }
145642254105SThomas Gleixner 
145742254105SThomas Gleixner static inline int __down_read_killable(struct rw_semaphore *sem)
145842254105SThomas Gleixner {
145942254105SThomas Gleixner 	return rwbase_read_lock(&sem->rwbase, TASK_KILLABLE);
146042254105SThomas Gleixner }
146142254105SThomas Gleixner 
146242254105SThomas Gleixner static inline int __down_read_trylock(struct rw_semaphore *sem)
146342254105SThomas Gleixner {
146442254105SThomas Gleixner 	return rwbase_read_trylock(&sem->rwbase);
146542254105SThomas Gleixner }
146642254105SThomas Gleixner 
146742254105SThomas Gleixner static inline void __up_read(struct rw_semaphore *sem)
146842254105SThomas Gleixner {
146942254105SThomas Gleixner 	rwbase_read_unlock(&sem->rwbase, TASK_NORMAL);
147042254105SThomas Gleixner }
147142254105SThomas Gleixner 
147242254105SThomas Gleixner static inline void __sched __down_write(struct rw_semaphore *sem)
147342254105SThomas Gleixner {
147442254105SThomas Gleixner 	rwbase_write_lock(&sem->rwbase, TASK_UNINTERRUPTIBLE);
147542254105SThomas Gleixner }
147642254105SThomas Gleixner 
147742254105SThomas Gleixner static inline int __sched __down_write_killable(struct rw_semaphore *sem)
147842254105SThomas Gleixner {
147942254105SThomas Gleixner 	return rwbase_write_lock(&sem->rwbase, TASK_KILLABLE);
148042254105SThomas Gleixner }
148142254105SThomas Gleixner 
148242254105SThomas Gleixner static inline int __down_write_trylock(struct rw_semaphore *sem)
148342254105SThomas Gleixner {
148442254105SThomas Gleixner 	return rwbase_write_trylock(&sem->rwbase);
148542254105SThomas Gleixner }
148642254105SThomas Gleixner 
148742254105SThomas Gleixner static inline void __up_write(struct rw_semaphore *sem)
148842254105SThomas Gleixner {
148942254105SThomas Gleixner 	rwbase_write_unlock(&sem->rwbase);
149042254105SThomas Gleixner }
149142254105SThomas Gleixner 
149242254105SThomas Gleixner static inline void __downgrade_write(struct rw_semaphore *sem)
149342254105SThomas Gleixner {
149442254105SThomas Gleixner 	rwbase_write_downgrade(&sem->rwbase);
149542254105SThomas Gleixner }
149642254105SThomas Gleixner 
149742254105SThomas Gleixner /* Debug stubs for the common API */
149842254105SThomas Gleixner #define DEBUG_RWSEMS_WARN_ON(c, sem)
149942254105SThomas Gleixner 
150042254105SThomas Gleixner static inline void __rwsem_set_reader_owned(struct rw_semaphore *sem,
150142254105SThomas Gleixner 					    struct task_struct *owner)
150242254105SThomas Gleixner {
150342254105SThomas Gleixner }
150442254105SThomas Gleixner 
150542254105SThomas Gleixner static inline bool is_rwsem_reader_owned(struct rw_semaphore *sem)
150642254105SThomas Gleixner {
150742254105SThomas Gleixner 	int count = atomic_read(&sem->rwbase.readers);
150842254105SThomas Gleixner 
150942254105SThomas Gleixner 	return count < 0 && count != READER_BIAS;
151042254105SThomas Gleixner }
151142254105SThomas Gleixner 
151242254105SThomas Gleixner #endif /* CONFIG_PREEMPT_RT */
151342254105SThomas Gleixner 
1514ed428bfcSPeter Zijlstra /*
1515ed428bfcSPeter Zijlstra  * lock for reading
1516ed428bfcSPeter Zijlstra  */
1517ed428bfcSPeter Zijlstra void __sched down_read(struct rw_semaphore *sem)
1518ed428bfcSPeter Zijlstra {
1519ed428bfcSPeter Zijlstra 	might_sleep();
1520ed428bfcSPeter Zijlstra 	rwsem_acquire_read(&sem->dep_map, 0, 0, _RET_IP_);
1521ed428bfcSPeter Zijlstra 
1522ed428bfcSPeter Zijlstra 	LOCK_CONTENDED(sem, __down_read_trylock, __down_read);
1523ed428bfcSPeter Zijlstra }
1524ed428bfcSPeter Zijlstra EXPORT_SYMBOL(down_read);
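
/*
 * Editor's illustration (not part of rwsem.c): a typical read-side
 * critical section using the public API above.  my_rwsem, my_cfg and
 * read_cfg are hypothetical names.
 */
static DECLARE_RWSEM(my_rwsem);
static int my_cfg;

static int read_cfg(void)
{
	int val;

	down_read(&my_rwsem);		/* may sleep until no writer holds the lock */
	val = my_cfg;			/* readers may run this section concurrently */
	up_read(&my_rwsem);
	return val;
}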
1525ed428bfcSPeter Zijlstra 
152631784cffSEric W. Biederman int __sched down_read_interruptible(struct rw_semaphore *sem)
152731784cffSEric W. Biederman {
152831784cffSEric W. Biederman 	might_sleep();
152931784cffSEric W. Biederman 	rwsem_acquire_read(&sem->dep_map, 0, 0, _RET_IP_);
153031784cffSEric W. Biederman 
153131784cffSEric W. Biederman 	if (LOCK_CONTENDED_RETURN(sem, __down_read_trylock, __down_read_interruptible)) {
153231784cffSEric W. Biederman 		rwsem_release(&sem->dep_map, _RET_IP_);
153331784cffSEric W. Biederman 		return -EINTR;
153431784cffSEric W. Biederman 	}
153531784cffSEric W. Biederman 
153631784cffSEric W. Biederman 	return 0;
153731784cffSEric W. Biederman }
153831784cffSEric W. Biederman EXPORT_SYMBOL(down_read_interruptible);
153931784cffSEric W. Biederman 
154076f8507fSKirill Tkhai int __sched down_read_killable(struct rw_semaphore *sem)
154176f8507fSKirill Tkhai {
154276f8507fSKirill Tkhai 	might_sleep();
154376f8507fSKirill Tkhai 	rwsem_acquire_read(&sem->dep_map, 0, 0, _RET_IP_);
154476f8507fSKirill Tkhai 
154576f8507fSKirill Tkhai 	if (LOCK_CONTENDED_RETURN(sem, __down_read_trylock, __down_read_killable)) {
15465facae4fSQian Cai 		rwsem_release(&sem->dep_map, _RET_IP_);
154776f8507fSKirill Tkhai 		return -EINTR;
154876f8507fSKirill Tkhai 	}
154976f8507fSKirill Tkhai 
155076f8507fSKirill Tkhai 	return 0;
155176f8507fSKirill Tkhai }
155276f8507fSKirill Tkhai EXPORT_SYMBOL(down_read_killable);
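
/*
 * Editor's illustration (not part of rwsem.c): the killable variant
 * returns -EINTR if a fatal signal arrives while sleeping, so the
 * caller must check the result before touching protected data.
 * read_cfg_killable and its arguments are hypothetical.
 */
static int read_cfg_killable(struct rw_semaphore *sem, int *cfg, int *val)
{
	int ret = down_read_killable(sem);

	if (ret)			/* -EINTR: the lock was never taken */
		return ret;
	*val = *cfg;
	up_read(sem);
	return 0;
}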
155376f8507fSKirill Tkhai 
1554ed428bfcSPeter Zijlstra /*
1555ed428bfcSPeter Zijlstra  * trylock for reading -- returns 1 if successful, 0 if contention
1556ed428bfcSPeter Zijlstra  */
1557ed428bfcSPeter Zijlstra int down_read_trylock(struct rw_semaphore *sem)
1558ed428bfcSPeter Zijlstra {
1559ed428bfcSPeter Zijlstra 	int ret = __down_read_trylock(sem);
1560ed428bfcSPeter Zijlstra 
1561c7580c1eSWaiman Long 	if (ret == 1)
1562ed428bfcSPeter Zijlstra 		rwsem_acquire_read(&sem->dep_map, 0, 1, _RET_IP_);
1563ed428bfcSPeter Zijlstra 	return ret;
1564ed428bfcSPeter Zijlstra }
1565ed428bfcSPeter Zijlstra EXPORT_SYMBOL(down_read_trylock);
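
/*
 * Editor's illustration (not part of rwsem.c): trylock returns 1 on
 * success and 0 on contention, so callers that must not sleep on the
 * lock can fall back to another strategy.  try_read_cfg is hypothetical.
 */
static bool try_read_cfg(struct rw_semaphore *sem, int *cfg, int *val)
{
	if (!down_read_trylock(sem))
		return false;		/* contended; caller retries or bails out */
	*val = *cfg;
	up_read(sem);
	return true;
}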
1566ed428bfcSPeter Zijlstra 
1567ed428bfcSPeter Zijlstra /*
1568ed428bfcSPeter Zijlstra  * lock for writing
1569ed428bfcSPeter Zijlstra  */
1570ed428bfcSPeter Zijlstra void __sched down_write(struct rw_semaphore *sem)
1571ed428bfcSPeter Zijlstra {
1572ed428bfcSPeter Zijlstra 	might_sleep();
1573ed428bfcSPeter Zijlstra 	rwsem_acquire(&sem->dep_map, 0, 0, _RET_IP_);
1574ed428bfcSPeter Zijlstra 	LOCK_CONTENDED(sem, __down_write_trylock, __down_write);
1575ed428bfcSPeter Zijlstra }
1576ed428bfcSPeter Zijlstra EXPORT_SYMBOL(down_write);
1577ed428bfcSPeter Zijlstra 
1578ed428bfcSPeter Zijlstra /*
1579916633a4SMichal Hocko  * lock for writing
1580916633a4SMichal Hocko  */
1581916633a4SMichal Hocko int __sched down_write_killable(struct rw_semaphore *sem)
1582916633a4SMichal Hocko {
1583916633a4SMichal Hocko 	might_sleep();
1584916633a4SMichal Hocko 	rwsem_acquire(&sem->dep_map, 0, 0, _RET_IP_);
1585916633a4SMichal Hocko 
15866cef7ff6SWaiman Long 	if (LOCK_CONTENDED_RETURN(sem, __down_write_trylock,
15876cef7ff6SWaiman Long 				  __down_write_killable)) {
15885facae4fSQian Cai 		rwsem_release(&sem->dep_map, _RET_IP_);
1589916633a4SMichal Hocko 		return -EINTR;
1590916633a4SMichal Hocko 	}
1591916633a4SMichal Hocko 
1592916633a4SMichal Hocko 	return 0;
1593916633a4SMichal Hocko }
1594916633a4SMichal Hocko EXPORT_SYMBOL(down_write_killable);
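
/*
 * Editor's illustration (not part of rwsem.c): the write-side killable
 * variant, mirroring down_read_killable() above.  update_cfg is
 * hypothetical.
 */
static int update_cfg(struct rw_semaphore *sem, int *cfg, int val)
{
	if (down_write_killable(sem))
		return -EINTR;		/* fatal signal; lock not held */
	*cfg = val;			/* exclusive access */
	up_write(sem);
	return 0;
}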
1595916633a4SMichal Hocko 
1596916633a4SMichal Hocko /*
1597ed428bfcSPeter Zijlstra  * trylock for writing -- returns 1 if successful, 0 if contention
1598ed428bfcSPeter Zijlstra  */
1599ed428bfcSPeter Zijlstra int down_write_trylock(struct rw_semaphore *sem)
1600ed428bfcSPeter Zijlstra {
1601ed428bfcSPeter Zijlstra 	int ret = __down_write_trylock(sem);
1602ed428bfcSPeter Zijlstra 
1603c7580c1eSWaiman Long 	if (ret == 1)
1604ed428bfcSPeter Zijlstra 		rwsem_acquire(&sem->dep_map, 0, 1, _RET_IP_);
16054fc828e2SDavidlohr Bueso 
1606ed428bfcSPeter Zijlstra 	return ret;
1607ed428bfcSPeter Zijlstra }
1608ed428bfcSPeter Zijlstra EXPORT_SYMBOL(down_write_trylock);
1609ed428bfcSPeter Zijlstra 
1610ed428bfcSPeter Zijlstra /*
1611ed428bfcSPeter Zijlstra  * release a read lock
1612ed428bfcSPeter Zijlstra  */
1613ed428bfcSPeter Zijlstra void up_read(struct rw_semaphore *sem)
1614ed428bfcSPeter Zijlstra {
16155facae4fSQian Cai 	rwsem_release(&sem->dep_map, _RET_IP_);
1616ed428bfcSPeter Zijlstra 	__up_read(sem);
1617ed428bfcSPeter Zijlstra }
1618ed428bfcSPeter Zijlstra EXPORT_SYMBOL(up_read);
1619ed428bfcSPeter Zijlstra 
1620ed428bfcSPeter Zijlstra /*
1621ed428bfcSPeter Zijlstra  * release a write lock
1622ed428bfcSPeter Zijlstra  */
1623ed428bfcSPeter Zijlstra void up_write(struct rw_semaphore *sem)
1624ed428bfcSPeter Zijlstra {
16255facae4fSQian Cai 	rwsem_release(&sem->dep_map, _RET_IP_);
1626ed428bfcSPeter Zijlstra 	__up_write(sem);
1627ed428bfcSPeter Zijlstra }
1628ed428bfcSPeter Zijlstra EXPORT_SYMBOL(up_write);
1629ed428bfcSPeter Zijlstra 
1630ed428bfcSPeter Zijlstra /*
1631ed428bfcSPeter Zijlstra  * downgrade write lock to read lock
1632ed428bfcSPeter Zijlstra  */
1633ed428bfcSPeter Zijlstra void downgrade_write(struct rw_semaphore *sem)
1634ed428bfcSPeter Zijlstra {
16356419c4afSJ. R. Okajima 	lock_downgrade(&sem->dep_map, _RET_IP_);
1636ed428bfcSPeter Zijlstra 	__downgrade_write(sem);
1637ed428bfcSPeter Zijlstra }
1638ed428bfcSPeter Zijlstra EXPORT_SYMBOL(downgrade_write);
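
/*
 * Editor's illustration (not part of rwsem.c): publish an update under
 * the write lock, then downgrade so other readers can proceed while
 * this thread keeps a consistent read-side view without ever dropping
 * the lock.  publish_then_read is hypothetical.
 */
static void publish_then_read(struct rw_semaphore *sem, int *cfg, int val)
{
	down_write(sem);
	*cfg = val;			/* exclusive update */
	downgrade_write(sem);		/* now held for read; writers stay excluded until up_read() */
	/* ... read-side work against the value just published ... */
	up_read(sem);
}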
1639ed428bfcSPeter Zijlstra 
1640ed428bfcSPeter Zijlstra #ifdef CONFIG_DEBUG_LOCK_ALLOC
1641ed428bfcSPeter Zijlstra 
1642ed428bfcSPeter Zijlstra void down_read_nested(struct rw_semaphore *sem, int subclass)
1643ed428bfcSPeter Zijlstra {
1644ed428bfcSPeter Zijlstra 	might_sleep();
1645ed428bfcSPeter Zijlstra 	rwsem_acquire_read(&sem->dep_map, subclass, 0, _RET_IP_);
1646ed428bfcSPeter Zijlstra 	LOCK_CONTENDED(sem, __down_read_trylock, __down_read);
1647ed428bfcSPeter Zijlstra }
1648ed428bfcSPeter Zijlstra EXPORT_SYMBOL(down_read_nested);
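
/*
 * Editor's illustration (not part of rwsem.c): when two rwsems of the
 * same lock class are legitimately nested, the inner acquisition uses a
 * lockdep subclass (SINGLE_DEPTH_NESTING from <linux/lockdep.h>) to
 * avoid a false-positive deadlock report.  Names are hypothetical.
 */
static void lock_parent_then_child(struct rw_semaphore *parent,
				   struct rw_semaphore *child)
{
	down_read(parent);
	down_read_nested(child, SINGLE_DEPTH_NESTING);
	/* ... work under both locks ... */
	up_read(child);
	up_read(parent);
}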
1649ed428bfcSPeter Zijlstra 
16500f9368b5SEric W. Biederman int down_read_killable_nested(struct rw_semaphore *sem, int subclass)
16510f9368b5SEric W. Biederman {
16520f9368b5SEric W. Biederman 	might_sleep();
16530f9368b5SEric W. Biederman 	rwsem_acquire_read(&sem->dep_map, subclass, 0, _RET_IP_);
16540f9368b5SEric W. Biederman 
16550f9368b5SEric W. Biederman 	if (LOCK_CONTENDED_RETURN(sem, __down_read_trylock, __down_read_killable)) {
16560f9368b5SEric W. Biederman 		rwsem_release(&sem->dep_map, _RET_IP_);
16570f9368b5SEric W. Biederman 		return -EINTR;
16580f9368b5SEric W. Biederman 	}
16590f9368b5SEric W. Biederman 
16600f9368b5SEric W. Biederman 	return 0;
16610f9368b5SEric W. Biederman }
16620f9368b5SEric W. Biederman EXPORT_SYMBOL(down_read_killable_nested);
16630f9368b5SEric W. Biederman 
1664ed428bfcSPeter Zijlstra void _down_write_nest_lock(struct rw_semaphore *sem, struct lockdep_map *nest)
1665ed428bfcSPeter Zijlstra {
1666ed428bfcSPeter Zijlstra 	might_sleep();
1667ed428bfcSPeter Zijlstra 	rwsem_acquire_nest(&sem->dep_map, 0, 0, nest, _RET_IP_);
1668ed428bfcSPeter Zijlstra 	LOCK_CONTENDED(sem, __down_write_trylock, __down_write);
1669ed428bfcSPeter Zijlstra }
1670ed428bfcSPeter Zijlstra EXPORT_SYMBOL(_down_write_nest_lock);
1671ed428bfcSPeter Zijlstra 
1672ed428bfcSPeter Zijlstra void down_read_non_owner(struct rw_semaphore *sem)
1673ed428bfcSPeter Zijlstra {
1674ed428bfcSPeter Zijlstra 	might_sleep();
1675ed428bfcSPeter Zijlstra 	__down_read(sem);
16763f524553SWaiman Long 	/*
16773f524553SWaiman Long 	 * The owner value for a reader-owned lock is mostly for debugging
16783f524553SWaiman Long 	 * purposes only and is not critical to the correct functioning of
16793f524553SWaiman Long 	 * rwsem. So it is perfectly fine to set it in a preempt-enabled
16803f524553SWaiman Long 	 * context here.
16813f524553SWaiman Long 	 */
1682925b9cd1SWaiman Long 	__rwsem_set_reader_owned(sem, NULL);
1683ed428bfcSPeter Zijlstra }
1684ed428bfcSPeter Zijlstra EXPORT_SYMBOL(down_read_non_owner);
1685ed428bfcSPeter Zijlstra 
1686ed428bfcSPeter Zijlstra void down_write_nested(struct rw_semaphore *sem, int subclass)
1687ed428bfcSPeter Zijlstra {
1688ed428bfcSPeter Zijlstra 	might_sleep();
1689ed428bfcSPeter Zijlstra 	rwsem_acquire(&sem->dep_map, subclass, 0, _RET_IP_);
1690ed428bfcSPeter Zijlstra 	LOCK_CONTENDED(sem, __down_write_trylock, __down_write);
1691ed428bfcSPeter Zijlstra }
1692ed428bfcSPeter Zijlstra EXPORT_SYMBOL(down_write_nested);
1693ed428bfcSPeter Zijlstra 
1694887bddfaSAl Viro int __sched down_write_killable_nested(struct rw_semaphore *sem, int subclass)
1695887bddfaSAl Viro {
1696887bddfaSAl Viro 	might_sleep();
1697887bddfaSAl Viro 	rwsem_acquire(&sem->dep_map, subclass, 0, _RET_IP_);
1698887bddfaSAl Viro 
16996cef7ff6SWaiman Long 	if (LOCK_CONTENDED_RETURN(sem, __down_write_trylock,
17006cef7ff6SWaiman Long 				  __down_write_killable)) {
17015facae4fSQian Cai 		rwsem_release(&sem->dep_map, _RET_IP_);
1702887bddfaSAl Viro 		return -EINTR;
1703887bddfaSAl Viro 	}
1704887bddfaSAl Viro 
1705887bddfaSAl Viro 	return 0;
1706887bddfaSAl Viro }
1707887bddfaSAl Viro EXPORT_SYMBOL(down_write_killable_nested);
1708887bddfaSAl Viro 
1709ed428bfcSPeter Zijlstra void up_read_non_owner(struct rw_semaphore *sem)
1710ed428bfcSPeter Zijlstra {
171194a9717bSWaiman Long 	DEBUG_RWSEMS_WARN_ON(!is_rwsem_reader_owned(sem), sem);
1712ed428bfcSPeter Zijlstra 	__up_read(sem);
1713ed428bfcSPeter Zijlstra }
1714ed428bfcSPeter Zijlstra EXPORT_SYMBOL(up_read_non_owner);
1715ed428bfcSPeter Zijlstra 
1716ed428bfcSPeter Zijlstra #endif
1717