// SPDX-License-Identifier: GPL-2.0-only

/*
 * RT-specific reader/writer semaphores and reader/writer locks
 *
 * down_write/write_lock()
 * 1) Lock rtmutex
 * 2) Remove the reader BIAS to force readers into the slow path
 * 3) Wait until all readers have left the critical section
 * 4) Mark it write locked
 *
 * up_write/write_unlock()
 * 1) Remove the write locked marker
 * 2) Set the reader BIAS, so readers can use the fast path again
 * 3) Unlock rtmutex, to release blocked readers
 *
 * down_read/read_lock()
 * 1) Try fast path acquisition (reader BIAS is set)
 * 2) Take rtmutex::wait_lock, which protects the writelocked flag
 * 3) If !writelocked, acquire it for read
 * 4) If writelocked, block on rtmutex
 * 5) Unlock rtmutex, goto 1)
 *
 * up_read/read_unlock()
 * 1) Try fast path release (reader count != 1)
 * 2) Wake the writer waiting in down_write()/write_lock() #3
 *
 * down_read/read_lock()#3 has the consequence that rw semaphores and rw
 * locks on RT are not writer fair. Writers, which should be avoided in
 * RT tasks (think mmap_sem), are however subject to the rtmutex
 * priority/DL inheritance mechanism.
 *
 * It's possible to make the rw primitives writer fair by keeping a list of
 * active readers. A blocked writer would force all newly incoming readers
 * to block on the rtmutex, but the rtmutex would have to be proxy locked
 * for one reader after the other. We can't use multi-reader inheritance
 * because there is no way to support that with SCHED_DEADLINE.
 * Implementing the one-by-one reader boosting/handover mechanism would be
 * major surgery for very dubious value.
 *
 * The risk of writer starvation is there, but the pathological use cases
 * which trigger it are not necessarily the typical RT workloads.
 *
 * Fast-path orderings:
 * The lock/unlock of readers can run in fast paths: lock and unlock are only
 * atomic ops, and there is no inner lock to provide the ACQUIRE and RELEASE
 * semantics of rwbase_rt. The atomic ops therefore have to provide _acquire()
 * and _release() semantics (or stronger).
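 *
 * Concretely, the reader fast-path lock uses atomic_try_cmpxchg_acquire()
 * in rwbase_read_trylock(), which pairs with the atomic_add_return_release()
 * in __rwbase_write_unlock(), and the reader fast-path unlock uses the fully
 * ordered atomic_dec_and_test() in rwbase_read_unlock(), which pairs with
 * the atomic_read_acquire() in __rwbase_write_trylock().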
 *
 * Common code shared between RT rw_semaphore and rwlock
 */

static __always_inline int rwbase_read_trylock(struct rwbase_rt *rwb)
{
	int r;

	/*
	 * Increment the reader count if rwb->readers < 0, i.e. READER_BIAS
	 * is set.
	 */
	for (r = atomic_read(&rwb->readers); r < 0;) {
		if (likely(atomic_try_cmpxchg_acquire(&rwb->readers, &r, r + 1)))
			return 1;
	}
	return 0;
}

static int __sched __rwbase_read_lock(struct rwbase_rt *rwb,
				      unsigned int state)
{
	struct rt_mutex_base *rtm = &rwb->rtmutex;
	int ret;

	raw_spin_lock_irq(&rtm->wait_lock);
	/*
	 * Allow readers as long as the writer has not completely
	 * acquired the semaphore for write.
	 */
	if (atomic_read(&rwb->readers) != WRITER_BIAS) {
		atomic_inc(&rwb->readers);
		raw_spin_unlock_irq(&rtm->wait_lock);
		return 0;
	}

	/*
	 * Call into the slow lock path with the rtmutex->wait_lock
	 * held, so this can't result in the following race:
	 *
	 * Reader1              Reader2                 Writer
	 *                      down_read()
	 *                                              down_write()
	 *                                              rtmutex_lock(m)
	 *                                              wait()
	 * down_read()
	 * unlock(m->wait_lock)
	 *                      up_read()
	 *                      wake(Writer)
	 *                                              lock(m->wait_lock)
	 *                                              sem->writelocked=true
	 *                                              unlock(m->wait_lock)
	 *
	 *                                              up_write()
	 *                                              sem->writelocked=false
	 *                                              rtmutex_unlock(m)
	 *                      down_read()
	 *                                              down_write()
	 *                                              rtmutex_lock(m)
	 *                                              wait()
	 * rtmutex_lock(m)
	 *
	 * That would put Reader1 behind the writer waiting on
	 * Reader2 to call up_read(), which might be unbounded.
	 */

	trace_contention_begin(rwb, LCB_F_RT | LCB_F_READ);

	/*
	 * For rwlocks this returns 0 unconditionally, so the below
	 * !ret conditionals are optimized out.
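	 *
	 * For rwsems, an interruptible or killable acquisition can fail here
	 * (e.g. with -EINTR on a signal); in that case the reader count is
	 * not incremented and the error is returned to the caller.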
	 */
	ret = rwbase_rtmutex_slowlock_locked(rtm, state);

	/*
	 * On success the rtmutex is held, so there can't be a writer
	 * active. Increment the reader count and immediately drop the
	 * rtmutex again.
	 *
	 * rtmutex->wait_lock has to be unlocked in any case, of course.
	 */
	if (!ret)
		atomic_inc(&rwb->readers);
	raw_spin_unlock_irq(&rtm->wait_lock);
	if (!ret)
		rwbase_rtmutex_unlock(rtm);

	trace_contention_end(rwb, ret);
	return ret;
}

static __always_inline int rwbase_read_lock(struct rwbase_rt *rwb,
					    unsigned int state)
{
	if (rwbase_read_trylock(rwb))
		return 0;

	return __rwbase_read_lock(rwb, state);
}

static void __sched __rwbase_read_unlock(struct rwbase_rt *rwb,
					 unsigned int state)
{
	struct rt_mutex_base *rtm = &rwb->rtmutex;
	struct task_struct *owner;
	DEFINE_RT_WAKE_Q(wqh);

	raw_spin_lock_irq(&rtm->wait_lock);
	/*
	 * Wake the writer, i.e. the rtmutex owner. It might release the
	 * rtmutex concurrently in the fast path (due to a signal), but to
	 * clean up rwb->readers it needs to acquire rtm->wait_lock. The
	 * worst that can happen is a spurious wakeup.
	 */
	owner = rt_mutex_owner(rtm);
	if (owner)
		rt_mutex_wake_q_add_task(&wqh, owner, state);

	/* Pairs with the preempt_enable() in rt_mutex_wake_up_q() */
	preempt_disable();
	raw_spin_unlock_irq(&rtm->wait_lock);
	rt_mutex_wake_up_q(&wqh);
}

static __always_inline void rwbase_read_unlock(struct rwbase_rt *rwb,
					       unsigned int state)
{
	/*
	 * rwb->readers can only hit 0 when a writer is waiting for the
	 * active readers to leave the critical section.
	 *
	 * dec_and_test() is fully ordered, provides RELEASE.
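	 * It pairs with the atomic_read_acquire() in __rwbase_write_trylock().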
	 */
	if (unlikely(atomic_dec_and_test(&rwb->readers)))
		__rwbase_read_unlock(rwb, state);
}

static inline void __rwbase_write_unlock(struct rwbase_rt *rwb, int bias,
					 unsigned long flags)
{
	struct rt_mutex_base *rtm = &rwb->rtmutex;

	/*
	 * _release() is needed in case a reader is in the fast path, pairing
	 * with the atomic_try_cmpxchg_acquire() in rwbase_read_trylock().
	 */
	(void)atomic_add_return_release(READER_BIAS - bias, &rwb->readers);
	raw_spin_unlock_irqrestore(&rtm->wait_lock, flags);
	rwbase_rtmutex_unlock(rtm);
}

static inline void rwbase_write_unlock(struct rwbase_rt *rwb)
{
	struct rt_mutex_base *rtm = &rwb->rtmutex;
	unsigned long flags;

	raw_spin_lock_irqsave(&rtm->wait_lock, flags);
	__rwbase_write_unlock(rwb, WRITER_BIAS, flags);
}

static inline void rwbase_write_downgrade(struct rwbase_rt *rwb)
{
	struct rt_mutex_base *rtm = &rwb->rtmutex;
	unsigned long flags;

	raw_spin_lock_irqsave(&rtm->wait_lock, flags);
	/* Release it and account current as reader */
	__rwbase_write_unlock(rwb, WRITER_BIAS - 1, flags);
}

static inline bool __rwbase_write_trylock(struct rwbase_rt *rwb)
{
	/* Can do without CAS because we're serialized by wait_lock. */
	lockdep_assert_held(&rwb->rtmutex.wait_lock);

	/*
	 * _acquire() is needed in case a reader is in the fast path; pairing
	 * with rwbase_read_unlock(), it provides ACQUIRE.
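	 * That ordering guarantees that everything done in reader critical
	 * sections which have already released rwb is visible to the new
	 * write critical section.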
	 */
	if (!atomic_read_acquire(&rwb->readers)) {
		atomic_set(&rwb->readers, WRITER_BIAS);
		return true;
	}

	return false;
}

static int __sched rwbase_write_lock(struct rwbase_rt *rwb,
				     unsigned int state)
{
	struct rt_mutex_base *rtm = &rwb->rtmutex;
	unsigned long flags;

	/* Take the rtmutex as a first step */
	if (rwbase_rtmutex_lock_state(rtm, state))
		return -EINTR;

	/* Force readers into the slow path */
	atomic_sub(READER_BIAS, &rwb->readers);

	raw_spin_lock_irqsave(&rtm->wait_lock, flags);
	if (__rwbase_write_trylock(rwb))
		goto out_unlock;

	rwbase_set_and_save_current_state(state);
	trace_contention_begin(rwb, LCB_F_RT | LCB_F_WRITE);
	for (;;) {
		/* Optimized out for rwlocks */
		if (rwbase_signal_pending_state(state, current)) {
			rwbase_restore_current_state();
			__rwbase_write_unlock(rwb, 0, flags);
			trace_contention_end(rwb, -EINTR);
			return -EINTR;
		}

		if (__rwbase_write_trylock(rwb))
			break;

		raw_spin_unlock_irqrestore(&rtm->wait_lock, flags);
		rwbase_schedule();
		raw_spin_lock_irqsave(&rtm->wait_lock, flags);

		set_current_state(state);
	}
	rwbase_restore_current_state();
	trace_contention_end(rwb, 0);

out_unlock:
	raw_spin_unlock_irqrestore(&rtm->wait_lock, flags);
	return 0;
}

static inline int rwbase_write_trylock(struct rwbase_rt *rwb)
{
	struct rt_mutex_base *rtm = &rwb->rtmutex;
	unsigned long flags;

	if (!rwbase_rtmutex_trylock(rtm))
		return 0;

	atomic_sub(READER_BIAS, &rwb->readers);

	raw_spin_lock_irqsave(&rtm->wait_lock, flags);
	if (__rwbase_write_trylock(rwb)) {
		raw_spin_unlock_irqrestore(&rtm->wait_lock, flags);
		return 1;
	}
	__rwbase_write_unlock(rwb, 0, flags);
	return 0;
}
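
/*
 * Note: this file is not built standalone. The PREEMPT_RT rw_semaphore and
 * rwlock implementations include it after defining the rwbase_rtmutex_*()
 * and other rwbase_*() helpers used above; the rwlock variants define them
 * such that the signal handling branches above are optimized out.
 */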