// SPDX-License-Identifier: GPL-2.0-only

/*
 * RT-specific reader/writer semaphores and reader/writer locks
 *
 * down_write/write_lock()
 *  1) Lock rtmutex
 *  2) Remove the reader BIAS to force readers into the slow path
 *  3) Wait until all readers have left the critical section
 *  4) Mark it write locked
 *
 * up_write/write_unlock()
 *  1) Remove the write locked marker
 *  2) Set the reader BIAS, so readers can use the fast path again
 *  3) Unlock rtmutex, to release blocked readers
 *
 * down_read/read_lock()
 *  1) Try fast path acquisition (reader BIAS is set)
 *  2) Take rtmutex::wait_lock, which protects the writelocked flag
 *  3) If !writelocked, acquire it for read
 *  4) If writelocked, block on rtmutex
 *  5) Unlock rtmutex, goto 1)
 *
 * up_read/read_unlock()
 *  1) Try fast path release (reader count != 1)
 *  2) Wake the writer waiting in down_write()/write_lock() #3
 *
 * down_read/read_lock() #3 has the consequence that rw semaphores and rw
 * locks on RT are not writer fair. On the other hand, writers, which should
 * be avoided in RT tasks anyway (think mmap_sem), are subject to the
 * rtmutex priority/DL inheritance mechanism.
 *
 * It's possible to make the rw primitives writer fair by keeping a list of
 * active readers. A blocked writer would force all newly incoming readers
 * to block on the rtmutex, but the rtmutex would have to be proxy locked
 * for one reader after the other. We can't use multi-reader inheritance
 * because there is no way to support that with SCHED_DEADLINE.
 * Implementing the one-by-one reader boosting/handover mechanism would be
 * major surgery for very dubious value.
 *
 * The risk of writer starvation is there, but the pathological use cases
 * which trigger it are not necessarily the typical RT workloads.
 *
 * Fast-path orderings:
 * Reader lock and unlock can run in the fast path: they are plain atomic
 * ops and there is no inner lock to provide the ACQUIRE and RELEASE
 * semantics of rwbase_rt. The atomic ops must therefore provide _acquire()
 * and _release() ordering (or stronger). See the ordering sketch below.
 *
 * Common code shared between RT rw_semaphore and rwlock
 */
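
/*
 * Ordering sketch (illustrative only; the shared variable is made up and
 * not part of this file): a writer publishes data inside its critical
 * section, and a reader that subsequently takes the fast path must
 * observe it.
 *
 *	Writer (write locked)           Reader (fast path)
 *	shared_data = 42;
 *	rwbase_write_unlock()
 *	  atomic_add_return_release()
 *	                                rwbase_read_trylock()
 *	                                  atomic_try_cmpxchg_acquire()
 *	                                r = shared_data;   (must observe 42)
 *
 * Without RELEASE on the writer's counter update the store to shared_data
 * could be reordered past the unlock; without ACQUIRE on the reader's
 * increment the load could be hoisted before the lock. The fast-path
 * atomics below therefore use the _acquire()/_release() variants.
 */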

static __always_inline int rwbase_read_trylock(struct rwbase_rt *rwb)
{
	int r;

	/*
	 * Increment the reader count if rwb->readers < 0, i.e. READER_BIAS
	 * is set.
	 */
	for (r = atomic_read(&rwb->readers); r < 0;) {
		if (likely(atomic_try_cmpxchg_acquire(&rwb->readers, &r, r + 1)))
			return 1;
	}
	return 0;
}
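
/*
 * Illustrative summary of the rwb->readers states which the fast and slow
 * paths rely on. The bias constants are defined in the rwbase_rt header;
 * this walk-through only uses facts visible in this file:
 *
 *	READER_BIAS + n (negative)	no writer, n readers hold the lock,
 *					the fast path above succeeds
 *	n >= 0				a writer subtracted READER_BIAS and
 *					waits for the remaining n readers,
 *					new readers take the slow path
 *	0				the last reader left, the writer
 *					can claim the lock
 *	WRITER_BIAS			write locked, readers block on the
 *					rtmutex
 */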

static int __sched __rwbase_read_lock(struct rwbase_rt *rwb,
				      unsigned int state)
{
	struct rt_mutex_base *rtm = &rwb->rtmutex;
	int ret;

	raw_spin_lock_irq(&rtm->wait_lock);
	/*
	 * Allow readers, as long as the writer has not completely
	 * acquired the semaphore for write.
	 */
	if (atomic_read(&rwb->readers) != WRITER_BIAS) {
		atomic_inc(&rwb->readers);
		raw_spin_unlock_irq(&rtm->wait_lock);
		return 0;
	}

	/*
	 * Call into the slow lock path with the rtmutex->wait_lock
	 * held, so this can't result in the following race:
	 *
	 * Reader1                Reader2                 Writer
	 *                        down_read()
	 *                                                down_write()
	 *                                                rtmutex_lock(m)
	 *                                                wait()
	 * down_read()
	 * unlock(m->wait_lock)
	 *                        up_read()
	 *                        wake(Writer)
	 *                                                lock(m->wait_lock)
	 *                                                sem->writelocked=true
	 *                                                unlock(m->wait_lock)
	 *
	 *                                                up_write()
	 *                                                sem->writelocked=false
	 *                                                rtmutex_unlock(m)
	 *                        down_read()
	 *                                                down_write()
	 *                                                rtmutex_lock(m)
	 *                                                wait()
	 * rtmutex_lock(m)
	 *
	 * That would put Reader1 behind the writer waiting on
	 * Reader2 to call up_read(), which might take unbounded time.
	 */

	/*
	 * For rwlocks this returns 0 unconditionally, so the below
	 * !ret conditionals are optimized out.
	 */
	ret = rwbase_rtmutex_slowlock_locked(rtm, state);

	/*
	 * On success the rtmutex is held, so there can't be a writer
	 * active. Increment the reader count and immediately drop the
	 * rtmutex again.
	 *
	 * rtmutex->wait_lock has to be unlocked in any case of course.
	 */
	if (!ret)
		atomic_inc(&rwb->readers);
	raw_spin_unlock_irq(&rtm->wait_lock);
	if (!ret)
		rwbase_rtmutex_unlock(rtm);
	return ret;
}

static __always_inline int rwbase_read_lock(struct rwbase_rt *rwb,
					    unsigned int state)
{
	if (rwbase_read_trylock(rwb))
		return 0;

	return __rwbase_read_lock(rwb, state);
}

static void __sched __rwbase_read_unlock(struct rwbase_rt *rwb,
					 unsigned int state)
{
	struct rt_mutex_base *rtm = &rwb->rtmutex;
	struct task_struct *owner;
	DEFINE_RT_WAKE_Q(wqh);

	raw_spin_lock_irq(&rtm->wait_lock);
	/*
	 * Wake the writer, i.e. the rtmutex owner. It might release the
	 * rtmutex concurrently in the fast path (due to a signal), but to
	 * clean up rwb->readers it needs to acquire rtm->wait_lock. The
	 * worst case which can happen is a spurious wakeup.
	 */
	owner = rt_mutex_owner(rtm);
	if (owner)
		rt_mutex_wake_q_add_task(&wqh, owner, state);

	/* Pairs with the preempt_enable() in rt_mutex_wake_up_q() */
	preempt_disable();
	raw_spin_unlock_irq(&rtm->wait_lock);
	rt_mutex_wake_up_q(&wqh);
}
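
/*
 * Hand-off from the last reader to a waiting writer, in the same style as
 * the race diagram above (a condensed sketch of the functions in this
 * file, not additional code):
 *
 *	Last reader                     Writer
 *	                                rwbase_write_lock()
 *	                                  rtmutex locked
 *	                                  atomic_sub(READER_BIAS)
 *	                                  __rwbase_write_trylock() fails
 *	                                  rwbase_schedule()
 *	rwbase_read_unlock()
 *	  atomic_dec_and_test() hits 0
 *	  __rwbase_read_unlock()
 *	    lock(m->wait_lock)
 *	    wake(rt_mutex_owner(m))
 *	    unlock(m->wait_lock)
 *	                                  __rwbase_write_trylock()
 *	                                    readers == 0 -> WRITER_BIAS
 */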

static __always_inline void rwbase_read_unlock(struct rwbase_rt *rwb,
					       unsigned int state)
{
	/*
	 * rwb->readers can only hit 0 when a writer is waiting for the
	 * active readers to leave the critical section.
	 *
	 * dec_and_test() is fully ordered, provides RELEASE.
	 */
	if (unlikely(atomic_dec_and_test(&rwb->readers)))
		__rwbase_read_unlock(rwb, state);
}

static inline void __rwbase_write_unlock(struct rwbase_rt *rwb, int bias,
					 unsigned long flags)
{
	struct rt_mutex_base *rtm = &rwb->rtmutex;

	/*
	 * _release() is needed in case a reader is in the fast path, pairing
	 * with the atomic_try_cmpxchg_acquire() in rwbase_read_trylock().
	 */
	(void)atomic_add_return_release(READER_BIAS - bias, &rwb->readers);
	raw_spin_unlock_irqrestore(&rtm->wait_lock, flags);
	rwbase_rtmutex_unlock(rtm);
}

static inline void rwbase_write_unlock(struct rwbase_rt *rwb)
{
	struct rt_mutex_base *rtm = &rwb->rtmutex;
	unsigned long flags;

	raw_spin_lock_irqsave(&rtm->wait_lock, flags);
	__rwbase_write_unlock(rwb, WRITER_BIAS, flags);
}

static inline void rwbase_write_downgrade(struct rwbase_rt *rwb)
{
	struct rt_mutex_base *rtm = &rwb->rtmutex;
	unsigned long flags;

	raw_spin_lock_irqsave(&rtm->wait_lock, flags);
	/* Release it and account current as reader */
	__rwbase_write_unlock(rwb, WRITER_BIAS - 1, flags);
}
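
/*
 * Worked arithmetic for the two unlock flavours above, starting from the
 * write-locked state (readers == WRITER_BIAS, see __rwbase_write_trylock()
 * below):
 *
 *	rwbase_write_unlock():    readers += READER_BIAS - WRITER_BIAS
 *				  -> readers == READER_BIAS
 *				  (fast path open, no reader accounted)
 *
 *	rwbase_write_downgrade(): readers += READER_BIAS - (WRITER_BIAS - 1)
 *				  -> readers == READER_BIAS + 1
 *				  (fast path open, current accounted as the
 *				   one remaining reader)
 */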

static inline bool __rwbase_write_trylock(struct rwbase_rt *rwb)
{
	/* Can do without CAS because we're serialized by wait_lock. */
	lockdep_assert_held(&rwb->rtmutex.wait_lock);

	/*
	 * _acquire is needed in case a reader is in the fast path, pairing
	 * with the fully ordered atomic_dec_and_test() in
	 * rwbase_read_unlock(); provides ACQUIRE.
	 */
	if (!atomic_read_acquire(&rwb->readers)) {
		atomic_set(&rwb->readers, WRITER_BIAS);
		return true;
	}

	return false;
}

static int __sched rwbase_write_lock(struct rwbase_rt *rwb,
				     unsigned int state)
{
	struct rt_mutex_base *rtm = &rwb->rtmutex;
	unsigned long flags;

	/* Take the rtmutex as a first step */
	if (rwbase_rtmutex_lock_state(rtm, state))
		return -EINTR;

	/* Force readers into the slow path */
	atomic_sub(READER_BIAS, &rwb->readers);

	raw_spin_lock_irqsave(&rtm->wait_lock, flags);
	if (__rwbase_write_trylock(rwb))
		goto out_unlock;

	rwbase_set_and_save_current_state(state);
	for (;;) {
		/* Optimized out for rwlocks */
		if (rwbase_signal_pending_state(state, current)) {
			rwbase_restore_current_state();
			__rwbase_write_unlock(rwb, 0, flags);
			return -EINTR;
		}

		if (__rwbase_write_trylock(rwb))
			break;

		raw_spin_unlock_irqrestore(&rtm->wait_lock, flags);
		rwbase_schedule();
		raw_spin_lock_irqsave(&rtm->wait_lock, flags);

		set_current_state(state);
	}
	rwbase_restore_current_state();

out_unlock:
	raw_spin_unlock_irqrestore(&rtm->wait_lock, flags);
	return 0;
}

static inline int rwbase_write_trylock(struct rwbase_rt *rwb)
{
	struct rt_mutex_base *rtm = &rwb->rtmutex;
	unsigned long flags;

	if (!rwbase_rtmutex_trylock(rtm))
		return 0;

	atomic_sub(READER_BIAS, &rwb->readers);

	raw_spin_lock_irqsave(&rtm->wait_lock, flags);
	if (__rwbase_write_trylock(rwb)) {
		raw_spin_unlock_irqrestore(&rtm->wait_lock, flags);
		return 1;
	}
	__rwbase_write_unlock(rwb, 0, flags);
	return 0;
}
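
/*
 * This file is not built standalone: the RT rw_semaphore and rwlock
 * implementations provide the rwbase_rtmutex_*()/rwbase_*() hooks used
 * above and then include it. A minimal, illustrative sketch of such a
 * front end follows; struct example_rwsem and the example_*() wrappers are
 * made up for illustration and are not actual kernel API:
 *
 *	struct example_rwsem {
 *		struct rwbase_rt	rwbase;
 *	};
 *
 *	static inline void example_down_read(struct example_rwsem *sem)
 *	{
 *		rwbase_read_lock(&sem->rwbase, TASK_UNINTERRUPTIBLE);
 *	}
 *
 *	static inline void example_up_read(struct example_rwsem *sem)
 *	{
 *		rwbase_read_unlock(&sem->rwbase, TASK_NORMAL);
 *	}
 *
 *	static inline void example_down_write(struct example_rwsem *sem)
 *	{
 *		rwbase_write_lock(&sem->rwbase, TASK_UNINTERRUPTIBLE);
 *	}
 *
 *	static inline void example_up_write(struct example_rwsem *sem)
 *	{
 *		rwbase_write_unlock(&sem->rwbase);
 *	}
 */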