// SPDX-License-Identifier: GPL-2.0-only

/*
 * RT-specific reader/writer semaphores and reader/writer locks
 *
 * down_write/write_lock()
 *  1) Lock rtmutex
 *  2) Remove the reader BIAS to force readers into the slow path
 *  3) Wait until all readers have left the critical section
 *  4) Mark it write locked
 *
 * up_write/write_unlock()
 *  1) Remove the write locked marker
 *  2) Set the reader BIAS, so readers can use the fast path again
 *  3) Unlock rtmutex, to release blocked readers
 *
 * down_read/read_lock()
 *  1) Try fast path acquisition (reader BIAS is set)
 *  2) Take rtmutex::wait_lock, which protects the writelocked flag
 *  3) If !writelocked, acquire it for read
 *  4) If writelocked, block on rtmutex
 *  5) Unlock rtmutex, goto 1)
 *
 * up_read/read_unlock()
 *  1) Try fast path release (reader count != 1)
 *  2) Wake the writer waiting in down_write()/write_lock() #3
 *
 * down_read/read_lock() #3 has the consequence that rw semaphores and rw
 * locks on RT are not writer fair, but writers, which should be avoided in
 * RT tasks (think mmap_sem), are subject to the rtmutex priority/DL
 * inheritance mechanism.
 *
 * It's possible to make the rw primitives writer fair by keeping a list of
 * active readers. A blocked writer would force all newly incoming readers
 * to block on the rtmutex, but the rtmutex would have to be proxy locked
 * for one reader after the other. We can't use multi-reader inheritance
 * because there is no way to support that with SCHED_DEADLINE.
 * Implementing the one-by-one reader boosting/handover mechanism would be
 * major surgery for very dubious value.
 *
 * The risk of writer starvation is there, but the pathological use cases
 * which trigger it are not necessarily the typical RT workloads.
 *
 * Common code shared between RT rw_semaphore and rwlock
 */

static __always_inline int rwbase_read_trylock(struct rwbase_rt *rwb)
{
	int r;

	/*
	 * Increment the reader count, if rwb->readers < 0, i.e. READER_BIAS
	 * is set.
	 */
	for (r = atomic_read(&rwb->readers); r < 0;) {
		if (likely(atomic_try_cmpxchg(&rwb->readers, &r, r + 1)))
			return 1;
	}
	return 0;
}
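/*
 * Illustrative sketch (not part of the implementation): the fast path
 * above only succeeds while rwb->readers is negative, i.e. while the
 * reader BIAS is still in the count. Assuming the definitions in
 * include/linux/rwbase_rt.h, READER_BIAS == (1U << 31) and
 * WRITER_BIAS == (1U << 30), the counter moves roughly like this:
 *
 *	READER_BIAS		unlocked, fast path open
 *	READER_BIAS + n		n readers active, fast path still open
 *				(value is negative as a signed int)
 *	n			writer subtracted READER_BIAS, n readers
 *				still inside, fast path closed (r >= 0)
 *	0			last reader left, writer may proceed
 *	WRITER_BIAS		write locked
 */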
static int __sched __rwbase_read_lock(struct rwbase_rt *rwb,
				      unsigned int state)
{
	struct rt_mutex_base *rtm = &rwb->rtmutex;
	int ret;

	raw_spin_lock_irq(&rtm->wait_lock);
	/*
	 * Allow readers, as long as the writer has not completely
	 * acquired the semaphore for write.
	 */
	if (atomic_read(&rwb->readers) != WRITER_BIAS) {
		atomic_inc(&rwb->readers);
		raw_spin_unlock_irq(&rtm->wait_lock);
		return 0;
	}

	/*
	 * Call into the slow lock path with the rtmutex->wait_lock
	 * held, so this can't result in the following race:
	 *
	 * Reader1		Reader2		Writer
	 *			down_read()
	 *					down_write()
	 *					rtmutex_lock(m)
	 *					wait()
	 * down_read()
	 * unlock(m->wait_lock)
	 *			up_read()
	 *			wake(Writer)
	 *					lock(m->wait_lock)
	 *					sem->writelocked=true
	 *					unlock(m->wait_lock)
	 *
	 *					up_write()
	 *					sem->writelocked=false
	 *					rtmutex_unlock(m)
	 *			down_read()
	 *					down_write()
	 *					rtmutex_lock(m)
	 *					wait()
	 * rtmutex_lock(m)
	 *
	 * That would put Reader1 behind the writer waiting on
	 * Reader2 to call up_read(), which might be unbounded.
	 */

	/*
	 * For rwlocks this returns 0 unconditionally, so the below
	 * !ret conditionals are optimized out.
	 */
	ret = rwbase_rtmutex_slowlock_locked(rtm, state);

	/*
	 * On success the rtmutex is held, so there can't be a writer
	 * active. Increment the reader count and immediately drop the
	 * rtmutex again.
	 *
	 * rtmutex->wait_lock has to be unlocked in any case of course.
	 */
	if (!ret)
		atomic_inc(&rwb->readers);
	raw_spin_unlock_irq(&rtm->wait_lock);
	if (!ret)
		rwbase_rtmutex_unlock(rtm);
	return ret;
}
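/*
 * Worked example for the WRITER_BIAS check in __rwbase_read_lock() above,
 * assuming the bias values from include/linux/rwbase_rt.h: while a writer
 * is only *waiting*, it has subtracted READER_BIAS but has not yet set
 * WRITER_BIAS, so rwb->readers holds the plain reader count n >= 0. Since
 * n != WRITER_BIAS, newly arriving readers are still admitted in that
 * window, which is exactly the "not writer fair" behaviour described in
 * the file header. Only once the writer has observed readers == 0 and set
 * WRITER_BIAS does this path fall through and block on the rtmutex.
 */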
static __always_inline int rwbase_read_lock(struct rwbase_rt *rwb,
					    unsigned int state)
{
	if (rwbase_read_trylock(rwb))
		return 0;

	return __rwbase_read_lock(rwb, state);
}

static void __sched __rwbase_read_unlock(struct rwbase_rt *rwb,
					 unsigned int state)
{
	struct rt_mutex_base *rtm = &rwb->rtmutex;
	struct task_struct *owner;
	DEFINE_RT_WAKE_Q(wqh);

	raw_spin_lock_irq(&rtm->wait_lock);
	/*
	 * Wake the writer, i.e. the rtmutex owner. It might release the
	 * rtmutex concurrently in the fast path (due to a signal), but to
	 * clean up rwb->readers it needs to acquire rtm->wait_lock. The
	 * worst case which can happen is a spurious wakeup.
	 */
	owner = rt_mutex_owner(rtm);
	if (owner)
		rt_mutex_wake_q_add_task(&wqh, owner, state);

	/* Pairs with the preempt_enable() in rt_mutex_wake_up_q() */
	preempt_disable();
	raw_spin_unlock_irq(&rtm->wait_lock);
	rt_mutex_wake_up_q(&wqh);
}

static __always_inline void rwbase_read_unlock(struct rwbase_rt *rwb,
					       unsigned int state)
{
	/*
	 * rwb->readers can only hit 0 when a writer is waiting for the
	 * active readers to leave the critical section.
	 */
	if (unlikely(atomic_dec_and_test(&rwb->readers)))
		__rwbase_read_unlock(rwb, state);
}
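/*
 * Illustrative sketch of the read unlock handshake, under the same bias
 * assumptions (READER_BIAS == 1U << 31):
 *
 *	no writer waiting:	readers == READER_BIAS + n, so the
 *				atomic_dec_and_test() above can never reach
 *				zero and up_read() stays on the fast path.
 *	writer waiting:		the writer already subtracted READER_BIAS,
 *				so readers == n. The last reader decrements
 *				n == 1 to zero and takes the slow path,
 *				which wakes the rtmutex owner (the writer)
 *				blocked in rwbase_write_lock() below.
 */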
static inline void __rwbase_write_unlock(struct rwbase_rt *rwb, int bias,
					 unsigned long flags)
{
	struct rt_mutex_base *rtm = &rwb->rtmutex;

	atomic_add(READER_BIAS - bias, &rwb->readers);
	raw_spin_unlock_irqrestore(&rtm->wait_lock, flags);
	rwbase_rtmutex_unlock(rtm);
}

static inline void rwbase_write_unlock(struct rwbase_rt *rwb)
{
	struct rt_mutex_base *rtm = &rwb->rtmutex;
	unsigned long flags;

	raw_spin_lock_irqsave(&rtm->wait_lock, flags);
	__rwbase_write_unlock(rwb, WRITER_BIAS, flags);
}

static inline void rwbase_write_downgrade(struct rwbase_rt *rwb)
{
	struct rt_mutex_base *rtm = &rwb->rtmutex;
	unsigned long flags;

	raw_spin_lock_irqsave(&rtm->wait_lock, flags);
	/* Release it and account current as reader */
	__rwbase_write_unlock(rwb, WRITER_BIAS - 1, flags);
}
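/*
 * Worked example for the bias arithmetic above (a sketch, assuming
 * READER_BIAS == 1U << 31 and WRITER_BIAS == 1U << 30 from
 * include/linux/rwbase_rt.h). At unlock time readers == WRITER_BIAS and
 * __rwbase_write_unlock() adds READER_BIAS - bias:
 *
 *	write_unlock:	bias == WRITER_BIAS
 *			readers = WRITER_BIAS + (READER_BIAS - WRITER_BIAS)
 *				= READER_BIAS		(fully unlocked)
 *
 *	downgrade:	bias == WRITER_BIAS - 1
 *			readers = WRITER_BIAS + (READER_BIAS - WRITER_BIAS + 1)
 *				= READER_BIAS + 1	(current accounted as
 *							 the remaining reader)
 */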
static int __sched rwbase_write_lock(struct rwbase_rt *rwb,
				     unsigned int state)
{
	struct rt_mutex_base *rtm = &rwb->rtmutex;
	unsigned long flags;

	/* Take the rtmutex as a first step */
	if (rwbase_rtmutex_lock_state(rtm, state))
		return -EINTR;

	/* Force readers into slow path */
	atomic_sub(READER_BIAS, &rwb->readers);

	raw_spin_lock_irqsave(&rtm->wait_lock, flags);
	/*
	 * set_current_state() for rw_semaphore
	 * current_save_and_set_rtlock_wait_state() for rwlock
	 */
	rwbase_set_and_save_current_state(state);

	/* Block until all readers have left the critical section. */
	for (; atomic_read(&rwb->readers);) {
		/* Optimized out for rwlocks */
		if (rwbase_signal_pending_state(state, current)) {
			__set_current_state(TASK_RUNNING);
			__rwbase_write_unlock(rwb, 0, flags);
			return -EINTR;
		}
		raw_spin_unlock_irqrestore(&rtm->wait_lock, flags);

		/*
		 * Schedule and wait for the readers to leave the critical
		 * section. The last reader leaving it wakes the waiter.
		 */
		if (atomic_read(&rwb->readers) != 0)
			rwbase_schedule();
		set_current_state(state);
		raw_spin_lock_irqsave(&rtm->wait_lock, flags);
	}

	atomic_set(&rwb->readers, WRITER_BIAS);
	rwbase_restore_current_state();
	raw_spin_unlock_irqrestore(&rtm->wait_lock, flags);
	return 0;
}

static inline int rwbase_write_trylock(struct rwbase_rt *rwb)
{
	struct rt_mutex_base *rtm = &rwb->rtmutex;
	unsigned long flags;

	if (!rwbase_rtmutex_trylock(rtm))
		return 0;

	atomic_sub(READER_BIAS, &rwb->readers);

	raw_spin_lock_irqsave(&rtm->wait_lock, flags);
	if (!atomic_read(&rwb->readers)) {
		atomic_set(&rwb->readers, WRITER_BIAS);
		raw_spin_unlock_irqrestore(&rtm->wait_lock, flags);
		return 1;
	}
	__rwbase_write_unlock(rwb, 0, flags);
	return 0;
}
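/*
 * Sketch of the failure paths above, under the same bias assumptions
 * (READER_BIAS == 1U << 31): after atomic_sub(READER_BIAS) the counter
 * holds the plain number of active readers. If that is non-zero,
 * rwbase_write_trylock() must not wait, so __rwbase_write_unlock(rwb, 0,
 * flags) adds READER_BIAS - 0 back, reopening the reader fast path, and
 * drops the rtmutex again. rwbase_write_lock() uses the same call in its
 * signal handling branch for exactly the same reason.
 */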