// SPDX-License-Identifier: GPL-2.0
/*
 * Copyright (C) 2008 Oracle. All rights reserved.
 */

#include <linux/sched.h>
#include <linux/pagemap.h>
#include <linux/spinlock.h>
#include <linux/page-flags.h>
#include <asm/bug.h>
#include "misc.h"
#include "ctree.h"
#include "extent_io.h"
#include "locking.h"

/*
 * Extent buffer locking
 * =====================
 *
 * We use a rw_semaphore for tree locking, and the semantics are exactly the
 * same:
 *
 * - reader/writer exclusion
 * - writer/writer exclusion
 * - reader/reader sharing
 * - try-lock semantics for readers and writers
 *
 * The rwsem implementation does opportunistic spinning, which reduces the
 * number of times the locking task needs to sleep.
 */

/*
 * __btrfs_tree_read_lock - lock extent buffer for read
 * @eb:   the eb to be locked
 * @nest: the nesting level to be used for lockdep
 *
 * This takes the read lock on the extent buffer, using the specified nesting
 * level for lockdep purposes.
 */
void __btrfs_tree_read_lock(struct extent_buffer *eb, enum btrfs_lock_nesting nest)
{
	u64 start_ns = 0;

	if (trace_btrfs_tree_read_lock_enabled())
		start_ns = ktime_get_ns();

	down_read_nested(&eb->lock, nest);
	eb->lock_owner = current->pid;
	trace_btrfs_tree_read_lock(eb, start_ns);
}

void btrfs_tree_read_lock(struct extent_buffer *eb)
{
	__btrfs_tree_read_lock(eb, BTRFS_NESTING_NORMAL);
}

/*
 * Try-lock for read.
 *
 * Return 1 if the lock has been taken, 0 otherwise.
 */
int btrfs_try_tree_read_lock(struct extent_buffer *eb)
{
	if (down_read_trylock(&eb->lock)) {
		eb->lock_owner = current->pid;
		trace_btrfs_try_tree_read_lock(eb);
		return 1;
	}
	return 0;
}

/*
 * Try-lock for write.
 *
 * Return 1 if the lock has been taken, 0 otherwise.
 */
int btrfs_try_tree_write_lock(struct extent_buffer *eb)
{
	if (down_write_trylock(&eb->lock)) {
		eb->lock_owner = current->pid;
		trace_btrfs_try_tree_write_lock(eb);
		return 1;
	}
	return 0;
}

/*
 * Release the read lock.
 */
void btrfs_tree_read_unlock(struct extent_buffer *eb)
{
	trace_btrfs_tree_read_unlock(eb);
	eb->lock_owner = 0;
	up_read(&eb->lock);
}

/*
 * __btrfs_tree_lock - lock eb for write
 * @eb:   the eb to lock
 * @nest: the nesting to use for the lock
 *
 * Returns with the eb->lock write locked.
 */
void __btrfs_tree_lock(struct extent_buffer *eb, enum btrfs_lock_nesting nest)
	__acquires(&eb->lock)
{
	u64 start_ns = 0;

	if (trace_btrfs_tree_lock_enabled())
		start_ns = ktime_get_ns();

	down_write_nested(&eb->lock, nest);
	eb->lock_owner = current->pid;
	trace_btrfs_tree_lock(eb, start_ns);
}

void btrfs_tree_lock(struct extent_buffer *eb)
{
	__btrfs_tree_lock(eb, BTRFS_NESTING_NORMAL);
}

/*
 * Release the write lock.
 */
void btrfs_tree_unlock(struct extent_buffer *eb)
{
	trace_btrfs_tree_unlock(eb);
	eb->lock_owner = 0;
	up_write(&eb->lock);
}
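/*
 * A minimal usage sketch of the lock primitives above: try the non-blocking
 * write lock first and fall back to the blocking variant on contention.
 * The helper name example_eb_write_lock is hypothetical and not part of the
 * btrfs API; it only illustrates how the try-lock and blocking helpers can
 * be combined.
 */
static inline void example_eb_write_lock(struct extent_buffer *eb)
{
	if (btrfs_try_tree_write_lock(eb))
		return;

	/* Contended: sleep until the write lock can be taken. */
	btrfs_tree_lock(eb);
}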
/*
 * This releases any locks held in the path starting at level and going all the
 * way up to the root.
 *
 * btrfs_search_slot will keep the lock held on higher nodes in a few corner
 * cases, such as COW of the block at slot zero in the node. This ignores
 * those rules, and it should only be called when there are no more updates to
 * be done higher up in the tree.
 */
void btrfs_unlock_up_safe(struct btrfs_path *path, int level)
{
	int i;

	if (path->keep_locks)
		return;

	for (i = level; i < BTRFS_MAX_LEVEL; i++) {
		if (!path->nodes[i])
			continue;
		if (!path->locks[i])
			continue;
		btrfs_tree_unlock_rw(path->nodes[i], path->locks[i]);
		path->locks[i] = 0;
	}
}

/*
 * Loop around taking references on and locking the root node of the tree until
 * we end up with a lock on the root node.
 *
 * Return: root extent buffer with write lock held
 */
struct extent_buffer *btrfs_lock_root_node(struct btrfs_root *root)
{
	struct extent_buffer *eb;

	while (1) {
		eb = btrfs_root_node(root);
		btrfs_tree_lock(eb);
		if (eb == root->node)
			break;
		btrfs_tree_unlock(eb);
		free_extent_buffer(eb);
	}
	return eb;
}

/*
 * Loop around taking references on and locking the root node of the tree until
 * we end up with a lock on the root node.
 *
 * Return: root extent buffer with read lock held
 */
struct extent_buffer *btrfs_read_lock_root_node(struct btrfs_root *root)
{
	struct extent_buffer *eb;

	while (1) {
		eb = btrfs_root_node(root);
		btrfs_tree_read_lock(eb);
		if (eb == root->node)
			break;
		btrfs_tree_read_unlock(eb);
		free_extent_buffer(eb);
	}
	return eb;
}

/*
 * DREW locks
 * ==========
 *
 * DREW stands for double-reader-writer-exclusion lock. It's used in situations
 * where you want to provide A-B exclusion but not AA or BB.
 *
 * The current implementation gives priority to readers. If a reader and a
 * writer both race to acquire their respective sides of the lock, the writer
 * yields its lock as soon as it detects a concurrent reader. Additionally, if
 * there are pending readers, no new writers are allowed to come in and acquire
 * the lock.
 */
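/*
 * A minimal usage sketch of the DREW semantics described above: one task
 * takes the writer side while another takes the reader side. The two sides
 * exclude each other, but several readers (or several writers) may hold the
 * lock at the same time. The function names example_drew_writer_side and
 * example_drew_reader_side are hypothetical and only illustrate the API.
 */
static inline void example_drew_writer_side(struct btrfs_drew_lock *lock)
{
	btrfs_drew_write_lock(lock);
	/* ... work that must not overlap with the reader side ... */
	btrfs_drew_write_unlock(lock);
}

static inline void example_drew_reader_side(struct btrfs_drew_lock *lock)
{
	btrfs_drew_read_lock(lock);
	/* ... work that must not overlap with the writer side ... */
	btrfs_drew_read_unlock(lock);
}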
int btrfs_drew_lock_init(struct btrfs_drew_lock *lock)
{
	int ret;

	ret = percpu_counter_init(&lock->writers, 0, GFP_KERNEL);
	if (ret)
		return ret;

	atomic_set(&lock->readers, 0);
	init_waitqueue_head(&lock->pending_readers);
	init_waitqueue_head(&lock->pending_writers);

	return 0;
}

void btrfs_drew_lock_destroy(struct btrfs_drew_lock *lock)
{
	percpu_counter_destroy(&lock->writers);
}

/* Return true if acquisition is successful, false otherwise */
bool btrfs_drew_try_write_lock(struct btrfs_drew_lock *lock)
{
	if (atomic_read(&lock->readers))
		return false;

	percpu_counter_inc(&lock->writers);

	/* Ensure the writers count is updated before we check for pending readers */
	smp_mb();
	if (atomic_read(&lock->readers)) {
		btrfs_drew_write_unlock(lock);
		return false;
	}

	return true;
}

void btrfs_drew_write_lock(struct btrfs_drew_lock *lock)
{
	while (true) {
		if (btrfs_drew_try_write_lock(lock))
			return;
		wait_event(lock->pending_writers, !atomic_read(&lock->readers));
	}
}

void btrfs_drew_write_unlock(struct btrfs_drew_lock *lock)
{
	percpu_counter_dec(&lock->writers);
	cond_wake_up(&lock->pending_readers);
}

void btrfs_drew_read_lock(struct btrfs_drew_lock *lock)
{
	atomic_inc(&lock->readers);

	/*
	 * Ensure the pending reader count is perceived BEFORE this reader
	 * goes to sleep in case of active writers. This guarantees new writers
	 * won't be allowed and that the current reader will be woken up when
	 * the last active writer finishes its jobs.
	 */
	smp_mb__after_atomic();

	wait_event(lock->pending_readers,
		   percpu_counter_sum(&lock->writers) == 0);
}

void btrfs_drew_read_unlock(struct btrfs_drew_lock *lock)
{
	/*
	 * atomic_dec_and_test() implies a full barrier, so woken-up writers
	 * are guaranteed to see the decrement.
	 */
	if (atomic_dec_and_test(&lock->readers))
		wake_up(&lock->pending_writers);
}
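/*
 * A minimal lifecycle sketch for a DREW lock embedded in a larger object.
 * btrfs_drew_lock_init() can fail because the per-CPU writers counter
 * allocates memory, so callers must check its return value. The structure
 * and function names (example_drew_holder, example_drew_holder_init,
 * example_drew_holder_exit) are hypothetical.
 */
struct example_drew_holder {
	struct btrfs_drew_lock drew;
};

static inline int example_drew_holder_init(struct example_drew_holder *h)
{
	/* May return -ENOMEM if the per-CPU counter cannot be allocated. */
	return btrfs_drew_lock_init(&h->drew);
}

static inline void example_drew_holder_exit(struct example_drew_holder *h)
{
	btrfs_drew_lock_destroy(&h->drew);
}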