1 #ifndef _LINUX_CLOSURE_H 2 #define _LINUX_CLOSURE_H 3 4 #include <linux/llist.h> 5 #include <linux/sched.h> 6 #include <linux/workqueue.h> 7 8 /* 9 * Closure is perhaps the most overused and abused term in computer science, but 10 * since I've been unable to come up with anything better you're stuck with it 11 * again. 12 * 13 * What are closures? 14 * 15 * They embed a refcount. The basic idea is they count "things that are in 16 * progress" - in flight bios, some other thread that's doing something else - 17 * anything you might want to wait on. 18 * 19 * The refcount may be manipulated with closure_get() and closure_put(). 20 * closure_put() is where many of the interesting things happen, when it causes 21 * the refcount to go to 0. 22 * 23 * Closures can be used to wait on things both synchronously and asynchronously, 24 * and synchronous and asynchronous use can be mixed without restriction. To 25 * wait synchronously, use closure_sync() - you will sleep until your closure's 26 * refcount hits 1. 27 * 28 * To wait asynchronously, use 29 * continue_at(cl, next_function, workqueue); 30 * 31 * passing it, as you might expect, the function to run when nothing is pending 32 * and the workqueue to run that function out of. 33 * 34 * continue_at() also, critically, is a macro that returns the calling function. 35 * There's good reason for this. 36 * 37 * To use safely closures asynchronously, they must always have a refcount while 38 * they are running owned by the thread that is running them. Otherwise, suppose 39 * you submit some bios and wish to have a function run when they all complete: 40 * 41 * foo_endio(struct bio *bio, int error) 42 * { 43 * closure_put(cl); 44 * } 45 * 46 * closure_init(cl); 47 * 48 * do_stuff(); 49 * closure_get(cl); 50 * bio1->bi_endio = foo_endio; 51 * bio_submit(bio1); 52 * 53 * do_more_stuff(); 54 * closure_get(cl); 55 * bio2->bi_endio = foo_endio; 56 * bio_submit(bio2); 57 * 58 * continue_at(cl, complete_some_read, system_wq); 59 * 60 * If closure's refcount started at 0, complete_some_read() could run before the 61 * second bio was submitted - which is almost always not what you want! More 62 * importantly, it wouldn't be possible to say whether the original thread or 63 * complete_some_read()'s thread owned the closure - and whatever state it was 64 * associated with! 65 * 66 * So, closure_init() initializes a closure's refcount to 1 - and when a 67 * closure_fn is run, the refcount will be reset to 1 first. 68 * 69 * Then, the rule is - if you got the refcount with closure_get(), release it 70 * with closure_put() (i.e, in a bio->bi_endio function). If you have a refcount 71 * on a closure because you called closure_init() or you were run out of a 72 * closure - _always_ use continue_at(). Doing so consistently will help 73 * eliminate an entire class of particularly pernicious races. 74 * 75 * For a closure to wait on an arbitrary event, we need to introduce waitlists: 76 * 77 * struct closure_waitlist list; 78 * closure_wait_event(list, cl, condition); 79 * closure_wake_up(wait_list); 80 * 81 * These work analagously to wait_event() and wake_up() - except that instead of 82 * operating on the current thread (for wait_event()) and lists of threads, they 83 * operate on an explicit closure and lists of closures. 84 * 85 * Because it's a closure we can now wait either synchronously or 86 * asynchronously. closure_wait_event() returns the current value of the 87 * condition, and if it returned false continue_at() or closure_sync() can be 88 * used to wait for it to become true. 89 * 90 * It's useful for waiting on things when you can't sleep in the context in 91 * which you must check the condition (perhaps a spinlock held, or you might be 92 * beneath generic_make_request() - in which case you can't sleep on IO). 93 * 94 * closure_wait_event() will wait either synchronously or asynchronously, 95 * depending on whether the closure is in blocking mode or not. You can pick a 96 * mode explicitly with closure_wait_event_sync() and 97 * closure_wait_event_async(), which do just what you might expect. 98 * 99 * Lastly, you might have a wait list dedicated to a specific event, and have no 100 * need for specifying the condition - you just want to wait until someone runs 101 * closure_wake_up() on the appropriate wait list. In that case, just use 102 * closure_wait(). It will return either true or false, depending on whether the 103 * closure was already on a wait list or not - a closure can only be on one wait 104 * list at a time. 105 * 106 * Parents: 107 * 108 * closure_init() takes two arguments - it takes the closure to initialize, and 109 * a (possibly null) parent. 110 * 111 * If parent is non null, the new closure will have a refcount for its lifetime; 112 * a closure is considered to be "finished" when its refcount hits 0 and the 113 * function to run is null. Hence 114 * 115 * continue_at(cl, NULL, NULL); 116 * 117 * returns up the (spaghetti) stack of closures, precisely like normal return 118 * returns up the C stack. continue_at() with non null fn is better thought of 119 * as doing a tail call. 120 * 121 * All this implies that a closure should typically be embedded in a particular 122 * struct (which its refcount will normally control the lifetime of), and that 123 * struct can very much be thought of as a stack frame. 124 * 125 * Locking: 126 * 127 * Closures are based on work items but they can be thought of as more like 128 * threads - in that like threads and unlike work items they have a well 129 * defined lifetime; they are created (with closure_init()) and eventually 130 * complete after a continue_at(cl, NULL, NULL). 131 * 132 * Suppose you've got some larger structure with a closure embedded in it that's 133 * used for periodically doing garbage collection. You only want one garbage 134 * collection happening at a time, so the natural thing to do is protect it with 135 * a lock. However, it's difficult to use a lock protecting a closure correctly 136 * because the unlock should come after the last continue_to() (additionally, if 137 * you're using the closure asynchronously a mutex won't work since a mutex has 138 * to be unlocked by the same process that locked it). 139 * 140 * So to make it less error prone and more efficient, we also have the ability 141 * to use closures as locks: 142 * 143 * closure_init_unlocked(); 144 * closure_trylock(); 145 * 146 * That's all we need for trylock() - the last closure_put() implicitly unlocks 147 * it for you. But for closure_lock(), we also need a wait list: 148 * 149 * struct closure_with_waitlist frobnicator_cl; 150 * 151 * closure_init_unlocked(&frobnicator_cl); 152 * closure_lock(&frobnicator_cl); 153 * 154 * A closure_with_waitlist embeds a closure and a wait list - much like struct 155 * delayed_work embeds a work item and a timer_list. The important thing is, use 156 * it exactly like you would a regular closure and closure_put() will magically 157 * handle everything for you. 158 */ 159 160 struct closure; 161 typedef void (closure_fn) (struct closure *); 162 163 struct closure_waitlist { 164 struct llist_head list; 165 }; 166 167 enum closure_type { 168 TYPE_closure = 0, 169 TYPE_closure_with_waitlist = 1, 170 MAX_CLOSURE_TYPE = 1, 171 }; 172 173 enum closure_state { 174 /* 175 * CLOSURE_WAITING: Set iff the closure is on a waitlist. Must be set by 176 * the thread that owns the closure, and cleared by the thread that's 177 * waking up the closure. 178 * 179 * CLOSURE_SLEEPING: Must be set before a thread uses a closure to sleep 180 * - indicates that cl->task is valid and closure_put() may wake it up. 181 * Only set or cleared by the thread that owns the closure. 182 * 183 * The rest are for debugging and don't affect behaviour: 184 * 185 * CLOSURE_RUNNING: Set when a closure is running (i.e. by 186 * closure_init() and when closure_put() runs then next function), and 187 * must be cleared before remaining hits 0. Primarily to help guard 188 * against incorrect usage and accidentally transferring references. 189 * continue_at() and closure_return() clear it for you, if you're doing 190 * something unusual you can use closure_set_dead() which also helps 191 * annotate where references are being transferred. 192 * 193 * CLOSURE_STACK: Sanity check - remaining should never hit 0 on a 194 * closure with this flag set 195 */ 196 197 CLOSURE_BITS_START = (1 << 23), 198 CLOSURE_DESTRUCTOR = (1 << 23), 199 CLOSURE_WAITING = (1 << 25), 200 CLOSURE_SLEEPING = (1 << 27), 201 CLOSURE_RUNNING = (1 << 29), 202 CLOSURE_STACK = (1 << 31), 203 }; 204 205 #define CLOSURE_GUARD_MASK \ 206 ((CLOSURE_DESTRUCTOR|CLOSURE_WAITING|CLOSURE_SLEEPING| \ 207 CLOSURE_RUNNING|CLOSURE_STACK) << 1) 208 209 #define CLOSURE_REMAINING_MASK (CLOSURE_BITS_START - 1) 210 #define CLOSURE_REMAINING_INITIALIZER (1|CLOSURE_RUNNING) 211 212 struct closure { 213 union { 214 struct { 215 struct workqueue_struct *wq; 216 struct task_struct *task; 217 struct llist_node list; 218 closure_fn *fn; 219 }; 220 struct work_struct work; 221 }; 222 223 struct closure *parent; 224 225 atomic_t remaining; 226 227 enum closure_type type; 228 229 #ifdef CONFIG_BCACHE_CLOSURES_DEBUG 230 #define CLOSURE_MAGIC_DEAD 0xc054dead 231 #define CLOSURE_MAGIC_ALIVE 0xc054a11e 232 233 unsigned magic; 234 struct list_head all; 235 unsigned long ip; 236 unsigned long waiting_on; 237 #endif 238 }; 239 240 struct closure_with_waitlist { 241 struct closure cl; 242 struct closure_waitlist wait; 243 }; 244 245 extern unsigned invalid_closure_type(void); 246 247 #define __CLOSURE_TYPE(cl, _t) \ 248 __builtin_types_compatible_p(typeof(cl), struct _t) \ 249 ? TYPE_ ## _t : \ 250 251 #define __closure_type(cl) \ 252 ( \ 253 __CLOSURE_TYPE(cl, closure) \ 254 __CLOSURE_TYPE(cl, closure_with_waitlist) \ 255 invalid_closure_type() \ 256 ) 257 258 void closure_sub(struct closure *cl, int v); 259 void closure_put(struct closure *cl); 260 void __closure_wake_up(struct closure_waitlist *list); 261 bool closure_wait(struct closure_waitlist *list, struct closure *cl); 262 void closure_sync(struct closure *cl); 263 264 bool closure_trylock(struct closure *cl, struct closure *parent); 265 void __closure_lock(struct closure *cl, struct closure *parent, 266 struct closure_waitlist *wait_list); 267 268 #ifdef CONFIG_BCACHE_CLOSURES_DEBUG 269 270 void closure_debug_init(void); 271 void closure_debug_create(struct closure *cl); 272 void closure_debug_destroy(struct closure *cl); 273 274 #else 275 276 static inline void closure_debug_init(void) {} 277 static inline void closure_debug_create(struct closure *cl) {} 278 static inline void closure_debug_destroy(struct closure *cl) {} 279 280 #endif 281 282 static inline void closure_set_ip(struct closure *cl) 283 { 284 #ifdef CONFIG_BCACHE_CLOSURES_DEBUG 285 cl->ip = _THIS_IP_; 286 #endif 287 } 288 289 static inline void closure_set_ret_ip(struct closure *cl) 290 { 291 #ifdef CONFIG_BCACHE_CLOSURES_DEBUG 292 cl->ip = _RET_IP_; 293 #endif 294 } 295 296 static inline void closure_get(struct closure *cl) 297 { 298 #ifdef CONFIG_BCACHE_CLOSURES_DEBUG 299 BUG_ON((atomic_inc_return(&cl->remaining) & 300 CLOSURE_REMAINING_MASK) <= 1); 301 #else 302 atomic_inc(&cl->remaining); 303 #endif 304 } 305 306 static inline void closure_set_stopped(struct closure *cl) 307 { 308 atomic_sub(CLOSURE_RUNNING, &cl->remaining); 309 } 310 311 static inline bool closure_is_unlocked(struct closure *cl) 312 { 313 return atomic_read(&cl->remaining) == -1; 314 } 315 316 static inline void do_closure_init(struct closure *cl, struct closure *parent, 317 bool running) 318 { 319 cl->parent = parent; 320 if (parent) 321 closure_get(parent); 322 323 if (running) { 324 closure_debug_create(cl); 325 atomic_set(&cl->remaining, CLOSURE_REMAINING_INITIALIZER); 326 } else 327 atomic_set(&cl->remaining, -1); 328 329 closure_set_ip(cl); 330 } 331 332 /* 333 * Hack to get at the embedded closure if there is one, by doing an unsafe cast: 334 * the result of __closure_type() is thrown away, it's used merely for type 335 * checking. 336 */ 337 #define __to_internal_closure(cl) \ 338 ({ \ 339 BUILD_BUG_ON(__closure_type(*cl) > MAX_CLOSURE_TYPE); \ 340 (struct closure *) cl; \ 341 }) 342 343 #define closure_init_type(cl, parent, running) \ 344 do { \ 345 struct closure *_cl = __to_internal_closure(cl); \ 346 _cl->type = __closure_type(*(cl)); \ 347 do_closure_init(_cl, parent, running); \ 348 } while (0) 349 350 /** 351 * __closure_init() - Initialize a closure, skipping the memset() 352 * 353 * May be used instead of closure_init() when memory has already been zeroed. 354 */ 355 #define __closure_init(cl, parent) \ 356 closure_init_type(cl, parent, true) 357 358 /** 359 * closure_init() - Initialize a closure, setting the refcount to 1 360 * @cl: closure to initialize 361 * @parent: parent of the new closure. cl will take a refcount on it for its 362 * lifetime; may be NULL. 363 */ 364 #define closure_init(cl, parent) \ 365 do { \ 366 memset((cl), 0, sizeof(*(cl))); \ 367 __closure_init(cl, parent); \ 368 } while (0) 369 370 static inline void closure_init_stack(struct closure *cl) 371 { 372 memset(cl, 0, sizeof(struct closure)); 373 atomic_set(&cl->remaining, CLOSURE_REMAINING_INITIALIZER|CLOSURE_STACK); 374 } 375 376 /** 377 * closure_init_unlocked() - Initialize a closure but leave it unlocked. 378 * @cl: closure to initialize 379 * 380 * For when the closure will be used as a lock. The closure may not be used 381 * until after a closure_lock() or closure_trylock(). 382 */ 383 #define closure_init_unlocked(cl) \ 384 do { \ 385 memset((cl), 0, sizeof(*(cl))); \ 386 closure_init_type(cl, NULL, false); \ 387 } while (0) 388 389 /** 390 * closure_lock() - lock and initialize a closure. 391 * @cl: the closure to lock 392 * @parent: the new parent for this closure 393 * 394 * The closure must be of one of the types that has a waitlist (otherwise we 395 * wouldn't be able to sleep on contention). 396 * 397 * @parent has exactly the same meaning as in closure_init(); if non null, the 398 * closure will take a reference on @parent which will be released when it is 399 * unlocked. 400 */ 401 #define closure_lock(cl, parent) \ 402 __closure_lock(__to_internal_closure(cl), parent, &(cl)->wait) 403 404 static inline void __closure_end_sleep(struct closure *cl) 405 { 406 __set_current_state(TASK_RUNNING); 407 408 if (atomic_read(&cl->remaining) & CLOSURE_SLEEPING) 409 atomic_sub(CLOSURE_SLEEPING, &cl->remaining); 410 } 411 412 static inline void __closure_start_sleep(struct closure *cl) 413 { 414 closure_set_ip(cl); 415 cl->task = current; 416 set_current_state(TASK_UNINTERRUPTIBLE); 417 418 if (!(atomic_read(&cl->remaining) & CLOSURE_SLEEPING)) 419 atomic_add(CLOSURE_SLEEPING, &cl->remaining); 420 } 421 422 /** 423 * closure_wake_up() - wake up all closures on a wait list. 424 */ 425 static inline void closure_wake_up(struct closure_waitlist *list) 426 { 427 smp_mb(); 428 __closure_wake_up(list); 429 } 430 431 /* 432 * Wait on an event, synchronously or asynchronously - analogous to wait_event() 433 * but for closures. 434 * 435 * The loop is oddly structured so as to avoid a race; we must check the 436 * condition again after we've added ourself to the waitlist. We know if we were 437 * already on the waitlist because closure_wait() returns false; thus, we only 438 * schedule or break if closure_wait() returns false. If it returns true, we 439 * just loop again - rechecking the condition. 440 * 441 * The __closure_wake_up() is necessary because we may race with the event 442 * becoming true; i.e. we see event false -> wait -> recheck condition, but the 443 * thread that made the event true may have called closure_wake_up() before we 444 * added ourself to the wait list. 445 * 446 * We have to call closure_sync() at the end instead of just 447 * __closure_end_sleep() because a different thread might've called 448 * closure_wake_up() before us and gotten preempted before they dropped the 449 * refcount on our closure. If this was a stack allocated closure, that would be 450 * bad. 451 */ 452 #define closure_wait_event(list, cl, condition) \ 453 ({ \ 454 typeof(condition) ret; \ 455 \ 456 while (1) { \ 457 ret = (condition); \ 458 if (ret) { \ 459 __closure_wake_up(list); \ 460 closure_sync(cl); \ 461 break; \ 462 } \ 463 \ 464 __closure_start_sleep(cl); \ 465 \ 466 if (!closure_wait(list, cl)) \ 467 schedule(); \ 468 } \ 469 \ 470 ret; \ 471 }) 472 473 static inline void closure_queue(struct closure *cl) 474 { 475 struct workqueue_struct *wq = cl->wq; 476 if (wq) { 477 INIT_WORK(&cl->work, cl->work.func); 478 BUG_ON(!queue_work(wq, &cl->work)); 479 } else 480 cl->fn(cl); 481 } 482 483 static inline void set_closure_fn(struct closure *cl, closure_fn *fn, 484 struct workqueue_struct *wq) 485 { 486 BUG_ON(object_is_on_stack(cl)); 487 closure_set_ip(cl); 488 cl->fn = fn; 489 cl->wq = wq; 490 /* between atomic_dec() in closure_put() */ 491 smp_mb__before_atomic_dec(); 492 } 493 494 #define continue_at(_cl, _fn, _wq) \ 495 do { \ 496 set_closure_fn(_cl, _fn, _wq); \ 497 closure_sub(_cl, CLOSURE_RUNNING + 1); \ 498 return; \ 499 } while (0) 500 501 #define closure_return(_cl) continue_at((_cl), NULL, NULL) 502 503 #define continue_at_nobarrier(_cl, _fn, _wq) \ 504 do { \ 505 set_closure_fn(_cl, _fn, _wq); \ 506 closure_queue(_cl); \ 507 return; \ 508 } while (0) 509 510 #define closure_return_with_destructor(_cl, _destructor) \ 511 do { \ 512 set_closure_fn(_cl, _destructor, NULL); \ 513 closure_sub(_cl, CLOSURE_RUNNING - CLOSURE_DESTRUCTOR + 1); \ 514 return; \ 515 } while (0) 516 517 static inline void closure_call(struct closure *cl, closure_fn fn, 518 struct workqueue_struct *wq, 519 struct closure *parent) 520 { 521 closure_init(cl, parent); 522 continue_at_nobarrier(cl, fn, wq); 523 } 524 525 static inline void closure_trylock_call(struct closure *cl, closure_fn fn, 526 struct workqueue_struct *wq, 527 struct closure *parent) 528 { 529 if (closure_trylock(cl, parent)) 530 continue_at_nobarrier(cl, fn, wq); 531 } 532 533 #endif /* _LINUX_CLOSURE_H */ 534