#ifndef _LINUX_CLOSURE_H
#define _LINUX_CLOSURE_H

#include <linux/llist.h>
#include <linux/sched.h>
#include <linux/workqueue.h>

/*
 * Closure is perhaps the most overused and abused term in computer science, but
 * since I've been unable to come up with anything better you're stuck with it
 * again.
 *
 * What are closures?
 *
 * They embed a refcount. The basic idea is they count "things that are in
 * progress" - in flight bios, some other thread that's doing something else -
 * anything you might want to wait on.
 *
 * The refcount may be manipulated with closure_get() and closure_put().
 * closure_put() is where many of the interesting things happen, when it causes
 * the refcount to go to 0.
 *
 * Closures can be used to wait on things both synchronously and asynchronously,
 * and synchronous and asynchronous use can be mixed without restriction. To
 * wait synchronously, use closure_sync() - you will sleep until your closure's
 * refcount hits 1.
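 *
 * A hedged sketch of synchronous use - start_async_thing() is a hypothetical
 * helper whose completion path calls closure_put():
 *
 *	struct closure cl;
 *
 *	closure_init_stack(&cl);
 *	closure_get(&cl);
 *	start_async_thing(&cl);
 *	closure_sync(&cl);
 *
 * closure_get() takes one ref per thing in flight; closure_sync() then sleeps
 * until only the initial ref remains.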
 *
 * To wait asynchronously, use
 *   continue_at(cl, next_function, workqueue);
 *
 * passing it, as you might expect, the function to run when nothing is pending
 * and the workqueue to run that function out of.
 *
 * continue_at() also, critically, is a macro that returns from the calling
 * function. There's good reason for this.
 *
 * To use closures safely asynchronously, they must always have a refcount
 * while they are running, owned by the thread that is running them. Otherwise,
 * suppose you submit some bios and wish to have a function run when they all
 * complete:
 *
 * foo_endio(struct bio *bio, int error)
 * {
 *	closure_put(cl);
 * }
 *
 * closure_init(cl);
 *
 * do_stuff();
 * closure_get(cl);
 * bio1->bi_end_io = foo_endio;
 * submit_bio(bio1);
 *
 * do_more_stuff();
 * closure_get(cl);
 * bio2->bi_end_io = foo_endio;
 * submit_bio(bio2);
 *
 * continue_at(cl, complete_some_read, system_wq);
 *
 * If the closure's refcount started at 0, complete_some_read() could run
 * before the second bio was submitted - which is almost always not what you
 * want! More importantly, it wouldn't be possible to say whether the original
 * thread or complete_some_read()'s thread owned the closure - and whatever
 * state it was associated with!
 *
 * So, closure_init() initializes a closure's refcount to 1 - and when a
 * closure_fn is run, the refcount will be reset to 1 first.
 *
 * Then, the rule is - if you got the refcount with closure_get(), release it
 * with closure_put() (e.g., in a bio->bi_end_io function). If you have a
 * refcount on a closure because you called closure_init() or you were run out
 * of a closure - _always_ use continue_at(). Doing so consistently will help
 * eliminate an entire class of particularly pernicious races.
 *
 * For a closure to wait on an arbitrary event, we need to introduce waitlists:
 *
 * struct closure_waitlist list;
 * closure_wait_event(&list, cl, condition);
 * closure_wake_up(&list);
 *
 * These work analogously to wait_event() and wake_up() - except that instead of
 * operating on the current thread (for wait_event()) and lists of threads, they
 * operate on an explicit closure and lists of closures.
 *
 * Because it's a closure we can now wait either synchronously or
 * asynchronously. closure_wait_event() returns the current value of the
 * condition, and if it returned false continue_at() or closure_sync() can be
 * used to wait for it to become true.
 *
 * It's useful for waiting on things when you can't sleep in the context in
 * which you must check the condition (perhaps with a spinlock held, or you
 * might be beneath generic_make_request() - in which case you can't sleep
 * on IO).
 *
 * closure_wait_event() will wait either synchronously or asynchronously,
 * depending on whether the closure is in blocking mode or not. You can pick a
 * mode explicitly with closure_wait_event_sync() and
 * closure_wait_event_async(), which do just what you might expect.
 *
 * Lastly, you might have a wait list dedicated to a specific event, and have no
 * need for specifying the condition - you just want to wait until someone runs
 * closure_wake_up() on the appropriate wait list. In that case, just use
 * closure_wait(). It will return either true or false, depending on whether the
 * closure was already on a wait list or not - a closure can only be on one wait
 * list at a time.
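 *
 * A hedged sketch of the condition form - nr_in_flight and drain_wait are
 * hypothetical names, not part of this API:
 *
 *	static atomic_t nr_in_flight;
 *	static struct closure_waitlist drain_wait;
 *
 *	closure_wait_event(&drain_wait, cl, !atomic_read(&nr_in_flight));
 *
 * and on the completion side:
 *
 *	if (atomic_dec_and_test(&nr_in_flight))
 *		closure_wake_up(&drain_wait);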
 *
 * Parents:
 *
 * closure_init() takes two arguments - it takes the closure to initialize, and
 * a (possibly NULL) parent.
 *
 * If parent is non-NULL, the new closure will hold a refcount on it for the
 * new closure's lifetime; a closure is considered to be "finished" when its
 * refcount hits 0 and the function to run is NULL. Hence
 *
 * continue_at(cl, NULL, NULL);
 *
 * returns up the (spaghetti) stack of closures, precisely like normal return
 * returns up the C stack. continue_at() with non-NULL fn is better thought of
 * as doing a tail call.
 *
 * All this implies that a closure should typically be embedded in a particular
 * struct (which its refcount will normally control the lifetime of), and that
 * struct can very much be thought of as a stack frame.
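 *
 * A hedged sketch of that pattern - struct my_op, start_my_op() and
 * finish_up() are hypothetical names, not part of this API:
 *
 *	struct my_op {
 *		struct closure	cl;
 *	};
 *
 *	static void my_op_done(struct closure *cl)
 *	{
 *		struct my_op *op = container_of(cl, struct my_op, cl);
 *
 *		finish_up(op);
 *		closure_return(cl);
 *	}
 *
 *	closure_init(&op->cl, parent_cl);
 *	start_my_op(op);
 *	continue_at(&op->cl, my_op_done, system_wq);
 *
 * When my_op_done() ends with closure_return(), op->cl is finished and its
 * ref on parent_cl is dropped - returning up the closure stack exactly as
 * described above.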
 *
 * Locking:
 *
 * Closures are based on work items but they can be thought of as more like
 * threads - in that like threads and unlike work items they have a well
 * defined lifetime; they are created (with closure_init()) and eventually
 * complete after a continue_at(cl, NULL, NULL).
 *
 * Suppose you've got some larger structure with a closure embedded in it that's
 * used for periodically doing garbage collection. You only want one garbage
 * collection happening at a time, so the natural thing to do is protect it with
 * a lock. However, it's difficult to use a lock protecting a closure correctly
 * because the unlock should come after the last continue_at() (additionally, if
 * you're using the closure asynchronously a mutex won't work since a mutex has
 * to be unlocked by the same process that locked it).
 *
 * So to make it less error prone and more efficient, we also have the ability
 * to use closures as locks:
 *
 * closure_init_unlocked();
 * closure_trylock();
 *
 * That's all we need for trylock() - the last closure_put() implicitly unlocks
 * it for you.  But for closure_lock(), we also need a wait list:
 *
 * struct closure_with_waitlist frobnicator_cl;
 *
 * closure_init_unlocked(&frobnicator_cl);
 * closure_lock(&frobnicator_cl, NULL);
 *
 * A closure_with_waitlist embeds a closure and a wait list - much like struct
 * delayed_work embeds a work item and a timer_list. The important thing is, use
 * it exactly like you would a regular closure and closure_put() will magically
 * handle everything for you.
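 *
 * A hedged sketch of the trylock pattern - struct cache_set here and do_gc()
 * are hypothetical:
 *
 *	struct cache_set {
 *		struct closure_with_waitlist	gc;
 *	};
 *
 *	static void gc_kick(struct cache_set *c)
 *	{
 *		if (closure_trylock(&c->gc.cl, NULL)) {
 *			do_gc(c);
 *			closure_return(&c->gc.cl);
 *		}
 *	}
 *
 * with closure_init_unlocked(&c->gc) done once when the cache_set is created.
 * If another thread already holds the "lock", closure_trylock() fails and we
 * simply don't run gc; closure_return() drops the last ref, unlocking the
 * closure and waking anything sleeping in closure_lock(&c->gc, NULL).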
 */

struct closure;
typedef void (closure_fn) (struct closure *);

struct closure_waitlist {
	struct llist_head	list;
};

enum closure_type {
	TYPE_closure				= 0,
	TYPE_closure_with_waitlist		= 1,
	MAX_CLOSURE_TYPE			= 1,
};

enum closure_state {
	/*
	 * CLOSURE_WAITING: Set iff the closure is on a waitlist. Must be set by
	 * the thread that owns the closure, and cleared by the thread that's
	 * waking up the closure.
	 *
	 * CLOSURE_SLEEPING: Must be set before a thread uses a closure to sleep
	 * - indicates that cl->task is valid and closure_put() may wake it up.
	 * Only set or cleared by the thread that owns the closure.
	 *
	 * The rest are for debugging and don't affect behaviour:
	 *
	 * CLOSURE_RUNNING: Set when a closure is running (i.e. by
	 * closure_init() and when closure_put() runs the next function), and
	 * must be cleared before remaining hits 0. Primarily to help guard
	 * against incorrect usage and accidentally transferring references.
	 * continue_at() and closure_return() clear it for you, if you're doing
	 * something unusual you can use closure_set_dead() which also helps
	 * annotate where references are being transferred.
	 *
	 * CLOSURE_STACK: Sanity check - remaining should never hit 0 on a
	 * closure with this flag set.
	 */

	CLOSURE_BITS_START	= (1 << 23),
	CLOSURE_DESTRUCTOR	= (1 << 23),
	CLOSURE_WAITING		= (1 << 25),
	CLOSURE_SLEEPING	= (1 << 27),
	CLOSURE_RUNNING		= (1 << 29),
	CLOSURE_STACK		= (1 << 31),
};
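
/*
 * The flags above are spaced two bits apart; CLOSURE_GUARD_MASK below is the
 * set of bits one above each flag. If a closure_get()/closure_put() imbalance
 * ever carries into one of those guard bits, the closure code can catch it
 * (closure.c checks this mask when the refcount changes).
 */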
#define CLOSURE_GUARD_MASK					\
	((CLOSURE_DESTRUCTOR|CLOSURE_WAITING|CLOSURE_SLEEPING|	\
	  CLOSURE_RUNNING|CLOSURE_STACK) << 1)

#define CLOSURE_REMAINING_MASK		(CLOSURE_BITS_START - 1)
#define CLOSURE_REMAINING_INITIALIZER	(1|CLOSURE_RUNNING)

struct closure {
	union {
		struct {
			struct workqueue_struct *wq;
			struct task_struct	*task;
			struct llist_node	list;
			closure_fn		*fn;
		};
		struct work_struct	work;
	};

	struct closure		*parent;

	atomic_t		remaining;

	enum closure_type	type;

#ifdef CONFIG_BCACHE_CLOSURES_DEBUG
#define CLOSURE_MAGIC_DEAD	0xc054dead
#define CLOSURE_MAGIC_ALIVE	0xc054a11e

	unsigned		magic;
	struct list_head	all;
	unsigned long		ip;
	unsigned long		waiting_on;
#endif
};

struct closure_with_waitlist {
	struct closure		cl;
	struct closure_waitlist	wait;
};

extern unsigned invalid_closure_type(void);

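/*
 * __closure_type() maps a closure's C type to its enum closure_type at compile
 * time, via __builtin_types_compatible_p(). invalid_closure_type() is declared
 * but deliberately never defined: passing a type that isn't listed below
 * leaves a call to it in the object file and the build fails at link time.
 */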
#define __CLOSURE_TYPE(cl, _t)						\
	  __builtin_types_compatible_p(typeof(cl), struct _t)		\
		? TYPE_ ## _t :						\

#define __closure_type(cl)						\
(									\
	__CLOSURE_TYPE(cl, closure)					\
	__CLOSURE_TYPE(cl, closure_with_waitlist)			\
	invalid_closure_type()						\
)

void closure_sub(struct closure *cl, int v);
void closure_put(struct closure *cl);
void __closure_wake_up(struct closure_waitlist *list);
bool closure_wait(struct closure_waitlist *list, struct closure *cl);
void closure_sync(struct closure *cl);

bool closure_trylock(struct closure *cl, struct closure *parent);
void __closure_lock(struct closure *cl, struct closure *parent,
		    struct closure_waitlist *wait_list);

#ifdef CONFIG_BCACHE_CLOSURES_DEBUG

void closure_debug_init(void);
void closure_debug_create(struct closure *cl);
void closure_debug_destroy(struct closure *cl);

#else

static inline void closure_debug_init(void) {}
static inline void closure_debug_create(struct closure *cl) {}
static inline void closure_debug_destroy(struct closure *cl) {}

#endif

static inline void closure_set_ip(struct closure *cl)
{
#ifdef CONFIG_BCACHE_CLOSURES_DEBUG
	cl->ip = _THIS_IP_;
#endif
}

static inline void closure_set_ret_ip(struct closure *cl)
{
#ifdef CONFIG_BCACHE_CLOSURES_DEBUG
	cl->ip = _RET_IP_;
#endif
}

static inline void closure_get(struct closure *cl)
{
#ifdef CONFIG_BCACHE_CLOSURES_DEBUG
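	/* you may only take a ref if you already hold one */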
	BUG_ON((atomic_inc_return(&cl->remaining) &
		CLOSURE_REMAINING_MASK) <= 1);
#else
	atomic_inc(&cl->remaining);
#endif
}

static inline void closure_set_stopped(struct closure *cl)
{
	atomic_sub(CLOSURE_RUNNING, &cl->remaining);
}

static inline bool closure_is_unlocked(struct closure *cl)
{
	return atomic_read(&cl->remaining) == -1;
}

static inline void do_closure_init(struct closure *cl, struct closure *parent,
				   bool running)
{
	cl->parent = parent;
	if (parent)
		closure_get(parent);

	if (running) {
		closure_debug_create(cl);
		atomic_set(&cl->remaining, CLOSURE_REMAINING_INITIALIZER);
	} else
		atomic_set(&cl->remaining, -1);

	closure_set_ip(cl);
}

/*
 * Hack to get at the embedded closure if there is one, by doing an unsafe cast:
 * the result of __closure_type() is thrown away, it's used merely for type
 * checking.
 */
#define __to_internal_closure(cl)				\
({								\
	BUILD_BUG_ON(__closure_type(*cl) > MAX_CLOSURE_TYPE);	\
	(struct closure *) cl;					\
})

#define closure_init_type(cl, parent, running)			\
do {								\
	struct closure *_cl = __to_internal_closure(cl);	\
	_cl->type = __closure_type(*(cl));			\
	do_closure_init(_cl, parent, running);			\
} while (0)

/**
 * __closure_init() - Initialize a closure, skipping the memset()
 *
 * May be used instead of closure_init() when memory has already been zeroed.
 */
#define __closure_init(cl, parent)				\
	closure_init_type(cl, parent, true)

/**
 * closure_init() - Initialize a closure, setting the refcount to 1
 * @cl:		closure to initialize
 * @parent:	parent of the new closure. cl will take a refcount on it for its
 *		lifetime; may be NULL.
 */
#define closure_init(cl, parent)				\
do {								\
	memset((cl), 0, sizeof(*(cl)));				\
	__closure_init(cl, parent);				\
} while (0)

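/*
 * closure_init_stack() - Initialize an on-stack closure. CLOSURE_STACK tells
 * the debug code that the refcount must never actually hit 0: an on-stack
 * closure has to be waited on synchronously with closure_sync(), never torn
 * down from closure_put().
 */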
static inline void closure_init_stack(struct closure *cl)
{
	memset(cl, 0, sizeof(struct closure));
	atomic_set(&cl->remaining, CLOSURE_REMAINING_INITIALIZER|CLOSURE_STACK);
}

/**
 * closure_init_unlocked() - Initialize a closure but leave it unlocked.
 * @cl:		closure to initialize
 *
 * For when the closure will be used as a lock. The closure may not be used
 * until after a closure_lock() or closure_trylock().
 */
#define closure_init_unlocked(cl)				\
do {								\
	memset((cl), 0, sizeof(*(cl)));				\
	closure_init_type(cl, NULL, false);			\
} while (0)

/**
 * closure_lock() - lock and initialize a closure.
 * @cl:		the closure to lock
 * @parent:	the new parent for this closure
 *
 * The closure must be of one of the types that has a waitlist (otherwise we
 * wouldn't be able to sleep on contention).
 *
 * @parent has exactly the same meaning as in closure_init(); if non-NULL, the
 * closure will take a reference on @parent which will be released when it is
 * unlocked.
 */
#define closure_lock(cl, parent)				\
	__closure_lock(__to_internal_closure(cl), parent, &(cl)->wait)

static inline void __closure_end_sleep(struct closure *cl)
{
	__set_current_state(TASK_RUNNING);

	if (atomic_read(&cl->remaining) & CLOSURE_SLEEPING)
		atomic_sub(CLOSURE_SLEEPING, &cl->remaining);
}

static inline void __closure_start_sleep(struct closure *cl)
{
	closure_set_ip(cl);
	cl->task = current;
	set_current_state(TASK_UNINTERRUPTIBLE);

	if (!(atomic_read(&cl->remaining) & CLOSURE_SLEEPING))
		atomic_add(CLOSURE_SLEEPING, &cl->remaining);
}

/**
 * closure_wake_up() - wake up all closures on a wait list.
 */
static inline void closure_wake_up(struct closure_waitlist *list)
{
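	/*
	 * Order prior stores (e.g. the event becoming true) against our read
	 * of the wait list.
	 */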
	smp_mb();
	__closure_wake_up(list);
}

/*
 * Wait on an event, synchronously or asynchronously - analogous to wait_event()
 * but for closures.
 *
 * The loop is oddly structured so as to avoid a race; we must check the
 * condition again after we've added ourselves to the waitlist. We know if we
 * were already on the waitlist because closure_wait() returns false; thus, we
 * only schedule or break if closure_wait() returns false. If it returns true,
 * we just loop again - rechecking the condition.
 *
 * The __closure_wake_up() is necessary because we may race with the event
 * becoming true; i.e. we see event false -> wait -> recheck condition, but the
 * thread that made the event true may have called closure_wake_up() before we
 * added ourselves to the wait list.
 *
 * We have to call closure_sync() at the end instead of just
 * __closure_end_sleep() because a different thread might've called
 * closure_wake_up() before us and gotten preempted before they dropped the
 * refcount on our closure. If this was a stack allocated closure, that would be
 * bad.
 */
#define closure_wait_event(list, cl, condition)				\
({									\
	typeof(condition) ret;						\
									\
	while (1) {							\
		ret = (condition);					\
		if (ret) {						\
			__closure_wake_up(list);			\
			closure_sync(cl);				\
			break;						\
		}							\
									\
		__closure_start_sleep(cl);				\
									\
		if (!closure_wait(list, cl))				\
			schedule();					\
	}								\
									\
	ret;								\
})

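/*
 * Run the closure's next function: queue it on cl->wq if one was set, else
 * call it synchronously. cl->fn and cl->work.func share storage via the union
 * in struct closure, which is what makes the INIT_WORK() below pick up the
 * right function.
 */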
static inline void closure_queue(struct closure *cl)
{
	struct workqueue_struct *wq = cl->wq;
	if (wq) {
		INIT_WORK(&cl->work, cl->work.func);
		BUG_ON(!queue_work(wq, &cl->work));
	} else
		cl->fn(cl);
}

static inline void set_closure_fn(struct closure *cl, closure_fn *fn,
				  struct workqueue_struct *wq)
{
	BUG_ON(object_is_on_stack(cl));
	closure_set_ip(cl);
	cl->fn = fn;
	cl->wq = wq;
	/* order the fn/wq stores before the atomic_dec() in closure_put() */
	smp_mb__before_atomic_dec();
}

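/*
 * continue_at() drops CLOSURE_RUNNING and the running thread's ref in a single
 * atomic sub; if that takes the refcount to 0, closure_sub() runs @_fn out of
 * @_wq. The return keeps the caller from touching @_cl afterwards.
 */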
#define continue_at(_cl, _fn, _wq)					\
do {									\
	set_closure_fn(_cl, _fn, _wq);					\
	closure_sub(_cl, CLOSURE_RUNNING + 1);				\
	return;								\
} while (0)

#define closure_return(_cl)	continue_at((_cl), NULL, NULL)

#define continue_at_nobarrier(_cl, _fn, _wq)				\
do {									\
	set_closure_fn(_cl, _fn, _wq);					\
	closure_queue(_cl);						\
	return;								\
} while (0)

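/*
 * Like closure_return(), but @_destructor runs (with no workqueue) once the
 * refcount hits 0: the single closure_sub() below clears CLOSURE_RUNNING,
 * sets CLOSURE_DESTRUCTOR and drops the thread's ref.
 */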
#define closure_return_with_destructor(_cl, _destructor)		\
do {									\
	set_closure_fn(_cl, _destructor, NULL);				\
	closure_sub(_cl, CLOSURE_RUNNING - CLOSURE_DESTRUCTOR + 1);	\
	return;								\
} while (0)

static inline void closure_call(struct closure *cl, closure_fn fn,
				struct workqueue_struct *wq,
				struct closure *parent)
{
	closure_init(cl, parent);
	continue_at_nobarrier(cl, fn, wq);
}

static inline void closure_trylock_call(struct closure *cl, closure_fn fn,
					struct workqueue_struct *wq,
					struct closure *parent)
{
	if (closure_trylock(cl, parent))
		continue_at_nobarrier(cl, fn, wq);
}

#endif /* _LINUX_CLOSURE_H */
534