#ifndef _LINUX_CLOSURE_H
#define _LINUX_CLOSURE_H

#include <linux/llist.h>
#include <linux/sched.h>
#include <linux/workqueue.h>

/*
 * Closure is perhaps the most overused and abused term in computer science, but
 * since I've been unable to come up with anything better you're stuck with it
 * again.
 *
 * What are closures?
 *
 * They embed a refcount. The basic idea is they count "things that are in
 * progress" - in flight bios, some other thread that's doing something else -
 * anything you might want to wait on.
 *
 * The refcount may be manipulated with closure_get() and closure_put().
 * closure_put() is where many of the interesting things happen, when it causes
 * the refcount to go to 0.
 *
 * Closures can be used to wait on things both synchronously and asynchronously,
 * and synchronous and asynchronous use can be mixed without restriction. To
 * wait synchronously, use closure_sync() - you will sleep until your closure's
 * refcount hits 1.
 *
 * To wait asynchronously, use
 *	continue_at(cl, next_function, workqueue);
 *
 * passing it, as you might expect, the function to run when nothing is pending
 * and the workqueue to run that function out of.
 *
 * continue_at() also, critically, is a macro that returns from the calling
 * function. There's good reason for this.
 *
 * To use closures safely asynchronously, they must always have a refcount,
 * owned by the thread that is running them, while they are running. Otherwise,
 * suppose you submit some bios and wish to have a function run when they all
 * complete:
 *
 * foo_endio(struct bio *bio, int error)
 * {
 *	closure_put(cl);
 * }
 *
 * closure_init(cl);
 *
 * do_stuff();
 * closure_get(cl);
 * bio1->bi_endio = foo_endio;
 * bio_submit(bio1);
 *
 * do_more_stuff();
 * closure_get(cl);
 * bio2->bi_endio = foo_endio;
 * bio_submit(bio2);
 *
 * continue_at(cl, complete_some_read, system_wq);
 *
 * If the closure's refcount started at 0, complete_some_read() could run before
 * the second bio was submitted - which is almost always not what you want! More
 * importantly, it wouldn't be possible to say whether the original thread or
 * complete_some_read()'s thread owned the closure - and whatever state it was
 * associated with!
 *
 * So, closure_init() initializes a closure's refcount to 1 - and when a
 * closure_fn is run, the refcount will be reset to 1 first.
 *
 * Then, the rule is - if you got the refcount with closure_get(), release it
 * with closure_put() (i.e. in a bio->bi_endio function). If you have a refcount
 * on a closure because you called closure_init() or you were run out of a
 * closure - _always_ use continue_at(). Doing so consistently will help
 * eliminate an entire class of particularly pernicious races.
 *
 * For a closure to wait on an arbitrary event, we need to introduce waitlists:
 *
 * struct closure_waitlist list;
 * closure_wait_event(&list, cl, condition);
 * closure_wake_up(&list);
 *
 * These work analogously to wait_event() and wake_up() - except that instead of
 * operating on the current thread (for wait_event()) and lists of threads, they
 * operate on an explicit closure and lists of closures.
 *
 * Because it's a closure we can now wait either synchronously or
 * asynchronously. closure_wait_event() returns the current value of the
 * condition, and if it returned false continue_at() or closure_sync() can be
 * used to wait for it to become true.
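 *
 * For instance (an illustrative sketch only - request_run(), bucket_available()
 * and the bucket_wait list are made-up names), an asynchronous waiter might be
 * written as:
 *
 *	static void request_run(struct closure *cl)
 *	{
 *		if (!closure_wait_event(&c->bucket_wait, cl,
 *					bucket_available(c)))
 *			continue_at(cl, request_run, system_wq);
 *
 *		(bucket_available() was true - carry on)
 *	}
 *
 * Whoever makes bucket_available() true then calls
 * closure_wake_up(&c->bucket_wait), which causes request_run() to be run again
 * out of system_wq, rechecking the condition.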
 *
 * closure_wait_event() is useful for waiting on things when you can't sleep in
 * the context in which you must check the condition (perhaps a spinlock held,
 * or you might be beneath generic_make_request() - in which case you can't
 * sleep on IO).
 *
 * closure_wait_event() will wait either synchronously or asynchronously,
 * depending on whether the closure is in blocking mode or not. You can pick a
 * mode explicitly with closure_wait_event_sync() and
 * closure_wait_event_async(), which do just what you might expect.
 *
 * Lastly, you might have a wait list dedicated to a specific event, and have no
 * need for specifying the condition - you just want to wait until someone runs
 * closure_wake_up() on the appropriate wait list. In that case, just use
 * closure_wait(). It will return either true or false, depending on whether the
 * closure was already on a wait list or not - a closure can only be on one wait
 * list at a time.
 *
 * Parents:
 *
 * closure_init() takes two arguments - the closure to initialize, and a
 * (possibly NULL) parent.
 *
 * If parent is non null, the new closure will hold a refcount on its parent for
 * its lifetime; a closure is considered to be "finished" when its refcount hits
 * 0 and the function to run is null. Hence
 *
 * continue_at(cl, NULL, NULL);
 *
 * returns up the (spaghetti) stack of closures, precisely like normal return
 * returns up the C stack. continue_at() with non null fn is better thought of
 * as doing a tail call.
 *
 * All this implies that a closure should typically be embedded in a particular
 * struct (which its refcount will normally control the lifetime of), and that
 * struct can very much be thought of as a stack frame.
 *
 * Locking:
 *
 * Closures are based on work items but they can be thought of as more like
 * threads - in that like threads and unlike work items they have a well
 * defined lifetime; they are created (with closure_init()) and eventually
 * complete after a continue_at(cl, NULL, NULL).
 *
 * Suppose you've got some larger structure with a closure embedded in it that's
 * used for periodically doing garbage collection. You only want one garbage
 * collection happening at a time, so the natural thing to do is protect it with
 * a lock. However, it's difficult to use a lock protecting a closure correctly
 * because the unlock should come after the last continue_at() (additionally, if
 * you're using the closure asynchronously a mutex won't work since a mutex has
 * to be unlocked by the same process that locked it).
 *
 * So to make it less error prone and more efficient, we also have the ability
 * to use closures as locks:
 *
 * closure_init_unlocked();
 * closure_trylock();
 *
 * That's all we need for trylock() - the last closure_put() implicitly unlocks
 * it for you. But for closure_lock(), we also need a wait list:
 *
 * struct closure_with_waitlist frobnicator_cl;
 *
 * closure_init_unlocked(&frobnicator_cl);
 * closure_lock(&frobnicator_cl, NULL);
 *
 * A closure_with_waitlist embeds a closure and a wait list - much like struct
 * delayed_work embeds a work item and a timer_list. The important thing is, use
 * it exactly like you would a regular closure and closure_put() will magically
 * handle everything for you.
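 *
 * For the garbage collection example above, this might look something like the
 * following (an illustrative sketch - gc_finish() and the fields are made-up
 * names):
 *
 *	if (closure_trylock(&c->gc.cl, &c->cl)) {
 *		(do the collection)
 *		continue_at(&c->gc.cl, gc_finish, system_wq);
 *	}
 *
 * There is no separate unlock call: the lock is dropped by the final
 * closure_put(), here when gc_finish() eventually does closure_return().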
 *
 * We've got closures that embed timers, too. They're called, appropriately
 * enough:
 * struct closure_with_timer;
 *
 * This gives you access to closure_delay(). It takes a refcount for a specified
 * number of jiffies - you could then call closure_sync() (for a slightly
 * convoluted version of msleep()) or continue_at() - which gives you the same
 * effect as using a delayed work item, except you can reuse the work_struct
 * already embedded in struct closure.
 *
 * Lastly, there's struct closure_with_waitlist_and_timer. It does what you
 * probably expect, if you happen to need the features of both. (You don't
 * really want to know how all this is implemented, but if I've done my job
 * right you shouldn't have to care).
 */

struct closure;
typedef void (closure_fn) (struct closure *);

struct closure_waitlist {
	struct llist_head	list;
};

enum closure_type {
	TYPE_closure				= 0,
	TYPE_closure_with_waitlist		= 1,
	TYPE_closure_with_timer			= 2,
	TYPE_closure_with_waitlist_and_timer	= 3,
	MAX_CLOSURE_TYPE			= 3,
};

enum closure_state {
	/*
	 * CLOSURE_BLOCKING: Causes closure_wait_event() to block, instead of
	 * waiting asynchronously
	 *
	 * CLOSURE_WAITING: Set iff the closure is on a waitlist. Must be set by
	 * the thread that owns the closure, and cleared by the thread that's
	 * waking up the closure.
	 *
	 * CLOSURE_SLEEPING: Must be set before a thread uses a closure to sleep
	 * - indicates that cl->task is valid and closure_put() may wake it up.
	 * Only set or cleared by the thread that owns the closure.
	 *
	 * CLOSURE_TIMER: Analogous to CLOSURE_WAITING, indicates that a closure
	 * has an outstanding timer. Must be set by the thread that owns the
	 * closure, and cleared by the timer function when the timer goes off.
	 *
	 * The rest are for debugging and don't affect behaviour:
	 *
	 * CLOSURE_RUNNING: Set when a closure is running (i.e. by
	 * closure_init() and when closure_put() runs the next function), and
	 * must be cleared before remaining hits 0. Primarily to help guard
	 * against incorrect usage and accidentally transferring references.
	 * continue_at() and closure_return() clear it for you; if you're doing
	 * something unusual you can use closure_set_dead(), which also helps
	 * annotate where references are being transferred.
	 *
	 * CLOSURE_STACK: Sanity check - remaining should never hit 0 on a
	 * closure with this flag set.
	 */

	CLOSURE_BITS_START	= (1 << 19),
	CLOSURE_DESTRUCTOR	= (1 << 19),
	CLOSURE_BLOCKING	= (1 << 21),
	CLOSURE_WAITING		= (1 << 23),
	CLOSURE_SLEEPING	= (1 << 25),
	CLOSURE_TIMER		= (1 << 27),
	CLOSURE_RUNNING		= (1 << 29),
	CLOSURE_STACK		= (1 << 31),
};

#define CLOSURE_GUARD_MASK					\
	((CLOSURE_DESTRUCTOR|CLOSURE_BLOCKING|CLOSURE_WAITING|	\
	  CLOSURE_SLEEPING|CLOSURE_TIMER|CLOSURE_RUNNING|CLOSURE_STACK) << 1)

#define CLOSURE_REMAINING_MASK		(CLOSURE_BITS_START - 1)
#define CLOSURE_REMAINING_INITIALIZER	(1|CLOSURE_RUNNING)

struct closure {
	union {
		struct {
			struct workqueue_struct *wq;
			struct task_struct	*task;
			struct llist_node	list;
			closure_fn		*fn;
		};
		struct work_struct	work;
	};

	struct closure		*parent;

	atomic_t		remaining;

	enum closure_type	type;

#ifdef CONFIG_BCACHE_CLOSURES_DEBUG
#define CLOSURE_MAGIC_DEAD	0xc054dead
#define CLOSURE_MAGIC_ALIVE	0xc054a11e

	unsigned		magic;
	struct list_head	all;
	unsigned long		ip;
	unsigned long		waiting_on;
#endif
};

struct closure_with_waitlist {
	struct closure		cl;
	struct closure_waitlist	wait;
};

struct closure_with_timer {
	struct closure		cl;
	struct timer_list	timer;
};

struct closure_with_waitlist_and_timer {
	struct closure		cl;
	struct closure_waitlist	wait;
	struct timer_list	timer;
};

extern unsigned invalid_closure_type(void);

#define __CLOSURE_TYPE(cl, _t)						\
	__builtin_types_compatible_p(typeof(cl), struct _t)		\
		? TYPE_ ## _t :						\

#define __closure_type(cl)						\
(									\
	__CLOSURE_TYPE(cl, closure)					\
	__CLOSURE_TYPE(cl, closure_with_waitlist)			\
	__CLOSURE_TYPE(cl, closure_with_timer)				\
	__CLOSURE_TYPE(cl, closure_with_waitlist_and_timer)		\
	invalid_closure_type()						\
)

void closure_sub(struct closure *cl, int v);
void closure_put(struct closure *cl);
void closure_queue(struct closure *cl);
void __closure_wake_up(struct closure_waitlist *list);
bool closure_wait(struct closure_waitlist *list, struct closure *cl);
void closure_sync(struct closure *cl);

bool closure_trylock(struct closure *cl, struct closure *parent);
void __closure_lock(struct closure *cl, struct closure *parent,
		    struct closure_waitlist *wait_list);

void do_closure_timer_init(struct closure *cl);
bool __closure_delay(struct closure *cl, unsigned long delay,
		     struct timer_list *timer);
void __closure_flush(struct closure *cl, struct timer_list *timer);
void __closure_flush_sync(struct closure *cl, struct timer_list *timer);

#ifdef CONFIG_BCACHE_CLOSURES_DEBUG

void closure_debug_init(void);
void closure_debug_create(struct closure *cl);
void closure_debug_destroy(struct closure *cl);

#else

static inline void closure_debug_init(void) {}
static inline void closure_debug_create(struct closure *cl) {}
static inline void closure_debug_destroy(struct closure *cl) {}

#endif

static inline void closure_set_ip(struct closure *cl)
{
#ifdef CONFIG_BCACHE_CLOSURES_DEBUG
	cl->ip = _THIS_IP_;
#endif
}

static inline void closure_set_ret_ip(struct closure *cl)
{
#ifdef CONFIG_BCACHE_CLOSURES_DEBUG
	cl->ip = _RET_IP_;
#endif
}
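
/*
 * Illustrative only, mirroring the bio example in the comment at the top of
 * this file: a reference taken with closure_get() before handing the closure
 * to some asynchronous context is dropped with closure_put() when that work
 * completes:
 *
 *	closure_get(cl);
 *	bio->bi_endio = foo_endio;	(foo_endio() calls closure_put(cl))
 *	bio_submit(bio);
 */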

static inline void closure_get(struct closure *cl)
{
#ifdef CONFIG_BCACHE_CLOSURES_DEBUG
	BUG_ON((atomic_inc_return(&cl->remaining) &
		CLOSURE_REMAINING_MASK) <= 1);
#else
	atomic_inc(&cl->remaining);
#endif
}

static inline void closure_set_stopped(struct closure *cl)
{
	atomic_sub(CLOSURE_RUNNING, &cl->remaining);
}

static inline bool closure_is_stopped(struct closure *cl)
{
	return !(atomic_read(&cl->remaining) & CLOSURE_RUNNING);
}

static inline bool closure_is_unlocked(struct closure *cl)
{
	return atomic_read(&cl->remaining) == -1;
}

static inline void do_closure_init(struct closure *cl, struct closure *parent,
				   bool running)
{
	switch (cl->type) {
	case TYPE_closure_with_timer:
	case TYPE_closure_with_waitlist_and_timer:
		do_closure_timer_init(cl);
	default:
		break;
	}

	cl->parent = parent;
	if (parent)
		closure_get(parent);

	if (running) {
		closure_debug_create(cl);
		atomic_set(&cl->remaining, CLOSURE_REMAINING_INITIALIZER);
	} else
		atomic_set(&cl->remaining, -1);

	closure_set_ip(cl);
}

/*
 * Hack to get at the embedded closure if there is one, by doing an unsafe cast:
 * the result of __closure_type() is thrown away, it's used merely for type
 * checking.
 */
#define __to_internal_closure(cl)				\
({								\
	BUILD_BUG_ON(__closure_type(*cl) > MAX_CLOSURE_TYPE);	\
	(struct closure *) cl;					\
})

#define closure_init_type(cl, parent, running)			\
do {								\
	struct closure *_cl = __to_internal_closure(cl);	\
	_cl->type = __closure_type(*(cl));			\
	do_closure_init(_cl, parent, running);			\
} while (0)

/**
 * __closure_init() - Initialize a closure, skipping the memset()
 *
 * May be used instead of closure_init() when memory has already been zeroed.
 */
#define __closure_init(cl, parent)				\
	closure_init_type(cl, parent, true)

/**
 * closure_init() - Initialize a closure, setting the refcount to 1
 * @cl:		closure to initialize
 * @parent:	parent of the new closure. cl will take a refcount on it for its
 *		lifetime; may be NULL.
 */
#define closure_init(cl, parent)				\
do {								\
	memset((cl), 0, sizeof(*(cl)));				\
	__closure_init(cl, parent);				\
} while (0)

static inline void closure_init_stack(struct closure *cl)
{
	memset(cl, 0, sizeof(struct closure));
	atomic_set(&cl->remaining, CLOSURE_REMAINING_INITIALIZER|
		   CLOSURE_BLOCKING|CLOSURE_STACK);
}

/**
 * closure_init_unlocked() - Initialize a closure but leave it unlocked.
 * @cl:		closure to initialize
 *
 * For when the closure will be used as a lock. The closure may not be used
 * until after a closure_lock() or closure_trylock().
 */
#define closure_init_unlocked(cl)				\
do {								\
	memset((cl), 0, sizeof(*(cl)));				\
	closure_init_type(cl, NULL, false);			\
} while (0)
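
/*
 * An illustrative sketch (reusing the made-up foo_endio()/bio_submit() names
 * from the comment at the top of this file): a stack allocated closure used to
 * wait synchronously for submitted IO:
 *
 *	struct closure cl;
 *
 *	closure_init_stack(&cl);
 *
 *	closure_get(&cl);
 *	bio->bi_endio = foo_endio;
 *	bio_submit(bio);
 *
 *	closure_sync(&cl);
 */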

/**
 * closure_lock() - lock and initialize a closure.
 * @cl:		the closure to lock
 * @parent:	the new parent for this closure
 *
 * The closure must be of one of the types that has a waitlist (otherwise we
 * wouldn't be able to sleep on contention).
 *
 * @parent has exactly the same meaning as in closure_init(); if non null, the
 * closure will take a reference on @parent which will be released when it is
 * unlocked.
 */
#define closure_lock(cl, parent)				\
	__closure_lock(__to_internal_closure(cl), parent, &(cl)->wait)

/**
 * closure_delay() - delay some number of jiffies
 * @cl:		the closure that will sleep
 * @delay:	the delay in jiffies
 *
 * Takes a refcount on @cl which will be released after @delay jiffies; this may
 * be used to have a function run after a delay with continue_at(), or
 * closure_sync() may be used for a convoluted version of msleep().
 */
#define closure_delay(cl, delay)				\
	__closure_delay(__to_internal_closure(cl), delay, &(cl)->timer)

#define closure_flush(cl)					\
	__closure_flush(__to_internal_closure(cl), &(cl)->timer)

#define closure_flush_sync(cl)					\
	__closure_flush_sync(__to_internal_closure(cl), &(cl)->timer)

static inline void __closure_end_sleep(struct closure *cl)
{
	__set_current_state(TASK_RUNNING);

	if (atomic_read(&cl->remaining) & CLOSURE_SLEEPING)
		atomic_sub(CLOSURE_SLEEPING, &cl->remaining);
}

static inline void __closure_start_sleep(struct closure *cl)
{
	closure_set_ip(cl);
	cl->task = current;
	set_current_state(TASK_UNINTERRUPTIBLE);

	if (!(atomic_read(&cl->remaining) & CLOSURE_SLEEPING))
		atomic_add(CLOSURE_SLEEPING, &cl->remaining);
}

/**
 * closure_blocking() - returns true if the closure is in blocking mode.
 *
 * If a closure is in blocking mode, closure_wait_event() will sleep until the
 * condition is true instead of waiting asynchronously.
 */
static inline bool closure_blocking(struct closure *cl)
{
	return atomic_read(&cl->remaining) & CLOSURE_BLOCKING;
}

/**
 * set_closure_blocking() - put a closure in blocking mode.
 *
 * If a closure is in blocking mode, closure_wait_event() will sleep until the
 * condition is true instead of waiting asynchronously.
 *
 * Not thread safe - can only be called by the thread running the closure.
 */
static inline void set_closure_blocking(struct closure *cl)
{
	if (!closure_blocking(cl))
		atomic_add(CLOSURE_BLOCKING, &cl->remaining);
}

/*
 * Not thread safe - can only be called by the thread running the closure.
 */
static inline void clear_closure_blocking(struct closure *cl)
{
	if (closure_blocking(cl))
		atomic_sub(CLOSURE_BLOCKING, &cl->remaining);
}

/**
 * closure_wake_up() - wake up all closures on a wait list.
 */
static inline void closure_wake_up(struct closure_waitlist *list)
{
	smp_mb();
	__closure_wake_up(list);
}
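
/*
 * Illustrative only (hypothetical free list and free_wait waitlist): the
 * thread that makes a condition true pairs the state change with a wakeup, so
 * that any closure that raced and queued itself on the waitlist gets woken:
 *
 *	spin_lock(&c->lock);
 *	list_add(&b->list, &c->free);
 *	spin_unlock(&c->lock);
 *
 *	closure_wake_up(&c->free_wait);
 */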

/*
 * Wait on an event, synchronously or asynchronously - analogous to wait_event()
 * but for closures.
 *
 * The loop is oddly structured so as to avoid a race; we must check the
 * condition again after we've added ourselves to the waitlist. We know if we
 * were already on the waitlist because closure_wait() returns false; thus, we
 * only schedule or break if closure_wait() returns false. If it returns true,
 * we just loop again - rechecking the condition.
 *
 * The __closure_wake_up() is necessary because we may race with the event
 * becoming true; i.e. we see event false -> wait -> recheck condition, but the
 * thread that made the event true may have called closure_wake_up() before we
 * added ourselves to the wait list.
 *
 * We have to call closure_sync() at the end instead of just
 * __closure_end_sleep() because a different thread might've called
 * closure_wake_up() before us and gotten preempted before they dropped the
 * refcount on our closure. If this was a stack allocated closure, that would be
 * bad.
 */
#define __closure_wait_event(list, cl, condition, _block)		\
({									\
	bool block = _block;						\
	typeof(condition) ret;						\
									\
	while (1) {							\
		ret = (condition);					\
		if (ret) {						\
			__closure_wake_up(list);			\
			if (block)					\
				closure_sync(cl);			\
									\
			break;						\
		}							\
									\
		if (block)						\
			__closure_start_sleep(cl);			\
									\
		if (!closure_wait(list, cl)) {				\
			if (!block)					\
				break;					\
									\
			schedule();					\
		}							\
	}								\
									\
	ret;								\
})

/**
 * closure_wait_event() - wait on a condition, synchronously or asynchronously.
 * @list:	the wait list to wait on
 * @cl:		the closure that is doing the waiting
 * @condition:	a C expression for the event to wait for
 *
 * If the closure is in blocking mode, sleeps until the @condition evaluates to
 * true - exactly like wait_event().
 *
 * If the closure is not in blocking mode, waits asynchronously; if the
 * condition is currently false the @cl is put onto @list and returns. @list
 * owns a refcount on @cl; closure_sync() or continue_at() may be used later to
 * wait for another thread to wake up @list, which drops the refcount on @cl.
 *
 * Returns the value of @condition; @cl will be on @list iff @condition was
 * false.
 *
 * closure_wake_up(@list) must be called after changing any variable that could
 * cause @condition to become true.
 */
#define closure_wait_event(list, cl, condition)				\
	__closure_wait_event(list, cl, condition, closure_blocking(cl))

#define closure_wait_event_async(list, cl, condition)			\
	__closure_wait_event(list, cl, condition, false)

#define closure_wait_event_sync(list, cl, condition)			\
	__closure_wait_event(list, cl, condition, true)

static inline void set_closure_fn(struct closure *cl, closure_fn *fn,
				  struct workqueue_struct *wq)
{
	BUG_ON(object_is_on_stack(cl));
	closure_set_ip(cl);
	cl->fn = fn;
	cl->wq = wq;
	/* between setting fn and the atomic_dec() in closure_put() */
	smp_mb__before_atomic_dec();
}

#define continue_at(_cl, _fn, _wq)					\
do {									\
	set_closure_fn(_cl, _fn, _wq);					\
	closure_sub(_cl, CLOSURE_RUNNING + 1);				\
	return;								\
} while (0)

#define closure_return(_cl)	continue_at((_cl), NULL, NULL)

#define continue_at_nobarrier(_cl, _fn, _wq)				\
do {									\
	set_closure_fn(_cl, _fn, _wq);					\
	closure_queue(_cl);						\
	return;								\
} while (0)

#define closure_return_with_destructor(_cl, _destructor)		\
do {									\
	set_closure_fn(_cl, _destructor, NULL);				\
	closure_sub(_cl, CLOSURE_RUNNING - CLOSURE_DESTRUCTOR + 1);	\
	return;								\
} while (0)

static inline void closure_call(struct closure *cl, closure_fn fn,
				struct workqueue_struct *wq,
				struct closure *parent)
{
	closure_init(cl, parent);
	continue_at_nobarrier(cl, fn, wq);
}

static inline void closure_trylock_call(struct closure *cl, closure_fn fn,
					struct workqueue_struct *wq,
					struct closure *parent)
{
	if (closure_trylock(cl, parent))
		continue_at_nobarrier(cl, fn, wq);
}

#endif /* _LINUX_CLOSURE_H */