xref: /openbmc/linux/kernel/workqueue.c (revision 1e19ffc6)
11da177e4SLinus Torvalds /*
21da177e4SLinus Torvalds  * linux/kernel/workqueue.c
31da177e4SLinus Torvalds  *
41da177e4SLinus Torvalds  * Generic mechanism for defining kernel helper threads for running
51da177e4SLinus Torvalds  * arbitrary tasks in process context.
61da177e4SLinus Torvalds  *
71da177e4SLinus Torvalds  * Started by Ingo Molnar, Copyright (C) 2002
81da177e4SLinus Torvalds  *
91da177e4SLinus Torvalds  * Derived from the taskqueue/keventd code by:
101da177e4SLinus Torvalds  *
111da177e4SLinus Torvalds  *   David Woodhouse <dwmw2@infradead.org>
12e1f8e874SFrancois Cami  *   Andrew Morton
131da177e4SLinus Torvalds  *   Kai Petzke <wpp@marie.physik.tu-berlin.de>
141da177e4SLinus Torvalds  *   Theodore Ts'o <tytso@mit.edu>
1589ada679SChristoph Lameter  *
16cde53535SChristoph Lameter  * Made to use alloc_percpu by Christoph Lameter.
171da177e4SLinus Torvalds  */
181da177e4SLinus Torvalds 
191da177e4SLinus Torvalds #include <linux/module.h>
201da177e4SLinus Torvalds #include <linux/kernel.h>
211da177e4SLinus Torvalds #include <linux/sched.h>
221da177e4SLinus Torvalds #include <linux/init.h>
231da177e4SLinus Torvalds #include <linux/signal.h>
241da177e4SLinus Torvalds #include <linux/completion.h>
251da177e4SLinus Torvalds #include <linux/workqueue.h>
261da177e4SLinus Torvalds #include <linux/slab.h>
271da177e4SLinus Torvalds #include <linux/cpu.h>
281da177e4SLinus Torvalds #include <linux/notifier.h>
291da177e4SLinus Torvalds #include <linux/kthread.h>
301fa44ecaSJames Bottomley #include <linux/hardirq.h>
3146934023SChristoph Lameter #include <linux/mempolicy.h>
32341a5958SRafael J. Wysocki #include <linux/freezer.h>
33d5abe669SPeter Zijlstra #include <linux/kallsyms.h>
34d5abe669SPeter Zijlstra #include <linux/debug_locks.h>
354e6045f1SJohannes Berg #include <linux/lockdep.h>
36c34056a3STejun Heo #include <linux/idr.h>
371da177e4SLinus Torvalds 
381da177e4SLinus Torvalds /*
394690c4abSTejun Heo  * Structure fields follow one of the following exclusion rules.
404690c4abSTejun Heo  *
414690c4abSTejun Heo  * I: Set during initialization and read-only afterwards.
424690c4abSTejun Heo  *
434690c4abSTejun Heo  * L: cwq->lock protected.  Access with cwq->lock held.
444690c4abSTejun Heo  *
4573f53c4aSTejun Heo  * F: wq->flush_mutex protected.
4673f53c4aSTejun Heo  *
474690c4abSTejun Heo  * W: workqueue_lock protected.
484690c4abSTejun Heo  */
494690c4abSTejun Heo 
50c34056a3STejun Heo struct cpu_workqueue_struct;
51c34056a3STejun Heo 
52c34056a3STejun Heo struct worker {
53c34056a3STejun Heo 	struct work_struct	*current_work;	/* L: work being processed */
54affee4b2STejun Heo 	struct list_head	scheduled;	/* L: scheduled works */
55c34056a3STejun Heo 	struct task_struct	*task;		/* I: worker task */
56c34056a3STejun Heo 	struct cpu_workqueue_struct *cwq;	/* I: the associated cwq */
57c34056a3STejun Heo 	int			id;		/* I: worker id */
58c34056a3STejun Heo };
59c34056a3STejun Heo 
604690c4abSTejun Heo /*
61f756d5e2SNathan Lynch  * The per-CPU workqueue (if single thread, we always use the first
620f900049STejun Heo  * possible cpu).  The lower WORK_STRUCT_FLAG_BITS of
630f900049STejun Heo  * work_struct->data are used for flags and thus cwqs need to be
640f900049STejun Heo  * aligned on a (1 << WORK_STRUCT_FLAG_BITS) byte boundary.
651da177e4SLinus Torvalds  */
661da177e4SLinus Torvalds struct cpu_workqueue_struct {
671da177e4SLinus Torvalds 
681da177e4SLinus Torvalds 	spinlock_t lock;
691da177e4SLinus Torvalds 
701da177e4SLinus Torvalds 	struct list_head worklist;
711da177e4SLinus Torvalds 	wait_queue_head_t more_work;
721537663fSTejun Heo 	unsigned int		cpu;
73c34056a3STejun Heo 	struct worker		*worker;
741da177e4SLinus Torvalds 
754690c4abSTejun Heo 	struct workqueue_struct *wq;		/* I: the owning workqueue */
7673f53c4aSTejun Heo 	int			work_color;	/* L: current color */
7773f53c4aSTejun Heo 	int			flush_color;	/* L: flushing color */
7873f53c4aSTejun Heo 	int			nr_in_flight[WORK_NR_COLORS];
7973f53c4aSTejun Heo 						/* L: nr of in_flight works */
801e19ffc6STejun Heo 	int			nr_active;	/* L: nr of active works */
811e19ffc6STejun Heo 	int			max_active;	/* I: max active works */
821e19ffc6STejun Heo 	struct list_head	delayed_works;	/* L: delayed works */
830f900049STejun Heo };
841da177e4SLinus Torvalds 
851da177e4SLinus Torvalds /*
8673f53c4aSTejun Heo  * Structure used to wait for workqueue flush.
8773f53c4aSTejun Heo  */
8873f53c4aSTejun Heo struct wq_flusher {
8973f53c4aSTejun Heo 	struct list_head	list;		/* F: list of flushers */
9073f53c4aSTejun Heo 	int			flush_color;	/* F: flush color waiting for */
9173f53c4aSTejun Heo 	struct completion	done;		/* flush completion */
9273f53c4aSTejun Heo };
9373f53c4aSTejun Heo 
9473f53c4aSTejun Heo /*
951da177e4SLinus Torvalds  * The externally visible workqueue abstraction is an array of
961da177e4SLinus Torvalds  * per-CPU workqueues:
971da177e4SLinus Torvalds  */
981da177e4SLinus Torvalds struct workqueue_struct {
9997e37d7bSTejun Heo 	unsigned int		flags;		/* I: WQ_* flags */
1004690c4abSTejun Heo 	struct cpu_workqueue_struct *cpu_wq;	/* I: cwq's */
1014690c4abSTejun Heo 	struct list_head	list;		/* W: list of all workqueues */
10273f53c4aSTejun Heo 
10373f53c4aSTejun Heo 	struct mutex		flush_mutex;	/* protects wq flushing */
10473f53c4aSTejun Heo 	int			work_color;	/* F: current work color */
10573f53c4aSTejun Heo 	int			flush_color;	/* F: current flush color */
10673f53c4aSTejun Heo 	atomic_t		nr_cwqs_to_flush; /* flush in progress */
10773f53c4aSTejun Heo 	struct wq_flusher	*first_flusher;	/* F: first flusher */
10873f53c4aSTejun Heo 	struct list_head	flusher_queue;	/* F: flush waiters */
10973f53c4aSTejun Heo 	struct list_head	flusher_overflow; /* F: flush overflow list */
11073f53c4aSTejun Heo 
1114690c4abSTejun Heo 	const char		*name;		/* I: workqueue name */
1124e6045f1SJohannes Berg #ifdef CONFIG_LOCKDEP
1134e6045f1SJohannes Berg 	struct lockdep_map	lockdep_map;
1144e6045f1SJohannes Berg #endif
1151da177e4SLinus Torvalds };
1161da177e4SLinus Torvalds 
117dc186ad7SThomas Gleixner #ifdef CONFIG_DEBUG_OBJECTS_WORK
118dc186ad7SThomas Gleixner 
119dc186ad7SThomas Gleixner static struct debug_obj_descr work_debug_descr;
120dc186ad7SThomas Gleixner 
121dc186ad7SThomas Gleixner /*
122dc186ad7SThomas Gleixner  * fixup_init is called when:
123dc186ad7SThomas Gleixner  * - an active object is initialized
124dc186ad7SThomas Gleixner  */
125dc186ad7SThomas Gleixner static int work_fixup_init(void *addr, enum debug_obj_state state)
126dc186ad7SThomas Gleixner {
127dc186ad7SThomas Gleixner 	struct work_struct *work = addr;
128dc186ad7SThomas Gleixner 
129dc186ad7SThomas Gleixner 	switch (state) {
130dc186ad7SThomas Gleixner 	case ODEBUG_STATE_ACTIVE:
131dc186ad7SThomas Gleixner 		cancel_work_sync(work);
132dc186ad7SThomas Gleixner 		debug_object_init(work, &work_debug_descr);
133dc186ad7SThomas Gleixner 		return 1;
134dc186ad7SThomas Gleixner 	default:
135dc186ad7SThomas Gleixner 		return 0;
136dc186ad7SThomas Gleixner 	}
137dc186ad7SThomas Gleixner }
138dc186ad7SThomas Gleixner 
139dc186ad7SThomas Gleixner /*
140dc186ad7SThomas Gleixner  * fixup_activate is called when:
141dc186ad7SThomas Gleixner  * - an active object is activated
142dc186ad7SThomas Gleixner  * - an unknown object is activated (might be a statically initialized object)
143dc186ad7SThomas Gleixner  */
144dc186ad7SThomas Gleixner static int work_fixup_activate(void *addr, enum debug_obj_state state)
145dc186ad7SThomas Gleixner {
146dc186ad7SThomas Gleixner 	struct work_struct *work = addr;
147dc186ad7SThomas Gleixner 
148dc186ad7SThomas Gleixner 	switch (state) {
149dc186ad7SThomas Gleixner 
150dc186ad7SThomas Gleixner 	case ODEBUG_STATE_NOTAVAILABLE:
151dc186ad7SThomas Gleixner 		/*
152dc186ad7SThomas Gleixner 		 * This is not really a fixup. The work struct was
153dc186ad7SThomas Gleixner 		 * statically initialized. We just make sure that it
154dc186ad7SThomas Gleixner 		 * is tracked in the object tracker.
155dc186ad7SThomas Gleixner 		 */
15622df02bbSTejun Heo 		if (test_bit(WORK_STRUCT_STATIC_BIT, work_data_bits(work))) {
157dc186ad7SThomas Gleixner 			debug_object_init(work, &work_debug_descr);
158dc186ad7SThomas Gleixner 			debug_object_activate(work, &work_debug_descr);
159dc186ad7SThomas Gleixner 			return 0;
160dc186ad7SThomas Gleixner 		}
161dc186ad7SThomas Gleixner 		WARN_ON_ONCE(1);
162dc186ad7SThomas Gleixner 		return 0;
163dc186ad7SThomas Gleixner 
164dc186ad7SThomas Gleixner 	case ODEBUG_STATE_ACTIVE:
165dc186ad7SThomas Gleixner 		WARN_ON(1);
166dc186ad7SThomas Gleixner 
167dc186ad7SThomas Gleixner 	default:
168dc186ad7SThomas Gleixner 		return 0;
169dc186ad7SThomas Gleixner 	}
170dc186ad7SThomas Gleixner }
171dc186ad7SThomas Gleixner 
172dc186ad7SThomas Gleixner /*
173dc186ad7SThomas Gleixner  * fixup_free is called when:
174dc186ad7SThomas Gleixner  * - an active object is freed
175dc186ad7SThomas Gleixner  */
176dc186ad7SThomas Gleixner static int work_fixup_free(void *addr, enum debug_obj_state state)
177dc186ad7SThomas Gleixner {
178dc186ad7SThomas Gleixner 	struct work_struct *work = addr;
179dc186ad7SThomas Gleixner 
180dc186ad7SThomas Gleixner 	switch (state) {
181dc186ad7SThomas Gleixner 	case ODEBUG_STATE_ACTIVE:
182dc186ad7SThomas Gleixner 		cancel_work_sync(work);
183dc186ad7SThomas Gleixner 		debug_object_free(work, &work_debug_descr);
184dc186ad7SThomas Gleixner 		return 1;
185dc186ad7SThomas Gleixner 	default:
186dc186ad7SThomas Gleixner 		return 0;
187dc186ad7SThomas Gleixner 	}
188dc186ad7SThomas Gleixner }
189dc186ad7SThomas Gleixner 
190dc186ad7SThomas Gleixner static struct debug_obj_descr work_debug_descr = {
191dc186ad7SThomas Gleixner 	.name		= "work_struct",
192dc186ad7SThomas Gleixner 	.fixup_init	= work_fixup_init,
193dc186ad7SThomas Gleixner 	.fixup_activate	= work_fixup_activate,
194dc186ad7SThomas Gleixner 	.fixup_free	= work_fixup_free,
195dc186ad7SThomas Gleixner };
196dc186ad7SThomas Gleixner 
197dc186ad7SThomas Gleixner static inline void debug_work_activate(struct work_struct *work)
198dc186ad7SThomas Gleixner {
199dc186ad7SThomas Gleixner 	debug_object_activate(work, &work_debug_descr);
200dc186ad7SThomas Gleixner }
201dc186ad7SThomas Gleixner 
202dc186ad7SThomas Gleixner static inline void debug_work_deactivate(struct work_struct *work)
203dc186ad7SThomas Gleixner {
204dc186ad7SThomas Gleixner 	debug_object_deactivate(work, &work_debug_descr);
205dc186ad7SThomas Gleixner }
206dc186ad7SThomas Gleixner 
207dc186ad7SThomas Gleixner void __init_work(struct work_struct *work, int onstack)
208dc186ad7SThomas Gleixner {
209dc186ad7SThomas Gleixner 	if (onstack)
210dc186ad7SThomas Gleixner 		debug_object_init_on_stack(work, &work_debug_descr);
211dc186ad7SThomas Gleixner 	else
212dc186ad7SThomas Gleixner 		debug_object_init(work, &work_debug_descr);
213dc186ad7SThomas Gleixner }
214dc186ad7SThomas Gleixner EXPORT_SYMBOL_GPL(__init_work);
215dc186ad7SThomas Gleixner 
216dc186ad7SThomas Gleixner void destroy_work_on_stack(struct work_struct *work)
217dc186ad7SThomas Gleixner {
218dc186ad7SThomas Gleixner 	debug_object_free(work, &work_debug_descr);
219dc186ad7SThomas Gleixner }
220dc186ad7SThomas Gleixner EXPORT_SYMBOL_GPL(destroy_work_on_stack);
221dc186ad7SThomas Gleixner 
222dc186ad7SThomas Gleixner #else
223dc186ad7SThomas Gleixner static inline void debug_work_activate(struct work_struct *work) { }
224dc186ad7SThomas Gleixner static inline void debug_work_deactivate(struct work_struct *work) { }
225dc186ad7SThomas Gleixner #endif
226dc186ad7SThomas Gleixner 
22795402b38SGautham R Shenoy /* Serializes the accesses to the list of workqueues. */
22895402b38SGautham R Shenoy static DEFINE_SPINLOCK(workqueue_lock);
2291da177e4SLinus Torvalds static LIST_HEAD(workqueues);
230c34056a3STejun Heo static DEFINE_PER_CPU(struct ida, worker_ida);
231c34056a3STejun Heo 
232c34056a3STejun Heo static int worker_thread(void *__worker);
2331da177e4SLinus Torvalds 
2343af24433SOleg Nesterov static int singlethread_cpu __read_mostly;
235b1f4ec17SOleg Nesterov 
2364690c4abSTejun Heo static struct cpu_workqueue_struct *get_cwq(unsigned int cpu,
2374690c4abSTejun Heo 					    struct workqueue_struct *wq)
238a848e3b6SOleg Nesterov {
239a848e3b6SOleg Nesterov 	return per_cpu_ptr(wq->cpu_wq, cpu);
240a848e3b6SOleg Nesterov }
241a848e3b6SOleg Nesterov 
2421537663fSTejun Heo static struct cpu_workqueue_struct *target_cwq(unsigned int cpu,
2431537663fSTejun Heo 					       struct workqueue_struct *wq)
2441537663fSTejun Heo {
2451537663fSTejun Heo 	if (unlikely(wq->flags & WQ_SINGLE_THREAD))
2461537663fSTejun Heo 		cpu = singlethread_cpu;
2471537663fSTejun Heo 	return get_cwq(cpu, wq);
2481537663fSTejun Heo }
2491537663fSTejun Heo 
25073f53c4aSTejun Heo static unsigned int work_color_to_flags(int color)
25173f53c4aSTejun Heo {
25273f53c4aSTejun Heo 	return color << WORK_STRUCT_COLOR_SHIFT;
25373f53c4aSTejun Heo }
25473f53c4aSTejun Heo 
25573f53c4aSTejun Heo static int get_work_color(struct work_struct *work)
25673f53c4aSTejun Heo {
25773f53c4aSTejun Heo 	return (*work_data_bits(work) >> WORK_STRUCT_COLOR_SHIFT) &
25873f53c4aSTejun Heo 		((1 << WORK_STRUCT_COLOR_BITS) - 1);
25973f53c4aSTejun Heo }
26073f53c4aSTejun Heo 
26173f53c4aSTejun Heo static int work_next_color(int color)
26273f53c4aSTejun Heo {
26373f53c4aSTejun Heo 	return (color + 1) % WORK_NR_COLORS;
26473f53c4aSTejun Heo }
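
/*
 * Illustrative sketch (editor's addition, not part of the original file):
 * how a work's color round-trips through the helpers above.  Assuming the
 * usual workqueue.h definitions of WORK_STRUCT_COLOR_SHIFT/BITS, a work
 * queued while cwq->work_color is e.g. 3 carries 3 in its data word until
 * it retires:
 *
 *	unsigned int flags = work_color_to_flags(3);
 *	... insert_work() folds @flags into work->data via set_wq_data() ...
 *	WARN_ON(get_work_color(work) != 3);
 *
 * Colors simply wrap around: work_next_color(WORK_NR_COLORS - 1) == 0.
 */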
26573f53c4aSTejun Heo 
2664594bf15SDavid Howells /*
2674594bf15SDavid Howells  * Set the workqueue on which a work item is to be run
2684594bf15SDavid Howells  * - Must *only* be called if the pending flag is set
2694594bf15SDavid Howells  */
270ed7c0feeSOleg Nesterov static inline void set_wq_data(struct work_struct *work,
2714690c4abSTejun Heo 			       struct cpu_workqueue_struct *cwq,
2724690c4abSTejun Heo 			       unsigned long extra_flags)
273365970a1SDavid Howells {
2744594bf15SDavid Howells 	BUG_ON(!work_pending(work));
2754594bf15SDavid Howells 
2764690c4abSTejun Heo 	atomic_long_set(&work->data, (unsigned long)cwq | work_static(work) |
27722df02bbSTejun Heo 			WORK_STRUCT_PENDING | extra_flags);
278365970a1SDavid Howells }
279365970a1SDavid Howells 
2804d707b9fSOleg Nesterov /*
2814d707b9fSOleg Nesterov  * Clear WORK_STRUCT_PENDING and the workqueue on which it was queued.
2824d707b9fSOleg Nesterov  */
2834d707b9fSOleg Nesterov static inline void clear_wq_data(struct work_struct *work)
2844d707b9fSOleg Nesterov {
2854690c4abSTejun Heo 	atomic_long_set(&work->data, work_static(work));
2864d707b9fSOleg Nesterov }
2874d707b9fSOleg Nesterov 
28864166699STejun Heo static inline struct cpu_workqueue_struct *get_wq_data(struct work_struct *work)
289365970a1SDavid Howells {
29064166699STejun Heo 	return (void *)(atomic_long_read(&work->data) &
29164166699STejun Heo 			WORK_STRUCT_WQ_DATA_MASK);
292365970a1SDavid Howells }
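
/*
 * Illustrative sketch (editor's addition): work->data packs the owning
 * cwq pointer together with the WORK_STRUCT_* flag bits in one word,
 * which is why cwqs must be aligned as described above.  The pointer may
 * only be installed once PENDING has been claimed, mirroring
 * queue_work_on():
 *
 *	if (!test_and_set_bit(WORK_STRUCT_PENDING_BIT,
 *			      work_data_bits(work))) {
 *		set_wq_data(work, cwq, 0);
 *		BUG_ON(get_wq_data(work) != cwq);
 *	}
 */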
293365970a1SDavid Howells 
2944690c4abSTejun Heo /**
2954690c4abSTejun Heo  * insert_work - insert a work into cwq
2964690c4abSTejun Heo  * @cwq: cwq @work belongs to
2974690c4abSTejun Heo  * @work: work to insert
2984690c4abSTejun Heo  * @head: insertion point
2994690c4abSTejun Heo  * @extra_flags: extra WORK_STRUCT_* flags to set
3004690c4abSTejun Heo  *
3014690c4abSTejun Heo  * Insert @work into @cwq after @head.
3024690c4abSTejun Heo  *
3034690c4abSTejun Heo  * CONTEXT:
3044690c4abSTejun Heo  * spin_lock_irq(cwq->lock).
3054690c4abSTejun Heo  */
306b89deed3SOleg Nesterov static void insert_work(struct cpu_workqueue_struct *cwq,
3074690c4abSTejun Heo 			struct work_struct *work, struct list_head *head,
3084690c4abSTejun Heo 			unsigned int extra_flags)
309b89deed3SOleg Nesterov {
3104690c4abSTejun Heo 	/* we own @work, set data and link */
3114690c4abSTejun Heo 	set_wq_data(work, cwq, extra_flags);
3124690c4abSTejun Heo 
3136e84d644SOleg Nesterov 	/*
3146e84d644SOleg Nesterov 	 * Ensure that we get the right work->data if we see the
3156e84d644SOleg Nesterov 	 * result of list_add() below, see try_to_grab_pending().
3166e84d644SOleg Nesterov 	 */
3176e84d644SOleg Nesterov 	smp_wmb();
3184690c4abSTejun Heo 
3191a4d9b0aSOleg Nesterov 	list_add_tail(&work->entry, head);
320b89deed3SOleg Nesterov 	wake_up(&cwq->more_work);
321b89deed3SOleg Nesterov }
322b89deed3SOleg Nesterov 
3234690c4abSTejun Heo static void __queue_work(unsigned int cpu, struct workqueue_struct *wq,
3241da177e4SLinus Torvalds 			 struct work_struct *work)
3251da177e4SLinus Torvalds {
3261537663fSTejun Heo 	struct cpu_workqueue_struct *cwq = target_cwq(cpu, wq);
3271e19ffc6STejun Heo 	struct list_head *worklist;
3281da177e4SLinus Torvalds 	unsigned long flags;
3291da177e4SLinus Torvalds 
330dc186ad7SThomas Gleixner 	debug_work_activate(work);
3311e19ffc6STejun Heo 
3321da177e4SLinus Torvalds 	spin_lock_irqsave(&cwq->lock, flags);
3334690c4abSTejun Heo 	BUG_ON(!list_empty(&work->entry));
3341e19ffc6STejun Heo 
33573f53c4aSTejun Heo 	cwq->nr_in_flight[cwq->work_color]++;
3361e19ffc6STejun Heo 
3371e19ffc6STejun Heo 	if (likely(cwq->nr_active < cwq->max_active)) {
3381e19ffc6STejun Heo 		cwq->nr_active++;
3391e19ffc6STejun Heo 		worklist = &cwq->worklist;
3401e19ffc6STejun Heo 	} else
3411e19ffc6STejun Heo 		worklist = &cwq->delayed_works;
3421e19ffc6STejun Heo 
3431e19ffc6STejun Heo 	insert_work(cwq, work, worklist, work_color_to_flags(cwq->work_color));
3441e19ffc6STejun Heo 
3451da177e4SLinus Torvalds 	spin_unlock_irqrestore(&cwq->lock, flags);
3461da177e4SLinus Torvalds }
3471da177e4SLinus Torvalds 
3480fcb78c2SRolf Eike Beer /**
3490fcb78c2SRolf Eike Beer  * queue_work - queue work on a workqueue
3500fcb78c2SRolf Eike Beer  * @wq: workqueue to use
3510fcb78c2SRolf Eike Beer  * @work: work to queue
3520fcb78c2SRolf Eike Beer  *
353057647fcSAlan Stern  * Returns 0 if @work was already on a queue, non-zero otherwise.
3541da177e4SLinus Torvalds  *
35500dfcaf7SOleg Nesterov  * We queue the work to the CPU on which it was submitted, but if the CPU dies
35600dfcaf7SOleg Nesterov  * it can be processed by another CPU.
3571da177e4SLinus Torvalds  */
3587ad5b3a5SHarvey Harrison int queue_work(struct workqueue_struct *wq, struct work_struct *work)
3591da177e4SLinus Torvalds {
360ef1ca236SOleg Nesterov 	int ret;
3611da177e4SLinus Torvalds 
362ef1ca236SOleg Nesterov 	ret = queue_work_on(get_cpu(), wq, work);
363a848e3b6SOleg Nesterov 	put_cpu();
364ef1ca236SOleg Nesterov 
3651da177e4SLinus Torvalds 	return ret;
3661da177e4SLinus Torvalds }
367ae90dd5dSDave Jones EXPORT_SYMBOL_GPL(queue_work);
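
/*
 * Usage sketch (editor's addition; my_wq, my_work and my_work_fn are
 * illustrative names, not defined in this file).  A caller typically
 * initializes the work item once and queues it whenever there is
 * something to do:
 *
 *	static void my_work_fn(struct work_struct *work)
 *	{
 *		pr_debug("running in process context\n");
 *	}
 *	static DECLARE_WORK(my_work, my_work_fn);
 *
 *	struct workqueue_struct *my_wq = create_workqueue("my_wq");
 *	queue_work(my_wq, &my_work);
 *
 * queue_work() returns 0 and does nothing if the work is already pending.
 */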
3681da177e4SLinus Torvalds 
369c1a220e7SZhang Rui /**
370c1a220e7SZhang Rui  * queue_work_on - queue work on specific cpu
371c1a220e7SZhang Rui  * @cpu: CPU number to execute work on
372c1a220e7SZhang Rui  * @wq: workqueue to use
373c1a220e7SZhang Rui  * @work: work to queue
374c1a220e7SZhang Rui  *
375c1a220e7SZhang Rui  * Returns 0 if @work was already on a queue, non-zero otherwise.
376c1a220e7SZhang Rui  *
377c1a220e7SZhang Rui  * We queue the work to a specific CPU, the caller must ensure it
378c1a220e7SZhang Rui  * can't go away.
379c1a220e7SZhang Rui  */
380c1a220e7SZhang Rui int
381c1a220e7SZhang Rui queue_work_on(int cpu, struct workqueue_struct *wq, struct work_struct *work)
382c1a220e7SZhang Rui {
383c1a220e7SZhang Rui 	int ret = 0;
384c1a220e7SZhang Rui 
38522df02bbSTejun Heo 	if (!test_and_set_bit(WORK_STRUCT_PENDING_BIT, work_data_bits(work))) {
3864690c4abSTejun Heo 		__queue_work(cpu, wq, work);
387c1a220e7SZhang Rui 		ret = 1;
388c1a220e7SZhang Rui 	}
389c1a220e7SZhang Rui 	return ret;
390c1a220e7SZhang Rui }
391c1a220e7SZhang Rui EXPORT_SYMBOL_GPL(queue_work_on);
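
/*
 * Usage sketch (editor's addition; target_cpu, my_wq and my_work are
 * illustrative): queueing to an explicit CPU.  The caller must keep that
 * CPU from going away, for example by holding get_online_cpus() across
 * the call:
 *
 *	get_online_cpus();
 *	if (cpu_online(target_cpu))
 *		queue_work_on(target_cpu, my_wq, &my_work);
 *	put_online_cpus();
 */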
392c1a220e7SZhang Rui 
3936d141c3fSLi Zefan static void delayed_work_timer_fn(unsigned long __data)
3941da177e4SLinus Torvalds {
39552bad64dSDavid Howells 	struct delayed_work *dwork = (struct delayed_work *)__data;
396ed7c0feeSOleg Nesterov 	struct cpu_workqueue_struct *cwq = get_wq_data(&dwork->work);
3971da177e4SLinus Torvalds 
3984690c4abSTejun Heo 	__queue_work(smp_processor_id(), cwq->wq, &dwork->work);
3991da177e4SLinus Torvalds }
4001da177e4SLinus Torvalds 
4010fcb78c2SRolf Eike Beer /**
4020fcb78c2SRolf Eike Beer  * queue_delayed_work - queue work on a workqueue after delay
4030fcb78c2SRolf Eike Beer  * @wq: workqueue to use
404af9997e4SRandy Dunlap  * @dwork: delayable work to queue
4050fcb78c2SRolf Eike Beer  * @delay: number of jiffies to wait before queueing
4060fcb78c2SRolf Eike Beer  *
407057647fcSAlan Stern  * Returns 0 if @dwork was already on a queue, non-zero otherwise.
4080fcb78c2SRolf Eike Beer  */
4097ad5b3a5SHarvey Harrison int queue_delayed_work(struct workqueue_struct *wq,
41052bad64dSDavid Howells 			struct delayed_work *dwork, unsigned long delay)
4111da177e4SLinus Torvalds {
41252bad64dSDavid Howells 	if (delay == 0)
41363bc0362SOleg Nesterov 		return queue_work(wq, &dwork->work);
4141da177e4SLinus Torvalds 
41563bc0362SOleg Nesterov 	return queue_delayed_work_on(-1, wq, dwork, delay);
4161da177e4SLinus Torvalds }
417ae90dd5dSDave Jones EXPORT_SYMBOL_GPL(queue_delayed_work);
4181da177e4SLinus Torvalds 
4190fcb78c2SRolf Eike Beer /**
4200fcb78c2SRolf Eike Beer  * queue_delayed_work_on - queue work on specific CPU after delay
4210fcb78c2SRolf Eike Beer  * @cpu: CPU number to execute work on
4220fcb78c2SRolf Eike Beer  * @wq: workqueue to use
423af9997e4SRandy Dunlap  * @dwork: work to queue
4240fcb78c2SRolf Eike Beer  * @delay: number of jiffies to wait before queueing
4250fcb78c2SRolf Eike Beer  *
426057647fcSAlan Stern  * Returns 0 if @dwork was already on a queue, non-zero otherwise.
4270fcb78c2SRolf Eike Beer  */
4287a6bc1cdSVenkatesh Pallipadi int queue_delayed_work_on(int cpu, struct workqueue_struct *wq,
42952bad64dSDavid Howells 			struct delayed_work *dwork, unsigned long delay)
4307a6bc1cdSVenkatesh Pallipadi {
4317a6bc1cdSVenkatesh Pallipadi 	int ret = 0;
43252bad64dSDavid Howells 	struct timer_list *timer = &dwork->timer;
43352bad64dSDavid Howells 	struct work_struct *work = &dwork->work;
4347a6bc1cdSVenkatesh Pallipadi 
43522df02bbSTejun Heo 	if (!test_and_set_bit(WORK_STRUCT_PENDING_BIT, work_data_bits(work))) {
4367a6bc1cdSVenkatesh Pallipadi 		BUG_ON(timer_pending(timer));
4377a6bc1cdSVenkatesh Pallipadi 		BUG_ON(!list_empty(&work->entry));
4387a6bc1cdSVenkatesh Pallipadi 
4398a3e77ccSAndrew Liu 		timer_stats_timer_set_start_info(&dwork->timer);
4408a3e77ccSAndrew Liu 
441ed7c0feeSOleg Nesterov 		/* This stores cwq for the moment, for the timer_fn */
4421537663fSTejun Heo 		set_wq_data(work, target_cwq(raw_smp_processor_id(), wq), 0);
4437a6bc1cdSVenkatesh Pallipadi 		timer->expires = jiffies + delay;
44452bad64dSDavid Howells 		timer->data = (unsigned long)dwork;
4457a6bc1cdSVenkatesh Pallipadi 		timer->function = delayed_work_timer_fn;
44663bc0362SOleg Nesterov 
44763bc0362SOleg Nesterov 		if (unlikely(cpu >= 0))
4487a6bc1cdSVenkatesh Pallipadi 			add_timer_on(timer, cpu);
44963bc0362SOleg Nesterov 		else
45063bc0362SOleg Nesterov 			add_timer(timer);
4517a6bc1cdSVenkatesh Pallipadi 		ret = 1;
4527a6bc1cdSVenkatesh Pallipadi 	}
4537a6bc1cdSVenkatesh Pallipadi 	return ret;
4547a6bc1cdSVenkatesh Pallipadi }
455ae90dd5dSDave Jones EXPORT_SYMBOL_GPL(queue_delayed_work_on);
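
/*
 * Usage sketch (editor's addition; names are illustrative): delayed
 * queueing arms dwork->timer, and the work hits the workqueue once
 * @delay jiffies have passed:
 *
 *	static void my_poll_fn(struct work_struct *work);
 *	static DECLARE_DELAYED_WORK(my_poll, my_poll_fn);
 *
 *	queue_delayed_work(my_wq, &my_poll, msecs_to_jiffies(100));
 *
 * A periodic handler simply requeues itself from within my_poll_fn().
 */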
4561da177e4SLinus Torvalds 
457c34056a3STejun Heo static struct worker *alloc_worker(void)
458c34056a3STejun Heo {
459c34056a3STejun Heo 	struct worker *worker;
460c34056a3STejun Heo 
461c34056a3STejun Heo 	worker = kzalloc(sizeof(*worker), GFP_KERNEL);
462affee4b2STejun Heo 	if (worker)
463affee4b2STejun Heo 		INIT_LIST_HEAD(&worker->scheduled);
464c34056a3STejun Heo 	return worker;
465c34056a3STejun Heo }
466c34056a3STejun Heo 
467c34056a3STejun Heo /**
468c34056a3STejun Heo  * create_worker - create a new workqueue worker
469c34056a3STejun Heo  * @cwq: cwq the new worker will belong to
470c34056a3STejun Heo  * @bind: whether to bind the worker to cwq->cpu or not
471c34056a3STejun Heo  *
472c34056a3STejun Heo  * Create a new worker which is bound to @cwq.  The returned worker
473c34056a3STejun Heo  * can be started by calling start_worker() or destroyed using
474c34056a3STejun Heo  * destroy_worker().
475c34056a3STejun Heo  *
476c34056a3STejun Heo  * CONTEXT:
477c34056a3STejun Heo  * Might sleep.  Does GFP_KERNEL allocations.
478c34056a3STejun Heo  *
479c34056a3STejun Heo  * RETURNS:
480c34056a3STejun Heo  * Pointer to the newly created worker.
481c34056a3STejun Heo  */
482c34056a3STejun Heo static struct worker *create_worker(struct cpu_workqueue_struct *cwq, bool bind)
483c34056a3STejun Heo {
484c34056a3STejun Heo 	int id = -1;
485c34056a3STejun Heo 	struct worker *worker = NULL;
486c34056a3STejun Heo 
487c34056a3STejun Heo 	spin_lock(&workqueue_lock);
488c34056a3STejun Heo 	while (ida_get_new(&per_cpu(worker_ida, cwq->cpu), &id)) {
489c34056a3STejun Heo 		spin_unlock(&workqueue_lock);
490c34056a3STejun Heo 		if (!ida_pre_get(&per_cpu(worker_ida, cwq->cpu), GFP_KERNEL))
491c34056a3STejun Heo 			goto fail;
492c34056a3STejun Heo 		spin_lock(&workqueue_lock);
493c34056a3STejun Heo 	}
494c34056a3STejun Heo 	spin_unlock(&workqueue_lock);
495c34056a3STejun Heo 
496c34056a3STejun Heo 	worker = alloc_worker();
497c34056a3STejun Heo 	if (!worker)
498c34056a3STejun Heo 		goto fail;
499c34056a3STejun Heo 
500c34056a3STejun Heo 	worker->cwq = cwq;
501c34056a3STejun Heo 	worker->id = id;
502c34056a3STejun Heo 
503c34056a3STejun Heo 	worker->task = kthread_create(worker_thread, worker, "kworker/%u:%d",
504c34056a3STejun Heo 				      cwq->cpu, id);
505c34056a3STejun Heo 	if (IS_ERR(worker->task))
506c34056a3STejun Heo 		goto fail;
507c34056a3STejun Heo 
508c34056a3STejun Heo 	if (bind)
509c34056a3STejun Heo 		kthread_bind(worker->task, cwq->cpu);
510c34056a3STejun Heo 
511c34056a3STejun Heo 	return worker;
512c34056a3STejun Heo fail:
513c34056a3STejun Heo 	if (id >= 0) {
514c34056a3STejun Heo 		spin_lock(&workqueue_lock);
515c34056a3STejun Heo 		ida_remove(&per_cpu(worker_ida, cwq->cpu), id);
516c34056a3STejun Heo 		spin_unlock(&workqueue_lock);
517c34056a3STejun Heo 	}
518c34056a3STejun Heo 	kfree(worker);
519c34056a3STejun Heo 	return NULL;
520c34056a3STejun Heo }
521c34056a3STejun Heo 
522c34056a3STejun Heo /**
523c34056a3STejun Heo  * start_worker - start a newly created worker
524c34056a3STejun Heo  * @worker: worker to start
525c34056a3STejun Heo  *
526c34056a3STejun Heo  * Start @worker.
527c34056a3STejun Heo  *
528c34056a3STejun Heo  * CONTEXT:
529c34056a3STejun Heo  * spin_lock_irq(cwq->lock).
530c34056a3STejun Heo  */
531c34056a3STejun Heo static void start_worker(struct worker *worker)
532c34056a3STejun Heo {
533c34056a3STejun Heo 	wake_up_process(worker->task);
534c34056a3STejun Heo }
535c34056a3STejun Heo 
536c34056a3STejun Heo /**
537c34056a3STejun Heo  * destroy_worker - destroy a workqueue worker
538c34056a3STejun Heo  * @worker: worker to be destroyed
539c34056a3STejun Heo  *
540c34056a3STejun Heo  * Destroy @worker.
541c34056a3STejun Heo  */
542c34056a3STejun Heo static void destroy_worker(struct worker *worker)
543c34056a3STejun Heo {
544c34056a3STejun Heo 	int cpu = worker->cwq->cpu;
545c34056a3STejun Heo 	int id = worker->id;
546c34056a3STejun Heo 
547c34056a3STejun Heo 	/* sanity check frenzy */
548c34056a3STejun Heo 	BUG_ON(worker->current_work);
549affee4b2STejun Heo 	BUG_ON(!list_empty(&worker->scheduled));
550c34056a3STejun Heo 
551c34056a3STejun Heo 	kthread_stop(worker->task);
552c34056a3STejun Heo 	kfree(worker);
553c34056a3STejun Heo 
554c34056a3STejun Heo 	spin_lock(&workqueue_lock);
555c34056a3STejun Heo 	ida_remove(&per_cpu(worker_ida, cpu), id);
556c34056a3STejun Heo 	spin_unlock(&workqueue_lock);
557c34056a3STejun Heo }
558c34056a3STejun Heo 
559a62428c0STejun Heo /**
560affee4b2STejun Heo  * move_linked_works - move linked works to a list
561affee4b2STejun Heo  * @work: start of series of works to be scheduled
562affee4b2STejun Heo  * @head: target list to append @work to
563affee4b2STejun Heo  * @nextp: out parameter for nested worklist walking
564affee4b2STejun Heo  *
565affee4b2STejun Heo  * Schedule linked works starting from @work onto @head.  The work series to
566affee4b2STejun Heo  * be scheduled starts at @work and includes any consecutive work with
567affee4b2STejun Heo  * WORK_STRUCT_LINKED set in its predecessor.
568affee4b2STejun Heo  *
569affee4b2STejun Heo  * If @nextp is not NULL, it's updated to point to the next work of
570affee4b2STejun Heo  * the last scheduled work.  This allows move_linked_works() to be
571affee4b2STejun Heo  * nested inside outer list_for_each_entry_safe().
572affee4b2STejun Heo  *
573affee4b2STejun Heo  * CONTEXT:
574affee4b2STejun Heo  * spin_lock_irq(cwq->lock).
575affee4b2STejun Heo  */
576affee4b2STejun Heo static void move_linked_works(struct work_struct *work, struct list_head *head,
577affee4b2STejun Heo 			      struct work_struct **nextp)
578affee4b2STejun Heo {
579affee4b2STejun Heo 	struct work_struct *n;
580affee4b2STejun Heo 
581affee4b2STejun Heo 	/*
582affee4b2STejun Heo 	 * Linked worklist will always end before the end of the list,
583affee4b2STejun Heo 	 * use NULL for list head.
584affee4b2STejun Heo 	 */
585affee4b2STejun Heo 	list_for_each_entry_safe_from(work, n, NULL, entry) {
586affee4b2STejun Heo 		list_move_tail(&work->entry, head);
587affee4b2STejun Heo 		if (!(*work_data_bits(work) & WORK_STRUCT_LINKED))
588affee4b2STejun Heo 			break;
589affee4b2STejun Heo 	}
590affee4b2STejun Heo 
591affee4b2STejun Heo 	/*
592affee4b2STejun Heo 	 * If we're already inside safe list traversal and have moved
593affee4b2STejun Heo 	 * multiple works to the scheduled queue, the next position
594affee4b2STejun Heo 	 * needs to be updated.
595affee4b2STejun Heo 	 */
596affee4b2STejun Heo 	if (nextp)
597affee4b2STejun Heo 		*nextp = n;
598affee4b2STejun Heo }
599affee4b2STejun Heo 
6001e19ffc6STejun Heo static void cwq_activate_first_delayed(struct cpu_workqueue_struct *cwq)
6011e19ffc6STejun Heo {
6021e19ffc6STejun Heo 	struct work_struct *work = list_first_entry(&cwq->delayed_works,
6031e19ffc6STejun Heo 						    struct work_struct, entry);
6041e19ffc6STejun Heo 
6051e19ffc6STejun Heo 	move_linked_works(work, &cwq->worklist, NULL);
6061e19ffc6STejun Heo 	cwq->nr_active++;
6071e19ffc6STejun Heo }
6081e19ffc6STejun Heo 
609affee4b2STejun Heo /**
61073f53c4aSTejun Heo  * cwq_dec_nr_in_flight - decrement cwq's nr_in_flight
61173f53c4aSTejun Heo  * @cwq: cwq of interest
61273f53c4aSTejun Heo  * @color: color of work which left the queue
61373f53c4aSTejun Heo  *
61473f53c4aSTejun Heo  * A work either has completed or is removed from pending queue,
61573f53c4aSTejun Heo  * decrement nr_in_flight of its cwq and handle workqueue flushing.
61673f53c4aSTejun Heo  *
61773f53c4aSTejun Heo  * CONTEXT:
61873f53c4aSTejun Heo  * spin_lock_irq(cwq->lock).
61973f53c4aSTejun Heo  */
62073f53c4aSTejun Heo static void cwq_dec_nr_in_flight(struct cpu_workqueue_struct *cwq, int color)
62173f53c4aSTejun Heo {
62273f53c4aSTejun Heo 	/* ignore uncolored works */
62373f53c4aSTejun Heo 	if (color == WORK_NO_COLOR)
62473f53c4aSTejun Heo 		return;
62573f53c4aSTejun Heo 
62673f53c4aSTejun Heo 	cwq->nr_in_flight[color]--;
6271e19ffc6STejun Heo 	cwq->nr_active--;
6281e19ffc6STejun Heo 
6291e19ffc6STejun Heo 	/* one down, submit a delayed one */
6301e19ffc6STejun Heo 	if (!list_empty(&cwq->delayed_works) &&
6311e19ffc6STejun Heo 	    cwq->nr_active < cwq->max_active)
6321e19ffc6STejun Heo 		cwq_activate_first_delayed(cwq);
63373f53c4aSTejun Heo 
63473f53c4aSTejun Heo 	/* is flush in progress and are we at the flushing tip? */
63573f53c4aSTejun Heo 	if (likely(cwq->flush_color != color))
63673f53c4aSTejun Heo 		return;
63773f53c4aSTejun Heo 
63873f53c4aSTejun Heo 	/* are there still in-flight works? */
63973f53c4aSTejun Heo 	if (cwq->nr_in_flight[color])
64073f53c4aSTejun Heo 		return;
64173f53c4aSTejun Heo 
64273f53c4aSTejun Heo 	/* this cwq is done, clear flush_color */
64373f53c4aSTejun Heo 	cwq->flush_color = -1;
64473f53c4aSTejun Heo 
64573f53c4aSTejun Heo 	/*
64673f53c4aSTejun Heo 	 * If this was the last cwq, wake up the first flusher.  It
64773f53c4aSTejun Heo 	 * will handle the rest.
64873f53c4aSTejun Heo 	 */
64973f53c4aSTejun Heo 	if (atomic_dec_and_test(&cwq->wq->nr_cwqs_to_flush))
65073f53c4aSTejun Heo 		complete(&cwq->wq->first_flusher->done);
65173f53c4aSTejun Heo }
65273f53c4aSTejun Heo 
65373f53c4aSTejun Heo /**
654a62428c0STejun Heo  * process_one_work - process single work
655c34056a3STejun Heo  * @worker: self
656a62428c0STejun Heo  * @work: work to process
657a62428c0STejun Heo  *
658a62428c0STejun Heo  * Process @work.  This function contains all the logic necessary to
659a62428c0STejun Heo  * process a single work including synchronization against and
660a62428c0STejun Heo  * interaction with other workers on the same cpu, queueing and
661a62428c0STejun Heo  * flushing.  As long as the context requirement is met, any worker can
662a62428c0STejun Heo  * call this function to process a work.
663a62428c0STejun Heo  *
664a62428c0STejun Heo  * CONTEXT:
665a62428c0STejun Heo  * spin_lock_irq(cwq->lock) which is released and regrabbed.
666a62428c0STejun Heo  */
667c34056a3STejun Heo static void process_one_work(struct worker *worker, struct work_struct *work)
6681da177e4SLinus Torvalds {
669c34056a3STejun Heo 	struct cpu_workqueue_struct *cwq = worker->cwq;
6706bb49e59SDavid Howells 	work_func_t f = work->func;
67173f53c4aSTejun Heo 	int work_color;
6724e6045f1SJohannes Berg #ifdef CONFIG_LOCKDEP
6734e6045f1SJohannes Berg 	/*
674a62428c0STejun Heo 	 * It is permissible to free the struct work_struct from
675a62428c0STejun Heo 	 * inside the function that is called from it, this we need to
676a62428c0STejun Heo 	 * take into account for lockdep too.  To avoid bogus "held
677a62428c0STejun Heo 	 * lock freed" warnings as well as problems when looking into
678a62428c0STejun Heo 	 * work->lockdep_map, make a copy and use that here.
6794e6045f1SJohannes Berg 	 */
6804e6045f1SJohannes Berg 	struct lockdep_map lockdep_map = work->lockdep_map;
6814e6045f1SJohannes Berg #endif
682a62428c0STejun Heo 	/* claim and process */
683dc186ad7SThomas Gleixner 	debug_work_deactivate(work);
684c34056a3STejun Heo 	worker->current_work = work;
68573f53c4aSTejun Heo 	work_color = get_work_color(work);
686a62428c0STejun Heo 	list_del_init(&work->entry);
687a62428c0STejun Heo 
688f293ea92SOleg Nesterov 	spin_unlock_irq(&cwq->lock);
6891da177e4SLinus Torvalds 
690365970a1SDavid Howells 	BUG_ON(get_wq_data(work) != cwq);
69123b2e599SOleg Nesterov 	work_clear_pending(work);
6923295f0efSIngo Molnar 	lock_map_acquire(&cwq->wq->lockdep_map);
6933295f0efSIngo Molnar 	lock_map_acquire(&lockdep_map);
69465f27f38SDavid Howells 	f(work);
6953295f0efSIngo Molnar 	lock_map_release(&lockdep_map);
6963295f0efSIngo Molnar 	lock_map_release(&cwq->wq->lockdep_map);
6971da177e4SLinus Torvalds 
698d5abe669SPeter Zijlstra 	if (unlikely(in_atomic() || lockdep_depth(current) > 0)) {
699d5abe669SPeter Zijlstra 		printk(KERN_ERR "BUG: workqueue leaked lock or atomic: "
700d5abe669SPeter Zijlstra 		       "%s/0x%08x/%d\n",
701a62428c0STejun Heo 		       current->comm, preempt_count(), task_pid_nr(current));
702d5abe669SPeter Zijlstra 		printk(KERN_ERR "    last function: ");
703d5abe669SPeter Zijlstra 		print_symbol("%s\n", (unsigned long)f);
704d5abe669SPeter Zijlstra 		debug_show_held_locks(current);
705d5abe669SPeter Zijlstra 		dump_stack();
706d5abe669SPeter Zijlstra 	}
707d5abe669SPeter Zijlstra 
708f293ea92SOleg Nesterov 	spin_lock_irq(&cwq->lock);
709a62428c0STejun Heo 
710a62428c0STejun Heo 	/* we're done with it, release */
711c34056a3STejun Heo 	worker->current_work = NULL;
71273f53c4aSTejun Heo 	cwq_dec_nr_in_flight(cwq, work_color);
7131da177e4SLinus Torvalds }
714a62428c0STejun Heo 
715affee4b2STejun Heo /**
716affee4b2STejun Heo  * process_scheduled_works - process scheduled works
717affee4b2STejun Heo  * @worker: self
718affee4b2STejun Heo  *
719affee4b2STejun Heo  * Process all scheduled works.  Please note that the scheduled list
720affee4b2STejun Heo  * may change while processing a work, so this function repeatedly
721affee4b2STejun Heo  * fetches a work from the top and executes it.
722affee4b2STejun Heo  *
723affee4b2STejun Heo  * CONTEXT:
724affee4b2STejun Heo  * spin_lock_irq(cwq->lock) which may be released and regrabbed
725affee4b2STejun Heo  * multiple times.
726affee4b2STejun Heo  */
727affee4b2STejun Heo static void process_scheduled_works(struct worker *worker)
728a62428c0STejun Heo {
729affee4b2STejun Heo 	while (!list_empty(&worker->scheduled)) {
730affee4b2STejun Heo 		struct work_struct *work = list_first_entry(&worker->scheduled,
731a62428c0STejun Heo 						struct work_struct, entry);
732c34056a3STejun Heo 		process_one_work(worker, work);
733a62428c0STejun Heo 	}
7341da177e4SLinus Torvalds }
7351da177e4SLinus Torvalds 
7364690c4abSTejun Heo /**
7374690c4abSTejun Heo  * worker_thread - the worker thread function
738c34056a3STejun Heo  * @__worker: self
7394690c4abSTejun Heo  *
7404690c4abSTejun Heo  * The cwq worker thread function.
7414690c4abSTejun Heo  */
742c34056a3STejun Heo static int worker_thread(void *__worker)
7431da177e4SLinus Torvalds {
744c34056a3STejun Heo 	struct worker *worker = __worker;
745c34056a3STejun Heo 	struct cpu_workqueue_struct *cwq = worker->cwq;
7463af24433SOleg Nesterov 	DEFINE_WAIT(wait);
7471da177e4SLinus Torvalds 
74897e37d7bSTejun Heo 	if (cwq->wq->flags & WQ_FREEZEABLE)
74983144186SRafael J. Wysocki 		set_freezable();
7501da177e4SLinus Torvalds 
7513af24433SOleg Nesterov 	for (;;) {
7523af24433SOleg Nesterov 		prepare_to_wait(&cwq->more_work, &wait, TASK_INTERRUPTIBLE);
75314441960SOleg Nesterov 		if (!freezing(current) &&
75414441960SOleg Nesterov 		    !kthread_should_stop() &&
75514441960SOleg Nesterov 		    list_empty(&cwq->worklist))
7561da177e4SLinus Torvalds 			schedule();
7573af24433SOleg Nesterov 		finish_wait(&cwq->more_work, &wait);
7581da177e4SLinus Torvalds 
75985f4186aSOleg Nesterov 		try_to_freeze();
76085f4186aSOleg Nesterov 
76114441960SOleg Nesterov 		if (kthread_should_stop())
7623af24433SOleg Nesterov 			break;
7633af24433SOleg Nesterov 
764c34056a3STejun Heo 		if (unlikely(!cpumask_equal(&worker->task->cpus_allowed,
7651537663fSTejun Heo 					    get_cpu_mask(cwq->cpu))))
766c34056a3STejun Heo 			set_cpus_allowed_ptr(worker->task,
7671537663fSTejun Heo 					     get_cpu_mask(cwq->cpu));
768affee4b2STejun Heo 
769affee4b2STejun Heo 		spin_lock_irq(&cwq->lock);
770affee4b2STejun Heo 
771affee4b2STejun Heo 		while (!list_empty(&cwq->worklist)) {
772affee4b2STejun Heo 			struct work_struct *work =
773affee4b2STejun Heo 				list_first_entry(&cwq->worklist,
774affee4b2STejun Heo 						 struct work_struct, entry);
775affee4b2STejun Heo 
776affee4b2STejun Heo 			if (likely(!(*work_data_bits(work) &
777affee4b2STejun Heo 				     WORK_STRUCT_LINKED))) {
778affee4b2STejun Heo 				/* optimization path, not strictly necessary */
779affee4b2STejun Heo 				process_one_work(worker, work);
780affee4b2STejun Heo 				if (unlikely(!list_empty(&worker->scheduled)))
781affee4b2STejun Heo 					process_scheduled_works(worker);
782affee4b2STejun Heo 			} else {
783affee4b2STejun Heo 				move_linked_works(work, &worker->scheduled,
784affee4b2STejun Heo 						  NULL);
785affee4b2STejun Heo 				process_scheduled_works(worker);
786affee4b2STejun Heo 			}
787affee4b2STejun Heo 		}
788affee4b2STejun Heo 
789affee4b2STejun Heo 		spin_unlock_irq(&cwq->lock);
7901da177e4SLinus Torvalds 	}
7913af24433SOleg Nesterov 
7921da177e4SLinus Torvalds 	return 0;
7931da177e4SLinus Torvalds }
7941da177e4SLinus Torvalds 
795fc2e4d70SOleg Nesterov struct wq_barrier {
796fc2e4d70SOleg Nesterov 	struct work_struct	work;
797fc2e4d70SOleg Nesterov 	struct completion	done;
798fc2e4d70SOleg Nesterov };
799fc2e4d70SOleg Nesterov 
800fc2e4d70SOleg Nesterov static void wq_barrier_func(struct work_struct *work)
801fc2e4d70SOleg Nesterov {
802fc2e4d70SOleg Nesterov 	struct wq_barrier *barr = container_of(work, struct wq_barrier, work);
803fc2e4d70SOleg Nesterov 	complete(&barr->done);
804fc2e4d70SOleg Nesterov }
805fc2e4d70SOleg Nesterov 
8064690c4abSTejun Heo /**
8074690c4abSTejun Heo  * insert_wq_barrier - insert a barrier work
8084690c4abSTejun Heo  * @cwq: cwq to insert barrier into
8094690c4abSTejun Heo  * @barr: wq_barrier to insert
810affee4b2STejun Heo  * @target: target work to attach @barr to
811affee4b2STejun Heo  * @worker: worker currently executing @target, NULL if @target is not executing
8124690c4abSTejun Heo  *
813affee4b2STejun Heo  * @barr is linked to @target such that @barr is completed only after
814affee4b2STejun Heo  * @target finishes execution.  Please note that the ordering
815affee4b2STejun Heo  * guarantee is observed only with respect to @target and on the local
816affee4b2STejun Heo  * cpu.
817affee4b2STejun Heo  *
818affee4b2STejun Heo  * Currently, a queued barrier can't be canceled.  This is because
819affee4b2STejun Heo  * try_to_grab_pending() can't determine whether the work to be
820affee4b2STejun Heo  * grabbed is at the head of the queue and thus can't clear the LINKED
821affee4b2STejun Heo  * flag of the previous work: there must always be a valid next work
822affee4b2STejun Heo  * after a work with the LINKED flag set.
823affee4b2STejun Heo  *
824affee4b2STejun Heo  * Note that when @worker is non-NULL, @target may be modified
825affee4b2STejun Heo  * underneath us, so we can't reliably determine cwq from @target.
8264690c4abSTejun Heo  *
8274690c4abSTejun Heo  * CONTEXT:
8284690c4abSTejun Heo  * spin_lock_irq(cwq->lock).
8294690c4abSTejun Heo  */
83083c22520SOleg Nesterov static void insert_wq_barrier(struct cpu_workqueue_struct *cwq,
831affee4b2STejun Heo 			      struct wq_barrier *barr,
832affee4b2STejun Heo 			      struct work_struct *target, struct worker *worker)
833fc2e4d70SOleg Nesterov {
834affee4b2STejun Heo 	struct list_head *head;
835affee4b2STejun Heo 	unsigned int linked = 0;
836affee4b2STejun Heo 
837dc186ad7SThomas Gleixner 	/*
838dc186ad7SThomas Gleixner 	 * debugobject calls are safe here even with cwq->lock locked
839dc186ad7SThomas Gleixner 	 * as we know for sure that this will not trigger any of the
840dc186ad7SThomas Gleixner 	 * checks and call back into the fixup functions where we
841dc186ad7SThomas Gleixner 	 * might deadlock.
842dc186ad7SThomas Gleixner 	 */
843dc186ad7SThomas Gleixner 	INIT_WORK_ON_STACK(&barr->work, wq_barrier_func);
84422df02bbSTejun Heo 	__set_bit(WORK_STRUCT_PENDING_BIT, work_data_bits(&barr->work));
845fc2e4d70SOleg Nesterov 	init_completion(&barr->done);
84683c22520SOleg Nesterov 
847affee4b2STejun Heo 	/*
848affee4b2STejun Heo 	 * If @target is currently being executed, schedule the
849affee4b2STejun Heo 	 * barrier to the worker; otherwise, put it after @target.
850affee4b2STejun Heo 	 */
851affee4b2STejun Heo 	if (worker)
852affee4b2STejun Heo 		head = worker->scheduled.next;
853affee4b2STejun Heo 	else {
854affee4b2STejun Heo 		unsigned long *bits = work_data_bits(target);
855affee4b2STejun Heo 
856affee4b2STejun Heo 		head = target->entry.next;
857affee4b2STejun Heo 		/* there can already be other linked works, inherit and set */
858affee4b2STejun Heo 		linked = *bits & WORK_STRUCT_LINKED;
859affee4b2STejun Heo 		__set_bit(WORK_STRUCT_LINKED_BIT, bits);
860affee4b2STejun Heo 	}
861affee4b2STejun Heo 
862dc186ad7SThomas Gleixner 	debug_work_activate(&barr->work);
863affee4b2STejun Heo 	insert_work(cwq, &barr->work, head,
864affee4b2STejun Heo 		    work_color_to_flags(WORK_NO_COLOR) | linked);
865fc2e4d70SOleg Nesterov }
866fc2e4d70SOleg Nesterov 
86773f53c4aSTejun Heo /**
86873f53c4aSTejun Heo  * flush_workqueue_prep_cwqs - prepare cwqs for workqueue flushing
86973f53c4aSTejun Heo  * @wq: workqueue being flushed
87073f53c4aSTejun Heo  * @flush_color: new flush color, < 0 for no-op
87173f53c4aSTejun Heo  * @work_color: new work color, < 0 for no-op
87273f53c4aSTejun Heo  *
87373f53c4aSTejun Heo  * Prepare cwqs for workqueue flushing.
87473f53c4aSTejun Heo  *
87573f53c4aSTejun Heo  * If @flush_color is non-negative, flush_color on all cwqs should be
87673f53c4aSTejun Heo  * -1.  If no cwq has in-flight works at the specified color, all
87773f53c4aSTejun Heo  * cwq->flush_color's stay at -1 and %false is returned.  If any cwq
87873f53c4aSTejun Heo  * has in-flight works, its cwq->flush_color is set to
87973f53c4aSTejun Heo  * @flush_color, @wq->nr_cwqs_to_flush is updated accordingly, cwq
88073f53c4aSTejun Heo  * wakeup logic is armed and %true is returned.
88173f53c4aSTejun Heo  *
88273f53c4aSTejun Heo  * The caller should have initialized @wq->first_flusher prior to
88373f53c4aSTejun Heo  * calling this function with non-negative @flush_color.  If
88473f53c4aSTejun Heo  * @flush_color is negative, no flush color update is done and %false
88573f53c4aSTejun Heo  * is returned.
88673f53c4aSTejun Heo  *
88773f53c4aSTejun Heo  * If @work_color is non-negative, all cwqs should have the same
88873f53c4aSTejun Heo  * work_color which is previous to @work_color and all will be
88973f53c4aSTejun Heo  * advanced to @work_color.
89073f53c4aSTejun Heo  *
89173f53c4aSTejun Heo  * CONTEXT:
89273f53c4aSTejun Heo  * mutex_lock(wq->flush_mutex).
89373f53c4aSTejun Heo  *
89473f53c4aSTejun Heo  * RETURNS:
89573f53c4aSTejun Heo  * %true if @flush_color >= 0 and there's something to flush.  %false
89673f53c4aSTejun Heo  * otherwise.
89773f53c4aSTejun Heo  */
89873f53c4aSTejun Heo static bool flush_workqueue_prep_cwqs(struct workqueue_struct *wq,
89973f53c4aSTejun Heo 				      int flush_color, int work_color)
9001da177e4SLinus Torvalds {
90173f53c4aSTejun Heo 	bool wait = false;
90273f53c4aSTejun Heo 	unsigned int cpu;
9031da177e4SLinus Torvalds 
90473f53c4aSTejun Heo 	if (flush_color >= 0) {
90573f53c4aSTejun Heo 		BUG_ON(atomic_read(&wq->nr_cwqs_to_flush));
90673f53c4aSTejun Heo 		atomic_set(&wq->nr_cwqs_to_flush, 1);
90773f53c4aSTejun Heo 	}
90873f53c4aSTejun Heo 
90973f53c4aSTejun Heo 	for_each_possible_cpu(cpu) {
91073f53c4aSTejun Heo 		struct cpu_workqueue_struct *cwq = get_cwq(cpu, wq);
9112355b70fSLai Jiangshan 
91283c22520SOleg Nesterov 		spin_lock_irq(&cwq->lock);
91373f53c4aSTejun Heo 
91473f53c4aSTejun Heo 		if (flush_color >= 0) {
91573f53c4aSTejun Heo 			BUG_ON(cwq->flush_color != -1);
91673f53c4aSTejun Heo 
91773f53c4aSTejun Heo 			if (cwq->nr_in_flight[flush_color]) {
91873f53c4aSTejun Heo 				cwq->flush_color = flush_color;
91973f53c4aSTejun Heo 				atomic_inc(&wq->nr_cwqs_to_flush);
92073f53c4aSTejun Heo 				wait = true;
92183c22520SOleg Nesterov 			}
92273f53c4aSTejun Heo 		}
92373f53c4aSTejun Heo 
92473f53c4aSTejun Heo 		if (work_color >= 0) {
92573f53c4aSTejun Heo 			BUG_ON(work_color != work_next_color(cwq->work_color));
92673f53c4aSTejun Heo 			cwq->work_color = work_color;
92773f53c4aSTejun Heo 		}
92873f53c4aSTejun Heo 
92983c22520SOleg Nesterov 		spin_unlock_irq(&cwq->lock);
930dc186ad7SThomas Gleixner 	}
93114441960SOleg Nesterov 
93273f53c4aSTejun Heo 	if (flush_color >= 0 && atomic_dec_and_test(&wq->nr_cwqs_to_flush))
93373f53c4aSTejun Heo 		complete(&wq->first_flusher->done);
93473f53c4aSTejun Heo 
93573f53c4aSTejun Heo 	return wait;
93683c22520SOleg Nesterov }
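
/*
 * Worked example (editor's addition): assume wq->work_color ==
 * wq->flush_color == 0.  A flusher arriving in flush_workqueue() below
 * advances work_color to 1, so new works are queued with color 1 while
 * the flusher waits for every cwq's nr_in_flight[0] to drain.  A second
 * flusher arriving meanwhile advances work_color to 2 and queues itself
 * on flusher_queue.  When color 0 drains, the first flusher completes,
 * flush_color advances to 1 and the cascade repeats until flush_color
 * catches up with work_color.
 */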
9371da177e4SLinus Torvalds 
9380fcb78c2SRolf Eike Beer /**
9391da177e4SLinus Torvalds  * flush_workqueue - ensure that any scheduled work has run to completion.
9400fcb78c2SRolf Eike Beer  * @wq: workqueue to flush
9411da177e4SLinus Torvalds  *
9421da177e4SLinus Torvalds  * Forces execution of the workqueue and blocks until its completion.
9431da177e4SLinus Torvalds  * This is typically used in driver shutdown handlers.
9441da177e4SLinus Torvalds  *
945fc2e4d70SOleg Nesterov  * We sleep until all works which were queued on entry have been handled,
946fc2e4d70SOleg Nesterov  * but we are not livelocked by new incoming ones.
9471da177e4SLinus Torvalds  */
9487ad5b3a5SHarvey Harrison void flush_workqueue(struct workqueue_struct *wq)
9491da177e4SLinus Torvalds {
95073f53c4aSTejun Heo 	struct wq_flusher this_flusher = {
95173f53c4aSTejun Heo 		.list = LIST_HEAD_INIT(this_flusher.list),
95273f53c4aSTejun Heo 		.flush_color = -1,
95373f53c4aSTejun Heo 		.done = COMPLETION_INITIALIZER_ONSTACK(this_flusher.done),
95473f53c4aSTejun Heo 	};
95573f53c4aSTejun Heo 	int next_color;
956b1f4ec17SOleg Nesterov 
9573295f0efSIngo Molnar 	lock_map_acquire(&wq->lockdep_map);
9583295f0efSIngo Molnar 	lock_map_release(&wq->lockdep_map);
95973f53c4aSTejun Heo 
96073f53c4aSTejun Heo 	mutex_lock(&wq->flush_mutex);
96173f53c4aSTejun Heo 
96273f53c4aSTejun Heo 	/*
96373f53c4aSTejun Heo 	 * Start-to-wait phase
96473f53c4aSTejun Heo 	 */
96573f53c4aSTejun Heo 	next_color = work_next_color(wq->work_color);
96673f53c4aSTejun Heo 
96773f53c4aSTejun Heo 	if (next_color != wq->flush_color) {
96873f53c4aSTejun Heo 		/*
96973f53c4aSTejun Heo 		 * Color space is not full.  The current work_color
97073f53c4aSTejun Heo 		 * becomes our flush_color and work_color is advanced
97173f53c4aSTejun Heo 		 * by one.
97273f53c4aSTejun Heo 		 */
97373f53c4aSTejun Heo 		BUG_ON(!list_empty(&wq->flusher_overflow));
97473f53c4aSTejun Heo 		this_flusher.flush_color = wq->work_color;
97573f53c4aSTejun Heo 		wq->work_color = next_color;
97673f53c4aSTejun Heo 
97773f53c4aSTejun Heo 		if (!wq->first_flusher) {
97873f53c4aSTejun Heo 			/* no flush in progress, become the first flusher */
97973f53c4aSTejun Heo 			BUG_ON(wq->flush_color != this_flusher.flush_color);
98073f53c4aSTejun Heo 
98173f53c4aSTejun Heo 			wq->first_flusher = &this_flusher;
98273f53c4aSTejun Heo 
98373f53c4aSTejun Heo 			if (!flush_workqueue_prep_cwqs(wq, wq->flush_color,
98473f53c4aSTejun Heo 						       wq->work_color)) {
98573f53c4aSTejun Heo 				/* nothing to flush, done */
98673f53c4aSTejun Heo 				wq->flush_color = next_color;
98773f53c4aSTejun Heo 				wq->first_flusher = NULL;
98873f53c4aSTejun Heo 				goto out_unlock;
98973f53c4aSTejun Heo 			}
99073f53c4aSTejun Heo 		} else {
99173f53c4aSTejun Heo 			/* wait in queue */
99273f53c4aSTejun Heo 			BUG_ON(wq->flush_color == this_flusher.flush_color);
99373f53c4aSTejun Heo 			list_add_tail(&this_flusher.list, &wq->flusher_queue);
99473f53c4aSTejun Heo 			flush_workqueue_prep_cwqs(wq, -1, wq->work_color);
99573f53c4aSTejun Heo 		}
99673f53c4aSTejun Heo 	} else {
99773f53c4aSTejun Heo 		/*
99873f53c4aSTejun Heo 		 * Oops, color space is full, wait on overflow queue.
99973f53c4aSTejun Heo 		 * The next flush completion will assign us
100073f53c4aSTejun Heo 		 * flush_color and transfer to flusher_queue.
100173f53c4aSTejun Heo 		 */
100273f53c4aSTejun Heo 		list_add_tail(&this_flusher.list, &wq->flusher_overflow);
100373f53c4aSTejun Heo 	}
100473f53c4aSTejun Heo 
100573f53c4aSTejun Heo 	mutex_unlock(&wq->flush_mutex);
100673f53c4aSTejun Heo 
100773f53c4aSTejun Heo 	wait_for_completion(&this_flusher.done);
100873f53c4aSTejun Heo 
100973f53c4aSTejun Heo 	/*
101073f53c4aSTejun Heo 	 * Wake-up-and-cascade phase
101173f53c4aSTejun Heo 	 *
101273f53c4aSTejun Heo 	 * First flushers are responsible for cascading flushes and
101373f53c4aSTejun Heo 	 * handling overflow.  Non-first flushers can simply return.
101473f53c4aSTejun Heo 	 */
101573f53c4aSTejun Heo 	if (wq->first_flusher != &this_flusher)
101673f53c4aSTejun Heo 		return;
101773f53c4aSTejun Heo 
101873f53c4aSTejun Heo 	mutex_lock(&wq->flush_mutex);
101973f53c4aSTejun Heo 
102073f53c4aSTejun Heo 	wq->first_flusher = NULL;
102173f53c4aSTejun Heo 
102273f53c4aSTejun Heo 	BUG_ON(!list_empty(&this_flusher.list));
102373f53c4aSTejun Heo 	BUG_ON(wq->flush_color != this_flusher.flush_color);
102473f53c4aSTejun Heo 
102573f53c4aSTejun Heo 	while (true) {
102673f53c4aSTejun Heo 		struct wq_flusher *next, *tmp;
102773f53c4aSTejun Heo 
102873f53c4aSTejun Heo 		/* complete all the flushers sharing the current flush color */
102973f53c4aSTejun Heo 		list_for_each_entry_safe(next, tmp, &wq->flusher_queue, list) {
103073f53c4aSTejun Heo 			if (next->flush_color != wq->flush_color)
103173f53c4aSTejun Heo 				break;
103273f53c4aSTejun Heo 			list_del_init(&next->list);
103373f53c4aSTejun Heo 			complete(&next->done);
103473f53c4aSTejun Heo 		}
103573f53c4aSTejun Heo 
103673f53c4aSTejun Heo 		BUG_ON(!list_empty(&wq->flusher_overflow) &&
103773f53c4aSTejun Heo 		       wq->flush_color != work_next_color(wq->work_color));
103873f53c4aSTejun Heo 
103973f53c4aSTejun Heo 		/* this flush_color is finished, advance by one */
104073f53c4aSTejun Heo 		wq->flush_color = work_next_color(wq->flush_color);
104173f53c4aSTejun Heo 
104273f53c4aSTejun Heo 		/* one color has been freed, handle overflow queue */
104373f53c4aSTejun Heo 		if (!list_empty(&wq->flusher_overflow)) {
104473f53c4aSTejun Heo 			/*
104573f53c4aSTejun Heo 			 * Assign the same color to all overflowed
104673f53c4aSTejun Heo 			 * flushers, advance work_color and append to
104773f53c4aSTejun Heo 			 * flusher_queue.  This is the start-to-wait
104873f53c4aSTejun Heo 			 * phase for these overflowed flushers.
104973f53c4aSTejun Heo 			 */
105073f53c4aSTejun Heo 			list_for_each_entry(tmp, &wq->flusher_overflow, list)
105173f53c4aSTejun Heo 				tmp->flush_color = wq->work_color;
105273f53c4aSTejun Heo 
105373f53c4aSTejun Heo 			wq->work_color = work_next_color(wq->work_color);
105473f53c4aSTejun Heo 
105573f53c4aSTejun Heo 			list_splice_tail_init(&wq->flusher_overflow,
105673f53c4aSTejun Heo 					      &wq->flusher_queue);
105773f53c4aSTejun Heo 			flush_workqueue_prep_cwqs(wq, -1, wq->work_color);
105873f53c4aSTejun Heo 		}
105973f53c4aSTejun Heo 
106073f53c4aSTejun Heo 		if (list_empty(&wq->flusher_queue)) {
106173f53c4aSTejun Heo 			BUG_ON(wq->flush_color != wq->work_color);
106273f53c4aSTejun Heo 			break;
106373f53c4aSTejun Heo 		}
106473f53c4aSTejun Heo 
106573f53c4aSTejun Heo 		/*
106673f53c4aSTejun Heo 		 * Need to flush more colors.  Make the next flusher
106773f53c4aSTejun Heo 		 * the new first flusher and arm cwqs.
106873f53c4aSTejun Heo 		 */
106973f53c4aSTejun Heo 		BUG_ON(wq->flush_color == wq->work_color);
107073f53c4aSTejun Heo 		BUG_ON(wq->flush_color != next->flush_color);
107173f53c4aSTejun Heo 
107273f53c4aSTejun Heo 		list_del_init(&next->list);
107373f53c4aSTejun Heo 		wq->first_flusher = next;
107473f53c4aSTejun Heo 
107573f53c4aSTejun Heo 		if (flush_workqueue_prep_cwqs(wq, wq->flush_color, -1))
107673f53c4aSTejun Heo 			break;
107773f53c4aSTejun Heo 
107873f53c4aSTejun Heo 		/*
107973f53c4aSTejun Heo 		 * Meh... this color is already done, clear first
108073f53c4aSTejun Heo 		 * flusher and repeat cascading.
108173f53c4aSTejun Heo 		 */
108273f53c4aSTejun Heo 		wq->first_flusher = NULL;
108373f53c4aSTejun Heo 	}
108473f53c4aSTejun Heo 
108573f53c4aSTejun Heo out_unlock:
108673f53c4aSTejun Heo 	mutex_unlock(&wq->flush_mutex);
10871da177e4SLinus Torvalds }
1088ae90dd5dSDave Jones EXPORT_SYMBOL_GPL(flush_workqueue);
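
/*
 * Usage sketch (editor's addition; my_dev, shutting_down and my_wq are
 * illustrative): a typical teardown path stops queueing new work and
 * then flushes so that everything queued so far has finished before the
 * resources it uses go away:
 *
 *	my_dev->shutting_down = true;
 *	flush_workqueue(my_wq);
 *	destroy_workqueue(my_wq);
 */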
10891da177e4SLinus Torvalds 
1090db700897SOleg Nesterov /**
1091db700897SOleg Nesterov  * flush_work - block until a work_struct's callback has terminated
1092db700897SOleg Nesterov  * @work: the work which is to be flushed
1093db700897SOleg Nesterov  *
1094a67da70dSOleg Nesterov  * Returns false if @work has already terminated.
1095a67da70dSOleg Nesterov  *
1096db700897SOleg Nesterov  * It is expected that, prior to calling flush_work(), the caller has
1097db700897SOleg Nesterov  * arranged for the work to not be requeued, otherwise it doesn't make
1098db700897SOleg Nesterov  * sense to use this function.
1099db700897SOleg Nesterov  */
1100db700897SOleg Nesterov int flush_work(struct work_struct *work)
1101db700897SOleg Nesterov {
1102affee4b2STejun Heo 	struct worker *worker = NULL;
1103db700897SOleg Nesterov 	struct cpu_workqueue_struct *cwq;
1104db700897SOleg Nesterov 	struct wq_barrier barr;
1105db700897SOleg Nesterov 
1106db700897SOleg Nesterov 	might_sleep();
1107db700897SOleg Nesterov 	cwq = get_wq_data(work);
1108db700897SOleg Nesterov 	if (!cwq)
1109db700897SOleg Nesterov 		return 0;
1110db700897SOleg Nesterov 
11113295f0efSIngo Molnar 	lock_map_acquire(&cwq->wq->lockdep_map);
11123295f0efSIngo Molnar 	lock_map_release(&cwq->wq->lockdep_map);
1113a67da70dSOleg Nesterov 
1114db700897SOleg Nesterov 	spin_lock_irq(&cwq->lock);
1115db700897SOleg Nesterov 	if (!list_empty(&work->entry)) {
1116db700897SOleg Nesterov 		/*
1117db700897SOleg Nesterov 		 * See the comment near try_to_grab_pending()->smp_rmb().
1118db700897SOleg Nesterov 		 * If it was re-queued under us we are not going to wait.
1119db700897SOleg Nesterov 		 */
1120db700897SOleg Nesterov 		smp_rmb();
1121db700897SOleg Nesterov 		if (unlikely(cwq != get_wq_data(work)))
11224690c4abSTejun Heo 			goto already_gone;
1123db700897SOleg Nesterov 	} else {
1124affee4b2STejun Heo 		if (cwq->worker && cwq->worker->current_work == work)
1125affee4b2STejun Heo 			worker = cwq->worker;
1126affee4b2STejun Heo 		if (!worker)
11274690c4abSTejun Heo 			goto already_gone;
1128db700897SOleg Nesterov 	}
1129db700897SOleg Nesterov 
1130affee4b2STejun Heo 	insert_wq_barrier(cwq, &barr, work, worker);
11314690c4abSTejun Heo 	spin_unlock_irq(&cwq->lock);
1132db700897SOleg Nesterov 	wait_for_completion(&barr.done);
1133dc186ad7SThomas Gleixner 	destroy_work_on_stack(&barr.work);
1134db700897SOleg Nesterov 	return 1;
11354690c4abSTejun Heo already_gone:
11364690c4abSTejun Heo 	spin_unlock_irq(&cwq->lock);
11374690c4abSTejun Heo 	return 0;
1138db700897SOleg Nesterov }
1139db700897SOleg Nesterov EXPORT_SYMBOL_GPL(flush_work);
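
/*
 * Editor's illustrative sketch (not part of the original file): a
 * hypothetical driver waits for a possibly in-flight work item during
 * teardown.  Per the comment above, the caller first stops requeueing;
 * all struct and function names here are made up for the example.
 */
struct example_flush_dev {
	struct work_struct	refresh_work;
	bool			stopping;	/* hypothetical requeue gate */
};

static void example_refresh_fn(struct work_struct *work)
{
	struct example_flush_dev *dev =
		container_of(work, struct example_flush_dev, refresh_work);

	if (dev->stopping)
		return;
	/* ... refresh hypothetical device state, possibly requeue ... */
}

static void example_flush_dev_stop(struct example_flush_dev *dev)
{
	dev->stopping = true;		/* no requeueing past this point */
	if (flush_work(&dev->refresh_work))
		pr_debug("refresh_work was queued and has now finished\n");
}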
1140db700897SOleg Nesterov 
11416e84d644SOleg Nesterov /*
11421f1f642eSOleg Nesterov  * Upon a successful return (>= 0), the caller "owns" WORK_STRUCT_PENDING bit,
11436e84d644SOleg Nesterov  * so this work can't be re-armed in any way.
11446e84d644SOleg Nesterov  */
11456e84d644SOleg Nesterov static int try_to_grab_pending(struct work_struct *work)
11466e84d644SOleg Nesterov {
11476e84d644SOleg Nesterov 	struct cpu_workqueue_struct *cwq;
11481f1f642eSOleg Nesterov 	int ret = -1;
11496e84d644SOleg Nesterov 
115022df02bbSTejun Heo 	if (!test_and_set_bit(WORK_STRUCT_PENDING_BIT, work_data_bits(work)))
11511f1f642eSOleg Nesterov 		return 0;
11526e84d644SOleg Nesterov 
11536e84d644SOleg Nesterov 	/*
11546e84d644SOleg Nesterov 	 * The queueing is in progress, or it is already queued. Try to
11556e84d644SOleg Nesterov 	 * steal it from ->worklist without clearing WORK_STRUCT_PENDING.
11566e84d644SOleg Nesterov 	 */
11576e84d644SOleg Nesterov 
11586e84d644SOleg Nesterov 	cwq = get_wq_data(work);
11596e84d644SOleg Nesterov 	if (!cwq)
11606e84d644SOleg Nesterov 		return ret;
11616e84d644SOleg Nesterov 
11626e84d644SOleg Nesterov 	spin_lock_irq(&cwq->lock);
11636e84d644SOleg Nesterov 	if (!list_empty(&work->entry)) {
11646e84d644SOleg Nesterov 		/*
11656e84d644SOleg Nesterov 		 * This work is queued, but perhaps we locked the wrong cwq.
11666e84d644SOleg Nesterov 		 * In that case we must see the new value after rmb(), see
11676e84d644SOleg Nesterov 		 * insert_work()->wmb().
11686e84d644SOleg Nesterov 		 */
11696e84d644SOleg Nesterov 		smp_rmb();
11706e84d644SOleg Nesterov 		if (cwq == get_wq_data(work)) {
1171dc186ad7SThomas Gleixner 			debug_work_deactivate(work);
11726e84d644SOleg Nesterov 			list_del_init(&work->entry);
117373f53c4aSTejun Heo 			cwq_dec_nr_in_flight(cwq, get_work_color(work));
11746e84d644SOleg Nesterov 			ret = 1;
11756e84d644SOleg Nesterov 		}
11766e84d644SOleg Nesterov 	}
11776e84d644SOleg Nesterov 	spin_unlock_irq(&cwq->lock);
11786e84d644SOleg Nesterov 
11796e84d644SOleg Nesterov 	return ret;
11806e84d644SOleg Nesterov }
11816e84d644SOleg Nesterov 
11826e84d644SOleg Nesterov static void wait_on_cpu_work(struct cpu_workqueue_struct *cwq,
1183b89deed3SOleg Nesterov 				struct work_struct *work)
1184b89deed3SOleg Nesterov {
1185b89deed3SOleg Nesterov 	struct wq_barrier barr;
1186affee4b2STejun Heo 	struct worker *worker;
1187b89deed3SOleg Nesterov 
1188b89deed3SOleg Nesterov 	spin_lock_irq(&cwq->lock);
1189affee4b2STejun Heo 
1190affee4b2STejun Heo 	worker = NULL;
1191c34056a3STejun Heo 	if (unlikely(cwq->worker && cwq->worker->current_work == work)) {
1192affee4b2STejun Heo 		worker = cwq->worker;
1193affee4b2STejun Heo 		insert_wq_barrier(cwq, &barr, work, worker);
1194b89deed3SOleg Nesterov 	}
1195affee4b2STejun Heo 
1196b89deed3SOleg Nesterov 	spin_unlock_irq(&cwq->lock);
1197b89deed3SOleg Nesterov 
1198affee4b2STejun Heo 	if (unlikely(worker)) {
1199b89deed3SOleg Nesterov 		wait_for_completion(&barr.done);
1200dc186ad7SThomas Gleixner 		destroy_work_on_stack(&barr.work);
1201dc186ad7SThomas Gleixner 	}
1202b89deed3SOleg Nesterov }
1203b89deed3SOleg Nesterov 
12046e84d644SOleg Nesterov static void wait_on_work(struct work_struct *work)
1205b89deed3SOleg Nesterov {
1206b89deed3SOleg Nesterov 	struct cpu_workqueue_struct *cwq;
120728e53bddSOleg Nesterov 	struct workqueue_struct *wq;
1208b1f4ec17SOleg Nesterov 	int cpu;
1209b89deed3SOleg Nesterov 
1210f293ea92SOleg Nesterov 	might_sleep();
1211f293ea92SOleg Nesterov 
12123295f0efSIngo Molnar 	lock_map_acquire(&work->lockdep_map);
12133295f0efSIngo Molnar 	lock_map_release(&work->lockdep_map);
12144e6045f1SJohannes Berg 
1215b89deed3SOleg Nesterov 	cwq = get_wq_data(work);
1216b89deed3SOleg Nesterov 	if (!cwq)
12173af24433SOleg Nesterov 		return;
1218b89deed3SOleg Nesterov 
121928e53bddSOleg Nesterov 	wq = cwq->wq;
122028e53bddSOleg Nesterov 
12211537663fSTejun Heo 	for_each_possible_cpu(cpu)
12224690c4abSTejun Heo 		wait_on_cpu_work(get_cwq(cpu, wq), work);
12236e84d644SOleg Nesterov }
12246e84d644SOleg Nesterov 
12251f1f642eSOleg Nesterov static int __cancel_work_timer(struct work_struct *work,
12261f1f642eSOleg Nesterov 				struct timer_list* timer)
12271f1f642eSOleg Nesterov {
12281f1f642eSOleg Nesterov 	int ret;
12291f1f642eSOleg Nesterov 
12301f1f642eSOleg Nesterov 	do {
12311f1f642eSOleg Nesterov 		ret = (timer && likely(del_timer(timer)));
12321f1f642eSOleg Nesterov 		if (!ret)
12331f1f642eSOleg Nesterov 			ret = try_to_grab_pending(work);
12341f1f642eSOleg Nesterov 		wait_on_work(work);
12351f1f642eSOleg Nesterov 	} while (unlikely(ret < 0));
12361f1f642eSOleg Nesterov 
12374d707b9fSOleg Nesterov 	clear_wq_data(work);
12381f1f642eSOleg Nesterov 	return ret;
12391f1f642eSOleg Nesterov }
12401f1f642eSOleg Nesterov 
12416e84d644SOleg Nesterov /**
12426e84d644SOleg Nesterov  * cancel_work_sync - block until a work_struct's callback has terminated
12436e84d644SOleg Nesterov  * @work: the work which is to be flushed
12446e84d644SOleg Nesterov  *
12451f1f642eSOleg Nesterov  * Returns true if @work was pending.
12461f1f642eSOleg Nesterov  *
12476e84d644SOleg Nesterov  * cancel_work_sync() will cancel the work if it is queued. If the work's
12486e84d644SOleg Nesterov  * callback appears to be running, cancel_work_sync() will block until it
12496e84d644SOleg Nesterov  * has completed.
12506e84d644SOleg Nesterov  *
12516e84d644SOleg Nesterov  * It is possible to use this function if the work re-queues itself. It can
12526e84d644SOleg Nesterov  * cancel the work even if it migrates to another workqueue; however, in that
12536e84d644SOleg Nesterov  * case it only guarantees that work->func() has completed on the last queued
12546e84d644SOleg Nesterov  * workqueue.
12556e84d644SOleg Nesterov  *
12566e84d644SOleg Nesterov  * cancel_work_sync(&delayed_work->work) should be used only if ->timer is not
12576e84d644SOleg Nesterov  * pending, otherwise it goes into a busy-wait loop until the timer expires.
12586e84d644SOleg Nesterov  *
12596e84d644SOleg Nesterov  * The caller must ensure that workqueue_struct on which this work was last
12606e84d644SOleg Nesterov  * queued can't be destroyed before this function returns.
12616e84d644SOleg Nesterov  */
12621f1f642eSOleg Nesterov int cancel_work_sync(struct work_struct *work)
12636e84d644SOleg Nesterov {
12641f1f642eSOleg Nesterov 	return __cancel_work_timer(work, NULL);
1265b89deed3SOleg Nesterov }
126628e53bddSOleg Nesterov EXPORT_SYMBOL_GPL(cancel_work_sync);
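
/*
 * Editor's illustrative sketch (not part of the original file): stopping
 * a self-requeueing work item.  cancel_work_sync() both clears the
 * pending state and waits for a running callback; names are hypothetical.
 */
struct example_poller {
	struct work_struct	poll_work;
};

static void example_poll_fn(struct work_struct *work)
{
	struct example_poller *p =
		container_of(work, struct example_poller, poll_work);

	/* ... poll some hypothetical hardware ... */
	schedule_work(&p->poll_work);	/* requeues itself; still cancelable */
}

static void example_poller_stop(struct example_poller *p)
{
	if (cancel_work_sync(&p->poll_work))
		pr_debug("poll_work was still pending when cancelled\n");
}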
1267b89deed3SOleg Nesterov 
12686e84d644SOleg Nesterov /**
1269f5a421a4SOleg Nesterov  * cancel_delayed_work_sync - reliably kill off a delayed work.
12706e84d644SOleg Nesterov  * @dwork: the delayed work struct
12716e84d644SOleg Nesterov  *
12721f1f642eSOleg Nesterov  * Returns true if @dwork was pending.
12731f1f642eSOleg Nesterov  *
12746e84d644SOleg Nesterov  * It is possible to use this function if @dwork rearms itself via queue_work()
12756e84d644SOleg Nesterov  * or queue_delayed_work(). See also the comment for cancel_work_sync().
12766e84d644SOleg Nesterov  */
12771f1f642eSOleg Nesterov int cancel_delayed_work_sync(struct delayed_work *dwork)
12786e84d644SOleg Nesterov {
12791f1f642eSOleg Nesterov 	return __cancel_work_timer(&dwork->work, &dwork->timer);
12806e84d644SOleg Nesterov }
1281f5a421a4SOleg Nesterov EXPORT_SYMBOL(cancel_delayed_work_sync);
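
/*
 * Editor's illustrative sketch (hypothetical names, not part of the
 * original file): tearing down a delayed work item.  The call kills a
 * still-pending timer and waits for an already-running callback.
 */
struct example_heartbeat {
	struct delayed_work	beat_work;
};

static void example_heartbeat_stop(struct example_heartbeat *hb)
{
	cancel_delayed_work_sync(&hb->beat_work);
	/* neither the timer nor the callback can be active past this point */
}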
12821da177e4SLinus Torvalds 
12836e84d644SOleg Nesterov static struct workqueue_struct *keventd_wq __read_mostly;
12841da177e4SLinus Torvalds 
12850fcb78c2SRolf Eike Beer /**
12860fcb78c2SRolf Eike Beer  * schedule_work - put work task in global workqueue
12870fcb78c2SRolf Eike Beer  * @work: job to be done
12880fcb78c2SRolf Eike Beer  *
12895b0f437dSBart Van Assche  * Returns zero if @work was already on the kernel-global workqueue and
12905b0f437dSBart Van Assche  * non-zero otherwise.
12915b0f437dSBart Van Assche  *
12925b0f437dSBart Van Assche  * This puts a job in the kernel-global workqueue if it was not already
12935b0f437dSBart Van Assche  * queued and leaves it in the same position on the kernel-global
12945b0f437dSBart Van Assche  * workqueue otherwise.
12950fcb78c2SRolf Eike Beer  */
12967ad5b3a5SHarvey Harrison int schedule_work(struct work_struct *work)
12971da177e4SLinus Torvalds {
12981da177e4SLinus Torvalds 	return queue_work(keventd_wq, work);
12991da177e4SLinus Torvalds }
1300ae90dd5dSDave Jones EXPORT_SYMBOL(schedule_work);
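
/*
 * Editor's illustrative sketch (not part of the original file): deferring
 * a handler to process context via the kernel-global workqueue.  The
 * names are hypothetical; the caller may be in atomic context.
 */
static void example_event_fn(struct work_struct *work)
{
	/* runs later, in process context, on keventd */
}

static DECLARE_WORK(example_event_work, example_event_fn);

static void example_notify_event(void)
{
	/* safe from atomic context; a no-op if the work is already queued */
	schedule_work(&example_event_work);
}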
13011da177e4SLinus Torvalds 
1302c1a220e7SZhang Rui /**
1303c1a220e7SZhang Rui  * schedule_work_on - put work task on a specific cpu
1304c1a220e7SZhang Rui  * @cpu: cpu to put the work task on
1305c1a220e7SZhang Rui  * @work: job to be done
1306c1a220e7SZhang Rui  *
1307c1a220e7SZhang Rui  * This puts a job on a specific cpu.
1308c1a220e7SZhang Rui  */
1309c1a220e7SZhang Rui int schedule_work_on(int cpu, struct work_struct *work)
1310c1a220e7SZhang Rui {
1311c1a220e7SZhang Rui 	return queue_work_on(cpu, keventd_wq, work);
1312c1a220e7SZhang Rui }
1313c1a220e7SZhang Rui EXPORT_SYMBOL(schedule_work_on);
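
/*
 * Editor's illustrative sketch (hypothetical names): pinning a work item
 * to a particular CPU, e.g. to touch per-CPU state.  The caller is
 * assumed to keep @cpu online, e.g. under get_online_cpus().
 */
static void example_percpu_fn(struct work_struct *work)
{
	/* runs on the CPU it was queued on */
}

static DECLARE_WORK(example_percpu_work, example_percpu_fn);

static void example_kick_cpu(int cpu)
{
	schedule_work_on(cpu, &example_percpu_work);
}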
1314c1a220e7SZhang Rui 
13150fcb78c2SRolf Eike Beer /**
13160fcb78c2SRolf Eike Beer  * schedule_delayed_work - put work task in global workqueue after delay
131752bad64dSDavid Howells  * @dwork: job to be done
131852bad64dSDavid Howells  * @delay: number of jiffies to wait or 0 for immediate execution
13190fcb78c2SRolf Eike Beer  *
13200fcb78c2SRolf Eike Beer  * After waiting for a given time this puts a job in the kernel-global
13210fcb78c2SRolf Eike Beer  * workqueue.
13220fcb78c2SRolf Eike Beer  */
13237ad5b3a5SHarvey Harrison int schedule_delayed_work(struct delayed_work *dwork,
132482f67cd9SIngo Molnar 					unsigned long delay)
13251da177e4SLinus Torvalds {
132652bad64dSDavid Howells 	return queue_delayed_work(keventd_wq, dwork, delay);
13271da177e4SLinus Torvalds }
1328ae90dd5dSDave Jones EXPORT_SYMBOL(schedule_delayed_work);
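
/*
 * Editor's illustrative sketch (hypothetical names): arming a handler to
 * run on the kernel-global workqueue roughly two seconds from now.
 */
static void example_timeout_fn(struct work_struct *work)
{
	/* runs in process context once the delay has elapsed */
}

static DECLARE_DELAYED_WORK(example_timeout_work, example_timeout_fn);

static void example_arm_timeout(void)
{
	schedule_delayed_work(&example_timeout_work, 2 * HZ);
}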
13291da177e4SLinus Torvalds 
13300fcb78c2SRolf Eike Beer /**
13318c53e463SLinus Torvalds  * flush_delayed_work - block until a delayed_work's callback has terminated
13328c53e463SLinus Torvalds  * @dwork: the delayed work which is to be flushed
13338c53e463SLinus Torvalds  *
13348c53e463SLinus Torvalds  * Any pending timer is cancelled, and any pending work is run immediately.
13358c53e463SLinus Torvalds  */
13368c53e463SLinus Torvalds void flush_delayed_work(struct delayed_work *dwork)
13378c53e463SLinus Torvalds {
13388c53e463SLinus Torvalds 	if (del_timer_sync(&dwork->timer)) {
13394690c4abSTejun Heo 		__queue_work(get_cpu(), get_wq_data(&dwork->work)->wq,
13404690c4abSTejun Heo 			     &dwork->work);
13418c53e463SLinus Torvalds 		put_cpu();
13428c53e463SLinus Torvalds 	}
13438c53e463SLinus Torvalds 	flush_work(&dwork->work);
13448c53e463SLinus Torvalds }
13458c53e463SLinus Torvalds EXPORT_SYMBOL(flush_delayed_work);
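
/*
 * Editor's illustrative sketch (hypothetical names): instead of waiting
 * for the delay timer, run a pending delayed work right now and wait for
 * it, e.g. when preparing for suspend.
 */
static void example_stats_fn(struct work_struct *work)
{
	/* e.g. write out accumulated statistics */
}

static DECLARE_DELAYED_WORK(example_stats_work, example_stats_fn);

static void example_stats_sync(void)
{
	/* cancels the timer (if armed), queues the work now and waits */
	flush_delayed_work(&example_stats_work);
}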
13468c53e463SLinus Torvalds 
13478c53e463SLinus Torvalds /**
13480fcb78c2SRolf Eike Beer  * schedule_delayed_work_on - queue work in global workqueue on CPU after delay
13490fcb78c2SRolf Eike Beer  * @cpu: cpu to use
135052bad64dSDavid Howells  * @dwork: job to be done
13510fcb78c2SRolf Eike Beer  * @delay: number of jiffies to wait
13520fcb78c2SRolf Eike Beer  *
13530fcb78c2SRolf Eike Beer  * After waiting for a given time this puts a job in the kernel-global
13540fcb78c2SRolf Eike Beer  * workqueue on the specified CPU.
13550fcb78c2SRolf Eike Beer  */
13561da177e4SLinus Torvalds int schedule_delayed_work_on(int cpu,
135752bad64dSDavid Howells 			struct delayed_work *dwork, unsigned long delay)
13581da177e4SLinus Torvalds {
135952bad64dSDavid Howells 	return queue_delayed_work_on(cpu, keventd_wq, dwork, delay);
13601da177e4SLinus Torvalds }
1361ae90dd5dSDave Jones EXPORT_SYMBOL(schedule_delayed_work_on);
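
/*
 * Editor's illustrative sketch (hypothetical names): the delayed,
 * CPU-affine variant.  As with schedule_work_on(), the caller is assumed
 * to keep @cpu from going away.
 */
static void example_cpu_timeout_fn(struct work_struct *work)
{
	/* runs on the requested CPU after the delay */
}

static DECLARE_DELAYED_WORK(example_cpu_timeout_work, example_cpu_timeout_fn);

static void example_arm_cpu_timeout(int cpu)
{
	schedule_delayed_work_on(cpu, &example_cpu_timeout_work,
				 msecs_to_jiffies(500));
}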
13621da177e4SLinus Torvalds 
1363b6136773SAndrew Morton /**
1364b6136773SAndrew Morton  * schedule_on_each_cpu - call a function on each online CPU from keventd
1365b6136773SAndrew Morton  * @func: the function to call
1366b6136773SAndrew Morton  *
1367b6136773SAndrew Morton  * Returns zero on success.
1368b6136773SAndrew Morton  * Returns a negative errno on failure.
1369b6136773SAndrew Morton  *
1370b6136773SAndrew Morton  * schedule_on_each_cpu() is very slow.
1371b6136773SAndrew Morton  */
137265f27f38SDavid Howells int schedule_on_each_cpu(work_func_t func)
137315316ba8SChristoph Lameter {
137415316ba8SChristoph Lameter 	int cpu;
137565a64464SAndi Kleen 	int orig = -1;
1376b6136773SAndrew Morton 	struct work_struct *works;
137715316ba8SChristoph Lameter 
1378b6136773SAndrew Morton 	works = alloc_percpu(struct work_struct);
1379b6136773SAndrew Morton 	if (!works)
138015316ba8SChristoph Lameter 		return -ENOMEM;
1381b6136773SAndrew Morton 
138295402b38SGautham R Shenoy 	get_online_cpus();
138393981800STejun Heo 
138493981800STejun Heo 	/*
138693981800STejun Heo 	 * When running in keventd, don't schedule a work item on the
138793981800STejun Heo 	 * local CPU; call the function directly instead.  The workqueue
138893981800STejun Heo 	 * is already bound to this CPU, so a direct call is also faster.
138893981800STejun Heo 	 */
138993981800STejun Heo 	if (current_is_keventd())
139093981800STejun Heo 		orig = raw_smp_processor_id();
139193981800STejun Heo 
139215316ba8SChristoph Lameter 	for_each_online_cpu(cpu) {
13939bfb1839SIngo Molnar 		struct work_struct *work = per_cpu_ptr(works, cpu);
13949bfb1839SIngo Molnar 
13959bfb1839SIngo Molnar 		INIT_WORK(work, func);
139693981800STejun Heo 		if (cpu != orig)
13978de6d308SOleg Nesterov 			schedule_work_on(cpu, work);
139815316ba8SChristoph Lameter 	}
139993981800STejun Heo 	if (orig >= 0)
140093981800STejun Heo 		func(per_cpu_ptr(works, orig));
140193981800STejun Heo 
140293981800STejun Heo 	for_each_online_cpu(cpu)
14038616a89aSOleg Nesterov 		flush_work(per_cpu_ptr(works, cpu));
140493981800STejun Heo 
140595402b38SGautham R Shenoy 	put_online_cpus();
1406b6136773SAndrew Morton 	free_percpu(works);
140715316ba8SChristoph Lameter 	return 0;
140815316ba8SChristoph Lameter }
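
/*
 * Editor's illustrative sketch (hypothetical names): draining per-CPU
 * state by running a callback once on every online CPU and waiting for
 * all of them to finish.
 */
static void example_drain_fn(struct work_struct *work)
{
	/* runs once on each online CPU, in process context */
}

static int example_drain_all_cpus(void)
{
	return schedule_on_each_cpu(example_drain_fn);	/* may sleep */
}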
140915316ba8SChristoph Lameter 
1410eef6a7d5SAlan Stern /**
1411eef6a7d5SAlan Stern  * flush_scheduled_work - ensure that any scheduled work has run to completion.
1412eef6a7d5SAlan Stern  *
1413eef6a7d5SAlan Stern  * Forces execution of the kernel-global workqueue and blocks until its
1414eef6a7d5SAlan Stern  * completion.
1415eef6a7d5SAlan Stern  *
1416eef6a7d5SAlan Stern  * Think twice before calling this function!  It's very easy to get into
1417eef6a7d5SAlan Stern  * trouble if you don't take great care.  Either of the following situations
1418eef6a7d5SAlan Stern  * will lead to deadlock:
1419eef6a7d5SAlan Stern  *
1420eef6a7d5SAlan Stern  *	One of the work items currently on the workqueue needs to acquire
1421eef6a7d5SAlan Stern  *	a lock held by your code or its caller.
1422eef6a7d5SAlan Stern  *
1423eef6a7d5SAlan Stern  *	Your code is running in the context of a work routine.
1424eef6a7d5SAlan Stern  *
1425eef6a7d5SAlan Stern  * They will be detected by lockdep when they occur, but the first might not
1426eef6a7d5SAlan Stern  * occur very often.  It depends on what work items are on the workqueue and
1427eef6a7d5SAlan Stern  * what locks they need, which you have no control over.
1428eef6a7d5SAlan Stern  *
1429eef6a7d5SAlan Stern  * In most situations flushing the entire workqueue is overkill; you merely
1430eef6a7d5SAlan Stern  * need to know that a particular work item isn't queued and isn't running.
1431eef6a7d5SAlan Stern  * In such cases you should use cancel_delayed_work_sync() or
1432eef6a7d5SAlan Stern  * cancel_work_sync() instead.
1433eef6a7d5SAlan Stern  */
14341da177e4SLinus Torvalds void flush_scheduled_work(void)
14351da177e4SLinus Torvalds {
14361da177e4SLinus Torvalds 	flush_workqueue(keventd_wq);
14371da177e4SLinus Torvalds }
1438ae90dd5dSDave Jones EXPORT_SYMBOL(flush_scheduled_work);
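
/*
 * Editor's illustrative sketch (hypothetical names): the classic
 * module-exit use.  Given the pitfalls documented above, cancelling the
 * individual work items with cancel_work_sync()/cancel_delayed_work_sync()
 * is usually the better choice; this is shown only for completeness.
 */
static void example_module_exit(void)
{
	/*
	 * Must not be called from a work item, nor while holding a lock
	 * that any scheduled work item might take.
	 */
	flush_scheduled_work();
}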
14391da177e4SLinus Torvalds 
14401da177e4SLinus Torvalds /**
14411fa44ecaSJames Bottomley  * execute_in_process_context - reliably execute the routine in process context
14421fa44ecaSJames Bottomley  * @fn:		the function to execute
14431fa44ecaSJames Bottomley  * @ew:		guaranteed storage for the execute work structure (must
14441fa44ecaSJames Bottomley  *		be available when the work executes)
14451fa44ecaSJames Bottomley  *
14461fa44ecaSJames Bottomley  * Executes the function immediately if process context is available,
14471fa44ecaSJames Bottomley  * otherwise defers it to the kernel-global workqueue for later execution.
14481fa44ecaSJames Bottomley  *
14491fa44ecaSJames Bottomley  * Returns:	0 - function was executed
14501fa44ecaSJames Bottomley  *		1 - function was scheduled for execution
14511fa44ecaSJames Bottomley  */
145265f27f38SDavid Howells int execute_in_process_context(work_func_t fn, struct execute_work *ew)
14531fa44ecaSJames Bottomley {
14541fa44ecaSJames Bottomley 	if (!in_interrupt()) {
145565f27f38SDavid Howells 		fn(&ew->work);
14561fa44ecaSJames Bottomley 		return 0;
14571fa44ecaSJames Bottomley 	}
14581fa44ecaSJames Bottomley 
145965f27f38SDavid Howells 	INIT_WORK(&ew->work, fn);
14601fa44ecaSJames Bottomley 	schedule_work(&ew->work);
14611fa44ecaSJames Bottomley 
14621fa44ecaSJames Bottomley 	return 1;
14631fa44ecaSJames Bottomley }
14641fa44ecaSJames Bottomley EXPORT_SYMBOL_GPL(execute_in_process_context);
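
/*
 * Editor's illustrative sketch (hypothetical names): releasing an object
 * that must be freed in process context from a caller that may be in
 * interrupt context.  The execute_work storage lives inside the object,
 * so it remains valid until the deferred function has run.
 */
struct example_resource {
	struct execute_work	ew;
	/* ... hypothetical payload ... */
};

static void example_resource_release(struct work_struct *work)
{
	struct example_resource *res =
		container_of(work, struct example_resource, ew.work);

	kfree(res);	/* guaranteed to be running in process context */
}

static void example_resource_put(struct example_resource *res)
{
	/* frees immediately if possible, otherwise defers to keventd */
	execute_in_process_context(example_resource_release, &res->ew);
}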
14651fa44ecaSJames Bottomley 
14661da177e4SLinus Torvalds int keventd_up(void)
14671da177e4SLinus Torvalds {
14681da177e4SLinus Torvalds 	return keventd_wq != NULL;
14691da177e4SLinus Torvalds }
14701da177e4SLinus Torvalds 
14711da177e4SLinus Torvalds int current_is_keventd(void)
14721da177e4SLinus Torvalds {
14731da177e4SLinus Torvalds 	struct cpu_workqueue_struct *cwq;
1474d243769dSHugh Dickins 	int cpu = raw_smp_processor_id(); /* preempt-safe: keventd is per-cpu */
14751da177e4SLinus Torvalds 	int ret = 0;
14761da177e4SLinus Torvalds 
14771da177e4SLinus Torvalds 	BUG_ON(!keventd_wq);
14781da177e4SLinus Torvalds 
14791537663fSTejun Heo 	cwq = get_cwq(cpu, keventd_wq);
1480c34056a3STejun Heo 	if (current == cwq->worker->task)
14811da177e4SLinus Torvalds 		ret = 1;
14821da177e4SLinus Torvalds 
14831da177e4SLinus Torvalds 	return ret;
14851da177e4SLinus Torvalds }
14861da177e4SLinus Torvalds 
14870f900049STejun Heo static struct cpu_workqueue_struct *alloc_cwqs(void)
14880f900049STejun Heo {
14890f900049STejun Heo 	/*
14900f900049STejun Heo 	 * cwqs are force-aligned according to WORK_STRUCT_FLAG_BITS.
14910f900049STejun Heo 	 * Make sure that the alignment isn't lower than that of
14920f900049STejun Heo 	 * unsigned long long.
14930f900049STejun Heo 	 */
14940f900049STejun Heo 	const size_t size = sizeof(struct cpu_workqueue_struct);
14950f900049STejun Heo 	const size_t align = max_t(size_t, 1 << WORK_STRUCT_FLAG_BITS,
14960f900049STejun Heo 				   __alignof__(unsigned long long));
14970f900049STejun Heo 	struct cpu_workqueue_struct *cwqs;
14980f900049STejun Heo #ifndef CONFIG_SMP
14990f900049STejun Heo 	void *ptr;
15000f900049STejun Heo 
15010f900049STejun Heo 	/*
15020f900049STejun Heo 	 * On UP, percpu allocator doesn't honor alignment parameter
15030f900049STejun Heo 	 * and simply uses arch-dependent default.  Allocate enough
15040f900049STejun Heo 	 * room to align cwq and put an extra pointer at the end
15050f900049STejun Heo 	 * pointing back to the originally allocated memory, which
15060f900049STejun Heo 	 * will be used when freeing.
15070f900049STejun Heo 	 *
15080f900049STejun Heo 	 * FIXME: This really belongs to UP percpu code.  Update UP
15090f900049STejun Heo 	 * percpu code to honor alignment and remove this ugliness.
15100f900049STejun Heo 	 */
15110f900049STejun Heo 	ptr = __alloc_percpu(size + align + sizeof(void *), 1);
15120f900049STejun Heo 	cwqs = PTR_ALIGN(ptr, align);
15130f900049STejun Heo 	*(void **)per_cpu_ptr(cwqs + 1, 0) = ptr;
15140f900049STejun Heo #else
15150f900049STejun Heo 	/* On SMP, percpu allocator can do it itself */
15160f900049STejun Heo 	cwqs = __alloc_percpu(size, align);
15170f900049STejun Heo #endif
15180f900049STejun Heo 	/* just in case, make sure it's actually aligned */
15190f900049STejun Heo 	BUG_ON(!IS_ALIGNED((unsigned long)cwqs, align));
15200f900049STejun Heo 	return cwqs;
15210f900049STejun Heo }
15220f900049STejun Heo 
15230f900049STejun Heo static void free_cwqs(struct cpu_workqueue_struct *cwqs)
15240f900049STejun Heo {
15250f900049STejun Heo #ifndef CONFIG_SMP
15260f900049STejun Heo 	/* on UP, the pointer to free is stored right after the cwq */
15270f900049STejun Heo 	if (cwqs)
15280f900049STejun Heo 		free_percpu(*(void **)per_cpu_ptr(cwqs + 1, 0));
15290f900049STejun Heo #else
15300f900049STejun Heo 	free_percpu(cwqs);
15310f900049STejun Heo #endif
15320f900049STejun Heo }
15330f900049STejun Heo 
15344e6045f1SJohannes Berg struct workqueue_struct *__create_workqueue_key(const char *name,
153597e37d7bSTejun Heo 						unsigned int flags,
15361e19ffc6STejun Heo 						int max_active,
1537eb13ba87SJohannes Berg 						struct lock_class_key *key,
1538eb13ba87SJohannes Berg 						const char *lock_name)
15393af24433SOleg Nesterov {
15401537663fSTejun Heo 	bool singlethread = flags & WQ_SINGLE_THREAD;
15413af24433SOleg Nesterov 	struct workqueue_struct *wq;
1542c34056a3STejun Heo 	bool failed = false;
1543c34056a3STejun Heo 	unsigned int cpu;
15443af24433SOleg Nesterov 
15451e19ffc6STejun Heo 	max_active = clamp_val(max_active, 1, INT_MAX);
15461e19ffc6STejun Heo 
15473af24433SOleg Nesterov 	wq = kzalloc(sizeof(*wq), GFP_KERNEL);
15483af24433SOleg Nesterov 	if (!wq)
15494690c4abSTejun Heo 		goto err;
15503af24433SOleg Nesterov 
15510f900049STejun Heo 	wq->cpu_wq = alloc_cwqs();
15524690c4abSTejun Heo 	if (!wq->cpu_wq)
15534690c4abSTejun Heo 		goto err;
15543af24433SOleg Nesterov 
155597e37d7bSTejun Heo 	wq->flags = flags;
155673f53c4aSTejun Heo 	mutex_init(&wq->flush_mutex);
155773f53c4aSTejun Heo 	atomic_set(&wq->nr_cwqs_to_flush, 0);
155873f53c4aSTejun Heo 	INIT_LIST_HEAD(&wq->flusher_queue);
155973f53c4aSTejun Heo 	INIT_LIST_HEAD(&wq->flusher_overflow);
15603af24433SOleg Nesterov 	wq->name = name;
1561eb13ba87SJohannes Berg 	lockdep_init_map(&wq->lockdep_map, lock_name, key, 0);
1562cce1a165SOleg Nesterov 	INIT_LIST_HEAD(&wq->list);
15633af24433SOleg Nesterov 
15643da1c84cSOleg Nesterov 	cpu_maps_update_begin();
15656af8bf3dSOleg Nesterov 	/*
15666af8bf3dSOleg Nesterov 	 * We must initialize cwqs for each possible cpu even if we
15676af8bf3dSOleg Nesterov 	 * are going to call destroy_workqueue() in the end.  Otherwise
15686af8bf3dSOleg Nesterov 	 * cpu_up() can hit the uninitialized cwq once we drop the
15696af8bf3dSOleg Nesterov 	 * lock.
15706af8bf3dSOleg Nesterov 	 */
15713af24433SOleg Nesterov 	for_each_possible_cpu(cpu) {
15721537663fSTejun Heo 		struct cpu_workqueue_struct *cwq = get_cwq(cpu, wq);
15731537663fSTejun Heo 
15740f900049STejun Heo 		BUG_ON((unsigned long)cwq & WORK_STRUCT_FLAG_MASK);
15751537663fSTejun Heo 		cwq->cpu = cpu;
1576c34056a3STejun Heo 		cwq->wq = wq;
157773f53c4aSTejun Heo 		cwq->flush_color = -1;
15781e19ffc6STejun Heo 		cwq->max_active = max_active;
15791537663fSTejun Heo 		spin_lock_init(&cwq->lock);
15801537663fSTejun Heo 		INIT_LIST_HEAD(&cwq->worklist);
15811e19ffc6STejun Heo 		INIT_LIST_HEAD(&cwq->delayed_works);
15821537663fSTejun Heo 		init_waitqueue_head(&cwq->more_work);
15831537663fSTejun Heo 
1584c34056a3STejun Heo 		if (failed)
15853af24433SOleg Nesterov 			continue;
1586c34056a3STejun Heo 		cwq->worker = create_worker(cwq,
1587c34056a3STejun Heo 					    cpu_online(cpu) && !singlethread);
1588c34056a3STejun Heo 		if (cwq->worker)
1589c34056a3STejun Heo 			start_worker(cwq->worker);
15901537663fSTejun Heo 		else
1591c34056a3STejun Heo 			failed = true;
15923af24433SOleg Nesterov 	}
15931537663fSTejun Heo 
15941537663fSTejun Heo 	spin_lock(&workqueue_lock);
15951537663fSTejun Heo 	list_add(&wq->list, &workqueues);
15961537663fSTejun Heo 	spin_unlock(&workqueue_lock);
15971537663fSTejun Heo 
15983da1c84cSOleg Nesterov 	cpu_maps_update_done();
15993af24433SOleg Nesterov 
1600c34056a3STejun Heo 	if (failed) {
16013af24433SOleg Nesterov 		destroy_workqueue(wq);
16023af24433SOleg Nesterov 		wq = NULL;
16033af24433SOleg Nesterov 	}
16043af24433SOleg Nesterov 	return wq;
16054690c4abSTejun Heo err:
16064690c4abSTejun Heo 	if (wq) {
16070f900049STejun Heo 		free_cwqs(wq->cpu_wq);
16084690c4abSTejun Heo 		kfree(wq);
16094690c4abSTejun Heo 	}
16104690c4abSTejun Heo 	return NULL;
16113af24433SOleg Nesterov }
16124e6045f1SJohannes Berg EXPORT_SYMBOL_GPL(__create_workqueue_key);
16133af24433SOleg Nesterov 
16143af24433SOleg Nesterov /**
16153af24433SOleg Nesterov  * destroy_workqueue - safely terminate a workqueue
16163af24433SOleg Nesterov  * @wq: target workqueue
16173af24433SOleg Nesterov  *
16183af24433SOleg Nesterov  * Safely destroy a workqueue. All work currently pending will be done first.
16193af24433SOleg Nesterov  */
16203af24433SOleg Nesterov void destroy_workqueue(struct workqueue_struct *wq)
16213af24433SOleg Nesterov {
16223af24433SOleg Nesterov 	int cpu;
16233af24433SOleg Nesterov 
16243da1c84cSOleg Nesterov 	cpu_maps_update_begin();
162595402b38SGautham R Shenoy 	spin_lock(&workqueue_lock);
16263af24433SOleg Nesterov 	list_del(&wq->list);
162795402b38SGautham R Shenoy 	spin_unlock(&workqueue_lock);
16283da1c84cSOleg Nesterov 	cpu_maps_update_done();
16293af24433SOleg Nesterov 
163073f53c4aSTejun Heo 	flush_workqueue(wq);
163173f53c4aSTejun Heo 
163273f53c4aSTejun Heo 	for_each_possible_cpu(cpu) {
163373f53c4aSTejun Heo 		struct cpu_workqueue_struct *cwq = get_cwq(cpu, wq);
163473f53c4aSTejun Heo 		int i;
163573f53c4aSTejun Heo 
1636c34056a3STejun Heo 		if (cwq->worker) {
1637c34056a3STejun Heo 			destroy_worker(cwq->worker);
1638c34056a3STejun Heo 			cwq->worker = NULL;
163973f53c4aSTejun Heo 		}
164073f53c4aSTejun Heo 
164173f53c4aSTejun Heo 		for (i = 0; i < WORK_NR_COLORS; i++)
164273f53c4aSTejun Heo 			BUG_ON(cwq->nr_in_flight[i]);
16431e19ffc6STejun Heo 		BUG_ON(cwq->nr_active);
16441e19ffc6STejun Heo 		BUG_ON(!list_empty(&cwq->delayed_works));
164573f53c4aSTejun Heo 	}
16461537663fSTejun Heo 
16470f900049STejun Heo 	free_cwqs(wq->cpu_wq);
16483af24433SOleg Nesterov 	kfree(wq);
16493af24433SOleg Nesterov }
16503af24433SOleg Nesterov EXPORT_SYMBOL_GPL(destroy_workqueue);
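
/*
 * Editor's illustrative sketch (hypothetical names): the usual lifecycle
 * of a private workqueue through the public create_workqueue()/
 * queue_work()/destroy_workqueue() wrappers around the functions above.
 */
static struct workqueue_struct *example_wq;

static void example_wq_fn(struct work_struct *work)
{
	/* runs on the dedicated "example" workqueue */
}

static DECLARE_WORK(example_wq_work, example_wq_fn);

static int example_wq_init(void)
{
	example_wq = create_workqueue("example");
	if (!example_wq)
		return -ENOMEM;

	queue_work(example_wq, &example_wq_work);
	return 0;
}

static void example_wq_exit(void)
{
	/* all pending work is flushed before the workqueue is freed */
	destroy_workqueue(example_wq);
}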
16513af24433SOleg Nesterov 
16529c7b216dSChandra Seetharaman static int __devinit workqueue_cpu_callback(struct notifier_block *nfb,
16531da177e4SLinus Torvalds 						unsigned long action,
16541da177e4SLinus Torvalds 						void *hcpu)
16551da177e4SLinus Torvalds {
16563af24433SOleg Nesterov 	unsigned int cpu = (unsigned long)hcpu;
16573af24433SOleg Nesterov 	struct cpu_workqueue_struct *cwq;
16581da177e4SLinus Torvalds 	struct workqueue_struct *wq;
16591da177e4SLinus Torvalds 
16608bb78442SRafael J. Wysocki 	action &= ~CPU_TASKS_FROZEN;
16618bb78442SRafael J. Wysocki 
16621da177e4SLinus Torvalds 	list_for_each_entry(wq, &workqueues, list) {
16631537663fSTejun Heo 		if (wq->flags & WQ_SINGLE_THREAD)
16641537663fSTejun Heo 			continue;
16651537663fSTejun Heo 
16661537663fSTejun Heo 		cwq = get_cwq(cpu, wq);
16673af24433SOleg Nesterov 
16683af24433SOleg Nesterov 		switch (action) {
16693da1c84cSOleg Nesterov 		case CPU_POST_DEAD:
167073f53c4aSTejun Heo 			flush_workqueue(wq);
16711da177e4SLinus Torvalds 			break;
16721da177e4SLinus Torvalds 		}
16733af24433SOleg Nesterov 	}
16741da177e4SLinus Torvalds 
16751537663fSTejun Heo 	return notifier_from_errno(0);
16761da177e4SLinus Torvalds }
16771da177e4SLinus Torvalds 
16782d3854a3SRusty Russell #ifdef CONFIG_SMP
16798ccad40dSRusty Russell 
16802d3854a3SRusty Russell struct work_for_cpu {
16816b44003eSAndrew Morton 	struct completion completion;
16822d3854a3SRusty Russell 	long (*fn)(void *);
16832d3854a3SRusty Russell 	void *arg;
16842d3854a3SRusty Russell 	long ret;
16852d3854a3SRusty Russell };
16862d3854a3SRusty Russell 
16876b44003eSAndrew Morton static int do_work_for_cpu(void *_wfc)
16882d3854a3SRusty Russell {
16896b44003eSAndrew Morton 	struct work_for_cpu *wfc = _wfc;
16902d3854a3SRusty Russell 	wfc->ret = wfc->fn(wfc->arg);
16916b44003eSAndrew Morton 	complete(&wfc->completion);
16926b44003eSAndrew Morton 	return 0;
16932d3854a3SRusty Russell }
16942d3854a3SRusty Russell 
16952d3854a3SRusty Russell /**
16962d3854a3SRusty Russell  * work_on_cpu - run a function in process context on a particular cpu
16972d3854a3SRusty Russell  * @cpu: the cpu to run on
16982d3854a3SRusty Russell  * @fn: the function to run
16992d3854a3SRusty Russell  * @arg: the function arg
17002d3854a3SRusty Russell  *
170131ad9081SRusty Russell  * This will return the value @fn returns.
170231ad9081SRusty Russell  * It is up to the caller to ensure that the cpu doesn't go offline.
17036b44003eSAndrew Morton  * The caller must not hold any locks which would prevent @fn from completing.
17042d3854a3SRusty Russell  */
17052d3854a3SRusty Russell long work_on_cpu(unsigned int cpu, long (*fn)(void *), void *arg)
17062d3854a3SRusty Russell {
17076b44003eSAndrew Morton 	struct task_struct *sub_thread;
17086b44003eSAndrew Morton 	struct work_for_cpu wfc = {
17096b44003eSAndrew Morton 		.completion = COMPLETION_INITIALIZER_ONSTACK(wfc.completion),
17106b44003eSAndrew Morton 		.fn = fn,
17116b44003eSAndrew Morton 		.arg = arg,
17126b44003eSAndrew Morton 	};
17132d3854a3SRusty Russell 
17146b44003eSAndrew Morton 	sub_thread = kthread_create(do_work_for_cpu, &wfc, "work_for_cpu");
17156b44003eSAndrew Morton 	if (IS_ERR(sub_thread))
17166b44003eSAndrew Morton 		return PTR_ERR(sub_thread);
17176b44003eSAndrew Morton 	kthread_bind(sub_thread, cpu);
17186b44003eSAndrew Morton 	wake_up_process(sub_thread);
17196b44003eSAndrew Morton 	wait_for_completion(&wfc.completion);
17202d3854a3SRusty Russell 	return wfc.ret;
17212d3854a3SRusty Russell }
17222d3854a3SRusty Russell EXPORT_SYMBOL_GPL(work_on_cpu);
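
/*
 * Editor's illustrative sketch (hypothetical names): running an accessor
 * on a specific CPU and collecting its return value.  The caller is
 * assumed to keep @cpu online, e.g. via get_online_cpus().
 */
static long example_read_on_cpu(void *arg)
{
	unsigned int *val = arg;

	/* runs in process context on the requested CPU */
	return (long)*val;	/* placeholder for a real per-CPU access */
}

static long example_query_cpu(unsigned int cpu, unsigned int *val)
{
	return work_on_cpu(cpu, example_read_on_cpu, val);
}
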
17232d3854a3SRusty Russell #endif /* CONFIG_SMP */
17242d3854a3SRusty Russell 
1725c12920d1SOleg Nesterov void __init init_workqueues(void)
17261da177e4SLinus Torvalds {
1727c34056a3STejun Heo 	unsigned int cpu;
1728c34056a3STejun Heo 
1729c34056a3STejun Heo 	for_each_possible_cpu(cpu)
1730c34056a3STejun Heo 		ida_init(&per_cpu(worker_ida, cpu));
1731c34056a3STejun Heo 
1732e7577c50SRusty Russell 	singlethread_cpu = cpumask_first(cpu_possible_mask);
17331da177e4SLinus Torvalds 	hotcpu_notifier(workqueue_cpu_callback, 0);
17341da177e4SLinus Torvalds 	keventd_wq = create_workqueue("events");
17351da177e4SLinus Torvalds 	BUG_ON(!keventd_wq);
17361da177e4SLinus Torvalds }
1737