xref: /openbmc/linux/kernel/kthread.c (revision 06ff634c0dae791c17ceeeb60c74e14470d76898)
1 // SPDX-License-Identifier: GPL-2.0-only
2 /* Kernel thread helper functions.
3  *   Copyright (C) 2004 IBM Corporation, Rusty Russell.
4  *
5  * Creation is done via kthreadd, so that we get a clean environment
6  * even if we're invoked from userspace (think modprobe, hotplug cpu,
7  * etc.).
8  */
9 #include <uapi/linux/sched/types.h>
10 #include <linux/sched.h>
11 #include <linux/sched/task.h>
12 #include <linux/kthread.h>
13 #include <linux/completion.h>
14 #include <linux/err.h>
15 #include <linux/cgroup.h>
16 #include <linux/cpuset.h>
17 #include <linux/unistd.h>
18 #include <linux/file.h>
19 #include <linux/export.h>
20 #include <linux/mutex.h>
21 #include <linux/slab.h>
22 #include <linux/freezer.h>
23 #include <linux/ptrace.h>
24 #include <linux/uaccess.h>
25 #include <linux/numa.h>
26 #include <trace/events/sched.h>
27 
/*
 * Queue of creation requests waiting for kthreadd. Entries are added and
 * removed with kthread_create_lock held.
 */
static LIST_HEAD(kthread_create_list);
static DEFINE_SPINLOCK(kthread_create_lock);

/* Task that is woken whenever a new creation request has been queued. */
struct task_struct *kthreadd_task;
31 
32 struct kthread_create_info
33 {
34 	/* Information passed to kthread() from kthreadd. */
35 	int (*threadfn)(void *data);
36 	void *data;
37 	int node;
38 
39 	/* Result passed back to kthread_create() from kthreadd. */
40 	struct task_struct *result;
41 	struct completion *done;
42 
43 	struct list_head list;
44 };
45 
46 struct kthread {
47 	unsigned long flags;
48 	unsigned int cpu;
49 	void *data;
50 	struct completion parked;
51 	struct completion exited;
52 #ifdef CONFIG_BLK_CGROUP
53 	struct cgroup_subsys_state *blkcg_css;
54 #endif
55 };
56 
/* Bit numbers used with set_bit()/test_bit() on struct kthread::flags. */
enum KTHREAD_BITS {
	KTHREAD_IS_PER_CPU  = 0,	/* set by kthread_create_on_cpu() */
	KTHREAD_SHOULD_STOP = 1,	/* set by kthread_stop() */
	KTHREAD_SHOULD_PARK = 2,	/* set by kthread_park() */
};
62 
63 static inline void set_kthread_struct(void *kthread)
64 {
65 	/*
66 	 * We abuse ->set_child_tid to avoid the new member and because it
67 	 * can't be wrongly copied by copy_process(). We also rely on fact
68 	 * that the caller can't exec, so PF_KTHREAD can't be cleared.
69 	 */
70 	current->set_child_tid = (__force void __user *)kthread;
71 }
72 
73 static inline struct kthread *to_kthread(struct task_struct *k)
74 {
75 	WARN_ON(!(k->flags & PF_KTHREAD));
76 	return (__force void *)k->set_child_tid;
77 }
78 
/*
 * Release the struct kthread attached to @k.
 *
 * The pointer may be NULL: either the thread came from kernel_thread()
 * directly, or the kzalloc() in kthread() failed. kfree() copes with that.
 */
void free_kthread_struct(struct task_struct *k)
{
	struct kthread *victim = to_kthread(k);

#ifdef CONFIG_BLK_CGROUP
	/* A still-attached blkcg css at this point is unexpected. */
	WARN_ON_ONCE(victim && victim->blkcg_css);
#endif
	kfree(victim);
}
93 
94 /**
95  * kthread_should_stop - should this kthread return now?
96  *
97  * When someone calls kthread_stop() on your kthread, it will be woken
98  * and this will return true.  You should then return, and your return
99  * value will be passed through to kthread_stop().
100  */
101 bool kthread_should_stop(void)
102 {
103 	return test_bit(KTHREAD_SHOULD_STOP, &to_kthread(current)->flags);
104 }
105 EXPORT_SYMBOL(kthread_should_stop);
106 
107 bool __kthread_should_park(struct task_struct *k)
108 {
109 	return test_bit(KTHREAD_SHOULD_PARK, &to_kthread(k)->flags);
110 }
111 EXPORT_SYMBOL_GPL(__kthread_should_park);
112 
113 /**
114  * kthread_should_park - should this kthread park now?
115  *
116  * When someone calls kthread_park() on your kthread, it will be woken
117  * and this will return true.  You should then do the necessary
118  * cleanup and call kthread_parkme()
119  *
120  * Similar to kthread_should_stop(), but this keeps the thread alive
121  * and in a park position. kthread_unpark() "restarts" the thread and
122  * calls the thread function again.
123  */
124 bool kthread_should_park(void)
125 {
126 	return __kthread_should_park(current);
127 }
128 EXPORT_SYMBOL_GPL(kthread_should_park);
129 
130 /**
131  * kthread_freezable_should_stop - should this freezable kthread return now?
132  * @was_frozen: optional out parameter, indicates whether %current was frozen
133  *
134  * kthread_should_stop() for freezable kthreads, which will enter
135  * refrigerator if necessary.  This function is safe from kthread_stop() /
136  * freezer deadlock and freezable kthreads should use this function instead
137  * of calling try_to_freeze() directly.
138  */
139 bool kthread_freezable_should_stop(bool *was_frozen)
140 {
141 	bool frozen = false;
142 
143 	might_sleep();
144 
145 	if (unlikely(freezing(current)))
146 		frozen = __refrigerator(true);
147 
148 	if (was_frozen)
149 		*was_frozen = frozen;
150 
151 	return kthread_should_stop();
152 }
153 EXPORT_SYMBOL_GPL(kthread_freezable_should_stop);
154 
155 /**
156  * kthread_data - return data value specified on kthread creation
157  * @task: kthread task in question
158  *
159  * Return the data value specified when kthread @task was created.
160  * The caller is responsible for ensuring the validity of @task when
161  * calling this function.
162  */
163 void *kthread_data(struct task_struct *task)
164 {
165 	return to_kthread(task)->data;
166 }
167 
168 /**
169  * kthread_probe_data - speculative version of kthread_data()
170  * @task: possible kthread task in question
171  *
172  * @task could be a kthread task.  Return the data value specified when it
173  * was created if accessible.  If @task isn't a kthread task or its data is
174  * inaccessible for any reason, %NULL is returned.  This function requires
175  * that @task itself is safe to dereference.
176  */
177 void *kthread_probe_data(struct task_struct *task)
178 {
179 	struct kthread *kthread = to_kthread(task);
180 	void *data = NULL;
181 
182 	probe_kernel_read(&data, &kthread->data, sizeof(data));
183 	return data;
184 }
185 
186 static void __kthread_parkme(struct kthread *self)
187 {
188 	for (;;) {
189 		/*
190 		 * TASK_PARKED is a special state; we must serialize against
191 		 * possible pending wakeups to avoid store-store collisions on
192 		 * task->state.
193 		 *
194 		 * Such a collision might possibly result in the task state
195 		 * changin from TASK_PARKED and us failing the
196 		 * wait_task_inactive() in kthread_park().
197 		 */
198 		set_special_state(TASK_PARKED);
199 		if (!test_bit(KTHREAD_SHOULD_PARK, &self->flags))
200 			break;
201 
202 		/*
203 		 * Thread is going to call schedule(), do not preempt it,
204 		 * or the caller of kthread_park() may spend more time in
205 		 * wait_task_inactive().
206 		 */
207 		preempt_disable();
208 		complete(&self->parked);
209 		schedule_preempt_disabled();
210 		preempt_enable();
211 	}
212 	__set_current_state(TASK_RUNNING);
213 }
214 
215 void kthread_parkme(void)
216 {
217 	__kthread_parkme(to_kthread(current));
218 }
219 EXPORT_SYMBOL_GPL(kthread_parkme);
220 
221 static int kthread(void *_create)
222 {
223 	/* Copy data: it's on kthread's stack */
224 	struct kthread_create_info *create = _create;
225 	int (*threadfn)(void *data) = create->threadfn;
226 	void *data = create->data;
227 	struct completion *done;
228 	struct kthread *self;
229 	int ret;
230 
231 	self = kzalloc(sizeof(*self), GFP_KERNEL);
232 	set_kthread_struct(self);
233 
234 	/* If user was SIGKILLed, I release the structure. */
235 	done = xchg(&create->done, NULL);
236 	if (!done) {
237 		kfree(create);
238 		do_exit(-EINTR);
239 	}
240 
241 	if (!self) {
242 		create->result = ERR_PTR(-ENOMEM);
243 		complete(done);
244 		do_exit(-ENOMEM);
245 	}
246 
247 	self->data = data;
248 	init_completion(&self->exited);
249 	init_completion(&self->parked);
250 	current->vfork_done = &self->exited;
251 
252 	/* OK, tell user we're spawned, wait for stop or wakeup */
253 	__set_current_state(TASK_UNINTERRUPTIBLE);
254 	create->result = current;
255 	/*
256 	 * Thread is going to call schedule(), do not preempt it,
257 	 * or the creator may spend more time in wait_task_inactive().
258 	 */
259 	preempt_disable();
260 	complete(done);
261 	schedule_preempt_disabled();
262 	preempt_enable();
263 
264 	ret = -EINTR;
265 	if (!test_bit(KTHREAD_SHOULD_STOP, &self->flags)) {
266 		cgroup_kthread_ready();
267 		__kthread_parkme(self);
268 		ret = threadfn(data);
269 	}
270 	do_exit(ret);
271 }
272 
273 /* called from do_fork() to get node information for about to be created task */
274 int tsk_fork_get_node(struct task_struct *tsk)
275 {
276 #ifdef CONFIG_NUMA
277 	if (tsk == kthreadd_task)
278 		return tsk->pref_node_fork;
279 #endif
280 	return NUMA_NO_NODE;
281 }
282 
283 static void create_kthread(struct kthread_create_info *create)
284 {
285 	int pid;
286 
287 #ifdef CONFIG_NUMA
288 	current->pref_node_fork = create->node;
289 #endif
290 	/* We want our own signal handler (we take no signals by default). */
291 	pid = kernel_thread(kthread, create, CLONE_FS | CLONE_FILES | SIGCHLD);
292 	if (pid < 0) {
293 		/* If user was SIGKILLed, I release the structure. */
294 		struct completion *done = xchg(&create->done, NULL);
295 
296 		if (!done) {
297 			kfree(create);
298 			return;
299 		}
300 		create->result = ERR_PTR(pid);
301 		complete(done);
302 	}
303 }
304 
305 static __printf(4, 0)
306 struct task_struct *__kthread_create_on_node(int (*threadfn)(void *data),
307 						    void *data, int node,
308 						    const char namefmt[],
309 						    va_list args)
310 {
311 	DECLARE_COMPLETION_ONSTACK(done);
312 	struct task_struct *task;
313 	struct kthread_create_info *create = kmalloc(sizeof(*create),
314 						     GFP_KERNEL);
315 
316 	if (!create)
317 		return ERR_PTR(-ENOMEM);
318 	create->threadfn = threadfn;
319 	create->data = data;
320 	create->node = node;
321 	create->done = &done;
322 
323 	spin_lock(&kthread_create_lock);
324 	list_add_tail(&create->list, &kthread_create_list);
325 	spin_unlock(&kthread_create_lock);
326 
327 	wake_up_process(kthreadd_task);
328 	/*
329 	 * Wait for completion in killable state, for I might be chosen by
330 	 * the OOM killer while kthreadd is trying to allocate memory for
331 	 * new kernel thread.
332 	 */
333 	if (unlikely(wait_for_completion_killable(&done))) {
334 		/*
335 		 * If I was SIGKILLed before kthreadd (or new kernel thread)
336 		 * calls complete(), leave the cleanup of this structure to
337 		 * that thread.
338 		 */
339 		if (xchg(&create->done, NULL))
340 			return ERR_PTR(-EINTR);
341 		/*
342 		 * kthreadd (or new kernel thread) will call complete()
343 		 * shortly.
344 		 */
345 		wait_for_completion(&done);
346 	}
347 	task = create->result;
348 	if (!IS_ERR(task)) {
349 		static const struct sched_param param = { .sched_priority = 0 };
350 		char name[TASK_COMM_LEN];
351 
352 		/*
353 		 * task is already visible to other tasks, so updating
354 		 * COMM must be protected.
355 		 */
356 		vsnprintf(name, sizeof(name), namefmt, args);
357 		set_task_comm(task, name);
358 		/*
359 		 * root may have changed our (kthreadd's) priority or CPU mask.
360 		 * The kernel thread should not inherit these properties.
361 		 */
362 		sched_setscheduler_nocheck(task, SCHED_NORMAL, &param);
363 		set_cpus_allowed_ptr(task, cpu_all_mask);
364 	}
365 	kfree(create);
366 	return task;
367 }
368 
369 /**
370  * kthread_create_on_node - create a kthread.
371  * @threadfn: the function to run until signal_pending(current).
372  * @data: data ptr for @threadfn.
373  * @node: task and thread structures for the thread are allocated on this node
374  * @namefmt: printf-style name for the thread.
375  *
376  * Description: This helper function creates and names a kernel
377  * thread.  The thread will be stopped: use wake_up_process() to start
378  * it.  See also kthread_run().  The new thread has SCHED_NORMAL policy and
379  * is affine to all CPUs.
380  *
381  * If thread is going to be bound on a particular cpu, give its node
382  * in @node, to get NUMA affinity for kthread stack, or else give NUMA_NO_NODE.
383  * When woken, the thread will run @threadfn() with @data as its
384  * argument. @threadfn() can either call do_exit() directly if it is a
385  * standalone thread for which no one will call kthread_stop(), or
386  * return when 'kthread_should_stop()' is true (which means
387  * kthread_stop() has been called).  The return value should be zero
388  * or a negative error number; it will be passed to kthread_stop().
389  *
390  * Returns a task_struct or ERR_PTR(-ENOMEM) or ERR_PTR(-EINTR).
391  */
392 struct task_struct *kthread_create_on_node(int (*threadfn)(void *data),
393 					   void *data, int node,
394 					   const char namefmt[],
395 					   ...)
396 {
397 	struct task_struct *task;
398 	va_list args;
399 
400 	va_start(args, namefmt);
401 	task = __kthread_create_on_node(threadfn, data, node, namefmt, args);
402 	va_end(args);
403 
404 	return task;
405 }
406 EXPORT_SYMBOL(kthread_create_on_node);
407 
408 static void __kthread_bind_mask(struct task_struct *p, const struct cpumask *mask, long state)
409 {
410 	unsigned long flags;
411 
412 	if (!wait_task_inactive(p, state)) {
413 		WARN_ON(1);
414 		return;
415 	}
416 
417 	/* It's safe because the task is inactive. */
418 	raw_spin_lock_irqsave(&p->pi_lock, flags);
419 	do_set_cpus_allowed(p, mask);
420 	p->flags |= PF_NO_SETAFFINITY;
421 	raw_spin_unlock_irqrestore(&p->pi_lock, flags);
422 }
423 
/* Single-CPU convenience wrapper around __kthread_bind_mask(). */
static void __kthread_bind(struct task_struct *p, unsigned int cpu, long state)
{
	const struct cpumask *only_cpu = cpumask_of(cpu);

	__kthread_bind_mask(p, only_cpu, state);
}
428 
429 void kthread_bind_mask(struct task_struct *p, const struct cpumask *mask)
430 {
431 	__kthread_bind_mask(p, mask, TASK_UNINTERRUPTIBLE);
432 }
433 
434 /**
435  * kthread_bind - bind a just-created kthread to a cpu.
436  * @p: thread created by kthread_create().
437  * @cpu: cpu (might not be online, must be possible) for @k to run on.
438  *
439  * Description: This function is equivalent to set_cpus_allowed(),
440  * except that @cpu doesn't need to be online, and the thread must be
441  * stopped (i.e., just returned from kthread_create()).
442  */
443 void kthread_bind(struct task_struct *p, unsigned int cpu)
444 {
445 	__kthread_bind(p, cpu, TASK_UNINTERRUPTIBLE);
446 }
447 EXPORT_SYMBOL(kthread_bind);
448 
449 /**
450  * kthread_create_on_cpu - Create a cpu bound kthread
451  * @threadfn: the function to run until signal_pending(current).
452  * @data: data ptr for @threadfn.
453  * @cpu: The cpu on which the thread should be bound,
454  * @namefmt: printf-style name for the thread. Format is restricted
455  *	     to "name.*%u". Code fills in cpu number.
456  *
457  * Description: This helper function creates and names a kernel thread
458  * The thread will be woken and put into park mode.
459  */
460 struct task_struct *kthread_create_on_cpu(int (*threadfn)(void *data),
461 					  void *data, unsigned int cpu,
462 					  const char *namefmt)
463 {
464 	struct task_struct *p;
465 
466 	p = kthread_create_on_node(threadfn, data, cpu_to_node(cpu), namefmt,
467 				   cpu);
468 	if (IS_ERR(p))
469 		return p;
470 	kthread_bind(p, cpu);
471 	/* CPU hotplug need to bind once again when unparking the thread. */
472 	set_bit(KTHREAD_IS_PER_CPU, &to_kthread(p)->flags);
473 	to_kthread(p)->cpu = cpu;
474 	return p;
475 }
476 
477 /**
478  * kthread_unpark - unpark a thread created by kthread_create().
479  * @k:		thread created by kthread_create().
480  *
481  * Sets kthread_should_park() for @k to return false, wakes it, and
482  * waits for it to return. If the thread is marked percpu then its
483  * bound to the cpu again.
484  */
485 void kthread_unpark(struct task_struct *k)
486 {
487 	struct kthread *kthread = to_kthread(k);
488 
489 	/*
490 	 * Newly created kthread was parked when the CPU was offline.
491 	 * The binding was lost and we need to set it again.
492 	 */
493 	if (test_bit(KTHREAD_IS_PER_CPU, &kthread->flags))
494 		__kthread_bind(k, kthread->cpu, TASK_PARKED);
495 
496 	clear_bit(KTHREAD_SHOULD_PARK, &kthread->flags);
497 	/*
498 	 * __kthread_parkme() will either see !SHOULD_PARK or get the wakeup.
499 	 */
500 	wake_up_state(k, TASK_PARKED);
501 }
502 EXPORT_SYMBOL_GPL(kthread_unpark);
503 
504 /**
505  * kthread_park - park a thread created by kthread_create().
506  * @k: thread created by kthread_create().
507  *
508  * Sets kthread_should_park() for @k to return true, wakes it, and
509  * waits for it to return. This can also be called after kthread_create()
510  * instead of calling wake_up_process(): the thread will park without
511  * calling threadfn().
512  *
513  * Returns 0 if the thread is parked, -ENOSYS if the thread exited.
514  * If called by the kthread itself just the park bit is set.
515  */
516 int kthread_park(struct task_struct *k)
517 {
518 	struct kthread *kthread = to_kthread(k);
519 
520 	if (WARN_ON(k->flags & PF_EXITING))
521 		return -ENOSYS;
522 
523 	if (WARN_ON_ONCE(test_bit(KTHREAD_SHOULD_PARK, &kthread->flags)))
524 		return -EBUSY;
525 
526 	set_bit(KTHREAD_SHOULD_PARK, &kthread->flags);
527 	if (k != current) {
528 		wake_up_process(k);
529 		/*
530 		 * Wait for __kthread_parkme() to complete(), this means we
531 		 * _will_ have TASK_PARKED and are about to call schedule().
532 		 */
533 		wait_for_completion(&kthread->parked);
534 		/*
535 		 * Now wait for that schedule() to complete and the task to
536 		 * get scheduled out.
537 		 */
538 		WARN_ON_ONCE(!wait_task_inactive(k, TASK_PARKED));
539 	}
540 
541 	return 0;
542 }
543 EXPORT_SYMBOL_GPL(kthread_park);
544 
545 /**
546  * kthread_stop - stop a thread created by kthread_create().
547  * @k: thread created by kthread_create().
548  *
549  * Sets kthread_should_stop() for @k to return true, wakes it, and
550  * waits for it to exit. This can also be called after kthread_create()
551  * instead of calling wake_up_process(): the thread will exit without
552  * calling threadfn().
553  *
554  * If threadfn() may call do_exit() itself, the caller must ensure
555  * task_struct can't go away.
556  *
557  * Returns the result of threadfn(), or %-EINTR if wake_up_process()
558  * was never called.
559  */
560 int kthread_stop(struct task_struct *k)
561 {
562 	struct kthread *kthread;
563 	int ret;
564 
565 	trace_sched_kthread_stop(k);
566 
567 	get_task_struct(k);
568 	kthread = to_kthread(k);
569 	set_bit(KTHREAD_SHOULD_STOP, &kthread->flags);
570 	kthread_unpark(k);
571 	wake_up_process(k);
572 	wait_for_completion(&kthread->exited);
573 	ret = k->exit_code;
574 	put_task_struct(k);
575 
576 	trace_sched_kthread_stop_ret(ret);
577 	return ret;
578 }
579 EXPORT_SYMBOL(kthread_stop);
580 
581 int kthreadd(void *unused)
582 {
583 	struct task_struct *tsk = current;
584 
585 	/* Setup a clean context for our children to inherit. */
586 	set_task_comm(tsk, "kthreadd");
587 	ignore_signals(tsk);
588 	set_cpus_allowed_ptr(tsk, cpu_all_mask);
589 	set_mems_allowed(node_states[N_MEMORY]);
590 
591 	current->flags |= PF_NOFREEZE;
592 	cgroup_init_kthreadd();
593 
594 	for (;;) {
595 		set_current_state(TASK_INTERRUPTIBLE);
596 		if (list_empty(&kthread_create_list))
597 			schedule();
598 		__set_current_state(TASK_RUNNING);
599 
600 		spin_lock(&kthread_create_lock);
601 		while (!list_empty(&kthread_create_list)) {
602 			struct kthread_create_info *create;
603 
604 			create = list_entry(kthread_create_list.next,
605 					    struct kthread_create_info, list);
606 			list_del_init(&create->list);
607 			spin_unlock(&kthread_create_lock);
608 
609 			create_kthread(create);
610 
611 			spin_lock(&kthread_create_lock);
612 		}
613 		spin_unlock(&kthread_create_lock);
614 	}
615 
616 	return 0;
617 }
618 
619 void __kthread_init_worker(struct kthread_worker *worker,
620 				const char *name,
621 				struct lock_class_key *key)
622 {
623 	memset(worker, 0, sizeof(struct kthread_worker));
624 	raw_spin_lock_init(&worker->lock);
625 	lockdep_set_class_and_name(&worker->lock, key, name);
626 	INIT_LIST_HEAD(&worker->work_list);
627 	INIT_LIST_HEAD(&worker->delayed_work_list);
628 }
629 EXPORT_SYMBOL_GPL(__kthread_init_worker);
630 
631 /**
632  * kthread_worker_fn - kthread function to process kthread_worker
633  * @worker_ptr: pointer to initialized kthread_worker
634  *
635  * This function implements the main cycle of kthread worker. It processes
636  * work_list until it is stopped with kthread_stop(). It sleeps when the queue
637  * is empty.
638  *
639  * The works are not allowed to keep any locks, disable preemption or interrupts
640  * when they finish. There is defined a safe point for freezing when one work
641  * finishes and before a new one is started.
642  *
643  * Also the works must not be handled by more than one worker at the same time,
644  * see also kthread_queue_work().
645  */
646 int kthread_worker_fn(void *worker_ptr)
647 {
648 	struct kthread_worker *worker = worker_ptr;
649 	struct kthread_work *work;
650 
651 	/*
652 	 * FIXME: Update the check and remove the assignment when all kthread
653 	 * worker users are created using kthread_create_worker*() functions.
654 	 */
655 	WARN_ON(worker->task && worker->task != current);
656 	worker->task = current;
657 
658 	if (worker->flags & KTW_FREEZABLE)
659 		set_freezable();
660 
661 repeat:
662 	set_current_state(TASK_INTERRUPTIBLE);	/* mb paired w/ kthread_stop */
663 
664 	if (kthread_should_stop()) {
665 		__set_current_state(TASK_RUNNING);
666 		raw_spin_lock_irq(&worker->lock);
667 		worker->task = NULL;
668 		raw_spin_unlock_irq(&worker->lock);
669 		return 0;
670 	}
671 
672 	work = NULL;
673 	raw_spin_lock_irq(&worker->lock);
674 	if (!list_empty(&worker->work_list)) {
675 		work = list_first_entry(&worker->work_list,
676 					struct kthread_work, node);
677 		list_del_init(&work->node);
678 	}
679 	worker->current_work = work;
680 	raw_spin_unlock_irq(&worker->lock);
681 
682 	if (work) {
683 		__set_current_state(TASK_RUNNING);
684 		work->func(work);
685 	} else if (!freezing(current))
686 		schedule();
687 
688 	try_to_freeze();
689 	cond_resched();
690 	goto repeat;
691 }
692 EXPORT_SYMBOL_GPL(kthread_worker_fn);
693 
694 static __printf(3, 0) struct kthread_worker *
695 __kthread_create_worker(int cpu, unsigned int flags,
696 			const char namefmt[], va_list args)
697 {
698 	struct kthread_worker *worker;
699 	struct task_struct *task;
700 	int node = NUMA_NO_NODE;
701 
702 	worker = kzalloc(sizeof(*worker), GFP_KERNEL);
703 	if (!worker)
704 		return ERR_PTR(-ENOMEM);
705 
706 	kthread_init_worker(worker);
707 
708 	if (cpu >= 0)
709 		node = cpu_to_node(cpu);
710 
711 	task = __kthread_create_on_node(kthread_worker_fn, worker,
712 						node, namefmt, args);
713 	if (IS_ERR(task))
714 		goto fail_task;
715 
716 	if (cpu >= 0)
717 		kthread_bind(task, cpu);
718 
719 	worker->flags = flags;
720 	worker->task = task;
721 	wake_up_process(task);
722 	return worker;
723 
724 fail_task:
725 	kfree(worker);
726 	return ERR_CAST(task);
727 }
728 
729 /**
730  * kthread_create_worker - create a kthread worker
731  * @flags: flags modifying the default behavior of the worker
732  * @namefmt: printf-style name for the kthread worker (task).
733  *
734  * Returns a pointer to the allocated worker on success, ERR_PTR(-ENOMEM)
735  * when the needed structures could not get allocated, and ERR_PTR(-EINTR)
736  * when the worker was SIGKILLed.
737  */
738 struct kthread_worker *
739 kthread_create_worker(unsigned int flags, const char namefmt[], ...)
740 {
741 	struct kthread_worker *worker;
742 	va_list args;
743 
744 	va_start(args, namefmt);
745 	worker = __kthread_create_worker(-1, flags, namefmt, args);
746 	va_end(args);
747 
748 	return worker;
749 }
750 EXPORT_SYMBOL(kthread_create_worker);
751 
752 /**
753  * kthread_create_worker_on_cpu - create a kthread worker and bind it
754  *	it to a given CPU and the associated NUMA node.
755  * @cpu: CPU number
756  * @flags: flags modifying the default behavior of the worker
757  * @namefmt: printf-style name for the kthread worker (task).
758  *
759  * Use a valid CPU number if you want to bind the kthread worker
760  * to the given CPU and the associated NUMA node.
761  *
762  * A good practice is to add the cpu number also into the worker name.
763  * For example, use kthread_create_worker_on_cpu(cpu, "helper/%d", cpu).
764  *
765  * Returns a pointer to the allocated worker on success, ERR_PTR(-ENOMEM)
766  * when the needed structures could not get allocated, and ERR_PTR(-EINTR)
767  * when the worker was SIGKILLed.
768  */
769 struct kthread_worker *
770 kthread_create_worker_on_cpu(int cpu, unsigned int flags,
771 			     const char namefmt[], ...)
772 {
773 	struct kthread_worker *worker;
774 	va_list args;
775 
776 	va_start(args, namefmt);
777 	worker = __kthread_create_worker(cpu, flags, namefmt, args);
778 	va_end(args);
779 
780 	return worker;
781 }
782 EXPORT_SYMBOL(kthread_create_worker_on_cpu);
783 
784 /*
785  * Returns true when the work could not be queued at the moment.
786  * It happens when it is already pending in a worker list
787  * or when it is being cancelled.
788  */
789 static inline bool queuing_blocked(struct kthread_worker *worker,
790 				   struct kthread_work *work)
791 {
792 	lockdep_assert_held(&worker->lock);
793 
794 	return !list_empty(&work->node) || work->canceling;
795 }
796 
797 static void kthread_insert_work_sanity_check(struct kthread_worker *worker,
798 					     struct kthread_work *work)
799 {
800 	lockdep_assert_held(&worker->lock);
801 	WARN_ON_ONCE(!list_empty(&work->node));
802 	/* Do not use a work with >1 worker, see kthread_queue_work() */
803 	WARN_ON_ONCE(work->worker && work->worker != worker);
804 }
805 
806 /* insert @work before @pos in @worker */
807 static void kthread_insert_work(struct kthread_worker *worker,
808 				struct kthread_work *work,
809 				struct list_head *pos)
810 {
811 	kthread_insert_work_sanity_check(worker, work);
812 
813 	list_add_tail(&work->node, pos);
814 	work->worker = worker;
815 	if (!worker->current_work && likely(worker->task))
816 		wake_up_process(worker->task);
817 }
818 
819 /**
820  * kthread_queue_work - queue a kthread_work
821  * @worker: target kthread_worker
822  * @work: kthread_work to queue
823  *
824  * Queue @work to work processor @task for async execution.  @task
825  * must have been created with kthread_worker_create().  Returns %true
826  * if @work was successfully queued, %false if it was already pending.
827  *
828  * Reinitialize the work if it needs to be used by another worker.
829  * For example, when the worker was stopped and started again.
830  */
831 bool kthread_queue_work(struct kthread_worker *worker,
832 			struct kthread_work *work)
833 {
834 	bool ret = false;
835 	unsigned long flags;
836 
837 	raw_spin_lock_irqsave(&worker->lock, flags);
838 	if (!queuing_blocked(worker, work)) {
839 		kthread_insert_work(worker, work, &worker->work_list);
840 		ret = true;
841 	}
842 	raw_spin_unlock_irqrestore(&worker->lock, flags);
843 	return ret;
844 }
845 EXPORT_SYMBOL_GPL(kthread_queue_work);
846 
847 /**
848  * kthread_delayed_work_timer_fn - callback that queues the associated kthread
849  *	delayed work when the timer expires.
850  * @t: pointer to the expired timer
851  *
852  * The format of the function is defined by struct timer_list.
853  * It should have been called from irqsafe timer with irq already off.
854  */
855 void kthread_delayed_work_timer_fn(struct timer_list *t)
856 {
857 	struct kthread_delayed_work *dwork = from_timer(dwork, t, timer);
858 	struct kthread_work *work = &dwork->work;
859 	struct kthread_worker *worker = work->worker;
860 	unsigned long flags;
861 
862 	/*
863 	 * This might happen when a pending work is reinitialized.
864 	 * It means that it is used a wrong way.
865 	 */
866 	if (WARN_ON_ONCE(!worker))
867 		return;
868 
869 	raw_spin_lock_irqsave(&worker->lock, flags);
870 	/* Work must not be used with >1 worker, see kthread_queue_work(). */
871 	WARN_ON_ONCE(work->worker != worker);
872 
873 	/* Move the work from worker->delayed_work_list. */
874 	WARN_ON_ONCE(list_empty(&work->node));
875 	list_del_init(&work->node);
876 	kthread_insert_work(worker, work, &worker->work_list);
877 
878 	raw_spin_unlock_irqrestore(&worker->lock, flags);
879 }
880 EXPORT_SYMBOL(kthread_delayed_work_timer_fn);
881 
882 static void __kthread_queue_delayed_work(struct kthread_worker *worker,
883 					 struct kthread_delayed_work *dwork,
884 					 unsigned long delay)
885 {
886 	struct timer_list *timer = &dwork->timer;
887 	struct kthread_work *work = &dwork->work;
888 
889 	WARN_ON_ONCE(timer->function != kthread_delayed_work_timer_fn);
890 
891 	/*
892 	 * If @delay is 0, queue @dwork->work immediately.  This is for
893 	 * both optimization and correctness.  The earliest @timer can
894 	 * expire is on the closest next tick and delayed_work users depend
895 	 * on that there's no such delay when @delay is 0.
896 	 */
897 	if (!delay) {
898 		kthread_insert_work(worker, work, &worker->work_list);
899 		return;
900 	}
901 
902 	/* Be paranoid and try to detect possible races already now. */
903 	kthread_insert_work_sanity_check(worker, work);
904 
905 	list_add(&work->node, &worker->delayed_work_list);
906 	work->worker = worker;
907 	timer->expires = jiffies + delay;
908 	add_timer(timer);
909 }
910 
911 /**
912  * kthread_queue_delayed_work - queue the associated kthread work
913  *	after a delay.
914  * @worker: target kthread_worker
915  * @dwork: kthread_delayed_work to queue
916  * @delay: number of jiffies to wait before queuing
917  *
918  * If the work has not been pending it starts a timer that will queue
919  * the work after the given @delay. If @delay is zero, it queues the
920  * work immediately.
921  *
922  * Return: %false if the @work has already been pending. It means that
923  * either the timer was running or the work was queued. It returns %true
924  * otherwise.
925  */
926 bool kthread_queue_delayed_work(struct kthread_worker *worker,
927 				struct kthread_delayed_work *dwork,
928 				unsigned long delay)
929 {
930 	struct kthread_work *work = &dwork->work;
931 	unsigned long flags;
932 	bool ret = false;
933 
934 	raw_spin_lock_irqsave(&worker->lock, flags);
935 
936 	if (!queuing_blocked(worker, work)) {
937 		__kthread_queue_delayed_work(worker, dwork, delay);
938 		ret = true;
939 	}
940 
941 	raw_spin_unlock_irqrestore(&worker->lock, flags);
942 	return ret;
943 }
944 EXPORT_SYMBOL_GPL(kthread_queue_delayed_work);
945 
946 struct kthread_flush_work {
947 	struct kthread_work	work;
948 	struct completion	done;
949 };
950 
951 static void kthread_flush_work_fn(struct kthread_work *work)
952 {
953 	struct kthread_flush_work *fwork =
954 		container_of(work, struct kthread_flush_work, work);
955 	complete(&fwork->done);
956 }
957 
958 /**
959  * kthread_flush_work - flush a kthread_work
960  * @work: work to flush
961  *
962  * If @work is queued or executing, wait for it to finish execution.
963  */
964 void kthread_flush_work(struct kthread_work *work)
965 {
966 	struct kthread_flush_work fwork = {
967 		KTHREAD_WORK_INIT(fwork.work, kthread_flush_work_fn),
968 		COMPLETION_INITIALIZER_ONSTACK(fwork.done),
969 	};
970 	struct kthread_worker *worker;
971 	bool noop = false;
972 
973 	worker = work->worker;
974 	if (!worker)
975 		return;
976 
977 	raw_spin_lock_irq(&worker->lock);
978 	/* Work must not be used with >1 worker, see kthread_queue_work(). */
979 	WARN_ON_ONCE(work->worker != worker);
980 
981 	if (!list_empty(&work->node))
982 		kthread_insert_work(worker, &fwork.work, work->node.next);
983 	else if (worker->current_work == work)
984 		kthread_insert_work(worker, &fwork.work,
985 				    worker->work_list.next);
986 	else
987 		noop = true;
988 
989 	raw_spin_unlock_irq(&worker->lock);
990 
991 	if (!noop)
992 		wait_for_completion(&fwork.done);
993 }
994 EXPORT_SYMBOL_GPL(kthread_flush_work);
995 
996 /*
997  * This function removes the work from the worker queue. Also it makes sure
998  * that it won't get queued later via the delayed work's timer.
999  *
1000  * The work might still be in use when this function finishes. See the
1001  * current_work proceed by the worker.
1002  *
1003  * Return: %true if @work was pending and successfully canceled,
1004  *	%false if @work was not pending
1005  */
1006 static bool __kthread_cancel_work(struct kthread_work *work, bool is_dwork,
1007 				  unsigned long *flags)
1008 {
1009 	/* Try to cancel the timer if exists. */
1010 	if (is_dwork) {
1011 		struct kthread_delayed_work *dwork =
1012 			container_of(work, struct kthread_delayed_work, work);
1013 		struct kthread_worker *worker = work->worker;
1014 
1015 		/*
1016 		 * del_timer_sync() must be called to make sure that the timer
1017 		 * callback is not running. The lock must be temporary released
1018 		 * to avoid a deadlock with the callback. In the meantime,
1019 		 * any queuing is blocked by setting the canceling counter.
1020 		 */
1021 		work->canceling++;
1022 		raw_spin_unlock_irqrestore(&worker->lock, *flags);
1023 		del_timer_sync(&dwork->timer);
1024 		raw_spin_lock_irqsave(&worker->lock, *flags);
1025 		work->canceling--;
1026 	}
1027 
1028 	/*
1029 	 * Try to remove the work from a worker list. It might either
1030 	 * be from worker->work_list or from worker->delayed_work_list.
1031 	 */
1032 	if (!list_empty(&work->node)) {
1033 		list_del_init(&work->node);
1034 		return true;
1035 	}
1036 
1037 	return false;
1038 }
1039 
1040 /**
1041  * kthread_mod_delayed_work - modify delay of or queue a kthread delayed work
1042  * @worker: kthread worker to use
1043  * @dwork: kthread delayed work to queue
1044  * @delay: number of jiffies to wait before queuing
1045  *
1046  * If @dwork is idle, equivalent to kthread_queue_delayed_work(). Otherwise,
1047  * modify @dwork's timer so that it expires after @delay. If @delay is zero,
1048  * @work is guaranteed to be queued immediately.
1049  *
1050  * Return: %true if @dwork was pending and its timer was modified,
1051  * %false otherwise.
1052  *
1053  * A special case is when the work is being canceled in parallel.
1054  * It might be caused either by the real kthread_cancel_delayed_work_sync()
1055  * or yet another kthread_mod_delayed_work() call. We let the other command
1056  * win and return %false here. The caller is supposed to synchronize these
1057  * operations a reasonable way.
1058  *
1059  * This function is safe to call from any context including IRQ handler.
1060  * See __kthread_cancel_work() and kthread_delayed_work_timer_fn()
1061  * for details.
1062  */
1063 bool kthread_mod_delayed_work(struct kthread_worker *worker,
1064 			      struct kthread_delayed_work *dwork,
1065 			      unsigned long delay)
1066 {
1067 	struct kthread_work *work = &dwork->work;
1068 	unsigned long flags;
1069 	int ret = false;
1070 
1071 	raw_spin_lock_irqsave(&worker->lock, flags);
1072 
1073 	/* Do not bother with canceling when never queued. */
1074 	if (!work->worker)
1075 		goto fast_queue;
1076 
1077 	/* Work must not be used with >1 worker, see kthread_queue_work() */
1078 	WARN_ON_ONCE(work->worker != worker);
1079 
1080 	/* Do not fight with another command that is canceling this work. */
1081 	if (work->canceling)
1082 		goto out;
1083 
1084 	ret = __kthread_cancel_work(work, true, &flags);
1085 fast_queue:
1086 	__kthread_queue_delayed_work(worker, dwork, delay);
1087 out:
1088 	raw_spin_unlock_irqrestore(&worker->lock, flags);
1089 	return ret;
1090 }
1091 EXPORT_SYMBOL_GPL(kthread_mod_delayed_work);
1092 
1093 static bool __kthread_cancel_work_sync(struct kthread_work *work, bool is_dwork)
1094 {
1095 	struct kthread_worker *worker = work->worker;
1096 	unsigned long flags;
1097 	int ret = false;
1098 
1099 	if (!worker)
1100 		goto out;
1101 
1102 	raw_spin_lock_irqsave(&worker->lock, flags);
1103 	/* Work must not be used with >1 worker, see kthread_queue_work(). */
1104 	WARN_ON_ONCE(work->worker != worker);
1105 
1106 	ret = __kthread_cancel_work(work, is_dwork, &flags);
1107 
1108 	if (worker->current_work != work)
1109 		goto out_fast;
1110 
1111 	/*
1112 	 * The work is in progress and we need to wait with the lock released.
1113 	 * In the meantime, block any queuing by setting the canceling counter.
1114 	 */
1115 	work->canceling++;
1116 	raw_spin_unlock_irqrestore(&worker->lock, flags);
1117 	kthread_flush_work(work);
1118 	raw_spin_lock_irqsave(&worker->lock, flags);
1119 	work->canceling--;
1120 
1121 out_fast:
1122 	raw_spin_unlock_irqrestore(&worker->lock, flags);
1123 out:
1124 	return ret;
1125 }
1126 
1127 /**
1128  * kthread_cancel_work_sync - cancel a kthread work and wait for it to finish
1129  * @work: the kthread work to cancel
1130  *
1131  * Cancel @work and wait for its execution to finish.  This function
1132  * can be used even if the work re-queues itself. On return from this
1133  * function, @work is guaranteed to be not pending or executing on any CPU.
1134  *
1135  * kthread_cancel_work_sync(&delayed_work->work) must not be used for
1136  * delayed_work's. Use kthread_cancel_delayed_work_sync() instead.
1137  *
1138  * The caller must ensure that the worker on which @work was last
1139  * queued can't be destroyed before this function returns.
1140  *
1141  * Return: %true if @work was pending, %false otherwise.
1142  */
1143 bool kthread_cancel_work_sync(struct kthread_work *work)
1144 {
1145 	return __kthread_cancel_work_sync(work, false);
1146 }
1147 EXPORT_SYMBOL_GPL(kthread_cancel_work_sync);
1148 
1149 /**
1150  * kthread_cancel_delayed_work_sync - cancel a kthread delayed work and
1151  *	wait for it to finish.
1152  * @dwork: the kthread delayed work to cancel
1153  *
1154  * This is kthread_cancel_work_sync() for delayed works.
1155  *
1156  * Return: %true if @dwork was pending, %false otherwise.
1157  */
1158 bool kthread_cancel_delayed_work_sync(struct kthread_delayed_work *dwork)
1159 {
1160 	return __kthread_cancel_work_sync(&dwork->work, true);
1161 }
1162 EXPORT_SYMBOL_GPL(kthread_cancel_delayed_work_sync);
1163 
1164 /**
1165  * kthread_flush_worker - flush all current works on a kthread_worker
1166  * @worker: worker to flush
1167  *
1168  * Wait until all currently executing or pending works on @worker are
1169  * finished.
1170  */
1171 void kthread_flush_worker(struct kthread_worker *worker)
1172 {
1173 	struct kthread_flush_work fwork = {
1174 		KTHREAD_WORK_INIT(fwork.work, kthread_flush_work_fn),
1175 		COMPLETION_INITIALIZER_ONSTACK(fwork.done),
1176 	};
1177 
1178 	kthread_queue_work(worker, &fwork.work);
1179 	wait_for_completion(&fwork.done);
1180 }
1181 EXPORT_SYMBOL_GPL(kthread_flush_worker);
1182 
1183 /**
1184  * kthread_destroy_worker - destroy a kthread worker
1185  * @worker: worker to be destroyed
1186  *
1187  * Flush and destroy @worker.  The simple flush is enough because the kthread
1188  * worker API is used only in trivial scenarios.  There are no multi-step state
1189  * machines needed.
1190  */
1191 void kthread_destroy_worker(struct kthread_worker *worker)
1192 {
1193 	struct task_struct *task;
1194 
1195 	task = worker->task;
1196 	if (WARN_ON(!task))
1197 		return;
1198 
1199 	kthread_flush_worker(worker);
1200 	kthread_stop(task);
1201 	WARN_ON(!list_empty(&worker->work_list));
1202 	kfree(worker);
1203 }
1204 EXPORT_SYMBOL(kthread_destroy_worker);
1205 
1206 #ifdef CONFIG_BLK_CGROUP
1207 /**
1208  * kthread_associate_blkcg - associate blkcg to current kthread
1209  * @css: the cgroup info
1210  *
1211  * Current thread must be a kthread. The thread is running jobs on behalf of
1212  * other threads. In some cases, we expect the jobs attach cgroup info of
1213  * original threads instead of that of current thread. This function stores
1214  * original thread's cgroup info in current kthread context for later
1215  * retrieval.
1216  */
1217 void kthread_associate_blkcg(struct cgroup_subsys_state *css)
1218 {
1219 	struct kthread *kthread;
1220 
1221 	if (!(current->flags & PF_KTHREAD))
1222 		return;
1223 	kthread = to_kthread(current);
1224 	if (!kthread)
1225 		return;
1226 
1227 	if (kthread->blkcg_css) {
1228 		css_put(kthread->blkcg_css);
1229 		kthread->blkcg_css = NULL;
1230 	}
1231 	if (css) {
1232 		css_get(css);
1233 		kthread->blkcg_css = css;
1234 	}
1235 }
1236 EXPORT_SYMBOL(kthread_associate_blkcg);
1237 
1238 /**
1239  * kthread_blkcg - get associated blkcg css of current kthread
1240  *
1241  * Current thread must be a kthread.
1242  */
1243 struct cgroup_subsys_state *kthread_blkcg(void)
1244 {
1245 	struct kthread *kthread;
1246 
1247 	if (current->flags & PF_KTHREAD) {
1248 		kthread = to_kthread(current);
1249 		if (kthread)
1250 			return kthread->blkcg_css;
1251 	}
1252 	return NULL;
1253 }
1254 EXPORT_SYMBOL(kthread_blkcg);
1255 #endif
1256