1 // SPDX-License-Identifier: GPL-2.0 2 #include <linux/irq_work.h> 3 #include <linux/spinlock.h> 4 #include <linux/task_work.h> 5 #include <linux/resume_user_mode.h> 6 7 static struct callback_head work_exited; /* all we need is ->next == NULL */ 8 9 #ifdef CONFIG_IRQ_WORK 10 static void task_work_set_notify_irq(struct irq_work *entry) 11 { 12 test_and_set_tsk_thread_flag(current, TIF_NOTIFY_RESUME); 13 } 14 static DEFINE_PER_CPU(struct irq_work, irq_work_NMI_resume) = 15 IRQ_WORK_INIT_HARD(task_work_set_notify_irq); 16 #endif 17 18 /** 19 * task_work_add - ask the @task to execute @work->func() 20 * @task: the task which should run the callback 21 * @work: the callback to run 22 * @notify: how to notify the targeted task 23 * 24 * Queue @work for task_work_run() below and notify the @task if @notify 25 * is @TWA_RESUME, @TWA_SIGNAL, @TWA_SIGNAL_NO_IPI or @TWA_NMI_CURRENT. 26 * 27 * @TWA_SIGNAL works like signals, in that the it will interrupt the targeted 28 * task and run the task_work, regardless of whether the task is currently 29 * running in the kernel or userspace. 30 * @TWA_SIGNAL_NO_IPI works like @TWA_SIGNAL, except it doesn't send a 31 * reschedule IPI to force the targeted task to reschedule and run task_work. 32 * This can be advantageous if there's no strict requirement that the 33 * task_work be run as soon as possible, just whenever the task enters the 34 * kernel anyway. 35 * @TWA_RESUME work is run only when the task exits the kernel and returns to 36 * user mode, or before entering guest mode. 37 * @TWA_NMI_CURRENT works like @TWA_RESUME, except it can only be used for the 38 * current @task and if the current context is NMI. 39 * 40 * Fails if the @task is exiting/exited and thus it can't process this @work. 41 * Otherwise @work->func() will be called when the @task goes through one of 42 * the aforementioned transitions, or exits. 43 * 44 * If the targeted task is exiting, then an error is returned and the work item 45 * is not queued. It's up to the caller to arrange for an alternative mechanism 46 * in that case. 47 * 48 * Note: there is no ordering guarantee on works queued here. The task_work 49 * list is LIFO. 50 * 51 * RETURNS: 52 * 0 if succeeds or -ESRCH. 53 */ 54 int task_work_add(struct task_struct *task, struct callback_head *work, 55 enum task_work_notify_mode notify) 56 { 57 struct callback_head *head; 58 int flags = notify & TWA_FLAGS; 59 60 notify &= ~TWA_FLAGS; 61 if (notify == TWA_NMI_CURRENT) { 62 if (WARN_ON_ONCE(task != current)) 63 return -EINVAL; 64 if (!IS_ENABLED(CONFIG_IRQ_WORK)) 65 return -EINVAL; 66 } else { 67 /* 68 * Record the work call stack in order to print it in KASAN 69 * reports. 70 * 71 * Note that stack allocation can fail if TWAF_NO_ALLOC flag 72 * is set and new page is needed to expand the stack buffer. 73 */ 74 if (flags & TWAF_NO_ALLOC) 75 kasan_record_aux_stack_noalloc(work); 76 else 77 kasan_record_aux_stack(work); 78 } 79 80 head = READ_ONCE(task->task_works); 81 do { 82 if (unlikely(head == &work_exited)) 83 return -ESRCH; 84 work->next = head; 85 } while (!try_cmpxchg(&task->task_works, &head, work)); 86 87 switch (notify) { 88 case TWA_NONE: 89 break; 90 case TWA_RESUME: 91 set_notify_resume(task); 92 break; 93 case TWA_SIGNAL: 94 set_notify_signal(task); 95 break; 96 case TWA_SIGNAL_NO_IPI: 97 __set_notify_signal(task); 98 break; 99 #ifdef CONFIG_IRQ_WORK 100 case TWA_NMI_CURRENT: 101 irq_work_queue(this_cpu_ptr(&irq_work_NMI_resume)); 102 break; 103 #endif 104 default: 105 WARN_ON_ONCE(1); 106 break; 107 } 108 109 return 0; 110 } 111 112 /** 113 * task_work_cancel_match - cancel a pending work added by task_work_add() 114 * @task: the task which should execute the work 115 * @match: match function to call 116 * @data: data to be passed in to match function 117 * 118 * RETURNS: 119 * The found work or NULL if not found. 120 */ 121 struct callback_head * 122 task_work_cancel_match(struct task_struct *task, 123 bool (*match)(struct callback_head *, void *data), 124 void *data) 125 { 126 struct callback_head **pprev = &task->task_works; 127 struct callback_head *work; 128 unsigned long flags; 129 130 if (likely(!task_work_pending(task))) 131 return NULL; 132 /* 133 * If cmpxchg() fails we continue without updating pprev. 134 * Either we raced with task_work_add() which added the 135 * new entry before this work, we will find it again. Or 136 * we raced with task_work_run(), *pprev == NULL/exited. 137 */ 138 raw_spin_lock_irqsave(&task->pi_lock, flags); 139 work = READ_ONCE(*pprev); 140 while (work) { 141 if (!match(work, data)) { 142 pprev = &work->next; 143 work = READ_ONCE(*pprev); 144 } else if (try_cmpxchg(pprev, &work, work->next)) 145 break; 146 } 147 raw_spin_unlock_irqrestore(&task->pi_lock, flags); 148 149 return work; 150 } 151 152 static bool task_work_func_match(struct callback_head *cb, void *data) 153 { 154 return cb->func == data; 155 } 156 157 /** 158 * task_work_cancel_func - cancel a pending work matching a function added by task_work_add() 159 * @task: the task which should execute the func's work 160 * @func: identifies the func to match with a work to remove 161 * 162 * Find the last queued pending work with ->func == @func and remove 163 * it from queue. 164 * 165 * RETURNS: 166 * The found work or NULL if not found. 167 */ 168 struct callback_head * 169 task_work_cancel_func(struct task_struct *task, task_work_func_t func) 170 { 171 return task_work_cancel_match(task, task_work_func_match, func); 172 } 173 174 static bool task_work_match(struct callback_head *cb, void *data) 175 { 176 return cb == data; 177 } 178 179 /** 180 * task_work_cancel - cancel a pending work added by task_work_add() 181 * @task: the task which should execute the work 182 * @cb: the callback to remove if queued 183 * 184 * Remove a callback from a task's queue if queued. 185 * 186 * RETURNS: 187 * True if the callback was queued and got cancelled, false otherwise. 188 */ 189 bool task_work_cancel(struct task_struct *task, struct callback_head *cb) 190 { 191 struct callback_head *ret; 192 193 ret = task_work_cancel_match(task, task_work_match, cb); 194 195 return ret == cb; 196 } 197 198 /** 199 * task_work_run - execute the works added by task_work_add() 200 * 201 * Flush the pending works. Should be used by the core kernel code. 202 * Called before the task returns to the user-mode or stops, or when 203 * it exits. In the latter case task_work_add() can no longer add the 204 * new work after task_work_run() returns. 205 */ 206 void task_work_run(void) 207 { 208 struct task_struct *task = current; 209 struct callback_head *work, *head, *next; 210 211 for (;;) { 212 /* 213 * work->func() can do task_work_add(), do not set 214 * work_exited unless the list is empty. 215 */ 216 work = READ_ONCE(task->task_works); 217 do { 218 head = NULL; 219 if (!work) { 220 if (task->flags & PF_EXITING) 221 head = &work_exited; 222 else 223 break; 224 } 225 } while (!try_cmpxchg(&task->task_works, &work, head)); 226 227 if (!work) 228 break; 229 /* 230 * Synchronize with task_work_cancel_match(). It can not remove 231 * the first entry == work, cmpxchg(task_works) must fail. 232 * But it can remove another entry from the ->next list. 233 */ 234 raw_spin_lock_irq(&task->pi_lock); 235 raw_spin_unlock_irq(&task->pi_lock); 236 237 do { 238 next = work->next; 239 work->func(work); 240 work = next; 241 cond_resched(); 242 } while (work); 243 } 244 } 245