// SPDX-License-Identifier: GPL-2.0-only
/*
 * Copyright (C) 2010 Red Hat, Inc., Peter Zijlstra
 *
 * Provides a framework for enqueueing and running callbacks from hardirq
 * context. The enqueueing is NMI-safe.
 */

#include <linux/bug.h>
#include <linux/kernel.h>
#include <linux/export.h>
#include <linux/irq_work.h>
#include <linux/percpu.h>
#include <linux/hardirq.h>
#include <linux/irqflags.h>
#include <linux/sched.h>
#include <linux/tick.h>
#include <linux/cpu.h>
#include <linux/notifier.h>
#include <linux/smp.h>
#include <linux/smpboot.h>
#include <asm/processor.h>
#include <linux/kasan.h>

static DEFINE_PER_CPU(struct llist_head, raised_list);
static DEFINE_PER_CPU(struct llist_head, lazy_list);
static DEFINE_PER_CPU(struct task_struct *, irq_workd);

static void wake_irq_workd(void)
{
        struct task_struct *tsk = __this_cpu_read(irq_workd);

        if (!llist_empty(this_cpu_ptr(&lazy_list)) && tsk)
                wake_up_process(tsk);
}

#ifdef CONFIG_SMP
static void irq_work_wake(struct irq_work *entry)
{
        wake_irq_workd();
}

static DEFINE_PER_CPU(struct irq_work, irq_work_wakeup) =
        IRQ_WORK_INIT_HARD(irq_work_wake);
#endif

static int irq_workd_should_run(unsigned int cpu)
{
        return !llist_empty(this_cpu_ptr(&lazy_list));
}

/*
 * Claim the entry so that no one else will poke at it.
 */
static bool irq_work_claim(struct irq_work *work)
{
        int oflags;

        oflags = atomic_fetch_or(IRQ_WORK_CLAIMED | CSD_TYPE_IRQ_WORK, &work->node.a_flags);
        /*
         * If the work is already pending, no need to raise the IPI.
         * The pairing smp_mb() in irq_work_single() makes sure
         * everything we did before is visible.
         */
        if (oflags & IRQ_WORK_PENDING)
                return false;
        return true;
}

void __weak arch_irq_work_raise(void)
{
        /*
         * Lame architectures will get the timer tick callback
         */
}

/* Enqueue on current CPU, work must already be claimed and preempt disabled */
static void __irq_work_queue_local(struct irq_work *work)
{
        struct llist_head *list;
        bool rt_lazy_work = false;
        bool lazy_work = false;
        int work_flags;

        work_flags = atomic_read(&work->node.a_flags);
        if (work_flags & IRQ_WORK_LAZY)
                lazy_work = true;
        else if (IS_ENABLED(CONFIG_PREEMPT_RT) &&
                 !(work_flags & IRQ_WORK_HARD_IRQ))
                rt_lazy_work = true;

        if (lazy_work || rt_lazy_work)
                list = this_cpu_ptr(&lazy_list);
        else
                list = this_cpu_ptr(&raised_list);

        if (!llist_add(&work->node.llist, list))
                return;

        /* If the work is "lazy", handle it from next tick if any */
        if (!lazy_work || tick_nohz_tick_stopped())
                arch_irq_work_raise();
}

/* Enqueue the irq work @work on the current CPU */
bool irq_work_queue(struct irq_work *work)
{
        /* Only queue if not already pending */
        if (!irq_work_claim(work))
                return false;

        /* Queue the entry and raise the IPI if needed. */
        preempt_disable();
        __irq_work_queue_local(work);
        preempt_enable();

        return true;
}
EXPORT_SYMBOL_GPL(irq_work_queue);
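/*
 * Illustrative usage sketch (the names my_callback and my_work are made up
 * for the example): a caller typically embeds a struct irq_work in its own
 * data, initializes it with IRQ_WORK_INIT() or init_irq_work(), and queues
 * it from NMI or hardirq context. The callback later runs from hardirq
 * context (or from the per-CPU irq_work thread on PREEMPT_RT for items not
 * marked IRQ_WORK_HARD_IRQ).
 *
 *      static void my_callback(struct irq_work *work)
 *      {
 *              pr_info("deferred from NMI/hardirq context\n");
 *      }
 *
 *      static struct irq_work my_work = IRQ_WORK_INIT(my_callback);
 *
 *      From NMI or hardirq context:
 *              irq_work_queue(&my_work);   returns false if already pending
 */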
/*
 * Enqueue the irq_work @work on @cpu unless it's already pending
 * somewhere.
 *
 * Can be re-enqueued while the callback is still in progress.
 */
bool irq_work_queue_on(struct irq_work *work, int cpu)
{
#ifndef CONFIG_SMP
        return irq_work_queue(work);

#else /* CONFIG_SMP: */
        /* All work should have been flushed before going offline */
        WARN_ON_ONCE(cpu_is_offline(cpu));

        /* Only queue if not already pending */
        if (!irq_work_claim(work))
                return false;

        kasan_record_aux_stack(work);

        preempt_disable();
        if (cpu != smp_processor_id()) {
                /* Arch remote IPI send/receive backends aren't NMI safe */
                WARN_ON_ONCE(in_nmi());

                /*
                 * On PREEMPT_RT the items which are not marked as
                 * IRQ_WORK_HARD_IRQ are added to the lazy list and a HARD work
                 * item is used on the remote CPU to wake the thread.
                 */
                if (IS_ENABLED(CONFIG_PREEMPT_RT) &&
                    !(atomic_read(&work->node.a_flags) & IRQ_WORK_HARD_IRQ)) {

                        if (!llist_add(&work->node.llist, &per_cpu(lazy_list, cpu)))
                                goto out;

                        work = &per_cpu(irq_work_wakeup, cpu);
                        if (!irq_work_claim(work))
                                goto out;
                }

                __smp_call_single_queue(cpu, &work->node.llist);
        } else {
                __irq_work_queue_local(work);
        }
out:
        preempt_enable();

        return true;
#endif /* CONFIG_SMP */
}

bool irq_work_needs_cpu(void)
{
        struct llist_head *raised, *lazy;

        raised = this_cpu_ptr(&raised_list);
        lazy = this_cpu_ptr(&lazy_list);

        if (llist_empty(raised) || arch_irq_work_has_interrupt())
                if (llist_empty(lazy))
                        return false;

        /* All work should have been flushed before going offline */
        WARN_ON_ONCE(cpu_is_offline(smp_processor_id()));

        return true;
}

void irq_work_single(void *arg)
{
        struct irq_work *work = arg;
        int flags;

        /*
         * Clear the PENDING bit, after this point the @work can be re-used.
         * The PENDING bit acts as a lock, and we own it, so we can clear it
         * without atomic ops.
         */
        flags = atomic_read(&work->node.a_flags);
        flags &= ~IRQ_WORK_PENDING;
        atomic_set(&work->node.a_flags, flags);

        /*
         * See irq_work_claim().
         */
        smp_mb();

        lockdep_irq_work_enter(flags);
        work->func(work);
        lockdep_irq_work_exit(flags);

        /*
         * Clear the BUSY bit, if set, and return to the free state if no-one
         * else claimed it meanwhile.
         */
        (void)atomic_cmpxchg(&work->node.a_flags, flags, flags & ~IRQ_WORK_BUSY);

        if ((IS_ENABLED(CONFIG_PREEMPT_RT) && !irq_work_is_hard(work)) ||
            !arch_irq_work_has_interrupt())
                rcuwait_wake_up(&work->irqwait);
}
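/*
 * Overview of the claim/run flag lifecycle implemented by irq_work_claim()
 * and irq_work_single() above (the flag bits live in
 * include/linux/irq_work.h, where IRQ_WORK_CLAIMED is
 * IRQ_WORK_PENDING | IRQ_WORK_BUSY):
 *
 *      free                    flags == 0
 *      irq_work_claim()        sets PENDING | BUSY, the work gets queued
 *      irq_work_single()       clears PENDING before invoking the callback,
 *                              so the callback may re-queue its own work
 *      callback returns        BUSY is cleared unless the work was
 *                              re-claimed in the meantime
 */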
static void irq_work_run_list(struct llist_head *list)
{
        struct irq_work *work, *tmp;
        struct llist_node *llnode;

        /*
         * On PREEMPT_RT IRQ-work which is not marked as HARD will be processed
         * in a per-CPU thread in preemptible context. Only the items which are
         * marked as IRQ_WORK_HARD_IRQ will be processed in hardirq context.
         */
        BUG_ON(!irqs_disabled() && !IS_ENABLED(CONFIG_PREEMPT_RT));

        if (llist_empty(list))
                return;

        llnode = llist_del_all(list);
        llist_for_each_entry_safe(work, tmp, llnode, node.llist)
                irq_work_single(work);
}

/*
 * hotplug calls this through:
 *  hotplug_cfd() -> flush_smp_call_function_queue()
 */
void irq_work_run(void)
{
        irq_work_run_list(this_cpu_ptr(&raised_list));
        if (!IS_ENABLED(CONFIG_PREEMPT_RT))
                irq_work_run_list(this_cpu_ptr(&lazy_list));
        else
                wake_irq_workd();
}
EXPORT_SYMBOL_GPL(irq_work_run);

void irq_work_tick(void)
{
        struct llist_head *raised = this_cpu_ptr(&raised_list);

        if (!llist_empty(raised) && !arch_irq_work_has_interrupt())
                irq_work_run_list(raised);

        if (!IS_ENABLED(CONFIG_PREEMPT_RT))
                irq_work_run_list(this_cpu_ptr(&lazy_list));
        else
                wake_irq_workd();
}

/*
 * Synchronize against the irq_work @work, ensures the entry is not
 * currently in use.
 */
void irq_work_sync(struct irq_work *work)
{
        lockdep_assert_irqs_enabled();
        might_sleep();

        if ((IS_ENABLED(CONFIG_PREEMPT_RT) && !irq_work_is_hard(work)) ||
            !arch_irq_work_has_interrupt()) {
                rcuwait_wait_event(&work->irqwait, !irq_work_is_busy(work),
                                   TASK_UNINTERRUPTIBLE);
                return;
        }

        while (irq_work_is_busy(work))
                cpu_relax();
}
EXPORT_SYMBOL_GPL(irq_work_sync);

static void run_irq_workd(unsigned int cpu)
{
        irq_work_run_list(this_cpu_ptr(&lazy_list));
}

static void irq_workd_setup(unsigned int cpu)
{
        sched_set_fifo_low(current);
}

static struct smp_hotplug_thread irqwork_threads = {
        .store                  = &irq_workd,
        .setup                  = irq_workd_setup,
        .thread_should_run      = irq_workd_should_run,
        .thread_fn              = run_irq_workd,
        .thread_comm            = "irq_work/%u",
};

static __init int irq_work_init_threads(void)
{
        if (IS_ENABLED(CONFIG_PREEMPT_RT))
                BUG_ON(smpboot_register_percpu_thread(&irqwork_threads));
        return 0;
}
early_initcall(irq_work_init_threads);
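/*
 * Illustrative teardown sketch (the struct and function names are made up
 * for the example): before freeing an object that embeds a struct irq_work,
 * wait for a possibly in-flight callback with irq_work_sync().
 *
 *      struct my_obj {
 *              struct irq_work work;
 *      };
 *
 *      static void my_obj_free(struct my_obj *obj)
 *      {
 *              irq_work_sync(&obj->work);
 *              kfree(obj);
 *      }
 */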