/*
 * Generic helpers for smp ipi calls
 *
 * (C) Jens Axboe <jens.axboe@oracle.com> 2008
 */
#include <linux/rcupdate.h>
#include <linux/rculist.h>
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/percpu.h>
#include <linux/init.h>
#include <linux/smp.h>
#include <linux/cpu.h>

static struct {
        struct list_head        queue;
        spinlock_t              lock;
} call_function __cacheline_aligned_in_smp =
        {
                .queue          = LIST_HEAD_INIT(call_function.queue),
                .lock           = __SPIN_LOCK_UNLOCKED(call_function.lock),
        };

enum {
        CSD_FLAG_LOCK           = 0x01,
};

struct call_function_data {
        struct call_single_data csd;
        spinlock_t              lock;
        unsigned int            refs;
        cpumask_var_t           cpumask;
};

struct call_single_queue {
        struct list_head        list;
        spinlock_t              lock;
};

static DEFINE_PER_CPU(struct call_single_queue, call_single_queue);

static DEFINE_PER_CPU(struct call_function_data, cfd_data) = {
        .lock                   = __SPIN_LOCK_UNLOCKED(cfd_data.lock),
};

static int
hotplug_cfd(struct notifier_block *nfb, unsigned long action, void *hcpu)
{
        long cpu = (long)hcpu;
        struct call_function_data *cfd = &per_cpu(cfd_data, cpu);

        switch (action) {
        case CPU_UP_PREPARE:
        case CPU_UP_PREPARE_FROZEN:
                if (!zalloc_cpumask_var_node(&cfd->cpumask, GFP_KERNEL,
                                cpu_to_node(cpu)))
                        return NOTIFY_BAD;
                break;

#ifdef CONFIG_HOTPLUG_CPU
        case CPU_UP_CANCELED:
        case CPU_UP_CANCELED_FROZEN:

        case CPU_DEAD:
        case CPU_DEAD_FROZEN:
                free_cpumask_var(cfd->cpumask);
                break;
#endif
        }

        return NOTIFY_OK;
}

static struct notifier_block __cpuinitdata hotplug_cfd_notifier = {
        .notifier_call          = hotplug_cfd,
};

static int __cpuinit init_call_single_data(void)
{
        void *cpu = (void *)(long)smp_processor_id();
        int i;

        for_each_possible_cpu(i) {
                struct call_single_queue *q = &per_cpu(call_single_queue, i);

                spin_lock_init(&q->lock);
                INIT_LIST_HEAD(&q->list);
        }

        hotplug_cfd(&hotplug_cfd_notifier, CPU_UP_PREPARE, cpu);
        register_cpu_notifier(&hotplug_cfd_notifier);

        return 0;
}
early_initcall(init_call_single_data);

/*
 * csd_lock/csd_unlock are used to serialize access to per-cpu csd resources.
 *
 * For non-synchronous ipi calls the csd can still be in use by the
 * previous function call. For multi-cpu calls it's even more interesting
 * as we'll have to ensure no other cpu is observing our csd.
 */
static void csd_lock_wait(struct call_single_data *data)
{
        while (data->flags & CSD_FLAG_LOCK)
                cpu_relax();
}

static void csd_lock(struct call_single_data *data)
{
        csd_lock_wait(data);
        data->flags = CSD_FLAG_LOCK;

        /*
         * prevent CPU from reordering the above assignment
         * to ->flags with any subsequent assignments to other
         * fields of the specified call_single_data structure:
         */
        smp_mb();
}

static void csd_unlock(struct call_single_data *data)
{
        WARN_ON(!(data->flags & CSD_FLAG_LOCK));

        /*
         * ensure we're all done before releasing data:
         */
        smp_mb();

        data->flags &= ~CSD_FLAG_LOCK;
}
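/*
 * Illustrative sketch (editorial addition): the CSD_FLAG_LOCK handshake
 * between the CPU issuing a single-CPU call and the remote IPI handler.
 * The names my_func and my_arg are hypothetical.
 *
 *      sender                                  target CPU (IPI handler)
 *      ------                                  ------------------------
 *      csd_lock(data);
 *      data->func = my_func;
 *      data->info = &my_arg;
 *      queue data, send IPI          ----->    my_func(&my_arg);
 *      csd_lock_wait(data);                    csd_unlock(data);
 *        spins while CSD_FLAG_LOCK               clears CSD_FLAG_LOCK, so
 *        is set (only if waiting)                the csd may be reused
 */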
/*
 * Insert a previously allocated call_single_data element
 * for execution on the given CPU. data must already have
 * ->func, ->info, and ->flags set.
 */
static
void generic_exec_single(int cpu, struct call_single_data *data, int wait)
{
        struct call_single_queue *dst = &per_cpu(call_single_queue, cpu);
        unsigned long flags;
        int ipi;

        spin_lock_irqsave(&dst->lock, flags);
        ipi = list_empty(&dst->list);
        list_add_tail(&data->list, &dst->list);
        spin_unlock_irqrestore(&dst->lock, flags);

        /*
         * The list addition should be visible before the IPI handler
         * locks the list to pull the entry off it, which follows from
         * the normal cache coherency rules implied by spinlocks.
         *
         * If IPIs can go out of order to the cache coherency protocol
         * in an architecture, sufficient synchronisation should be added
         * to arch code to make it appear to obey cache coherency WRT
         * locking and barrier primitives. Generic code isn't really
         * equipped to do the right thing...
         */
        if (ipi)
                arch_send_call_function_single_ipi(cpu);

        if (wait)
                csd_lock_wait(data);
}

/*
 * Invoked by arch to handle an IPI for call function. Must be called with
 * interrupts disabled.
 */
void generic_smp_call_function_interrupt(void)
{
        struct call_function_data *data;
        int cpu = get_cpu();

        /*
         * Shouldn't receive this interrupt on a cpu that is not yet online.
         */
        WARN_ON_ONCE(!cpu_online(cpu));

        /*
         * Ensure entry is visible on call_function_queue after we have
         * entered the IPI. See comment in smp_call_function_many.
         * If we don't have this, then we may miss an entry on the list
         * and never get another IPI to process it.
         */
        smp_mb();

        /*
         * It's ok to use list_for_each_entry_rcu() here even though we may
         * delete 'pos', since list_del_rcu() doesn't clear ->next
         */
        list_for_each_entry_rcu(data, &call_function.queue, csd.list) {
                int refs;

                spin_lock(&data->lock);
                if (!cpumask_test_cpu(cpu, data->cpumask)) {
                        spin_unlock(&data->lock);
                        continue;
                }
                cpumask_clear_cpu(cpu, data->cpumask);
                spin_unlock(&data->lock);

                data->csd.func(data->csd.info);

                spin_lock(&data->lock);
                WARN_ON(data->refs == 0);
                refs = --data->refs;
                if (!refs) {
                        spin_lock(&call_function.lock);
                        list_del_rcu(&data->csd.list);
                        spin_unlock(&call_function.lock);
                }
                spin_unlock(&data->lock);

                if (refs)
                        continue;

                csd_unlock(&data->csd);
        }

        put_cpu();
}
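/*
 * Illustrative sketch (editorial addition): architecture code is expected to
 * dispatch its two call-function IPI vectors to the generic handlers in this
 * file, with interrupts disabled.  The handler names below are hypothetical;
 * each port uses its own.
 *
 *      void arch_call_function_ipi_handler(void)         // hypothetical
 *      {
 *              irq_enter();
 *              generic_smp_call_function_interrupt();
 *              irq_exit();
 *      }
 *
 *      void arch_call_function_single_ipi_handler(void)  // hypothetical
 *      {
 *              irq_enter();
 *              generic_smp_call_function_single_interrupt();
 *              irq_exit();
 *      }
 */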
/*
 * Invoked by arch to handle an IPI for call function single. Must be
 * called from the arch with interrupts disabled.
 */
void generic_smp_call_function_single_interrupt(void)
{
        struct call_single_queue *q = &__get_cpu_var(call_single_queue);
        unsigned int data_flags;
        LIST_HEAD(list);

        /*
         * Shouldn't receive this interrupt on a cpu that is not yet online.
         */
        WARN_ON_ONCE(!cpu_online(smp_processor_id()));

        spin_lock(&q->lock);
        list_replace_init(&q->list, &list);
        spin_unlock(&q->lock);

        while (!list_empty(&list)) {
                struct call_single_data *data;

                data = list_entry(list.next, struct call_single_data, list);
                list_del(&data->list);

                /*
                 * 'data' can be invalid after this call if flags == 0
                 * (when called through generic_exec_single()),
                 * so save them away before making the call:
                 */
                data_flags = data->flags;

                data->func(data->info);

                /*
                 * Unlocked CSDs are valid through generic_exec_single():
                 */
                if (data_flags & CSD_FLAG_LOCK)
                        csd_unlock(data);
        }
}

static DEFINE_PER_CPU(struct call_single_data, csd_data);

/*
 * smp_call_function_single - Run a function on a specific CPU
 * @cpu: The CPU to run on.
 * @func: The function to run. This must be fast and non-blocking.
 * @info: An arbitrary pointer to pass to the function.
 * @wait: If true, wait until function has completed on the other CPU.
 *
 * Returns 0 on success, else a negative status code.
 */
int smp_call_function_single(int cpu, void (*func) (void *info), void *info,
                             int wait)
{
        struct call_single_data d = {
                .flags = 0,
        };
        unsigned long flags;
        int this_cpu;
        int err = 0;

        /*
         * prevent preemption and reschedule on another processor,
         * as well as CPU removal
         */
        this_cpu = get_cpu();

        /*
         * Can deadlock when called with interrupts disabled.
         * We allow CPUs that are not yet online though, as no one else can
         * send an smp call function interrupt to this cpu and as such
         * deadlocks can't happen.
         */
        WARN_ON_ONCE(cpu_online(this_cpu) && irqs_disabled()
                     && !oops_in_progress);

        if (cpu == this_cpu) {
                local_irq_save(flags);
                func(info);
                local_irq_restore(flags);
        } else {
                if ((unsigned)cpu < nr_cpu_ids && cpu_online(cpu)) {
                        struct call_single_data *data = &d;

                        if (!wait)
                                data = &__get_cpu_var(csd_data);

                        csd_lock(data);

                        data->func = func;
                        data->info = info;
                        generic_exec_single(cpu, data, wait);
                } else {
                        err = -ENXIO;   /* CPU not online */
                }
        }

        put_cpu();

        return err;
}
EXPORT_SYMBOL(smp_call_function_single);

/**
 * __smp_call_function_single(): Run a function on another CPU
 * @cpu: The CPU to run on.
 * @data: Pre-allocated and setup data structure
 * @wait: If true, wait until function has completed on the other CPU.
 *
 * Like smp_call_function_single(), but allow caller to pass in a
 * pre-allocated data structure. Useful for embedding @data inside
 * other structures, for instance.
 */
void __smp_call_function_single(int cpu, struct call_single_data *data,
                                int wait)
{
        csd_lock(data);

        /*
         * Can deadlock when called with interrupts disabled.
         * We allow CPUs that are not yet online though, as no one else can
         * send an smp call function interrupt to this cpu and as such
         * deadlocks can't happen.
         */
        WARN_ON_ONCE(cpu_online(smp_processor_id()) && wait && irqs_disabled()
                     && !oops_in_progress);

        generic_exec_single(cpu, data, wait);
}
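/*
 * Illustrative sketch (editorial addition): typical use of the two entry
 * points above.  The names my_flush, struct my_device and dev are
 * hypothetical and only show the calling convention; dev->csd is assumed
 * to be zero-initialized.
 *
 *      static void my_flush(void *info)
 *      {
 *              struct my_device *dev = info;
 *
 *              // runs in IPI context on the target CPU; must not sleep
 *              dev->flush_pending = 0;
 *      }
 *
 *      // synchronous: returns after my_flush() has run on 'cpu'
 *      err = smp_call_function_single(cpu, my_flush, dev, 1);
 *
 *      // asynchronous, with a call_single_data embedded in the caller's
 *      // own structure; csd reuse is serialized by CSD_FLAG_LOCK
 *      dev->csd.func = my_flush;
 *      dev->csd.info = dev;
 *      __smp_call_function_single(cpu, &dev->csd, 0);
 */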
/* Deprecated: shim for archs using old arch_send_call_function_ipi API. */
#ifndef arch_send_call_function_ipi_mask
# define arch_send_call_function_ipi_mask(maskp) \
         arch_send_call_function_ipi(*(maskp))
#endif

/**
 * smp_call_function_many(): Run a function on a set of other CPUs.
 * @mask: The set of cpus to run on (only runs on online subset).
 * @func: The function to run. This must be fast and non-blocking.
 * @info: An arbitrary pointer to pass to the function.
 * @wait: If true, wait (atomically) until function has completed
 *        on other CPUs.
 *
 * If @wait is true, then returns once @func has returned.
 *
 * You must not call this function with disabled interrupts or from a
 * hardware interrupt handler or from a bottom half handler. Preemption
 * must be disabled when calling this function.
 */
void smp_call_function_many(const struct cpumask *mask,
                            void (*func)(void *), void *info, bool wait)
{
        struct call_function_data *data;
        unsigned long flags;
        int cpu, next_cpu, this_cpu = smp_processor_id();

        /*
         * Can deadlock when called with interrupts disabled.
         * We allow CPUs that are not yet online though, as no one else can
         * send an smp call function interrupt to this cpu and as such
         * deadlocks can't happen.
         */
        WARN_ON_ONCE(cpu_online(this_cpu) && irqs_disabled()
                     && !oops_in_progress);

        /* So, what's a CPU they want? Ignoring this one. */
        cpu = cpumask_first_and(mask, cpu_online_mask);
        if (cpu == this_cpu)
                cpu = cpumask_next_and(cpu, mask, cpu_online_mask);

        /* No online cpus?  We're done. */
        if (cpu >= nr_cpu_ids)
                return;

        /* Do we have another CPU which isn't us? */
        next_cpu = cpumask_next_and(cpu, mask, cpu_online_mask);
        if (next_cpu == this_cpu)
                next_cpu = cpumask_next_and(next_cpu, mask, cpu_online_mask);

        /* Fastpath: do that cpu by itself. */
        if (next_cpu >= nr_cpu_ids) {
                smp_call_function_single(cpu, func, info, wait);
                return;
        }

        data = &__get_cpu_var(cfd_data);
        csd_lock(&data->csd);

        spin_lock_irqsave(&data->lock, flags);
        data->csd.func = func;
        data->csd.info = info;
        cpumask_and(data->cpumask, mask, cpu_online_mask);
        cpumask_clear_cpu(this_cpu, data->cpumask);
        data->refs = cpumask_weight(data->cpumask);

        spin_lock(&call_function.lock);
        /*
         * Place entry at the _HEAD_ of the list, so that any cpu still
         * observing the entry in generic_smp_call_function_interrupt()
         * will not miss any other list entries:
         */
        list_add_rcu(&data->csd.list, &call_function.queue);
        spin_unlock(&call_function.lock);

        spin_unlock_irqrestore(&data->lock, flags);

        /*
         * Make the list addition visible before sending the ipi.
         * (IPIs must obey or appear to obey normal Linux cache
         * coherency rules -- see comment in generic_exec_single).
         */
        smp_mb();

        /* Send a message to all CPUs in the map */
        arch_send_call_function_ipi_mask(data->cpumask);

        /* Optionally wait for the CPUs to complete */
        if (wait)
                csd_lock_wait(&data->csd);
}
EXPORT_SYMBOL(smp_call_function_many);
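/*
 * Illustrative sketch (editorial addition): calling smp_call_function_many()
 * with a caller-built mask.  The names my_mask, my_drain, struct my_queue, q
 * and some_cpu are hypothetical.
 *
 *      static void my_drain(void *info)
 *      {
 *              struct my_queue *q = info;
 *
 *              // runs in IPI context on each CPU in the mask
 *              q->drained[smp_processor_id()] = true;
 *      }
 *
 *      cpumask_var_t my_mask;
 *
 *      if (!zalloc_cpumask_var(&my_mask, GFP_KERNEL))
 *              return -ENOMEM;
 *      cpumask_copy(my_mask, cpu_online_mask);
 *      cpumask_clear_cpu(some_cpu, my_mask);
 *
 *      preempt_disable();
 *      smp_call_function_many(my_mask, my_drain, q, true);
 *      preempt_enable();
 *
 *      free_cpumask_var(my_mask);
 */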
/**
 * smp_call_function(): Run a function on all other CPUs.
 * @func: The function to run. This must be fast and non-blocking.
 * @info: An arbitrary pointer to pass to the function.
 * @wait: If true, wait (atomically) until function has completed
 *        on other CPUs.
 *
 * Returns 0.
 *
 * If @wait is true, then returns once @func has returned; otherwise
 * it returns just before the target cpu calls @func.
 *
 * You must not call this function with disabled interrupts or from a
 * hardware interrupt handler or from a bottom half handler.
 */
int smp_call_function(void (*func)(void *), void *info, int wait)
{
        preempt_disable();
        smp_call_function_many(cpu_online_mask, func, info, wait);
        preempt_enable();

        return 0;
}
EXPORT_SYMBOL(smp_call_function);

void ipi_call_lock(void)
{
        spin_lock(&call_function.lock);
}

void ipi_call_unlock(void)
{
        spin_unlock(&call_function.lock);
}

void ipi_call_lock_irq(void)
{
        spin_lock_irq(&call_function.lock);
}

void ipi_call_unlock_irq(void)
{
        spin_unlock_irq(&call_function.lock);
}
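/*
 * Illustrative sketch (editorial addition): ipi_call_lock()/ipi_call_unlock()
 * are intended for architecture CPU-bringup code, so that a CPU does not
 * mark itself online in the middle of an smp_call_function_many() round.
 * A hypothetical secondary-CPU startup path might do:
 *
 *      // on the freshly booted CPU, before it starts taking these IPIs
 *      ipi_call_lock();
 *      set_cpu_online(smp_processor_id(), true);
 *      ipi_call_unlock();
 */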