/*
 * kernel/stop_machine.c
 *
 * Copyright (C) 2008, 2005	IBM Corporation.
 * Copyright (C) 2008, 2005	Rusty Russell rusty@rustcorp.com.au
 * Copyright (C) 2010		SUSE Linux Products GmbH
 * Copyright (C) 2010		Tejun Heo <tj@kernel.org>
 *
 * This file is released under the GPLv2 and any later version.
 */
#include <linux/completion.h>
#include <linux/cpu.h>
#include <linux/init.h>
#include <linux/kthread.h>
#include <linux/export.h>
#include <linux/percpu.h>
#include <linux/sched.h>
#include <linux/stop_machine.h>
#include <linux/interrupt.h>
#include <linux/kallsyms.h>

#include <linux/atomic.h>

/*
 * Structure to determine completion condition and record errors.  May
 * be shared by works on different cpus.
 */
struct cpu_stop_done {
        atomic_t                nr_todo;        /* nr left to execute */
        bool                    executed;       /* actually executed? */
        int                     ret;            /* collected return value */
        struct completion       completion;     /* fired if nr_todo reaches 0 */
};

/* the actual stopper, one per every possible cpu, enabled on online cpus */
struct cpu_stopper {
        spinlock_t              lock;
        bool                    enabled;        /* is this stopper enabled? */
        struct list_head        works;          /* list of pending works */
        struct task_struct      *thread;        /* stopper thread */
};

static DEFINE_PER_CPU(struct cpu_stopper, cpu_stopper);
static bool stop_machine_initialized = false;

static void cpu_stop_init_done(struct cpu_stop_done *done, unsigned int nr_todo)
{
        memset(done, 0, sizeof(*done));
        atomic_set(&done->nr_todo, nr_todo);
        init_completion(&done->completion);
}

/* signal completion unless @done is NULL */
static void cpu_stop_signal_done(struct cpu_stop_done *done, bool executed)
{
        if (done) {
                if (executed)
                        done->executed = true;
                if (atomic_dec_and_test(&done->nr_todo))
                        complete(&done->completion);
        }
}

/* queue @work to @stopper.  if offline, @work is completed immediately */
static void cpu_stop_queue_work(struct cpu_stopper *stopper,
                                struct cpu_stop_work *work)
{
        unsigned long flags;

        spin_lock_irqsave(&stopper->lock, flags);

        if (stopper->enabled) {
                list_add_tail(&work->list, &stopper->works);
                wake_up_process(stopper->thread);
        } else
                cpu_stop_signal_done(work->done, false);

        spin_unlock_irqrestore(&stopper->lock, flags);
}

/**
 * stop_one_cpu - stop a cpu
 * @cpu: cpu to stop
 * @fn: function to execute
 * @arg: argument to @fn
 *
 * Execute @fn(@arg) on @cpu.  @fn is run in a process context with
 * the highest priority preempting any task on the cpu and
 * monopolizing it.  This function returns after the execution is
 * complete.
 *
 * This function doesn't guarantee @cpu stays online till @fn
 * completes.  If @cpu goes down in the middle, execution may happen
 * partially or fully on different cpus.  @fn should either be ready
 * for that or the caller should ensure that @cpu stays online until
 * this function completes.
 *
 * CONTEXT:
 * Might sleep.
 *
 * RETURNS:
 * -ENOENT if @fn(@arg) was not executed because @cpu was offline;
 * otherwise, the return value of @fn.
 */
int stop_one_cpu(unsigned int cpu, cpu_stop_fn_t fn, void *arg)
{
        struct cpu_stop_done done;
        struct cpu_stop_work work = { .fn = fn, .arg = arg, .done = &done };

        cpu_stop_init_done(&done, 1);
        cpu_stop_queue_work(&per_cpu(cpu_stopper, cpu), &work);
        wait_for_completion(&done.completion);
        return done.executed ? done.ret : -ENOENT;
}
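
/*
 * Illustrative sketch of a stop_one_cpu() caller: the callback, the flag
 * it clears and the wrapper below are hypothetical and exist only to show
 * the calling convention.  @fn runs on the target cpu, must not sleep,
 * and the caller gets back either @fn's return value or -ENOENT if the
 * cpu was offline.
 */
static int example_clear_flag(void *arg)
{
        unsigned int *flag = arg;

        /* runs on the target cpu with the cpu monopolized; must not sleep */
        *flag = 0;
        return 0;
}

static int __maybe_unused example_stop_one_cpu(unsigned int cpu)
{
        unsigned int flag = 1;

        /* sleeps until the stopper on @cpu has executed example_clear_flag() */
        return stop_one_cpu(cpu, example_clear_flag, &flag);
}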

/**
 * stop_one_cpu_nowait - stop a cpu but don't wait for completion
 * @cpu: cpu to stop
 * @fn: function to execute
 * @arg: argument to @fn
 *
 * Similar to stop_one_cpu() but doesn't wait for completion.  The
 * caller is responsible for ensuring @work_buf is currently unused
 * and will remain untouched until stopper starts executing @fn.
 *
 * CONTEXT:
 * Don't care.
 */
void stop_one_cpu_nowait(unsigned int cpu, cpu_stop_fn_t fn, void *arg,
                         struct cpu_stop_work *work_buf)
{
        *work_buf = (struct cpu_stop_work){ .fn = fn, .arg = arg, };
        cpu_stop_queue_work(&per_cpu(cpu_stopper, cpu), work_buf);
}
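
/*
 * Illustrative sketch of the nowait variant; the per-cpu work buffer, the
 * callback and the wrapper are hypothetical.  Because the caller does not
 * wait, the cpu_stop_work must outlive the call, so it lives in per-cpu
 * data rather than on the stack and must not be reused until the stopper
 * has started executing the callback.
 */
static DEFINE_PER_CPU(struct cpu_stop_work, example_nowait_work);

static int example_poke_cpu(void *arg)
{
        /* runs asynchronously on the target cpu; must not sleep */
        return 0;
}

static void __maybe_unused example_stop_one_cpu_nowait(unsigned int cpu)
{
        /* returns immediately; example_poke_cpu() runs later on @cpu */
        stop_one_cpu_nowait(cpu, example_poke_cpu, NULL,
                            &per_cpu(example_nowait_work, cpu));
}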

/* static data for stop_cpus */
static DEFINE_MUTEX(stop_cpus_mutex);
static DEFINE_PER_CPU(struct cpu_stop_work, stop_cpus_work);

static void queue_stop_cpus_work(const struct cpumask *cpumask,
                                 cpu_stop_fn_t fn, void *arg,
                                 struct cpu_stop_done *done)
{
        struct cpu_stop_work *work;
        unsigned int cpu;

        /* initialize works and done */
        for_each_cpu(cpu, cpumask) {
                work = &per_cpu(stop_cpus_work, cpu);
                work->fn = fn;
                work->arg = arg;
                work->done = done;
        }

        /*
         * Disable preemption while queueing to avoid getting
         * preempted by a stopper which might wait for other stoppers
         * to enter @fn which can lead to deadlock.
         */
        preempt_disable();
        for_each_cpu(cpu, cpumask)
                cpu_stop_queue_work(&per_cpu(cpu_stopper, cpu),
                                    &per_cpu(stop_cpus_work, cpu));
        preempt_enable();
}

static int __stop_cpus(const struct cpumask *cpumask,
                       cpu_stop_fn_t fn, void *arg)
{
        struct cpu_stop_done done;

        cpu_stop_init_done(&done, cpumask_weight(cpumask));
        queue_stop_cpus_work(cpumask, fn, arg, &done);
        wait_for_completion(&done.completion);
        return done.executed ? done.ret : -ENOENT;
}

/**
 * stop_cpus - stop multiple cpus
 * @cpumask: cpus to stop
 * @fn: function to execute
 * @arg: argument to @fn
 *
 * Execute @fn(@arg) on online cpus in @cpumask.  On each target cpu,
 * @fn is run in a process context with the highest priority
 * preempting any task on the cpu and monopolizing it.  This function
 * returns after all executions are complete.
 *
 * This function doesn't guarantee the cpus in @cpumask stay online
 * till @fn completes.  If some cpus go down in the middle, execution
 * on the cpu may happen partially or fully on different cpus.  @fn
 * should either be ready for that or the caller should ensure that
 * the cpus stay online until this function completes.
 *
 * All stop_cpus() calls are serialized making it safe for @fn to wait
 * for all cpus to start executing it.
 *
 * CONTEXT:
 * Might sleep.
 *
 * RETURNS:
 * -ENOENT if @fn(@arg) was not executed at all because all cpus in
 * @cpumask were offline; otherwise, 0 if all executions of @fn
 * returned 0, any non zero return value if any returned non zero.
 */
int stop_cpus(const struct cpumask *cpumask, cpu_stop_fn_t fn, void *arg)
{
        int ret;

        /* static works are used, process one request at a time */
        mutex_lock(&stop_cpus_mutex);
        ret = __stop_cpus(cpumask, fn, arg);
        mutex_unlock(&stop_cpus_mutex);
        return ret;
}

/**
 * try_stop_cpus - try to stop multiple cpus
 * @cpumask: cpus to stop
 * @fn: function to execute
 * @arg: argument to @fn
 *
 * Identical to stop_cpus() except that it fails with -EAGAIN if
 * someone else is already using the facility.
 *
 * CONTEXT:
 * Might sleep.
 *
 * RETURNS:
 * -EAGAIN if someone else is already stopping cpus, -ENOENT if
 * @fn(@arg) was not executed at all because all cpus in @cpumask were
 * offline; otherwise, 0 if all executions of @fn returned 0, any non
 * zero return value if any returned non zero.
 */
int try_stop_cpus(const struct cpumask *cpumask, cpu_stop_fn_t fn, void *arg)
{
        int ret;

        /* static works are used, process one request at a time */
        if (!mutex_trylock(&stop_cpus_mutex))
                return -EAGAIN;
        ret = __stop_cpus(cpumask, fn, arg);
        mutex_unlock(&stop_cpus_mutex);
        return ret;
}
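
/*
 * Illustrative sketch of a stop_cpus()/try_stop_cpus() caller; the
 * callback and wrapper names are hypothetical.  try_stop_cpus() is the
 * non-blocking attempt; -EAGAIN means another caller currently owns the
 * stop_cpus facility, in which case this sketch falls back to the
 * serialized, sleeping stop_cpus().
 */
static int example_sync_all(void *arg)
{
        /* runs on every online cpu at the highest priority; must not sleep */
        return 0;
}

static int __maybe_unused example_stop_all_cpus(void)
{
        int ret;

        ret = try_stop_cpus(cpu_online_mask, example_sync_all, NULL);
        if (ret == -EAGAIN)
                ret = stop_cpus(cpu_online_mask, example_sync_all, NULL);
        return ret;
}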

static int cpu_stopper_thread(void *data)
{
        struct cpu_stopper *stopper = data;
        struct cpu_stop_work *work;
        int ret;

repeat:
        set_current_state(TASK_INTERRUPTIBLE);  /* mb paired w/ kthread_stop */

        if (kthread_should_stop()) {
                __set_current_state(TASK_RUNNING);
                return 0;
        }

        work = NULL;
        spin_lock_irq(&stopper->lock);
        if (!list_empty(&stopper->works)) {
                work = list_first_entry(&stopper->works,
                                        struct cpu_stop_work, list);
                list_del_init(&work->list);
        }
        spin_unlock_irq(&stopper->lock);

        if (work) {
                cpu_stop_fn_t fn = work->fn;
                void *arg = work->arg;
                struct cpu_stop_done *done = work->done;
                char ksym_buf[KSYM_NAME_LEN] __maybe_unused;

                __set_current_state(TASK_RUNNING);

                /* cpu stop callbacks are not allowed to sleep */
                preempt_disable();

                ret = fn(arg);
                if (ret)
                        done->ret = ret;

                /* restore preemption and check it's still balanced */
                preempt_enable();
                WARN_ONCE(preempt_count(),
                          "cpu_stop: %s(%p) leaked preempt count\n",
                          kallsyms_lookup((unsigned long)fn, NULL, NULL, NULL,
                                          ksym_buf), arg);

                cpu_stop_signal_done(done, true);
        } else
                schedule();

        goto repeat;
}

extern void sched_set_stop_task(int cpu, struct task_struct *stop);

/* manage stopper for a cpu, mostly lifted from sched migration thread mgmt */
static int __cpuinit cpu_stop_cpu_callback(struct notifier_block *nfb,
                                           unsigned long action, void *hcpu)
{
        unsigned int cpu = (unsigned long)hcpu;
        struct cpu_stopper *stopper = &per_cpu(cpu_stopper, cpu);
        struct task_struct *p;

        switch (action & ~CPU_TASKS_FROZEN) {
        case CPU_UP_PREPARE:
                BUG_ON(stopper->thread || stopper->enabled ||
                       !list_empty(&stopper->works));
                p = kthread_create_on_node(cpu_stopper_thread,
                                           stopper,
                                           cpu_to_node(cpu),
                                           "migration/%d", cpu);
                if (IS_ERR(p))
                        return notifier_from_errno(PTR_ERR(p));
                get_task_struct(p);
                kthread_bind(p, cpu);
                sched_set_stop_task(cpu, p);
                stopper->thread = p;
                break;

        case CPU_ONLINE:
                /* strictly unnecessary, as first user will wake it */
                wake_up_process(stopper->thread);
                /* mark enabled */
                spin_lock_irq(&stopper->lock);
                stopper->enabled = true;
                spin_unlock_irq(&stopper->lock);
                break;

#ifdef CONFIG_HOTPLUG_CPU
        case CPU_UP_CANCELED:
        case CPU_POST_DEAD:
        {
                struct cpu_stop_work *work;

                sched_set_stop_task(cpu, NULL);
                /* kill the stopper */
                kthread_stop(stopper->thread);
                /* drain remaining works */
                spin_lock_irq(&stopper->lock);
                list_for_each_entry(work, &stopper->works, list)
                        cpu_stop_signal_done(work->done, false);
                stopper->enabled = false;
                spin_unlock_irq(&stopper->lock);
                /* release the stopper */
                put_task_struct(stopper->thread);
                stopper->thread = NULL;
                break;
        }
#endif
        }

        return NOTIFY_OK;
}

/*
 * Give it a higher priority so that cpu stopper is available to other
 * cpu notifiers.  It currently shares the same priority as sched
 * migration_notifier.
 */
static struct notifier_block __cpuinitdata cpu_stop_cpu_notifier = {
        .notifier_call  = cpu_stop_cpu_callback,
        .priority       = 10,
};

static int __init cpu_stop_init(void)
{
        void *bcpu = (void *)(long)smp_processor_id();
        unsigned int cpu;
        int err;

        for_each_possible_cpu(cpu) {
                struct cpu_stopper *stopper = &per_cpu(cpu_stopper, cpu);

                spin_lock_init(&stopper->lock);
                INIT_LIST_HEAD(&stopper->works);
        }

        /* start one for the boot cpu */
        err = cpu_stop_cpu_callback(&cpu_stop_cpu_notifier, CPU_UP_PREPARE,
                                    bcpu);
        BUG_ON(err != NOTIFY_OK);
        cpu_stop_cpu_callback(&cpu_stop_cpu_notifier, CPU_ONLINE, bcpu);
        register_cpu_notifier(&cpu_stop_cpu_notifier);

        stop_machine_initialized = true;

        return 0;
}
early_initcall(cpu_stop_init);

#ifdef CONFIG_STOP_MACHINE

/* This controls the threads on each CPU. */
enum stopmachine_state {
        /* Dummy starting state for thread. */
        STOPMACHINE_NONE,
        /* Awaiting everyone to be scheduled. */
        STOPMACHINE_PREPARE,
        /* Disable interrupts. */
        STOPMACHINE_DISABLE_IRQ,
        /* Run the function */
        STOPMACHINE_RUN,
        /* Exit */
        STOPMACHINE_EXIT,
};

struct stop_machine_data {
        int                     (*fn)(void *);
        void                    *data;
        /* Like num_online_cpus(), but hotplug cpu uses us, so we need this. */
        unsigned int            num_threads;
        const struct cpumask    *active_cpus;

        enum stopmachine_state  state;
        atomic_t                thread_ack;
};

static void set_state(struct stop_machine_data *smdata,
                      enum stopmachine_state newstate)
{
        /* Reset ack counter. */
        atomic_set(&smdata->thread_ack, smdata->num_threads);
        smp_wmb();
        smdata->state = newstate;
}

/* Last one to ack a state moves to the next state. */
static void ack_state(struct stop_machine_data *smdata)
{
        if (atomic_dec_and_test(&smdata->thread_ack))
                set_state(smdata, smdata->state + 1);
}
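
/*
 * The handshake below works in lockstep: __stop_machine() seeds
 * smdata->state with STOPMACHINE_PREPARE and thread_ack with the number
 * of participating cpus before the work is queued.  Every cpu then spins
 * in stop_machine_cpu_stop(); when it observes a state change it performs
 * the action for the new state and calls ack_state(), and the last cpu to
 * ack advances the state.  All cpus therefore step through
 * DISABLE_IRQ -> RUN -> EXIT together, and @fn never runs before every
 * participating cpu has disabled interrupts.
 */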

/* This is the cpu_stop function which stops the CPU. */
static int stop_machine_cpu_stop(void *data)
{
        struct stop_machine_data *smdata = data;
        enum stopmachine_state curstate = STOPMACHINE_NONE;
        int cpu = smp_processor_id(), err = 0;
        unsigned long flags;
        bool is_active;

        /*
         * When called from stop_machine_from_inactive_cpu(), irq might
         * already be disabled.  Save the state and restore it on exit.
         */
        local_save_flags(flags);

        if (!smdata->active_cpus)
                is_active = cpu == cpumask_first(cpu_online_mask);
        else
                is_active = cpumask_test_cpu(cpu, smdata->active_cpus);

        /* Simple state machine */
        do {
                /* Chill out and ensure we re-read stopmachine_state. */
                cpu_relax();
                if (smdata->state != curstate) {
                        curstate = smdata->state;
                        switch (curstate) {
                        case STOPMACHINE_DISABLE_IRQ:
                                local_irq_disable();
                                hard_irq_disable();
                                break;
                        case STOPMACHINE_RUN:
                                if (is_active)
                                        err = smdata->fn(smdata->data);
                                break;
                        default:
                                break;
                        }
                        ack_state(smdata);
                }
        } while (curstate != STOPMACHINE_EXIT);

        local_irq_restore(flags);
        return err;
}

int __stop_machine(int (*fn)(void *), void *data, const struct cpumask *cpus)
{
        struct stop_machine_data smdata = { .fn = fn, .data = data,
                                            .num_threads = num_online_cpus(),
                                            .active_cpus = cpus };

        if (!stop_machine_initialized) {
                /*
                 * Handle the case where stop_machine() is called
                 * early in boot before stop_machine() has been
                 * initialized.
                 */
                unsigned long flags;
                int ret;

                WARN_ON_ONCE(smdata.num_threads != 1);

                local_irq_save(flags);
                hard_irq_disable();
                ret = (*fn)(data);
                local_irq_restore(flags);

                return ret;
        }

        /* Set the initial state and stop all online cpus. */
        set_state(&smdata, STOPMACHINE_PREPARE);
        return stop_cpus(cpu_online_mask, stop_machine_cpu_stop, &smdata);
}

int stop_machine(int (*fn)(void *), void *data, const struct cpumask *cpus)
{
        int ret;

        /* No CPUs can come up or down during this. */
        get_online_cpus();
        ret = __stop_machine(fn, data, cpus);
        put_online_cpus();
        return ret;
}
EXPORT_SYMBOL_GPL(stop_machine);
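
/*
 * Illustrative sketch of a stop_machine() caller; the mode variable and
 * both functions are hypothetical.  The typical use is flipping state
 * that other cpus must never see half-updated: while the update runs,
 * every other online cpu is spinning in stop_machine_cpu_stop() with
 * interrupts disabled, so nothing else can observe the transition.
 */
static int example_switch_mode(void *arg)
{
        int *mode = arg;

        *mode = 1;
        return 0;
}

static int __maybe_unused example_stop_machine(int *mode)
{
        /* with @cpus == NULL, the callback runs on the first online cpu */
        return stop_machine(example_switch_mode, mode, NULL);
}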

/**
 * stop_machine_from_inactive_cpu - stop_machine() from inactive CPU
 * @fn: the function to run
 * @data: the data ptr for the @fn()
 * @cpus: the cpus to run the @fn() on (NULL = any online cpu)
 *
 * This is identical to stop_machine() but can be called from a CPU which
 * is not active.  The local CPU is in the process of hotplug (so no other
 * CPU hotplug can start) and not marked active and doesn't have enough
 * context to sleep.
 *
 * This function provides stop_machine() functionality for such state by
 * using busy-wait for synchronization and executing @fn directly for local
 * CPU.
 *
 * CONTEXT:
 * Local CPU is inactive.  Temporarily stops all active CPUs.
 *
 * RETURNS:
 * 0 if all executions of @fn returned 0, any non zero return value if any
 * returned non zero.
 */
int stop_machine_from_inactive_cpu(int (*fn)(void *), void *data,
                                   const struct cpumask *cpus)
{
        struct stop_machine_data smdata = { .fn = fn, .data = data,
                                            .active_cpus = cpus };
        struct cpu_stop_done done;
        int ret;

        /* Local CPU must be inactive and CPU hotplug in progress. */
        BUG_ON(cpu_active(raw_smp_processor_id()));
        smdata.num_threads = num_active_cpus() + 1;     /* +1 for local */

        /* No proper task established and can't sleep - busy wait for lock. */
        while (!mutex_trylock(&stop_cpus_mutex))
                cpu_relax();

        /* Schedule work on other CPUs and execute directly for local CPU */
        set_state(&smdata, STOPMACHINE_PREPARE);
        cpu_stop_init_done(&done, num_active_cpus());
        queue_stop_cpus_work(cpu_active_mask, stop_machine_cpu_stop, &smdata,
                             &done);
        ret = stop_machine_cpu_stop(&smdata);

        /* Busy wait for completion. */
        while (!completion_done(&done.completion))
                cpu_relax();

        mutex_unlock(&stop_cpus_mutex);
        return ret ?: done.ret;
}

#endif  /* CONFIG_STOP_MACHINE */