/*
 * linux/kernel/time/tick-broadcast.c
 *
 * This file contains functions which emulate a local clock-event
 * device via a broadcast event source.
 *
 * Copyright(C) 2005-2006, Thomas Gleixner <tglx@linutronix.de>
 * Copyright(C) 2005-2007, Red Hat, Inc., Ingo Molnar
 * Copyright(C) 2006-2007, Timesys Corp., Thomas Gleixner
 *
 * This code is licensed under the GPL version 2. For details see
 * kernel-base/COPYING.
 */
#include <linux/cpu.h>
#include <linux/err.h>
#include <linux/hrtimer.h>
#include <linux/interrupt.h>
#include <linux/percpu.h>
#include <linux/profile.h>
#include <linux/sched.h>
#include <linux/smp.h>
#include <linux/module.h>

#include "tick-internal.h"

/*
 * Broadcast support for broken x86 hardware, where the local apic
 * timer stops in C3 state.
 */

static struct tick_device tick_broadcast_device;
static cpumask_var_t tick_broadcast_mask;
static cpumask_var_t tick_broadcast_on;
static cpumask_var_t tmpmask;
static DEFINE_RAW_SPINLOCK(tick_broadcast_lock);
static int tick_broadcast_forced;

#ifdef CONFIG_TICK_ONESHOT
static void tick_broadcast_clear_oneshot(int cpu);
static void tick_resume_broadcast_oneshot(struct clock_event_device *bc);
#else
static inline void tick_broadcast_clear_oneshot(int cpu) { }
static inline void tick_resume_broadcast_oneshot(struct clock_event_device *bc) { }
#endif

/*
 * Debugging: see timer_list.c
 */
struct tick_device *tick_get_broadcast_device(void)
{
        return &tick_broadcast_device;
}

struct cpumask *tick_get_broadcast_mask(void)
{
        return tick_broadcast_mask;
}

/*
 * Start the device in periodic mode
 */
static void tick_broadcast_start_periodic(struct clock_event_device *bc)
{
        if (bc)
                tick_setup_periodic(bc, 1);
}

/*
 * Check, if the device can be utilized as broadcast device:
 */
static bool tick_check_broadcast_device(struct clock_event_device *curdev,
                                        struct clock_event_device *newdev)
{
        if ((newdev->features & CLOCK_EVT_FEAT_DUMMY) ||
            (newdev->features & CLOCK_EVT_FEAT_PERCPU) ||
            (newdev->features & CLOCK_EVT_FEAT_C3STOP))
                return false;

        if (tick_broadcast_device.mode == TICKDEV_MODE_ONESHOT &&
            !(newdev->features & CLOCK_EVT_FEAT_ONESHOT))
                return false;

        return !curdev || newdev->rating > curdev->rating;
}

/*
 * Conditionally install/replace broadcast device
 */
void tick_install_broadcast_device(struct clock_event_device *dev)
{
        struct clock_event_device *cur = tick_broadcast_device.evtdev;

        if (!tick_check_broadcast_device(cur, dev))
                return;

        if (!try_module_get(dev->owner))
                return;

        clockevents_exchange_device(cur, dev);
        if (cur)
                cur->event_handler = clockevents_handle_noop;
        tick_broadcast_device.evtdev = dev;
        if (!cpumask_empty(tick_broadcast_mask))
                tick_broadcast_start_periodic(dev);
        /*
         * Inform all cpus about this. We might be in a situation
         * where we did not switch to oneshot mode because the per cpu
         * devices are affected by CLOCK_EVT_FEAT_C3STOP and the lack
         * of a oneshot capable broadcast device. Without that
         * notification the system stays stuck in periodic mode
         * forever.
         */
        if (dev->features & CLOCK_EVT_FEAT_ONESHOT)
                tick_clock_notify();
}

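/*
 * Note (added context, assumption about the surrounding clockevents code):
 * devices arrive here via clockevents_register_device() or
 * clockevents_config_and_register(); tick_check_new_device() offers every
 * device it does not pick as the per-CPU tick device to
 * tick_install_broadcast_device(). Only globally accessible hardware
 * qualifies, i.e. devices without CLOCK_EVT_FEAT_DUMMY, _PERCPU or
 * _C3STOP, and the device must be oneshot capable once the broadcast
 * device already operates in oneshot mode.
 */
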
/*
 * Check, if the device is the broadcast device
 */
int tick_is_broadcast_device(struct clock_event_device *dev)
{
        return (dev && tick_broadcast_device.evtdev == dev);
}

int tick_broadcast_update_freq(struct clock_event_device *dev, u32 freq)
{
        int ret = -ENODEV;

        if (tick_is_broadcast_device(dev)) {
                raw_spin_lock(&tick_broadcast_lock);
                ret = __clockevents_update_freq(dev, freq);
                raw_spin_unlock(&tick_broadcast_lock);
        }
        return ret;
}

static void err_broadcast(const struct cpumask *mask)
{
        pr_crit_once("Failed to broadcast timer tick. Some CPUs may be unresponsive.\n");
}

static void tick_device_setup_broadcast_func(struct clock_event_device *dev)
{
        if (!dev->broadcast)
                dev->broadcast = tick_broadcast;
        if (!dev->broadcast) {
                pr_warn_once("%s depends on broadcast, but no broadcast function available\n",
                             dev->name);
                dev->broadcast = err_broadcast;
        }
}

/*
 * Check, if the device is dysfunctional and a placeholder, which
 * needs to be handled by the broadcast device.
 */
int tick_device_uses_broadcast(struct clock_event_device *dev, int cpu)
{
        struct clock_event_device *bc = tick_broadcast_device.evtdev;
        unsigned long flags;
        int ret;

        raw_spin_lock_irqsave(&tick_broadcast_lock, flags);

        /*
         * Devices might be registered with both periodic and oneshot
         * mode disabled. This signals, that the device needs to be
         * operated from the broadcast device and is a placeholder for
         * the cpu local device.
         */
        if (!tick_device_is_functional(dev)) {
                dev->event_handler = tick_handle_periodic;
                tick_device_setup_broadcast_func(dev);
                cpumask_set_cpu(cpu, tick_broadcast_mask);
                if (tick_broadcast_device.mode == TICKDEV_MODE_PERIODIC)
                        tick_broadcast_start_periodic(bc);
                else
                        tick_broadcast_setup_oneshot(bc);
                ret = 1;
        } else {
                /*
                 * Clear the broadcast bit for this cpu if the
                 * device is not power state affected.
                 */
                if (!(dev->features & CLOCK_EVT_FEAT_C3STOP))
                        cpumask_clear_cpu(cpu, tick_broadcast_mask);
                else
                        tick_device_setup_broadcast_func(dev);

                /*
                 * Clear the broadcast bit if the CPU is not in
                 * periodic broadcast on state.
                 */
                if (!cpumask_test_cpu(cpu, tick_broadcast_on))
                        cpumask_clear_cpu(cpu, tick_broadcast_mask);

                switch (tick_broadcast_device.mode) {
                case TICKDEV_MODE_ONESHOT:
                        /*
                         * If the system is in oneshot mode we can
                         * unconditionally clear the oneshot mask bit,
                         * because the CPU is running and therefore
                         * not in an idle state which causes the power
                         * state affected device to stop. Let the
                         * caller initialize the device.
                         */
                        tick_broadcast_clear_oneshot(cpu);
                        ret = 0;
                        break;

                case TICKDEV_MODE_PERIODIC:
                        /*
                         * If the system is in periodic mode, check
                         * whether the broadcast device can be
                         * switched off now.
                         */
                        if (cpumask_empty(tick_broadcast_mask) && bc)
                                clockevents_shutdown(bc);
                        /*
                         * If we kept the cpu in the broadcast mask,
                         * tell the caller to leave the per cpu device
                         * in shutdown state. The periodic interrupt
                         * is delivered by the broadcast device.
                         */
                        ret = cpumask_test_cpu(cpu, tick_broadcast_mask);
                        break;
                default:
                        /* Nothing to do */
                        ret = 0;
                        break;
                }
        }
        raw_spin_unlock_irqrestore(&tick_broadcast_lock, flags);
        return ret;
}

#ifdef CONFIG_GENERIC_CLOCKEVENTS_BROADCAST
int tick_receive_broadcast(void)
{
        struct tick_device *td = this_cpu_ptr(&tick_cpu_device);
        struct clock_event_device *evt = td->evtdev;

        if (!evt)
                return -ENODEV;

        if (!evt->event_handler)
                return -EINVAL;

        evt->event_handler(evt);
        return 0;
}
#endif

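/*
 * Illustrative sketch (assumption, not part of this file): on architectures
 * using CONFIG_GENERIC_CLOCKEVENTS_BROADCAST, the broadcast IPI handler on
 * the target CPU forwards the tick into the local device's handler via
 * tick_receive_broadcast(), e.g. in the timer IPI case of the arch IPI
 * dispatcher:
 *
 *        case IPI_TIMER:
 *                tick_receive_broadcast();
 *                break;
 *
 * The IPI number and dispatch code are architecture specific.
 */
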
/*
 * Broadcast the event to the cpus, which are set in the mask (mangled).
 */
static void tick_do_broadcast(struct cpumask *mask)
{
        int cpu = smp_processor_id();
        struct tick_device *td;

        /*
         * Check, if the current cpu is in the mask
         */
        if (cpumask_test_cpu(cpu, mask)) {
                cpumask_clear_cpu(cpu, mask);
                td = &per_cpu(tick_cpu_device, cpu);
                td->evtdev->event_handler(td->evtdev);
        }

        if (!cpumask_empty(mask)) {
                /*
                 * It might be necessary to actually check whether the devices
                 * have different broadcast functions. For now, just use the
                 * one of the first device. This works as long as we have this
                 * misfeature only on x86 (lapic)
                 */
                td = &per_cpu(tick_cpu_device, cpumask_first(mask));
                td->evtdev->broadcast(mask);
        }
}

/*
 * Periodic broadcast:
 * - invoke the broadcast handlers
 */
static void tick_do_periodic_broadcast(void)
{
        cpumask_and(tmpmask, cpu_online_mask, tick_broadcast_mask);
        tick_do_broadcast(tmpmask);
}

/*
 * Event handler for periodic broadcast ticks
 */
static void tick_handle_periodic_broadcast(struct clock_event_device *dev)
{
        ktime_t next;

        raw_spin_lock(&tick_broadcast_lock);

        tick_do_periodic_broadcast();

        /*
         * The device is in periodic mode. No reprogramming necessary:
         */
        if (dev->state == CLOCK_EVT_STATE_PERIODIC)
                goto unlock;

        /*
         * Setup the next period for devices, which do not have
         * periodic mode. We read dev->next_event first and add to it
         * when the event already expired. clockevents_program_event()
         * sets dev->next_event only when the event is really
         * programmed to the device.
         */
        for (next = dev->next_event; ;) {
                next = ktime_add(next, tick_period);

                if (!clockevents_program_event(dev, next, false))
                        goto unlock;
                tick_do_periodic_broadcast();
        }
unlock:
        raw_spin_unlock(&tick_broadcast_lock);
}

/**
 * tick_broadcast_control - Enable/disable or force broadcast mode
 * @mode:	The selected broadcast mode
 *
 * Called when the system enters a state where affected tick devices
 * might stop. Note: TICK_BROADCAST_FORCE cannot be undone.
 *
 * Called with interrupts disabled, so clockevents_lock is not
 * required here because the local clock event device cannot go away
 * under us.
 */
void tick_broadcast_control(enum tick_broadcast_mode mode)
{
        struct clock_event_device *bc, *dev;
        struct tick_device *td;
        int cpu, bc_stopped;

        td = this_cpu_ptr(&tick_cpu_device);
        dev = td->evtdev;

        /*
         * Is the device not affected by the power state?
         */
        if (!dev || !(dev->features & CLOCK_EVT_FEAT_C3STOP))
                return;

        if (!tick_device_is_functional(dev))
                return;

        raw_spin_lock(&tick_broadcast_lock);
        cpu = smp_processor_id();
        bc = tick_broadcast_device.evtdev;
        bc_stopped = cpumask_empty(tick_broadcast_mask);

        switch (mode) {
        case TICK_BROADCAST_FORCE:
                tick_broadcast_forced = 1;
                /* fall through */
        case TICK_BROADCAST_ON:
                cpumask_set_cpu(cpu, tick_broadcast_on);
                if (!cpumask_test_and_set_cpu(cpu, tick_broadcast_mask)) {
                        if (tick_broadcast_device.mode ==
                            TICKDEV_MODE_PERIODIC)
                                clockevents_shutdown(dev);
                }
                break;

        case TICK_BROADCAST_OFF:
                if (tick_broadcast_forced)
                        break;
                cpumask_clear_cpu(cpu, tick_broadcast_on);
                if (!tick_device_is_functional(dev))
                        break;
                if (cpumask_test_and_clear_cpu(cpu, tick_broadcast_mask)) {
                        if (tick_broadcast_device.mode ==
                            TICKDEV_MODE_PERIODIC)
                                tick_setup_periodic(dev, 0);
                }
                break;
        }

        if (cpumask_empty(tick_broadcast_mask)) {
                if (!bc_stopped)
                        clockevents_shutdown(bc);
        } else if (bc_stopped) {
                if (tick_broadcast_device.mode == TICKDEV_MODE_PERIODIC)
                        tick_broadcast_start_periodic(bc);
                else
                        tick_broadcast_setup_oneshot(bc);
        }
        raw_spin_unlock(&tick_broadcast_lock);
}
EXPORT_SYMBOL_GPL(tick_broadcast_control);

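/*
 * Illustrative usage sketch (assumption, not part of this file): callers
 * normally use the inline wrappers from <linux/tick.h> rather than calling
 * tick_broadcast_control() directly, e.g. from the setup code of a per-CPU
 * timer which is known to stop in deep power states:
 *
 *        tick_broadcast_enable();        TICK_BROADCAST_ON for this CPU
 *        tick_broadcast_disable();       TICK_BROADCAST_OFF for this CPU
 *        tick_broadcast_force();         TICK_BROADCAST_FORCE, cannot be undone
 */
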
/*
 * Set the periodic handler depending on broadcast on/off
 */
void tick_set_periodic_handler(struct clock_event_device *dev, int broadcast)
{
        if (!broadcast)
                dev->event_handler = tick_handle_periodic;
        else
                dev->event_handler = tick_handle_periodic_broadcast;
}

#ifdef CONFIG_HOTPLUG_CPU
/*
 * Remove a CPU from broadcasting
 */
void tick_shutdown_broadcast(unsigned int cpu)
{
        struct clock_event_device *bc;
        unsigned long flags;

        raw_spin_lock_irqsave(&tick_broadcast_lock, flags);

        bc = tick_broadcast_device.evtdev;
        cpumask_clear_cpu(cpu, tick_broadcast_mask);
        cpumask_clear_cpu(cpu, tick_broadcast_on);

        if (tick_broadcast_device.mode == TICKDEV_MODE_PERIODIC) {
                if (bc && cpumask_empty(tick_broadcast_mask))
                        clockevents_shutdown(bc);
        }

        raw_spin_unlock_irqrestore(&tick_broadcast_lock, flags);
}
#endif

void tick_suspend_broadcast(void)
{
        struct clock_event_device *bc;
        unsigned long flags;

        raw_spin_lock_irqsave(&tick_broadcast_lock, flags);

        bc = tick_broadcast_device.evtdev;
        if (bc)
                clockevents_shutdown(bc);

        raw_spin_unlock_irqrestore(&tick_broadcast_lock, flags);
}

/*
 * This is called from tick_resume_local() on a resuming CPU. That's
 * called from the core resume function, tick_unfreeze() and the magic XEN
 * resume hackery.
 *
 * In none of these cases the broadcast device mode can change and the
 * bit of the resuming CPU in the broadcast mask is safe as well.
 */
bool tick_resume_check_broadcast(void)
{
        if (tick_broadcast_device.mode == TICKDEV_MODE_ONESHOT)
                return false;
        else
                return cpumask_test_cpu(smp_processor_id(), tick_broadcast_mask);
}

void tick_resume_broadcast(void)
{
        struct clock_event_device *bc;
        unsigned long flags;

        raw_spin_lock_irqsave(&tick_broadcast_lock, flags);

        bc = tick_broadcast_device.evtdev;

        if (bc) {
                clockevents_tick_resume(bc);

                switch (tick_broadcast_device.mode) {
                case TICKDEV_MODE_PERIODIC:
                        if (!cpumask_empty(tick_broadcast_mask))
                                tick_broadcast_start_periodic(bc);
                        break;
                case TICKDEV_MODE_ONESHOT:
                        if (!cpumask_empty(tick_broadcast_mask))
                                tick_resume_broadcast_oneshot(bc);
                        break;
                }
        }
        raw_spin_unlock_irqrestore(&tick_broadcast_lock, flags);
}

#ifdef CONFIG_TICK_ONESHOT

static cpumask_var_t tick_broadcast_oneshot_mask;
static cpumask_var_t tick_broadcast_pending_mask;
static cpumask_var_t tick_broadcast_force_mask;

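/*
 * Roles of the oneshot masks, as used by the code below:
 *
 * tick_broadcast_oneshot_mask:	CPUs which went idle with a local device
 *	that may stop and which therefore rely on the broadcast device for
 *	their next wakeup.
 *
 * tick_broadcast_pending_mask:	CPUs whose expired event was picked up by
 *	the broadcast handler; they receive the broadcast IPI and must not
 *	reprogram their local device with an already expired event on exit.
 *
 * tick_broadcast_force_mask:	CPUs whose local expiry was already reached
 *	when leaving broadcast mode; the next broadcast run wakes them
 *	unconditionally.
 */
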
/*
 * Exposed for debugging: see timer_list.c
 */
struct cpumask *tick_get_broadcast_oneshot_mask(void)
{
        return tick_broadcast_oneshot_mask;
}

/*
 * Called before going idle with interrupts disabled. Checks whether a
 * broadcast event from the other core is about to happen. We detected
 * that in tick_broadcast_oneshot_control(). The callsite can use this
 * to avoid a deep idle transition as we are about to get the
 * broadcast IPI right away.
 */
int tick_check_broadcast_expired(void)
{
        return cpumask_test_cpu(smp_processor_id(), tick_broadcast_force_mask);
}

/*
 * Set broadcast interrupt affinity
 */
static void tick_broadcast_set_affinity(struct clock_event_device *bc,
                                        const struct cpumask *cpumask)
{
        if (!(bc->features & CLOCK_EVT_FEAT_DYNIRQ))
                return;

        if (cpumask_equal(bc->cpumask, cpumask))
                return;

        bc->cpumask = cpumask;
        irq_set_affinity(bc->irq, bc->cpumask);
}

static int tick_broadcast_set_event(struct clock_event_device *bc, int cpu,
                                    ktime_t expires, int force)
{
        int ret;

        if (bc->state != CLOCK_EVT_STATE_ONESHOT)
                clockevents_set_state(bc, CLOCK_EVT_STATE_ONESHOT);

        ret = clockevents_program_event(bc, expires, force);
        if (!ret)
                tick_broadcast_set_affinity(bc, cpumask_of(cpu));
        return ret;
}

static void tick_resume_broadcast_oneshot(struct clock_event_device *bc)
{
        clockevents_set_state(bc, CLOCK_EVT_STATE_ONESHOT);
}

/*
 * Called from irq_enter() when idle was interrupted to reenable the
 * per cpu device.
 */
void tick_check_oneshot_broadcast_this_cpu(void)
{
        if (cpumask_test_cpu(smp_processor_id(), tick_broadcast_oneshot_mask)) {
                struct tick_device *td = this_cpu_ptr(&tick_cpu_device);

                /*
                 * We might be in the middle of switching over from
                 * periodic to oneshot. If the CPU has not yet
                 * switched over, leave the device alone.
                 */
                if (td->mode == TICKDEV_MODE_ONESHOT) {
                        clockevents_set_state(td->evtdev,
                                              CLOCK_EVT_STATE_ONESHOT);
                }
        }
}

/*
 * Handle oneshot mode broadcasting
 */
static void tick_handle_oneshot_broadcast(struct clock_event_device *dev)
{
        struct tick_device *td;
        ktime_t now, next_event;
        int cpu, next_cpu = 0;

        raw_spin_lock(&tick_broadcast_lock);
again:
        dev->next_event.tv64 = KTIME_MAX;
        next_event.tv64 = KTIME_MAX;
        cpumask_clear(tmpmask);
        now = ktime_get();
        /* Find all expired events */
        for_each_cpu(cpu, tick_broadcast_oneshot_mask) {
                td = &per_cpu(tick_cpu_device, cpu);
                if (td->evtdev->next_event.tv64 <= now.tv64) {
                        cpumask_set_cpu(cpu, tmpmask);
                        /*
                         * Mark the remote cpu in the pending mask, so
                         * it can avoid reprogramming the cpu local
                         * timer in tick_broadcast_oneshot_control().
                         */
                        cpumask_set_cpu(cpu, tick_broadcast_pending_mask);
                } else if (td->evtdev->next_event.tv64 < next_event.tv64) {
                        next_event.tv64 = td->evtdev->next_event.tv64;
                        next_cpu = cpu;
                }
        }

        /*
         * Remove the current cpu from the pending mask. The event is
         * delivered immediately in tick_do_broadcast() !
         */
        cpumask_clear_cpu(smp_processor_id(), tick_broadcast_pending_mask);

        /* Take care of enforced broadcast requests */
        cpumask_or(tmpmask, tmpmask, tick_broadcast_force_mask);
        cpumask_clear(tick_broadcast_force_mask);

        /*
         * Sanity check. Catch the case where we try to broadcast to
         * offline cpus.
         */
        if (WARN_ON_ONCE(!cpumask_subset(tmpmask, cpu_online_mask)))
                cpumask_and(tmpmask, tmpmask, cpu_online_mask);

        /*
         * Wake up the cpus which have an expired event.
         */
        tick_do_broadcast(tmpmask);

        /*
         * Two reasons for reprogramming:
         *
         * - The global event did not expire any CPU local
         * events. This happens in dyntick mode, as the maximum PIT
         * delta is quite small.
         *
         * - There are pending events on sleeping CPUs which were not
         * in the event mask
         */
        if (next_event.tv64 != KTIME_MAX) {
                /*
                 * Rearm the broadcast device. If event expired,
                 * repeat the above
                 */
                if (tick_broadcast_set_event(dev, next_cpu, next_event, 0))
                        goto again;
        }
        raw_spin_unlock(&tick_broadcast_lock);
}

static int broadcast_needs_cpu(struct clock_event_device *bc, int cpu)
{
        if (!(bc->features & CLOCK_EVT_FEAT_HRTIMER))
                return 0;
        if (bc->next_event.tv64 == KTIME_MAX)
                return 0;
        return bc->bound_on == cpu ? -EBUSY : 0;
}

static void broadcast_shutdown_local(struct clock_event_device *bc,
                                     struct clock_event_device *dev)
{
        /*
         * For hrtimer based broadcasting we cannot shutdown the cpu
         * local device if our own event is the first one to expire or
         * if we own the broadcast timer.
         */
        if (bc->features & CLOCK_EVT_FEAT_HRTIMER) {
                if (broadcast_needs_cpu(bc, smp_processor_id()))
                        return;
                if (dev->next_event.tv64 < bc->next_event.tv64)
                        return;
        }
        clockevents_set_state(dev, CLOCK_EVT_STATE_SHUTDOWN);
}

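/*
 * Background note (assumption about related code, not from this file):
 * CLOCK_EVT_FEAT_HRTIMER marks the pseudo broadcast device registered by
 * kernel/time/tick-broadcast-hrtimer.c on systems without a real global
 * timer. That device is driven by the hrtimer of the CPU it is bound to
 * (bc->bound_on), which is why broadcast_needs_cpu() keeps that CPU out
 * of deep idle while a broadcast event is armed.
 */
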
/**
 * tick_broadcast_oneshot_control - Enter/exit broadcast oneshot mode
 * @state:	The target state (enter/exit)
 *
 * The system enters/leaves a state, where affected devices might stop.
 * Returns 0 on success, -EBUSY if the cpu is used to broadcast wakeups.
 *
 * Called with interrupts disabled, so clockevents_lock is not
 * required here because the local clock event device cannot go away
 * under us.
 */
int tick_broadcast_oneshot_control(enum tick_broadcast_state state)
{
        struct clock_event_device *bc, *dev;
        struct tick_device *td;
        int cpu, ret = 0;
        ktime_t now;

        /*
         * Periodic mode does not care about the enter/exit of power
         * states
         */
        if (tick_broadcast_device.mode == TICKDEV_MODE_PERIODIC)
                return 0;

        /*
         * We are called with preemption disabled from the depth of the
         * idle code, so we can't be moved away.
         */
        td = this_cpu_ptr(&tick_cpu_device);
        dev = td->evtdev;

        if (!(dev->features & CLOCK_EVT_FEAT_C3STOP))
                return 0;

        raw_spin_lock(&tick_broadcast_lock);
        bc = tick_broadcast_device.evtdev;
        cpu = smp_processor_id();

        if (state == TICK_BROADCAST_ENTER) {
                if (!cpumask_test_and_set_cpu(cpu, tick_broadcast_oneshot_mask)) {
                        WARN_ON_ONCE(cpumask_test_cpu(cpu, tick_broadcast_pending_mask));
                        broadcast_shutdown_local(bc, dev);
                        /*
                         * We only reprogram the broadcast timer if we
                         * did not mark ourself in the force mask and
                         * if the cpu local event is earlier than the
                         * broadcast event. If the current CPU is in
                         * the force mask, then we are going to be
                         * woken by the IPI right away.
                         */
                        if (!cpumask_test_cpu(cpu, tick_broadcast_force_mask) &&
                            dev->next_event.tv64 < bc->next_event.tv64)
                                tick_broadcast_set_event(bc, cpu, dev->next_event, 1);
                }
                /*
                 * If the current CPU owns the hrtimer broadcast
                 * mechanism, it cannot go deep idle and we remove the
                 * CPU from the broadcast mask. We don't have to go
                 * through the EXIT path as the local timer is not
                 * shutdown.
                 */
                ret = broadcast_needs_cpu(bc, cpu);
                if (ret)
                        cpumask_clear_cpu(cpu, tick_broadcast_oneshot_mask);
        } else {
                if (cpumask_test_and_clear_cpu(cpu, tick_broadcast_oneshot_mask)) {
                        clockevents_set_state(dev, CLOCK_EVT_STATE_ONESHOT);
                        /*
                         * The cpu which was handling the broadcast
                         * timer marked this cpu in the broadcast
                         * pending mask and fired the broadcast
                         * IPI. So we are going to handle the expired
                         * event anyway via the broadcast IPI
                         * handler. No need to reprogram the timer
                         * with an already expired event.
                         */
                        if (cpumask_test_and_clear_cpu(cpu,
                                        tick_broadcast_pending_mask))
                                goto out;

                        /*
                         * Bail out if there is no next event.
                         */
                        if (dev->next_event.tv64 == KTIME_MAX)
                                goto out;
                        /*
                         * If the pending bit is not set, then we are
                         * either the CPU handling the broadcast
                         * interrupt or we got woken by something else.
                         *
                         * We are no longer in the broadcast mask, so
                         * if the cpu local expiry time is already
                         * reached, we would reprogram the cpu local
                         * timer with an already expired event.
                         *
                         * This can lead to a ping-pong when we return
                         * to idle and therefore rearm the broadcast
                         * timer before the cpu local timer was able
                         * to fire. This happens because the forced
                         * reprogramming makes sure that the event
                         * will happen in the future and depending on
                         * the min_delta setting this might be far
                         * enough out that the ping-pong starts.
                         *
                         * If the cpu local next_event has expired
                         * then we know that the broadcast timer
                         * next_event has expired as well and
                         * broadcast is about to be handled. So we
                         * avoid reprogramming and enforce that the
                         * broadcast handler, which did not run yet,
                         * will invoke the cpu local handler.
                         *
                         * We cannot call the handler directly from
                         * here, because we might be in a NOHZ phase
                         * and we did not go through the irq_enter()
                         * nohz fixups.
                         */
                        now = ktime_get();
                        if (dev->next_event.tv64 <= now.tv64) {
                                cpumask_set_cpu(cpu, tick_broadcast_force_mask);
                                goto out;
                        }
                        /*
                         * We got woken by something else. Reprogram
                         * the cpu local timer device.
                         */
                        tick_program_event(dev->next_event, 1);
                }
        }
out:
        raw_spin_unlock(&tick_broadcast_lock);
        return ret;
}
EXPORT_SYMBOL_GPL(tick_broadcast_oneshot_control);

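/*
 * Illustrative usage sketch (assumption, not part of this file): the idle
 * code reaches this function through the tick_broadcast_enter() and
 * tick_broadcast_exit() wrappers in <linux/tick.h>, roughly:
 *
 *        if (tick_broadcast_enter()) {
 *                enter_shallow_idle_state();     (hypothetical: -EBUSY means
 *                                                 this CPU drives the hrtimer
 *                                                 based broadcast and must keep
 *                                                 its local timer running)
 *        } else {
 *                enter_deep_idle_state();        (hypothetical deep C-state entry)
 *                tick_broadcast_exit();
 *        }
 */
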
/*
 * Reset the one shot broadcast for a cpu
 *
 * Called with tick_broadcast_lock held
 */
static void tick_broadcast_clear_oneshot(int cpu)
{
        cpumask_clear_cpu(cpu, tick_broadcast_oneshot_mask);
        cpumask_clear_cpu(cpu, tick_broadcast_pending_mask);
}

static void tick_broadcast_init_next_event(struct cpumask *mask,
                                           ktime_t expires)
{
        struct tick_device *td;
        int cpu;

        for_each_cpu(cpu, mask) {
                td = &per_cpu(tick_cpu_device, cpu);
                if (td->evtdev)
                        td->evtdev->next_event = expires;
        }
}

/**
 * tick_broadcast_setup_oneshot - setup the broadcast device
 */
void tick_broadcast_setup_oneshot(struct clock_event_device *bc)
{
        int cpu = smp_processor_id();

        /* Set it up only once ! */
        if (bc->event_handler != tick_handle_oneshot_broadcast) {
                int was_periodic = bc->state == CLOCK_EVT_STATE_PERIODIC;

                bc->event_handler = tick_handle_oneshot_broadcast;

                /*
                 * We must be careful here. There might be other CPUs
                 * waiting for periodic broadcast. We need to set the
                 * oneshot_mask bits for those and program the
                 * broadcast device to fire.
                 */
                cpumask_copy(tmpmask, tick_broadcast_mask);
                cpumask_clear_cpu(cpu, tmpmask);
                cpumask_or(tick_broadcast_oneshot_mask,
                           tick_broadcast_oneshot_mask, tmpmask);

                if (was_periodic && !cpumask_empty(tmpmask)) {
                        clockevents_set_state(bc, CLOCK_EVT_STATE_ONESHOT);
                        tick_broadcast_init_next_event(tmpmask,
                                                       tick_next_period);
                        tick_broadcast_set_event(bc, cpu, tick_next_period, 1);
                } else
                        bc->next_event.tv64 = KTIME_MAX;
        } else {
                /*
                 * The first cpu which switches to oneshot mode sets
                 * the bit for all other cpus which are in the general
                 * (periodic) broadcast mask. So the bit is set and
                 * would prevent this CPU's first broadcast enter from
                 * programming the bc device. Clear it here.
                 */
                tick_broadcast_clear_oneshot(cpu);
        }
}

/*
 * Select oneshot operating mode for the broadcast device
 */
void tick_broadcast_switch_to_oneshot(void)
{
        struct clock_event_device *bc;
        unsigned long flags;

        raw_spin_lock_irqsave(&tick_broadcast_lock, flags);

        tick_broadcast_device.mode = TICKDEV_MODE_ONESHOT;
        bc = tick_broadcast_device.evtdev;
        if (bc)
                tick_broadcast_setup_oneshot(bc);

        raw_spin_unlock_irqrestore(&tick_broadcast_lock, flags);
}

#ifdef CONFIG_HOTPLUG_CPU
void hotplug_cpu__broadcast_tick_pull(int deadcpu)
{
        struct clock_event_device *bc;
        unsigned long flags;

        raw_spin_lock_irqsave(&tick_broadcast_lock, flags);
        bc = tick_broadcast_device.evtdev;

        if (bc && broadcast_needs_cpu(bc, deadcpu)) {
                /* This moves the broadcast assignment to this CPU: */
                clockevents_program_event(bc, bc->next_event, 1);
        }
        raw_spin_unlock_irqrestore(&tick_broadcast_lock, flags);
}

/*
 * Remove a dead CPU from broadcasting
 */
void tick_shutdown_broadcast_oneshot(unsigned int cpu)
{
        unsigned long flags;

        raw_spin_lock_irqsave(&tick_broadcast_lock, flags);

        /*
         * Clear the broadcast masks for the dead cpu, but do not stop
         * the broadcast device!
         */
        cpumask_clear_cpu(cpu, tick_broadcast_oneshot_mask);
        cpumask_clear_cpu(cpu, tick_broadcast_pending_mask);
        cpumask_clear_cpu(cpu, tick_broadcast_force_mask);

        raw_spin_unlock_irqrestore(&tick_broadcast_lock, flags);
}
#endif

/*
 * Check, whether the broadcast device is in one shot mode
 */
int tick_broadcast_oneshot_active(void)
{
        return tick_broadcast_device.mode == TICKDEV_MODE_ONESHOT;
}

/*
 * Check whether the broadcast device supports oneshot.
 */
bool tick_broadcast_oneshot_available(void)
{
        struct clock_event_device *bc = tick_broadcast_device.evtdev;

        return bc ? bc->features & CLOCK_EVT_FEAT_ONESHOT : false;
}

#endif

void __init tick_broadcast_init(void)
{
        zalloc_cpumask_var(&tick_broadcast_mask, GFP_NOWAIT);
        zalloc_cpumask_var(&tick_broadcast_on, GFP_NOWAIT);
        zalloc_cpumask_var(&tmpmask, GFP_NOWAIT);
#ifdef CONFIG_TICK_ONESHOT
        zalloc_cpumask_var(&tick_broadcast_oneshot_mask, GFP_NOWAIT);
        zalloc_cpumask_var(&tick_broadcast_pending_mask, GFP_NOWAIT);
        zalloc_cpumask_var(&tick_broadcast_force_mask, GFP_NOWAIT);
#endif
}