// SPDX-License-Identifier: GPL-2.0
/*
 * This file contains functions which emulate a local clock-event
 * device via a broadcast event source.
 *
 * Copyright(C) 2005-2006, Thomas Gleixner <tglx@linutronix.de>
 * Copyright(C) 2005-2007, Red Hat, Inc., Ingo Molnar
 * Copyright(C) 2006-2007, Timesys Corp., Thomas Gleixner
 */
#include <linux/cpu.h>
#include <linux/err.h>
#include <linux/hrtimer.h>
#include <linux/interrupt.h>
#include <linux/percpu.h>
#include <linux/profile.h>
#include <linux/sched.h>
#include <linux/smp.h>
#include <linux/module.h>

#include "tick-internal.h"

/*
 * Broadcast support for broken x86 hardware, where the local apic
 * timer stops in C3 state.
 */

static struct tick_device tick_broadcast_device;
static cpumask_var_t tick_broadcast_mask __cpumask_var_read_mostly;
static cpumask_var_t tick_broadcast_on __cpumask_var_read_mostly;
static cpumask_var_t tmpmask __cpumask_var_read_mostly;
static int tick_broadcast_forced;

static __cacheline_aligned_in_smp DEFINE_RAW_SPINLOCK(tick_broadcast_lock);

#ifdef CONFIG_TICK_ONESHOT
static DEFINE_PER_CPU(struct clock_event_device *, tick_oneshot_wakeup_device);

static void tick_broadcast_setup_oneshot(struct clock_event_device *bc);
static void tick_broadcast_clear_oneshot(int cpu);
static void tick_resume_broadcast_oneshot(struct clock_event_device *bc);
# ifdef CONFIG_HOTPLUG_CPU
static void tick_broadcast_oneshot_offline(unsigned int cpu);
# endif
#else
static inline void tick_broadcast_setup_oneshot(struct clock_event_device *bc) { BUG(); }
static inline void tick_broadcast_clear_oneshot(int cpu) { }
static inline void tick_resume_broadcast_oneshot(struct clock_event_device *bc) { }
# ifdef CONFIG_HOTPLUG_CPU
static inline void tick_broadcast_oneshot_offline(unsigned int cpu) { }
# endif
#endif

/*
 * Debugging: see timer_list.c
 */
struct tick_device *tick_get_broadcast_device(void)
{
	return &tick_broadcast_device;
}

struct cpumask *tick_get_broadcast_mask(void)
{
	return tick_broadcast_mask;
}

static struct clock_event_device *tick_get_oneshot_wakeup_device(int cpu);

const struct clock_event_device *tick_get_wakeup_device(int cpu)
{
	return tick_get_oneshot_wakeup_device(cpu);
}

/*
 * Start the device in periodic mode
 */
static void tick_broadcast_start_periodic(struct clock_event_device *bc)
{
	if (bc)
		tick_setup_periodic(bc, 1);
}

/*
 * Check, if the device can be utilized as broadcast device:
 */
static bool tick_check_broadcast_device(struct clock_event_device *curdev,
					struct clock_event_device *newdev)
{
	if ((newdev->features & CLOCK_EVT_FEAT_DUMMY) ||
	    (newdev->features & CLOCK_EVT_FEAT_PERCPU) ||
	    (newdev->features & CLOCK_EVT_FEAT_C3STOP))
		return false;

	if (tick_broadcast_device.mode == TICKDEV_MODE_ONESHOT &&
	    !(newdev->features & CLOCK_EVT_FEAT_ONESHOT))
		return false;

	return !curdev || newdev->rating > curdev->rating;
}

#ifdef CONFIG_TICK_ONESHOT
static struct clock_event_device *tick_get_oneshot_wakeup_device(int cpu)
{
	return per_cpu(tick_oneshot_wakeup_device, cpu);
}

static void tick_oneshot_wakeup_handler(struct clock_event_device *wd)
{
	/*
	 * If we woke up early and the tick was reprogrammed in the
	 * meantime then this may be spurious but harmless.
	 */
	tick_receive_broadcast();
}

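/*
 * Try to install @newdev as the per-CPU oneshot wakeup device for @cpu.
 * Passing a NULL @newdev removes the current wakeup device.
 */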
static bool tick_set_oneshot_wakeup_device(struct clock_event_device *newdev,
					   int cpu)
{
	struct clock_event_device *curdev = tick_get_oneshot_wakeup_device(cpu);

	if (!newdev)
		goto set_device;

	if ((newdev->features & CLOCK_EVT_FEAT_DUMMY) ||
	    (newdev->features & CLOCK_EVT_FEAT_C3STOP))
		return false;

	if (!(newdev->features & CLOCK_EVT_FEAT_PERCPU) ||
	    !(newdev->features & CLOCK_EVT_FEAT_ONESHOT))
		return false;

	if (!cpumask_equal(newdev->cpumask, cpumask_of(cpu)))
		return false;

	if (curdev && newdev->rating <= curdev->rating)
		return false;

	if (!try_module_get(newdev->owner))
		return false;

	newdev->event_handler = tick_oneshot_wakeup_handler;
set_device:
	clockevents_exchange_device(curdev, newdev);
	per_cpu(tick_oneshot_wakeup_device, cpu) = newdev;
	return true;
}
#else
static struct clock_event_device *tick_get_oneshot_wakeup_device(int cpu)
{
	return NULL;
}

static bool tick_set_oneshot_wakeup_device(struct clock_event_device *newdev,
					   int cpu)
{
	return false;
}
#endif

/*
 * Conditionally install/replace broadcast device
 */
void tick_install_broadcast_device(struct clock_event_device *dev, int cpu)
{
	struct clock_event_device *cur = tick_broadcast_device.evtdev;

	if (tick_set_oneshot_wakeup_device(dev, cpu))
		return;

	if (!tick_check_broadcast_device(cur, dev))
		return;

	if (!try_module_get(dev->owner))
		return;

	clockevents_exchange_device(cur, dev);
	if (cur)
		cur->event_handler = clockevents_handle_noop;
	tick_broadcast_device.evtdev = dev;
	if (!cpumask_empty(tick_broadcast_mask))
		tick_broadcast_start_periodic(dev);

	if (!(dev->features & CLOCK_EVT_FEAT_ONESHOT))
		return;

	/*
	 * If the system already runs in oneshot mode, switch the newly
	 * registered broadcast device to oneshot mode explicitly.
	 */
	if (tick_broadcast_oneshot_active()) {
		tick_broadcast_switch_to_oneshot();
		return;
	}

	/*
	 * Inform all cpus about this. We might be in a situation
	 * where we did not switch to oneshot mode because the per cpu
	 * devices are affected by CLOCK_EVT_FEAT_C3STOP and the lack
	 * of a oneshot capable broadcast device. Without that
	 * notification the system stays stuck in periodic mode
	 * forever.
	 */
	tick_clock_notify();
}

/*
 * Check, if the device is the broadcast device
 */
int tick_is_broadcast_device(struct clock_event_device *dev)
{
	return (dev && tick_broadcast_device.evtdev == dev);
}

int tick_broadcast_update_freq(struct clock_event_device *dev, u32 freq)
{
	int ret = -ENODEV;

	if (tick_is_broadcast_device(dev)) {
		raw_spin_lock(&tick_broadcast_lock);
		ret = __clockevents_update_freq(dev, freq);
		raw_spin_unlock(&tick_broadcast_lock);
	}
	return ret;
}

static void err_broadcast(const struct cpumask *mask)
{
	pr_crit_once("Failed to broadcast timer tick. Some CPUs may be unresponsive.\n");
}

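/*
 * Set up the broadcast callback of a tick device which depends on the
 * broadcast mechanism. Fall back to a warning stub when no broadcast
 * function is available.
 */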
static void tick_device_setup_broadcast_func(struct clock_event_device *dev)
{
	if (!dev->broadcast)
		dev->broadcast = tick_broadcast;
	if (!dev->broadcast) {
		pr_warn_once("%s depends on broadcast, but no broadcast function available\n",
			     dev->name);
		dev->broadcast = err_broadcast;
	}
}

/*
 * Check, if the device is dysfunctional and a placeholder, which
 * needs to be handled by the broadcast device.
 */
int tick_device_uses_broadcast(struct clock_event_device *dev, int cpu)
{
	struct clock_event_device *bc = tick_broadcast_device.evtdev;
	unsigned long flags;
	int ret = 0;

	raw_spin_lock_irqsave(&tick_broadcast_lock, flags);

	/*
	 * Devices might be registered with both periodic and oneshot
	 * mode disabled. This signals, that the device needs to be
	 * operated from the broadcast device and is a placeholder for
	 * the cpu local device.
	 */
	if (!tick_device_is_functional(dev)) {
		dev->event_handler = tick_handle_periodic;
		tick_device_setup_broadcast_func(dev);
		cpumask_set_cpu(cpu, tick_broadcast_mask);
		if (tick_broadcast_device.mode == TICKDEV_MODE_PERIODIC)
			tick_broadcast_start_periodic(bc);
		else
			tick_broadcast_setup_oneshot(bc);
		ret = 1;
	} else {
		/*
		 * Clear the broadcast bit for this cpu if the
		 * device is not power state affected.
		 */
		if (!(dev->features & CLOCK_EVT_FEAT_C3STOP))
			cpumask_clear_cpu(cpu, tick_broadcast_mask);
		else
			tick_device_setup_broadcast_func(dev);

		/*
		 * Clear the broadcast bit if the CPU is not in
		 * periodic broadcast on state.
		 */
		if (!cpumask_test_cpu(cpu, tick_broadcast_on))
			cpumask_clear_cpu(cpu, tick_broadcast_mask);

		switch (tick_broadcast_device.mode) {
		case TICKDEV_MODE_ONESHOT:
			/*
			 * If the system is in oneshot mode we can
			 * unconditionally clear the oneshot mask bit,
			 * because the CPU is running and therefore
			 * not in an idle state which causes the power
			 * state affected device to stop. Let the
			 * caller initialize the device.
			 */
			tick_broadcast_clear_oneshot(cpu);
			ret = 0;
			break;

		case TICKDEV_MODE_PERIODIC:
			/*
			 * If the system is in periodic mode, check
			 * whether the broadcast device can be
			 * switched off now.
			 */
			if (cpumask_empty(tick_broadcast_mask) && bc)
				clockevents_shutdown(bc);
			/*
			 * If we kept the cpu in the broadcast mask,
			 * tell the caller to leave the per cpu device
			 * in shutdown state. The periodic interrupt
			 * is delivered by the broadcast device, if
			 * the broadcast device exists and is not
			 * hrtimer based.
			 */
			if (bc && !(bc->features & CLOCK_EVT_FEAT_HRTIMER))
				ret = cpumask_test_cpu(cpu, tick_broadcast_mask);
			break;
		default:
			break;
		}
	}
	raw_spin_unlock_irqrestore(&tick_broadcast_lock, flags);
	return ret;
}

int tick_receive_broadcast(void)
{
	struct tick_device *td = this_cpu_ptr(&tick_cpu_device);
	struct clock_event_device *evt = td->evtdev;

	if (!evt)
		return -ENODEV;

	if (!evt->event_handler)
		return -EINVAL;

	evt->event_handler(evt);
	return 0;
}

/*
 * Broadcast the event to the cpus, which are set in the mask (mangled).
 */
static bool tick_do_broadcast(struct cpumask *mask)
{
	int cpu = smp_processor_id();
	struct tick_device *td;
	bool local = false;

	/*
	 * Check, if the current cpu is in the mask
	 */
	if (cpumask_test_cpu(cpu, mask)) {
		struct clock_event_device *bc = tick_broadcast_device.evtdev;

		cpumask_clear_cpu(cpu, mask);
		/*
		 * We only run the local handler, if the broadcast
		 * device is not hrtimer based. Otherwise we run into
		 * a hrtimer recursion.
		 *
		 * local timer_interrupt()
		 *   local_handler()
		 *     expire_hrtimers()
		 *       bc_handler()
		 *         local_handler()
		 *           expire_hrtimers()
		 */
		local = !(bc->features & CLOCK_EVT_FEAT_HRTIMER);
	}

	if (!cpumask_empty(mask)) {
		/*
		 * It might be necessary to actually check whether the devices
		 * have different broadcast functions. For now, just use the
		 * one of the first device. This works as long as we have this
		 * misfeature only on x86 (lapic)
		 */
		td = &per_cpu(tick_cpu_device, cpumask_first(mask));
		td->evtdev->broadcast(mask);
	}
	return local;
}

/*
 * Periodic broadcast:
 * - invoke the broadcast handlers
 */
static bool tick_do_periodic_broadcast(void)
{
	cpumask_and(tmpmask, cpu_online_mask, tick_broadcast_mask);
	return tick_do_broadcast(tmpmask);
}

/*
 * Event handler for periodic broadcast ticks
 */
static void tick_handle_periodic_broadcast(struct clock_event_device *dev)
{
	struct tick_device *td = this_cpu_ptr(&tick_cpu_device);
	bool bc_local;

	raw_spin_lock(&tick_broadcast_lock);

	/* Handle spurious interrupts gracefully */
	if (clockevent_state_shutdown(tick_broadcast_device.evtdev)) {
		raw_spin_unlock(&tick_broadcast_lock);
		return;
	}

	bc_local = tick_do_periodic_broadcast();

	if (clockevent_state_oneshot(dev)) {
		ktime_t next = ktime_add_ns(dev->next_event, TICK_NSEC);

		clockevents_program_event(dev, next, true);
	}
	raw_spin_unlock(&tick_broadcast_lock);

	/*
	 * We run the handler of the local cpu after dropping
	 * tick_broadcast_lock because the handler might deadlock when
	 * trying to switch to oneshot mode.
	 */
	if (bc_local)
		td->evtdev->event_handler(td->evtdev);
}

/**
 * tick_broadcast_control - Enable/disable or force broadcast mode
 * @mode:	The selected broadcast mode
 *
 * Called when the system enters a state where affected tick devices
 * might stop. Note: TICK_BROADCAST_FORCE cannot be undone.
 */
void tick_broadcast_control(enum tick_broadcast_mode mode)
{
	struct clock_event_device *bc, *dev;
	struct tick_device *td;
	int cpu, bc_stopped;
	unsigned long flags;

	/* Protects also the local clockevent device. */
	raw_spin_lock_irqsave(&tick_broadcast_lock, flags);
	td = this_cpu_ptr(&tick_cpu_device);
	dev = td->evtdev;

	/*
	 * Is the device not affected by the powerstate ?
	 */
	if (!dev || !(dev->features & CLOCK_EVT_FEAT_C3STOP))
		goto out;

	if (!tick_device_is_functional(dev))
		goto out;

	cpu = smp_processor_id();
	bc = tick_broadcast_device.evtdev;
	bc_stopped = cpumask_empty(tick_broadcast_mask);

	switch (mode) {
	case TICK_BROADCAST_FORCE:
		tick_broadcast_forced = 1;
		fallthrough;
	case TICK_BROADCAST_ON:
		cpumask_set_cpu(cpu, tick_broadcast_on);
		if (!cpumask_test_and_set_cpu(cpu, tick_broadcast_mask)) {
			/*
			 * Only shutdown the cpu local device, if:
			 *
			 * - the broadcast device exists
			 * - the broadcast device is not a hrtimer based one
			 * - the broadcast device is in periodic mode to
			 *   avoid a hiccup during switch to oneshot mode
			 */
			if (bc && !(bc->features & CLOCK_EVT_FEAT_HRTIMER) &&
			    tick_broadcast_device.mode == TICKDEV_MODE_PERIODIC)
				clockevents_shutdown(dev);
		}
		break;

	case TICK_BROADCAST_OFF:
		if (tick_broadcast_forced)
			break;
		cpumask_clear_cpu(cpu, tick_broadcast_on);
		if (cpumask_test_and_clear_cpu(cpu, tick_broadcast_mask)) {
			if (tick_broadcast_device.mode ==
			    TICKDEV_MODE_PERIODIC)
				tick_setup_periodic(dev, 0);
		}
		break;
	}

	if (bc) {
		if (cpumask_empty(tick_broadcast_mask)) {
			if (!bc_stopped)
				clockevents_shutdown(bc);
		} else if (bc_stopped) {
			if (tick_broadcast_device.mode == TICKDEV_MODE_PERIODIC)
				tick_broadcast_start_periodic(bc);
			else
				tick_broadcast_setup_oneshot(bc);
		}
	}
out:
	raw_spin_unlock_irqrestore(&tick_broadcast_lock, flags);
}
EXPORT_SYMBOL_GPL(tick_broadcast_control);

/*
 * Set the periodic handler depending on broadcast on/off
 */
void tick_set_periodic_handler(struct clock_event_device *dev, int broadcast)
{
	if (!broadcast)
		dev->event_handler = tick_handle_periodic;
	else
		dev->event_handler = tick_handle_periodic_broadcast;
}

#ifdef CONFIG_HOTPLUG_CPU
static void tick_shutdown_broadcast(void)
{
	struct clock_event_device *bc = tick_broadcast_device.evtdev;

	if (tick_broadcast_device.mode == TICKDEV_MODE_PERIODIC) {
		if (bc && cpumask_empty(tick_broadcast_mask))
			clockevents_shutdown(bc);
	}
}

/*
 * Remove a CPU from broadcasting
 */
void tick_broadcast_offline(unsigned int cpu)
{
	raw_spin_lock(&tick_broadcast_lock);
	cpumask_clear_cpu(cpu, tick_broadcast_mask);
	cpumask_clear_cpu(cpu, tick_broadcast_on);
	tick_broadcast_oneshot_offline(cpu);
	tick_shutdown_broadcast();
	raw_spin_unlock(&tick_broadcast_lock);
}

#endif

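/*
 * Shut the broadcast device down on system suspend. It is started
 * again from tick_resume_broadcast() on resume.
 */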
void tick_suspend_broadcast(void)
{
	struct clock_event_device *bc;
	unsigned long flags;

	raw_spin_lock_irqsave(&tick_broadcast_lock, flags);

	bc = tick_broadcast_device.evtdev;
	if (bc)
		clockevents_shutdown(bc);

	raw_spin_unlock_irqrestore(&tick_broadcast_lock, flags);
}

/*
 * This is called from tick_resume_local() on a resuming CPU. That's
 * called from the core resume function, tick_unfreeze() and the magic XEN
 * resume hackery.
 *
 * In none of these cases the broadcast device mode can change and the
 * bit of the resuming CPU in the broadcast mask is safe as well.
 */
bool tick_resume_check_broadcast(void)
{
	if (tick_broadcast_device.mode == TICKDEV_MODE_ONESHOT)
		return false;
	else
		return cpumask_test_cpu(smp_processor_id(), tick_broadcast_mask);
}

void tick_resume_broadcast(void)
{
	struct clock_event_device *bc;
	unsigned long flags;

	raw_spin_lock_irqsave(&tick_broadcast_lock, flags);

	bc = tick_broadcast_device.evtdev;

	if (bc) {
		clockevents_tick_resume(bc);

		switch (tick_broadcast_device.mode) {
		case TICKDEV_MODE_PERIODIC:
			if (!cpumask_empty(tick_broadcast_mask))
				tick_broadcast_start_periodic(bc);
			break;
		case TICKDEV_MODE_ONESHOT:
			if (!cpumask_empty(tick_broadcast_mask))
				tick_resume_broadcast_oneshot(bc);
			break;
		}
	}
	raw_spin_unlock_irqrestore(&tick_broadcast_lock, flags);
}

#ifdef CONFIG_TICK_ONESHOT

static cpumask_var_t tick_broadcast_oneshot_mask __cpumask_var_read_mostly;
static cpumask_var_t tick_broadcast_pending_mask __cpumask_var_read_mostly;
static cpumask_var_t tick_broadcast_force_mask __cpumask_var_read_mostly;

/*
 * Exposed for debugging: see timer_list.c
 */
struct cpumask *tick_get_broadcast_oneshot_mask(void)
{
	return tick_broadcast_oneshot_mask;
}

/*
 * Called before going idle with interrupts disabled. Checks whether a
 * broadcast event from the other core is about to happen. We detected
 * that in tick_broadcast_oneshot_control(). The callsite can use this
 * to avoid a deep idle transition as we are about to get the
 * broadcast IPI right away.
 */
noinstr int tick_check_broadcast_expired(void)
{
#ifdef _ASM_GENERIC_BITOPS_INSTRUMENTED_NON_ATOMIC_H
	return arch_test_bit(smp_processor_id(), cpumask_bits(tick_broadcast_force_mask));
#else
	return cpumask_test_cpu(smp_processor_id(), tick_broadcast_force_mask);
#endif
}

/*
 * Set broadcast interrupt affinity
 */
static void tick_broadcast_set_affinity(struct clock_event_device *bc,
					const struct cpumask *cpumask)
{
	if (!(bc->features & CLOCK_EVT_FEAT_DYNIRQ))
		return;

	if (cpumask_equal(bc->cpumask, cpumask))
		return;

	bc->cpumask = cpumask;
	irq_set_affinity(bc->irq, bc->cpumask);
}

static void tick_broadcast_set_event(struct clock_event_device *bc, int cpu,
				     ktime_t expires)
{
	if (!clockevent_state_oneshot(bc))
		clockevents_switch_state(bc, CLOCK_EVT_STATE_ONESHOT);

	clockevents_program_event(bc, expires, 1);
	tick_broadcast_set_affinity(bc, cpumask_of(cpu));
}

static void tick_resume_broadcast_oneshot(struct clock_event_device *bc)
{
	clockevents_switch_state(bc, CLOCK_EVT_STATE_ONESHOT);
}

/*
 * Called from irq_enter() when idle was interrupted to reenable the
 * per cpu device.
 */
void tick_check_oneshot_broadcast_this_cpu(void)
{
	if (cpumask_test_cpu(smp_processor_id(), tick_broadcast_oneshot_mask)) {
		struct tick_device *td = this_cpu_ptr(&tick_cpu_device);

		/*
		 * We might be in the middle of switching over from
		 * periodic to oneshot. If the CPU has not yet
		 * switched over, leave the device alone.
		 */
		if (td->mode == TICKDEV_MODE_ONESHOT) {
			clockevents_switch_state(td->evtdev,
						 CLOCK_EVT_STATE_ONESHOT);
		}
	}
}

/*
 * Handle oneshot mode broadcasting
 */
static void tick_handle_oneshot_broadcast(struct clock_event_device *dev)
{
	struct tick_device *td;
	ktime_t now, next_event;
	int cpu, next_cpu = 0;
	bool bc_local;

	raw_spin_lock(&tick_broadcast_lock);
	dev->next_event = KTIME_MAX;
	next_event = KTIME_MAX;
	cpumask_clear(tmpmask);
	now = ktime_get();
	/* Find all expired events */
	for_each_cpu(cpu, tick_broadcast_oneshot_mask) {
		/*
		 * Required for !SMP because for_each_cpu() reports
		 * unconditionally CPU0 as set on UP kernels.
		 */
		if (!IS_ENABLED(CONFIG_SMP) &&
		    cpumask_empty(tick_broadcast_oneshot_mask))
			break;

		td = &per_cpu(tick_cpu_device, cpu);
		if (td->evtdev->next_event <= now) {
			cpumask_set_cpu(cpu, tmpmask);
			/*
			 * Mark the remote cpu in the pending mask, so
			 * it can avoid reprogramming the cpu local
			 * timer in tick_broadcast_oneshot_control().
			 */
			cpumask_set_cpu(cpu, tick_broadcast_pending_mask);
		} else if (td->evtdev->next_event < next_event) {
			next_event = td->evtdev->next_event;
			next_cpu = cpu;
		}
	}

	/*
	 * Remove the current cpu from the pending mask. The event is
	 * delivered immediately in tick_do_broadcast() !
	 */
	cpumask_clear_cpu(smp_processor_id(), tick_broadcast_pending_mask);

	/* Take care of enforced broadcast requests */
	cpumask_or(tmpmask, tmpmask, tick_broadcast_force_mask);
	cpumask_clear(tick_broadcast_force_mask);

	/*
	 * Sanity check. Catch the case where we try to broadcast to
	 * offline cpus.
	 */
	if (WARN_ON_ONCE(!cpumask_subset(tmpmask, cpu_online_mask)))
		cpumask_and(tmpmask, tmpmask, cpu_online_mask);

	/*
	 * Wakeup the cpus which have an expired event.
	 */
	bc_local = tick_do_broadcast(tmpmask);

	/*
	 * Two reasons for reprogram:
	 *
	 * - The global event did not expire any CPU local
	 * events. This happens in dyntick mode, as the maximum PIT
	 * delta is quite small.
	 *
	 * - There are pending events on sleeping CPUs which were not
	 * in the event mask
	 */
	if (next_event != KTIME_MAX)
		tick_broadcast_set_event(dev, next_cpu, next_event);

	raw_spin_unlock(&tick_broadcast_lock);

	if (bc_local) {
		td = this_cpu_ptr(&tick_cpu_device);
		td->evtdev->event_handler(td->evtdev);
	}
}

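/*
 * Returns -EBUSY when the hrtimer based broadcast device has a pending
 * event and is bound to @cpu: that CPU must not enter deep idle because
 * it has to serve the broadcast timer itself.
 */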
static int broadcast_needs_cpu(struct clock_event_device *bc, int cpu)
{
	if (!(bc->features & CLOCK_EVT_FEAT_HRTIMER))
		return 0;
	if (bc->next_event == KTIME_MAX)
		return 0;
	return bc->bound_on == cpu ? -EBUSY : 0;
}

static void broadcast_shutdown_local(struct clock_event_device *bc,
				     struct clock_event_device *dev)
{
	/*
	 * For hrtimer based broadcasting we cannot shutdown the cpu
	 * local device if our own event is the first one to expire or
	 * if we own the broadcast timer.
	 */
	if (bc->features & CLOCK_EVT_FEAT_HRTIMER) {
		if (broadcast_needs_cpu(bc, smp_processor_id()))
			return;
		if (dev->next_event < bc->next_event)
			return;
	}
	clockevents_switch_state(dev, CLOCK_EVT_STATE_SHUTDOWN);
}

static int ___tick_broadcast_oneshot_control(enum tick_broadcast_state state,
					     struct tick_device *td,
					     int cpu)
{
	struct clock_event_device *bc, *dev = td->evtdev;
	int ret = 0;
	ktime_t now;

	raw_spin_lock(&tick_broadcast_lock);
	bc = tick_broadcast_device.evtdev;

	if (state == TICK_BROADCAST_ENTER) {
		/*
		 * If the current CPU owns the hrtimer broadcast
		 * mechanism, it cannot go deep idle and we do not add
		 * the CPU to the broadcast mask. We don't have to go
		 * through the EXIT path as the local timer is not
		 * shutdown.
		 */
		ret = broadcast_needs_cpu(bc, cpu);
		if (ret)
			goto out;

		/*
		 * If the broadcast device is in periodic mode, we
		 * return.
		 */
		if (tick_broadcast_device.mode == TICKDEV_MODE_PERIODIC) {
			/* If it is a hrtimer based broadcast, return busy */
			if (bc->features & CLOCK_EVT_FEAT_HRTIMER)
				ret = -EBUSY;
			goto out;
		}

		if (!cpumask_test_and_set_cpu(cpu, tick_broadcast_oneshot_mask)) {
			WARN_ON_ONCE(cpumask_test_cpu(cpu, tick_broadcast_pending_mask));

			/* Conditionally shut down the local timer. */
			broadcast_shutdown_local(bc, dev);

			/*
			 * We only reprogram the broadcast timer if we
			 * did not mark ourself in the force mask and
			 * if the cpu local event is earlier than the
			 * broadcast event. If the current CPU is in
			 * the force mask, then we are going to be
			 * woken by the IPI right away; we return
			 * busy, so the CPU does not try to go deep
			 * idle.
			 */
			if (cpumask_test_cpu(cpu, tick_broadcast_force_mask)) {
				ret = -EBUSY;
			} else if (dev->next_event < bc->next_event) {
				tick_broadcast_set_event(bc, cpu, dev->next_event);
				/*
				 * In case of hrtimer broadcasts the
				 * programming might have moved the
				 * timer to this cpu. If yes, remove
				 * us from the broadcast mask and
				 * return busy.
				 */
				ret = broadcast_needs_cpu(bc, cpu);
				if (ret) {
					cpumask_clear_cpu(cpu,
						tick_broadcast_oneshot_mask);
				}
			}
		}
	} else {
		if (cpumask_test_and_clear_cpu(cpu, tick_broadcast_oneshot_mask)) {
			clockevents_switch_state(dev, CLOCK_EVT_STATE_ONESHOT);
			/*
			 * The cpu which was handling the broadcast
			 * timer marked this cpu in the broadcast
			 * pending mask and fired the broadcast
			 * IPI. So we are going to handle the expired
			 * event anyway via the broadcast IPI
			 * handler. No need to reprogram the timer
			 * with an already expired event.
			 */
			if (cpumask_test_and_clear_cpu(cpu,
						       tick_broadcast_pending_mask))
				goto out;

			/*
			 * Bail out if there is no next event.
			 */
			if (dev->next_event == KTIME_MAX)
				goto out;
			/*
			 * If the pending bit is not set, then we are
			 * either the CPU handling the broadcast
			 * interrupt or we got woken by something else.
			 *
			 * We are no longer in the broadcast mask, so
			 * if the cpu local expiry time is already
			 * reached, we would reprogram the cpu local
			 * timer with an already expired event.
			 *
			 * This can lead to a ping-pong when we return
			 * to idle and therefore rearm the broadcast
			 * timer before the cpu local timer was able
			 * to fire. This happens because the forced
			 * reprogramming makes sure that the event
			 * will happen in the future and depending on
			 * the min_delta setting this might be far
			 * enough out that the ping-pong starts.
			 *
			 * If the cpu local next_event has expired
			 * then we know that the broadcast timer
			 * next_event has expired as well and
			 * broadcast is about to be handled. So we
			 * avoid reprogramming and enforce that the
			 * broadcast handler, which did not run yet,
			 * will invoke the cpu local handler.
			 *
			 * We cannot call the handler directly from
			 * here, because we might be in a NOHZ phase
			 * and we did not go through the irq_enter()
			 * nohz fixups.
			 */
			now = ktime_get();
			if (dev->next_event <= now) {
				cpumask_set_cpu(cpu, tick_broadcast_force_mask);
				goto out;
			}
			/*
			 * We got woken by something else. Reprogram
			 * the cpu local timer device.
			 */
			tick_program_event(dev->next_event, 1);
		}
	}
out:
	raw_spin_unlock(&tick_broadcast_lock);
	return ret;
}

static int tick_oneshot_wakeup_control(enum tick_broadcast_state state,
				       struct tick_device *td,
				       int cpu)
{
	struct clock_event_device *dev, *wd;

	dev = td->evtdev;
	if (td->mode != TICKDEV_MODE_ONESHOT)
		return -EINVAL;

	wd = tick_get_oneshot_wakeup_device(cpu);
	if (!wd)
		return -ENODEV;

	switch (state) {
	case TICK_BROADCAST_ENTER:
		clockevents_switch_state(dev, CLOCK_EVT_STATE_ONESHOT_STOPPED);
		clockevents_switch_state(wd, CLOCK_EVT_STATE_ONESHOT);
		clockevents_program_event(wd, dev->next_event, 1);
		break;
	case TICK_BROADCAST_EXIT:
		/* We may have transitioned to oneshot mode while idle */
		if (clockevent_get_state(wd) != CLOCK_EVT_STATE_ONESHOT)
			return -ENODEV;
	}

	return 0;
}

int __tick_broadcast_oneshot_control(enum tick_broadcast_state state)
{
	struct tick_device *td = this_cpu_ptr(&tick_cpu_device);
	int cpu = smp_processor_id();

	if (!tick_oneshot_wakeup_control(state, td, cpu))
		return 0;

	if (tick_broadcast_device.evtdev)
		return ___tick_broadcast_oneshot_control(state, td, cpu);

	/*
	 * If there is no broadcast or wakeup device, tell the caller not
	 * to go into deep idle.
	 */
	return -EBUSY;
}

/*
 * Reset the one shot broadcast for a cpu
 *
 * Called with tick_broadcast_lock held
 */
static void tick_broadcast_clear_oneshot(int cpu)
{
	cpumask_clear_cpu(cpu, tick_broadcast_oneshot_mask);
	cpumask_clear_cpu(cpu, tick_broadcast_pending_mask);
}

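/*
 * Set the next_event of the per-CPU tick devices in @mask to @expires.
 * Used when switching the broadcast device from periodic to oneshot
 * mode, so the CPUs waiting for a periodic broadcast get a valid
 * expiry time.
 */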
static void tick_broadcast_init_next_event(struct cpumask *mask,
					   ktime_t expires)
{
	struct tick_device *td;
	int cpu;

	for_each_cpu(cpu, mask) {
		td = &per_cpu(tick_cpu_device, cpu);
		if (td->evtdev)
			td->evtdev->next_event = expires;
	}
}

static inline ktime_t tick_get_next_period(void)
{
	ktime_t next;

	/*
	 * Protect against concurrent updates (store/load tearing on
	 * 32bit). It does not matter if the time is already in the
	 * past. The broadcast device which is about to be programmed will
	 * fire in any case.
	 */
	raw_spin_lock(&jiffies_lock);
	next = tick_next_period;
	raw_spin_unlock(&jiffies_lock);
	return next;
}

/**
 * tick_broadcast_setup_oneshot - setup the broadcast device
 * @bc:		the broadcast device to set up
 */
static void tick_broadcast_setup_oneshot(struct clock_event_device *bc)
{
	int cpu = smp_processor_id();

	if (!bc)
		return;

	/* Set it up only once ! */
	if (bc->event_handler != tick_handle_oneshot_broadcast) {
		int was_periodic = clockevent_state_periodic(bc);

		bc->event_handler = tick_handle_oneshot_broadcast;

		/*
		 * We must be careful here. There might be other CPUs
		 * waiting for periodic broadcast. We need to set the
		 * oneshot_mask bits for those and program the
		 * broadcast device to fire.
		 */
		cpumask_copy(tmpmask, tick_broadcast_mask);
		cpumask_clear_cpu(cpu, tmpmask);
		cpumask_or(tick_broadcast_oneshot_mask,
			   tick_broadcast_oneshot_mask, tmpmask);

		if (was_periodic && !cpumask_empty(tmpmask)) {
			ktime_t nextevt = tick_get_next_period();

			clockevents_switch_state(bc, CLOCK_EVT_STATE_ONESHOT);
			tick_broadcast_init_next_event(tmpmask, nextevt);
			tick_broadcast_set_event(bc, cpu, nextevt);
		} else
			bc->next_event = KTIME_MAX;
	} else {
		/*
		 * The first cpu which switches to oneshot mode sets
		 * the bit for all other cpus which are in the general
		 * (periodic) broadcast mask. So the bit is set and
		 * would prevent the first broadcast enter after this
		 * to program the bc device.
		 */
		tick_broadcast_clear_oneshot(cpu);
	}
}

/*
 * Select oneshot operating mode for the broadcast device
 */
void tick_broadcast_switch_to_oneshot(void)
{
	struct clock_event_device *bc;
	unsigned long flags;

	raw_spin_lock_irqsave(&tick_broadcast_lock, flags);

	tick_broadcast_device.mode = TICKDEV_MODE_ONESHOT;
	bc = tick_broadcast_device.evtdev;
	if (bc)
		tick_broadcast_setup_oneshot(bc);

	raw_spin_unlock_irqrestore(&tick_broadcast_lock, flags);
}

#ifdef CONFIG_HOTPLUG_CPU
void hotplug_cpu__broadcast_tick_pull(int deadcpu)
{
	struct clock_event_device *bc;
	unsigned long flags;

	raw_spin_lock_irqsave(&tick_broadcast_lock, flags);
	bc = tick_broadcast_device.evtdev;

	if (bc && broadcast_needs_cpu(bc, deadcpu)) {
		/* This moves the broadcast assignment to this CPU: */
		clockevents_program_event(bc, bc->next_event, 1);
	}
	raw_spin_unlock_irqrestore(&tick_broadcast_lock, flags);
}

/*
 * Remove a dying CPU from broadcasting
 */
static void tick_broadcast_oneshot_offline(unsigned int cpu)
{
	if (tick_get_oneshot_wakeup_device(cpu))
		tick_set_oneshot_wakeup_device(NULL, cpu);

	/*
	 * Clear the broadcast masks for the dead cpu, but do not stop
	 * the broadcast device!
	 */
	cpumask_clear_cpu(cpu, tick_broadcast_oneshot_mask);
	cpumask_clear_cpu(cpu, tick_broadcast_pending_mask);
	cpumask_clear_cpu(cpu, tick_broadcast_force_mask);
}
#endif

/*
 * Check, whether the broadcast device is in one shot mode
 */
int tick_broadcast_oneshot_active(void)
{
	return tick_broadcast_device.mode == TICKDEV_MODE_ONESHOT;
}

/*
 * Check whether the broadcast device supports oneshot.
 */
bool tick_broadcast_oneshot_available(void)
{
	struct clock_event_device *bc = tick_broadcast_device.evtdev;

	return bc ? bc->features & CLOCK_EVT_FEAT_ONESHOT : false;
}

#else
int __tick_broadcast_oneshot_control(enum tick_broadcast_state state)
{
	struct clock_event_device *bc = tick_broadcast_device.evtdev;

	if (!bc || (bc->features & CLOCK_EVT_FEAT_HRTIMER))
		return -EBUSY;

	return 0;
}
#endif

void __init tick_broadcast_init(void)
{
	zalloc_cpumask_var(&tick_broadcast_mask, GFP_NOWAIT);
	zalloc_cpumask_var(&tick_broadcast_on, GFP_NOWAIT);
	zalloc_cpumask_var(&tmpmask, GFP_NOWAIT);
#ifdef CONFIG_TICK_ONESHOT
	zalloc_cpumask_var(&tick_broadcast_oneshot_mask, GFP_NOWAIT);
	zalloc_cpumask_var(&tick_broadcast_pending_mask, GFP_NOWAIT);
	zalloc_cpumask_var(&tick_broadcast_force_mask, GFP_NOWAIT);
#endif
}