// SPDX-License-Identifier: GPL-2.0
/*
 * This file contains functions which emulate a local clock-event
 * device via a broadcast event source.
 *
 * Copyright(C) 2005-2006, Thomas Gleixner <tglx@linutronix.de>
 * Copyright(C) 2005-2007, Red Hat, Inc., Ingo Molnar
 * Copyright(C) 2006-2007, Timesys Corp., Thomas Gleixner
 */
#include <linux/cpu.h>
#include <linux/err.h>
#include <linux/hrtimer.h>
#include <linux/interrupt.h>
#include <linux/percpu.h>
#include <linux/profile.h>
#include <linux/sched.h>
#include <linux/smp.h>
#include <linux/module.h>

#include "tick-internal.h"

/*
 * Broadcast support for broken x86 hardware, where the local apic
 * timer stops in C3 state.
 */

static struct tick_device tick_broadcast_device;
static cpumask_var_t tick_broadcast_mask __cpumask_var_read_mostly;
static cpumask_var_t tick_broadcast_on __cpumask_var_read_mostly;
static cpumask_var_t tmpmask __cpumask_var_read_mostly;
static int tick_broadcast_forced;

static __cacheline_aligned_in_smp DEFINE_RAW_SPINLOCK(tick_broadcast_lock);

#ifdef CONFIG_TICK_ONESHOT
static DEFINE_PER_CPU(struct clock_event_device *, tick_oneshot_wakeup_device);

static void tick_broadcast_setup_oneshot(struct clock_event_device *bc);
static void tick_broadcast_clear_oneshot(int cpu);
static void tick_resume_broadcast_oneshot(struct clock_event_device *bc);
# ifdef CONFIG_HOTPLUG_CPU
static void tick_broadcast_oneshot_offline(unsigned int cpu);
# endif
#else
static inline void tick_broadcast_setup_oneshot(struct clock_event_device *bc) { BUG(); }
static inline void tick_broadcast_clear_oneshot(int cpu) { }
static inline void tick_resume_broadcast_oneshot(struct clock_event_device *bc) { }
# ifdef CONFIG_HOTPLUG_CPU
static inline void tick_broadcast_oneshot_offline(unsigned int cpu) { }
# endif
#endif

/*
 * Debugging: see timer_list.c
 */
struct tick_device *tick_get_broadcast_device(void)
{
	return &tick_broadcast_device;
}

struct cpumask *tick_get_broadcast_mask(void)
{
	return tick_broadcast_mask;
}

static struct clock_event_device *tick_get_oneshot_wakeup_device(int cpu);

const struct clock_event_device *tick_get_wakeup_device(int cpu)
{
	return tick_get_oneshot_wakeup_device(cpu);
}

/*
 * Start the device in periodic mode
 */
static void tick_broadcast_start_periodic(struct clock_event_device *bc)
{
	if (bc)
		tick_setup_periodic(bc, 1);
}

/*
 * Check if the device can be utilized as a broadcast device:
 */
static bool tick_check_broadcast_device(struct clock_event_device *curdev,
					struct clock_event_device *newdev)
{
	if ((newdev->features & CLOCK_EVT_FEAT_DUMMY) ||
	    (newdev->features & CLOCK_EVT_FEAT_PERCPU) ||
	    (newdev->features & CLOCK_EVT_FEAT_C3STOP))
		return false;

	if (tick_broadcast_device.mode == TICKDEV_MODE_ONESHOT &&
	    !(newdev->features & CLOCK_EVT_FEAT_ONESHOT))
		return false;

	return !curdev || newdev->rating > curdev->rating;
}

#ifdef CONFIG_TICK_ONESHOT
static struct clock_event_device *tick_get_oneshot_wakeup_device(int cpu)
{
	return per_cpu(tick_oneshot_wakeup_device, cpu);
}

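/*
 * Handler of the per-CPU oneshot wakeup device: hand the event over to
 * the local tick device.
 */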
static void tick_oneshot_wakeup_handler(struct clock_event_device *wd)
{
	/*
	 * If we woke up early and the tick was reprogrammed in the
	 * meantime then this may be spurious but harmless.
	 */
	tick_receive_broadcast();
}

static bool tick_set_oneshot_wakeup_device(struct clock_event_device *newdev,
					   int cpu)
{
	struct clock_event_device *curdev = tick_get_oneshot_wakeup_device(cpu);

	if (!newdev)
		goto set_device;

	if ((newdev->features & CLOCK_EVT_FEAT_DUMMY) ||
	    (newdev->features & CLOCK_EVT_FEAT_C3STOP))
		return false;

	if (!(newdev->features & CLOCK_EVT_FEAT_PERCPU) ||
	    !(newdev->features & CLOCK_EVT_FEAT_ONESHOT))
		return false;

	if (!cpumask_equal(newdev->cpumask, cpumask_of(cpu)))
		return false;

	if (curdev && newdev->rating <= curdev->rating)
		return false;

	if (!try_module_get(newdev->owner))
		return false;

	newdev->event_handler = tick_oneshot_wakeup_handler;
set_device:
	clockevents_exchange_device(curdev, newdev);
	per_cpu(tick_oneshot_wakeup_device, cpu) = newdev;
	return true;
}
#else
static struct clock_event_device *tick_get_oneshot_wakeup_device(int cpu)
{
	return NULL;
}

static bool tick_set_oneshot_wakeup_device(struct clock_event_device *newdev,
					   int cpu)
{
	return false;
}
#endif

/*
 * Conditionally install/replace broadcast device
 */
void tick_install_broadcast_device(struct clock_event_device *dev, int cpu)
{
	struct clock_event_device *cur = tick_broadcast_device.evtdev;

	if (tick_set_oneshot_wakeup_device(dev, cpu))
		return;

	if (!tick_check_broadcast_device(cur, dev))
		return;

	if (!try_module_get(dev->owner))
		return;

	clockevents_exchange_device(cur, dev);
	if (cur)
		cur->event_handler = clockevents_handle_noop;
	tick_broadcast_device.evtdev = dev;
	if (!cpumask_empty(tick_broadcast_mask))
		tick_broadcast_start_periodic(dev);

	if (!(dev->features & CLOCK_EVT_FEAT_ONESHOT))
		return;

	/*
	 * If the system already runs in oneshot mode, switch the newly
	 * registered broadcast device to oneshot mode explicitly.
	 */
	if (tick_broadcast_oneshot_active()) {
		tick_broadcast_switch_to_oneshot();
		return;
	}

	/*
	 * Inform all cpus about this. We might be in a situation
	 * where we did not switch to oneshot mode because the per cpu
	 * devices are affected by CLOCK_EVT_FEAT_C3STOP and the lack
	 * of a oneshot capable broadcast device. Without that
	 * notification the system stays stuck in periodic mode
	 * forever.
	 */
	tick_clock_notify();
}

/*
 * Check if the device is the broadcast device
 */
int tick_is_broadcast_device(struct clock_event_device *dev)
{
	return (dev && tick_broadcast_device.evtdev == dev);
}

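/*
 * Update the frequency of the broadcast device. Fails with -ENODEV if
 * @dev is not the current broadcast device.
 */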
int tick_broadcast_update_freq(struct clock_event_device *dev, u32 freq)
{
	int ret = -ENODEV;

	if (tick_is_broadcast_device(dev)) {
		raw_spin_lock(&tick_broadcast_lock);
		ret = __clockevents_update_freq(dev, freq);
		raw_spin_unlock(&tick_broadcast_lock);
	}
	return ret;
}

static void err_broadcast(const struct cpumask *mask)
{
	pr_crit_once("Failed to broadcast timer tick. Some CPUs may be unresponsive.\n");
}

static void tick_device_setup_broadcast_func(struct clock_event_device *dev)
{
	if (!dev->broadcast)
		dev->broadcast = tick_broadcast;
	if (!dev->broadcast) {
		pr_warn_once("%s depends on broadcast, but no broadcast function available\n",
			     dev->name);
		dev->broadcast = err_broadcast;
	}
}

/*
 * Check if the device is dysfunctional and a placeholder, which
 * needs to be handled by the broadcast device.
 */
int tick_device_uses_broadcast(struct clock_event_device *dev, int cpu)
{
	struct clock_event_device *bc = tick_broadcast_device.evtdev;
	unsigned long flags;
	int ret = 0;

	raw_spin_lock_irqsave(&tick_broadcast_lock, flags);

	/*
	 * Devices might be registered with both periodic and oneshot
	 * mode disabled. This signals that the device needs to be
	 * operated from the broadcast device and is a placeholder for
	 * the cpu local device.
	 */
	if (!tick_device_is_functional(dev)) {
		dev->event_handler = tick_handle_periodic;
		tick_device_setup_broadcast_func(dev);
		cpumask_set_cpu(cpu, tick_broadcast_mask);
		if (tick_broadcast_device.mode == TICKDEV_MODE_PERIODIC)
			tick_broadcast_start_periodic(bc);
		else
			tick_broadcast_setup_oneshot(bc);
		ret = 1;
	} else {
		/*
		 * Clear the broadcast bit for this cpu if the
		 * device is not power state affected.
		 */
		if (!(dev->features & CLOCK_EVT_FEAT_C3STOP))
			cpumask_clear_cpu(cpu, tick_broadcast_mask);
		else
			tick_device_setup_broadcast_func(dev);

		/*
		 * Clear the broadcast bit if the CPU is not in
		 * periodic broadcast on state.
		 */
		if (!cpumask_test_cpu(cpu, tick_broadcast_on))
			cpumask_clear_cpu(cpu, tick_broadcast_mask);

		switch (tick_broadcast_device.mode) {
		case TICKDEV_MODE_ONESHOT:
			/*
			 * If the system is in oneshot mode we can
			 * unconditionally clear the oneshot mask bit,
			 * because the CPU is running and therefore
			 * not in an idle state which causes the power
			 * state affected device to stop. Let the
			 * caller initialize the device.
			 */
			tick_broadcast_clear_oneshot(cpu);
			ret = 0;
			break;

		case TICKDEV_MODE_PERIODIC:
			/*
			 * If the system is in periodic mode, check
			 * whether the broadcast device can be
			 * switched off now.
			 */
			if (cpumask_empty(tick_broadcast_mask) && bc)
				clockevents_shutdown(bc);
			/*
			 * If we kept the cpu in the broadcast mask,
			 * tell the caller to leave the per cpu device
			 * in shutdown state. The periodic interrupt
			 * is delivered by the broadcast device, if
			 * the broadcast device exists and is not
			 * hrtimer based.
			 */
			if (bc && !(bc->features & CLOCK_EVT_FEAT_HRTIMER))
				ret = cpumask_test_cpu(cpu, tick_broadcast_mask);
			break;
		default:
			break;
		}
	}
	raw_spin_unlock_irqrestore(&tick_broadcast_lock, flags);
	return ret;
}

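/*
 * Called in interrupt context on a CPU whose local tick is emulated by the
 * broadcast machinery, typically from the architecture's broadcast IPI or
 * from the oneshot wakeup handler above, to run the handler of the local
 * clock event device.
 */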
int tick_receive_broadcast(void)
{
	struct tick_device *td = this_cpu_ptr(&tick_cpu_device);
	struct clock_event_device *evt = td->evtdev;

	if (!evt)
		return -ENODEV;

	if (!evt->event_handler)
		return -EINVAL;

	evt->event_handler(evt);
	return 0;
}

/*
 * Broadcast the event to the cpus which are set in the mask (mangled).
 */
static bool tick_do_broadcast(struct cpumask *mask)
{
	int cpu = smp_processor_id();
	struct tick_device *td;
	bool local = false;

	/*
	 * Check if the current cpu is in the mask
	 */
	if (cpumask_test_cpu(cpu, mask)) {
		struct clock_event_device *bc = tick_broadcast_device.evtdev;

		cpumask_clear_cpu(cpu, mask);
		/*
		 * We only run the local handler if the broadcast
		 * device is not hrtimer based. Otherwise we run into
		 * a hrtimer recursion.
		 *
		 * local timer_interrupt()
		 *   local_handler()
		 *     expire_hrtimers()
		 *       bc_handler()
		 *         local_handler()
		 *           expire_hrtimers()
		 */
		local = !(bc->features & CLOCK_EVT_FEAT_HRTIMER);
	}

	if (!cpumask_empty(mask)) {
		/*
		 * It might be necessary to actually check whether the devices
		 * have different broadcast functions. For now, just use that
		 * of the first device. This works as long as we have this
		 * misfeature only on x86 (lapic)
		 */
		td = &per_cpu(tick_cpu_device, cpumask_first(mask));
		td->evtdev->broadcast(mask);
	}
	return local;
}

/*
 * Periodic broadcast:
 * - invoke the broadcast handlers
 */
static bool tick_do_periodic_broadcast(void)
{
	cpumask_and(tmpmask, cpu_online_mask, tick_broadcast_mask);
	return tick_do_broadcast(tmpmask);
}

/*
 * Event handler for periodic broadcast ticks
 */
static void tick_handle_periodic_broadcast(struct clock_event_device *dev)
{
	struct tick_device *td = this_cpu_ptr(&tick_cpu_device);
	bool bc_local;

	raw_spin_lock(&tick_broadcast_lock);

	/* Handle spurious interrupts gracefully */
	if (clockevent_state_shutdown(tick_broadcast_device.evtdev)) {
		raw_spin_unlock(&tick_broadcast_lock);
		return;
	}

	bc_local = tick_do_periodic_broadcast();

	if (clockevent_state_oneshot(dev)) {
		ktime_t next = ktime_add_ns(dev->next_event, TICK_NSEC);

		clockevents_program_event(dev, next, true);
	}
	raw_spin_unlock(&tick_broadcast_lock);

	/*
	 * We run the handler of the local cpu after dropping
	 * tick_broadcast_lock because the handler might deadlock when
	 * trying to switch to oneshot mode.
	 */
	if (bc_local)
		td->evtdev->event_handler(td->evtdev);
}

/**
 * tick_broadcast_control - Enable/disable or force broadcast mode
 * @mode:	The selected broadcast mode
 *
 * Called when the system enters a state where affected tick devices
 * might stop. Note: TICK_BROADCAST_FORCE cannot be undone.
 */
void tick_broadcast_control(enum tick_broadcast_mode mode)
{
	struct clock_event_device *bc, *dev;
	struct tick_device *td;
	int cpu, bc_stopped;
	unsigned long flags;

	/* Protects also the local clockevent device. */
	raw_spin_lock_irqsave(&tick_broadcast_lock, flags);
	td = this_cpu_ptr(&tick_cpu_device);
	dev = td->evtdev;

	/*
	 * Is the device not affected by the powerstate?
	 */
	if (!dev || !(dev->features & CLOCK_EVT_FEAT_C3STOP))
		goto out;

	if (!tick_device_is_functional(dev))
		goto out;

	cpu = smp_processor_id();
	bc = tick_broadcast_device.evtdev;
	bc_stopped = cpumask_empty(tick_broadcast_mask);

	switch (mode) {
	case TICK_BROADCAST_FORCE:
		tick_broadcast_forced = 1;
		fallthrough;
	case TICK_BROADCAST_ON:
		cpumask_set_cpu(cpu, tick_broadcast_on);
		if (!cpumask_test_and_set_cpu(cpu, tick_broadcast_mask)) {
			/*
			 * Only shut down the cpu local device if:
			 *
			 * - the broadcast device exists
			 * - the broadcast device is not a hrtimer based one
			 * - the broadcast device is in periodic mode to
			 *   avoid a hiccup during switch to oneshot mode
			 */
			if (bc && !(bc->features & CLOCK_EVT_FEAT_HRTIMER) &&
			    tick_broadcast_device.mode == TICKDEV_MODE_PERIODIC)
				clockevents_shutdown(dev);
		}
		break;

	case TICK_BROADCAST_OFF:
		if (tick_broadcast_forced)
			break;
		cpumask_clear_cpu(cpu, tick_broadcast_on);
		if (cpumask_test_and_clear_cpu(cpu, tick_broadcast_mask)) {
			if (tick_broadcast_device.mode ==
			    TICKDEV_MODE_PERIODIC)
				tick_setup_periodic(dev, 0);
		}
		break;
	}

	if (bc) {
		if (cpumask_empty(tick_broadcast_mask)) {
			if (!bc_stopped)
				clockevents_shutdown(bc);
		} else if (bc_stopped) {
			if (tick_broadcast_device.mode == TICKDEV_MODE_PERIODIC)
				tick_broadcast_start_periodic(bc);
			else
				tick_broadcast_setup_oneshot(bc);
		}
	}
out:
	raw_spin_unlock_irqrestore(&tick_broadcast_lock, flags);
}
EXPORT_SYMBOL_GPL(tick_broadcast_control);

/*
 * Set the periodic handler depending on broadcast on/off
 */
void tick_set_periodic_handler(struct clock_event_device *dev, int broadcast)
{
	if (!broadcast)
		dev->event_handler = tick_handle_periodic;
	else
		dev->event_handler = tick_handle_periodic_broadcast;
}

#ifdef CONFIG_HOTPLUG_CPU
static void tick_shutdown_broadcast(void)
{
	struct clock_event_device *bc = tick_broadcast_device.evtdev;

	if (tick_broadcast_device.mode == TICKDEV_MODE_PERIODIC) {
		if (bc && cpumask_empty(tick_broadcast_mask))
			clockevents_shutdown(bc);
	}
}

/*
 * Remove a CPU from broadcasting
 */
void tick_broadcast_offline(unsigned int cpu)
{
	raw_spin_lock(&tick_broadcast_lock);
	cpumask_clear_cpu(cpu, tick_broadcast_mask);
	cpumask_clear_cpu(cpu, tick_broadcast_on);
	tick_broadcast_oneshot_offline(cpu);
	tick_shutdown_broadcast();
	raw_spin_unlock(&tick_broadcast_lock);
}

#endif

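/*
 * Shut the broadcast device down across suspend; tick_resume_broadcast()
 * restarts it in the appropriate mode.
 */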
void tick_suspend_broadcast(void)
{
	struct clock_event_device *bc;
	unsigned long flags;

	raw_spin_lock_irqsave(&tick_broadcast_lock, flags);

	bc = tick_broadcast_device.evtdev;
	if (bc)
		clockevents_shutdown(bc);

	raw_spin_unlock_irqrestore(&tick_broadcast_lock, flags);
}

/*
 * This is called from tick_resume_local() on a resuming CPU. That's
 * called from the core resume function, tick_unfreeze() and the magic XEN
 * resume hackery.
 *
 * In none of these cases the broadcast device mode can change and the
 * bit of the resuming CPU in the broadcast mask is safe as well.
 */
bool tick_resume_check_broadcast(void)
{
	if (tick_broadcast_device.mode == TICKDEV_MODE_ONESHOT)
		return false;
	else
		return cpumask_test_cpu(smp_processor_id(), tick_broadcast_mask);
}

void tick_resume_broadcast(void)
{
	struct clock_event_device *bc;
	unsigned long flags;

	raw_spin_lock_irqsave(&tick_broadcast_lock, flags);

	bc = tick_broadcast_device.evtdev;

	if (bc) {
		clockevents_tick_resume(bc);

		switch (tick_broadcast_device.mode) {
		case TICKDEV_MODE_PERIODIC:
			if (!cpumask_empty(tick_broadcast_mask))
				tick_broadcast_start_periodic(bc);
			break;
		case TICKDEV_MODE_ONESHOT:
			if (!cpumask_empty(tick_broadcast_mask))
				tick_resume_broadcast_oneshot(bc);
			break;
		}
	}
	raw_spin_unlock_irqrestore(&tick_broadcast_lock, flags);
}

#ifdef CONFIG_TICK_ONESHOT

static cpumask_var_t tick_broadcast_oneshot_mask __cpumask_var_read_mostly;
static cpumask_var_t tick_broadcast_pending_mask __cpumask_var_read_mostly;
static cpumask_var_t tick_broadcast_force_mask __cpumask_var_read_mostly;

/*
 * Exposed for debugging: see timer_list.c
 */
struct cpumask *tick_get_broadcast_oneshot_mask(void)
{
	return tick_broadcast_oneshot_mask;
}

/*
 * Called before going idle with interrupts disabled. Checks whether a
 * broadcast event from the other core is about to happen. We detected
 * that in tick_broadcast_oneshot_control(). The callsite can use this
 * to avoid a deep idle transition as we are about to get the
 * broadcast IPI right away.
 */
int tick_check_broadcast_expired(void)
{
	return cpumask_test_cpu(smp_processor_id(), tick_broadcast_force_mask);
}

/*
 * Set broadcast interrupt affinity
 */
static void tick_broadcast_set_affinity(struct clock_event_device *bc,
					const struct cpumask *cpumask)
{
	if (!(bc->features & CLOCK_EVT_FEAT_DYNIRQ))
		return;

	if (cpumask_equal(bc->cpumask, cpumask))
		return;

	bc->cpumask = cpumask;
	irq_set_affinity(bc->irq, bc->cpumask);
}

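/*
 * Program the broadcast device to fire at @expires on behalf of @cpu and,
 * if the device supports it, steer the broadcast interrupt to that CPU.
 */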
static void tick_broadcast_set_event(struct clock_event_device *bc, int cpu,
				     ktime_t expires)
{
	if (!clockevent_state_oneshot(bc))
		clockevents_switch_state(bc, CLOCK_EVT_STATE_ONESHOT);

	clockevents_program_event(bc, expires, 1);
	tick_broadcast_set_affinity(bc, cpumask_of(cpu));
}

static void tick_resume_broadcast_oneshot(struct clock_event_device *bc)
{
	clockevents_switch_state(bc, CLOCK_EVT_STATE_ONESHOT);
}

/*
 * Called from irq_enter() when idle was interrupted to reenable the
 * per cpu device.
 */
void tick_check_oneshot_broadcast_this_cpu(void)
{
	if (cpumask_test_cpu(smp_processor_id(), tick_broadcast_oneshot_mask)) {
		struct tick_device *td = this_cpu_ptr(&tick_cpu_device);

		/*
		 * We might be in the middle of switching over from
		 * periodic to oneshot. If the CPU has not yet
		 * switched over, leave the device alone.
		 */
		if (td->mode == TICKDEV_MODE_ONESHOT) {
			clockevents_switch_state(td->evtdev,
						 CLOCK_EVT_STATE_ONESHOT);
		}
	}
}

/*
 * Handle oneshot mode broadcasting
 */
static void tick_handle_oneshot_broadcast(struct clock_event_device *dev)
{
	struct tick_device *td;
	ktime_t now, next_event;
	int cpu, next_cpu = 0;
	bool bc_local;

	raw_spin_lock(&tick_broadcast_lock);
	dev->next_event = KTIME_MAX;
	next_event = KTIME_MAX;
	cpumask_clear(tmpmask);
	now = ktime_get();
	/* Find all expired events */
	for_each_cpu(cpu, tick_broadcast_oneshot_mask) {
		/*
		 * Required for !SMP because for_each_cpu() unconditionally
		 * reports CPU0 as set on UP kernels.
		 */
		if (!IS_ENABLED(CONFIG_SMP) &&
		    cpumask_empty(tick_broadcast_oneshot_mask))
			break;

		td = &per_cpu(tick_cpu_device, cpu);
		if (td->evtdev->next_event <= now) {
			cpumask_set_cpu(cpu, tmpmask);
			/*
			 * Mark the remote cpu in the pending mask, so
			 * it can avoid reprogramming the cpu local
			 * timer in tick_broadcast_oneshot_control().
			 */
			cpumask_set_cpu(cpu, tick_broadcast_pending_mask);
		} else if (td->evtdev->next_event < next_event) {
			next_event = td->evtdev->next_event;
			next_cpu = cpu;
		}
	}

	/*
	 * Remove the current cpu from the pending mask. The event is
	 * delivered immediately in tick_do_broadcast()!
	 */
	cpumask_clear_cpu(smp_processor_id(), tick_broadcast_pending_mask);

	/* Take care of enforced broadcast requests */
	cpumask_or(tmpmask, tmpmask, tick_broadcast_force_mask);
	cpumask_clear(tick_broadcast_force_mask);

	/*
	 * Sanity check. Catch the case where we try to broadcast to
	 * offline cpus.
	 */
	if (WARN_ON_ONCE(!cpumask_subset(tmpmask, cpu_online_mask)))
		cpumask_and(tmpmask, tmpmask, cpu_online_mask);

	/*
	 * Wake up the cpus which have an expired event.
	 */
	bc_local = tick_do_broadcast(tmpmask);

	/*
	 * Two reasons to reprogram:
	 *
	 * - The global event did not expire any CPU local
	 *   events. This happens in dyntick mode, as the maximum PIT
	 *   delta is quite small.
	 *
	 * - There are pending events on sleeping CPUs which were not
	 *   in the event mask
	 */
	if (next_event != KTIME_MAX)
		tick_broadcast_set_event(dev, next_cpu, next_event);

	raw_spin_unlock(&tick_broadcast_lock);

	if (bc_local) {
		td = this_cpu_ptr(&tick_cpu_device);
		td->evtdev->event_handler(td->evtdev);
	}
}

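/*
 * Return -EBUSY if the broadcast device is hrtimer based, armed and bound
 * to @cpu: that CPU must not enter deep idle because it services the
 * broadcast hrtimer.
 */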
static int broadcast_needs_cpu(struct clock_event_device *bc, int cpu)
{
	if (!(bc->features & CLOCK_EVT_FEAT_HRTIMER))
		return 0;
	if (bc->next_event == KTIME_MAX)
		return 0;
	return bc->bound_on == cpu ? -EBUSY : 0;
}

static void broadcast_shutdown_local(struct clock_event_device *bc,
				     struct clock_event_device *dev)
{
	/*
	 * For hrtimer based broadcasting we cannot shut down the cpu
	 * local device if our own event is the first one to expire or
	 * if we own the broadcast timer.
	 */
	if (bc->features & CLOCK_EVT_FEAT_HRTIMER) {
		if (broadcast_needs_cpu(bc, smp_processor_id()))
			return;
		if (dev->next_event < bc->next_event)
			return;
	}
	clockevents_switch_state(dev, CLOCK_EVT_STATE_SHUTDOWN);
}

static int ___tick_broadcast_oneshot_control(enum tick_broadcast_state state,
					     struct tick_device *td,
					     int cpu)
{
	struct clock_event_device *bc, *dev = td->evtdev;
	int ret = 0;
	ktime_t now;

	raw_spin_lock(&tick_broadcast_lock);
	bc = tick_broadcast_device.evtdev;

	if (state == TICK_BROADCAST_ENTER) {
		/*
		 * If the current CPU owns the hrtimer broadcast
		 * mechanism, it cannot go deep idle and we do not add
		 * the CPU to the broadcast mask. We don't have to go
		 * through the EXIT path as the local timer is not
		 * shutdown.
		 */
		ret = broadcast_needs_cpu(bc, cpu);
		if (ret)
			goto out;

		/*
		 * If the broadcast device is in periodic mode, we
		 * return.
		 */
		if (tick_broadcast_device.mode == TICKDEV_MODE_PERIODIC) {
			/* If it is a hrtimer based broadcast, return busy */
			if (bc->features & CLOCK_EVT_FEAT_HRTIMER)
				ret = -EBUSY;
			goto out;
		}

		if (!cpumask_test_and_set_cpu(cpu, tick_broadcast_oneshot_mask)) {
			WARN_ON_ONCE(cpumask_test_cpu(cpu, tick_broadcast_pending_mask));

			/* Conditionally shut down the local timer. */
			broadcast_shutdown_local(bc, dev);

			/*
			 * We only reprogram the broadcast timer if we
			 * did not mark ourselves in the force mask and
			 * if the cpu local event is earlier than the
			 * broadcast event. If the current CPU is in
			 * the force mask, then we are going to be
			 * woken by the IPI right away; we return
			 * busy, so the CPU does not try to go deep
			 * idle.
			 */
			if (cpumask_test_cpu(cpu, tick_broadcast_force_mask)) {
				ret = -EBUSY;
			} else if (dev->next_event < bc->next_event) {
				tick_broadcast_set_event(bc, cpu, dev->next_event);
				/*
				 * In case of hrtimer broadcasts the
				 * programming might have moved the
				 * timer to this cpu. If yes, remove
				 * us from the broadcast mask and
				 * return busy.
				 */
				ret = broadcast_needs_cpu(bc, cpu);
				if (ret) {
					cpumask_clear_cpu(cpu,
						tick_broadcast_oneshot_mask);
				}
			}
		}
	} else {
		if (cpumask_test_and_clear_cpu(cpu, tick_broadcast_oneshot_mask)) {
			clockevents_switch_state(dev, CLOCK_EVT_STATE_ONESHOT);
			/*
			 * The cpu which was handling the broadcast
			 * timer marked this cpu in the broadcast
			 * pending mask and fired the broadcast
			 * IPI. So we are going to handle the expired
			 * event anyway via the broadcast IPI
			 * handler. No need to reprogram the timer
			 * with an already expired event.
			 */
			if (cpumask_test_and_clear_cpu(cpu,
						       tick_broadcast_pending_mask))
				goto out;

			/*
			 * Bail out if there is no next event.
			 */
			if (dev->next_event == KTIME_MAX)
				goto out;
			/*
			 * If the pending bit is not set, then we are
			 * either the CPU handling the broadcast
			 * interrupt or we got woken by something else.
			 *
			 * We are no longer in the broadcast mask, so
			 * if the cpu local expiry time is already
			 * reached, we would reprogram the cpu local
			 * timer with an already expired event.
			 *
			 * This can lead to a ping-pong when we return
			 * to idle and therefore rearm the broadcast
			 * timer before the cpu local timer was able
			 * to fire. This happens because the forced
			 * reprogramming makes sure that the event
			 * will happen in the future and depending on
			 * the min_delta setting this might be far
			 * enough out that the ping-pong starts.
			 *
			 * If the cpu local next_event has expired
			 * then we know that the broadcast timer
			 * next_event has expired as well and
			 * broadcast is about to be handled. So we
			 * avoid reprogramming and enforce that the
			 * broadcast handler, which did not run yet,
			 * will invoke the cpu local handler.
			 *
			 * We cannot call the handler directly from
			 * here, because we might be in a NOHZ phase
			 * and we did not go through the irq_enter()
			 * nohz fixups.
			 */
			now = ktime_get();
			if (dev->next_event <= now) {
				cpumask_set_cpu(cpu, tick_broadcast_force_mask);
				goto out;
			}
			/*
			 * We got woken by something else. Reprogram
			 * the cpu local timer device.
			 */
			tick_program_event(dev->next_event, 1);
		}
	}
out:
	raw_spin_unlock(&tick_broadcast_lock);
	return ret;
}

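/*
 * Idle entry/exit handling for CPUs which have a private oneshot wakeup
 * device: on enter, stop the local tick device and program the wakeup
 * device to fire at the tick's next_event. A nonzero return value makes
 * the caller fall back to the regular broadcast code.
 */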
static int tick_oneshot_wakeup_control(enum tick_broadcast_state state,
				       struct tick_device *td,
				       int cpu)
{
	struct clock_event_device *dev, *wd;

	dev = td->evtdev;
	if (td->mode != TICKDEV_MODE_ONESHOT)
		return -EINVAL;

	wd = tick_get_oneshot_wakeup_device(cpu);
	if (!wd)
		return -ENODEV;

	switch (state) {
	case TICK_BROADCAST_ENTER:
		clockevents_switch_state(dev, CLOCK_EVT_STATE_ONESHOT_STOPPED);
		clockevents_switch_state(wd, CLOCK_EVT_STATE_ONESHOT);
		clockevents_program_event(wd, dev->next_event, 1);
		break;
	case TICK_BROADCAST_EXIT:
		/* We may have transitioned to oneshot mode while idle */
		if (clockevent_get_state(wd) != CLOCK_EVT_STATE_ONESHOT)
			return -ENODEV;
	}

	return 0;
}

int __tick_broadcast_oneshot_control(enum tick_broadcast_state state)
{
	struct tick_device *td = this_cpu_ptr(&tick_cpu_device);
	int cpu = smp_processor_id();

	if (!tick_oneshot_wakeup_control(state, td, cpu))
		return 0;

	if (tick_broadcast_device.evtdev)
		return ___tick_broadcast_oneshot_control(state, td, cpu);

	/*
	 * If there is no broadcast or wakeup device, tell the caller not
	 * to go into deep idle.
	 */
	return -EBUSY;
}

/*
 * Reset the one shot broadcast for a cpu
 *
 * Called with tick_broadcast_lock held
 */
static void tick_broadcast_clear_oneshot(int cpu)
{
	cpumask_clear_cpu(cpu, tick_broadcast_oneshot_mask);
	cpumask_clear_cpu(cpu, tick_broadcast_pending_mask);
}

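/*
 * Preset the next_event of the per-CPU tick devices in @mask so that the
 * oneshot broadcast handler expires them at @expires.
 */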
static void tick_broadcast_init_next_event(struct cpumask *mask,
					   ktime_t expires)
{
	struct tick_device *td;
	int cpu;

	for_each_cpu(cpu, mask) {
		td = &per_cpu(tick_cpu_device, cpu);
		if (td->evtdev)
			td->evtdev->next_event = expires;
	}
}

static inline ktime_t tick_get_next_period(void)
{
	ktime_t next;

	/*
	 * Protect against concurrent updates (store/load tearing on
	 * 32bit). It does not matter if the time is already in the
	 * past. The broadcast device which is about to be programmed will
	 * fire in any case.
	 */
	raw_spin_lock(&jiffies_lock);
	next = tick_next_period;
	raw_spin_unlock(&jiffies_lock);
	return next;
}

/**
 * tick_broadcast_setup_oneshot - setup the broadcast device
 */
static void tick_broadcast_setup_oneshot(struct clock_event_device *bc)
{
	int cpu = smp_processor_id();

	if (!bc)
		return;

	/* Set it up only once! */
	if (bc->event_handler != tick_handle_oneshot_broadcast) {
		int was_periodic = clockevent_state_periodic(bc);

		bc->event_handler = tick_handle_oneshot_broadcast;

		/*
		 * We must be careful here. There might be other CPUs
		 * waiting for periodic broadcast. We need to set the
		 * oneshot_mask bits for those and program the
		 * broadcast device to fire.
		 */
		cpumask_copy(tmpmask, tick_broadcast_mask);
		cpumask_clear_cpu(cpu, tmpmask);
		cpumask_or(tick_broadcast_oneshot_mask,
			   tick_broadcast_oneshot_mask, tmpmask);

		if (was_periodic && !cpumask_empty(tmpmask)) {
			ktime_t nextevt = tick_get_next_period();

			clockevents_switch_state(bc, CLOCK_EVT_STATE_ONESHOT);
			tick_broadcast_init_next_event(tmpmask, nextevt);
			tick_broadcast_set_event(bc, cpu, nextevt);
		} else
			bc->next_event = KTIME_MAX;
	} else {
		/*
		 * The first cpu which switches to oneshot mode sets
		 * the bit for all other cpus which are in the general
		 * (periodic) broadcast mask. So the bit is set and
		 * would prevent the first broadcast enter after this
		 * from programming the bc device.
		 */
		tick_broadcast_clear_oneshot(cpu);
	}
}

/*
 * Select oneshot operating mode for the broadcast device
 */
void tick_broadcast_switch_to_oneshot(void)
{
	struct clock_event_device *bc;
	unsigned long flags;

	raw_spin_lock_irqsave(&tick_broadcast_lock, flags);

	tick_broadcast_device.mode = TICKDEV_MODE_ONESHOT;
	bc = tick_broadcast_device.evtdev;
	if (bc)
		tick_broadcast_setup_oneshot(bc);

	raw_spin_unlock_irqrestore(&tick_broadcast_lock, flags);
}

#ifdef CONFIG_HOTPLUG_CPU
void hotplug_cpu__broadcast_tick_pull(int deadcpu)
{
	struct clock_event_device *bc;
	unsigned long flags;

	raw_spin_lock_irqsave(&tick_broadcast_lock, flags);
	bc = tick_broadcast_device.evtdev;

	if (bc && broadcast_needs_cpu(bc, deadcpu)) {
		/* This moves the broadcast assignment to this CPU: */
		clockevents_program_event(bc, bc->next_event, 1);
	}
	raw_spin_unlock_irqrestore(&tick_broadcast_lock, flags);
}

/*
 * Remove a dying CPU from broadcasting
 */
static void tick_broadcast_oneshot_offline(unsigned int cpu)
{
	if (tick_get_oneshot_wakeup_device(cpu))
		tick_set_oneshot_wakeup_device(NULL, cpu);

	/*
	 * Clear the broadcast masks for the dead cpu, but do not stop
	 * the broadcast device!
	 */
	cpumask_clear_cpu(cpu, tick_broadcast_oneshot_mask);
	cpumask_clear_cpu(cpu, tick_broadcast_pending_mask);
	cpumask_clear_cpu(cpu, tick_broadcast_force_mask);
}
#endif

/*
 * Check whether the broadcast device is in oneshot mode
 */
int tick_broadcast_oneshot_active(void)
{
	return tick_broadcast_device.mode == TICKDEV_MODE_ONESHOT;
}

/*
 * Check whether the broadcast device supports oneshot.
 */
bool tick_broadcast_oneshot_available(void)
{
	struct clock_event_device *bc = tick_broadcast_device.evtdev;

	return bc ? bc->features & CLOCK_EVT_FEAT_ONESHOT : false;
}

#else
int __tick_broadcast_oneshot_control(enum tick_broadcast_state state)
{
	struct clock_event_device *bc = tick_broadcast_device.evtdev;

	if (!bc || (bc->features & CLOCK_EVT_FEAT_HRTIMER))
		return -EBUSY;

	return 0;
}
#endif

/*
 * Allocate the cpumasks used by the broadcast code at early boot.
 */
void __init tick_broadcast_init(void)
{
	zalloc_cpumask_var(&tick_broadcast_mask, GFP_NOWAIT);
	zalloc_cpumask_var(&tick_broadcast_on, GFP_NOWAIT);
	zalloc_cpumask_var(&tmpmask, GFP_NOWAIT);
#ifdef CONFIG_TICK_ONESHOT
	zalloc_cpumask_var(&tick_broadcast_oneshot_mask, GFP_NOWAIT);
	zalloc_cpumask_var(&tick_broadcast_pending_mask, GFP_NOWAIT);
	zalloc_cpumask_var(&tick_broadcast_force_mask, GFP_NOWAIT);
#endif
}