// SPDX-License-Identifier: GPL-2.0
/*
 * This file contains functions which emulate a local clock-event
 * device via a broadcast event source.
 *
 * Copyright(C) 2005-2006, Thomas Gleixner <tglx@linutronix.de>
 * Copyright(C) 2005-2007, Red Hat, Inc., Ingo Molnar
 * Copyright(C) 2006-2007, Timesys Corp., Thomas Gleixner
 */
#include <linux/cpu.h>
#include <linux/err.h>
#include <linux/hrtimer.h>
#include <linux/interrupt.h>
#include <linux/percpu.h>
#include <linux/profile.h>
#include <linux/sched.h>
#include <linux/smp.h>
#include <linux/module.h>

#include "tick-internal.h"

/*
 * Broadcast support for broken x86 hardware, where the local apic
 * timer stops in C3 state.
 */

static struct tick_device tick_broadcast_device;
static cpumask_var_t tick_broadcast_mask __cpumask_var_read_mostly;
static cpumask_var_t tick_broadcast_on __cpumask_var_read_mostly;
static cpumask_var_t tmpmask __cpumask_var_read_mostly;
static int tick_broadcast_forced;

static __cacheline_aligned_in_smp DEFINE_RAW_SPINLOCK(tick_broadcast_lock);

#ifdef CONFIG_TICK_ONESHOT
static void tick_broadcast_setup_oneshot(struct clock_event_device *bc);
static void tick_broadcast_clear_oneshot(int cpu);
static void tick_resume_broadcast_oneshot(struct clock_event_device *bc);
# ifdef CONFIG_HOTPLUG_CPU
static void tick_broadcast_oneshot_offline(unsigned int cpu);
# endif
#else
static inline void tick_broadcast_setup_oneshot(struct clock_event_device *bc) { BUG(); }
static inline void tick_broadcast_clear_oneshot(int cpu) { }
static inline void tick_resume_broadcast_oneshot(struct clock_event_device *bc) { }
# ifdef CONFIG_HOTPLUG_CPU
static inline void tick_broadcast_oneshot_offline(unsigned int cpu) { }
# endif
#endif

/*
 * Debugging: see timer_list.c
 */
struct tick_device *tick_get_broadcast_device(void)
{
	return &tick_broadcast_device;
}

struct cpumask *tick_get_broadcast_mask(void)
{
	return tick_broadcast_mask;
}

/*
 * Start the device in periodic mode
 */
static void tick_broadcast_start_periodic(struct clock_event_device *bc)
{
	if (bc)
		tick_setup_periodic(bc, 1);
}

/*
 * Check, if the device can be utilized as broadcast device:
 */
static bool tick_check_broadcast_device(struct clock_event_device *curdev,
					struct clock_event_device *newdev)
{
	if ((newdev->features & CLOCK_EVT_FEAT_DUMMY) ||
	    (newdev->features & CLOCK_EVT_FEAT_PERCPU) ||
	    (newdev->features & CLOCK_EVT_FEAT_C3STOP))
		return false;

	if (tick_broadcast_device.mode == TICKDEV_MODE_ONESHOT &&
	    !(newdev->features & CLOCK_EVT_FEAT_ONESHOT))
		return false;

	return !curdev || newdev->rating > curdev->rating;
}

/*
 * Conditionally install/replace broadcast device
 */
void tick_install_broadcast_device(struct clock_event_device *dev)
{
	struct clock_event_device *cur = tick_broadcast_device.evtdev;

	if (!tick_check_broadcast_device(cur, dev))
		return;

	if (!try_module_get(dev->owner))
		return;

	clockevents_exchange_device(cur, dev);
	if (cur)
		cur->event_handler = clockevents_handle_noop;
	tick_broadcast_device.evtdev = dev;
	if (!cpumask_empty(tick_broadcast_mask))
		tick_broadcast_start_periodic(dev);

	if (!(dev->features & CLOCK_EVT_FEAT_ONESHOT))
		return;

	/*
	 * If the system already runs in oneshot mode, switch the newly
	 * registered broadcast device to oneshot mode explicitly.
	 */
	if (tick_broadcast_oneshot_active()) {
		tick_broadcast_switch_to_oneshot();
		return;
	}

	/*
	 * Inform all cpus about this. We might be in a situation
	 * where we did not switch to oneshot mode because the per cpu
	 * devices are affected by CLOCK_EVT_FEAT_C3STOP and the lack
	 * of a oneshot capable broadcast device. Without that
	 * notification the system stays stuck in periodic mode
	 * forever.
	 */
	tick_clock_notify();
}

/*
 * Check, if the device is the broadcast device
 */
int tick_is_broadcast_device(struct clock_event_device *dev)
{
	return (dev && tick_broadcast_device.evtdev == dev);
}

int tick_broadcast_update_freq(struct clock_event_device *dev, u32 freq)
{
	int ret = -ENODEV;

	if (tick_is_broadcast_device(dev)) {
		raw_spin_lock(&tick_broadcast_lock);
		ret = __clockevents_update_freq(dev, freq);
		raw_spin_unlock(&tick_broadcast_lock);
	}
	return ret;
}


static void err_broadcast(const struct cpumask *mask)
{
	pr_crit_once("Failed to broadcast timer tick. Some CPUs may be unresponsive.\n");
}

static void tick_device_setup_broadcast_func(struct clock_event_device *dev)
{
	if (!dev->broadcast)
		dev->broadcast = tick_broadcast;
	if (!dev->broadcast) {
		pr_warn_once("%s depends on broadcast, but no broadcast function available\n",
			     dev->name);
		dev->broadcast = err_broadcast;
	}
}

/*
 * Check, if the device is dysfunctional and a placeholder, which
 * needs to be handled by the broadcast device.
 */
int tick_device_uses_broadcast(struct clock_event_device *dev, int cpu)
{
	struct clock_event_device *bc = tick_broadcast_device.evtdev;
	unsigned long flags;
	int ret = 0;

	raw_spin_lock_irqsave(&tick_broadcast_lock, flags);

	/*
	 * Devices might be registered with both periodic and oneshot
	 * mode disabled. This signals that the device needs to be
	 * operated from the broadcast device and is a placeholder for
	 * the cpu local device.
	 */
	if (!tick_device_is_functional(dev)) {
		dev->event_handler = tick_handle_periodic;
		tick_device_setup_broadcast_func(dev);
		cpumask_set_cpu(cpu, tick_broadcast_mask);
		if (tick_broadcast_device.mode == TICKDEV_MODE_PERIODIC)
			tick_broadcast_start_periodic(bc);
		else
			tick_broadcast_setup_oneshot(bc);
		ret = 1;
	} else {
		/*
		 * Clear the broadcast bit for this cpu if the
		 * device is not power state affected.
		 */
		if (!(dev->features & CLOCK_EVT_FEAT_C3STOP))
			cpumask_clear_cpu(cpu, tick_broadcast_mask);
		else
			tick_device_setup_broadcast_func(dev);

		/*
		 * Clear the broadcast bit if the CPU is not in
		 * periodic broadcast on state.
		 */
		if (!cpumask_test_cpu(cpu, tick_broadcast_on))
			cpumask_clear_cpu(cpu, tick_broadcast_mask);

		switch (tick_broadcast_device.mode) {
		case TICKDEV_MODE_ONESHOT:
			/*
			 * If the system is in oneshot mode we can
			 * unconditionally clear the oneshot mask bit,
			 * because the CPU is running and therefore
			 * not in an idle state which causes the power
			 * state affected device to stop. Let the
			 * caller initialize the device.
			 */
			tick_broadcast_clear_oneshot(cpu);
			ret = 0;
			break;

		case TICKDEV_MODE_PERIODIC:
			/*
			 * If the system is in periodic mode, check
			 * whether the broadcast device can be
			 * switched off now.
			 */
			if (cpumask_empty(tick_broadcast_mask) && bc)
				clockevents_shutdown(bc);
			/*
			 * If we kept the cpu in the broadcast mask,
			 * tell the caller to leave the per cpu device
			 * in shutdown state. The periodic interrupt
			 * is delivered by the broadcast device, if
			 * the broadcast device exists and is not
			 * hrtimer based.
			 */
			if (bc && !(bc->features & CLOCK_EVT_FEAT_HRTIMER))
				ret = cpumask_test_cpu(cpu, tick_broadcast_mask);
			break;
		default:
			break;
		}
	}
	raw_spin_unlock_irqrestore(&tick_broadcast_lock, flags);
	return ret;
}

#ifdef CONFIG_GENERIC_CLOCKEVENTS_BROADCAST
int tick_receive_broadcast(void)
{
	struct tick_device *td = this_cpu_ptr(&tick_cpu_device);
	struct clock_event_device *evt = td->evtdev;

	if (!evt)
		return -ENODEV;

	if (!evt->event_handler)
		return -EINVAL;

	evt->event_handler(evt);
	return 0;
}
#endif

/*
 * Broadcast the event to the cpus, which are set in the mask (mangled).
 */
static bool tick_do_broadcast(struct cpumask *mask)
{
	int cpu = smp_processor_id();
	struct tick_device *td;
	bool local = false;

	/*
	 * Check, if the current cpu is in the mask
	 */
	if (cpumask_test_cpu(cpu, mask)) {
		struct clock_event_device *bc = tick_broadcast_device.evtdev;

		cpumask_clear_cpu(cpu, mask);
		/*
		 * We only run the local handler, if the broadcast
		 * device is not hrtimer based. Otherwise we run into
		 * a hrtimer recursion.
		 *
		 * local timer_interrupt()
		 *   local_handler()
		 *     expire_hrtimers()
		 *       bc_handler()
		 *         local_handler()
		 *           expire_hrtimers()
		 */
		local = !(bc->features & CLOCK_EVT_FEAT_HRTIMER);
	}

	if (!cpumask_empty(mask)) {
		/*
		 * It might be necessary to actually check whether the devices
		 * have different broadcast functions. For now, just use the
		 * one of the first device. This works as long as we have this
		 * misfeature only on x86 (lapic)
		 */
		td = &per_cpu(tick_cpu_device, cpumask_first(mask));
		td->evtdev->broadcast(mask);
	}
	return local;
}

/*
 * Periodic broadcast:
 * - invoke the broadcast handlers
 */
static bool tick_do_periodic_broadcast(void)
{
	cpumask_and(tmpmask, cpu_online_mask, tick_broadcast_mask);
	return tick_do_broadcast(tmpmask);
}

/*
 * Event handler for periodic broadcast ticks
 */
static void tick_handle_periodic_broadcast(struct clock_event_device *dev)
{
	struct tick_device *td = this_cpu_ptr(&tick_cpu_device);
	bool bc_local;

	raw_spin_lock(&tick_broadcast_lock);

	/* Handle spurious interrupts gracefully */
	if (clockevent_state_shutdown(tick_broadcast_device.evtdev)) {
		raw_spin_unlock(&tick_broadcast_lock);
		return;
	}

	bc_local = tick_do_periodic_broadcast();

	if (clockevent_state_oneshot(dev)) {
		ktime_t next = ktime_add_ns(dev->next_event, TICK_NSEC);

		clockevents_program_event(dev, next, true);
	}
	raw_spin_unlock(&tick_broadcast_lock);

	/*
	 * We run the handler of the local cpu after dropping
	 * tick_broadcast_lock because the handler might deadlock when
	 * trying to switch to oneshot mode.
	 */
	if (bc_local)
		td->evtdev->event_handler(td->evtdev);
}

/**
 * tick_broadcast_control - Enable/disable or force broadcast mode
 * @mode:	The selected broadcast mode
 *
 * Called when the system enters a state where affected tick devices
 * might stop. Note: TICK_BROADCAST_FORCE cannot be undone.
 */
void tick_broadcast_control(enum tick_broadcast_mode mode)
{
	struct clock_event_device *bc, *dev;
	struct tick_device *td;
	int cpu, bc_stopped;
	unsigned long flags;

	/* Protects also the local clockevent device. */
	raw_spin_lock_irqsave(&tick_broadcast_lock, flags);
	td = this_cpu_ptr(&tick_cpu_device);
	dev = td->evtdev;

	/*
	 * Is the device not affected by the powerstate?
	 */
	if (!dev || !(dev->features & CLOCK_EVT_FEAT_C3STOP))
		goto out;

	if (!tick_device_is_functional(dev))
		goto out;

	cpu = smp_processor_id();
	bc = tick_broadcast_device.evtdev;
	bc_stopped = cpumask_empty(tick_broadcast_mask);

	switch (mode) {
	case TICK_BROADCAST_FORCE:
		tick_broadcast_forced = 1;
		fallthrough;
	case TICK_BROADCAST_ON:
		cpumask_set_cpu(cpu, tick_broadcast_on);
		if (!cpumask_test_and_set_cpu(cpu, tick_broadcast_mask)) {
			/*
			 * Only shutdown the cpu local device, if:
			 *
			 * - the broadcast device exists
			 * - the broadcast device is not a hrtimer based one
			 * - the broadcast device is in periodic mode to
			 *   avoid a hiccup during switch to oneshot mode
			 */
			if (bc && !(bc->features & CLOCK_EVT_FEAT_HRTIMER) &&
			    tick_broadcast_device.mode == TICKDEV_MODE_PERIODIC)
				clockevents_shutdown(dev);
		}
		break;

	case TICK_BROADCAST_OFF:
		if (tick_broadcast_forced)
			break;
		cpumask_clear_cpu(cpu, tick_broadcast_on);
		if (cpumask_test_and_clear_cpu(cpu, tick_broadcast_mask)) {
			if (tick_broadcast_device.mode ==
			    TICKDEV_MODE_PERIODIC)
				tick_setup_periodic(dev, 0);
		}
		break;
	}

	if (bc) {
		if (cpumask_empty(tick_broadcast_mask)) {
			if (!bc_stopped)
				clockevents_shutdown(bc);
		} else if (bc_stopped) {
			if (tick_broadcast_device.mode == TICKDEV_MODE_PERIODIC)
				tick_broadcast_start_periodic(bc);
			else
				tick_broadcast_setup_oneshot(bc);
		}
	}
out:
	raw_spin_unlock_irqrestore(&tick_broadcast_lock, flags);
}
EXPORT_SYMBOL_GPL(tick_broadcast_control);

/*
 * Set the periodic handler depending on broadcast on/off
 */
void tick_set_periodic_handler(struct clock_event_device *dev, int broadcast)
{
	if (!broadcast)
		dev->event_handler = tick_handle_periodic;
	else
		dev->event_handler = tick_handle_periodic_broadcast;
}

#ifdef CONFIG_HOTPLUG_CPU
static void tick_shutdown_broadcast(void)
{
	struct clock_event_device *bc = tick_broadcast_device.evtdev;

	if (tick_broadcast_device.mode == TICKDEV_MODE_PERIODIC) {
		if (bc && cpumask_empty(tick_broadcast_mask))
			clockevents_shutdown(bc);
	}
}

/*
 * Remove a CPU from broadcasting
 */
void tick_broadcast_offline(unsigned int cpu)
{
	raw_spin_lock(&tick_broadcast_lock);
	cpumask_clear_cpu(cpu, tick_broadcast_mask);
	cpumask_clear_cpu(cpu, tick_broadcast_on);
	tick_broadcast_oneshot_offline(cpu);
	tick_shutdown_broadcast();
	raw_spin_unlock(&tick_broadcast_lock);
}

#endif

void tick_suspend_broadcast(void)
{
	struct clock_event_device *bc;
	unsigned long flags;

	raw_spin_lock_irqsave(&tick_broadcast_lock, flags);

	bc = tick_broadcast_device.evtdev;
	if (bc)
		clockevents_shutdown(bc);

	raw_spin_unlock_irqrestore(&tick_broadcast_lock, flags);
}

/*
 * This is called from tick_resume_local() on a resuming CPU. That's
 * called from the core resume function, tick_unfreeze() and the magic XEN
 * resume hackery.
 *
 * In none of these cases the broadcast device mode can change and the
 * bit of the resuming CPU in the broadcast mask is safe as well.
 */
bool tick_resume_check_broadcast(void)
{
	if (tick_broadcast_device.mode == TICKDEV_MODE_ONESHOT)
		return false;
	else
		return cpumask_test_cpu(smp_processor_id(), tick_broadcast_mask);
}

void tick_resume_broadcast(void)
{
	struct clock_event_device *bc;
	unsigned long flags;

	raw_spin_lock_irqsave(&tick_broadcast_lock, flags);

	bc = tick_broadcast_device.evtdev;

	if (bc) {
		clockevents_tick_resume(bc);

		switch (tick_broadcast_device.mode) {
		case TICKDEV_MODE_PERIODIC:
			if (!cpumask_empty(tick_broadcast_mask))
				tick_broadcast_start_periodic(bc);
			break;
		case TICKDEV_MODE_ONESHOT:
			if (!cpumask_empty(tick_broadcast_mask))
				tick_resume_broadcast_oneshot(bc);
			break;
		}
	}
	raw_spin_unlock_irqrestore(&tick_broadcast_lock, flags);
}

#ifdef CONFIG_TICK_ONESHOT

static cpumask_var_t tick_broadcast_oneshot_mask __cpumask_var_read_mostly;
static cpumask_var_t tick_broadcast_pending_mask __cpumask_var_read_mostly;
static cpumask_var_t tick_broadcast_force_mask __cpumask_var_read_mostly;

/*
 * Exposed for debugging: see timer_list.c
 */
struct cpumask *tick_get_broadcast_oneshot_mask(void)
{
	return tick_broadcast_oneshot_mask;
}

/*
 * Called before going idle with interrupts disabled. Checks whether a
 * broadcast event from the other core is about to happen. We detected
 * that in tick_broadcast_oneshot_control(). The callsite can use this
 * to avoid a deep idle transition as we are about to get the
 * broadcast IPI right away.
 */
int tick_check_broadcast_expired(void)
{
	return cpumask_test_cpu(smp_processor_id(), tick_broadcast_force_mask);
}

/*
 * Set broadcast interrupt affinity
 */
static void tick_broadcast_set_affinity(struct clock_event_device *bc,
					const struct cpumask *cpumask)
{
	if (!(bc->features & CLOCK_EVT_FEAT_DYNIRQ))
		return;

	if (cpumask_equal(bc->cpumask, cpumask))
		return;

	bc->cpumask = cpumask;
	irq_set_affinity(bc->irq, bc->cpumask);
}

static void tick_broadcast_set_event(struct clock_event_device *bc, int cpu,
				     ktime_t expires)
{
	if (!clockevent_state_oneshot(bc))
		clockevents_switch_state(bc, CLOCK_EVT_STATE_ONESHOT);

	clockevents_program_event(bc, expires, 1);
	tick_broadcast_set_affinity(bc, cpumask_of(cpu));
}

static void tick_resume_broadcast_oneshot(struct clock_event_device *bc)
{
	clockevents_switch_state(bc, CLOCK_EVT_STATE_ONESHOT);
}

/*
 * Called from irq_enter() when idle was interrupted to reenable the
 * per cpu device.
 */
void tick_check_oneshot_broadcast_this_cpu(void)
{
	if (cpumask_test_cpu(smp_processor_id(), tick_broadcast_oneshot_mask)) {
		struct tick_device *td = this_cpu_ptr(&tick_cpu_device);

		/*
		 * We might be in the middle of switching over from
		 * periodic to oneshot. If the CPU has not yet
		 * switched over, leave the device alone.
		 */
		if (td->mode == TICKDEV_MODE_ONESHOT) {
			clockevents_switch_state(td->evtdev,
					      CLOCK_EVT_STATE_ONESHOT);
		}
	}
}

/*
 * Handle oneshot mode broadcasting
 */
static void tick_handle_oneshot_broadcast(struct clock_event_device *dev)
{
	struct tick_device *td;
	ktime_t now, next_event;
	int cpu, next_cpu = 0;
	bool bc_local;

	raw_spin_lock(&tick_broadcast_lock);
	dev->next_event = KTIME_MAX;
	next_event = KTIME_MAX;
	cpumask_clear(tmpmask);
	now = ktime_get();
	/* Find all expired events */
	for_each_cpu(cpu, tick_broadcast_oneshot_mask) {
		/*
		 * Required for !SMP because for_each_cpu() reports
		 * unconditionally CPU0 as set on UP kernels.
		 */
		if (!IS_ENABLED(CONFIG_SMP) &&
		    cpumask_empty(tick_broadcast_oneshot_mask))
			break;

		td = &per_cpu(tick_cpu_device, cpu);
		if (td->evtdev->next_event <= now) {
			cpumask_set_cpu(cpu, tmpmask);
			/*
			 * Mark the remote cpu in the pending mask, so
			 * it can avoid reprogramming the cpu local
			 * timer in tick_broadcast_oneshot_control().
			 */
			cpumask_set_cpu(cpu, tick_broadcast_pending_mask);
		} else if (td->evtdev->next_event < next_event) {
			next_event = td->evtdev->next_event;
			next_cpu = cpu;
		}
	}

	/*
	 * Remove the current cpu from the pending mask. The event is
	 * delivered immediately in tick_do_broadcast() !
	 */
	cpumask_clear_cpu(smp_processor_id(), tick_broadcast_pending_mask);

	/* Take care of enforced broadcast requests */
	cpumask_or(tmpmask, tmpmask, tick_broadcast_force_mask);
	cpumask_clear(tick_broadcast_force_mask);

	/*
	 * Sanity check. Catch the case where we try to broadcast to
	 * offline cpus.
	 */
	if (WARN_ON_ONCE(!cpumask_subset(tmpmask, cpu_online_mask)))
		cpumask_and(tmpmask, tmpmask, cpu_online_mask);

	/*
	 * Wakeup the cpus which have an expired event.
	 */
	bc_local = tick_do_broadcast(tmpmask);

	/*
	 * Two reasons for reprogram:
	 *
	 * - The global event did not expire any CPU local
	 *   events. This happens in dyntick mode, as the maximum PIT
	 *   delta is quite small.
	 *
	 * - There are pending events on sleeping CPUs which were not
	 *   in the event mask
	 */
	if (next_event != KTIME_MAX)
		tick_broadcast_set_event(dev, next_cpu, next_event);

	raw_spin_unlock(&tick_broadcast_lock);

	if (bc_local) {
		td = this_cpu_ptr(&tick_cpu_device);
		td->evtdev->event_handler(td->evtdev);
	}
}

static int broadcast_needs_cpu(struct clock_event_device *bc, int cpu)
{
	if (!(bc->features & CLOCK_EVT_FEAT_HRTIMER))
		return 0;
	if (bc->next_event == KTIME_MAX)
		return 0;
	return bc->bound_on == cpu ? -EBUSY : 0;
}

static void broadcast_shutdown_local(struct clock_event_device *bc,
				     struct clock_event_device *dev)
{
	/*
	 * For hrtimer based broadcasting we cannot shutdown the cpu
	 * local device if our own event is the first one to expire or
	 * if we own the broadcast timer.
	 */
	if (bc->features & CLOCK_EVT_FEAT_HRTIMER) {
		if (broadcast_needs_cpu(bc, smp_processor_id()))
			return;
		if (dev->next_event < bc->next_event)
			return;
	}
	clockevents_switch_state(dev, CLOCK_EVT_STATE_SHUTDOWN);
}

int __tick_broadcast_oneshot_control(enum tick_broadcast_state state)
{
	struct clock_event_device *bc, *dev;
	int cpu, ret = 0;
	ktime_t now;

	/*
	 * If there is no broadcast device, tell the caller not to go
	 * into deep idle.
	 */
	if (!tick_broadcast_device.evtdev)
		return -EBUSY;

	dev = this_cpu_ptr(&tick_cpu_device)->evtdev;

	raw_spin_lock(&tick_broadcast_lock);
	bc = tick_broadcast_device.evtdev;
	cpu = smp_processor_id();

	if (state == TICK_BROADCAST_ENTER) {
		/*
		 * If the current CPU owns the hrtimer broadcast
		 * mechanism, it cannot go deep idle and we do not add
		 * the CPU to the broadcast mask. We don't have to go
		 * through the EXIT path as the local timer is not
		 * shutdown.
		 */
		ret = broadcast_needs_cpu(bc, cpu);
		if (ret)
			goto out;

		/*
		 * If the broadcast device is in periodic mode, we
		 * return.
		 */
		if (tick_broadcast_device.mode == TICKDEV_MODE_PERIODIC) {
			/* If it is a hrtimer based broadcast, return busy */
			if (bc->features & CLOCK_EVT_FEAT_HRTIMER)
				ret = -EBUSY;
			goto out;
		}

		if (!cpumask_test_and_set_cpu(cpu, tick_broadcast_oneshot_mask)) {
			WARN_ON_ONCE(cpumask_test_cpu(cpu, tick_broadcast_pending_mask));

			/* Conditionally shut down the local timer. */
			broadcast_shutdown_local(bc, dev);

			/*
			 * We only reprogram the broadcast timer if we
			 * did not mark ourself in the force mask and
			 * if the cpu local event is earlier than the
			 * broadcast event. If the current CPU is in
			 * the force mask, then we are going to be
			 * woken by the IPI right away; we return
			 * busy, so the CPU does not try to go deep
			 * idle.
			 */
			if (cpumask_test_cpu(cpu, tick_broadcast_force_mask)) {
				ret = -EBUSY;
			} else if (dev->next_event < bc->next_event) {
				tick_broadcast_set_event(bc, cpu, dev->next_event);
				/*
				 * In case of hrtimer broadcasts the
				 * programming might have moved the
				 * timer to this cpu. If yes, remove
				 * us from the broadcast mask and
				 * return busy.
				 */
				ret = broadcast_needs_cpu(bc, cpu);
				if (ret) {
					cpumask_clear_cpu(cpu,
						tick_broadcast_oneshot_mask);
				}
			}
		}
	} else {
		if (cpumask_test_and_clear_cpu(cpu, tick_broadcast_oneshot_mask)) {
			clockevents_switch_state(dev, CLOCK_EVT_STATE_ONESHOT);
			/*
			 * The cpu which was handling the broadcast
			 * timer marked this cpu in the broadcast
			 * pending mask and fired the broadcast
			 * IPI. So we are going to handle the expired
			 * event anyway via the broadcast IPI
			 * handler. No need to reprogram the timer
			 * with an already expired event.
			 */
			if (cpumask_test_and_clear_cpu(cpu,
				       tick_broadcast_pending_mask))
				goto out;

			/*
			 * Bail out if there is no next event.
			 */
			if (dev->next_event == KTIME_MAX)
				goto out;
			/*
			 * If the pending bit is not set, then we are
			 * either the CPU handling the broadcast
			 * interrupt or we got woken by something else.
			 *
			 * We are no longer in the broadcast mask, so
			 * if the cpu local expiry time is already
			 * reached, we would reprogram the cpu local
			 * timer with an already expired event.
			 *
			 * This can lead to a ping-pong when we return
			 * to idle and therefore rearm the broadcast
			 * timer before the cpu local timer was able
			 * to fire. This happens because the forced
			 * reprogramming makes sure that the event
			 * will happen in the future and depending on
			 * the min_delta setting this might be far
			 * enough out that the ping-pong starts.
			 *
			 * If the cpu local next_event has expired
			 * then we know that the broadcast timer
			 * next_event has expired as well and
			 * broadcast is about to be handled. So we
			 * avoid reprogramming and enforce that the
			 * broadcast handler, which did not run yet,
			 * will invoke the cpu local handler.
			 *
			 * We cannot call the handler directly from
			 * here, because we might be in a NOHZ phase
			 * and we did not go through the irq_enter()
			 * nohz fixups.
			 */
			now = ktime_get();
			if (dev->next_event <= now) {
				cpumask_set_cpu(cpu, tick_broadcast_force_mask);
				goto out;
			}
			/*
			 * We got woken by something else. Reprogram
			 * the cpu local timer device.
			 */
			tick_program_event(dev->next_event, 1);
		}
	}
out:
	raw_spin_unlock(&tick_broadcast_lock);
	return ret;
}

/*
 * Reset the one shot broadcast for a cpu
 *
 * Called with tick_broadcast_lock held
 */
static void tick_broadcast_clear_oneshot(int cpu)
{
	cpumask_clear_cpu(cpu, tick_broadcast_oneshot_mask);
	cpumask_clear_cpu(cpu, tick_broadcast_pending_mask);
}

static void tick_broadcast_init_next_event(struct cpumask *mask,
					   ktime_t expires)
{
	struct tick_device *td;
	int cpu;

	for_each_cpu(cpu, mask) {
		td = &per_cpu(tick_cpu_device, cpu);
		if (td->evtdev)
			td->evtdev->next_event = expires;
	}
}

static inline ktime_t tick_get_next_period(void)
{
	ktime_t next;

	/*
	 * Protect against concurrent updates (store/load tearing on
	 * 32bit). It does not matter if the time is already in the
	 * past. The broadcast device which is about to be programmed will
	 * fire in any case.
	 */
	raw_spin_lock(&jiffies_lock);
	next = tick_next_period;
	raw_spin_unlock(&jiffies_lock);
	return next;
}

/**
 * tick_broadcast_setup_oneshot - setup the broadcast device
 */
static void tick_broadcast_setup_oneshot(struct clock_event_device *bc)
{
	int cpu = smp_processor_id();

	if (!bc)
		return;

	/* Set it up only once ! */
	if (bc->event_handler != tick_handle_oneshot_broadcast) {
		int was_periodic = clockevent_state_periodic(bc);

		bc->event_handler = tick_handle_oneshot_broadcast;

		/*
		 * We must be careful here. There might be other CPUs
		 * waiting for periodic broadcast. We need to set the
		 * oneshot_mask bits for those and program the
		 * broadcast device to fire.
		 */
		cpumask_copy(tmpmask, tick_broadcast_mask);
		cpumask_clear_cpu(cpu, tmpmask);
		cpumask_or(tick_broadcast_oneshot_mask,
			   tick_broadcast_oneshot_mask, tmpmask);

		if (was_periodic && !cpumask_empty(tmpmask)) {
			ktime_t nextevt = tick_get_next_period();

			clockevents_switch_state(bc, CLOCK_EVT_STATE_ONESHOT);
			tick_broadcast_init_next_event(tmpmask, nextevt);
			tick_broadcast_set_event(bc, cpu, nextevt);
		} else
			bc->next_event = KTIME_MAX;
	} else {
		/*
		 * The first cpu which switches to oneshot mode sets
		 * the bit for all other cpus which are in the general
		 * (periodic) broadcast mask. So the bit is set and
		 * would prevent the first broadcast enter after this
		 * to program the bc device.
		 */
		tick_broadcast_clear_oneshot(cpu);
	}
}

/*
 * Select oneshot operating mode for the broadcast device
 */
void tick_broadcast_switch_to_oneshot(void)
{
	struct clock_event_device *bc;
	unsigned long flags;

	raw_spin_lock_irqsave(&tick_broadcast_lock, flags);

	tick_broadcast_device.mode = TICKDEV_MODE_ONESHOT;
	bc = tick_broadcast_device.evtdev;
	if (bc)
		tick_broadcast_setup_oneshot(bc);

	raw_spin_unlock_irqrestore(&tick_broadcast_lock, flags);
}

#ifdef CONFIG_HOTPLUG_CPU
void hotplug_cpu__broadcast_tick_pull(int deadcpu)
{
	struct clock_event_device *bc;
	unsigned long flags;

	raw_spin_lock_irqsave(&tick_broadcast_lock, flags);
	bc = tick_broadcast_device.evtdev;

	if (bc && broadcast_needs_cpu(bc, deadcpu)) {
		/* This moves the broadcast assignment to this CPU: */
		clockevents_program_event(bc, bc->next_event, 1);
	}
	raw_spin_unlock_irqrestore(&tick_broadcast_lock, flags);
}

/*
 * Remove a dying CPU from broadcasting
 */
static void tick_broadcast_oneshot_offline(unsigned int cpu)
{
	/*
	 * Clear the broadcast masks for the dead cpu, but do not stop
	 * the broadcast device!
	 */
	cpumask_clear_cpu(cpu, tick_broadcast_oneshot_mask);
	cpumask_clear_cpu(cpu, tick_broadcast_pending_mask);
	cpumask_clear_cpu(cpu, tick_broadcast_force_mask);
}
#endif

/*
 * Check, whether the broadcast device is in one shot mode
 */
int tick_broadcast_oneshot_active(void)
{
	return tick_broadcast_device.mode == TICKDEV_MODE_ONESHOT;
}

/*
 * Check whether the broadcast device supports oneshot.
 */
bool tick_broadcast_oneshot_available(void)
{
	struct clock_event_device *bc = tick_broadcast_device.evtdev;

	return bc ? bc->features & CLOCK_EVT_FEAT_ONESHOT : false;
}

#else
int __tick_broadcast_oneshot_control(enum tick_broadcast_state state)
{
	struct clock_event_device *bc = tick_broadcast_device.evtdev;

	if (!bc || (bc->features & CLOCK_EVT_FEAT_HRTIMER))
		return -EBUSY;

	return 0;
}
#endif

void __init tick_broadcast_init(void)
{
	zalloc_cpumask_var(&tick_broadcast_mask, GFP_NOWAIT);
	zalloc_cpumask_var(&tick_broadcast_on, GFP_NOWAIT);
	zalloc_cpumask_var(&tmpmask, GFP_NOWAIT);
#ifdef CONFIG_TICK_ONESHOT
	zalloc_cpumask_var(&tick_broadcast_oneshot_mask, GFP_NOWAIT);
	zalloc_cpumask_var(&tick_broadcast_pending_mask, GFP_NOWAIT);
	zalloc_cpumask_var(&tick_broadcast_force_mask, GFP_NOWAIT);
#endif
}