// SPDX-License-Identifier: GPL-2.0
/*
 * This file contains the base functions to manage periodic tick
 * related events.
 *
 * Copyright(C) 2005-2006, Thomas Gleixner <tglx@linutronix.de>
 * Copyright(C) 2005-2007, Red Hat, Inc., Ingo Molnar
 * Copyright(C) 2006-2007, Timesys Corp., Thomas Gleixner
 */
#include <linux/cpu.h>
#include <linux/err.h>
#include <linux/hrtimer.h>
#include <linux/interrupt.h>
#include <linux/percpu.h>
#include <linux/profile.h>
#include <linux/sched.h>
#include <linux/module.h>
#include <trace/events/power.h>

#include <asm/irq_regs.h>

#include "tick-internal.h"

/*
 * Tick devices
 */
DEFINE_PER_CPU(struct tick_device, tick_cpu_device);
/*
 * Tick next event: keeps track of the tick time
 */
ktime_t tick_next_period;
ktime_t tick_period;

/*
 * tick_do_timer_cpu is a timer core internal variable which holds the CPU NR
 * which is responsible for calling do_timer(), i.e. the timekeeping stuff. This
 * variable has two functions:
 *
 * 1) Prevent a thundering herd issue of a gazillion of CPUs trying to grab the
 *    timekeeping lock all at once. Only the CPU which is assigned to do the
 *    update is handling it.
 *
 * 2) Hand off the duty in the NOHZ idle case by setting the value to
 *    TICK_DO_TIMER_NONE, i.e. a non existing CPU. So the next cpu which looks
 *    at it will take over and keep the time keeping alive. The handover
 *    procedure also covers cpu hotplug.
 */
int tick_do_timer_cpu __read_mostly = TICK_DO_TIMER_BOOT;

/*
 * Debugging: see timer_list.c
 */
struct tick_device *tick_get_device(int cpu)
{
	return &per_cpu(tick_cpu_device, cpu);
}

/**
 * tick_is_oneshot_available - check for a oneshot capable event device
 */
int tick_is_oneshot_available(void)
{
	struct clock_event_device *dev = __this_cpu_read(tick_cpu_device.evtdev);

	if (!dev || !(dev->features & CLOCK_EVT_FEAT_ONESHOT))
		return 0;
	if (!(dev->features & CLOCK_EVT_FEAT_C3STOP))
		return 1;
	return tick_broadcast_oneshot_available();
}

/*
 * Periodic tick
 */
static void tick_periodic(int cpu)
{
	if (tick_do_timer_cpu == cpu) {
		write_seqlock(&jiffies_lock);

		/* Keep track of the next tick event */
		tick_next_period = ktime_add(tick_next_period, tick_period);

		do_timer(1);
		write_sequnlock(&jiffies_lock);
		update_wall_time();
	}

	update_process_times(user_mode(get_irq_regs()));
	profile_tick(CPU_PROFILING);
}

/*
 * Event handler for periodic ticks
 */
void tick_handle_periodic(struct clock_event_device *dev)
{
	int cpu = smp_processor_id();
	ktime_t next = dev->next_event;

	tick_periodic(cpu);

#if defined(CONFIG_HIGH_RES_TIMERS) || defined(CONFIG_NO_HZ_COMMON)
	/*
	 * The cpu might have transitioned to HIGHRES or NOHZ mode via
	 * update_process_times() -> run_local_timers() ->
	 * hrtimer_run_queues().
	 */
	if (dev->event_handler != tick_handle_periodic)
		return;
#endif

	if (!clockevent_state_oneshot(dev))
		return;
	for (;;) {
		/*
		 * Setup the next period for devices, which do not have
		 * periodic mode:
		 */
		next = ktime_add(next, tick_period);

		if (!clockevents_program_event(dev, next, false))
			return;
		/*
		 * Have to be careful here. If we're in oneshot mode,
		 * before we call tick_periodic() in a loop, we need
		 * to be sure we're using a real hardware clocksource.
		 * Otherwise we could get trapped in an infinite
		 * loop, as the tick_periodic() increments jiffies,
		 * which then will increment time, possibly causing
		 * the loop to trigger again and again.
		 */
		if (timekeeping_valid_for_hres())
			tick_periodic(cpu);
	}
}
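
/*
 * Illustrative sketch, not part of the original file: a clock event
 * driver typically hands its device to the core with
 * clockevents_config_and_register(); the tick core then picks it up via
 * tick_check_new_device() and installs tick_handle_periodic() (or the
 * broadcast variant) as its handler through tick_setup_periodic() below.
 * The device name, callback and delta limits here are made up:
 *
 *	static struct clock_event_device example_ce = {
 *		.name		= "example-timer",
 *		.features	= CLOCK_EVT_FEAT_PERIODIC | CLOCK_EVT_FEAT_ONESHOT,
 *		.rating		= 300,
 *		.set_next_event	= example_set_next_event,
 *	};
 *
 *	example_ce.cpumask = cpumask_of(smp_processor_id());
 *	clockevents_config_and_register(&example_ce, freq_hz, 2, 0x7fffffff);
 */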

/*
 * Setup the device for a periodic tick
 */
void tick_setup_periodic(struct clock_event_device *dev, int broadcast)
{
	tick_set_periodic_handler(dev, broadcast);

	/* Broadcast setup ? */
	if (!tick_device_is_functional(dev))
		return;

	if ((dev->features & CLOCK_EVT_FEAT_PERIODIC) &&
	    !tick_broadcast_oneshot_active()) {
		clockevents_switch_state(dev, CLOCK_EVT_STATE_PERIODIC);
	} else {
		unsigned long seq;
		ktime_t next;

		do {
			seq = read_seqbegin(&jiffies_lock);
			next = tick_next_period;
		} while (read_seqretry(&jiffies_lock, seq));

		clockevents_switch_state(dev, CLOCK_EVT_STATE_ONESHOT);

		for (;;) {
			if (!clockevents_program_event(dev, next, false))
				return;
			next = ktime_add(next, tick_period);
		}
	}
}

/*
 * Setup the tick device
 */
static void tick_setup_device(struct tick_device *td,
			      struct clock_event_device *newdev, int cpu,
			      const struct cpumask *cpumask)
{
	void (*handler)(struct clock_event_device *) = NULL;
	ktime_t next_event = 0;

	/*
	 * First device setup ?
	 */
	if (!td->evtdev) {
		/*
		 * If no cpu took the do_timer update, assign it to
		 * this cpu:
		 */
		if (tick_do_timer_cpu == TICK_DO_TIMER_BOOT) {
			if (!tick_nohz_full_cpu(cpu))
				tick_do_timer_cpu = cpu;
			else
				tick_do_timer_cpu = TICK_DO_TIMER_NONE;
			tick_next_period = ktime_get();
			tick_period = NSEC_PER_SEC / HZ;
		}

		/*
		 * Startup in periodic mode first.
		 */
		td->mode = TICKDEV_MODE_PERIODIC;
	} else {
		handler = td->evtdev->event_handler;
		next_event = td->evtdev->next_event;
		td->evtdev->event_handler = clockevents_handle_noop;
	}

	td->evtdev = newdev;

	/*
	 * When the device is not per cpu, pin the interrupt to the
	 * current cpu:
	 */
	if (!cpumask_equal(newdev->cpumask, cpumask))
		irq_set_affinity(newdev->irq, cpumask);

	/*
	 * When global broadcasting is active, check if the current
	 * device is registered as a placeholder for broadcast mode.
	 * This allows us to handle this x86 misfeature in a generic
	 * way. This function also returns !=0 when we keep the
	 * current active broadcast state for this CPU.
	 */
	if (tick_device_uses_broadcast(newdev, cpu))
		return;

	if (td->mode == TICKDEV_MODE_PERIODIC)
		tick_setup_periodic(newdev, 0);
	else
		tick_setup_oneshot(newdev, handler, next_event);
}
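
/*
 * Worked example, for illustration only: with the common HZ values the
 * tick_period computed in tick_setup_device() above is
 *
 *	HZ == 100	-> 1,000,000,000 / 100  = 10,000,000 ns = 10 ms
 *	HZ == 250	-> 1,000,000,000 / 250  =  4,000,000 ns =  4 ms
 *	HZ == 1000	-> 1,000,000,000 / 1000 =  1,000,000 ns =  1 ms
 *
 * i.e. the distance between two periodic tick events programmed by the
 * functions above.
 */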

void tick_install_replacement(struct clock_event_device *newdev)
{
	struct tick_device *td = this_cpu_ptr(&tick_cpu_device);
	int cpu = smp_processor_id();

	clockevents_exchange_device(td->evtdev, newdev);
	tick_setup_device(td, newdev, cpu, cpumask_of(cpu));
	if (newdev->features & CLOCK_EVT_FEAT_ONESHOT)
		tick_oneshot_notify();
}

static bool tick_check_percpu(struct clock_event_device *curdev,
			      struct clock_event_device *newdev, int cpu)
{
	if (!cpumask_test_cpu(cpu, newdev->cpumask))
		return false;
	if (cpumask_equal(newdev->cpumask, cpumask_of(cpu)))
		return true;
	/* Check if irq affinity can be set */
	if (newdev->irq >= 0 && !irq_can_set_affinity(newdev->irq))
		return false;
	/* Prefer an existing cpu local device */
	if (curdev && cpumask_equal(curdev->cpumask, cpumask_of(cpu)))
		return false;
	return true;
}

static bool tick_check_preferred(struct clock_event_device *curdev,
				 struct clock_event_device *newdev)
{
	/* Prefer oneshot capable device */
	if (!(newdev->features & CLOCK_EVT_FEAT_ONESHOT)) {
		if (curdev && (curdev->features & CLOCK_EVT_FEAT_ONESHOT))
			return false;
		if (tick_oneshot_mode_active())
			return false;
	}

	/*
	 * Use the higher rated one, but prefer a CPU local device with a lower
	 * rating than a non-CPU local device
	 */
	return !curdev ||
		newdev->rating > curdev->rating ||
		!cpumask_equal(curdev->cpumask, newdev->cpumask);
}

/*
 * Check whether the new device is a better fit than curdev. curdev
 * can be NULL !
 */
bool tick_check_replacement(struct clock_event_device *curdev,
			    struct clock_event_device *newdev)
{
	if (!tick_check_percpu(curdev, newdev, smp_processor_id()))
		return false;

	return tick_check_preferred(curdev, newdev);
}

/*
 * Check whether the newly registered device should be used. Called with
 * clockevents_lock held and interrupts disabled.
 */
void tick_check_new_device(struct clock_event_device *newdev)
{
	struct clock_event_device *curdev;
	struct tick_device *td;
	int cpu;

	cpu = smp_processor_id();
	td = &per_cpu(tick_cpu_device, cpu);
	curdev = td->evtdev;

	/* cpu local device ? */
	if (!tick_check_percpu(curdev, newdev, cpu))
		goto out_bc;

	/* Preference decision */
	if (!tick_check_preferred(curdev, newdev))
		goto out_bc;

	if (!try_module_get(newdev->owner))
		return;

	/*
	 * Replace the existing device, if any, by the new device. If the
	 * current device is the broadcast device, do not give it back to
	 * the clockevents layer !
	 */
	if (tick_is_broadcast_device(curdev)) {
		clockevents_shutdown(curdev);
		curdev = NULL;
	}
	clockevents_exchange_device(curdev, newdev);
	tick_setup_device(td, newdev, cpu, cpumask_of(cpu));
	if (newdev->features & CLOCK_EVT_FEAT_ONESHOT)
		tick_oneshot_notify();
	return;

out_bc:
	/*
	 * Can the new device be used as a broadcast device ?
	 */
	tick_install_broadcast_device(newdev);
}
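
/*
 * Illustrative decision example with made-up devices and ratings:
 * assume CPU0 currently ticks with a global, periodic-only device
 * "tmr-global" (rating 25, cpumask spanning all CPUs) and a per-CPU,
 * oneshot-capable device "tmr-local" (rating 300, cpumask_of(0)) gets
 * registered on CPU0. Then
 *
 *	tick_check_percpu()    -> true  (newdev covers this CPU)
 *	tick_check_preferred() -> true  (oneshot capable, higher rating)
 *
 * and tick_check_new_device() exchanges the devices and installs
 * "tmr-local" as CPU0's tick device; the released global device is
 * typically offered to tick_install_broadcast_device() afterwards.
 */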

/**
 * tick_broadcast_oneshot_control - Enter/exit broadcast oneshot mode
 * @state:	The target state (enter/exit)
 *
 * The system enters/leaves a state where affected devices might stop.
 * Returns 0 on success, -EBUSY if the cpu is used to broadcast wakeups.
 *
 * Called with interrupts disabled, so clockevents_lock is not
 * required here because the local clock event device cannot go away
 * under us.
 */
int tick_broadcast_oneshot_control(enum tick_broadcast_state state)
{
	struct tick_device *td = this_cpu_ptr(&tick_cpu_device);

	if (!(td->evtdev->features & CLOCK_EVT_FEAT_C3STOP))
		return 0;

	return __tick_broadcast_oneshot_control(state);
}
EXPORT_SYMBOL_GPL(tick_broadcast_oneshot_control);

#ifdef CONFIG_HOTPLUG_CPU
/*
 * Transfer the do_timer job away from a dying cpu.
 *
 * Called with interrupts disabled. No locking required. If
 * tick_do_timer_cpu is owned by this cpu, nothing can change it.
 */
void tick_handover_do_timer(void)
{
	if (tick_do_timer_cpu == smp_processor_id()) {
		int cpu = cpumask_first(cpu_online_mask);

		tick_do_timer_cpu = (cpu < nr_cpu_ids) ? cpu :
			TICK_DO_TIMER_NONE;
	}
}

/*
 * Shutdown an event device on a given cpu:
 *
 * This is called on a live CPU, when the CPU in question is dead. So
 * we cannot access the hardware device itself.
 * We just set the mode and remove it from the lists.
 */
void tick_shutdown(unsigned int cpu)
{
	struct tick_device *td = &per_cpu(tick_cpu_device, cpu);
	struct clock_event_device *dev = td->evtdev;

	td->mode = TICKDEV_MODE_PERIODIC;
	if (dev) {
		/*
		 * Prevent the clock events layer from trying to call
		 * the set mode function!
		 */
		clockevent_set_state(dev, CLOCK_EVT_STATE_DETACHED);
		clockevents_exchange_device(dev, NULL);
		dev->event_handler = clockevents_handle_noop;
		td->evtdev = NULL;
	}
}
#endif
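
/*
 * Illustrative hotplug ordering, sketched from the comments above and not
 * taken verbatim from the hotplug core: the outgoing CPU first gives away
 * the do_timer duty, and a surviving CPU later cleans up the dead CPU's
 * tick device:
 *
 *	// on the outgoing CPU, interrupts disabled
 *	tick_handover_do_timer();
 *	...
 *	// later, on a live CPU, once the outgoing CPU is dead
 *	tick_shutdown(dead_cpu);
 *
 * The variable name dead_cpu is just a placeholder for the CPU number
 * being removed.
 */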

/**
 * tick_suspend_local - Suspend the local tick device
 *
 * Called from the local cpu for freeze with interrupts disabled.
 *
 * No locks required. Nothing can change the per cpu device.
 */
void tick_suspend_local(void)
{
	struct tick_device *td = this_cpu_ptr(&tick_cpu_device);

	clockevents_shutdown(td->evtdev);
}

/**
 * tick_resume_local - Resume the local tick device
 *
 * Called from the local CPU for unfreeze or XEN resume magic.
 *
 * No locks required. Nothing can change the per cpu device.
 */
void tick_resume_local(void)
{
	struct tick_device *td = this_cpu_ptr(&tick_cpu_device);
	bool broadcast = tick_resume_check_broadcast();

	clockevents_tick_resume(td->evtdev);
	if (!broadcast) {
		if (td->mode == TICKDEV_MODE_PERIODIC)
			tick_setup_periodic(td->evtdev, 0);
		else
			tick_resume_oneshot();
	}
}

/**
 * tick_suspend - Suspend the tick and the broadcast device
 *
 * Called from syscore_suspend() via timekeeping_suspend with only one
 * CPU online and interrupts disabled or from tick_freeze() under
 * tick_freeze_lock.
 *
 * No locks required. Nothing can change the per cpu device.
 */
void tick_suspend(void)
{
	tick_suspend_local();
	tick_suspend_broadcast();
}

/**
 * tick_resume - Resume the tick and the broadcast device
 *
 * Called from syscore_resume() via timekeeping_resume with only one
 * CPU online and interrupts disabled.
 *
 * No locks required. Nothing can change the per cpu device.
 */
void tick_resume(void)
{
	tick_resume_broadcast();
	tick_resume_local();
}

#ifdef CONFIG_SUSPEND
static DEFINE_RAW_SPINLOCK(tick_freeze_lock);
static unsigned int tick_freeze_depth;

/**
 * tick_freeze - Suspend the local tick and (possibly) timekeeping.
 *
 * Check if this is the last online CPU executing the function and if so,
 * suspend timekeeping. Otherwise suspend the local tick.
 *
 * Call with interrupts disabled. Must be balanced with %tick_unfreeze().
 * Interrupts must not be enabled before the subsequent %tick_unfreeze().
 */
void tick_freeze(void)
{
	raw_spin_lock(&tick_freeze_lock);

	tick_freeze_depth++;
	if (tick_freeze_depth == num_online_cpus()) {
		trace_suspend_resume(TPS("timekeeping_freeze"),
				     smp_processor_id(), true);
		system_state = SYSTEM_SUSPEND;
		timekeeping_suspend();
	} else {
		tick_suspend_local();
	}

	raw_spin_unlock(&tick_freeze_lock);
}

/**
 * tick_unfreeze - Resume the local tick and (possibly) timekeeping.
 *
 * Check if this is the first CPU executing the function and if so, resume
 * timekeeping. Otherwise resume the local tick.
 *
 * Call with interrupts disabled. Must be balanced with %tick_freeze().
 * Interrupts must not be enabled after the preceding %tick_freeze().
 */
void tick_unfreeze(void)
{
	raw_spin_lock(&tick_freeze_lock);

	if (tick_freeze_depth == num_online_cpus()) {
		timekeeping_resume();
		system_state = SYSTEM_RUNNING;
		trace_suspend_resume(TPS("timekeeping_freeze"),
				     smp_processor_id(), false);
	} else {
		tick_resume_local();
	}

	tick_freeze_depth--;

	raw_spin_unlock(&tick_freeze_lock);
}
#endif /* CONFIG_SUSPEND */

/**
 * tick_init - initialize the tick control
 */
void __init tick_init(void)
{
	tick_broadcast_init();
	tick_nohz_init();
}
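
/*
 * Usage sketch for the tick_freeze()/tick_unfreeze() pair above, assuming
 * a suspend-to-idle style caller as described in their kernel-doc; the
 * surrounding code is illustrative only:
 *
 *	local_irq_disable();
 *	tick_freeze();
 *	// enter the low power state; the last CPU to get here has also
 *	// suspended timekeeping
 *	...
 *	tick_unfreeze();
 *	local_irq_enable();
 *
 * The calls must be balanced and interrupts must stay disabled between
 * them, as required by the comments above.
 */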