// SPDX-License-Identifier: GPL-2.0
/*
 * Copyright 2018 Linaro Limited
 *
 * Author: Daniel Lezcano <daniel.lezcano@linaro.org>
 *
 * The idle injection framework provides a way to force CPUs to enter idle
 * states for a specified fraction of time over a specified period.
 *
 * It relies on the smpboot kthreads feature providing common code for CPU
 * hotplug and thread [un]parking.
 *
 * All of the kthreads used for idle injection are created at init time.
 *
 * Next, the users of the idle injection framework provide a cpumask via
 * its register function. The kthreads will be synchronized with respect to
 * this cpumask.
 *
 * The idle + run duration is specified via separate helpers and that allows
 * idle injection to be started.
 *
 * The idle injection kthreads will call play_idle_precise() with the idle
 * duration and max allowed latency specified as per the above.
 *
 * After all of them have been woken up, a timer is set to start the next idle
 * injection cycle.
 *
 * The timer interrupt handler will wake up the idle injection kthreads for
 * all of the CPUs in the cpumask provided by the user.
 *
 * Idle injection is stopped synchronously, and it is guaranteed that no idle
 * injection kthread activity is left over after its completion.
 *
 * It is up to the user of this framework to provide a lock for higher-level
 * synchronization to prevent race conditions like starting idle injection
 * while unregistering from the framework.
37 */ 38 #define pr_fmt(fmt) "ii_dev: " fmt 39 40 #include <linux/cpu.h> 41 #include <linux/hrtimer.h> 42 #include <linux/kthread.h> 43 #include <linux/sched.h> 44 #include <linux/slab.h> 45 #include <linux/smpboot.h> 46 #include <linux/idle_inject.h> 47 48 #include <uapi/linux/sched/types.h> 49 50 /** 51 * struct idle_inject_thread - task on/off switch structure 52 * @tsk: task injecting the idle cycles 53 * @should_run: whether or not to run the task (for the smpboot kthread API) 54 */ 55 struct idle_inject_thread { 56 struct task_struct *tsk; 57 int should_run; 58 }; 59 60 /** 61 * struct idle_inject_device - idle injection data 62 * @timer: idle injection period timer 63 * @idle_duration_us: duration of CPU idle time to inject 64 * @run_duration_us: duration of CPU run time to allow 65 * @latency_us: max allowed latency 66 * @update: Optional callback deciding whether or not to skip idle 67 * injection in the given cycle. 68 * @cpumask: mask of CPUs affected by idle injection 69 * 70 * This structure is used to define per instance idle inject device data. Each 71 * instance has an idle duration, a run duration and mask of CPUs to inject 72 * idle. 73 * 74 * Actual CPU idle time is injected by calling kernel scheduler interface 75 * play_idle_precise(). There is one optional callback that can be registered 76 * by calling idle_inject_register_full(): 77 * 78 * update() - This callback is invoked just before waking up CPUs to inject 79 * idle. If it returns false, CPUs are not woken up to inject idle in the given 80 * cycle. It also allows the caller to readjust the idle and run duration by 81 * calling idle_inject_set_duration() for the next cycle. 
82 */ 83 struct idle_inject_device { 84 struct hrtimer timer; 85 unsigned int idle_duration_us; 86 unsigned int run_duration_us; 87 unsigned int latency_us; 88 bool (*update)(void); 89 unsigned long cpumask[]; 90 }; 91 92 static DEFINE_PER_CPU(struct idle_inject_thread, idle_inject_thread); 93 static DEFINE_PER_CPU(struct idle_inject_device *, idle_inject_device); 94 95 /** 96 * idle_inject_wakeup - Wake up idle injection threads 97 * @ii_dev: target idle injection device 98 * 99 * Every idle injection task associated with the given idle injection device 100 * and running on an online CPU will be woken up. 101 */ 102 static void idle_inject_wakeup(struct idle_inject_device *ii_dev) 103 { 104 struct idle_inject_thread *iit; 105 unsigned int cpu; 106 107 for_each_cpu_and(cpu, to_cpumask(ii_dev->cpumask), cpu_online_mask) { 108 iit = per_cpu_ptr(&idle_inject_thread, cpu); 109 iit->should_run = 1; 110 wake_up_process(iit->tsk); 111 } 112 } 113 114 /** 115 * idle_inject_timer_fn - idle injection timer function 116 * @timer: idle injection hrtimer 117 * 118 * This function is called when the idle injection timer expires. It wakes up 119 * idle injection tasks associated with the timer and they, in turn, invoke 120 * play_idle_precise() to inject a specified amount of CPU idle time. 121 * 122 * Return: HRTIMER_RESTART. 
123 */ 124 static enum hrtimer_restart idle_inject_timer_fn(struct hrtimer *timer) 125 { 126 unsigned int duration_us; 127 struct idle_inject_device *ii_dev = 128 container_of(timer, struct idle_inject_device, timer); 129 130 if (!ii_dev->update || (ii_dev->update && ii_dev->update())) 131 idle_inject_wakeup(ii_dev); 132 133 duration_us = READ_ONCE(ii_dev->run_duration_us); 134 duration_us += READ_ONCE(ii_dev->idle_duration_us); 135 136 hrtimer_forward_now(timer, ns_to_ktime(duration_us * NSEC_PER_USEC)); 137 138 return HRTIMER_RESTART; 139 } 140 141 /** 142 * idle_inject_fn - idle injection work function 143 * @cpu: the CPU owning the task 144 * 145 * This function calls play_idle_precise() to inject a specified amount of CPU 146 * idle time. 147 */ 148 static void idle_inject_fn(unsigned int cpu) 149 { 150 struct idle_inject_device *ii_dev; 151 struct idle_inject_thread *iit; 152 153 ii_dev = per_cpu(idle_inject_device, cpu); 154 iit = per_cpu_ptr(&idle_inject_thread, cpu); 155 156 /* 157 * Let the smpboot main loop know that the task should not run again. 
158 */ 159 iit->should_run = 0; 160 161 play_idle_precise(READ_ONCE(ii_dev->idle_duration_us) * NSEC_PER_USEC, 162 READ_ONCE(ii_dev->latency_us) * NSEC_PER_USEC); 163 } 164 165 /** 166 * idle_inject_set_duration - idle and run duration update helper 167 * @ii_dev: idle injection control device structure 168 * @run_duration_us: CPU run time to allow in microseconds 169 * @idle_duration_us: CPU idle time to inject in microseconds 170 */ 171 void idle_inject_set_duration(struct idle_inject_device *ii_dev, 172 unsigned int run_duration_us, 173 unsigned int idle_duration_us) 174 { 175 if (run_duration_us + idle_duration_us) { 176 WRITE_ONCE(ii_dev->run_duration_us, run_duration_us); 177 WRITE_ONCE(ii_dev->idle_duration_us, idle_duration_us); 178 } 179 if (!run_duration_us) 180 pr_debug("CPU is forced to 100 percent idle\n"); 181 } 182 EXPORT_SYMBOL_NS_GPL(idle_inject_set_duration, IDLE_INJECT); 183 184 /** 185 * idle_inject_get_duration - idle and run duration retrieval helper 186 * @ii_dev: idle injection control device structure 187 * @run_duration_us: memory location to store the current CPU run time 188 * @idle_duration_us: memory location to store the current CPU idle time 189 */ 190 void idle_inject_get_duration(struct idle_inject_device *ii_dev, 191 unsigned int *run_duration_us, 192 unsigned int *idle_duration_us) 193 { 194 *run_duration_us = READ_ONCE(ii_dev->run_duration_us); 195 *idle_duration_us = READ_ONCE(ii_dev->idle_duration_us); 196 } 197 EXPORT_SYMBOL_NS_GPL(idle_inject_get_duration, IDLE_INJECT); 198 199 /** 200 * idle_inject_set_latency - set the maximum latency allowed 201 * @ii_dev: idle injection control device structure 202 * @latency_us: set the latency requirement for the idle state 203 */ 204 void idle_inject_set_latency(struct idle_inject_device *ii_dev, 205 unsigned int latency_us) 206 { 207 WRITE_ONCE(ii_dev->latency_us, latency_us); 208 } 209 EXPORT_SYMBOL_NS_GPL(idle_inject_set_latency, IDLE_INJECT); 210 211 /** 212 * idle_inject_start - 
start idle injections 213 * @ii_dev: idle injection control device structure 214 * 215 * The function starts idle injection by first waking up all of the idle 216 * injection kthreads associated with @ii_dev to let them inject CPU idle time 217 * sets up a timer to start the next idle injection period. 218 * 219 * Return: -EINVAL if the CPU idle or CPU run time is not set or 0 on success. 220 */ 221 int idle_inject_start(struct idle_inject_device *ii_dev) 222 { 223 unsigned int idle_duration_us = READ_ONCE(ii_dev->idle_duration_us); 224 unsigned int run_duration_us = READ_ONCE(ii_dev->run_duration_us); 225 226 if (!(idle_duration_us + run_duration_us)) 227 return -EINVAL; 228 229 pr_debug("Starting injecting idle cycles on CPUs '%*pbl'\n", 230 cpumask_pr_args(to_cpumask(ii_dev->cpumask))); 231 232 idle_inject_wakeup(ii_dev); 233 234 hrtimer_start(&ii_dev->timer, 235 ns_to_ktime((idle_duration_us + run_duration_us) * 236 NSEC_PER_USEC), 237 HRTIMER_MODE_REL); 238 239 return 0; 240 } 241 EXPORT_SYMBOL_NS_GPL(idle_inject_start, IDLE_INJECT); 242 243 /** 244 * idle_inject_stop - stops idle injections 245 * @ii_dev: idle injection control device structure 246 * 247 * The function stops idle injection and waits for the threads to finish work. 248 * If CPU idle time is being injected when this function runs, then it will 249 * wait until the end of the cycle. 250 * 251 * When it returns, there is no more idle injection kthread activity. The 252 * kthreads are scheduled out and the periodic timer is off. 
253 */ 254 void idle_inject_stop(struct idle_inject_device *ii_dev) 255 { 256 struct idle_inject_thread *iit; 257 unsigned int cpu; 258 259 pr_debug("Stopping idle injection on CPUs '%*pbl'\n", 260 cpumask_pr_args(to_cpumask(ii_dev->cpumask))); 261 262 hrtimer_cancel(&ii_dev->timer); 263 264 /* 265 * Stopping idle injection requires all of the idle injection kthreads 266 * associated with the given cpumask to be parked and stay that way, so 267 * prevent CPUs from going online at this point. Any CPUs going online 268 * after the loop below will be covered by clearing the should_run flag 269 * that will cause the smpboot main loop to schedule them out. 270 */ 271 cpu_hotplug_disable(); 272 273 /* 274 * Iterate over all (online + offline) CPUs here in case one of them 275 * goes offline with the should_run flag set so as to prevent its idle 276 * injection kthread from running when the CPU goes online again after 277 * the ii_dev has been freed. 278 */ 279 for_each_cpu(cpu, to_cpumask(ii_dev->cpumask)) { 280 iit = per_cpu_ptr(&idle_inject_thread, cpu); 281 iit->should_run = 0; 282 283 wait_task_inactive(iit->tsk, TASK_ANY); 284 } 285 286 cpu_hotplug_enable(); 287 } 288 EXPORT_SYMBOL_NS_GPL(idle_inject_stop, IDLE_INJECT); 289 290 /** 291 * idle_inject_setup - prepare the current task for idle injection 292 * @cpu: not used 293 * 294 * Called once, this function is in charge of setting the current task's 295 * scheduler parameters to make it an RT task. 296 */ 297 static void idle_inject_setup(unsigned int cpu) 298 { 299 sched_set_fifo(current); 300 } 301 302 /** 303 * idle_inject_should_run - function helper for the smpboot API 304 * @cpu: CPU the kthread is running on 305 * 306 * Return: whether or not the thread can run. 
307 */ 308 static int idle_inject_should_run(unsigned int cpu) 309 { 310 struct idle_inject_thread *iit = 311 per_cpu_ptr(&idle_inject_thread, cpu); 312 313 return iit->should_run; 314 } 315 316 /** 317 * idle_inject_register_full - initialize idle injection on a set of CPUs 318 * @cpumask: CPUs to be affected by idle injection 319 * @update: This callback is called just before waking up CPUs to inject 320 * idle 321 * 322 * This function creates an idle injection control device structure for the 323 * given set of CPUs and initializes the timer associated with it. This 324 * function also allows to register update()callback. 325 * It does not start any injection cycles. 326 * 327 * Return: NULL if memory allocation fails, idle injection control device 328 * pointer on success. 329 */ 330 331 struct idle_inject_device *idle_inject_register_full(struct cpumask *cpumask, 332 bool (*update)(void)) 333 { 334 struct idle_inject_device *ii_dev; 335 int cpu, cpu_rb; 336 337 ii_dev = kzalloc(sizeof(*ii_dev) + cpumask_size(), GFP_KERNEL); 338 if (!ii_dev) 339 return NULL; 340 341 cpumask_copy(to_cpumask(ii_dev->cpumask), cpumask); 342 hrtimer_init(&ii_dev->timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); 343 ii_dev->timer.function = idle_inject_timer_fn; 344 ii_dev->latency_us = UINT_MAX; 345 ii_dev->update = update; 346 347 for_each_cpu(cpu, to_cpumask(ii_dev->cpumask)) { 348 349 if (per_cpu(idle_inject_device, cpu)) { 350 pr_err("cpu%d is already registered\n", cpu); 351 goto out_rollback; 352 } 353 354 per_cpu(idle_inject_device, cpu) = ii_dev; 355 } 356 357 return ii_dev; 358 359 out_rollback: 360 for_each_cpu(cpu_rb, to_cpumask(ii_dev->cpumask)) { 361 if (cpu == cpu_rb) 362 break; 363 per_cpu(idle_inject_device, cpu_rb) = NULL; 364 } 365 366 kfree(ii_dev); 367 368 return NULL; 369 } 370 EXPORT_SYMBOL_NS_GPL(idle_inject_register_full, IDLE_INJECT); 371 372 /** 373 * idle_inject_register - initialize idle injection on a set of CPUs 374 * @cpumask: CPUs to be affected by idle 
injection 375 * 376 * This function creates an idle injection control device structure for the 377 * given set of CPUs and initializes the timer associated with it. It does not 378 * start any injection cycles. 379 * 380 * Return: NULL if memory allocation fails, idle injection control device 381 * pointer on success. 382 */ 383 struct idle_inject_device *idle_inject_register(struct cpumask *cpumask) 384 { 385 return idle_inject_register_full(cpumask, NULL); 386 } 387 EXPORT_SYMBOL_NS_GPL(idle_inject_register, IDLE_INJECT); 388 389 /** 390 * idle_inject_unregister - unregister idle injection control device 391 * @ii_dev: idle injection control device to unregister 392 * 393 * The function stops idle injection for the given control device, 394 * unregisters its kthreads and frees memory allocated when that device was 395 * created. 396 */ 397 void idle_inject_unregister(struct idle_inject_device *ii_dev) 398 { 399 unsigned int cpu; 400 401 idle_inject_stop(ii_dev); 402 403 for_each_cpu(cpu, to_cpumask(ii_dev->cpumask)) 404 per_cpu(idle_inject_device, cpu) = NULL; 405 406 kfree(ii_dev); 407 } 408 EXPORT_SYMBOL_NS_GPL(idle_inject_unregister, IDLE_INJECT); 409 410 static struct smp_hotplug_thread idle_inject_threads = { 411 .store = &idle_inject_thread.tsk, 412 .setup = idle_inject_setup, 413 .thread_fn = idle_inject_fn, 414 .thread_comm = "idle_inject/%u", 415 .thread_should_run = idle_inject_should_run, 416 }; 417 418 static int __init idle_inject_init(void) 419 { 420 return smpboot_register_percpu_thread(&idle_inject_threads); 421 } 422 early_initcall(idle_inject_init); 423