1 // SPDX-License-Identifier: GPL-2.0 2 /* 3 * Copyright 2018 Linaro Limited 4 * 5 * Author: Daniel Lezcano <daniel.lezcano@linaro.org> 6 * 7 * The idle injection framework provides a way to force CPUs to enter idle 8 * states for a specified fraction of time over a specified period. 9 * 10 * It relies on the smpboot kthreads feature providing common code for CPU 11 * hotplug and thread [un]parking. 12 * 13 * All of the kthreads used for idle injection are created at init time. 14 * 15 * Next, the users of the idle injection framework provide a cpumask via 16 * its register function. The kthreads will be synchronized with respect to 17 * this cpumask. 18 * 19 * The idle + run duration is specified via separate helpers and that allows 20 * idle injection to be started. 21 * 22 * The idle injection kthreads will call play_idle_precise() with the idle 23 * duration and max allowed latency specified as per the above. 24 * 25 * After all of them have been woken up, a timer is set to start the next idle 26 * injection cycle. 27 * 28 * The timer interrupt handler will wake up the idle injection kthreads for 29 * all of the CPUs in the cpumask provided by the user. 30 * 31 * Idle injection is stopped synchronously and no leftover idle injection 32 * kthread activity after its completion is guaranteed. 33 * 34 * It is up to the user of this framework to provide a lock for higher-level 35 * synchronization to prevent race conditions like starting idle injection 36 * while unregistering from the framework. 37 */ 38 #define pr_fmt(fmt) "ii_dev: " fmt 39 40 #include <linux/cpu.h> 41 #include <linux/hrtimer.h> 42 #include <linux/kthread.h> 43 #include <linux/sched.h> 44 #include <linux/slab.h> 45 #include <linux/smpboot.h> 46 #include <linux/idle_inject.h> 47 48 #include <uapi/linux/sched/types.h> 49 50 /** 51 * struct idle_inject_thread - task on/off switch structure 52 * @tsk: task injecting the idle cycles 53 * @should_run: whether or not to run the task (for the smpboot kthread API) 54 */ 55 struct idle_inject_thread { 56 struct task_struct *tsk; 57 int should_run; 58 }; 59 60 /** 61 * struct idle_inject_device - idle injection data 62 * @timer: idle injection period timer 63 * @idle_duration_us: duration of CPU idle time to inject 64 * @run_duration_us: duration of CPU run time to allow 65 * @latency_us: max allowed latency 66 * @cpumask: mask of CPUs affected by idle injection 67 */ 68 struct idle_inject_device { 69 struct hrtimer timer; 70 unsigned int idle_duration_us; 71 unsigned int run_duration_us; 72 unsigned int latency_us; 73 unsigned long cpumask[]; 74 }; 75 76 static DEFINE_PER_CPU(struct idle_inject_thread, idle_inject_thread); 77 static DEFINE_PER_CPU(struct idle_inject_device *, idle_inject_device); 78 79 /** 80 * idle_inject_wakeup - Wake up idle injection threads 81 * @ii_dev: target idle injection device 82 * 83 * Every idle injection task associated with the given idle injection device 84 * and running on an online CPU will be woken up. 85 */ 86 static void idle_inject_wakeup(struct idle_inject_device *ii_dev) 87 { 88 struct idle_inject_thread *iit; 89 unsigned int cpu; 90 91 for_each_cpu_and(cpu, to_cpumask(ii_dev->cpumask), cpu_online_mask) { 92 iit = per_cpu_ptr(&idle_inject_thread, cpu); 93 iit->should_run = 1; 94 wake_up_process(iit->tsk); 95 } 96 } 97 98 /** 99 * idle_inject_timer_fn - idle injection timer function 100 * @timer: idle injection hrtimer 101 * 102 * This function is called when the idle injection timer expires. It wakes up 103 * idle injection tasks associated with the timer and they, in turn, invoke 104 * play_idle_precise() to inject a specified amount of CPU idle time. 105 * 106 * Return: HRTIMER_RESTART. 107 */ 108 static enum hrtimer_restart idle_inject_timer_fn(struct hrtimer *timer) 109 { 110 unsigned int duration_us; 111 struct idle_inject_device *ii_dev = 112 container_of(timer, struct idle_inject_device, timer); 113 114 duration_us = READ_ONCE(ii_dev->run_duration_us); 115 duration_us += READ_ONCE(ii_dev->idle_duration_us); 116 117 idle_inject_wakeup(ii_dev); 118 119 hrtimer_forward_now(timer, ns_to_ktime(duration_us * NSEC_PER_USEC)); 120 121 return HRTIMER_RESTART; 122 } 123 124 /** 125 * idle_inject_fn - idle injection work function 126 * @cpu: the CPU owning the task 127 * 128 * This function calls play_idle_precise() to inject a specified amount of CPU 129 * idle time. 130 */ 131 static void idle_inject_fn(unsigned int cpu) 132 { 133 struct idle_inject_device *ii_dev; 134 struct idle_inject_thread *iit; 135 136 ii_dev = per_cpu(idle_inject_device, cpu); 137 iit = per_cpu_ptr(&idle_inject_thread, cpu); 138 139 /* 140 * Let the smpboot main loop know that the task should not run again. 141 */ 142 iit->should_run = 0; 143 144 play_idle_precise(READ_ONCE(ii_dev->idle_duration_us) * NSEC_PER_USEC, 145 READ_ONCE(ii_dev->latency_us) * NSEC_PER_USEC); 146 } 147 148 /** 149 * idle_inject_set_duration - idle and run duration update helper 150 * @run_duration_us: CPU run time to allow in microseconds 151 * @idle_duration_us: CPU idle time to inject in microseconds 152 */ 153 void idle_inject_set_duration(struct idle_inject_device *ii_dev, 154 unsigned int run_duration_us, 155 unsigned int idle_duration_us) 156 { 157 if (run_duration_us && idle_duration_us) { 158 WRITE_ONCE(ii_dev->run_duration_us, run_duration_us); 159 WRITE_ONCE(ii_dev->idle_duration_us, idle_duration_us); 160 } 161 } 162 163 /** 164 * idle_inject_get_duration - idle and run duration retrieval helper 165 * @run_duration_us: memory location to store the current CPU run time 166 * @idle_duration_us: memory location to store the current CPU idle time 167 */ 168 void idle_inject_get_duration(struct idle_inject_device *ii_dev, 169 unsigned int *run_duration_us, 170 unsigned int *idle_duration_us) 171 { 172 *run_duration_us = READ_ONCE(ii_dev->run_duration_us); 173 *idle_duration_us = READ_ONCE(ii_dev->idle_duration_us); 174 } 175 176 /** 177 * idle_inject_set_latency - set the maximum latency allowed 178 * @latency_us: set the latency requirement for the idle state 179 */ 180 void idle_inject_set_latency(struct idle_inject_device *ii_dev, 181 unsigned int latency_us) 182 { 183 WRITE_ONCE(ii_dev->latency_us, latency_us); 184 } 185 186 /** 187 * idle_inject_start - start idle injections 188 * @ii_dev: idle injection control device structure 189 * 190 * The function starts idle injection by first waking up all of the idle 191 * injection kthreads associated with @ii_dev to let them inject CPU idle time 192 * sets up a timer to start the next idle injection period. 193 * 194 * Return: -EINVAL if the CPU idle or CPU run time is not set or 0 on success. 195 */ 196 int idle_inject_start(struct idle_inject_device *ii_dev) 197 { 198 unsigned int idle_duration_us = READ_ONCE(ii_dev->idle_duration_us); 199 unsigned int run_duration_us = READ_ONCE(ii_dev->run_duration_us); 200 201 if (!idle_duration_us || !run_duration_us) 202 return -EINVAL; 203 204 pr_debug("Starting injecting idle cycles on CPUs '%*pbl'\n", 205 cpumask_pr_args(to_cpumask(ii_dev->cpumask))); 206 207 idle_inject_wakeup(ii_dev); 208 209 hrtimer_start(&ii_dev->timer, 210 ns_to_ktime((idle_duration_us + run_duration_us) * 211 NSEC_PER_USEC), 212 HRTIMER_MODE_REL); 213 214 return 0; 215 } 216 217 /** 218 * idle_inject_stop - stops idle injections 219 * @ii_dev: idle injection control device structure 220 * 221 * The function stops idle injection and waits for the threads to finish work. 222 * If CPU idle time is being injected when this function runs, then it will 223 * wait until the end of the cycle. 224 * 225 * When it returns, there is no more idle injection kthread activity. The 226 * kthreads are scheduled out and the periodic timer is off. 227 */ 228 void idle_inject_stop(struct idle_inject_device *ii_dev) 229 { 230 struct idle_inject_thread *iit; 231 unsigned int cpu; 232 233 pr_debug("Stopping idle injection on CPUs '%*pbl'\n", 234 cpumask_pr_args(to_cpumask(ii_dev->cpumask))); 235 236 hrtimer_cancel(&ii_dev->timer); 237 238 /* 239 * Stopping idle injection requires all of the idle injection kthreads 240 * associated with the given cpumask to be parked and stay that way, so 241 * prevent CPUs from going online at this point. Any CPUs going online 242 * after the loop below will be covered by clearing the should_run flag 243 * that will cause the smpboot main loop to schedule them out. 244 */ 245 cpu_hotplug_disable(); 246 247 /* 248 * Iterate over all (online + offline) CPUs here in case one of them 249 * goes offline with the should_run flag set so as to prevent its idle 250 * injection kthread from running when the CPU goes online again after 251 * the ii_dev has been freed. 252 */ 253 for_each_cpu(cpu, to_cpumask(ii_dev->cpumask)) { 254 iit = per_cpu_ptr(&idle_inject_thread, cpu); 255 iit->should_run = 0; 256 257 wait_task_inactive(iit->tsk, TASK_ANY); 258 } 259 260 cpu_hotplug_enable(); 261 } 262 263 /** 264 * idle_inject_setup - prepare the current task for idle injection 265 * @cpu: not used 266 * 267 * Called once, this function is in charge of setting the current task's 268 * scheduler parameters to make it an RT task. 269 */ 270 static void idle_inject_setup(unsigned int cpu) 271 { 272 sched_set_fifo(current); 273 } 274 275 /** 276 * idle_inject_should_run - function helper for the smpboot API 277 * @cpu: CPU the kthread is running on 278 * 279 * Return: whether or not the thread can run. 280 */ 281 static int idle_inject_should_run(unsigned int cpu) 282 { 283 struct idle_inject_thread *iit = 284 per_cpu_ptr(&idle_inject_thread, cpu); 285 286 return iit->should_run; 287 } 288 289 /** 290 * idle_inject_register - initialize idle injection on a set of CPUs 291 * @cpumask: CPUs to be affected by idle injection 292 * 293 * This function creates an idle injection control device structure for the 294 * given set of CPUs and initializes the timer associated with it. It does not 295 * start any injection cycles. 296 * 297 * Return: NULL if memory allocation fails, idle injection control device 298 * pointer on success. 299 */ 300 struct idle_inject_device *idle_inject_register(struct cpumask *cpumask) 301 { 302 struct idle_inject_device *ii_dev; 303 int cpu, cpu_rb; 304 305 ii_dev = kzalloc(sizeof(*ii_dev) + cpumask_size(), GFP_KERNEL); 306 if (!ii_dev) 307 return NULL; 308 309 cpumask_copy(to_cpumask(ii_dev->cpumask), cpumask); 310 hrtimer_init(&ii_dev->timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); 311 ii_dev->timer.function = idle_inject_timer_fn; 312 ii_dev->latency_us = UINT_MAX; 313 314 for_each_cpu(cpu, to_cpumask(ii_dev->cpumask)) { 315 316 if (per_cpu(idle_inject_device, cpu)) { 317 pr_err("cpu%d is already registered\n", cpu); 318 goto out_rollback; 319 } 320 321 per_cpu(idle_inject_device, cpu) = ii_dev; 322 } 323 324 return ii_dev; 325 326 out_rollback: 327 for_each_cpu(cpu_rb, to_cpumask(ii_dev->cpumask)) { 328 if (cpu == cpu_rb) 329 break; 330 per_cpu(idle_inject_device, cpu_rb) = NULL; 331 } 332 333 kfree(ii_dev); 334 335 return NULL; 336 } 337 338 /** 339 * idle_inject_unregister - unregister idle injection control device 340 * @ii_dev: idle injection control device to unregister 341 * 342 * The function stops idle injection for the given control device, 343 * unregisters its kthreads and frees memory allocated when that device was 344 * created. 345 */ 346 void idle_inject_unregister(struct idle_inject_device *ii_dev) 347 { 348 unsigned int cpu; 349 350 idle_inject_stop(ii_dev); 351 352 for_each_cpu(cpu, to_cpumask(ii_dev->cpumask)) 353 per_cpu(idle_inject_device, cpu) = NULL; 354 355 kfree(ii_dev); 356 } 357 358 static struct smp_hotplug_thread idle_inject_threads = { 359 .store = &idle_inject_thread.tsk, 360 .setup = idle_inject_setup, 361 .thread_fn = idle_inject_fn, 362 .thread_comm = "idle_inject/%u", 363 .thread_should_run = idle_inject_should_run, 364 }; 365 366 static int __init idle_inject_init(void) 367 { 368 return smpboot_register_percpu_thread(&idle_inject_threads); 369 } 370 early_initcall(idle_inject_init); 371