1 // SPDX-License-Identifier: GPL-2.0 2 /* 3 * Copyright 2018 Linaro Limited 4 * 5 * Author: Daniel Lezcano <daniel.lezcano@linaro.org> 6 * 7 * The idle injection framework provides a way to force CPUs to enter idle 8 * states for a specified fraction of time over a specified period. 9 * 10 * It relies on the smpboot kthreads feature providing common code for CPU 11 * hotplug and thread [un]parking. 12 * 13 * All of the kthreads used for idle injection are created at init time. 14 * 15 * Next, the users of the the idle injection framework provide a cpumask via 16 * its register function. The kthreads will be synchronized with respect to 17 * this cpumask. 18 * 19 * The idle + run duration is specified via separate helpers and that allows 20 * idle injection to be started. 21 * 22 * The idle injection kthreads will call play_idle() with the idle duration 23 * specified as per the above. 24 * 25 * After all of them have been woken up, a timer is set to start the next idle 26 * injection cycle. 27 * 28 * The timer interrupt handler will wake up the idle injection kthreads for 29 * all of the CPUs in the cpumask provided by the user. 30 * 31 * Idle injection is stopped synchronously and no leftover idle injection 32 * kthread activity after its completion is guaranteed. 33 * 34 * It is up to the user of this framework to provide a lock for higher-level 35 * synchronization to prevent race conditions like starting idle injection 36 * while unregistering from the framework. 37 */ 38 #define pr_fmt(fmt) "ii_dev: " fmt 39 40 #include <linux/cpu.h> 41 #include <linux/hrtimer.h> 42 #include <linux/kthread.h> 43 #include <linux/sched.h> 44 #include <linux/slab.h> 45 #include <linux/smpboot.h> 46 47 #include <uapi/linux/sched/types.h> 48 49 /** 50 * struct idle_inject_thread - task on/off switch structure 51 * @tsk: task injecting the idle cycles 52 * @should_run: whether or not to run the task (for the smpboot kthread API) 53 */ 54 struct idle_inject_thread { 55 struct task_struct *tsk; 56 int should_run; 57 }; 58 59 /** 60 * struct idle_inject_device - idle injection data 61 * @timer: idle injection period timer 62 * @idle_duration_us: duration of CPU idle time to inject 63 * @run_duration_us: duration of CPU run time to allow 64 * @latency_us: max allowed latency 65 * @cpumask: mask of CPUs affected by idle injection 66 */ 67 struct idle_inject_device { 68 struct hrtimer timer; 69 unsigned int idle_duration_us; 70 unsigned int run_duration_us; 71 unsigned int latency_us; 72 unsigned long cpumask[]; 73 }; 74 75 static DEFINE_PER_CPU(struct idle_inject_thread, idle_inject_thread); 76 static DEFINE_PER_CPU(struct idle_inject_device *, idle_inject_device); 77 78 /** 79 * idle_inject_wakeup - Wake up idle injection threads 80 * @ii_dev: target idle injection device 81 * 82 * Every idle injection task associated with the given idle injection device 83 * and running on an online CPU will be woken up. 84 */ 85 static void idle_inject_wakeup(struct idle_inject_device *ii_dev) 86 { 87 struct idle_inject_thread *iit; 88 unsigned int cpu; 89 90 for_each_cpu_and(cpu, to_cpumask(ii_dev->cpumask), cpu_online_mask) { 91 iit = per_cpu_ptr(&idle_inject_thread, cpu); 92 iit->should_run = 1; 93 wake_up_process(iit->tsk); 94 } 95 } 96 97 /** 98 * idle_inject_timer_fn - idle injection timer function 99 * @timer: idle injection hrtimer 100 * 101 * This function is called when the idle injection timer expires. It wakes up 102 * idle injection tasks associated with the timer and they, in turn, invoke 103 * play_idle() to inject a specified amount of CPU idle time. 104 * 105 * Return: HRTIMER_RESTART. 106 */ 107 static enum hrtimer_restart idle_inject_timer_fn(struct hrtimer *timer) 108 { 109 unsigned int duration_us; 110 struct idle_inject_device *ii_dev = 111 container_of(timer, struct idle_inject_device, timer); 112 113 duration_us = READ_ONCE(ii_dev->run_duration_us); 114 duration_us += READ_ONCE(ii_dev->idle_duration_us); 115 116 idle_inject_wakeup(ii_dev); 117 118 hrtimer_forward_now(timer, ns_to_ktime(duration_us * NSEC_PER_USEC)); 119 120 return HRTIMER_RESTART; 121 } 122 123 /** 124 * idle_inject_fn - idle injection work function 125 * @cpu: the CPU owning the task 126 * 127 * This function calls play_idle() to inject a specified amount of CPU idle 128 * time. 129 */ 130 static void idle_inject_fn(unsigned int cpu) 131 { 132 struct idle_inject_device *ii_dev; 133 struct idle_inject_thread *iit; 134 135 ii_dev = per_cpu(idle_inject_device, cpu); 136 iit = per_cpu_ptr(&idle_inject_thread, cpu); 137 138 /* 139 * Let the smpboot main loop know that the task should not run again. 140 */ 141 iit->should_run = 0; 142 143 play_idle_precise(READ_ONCE(ii_dev->idle_duration_us) * NSEC_PER_USEC, 144 READ_ONCE(ii_dev->latency_us) * NSEC_PER_USEC); 145 } 146 147 /** 148 * idle_inject_set_duration - idle and run duration update helper 149 * @run_duration_us: CPU run time to allow in microseconds 150 * @idle_duration_us: CPU idle time to inject in microseconds 151 */ 152 void idle_inject_set_duration(struct idle_inject_device *ii_dev, 153 unsigned int run_duration_us, 154 unsigned int idle_duration_us) 155 { 156 if (run_duration_us && idle_duration_us) { 157 WRITE_ONCE(ii_dev->run_duration_us, run_duration_us); 158 WRITE_ONCE(ii_dev->idle_duration_us, idle_duration_us); 159 } 160 } 161 162 /** 163 * idle_inject_get_duration - idle and run duration retrieval helper 164 * @run_duration_us: memory location to store the current CPU run time 165 * @idle_duration_us: memory location to store the current CPU idle time 166 */ 167 void idle_inject_get_duration(struct idle_inject_device *ii_dev, 168 unsigned int *run_duration_us, 169 unsigned int *idle_duration_us) 170 { 171 *run_duration_us = READ_ONCE(ii_dev->run_duration_us); 172 *idle_duration_us = READ_ONCE(ii_dev->idle_duration_us); 173 } 174 175 /** 176 * idle_inject_set_latency - set the maximum latency allowed 177 * @latency_us: set the latency requirement for the idle state 178 */ 179 void idle_inject_set_latency(struct idle_inject_device *ii_dev, 180 unsigned int latency_us) 181 { 182 WRITE_ONCE(ii_dev->latency_us, latency_us); 183 } 184 185 /** 186 * idle_inject_start - start idle injections 187 * @ii_dev: idle injection control device structure 188 * 189 * The function starts idle injection by first waking up all of the idle 190 * injection kthreads associated with @ii_dev to let them inject CPU idle time 191 * sets up a timer to start the next idle injection period. 192 * 193 * Return: -EINVAL if the CPU idle or CPU run time is not set or 0 on success. 194 */ 195 int idle_inject_start(struct idle_inject_device *ii_dev) 196 { 197 unsigned int idle_duration_us = READ_ONCE(ii_dev->idle_duration_us); 198 unsigned int run_duration_us = READ_ONCE(ii_dev->run_duration_us); 199 200 if (!idle_duration_us || !run_duration_us) 201 return -EINVAL; 202 203 pr_debug("Starting injecting idle cycles on CPUs '%*pbl'\n", 204 cpumask_pr_args(to_cpumask(ii_dev->cpumask))); 205 206 idle_inject_wakeup(ii_dev); 207 208 hrtimer_start(&ii_dev->timer, 209 ns_to_ktime((idle_duration_us + run_duration_us) * 210 NSEC_PER_USEC), 211 HRTIMER_MODE_REL); 212 213 return 0; 214 } 215 216 /** 217 * idle_inject_stop - stops idle injections 218 * @ii_dev: idle injection control device structure 219 * 220 * The function stops idle injection and waits for the threads to finish work. 221 * If CPU idle time is being injected when this function runs, then it will 222 * wait until the end of the cycle. 223 * 224 * When it returns, there is no more idle injection kthread activity. The 225 * kthreads are scheduled out and the periodic timer is off. 226 */ 227 void idle_inject_stop(struct idle_inject_device *ii_dev) 228 { 229 struct idle_inject_thread *iit; 230 unsigned int cpu; 231 232 pr_debug("Stopping idle injection on CPUs '%*pbl'\n", 233 cpumask_pr_args(to_cpumask(ii_dev->cpumask))); 234 235 hrtimer_cancel(&ii_dev->timer); 236 237 /* 238 * Stopping idle injection requires all of the idle injection kthreads 239 * associated with the given cpumask to be parked and stay that way, so 240 * prevent CPUs from going online at this point. Any CPUs going online 241 * after the loop below will be covered by clearing the should_run flag 242 * that will cause the smpboot main loop to schedule them out. 243 */ 244 cpu_hotplug_disable(); 245 246 /* 247 * Iterate over all (online + offline) CPUs here in case one of them 248 * goes offline with the should_run flag set so as to prevent its idle 249 * injection kthread from running when the CPU goes online again after 250 * the ii_dev has been freed. 251 */ 252 for_each_cpu(cpu, to_cpumask(ii_dev->cpumask)) { 253 iit = per_cpu_ptr(&idle_inject_thread, cpu); 254 iit->should_run = 0; 255 256 wait_task_inactive(iit->tsk, 0); 257 } 258 259 cpu_hotplug_enable(); 260 } 261 262 /** 263 * idle_inject_setup - prepare the current task for idle injection 264 * @cpu: not used 265 * 266 * Called once, this function is in charge of setting the current task's 267 * scheduler parameters to make it an RT task. 268 */ 269 static void idle_inject_setup(unsigned int cpu) 270 { 271 struct sched_param param = { .sched_priority = MAX_USER_RT_PRIO / 2 }; 272 273 sched_setscheduler(current, SCHED_FIFO, ¶m); 274 } 275 276 /** 277 * idle_inject_should_run - function helper for the smpboot API 278 * @cpu: CPU the kthread is running on 279 * 280 * Return: whether or not the thread can run. 281 */ 282 static int idle_inject_should_run(unsigned int cpu) 283 { 284 struct idle_inject_thread *iit = 285 per_cpu_ptr(&idle_inject_thread, cpu); 286 287 return iit->should_run; 288 } 289 290 /** 291 * idle_inject_register - initialize idle injection on a set of CPUs 292 * @cpumask: CPUs to be affected by idle injection 293 * 294 * This function creates an idle injection control device structure for the 295 * given set of CPUs and initializes the timer associated with it. It does not 296 * start any injection cycles. 297 * 298 * Return: NULL if memory allocation fails, idle injection control device 299 * pointer on success. 300 */ 301 struct idle_inject_device *idle_inject_register(struct cpumask *cpumask) 302 { 303 struct idle_inject_device *ii_dev; 304 int cpu, cpu_rb; 305 306 ii_dev = kzalloc(sizeof(*ii_dev) + cpumask_size(), GFP_KERNEL); 307 if (!ii_dev) 308 return NULL; 309 310 cpumask_copy(to_cpumask(ii_dev->cpumask), cpumask); 311 hrtimer_init(&ii_dev->timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); 312 ii_dev->timer.function = idle_inject_timer_fn; 313 ii_dev->latency_us = UINT_MAX; 314 315 for_each_cpu(cpu, to_cpumask(ii_dev->cpumask)) { 316 317 if (per_cpu(idle_inject_device, cpu)) { 318 pr_err("cpu%d is already registered\n", cpu); 319 goto out_rollback; 320 } 321 322 per_cpu(idle_inject_device, cpu) = ii_dev; 323 } 324 325 return ii_dev; 326 327 out_rollback: 328 for_each_cpu(cpu_rb, to_cpumask(ii_dev->cpumask)) { 329 if (cpu == cpu_rb) 330 break; 331 per_cpu(idle_inject_device, cpu_rb) = NULL; 332 } 333 334 kfree(ii_dev); 335 336 return NULL; 337 } 338 339 /** 340 * idle_inject_unregister - unregister idle injection control device 341 * @ii_dev: idle injection control device to unregister 342 * 343 * The function stops idle injection for the given control device, 344 * unregisters its kthreads and frees memory allocated when that device was 345 * created. 346 */ 347 void idle_inject_unregister(struct idle_inject_device *ii_dev) 348 { 349 unsigned int cpu; 350 351 idle_inject_stop(ii_dev); 352 353 for_each_cpu(cpu, to_cpumask(ii_dev->cpumask)) 354 per_cpu(idle_inject_device, cpu) = NULL; 355 356 kfree(ii_dev); 357 } 358 359 static struct smp_hotplug_thread idle_inject_threads = { 360 .store = &idle_inject_thread.tsk, 361 .setup = idle_inject_setup, 362 .thread_fn = idle_inject_fn, 363 .thread_comm = "idle_inject/%u", 364 .thread_should_run = idle_inject_should_run, 365 }; 366 367 static int __init idle_inject_init(void) 368 { 369 return smpboot_register_percpu_thread(&idle_inject_threads); 370 } 371 early_initcall(idle_inject_init); 372