xref: /openbmc/linux/kernel/sched/cpufreq_schedutil.c (revision 5ef12cb4a3a78ffb331c03a795a15eea4ae35155)
1 /*
2  * CPUFreq governor based on scheduler-provided CPU utilization data.
3  *
4  * Copyright (C) 2016, Intel Corporation
5  * Author: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
6  *
7  * This program is free software; you can redistribute it and/or modify
8  * it under the terms of the GNU General Public License version 2 as
9  * published by the Free Software Foundation.
10  */
11 
12 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
13 
14 #include "sched.h"
15 
16 #include <trace/events/power.h>
17 
18 struct sugov_tunables {
19 	struct gov_attr_set	attr_set;
20 	unsigned int		rate_limit_us;
21 };
22 
23 struct sugov_policy {
24 	struct cpufreq_policy	*policy;
25 
26 	struct sugov_tunables	*tunables;
27 	struct list_head	tunables_hook;
28 
29 	raw_spinlock_t		update_lock;	/* For shared policies */
30 	u64			last_freq_update_time;
31 	s64			freq_update_delay_ns;
32 	unsigned int		next_freq;
33 	unsigned int		cached_raw_freq;
34 
35 	/* The next fields are only needed if fast switch cannot be used: */
36 	struct			irq_work irq_work;
37 	struct			kthread_work work;
38 	struct			mutex work_lock;
39 	struct			kthread_worker worker;
40 	struct task_struct	*thread;
41 	bool			work_in_progress;
42 
43 	bool			need_freq_update;
44 };
45 
46 struct sugov_cpu {
47 	struct update_util_data	update_util;
48 	struct sugov_policy	*sg_policy;
49 	unsigned int		cpu;
50 
51 	bool			iowait_boost_pending;
52 	unsigned int		iowait_boost;
53 	unsigned int		iowait_boost_max;
54 	u64 last_update;
55 
56 	/* The fields below are only needed when sharing a policy: */
57 	unsigned long		util_cfs;
58 	unsigned long		util_dl;
59 	unsigned long		max;
60 
61 	/* The field below is for single-CPU policies only: */
62 #ifdef CONFIG_NO_HZ_COMMON
63 	unsigned long		saved_idle_calls;
64 #endif
65 };
66 
67 static DEFINE_PER_CPU(struct sugov_cpu, sugov_cpu);
68 
69 /************************ Governor internals ***********************/
70 
71 static bool sugov_should_update_freq(struct sugov_policy *sg_policy, u64 time)
72 {
73 	s64 delta_ns;
74 
75 	/*
76 	 * Since cpufreq_update_util() is called with rq->lock held for
77 	 * the @target_cpu, our per-CPU data is fully serialized.
78 	 *
79 	 * However, drivers cannot in general deal with cross-CPU
80 	 * requests, so while get_next_freq() will work, our
81 	 * sugov_update_commit() call may not for the fast switching platforms.
82 	 *
83 	 * Hence stop here for remote requests if they aren't supported
84 	 * by the hardware, as calculating the frequency is pointless if
85 	 * we cannot in fact act on it.
86 	 *
87 	 * For the slow switching platforms, the kthread is always scheduled on
88 	 * the right set of CPUs and any CPU can find the next frequency and
89 	 * schedule the kthread.
90 	 */
91 	if (sg_policy->policy->fast_switch_enabled &&
92 	    !cpufreq_can_do_remote_dvfs(sg_policy->policy))
93 		return false;
94 
95 	if (sg_policy->work_in_progress)
96 		return false;
97 
98 	if (unlikely(sg_policy->need_freq_update)) {
99 		sg_policy->need_freq_update = false;
100 		/*
101 		 * This happens when limits change, so forget the previous
102 		 * next_freq value and force an update.
103 		 */
104 		sg_policy->next_freq = UINT_MAX;
105 		return true;
106 	}
107 
108 	delta_ns = time - sg_policy->last_freq_update_time;
109 
110 	return delta_ns >= sg_policy->freq_update_delay_ns;
111 }
112 
113 static void sugov_update_commit(struct sugov_policy *sg_policy, u64 time,
114 				unsigned int next_freq)
115 {
116 	struct cpufreq_policy *policy = sg_policy->policy;
117 
118 	if (sg_policy->next_freq == next_freq)
119 		return;
120 
121 	sg_policy->next_freq = next_freq;
122 	sg_policy->last_freq_update_time = time;
123 
124 	if (policy->fast_switch_enabled) {
125 		next_freq = cpufreq_driver_fast_switch(policy, next_freq);
126 		if (!next_freq)
127 			return;
128 
129 		policy->cur = next_freq;
130 		trace_cpu_frequency(next_freq, smp_processor_id());
131 	} else {
132 		sg_policy->work_in_progress = true;
133 		irq_work_queue(&sg_policy->irq_work);
134 	}
135 }
136 
137 /**
138  * get_next_freq - Compute a new frequency for a given cpufreq policy.
139  * @sg_policy: schedutil policy object to compute the new frequency for.
140  * @util: Current CPU utilization.
141  * @max: CPU capacity.
142  *
143  * If the utilization is frequency-invariant, choose the new frequency to be
144  * proportional to it, that is
145  *
146  * next_freq = C * max_freq * util / max
147  *
148  * Otherwise, approximate the would-be frequency-invariant utilization by
149  * util_raw * (curr_freq / max_freq) which leads to
150  *
151  * next_freq = C * curr_freq * util_raw / max
152  *
153  * Take C = 1.25 for the frequency tipping point at (util / max) = 0.8.
154  *
155  * The lowest driver-supported frequency which is equal or greater than the raw
156  * next_freq (as calculated above) is returned, subject to policy min/max and
157  * cpufreq driver limitations.
158  */
159 static unsigned int get_next_freq(struct sugov_policy *sg_policy,
160 				  unsigned long util, unsigned long max)
161 {
162 	struct cpufreq_policy *policy = sg_policy->policy;
163 	unsigned int freq = arch_scale_freq_invariant() ?
164 				policy->cpuinfo.max_freq : policy->cur;
165 
166 	freq = (freq + (freq >> 2)) * util / max;
167 
168 	if (freq == sg_policy->cached_raw_freq && sg_policy->next_freq != UINT_MAX)
169 		return sg_policy->next_freq;
170 	sg_policy->cached_raw_freq = freq;
171 	return cpufreq_driver_resolve_freq(policy, freq);
172 }
173 
174 static void sugov_get_util(struct sugov_cpu *sg_cpu)
175 {
176 	struct rq *rq = cpu_rq(sg_cpu->cpu);
177 
178 	sg_cpu->max = arch_scale_cpu_capacity(NULL, sg_cpu->cpu);
179 	sg_cpu->util_cfs = cpu_util_cfs(rq);
180 	sg_cpu->util_dl  = cpu_util_dl(rq);
181 }
182 
183 static unsigned long sugov_aggregate_util(struct sugov_cpu *sg_cpu)
184 {
185 	struct rq *rq = cpu_rq(sg_cpu->cpu);
186 	unsigned long util;
187 
188 	if (rq->rt.rt_nr_running) {
189 		util = sg_cpu->max;
190 	} else {
191 		util = sg_cpu->util_dl;
192 		if (rq->cfs.h_nr_running)
193 			util += sg_cpu->util_cfs;
194 	}
195 
196 	/*
197 	 * Ideally we would like to set util_dl as min/guaranteed freq and
198 	 * util_cfs + util_dl as requested freq. However, cpufreq is not yet
199 	 * ready for such an interface. So, we only do the latter for now.
200 	 */
201 	return min(util, sg_cpu->max);
202 }
203 
204 static void sugov_set_iowait_boost(struct sugov_cpu *sg_cpu, u64 time, unsigned int flags)
205 {
206 	if (flags & SCHED_CPUFREQ_IOWAIT) {
207 		if (sg_cpu->iowait_boost_pending)
208 			return;
209 
210 		sg_cpu->iowait_boost_pending = true;
211 
212 		if (sg_cpu->iowait_boost) {
213 			sg_cpu->iowait_boost <<= 1;
214 			if (sg_cpu->iowait_boost > sg_cpu->iowait_boost_max)
215 				sg_cpu->iowait_boost = sg_cpu->iowait_boost_max;
216 		} else {
217 			sg_cpu->iowait_boost = sg_cpu->sg_policy->policy->min;
218 		}
219 	} else if (sg_cpu->iowait_boost) {
220 		s64 delta_ns = time - sg_cpu->last_update;
221 
222 		/* Clear iowait_boost if the CPU apprears to have been idle. */
223 		if (delta_ns > TICK_NSEC) {
224 			sg_cpu->iowait_boost = 0;
225 			sg_cpu->iowait_boost_pending = false;
226 		}
227 	}
228 }
229 
230 static void sugov_iowait_boost(struct sugov_cpu *sg_cpu, unsigned long *util,
231 			       unsigned long *max)
232 {
233 	unsigned int boost_util, boost_max;
234 
235 	if (!sg_cpu->iowait_boost)
236 		return;
237 
238 	if (sg_cpu->iowait_boost_pending) {
239 		sg_cpu->iowait_boost_pending = false;
240 	} else {
241 		sg_cpu->iowait_boost >>= 1;
242 		if (sg_cpu->iowait_boost < sg_cpu->sg_policy->policy->min) {
243 			sg_cpu->iowait_boost = 0;
244 			return;
245 		}
246 	}
247 
248 	boost_util = sg_cpu->iowait_boost;
249 	boost_max = sg_cpu->iowait_boost_max;
250 
251 	if (*util * boost_max < *max * boost_util) {
252 		*util = boost_util;
253 		*max = boost_max;
254 	}
255 }
256 
257 #ifdef CONFIG_NO_HZ_COMMON
258 static bool sugov_cpu_is_busy(struct sugov_cpu *sg_cpu)
259 {
260 	unsigned long idle_calls = tick_nohz_get_idle_calls_cpu(sg_cpu->cpu);
261 	bool ret = idle_calls == sg_cpu->saved_idle_calls;
262 
263 	sg_cpu->saved_idle_calls = idle_calls;
264 	return ret;
265 }
266 #else
267 static inline bool sugov_cpu_is_busy(struct sugov_cpu *sg_cpu) { return false; }
268 #endif /* CONFIG_NO_HZ_COMMON */
269 
270 /*
271  * Make sugov_should_update_freq() ignore the rate limit when DL
272  * has increased the utilization.
273  */
274 static inline void ignore_dl_rate_limit(struct sugov_cpu *sg_cpu, struct sugov_policy *sg_policy)
275 {
276 	if (cpu_util_dl(cpu_rq(sg_cpu->cpu)) > sg_cpu->util_dl)
277 		sg_policy->need_freq_update = true;
278 }
279 
280 static void sugov_update_single(struct update_util_data *hook, u64 time,
281 				unsigned int flags)
282 {
283 	struct sugov_cpu *sg_cpu = container_of(hook, struct sugov_cpu, update_util);
284 	struct sugov_policy *sg_policy = sg_cpu->sg_policy;
285 	unsigned long util, max;
286 	unsigned int next_f;
287 	bool busy;
288 
289 	sugov_set_iowait_boost(sg_cpu, time, flags);
290 	sg_cpu->last_update = time;
291 
292 	ignore_dl_rate_limit(sg_cpu, sg_policy);
293 
294 	if (!sugov_should_update_freq(sg_policy, time))
295 		return;
296 
297 	busy = sugov_cpu_is_busy(sg_cpu);
298 
299 	sugov_get_util(sg_cpu);
300 	max = sg_cpu->max;
301 	util = sugov_aggregate_util(sg_cpu);
302 	sugov_iowait_boost(sg_cpu, &util, &max);
303 	next_f = get_next_freq(sg_policy, util, max);
304 	/*
305 	 * Do not reduce the frequency if the CPU has not been idle
306 	 * recently, as the reduction is likely to be premature then.
307 	 */
308 	if (busy && next_f < sg_policy->next_freq &&
309 	    sg_policy->next_freq != UINT_MAX) {
310 		next_f = sg_policy->next_freq;
311 
312 		/* Reset cached freq as next_freq has changed */
313 		sg_policy->cached_raw_freq = 0;
314 	}
315 
316 	sugov_update_commit(sg_policy, time, next_f);
317 }
318 
319 static unsigned int sugov_next_freq_shared(struct sugov_cpu *sg_cpu, u64 time)
320 {
321 	struct sugov_policy *sg_policy = sg_cpu->sg_policy;
322 	struct cpufreq_policy *policy = sg_policy->policy;
323 	unsigned long util = 0, max = 1;
324 	unsigned int j;
325 
326 	for_each_cpu(j, policy->cpus) {
327 		struct sugov_cpu *j_sg_cpu = &per_cpu(sugov_cpu, j);
328 		unsigned long j_util, j_max;
329 		s64 delta_ns;
330 
331 		sugov_get_util(j_sg_cpu);
332 
333 		/*
334 		 * If the CFS CPU utilization was last updated before the
335 		 * previous frequency update and the time elapsed between the
336 		 * last update of the CPU utilization and the last frequency
337 		 * update is long enough, reset iowait_boost and util_cfs, as
338 		 * they are now probably stale. However, still consider the
339 		 * CPU contribution if it has some DEADLINE utilization
340 		 * (util_dl).
341 		 */
342 		delta_ns = time - j_sg_cpu->last_update;
343 		if (delta_ns > TICK_NSEC) {
344 			j_sg_cpu->iowait_boost = 0;
345 			j_sg_cpu->iowait_boost_pending = false;
346 		}
347 
348 		j_max = j_sg_cpu->max;
349 		j_util = sugov_aggregate_util(j_sg_cpu);
350 		sugov_iowait_boost(j_sg_cpu, &j_util, &j_max);
351 		if (j_util * max > j_max * util) {
352 			util = j_util;
353 			max = j_max;
354 		}
355 	}
356 
357 	return get_next_freq(sg_policy, util, max);
358 }
359 
360 static void
361 sugov_update_shared(struct update_util_data *hook, u64 time, unsigned int flags)
362 {
363 	struct sugov_cpu *sg_cpu = container_of(hook, struct sugov_cpu, update_util);
364 	struct sugov_policy *sg_policy = sg_cpu->sg_policy;
365 	unsigned int next_f;
366 
367 	raw_spin_lock(&sg_policy->update_lock);
368 
369 	sugov_set_iowait_boost(sg_cpu, time, flags);
370 	sg_cpu->last_update = time;
371 
372 	ignore_dl_rate_limit(sg_cpu, sg_policy);
373 
374 	if (sugov_should_update_freq(sg_policy, time)) {
375 		next_f = sugov_next_freq_shared(sg_cpu, time);
376 		sugov_update_commit(sg_policy, time, next_f);
377 	}
378 
379 	raw_spin_unlock(&sg_policy->update_lock);
380 }
381 
382 static void sugov_work(struct kthread_work *work)
383 {
384 	struct sugov_policy *sg_policy = container_of(work, struct sugov_policy, work);
385 
386 	mutex_lock(&sg_policy->work_lock);
387 	__cpufreq_driver_target(sg_policy->policy, sg_policy->next_freq,
388 				CPUFREQ_RELATION_L);
389 	mutex_unlock(&sg_policy->work_lock);
390 
391 	sg_policy->work_in_progress = false;
392 }
393 
394 static void sugov_irq_work(struct irq_work *irq_work)
395 {
396 	struct sugov_policy *sg_policy;
397 
398 	sg_policy = container_of(irq_work, struct sugov_policy, irq_work);
399 
400 	kthread_queue_work(&sg_policy->worker, &sg_policy->work);
401 }
402 
403 /************************** sysfs interface ************************/
404 
405 static struct sugov_tunables *global_tunables;
406 static DEFINE_MUTEX(global_tunables_lock);
407 
408 static inline struct sugov_tunables *to_sugov_tunables(struct gov_attr_set *attr_set)
409 {
410 	return container_of(attr_set, struct sugov_tunables, attr_set);
411 }
412 
413 static ssize_t rate_limit_us_show(struct gov_attr_set *attr_set, char *buf)
414 {
415 	struct sugov_tunables *tunables = to_sugov_tunables(attr_set);
416 
417 	return sprintf(buf, "%u\n", tunables->rate_limit_us);
418 }
419 
420 static ssize_t
421 rate_limit_us_store(struct gov_attr_set *attr_set, const char *buf, size_t count)
422 {
423 	struct sugov_tunables *tunables = to_sugov_tunables(attr_set);
424 	struct sugov_policy *sg_policy;
425 	unsigned int rate_limit_us;
426 
427 	if (kstrtouint(buf, 10, &rate_limit_us))
428 		return -EINVAL;
429 
430 	tunables->rate_limit_us = rate_limit_us;
431 
432 	list_for_each_entry(sg_policy, &attr_set->policy_list, tunables_hook)
433 		sg_policy->freq_update_delay_ns = rate_limit_us * NSEC_PER_USEC;
434 
435 	return count;
436 }
437 
438 static struct governor_attr rate_limit_us = __ATTR_RW(rate_limit_us);
439 
440 static struct attribute *sugov_attributes[] = {
441 	&rate_limit_us.attr,
442 	NULL
443 };
444 
445 static struct kobj_type sugov_tunables_ktype = {
446 	.default_attrs = sugov_attributes,
447 	.sysfs_ops = &governor_sysfs_ops,
448 };
449 
450 /********************** cpufreq governor interface *********************/
451 
452 static struct cpufreq_governor schedutil_gov;
453 
454 static struct sugov_policy *sugov_policy_alloc(struct cpufreq_policy *policy)
455 {
456 	struct sugov_policy *sg_policy;
457 
458 	sg_policy = kzalloc(sizeof(*sg_policy), GFP_KERNEL);
459 	if (!sg_policy)
460 		return NULL;
461 
462 	sg_policy->policy = policy;
463 	raw_spin_lock_init(&sg_policy->update_lock);
464 	return sg_policy;
465 }
466 
/* Release a sugov_policy obtained from sugov_policy_alloc(). */
static void sugov_policy_free(struct sugov_policy *sg_policy)
{
	kfree(sg_policy);
}
471 
472 static int sugov_kthread_create(struct sugov_policy *sg_policy)
473 {
474 	struct task_struct *thread;
475 	struct sched_attr attr = {
476 		.size		= sizeof(struct sched_attr),
477 		.sched_policy	= SCHED_DEADLINE,
478 		.sched_flags	= SCHED_FLAG_SUGOV,
479 		.sched_nice	= 0,
480 		.sched_priority	= 0,
481 		/*
482 		 * Fake (unused) bandwidth; workaround to "fix"
483 		 * priority inheritance.
484 		 */
485 		.sched_runtime	=  1000000,
486 		.sched_deadline = 10000000,
487 		.sched_period	= 10000000,
488 	};
489 	struct cpufreq_policy *policy = sg_policy->policy;
490 	int ret;
491 
492 	/* kthread only required for slow path */
493 	if (policy->fast_switch_enabled)
494 		return 0;
495 
496 	kthread_init_work(&sg_policy->work, sugov_work);
497 	kthread_init_worker(&sg_policy->worker);
498 	thread = kthread_create(kthread_worker_fn, &sg_policy->worker,
499 				"sugov:%d",
500 				cpumask_first(policy->related_cpus));
501 	if (IS_ERR(thread)) {
502 		pr_err("failed to create sugov thread: %ld\n", PTR_ERR(thread));
503 		return PTR_ERR(thread);
504 	}
505 
506 	ret = sched_setattr_nocheck(thread, &attr);
507 	if (ret) {
508 		kthread_stop(thread);
509 		pr_warn("%s: failed to set SCHED_DEADLINE\n", __func__);
510 		return ret;
511 	}
512 
513 	sg_policy->thread = thread;
514 
515 	/* Kthread is bound to all CPUs by default */
516 	if (!policy->dvfs_possible_from_any_cpu)
517 		kthread_bind_mask(thread, policy->related_cpus);
518 
519 	init_irq_work(&sg_policy->irq_work, sugov_irq_work);
520 	mutex_init(&sg_policy->work_lock);
521 
522 	wake_up_process(thread);
523 
524 	return 0;
525 }
526 
527 static void sugov_kthread_stop(struct sugov_policy *sg_policy)
528 {
529 	/* kthread only required for slow path */
530 	if (sg_policy->policy->fast_switch_enabled)
531 		return;
532 
533 	kthread_flush_worker(&sg_policy->worker);
534 	kthread_stop(sg_policy->thread);
535 	mutex_destroy(&sg_policy->work_lock);
536 }
537 
538 static struct sugov_tunables *sugov_tunables_alloc(struct sugov_policy *sg_policy)
539 {
540 	struct sugov_tunables *tunables;
541 
542 	tunables = kzalloc(sizeof(*tunables), GFP_KERNEL);
543 	if (tunables) {
544 		gov_attr_set_init(&tunables->attr_set, &sg_policy->tunables_hook);
545 		if (!have_governor_per_policy())
546 			global_tunables = tunables;
547 	}
548 	return tunables;
549 }
550 
551 static void sugov_tunables_free(struct sugov_tunables *tunables)
552 {
553 	if (!have_governor_per_policy())
554 		global_tunables = NULL;
555 
556 	kfree(tunables);
557 }
558 
559 static int sugov_init(struct cpufreq_policy *policy)
560 {
561 	struct sugov_policy *sg_policy;
562 	struct sugov_tunables *tunables;
563 	int ret = 0;
564 
565 	/* State should be equivalent to EXIT */
566 	if (policy->governor_data)
567 		return -EBUSY;
568 
569 	cpufreq_enable_fast_switch(policy);
570 
571 	sg_policy = sugov_policy_alloc(policy);
572 	if (!sg_policy) {
573 		ret = -ENOMEM;
574 		goto disable_fast_switch;
575 	}
576 
577 	ret = sugov_kthread_create(sg_policy);
578 	if (ret)
579 		goto free_sg_policy;
580 
581 	mutex_lock(&global_tunables_lock);
582 
583 	if (global_tunables) {
584 		if (WARN_ON(have_governor_per_policy())) {
585 			ret = -EINVAL;
586 			goto stop_kthread;
587 		}
588 		policy->governor_data = sg_policy;
589 		sg_policy->tunables = global_tunables;
590 
591 		gov_attr_set_get(&global_tunables->attr_set, &sg_policy->tunables_hook);
592 		goto out;
593 	}
594 
595 	tunables = sugov_tunables_alloc(sg_policy);
596 	if (!tunables) {
597 		ret = -ENOMEM;
598 		goto stop_kthread;
599 	}
600 
601 	tunables->rate_limit_us = cpufreq_policy_transition_delay_us(policy);
602 
603 	policy->governor_data = sg_policy;
604 	sg_policy->tunables = tunables;
605 
606 	ret = kobject_init_and_add(&tunables->attr_set.kobj, &sugov_tunables_ktype,
607 				   get_governor_parent_kobj(policy), "%s",
608 				   schedutil_gov.name);
609 	if (ret)
610 		goto fail;
611 
612 out:
613 	mutex_unlock(&global_tunables_lock);
614 	return 0;
615 
616 fail:
617 	policy->governor_data = NULL;
618 	sugov_tunables_free(tunables);
619 
620 stop_kthread:
621 	sugov_kthread_stop(sg_policy);
622 	mutex_unlock(&global_tunables_lock);
623 
624 free_sg_policy:
625 	sugov_policy_free(sg_policy);
626 
627 disable_fast_switch:
628 	cpufreq_disable_fast_switch(policy);
629 
630 	pr_err("initialization failed (error %d)\n", ret);
631 	return ret;
632 }
633 
634 static void sugov_exit(struct cpufreq_policy *policy)
635 {
636 	struct sugov_policy *sg_policy = policy->governor_data;
637 	struct sugov_tunables *tunables = sg_policy->tunables;
638 	unsigned int count;
639 
640 	mutex_lock(&global_tunables_lock);
641 
642 	count = gov_attr_set_put(&tunables->attr_set, &sg_policy->tunables_hook);
643 	policy->governor_data = NULL;
644 	if (!count)
645 		sugov_tunables_free(tunables);
646 
647 	mutex_unlock(&global_tunables_lock);
648 
649 	sugov_kthread_stop(sg_policy);
650 	sugov_policy_free(sg_policy);
651 	cpufreq_disable_fast_switch(policy);
652 }
653 
654 static int sugov_start(struct cpufreq_policy *policy)
655 {
656 	struct sugov_policy *sg_policy = policy->governor_data;
657 	unsigned int cpu;
658 
659 	sg_policy->freq_update_delay_ns	= sg_policy->tunables->rate_limit_us * NSEC_PER_USEC;
660 	sg_policy->last_freq_update_time	= 0;
661 	sg_policy->next_freq			= UINT_MAX;
662 	sg_policy->work_in_progress		= false;
663 	sg_policy->need_freq_update		= false;
664 	sg_policy->cached_raw_freq		= 0;
665 
666 	for_each_cpu(cpu, policy->cpus) {
667 		struct sugov_cpu *sg_cpu = &per_cpu(sugov_cpu, cpu);
668 
669 		memset(sg_cpu, 0, sizeof(*sg_cpu));
670 		sg_cpu->cpu			= cpu;
671 		sg_cpu->sg_policy		= sg_policy;
672 		sg_cpu->iowait_boost_max	= policy->cpuinfo.max_freq;
673 	}
674 
675 	for_each_cpu(cpu, policy->cpus) {
676 		struct sugov_cpu *sg_cpu = &per_cpu(sugov_cpu, cpu);
677 
678 		cpufreq_add_update_util_hook(cpu, &sg_cpu->update_util,
679 					     policy_is_shared(policy) ?
680 							sugov_update_shared :
681 							sugov_update_single);
682 	}
683 	return 0;
684 }
685 
686 static void sugov_stop(struct cpufreq_policy *policy)
687 {
688 	struct sugov_policy *sg_policy = policy->governor_data;
689 	unsigned int cpu;
690 
691 	for_each_cpu(cpu, policy->cpus)
692 		cpufreq_remove_update_util_hook(cpu);
693 
694 	synchronize_sched();
695 
696 	if (!policy->fast_switch_enabled) {
697 		irq_work_sync(&sg_policy->irq_work);
698 		kthread_cancel_work_sync(&sg_policy->work);
699 	}
700 }
701 
702 static void sugov_limits(struct cpufreq_policy *policy)
703 {
704 	struct sugov_policy *sg_policy = policy->governor_data;
705 
706 	if (!policy->fast_switch_enabled) {
707 		mutex_lock(&sg_policy->work_lock);
708 		cpufreq_policy_apply_limits(policy);
709 		mutex_unlock(&sg_policy->work_lock);
710 	}
711 
712 	sg_policy->need_freq_update = true;
713 }
714 
715 static struct cpufreq_governor schedutil_gov = {
716 	.name			= "schedutil",
717 	.owner			= THIS_MODULE,
718 	.dynamic_switching	= true,
719 	.init			= sugov_init,
720 	.exit			= sugov_exit,
721 	.start			= sugov_start,
722 	.stop			= sugov_stop,
723 	.limits			= sugov_limits,
724 };
725 
#ifdef CONFIG_CPU_FREQ_DEFAULT_GOV_SCHEDUTIL
/* Return schedutil as the default governor when it is configured as such. */
struct cpufreq_governor *cpufreq_default_governor(void)
{
	struct cpufreq_governor *gov = &schedutil_gov;

	return gov;
}
#endif
732 
733 static int __init sugov_register(void)
734 {
735 	return cpufreq_register_governor(&schedutil_gov);
736 }
737 fs_initcall(sugov_register);
738