/*
 * CPUFreq governor based on scheduler-provided CPU utilization data.
 *
 * Copyright (C) 2016, Intel Corporation
 * Author: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 as
 * published by the Free Software Foundation.
 */

#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt

#include <linux/cpufreq.h>
#include <linux/kthread.h>
#include <uapi/linux/sched/types.h>
#include <linux/slab.h>
#include <trace/events/power.h>

#include "sched.h"

struct sugov_tunables {
	struct gov_attr_set attr_set;
	unsigned int rate_limit_us;
};

struct sugov_policy {
	struct cpufreq_policy *policy;

	struct sugov_tunables *tunables;
	struct list_head tunables_hook;

	raw_spinlock_t update_lock;  /* For shared policies */
	u64 last_freq_update_time;
	s64 freq_update_delay_ns;
	unsigned int next_freq;
	unsigned int cached_raw_freq;

	/* The next fields are only needed if fast switch cannot be used. */
	struct irq_work irq_work;
	struct kthread_work work;
	struct mutex work_lock;
	struct kthread_worker worker;
	struct task_struct *thread;
	bool work_in_progress;

	bool need_freq_update;
};

struct sugov_cpu {
	struct update_util_data update_util;
	struct sugov_policy *sg_policy;
	unsigned int cpu;

	bool iowait_boost_pending;
	unsigned int iowait_boost;
	unsigned int iowait_boost_max;
	u64 last_update;

	/* The fields below are only needed when sharing a policy. */
	unsigned long util_cfs;
	unsigned long util_dl;
	unsigned long max;
	unsigned int flags;

	/* The field below is for single-CPU policies only. */
#ifdef CONFIG_NO_HZ_COMMON
	unsigned long saved_idle_calls;
#endif
};

static DEFINE_PER_CPU(struct sugov_cpu, sugov_cpu);

/************************ Governor internals ***********************/

static bool sugov_should_update_freq(struct sugov_policy *sg_policy, u64 time)
{
	s64 delta_ns;

	/*
	 * Since cpufreq_update_util() is called with rq->lock held for
	 * the @target_cpu, our per-cpu data is fully serialized.
	 *
	 * However, drivers cannot in general deal with cross-CPU requests, so
	 * while get_next_freq() will work, our sugov_update_commit() call may
	 * not work on fast switching platforms.
	 *
	 * Hence stop here for remote requests if they aren't supported
	 * by the hardware, as calculating the frequency is pointless if
	 * we cannot in fact act on it.
	 *
	 * For the slow switching platforms, the kthread is always scheduled on
	 * the right set of CPUs and any CPU can find the next frequency and
	 * schedule the kthread.
	 */
	if (sg_policy->policy->fast_switch_enabled &&
	    !cpufreq_can_do_remote_dvfs(sg_policy->policy))
		return false;

	if (sg_policy->work_in_progress)
		return false;

	if (unlikely(sg_policy->need_freq_update)) {
		sg_policy->need_freq_update = false;
		/*
		 * This happens when limits change, so forget the previous
		 * next_freq value and force an update.
		 */
		sg_policy->next_freq = UINT_MAX;
		return true;
	}

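	/*
	 * Enforce the rate limit: with rate_limit_us = 500, for instance,
	 * freq_update_delay_ns is 500 * NSEC_PER_USEC = 500000, so at most
	 * one frequency update is allowed every 500 us.
	 */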
	delta_ns = time - sg_policy->last_freq_update_time;
	return delta_ns >= sg_policy->freq_update_delay_ns;
}

static void sugov_update_commit(struct sugov_policy *sg_policy, u64 time,
				unsigned int next_freq)
{
	struct cpufreq_policy *policy = sg_policy->policy;

	if (sg_policy->next_freq == next_freq)
		return;

	sg_policy->next_freq = next_freq;
	sg_policy->last_freq_update_time = time;

	if (policy->fast_switch_enabled) {
		next_freq = cpufreq_driver_fast_switch(policy, next_freq);
		if (!next_freq)
			return;

		policy->cur = next_freq;
		trace_cpu_frequency(next_freq, smp_processor_id());
	} else {
		sg_policy->work_in_progress = true;
		irq_work_queue(&sg_policy->irq_work);
	}
}

/**
 * get_next_freq - Compute a new frequency for a given cpufreq policy.
 * @sg_policy: schedutil policy object to compute the new frequency for.
 * @util: Current CPU utilization.
 * @max: CPU capacity.
 *
 * If the utilization is frequency-invariant, choose the new frequency to be
 * proportional to it, that is
 *
 * next_freq = C * max_freq * util / max
 *
 * Otherwise, approximate the would-be frequency-invariant utilization by
 * util_raw * (curr_freq / max_freq) which leads to
 *
 * next_freq = C * curr_freq * util_raw / max
 *
 * Take C = 1.25 for the frequency tipping point at (util / max) = 0.8.
 *
 * The lowest driver-supported frequency that is equal to or greater than the
 * raw next_freq (as calculated above) is returned, subject to policy min/max
 * and cpufreq driver limitations.
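 *
 * For example, with frequency-invariant utilization util = 512, max = 1024
 * and cpuinfo.max_freq = 2000000 (illustrative values only), the raw
 * frequency comes out as (2000000 + 2000000/4) * 512 / 1024 = 1250000.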
 */
static unsigned int get_next_freq(struct sugov_policy *sg_policy,
				  unsigned long util, unsigned long max)
{
	struct cpufreq_policy *policy = sg_policy->policy;
	unsigned int freq = arch_scale_freq_invariant() ?
				policy->cpuinfo.max_freq : policy->cur;

	freq = (freq + (freq >> 2)) * util / max;

	if (freq == sg_policy->cached_raw_freq && sg_policy->next_freq != UINT_MAX)
		return sg_policy->next_freq;
	sg_policy->cached_raw_freq = freq;
	return cpufreq_driver_resolve_freq(policy, freq);
}

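/* Snapshot the CPU's capacity and its current CFS and DEADLINE utilization. */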
static void sugov_get_util(struct sugov_cpu *sg_cpu)
{
	struct rq *rq = cpu_rq(sg_cpu->cpu);

	sg_cpu->max = arch_scale_cpu_capacity(NULL, sg_cpu->cpu);
	sg_cpu->util_cfs = cpu_util_cfs(rq);
	sg_cpu->util_dl  = cpu_util_dl(rq);
}

static unsigned long sugov_aggregate_util(struct sugov_cpu *sg_cpu)
{
	/*
	 * Ideally we would like to set util_dl as min/guaranteed freq and
	 * util_cfs + util_dl as requested freq. However, cpufreq is not yet
	 * ready for such an interface. So, we only do the latter for now.
	 */
	return min(sg_cpu->util_cfs + sg_cpu->util_dl, sg_cpu->max);
}

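/*
 * An update carrying SCHED_CPUFREQ_IOWAIT arms the boost: it starts at
 * policy->min and is doubled while I/O-wait wakeups keep arriving, up to
 * iowait_boost_max.  On other updates the boost is cleared if the CPU appears
 * to have been idle for longer than a tick.
 */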
static void sugov_set_iowait_boost(struct sugov_cpu *sg_cpu, u64 time)
{
	if (sg_cpu->flags & SCHED_CPUFREQ_IOWAIT) {
		if (sg_cpu->iowait_boost_pending)
			return;

		sg_cpu->iowait_boost_pending = true;

		if (sg_cpu->iowait_boost) {
			sg_cpu->iowait_boost <<= 1;
			if (sg_cpu->iowait_boost > sg_cpu->iowait_boost_max)
				sg_cpu->iowait_boost = sg_cpu->iowait_boost_max;
		} else {
			sg_cpu->iowait_boost = sg_cpu->sg_policy->policy->min;
		}
	} else if (sg_cpu->iowait_boost) {
		s64 delta_ns = time - sg_cpu->last_update;

		/* Clear iowait_boost if the CPU appears to have been idle. */
		if (delta_ns > TICK_NSEC) {
			sg_cpu->iowait_boost = 0;
			sg_cpu->iowait_boost_pending = false;
		}
	}
}

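/*
 * Apply the I/O-wait boost to the (util, max) pair used for frequency
 * selection.  Unless a freshly armed boost is pending, the boost is halved on
 * every use and dropped once it falls below policy->min.
 */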
static void sugov_iowait_boost(struct sugov_cpu *sg_cpu, unsigned long *util,
			       unsigned long *max)
{
	unsigned int boost_util, boost_max;

	if (!sg_cpu->iowait_boost)
		return;

	if (sg_cpu->iowait_boost_pending) {
		sg_cpu->iowait_boost_pending = false;
	} else {
		sg_cpu->iowait_boost >>= 1;
		if (sg_cpu->iowait_boost < sg_cpu->sg_policy->policy->min) {
			sg_cpu->iowait_boost = 0;
			return;
		}
	}

	boost_util = sg_cpu->iowait_boost;
	boost_max = sg_cpu->iowait_boost_max;

	if (*util * boost_max < *max * boost_util) {
		*util = boost_util;
		*max = boost_max;
	}
}

#ifdef CONFIG_NO_HZ_COMMON
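/*
 * The CPU is considered busy if it has not entered the idle loop since this
 * function was last called, i.e. if its idle-calls counter has not changed.
 */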
static bool sugov_cpu_is_busy(struct sugov_cpu *sg_cpu)
{
	unsigned long idle_calls = tick_nohz_get_idle_calls_cpu(sg_cpu->cpu);
	bool ret = idle_calls == sg_cpu->saved_idle_calls;

	sg_cpu->saved_idle_calls = idle_calls;
	return ret;
}
#else
static inline bool sugov_cpu_is_busy(struct sugov_cpu *sg_cpu) { return false; }
#endif /* CONFIG_NO_HZ_COMMON */

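/*
 * Update handler for single-CPU policies.  No extra locking is needed here:
 * the callback runs with the target CPU's rq->lock held, which serializes all
 * accesses to this CPU's sugov_cpu data.
 */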
static void sugov_update_single(struct update_util_data *hook, u64 time,
				unsigned int flags)
{
	struct sugov_cpu *sg_cpu = container_of(hook, struct sugov_cpu, update_util);
	struct sugov_policy *sg_policy = sg_cpu->sg_policy;
	struct cpufreq_policy *policy = sg_policy->policy;
	unsigned long util, max;
	unsigned int next_f;
	bool busy;

	sugov_set_iowait_boost(sg_cpu, time);
	sg_cpu->last_update = time;

	if (!sugov_should_update_freq(sg_policy, time))
		return;

	busy = sugov_cpu_is_busy(sg_cpu);

	if (flags & SCHED_CPUFREQ_RT) {
		next_f = policy->cpuinfo.max_freq;
	} else {
		sugov_get_util(sg_cpu);
		max = sg_cpu->max;
		util = sugov_aggregate_util(sg_cpu);
		sugov_iowait_boost(sg_cpu, &util, &max);
		next_f = get_next_freq(sg_policy, util, max);
		/*
		 * Do not reduce the frequency if the CPU has not been idle
		 * recently, as the reduction is likely to be premature then.
		 */
		if (busy && next_f < sg_policy->next_freq) {
			next_f = sg_policy->next_freq;

			/* Reset cached freq as next_freq has changed */
			sg_policy->cached_raw_freq = 0;
		}
	}
	sugov_update_commit(sg_policy, time, next_f);
}

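/*
 * Evaluate all CPUs sharing the policy and return the frequency matching the
 * highest util/max ratio among them, applying I/O-wait boosts and going
 * straight to the maximum frequency if any of them is running RT work.
 */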
static unsigned int sugov_next_freq_shared(struct sugov_cpu *sg_cpu, u64 time)
{
	struct sugov_policy *sg_policy = sg_cpu->sg_policy;
	struct cpufreq_policy *policy = sg_policy->policy;
	unsigned long util = 0, max = 1;
	unsigned int j;

	for_each_cpu(j, policy->cpus) {
		struct sugov_cpu *j_sg_cpu = &per_cpu(sugov_cpu, j);
		unsigned long j_util, j_max;
		s64 delta_ns;

		/*
		 * If the CFS CPU utilization was last updated before the
		 * previous frequency update and the time elapsed between the
		 * last update of the CPU utilization and the last frequency
		 * update is long enough, reset iowait_boost and util_cfs, as
		 * they are now probably stale. However, still consider the
		 * CPU contribution if it has some DEADLINE utilization
		 * (util_dl).
		 */
		delta_ns = time - j_sg_cpu->last_update;
		if (delta_ns > TICK_NSEC) {
			j_sg_cpu->iowait_boost = 0;
			j_sg_cpu->iowait_boost_pending = false;
			j_sg_cpu->util_cfs = 0;
			if (j_sg_cpu->util_dl == 0)
				continue;
		}
		if (j_sg_cpu->flags & SCHED_CPUFREQ_RT)
			return policy->cpuinfo.max_freq;

		j_max = j_sg_cpu->max;
		j_util = sugov_aggregate_util(j_sg_cpu);
		if (j_util * max > j_max * util) {
			util = j_util;
			max = j_max;
		}

		sugov_iowait_boost(j_sg_cpu, &util, &max);
	}

	return get_next_freq(sg_policy, util, max);
}

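/*
 * Update handler for shared policies.  The per-CPU data may be accessed from
 * any CPU in the policy, so it is protected by the policy-wide update_lock.
 */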
static void sugov_update_shared(struct update_util_data *hook, u64 time,
				unsigned int flags)
{
	struct sugov_cpu *sg_cpu = container_of(hook, struct sugov_cpu, update_util);
	struct sugov_policy *sg_policy = sg_cpu->sg_policy;
	unsigned int next_f;

	raw_spin_lock(&sg_policy->update_lock);

	sugov_get_util(sg_cpu);
	sg_cpu->flags = flags;

	sugov_set_iowait_boost(sg_cpu, time);
	sg_cpu->last_update = time;

	if (sugov_should_update_freq(sg_policy, time)) {
		if (flags & SCHED_CPUFREQ_RT)
			next_f = sg_policy->policy->cpuinfo.max_freq;
		else
			next_f = sugov_next_freq_shared(sg_cpu, time);

		sugov_update_commit(sg_policy, time, next_f);
	}

	raw_spin_unlock(&sg_policy->update_lock);
}

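/* Slow path: carry out the frequency change from kthread (process) context. */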
static void sugov_work(struct kthread_work *work)
{
	struct sugov_policy *sg_policy = container_of(work, struct sugov_policy, work);

	mutex_lock(&sg_policy->work_lock);
	__cpufreq_driver_target(sg_policy->policy, sg_policy->next_freq,
				CPUFREQ_RELATION_L);
	mutex_unlock(&sg_policy->work_lock);

	sg_policy->work_in_progress = false;
}

static void sugov_irq_work(struct irq_work *irq_work)
{
	struct sugov_policy *sg_policy;

	sg_policy = container_of(irq_work, struct sugov_policy, irq_work);

	/*
	 * For RT tasks, the schedutil governor shoots the frequency to maximum.
	 * Special care must be taken to ensure that this kthread doesn't result
	 * in the same behavior.
	 *
	 * This is (mostly) guaranteed by the work_in_progress flag. The flag is
	 * updated only at the end of the sugov_work() function and before that
	 * the schedutil governor rejects all other frequency scaling requests.
	 *
	 * There is a very rare case though, where the RT thread yields right
	 * after the work_in_progress flag is cleared. The effects of that are
	 * neglected for now.
	 */
	kthread_queue_work(&sg_policy->worker, &sg_policy->work);
}

/************************** sysfs interface ************************/

static struct sugov_tunables *global_tunables;
static DEFINE_MUTEX(global_tunables_lock);

static inline struct sugov_tunables *to_sugov_tunables(struct gov_attr_set *attr_set)
{
	return container_of(attr_set, struct sugov_tunables, attr_set);
}

static ssize_t rate_limit_us_show(struct gov_attr_set *attr_set, char *buf)
{
	struct sugov_tunables *tunables = to_sugov_tunables(attr_set);

	return sprintf(buf, "%u\n", tunables->rate_limit_us);
}

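/*
 * Writing rate_limit_us updates the tunables and immediately propagates the
 * new value, converted to nanoseconds, to every policy attached to this
 * attribute set.
 */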
static ssize_t rate_limit_us_store(struct gov_attr_set *attr_set, const char *buf,
				   size_t count)
{
	struct sugov_tunables *tunables = to_sugov_tunables(attr_set);
	struct sugov_policy *sg_policy;
	unsigned int rate_limit_us;

	if (kstrtouint(buf, 10, &rate_limit_us))
		return -EINVAL;

	tunables->rate_limit_us = rate_limit_us;

	list_for_each_entry(sg_policy, &attr_set->policy_list, tunables_hook)
		sg_policy->freq_update_delay_ns = rate_limit_us * NSEC_PER_USEC;

	return count;
}

static struct governor_attr rate_limit_us = __ATTR_RW(rate_limit_us);

static struct attribute *sugov_attributes[] = {
	&rate_limit_us.attr,
	NULL
};

static struct kobj_type sugov_tunables_ktype = {
	.default_attrs = sugov_attributes,
	.sysfs_ops = &governor_sysfs_ops,
};

/********************** cpufreq governor interface *********************/

static struct cpufreq_governor schedutil_gov;

static struct sugov_policy *sugov_policy_alloc(struct cpufreq_policy *policy)
{
	struct sugov_policy *sg_policy;

	sg_policy = kzalloc(sizeof(*sg_policy), GFP_KERNEL);
	if (!sg_policy)
		return NULL;

	sg_policy->policy = policy;
	raw_spin_lock_init(&sg_policy->update_lock);
	return sg_policy;
}

static void sugov_policy_free(struct sugov_policy *sg_policy)
{
	kfree(sg_policy);
}

static int sugov_kthread_create(struct sugov_policy *sg_policy)
{
	struct task_struct *thread;
	struct sched_attr attr = {
		.size = sizeof(struct sched_attr),
		.sched_policy = SCHED_DEADLINE,
		.sched_flags = SCHED_FLAG_SUGOV,
		.sched_nice = 0,
		.sched_priority = 0,
		/*
		 * Fake (unused) bandwidth; workaround to "fix"
		 * priority inheritance.
		 */
		.sched_runtime	=  1000000,
		.sched_deadline = 10000000,
		.sched_period	= 10000000,
	};
	struct cpufreq_policy *policy = sg_policy->policy;
	int ret;

	/* kthread only required for slow path */
	if (policy->fast_switch_enabled)
		return 0;

	kthread_init_work(&sg_policy->work, sugov_work);
	kthread_init_worker(&sg_policy->worker);
	thread = kthread_create(kthread_worker_fn, &sg_policy->worker,
				"sugov:%d",
				cpumask_first(policy->related_cpus));
	if (IS_ERR(thread)) {
		pr_err("failed to create sugov thread: %ld\n", PTR_ERR(thread));
		return PTR_ERR(thread);
	}

	ret = sched_setattr_nocheck(thread, &attr);
	if (ret) {
		kthread_stop(thread);
		pr_warn("%s: failed to set SCHED_DEADLINE\n", __func__);
		return ret;
	}

	sg_policy->thread = thread;

	/* Kthread is bound to all CPUs by default */
	if (!policy->dvfs_possible_from_any_cpu)
		kthread_bind_mask(thread, policy->related_cpus);

	init_irq_work(&sg_policy->irq_work, sugov_irq_work);
	mutex_init(&sg_policy->work_lock);

	wake_up_process(thread);

	return 0;
}

static void sugov_kthread_stop(struct sugov_policy *sg_policy)
{
	/* kthread only required for slow path */
	if (sg_policy->policy->fast_switch_enabled)
		return;

	kthread_flush_worker(&sg_policy->worker);
	kthread_stop(sg_policy->thread);
	mutex_destroy(&sg_policy->work_lock);
}

static struct sugov_tunables *sugov_tunables_alloc(struct sugov_policy *sg_policy)
{
	struct sugov_tunables *tunables;

	tunables = kzalloc(sizeof(*tunables), GFP_KERNEL);
	if (tunables) {
		gov_attr_set_init(&tunables->attr_set, &sg_policy->tunables_hook);
		if (!have_governor_per_policy())
			global_tunables = tunables;
	}
	return tunables;
}

static void sugov_tunables_free(struct sugov_tunables *tunables)
{
	if (!have_governor_per_policy())
		global_tunables = NULL;

	kfree(tunables);
}

static int sugov_init(struct cpufreq_policy *policy)
{
	struct sugov_policy *sg_policy;
	struct sugov_tunables *tunables;
	int ret = 0;

	/* State should be equivalent to EXIT */
	if (policy->governor_data)
		return -EBUSY;

	cpufreq_enable_fast_switch(policy);

	sg_policy = sugov_policy_alloc(policy);
	if (!sg_policy) {
		ret = -ENOMEM;
		goto disable_fast_switch;
	}

	ret = sugov_kthread_create(sg_policy);
	if (ret)
		goto free_sg_policy;

	mutex_lock(&global_tunables_lock);

	if (global_tunables) {
		if (WARN_ON(have_governor_per_policy())) {
			ret = -EINVAL;
			goto stop_kthread;
		}
		policy->governor_data = sg_policy;
		sg_policy->tunables = global_tunables;

		gov_attr_set_get(&global_tunables->attr_set, &sg_policy->tunables_hook);
		goto out;
	}

	tunables = sugov_tunables_alloc(sg_policy);
	if (!tunables) {
		ret = -ENOMEM;
		goto stop_kthread;
	}

	tunables->rate_limit_us = cpufreq_policy_transition_delay_us(policy);

	policy->governor_data = sg_policy;
	sg_policy->tunables = tunables;

	ret = kobject_init_and_add(&tunables->attr_set.kobj, &sugov_tunables_ktype,
				   get_governor_parent_kobj(policy), "%s",
				   schedutil_gov.name);
	if (ret)
		goto fail;

out:
	mutex_unlock(&global_tunables_lock);
	return 0;

fail:
	policy->governor_data = NULL;
	sugov_tunables_free(tunables);

stop_kthread:
	sugov_kthread_stop(sg_policy);

free_sg_policy:
	mutex_unlock(&global_tunables_lock);

	sugov_policy_free(sg_policy);

disable_fast_switch:
	cpufreq_disable_fast_switch(policy);

	pr_err("initialization failed (error %d)\n", ret);
	return ret;
}

static void sugov_exit(struct cpufreq_policy *policy)
{
	struct sugov_policy *sg_policy = policy->governor_data;
	struct sugov_tunables *tunables = sg_policy->tunables;
	unsigned int count;

	mutex_lock(&global_tunables_lock);

	count = gov_attr_set_put(&tunables->attr_set, &sg_policy->tunables_hook);
	policy->governor_data = NULL;
	if (!count)
		sugov_tunables_free(tunables);

	mutex_unlock(&global_tunables_lock);

	sugov_kthread_stop(sg_policy);
	sugov_policy_free(sg_policy);
	cpufreq_disable_fast_switch(policy);
}

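/* ->start() callback: reset the governor state and install the update hooks. */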
static int sugov_start(struct cpufreq_policy *policy)
{
	struct sugov_policy *sg_policy = policy->governor_data;
	unsigned int cpu;

	sg_policy->freq_update_delay_ns = sg_policy->tunables->rate_limit_us * NSEC_PER_USEC;
	sg_policy->last_freq_update_time = 0;
	sg_policy->next_freq = UINT_MAX;
	sg_policy->work_in_progress = false;
	sg_policy->need_freq_update = false;
	sg_policy->cached_raw_freq = 0;

	for_each_cpu(cpu, policy->cpus) {
		struct sugov_cpu *sg_cpu = &per_cpu(sugov_cpu, cpu);

		memset(sg_cpu, 0, sizeof(*sg_cpu));
		sg_cpu->cpu = cpu;
		sg_cpu->sg_policy = sg_policy;
		sg_cpu->flags = 0;
		sg_cpu->iowait_boost_max = policy->cpuinfo.max_freq;
	}

	for_each_cpu(cpu, policy->cpus) {
		struct sugov_cpu *sg_cpu = &per_cpu(sugov_cpu, cpu);

		cpufreq_add_update_util_hook(cpu, &sg_cpu->update_util,
					     policy_is_shared(policy) ?
							sugov_update_shared :
							sugov_update_single);
	}
	return 0;
}

static void sugov_stop(struct cpufreq_policy *policy)
{
	struct sugov_policy *sg_policy = policy->governor_data;
	unsigned int cpu;

	for_each_cpu(cpu, policy->cpus)
		cpufreq_remove_update_util_hook(cpu);

	synchronize_sched();

	if (!policy->fast_switch_enabled) {
		irq_work_sync(&sg_policy->irq_work);
		kthread_cancel_work_sync(&sg_policy->work);
	}
}

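/*
 * ->limits() callback: apply the updated policy limits (directly in the slow
 * path) and force the next update to pick a fresh frequency.
 */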
static void sugov_limits(struct cpufreq_policy *policy)
{
	struct sugov_policy *sg_policy = policy->governor_data;

	if (!policy->fast_switch_enabled) {
		mutex_lock(&sg_policy->work_lock);
		cpufreq_policy_apply_limits(policy);
		mutex_unlock(&sg_policy->work_lock);
	}

	sg_policy->need_freq_update = true;
}

static struct cpufreq_governor schedutil_gov = {
	.name = "schedutil",
	.owner = THIS_MODULE,
	.dynamic_switching = true,
	.init = sugov_init,
	.exit = sugov_exit,
	.start = sugov_start,
	.stop = sugov_stop,
	.limits = sugov_limits,
};

#ifdef CONFIG_CPU_FREQ_DEFAULT_GOV_SCHEDUTIL
struct cpufreq_governor *cpufreq_default_governor(void)
{
	return &schedutil_gov;
}
#endif

static int __init sugov_register(void)
{
	return cpufreq_register_governor(&schedutil_gov);
}
fs_initcall(sugov_register);