xref: /openbmc/linux/arch/x86/kernel/itmt.c (revision f9793e34)
1 /*
2  * itmt.c: Support Intel Turbo Boost Max Technology 3.0
3  *
4  * (C) Copyright 2016 Intel Corporation
5  * Author: Tim Chen <tim.c.chen@linux.intel.com>
6  *
7  * This program is free software; you can redistribute it and/or
8  * modify it under the terms of the GNU General Public License
9  * as published by the Free Software Foundation; version 2
10  * of the License.
11  *
12  * On platforms supporting Intel Turbo Boost Max Technology 3.0, (ITMT),
13  * the maximum turbo frequencies of some cores in a CPU package may be
14  * higher than for the other cores in the same package.  In that case,
15  * better performance can be achieved by making the scheduler prefer
16  * to run tasks on the CPUs with higher max turbo frequencies.
17  *
18  * This file provides functions and data structures for enabling the
19  * scheduler to favor scheduling on cores that can be boosted to a
20  * higher frequency under ITMT.
21  */
22 
23 #include <linux/sched.h>
24 #include <linux/cpumask.h>
25 #include <linux/cpuset.h>
26 #include <asm/mutex.h>
27 #include <linux/sched.h>
28 #include <linux/sysctl.h>
29 #include <linux/nodemask.h>
30 
31 static DEFINE_MUTEX(itmt_update_mutex);
32 DEFINE_PER_CPU_READ_MOSTLY(int, sched_core_priority);
33 
34 /* Boolean to track if system has ITMT capabilities */
35 static bool __read_mostly sched_itmt_capable;
36 
37 /*
38  * Boolean to control whether we want to move processes to cpu capable
39  * of higher turbo frequency for cpus supporting Intel Turbo Boost Max
40  * Technology 3.0.
41  *
42  * It can be set via /proc/sys/kernel/sched_itmt_enabled
43  */
44 unsigned int __read_mostly sysctl_sched_itmt_enabled;
45 
46 static int sched_itmt_update_handler(struct ctl_table *table, int write,
47 				     void __user *buffer, size_t *lenp,
48 				     loff_t *ppos)
49 {
50 	unsigned int old_sysctl;
51 	int ret;
52 
53 	mutex_lock(&itmt_update_mutex);
54 
55 	if (!sched_itmt_capable) {
56 		mutex_unlock(&itmt_update_mutex);
57 		return -EINVAL;
58 	}
59 
60 	old_sysctl = sysctl_sched_itmt_enabled;
61 	ret = proc_dointvec_minmax(table, write, buffer, lenp, ppos);
62 
63 	if (!ret && write && old_sysctl != sysctl_sched_itmt_enabled) {
64 		x86_topology_update = true;
65 		rebuild_sched_domains();
66 	}
67 
68 	mutex_unlock(&itmt_update_mutex);
69 
70 	return ret;
71 }
72 
73 static unsigned int zero;
74 static unsigned int one = 1;
75 static struct ctl_table itmt_kern_table[] = {
76 	{
77 		.procname	= "sched_itmt_enabled",
78 		.data		= &sysctl_sched_itmt_enabled,
79 		.maxlen		= sizeof(unsigned int),
80 		.mode		= 0644,
81 		.proc_handler	= sched_itmt_update_handler,
82 		.extra1		= &zero,
83 		.extra2		= &one,
84 	},
85 	{}
86 };
87 
88 static struct ctl_table itmt_root_table[] = {
89 	{
90 		.procname	= "kernel",
91 		.mode		= 0555,
92 		.child		= itmt_kern_table,
93 	},
94 	{}
95 };
96 
97 static struct ctl_table_header *itmt_sysctl_header;
98 
99 /**
100  * sched_set_itmt_support() - Indicate platform supports ITMT
101  *
102  * This function is used by the OS to indicate to scheduler that the platform
103  * is capable of supporting the ITMT feature.
104  *
105  * The current scheme has the pstate driver detects if the system
106  * is ITMT capable and call sched_set_itmt_support.
107  *
108  * This must be done only after sched_set_itmt_core_prio
109  * has been called to set the cpus' priorities.
110  * It must not be called with cpu hot plug lock
111  * held as we need to acquire the lock to rebuild sched domains
112  * later.
113  *
114  * Return: 0 on success
115  */
116 int sched_set_itmt_support(void)
117 {
118 	mutex_lock(&itmt_update_mutex);
119 
120 	if (sched_itmt_capable) {
121 		mutex_unlock(&itmt_update_mutex);
122 		return 0;
123 	}
124 
125 	itmt_sysctl_header = register_sysctl_table(itmt_root_table);
126 	if (!itmt_sysctl_header) {
127 		mutex_unlock(&itmt_update_mutex);
128 		return -ENOMEM;
129 	}
130 
131 	sched_itmt_capable = true;
132 
133 	sysctl_sched_itmt_enabled = 1;
134 
135 	if (sysctl_sched_itmt_enabled) {
136 		x86_topology_update = true;
137 		rebuild_sched_domains();
138 	}
139 
140 	mutex_unlock(&itmt_update_mutex);
141 
142 	return 0;
143 }
144 
145 /**
146  * sched_clear_itmt_support() - Revoke platform's support of ITMT
147  *
148  * This function is used by the OS to indicate that it has
149  * revoked the platform's support of ITMT feature.
150  *
151  * It must not be called with cpu hot plug lock
152  * held as we need to acquire the lock to rebuild sched domains
153  * later.
154  */
155 void sched_clear_itmt_support(void)
156 {
157 	mutex_lock(&itmt_update_mutex);
158 
159 	if (!sched_itmt_capable) {
160 		mutex_unlock(&itmt_update_mutex);
161 		return;
162 	}
163 	sched_itmt_capable = false;
164 
165 	if (itmt_sysctl_header) {
166 		unregister_sysctl_table(itmt_sysctl_header);
167 		itmt_sysctl_header = NULL;
168 	}
169 
170 	if (sysctl_sched_itmt_enabled) {
171 		/* disable sched_itmt if we are no longer ITMT capable */
172 		sysctl_sched_itmt_enabled = 0;
173 		x86_topology_update = true;
174 		rebuild_sched_domains();
175 	}
176 
177 	mutex_unlock(&itmt_update_mutex);
178 }
179 
180 int arch_asym_cpu_priority(int cpu)
181 {
182 	return per_cpu(sched_core_priority, cpu);
183 }
184 
185 /**
186  * sched_set_itmt_core_prio() - Set CPU priority based on ITMT
187  * @prio:	Priority of cpu core
188  * @core_cpu:	The cpu number associated with the core
189  *
190  * The pstate driver will find out the max boost frequency
191  * and call this function to set a priority proportional
192  * to the max boost frequency. CPU with higher boost
193  * frequency will receive higher priority.
194  *
195  * No need to rebuild sched domain after updating
196  * the CPU priorities. The sched domains have no
197  * dependency on CPU priorities.
198  */
199 void sched_set_itmt_core_prio(int prio, int core_cpu)
200 {
201 	int cpu, i = 1;
202 
203 	for_each_cpu(cpu, topology_sibling_cpumask(core_cpu)) {
204 		int smt_prio;
205 
206 		/*
207 		 * Ensure that the siblings are moved to the end
208 		 * of the priority chain and only used when
209 		 * all other high priority cpus are out of capacity.
210 		 */
211 		smt_prio = prio * smp_num_siblings / i;
212 		per_cpu(sched_core_priority, cpu) = smt_prio;
213 		i++;
214 	}
215 }
216