xref: /openbmc/linux/arch/x86/kernel/itmt.c (revision 5b394b2d)
1 /*
2  * itmt.c: Support Intel Turbo Boost Max Technology 3.0
3  *
4  * (C) Copyright 2016 Intel Corporation
5  * Author: Tim Chen <tim.c.chen@linux.intel.com>
6  *
7  * This program is free software; you can redistribute it and/or
8  * modify it under the terms of the GNU General Public License
9  * as published by the Free Software Foundation; version 2
10  * of the License.
11  *
12  * On platforms supporting Intel Turbo Boost Max Technology 3.0, (ITMT),
13  * the maximum turbo frequencies of some cores in a CPU package may be
14  * higher than for the other cores in the same package.  In that case,
15  * better performance can be achieved by making the scheduler prefer
16  * to run tasks on the CPUs with higher max turbo frequencies.
17  *
18  * This file provides functions and data structures for enabling the
19  * scheduler to favor scheduling on cores that can be boosted to a higher
20  * frequency under ITMT.
21  */
22 
23 #include <linux/sched.h>
24 #include <linux/cpumask.h>
25 #include <linux/cpuset.h>
26 #include <linux/mutex.h>
27 #include <linux/sysctl.h>
28 #include <linux/nodemask.h>
29 
/*
 * Serializes ITMT state changes in this file: it is taken by the sysctl
 * handler and by sched_set_itmt_support()/sched_clear_itmt_support().
 */
static DEFINE_MUTEX(itmt_update_mutex);
/*
 * Per-CPU scheduling priority. Written by sched_set_itmt_core_prio() and
 * returned by arch_asym_cpu_priority().
 */
DEFINE_PER_CPU_READ_MOSTLY(int, sched_core_priority);
32 
/*
 * Boolean to track if the system has ITMT capabilities.
 *
 * Set by sched_set_itmt_support() and cleared by
 * sched_clear_itmt_support(); every access in this file is made with
 * itmt_update_mutex held.
 */
static bool __read_mostly sched_itmt_capable;
35 
/*
 * Boolean to control whether we want to move processes to CPUs capable
 * of a higher turbo frequency on systems supporting Intel Turbo Boost Max
 * Technology 3.0.
 *
 * It can be set via /proc/sys/kernel/sched_itmt_enabled.
 * sched_set_itmt_support() sets it to 1 and sched_clear_itmt_support()
 * resets it to 0.
 */
unsigned int __read_mostly sysctl_sched_itmt_enabled;
44 
45 static int sched_itmt_update_handler(struct ctl_table *table, int write,
46 				     void __user *buffer, size_t *lenp,
47 				     loff_t *ppos)
48 {
49 	unsigned int old_sysctl;
50 	int ret;
51 
52 	mutex_lock(&itmt_update_mutex);
53 
54 	if (!sched_itmt_capable) {
55 		mutex_unlock(&itmt_update_mutex);
56 		return -EINVAL;
57 	}
58 
59 	old_sysctl = sysctl_sched_itmt_enabled;
60 	ret = proc_dointvec_minmax(table, write, buffer, lenp, ppos);
61 
62 	if (!ret && write && old_sysctl != sysctl_sched_itmt_enabled) {
63 		x86_topology_update = true;
64 		rebuild_sched_domains();
65 	}
66 
67 	mutex_unlock(&itmt_update_mutex);
68 
69 	return ret;
70 }
71 
/* Values referenced by .extra1/.extra2 of the sysctl entry below. */
static unsigned int zero;
static unsigned int one = 1;
/*
 * sysctl entry "sched_itmt_enabled", backed by sysctl_sched_itmt_enabled
 * and serviced by sched_itmt_update_handler().
 */
static struct ctl_table itmt_kern_table[] = {
	{
		.procname	= "sched_itmt_enabled",
		.data		= &sysctl_sched_itmt_enabled,
		.maxlen		= sizeof(unsigned int),
		.mode		= 0644,
		.proc_handler	= sched_itmt_update_handler,
		/*
		 * The handler forwards to proc_dointvec_minmax(); extra1 and
		 * extra2 are presumably its lower/upper bounds, i.e. only 0
		 * and 1 are accepted -- confirm against the sysctl API.
		 */
		.extra1		= &zero,
		.extra2		= &one,
	},
	{}	/* empty terminating entry */
};
86 
/*
 * Root table handed to register_sysctl_table(): a "kernel" directory
 * whose child is itmt_kern_table.
 */
static struct ctl_table itmt_root_table[] = {
	{
		.procname	= "kernel",
		.mode		= 0555,
		.child		= itmt_kern_table,
	},
	{}	/* empty terminating entry */
};
95 
/*
 * Handle returned by register_sysctl_table() in sched_set_itmt_support();
 * unregistered and reset to NULL in sched_clear_itmt_support().
 */
static struct ctl_table_header *itmt_sysctl_header;
97 
98 /**
99  * sched_set_itmt_support() - Indicate platform supports ITMT
100  *
101  * This function is used by the OS to indicate to scheduler that the platform
102  * is capable of supporting the ITMT feature.
103  *
104  * The current scheme has the pstate driver detects if the system
105  * is ITMT capable and call sched_set_itmt_support.
106  *
107  * This must be done only after sched_set_itmt_core_prio
108  * has been called to set the cpus' priorities.
109  * It must not be called with cpu hot plug lock
110  * held as we need to acquire the lock to rebuild sched domains
111  * later.
112  *
113  * Return: 0 on success
114  */
115 int sched_set_itmt_support(void)
116 {
117 	mutex_lock(&itmt_update_mutex);
118 
119 	if (sched_itmt_capable) {
120 		mutex_unlock(&itmt_update_mutex);
121 		return 0;
122 	}
123 
124 	itmt_sysctl_header = register_sysctl_table(itmt_root_table);
125 	if (!itmt_sysctl_header) {
126 		mutex_unlock(&itmt_update_mutex);
127 		return -ENOMEM;
128 	}
129 
130 	sched_itmt_capable = true;
131 
132 	sysctl_sched_itmt_enabled = 1;
133 
134 	x86_topology_update = true;
135 	rebuild_sched_domains();
136 
137 	mutex_unlock(&itmt_update_mutex);
138 
139 	return 0;
140 }
141 
142 /**
143  * sched_clear_itmt_support() - Revoke platform's support of ITMT
144  *
145  * This function is used by the OS to indicate that it has
146  * revoked the platform's support of ITMT feature.
147  *
148  * It must not be called with cpu hot plug lock
149  * held as we need to acquire the lock to rebuild sched domains
150  * later.
151  */
152 void sched_clear_itmt_support(void)
153 {
154 	mutex_lock(&itmt_update_mutex);
155 
156 	if (!sched_itmt_capable) {
157 		mutex_unlock(&itmt_update_mutex);
158 		return;
159 	}
160 	sched_itmt_capable = false;
161 
162 	if (itmt_sysctl_header) {
163 		unregister_sysctl_table(itmt_sysctl_header);
164 		itmt_sysctl_header = NULL;
165 	}
166 
167 	if (sysctl_sched_itmt_enabled) {
168 		/* disable sched_itmt if we are no longer ITMT capable */
169 		sysctl_sched_itmt_enabled = 0;
170 		x86_topology_update = true;
171 		rebuild_sched_domains();
172 	}
173 
174 	mutex_unlock(&itmt_update_mutex);
175 }
176 
177 int arch_asym_cpu_priority(int cpu)
178 {
179 	return per_cpu(sched_core_priority, cpu);
180 }
181 
182 /**
183  * sched_set_itmt_core_prio() - Set CPU priority based on ITMT
184  * @prio:	Priority of cpu core
185  * @core_cpu:	The cpu number associated with the core
186  *
187  * The pstate driver will find out the max boost frequency
188  * and call this function to set a priority proportional
189  * to the max boost frequency. CPU with higher boost
190  * frequency will receive higher priority.
191  *
192  * No need to rebuild sched domain after updating
193  * the CPU priorities. The sched domains have no
194  * dependency on CPU priorities.
195  */
196 void sched_set_itmt_core_prio(int prio, int core_cpu)
197 {
198 	int cpu, i = 1;
199 
200 	for_each_cpu(cpu, topology_sibling_cpumask(core_cpu)) {
201 		int smt_prio;
202 
203 		/*
204 		 * Ensure that the siblings are moved to the end
205 		 * of the priority chain and only used when
206 		 * all other high priority cpus are out of capacity.
207 		 */
208 		smt_prio = prio * smp_num_siblings / i;
209 		per_cpu(sched_core_priority, cpu) = smt_prio;
210 		i++;
211 	}
212 }
213