xref: /openbmc/linux/arch/x86/kernel/itmt.c (revision 4e95bc26)
1 // SPDX-License-Identifier: GPL-2.0-only
2 /*
3  * itmt.c: Support Intel Turbo Boost Max Technology 3.0
4  *
5  * (C) Copyright 2016 Intel Corporation
6  * Author: Tim Chen <tim.c.chen@linux.intel.com>
7  *
8  * On platforms supporting Intel Turbo Boost Max Technology 3.0, (ITMT),
9  * the maximum turbo frequencies of some cores in a CPU package may be
10  * higher than for the other cores in the same package.  In that case,
11  * better performance can be achieved by making the scheduler prefer
12  * to run tasks on the CPUs with higher max turbo frequencies.
13  *
14  * This file provides functions and data structures for enabling the
15  * scheduler to favor scheduling on cores that can be boosted to a higher
16  * frequency under ITMT.
17  */
18 
19 #include <linux/sched.h>
20 #include <linux/cpumask.h>
21 #include <linux/cpuset.h>
22 #include <linux/mutex.h>
23 #include <linux/sysctl.h>
24 #include <linux/nodemask.h>
25 
26 static DEFINE_MUTEX(itmt_update_mutex);
27 DEFINE_PER_CPU_READ_MOSTLY(int, sched_core_priority);
28 
29 /* Boolean to track if system has ITMT capabilities */
30 static bool __read_mostly sched_itmt_capable;
31 
32 /*
33  * Boolean to control whether we want to move processes to cpu capable
34  * of higher turbo frequency for cpus supporting Intel Turbo Boost Max
35  * Technology 3.0.
36  *
37  * It can be set via /proc/sys/kernel/sched_itmt_enabled
38  */
39 unsigned int __read_mostly sysctl_sched_itmt_enabled;
40 
41 static int sched_itmt_update_handler(struct ctl_table *table, int write,
42 				     void __user *buffer, size_t *lenp,
43 				     loff_t *ppos)
44 {
45 	unsigned int old_sysctl;
46 	int ret;
47 
48 	mutex_lock(&itmt_update_mutex);
49 
50 	if (!sched_itmt_capable) {
51 		mutex_unlock(&itmt_update_mutex);
52 		return -EINVAL;
53 	}
54 
55 	old_sysctl = sysctl_sched_itmt_enabled;
56 	ret = proc_dointvec_minmax(table, write, buffer, lenp, ppos);
57 
58 	if (!ret && write && old_sysctl != sysctl_sched_itmt_enabled) {
59 		x86_topology_update = true;
60 		rebuild_sched_domains();
61 	}
62 
63 	mutex_unlock(&itmt_update_mutex);
64 
65 	return ret;
66 }
67 
68 static unsigned int zero;
69 static unsigned int one = 1;
70 static struct ctl_table itmt_kern_table[] = {
71 	{
72 		.procname	= "sched_itmt_enabled",
73 		.data		= &sysctl_sched_itmt_enabled,
74 		.maxlen		= sizeof(unsigned int),
75 		.mode		= 0644,
76 		.proc_handler	= sched_itmt_update_handler,
77 		.extra1		= &zero,
78 		.extra2		= &one,
79 	},
80 	{}
81 };
82 
83 static struct ctl_table itmt_root_table[] = {
84 	{
85 		.procname	= "kernel",
86 		.mode		= 0555,
87 		.child		= itmt_kern_table,
88 	},
89 	{}
90 };
91 
92 static struct ctl_table_header *itmt_sysctl_header;
93 
94 /**
95  * sched_set_itmt_support() - Indicate platform supports ITMT
96  *
97  * This function is used by the OS to indicate to scheduler that the platform
98  * is capable of supporting the ITMT feature.
99  *
100  * The current scheme has the pstate driver detects if the system
101  * is ITMT capable and call sched_set_itmt_support.
102  *
103  * This must be done only after sched_set_itmt_core_prio
104  * has been called to set the cpus' priorities.
105  * It must not be called with cpu hot plug lock
106  * held as we need to acquire the lock to rebuild sched domains
107  * later.
108  *
109  * Return: 0 on success
110  */
111 int sched_set_itmt_support(void)
112 {
113 	mutex_lock(&itmt_update_mutex);
114 
115 	if (sched_itmt_capable) {
116 		mutex_unlock(&itmt_update_mutex);
117 		return 0;
118 	}
119 
120 	itmt_sysctl_header = register_sysctl_table(itmt_root_table);
121 	if (!itmt_sysctl_header) {
122 		mutex_unlock(&itmt_update_mutex);
123 		return -ENOMEM;
124 	}
125 
126 	sched_itmt_capable = true;
127 
128 	sysctl_sched_itmt_enabled = 1;
129 
130 	x86_topology_update = true;
131 	rebuild_sched_domains();
132 
133 	mutex_unlock(&itmt_update_mutex);
134 
135 	return 0;
136 }
137 
138 /**
139  * sched_clear_itmt_support() - Revoke platform's support of ITMT
140  *
141  * This function is used by the OS to indicate that it has
142  * revoked the platform's support of ITMT feature.
143  *
144  * It must not be called with cpu hot plug lock
145  * held as we need to acquire the lock to rebuild sched domains
146  * later.
147  */
148 void sched_clear_itmt_support(void)
149 {
150 	mutex_lock(&itmt_update_mutex);
151 
152 	if (!sched_itmt_capable) {
153 		mutex_unlock(&itmt_update_mutex);
154 		return;
155 	}
156 	sched_itmt_capable = false;
157 
158 	if (itmt_sysctl_header) {
159 		unregister_sysctl_table(itmt_sysctl_header);
160 		itmt_sysctl_header = NULL;
161 	}
162 
163 	if (sysctl_sched_itmt_enabled) {
164 		/* disable sched_itmt if we are no longer ITMT capable */
165 		sysctl_sched_itmt_enabled = 0;
166 		x86_topology_update = true;
167 		rebuild_sched_domains();
168 	}
169 
170 	mutex_unlock(&itmt_update_mutex);
171 }
172 
173 int arch_asym_cpu_priority(int cpu)
174 {
175 	return per_cpu(sched_core_priority, cpu);
176 }
177 
178 /**
179  * sched_set_itmt_core_prio() - Set CPU priority based on ITMT
180  * @prio:	Priority of cpu core
181  * @core_cpu:	The cpu number associated with the core
182  *
183  * The pstate driver will find out the max boost frequency
184  * and call this function to set a priority proportional
185  * to the max boost frequency. CPU with higher boost
186  * frequency will receive higher priority.
187  *
188  * No need to rebuild sched domain after updating
189  * the CPU priorities. The sched domains have no
190  * dependency on CPU priorities.
191  */
192 void sched_set_itmt_core_prio(int prio, int core_cpu)
193 {
194 	int cpu, i = 1;
195 
196 	for_each_cpu(cpu, topology_sibling_cpumask(core_cpu)) {
197 		int smt_prio;
198 
199 		/*
200 		 * Ensure that the siblings are moved to the end
201 		 * of the priority chain and only used when
202 		 * all other high priority cpus are out of capacity.
203 		 */
204 		smt_prio = prio * smp_num_siblings / i;
205 		per_cpu(sched_core_priority, cpu) = smt_prio;
206 		i++;
207 	}
208 }
209