xref: /openbmc/linux/arch/x86/kernel/itmt.c (revision 046a5a95)
1b886d83cSThomas Gleixner // SPDX-License-Identifier: GPL-2.0-only
25e76b2abSTim Chen /*
35e76b2abSTim Chen  * itmt.c: Support Intel Turbo Boost Max Technology 3.0
45e76b2abSTim Chen  *
55e76b2abSTim Chen  * (C) Copyright 2016 Intel Corporation
65e76b2abSTim Chen  * Author: Tim Chen <tim.c.chen@linux.intel.com>
75e76b2abSTim Chen  *
85e76b2abSTim Chen  * On platforms supporting Intel Turbo Boost Max Technology 3.0, (ITMT),
95e76b2abSTim Chen  * the maximum turbo frequencies of some cores in a CPU package may be
105e76b2abSTim Chen  * higher than for the other cores in the same package.  In that case,
115e76b2abSTim Chen  * better performance can be achieved by making the scheduler prefer
125e76b2abSTim Chen  * to run tasks on the CPUs with higher max turbo frequencies.
135e76b2abSTim Chen  *
145e76b2abSTim Chen  * This file provides functions and data structures for enabling the
155e76b2abSTim Chen  * scheduler to favor scheduling on cores that can be boosted to a higher
165e76b2abSTim Chen  * frequency under ITMT.
175e76b2abSTim Chen  */
185e76b2abSTim Chen 
195e76b2abSTim Chen #include <linux/sched.h>
205e76b2abSTim Chen #include <linux/cpumask.h>
215e76b2abSTim Chen #include <linux/cpuset.h>
22a293b395SIngo Molnar #include <linux/mutex.h>
235e76b2abSTim Chen #include <linux/sysctl.h>
245e76b2abSTim Chen #include <linux/nodemask.h>
255e76b2abSTim Chen 
265e76b2abSTim Chen static DEFINE_MUTEX(itmt_update_mutex);
275e76b2abSTim Chen DEFINE_PER_CPU_READ_MOSTLY(int, sched_core_priority);
285e76b2abSTim Chen 
295e76b2abSTim Chen /* Boolean to track if system has ITMT capabilities */
305e76b2abSTim Chen static bool __read_mostly sched_itmt_capable;
315e76b2abSTim Chen 
32f9793e34STim Chen /*
33f9793e34STim Chen  * Boolean to control whether we want to move processes to cpu capable
34f9793e34STim Chen  * of higher turbo frequency for cpus supporting Intel Turbo Boost Max
35f9793e34STim Chen  * Technology 3.0.
36f9793e34STim Chen  *
37f9793e34STim Chen  * It can be set via /proc/sys/kernel/sched_itmt_enabled
38f9793e34STim Chen  */
39f9793e34STim Chen unsigned int __read_mostly sysctl_sched_itmt_enabled;
40f9793e34STim Chen 
/*
 * proc handler for the sched_itmt_enabled sysctl.
 *
 * Fails with -EINVAL while the platform is not marked ITMT capable.
 * Otherwise the request is passed to proc_dointvec_minmax(); if that
 * succeeds for a write and the stored value differs from the one seen
 * before the call, the sched domains are rebuilt.
 *
 * Returns -EINVAL or the result of proc_dointvec_minmax().
 */
static int sched_itmt_update_handler(struct ctl_table *table, int write,
				     void *buffer, size_t *lenp, loff_t *ppos)
{
	unsigned int prev_enabled;
	int err = -EINVAL;

	mutex_lock(&itmt_update_mutex);

	if (!sched_itmt_capable)
		goto out_unlock;

	prev_enabled = sysctl_sched_itmt_enabled;
	err = proc_dointvec_minmax(table, write, buffer, lenp, ppos);
	if (err || !write)
		goto out_unlock;

	/* Only a real change of the setting needs a domain rebuild. */
	if (sysctl_sched_itmt_enabled != prev_enabled) {
		x86_topology_update = true;
		rebuild_sched_domains();
	}

out_unlock:
	mutex_unlock(&itmt_update_mutex);
	return err;
}
66f9793e34STim Chen 
67f9793e34STim Chen static struct ctl_table itmt_kern_table[] = {
68f9793e34STim Chen 	{
69f9793e34STim Chen 		.procname	= "sched_itmt_enabled",
70f9793e34STim Chen 		.data		= &sysctl_sched_itmt_enabled,
71f9793e34STim Chen 		.maxlen		= sizeof(unsigned int),
72f9793e34STim Chen 		.mode		= 0644,
73f9793e34STim Chen 		.proc_handler	= sched_itmt_update_handler,
74eec4844fSMatteo Croce 		.extra1		= SYSCTL_ZERO,
75eec4844fSMatteo Croce 		.extra2		= SYSCTL_ONE,
76f9793e34STim Chen 	},
77f9793e34STim Chen 	{}
78f9793e34STim Chen };
79f9793e34STim Chen 
80f9793e34STim Chen static struct ctl_table_header *itmt_sysctl_header;
81f9793e34STim Chen 
825e76b2abSTim Chen /**
835e76b2abSTim Chen  * sched_set_itmt_support() - Indicate platform supports ITMT
845e76b2abSTim Chen  *
855e76b2abSTim Chen  * This function is used by the OS to indicate to scheduler that the platform
865e76b2abSTim Chen  * is capable of supporting the ITMT feature.
875e76b2abSTim Chen  *
885e76b2abSTim Chen  * The current scheme has the pstate driver detects if the system
895e76b2abSTim Chen  * is ITMT capable and call sched_set_itmt_support.
905e76b2abSTim Chen  *
915e76b2abSTim Chen  * This must be done only after sched_set_itmt_core_prio
925e76b2abSTim Chen  * has been called to set the cpus' priorities.
93f9793e34STim Chen  * It must not be called with cpu hot plug lock
94f9793e34STim Chen  * held as we need to acquire the lock to rebuild sched domains
95f9793e34STim Chen  * later.
96f9793e34STim Chen  *
97f9793e34STim Chen  * Return: 0 on success
985e76b2abSTim Chen  */
sched_set_itmt_support(void)99f9793e34STim Chen int sched_set_itmt_support(void)
1005e76b2abSTim Chen {
1015e76b2abSTim Chen 	mutex_lock(&itmt_update_mutex);
1025e76b2abSTim Chen 
103f9793e34STim Chen 	if (sched_itmt_capable) {
104f9793e34STim Chen 		mutex_unlock(&itmt_update_mutex);
105f9793e34STim Chen 		return 0;
106f9793e34STim Chen 	}
107f9793e34STim Chen 
10889d7971eSLuis Chamberlain 	itmt_sysctl_header = register_sysctl("kernel", itmt_kern_table);
109f9793e34STim Chen 	if (!itmt_sysctl_header) {
110f9793e34STim Chen 		mutex_unlock(&itmt_update_mutex);
111f9793e34STim Chen 		return -ENOMEM;
112f9793e34STim Chen 	}
113f9793e34STim Chen 
1145e76b2abSTim Chen 	sched_itmt_capable = true;
1155e76b2abSTim Chen 
116f9793e34STim Chen 	sysctl_sched_itmt_enabled = 1;
117f9793e34STim Chen 
118f9793e34STim Chen 	x86_topology_update = true;
119f9793e34STim Chen 	rebuild_sched_domains();
120f9793e34STim Chen 
1215e76b2abSTim Chen 	mutex_unlock(&itmt_update_mutex);
122f9793e34STim Chen 
123f9793e34STim Chen 	return 0;
1245e76b2abSTim Chen }
1255e76b2abSTim Chen 
1265e76b2abSTim Chen /**
1275e76b2abSTim Chen  * sched_clear_itmt_support() - Revoke platform's support of ITMT
1285e76b2abSTim Chen  *
1295e76b2abSTim Chen  * This function is used by the OS to indicate that it has
1305e76b2abSTim Chen  * revoked the platform's support of ITMT feature.
1315e76b2abSTim Chen  *
132f9793e34STim Chen  * It must not be called with cpu hot plug lock
133f9793e34STim Chen  * held as we need to acquire the lock to rebuild sched domains
134f9793e34STim Chen  * later.
1355e76b2abSTim Chen  */
sched_clear_itmt_support(void)1365e76b2abSTim Chen void sched_clear_itmt_support(void)
1375e76b2abSTim Chen {
1385e76b2abSTim Chen 	mutex_lock(&itmt_update_mutex);
1395e76b2abSTim Chen 
140f9793e34STim Chen 	if (!sched_itmt_capable) {
141f9793e34STim Chen 		mutex_unlock(&itmt_update_mutex);
142f9793e34STim Chen 		return;
143f9793e34STim Chen 	}
1445e76b2abSTim Chen 	sched_itmt_capable = false;
1455e76b2abSTim Chen 
146f9793e34STim Chen 	if (itmt_sysctl_header) {
147f9793e34STim Chen 		unregister_sysctl_table(itmt_sysctl_header);
148f9793e34STim Chen 		itmt_sysctl_header = NULL;
149f9793e34STim Chen 	}
150f9793e34STim Chen 
151f9793e34STim Chen 	if (sysctl_sched_itmt_enabled) {
152f9793e34STim Chen 		/* disable sched_itmt if we are no longer ITMT capable */
153f9793e34STim Chen 		sysctl_sched_itmt_enabled = 0;
154f9793e34STim Chen 		x86_topology_update = true;
155f9793e34STim Chen 		rebuild_sched_domains();
156f9793e34STim Chen 	}
157f9793e34STim Chen 
1585e76b2abSTim Chen 	mutex_unlock(&itmt_update_mutex);
1595e76b2abSTim Chen }
1605e76b2abSTim Chen 
arch_asym_cpu_priority(int cpu)1615e76b2abSTim Chen int arch_asym_cpu_priority(int cpu)
1625e76b2abSTim Chen {
1635e76b2abSTim Chen 	return per_cpu(sched_core_priority, cpu);
1645e76b2abSTim Chen }
1655e76b2abSTim Chen 
1665e76b2abSTim Chen /**
1675e76b2abSTim Chen  * sched_set_itmt_core_prio() - Set CPU priority based on ITMT
168*046a5a95SRicardo Neri  * @prio:	Priority of @cpu
169*046a5a95SRicardo Neri  * @cpu:	The CPU number
1705e76b2abSTim Chen  *
1715e76b2abSTim Chen  * The pstate driver will find out the max boost frequency
1725e76b2abSTim Chen  * and call this function to set a priority proportional
173*046a5a95SRicardo Neri  * to the max boost frequency. CPUs with higher boost
1745e76b2abSTim Chen  * frequency will receive higher priority.
1755e76b2abSTim Chen  *
1765e76b2abSTim Chen  * No need to rebuild sched domain after updating
1775e76b2abSTim Chen  * the CPU priorities. The sched domains have no
1785e76b2abSTim Chen  * dependency on CPU priorities.
1795e76b2abSTim Chen  */
sched_set_itmt_core_prio(int prio,int cpu)180*046a5a95SRicardo Neri void sched_set_itmt_core_prio(int prio, int cpu)
1815e76b2abSTim Chen {
182*046a5a95SRicardo Neri 	per_cpu(sched_core_priority, cpu) = prio;
1835e76b2abSTim Chen }
184