1b886d83cSThomas Gleixner // SPDX-License-Identifier: GPL-2.0-only 25e76b2abSTim Chen /* 35e76b2abSTim Chen * itmt.c: Support Intel Turbo Boost Max Technology 3.0 45e76b2abSTim Chen * 55e76b2abSTim Chen * (C) Copyright 2016 Intel Corporation 65e76b2abSTim Chen * Author: Tim Chen <tim.c.chen@linux.intel.com> 75e76b2abSTim Chen * 85e76b2abSTim Chen * On platforms supporting Intel Turbo Boost Max Technology 3.0, (ITMT), 95e76b2abSTim Chen * the maximum turbo frequencies of some cores in a CPU package may be 105e76b2abSTim Chen * higher than for the other cores in the same package. In that case, 115e76b2abSTim Chen * better performance can be achieved by making the scheduler prefer 125e76b2abSTim Chen * to run tasks on the CPUs with higher max turbo frequencies. 135e76b2abSTim Chen * 145e76b2abSTim Chen * This file provides functions and data structures for enabling the 155e76b2abSTim Chen * scheduler to favor scheduling on cores can be boosted to a higher 165e76b2abSTim Chen * frequency under ITMT. 175e76b2abSTim Chen */ 185e76b2abSTim Chen 195e76b2abSTim Chen #include <linux/sched.h> 205e76b2abSTim Chen #include <linux/cpumask.h> 215e76b2abSTim Chen #include <linux/cpuset.h> 22a293b395SIngo Molnar #include <linux/mutex.h> 235e76b2abSTim Chen #include <linux/sysctl.h> 245e76b2abSTim Chen #include <linux/nodemask.h> 255e76b2abSTim Chen 265e76b2abSTim Chen static DEFINE_MUTEX(itmt_update_mutex); 275e76b2abSTim Chen DEFINE_PER_CPU_READ_MOSTLY(int, sched_core_priority); 285e76b2abSTim Chen 295e76b2abSTim Chen /* Boolean to track if system has ITMT capabilities */ 305e76b2abSTim Chen static bool __read_mostly sched_itmt_capable; 315e76b2abSTim Chen 32f9793e34STim Chen /* 33f9793e34STim Chen * Boolean to control whether we want to move processes to cpu capable 34f9793e34STim Chen * of higher turbo frequency for cpus supporting Intel Turbo Boost Max 35f9793e34STim Chen * Technology 3.0. 36f9793e34STim Chen * 37f9793e34STim Chen * It can be set via /proc/sys/kernel/sched_itmt_enabled 38f9793e34STim Chen */ 39f9793e34STim Chen unsigned int __read_mostly sysctl_sched_itmt_enabled; 40f9793e34STim Chen 41f9793e34STim Chen static int sched_itmt_update_handler(struct ctl_table *table, int write, 4232927393SChristoph Hellwig void *buffer, size_t *lenp, loff_t *ppos) 43f9793e34STim Chen { 44f9793e34STim Chen unsigned int old_sysctl; 45f9793e34STim Chen int ret; 46f9793e34STim Chen 47f9793e34STim Chen mutex_lock(&itmt_update_mutex); 48f9793e34STim Chen 49f9793e34STim Chen if (!sched_itmt_capable) { 50f9793e34STim Chen mutex_unlock(&itmt_update_mutex); 51f9793e34STim Chen return -EINVAL; 52f9793e34STim Chen } 53f9793e34STim Chen 54f9793e34STim Chen old_sysctl = sysctl_sched_itmt_enabled; 55f9793e34STim Chen ret = proc_dointvec_minmax(table, write, buffer, lenp, ppos); 56f9793e34STim Chen 57f9793e34STim Chen if (!ret && write && old_sysctl != sysctl_sched_itmt_enabled) { 58f9793e34STim Chen x86_topology_update = true; 59f9793e34STim Chen rebuild_sched_domains(); 60f9793e34STim Chen } 61f9793e34STim Chen 62f9793e34STim Chen mutex_unlock(&itmt_update_mutex); 63f9793e34STim Chen 64f9793e34STim Chen return ret; 65f9793e34STim Chen } 66f9793e34STim Chen 67f9793e34STim Chen static struct ctl_table itmt_kern_table[] = { 68f9793e34STim Chen { 69f9793e34STim Chen .procname = "sched_itmt_enabled", 70f9793e34STim Chen .data = &sysctl_sched_itmt_enabled, 71f9793e34STim Chen .maxlen = sizeof(unsigned int), 72f9793e34STim Chen .mode = 0644, 73f9793e34STim Chen .proc_handler = sched_itmt_update_handler, 74eec4844fSMatteo Croce .extra1 = SYSCTL_ZERO, 75eec4844fSMatteo Croce .extra2 = SYSCTL_ONE, 76f9793e34STim Chen }, 77f9793e34STim Chen {} 78f9793e34STim Chen }; 79f9793e34STim Chen 80f9793e34STim Chen static struct ctl_table_header *itmt_sysctl_header; 81f9793e34STim Chen 825e76b2abSTim Chen /** 835e76b2abSTim Chen * sched_set_itmt_support() - Indicate platform supports ITMT 845e76b2abSTim Chen * 855e76b2abSTim Chen * This function is used by the OS to indicate to scheduler that the platform 865e76b2abSTim Chen * is capable of supporting the ITMT feature. 875e76b2abSTim Chen * 885e76b2abSTim Chen * The current scheme has the pstate driver detects if the system 895e76b2abSTim Chen * is ITMT capable and call sched_set_itmt_support. 905e76b2abSTim Chen * 915e76b2abSTim Chen * This must be done only after sched_set_itmt_core_prio 925e76b2abSTim Chen * has been called to set the cpus' priorities. 93f9793e34STim Chen * It must not be called with cpu hot plug lock 94f9793e34STim Chen * held as we need to acquire the lock to rebuild sched domains 95f9793e34STim Chen * later. 96f9793e34STim Chen * 97f9793e34STim Chen * Return: 0 on success 985e76b2abSTim Chen */ 99f9793e34STim Chen int sched_set_itmt_support(void) 1005e76b2abSTim Chen { 1015e76b2abSTim Chen mutex_lock(&itmt_update_mutex); 1025e76b2abSTim Chen 103f9793e34STim Chen if (sched_itmt_capable) { 104f9793e34STim Chen mutex_unlock(&itmt_update_mutex); 105f9793e34STim Chen return 0; 106f9793e34STim Chen } 107f9793e34STim Chen 108*89d7971eSLuis Chamberlain itmt_sysctl_header = register_sysctl("kernel", itmt_kern_table); 109f9793e34STim Chen if (!itmt_sysctl_header) { 110f9793e34STim Chen mutex_unlock(&itmt_update_mutex); 111f9793e34STim Chen return -ENOMEM; 112f9793e34STim Chen } 113f9793e34STim Chen 1145e76b2abSTim Chen sched_itmt_capable = true; 1155e76b2abSTim Chen 116f9793e34STim Chen sysctl_sched_itmt_enabled = 1; 117f9793e34STim Chen 118f9793e34STim Chen x86_topology_update = true; 119f9793e34STim Chen rebuild_sched_domains(); 120f9793e34STim Chen 1215e76b2abSTim Chen mutex_unlock(&itmt_update_mutex); 122f9793e34STim Chen 123f9793e34STim Chen return 0; 1245e76b2abSTim Chen } 1255e76b2abSTim Chen 1265e76b2abSTim Chen /** 1275e76b2abSTim Chen * sched_clear_itmt_support() - Revoke platform's support of ITMT 1285e76b2abSTim Chen * 1295e76b2abSTim Chen * This function is used by the OS to indicate that it has 1305e76b2abSTim Chen * revoked the platform's support of ITMT feature. 1315e76b2abSTim Chen * 132f9793e34STim Chen * It must not be called with cpu hot plug lock 133f9793e34STim Chen * held as we need to acquire the lock to rebuild sched domains 134f9793e34STim Chen * later. 1355e76b2abSTim Chen */ 1365e76b2abSTim Chen void sched_clear_itmt_support(void) 1375e76b2abSTim Chen { 1385e76b2abSTim Chen mutex_lock(&itmt_update_mutex); 1395e76b2abSTim Chen 140f9793e34STim Chen if (!sched_itmt_capable) { 141f9793e34STim Chen mutex_unlock(&itmt_update_mutex); 142f9793e34STim Chen return; 143f9793e34STim Chen } 1445e76b2abSTim Chen sched_itmt_capable = false; 1455e76b2abSTim Chen 146f9793e34STim Chen if (itmt_sysctl_header) { 147f9793e34STim Chen unregister_sysctl_table(itmt_sysctl_header); 148f9793e34STim Chen itmt_sysctl_header = NULL; 149f9793e34STim Chen } 150f9793e34STim Chen 151f9793e34STim Chen if (sysctl_sched_itmt_enabled) { 152f9793e34STim Chen /* disable sched_itmt if we are no longer ITMT capable */ 153f9793e34STim Chen sysctl_sched_itmt_enabled = 0; 154f9793e34STim Chen x86_topology_update = true; 155f9793e34STim Chen rebuild_sched_domains(); 156f9793e34STim Chen } 157f9793e34STim Chen 1585e76b2abSTim Chen mutex_unlock(&itmt_update_mutex); 1595e76b2abSTim Chen } 1605e76b2abSTim Chen 1615e76b2abSTim Chen int arch_asym_cpu_priority(int cpu) 1625e76b2abSTim Chen { 1635e76b2abSTim Chen return per_cpu(sched_core_priority, cpu); 1645e76b2abSTim Chen } 1655e76b2abSTim Chen 1665e76b2abSTim Chen /** 1675e76b2abSTim Chen * sched_set_itmt_core_prio() - Set CPU priority based on ITMT 1685e76b2abSTim Chen * @prio: Priority of cpu core 1695e76b2abSTim Chen * @core_cpu: The cpu number associated with the core 1705e76b2abSTim Chen * 1715e76b2abSTim Chen * The pstate driver will find out the max boost frequency 1725e76b2abSTim Chen * and call this function to set a priority proportional 1735e76b2abSTim Chen * to the max boost frequency. CPU with higher boost 1745e76b2abSTim Chen * frequency will receive higher priority. 1755e76b2abSTim Chen * 1765e76b2abSTim Chen * No need to rebuild sched domain after updating 1775e76b2abSTim Chen * the CPU priorities. The sched domains have no 1785e76b2abSTim Chen * dependency on CPU priorities. 1795e76b2abSTim Chen */ 1805e76b2abSTim Chen void sched_set_itmt_core_prio(int prio, int core_cpu) 1815e76b2abSTim Chen { 1825e76b2abSTim Chen int cpu, i = 1; 1835e76b2abSTim Chen 1845e76b2abSTim Chen for_each_cpu(cpu, topology_sibling_cpumask(core_cpu)) { 1855e76b2abSTim Chen int smt_prio; 1865e76b2abSTim Chen 1875e76b2abSTim Chen /* 1885e76b2abSTim Chen * Ensure that the siblings are moved to the end 1895e76b2abSTim Chen * of the priority chain and only used when 1905e76b2abSTim Chen * all other high priority cpus are out of capacity. 1915e76b2abSTim Chen */ 192183b8ec3SRicardo Neri smt_prio = prio * smp_num_siblings / (i * i); 1935e76b2abSTim Chen per_cpu(sched_core_priority, cpu) = smt_prio; 1945e76b2abSTim Chen i++; 1955e76b2abSTim Chen } 1965e76b2abSTim Chen } 197