1b886d83cSThomas Gleixner // SPDX-License-Identifier: GPL-2.0-only 25e76b2abSTim Chen /* 35e76b2abSTim Chen * itmt.c: Support Intel Turbo Boost Max Technology 3.0 45e76b2abSTim Chen * 55e76b2abSTim Chen * (C) Copyright 2016 Intel Corporation 65e76b2abSTim Chen * Author: Tim Chen <tim.c.chen@linux.intel.com> 75e76b2abSTim Chen * 85e76b2abSTim Chen * On platforms supporting Intel Turbo Boost Max Technology 3.0, (ITMT), 95e76b2abSTim Chen * the maximum turbo frequencies of some cores in a CPU package may be 105e76b2abSTim Chen * higher than for the other cores in the same package. In that case, 115e76b2abSTim Chen * better performance can be achieved by making the scheduler prefer 125e76b2abSTim Chen * to run tasks on the CPUs with higher max turbo frequencies. 135e76b2abSTim Chen * 145e76b2abSTim Chen * This file provides functions and data structures for enabling the 155e76b2abSTim Chen * scheduler to favor scheduling on cores can be boosted to a higher 165e76b2abSTim Chen * frequency under ITMT. 175e76b2abSTim Chen */ 185e76b2abSTim Chen 195e76b2abSTim Chen #include <linux/sched.h> 205e76b2abSTim Chen #include <linux/cpumask.h> 215e76b2abSTim Chen #include <linux/cpuset.h> 22a293b395SIngo Molnar #include <linux/mutex.h> 235e76b2abSTim Chen #include <linux/sysctl.h> 245e76b2abSTim Chen #include <linux/nodemask.h> 255e76b2abSTim Chen 265e76b2abSTim Chen static DEFINE_MUTEX(itmt_update_mutex); 275e76b2abSTim Chen DEFINE_PER_CPU_READ_MOSTLY(int, sched_core_priority); 285e76b2abSTim Chen 295e76b2abSTim Chen /* Boolean to track if system has ITMT capabilities */ 305e76b2abSTim Chen static bool __read_mostly sched_itmt_capable; 315e76b2abSTim Chen 32f9793e34STim Chen /* 33f9793e34STim Chen * Boolean to control whether we want to move processes to cpu capable 34f9793e34STim Chen * of higher turbo frequency for cpus supporting Intel Turbo Boost Max 35f9793e34STim Chen * Technology 3.0. 36f9793e34STim Chen * 37f9793e34STim Chen * It can be set via /proc/sys/kernel/sched_itmt_enabled 38f9793e34STim Chen */ 39f9793e34STim Chen unsigned int __read_mostly sysctl_sched_itmt_enabled; 40f9793e34STim Chen 41f9793e34STim Chen static int sched_itmt_update_handler(struct ctl_table *table, int write, 42f9793e34STim Chen void __user *buffer, size_t *lenp, 43f9793e34STim Chen loff_t *ppos) 44f9793e34STim Chen { 45f9793e34STim Chen unsigned int old_sysctl; 46f9793e34STim Chen int ret; 47f9793e34STim Chen 48f9793e34STim Chen mutex_lock(&itmt_update_mutex); 49f9793e34STim Chen 50f9793e34STim Chen if (!sched_itmt_capable) { 51f9793e34STim Chen mutex_unlock(&itmt_update_mutex); 52f9793e34STim Chen return -EINVAL; 53f9793e34STim Chen } 54f9793e34STim Chen 55f9793e34STim Chen old_sysctl = sysctl_sched_itmt_enabled; 56f9793e34STim Chen ret = proc_dointvec_minmax(table, write, buffer, lenp, ppos); 57f9793e34STim Chen 58f9793e34STim Chen if (!ret && write && old_sysctl != sysctl_sched_itmt_enabled) { 59f9793e34STim Chen x86_topology_update = true; 60f9793e34STim Chen rebuild_sched_domains(); 61f9793e34STim Chen } 62f9793e34STim Chen 63f9793e34STim Chen mutex_unlock(&itmt_update_mutex); 64f9793e34STim Chen 65f9793e34STim Chen return ret; 66f9793e34STim Chen } 67f9793e34STim Chen 68f9793e34STim Chen static struct ctl_table itmt_kern_table[] = { 69f9793e34STim Chen { 70f9793e34STim Chen .procname = "sched_itmt_enabled", 71f9793e34STim Chen .data = &sysctl_sched_itmt_enabled, 72f9793e34STim Chen .maxlen = sizeof(unsigned int), 73f9793e34STim Chen .mode = 0644, 74f9793e34STim Chen .proc_handler = sched_itmt_update_handler, 75eec4844fSMatteo Croce .extra1 = SYSCTL_ZERO, 76eec4844fSMatteo Croce .extra2 = SYSCTL_ONE, 77f9793e34STim Chen }, 78f9793e34STim Chen {} 79f9793e34STim Chen }; 80f9793e34STim Chen 81f9793e34STim Chen static struct ctl_table itmt_root_table[] = { 82f9793e34STim Chen { 83f9793e34STim Chen .procname = "kernel", 84f9793e34STim Chen .mode = 0555, 85f9793e34STim Chen .child = itmt_kern_table, 86f9793e34STim Chen }, 87f9793e34STim Chen {} 88f9793e34STim Chen }; 89f9793e34STim Chen 90f9793e34STim Chen static struct ctl_table_header *itmt_sysctl_header; 91f9793e34STim Chen 925e76b2abSTim Chen /** 935e76b2abSTim Chen * sched_set_itmt_support() - Indicate platform supports ITMT 945e76b2abSTim Chen * 955e76b2abSTim Chen * This function is used by the OS to indicate to scheduler that the platform 965e76b2abSTim Chen * is capable of supporting the ITMT feature. 975e76b2abSTim Chen * 985e76b2abSTim Chen * The current scheme has the pstate driver detects if the system 995e76b2abSTim Chen * is ITMT capable and call sched_set_itmt_support. 1005e76b2abSTim Chen * 1015e76b2abSTim Chen * This must be done only after sched_set_itmt_core_prio 1025e76b2abSTim Chen * has been called to set the cpus' priorities. 103f9793e34STim Chen * It must not be called with cpu hot plug lock 104f9793e34STim Chen * held as we need to acquire the lock to rebuild sched domains 105f9793e34STim Chen * later. 106f9793e34STim Chen * 107f9793e34STim Chen * Return: 0 on success 1085e76b2abSTim Chen */ 109f9793e34STim Chen int sched_set_itmt_support(void) 1105e76b2abSTim Chen { 1115e76b2abSTim Chen mutex_lock(&itmt_update_mutex); 1125e76b2abSTim Chen 113f9793e34STim Chen if (sched_itmt_capable) { 114f9793e34STim Chen mutex_unlock(&itmt_update_mutex); 115f9793e34STim Chen return 0; 116f9793e34STim Chen } 117f9793e34STim Chen 118f9793e34STim Chen itmt_sysctl_header = register_sysctl_table(itmt_root_table); 119f9793e34STim Chen if (!itmt_sysctl_header) { 120f9793e34STim Chen mutex_unlock(&itmt_update_mutex); 121f9793e34STim Chen return -ENOMEM; 122f9793e34STim Chen } 123f9793e34STim Chen 1245e76b2abSTim Chen sched_itmt_capable = true; 1255e76b2abSTim Chen 126f9793e34STim Chen sysctl_sched_itmt_enabled = 1; 127f9793e34STim Chen 128f9793e34STim Chen x86_topology_update = true; 129f9793e34STim Chen rebuild_sched_domains(); 130f9793e34STim Chen 1315e76b2abSTim Chen mutex_unlock(&itmt_update_mutex); 132f9793e34STim Chen 133f9793e34STim Chen return 0; 1345e76b2abSTim Chen } 1355e76b2abSTim Chen 1365e76b2abSTim Chen /** 1375e76b2abSTim Chen * sched_clear_itmt_support() - Revoke platform's support of ITMT 1385e76b2abSTim Chen * 1395e76b2abSTim Chen * This function is used by the OS to indicate that it has 1405e76b2abSTim Chen * revoked the platform's support of ITMT feature. 1415e76b2abSTim Chen * 142f9793e34STim Chen * It must not be called with cpu hot plug lock 143f9793e34STim Chen * held as we need to acquire the lock to rebuild sched domains 144f9793e34STim Chen * later. 1455e76b2abSTim Chen */ 1465e76b2abSTim Chen void sched_clear_itmt_support(void) 1475e76b2abSTim Chen { 1485e76b2abSTim Chen mutex_lock(&itmt_update_mutex); 1495e76b2abSTim Chen 150f9793e34STim Chen if (!sched_itmt_capable) { 151f9793e34STim Chen mutex_unlock(&itmt_update_mutex); 152f9793e34STim Chen return; 153f9793e34STim Chen } 1545e76b2abSTim Chen sched_itmt_capable = false; 1555e76b2abSTim Chen 156f9793e34STim Chen if (itmt_sysctl_header) { 157f9793e34STim Chen unregister_sysctl_table(itmt_sysctl_header); 158f9793e34STim Chen itmt_sysctl_header = NULL; 159f9793e34STim Chen } 160f9793e34STim Chen 161f9793e34STim Chen if (sysctl_sched_itmt_enabled) { 162f9793e34STim Chen /* disable sched_itmt if we are no longer ITMT capable */ 163f9793e34STim Chen sysctl_sched_itmt_enabled = 0; 164f9793e34STim Chen x86_topology_update = true; 165f9793e34STim Chen rebuild_sched_domains(); 166f9793e34STim Chen } 167f9793e34STim Chen 1685e76b2abSTim Chen mutex_unlock(&itmt_update_mutex); 1695e76b2abSTim Chen } 1705e76b2abSTim Chen 1715e76b2abSTim Chen int arch_asym_cpu_priority(int cpu) 1725e76b2abSTim Chen { 1735e76b2abSTim Chen return per_cpu(sched_core_priority, cpu); 1745e76b2abSTim Chen } 1755e76b2abSTim Chen 1765e76b2abSTim Chen /** 1775e76b2abSTim Chen * sched_set_itmt_core_prio() - Set CPU priority based on ITMT 1785e76b2abSTim Chen * @prio: Priority of cpu core 1795e76b2abSTim Chen * @core_cpu: The cpu number associated with the core 1805e76b2abSTim Chen * 1815e76b2abSTim Chen * The pstate driver will find out the max boost frequency 1825e76b2abSTim Chen * and call this function to set a priority proportional 1835e76b2abSTim Chen * to the max boost frequency. CPU with higher boost 1845e76b2abSTim Chen * frequency will receive higher priority. 1855e76b2abSTim Chen * 1865e76b2abSTim Chen * No need to rebuild sched domain after updating 1875e76b2abSTim Chen * the CPU priorities. The sched domains have no 1885e76b2abSTim Chen * dependency on CPU priorities. 1895e76b2abSTim Chen */ 1905e76b2abSTim Chen void sched_set_itmt_core_prio(int prio, int core_cpu) 1915e76b2abSTim Chen { 1925e76b2abSTim Chen int cpu, i = 1; 1935e76b2abSTim Chen 1945e76b2abSTim Chen for_each_cpu(cpu, topology_sibling_cpumask(core_cpu)) { 1955e76b2abSTim Chen int smt_prio; 1965e76b2abSTim Chen 1975e76b2abSTim Chen /* 1985e76b2abSTim Chen * Ensure that the siblings are moved to the end 1995e76b2abSTim Chen * of the priority chain and only used when 2005e76b2abSTim Chen * all other high priority cpus are out of capacity. 2015e76b2abSTim Chen */ 2025e76b2abSTim Chen smt_prio = prio * smp_num_siblings / i; 2035e76b2abSTim Chen per_cpu(sched_core_priority, cpu) = smt_prio; 2045e76b2abSTim Chen i++; 2055e76b2abSTim Chen } 2065e76b2abSTim Chen } 207