1 // SPDX-License-Identifier: GPL-2.0-only 2 /* 3 * itmt.c: Support Intel Turbo Boost Max Technology 3.0 4 * 5 * (C) Copyright 2016 Intel Corporation 6 * Author: Tim Chen <tim.c.chen@linux.intel.com> 7 * 8 * On platforms supporting Intel Turbo Boost Max Technology 3.0, (ITMT), 9 * the maximum turbo frequencies of some cores in a CPU package may be 10 * higher than for the other cores in the same package. In that case, 11 * better performance can be achieved by making the scheduler prefer 12 * to run tasks on the CPUs with higher max turbo frequencies. 13 * 14 * This file provides functions and data structures for enabling the 15 * scheduler to favor scheduling on cores can be boosted to a higher 16 * frequency under ITMT. 17 */ 18 19 #include <linux/sched.h> 20 #include <linux/cpumask.h> 21 #include <linux/cpuset.h> 22 #include <linux/mutex.h> 23 #include <linux/sysctl.h> 24 #include <linux/nodemask.h> 25 26 static DEFINE_MUTEX(itmt_update_mutex); 27 DEFINE_PER_CPU_READ_MOSTLY(int, sched_core_priority); 28 29 /* Boolean to track if system has ITMT capabilities */ 30 static bool __read_mostly sched_itmt_capable; 31 32 /* 33 * Boolean to control whether we want to move processes to cpu capable 34 * of higher turbo frequency for cpus supporting Intel Turbo Boost Max 35 * Technology 3.0. 36 * 37 * It can be set via /proc/sys/kernel/sched_itmt_enabled 38 */ 39 unsigned int __read_mostly sysctl_sched_itmt_enabled; 40 41 static int sched_itmt_update_handler(struct ctl_table *table, int write, 42 void __user *buffer, size_t *lenp, 43 loff_t *ppos) 44 { 45 unsigned int old_sysctl; 46 int ret; 47 48 mutex_lock(&itmt_update_mutex); 49 50 if (!sched_itmt_capable) { 51 mutex_unlock(&itmt_update_mutex); 52 return -EINVAL; 53 } 54 55 old_sysctl = sysctl_sched_itmt_enabled; 56 ret = proc_dointvec_minmax(table, write, buffer, lenp, ppos); 57 58 if (!ret && write && old_sysctl != sysctl_sched_itmt_enabled) { 59 x86_topology_update = true; 60 rebuild_sched_domains(); 61 } 62 63 mutex_unlock(&itmt_update_mutex); 64 65 return ret; 66 } 67 68 static struct ctl_table itmt_kern_table[] = { 69 { 70 .procname = "sched_itmt_enabled", 71 .data = &sysctl_sched_itmt_enabled, 72 .maxlen = sizeof(unsigned int), 73 .mode = 0644, 74 .proc_handler = sched_itmt_update_handler, 75 .extra1 = SYSCTL_ZERO, 76 .extra2 = SYSCTL_ONE, 77 }, 78 {} 79 }; 80 81 static struct ctl_table itmt_root_table[] = { 82 { 83 .procname = "kernel", 84 .mode = 0555, 85 .child = itmt_kern_table, 86 }, 87 {} 88 }; 89 90 static struct ctl_table_header *itmt_sysctl_header; 91 92 /** 93 * sched_set_itmt_support() - Indicate platform supports ITMT 94 * 95 * This function is used by the OS to indicate to scheduler that the platform 96 * is capable of supporting the ITMT feature. 97 * 98 * The current scheme has the pstate driver detects if the system 99 * is ITMT capable and call sched_set_itmt_support. 100 * 101 * This must be done only after sched_set_itmt_core_prio 102 * has been called to set the cpus' priorities. 103 * It must not be called with cpu hot plug lock 104 * held as we need to acquire the lock to rebuild sched domains 105 * later. 106 * 107 * Return: 0 on success 108 */ 109 int sched_set_itmt_support(void) 110 { 111 mutex_lock(&itmt_update_mutex); 112 113 if (sched_itmt_capable) { 114 mutex_unlock(&itmt_update_mutex); 115 return 0; 116 } 117 118 itmt_sysctl_header = register_sysctl_table(itmt_root_table); 119 if (!itmt_sysctl_header) { 120 mutex_unlock(&itmt_update_mutex); 121 return -ENOMEM; 122 } 123 124 sched_itmt_capable = true; 125 126 sysctl_sched_itmt_enabled = 1; 127 128 x86_topology_update = true; 129 rebuild_sched_domains(); 130 131 mutex_unlock(&itmt_update_mutex); 132 133 return 0; 134 } 135 136 /** 137 * sched_clear_itmt_support() - Revoke platform's support of ITMT 138 * 139 * This function is used by the OS to indicate that it has 140 * revoked the platform's support of ITMT feature. 141 * 142 * It must not be called with cpu hot plug lock 143 * held as we need to acquire the lock to rebuild sched domains 144 * later. 145 */ 146 void sched_clear_itmt_support(void) 147 { 148 mutex_lock(&itmt_update_mutex); 149 150 if (!sched_itmt_capable) { 151 mutex_unlock(&itmt_update_mutex); 152 return; 153 } 154 sched_itmt_capable = false; 155 156 if (itmt_sysctl_header) { 157 unregister_sysctl_table(itmt_sysctl_header); 158 itmt_sysctl_header = NULL; 159 } 160 161 if (sysctl_sched_itmt_enabled) { 162 /* disable sched_itmt if we are no longer ITMT capable */ 163 sysctl_sched_itmt_enabled = 0; 164 x86_topology_update = true; 165 rebuild_sched_domains(); 166 } 167 168 mutex_unlock(&itmt_update_mutex); 169 } 170 171 int arch_asym_cpu_priority(int cpu) 172 { 173 return per_cpu(sched_core_priority, cpu); 174 } 175 176 /** 177 * sched_set_itmt_core_prio() - Set CPU priority based on ITMT 178 * @prio: Priority of cpu core 179 * @core_cpu: The cpu number associated with the core 180 * 181 * The pstate driver will find out the max boost frequency 182 * and call this function to set a priority proportional 183 * to the max boost frequency. CPU with higher boost 184 * frequency will receive higher priority. 185 * 186 * No need to rebuild sched domain after updating 187 * the CPU priorities. The sched domains have no 188 * dependency on CPU priorities. 189 */ 190 void sched_set_itmt_core_prio(int prio, int core_cpu) 191 { 192 int cpu, i = 1; 193 194 for_each_cpu(cpu, topology_sibling_cpumask(core_cpu)) { 195 int smt_prio; 196 197 /* 198 * Ensure that the siblings are moved to the end 199 * of the priority chain and only used when 200 * all other high priority cpus are out of capacity. 201 */ 202 smt_prio = prio * smp_num_siblings / i; 203 per_cpu(sched_core_priority, cpu) = smt_prio; 204 i++; 205 } 206 } 207