1 // SPDX-License-Identifier: GPL-2.0-only 2 /* 3 * itmt.c: Support Intel Turbo Boost Max Technology 3.0 4 * 5 * (C) Copyright 2016 Intel Corporation 6 * Author: Tim Chen <tim.c.chen@linux.intel.com> 7 * 8 * On platforms supporting Intel Turbo Boost Max Technology 3.0, (ITMT), 9 * the maximum turbo frequencies of some cores in a CPU package may be 10 * higher than for the other cores in the same package. In that case, 11 * better performance can be achieved by making the scheduler prefer 12 * to run tasks on the CPUs with higher max turbo frequencies. 13 * 14 * This file provides functions and data structures for enabling the 15 * scheduler to favor scheduling on cores can be boosted to a higher 16 * frequency under ITMT. 17 */ 18 19 #include <linux/sched.h> 20 #include <linux/cpumask.h> 21 #include <linux/cpuset.h> 22 #include <linux/mutex.h> 23 #include <linux/sysctl.h> 24 #include <linux/nodemask.h> 25 26 static DEFINE_MUTEX(itmt_update_mutex); 27 DEFINE_PER_CPU_READ_MOSTLY(int, sched_core_priority); 28 29 /* Boolean to track if system has ITMT capabilities */ 30 static bool __read_mostly sched_itmt_capable; 31 32 /* 33 * Boolean to control whether we want to move processes to cpu capable 34 * of higher turbo frequency for cpus supporting Intel Turbo Boost Max 35 * Technology 3.0. 36 * 37 * It can be set via /proc/sys/kernel/sched_itmt_enabled 38 */ 39 unsigned int __read_mostly sysctl_sched_itmt_enabled; 40 41 static int sched_itmt_update_handler(struct ctl_table *table, int write, 42 void __user *buffer, size_t *lenp, 43 loff_t *ppos) 44 { 45 unsigned int old_sysctl; 46 int ret; 47 48 mutex_lock(&itmt_update_mutex); 49 50 if (!sched_itmt_capable) { 51 mutex_unlock(&itmt_update_mutex); 52 return -EINVAL; 53 } 54 55 old_sysctl = sysctl_sched_itmt_enabled; 56 ret = proc_dointvec_minmax(table, write, buffer, lenp, ppos); 57 58 if (!ret && write && old_sysctl != sysctl_sched_itmt_enabled) { 59 x86_topology_update = true; 60 rebuild_sched_domains(); 61 } 62 63 mutex_unlock(&itmt_update_mutex); 64 65 return ret; 66 } 67 68 static unsigned int zero; 69 static unsigned int one = 1; 70 static struct ctl_table itmt_kern_table[] = { 71 { 72 .procname = "sched_itmt_enabled", 73 .data = &sysctl_sched_itmt_enabled, 74 .maxlen = sizeof(unsigned int), 75 .mode = 0644, 76 .proc_handler = sched_itmt_update_handler, 77 .extra1 = &zero, 78 .extra2 = &one, 79 }, 80 {} 81 }; 82 83 static struct ctl_table itmt_root_table[] = { 84 { 85 .procname = "kernel", 86 .mode = 0555, 87 .child = itmt_kern_table, 88 }, 89 {} 90 }; 91 92 static struct ctl_table_header *itmt_sysctl_header; 93 94 /** 95 * sched_set_itmt_support() - Indicate platform supports ITMT 96 * 97 * This function is used by the OS to indicate to scheduler that the platform 98 * is capable of supporting the ITMT feature. 99 * 100 * The current scheme has the pstate driver detects if the system 101 * is ITMT capable and call sched_set_itmt_support. 102 * 103 * This must be done only after sched_set_itmt_core_prio 104 * has been called to set the cpus' priorities. 105 * It must not be called with cpu hot plug lock 106 * held as we need to acquire the lock to rebuild sched domains 107 * later. 108 * 109 * Return: 0 on success 110 */ 111 int sched_set_itmt_support(void) 112 { 113 mutex_lock(&itmt_update_mutex); 114 115 if (sched_itmt_capable) { 116 mutex_unlock(&itmt_update_mutex); 117 return 0; 118 } 119 120 itmt_sysctl_header = register_sysctl_table(itmt_root_table); 121 if (!itmt_sysctl_header) { 122 mutex_unlock(&itmt_update_mutex); 123 return -ENOMEM; 124 } 125 126 sched_itmt_capable = true; 127 128 sysctl_sched_itmt_enabled = 1; 129 130 x86_topology_update = true; 131 rebuild_sched_domains(); 132 133 mutex_unlock(&itmt_update_mutex); 134 135 return 0; 136 } 137 138 /** 139 * sched_clear_itmt_support() - Revoke platform's support of ITMT 140 * 141 * This function is used by the OS to indicate that it has 142 * revoked the platform's support of ITMT feature. 143 * 144 * It must not be called with cpu hot plug lock 145 * held as we need to acquire the lock to rebuild sched domains 146 * later. 147 */ 148 void sched_clear_itmt_support(void) 149 { 150 mutex_lock(&itmt_update_mutex); 151 152 if (!sched_itmt_capable) { 153 mutex_unlock(&itmt_update_mutex); 154 return; 155 } 156 sched_itmt_capable = false; 157 158 if (itmt_sysctl_header) { 159 unregister_sysctl_table(itmt_sysctl_header); 160 itmt_sysctl_header = NULL; 161 } 162 163 if (sysctl_sched_itmt_enabled) { 164 /* disable sched_itmt if we are no longer ITMT capable */ 165 sysctl_sched_itmt_enabled = 0; 166 x86_topology_update = true; 167 rebuild_sched_domains(); 168 } 169 170 mutex_unlock(&itmt_update_mutex); 171 } 172 173 int arch_asym_cpu_priority(int cpu) 174 { 175 return per_cpu(sched_core_priority, cpu); 176 } 177 178 /** 179 * sched_set_itmt_core_prio() - Set CPU priority based on ITMT 180 * @prio: Priority of cpu core 181 * @core_cpu: The cpu number associated with the core 182 * 183 * The pstate driver will find out the max boost frequency 184 * and call this function to set a priority proportional 185 * to the max boost frequency. CPU with higher boost 186 * frequency will receive higher priority. 187 * 188 * No need to rebuild sched domain after updating 189 * the CPU priorities. The sched domains have no 190 * dependency on CPU priorities. 191 */ 192 void sched_set_itmt_core_prio(int prio, int core_cpu) 193 { 194 int cpu, i = 1; 195 196 for_each_cpu(cpu, topology_sibling_cpumask(core_cpu)) { 197 int smt_prio; 198 199 /* 200 * Ensure that the siblings are moved to the end 201 * of the priority chain and only used when 202 * all other high priority cpus are out of capacity. 203 */ 204 smt_prio = prio * smp_num_siblings / i; 205 per_cpu(sched_core_priority, cpu) = smt_prio; 206 i++; 207 } 208 } 209