1 /* 2 * itmt.c: Support Intel Turbo Boost Max Technology 3.0 3 * 4 * (C) Copyright 2016 Intel Corporation 5 * Author: Tim Chen <tim.c.chen@linux.intel.com> 6 * 7 * This program is free software; you can redistribute it and/or 8 * modify it under the terms of the GNU General Public License 9 * as published by the Free Software Foundation; version 2 10 * of the License. 11 * 12 * On platforms supporting Intel Turbo Boost Max Technology 3.0, (ITMT), 13 * the maximum turbo frequencies of some cores in a CPU package may be 14 * higher than for the other cores in the same package. In that case, 15 * better performance can be achieved by making the scheduler prefer 16 * to run tasks on the CPUs with higher max turbo frequencies. 17 * 18 * This file provides functions and data structures for enabling the 19 * scheduler to favor scheduling on cores can be boosted to a higher 20 * frequency under ITMT. 21 */ 22 23 #include <linux/sched.h> 24 #include <linux/cpumask.h> 25 #include <linux/cpuset.h> 26 #include <linux/mutex.h> 27 #include <linux/sysctl.h> 28 #include <linux/nodemask.h> 29 30 static DEFINE_MUTEX(itmt_update_mutex); 31 DEFINE_PER_CPU_READ_MOSTLY(int, sched_core_priority); 32 33 /* Boolean to track if system has ITMT capabilities */ 34 static bool __read_mostly sched_itmt_capable; 35 36 /* 37 * Boolean to control whether we want to move processes to cpu capable 38 * of higher turbo frequency for cpus supporting Intel Turbo Boost Max 39 * Technology 3.0. 40 * 41 * It can be set via /proc/sys/kernel/sched_itmt_enabled 42 */ 43 unsigned int __read_mostly sysctl_sched_itmt_enabled; 44 45 static int sched_itmt_update_handler(struct ctl_table *table, int write, 46 void __user *buffer, size_t *lenp, 47 loff_t *ppos) 48 { 49 unsigned int old_sysctl; 50 int ret; 51 52 mutex_lock(&itmt_update_mutex); 53 54 if (!sched_itmt_capable) { 55 mutex_unlock(&itmt_update_mutex); 56 return -EINVAL; 57 } 58 59 old_sysctl = sysctl_sched_itmt_enabled; 60 ret = proc_dointvec_minmax(table, write, buffer, lenp, ppos); 61 62 if (!ret && write && old_sysctl != sysctl_sched_itmt_enabled) { 63 x86_topology_update = true; 64 rebuild_sched_domains(); 65 } 66 67 mutex_unlock(&itmt_update_mutex); 68 69 return ret; 70 } 71 72 static unsigned int zero; 73 static unsigned int one = 1; 74 static struct ctl_table itmt_kern_table[] = { 75 { 76 .procname = "sched_itmt_enabled", 77 .data = &sysctl_sched_itmt_enabled, 78 .maxlen = sizeof(unsigned int), 79 .mode = 0644, 80 .proc_handler = sched_itmt_update_handler, 81 .extra1 = &zero, 82 .extra2 = &one, 83 }, 84 {} 85 }; 86 87 static struct ctl_table itmt_root_table[] = { 88 { 89 .procname = "kernel", 90 .mode = 0555, 91 .child = itmt_kern_table, 92 }, 93 {} 94 }; 95 96 static struct ctl_table_header *itmt_sysctl_header; 97 98 /** 99 * sched_set_itmt_support() - Indicate platform supports ITMT 100 * 101 * This function is used by the OS to indicate to scheduler that the platform 102 * is capable of supporting the ITMT feature. 103 * 104 * The current scheme has the pstate driver detects if the system 105 * is ITMT capable and call sched_set_itmt_support. 106 * 107 * This must be done only after sched_set_itmt_core_prio 108 * has been called to set the cpus' priorities. 109 * It must not be called with cpu hot plug lock 110 * held as we need to acquire the lock to rebuild sched domains 111 * later. 112 * 113 * Return: 0 on success 114 */ 115 int sched_set_itmt_support(void) 116 { 117 mutex_lock(&itmt_update_mutex); 118 119 if (sched_itmt_capable) { 120 mutex_unlock(&itmt_update_mutex); 121 return 0; 122 } 123 124 itmt_sysctl_header = register_sysctl_table(itmt_root_table); 125 if (!itmt_sysctl_header) { 126 mutex_unlock(&itmt_update_mutex); 127 return -ENOMEM; 128 } 129 130 sched_itmt_capable = true; 131 132 sysctl_sched_itmt_enabled = 1; 133 134 x86_topology_update = true; 135 rebuild_sched_domains(); 136 137 mutex_unlock(&itmt_update_mutex); 138 139 return 0; 140 } 141 142 /** 143 * sched_clear_itmt_support() - Revoke platform's support of ITMT 144 * 145 * This function is used by the OS to indicate that it has 146 * revoked the platform's support of ITMT feature. 147 * 148 * It must not be called with cpu hot plug lock 149 * held as we need to acquire the lock to rebuild sched domains 150 * later. 151 */ 152 void sched_clear_itmt_support(void) 153 { 154 mutex_lock(&itmt_update_mutex); 155 156 if (!sched_itmt_capable) { 157 mutex_unlock(&itmt_update_mutex); 158 return; 159 } 160 sched_itmt_capable = false; 161 162 if (itmt_sysctl_header) { 163 unregister_sysctl_table(itmt_sysctl_header); 164 itmt_sysctl_header = NULL; 165 } 166 167 if (sysctl_sched_itmt_enabled) { 168 /* disable sched_itmt if we are no longer ITMT capable */ 169 sysctl_sched_itmt_enabled = 0; 170 x86_topology_update = true; 171 rebuild_sched_domains(); 172 } 173 174 mutex_unlock(&itmt_update_mutex); 175 } 176 177 int arch_asym_cpu_priority(int cpu) 178 { 179 return per_cpu(sched_core_priority, cpu); 180 } 181 182 /** 183 * sched_set_itmt_core_prio() - Set CPU priority based on ITMT 184 * @prio: Priority of cpu core 185 * @core_cpu: The cpu number associated with the core 186 * 187 * The pstate driver will find out the max boost frequency 188 * and call this function to set a priority proportional 189 * to the max boost frequency. CPU with higher boost 190 * frequency will receive higher priority. 191 * 192 * No need to rebuild sched domain after updating 193 * the CPU priorities. The sched domains have no 194 * dependency on CPU priorities. 195 */ 196 void sched_set_itmt_core_prio(int prio, int core_cpu) 197 { 198 int cpu, i = 1; 199 200 for_each_cpu(cpu, topology_sibling_cpumask(core_cpu)) { 201 int smt_prio; 202 203 /* 204 * Ensure that the siblings are moved to the end 205 * of the priority chain and only used when 206 * all other high priority cpus are out of capacity. 207 */ 208 smt_prio = prio * smp_num_siblings / i; 209 per_cpu(sched_core_priority, cpu) = smt_prio; 210 i++; 211 } 212 } 213