1 /* 2 * fam15h_power.c - AMD Family 15h processor power monitoring 3 * 4 * Copyright (c) 2011 Advanced Micro Devices, Inc. 5 * Author: Andreas Herrmann <herrmann.der.user@googlemail.com> 6 * 7 * 8 * This driver is free software; you can redistribute it and/or 9 * modify it under the terms of the GNU General Public License; either 10 * version 2 of the License, or (at your option) any later version. 11 * 12 * This driver is distributed in the hope that it will be useful, 13 * but WITHOUT ANY WARRANTY; without even the implied warranty of 14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 15 * See the GNU General Public License for more details. 16 * 17 * You should have received a copy of the GNU General Public License 18 * along with this driver; if not, see <http://www.gnu.org/licenses/>. 19 */ 20 21 #include <linux/err.h> 22 #include <linux/hwmon.h> 23 #include <linux/hwmon-sysfs.h> 24 #include <linux/init.h> 25 #include <linux/module.h> 26 #include <linux/pci.h> 27 #include <linux/bitops.h> 28 #include <asm/processor.h> 29 #include <asm/msr.h> 30 31 MODULE_DESCRIPTION("AMD Family 15h CPU processor power monitor"); 32 MODULE_AUTHOR("Andreas Herrmann <herrmann.der.user@googlemail.com>"); 33 MODULE_LICENSE("GPL"); 34 35 /* D18F3 */ 36 #define REG_NORTHBRIDGE_CAP 0xe8 37 38 /* D18F4 */ 39 #define REG_PROCESSOR_TDP 0x1b8 40 41 /* D18F5 */ 42 #define REG_TDP_RUNNING_AVERAGE 0xe0 43 #define REG_TDP_LIMIT3 0xe8 44 45 #define FAM15H_MIN_NUM_ATTRS 2 46 #define FAM15H_NUM_GROUPS 2 47 48 #define MSR_F15H_CU_MAX_PWR_ACCUMULATOR 0xc001007b 49 50 #define PCI_DEVICE_ID_AMD_15H_M70H_NB_F4 0x15b4 51 52 struct fam15h_power_data { 53 struct pci_dev *pdev; 54 unsigned int tdp_to_watts; 55 unsigned int base_tdp; 56 unsigned int processor_pwr_watts; 57 unsigned int cpu_pwr_sample_ratio; 58 const struct attribute_group *groups[FAM15H_NUM_GROUPS]; 59 struct attribute_group group; 60 /* maximum accumulated power of a compute unit */ 61 u64 max_cu_acc_power; 62 }; 63 64 static ssize_t show_power(struct device *dev, 65 struct device_attribute *attr, char *buf) 66 { 67 u32 val, tdp_limit, running_avg_range; 68 s32 running_avg_capture; 69 u64 curr_pwr_watts; 70 struct fam15h_power_data *data = dev_get_drvdata(dev); 71 struct pci_dev *f4 = data->pdev; 72 73 pci_bus_read_config_dword(f4->bus, PCI_DEVFN(PCI_SLOT(f4->devfn), 5), 74 REG_TDP_RUNNING_AVERAGE, &val); 75 76 /* 77 * On Carrizo and later platforms, TdpRunAvgAccCap bit field 78 * is extended to 4:31 from 4:25. 79 */ 80 if (boot_cpu_data.x86 == 0x15 && boot_cpu_data.x86_model >= 0x60) { 81 running_avg_capture = val >> 4; 82 running_avg_capture = sign_extend32(running_avg_capture, 27); 83 } else { 84 running_avg_capture = (val >> 4) & 0x3fffff; 85 running_avg_capture = sign_extend32(running_avg_capture, 21); 86 } 87 88 running_avg_range = (val & 0xf) + 1; 89 90 pci_bus_read_config_dword(f4->bus, PCI_DEVFN(PCI_SLOT(f4->devfn), 5), 91 REG_TDP_LIMIT3, &val); 92 93 /* 94 * On Carrizo and later platforms, ApmTdpLimit bit field 95 * is extended to 16:31 from 16:28. 96 */ 97 if (boot_cpu_data.x86 == 0x15 && boot_cpu_data.x86_model >= 0x60) 98 tdp_limit = val >> 16; 99 else 100 tdp_limit = (val >> 16) & 0x1fff; 101 102 curr_pwr_watts = ((u64)(tdp_limit + 103 data->base_tdp)) << running_avg_range; 104 curr_pwr_watts -= running_avg_capture; 105 curr_pwr_watts *= data->tdp_to_watts; 106 107 /* 108 * Convert to microWatt 109 * 110 * power is in Watt provided as fixed point integer with 111 * scaling factor 1/(2^16). For conversion we use 112 * (10^6)/(2^16) = 15625/(2^10) 113 */ 114 curr_pwr_watts = (curr_pwr_watts * 15625) >> (10 + running_avg_range); 115 return sprintf(buf, "%u\n", (unsigned int) curr_pwr_watts); 116 } 117 static DEVICE_ATTR(power1_input, S_IRUGO, show_power, NULL); 118 119 static ssize_t show_power_crit(struct device *dev, 120 struct device_attribute *attr, char *buf) 121 { 122 struct fam15h_power_data *data = dev_get_drvdata(dev); 123 124 return sprintf(buf, "%u\n", data->processor_pwr_watts); 125 } 126 static DEVICE_ATTR(power1_crit, S_IRUGO, show_power_crit, NULL); 127 128 static int fam15h_power_init_attrs(struct pci_dev *pdev, 129 struct fam15h_power_data *data) 130 { 131 int n = FAM15H_MIN_NUM_ATTRS; 132 struct attribute **fam15h_power_attrs; 133 struct cpuinfo_x86 *c = &boot_cpu_data; 134 135 if (c->x86 == 0x15 && 136 (c->x86_model <= 0xf || 137 (c->x86_model >= 0x60 && c->x86_model <= 0x7f))) 138 n += 1; 139 140 fam15h_power_attrs = devm_kcalloc(&pdev->dev, n, 141 sizeof(*fam15h_power_attrs), 142 GFP_KERNEL); 143 144 if (!fam15h_power_attrs) 145 return -ENOMEM; 146 147 n = 0; 148 fam15h_power_attrs[n++] = &dev_attr_power1_crit.attr; 149 if (c->x86 == 0x15 && 150 (c->x86_model <= 0xf || 151 (c->x86_model >= 0x60 && c->x86_model <= 0x7f))) 152 fam15h_power_attrs[n++] = &dev_attr_power1_input.attr; 153 154 data->group.attrs = fam15h_power_attrs; 155 156 return 0; 157 } 158 159 static bool should_load_on_this_node(struct pci_dev *f4) 160 { 161 u32 val; 162 163 pci_bus_read_config_dword(f4->bus, PCI_DEVFN(PCI_SLOT(f4->devfn), 3), 164 REG_NORTHBRIDGE_CAP, &val); 165 if ((val & BIT(29)) && ((val >> 30) & 3)) 166 return false; 167 168 return true; 169 } 170 171 /* 172 * Newer BKDG versions have an updated recommendation on how to properly 173 * initialize the running average range (was: 0xE, now: 0x9). This avoids 174 * counter saturations resulting in bogus power readings. 175 * We correct this value ourselves to cope with older BIOSes. 176 */ 177 static const struct pci_device_id affected_device[] = { 178 { PCI_VDEVICE(AMD, PCI_DEVICE_ID_AMD_15H_NB_F4) }, 179 { 0 } 180 }; 181 182 static void tweak_runavg_range(struct pci_dev *pdev) 183 { 184 u32 val; 185 186 /* 187 * let this quirk apply only to the current version of the 188 * northbridge, since future versions may change the behavior 189 */ 190 if (!pci_match_id(affected_device, pdev)) 191 return; 192 193 pci_bus_read_config_dword(pdev->bus, 194 PCI_DEVFN(PCI_SLOT(pdev->devfn), 5), 195 REG_TDP_RUNNING_AVERAGE, &val); 196 if ((val & 0xf) != 0xe) 197 return; 198 199 val &= ~0xf; 200 val |= 0x9; 201 pci_bus_write_config_dword(pdev->bus, 202 PCI_DEVFN(PCI_SLOT(pdev->devfn), 5), 203 REG_TDP_RUNNING_AVERAGE, val); 204 } 205 206 #ifdef CONFIG_PM 207 static int fam15h_power_resume(struct pci_dev *pdev) 208 { 209 tweak_runavg_range(pdev); 210 return 0; 211 } 212 #else 213 #define fam15h_power_resume NULL 214 #endif 215 216 static int fam15h_power_init_data(struct pci_dev *f4, 217 struct fam15h_power_data *data) 218 { 219 u32 val, eax, ebx, ecx, edx; 220 u64 tmp; 221 int ret; 222 223 pci_read_config_dword(f4, REG_PROCESSOR_TDP, &val); 224 data->base_tdp = val >> 16; 225 tmp = val & 0xffff; 226 227 pci_bus_read_config_dword(f4->bus, PCI_DEVFN(PCI_SLOT(f4->devfn), 5), 228 REG_TDP_LIMIT3, &val); 229 230 data->tdp_to_watts = ((val & 0x3ff) << 6) | ((val >> 10) & 0x3f); 231 tmp *= data->tdp_to_watts; 232 233 /* result not allowed to be >= 256W */ 234 if ((tmp >> 16) >= 256) 235 dev_warn(&f4->dev, 236 "Bogus value for ProcessorPwrWatts (processor_pwr_watts>=%u)\n", 237 (unsigned int) (tmp >> 16)); 238 239 /* convert to microWatt */ 240 data->processor_pwr_watts = (tmp * 15625) >> 10; 241 242 ret = fam15h_power_init_attrs(f4, data); 243 if (ret) 244 return ret; 245 246 cpuid(0x80000007, &eax, &ebx, &ecx, &edx); 247 248 /* CPUID Fn8000_0007:EDX[12] indicates to support accumulated power */ 249 if (!(edx & BIT(12))) 250 return 0; 251 252 /* 253 * determine the ratio of the compute unit power accumulator 254 * sample period to the PTSC counter period by executing CPUID 255 * Fn8000_0007:ECX 256 */ 257 data->cpu_pwr_sample_ratio = ecx; 258 259 if (rdmsrl_safe(MSR_F15H_CU_MAX_PWR_ACCUMULATOR, &tmp)) { 260 pr_err("Failed to read max compute unit power accumulator MSR\n"); 261 return -ENODEV; 262 } 263 264 data->max_cu_acc_power = tmp; 265 266 return 0; 267 } 268 269 static int fam15h_power_probe(struct pci_dev *pdev, 270 const struct pci_device_id *id) 271 { 272 struct fam15h_power_data *data; 273 struct device *dev = &pdev->dev; 274 struct device *hwmon_dev; 275 int ret; 276 277 /* 278 * though we ignore every other northbridge, we still have to 279 * do the tweaking on _each_ node in MCM processors as the counters 280 * are working hand-in-hand 281 */ 282 tweak_runavg_range(pdev); 283 284 if (!should_load_on_this_node(pdev)) 285 return -ENODEV; 286 287 data = devm_kzalloc(dev, sizeof(struct fam15h_power_data), GFP_KERNEL); 288 if (!data) 289 return -ENOMEM; 290 291 ret = fam15h_power_init_data(pdev, data); 292 if (ret) 293 return ret; 294 295 data->pdev = pdev; 296 297 data->groups[0] = &data->group; 298 299 hwmon_dev = devm_hwmon_device_register_with_groups(dev, "fam15h_power", 300 data, 301 &data->groups[0]); 302 return PTR_ERR_OR_ZERO(hwmon_dev); 303 } 304 305 static const struct pci_device_id fam15h_power_id_table[] = { 306 { PCI_VDEVICE(AMD, PCI_DEVICE_ID_AMD_15H_NB_F4) }, 307 { PCI_VDEVICE(AMD, PCI_DEVICE_ID_AMD_15H_M30H_NB_F4) }, 308 { PCI_VDEVICE(AMD, PCI_DEVICE_ID_AMD_15H_M60H_NB_F4) }, 309 { PCI_VDEVICE(AMD, PCI_DEVICE_ID_AMD_15H_M70H_NB_F4) }, 310 { PCI_VDEVICE(AMD, PCI_DEVICE_ID_AMD_16H_NB_F4) }, 311 { PCI_VDEVICE(AMD, PCI_DEVICE_ID_AMD_16H_M30H_NB_F4) }, 312 {} 313 }; 314 MODULE_DEVICE_TABLE(pci, fam15h_power_id_table); 315 316 static struct pci_driver fam15h_power_driver = { 317 .name = "fam15h_power", 318 .id_table = fam15h_power_id_table, 319 .probe = fam15h_power_probe, 320 .resume = fam15h_power_resume, 321 }; 322 323 module_pci_driver(fam15h_power_driver); 324