1 // SPDX-License-Identifier: GPL-2.0-only 2 /* 3 * Hardware Feedback Interface Driver 4 * 5 * Copyright (c) 2021, Intel Corporation. 6 * 7 * Authors: Aubrey Li <aubrey.li@linux.intel.com> 8 * Ricardo Neri <ricardo.neri-calderon@linux.intel.com> 9 * 10 * 11 * The Hardware Feedback Interface provides a performance and energy efficiency 12 * capability information for each CPU in the system. Depending on the processor 13 * model, hardware may periodically update these capabilities as a result of 14 * changes in the operating conditions (e.g., power limits or thermal 15 * constraints). On other processor models, there is a single HFI update 16 * at boot. 17 * 18 * This file provides functionality to process HFI updates and relay these 19 * updates to userspace. 20 */ 21 22 #define pr_fmt(fmt) "intel-hfi: " fmt 23 24 #include <linux/bitops.h> 25 #include <linux/cpufeature.h> 26 #include <linux/cpumask.h> 27 #include <linux/gfp.h> 28 #include <linux/io.h> 29 #include <linux/kernel.h> 30 #include <linux/math.h> 31 #include <linux/mutex.h> 32 #include <linux/percpu-defs.h> 33 #include <linux/printk.h> 34 #include <linux/processor.h> 35 #include <linux/slab.h> 36 #include <linux/spinlock.h> 37 #include <linux/string.h> 38 #include <linux/topology.h> 39 #include <linux/workqueue.h> 40 41 #include <asm/msr.h> 42 43 #include "../thermal_core.h" 44 #include "intel_hfi.h" 45 46 #define THERM_STATUS_CLEAR_PKG_MASK (BIT(1) | BIT(3) | BIT(5) | BIT(7) | \ 47 BIT(9) | BIT(11) | BIT(26)) 48 49 /* Hardware Feedback Interface MSR configuration bits */ 50 #define HW_FEEDBACK_PTR_VALID_BIT BIT(0) 51 #define HW_FEEDBACK_CONFIG_HFI_ENABLE_BIT BIT(0) 52 53 /* CPUID detection and enumeration definitions for HFI */ 54 55 #define CPUID_HFI_LEAF 6 56 57 union hfi_capabilities { 58 struct { 59 u8 performance:1; 60 u8 energy_efficiency:1; 61 u8 __reserved:6; 62 } split; 63 u8 bits; 64 }; 65 66 union cpuid6_edx { 67 struct { 68 union hfi_capabilities capabilities; 69 u32 table_pages:4; 70 u32 __reserved:4; 71 s32 index:16; 72 } split; 73 u32 full; 74 }; 75 76 /** 77 * struct hfi_cpu_data - HFI capabilities per CPU 78 * @perf_cap: Performance capability 79 * @ee_cap: Energy efficiency capability 80 * 81 * Capabilities of a logical processor in the HFI table. These capabilities are 82 * unitless. 83 */ 84 struct hfi_cpu_data { 85 u8 perf_cap; 86 u8 ee_cap; 87 } __packed; 88 89 /** 90 * struct hfi_hdr - Header of the HFI table 91 * @perf_updated: Hardware updated performance capabilities 92 * @ee_updated: Hardware updated energy efficiency capabilities 93 * 94 * Properties of the data in an HFI table. 95 */ 96 struct hfi_hdr { 97 u8 perf_updated; 98 u8 ee_updated; 99 } __packed; 100 101 /** 102 * struct hfi_instance - Representation of an HFI instance (i.e., a table) 103 * @local_table: Base of the local copy of the HFI table 104 * @timestamp: Timestamp of the last update of the local table. 105 * Located at the base of the local table. 106 * @hdr: Base address of the header of the local table 107 * @data: Base address of the data of the local table 108 * @cpus: CPUs represented in this HFI table instance 109 * @hw_table: Pointer to the HFI table of this instance 110 * @update_work: Delayed work to process HFI updates 111 * @table_lock: Lock to protect acceses to the table of this instance 112 * @event_lock: Lock to process HFI interrupts 113 * 114 * A set of parameters to parse and navigate a specific HFI table. 115 */ 116 struct hfi_instance { 117 union { 118 void *local_table; 119 u64 *timestamp; 120 }; 121 void *hdr; 122 void *data; 123 cpumask_var_t cpus; 124 void *hw_table; 125 struct delayed_work update_work; 126 raw_spinlock_t table_lock; 127 raw_spinlock_t event_lock; 128 }; 129 130 /** 131 * struct hfi_features - Supported HFI features 132 * @nr_table_pages: Size of the HFI table in 4KB pages 133 * @cpu_stride: Stride size to locate the capability data of a logical 134 * processor within the table (i.e., row stride) 135 * @hdr_size: Size of the table header 136 * 137 * Parameters and supported features that are common to all HFI instances 138 */ 139 struct hfi_features { 140 unsigned int nr_table_pages; 141 unsigned int cpu_stride; 142 unsigned int hdr_size; 143 }; 144 145 /** 146 * struct hfi_cpu_info - Per-CPU attributes to consume HFI data 147 * @index: Row of this CPU in its HFI table 148 * @hfi_instance: Attributes of the HFI table to which this CPU belongs 149 * 150 * Parameters to link a logical processor to an HFI table and a row within it. 151 */ 152 struct hfi_cpu_info { 153 s16 index; 154 struct hfi_instance *hfi_instance; 155 }; 156 157 static DEFINE_PER_CPU(struct hfi_cpu_info, hfi_cpu_info) = { .index = -1 }; 158 159 static int max_hfi_instances; 160 static struct hfi_instance *hfi_instances; 161 162 static struct hfi_features hfi_features; 163 static DEFINE_MUTEX(hfi_instance_lock); 164 165 static struct workqueue_struct *hfi_updates_wq; 166 #define HFI_UPDATE_INTERVAL HZ 167 #define HFI_MAX_THERM_NOTIFY_COUNT 16 168 169 static void get_hfi_caps(struct hfi_instance *hfi_instance, 170 struct thermal_genl_cpu_caps *cpu_caps) 171 { 172 int cpu, i = 0; 173 174 raw_spin_lock_irq(&hfi_instance->table_lock); 175 for_each_cpu(cpu, hfi_instance->cpus) { 176 struct hfi_cpu_data *caps; 177 s16 index; 178 179 index = per_cpu(hfi_cpu_info, cpu).index; 180 caps = hfi_instance->data + index * hfi_features.cpu_stride; 181 cpu_caps[i].cpu = cpu; 182 183 /* 184 * Scale performance and energy efficiency to 185 * the [0, 1023] interval that thermal netlink uses. 186 */ 187 cpu_caps[i].performance = caps->perf_cap << 2; 188 cpu_caps[i].efficiency = caps->ee_cap << 2; 189 190 ++i; 191 } 192 raw_spin_unlock_irq(&hfi_instance->table_lock); 193 } 194 195 /* 196 * Call update_capabilities() when there are changes in the HFI table. 197 */ 198 static void update_capabilities(struct hfi_instance *hfi_instance) 199 { 200 struct thermal_genl_cpu_caps *cpu_caps; 201 int i = 0, cpu_count; 202 203 /* CPUs may come online/offline while processing an HFI update. */ 204 mutex_lock(&hfi_instance_lock); 205 206 cpu_count = cpumask_weight(hfi_instance->cpus); 207 208 /* No CPUs to report in this hfi_instance. */ 209 if (!cpu_count) 210 goto out; 211 212 cpu_caps = kcalloc(cpu_count, sizeof(*cpu_caps), GFP_KERNEL); 213 if (!cpu_caps) 214 goto out; 215 216 get_hfi_caps(hfi_instance, cpu_caps); 217 218 if (cpu_count < HFI_MAX_THERM_NOTIFY_COUNT) 219 goto last_cmd; 220 221 /* Process complete chunks of HFI_MAX_THERM_NOTIFY_COUNT capabilities. */ 222 for (i = 0; 223 (i + HFI_MAX_THERM_NOTIFY_COUNT) <= cpu_count; 224 i += HFI_MAX_THERM_NOTIFY_COUNT) 225 thermal_genl_cpu_capability_event(HFI_MAX_THERM_NOTIFY_COUNT, 226 &cpu_caps[i]); 227 228 cpu_count = cpu_count - i; 229 230 last_cmd: 231 /* Process the remaining capabilities if any. */ 232 if (cpu_count) 233 thermal_genl_cpu_capability_event(cpu_count, &cpu_caps[i]); 234 235 kfree(cpu_caps); 236 out: 237 mutex_unlock(&hfi_instance_lock); 238 } 239 240 static void hfi_update_work_fn(struct work_struct *work) 241 { 242 struct hfi_instance *hfi_instance; 243 244 hfi_instance = container_of(to_delayed_work(work), struct hfi_instance, 245 update_work); 246 if (!hfi_instance) 247 return; 248 249 update_capabilities(hfi_instance); 250 } 251 252 void intel_hfi_process_event(__u64 pkg_therm_status_msr_val) 253 { 254 struct hfi_instance *hfi_instance; 255 int cpu = smp_processor_id(); 256 struct hfi_cpu_info *info; 257 u64 new_timestamp; 258 259 if (!pkg_therm_status_msr_val) 260 return; 261 262 info = &per_cpu(hfi_cpu_info, cpu); 263 if (!info) 264 return; 265 266 /* 267 * A CPU is linked to its HFI instance before the thermal vector in the 268 * local APIC is unmasked. Hence, info->hfi_instance cannot be NULL 269 * when receiving an HFI event. 270 */ 271 hfi_instance = info->hfi_instance; 272 if (unlikely(!hfi_instance)) { 273 pr_debug("Received event on CPU %d but instance was null", cpu); 274 return; 275 } 276 277 /* 278 * On most systems, all CPUs in the package receive a package-level 279 * thermal interrupt when there is an HFI update. It is sufficient to 280 * let a single CPU to acknowledge the update and queue work to 281 * process it. The remaining CPUs can resume their work. 282 */ 283 if (!raw_spin_trylock(&hfi_instance->event_lock)) 284 return; 285 286 /* Skip duplicated updates. */ 287 new_timestamp = *(u64 *)hfi_instance->hw_table; 288 if (*hfi_instance->timestamp == new_timestamp) { 289 raw_spin_unlock(&hfi_instance->event_lock); 290 return; 291 } 292 293 raw_spin_lock(&hfi_instance->table_lock); 294 295 /* 296 * Copy the updated table into our local copy. This includes the new 297 * timestamp. 298 */ 299 memcpy(hfi_instance->local_table, hfi_instance->hw_table, 300 hfi_features.nr_table_pages << PAGE_SHIFT); 301 302 raw_spin_unlock(&hfi_instance->table_lock); 303 raw_spin_unlock(&hfi_instance->event_lock); 304 305 /* 306 * Let hardware know that we are done reading the HFI table and it is 307 * free to update it again. 308 */ 309 pkg_therm_status_msr_val &= THERM_STATUS_CLEAR_PKG_MASK & 310 ~PACKAGE_THERM_STATUS_HFI_UPDATED; 311 wrmsrl(MSR_IA32_PACKAGE_THERM_STATUS, pkg_therm_status_msr_val); 312 313 queue_delayed_work(hfi_updates_wq, &hfi_instance->update_work, 314 HFI_UPDATE_INTERVAL); 315 } 316 317 static void init_hfi_cpu_index(struct hfi_cpu_info *info) 318 { 319 union cpuid6_edx edx; 320 321 /* Do not re-read @cpu's index if it has already been initialized. */ 322 if (info->index > -1) 323 return; 324 325 edx.full = cpuid_edx(CPUID_HFI_LEAF); 326 info->index = edx.split.index; 327 } 328 329 /* 330 * The format of the HFI table depends on the number of capabilities that the 331 * hardware supports. Keep a data structure to navigate the table. 332 */ 333 static void init_hfi_instance(struct hfi_instance *hfi_instance) 334 { 335 /* The HFI header is below the time-stamp. */ 336 hfi_instance->hdr = hfi_instance->local_table + 337 sizeof(*hfi_instance->timestamp); 338 339 /* The HFI data starts below the header. */ 340 hfi_instance->data = hfi_instance->hdr + hfi_features.hdr_size; 341 } 342 343 /** 344 * intel_hfi_online() - Enable HFI on @cpu 345 * @cpu: CPU in which the HFI will be enabled 346 * 347 * Enable the HFI to be used in @cpu. The HFI is enabled at the die/package 348 * level. The first CPU in the die/package to come online does the full HFI 349 * initialization. Subsequent CPUs will just link themselves to the HFI 350 * instance of their die/package. 351 * 352 * This function is called before enabling the thermal vector in the local APIC 353 * in order to ensure that @cpu has an associated HFI instance when it receives 354 * an HFI event. 355 */ 356 void intel_hfi_online(unsigned int cpu) 357 { 358 struct hfi_instance *hfi_instance; 359 struct hfi_cpu_info *info; 360 phys_addr_t hw_table_pa; 361 u64 msr_val; 362 u16 die_id; 363 364 /* Nothing to do if hfi_instances are missing. */ 365 if (!hfi_instances) 366 return; 367 368 /* 369 * Link @cpu to the HFI instance of its package/die. It does not 370 * matter whether the instance has been initialized. 371 */ 372 info = &per_cpu(hfi_cpu_info, cpu); 373 die_id = topology_logical_die_id(cpu); 374 hfi_instance = info->hfi_instance; 375 if (!hfi_instance) { 376 if (die_id < 0 || die_id >= max_hfi_instances) 377 return; 378 379 hfi_instance = &hfi_instances[die_id]; 380 info->hfi_instance = hfi_instance; 381 } 382 383 init_hfi_cpu_index(info); 384 385 /* 386 * Now check if the HFI instance of the package/die of @cpu has been 387 * initialized (by checking its header). In such case, all we have to 388 * do is to add @cpu to this instance's cpumask. 389 */ 390 mutex_lock(&hfi_instance_lock); 391 if (hfi_instance->hdr) { 392 cpumask_set_cpu(cpu, hfi_instance->cpus); 393 goto unlock; 394 } 395 396 /* 397 * Hardware is programmed with the physical address of the first page 398 * frame of the table. Hence, the allocated memory must be page-aligned. 399 */ 400 hfi_instance->hw_table = alloc_pages_exact(hfi_features.nr_table_pages, 401 GFP_KERNEL | __GFP_ZERO); 402 if (!hfi_instance->hw_table) 403 goto unlock; 404 405 hw_table_pa = virt_to_phys(hfi_instance->hw_table); 406 407 /* 408 * Allocate memory to keep a local copy of the table that 409 * hardware generates. 410 */ 411 hfi_instance->local_table = kzalloc(hfi_features.nr_table_pages << PAGE_SHIFT, 412 GFP_KERNEL); 413 if (!hfi_instance->local_table) 414 goto free_hw_table; 415 416 /* 417 * Program the address of the feedback table of this die/package. On 418 * some processors, hardware remembers the old address of the HFI table 419 * even after having been reprogrammed and re-enabled. Thus, do not free 420 * the pages allocated for the table or reprogram the hardware with a 421 * new base address. Namely, program the hardware only once. 422 */ 423 msr_val = hw_table_pa | HW_FEEDBACK_PTR_VALID_BIT; 424 wrmsrl(MSR_IA32_HW_FEEDBACK_PTR, msr_val); 425 426 init_hfi_instance(hfi_instance); 427 428 INIT_DELAYED_WORK(&hfi_instance->update_work, hfi_update_work_fn); 429 raw_spin_lock_init(&hfi_instance->table_lock); 430 raw_spin_lock_init(&hfi_instance->event_lock); 431 432 cpumask_set_cpu(cpu, hfi_instance->cpus); 433 434 /* 435 * Enable the hardware feedback interface and never disable it. See 436 * comment on programming the address of the table. 437 */ 438 rdmsrl(MSR_IA32_HW_FEEDBACK_CONFIG, msr_val); 439 msr_val |= HW_FEEDBACK_CONFIG_HFI_ENABLE_BIT; 440 wrmsrl(MSR_IA32_HW_FEEDBACK_CONFIG, msr_val); 441 442 unlock: 443 mutex_unlock(&hfi_instance_lock); 444 return; 445 446 free_hw_table: 447 free_pages_exact(hfi_instance->hw_table, hfi_features.nr_table_pages); 448 goto unlock; 449 } 450 451 /** 452 * intel_hfi_offline() - Disable HFI on @cpu 453 * @cpu: CPU in which the HFI will be disabled 454 * 455 * Remove @cpu from those covered by its HFI instance. 456 * 457 * On some processors, hardware remembers previous programming settings even 458 * after being reprogrammed. Thus, keep HFI enabled even if all CPUs in the 459 * die/package of @cpu are offline. See note in intel_hfi_online(). 460 */ 461 void intel_hfi_offline(unsigned int cpu) 462 { 463 struct hfi_cpu_info *info = &per_cpu(hfi_cpu_info, cpu); 464 struct hfi_instance *hfi_instance; 465 466 /* 467 * Check if @cpu as an associated, initialized (i.e., with a non-NULL 468 * header). Also, HFI instances are only initialized if X86_FEATURE_HFI 469 * is present. 470 */ 471 hfi_instance = info->hfi_instance; 472 if (!hfi_instance) 473 return; 474 475 if (!hfi_instance->hdr) 476 return; 477 478 mutex_lock(&hfi_instance_lock); 479 cpumask_clear_cpu(cpu, hfi_instance->cpus); 480 mutex_unlock(&hfi_instance_lock); 481 } 482 483 static __init int hfi_parse_features(void) 484 { 485 unsigned int nr_capabilities; 486 union cpuid6_edx edx; 487 488 if (!boot_cpu_has(X86_FEATURE_HFI)) 489 return -ENODEV; 490 491 /* 492 * If we are here we know that CPUID_HFI_LEAF exists. Parse the 493 * supported capabilities and the size of the HFI table. 494 */ 495 edx.full = cpuid_edx(CPUID_HFI_LEAF); 496 497 if (!edx.split.capabilities.split.performance) { 498 pr_debug("Performance reporting not supported! Not using HFI\n"); 499 return -ENODEV; 500 } 501 502 /* 503 * The number of supported capabilities determines the number of 504 * columns in the HFI table. Exclude the reserved bits. 505 */ 506 edx.split.capabilities.split.__reserved = 0; 507 nr_capabilities = hweight8(edx.split.capabilities.bits); 508 509 /* The number of 4KB pages required by the table */ 510 hfi_features.nr_table_pages = edx.split.table_pages + 1; 511 512 /* 513 * The header contains change indications for each supported feature. 514 * The size of the table header is rounded up to be a multiple of 8 515 * bytes. 516 */ 517 hfi_features.hdr_size = DIV_ROUND_UP(nr_capabilities, 8) * 8; 518 519 /* 520 * Data of each logical processor is also rounded up to be a multiple 521 * of 8 bytes. 522 */ 523 hfi_features.cpu_stride = DIV_ROUND_UP(nr_capabilities, 8) * 8; 524 525 return 0; 526 } 527 528 void __init intel_hfi_init(void) 529 { 530 struct hfi_instance *hfi_instance; 531 int i, j; 532 533 if (hfi_parse_features()) 534 return; 535 536 /* There is one HFI instance per die/package. */ 537 max_hfi_instances = topology_max_packages() * 538 topology_max_die_per_package(); 539 540 /* 541 * This allocation may fail. CPU hotplug callbacks must check 542 * for a null pointer. 543 */ 544 hfi_instances = kcalloc(max_hfi_instances, sizeof(*hfi_instances), 545 GFP_KERNEL); 546 if (!hfi_instances) 547 return; 548 549 for (i = 0; i < max_hfi_instances; i++) { 550 hfi_instance = &hfi_instances[i]; 551 if (!zalloc_cpumask_var(&hfi_instance->cpus, GFP_KERNEL)) 552 goto err_nomem; 553 } 554 555 hfi_updates_wq = create_singlethread_workqueue("hfi-updates"); 556 if (!hfi_updates_wq) 557 goto err_nomem; 558 559 return; 560 561 err_nomem: 562 for (j = 0; j < i; ++j) { 563 hfi_instance = &hfi_instances[j]; 564 free_cpumask_var(hfi_instance->cpus); 565 } 566 567 kfree(hfi_instances); 568 hfi_instances = NULL; 569 } 570