1 // SPDX-License-Identifier: GPL-2.0-only 2 /* 3 * Hardware Feedback Interface Driver 4 * 5 * Copyright (c) 2021, Intel Corporation. 6 * 7 * Authors: Aubrey Li <aubrey.li@linux.intel.com> 8 * Ricardo Neri <ricardo.neri-calderon@linux.intel.com> 9 * 10 * 11 * The Hardware Feedback Interface provides a performance and energy efficiency 12 * capability information for each CPU in the system. Depending on the processor 13 * model, hardware may periodically update these capabilities as a result of 14 * changes in the operating conditions (e.g., power limits or thermal 15 * constraints). On other processor models, there is a single HFI update 16 * at boot. 17 * 18 * This file provides functionality to process HFI updates and relay these 19 * updates to userspace. 20 */ 21 22 #define pr_fmt(fmt) "intel-hfi: " fmt 23 24 #include <linux/bitops.h> 25 #include <linux/cpufeature.h> 26 #include <linux/cpumask.h> 27 #include <linux/gfp.h> 28 #include <linux/io.h> 29 #include <linux/kernel.h> 30 #include <linux/math.h> 31 #include <linux/mutex.h> 32 #include <linux/percpu-defs.h> 33 #include <linux/printk.h> 34 #include <linux/processor.h> 35 #include <linux/slab.h> 36 #include <linux/spinlock.h> 37 #include <linux/string.h> 38 #include <linux/topology.h> 39 #include <linux/workqueue.h> 40 41 #include <asm/msr.h> 42 43 #include "../thermal_core.h" 44 #include "intel_hfi.h" 45 #include "thermal_interrupt.h" 46 47 /* Hardware Feedback Interface MSR configuration bits */ 48 #define HW_FEEDBACK_PTR_VALID_BIT BIT(0) 49 #define HW_FEEDBACK_CONFIG_HFI_ENABLE_BIT BIT(0) 50 51 /* CPUID detection and enumeration definitions for HFI */ 52 53 #define CPUID_HFI_LEAF 6 54 55 union hfi_capabilities { 56 struct { 57 u8 performance:1; 58 u8 energy_efficiency:1; 59 u8 __reserved:6; 60 } split; 61 u8 bits; 62 }; 63 64 union cpuid6_edx { 65 struct { 66 union hfi_capabilities capabilities; 67 u32 table_pages:4; 68 u32 __reserved:4; 69 s32 index:16; 70 } split; 71 u32 full; 72 }; 73 74 /** 75 * struct hfi_cpu_data - HFI capabilities per CPU 76 * @perf_cap: Performance capability 77 * @ee_cap: Energy efficiency capability 78 * 79 * Capabilities of a logical processor in the HFI table. These capabilities are 80 * unitless. 81 */ 82 struct hfi_cpu_data { 83 u8 perf_cap; 84 u8 ee_cap; 85 } __packed; 86 87 /** 88 * struct hfi_hdr - Header of the HFI table 89 * @perf_updated: Hardware updated performance capabilities 90 * @ee_updated: Hardware updated energy efficiency capabilities 91 * 92 * Properties of the data in an HFI table. 93 */ 94 struct hfi_hdr { 95 u8 perf_updated; 96 u8 ee_updated; 97 } __packed; 98 99 /** 100 * struct hfi_instance - Representation of an HFI instance (i.e., a table) 101 * @local_table: Base of the local copy of the HFI table 102 * @timestamp: Timestamp of the last update of the local table. 103 * Located at the base of the local table. 104 * @hdr: Base address of the header of the local table 105 * @data: Base address of the data of the local table 106 * @cpus: CPUs represented in this HFI table instance 107 * @hw_table: Pointer to the HFI table of this instance 108 * @update_work: Delayed work to process HFI updates 109 * @table_lock: Lock to protect acceses to the table of this instance 110 * @event_lock: Lock to process HFI interrupts 111 * 112 * A set of parameters to parse and navigate a specific HFI table. 113 */ 114 struct hfi_instance { 115 union { 116 void *local_table; 117 u64 *timestamp; 118 }; 119 void *hdr; 120 void *data; 121 cpumask_var_t cpus; 122 void *hw_table; 123 struct delayed_work update_work; 124 raw_spinlock_t table_lock; 125 raw_spinlock_t event_lock; 126 }; 127 128 /** 129 * struct hfi_features - Supported HFI features 130 * @nr_table_pages: Size of the HFI table in 4KB pages 131 * @cpu_stride: Stride size to locate the capability data of a logical 132 * processor within the table (i.e., row stride) 133 * @hdr_size: Size of the table header 134 * 135 * Parameters and supported features that are common to all HFI instances 136 */ 137 struct hfi_features { 138 size_t nr_table_pages; 139 unsigned int cpu_stride; 140 unsigned int hdr_size; 141 }; 142 143 /** 144 * struct hfi_cpu_info - Per-CPU attributes to consume HFI data 145 * @index: Row of this CPU in its HFI table 146 * @hfi_instance: Attributes of the HFI table to which this CPU belongs 147 * 148 * Parameters to link a logical processor to an HFI table and a row within it. 149 */ 150 struct hfi_cpu_info { 151 s16 index; 152 struct hfi_instance *hfi_instance; 153 }; 154 155 static DEFINE_PER_CPU(struct hfi_cpu_info, hfi_cpu_info) = { .index = -1 }; 156 157 static int max_hfi_instances; 158 static struct hfi_instance *hfi_instances; 159 160 static struct hfi_features hfi_features; 161 static DEFINE_MUTEX(hfi_instance_lock); 162 163 static struct workqueue_struct *hfi_updates_wq; 164 #define HFI_UPDATE_INTERVAL HZ 165 #define HFI_MAX_THERM_NOTIFY_COUNT 16 166 167 static void get_hfi_caps(struct hfi_instance *hfi_instance, 168 struct thermal_genl_cpu_caps *cpu_caps) 169 { 170 int cpu, i = 0; 171 172 raw_spin_lock_irq(&hfi_instance->table_lock); 173 for_each_cpu(cpu, hfi_instance->cpus) { 174 struct hfi_cpu_data *caps; 175 s16 index; 176 177 index = per_cpu(hfi_cpu_info, cpu).index; 178 caps = hfi_instance->data + index * hfi_features.cpu_stride; 179 cpu_caps[i].cpu = cpu; 180 181 /* 182 * Scale performance and energy efficiency to 183 * the [0, 1023] interval that thermal netlink uses. 184 */ 185 cpu_caps[i].performance = caps->perf_cap << 2; 186 cpu_caps[i].efficiency = caps->ee_cap << 2; 187 188 ++i; 189 } 190 raw_spin_unlock_irq(&hfi_instance->table_lock); 191 } 192 193 /* 194 * Call update_capabilities() when there are changes in the HFI table. 195 */ 196 static void update_capabilities(struct hfi_instance *hfi_instance) 197 { 198 struct thermal_genl_cpu_caps *cpu_caps; 199 int i = 0, cpu_count; 200 201 /* CPUs may come online/offline while processing an HFI update. */ 202 mutex_lock(&hfi_instance_lock); 203 204 cpu_count = cpumask_weight(hfi_instance->cpus); 205 206 /* No CPUs to report in this hfi_instance. */ 207 if (!cpu_count) 208 goto out; 209 210 cpu_caps = kcalloc(cpu_count, sizeof(*cpu_caps), GFP_KERNEL); 211 if (!cpu_caps) 212 goto out; 213 214 get_hfi_caps(hfi_instance, cpu_caps); 215 216 if (cpu_count < HFI_MAX_THERM_NOTIFY_COUNT) 217 goto last_cmd; 218 219 /* Process complete chunks of HFI_MAX_THERM_NOTIFY_COUNT capabilities. */ 220 for (i = 0; 221 (i + HFI_MAX_THERM_NOTIFY_COUNT) <= cpu_count; 222 i += HFI_MAX_THERM_NOTIFY_COUNT) 223 thermal_genl_cpu_capability_event(HFI_MAX_THERM_NOTIFY_COUNT, 224 &cpu_caps[i]); 225 226 cpu_count = cpu_count - i; 227 228 last_cmd: 229 /* Process the remaining capabilities if any. */ 230 if (cpu_count) 231 thermal_genl_cpu_capability_event(cpu_count, &cpu_caps[i]); 232 233 kfree(cpu_caps); 234 out: 235 mutex_unlock(&hfi_instance_lock); 236 } 237 238 static void hfi_update_work_fn(struct work_struct *work) 239 { 240 struct hfi_instance *hfi_instance; 241 242 hfi_instance = container_of(to_delayed_work(work), struct hfi_instance, 243 update_work); 244 245 update_capabilities(hfi_instance); 246 } 247 248 void intel_hfi_process_event(__u64 pkg_therm_status_msr_val) 249 { 250 struct hfi_instance *hfi_instance; 251 int cpu = smp_processor_id(); 252 struct hfi_cpu_info *info; 253 u64 new_timestamp, msr, hfi; 254 255 if (!pkg_therm_status_msr_val) 256 return; 257 258 info = &per_cpu(hfi_cpu_info, cpu); 259 if (!info) 260 return; 261 262 /* 263 * A CPU is linked to its HFI instance before the thermal vector in the 264 * local APIC is unmasked. Hence, info->hfi_instance cannot be NULL 265 * when receiving an HFI event. 266 */ 267 hfi_instance = info->hfi_instance; 268 if (unlikely(!hfi_instance)) { 269 pr_debug("Received event on CPU %d but instance was null", cpu); 270 return; 271 } 272 273 /* 274 * On most systems, all CPUs in the package receive a package-level 275 * thermal interrupt when there is an HFI update. It is sufficient to 276 * let a single CPU to acknowledge the update and queue work to 277 * process it. The remaining CPUs can resume their work. 278 */ 279 if (!raw_spin_trylock(&hfi_instance->event_lock)) 280 return; 281 282 rdmsrl(MSR_IA32_PACKAGE_THERM_STATUS, msr); 283 hfi = msr & PACKAGE_THERM_STATUS_HFI_UPDATED; 284 if (!hfi) { 285 raw_spin_unlock(&hfi_instance->event_lock); 286 return; 287 } 288 289 /* 290 * Ack duplicate update. Since there is an active HFI 291 * status from HW, it must be a new event, not a case 292 * where a lagging CPU entered the locked region. 293 */ 294 new_timestamp = *(u64 *)hfi_instance->hw_table; 295 if (*hfi_instance->timestamp == new_timestamp) { 296 thermal_clear_package_intr_status(PACKAGE_LEVEL, PACKAGE_THERM_STATUS_HFI_UPDATED); 297 raw_spin_unlock(&hfi_instance->event_lock); 298 return; 299 } 300 301 raw_spin_lock(&hfi_instance->table_lock); 302 303 /* 304 * Copy the updated table into our local copy. This includes the new 305 * timestamp. 306 */ 307 memcpy(hfi_instance->local_table, hfi_instance->hw_table, 308 hfi_features.nr_table_pages << PAGE_SHIFT); 309 310 /* 311 * Let hardware know that we are done reading the HFI table and it is 312 * free to update it again. 313 */ 314 thermal_clear_package_intr_status(PACKAGE_LEVEL, PACKAGE_THERM_STATUS_HFI_UPDATED); 315 316 raw_spin_unlock(&hfi_instance->table_lock); 317 raw_spin_unlock(&hfi_instance->event_lock); 318 319 queue_delayed_work(hfi_updates_wq, &hfi_instance->update_work, 320 HFI_UPDATE_INTERVAL); 321 } 322 323 static void init_hfi_cpu_index(struct hfi_cpu_info *info) 324 { 325 union cpuid6_edx edx; 326 327 /* Do not re-read @cpu's index if it has already been initialized. */ 328 if (info->index > -1) 329 return; 330 331 edx.full = cpuid_edx(CPUID_HFI_LEAF); 332 info->index = edx.split.index; 333 } 334 335 /* 336 * The format of the HFI table depends on the number of capabilities that the 337 * hardware supports. Keep a data structure to navigate the table. 338 */ 339 static void init_hfi_instance(struct hfi_instance *hfi_instance) 340 { 341 /* The HFI header is below the time-stamp. */ 342 hfi_instance->hdr = hfi_instance->local_table + 343 sizeof(*hfi_instance->timestamp); 344 345 /* The HFI data starts below the header. */ 346 hfi_instance->data = hfi_instance->hdr + hfi_features.hdr_size; 347 } 348 349 /** 350 * intel_hfi_online() - Enable HFI on @cpu 351 * @cpu: CPU in which the HFI will be enabled 352 * 353 * Enable the HFI to be used in @cpu. The HFI is enabled at the die/package 354 * level. The first CPU in the die/package to come online does the full HFI 355 * initialization. Subsequent CPUs will just link themselves to the HFI 356 * instance of their die/package. 357 * 358 * This function is called before enabling the thermal vector in the local APIC 359 * in order to ensure that @cpu has an associated HFI instance when it receives 360 * an HFI event. 361 */ 362 void intel_hfi_online(unsigned int cpu) 363 { 364 struct hfi_instance *hfi_instance; 365 struct hfi_cpu_info *info; 366 phys_addr_t hw_table_pa; 367 u64 msr_val; 368 u16 die_id; 369 370 /* Nothing to do if hfi_instances are missing. */ 371 if (!hfi_instances) 372 return; 373 374 /* 375 * Link @cpu to the HFI instance of its package/die. It does not 376 * matter whether the instance has been initialized. 377 */ 378 info = &per_cpu(hfi_cpu_info, cpu); 379 die_id = topology_logical_die_id(cpu); 380 hfi_instance = info->hfi_instance; 381 if (!hfi_instance) { 382 if (die_id >= max_hfi_instances) 383 return; 384 385 hfi_instance = &hfi_instances[die_id]; 386 info->hfi_instance = hfi_instance; 387 } 388 389 init_hfi_cpu_index(info); 390 391 /* 392 * Now check if the HFI instance of the package/die of @cpu has been 393 * initialized (by checking its header). In such case, all we have to 394 * do is to add @cpu to this instance's cpumask. 395 */ 396 mutex_lock(&hfi_instance_lock); 397 if (hfi_instance->hdr) { 398 cpumask_set_cpu(cpu, hfi_instance->cpus); 399 goto unlock; 400 } 401 402 /* 403 * Hardware is programmed with the physical address of the first page 404 * frame of the table. Hence, the allocated memory must be page-aligned. 405 */ 406 hfi_instance->hw_table = alloc_pages_exact(hfi_features.nr_table_pages, 407 GFP_KERNEL | __GFP_ZERO); 408 if (!hfi_instance->hw_table) 409 goto unlock; 410 411 hw_table_pa = virt_to_phys(hfi_instance->hw_table); 412 413 /* 414 * Allocate memory to keep a local copy of the table that 415 * hardware generates. 416 */ 417 hfi_instance->local_table = kzalloc(hfi_features.nr_table_pages << PAGE_SHIFT, 418 GFP_KERNEL); 419 if (!hfi_instance->local_table) 420 goto free_hw_table; 421 422 /* 423 * Program the address of the feedback table of this die/package. On 424 * some processors, hardware remembers the old address of the HFI table 425 * even after having been reprogrammed and re-enabled. Thus, do not free 426 * the pages allocated for the table or reprogram the hardware with a 427 * new base address. Namely, program the hardware only once. 428 */ 429 msr_val = hw_table_pa | HW_FEEDBACK_PTR_VALID_BIT; 430 wrmsrl(MSR_IA32_HW_FEEDBACK_PTR, msr_val); 431 432 init_hfi_instance(hfi_instance); 433 434 INIT_DELAYED_WORK(&hfi_instance->update_work, hfi_update_work_fn); 435 raw_spin_lock_init(&hfi_instance->table_lock); 436 raw_spin_lock_init(&hfi_instance->event_lock); 437 438 cpumask_set_cpu(cpu, hfi_instance->cpus); 439 440 /* 441 * Enable the hardware feedback interface and never disable it. See 442 * comment on programming the address of the table. 443 */ 444 rdmsrl(MSR_IA32_HW_FEEDBACK_CONFIG, msr_val); 445 msr_val |= HW_FEEDBACK_CONFIG_HFI_ENABLE_BIT; 446 wrmsrl(MSR_IA32_HW_FEEDBACK_CONFIG, msr_val); 447 448 unlock: 449 mutex_unlock(&hfi_instance_lock); 450 return; 451 452 free_hw_table: 453 free_pages_exact(hfi_instance->hw_table, hfi_features.nr_table_pages); 454 goto unlock; 455 } 456 457 /** 458 * intel_hfi_offline() - Disable HFI on @cpu 459 * @cpu: CPU in which the HFI will be disabled 460 * 461 * Remove @cpu from those covered by its HFI instance. 462 * 463 * On some processors, hardware remembers previous programming settings even 464 * after being reprogrammed. Thus, keep HFI enabled even if all CPUs in the 465 * die/package of @cpu are offline. See note in intel_hfi_online(). 466 */ 467 void intel_hfi_offline(unsigned int cpu) 468 { 469 struct hfi_cpu_info *info = &per_cpu(hfi_cpu_info, cpu); 470 struct hfi_instance *hfi_instance; 471 472 /* 473 * Check if @cpu as an associated, initialized (i.e., with a non-NULL 474 * header). Also, HFI instances are only initialized if X86_FEATURE_HFI 475 * is present. 476 */ 477 hfi_instance = info->hfi_instance; 478 if (!hfi_instance) 479 return; 480 481 if (!hfi_instance->hdr) 482 return; 483 484 mutex_lock(&hfi_instance_lock); 485 cpumask_clear_cpu(cpu, hfi_instance->cpus); 486 mutex_unlock(&hfi_instance_lock); 487 } 488 489 static __init int hfi_parse_features(void) 490 { 491 unsigned int nr_capabilities; 492 union cpuid6_edx edx; 493 494 if (!boot_cpu_has(X86_FEATURE_HFI)) 495 return -ENODEV; 496 497 /* 498 * If we are here we know that CPUID_HFI_LEAF exists. Parse the 499 * supported capabilities and the size of the HFI table. 500 */ 501 edx.full = cpuid_edx(CPUID_HFI_LEAF); 502 503 if (!edx.split.capabilities.split.performance) { 504 pr_debug("Performance reporting not supported! Not using HFI\n"); 505 return -ENODEV; 506 } 507 508 /* 509 * The number of supported capabilities determines the number of 510 * columns in the HFI table. Exclude the reserved bits. 511 */ 512 edx.split.capabilities.split.__reserved = 0; 513 nr_capabilities = hweight8(edx.split.capabilities.bits); 514 515 /* The number of 4KB pages required by the table */ 516 hfi_features.nr_table_pages = edx.split.table_pages + 1; 517 518 /* 519 * The header contains change indications for each supported feature. 520 * The size of the table header is rounded up to be a multiple of 8 521 * bytes. 522 */ 523 hfi_features.hdr_size = DIV_ROUND_UP(nr_capabilities, 8) * 8; 524 525 /* 526 * Data of each logical processor is also rounded up to be a multiple 527 * of 8 bytes. 528 */ 529 hfi_features.cpu_stride = DIV_ROUND_UP(nr_capabilities, 8) * 8; 530 531 return 0; 532 } 533 534 void __init intel_hfi_init(void) 535 { 536 struct hfi_instance *hfi_instance; 537 int i, j; 538 539 if (hfi_parse_features()) 540 return; 541 542 /* There is one HFI instance per die/package. */ 543 max_hfi_instances = topology_max_packages() * 544 topology_max_die_per_package(); 545 546 /* 547 * This allocation may fail. CPU hotplug callbacks must check 548 * for a null pointer. 549 */ 550 hfi_instances = kcalloc(max_hfi_instances, sizeof(*hfi_instances), 551 GFP_KERNEL); 552 if (!hfi_instances) 553 return; 554 555 for (i = 0; i < max_hfi_instances; i++) { 556 hfi_instance = &hfi_instances[i]; 557 if (!zalloc_cpumask_var(&hfi_instance->cpus, GFP_KERNEL)) 558 goto err_nomem; 559 } 560 561 hfi_updates_wq = create_singlethread_workqueue("hfi-updates"); 562 if (!hfi_updates_wq) 563 goto err_nomem; 564 565 return; 566 567 err_nomem: 568 for (j = 0; j < i; ++j) { 569 hfi_instance = &hfi_instances[j]; 570 free_cpumask_var(hfi_instance->cpus); 571 } 572 573 kfree(hfi_instances); 574 hfi_instances = NULL; 575 } 576