1 // SPDX-License-Identifier: GPL-2.0-only 2 /* 3 * Hardware Feedback Interface Driver 4 * 5 * Copyright (c) 2021, Intel Corporation. 6 * 7 * Authors: Aubrey Li <aubrey.li@linux.intel.com> 8 * Ricardo Neri <ricardo.neri-calderon@linux.intel.com> 9 * 10 * 11 * The Hardware Feedback Interface provides a performance and energy efficiency 12 * capability information for each CPU in the system. Depending on the processor 13 * model, hardware may periodically update these capabilities as a result of 14 * changes in the operating conditions (e.g., power limits or thermal 15 * constraints). On other processor models, there is a single HFI update 16 * at boot. 17 * 18 * This file provides functionality to process HFI updates and relay these 19 * updates to userspace. 20 */ 21 22 #define pr_fmt(fmt) "intel-hfi: " fmt 23 24 #include <linux/bitops.h> 25 #include <linux/cpufeature.h> 26 #include <linux/cpumask.h> 27 #include <linux/delay.h> 28 #include <linux/gfp.h> 29 #include <linux/io.h> 30 #include <linux/kernel.h> 31 #include <linux/math.h> 32 #include <linux/mutex.h> 33 #include <linux/percpu-defs.h> 34 #include <linux/printk.h> 35 #include <linux/processor.h> 36 #include <linux/slab.h> 37 #include <linux/spinlock.h> 38 #include <linux/suspend.h> 39 #include <linux/string.h> 40 #include <linux/syscore_ops.h> 41 #include <linux/topology.h> 42 #include <linux/workqueue.h> 43 44 #include <asm/msr.h> 45 46 #include "intel_hfi.h" 47 #include "thermal_interrupt.h" 48 49 #include "../thermal_netlink.h" 50 51 /* Hardware Feedback Interface MSR configuration bits */ 52 #define HW_FEEDBACK_PTR_VALID_BIT BIT(0) 53 #define HW_FEEDBACK_CONFIG_HFI_ENABLE_BIT BIT(0) 54 55 /* CPUID detection and enumeration definitions for HFI */ 56 57 #define CPUID_HFI_LEAF 6 58 59 union hfi_capabilities { 60 struct { 61 u8 performance:1; 62 u8 energy_efficiency:1; 63 u8 __reserved:6; 64 } split; 65 u8 bits; 66 }; 67 68 union cpuid6_edx { 69 struct { 70 union hfi_capabilities capabilities; 71 u32 table_pages:4; 72 u32 __reserved:4; 73 s32 index:16; 74 } split; 75 u32 full; 76 }; 77 78 /** 79 * struct hfi_cpu_data - HFI capabilities per CPU 80 * @perf_cap: Performance capability 81 * @ee_cap: Energy efficiency capability 82 * 83 * Capabilities of a logical processor in the HFI table. These capabilities are 84 * unitless. 85 */ 86 struct hfi_cpu_data { 87 u8 perf_cap; 88 u8 ee_cap; 89 } __packed; 90 91 /** 92 * struct hfi_hdr - Header of the HFI table 93 * @perf_updated: Hardware updated performance capabilities 94 * @ee_updated: Hardware updated energy efficiency capabilities 95 * 96 * Properties of the data in an HFI table. 97 */ 98 struct hfi_hdr { 99 u8 perf_updated; 100 u8 ee_updated; 101 } __packed; 102 103 /** 104 * struct hfi_instance - Representation of an HFI instance (i.e., a table) 105 * @local_table: Base of the local copy of the HFI table 106 * @timestamp: Timestamp of the last update of the local table. 107 * Located at the base of the local table. 108 * @hdr: Base address of the header of the local table 109 * @data: Base address of the data of the local table 110 * @cpus: CPUs represented in this HFI table instance 111 * @hw_table: Pointer to the HFI table of this instance 112 * @update_work: Delayed work to process HFI updates 113 * @table_lock: Lock to protect acceses to the table of this instance 114 * @event_lock: Lock to process HFI interrupts 115 * 116 * A set of parameters to parse and navigate a specific HFI table. 117 */ 118 struct hfi_instance { 119 union { 120 void *local_table; 121 u64 *timestamp; 122 }; 123 void *hdr; 124 void *data; 125 cpumask_var_t cpus; 126 void *hw_table; 127 struct delayed_work update_work; 128 raw_spinlock_t table_lock; 129 raw_spinlock_t event_lock; 130 }; 131 132 /** 133 * struct hfi_features - Supported HFI features 134 * @nr_table_pages: Size of the HFI table in 4KB pages 135 * @cpu_stride: Stride size to locate the capability data of a logical 136 * processor within the table (i.e., row stride) 137 * @hdr_size: Size of the table header 138 * 139 * Parameters and supported features that are common to all HFI instances 140 */ 141 struct hfi_features { 142 size_t nr_table_pages; 143 unsigned int cpu_stride; 144 unsigned int hdr_size; 145 }; 146 147 /** 148 * struct hfi_cpu_info - Per-CPU attributes to consume HFI data 149 * @index: Row of this CPU in its HFI table 150 * @hfi_instance: Attributes of the HFI table to which this CPU belongs 151 * 152 * Parameters to link a logical processor to an HFI table and a row within it. 153 */ 154 struct hfi_cpu_info { 155 s16 index; 156 struct hfi_instance *hfi_instance; 157 }; 158 159 static DEFINE_PER_CPU(struct hfi_cpu_info, hfi_cpu_info) = { .index = -1 }; 160 161 static int max_hfi_instances; 162 static struct hfi_instance *hfi_instances; 163 164 static struct hfi_features hfi_features; 165 static DEFINE_MUTEX(hfi_instance_lock); 166 167 static struct workqueue_struct *hfi_updates_wq; 168 #define HFI_UPDATE_INTERVAL HZ 169 #define HFI_MAX_THERM_NOTIFY_COUNT 16 170 171 static void get_hfi_caps(struct hfi_instance *hfi_instance, 172 struct thermal_genl_cpu_caps *cpu_caps) 173 { 174 int cpu, i = 0; 175 176 raw_spin_lock_irq(&hfi_instance->table_lock); 177 for_each_cpu(cpu, hfi_instance->cpus) { 178 struct hfi_cpu_data *caps; 179 s16 index; 180 181 index = per_cpu(hfi_cpu_info, cpu).index; 182 caps = hfi_instance->data + index * hfi_features.cpu_stride; 183 cpu_caps[i].cpu = cpu; 184 185 /* 186 * Scale performance and energy efficiency to 187 * the [0, 1023] interval that thermal netlink uses. 188 */ 189 cpu_caps[i].performance = caps->perf_cap << 2; 190 cpu_caps[i].efficiency = caps->ee_cap << 2; 191 192 ++i; 193 } 194 raw_spin_unlock_irq(&hfi_instance->table_lock); 195 } 196 197 /* 198 * Call update_capabilities() when there are changes in the HFI table. 199 */ 200 static void update_capabilities(struct hfi_instance *hfi_instance) 201 { 202 struct thermal_genl_cpu_caps *cpu_caps; 203 int i = 0, cpu_count; 204 205 /* CPUs may come online/offline while processing an HFI update. */ 206 mutex_lock(&hfi_instance_lock); 207 208 cpu_count = cpumask_weight(hfi_instance->cpus); 209 210 /* No CPUs to report in this hfi_instance. */ 211 if (!cpu_count) 212 goto out; 213 214 cpu_caps = kcalloc(cpu_count, sizeof(*cpu_caps), GFP_KERNEL); 215 if (!cpu_caps) 216 goto out; 217 218 get_hfi_caps(hfi_instance, cpu_caps); 219 220 if (cpu_count < HFI_MAX_THERM_NOTIFY_COUNT) 221 goto last_cmd; 222 223 /* Process complete chunks of HFI_MAX_THERM_NOTIFY_COUNT capabilities. */ 224 for (i = 0; 225 (i + HFI_MAX_THERM_NOTIFY_COUNT) <= cpu_count; 226 i += HFI_MAX_THERM_NOTIFY_COUNT) 227 thermal_genl_cpu_capability_event(HFI_MAX_THERM_NOTIFY_COUNT, 228 &cpu_caps[i]); 229 230 cpu_count = cpu_count - i; 231 232 last_cmd: 233 /* Process the remaining capabilities if any. */ 234 if (cpu_count) 235 thermal_genl_cpu_capability_event(cpu_count, &cpu_caps[i]); 236 237 kfree(cpu_caps); 238 out: 239 mutex_unlock(&hfi_instance_lock); 240 } 241 242 static void hfi_update_work_fn(struct work_struct *work) 243 { 244 struct hfi_instance *hfi_instance; 245 246 hfi_instance = container_of(to_delayed_work(work), struct hfi_instance, 247 update_work); 248 249 update_capabilities(hfi_instance); 250 } 251 252 void intel_hfi_process_event(__u64 pkg_therm_status_msr_val) 253 { 254 struct hfi_instance *hfi_instance; 255 int cpu = smp_processor_id(); 256 struct hfi_cpu_info *info; 257 u64 new_timestamp, msr, hfi; 258 259 if (!pkg_therm_status_msr_val) 260 return; 261 262 info = &per_cpu(hfi_cpu_info, cpu); 263 if (!info) 264 return; 265 266 /* 267 * A CPU is linked to its HFI instance before the thermal vector in the 268 * local APIC is unmasked. Hence, info->hfi_instance cannot be NULL 269 * when receiving an HFI event. 270 */ 271 hfi_instance = info->hfi_instance; 272 if (unlikely(!hfi_instance)) { 273 pr_debug("Received event on CPU %d but instance was null", cpu); 274 return; 275 } 276 277 /* 278 * On most systems, all CPUs in the package receive a package-level 279 * thermal interrupt when there is an HFI update. It is sufficient to 280 * let a single CPU to acknowledge the update and queue work to 281 * process it. The remaining CPUs can resume their work. 282 */ 283 if (!raw_spin_trylock(&hfi_instance->event_lock)) 284 return; 285 286 rdmsrl(MSR_IA32_PACKAGE_THERM_STATUS, msr); 287 hfi = msr & PACKAGE_THERM_STATUS_HFI_UPDATED; 288 if (!hfi) { 289 raw_spin_unlock(&hfi_instance->event_lock); 290 return; 291 } 292 293 /* 294 * Ack duplicate update. Since there is an active HFI 295 * status from HW, it must be a new event, not a case 296 * where a lagging CPU entered the locked region. 297 */ 298 new_timestamp = *(u64 *)hfi_instance->hw_table; 299 if (*hfi_instance->timestamp == new_timestamp) { 300 thermal_clear_package_intr_status(PACKAGE_LEVEL, PACKAGE_THERM_STATUS_HFI_UPDATED); 301 raw_spin_unlock(&hfi_instance->event_lock); 302 return; 303 } 304 305 raw_spin_lock(&hfi_instance->table_lock); 306 307 /* 308 * Copy the updated table into our local copy. This includes the new 309 * timestamp. 310 */ 311 memcpy(hfi_instance->local_table, hfi_instance->hw_table, 312 hfi_features.nr_table_pages << PAGE_SHIFT); 313 314 /* 315 * Let hardware know that we are done reading the HFI table and it is 316 * free to update it again. 317 */ 318 thermal_clear_package_intr_status(PACKAGE_LEVEL, PACKAGE_THERM_STATUS_HFI_UPDATED); 319 320 raw_spin_unlock(&hfi_instance->table_lock); 321 raw_spin_unlock(&hfi_instance->event_lock); 322 323 queue_delayed_work(hfi_updates_wq, &hfi_instance->update_work, 324 HFI_UPDATE_INTERVAL); 325 } 326 327 static void init_hfi_cpu_index(struct hfi_cpu_info *info) 328 { 329 union cpuid6_edx edx; 330 331 /* Do not re-read @cpu's index if it has already been initialized. */ 332 if (info->index > -1) 333 return; 334 335 edx.full = cpuid_edx(CPUID_HFI_LEAF); 336 info->index = edx.split.index; 337 } 338 339 /* 340 * The format of the HFI table depends on the number of capabilities that the 341 * hardware supports. Keep a data structure to navigate the table. 342 */ 343 static void init_hfi_instance(struct hfi_instance *hfi_instance) 344 { 345 /* The HFI header is below the time-stamp. */ 346 hfi_instance->hdr = hfi_instance->local_table + 347 sizeof(*hfi_instance->timestamp); 348 349 /* The HFI data starts below the header. */ 350 hfi_instance->data = hfi_instance->hdr + hfi_features.hdr_size; 351 } 352 353 /* Caller must hold hfi_instance_lock. */ 354 static void hfi_enable(void) 355 { 356 u64 msr_val; 357 358 rdmsrl(MSR_IA32_HW_FEEDBACK_CONFIG, msr_val); 359 msr_val |= HW_FEEDBACK_CONFIG_HFI_ENABLE_BIT; 360 wrmsrl(MSR_IA32_HW_FEEDBACK_CONFIG, msr_val); 361 } 362 363 static void hfi_set_hw_table(struct hfi_instance *hfi_instance) 364 { 365 phys_addr_t hw_table_pa; 366 u64 msr_val; 367 368 hw_table_pa = virt_to_phys(hfi_instance->hw_table); 369 msr_val = hw_table_pa | HW_FEEDBACK_PTR_VALID_BIT; 370 wrmsrl(MSR_IA32_HW_FEEDBACK_PTR, msr_val); 371 } 372 373 /* Caller must hold hfi_instance_lock. */ 374 static void hfi_disable(void) 375 { 376 u64 msr_val; 377 int i; 378 379 rdmsrl(MSR_IA32_HW_FEEDBACK_CONFIG, msr_val); 380 msr_val &= ~HW_FEEDBACK_CONFIG_HFI_ENABLE_BIT; 381 wrmsrl(MSR_IA32_HW_FEEDBACK_CONFIG, msr_val); 382 383 /* 384 * Wait for hardware to acknowledge the disabling of HFI. Some 385 * processors may not do it. Wait for ~2ms. This is a reasonable 386 * time for hardware to complete any pending actions on the HFI 387 * memory. 388 */ 389 for (i = 0; i < 2000; i++) { 390 rdmsrl(MSR_IA32_PACKAGE_THERM_STATUS, msr_val); 391 if (msr_val & PACKAGE_THERM_STATUS_HFI_UPDATED) 392 break; 393 394 udelay(1); 395 cpu_relax(); 396 } 397 } 398 399 /** 400 * intel_hfi_online() - Enable HFI on @cpu 401 * @cpu: CPU in which the HFI will be enabled 402 * 403 * Enable the HFI to be used in @cpu. The HFI is enabled at the die/package 404 * level. The first CPU in the die/package to come online does the full HFI 405 * initialization. Subsequent CPUs will just link themselves to the HFI 406 * instance of their die/package. 407 * 408 * This function is called before enabling the thermal vector in the local APIC 409 * in order to ensure that @cpu has an associated HFI instance when it receives 410 * an HFI event. 411 */ 412 void intel_hfi_online(unsigned int cpu) 413 { 414 struct hfi_instance *hfi_instance; 415 struct hfi_cpu_info *info; 416 u16 die_id; 417 418 /* Nothing to do if hfi_instances are missing. */ 419 if (!hfi_instances) 420 return; 421 422 /* 423 * Link @cpu to the HFI instance of its package/die. It does not 424 * matter whether the instance has been initialized. 425 */ 426 info = &per_cpu(hfi_cpu_info, cpu); 427 die_id = topology_logical_die_id(cpu); 428 hfi_instance = info->hfi_instance; 429 if (!hfi_instance) { 430 if (die_id >= max_hfi_instances) 431 return; 432 433 hfi_instance = &hfi_instances[die_id]; 434 info->hfi_instance = hfi_instance; 435 } 436 437 init_hfi_cpu_index(info); 438 439 /* 440 * Now check if the HFI instance of the package/die of @cpu has been 441 * initialized (by checking its header). In such case, all we have to 442 * do is to add @cpu to this instance's cpumask. 443 */ 444 mutex_lock(&hfi_instance_lock); 445 if (hfi_instance->hdr) { 446 cpumask_set_cpu(cpu, hfi_instance->cpus); 447 goto unlock; 448 } 449 450 /* 451 * Hardware is programmed with the physical address of the first page 452 * frame of the table. Hence, the allocated memory must be page-aligned. 453 * 454 * Some processors do not forget the initial address of the HFI table 455 * even after having been reprogrammed. Keep using the same pages. Do 456 * not free them. 457 */ 458 hfi_instance->hw_table = alloc_pages_exact(hfi_features.nr_table_pages, 459 GFP_KERNEL | __GFP_ZERO); 460 if (!hfi_instance->hw_table) 461 goto unlock; 462 463 /* 464 * Allocate memory to keep a local copy of the table that 465 * hardware generates. 466 */ 467 hfi_instance->local_table = kzalloc(hfi_features.nr_table_pages << PAGE_SHIFT, 468 GFP_KERNEL); 469 if (!hfi_instance->local_table) 470 goto free_hw_table; 471 472 init_hfi_instance(hfi_instance); 473 474 INIT_DELAYED_WORK(&hfi_instance->update_work, hfi_update_work_fn); 475 raw_spin_lock_init(&hfi_instance->table_lock); 476 raw_spin_lock_init(&hfi_instance->event_lock); 477 478 cpumask_set_cpu(cpu, hfi_instance->cpus); 479 480 hfi_set_hw_table(hfi_instance); 481 hfi_enable(); 482 483 unlock: 484 mutex_unlock(&hfi_instance_lock); 485 return; 486 487 free_hw_table: 488 free_pages_exact(hfi_instance->hw_table, hfi_features.nr_table_pages); 489 goto unlock; 490 } 491 492 /** 493 * intel_hfi_offline() - Disable HFI on @cpu 494 * @cpu: CPU in which the HFI will be disabled 495 * 496 * Remove @cpu from those covered by its HFI instance. 497 * 498 * On some processors, hardware remembers previous programming settings even 499 * after being reprogrammed. Thus, keep HFI enabled even if all CPUs in the 500 * die/package of @cpu are offline. See note in intel_hfi_online(). 501 */ 502 void intel_hfi_offline(unsigned int cpu) 503 { 504 struct hfi_cpu_info *info = &per_cpu(hfi_cpu_info, cpu); 505 struct hfi_instance *hfi_instance; 506 507 /* 508 * Check if @cpu as an associated, initialized (i.e., with a non-NULL 509 * header). Also, HFI instances are only initialized if X86_FEATURE_HFI 510 * is present. 511 */ 512 hfi_instance = info->hfi_instance; 513 if (!hfi_instance) 514 return; 515 516 if (!hfi_instance->hdr) 517 return; 518 519 mutex_lock(&hfi_instance_lock); 520 cpumask_clear_cpu(cpu, hfi_instance->cpus); 521 522 if (!cpumask_weight(hfi_instance->cpus)) 523 hfi_disable(); 524 525 mutex_unlock(&hfi_instance_lock); 526 } 527 528 static __init int hfi_parse_features(void) 529 { 530 unsigned int nr_capabilities; 531 union cpuid6_edx edx; 532 533 if (!boot_cpu_has(X86_FEATURE_HFI)) 534 return -ENODEV; 535 536 /* 537 * If we are here we know that CPUID_HFI_LEAF exists. Parse the 538 * supported capabilities and the size of the HFI table. 539 */ 540 edx.full = cpuid_edx(CPUID_HFI_LEAF); 541 542 if (!edx.split.capabilities.split.performance) { 543 pr_debug("Performance reporting not supported! Not using HFI\n"); 544 return -ENODEV; 545 } 546 547 /* 548 * The number of supported capabilities determines the number of 549 * columns in the HFI table. Exclude the reserved bits. 550 */ 551 edx.split.capabilities.split.__reserved = 0; 552 nr_capabilities = hweight8(edx.split.capabilities.bits); 553 554 /* The number of 4KB pages required by the table */ 555 hfi_features.nr_table_pages = edx.split.table_pages + 1; 556 557 /* 558 * The header contains change indications for each supported feature. 559 * The size of the table header is rounded up to be a multiple of 8 560 * bytes. 561 */ 562 hfi_features.hdr_size = DIV_ROUND_UP(nr_capabilities, 8) * 8; 563 564 /* 565 * Data of each logical processor is also rounded up to be a multiple 566 * of 8 bytes. 567 */ 568 hfi_features.cpu_stride = DIV_ROUND_UP(nr_capabilities, 8) * 8; 569 570 return 0; 571 } 572 573 static void hfi_do_enable(void) 574 { 575 /* This code runs only on the boot CPU. */ 576 struct hfi_cpu_info *info = &per_cpu(hfi_cpu_info, 0); 577 struct hfi_instance *hfi_instance = info->hfi_instance; 578 579 /* No locking needed. There is no concurrency with CPU online. */ 580 hfi_set_hw_table(hfi_instance); 581 hfi_enable(); 582 } 583 584 static int hfi_do_disable(void) 585 { 586 /* No locking needed. There is no concurrency with CPU offline. */ 587 hfi_disable(); 588 589 return 0; 590 } 591 592 static struct syscore_ops hfi_pm_ops = { 593 .resume = hfi_do_enable, 594 .suspend = hfi_do_disable, 595 }; 596 597 void __init intel_hfi_init(void) 598 { 599 struct hfi_instance *hfi_instance; 600 int i, j; 601 602 if (hfi_parse_features()) 603 return; 604 605 /* There is one HFI instance per die/package. */ 606 max_hfi_instances = topology_max_packages() * 607 topology_max_die_per_package(); 608 609 /* 610 * This allocation may fail. CPU hotplug callbacks must check 611 * for a null pointer. 612 */ 613 hfi_instances = kcalloc(max_hfi_instances, sizeof(*hfi_instances), 614 GFP_KERNEL); 615 if (!hfi_instances) 616 return; 617 618 for (i = 0; i < max_hfi_instances; i++) { 619 hfi_instance = &hfi_instances[i]; 620 if (!zalloc_cpumask_var(&hfi_instance->cpus, GFP_KERNEL)) 621 goto err_nomem; 622 } 623 624 hfi_updates_wq = create_singlethread_workqueue("hfi-updates"); 625 if (!hfi_updates_wq) 626 goto err_nomem; 627 628 register_syscore_ops(&hfi_pm_ops); 629 630 return; 631 632 err_nomem: 633 for (j = 0; j < i; ++j) { 634 hfi_instance = &hfi_instances[j]; 635 free_cpumask_var(hfi_instance->cpus); 636 } 637 638 kfree(hfi_instances); 639 hfi_instances = NULL; 640 } 641