1 // SPDX-License-Identifier: GPL-2.0-only 2 /* 3 * Hardware Feedback Interface Driver 4 * 5 * Copyright (c) 2021, Intel Corporation. 6 * 7 * Authors: Aubrey Li <aubrey.li@linux.intel.com> 8 * Ricardo Neri <ricardo.neri-calderon@linux.intel.com> 9 * 10 * 11 * The Hardware Feedback Interface provides a performance and energy efficiency 12 * capability information for each CPU in the system. Depending on the processor 13 * model, hardware may periodically update these capabilities as a result of 14 * changes in the operating conditions (e.g., power limits or thermal 15 * constraints). On other processor models, there is a single HFI update 16 * at boot. 17 * 18 * This file provides functionality to process HFI updates and relay these 19 * updates to userspace. 20 */ 21 22 #define pr_fmt(fmt) "intel-hfi: " fmt 23 24 #include <linux/bitops.h> 25 #include <linux/cpufeature.h> 26 #include <linux/cpumask.h> 27 #include <linux/gfp.h> 28 #include <linux/io.h> 29 #include <linux/kernel.h> 30 #include <linux/math.h> 31 #include <linux/mutex.h> 32 #include <linux/percpu-defs.h> 33 #include <linux/printk.h> 34 #include <linux/processor.h> 35 #include <linux/slab.h> 36 #include <linux/spinlock.h> 37 #include <linux/string.h> 38 #include <linux/topology.h> 39 #include <linux/workqueue.h> 40 41 #include <asm/msr.h> 42 43 #include "intel_hfi.h" 44 #include "thermal_interrupt.h" 45 46 #include "../thermal_netlink.h" 47 48 /* Hardware Feedback Interface MSR configuration bits */ 49 #define HW_FEEDBACK_PTR_VALID_BIT BIT(0) 50 #define HW_FEEDBACK_CONFIG_HFI_ENABLE_BIT BIT(0) 51 52 /* CPUID detection and enumeration definitions for HFI */ 53 54 #define CPUID_HFI_LEAF 6 55 56 union hfi_capabilities { 57 struct { 58 u8 performance:1; 59 u8 energy_efficiency:1; 60 u8 __reserved:6; 61 } split; 62 u8 bits; 63 }; 64 65 union cpuid6_edx { 66 struct { 67 union hfi_capabilities capabilities; 68 u32 table_pages:4; 69 u32 __reserved:4; 70 s32 index:16; 71 } split; 72 u32 full; 73 }; 74 75 /** 76 * struct hfi_cpu_data - HFI capabilities per CPU 77 * @perf_cap: Performance capability 78 * @ee_cap: Energy efficiency capability 79 * 80 * Capabilities of a logical processor in the HFI table. These capabilities are 81 * unitless. 82 */ 83 struct hfi_cpu_data { 84 u8 perf_cap; 85 u8 ee_cap; 86 } __packed; 87 88 /** 89 * struct hfi_hdr - Header of the HFI table 90 * @perf_updated: Hardware updated performance capabilities 91 * @ee_updated: Hardware updated energy efficiency capabilities 92 * 93 * Properties of the data in an HFI table. 94 */ 95 struct hfi_hdr { 96 u8 perf_updated; 97 u8 ee_updated; 98 } __packed; 99 100 /** 101 * struct hfi_instance - Representation of an HFI instance (i.e., a table) 102 * @local_table: Base of the local copy of the HFI table 103 * @timestamp: Timestamp of the last update of the local table. 104 * Located at the base of the local table. 105 * @hdr: Base address of the header of the local table 106 * @data: Base address of the data of the local table 107 * @cpus: CPUs represented in this HFI table instance 108 * @hw_table: Pointer to the HFI table of this instance 109 * @update_work: Delayed work to process HFI updates 110 * @table_lock: Lock to protect acceses to the table of this instance 111 * @event_lock: Lock to process HFI interrupts 112 * 113 * A set of parameters to parse and navigate a specific HFI table. 114 */ 115 struct hfi_instance { 116 union { 117 void *local_table; 118 u64 *timestamp; 119 }; 120 void *hdr; 121 void *data; 122 cpumask_var_t cpus; 123 void *hw_table; 124 struct delayed_work update_work; 125 raw_spinlock_t table_lock; 126 raw_spinlock_t event_lock; 127 }; 128 129 /** 130 * struct hfi_features - Supported HFI features 131 * @nr_table_pages: Size of the HFI table in 4KB pages 132 * @cpu_stride: Stride size to locate the capability data of a logical 133 * processor within the table (i.e., row stride) 134 * @hdr_size: Size of the table header 135 * 136 * Parameters and supported features that are common to all HFI instances 137 */ 138 struct hfi_features { 139 size_t nr_table_pages; 140 unsigned int cpu_stride; 141 unsigned int hdr_size; 142 }; 143 144 /** 145 * struct hfi_cpu_info - Per-CPU attributes to consume HFI data 146 * @index: Row of this CPU in its HFI table 147 * @hfi_instance: Attributes of the HFI table to which this CPU belongs 148 * 149 * Parameters to link a logical processor to an HFI table and a row within it. 150 */ 151 struct hfi_cpu_info { 152 s16 index; 153 struct hfi_instance *hfi_instance; 154 }; 155 156 static DEFINE_PER_CPU(struct hfi_cpu_info, hfi_cpu_info) = { .index = -1 }; 157 158 static int max_hfi_instances; 159 static struct hfi_instance *hfi_instances; 160 161 static struct hfi_features hfi_features; 162 static DEFINE_MUTEX(hfi_instance_lock); 163 164 static struct workqueue_struct *hfi_updates_wq; 165 #define HFI_UPDATE_INTERVAL HZ 166 #define HFI_MAX_THERM_NOTIFY_COUNT 16 167 168 static void get_hfi_caps(struct hfi_instance *hfi_instance, 169 struct thermal_genl_cpu_caps *cpu_caps) 170 { 171 int cpu, i = 0; 172 173 raw_spin_lock_irq(&hfi_instance->table_lock); 174 for_each_cpu(cpu, hfi_instance->cpus) { 175 struct hfi_cpu_data *caps; 176 s16 index; 177 178 index = per_cpu(hfi_cpu_info, cpu).index; 179 caps = hfi_instance->data + index * hfi_features.cpu_stride; 180 cpu_caps[i].cpu = cpu; 181 182 /* 183 * Scale performance and energy efficiency to 184 * the [0, 1023] interval that thermal netlink uses. 185 */ 186 cpu_caps[i].performance = caps->perf_cap << 2; 187 cpu_caps[i].efficiency = caps->ee_cap << 2; 188 189 ++i; 190 } 191 raw_spin_unlock_irq(&hfi_instance->table_lock); 192 } 193 194 /* 195 * Call update_capabilities() when there are changes in the HFI table. 196 */ 197 static void update_capabilities(struct hfi_instance *hfi_instance) 198 { 199 struct thermal_genl_cpu_caps *cpu_caps; 200 int i = 0, cpu_count; 201 202 /* CPUs may come online/offline while processing an HFI update. */ 203 mutex_lock(&hfi_instance_lock); 204 205 cpu_count = cpumask_weight(hfi_instance->cpus); 206 207 /* No CPUs to report in this hfi_instance. */ 208 if (!cpu_count) 209 goto out; 210 211 cpu_caps = kcalloc(cpu_count, sizeof(*cpu_caps), GFP_KERNEL); 212 if (!cpu_caps) 213 goto out; 214 215 get_hfi_caps(hfi_instance, cpu_caps); 216 217 if (cpu_count < HFI_MAX_THERM_NOTIFY_COUNT) 218 goto last_cmd; 219 220 /* Process complete chunks of HFI_MAX_THERM_NOTIFY_COUNT capabilities. */ 221 for (i = 0; 222 (i + HFI_MAX_THERM_NOTIFY_COUNT) <= cpu_count; 223 i += HFI_MAX_THERM_NOTIFY_COUNT) 224 thermal_genl_cpu_capability_event(HFI_MAX_THERM_NOTIFY_COUNT, 225 &cpu_caps[i]); 226 227 cpu_count = cpu_count - i; 228 229 last_cmd: 230 /* Process the remaining capabilities if any. */ 231 if (cpu_count) 232 thermal_genl_cpu_capability_event(cpu_count, &cpu_caps[i]); 233 234 kfree(cpu_caps); 235 out: 236 mutex_unlock(&hfi_instance_lock); 237 } 238 239 static void hfi_update_work_fn(struct work_struct *work) 240 { 241 struct hfi_instance *hfi_instance; 242 243 hfi_instance = container_of(to_delayed_work(work), struct hfi_instance, 244 update_work); 245 246 update_capabilities(hfi_instance); 247 } 248 249 void intel_hfi_process_event(__u64 pkg_therm_status_msr_val) 250 { 251 struct hfi_instance *hfi_instance; 252 int cpu = smp_processor_id(); 253 struct hfi_cpu_info *info; 254 u64 new_timestamp, msr, hfi; 255 256 if (!pkg_therm_status_msr_val) 257 return; 258 259 info = &per_cpu(hfi_cpu_info, cpu); 260 if (!info) 261 return; 262 263 /* 264 * A CPU is linked to its HFI instance before the thermal vector in the 265 * local APIC is unmasked. Hence, info->hfi_instance cannot be NULL 266 * when receiving an HFI event. 267 */ 268 hfi_instance = info->hfi_instance; 269 if (unlikely(!hfi_instance)) { 270 pr_debug("Received event on CPU %d but instance was null", cpu); 271 return; 272 } 273 274 /* 275 * On most systems, all CPUs in the package receive a package-level 276 * thermal interrupt when there is an HFI update. It is sufficient to 277 * let a single CPU to acknowledge the update and queue work to 278 * process it. The remaining CPUs can resume their work. 279 */ 280 if (!raw_spin_trylock(&hfi_instance->event_lock)) 281 return; 282 283 rdmsrl(MSR_IA32_PACKAGE_THERM_STATUS, msr); 284 hfi = msr & PACKAGE_THERM_STATUS_HFI_UPDATED; 285 if (!hfi) { 286 raw_spin_unlock(&hfi_instance->event_lock); 287 return; 288 } 289 290 /* 291 * Ack duplicate update. Since there is an active HFI 292 * status from HW, it must be a new event, not a case 293 * where a lagging CPU entered the locked region. 294 */ 295 new_timestamp = *(u64 *)hfi_instance->hw_table; 296 if (*hfi_instance->timestamp == new_timestamp) { 297 thermal_clear_package_intr_status(PACKAGE_LEVEL, PACKAGE_THERM_STATUS_HFI_UPDATED); 298 raw_spin_unlock(&hfi_instance->event_lock); 299 return; 300 } 301 302 raw_spin_lock(&hfi_instance->table_lock); 303 304 /* 305 * Copy the updated table into our local copy. This includes the new 306 * timestamp. 307 */ 308 memcpy(hfi_instance->local_table, hfi_instance->hw_table, 309 hfi_features.nr_table_pages << PAGE_SHIFT); 310 311 /* 312 * Let hardware know that we are done reading the HFI table and it is 313 * free to update it again. 314 */ 315 thermal_clear_package_intr_status(PACKAGE_LEVEL, PACKAGE_THERM_STATUS_HFI_UPDATED); 316 317 raw_spin_unlock(&hfi_instance->table_lock); 318 raw_spin_unlock(&hfi_instance->event_lock); 319 320 queue_delayed_work(hfi_updates_wq, &hfi_instance->update_work, 321 HFI_UPDATE_INTERVAL); 322 } 323 324 static void init_hfi_cpu_index(struct hfi_cpu_info *info) 325 { 326 union cpuid6_edx edx; 327 328 /* Do not re-read @cpu's index if it has already been initialized. */ 329 if (info->index > -1) 330 return; 331 332 edx.full = cpuid_edx(CPUID_HFI_LEAF); 333 info->index = edx.split.index; 334 } 335 336 /* 337 * The format of the HFI table depends on the number of capabilities that the 338 * hardware supports. Keep a data structure to navigate the table. 339 */ 340 static void init_hfi_instance(struct hfi_instance *hfi_instance) 341 { 342 /* The HFI header is below the time-stamp. */ 343 hfi_instance->hdr = hfi_instance->local_table + 344 sizeof(*hfi_instance->timestamp); 345 346 /* The HFI data starts below the header. */ 347 hfi_instance->data = hfi_instance->hdr + hfi_features.hdr_size; 348 } 349 350 /** 351 * intel_hfi_online() - Enable HFI on @cpu 352 * @cpu: CPU in which the HFI will be enabled 353 * 354 * Enable the HFI to be used in @cpu. The HFI is enabled at the die/package 355 * level. The first CPU in the die/package to come online does the full HFI 356 * initialization. Subsequent CPUs will just link themselves to the HFI 357 * instance of their die/package. 358 * 359 * This function is called before enabling the thermal vector in the local APIC 360 * in order to ensure that @cpu has an associated HFI instance when it receives 361 * an HFI event. 362 */ 363 void intel_hfi_online(unsigned int cpu) 364 { 365 struct hfi_instance *hfi_instance; 366 struct hfi_cpu_info *info; 367 phys_addr_t hw_table_pa; 368 u64 msr_val; 369 u16 die_id; 370 371 /* Nothing to do if hfi_instances are missing. */ 372 if (!hfi_instances) 373 return; 374 375 /* 376 * Link @cpu to the HFI instance of its package/die. It does not 377 * matter whether the instance has been initialized. 378 */ 379 info = &per_cpu(hfi_cpu_info, cpu); 380 die_id = topology_logical_die_id(cpu); 381 hfi_instance = info->hfi_instance; 382 if (!hfi_instance) { 383 if (die_id >= max_hfi_instances) 384 return; 385 386 hfi_instance = &hfi_instances[die_id]; 387 info->hfi_instance = hfi_instance; 388 } 389 390 init_hfi_cpu_index(info); 391 392 /* 393 * Now check if the HFI instance of the package/die of @cpu has been 394 * initialized (by checking its header). In such case, all we have to 395 * do is to add @cpu to this instance's cpumask. 396 */ 397 mutex_lock(&hfi_instance_lock); 398 if (hfi_instance->hdr) { 399 cpumask_set_cpu(cpu, hfi_instance->cpus); 400 goto unlock; 401 } 402 403 /* 404 * Hardware is programmed with the physical address of the first page 405 * frame of the table. Hence, the allocated memory must be page-aligned. 406 */ 407 hfi_instance->hw_table = alloc_pages_exact(hfi_features.nr_table_pages, 408 GFP_KERNEL | __GFP_ZERO); 409 if (!hfi_instance->hw_table) 410 goto unlock; 411 412 hw_table_pa = virt_to_phys(hfi_instance->hw_table); 413 414 /* 415 * Allocate memory to keep a local copy of the table that 416 * hardware generates. 417 */ 418 hfi_instance->local_table = kzalloc(hfi_features.nr_table_pages << PAGE_SHIFT, 419 GFP_KERNEL); 420 if (!hfi_instance->local_table) 421 goto free_hw_table; 422 423 /* 424 * Program the address of the feedback table of this die/package. On 425 * some processors, hardware remembers the old address of the HFI table 426 * even after having been reprogrammed and re-enabled. Thus, do not free 427 * the pages allocated for the table or reprogram the hardware with a 428 * new base address. Namely, program the hardware only once. 429 */ 430 msr_val = hw_table_pa | HW_FEEDBACK_PTR_VALID_BIT; 431 wrmsrl(MSR_IA32_HW_FEEDBACK_PTR, msr_val); 432 433 init_hfi_instance(hfi_instance); 434 435 INIT_DELAYED_WORK(&hfi_instance->update_work, hfi_update_work_fn); 436 raw_spin_lock_init(&hfi_instance->table_lock); 437 raw_spin_lock_init(&hfi_instance->event_lock); 438 439 cpumask_set_cpu(cpu, hfi_instance->cpus); 440 441 /* 442 * Enable the hardware feedback interface and never disable it. See 443 * comment on programming the address of the table. 444 */ 445 rdmsrl(MSR_IA32_HW_FEEDBACK_CONFIG, msr_val); 446 msr_val |= HW_FEEDBACK_CONFIG_HFI_ENABLE_BIT; 447 wrmsrl(MSR_IA32_HW_FEEDBACK_CONFIG, msr_val); 448 449 unlock: 450 mutex_unlock(&hfi_instance_lock); 451 return; 452 453 free_hw_table: 454 free_pages_exact(hfi_instance->hw_table, hfi_features.nr_table_pages); 455 goto unlock; 456 } 457 458 /** 459 * intel_hfi_offline() - Disable HFI on @cpu 460 * @cpu: CPU in which the HFI will be disabled 461 * 462 * Remove @cpu from those covered by its HFI instance. 463 * 464 * On some processors, hardware remembers previous programming settings even 465 * after being reprogrammed. Thus, keep HFI enabled even if all CPUs in the 466 * die/package of @cpu are offline. See note in intel_hfi_online(). 467 */ 468 void intel_hfi_offline(unsigned int cpu) 469 { 470 struct hfi_cpu_info *info = &per_cpu(hfi_cpu_info, cpu); 471 struct hfi_instance *hfi_instance; 472 473 /* 474 * Check if @cpu as an associated, initialized (i.e., with a non-NULL 475 * header). Also, HFI instances are only initialized if X86_FEATURE_HFI 476 * is present. 477 */ 478 hfi_instance = info->hfi_instance; 479 if (!hfi_instance) 480 return; 481 482 if (!hfi_instance->hdr) 483 return; 484 485 mutex_lock(&hfi_instance_lock); 486 cpumask_clear_cpu(cpu, hfi_instance->cpus); 487 mutex_unlock(&hfi_instance_lock); 488 } 489 490 static __init int hfi_parse_features(void) 491 { 492 unsigned int nr_capabilities; 493 union cpuid6_edx edx; 494 495 if (!boot_cpu_has(X86_FEATURE_HFI)) 496 return -ENODEV; 497 498 /* 499 * If we are here we know that CPUID_HFI_LEAF exists. Parse the 500 * supported capabilities and the size of the HFI table. 501 */ 502 edx.full = cpuid_edx(CPUID_HFI_LEAF); 503 504 if (!edx.split.capabilities.split.performance) { 505 pr_debug("Performance reporting not supported! Not using HFI\n"); 506 return -ENODEV; 507 } 508 509 /* 510 * The number of supported capabilities determines the number of 511 * columns in the HFI table. Exclude the reserved bits. 512 */ 513 edx.split.capabilities.split.__reserved = 0; 514 nr_capabilities = hweight8(edx.split.capabilities.bits); 515 516 /* The number of 4KB pages required by the table */ 517 hfi_features.nr_table_pages = edx.split.table_pages + 1; 518 519 /* 520 * The header contains change indications for each supported feature. 521 * The size of the table header is rounded up to be a multiple of 8 522 * bytes. 523 */ 524 hfi_features.hdr_size = DIV_ROUND_UP(nr_capabilities, 8) * 8; 525 526 /* 527 * Data of each logical processor is also rounded up to be a multiple 528 * of 8 bytes. 529 */ 530 hfi_features.cpu_stride = DIV_ROUND_UP(nr_capabilities, 8) * 8; 531 532 return 0; 533 } 534 535 void __init intel_hfi_init(void) 536 { 537 struct hfi_instance *hfi_instance; 538 int i, j; 539 540 if (hfi_parse_features()) 541 return; 542 543 /* There is one HFI instance per die/package. */ 544 max_hfi_instances = topology_max_packages() * 545 topology_max_die_per_package(); 546 547 /* 548 * This allocation may fail. CPU hotplug callbacks must check 549 * for a null pointer. 550 */ 551 hfi_instances = kcalloc(max_hfi_instances, sizeof(*hfi_instances), 552 GFP_KERNEL); 553 if (!hfi_instances) 554 return; 555 556 for (i = 0; i < max_hfi_instances; i++) { 557 hfi_instance = &hfi_instances[i]; 558 if (!zalloc_cpumask_var(&hfi_instance->cpus, GFP_KERNEL)) 559 goto err_nomem; 560 } 561 562 hfi_updates_wq = create_singlethread_workqueue("hfi-updates"); 563 if (!hfi_updates_wq) 564 goto err_nomem; 565 566 return; 567 568 err_nomem: 569 for (j = 0; j < i; ++j) { 570 hfi_instance = &hfi_instances[j]; 571 free_cpumask_var(hfi_instance->cpus); 572 } 573 574 kfree(hfi_instances); 575 hfi_instances = NULL; 576 } 577