1 // SPDX-License-Identifier: GPL-2.0-only
2 /*
3  * Hardware Feedback Interface Driver
4  *
5  * Copyright (c) 2021, Intel Corporation.
6  *
7  * Authors: Aubrey Li <aubrey.li@linux.intel.com>
8  *          Ricardo Neri <ricardo.neri-calderon@linux.intel.com>
9  *
10  *
 * The Hardware Feedback Interface provides performance and energy efficiency
 * capability information for each CPU in the system. Depending on the processor
13  * model, hardware may periodically update these capabilities as a result of
14  * changes in the operating conditions (e.g., power limits or thermal
15  * constraints). On other processor models, there is a single HFI update
16  * at boot.
17  *
18  * This file provides functionality to process HFI updates and relay these
19  * updates to userspace.
20  */
21 
22 #define pr_fmt(fmt)  "intel-hfi: " fmt
23 
24 #include <linux/bitops.h>
25 #include <linux/cpufeature.h>
26 #include <linux/cpumask.h>
27 #include <linux/gfp.h>
28 #include <linux/io.h>
29 #include <linux/kernel.h>
30 #include <linux/math.h>
31 #include <linux/mutex.h>
32 #include <linux/percpu-defs.h>
33 #include <linux/printk.h>
34 #include <linux/processor.h>
35 #include <linux/slab.h>
36 #include <linux/spinlock.h>
37 #include <linux/string.h>
38 #include <linux/topology.h>
39 #include <linux/workqueue.h>
40 
41 #include <asm/msr.h>
42 
43 #include "../thermal_core.h"
44 #include "intel_hfi.h"
45 #include "thermal_interrupt.h"
46 
47 /* Hardware Feedback Interface MSR configuration bits */
48 #define HW_FEEDBACK_PTR_VALID_BIT		BIT(0)
49 #define HW_FEEDBACK_CONFIG_HFI_ENABLE_BIT	BIT(0)
50 
51 /* CPUID detection and enumeration definitions for HFI */
52 
53 #define CPUID_HFI_LEAF 6
54 
55 union hfi_capabilities {
56 	struct {
57 		u8	performance:1;
58 		u8	energy_efficiency:1;
59 		u8	__reserved:6;
60 	} split;
61 	u8 bits;
62 };
63 
64 union cpuid6_edx {
65 	struct {
66 		union hfi_capabilities	capabilities;
67 		u32			table_pages:4;
68 		u32			__reserved:4;
69 		s32			index:16;
70 	} split;
71 	u32 full;
72 };
73 
74 /**
75  * struct hfi_cpu_data - HFI capabilities per CPU
76  * @perf_cap:		Performance capability
77  * @ee_cap:		Energy efficiency capability
78  *
79  * Capabilities of a logical processor in the HFI table. These capabilities are
80  * unitless.
81  */
82 struct hfi_cpu_data {
83 	u8	perf_cap;
84 	u8	ee_cap;
85 } __packed;
86 
87 /**
88  * struct hfi_hdr - Header of the HFI table
89  * @perf_updated:	Hardware updated performance capabilities
90  * @ee_updated:		Hardware updated energy efficiency capabilities
91  *
92  * Properties of the data in an HFI table.
93  */
94 struct hfi_hdr {
95 	u8	perf_updated;
96 	u8	ee_updated;
97 } __packed;
98 
99 /**
100  * struct hfi_instance - Representation of an HFI instance (i.e., a table)
101  * @local_table:	Base of the local copy of the HFI table
102  * @timestamp:		Timestamp of the last update of the local table.
103  *			Located at the base of the local table.
104  * @hdr:		Base address of the header of the local table
105  * @data:		Base address of the data of the local table
106  * @cpus:		CPUs represented in this HFI table instance
107  * @hw_table:		Pointer to the HFI table of this instance
108  * @update_work:	Delayed work to process HFI updates
109  * @table_lock:		Lock to protect acceses to the table of this instance
110  * @event_lock:		Lock to process HFI interrupts
111  *
112  * A set of parameters to parse and navigate a specific HFI table.
113  */
114 struct hfi_instance {
115 	union {
116 		void			*local_table;
117 		u64			*timestamp;
118 	};
119 	void			*hdr;
120 	void			*data;
121 	cpumask_var_t		cpus;
122 	void			*hw_table;
123 	struct delayed_work	update_work;
124 	raw_spinlock_t		table_lock;
125 	raw_spinlock_t		event_lock;
126 };
127 
128 /**
129  * struct hfi_features - Supported HFI features
130  * @nr_table_pages:	Size of the HFI table in 4KB pages
131  * @cpu_stride:		Stride size to locate the capability data of a logical
132  *			processor within the table (i.e., row stride)
133  * @hdr_size:		Size of the table header
134  *
135  * Parameters and supported features that are common to all HFI instances
136  */
137 struct hfi_features {
138 	size_t		nr_table_pages;
139 	unsigned int	cpu_stride;
140 	unsigned int	hdr_size;
141 };
142 
143 /**
144  * struct hfi_cpu_info - Per-CPU attributes to consume HFI data
145  * @index:		Row of this CPU in its HFI table
146  * @hfi_instance:	Attributes of the HFI table to which this CPU belongs
147  *
148  * Parameters to link a logical processor to an HFI table and a row within it.
149  */
150 struct hfi_cpu_info {
151 	s16			index;
152 	struct hfi_instance	*hfi_instance;
153 };
154 
155 static DEFINE_PER_CPU(struct hfi_cpu_info, hfi_cpu_info) = { .index = -1 };
156 
157 static int max_hfi_instances;
158 static struct hfi_instance *hfi_instances;
159 
160 static struct hfi_features hfi_features;
161 static DEFINE_MUTEX(hfi_instance_lock);
162 
163 static struct workqueue_struct *hfi_updates_wq;
164 #define HFI_UPDATE_INTERVAL		HZ
165 #define HFI_MAX_THERM_NOTIFY_COUNT	16
166 
167 static void get_hfi_caps(struct hfi_instance *hfi_instance,
168 			 struct thermal_genl_cpu_caps *cpu_caps)
169 {
170 	int cpu, i = 0;
171 
172 	raw_spin_lock_irq(&hfi_instance->table_lock);
173 	for_each_cpu(cpu, hfi_instance->cpus) {
174 		struct hfi_cpu_data *caps;
175 		s16 index;
176 
177 		index = per_cpu(hfi_cpu_info, cpu).index;
178 		caps = hfi_instance->data + index * hfi_features.cpu_stride;
179 		cpu_caps[i].cpu = cpu;
180 
181 		/*
182 		 * Scale performance and energy efficiency to
183 		 * the [0, 1023] interval that thermal netlink uses.
184 		 */
185 		cpu_caps[i].performance = caps->perf_cap << 2;
186 		cpu_caps[i].efficiency = caps->ee_cap << 2;
187 
188 		++i;
189 	}
190 	raw_spin_unlock_irq(&hfi_instance->table_lock);
191 }
192 
193 /*
194  * Call update_capabilities() when there are changes in the HFI table.
195  */
196 static void update_capabilities(struct hfi_instance *hfi_instance)
197 {
198 	struct thermal_genl_cpu_caps *cpu_caps;
199 	int i = 0, cpu_count;
200 
201 	/* CPUs may come online/offline while processing an HFI update. */
202 	mutex_lock(&hfi_instance_lock);
203 
204 	cpu_count = cpumask_weight(hfi_instance->cpus);
205 
206 	/* No CPUs to report in this hfi_instance. */
207 	if (!cpu_count)
208 		goto out;
209 
210 	cpu_caps = kcalloc(cpu_count, sizeof(*cpu_caps), GFP_KERNEL);
211 	if (!cpu_caps)
212 		goto out;
213 
214 	get_hfi_caps(hfi_instance, cpu_caps);
215 
216 	if (cpu_count < HFI_MAX_THERM_NOTIFY_COUNT)
217 		goto last_cmd;
218 
219 	/* Process complete chunks of HFI_MAX_THERM_NOTIFY_COUNT capabilities. */
220 	for (i = 0;
221 	     (i + HFI_MAX_THERM_NOTIFY_COUNT) <= cpu_count;
222 	     i += HFI_MAX_THERM_NOTIFY_COUNT)
223 		thermal_genl_cpu_capability_event(HFI_MAX_THERM_NOTIFY_COUNT,
224 						  &cpu_caps[i]);
225 
226 	cpu_count = cpu_count - i;
227 
228 last_cmd:
229 	/* Process the remaining capabilities if any. */
230 	if (cpu_count)
231 		thermal_genl_cpu_capability_event(cpu_count, &cpu_caps[i]);
232 
233 	kfree(cpu_caps);
234 out:
235 	mutex_unlock(&hfi_instance_lock);
236 }
237 
238 static void hfi_update_work_fn(struct work_struct *work)
239 {
240 	struct hfi_instance *hfi_instance;
241 
242 	hfi_instance = container_of(to_delayed_work(work), struct hfi_instance,
243 				    update_work);
244 
245 	update_capabilities(hfi_instance);
246 }
247 
248 void intel_hfi_process_event(__u64 pkg_therm_status_msr_val)
249 {
250 	struct hfi_instance *hfi_instance;
251 	int cpu = smp_processor_id();
252 	struct hfi_cpu_info *info;
253 	u64 new_timestamp, msr, hfi;
254 
255 	if (!pkg_therm_status_msr_val)
256 		return;
257 
258 	info = &per_cpu(hfi_cpu_info, cpu);
259 	if (!info)
260 		return;
261 
262 	/*
263 	 * A CPU is linked to its HFI instance before the thermal vector in the
264 	 * local APIC is unmasked. Hence, info->hfi_instance cannot be NULL
265 	 * when receiving an HFI event.
266 	 */
267 	hfi_instance = info->hfi_instance;
268 	if (unlikely(!hfi_instance)) {
269 		pr_debug("Received event on CPU %d but instance was null", cpu);
270 		return;
271 	}
272 
273 	/*
274 	 * On most systems, all CPUs in the package receive a package-level
275 	 * thermal interrupt when there is an HFI update. It is sufficient to
276 	 * let a single CPU to acknowledge the update and queue work to
277 	 * process it. The remaining CPUs can resume their work.
278 	 */
279 	if (!raw_spin_trylock(&hfi_instance->event_lock))
280 		return;
281 
282 	rdmsrl(MSR_IA32_PACKAGE_THERM_STATUS, msr);
283 	hfi = msr & PACKAGE_THERM_STATUS_HFI_UPDATED;
284 	if (!hfi) {
285 		raw_spin_unlock(&hfi_instance->event_lock);
286 		return;
287 	}
288 
289 	/*
290 	 * Ack duplicate update. Since there is an active HFI
291 	 * status from HW, it must be a new event, not a case
292 	 * where a lagging CPU entered the locked region.
293 	 */
294 	new_timestamp = *(u64 *)hfi_instance->hw_table;
295 	if (*hfi_instance->timestamp == new_timestamp) {
296 		thermal_clear_package_intr_status(PACKAGE_LEVEL, PACKAGE_THERM_STATUS_HFI_UPDATED);
297 		raw_spin_unlock(&hfi_instance->event_lock);
298 		return;
299 	}
300 
301 	raw_spin_lock(&hfi_instance->table_lock);
302 
303 	/*
304 	 * Copy the updated table into our local copy. This includes the new
305 	 * timestamp.
306 	 */
307 	memcpy(hfi_instance->local_table, hfi_instance->hw_table,
308 	       hfi_features.nr_table_pages << PAGE_SHIFT);
309 
310 	/*
311 	 * Let hardware know that we are done reading the HFI table and it is
312 	 * free to update it again.
313 	 */
314 	thermal_clear_package_intr_status(PACKAGE_LEVEL, PACKAGE_THERM_STATUS_HFI_UPDATED);
315 
316 	raw_spin_unlock(&hfi_instance->table_lock);
317 	raw_spin_unlock(&hfi_instance->event_lock);
318 
319 	queue_delayed_work(hfi_updates_wq, &hfi_instance->update_work,
320 			   HFI_UPDATE_INTERVAL);
321 }
322 
323 static void init_hfi_cpu_index(struct hfi_cpu_info *info)
324 {
325 	union cpuid6_edx edx;
326 
327 	/* Do not re-read @cpu's index if it has already been initialized. */
328 	if (info->index > -1)
329 		return;
330 
331 	edx.full = cpuid_edx(CPUID_HFI_LEAF);
332 	info->index = edx.split.index;
333 }
334 
335 /*
336  * The format of the HFI table depends on the number of capabilities that the
337  * hardware supports. Keep a data structure to navigate the table.
338  */
339 static void init_hfi_instance(struct hfi_instance *hfi_instance)
340 {
341 	/* The HFI header is below the time-stamp. */
342 	hfi_instance->hdr = hfi_instance->local_table +
343 			    sizeof(*hfi_instance->timestamp);
344 
345 	/* The HFI data starts below the header. */
346 	hfi_instance->data = hfi_instance->hdr + hfi_features.hdr_size;
347 }
348 
349 /**
350  * intel_hfi_online() - Enable HFI on @cpu
351  * @cpu:	CPU in which the HFI will be enabled
352  *
353  * Enable the HFI to be used in @cpu. The HFI is enabled at the die/package
354  * level. The first CPU in the die/package to come online does the full HFI
355  * initialization. Subsequent CPUs will just link themselves to the HFI
356  * instance of their die/package.
357  *
358  * This function is called before enabling the thermal vector in the local APIC
359  * in order to ensure that @cpu has an associated HFI instance when it receives
360  * an HFI event.
361  */
362 void intel_hfi_online(unsigned int cpu)
363 {
364 	struct hfi_instance *hfi_instance;
365 	struct hfi_cpu_info *info;
366 	phys_addr_t hw_table_pa;
367 	u64 msr_val;
368 	u16 die_id;
369 
370 	/* Nothing to do if hfi_instances are missing. */
371 	if (!hfi_instances)
372 		return;
373 
374 	/*
375 	 * Link @cpu to the HFI instance of its package/die. It does not
376 	 * matter whether the instance has been initialized.
377 	 */
378 	info = &per_cpu(hfi_cpu_info, cpu);
379 	die_id = topology_logical_die_id(cpu);
380 	hfi_instance = info->hfi_instance;
381 	if (!hfi_instance) {
382 		if (die_id >= max_hfi_instances)
383 			return;
384 
385 		hfi_instance = &hfi_instances[die_id];
386 		info->hfi_instance = hfi_instance;
387 	}
388 
389 	init_hfi_cpu_index(info);
390 
391 	/*
392 	 * Now check if the HFI instance of the package/die of @cpu has been
393 	 * initialized (by checking its header). In such case, all we have to
394 	 * do is to add @cpu to this instance's cpumask.
395 	 */
396 	mutex_lock(&hfi_instance_lock);
397 	if (hfi_instance->hdr) {
398 		cpumask_set_cpu(cpu, hfi_instance->cpus);
399 		goto unlock;
400 	}
401 
402 	/*
403 	 * Hardware is programmed with the physical address of the first page
404 	 * frame of the table. Hence, the allocated memory must be page-aligned.
405 	 */
406 	hfi_instance->hw_table = alloc_pages_exact(hfi_features.nr_table_pages,
407 						   GFP_KERNEL | __GFP_ZERO);
408 	if (!hfi_instance->hw_table)
409 		goto unlock;
410 
411 	hw_table_pa = virt_to_phys(hfi_instance->hw_table);
412 
413 	/*
414 	 * Allocate memory to keep a local copy of the table that
415 	 * hardware generates.
416 	 */
417 	hfi_instance->local_table = kzalloc(hfi_features.nr_table_pages << PAGE_SHIFT,
418 					    GFP_KERNEL);
419 	if (!hfi_instance->local_table)
420 		goto free_hw_table;
421 
422 	/*
423 	 * Program the address of the feedback table of this die/package. On
424 	 * some processors, hardware remembers the old address of the HFI table
425 	 * even after having been reprogrammed and re-enabled. Thus, do not free
426 	 * the pages allocated for the table or reprogram the hardware with a
427 	 * new base address. Namely, program the hardware only once.
428 	 */
429 	msr_val = hw_table_pa | HW_FEEDBACK_PTR_VALID_BIT;
430 	wrmsrl(MSR_IA32_HW_FEEDBACK_PTR, msr_val);
431 
432 	init_hfi_instance(hfi_instance);
433 
434 	INIT_DELAYED_WORK(&hfi_instance->update_work, hfi_update_work_fn);
435 	raw_spin_lock_init(&hfi_instance->table_lock);
436 	raw_spin_lock_init(&hfi_instance->event_lock);
437 
438 	cpumask_set_cpu(cpu, hfi_instance->cpus);
439 
440 	/*
441 	 * Enable the hardware feedback interface and never disable it. See
442 	 * comment on programming the address of the table.
443 	 */
444 	rdmsrl(MSR_IA32_HW_FEEDBACK_CONFIG, msr_val);
445 	msr_val |= HW_FEEDBACK_CONFIG_HFI_ENABLE_BIT;
446 	wrmsrl(MSR_IA32_HW_FEEDBACK_CONFIG, msr_val);
447 
448 unlock:
449 	mutex_unlock(&hfi_instance_lock);
450 	return;
451 
452 free_hw_table:
453 	free_pages_exact(hfi_instance->hw_table, hfi_features.nr_table_pages);
454 	goto unlock;
455 }
456 
457 /**
458  * intel_hfi_offline() - Disable HFI on @cpu
459  * @cpu:	CPU in which the HFI will be disabled
460  *
461  * Remove @cpu from those covered by its HFI instance.
462  *
463  * On some processors, hardware remembers previous programming settings even
464  * after being reprogrammed. Thus, keep HFI enabled even if all CPUs in the
465  * die/package of @cpu are offline. See note in intel_hfi_online().
466  */
467 void intel_hfi_offline(unsigned int cpu)
468 {
469 	struct hfi_cpu_info *info = &per_cpu(hfi_cpu_info, cpu);
470 	struct hfi_instance *hfi_instance;
471 
472 	/*
473 	 * Check if @cpu as an associated, initialized (i.e., with a non-NULL
474 	 * header). Also, HFI instances are only initialized if X86_FEATURE_HFI
475 	 * is present.
476 	 */
477 	hfi_instance = info->hfi_instance;
478 	if (!hfi_instance)
479 		return;
480 
481 	if (!hfi_instance->hdr)
482 		return;
483 
484 	mutex_lock(&hfi_instance_lock);
485 	cpumask_clear_cpu(cpu, hfi_instance->cpus);
486 	mutex_unlock(&hfi_instance_lock);
487 }
488 
489 static __init int hfi_parse_features(void)
490 {
491 	unsigned int nr_capabilities;
492 	union cpuid6_edx edx;
493 
494 	if (!boot_cpu_has(X86_FEATURE_HFI))
495 		return -ENODEV;
496 
497 	/*
498 	 * If we are here we know that CPUID_HFI_LEAF exists. Parse the
499 	 * supported capabilities and the size of the HFI table.
500 	 */
501 	edx.full = cpuid_edx(CPUID_HFI_LEAF);
502 
503 	if (!edx.split.capabilities.split.performance) {
504 		pr_debug("Performance reporting not supported! Not using HFI\n");
505 		return -ENODEV;
506 	}
507 
508 	/*
509 	 * The number of supported capabilities determines the number of
510 	 * columns in the HFI table. Exclude the reserved bits.
511 	 */
512 	edx.split.capabilities.split.__reserved = 0;
513 	nr_capabilities = hweight8(edx.split.capabilities.bits);
514 
515 	/* The number of 4KB pages required by the table */
516 	hfi_features.nr_table_pages = edx.split.table_pages + 1;
517 
518 	/*
519 	 * The header contains change indications for each supported feature.
520 	 * The size of the table header is rounded up to be a multiple of 8
521 	 * bytes.
522 	 */
523 	hfi_features.hdr_size = DIV_ROUND_UP(nr_capabilities, 8) * 8;
524 
525 	/*
526 	 * Data of each logical processor is also rounded up to be a multiple
527 	 * of 8 bytes.
528 	 */
529 	hfi_features.cpu_stride = DIV_ROUND_UP(nr_capabilities, 8) * 8;
530 
531 	return 0;
532 }
533 
534 void __init intel_hfi_init(void)
535 {
536 	struct hfi_instance *hfi_instance;
537 	int i, j;
538 
539 	if (hfi_parse_features())
540 		return;
541 
542 	/* There is one HFI instance per die/package. */
543 	max_hfi_instances = topology_max_packages() *
544 			    topology_max_die_per_package();
545 
546 	/*
547 	 * This allocation may fail. CPU hotplug callbacks must check
548 	 * for a null pointer.
549 	 */
550 	hfi_instances = kcalloc(max_hfi_instances, sizeof(*hfi_instances),
551 				GFP_KERNEL);
552 	if (!hfi_instances)
553 		return;
554 
555 	for (i = 0; i < max_hfi_instances; i++) {
556 		hfi_instance = &hfi_instances[i];
557 		if (!zalloc_cpumask_var(&hfi_instance->cpus, GFP_KERNEL))
558 			goto err_nomem;
559 	}
560 
561 	hfi_updates_wq = create_singlethread_workqueue("hfi-updates");
562 	if (!hfi_updates_wq)
563 		goto err_nomem;
564 
565 	return;
566 
567 err_nomem:
568 	for (j = 0; j < i; ++j) {
569 		hfi_instance = &hfi_instances[j];
570 		free_cpumask_var(hfi_instance->cpus);
571 	}
572 
573 	kfree(hfi_instances);
574 	hfi_instances = NULL;
575 }
576