xref: /openbmc/linux/arch/x86/kernel/cpu/resctrl/internal.h (revision 781096d971dfe3c5f9401a300bdb0b148a600584)
1 /* SPDX-License-Identifier: GPL-2.0 */
2 #ifndef _ASM_X86_RESCTRL_INTERNAL_H
3 #define _ASM_X86_RESCTRL_INTERNAL_H
4 
5 #include <linux/resctrl.h>
6 #include <linux/sched.h>
7 #include <linux/kernfs.h>
8 #include <linux/fs_context.h>
9 #include <linux/jump_label.h>
10 
/* MSR numbers: QoS configuration, capacity bit masks and bandwidth control */
#define MSR_IA32_L3_QOS_CFG		0xc81
#define MSR_IA32_L2_QOS_CFG		0xc82
#define MSR_IA32_L3_CBM_BASE		0xc90
#define MSR_IA32_L2_CBM_BASE		0xd10
#define MSR_IA32_MBA_THRTL_BASE		0xd50
#define MSR_IA32_MBA_BW_BASE		0xc0000200

/* MSR numbers: monitoring counter and its event selector */
#define MSR_IA32_QM_CTR			0x0c8e
#define MSR_IA32_QM_EVTSEL		0x0c8d

/* CDP enable values, one per cache level */
#define L3_QOS_CDP_ENABLE		0x01ULL

#define L2_QOS_CDP_ENABLE		0x01ULL

/*
 * An event ID is written to IA32_QM_EVTSEL first; the matching event
 * counter is then read back from IA32_QM_CTR.
 */
#define QOS_L3_OCCUP_EVENT_ID		0x01
#define QOS_L3_MBM_TOTAL_EVENT_ID	0x02
#define QOS_L3_MBM_LOCAL_EVENT_ID	0x03

#define CQM_LIMBOCHECK_INTERVAL	1000

#define MBM_CNTR_WIDTH_BASE		24
#define MBM_OVERFLOW_INTERVAL		1000
#define MAX_MBA_BW			100u
#define MBA_IS_LINEAR			0x4
#define MAX_MBA_BW_AMD			0x800
#define MBM_CNTR_WIDTH_OFFSET_AMD	20

/* Status bits reported in the top two bits of a counter read */
#define RMID_VAL_ERROR			BIT_ULL(63)
#define RMID_VAL_UNAVAIL		BIT_ULL(62)
/*
 * Bits 63 and 62 are taken by the status flags above, which leaves 62 bits
 * of MSR_IA32_QM_CTR for returned data. Hardware reports the counter width
 * as an offset from MBM_CNTR_WIDTH_BASE, so the largest usable offset is
 * whatever remains of those 62 bits.
 */
#define MBM_CNTR_WIDTH_OFFSET_MAX (62 - MBM_CNTR_WIDTH_BASE)
50 
51 
52 struct rdt_fs_context {
53 	struct kernfs_fs_context	kfc;
54 	bool				enable_cdpl2;
55 	bool				enable_cdpl3;
56 	bool				enable_mba_mbps;
57 };
58 
59 static inline struct rdt_fs_context *rdt_fc2context(struct fs_context *fc)
60 {
61 	struct kernfs_fs_context *kfc = fc->fs_private;
62 
63 	return container_of(kfc, struct rdt_fs_context, kfc);
64 }
65 
/* Static keys declared here for users of this header; both start out false. */
DECLARE_STATIC_KEY_FALSE(rdt_enable_key);
DECLARE_STATIC_KEY_FALSE(rdt_mon_enable_key);
68 
69 /**
70  * struct mon_evt - Entry in the event list of a resource
71  * @evtid:		event id
72  * @name:		name of the event
73  * @list:		entry in &rdt_resource->evt_list
74  */
75 struct mon_evt {
76 	u32			evtid;
77 	char			*name;
78 	struct list_head	list;
79 };
80 
/**
 * union mon_data_bits - Per event file monitoring details
 * @priv:              Pointer-typed view of @u, so the packed event data
 *                     can be kept as kernfs private data
 * @rid:               Id of the resource the event file belongs to
 * @evtid:             Id of the event the file reports
 * @domid:             Domain the event file belongs to
 * @u:                 The struct holding the bit fields
 *
 * The three bit fields add up to 32 bits (10 + 8 + 14).
 */
union mon_data_bits {
	void *priv;
	struct {
		unsigned int rid	: 10;
		unsigned int evtid	: 8;
		unsigned int domid	: 14;
	} u;
};
98 
99 struct rmid_read {
100 	struct rdtgroup		*rgrp;
101 	struct rdt_resource	*r;
102 	struct rdt_domain	*d;
103 	int			evtid;
104 	bool			first;
105 	u64			val;
106 };
107 
/*
 * Globals defined outside this header. rdt_mon_features is the bit mask
 * tested by the is_*_enabled() helpers below, with the bit position given
 * by the QOS_L3_*_EVENT_ID value.
 */
extern unsigned int resctrl_cqm_threshold;
extern bool rdt_alloc_capable;
extern bool rdt_mon_capable;
extern unsigned int rdt_mon_features;
extern struct list_head resctrl_schema_all;
113 
/*
 * Kind of resource group. struct rdtgroup documents its type member as
 * "either monitor only or ctrl_mon group". RDT_NUM_GROUP is the number of
 * kinds and has to stay last.
 */
enum rdt_group_type {
	RDTCTRL_GROUP = 0,
	RDTMON_GROUP,
	RDT_NUM_GROUP,
};
119 
/**
 * enum rdtgrp_mode - Mode of a RDT resource group
 * @RDT_MODE_SHAREABLE: Allocations of this resource group may be shared
 * @RDT_MODE_EXCLUSIVE: Allocations of this resource group may not be shared
 * @RDT_MODE_PSEUDO_LOCKSETUP: Resource group is being prepared for
 *                             Pseudo-Locking
 * @RDT_MODE_PSEUDO_LOCKED: Allocations of this resource group may not be
 *                          shared AND they are Cache Pseudo-Locked
 * @RDT_NUM_MODES: Total number of modes
 *
 * A resource group's mode controls how much its allocations may overlap
 * with those of other resource groups (classes of service). The user
 * changes the mode by writing to the group's "mode" resctrl file.
 *
 * Writing the matching text to "mode" selects "shareable", "exclusive" or
 * "pseudo-locksetup". "pseudo-locked" is not written directly: a group
 * enters it once the schemata is written while the group is in
 * "pseudo-locksetup" mode.
 */
enum rdtgrp_mode {
	RDT_MODE_SHAREABLE = 0,
	RDT_MODE_EXCLUSIVE,
	RDT_MODE_PSEUDO_LOCKSETUP,
	RDT_MODE_PSEUDO_LOCKED,

	/* Keep as the final entry: it counts the modes above */
	RDT_NUM_MODES,
};
148 
149 /**
150  * struct mongroup - store mon group's data in resctrl fs.
151  * @mon_data_kn:		kernfs node for the mon_data directory
152  * @parent:			parent rdtgrp
153  * @crdtgrp_list:		child rdtgroup node list
154  * @rmid:			rmid for this rdtgroup
155  */
156 struct mongroup {
157 	struct kernfs_node	*mon_data_kn;
158 	struct rdtgroup		*parent;
159 	struct list_head	crdtgrp_list;
160 	u32			rmid;
161 };
162 
163 /**
164  * struct pseudo_lock_region - pseudo-lock region information
165  * @s:			Resctrl schema for the resource to which this
166  *			pseudo-locked region belongs
167  * @d:			RDT domain to which this pseudo-locked region
168  *			belongs
169  * @cbm:		bitmask of the pseudo-locked region
170  * @lock_thread_wq:	waitqueue used to wait on the pseudo-locking thread
171  *			completion
172  * @thread_done:	variable used by waitqueue to test if pseudo-locking
173  *			thread completed
174  * @cpu:		core associated with the cache on which the setup code
175  *			will be run
176  * @line_size:		size of the cache lines
177  * @size:		size of pseudo-locked region in bytes
178  * @kmem:		the kernel memory associated with pseudo-locked region
179  * @minor:		minor number of character device associated with this
180  *			region
181  * @debugfs_dir:	pointer to this region's directory in the debugfs
182  *			filesystem
183  * @pm_reqs:		Power management QoS requests related to this region
184  */
185 struct pseudo_lock_region {
186 	struct resctrl_schema	*s;
187 	struct rdt_domain	*d;
188 	u32			cbm;
189 	wait_queue_head_t	lock_thread_wq;
190 	int			thread_done;
191 	int			cpu;
192 	unsigned int		line_size;
193 	unsigned int		size;
194 	void			*kmem;
195 	unsigned int		minor;
196 	struct dentry		*debugfs_dir;
197 	struct list_head	pm_reqs;
198 };
199 
200 /**
201  * struct rdtgroup - store rdtgroup's data in resctrl file system.
202  * @kn:				kernfs node
203  * @rdtgroup_list:		linked list for all rdtgroups
204  * @closid:			closid for this rdtgroup
205  * @cpu_mask:			CPUs assigned to this rdtgroup
206  * @flags:			status bits
207  * @waitcount:			how many cpus expect to find this
208  *				group when they acquire rdtgroup_mutex
209  * @type:			indicates type of this rdtgroup - either
210  *				monitor only or ctrl_mon group
211  * @mon:			mongroup related data
212  * @mode:			mode of resource group
213  * @plr:			pseudo-locked region
214  */
215 struct rdtgroup {
216 	struct kernfs_node		*kn;
217 	struct list_head		rdtgroup_list;
218 	u32				closid;
219 	struct cpumask			cpu_mask;
220 	int				flags;
221 	atomic_t			waitcount;
222 	enum rdt_group_type		type;
223 	struct mongroup			mon;
224 	enum rdtgrp_mode		mode;
225 	struct pseudo_lock_region	*plr;
226 };
227 
/* Values for rdtgroup.flags */
#define	RDT_DELETED		1

/* Values for rftype.flags */
#define RFTYPE_FLAGS_CPUS_LIST	1

/*
 * File type flags for the base and info directories (rftype.fflags).
 * The RF_* names are ready-made combinations of the RFTYPE_* bits.
 */
#define RFTYPE_INFO			BIT(0)
#define RFTYPE_BASE			BIT(1)
#define RF_CTRLSHIFT			4
#define RF_MONSHIFT			5
#define RF_TOPSHIFT			6
#define RFTYPE_CTRL			BIT(RF_CTRLSHIFT)
#define RFTYPE_MON			BIT(RF_MONSHIFT)
#define RFTYPE_TOP			BIT(RF_TOPSHIFT)
#define RFTYPE_RES_CACHE		BIT(8)
#define RFTYPE_RES_MB			BIT(9)
#define RF_CTRL_INFO			(RFTYPE_INFO | RFTYPE_CTRL)
#define RF_MON_INFO			(RFTYPE_INFO | RFTYPE_MON)
#define RF_TOP_INFO			(RFTYPE_INFO | RFTYPE_TOP)
#define RF_CTRL_BASE			(RFTYPE_BASE | RFTYPE_CTRL)
251 
252 /* List of all resource groups */
253 extern struct list_head rdt_all_groups;
254 
255 extern int max_name_width, max_data_width;
256 
257 int __init rdtgroup_init(void);
258 void __exit rdtgroup_exit(void);
259 
260 /**
261  * struct rftype - describe each file in the resctrl file system
262  * @name:	File name
263  * @mode:	Access mode
264  * @kf_ops:	File operations
265  * @flags:	File specific RFTYPE_FLAGS_* flags
266  * @fflags:	File specific RF_* or RFTYPE_* flags
267  * @seq_show:	Show content of the file
268  * @write:	Write to the file
269  */
270 struct rftype {
271 	char			*name;
272 	umode_t			mode;
273 	const struct kernfs_ops	*kf_ops;
274 	unsigned long		flags;
275 	unsigned long		fflags;
276 
277 	int (*seq_show)(struct kernfs_open_file *of,
278 			struct seq_file *sf, void *v);
279 	/*
280 	 * write() is the generic write callback which maps directly to
281 	 * kernfs write operation and overrides all other operations.
282 	 * Maximum write size is determined by ->max_write_len.
283 	 */
284 	ssize_t (*write)(struct kernfs_open_file *of,
285 			 char *buf, size_t nbytes, loff_t off);
286 };
287 
288 /**
289  * struct mbm_state - status for each MBM counter in each domain
290  * @chunks:	Total data moved (multiply by rdt_group.mon_scale to get bytes)
291  * @prev_msr:	Value of IA32_QM_CTR for this RMID last time we read it
292  * @prev_bw_msr:Value of previous IA32_QM_CTR for bandwidth counting
293  * @prev_bw:	The most recent bandwidth in MBps
294  * @delta_bw:	Difference between the current and previous bandwidth
295  * @delta_comp:	Indicates whether to compute the delta_bw
296  */
297 struct mbm_state {
298 	u64	chunks;
299 	u64	prev_msr;
300 	u64	prev_bw_msr;
301 	u32	prev_bw;
302 	u32	delta_bw;
303 	bool	delta_comp;
304 };
305 
306 /**
307  * struct rdt_hw_domain - Arch private attributes of a set of CPUs that share
308  *			  a resource
309  * @d_resctrl:	Properties exposed to the resctrl file system
310  * @ctrl_val:	array of cache or mem ctrl values (indexed by CLOSID)
311  * @mbps_val:	When mba_sc is enabled, this holds the bandwidth in MBps
312  *
313  * Members of this structure are accessed via helpers that provide abstraction.
314  */
315 struct rdt_hw_domain {
316 	struct rdt_domain		d_resctrl;
317 	u32				*ctrl_val;
318 	u32				*mbps_val;
319 };
320 
321 static inline struct rdt_hw_domain *resctrl_to_arch_dom(struct rdt_domain *r)
322 {
323 	return container_of(r, struct rdt_hw_domain, d_resctrl);
324 }
325 
326 /**
327  * struct msr_param - set a range of MSRs from a domain
328  * @res:       The resource to use
329  * @low:       Beginning index from base MSR
330  * @high:      End index
331  */
332 struct msr_param {
333 	struct rdt_resource	*res;
334 	u32			low;
335 	u32			high;
336 };
337 
338 static inline bool is_llc_occupancy_enabled(void)
339 {
340 	return (rdt_mon_features & (1 << QOS_L3_OCCUP_EVENT_ID));
341 }
342 
343 static inline bool is_mbm_total_enabled(void)
344 {
345 	return (rdt_mon_features & (1 << QOS_L3_MBM_TOTAL_EVENT_ID));
346 }
347 
348 static inline bool is_mbm_local_enabled(void)
349 {
350 	return (rdt_mon_features & (1 << QOS_L3_MBM_LOCAL_EVENT_ID));
351 }
352 
/* True when at least one of the two MBM events (total, local) is enabled. */
static inline bool is_mbm_enabled(void)
{
	if (is_mbm_total_enabled())
		return true;

	return is_mbm_local_enabled();
}
357 
358 static inline bool is_mbm_event(int e)
359 {
360 	return (e >= QOS_L3_MBM_TOTAL_EVENT_ID &&
361 		e <= QOS_L3_MBM_LOCAL_EVENT_ID);
362 }
363 
/**
 * struct rdt_parse_data - input handed to the parse_cbm()/parse_bw() parsers
 * @rdtgrp:	resource group associated with the parse
 * @buf:	character buffer; presumably the text to parse - confirm
 *		against the parser implementations
 */
struct rdt_parse_data {
	struct rdtgroup		*rdtgrp;
	char			*buf;
};
368 
369 /**
370  * struct rdt_hw_resource - arch private attributes of a resctrl resource
371  * @r_resctrl:		Attributes of the resource used directly by resctrl.
372  * @num_closid:		Maximum number of closid this hardware can support,
373  *			regardless of CDP. This is exposed via
374  *			resctrl_arch_get_num_closid() to avoid confusion
375  *			with struct resctrl_schema's property of the same name,
376  *			which has been corrected for features like CDP.
377  * @msr_base:		Base MSR address for CBMs
378  * @msr_update:		Function pointer to update QOS MSRs
379  * @mon_scale:		cqm counter * mon_scale = occupancy in bytes
380  * @mbm_width:		Monitor width, to detect and correct for overflow.
381  * @cdp_enabled:	CDP state of this resource
382  *
383  * Members of this structure are either private to the architecture
384  * e.g. mbm_width, or accessed via helpers that provide abstraction. e.g.
385  * msr_update and msr_base.
386  */
387 struct rdt_hw_resource {
388 	struct rdt_resource	r_resctrl;
389 	u32			num_closid;
390 	unsigned int		msr_base;
391 	void (*msr_update)	(struct rdt_domain *d, struct msr_param *m,
392 				 struct rdt_resource *r);
393 	unsigned int		mon_scale;
394 	unsigned int		mbm_width;
395 	bool			cdp_enabled;
396 };
397 
398 static inline struct rdt_hw_resource *resctrl_to_arch_res(struct rdt_resource *r)
399 {
400 	return container_of(r, struct rdt_hw_resource, r_resctrl);
401 }
402 
403 int parse_cbm(struct rdt_parse_data *data, struct resctrl_schema *s,
404 	      struct rdt_domain *d);
405 int parse_bw(struct rdt_parse_data *data, struct resctrl_schema *s,
406 	     struct rdt_domain *d);
407 
408 extern struct mutex rdtgroup_mutex;
409 
410 extern struct rdt_hw_resource rdt_resources_all[];
411 extern struct rdtgroup rdtgroup_default;
412 DECLARE_STATIC_KEY_FALSE(rdt_alloc_enable_key);
413 
414 extern struct dentry *debugfs_resctrl;
415 
/*
 * Resource levels; resctrl_arch_get_cdp_enabled() uses the value as an
 * index into rdt_resources_all[].
 */
enum resctrl_res_level {
	RDT_RESOURCE_L3,
	RDT_RESOURCE_L2,
	RDT_RESOURCE_MBA,

	/* Keep as the final entry: it counts the resources above */
	RDT_NUM_RESOURCES,
};
424 
425 static inline struct rdt_resource *resctrl_inc(struct rdt_resource *res)
426 {
427 	struct rdt_hw_resource *hw_res = resctrl_to_arch_res(res);
428 
429 	hw_res++;
430 	return &hw_res->r_resctrl;
431 }
432 
433 static inline bool resctrl_arch_get_cdp_enabled(enum resctrl_res_level l)
434 {
435 	return rdt_resources_all[l].cdp_enabled;
436 }
437 
/* Setter counterpart of resctrl_arch_get_cdp_enabled(); defined elsewhere. */
int resctrl_arch_set_cdp_enabled(enum resctrl_res_level l,
				 bool enable);
439 
/*
 * To return the common struct rdt_resource, which is contained in struct
 * rdt_hw_resource, walk the resctrl member of struct rdt_hw_resource.
 *
 * @r is expanded more than once, so pass a plain lvalue without side
 * effects. It is parenthesized at every use so that any lvalue expression
 * is accepted.
 */
#define for_each_rdt_resource(r)					      \
	for ((r) = &rdt_resources_all[0].r_resctrl;			      \
	     (r) <= &rdt_resources_all[RDT_NUM_RESOURCES - 1].r_resctrl;    \
	     (r) = resctrl_inc(r))

/*
 * Filtered walks: the statement following the macro runs only for
 * resources whose capability flag(s) are set.
 *
 * The filter is written as "if (!(cond)) {} else" rather than "if (cond)"
 * so that the hidden if already owns an else. An else the caller writes
 * after the loop body then binds to the caller's own if instead of being
 * captured by the macro.
 */
#define for_each_capable_rdt_resource(r)				      \
	for_each_rdt_resource(r)					      \
		if (!((r)->alloc_capable || (r)->mon_capable)) {} else

#define for_each_alloc_capable_rdt_resource(r)				      \
	for_each_rdt_resource(r)					      \
		if (!((r)->alloc_capable)) {} else

#define for_each_mon_capable_rdt_resource(r)				      \
	for_each_rdt_resource(r)					      \
		if (!((r)->mon_capable)) {} else
460 
/*
 * CPUID.(EAX=10H, ECX=ResID=1).EAX
 *
 * @full is the whole register value; @split exposes the 5-bit cbm_len
 * field declared first in it.
 */
union cpuid_0x10_1_eax {
	struct {
		unsigned int cbm_len:5;
	} split;
	unsigned int full;
};
468 
/*
 * CPUID.(EAX=10H, ECX=ResID=3).EAX
 *
 * @full is the whole register value; @split exposes the 12-bit max_delay
 * field declared first in it.
 */
union cpuid_0x10_3_eax {
	struct {
		unsigned int max_delay:12;
	} split;
	unsigned int full;
};
476 
/*
 * CPUID.(EAX=10H, ECX=ResID).EDX
 *
 * @full is the whole register value; @split exposes the 16-bit cos_max
 * field declared first in it.
 */
union cpuid_0x10_x_edx {
	struct {
		unsigned int cos_max:16;
	} split;
	unsigned int full;
};
484 
485 void rdt_last_cmd_clear(void);
486 void rdt_last_cmd_puts(const char *s);
487 __printf(1, 2)
488 void rdt_last_cmd_printf(const char *fmt, ...);
489 
490 void rdt_ctrl_update(void *arg);
491 struct rdtgroup *rdtgroup_kn_lock_live(struct kernfs_node *kn);
492 void rdtgroup_kn_unlock(struct kernfs_node *kn);
493 int rdtgroup_kn_mode_restrict(struct rdtgroup *r, const char *name);
494 int rdtgroup_kn_mode_restore(struct rdtgroup *r, const char *name,
495 			     umode_t mask);
496 struct rdt_domain *rdt_find_domain(struct rdt_resource *r, int id,
497 				   struct list_head **pos);
498 ssize_t rdtgroup_schemata_write(struct kernfs_open_file *of,
499 				char *buf, size_t nbytes, loff_t off);
500 int rdtgroup_schemata_show(struct kernfs_open_file *of,
501 			   struct seq_file *s, void *v);
502 bool rdtgroup_cbm_overlaps(struct resctrl_schema *s, struct rdt_domain *d,
503 			   unsigned long cbm, int closid, bool exclusive);
504 unsigned int rdtgroup_cbm_to_size(struct rdt_resource *r, struct rdt_domain *d,
505 				  unsigned long cbm);
506 enum rdtgrp_mode rdtgroup_mode_by_closid(int closid);
507 int rdtgroup_tasks_assigned(struct rdtgroup *r);
508 int rdtgroup_locksetup_enter(struct rdtgroup *rdtgrp);
509 int rdtgroup_locksetup_exit(struct rdtgroup *rdtgrp);
510 bool rdtgroup_cbm_overlaps_pseudo_locked(struct rdt_domain *d, unsigned long cbm);
511 bool rdtgroup_pseudo_locked_in_hierarchy(struct rdt_domain *d);
512 int rdt_pseudo_lock_init(void);
513 void rdt_pseudo_lock_release(void);
514 int rdtgroup_pseudo_lock_create(struct rdtgroup *rdtgrp);
515 void rdtgroup_pseudo_lock_remove(struct rdtgroup *rdtgrp);
516 struct rdt_domain *get_domain_from_cpu(int cpu, struct rdt_resource *r);
517 int closids_supported(void);
518 void closid_free(int closid);
519 int alloc_rmid(void);
520 void free_rmid(u32 rmid);
521 int rdt_get_mon_l3_config(struct rdt_resource *r);
522 void mon_event_count(void *info);
523 int rdtgroup_mondata_show(struct seq_file *m, void *arg);
524 void mon_event_read(struct rmid_read *rr, struct rdt_resource *r,
525 		    struct rdt_domain *d, struct rdtgroup *rdtgrp,
526 		    int evtid, int first);
527 void mbm_setup_overflow_handler(struct rdt_domain *dom,
528 				unsigned long delay_ms);
529 void mbm_handle_overflow(struct work_struct *work);
530 void __init intel_rdt_mbm_apply_quirk(void);
531 bool is_mba_sc(struct rdt_resource *r);
532 void setup_default_ctrlval(struct rdt_resource *r, u32 *dc, u32 *dm);
533 u32 delay_bw_map(unsigned long bw, struct rdt_resource *r);
534 void cqm_setup_limbo_handler(struct rdt_domain *dom, unsigned long delay_ms);
535 void cqm_handle_limbo(struct work_struct *work);
536 bool has_busy_rmid(struct rdt_resource *r, struct rdt_domain *d);
537 void __check_limbo(struct rdt_domain *d, bool force_free);
538 void rdt_domain_reconfigure_cdp(struct rdt_resource *r);
539 void __init thread_throttle_mode_init(void);
540 
541 #endif /* _ASM_X86_RESCTRL_INTERNAL_H */
542