xref: /openbmc/linux/arch/x86/kernel/cpu/resctrl/internal.h (revision 31ab09b4218879bc394c9faa6da983a82a694600)
1 /* SPDX-License-Identifier: GPL-2.0 */
2 #ifndef _ASM_X86_RESCTRL_INTERNAL_H
3 #define _ASM_X86_RESCTRL_INTERNAL_H
4 
5 #include <linux/resctrl.h>
6 #include <linux/sched.h>
7 #include <linux/kernfs.h>
8 #include <linux/fs_context.h>
9 #include <linux/jump_label.h>
10 
/* Register numbers of the allocation controls (going by the MSR_ prefix). */
#define MSR_IA32_L3_QOS_CFG		0xc81
#define MSR_IA32_L2_QOS_CFG		0xc82
#define MSR_IA32_L3_CBM_BASE		0xc90
#define MSR_IA32_L2_CBM_BASE		0xd10
#define MSR_IA32_MBA_THRTL_BASE		0xd50
#define MSR_IA32_MBA_BW_BASE		0xc0000200

/*
 * Monitoring register pair: an event ID is programmed into IA32_QM_EVTSEL
 * and the matching event counter is then read from IA32_QM_CTR.
 */
#define MSR_IA32_QM_EVTSEL		0x0c8d
#define MSR_IA32_QM_CTR			0x0c8e

/* Event IDs accepted by IA32_QM_EVTSEL. */
#define QOS_L3_OCCUP_EVENT_ID		0x01
#define QOS_L3_MBM_TOTAL_EVENT_ID	0x02
#define QOS_L3_MBM_LOCAL_EVENT_ID	0x03

/*
 * CDP enable values.
 * NOTE(review): presumably written to the matching *_QOS_CFG MSR - confirm.
 */
#define L3_QOS_CDP_ENABLE		0x01ULL
#define L2_QOS_CDP_ENABLE		0x01ULL

#define CQM_LIMBOCHECK_INTERVAL	1000

#define MBM_CNTR_WIDTH_BASE		24
#define MBM_OVERFLOW_INTERVAL		1000
#define MAX_MBA_BW			100u
#define MBA_IS_LINEAR			0x4
#define MBA_MAX_MBPS			U32_MAX
#define MAX_MBA_BW_AMD			0x800
#define MBM_CNTR_WIDTH_OFFSET_AMD	20

/* Bits 63 and 62 of a value read from MSR_IA32_QM_CTR are status flags. */
#define RMID_VAL_ERROR			BIT_ULL(63)
#define RMID_VAL_UNAVAIL		BIT_ULL(62)
/*
 * That leaves 62 bits of MSR_IA32_QM_CTR for the returned data. Hardware
 * reports its counter width as an offset from MBM_CNTR_WIDTH_BASE, so the
 * largest offset that still fits is 62 minus that base.
 */
#define MBM_CNTR_WIDTH_OFFSET_MAX (62 - MBM_CNTR_WIDTH_BASE)
51 
52 
53 struct rdt_fs_context {
54 	struct kernfs_fs_context	kfc;
55 	bool				enable_cdpl2;
56 	bool				enable_cdpl3;
57 	bool				enable_mba_mbps;
58 };
59 
60 static inline struct rdt_fs_context *rdt_fc2context(struct fs_context *fc)
61 {
62 	struct kernfs_fs_context *kfc = fc->fs_private;
63 
64 	return container_of(kfc, struct rdt_fs_context, kfc);
65 }
66 
/*
 * Static keys declared for users of this header; their definitions are not
 * part of this file.
 */
DECLARE_STATIC_KEY_FALSE(rdt_enable_key);
DECLARE_STATIC_KEY_FALSE(rdt_mon_enable_key);
69 
70 /**
71  * struct mon_evt - Entry in the event list of a resource
72  * @evtid:		event id
73  * @name:		name of the event
74  * @list:		entry in &rdt_resource->evt_list
75  */
76 struct mon_evt {
77 	u32			evtid;
78 	char			*name;
79 	struct list_head	list;
80 };
81 
/**
 * union mon_data_bits - Per-event-file monitoring details
 * @priv:	Pointer-typed view of the union, so that the contents of @u
 *		can be stored as kernfs private data
 * @u:		Bit-field view carrying the identifiers below
 * @u.rid:	Id of the resource the event file is associated with (10 bits)
 * @u.evtid:	Id of the event the event file is associated with (8 bits)
 * @u.domid:	Domain the event file belongs to (14 bits)
 *
 * The three bit-fields add up to 32 bits.
 */
union mon_data_bits {
	void *priv;
	struct {
		unsigned int rid : 10;
		unsigned int evtid : 8;
		unsigned int domid : 14;
	} u;
};
99 
100 struct rmid_read {
101 	struct rdtgroup		*rgrp;
102 	struct rdt_resource	*r;
103 	struct rdt_domain	*d;
104 	int			evtid;
105 	bool			first;
106 	u64			val;
107 };
108 
/*
 * Globals shared across the resctrl code; they are only declared here.
 * rdt_mon_features is tested bit-wise by the is_*_enabled() helpers in
 * this header, with one bit per QOS_L3_*_EVENT_ID value.
 */
extern unsigned int resctrl_cqm_threshold;
extern bool rdt_alloc_capable;
extern bool rdt_mon_capable;
extern unsigned int rdt_mon_features;
extern struct list_head resctrl_schema_all;
114 
/**
 * enum rdt_group_type - Type of a resource group (see &rdtgroup.type)
 * @RDTCTRL_GROUP:	NOTE(review): going by the name, a ctrl_mon group
 * @RDTMON_GROUP:	NOTE(review): going by the name, a monitor only group
 * @RDT_NUM_GROUP:	number of group types; keep it as the last enumerator
 */
enum rdt_group_type {
	RDTCTRL_GROUP	= 0,
	RDTMON_GROUP	= 1,
	RDT_NUM_GROUP	= 2,
};
120 
/**
 * enum rdtgrp_mode - Mode of a RDT resource group
 * @RDT_MODE_SHAREABLE: The allocations of this resource group may be shared
 * @RDT_MODE_EXCLUSIVE: The allocations of this resource group may not be
 *                      shared
 * @RDT_MODE_PSEUDO_LOCKSETUP: The resource group is going to be used for
 *                             Pseudo-Locking
 * @RDT_MODE_PSEUDO_LOCKED: The allocations of this resource group may not be
 *                          shared AND they are Cache Pseudo-Locked
 * @RDT_NUM_MODES: Total number of modes
 *
 * A resource group's mode controls how much its allocations may overlap
 * with those of other resource groups (classes of service). The user
 * changes the mode by writing to the group's "mode" resctrl file.
 *
 * Writing the matching text to the "mode" file selects the "shareable",
 * "exclusive" and "pseudo-locksetup" modes. "pseudo-locked" is entered
 * when the schemata is written while the group is in "pseudo-locksetup"
 * mode.
 */
enum rdtgrp_mode {
	RDT_MODE_SHAREABLE		= 0,
	RDT_MODE_EXCLUSIVE		= 1,
	RDT_MODE_PSEUDO_LOCKSETUP	= 2,
	RDT_MODE_PSEUDO_LOCKED		= 3,

	/* Counts the modes above, so it has to stay last */
	RDT_NUM_MODES			= 4,
};
149 
150 /**
151  * struct mongroup - store mon group's data in resctrl fs.
152  * @mon_data_kn:		kernfs node for the mon_data directory
153  * @parent:			parent rdtgrp
154  * @crdtgrp_list:		child rdtgroup node list
155  * @rmid:			rmid for this rdtgroup
156  */
157 struct mongroup {
158 	struct kernfs_node	*mon_data_kn;
159 	struct rdtgroup		*parent;
160 	struct list_head	crdtgrp_list;
161 	u32			rmid;
162 };
163 
164 /**
165  * struct pseudo_lock_region - pseudo-lock region information
166  * @s:			Resctrl schema for the resource to which this
167  *			pseudo-locked region belongs
168  * @d:			RDT domain to which this pseudo-locked region
169  *			belongs
170  * @cbm:		bitmask of the pseudo-locked region
171  * @lock_thread_wq:	waitqueue used to wait on the pseudo-locking thread
172  *			completion
173  * @thread_done:	variable used by waitqueue to test if pseudo-locking
174  *			thread completed
175  * @cpu:		core associated with the cache on which the setup code
176  *			will be run
177  * @line_size:		size of the cache lines
178  * @size:		size of pseudo-locked region in bytes
179  * @kmem:		the kernel memory associated with pseudo-locked region
180  * @minor:		minor number of character device associated with this
181  *			region
182  * @debugfs_dir:	pointer to this region's directory in the debugfs
183  *			filesystem
184  * @pm_reqs:		Power management QoS requests related to this region
185  */
186 struct pseudo_lock_region {
187 	struct resctrl_schema	*s;
188 	struct rdt_domain	*d;
189 	u32			cbm;
190 	wait_queue_head_t	lock_thread_wq;
191 	int			thread_done;
192 	int			cpu;
193 	unsigned int		line_size;
194 	unsigned int		size;
195 	void			*kmem;
196 	unsigned int		minor;
197 	struct dentry		*debugfs_dir;
198 	struct list_head	pm_reqs;
199 };
200 
201 /**
202  * struct rdtgroup - store rdtgroup's data in resctrl file system.
203  * @kn:				kernfs node
204  * @rdtgroup_list:		linked list for all rdtgroups
205  * @closid:			closid for this rdtgroup
206  * @cpu_mask:			CPUs assigned to this rdtgroup
207  * @flags:			status bits
208  * @waitcount:			how many cpus expect to find this
209  *				group when they acquire rdtgroup_mutex
210  * @type:			indicates type of this rdtgroup - either
211  *				monitor only or ctrl_mon group
212  * @mon:			mongroup related data
213  * @mode:			mode of resource group
214  * @plr:			pseudo-locked region
215  */
216 struct rdtgroup {
217 	struct kernfs_node		*kn;
218 	struct list_head		rdtgroup_list;
219 	u32				closid;
220 	struct cpumask			cpu_mask;
221 	int				flags;
222 	atomic_t			waitcount;
223 	enum rdt_group_type		type;
224 	struct mongroup			mon;
225 	enum rdtgrp_mode		mode;
226 	struct pseudo_lock_region	*plr;
227 };
228 
/* Values for rdtgroup.flags */
#define RDT_DELETED			1

/* Values for rftype.flags */
#define RFTYPE_FLAGS_CPUS_LIST		1

/*
 * File type flags for the base and info directories. The CTRL, MON and
 * TOP bits are expressed through their shift counts; the RF_* names
 * further down are ready-made combinations of the single-bit flags.
 */
#define RF_CTRLSHIFT			4
#define RF_MONSHIFT			5
#define RF_TOPSHIFT			6

#define RFTYPE_INFO			BIT(0)
#define RFTYPE_BASE			BIT(1)
#define RFTYPE_CTRL			BIT(RF_CTRLSHIFT)
#define RFTYPE_MON			BIT(RF_MONSHIFT)
#define RFTYPE_TOP			BIT(RF_TOPSHIFT)
#define RFTYPE_RES_CACHE		BIT(8)
#define RFTYPE_RES_MB			BIT(9)

#define RF_CTRL_INFO			(RFTYPE_INFO | RFTYPE_CTRL)
#define RF_MON_INFO			(RFTYPE_INFO | RFTYPE_MON)
#define RF_TOP_INFO			(RFTYPE_INFO | RFTYPE_TOP)
#define RF_CTRL_BASE			(RFTYPE_BASE | RFTYPE_CTRL)
252 
/* List of all resource groups */
extern struct list_head rdt_all_groups;

/* NOTE(review): presumably column widths used when formatting output - confirm */
extern int max_name_width, max_data_width;

/* Setup and teardown entry points, annotated __init and __exit respectively */
int __init rdtgroup_init(void);
void __exit rdtgroup_exit(void);
260 
261 /**
262  * struct rftype - describe each file in the resctrl file system
263  * @name:	File name
264  * @mode:	Access mode
265  * @kf_ops:	File operations
266  * @flags:	File specific RFTYPE_FLAGS_* flags
267  * @fflags:	File specific RF_* or RFTYPE_* flags
268  * @seq_show:	Show content of the file
269  * @write:	Write to the file
270  */
271 struct rftype {
272 	char			*name;
273 	umode_t			mode;
274 	const struct kernfs_ops	*kf_ops;
275 	unsigned long		flags;
276 	unsigned long		fflags;
277 
278 	int (*seq_show)(struct kernfs_open_file *of,
279 			struct seq_file *sf, void *v);
280 	/*
281 	 * write() is the generic write callback which maps directly to
282 	 * kernfs write operation and overrides all other operations.
283 	 * Maximum write size is determined by ->max_write_len.
284 	 */
285 	ssize_t (*write)(struct kernfs_open_file *of,
286 			 char *buf, size_t nbytes, loff_t off);
287 };
288 
289 /**
290  * struct mbm_state - status for each MBM counter in each domain
291  * @chunks:	Total data moved (multiply by rdt_group.mon_scale to get bytes)
292  * @prev_msr:	Value of IA32_QM_CTR for this RMID last time we read it
293  * @prev_bw_msr:Value of previous IA32_QM_CTR for bandwidth counting
294  * @prev_bw:	The most recent bandwidth in MBps
295  * @delta_bw:	Difference between the current and previous bandwidth
296  * @delta_comp:	Indicates whether to compute the delta_bw
297  */
298 struct mbm_state {
299 	u64	chunks;
300 	u64	prev_msr;
301 	u64	prev_bw_msr;
302 	u32	prev_bw;
303 	u32	delta_bw;
304 	bool	delta_comp;
305 };
306 
307 /**
308  * struct rdt_hw_domain - Arch private attributes of a set of CPUs that share
309  *			  a resource
310  * @d_resctrl:	Properties exposed to the resctrl file system
311  * @ctrl_val:	array of cache or mem ctrl values (indexed by CLOSID)
312  * @mbps_val:	When mba_sc is enabled, this holds the bandwidth in MBps
313  *
314  * Members of this structure are accessed via helpers that provide abstraction.
315  */
316 struct rdt_hw_domain {
317 	struct rdt_domain		d_resctrl;
318 	u32				*ctrl_val;
319 	u32				*mbps_val;
320 };
321 
322 static inline struct rdt_hw_domain *resctrl_to_arch_dom(struct rdt_domain *r)
323 {
324 	return container_of(r, struct rdt_hw_domain, d_resctrl);
325 }
326 
327 /**
328  * struct msr_param - set a range of MSRs from a domain
329  * @res:       The resource to use
330  * @low:       Beginning index from base MSR
331  * @high:      End index
332  */
333 struct msr_param {
334 	struct rdt_resource	*res;
335 	u32			low;
336 	u32			high;
337 };
338 
339 static inline bool is_llc_occupancy_enabled(void)
340 {
341 	return (rdt_mon_features & (1 << QOS_L3_OCCUP_EVENT_ID));
342 }
343 
344 static inline bool is_mbm_total_enabled(void)
345 {
346 	return (rdt_mon_features & (1 << QOS_L3_MBM_TOTAL_EVENT_ID));
347 }
348 
349 static inline bool is_mbm_local_enabled(void)
350 {
351 	return (rdt_mon_features & (1 << QOS_L3_MBM_LOCAL_EVENT_ID));
352 }
353 
/* True when at least one of the two MBM events (total, local) is enabled. */
static inline bool is_mbm_enabled(void)
{
	if (is_mbm_total_enabled())
		return true;

	return is_mbm_local_enabled();
}
358 
359 static inline bool is_mbm_event(int e)
360 {
361 	return (e >= QOS_L3_MBM_TOTAL_EVENT_ID &&
362 		e <= QOS_L3_MBM_LOCAL_EVENT_ID);
363 }
364 
/**
 * struct rdt_parse_data - argument bundle handed to parse_cbm()/parse_bw()
 * @rdtgrp:	resource group (NOTE(review): presumably the group whose
 *		configuration is being parsed - confirm)
 * @buf:	character buffer (NOTE(review): presumably the text to parse -
 *		confirm)
 */
struct rdt_parse_data {
	struct rdtgroup *rdtgrp;
	char *buf;
};
369 
370 /**
371  * struct rdt_hw_resource - arch private attributes of a resctrl resource
372  * @r_resctrl:		Attributes of the resource used directly by resctrl.
373  * @num_closid:		Maximum number of closid this hardware can support,
374  *			regardless of CDP. This is exposed via
375  *			resctrl_arch_get_num_closid() to avoid confusion
376  *			with struct resctrl_schema's property of the same name,
377  *			which has been corrected for features like CDP.
378  * @msr_base:		Base MSR address for CBMs
379  * @msr_update:		Function pointer to update QOS MSRs
380  * @mon_scale:		cqm counter * mon_scale = occupancy in bytes
381  * @mbm_width:		Monitor width, to detect and correct for overflow.
382  * @cdp_enabled:	CDP state of this resource
383  *
384  * Members of this structure are either private to the architecture
385  * e.g. mbm_width, or accessed via helpers that provide abstraction. e.g.
386  * msr_update and msr_base.
387  */
388 struct rdt_hw_resource {
389 	struct rdt_resource	r_resctrl;
390 	u32			num_closid;
391 	unsigned int		msr_base;
392 	void (*msr_update)	(struct rdt_domain *d, struct msr_param *m,
393 				 struct rdt_resource *r);
394 	unsigned int		mon_scale;
395 	unsigned int		mbm_width;
396 	bool			cdp_enabled;
397 };
398 
399 static inline struct rdt_hw_resource *resctrl_to_arch_res(struct rdt_resource *r)
400 {
401 	return container_of(r, struct rdt_hw_resource, r_resctrl);
402 }
403 
/*
 * Parsers sharing one signature: parse data, schema and domain in, int out.
 * NOTE(review): going by the names, parse_cbm() handles cache bit masks and
 * parse_bw() handles bandwidth values - confirm against the definitions.
 */
int parse_cbm(struct rdt_parse_data *data, struct resctrl_schema *s,
	      struct rdt_domain *d);
int parse_bw(struct rdt_parse_data *data, struct resctrl_schema *s,
	     struct rdt_domain *d);

/* Mutex referred to by the @waitcount description of struct rdtgroup */
extern struct mutex rdtgroup_mutex;

/* Array of all arch resources; walked by for_each_rdt_resource() */
extern struct rdt_hw_resource rdt_resources_all[];
extern struct rdtgroup rdtgroup_default;
DECLARE_STATIC_KEY_FALSE(rdt_alloc_enable_key);

extern struct dentry *debugfs_resctrl;
416 
/*
 * Resource levels. resctrl_arch_get_cdp_enabled() uses these values as an
 * index into rdt_resources_all[], and RDT_NUM_RESOURCES bounds the walk
 * done by for_each_rdt_resource().
 */
enum resctrl_res_level {
	RDT_RESOURCE_L3		= 0,
	RDT_RESOURCE_L2		= 1,
	RDT_RESOURCE_MBA	= 2,

	/* Counts the levels above, so it must be the last */
	RDT_NUM_RESOURCES	= 3,
};
425 
426 static inline struct rdt_resource *resctrl_inc(struct rdt_resource *res)
427 {
428 	struct rdt_hw_resource *hw_res = resctrl_to_arch_res(res);
429 
430 	hw_res++;
431 	return &hw_res->r_resctrl;
432 }
433 
434 static inline bool resctrl_arch_get_cdp_enabled(enum resctrl_res_level l)
435 {
436 	return rdt_resources_all[l].cdp_enabled;
437 }
438 
/*
 * Counterpart of resctrl_arch_get_cdp_enabled(), implemented elsewhere.
 * NOTE(review): the meaning of the int return value is not visible in this
 * header - confirm against the definition.
 */
int resctrl_arch_set_cdp_enabled(enum resctrl_res_level l, bool enable);
440 
/*
 * Walk every entry of rdt_resources_all[] and hand out the common
 * struct rdt_resource that each struct rdt_hw_resource embeds as its
 * r_resctrl member. @r is the struct rdt_resource pointer used as the
 * cursor; resctrl_inc() moves it to the next array entry.
 */
#define for_each_rdt_resource(r)					      \
	for ((r) = &rdt_resources_all[0].r_resctrl;			      \
	     (r) <= &rdt_resources_all[RDT_NUM_RESOURCES - 1].r_resctrl;     \
	     (r) = resctrl_inc(r))

/*
 * Filtered variants of for_each_rdt_resource(): the statement following
 * the macro only runs for resources that satisfy the condition.
 *
 * Each one ends in "if (!(cond)) {} else" rather than a bare "if (cond)".
 * With a bare "if", an "else" written after the loop body would silently
 * attach to the if hidden in the macro; this form leaves no unmatched if.
 */
#define for_each_capable_rdt_resource(r)				      \
	for_each_rdt_resource(r)					      \
		if (!((r)->alloc_capable || (r)->mon_capable)) {} else

#define for_each_alloc_capable_rdt_resource(r)				      \
	for_each_rdt_resource(r)					      \
		if (!((r)->alloc_capable)) {} else

#define for_each_mon_capable_rdt_resource(r)				      \
	for_each_rdt_resource(r)					      \
		if (!((r)->mon_capable)) {} else

#define for_each_alloc_enabled_rdt_resource(r)				      \
	for_each_rdt_resource(r)					      \
		if (!((r)->alloc_enabled)) {} else

#define for_each_mon_enabled_rdt_resource(r)				      \
	for_each_rdt_resource(r)					      \
		if (!((r)->mon_enabled)) {} else
469 
/*
 * CPUID.(EAX=10H, ECX=ResID=1).EAX
 *
 * @full holds the whole 32-bit register value; @split overlays it and
 * exposes the 5-bit cbm_len field.
 */
union cpuid_0x10_1_eax {
	struct {
		unsigned int cbm_len : 5;
	} split;
	unsigned int full;
};
477 
/*
 * CPUID.(EAX=10H, ECX=ResID=3).EAX
 *
 * @full holds the whole 32-bit register value; @split overlays it and
 * exposes the 12-bit max_delay field.
 */
union cpuid_0x10_3_eax {
	struct {
		unsigned int max_delay : 12;
	} split;
	unsigned int full;
};
485 
/*
 * CPUID.(EAX=10H, ECX=ResID).EDX
 *
 * @full holds the whole 32-bit register value; @split overlays it and
 * exposes the 16-bit cos_max field.
 */
union cpuid_0x10_x_edx {
	struct {
		unsigned int cos_max : 16;
	} split;
	unsigned int full;
};
493 
/*
 * Prototypes of functions implemented elsewhere in the resctrl code.
 * NOTE(review): the grouping comments below are inferred from the function
 * names and signatures only; confirm against the definitions.
 */

/* "Last command" status helpers; the printf variant is annotated __printf(1, 2) */
void rdt_last_cmd_clear(void);
void rdt_last_cmd_puts(const char *s);
__printf(1, 2)
void rdt_last_cmd_printf(const char *fmt, ...);

/* Control updates, rdtgroup/kernfs helpers and schemata file handlers */
void rdt_ctrl_update(void *arg);
struct rdtgroup *rdtgroup_kn_lock_live(struct kernfs_node *kn);
void rdtgroup_kn_unlock(struct kernfs_node *kn);
int rdtgroup_kn_mode_restrict(struct rdtgroup *r, const char *name);
int rdtgroup_kn_mode_restore(struct rdtgroup *r, const char *name,
			     umode_t mask);
struct rdt_domain *rdt_find_domain(struct rdt_resource *r, int id,
				   struct list_head **pos);
/* These two have the signatures of the write and seq_show members of struct rftype */
ssize_t rdtgroup_schemata_write(struct kernfs_open_file *of,
				char *buf, size_t nbytes, loff_t off);
int rdtgroup_schemata_show(struct kernfs_open_file *of,
			   struct seq_file *s, void *v);
bool rdtgroup_cbm_overlaps(struct resctrl_schema *s, struct rdt_domain *d,
			   unsigned long cbm, int closid, bool exclusive);
unsigned int rdtgroup_cbm_to_size(struct rdt_resource *r, struct rdt_domain *d,
				  unsigned long cbm);
enum rdtgrp_mode rdtgroup_mode_by_closid(int closid);
int rdtgroup_tasks_assigned(struct rdtgroup *r);
/* Pseudo-locking (see struct pseudo_lock_region) */
int rdtgroup_locksetup_enter(struct rdtgroup *rdtgrp);
int rdtgroup_locksetup_exit(struct rdtgroup *rdtgrp);
bool rdtgroup_cbm_overlaps_pseudo_locked(struct rdt_domain *d, unsigned long cbm);
bool rdtgroup_pseudo_locked_in_hierarchy(struct rdt_domain *d);
int rdt_pseudo_lock_init(void);
void rdt_pseudo_lock_release(void);
int rdtgroup_pseudo_lock_create(struct rdtgroup *rdtgrp);
void rdtgroup_pseudo_lock_remove(struct rdtgroup *rdtgrp);
/* Domain lookup, CLOSID and RMID helpers */
struct rdt_domain *get_domain_from_cpu(int cpu, struct rdt_resource *r);
int closids_supported(void);
void closid_free(int closid);
int alloc_rmid(void);
void free_rmid(u32 rmid);
/* Monitoring */
int rdt_get_mon_l3_config(struct rdt_resource *r);
void mon_event_count(void *info);
int rdtgroup_mondata_show(struct seq_file *m, void *arg);
void rmdir_mondata_subdir_allrdtgrp(struct rdt_resource *r,
				    unsigned int dom_id);
void mkdir_mondata_subdir_allrdtgrp(struct rdt_resource *r,
				    struct rdt_domain *d);
/* Note: @first is an int here while struct rmid_read stores a bool of the same name */
void mon_event_read(struct rmid_read *rr, struct rdt_resource *r,
		    struct rdt_domain *d, struct rdtgroup *rdtgrp,
		    int evtid, int first);
void mbm_setup_overflow_handler(struct rdt_domain *dom,
				unsigned long delay_ms);
void mbm_handle_overflow(struct work_struct *work);
void __init intel_rdt_mbm_apply_quirk(void);
/* Memory bandwidth allocation helpers */
bool is_mba_sc(struct rdt_resource *r);
void setup_default_ctrlval(struct rdt_resource *r, u32 *dc, u32 *dm);
u32 delay_bw_map(unsigned long bw, struct rdt_resource *r);
/* Limbo handling, CDP reconfiguration and thread throttle mode setup */
void cqm_setup_limbo_handler(struct rdt_domain *dom, unsigned long delay_ms);
void cqm_handle_limbo(struct work_struct *work);
bool has_busy_rmid(struct rdt_resource *r, struct rdt_domain *d);
void __check_limbo(struct rdt_domain *d, bool force_free);
void rdt_domain_reconfigure_cdp(struct rdt_resource *r);
void __init thread_throttle_mode_init(void);
553 
554 #endif /* _ASM_X86_RESCTRL_INTERNAL_H */
555