xref: /openbmc/linux/arch/x86/kernel/cpu/resctrl/rdtgroup.c (revision 5ee9cd065836e5934710ca35653bce7905add20b)
1  // SPDX-License-Identifier: GPL-2.0-only
2  /*
3   * User interface for Resource Allocation in Resource Director Technology(RDT)
4   *
5   * Copyright (C) 2016 Intel Corporation
6   *
7   * Author: Fenghua Yu <fenghua.yu@intel.com>
8   *
9   * More information about RDT can be found in the Intel (R) x86 Architecture
10   * Software Developer Manual.
11   */
12  
13  #define pr_fmt(fmt)	KBUILD_MODNAME ": " fmt
14  
15  #include <linux/cacheinfo.h>
16  #include <linux/cpu.h>
17  #include <linux/debugfs.h>
18  #include <linux/fs.h>
19  #include <linux/fs_parser.h>
20  #include <linux/sysfs.h>
21  #include <linux/kernfs.h>
22  #include <linux/seq_buf.h>
23  #include <linux/seq_file.h>
24  #include <linux/sched/signal.h>
25  #include <linux/sched/task.h>
26  #include <linux/slab.h>
27  #include <linux/task_work.h>
28  #include <linux/user_namespace.h>
29  
30  #include <uapi/linux/magic.h>
31  
32  #include <asm/resctrl.h>
33  #include "internal.h"
34  
35  DEFINE_STATIC_KEY_FALSE(rdt_enable_key);
36  DEFINE_STATIC_KEY_FALSE(rdt_mon_enable_key);
37  DEFINE_STATIC_KEY_FALSE(rdt_alloc_enable_key);
38  static struct kernfs_root *rdt_root;
39  struct rdtgroup rdtgroup_default;
40  LIST_HEAD(rdt_all_groups);
41  
42  /* list of entries for the schemata file */
43  LIST_HEAD(resctrl_schema_all);
44  
45  /* Kernel fs node for "info" directory under root */
46  static struct kernfs_node *kn_info;
47  
48  /* Kernel fs node for "mon_groups" directory under root */
49  static struct kernfs_node *kn_mongrp;
50  
51  /* Kernel fs node for "mon_data" directory under root */
52  static struct kernfs_node *kn_mondata;
53  
54  static struct seq_buf last_cmd_status;
55  static char last_cmd_status_buf[512];
56  
57  struct dentry *debugfs_resctrl;
58  
59  void rdt_last_cmd_clear(void)
60  {
61  	lockdep_assert_held(&rdtgroup_mutex);
62  	seq_buf_clear(&last_cmd_status);
63  }
64  
65  void rdt_last_cmd_puts(const char *s)
66  {
67  	lockdep_assert_held(&rdtgroup_mutex);
68  	seq_buf_puts(&last_cmd_status, s);
69  }
70  
71  void rdt_last_cmd_printf(const char *fmt, ...)
72  {
73  	va_list ap;
74  
75  	va_start(ap, fmt);
76  	lockdep_assert_held(&rdtgroup_mutex);
77  	seq_buf_vprintf(&last_cmd_status, fmt, ap);
78  	va_end(ap);
79  }
80  
81  void rdt_staged_configs_clear(void)
82  {
83  	struct rdt_resource *r;
84  	struct rdt_domain *dom;
85  
86  	lockdep_assert_held(&rdtgroup_mutex);
87  
88  	for_each_alloc_capable_rdt_resource(r) {
89  		list_for_each_entry(dom, &r->domains, list)
90  			memset(dom->staged_config, 0, sizeof(dom->staged_config));
91  	}
92  }
93  
94  /*
95   * Trivial allocator for CLOSIDs. Since h/w only supports a small number,
96   * we can keep a bitmap of free CLOSIDs in a single integer.
97   *
98   * Using a global CLOSID across all resources has some advantages and
99   * some drawbacks:
100   * + We can simply set "current->closid" to assign a task to a resource
101   *   group.
102   * + Context switch code can avoid extra memory references deciding which
103   *   CLOSID to load into the PQR_ASSOC MSR
104   * - We give up some options in configuring resource groups across multi-socket
105   *   systems.
106   * - Our choices on how to configure each resource become progressively more
107   *   limited as the number of resources grows.
108   */
109  static int closid_free_map;
110  static int closid_free_map_len;
111  
112  int closids_supported(void)
113  {
114  	return closid_free_map_len;
115  }
116  
117  static void closid_init(void)
118  {
119  	struct resctrl_schema *s;
120  	u32 rdt_min_closid = 32;
121  
122  	/* Compute rdt_min_closid across all resources */
123  	list_for_each_entry(s, &resctrl_schema_all, list)
124  		rdt_min_closid = min(rdt_min_closid, s->num_closid);
125  
126  	closid_free_map = BIT_MASK(rdt_min_closid) - 1;
127  
128  	/* CLOSID 0 is always reserved for the default group */
129  	closid_free_map &= ~1;
130  	closid_free_map_len = rdt_min_closid;
131  }
132  
133  static int closid_alloc(void)
134  {
135  	u32 closid = ffs(closid_free_map);
136  
137  	if (closid == 0)
138  		return -ENOSPC;
139  	closid--;
140  	closid_free_map &= ~(1 << closid);
141  
142  	return closid;
143  }
144  
145  void closid_free(int closid)
146  {
147  	closid_free_map |= 1 << closid;
148  }
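
/*
 * Worked example (illustrative, assuming hardware supports 4 CLOSIDs):
 * closid_init() computes BIT_MASK(4) - 1 = 0b1111, then clears bit 0 for
 * the default group, leaving closid_free_map = 0b1110. A subsequent
 * closid_alloc() finds the lowest set bit with ffs() and returns CLOSID 1,
 * leaving 0b1100; closid_free(1) restores the map to 0b1110.
 */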
149  
150  /**
151   * closid_allocated - test if provided closid is in use
152   * @closid: closid to be tested
153   *
154   * Return: true if @closid is currently associated with a resource group,
155   * false if @closid is free
156   */
157  static bool closid_allocated(unsigned int closid)
158  {
159  	return (closid_free_map & (1 << closid)) == 0;
160  }
161  
162  /**
163   * rdtgroup_mode_by_closid - Return mode of resource group with closid
164   * @closid: closid of the resource group
165   *
166   * Each resource group is associated with a @closid. Here the mode
167   * of a resource group can be queried by searching for it using its closid.
168   *
169   * Return: mode as &enum rdtgrp_mode of resource group with closid @closid
170   */
171  enum rdtgrp_mode rdtgroup_mode_by_closid(int closid)
172  {
173  	struct rdtgroup *rdtgrp;
174  
175  	list_for_each_entry(rdtgrp, &rdt_all_groups, rdtgroup_list) {
176  		if (rdtgrp->closid == closid)
177  			return rdtgrp->mode;
178  	}
179  
180  	return RDT_NUM_MODES;
181  }
182  
183  static const char * const rdt_mode_str[] = {
184  	[RDT_MODE_SHAREABLE]		= "shareable",
185  	[RDT_MODE_EXCLUSIVE]		= "exclusive",
186  	[RDT_MODE_PSEUDO_LOCKSETUP]	= "pseudo-locksetup",
187  	[RDT_MODE_PSEUDO_LOCKED]	= "pseudo-locked",
188  };
189  
190  /**
191   * rdtgroup_mode_str - Return the string representation of mode
192   * @mode: the resource group mode as &enum rdtgroup_mode
193   *
194   * Return: string representation of valid mode, "unknown" otherwise
195   */
196  static const char *rdtgroup_mode_str(enum rdtgrp_mode mode)
197  {
198  	if (mode < RDT_MODE_SHAREABLE || mode >= RDT_NUM_MODES)
199  		return "unknown";
200  
201  	return rdt_mode_str[mode];
202  }
203  
204  /* set uid and gid of rdtgroup dirs and files to that of the creator */
205  static int rdtgroup_kn_set_ugid(struct kernfs_node *kn)
206  {
207  	struct iattr iattr = { .ia_valid = ATTR_UID | ATTR_GID,
208  				.ia_uid = current_fsuid(),
209  				.ia_gid = current_fsgid(), };
210  
211  	if (uid_eq(iattr.ia_uid, GLOBAL_ROOT_UID) &&
212  	    gid_eq(iattr.ia_gid, GLOBAL_ROOT_GID))
213  		return 0;
214  
215  	return kernfs_setattr(kn, &iattr);
216  }
217  
218  static int rdtgroup_add_file(struct kernfs_node *parent_kn, struct rftype *rft)
219  {
220  	struct kernfs_node *kn;
221  	int ret;
222  
223  	kn = __kernfs_create_file(parent_kn, rft->name, rft->mode,
224  				  GLOBAL_ROOT_UID, GLOBAL_ROOT_GID,
225  				  0, rft->kf_ops, rft, NULL, NULL);
226  	if (IS_ERR(kn))
227  		return PTR_ERR(kn);
228  
229  	ret = rdtgroup_kn_set_ugid(kn);
230  	if (ret) {
231  		kernfs_remove(kn);
232  		return ret;
233  	}
234  
235  	return 0;
236  }
237  
238  static int rdtgroup_seqfile_show(struct seq_file *m, void *arg)
239  {
240  	struct kernfs_open_file *of = m->private;
241  	struct rftype *rft = of->kn->priv;
242  
243  	if (rft->seq_show)
244  		return rft->seq_show(of, m, arg);
245  	return 0;
246  }
247  
248  static ssize_t rdtgroup_file_write(struct kernfs_open_file *of, char *buf,
249  				   size_t nbytes, loff_t off)
250  {
251  	struct rftype *rft = of->kn->priv;
252  
253  	if (rft->write)
254  		return rft->write(of, buf, nbytes, off);
255  
256  	return -EINVAL;
257  }
258  
259  static const struct kernfs_ops rdtgroup_kf_single_ops = {
260  	.atomic_write_len	= PAGE_SIZE,
261  	.write			= rdtgroup_file_write,
262  	.seq_show		= rdtgroup_seqfile_show,
263  };
264  
265  static const struct kernfs_ops kf_mondata_ops = {
266  	.atomic_write_len	= PAGE_SIZE,
267  	.seq_show		= rdtgroup_mondata_show,
268  };
269  
270  static bool is_cpu_list(struct kernfs_open_file *of)
271  {
272  	struct rftype *rft = of->kn->priv;
273  
274  	return rft->flags & RFTYPE_FLAGS_CPUS_LIST;
275  }
276  
277  static int rdtgroup_cpus_show(struct kernfs_open_file *of,
278  			      struct seq_file *s, void *v)
279  {
280  	struct rdtgroup *rdtgrp;
281  	struct cpumask *mask;
282  	int ret = 0;
283  
284  	rdtgrp = rdtgroup_kn_lock_live(of->kn);
285  
286  	if (rdtgrp) {
287  		if (rdtgrp->mode == RDT_MODE_PSEUDO_LOCKED) {
288  			if (!rdtgrp->plr->d) {
289  				rdt_last_cmd_clear();
290  				rdt_last_cmd_puts("Cache domain offline\n");
291  				ret = -ENODEV;
292  			} else {
293  				mask = &rdtgrp->plr->d->cpu_mask;
294  				seq_printf(s, is_cpu_list(of) ?
295  					   "%*pbl\n" : "%*pb\n",
296  					   cpumask_pr_args(mask));
297  			}
298  		} else {
299  			seq_printf(s, is_cpu_list(of) ? "%*pbl\n" : "%*pb\n",
300  				   cpumask_pr_args(&rdtgrp->cpu_mask));
301  		}
302  	} else {
303  		ret = -ENOENT;
304  	}
305  	rdtgroup_kn_unlock(of->kn);
306  
307  	return ret;
308  }
309  
310  /*
311   * This is safe against resctrl_sched_in() called from __switch_to()
312   * because __switch_to() is executed with interrupts disabled. A local call
313   * from update_closid_rmid() is protected against __switch_to() because
314   * preemption is disabled.
315   */
316  static void update_cpu_closid_rmid(void *info)
317  {
318  	struct rdtgroup *r = info;
319  
320  	if (r) {
321  		this_cpu_write(pqr_state.default_closid, r->closid);
322  		this_cpu_write(pqr_state.default_rmid, r->mon.rmid);
323  	}
324  
325  	/*
326  	 * We cannot unconditionally write the MSR because the current
327  	 * executing task might have its own closid selected. Just reuse
328  	 * the context switch code.
329  	 */
330  	resctrl_sched_in(current);
331  }
332  
333  /*
334   * Update the PQR_ASSOC MSR on all cpus in @cpu_mask.
335   *
336   * Per task closids/rmids must have been set up before calling this function.
337   */
338  static void
339  update_closid_rmid(const struct cpumask *cpu_mask, struct rdtgroup *r)
340  {
341  	on_each_cpu_mask(cpu_mask, update_cpu_closid_rmid, r, 1);
342  }
343  
344  static int cpus_mon_write(struct rdtgroup *rdtgrp, cpumask_var_t newmask,
345  			  cpumask_var_t tmpmask)
346  {
347  	struct rdtgroup *prgrp = rdtgrp->mon.parent, *crgrp;
348  	struct list_head *head;
349  
350  	/* Check whether cpus belong to parent ctrl group */
351  	cpumask_andnot(tmpmask, newmask, &prgrp->cpu_mask);
352  	if (!cpumask_empty(tmpmask)) {
353  		rdt_last_cmd_puts("Can only add CPUs to mongroup that belong to parent\n");
354  		return -EINVAL;
355  	}
356  
357  	/* Check whether cpus are dropped from this group */
358  	cpumask_andnot(tmpmask, &rdtgrp->cpu_mask, newmask);
359  	if (!cpumask_empty(tmpmask)) {
360  		/* Give any dropped cpus to parent rdtgroup */
361  		cpumask_or(&prgrp->cpu_mask, &prgrp->cpu_mask, tmpmask);
362  		update_closid_rmid(tmpmask, prgrp);
363  	}
364  
365  	/*
366  	 * If we added cpus, remove them from previous group that owned them
367  	 * and update per-cpu rmid
368  	 */
369  	cpumask_andnot(tmpmask, newmask, &rdtgrp->cpu_mask);
370  	if (!cpumask_empty(tmpmask)) {
371  		head = &prgrp->mon.crdtgrp_list;
372  		list_for_each_entry(crgrp, head, mon.crdtgrp_list) {
373  			if (crgrp == rdtgrp)
374  				continue;
375  			cpumask_andnot(&crgrp->cpu_mask, &crgrp->cpu_mask,
376  				       tmpmask);
377  		}
378  		update_closid_rmid(tmpmask, rdtgrp);
379  	}
380  
381  	/* Done pushing/pulling - update this group with new mask */
382  	cpumask_copy(&rdtgrp->cpu_mask, newmask);
383  
384  	return 0;
385  }
386  
387  static void cpumask_rdtgrp_clear(struct rdtgroup *r, struct cpumask *m)
388  {
389  	struct rdtgroup *crgrp;
390  
391  	cpumask_andnot(&r->cpu_mask, &r->cpu_mask, m);
392  	/* update the child mon group masks as well */
393  	list_for_each_entry(crgrp, &r->mon.crdtgrp_list, mon.crdtgrp_list)
394  		cpumask_and(&crgrp->cpu_mask, &r->cpu_mask, &crgrp->cpu_mask);
395  }
396  
397  static int cpus_ctrl_write(struct rdtgroup *rdtgrp, cpumask_var_t newmask,
398  			   cpumask_var_t tmpmask, cpumask_var_t tmpmask1)
399  {
400  	struct rdtgroup *r, *crgrp;
401  	struct list_head *head;
402  
403  	/* Check whether cpus are dropped from this group */
404  	cpumask_andnot(tmpmask, &rdtgrp->cpu_mask, newmask);
405  	if (!cpumask_empty(tmpmask)) {
406  		/* Can't drop from default group */
407  		if (rdtgrp == &rdtgroup_default) {
408  			rdt_last_cmd_puts("Can't drop CPUs from default group\n");
409  			return -EINVAL;
410  		}
411  
412  		/* Give any dropped cpus to rdtgroup_default */
413  		cpumask_or(&rdtgroup_default.cpu_mask,
414  			   &rdtgroup_default.cpu_mask, tmpmask);
415  		update_closid_rmid(tmpmask, &rdtgroup_default);
416  	}
417  
418  	/*
419  	 * If we added cpus, remove them from previous group and
420  	 * the prev group's child groups that owned them
421  	 * and update per-cpu closid/rmid.
422  	 */
423  	cpumask_andnot(tmpmask, newmask, &rdtgrp->cpu_mask);
424  	if (!cpumask_empty(tmpmask)) {
425  		list_for_each_entry(r, &rdt_all_groups, rdtgroup_list) {
426  			if (r == rdtgrp)
427  				continue;
428  			cpumask_and(tmpmask1, &r->cpu_mask, tmpmask);
429  			if (!cpumask_empty(tmpmask1))
430  				cpumask_rdtgrp_clear(r, tmpmask1);
431  		}
432  		update_closid_rmid(tmpmask, rdtgrp);
433  	}
434  
435  	/* Done pushing/pulling - update this group with new mask */
436  	cpumask_copy(&rdtgrp->cpu_mask, newmask);
437  
438  	/*
439  	 * Clear child mon group masks since there is a new parent mask
440  	 * now and update the rmid for the cpus the child lost.
441  	 */
442  	head = &rdtgrp->mon.crdtgrp_list;
443  	list_for_each_entry(crgrp, head, mon.crdtgrp_list) {
444  		cpumask_and(tmpmask, &rdtgrp->cpu_mask, &crgrp->cpu_mask);
445  		update_closid_rmid(tmpmask, rdtgrp);
446  		cpumask_clear(&crgrp->cpu_mask);
447  	}
448  
449  	return 0;
450  }
451  
452  static ssize_t rdtgroup_cpus_write(struct kernfs_open_file *of,
453  				   char *buf, size_t nbytes, loff_t off)
454  {
455  	cpumask_var_t tmpmask, newmask, tmpmask1;
456  	struct rdtgroup *rdtgrp;
457  	int ret;
458  
459  	if (!buf)
460  		return -EINVAL;
461  
462  	if (!zalloc_cpumask_var(&tmpmask, GFP_KERNEL))
463  		return -ENOMEM;
464  	if (!zalloc_cpumask_var(&newmask, GFP_KERNEL)) {
465  		free_cpumask_var(tmpmask);
466  		return -ENOMEM;
467  	}
468  	if (!zalloc_cpumask_var(&tmpmask1, GFP_KERNEL)) {
469  		free_cpumask_var(tmpmask);
470  		free_cpumask_var(newmask);
471  		return -ENOMEM;
472  	}
473  
474  	rdtgrp = rdtgroup_kn_lock_live(of->kn);
475  	if (!rdtgrp) {
476  		ret = -ENOENT;
477  		goto unlock;
478  	}
479  
480  	if (rdtgrp->mode == RDT_MODE_PSEUDO_LOCKED ||
481  	    rdtgrp->mode == RDT_MODE_PSEUDO_LOCKSETUP) {
482  		ret = -EINVAL;
483  		rdt_last_cmd_puts("Pseudo-locking in progress\n");
484  		goto unlock;
485  	}
486  
487  	if (is_cpu_list(of))
488  		ret = cpulist_parse(buf, newmask);
489  	else
490  		ret = cpumask_parse(buf, newmask);
491  
492  	if (ret) {
493  		rdt_last_cmd_puts("Bad CPU list/mask\n");
494  		goto unlock;
495  	}
496  
497  	/* check that user didn't specify any offline cpus */
498  	cpumask_andnot(tmpmask, newmask, cpu_online_mask);
499  	if (!cpumask_empty(tmpmask)) {
500  		ret = -EINVAL;
501  		rdt_last_cmd_puts("Can only assign online CPUs\n");
502  		goto unlock;
503  	}
504  
505  	if (rdtgrp->type == RDTCTRL_GROUP)
506  		ret = cpus_ctrl_write(rdtgrp, newmask, tmpmask, tmpmask1);
507  	else if (rdtgrp->type == RDTMON_GROUP)
508  		ret = cpus_mon_write(rdtgrp, newmask, tmpmask);
509  	else
510  		ret = -EINVAL;
511  
512  unlock:
513  	rdtgroup_kn_unlock(of->kn);
514  	free_cpumask_var(tmpmask);
515  	free_cpumask_var(newmask);
516  	free_cpumask_var(tmpmask1);
517  
518  	return ret ?: nbytes;
519  }
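
/*
 * Usage sketch (group name and values illustrative; resctrl is typically
 * mounted at /sys/fs/resctrl):
 *
 *   # echo 0-3 > /sys/fs/resctrl/grp0/cpus_list    parsed via cpulist_parse()
 *   # echo f   > /sys/fs/resctrl/grp0/cpus         parsed via cpumask_parse()
 *
 * CPUs removed from a control group fall back to the default group, while
 * CPUs added here are pulled from whichever group previously owned them.
 */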
520  
521  /**
522   * rdtgroup_remove - the helper to remove resource group safely
523   * @rdtgrp: resource group to remove
524   *
525   * On resource group creation via a mkdir, an extra kernfs_node reference is
526   * taken to ensure that the rdtgroup structure remains accessible for the
527   * rdtgroup_kn_unlock() calls where it is removed.
528   *
529   * Drop the extra reference here, then free the rdtgroup structure.
530   *
531   * Return: void
532   */
533  static void rdtgroup_remove(struct rdtgroup *rdtgrp)
534  {
535  	kernfs_put(rdtgrp->kn);
536  	kfree(rdtgrp);
537  }
538  
539  static void _update_task_closid_rmid(void *task)
540  {
541  	/*
542  	 * If the task is still current on this CPU, update PQR_ASSOC MSR.
543  	 * Otherwise, the MSR is updated when the task is scheduled in.
544  	 */
545  	if (task == current)
546  		resctrl_sched_in(task);
547  }
548  
549  static void update_task_closid_rmid(struct task_struct *t)
550  {
551  	if (IS_ENABLED(CONFIG_SMP) && task_curr(t))
552  		smp_call_function_single(task_cpu(t), _update_task_closid_rmid, t, 1);
553  	else
554  		_update_task_closid_rmid(t);
555  }
556  
557  static int __rdtgroup_move_task(struct task_struct *tsk,
558  				struct rdtgroup *rdtgrp)
559  {
560  	/* If the task is already in rdtgrp, no need to move the task. */
561  	if ((rdtgrp->type == RDTCTRL_GROUP && tsk->closid == rdtgrp->closid &&
562  	     tsk->rmid == rdtgrp->mon.rmid) ||
563  	    (rdtgrp->type == RDTMON_GROUP && tsk->rmid == rdtgrp->mon.rmid &&
564  	     tsk->closid == rdtgrp->mon.parent->closid))
565  		return 0;
566  
567  	/*
568  	 * Set the task's closid/rmid before the PQR_ASSOC MSR can be
569  	 * updated by them.
570  	 *
571  	 * For ctrl_mon groups, move both closid and rmid.
572  	 * For monitor groups, can move the tasks only from
573  	 * their parent CTRL group.
574  	 */
575  
576  	if (rdtgrp->type == RDTCTRL_GROUP) {
577  		WRITE_ONCE(tsk->closid, rdtgrp->closid);
578  		WRITE_ONCE(tsk->rmid, rdtgrp->mon.rmid);
579  	} else if (rdtgrp->type == RDTMON_GROUP) {
580  		if (rdtgrp->mon.parent->closid == tsk->closid) {
581  			WRITE_ONCE(tsk->rmid, rdtgrp->mon.rmid);
582  		} else {
583  			rdt_last_cmd_puts("Can't move task to different control group\n");
584  			return -EINVAL;
585  		}
586  	}
587  
588  	/*
589  	 * Ensure the task's closid and rmid are written before determining if
590  	 * the task is currently running, which decides whether it needs to be interrupted.
591  	 * This pairs with the full barrier between the rq->curr update and
592  	 * resctrl_sched_in() during context switch.
593  	 */
594  	smp_mb();
595  
596  	/*
597  	 * By now, the task's closid and rmid are set. If the task is current
598  	 * on a CPU, the PQR_ASSOC MSR needs to be updated to make the resource
599  	 * group go into effect. If the task is not current, the MSR will be
600  	 * updated when the task is scheduled in.
601  	 */
602  	update_task_closid_rmid(tsk);
603  
604  	return 0;
605  }
606  
607  static bool is_closid_match(struct task_struct *t, struct rdtgroup *r)
608  {
609  	return (rdt_alloc_capable &&
610  	       (r->type == RDTCTRL_GROUP) && (t->closid == r->closid));
611  }
612  
613  static bool is_rmid_match(struct task_struct *t, struct rdtgroup *r)
614  {
615  	return (rdt_mon_capable &&
616  	       (r->type == RDTMON_GROUP) && (t->rmid == r->mon.rmid));
617  }
618  
619  /**
620   * rdtgroup_tasks_assigned - Test if tasks have been assigned to resource group
621   * @r: Resource group
622   *
623   * Return: 1 if tasks have been assigned to @r, 0 otherwise
624   */
625  int rdtgroup_tasks_assigned(struct rdtgroup *r)
626  {
627  	struct task_struct *p, *t;
628  	int ret = 0;
629  
630  	lockdep_assert_held(&rdtgroup_mutex);
631  
632  	rcu_read_lock();
633  	for_each_process_thread(p, t) {
634  		if (is_closid_match(t, r) || is_rmid_match(t, r)) {
635  			ret = 1;
636  			break;
637  		}
638  	}
639  	rcu_read_unlock();
640  
641  	return ret;
642  }
643  
644  static int rdtgroup_task_write_permission(struct task_struct *task,
645  					  struct kernfs_open_file *of)
646  {
647  	const struct cred *tcred = get_task_cred(task);
648  	const struct cred *cred = current_cred();
649  	int ret = 0;
650  
651  	/*
652  	 * Even if we're attaching all tasks in the thread group, we only
653  	 * need to check permissions on one of them.
654  	 */
655  	if (!uid_eq(cred->euid, GLOBAL_ROOT_UID) &&
656  	    !uid_eq(cred->euid, tcred->uid) &&
657  	    !uid_eq(cred->euid, tcred->suid)) {
658  		rdt_last_cmd_printf("No permission to move task %d\n", task->pid);
659  		ret = -EPERM;
660  	}
661  
662  	put_cred(tcred);
663  	return ret;
664  }
665  
666  static int rdtgroup_move_task(pid_t pid, struct rdtgroup *rdtgrp,
667  			      struct kernfs_open_file *of)
668  {
669  	struct task_struct *tsk;
670  	int ret;
671  
672  	rcu_read_lock();
673  	if (pid) {
674  		tsk = find_task_by_vpid(pid);
675  		if (!tsk) {
676  			rcu_read_unlock();
677  			rdt_last_cmd_printf("No task %d\n", pid);
678  			return -ESRCH;
679  		}
680  	} else {
681  		tsk = current;
682  	}
683  
684  	get_task_struct(tsk);
685  	rcu_read_unlock();
686  
687  	ret = rdtgroup_task_write_permission(tsk, of);
688  	if (!ret)
689  		ret = __rdtgroup_move_task(tsk, rdtgrp);
690  
691  	put_task_struct(tsk);
692  	return ret;
693  }
694  
695  static ssize_t rdtgroup_tasks_write(struct kernfs_open_file *of,
696  				    char *buf, size_t nbytes, loff_t off)
697  {
698  	struct rdtgroup *rdtgrp;
699  	int ret = 0;
700  	pid_t pid;
701  
702  	if (kstrtoint(strstrip(buf), 0, &pid) || pid < 0)
703  		return -EINVAL;
704  	rdtgrp = rdtgroup_kn_lock_live(of->kn);
705  	if (!rdtgrp) {
706  		rdtgroup_kn_unlock(of->kn);
707  		return -ENOENT;
708  	}
709  	rdt_last_cmd_clear();
710  
711  	if (rdtgrp->mode == RDT_MODE_PSEUDO_LOCKED ||
712  	    rdtgrp->mode == RDT_MODE_PSEUDO_LOCKSETUP) {
713  		ret = -EINVAL;
714  		rdt_last_cmd_puts("Pseudo-locking in progress\n");
715  		goto unlock;
716  	}
717  
718  	ret = rdtgroup_move_task(pid, rdtgrp, of);
719  
720  unlock:
721  	rdtgroup_kn_unlock(of->kn);
722  
723  	return ret ?: nbytes;
724  }
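
/*
 * Usage sketch (pid and path illustrative): writing a pid to a group's
 * "tasks" file moves that task via rdtgroup_move_task(); writing 0 moves
 * the writing task itself, since pid == 0 resolves to current above.
 *
 *   # echo 1234 > /sys/fs/resctrl/grp0/tasks
 */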
725  
726  static void show_rdt_tasks(struct rdtgroup *r, struct seq_file *s)
727  {
728  	struct task_struct *p, *t;
729  	pid_t pid;
730  
731  	rcu_read_lock();
732  	for_each_process_thread(p, t) {
733  		if (is_closid_match(t, r) || is_rmid_match(t, r)) {
734  			pid = task_pid_vnr(t);
735  			if (pid)
736  				seq_printf(s, "%d\n", pid);
737  		}
738  	}
739  	rcu_read_unlock();
740  }
741  
742  static int rdtgroup_tasks_show(struct kernfs_open_file *of,
743  			       struct seq_file *s, void *v)
744  {
745  	struct rdtgroup *rdtgrp;
746  	int ret = 0;
747  
748  	rdtgrp = rdtgroup_kn_lock_live(of->kn);
749  	if (rdtgrp)
750  		show_rdt_tasks(rdtgrp, s);
751  	else
752  		ret = -ENOENT;
753  	rdtgroup_kn_unlock(of->kn);
754  
755  	return ret;
756  }
757  
758  #ifdef CONFIG_PROC_CPU_RESCTRL
759  
760  /*
761   * A task can only be part of one resctrl control group and of one monitor
762   * group which is associated to that control group.
763   *
764   * 1)   res:
765   *      mon:
766   *
767   *    resctrl is not available.
768   *
769   * 2)   res:/
770   *      mon:
771   *
772   *    Task is part of the root resctrl control group, and it is not associated
773   *    to any monitor group.
774   *
775   * 3)  res:/
776   *     mon:mon0
777   *
778   *    Task is part of the root resctrl control group and monitor group mon0.
779   *
780   * 4)  res:group0
781   *     mon:
782   *
783   *    Task is part of resctrl control group group0, and it is not associated
784   *    to any monitor group.
785   *
786   * 5) res:group0
787   *    mon:mon1
788   *
789   *    Task is part of resctrl control group group0 and monitor group mon1.
790   */
791  int proc_resctrl_show(struct seq_file *s, struct pid_namespace *ns,
792  		      struct pid *pid, struct task_struct *tsk)
793  {
794  	struct rdtgroup *rdtg;
795  	int ret = 0;
796  
797  	mutex_lock(&rdtgroup_mutex);
798  
799  	/* Return empty if resctrl has not been mounted. */
800  	if (!static_branch_unlikely(&rdt_enable_key)) {
801  		seq_puts(s, "res:\nmon:\n");
802  		goto unlock;
803  	}
804  
805  	list_for_each_entry(rdtg, &rdt_all_groups, rdtgroup_list) {
806  		struct rdtgroup *crg;
807  
808  		/*
809  		 * Task information is only relevant for shareable
810  		 * and exclusive groups.
811  		 */
812  		if (rdtg->mode != RDT_MODE_SHAREABLE &&
813  		    rdtg->mode != RDT_MODE_EXCLUSIVE)
814  			continue;
815  
816  		if (rdtg->closid != tsk->closid)
817  			continue;
818  
819  		seq_printf(s, "res:%s%s\n", (rdtg == &rdtgroup_default) ? "/" : "",
820  			   rdtg->kn->name);
821  		seq_puts(s, "mon:");
822  		list_for_each_entry(crg, &rdtg->mon.crdtgrp_list,
823  				    mon.crdtgrp_list) {
824  			if (tsk->rmid != crg->mon.rmid)
825  				continue;
826  			seq_printf(s, "%s", crg->kn->name);
827  			break;
828  		}
829  		seq_putc(s, '\n');
830  		goto unlock;
831  	}
832  	/*
833  	 * The above search should succeed. Otherwise return
834  	 * with an error.
835  	 */
836  	ret = -ENOENT;
837  unlock:
838  	mutex_unlock(&rdtgroup_mutex);
839  
840  	return ret;
841  }
842  #endif
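
/*
 * The records produced by proc_resctrl_show() above are exposed through
 * /proc/<pid>/cpu_resctrl_groups when CONFIG_PROC_CPU_RESCTRL is enabled
 * (proc file name assumed from the proc interface wiring).
 */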
843  
844  static int rdt_last_cmd_status_show(struct kernfs_open_file *of,
845  				    struct seq_file *seq, void *v)
846  {
847  	int len;
848  
849  	mutex_lock(&rdtgroup_mutex);
850  	len = seq_buf_used(&last_cmd_status);
851  	if (len)
852  		seq_printf(seq, "%.*s", len, last_cmd_status_buf);
853  	else
854  		seq_puts(seq, "ok\n");
855  	mutex_unlock(&rdtgroup_mutex);
856  	return 0;
857  }
858  
859  static int rdt_num_closids_show(struct kernfs_open_file *of,
860  				struct seq_file *seq, void *v)
861  {
862  	struct resctrl_schema *s = of->kn->parent->priv;
863  
864  	seq_printf(seq, "%u\n", s->num_closid);
865  	return 0;
866  }
867  
868  static int rdt_default_ctrl_show(struct kernfs_open_file *of,
869  			     struct seq_file *seq, void *v)
870  {
871  	struct resctrl_schema *s = of->kn->parent->priv;
872  	struct rdt_resource *r = s->res;
873  
874  	seq_printf(seq, "%x\n", r->default_ctrl);
875  	return 0;
876  }
877  
878  static int rdt_min_cbm_bits_show(struct kernfs_open_file *of,
879  			     struct seq_file *seq, void *v)
880  {
881  	struct resctrl_schema *s = of->kn->parent->priv;
882  	struct rdt_resource *r = s->res;
883  
884  	seq_printf(seq, "%u\n", r->cache.min_cbm_bits);
885  	return 0;
886  }
887  
888  static int rdt_shareable_bits_show(struct kernfs_open_file *of,
889  				   struct seq_file *seq, void *v)
890  {
891  	struct resctrl_schema *s = of->kn->parent->priv;
892  	struct rdt_resource *r = s->res;
893  
894  	seq_printf(seq, "%x\n", r->cache.shareable_bits);
895  	return 0;
896  }
897  
898  /**
899   * rdt_bit_usage_show - Display current usage of resources
900   *
901   * A domain is a shared resource that can now be allocated differently. Here
902   * we display the current regions of the domain as an annotated bitmask.
903   * For each domain of this resource its allocation bitmask
904   * is annotated as below to indicate the current usage of the corresponding bit:
905   *   0 - currently unused
906   *   X - currently available for sharing and used by software and hardware
907   *   H - currently used by hardware only but available for software use
908   *   S - currently used and shareable by software only
909   *   E - currently used exclusively by one resource group
910   *   P - currently pseudo-locked by one resource group
911   */
912  static int rdt_bit_usage_show(struct kernfs_open_file *of,
913  			      struct seq_file *seq, void *v)
914  {
915  	struct resctrl_schema *s = of->kn->parent->priv;
916  	/*
917  	 * Use unsigned long even though only 32 bits are used to ensure
918  	 * test_bit() is used safely.
919  	 */
920  	unsigned long sw_shareable = 0, hw_shareable = 0;
921  	unsigned long exclusive = 0, pseudo_locked = 0;
922  	struct rdt_resource *r = s->res;
923  	struct rdt_domain *dom;
924  	int i, hwb, swb, excl, psl;
925  	enum rdtgrp_mode mode;
926  	bool sep = false;
927  	u32 ctrl_val;
928  
929  	mutex_lock(&rdtgroup_mutex);
930  	hw_shareable = r->cache.shareable_bits;
931  	list_for_each_entry(dom, &r->domains, list) {
932  		if (sep)
933  			seq_putc(seq, ';');
934  		sw_shareable = 0;
935  		exclusive = 0;
936  		seq_printf(seq, "%d=", dom->id);
937  		for (i = 0; i < closids_supported(); i++) {
938  			if (!closid_allocated(i))
939  				continue;
940  			ctrl_val = resctrl_arch_get_config(r, dom, i,
941  							   s->conf_type);
942  			mode = rdtgroup_mode_by_closid(i);
943  			switch (mode) {
944  			case RDT_MODE_SHAREABLE:
945  				sw_shareable |= ctrl_val;
946  				break;
947  			case RDT_MODE_EXCLUSIVE:
948  				exclusive |= ctrl_val;
949  				break;
950  			case RDT_MODE_PSEUDO_LOCKSETUP:
951  			/*
952  			 * RDT_MODE_PSEUDO_LOCKSETUP is possible
953  			 * here but not included since the CBM
954  			 * associated with this CLOSID in this mode
955  			 * is not initialized and no task or cpu can be
956  			 * assigned this CLOSID.
957  			 */
958  				break;
959  			case RDT_MODE_PSEUDO_LOCKED:
960  			case RDT_NUM_MODES:
961  				WARN(1,
962  				     "invalid mode for closid %d\n", i);
963  				break;
964  			}
965  		}
966  		for (i = r->cache.cbm_len - 1; i >= 0; i--) {
967  			pseudo_locked = dom->plr ? dom->plr->cbm : 0;
968  			hwb = test_bit(i, &hw_shareable);
969  			swb = test_bit(i, &sw_shareable);
970  			excl = test_bit(i, &exclusive);
971  			psl = test_bit(i, &pseudo_locked);
972  			if (hwb && swb)
973  				seq_putc(seq, 'X');
974  			else if (hwb && !swb)
975  				seq_putc(seq, 'H');
976  			else if (!hwb && swb)
977  				seq_putc(seq, 'S');
978  			else if (excl)
979  				seq_putc(seq, 'E');
980  			else if (psl)
981  				seq_putc(seq, 'P');
982  			else /* Unused bits remain */
983  				seq_putc(seq, '0');
984  		}
985  		sep = true;
986  	}
987  	seq_putc(seq, '\n');
988  	mutex_unlock(&rdtgroup_mutex);
989  	return 0;
990  }
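
/*
 * Example output (illustrative, an L3 resource with an 8-bit CBM and two
 * domains):
 *
 *   0=SSSSSSSS;1=SSSSHHHH
 *
 * i.e. domain 0 is fully software-shareable while the low four bits of
 * domain 1 are in use by hardware only. Domains are separated by ';' and
 * bits are printed from the most significant CBM bit downwards.
 */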
991  
992  static int rdt_min_bw_show(struct kernfs_open_file *of,
993  			     struct seq_file *seq, void *v)
994  {
995  	struct resctrl_schema *s = of->kn->parent->priv;
996  	struct rdt_resource *r = s->res;
997  
998  	seq_printf(seq, "%u\n", r->membw.min_bw);
999  	return 0;
1000  }
1001  
1002  static int rdt_num_rmids_show(struct kernfs_open_file *of,
1003  			      struct seq_file *seq, void *v)
1004  {
1005  	struct rdt_resource *r = of->kn->parent->priv;
1006  
1007  	seq_printf(seq, "%d\n", r->num_rmid);
1008  
1009  	return 0;
1010  }
1011  
1012  static int rdt_mon_features_show(struct kernfs_open_file *of,
1013  				 struct seq_file *seq, void *v)
1014  {
1015  	struct rdt_resource *r = of->kn->parent->priv;
1016  	struct mon_evt *mevt;
1017  
1018  	list_for_each_entry(mevt, &r->evt_list, list) {
1019  		seq_printf(seq, "%s\n", mevt->name);
1020  		if (mevt->configurable)
1021  			seq_printf(seq, "%s_config\n", mevt->name);
1022  	}
1023  
1024  	return 0;
1025  }
1026  
1027  static int rdt_bw_gran_show(struct kernfs_open_file *of,
1028  			     struct seq_file *seq, void *v)
1029  {
1030  	struct resctrl_schema *s = of->kn->parent->priv;
1031  	struct rdt_resource *r = s->res;
1032  
1033  	seq_printf(seq, "%u\n", r->membw.bw_gran);
1034  	return 0;
1035  }
1036  
1037  static int rdt_delay_linear_show(struct kernfs_open_file *of,
1038  			     struct seq_file *seq, void *v)
1039  {
1040  	struct resctrl_schema *s = of->kn->parent->priv;
1041  	struct rdt_resource *r = s->res;
1042  
1043  	seq_printf(seq, "%u\n", r->membw.delay_linear);
1044  	return 0;
1045  }
1046  
1047  static int max_threshold_occ_show(struct kernfs_open_file *of,
1048  				  struct seq_file *seq, void *v)
1049  {
1050  	seq_printf(seq, "%u\n", resctrl_rmid_realloc_threshold);
1051  
1052  	return 0;
1053  }
1054  
1055  static int rdt_thread_throttle_mode_show(struct kernfs_open_file *of,
1056  					 struct seq_file *seq, void *v)
1057  {
1058  	struct resctrl_schema *s = of->kn->parent->priv;
1059  	struct rdt_resource *r = s->res;
1060  
1061  	if (r->membw.throttle_mode == THREAD_THROTTLE_PER_THREAD)
1062  		seq_puts(seq, "per-thread\n");
1063  	else
1064  		seq_puts(seq, "max\n");
1065  
1066  	return 0;
1067  }
1068  
1069  static ssize_t max_threshold_occ_write(struct kernfs_open_file *of,
1070  				       char *buf, size_t nbytes, loff_t off)
1071  {
1072  	unsigned int bytes;
1073  	int ret;
1074  
1075  	ret = kstrtouint(buf, 0, &bytes);
1076  	if (ret)
1077  		return ret;
1078  
1079  	if (bytes > resctrl_rmid_realloc_limit)
1080  		return -EINVAL;
1081  
1082  	resctrl_rmid_realloc_threshold = resctrl_arch_round_mon_val(bytes);
1083  
1084  	return nbytes;
1085  }
1086  
1087  /*
1088   * rdtgroup_mode_show - Display mode of this resource group
1089   */
1090  static int rdtgroup_mode_show(struct kernfs_open_file *of,
1091  			      struct seq_file *s, void *v)
1092  {
1093  	struct rdtgroup *rdtgrp;
1094  
1095  	rdtgrp = rdtgroup_kn_lock_live(of->kn);
1096  	if (!rdtgrp) {
1097  		rdtgroup_kn_unlock(of->kn);
1098  		return -ENOENT;
1099  	}
1100  
1101  	seq_printf(s, "%s\n", rdtgroup_mode_str(rdtgrp->mode));
1102  
1103  	rdtgroup_kn_unlock(of->kn);
1104  	return 0;
1105  }
1106  
1107  static enum resctrl_conf_type resctrl_peer_type(enum resctrl_conf_type my_type)
1108  {
1109  	switch (my_type) {
1110  	case CDP_CODE:
1111  		return CDP_DATA;
1112  	case CDP_DATA:
1113  		return CDP_CODE;
1114  	default:
1115  	case CDP_NONE:
1116  		return CDP_NONE;
1117  	}
1118  }
1119  
1120  /**
1121   * __rdtgroup_cbm_overlaps - Does CBM for intended closid overlap with other
1122   * @r: Resource to which domain instance @d belongs.
1123   * @d: The domain instance for which @closid is being tested.
1124   * @cbm: Capacity bitmask being tested.
1125   * @closid: Intended closid for @cbm.
1126   * @exclusive: Only check if overlaps with exclusive resource groups
1127   *
1128   * Checks if provided @cbm intended to be used for @closid on domain
1129   * @d overlaps with any other closids or other hardware usage associated
1130   * with this domain. If @exclusive is true then only overlaps with
1131   * resource groups in exclusive mode will be considered. If @exclusive
1132   * is false then overlaps with any resource group or hardware entities
1133   * will be considered.
1134   *
1135   * @cbm is unsigned long, even if only 32 bits are used, to make the
1136   * bitmap functions work correctly.
1137   *
1138   * Return: false if CBM does not overlap, true if it does.
1139   */
1140  static bool __rdtgroup_cbm_overlaps(struct rdt_resource *r, struct rdt_domain *d,
1141  				    unsigned long cbm, int closid,
1142  				    enum resctrl_conf_type type, bool exclusive)
1143  {
1144  	enum rdtgrp_mode mode;
1145  	unsigned long ctrl_b;
1146  	int i;
1147  
1148  	/* Check for any overlap with regions used by hardware directly */
1149  	if (!exclusive) {
1150  		ctrl_b = r->cache.shareable_bits;
1151  		if (bitmap_intersects(&cbm, &ctrl_b, r->cache.cbm_len))
1152  			return true;
1153  	}
1154  
1155  	/* Check for overlap with other resource groups */
1156  	for (i = 0; i < closids_supported(); i++) {
1157  		ctrl_b = resctrl_arch_get_config(r, d, i, type);
1158  		mode = rdtgroup_mode_by_closid(i);
1159  		if (closid_allocated(i) && i != closid &&
1160  		    mode != RDT_MODE_PSEUDO_LOCKSETUP) {
1161  			if (bitmap_intersects(&cbm, &ctrl_b, r->cache.cbm_len)) {
1162  				if (exclusive) {
1163  					if (mode == RDT_MODE_EXCLUSIVE)
1164  						return true;
1165  					continue;
1166  				}
1167  				return true;
1168  			}
1169  		}
1170  	}
1171  
1172  	return false;
1173  }
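
/*
 * Worked example (illustrative): with cbm = 0x0ff0 proposed for one closid
 * and 0x00ff already programmed for another, bitmap_intersects() sees the
 * shared bits 4-7 (0x0ff0 & 0x00ff = 0x00f0) and the function reports an
 * overlap; disjoint masks such as 0x0f00 and 0x00ff would not.
 */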
1174  
1175  /**
1176   * rdtgroup_cbm_overlaps - Does CBM overlap with other use of hardware
1177   * @s: Schema for the resource to which domain instance @d belongs.
1178   * @d: The domain instance for which @closid is being tested.
1179   * @cbm: Capacity bitmask being tested.
1180   * @closid: Intended closid for @cbm.
1181   * @exclusive: Only check if overlaps with exclusive resource groups
1182   *
1183   * Resources that can be allocated using a CBM can use the CBM to control
1184   * the overlap of these allocations. rdtgroup_cmb_overlaps() is the test
1185   * for overlap. Overlap test is not limited to the specific resource for
1186   * which the CBM is intended though - when dealing with CDP resources that
1187   * share the underlying hardware the overlap check should be performed on
1188   * the CDP resource sharing the hardware also.
1189   *
1190   * Refer to description of __rdtgroup_cbm_overlaps() for the details of the
1191   * overlap test.
1192   *
1193   * Return: true if CBM overlap detected, false if there is no overlap
1194   */
1195  bool rdtgroup_cbm_overlaps(struct resctrl_schema *s, struct rdt_domain *d,
1196  			   unsigned long cbm, int closid, bool exclusive)
1197  {
1198  	enum resctrl_conf_type peer_type = resctrl_peer_type(s->conf_type);
1199  	struct rdt_resource *r = s->res;
1200  
1201  	if (__rdtgroup_cbm_overlaps(r, d, cbm, closid, s->conf_type,
1202  				    exclusive))
1203  		return true;
1204  
1205  	if (!resctrl_arch_get_cdp_enabled(r->rid))
1206  		return false;
1207  	return  __rdtgroup_cbm_overlaps(r, d, cbm, closid, peer_type, exclusive);
1208  }
1209  
1210  /**
1211   * rdtgroup_mode_test_exclusive - Test if this resource group can be exclusive
1212   *
1213   * An exclusive resource group implies that there should be no sharing of
1214   * its allocated resources. At the time this group is considered to be
1215   * exclusive this test can determine if its current schemata supports this
1216   * setting by testing for overlap with all other resource groups.
1217   *
1218   * Return: true if resource group can be exclusive, false if there is overlap
1219   * with allocations of other resource groups and thus this resource group
1220   * cannot be exclusive.
1221   */
1222  static bool rdtgroup_mode_test_exclusive(struct rdtgroup *rdtgrp)
1223  {
1224  	int closid = rdtgrp->closid;
1225  	struct resctrl_schema *s;
1226  	struct rdt_resource *r;
1227  	bool has_cache = false;
1228  	struct rdt_domain *d;
1229  	u32 ctrl;
1230  
1231  	list_for_each_entry(s, &resctrl_schema_all, list) {
1232  		r = s->res;
1233  		if (r->rid == RDT_RESOURCE_MBA || r->rid == RDT_RESOURCE_SMBA)
1234  			continue;
1235  		has_cache = true;
1236  		list_for_each_entry(d, &r->domains, list) {
1237  			ctrl = resctrl_arch_get_config(r, d, closid,
1238  						       s->conf_type);
1239  			if (rdtgroup_cbm_overlaps(s, d, ctrl, closid, false)) {
1240  				rdt_last_cmd_puts("Schemata overlaps\n");
1241  				return false;
1242  			}
1243  		}
1244  	}
1245  
1246  	if (!has_cache) {
1247  		rdt_last_cmd_puts("Cannot be exclusive without CAT/CDP\n");
1248  		return false;
1249  	}
1250  
1251  	return true;
1252  }
1253  
1254  /**
1255   * rdtgroup_mode_write - Modify the resource group's mode
1256   *
1257   */
1258  static ssize_t rdtgroup_mode_write(struct kernfs_open_file *of,
1259  				   char *buf, size_t nbytes, loff_t off)
1260  {
1261  	struct rdtgroup *rdtgrp;
1262  	enum rdtgrp_mode mode;
1263  	int ret = 0;
1264  
1265  	/* Valid input requires a trailing newline */
1266  	if (nbytes == 0 || buf[nbytes - 1] != '\n')
1267  		return -EINVAL;
1268  	buf[nbytes - 1] = '\0';
1269  
1270  	rdtgrp = rdtgroup_kn_lock_live(of->kn);
1271  	if (!rdtgrp) {
1272  		rdtgroup_kn_unlock(of->kn);
1273  		return -ENOENT;
1274  	}
1275  
1276  	rdt_last_cmd_clear();
1277  
1278  	mode = rdtgrp->mode;
1279  
1280  	if ((!strcmp(buf, "shareable") && mode == RDT_MODE_SHAREABLE) ||
1281  	    (!strcmp(buf, "exclusive") && mode == RDT_MODE_EXCLUSIVE) ||
1282  	    (!strcmp(buf, "pseudo-locksetup") &&
1283  	     mode == RDT_MODE_PSEUDO_LOCKSETUP) ||
1284  	    (!strcmp(buf, "pseudo-locked") && mode == RDT_MODE_PSEUDO_LOCKED))
1285  		goto out;
1286  
1287  	if (mode == RDT_MODE_PSEUDO_LOCKED) {
1288  		rdt_last_cmd_puts("Cannot change pseudo-locked group\n");
1289  		ret = -EINVAL;
1290  		goto out;
1291  	}
1292  
1293  	if (!strcmp(buf, "shareable")) {
1294  		if (rdtgrp->mode == RDT_MODE_PSEUDO_LOCKSETUP) {
1295  			ret = rdtgroup_locksetup_exit(rdtgrp);
1296  			if (ret)
1297  				goto out;
1298  		}
1299  		rdtgrp->mode = RDT_MODE_SHAREABLE;
1300  	} else if (!strcmp(buf, "exclusive")) {
1301  		if (!rdtgroup_mode_test_exclusive(rdtgrp)) {
1302  			ret = -EINVAL;
1303  			goto out;
1304  		}
1305  		if (rdtgrp->mode == RDT_MODE_PSEUDO_LOCKSETUP) {
1306  			ret = rdtgroup_locksetup_exit(rdtgrp);
1307  			if (ret)
1308  				goto out;
1309  		}
1310  		rdtgrp->mode = RDT_MODE_EXCLUSIVE;
1311  	} else if (!strcmp(buf, "pseudo-locksetup")) {
1312  		ret = rdtgroup_locksetup_enter(rdtgrp);
1313  		if (ret)
1314  			goto out;
1315  		rdtgrp->mode = RDT_MODE_PSEUDO_LOCKSETUP;
1316  	} else {
1317  		rdt_last_cmd_puts("Unknown or unsupported mode\n");
1318  		ret = -EINVAL;
1319  	}
1320  
1321  out:
1322  	rdtgroup_kn_unlock(of->kn);
1323  	return ret ?: nbytes;
1324  }
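
/*
 * Usage sketch (group name illustrative): switching a group to exclusive
 * mode only succeeds if its current schemata does not overlap any other
 * group, otherwise "Schemata overlaps" is reported via last_cmd_status:
 *
 *   # echo exclusive > /sys/fs/resctrl/grp0/mode
 *   # cat /sys/fs/resctrl/info/last_cmd_status
 */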
1325  
1326  /**
1327   * rdtgroup_cbm_to_size - Translate CBM to size in bytes
1328   * @r: RDT resource to which @d belongs.
1329   * @d: RDT domain instance.
1330   * @cbm: bitmask for which the size should be computed.
1331   *
1332   * The bitmask provided associated with the RDT domain instance @d will be
1333   * translated into how many bytes it represents. The size in bytes is
1334   * computed by first dividing the total cache size by the CBM length to
1335   * determine how many bytes each bit in the bitmask represents. The result
1336   * is multiplied with the number of bits set in the bitmask.
1337   *
1338   * @cbm is unsigned long, even if only 32 bits are used to make the
1339   * bitmap functions work correctly.
1340   */
1341  unsigned int rdtgroup_cbm_to_size(struct rdt_resource *r,
1342  				  struct rdt_domain *d, unsigned long cbm)
1343  {
1344  	struct cpu_cacheinfo *ci;
1345  	unsigned int size = 0;
1346  	int num_b, i;
1347  
1348  	num_b = bitmap_weight(&cbm, r->cache.cbm_len);
1349  	ci = get_cpu_cacheinfo(cpumask_any(&d->cpu_mask));
1350  	for (i = 0; i < ci->num_leaves; i++) {
1351  		if (ci->info_list[i].level == r->cache_level) {
1352  			size = ci->info_list[i].size / r->cache.cbm_len * num_b;
1353  			break;
1354  		}
1355  	}
1356  
1357  	return size;
1358  }
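
/*
 * Worked example (illustrative numbers): for a 32 MB cache with a 16-bit
 * CBM, each bit covers 32 MB / 16 = 2 MB; a cbm of 0x00ff has 8 bits set,
 * so the reported size is 8 * 2 MB = 16 MB.
 */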
1359  
1360  /**
1361   * rdtgroup_size_show - Display size in bytes of allocated regions
1362   *
1363   * The "size" file mirrors the layout of the "schemata" file, printing the
1364   * size in bytes of each region instead of the capacity bitmask.
1365   *
1366   */
1367  static int rdtgroup_size_show(struct kernfs_open_file *of,
1368  			      struct seq_file *s, void *v)
1369  {
1370  	struct resctrl_schema *schema;
1371  	enum resctrl_conf_type type;
1372  	struct rdtgroup *rdtgrp;
1373  	struct rdt_resource *r;
1374  	struct rdt_domain *d;
1375  	unsigned int size;
1376  	int ret = 0;
1377  	u32 closid;
1378  	bool sep;
1379  	u32 ctrl;
1380  
1381  	rdtgrp = rdtgroup_kn_lock_live(of->kn);
1382  	if (!rdtgrp) {
1383  		rdtgroup_kn_unlock(of->kn);
1384  		return -ENOENT;
1385  	}
1386  
1387  	if (rdtgrp->mode == RDT_MODE_PSEUDO_LOCKED) {
1388  		if (!rdtgrp->plr->d) {
1389  			rdt_last_cmd_clear();
1390  			rdt_last_cmd_puts("Cache domain offline\n");
1391  			ret = -ENODEV;
1392  		} else {
1393  			seq_printf(s, "%*s:", max_name_width,
1394  				   rdtgrp->plr->s->name);
1395  			size = rdtgroup_cbm_to_size(rdtgrp->plr->s->res,
1396  						    rdtgrp->plr->d,
1397  						    rdtgrp->plr->cbm);
1398  			seq_printf(s, "%d=%u\n", rdtgrp->plr->d->id, size);
1399  		}
1400  		goto out;
1401  	}
1402  
1403  	closid = rdtgrp->closid;
1404  
1405  	list_for_each_entry(schema, &resctrl_schema_all, list) {
1406  		r = schema->res;
1407  		type = schema->conf_type;
1408  		sep = false;
1409  		seq_printf(s, "%*s:", max_name_width, schema->name);
1410  		list_for_each_entry(d, &r->domains, list) {
1411  			if (sep)
1412  				seq_putc(s, ';');
1413  			if (rdtgrp->mode == RDT_MODE_PSEUDO_LOCKSETUP) {
1414  				size = 0;
1415  			} else {
1416  				if (is_mba_sc(r))
1417  					ctrl = d->mbps_val[closid];
1418  				else
1419  					ctrl = resctrl_arch_get_config(r, d,
1420  								       closid,
1421  								       type);
1422  				if (r->rid == RDT_RESOURCE_MBA ||
1423  				    r->rid == RDT_RESOURCE_SMBA)
1424  					size = ctrl;
1425  				else
1426  					size = rdtgroup_cbm_to_size(r, d, ctrl);
1427  			}
1428  			seq_printf(s, "%d=%u", d->id, size);
1429  			sep = true;
1430  		}
1431  		seq_putc(s, '\n');
1432  	}
1433  
1434  out:
1435  	rdtgroup_kn_unlock(of->kn);
1436  
1437  	return ret;
1438  }
1439  
1440  struct mon_config_info {
1441  	u32 evtid;
1442  	u32 mon_config;
1443  };
1444  
1445  #define INVALID_CONFIG_INDEX   UINT_MAX
1446  
1447  /**
1448   * mon_event_config_index_get - get the hardware index for the
1449   *                              configurable event
1450   * @evtid: event id.
1451   *
1452   * Return: 0 for evtid == QOS_L3_MBM_TOTAL_EVENT_ID
1453   *         1 for evtid == QOS_L3_MBM_LOCAL_EVENT_ID
1454   *         INVALID_CONFIG_INDEX for invalid evtid
1455   */
1456  static inline unsigned int mon_event_config_index_get(u32 evtid)
1457  {
1458  	switch (evtid) {
1459  	case QOS_L3_MBM_TOTAL_EVENT_ID:
1460  		return 0;
1461  	case QOS_L3_MBM_LOCAL_EVENT_ID:
1462  		return 1;
1463  	default:
1464  		/* Should never reach here */
1465  		return INVALID_CONFIG_INDEX;
1466  	}
1467  }
1468  
1469  static void mon_event_config_read(void *info)
1470  {
1471  	struct mon_config_info *mon_info = info;
1472  	unsigned int index;
1473  	u64 msrval;
1474  
1475  	index = mon_event_config_index_get(mon_info->evtid);
1476  	if (index == INVALID_CONFIG_INDEX) {
1477  		pr_warn_once("Invalid event id %d\n", mon_info->evtid);
1478  		return;
1479  	}
1480  	rdmsrl(MSR_IA32_EVT_CFG_BASE + index, msrval);
1481  
1482  	/* Report only the valid event configuration bits */
1483  	mon_info->mon_config = msrval & MAX_EVT_CONFIG_BITS;
1484  }
1485  
1486  static void mondata_config_read(struct rdt_domain *d, struct mon_config_info *mon_info)
1487  {
1488  	smp_call_function_any(&d->cpu_mask, mon_event_config_read, mon_info, 1);
1489  }
1490  
1491  static int mbm_config_show(struct seq_file *s, struct rdt_resource *r, u32 evtid)
1492  {
1493  	struct mon_config_info mon_info = {0};
1494  	struct rdt_domain *dom;
1495  	bool sep = false;
1496  
1497  	mutex_lock(&rdtgroup_mutex);
1498  
1499  	list_for_each_entry(dom, &r->domains, list) {
1500  		if (sep)
1501  			seq_puts(s, ";");
1502  
1503  		memset(&mon_info, 0, sizeof(struct mon_config_info));
1504  		mon_info.evtid = evtid;
1505  		mondata_config_read(dom, &mon_info);
1506  
1507  		seq_printf(s, "%d=0x%02x", dom->id, mon_info.mon_config);
1508  		sep = true;
1509  	}
1510  	seq_puts(s, "\n");
1511  
1512  	mutex_unlock(&rdtgroup_mutex);
1513  
1514  	return 0;
1515  }
1516  
1517  static int mbm_total_bytes_config_show(struct kernfs_open_file *of,
1518  				       struct seq_file *seq, void *v)
1519  {
1520  	struct rdt_resource *r = of->kn->parent->priv;
1521  
1522  	mbm_config_show(seq, r, QOS_L3_MBM_TOTAL_EVENT_ID);
1523  
1524  	return 0;
1525  }
1526  
1527  static int mbm_local_bytes_config_show(struct kernfs_open_file *of,
1528  				       struct seq_file *seq, void *v)
1529  {
1530  	struct rdt_resource *r = of->kn->parent->priv;
1531  
1532  	mbm_config_show(seq, r, QOS_L3_MBM_LOCAL_EVENT_ID);
1533  
1534  	return 0;
1535  }
1536  
1537  static void mon_event_config_write(void *info)
1538  {
1539  	struct mon_config_info *mon_info = info;
1540  	unsigned int index;
1541  
1542  	index = mon_event_config_index_get(mon_info->evtid);
1543  	if (index == INVALID_CONFIG_INDEX) {
1544  		pr_warn_once("Invalid event id %d\n", mon_info->evtid);
1545  		return;
1546  	}
1547  	wrmsr(MSR_IA32_EVT_CFG_BASE + index, mon_info->mon_config, 0);
1548  }
1549  
1550  static int mbm_config_write_domain(struct rdt_resource *r,
1551  				   struct rdt_domain *d, u32 evtid, u32 val)
1552  {
1553  	struct mon_config_info mon_info = {0};
1554  	int ret = 0;
1555  
1556  	/*
1557  	 * Read the current config value first. If both are the same then
1558  	 * no need to write it again.
1559  	 */
1560  	mon_info.evtid = evtid;
1561  	mondata_config_read(d, &mon_info);
1562  	if (mon_info.mon_config == val)
1563  		goto out;
1564  
1565  	mon_info.mon_config = val;
1566  
1567  	/*
1568  	 * Update MSR_IA32_EVT_CFG_BASE MSR on one of the CPUs in the
1569  	 * domain. The MSRs at offsets from MSR_IA32_EVT_CFG_BASE
1570  	 * are scoped at the domain level. Writing any of these MSRs
1571  	 * on one CPU is observed by all the CPUs in the domain.
1572  	 */
1573  	smp_call_function_any(&d->cpu_mask, mon_event_config_write,
1574  			      &mon_info, 1);
1575  
1576  	/*
1577  	 * When an Event Configuration is changed, the bandwidth counters
1578  	 * for all RMIDs and Events will be cleared by the hardware. The
1579  	 * hardware also sets MSR_IA32_QM_CTR.Unavailable (bit 62) for
1580  	 * every RMID on the next read to any event for every RMID.
1581  	 * Subsequent reads will have MSR_IA32_QM_CTR.Unavailable (bit 62)
1582  	 * cleared while it is tracked by the hardware. Clear the
1583  	 * mbm_local and mbm_total counts for all the RMIDs.
1584  	 */
1585  	resctrl_arch_reset_rmid_all(r, d);
1586  
1587  out:
1588  	return ret;
1589  }
1590  
1591  static int mon_config_write(struct rdt_resource *r, char *tok, u32 evtid)
1592  {
1593  	struct rdt_hw_resource *hw_res = resctrl_to_arch_res(r);
1594  	char *dom_str = NULL, *id_str;
1595  	unsigned long dom_id, val;
1596  	struct rdt_domain *d;
1597  	int ret = 0;
1598  
1599  next:
1600  	if (!tok || tok[0] == '\0')
1601  		return 0;
1602  
1603  	/* Start processing the strings for each domain */
1604  	dom_str = strim(strsep(&tok, ";"));
1605  	id_str = strsep(&dom_str, "=");
1606  
1607  	if (!id_str || kstrtoul(id_str, 10, &dom_id)) {
1608  		rdt_last_cmd_puts("Missing '=' or non-numeric domain id\n");
1609  		return -EINVAL;
1610  	}
1611  
1612  	if (!dom_str || kstrtoul(dom_str, 16, &val)) {
1613  		rdt_last_cmd_puts("Non-numeric event configuration value\n");
1614  		return -EINVAL;
1615  	}
1616  
1617  	/* Value from user cannot be more than the supported set of events */
1618  	if ((val & hw_res->mbm_cfg_mask) != val) {
1619  		rdt_last_cmd_printf("Invalid event configuration: max valid mask is 0x%02x\n",
1620  				    hw_res->mbm_cfg_mask);
1621  		return -EINVAL;
1622  	}
1623  
1624  	list_for_each_entry(d, &r->domains, list) {
1625  		if (d->id == dom_id) {
1626  			ret = mbm_config_write_domain(r, d, evtid, val);
1627  			if (ret)
1628  				return -EINVAL;
1629  			goto next;
1630  		}
1631  	}
1632  
1633  	return -EINVAL;
1634  }
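
/*
 * Usage sketch (domain ids, value and path illustrative): the parser above
 * takes ';'-separated "<domain id>=<hex value>" tokens, e.g. writing
 *
 *   # echo "0=0x30;1=0x30" > /sys/fs/resctrl/info/L3_MON/mbm_total_bytes_config
 *
 * programs the event configuration for domains 0 and 1, provided the value
 * stays within hw_res->mbm_cfg_mask.
 */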
1635  
1636  static ssize_t mbm_total_bytes_config_write(struct kernfs_open_file *of,
1637  					    char *buf, size_t nbytes,
1638  					    loff_t off)
1639  {
1640  	struct rdt_resource *r = of->kn->parent->priv;
1641  	int ret;
1642  
1643  	/* Valid input requires a trailing newline */
1644  	if (nbytes == 0 || buf[nbytes - 1] != '\n')
1645  		return -EINVAL;
1646  
1647  	mutex_lock(&rdtgroup_mutex);
1648  
1649  	rdt_last_cmd_clear();
1650  
1651  	buf[nbytes - 1] = '\0';
1652  
1653  	ret = mon_config_write(r, buf, QOS_L3_MBM_TOTAL_EVENT_ID);
1654  
1655  	mutex_unlock(&rdtgroup_mutex);
1656  
1657  	return ret ?: nbytes;
1658  }
1659  
1660  static ssize_t mbm_local_bytes_config_write(struct kernfs_open_file *of,
1661  					    char *buf, size_t nbytes,
1662  					    loff_t off)
1663  {
1664  	struct rdt_resource *r = of->kn->parent->priv;
1665  	int ret;
1666  
1667  	/* Valid input requires a trailing newline */
1668  	if (nbytes == 0 || buf[nbytes - 1] != '\n')
1669  		return -EINVAL;
1670  
1671  	mutex_lock(&rdtgroup_mutex);
1672  
1673  	rdt_last_cmd_clear();
1674  
1675  	buf[nbytes - 1] = '\0';
1676  
1677  	ret = mon_config_write(r, buf, QOS_L3_MBM_LOCAL_EVENT_ID);
1678  
1679  	mutex_unlock(&rdtgroup_mutex);
1680  
1681  	return ret ?: nbytes;
1682  }
1683  
1684  /* rdtgroup information files for one cache resource. */
1685  static struct rftype res_common_files[] = {
1686  	{
1687  		.name		= "last_cmd_status",
1688  		.mode		= 0444,
1689  		.kf_ops		= &rdtgroup_kf_single_ops,
1690  		.seq_show	= rdt_last_cmd_status_show,
1691  		.fflags		= RF_TOP_INFO,
1692  	},
1693  	{
1694  		.name		= "num_closids",
1695  		.mode		= 0444,
1696  		.kf_ops		= &rdtgroup_kf_single_ops,
1697  		.seq_show	= rdt_num_closids_show,
1698  		.fflags		= RF_CTRL_INFO,
1699  	},
1700  	{
1701  		.name		= "mon_features",
1702  		.mode		= 0444,
1703  		.kf_ops		= &rdtgroup_kf_single_ops,
1704  		.seq_show	= rdt_mon_features_show,
1705  		.fflags		= RF_MON_INFO,
1706  	},
1707  	{
1708  		.name		= "num_rmids",
1709  		.mode		= 0444,
1710  		.kf_ops		= &rdtgroup_kf_single_ops,
1711  		.seq_show	= rdt_num_rmids_show,
1712  		.fflags		= RF_MON_INFO,
1713  	},
1714  	{
1715  		.name		= "cbm_mask",
1716  		.mode		= 0444,
1717  		.kf_ops		= &rdtgroup_kf_single_ops,
1718  		.seq_show	= rdt_default_ctrl_show,
1719  		.fflags		= RF_CTRL_INFO | RFTYPE_RES_CACHE,
1720  	},
1721  	{
1722  		.name		= "min_cbm_bits",
1723  		.mode		= 0444,
1724  		.kf_ops		= &rdtgroup_kf_single_ops,
1725  		.seq_show	= rdt_min_cbm_bits_show,
1726  		.fflags		= RF_CTRL_INFO | RFTYPE_RES_CACHE,
1727  	},
1728  	{
1729  		.name		= "shareable_bits",
1730  		.mode		= 0444,
1731  		.kf_ops		= &rdtgroup_kf_single_ops,
1732  		.seq_show	= rdt_shareable_bits_show,
1733  		.fflags		= RF_CTRL_INFO | RFTYPE_RES_CACHE,
1734  	},
1735  	{
1736  		.name		= "bit_usage",
1737  		.mode		= 0444,
1738  		.kf_ops		= &rdtgroup_kf_single_ops,
1739  		.seq_show	= rdt_bit_usage_show,
1740  		.fflags		= RF_CTRL_INFO | RFTYPE_RES_CACHE,
1741  	},
1742  	{
1743  		.name		= "min_bandwidth",
1744  		.mode		= 0444,
1745  		.kf_ops		= &rdtgroup_kf_single_ops,
1746  		.seq_show	= rdt_min_bw_show,
1747  		.fflags		= RF_CTRL_INFO | RFTYPE_RES_MB,
1748  	},
1749  	{
1750  		.name		= "bandwidth_gran",
1751  		.mode		= 0444,
1752  		.kf_ops		= &rdtgroup_kf_single_ops,
1753  		.seq_show	= rdt_bw_gran_show,
1754  		.fflags		= RF_CTRL_INFO | RFTYPE_RES_MB,
1755  	},
1756  	{
1757  		.name		= "delay_linear",
1758  		.mode		= 0444,
1759  		.kf_ops		= &rdtgroup_kf_single_ops,
1760  		.seq_show	= rdt_delay_linear_show,
1761  		.fflags		= RF_CTRL_INFO | RFTYPE_RES_MB,
1762  	},
1763  	/*
1764  	 * It is platform specific which (if any) capabilities are provided
1765  	 * by the thread_throttle_mode file. Defer "fflags" initialization
1766  	 * until platform discovery.
1767  	 */
1768  	{
1769  		.name		= "thread_throttle_mode",
1770  		.mode		= 0444,
1771  		.kf_ops		= &rdtgroup_kf_single_ops,
1772  		.seq_show	= rdt_thread_throttle_mode_show,
1773  	},
1774  	{
1775  		.name		= "max_threshold_occupancy",
1776  		.mode		= 0644,
1777  		.kf_ops		= &rdtgroup_kf_single_ops,
1778  		.write		= max_threshold_occ_write,
1779  		.seq_show	= max_threshold_occ_show,
1780  		.fflags		= RF_MON_INFO | RFTYPE_RES_CACHE,
1781  	},
1782  	{
1783  		.name		= "mbm_total_bytes_config",
1784  		.mode		= 0644,
1785  		.kf_ops		= &rdtgroup_kf_single_ops,
1786  		.seq_show	= mbm_total_bytes_config_show,
1787  		.write		= mbm_total_bytes_config_write,
1788  	},
1789  	{
1790  		.name		= "mbm_local_bytes_config",
1791  		.mode		= 0644,
1792  		.kf_ops		= &rdtgroup_kf_single_ops,
1793  		.seq_show	= mbm_local_bytes_config_show,
1794  		.write		= mbm_local_bytes_config_write,
1795  	},
1796  	{
1797  		.name		= "cpus",
1798  		.mode		= 0644,
1799  		.kf_ops		= &rdtgroup_kf_single_ops,
1800  		.write		= rdtgroup_cpus_write,
1801  		.seq_show	= rdtgroup_cpus_show,
1802  		.fflags		= RFTYPE_BASE,
1803  	},
1804  	{
1805  		.name		= "cpus_list",
1806  		.mode		= 0644,
1807  		.kf_ops		= &rdtgroup_kf_single_ops,
1808  		.write		= rdtgroup_cpus_write,
1809  		.seq_show	= rdtgroup_cpus_show,
1810  		.flags		= RFTYPE_FLAGS_CPUS_LIST,
1811  		.fflags		= RFTYPE_BASE,
1812  	},
1813  	{
1814  		.name		= "tasks",
1815  		.mode		= 0644,
1816  		.kf_ops		= &rdtgroup_kf_single_ops,
1817  		.write		= rdtgroup_tasks_write,
1818  		.seq_show	= rdtgroup_tasks_show,
1819  		.fflags		= RFTYPE_BASE,
1820  	},
1821  	{
1822  		.name		= "schemata",
1823  		.mode		= 0644,
1824  		.kf_ops		= &rdtgroup_kf_single_ops,
1825  		.write		= rdtgroup_schemata_write,
1826  		.seq_show	= rdtgroup_schemata_show,
1827  		.fflags		= RF_CTRL_BASE,
1828  	},
1829  	{
1830  		.name		= "mode",
1831  		.mode		= 0644,
1832  		.kf_ops		= &rdtgroup_kf_single_ops,
1833  		.write		= rdtgroup_mode_write,
1834  		.seq_show	= rdtgroup_mode_show,
1835  		.fflags		= RF_CTRL_BASE,
1836  	},
1837  	{
1838  		.name		= "size",
1839  		.mode		= 0444,
1840  		.kf_ops		= &rdtgroup_kf_single_ops,
1841  		.seq_show	= rdtgroup_size_show,
1842  		.fflags		= RF_CTRL_BASE,
1843  	},
1844  
1845  };
1846  
1847  static int rdtgroup_add_files(struct kernfs_node *kn, unsigned long fflags)
1848  {
1849  	struct rftype *rfts, *rft;
1850  	int ret, len;
1851  
1852  	rfts = res_common_files;
1853  	len = ARRAY_SIZE(res_common_files);
1854  
1855  	lockdep_assert_held(&rdtgroup_mutex);
1856  
1857  	for (rft = rfts; rft < rfts + len; rft++) {
1858  		if (rft->fflags && ((fflags & rft->fflags) == rft->fflags)) {
1859  			ret = rdtgroup_add_file(kn, rft);
1860  			if (ret)
1861  				goto error;
1862  		}
1863  	}
1864  
1865  	return 0;
1866  error:
1867  	pr_warn("Failed to add %s, err=%d\n", rft->name, ret);
1868  	while (--rft >= rfts) {
1869  		if ((fflags & rft->fflags) == rft->fflags)
1870  			kernfs_remove_by_name(kn, rft->name);
1871  	}
1872  	return ret;
1873  }
1874  
1875  static struct rftype *rdtgroup_get_rftype_by_name(const char *name)
1876  {
1877  	struct rftype *rfts, *rft;
1878  	int len;
1879  
1880  	rfts = res_common_files;
1881  	len = ARRAY_SIZE(res_common_files);
1882  
1883  	for (rft = rfts; rft < rfts + len; rft++) {
1884  		if (!strcmp(rft->name, name))
1885  			return rft;
1886  	}
1887  
1888  	return NULL;
1889  }
1890  
1891  void __init thread_throttle_mode_init(void)
1892  {
1893  	struct rftype *rft;
1894  
1895  	rft = rdtgroup_get_rftype_by_name("thread_throttle_mode");
1896  	if (!rft)
1897  		return;
1898  
1899  	rft->fflags = RF_CTRL_INFO | RFTYPE_RES_MB;
1900  }
1901  
1902  void __init mbm_config_rftype_init(const char *config)
1903  {
1904  	struct rftype *rft;
1905  
1906  	rft = rdtgroup_get_rftype_by_name(config);
1907  	if (rft)
1908  		rft->fflags = RF_MON_INFO | RFTYPE_RES_CACHE;
1909  }
1910  
1911  /**
1912   * rdtgroup_kn_mode_restrict - Restrict user access to named resctrl file
1913   * @r: The resource group with which the file is associated.
1914   * @name: Name of the file
1915   *
1916   * The permissions of named resctrl file, directory, or link are modified
1917   * to not allow read, write, or execute by any user.
1918   *
1919   * WARNING: This function is intended to communicate to the user that the
1920   * resctrl file has been locked down - that it is not relevant to the
1921   * particular state the system finds itself in. It should not be relied
1922   * on to protect from user access because after the file's permissions
1923   * are restricted the user can still change the permissions using chmod
1924   * from the command line.
1925   *
1926   * Return: 0 on success, <0 on failure.
1927   */
1928  int rdtgroup_kn_mode_restrict(struct rdtgroup *r, const char *name)
1929  {
1930  	struct iattr iattr = {.ia_valid = ATTR_MODE,};
1931  	struct kernfs_node *kn;
1932  	int ret = 0;
1933  
1934  	kn = kernfs_find_and_get_ns(r->kn, name, NULL);
1935  	if (!kn)
1936  		return -ENOENT;
1937  
1938  	switch (kernfs_type(kn)) {
1939  	case KERNFS_DIR:
1940  		iattr.ia_mode = S_IFDIR;
1941  		break;
1942  	case KERNFS_FILE:
1943  		iattr.ia_mode = S_IFREG;
1944  		break;
1945  	case KERNFS_LINK:
1946  		iattr.ia_mode = S_IFLNK;
1947  		break;
1948  	}
1949  
1950  	ret = kernfs_setattr(kn, &iattr);
1951  	kernfs_put(kn);
1952  	return ret;
1953  }
1954  
1955  /**
1956   * rdtgroup_kn_mode_restore - Restore user access to named resctrl file
1957   * @r: The resource group with which the file is associated.
1958   * @name: Name of the file
1959   * @mask: Mask of permissions that should be restored
1960   *
1961   * Restore the permissions of the named file. If @name is a directory the
1962   * permissions of its parent will be used.
1963   *
1964   * Return: 0 on success, <0 on failure.
1965   */
1966  int rdtgroup_kn_mode_restore(struct rdtgroup *r, const char *name,
1967  			     umode_t mask)
1968  {
1969  	struct iattr iattr = {.ia_valid = ATTR_MODE,};
1970  	struct kernfs_node *kn, *parent;
1971  	struct rftype *rfts, *rft;
1972  	int ret, len;
1973  
1974  	rfts = res_common_files;
1975  	len = ARRAY_SIZE(res_common_files);
1976  
1977  	for (rft = rfts; rft < rfts + len; rft++) {
1978  		if (!strcmp(rft->name, name))
1979  			iattr.ia_mode = rft->mode & mask;
1980  	}
1981  
1982  	kn = kernfs_find_and_get_ns(r->kn, name, NULL);
1983  	if (!kn)
1984  		return -ENOENT;
1985  
1986  	switch (kernfs_type(kn)) {
1987  	case KERNFS_DIR:
1988  		parent = kernfs_get_parent(kn);
1989  		if (parent) {
1990  			iattr.ia_mode |= parent->mode;
1991  			kernfs_put(parent);
1992  		}
1993  		iattr.ia_mode |= S_IFDIR;
1994  		break;
1995  	case KERNFS_FILE:
1996  		iattr.ia_mode |= S_IFREG;
1997  		break;
1998  	case KERNFS_LINK:
1999  		iattr.ia_mode |= S_IFLNK;
2000  		break;
2001  	}
2002  
2003  	ret = kernfs_setattr(kn, &iattr);
2004  	kernfs_put(kn);
2005  	return ret;
2006  }
2007  
2008  static int rdtgroup_mkdir_info_resdir(void *priv, char *name,
2009  				      unsigned long fflags)
2010  {
2011  	struct kernfs_node *kn_subdir;
2012  	int ret;
2013  
2014  	kn_subdir = kernfs_create_dir(kn_info, name,
2015  				      kn_info->mode, priv);
2016  	if (IS_ERR(kn_subdir))
2017  		return PTR_ERR(kn_subdir);
2018  
2019  	ret = rdtgroup_kn_set_ugid(kn_subdir);
2020  	if (ret)
2021  		return ret;
2022  
2023  	ret = rdtgroup_add_files(kn_subdir, fflags);
2024  	if (!ret)
2025  		kernfs_activate(kn_subdir);
2026  
2027  	return ret;
2028  }
2029  
2030  static int rdtgroup_create_info_dir(struct kernfs_node *parent_kn)
2031  {
2032  	struct resctrl_schema *s;
2033  	struct rdt_resource *r;
2034  	unsigned long fflags;
2035  	char name[32];
2036  	int ret;
2037  
2038  	/* create the directory */
2039  	kn_info = kernfs_create_dir(parent_kn, "info", parent_kn->mode, NULL);
2040  	if (IS_ERR(kn_info))
2041  		return PTR_ERR(kn_info);
2042  
2043  	ret = rdtgroup_add_files(kn_info, RF_TOP_INFO);
2044  	if (ret)
2045  		goto out_destroy;
2046  
2047  	/* Loop over enabled controls; these are all alloc_capable */
2048  	list_for_each_entry(s, &resctrl_schema_all, list) {
2049  		r = s->res;
2050  		fflags = r->fflags | RF_CTRL_INFO;
2051  		ret = rdtgroup_mkdir_info_resdir(s, s->name, fflags);
2052  		if (ret)
2053  			goto out_destroy;
2054  	}
2055  
2056  	for_each_mon_capable_rdt_resource(r) {
2057  		fflags = r->fflags | RF_MON_INFO;
2058  		sprintf(name, "%s_MON", r->name);
2059  		ret = rdtgroup_mkdir_info_resdir(r, name, fflags);
2060  		if (ret)
2061  			goto out_destroy;
2062  	}
2063  
2064  	ret = rdtgroup_kn_set_ugid(kn_info);
2065  	if (ret)
2066  		goto out_destroy;
2067  
2068  	kernfs_activate(kn_info);
2069  
2070  	return 0;
2071  
2072  out_destroy:
2073  	kernfs_remove(kn_info);
2074  	return ret;
2075  }
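
/*
 * For illustration only: on a hypothetical system with L3 allocation,
 * L3 monitoring and MB allocation, the hierarchy created here would
 * resemble:
 *
 *   /sys/fs/resctrl/info/last_cmd_status
 *   /sys/fs/resctrl/info/L3/{cbm_mask,min_cbm_bits,shareable_bits,...}
 *   /sys/fs/resctrl/info/L3_MON/{num_rmids,mon_features,...}
 *   /sys/fs/resctrl/info/MB/{bandwidth_gran,delay_linear,min_bandwidth,...}
 */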
2076  
2077  static int
2078  mongroup_create_dir(struct kernfs_node *parent_kn, struct rdtgroup *prgrp,
2079  		    char *name, struct kernfs_node **dest_kn)
2080  {
2081  	struct kernfs_node *kn;
2082  	int ret;
2083  
2084  	/* create the directory */
2085  	kn = kernfs_create_dir(parent_kn, name, parent_kn->mode, prgrp);
2086  	if (IS_ERR(kn))
2087  		return PTR_ERR(kn);
2088  
2089  	if (dest_kn)
2090  		*dest_kn = kn;
2091  
2092  	ret = rdtgroup_kn_set_ugid(kn);
2093  	if (ret)
2094  		goto out_destroy;
2095  
2096  	kernfs_activate(kn);
2097  
2098  	return 0;
2099  
2100  out_destroy:
2101  	kernfs_remove(kn);
2102  	return ret;
2103  }
2104  
2105  static void l3_qos_cfg_update(void *arg)
2106  {
2107  	bool *enable = arg;
2108  
2109  	wrmsrl(MSR_IA32_L3_QOS_CFG, *enable ? L3_QOS_CDP_ENABLE : 0ULL);
2110  }
2111  
2112  static void l2_qos_cfg_update(void *arg)
2113  {
2114  	bool *enable = arg;
2115  
2116  	wrmsrl(MSR_IA32_L2_QOS_CFG, *enable ? L2_QOS_CDP_ENABLE : 0ULL);
2117  }
2118  
2119  static inline bool is_mba_linear(void)
2120  {
2121  	return rdt_resources_all[RDT_RESOURCE_MBA].r_resctrl.membw.delay_linear;
2122  }
2123  
2124  static int set_cache_qos_cfg(int level, bool enable)
2125  {
2126  	void (*update)(void *arg);
2127  	struct rdt_resource *r_l;
2128  	cpumask_var_t cpu_mask;
2129  	struct rdt_domain *d;
2130  	int cpu;
2131  
2132  	if (level == RDT_RESOURCE_L3)
2133  		update = l3_qos_cfg_update;
2134  	else if (level == RDT_RESOURCE_L2)
2135  		update = l2_qos_cfg_update;
2136  	else
2137  		return -EINVAL;
2138  
2139  	if (!zalloc_cpumask_var(&cpu_mask, GFP_KERNEL))
2140  		return -ENOMEM;
2141  
2142  	r_l = &rdt_resources_all[level].r_resctrl;
2143  	list_for_each_entry(d, &r_l->domains, list) {
2144  		if (r_l->cache.arch_has_per_cpu_cfg)
2145  			/* Pick all the CPUs in the domain instance */
2146  			for_each_cpu(cpu, &d->cpu_mask)
2147  				cpumask_set_cpu(cpu, cpu_mask);
2148  		else
2149  			/* Pick one CPU from each domain instance to update MSR */
2150  			cpumask_set_cpu(cpumask_any(&d->cpu_mask), cpu_mask);
2151  	}
2152  
2153  	/* Update QOS_CFG MSR on all the CPUs in cpu_mask */
2154  	on_each_cpu_mask(cpu_mask, update, &enable, 1);
2155  
2156  	free_cpumask_var(cpu_mask);
2157  
2158  	return 0;
2159  }
2160  
2161  /* Restore the qos cfg state when a domain comes online */
2162  void rdt_domain_reconfigure_cdp(struct rdt_resource *r)
2163  {
2164  	struct rdt_hw_resource *hw_res = resctrl_to_arch_res(r);
2165  
2166  	if (!r->cdp_capable)
2167  		return;
2168  
2169  	if (r->rid == RDT_RESOURCE_L2)
2170  		l2_qos_cfg_update(&hw_res->cdp_enabled);
2171  
2172  	if (r->rid == RDT_RESOURCE_L3)
2173  		l3_qos_cfg_update(&hw_res->cdp_enabled);
2174  }
2175  
2176  static int mba_sc_domain_allocate(struct rdt_resource *r, struct rdt_domain *d)
2177  {
2178  	u32 num_closid = resctrl_arch_get_num_closid(r);
2179  	int cpu = cpumask_any(&d->cpu_mask);
2180  	int i;
2181  
2182  	d->mbps_val = kcalloc_node(num_closid, sizeof(*d->mbps_val),
2183  				   GFP_KERNEL, cpu_to_node(cpu));
2184  	if (!d->mbps_val)
2185  		return -ENOMEM;
2186  
2187  	for (i = 0; i < num_closid; i++)
2188  		d->mbps_val[i] = MBA_MAX_MBPS;
2189  
2190  	return 0;
2191  }
2192  
2193  static void mba_sc_domain_destroy(struct rdt_resource *r,
2194  				  struct rdt_domain *d)
2195  {
2196  	kfree(d->mbps_val);
2197  	d->mbps_val = NULL;
2198  }
2199  
2200  /*
2201   * MBA software controller is supported only if
2202   * MBM is supported and MBA is in linear scale.
2203   */
2204  static bool supports_mba_mbps(void)
2205  {
2206  	struct rdt_resource *r = &rdt_resources_all[RDT_RESOURCE_MBA].r_resctrl;
2207  
2208  	return (is_mbm_local_enabled() &&
2209  		r->alloc_capable && is_mba_linear());
2210  }
2211  
2212  /*
2213   * Enable or disable the MBA software controller
2214   * which helps user specify bandwidth in MBps.
2215   */
2216  static int set_mba_sc(bool mba_sc)
2217  {
2218  	struct rdt_resource *r = &rdt_resources_all[RDT_RESOURCE_MBA].r_resctrl;
2219  	u32 num_closid = resctrl_arch_get_num_closid(r);
2220  	struct rdt_domain *d;
2221  	int i;
2222  
2223  	if (!supports_mba_mbps() || mba_sc == is_mba_sc(r))
2224  		return -EINVAL;
2225  
2226  	r->membw.mba_sc = mba_sc;
2227  
2228  	list_for_each_entry(d, &r->domains, list) {
2229  		for (i = 0; i < num_closid; i++)
2230  			d->mbps_val[i] = MBA_MAX_MBPS;
2231  	}
2232  
2233  	return 0;
2234  }
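
/*
 * Sketch of the user-visible effect, assuming resctrl is mounted with
 * the "mba_MBps" option on a system where supports_mba_mbps() holds:
 * the MB line of a group's schemata is then expressed in MBps rather
 * than as a percentage, e.g. (hypothetical group name "grp1"):
 *
 *   # echo "MB:0=2048" > /sys/fs/resctrl/grp1/schemata
 *
 * requests roughly 2048 MBps on domain 0 for that group.
 */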
2235  
2236  static int cdp_enable(int level)
2237  {
2238  	struct rdt_resource *r_l = &rdt_resources_all[level].r_resctrl;
2239  	int ret;
2240  
2241  	if (!r_l->alloc_capable)
2242  		return -EINVAL;
2243  
2244  	ret = set_cache_qos_cfg(level, true);
2245  	if (!ret)
2246  		rdt_resources_all[level].cdp_enabled = true;
2247  
2248  	return ret;
2249  }
2250  
2251  static void cdp_disable(int level)
2252  {
2253  	struct rdt_hw_resource *r_hw = &rdt_resources_all[level];
2254  
2255  	if (r_hw->cdp_enabled) {
2256  		set_cache_qos_cfg(level, false);
2257  		r_hw->cdp_enabled = false;
2258  	}
2259  }
2260  
2261  int resctrl_arch_set_cdp_enabled(enum resctrl_res_level l, bool enable)
2262  {
2263  	struct rdt_hw_resource *hw_res = &rdt_resources_all[l];
2264  
2265  	if (!hw_res->r_resctrl.cdp_capable)
2266  		return -EINVAL;
2267  
2268  	if (enable)
2269  		return cdp_enable(l);
2270  
2271  	cdp_disable(l);
2272  
2273  	return 0;
2274  }
2275  
2276  static void cdp_disable_all(void)
2277  {
2278  	if (resctrl_arch_get_cdp_enabled(RDT_RESOURCE_L3))
2279  		resctrl_arch_set_cdp_enabled(RDT_RESOURCE_L3, false);
2280  	if (resctrl_arch_get_cdp_enabled(RDT_RESOURCE_L2))
2281  		resctrl_arch_set_cdp_enabled(RDT_RESOURCE_L2, false);
2282  }
2283  
2284  /*
2285   * We don't allow rdtgroup directories to be created anywhere
2286   * except the root directory. Thus, when looking for the rdtgroup
2287   * structure for a kernfs node we are either looking at a directory,
2288   * in which case the rdtgroup structure is pointed at by the "priv"
2289   * field, or at a file, in which case we need only look to its parent
2290   * directory to find the rdtgroup.
2291   */
2292  static struct rdtgroup *kernfs_to_rdtgroup(struct kernfs_node *kn)
2293  {
2294  	if (kernfs_type(kn) == KERNFS_DIR) {
2295  		/*
2296  		 * All the resource directories use "kn->priv"
2297  		 * to point to the "struct rdtgroup" for the
2298  		 * resource. "info" and its subdirectories don't
2299  		 * have rdtgroup structures, so return NULL here.
2300  		 */
2301  		if (kn == kn_info || kn->parent == kn_info)
2302  			return NULL;
2303  		else
2304  			return kn->priv;
2305  	} else {
2306  		return kn->parent->priv;
2307  	}
2308  }
2309  
2310  static void rdtgroup_kn_get(struct rdtgroup *rdtgrp, struct kernfs_node *kn)
2311  {
2312  	atomic_inc(&rdtgrp->waitcount);
2313  	kernfs_break_active_protection(kn);
2314  }
2315  
2316  static void rdtgroup_kn_put(struct rdtgroup *rdtgrp, struct kernfs_node *kn)
2317  {
2318  	if (atomic_dec_and_test(&rdtgrp->waitcount) &&
2319  	    (rdtgrp->flags & RDT_DELETED)) {
2320  		if (rdtgrp->mode == RDT_MODE_PSEUDO_LOCKSETUP ||
2321  		    rdtgrp->mode == RDT_MODE_PSEUDO_LOCKED)
2322  			rdtgroup_pseudo_lock_remove(rdtgrp);
2323  		kernfs_unbreak_active_protection(kn);
2324  		rdtgroup_remove(rdtgrp);
2325  	} else {
2326  		kernfs_unbreak_active_protection(kn);
2327  	}
2328  }
2329  
2330  struct rdtgroup *rdtgroup_kn_lock_live(struct kernfs_node *kn)
2331  {
2332  	struct rdtgroup *rdtgrp = kernfs_to_rdtgroup(kn);
2333  
2334  	if (!rdtgrp)
2335  		return NULL;
2336  
2337  	rdtgroup_kn_get(rdtgrp, kn);
2338  
2339  	mutex_lock(&rdtgroup_mutex);
2340  
2341  	/* Was this group deleted while we waited? */
2342  	if (rdtgrp->flags & RDT_DELETED)
2343  		return NULL;
2344  
2345  	return rdtgrp;
2346  }
2347  
2348  void rdtgroup_kn_unlock(struct kernfs_node *kn)
2349  {
2350  	struct rdtgroup *rdtgrp = kernfs_to_rdtgroup(kn);
2351  
2352  	if (!rdtgrp)
2353  		return;
2354  
2355  	mutex_unlock(&rdtgroup_mutex);
2356  	rdtgroup_kn_put(rdtgrp, kn);
2357  }
2358  
2359  static int mkdir_mondata_all(struct kernfs_node *parent_kn,
2360  			     struct rdtgroup *prgrp,
2361  			     struct kernfs_node **mon_data_kn);
2362  
2363  static int rdt_enable_ctx(struct rdt_fs_context *ctx)
2364  {
2365  	int ret = 0;
2366  
2367  	if (ctx->enable_cdpl2)
2368  		ret = resctrl_arch_set_cdp_enabled(RDT_RESOURCE_L2, true);
2369  
2370  	if (!ret && ctx->enable_cdpl3)
2371  		ret = resctrl_arch_set_cdp_enabled(RDT_RESOURCE_L3, true);
2372  
2373  	if (!ret && ctx->enable_mba_mbps)
2374  		ret = set_mba_sc(true);
2375  
2376  	return ret;
2377  }
2378  
2379  static int schemata_list_add(struct rdt_resource *r, enum resctrl_conf_type type)
2380  {
2381  	struct resctrl_schema *s;
2382  	const char *suffix = "";
2383  	int ret, cl;
2384  
2385  	s = kzalloc(sizeof(*s), GFP_KERNEL);
2386  	if (!s)
2387  		return -ENOMEM;
2388  
2389  	s->res = r;
2390  	s->num_closid = resctrl_arch_get_num_closid(r);
2391  	if (resctrl_arch_get_cdp_enabled(r->rid))
2392  		s->num_closid /= 2;
2393  
2394  	s->conf_type = type;
2395  	switch (type) {
2396  	case CDP_CODE:
2397  		suffix = "CODE";
2398  		break;
2399  	case CDP_DATA:
2400  		suffix = "DATA";
2401  		break;
2402  	case CDP_NONE:
2403  		suffix = "";
2404  		break;
2405  	}
2406  
2407  	ret = snprintf(s->name, sizeof(s->name), "%s%s", r->name, suffix);
2408  	if (ret >= sizeof(s->name)) {
2409  		kfree(s);
2410  		return -EINVAL;
2411  	}
2412  
2413  	cl = strlen(s->name);
2414  
2415  	/*
2416  	 * If CDP is supported by this resource, but not enabled, account for
2417  	 * the length of the CODE/DATA suffix. This ensures the tabular format
2418  	 * of the schemata file does not change between mounts of the filesystem.
2419  	 */
2420  	if (r->cdp_capable && !resctrl_arch_get_cdp_enabled(r->rid))
2421  		cl += 4;
2422  
2423  	if (cl > max_name_width)
2424  		max_name_width = cl;
2425  
2426  	INIT_LIST_HEAD(&s->list);
2427  	list_add(&s->list, &resctrl_schema_all);
2428  
2429  	return 0;
2430  }
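
/*
 * Example, assuming an L3 with a 20-bit CBM: with CDP enabled two
 * schema entries are created and the schemata file shows separate
 * code/data lines:
 *
 *   L3CODE:0=fffff;1=fffff
 *   L3DATA:0=fffff;1=fffff
 *
 * With CDP merely capable but disabled only "L3:..." appears, and the
 * extra width reserved above keeps the columns aligned across mounts.
 */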
2431  
2432  static int schemata_list_create(void)
2433  {
2434  	struct rdt_resource *r;
2435  	int ret = 0;
2436  
2437  	for_each_alloc_capable_rdt_resource(r) {
2438  		if (resctrl_arch_get_cdp_enabled(r->rid)) {
2439  			ret = schemata_list_add(r, CDP_CODE);
2440  			if (ret)
2441  				break;
2442  
2443  			ret = schemata_list_add(r, CDP_DATA);
2444  		} else {
2445  			ret = schemata_list_add(r, CDP_NONE);
2446  		}
2447  
2448  		if (ret)
2449  			break;
2450  	}
2451  
2452  	return ret;
2453  }
2454  
2455  static void schemata_list_destroy(void)
2456  {
2457  	struct resctrl_schema *s, *tmp;
2458  
2459  	list_for_each_entry_safe(s, tmp, &resctrl_schema_all, list) {
2460  		list_del(&s->list);
2461  		kfree(s);
2462  	}
2463  }
2464  
2465  static int rdt_get_tree(struct fs_context *fc)
2466  {
2467  	struct rdt_fs_context *ctx = rdt_fc2context(fc);
2468  	struct rdt_domain *dom;
2469  	struct rdt_resource *r;
2470  	int ret;
2471  
2472  	cpus_read_lock();
2473  	mutex_lock(&rdtgroup_mutex);
2474  	/*
2475  	 * resctrl file system can only be mounted once.
2476  	 */
2477  	if (static_branch_unlikely(&rdt_enable_key)) {
2478  		ret = -EBUSY;
2479  		goto out;
2480  	}
2481  
2482  	ret = rdt_enable_ctx(ctx);
2483  	if (ret < 0)
2484  		goto out_cdp;
2485  
2486  	ret = schemata_list_create();
2487  	if (ret) {
2488  		schemata_list_destroy();
2489  		goto out_mba;
2490  	}
2491  
2492  	closid_init();
2493  
2494  	ret = rdtgroup_create_info_dir(rdtgroup_default.kn);
2495  	if (ret < 0)
2496  		goto out_schemata_free;
2497  
2498  	if (rdt_mon_capable) {
2499  		ret = mongroup_create_dir(rdtgroup_default.kn,
2500  					  &rdtgroup_default, "mon_groups",
2501  					  &kn_mongrp);
2502  		if (ret < 0)
2503  			goto out_info;
2504  
2505  		ret = mkdir_mondata_all(rdtgroup_default.kn,
2506  					&rdtgroup_default, &kn_mondata);
2507  		if (ret < 0)
2508  			goto out_mongrp;
2509  		rdtgroup_default.mon.mon_data_kn = kn_mondata;
2510  	}
2511  
2512  	ret = rdt_pseudo_lock_init();
2513  	if (ret)
2514  		goto out_mondata;
2515  
2516  	ret = kernfs_get_tree(fc);
2517  	if (ret < 0)
2518  		goto out_psl;
2519  
2520  	if (rdt_alloc_capable)
2521  		static_branch_enable_cpuslocked(&rdt_alloc_enable_key);
2522  	if (rdt_mon_capable)
2523  		static_branch_enable_cpuslocked(&rdt_mon_enable_key);
2524  
2525  	if (rdt_alloc_capable || rdt_mon_capable)
2526  		static_branch_enable_cpuslocked(&rdt_enable_key);
2527  
2528  	if (is_mbm_enabled()) {
2529  		r = &rdt_resources_all[RDT_RESOURCE_L3].r_resctrl;
2530  		list_for_each_entry(dom, &r->domains, list)
2531  			mbm_setup_overflow_handler(dom, MBM_OVERFLOW_INTERVAL);
2532  	}
2533  
2534  	goto out;
2535  
2536  out_psl:
2537  	rdt_pseudo_lock_release();
2538  out_mondata:
2539  	if (rdt_mon_capable)
2540  		kernfs_remove(kn_mondata);
2541  out_mongrp:
2542  	if (rdt_mon_capable)
2543  		kernfs_remove(kn_mongrp);
2544  out_info:
2545  	kernfs_remove(kn_info);
2546  out_schemata_free:
2547  	schemata_list_destroy();
2548  out_mba:
2549  	if (ctx->enable_mba_mbps)
2550  		set_mba_sc(false);
2551  out_cdp:
2552  	cdp_disable_all();
2553  out:
2554  	rdt_last_cmd_clear();
2555  	mutex_unlock(&rdtgroup_mutex);
2556  	cpus_read_unlock();
2557  	return ret;
2558  }
2559  
2560  enum rdt_param {
2561  	Opt_cdp,
2562  	Opt_cdpl2,
2563  	Opt_mba_mbps,
2564  	nr__rdt_params
2565  };
2566  
2567  static const struct fs_parameter_spec rdt_fs_parameters[] = {
2568  	fsparam_flag("cdp",		Opt_cdp),
2569  	fsparam_flag("cdpl2",		Opt_cdpl2),
2570  	fsparam_flag("mba_MBps",	Opt_mba_mbps),
2571  	{}
2572  };
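
/*
 * Illustrative mount invocations mapping to the parameters above
 * (assuming the conventional /sys/fs/resctrl mount point):
 *
 *   # mount -t resctrl resctrl /sys/fs/resctrl
 *   # mount -t resctrl -o cdp resctrl /sys/fs/resctrl
 *   # mount -t resctrl -o cdpl2,mba_MBps resctrl /sys/fs/resctrl
 */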
2573  
2574  static int rdt_parse_param(struct fs_context *fc, struct fs_parameter *param)
2575  {
2576  	struct rdt_fs_context *ctx = rdt_fc2context(fc);
2577  	struct fs_parse_result result;
2578  	int opt;
2579  
2580  	opt = fs_parse(fc, rdt_fs_parameters, param, &result);
2581  	if (opt < 0)
2582  		return opt;
2583  
2584  	switch (opt) {
2585  	case Opt_cdp:
2586  		ctx->enable_cdpl3 = true;
2587  		return 0;
2588  	case Opt_cdpl2:
2589  		ctx->enable_cdpl2 = true;
2590  		return 0;
2591  	case Opt_mba_mbps:
2592  		if (!supports_mba_mbps())
2593  			return -EINVAL;
2594  		ctx->enable_mba_mbps = true;
2595  		return 0;
2596  	}
2597  
2598  	return -EINVAL;
2599  }
2600  
2601  static void rdt_fs_context_free(struct fs_context *fc)
2602  {
2603  	struct rdt_fs_context *ctx = rdt_fc2context(fc);
2604  
2605  	kernfs_free_fs_context(fc);
2606  	kfree(ctx);
2607  }
2608  
2609  static const struct fs_context_operations rdt_fs_context_ops = {
2610  	.free		= rdt_fs_context_free,
2611  	.parse_param	= rdt_parse_param,
2612  	.get_tree	= rdt_get_tree,
2613  };
2614  
2615  static int rdt_init_fs_context(struct fs_context *fc)
2616  {
2617  	struct rdt_fs_context *ctx;
2618  
2619  	ctx = kzalloc(sizeof(struct rdt_fs_context), GFP_KERNEL);
2620  	if (!ctx)
2621  		return -ENOMEM;
2622  
2623  	ctx->kfc.root = rdt_root;
2624  	ctx->kfc.magic = RDTGROUP_SUPER_MAGIC;
2625  	fc->fs_private = &ctx->kfc;
2626  	fc->ops = &rdt_fs_context_ops;
2627  	put_user_ns(fc->user_ns);
2628  	fc->user_ns = get_user_ns(&init_user_ns);
2629  	fc->global = true;
2630  	return 0;
2631  }
2632  
2633  static int reset_all_ctrls(struct rdt_resource *r)
2634  {
2635  	struct rdt_hw_resource *hw_res = resctrl_to_arch_res(r);
2636  	struct rdt_hw_domain *hw_dom;
2637  	struct msr_param msr_param;
2638  	cpumask_var_t cpu_mask;
2639  	struct rdt_domain *d;
2640  	int i;
2641  
2642  	if (!zalloc_cpumask_var(&cpu_mask, GFP_KERNEL))
2643  		return -ENOMEM;
2644  
2645  	msr_param.res = r;
2646  	msr_param.low = 0;
2647  	msr_param.high = hw_res->num_closid;
2648  
2649  	/*
2650  	 * Disable resource control for this resource by setting all
2651  	 * CBMs in all domains to the maximum mask value. Pick one CPU
2652  	 * from each domain to update the MSRs below.
2653  	 */
2654  	list_for_each_entry(d, &r->domains, list) {
2655  		hw_dom = resctrl_to_arch_dom(d);
2656  		cpumask_set_cpu(cpumask_any(&d->cpu_mask), cpu_mask);
2657  
2658  		for (i = 0; i < hw_res->num_closid; i++)
2659  			hw_dom->ctrl_val[i] = r->default_ctrl;
2660  	}
2661  
2662  	/* Update CBM on all the CPUs in cpu_mask */
2663  	on_each_cpu_mask(cpu_mask, rdt_ctrl_update, &msr_param, 1);
2664  
2665  	free_cpumask_var(cpu_mask);
2666  
2667  	return 0;
2668  }
2669  
2670  /*
2671   * Move tasks from one to the other group. If @from is NULL, then all tasks
2672   * in the system are moved unconditionally (used for teardown).
2673   *
2674   * If @mask is not NULL the cpus on which moved tasks are running are set
2675   * in that mask so the update smp function call is restricted to affected
2676   * cpus.
2677   */
2678  static void rdt_move_group_tasks(struct rdtgroup *from, struct rdtgroup *to,
2679  				 struct cpumask *mask)
2680  {
2681  	struct task_struct *p, *t;
2682  
2683  	read_lock(&tasklist_lock);
2684  	for_each_process_thread(p, t) {
2685  		if (!from || is_closid_match(t, from) ||
2686  		    is_rmid_match(t, from)) {
2687  			WRITE_ONCE(t->closid, to->closid);
2688  			WRITE_ONCE(t->rmid, to->mon.rmid);
2689  
2690  			/*
2691  			 * Order the closid/rmid stores above before the loads
2692  			 * in task_curr(). This pairs with the full barrier
2693  			 * between the rq->curr update and resctrl_sched_in()
2694  			 * during context switch.
2695  			 */
2696  			smp_mb();
2697  
2698  			/*
2699  			 * If the task is on a CPU, set the CPU in the mask.
2700  			 * The detection is inaccurate as tasks might move or
2701  			 * schedule before the smp function call takes place.
2702  			 * In such a case the function call is pointless, but
2703  			 * there is no other side effect.
2704  			 */
2705  			if (IS_ENABLED(CONFIG_SMP) && mask && task_curr(t))
2706  				cpumask_set_cpu(task_cpu(t), mask);
2707  		}
2708  	}
2709  	read_unlock(&tasklist_lock);
2710  }
2711  
2712  static void free_all_child_rdtgrp(struct rdtgroup *rdtgrp)
2713  {
2714  	struct rdtgroup *sentry, *stmp;
2715  	struct list_head *head;
2716  
2717  	head = &rdtgrp->mon.crdtgrp_list;
2718  	list_for_each_entry_safe(sentry, stmp, head, mon.crdtgrp_list) {
2719  		free_rmid(sentry->mon.rmid);
2720  		list_del(&sentry->mon.crdtgrp_list);
2721  
2722  		if (atomic_read(&sentry->waitcount) != 0)
2723  			sentry->flags = RDT_DELETED;
2724  		else
2725  			rdtgroup_remove(sentry);
2726  	}
2727  }
2728  
2729  /*
2730   * Forcibly remove all subdirectories under the root directory.
2731   */
2732  static void rmdir_all_sub(void)
2733  {
2734  	struct rdtgroup *rdtgrp, *tmp;
2735  
2736  	/* Move all tasks to the default resource group */
2737  	rdt_move_group_tasks(NULL, &rdtgroup_default, NULL);
2738  
2739  	list_for_each_entry_safe(rdtgrp, tmp, &rdt_all_groups, rdtgroup_list) {
2740  		/* Free any child rmids */
2741  		free_all_child_rdtgrp(rdtgrp);
2742  
2743  		/* Remove each rdtgroup other than root */
2744  		if (rdtgrp == &rdtgroup_default)
2745  			continue;
2746  
2747  		if (rdtgrp->mode == RDT_MODE_PSEUDO_LOCKSETUP ||
2748  		    rdtgrp->mode == RDT_MODE_PSEUDO_LOCKED)
2749  			rdtgroup_pseudo_lock_remove(rdtgrp);
2750  
2751  		/*
2752  		 * Give any CPUs back to the default group. We cannot copy
2753  		 * cpu_online_mask because a CPU might have executed the
2754  		 * offline callback already, but is still marked online.
2755  		 */
2756  		cpumask_or(&rdtgroup_default.cpu_mask,
2757  			   &rdtgroup_default.cpu_mask, &rdtgrp->cpu_mask);
2758  
2759  		free_rmid(rdtgrp->mon.rmid);
2760  
2761  		kernfs_remove(rdtgrp->kn);
2762  		list_del(&rdtgrp->rdtgroup_list);
2763  
2764  		if (atomic_read(&rdtgrp->waitcount) != 0)
2765  			rdtgrp->flags = RDT_DELETED;
2766  		else
2767  			rdtgroup_remove(rdtgrp);
2768  	}
2769  	/* Notify online CPUs to update per cpu storage and PQR_ASSOC MSR */
2770  	update_closid_rmid(cpu_online_mask, &rdtgroup_default);
2771  
2772  	kernfs_remove(kn_info);
2773  	kernfs_remove(kn_mongrp);
2774  	kernfs_remove(kn_mondata);
2775  }
2776  
2777  static void rdt_kill_sb(struct super_block *sb)
2778  {
2779  	struct rdt_resource *r;
2780  
2781  	cpus_read_lock();
2782  	mutex_lock(&rdtgroup_mutex);
2783  
2784  	set_mba_sc(false);
2785  
2786  	/* Put everything back to default values. */
2787  	for_each_alloc_capable_rdt_resource(r)
2788  		reset_all_ctrls(r);
2789  	cdp_disable_all();
2790  	rmdir_all_sub();
2791  	rdt_pseudo_lock_release();
2792  	rdtgroup_default.mode = RDT_MODE_SHAREABLE;
2793  	schemata_list_destroy();
2794  	static_branch_disable_cpuslocked(&rdt_alloc_enable_key);
2795  	static_branch_disable_cpuslocked(&rdt_mon_enable_key);
2796  	static_branch_disable_cpuslocked(&rdt_enable_key);
2797  	kernfs_kill_sb(sb);
2798  	mutex_unlock(&rdtgroup_mutex);
2799  	cpus_read_unlock();
2800  }
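
/*
 * rdt_kill_sb() above runs when the filesystem is unmounted, e.g. via
 * "umount /sys/fs/resctrl", after which all resource groups are gone
 * and the hardware is back at its default (unrestricted) allocations.
 */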
2801  
2802  static struct file_system_type rdt_fs_type = {
2803  	.name			= "resctrl",
2804  	.init_fs_context	= rdt_init_fs_context,
2805  	.parameters		= rdt_fs_parameters,
2806  	.kill_sb		= rdt_kill_sb,
2807  };
2808  
2809  static int mon_addfile(struct kernfs_node *parent_kn, const char *name,
2810  		       void *priv)
2811  {
2812  	struct kernfs_node *kn;
2813  	int ret = 0;
2814  
2815  	kn = __kernfs_create_file(parent_kn, name, 0444,
2816  				  GLOBAL_ROOT_UID, GLOBAL_ROOT_GID, 0,
2817  				  &kf_mondata_ops, priv, NULL, NULL);
2818  	if (IS_ERR(kn))
2819  		return PTR_ERR(kn);
2820  
2821  	ret = rdtgroup_kn_set_ugid(kn);
2822  	if (ret) {
2823  		kernfs_remove(kn);
2824  		return ret;
2825  	}
2826  
2827  	return ret;
2828  }
2829  
2830  /*
2831   * Remove all subdirectories of mon_data of ctrl_mon groups
2832   * and monitor groups with the given domain id.
2833   */
2834  static void rmdir_mondata_subdir_allrdtgrp(struct rdt_resource *r,
2835  					   unsigned int dom_id)
2836  {
2837  	struct rdtgroup *prgrp, *crgrp;
2838  	char name[32];
2839  
2840  	list_for_each_entry(prgrp, &rdt_all_groups, rdtgroup_list) {
2841  		sprintf(name, "mon_%s_%02d", r->name, dom_id);
2842  		kernfs_remove_by_name(prgrp->mon.mon_data_kn, name);
2843  
2844  		list_for_each_entry(crgrp, &prgrp->mon.crdtgrp_list, mon.crdtgrp_list)
2845  			kernfs_remove_by_name(crgrp->mon.mon_data_kn, name);
2846  	}
2847  }
2848  
2849  static int mkdir_mondata_subdir(struct kernfs_node *parent_kn,
2850  				struct rdt_domain *d,
2851  				struct rdt_resource *r, struct rdtgroup *prgrp)
2852  {
2853  	union mon_data_bits priv;
2854  	struct kernfs_node *kn;
2855  	struct mon_evt *mevt;
2856  	struct rmid_read rr;
2857  	char name[32];
2858  	int ret;
2859  
2860  	sprintf(name, "mon_%s_%02d", r->name, d->id);
2861  	/* create the directory */
2862  	kn = kernfs_create_dir(parent_kn, name, parent_kn->mode, prgrp);
2863  	if (IS_ERR(kn))
2864  		return PTR_ERR(kn);
2865  
2866  	ret = rdtgroup_kn_set_ugid(kn);
2867  	if (ret)
2868  		goto out_destroy;
2869  
2870  	if (WARN_ON(list_empty(&r->evt_list))) {
2871  		ret = -EPERM;
2872  		goto out_destroy;
2873  	}
2874  
2875  	priv.u.rid = r->rid;
2876  	priv.u.domid = d->id;
2877  	list_for_each_entry(mevt, &r->evt_list, list) {
2878  		priv.u.evtid = mevt->evtid;
2879  		ret = mon_addfile(kn, mevt->name, priv.priv);
2880  		if (ret)
2881  			goto out_destroy;
2882  
2883  		if (is_mbm_event(mevt->evtid))
2884  			mon_event_read(&rr, r, d, prgrp, mevt->evtid, true);
2885  	}
2886  	kernfs_activate(kn);
2887  	return 0;
2888  
2889  out_destroy:
2890  	kernfs_remove(kn);
2891  	return ret;
2892  }
2893  
2894  /*
2895   * Add all subdirectories of mon_data for "ctrl_mon" groups
2896   * and "monitor" groups with the given domain id.
2897   */
2898  static void mkdir_mondata_subdir_allrdtgrp(struct rdt_resource *r,
2899  					   struct rdt_domain *d)
2900  {
2901  	struct kernfs_node *parent_kn;
2902  	struct rdtgroup *prgrp, *crgrp;
2903  	struct list_head *head;
2904  
2905  	list_for_each_entry(prgrp, &rdt_all_groups, rdtgroup_list) {
2906  		parent_kn = prgrp->mon.mon_data_kn;
2907  		mkdir_mondata_subdir(parent_kn, d, r, prgrp);
2908  
2909  		head = &prgrp->mon.crdtgrp_list;
2910  		list_for_each_entry(crgrp, head, mon.crdtgrp_list) {
2911  			parent_kn = crgrp->mon.mon_data_kn;
2912  			mkdir_mondata_subdir(parent_kn, d, r, crgrp);
2913  		}
2914  	}
2915  }
2916  
2917  static int mkdir_mondata_subdir_alldom(struct kernfs_node *parent_kn,
2918  				       struct rdt_resource *r,
2919  				       struct rdtgroup *prgrp)
2920  {
2921  	struct rdt_domain *dom;
2922  	int ret;
2923  
2924  	list_for_each_entry(dom, &r->domains, list) {
2925  		ret = mkdir_mondata_subdir(parent_kn, dom, r, prgrp);
2926  		if (ret)
2927  			return ret;
2928  	}
2929  
2930  	return 0;
2931  }
2932  
2933  /*
2934   * This creates a directory mon_data which contains the monitored data.
2935   *
2936   * mon_data has one directory for each domain, named in the format
2937   * mon_<domain_name>_<domain_id>. For example, a mon_data directory
2938   * on a system with L3 domains looks as below:
2939   * ./mon_data:
2940   * mon_L3_00
2941   * mon_L3_01
2942   * mon_L3_02
2943   * ...
2944   *
2945   * Each domain directory has one file per event:
2946   * ./mon_L3_00/:
2947   * llc_occupancy
2948   *
2949   */
2950  static int mkdir_mondata_all(struct kernfs_node *parent_kn,
2951  			     struct rdtgroup *prgrp,
2952  			     struct kernfs_node **dest_kn)
2953  {
2954  	struct rdt_resource *r;
2955  	struct kernfs_node *kn;
2956  	int ret;
2957  
2958  	/*
2959  	 * Create the mon_data directory first.
2960  	 */
2961  	ret = mongroup_create_dir(parent_kn, prgrp, "mon_data", &kn);
2962  	if (ret)
2963  		return ret;
2964  
2965  	if (dest_kn)
2966  		*dest_kn = kn;
2967  
2968  	/*
2969  	 * Create the subdirectories for each domain. Note that all events
2970  	 * in a domain like L3 are grouped into a resource whose domain is L3
2971  	 * in a domain like L3 are grouped into a resource whose domain is L3.
2972  	for_each_mon_capable_rdt_resource(r) {
2973  		ret = mkdir_mondata_subdir_alldom(kn, r, prgrp);
2974  		if (ret)
2975  			goto out_destroy;
2976  	}
2977  
2978  	return 0;
2979  
2980  out_destroy:
2981  	kernfs_remove(kn);
2982  	return ret;
2983  }
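
/*
 * Once this tree exists, user space reads a single counter through the
 * per-domain event file, for instance (illustrative path):
 *
 *   # cat /sys/fs/resctrl/mon_data/mon_L3_00/llc_occupancy
 */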
2984  
2985  /**
2986   * cbm_ensure_valid - Enforce validity on provided CBM
2987   * @_val:	Candidate CBM
2988   * @r:		RDT resource to which the CBM belongs
2989   *
2990   * The provided CBM represents all cache portions available for use. This
2991   * may be represented by a bitmap that does not consist of contiguous ones
2992   * and thus be an invalid CBM.
2993   * Here the provided CBM is forced to be a valid CBM by only considering
2994   * the first set of contiguous bits as valid and clearing all other bits.
2995   * The intention here is to provide a valid default CBM with which a new
2996   * resource group is initialized. The user can follow this with a
2997   * modification to the CBM if the default does not satisfy the
2998   * requirements.
2999   */
3000  static u32 cbm_ensure_valid(u32 _val, struct rdt_resource *r)
3001  {
3002  	unsigned int cbm_len = r->cache.cbm_len;
3003  	unsigned long first_bit, zero_bit;
3004  	unsigned long val = _val;
3005  
3006  	if (!val)
3007  		return 0;
3008  
3009  	first_bit = find_first_bit(&val, cbm_len);
3010  	zero_bit = find_next_zero_bit(&val, cbm_len, first_bit);
3011  
3012  	/* Clear any remaining bits to ensure contiguous region */
3013  	bitmap_clear(&val, zero_bit, cbm_len - zero_bit);
3014  	return (u32)val;
3015  }
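
/*
 * Worked example (hypothetical 8-bit CBM): _val = 0b01011010. The first
 * set bit is bit 1 and the next zero bit is bit 2, so bits 2 and above
 * are cleared and the function returns 0b00000010: only the lowest
 * contiguous run of ones survives.
 */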
3016  
3017  /*
3018   * Initialize cache resources per RDT domain
3019   *
3020   * Set the RDT domain up to start off with all usable allocations. That is,
3021   * all shareable and unused bits. All-zero CBM is invalid.
3022   */
3023  static int __init_one_rdt_domain(struct rdt_domain *d, struct resctrl_schema *s,
3024  				 u32 closid)
3025  {
3026  	enum resctrl_conf_type peer_type = resctrl_peer_type(s->conf_type);
3027  	enum resctrl_conf_type t = s->conf_type;
3028  	struct resctrl_staged_config *cfg;
3029  	struct rdt_resource *r = s->res;
3030  	u32 used_b = 0, unused_b = 0;
3031  	unsigned long tmp_cbm;
3032  	enum rdtgrp_mode mode;
3033  	u32 peer_ctl, ctrl_val;
3034  	int i;
3035  
3036  	cfg = &d->staged_config[t];
3037  	cfg->have_new_ctrl = false;
3038  	cfg->new_ctrl = r->cache.shareable_bits;
3039  	used_b = r->cache.shareable_bits;
3040  	for (i = 0; i < closids_supported(); i++) {
3041  		if (closid_allocated(i) && i != closid) {
3042  			mode = rdtgroup_mode_by_closid(i);
3043  			if (mode == RDT_MODE_PSEUDO_LOCKSETUP)
3044  				/*
3045  				 * ctrl values for locksetup aren't relevant
3046  				 * until the schemata is written, and the mode
3047  				 * becomes RDT_MODE_PSEUDO_LOCKED.
3048  				 */
3049  				continue;
3050  			/*
3051  			 * If CDP is active include peer domain's
3052  			 * usage to ensure there is no overlap
3053  			 * with an exclusive group.
3054  			 */
3055  			if (resctrl_arch_get_cdp_enabled(r->rid))
3056  				peer_ctl = resctrl_arch_get_config(r, d, i,
3057  								   peer_type);
3058  			else
3059  				peer_ctl = 0;
3060  			ctrl_val = resctrl_arch_get_config(r, d, i,
3061  							   s->conf_type);
3062  			used_b |= ctrl_val | peer_ctl;
3063  			if (mode == RDT_MODE_SHAREABLE)
3064  				cfg->new_ctrl |= ctrl_val | peer_ctl;
3065  		}
3066  	}
3067  	if (d->plr && d->plr->cbm > 0)
3068  		used_b |= d->plr->cbm;
3069  	unused_b = used_b ^ (BIT_MASK(r->cache.cbm_len) - 1);
3070  	unused_b &= BIT_MASK(r->cache.cbm_len) - 1;
3071  	cfg->new_ctrl |= unused_b;
3072  	/*
3073  	 * Force the initial CBM to be valid; the user can
3074  	 * modify the CBM based on system availability.
3075  	 */
3076  	cfg->new_ctrl = cbm_ensure_valid(cfg->new_ctrl, r);
3077  	/*
3078  	 * Assign the u32 CBM to an unsigned long to ensure that
3079  	 * bitmap_weight() does not access out-of-bound memory.
3080  	 */
3081  	tmp_cbm = cfg->new_ctrl;
3082  	if (bitmap_weight(&tmp_cbm, r->cache.cbm_len) < r->cache.min_cbm_bits) {
3083  		rdt_last_cmd_printf("No space on %s:%d\n", s->name, d->id);
3084  		return -ENOSPC;
3085  	}
3086  	cfg->have_new_ctrl = true;
3087  
3088  	return 0;
3089  }
3090  
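
/*
 * Worked example with made-up numbers (8-bit CBM, no CDP, no
 * pseudo-locked region): shareable_bits is 0x03 and one other allocated
 * CLOSID, in exclusive mode, owns 0xF0. Then used_b = 0xF3,
 * unused_b = 0x0C, and the new group starts with
 * new_ctrl = 0x03 | 0x0C = 0x0F, i.e. the shareable plus unused
 * portions, already contiguous so cbm_ensure_valid() leaves it as is.
 */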
3091  /*
3092   * Initialize cache resources with default values.
3093   *
3094   * A new RDT group is being created on an allocation capable (CAT)
3095   * supporting system. Set this group up to start off with all usable
3096   * allocations.
3097   *
3098   * If there are no more shareable bits available on any domain then
3099   * the entire allocation will fail.
3100   */
3101  static int rdtgroup_init_cat(struct resctrl_schema *s, u32 closid)
3102  {
3103  	struct rdt_domain *d;
3104  	int ret;
3105  
3106  	list_for_each_entry(d, &s->res->domains, list) {
3107  		ret = __init_one_rdt_domain(d, s, closid);
3108  		if (ret < 0)
3109  			return ret;
3110  	}
3111  
3112  	return 0;
3113  }
3114  
3115  /* Initialize MBA resource with default values. */
3116  static void rdtgroup_init_mba(struct rdt_resource *r, u32 closid)
3117  {
3118  	struct resctrl_staged_config *cfg;
3119  	struct rdt_domain *d;
3120  
3121  	list_for_each_entry(d, &r->domains, list) {
3122  		if (is_mba_sc(r)) {
3123  			d->mbps_val[closid] = MBA_MAX_MBPS;
3124  			continue;
3125  		}
3126  
3127  		cfg = &d->staged_config[CDP_NONE];
3128  		cfg->new_ctrl = r->default_ctrl;
3129  		cfg->have_new_ctrl = true;
3130  	}
3131  }
3132  
3133  /* Initialize the RDT group's allocations. */
3134  static int rdtgroup_init_alloc(struct rdtgroup *rdtgrp)
3135  {
3136  	struct resctrl_schema *s;
3137  	struct rdt_resource *r;
3138  	int ret = 0;
3139  
3140  	rdt_staged_configs_clear();
3141  
3142  	list_for_each_entry(s, &resctrl_schema_all, list) {
3143  		r = s->res;
3144  		if (r->rid == RDT_RESOURCE_MBA ||
3145  		    r->rid == RDT_RESOURCE_SMBA) {
3146  			rdtgroup_init_mba(r, rdtgrp->closid);
3147  			if (is_mba_sc(r))
3148  				continue;
3149  		} else {
3150  			ret = rdtgroup_init_cat(s, rdtgrp->closid);
3151  			if (ret < 0)
3152  				goto out;
3153  		}
3154  
3155  		ret = resctrl_arch_update_domains(r, rdtgrp->closid);
3156  		if (ret < 0) {
3157  			rdt_last_cmd_puts("Failed to initialize allocations\n");
3158  			goto out;
3159  		}
3160  
3161  	}
3162  
3163  	rdtgrp->mode = RDT_MODE_SHAREABLE;
3164  
3165  out:
3166  	rdt_staged_configs_clear();
3167  	return ret;
3168  }
3169  
3170  static int mkdir_rdt_prepare(struct kernfs_node *parent_kn,
3171  			     const char *name, umode_t mode,
3172  			     enum rdt_group_type rtype, struct rdtgroup **r)
3173  {
3174  	struct rdtgroup *prdtgrp, *rdtgrp;
3175  	struct kernfs_node *kn;
3176  	uint files = 0;
3177  	int ret;
3178  
3179  	prdtgrp = rdtgroup_kn_lock_live(parent_kn);
3180  	if (!prdtgrp) {
3181  		ret = -ENODEV;
3182  		goto out_unlock;
3183  	}
3184  
3185  	if (rtype == RDTMON_GROUP &&
3186  	    (prdtgrp->mode == RDT_MODE_PSEUDO_LOCKSETUP ||
3187  	     prdtgrp->mode == RDT_MODE_PSEUDO_LOCKED)) {
3188  		ret = -EINVAL;
3189  		rdt_last_cmd_puts("Pseudo-locking in progress\n");
3190  		goto out_unlock;
3191  	}
3192  
3193  	/* allocate the rdtgroup. */
3194  	rdtgrp = kzalloc(sizeof(*rdtgrp), GFP_KERNEL);
3195  	if (!rdtgrp) {
3196  		ret = -ENOSPC;
3197  		rdt_last_cmd_puts("Kernel out of memory\n");
3198  		goto out_unlock;
3199  	}
3200  	*r = rdtgrp;
3201  	rdtgrp->mon.parent = prdtgrp;
3202  	rdtgrp->type = rtype;
3203  	INIT_LIST_HEAD(&rdtgrp->mon.crdtgrp_list);
3204  
3205  	/* kernfs creates the directory for rdtgrp */
3206  	kn = kernfs_create_dir(parent_kn, name, mode, rdtgrp);
3207  	if (IS_ERR(kn)) {
3208  		ret = PTR_ERR(kn);
3209  		rdt_last_cmd_puts("kernfs create error\n");
3210  		goto out_free_rgrp;
3211  	}
3212  	rdtgrp->kn = kn;
3213  
3214  	/*
3215  	 * kernfs_remove() will drop the reference count on "kn" which
3216  	 * will free it. But we still need it to stick around for the
3217  	 * rdtgroup_kn_unlock(kn) call. Take one extra reference here,
3218  	 * which will be dropped by kernfs_put() in rdtgroup_remove().
3219  	 */
3220  	kernfs_get(kn);
3221  
3222  	ret = rdtgroup_kn_set_ugid(kn);
3223  	if (ret) {
3224  		rdt_last_cmd_puts("kernfs perm error\n");
3225  		goto out_destroy;
3226  	}
3227  
3228  	files = RFTYPE_BASE | BIT(RF_CTRLSHIFT + rtype);
3229  	ret = rdtgroup_add_files(kn, files);
3230  	if (ret) {
3231  		rdt_last_cmd_puts("kernfs fill error\n");
3232  		goto out_destroy;
3233  	}
3234  
3235  	if (rdt_mon_capable) {
3236  		ret = alloc_rmid();
3237  		if (ret < 0) {
3238  			rdt_last_cmd_puts("Out of RMIDs\n");
3239  			goto out_destroy;
3240  		}
3241  		rdtgrp->mon.rmid = ret;
3242  
3243  		ret = mkdir_mondata_all(kn, rdtgrp, &rdtgrp->mon.mon_data_kn);
3244  		if (ret) {
3245  			rdt_last_cmd_puts("kernfs subdir error\n");
3246  			goto out_idfree;
3247  		}
3248  	}
3249  	kernfs_activate(kn);
3250  
3251  	/*
3252  	 * The caller unlocks the parent_kn upon success.
3253  	 */
3254  	return 0;
3255  
3256  out_idfree:
3257  	free_rmid(rdtgrp->mon.rmid);
3258  out_destroy:
3259  	kernfs_put(rdtgrp->kn);
3260  	kernfs_remove(rdtgrp->kn);
3261  out_free_rgrp:
3262  	kfree(rdtgrp);
3263  out_unlock:
3264  	rdtgroup_kn_unlock(parent_kn);
3265  	return ret;
3266  }
3267  
3268  static void mkdir_rdt_prepare_clean(struct rdtgroup *rgrp)
3269  {
3270  	kernfs_remove(rgrp->kn);
3271  	free_rmid(rgrp->mon.rmid);
3272  	rdtgroup_remove(rgrp);
3273  }
3274  
3275  /*
3276   * Create a monitor group under "mon_groups" directory of a control
3277   * and monitor group(ctrl_mon). This is a resource group
3278   * to monitor a subset of tasks and cpus in its parent ctrl_mon group.
3279   */
3280  static int rdtgroup_mkdir_mon(struct kernfs_node *parent_kn,
3281  			      const char *name, umode_t mode)
3282  {
3283  	struct rdtgroup *rdtgrp, *prgrp;
3284  	int ret;
3285  
3286  	ret = mkdir_rdt_prepare(parent_kn, name, mode, RDTMON_GROUP, &rdtgrp);
3287  	if (ret)
3288  		return ret;
3289  
3290  	prgrp = rdtgrp->mon.parent;
3291  	rdtgrp->closid = prgrp->closid;
3292  
3293  	/*
3294  	 * Add the rdtgrp to the list of rdtgrps the parent
3295  	 * ctrl_mon group has to track.
3296  	 */
3297  	list_add_tail(&rdtgrp->mon.crdtgrp_list, &prgrp->mon.crdtgrp_list);
3298  
3299  	rdtgroup_kn_unlock(parent_kn);
3300  	return ret;
3301  }
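
/*
 * Illustrative use (hypothetical group names): with "grp1" being an
 * existing ctrl_mon group, a monitor-only group is created by a plain
 * mkdir inside its mon_groups directory:
 *
 *   # mkdir /sys/fs/resctrl/grp1/mon_groups/mon1
 */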
3302  
3303  /*
3304   * These are rdtgroups created under the root directory. Can be used
3305   * to allocate and monitor resources.
3306   */
3307  static int rdtgroup_mkdir_ctrl_mon(struct kernfs_node *parent_kn,
3308  				   const char *name, umode_t mode)
3309  {
3310  	struct rdtgroup *rdtgrp;
3311  	struct kernfs_node *kn;
3312  	u32 closid;
3313  	int ret;
3314  
3315  	ret = mkdir_rdt_prepare(parent_kn, name, mode, RDTCTRL_GROUP, &rdtgrp);
3316  	if (ret)
3317  		return ret;
3318  
3319  	kn = rdtgrp->kn;
3320  	ret = closid_alloc();
3321  	if (ret < 0) {
3322  		rdt_last_cmd_puts("Out of CLOSIDs\n");
3323  		goto out_common_fail;
3324  	}
3325  	closid = ret;
3326  	ret = 0;
3327  
3328  	rdtgrp->closid = closid;
3329  	ret = rdtgroup_init_alloc(rdtgrp);
3330  	if (ret < 0)
3331  		goto out_id_free;
3332  
3333  	list_add(&rdtgrp->rdtgroup_list, &rdt_all_groups);
3334  
3335  	if (rdt_mon_capable) {
3336  		/*
3337  		 * Create an empty mon_groups directory to hold the subset
3338  		 * of tasks and cpus to monitor.
3339  		 */
3340  		ret = mongroup_create_dir(kn, rdtgrp, "mon_groups", NULL);
3341  		if (ret) {
3342  			rdt_last_cmd_puts("kernfs subdir error\n");
3343  			goto out_del_list;
3344  		}
3345  	}
3346  
3347  	goto out_unlock;
3348  
3349  out_del_list:
3350  	list_del(&rdtgrp->rdtgroup_list);
3351  out_id_free:
3352  	closid_free(closid);
3353  out_common_fail:
3354  	mkdir_rdt_prepare_clean(rdtgrp);
3355  out_unlock:
3356  	rdtgroup_kn_unlock(parent_kn);
3357  	return ret;
3358  }
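
/*
 * Illustrative use (hypothetical names, L3 CAT system): a control group
 * is created by mkdir directly under the resctrl root, after which its
 * schemata, cpus and tasks files can be populated:
 *
 *   # mkdir /sys/fs/resctrl/grp1
 *   # echo "L3:0=ff;1=ff" > /sys/fs/resctrl/grp1/schemata
 */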
3359  
3360  /*
3361   * We allow creating mon groups only within a directory called "mon_groups"
3362   * which is present in every ctrl_mon group. Check if this is a valid
3363   * "mon_groups" directory.
3364   *
3365   * 1. The directory should be named "mon_groups".
3366   * 2. The mon group itself should "not" be named "mon_groups".
3367   *   This makes sure the "mon_groups" directory always has a ctrl_mon
3368   *   group as its parent.
3369   */
3370  static bool is_mon_groups(struct kernfs_node *kn, const char *name)
3371  {
3372  	return (!strcmp(kn->name, "mon_groups") &&
3373  		strcmp(name, "mon_groups"));
3374  }
3375  
3376  static int rdtgroup_mkdir(struct kernfs_node *parent_kn, const char *name,
3377  			  umode_t mode)
3378  {
3379  	/* Do not accept '\n' to avoid an unparsable situation. */
3380  	if (strchr(name, '\n'))
3381  		return -EINVAL;
3382  
3383  	/*
3384  	 * If the parent directory is the root directory and RDT
3385  	 * allocation is supported, add a control and monitoring
3386  	 * subdirectory
3387  	 */
3388  	if (rdt_alloc_capable && parent_kn == rdtgroup_default.kn)
3389  		return rdtgroup_mkdir_ctrl_mon(parent_kn, name, mode);
3390  
3391  	/*
3392  	 * If RDT monitoring is supported and the parent directory is a valid
3393  	 * "mon_groups" directory, add a monitoring subdirectory.
3394  	 */
3395  	if (rdt_mon_capable && is_mon_groups(parent_kn, name))
3396  		return rdtgroup_mkdir_mon(parent_kn, name, mode);
3397  
3398  	return -EPERM;
3399  }
3400  
3401  static int rdtgroup_rmdir_mon(struct rdtgroup *rdtgrp, cpumask_var_t tmpmask)
3402  {
3403  	struct rdtgroup *prdtgrp = rdtgrp->mon.parent;
3404  	int cpu;
3405  
3406  	/* Give any tasks back to the parent group */
3407  	rdt_move_group_tasks(rdtgrp, prdtgrp, tmpmask);
3408  
3409  	/* Update per cpu rmid of the moved CPUs first */
3410  	for_each_cpu(cpu, &rdtgrp->cpu_mask)
3411  		per_cpu(pqr_state.default_rmid, cpu) = prdtgrp->mon.rmid;
3412  	/*
3413  	 * Update the MSR on moved CPUs and on CPUs which have a moved
3414  	 * task running on them.
3415  	 */
3416  	cpumask_or(tmpmask, tmpmask, &rdtgrp->cpu_mask);
3417  	update_closid_rmid(tmpmask, NULL);
3418  
3419  	rdtgrp->flags = RDT_DELETED;
3420  	free_rmid(rdtgrp->mon.rmid);
3421  
3422  	/*
3423  	 * Remove the rdtgrp from the parent ctrl_mon group's list
3424  	 */
3425  	WARN_ON(list_empty(&prdtgrp->mon.crdtgrp_list));
3426  	list_del(&rdtgrp->mon.crdtgrp_list);
3427  
3428  	kernfs_remove(rdtgrp->kn);
3429  
3430  	return 0;
3431  }
3432  
3433  static int rdtgroup_ctrl_remove(struct rdtgroup *rdtgrp)
3434  {
3435  	rdtgrp->flags = RDT_DELETED;
3436  	list_del(&rdtgrp->rdtgroup_list);
3437  
3438  	kernfs_remove(rdtgrp->kn);
3439  	return 0;
3440  }
3441  
3442  static int rdtgroup_rmdir_ctrl(struct rdtgroup *rdtgrp, cpumask_var_t tmpmask)
3443  {
3444  	int cpu;
3445  
3446  	/* Give any tasks back to the default group */
3447  	rdt_move_group_tasks(rdtgrp, &rdtgroup_default, tmpmask);
3448  
3449  	/* Give any CPUs back to the default group */
3450  	cpumask_or(&rdtgroup_default.cpu_mask,
3451  		   &rdtgroup_default.cpu_mask, &rdtgrp->cpu_mask);
3452  
3453  	/* Update per cpu closid and rmid of the moved CPUs first */
3454  	for_each_cpu(cpu, &rdtgrp->cpu_mask) {
3455  		per_cpu(pqr_state.default_closid, cpu) = rdtgroup_default.closid;
3456  		per_cpu(pqr_state.default_rmid, cpu) = rdtgroup_default.mon.rmid;
3457  	}
3458  
3459  	/*
3460  	 * Update the MSR on moved CPUs and on CPUs which have a moved
3461  	 * task running on them.
3462  	 */
3463  	cpumask_or(tmpmask, tmpmask, &rdtgrp->cpu_mask);
3464  	update_closid_rmid(tmpmask, NULL);
3465  
3466  	closid_free(rdtgrp->closid);
3467  	free_rmid(rdtgrp->mon.rmid);
3468  
3469  	rdtgroup_ctrl_remove(rdtgrp);
3470  
3471  	/*
3472  	 * Free all the child monitor group rmids.
3473  	 */
3474  	free_all_child_rdtgrp(rdtgrp);
3475  
3476  	return 0;
3477  }
3478  
3479  static int rdtgroup_rmdir(struct kernfs_node *kn)
3480  {
3481  	struct kernfs_node *parent_kn = kn->parent;
3482  	struct rdtgroup *rdtgrp;
3483  	cpumask_var_t tmpmask;
3484  	int ret = 0;
3485  
3486  	if (!zalloc_cpumask_var(&tmpmask, GFP_KERNEL))
3487  		return -ENOMEM;
3488  
3489  	rdtgrp = rdtgroup_kn_lock_live(kn);
3490  	if (!rdtgrp) {
3491  		ret = -EPERM;
3492  		goto out;
3493  	}
3494  
3495  	/*
3496  	 * If the rdtgroup is a ctrl_mon group and parent directory
3497  	 * is the root directory, remove the ctrl_mon group.
3498  	 *
3499  	 * If the rdtgroup is a mon group and parent directory
3500  	 * is a valid "mon_groups" directory, remove the mon group.
3501  	 */
3502  	if (rdtgrp->type == RDTCTRL_GROUP && parent_kn == rdtgroup_default.kn &&
3503  	    rdtgrp != &rdtgroup_default) {
3504  		if (rdtgrp->mode == RDT_MODE_PSEUDO_LOCKSETUP ||
3505  		    rdtgrp->mode == RDT_MODE_PSEUDO_LOCKED) {
3506  			ret = rdtgroup_ctrl_remove(rdtgrp);
3507  		} else {
3508  			ret = rdtgroup_rmdir_ctrl(rdtgrp, tmpmask);
3509  		}
3510  	} else if (rdtgrp->type == RDTMON_GROUP &&
3511  		 is_mon_groups(parent_kn, kn->name)) {
3512  		ret = rdtgroup_rmdir_mon(rdtgrp, tmpmask);
3513  	} else {
3514  		ret = -EPERM;
3515  	}
3516  
3517  out:
3518  	rdtgroup_kn_unlock(kn);
3519  	free_cpumask_var(tmpmask);
3520  	return ret;
3521  }
3522  
3523  /**
3524   * mongrp_reparent() - replace parent CTRL_MON group of a MON group
3525   * @rdtgrp:		the MON group whose parent should be replaced
3526   * @new_prdtgrp:	replacement parent CTRL_MON group for @rdtgrp
3527   * @cpus:		cpumask provided by the caller for use during this call
3528   *
3529   * Replaces the parent CTRL_MON group for a MON group, resulting in all member
3530   * tasks' CLOSID immediately changing to that of the new parent group.
3531   * Monitoring data for the group is unaffected by this operation.
3532   */
3533  static void mongrp_reparent(struct rdtgroup *rdtgrp,
3534  			    struct rdtgroup *new_prdtgrp,
3535  			    cpumask_var_t cpus)
3536  {
3537  	struct rdtgroup *prdtgrp = rdtgrp->mon.parent;
3538  
3539  	WARN_ON(rdtgrp->type != RDTMON_GROUP);
3540  	WARN_ON(new_prdtgrp->type != RDTCTRL_GROUP);
3541  
3542  	/* Nothing to do when simply renaming a MON group. */
3543  	if (prdtgrp == new_prdtgrp)
3544  		return;
3545  
3546  	WARN_ON(list_empty(&prdtgrp->mon.crdtgrp_list));
3547  	list_move_tail(&rdtgrp->mon.crdtgrp_list,
3548  		       &new_prdtgrp->mon.crdtgrp_list);
3549  
3550  	rdtgrp->mon.parent = new_prdtgrp;
3551  	rdtgrp->closid = new_prdtgrp->closid;
3552  
3553  	/* Propagate updated closid to all tasks in this group. */
3554  	rdt_move_group_tasks(rdtgrp, rdtgrp, cpus);
3555  
3556  	update_closid_rmid(cpus, NULL);
3557  }
3558  
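/*
 * .rename callback of the resctrl filesystem. Only renaming a MON group
 * within a "mon_groups" directory, or moving it to another CTRL_MON
 * group's "mon_groups" directory, is supported. For example (paths are
 * illustrative and assume resctrl is mounted at /sys/fs/resctrl):
 *
 *   mv /sys/fs/resctrl/grp0/mon_groups/m1 /sys/fs/resctrl/grp1/mon_groups/
 *
 * The MON group keeps its RMID, so its monitoring data is preserved,
 * while its tasks switch to the new parent's CLOSID.
 */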
3559  static int rdtgroup_rename(struct kernfs_node *kn,
3560  			   struct kernfs_node *new_parent, const char *new_name)
3561  {
3562  	struct rdtgroup *new_prdtgrp;
3563  	struct rdtgroup *rdtgrp;
3564  	cpumask_var_t tmpmask;
3565  	int ret;
3566  
3567  	rdtgrp = kernfs_to_rdtgroup(kn);
3568  	new_prdtgrp = kernfs_to_rdtgroup(new_parent);
3569  	if (!rdtgrp || !new_prdtgrp)
3570  		return -ENOENT;
3571  
3572  	/* Release both kernfs active_refs before obtaining rdtgroup mutex. */
3573  	rdtgroup_kn_get(rdtgrp, kn);
3574  	rdtgroup_kn_get(new_prdtgrp, new_parent);
3575  
3576  	mutex_lock(&rdtgroup_mutex);
3577  
3578  	rdt_last_cmd_clear();
3579  
3580  	/*
3581  	 * Don't allow kernfs_to_rdtgroup() to return a parent rdtgroup if
3582  	 * either kernfs_node is a file.
3583  	 */
3584  	if (kernfs_type(kn) != KERNFS_DIR ||
3585  	    kernfs_type(new_parent) != KERNFS_DIR) {
3586  		rdt_last_cmd_puts("Source and destination must be directories\n");
3587  		ret = -EPERM;
3588  		goto out;
3589  	}
3590  
3591  	if ((rdtgrp->flags & RDT_DELETED) || (new_prdtgrp->flags & RDT_DELETED)) {
3592  		ret = -ENOENT;
3593  		goto out;
3594  	}
3595  
3596  	if (rdtgrp->type != RDTMON_GROUP || !kn->parent ||
3597  	    !is_mon_groups(kn->parent, kn->name)) {
3598  		rdt_last_cmd_puts("Source must be a MON group\n");
3599  		ret = -EPERM;
3600  		goto out;
3601  	}
3602  
3603  	if (!is_mon_groups(new_parent, new_name)) {
3604  		rdt_last_cmd_puts("Destination must be a mon_groups subdirectory\n");
3605  		ret = -EPERM;
3606  		goto out;
3607  	}
3608  
3609  	/*
3610  	 * If the MON group is monitoring CPUs, the CPUs must be assigned to the
3611  	 * current parent CTRL_MON group and therefore cannot be assigned to
3612  	 * the new parent, making the move illegal.
3613  	 */
3614  	if (!cpumask_empty(&rdtgrp->cpu_mask) &&
3615  	    rdtgrp->mon.parent != new_prdtgrp) {
3616  		rdt_last_cmd_puts("Cannot move a MON group that monitors CPUs\n");
3617  		ret = -EPERM;
3618  		goto out;
3619  	}
3620  
3621  	/*
3622  	 * Allocate the cpumask for use in mongrp_reparent() to avoid the
3623  	 * possibility of failing to allocate it after kernfs_rename() has
3624  	 * succeeded.
3625  	 */
3626  	if (!zalloc_cpumask_var(&tmpmask, GFP_KERNEL)) {
3627  		ret = -ENOMEM;
3628  		goto out;
3629  	}
3630  
3631  	/*
3632  	 * Perform all input validation and allocations needed to ensure
3633  	 * mongrp_reparent() will succeed before calling kernfs_rename(),
3634  	 * otherwise it would be necessary to revert this call if
3635  	 * mongrp_reparent() failed.
3636  	 */
3637  	ret = kernfs_rename(kn, new_parent, new_name);
3638  	if (!ret)
3639  		mongrp_reparent(rdtgrp, new_prdtgrp, tmpmask);
3640  
3641  	free_cpumask_var(tmpmask);
3642  
3643  out:
3644  	mutex_unlock(&rdtgroup_mutex);
3645  	rdtgroup_kn_put(rdtgrp, kn);
3646  	rdtgroup_kn_put(new_prdtgrp, new_parent);
3647  	return ret;
3648  }
3649  
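/*
 * Show the mount options that are currently active ("cdp", "cdpl2",
 * "mba_MBps"), e.g. when listing mounts in /proc/mounts.
 */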
3650  static int rdtgroup_show_options(struct seq_file *seq, struct kernfs_root *kf)
3651  {
3652  	if (resctrl_arch_get_cdp_enabled(RDT_RESOURCE_L3))
3653  		seq_puts(seq, ",cdp");
3654  
3655  	if (resctrl_arch_get_cdp_enabled(RDT_RESOURCE_L2))
3656  		seq_puts(seq, ",cdpl2");
3657  
3658  	if (is_mba_sc(&rdt_resources_all[RDT_RESOURCE_MBA].r_resctrl))
3659  		seq_puts(seq, ",mba_MBps");
3660  
3661  	return 0;
3662  }
3663  
3664  static struct kernfs_syscall_ops rdtgroup_kf_syscall_ops = {
3665  	.mkdir		= rdtgroup_mkdir,
3666  	.rmdir		= rdtgroup_rmdir,
3667  	.rename		= rdtgroup_rename,
3668  	.show_options	= rdtgroup_show_options,
3669  };
3670  
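/*
 * Create the kernfs root of the resctrl filesystem, initialize the default
 * resource group (CLOSID 0, RMID 0) and add the base control files to the
 * root directory.
 */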
3671  static int __init rdtgroup_setup_root(void)
3672  {
3673  	int ret;
3674  
3675  	rdt_root = kernfs_create_root(&rdtgroup_kf_syscall_ops,
3676  				      KERNFS_ROOT_CREATE_DEACTIVATED |
3677  				      KERNFS_ROOT_EXTRA_OPEN_PERM_CHECK,
3678  				      &rdtgroup_default);
3679  	if (IS_ERR(rdt_root))
3680  		return PTR_ERR(rdt_root);
3681  
3682  	mutex_lock(&rdtgroup_mutex);
3683  
3684  	rdtgroup_default.closid = 0;
3685  	rdtgroup_default.mon.rmid = 0;
3686  	rdtgroup_default.type = RDTCTRL_GROUP;
3687  	INIT_LIST_HEAD(&rdtgroup_default.mon.crdtgrp_list);
3688  
3689  	list_add(&rdtgroup_default.rdtgroup_list, &rdt_all_groups);
3690  
3691  	ret = rdtgroup_add_files(kernfs_root_to_node(rdt_root), RF_CTRL_BASE);
3692  	if (ret) {
3693  		kernfs_destroy_root(rdt_root);
3694  		goto out;
3695  	}
3696  
3697  	rdtgroup_default.kn = kernfs_root_to_node(rdt_root);
3698  	kernfs_activate(rdtgroup_default.kn);
3699  
3700  out:
3701  	mutex_unlock(&rdtgroup_mutex);
3702  
3703  	return ret;
3704  }
3705  
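/* Free the per-domain monitoring state allocated by domain_setup_mon_state(). */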
3706  static void domain_destroy_mon_state(struct rdt_domain *d)
3707  {
3708  	bitmap_free(d->rmid_busy_llc);
3709  	kfree(d->mbm_total);
3710  	kfree(d->mbm_local);
3711  }
3712  
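/*
 * Tear down a resource domain that is going offline: destroy MBA software
 * controller state, remove the domain's mon_data directories if resctrl is
 * mounted, cancel pending MBM overflow and CQM limbo work, and free the
 * per-domain monitoring state.
 */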
3713  void resctrl_offline_domain(struct rdt_resource *r, struct rdt_domain *d)
3714  {
3715  	lockdep_assert_held(&rdtgroup_mutex);
3716  
3717  	if (supports_mba_mbps() && r->rid == RDT_RESOURCE_MBA)
3718  		mba_sc_domain_destroy(r, d);
3719  
3720  	if (!r->mon_capable)
3721  		return;
3722  
3723  	/*
3724  	 * If resctrl is mounted, remove all the
3725  	 * per domain monitor data directories.
3726  	 */
3727  	if (static_branch_unlikely(&rdt_mon_enable_key))
3728  		rmdir_mondata_subdir_allrdtgrp(r, d->id);
3729  
3730  	if (is_mbm_enabled())
3731  		cancel_delayed_work(&d->mbm_over);
3732  	if (is_llc_occupancy_enabled() && has_busy_rmid(r, d)) {
3733  		/*
3734  		 * When a package is going down, forcefully
3735  		 * that the L3 was flushed, so this may lead to
3736  		 * that the L3 was flushed and hence may lead to
3737  		 * incorrect counts in rare scenarios, but leaving
3738  		 * the RMID as busy creates RMID leaks if the
3739  		 * package never comes back.
3740  		 */
3741  		__check_limbo(d, true);
3742  		cancel_delayed_work(&d->cqm_limbo);
3743  	}
3744  
3745  	domain_destroy_mon_state(d);
3746  }
3747  
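/*
 * Allocate the per-domain monitoring state: the busy-RMID bitmap used for
 * LLC occupancy tracking and the MBM total/local counter arrays, each
 * sized by the number of RMIDs. Anything already allocated is freed again
 * on failure.
 */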
3748  static int domain_setup_mon_state(struct rdt_resource *r, struct rdt_domain *d)
3749  {
3750  	size_t tsize;
3751  
3752  	if (is_llc_occupancy_enabled()) {
3753  		d->rmid_busy_llc = bitmap_zalloc(r->num_rmid, GFP_KERNEL);
3754  		if (!d->rmid_busy_llc)
3755  			return -ENOMEM;
3756  	}
3757  	if (is_mbm_total_enabled()) {
3758  		tsize = sizeof(*d->mbm_total);
3759  		d->mbm_total = kcalloc(r->num_rmid, tsize, GFP_KERNEL);
3760  		if (!d->mbm_total) {
3761  			bitmap_free(d->rmid_busy_llc);
3762  			return -ENOMEM;
3763  		}
3764  	}
3765  	if (is_mbm_local_enabled()) {
3766  		tsize = sizeof(*d->mbm_local);
3767  		d->mbm_local = kcalloc(r->num_rmid, tsize, GFP_KERNEL);
3768  		if (!d->mbm_local) {
3769  			bitmap_free(d->rmid_busy_llc);
3770  			kfree(d->mbm_total);
3771  			return -ENOMEM;
3772  		}
3773  	}
3774  
3775  	return 0;
3776  }
3777  
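/*
 * Set up a resource domain that is coming online: allocate MBA software
 * controller state for the MBA resource, or, for monitoring capable
 * resources, allocate the monitoring state, set up the MBM overflow and
 * CQM limbo workers and, if resctrl is mounted, create the domain's
 * mon_data directories.
 */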
3778  int resctrl_online_domain(struct rdt_resource *r, struct rdt_domain *d)
3779  {
3780  	int err;
3781  
3782  	lockdep_assert_held(&rdtgroup_mutex);
3783  
3784  	if (supports_mba_mbps() && r->rid == RDT_RESOURCE_MBA)
3785  		/* RDT_RESOURCE_MBA is never mon_capable */
3786  		return mba_sc_domain_allocate(r, d);
3787  
3788  	if (!r->mon_capable)
3789  		return 0;
3790  
3791  	err = domain_setup_mon_state(r, d);
3792  	if (err)
3793  		return err;
3794  
3795  	if (is_mbm_enabled()) {
3796  		INIT_DELAYED_WORK(&d->mbm_over, mbm_handle_overflow);
3797  		mbm_setup_overflow_handler(d, MBM_OVERFLOW_INTERVAL);
3798  	}
3799  
3800  	if (is_llc_occupancy_enabled())
3801  		INIT_DELAYED_WORK(&d->cqm_limbo, cqm_handle_limbo);
3802  
3803  	/* If resctrl is mounted, add per domain monitor data directories. */
3804  	if (static_branch_unlikely(&rdt_mon_enable_key))
3805  		mkdir_mondata_subdir_allrdtgrp(r, d);
3806  
3807  	return 0;
3808  }
3809  
3810  /*
3811   * rdtgroup_init - rdtgroup initialization
3812   *
3813   * Set up the resctrl filesystem: set up the root, create the mount point,
3814   * register the rdtgroup filesystem, and initialize files under the root directory.
3815   *
3816   * Return: 0 on success or -errno
3817   */
3818  int __init rdtgroup_init(void)
3819  {
3820  	int ret = 0;
3821  
3822  	seq_buf_init(&last_cmd_status, last_cmd_status_buf,
3823  		     sizeof(last_cmd_status_buf));
3824  
3825  	ret = rdtgroup_setup_root();
3826  	if (ret)
3827  		return ret;
3828  
3829  	ret = sysfs_create_mount_point(fs_kobj, "resctrl");
3830  	if (ret)
3831  		goto cleanup_root;
3832  
3833  	ret = register_filesystem(&rdt_fs_type);
3834  	if (ret)
3835  		goto cleanup_mountpoint;
3836  
3837  	/*
3838  	 * Adding the resctrl debugfs directory here may not be ideal since
3839  	 * it makes the directory appear on the debugfs filesystem before
3840  	 * the resctrl filesystem is mounted.
3841  	 * On the other hand, it enables debugging of RDT before resctrl
3842  	 * is mounted.
3843  	 * The reason why the debugfs directory is created here and not in
3844  	 * rdt_get_tree() is because rdt_get_tree() takes rdtgroup_mutex and
3845  	 * during the debugfs directory creation also &sb->s_type->i_mutex_key
3846  	 * (the lockdep class of inode->i_rwsem). Other filesystem
3847  	 * interactions (e.g. SyS_getdents) have the lock ordering:
3848  	 * &sb->s_type->i_mutex_key --> &mm->mmap_lock
3849  	 * During mmap(), called with &mm->mmap_lock, the rdtgroup_mutex
3850  	 * is taken, thus creating dependency:
3851  	 * &mm->mmap_lock --> rdtgroup_mutex for the latter that can cause
3852  	 * &mm->mmap_lock --> rdtgroup_mutex, which can deadlock when
3853  	 * combined with the other two lock dependencies above.
3854  	 * Creating the debugfs directory here avoids that deadlock-prone
3855  	 * dependency (file operations cannot occur until the filesystem
3856  	 * is mounted, but there is no way to tell lockdep that).
3857  	 */
3858  	debugfs_resctrl = debugfs_create_dir("resctrl", NULL);
3859  
3860  	return 0;
3861  
3862  cleanup_mountpoint:
3863  	sysfs_remove_mount_point(fs_kobj, "resctrl");
3864  cleanup_root:
3865  	kernfs_destroy_root(rdt_root);
3866  
3867  	return ret;
3868  }
3869  
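/*
 * Undo rdtgroup_init(): remove the debugfs directory, unregister the
 * filesystem, remove the sysfs mount point and destroy the kernfs root.
 */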
3870  void __exit rdtgroup_exit(void)
3871  {
3872  	debugfs_remove_recursive(debugfs_resctrl);
3873  	unregister_filesystem(&rdt_fs_type);
3874  	sysfs_remove_mount_point(fs_kobj, "resctrl");
3875  	kernfs_destroy_root(rdt_root);
3876  }
3877