xref: /openbmc/linux/kernel/cgroup/rdma.c (revision 7896dfb0)
139d3e758SParav Pandit /*
239d3e758SParav Pandit  * RDMA resource limiting controller for cgroups.
339d3e758SParav Pandit  *
439d3e758SParav Pandit  * Used to allow a cgroup hierarchy to stop processes from consuming
539d3e758SParav Pandit  * additional RDMA resources after a certain limit is reached.
639d3e758SParav Pandit  *
739d3e758SParav Pandit  * Copyright (C) 2016 Parav Pandit <pandit.parav@gmail.com>
839d3e758SParav Pandit  *
939d3e758SParav Pandit  * This file is subject to the terms and conditions of version 2 of the GNU
1039d3e758SParav Pandit  * General Public License. See the file COPYING in the main directory of the
1139d3e758SParav Pandit  * Linux distribution for more details.
1239d3e758SParav Pandit  */
1339d3e758SParav Pandit 
1439d3e758SParav Pandit #include <linux/bitops.h>
1539d3e758SParav Pandit #include <linux/slab.h>
1639d3e758SParav Pandit #include <linux/seq_file.h>
1739d3e758SParav Pandit #include <linux/cgroup.h>
1839d3e758SParav Pandit #include <linux/parser.h>
1939d3e758SParav Pandit #include <linux/cgroup_rdma.h>
2039d3e758SParav Pandit 
/* Textual form of an unlimited resource, as parsed from and printed to users. */
#define RDMACG_MAX_STR "max"

/*
 * rdmacg_mutex serializes every access to the per-cgroup resource pool
 * lists and to the global list of registered rdma cgroup devices.
 */
static DEFINE_MUTEX(rdmacg_mutex);
static LIST_HEAD(rdmacg_devices);
2939d3e758SParav Pandit 
/* Selects which per-resource value a cgroup control file shows. */
enum rdmacg_file_type {
	RDMACG_RESOURCE_TYPE_MAX,	/* configured limit ("max" file) */
	RDMACG_RESOURCE_TYPE_STAT,	/* current usage ("current" file) */
};
3439d3e758SParav Pandit 
3539d3e758SParav Pandit /*
3639d3e758SParav Pandit  * resource table definition as to be seen by the user.
3739d3e758SParav Pandit  * Need to add entries to it when more resources are
3839d3e758SParav Pandit  * added/defined at IB verb/core layer.
3939d3e758SParav Pandit  */
4039d3e758SParav Pandit static char const *rdmacg_resource_names[] = {
4139d3e758SParav Pandit 	[RDMACG_RESOURCE_HCA_HANDLE]	= "hca_handle",
4239d3e758SParav Pandit 	[RDMACG_RESOURCE_HCA_OBJECT]	= "hca_object",
4339d3e758SParav Pandit };
4439d3e758SParav Pandit 
/* Limit and current charge count for one resource type of one pool. */
struct rdmacg_resource {
	int max;	/* configured limit; S32_MAX means unlimited */
	int usage;	/* number of charges currently held */
};
5039d3e758SParav Pandit 
5139d3e758SParav Pandit /*
5239d3e758SParav Pandit  * resource pool object which represents per cgroup, per device
5339d3e758SParav Pandit  * resources. There are multiple instances of this object per cgroup,
5439d3e758SParav Pandit  * therefore it cannot be embedded within rdma_cgroup structure. It
5539d3e758SParav Pandit  * is maintained as list.
5639d3e758SParav Pandit  */
5739d3e758SParav Pandit struct rdmacg_resource_pool {
5839d3e758SParav Pandit 	struct rdmacg_device	*device;
5939d3e758SParav Pandit 	struct rdmacg_resource	resources[RDMACG_RESOURCE_MAX];
6039d3e758SParav Pandit 
6139d3e758SParav Pandit 	struct list_head	cg_node;
6239d3e758SParav Pandit 	struct list_head	dev_node;
6339d3e758SParav Pandit 
6439d3e758SParav Pandit 	/* count active user tasks of this pool */
6539d3e758SParav Pandit 	u64			usage_sum;
6639d3e758SParav Pandit 	/* total number counts which are set to max */
6739d3e758SParav Pandit 	int			num_max_cnt;
6839d3e758SParav Pandit };
6939d3e758SParav Pandit 
7039d3e758SParav Pandit static struct rdma_cgroup *css_rdmacg(struct cgroup_subsys_state *css)
7139d3e758SParav Pandit {
7239d3e758SParav Pandit 	return container_of(css, struct rdma_cgroup, css);
7339d3e758SParav Pandit }
7439d3e758SParav Pandit 
7539d3e758SParav Pandit static struct rdma_cgroup *parent_rdmacg(struct rdma_cgroup *cg)
7639d3e758SParav Pandit {
7739d3e758SParav Pandit 	return css_rdmacg(cg->css.parent);
7839d3e758SParav Pandit }
7939d3e758SParav Pandit 
8039d3e758SParav Pandit static inline struct rdma_cgroup *get_current_rdmacg(void)
8139d3e758SParav Pandit {
8239d3e758SParav Pandit 	return css_rdmacg(task_get_css(current, rdma_cgrp_id));
8339d3e758SParav Pandit }
8439d3e758SParav Pandit 
8539d3e758SParav Pandit static void set_resource_limit(struct rdmacg_resource_pool *rpool,
8639d3e758SParav Pandit 			       int index, int new_max)
8739d3e758SParav Pandit {
8839d3e758SParav Pandit 	if (new_max == S32_MAX) {
8939d3e758SParav Pandit 		if (rpool->resources[index].max != S32_MAX)
9039d3e758SParav Pandit 			rpool->num_max_cnt++;
9139d3e758SParav Pandit 	} else {
9239d3e758SParav Pandit 		if (rpool->resources[index].max == S32_MAX)
9339d3e758SParav Pandit 			rpool->num_max_cnt--;
9439d3e758SParav Pandit 	}
9539d3e758SParav Pandit 	rpool->resources[index].max = new_max;
9639d3e758SParav Pandit }
9739d3e758SParav Pandit 
9839d3e758SParav Pandit static void set_all_resource_max_limit(struct rdmacg_resource_pool *rpool)
9939d3e758SParav Pandit {
10039d3e758SParav Pandit 	int i;
10139d3e758SParav Pandit 
10239d3e758SParav Pandit 	for (i = 0; i < RDMACG_RESOURCE_MAX; i++)
10339d3e758SParav Pandit 		set_resource_limit(rpool, i, S32_MAX);
10439d3e758SParav Pandit }
10539d3e758SParav Pandit 
10639d3e758SParav Pandit static void free_cg_rpool_locked(struct rdmacg_resource_pool *rpool)
10739d3e758SParav Pandit {
10839d3e758SParav Pandit 	lockdep_assert_held(&rdmacg_mutex);
10939d3e758SParav Pandit 
11039d3e758SParav Pandit 	list_del(&rpool->cg_node);
11139d3e758SParav Pandit 	list_del(&rpool->dev_node);
11239d3e758SParav Pandit 	kfree(rpool);
11339d3e758SParav Pandit }
11439d3e758SParav Pandit 
11539d3e758SParav Pandit static struct rdmacg_resource_pool *
11639d3e758SParav Pandit find_cg_rpool_locked(struct rdma_cgroup *cg,
11739d3e758SParav Pandit 		     struct rdmacg_device *device)
11839d3e758SParav Pandit 
11939d3e758SParav Pandit {
12039d3e758SParav Pandit 	struct rdmacg_resource_pool *pool;
12139d3e758SParav Pandit 
12239d3e758SParav Pandit 	lockdep_assert_held(&rdmacg_mutex);
12339d3e758SParav Pandit 
12439d3e758SParav Pandit 	list_for_each_entry(pool, &cg->rpools, cg_node)
12539d3e758SParav Pandit 		if (pool->device == device)
12639d3e758SParav Pandit 			return pool;
12739d3e758SParav Pandit 
12839d3e758SParav Pandit 	return NULL;
12939d3e758SParav Pandit }
13039d3e758SParav Pandit 
13139d3e758SParav Pandit static struct rdmacg_resource_pool *
13239d3e758SParav Pandit get_cg_rpool_locked(struct rdma_cgroup *cg, struct rdmacg_device *device)
13339d3e758SParav Pandit {
13439d3e758SParav Pandit 	struct rdmacg_resource_pool *rpool;
13539d3e758SParav Pandit 
13639d3e758SParav Pandit 	rpool = find_cg_rpool_locked(cg, device);
13739d3e758SParav Pandit 	if (rpool)
13839d3e758SParav Pandit 		return rpool;
13939d3e758SParav Pandit 
14039d3e758SParav Pandit 	rpool = kzalloc(sizeof(*rpool), GFP_KERNEL);
14139d3e758SParav Pandit 	if (!rpool)
14239d3e758SParav Pandit 		return ERR_PTR(-ENOMEM);
14339d3e758SParav Pandit 
14439d3e758SParav Pandit 	rpool->device = device;
14539d3e758SParav Pandit 	set_all_resource_max_limit(rpool);
14639d3e758SParav Pandit 
14739d3e758SParav Pandit 	INIT_LIST_HEAD(&rpool->cg_node);
14839d3e758SParav Pandit 	INIT_LIST_HEAD(&rpool->dev_node);
14939d3e758SParav Pandit 	list_add_tail(&rpool->cg_node, &cg->rpools);
15039d3e758SParav Pandit 	list_add_tail(&rpool->dev_node, &device->rpools);
15139d3e758SParav Pandit 	return rpool;
15239d3e758SParav Pandit }
15339d3e758SParav Pandit 
15439d3e758SParav Pandit /**
15539d3e758SParav Pandit  * uncharge_cg_locked - uncharge resource for rdma cgroup
15639d3e758SParav Pandit  * @cg: pointer to cg to uncharge and all parents in hierarchy
15739d3e758SParav Pandit  * @device: pointer to rdmacg device
15839d3e758SParav Pandit  * @index: index of the resource to uncharge in cg (resource pool)
15939d3e758SParav Pandit  *
16039d3e758SParav Pandit  * It also frees the resource pool which was created as part of
16139d3e758SParav Pandit  * charging operation when there are no resources attached to
16239d3e758SParav Pandit  * resource pool.
16339d3e758SParav Pandit  */
16439d3e758SParav Pandit static void
16539d3e758SParav Pandit uncharge_cg_locked(struct rdma_cgroup *cg,
16639d3e758SParav Pandit 		   struct rdmacg_device *device,
16739d3e758SParav Pandit 		   enum rdmacg_resource_type index)
16839d3e758SParav Pandit {
16939d3e758SParav Pandit 	struct rdmacg_resource_pool *rpool;
17039d3e758SParav Pandit 
17139d3e758SParav Pandit 	rpool = find_cg_rpool_locked(cg, device);
17239d3e758SParav Pandit 
17339d3e758SParav Pandit 	/*
17439d3e758SParav Pandit 	 * rpool cannot be null at this stage. Let kernel operate in case
17539d3e758SParav Pandit 	 * if there a bug in IB stack or rdma controller, instead of crashing
17639d3e758SParav Pandit 	 * the system.
17739d3e758SParav Pandit 	 */
17839d3e758SParav Pandit 	if (unlikely(!rpool)) {
17939d3e758SParav Pandit 		pr_warn("Invalid device %p or rdma cgroup %p\n", cg, device);
18039d3e758SParav Pandit 		return;
18139d3e758SParav Pandit 	}
18239d3e758SParav Pandit 
18339d3e758SParav Pandit 	rpool->resources[index].usage--;
18439d3e758SParav Pandit 
18539d3e758SParav Pandit 	/*
18639d3e758SParav Pandit 	 * A negative count (or overflow) is invalid,
18739d3e758SParav Pandit 	 * it indicates a bug in the rdma controller.
18839d3e758SParav Pandit 	 */
18939d3e758SParav Pandit 	WARN_ON_ONCE(rpool->resources[index].usage < 0);
19039d3e758SParav Pandit 	rpool->usage_sum--;
19139d3e758SParav Pandit 	if (rpool->usage_sum == 0 &&
19239d3e758SParav Pandit 	    rpool->num_max_cnt == RDMACG_RESOURCE_MAX) {
19339d3e758SParav Pandit 		/*
19439d3e758SParav Pandit 		 * No user of the rpool and all entries are set to max, so
19539d3e758SParav Pandit 		 * safe to delete this rpool.
19639d3e758SParav Pandit 		 */
19739d3e758SParav Pandit 		free_cg_rpool_locked(rpool);
19839d3e758SParav Pandit 	}
19939d3e758SParav Pandit }
20039d3e758SParav Pandit 
20139d3e758SParav Pandit /**
20239d3e758SParav Pandit  * rdmacg_uncharge_hierarchy - hierarchically uncharge rdma resource count
20339d3e758SParav Pandit  * @device: pointer to rdmacg device
20439d3e758SParav Pandit  * @stop_cg: while traversing hirerchy, when meet with stop_cg cgroup
20539d3e758SParav Pandit  *           stop uncharging
20639d3e758SParav Pandit  * @index: index of the resource to uncharge in cg in given resource pool
20739d3e758SParav Pandit  */
20839d3e758SParav Pandit static void rdmacg_uncharge_hierarchy(struct rdma_cgroup *cg,
20939d3e758SParav Pandit 				     struct rdmacg_device *device,
21039d3e758SParav Pandit 				     struct rdma_cgroup *stop_cg,
21139d3e758SParav Pandit 				     enum rdmacg_resource_type index)
21239d3e758SParav Pandit {
21339d3e758SParav Pandit 	struct rdma_cgroup *p;
21439d3e758SParav Pandit 
21539d3e758SParav Pandit 	mutex_lock(&rdmacg_mutex);
21639d3e758SParav Pandit 
21739d3e758SParav Pandit 	for (p = cg; p != stop_cg; p = parent_rdmacg(p))
21839d3e758SParav Pandit 		uncharge_cg_locked(p, device, index);
21939d3e758SParav Pandit 
22039d3e758SParav Pandit 	mutex_unlock(&rdmacg_mutex);
22139d3e758SParav Pandit 
22239d3e758SParav Pandit 	css_put(&cg->css);
22339d3e758SParav Pandit }
22439d3e758SParav Pandit 
22539d3e758SParav Pandit /**
22639d3e758SParav Pandit  * rdmacg_uncharge - hierarchically uncharge rdma resource count
22739d3e758SParav Pandit  * @device: pointer to rdmacg device
22839d3e758SParav Pandit  * @index: index of the resource to uncharge in cgroup in given resource pool
22939d3e758SParav Pandit  */
23039d3e758SParav Pandit void rdmacg_uncharge(struct rdma_cgroup *cg,
23139d3e758SParav Pandit 		     struct rdmacg_device *device,
23239d3e758SParav Pandit 		     enum rdmacg_resource_type index)
23339d3e758SParav Pandit {
23439d3e758SParav Pandit 	if (index >= RDMACG_RESOURCE_MAX)
23539d3e758SParav Pandit 		return;
23639d3e758SParav Pandit 
23739d3e758SParav Pandit 	rdmacg_uncharge_hierarchy(cg, device, NULL, index);
23839d3e758SParav Pandit }
23939d3e758SParav Pandit EXPORT_SYMBOL(rdmacg_uncharge);
24039d3e758SParav Pandit 
24139d3e758SParav Pandit /**
24239d3e758SParav Pandit  * rdmacg_try_charge - hierarchically try to charge the rdma resource
24339d3e758SParav Pandit  * @rdmacg: pointer to rdma cgroup which will own this resource
24439d3e758SParav Pandit  * @device: pointer to rdmacg device
24539d3e758SParav Pandit  * @index: index of the resource to charge in cgroup (resource pool)
24639d3e758SParav Pandit  *
24739d3e758SParav Pandit  * This function follows charging resource in hierarchical way.
24839d3e758SParav Pandit  * It will fail if the charge would cause the new value to exceed the
24939d3e758SParav Pandit  * hierarchical limit.
25039d3e758SParav Pandit  * Returns 0 if the charge succeded, otherwise -EAGAIN, -ENOMEM or -EINVAL.
25139d3e758SParav Pandit  * Returns pointer to rdmacg for this resource when charging is successful.
25239d3e758SParav Pandit  *
25339d3e758SParav Pandit  * Charger needs to account resources on two criteria.
25439d3e758SParav Pandit  * (a) per cgroup & (b) per device resource usage.
25539d3e758SParav Pandit  * Per cgroup resource usage ensures that tasks of cgroup doesn't cross
25639d3e758SParav Pandit  * the configured limits. Per device provides granular configuration
25739d3e758SParav Pandit  * in multi device usage. It allocates resource pool in the hierarchy
25839d3e758SParav Pandit  * for each parent it come across for first resource. Later on resource
25939d3e758SParav Pandit  * pool will be available. Therefore it will be much faster thereon
26039d3e758SParav Pandit  * to charge/uncharge.
26139d3e758SParav Pandit  */
26239d3e758SParav Pandit int rdmacg_try_charge(struct rdma_cgroup **rdmacg,
26339d3e758SParav Pandit 		      struct rdmacg_device *device,
26439d3e758SParav Pandit 		      enum rdmacg_resource_type index)
26539d3e758SParav Pandit {
26639d3e758SParav Pandit 	struct rdma_cgroup *cg, *p;
26739d3e758SParav Pandit 	struct rdmacg_resource_pool *rpool;
26839d3e758SParav Pandit 	s64 new;
26939d3e758SParav Pandit 	int ret = 0;
27039d3e758SParav Pandit 
27139d3e758SParav Pandit 	if (index >= RDMACG_RESOURCE_MAX)
27239d3e758SParav Pandit 		return -EINVAL;
27339d3e758SParav Pandit 
27439d3e758SParav Pandit 	/*
27539d3e758SParav Pandit 	 * hold on to css, as cgroup can be removed but resource
27639d3e758SParav Pandit 	 * accounting happens on css.
27739d3e758SParav Pandit 	 */
27839d3e758SParav Pandit 	cg = get_current_rdmacg();
27939d3e758SParav Pandit 
28039d3e758SParav Pandit 	mutex_lock(&rdmacg_mutex);
28139d3e758SParav Pandit 	for (p = cg; p; p = parent_rdmacg(p)) {
28239d3e758SParav Pandit 		rpool = get_cg_rpool_locked(p, device);
28339d3e758SParav Pandit 		if (IS_ERR(rpool)) {
28439d3e758SParav Pandit 			ret = PTR_ERR(rpool);
28539d3e758SParav Pandit 			goto err;
28639d3e758SParav Pandit 		} else {
28739d3e758SParav Pandit 			new = rpool->resources[index].usage + 1;
28839d3e758SParav Pandit 			if (new > rpool->resources[index].max) {
28939d3e758SParav Pandit 				ret = -EAGAIN;
29039d3e758SParav Pandit 				goto err;
29139d3e758SParav Pandit 			} else {
29239d3e758SParav Pandit 				rpool->resources[index].usage = new;
29339d3e758SParav Pandit 				rpool->usage_sum++;
29439d3e758SParav Pandit 			}
29539d3e758SParav Pandit 		}
29639d3e758SParav Pandit 	}
29739d3e758SParav Pandit 	mutex_unlock(&rdmacg_mutex);
29839d3e758SParav Pandit 
29939d3e758SParav Pandit 	*rdmacg = cg;
30039d3e758SParav Pandit 	return 0;
30139d3e758SParav Pandit 
30239d3e758SParav Pandit err:
30339d3e758SParav Pandit 	mutex_unlock(&rdmacg_mutex);
30439d3e758SParav Pandit 	rdmacg_uncharge_hierarchy(cg, device, p, index);
30539d3e758SParav Pandit 	return ret;
30639d3e758SParav Pandit }
30739d3e758SParav Pandit EXPORT_SYMBOL(rdmacg_try_charge);
30839d3e758SParav Pandit 
30939d3e758SParav Pandit /**
31039d3e758SParav Pandit  * rdmacg_register_device - register rdmacg device to rdma controller.
31139d3e758SParav Pandit  * @device: pointer to rdmacg device whose resources need to be accounted.
31239d3e758SParav Pandit  *
31339d3e758SParav Pandit  * If IB stack wish a device to participate in rdma cgroup resource
31439d3e758SParav Pandit  * tracking, it must invoke this API to register with rdma cgroup before
31539d3e758SParav Pandit  * any user space application can start using the RDMA resources.
31639d3e758SParav Pandit  * Returns 0 on success or EINVAL when table length given is beyond
31739d3e758SParav Pandit  * supported size.
31839d3e758SParav Pandit  */
31939d3e758SParav Pandit int rdmacg_register_device(struct rdmacg_device *device)
32039d3e758SParav Pandit {
32139d3e758SParav Pandit 	INIT_LIST_HEAD(&device->dev_node);
32239d3e758SParav Pandit 	INIT_LIST_HEAD(&device->rpools);
32339d3e758SParav Pandit 
32439d3e758SParav Pandit 	mutex_lock(&rdmacg_mutex);
32539d3e758SParav Pandit 	list_add_tail(&device->dev_node, &rdmacg_devices);
32639d3e758SParav Pandit 	mutex_unlock(&rdmacg_mutex);
32739d3e758SParav Pandit 	return 0;
32839d3e758SParav Pandit }
32939d3e758SParav Pandit EXPORT_SYMBOL(rdmacg_register_device);
33039d3e758SParav Pandit 
33139d3e758SParav Pandit /**
33239d3e758SParav Pandit  * rdmacg_unregister_device - unregister rdmacg device from rdma controller.
33339d3e758SParav Pandit  * @device: pointer to rdmacg device which was previously registered with rdma
33439d3e758SParav Pandit  *          controller using rdmacg_register_device().
33539d3e758SParav Pandit  *
33639d3e758SParav Pandit  * IB stack must invoke this after all the resources of the IB device
33739d3e758SParav Pandit  * are destroyed and after ensuring that no more resources will be created
33839d3e758SParav Pandit  * when this API is invoked.
33939d3e758SParav Pandit  */
34039d3e758SParav Pandit void rdmacg_unregister_device(struct rdmacg_device *device)
34139d3e758SParav Pandit {
34239d3e758SParav Pandit 	struct rdmacg_resource_pool *rpool, *tmp;
34339d3e758SParav Pandit 
34439d3e758SParav Pandit 	/*
34539d3e758SParav Pandit 	 * Synchronize with any active resource settings,
34639d3e758SParav Pandit 	 * usage query happening via configfs.
34739d3e758SParav Pandit 	 */
34839d3e758SParav Pandit 	mutex_lock(&rdmacg_mutex);
34939d3e758SParav Pandit 	list_del_init(&device->dev_node);
35039d3e758SParav Pandit 
35139d3e758SParav Pandit 	/*
35239d3e758SParav Pandit 	 * Now that this device is off the cgroup list, its safe to free
35339d3e758SParav Pandit 	 * all the rpool resources.
35439d3e758SParav Pandit 	 */
35539d3e758SParav Pandit 	list_for_each_entry_safe(rpool, tmp, &device->rpools, dev_node)
35639d3e758SParav Pandit 		free_cg_rpool_locked(rpool);
35739d3e758SParav Pandit 
35839d3e758SParav Pandit 	mutex_unlock(&rdmacg_mutex);
35939d3e758SParav Pandit }
36039d3e758SParav Pandit EXPORT_SYMBOL(rdmacg_unregister_device);
36139d3e758SParav Pandit 
36239d3e758SParav Pandit static int parse_resource(char *c, int *intval)
36339d3e758SParav Pandit {
36439d3e758SParav Pandit 	substring_t argstr;
36539d3e758SParav Pandit 	const char **table = &rdmacg_resource_names[0];
36639d3e758SParav Pandit 	char *name, *value = c;
36739d3e758SParav Pandit 	size_t len;
36839d3e758SParav Pandit 	int ret, i = 0;
36939d3e758SParav Pandit 
37039d3e758SParav Pandit 	name = strsep(&value, "=");
37139d3e758SParav Pandit 	if (!name || !value)
37239d3e758SParav Pandit 		return -EINVAL;
37339d3e758SParav Pandit 
37439d3e758SParav Pandit 	len = strlen(value);
37539d3e758SParav Pandit 
37639d3e758SParav Pandit 	for (i = 0; i < RDMACG_RESOURCE_MAX; i++) {
37739d3e758SParav Pandit 		if (strcmp(table[i], name))
37839d3e758SParav Pandit 			continue;
37939d3e758SParav Pandit 
38039d3e758SParav Pandit 		argstr.from = value;
38139d3e758SParav Pandit 		argstr.to = value + len;
38239d3e758SParav Pandit 
38339d3e758SParav Pandit 		ret = match_int(&argstr, intval);
38439d3e758SParav Pandit 		if (ret >= 0) {
38539d3e758SParav Pandit 			if (*intval < 0)
38639d3e758SParav Pandit 				break;
38739d3e758SParav Pandit 			return i;
38839d3e758SParav Pandit 		}
38939d3e758SParav Pandit 		if (strncmp(value, RDMACG_MAX_STR, len) == 0) {
39039d3e758SParav Pandit 			*intval = S32_MAX;
39139d3e758SParav Pandit 			return i;
39239d3e758SParav Pandit 		}
39339d3e758SParav Pandit 		break;
39439d3e758SParav Pandit 	}
39539d3e758SParav Pandit 	return -EINVAL;
39639d3e758SParav Pandit }
39739d3e758SParav Pandit 
39839d3e758SParav Pandit static int rdmacg_parse_limits(char *options,
39939d3e758SParav Pandit 			       int *new_limits, unsigned long *enables)
40039d3e758SParav Pandit {
40139d3e758SParav Pandit 	char *c;
40239d3e758SParav Pandit 	int err = -EINVAL;
40339d3e758SParav Pandit 
40439d3e758SParav Pandit 	/* parse resource options */
40539d3e758SParav Pandit 	while ((c = strsep(&options, " ")) != NULL) {
40639d3e758SParav Pandit 		int index, intval;
40739d3e758SParav Pandit 
40839d3e758SParav Pandit 		index = parse_resource(c, &intval);
40939d3e758SParav Pandit 		if (index < 0)
41039d3e758SParav Pandit 			goto err;
41139d3e758SParav Pandit 
41239d3e758SParav Pandit 		new_limits[index] = intval;
41339d3e758SParav Pandit 		*enables |= BIT(index);
41439d3e758SParav Pandit 	}
41539d3e758SParav Pandit 	return 0;
41639d3e758SParav Pandit 
41739d3e758SParav Pandit err:
41839d3e758SParav Pandit 	return err;
41939d3e758SParav Pandit }
42039d3e758SParav Pandit 
42139d3e758SParav Pandit static struct rdmacg_device *rdmacg_get_device_locked(const char *name)
42239d3e758SParav Pandit {
42339d3e758SParav Pandit 	struct rdmacg_device *device;
42439d3e758SParav Pandit 
42539d3e758SParav Pandit 	lockdep_assert_held(&rdmacg_mutex);
42639d3e758SParav Pandit 
42739d3e758SParav Pandit 	list_for_each_entry(device, &rdmacg_devices, dev_node)
42839d3e758SParav Pandit 		if (!strcmp(name, device->name))
42939d3e758SParav Pandit 			return device;
43039d3e758SParav Pandit 
43139d3e758SParav Pandit 	return NULL;
43239d3e758SParav Pandit }
43339d3e758SParav Pandit 
43439d3e758SParav Pandit static ssize_t rdmacg_resource_set_max(struct kernfs_open_file *of,
43539d3e758SParav Pandit 				       char *buf, size_t nbytes, loff_t off)
43639d3e758SParav Pandit {
43739d3e758SParav Pandit 	struct rdma_cgroup *cg = css_rdmacg(of_css(of));
43839d3e758SParav Pandit 	const char *dev_name;
43939d3e758SParav Pandit 	struct rdmacg_resource_pool *rpool;
44039d3e758SParav Pandit 	struct rdmacg_device *device;
44139d3e758SParav Pandit 	char *options = strstrip(buf);
44239d3e758SParav Pandit 	int *new_limits;
44339d3e758SParav Pandit 	unsigned long enables = 0;
44439d3e758SParav Pandit 	int i = 0, ret = 0;
44539d3e758SParav Pandit 
44639d3e758SParav Pandit 	/* extract the device name first */
44739d3e758SParav Pandit 	dev_name = strsep(&options, " ");
44839d3e758SParav Pandit 	if (!dev_name) {
44939d3e758SParav Pandit 		ret = -EINVAL;
45039d3e758SParav Pandit 		goto err;
45139d3e758SParav Pandit 	}
45239d3e758SParav Pandit 
45339d3e758SParav Pandit 	new_limits = kcalloc(RDMACG_RESOURCE_MAX, sizeof(int), GFP_KERNEL);
45439d3e758SParav Pandit 	if (!new_limits) {
45539d3e758SParav Pandit 		ret = -ENOMEM;
45639d3e758SParav Pandit 		goto err;
45739d3e758SParav Pandit 	}
45839d3e758SParav Pandit 
45939d3e758SParav Pandit 	ret = rdmacg_parse_limits(options, new_limits, &enables);
46039d3e758SParav Pandit 	if (ret)
46139d3e758SParav Pandit 		goto parse_err;
46239d3e758SParav Pandit 
46339d3e758SParav Pandit 	/* acquire lock to synchronize with hot plug devices */
46439d3e758SParav Pandit 	mutex_lock(&rdmacg_mutex);
46539d3e758SParav Pandit 
46639d3e758SParav Pandit 	device = rdmacg_get_device_locked(dev_name);
46739d3e758SParav Pandit 	if (!device) {
46839d3e758SParav Pandit 		ret = -ENODEV;
46939d3e758SParav Pandit 		goto dev_err;
47039d3e758SParav Pandit 	}
47139d3e758SParav Pandit 
47239d3e758SParav Pandit 	rpool = get_cg_rpool_locked(cg, device);
47339d3e758SParav Pandit 	if (IS_ERR(rpool)) {
47439d3e758SParav Pandit 		ret = PTR_ERR(rpool);
47539d3e758SParav Pandit 		goto dev_err;
47639d3e758SParav Pandit 	}
47739d3e758SParav Pandit 
47839d3e758SParav Pandit 	/* now set the new limits of the rpool */
47939d3e758SParav Pandit 	for_each_set_bit(i, &enables, RDMACG_RESOURCE_MAX)
48039d3e758SParav Pandit 		set_resource_limit(rpool, i, new_limits[i]);
48139d3e758SParav Pandit 
48239d3e758SParav Pandit 	if (rpool->usage_sum == 0 &&
48339d3e758SParav Pandit 	    rpool->num_max_cnt == RDMACG_RESOURCE_MAX) {
48439d3e758SParav Pandit 		/*
48539d3e758SParav Pandit 		 * No user of the rpool and all entries are set to max, so
48639d3e758SParav Pandit 		 * safe to delete this rpool.
48739d3e758SParav Pandit 		 */
48839d3e758SParav Pandit 		free_cg_rpool_locked(rpool);
48939d3e758SParav Pandit 	}
49039d3e758SParav Pandit 
49139d3e758SParav Pandit dev_err:
49239d3e758SParav Pandit 	mutex_unlock(&rdmacg_mutex);
49339d3e758SParav Pandit 
49439d3e758SParav Pandit parse_err:
49539d3e758SParav Pandit 	kfree(new_limits);
49639d3e758SParav Pandit 
49739d3e758SParav Pandit err:
49839d3e758SParav Pandit 	return ret ?: nbytes;
49939d3e758SParav Pandit }
50039d3e758SParav Pandit 
50139d3e758SParav Pandit static void print_rpool_values(struct seq_file *sf,
50239d3e758SParav Pandit 			       struct rdmacg_resource_pool *rpool)
50339d3e758SParav Pandit {
50439d3e758SParav Pandit 	enum rdmacg_file_type sf_type;
50539d3e758SParav Pandit 	int i;
50639d3e758SParav Pandit 	u32 value;
50739d3e758SParav Pandit 
50839d3e758SParav Pandit 	sf_type = seq_cft(sf)->private;
50939d3e758SParav Pandit 
51039d3e758SParav Pandit 	for (i = 0; i < RDMACG_RESOURCE_MAX; i++) {
51139d3e758SParav Pandit 		seq_puts(sf, rdmacg_resource_names[i]);
51239d3e758SParav Pandit 		seq_putc(sf, '=');
51339d3e758SParav Pandit 		if (sf_type == RDMACG_RESOURCE_TYPE_MAX) {
51439d3e758SParav Pandit 			if (rpool)
51539d3e758SParav Pandit 				value = rpool->resources[i].max;
51639d3e758SParav Pandit 			else
51739d3e758SParav Pandit 				value = S32_MAX;
51839d3e758SParav Pandit 		} else {
51939d3e758SParav Pandit 			if (rpool)
52039d3e758SParav Pandit 				value = rpool->resources[i].usage;
5217896dfb0SParav Pandit 			else
5227896dfb0SParav Pandit 				value = 0;
52339d3e758SParav Pandit 		}
52439d3e758SParav Pandit 
52539d3e758SParav Pandit 		if (value == S32_MAX)
52639d3e758SParav Pandit 			seq_puts(sf, RDMACG_MAX_STR);
52739d3e758SParav Pandit 		else
52839d3e758SParav Pandit 			seq_printf(sf, "%d", value);
52939d3e758SParav Pandit 		seq_putc(sf, ' ');
53039d3e758SParav Pandit 	}
53139d3e758SParav Pandit }
53239d3e758SParav Pandit 
53339d3e758SParav Pandit static int rdmacg_resource_read(struct seq_file *sf, void *v)
53439d3e758SParav Pandit {
53539d3e758SParav Pandit 	struct rdmacg_device *device;
53639d3e758SParav Pandit 	struct rdmacg_resource_pool *rpool;
53739d3e758SParav Pandit 	struct rdma_cgroup *cg = css_rdmacg(seq_css(sf));
53839d3e758SParav Pandit 
53939d3e758SParav Pandit 	mutex_lock(&rdmacg_mutex);
54039d3e758SParav Pandit 
54139d3e758SParav Pandit 	list_for_each_entry(device, &rdmacg_devices, dev_node) {
54239d3e758SParav Pandit 		seq_printf(sf, "%s ", device->name);
54339d3e758SParav Pandit 
54439d3e758SParav Pandit 		rpool = find_cg_rpool_locked(cg, device);
54539d3e758SParav Pandit 		print_rpool_values(sf, rpool);
54639d3e758SParav Pandit 
54739d3e758SParav Pandit 		seq_putc(sf, '\n');
54839d3e758SParav Pandit 	}
54939d3e758SParav Pandit 
55039d3e758SParav Pandit 	mutex_unlock(&rdmacg_mutex);
55139d3e758SParav Pandit 	return 0;
55239d3e758SParav Pandit }
55339d3e758SParav Pandit 
55439d3e758SParav Pandit static struct cftype rdmacg_files[] = {
55539d3e758SParav Pandit 	{
55639d3e758SParav Pandit 		.name = "max",
55739d3e758SParav Pandit 		.write = rdmacg_resource_set_max,
55839d3e758SParav Pandit 		.seq_show = rdmacg_resource_read,
55939d3e758SParav Pandit 		.private = RDMACG_RESOURCE_TYPE_MAX,
56039d3e758SParav Pandit 		.flags = CFTYPE_NOT_ON_ROOT,
56139d3e758SParav Pandit 	},
56239d3e758SParav Pandit 	{
56339d3e758SParav Pandit 		.name = "current",
56439d3e758SParav Pandit 		.seq_show = rdmacg_resource_read,
56539d3e758SParav Pandit 		.private = RDMACG_RESOURCE_TYPE_STAT,
56639d3e758SParav Pandit 		.flags = CFTYPE_NOT_ON_ROOT,
56739d3e758SParav Pandit 	},
56839d3e758SParav Pandit 	{ }	/* terminate */
56939d3e758SParav Pandit };
57039d3e758SParav Pandit 
57139d3e758SParav Pandit static struct cgroup_subsys_state *
57239d3e758SParav Pandit rdmacg_css_alloc(struct cgroup_subsys_state *parent)
57339d3e758SParav Pandit {
57439d3e758SParav Pandit 	struct rdma_cgroup *cg;
57539d3e758SParav Pandit 
57639d3e758SParav Pandit 	cg = kzalloc(sizeof(*cg), GFP_KERNEL);
57739d3e758SParav Pandit 	if (!cg)
57839d3e758SParav Pandit 		return ERR_PTR(-ENOMEM);
57939d3e758SParav Pandit 
58039d3e758SParav Pandit 	INIT_LIST_HEAD(&cg->rpools);
58139d3e758SParav Pandit 	return &cg->css;
58239d3e758SParav Pandit }
58339d3e758SParav Pandit 
/* css_free callback: release the rdma cgroup that embeds @css. */
static void rdmacg_css_free(struct cgroup_subsys_state *css)
{
	kfree(css_rdmacg(css));
}
59039d3e758SParav Pandit 
59139d3e758SParav Pandit /**
59239d3e758SParav Pandit  * rdmacg_css_offline - cgroup css_offline callback
59339d3e758SParav Pandit  * @css: css of interest
59439d3e758SParav Pandit  *
59539d3e758SParav Pandit  * This function is called when @css is about to go away and responsible
59639d3e758SParav Pandit  * for shooting down all rdmacg associated with @css. As part of that it
59739d3e758SParav Pandit  * marks all the resource pool entries to max value, so that when resources are
59839d3e758SParav Pandit  * uncharged, associated resource pool can be freed as well.
59939d3e758SParav Pandit  */
60039d3e758SParav Pandit static void rdmacg_css_offline(struct cgroup_subsys_state *css)
60139d3e758SParav Pandit {
60239d3e758SParav Pandit 	struct rdma_cgroup *cg = css_rdmacg(css);
60339d3e758SParav Pandit 	struct rdmacg_resource_pool *rpool;
60439d3e758SParav Pandit 
60539d3e758SParav Pandit 	mutex_lock(&rdmacg_mutex);
60639d3e758SParav Pandit 
60739d3e758SParav Pandit 	list_for_each_entry(rpool, &cg->rpools, cg_node)
60839d3e758SParav Pandit 		set_all_resource_max_limit(rpool);
60939d3e758SParav Pandit 
61039d3e758SParav Pandit 	mutex_unlock(&rdmacg_mutex);
61139d3e758SParav Pandit }
61239d3e758SParav Pandit 
61339d3e758SParav Pandit struct cgroup_subsys rdma_cgrp_subsys = {
61439d3e758SParav Pandit 	.css_alloc	= rdmacg_css_alloc,
61539d3e758SParav Pandit 	.css_free	= rdmacg_css_free,
61639d3e758SParav Pandit 	.css_offline	= rdmacg_css_offline,
61739d3e758SParav Pandit 	.legacy_cftypes	= rdmacg_files,
61839d3e758SParav Pandit 	.dfl_cftypes	= rdmacg_files,
61939d3e758SParav Pandit };
620