xref: /openbmc/linux/kernel/cgroup/rdma.c (revision c20d4d88)
// SPDX-License-Identifier: GPL-2.0-only
/*
 * RDMA resource limiting controller for cgroups.
 *
 * Used to allow a cgroup hierarchy to stop processes from consuming
 * additional RDMA resources after a certain limit is reached.
 *
 * Copyright (C) 2016 Parav Pandit <pandit.parav@gmail.com>
 */
1039d3e758SParav Pandit 
1139d3e758SParav Pandit #include <linux/bitops.h>
1239d3e758SParav Pandit #include <linux/slab.h>
1339d3e758SParav Pandit #include <linux/seq_file.h>
1439d3e758SParav Pandit #include <linux/cgroup.h>
1539d3e758SParav Pandit #include <linux/parser.h>
1639d3e758SParav Pandit #include <linux/cgroup_rdma.h>
1739d3e758SParav Pandit 
/* Token printed for, and accepted as, an unlimited (S32_MAX) value. */
#define RDMACG_MAX_STR "max"

/*
 * rdmacg_mutex serializes access to the per-cgroup resource pool lists
 * and to rdmacg_devices, the global list of registered rdma devices.
 */
static DEFINE_MUTEX(rdmacg_mutex);
static LIST_HEAD(rdmacg_devices);
2639d3e758SParav Pandit 
/*
 * Selects which per-resource field a control file reports; stored in
 * cftype->private and read back in print_rpool_values().
 */
enum rdmacg_file_type {
	RDMACG_RESOURCE_TYPE_MAX,	/* configured limits */
	RDMACG_RESOURCE_TYPE_STAT,	/* current usage */
};
3139d3e758SParav Pandit 
3239d3e758SParav Pandit /*
3339d3e758SParav Pandit  * resource table definition as to be seen by the user.
3439d3e758SParav Pandit  * Need to add entries to it when more resources are
3539d3e758SParav Pandit  * added/defined at IB verb/core layer.
3639d3e758SParav Pandit  */
3739d3e758SParav Pandit static char const *rdmacg_resource_names[] = {
3839d3e758SParav Pandit 	[RDMACG_RESOURCE_HCA_HANDLE]	= "hca_handle",
3939d3e758SParav Pandit 	[RDMACG_RESOURCE_HCA_OBJECT]	= "hca_object",
4039d3e758SParav Pandit };
4139d3e758SParav Pandit 
/* Per-resource accounting record kept inside a resource pool. */
struct rdmacg_resource {
	int max;	/* configured limit; S32_MAX means unlimited */
	int usage;	/* number of charges currently outstanding */
};
4739d3e758SParav Pandit 
4839d3e758SParav Pandit /*
4939d3e758SParav Pandit  * resource pool object which represents per cgroup, per device
5039d3e758SParav Pandit  * resources. There are multiple instances of this object per cgroup,
5139d3e758SParav Pandit  * therefore it cannot be embedded within rdma_cgroup structure. It
5239d3e758SParav Pandit  * is maintained as list.
5339d3e758SParav Pandit  */
5439d3e758SParav Pandit struct rdmacg_resource_pool {
5539d3e758SParav Pandit 	struct rdmacg_device	*device;
5639d3e758SParav Pandit 	struct rdmacg_resource	resources[RDMACG_RESOURCE_MAX];
5739d3e758SParav Pandit 
5839d3e758SParav Pandit 	struct list_head	cg_node;
5939d3e758SParav Pandit 	struct list_head	dev_node;
6039d3e758SParav Pandit 
6139d3e758SParav Pandit 	/* count active user tasks of this pool */
6239d3e758SParav Pandit 	u64			usage_sum;
6339d3e758SParav Pandit 	/* total number counts which are set to max */
6439d3e758SParav Pandit 	int			num_max_cnt;
6539d3e758SParav Pandit };
6639d3e758SParav Pandit 
css_rdmacg(struct cgroup_subsys_state * css)6739d3e758SParav Pandit static struct rdma_cgroup *css_rdmacg(struct cgroup_subsys_state *css)
6839d3e758SParav Pandit {
6939d3e758SParav Pandit 	return container_of(css, struct rdma_cgroup, css);
7039d3e758SParav Pandit }
7139d3e758SParav Pandit 
parent_rdmacg(struct rdma_cgroup * cg)7239d3e758SParav Pandit static struct rdma_cgroup *parent_rdmacg(struct rdma_cgroup *cg)
7339d3e758SParav Pandit {
7439d3e758SParav Pandit 	return css_rdmacg(cg->css.parent);
7539d3e758SParav Pandit }
7639d3e758SParav Pandit 
get_current_rdmacg(void)7739d3e758SParav Pandit static inline struct rdma_cgroup *get_current_rdmacg(void)
7839d3e758SParav Pandit {
7939d3e758SParav Pandit 	return css_rdmacg(task_get_css(current, rdma_cgrp_id));
8039d3e758SParav Pandit }
8139d3e758SParav Pandit 
set_resource_limit(struct rdmacg_resource_pool * rpool,int index,int new_max)8239d3e758SParav Pandit static void set_resource_limit(struct rdmacg_resource_pool *rpool,
8339d3e758SParav Pandit 			       int index, int new_max)
8439d3e758SParav Pandit {
8539d3e758SParav Pandit 	if (new_max == S32_MAX) {
8639d3e758SParav Pandit 		if (rpool->resources[index].max != S32_MAX)
8739d3e758SParav Pandit 			rpool->num_max_cnt++;
8839d3e758SParav Pandit 	} else {
8939d3e758SParav Pandit 		if (rpool->resources[index].max == S32_MAX)
9039d3e758SParav Pandit 			rpool->num_max_cnt--;
9139d3e758SParav Pandit 	}
9239d3e758SParav Pandit 	rpool->resources[index].max = new_max;
9339d3e758SParav Pandit }
9439d3e758SParav Pandit 
set_all_resource_max_limit(struct rdmacg_resource_pool * rpool)9539d3e758SParav Pandit static void set_all_resource_max_limit(struct rdmacg_resource_pool *rpool)
9639d3e758SParav Pandit {
9739d3e758SParav Pandit 	int i;
9839d3e758SParav Pandit 
9939d3e758SParav Pandit 	for (i = 0; i < RDMACG_RESOURCE_MAX; i++)
10039d3e758SParav Pandit 		set_resource_limit(rpool, i, S32_MAX);
10139d3e758SParav Pandit }
10239d3e758SParav Pandit 
free_cg_rpool_locked(struct rdmacg_resource_pool * rpool)10339d3e758SParav Pandit static void free_cg_rpool_locked(struct rdmacg_resource_pool *rpool)
10439d3e758SParav Pandit {
10539d3e758SParav Pandit 	lockdep_assert_held(&rdmacg_mutex);
10639d3e758SParav Pandit 
10739d3e758SParav Pandit 	list_del(&rpool->cg_node);
10839d3e758SParav Pandit 	list_del(&rpool->dev_node);
10939d3e758SParav Pandit 	kfree(rpool);
11039d3e758SParav Pandit }
11139d3e758SParav Pandit 
11239d3e758SParav Pandit static struct rdmacg_resource_pool *
find_cg_rpool_locked(struct rdma_cgroup * cg,struct rdmacg_device * device)11339d3e758SParav Pandit find_cg_rpool_locked(struct rdma_cgroup *cg,
11439d3e758SParav Pandit 		     struct rdmacg_device *device)
11539d3e758SParav Pandit 
11639d3e758SParav Pandit {
11739d3e758SParav Pandit 	struct rdmacg_resource_pool *pool;
11839d3e758SParav Pandit 
11939d3e758SParav Pandit 	lockdep_assert_held(&rdmacg_mutex);
12039d3e758SParav Pandit 
12139d3e758SParav Pandit 	list_for_each_entry(pool, &cg->rpools, cg_node)
12239d3e758SParav Pandit 		if (pool->device == device)
12339d3e758SParav Pandit 			return pool;
12439d3e758SParav Pandit 
12539d3e758SParav Pandit 	return NULL;
12639d3e758SParav Pandit }
12739d3e758SParav Pandit 
12839d3e758SParav Pandit static struct rdmacg_resource_pool *
get_cg_rpool_locked(struct rdma_cgroup * cg,struct rdmacg_device * device)12939d3e758SParav Pandit get_cg_rpool_locked(struct rdma_cgroup *cg, struct rdmacg_device *device)
13039d3e758SParav Pandit {
13139d3e758SParav Pandit 	struct rdmacg_resource_pool *rpool;
13239d3e758SParav Pandit 
13339d3e758SParav Pandit 	rpool = find_cg_rpool_locked(cg, device);
13439d3e758SParav Pandit 	if (rpool)
13539d3e758SParav Pandit 		return rpool;
13639d3e758SParav Pandit 
13739d3e758SParav Pandit 	rpool = kzalloc(sizeof(*rpool), GFP_KERNEL);
13839d3e758SParav Pandit 	if (!rpool)
13939d3e758SParav Pandit 		return ERR_PTR(-ENOMEM);
14039d3e758SParav Pandit 
14139d3e758SParav Pandit 	rpool->device = device;
14239d3e758SParav Pandit 	set_all_resource_max_limit(rpool);
14339d3e758SParav Pandit 
14439d3e758SParav Pandit 	INIT_LIST_HEAD(&rpool->cg_node);
14539d3e758SParav Pandit 	INIT_LIST_HEAD(&rpool->dev_node);
14639d3e758SParav Pandit 	list_add_tail(&rpool->cg_node, &cg->rpools);
14739d3e758SParav Pandit 	list_add_tail(&rpool->dev_node, &device->rpools);
14839d3e758SParav Pandit 	return rpool;
14939d3e758SParav Pandit }
15039d3e758SParav Pandit 
15139d3e758SParav Pandit /**
15239d3e758SParav Pandit  * uncharge_cg_locked - uncharge resource for rdma cgroup
15339d3e758SParav Pandit  * @cg: pointer to cg to uncharge and all parents in hierarchy
15439d3e758SParav Pandit  * @device: pointer to rdmacg device
15539d3e758SParav Pandit  * @index: index of the resource to uncharge in cg (resource pool)
15639d3e758SParav Pandit  *
15739d3e758SParav Pandit  * It also frees the resource pool which was created as part of
15839d3e758SParav Pandit  * charging operation when there are no resources attached to
15939d3e758SParav Pandit  * resource pool.
16039d3e758SParav Pandit  */
16139d3e758SParav Pandit static void
uncharge_cg_locked(struct rdma_cgroup * cg,struct rdmacg_device * device,enum rdmacg_resource_type index)16239d3e758SParav Pandit uncharge_cg_locked(struct rdma_cgroup *cg,
16339d3e758SParav Pandit 		   struct rdmacg_device *device,
16439d3e758SParav Pandit 		   enum rdmacg_resource_type index)
16539d3e758SParav Pandit {
16639d3e758SParav Pandit 	struct rdmacg_resource_pool *rpool;
16739d3e758SParav Pandit 
16839d3e758SParav Pandit 	rpool = find_cg_rpool_locked(cg, device);
16939d3e758SParav Pandit 
17039d3e758SParav Pandit 	/*
17139d3e758SParav Pandit 	 * rpool cannot be null at this stage. Let kernel operate in case
17239d3e758SParav Pandit 	 * if there a bug in IB stack or rdma controller, instead of crashing
17339d3e758SParav Pandit 	 * the system.
17439d3e758SParav Pandit 	 */
17539d3e758SParav Pandit 	if (unlikely(!rpool)) {
17639d3e758SParav Pandit 		pr_warn("Invalid device %p or rdma cgroup %p\n", cg, device);
17739d3e758SParav Pandit 		return;
17839d3e758SParav Pandit 	}
17939d3e758SParav Pandit 
18039d3e758SParav Pandit 	rpool->resources[index].usage--;
18139d3e758SParav Pandit 
18239d3e758SParav Pandit 	/*
18339d3e758SParav Pandit 	 * A negative count (or overflow) is invalid,
18439d3e758SParav Pandit 	 * it indicates a bug in the rdma controller.
18539d3e758SParav Pandit 	 */
18639d3e758SParav Pandit 	WARN_ON_ONCE(rpool->resources[index].usage < 0);
18739d3e758SParav Pandit 	rpool->usage_sum--;
18839d3e758SParav Pandit 	if (rpool->usage_sum == 0 &&
18939d3e758SParav Pandit 	    rpool->num_max_cnt == RDMACG_RESOURCE_MAX) {
19039d3e758SParav Pandit 		/*
19139d3e758SParav Pandit 		 * No user of the rpool and all entries are set to max, so
19239d3e758SParav Pandit 		 * safe to delete this rpool.
19339d3e758SParav Pandit 		 */
19439d3e758SParav Pandit 		free_cg_rpool_locked(rpool);
19539d3e758SParav Pandit 	}
19639d3e758SParav Pandit }
19739d3e758SParav Pandit 
19839d3e758SParav Pandit /**
19939d3e758SParav Pandit  * rdmacg_uncharge_hierarchy - hierarchically uncharge rdma resource count
200*c20d4d88SGaosheng Cui  * @cg: pointer to cg to uncharge and all parents in hierarchy
20139d3e758SParav Pandit  * @device: pointer to rdmacg device
20239d3e758SParav Pandit  * @stop_cg: while traversing hirerchy, when meet with stop_cg cgroup
20339d3e758SParav Pandit  *           stop uncharging
20439d3e758SParav Pandit  * @index: index of the resource to uncharge in cg in given resource pool
20539d3e758SParav Pandit  */
rdmacg_uncharge_hierarchy(struct rdma_cgroup * cg,struct rdmacg_device * device,struct rdma_cgroup * stop_cg,enum rdmacg_resource_type index)20639d3e758SParav Pandit static void rdmacg_uncharge_hierarchy(struct rdma_cgroup *cg,
20739d3e758SParav Pandit 				     struct rdmacg_device *device,
20839d3e758SParav Pandit 				     struct rdma_cgroup *stop_cg,
20939d3e758SParav Pandit 				     enum rdmacg_resource_type index)
21039d3e758SParav Pandit {
21139d3e758SParav Pandit 	struct rdma_cgroup *p;
21239d3e758SParav Pandit 
21339d3e758SParav Pandit 	mutex_lock(&rdmacg_mutex);
21439d3e758SParav Pandit 
21539d3e758SParav Pandit 	for (p = cg; p != stop_cg; p = parent_rdmacg(p))
21639d3e758SParav Pandit 		uncharge_cg_locked(p, device, index);
21739d3e758SParav Pandit 
21839d3e758SParav Pandit 	mutex_unlock(&rdmacg_mutex);
21939d3e758SParav Pandit 
22039d3e758SParav Pandit 	css_put(&cg->css);
22139d3e758SParav Pandit }
22239d3e758SParav Pandit 
22339d3e758SParav Pandit /**
22439d3e758SParav Pandit  * rdmacg_uncharge - hierarchically uncharge rdma resource count
225*c20d4d88SGaosheng Cui  * @cg: pointer to cg to uncharge and all parents in hierarchy
22639d3e758SParav Pandit  * @device: pointer to rdmacg device
22739d3e758SParav Pandit  * @index: index of the resource to uncharge in cgroup in given resource pool
22839d3e758SParav Pandit  */
rdmacg_uncharge(struct rdma_cgroup * cg,struct rdmacg_device * device,enum rdmacg_resource_type index)22939d3e758SParav Pandit void rdmacg_uncharge(struct rdma_cgroup *cg,
23039d3e758SParav Pandit 		     struct rdmacg_device *device,
23139d3e758SParav Pandit 		     enum rdmacg_resource_type index)
23239d3e758SParav Pandit {
23339d3e758SParav Pandit 	if (index >= RDMACG_RESOURCE_MAX)
23439d3e758SParav Pandit 		return;
23539d3e758SParav Pandit 
23639d3e758SParav Pandit 	rdmacg_uncharge_hierarchy(cg, device, NULL, index);
23739d3e758SParav Pandit }
23839d3e758SParav Pandit EXPORT_SYMBOL(rdmacg_uncharge);
23939d3e758SParav Pandit 
24039d3e758SParav Pandit /**
24139d3e758SParav Pandit  * rdmacg_try_charge - hierarchically try to charge the rdma resource
24239d3e758SParav Pandit  * @rdmacg: pointer to rdma cgroup which will own this resource
24339d3e758SParav Pandit  * @device: pointer to rdmacg device
24439d3e758SParav Pandit  * @index: index of the resource to charge in cgroup (resource pool)
24539d3e758SParav Pandit  *
24639d3e758SParav Pandit  * This function follows charging resource in hierarchical way.
24739d3e758SParav Pandit  * It will fail if the charge would cause the new value to exceed the
24839d3e758SParav Pandit  * hierarchical limit.
24908b2b6fdSZhen Lei  * Returns 0 if the charge succeeded, otherwise -EAGAIN, -ENOMEM or -EINVAL.
25039d3e758SParav Pandit  * Returns pointer to rdmacg for this resource when charging is successful.
25139d3e758SParav Pandit  *
25239d3e758SParav Pandit  * Charger needs to account resources on two criteria.
25339d3e758SParav Pandit  * (a) per cgroup & (b) per device resource usage.
25439d3e758SParav Pandit  * Per cgroup resource usage ensures that tasks of cgroup doesn't cross
25539d3e758SParav Pandit  * the configured limits. Per device provides granular configuration
25639d3e758SParav Pandit  * in multi device usage. It allocates resource pool in the hierarchy
25739d3e758SParav Pandit  * for each parent it come across for first resource. Later on resource
25839d3e758SParav Pandit  * pool will be available. Therefore it will be much faster thereon
25939d3e758SParav Pandit  * to charge/uncharge.
26039d3e758SParav Pandit  */
rdmacg_try_charge(struct rdma_cgroup ** rdmacg,struct rdmacg_device * device,enum rdmacg_resource_type index)26139d3e758SParav Pandit int rdmacg_try_charge(struct rdma_cgroup **rdmacg,
26239d3e758SParav Pandit 		      struct rdmacg_device *device,
26339d3e758SParav Pandit 		      enum rdmacg_resource_type index)
26439d3e758SParav Pandit {
26539d3e758SParav Pandit 	struct rdma_cgroup *cg, *p;
26639d3e758SParav Pandit 	struct rdmacg_resource_pool *rpool;
26739d3e758SParav Pandit 	s64 new;
26839d3e758SParav Pandit 	int ret = 0;
26939d3e758SParav Pandit 
27039d3e758SParav Pandit 	if (index >= RDMACG_RESOURCE_MAX)
27139d3e758SParav Pandit 		return -EINVAL;
27239d3e758SParav Pandit 
27339d3e758SParav Pandit 	/*
27439d3e758SParav Pandit 	 * hold on to css, as cgroup can be removed but resource
27539d3e758SParav Pandit 	 * accounting happens on css.
27639d3e758SParav Pandit 	 */
27739d3e758SParav Pandit 	cg = get_current_rdmacg();
27839d3e758SParav Pandit 
27939d3e758SParav Pandit 	mutex_lock(&rdmacg_mutex);
28039d3e758SParav Pandit 	for (p = cg; p; p = parent_rdmacg(p)) {
28139d3e758SParav Pandit 		rpool = get_cg_rpool_locked(p, device);
28239d3e758SParav Pandit 		if (IS_ERR(rpool)) {
28339d3e758SParav Pandit 			ret = PTR_ERR(rpool);
28439d3e758SParav Pandit 			goto err;
28539d3e758SParav Pandit 		} else {
28639d3e758SParav Pandit 			new = rpool->resources[index].usage + 1;
28739d3e758SParav Pandit 			if (new > rpool->resources[index].max) {
28839d3e758SParav Pandit 				ret = -EAGAIN;
28939d3e758SParav Pandit 				goto err;
29039d3e758SParav Pandit 			} else {
29139d3e758SParav Pandit 				rpool->resources[index].usage = new;
29239d3e758SParav Pandit 				rpool->usage_sum++;
29339d3e758SParav Pandit 			}
29439d3e758SParav Pandit 		}
29539d3e758SParav Pandit 	}
29639d3e758SParav Pandit 	mutex_unlock(&rdmacg_mutex);
29739d3e758SParav Pandit 
29839d3e758SParav Pandit 	*rdmacg = cg;
29939d3e758SParav Pandit 	return 0;
30039d3e758SParav Pandit 
30139d3e758SParav Pandit err:
30239d3e758SParav Pandit 	mutex_unlock(&rdmacg_mutex);
30339d3e758SParav Pandit 	rdmacg_uncharge_hierarchy(cg, device, p, index);
30439d3e758SParav Pandit 	return ret;
30539d3e758SParav Pandit }
30639d3e758SParav Pandit EXPORT_SYMBOL(rdmacg_try_charge);
30739d3e758SParav Pandit 
30839d3e758SParav Pandit /**
30939d3e758SParav Pandit  * rdmacg_register_device - register rdmacg device to rdma controller.
31039d3e758SParav Pandit  * @device: pointer to rdmacg device whose resources need to be accounted.
31139d3e758SParav Pandit  *
31239d3e758SParav Pandit  * If IB stack wish a device to participate in rdma cgroup resource
31339d3e758SParav Pandit  * tracking, it must invoke this API to register with rdma cgroup before
31439d3e758SParav Pandit  * any user space application can start using the RDMA resources.
31539d3e758SParav Pandit  */
rdmacg_register_device(struct rdmacg_device * device)3167527a7b1SParav Pandit void rdmacg_register_device(struct rdmacg_device *device)
31739d3e758SParav Pandit {
31839d3e758SParav Pandit 	INIT_LIST_HEAD(&device->dev_node);
31939d3e758SParav Pandit 	INIT_LIST_HEAD(&device->rpools);
32039d3e758SParav Pandit 
32139d3e758SParav Pandit 	mutex_lock(&rdmacg_mutex);
32239d3e758SParav Pandit 	list_add_tail(&device->dev_node, &rdmacg_devices);
32339d3e758SParav Pandit 	mutex_unlock(&rdmacg_mutex);
32439d3e758SParav Pandit }
32539d3e758SParav Pandit EXPORT_SYMBOL(rdmacg_register_device);
32639d3e758SParav Pandit 
32739d3e758SParav Pandit /**
32839d3e758SParav Pandit  * rdmacg_unregister_device - unregister rdmacg device from rdma controller.
32939d3e758SParav Pandit  * @device: pointer to rdmacg device which was previously registered with rdma
33039d3e758SParav Pandit  *          controller using rdmacg_register_device().
33139d3e758SParav Pandit  *
33239d3e758SParav Pandit  * IB stack must invoke this after all the resources of the IB device
33339d3e758SParav Pandit  * are destroyed and after ensuring that no more resources will be created
33439d3e758SParav Pandit  * when this API is invoked.
33539d3e758SParav Pandit  */
rdmacg_unregister_device(struct rdmacg_device * device)33639d3e758SParav Pandit void rdmacg_unregister_device(struct rdmacg_device *device)
33739d3e758SParav Pandit {
33839d3e758SParav Pandit 	struct rdmacg_resource_pool *rpool, *tmp;
33939d3e758SParav Pandit 
34039d3e758SParav Pandit 	/*
34139d3e758SParav Pandit 	 * Synchronize with any active resource settings,
34239d3e758SParav Pandit 	 * usage query happening via configfs.
34339d3e758SParav Pandit 	 */
34439d3e758SParav Pandit 	mutex_lock(&rdmacg_mutex);
34539d3e758SParav Pandit 	list_del_init(&device->dev_node);
34639d3e758SParav Pandit 
34739d3e758SParav Pandit 	/*
34839d3e758SParav Pandit 	 * Now that this device is off the cgroup list, its safe to free
34939d3e758SParav Pandit 	 * all the rpool resources.
35039d3e758SParav Pandit 	 */
35139d3e758SParav Pandit 	list_for_each_entry_safe(rpool, tmp, &device->rpools, dev_node)
35239d3e758SParav Pandit 		free_cg_rpool_locked(rpool);
35339d3e758SParav Pandit 
35439d3e758SParav Pandit 	mutex_unlock(&rdmacg_mutex);
35539d3e758SParav Pandit }
35639d3e758SParav Pandit EXPORT_SYMBOL(rdmacg_unregister_device);
35739d3e758SParav Pandit 
parse_resource(char * c,int * intval)35839d3e758SParav Pandit static int parse_resource(char *c, int *intval)
35939d3e758SParav Pandit {
36039d3e758SParav Pandit 	substring_t argstr;
36139d3e758SParav Pandit 	char *name, *value = c;
36239d3e758SParav Pandit 	size_t len;
363cc659e76SAndy Shevchenko 	int ret, i;
36439d3e758SParav Pandit 
36539d3e758SParav Pandit 	name = strsep(&value, "=");
36639d3e758SParav Pandit 	if (!name || !value)
36739d3e758SParav Pandit 		return -EINVAL;
36839d3e758SParav Pandit 
369cc659e76SAndy Shevchenko 	i = match_string(rdmacg_resource_names, RDMACG_RESOURCE_MAX, name);
370cc659e76SAndy Shevchenko 	if (i < 0)
371cc659e76SAndy Shevchenko 		return i;
37239d3e758SParav Pandit 
373cc659e76SAndy Shevchenko 	len = strlen(value);
37439d3e758SParav Pandit 
37539d3e758SParav Pandit 	argstr.from = value;
37639d3e758SParav Pandit 	argstr.to = value + len;
37739d3e758SParav Pandit 
37839d3e758SParav Pandit 	ret = match_int(&argstr, intval);
37939d3e758SParav Pandit 	if (ret >= 0) {
38039d3e758SParav Pandit 		if (*intval < 0)
381cc659e76SAndy Shevchenko 			return -EINVAL;
38239d3e758SParav Pandit 		return i;
38339d3e758SParav Pandit 	}
38439d3e758SParav Pandit 	if (strncmp(value, RDMACG_MAX_STR, len) == 0) {
38539d3e758SParav Pandit 		*intval = S32_MAX;
38639d3e758SParav Pandit 		return i;
38739d3e758SParav Pandit 	}
38839d3e758SParav Pandit 	return -EINVAL;
38939d3e758SParav Pandit }
39039d3e758SParav Pandit 
rdmacg_parse_limits(char * options,int * new_limits,unsigned long * enables)39139d3e758SParav Pandit static int rdmacg_parse_limits(char *options,
39239d3e758SParav Pandit 			       int *new_limits, unsigned long *enables)
39339d3e758SParav Pandit {
39439d3e758SParav Pandit 	char *c;
39539d3e758SParav Pandit 	int err = -EINVAL;
39639d3e758SParav Pandit 
39739d3e758SParav Pandit 	/* parse resource options */
39839d3e758SParav Pandit 	while ((c = strsep(&options, " ")) != NULL) {
39939d3e758SParav Pandit 		int index, intval;
40039d3e758SParav Pandit 
40139d3e758SParav Pandit 		index = parse_resource(c, &intval);
40239d3e758SParav Pandit 		if (index < 0)
40339d3e758SParav Pandit 			goto err;
40439d3e758SParav Pandit 
40539d3e758SParav Pandit 		new_limits[index] = intval;
40639d3e758SParav Pandit 		*enables |= BIT(index);
40739d3e758SParav Pandit 	}
40839d3e758SParav Pandit 	return 0;
40939d3e758SParav Pandit 
41039d3e758SParav Pandit err:
41139d3e758SParav Pandit 	return err;
41239d3e758SParav Pandit }
41339d3e758SParav Pandit 
rdmacg_get_device_locked(const char * name)41439d3e758SParav Pandit static struct rdmacg_device *rdmacg_get_device_locked(const char *name)
41539d3e758SParav Pandit {
41639d3e758SParav Pandit 	struct rdmacg_device *device;
41739d3e758SParav Pandit 
41839d3e758SParav Pandit 	lockdep_assert_held(&rdmacg_mutex);
41939d3e758SParav Pandit 
42039d3e758SParav Pandit 	list_for_each_entry(device, &rdmacg_devices, dev_node)
42139d3e758SParav Pandit 		if (!strcmp(name, device->name))
42239d3e758SParav Pandit 			return device;
42339d3e758SParav Pandit 
42439d3e758SParav Pandit 	return NULL;
42539d3e758SParav Pandit }
42639d3e758SParav Pandit 
rdmacg_resource_set_max(struct kernfs_open_file * of,char * buf,size_t nbytes,loff_t off)42739d3e758SParav Pandit static ssize_t rdmacg_resource_set_max(struct kernfs_open_file *of,
42839d3e758SParav Pandit 				       char *buf, size_t nbytes, loff_t off)
42939d3e758SParav Pandit {
43039d3e758SParav Pandit 	struct rdma_cgroup *cg = css_rdmacg(of_css(of));
43139d3e758SParav Pandit 	const char *dev_name;
43239d3e758SParav Pandit 	struct rdmacg_resource_pool *rpool;
43339d3e758SParav Pandit 	struct rdmacg_device *device;
43439d3e758SParav Pandit 	char *options = strstrip(buf);
43539d3e758SParav Pandit 	int *new_limits;
43639d3e758SParav Pandit 	unsigned long enables = 0;
43739d3e758SParav Pandit 	int i = 0, ret = 0;
43839d3e758SParav Pandit 
43939d3e758SParav Pandit 	/* extract the device name first */
44039d3e758SParav Pandit 	dev_name = strsep(&options, " ");
44139d3e758SParav Pandit 	if (!dev_name) {
44239d3e758SParav Pandit 		ret = -EINVAL;
44339d3e758SParav Pandit 		goto err;
44439d3e758SParav Pandit 	}
44539d3e758SParav Pandit 
44639d3e758SParav Pandit 	new_limits = kcalloc(RDMACG_RESOURCE_MAX, sizeof(int), GFP_KERNEL);
44739d3e758SParav Pandit 	if (!new_limits) {
44839d3e758SParav Pandit 		ret = -ENOMEM;
44939d3e758SParav Pandit 		goto err;
45039d3e758SParav Pandit 	}
45139d3e758SParav Pandit 
45239d3e758SParav Pandit 	ret = rdmacg_parse_limits(options, new_limits, &enables);
45339d3e758SParav Pandit 	if (ret)
45439d3e758SParav Pandit 		goto parse_err;
45539d3e758SParav Pandit 
45639d3e758SParav Pandit 	/* acquire lock to synchronize with hot plug devices */
45739d3e758SParav Pandit 	mutex_lock(&rdmacg_mutex);
45839d3e758SParav Pandit 
45939d3e758SParav Pandit 	device = rdmacg_get_device_locked(dev_name);
46039d3e758SParav Pandit 	if (!device) {
46139d3e758SParav Pandit 		ret = -ENODEV;
46239d3e758SParav Pandit 		goto dev_err;
46339d3e758SParav Pandit 	}
46439d3e758SParav Pandit 
46539d3e758SParav Pandit 	rpool = get_cg_rpool_locked(cg, device);
46639d3e758SParav Pandit 	if (IS_ERR(rpool)) {
46739d3e758SParav Pandit 		ret = PTR_ERR(rpool);
46839d3e758SParav Pandit 		goto dev_err;
46939d3e758SParav Pandit 	}
47039d3e758SParav Pandit 
47139d3e758SParav Pandit 	/* now set the new limits of the rpool */
47239d3e758SParav Pandit 	for_each_set_bit(i, &enables, RDMACG_RESOURCE_MAX)
47339d3e758SParav Pandit 		set_resource_limit(rpool, i, new_limits[i]);
47439d3e758SParav Pandit 
47539d3e758SParav Pandit 	if (rpool->usage_sum == 0 &&
47639d3e758SParav Pandit 	    rpool->num_max_cnt == RDMACG_RESOURCE_MAX) {
47739d3e758SParav Pandit 		/*
47839d3e758SParav Pandit 		 * No user of the rpool and all entries are set to max, so
47939d3e758SParav Pandit 		 * safe to delete this rpool.
48039d3e758SParav Pandit 		 */
48139d3e758SParav Pandit 		free_cg_rpool_locked(rpool);
48239d3e758SParav Pandit 	}
48339d3e758SParav Pandit 
48439d3e758SParav Pandit dev_err:
48539d3e758SParav Pandit 	mutex_unlock(&rdmacg_mutex);
48639d3e758SParav Pandit 
48739d3e758SParav Pandit parse_err:
48839d3e758SParav Pandit 	kfree(new_limits);
48939d3e758SParav Pandit 
49039d3e758SParav Pandit err:
49139d3e758SParav Pandit 	return ret ?: nbytes;
49239d3e758SParav Pandit }
49339d3e758SParav Pandit 
print_rpool_values(struct seq_file * sf,struct rdmacg_resource_pool * rpool)49439d3e758SParav Pandit static void print_rpool_values(struct seq_file *sf,
49539d3e758SParav Pandit 			       struct rdmacg_resource_pool *rpool)
49639d3e758SParav Pandit {
49739d3e758SParav Pandit 	enum rdmacg_file_type sf_type;
49839d3e758SParav Pandit 	int i;
49939d3e758SParav Pandit 	u32 value;
50039d3e758SParav Pandit 
50139d3e758SParav Pandit 	sf_type = seq_cft(sf)->private;
50239d3e758SParav Pandit 
50339d3e758SParav Pandit 	for (i = 0; i < RDMACG_RESOURCE_MAX; i++) {
50439d3e758SParav Pandit 		seq_puts(sf, rdmacg_resource_names[i]);
50539d3e758SParav Pandit 		seq_putc(sf, '=');
50639d3e758SParav Pandit 		if (sf_type == RDMACG_RESOURCE_TYPE_MAX) {
50739d3e758SParav Pandit 			if (rpool)
50839d3e758SParav Pandit 				value = rpool->resources[i].max;
50939d3e758SParav Pandit 			else
51039d3e758SParav Pandit 				value = S32_MAX;
51139d3e758SParav Pandit 		} else {
51239d3e758SParav Pandit 			if (rpool)
51339d3e758SParav Pandit 				value = rpool->resources[i].usage;
5147896dfb0SParav Pandit 			else
5157896dfb0SParav Pandit 				value = 0;
51639d3e758SParav Pandit 		}
51739d3e758SParav Pandit 
51839d3e758SParav Pandit 		if (value == S32_MAX)
51939d3e758SParav Pandit 			seq_puts(sf, RDMACG_MAX_STR);
52039d3e758SParav Pandit 		else
52139d3e758SParav Pandit 			seq_printf(sf, "%d", value);
52239d3e758SParav Pandit 		seq_putc(sf, ' ');
52339d3e758SParav Pandit 	}
52439d3e758SParav Pandit }
52539d3e758SParav Pandit 
rdmacg_resource_read(struct seq_file * sf,void * v)52639d3e758SParav Pandit static int rdmacg_resource_read(struct seq_file *sf, void *v)
52739d3e758SParav Pandit {
52839d3e758SParav Pandit 	struct rdmacg_device *device;
52939d3e758SParav Pandit 	struct rdmacg_resource_pool *rpool;
53039d3e758SParav Pandit 	struct rdma_cgroup *cg = css_rdmacg(seq_css(sf));
53139d3e758SParav Pandit 
53239d3e758SParav Pandit 	mutex_lock(&rdmacg_mutex);
53339d3e758SParav Pandit 
53439d3e758SParav Pandit 	list_for_each_entry(device, &rdmacg_devices, dev_node) {
53539d3e758SParav Pandit 		seq_printf(sf, "%s ", device->name);
53639d3e758SParav Pandit 
53739d3e758SParav Pandit 		rpool = find_cg_rpool_locked(cg, device);
53839d3e758SParav Pandit 		print_rpool_values(sf, rpool);
53939d3e758SParav Pandit 
54039d3e758SParav Pandit 		seq_putc(sf, '\n');
54139d3e758SParav Pandit 	}
54239d3e758SParav Pandit 
54339d3e758SParav Pandit 	mutex_unlock(&rdmacg_mutex);
54439d3e758SParav Pandit 	return 0;
54539d3e758SParav Pandit }
54639d3e758SParav Pandit 
54739d3e758SParav Pandit static struct cftype rdmacg_files[] = {
54839d3e758SParav Pandit 	{
54939d3e758SParav Pandit 		.name = "max",
55039d3e758SParav Pandit 		.write = rdmacg_resource_set_max,
55139d3e758SParav Pandit 		.seq_show = rdmacg_resource_read,
55239d3e758SParav Pandit 		.private = RDMACG_RESOURCE_TYPE_MAX,
55339d3e758SParav Pandit 		.flags = CFTYPE_NOT_ON_ROOT,
55439d3e758SParav Pandit 	},
55539d3e758SParav Pandit 	{
55639d3e758SParav Pandit 		.name = "current",
55739d3e758SParav Pandit 		.seq_show = rdmacg_resource_read,
55839d3e758SParav Pandit 		.private = RDMACG_RESOURCE_TYPE_STAT,
55939d3e758SParav Pandit 		.flags = CFTYPE_NOT_ON_ROOT,
56039d3e758SParav Pandit 	},
56139d3e758SParav Pandit 	{ }	/* terminate */
56239d3e758SParav Pandit };
56339d3e758SParav Pandit 
56439d3e758SParav Pandit static struct cgroup_subsys_state *
rdmacg_css_alloc(struct cgroup_subsys_state * parent)56539d3e758SParav Pandit rdmacg_css_alloc(struct cgroup_subsys_state *parent)
56639d3e758SParav Pandit {
56739d3e758SParav Pandit 	struct rdma_cgroup *cg;
56839d3e758SParav Pandit 
56939d3e758SParav Pandit 	cg = kzalloc(sizeof(*cg), GFP_KERNEL);
57039d3e758SParav Pandit 	if (!cg)
57139d3e758SParav Pandit 		return ERR_PTR(-ENOMEM);
57239d3e758SParav Pandit 
57339d3e758SParav Pandit 	INIT_LIST_HEAD(&cg->rpools);
57439d3e758SParav Pandit 	return &cg->css;
57539d3e758SParav Pandit }
57639d3e758SParav Pandit 
/* css_free callback: release the rdma_cgroup that embeds @css. */
static void rdmacg_css_free(struct cgroup_subsys_state *css)
{
	kfree(css_rdmacg(css));
}
58339d3e758SParav Pandit 
58439d3e758SParav Pandit /**
58539d3e758SParav Pandit  * rdmacg_css_offline - cgroup css_offline callback
58639d3e758SParav Pandit  * @css: css of interest
58739d3e758SParav Pandit  *
58839d3e758SParav Pandit  * This function is called when @css is about to go away and responsible
58939d3e758SParav Pandit  * for shooting down all rdmacg associated with @css. As part of that it
59039d3e758SParav Pandit  * marks all the resource pool entries to max value, so that when resources are
59139d3e758SParav Pandit  * uncharged, associated resource pool can be freed as well.
59239d3e758SParav Pandit  */
rdmacg_css_offline(struct cgroup_subsys_state * css)59339d3e758SParav Pandit static void rdmacg_css_offline(struct cgroup_subsys_state *css)
59439d3e758SParav Pandit {
59539d3e758SParav Pandit 	struct rdma_cgroup *cg = css_rdmacg(css);
59639d3e758SParav Pandit 	struct rdmacg_resource_pool *rpool;
59739d3e758SParav Pandit 
59839d3e758SParav Pandit 	mutex_lock(&rdmacg_mutex);
59939d3e758SParav Pandit 
60039d3e758SParav Pandit 	list_for_each_entry(rpool, &cg->rpools, cg_node)
60139d3e758SParav Pandit 		set_all_resource_max_limit(rpool);
60239d3e758SParav Pandit 
60339d3e758SParav Pandit 	mutex_unlock(&rdmacg_mutex);
60439d3e758SParav Pandit }
60539d3e758SParav Pandit 
60639d3e758SParav Pandit struct cgroup_subsys rdma_cgrp_subsys = {
60739d3e758SParav Pandit 	.css_alloc	= rdmacg_css_alloc,
60839d3e758SParav Pandit 	.css_free	= rdmacg_css_free,
60939d3e758SParav Pandit 	.css_offline	= rdmacg_css_offline,
61039d3e758SParav Pandit 	.legacy_cftypes	= rdmacg_files,
61139d3e758SParav Pandit 	.dfl_cftypes	= rdmacg_files,
61239d3e758SParav Pandit };
613