139d3e758SParav Pandit /* 239d3e758SParav Pandit * RDMA resource limiting controller for cgroups. 339d3e758SParav Pandit * 439d3e758SParav Pandit * Used to allow a cgroup hierarchy to stop processes from consuming 539d3e758SParav Pandit * additional RDMA resources after a certain limit is reached. 639d3e758SParav Pandit * 739d3e758SParav Pandit * Copyright (C) 2016 Parav Pandit <pandit.parav@gmail.com> 839d3e758SParav Pandit * 939d3e758SParav Pandit * This file is subject to the terms and conditions of version 2 of the GNU 1039d3e758SParav Pandit * General Public License. See the file COPYING in the main directory of the 1139d3e758SParav Pandit * Linux distribution for more details. 1239d3e758SParav Pandit */ 1339d3e758SParav Pandit 1439d3e758SParav Pandit #include <linux/bitops.h> 1539d3e758SParav Pandit #include <linux/slab.h> 1639d3e758SParav Pandit #include <linux/seq_file.h> 1739d3e758SParav Pandit #include <linux/cgroup.h> 1839d3e758SParav Pandit #include <linux/parser.h> 1939d3e758SParav Pandit #include <linux/cgroup_rdma.h> 2039d3e758SParav Pandit 2139d3e758SParav Pandit #define RDMACG_MAX_STR "max" 2239d3e758SParav Pandit 2339d3e758SParav Pandit /* 2439d3e758SParav Pandit * Protects list of resource pools maintained on per cgroup basis 2539d3e758SParav Pandit * and rdma device list. 2639d3e758SParav Pandit */ 2739d3e758SParav Pandit static DEFINE_MUTEX(rdmacg_mutex); 2839d3e758SParav Pandit static LIST_HEAD(rdmacg_devices); 2939d3e758SParav Pandit 3039d3e758SParav Pandit enum rdmacg_file_type { 3139d3e758SParav Pandit RDMACG_RESOURCE_TYPE_MAX, 3239d3e758SParav Pandit RDMACG_RESOURCE_TYPE_STAT, 3339d3e758SParav Pandit }; 3439d3e758SParav Pandit 3539d3e758SParav Pandit /* 3639d3e758SParav Pandit * resource table definition as to be seen by the user. 3739d3e758SParav Pandit * Need to add entries to it when more resources are 3839d3e758SParav Pandit * added/defined at IB verb/core layer. 3939d3e758SParav Pandit */ 4039d3e758SParav Pandit static char const *rdmacg_resource_names[] = { 4139d3e758SParav Pandit [RDMACG_RESOURCE_HCA_HANDLE] = "hca_handle", 4239d3e758SParav Pandit [RDMACG_RESOURCE_HCA_OBJECT] = "hca_object", 4339d3e758SParav Pandit }; 4439d3e758SParav Pandit 4539d3e758SParav Pandit /* resource tracker for each resource of rdma cgroup */ 4639d3e758SParav Pandit struct rdmacg_resource { 4739d3e758SParav Pandit int max; 4839d3e758SParav Pandit int usage; 4939d3e758SParav Pandit }; 5039d3e758SParav Pandit 5139d3e758SParav Pandit /* 5239d3e758SParav Pandit * resource pool object which represents per cgroup, per device 5339d3e758SParav Pandit * resources. There are multiple instances of this object per cgroup, 5439d3e758SParav Pandit * therefore it cannot be embedded within rdma_cgroup structure. It 5539d3e758SParav Pandit * is maintained as list. 5639d3e758SParav Pandit */ 5739d3e758SParav Pandit struct rdmacg_resource_pool { 5839d3e758SParav Pandit struct rdmacg_device *device; 5939d3e758SParav Pandit struct rdmacg_resource resources[RDMACG_RESOURCE_MAX]; 6039d3e758SParav Pandit 6139d3e758SParav Pandit struct list_head cg_node; 6239d3e758SParav Pandit struct list_head dev_node; 6339d3e758SParav Pandit 6439d3e758SParav Pandit /* count active user tasks of this pool */ 6539d3e758SParav Pandit u64 usage_sum; 6639d3e758SParav Pandit /* total number counts which are set to max */ 6739d3e758SParav Pandit int num_max_cnt; 6839d3e758SParav Pandit }; 6939d3e758SParav Pandit 7039d3e758SParav Pandit static struct rdma_cgroup *css_rdmacg(struct cgroup_subsys_state *css) 7139d3e758SParav Pandit { 7239d3e758SParav Pandit return container_of(css, struct rdma_cgroup, css); 7339d3e758SParav Pandit } 7439d3e758SParav Pandit 7539d3e758SParav Pandit static struct rdma_cgroup *parent_rdmacg(struct rdma_cgroup *cg) 7639d3e758SParav Pandit { 7739d3e758SParav Pandit return css_rdmacg(cg->css.parent); 7839d3e758SParav Pandit } 7939d3e758SParav Pandit 8039d3e758SParav Pandit static inline struct rdma_cgroup *get_current_rdmacg(void) 8139d3e758SParav Pandit { 8239d3e758SParav Pandit return css_rdmacg(task_get_css(current, rdma_cgrp_id)); 8339d3e758SParav Pandit } 8439d3e758SParav Pandit 8539d3e758SParav Pandit static void set_resource_limit(struct rdmacg_resource_pool *rpool, 8639d3e758SParav Pandit int index, int new_max) 8739d3e758SParav Pandit { 8839d3e758SParav Pandit if (new_max == S32_MAX) { 8939d3e758SParav Pandit if (rpool->resources[index].max != S32_MAX) 9039d3e758SParav Pandit rpool->num_max_cnt++; 9139d3e758SParav Pandit } else { 9239d3e758SParav Pandit if (rpool->resources[index].max == S32_MAX) 9339d3e758SParav Pandit rpool->num_max_cnt--; 9439d3e758SParav Pandit } 9539d3e758SParav Pandit rpool->resources[index].max = new_max; 9639d3e758SParav Pandit } 9739d3e758SParav Pandit 9839d3e758SParav Pandit static void set_all_resource_max_limit(struct rdmacg_resource_pool *rpool) 9939d3e758SParav Pandit { 10039d3e758SParav Pandit int i; 10139d3e758SParav Pandit 10239d3e758SParav Pandit for (i = 0; i < RDMACG_RESOURCE_MAX; i++) 10339d3e758SParav Pandit set_resource_limit(rpool, i, S32_MAX); 10439d3e758SParav Pandit } 10539d3e758SParav Pandit 10639d3e758SParav Pandit static void free_cg_rpool_locked(struct rdmacg_resource_pool *rpool) 10739d3e758SParav Pandit { 10839d3e758SParav Pandit lockdep_assert_held(&rdmacg_mutex); 10939d3e758SParav Pandit 11039d3e758SParav Pandit list_del(&rpool->cg_node); 11139d3e758SParav Pandit list_del(&rpool->dev_node); 11239d3e758SParav Pandit kfree(rpool); 11339d3e758SParav Pandit } 11439d3e758SParav Pandit 11539d3e758SParav Pandit static struct rdmacg_resource_pool * 11639d3e758SParav Pandit find_cg_rpool_locked(struct rdma_cgroup *cg, 11739d3e758SParav Pandit struct rdmacg_device *device) 11839d3e758SParav Pandit 11939d3e758SParav Pandit { 12039d3e758SParav Pandit struct rdmacg_resource_pool *pool; 12139d3e758SParav Pandit 12239d3e758SParav Pandit lockdep_assert_held(&rdmacg_mutex); 12339d3e758SParav Pandit 12439d3e758SParav Pandit list_for_each_entry(pool, &cg->rpools, cg_node) 12539d3e758SParav Pandit if (pool->device == device) 12639d3e758SParav Pandit return pool; 12739d3e758SParav Pandit 12839d3e758SParav Pandit return NULL; 12939d3e758SParav Pandit } 13039d3e758SParav Pandit 13139d3e758SParav Pandit static struct rdmacg_resource_pool * 13239d3e758SParav Pandit get_cg_rpool_locked(struct rdma_cgroup *cg, struct rdmacg_device *device) 13339d3e758SParav Pandit { 13439d3e758SParav Pandit struct rdmacg_resource_pool *rpool; 13539d3e758SParav Pandit 13639d3e758SParav Pandit rpool = find_cg_rpool_locked(cg, device); 13739d3e758SParav Pandit if (rpool) 13839d3e758SParav Pandit return rpool; 13939d3e758SParav Pandit 14039d3e758SParav Pandit rpool = kzalloc(sizeof(*rpool), GFP_KERNEL); 14139d3e758SParav Pandit if (!rpool) 14239d3e758SParav Pandit return ERR_PTR(-ENOMEM); 14339d3e758SParav Pandit 14439d3e758SParav Pandit rpool->device = device; 14539d3e758SParav Pandit set_all_resource_max_limit(rpool); 14639d3e758SParav Pandit 14739d3e758SParav Pandit INIT_LIST_HEAD(&rpool->cg_node); 14839d3e758SParav Pandit INIT_LIST_HEAD(&rpool->dev_node); 14939d3e758SParav Pandit list_add_tail(&rpool->cg_node, &cg->rpools); 15039d3e758SParav Pandit list_add_tail(&rpool->dev_node, &device->rpools); 15139d3e758SParav Pandit return rpool; 15239d3e758SParav Pandit } 15339d3e758SParav Pandit 15439d3e758SParav Pandit /** 15539d3e758SParav Pandit * uncharge_cg_locked - uncharge resource for rdma cgroup 15639d3e758SParav Pandit * @cg: pointer to cg to uncharge and all parents in hierarchy 15739d3e758SParav Pandit * @device: pointer to rdmacg device 15839d3e758SParav Pandit * @index: index of the resource to uncharge in cg (resource pool) 15939d3e758SParav Pandit * 16039d3e758SParav Pandit * It also frees the resource pool which was created as part of 16139d3e758SParav Pandit * charging operation when there are no resources attached to 16239d3e758SParav Pandit * resource pool. 16339d3e758SParav Pandit */ 16439d3e758SParav Pandit static void 16539d3e758SParav Pandit uncharge_cg_locked(struct rdma_cgroup *cg, 16639d3e758SParav Pandit struct rdmacg_device *device, 16739d3e758SParav Pandit enum rdmacg_resource_type index) 16839d3e758SParav Pandit { 16939d3e758SParav Pandit struct rdmacg_resource_pool *rpool; 17039d3e758SParav Pandit 17139d3e758SParav Pandit rpool = find_cg_rpool_locked(cg, device); 17239d3e758SParav Pandit 17339d3e758SParav Pandit /* 17439d3e758SParav Pandit * rpool cannot be null at this stage. Let kernel operate in case 17539d3e758SParav Pandit * if there a bug in IB stack or rdma controller, instead of crashing 17639d3e758SParav Pandit * the system. 17739d3e758SParav Pandit */ 17839d3e758SParav Pandit if (unlikely(!rpool)) { 17939d3e758SParav Pandit pr_warn("Invalid device %p or rdma cgroup %p\n", cg, device); 18039d3e758SParav Pandit return; 18139d3e758SParav Pandit } 18239d3e758SParav Pandit 18339d3e758SParav Pandit rpool->resources[index].usage--; 18439d3e758SParav Pandit 18539d3e758SParav Pandit /* 18639d3e758SParav Pandit * A negative count (or overflow) is invalid, 18739d3e758SParav Pandit * it indicates a bug in the rdma controller. 18839d3e758SParav Pandit */ 18939d3e758SParav Pandit WARN_ON_ONCE(rpool->resources[index].usage < 0); 19039d3e758SParav Pandit rpool->usage_sum--; 19139d3e758SParav Pandit if (rpool->usage_sum == 0 && 19239d3e758SParav Pandit rpool->num_max_cnt == RDMACG_RESOURCE_MAX) { 19339d3e758SParav Pandit /* 19439d3e758SParav Pandit * No user of the rpool and all entries are set to max, so 19539d3e758SParav Pandit * safe to delete this rpool. 19639d3e758SParav Pandit */ 19739d3e758SParav Pandit free_cg_rpool_locked(rpool); 19839d3e758SParav Pandit } 19939d3e758SParav Pandit } 20039d3e758SParav Pandit 20139d3e758SParav Pandit /** 20239d3e758SParav Pandit * rdmacg_uncharge_hierarchy - hierarchically uncharge rdma resource count 20339d3e758SParav Pandit * @device: pointer to rdmacg device 20439d3e758SParav Pandit * @stop_cg: while traversing hirerchy, when meet with stop_cg cgroup 20539d3e758SParav Pandit * stop uncharging 20639d3e758SParav Pandit * @index: index of the resource to uncharge in cg in given resource pool 20739d3e758SParav Pandit */ 20839d3e758SParav Pandit static void rdmacg_uncharge_hierarchy(struct rdma_cgroup *cg, 20939d3e758SParav Pandit struct rdmacg_device *device, 21039d3e758SParav Pandit struct rdma_cgroup *stop_cg, 21139d3e758SParav Pandit enum rdmacg_resource_type index) 21239d3e758SParav Pandit { 21339d3e758SParav Pandit struct rdma_cgroup *p; 21439d3e758SParav Pandit 21539d3e758SParav Pandit mutex_lock(&rdmacg_mutex); 21639d3e758SParav Pandit 21739d3e758SParav Pandit for (p = cg; p != stop_cg; p = parent_rdmacg(p)) 21839d3e758SParav Pandit uncharge_cg_locked(p, device, index); 21939d3e758SParav Pandit 22039d3e758SParav Pandit mutex_unlock(&rdmacg_mutex); 22139d3e758SParav Pandit 22239d3e758SParav Pandit css_put(&cg->css); 22339d3e758SParav Pandit } 22439d3e758SParav Pandit 22539d3e758SParav Pandit /** 22639d3e758SParav Pandit * rdmacg_uncharge - hierarchically uncharge rdma resource count 22739d3e758SParav Pandit * @device: pointer to rdmacg device 22839d3e758SParav Pandit * @index: index of the resource to uncharge in cgroup in given resource pool 22939d3e758SParav Pandit */ 23039d3e758SParav Pandit void rdmacg_uncharge(struct rdma_cgroup *cg, 23139d3e758SParav Pandit struct rdmacg_device *device, 23239d3e758SParav Pandit enum rdmacg_resource_type index) 23339d3e758SParav Pandit { 23439d3e758SParav Pandit if (index >= RDMACG_RESOURCE_MAX) 23539d3e758SParav Pandit return; 23639d3e758SParav Pandit 23739d3e758SParav Pandit rdmacg_uncharge_hierarchy(cg, device, NULL, index); 23839d3e758SParav Pandit } 23939d3e758SParav Pandit EXPORT_SYMBOL(rdmacg_uncharge); 24039d3e758SParav Pandit 24139d3e758SParav Pandit /** 24239d3e758SParav Pandit * rdmacg_try_charge - hierarchically try to charge the rdma resource 24339d3e758SParav Pandit * @rdmacg: pointer to rdma cgroup which will own this resource 24439d3e758SParav Pandit * @device: pointer to rdmacg device 24539d3e758SParav Pandit * @index: index of the resource to charge in cgroup (resource pool) 24639d3e758SParav Pandit * 24739d3e758SParav Pandit * This function follows charging resource in hierarchical way. 24839d3e758SParav Pandit * It will fail if the charge would cause the new value to exceed the 24939d3e758SParav Pandit * hierarchical limit. 25039d3e758SParav Pandit * Returns 0 if the charge succeded, otherwise -EAGAIN, -ENOMEM or -EINVAL. 25139d3e758SParav Pandit * Returns pointer to rdmacg for this resource when charging is successful. 25239d3e758SParav Pandit * 25339d3e758SParav Pandit * Charger needs to account resources on two criteria. 25439d3e758SParav Pandit * (a) per cgroup & (b) per device resource usage. 25539d3e758SParav Pandit * Per cgroup resource usage ensures that tasks of cgroup doesn't cross 25639d3e758SParav Pandit * the configured limits. Per device provides granular configuration 25739d3e758SParav Pandit * in multi device usage. It allocates resource pool in the hierarchy 25839d3e758SParav Pandit * for each parent it come across for first resource. Later on resource 25939d3e758SParav Pandit * pool will be available. Therefore it will be much faster thereon 26039d3e758SParav Pandit * to charge/uncharge. 26139d3e758SParav Pandit */ 26239d3e758SParav Pandit int rdmacg_try_charge(struct rdma_cgroup **rdmacg, 26339d3e758SParav Pandit struct rdmacg_device *device, 26439d3e758SParav Pandit enum rdmacg_resource_type index) 26539d3e758SParav Pandit { 26639d3e758SParav Pandit struct rdma_cgroup *cg, *p; 26739d3e758SParav Pandit struct rdmacg_resource_pool *rpool; 26839d3e758SParav Pandit s64 new; 26939d3e758SParav Pandit int ret = 0; 27039d3e758SParav Pandit 27139d3e758SParav Pandit if (index >= RDMACG_RESOURCE_MAX) 27239d3e758SParav Pandit return -EINVAL; 27339d3e758SParav Pandit 27439d3e758SParav Pandit /* 27539d3e758SParav Pandit * hold on to css, as cgroup can be removed but resource 27639d3e758SParav Pandit * accounting happens on css. 27739d3e758SParav Pandit */ 27839d3e758SParav Pandit cg = get_current_rdmacg(); 27939d3e758SParav Pandit 28039d3e758SParav Pandit mutex_lock(&rdmacg_mutex); 28139d3e758SParav Pandit for (p = cg; p; p = parent_rdmacg(p)) { 28239d3e758SParav Pandit rpool = get_cg_rpool_locked(p, device); 28339d3e758SParav Pandit if (IS_ERR(rpool)) { 28439d3e758SParav Pandit ret = PTR_ERR(rpool); 28539d3e758SParav Pandit goto err; 28639d3e758SParav Pandit } else { 28739d3e758SParav Pandit new = rpool->resources[index].usage + 1; 28839d3e758SParav Pandit if (new > rpool->resources[index].max) { 28939d3e758SParav Pandit ret = -EAGAIN; 29039d3e758SParav Pandit goto err; 29139d3e758SParav Pandit } else { 29239d3e758SParav Pandit rpool->resources[index].usage = new; 29339d3e758SParav Pandit rpool->usage_sum++; 29439d3e758SParav Pandit } 29539d3e758SParav Pandit } 29639d3e758SParav Pandit } 29739d3e758SParav Pandit mutex_unlock(&rdmacg_mutex); 29839d3e758SParav Pandit 29939d3e758SParav Pandit *rdmacg = cg; 30039d3e758SParav Pandit return 0; 30139d3e758SParav Pandit 30239d3e758SParav Pandit err: 30339d3e758SParav Pandit mutex_unlock(&rdmacg_mutex); 30439d3e758SParav Pandit rdmacg_uncharge_hierarchy(cg, device, p, index); 30539d3e758SParav Pandit return ret; 30639d3e758SParav Pandit } 30739d3e758SParav Pandit EXPORT_SYMBOL(rdmacg_try_charge); 30839d3e758SParav Pandit 30939d3e758SParav Pandit /** 31039d3e758SParav Pandit * rdmacg_register_device - register rdmacg device to rdma controller. 31139d3e758SParav Pandit * @device: pointer to rdmacg device whose resources need to be accounted. 31239d3e758SParav Pandit * 31339d3e758SParav Pandit * If IB stack wish a device to participate in rdma cgroup resource 31439d3e758SParav Pandit * tracking, it must invoke this API to register with rdma cgroup before 31539d3e758SParav Pandit * any user space application can start using the RDMA resources. 31639d3e758SParav Pandit * Returns 0 on success or EINVAL when table length given is beyond 31739d3e758SParav Pandit * supported size. 31839d3e758SParav Pandit */ 31939d3e758SParav Pandit int rdmacg_register_device(struct rdmacg_device *device) 32039d3e758SParav Pandit { 32139d3e758SParav Pandit INIT_LIST_HEAD(&device->dev_node); 32239d3e758SParav Pandit INIT_LIST_HEAD(&device->rpools); 32339d3e758SParav Pandit 32439d3e758SParav Pandit mutex_lock(&rdmacg_mutex); 32539d3e758SParav Pandit list_add_tail(&device->dev_node, &rdmacg_devices); 32639d3e758SParav Pandit mutex_unlock(&rdmacg_mutex); 32739d3e758SParav Pandit return 0; 32839d3e758SParav Pandit } 32939d3e758SParav Pandit EXPORT_SYMBOL(rdmacg_register_device); 33039d3e758SParav Pandit 33139d3e758SParav Pandit /** 33239d3e758SParav Pandit * rdmacg_unregister_device - unregister rdmacg device from rdma controller. 33339d3e758SParav Pandit * @device: pointer to rdmacg device which was previously registered with rdma 33439d3e758SParav Pandit * controller using rdmacg_register_device(). 33539d3e758SParav Pandit * 33639d3e758SParav Pandit * IB stack must invoke this after all the resources of the IB device 33739d3e758SParav Pandit * are destroyed and after ensuring that no more resources will be created 33839d3e758SParav Pandit * when this API is invoked. 33939d3e758SParav Pandit */ 34039d3e758SParav Pandit void rdmacg_unregister_device(struct rdmacg_device *device) 34139d3e758SParav Pandit { 34239d3e758SParav Pandit struct rdmacg_resource_pool *rpool, *tmp; 34339d3e758SParav Pandit 34439d3e758SParav Pandit /* 34539d3e758SParav Pandit * Synchronize with any active resource settings, 34639d3e758SParav Pandit * usage query happening via configfs. 34739d3e758SParav Pandit */ 34839d3e758SParav Pandit mutex_lock(&rdmacg_mutex); 34939d3e758SParav Pandit list_del_init(&device->dev_node); 35039d3e758SParav Pandit 35139d3e758SParav Pandit /* 35239d3e758SParav Pandit * Now that this device is off the cgroup list, its safe to free 35339d3e758SParav Pandit * all the rpool resources. 35439d3e758SParav Pandit */ 35539d3e758SParav Pandit list_for_each_entry_safe(rpool, tmp, &device->rpools, dev_node) 35639d3e758SParav Pandit free_cg_rpool_locked(rpool); 35739d3e758SParav Pandit 35839d3e758SParav Pandit mutex_unlock(&rdmacg_mutex); 35939d3e758SParav Pandit } 36039d3e758SParav Pandit EXPORT_SYMBOL(rdmacg_unregister_device); 36139d3e758SParav Pandit 36239d3e758SParav Pandit static int parse_resource(char *c, int *intval) 36339d3e758SParav Pandit { 36439d3e758SParav Pandit substring_t argstr; 36539d3e758SParav Pandit const char **table = &rdmacg_resource_names[0]; 36639d3e758SParav Pandit char *name, *value = c; 36739d3e758SParav Pandit size_t len; 36839d3e758SParav Pandit int ret, i = 0; 36939d3e758SParav Pandit 37039d3e758SParav Pandit name = strsep(&value, "="); 37139d3e758SParav Pandit if (!name || !value) 37239d3e758SParav Pandit return -EINVAL; 37339d3e758SParav Pandit 37439d3e758SParav Pandit len = strlen(value); 37539d3e758SParav Pandit 37639d3e758SParav Pandit for (i = 0; i < RDMACG_RESOURCE_MAX; i++) { 37739d3e758SParav Pandit if (strcmp(table[i], name)) 37839d3e758SParav Pandit continue; 37939d3e758SParav Pandit 38039d3e758SParav Pandit argstr.from = value; 38139d3e758SParav Pandit argstr.to = value + len; 38239d3e758SParav Pandit 38339d3e758SParav Pandit ret = match_int(&argstr, intval); 38439d3e758SParav Pandit if (ret >= 0) { 38539d3e758SParav Pandit if (*intval < 0) 38639d3e758SParav Pandit break; 38739d3e758SParav Pandit return i; 38839d3e758SParav Pandit } 38939d3e758SParav Pandit if (strncmp(value, RDMACG_MAX_STR, len) == 0) { 39039d3e758SParav Pandit *intval = S32_MAX; 39139d3e758SParav Pandit return i; 39239d3e758SParav Pandit } 39339d3e758SParav Pandit break; 39439d3e758SParav Pandit } 39539d3e758SParav Pandit return -EINVAL; 39639d3e758SParav Pandit } 39739d3e758SParav Pandit 39839d3e758SParav Pandit static int rdmacg_parse_limits(char *options, 39939d3e758SParav Pandit int *new_limits, unsigned long *enables) 40039d3e758SParav Pandit { 40139d3e758SParav Pandit char *c; 40239d3e758SParav Pandit int err = -EINVAL; 40339d3e758SParav Pandit 40439d3e758SParav Pandit /* parse resource options */ 40539d3e758SParav Pandit while ((c = strsep(&options, " ")) != NULL) { 40639d3e758SParav Pandit int index, intval; 40739d3e758SParav Pandit 40839d3e758SParav Pandit index = parse_resource(c, &intval); 40939d3e758SParav Pandit if (index < 0) 41039d3e758SParav Pandit goto err; 41139d3e758SParav Pandit 41239d3e758SParav Pandit new_limits[index] = intval; 41339d3e758SParav Pandit *enables |= BIT(index); 41439d3e758SParav Pandit } 41539d3e758SParav Pandit return 0; 41639d3e758SParav Pandit 41739d3e758SParav Pandit err: 41839d3e758SParav Pandit return err; 41939d3e758SParav Pandit } 42039d3e758SParav Pandit 42139d3e758SParav Pandit static struct rdmacg_device *rdmacg_get_device_locked(const char *name) 42239d3e758SParav Pandit { 42339d3e758SParav Pandit struct rdmacg_device *device; 42439d3e758SParav Pandit 42539d3e758SParav Pandit lockdep_assert_held(&rdmacg_mutex); 42639d3e758SParav Pandit 42739d3e758SParav Pandit list_for_each_entry(device, &rdmacg_devices, dev_node) 42839d3e758SParav Pandit if (!strcmp(name, device->name)) 42939d3e758SParav Pandit return device; 43039d3e758SParav Pandit 43139d3e758SParav Pandit return NULL; 43239d3e758SParav Pandit } 43339d3e758SParav Pandit 43439d3e758SParav Pandit static ssize_t rdmacg_resource_set_max(struct kernfs_open_file *of, 43539d3e758SParav Pandit char *buf, size_t nbytes, loff_t off) 43639d3e758SParav Pandit { 43739d3e758SParav Pandit struct rdma_cgroup *cg = css_rdmacg(of_css(of)); 43839d3e758SParav Pandit const char *dev_name; 43939d3e758SParav Pandit struct rdmacg_resource_pool *rpool; 44039d3e758SParav Pandit struct rdmacg_device *device; 44139d3e758SParav Pandit char *options = strstrip(buf); 44239d3e758SParav Pandit int *new_limits; 44339d3e758SParav Pandit unsigned long enables = 0; 44439d3e758SParav Pandit int i = 0, ret = 0; 44539d3e758SParav Pandit 44639d3e758SParav Pandit /* extract the device name first */ 44739d3e758SParav Pandit dev_name = strsep(&options, " "); 44839d3e758SParav Pandit if (!dev_name) { 44939d3e758SParav Pandit ret = -EINVAL; 45039d3e758SParav Pandit goto err; 45139d3e758SParav Pandit } 45239d3e758SParav Pandit 45339d3e758SParav Pandit new_limits = kcalloc(RDMACG_RESOURCE_MAX, sizeof(int), GFP_KERNEL); 45439d3e758SParav Pandit if (!new_limits) { 45539d3e758SParav Pandit ret = -ENOMEM; 45639d3e758SParav Pandit goto err; 45739d3e758SParav Pandit } 45839d3e758SParav Pandit 45939d3e758SParav Pandit ret = rdmacg_parse_limits(options, new_limits, &enables); 46039d3e758SParav Pandit if (ret) 46139d3e758SParav Pandit goto parse_err; 46239d3e758SParav Pandit 46339d3e758SParav Pandit /* acquire lock to synchronize with hot plug devices */ 46439d3e758SParav Pandit mutex_lock(&rdmacg_mutex); 46539d3e758SParav Pandit 46639d3e758SParav Pandit device = rdmacg_get_device_locked(dev_name); 46739d3e758SParav Pandit if (!device) { 46839d3e758SParav Pandit ret = -ENODEV; 46939d3e758SParav Pandit goto dev_err; 47039d3e758SParav Pandit } 47139d3e758SParav Pandit 47239d3e758SParav Pandit rpool = get_cg_rpool_locked(cg, device); 47339d3e758SParav Pandit if (IS_ERR(rpool)) { 47439d3e758SParav Pandit ret = PTR_ERR(rpool); 47539d3e758SParav Pandit goto dev_err; 47639d3e758SParav Pandit } 47739d3e758SParav Pandit 47839d3e758SParav Pandit /* now set the new limits of the rpool */ 47939d3e758SParav Pandit for_each_set_bit(i, &enables, RDMACG_RESOURCE_MAX) 48039d3e758SParav Pandit set_resource_limit(rpool, i, new_limits[i]); 48139d3e758SParav Pandit 48239d3e758SParav Pandit if (rpool->usage_sum == 0 && 48339d3e758SParav Pandit rpool->num_max_cnt == RDMACG_RESOURCE_MAX) { 48439d3e758SParav Pandit /* 48539d3e758SParav Pandit * No user of the rpool and all entries are set to max, so 48639d3e758SParav Pandit * safe to delete this rpool. 48739d3e758SParav Pandit */ 48839d3e758SParav Pandit free_cg_rpool_locked(rpool); 48939d3e758SParav Pandit } 49039d3e758SParav Pandit 49139d3e758SParav Pandit dev_err: 49239d3e758SParav Pandit mutex_unlock(&rdmacg_mutex); 49339d3e758SParav Pandit 49439d3e758SParav Pandit parse_err: 49539d3e758SParav Pandit kfree(new_limits); 49639d3e758SParav Pandit 49739d3e758SParav Pandit err: 49839d3e758SParav Pandit return ret ?: nbytes; 49939d3e758SParav Pandit } 50039d3e758SParav Pandit 50139d3e758SParav Pandit static void print_rpool_values(struct seq_file *sf, 50239d3e758SParav Pandit struct rdmacg_resource_pool *rpool) 50339d3e758SParav Pandit { 50439d3e758SParav Pandit enum rdmacg_file_type sf_type; 50539d3e758SParav Pandit int i; 50639d3e758SParav Pandit u32 value; 50739d3e758SParav Pandit 50839d3e758SParav Pandit sf_type = seq_cft(sf)->private; 50939d3e758SParav Pandit 51039d3e758SParav Pandit for (i = 0; i < RDMACG_RESOURCE_MAX; i++) { 51139d3e758SParav Pandit seq_puts(sf, rdmacg_resource_names[i]); 51239d3e758SParav Pandit seq_putc(sf, '='); 51339d3e758SParav Pandit if (sf_type == RDMACG_RESOURCE_TYPE_MAX) { 51439d3e758SParav Pandit if (rpool) 51539d3e758SParav Pandit value = rpool->resources[i].max; 51639d3e758SParav Pandit else 51739d3e758SParav Pandit value = S32_MAX; 51839d3e758SParav Pandit } else { 51939d3e758SParav Pandit if (rpool) 52039d3e758SParav Pandit value = rpool->resources[i].usage; 5217896dfb0SParav Pandit else 5227896dfb0SParav Pandit value = 0; 52339d3e758SParav Pandit } 52439d3e758SParav Pandit 52539d3e758SParav Pandit if (value == S32_MAX) 52639d3e758SParav Pandit seq_puts(sf, RDMACG_MAX_STR); 52739d3e758SParav Pandit else 52839d3e758SParav Pandit seq_printf(sf, "%d", value); 52939d3e758SParav Pandit seq_putc(sf, ' '); 53039d3e758SParav Pandit } 53139d3e758SParav Pandit } 53239d3e758SParav Pandit 53339d3e758SParav Pandit static int rdmacg_resource_read(struct seq_file *sf, void *v) 53439d3e758SParav Pandit { 53539d3e758SParav Pandit struct rdmacg_device *device; 53639d3e758SParav Pandit struct rdmacg_resource_pool *rpool; 53739d3e758SParav Pandit struct rdma_cgroup *cg = css_rdmacg(seq_css(sf)); 53839d3e758SParav Pandit 53939d3e758SParav Pandit mutex_lock(&rdmacg_mutex); 54039d3e758SParav Pandit 54139d3e758SParav Pandit list_for_each_entry(device, &rdmacg_devices, dev_node) { 54239d3e758SParav Pandit seq_printf(sf, "%s ", device->name); 54339d3e758SParav Pandit 54439d3e758SParav Pandit rpool = find_cg_rpool_locked(cg, device); 54539d3e758SParav Pandit print_rpool_values(sf, rpool); 54639d3e758SParav Pandit 54739d3e758SParav Pandit seq_putc(sf, '\n'); 54839d3e758SParav Pandit } 54939d3e758SParav Pandit 55039d3e758SParav Pandit mutex_unlock(&rdmacg_mutex); 55139d3e758SParav Pandit return 0; 55239d3e758SParav Pandit } 55339d3e758SParav Pandit 55439d3e758SParav Pandit static struct cftype rdmacg_files[] = { 55539d3e758SParav Pandit { 55639d3e758SParav Pandit .name = "max", 55739d3e758SParav Pandit .write = rdmacg_resource_set_max, 55839d3e758SParav Pandit .seq_show = rdmacg_resource_read, 55939d3e758SParav Pandit .private = RDMACG_RESOURCE_TYPE_MAX, 56039d3e758SParav Pandit .flags = CFTYPE_NOT_ON_ROOT, 56139d3e758SParav Pandit }, 56239d3e758SParav Pandit { 56339d3e758SParav Pandit .name = "current", 56439d3e758SParav Pandit .seq_show = rdmacg_resource_read, 56539d3e758SParav Pandit .private = RDMACG_RESOURCE_TYPE_STAT, 56639d3e758SParav Pandit .flags = CFTYPE_NOT_ON_ROOT, 56739d3e758SParav Pandit }, 56839d3e758SParav Pandit { } /* terminate */ 56939d3e758SParav Pandit }; 57039d3e758SParav Pandit 57139d3e758SParav Pandit static struct cgroup_subsys_state * 57239d3e758SParav Pandit rdmacg_css_alloc(struct cgroup_subsys_state *parent) 57339d3e758SParav Pandit { 57439d3e758SParav Pandit struct rdma_cgroup *cg; 57539d3e758SParav Pandit 57639d3e758SParav Pandit cg = kzalloc(sizeof(*cg), GFP_KERNEL); 57739d3e758SParav Pandit if (!cg) 57839d3e758SParav Pandit return ERR_PTR(-ENOMEM); 57939d3e758SParav Pandit 58039d3e758SParav Pandit INIT_LIST_HEAD(&cg->rpools); 58139d3e758SParav Pandit return &cg->css; 58239d3e758SParav Pandit } 58339d3e758SParav Pandit 58439d3e758SParav Pandit static void rdmacg_css_free(struct cgroup_subsys_state *css) 58539d3e758SParav Pandit { 58639d3e758SParav Pandit struct rdma_cgroup *cg = css_rdmacg(css); 58739d3e758SParav Pandit 58839d3e758SParav Pandit kfree(cg); 58939d3e758SParav Pandit } 59039d3e758SParav Pandit 59139d3e758SParav Pandit /** 59239d3e758SParav Pandit * rdmacg_css_offline - cgroup css_offline callback 59339d3e758SParav Pandit * @css: css of interest 59439d3e758SParav Pandit * 59539d3e758SParav Pandit * This function is called when @css is about to go away and responsible 59639d3e758SParav Pandit * for shooting down all rdmacg associated with @css. As part of that it 59739d3e758SParav Pandit * marks all the resource pool entries to max value, so that when resources are 59839d3e758SParav Pandit * uncharged, associated resource pool can be freed as well. 59939d3e758SParav Pandit */ 60039d3e758SParav Pandit static void rdmacg_css_offline(struct cgroup_subsys_state *css) 60139d3e758SParav Pandit { 60239d3e758SParav Pandit struct rdma_cgroup *cg = css_rdmacg(css); 60339d3e758SParav Pandit struct rdmacg_resource_pool *rpool; 60439d3e758SParav Pandit 60539d3e758SParav Pandit mutex_lock(&rdmacg_mutex); 60639d3e758SParav Pandit 60739d3e758SParav Pandit list_for_each_entry(rpool, &cg->rpools, cg_node) 60839d3e758SParav Pandit set_all_resource_max_limit(rpool); 60939d3e758SParav Pandit 61039d3e758SParav Pandit mutex_unlock(&rdmacg_mutex); 61139d3e758SParav Pandit } 61239d3e758SParav Pandit 61339d3e758SParav Pandit struct cgroup_subsys rdma_cgrp_subsys = { 61439d3e758SParav Pandit .css_alloc = rdmacg_css_alloc, 61539d3e758SParav Pandit .css_free = rdmacg_css_free, 61639d3e758SParav Pandit .css_offline = rdmacg_css_offline, 61739d3e758SParav Pandit .legacy_cftypes = rdmacg_files, 61839d3e758SParav Pandit .dfl_cftypes = rdmacg_files, 61939d3e758SParav Pandit }; 620