1 /* 2 * net/core/netprio_cgroup.c Priority Control Group 3 * 4 * This program is free software; you can redistribute it and/or 5 * modify it under the terms of the GNU General Public License 6 * as published by the Free Software Foundation; either version 7 * 2 of the License, or (at your option) any later version. 8 * 9 * Authors: Neil Horman <nhorman@tuxdriver.com> 10 */ 11 12 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt 13 14 #include <linux/module.h> 15 #include <linux/slab.h> 16 #include <linux/types.h> 17 #include <linux/string.h> 18 #include <linux/errno.h> 19 #include <linux/skbuff.h> 20 #include <linux/cgroup.h> 21 #include <linux/rcupdate.h> 22 #include <linux/atomic.h> 23 #include <net/rtnetlink.h> 24 #include <net/pkt_cls.h> 25 #include <net/sock.h> 26 #include <net/netprio_cgroup.h> 27 28 #include <linux/fdtable.h> 29 30 #define PRIOIDX_SZ 128 31 32 static unsigned long prioidx_map[PRIOIDX_SZ]; 33 static DEFINE_SPINLOCK(prioidx_map_lock); 34 static atomic_t max_prioidx = ATOMIC_INIT(0); 35 36 static inline struct cgroup_netprio_state *cgrp_netprio_state(struct cgroup *cgrp) 37 { 38 return container_of(cgroup_subsys_state(cgrp, net_prio_subsys_id), 39 struct cgroup_netprio_state, css); 40 } 41 42 static int get_prioidx(u32 *prio) 43 { 44 unsigned long flags; 45 u32 prioidx; 46 47 spin_lock_irqsave(&prioidx_map_lock, flags); 48 prioidx = find_first_zero_bit(prioidx_map, sizeof(unsigned long) * PRIOIDX_SZ); 49 if (prioidx == sizeof(unsigned long) * PRIOIDX_SZ) { 50 spin_unlock_irqrestore(&prioidx_map_lock, flags); 51 return -ENOSPC; 52 } 53 set_bit(prioidx, prioidx_map); 54 if (atomic_read(&max_prioidx) < prioidx) 55 atomic_set(&max_prioidx, prioidx); 56 spin_unlock_irqrestore(&prioidx_map_lock, flags); 57 *prio = prioidx; 58 return 0; 59 } 60 61 static void put_prioidx(u32 idx) 62 { 63 unsigned long flags; 64 65 spin_lock_irqsave(&prioidx_map_lock, flags); 66 clear_bit(idx, prioidx_map); 67 spin_unlock_irqrestore(&prioidx_map_lock, flags); 68 } 69 70 static int extend_netdev_table(struct net_device *dev, u32 new_len) 71 { 72 size_t new_size = sizeof(struct netprio_map) + 73 ((sizeof(u32) * new_len)); 74 struct netprio_map *new_priomap = kzalloc(new_size, GFP_KERNEL); 75 struct netprio_map *old_priomap; 76 77 old_priomap = rtnl_dereference(dev->priomap); 78 79 if (!new_priomap) { 80 pr_warn("Unable to alloc new priomap!\n"); 81 return -ENOMEM; 82 } 83 84 if (old_priomap) 85 memcpy(new_priomap->priomap, old_priomap->priomap, 86 old_priomap->priomap_len * 87 sizeof(old_priomap->priomap[0])); 88 89 new_priomap->priomap_len = new_len; 90 91 rcu_assign_pointer(dev->priomap, new_priomap); 92 if (old_priomap) 93 kfree_rcu(old_priomap, rcu); 94 return 0; 95 } 96 97 static int write_update_netdev_table(struct net_device *dev) 98 { 99 int ret = 0; 100 u32 max_len; 101 struct netprio_map *map; 102 103 max_len = atomic_read(&max_prioidx) + 1; 104 map = rtnl_dereference(dev->priomap); 105 if (!map || map->priomap_len < max_len) 106 ret = extend_netdev_table(dev, max_len); 107 108 return ret; 109 } 110 111 static struct cgroup_subsys_state *cgrp_create(struct cgroup *cgrp) 112 { 113 struct cgroup_netprio_state *cs; 114 int ret = -EINVAL; 115 116 cs = kzalloc(sizeof(*cs), GFP_KERNEL); 117 if (!cs) 118 return ERR_PTR(-ENOMEM); 119 120 if (cgrp->parent && cgrp_netprio_state(cgrp->parent)->prioidx) 121 goto out; 122 123 ret = get_prioidx(&cs->prioidx); 124 if (ret < 0) { 125 pr_warn("No space in priority index array\n"); 126 goto out; 127 } 128 129 return &cs->css; 130 out: 131 kfree(cs); 132 return ERR_PTR(ret); 133 } 134 135 static void cgrp_destroy(struct cgroup *cgrp) 136 { 137 struct cgroup_netprio_state *cs; 138 struct net_device *dev; 139 struct netprio_map *map; 140 141 cs = cgrp_netprio_state(cgrp); 142 rtnl_lock(); 143 for_each_netdev(&init_net, dev) { 144 map = rtnl_dereference(dev->priomap); 145 if (map && cs->prioidx < map->priomap_len) 146 map->priomap[cs->prioidx] = 0; 147 } 148 rtnl_unlock(); 149 put_prioidx(cs->prioidx); 150 kfree(cs); 151 } 152 153 static u64 read_prioidx(struct cgroup *cgrp, struct cftype *cft) 154 { 155 return (u64)cgrp_netprio_state(cgrp)->prioidx; 156 } 157 158 static int read_priomap(struct cgroup *cont, struct cftype *cft, 159 struct cgroup_map_cb *cb) 160 { 161 struct net_device *dev; 162 u32 prioidx = cgrp_netprio_state(cont)->prioidx; 163 u32 priority; 164 struct netprio_map *map; 165 166 rcu_read_lock(); 167 for_each_netdev_rcu(&init_net, dev) { 168 map = rcu_dereference(dev->priomap); 169 priority = (map && prioidx < map->priomap_len) ? map->priomap[prioidx] : 0; 170 cb->fill(cb, dev->name, priority); 171 } 172 rcu_read_unlock(); 173 return 0; 174 } 175 176 static int write_priomap(struct cgroup *cgrp, struct cftype *cft, 177 const char *buffer) 178 { 179 char *devname = kstrdup(buffer, GFP_KERNEL); 180 int ret = -EINVAL; 181 u32 prioidx = cgrp_netprio_state(cgrp)->prioidx; 182 unsigned long priority; 183 char *priostr; 184 struct net_device *dev; 185 struct netprio_map *map; 186 187 if (!devname) 188 return -ENOMEM; 189 190 /* 191 * Minimally sized valid priomap string 192 */ 193 if (strlen(devname) < 3) 194 goto out_free_devname; 195 196 priostr = strstr(devname, " "); 197 if (!priostr) 198 goto out_free_devname; 199 200 /* 201 *Separate the devname from the associated priority 202 *and advance the priostr pointer to the priority value 203 */ 204 *priostr = '\0'; 205 priostr++; 206 207 /* 208 * If the priostr points to NULL, we're at the end of the passed 209 * in string, and its not a valid write 210 */ 211 if (*priostr == '\0') 212 goto out_free_devname; 213 214 ret = kstrtoul(priostr, 10, &priority); 215 if (ret < 0) 216 goto out_free_devname; 217 218 ret = -ENODEV; 219 220 dev = dev_get_by_name(&init_net, devname); 221 if (!dev) 222 goto out_free_devname; 223 224 rtnl_lock(); 225 ret = write_update_netdev_table(dev); 226 if (ret < 0) 227 goto out_put_dev; 228 229 map = rtnl_dereference(dev->priomap); 230 if (map) 231 map->priomap[prioidx] = priority; 232 233 out_put_dev: 234 rtnl_unlock(); 235 dev_put(dev); 236 237 out_free_devname: 238 kfree(devname); 239 return ret; 240 } 241 242 static int update_netprio(const void *v, struct file *file, unsigned n) 243 { 244 int err; 245 struct socket *sock = sock_from_file(file, &err); 246 if (sock) 247 sock->sk->sk_cgrp_prioidx = (u32)(unsigned long)v; 248 return 0; 249 } 250 251 void net_prio_attach(struct cgroup *cgrp, struct cgroup_taskset *tset) 252 { 253 struct task_struct *p; 254 void *v; 255 256 cgroup_taskset_for_each(p, cgrp, tset) { 257 task_lock(p); 258 v = (void *)(unsigned long)task_netprioidx(p); 259 iterate_fd(p->files, 0, update_netprio, v); 260 task_unlock(p); 261 } 262 } 263 264 static struct cftype ss_files[] = { 265 { 266 .name = "prioidx", 267 .read_u64 = read_prioidx, 268 }, 269 { 270 .name = "ifpriomap", 271 .read_map = read_priomap, 272 .write_string = write_priomap, 273 }, 274 { } /* terminate */ 275 }; 276 277 struct cgroup_subsys net_prio_subsys = { 278 .name = "net_prio", 279 .create = cgrp_create, 280 .destroy = cgrp_destroy, 281 .attach = net_prio_attach, 282 .subsys_id = net_prio_subsys_id, 283 .base_cftypes = ss_files, 284 .module = THIS_MODULE, 285 286 /* 287 * net_prio has artificial limit on the number of cgroups and 288 * disallows nesting making it impossible to co-mount it with other 289 * hierarchical subsystems. Remove the artificially low PRIOIDX_SZ 290 * limit and properly nest configuration such that children follow 291 * their parents' configurations by default and are allowed to 292 * override and remove the following. 293 */ 294 .broken_hierarchy = true, 295 }; 296 297 static int netprio_device_event(struct notifier_block *unused, 298 unsigned long event, void *ptr) 299 { 300 struct net_device *dev = ptr; 301 struct netprio_map *old; 302 303 /* 304 * Note this is called with rtnl_lock held so we have update side 305 * protection on our rcu assignments 306 */ 307 308 switch (event) { 309 case NETDEV_UNREGISTER: 310 old = rtnl_dereference(dev->priomap); 311 RCU_INIT_POINTER(dev->priomap, NULL); 312 if (old) 313 kfree_rcu(old, rcu); 314 break; 315 } 316 return NOTIFY_DONE; 317 } 318 319 static struct notifier_block netprio_device_notifier = { 320 .notifier_call = netprio_device_event 321 }; 322 323 static int __init init_cgroup_netprio(void) 324 { 325 int ret; 326 327 ret = cgroup_load_subsys(&net_prio_subsys); 328 if (ret) 329 goto out; 330 331 register_netdevice_notifier(&netprio_device_notifier); 332 333 out: 334 return ret; 335 } 336 337 static void __exit exit_cgroup_netprio(void) 338 { 339 struct netprio_map *old; 340 struct net_device *dev; 341 342 unregister_netdevice_notifier(&netprio_device_notifier); 343 344 cgroup_unload_subsys(&net_prio_subsys); 345 346 rtnl_lock(); 347 for_each_netdev(&init_net, dev) { 348 old = rtnl_dereference(dev->priomap); 349 RCU_INIT_POINTER(dev->priomap, NULL); 350 if (old) 351 kfree_rcu(old, rcu); 352 } 353 rtnl_unlock(); 354 } 355 356 module_init(init_cgroup_netprio); 357 module_exit(exit_cgroup_netprio); 358 MODULE_LICENSE("GPL v2"); 359