1 /* 2 * net/core/netprio_cgroup.c Priority Control Group 3 * 4 * This program is free software; you can redistribute it and/or 5 * modify it under the terms of the GNU General Public License 6 * as published by the Free Software Foundation; either version 7 * 2 of the License, or (at your option) any later version. 8 * 9 * Authors: Neil Horman <nhorman@tuxdriver.com> 10 */ 11 12 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt 13 14 #include <linux/module.h> 15 #include <linux/slab.h> 16 #include <linux/types.h> 17 #include <linux/string.h> 18 #include <linux/errno.h> 19 #include <linux/skbuff.h> 20 #include <linux/cgroup.h> 21 #include <linux/rcupdate.h> 22 #include <linux/atomic.h> 23 #include <net/rtnetlink.h> 24 #include <net/pkt_cls.h> 25 #include <net/sock.h> 26 #include <net/netprio_cgroup.h> 27 28 #include <linux/fdtable.h> 29 30 #define PRIOIDX_SZ 128 31 32 static unsigned long prioidx_map[PRIOIDX_SZ]; 33 static DEFINE_SPINLOCK(prioidx_map_lock); 34 static atomic_t max_prioidx = ATOMIC_INIT(0); 35 36 static inline struct cgroup_netprio_state *cgrp_netprio_state(struct cgroup *cgrp) 37 { 38 return container_of(cgroup_subsys_state(cgrp, net_prio_subsys_id), 39 struct cgroup_netprio_state, css); 40 } 41 42 static int get_prioidx(u32 *prio) 43 { 44 unsigned long flags; 45 u32 prioidx; 46 47 spin_lock_irqsave(&prioidx_map_lock, flags); 48 prioidx = find_first_zero_bit(prioidx_map, sizeof(unsigned long) * PRIOIDX_SZ); 49 if (prioidx == sizeof(unsigned long) * PRIOIDX_SZ) { 50 spin_unlock_irqrestore(&prioidx_map_lock, flags); 51 return -ENOSPC; 52 } 53 set_bit(prioidx, prioidx_map); 54 if (atomic_read(&max_prioidx) < prioidx) 55 atomic_set(&max_prioidx, prioidx); 56 spin_unlock_irqrestore(&prioidx_map_lock, flags); 57 *prio = prioidx; 58 return 0; 59 } 60 61 static void put_prioidx(u32 idx) 62 { 63 unsigned long flags; 64 65 spin_lock_irqsave(&prioidx_map_lock, flags); 66 clear_bit(idx, prioidx_map); 67 spin_unlock_irqrestore(&prioidx_map_lock, flags); 68 } 69 70 static int extend_netdev_table(struct net_device *dev, u32 new_len) 71 { 72 size_t new_size = sizeof(struct netprio_map) + 73 ((sizeof(u32) * new_len)); 74 struct netprio_map *new_priomap = kzalloc(new_size, GFP_KERNEL); 75 struct netprio_map *old_priomap; 76 int i; 77 78 old_priomap = rtnl_dereference(dev->priomap); 79 80 if (!new_priomap) { 81 pr_warn("Unable to alloc new priomap!\n"); 82 return -ENOMEM; 83 } 84 85 for (i = 0; 86 old_priomap && (i < old_priomap->priomap_len); 87 i++) 88 new_priomap->priomap[i] = old_priomap->priomap[i]; 89 90 new_priomap->priomap_len = new_len; 91 92 rcu_assign_pointer(dev->priomap, new_priomap); 93 if (old_priomap) 94 kfree_rcu(old_priomap, rcu); 95 return 0; 96 } 97 98 static int write_update_netdev_table(struct net_device *dev) 99 { 100 int ret = 0; 101 u32 max_len; 102 struct netprio_map *map; 103 104 max_len = atomic_read(&max_prioidx) + 1; 105 map = rtnl_dereference(dev->priomap); 106 if (!map || map->priomap_len < max_len) 107 ret = extend_netdev_table(dev, max_len); 108 109 return ret; 110 } 111 112 static int update_netdev_tables(void) 113 { 114 int ret = 0; 115 struct net_device *dev; 116 u32 max_len; 117 struct netprio_map *map; 118 119 rtnl_lock(); 120 max_len = atomic_read(&max_prioidx) + 1; 121 for_each_netdev(&init_net, dev) { 122 map = rtnl_dereference(dev->priomap); 123 /* 124 * don't allocate priomap if we didn't 125 * change net_prio.ifpriomap (map == NULL), 126 * this will speed up skb_update_prio. 127 */ 128 if (map && map->priomap_len < max_len) { 129 ret = extend_netdev_table(dev, max_len); 130 if (ret < 0) 131 break; 132 } 133 } 134 rtnl_unlock(); 135 return ret; 136 } 137 138 static struct cgroup_subsys_state *cgrp_create(struct cgroup *cgrp) 139 { 140 struct cgroup_netprio_state *cs; 141 int ret = -EINVAL; 142 143 cs = kzalloc(sizeof(*cs), GFP_KERNEL); 144 if (!cs) 145 return ERR_PTR(-ENOMEM); 146 147 if (cgrp->parent && cgrp_netprio_state(cgrp->parent)->prioidx) 148 goto out; 149 150 ret = get_prioidx(&cs->prioidx); 151 if (ret < 0) { 152 pr_warn("No space in priority index array\n"); 153 goto out; 154 } 155 156 ret = update_netdev_tables(); 157 if (ret < 0) { 158 put_prioidx(cs->prioidx); 159 goto out; 160 } 161 162 return &cs->css; 163 out: 164 kfree(cs); 165 return ERR_PTR(ret); 166 } 167 168 static void cgrp_destroy(struct cgroup *cgrp) 169 { 170 struct cgroup_netprio_state *cs; 171 struct net_device *dev; 172 struct netprio_map *map; 173 174 cs = cgrp_netprio_state(cgrp); 175 rtnl_lock(); 176 for_each_netdev(&init_net, dev) { 177 map = rtnl_dereference(dev->priomap); 178 if (map && cs->prioidx < map->priomap_len) 179 map->priomap[cs->prioidx] = 0; 180 } 181 rtnl_unlock(); 182 put_prioidx(cs->prioidx); 183 kfree(cs); 184 } 185 186 static u64 read_prioidx(struct cgroup *cgrp, struct cftype *cft) 187 { 188 return (u64)cgrp_netprio_state(cgrp)->prioidx; 189 } 190 191 static int read_priomap(struct cgroup *cont, struct cftype *cft, 192 struct cgroup_map_cb *cb) 193 { 194 struct net_device *dev; 195 u32 prioidx = cgrp_netprio_state(cont)->prioidx; 196 u32 priority; 197 struct netprio_map *map; 198 199 rcu_read_lock(); 200 for_each_netdev_rcu(&init_net, dev) { 201 map = rcu_dereference(dev->priomap); 202 priority = (map && prioidx < map->priomap_len) ? map->priomap[prioidx] : 0; 203 cb->fill(cb, dev->name, priority); 204 } 205 rcu_read_unlock(); 206 return 0; 207 } 208 209 static int write_priomap(struct cgroup *cgrp, struct cftype *cft, 210 const char *buffer) 211 { 212 char *devname = kstrdup(buffer, GFP_KERNEL); 213 int ret = -EINVAL; 214 u32 prioidx = cgrp_netprio_state(cgrp)->prioidx; 215 unsigned long priority; 216 char *priostr; 217 struct net_device *dev; 218 struct netprio_map *map; 219 220 if (!devname) 221 return -ENOMEM; 222 223 /* 224 * Minimally sized valid priomap string 225 */ 226 if (strlen(devname) < 3) 227 goto out_free_devname; 228 229 priostr = strstr(devname, " "); 230 if (!priostr) 231 goto out_free_devname; 232 233 /* 234 *Separate the devname from the associated priority 235 *and advance the priostr pointer to the priority value 236 */ 237 *priostr = '\0'; 238 priostr++; 239 240 /* 241 * If the priostr points to NULL, we're at the end of the passed 242 * in string, and its not a valid write 243 */ 244 if (*priostr == '\0') 245 goto out_free_devname; 246 247 ret = kstrtoul(priostr, 10, &priority); 248 if (ret < 0) 249 goto out_free_devname; 250 251 ret = -ENODEV; 252 253 dev = dev_get_by_name(&init_net, devname); 254 if (!dev) 255 goto out_free_devname; 256 257 rtnl_lock(); 258 ret = write_update_netdev_table(dev); 259 if (ret < 0) 260 goto out_put_dev; 261 262 map = rtnl_dereference(dev->priomap); 263 if (map) 264 map->priomap[prioidx] = priority; 265 266 out_put_dev: 267 rtnl_unlock(); 268 dev_put(dev); 269 270 out_free_devname: 271 kfree(devname); 272 return ret; 273 } 274 275 void net_prio_attach(struct cgroup *cgrp, struct cgroup_taskset *tset) 276 { 277 struct task_struct *p; 278 279 cgroup_taskset_for_each(p, cgrp, tset) { 280 unsigned int fd; 281 struct fdtable *fdt; 282 struct files_struct *files; 283 284 task_lock(p); 285 files = p->files; 286 if (!files) { 287 task_unlock(p); 288 continue; 289 } 290 291 spin_lock(&files->file_lock); 292 fdt = files_fdtable(files); 293 for (fd = 0; fd < fdt->max_fds; fd++) { 294 struct file *file; 295 struct socket *sock; 296 int err; 297 298 file = fcheck_files(files, fd); 299 if (!file) 300 continue; 301 302 sock = sock_from_file(file, &err); 303 if (sock) 304 sock_update_netprioidx(sock->sk, p); 305 } 306 spin_unlock(&files->file_lock); 307 task_unlock(p); 308 } 309 } 310 311 static struct cftype ss_files[] = { 312 { 313 .name = "prioidx", 314 .read_u64 = read_prioidx, 315 }, 316 { 317 .name = "ifpriomap", 318 .read_map = read_priomap, 319 .write_string = write_priomap, 320 }, 321 { } /* terminate */ 322 }; 323 324 struct cgroup_subsys net_prio_subsys = { 325 .name = "net_prio", 326 .create = cgrp_create, 327 .destroy = cgrp_destroy, 328 .attach = net_prio_attach, 329 #ifdef CONFIG_NETPRIO_CGROUP 330 .subsys_id = net_prio_subsys_id, 331 #endif 332 .base_cftypes = ss_files, 333 .module = THIS_MODULE 334 }; 335 336 static int netprio_device_event(struct notifier_block *unused, 337 unsigned long event, void *ptr) 338 { 339 struct net_device *dev = ptr; 340 struct netprio_map *old; 341 342 /* 343 * Note this is called with rtnl_lock held so we have update side 344 * protection on our rcu assignments 345 */ 346 347 switch (event) { 348 case NETDEV_UNREGISTER: 349 old = rtnl_dereference(dev->priomap); 350 RCU_INIT_POINTER(dev->priomap, NULL); 351 if (old) 352 kfree_rcu(old, rcu); 353 break; 354 } 355 return NOTIFY_DONE; 356 } 357 358 static struct notifier_block netprio_device_notifier = { 359 .notifier_call = netprio_device_event 360 }; 361 362 static int __init init_cgroup_netprio(void) 363 { 364 int ret; 365 366 ret = cgroup_load_subsys(&net_prio_subsys); 367 if (ret) 368 goto out; 369 #ifndef CONFIG_NETPRIO_CGROUP 370 smp_wmb(); 371 net_prio_subsys_id = net_prio_subsys.subsys_id; 372 #endif 373 374 register_netdevice_notifier(&netprio_device_notifier); 375 376 out: 377 return ret; 378 } 379 380 static void __exit exit_cgroup_netprio(void) 381 { 382 struct netprio_map *old; 383 struct net_device *dev; 384 385 unregister_netdevice_notifier(&netprio_device_notifier); 386 387 cgroup_unload_subsys(&net_prio_subsys); 388 389 #ifndef CONFIG_NETPRIO_CGROUP 390 net_prio_subsys_id = -1; 391 synchronize_rcu(); 392 #endif 393 394 rtnl_lock(); 395 for_each_netdev(&init_net, dev) { 396 old = rtnl_dereference(dev->priomap); 397 RCU_INIT_POINTER(dev->priomap, NULL); 398 if (old) 399 kfree_rcu(old, rcu); 400 } 401 rtnl_unlock(); 402 } 403 404 module_init(init_cgroup_netprio); 405 module_exit(exit_cgroup_netprio); 406 MODULE_LICENSE("GPL v2"); 407