1 #include <linux/workqueue.h> 2 #include <linux/rtnetlink.h> 3 #include <linux/cache.h> 4 #include <linux/slab.h> 5 #include <linux/list.h> 6 #include <linux/delay.h> 7 #include <linux/sched.h> 8 #include <linux/idr.h> 9 #include <linux/rculist.h> 10 #include <linux/nsproxy.h> 11 #include <linux/netdevice.h> 12 #include <net/net_namespace.h> 13 #include <net/netns/generic.h> 14 #include <net/rtnetlink.h> 15 16 /* 17 * Our network namespace constructor/destructor lists 18 */ 19 20 static LIST_HEAD(pernet_list); 21 static struct list_head *first_device = &pernet_list; 22 static DEFINE_MUTEX(net_mutex); 23 24 LIST_HEAD(net_namespace_list); 25 EXPORT_SYMBOL_GPL(net_namespace_list); 26 27 struct net init_net; 28 EXPORT_SYMBOL(init_net); 29 30 #define INITIAL_NET_GEN_PTRS 13 /* +1 for len +2 for rcu_head */ 31 32 static void unregister_netdevices(struct net *net, struct list_head *list) 33 { 34 struct net_device *dev; 35 /* At exit all network devices most be removed from a network 36 * namespace. Do this in the reverse order of registeration. 37 */ 38 for_each_netdev_reverse(net, dev) { 39 if (dev->rtnl_link_ops) 40 dev->rtnl_link_ops->dellink(dev, list); 41 else 42 unregister_netdevice_queue(dev, list); 43 } 44 } 45 46 static int ops_init(const struct pernet_operations *ops, struct net *net) 47 { 48 int err; 49 if (ops->id && ops->size) { 50 void *data = kzalloc(ops->size, GFP_KERNEL); 51 if (!data) 52 return -ENOMEM; 53 54 err = net_assign_generic(net, *ops->id, data); 55 if (err) { 56 kfree(data); 57 return err; 58 } 59 } 60 if (ops->init) 61 return ops->init(net); 62 return 0; 63 } 64 65 static void ops_free(const struct pernet_operations *ops, struct net *net) 66 { 67 if (ops->id && ops->size) { 68 int id = *ops->id; 69 kfree(net_generic(net, id)); 70 } 71 } 72 73 /* 74 * setup_net runs the initializers for the network namespace object. 75 */ 76 static __net_init int setup_net(struct net *net) 77 { 78 /* Must be called with net_mutex held */ 79 const struct pernet_operations *ops, *saved_ops; 80 int error = 0; 81 82 atomic_set(&net->count, 1); 83 84 #ifdef NETNS_REFCNT_DEBUG 85 atomic_set(&net->use_count, 0); 86 #endif 87 88 list_for_each_entry(ops, &pernet_list, list) { 89 error = ops_init(ops, net); 90 if (error < 0) 91 goto out_undo; 92 } 93 out: 94 return error; 95 96 out_undo: 97 /* Walk through the list backwards calling the exit functions 98 * for the pernet modules whose init functions did not fail. 99 */ 100 saved_ops = ops; 101 list_for_each_entry_continue_reverse(ops, &pernet_list, list) { 102 if (ops->exit) 103 ops->exit(net); 104 if (&ops->list == first_device) { 105 LIST_HEAD(dev_kill_list); 106 rtnl_lock(); 107 unregister_netdevices(net, &dev_kill_list); 108 unregister_netdevice_many(&dev_kill_list); 109 rtnl_unlock(); 110 } 111 } 112 ops = saved_ops; 113 list_for_each_entry_continue_reverse(ops, &pernet_list, list) 114 ops_free(ops, net); 115 116 rcu_barrier(); 117 goto out; 118 } 119 120 static struct net_generic *net_alloc_generic(void) 121 { 122 struct net_generic *ng; 123 size_t generic_size = sizeof(struct net_generic) + 124 INITIAL_NET_GEN_PTRS * sizeof(void *); 125 126 ng = kzalloc(generic_size, GFP_KERNEL); 127 if (ng) 128 ng->len = INITIAL_NET_GEN_PTRS; 129 130 return ng; 131 } 132 133 #ifdef CONFIG_NET_NS 134 static struct kmem_cache *net_cachep; 135 static struct workqueue_struct *netns_wq; 136 137 static struct net *net_alloc(void) 138 { 139 struct net *net = NULL; 140 struct net_generic *ng; 141 142 ng = net_alloc_generic(); 143 if (!ng) 144 goto out; 145 146 net = kmem_cache_zalloc(net_cachep, GFP_KERNEL); 147 if (!net) 148 goto out_free; 149 150 rcu_assign_pointer(net->gen, ng); 151 out: 152 return net; 153 154 out_free: 155 kfree(ng); 156 goto out; 157 } 158 159 static void net_free(struct net *net) 160 { 161 #ifdef NETNS_REFCNT_DEBUG 162 if (unlikely(atomic_read(&net->use_count) != 0)) { 163 printk(KERN_EMERG "network namespace not free! Usage: %d\n", 164 atomic_read(&net->use_count)); 165 return; 166 } 167 #endif 168 kfree(net->gen); 169 kmem_cache_free(net_cachep, net); 170 } 171 172 static struct net *net_create(void) 173 { 174 struct net *net; 175 int rv; 176 177 net = net_alloc(); 178 if (!net) 179 return ERR_PTR(-ENOMEM); 180 mutex_lock(&net_mutex); 181 rv = setup_net(net); 182 if (rv == 0) { 183 rtnl_lock(); 184 list_add_tail_rcu(&net->list, &net_namespace_list); 185 rtnl_unlock(); 186 } 187 mutex_unlock(&net_mutex); 188 if (rv < 0) { 189 net_free(net); 190 return ERR_PTR(rv); 191 } 192 return net; 193 } 194 195 struct net *copy_net_ns(unsigned long flags, struct net *old_net) 196 { 197 if (!(flags & CLONE_NEWNET)) 198 return get_net(old_net); 199 return net_create(); 200 } 201 202 static DEFINE_SPINLOCK(cleanup_list_lock); 203 static LIST_HEAD(cleanup_list); /* Must hold cleanup_list_lock to touch */ 204 205 static void cleanup_net(struct work_struct *work) 206 { 207 const struct pernet_operations *ops; 208 struct net *net, *tmp; 209 LIST_HEAD(net_kill_list); 210 211 /* Atomically snapshot the list of namespaces to cleanup */ 212 spin_lock_irq(&cleanup_list_lock); 213 list_replace_init(&cleanup_list, &net_kill_list); 214 spin_unlock_irq(&cleanup_list_lock); 215 216 mutex_lock(&net_mutex); 217 218 /* Don't let anyone else find us. */ 219 rtnl_lock(); 220 list_for_each_entry(net, &net_kill_list, cleanup_list) 221 list_del_rcu(&net->list); 222 rtnl_unlock(); 223 224 /* 225 * Another CPU might be rcu-iterating the list, wait for it. 226 * This needs to be before calling the exit() notifiers, so 227 * the rcu_barrier() below isn't sufficient alone. 228 */ 229 synchronize_rcu(); 230 231 /* Run all of the network namespace exit methods */ 232 list_for_each_entry_reverse(ops, &pernet_list, list) { 233 if (ops->exit) { 234 list_for_each_entry(net, &net_kill_list, cleanup_list) 235 ops->exit(net); 236 } 237 if (&ops->list == first_device) { 238 LIST_HEAD(dev_kill_list); 239 rtnl_lock(); 240 list_for_each_entry(net, &net_kill_list, cleanup_list) 241 unregister_netdevices(net, &dev_kill_list); 242 unregister_netdevice_many(&dev_kill_list); 243 rtnl_unlock(); 244 } 245 } 246 /* Free the net generic variables */ 247 list_for_each_entry_reverse(ops, &pernet_list, list) { 248 if (ops->size && ops->id) { 249 list_for_each_entry(net, &net_kill_list, cleanup_list) 250 ops_free(ops, net); 251 } 252 } 253 254 mutex_unlock(&net_mutex); 255 256 /* Ensure there are no outstanding rcu callbacks using this 257 * network namespace. 258 */ 259 rcu_barrier(); 260 261 /* Finally it is safe to free my network namespace structure */ 262 list_for_each_entry_safe(net, tmp, &net_kill_list, cleanup_list) { 263 list_del_init(&net->cleanup_list); 264 net_free(net); 265 } 266 } 267 static DECLARE_WORK(net_cleanup_work, cleanup_net); 268 269 void __put_net(struct net *net) 270 { 271 /* Cleanup the network namespace in process context */ 272 unsigned long flags; 273 274 spin_lock_irqsave(&cleanup_list_lock, flags); 275 list_add(&net->cleanup_list, &cleanup_list); 276 spin_unlock_irqrestore(&cleanup_list_lock, flags); 277 278 queue_work(netns_wq, &net_cleanup_work); 279 } 280 EXPORT_SYMBOL_GPL(__put_net); 281 282 #else 283 struct net *copy_net_ns(unsigned long flags, struct net *old_net) 284 { 285 if (flags & CLONE_NEWNET) 286 return ERR_PTR(-EINVAL); 287 return old_net; 288 } 289 #endif 290 291 struct net *get_net_ns_by_pid(pid_t pid) 292 { 293 struct task_struct *tsk; 294 struct net *net; 295 296 /* Lookup the network namespace */ 297 net = ERR_PTR(-ESRCH); 298 rcu_read_lock(); 299 tsk = find_task_by_vpid(pid); 300 if (tsk) { 301 struct nsproxy *nsproxy; 302 nsproxy = task_nsproxy(tsk); 303 if (nsproxy) 304 net = get_net(nsproxy->net_ns); 305 } 306 rcu_read_unlock(); 307 return net; 308 } 309 EXPORT_SYMBOL_GPL(get_net_ns_by_pid); 310 311 static int __init net_ns_init(void) 312 { 313 struct net_generic *ng; 314 315 #ifdef CONFIG_NET_NS 316 net_cachep = kmem_cache_create("net_namespace", sizeof(struct net), 317 SMP_CACHE_BYTES, 318 SLAB_PANIC, NULL); 319 320 /* Create workqueue for cleanup */ 321 netns_wq = create_singlethread_workqueue("netns"); 322 if (!netns_wq) 323 panic("Could not create netns workq"); 324 #endif 325 326 ng = net_alloc_generic(); 327 if (!ng) 328 panic("Could not allocate generic netns"); 329 330 rcu_assign_pointer(init_net.gen, ng); 331 332 mutex_lock(&net_mutex); 333 if (setup_net(&init_net)) 334 panic("Could not setup the initial network namespace"); 335 336 rtnl_lock(); 337 list_add_tail_rcu(&init_net.list, &net_namespace_list); 338 rtnl_unlock(); 339 340 mutex_unlock(&net_mutex); 341 342 return 0; 343 } 344 345 pure_initcall(net_ns_init); 346 347 #ifdef CONFIG_NET_NS 348 static int __register_pernet_operations(struct list_head *list, 349 struct pernet_operations *ops) 350 { 351 struct net *net, *undo_net; 352 int error; 353 354 list_add_tail(&ops->list, list); 355 if (ops->init || (ops->id && ops->size)) { 356 for_each_net(net) { 357 error = ops_init(ops, net); 358 if (error) 359 goto out_undo; 360 } 361 } 362 return 0; 363 364 out_undo: 365 /* If I have an error cleanup all namespaces I initialized */ 366 list_del(&ops->list); 367 if (ops->exit) { 368 for_each_net(undo_net) { 369 if (net_eq(undo_net, net)) 370 goto undone; 371 ops->exit(undo_net); 372 } 373 } 374 undone: 375 if (ops->size && ops->id) { 376 for_each_net(undo_net) { 377 if (net_eq(undo_net, net)) 378 goto freed; 379 ops_free(ops, undo_net); 380 } 381 } 382 freed: 383 return error; 384 } 385 386 static void __unregister_pernet_operations(struct pernet_operations *ops) 387 { 388 struct net *net; 389 390 list_del(&ops->list); 391 if (ops->exit) 392 for_each_net(net) 393 ops->exit(net); 394 if (ops->id && ops->size) 395 for_each_net(net) 396 ops_free(ops, net); 397 } 398 399 #else 400 401 static int __register_pernet_operations(struct list_head *list, 402 struct pernet_operations *ops) 403 { 404 int err = 0; 405 err = ops_init(ops, &init_net); 406 if (err) 407 ops_free(ops, &init_net); 408 return err; 409 410 } 411 412 static void __unregister_pernet_operations(struct pernet_operations *ops) 413 { 414 if (ops->exit) 415 ops->exit(&init_net); 416 ops_free(ops, &init_net); 417 } 418 419 #endif /* CONFIG_NET_NS */ 420 421 static DEFINE_IDA(net_generic_ids); 422 423 static int register_pernet_operations(struct list_head *list, 424 struct pernet_operations *ops) 425 { 426 int error; 427 428 if (ops->id) { 429 again: 430 error = ida_get_new_above(&net_generic_ids, 1, ops->id); 431 if (error < 0) { 432 if (error == -EAGAIN) { 433 ida_pre_get(&net_generic_ids, GFP_KERNEL); 434 goto again; 435 } 436 return error; 437 } 438 } 439 error = __register_pernet_operations(list, ops); 440 if (error && ops->id) 441 ida_remove(&net_generic_ids, *ops->id); 442 443 return error; 444 } 445 446 static void unregister_pernet_operations(struct pernet_operations *ops) 447 { 448 449 __unregister_pernet_operations(ops); 450 if (ops->id) 451 ida_remove(&net_generic_ids, *ops->id); 452 } 453 454 /** 455 * register_pernet_subsys - register a network namespace subsystem 456 * @ops: pernet operations structure for the subsystem 457 * 458 * Register a subsystem which has init and exit functions 459 * that are called when network namespaces are created and 460 * destroyed respectively. 461 * 462 * When registered all network namespace init functions are 463 * called for every existing network namespace. Allowing kernel 464 * modules to have a race free view of the set of network namespaces. 465 * 466 * When a new network namespace is created all of the init 467 * methods are called in the order in which they were registered. 468 * 469 * When a network namespace is destroyed all of the exit methods 470 * are called in the reverse of the order with which they were 471 * registered. 472 */ 473 int register_pernet_subsys(struct pernet_operations *ops) 474 { 475 int error; 476 mutex_lock(&net_mutex); 477 error = register_pernet_operations(first_device, ops); 478 mutex_unlock(&net_mutex); 479 return error; 480 } 481 EXPORT_SYMBOL_GPL(register_pernet_subsys); 482 483 /** 484 * unregister_pernet_subsys - unregister a network namespace subsystem 485 * @ops: pernet operations structure to manipulate 486 * 487 * Remove the pernet operations structure from the list to be 488 * used when network namespaces are created or destroyed. In 489 * addition run the exit method for all existing network 490 * namespaces. 491 */ 492 void unregister_pernet_subsys(struct pernet_operations *module) 493 { 494 mutex_lock(&net_mutex); 495 unregister_pernet_operations(module); 496 mutex_unlock(&net_mutex); 497 } 498 EXPORT_SYMBOL_GPL(unregister_pernet_subsys); 499 500 /** 501 * register_pernet_device - register a network namespace device 502 * @ops: pernet operations structure for the subsystem 503 * 504 * Register a device which has init and exit functions 505 * that are called when network namespaces are created and 506 * destroyed respectively. 507 * 508 * When registered all network namespace init functions are 509 * called for every existing network namespace. Allowing kernel 510 * modules to have a race free view of the set of network namespaces. 511 * 512 * When a new network namespace is created all of the init 513 * methods are called in the order in which they were registered. 514 * 515 * When a network namespace is destroyed all of the exit methods 516 * are called in the reverse of the order with which they were 517 * registered. 518 */ 519 int register_pernet_device(struct pernet_operations *ops) 520 { 521 int error; 522 mutex_lock(&net_mutex); 523 error = register_pernet_operations(&pernet_list, ops); 524 if (!error && (first_device == &pernet_list)) 525 first_device = &ops->list; 526 mutex_unlock(&net_mutex); 527 return error; 528 } 529 EXPORT_SYMBOL_GPL(register_pernet_device); 530 531 /** 532 * unregister_pernet_device - unregister a network namespace netdevice 533 * @ops: pernet operations structure to manipulate 534 * 535 * Remove the pernet operations structure from the list to be 536 * used when network namespaces are created or destroyed. In 537 * addition run the exit method for all existing network 538 * namespaces. 539 */ 540 void unregister_pernet_device(struct pernet_operations *ops) 541 { 542 mutex_lock(&net_mutex); 543 if (&ops->list == first_device) 544 first_device = first_device->next; 545 unregister_pernet_operations(ops); 546 mutex_unlock(&net_mutex); 547 } 548 EXPORT_SYMBOL_GPL(unregister_pernet_device); 549 550 static void net_generic_release(struct rcu_head *rcu) 551 { 552 struct net_generic *ng; 553 554 ng = container_of(rcu, struct net_generic, rcu); 555 kfree(ng); 556 } 557 558 int net_assign_generic(struct net *net, int id, void *data) 559 { 560 struct net_generic *ng, *old_ng; 561 562 BUG_ON(!mutex_is_locked(&net_mutex)); 563 BUG_ON(id == 0); 564 565 ng = old_ng = net->gen; 566 if (old_ng->len >= id) 567 goto assign; 568 569 ng = kzalloc(sizeof(struct net_generic) + 570 id * sizeof(void *), GFP_KERNEL); 571 if (ng == NULL) 572 return -ENOMEM; 573 574 /* 575 * Some synchronisation notes: 576 * 577 * The net_generic explores the net->gen array inside rcu 578 * read section. Besides once set the net->gen->ptr[x] 579 * pointer never changes (see rules in netns/generic.h). 580 * 581 * That said, we simply duplicate this array and schedule 582 * the old copy for kfree after a grace period. 583 */ 584 585 ng->len = id; 586 memcpy(&ng->ptr, &old_ng->ptr, old_ng->len * sizeof(void*)); 587 588 rcu_assign_pointer(net->gen, ng); 589 call_rcu(&old_ng->rcu, net_generic_release); 590 assign: 591 ng->ptr[id - 1] = data; 592 return 0; 593 } 594 EXPORT_SYMBOL_GPL(net_assign_generic); 595