1 #include <linux/workqueue.h> 2 #include <linux/rtnetlink.h> 3 #include <linux/cache.h> 4 #include <linux/slab.h> 5 #include <linux/list.h> 6 #include <linux/delay.h> 7 #include <linux/sched.h> 8 #include <linux/idr.h> 9 #include <linux/rculist.h> 10 #include <linux/nsproxy.h> 11 #include <linux/proc_fs.h> 12 #include <linux/file.h> 13 #include <linux/export.h> 14 #include <net/net_namespace.h> 15 #include <net/netns/generic.h> 16 17 /* 18 * Our network namespace constructor/destructor lists 19 */ 20 21 static LIST_HEAD(pernet_list); 22 static struct list_head *first_device = &pernet_list; 23 static DEFINE_MUTEX(net_mutex); 24 25 LIST_HEAD(net_namespace_list); 26 EXPORT_SYMBOL_GPL(net_namespace_list); 27 28 struct net init_net; 29 EXPORT_SYMBOL(init_net); 30 31 #define INITIAL_NET_GEN_PTRS 13 /* +1 for len +2 for rcu_head */ 32 33 static unsigned int max_gen_ptrs = INITIAL_NET_GEN_PTRS; 34 35 static struct net_generic *net_alloc_generic(void) 36 { 37 struct net_generic *ng; 38 size_t generic_size = offsetof(struct net_generic, ptr[max_gen_ptrs]); 39 40 ng = kzalloc(generic_size, GFP_KERNEL); 41 if (ng) 42 ng->len = max_gen_ptrs; 43 44 return ng; 45 } 46 47 static int net_assign_generic(struct net *net, int id, void *data) 48 { 49 struct net_generic *ng, *old_ng; 50 51 BUG_ON(!mutex_is_locked(&net_mutex)); 52 BUG_ON(id == 0); 53 54 old_ng = rcu_dereference_protected(net->gen, 55 lockdep_is_held(&net_mutex)); 56 ng = old_ng; 57 if (old_ng->len >= id) 58 goto assign; 59 60 ng = net_alloc_generic(); 61 if (ng == NULL) 62 return -ENOMEM; 63 64 /* 65 * Some synchronisation notes: 66 * 67 * The net_generic explores the net->gen array inside rcu 68 * read section. Besides once set the net->gen->ptr[x] 69 * pointer never changes (see rules in netns/generic.h). 70 * 71 * That said, we simply duplicate this array and schedule 72 * the old copy for kfree after a grace period. 73 */ 74 75 memcpy(&ng->ptr, &old_ng->ptr, old_ng->len * sizeof(void*)); 76 77 rcu_assign_pointer(net->gen, ng); 78 kfree_rcu(old_ng, rcu); 79 assign: 80 ng->ptr[id - 1] = data; 81 return 0; 82 } 83 84 static int ops_init(const struct pernet_operations *ops, struct net *net) 85 { 86 int err; 87 if (ops->id && ops->size) { 88 void *data = kzalloc(ops->size, GFP_KERNEL); 89 if (!data) 90 return -ENOMEM; 91 92 err = net_assign_generic(net, *ops->id, data); 93 if (err) { 94 kfree(data); 95 return err; 96 } 97 } 98 if (ops->init) 99 return ops->init(net); 100 return 0; 101 } 102 103 static void ops_free(const struct pernet_operations *ops, struct net *net) 104 { 105 if (ops->id && ops->size) { 106 int id = *ops->id; 107 kfree(net_generic(net, id)); 108 } 109 } 110 111 static void ops_exit_list(const struct pernet_operations *ops, 112 struct list_head *net_exit_list) 113 { 114 struct net *net; 115 if (ops->exit) { 116 list_for_each_entry(net, net_exit_list, exit_list) 117 ops->exit(net); 118 } 119 if (ops->exit_batch) 120 ops->exit_batch(net_exit_list); 121 } 122 123 static void ops_free_list(const struct pernet_operations *ops, 124 struct list_head *net_exit_list) 125 { 126 struct net *net; 127 if (ops->size && ops->id) { 128 list_for_each_entry(net, net_exit_list, exit_list) 129 ops_free(ops, net); 130 } 131 } 132 133 /* 134 * setup_net runs the initializers for the network namespace object. 135 */ 136 static __net_init int setup_net(struct net *net) 137 { 138 /* Must be called with net_mutex held */ 139 const struct pernet_operations *ops, *saved_ops; 140 int error = 0; 141 LIST_HEAD(net_exit_list); 142 143 atomic_set(&net->count, 1); 144 atomic_set(&net->passive, 1); 145 net->dev_base_seq = 1; 146 147 #ifdef NETNS_REFCNT_DEBUG 148 atomic_set(&net->use_count, 0); 149 #endif 150 151 list_for_each_entry(ops, &pernet_list, list) { 152 error = ops_init(ops, net); 153 if (error < 0) 154 goto out_undo; 155 } 156 out: 157 return error; 158 159 out_undo: 160 /* Walk through the list backwards calling the exit functions 161 * for the pernet modules whose init functions did not fail. 162 */ 163 list_add(&net->exit_list, &net_exit_list); 164 saved_ops = ops; 165 list_for_each_entry_continue_reverse(ops, &pernet_list, list) 166 ops_exit_list(ops, &net_exit_list); 167 168 ops = saved_ops; 169 list_for_each_entry_continue_reverse(ops, &pernet_list, list) 170 ops_free_list(ops, &net_exit_list); 171 172 rcu_barrier(); 173 goto out; 174 } 175 176 177 #ifdef CONFIG_NET_NS 178 static struct kmem_cache *net_cachep; 179 static struct workqueue_struct *netns_wq; 180 181 static struct net *net_alloc(void) 182 { 183 struct net *net = NULL; 184 struct net_generic *ng; 185 186 ng = net_alloc_generic(); 187 if (!ng) 188 goto out; 189 190 net = kmem_cache_zalloc(net_cachep, GFP_KERNEL); 191 if (!net) 192 goto out_free; 193 194 rcu_assign_pointer(net->gen, ng); 195 out: 196 return net; 197 198 out_free: 199 kfree(ng); 200 goto out; 201 } 202 203 static void net_free(struct net *net) 204 { 205 #ifdef NETNS_REFCNT_DEBUG 206 if (unlikely(atomic_read(&net->use_count) != 0)) { 207 printk(KERN_EMERG "network namespace not free! Usage: %d\n", 208 atomic_read(&net->use_count)); 209 return; 210 } 211 #endif 212 kfree(net->gen); 213 kmem_cache_free(net_cachep, net); 214 } 215 216 void net_drop_ns(void *p) 217 { 218 struct net *ns = p; 219 if (ns && atomic_dec_and_test(&ns->passive)) 220 net_free(ns); 221 } 222 223 struct net *copy_net_ns(unsigned long flags, struct net *old_net) 224 { 225 struct net *net; 226 int rv; 227 228 if (!(flags & CLONE_NEWNET)) 229 return get_net(old_net); 230 231 net = net_alloc(); 232 if (!net) 233 return ERR_PTR(-ENOMEM); 234 mutex_lock(&net_mutex); 235 rv = setup_net(net); 236 if (rv == 0) { 237 rtnl_lock(); 238 list_add_tail_rcu(&net->list, &net_namespace_list); 239 rtnl_unlock(); 240 } 241 mutex_unlock(&net_mutex); 242 if (rv < 0) { 243 net_drop_ns(net); 244 return ERR_PTR(rv); 245 } 246 return net; 247 } 248 249 static DEFINE_SPINLOCK(cleanup_list_lock); 250 static LIST_HEAD(cleanup_list); /* Must hold cleanup_list_lock to touch */ 251 252 static void cleanup_net(struct work_struct *work) 253 { 254 const struct pernet_operations *ops; 255 struct net *net, *tmp; 256 LIST_HEAD(net_kill_list); 257 LIST_HEAD(net_exit_list); 258 259 /* Atomically snapshot the list of namespaces to cleanup */ 260 spin_lock_irq(&cleanup_list_lock); 261 list_replace_init(&cleanup_list, &net_kill_list); 262 spin_unlock_irq(&cleanup_list_lock); 263 264 mutex_lock(&net_mutex); 265 266 /* Don't let anyone else find us. */ 267 rtnl_lock(); 268 list_for_each_entry(net, &net_kill_list, cleanup_list) { 269 list_del_rcu(&net->list); 270 list_add_tail(&net->exit_list, &net_exit_list); 271 } 272 rtnl_unlock(); 273 274 /* 275 * Another CPU might be rcu-iterating the list, wait for it. 276 * This needs to be before calling the exit() notifiers, so 277 * the rcu_barrier() below isn't sufficient alone. 278 */ 279 synchronize_rcu(); 280 281 /* Run all of the network namespace exit methods */ 282 list_for_each_entry_reverse(ops, &pernet_list, list) 283 ops_exit_list(ops, &net_exit_list); 284 285 /* Free the net generic variables */ 286 list_for_each_entry_reverse(ops, &pernet_list, list) 287 ops_free_list(ops, &net_exit_list); 288 289 mutex_unlock(&net_mutex); 290 291 /* Ensure there are no outstanding rcu callbacks using this 292 * network namespace. 293 */ 294 rcu_barrier(); 295 296 /* Finally it is safe to free my network namespace structure */ 297 list_for_each_entry_safe(net, tmp, &net_exit_list, exit_list) { 298 list_del_init(&net->exit_list); 299 net_drop_ns(net); 300 } 301 } 302 static DECLARE_WORK(net_cleanup_work, cleanup_net); 303 304 void __put_net(struct net *net) 305 { 306 /* Cleanup the network namespace in process context */ 307 unsigned long flags; 308 309 spin_lock_irqsave(&cleanup_list_lock, flags); 310 list_add(&net->cleanup_list, &cleanup_list); 311 spin_unlock_irqrestore(&cleanup_list_lock, flags); 312 313 queue_work(netns_wq, &net_cleanup_work); 314 } 315 EXPORT_SYMBOL_GPL(__put_net); 316 317 struct net *get_net_ns_by_fd(int fd) 318 { 319 struct proc_inode *ei; 320 struct file *file; 321 struct net *net; 322 323 file = proc_ns_fget(fd); 324 if (IS_ERR(file)) 325 return ERR_CAST(file); 326 327 ei = PROC_I(file->f_dentry->d_inode); 328 if (ei->ns_ops == &netns_operations) 329 net = get_net(ei->ns); 330 else 331 net = ERR_PTR(-EINVAL); 332 333 fput(file); 334 return net; 335 } 336 337 #else 338 struct net *copy_net_ns(unsigned long flags, struct net *old_net) 339 { 340 if (flags & CLONE_NEWNET) 341 return ERR_PTR(-EINVAL); 342 return old_net; 343 } 344 345 struct net *get_net_ns_by_fd(int fd) 346 { 347 return ERR_PTR(-EINVAL); 348 } 349 #endif 350 351 struct net *get_net_ns_by_pid(pid_t pid) 352 { 353 struct task_struct *tsk; 354 struct net *net; 355 356 /* Lookup the network namespace */ 357 net = ERR_PTR(-ESRCH); 358 rcu_read_lock(); 359 tsk = find_task_by_vpid(pid); 360 if (tsk) { 361 struct nsproxy *nsproxy; 362 nsproxy = task_nsproxy(tsk); 363 if (nsproxy) 364 net = get_net(nsproxy->net_ns); 365 } 366 rcu_read_unlock(); 367 return net; 368 } 369 EXPORT_SYMBOL_GPL(get_net_ns_by_pid); 370 371 static int __init net_ns_init(void) 372 { 373 struct net_generic *ng; 374 375 #ifdef CONFIG_NET_NS 376 net_cachep = kmem_cache_create("net_namespace", sizeof(struct net), 377 SMP_CACHE_BYTES, 378 SLAB_PANIC, NULL); 379 380 /* Create workqueue for cleanup */ 381 netns_wq = create_singlethread_workqueue("netns"); 382 if (!netns_wq) 383 panic("Could not create netns workq"); 384 #endif 385 386 ng = net_alloc_generic(); 387 if (!ng) 388 panic("Could not allocate generic netns"); 389 390 rcu_assign_pointer(init_net.gen, ng); 391 392 mutex_lock(&net_mutex); 393 if (setup_net(&init_net)) 394 panic("Could not setup the initial network namespace"); 395 396 rtnl_lock(); 397 list_add_tail_rcu(&init_net.list, &net_namespace_list); 398 rtnl_unlock(); 399 400 mutex_unlock(&net_mutex); 401 402 return 0; 403 } 404 405 pure_initcall(net_ns_init); 406 407 #ifdef CONFIG_NET_NS 408 static int __register_pernet_operations(struct list_head *list, 409 struct pernet_operations *ops) 410 { 411 struct net *net; 412 int error; 413 LIST_HEAD(net_exit_list); 414 415 list_add_tail(&ops->list, list); 416 if (ops->init || (ops->id && ops->size)) { 417 for_each_net(net) { 418 error = ops_init(ops, net); 419 if (error) 420 goto out_undo; 421 list_add_tail(&net->exit_list, &net_exit_list); 422 } 423 } 424 return 0; 425 426 out_undo: 427 /* If I have an error cleanup all namespaces I initialized */ 428 list_del(&ops->list); 429 ops_exit_list(ops, &net_exit_list); 430 ops_free_list(ops, &net_exit_list); 431 return error; 432 } 433 434 static void __unregister_pernet_operations(struct pernet_operations *ops) 435 { 436 struct net *net; 437 LIST_HEAD(net_exit_list); 438 439 list_del(&ops->list); 440 for_each_net(net) 441 list_add_tail(&net->exit_list, &net_exit_list); 442 ops_exit_list(ops, &net_exit_list); 443 ops_free_list(ops, &net_exit_list); 444 } 445 446 #else 447 448 static int __register_pernet_operations(struct list_head *list, 449 struct pernet_operations *ops) 450 { 451 int err = 0; 452 err = ops_init(ops, &init_net); 453 if (err) 454 ops_free(ops, &init_net); 455 return err; 456 457 } 458 459 static void __unregister_pernet_operations(struct pernet_operations *ops) 460 { 461 LIST_HEAD(net_exit_list); 462 list_add(&init_net.exit_list, &net_exit_list); 463 ops_exit_list(ops, &net_exit_list); 464 ops_free_list(ops, &net_exit_list); 465 } 466 467 #endif /* CONFIG_NET_NS */ 468 469 static DEFINE_IDA(net_generic_ids); 470 471 static int register_pernet_operations(struct list_head *list, 472 struct pernet_operations *ops) 473 { 474 int error; 475 476 if (ops->id) { 477 again: 478 error = ida_get_new_above(&net_generic_ids, 1, ops->id); 479 if (error < 0) { 480 if (error == -EAGAIN) { 481 ida_pre_get(&net_generic_ids, GFP_KERNEL); 482 goto again; 483 } 484 return error; 485 } 486 max_gen_ptrs = max_t(unsigned int, max_gen_ptrs, *ops->id); 487 } 488 error = __register_pernet_operations(list, ops); 489 if (error) { 490 rcu_barrier(); 491 if (ops->id) 492 ida_remove(&net_generic_ids, *ops->id); 493 } 494 495 return error; 496 } 497 498 static void unregister_pernet_operations(struct pernet_operations *ops) 499 { 500 501 __unregister_pernet_operations(ops); 502 rcu_barrier(); 503 if (ops->id) 504 ida_remove(&net_generic_ids, *ops->id); 505 } 506 507 /** 508 * register_pernet_subsys - register a network namespace subsystem 509 * @ops: pernet operations structure for the subsystem 510 * 511 * Register a subsystem which has init and exit functions 512 * that are called when network namespaces are created and 513 * destroyed respectively. 514 * 515 * When registered all network namespace init functions are 516 * called for every existing network namespace. Allowing kernel 517 * modules to have a race free view of the set of network namespaces. 518 * 519 * When a new network namespace is created all of the init 520 * methods are called in the order in which they were registered. 521 * 522 * When a network namespace is destroyed all of the exit methods 523 * are called in the reverse of the order with which they were 524 * registered. 525 */ 526 int register_pernet_subsys(struct pernet_operations *ops) 527 { 528 int error; 529 mutex_lock(&net_mutex); 530 error = register_pernet_operations(first_device, ops); 531 mutex_unlock(&net_mutex); 532 return error; 533 } 534 EXPORT_SYMBOL_GPL(register_pernet_subsys); 535 536 /** 537 * unregister_pernet_subsys - unregister a network namespace subsystem 538 * @ops: pernet operations structure to manipulate 539 * 540 * Remove the pernet operations structure from the list to be 541 * used when network namespaces are created or destroyed. In 542 * addition run the exit method for all existing network 543 * namespaces. 544 */ 545 void unregister_pernet_subsys(struct pernet_operations *ops) 546 { 547 mutex_lock(&net_mutex); 548 unregister_pernet_operations(ops); 549 mutex_unlock(&net_mutex); 550 } 551 EXPORT_SYMBOL_GPL(unregister_pernet_subsys); 552 553 /** 554 * register_pernet_device - register a network namespace device 555 * @ops: pernet operations structure for the subsystem 556 * 557 * Register a device which has init and exit functions 558 * that are called when network namespaces are created and 559 * destroyed respectively. 560 * 561 * When registered all network namespace init functions are 562 * called for every existing network namespace. Allowing kernel 563 * modules to have a race free view of the set of network namespaces. 564 * 565 * When a new network namespace is created all of the init 566 * methods are called in the order in which they were registered. 567 * 568 * When a network namespace is destroyed all of the exit methods 569 * are called in the reverse of the order with which they were 570 * registered. 571 */ 572 int register_pernet_device(struct pernet_operations *ops) 573 { 574 int error; 575 mutex_lock(&net_mutex); 576 error = register_pernet_operations(&pernet_list, ops); 577 if (!error && (first_device == &pernet_list)) 578 first_device = &ops->list; 579 mutex_unlock(&net_mutex); 580 return error; 581 } 582 EXPORT_SYMBOL_GPL(register_pernet_device); 583 584 /** 585 * unregister_pernet_device - unregister a network namespace netdevice 586 * @ops: pernet operations structure to manipulate 587 * 588 * Remove the pernet operations structure from the list to be 589 * used when network namespaces are created or destroyed. In 590 * addition run the exit method for all existing network 591 * namespaces. 592 */ 593 void unregister_pernet_device(struct pernet_operations *ops) 594 { 595 mutex_lock(&net_mutex); 596 if (&ops->list == first_device) 597 first_device = first_device->next; 598 unregister_pernet_operations(ops); 599 mutex_unlock(&net_mutex); 600 } 601 EXPORT_SYMBOL_GPL(unregister_pernet_device); 602 603 #ifdef CONFIG_NET_NS 604 static void *netns_get(struct task_struct *task) 605 { 606 struct net *net = NULL; 607 struct nsproxy *nsproxy; 608 609 rcu_read_lock(); 610 nsproxy = task_nsproxy(task); 611 if (nsproxy) 612 net = get_net(nsproxy->net_ns); 613 rcu_read_unlock(); 614 615 return net; 616 } 617 618 static void netns_put(void *ns) 619 { 620 put_net(ns); 621 } 622 623 static int netns_install(struct nsproxy *nsproxy, void *ns) 624 { 625 put_net(nsproxy->net_ns); 626 nsproxy->net_ns = get_net(ns); 627 return 0; 628 } 629 630 const struct proc_ns_operations netns_operations = { 631 .name = "net", 632 .type = CLONE_NEWNET, 633 .get = netns_get, 634 .put = netns_put, 635 .install = netns_install, 636 }; 637 #endif 638