1 #include <linux/workqueue.h> 2 #include <linux/rtnetlink.h> 3 #include <linux/cache.h> 4 #include <linux/slab.h> 5 #include <linux/list.h> 6 #include <linux/delay.h> 7 #include <linux/sched.h> 8 #include <linux/idr.h> 9 #include <linux/rculist.h> 10 #include <linux/nsproxy.h> 11 #include <linux/proc_fs.h> 12 #include <linux/file.h> 13 #include <linux/export.h> 14 #include <net/net_namespace.h> 15 #include <net/netns/generic.h> 16 17 /* 18 * Our network namespace constructor/destructor lists 19 */ 20 21 static LIST_HEAD(pernet_list); 22 static struct list_head *first_device = &pernet_list; 23 static DEFINE_MUTEX(net_mutex); 24 25 LIST_HEAD(net_namespace_list); 26 EXPORT_SYMBOL_GPL(net_namespace_list); 27 28 struct net init_net; 29 EXPORT_SYMBOL(init_net); 30 31 #define INITIAL_NET_GEN_PTRS 13 /* +1 for len +2 for rcu_head */ 32 33 static unsigned int max_gen_ptrs = INITIAL_NET_GEN_PTRS; 34 35 static struct net_generic *net_alloc_generic(void) 36 { 37 struct net_generic *ng; 38 size_t generic_size = offsetof(struct net_generic, ptr[max_gen_ptrs]); 39 40 ng = kzalloc(generic_size, GFP_KERNEL); 41 if (ng) 42 ng->len = max_gen_ptrs; 43 44 return ng; 45 } 46 47 static int net_assign_generic(struct net *net, int id, void *data) 48 { 49 struct net_generic *ng, *old_ng; 50 51 BUG_ON(!mutex_is_locked(&net_mutex)); 52 BUG_ON(id == 0); 53 54 old_ng = rcu_dereference_protected(net->gen, 55 lockdep_is_held(&net_mutex)); 56 ng = old_ng; 57 if (old_ng->len >= id) 58 goto assign; 59 60 ng = net_alloc_generic(); 61 if (ng == NULL) 62 return -ENOMEM; 63 64 /* 65 * Some synchronisation notes: 66 * 67 * The net_generic explores the net->gen array inside rcu 68 * read section. Besides once set the net->gen->ptr[x] 69 * pointer never changes (see rules in netns/generic.h). 70 * 71 * That said, we simply duplicate this array and schedule 72 * the old copy for kfree after a grace period. 73 */ 74 75 memcpy(&ng->ptr, &old_ng->ptr, old_ng->len * sizeof(void*)); 76 77 rcu_assign_pointer(net->gen, ng); 78 kfree_rcu(old_ng, rcu); 79 assign: 80 ng->ptr[id - 1] = data; 81 return 0; 82 } 83 84 static int ops_init(const struct pernet_operations *ops, struct net *net) 85 { 86 int err = -ENOMEM; 87 void *data = NULL; 88 89 if (ops->id && ops->size) { 90 data = kzalloc(ops->size, GFP_KERNEL); 91 if (!data) 92 goto out; 93 94 err = net_assign_generic(net, *ops->id, data); 95 if (err) 96 goto cleanup; 97 } 98 err = 0; 99 if (ops->init) 100 err = ops->init(net); 101 if (!err) 102 return 0; 103 104 cleanup: 105 kfree(data); 106 107 out: 108 return err; 109 } 110 111 static void ops_free(const struct pernet_operations *ops, struct net *net) 112 { 113 if (ops->id && ops->size) { 114 int id = *ops->id; 115 kfree(net_generic(net, id)); 116 } 117 } 118 119 static void ops_exit_list(const struct pernet_operations *ops, 120 struct list_head *net_exit_list) 121 { 122 struct net *net; 123 if (ops->exit) { 124 list_for_each_entry(net, net_exit_list, exit_list) 125 ops->exit(net); 126 } 127 if (ops->exit_batch) 128 ops->exit_batch(net_exit_list); 129 } 130 131 static void ops_free_list(const struct pernet_operations *ops, 132 struct list_head *net_exit_list) 133 { 134 struct net *net; 135 if (ops->size && ops->id) { 136 list_for_each_entry(net, net_exit_list, exit_list) 137 ops_free(ops, net); 138 } 139 } 140 141 /* 142 * setup_net runs the initializers for the network namespace object. 143 */ 144 static __net_init int setup_net(struct net *net) 145 { 146 /* Must be called with net_mutex held */ 147 const struct pernet_operations *ops, *saved_ops; 148 int error = 0; 149 LIST_HEAD(net_exit_list); 150 151 atomic_set(&net->count, 1); 152 atomic_set(&net->passive, 1); 153 net->dev_base_seq = 1; 154 155 #ifdef NETNS_REFCNT_DEBUG 156 atomic_set(&net->use_count, 0); 157 #endif 158 159 list_for_each_entry(ops, &pernet_list, list) { 160 error = ops_init(ops, net); 161 if (error < 0) 162 goto out_undo; 163 } 164 out: 165 return error; 166 167 out_undo: 168 /* Walk through the list backwards calling the exit functions 169 * for the pernet modules whose init functions did not fail. 170 */ 171 list_add(&net->exit_list, &net_exit_list); 172 saved_ops = ops; 173 list_for_each_entry_continue_reverse(ops, &pernet_list, list) 174 ops_exit_list(ops, &net_exit_list); 175 176 ops = saved_ops; 177 list_for_each_entry_continue_reverse(ops, &pernet_list, list) 178 ops_free_list(ops, &net_exit_list); 179 180 rcu_barrier(); 181 goto out; 182 } 183 184 185 #ifdef CONFIG_NET_NS 186 static struct kmem_cache *net_cachep; 187 static struct workqueue_struct *netns_wq; 188 189 static struct net *net_alloc(void) 190 { 191 struct net *net = NULL; 192 struct net_generic *ng; 193 194 ng = net_alloc_generic(); 195 if (!ng) 196 goto out; 197 198 net = kmem_cache_zalloc(net_cachep, GFP_KERNEL); 199 if (!net) 200 goto out_free; 201 202 rcu_assign_pointer(net->gen, ng); 203 out: 204 return net; 205 206 out_free: 207 kfree(ng); 208 goto out; 209 } 210 211 static void net_free(struct net *net) 212 { 213 #ifdef NETNS_REFCNT_DEBUG 214 if (unlikely(atomic_read(&net->use_count) != 0)) { 215 printk(KERN_EMERG "network namespace not free! Usage: %d\n", 216 atomic_read(&net->use_count)); 217 return; 218 } 219 #endif 220 kfree(net->gen); 221 kmem_cache_free(net_cachep, net); 222 } 223 224 void net_drop_ns(void *p) 225 { 226 struct net *ns = p; 227 if (ns && atomic_dec_and_test(&ns->passive)) 228 net_free(ns); 229 } 230 231 struct net *copy_net_ns(unsigned long flags, struct net *old_net) 232 { 233 struct net *net; 234 int rv; 235 236 if (!(flags & CLONE_NEWNET)) 237 return get_net(old_net); 238 239 net = net_alloc(); 240 if (!net) 241 return ERR_PTR(-ENOMEM); 242 mutex_lock(&net_mutex); 243 rv = setup_net(net); 244 if (rv == 0) { 245 rtnl_lock(); 246 list_add_tail_rcu(&net->list, &net_namespace_list); 247 rtnl_unlock(); 248 } 249 mutex_unlock(&net_mutex); 250 if (rv < 0) { 251 net_drop_ns(net); 252 return ERR_PTR(rv); 253 } 254 return net; 255 } 256 257 static DEFINE_SPINLOCK(cleanup_list_lock); 258 static LIST_HEAD(cleanup_list); /* Must hold cleanup_list_lock to touch */ 259 260 static void cleanup_net(struct work_struct *work) 261 { 262 const struct pernet_operations *ops; 263 struct net *net, *tmp; 264 LIST_HEAD(net_kill_list); 265 LIST_HEAD(net_exit_list); 266 267 /* Atomically snapshot the list of namespaces to cleanup */ 268 spin_lock_irq(&cleanup_list_lock); 269 list_replace_init(&cleanup_list, &net_kill_list); 270 spin_unlock_irq(&cleanup_list_lock); 271 272 mutex_lock(&net_mutex); 273 274 /* Don't let anyone else find us. */ 275 rtnl_lock(); 276 list_for_each_entry(net, &net_kill_list, cleanup_list) { 277 list_del_rcu(&net->list); 278 list_add_tail(&net->exit_list, &net_exit_list); 279 } 280 rtnl_unlock(); 281 282 /* 283 * Another CPU might be rcu-iterating the list, wait for it. 284 * This needs to be before calling the exit() notifiers, so 285 * the rcu_barrier() below isn't sufficient alone. 286 */ 287 synchronize_rcu(); 288 289 /* Run all of the network namespace exit methods */ 290 list_for_each_entry_reverse(ops, &pernet_list, list) 291 ops_exit_list(ops, &net_exit_list); 292 293 /* Free the net generic variables */ 294 list_for_each_entry_reverse(ops, &pernet_list, list) 295 ops_free_list(ops, &net_exit_list); 296 297 mutex_unlock(&net_mutex); 298 299 /* Ensure there are no outstanding rcu callbacks using this 300 * network namespace. 301 */ 302 rcu_barrier(); 303 304 /* Finally it is safe to free my network namespace structure */ 305 list_for_each_entry_safe(net, tmp, &net_exit_list, exit_list) { 306 list_del_init(&net->exit_list); 307 net_drop_ns(net); 308 } 309 } 310 static DECLARE_WORK(net_cleanup_work, cleanup_net); 311 312 void __put_net(struct net *net) 313 { 314 /* Cleanup the network namespace in process context */ 315 unsigned long flags; 316 317 spin_lock_irqsave(&cleanup_list_lock, flags); 318 list_add(&net->cleanup_list, &cleanup_list); 319 spin_unlock_irqrestore(&cleanup_list_lock, flags); 320 321 queue_work(netns_wq, &net_cleanup_work); 322 } 323 EXPORT_SYMBOL_GPL(__put_net); 324 325 struct net *get_net_ns_by_fd(int fd) 326 { 327 struct proc_inode *ei; 328 struct file *file; 329 struct net *net; 330 331 file = proc_ns_fget(fd); 332 if (IS_ERR(file)) 333 return ERR_CAST(file); 334 335 ei = PROC_I(file->f_dentry->d_inode); 336 if (ei->ns_ops == &netns_operations) 337 net = get_net(ei->ns); 338 else 339 net = ERR_PTR(-EINVAL); 340 341 fput(file); 342 return net; 343 } 344 345 #else 346 struct net *copy_net_ns(unsigned long flags, struct net *old_net) 347 { 348 if (flags & CLONE_NEWNET) 349 return ERR_PTR(-EINVAL); 350 return old_net; 351 } 352 353 struct net *get_net_ns_by_fd(int fd) 354 { 355 return ERR_PTR(-EINVAL); 356 } 357 #endif 358 359 struct net *get_net_ns_by_pid(pid_t pid) 360 { 361 struct task_struct *tsk; 362 struct net *net; 363 364 /* Lookup the network namespace */ 365 net = ERR_PTR(-ESRCH); 366 rcu_read_lock(); 367 tsk = find_task_by_vpid(pid); 368 if (tsk) { 369 struct nsproxy *nsproxy; 370 nsproxy = task_nsproxy(tsk); 371 if (nsproxy) 372 net = get_net(nsproxy->net_ns); 373 } 374 rcu_read_unlock(); 375 return net; 376 } 377 EXPORT_SYMBOL_GPL(get_net_ns_by_pid); 378 379 static int __init net_ns_init(void) 380 { 381 struct net_generic *ng; 382 383 #ifdef CONFIG_NET_NS 384 net_cachep = kmem_cache_create("net_namespace", sizeof(struct net), 385 SMP_CACHE_BYTES, 386 SLAB_PANIC, NULL); 387 388 /* Create workqueue for cleanup */ 389 netns_wq = create_singlethread_workqueue("netns"); 390 if (!netns_wq) 391 panic("Could not create netns workq"); 392 #endif 393 394 ng = net_alloc_generic(); 395 if (!ng) 396 panic("Could not allocate generic netns"); 397 398 rcu_assign_pointer(init_net.gen, ng); 399 400 mutex_lock(&net_mutex); 401 if (setup_net(&init_net)) 402 panic("Could not setup the initial network namespace"); 403 404 rtnl_lock(); 405 list_add_tail_rcu(&init_net.list, &net_namespace_list); 406 rtnl_unlock(); 407 408 mutex_unlock(&net_mutex); 409 410 return 0; 411 } 412 413 pure_initcall(net_ns_init); 414 415 #ifdef CONFIG_NET_NS 416 static int __register_pernet_operations(struct list_head *list, 417 struct pernet_operations *ops) 418 { 419 struct net *net; 420 int error; 421 LIST_HEAD(net_exit_list); 422 423 list_add_tail(&ops->list, list); 424 if (ops->init || (ops->id && ops->size)) { 425 for_each_net(net) { 426 error = ops_init(ops, net); 427 if (error) 428 goto out_undo; 429 list_add_tail(&net->exit_list, &net_exit_list); 430 } 431 } 432 return 0; 433 434 out_undo: 435 /* If I have an error cleanup all namespaces I initialized */ 436 list_del(&ops->list); 437 ops_exit_list(ops, &net_exit_list); 438 ops_free_list(ops, &net_exit_list); 439 return error; 440 } 441 442 static void __unregister_pernet_operations(struct pernet_operations *ops) 443 { 444 struct net *net; 445 LIST_HEAD(net_exit_list); 446 447 list_del(&ops->list); 448 for_each_net(net) 449 list_add_tail(&net->exit_list, &net_exit_list); 450 ops_exit_list(ops, &net_exit_list); 451 ops_free_list(ops, &net_exit_list); 452 } 453 454 #else 455 456 static int __register_pernet_operations(struct list_head *list, 457 struct pernet_operations *ops) 458 { 459 return ops_init(ops, &init_net); 460 } 461 462 static void __unregister_pernet_operations(struct pernet_operations *ops) 463 { 464 LIST_HEAD(net_exit_list); 465 list_add(&init_net.exit_list, &net_exit_list); 466 ops_exit_list(ops, &net_exit_list); 467 ops_free_list(ops, &net_exit_list); 468 } 469 470 #endif /* CONFIG_NET_NS */ 471 472 static DEFINE_IDA(net_generic_ids); 473 474 static int register_pernet_operations(struct list_head *list, 475 struct pernet_operations *ops) 476 { 477 int error; 478 479 if (ops->id) { 480 again: 481 error = ida_get_new_above(&net_generic_ids, 1, ops->id); 482 if (error < 0) { 483 if (error == -EAGAIN) { 484 ida_pre_get(&net_generic_ids, GFP_KERNEL); 485 goto again; 486 } 487 return error; 488 } 489 max_gen_ptrs = max_t(unsigned int, max_gen_ptrs, *ops->id); 490 } 491 error = __register_pernet_operations(list, ops); 492 if (error) { 493 rcu_barrier(); 494 if (ops->id) 495 ida_remove(&net_generic_ids, *ops->id); 496 } 497 498 return error; 499 } 500 501 static void unregister_pernet_operations(struct pernet_operations *ops) 502 { 503 504 __unregister_pernet_operations(ops); 505 rcu_barrier(); 506 if (ops->id) 507 ida_remove(&net_generic_ids, *ops->id); 508 } 509 510 /** 511 * register_pernet_subsys - register a network namespace subsystem 512 * @ops: pernet operations structure for the subsystem 513 * 514 * Register a subsystem which has init and exit functions 515 * that are called when network namespaces are created and 516 * destroyed respectively. 517 * 518 * When registered all network namespace init functions are 519 * called for every existing network namespace. Allowing kernel 520 * modules to have a race free view of the set of network namespaces. 521 * 522 * When a new network namespace is created all of the init 523 * methods are called in the order in which they were registered. 524 * 525 * When a network namespace is destroyed all of the exit methods 526 * are called in the reverse of the order with which they were 527 * registered. 528 */ 529 int register_pernet_subsys(struct pernet_operations *ops) 530 { 531 int error; 532 mutex_lock(&net_mutex); 533 error = register_pernet_operations(first_device, ops); 534 mutex_unlock(&net_mutex); 535 return error; 536 } 537 EXPORT_SYMBOL_GPL(register_pernet_subsys); 538 539 /** 540 * unregister_pernet_subsys - unregister a network namespace subsystem 541 * @ops: pernet operations structure to manipulate 542 * 543 * Remove the pernet operations structure from the list to be 544 * used when network namespaces are created or destroyed. In 545 * addition run the exit method for all existing network 546 * namespaces. 547 */ 548 void unregister_pernet_subsys(struct pernet_operations *ops) 549 { 550 mutex_lock(&net_mutex); 551 unregister_pernet_operations(ops); 552 mutex_unlock(&net_mutex); 553 } 554 EXPORT_SYMBOL_GPL(unregister_pernet_subsys); 555 556 /** 557 * register_pernet_device - register a network namespace device 558 * @ops: pernet operations structure for the subsystem 559 * 560 * Register a device which has init and exit functions 561 * that are called when network namespaces are created and 562 * destroyed respectively. 563 * 564 * When registered all network namespace init functions are 565 * called for every existing network namespace. Allowing kernel 566 * modules to have a race free view of the set of network namespaces. 567 * 568 * When a new network namespace is created all of the init 569 * methods are called in the order in which they were registered. 570 * 571 * When a network namespace is destroyed all of the exit methods 572 * are called in the reverse of the order with which they were 573 * registered. 574 */ 575 int register_pernet_device(struct pernet_operations *ops) 576 { 577 int error; 578 mutex_lock(&net_mutex); 579 error = register_pernet_operations(&pernet_list, ops); 580 if (!error && (first_device == &pernet_list)) 581 first_device = &ops->list; 582 mutex_unlock(&net_mutex); 583 return error; 584 } 585 EXPORT_SYMBOL_GPL(register_pernet_device); 586 587 /** 588 * unregister_pernet_device - unregister a network namespace netdevice 589 * @ops: pernet operations structure to manipulate 590 * 591 * Remove the pernet operations structure from the list to be 592 * used when network namespaces are created or destroyed. In 593 * addition run the exit method for all existing network 594 * namespaces. 595 */ 596 void unregister_pernet_device(struct pernet_operations *ops) 597 { 598 mutex_lock(&net_mutex); 599 if (&ops->list == first_device) 600 first_device = first_device->next; 601 unregister_pernet_operations(ops); 602 mutex_unlock(&net_mutex); 603 } 604 EXPORT_SYMBOL_GPL(unregister_pernet_device); 605 606 #ifdef CONFIG_NET_NS 607 static void *netns_get(struct task_struct *task) 608 { 609 struct net *net = NULL; 610 struct nsproxy *nsproxy; 611 612 rcu_read_lock(); 613 nsproxy = task_nsproxy(task); 614 if (nsproxy) 615 net = get_net(nsproxy->net_ns); 616 rcu_read_unlock(); 617 618 return net; 619 } 620 621 static void netns_put(void *ns) 622 { 623 put_net(ns); 624 } 625 626 static int netns_install(struct nsproxy *nsproxy, void *ns) 627 { 628 put_net(nsproxy->net_ns); 629 nsproxy->net_ns = get_net(ns); 630 return 0; 631 } 632 633 const struct proc_ns_operations netns_operations = { 634 .name = "net", 635 .type = CLONE_NEWNET, 636 .get = netns_get, 637 .put = netns_put, 638 .install = netns_install, 639 }; 640 #endif 641