1 #include <linux/workqueue.h> 2 #include <linux/rtnetlink.h> 3 #include <linux/cache.h> 4 #include <linux/slab.h> 5 #include <linux/list.h> 6 #include <linux/delay.h> 7 #include <linux/sched.h> 8 #include <linux/idr.h> 9 #include <linux/rculist.h> 10 #include <linux/nsproxy.h> 11 #include <linux/proc_fs.h> 12 #include <linux/file.h> 13 #include <net/net_namespace.h> 14 #include <net/netns/generic.h> 15 16 /* 17 * Our network namespace constructor/destructor lists 18 */ 19 20 static LIST_HEAD(pernet_list); 21 static struct list_head *first_device = &pernet_list; 22 static DEFINE_MUTEX(net_mutex); 23 24 LIST_HEAD(net_namespace_list); 25 EXPORT_SYMBOL_GPL(net_namespace_list); 26 27 struct net init_net; 28 EXPORT_SYMBOL(init_net); 29 30 #define INITIAL_NET_GEN_PTRS 13 /* +1 for len +2 for rcu_head */ 31 32 static int net_assign_generic(struct net *net, int id, void *data) 33 { 34 struct net_generic *ng, *old_ng; 35 36 BUG_ON(!mutex_is_locked(&net_mutex)); 37 BUG_ON(id == 0); 38 39 old_ng = rcu_dereference_protected(net->gen, 40 lockdep_is_held(&net_mutex)); 41 ng = old_ng; 42 if (old_ng->len >= id) 43 goto assign; 44 45 ng = kzalloc(sizeof(struct net_generic) + 46 id * sizeof(void *), GFP_KERNEL); 47 if (ng == NULL) 48 return -ENOMEM; 49 50 /* 51 * Some synchronisation notes: 52 * 53 * The net_generic explores the net->gen array inside rcu 54 * read section. Besides once set the net->gen->ptr[x] 55 * pointer never changes (see rules in netns/generic.h). 56 * 57 * That said, we simply duplicate this array and schedule 58 * the old copy for kfree after a grace period. 59 */ 60 61 ng->len = id; 62 memcpy(&ng->ptr, &old_ng->ptr, old_ng->len * sizeof(void*)); 63 64 rcu_assign_pointer(net->gen, ng); 65 kfree_rcu(old_ng, rcu); 66 assign: 67 ng->ptr[id - 1] = data; 68 return 0; 69 } 70 71 static int ops_init(const struct pernet_operations *ops, struct net *net) 72 { 73 int err; 74 if (ops->id && ops->size) { 75 void *data = kzalloc(ops->size, GFP_KERNEL); 76 if (!data) 77 return -ENOMEM; 78 79 err = net_assign_generic(net, *ops->id, data); 80 if (err) { 81 kfree(data); 82 return err; 83 } 84 } 85 if (ops->init) 86 return ops->init(net); 87 return 0; 88 } 89 90 static void ops_free(const struct pernet_operations *ops, struct net *net) 91 { 92 if (ops->id && ops->size) { 93 int id = *ops->id; 94 kfree(net_generic(net, id)); 95 } 96 } 97 98 static void ops_exit_list(const struct pernet_operations *ops, 99 struct list_head *net_exit_list) 100 { 101 struct net *net; 102 if (ops->exit) { 103 list_for_each_entry(net, net_exit_list, exit_list) 104 ops->exit(net); 105 } 106 if (ops->exit_batch) 107 ops->exit_batch(net_exit_list); 108 } 109 110 static void ops_free_list(const struct pernet_operations *ops, 111 struct list_head *net_exit_list) 112 { 113 struct net *net; 114 if (ops->size && ops->id) { 115 list_for_each_entry(net, net_exit_list, exit_list) 116 ops_free(ops, net); 117 } 118 } 119 120 /* 121 * setup_net runs the initializers for the network namespace object. 122 */ 123 static __net_init int setup_net(struct net *net) 124 { 125 /* Must be called with net_mutex held */ 126 const struct pernet_operations *ops, *saved_ops; 127 int error = 0; 128 LIST_HEAD(net_exit_list); 129 130 atomic_set(&net->count, 1); 131 atomic_set(&net->passive, 1); 132 net->dev_base_seq = 1; 133 134 #ifdef NETNS_REFCNT_DEBUG 135 atomic_set(&net->use_count, 0); 136 #endif 137 138 list_for_each_entry(ops, &pernet_list, list) { 139 error = ops_init(ops, net); 140 if (error < 0) 141 goto out_undo; 142 } 143 out: 144 return error; 145 146 out_undo: 147 /* Walk through the list backwards calling the exit functions 148 * for the pernet modules whose init functions did not fail. 149 */ 150 list_add(&net->exit_list, &net_exit_list); 151 saved_ops = ops; 152 list_for_each_entry_continue_reverse(ops, &pernet_list, list) 153 ops_exit_list(ops, &net_exit_list); 154 155 ops = saved_ops; 156 list_for_each_entry_continue_reverse(ops, &pernet_list, list) 157 ops_free_list(ops, &net_exit_list); 158 159 rcu_barrier(); 160 goto out; 161 } 162 163 static struct net_generic *net_alloc_generic(void) 164 { 165 struct net_generic *ng; 166 size_t generic_size = sizeof(struct net_generic) + 167 INITIAL_NET_GEN_PTRS * sizeof(void *); 168 169 ng = kzalloc(generic_size, GFP_KERNEL); 170 if (ng) 171 ng->len = INITIAL_NET_GEN_PTRS; 172 173 return ng; 174 } 175 176 #ifdef CONFIG_NET_NS 177 static struct kmem_cache *net_cachep; 178 static struct workqueue_struct *netns_wq; 179 180 static struct net *net_alloc(void) 181 { 182 struct net *net = NULL; 183 struct net_generic *ng; 184 185 ng = net_alloc_generic(); 186 if (!ng) 187 goto out; 188 189 net = kmem_cache_zalloc(net_cachep, GFP_KERNEL); 190 if (!net) 191 goto out_free; 192 193 rcu_assign_pointer(net->gen, ng); 194 out: 195 return net; 196 197 out_free: 198 kfree(ng); 199 goto out; 200 } 201 202 static void net_free(struct net *net) 203 { 204 #ifdef NETNS_REFCNT_DEBUG 205 if (unlikely(atomic_read(&net->use_count) != 0)) { 206 printk(KERN_EMERG "network namespace not free! Usage: %d\n", 207 atomic_read(&net->use_count)); 208 return; 209 } 210 #endif 211 kfree(net->gen); 212 kmem_cache_free(net_cachep, net); 213 } 214 215 void net_drop_ns(void *p) 216 { 217 struct net *ns = p; 218 if (ns && atomic_dec_and_test(&ns->passive)) 219 net_free(ns); 220 } 221 222 struct net *copy_net_ns(unsigned long flags, struct net *old_net) 223 { 224 struct net *net; 225 int rv; 226 227 if (!(flags & CLONE_NEWNET)) 228 return get_net(old_net); 229 230 net = net_alloc(); 231 if (!net) 232 return ERR_PTR(-ENOMEM); 233 mutex_lock(&net_mutex); 234 rv = setup_net(net); 235 if (rv == 0) { 236 rtnl_lock(); 237 list_add_tail_rcu(&net->list, &net_namespace_list); 238 rtnl_unlock(); 239 } 240 mutex_unlock(&net_mutex); 241 if (rv < 0) { 242 net_drop_ns(net); 243 return ERR_PTR(rv); 244 } 245 return net; 246 } 247 248 static DEFINE_SPINLOCK(cleanup_list_lock); 249 static LIST_HEAD(cleanup_list); /* Must hold cleanup_list_lock to touch */ 250 251 static void cleanup_net(struct work_struct *work) 252 { 253 const struct pernet_operations *ops; 254 struct net *net, *tmp; 255 LIST_HEAD(net_kill_list); 256 LIST_HEAD(net_exit_list); 257 258 /* Atomically snapshot the list of namespaces to cleanup */ 259 spin_lock_irq(&cleanup_list_lock); 260 list_replace_init(&cleanup_list, &net_kill_list); 261 spin_unlock_irq(&cleanup_list_lock); 262 263 mutex_lock(&net_mutex); 264 265 /* Don't let anyone else find us. */ 266 rtnl_lock(); 267 list_for_each_entry(net, &net_kill_list, cleanup_list) { 268 list_del_rcu(&net->list); 269 list_add_tail(&net->exit_list, &net_exit_list); 270 } 271 rtnl_unlock(); 272 273 /* 274 * Another CPU might be rcu-iterating the list, wait for it. 275 * This needs to be before calling the exit() notifiers, so 276 * the rcu_barrier() below isn't sufficient alone. 277 */ 278 synchronize_rcu(); 279 280 /* Run all of the network namespace exit methods */ 281 list_for_each_entry_reverse(ops, &pernet_list, list) 282 ops_exit_list(ops, &net_exit_list); 283 284 /* Free the net generic variables */ 285 list_for_each_entry_reverse(ops, &pernet_list, list) 286 ops_free_list(ops, &net_exit_list); 287 288 mutex_unlock(&net_mutex); 289 290 /* Ensure there are no outstanding rcu callbacks using this 291 * network namespace. 292 */ 293 rcu_barrier(); 294 295 /* Finally it is safe to free my network namespace structure */ 296 list_for_each_entry_safe(net, tmp, &net_exit_list, exit_list) { 297 list_del_init(&net->exit_list); 298 net_drop_ns(net); 299 } 300 } 301 static DECLARE_WORK(net_cleanup_work, cleanup_net); 302 303 void __put_net(struct net *net) 304 { 305 /* Cleanup the network namespace in process context */ 306 unsigned long flags; 307 308 spin_lock_irqsave(&cleanup_list_lock, flags); 309 list_add(&net->cleanup_list, &cleanup_list); 310 spin_unlock_irqrestore(&cleanup_list_lock, flags); 311 312 queue_work(netns_wq, &net_cleanup_work); 313 } 314 EXPORT_SYMBOL_GPL(__put_net); 315 316 struct net *get_net_ns_by_fd(int fd) 317 { 318 struct proc_inode *ei; 319 struct file *file; 320 struct net *net; 321 322 file = proc_ns_fget(fd); 323 if (IS_ERR(file)) 324 return ERR_CAST(file); 325 326 ei = PROC_I(file->f_dentry->d_inode); 327 if (ei->ns_ops == &netns_operations) 328 net = get_net(ei->ns); 329 else 330 net = ERR_PTR(-EINVAL); 331 332 fput(file); 333 return net; 334 } 335 336 #else 337 struct net *copy_net_ns(unsigned long flags, struct net *old_net) 338 { 339 if (flags & CLONE_NEWNET) 340 return ERR_PTR(-EINVAL); 341 return old_net; 342 } 343 344 struct net *get_net_ns_by_fd(int fd) 345 { 346 return ERR_PTR(-EINVAL); 347 } 348 #endif 349 350 struct net *get_net_ns_by_pid(pid_t pid) 351 { 352 struct task_struct *tsk; 353 struct net *net; 354 355 /* Lookup the network namespace */ 356 net = ERR_PTR(-ESRCH); 357 rcu_read_lock(); 358 tsk = find_task_by_vpid(pid); 359 if (tsk) { 360 struct nsproxy *nsproxy; 361 nsproxy = task_nsproxy(tsk); 362 if (nsproxy) 363 net = get_net(nsproxy->net_ns); 364 } 365 rcu_read_unlock(); 366 return net; 367 } 368 EXPORT_SYMBOL_GPL(get_net_ns_by_pid); 369 370 static int __init net_ns_init(void) 371 { 372 struct net_generic *ng; 373 374 #ifdef CONFIG_NET_NS 375 net_cachep = kmem_cache_create("net_namespace", sizeof(struct net), 376 SMP_CACHE_BYTES, 377 SLAB_PANIC, NULL); 378 379 /* Create workqueue for cleanup */ 380 netns_wq = create_singlethread_workqueue("netns"); 381 if (!netns_wq) 382 panic("Could not create netns workq"); 383 #endif 384 385 ng = net_alloc_generic(); 386 if (!ng) 387 panic("Could not allocate generic netns"); 388 389 rcu_assign_pointer(init_net.gen, ng); 390 391 mutex_lock(&net_mutex); 392 if (setup_net(&init_net)) 393 panic("Could not setup the initial network namespace"); 394 395 rtnl_lock(); 396 list_add_tail_rcu(&init_net.list, &net_namespace_list); 397 rtnl_unlock(); 398 399 mutex_unlock(&net_mutex); 400 401 return 0; 402 } 403 404 pure_initcall(net_ns_init); 405 406 #ifdef CONFIG_NET_NS 407 static int __register_pernet_operations(struct list_head *list, 408 struct pernet_operations *ops) 409 { 410 struct net *net; 411 int error; 412 LIST_HEAD(net_exit_list); 413 414 list_add_tail(&ops->list, list); 415 if (ops->init || (ops->id && ops->size)) { 416 for_each_net(net) { 417 error = ops_init(ops, net); 418 if (error) 419 goto out_undo; 420 list_add_tail(&net->exit_list, &net_exit_list); 421 } 422 } 423 return 0; 424 425 out_undo: 426 /* If I have an error cleanup all namespaces I initialized */ 427 list_del(&ops->list); 428 ops_exit_list(ops, &net_exit_list); 429 ops_free_list(ops, &net_exit_list); 430 return error; 431 } 432 433 static void __unregister_pernet_operations(struct pernet_operations *ops) 434 { 435 struct net *net; 436 LIST_HEAD(net_exit_list); 437 438 list_del(&ops->list); 439 for_each_net(net) 440 list_add_tail(&net->exit_list, &net_exit_list); 441 ops_exit_list(ops, &net_exit_list); 442 ops_free_list(ops, &net_exit_list); 443 } 444 445 #else 446 447 static int __register_pernet_operations(struct list_head *list, 448 struct pernet_operations *ops) 449 { 450 int err = 0; 451 err = ops_init(ops, &init_net); 452 if (err) 453 ops_free(ops, &init_net); 454 return err; 455 456 } 457 458 static void __unregister_pernet_operations(struct pernet_operations *ops) 459 { 460 LIST_HEAD(net_exit_list); 461 list_add(&init_net.exit_list, &net_exit_list); 462 ops_exit_list(ops, &net_exit_list); 463 ops_free_list(ops, &net_exit_list); 464 } 465 466 #endif /* CONFIG_NET_NS */ 467 468 static DEFINE_IDA(net_generic_ids); 469 470 static int register_pernet_operations(struct list_head *list, 471 struct pernet_operations *ops) 472 { 473 int error; 474 475 if (ops->id) { 476 again: 477 error = ida_get_new_above(&net_generic_ids, 1, ops->id); 478 if (error < 0) { 479 if (error == -EAGAIN) { 480 ida_pre_get(&net_generic_ids, GFP_KERNEL); 481 goto again; 482 } 483 return error; 484 } 485 } 486 error = __register_pernet_operations(list, ops); 487 if (error) { 488 rcu_barrier(); 489 if (ops->id) 490 ida_remove(&net_generic_ids, *ops->id); 491 } 492 493 return error; 494 } 495 496 static void unregister_pernet_operations(struct pernet_operations *ops) 497 { 498 499 __unregister_pernet_operations(ops); 500 rcu_barrier(); 501 if (ops->id) 502 ida_remove(&net_generic_ids, *ops->id); 503 } 504 505 /** 506 * register_pernet_subsys - register a network namespace subsystem 507 * @ops: pernet operations structure for the subsystem 508 * 509 * Register a subsystem which has init and exit functions 510 * that are called when network namespaces are created and 511 * destroyed respectively. 512 * 513 * When registered all network namespace init functions are 514 * called for every existing network namespace. Allowing kernel 515 * modules to have a race free view of the set of network namespaces. 516 * 517 * When a new network namespace is created all of the init 518 * methods are called in the order in which they were registered. 519 * 520 * When a network namespace is destroyed all of the exit methods 521 * are called in the reverse of the order with which they were 522 * registered. 523 */ 524 int register_pernet_subsys(struct pernet_operations *ops) 525 { 526 int error; 527 mutex_lock(&net_mutex); 528 error = register_pernet_operations(first_device, ops); 529 mutex_unlock(&net_mutex); 530 return error; 531 } 532 EXPORT_SYMBOL_GPL(register_pernet_subsys); 533 534 /** 535 * unregister_pernet_subsys - unregister a network namespace subsystem 536 * @ops: pernet operations structure to manipulate 537 * 538 * Remove the pernet operations structure from the list to be 539 * used when network namespaces are created or destroyed. In 540 * addition run the exit method for all existing network 541 * namespaces. 542 */ 543 void unregister_pernet_subsys(struct pernet_operations *ops) 544 { 545 mutex_lock(&net_mutex); 546 unregister_pernet_operations(ops); 547 mutex_unlock(&net_mutex); 548 } 549 EXPORT_SYMBOL_GPL(unregister_pernet_subsys); 550 551 /** 552 * register_pernet_device - register a network namespace device 553 * @ops: pernet operations structure for the subsystem 554 * 555 * Register a device which has init and exit functions 556 * that are called when network namespaces are created and 557 * destroyed respectively. 558 * 559 * When registered all network namespace init functions are 560 * called for every existing network namespace. Allowing kernel 561 * modules to have a race free view of the set of network namespaces. 562 * 563 * When a new network namespace is created all of the init 564 * methods are called in the order in which they were registered. 565 * 566 * When a network namespace is destroyed all of the exit methods 567 * are called in the reverse of the order with which they were 568 * registered. 569 */ 570 int register_pernet_device(struct pernet_operations *ops) 571 { 572 int error; 573 mutex_lock(&net_mutex); 574 error = register_pernet_operations(&pernet_list, ops); 575 if (!error && (first_device == &pernet_list)) 576 first_device = &ops->list; 577 mutex_unlock(&net_mutex); 578 return error; 579 } 580 EXPORT_SYMBOL_GPL(register_pernet_device); 581 582 /** 583 * unregister_pernet_device - unregister a network namespace netdevice 584 * @ops: pernet operations structure to manipulate 585 * 586 * Remove the pernet operations structure from the list to be 587 * used when network namespaces are created or destroyed. In 588 * addition run the exit method for all existing network 589 * namespaces. 590 */ 591 void unregister_pernet_device(struct pernet_operations *ops) 592 { 593 mutex_lock(&net_mutex); 594 if (&ops->list == first_device) 595 first_device = first_device->next; 596 unregister_pernet_operations(ops); 597 mutex_unlock(&net_mutex); 598 } 599 EXPORT_SYMBOL_GPL(unregister_pernet_device); 600 601 #ifdef CONFIG_NET_NS 602 static void *netns_get(struct task_struct *task) 603 { 604 struct net *net = NULL; 605 struct nsproxy *nsproxy; 606 607 rcu_read_lock(); 608 nsproxy = task_nsproxy(task); 609 if (nsproxy) 610 net = get_net(nsproxy->net_ns); 611 rcu_read_unlock(); 612 613 return net; 614 } 615 616 static void netns_put(void *ns) 617 { 618 put_net(ns); 619 } 620 621 static int netns_install(struct nsproxy *nsproxy, void *ns) 622 { 623 put_net(nsproxy->net_ns); 624 nsproxy->net_ns = get_net(ns); 625 return 0; 626 } 627 628 const struct proc_ns_operations netns_operations = { 629 .name = "net", 630 .type = CLONE_NEWNET, 631 .get = netns_get, 632 .put = netns_put, 633 .install = netns_install, 634 }; 635 #endif 636