1 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt 2 3 #include <linux/workqueue.h> 4 #include <linux/rtnetlink.h> 5 #include <linux/cache.h> 6 #include <linux/slab.h> 7 #include <linux/list.h> 8 #include <linux/delay.h> 9 #include <linux/sched.h> 10 #include <linux/idr.h> 11 #include <linux/rculist.h> 12 #include <linux/nsproxy.h> 13 #include <linux/fs.h> 14 #include <linux/proc_ns.h> 15 #include <linux/file.h> 16 #include <linux/export.h> 17 #include <linux/user_namespace.h> 18 #include <linux/net_namespace.h> 19 #include <linux/sched/task.h> 20 21 #include <net/sock.h> 22 #include <net/netlink.h> 23 #include <net/net_namespace.h> 24 #include <net/netns/generic.h> 25 26 /* 27 * Our network namespace constructor/destructor lists 28 */ 29 30 static LIST_HEAD(pernet_list); 31 static struct list_head *first_device = &pernet_list; 32 DEFINE_MUTEX(net_mutex); 33 34 LIST_HEAD(net_namespace_list); 35 EXPORT_SYMBOL_GPL(net_namespace_list); 36 37 struct net init_net = { 38 .dev_base_head = LIST_HEAD_INIT(init_net.dev_base_head), 39 }; 40 EXPORT_SYMBOL(init_net); 41 42 static bool init_net_initialized; 43 44 #define MIN_PERNET_OPS_ID \ 45 ((sizeof(struct net_generic) + sizeof(void *) - 1) / sizeof(void *)) 46 47 #define INITIAL_NET_GEN_PTRS 13 /* +1 for len +2 for rcu_head */ 48 49 static unsigned int max_gen_ptrs = INITIAL_NET_GEN_PTRS; 50 51 static struct net_generic *net_alloc_generic(void) 52 { 53 struct net_generic *ng; 54 unsigned int generic_size = offsetof(struct net_generic, ptr[max_gen_ptrs]); 55 56 ng = kzalloc(generic_size, GFP_KERNEL); 57 if (ng) 58 ng->s.len = max_gen_ptrs; 59 60 return ng; 61 } 62 63 static int net_assign_generic(struct net *net, unsigned int id, void *data) 64 { 65 struct net_generic *ng, *old_ng; 66 67 BUG_ON(!mutex_is_locked(&net_mutex)); 68 BUG_ON(id < MIN_PERNET_OPS_ID); 69 70 old_ng = rcu_dereference_protected(net->gen, 71 lockdep_is_held(&net_mutex)); 72 if (old_ng->s.len > id) { 73 old_ng->ptr[id] = data; 74 return 0; 75 } 76 77 ng = net_alloc_generic(); 78 if (ng == NULL) 79 return -ENOMEM; 80 81 /* 82 * Some synchronisation notes: 83 * 84 * The net_generic explores the net->gen array inside rcu 85 * read section. Besides once set the net->gen->ptr[x] 86 * pointer never changes (see rules in netns/generic.h). 87 * 88 * That said, we simply duplicate this array and schedule 89 * the old copy for kfree after a grace period. 90 */ 91 92 memcpy(&ng->ptr[MIN_PERNET_OPS_ID], &old_ng->ptr[MIN_PERNET_OPS_ID], 93 (old_ng->s.len - MIN_PERNET_OPS_ID) * sizeof(void *)); 94 ng->ptr[id] = data; 95 96 rcu_assign_pointer(net->gen, ng); 97 kfree_rcu(old_ng, s.rcu); 98 return 0; 99 } 100 101 static int ops_init(const struct pernet_operations *ops, struct net *net) 102 { 103 int err = -ENOMEM; 104 void *data = NULL; 105 106 if (ops->id && ops->size) { 107 data = kzalloc(ops->size, GFP_KERNEL); 108 if (!data) 109 goto out; 110 111 err = net_assign_generic(net, *ops->id, data); 112 if (err) 113 goto cleanup; 114 } 115 err = 0; 116 if (ops->init) 117 err = ops->init(net); 118 if (!err) 119 return 0; 120 121 cleanup: 122 kfree(data); 123 124 out: 125 return err; 126 } 127 128 static void ops_free(const struct pernet_operations *ops, struct net *net) 129 { 130 if (ops->id && ops->size) { 131 kfree(net_generic(net, *ops->id)); 132 } 133 } 134 135 static void ops_exit_list(const struct pernet_operations *ops, 136 struct list_head *net_exit_list) 137 { 138 struct net *net; 139 if (ops->exit) { 140 list_for_each_entry(net, net_exit_list, exit_list) 141 ops->exit(net); 142 } 143 if (ops->exit_batch) 144 ops->exit_batch(net_exit_list); 145 } 146 147 static void ops_free_list(const struct pernet_operations *ops, 148 struct list_head *net_exit_list) 149 { 150 struct net *net; 151 if (ops->size && ops->id) { 152 list_for_each_entry(net, net_exit_list, exit_list) 153 ops_free(ops, net); 154 } 155 } 156 157 /* should be called with nsid_lock held */ 158 static int alloc_netid(struct net *net, struct net *peer, int reqid) 159 { 160 int min = 0, max = 0; 161 162 if (reqid >= 0) { 163 min = reqid; 164 max = reqid + 1; 165 } 166 167 return idr_alloc(&net->netns_ids, peer, min, max, GFP_ATOMIC); 168 } 169 170 /* This function is used by idr_for_each(). If net is equal to peer, the 171 * function returns the id so that idr_for_each() stops. Because we cannot 172 * returns the id 0 (idr_for_each() will not stop), we return the magic value 173 * NET_ID_ZERO (-1) for it. 174 */ 175 #define NET_ID_ZERO -1 176 static int net_eq_idr(int id, void *net, void *peer) 177 { 178 if (net_eq(net, peer)) 179 return id ? : NET_ID_ZERO; 180 return 0; 181 } 182 183 /* Should be called with nsid_lock held. If a new id is assigned, the bool alloc 184 * is set to true, thus the caller knows that the new id must be notified via 185 * rtnl. 186 */ 187 static int __peernet2id_alloc(struct net *net, struct net *peer, bool *alloc) 188 { 189 int id = idr_for_each(&net->netns_ids, net_eq_idr, peer); 190 bool alloc_it = *alloc; 191 192 *alloc = false; 193 194 /* Magic value for id 0. */ 195 if (id == NET_ID_ZERO) 196 return 0; 197 if (id > 0) 198 return id; 199 200 if (alloc_it) { 201 id = alloc_netid(net, peer, -1); 202 *alloc = true; 203 return id >= 0 ? id : NETNSA_NSID_NOT_ASSIGNED; 204 } 205 206 return NETNSA_NSID_NOT_ASSIGNED; 207 } 208 209 /* should be called with nsid_lock held */ 210 static int __peernet2id(struct net *net, struct net *peer) 211 { 212 bool no = false; 213 214 return __peernet2id_alloc(net, peer, &no); 215 } 216 217 static void rtnl_net_notifyid(struct net *net, int cmd, int id); 218 /* This function returns the id of a peer netns. If no id is assigned, one will 219 * be allocated and returned. 220 */ 221 int peernet2id_alloc(struct net *net, struct net *peer) 222 { 223 bool alloc; 224 int id; 225 226 if (atomic_read(&net->count) == 0) 227 return NETNSA_NSID_NOT_ASSIGNED; 228 spin_lock_bh(&net->nsid_lock); 229 alloc = atomic_read(&peer->count) == 0 ? false : true; 230 id = __peernet2id_alloc(net, peer, &alloc); 231 spin_unlock_bh(&net->nsid_lock); 232 if (alloc && id >= 0) 233 rtnl_net_notifyid(net, RTM_NEWNSID, id); 234 return id; 235 } 236 237 /* This function returns, if assigned, the id of a peer netns. */ 238 int peernet2id(struct net *net, struct net *peer) 239 { 240 int id; 241 242 spin_lock_bh(&net->nsid_lock); 243 id = __peernet2id(net, peer); 244 spin_unlock_bh(&net->nsid_lock); 245 return id; 246 } 247 EXPORT_SYMBOL(peernet2id); 248 249 /* This function returns true is the peer netns has an id assigned into the 250 * current netns. 251 */ 252 bool peernet_has_id(struct net *net, struct net *peer) 253 { 254 return peernet2id(net, peer) >= 0; 255 } 256 257 struct net *get_net_ns_by_id(struct net *net, int id) 258 { 259 struct net *peer; 260 261 if (id < 0) 262 return NULL; 263 264 rcu_read_lock(); 265 spin_lock_bh(&net->nsid_lock); 266 peer = idr_find(&net->netns_ids, id); 267 if (peer) 268 get_net(peer); 269 spin_unlock_bh(&net->nsid_lock); 270 rcu_read_unlock(); 271 272 return peer; 273 } 274 275 /* 276 * setup_net runs the initializers for the network namespace object. 277 */ 278 static __net_init int setup_net(struct net *net, struct user_namespace *user_ns) 279 { 280 /* Must be called with net_mutex held */ 281 const struct pernet_operations *ops, *saved_ops; 282 int error = 0; 283 LIST_HEAD(net_exit_list); 284 285 atomic_set(&net->count, 1); 286 atomic_set(&net->passive, 1); 287 net->dev_base_seq = 1; 288 net->user_ns = user_ns; 289 idr_init(&net->netns_ids); 290 spin_lock_init(&net->nsid_lock); 291 292 list_for_each_entry(ops, &pernet_list, list) { 293 error = ops_init(ops, net); 294 if (error < 0) 295 goto out_undo; 296 } 297 out: 298 return error; 299 300 out_undo: 301 /* Walk through the list backwards calling the exit functions 302 * for the pernet modules whose init functions did not fail. 303 */ 304 list_add(&net->exit_list, &net_exit_list); 305 saved_ops = ops; 306 list_for_each_entry_continue_reverse(ops, &pernet_list, list) 307 ops_exit_list(ops, &net_exit_list); 308 309 ops = saved_ops; 310 list_for_each_entry_continue_reverse(ops, &pernet_list, list) 311 ops_free_list(ops, &net_exit_list); 312 313 rcu_barrier(); 314 goto out; 315 } 316 317 318 #ifdef CONFIG_NET_NS 319 static struct ucounts *inc_net_namespaces(struct user_namespace *ns) 320 { 321 return inc_ucount(ns, current_euid(), UCOUNT_NET_NAMESPACES); 322 } 323 324 static void dec_net_namespaces(struct ucounts *ucounts) 325 { 326 dec_ucount(ucounts, UCOUNT_NET_NAMESPACES); 327 } 328 329 static struct kmem_cache *net_cachep; 330 static struct workqueue_struct *netns_wq; 331 332 static struct net *net_alloc(void) 333 { 334 struct net *net = NULL; 335 struct net_generic *ng; 336 337 ng = net_alloc_generic(); 338 if (!ng) 339 goto out; 340 341 net = kmem_cache_zalloc(net_cachep, GFP_KERNEL); 342 if (!net) 343 goto out_free; 344 345 rcu_assign_pointer(net->gen, ng); 346 out: 347 return net; 348 349 out_free: 350 kfree(ng); 351 goto out; 352 } 353 354 static void net_free(struct net *net) 355 { 356 kfree(rcu_access_pointer(net->gen)); 357 kmem_cache_free(net_cachep, net); 358 } 359 360 void net_drop_ns(void *p) 361 { 362 struct net *ns = p; 363 if (ns && atomic_dec_and_test(&ns->passive)) 364 net_free(ns); 365 } 366 367 struct net *copy_net_ns(unsigned long flags, 368 struct user_namespace *user_ns, struct net *old_net) 369 { 370 struct ucounts *ucounts; 371 struct net *net; 372 int rv; 373 374 if (!(flags & CLONE_NEWNET)) 375 return get_net(old_net); 376 377 ucounts = inc_net_namespaces(user_ns); 378 if (!ucounts) 379 return ERR_PTR(-ENOSPC); 380 381 net = net_alloc(); 382 if (!net) { 383 dec_net_namespaces(ucounts); 384 return ERR_PTR(-ENOMEM); 385 } 386 387 get_user_ns(user_ns); 388 389 rv = mutex_lock_killable(&net_mutex); 390 if (rv < 0) { 391 net_free(net); 392 dec_net_namespaces(ucounts); 393 put_user_ns(user_ns); 394 return ERR_PTR(rv); 395 } 396 397 net->ucounts = ucounts; 398 rv = setup_net(net, user_ns); 399 if (rv == 0) { 400 rtnl_lock(); 401 list_add_tail_rcu(&net->list, &net_namespace_list); 402 rtnl_unlock(); 403 } 404 mutex_unlock(&net_mutex); 405 if (rv < 0) { 406 dec_net_namespaces(ucounts); 407 put_user_ns(user_ns); 408 net_drop_ns(net); 409 return ERR_PTR(rv); 410 } 411 return net; 412 } 413 414 static DEFINE_SPINLOCK(cleanup_list_lock); 415 static LIST_HEAD(cleanup_list); /* Must hold cleanup_list_lock to touch */ 416 417 static void cleanup_net(struct work_struct *work) 418 { 419 const struct pernet_operations *ops; 420 struct net *net, *tmp; 421 struct list_head net_kill_list; 422 LIST_HEAD(net_exit_list); 423 424 /* Atomically snapshot the list of namespaces to cleanup */ 425 spin_lock_irq(&cleanup_list_lock); 426 list_replace_init(&cleanup_list, &net_kill_list); 427 spin_unlock_irq(&cleanup_list_lock); 428 429 mutex_lock(&net_mutex); 430 431 /* Don't let anyone else find us. */ 432 rtnl_lock(); 433 list_for_each_entry(net, &net_kill_list, cleanup_list) { 434 list_del_rcu(&net->list); 435 list_add_tail(&net->exit_list, &net_exit_list); 436 for_each_net(tmp) { 437 int id; 438 439 spin_lock_bh(&tmp->nsid_lock); 440 id = __peernet2id(tmp, net); 441 if (id >= 0) 442 idr_remove(&tmp->netns_ids, id); 443 spin_unlock_bh(&tmp->nsid_lock); 444 if (id >= 0) 445 rtnl_net_notifyid(tmp, RTM_DELNSID, id); 446 } 447 spin_lock_bh(&net->nsid_lock); 448 idr_destroy(&net->netns_ids); 449 spin_unlock_bh(&net->nsid_lock); 450 451 } 452 rtnl_unlock(); 453 454 /* 455 * Another CPU might be rcu-iterating the list, wait for it. 456 * This needs to be before calling the exit() notifiers, so 457 * the rcu_barrier() below isn't sufficient alone. 458 */ 459 synchronize_rcu(); 460 461 /* Run all of the network namespace exit methods */ 462 list_for_each_entry_reverse(ops, &pernet_list, list) 463 ops_exit_list(ops, &net_exit_list); 464 465 /* Free the net generic variables */ 466 list_for_each_entry_reverse(ops, &pernet_list, list) 467 ops_free_list(ops, &net_exit_list); 468 469 mutex_unlock(&net_mutex); 470 471 /* Ensure there are no outstanding rcu callbacks using this 472 * network namespace. 473 */ 474 rcu_barrier(); 475 476 /* Finally it is safe to free my network namespace structure */ 477 list_for_each_entry_safe(net, tmp, &net_exit_list, exit_list) { 478 list_del_init(&net->exit_list); 479 dec_net_namespaces(net->ucounts); 480 put_user_ns(net->user_ns); 481 net_drop_ns(net); 482 } 483 } 484 static DECLARE_WORK(net_cleanup_work, cleanup_net); 485 486 void __put_net(struct net *net) 487 { 488 /* Cleanup the network namespace in process context */ 489 unsigned long flags; 490 491 spin_lock_irqsave(&cleanup_list_lock, flags); 492 list_add(&net->cleanup_list, &cleanup_list); 493 spin_unlock_irqrestore(&cleanup_list_lock, flags); 494 495 queue_work(netns_wq, &net_cleanup_work); 496 } 497 EXPORT_SYMBOL_GPL(__put_net); 498 499 struct net *get_net_ns_by_fd(int fd) 500 { 501 struct file *file; 502 struct ns_common *ns; 503 struct net *net; 504 505 file = proc_ns_fget(fd); 506 if (IS_ERR(file)) 507 return ERR_CAST(file); 508 509 ns = get_proc_ns(file_inode(file)); 510 if (ns->ops == &netns_operations) 511 net = get_net(container_of(ns, struct net, ns)); 512 else 513 net = ERR_PTR(-EINVAL); 514 515 fput(file); 516 return net; 517 } 518 519 #else 520 struct net *get_net_ns_by_fd(int fd) 521 { 522 return ERR_PTR(-EINVAL); 523 } 524 #endif 525 EXPORT_SYMBOL_GPL(get_net_ns_by_fd); 526 527 struct net *get_net_ns_by_pid(pid_t pid) 528 { 529 struct task_struct *tsk; 530 struct net *net; 531 532 /* Lookup the network namespace */ 533 net = ERR_PTR(-ESRCH); 534 rcu_read_lock(); 535 tsk = find_task_by_vpid(pid); 536 if (tsk) { 537 struct nsproxy *nsproxy; 538 task_lock(tsk); 539 nsproxy = tsk->nsproxy; 540 if (nsproxy) 541 net = get_net(nsproxy->net_ns); 542 task_unlock(tsk); 543 } 544 rcu_read_unlock(); 545 return net; 546 } 547 EXPORT_SYMBOL_GPL(get_net_ns_by_pid); 548 549 static __net_init int net_ns_net_init(struct net *net) 550 { 551 #ifdef CONFIG_NET_NS 552 net->ns.ops = &netns_operations; 553 #endif 554 return ns_alloc_inum(&net->ns); 555 } 556 557 static __net_exit void net_ns_net_exit(struct net *net) 558 { 559 ns_free_inum(&net->ns); 560 } 561 562 static struct pernet_operations __net_initdata net_ns_ops = { 563 .init = net_ns_net_init, 564 .exit = net_ns_net_exit, 565 }; 566 567 static const struct nla_policy rtnl_net_policy[NETNSA_MAX + 1] = { 568 [NETNSA_NONE] = { .type = NLA_UNSPEC }, 569 [NETNSA_NSID] = { .type = NLA_S32 }, 570 [NETNSA_PID] = { .type = NLA_U32 }, 571 [NETNSA_FD] = { .type = NLA_U32 }, 572 }; 573 574 static int rtnl_net_newid(struct sk_buff *skb, struct nlmsghdr *nlh) 575 { 576 struct net *net = sock_net(skb->sk); 577 struct nlattr *tb[NETNSA_MAX + 1]; 578 struct net *peer; 579 int nsid, err; 580 581 err = nlmsg_parse(nlh, sizeof(struct rtgenmsg), tb, NETNSA_MAX, 582 rtnl_net_policy); 583 if (err < 0) 584 return err; 585 if (!tb[NETNSA_NSID]) 586 return -EINVAL; 587 nsid = nla_get_s32(tb[NETNSA_NSID]); 588 589 if (tb[NETNSA_PID]) 590 peer = get_net_ns_by_pid(nla_get_u32(tb[NETNSA_PID])); 591 else if (tb[NETNSA_FD]) 592 peer = get_net_ns_by_fd(nla_get_u32(tb[NETNSA_FD])); 593 else 594 return -EINVAL; 595 if (IS_ERR(peer)) 596 return PTR_ERR(peer); 597 598 spin_lock_bh(&net->nsid_lock); 599 if (__peernet2id(net, peer) >= 0) { 600 spin_unlock_bh(&net->nsid_lock); 601 err = -EEXIST; 602 goto out; 603 } 604 605 err = alloc_netid(net, peer, nsid); 606 spin_unlock_bh(&net->nsid_lock); 607 if (err >= 0) { 608 rtnl_net_notifyid(net, RTM_NEWNSID, err); 609 err = 0; 610 } 611 out: 612 put_net(peer); 613 return err; 614 } 615 616 static int rtnl_net_get_size(void) 617 { 618 return NLMSG_ALIGN(sizeof(struct rtgenmsg)) 619 + nla_total_size(sizeof(s32)) /* NETNSA_NSID */ 620 ; 621 } 622 623 static int rtnl_net_fill(struct sk_buff *skb, u32 portid, u32 seq, int flags, 624 int cmd, struct net *net, int nsid) 625 { 626 struct nlmsghdr *nlh; 627 struct rtgenmsg *rth; 628 629 nlh = nlmsg_put(skb, portid, seq, cmd, sizeof(*rth), flags); 630 if (!nlh) 631 return -EMSGSIZE; 632 633 rth = nlmsg_data(nlh); 634 rth->rtgen_family = AF_UNSPEC; 635 636 if (nla_put_s32(skb, NETNSA_NSID, nsid)) 637 goto nla_put_failure; 638 639 nlmsg_end(skb, nlh); 640 return 0; 641 642 nla_put_failure: 643 nlmsg_cancel(skb, nlh); 644 return -EMSGSIZE; 645 } 646 647 static int rtnl_net_getid(struct sk_buff *skb, struct nlmsghdr *nlh) 648 { 649 struct net *net = sock_net(skb->sk); 650 struct nlattr *tb[NETNSA_MAX + 1]; 651 struct sk_buff *msg; 652 struct net *peer; 653 int err, id; 654 655 err = nlmsg_parse(nlh, sizeof(struct rtgenmsg), tb, NETNSA_MAX, 656 rtnl_net_policy); 657 if (err < 0) 658 return err; 659 if (tb[NETNSA_PID]) 660 peer = get_net_ns_by_pid(nla_get_u32(tb[NETNSA_PID])); 661 else if (tb[NETNSA_FD]) 662 peer = get_net_ns_by_fd(nla_get_u32(tb[NETNSA_FD])); 663 else 664 return -EINVAL; 665 666 if (IS_ERR(peer)) 667 return PTR_ERR(peer); 668 669 msg = nlmsg_new(rtnl_net_get_size(), GFP_KERNEL); 670 if (!msg) { 671 err = -ENOMEM; 672 goto out; 673 } 674 675 id = peernet2id(net, peer); 676 err = rtnl_net_fill(msg, NETLINK_CB(skb).portid, nlh->nlmsg_seq, 0, 677 RTM_NEWNSID, net, id); 678 if (err < 0) 679 goto err_out; 680 681 err = rtnl_unicast(msg, net, NETLINK_CB(skb).portid); 682 goto out; 683 684 err_out: 685 nlmsg_free(msg); 686 out: 687 put_net(peer); 688 return err; 689 } 690 691 struct rtnl_net_dump_cb { 692 struct net *net; 693 struct sk_buff *skb; 694 struct netlink_callback *cb; 695 int idx; 696 int s_idx; 697 }; 698 699 static int rtnl_net_dumpid_one(int id, void *peer, void *data) 700 { 701 struct rtnl_net_dump_cb *net_cb = (struct rtnl_net_dump_cb *)data; 702 int ret; 703 704 if (net_cb->idx < net_cb->s_idx) 705 goto cont; 706 707 ret = rtnl_net_fill(net_cb->skb, NETLINK_CB(net_cb->cb->skb).portid, 708 net_cb->cb->nlh->nlmsg_seq, NLM_F_MULTI, 709 RTM_NEWNSID, net_cb->net, id); 710 if (ret < 0) 711 return ret; 712 713 cont: 714 net_cb->idx++; 715 return 0; 716 } 717 718 static int rtnl_net_dumpid(struct sk_buff *skb, struct netlink_callback *cb) 719 { 720 struct net *net = sock_net(skb->sk); 721 struct rtnl_net_dump_cb net_cb = { 722 .net = net, 723 .skb = skb, 724 .cb = cb, 725 .idx = 0, 726 .s_idx = cb->args[0], 727 }; 728 729 spin_lock_bh(&net->nsid_lock); 730 idr_for_each(&net->netns_ids, rtnl_net_dumpid_one, &net_cb); 731 spin_unlock_bh(&net->nsid_lock); 732 733 cb->args[0] = net_cb.idx; 734 return skb->len; 735 } 736 737 static void rtnl_net_notifyid(struct net *net, int cmd, int id) 738 { 739 struct sk_buff *msg; 740 int err = -ENOMEM; 741 742 msg = nlmsg_new(rtnl_net_get_size(), GFP_KERNEL); 743 if (!msg) 744 goto out; 745 746 err = rtnl_net_fill(msg, 0, 0, 0, cmd, net, id); 747 if (err < 0) 748 goto err_out; 749 750 rtnl_notify(msg, net, 0, RTNLGRP_NSID, NULL, 0); 751 return; 752 753 err_out: 754 nlmsg_free(msg); 755 out: 756 rtnl_set_sk_err(net, RTNLGRP_NSID, err); 757 } 758 759 static int __init net_ns_init(void) 760 { 761 struct net_generic *ng; 762 763 #ifdef CONFIG_NET_NS 764 net_cachep = kmem_cache_create("net_namespace", sizeof(struct net), 765 SMP_CACHE_BYTES, 766 SLAB_PANIC, NULL); 767 768 /* Create workqueue for cleanup */ 769 netns_wq = create_singlethread_workqueue("netns"); 770 if (!netns_wq) 771 panic("Could not create netns workq"); 772 #endif 773 774 ng = net_alloc_generic(); 775 if (!ng) 776 panic("Could not allocate generic netns"); 777 778 rcu_assign_pointer(init_net.gen, ng); 779 780 mutex_lock(&net_mutex); 781 if (setup_net(&init_net, &init_user_ns)) 782 panic("Could not setup the initial network namespace"); 783 784 init_net_initialized = true; 785 786 rtnl_lock(); 787 list_add_tail_rcu(&init_net.list, &net_namespace_list); 788 rtnl_unlock(); 789 790 mutex_unlock(&net_mutex); 791 792 register_pernet_subsys(&net_ns_ops); 793 794 rtnl_register(PF_UNSPEC, RTM_NEWNSID, rtnl_net_newid, NULL, NULL); 795 rtnl_register(PF_UNSPEC, RTM_GETNSID, rtnl_net_getid, rtnl_net_dumpid, 796 NULL); 797 798 return 0; 799 } 800 801 pure_initcall(net_ns_init); 802 803 #ifdef CONFIG_NET_NS 804 static int __register_pernet_operations(struct list_head *list, 805 struct pernet_operations *ops) 806 { 807 struct net *net; 808 int error; 809 LIST_HEAD(net_exit_list); 810 811 list_add_tail(&ops->list, list); 812 if (ops->init || (ops->id && ops->size)) { 813 for_each_net(net) { 814 error = ops_init(ops, net); 815 if (error) 816 goto out_undo; 817 list_add_tail(&net->exit_list, &net_exit_list); 818 } 819 } 820 return 0; 821 822 out_undo: 823 /* If I have an error cleanup all namespaces I initialized */ 824 list_del(&ops->list); 825 ops_exit_list(ops, &net_exit_list); 826 ops_free_list(ops, &net_exit_list); 827 return error; 828 } 829 830 static void __unregister_pernet_operations(struct pernet_operations *ops) 831 { 832 struct net *net; 833 LIST_HEAD(net_exit_list); 834 835 list_del(&ops->list); 836 for_each_net(net) 837 list_add_tail(&net->exit_list, &net_exit_list); 838 ops_exit_list(ops, &net_exit_list); 839 ops_free_list(ops, &net_exit_list); 840 } 841 842 #else 843 844 static int __register_pernet_operations(struct list_head *list, 845 struct pernet_operations *ops) 846 { 847 if (!init_net_initialized) { 848 list_add_tail(&ops->list, list); 849 return 0; 850 } 851 852 return ops_init(ops, &init_net); 853 } 854 855 static void __unregister_pernet_operations(struct pernet_operations *ops) 856 { 857 if (!init_net_initialized) { 858 list_del(&ops->list); 859 } else { 860 LIST_HEAD(net_exit_list); 861 list_add(&init_net.exit_list, &net_exit_list); 862 ops_exit_list(ops, &net_exit_list); 863 ops_free_list(ops, &net_exit_list); 864 } 865 } 866 867 #endif /* CONFIG_NET_NS */ 868 869 static DEFINE_IDA(net_generic_ids); 870 871 static int register_pernet_operations(struct list_head *list, 872 struct pernet_operations *ops) 873 { 874 int error; 875 876 if (ops->id) { 877 again: 878 error = ida_get_new_above(&net_generic_ids, MIN_PERNET_OPS_ID, ops->id); 879 if (error < 0) { 880 if (error == -EAGAIN) { 881 ida_pre_get(&net_generic_ids, GFP_KERNEL); 882 goto again; 883 } 884 return error; 885 } 886 max_gen_ptrs = max(max_gen_ptrs, *ops->id + 1); 887 } 888 error = __register_pernet_operations(list, ops); 889 if (error) { 890 rcu_barrier(); 891 if (ops->id) 892 ida_remove(&net_generic_ids, *ops->id); 893 } 894 895 return error; 896 } 897 898 static void unregister_pernet_operations(struct pernet_operations *ops) 899 { 900 901 __unregister_pernet_operations(ops); 902 rcu_barrier(); 903 if (ops->id) 904 ida_remove(&net_generic_ids, *ops->id); 905 } 906 907 /** 908 * register_pernet_subsys - register a network namespace subsystem 909 * @ops: pernet operations structure for the subsystem 910 * 911 * Register a subsystem which has init and exit functions 912 * that are called when network namespaces are created and 913 * destroyed respectively. 914 * 915 * When registered all network namespace init functions are 916 * called for every existing network namespace. Allowing kernel 917 * modules to have a race free view of the set of network namespaces. 918 * 919 * When a new network namespace is created all of the init 920 * methods are called in the order in which they were registered. 921 * 922 * When a network namespace is destroyed all of the exit methods 923 * are called in the reverse of the order with which they were 924 * registered. 925 */ 926 int register_pernet_subsys(struct pernet_operations *ops) 927 { 928 int error; 929 mutex_lock(&net_mutex); 930 error = register_pernet_operations(first_device, ops); 931 mutex_unlock(&net_mutex); 932 return error; 933 } 934 EXPORT_SYMBOL_GPL(register_pernet_subsys); 935 936 /** 937 * unregister_pernet_subsys - unregister a network namespace subsystem 938 * @ops: pernet operations structure to manipulate 939 * 940 * Remove the pernet operations structure from the list to be 941 * used when network namespaces are created or destroyed. In 942 * addition run the exit method for all existing network 943 * namespaces. 944 */ 945 void unregister_pernet_subsys(struct pernet_operations *ops) 946 { 947 mutex_lock(&net_mutex); 948 unregister_pernet_operations(ops); 949 mutex_unlock(&net_mutex); 950 } 951 EXPORT_SYMBOL_GPL(unregister_pernet_subsys); 952 953 /** 954 * register_pernet_device - register a network namespace device 955 * @ops: pernet operations structure for the subsystem 956 * 957 * Register a device which has init and exit functions 958 * that are called when network namespaces are created and 959 * destroyed respectively. 960 * 961 * When registered all network namespace init functions are 962 * called for every existing network namespace. Allowing kernel 963 * modules to have a race free view of the set of network namespaces. 964 * 965 * When a new network namespace is created all of the init 966 * methods are called in the order in which they were registered. 967 * 968 * When a network namespace is destroyed all of the exit methods 969 * are called in the reverse of the order with which they were 970 * registered. 971 */ 972 int register_pernet_device(struct pernet_operations *ops) 973 { 974 int error; 975 mutex_lock(&net_mutex); 976 error = register_pernet_operations(&pernet_list, ops); 977 if (!error && (first_device == &pernet_list)) 978 first_device = &ops->list; 979 mutex_unlock(&net_mutex); 980 return error; 981 } 982 EXPORT_SYMBOL_GPL(register_pernet_device); 983 984 /** 985 * unregister_pernet_device - unregister a network namespace netdevice 986 * @ops: pernet operations structure to manipulate 987 * 988 * Remove the pernet operations structure from the list to be 989 * used when network namespaces are created or destroyed. In 990 * addition run the exit method for all existing network 991 * namespaces. 992 */ 993 void unregister_pernet_device(struct pernet_operations *ops) 994 { 995 mutex_lock(&net_mutex); 996 if (&ops->list == first_device) 997 first_device = first_device->next; 998 unregister_pernet_operations(ops); 999 mutex_unlock(&net_mutex); 1000 } 1001 EXPORT_SYMBOL_GPL(unregister_pernet_device); 1002 1003 #ifdef CONFIG_NET_NS 1004 static struct ns_common *netns_get(struct task_struct *task) 1005 { 1006 struct net *net = NULL; 1007 struct nsproxy *nsproxy; 1008 1009 task_lock(task); 1010 nsproxy = task->nsproxy; 1011 if (nsproxy) 1012 net = get_net(nsproxy->net_ns); 1013 task_unlock(task); 1014 1015 return net ? &net->ns : NULL; 1016 } 1017 1018 static inline struct net *to_net_ns(struct ns_common *ns) 1019 { 1020 return container_of(ns, struct net, ns); 1021 } 1022 1023 static void netns_put(struct ns_common *ns) 1024 { 1025 put_net(to_net_ns(ns)); 1026 } 1027 1028 static int netns_install(struct nsproxy *nsproxy, struct ns_common *ns) 1029 { 1030 struct net *net = to_net_ns(ns); 1031 1032 if (!ns_capable(net->user_ns, CAP_SYS_ADMIN) || 1033 !ns_capable(current_user_ns(), CAP_SYS_ADMIN)) 1034 return -EPERM; 1035 1036 put_net(nsproxy->net_ns); 1037 nsproxy->net_ns = get_net(net); 1038 return 0; 1039 } 1040 1041 static struct user_namespace *netns_owner(struct ns_common *ns) 1042 { 1043 return to_net_ns(ns)->user_ns; 1044 } 1045 1046 const struct proc_ns_operations netns_operations = { 1047 .name = "net", 1048 .type = CLONE_NEWNET, 1049 .get = netns_get, 1050 .put = netns_put, 1051 .install = netns_install, 1052 .owner = netns_owner, 1053 }; 1054 #endif 1055