#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt

#include <linux/workqueue.h>
#include <linux/rtnetlink.h>
#include <linux/cache.h>
#include <linux/slab.h>
#include <linux/list.h>
#include <linux/delay.h>
#include <linux/sched.h>
#include <linux/idr.h>
#include <linux/rculist.h>
#include <linux/nsproxy.h>
#include <linux/fs.h>
#include <linux/proc_ns.h>
#include <linux/file.h>
#include <linux/export.h>
#include <linux/user_namespace.h>
#include <linux/net_namespace.h>
#include <linux/sched/task.h>

#include <net/sock.h>
#include <net/netlink.h>
#include <net/net_namespace.h>
#include <net/netns/generic.h>

/*
 * Our network namespace constructor/destructor lists
 */

static LIST_HEAD(pernet_list);
static struct list_head *first_device = &pernet_list;
DEFINE_MUTEX(net_mutex);

LIST_HEAD(net_namespace_list);
EXPORT_SYMBOL_GPL(net_namespace_list);

struct net init_net = {
	.dev_base_head = LIST_HEAD_INIT(init_net.dev_base_head),
};
EXPORT_SYMBOL(init_net);

static bool init_net_initialized;

#define MIN_PERNET_OPS_ID	\
	((sizeof(struct net_generic) + sizeof(void *) - 1) / sizeof(void *))

#define INITIAL_NET_GEN_PTRS	13 /* +1 for len +2 for rcu_head */

static unsigned int max_gen_ptrs = INITIAL_NET_GEN_PTRS;

static struct net_generic *net_alloc_generic(void)
{
	struct net_generic *ng;
	unsigned int generic_size = offsetof(struct net_generic, ptr[max_gen_ptrs]);

	ng = kzalloc(generic_size, GFP_KERNEL);
	if (ng)
		ng->s.len = max_gen_ptrs;

	return ng;
}

static int net_assign_generic(struct net *net, unsigned int id, void *data)
{
	struct net_generic *ng, *old_ng;

	BUG_ON(!mutex_is_locked(&net_mutex));
	BUG_ON(id < MIN_PERNET_OPS_ID);

	old_ng = rcu_dereference_protected(net->gen,
					   lockdep_is_held(&net_mutex));
	if (old_ng->s.len > id) {
		old_ng->ptr[id] = data;
		return 0;
	}

	ng = net_alloc_generic();
	if (ng == NULL)
		return -ENOMEM;

	/*
	 * Some synchronisation notes:
	 *
	 * net_generic() walks the net->gen array inside an RCU read-side
	 * section. Besides, once set, the net->gen->ptr[x] pointer never
	 * changes (see the rules in netns/generic.h).
	 *
	 * That said, we simply duplicate this array and schedule the old
	 * copy for kfree after a grace period.
	 */

	memcpy(&ng->ptr[MIN_PERNET_OPS_ID], &old_ng->ptr[MIN_PERNET_OPS_ID],
	       (old_ng->s.len - MIN_PERNET_OPS_ID) * sizeof(void *));
	ng->ptr[id] = data;

	rcu_assign_pointer(net->gen, ng);
	kfree_rcu(old_ng, s.rcu);
	return 0;
}
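
/*
 * Example of the generic-pointer machinery (illustrative sketch only; the
 * "foo" names are hypothetical and not part of this file): a subsystem
 * that wants per-namespace state sets ->id and ->size in its
 * pernet_operations. ops_init() below then kzalloc()s the state and
 * publishes it with net_assign_generic(), after which the subsystem can
 * fetch it back locklessly with net_generic():
 *
 *	static unsigned int foo_net_id;
 *
 *	struct foo_net {
 *		int counter;
 *	};
 *
 *	static int __net_init foo_net_init(struct net *net)
 *	{
 *		struct foo_net *fn = net_generic(net, foo_net_id);
 *
 *		fn->counter = 0;	// redundant: ops_init() kzalloc()ed it
 *		return 0;
 *	}
 *
 *	static struct pernet_operations foo_net_ops = {
 *		.init = foo_net_init,
 *		.id   = &foo_net_id,
 *		.size = sizeof(struct foo_net),
 *	};
 */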

static int ops_init(const struct pernet_operations *ops, struct net *net)
{
	int err = -ENOMEM;
	void *data = NULL;

	if (ops->id && ops->size) {
		data = kzalloc(ops->size, GFP_KERNEL);
		if (!data)
			goto out;

		err = net_assign_generic(net, *ops->id, data);
		if (err)
			goto cleanup;
	}
	err = 0;
	if (ops->init)
		err = ops->init(net);
	if (!err)
		return 0;

cleanup:
	kfree(data);

out:
	return err;
}

static void ops_free(const struct pernet_operations *ops, struct net *net)
{
	if (ops->id && ops->size)
		kfree(net_generic(net, *ops->id));
}

static void ops_exit_list(const struct pernet_operations *ops,
			  struct list_head *net_exit_list)
{
	struct net *net;

	if (ops->exit) {
		list_for_each_entry(net, net_exit_list, exit_list)
			ops->exit(net);
	}
	if (ops->exit_batch)
		ops->exit_batch(net_exit_list);
}

static void ops_free_list(const struct pernet_operations *ops,
			  struct list_head *net_exit_list)
{
	struct net *net;

	if (ops->size && ops->id) {
		list_for_each_entry(net, net_exit_list, exit_list)
			ops_free(ops, net);
	}
}

/* should be called with nsid_lock held */
static int alloc_netid(struct net *net, struct net *peer, int reqid)
{
	int min = 0, max = 0;

	if (reqid >= 0) {
		min = reqid;
		max = reqid + 1;
	}

	return idr_alloc(&net->netns_ids, peer, min, max, GFP_ATOMIC);
}

/* This function is used by idr_for_each(). If net is equal to peer, the
 * function returns the id so that idr_for_each() stops. Because we cannot
 * return the id 0 (idr_for_each() would not stop), we return the magic
 * value NET_ID_ZERO (-1) for it.
 */
#define NET_ID_ZERO -1
static int net_eq_idr(int id, void *net, void *peer)
{
	if (net_eq(net, peer))
		return id ? : NET_ID_ZERO;
	return 0;
}

/* Should be called with nsid_lock held. If a new id is assigned, the bool
 * alloc is set to true, thus the caller knows that the new id must be
 * notified via rtnl.
 */
static int __peernet2id_alloc(struct net *net, struct net *peer, bool *alloc)
{
	int id = idr_for_each(&net->netns_ids, net_eq_idr, peer);
	bool alloc_it = *alloc;

	*alloc = false;

	/* Magic value for id 0. */
	if (id == NET_ID_ZERO)
		return 0;
	if (id > 0)
		return id;

	if (alloc_it) {
		id = alloc_netid(net, peer, -1);
		*alloc = true;
		return id >= 0 ? id : NETNSA_NSID_NOT_ASSIGNED;
	}

	return NETNSA_NSID_NOT_ASSIGNED;
}

/* should be called with nsid_lock held */
static int __peernet2id(struct net *net, struct net *peer)
{
	bool no = false;

	return __peernet2id_alloc(net, peer, &no);
}

static void rtnl_net_notifyid(struct net *net, int cmd, int id);
/* This function returns the id of a peer netns. If no id is assigned, one
 * will be allocated and returned.
 */
int peernet2id_alloc(struct net *net, struct net *peer)
{
	bool alloc;
	int id;

	if (atomic_read(&net->count) == 0)
		return NETNSA_NSID_NOT_ASSIGNED;
	spin_lock_bh(&net->nsid_lock);
	alloc = atomic_read(&peer->count) == 0 ? false : true;
	id = __peernet2id_alloc(net, peer, &alloc);
	spin_unlock_bh(&net->nsid_lock);
	if (alloc && id >= 0)
		rtnl_net_notifyid(net, RTM_NEWNSID, id);
	return id;
}

/* This function returns, if assigned, the id of a peer netns. */
int peernet2id(struct net *net, struct net *peer)
{
	int id;

	spin_lock_bh(&net->nsid_lock);
	id = __peernet2id(net, peer);
	spin_unlock_bh(&net->nsid_lock);
	return id;
}
EXPORT_SYMBOL(peernet2id);
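
/*
 * Example caller (illustrative sketch, simplified from how rtnetlink uses
 * this API): when dumping a device whose link lives in another namespace,
 * peernet2id_alloc() hands userspace a usable nsid attribute:
 *
 *	int id = peernet2id_alloc(dev_net(dev), link_net);
 *
 *	if (nla_put_s32(skb, IFLA_LINK_NETNSID, id))
 *		goto nla_put_failure;
 */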

/* This function returns true if the peer netns has an id assigned into the
 * current netns.
 */
bool peernet_has_id(struct net *net, struct net *peer)
{
	return peernet2id(net, peer) >= 0;
}

struct net *get_net_ns_by_id(struct net *net, int id)
{
	struct net *peer;

	if (id < 0)
		return NULL;

	rcu_read_lock();
	spin_lock_bh(&net->nsid_lock);
	peer = idr_find(&net->netns_ids, id);
	/* The peer may already be on its way out; only take a reference if
	 * its refcount is still live (maybe_get_net(), as in later kernels),
	 * instead of blindly resurrecting it with get_net().
	 */
	if (peer)
		peer = maybe_get_net(peer);
	spin_unlock_bh(&net->nsid_lock);
	rcu_read_unlock();

	return peer;
}

/*
 * setup_net runs the initializers for the network namespace object.
 */
static __net_init int setup_net(struct net *net, struct user_namespace *user_ns)
{
	/* Must be called with net_mutex held */
	const struct pernet_operations *ops, *saved_ops;
	int error = 0;
	LIST_HEAD(net_exit_list);

	atomic_set(&net->count, 1);
	atomic_set(&net->passive, 1);
	net->dev_base_seq = 1;
	net->user_ns = user_ns;
	idr_init(&net->netns_ids);
	spin_lock_init(&net->nsid_lock);

	list_for_each_entry(ops, &pernet_list, list) {
		error = ops_init(ops, net);
		if (error < 0)
			goto out_undo;
	}
out:
	return error;

out_undo:
	/* Walk through the list backwards calling the exit functions
	 * for the pernet modules whose init functions did not fail.
	 */
	list_add(&net->exit_list, &net_exit_list);
	saved_ops = ops;
	list_for_each_entry_continue_reverse(ops, &pernet_list, list)
		ops_exit_list(ops, &net_exit_list);

	ops = saved_ops;
	list_for_each_entry_continue_reverse(ops, &pernet_list, list)
		ops_free_list(ops, &net_exit_list);

	rcu_barrier();
	goto out;
}


#ifdef CONFIG_NET_NS
static struct ucounts *inc_net_namespaces(struct user_namespace *ns)
{
	return inc_ucount(ns, current_euid(), UCOUNT_NET_NAMESPACES);
}

static void dec_net_namespaces(struct ucounts *ucounts)
{
	dec_ucount(ucounts, UCOUNT_NET_NAMESPACES);
}

static struct kmem_cache *net_cachep;
static struct workqueue_struct *netns_wq;

static struct net *net_alloc(void)
{
	struct net *net = NULL;
	struct net_generic *ng;

	ng = net_alloc_generic();
	if (!ng)
		goto out;

	net = kmem_cache_zalloc(net_cachep, GFP_KERNEL);
	if (!net)
		goto out_free;

	rcu_assign_pointer(net->gen, ng);
out:
	return net;

out_free:
	kfree(ng);
	goto out;
}

static void net_free(struct net *net)
{
	kfree(rcu_access_pointer(net->gen));
	kmem_cache_free(net_cachep, net);
}
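
/*
 * A struct net carries two reference counts (a summary, for orientation):
 * net->count tracks active users and is dropped via put_net(); when it
 * hits zero, __put_net() queues cleanup_net(). net->passive only keeps
 * the struct net memory itself alive for lockless readers while teardown
 * runs; net_drop_ns() below frees the namespace once that count drops to
 * zero as well.
 */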
void net_drop_ns(void *p)
{
	struct net *ns = p;

	if (ns && atomic_dec_and_test(&ns->passive))
		net_free(ns);
}

struct net *copy_net_ns(unsigned long flags,
			struct user_namespace *user_ns, struct net *old_net)
{
	struct ucounts *ucounts;
	struct net *net;
	int rv;

	if (!(flags & CLONE_NEWNET))
		return get_net(old_net);

	ucounts = inc_net_namespaces(user_ns);
	if (!ucounts)
		return ERR_PTR(-ENOSPC);

	net = net_alloc();
	if (!net) {
		dec_net_namespaces(ucounts);
		return ERR_PTR(-ENOMEM);
	}

	get_user_ns(user_ns);

	rv = mutex_lock_killable(&net_mutex);
	if (rv < 0) {
		net_free(net);
		dec_net_namespaces(ucounts);
		put_user_ns(user_ns);
		return ERR_PTR(rv);
	}

	net->ucounts = ucounts;
	rv = setup_net(net, user_ns);
	if (rv == 0) {
		rtnl_lock();
		list_add_tail_rcu(&net->list, &net_namespace_list);
		rtnl_unlock();
	}
	mutex_unlock(&net_mutex);
	if (rv < 0) {
		dec_net_namespaces(ucounts);
		put_user_ns(user_ns);
		net_drop_ns(net);
		return ERR_PTR(rv);
	}
	return net;
}

static DEFINE_SPINLOCK(cleanup_list_lock);
static LIST_HEAD(cleanup_list); /* Must hold cleanup_list_lock to touch */

static void cleanup_net(struct work_struct *work)
{
	const struct pernet_operations *ops;
	struct net *net, *tmp;
	struct list_head net_kill_list;
	LIST_HEAD(net_exit_list);

	/* Atomically snapshot the list of namespaces to cleanup */
	spin_lock_irq(&cleanup_list_lock);
	list_replace_init(&cleanup_list, &net_kill_list);
	spin_unlock_irq(&cleanup_list_lock);

	mutex_lock(&net_mutex);

	/* Don't let anyone else find us. */
	rtnl_lock();
	list_for_each_entry(net, &net_kill_list, cleanup_list) {
		list_del_rcu(&net->list);
		list_add_tail(&net->exit_list, &net_exit_list);
		for_each_net(tmp) {
			int id;

			spin_lock_bh(&tmp->nsid_lock);
			id = __peernet2id(tmp, net);
			if (id >= 0)
				idr_remove(&tmp->netns_ids, id);
			spin_unlock_bh(&tmp->nsid_lock);
			if (id >= 0)
				rtnl_net_notifyid(tmp, RTM_DELNSID, id);
		}
		spin_lock_bh(&net->nsid_lock);
		idr_destroy(&net->netns_ids);
		spin_unlock_bh(&net->nsid_lock);
	}
	rtnl_unlock();

	/*
	 * Another CPU might be rcu-iterating the list, wait for it.
	 * This needs to be before calling the exit() notifiers, so
	 * the rcu_barrier() below isn't sufficient alone.
	 */
	synchronize_rcu();

	/* Run all of the network namespace exit methods */
	list_for_each_entry_reverse(ops, &pernet_list, list)
		ops_exit_list(ops, &net_exit_list);

	/* Free the net generic variables */
	list_for_each_entry_reverse(ops, &pernet_list, list)
		ops_free_list(ops, &net_exit_list);

	mutex_unlock(&net_mutex);

	/* Ensure there are no outstanding rcu callbacks using this
	 * network namespace.
	 */
	rcu_barrier();

	/* Finally it is safe to free my network namespace structure */
	list_for_each_entry_safe(net, tmp, &net_exit_list, exit_list) {
		list_del_init(&net->exit_list);
		dec_net_namespaces(net->ucounts);
		put_user_ns(net->user_ns);
		net_drop_ns(net);
	}
}
static DECLARE_WORK(net_cleanup_work, cleanup_net);

void __put_net(struct net *net)
{
	/* Cleanup the network namespace in process context */
	unsigned long flags;

	spin_lock_irqsave(&cleanup_list_lock, flags);
	list_add(&net->cleanup_list, &cleanup_list);
	spin_unlock_irqrestore(&cleanup_list_lock, flags);

	queue_work(netns_wq, &net_cleanup_work);
}
EXPORT_SYMBOL_GPL(__put_net);

struct net *get_net_ns_by_fd(int fd)
{
	struct file *file;
	struct ns_common *ns;
	struct net *net;

	file = proc_ns_fget(fd);
	if (IS_ERR(file))
		return ERR_CAST(file);

	ns = get_proc_ns(file_inode(file));
	if (ns->ops == &netns_operations)
		net = get_net(container_of(ns, struct net, ns));
	else
		net = ERR_PTR(-EINVAL);

	fput(file);
	return net;
}

#else
struct net *get_net_ns_by_fd(int fd)
{
	return ERR_PTR(-EINVAL);
}
#endif
EXPORT_SYMBOL_GPL(get_net_ns_by_fd);

struct net *get_net_ns_by_pid(pid_t pid)
{
	struct task_struct *tsk;
	struct net *net;

	/* Lookup the network namespace */
	net = ERR_PTR(-ESRCH);
	rcu_read_lock();
	tsk = find_task_by_vpid(pid);
	if (tsk) {
		struct nsproxy *nsproxy;

		task_lock(tsk);
		nsproxy = tsk->nsproxy;
		if (nsproxy)
			net = get_net(nsproxy->net_ns);
		task_unlock(tsk);
	}
	rcu_read_unlock();
	return net;
}
EXPORT_SYMBOL_GPL(get_net_ns_by_pid);

static __net_init int net_ns_net_init(struct net *net)
{
#ifdef CONFIG_NET_NS
	net->ns.ops = &netns_operations;
#endif
	return ns_alloc_inum(&net->ns);
}

static __net_exit void net_ns_net_exit(struct net *net)
{
	ns_free_inum(&net->ns);
}

static struct pernet_operations __net_initdata net_ns_ops = {
	.init = net_ns_net_init,
	.exit = net_ns_net_exit,
};

static const struct nla_policy rtnl_net_policy[NETNSA_MAX + 1] = {
	[NETNSA_NONE]	= { .type = NLA_UNSPEC },
	[NETNSA_NSID]	= { .type = NLA_S32 },
	[NETNSA_PID]	= { .type = NLA_U32 },
	[NETNSA_FD]	= { .type = NLA_U32 },
};
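
/*
 * Userspace view (for orientation; the commands come from iproute2, not
 * from this file): "ip netns set <name> <id>" sends RTM_NEWNSID to assign
 * an id to a peer namespace, and "ip netns list-id" dumps the assigned
 * ids via RTM_GETNSID. The handlers below implement the kernel side of
 * both commands.
 */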

static int rtnl_net_newid(struct sk_buff *skb, struct nlmsghdr *nlh,
			  struct netlink_ext_ack *extack)
{
	struct net *net = sock_net(skb->sk);
	struct nlattr *tb[NETNSA_MAX + 1];
	struct net *peer;
	int nsid, err;

	err = nlmsg_parse(nlh, sizeof(struct rtgenmsg), tb, NETNSA_MAX,
			  rtnl_net_policy, extack);
	if (err < 0)
		return err;
	if (!tb[NETNSA_NSID])
		return -EINVAL;
	nsid = nla_get_s32(tb[NETNSA_NSID]);

	if (tb[NETNSA_PID])
		peer = get_net_ns_by_pid(nla_get_u32(tb[NETNSA_PID]));
	else if (tb[NETNSA_FD])
		peer = get_net_ns_by_fd(nla_get_u32(tb[NETNSA_FD]));
	else
		return -EINVAL;
	if (IS_ERR(peer))
		return PTR_ERR(peer);

	spin_lock_bh(&net->nsid_lock);
	if (__peernet2id(net, peer) >= 0) {
		spin_unlock_bh(&net->nsid_lock);
		err = -EEXIST;
		goto out;
	}

	err = alloc_netid(net, peer, nsid);
	spin_unlock_bh(&net->nsid_lock);
	if (err >= 0) {
		rtnl_net_notifyid(net, RTM_NEWNSID, err);
		err = 0;
	}
out:
	put_net(peer);
	return err;
}

static int rtnl_net_get_size(void)
{
	return NLMSG_ALIGN(sizeof(struct rtgenmsg))
	       + nla_total_size(sizeof(s32)) /* NETNSA_NSID */
	       ;
}

static int rtnl_net_fill(struct sk_buff *skb, u32 portid, u32 seq, int flags,
			 int cmd, struct net *net, int nsid)
{
	struct nlmsghdr *nlh;
	struct rtgenmsg *rth;

	nlh = nlmsg_put(skb, portid, seq, cmd, sizeof(*rth), flags);
	if (!nlh)
		return -EMSGSIZE;

	rth = nlmsg_data(nlh);
	rth->rtgen_family = AF_UNSPEC;

	if (nla_put_s32(skb, NETNSA_NSID, nsid))
		goto nla_put_failure;

	nlmsg_end(skb, nlh);
	return 0;

nla_put_failure:
	nlmsg_cancel(skb, nlh);
	return -EMSGSIZE;
}

static int rtnl_net_getid(struct sk_buff *skb, struct nlmsghdr *nlh,
			  struct netlink_ext_ack *extack)
{
	struct net *net = sock_net(skb->sk);
	struct nlattr *tb[NETNSA_MAX + 1];
	struct sk_buff *msg;
	struct net *peer;
	int err, id;

	err = nlmsg_parse(nlh, sizeof(struct rtgenmsg), tb, NETNSA_MAX,
			  rtnl_net_policy, extack);
	if (err < 0)
		return err;
	if (tb[NETNSA_PID])
		peer = get_net_ns_by_pid(nla_get_u32(tb[NETNSA_PID]));
	else if (tb[NETNSA_FD])
		peer = get_net_ns_by_fd(nla_get_u32(tb[NETNSA_FD]));
	else
		return -EINVAL;

	if (IS_ERR(peer))
		return PTR_ERR(peer);

	msg = nlmsg_new(rtnl_net_get_size(), GFP_KERNEL);
	if (!msg) {
		err = -ENOMEM;
		goto out;
	}

	id = peernet2id(net, peer);
	err = rtnl_net_fill(msg, NETLINK_CB(skb).portid, nlh->nlmsg_seq, 0,
			    RTM_NEWNSID, net, id);
	if (err < 0)
		goto err_out;

	err = rtnl_unicast(msg, net, NETLINK_CB(skb).portid);
	goto out;

err_out:
	nlmsg_free(msg);
out:
	put_net(peer);
	return err;
}

struct rtnl_net_dump_cb {
	struct net *net;
	struct sk_buff *skb;
	struct netlink_callback *cb;
	int idx;
	int s_idx;
};

static int rtnl_net_dumpid_one(int id, void *peer, void *data)
{
	struct rtnl_net_dump_cb *net_cb = (struct rtnl_net_dump_cb *)data;
	int ret;

	if (net_cb->idx < net_cb->s_idx)
		goto cont;

	ret = rtnl_net_fill(net_cb->skb, NETLINK_CB(net_cb->cb->skb).portid,
			    net_cb->cb->nlh->nlmsg_seq, NLM_F_MULTI,
			    RTM_NEWNSID, net_cb->net, id);
	if (ret < 0)
		return ret;

cont:
	net_cb->idx++;
	return 0;
}

static int rtnl_net_dumpid(struct sk_buff *skb, struct netlink_callback *cb)
{
	struct net *net = sock_net(skb->sk);
	struct rtnl_net_dump_cb net_cb = {
		.net = net,
		.skb = skb,
		.cb = cb,
		.idx = 0,
		.s_idx = cb->args[0],
	};

	spin_lock_bh(&net->nsid_lock);
	idr_for_each(&net->netns_ids, rtnl_net_dumpid_one, &net_cb);
	spin_unlock_bh(&net->nsid_lock);

	cb->args[0] = net_cb.idx;
	return skb->len;
}

static void rtnl_net_notifyid(struct net *net, int cmd, int id)
{
	struct sk_buff *msg;
	int err = -ENOMEM;

	msg = nlmsg_new(rtnl_net_get_size(), GFP_KERNEL);
	if (!msg)
		goto out;

	err = rtnl_net_fill(msg, 0, 0, 0, cmd, net, id);
	if (err < 0)
		goto err_out;

	rtnl_notify(msg, net, 0, RTNLGRP_NSID, NULL, 0);
	return;

err_out:
	nlmsg_free(msg);
out:
	rtnl_set_sk_err(net, RTNLGRP_NSID, err);
}

static int __init net_ns_init(void)
{
	struct net_generic *ng;

#ifdef CONFIG_NET_NS
	net_cachep = kmem_cache_create("net_namespace", sizeof(struct net),
				       SMP_CACHE_BYTES,
				       SLAB_PANIC, NULL);

	/* Create workqueue for cleanup */
	netns_wq = create_singlethread_workqueue("netns");
	if (!netns_wq)
		panic("Could not create netns workq");
#endif

	ng = net_alloc_generic();
	if (!ng)
		panic("Could not allocate generic netns");

	rcu_assign_pointer(init_net.gen, ng);

	mutex_lock(&net_mutex);
	if (setup_net(&init_net, &init_user_ns))
		panic("Could not setup the initial network namespace");

	init_net_initialized = true;

	rtnl_lock();
	list_add_tail_rcu(&init_net.list, &net_namespace_list);
	rtnl_unlock();

	mutex_unlock(&net_mutex);

	register_pernet_subsys(&net_ns_ops);

	rtnl_register(PF_UNSPEC, RTM_NEWNSID, rtnl_net_newid, NULL, NULL);
	rtnl_register(PF_UNSPEC, RTM_GETNSID, rtnl_net_getid, rtnl_net_dumpid,
		      NULL);

	return 0;
}

pure_initcall(net_ns_init);

#ifdef CONFIG_NET_NS
static int __register_pernet_operations(struct list_head *list,
					struct pernet_operations *ops)
{
	struct net *net;
	int error;
	LIST_HEAD(net_exit_list);

	list_add_tail(&ops->list, list);
	if (ops->init || (ops->id && ops->size)) {
		for_each_net(net) {
			error = ops_init(ops, net);
			if (error)
				goto out_undo;
			list_add_tail(&net->exit_list, &net_exit_list);
		}
	}
	return 0;

out_undo:
	/* If I have an error cleanup all namespaces I initialized */
	list_del(&ops->list);
	ops_exit_list(ops, &net_exit_list);
	ops_free_list(ops, &net_exit_list);
	return error;
}

static void __unregister_pernet_operations(struct pernet_operations *ops)
{
	struct net *net;
	LIST_HEAD(net_exit_list);

	list_del(&ops->list);
	for_each_net(net)
		list_add_tail(&net->exit_list, &net_exit_list);
	ops_exit_list(ops, &net_exit_list);
	ops_free_list(ops, &net_exit_list);
}

#else

static int __register_pernet_operations(struct list_head *list,
					struct pernet_operations *ops)
{
	if (!init_net_initialized) {
		list_add_tail(&ops->list, list);
		return 0;
	}

	return ops_init(ops, &init_net);
}

static void __unregister_pernet_operations(struct pernet_operations *ops)
{
	if (!init_net_initialized) {
		list_del(&ops->list);
	} else {
		LIST_HEAD(net_exit_list);

		list_add(&init_net.exit_list, &net_exit_list);
		ops_exit_list(ops, &net_exit_list);
		ops_free_list(ops, &net_exit_list);
	}
}

#endif /* CONFIG_NET_NS */

static DEFINE_IDA(net_generic_ids);

static int register_pernet_operations(struct list_head *list,
				      struct pernet_operations *ops)
{
	int error;

	if (ops->id) {
again:
		error = ida_get_new_above(&net_generic_ids,
					  MIN_PERNET_OPS_ID, ops->id);
		if (error < 0) {
			if (error == -EAGAIN) {
				ida_pre_get(&net_generic_ids, GFP_KERNEL);
				goto again;
			}
			return error;
		}
		max_gen_ptrs = max(max_gen_ptrs, *ops->id + 1);
	}
	error = __register_pernet_operations(list, ops);
	if (error) {
		rcu_barrier();
		if (ops->id)
			ida_remove(&net_generic_ids, *ops->id);
	}

	return error;
}

static void unregister_pernet_operations(struct pernet_operations *ops)
{
	__unregister_pernet_operations(ops);
	rcu_barrier();
	if (ops->id)
		ida_remove(&net_generic_ids, *ops->id);
}

/**
 * register_pernet_subsys - register a network namespace subsystem
 * @ops: pernet operations structure for the subsystem
 *
 * Register a subsystem which has init and exit functions
 * that are called when network namespaces are created and
 * destroyed respectively.
 *
 * When registered all network namespace init functions are
 * called for every existing network namespace, allowing kernel
 * modules to have a race-free view of the set of network namespaces.
 *
 * When a new network namespace is created all of the init
 * methods are called in the order in which they were registered.
 *
 * When a network namespace is destroyed all of the exit methods
 * are called in the reverse of the order in which they were
 * registered.
 */
int register_pernet_subsys(struct pernet_operations *ops)
{
	int error;

	mutex_lock(&net_mutex);
	error = register_pernet_operations(first_device, ops);
	mutex_unlock(&net_mutex);
	return error;
}
EXPORT_SYMBOL_GPL(register_pernet_subsys);
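
/*
 * Example registration (illustrative sketch; the "foo" names are
 * hypothetical and would normally live in the subsystem's own module
 * init/exit paths):
 *
 *	static __net_init int foo_net_init(struct net *net)
 *	{
 *		// set up this namespace's foo state
 *		return 0;
 *	}
 *
 *	static __net_exit void foo_net_exit(struct net *net)
 *	{
 *		// tear down this namespace's foo state
 *	}
 *
 *	static struct pernet_operations foo_net_ops = {
 *		.init = foo_net_init,
 *		.exit = foo_net_exit,
 *	};
 *
 *	static int __init foo_init(void)
 *	{
 *		return register_pernet_subsys(&foo_net_ops);
 *	}
 *
 *	static void __exit foo_exit(void)
 *	{
 *		unregister_pernet_subsys(&foo_net_ops);
 *	}
 *
 * register_pernet_device() below is used the same way by code creating
 * per-namespace network devices; it differs only in where the ops land in
 * pernet_list (device ops run after all subsystem ops on init, and before
 * them on exit).
 */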

/**
 * unregister_pernet_subsys - unregister a network namespace subsystem
 * @ops: pernet operations structure to manipulate
 *
 * Remove the pernet operations structure from the list to be
 * used when network namespaces are created or destroyed. In
 * addition run the exit method for all existing network
 * namespaces.
 */
void unregister_pernet_subsys(struct pernet_operations *ops)
{
	mutex_lock(&net_mutex);
	unregister_pernet_operations(ops);
	mutex_unlock(&net_mutex);
}
EXPORT_SYMBOL_GPL(unregister_pernet_subsys);

/**
 * register_pernet_device - register a network namespace device
 * @ops: pernet operations structure for the subsystem
 *
 * Register a device which has init and exit functions
 * that are called when network namespaces are created and
 * destroyed respectively.
 *
 * When registered all network namespace init functions are
 * called for every existing network namespace, allowing kernel
 * modules to have a race-free view of the set of network namespaces.
 *
 * When a new network namespace is created all of the init
 * methods are called in the order in which they were registered.
 *
 * When a network namespace is destroyed all of the exit methods
 * are called in the reverse of the order in which they were
 * registered.
 */
int register_pernet_device(struct pernet_operations *ops)
{
	int error;

	mutex_lock(&net_mutex);
	error = register_pernet_operations(&pernet_list, ops);
	if (!error && (first_device == &pernet_list))
		first_device = &ops->list;
	mutex_unlock(&net_mutex);
	return error;
}
EXPORT_SYMBOL_GPL(register_pernet_device);

/**
 * unregister_pernet_device - unregister a network namespace device
 * @ops: pernet operations structure to manipulate
 *
 * Remove the pernet operations structure from the list to be
 * used when network namespaces are created or destroyed. In
 * addition run the exit method for all existing network
 * namespaces.
 */
void unregister_pernet_device(struct pernet_operations *ops)
{
	mutex_lock(&net_mutex);
	if (&ops->list == first_device)
		first_device = first_device->next;
	unregister_pernet_operations(ops);
	mutex_unlock(&net_mutex);
}
EXPORT_SYMBOL_GPL(unregister_pernet_device);

#ifdef CONFIG_NET_NS
static struct ns_common *netns_get(struct task_struct *task)
{
	struct net *net = NULL;
	struct nsproxy *nsproxy;

	task_lock(task);
	nsproxy = task->nsproxy;
	if (nsproxy)
		net = get_net(nsproxy->net_ns);
	task_unlock(task);

	return net ? &net->ns : NULL;
}

static inline struct net *to_net_ns(struct ns_common *ns)
{
	return container_of(ns, struct net, ns);
}

static void netns_put(struct ns_common *ns)
{
	put_net(to_net_ns(ns));
}

static int netns_install(struct nsproxy *nsproxy, struct ns_common *ns)
{
	struct net *net = to_net_ns(ns);

	if (!ns_capable(net->user_ns, CAP_SYS_ADMIN) ||
	    !ns_capable(current_user_ns(), CAP_SYS_ADMIN))
		return -EPERM;

	put_net(nsproxy->net_ns);
	nsproxy->net_ns = get_net(net);
	return 0;
}

static struct user_namespace *netns_owner(struct ns_common *ns)
{
	return to_net_ns(ns)->user_ns;
}

const struct proc_ns_operations netns_operations = {
	.name		= "net",
	.type		= CLONE_NEWNET,
	.get		= netns_get,
	.put		= netns_put,
	.install	= netns_install,
	.owner		= netns_owner,
};
#endif