1 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt 2 3 #include <linux/workqueue.h> 4 #include <linux/rtnetlink.h> 5 #include <linux/cache.h> 6 #include <linux/slab.h> 7 #include <linux/list.h> 8 #include <linux/delay.h> 9 #include <linux/sched.h> 10 #include <linux/idr.h> 11 #include <linux/rculist.h> 12 #include <linux/nsproxy.h> 13 #include <linux/fs.h> 14 #include <linux/proc_ns.h> 15 #include <linux/file.h> 16 #include <linux/export.h> 17 #include <linux/user_namespace.h> 18 #include <linux/net_namespace.h> 19 #include <linux/sched/task.h> 20 21 #include <net/sock.h> 22 #include <net/netlink.h> 23 #include <net/net_namespace.h> 24 #include <net/netns/generic.h> 25 26 /* 27 * Our network namespace constructor/destructor lists 28 */ 29 30 static LIST_HEAD(pernet_list); 31 static struct list_head *first_device = &pernet_list; 32 DEFINE_MUTEX(net_mutex); 33 34 LIST_HEAD(net_namespace_list); 35 EXPORT_SYMBOL_GPL(net_namespace_list); 36 37 struct net init_net = { 38 .count = ATOMIC_INIT(1), 39 .dev_base_head = LIST_HEAD_INIT(init_net.dev_base_head), 40 }; 41 EXPORT_SYMBOL(init_net); 42 43 static bool init_net_initialized; 44 45 #define MIN_PERNET_OPS_ID \ 46 ((sizeof(struct net_generic) + sizeof(void *) - 1) / sizeof(void *)) 47 48 #define INITIAL_NET_GEN_PTRS 13 /* +1 for len +2 for rcu_head */ 49 50 static unsigned int max_gen_ptrs = INITIAL_NET_GEN_PTRS; 51 52 static struct net_generic *net_alloc_generic(void) 53 { 54 struct net_generic *ng; 55 unsigned int generic_size = offsetof(struct net_generic, ptr[max_gen_ptrs]); 56 57 ng = kzalloc(generic_size, GFP_KERNEL); 58 if (ng) 59 ng->s.len = max_gen_ptrs; 60 61 return ng; 62 } 63 64 static int net_assign_generic(struct net *net, unsigned int id, void *data) 65 { 66 struct net_generic *ng, *old_ng; 67 68 BUG_ON(!mutex_is_locked(&net_mutex)); 69 BUG_ON(id < MIN_PERNET_OPS_ID); 70 71 old_ng = rcu_dereference_protected(net->gen, 72 lockdep_is_held(&net_mutex)); 73 if (old_ng->s.len > id) { 74 old_ng->ptr[id] = data; 75 return 0; 76 } 77 78 ng = net_alloc_generic(); 79 if (ng == NULL) 80 return -ENOMEM; 81 82 /* 83 * Some synchronisation notes: 84 * 85 * The net_generic explores the net->gen array inside rcu 86 * read section. Besides once set the net->gen->ptr[x] 87 * pointer never changes (see rules in netns/generic.h). 88 * 89 * That said, we simply duplicate this array and schedule 90 * the old copy for kfree after a grace period. 91 */ 92 93 memcpy(&ng->ptr[MIN_PERNET_OPS_ID], &old_ng->ptr[MIN_PERNET_OPS_ID], 94 (old_ng->s.len - MIN_PERNET_OPS_ID) * sizeof(void *)); 95 ng->ptr[id] = data; 96 97 rcu_assign_pointer(net->gen, ng); 98 kfree_rcu(old_ng, s.rcu); 99 return 0; 100 } 101 102 static int ops_init(const struct pernet_operations *ops, struct net *net) 103 { 104 int err = -ENOMEM; 105 void *data = NULL; 106 107 if (ops->id && ops->size) { 108 data = kzalloc(ops->size, GFP_KERNEL); 109 if (!data) 110 goto out; 111 112 err = net_assign_generic(net, *ops->id, data); 113 if (err) 114 goto cleanup; 115 } 116 err = 0; 117 if (ops->init) 118 err = ops->init(net); 119 if (!err) 120 return 0; 121 122 cleanup: 123 kfree(data); 124 125 out: 126 return err; 127 } 128 129 static void ops_free(const struct pernet_operations *ops, struct net *net) 130 { 131 if (ops->id && ops->size) { 132 kfree(net_generic(net, *ops->id)); 133 } 134 } 135 136 static void ops_exit_list(const struct pernet_operations *ops, 137 struct list_head *net_exit_list) 138 { 139 struct net *net; 140 if (ops->exit) { 141 list_for_each_entry(net, net_exit_list, exit_list) 142 ops->exit(net); 143 } 144 if (ops->exit_batch) 145 ops->exit_batch(net_exit_list); 146 } 147 148 static void ops_free_list(const struct pernet_operations *ops, 149 struct list_head *net_exit_list) 150 { 151 struct net *net; 152 if (ops->size && ops->id) { 153 list_for_each_entry(net, net_exit_list, exit_list) 154 ops_free(ops, net); 155 } 156 } 157 158 /* should be called with nsid_lock held */ 159 static int alloc_netid(struct net *net, struct net *peer, int reqid) 160 { 161 int min = 0, max = 0; 162 163 if (reqid >= 0) { 164 min = reqid; 165 max = reqid + 1; 166 } 167 168 return idr_alloc(&net->netns_ids, peer, min, max, GFP_ATOMIC); 169 } 170 171 /* This function is used by idr_for_each(). If net is equal to peer, the 172 * function returns the id so that idr_for_each() stops. Because we cannot 173 * returns the id 0 (idr_for_each() will not stop), we return the magic value 174 * NET_ID_ZERO (-1) for it. 175 */ 176 #define NET_ID_ZERO -1 177 static int net_eq_idr(int id, void *net, void *peer) 178 { 179 if (net_eq(net, peer)) 180 return id ? : NET_ID_ZERO; 181 return 0; 182 } 183 184 /* Should be called with nsid_lock held. If a new id is assigned, the bool alloc 185 * is set to true, thus the caller knows that the new id must be notified via 186 * rtnl. 187 */ 188 static int __peernet2id_alloc(struct net *net, struct net *peer, bool *alloc) 189 { 190 int id = idr_for_each(&net->netns_ids, net_eq_idr, peer); 191 bool alloc_it = *alloc; 192 193 *alloc = false; 194 195 /* Magic value for id 0. */ 196 if (id == NET_ID_ZERO) 197 return 0; 198 if (id > 0) 199 return id; 200 201 if (alloc_it) { 202 id = alloc_netid(net, peer, -1); 203 *alloc = true; 204 return id >= 0 ? id : NETNSA_NSID_NOT_ASSIGNED; 205 } 206 207 return NETNSA_NSID_NOT_ASSIGNED; 208 } 209 210 /* should be called with nsid_lock held */ 211 static int __peernet2id(struct net *net, struct net *peer) 212 { 213 bool no = false; 214 215 return __peernet2id_alloc(net, peer, &no); 216 } 217 218 static void rtnl_net_notifyid(struct net *net, int cmd, int id); 219 /* This function returns the id of a peer netns. If no id is assigned, one will 220 * be allocated and returned. 221 */ 222 int peernet2id_alloc(struct net *net, struct net *peer) 223 { 224 bool alloc; 225 int id; 226 227 if (atomic_read(&net->count) == 0) 228 return NETNSA_NSID_NOT_ASSIGNED; 229 spin_lock_bh(&net->nsid_lock); 230 alloc = atomic_read(&peer->count) == 0 ? false : true; 231 id = __peernet2id_alloc(net, peer, &alloc); 232 spin_unlock_bh(&net->nsid_lock); 233 if (alloc && id >= 0) 234 rtnl_net_notifyid(net, RTM_NEWNSID, id); 235 return id; 236 } 237 EXPORT_SYMBOL_GPL(peernet2id_alloc); 238 239 /* This function returns, if assigned, the id of a peer netns. */ 240 int peernet2id(struct net *net, struct net *peer) 241 { 242 int id; 243 244 spin_lock_bh(&net->nsid_lock); 245 id = __peernet2id(net, peer); 246 spin_unlock_bh(&net->nsid_lock); 247 return id; 248 } 249 EXPORT_SYMBOL(peernet2id); 250 251 /* This function returns true is the peer netns has an id assigned into the 252 * current netns. 253 */ 254 bool peernet_has_id(struct net *net, struct net *peer) 255 { 256 return peernet2id(net, peer) >= 0; 257 } 258 259 struct net *get_net_ns_by_id(struct net *net, int id) 260 { 261 struct net *peer; 262 263 if (id < 0) 264 return NULL; 265 266 rcu_read_lock(); 267 spin_lock_bh(&net->nsid_lock); 268 peer = idr_find(&net->netns_ids, id); 269 if (peer) 270 get_net(peer); 271 spin_unlock_bh(&net->nsid_lock); 272 rcu_read_unlock(); 273 274 return peer; 275 } 276 277 /* 278 * setup_net runs the initializers for the network namespace object. 279 */ 280 static __net_init int setup_net(struct net *net, struct user_namespace *user_ns) 281 { 282 /* Must be called with net_mutex held */ 283 const struct pernet_operations *ops, *saved_ops; 284 int error = 0; 285 LIST_HEAD(net_exit_list); 286 287 atomic_set(&net->count, 1); 288 refcount_set(&net->passive, 1); 289 net->dev_base_seq = 1; 290 net->user_ns = user_ns; 291 idr_init(&net->netns_ids); 292 spin_lock_init(&net->nsid_lock); 293 294 list_for_each_entry(ops, &pernet_list, list) { 295 error = ops_init(ops, net); 296 if (error < 0) 297 goto out_undo; 298 } 299 out: 300 return error; 301 302 out_undo: 303 /* Walk through the list backwards calling the exit functions 304 * for the pernet modules whose init functions did not fail. 305 */ 306 list_add(&net->exit_list, &net_exit_list); 307 saved_ops = ops; 308 list_for_each_entry_continue_reverse(ops, &pernet_list, list) 309 ops_exit_list(ops, &net_exit_list); 310 311 ops = saved_ops; 312 list_for_each_entry_continue_reverse(ops, &pernet_list, list) 313 ops_free_list(ops, &net_exit_list); 314 315 rcu_barrier(); 316 goto out; 317 } 318 319 static int __net_init net_defaults_init_net(struct net *net) 320 { 321 net->core.sysctl_somaxconn = SOMAXCONN; 322 return 0; 323 } 324 325 static struct pernet_operations net_defaults_ops = { 326 .init = net_defaults_init_net, 327 }; 328 329 static __init int net_defaults_init(void) 330 { 331 if (register_pernet_subsys(&net_defaults_ops)) 332 panic("Cannot initialize net default settings"); 333 334 return 0; 335 } 336 337 core_initcall(net_defaults_init); 338 339 #ifdef CONFIG_NET_NS 340 static struct ucounts *inc_net_namespaces(struct user_namespace *ns) 341 { 342 return inc_ucount(ns, current_euid(), UCOUNT_NET_NAMESPACES); 343 } 344 345 static void dec_net_namespaces(struct ucounts *ucounts) 346 { 347 dec_ucount(ucounts, UCOUNT_NET_NAMESPACES); 348 } 349 350 static struct kmem_cache *net_cachep; 351 static struct workqueue_struct *netns_wq; 352 353 static struct net *net_alloc(void) 354 { 355 struct net *net = NULL; 356 struct net_generic *ng; 357 358 ng = net_alloc_generic(); 359 if (!ng) 360 goto out; 361 362 net = kmem_cache_zalloc(net_cachep, GFP_KERNEL); 363 if (!net) 364 goto out_free; 365 366 rcu_assign_pointer(net->gen, ng); 367 out: 368 return net; 369 370 out_free: 371 kfree(ng); 372 goto out; 373 } 374 375 static void net_free(struct net *net) 376 { 377 kfree(rcu_access_pointer(net->gen)); 378 kmem_cache_free(net_cachep, net); 379 } 380 381 void net_drop_ns(void *p) 382 { 383 struct net *ns = p; 384 if (ns && refcount_dec_and_test(&ns->passive)) 385 net_free(ns); 386 } 387 388 struct net *copy_net_ns(unsigned long flags, 389 struct user_namespace *user_ns, struct net *old_net) 390 { 391 struct ucounts *ucounts; 392 struct net *net; 393 int rv; 394 395 if (!(flags & CLONE_NEWNET)) 396 return get_net(old_net); 397 398 ucounts = inc_net_namespaces(user_ns); 399 if (!ucounts) 400 return ERR_PTR(-ENOSPC); 401 402 net = net_alloc(); 403 if (!net) { 404 dec_net_namespaces(ucounts); 405 return ERR_PTR(-ENOMEM); 406 } 407 408 get_user_ns(user_ns); 409 410 rv = mutex_lock_killable(&net_mutex); 411 if (rv < 0) { 412 net_free(net); 413 dec_net_namespaces(ucounts); 414 put_user_ns(user_ns); 415 return ERR_PTR(rv); 416 } 417 418 net->ucounts = ucounts; 419 rv = setup_net(net, user_ns); 420 if (rv == 0) { 421 rtnl_lock(); 422 list_add_tail_rcu(&net->list, &net_namespace_list); 423 rtnl_unlock(); 424 } 425 mutex_unlock(&net_mutex); 426 if (rv < 0) { 427 dec_net_namespaces(ucounts); 428 put_user_ns(user_ns); 429 net_drop_ns(net); 430 return ERR_PTR(rv); 431 } 432 return net; 433 } 434 435 static DEFINE_SPINLOCK(cleanup_list_lock); 436 static LIST_HEAD(cleanup_list); /* Must hold cleanup_list_lock to touch */ 437 438 static void cleanup_net(struct work_struct *work) 439 { 440 const struct pernet_operations *ops; 441 struct net *net, *tmp; 442 struct list_head net_kill_list; 443 LIST_HEAD(net_exit_list); 444 445 /* Atomically snapshot the list of namespaces to cleanup */ 446 spin_lock_irq(&cleanup_list_lock); 447 list_replace_init(&cleanup_list, &net_kill_list); 448 spin_unlock_irq(&cleanup_list_lock); 449 450 mutex_lock(&net_mutex); 451 452 /* Don't let anyone else find us. */ 453 rtnl_lock(); 454 list_for_each_entry(net, &net_kill_list, cleanup_list) { 455 list_del_rcu(&net->list); 456 list_add_tail(&net->exit_list, &net_exit_list); 457 for_each_net(tmp) { 458 int id; 459 460 spin_lock_bh(&tmp->nsid_lock); 461 id = __peernet2id(tmp, net); 462 if (id >= 0) 463 idr_remove(&tmp->netns_ids, id); 464 spin_unlock_bh(&tmp->nsid_lock); 465 if (id >= 0) 466 rtnl_net_notifyid(tmp, RTM_DELNSID, id); 467 } 468 spin_lock_bh(&net->nsid_lock); 469 idr_destroy(&net->netns_ids); 470 spin_unlock_bh(&net->nsid_lock); 471 472 } 473 rtnl_unlock(); 474 475 /* 476 * Another CPU might be rcu-iterating the list, wait for it. 477 * This needs to be before calling the exit() notifiers, so 478 * the rcu_barrier() below isn't sufficient alone. 479 */ 480 synchronize_rcu(); 481 482 /* Run all of the network namespace exit methods */ 483 list_for_each_entry_reverse(ops, &pernet_list, list) 484 ops_exit_list(ops, &net_exit_list); 485 486 /* Free the net generic variables */ 487 list_for_each_entry_reverse(ops, &pernet_list, list) 488 ops_free_list(ops, &net_exit_list); 489 490 mutex_unlock(&net_mutex); 491 492 /* Ensure there are no outstanding rcu callbacks using this 493 * network namespace. 494 */ 495 rcu_barrier(); 496 497 /* Finally it is safe to free my network namespace structure */ 498 list_for_each_entry_safe(net, tmp, &net_exit_list, exit_list) { 499 list_del_init(&net->exit_list); 500 dec_net_namespaces(net->ucounts); 501 put_user_ns(net->user_ns); 502 net_drop_ns(net); 503 } 504 } 505 506 /** 507 * net_ns_barrier - wait until concurrent net_cleanup_work is done 508 * 509 * cleanup_net runs from work queue and will first remove namespaces 510 * from the global list, then run net exit functions. 511 * 512 * Call this in module exit path to make sure that all netns 513 * ->exit ops have been invoked before the function is removed. 514 */ 515 void net_ns_barrier(void) 516 { 517 mutex_lock(&net_mutex); 518 mutex_unlock(&net_mutex); 519 } 520 EXPORT_SYMBOL(net_ns_barrier); 521 522 static DECLARE_WORK(net_cleanup_work, cleanup_net); 523 524 void __put_net(struct net *net) 525 { 526 /* Cleanup the network namespace in process context */ 527 unsigned long flags; 528 529 spin_lock_irqsave(&cleanup_list_lock, flags); 530 list_add(&net->cleanup_list, &cleanup_list); 531 spin_unlock_irqrestore(&cleanup_list_lock, flags); 532 533 queue_work(netns_wq, &net_cleanup_work); 534 } 535 EXPORT_SYMBOL_GPL(__put_net); 536 537 struct net *get_net_ns_by_fd(int fd) 538 { 539 struct file *file; 540 struct ns_common *ns; 541 struct net *net; 542 543 file = proc_ns_fget(fd); 544 if (IS_ERR(file)) 545 return ERR_CAST(file); 546 547 ns = get_proc_ns(file_inode(file)); 548 if (ns->ops == &netns_operations) 549 net = get_net(container_of(ns, struct net, ns)); 550 else 551 net = ERR_PTR(-EINVAL); 552 553 fput(file); 554 return net; 555 } 556 557 #else 558 struct net *get_net_ns_by_fd(int fd) 559 { 560 return ERR_PTR(-EINVAL); 561 } 562 #endif 563 EXPORT_SYMBOL_GPL(get_net_ns_by_fd); 564 565 struct net *get_net_ns_by_pid(pid_t pid) 566 { 567 struct task_struct *tsk; 568 struct net *net; 569 570 /* Lookup the network namespace */ 571 net = ERR_PTR(-ESRCH); 572 rcu_read_lock(); 573 tsk = find_task_by_vpid(pid); 574 if (tsk) { 575 struct nsproxy *nsproxy; 576 task_lock(tsk); 577 nsproxy = tsk->nsproxy; 578 if (nsproxy) 579 net = get_net(nsproxy->net_ns); 580 task_unlock(tsk); 581 } 582 rcu_read_unlock(); 583 return net; 584 } 585 EXPORT_SYMBOL_GPL(get_net_ns_by_pid); 586 587 static __net_init int net_ns_net_init(struct net *net) 588 { 589 #ifdef CONFIG_NET_NS 590 net->ns.ops = &netns_operations; 591 #endif 592 return ns_alloc_inum(&net->ns); 593 } 594 595 static __net_exit void net_ns_net_exit(struct net *net) 596 { 597 ns_free_inum(&net->ns); 598 } 599 600 static struct pernet_operations __net_initdata net_ns_ops = { 601 .init = net_ns_net_init, 602 .exit = net_ns_net_exit, 603 }; 604 605 static const struct nla_policy rtnl_net_policy[NETNSA_MAX + 1] = { 606 [NETNSA_NONE] = { .type = NLA_UNSPEC }, 607 [NETNSA_NSID] = { .type = NLA_S32 }, 608 [NETNSA_PID] = { .type = NLA_U32 }, 609 [NETNSA_FD] = { .type = NLA_U32 }, 610 }; 611 612 static int rtnl_net_newid(struct sk_buff *skb, struct nlmsghdr *nlh, 613 struct netlink_ext_ack *extack) 614 { 615 struct net *net = sock_net(skb->sk); 616 struct nlattr *tb[NETNSA_MAX + 1]; 617 struct nlattr *nla; 618 struct net *peer; 619 int nsid, err; 620 621 err = nlmsg_parse(nlh, sizeof(struct rtgenmsg), tb, NETNSA_MAX, 622 rtnl_net_policy, extack); 623 if (err < 0) 624 return err; 625 if (!tb[NETNSA_NSID]) { 626 NL_SET_ERR_MSG(extack, "nsid is missing"); 627 return -EINVAL; 628 } 629 nsid = nla_get_s32(tb[NETNSA_NSID]); 630 631 if (tb[NETNSA_PID]) { 632 peer = get_net_ns_by_pid(nla_get_u32(tb[NETNSA_PID])); 633 nla = tb[NETNSA_PID]; 634 } else if (tb[NETNSA_FD]) { 635 peer = get_net_ns_by_fd(nla_get_u32(tb[NETNSA_FD])); 636 nla = tb[NETNSA_FD]; 637 } else { 638 NL_SET_ERR_MSG(extack, "Peer netns reference is missing"); 639 return -EINVAL; 640 } 641 if (IS_ERR(peer)) { 642 NL_SET_BAD_ATTR(extack, nla); 643 NL_SET_ERR_MSG(extack, "Peer netns reference is invalid"); 644 return PTR_ERR(peer); 645 } 646 647 spin_lock_bh(&net->nsid_lock); 648 if (__peernet2id(net, peer) >= 0) { 649 spin_unlock_bh(&net->nsid_lock); 650 err = -EEXIST; 651 NL_SET_BAD_ATTR(extack, nla); 652 NL_SET_ERR_MSG(extack, 653 "Peer netns already has a nsid assigned"); 654 goto out; 655 } 656 657 err = alloc_netid(net, peer, nsid); 658 spin_unlock_bh(&net->nsid_lock); 659 if (err >= 0) { 660 rtnl_net_notifyid(net, RTM_NEWNSID, err); 661 err = 0; 662 } else if (err == -ENOSPC && nsid >= 0) { 663 err = -EEXIST; 664 NL_SET_BAD_ATTR(extack, tb[NETNSA_NSID]); 665 NL_SET_ERR_MSG(extack, "The specified nsid is already used"); 666 } 667 out: 668 put_net(peer); 669 return err; 670 } 671 672 static int rtnl_net_get_size(void) 673 { 674 return NLMSG_ALIGN(sizeof(struct rtgenmsg)) 675 + nla_total_size(sizeof(s32)) /* NETNSA_NSID */ 676 ; 677 } 678 679 static int rtnl_net_fill(struct sk_buff *skb, u32 portid, u32 seq, int flags, 680 int cmd, struct net *net, int nsid) 681 { 682 struct nlmsghdr *nlh; 683 struct rtgenmsg *rth; 684 685 nlh = nlmsg_put(skb, portid, seq, cmd, sizeof(*rth), flags); 686 if (!nlh) 687 return -EMSGSIZE; 688 689 rth = nlmsg_data(nlh); 690 rth->rtgen_family = AF_UNSPEC; 691 692 if (nla_put_s32(skb, NETNSA_NSID, nsid)) 693 goto nla_put_failure; 694 695 nlmsg_end(skb, nlh); 696 return 0; 697 698 nla_put_failure: 699 nlmsg_cancel(skb, nlh); 700 return -EMSGSIZE; 701 } 702 703 static int rtnl_net_getid(struct sk_buff *skb, struct nlmsghdr *nlh, 704 struct netlink_ext_ack *extack) 705 { 706 struct net *net = sock_net(skb->sk); 707 struct nlattr *tb[NETNSA_MAX + 1]; 708 struct nlattr *nla; 709 struct sk_buff *msg; 710 struct net *peer; 711 int err, id; 712 713 err = nlmsg_parse(nlh, sizeof(struct rtgenmsg), tb, NETNSA_MAX, 714 rtnl_net_policy, extack); 715 if (err < 0) 716 return err; 717 if (tb[NETNSA_PID]) { 718 peer = get_net_ns_by_pid(nla_get_u32(tb[NETNSA_PID])); 719 nla = tb[NETNSA_PID]; 720 } else if (tb[NETNSA_FD]) { 721 peer = get_net_ns_by_fd(nla_get_u32(tb[NETNSA_FD])); 722 nla = tb[NETNSA_FD]; 723 } else { 724 NL_SET_ERR_MSG(extack, "Peer netns reference is missing"); 725 return -EINVAL; 726 } 727 728 if (IS_ERR(peer)) { 729 NL_SET_BAD_ATTR(extack, nla); 730 NL_SET_ERR_MSG(extack, "Peer netns reference is invalid"); 731 return PTR_ERR(peer); 732 } 733 734 msg = nlmsg_new(rtnl_net_get_size(), GFP_KERNEL); 735 if (!msg) { 736 err = -ENOMEM; 737 goto out; 738 } 739 740 id = peernet2id(net, peer); 741 err = rtnl_net_fill(msg, NETLINK_CB(skb).portid, nlh->nlmsg_seq, 0, 742 RTM_NEWNSID, net, id); 743 if (err < 0) 744 goto err_out; 745 746 err = rtnl_unicast(msg, net, NETLINK_CB(skb).portid); 747 goto out; 748 749 err_out: 750 nlmsg_free(msg); 751 out: 752 put_net(peer); 753 return err; 754 } 755 756 struct rtnl_net_dump_cb { 757 struct net *net; 758 struct sk_buff *skb; 759 struct netlink_callback *cb; 760 int idx; 761 int s_idx; 762 }; 763 764 static int rtnl_net_dumpid_one(int id, void *peer, void *data) 765 { 766 struct rtnl_net_dump_cb *net_cb = (struct rtnl_net_dump_cb *)data; 767 int ret; 768 769 if (net_cb->idx < net_cb->s_idx) 770 goto cont; 771 772 ret = rtnl_net_fill(net_cb->skb, NETLINK_CB(net_cb->cb->skb).portid, 773 net_cb->cb->nlh->nlmsg_seq, NLM_F_MULTI, 774 RTM_NEWNSID, net_cb->net, id); 775 if (ret < 0) 776 return ret; 777 778 cont: 779 net_cb->idx++; 780 return 0; 781 } 782 783 static int rtnl_net_dumpid(struct sk_buff *skb, struct netlink_callback *cb) 784 { 785 struct net *net = sock_net(skb->sk); 786 struct rtnl_net_dump_cb net_cb = { 787 .net = net, 788 .skb = skb, 789 .cb = cb, 790 .idx = 0, 791 .s_idx = cb->args[0], 792 }; 793 794 spin_lock_bh(&net->nsid_lock); 795 idr_for_each(&net->netns_ids, rtnl_net_dumpid_one, &net_cb); 796 spin_unlock_bh(&net->nsid_lock); 797 798 cb->args[0] = net_cb.idx; 799 return skb->len; 800 } 801 802 static void rtnl_net_notifyid(struct net *net, int cmd, int id) 803 { 804 struct sk_buff *msg; 805 int err = -ENOMEM; 806 807 msg = nlmsg_new(rtnl_net_get_size(), GFP_KERNEL); 808 if (!msg) 809 goto out; 810 811 err = rtnl_net_fill(msg, 0, 0, 0, cmd, net, id); 812 if (err < 0) 813 goto err_out; 814 815 rtnl_notify(msg, net, 0, RTNLGRP_NSID, NULL, 0); 816 return; 817 818 err_out: 819 nlmsg_free(msg); 820 out: 821 rtnl_set_sk_err(net, RTNLGRP_NSID, err); 822 } 823 824 static int __init net_ns_init(void) 825 { 826 struct net_generic *ng; 827 828 #ifdef CONFIG_NET_NS 829 net_cachep = kmem_cache_create("net_namespace", sizeof(struct net), 830 SMP_CACHE_BYTES, 831 SLAB_PANIC, NULL); 832 833 /* Create workqueue for cleanup */ 834 netns_wq = create_singlethread_workqueue("netns"); 835 if (!netns_wq) 836 panic("Could not create netns workq"); 837 #endif 838 839 ng = net_alloc_generic(); 840 if (!ng) 841 panic("Could not allocate generic netns"); 842 843 rcu_assign_pointer(init_net.gen, ng); 844 845 mutex_lock(&net_mutex); 846 if (setup_net(&init_net, &init_user_ns)) 847 panic("Could not setup the initial network namespace"); 848 849 init_net_initialized = true; 850 851 rtnl_lock(); 852 list_add_tail_rcu(&init_net.list, &net_namespace_list); 853 rtnl_unlock(); 854 855 mutex_unlock(&net_mutex); 856 857 register_pernet_subsys(&net_ns_ops); 858 859 rtnl_register(PF_UNSPEC, RTM_NEWNSID, rtnl_net_newid, NULL, 860 RTNL_FLAG_DOIT_UNLOCKED); 861 rtnl_register(PF_UNSPEC, RTM_GETNSID, rtnl_net_getid, rtnl_net_dumpid, 862 RTNL_FLAG_DOIT_UNLOCKED); 863 864 return 0; 865 } 866 867 pure_initcall(net_ns_init); 868 869 #ifdef CONFIG_NET_NS 870 static int __register_pernet_operations(struct list_head *list, 871 struct pernet_operations *ops) 872 { 873 struct net *net; 874 int error; 875 LIST_HEAD(net_exit_list); 876 877 list_add_tail(&ops->list, list); 878 if (ops->init || (ops->id && ops->size)) { 879 for_each_net(net) { 880 error = ops_init(ops, net); 881 if (error) 882 goto out_undo; 883 list_add_tail(&net->exit_list, &net_exit_list); 884 } 885 } 886 return 0; 887 888 out_undo: 889 /* If I have an error cleanup all namespaces I initialized */ 890 list_del(&ops->list); 891 ops_exit_list(ops, &net_exit_list); 892 ops_free_list(ops, &net_exit_list); 893 return error; 894 } 895 896 static void __unregister_pernet_operations(struct pernet_operations *ops) 897 { 898 struct net *net; 899 LIST_HEAD(net_exit_list); 900 901 list_del(&ops->list); 902 for_each_net(net) 903 list_add_tail(&net->exit_list, &net_exit_list); 904 ops_exit_list(ops, &net_exit_list); 905 ops_free_list(ops, &net_exit_list); 906 } 907 908 #else 909 910 static int __register_pernet_operations(struct list_head *list, 911 struct pernet_operations *ops) 912 { 913 if (!init_net_initialized) { 914 list_add_tail(&ops->list, list); 915 return 0; 916 } 917 918 return ops_init(ops, &init_net); 919 } 920 921 static void __unregister_pernet_operations(struct pernet_operations *ops) 922 { 923 if (!init_net_initialized) { 924 list_del(&ops->list); 925 } else { 926 LIST_HEAD(net_exit_list); 927 list_add(&init_net.exit_list, &net_exit_list); 928 ops_exit_list(ops, &net_exit_list); 929 ops_free_list(ops, &net_exit_list); 930 } 931 } 932 933 #endif /* CONFIG_NET_NS */ 934 935 static DEFINE_IDA(net_generic_ids); 936 937 static int register_pernet_operations(struct list_head *list, 938 struct pernet_operations *ops) 939 { 940 int error; 941 942 if (ops->id) { 943 again: 944 error = ida_get_new_above(&net_generic_ids, MIN_PERNET_OPS_ID, ops->id); 945 if (error < 0) { 946 if (error == -EAGAIN) { 947 ida_pre_get(&net_generic_ids, GFP_KERNEL); 948 goto again; 949 } 950 return error; 951 } 952 max_gen_ptrs = max(max_gen_ptrs, *ops->id + 1); 953 } 954 error = __register_pernet_operations(list, ops); 955 if (error) { 956 rcu_barrier(); 957 if (ops->id) 958 ida_remove(&net_generic_ids, *ops->id); 959 } 960 961 return error; 962 } 963 964 static void unregister_pernet_operations(struct pernet_operations *ops) 965 { 966 967 __unregister_pernet_operations(ops); 968 rcu_barrier(); 969 if (ops->id) 970 ida_remove(&net_generic_ids, *ops->id); 971 } 972 973 /** 974 * register_pernet_subsys - register a network namespace subsystem 975 * @ops: pernet operations structure for the subsystem 976 * 977 * Register a subsystem which has init and exit functions 978 * that are called when network namespaces are created and 979 * destroyed respectively. 980 * 981 * When registered all network namespace init functions are 982 * called for every existing network namespace. Allowing kernel 983 * modules to have a race free view of the set of network namespaces. 984 * 985 * When a new network namespace is created all of the init 986 * methods are called in the order in which they were registered. 987 * 988 * When a network namespace is destroyed all of the exit methods 989 * are called in the reverse of the order with which they were 990 * registered. 991 */ 992 int register_pernet_subsys(struct pernet_operations *ops) 993 { 994 int error; 995 mutex_lock(&net_mutex); 996 error = register_pernet_operations(first_device, ops); 997 mutex_unlock(&net_mutex); 998 return error; 999 } 1000 EXPORT_SYMBOL_GPL(register_pernet_subsys); 1001 1002 /** 1003 * unregister_pernet_subsys - unregister a network namespace subsystem 1004 * @ops: pernet operations structure to manipulate 1005 * 1006 * Remove the pernet operations structure from the list to be 1007 * used when network namespaces are created or destroyed. In 1008 * addition run the exit method for all existing network 1009 * namespaces. 1010 */ 1011 void unregister_pernet_subsys(struct pernet_operations *ops) 1012 { 1013 mutex_lock(&net_mutex); 1014 unregister_pernet_operations(ops); 1015 mutex_unlock(&net_mutex); 1016 } 1017 EXPORT_SYMBOL_GPL(unregister_pernet_subsys); 1018 1019 /** 1020 * register_pernet_device - register a network namespace device 1021 * @ops: pernet operations structure for the subsystem 1022 * 1023 * Register a device which has init and exit functions 1024 * that are called when network namespaces are created and 1025 * destroyed respectively. 1026 * 1027 * When registered all network namespace init functions are 1028 * called for every existing network namespace. Allowing kernel 1029 * modules to have a race free view of the set of network namespaces. 1030 * 1031 * When a new network namespace is created all of the init 1032 * methods are called in the order in which they were registered. 1033 * 1034 * When a network namespace is destroyed all of the exit methods 1035 * are called in the reverse of the order with which they were 1036 * registered. 1037 */ 1038 int register_pernet_device(struct pernet_operations *ops) 1039 { 1040 int error; 1041 mutex_lock(&net_mutex); 1042 error = register_pernet_operations(&pernet_list, ops); 1043 if (!error && (first_device == &pernet_list)) 1044 first_device = &ops->list; 1045 mutex_unlock(&net_mutex); 1046 return error; 1047 } 1048 EXPORT_SYMBOL_GPL(register_pernet_device); 1049 1050 /** 1051 * unregister_pernet_device - unregister a network namespace netdevice 1052 * @ops: pernet operations structure to manipulate 1053 * 1054 * Remove the pernet operations structure from the list to be 1055 * used when network namespaces are created or destroyed. In 1056 * addition run the exit method for all existing network 1057 * namespaces. 1058 */ 1059 void unregister_pernet_device(struct pernet_operations *ops) 1060 { 1061 mutex_lock(&net_mutex); 1062 if (&ops->list == first_device) 1063 first_device = first_device->next; 1064 unregister_pernet_operations(ops); 1065 mutex_unlock(&net_mutex); 1066 } 1067 EXPORT_SYMBOL_GPL(unregister_pernet_device); 1068 1069 #ifdef CONFIG_NET_NS 1070 static struct ns_common *netns_get(struct task_struct *task) 1071 { 1072 struct net *net = NULL; 1073 struct nsproxy *nsproxy; 1074 1075 task_lock(task); 1076 nsproxy = task->nsproxy; 1077 if (nsproxy) 1078 net = get_net(nsproxy->net_ns); 1079 task_unlock(task); 1080 1081 return net ? &net->ns : NULL; 1082 } 1083 1084 static inline struct net *to_net_ns(struct ns_common *ns) 1085 { 1086 return container_of(ns, struct net, ns); 1087 } 1088 1089 static void netns_put(struct ns_common *ns) 1090 { 1091 put_net(to_net_ns(ns)); 1092 } 1093 1094 static int netns_install(struct nsproxy *nsproxy, struct ns_common *ns) 1095 { 1096 struct net *net = to_net_ns(ns); 1097 1098 if (!ns_capable(net->user_ns, CAP_SYS_ADMIN) || 1099 !ns_capable(current_user_ns(), CAP_SYS_ADMIN)) 1100 return -EPERM; 1101 1102 put_net(nsproxy->net_ns); 1103 nsproxy->net_ns = get_net(net); 1104 return 0; 1105 } 1106 1107 static struct user_namespace *netns_owner(struct ns_common *ns) 1108 { 1109 return to_net_ns(ns)->user_ns; 1110 } 1111 1112 const struct proc_ns_operations netns_operations = { 1113 .name = "net", 1114 .type = CLONE_NEWNET, 1115 .get = netns_get, 1116 .put = netns_put, 1117 .install = netns_install, 1118 .owner = netns_owner, 1119 }; 1120 #endif 1121