#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt

#include <linux/workqueue.h>
#include <linux/rtnetlink.h>
#include <linux/cache.h>
#include <linux/slab.h>
#include <linux/list.h>
#include <linux/delay.h>
#include <linux/sched.h>
#include <linux/idr.h>
#include <linux/rculist.h>
#include <linux/nsproxy.h>
#include <linux/fs.h>
#include <linux/proc_ns.h>
#include <linux/file.h>
#include <linux/export.h>
#include <linux/user_namespace.h>
#include <linux/net_namespace.h>
#include <linux/sched/task.h>

#include <net/sock.h>
#include <net/netlink.h>
#include <net/net_namespace.h>
#include <net/netns/generic.h>

/*
 * Our network namespace constructor/destructor lists
 */

static LIST_HEAD(pernet_list);
static struct list_head *first_device = &pernet_list;
/* Used only if there are !async pernet_operations registered */
DEFINE_MUTEX(net_mutex);

LIST_HEAD(net_namespace_list);
EXPORT_SYMBOL_GPL(net_namespace_list);

struct net init_net = {
	.count		= REFCOUNT_INIT(1),
	.dev_base_head	= LIST_HEAD_INIT(init_net.dev_base_head),
};
EXPORT_SYMBOL(init_net);

static bool init_net_initialized;
static unsigned int nr_sync_pernet_ops;
/*
 * net_sem: protects: pernet_list, net_generic_ids, nr_sync_pernet_ops,
 * init_net_initialized and first_device pointer.
 */
DECLARE_RWSEM(net_sem);

#define MIN_PERNET_OPS_ID	\
	((sizeof(struct net_generic) + sizeof(void *) - 1) / sizeof(void *))

#define INITIAL_NET_GEN_PTRS	13 /* +1 for len +2 for rcu_head */

static unsigned int max_gen_ptrs = INITIAL_NET_GEN_PTRS;

static struct net_generic *net_alloc_generic(void)
{
	struct net_generic *ng;
	unsigned int generic_size = offsetof(struct net_generic, ptr[max_gen_ptrs]);

	ng = kzalloc(generic_size, GFP_KERNEL);
	if (ng)
		ng->s.len = max_gen_ptrs;

	return ng;
}

static int net_assign_generic(struct net *net, unsigned int id, void *data)
{
	struct net_generic *ng, *old_ng;

	BUG_ON(id < MIN_PERNET_OPS_ID);

	old_ng = rcu_dereference_protected(net->gen,
					   lockdep_is_held(&net_sem));
	if (old_ng->s.len > id) {
		old_ng->ptr[id] = data;
		return 0;
	}

	ng = net_alloc_generic();
	if (ng == NULL)
		return -ENOMEM;

	/*
	 * Some synchronisation notes:
	 *
	 * net_generic() explores the net->gen array inside an RCU
	 * read-side section. Besides, once set, the net->gen->ptr[x]
	 * pointer never changes (see rules in netns/generic.h).
	 *
	 * That said, we simply duplicate this array and schedule
	 * the old copy for kfree after a grace period.
	 */

	memcpy(&ng->ptr[MIN_PERNET_OPS_ID], &old_ng->ptr[MIN_PERNET_OPS_ID],
	       (old_ng->s.len - MIN_PERNET_OPS_ID) * sizeof(void *));
	ng->ptr[id] = data;

	rcu_assign_pointer(net->gen, ng);
	kfree_rcu(old_ng, s.rcu);
	return 0;
}
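/*
 * For illustration only (not part of the original file): a pernet
 * subsystem that registered with ->id and ->size reads the pointer
 * installed by net_assign_generic() back via net_generic(), which
 * handles the RCU read-side section itself. A minimal sketch,
 * assuming a hypothetical "foo" subsystem with foo_net_id and
 * struct foo_net:
 *
 *	static unsigned int foo_net_id;
 *
 *	struct foo_net {
 *		int some_setting;
 *	};
 *
 *	static struct foo_net *foo_pernet(struct net *net)
 *	{
 *		return net_generic(net, foo_net_id);
 *	}
 *
 * The returned pointer stays valid for the lifetime of @net, since
 * net->gen->ptr[id] never changes once set; only the array around it
 * may be reallocated, as the comment above explains.
 */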
static int ops_init(const struct pernet_operations *ops, struct net *net)
{
	int err = -ENOMEM;
	void *data = NULL;

	if (ops->id && ops->size) {
		data = kzalloc(ops->size, GFP_KERNEL);
		if (!data)
			goto out;

		err = net_assign_generic(net, *ops->id, data);
		if (err)
			goto cleanup;
	}
	err = 0;
	if (ops->init)
		err = ops->init(net);
	if (!err)
		return 0;

cleanup:
	kfree(data);

out:
	return err;
}

static void ops_free(const struct pernet_operations *ops, struct net *net)
{
	if (ops->id && ops->size) {
		kfree(net_generic(net, *ops->id));
	}
}

static void ops_exit_list(const struct pernet_operations *ops,
			  struct list_head *net_exit_list)
{
	struct net *net;

	if (ops->exit) {
		list_for_each_entry(net, net_exit_list, exit_list)
			ops->exit(net);
	}
	if (ops->exit_batch)
		ops->exit_batch(net_exit_list);
}

static void ops_free_list(const struct pernet_operations *ops,
			  struct list_head *net_exit_list)
{
	struct net *net;

	if (ops->size && ops->id) {
		list_for_each_entry(net, net_exit_list, exit_list)
			ops_free(ops, net);
	}
}

/* should be called with nsid_lock held */
static int alloc_netid(struct net *net, struct net *peer, int reqid)
{
	int min = 0, max = 0;

	if (reqid >= 0) {
		min = reqid;
		max = reqid + 1;
	}

	return idr_alloc(&net->netns_ids, peer, min, max, GFP_ATOMIC);
}

/* This function is used by idr_for_each(). If net is equal to peer, the
 * function returns the id so that idr_for_each() stops. Because we cannot
 * return the id 0 (idr_for_each() will not stop), we return the magic value
 * NET_ID_ZERO (-1) for it.
 */
#define NET_ID_ZERO -1
static int net_eq_idr(int id, void *net, void *peer)
{
	if (net_eq(net, peer))
		return id ? : NET_ID_ZERO;
	return 0;
}

/* Should be called with nsid_lock held. If a new id is assigned, the bool alloc
 * is set to true, thus the caller knows that the new id must be notified via
 * rtnl.
 */
static int __peernet2id_alloc(struct net *net, struct net *peer, bool *alloc)
{
	int id = idr_for_each(&net->netns_ids, net_eq_idr, peer);
	bool alloc_it = *alloc;

	*alloc = false;

	/* Magic value for id 0. */
	if (id == NET_ID_ZERO)
		return 0;
	if (id > 0)
		return id;

	if (alloc_it) {
		id = alloc_netid(net, peer, -1);
		*alloc = true;
		return id >= 0 ? id : NETNSA_NSID_NOT_ASSIGNED;
	}

	return NETNSA_NSID_NOT_ASSIGNED;
}

/* should be called with nsid_lock held */
static int __peernet2id(struct net *net, struct net *peer)
{
	bool no = false;

	return __peernet2id_alloc(net, peer, &no);
}
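/*
 * Illustrative note (not in the original file): __peernet2id_alloc()
 * relies on the NET_ID_ZERO encoding above. A quick worked example of
 * the three possible idr_for_each() outcomes:
 *
 *	peer stored under id 5 -> net_eq_idr() returns 5, and
 *				  __peernet2id_alloc() returns 5
 *	peer stored under id 0 -> net_eq_idr() returns NET_ID_ZERO (-1),
 *				  which is translated back to 0
 *	peer not stored at all -> idr_for_each() returns 0, so either a
 *				  fresh id is allocated (alloc_it) or
 *				  NETNSA_NSID_NOT_ASSIGNED is returned
 */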
static void rtnl_net_notifyid(struct net *net, int cmd, int id);

/* This function returns the id of a peer netns. If no id is assigned, one will
 * be allocated and returned.
 */
int peernet2id_alloc(struct net *net, struct net *peer)
{
	bool alloc = false, alive = false;
	int id;

	if (refcount_read(&net->count) == 0)
		return NETNSA_NSID_NOT_ASSIGNED;
	spin_lock_bh(&net->nsid_lock);
	/*
	 * When peer is obtained from RCU lists, we may race with
	 * its cleanup. Check whether it's alive: this guarantees
	 * we never hash a peer back into net->netns_ids after it
	 * has just been idr_remove()'d from there in cleanup_net().
	 */
	if (maybe_get_net(peer))
		alive = alloc = true;
	id = __peernet2id_alloc(net, peer, &alloc);
	spin_unlock_bh(&net->nsid_lock);
	if (alloc && id >= 0)
		rtnl_net_notifyid(net, RTM_NEWNSID, id);
	if (alive)
		put_net(peer);
	return id;
}
EXPORT_SYMBOL_GPL(peernet2id_alloc);

/* This function returns, if assigned, the id of a peer netns. */
int peernet2id(struct net *net, struct net *peer)
{
	int id;

	spin_lock_bh(&net->nsid_lock);
	id = __peernet2id(net, peer);
	spin_unlock_bh(&net->nsid_lock);
	return id;
}
EXPORT_SYMBOL(peernet2id);

/* This function returns true if the peer netns has an id assigned in the
 * current netns.
 */
bool peernet_has_id(struct net *net, struct net *peer)
{
	return peernet2id(net, peer) >= 0;
}

struct net *get_net_ns_by_id(struct net *net, int id)
{
	struct net *peer;

	if (id < 0)
		return NULL;

	rcu_read_lock();
	peer = idr_find(&net->netns_ids, id);
	if (peer)
		peer = maybe_get_net(peer);
	rcu_read_unlock();

	return peer;
}

/*
 * setup_net runs the initializers for the network namespace object.
 */
static __net_init int setup_net(struct net *net, struct user_namespace *user_ns)
{
	/* Must be called with net_sem held */
	const struct pernet_operations *ops, *saved_ops;
	int error = 0;
	LIST_HEAD(net_exit_list);

	refcount_set(&net->count, 1);
	refcount_set(&net->passive, 1);
	net->dev_base_seq = 1;
	net->user_ns = user_ns;
	idr_init(&net->netns_ids);
	spin_lock_init(&net->nsid_lock);

	list_for_each_entry(ops, &pernet_list, list) {
		error = ops_init(ops, net);
		if (error < 0)
			goto out_undo;
	}
	rtnl_lock();
	list_add_tail_rcu(&net->list, &net_namespace_list);
	rtnl_unlock();
out:
	return error;

out_undo:
	/* Walk through the list backwards calling the exit functions
	 * for the pernet modules whose init functions did not fail.
	 */
	list_add(&net->exit_list, &net_exit_list);
	saved_ops = ops;
	list_for_each_entry_continue_reverse(ops, &pernet_list, list)
		ops_exit_list(ops, &net_exit_list);

	ops = saved_ops;
	list_for_each_entry_continue_reverse(ops, &pernet_list, list)
		ops_free_list(ops, &net_exit_list);

	rcu_barrier();
	goto out;
}
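/*
 * Illustrative note (not in the original file): with pernet_list
 * containing ops A, B, C (in registration order) and C's ->init
 * failing in setup_net(), the out_undo path above runs
 *
 *	ops_exit_list(B), ops_exit_list(A)	(exit methods, reversed)
 *	ops_free_list(B), ops_free_list(A)	(generic data, reversed)
 *
 * C itself is not unwound: ops_init() already freed C's own generic
 * data on failure, and C's ->exit must not run after a failed ->init.
 */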
static int __net_init net_defaults_init_net(struct net *net)
{
	net->core.sysctl_somaxconn = SOMAXCONN;
	return 0;
}

static struct pernet_operations net_defaults_ops = {
	.init = net_defaults_init_net,
	.async = true,
};

static __init int net_defaults_init(void)
{
	if (register_pernet_subsys(&net_defaults_ops))
		panic("Cannot initialize net default settings");

	return 0;
}

core_initcall(net_defaults_init);

#ifdef CONFIG_NET_NS
static struct ucounts *inc_net_namespaces(struct user_namespace *ns)
{
	return inc_ucount(ns, current_euid(), UCOUNT_NET_NAMESPACES);
}

static void dec_net_namespaces(struct ucounts *ucounts)
{
	dec_ucount(ucounts, UCOUNT_NET_NAMESPACES);
}

static struct kmem_cache *net_cachep;
static struct workqueue_struct *netns_wq;

static struct net *net_alloc(void)
{
	struct net *net = NULL;
	struct net_generic *ng;

	ng = net_alloc_generic();
	if (!ng)
		goto out;

	net = kmem_cache_zalloc(net_cachep, GFP_KERNEL);
	if (!net)
		goto out_free;

	rcu_assign_pointer(net->gen, ng);
out:
	return net;

out_free:
	kfree(ng);
	goto out;
}

static void net_free(struct net *net)
{
	kfree(rcu_access_pointer(net->gen));
	kmem_cache_free(net_cachep, net);
}

void net_drop_ns(void *p)
{
	struct net *ns = p;

	if (ns && refcount_dec_and_test(&ns->passive))
		net_free(ns);
}

struct net *copy_net_ns(unsigned long flags,
			struct user_namespace *user_ns, struct net *old_net)
{
	struct ucounts *ucounts;
	struct net *net;
	int rv;

	if (!(flags & CLONE_NEWNET))
		return get_net(old_net);

	ucounts = inc_net_namespaces(user_ns);
	if (!ucounts)
		return ERR_PTR(-ENOSPC);

	net = net_alloc();
	if (!net) {
		rv = -ENOMEM;
		goto dec_ucounts;
	}
	refcount_set(&net->passive, 1);
	net->ucounts = ucounts;
	get_user_ns(user_ns);

	rv = down_read_killable(&net_sem);
	if (rv < 0)
		goto put_userns;
	if (nr_sync_pernet_ops) {
		rv = mutex_lock_killable(&net_mutex);
		if (rv < 0)
			goto up_read;
	}
	rv = setup_net(net, user_ns);
	if (nr_sync_pernet_ops)
		mutex_unlock(&net_mutex);
up_read:
	up_read(&net_sem);
	if (rv < 0) {
put_userns:
		put_user_ns(user_ns);
		net_drop_ns(net);
dec_ucounts:
		dec_net_namespaces(ucounts);
		return ERR_PTR(rv);
	}
	return net;
}
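/*
 * For illustration only (not part of the original file): copy_net_ns()
 * is reached from the clone()/unshare() paths when CLONE_NEWNET is set.
 * A minimal userspace sketch that ends up here:
 *
 *	#define _GNU_SOURCE
 *	#include <sched.h>
 *
 *	if (unshare(CLONE_NEWNET) == 0) {
 *		// the calling task now runs in a freshly
 *		// setup_net()-initialized network namespace
 *	}
 */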
static void unhash_nsid(struct net *net, struct net *last)
{
	struct net *tmp;

	/* This function is only called from the cleanup_net() work,
	 * and that work is the only one that may delete a net from
	 * net_namespace_list. So, while the loop below is executing,
	 * the list may only grow. Thus, we do not need
	 * for_each_net_rcu() or rtnl_lock().
	 */
	for_each_net(tmp) {
		int id;

		spin_lock_bh(&tmp->nsid_lock);
		id = __peernet2id(tmp, net);
		if (id >= 0)
			idr_remove(&tmp->netns_ids, id);
		spin_unlock_bh(&tmp->nsid_lock);
		if (id >= 0)
			rtnl_net_notifyid(tmp, RTM_DELNSID, id);
		if (tmp == last)
			break;
	}
	spin_lock_bh(&net->nsid_lock);
	idr_destroy(&net->netns_ids);
	spin_unlock_bh(&net->nsid_lock);
}

static DEFINE_SPINLOCK(cleanup_list_lock);
static LIST_HEAD(cleanup_list);  /* Must hold cleanup_list_lock to touch */

static void cleanup_net(struct work_struct *work)
{
	const struct pernet_operations *ops;
	struct net *net, *tmp, *last;
	struct list_head net_kill_list;
	LIST_HEAD(net_exit_list);

	/* Atomically snapshot the list of namespaces to cleanup */
	spin_lock_irq(&cleanup_list_lock);
	list_replace_init(&cleanup_list, &net_kill_list);
	spin_unlock_irq(&cleanup_list_lock);

	down_read(&net_sem);
	if (nr_sync_pernet_ops)
		mutex_lock(&net_mutex);

	/* Don't let anyone else find us. */
	rtnl_lock();
	list_for_each_entry(net, &net_kill_list, cleanup_list)
		list_del_rcu(&net->list);
	/* Cache the last net. After we unlock rtnl, no new net added
	 * to net_namespace_list can assign an nsid to a net from
	 * net_kill_list (see peernet2id_alloc()). So, we skip those
	 * nets in unhash_nsid().
	 *
	 * Note that unhash_nsid() does not delete nsid links between
	 * net_kill_list's nets, as they've already been deleted from
	 * net_namespace_list. But this would be useless anyway, as
	 * their netns_ids are destroyed there.
	 */
	last = list_last_entry(&net_namespace_list, struct net, list);
	rtnl_unlock();

	list_for_each_entry(net, &net_kill_list, cleanup_list) {
		unhash_nsid(net, last);
		list_add_tail(&net->exit_list, &net_exit_list);
	}

	/*
	 * Another CPU might be rcu-iterating the list, wait for it.
	 * This needs to be before calling the exit() notifiers, so
	 * the rcu_barrier() below isn't sufficient alone.
	 */
	synchronize_rcu();

	/* Run all of the network namespace exit methods */
	list_for_each_entry_reverse(ops, &pernet_list, list)
		ops_exit_list(ops, &net_exit_list);

	if (nr_sync_pernet_ops)
		mutex_unlock(&net_mutex);

	/* Free the net generic variables */
	list_for_each_entry_reverse(ops, &pernet_list, list)
		ops_free_list(ops, &net_exit_list);

	up_read(&net_sem);

	/* Ensure there are no outstanding rcu callbacks using this
	 * network namespace.
	 */
	rcu_barrier();

	/* Finally it is safe to free my network namespace structure */
	list_for_each_entry_safe(net, tmp, &net_exit_list, exit_list) {
		list_del_init(&net->exit_list);
		dec_net_namespaces(net->ucounts);
		put_user_ns(net->user_ns);
		net_drop_ns(net);
	}
}

/**
 * net_ns_barrier - wait until concurrent net_cleanup_work is done
 *
 * cleanup_net runs from work queue and will first remove namespaces
 * from the global list, then run net exit functions.
 *
 * Call this in module exit path to make sure that all netns
 * ->exit ops have been invoked before the function is removed.
 */
void net_ns_barrier(void)
{
	down_write(&net_sem);
	mutex_lock(&net_mutex);
	mutex_unlock(&net_mutex);
	up_write(&net_sem);
}
EXPORT_SYMBOL(net_ns_barrier);
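/*
 * For illustration only (not part of the original file): following the
 * kernel-doc above, a hypothetical "foo" module whose pernet ->exit
 * methods reference module code might pair unregistration with the
 * barrier in its exit path:
 *
 *	static void __exit foo_exit(void)
 *	{
 *		unregister_pernet_subsys(&foo_net_ops);
 *		// wait for any in-flight cleanup_net() work before
 *		// the module text can go away
 *		net_ns_barrier();
 *	}
 */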
static DECLARE_WORK(net_cleanup_work, cleanup_net);

void __put_net(struct net *net)
{
	/* Cleanup the network namespace in process context */
	unsigned long flags;

	spin_lock_irqsave(&cleanup_list_lock, flags);
	list_add(&net->cleanup_list, &cleanup_list);
	spin_unlock_irqrestore(&cleanup_list_lock, flags);

	queue_work(netns_wq, &net_cleanup_work);
}
EXPORT_SYMBOL_GPL(__put_net);

struct net *get_net_ns_by_fd(int fd)
{
	struct file *file;
	struct ns_common *ns;
	struct net *net;

	file = proc_ns_fget(fd);
	if (IS_ERR(file))
		return ERR_CAST(file);

	ns = get_proc_ns(file_inode(file));
	if (ns->ops == &netns_operations)
		net = get_net(container_of(ns, struct net, ns));
	else
		net = ERR_PTR(-EINVAL);

	fput(file);
	return net;
}

#else
struct net *get_net_ns_by_fd(int fd)
{
	return ERR_PTR(-EINVAL);
}
#endif
EXPORT_SYMBOL_GPL(get_net_ns_by_fd);

struct net *get_net_ns_by_pid(pid_t pid)
{
	struct task_struct *tsk;
	struct net *net;

	/* Lookup the network namespace */
	net = ERR_PTR(-ESRCH);
	rcu_read_lock();
	tsk = find_task_by_vpid(pid);
	if (tsk) {
		struct nsproxy *nsproxy;

		task_lock(tsk);
		nsproxy = tsk->nsproxy;
		if (nsproxy)
			net = get_net(nsproxy->net_ns);
		task_unlock(tsk);
	}
	rcu_read_unlock();
	return net;
}
EXPORT_SYMBOL_GPL(get_net_ns_by_pid);

static __net_init int net_ns_net_init(struct net *net)
{
#ifdef CONFIG_NET_NS
	net->ns.ops = &netns_operations;
#endif
	return ns_alloc_inum(&net->ns);
}

static __net_exit void net_ns_net_exit(struct net *net)
{
	ns_free_inum(&net->ns);
}

static struct pernet_operations __net_initdata net_ns_ops = {
	.init = net_ns_net_init,
	.exit = net_ns_net_exit,
	.async = true,
};

static const struct nla_policy rtnl_net_policy[NETNSA_MAX + 1] = {
	[NETNSA_NONE]	= { .type = NLA_UNSPEC },
	[NETNSA_NSID]	= { .type = NLA_S32 },
	[NETNSA_PID]	= { .type = NLA_U32 },
	[NETNSA_FD]	= { .type = NLA_U32 },
};
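/*
 * Illustrative note (not in the original file): the RTM_NEWNSID /
 * RTM_GETNSID handlers below consume messages laid out as
 *
 *	struct nlmsghdr + struct rtgenmsg + attributes:
 *		NETNSA_NSID (s32)	the id to set / the answer
 *		NETNSA_PID  (u32) or	one reference naming the
 *		NETNSA_FD   (u32)	peer namespace
 *
 * From userspace, "ip netns set <name> <nsid>" is one well-known
 * sender of such RTM_NEWNSID requests.
 */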
static int rtnl_net_newid(struct sk_buff *skb, struct nlmsghdr *nlh,
			  struct netlink_ext_ack *extack)
{
	struct net *net = sock_net(skb->sk);
	struct nlattr *tb[NETNSA_MAX + 1];
	struct nlattr *nla;
	struct net *peer;
	int nsid, err;

	err = nlmsg_parse(nlh, sizeof(struct rtgenmsg), tb, NETNSA_MAX,
			  rtnl_net_policy, extack);
	if (err < 0)
		return err;
	if (!tb[NETNSA_NSID]) {
		NL_SET_ERR_MSG(extack, "nsid is missing");
		return -EINVAL;
	}
	nsid = nla_get_s32(tb[NETNSA_NSID]);

	if (tb[NETNSA_PID]) {
		peer = get_net_ns_by_pid(nla_get_u32(tb[NETNSA_PID]));
		nla = tb[NETNSA_PID];
	} else if (tb[NETNSA_FD]) {
		peer = get_net_ns_by_fd(nla_get_u32(tb[NETNSA_FD]));
		nla = tb[NETNSA_FD];
	} else {
		NL_SET_ERR_MSG(extack, "Peer netns reference is missing");
		return -EINVAL;
	}
	if (IS_ERR(peer)) {
		NL_SET_BAD_ATTR(extack, nla);
		NL_SET_ERR_MSG(extack, "Peer netns reference is invalid");
		return PTR_ERR(peer);
	}

	spin_lock_bh(&net->nsid_lock);
	if (__peernet2id(net, peer) >= 0) {
		spin_unlock_bh(&net->nsid_lock);
		err = -EEXIST;
		NL_SET_BAD_ATTR(extack, nla);
		NL_SET_ERR_MSG(extack,
			       "Peer netns already has a nsid assigned");
		goto out;
	}

	err = alloc_netid(net, peer, nsid);
	spin_unlock_bh(&net->nsid_lock);
	if (err >= 0) {
		rtnl_net_notifyid(net, RTM_NEWNSID, err);
		err = 0;
	} else if (err == -ENOSPC && nsid >= 0) {
		err = -EEXIST;
		NL_SET_BAD_ATTR(extack, tb[NETNSA_NSID]);
		NL_SET_ERR_MSG(extack, "The specified nsid is already used");
	}
out:
	put_net(peer);
	return err;
}

static int rtnl_net_get_size(void)
{
	return NLMSG_ALIGN(sizeof(struct rtgenmsg))
	       + nla_total_size(sizeof(s32)) /* NETNSA_NSID */
	       ;
}

static int rtnl_net_fill(struct sk_buff *skb, u32 portid, u32 seq, int flags,
			 int cmd, struct net *net, int nsid)
{
	struct nlmsghdr *nlh;
	struct rtgenmsg *rth;

	nlh = nlmsg_put(skb, portid, seq, cmd, sizeof(*rth), flags);
	if (!nlh)
		return -EMSGSIZE;

	rth = nlmsg_data(nlh);
	rth->rtgen_family = AF_UNSPEC;

	if (nla_put_s32(skb, NETNSA_NSID, nsid))
		goto nla_put_failure;

	nlmsg_end(skb, nlh);
	return 0;

nla_put_failure:
	nlmsg_cancel(skb, nlh);
	return -EMSGSIZE;
}

static int rtnl_net_getid(struct sk_buff *skb, struct nlmsghdr *nlh,
			  struct netlink_ext_ack *extack)
{
	struct net *net = sock_net(skb->sk);
	struct nlattr *tb[NETNSA_MAX + 1];
	struct nlattr *nla;
	struct sk_buff *msg;
	struct net *peer;
	int err, id;

	err = nlmsg_parse(nlh, sizeof(struct rtgenmsg), tb, NETNSA_MAX,
			  rtnl_net_policy, extack);
	if (err < 0)
		return err;
	if (tb[NETNSA_PID]) {
		peer = get_net_ns_by_pid(nla_get_u32(tb[NETNSA_PID]));
		nla = tb[NETNSA_PID];
	} else if (tb[NETNSA_FD]) {
		peer = get_net_ns_by_fd(nla_get_u32(tb[NETNSA_FD]));
		nla = tb[NETNSA_FD];
	} else {
		NL_SET_ERR_MSG(extack, "Peer netns reference is missing");
		return -EINVAL;
	}

	if (IS_ERR(peer)) {
		NL_SET_BAD_ATTR(extack, nla);
		NL_SET_ERR_MSG(extack, "Peer netns reference is invalid");
		return PTR_ERR(peer);
	}

	msg = nlmsg_new(rtnl_net_get_size(), GFP_KERNEL);
	if (!msg) {
		err = -ENOMEM;
		goto out;
	}

	id = peernet2id(net, peer);
	err = rtnl_net_fill(msg, NETLINK_CB(skb).portid, nlh->nlmsg_seq, 0,
			    RTM_NEWNSID, net, id);
	if (err < 0)
		goto err_out;

	err = rtnl_unicast(msg, net, NETLINK_CB(skb).portid);
	goto out;

err_out:
	nlmsg_free(msg);
out:
	put_net(peer);
	return err;
}

struct rtnl_net_dump_cb {
	struct net *net;
	struct sk_buff *skb;
	struct netlink_callback *cb;
	int idx;
	int s_idx;
};

static int rtnl_net_dumpid_one(int id, void *peer, void *data)
{
	struct rtnl_net_dump_cb *net_cb = (struct rtnl_net_dump_cb *)data;
	int ret;

	if (net_cb->idx < net_cb->s_idx)
		goto cont;

	ret = rtnl_net_fill(net_cb->skb, NETLINK_CB(net_cb->cb->skb).portid,
			    net_cb->cb->nlh->nlmsg_seq, NLM_F_MULTI,
			    RTM_NEWNSID, net_cb->net, id);
	if (ret < 0)
		return ret;

cont:
	net_cb->idx++;
	return 0;
}
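/*
 * Illustrative note (not in the original file): the idx/s_idx pair
 * implements the usual netlink dump resume protocol. If the skb fills
 * up after, say, 3 entries, rtnl_net_dumpid() below stores idx = 3 in
 * cb->args[0]; on the next invocation s_idx = 3, so the first three
 * ids are counted but skipped via the "cont:" path, and the dump
 * continues where it left off.
 */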
static int rtnl_net_dumpid(struct sk_buff *skb, struct netlink_callback *cb)
{
	struct net *net = sock_net(skb->sk);
	struct rtnl_net_dump_cb net_cb = {
		.net = net,
		.skb = skb,
		.cb = cb,
		.idx = 0,
		.s_idx = cb->args[0],
	};

	spin_lock_bh(&net->nsid_lock);
	idr_for_each(&net->netns_ids, rtnl_net_dumpid_one, &net_cb);
	spin_unlock_bh(&net->nsid_lock);

	cb->args[0] = net_cb.idx;
	return skb->len;
}

static void rtnl_net_notifyid(struct net *net, int cmd, int id)
{
	struct sk_buff *msg;
	int err = -ENOMEM;

	msg = nlmsg_new(rtnl_net_get_size(), GFP_KERNEL);
	if (!msg)
		goto out;

	err = rtnl_net_fill(msg, 0, 0, 0, cmd, net, id);
	if (err < 0)
		goto err_out;

	rtnl_notify(msg, net, 0, RTNLGRP_NSID, NULL, 0);
	return;

err_out:
	nlmsg_free(msg);
out:
	rtnl_set_sk_err(net, RTNLGRP_NSID, err);
}

static int __init net_ns_init(void)
{
	struct net_generic *ng;

#ifdef CONFIG_NET_NS
	net_cachep = kmem_cache_create("net_namespace", sizeof(struct net),
				       SMP_CACHE_BYTES,
				       SLAB_PANIC, NULL);

	/* Create workqueue for cleanup */
	netns_wq = create_singlethread_workqueue("netns");
	if (!netns_wq)
		panic("Could not create netns workq");
#endif

	ng = net_alloc_generic();
	if (!ng)
		panic("Could not allocate generic netns");

	rcu_assign_pointer(init_net.gen, ng);

	down_write(&net_sem);
	if (setup_net(&init_net, &init_user_ns))
		panic("Could not setup the initial network namespace");

	init_net_initialized = true;
	up_write(&net_sem);

	register_pernet_subsys(&net_ns_ops);

	rtnl_register(PF_UNSPEC, RTM_NEWNSID, rtnl_net_newid, NULL,
		      RTNL_FLAG_DOIT_UNLOCKED);
	rtnl_register(PF_UNSPEC, RTM_GETNSID, rtnl_net_getid, rtnl_net_dumpid,
		      RTNL_FLAG_DOIT_UNLOCKED);

	return 0;
}

pure_initcall(net_ns_init);

#ifdef CONFIG_NET_NS
static int __register_pernet_operations(struct list_head *list,
					struct pernet_operations *ops)
{
	struct net *net;
	int error;
	LIST_HEAD(net_exit_list);

	list_add_tail(&ops->list, list);
	if (ops->init || (ops->id && ops->size)) {
		for_each_net(net) {
			error = ops_init(ops, net);
			if (error)
				goto out_undo;
			list_add_tail(&net->exit_list, &net_exit_list);
		}
	}
	return 0;

out_undo:
	/* On error, clean up all the namespaces this ops initialized */
	list_del(&ops->list);
	ops_exit_list(ops, &net_exit_list);
	ops_free_list(ops, &net_exit_list);
	return error;
}

static void __unregister_pernet_operations(struct pernet_operations *ops)
{
	struct net *net;
	LIST_HEAD(net_exit_list);

	list_del(&ops->list);
	for_each_net(net)
		list_add_tail(&net->exit_list, &net_exit_list);
	ops_exit_list(ops, &net_exit_list);
	ops_free_list(ops, &net_exit_list);
}

#else

static int __register_pernet_operations(struct list_head *list,
					struct pernet_operations *ops)
{
	if (!init_net_initialized) {
		list_add_tail(&ops->list, list);
		return 0;
	}

	return ops_init(ops, &init_net);
}

static void __unregister_pernet_operations(struct pernet_operations *ops)
{
	if (!init_net_initialized) {
		list_del(&ops->list);
	} else {
		LIST_HEAD(net_exit_list);

		list_add(&init_net.exit_list, &net_exit_list);
		ops_exit_list(ops, &net_exit_list);
		ops_free_list(ops, &net_exit_list);
	}
}

#endif /* CONFIG_NET_NS */

static DEFINE_IDA(net_generic_ids);
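/*
 * For illustration only (not part of the original file): a typical
 * user of the ->id/->size facility below. Registration allocates the
 * id from net_generic_ids and, through ops_init(), a zeroed ops->size
 * blob per namespace, reachable with net_generic(). All "foo" names
 * here are hypothetical:
 *
 *	static unsigned int foo_net_id;
 *
 *	struct foo_net {
 *		struct list_head entries;
 *	};
 *
 *	static int __net_init foo_init_net(struct net *net)
 *	{
 *		struct foo_net *fn = net_generic(net, foo_net_id);
 *
 *		INIT_LIST_HEAD(&fn->entries);
 *		return 0;
 *	}
 *
 *	static struct pernet_operations foo_net_ops = {
 *		.init  = foo_init_net,
 *		.id    = &foo_net_id,
 *		.size  = sizeof(struct foo_net),
 *		.async = true,
 *	};
 */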
static int register_pernet_operations(struct list_head *list,
				      struct pernet_operations *ops)
{
	int error;

	if (ops->id) {
again:
		error = ida_get_new_above(&net_generic_ids, MIN_PERNET_OPS_ID,
					  ops->id);
		if (error < 0) {
			if (error == -EAGAIN) {
				ida_pre_get(&net_generic_ids, GFP_KERNEL);
				goto again;
			}
			return error;
		}
		max_gen_ptrs = max(max_gen_ptrs, *ops->id + 1);
	}
	error = __register_pernet_operations(list, ops);
	if (error) {
		rcu_barrier();
		if (ops->id)
			ida_remove(&net_generic_ids, *ops->id);
	} else if (!ops->async) {
		pr_info_once("Pernet operations %ps are sync.\n", ops);
		nr_sync_pernet_ops++;
	}

	return error;
}

static void unregister_pernet_operations(struct pernet_operations *ops)
{
	if (!ops->async)
		BUG_ON(nr_sync_pernet_ops-- == 0);
	__unregister_pernet_operations(ops);
	rcu_barrier();
	if (ops->id)
		ida_remove(&net_generic_ids, *ops->id);
}

/**
 * register_pernet_subsys - register a network namespace subsystem
 * @ops: pernet operations structure for the subsystem
 *
 * Register a subsystem which has init and exit functions
 * that are called when network namespaces are created and
 * destroyed respectively.
 *
 * When registered, all network namespace init functions are
 * called for every existing network namespace, allowing kernel
 * modules to have a race-free view of the set of network namespaces.
 *
 * When a new network namespace is created all of the init
 * methods are called in the order in which they were registered.
 *
 * When a network namespace is destroyed all of the exit methods
 * are called in the reverse of the order in which they were
 * registered.
 */
int register_pernet_subsys(struct pernet_operations *ops)
{
	int error;

	down_write(&net_sem);
	error = register_pernet_operations(first_device, ops);
	up_write(&net_sem);
	return error;
}
EXPORT_SYMBOL_GPL(register_pernet_subsys);

/**
 * unregister_pernet_subsys - unregister a network namespace subsystem
 * @ops: pernet operations structure to manipulate
 *
 * Remove the pernet operations structure from the list to be
 * used when network namespaces are created or destroyed. In
 * addition run the exit method for all existing network
 * namespaces.
 */
void unregister_pernet_subsys(struct pernet_operations *ops)
{
	down_write(&net_sem);
	unregister_pernet_operations(ops);
	up_write(&net_sem);
}
EXPORT_SYMBOL_GPL(unregister_pernet_subsys);
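/*
 * For illustration only (not part of the original file): module code
 * normally pairs the two calls above in its init/exit hooks, e.g. for
 * the hypothetical foo_net_ops sketched earlier:
 *
 *	static int __init foo_init(void)
 *	{
 *		return register_pernet_subsys(&foo_net_ops);
 *	}
 *
 *	static void __exit foo_exit(void)
 *	{
 *		unregister_pernet_subsys(&foo_net_ops);
 *	}
 *
 *	module_init(foo_init);
 *	module_exit(foo_exit);
 */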
/**
 * register_pernet_device - register a network namespace device
 * @ops: pernet operations structure for the subsystem
 *
 * Register a device which has init and exit functions
 * that are called when network namespaces are created and
 * destroyed respectively.
 *
 * When registered, all network namespace init functions are
 * called for every existing network namespace, allowing kernel
 * modules to have a race-free view of the set of network namespaces.
 *
 * When a new network namespace is created all of the init
 * methods are called in the order in which they were registered.
 *
 * When a network namespace is destroyed all of the exit methods
 * are called in the reverse of the order in which they were
 * registered.
 */
int register_pernet_device(struct pernet_operations *ops)
{
	int error;

	down_write(&net_sem);
	error = register_pernet_operations(&pernet_list, ops);
	if (!error && (first_device == &pernet_list))
		first_device = &ops->list;
	up_write(&net_sem);
	return error;
}
EXPORT_SYMBOL_GPL(register_pernet_device);

/**
 * unregister_pernet_device - unregister a network namespace device
 * @ops: pernet operations structure to manipulate
 *
 * Remove the pernet operations structure from the list to be
 * used when network namespaces are created or destroyed. In
 * addition run the exit method for all existing network
 * namespaces.
 */
void unregister_pernet_device(struct pernet_operations *ops)
{
	down_write(&net_sem);
	if (&ops->list == first_device)
		first_device = first_device->next;
	unregister_pernet_operations(ops);
	up_write(&net_sem);
}
EXPORT_SYMBOL_GPL(unregister_pernet_device);

#ifdef CONFIG_NET_NS
static struct ns_common *netns_get(struct task_struct *task)
{
	struct net *net = NULL;
	struct nsproxy *nsproxy;

	task_lock(task);
	nsproxy = task->nsproxy;
	if (nsproxy)
		net = get_net(nsproxy->net_ns);
	task_unlock(task);

	return net ? &net->ns : NULL;
}

static inline struct net *to_net_ns(struct ns_common *ns)
{
	return container_of(ns, struct net, ns);
}

static void netns_put(struct ns_common *ns)
{
	put_net(to_net_ns(ns));
}

static int netns_install(struct nsproxy *nsproxy, struct ns_common *ns)
{
	struct net *net = to_net_ns(ns);

	if (!ns_capable(net->user_ns, CAP_SYS_ADMIN) ||
	    !ns_capable(current_user_ns(), CAP_SYS_ADMIN))
		return -EPERM;

	put_net(nsproxy->net_ns);
	nsproxy->net_ns = get_net(net);
	return 0;
}

static struct user_namespace *netns_owner(struct ns_common *ns)
{
	return to_net_ns(ns)->user_ns;
}

const struct proc_ns_operations netns_operations = {
	.name		= "net",
	.type		= CLONE_NEWNET,
	.get		= netns_get,
	.put		= netns_put,
	.install	= netns_install,
	.owner		= netns_owner,
};
#endif
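/*
 * For illustration only (not part of the original file): netns_install()
 * above is what runs when userspace switches namespaces with setns(2)
 * on a /proc/<pid>/ns/net file descriptor, e.g.:
 *
 *	#define _GNU_SOURCE
 *	#include <sched.h>
 *	#include <fcntl.h>
 *
 *	int fd = open("/proc/1234/ns/net", O_RDONLY);
 *	if (fd >= 0 && setns(fd, CLONE_NEWNET) == 0) {
 *		// the current task now uses the target namespace
 *	}
 */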