1 #include <linux/workqueue.h> 2 #include <linux/rtnetlink.h> 3 #include <linux/cache.h> 4 #include <linux/slab.h> 5 #include <linux/list.h> 6 #include <linux/delay.h> 7 #include <linux/sched.h> 8 #include <linux/idr.h> 9 #include <linux/rculist.h> 10 #include <linux/nsproxy.h> 11 #include <net/net_namespace.h> 12 #include <net/netns/generic.h> 13 14 /* 15 * Our network namespace constructor/destructor lists 16 */ 17 18 static LIST_HEAD(pernet_list); 19 static struct list_head *first_device = &pernet_list; 20 static DEFINE_MUTEX(net_mutex); 21 22 LIST_HEAD(net_namespace_list); 23 EXPORT_SYMBOL_GPL(net_namespace_list); 24 25 struct net init_net; 26 EXPORT_SYMBOL(init_net); 27 28 #define INITIAL_NET_GEN_PTRS 13 /* +1 for len +2 for rcu_head */ 29 30 static void net_generic_release(struct rcu_head *rcu) 31 { 32 struct net_generic *ng; 33 34 ng = container_of(rcu, struct net_generic, rcu); 35 kfree(ng); 36 } 37 38 static int net_assign_generic(struct net *net, int id, void *data) 39 { 40 struct net_generic *ng, *old_ng; 41 42 BUG_ON(!mutex_is_locked(&net_mutex)); 43 BUG_ON(id == 0); 44 45 old_ng = rcu_dereference_protected(net->gen, 46 lockdep_is_held(&net_mutex)); 47 ng = old_ng; 48 if (old_ng->len >= id) 49 goto assign; 50 51 ng = kzalloc(sizeof(struct net_generic) + 52 id * sizeof(void *), GFP_KERNEL); 53 if (ng == NULL) 54 return -ENOMEM; 55 56 /* 57 * Some synchronisation notes: 58 * 59 * The net_generic explores the net->gen array inside rcu 60 * read section. Besides once set the net->gen->ptr[x] 61 * pointer never changes (see rules in netns/generic.h). 62 * 63 * That said, we simply duplicate this array and schedule 64 * the old copy for kfree after a grace period. 65 */ 66 67 ng->len = id; 68 memcpy(&ng->ptr, &old_ng->ptr, old_ng->len * sizeof(void*)); 69 70 rcu_assign_pointer(net->gen, ng); 71 call_rcu(&old_ng->rcu, net_generic_release); 72 assign: 73 ng->ptr[id - 1] = data; 74 return 0; 75 } 76 77 static int ops_init(const struct pernet_operations *ops, struct net *net) 78 { 79 int err; 80 if (ops->id && ops->size) { 81 void *data = kzalloc(ops->size, GFP_KERNEL); 82 if (!data) 83 return -ENOMEM; 84 85 err = net_assign_generic(net, *ops->id, data); 86 if (err) { 87 kfree(data); 88 return err; 89 } 90 } 91 if (ops->init) 92 return ops->init(net); 93 return 0; 94 } 95 96 static void ops_free(const struct pernet_operations *ops, struct net *net) 97 { 98 if (ops->id && ops->size) { 99 int id = *ops->id; 100 kfree(net_generic(net, id)); 101 } 102 } 103 104 static void ops_exit_list(const struct pernet_operations *ops, 105 struct list_head *net_exit_list) 106 { 107 struct net *net; 108 if (ops->exit) { 109 list_for_each_entry(net, net_exit_list, exit_list) 110 ops->exit(net); 111 } 112 if (ops->exit_batch) 113 ops->exit_batch(net_exit_list); 114 } 115 116 static void ops_free_list(const struct pernet_operations *ops, 117 struct list_head *net_exit_list) 118 { 119 struct net *net; 120 if (ops->size && ops->id) { 121 list_for_each_entry(net, net_exit_list, exit_list) 122 ops_free(ops, net); 123 } 124 } 125 126 /* 127 * setup_net runs the initializers for the network namespace object. 128 */ 129 static __net_init int setup_net(struct net *net) 130 { 131 /* Must be called with net_mutex held */ 132 const struct pernet_operations *ops, *saved_ops; 133 int error = 0; 134 LIST_HEAD(net_exit_list); 135 136 atomic_set(&net->count, 1); 137 138 #ifdef NETNS_REFCNT_DEBUG 139 atomic_set(&net->use_count, 0); 140 #endif 141 142 list_for_each_entry(ops, &pernet_list, list) { 143 error = ops_init(ops, net); 144 if (error < 0) 145 goto out_undo; 146 } 147 out: 148 return error; 149 150 out_undo: 151 /* Walk through the list backwards calling the exit functions 152 * for the pernet modules whose init functions did not fail. 153 */ 154 list_add(&net->exit_list, &net_exit_list); 155 saved_ops = ops; 156 list_for_each_entry_continue_reverse(ops, &pernet_list, list) 157 ops_exit_list(ops, &net_exit_list); 158 159 ops = saved_ops; 160 list_for_each_entry_continue_reverse(ops, &pernet_list, list) 161 ops_free_list(ops, &net_exit_list); 162 163 rcu_barrier(); 164 goto out; 165 } 166 167 static struct net_generic *net_alloc_generic(void) 168 { 169 struct net_generic *ng; 170 size_t generic_size = sizeof(struct net_generic) + 171 INITIAL_NET_GEN_PTRS * sizeof(void *); 172 173 ng = kzalloc(generic_size, GFP_KERNEL); 174 if (ng) 175 ng->len = INITIAL_NET_GEN_PTRS; 176 177 return ng; 178 } 179 180 #ifdef CONFIG_NET_NS 181 static struct kmem_cache *net_cachep; 182 static struct workqueue_struct *netns_wq; 183 184 static struct net *net_alloc(void) 185 { 186 struct net *net = NULL; 187 struct net_generic *ng; 188 189 ng = net_alloc_generic(); 190 if (!ng) 191 goto out; 192 193 net = kmem_cache_zalloc(net_cachep, GFP_KERNEL); 194 if (!net) 195 goto out_free; 196 197 rcu_assign_pointer(net->gen, ng); 198 out: 199 return net; 200 201 out_free: 202 kfree(ng); 203 goto out; 204 } 205 206 static void net_free(struct net *net) 207 { 208 #ifdef NETNS_REFCNT_DEBUG 209 if (unlikely(atomic_read(&net->use_count) != 0)) { 210 printk(KERN_EMERG "network namespace not free! Usage: %d\n", 211 atomic_read(&net->use_count)); 212 return; 213 } 214 #endif 215 kfree(net->gen); 216 kmem_cache_free(net_cachep, net); 217 } 218 219 static struct net *net_create(void) 220 { 221 struct net *net; 222 int rv; 223 224 net = net_alloc(); 225 if (!net) 226 return ERR_PTR(-ENOMEM); 227 mutex_lock(&net_mutex); 228 rv = setup_net(net); 229 if (rv == 0) { 230 rtnl_lock(); 231 list_add_tail_rcu(&net->list, &net_namespace_list); 232 rtnl_unlock(); 233 } 234 mutex_unlock(&net_mutex); 235 if (rv < 0) { 236 net_free(net); 237 return ERR_PTR(rv); 238 } 239 return net; 240 } 241 242 struct net *copy_net_ns(unsigned long flags, struct net *old_net) 243 { 244 if (!(flags & CLONE_NEWNET)) 245 return get_net(old_net); 246 return net_create(); 247 } 248 249 static DEFINE_SPINLOCK(cleanup_list_lock); 250 static LIST_HEAD(cleanup_list); /* Must hold cleanup_list_lock to touch */ 251 252 static void cleanup_net(struct work_struct *work) 253 { 254 const struct pernet_operations *ops; 255 struct net *net, *tmp; 256 LIST_HEAD(net_kill_list); 257 LIST_HEAD(net_exit_list); 258 259 /* Atomically snapshot the list of namespaces to cleanup */ 260 spin_lock_irq(&cleanup_list_lock); 261 list_replace_init(&cleanup_list, &net_kill_list); 262 spin_unlock_irq(&cleanup_list_lock); 263 264 mutex_lock(&net_mutex); 265 266 /* Don't let anyone else find us. */ 267 rtnl_lock(); 268 list_for_each_entry(net, &net_kill_list, cleanup_list) { 269 list_del_rcu(&net->list); 270 list_add_tail(&net->exit_list, &net_exit_list); 271 } 272 rtnl_unlock(); 273 274 /* 275 * Another CPU might be rcu-iterating the list, wait for it. 276 * This needs to be before calling the exit() notifiers, so 277 * the rcu_barrier() below isn't sufficient alone. 278 */ 279 synchronize_rcu(); 280 281 /* Run all of the network namespace exit methods */ 282 list_for_each_entry_reverse(ops, &pernet_list, list) 283 ops_exit_list(ops, &net_exit_list); 284 285 /* Free the net generic variables */ 286 list_for_each_entry_reverse(ops, &pernet_list, list) 287 ops_free_list(ops, &net_exit_list); 288 289 mutex_unlock(&net_mutex); 290 291 /* Ensure there are no outstanding rcu callbacks using this 292 * network namespace. 293 */ 294 rcu_barrier(); 295 296 /* Finally it is safe to free my network namespace structure */ 297 list_for_each_entry_safe(net, tmp, &net_exit_list, exit_list) { 298 list_del_init(&net->exit_list); 299 net_free(net); 300 } 301 } 302 static DECLARE_WORK(net_cleanup_work, cleanup_net); 303 304 void __put_net(struct net *net) 305 { 306 /* Cleanup the network namespace in process context */ 307 unsigned long flags; 308 309 spin_lock_irqsave(&cleanup_list_lock, flags); 310 list_add(&net->cleanup_list, &cleanup_list); 311 spin_unlock_irqrestore(&cleanup_list_lock, flags); 312 313 queue_work(netns_wq, &net_cleanup_work); 314 } 315 EXPORT_SYMBOL_GPL(__put_net); 316 317 #else 318 struct net *copy_net_ns(unsigned long flags, struct net *old_net) 319 { 320 if (flags & CLONE_NEWNET) 321 return ERR_PTR(-EINVAL); 322 return old_net; 323 } 324 #endif 325 326 struct net *get_net_ns_by_pid(pid_t pid) 327 { 328 struct task_struct *tsk; 329 struct net *net; 330 331 /* Lookup the network namespace */ 332 net = ERR_PTR(-ESRCH); 333 rcu_read_lock(); 334 tsk = find_task_by_vpid(pid); 335 if (tsk) { 336 struct nsproxy *nsproxy; 337 nsproxy = task_nsproxy(tsk); 338 if (nsproxy) 339 net = get_net(nsproxy->net_ns); 340 } 341 rcu_read_unlock(); 342 return net; 343 } 344 EXPORT_SYMBOL_GPL(get_net_ns_by_pid); 345 346 static int __init net_ns_init(void) 347 { 348 struct net_generic *ng; 349 350 #ifdef CONFIG_NET_NS 351 net_cachep = kmem_cache_create("net_namespace", sizeof(struct net), 352 SMP_CACHE_BYTES, 353 SLAB_PANIC, NULL); 354 355 /* Create workqueue for cleanup */ 356 netns_wq = create_singlethread_workqueue("netns"); 357 if (!netns_wq) 358 panic("Could not create netns workq"); 359 #endif 360 361 ng = net_alloc_generic(); 362 if (!ng) 363 panic("Could not allocate generic netns"); 364 365 rcu_assign_pointer(init_net.gen, ng); 366 367 mutex_lock(&net_mutex); 368 if (setup_net(&init_net)) 369 panic("Could not setup the initial network namespace"); 370 371 rtnl_lock(); 372 list_add_tail_rcu(&init_net.list, &net_namespace_list); 373 rtnl_unlock(); 374 375 mutex_unlock(&net_mutex); 376 377 return 0; 378 } 379 380 pure_initcall(net_ns_init); 381 382 #ifdef CONFIG_NET_NS 383 static int __register_pernet_operations(struct list_head *list, 384 struct pernet_operations *ops) 385 { 386 struct net *net; 387 int error; 388 LIST_HEAD(net_exit_list); 389 390 list_add_tail(&ops->list, list); 391 if (ops->init || (ops->id && ops->size)) { 392 for_each_net(net) { 393 error = ops_init(ops, net); 394 if (error) 395 goto out_undo; 396 list_add_tail(&net->exit_list, &net_exit_list); 397 } 398 } 399 return 0; 400 401 out_undo: 402 /* If I have an error cleanup all namespaces I initialized */ 403 list_del(&ops->list); 404 ops_exit_list(ops, &net_exit_list); 405 ops_free_list(ops, &net_exit_list); 406 return error; 407 } 408 409 static void __unregister_pernet_operations(struct pernet_operations *ops) 410 { 411 struct net *net; 412 LIST_HEAD(net_exit_list); 413 414 list_del(&ops->list); 415 for_each_net(net) 416 list_add_tail(&net->exit_list, &net_exit_list); 417 ops_exit_list(ops, &net_exit_list); 418 ops_free_list(ops, &net_exit_list); 419 } 420 421 #else 422 423 static int __register_pernet_operations(struct list_head *list, 424 struct pernet_operations *ops) 425 { 426 int err = 0; 427 err = ops_init(ops, &init_net); 428 if (err) 429 ops_free(ops, &init_net); 430 return err; 431 432 } 433 434 static void __unregister_pernet_operations(struct pernet_operations *ops) 435 { 436 LIST_HEAD(net_exit_list); 437 list_add(&init_net.exit_list, &net_exit_list); 438 ops_exit_list(ops, &net_exit_list); 439 ops_free_list(ops, &net_exit_list); 440 } 441 442 #endif /* CONFIG_NET_NS */ 443 444 static DEFINE_IDA(net_generic_ids); 445 446 static int register_pernet_operations(struct list_head *list, 447 struct pernet_operations *ops) 448 { 449 int error; 450 451 if (ops->id) { 452 again: 453 error = ida_get_new_above(&net_generic_ids, 1, ops->id); 454 if (error < 0) { 455 if (error == -EAGAIN) { 456 ida_pre_get(&net_generic_ids, GFP_KERNEL); 457 goto again; 458 } 459 return error; 460 } 461 } 462 error = __register_pernet_operations(list, ops); 463 if (error) { 464 rcu_barrier(); 465 if (ops->id) 466 ida_remove(&net_generic_ids, *ops->id); 467 } 468 469 return error; 470 } 471 472 static void unregister_pernet_operations(struct pernet_operations *ops) 473 { 474 475 __unregister_pernet_operations(ops); 476 rcu_barrier(); 477 if (ops->id) 478 ida_remove(&net_generic_ids, *ops->id); 479 } 480 481 /** 482 * register_pernet_subsys - register a network namespace subsystem 483 * @ops: pernet operations structure for the subsystem 484 * 485 * Register a subsystem which has init and exit functions 486 * that are called when network namespaces are created and 487 * destroyed respectively. 488 * 489 * When registered all network namespace init functions are 490 * called for every existing network namespace. Allowing kernel 491 * modules to have a race free view of the set of network namespaces. 492 * 493 * When a new network namespace is created all of the init 494 * methods are called in the order in which they were registered. 495 * 496 * When a network namespace is destroyed all of the exit methods 497 * are called in the reverse of the order with which they were 498 * registered. 499 */ 500 int register_pernet_subsys(struct pernet_operations *ops) 501 { 502 int error; 503 mutex_lock(&net_mutex); 504 error = register_pernet_operations(first_device, ops); 505 mutex_unlock(&net_mutex); 506 return error; 507 } 508 EXPORT_SYMBOL_GPL(register_pernet_subsys); 509 510 /** 511 * unregister_pernet_subsys - unregister a network namespace subsystem 512 * @ops: pernet operations structure to manipulate 513 * 514 * Remove the pernet operations structure from the list to be 515 * used when network namespaces are created or destroyed. In 516 * addition run the exit method for all existing network 517 * namespaces. 518 */ 519 void unregister_pernet_subsys(struct pernet_operations *ops) 520 { 521 mutex_lock(&net_mutex); 522 unregister_pernet_operations(ops); 523 mutex_unlock(&net_mutex); 524 } 525 EXPORT_SYMBOL_GPL(unregister_pernet_subsys); 526 527 /** 528 * register_pernet_device - register a network namespace device 529 * @ops: pernet operations structure for the subsystem 530 * 531 * Register a device which has init and exit functions 532 * that are called when network namespaces are created and 533 * destroyed respectively. 534 * 535 * When registered all network namespace init functions are 536 * called for every existing network namespace. Allowing kernel 537 * modules to have a race free view of the set of network namespaces. 538 * 539 * When a new network namespace is created all of the init 540 * methods are called in the order in which they were registered. 541 * 542 * When a network namespace is destroyed all of the exit methods 543 * are called in the reverse of the order with which they were 544 * registered. 545 */ 546 int register_pernet_device(struct pernet_operations *ops) 547 { 548 int error; 549 mutex_lock(&net_mutex); 550 error = register_pernet_operations(&pernet_list, ops); 551 if (!error && (first_device == &pernet_list)) 552 first_device = &ops->list; 553 mutex_unlock(&net_mutex); 554 return error; 555 } 556 EXPORT_SYMBOL_GPL(register_pernet_device); 557 558 /** 559 * unregister_pernet_device - unregister a network namespace netdevice 560 * @ops: pernet operations structure to manipulate 561 * 562 * Remove the pernet operations structure from the list to be 563 * used when network namespaces are created or destroyed. In 564 * addition run the exit method for all existing network 565 * namespaces. 566 */ 567 void unregister_pernet_device(struct pernet_operations *ops) 568 { 569 mutex_lock(&net_mutex); 570 if (&ops->list == first_device) 571 first_device = first_device->next; 572 unregister_pernet_operations(ops); 573 mutex_unlock(&net_mutex); 574 } 575 EXPORT_SYMBOL_GPL(unregister_pernet_device); 576