1 #include <linux/workqueue.h> 2 #include <linux/rtnetlink.h> 3 #include <linux/cache.h> 4 #include <linux/slab.h> 5 #include <linux/list.h> 6 #include <linux/delay.h> 7 #include <linux/sched.h> 8 #include <linux/idr.h> 9 #include <linux/rculist.h> 10 #include <linux/nsproxy.h> 11 #include <net/net_namespace.h> 12 #include <net/netns/generic.h> 13 14 /* 15 * Our network namespace constructor/destructor lists 16 */ 17 18 static LIST_HEAD(pernet_list); 19 static struct list_head *first_device = &pernet_list; 20 static DEFINE_MUTEX(net_mutex); 21 22 LIST_HEAD(net_namespace_list); 23 EXPORT_SYMBOL_GPL(net_namespace_list); 24 25 struct net init_net; 26 EXPORT_SYMBOL(init_net); 27 28 #define INITIAL_NET_GEN_PTRS 13 /* +1 for len +2 for rcu_head */ 29 30 static int ops_init(const struct pernet_operations *ops, struct net *net) 31 { 32 int err; 33 if (ops->id && ops->size) { 34 void *data = kzalloc(ops->size, GFP_KERNEL); 35 if (!data) 36 return -ENOMEM; 37 38 err = net_assign_generic(net, *ops->id, data); 39 if (err) { 40 kfree(data); 41 return err; 42 } 43 } 44 if (ops->init) 45 return ops->init(net); 46 return 0; 47 } 48 49 static void ops_free(const struct pernet_operations *ops, struct net *net) 50 { 51 if (ops->id && ops->size) { 52 int id = *ops->id; 53 kfree(net_generic(net, id)); 54 } 55 } 56 57 static void ops_exit_list(const struct pernet_operations *ops, 58 struct list_head *net_exit_list) 59 { 60 struct net *net; 61 if (ops->exit) { 62 list_for_each_entry(net, net_exit_list, exit_list) 63 ops->exit(net); 64 } 65 if (ops->exit_batch) 66 ops->exit_batch(net_exit_list); 67 } 68 69 static void ops_free_list(const struct pernet_operations *ops, 70 struct list_head *net_exit_list) 71 { 72 struct net *net; 73 if (ops->size && ops->id) { 74 list_for_each_entry(net, net_exit_list, exit_list) 75 ops_free(ops, net); 76 } 77 } 78 79 /* 80 * setup_net runs the initializers for the network namespace object. 81 */ 82 static __net_init int setup_net(struct net *net) 83 { 84 /* Must be called with net_mutex held */ 85 const struct pernet_operations *ops, *saved_ops; 86 int error = 0; 87 LIST_HEAD(net_exit_list); 88 89 atomic_set(&net->count, 1); 90 91 #ifdef NETNS_REFCNT_DEBUG 92 atomic_set(&net->use_count, 0); 93 #endif 94 95 list_for_each_entry(ops, &pernet_list, list) { 96 error = ops_init(ops, net); 97 if (error < 0) 98 goto out_undo; 99 } 100 out: 101 return error; 102 103 out_undo: 104 /* Walk through the list backwards calling the exit functions 105 * for the pernet modules whose init functions did not fail. 106 */ 107 list_add(&net->exit_list, &net_exit_list); 108 saved_ops = ops; 109 list_for_each_entry_continue_reverse(ops, &pernet_list, list) 110 ops_exit_list(ops, &net_exit_list); 111 112 ops = saved_ops; 113 list_for_each_entry_continue_reverse(ops, &pernet_list, list) 114 ops_free_list(ops, &net_exit_list); 115 116 rcu_barrier(); 117 goto out; 118 } 119 120 static struct net_generic *net_alloc_generic(void) 121 { 122 struct net_generic *ng; 123 size_t generic_size = sizeof(struct net_generic) + 124 INITIAL_NET_GEN_PTRS * sizeof(void *); 125 126 ng = kzalloc(generic_size, GFP_KERNEL); 127 if (ng) 128 ng->len = INITIAL_NET_GEN_PTRS; 129 130 return ng; 131 } 132 133 #ifdef CONFIG_NET_NS 134 static struct kmem_cache *net_cachep; 135 static struct workqueue_struct *netns_wq; 136 137 static struct net *net_alloc(void) 138 { 139 struct net *net = NULL; 140 struct net_generic *ng; 141 142 ng = net_alloc_generic(); 143 if (!ng) 144 goto out; 145 146 net = kmem_cache_zalloc(net_cachep, GFP_KERNEL); 147 if (!net) 148 goto out_free; 149 150 rcu_assign_pointer(net->gen, ng); 151 out: 152 return net; 153 154 out_free: 155 kfree(ng); 156 goto out; 157 } 158 159 static void net_free(struct net *net) 160 { 161 #ifdef NETNS_REFCNT_DEBUG 162 if (unlikely(atomic_read(&net->use_count) != 0)) { 163 printk(KERN_EMERG "network namespace not free! Usage: %d\n", 164 atomic_read(&net->use_count)); 165 return; 166 } 167 #endif 168 kfree(net->gen); 169 kmem_cache_free(net_cachep, net); 170 } 171 172 static struct net *net_create(void) 173 { 174 struct net *net; 175 int rv; 176 177 net = net_alloc(); 178 if (!net) 179 return ERR_PTR(-ENOMEM); 180 mutex_lock(&net_mutex); 181 rv = setup_net(net); 182 if (rv == 0) { 183 rtnl_lock(); 184 list_add_tail_rcu(&net->list, &net_namespace_list); 185 rtnl_unlock(); 186 } 187 mutex_unlock(&net_mutex); 188 if (rv < 0) { 189 net_free(net); 190 return ERR_PTR(rv); 191 } 192 return net; 193 } 194 195 struct net *copy_net_ns(unsigned long flags, struct net *old_net) 196 { 197 if (!(flags & CLONE_NEWNET)) 198 return get_net(old_net); 199 return net_create(); 200 } 201 202 static DEFINE_SPINLOCK(cleanup_list_lock); 203 static LIST_HEAD(cleanup_list); /* Must hold cleanup_list_lock to touch */ 204 205 static void cleanup_net(struct work_struct *work) 206 { 207 const struct pernet_operations *ops; 208 struct net *net, *tmp; 209 LIST_HEAD(net_kill_list); 210 LIST_HEAD(net_exit_list); 211 212 /* Atomically snapshot the list of namespaces to cleanup */ 213 spin_lock_irq(&cleanup_list_lock); 214 list_replace_init(&cleanup_list, &net_kill_list); 215 spin_unlock_irq(&cleanup_list_lock); 216 217 mutex_lock(&net_mutex); 218 219 /* Don't let anyone else find us. */ 220 rtnl_lock(); 221 list_for_each_entry(net, &net_kill_list, cleanup_list) { 222 list_del_rcu(&net->list); 223 list_add_tail(&net->exit_list, &net_exit_list); 224 } 225 rtnl_unlock(); 226 227 /* 228 * Another CPU might be rcu-iterating the list, wait for it. 229 * This needs to be before calling the exit() notifiers, so 230 * the rcu_barrier() below isn't sufficient alone. 231 */ 232 synchronize_rcu(); 233 234 /* Run all of the network namespace exit methods */ 235 list_for_each_entry_reverse(ops, &pernet_list, list) 236 ops_exit_list(ops, &net_exit_list); 237 238 /* Free the net generic variables */ 239 list_for_each_entry_reverse(ops, &pernet_list, list) 240 ops_free_list(ops, &net_exit_list); 241 242 mutex_unlock(&net_mutex); 243 244 /* Ensure there are no outstanding rcu callbacks using this 245 * network namespace. 246 */ 247 rcu_barrier(); 248 249 /* Finally it is safe to free my network namespace structure */ 250 list_for_each_entry_safe(net, tmp, &net_exit_list, exit_list) { 251 list_del_init(&net->exit_list); 252 net_free(net); 253 } 254 } 255 static DECLARE_WORK(net_cleanup_work, cleanup_net); 256 257 void __put_net(struct net *net) 258 { 259 /* Cleanup the network namespace in process context */ 260 unsigned long flags; 261 262 spin_lock_irqsave(&cleanup_list_lock, flags); 263 list_add(&net->cleanup_list, &cleanup_list); 264 spin_unlock_irqrestore(&cleanup_list_lock, flags); 265 266 queue_work(netns_wq, &net_cleanup_work); 267 } 268 EXPORT_SYMBOL_GPL(__put_net); 269 270 #else 271 struct net *copy_net_ns(unsigned long flags, struct net *old_net) 272 { 273 if (flags & CLONE_NEWNET) 274 return ERR_PTR(-EINVAL); 275 return old_net; 276 } 277 #endif 278 279 struct net *get_net_ns_by_pid(pid_t pid) 280 { 281 struct task_struct *tsk; 282 struct net *net; 283 284 /* Lookup the network namespace */ 285 net = ERR_PTR(-ESRCH); 286 rcu_read_lock(); 287 tsk = find_task_by_vpid(pid); 288 if (tsk) { 289 struct nsproxy *nsproxy; 290 nsproxy = task_nsproxy(tsk); 291 if (nsproxy) 292 net = get_net(nsproxy->net_ns); 293 } 294 rcu_read_unlock(); 295 return net; 296 } 297 EXPORT_SYMBOL_GPL(get_net_ns_by_pid); 298 299 static int __init net_ns_init(void) 300 { 301 struct net_generic *ng; 302 303 #ifdef CONFIG_NET_NS 304 net_cachep = kmem_cache_create("net_namespace", sizeof(struct net), 305 SMP_CACHE_BYTES, 306 SLAB_PANIC, NULL); 307 308 /* Create workqueue for cleanup */ 309 netns_wq = create_singlethread_workqueue("netns"); 310 if (!netns_wq) 311 panic("Could not create netns workq"); 312 #endif 313 314 ng = net_alloc_generic(); 315 if (!ng) 316 panic("Could not allocate generic netns"); 317 318 rcu_assign_pointer(init_net.gen, ng); 319 320 mutex_lock(&net_mutex); 321 if (setup_net(&init_net)) 322 panic("Could not setup the initial network namespace"); 323 324 rtnl_lock(); 325 list_add_tail_rcu(&init_net.list, &net_namespace_list); 326 rtnl_unlock(); 327 328 mutex_unlock(&net_mutex); 329 330 return 0; 331 } 332 333 pure_initcall(net_ns_init); 334 335 #ifdef CONFIG_NET_NS 336 static int __register_pernet_operations(struct list_head *list, 337 struct pernet_operations *ops) 338 { 339 struct net *net; 340 int error; 341 LIST_HEAD(net_exit_list); 342 343 list_add_tail(&ops->list, list); 344 if (ops->init || (ops->id && ops->size)) { 345 for_each_net(net) { 346 error = ops_init(ops, net); 347 if (error) 348 goto out_undo; 349 list_add_tail(&net->exit_list, &net_exit_list); 350 } 351 } 352 return 0; 353 354 out_undo: 355 /* If I have an error cleanup all namespaces I initialized */ 356 list_del(&ops->list); 357 ops_exit_list(ops, &net_exit_list); 358 ops_free_list(ops, &net_exit_list); 359 return error; 360 } 361 362 static void __unregister_pernet_operations(struct pernet_operations *ops) 363 { 364 struct net *net; 365 LIST_HEAD(net_exit_list); 366 367 list_del(&ops->list); 368 for_each_net(net) 369 list_add_tail(&net->exit_list, &net_exit_list); 370 ops_exit_list(ops, &net_exit_list); 371 ops_free_list(ops, &net_exit_list); 372 } 373 374 #else 375 376 static int __register_pernet_operations(struct list_head *list, 377 struct pernet_operations *ops) 378 { 379 int err = 0; 380 err = ops_init(ops, &init_net); 381 if (err) 382 ops_free(ops, &init_net); 383 return err; 384 385 } 386 387 static void __unregister_pernet_operations(struct pernet_operations *ops) 388 { 389 LIST_HEAD(net_exit_list); 390 list_add(&init_net.exit_list, &net_exit_list); 391 ops_exit_list(ops, &net_exit_list); 392 ops_free_list(ops, &net_exit_list); 393 } 394 395 #endif /* CONFIG_NET_NS */ 396 397 static DEFINE_IDA(net_generic_ids); 398 399 static int register_pernet_operations(struct list_head *list, 400 struct pernet_operations *ops) 401 { 402 int error; 403 404 if (ops->id) { 405 again: 406 error = ida_get_new_above(&net_generic_ids, 1, ops->id); 407 if (error < 0) { 408 if (error == -EAGAIN) { 409 ida_pre_get(&net_generic_ids, GFP_KERNEL); 410 goto again; 411 } 412 return error; 413 } 414 } 415 error = __register_pernet_operations(list, ops); 416 if (error) { 417 rcu_barrier(); 418 if (ops->id) 419 ida_remove(&net_generic_ids, *ops->id); 420 } 421 422 return error; 423 } 424 425 static void unregister_pernet_operations(struct pernet_operations *ops) 426 { 427 428 __unregister_pernet_operations(ops); 429 rcu_barrier(); 430 if (ops->id) 431 ida_remove(&net_generic_ids, *ops->id); 432 } 433 434 /** 435 * register_pernet_subsys - register a network namespace subsystem 436 * @ops: pernet operations structure for the subsystem 437 * 438 * Register a subsystem which has init and exit functions 439 * that are called when network namespaces are created and 440 * destroyed respectively. 441 * 442 * When registered all network namespace init functions are 443 * called for every existing network namespace. Allowing kernel 444 * modules to have a race free view of the set of network namespaces. 445 * 446 * When a new network namespace is created all of the init 447 * methods are called in the order in which they were registered. 448 * 449 * When a network namespace is destroyed all of the exit methods 450 * are called in the reverse of the order with which they were 451 * registered. 452 */ 453 int register_pernet_subsys(struct pernet_operations *ops) 454 { 455 int error; 456 mutex_lock(&net_mutex); 457 error = register_pernet_operations(first_device, ops); 458 mutex_unlock(&net_mutex); 459 return error; 460 } 461 EXPORT_SYMBOL_GPL(register_pernet_subsys); 462 463 /** 464 * unregister_pernet_subsys - unregister a network namespace subsystem 465 * @ops: pernet operations structure to manipulate 466 * 467 * Remove the pernet operations structure from the list to be 468 * used when network namespaces are created or destroyed. In 469 * addition run the exit method for all existing network 470 * namespaces. 471 */ 472 void unregister_pernet_subsys(struct pernet_operations *module) 473 { 474 mutex_lock(&net_mutex); 475 unregister_pernet_operations(module); 476 mutex_unlock(&net_mutex); 477 } 478 EXPORT_SYMBOL_GPL(unregister_pernet_subsys); 479 480 /** 481 * register_pernet_device - register a network namespace device 482 * @ops: pernet operations structure for the subsystem 483 * 484 * Register a device which has init and exit functions 485 * that are called when network namespaces are created and 486 * destroyed respectively. 487 * 488 * When registered all network namespace init functions are 489 * called for every existing network namespace. Allowing kernel 490 * modules to have a race free view of the set of network namespaces. 491 * 492 * When a new network namespace is created all of the init 493 * methods are called in the order in which they were registered. 494 * 495 * When a network namespace is destroyed all of the exit methods 496 * are called in the reverse of the order with which they were 497 * registered. 498 */ 499 int register_pernet_device(struct pernet_operations *ops) 500 { 501 int error; 502 mutex_lock(&net_mutex); 503 error = register_pernet_operations(&pernet_list, ops); 504 if (!error && (first_device == &pernet_list)) 505 first_device = &ops->list; 506 mutex_unlock(&net_mutex); 507 return error; 508 } 509 EXPORT_SYMBOL_GPL(register_pernet_device); 510 511 /** 512 * unregister_pernet_device - unregister a network namespace netdevice 513 * @ops: pernet operations structure to manipulate 514 * 515 * Remove the pernet operations structure from the list to be 516 * used when network namespaces are created or destroyed. In 517 * addition run the exit method for all existing network 518 * namespaces. 519 */ 520 void unregister_pernet_device(struct pernet_operations *ops) 521 { 522 mutex_lock(&net_mutex); 523 if (&ops->list == first_device) 524 first_device = first_device->next; 525 unregister_pernet_operations(ops); 526 mutex_unlock(&net_mutex); 527 } 528 EXPORT_SYMBOL_GPL(unregister_pernet_device); 529 530 static void net_generic_release(struct rcu_head *rcu) 531 { 532 struct net_generic *ng; 533 534 ng = container_of(rcu, struct net_generic, rcu); 535 kfree(ng); 536 } 537 538 int net_assign_generic(struct net *net, int id, void *data) 539 { 540 struct net_generic *ng, *old_ng; 541 542 BUG_ON(!mutex_is_locked(&net_mutex)); 543 BUG_ON(id == 0); 544 545 ng = old_ng = net->gen; 546 if (old_ng->len >= id) 547 goto assign; 548 549 ng = kzalloc(sizeof(struct net_generic) + 550 id * sizeof(void *), GFP_KERNEL); 551 if (ng == NULL) 552 return -ENOMEM; 553 554 /* 555 * Some synchronisation notes: 556 * 557 * The net_generic explores the net->gen array inside rcu 558 * read section. Besides once set the net->gen->ptr[x] 559 * pointer never changes (see rules in netns/generic.h). 560 * 561 * That said, we simply duplicate this array and schedule 562 * the old copy for kfree after a grace period. 563 */ 564 565 ng->len = id; 566 memcpy(&ng->ptr, &old_ng->ptr, old_ng->len * sizeof(void*)); 567 568 rcu_assign_pointer(net->gen, ng); 569 call_rcu(&old_ng->rcu, net_generic_release); 570 assign: 571 ng->ptr[id - 1] = data; 572 return 0; 573 } 574 EXPORT_SYMBOL_GPL(net_assign_generic); 575