#include <linux/workqueue.h>
#include <linux/rtnetlink.h>
#include <linux/cache.h>
#include <linux/slab.h>
#include <linux/list.h>
#include <linux/delay.h>
#include <linux/sched.h>
#include <linux/idr.h>
#include <linux/rculist.h>
#include <linux/nsproxy.h>
#include <net/net_namespace.h>
#include <net/netns/generic.h>

/*
 * Our network namespace constructor/destructor lists
 */

static LIST_HEAD(pernet_list);
static struct list_head *first_device = &pernet_list;
static DEFINE_MUTEX(net_mutex);

LIST_HEAD(net_namespace_list);
EXPORT_SYMBOL_GPL(net_namespace_list);

struct net init_net;
EXPORT_SYMBOL(init_net);

#define INITIAL_NET_GEN_PTRS	13 /* +1 for len +2 for rcu_head */

/*
 * setup_net runs the initializers for the network namespace object.
 */
static __net_init int setup_net(struct net *net)
{
	/* Must be called with net_mutex held */
	struct pernet_operations *ops;
	int error = 0;

	atomic_set(&net->count, 1);

#ifdef NETNS_REFCNT_DEBUG
	atomic_set(&net->use_count, 0);
#endif

	list_for_each_entry(ops, &pernet_list, list) {
		if (ops->init) {
			error = ops->init(net);
			if (error < 0)
				goto out_undo;
		}
	}
out:
	return error;

out_undo:
	/* Walk through the list backwards calling the exit functions
	 * for the pernet modules whose init functions did not fail.
	 */
	list_for_each_entry_continue_reverse(ops, &pernet_list, list) {
		if (ops->exit)
			ops->exit(net);
	}

	rcu_barrier();
	goto out;
}

static struct net_generic *net_alloc_generic(void)
{
	struct net_generic *ng;
	size_t generic_size = sizeof(struct net_generic) +
		INITIAL_NET_GEN_PTRS * sizeof(void *);

	ng = kzalloc(generic_size, GFP_KERNEL);
	if (ng)
		ng->len = INITIAL_NET_GEN_PTRS;

	return ng;
}

#ifdef CONFIG_NET_NS
static struct kmem_cache *net_cachep;
static struct workqueue_struct *netns_wq;

static struct net *net_alloc(void)
{
	struct net *net = NULL;
	struct net_generic *ng;

	ng = net_alloc_generic();
	if (!ng)
		goto out;

	net = kmem_cache_zalloc(net_cachep, GFP_KERNEL);
	if (!net)
		goto out_free;

	rcu_assign_pointer(net->gen, ng);
out:
	return net;

out_free:
	kfree(ng);
	goto out;
}

static void net_free(struct net *net)
{
#ifdef NETNS_REFCNT_DEBUG
	if (unlikely(atomic_read(&net->use_count) != 0)) {
		printk(KERN_EMERG "network namespace not free! Usage: %d\n",
			atomic_read(&net->use_count));
		return;
	}
#endif
	kfree(net->gen);
	kmem_cache_free(net_cachep, net);
}

static struct net *net_create(void)
{
	struct net *net;
	int rv;

	net = net_alloc();
	if (!net)
		return ERR_PTR(-ENOMEM);
	mutex_lock(&net_mutex);
	rv = setup_net(net);
	if (rv == 0) {
		rtnl_lock();
		list_add_tail_rcu(&net->list, &net_namespace_list);
		rtnl_unlock();
	}
	mutex_unlock(&net_mutex);
	if (rv < 0) {
		net_free(net);
		return ERR_PTR(rv);
	}
	return net;
}

struct net *copy_net_ns(unsigned long flags, struct net *old_net)
{
	if (!(flags & CLONE_NEWNET))
		return get_net(old_net);
	return net_create();
}

static void cleanup_net(struct work_struct *work)
{
	struct pernet_operations *ops;
	struct net *net;

	net = container_of(work, struct net, work);

	mutex_lock(&net_mutex);

	/* Don't let anyone else find us. */
	rtnl_lock();
	list_del_rcu(&net->list);
	rtnl_unlock();

	/*
	 * Another CPU might be rcu-iterating the list, wait for it.
	 * This needs to be before calling the exit() notifiers, so
	 * the rcu_barrier() below isn't sufficient alone.
	 */
	synchronize_rcu();

	/* Run all of the network namespace exit methods */
	list_for_each_entry_reverse(ops, &pernet_list, list) {
		if (ops->exit)
			ops->exit(net);
	}

	mutex_unlock(&net_mutex);

	/* Ensure there are no outstanding rcu callbacks using this
	 * network namespace.
	 */
	rcu_barrier();

	/* Finally it is safe to free my network namespace structure */
	net_free(net);
}

void __put_net(struct net *net)
{
	/* Cleanup the network namespace in process context */
	INIT_WORK(&net->work, cleanup_net);
	queue_work(netns_wq, &net->work);
}
EXPORT_SYMBOL_GPL(__put_net);

#else
struct net *copy_net_ns(unsigned long flags, struct net *old_net)
{
	if (flags & CLONE_NEWNET)
		return ERR_PTR(-EINVAL);
	return old_net;
}
#endif

struct net *get_net_ns_by_pid(pid_t pid)
{
	struct task_struct *tsk;
	struct net *net;

	/* Lookup the network namespace */
	net = ERR_PTR(-ESRCH);
	rcu_read_lock();
	tsk = find_task_by_vpid(pid);
	if (tsk) {
		struct nsproxy *nsproxy;
		nsproxy = task_nsproxy(tsk);
		if (nsproxy)
			net = get_net(nsproxy->net_ns);
	}
	rcu_read_unlock();
	return net;
}
EXPORT_SYMBOL_GPL(get_net_ns_by_pid);

static int __init net_ns_init(void)
{
	struct net_generic *ng;

#ifdef CONFIG_NET_NS
	net_cachep = kmem_cache_create("net_namespace", sizeof(struct net),
					SMP_CACHE_BYTES,
					SLAB_PANIC, NULL);

	/* Create workqueue for cleanup */
	netns_wq = create_singlethread_workqueue("netns");
	if (!netns_wq)
		panic("Could not create netns workq");
#endif

	ng = net_alloc_generic();
	if (!ng)
		panic("Could not allocate generic netns");

	rcu_assign_pointer(init_net.gen, ng);

	mutex_lock(&net_mutex);
	if (setup_net(&init_net))
		panic("Could not setup the initial network namespace");

	rtnl_lock();
	list_add_tail_rcu(&init_net.list, &net_namespace_list);
	rtnl_unlock();

	mutex_unlock(&net_mutex);

	return 0;
}

pure_initcall(net_ns_init);

#ifdef CONFIG_NET_NS
static int register_pernet_operations(struct list_head *list,
				      struct pernet_operations *ops)
{
	struct net *net, *undo_net;
	int error;

	list_add_tail(&ops->list, list);
	if (ops->init) {
		for_each_net(net) {
			error = ops->init(net);
			if (error)
				goto out_undo;
		}
	}
	return 0;

out_undo:
	/* If an init call failed, clean up all namespaces we initialized */
	list_del(&ops->list);
	if (ops->exit) {
		for_each_net(undo_net) {
			if (undo_net == net)
				goto undone;
			ops->exit(undo_net);
		}
	}
undone:
	return error;
}

static void unregister_pernet_operations(struct pernet_operations *ops)
{
	struct net *net;

	list_del(&ops->list);
	if (ops->exit)
		for_each_net(net)
			ops->exit(net);
}

#else

static int register_pernet_operations(struct list_head *list,
				      struct pernet_operations *ops)
{
	if (ops->init == NULL)
		return 0;
	return ops->init(&init_net);
}

static void unregister_pernet_operations(struct pernet_operations *ops)
{
	if (ops->exit)
		ops->exit(&init_net);
}
#endif

static DEFINE_IDA(net_generic_ids);

/**
 * register_pernet_subsys - register a network namespace subsystem
 * @ops: pernet operations structure for the subsystem
 *
 * Register a subsystem which has init and exit functions
 * that are called when network namespaces are created and
 * destroyed respectively.
 *
 * When registered all network namespace init functions are
 * called for every existing network namespace, allowing kernel
 * modules to have a race-free view of the set of network namespaces.
 *
 * When a new network namespace is created all of the init
 * methods are called in the order in which they were registered.
 *
 * When a network namespace is destroyed all of the exit methods
 * are called in the reverse of the order in which they were
 * registered.
 */
int register_pernet_subsys(struct pernet_operations *ops)
{
	int error;
	mutex_lock(&net_mutex);
	error = register_pernet_operations(first_device, ops);
	mutex_unlock(&net_mutex);
	return error;
}
EXPORT_SYMBOL_GPL(register_pernet_subsys);

/**
 * unregister_pernet_subsys - unregister a network namespace subsystem
 * @ops: pernet operations structure to manipulate
 *
 * Remove the pernet operations structure from the list to be
 * used when network namespaces are created or destroyed. In
 * addition run the exit method for all existing network
 * namespaces.
 */
void unregister_pernet_subsys(struct pernet_operations *ops)
{
	mutex_lock(&net_mutex);
	unregister_pernet_operations(ops);
	mutex_unlock(&net_mutex);
}
EXPORT_SYMBOL_GPL(unregister_pernet_subsys);

int register_pernet_gen_subsys(int *id, struct pernet_operations *ops)
{
	int rv;

	mutex_lock(&net_mutex);
again:
	rv = ida_get_new_above(&net_generic_ids, 1, id);
	if (rv < 0) {
		if (rv == -EAGAIN) {
			ida_pre_get(&net_generic_ids, GFP_KERNEL);
			goto again;
		}
		goto out;
	}
	rv = register_pernet_operations(first_device, ops);
	if (rv < 0)
		ida_remove(&net_generic_ids, *id);
out:
	mutex_unlock(&net_mutex);
	return rv;
}
EXPORT_SYMBOL_GPL(register_pernet_gen_subsys);

void unregister_pernet_gen_subsys(int id, struct pernet_operations *ops)
{
	mutex_lock(&net_mutex);
	unregister_pernet_operations(ops);
	ida_remove(&net_generic_ids, id);
	mutex_unlock(&net_mutex);
}
EXPORT_SYMBOL_GPL(unregister_pernet_gen_subsys);
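/*
 * Illustrative sketch (not part of the original file): a typical caller of
 * register_pernet_gen_subsys() stores private per-namespace state with
 * net_assign_generic() from its ->init hook (net_mutex is held while the
 * init methods run) and reads it back with net_generic() from
 * <net/netns/generic.h>.  The identifiers foo_net_id, struct foo_net and
 * the foo_* functions are hypothetical; a real module would also include
 * <linux/module.h>.
 */
#if 0
static int foo_net_id;

struct foo_net {
	int some_state;
};

static int __net_init foo_init_net(struct net *net)
{
	struct foo_net *fn;
	int err;

	fn = kzalloc(sizeof(*fn), GFP_KERNEL);
	if (!fn)
		return -ENOMEM;

	/* net_mutex is held here, as net_assign_generic() requires */
	err = net_assign_generic(net, foo_net_id, fn);
	if (err)
		kfree(fn);
	return err;
}

static void __net_exit foo_exit_net(struct net *net)
{
	struct foo_net *fn = net_generic(net, foo_net_id);

	kfree(fn);
}

static struct pernet_operations foo_net_ops = {
	.init = foo_init_net,
	.exit = foo_exit_net,
};

static int __init foo_init(void)
{
	return register_pernet_gen_subsys(&foo_net_id, &foo_net_ops);
}

static void __exit foo_exit(void)
{
	unregister_pernet_gen_subsys(foo_net_id, &foo_net_ops);
}
#endif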
/**
 * register_pernet_device - register a network namespace device
 * @ops: pernet operations structure for the subsystem
 *
 * Register a device which has init and exit functions
 * that are called when network namespaces are created and
 * destroyed respectively.
 *
 * When registered all network namespace init functions are
 * called for every existing network namespace, allowing kernel
 * modules to have a race-free view of the set of network namespaces.
 *
 * When a new network namespace is created all of the init
 * methods are called in the order in which they were registered.
 *
 * When a network namespace is destroyed all of the exit methods
 * are called in the reverse of the order in which they were
 * registered.
 */
int register_pernet_device(struct pernet_operations *ops)
{
	int error;
	mutex_lock(&net_mutex);
	error = register_pernet_operations(&pernet_list, ops);
	if (!error && (first_device == &pernet_list))
		first_device = &ops->list;
	mutex_unlock(&net_mutex);
	return error;
}
EXPORT_SYMBOL_GPL(register_pernet_device);

int register_pernet_gen_device(int *id, struct pernet_operations *ops)
{
	int error;
	mutex_lock(&net_mutex);
again:
	error = ida_get_new_above(&net_generic_ids, 1, id);
	if (error) {
		if (error == -EAGAIN) {
			ida_pre_get(&net_generic_ids, GFP_KERNEL);
			goto again;
		}
		goto out;
	}
	error = register_pernet_operations(&pernet_list, ops);
	if (error)
		ida_remove(&net_generic_ids, *id);
	else if (first_device == &pernet_list)
		first_device = &ops->list;
out:
	mutex_unlock(&net_mutex);
	return error;
}
EXPORT_SYMBOL_GPL(register_pernet_gen_device);

/**
 * unregister_pernet_device - unregister a network namespace netdevice
 * @ops: pernet operations structure to manipulate
 *
 * Remove the pernet operations structure from the list to be
 * used when network namespaces are created or destroyed. In
 * addition run the exit method for all existing network
 * namespaces.
 */
void unregister_pernet_device(struct pernet_operations *ops)
{
	mutex_lock(&net_mutex);
	if (&ops->list == first_device)
		first_device = first_device->next;
	unregister_pernet_operations(ops);
	mutex_unlock(&net_mutex);
}
EXPORT_SYMBOL_GPL(unregister_pernet_device);

void unregister_pernet_gen_device(int id, struct pernet_operations *ops)
{
	mutex_lock(&net_mutex);
	if (&ops->list == first_device)
		first_device = first_device->next;
	unregister_pernet_operations(ops);
	ida_remove(&net_generic_ids, id);
	mutex_unlock(&net_mutex);
}
EXPORT_SYMBOL_GPL(unregister_pernet_gen_device);

static void net_generic_release(struct rcu_head *rcu)
{
	struct net_generic *ng;

	ng = container_of(rcu, struct net_generic, rcu);
	kfree(ng);
}

int net_assign_generic(struct net *net, int id, void *data)
{
	struct net_generic *ng, *old_ng;

	BUG_ON(!mutex_is_locked(&net_mutex));
	BUG_ON(id == 0);

	ng = old_ng = net->gen;
	if (old_ng->len >= id)
		goto assign;

	ng = kzalloc(sizeof(struct net_generic) +
			id * sizeof(void *), GFP_KERNEL);
	if (ng == NULL)
		return -ENOMEM;

	/*
	 * Some synchronisation notes:
	 *
	 * net_generic() explores the net->gen array inside an rcu
	 * read section.  Besides, once set, a net->gen->ptr[x]
	 * pointer never changes (see rules in netns/generic.h).
	 *
	 * That said, we simply duplicate this array and schedule
	 * the old copy for kfree after a grace period.
	 */

	ng->len = id;
	memcpy(&ng->ptr, &old_ng->ptr, old_ng->len * sizeof(void *));

	rcu_assign_pointer(net->gen, ng);
	call_rcu(&old_ng->rcu, net_generic_release);
assign:
	ng->ptr[id - 1] = data;
	return 0;
}
EXPORT_SYMBOL_GPL(net_assign_generic);
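/*
 * Illustrative sketch (not part of the original file): the reader side that
 * the synchronisation notes above refer to.  net_generic() in
 * <net/netns/generic.h> conceptually performs a lookup like the one below:
 * net->gen is dereferenced inside an rcu read section, while the individual
 * ptr[] slots, once assigned, never change.  This is a paraphrase for
 * explanation, not a copy of the header.
 */
#if 0
static inline void *example_net_generic_lookup(struct net *net, int id)
{
	struct net_generic *ng;
	void *ptr;

	rcu_read_lock();
	ng = rcu_dereference(net->gen);
	BUG_ON(id == 0 || id > ng->len);
	ptr = ng->ptr[id - 1];
	rcu_read_unlock();

	return ptr;
}
#endif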