xref: /openbmc/linux/net/core/net_namespace.c (revision 09ad9bc7)
1 #include <linux/workqueue.h>
2 #include <linux/rtnetlink.h>
3 #include <linux/cache.h>
4 #include <linux/slab.h>
5 #include <linux/list.h>
6 #include <linux/delay.h>
7 #include <linux/sched.h>
8 #include <linux/idr.h>
9 #include <linux/rculist.h>
10 #include <linux/nsproxy.h>
11 #include <net/net_namespace.h>
12 #include <net/netns/generic.h>
13 
14 /*
15  *	Our network namespace constructor/destructor lists
16  */
17 
/* Every registered pernet_operations, in the order their init hooks run. */
static LIST_HEAD(pernet_list);
/*
 * Insertion point for subsystem registrations: register_pernet_subsys()
 * adds new entries in front of this node, while device registrations are
 * appended to pernet_list and the first of them becomes first_device.
 * While no device is registered it points at the list head itself.
 */
static struct list_head *first_device = &pernet_list;
/*
 * Held around namespace setup/cleanup and around every
 * (un)registration of pernet operations in this file.
 */
static DEFINE_MUTEX(net_mutex);

/* All live network namespaces; modified under rtnl_lock with RCU list ops. */
LIST_HEAD(net_namespace_list);
EXPORT_SYMBOL_GPL(net_namespace_list);

/* The initial network namespace, set up by net_ns_init(). */
struct net init_net;
EXPORT_SYMBOL(init_net);

/* Number of pointer slots a freshly allocated net_generic array starts with. */
#define INITIAL_NET_GEN_PTRS	13 /* +1 for len +2 for rcu_head */
29 
30 /*
31  * setup_net runs the initializers for the network namespace object.
32  */
33 static __net_init int setup_net(struct net *net)
34 {
35 	/* Must be called with net_mutex held */
36 	struct pernet_operations *ops;
37 	int error = 0;
38 
39 	atomic_set(&net->count, 1);
40 
41 #ifdef NETNS_REFCNT_DEBUG
42 	atomic_set(&net->use_count, 0);
43 #endif
44 
45 	list_for_each_entry(ops, &pernet_list, list) {
46 		if (ops->init) {
47 			error = ops->init(net);
48 			if (error < 0)
49 				goto out_undo;
50 		}
51 	}
52 out:
53 	return error;
54 
55 out_undo:
56 	/* Walk through the list backwards calling the exit functions
57 	 * for the pernet modules whose init functions did not fail.
58 	 */
59 	list_for_each_entry_continue_reverse(ops, &pernet_list, list) {
60 		if (ops->exit)
61 			ops->exit(net);
62 	}
63 
64 	rcu_barrier();
65 	goto out;
66 }
67 
68 static struct net_generic *net_alloc_generic(void)
69 {
70 	struct net_generic *ng;
71 	size_t generic_size = sizeof(struct net_generic) +
72 		INITIAL_NET_GEN_PTRS * sizeof(void *);
73 
74 	ng = kzalloc(generic_size, GFP_KERNEL);
75 	if (ng)
76 		ng->len = INITIAL_NET_GEN_PTRS;
77 
78 	return ng;
79 }
80 
81 #ifdef CONFIG_NET_NS
/* Slab cache that struct net objects are allocated from (see net_ns_init). */
static struct kmem_cache *net_cachep;
/* Single-threaded workqueue on which namespace cleanup work is queued. */
static struct workqueue_struct *netns_wq;
84 
85 static struct net *net_alloc(void)
86 {
87 	struct net *net = NULL;
88 	struct net_generic *ng;
89 
90 	ng = net_alloc_generic();
91 	if (!ng)
92 		goto out;
93 
94 	net = kmem_cache_zalloc(net_cachep, GFP_KERNEL);
95 	if (!net)
96 		goto out_free;
97 
98 	rcu_assign_pointer(net->gen, ng);
99 out:
100 	return net;
101 
102 out_free:
103 	kfree(ng);
104 	goto out;
105 }
106 
107 static void net_free(struct net *net)
108 {
109 #ifdef NETNS_REFCNT_DEBUG
110 	if (unlikely(atomic_read(&net->use_count) != 0)) {
111 		printk(KERN_EMERG "network namespace not free! Usage: %d\n",
112 			atomic_read(&net->use_count));
113 		return;
114 	}
115 #endif
116 	kfree(net->gen);
117 	kmem_cache_free(net_cachep, net);
118 }
119 
120 static struct net *net_create(void)
121 {
122 	struct net *net;
123 	int rv;
124 
125 	net = net_alloc();
126 	if (!net)
127 		return ERR_PTR(-ENOMEM);
128 	mutex_lock(&net_mutex);
129 	rv = setup_net(net);
130 	if (rv == 0) {
131 		rtnl_lock();
132 		list_add_tail_rcu(&net->list, &net_namespace_list);
133 		rtnl_unlock();
134 	}
135 	mutex_unlock(&net_mutex);
136 	if (rv < 0) {
137 		net_free(net);
138 		return ERR_PTR(rv);
139 	}
140 	return net;
141 }
142 
143 struct net *copy_net_ns(unsigned long flags, struct net *old_net)
144 {
145 	if (!(flags & CLONE_NEWNET))
146 		return get_net(old_net);
147 	return net_create();
148 }
149 
150 static void cleanup_net(struct work_struct *work)
151 {
152 	struct pernet_operations *ops;
153 	struct net *net;
154 
155 	net = container_of(work, struct net, work);
156 
157 	mutex_lock(&net_mutex);
158 
159 	/* Don't let anyone else find us. */
160 	rtnl_lock();
161 	list_del_rcu(&net->list);
162 	rtnl_unlock();
163 
164 	/*
165 	 * Another CPU might be rcu-iterating the list, wait for it.
166 	 * This needs to be before calling the exit() notifiers, so
167 	 * the rcu_barrier() below isn't sufficient alone.
168 	 */
169 	synchronize_rcu();
170 
171 	/* Run all of the network namespace exit methods */
172 	list_for_each_entry_reverse(ops, &pernet_list, list) {
173 		if (ops->exit)
174 			ops->exit(net);
175 	}
176 
177 	mutex_unlock(&net_mutex);
178 
179 	/* Ensure there are no outstanding rcu callbacks using this
180 	 * network namespace.
181 	 */
182 	rcu_barrier();
183 
184 	/* Finally it is safe to free my network namespace structure */
185 	net_free(net);
186 }
187 
/*
 * Schedule the teardown of @net.  The actual work is done by cleanup_net()
 * on the netns workqueue rather than in the caller's context.
 */
void __put_net(struct net *net)
{
	/* Cleanup the network namespace in process context */
	INIT_WORK(&net->work, cleanup_net);
	queue_work(netns_wq, &net->work);
}
EXPORT_SYMBOL_GPL(__put_net);
195 
196 #else
197 struct net *copy_net_ns(unsigned long flags, struct net *old_net)
198 {
199 	if (flags & CLONE_NEWNET)
200 		return ERR_PTR(-EINVAL);
201 	return old_net;
202 }
203 #endif
204 
205 struct net *get_net_ns_by_pid(pid_t pid)
206 {
207 	struct task_struct *tsk;
208 	struct net *net;
209 
210 	/* Lookup the network namespace */
211 	net = ERR_PTR(-ESRCH);
212 	rcu_read_lock();
213 	tsk = find_task_by_vpid(pid);
214 	if (tsk) {
215 		struct nsproxy *nsproxy;
216 		nsproxy = task_nsproxy(tsk);
217 		if (nsproxy)
218 			net = get_net(nsproxy->net_ns);
219 	}
220 	rcu_read_unlock();
221 	return net;
222 }
223 EXPORT_SYMBOL_GPL(get_net_ns_by_pid);
224 
225 static int __init net_ns_init(void)
226 {
227 	struct net_generic *ng;
228 
229 #ifdef CONFIG_NET_NS
230 	net_cachep = kmem_cache_create("net_namespace", sizeof(struct net),
231 					SMP_CACHE_BYTES,
232 					SLAB_PANIC, NULL);
233 
234 	/* Create workqueue for cleanup */
235 	netns_wq = create_singlethread_workqueue("netns");
236 	if (!netns_wq)
237 		panic("Could not create netns workq");
238 #endif
239 
240 	ng = net_alloc_generic();
241 	if (!ng)
242 		panic("Could not allocate generic netns");
243 
244 	rcu_assign_pointer(init_net.gen, ng);
245 
246 	mutex_lock(&net_mutex);
247 	if (setup_net(&init_net))
248 		panic("Could not setup the initial network namespace");
249 
250 	rtnl_lock();
251 	list_add_tail_rcu(&init_net.list, &net_namespace_list);
252 	rtnl_unlock();
253 
254 	mutex_unlock(&net_mutex);
255 
256 	return 0;
257 }
258 
259 pure_initcall(net_ns_init);
260 
261 #ifdef CONFIG_NET_NS
262 static int register_pernet_operations(struct list_head *list,
263 				      struct pernet_operations *ops)
264 {
265 	struct net *net, *undo_net;
266 	int error;
267 
268 	list_add_tail(&ops->list, list);
269 	if (ops->init) {
270 		for_each_net(net) {
271 			error = ops->init(net);
272 			if (error)
273 				goto out_undo;
274 		}
275 	}
276 	return 0;
277 
278 out_undo:
279 	/* If I have an error cleanup all namespaces I initialized */
280 	list_del(&ops->list);
281 	if (ops->exit) {
282 		for_each_net(undo_net) {
283 			if (net_eq(undo_net, net))
284 				goto undone;
285 			ops->exit(undo_net);
286 		}
287 	}
288 undone:
289 	return error;
290 }
291 
292 static void unregister_pernet_operations(struct pernet_operations *ops)
293 {
294 	struct net *net;
295 
296 	list_del(&ops->list);
297 	if (ops->exit)
298 		for_each_net(net)
299 			ops->exit(net);
300 }
301 
302 #else
303 
304 static int register_pernet_operations(struct list_head *list,
305 				      struct pernet_operations *ops)
306 {
307 	if (ops->init == NULL)
308 		return 0;
309 	return ops->init(&init_net);
310 }
311 
312 static void unregister_pernet_operations(struct pernet_operations *ops)
313 {
314 	if (ops->exit)
315 		ops->exit(&init_net);
316 }
317 #endif
318 
/*
 * Allocator for the ids handed out by register_pernet_gen_subsys() and
 * register_pernet_gen_device().  Ids start at 1; net_assign_generic()
 * stores the data for id N in slot N - 1 of the per-namespace array.
 */
static DEFINE_IDA(net_generic_ids);
320 
321 /**
322  *      register_pernet_subsys - register a network namespace subsystem
323  *	@ops:  pernet operations structure for the subsystem
324  *
325  *	Register a subsystem which has init and exit functions
326  *	that are called when network namespaces are created and
327  *	destroyed respectively.
328  *
 *	When registered, the init function is called for every
 *	existing network namespace, allowing kernel modules to have
 *	a race-free view of the set of network namespaces.
332  *
333  *	When a new network namespace is created all of the init
334  *	methods are called in the order in which they were registered.
335  *
336  *	When a network namespace is destroyed all of the exit methods
337  *	are called in the reverse of the order with which they were
338  *	registered.
339  */
340 int register_pernet_subsys(struct pernet_operations *ops)
341 {
342 	int error;
343 	mutex_lock(&net_mutex);
344 	error =  register_pernet_operations(first_device, ops);
345 	mutex_unlock(&net_mutex);
346 	return error;
347 }
348 EXPORT_SYMBOL_GPL(register_pernet_subsys);
349 
350 /**
351  *      unregister_pernet_subsys - unregister a network namespace subsystem
352  *	@ops: pernet operations structure to manipulate
353  *
354  *	Remove the pernet operations structure from the list to be
355  *	used when network namespaces are created or destroyed.  In
356  *	addition run the exit method for all existing network
357  *	namespaces.
358  */
359 void unregister_pernet_subsys(struct pernet_operations *module)
360 {
361 	mutex_lock(&net_mutex);
362 	unregister_pernet_operations(module);
363 	mutex_unlock(&net_mutex);
364 }
365 EXPORT_SYMBOL_GPL(unregister_pernet_subsys);
366 
367 int register_pernet_gen_subsys(int *id, struct pernet_operations *ops)
368 {
369 	int rv;
370 
371 	mutex_lock(&net_mutex);
372 again:
373 	rv = ida_get_new_above(&net_generic_ids, 1, id);
374 	if (rv < 0) {
375 		if (rv == -EAGAIN) {
376 			ida_pre_get(&net_generic_ids, GFP_KERNEL);
377 			goto again;
378 		}
379 		goto out;
380 	}
381 	rv = register_pernet_operations(first_device, ops);
382 	if (rv < 0)
383 		ida_remove(&net_generic_ids, *id);
384 out:
385 	mutex_unlock(&net_mutex);
386 	return rv;
387 }
388 EXPORT_SYMBOL_GPL(register_pernet_gen_subsys);
389 
/*
 * Undo register_pernet_gen_subsys(): unregister @ops (running its exit
 * method on every existing namespace) and then release @id back to
 * net_generic_ids, all under net_mutex.
 */
void unregister_pernet_gen_subsys(int id, struct pernet_operations *ops)
{
	mutex_lock(&net_mutex);
	unregister_pernet_operations(ops);
	/* The id is only released once no exit method can still use it. */
	ida_remove(&net_generic_ids, id);
	mutex_unlock(&net_mutex);
}
EXPORT_SYMBOL_GPL(unregister_pernet_gen_subsys);
398 
399 /**
400  *      register_pernet_device - register a network namespace device
401  *	@ops:  pernet operations structure for the subsystem
402  *
403  *	Register a device which has init and exit functions
404  *	that are called when network namespaces are created and
405  *	destroyed respectively.
406  *
 *	When registered, the init function is called for every
 *	existing network namespace, allowing kernel modules to have
 *	a race-free view of the set of network namespaces.
410  *
411  *	When a new network namespace is created all of the init
412  *	methods are called in the order in which they were registered.
413  *
414  *	When a network namespace is destroyed all of the exit methods
415  *	are called in the reverse of the order with which they were
416  *	registered.
417  */
418 int register_pernet_device(struct pernet_operations *ops)
419 {
420 	int error;
421 	mutex_lock(&net_mutex);
422 	error = register_pernet_operations(&pernet_list, ops);
423 	if (!error && (first_device == &pernet_list))
424 		first_device = &ops->list;
425 	mutex_unlock(&net_mutex);
426 	return error;
427 }
428 EXPORT_SYMBOL_GPL(register_pernet_device);
429 
430 int register_pernet_gen_device(int *id, struct pernet_operations *ops)
431 {
432 	int error;
433 	mutex_lock(&net_mutex);
434 again:
435 	error = ida_get_new_above(&net_generic_ids, 1, id);
436 	if (error) {
437 		if (error == -EAGAIN) {
438 			ida_pre_get(&net_generic_ids, GFP_KERNEL);
439 			goto again;
440 		}
441 		goto out;
442 	}
443 	error = register_pernet_operations(&pernet_list, ops);
444 	if (error)
445 		ida_remove(&net_generic_ids, *id);
446 	else if (first_device == &pernet_list)
447 		first_device = &ops->list;
448 out:
449 	mutex_unlock(&net_mutex);
450 	return error;
451 }
452 EXPORT_SYMBOL_GPL(register_pernet_gen_device);
453 
454 /**
455  *      unregister_pernet_device - unregister a network namespace netdevice
456  *	@ops: pernet operations structure to manipulate
457  *
458  *	Remove the pernet operations structure from the list to be
459  *	used when network namespaces are created or destroyed.  In
460  *	addition run the exit method for all existing network
461  *	namespaces.
462  */
463 void unregister_pernet_device(struct pernet_operations *ops)
464 {
465 	mutex_lock(&net_mutex);
466 	if (&ops->list == first_device)
467 		first_device = first_device->next;
468 	unregister_pernet_operations(ops);
469 	mutex_unlock(&net_mutex);
470 }
471 EXPORT_SYMBOL_GPL(unregister_pernet_device);
472 
473 void unregister_pernet_gen_device(int id, struct pernet_operations *ops)
474 {
475 	mutex_lock(&net_mutex);
476 	if (&ops->list == first_device)
477 		first_device = first_device->next;
478 	unregister_pernet_operations(ops);
479 	ida_remove(&net_generic_ids, id);
480 	mutex_unlock(&net_mutex);
481 }
482 EXPORT_SYMBOL_GPL(unregister_pernet_gen_device);
483 
484 static void net_generic_release(struct rcu_head *rcu)
485 {
486 	struct net_generic *ng;
487 
488 	ng = container_of(rcu, struct net_generic, rcu);
489 	kfree(ng);
490 }
491 
492 int net_assign_generic(struct net *net, int id, void *data)
493 {
494 	struct net_generic *ng, *old_ng;
495 
496 	BUG_ON(!mutex_is_locked(&net_mutex));
497 	BUG_ON(id == 0);
498 
499 	ng = old_ng = net->gen;
500 	if (old_ng->len >= id)
501 		goto assign;
502 
503 	ng = kzalloc(sizeof(struct net_generic) +
504 			id * sizeof(void *), GFP_KERNEL);
505 	if (ng == NULL)
506 		return -ENOMEM;
507 
508 	/*
509 	 * Some synchronisation notes:
510 	 *
511 	 * The net_generic explores the net->gen array inside rcu
512 	 * read section. Besides once set the net->gen->ptr[x]
513 	 * pointer never changes (see rules in netns/generic.h).
514 	 *
515 	 * That said, we simply duplicate this array and schedule
516 	 * the old copy for kfree after a grace period.
517 	 */
518 
519 	ng->len = id;
520 	memcpy(&ng->ptr, &old_ng->ptr, old_ng->len * sizeof(void*));
521 
522 	rcu_assign_pointer(net->gen, ng);
523 	call_rcu(&old_ng->rcu, net_generic_release);
524 assign:
525 	ng->ptr[id - 1] = data;
526 	return 0;
527 }
528 EXPORT_SYMBOL_GPL(net_assign_generic);
529