xref: /openbmc/linux/net/core/net_namespace.c (revision 11a28d37)
#include <linux/workqueue.h>
#include <linux/rtnetlink.h>
#include <linux/cache.h>
#include <linux/slab.h>
#include <linux/list.h>
#include <linux/delay.h>
#include <linux/sched.h>
#include <linux/idr.h>
#include <linux/rculist.h>
#include <net/net_namespace.h>
#include <net/netns/generic.h>

/*
 *	Our network namespace constructor/destructor lists
 */

static LIST_HEAD(pernet_list);
static struct list_head *first_device = &pernet_list;
static DEFINE_MUTEX(net_mutex);

LIST_HEAD(net_namespace_list);
EXPORT_SYMBOL_GPL(net_namespace_list);

struct net init_net;
EXPORT_SYMBOL(init_net);

#define INITIAL_NET_GEN_PTRS	13 /* +1 for len +2 for rcu_head */
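
/*
 * Sizing gloss (editor's note, assuming the struct net_generic layout in
 * net/netns/generic.h: an integer len, an rcu_head and a flexible array of
 * pointers): 13 data slots plus roughly one pointer-sized word for len and
 * two for the rcu_head keep the initial allocation at about 16 pointer-sized
 * words, i.e. 128 bytes on 64-bit and 64 bytes on 32-bit.
 */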

/*
 * setup_net runs the initializers for the network namespace object.
 */
static __net_init int setup_net(struct net *net)
{
	/* Must be called with net_mutex held */
	struct pernet_operations *ops;
	int error = 0;

	atomic_set(&net->count, 1);

#ifdef NETNS_REFCNT_DEBUG
	atomic_set(&net->use_count, 0);
#endif

	list_for_each_entry(ops, &pernet_list, list) {
		if (ops->init) {
			error = ops->init(net);
			if (error < 0)
				goto out_undo;
		}
	}
out:
	return error;

out_undo:
	/* Walk through the list backwards calling the exit functions
	 * for the pernet modules whose init functions did not fail.
	 */
	list_for_each_entry_continue_reverse(ops, &pernet_list, list) {
		if (ops->exit)
			ops->exit(net);
	}

	/* Wait for any rcu callbacks queued by the exit methods above,
	 * so the caller can safely free this half-constructed namespace.
	 */
	rcu_barrier();
	goto out;
}

static struct net_generic *net_alloc_generic(void)
{
	struct net_generic *ng;
	size_t generic_size = sizeof(struct net_generic) +
		INITIAL_NET_GEN_PTRS * sizeof(void *);

	ng = kzalloc(generic_size, GFP_KERNEL);
	if (ng)
		ng->len = INITIAL_NET_GEN_PTRS;

	return ng;
}

#ifdef CONFIG_NET_NS
static struct kmem_cache *net_cachep;
static struct workqueue_struct *netns_wq;

static struct net *net_alloc(void)
{
	struct net *net = NULL;
	struct net_generic *ng;

	ng = net_alloc_generic();
	if (!ng)
		goto out;

	net = kmem_cache_zalloc(net_cachep, GFP_KERNEL);
	if (!net)
		goto out_free;

	rcu_assign_pointer(net->gen, ng);
out:
	return net;

out_free:
	kfree(ng);
	goto out;
}

static void net_free(struct net *net)
{
#ifdef NETNS_REFCNT_DEBUG
	if (unlikely(atomic_read(&net->use_count) != 0)) {
		printk(KERN_EMERG "network namespace not free! Usage: %d\n",
			atomic_read(&net->use_count));
		return;
	}
#endif
	kfree(net->gen);
	kmem_cache_free(net_cachep, net);
}

static struct net *net_create(void)
{
	struct net *net;
	int rv;

	net = net_alloc();
	if (!net)
		return ERR_PTR(-ENOMEM);
	mutex_lock(&net_mutex);
	rv = setup_net(net);
	if (rv == 0) {
		rtnl_lock();
		list_add_tail_rcu(&net->list, &net_namespace_list);
		rtnl_unlock();
	}
	mutex_unlock(&net_mutex);
	if (rv < 0) {
		net_free(net);
		return ERR_PTR(rv);
	}
	return net;
}

struct net *copy_net_ns(unsigned long flags, struct net *old_net)
{
	if (!(flags & CLONE_NEWNET))
		return get_net(old_net);
	return net_create();
}
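
/*
 * Usage sketch (illustrative only, not part of the original file):
 * copy_net_ns() is typically reached through the task namespace-copy path
 * when a new network namespace is requested, e.g. from userspace with
 *
 *	if (unshare(CLONE_NEWNET) < 0)
 *		perror("unshare");
 *
 * or the equivalent clone() flag. Without CLONE_NEWNET the parent's
 * namespace is simply reference-counted and shared.
 */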

static void cleanup_net(struct work_struct *work)
{
	struct pernet_operations *ops;
	struct net *net;

	net = container_of(work, struct net, work);

	mutex_lock(&net_mutex);

	/* Don't let anyone else find us. */
	rtnl_lock();
	list_del_rcu(&net->list);
	rtnl_unlock();

	/*
	 * Another CPU might be rcu-iterating the list, wait for it.
	 * This needs to be before calling the exit() notifiers, so
	 * the rcu_barrier() below isn't sufficient alone.
	 */
	synchronize_rcu();

	/* Run all of the network namespace exit methods */
	list_for_each_entry_reverse(ops, &pernet_list, list) {
		if (ops->exit)
			ops->exit(net);
	}

	mutex_unlock(&net_mutex);

	/* Ensure there are no outstanding rcu callbacks using this
	 * network namespace.
	 */
	rcu_barrier();

	/* Finally it is safe to free my network namespace structure */
	net_free(net);
}

void __put_net(struct net *net)
{
	/* Cleanup the network namespace in process context */
	INIT_WORK(&net->work, cleanup_net);
	queue_work(netns_wq, &net->work);
}
EXPORT_SYMBOL_GPL(__put_net);
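
/*
 * Reference-counting sketch (a minimal illustration, assuming the
 * get_net()/put_net() helpers declared in include/net/net_namespace.h):
 * code that must keep a namespace alive across a sleep or an asynchronous
 * callback pins it first; the final put_net() ends up in __put_net() above,
 * which defers teardown to the "netns" workqueue.
 *
 *	struct net *net = get_net(sock_net(sk));
 *	...
 *	put_net(net);	- the last put schedules cleanup_net()
 */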

#else
struct net *copy_net_ns(unsigned long flags, struct net *old_net)
{
	if (flags & CLONE_NEWNET)
		return ERR_PTR(-EINVAL);
	return old_net;
}
#endif

static int __init net_ns_init(void)
{
	struct net_generic *ng;

#ifdef CONFIG_NET_NS
	net_cachep = kmem_cache_create("net_namespace", sizeof(struct net),
					SMP_CACHE_BYTES,
					SLAB_PANIC, NULL);

	/* Create workqueue for cleanup */
	netns_wq = create_singlethread_workqueue("netns");
	if (!netns_wq)
		panic("Could not create netns workq");
#endif

	ng = net_alloc_generic();
	if (!ng)
		panic("Could not allocate generic netns");

	rcu_assign_pointer(init_net.gen, ng);

	mutex_lock(&net_mutex);
	if (setup_net(&init_net))
		panic("Could not setup the initial network namespace");

	rtnl_lock();
	list_add_tail_rcu(&init_net.list, &net_namespace_list);
	rtnl_unlock();

	mutex_unlock(&net_mutex);

	return 0;
}

pure_initcall(net_ns_init);
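
/*
 * Ordering note (editor's gloss): pure_initcall() runs at the earliest
 * initcall level, so init_net and the "netns" workqueue are set up before
 * the later initcalls that register pernet subsystems and devices through
 * the interfaces below.
 */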

#ifdef CONFIG_NET_NS
static int register_pernet_operations(struct list_head *list,
				      struct pernet_operations *ops)
{
	struct net *net, *undo_net;
	int error;

	list_add_tail(&ops->list, list);
	if (ops->init) {
		for_each_net(net) {
			error = ops->init(net);
			if (error)
				goto out_undo;
		}
	}
	return 0;

out_undo:
	/* If I have an error cleanup all namespaces I initialized */
	list_del(&ops->list);
	if (ops->exit) {
		for_each_net(undo_net) {
			if (undo_net == net)
				goto undone;
			ops->exit(undo_net);
		}
	}
undone:
	return error;
}

static void unregister_pernet_operations(struct pernet_operations *ops)
{
	struct net *net;

	list_del(&ops->list);
	if (ops->exit)
		for_each_net(net)
			ops->exit(net);
}

#else

static int register_pernet_operations(struct list_head *list,
				      struct pernet_operations *ops)
{
	if (ops->init == NULL)
		return 0;
	return ops->init(&init_net);
}

static void unregister_pernet_operations(struct pernet_operations *ops)
{
	if (ops->exit)
		ops->exit(&init_net);
}
#endif

static DEFINE_IDA(net_generic_ids);
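
/*
 * The ida above hands out the ids used to index net->gen->ptr[]. The
 * callers below always allocate starting from 1 (ida_get_new_above(..., 1,
 * ...)) because net_assign_generic() treats id 0 as invalid.
 */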

/**
 *	register_pernet_subsys - register a network namespace subsystem
 *	@ops:  pernet operations structure for the subsystem
 *
 *	Register a subsystem which has init and exit functions
 *	that are called when network namespaces are created and
 *	destroyed respectively.
 *
 *	When registered, all network namespace init functions are
 *	called for every existing network namespace, allowing kernel
 *	modules to have a race-free view of the set of network namespaces.
 *
 *	When a new network namespace is created all of the init
 *	methods are called in the order in which they were registered.
 *
 *	When a network namespace is destroyed all of the exit methods
 *	are called in the reverse of the order in which they were
 *	registered.
 */
int register_pernet_subsys(struct pernet_operations *ops)
{
	int error;
	mutex_lock(&net_mutex);
	error = register_pernet_operations(first_device, ops);
	mutex_unlock(&net_mutex);
	return error;
}
EXPORT_SYMBOL_GPL(register_pernet_subsys);
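
/*
 * Usage sketch (a minimal illustration built only on the interfaces in this
 * file; the "foo" names are hypothetical): a subsystem supplies per-netns
 * init/exit hooks and registers them once at module init. Returning a
 * negative errno from the init hook aborts setup of that namespace and
 * triggers the rollback in setup_net()/register_pernet_operations().
 *
 *	static int foo_net_init(struct net *net)
 *	{
 *		return 0;
 *	}
 *
 *	static void foo_net_exit(struct net *net)
 *	{
 *	}
 *
 *	static struct pernet_operations foo_net_ops = {
 *		.init = foo_net_init,
 *		.exit = foo_net_exit,
 *	};
 *
 *	static int __init foo_init(void)
 *	{
 *		return register_pernet_subsys(&foo_net_ops);
 *	}
 *
 *	static void __exit foo_exit(void)
 *	{
 *		unregister_pernet_subsys(&foo_net_ops);
 *	}
 *
 *	module_init(foo_init);
 *	module_exit(foo_exit);
 *
 * register_pernet_subsys() immediately runs foo_net_init() for every
 * namespace that already exists; namespaces created or destroyed later get
 * the hooks via setup_net() and cleanup_net().
 */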

/**
 *	unregister_pernet_subsys - unregister a network namespace subsystem
 *	@ops: pernet operations structure to manipulate
 *
 *	Remove the pernet operations structure from the list to be
 *	used when network namespaces are created or destroyed.  In
 *	addition run the exit method for all existing network
 *	namespaces.
 */
void unregister_pernet_subsys(struct pernet_operations *ops)
{
	mutex_lock(&net_mutex);
	unregister_pernet_operations(ops);
	mutex_unlock(&net_mutex);
}
EXPORT_SYMBOL_GPL(unregister_pernet_subsys);

int register_pernet_gen_subsys(int *id, struct pernet_operations *ops)
{
	int rv;

	mutex_lock(&net_mutex);
again:
	rv = ida_get_new_above(&net_generic_ids, 1, id);
	if (rv < 0) {
		if (rv == -EAGAIN) {
			ida_pre_get(&net_generic_ids, GFP_KERNEL);
			goto again;
		}
		goto out;
	}
	rv = register_pernet_operations(first_device, ops);
	if (rv < 0)
		ida_remove(&net_generic_ids, *id);
out:
	mutex_unlock(&net_mutex);
	return rv;
}
EXPORT_SYMBOL_GPL(register_pernet_gen_subsys);
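
/*
 * Usage sketch (illustrative only; the "foo" names are hypothetical): the
 * _gen_ variants additionally reserve a slot in each namespace's generic
 * pointer array, so a module can hang private per-netns state off struct
 * net without touching its layout. A typical pattern combines
 * net_assign_generic() below with net_generic() from net/netns/generic.h:
 *
 *	static int foo_net_id;
 *
 *	struct foo_net {
 *		int some_state;
 *	};
 *
 *	static int foo_net_init(struct net *net)
 *	{
 *		struct foo_net *fn;
 *		int err;
 *
 *		fn = kzalloc(sizeof(*fn), GFP_KERNEL);
 *		if (!fn)
 *			return -ENOMEM;
 *		err = net_assign_generic(net, foo_net_id, fn);
 *		if (err)
 *			kfree(fn);
 *		return err;
 *	}
 *
 *	static void foo_net_exit(struct net *net)
 *	{
 *		kfree(net_generic(net, foo_net_id));
 *	}
 *
 *	static struct pernet_operations foo_net_ops = {
 *		.init = foo_net_init,
 *		.exit = foo_net_exit,
 *	};
 *
 *	...
 *	err = register_pernet_gen_subsys(&foo_net_id, &foo_net_ops);
 *
 * net_assign_generic() may be called from the init hook because the hooks
 * run with net_mutex held (see setup_net() and register_pernet_operations()).
 */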

void unregister_pernet_gen_subsys(int id, struct pernet_operations *ops)
{
	mutex_lock(&net_mutex);
	unregister_pernet_operations(ops);
	ida_remove(&net_generic_ids, id);
	mutex_unlock(&net_mutex);
}
EXPORT_SYMBOL_GPL(unregister_pernet_gen_subsys);

/**
 *	register_pernet_device - register a network namespace device
 *	@ops:  pernet operations structure for the device
 *
 *	Register a device which has init and exit functions
 *	that are called when network namespaces are created and
 *	destroyed respectively.
 *
 *	When registered, all network namespace init functions are
 *	called for every existing network namespace, allowing kernel
 *	modules to have a race-free view of the set of network namespaces.
 *
 *	When a new network namespace is created all of the init
 *	methods are called in the order in which they were registered.
 *
 *	When a network namespace is destroyed all of the exit methods
 *	are called in the reverse of the order in which they were
 *	registered.
 */
int register_pernet_device(struct pernet_operations *ops)
{
	int error;
	mutex_lock(&net_mutex);
	error = register_pernet_operations(&pernet_list, ops);
	if (!error && (first_device == &pernet_list))
		first_device = &ops->list;
	mutex_unlock(&net_mutex);
	return error;
}
EXPORT_SYMBOL_GPL(register_pernet_device);
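
/*
 * Ordering note (editor's gloss on first_device): subsystems are inserted
 * before first_device while devices are appended at the tail of pernet_list,
 * so subsystem init methods run before device init methods in setup_net(),
 * and device exit methods run before subsystem exit methods in cleanup_net(),
 * which walks the list in reverse.
 */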

int register_pernet_gen_device(int *id, struct pernet_operations *ops)
{
	int error;
	mutex_lock(&net_mutex);
again:
	error = ida_get_new_above(&net_generic_ids, 1, id);
	if (error) {
		if (error == -EAGAIN) {
			ida_pre_get(&net_generic_ids, GFP_KERNEL);
			goto again;
		}
		goto out;
	}
	error = register_pernet_operations(&pernet_list, ops);
	if (error)
		ida_remove(&net_generic_ids, *id);
	else if (first_device == &pernet_list)
		first_device = &ops->list;
out:
	mutex_unlock(&net_mutex);
	return error;
}
EXPORT_SYMBOL_GPL(register_pernet_gen_device);

/**
 *	unregister_pernet_device - unregister a network namespace device
 *	@ops: pernet operations structure to manipulate
 *
 *	Remove the pernet operations structure from the list to be
 *	used when network namespaces are created or destroyed.  In
 *	addition run the exit method for all existing network
 *	namespaces.
 */
void unregister_pernet_device(struct pernet_operations *ops)
{
	mutex_lock(&net_mutex);
	if (&ops->list == first_device)
		first_device = first_device->next;
	unregister_pernet_operations(ops);
	mutex_unlock(&net_mutex);
}
EXPORT_SYMBOL_GPL(unregister_pernet_device);

void unregister_pernet_gen_device(int id, struct pernet_operations *ops)
{
	mutex_lock(&net_mutex);
	if (&ops->list == first_device)
		first_device = first_device->next;
	unregister_pernet_operations(ops);
	ida_remove(&net_generic_ids, id);
	mutex_unlock(&net_mutex);
}
EXPORT_SYMBOL_GPL(unregister_pernet_gen_device);

static void net_generic_release(struct rcu_head *rcu)
{
	struct net_generic *ng;

	ng = container_of(rcu, struct net_generic, rcu);
	kfree(ng);
}

int net_assign_generic(struct net *net, int id, void *data)
{
	struct net_generic *ng, *old_ng;

	BUG_ON(!mutex_is_locked(&net_mutex));
	BUG_ON(id == 0);

	ng = old_ng = net->gen;
	if (old_ng->len >= id)
		goto assign;

	ng = kzalloc(sizeof(struct net_generic) +
			id * sizeof(void *), GFP_KERNEL);
	if (ng == NULL)
		return -ENOMEM;

	/*
	 * Some synchronisation notes:
	 *
	 * net_generic() explores the net->gen array inside an rcu read
	 * section. Besides, once set, the net->gen->ptr[x] pointer never
	 * changes (see rules in netns/generic.h).
	 *
	 * That said, we simply duplicate this array and schedule
	 * the old copy for kfree after a grace period.
	 */

	ng->len = id;
	memcpy(&ng->ptr, &old_ng->ptr, old_ng->len * sizeof(void *));

	rcu_assign_pointer(net->gen, ng);
	call_rcu(&old_ng->rcu, net_generic_release);
assign:
	ng->ptr[id - 1] = data;
	return 0;
}
EXPORT_SYMBOL_GPL(net_assign_generic);