1457c8996SThomas Gleixner // SPDX-License-Identifier: GPL-2.0-only
2e005d193SJoe Perches #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
3e005d193SJoe Perches
45f256becSEric W. Biederman #include <linux/workqueue.h>
55f256becSEric W. Biederman #include <linux/rtnetlink.h>
65f256becSEric W. Biederman #include <linux/cache.h>
75f256becSEric W. Biederman #include <linux/slab.h>
85f256becSEric W. Biederman #include <linux/list.h>
95f256becSEric W. Biederman #include <linux/delay.h>
109dd776b6SEric W. Biederman #include <linux/sched.h>
11c93cf61fSPavel Emelyanov #include <linux/idr.h>
1211a28d37SJohannes Berg #include <linux/rculist.h>
1330ffee84SJohannes Berg #include <linux/nsproxy.h>
140bb80f24SDavid Howells #include <linux/fs.h>
150bb80f24SDavid Howells #include <linux/proc_ns.h>
16f0630529SEric W. Biederman #include <linux/file.h>
17bc3b2d7fSPaul Gortmaker #include <linux/export.h>
18038e7332SEric W. Biederman #include <linux/user_namespace.h>
190c7aecd4SNicolas Dichtel #include <linux/net_namespace.h>
20f719ff9bSIngo Molnar #include <linux/sched/task.h>
21fbdeaed4STyler Hicks #include <linux/uidgid.h>
2292acdc58SDaniel Borkmann #include <linux/cookie.h>
2338e12408SAl Viro #include <linux/proc_fs.h>
24f719ff9bSIngo Molnar
250c7aecd4SNicolas Dichtel #include <net/sock.h>
260c7aecd4SNicolas Dichtel #include <net/netlink.h>
275f256becSEric W. Biederman #include <net/net_namespace.h>
28dec827d1SPavel Emelyanov #include <net/netns/generic.h>
295f256becSEric W. Biederman
305f256becSEric W. Biederman /*
315f256becSEric W. Biederman * Our network namespace constructor/destructor lists
325f256becSEric W. Biederman */
335f256becSEric W. Biederman
345f256becSEric W. Biederman static LIST_HEAD(pernet_list);
355f256becSEric W. Biederman static struct list_head *first_device = &pernet_list;
365f256becSEric W. Biederman
375f256becSEric W. Biederman LIST_HEAD(net_namespace_list);
38b76a461fSAlexey Dobriyan EXPORT_SYMBOL_GPL(net_namespace_list);
395f256becSEric W. Biederman
40f0b07bb1SKirill Tkhai /* Protects net_namespace_list. Nests iside rtnl_lock() */
41f0b07bb1SKirill Tkhai DECLARE_RWSEM(net_rwsem);
42f0b07bb1SKirill Tkhai EXPORT_SYMBOL_GPL(net_rwsem);
43f0b07bb1SKirill Tkhai
449b242610SDavid Howells #ifdef CONFIG_KEYS
459b242610SDavid Howells static struct key_tag init_net_key_domain = { .usage = REFCOUNT_INIT(1) };
469b242610SDavid Howells #endif
479b242610SDavid Howells
489c1be193SEric Dumazet struct net init_net;
49ff4b9502SDenis V. Lunev EXPORT_SYMBOL(init_net);
505f256becSEric W. Biederman
51f8c46cb3SDmitry Torokhov static bool init_net_initialized;
521a57feb8SKirill Tkhai /*
534420bf21SKirill Tkhai * pernet_ops_rwsem: protects: pernet_list, net_generic_ids,
541a57feb8SKirill Tkhai * init_net_initialized and first_device pointer.
558518e9bbSKirill Tkhai * This is internal net namespace object. Please, don't use it
568518e9bbSKirill Tkhai * outside.
571a57feb8SKirill Tkhai */
584420bf21SKirill Tkhai DECLARE_RWSEM(pernet_ops_rwsem);
59554873e5SKirill Tkhai EXPORT_SYMBOL_GPL(pernet_ops_rwsem);
60f8c46cb3SDmitry Torokhov
/*
 * Number of pointer-sized slots needed to cover sizeof(struct net_generic),
 * rounded up. net_assign_generic() refuses any id below this value.
 */
#define MIN_PERNET_OPS_ID	\
	((sizeof(struct net_generic) + sizeof(void *) - 1) / sizeof(void *))

#define INITIAL_NET_GEN_PTRS	13 /* +1 for len +2 for rcu_head */

/* Slot count used by net_alloc_generic() when sizing a new array. */
static unsigned int max_gen_ptrs = INITIAL_NET_GEN_PTRS;

DEFINE_COOKIE(net_cookie);
69f318903cSDaniel Borkmann
net_alloc_generic(void)70073862baSEric Dumazet static struct net_generic *net_alloc_generic(void)
71073862baSEric Dumazet {
72b6dbfd5bSThadeu Lima de Souza Cascardo unsigned int gen_ptrs = READ_ONCE(max_gen_ptrs);
73b6dbfd5bSThadeu Lima de Souza Cascardo unsigned int generic_size;
74073862baSEric Dumazet struct net_generic *ng;
75b6dbfd5bSThadeu Lima de Souza Cascardo
76b6dbfd5bSThadeu Lima de Souza Cascardo generic_size = offsetof(struct net_generic, ptr[gen_ptrs]);
77073862baSEric Dumazet
78073862baSEric Dumazet ng = kzalloc(generic_size, GFP_KERNEL);
79073862baSEric Dumazet if (ng)
80b6dbfd5bSThadeu Lima de Souza Cascardo ng->s.len = gen_ptrs;
81073862baSEric Dumazet
82073862baSEric Dumazet return ng;
83073862baSEric Dumazet }
84073862baSEric Dumazet
net_assign_generic(struct net * net,unsigned int id,void * data)85c7d03a00SAlexey Dobriyan static int net_assign_generic(struct net *net, unsigned int id, void *data)
8605fceb4aSJiri Pirko {
8705fceb4aSJiri Pirko struct net_generic *ng, *old_ng;
8805fceb4aSJiri Pirko
896af2d5ffSAlexey Dobriyan BUG_ON(id < MIN_PERNET_OPS_ID);
9005fceb4aSJiri Pirko
911c87733dSEric Dumazet old_ng = rcu_dereference_protected(net->gen,
924420bf21SKirill Tkhai lockdep_is_held(&pernet_ops_rwsem));
936af2d5ffSAlexey Dobriyan if (old_ng->s.len > id) {
946af2d5ffSAlexey Dobriyan old_ng->ptr[id] = data;
951a9a0592SAlexey Dobriyan return 0;
961a9a0592SAlexey Dobriyan }
9705fceb4aSJiri Pirko
98073862baSEric Dumazet ng = net_alloc_generic();
9941467d2fSYajun Deng if (!ng)
10005fceb4aSJiri Pirko return -ENOMEM;
10105fceb4aSJiri Pirko
10205fceb4aSJiri Pirko /*
10305fceb4aSJiri Pirko * Some synchronisation notes:
10405fceb4aSJiri Pirko *
10505fceb4aSJiri Pirko * The net_generic explores the net->gen array inside rcu
10605fceb4aSJiri Pirko * read section. Besides once set the net->gen->ptr[x]
10705fceb4aSJiri Pirko * pointer never changes (see rules in netns/generic.h).
10805fceb4aSJiri Pirko *
10905fceb4aSJiri Pirko * That said, we simply duplicate this array and schedule
11005fceb4aSJiri Pirko * the old copy for kfree after a grace period.
11105fceb4aSJiri Pirko */
11205fceb4aSJiri Pirko
1136af2d5ffSAlexey Dobriyan memcpy(&ng->ptr[MIN_PERNET_OPS_ID], &old_ng->ptr[MIN_PERNET_OPS_ID],
1146af2d5ffSAlexey Dobriyan (old_ng->s.len - MIN_PERNET_OPS_ID) * sizeof(void *));
1156af2d5ffSAlexey Dobriyan ng->ptr[id] = data;
11605fceb4aSJiri Pirko
11705fceb4aSJiri Pirko rcu_assign_pointer(net->gen, ng);
1189bfc7b99SAlexey Dobriyan kfree_rcu(old_ng, s.rcu);
11905fceb4aSJiri Pirko return 0;
12005fceb4aSJiri Pirko }
12105fceb4aSJiri Pirko
ops_init(const struct pernet_operations * ops,struct net * net)122f875bae0SEric W. Biederman static int ops_init(const struct pernet_operations *ops, struct net *net)
123f875bae0SEric W. Biederman {
124d266935aSZhengchao Shao struct net_generic *ng;
125b922934dSJulian Anastasov int err = -ENOMEM;
126b922934dSJulian Anastasov void *data = NULL;
127b922934dSJulian Anastasov
128f875bae0SEric W. Biederman if (ops->id && ops->size) {
129b922934dSJulian Anastasov data = kzalloc(ops->size, GFP_KERNEL);
130f875bae0SEric W. Biederman if (!data)
131b922934dSJulian Anastasov goto out;
132f875bae0SEric W. Biederman
133f875bae0SEric W. Biederman err = net_assign_generic(net, *ops->id, data);
134b922934dSJulian Anastasov if (err)
135b922934dSJulian Anastasov goto cleanup;
136f875bae0SEric W. Biederman }
137b922934dSJulian Anastasov err = 0;
138f875bae0SEric W. Biederman if (ops->init)
139b922934dSJulian Anastasov err = ops->init(net);
140b922934dSJulian Anastasov if (!err)
141f875bae0SEric W. Biederman return 0;
142b922934dSJulian Anastasov
143d266935aSZhengchao Shao if (ops->id && ops->size) {
144d266935aSZhengchao Shao ng = rcu_dereference_protected(net->gen,
145d266935aSZhengchao Shao lockdep_is_held(&pernet_ops_rwsem));
146d266935aSZhengchao Shao ng->ptr[*ops->id] = NULL;
147d266935aSZhengchao Shao }
148d266935aSZhengchao Shao
14971ab9c3eSPaolo Abeni cleanup:
150b922934dSJulian Anastasov kfree(data);
151b922934dSJulian Anastasov
152b922934dSJulian Anastasov out:
153b922934dSJulian Anastasov return err;
154f875bae0SEric W. Biederman }
155f875bae0SEric W. Biederman
ops_pre_exit_list(const struct pernet_operations * ops,struct list_head * net_exit_list)156d7d99872SEric Dumazet static void ops_pre_exit_list(const struct pernet_operations *ops,
157d7d99872SEric Dumazet struct list_head *net_exit_list)
158d7d99872SEric Dumazet {
159d7d99872SEric Dumazet struct net *net;
160d7d99872SEric Dumazet
161d7d99872SEric Dumazet if (ops->pre_exit) {
162d7d99872SEric Dumazet list_for_each_entry(net, net_exit_list, exit_list)
163d7d99872SEric Dumazet ops->pre_exit(net);
164d7d99872SEric Dumazet }
165d7d99872SEric Dumazet }
166d7d99872SEric Dumazet
ops_exit_list(const struct pernet_operations * ops,struct list_head * net_exit_list)16772ad937aSEric W. Biederman static void ops_exit_list(const struct pernet_operations *ops,
16872ad937aSEric W. Biederman struct list_head *net_exit_list)
16972ad937aSEric W. Biederman {
17072ad937aSEric W. Biederman struct net *net;
17172ad937aSEric W. Biederman if (ops->exit) {
1722836615aSEric Dumazet list_for_each_entry(net, net_exit_list, exit_list) {
17372ad937aSEric W. Biederman ops->exit(net);
1742836615aSEric Dumazet cond_resched();
1752836615aSEric Dumazet }
17672ad937aSEric W. Biederman }
17772ad937aSEric W. Biederman if (ops->exit_batch)
17872ad937aSEric W. Biederman ops->exit_batch(net_exit_list);
17972ad937aSEric W. Biederman }
18072ad937aSEric W. Biederman
ops_free_list(const struct pernet_operations * ops,struct list_head * net_exit_list)18172ad937aSEric W. Biederman static void ops_free_list(const struct pernet_operations *ops,
18272ad937aSEric W. Biederman struct list_head *net_exit_list)
18372ad937aSEric W. Biederman {
18472ad937aSEric W. Biederman struct net *net;
18572ad937aSEric W. Biederman if (ops->size && ops->id) {
18672ad937aSEric W. Biederman list_for_each_entry(net, net_exit_list, exit_list)
18741467d2fSYajun Deng kfree(net_generic(net, *ops->id));
18872ad937aSEric W. Biederman }
18972ad937aSEric W. Biederman }
19072ad937aSEric W. Biederman
19195f38411SNicolas Dichtel /* should be called with nsid_lock held */
alloc_netid(struct net * net,struct net * peer,int reqid)1920c7aecd4SNicolas Dichtel static int alloc_netid(struct net *net, struct net *peer, int reqid)
1930c7aecd4SNicolas Dichtel {
1943138dbf8SNicolas Dichtel int min = 0, max = 0;
1950c7aecd4SNicolas Dichtel
1960c7aecd4SNicolas Dichtel if (reqid >= 0) {
1970c7aecd4SNicolas Dichtel min = reqid;
1980c7aecd4SNicolas Dichtel max = reqid + 1;
1990c7aecd4SNicolas Dichtel }
2000c7aecd4SNicolas Dichtel
20195f38411SNicolas Dichtel return idr_alloc(&net->netns_ids, peer, min, max, GFP_ATOMIC);
2020c7aecd4SNicolas Dichtel }
2030c7aecd4SNicolas Dichtel
/*
 * Callback for idr_for_each(). When @net is the namespace being looked
 * for (@peer), the id is returned so that idr_for_each() stops. Because
 * returning 0 would not stop the walk, id 0 is reported as the magic
 * value NET_ID_ZERO (-1) instead.
 */
#define NET_ID_ZERO -1
static int net_eq_idr(int id, void *net, void *peer)
{
	if (!net_eq(net, peer))
		return 0;

	return id ? id : NET_ID_ZERO;
}
2160c7aecd4SNicolas Dichtel
2172dce224fSGuillaume Nault /* Must be called from RCU-critical section or with nsid_lock held */
__peernet2id(const struct net * net,struct net * peer)21849052941SGuillaume Nault static int __peernet2id(const struct net *net, struct net *peer)
2190c7aecd4SNicolas Dichtel {
2200c7aecd4SNicolas Dichtel int id = idr_for_each(&net->netns_ids, net_eq_idr, peer);
2213138dbf8SNicolas Dichtel
2220c7aecd4SNicolas Dichtel /* Magic value for id 0. */
2230c7aecd4SNicolas Dichtel if (id == NET_ID_ZERO)
2240c7aecd4SNicolas Dichtel return 0;
2250c7aecd4SNicolas Dichtel if (id > 0)
2260c7aecd4SNicolas Dichtel return id;
2270c7aecd4SNicolas Dichtel
228109582afSNicolas Dichtel return NETNSA_NSID_NOT_ASSIGNED;
2290c7aecd4SNicolas Dichtel }
2300c7aecd4SNicolas Dichtel
231993e4c92SNicolas Dichtel static void rtnl_net_notifyid(struct net *net, int cmd, int id, u32 portid,
232d4e4fdf9SGuillaume Nault struct nlmsghdr *nlh, gfp_t gfp);
2330c7aecd4SNicolas Dichtel /* This function returns the id of a peer netns. If no id is assigned, one will
2340c7aecd4SNicolas Dichtel * be allocated and returned.
2350c7aecd4SNicolas Dichtel */
peernet2id_alloc(struct net * net,struct net * peer,gfp_t gfp)236d4e4fdf9SGuillaume Nault int peernet2id_alloc(struct net *net, struct net *peer, gfp_t gfp)
2370c7aecd4SNicolas Dichtel {
2383138dbf8SNicolas Dichtel int id;
2390c7aecd4SNicolas Dichtel
2408b8f3e66SChristian Brauner if (refcount_read(&net->ns.count) == 0)
241cfc44a4dSWANG Cong return NETNSA_NSID_NOT_ASSIGNED;
24249052941SGuillaume Nault
243e1f469cdSTaehee Yoo spin_lock_bh(&net->nsid_lock);
24449052941SGuillaume Nault id = __peernet2id(net, peer);
24549052941SGuillaume Nault if (id >= 0) {
246e1f469cdSTaehee Yoo spin_unlock_bh(&net->nsid_lock);
24749052941SGuillaume Nault return id;
24849052941SGuillaume Nault }
24949052941SGuillaume Nault
25049052941SGuillaume Nault /* When peer is obtained from RCU lists, we may race with
2510c06bea9SKirill Tkhai * its cleanup. Check whether it's alive, and this guarantees
2520c06bea9SKirill Tkhai * we never hash a peer back to net->netns_ids, after it has
2530c06bea9SKirill Tkhai * just been idr_remove()'d from there in cleanup_net().
2540c06bea9SKirill Tkhai */
25549052941SGuillaume Nault if (!maybe_get_net(peer)) {
256e1f469cdSTaehee Yoo spin_unlock_bh(&net->nsid_lock);
25749052941SGuillaume Nault return NETNSA_NSID_NOT_ASSIGNED;
25849052941SGuillaume Nault }
25949052941SGuillaume Nault
26049052941SGuillaume Nault id = alloc_netid(net, peer, -1);
261e1f469cdSTaehee Yoo spin_unlock_bh(&net->nsid_lock);
26249052941SGuillaume Nault
2630c06bea9SKirill Tkhai put_net(peer);
26449052941SGuillaume Nault if (id < 0)
26549052941SGuillaume Nault return NETNSA_NSID_NOT_ASSIGNED;
26649052941SGuillaume Nault
26749052941SGuillaume Nault rtnl_net_notifyid(net, RTM_NEWNSID, id, 0, NULL, gfp);
26849052941SGuillaume Nault
2693138dbf8SNicolas Dichtel return id;
2700c7aecd4SNicolas Dichtel }
2717cbebc8aSJiri Benc EXPORT_SYMBOL_GPL(peernet2id_alloc);
2720c7aecd4SNicolas Dichtel
/*
 * Return the id @net has assigned to @peer, if any. The lookup runs
 * inside an RCU read-side critical section and never allocates.
 */
int peernet2id(const struct net *net, struct net *peer)
{
	int nsid;

	rcu_read_lock();
	nsid = __peernet2id(net, peer);
	rcu_read_unlock();

	return nsid;
}
EXPORT_SYMBOL(peernet2id);
28595f38411SNicolas Dichtel
28659324cf3SNicolas Dichtel /* This function returns true is the peer netns has an id assigned into the
28759324cf3SNicolas Dichtel * current netns.
28859324cf3SNicolas Dichtel */
peernet_has_id(const struct net * net,struct net * peer)28956f200c7SGuillaume Nault bool peernet_has_id(const struct net *net, struct net *peer)
29059324cf3SNicolas Dichtel {
29159324cf3SNicolas Dichtel return peernet2id(net, peer) >= 0;
29259324cf3SNicolas Dichtel }
29359324cf3SNicolas Dichtel
get_net_ns_by_id(const struct net * net,int id)29456f200c7SGuillaume Nault struct net *get_net_ns_by_id(const struct net *net, int id)
2950c7aecd4SNicolas Dichtel {
2960c7aecd4SNicolas Dichtel struct net *peer;
2970c7aecd4SNicolas Dichtel
2980c7aecd4SNicolas Dichtel if (id < 0)
2990c7aecd4SNicolas Dichtel return NULL;
3000c7aecd4SNicolas Dichtel
3010c7aecd4SNicolas Dichtel rcu_read_lock();
3020c7aecd4SNicolas Dichtel peer = idr_find(&net->netns_ids, id);
3030c7aecd4SNicolas Dichtel if (peer)
30421b59443SEric W. Biederman peer = maybe_get_net(peer);
3050c7aecd4SNicolas Dichtel rcu_read_unlock();
3060c7aecd4SNicolas Dichtel
3070c7aecd4SNicolas Dichtel return peer;
3080c7aecd4SNicolas Dichtel }
309b4c2b959SKumar Kartikeya Dwivedi EXPORT_SYMBOL_GPL(get_net_ns_by_id);
3100c7aecd4SNicolas Dichtel
3116e77a5a4SEric Dumazet /* init code that must occur even if setup_net() is not called. */
preinit_net(struct net * net)3126e77a5a4SEric Dumazet static __net_init void preinit_net(struct net *net)
3136e77a5a4SEric Dumazet {
314b6d7c0ebSAndrzej Hajda ref_tracker_dir_init(&net->notrefcnt_tracker, 128, "net notrefcnt");
3156e77a5a4SEric Dumazet }
3166e77a5a4SEric Dumazet
3176a1a3b9fSPavel Emelyanov /*
3186a1a3b9fSPavel Emelyanov * setup_net runs the initializers for the network namespace object.
3196a1a3b9fSPavel Emelyanov */
setup_net(struct net * net,struct user_namespace * user_ns)320038e7332SEric W. Biederman static __net_init int setup_net(struct net *net, struct user_namespace *user_ns)
3216a1a3b9fSPavel Emelyanov {
3224420bf21SKirill Tkhai /* Must be called with pernet_ops_rwsem held */
323f875bae0SEric W. Biederman const struct pernet_operations *ops, *saved_ops;
324486a87f1SDaniel Lezcano int error = 0;
32572ad937aSEric W. Biederman LIST_HEAD(net_exit_list);
3266a1a3b9fSPavel Emelyanov
3278b8f3e66SChristian Brauner refcount_set(&net->ns.count, 1);
328b6d7c0ebSAndrzej Hajda ref_tracker_dir_init(&net->refcnt_tracker, 128, "net refcnt");
3299ba74e6cSEric Dumazet
330c122e14dSReshetova, Elena refcount_set(&net->passive, 1);
331355b9855SEric Dumazet get_random_bytes(&net->hash_mix, sizeof(u32));
3323d368ab8SEric Dumazet preempt_disable();
3333d368ab8SEric Dumazet net->net_cookie = gen_cookie_next(&net_cookie);
3343d368ab8SEric Dumazet preempt_enable();
3354e985adaSThomas Graf net->dev_base_seq = 1;
336038e7332SEric W. Biederman net->user_ns = user_ns;
3370c7aecd4SNicolas Dichtel idr_init(&net->netns_ids);
338de133464SWANG Cong spin_lock_init(&net->nsid_lock);
339d9ff3049SKirill Tkhai mutex_init(&net->ipv4.ra_mutex);
340486a87f1SDaniel Lezcano
3416a1a3b9fSPavel Emelyanov list_for_each_entry(ops, &pernet_list, list) {
342f875bae0SEric W. Biederman error = ops_init(ops, net);
3436a1a3b9fSPavel Emelyanov if (error < 0)
3446a1a3b9fSPavel Emelyanov goto out_undo;
3456a1a3b9fSPavel Emelyanov }
346f0b07bb1SKirill Tkhai down_write(&net_rwsem);
34798f6c533SKirill Tkhai list_add_tail_rcu(&net->list, &net_namespace_list);
348f0b07bb1SKirill Tkhai up_write(&net_rwsem);
3496a1a3b9fSPavel Emelyanov out:
3506a1a3b9fSPavel Emelyanov return error;
3516a1a3b9fSPavel Emelyanov
3526a1a3b9fSPavel Emelyanov out_undo:
3536a1a3b9fSPavel Emelyanov /* Walk through the list backwards calling the exit functions
3546a1a3b9fSPavel Emelyanov * for the pernet modules whose init functions did not fail.
3556a1a3b9fSPavel Emelyanov */
35672ad937aSEric W. Biederman list_add(&net->exit_list, &net_exit_list);
357f875bae0SEric W. Biederman saved_ops = ops;
35872ad937aSEric W. Biederman list_for_each_entry_continue_reverse(ops, &pernet_list, list)
359d7d99872SEric Dumazet ops_pre_exit_list(ops, &net_exit_list);
360d7d99872SEric Dumazet
361d7d99872SEric Dumazet synchronize_rcu();
362d7d99872SEric Dumazet
363b272a0adSLi RongQing ops = saved_ops;
364d7d99872SEric Dumazet list_for_each_entry_continue_reverse(ops, &pernet_list, list)
36572ad937aSEric W. Biederman ops_exit_list(ops, &net_exit_list);
36672ad937aSEric W. Biederman
367f875bae0SEric W. Biederman ops = saved_ops;
368f875bae0SEric W. Biederman list_for_each_entry_continue_reverse(ops, &pernet_list, list)
36972ad937aSEric W. Biederman ops_free_list(ops, &net_exit_list);
3706a1a3b9fSPavel Emelyanov
3716a1a3b9fSPavel Emelyanov rcu_barrier();
3726a1a3b9fSPavel Emelyanov goto out;
3736a1a3b9fSPavel Emelyanov }
3746a1a3b9fSPavel Emelyanov
net_defaults_init_net(struct net * net)3757c3f1875SRoman Kapl static int __net_init net_defaults_init_net(struct net *net)
3767c3f1875SRoman Kapl {
3777c3f1875SRoman Kapl net->core.sysctl_somaxconn = SOMAXCONN;
378e187013aSAkhmat Karakotov net->core.sysctl_txrehash = SOCK_TXREHASH_ENABLED;
379e187013aSAkhmat Karakotov
3807c3f1875SRoman Kapl return 0;
3817c3f1875SRoman Kapl }
3827c3f1875SRoman Kapl
3837c3f1875SRoman Kapl static struct pernet_operations net_defaults_ops = {
3847c3f1875SRoman Kapl .init = net_defaults_init_net,
3857c3f1875SRoman Kapl };
3867c3f1875SRoman Kapl
net_defaults_init(void)3877c3f1875SRoman Kapl static __init int net_defaults_init(void)
3887c3f1875SRoman Kapl {
3897c3f1875SRoman Kapl if (register_pernet_subsys(&net_defaults_ops))
3907c3f1875SRoman Kapl panic("Cannot initialize net default settings");
3917c3f1875SRoman Kapl
3927c3f1875SRoman Kapl return 0;
3937c3f1875SRoman Kapl }
3947c3f1875SRoman Kapl
3957c3f1875SRoman Kapl core_initcall(net_defaults_init);
396486a87f1SDaniel Lezcano
397ebe47d47SClemens Noss #ifdef CONFIG_NET_NS
inc_net_namespaces(struct user_namespace * ns)3982ed6afdeSArnd Bergmann static struct ucounts *inc_net_namespaces(struct user_namespace *ns)
3992ed6afdeSArnd Bergmann {
4002ed6afdeSArnd Bergmann return inc_ucount(ns, current_euid(), UCOUNT_NET_NAMESPACES);
4012ed6afdeSArnd Bergmann }
4022ed6afdeSArnd Bergmann
dec_net_namespaces(struct ucounts * ucounts)4032ed6afdeSArnd Bergmann static void dec_net_namespaces(struct ucounts *ucounts)
4042ed6afdeSArnd Bergmann {
4052ed6afdeSArnd Bergmann dec_ucount(ucounts, UCOUNT_NET_NAMESPACES);
4062ed6afdeSArnd Bergmann }
4072ed6afdeSArnd Bergmann
40808009a76SAlexey Dobriyan static struct kmem_cache *net_cachep __ro_after_init;
409ebe47d47SClemens Noss static struct workqueue_struct *netns_wq;
410ebe47d47SClemens Noss
net_alloc(void)4115f256becSEric W. Biederman static struct net *net_alloc(void)
4125f256becSEric W. Biederman {
413486a87f1SDaniel Lezcano struct net *net = NULL;
414486a87f1SDaniel Lezcano struct net_generic *ng;
415486a87f1SDaniel Lezcano
416486a87f1SDaniel Lezcano ng = net_alloc_generic();
417486a87f1SDaniel Lezcano if (!ng)
418486a87f1SDaniel Lezcano goto out;
419486a87f1SDaniel Lezcano
420486a87f1SDaniel Lezcano net = kmem_cache_zalloc(net_cachep, GFP_KERNEL);
421486a87f1SDaniel Lezcano if (!net)
422486a87f1SDaniel Lezcano goto out_free;
423486a87f1SDaniel Lezcano
4249b242610SDavid Howells #ifdef CONFIG_KEYS
4259b242610SDavid Howells net->key_domain = kzalloc(sizeof(struct key_tag), GFP_KERNEL);
4269b242610SDavid Howells if (!net->key_domain)
4279b242610SDavid Howells goto out_free_2;
4289b242610SDavid Howells refcount_set(&net->key_domain->usage, 1);
4299b242610SDavid Howells #endif
4309b242610SDavid Howells
431486a87f1SDaniel Lezcano rcu_assign_pointer(net->gen, ng);
432486a87f1SDaniel Lezcano out:
433486a87f1SDaniel Lezcano return net;
434486a87f1SDaniel Lezcano
4359b242610SDavid Howells #ifdef CONFIG_KEYS
4369b242610SDavid Howells out_free_2:
4379b242610SDavid Howells kmem_cache_free(net_cachep, net);
4389b242610SDavid Howells net = NULL;
4399b242610SDavid Howells #endif
440486a87f1SDaniel Lezcano out_free:
441486a87f1SDaniel Lezcano kfree(ng);
442486a87f1SDaniel Lezcano goto out;
4435f256becSEric W. Biederman }
4445f256becSEric W. Biederman
445*b7a79e51SEric Dumazet static LLIST_HEAD(defer_free_list);
446*b7a79e51SEric Dumazet
net_complete_free(void)447*b7a79e51SEric Dumazet static void net_complete_free(void)
448*b7a79e51SEric Dumazet {
449*b7a79e51SEric Dumazet struct llist_node *kill_list;
450*b7a79e51SEric Dumazet struct net *net, *next;
451*b7a79e51SEric Dumazet
452*b7a79e51SEric Dumazet /* Get the list of namespaces to free from last round. */
453*b7a79e51SEric Dumazet kill_list = llist_del_all(&defer_free_list);
454*b7a79e51SEric Dumazet
455*b7a79e51SEric Dumazet llist_for_each_entry_safe(net, next, kill_list, defer_free_list)
456*b7a79e51SEric Dumazet kmem_cache_free(net_cachep, net);
457*b7a79e51SEric Dumazet
458*b7a79e51SEric Dumazet }
459*b7a79e51SEric Dumazet
net_free(struct net * net)46045a19b0aSJohann Felix Soden static void net_free(struct net *net)
46145a19b0aSJohann Felix Soden {
46241467d2fSYajun Deng if (refcount_dec_and_test(&net->passive)) {
463416c51e1SEric Dumazet kfree(rcu_access_pointer(net->gen));
4640cafd77dSEric Dumazet
4650cafd77dSEric Dumazet /* There should not be any trackers left there. */
4660cafd77dSEric Dumazet ref_tracker_dir_exit(&net->notrefcnt_tracker);
4670cafd77dSEric Dumazet
468*b7a79e51SEric Dumazet /* Wait for an extra rcu_barrier() before final free. */
469*b7a79e51SEric Dumazet llist_add(&net->defer_free_list, &defer_free_list);
47045a19b0aSJohann Felix Soden }
47141467d2fSYajun Deng }
47245a19b0aSJohann Felix Soden
/*
 * Type-erased wrapper around net_free(): @p is treated as a struct net
 * pointer, and a NULL @p is ignored.
 */
void net_drop_ns(void *p)
{
	struct net *ns = p;

	if (!ns)
		return;

	net_free(ns);
}
480a685e089SAl Viro
copy_net_ns(unsigned long flags,struct user_namespace * user_ns,struct net * old_net)481038e7332SEric W. Biederman struct net *copy_net_ns(unsigned long flags,
482038e7332SEric W. Biederman struct user_namespace *user_ns, struct net *old_net)
4836a1a3b9fSPavel Emelyanov {
48470328660SEric W. Biederman struct ucounts *ucounts;
485088eb2d9SAlexey Dobriyan struct net *net;
486088eb2d9SAlexey Dobriyan int rv;
4876a1a3b9fSPavel Emelyanov
488911cb193SRob Landley if (!(flags & CLONE_NEWNET))
489911cb193SRob Landley return get_net(old_net);
490911cb193SRob Landley
49170328660SEric W. Biederman ucounts = inc_net_namespaces(user_ns);
49270328660SEric W. Biederman if (!ucounts)
493df75e774SEric W. Biederman return ERR_PTR(-ENOSPC);
49470328660SEric W. Biederman
495088eb2d9SAlexey Dobriyan net = net_alloc();
49670328660SEric W. Biederman if (!net) {
4975ba049a5SKirill Tkhai rv = -ENOMEM;
4985ba049a5SKirill Tkhai goto dec_ucounts;
49970328660SEric W. Biederman }
5006e77a5a4SEric Dumazet
5016e77a5a4SEric Dumazet preinit_net(net);
5025ba049a5SKirill Tkhai refcount_set(&net->passive, 1);
5035ba049a5SKirill Tkhai net->ucounts = ucounts;
504038e7332SEric W. Biederman get_user_ns(user_ns);
505094374e5SKirill Tkhai
5064420bf21SKirill Tkhai rv = down_read_killable(&pernet_ops_rwsem);
5075ba049a5SKirill Tkhai if (rv < 0)
5085ba049a5SKirill Tkhai goto put_userns;
50919efbd93SKirill Tkhai
510038e7332SEric W. Biederman rv = setup_net(net, user_ns);
51119efbd93SKirill Tkhai
5124420bf21SKirill Tkhai up_read(&pernet_ops_rwsem);
51319efbd93SKirill Tkhai
514088eb2d9SAlexey Dobriyan if (rv < 0) {
5155ba049a5SKirill Tkhai put_userns:
516aed0826bSYajun Deng #ifdef CONFIG_KEYS
51782ecff65STakeshi Misawa key_remove_domain(net->key_domain);
518aed0826bSYajun Deng #endif
519038e7332SEric W. Biederman put_user_ns(user_ns);
52041467d2fSYajun Deng net_free(net);
5215ba049a5SKirill Tkhai dec_ucounts:
5225ba049a5SKirill Tkhai dec_net_namespaces(ucounts);
523088eb2d9SAlexey Dobriyan return ERR_PTR(rv);
524088eb2d9SAlexey Dobriyan }
525088eb2d9SAlexey Dobriyan return net;
526088eb2d9SAlexey Dobriyan }
527486a87f1SDaniel Lezcano
528fbdeaed4STyler Hicks /**
529fbdeaed4STyler Hicks * net_ns_get_ownership - get sysfs ownership data for @net
530fbdeaed4STyler Hicks * @net: network namespace in question (can be NULL)
531fbdeaed4STyler Hicks * @uid: kernel user ID for sysfs objects
532fbdeaed4STyler Hicks * @gid: kernel group ID for sysfs objects
533fbdeaed4STyler Hicks *
534fbdeaed4STyler Hicks * Returns the uid/gid pair of root in the user namespace associated with the
535fbdeaed4STyler Hicks * given network namespace.
536fbdeaed4STyler Hicks */
net_ns_get_ownership(const struct net * net,kuid_t * uid,kgid_t * gid)537fbdeaed4STyler Hicks void net_ns_get_ownership(const struct net *net, kuid_t *uid, kgid_t *gid)
538fbdeaed4STyler Hicks {
539fbdeaed4STyler Hicks if (net) {
540fbdeaed4STyler Hicks kuid_t ns_root_uid = make_kuid(net->user_ns, 0);
541fbdeaed4STyler Hicks kgid_t ns_root_gid = make_kgid(net->user_ns, 0);
542fbdeaed4STyler Hicks
543fbdeaed4STyler Hicks if (uid_valid(ns_root_uid))
544fbdeaed4STyler Hicks *uid = ns_root_uid;
545fbdeaed4STyler Hicks
546fbdeaed4STyler Hicks if (gid_valid(ns_root_gid))
547fbdeaed4STyler Hicks *gid = ns_root_gid;
548fbdeaed4STyler Hicks } else {
549fbdeaed4STyler Hicks *uid = GLOBAL_ROOT_UID;
550fbdeaed4STyler Hicks *gid = GLOBAL_ROOT_GID;
551fbdeaed4STyler Hicks }
552fbdeaed4STyler Hicks }
553fbdeaed4STyler Hicks EXPORT_SYMBOL_GPL(net_ns_get_ownership);
554fbdeaed4STyler Hicks
unhash_nsid(struct net * net,struct net * last)555fb07a820SKirill Tkhai static void unhash_nsid(struct net *net, struct net *last)
556fb07a820SKirill Tkhai {
557fb07a820SKirill Tkhai struct net *tmp;
558fb07a820SKirill Tkhai /* This function is only called from cleanup_net() work,
559fb07a820SKirill Tkhai * and this work is the only process, that may delete
560fb07a820SKirill Tkhai * a net from net_namespace_list. So, when the below
561fb07a820SKirill Tkhai * is executing, the list may only grow. Thus, we do not
562f0b07bb1SKirill Tkhai * use for_each_net_rcu() or net_rwsem.
563fb07a820SKirill Tkhai */
564fb07a820SKirill Tkhai for_each_net(tmp) {
565fb07a820SKirill Tkhai int id;
566fb07a820SKirill Tkhai
567e1f469cdSTaehee Yoo spin_lock_bh(&tmp->nsid_lock);
568fb07a820SKirill Tkhai id = __peernet2id(tmp, net);
569fb07a820SKirill Tkhai if (id >= 0)
570fb07a820SKirill Tkhai idr_remove(&tmp->netns_ids, id);
571e1f469cdSTaehee Yoo spin_unlock_bh(&tmp->nsid_lock);
572fb07a820SKirill Tkhai if (id >= 0)
573d4e4fdf9SGuillaume Nault rtnl_net_notifyid(tmp, RTM_DELNSID, id, 0, NULL,
574d4e4fdf9SGuillaume Nault GFP_KERNEL);
575fb07a820SKirill Tkhai if (tmp == last)
576fb07a820SKirill Tkhai break;
577fb07a820SKirill Tkhai }
578e1f469cdSTaehee Yoo spin_lock_bh(&net->nsid_lock);
579fb07a820SKirill Tkhai idr_destroy(&net->netns_ids);
580e1f469cdSTaehee Yoo spin_unlock_bh(&net->nsid_lock);
581fb07a820SKirill Tkhai }
582fb07a820SKirill Tkhai
58365b7b5b9SKirill Tkhai static LLIST_HEAD(cleanup_list);
5842b035b39SEric W. Biederman
cleanup_net(struct work_struct * work)5855f256becSEric W. Biederman static void cleanup_net(struct work_struct *work)
5865f256becSEric W. Biederman {
587f875bae0SEric W. Biederman const struct pernet_operations *ops;
588fb07a820SKirill Tkhai struct net *net, *tmp, *last;
58965b7b5b9SKirill Tkhai struct llist_node *net_kill_list;
59072ad937aSEric W. Biederman LIST_HEAD(net_exit_list);
5915f256becSEric W. Biederman
5922b035b39SEric W. Biederman /* Atomically snapshot the list of namespaces to cleanup */
59365b7b5b9SKirill Tkhai net_kill_list = llist_del_all(&cleanup_list);
59419efbd93SKirill Tkhai
5954420bf21SKirill Tkhai down_read(&pernet_ops_rwsem);
5965f256becSEric W. Biederman
5975f256becSEric W. Biederman /* Don't let anyone else find us. */
598f0b07bb1SKirill Tkhai down_write(&net_rwsem);
59965b7b5b9SKirill Tkhai llist_for_each_entry(net, net_kill_list, cleanup_list)
60011a28d37SJohannes Berg list_del_rcu(&net->list);
601fb07a820SKirill Tkhai /* Cache last net. After we unlock rtnl, no one new net
602fb07a820SKirill Tkhai * added to net_namespace_list can assign nsid pointer
603fb07a820SKirill Tkhai * to a net from net_kill_list (see peernet2id_alloc()).
604fb07a820SKirill Tkhai * So, we skip them in unhash_nsid().
605fb07a820SKirill Tkhai *
606fb07a820SKirill Tkhai * Note, that unhash_nsid() does not delete nsid links
607fb07a820SKirill Tkhai * between net_kill_list's nets, as they've already
608fb07a820SKirill Tkhai * deleted from net_namespace_list. But, this would be
609fb07a820SKirill Tkhai * useless anyway, as netns_ids are destroyed there.
610fb07a820SKirill Tkhai */
611fb07a820SKirill Tkhai last = list_last_entry(&net_namespace_list, struct net, list);
612f0b07bb1SKirill Tkhai up_write(&net_rwsem);
6135f256becSEric W. Biederman
61465b7b5b9SKirill Tkhai llist_for_each_entry(net, net_kill_list, cleanup_list) {
615fb07a820SKirill Tkhai unhash_nsid(net, last);
616fb07a820SKirill Tkhai list_add_tail(&net->exit_list, &net_exit_list);
617fb07a820SKirill Tkhai }
618fb07a820SKirill Tkhai
619d7d99872SEric Dumazet /* Run all of the network namespace pre_exit methods */
620d7d99872SEric Dumazet list_for_each_entry_reverse(ops, &pernet_list, list)
621d7d99872SEric Dumazet ops_pre_exit_list(ops, &net_exit_list);
622d7d99872SEric Dumazet
62311a28d37SJohannes Berg /*
62411a28d37SJohannes Berg * Another CPU might be rcu-iterating the list, wait for it.
62511a28d37SJohannes Berg * This needs to be before calling the exit() notifiers, so
62611a28d37SJohannes Berg * the rcu_barrier() below isn't sufficient alone.
627d7d99872SEric Dumazet * Also the pre_exit() and exit() methods need this barrier.
62811a28d37SJohannes Berg */
62911a28d37SJohannes Berg synchronize_rcu();
63011a28d37SJohannes Berg
6315f256becSEric W. Biederman /* Run all of the network namespace exit methods */
63272ad937aSEric W. Biederman list_for_each_entry_reverse(ops, &pernet_list, list)
63372ad937aSEric W. Biederman ops_exit_list(ops, &net_exit_list);
63472ad937aSEric W. Biederman
635f875bae0SEric W. Biederman /* Free the net generic variables */
63672ad937aSEric W. Biederman list_for_each_entry_reverse(ops, &pernet_list, list)
63772ad937aSEric W. Biederman ops_free_list(ops, &net_exit_list);
6385f256becSEric W. Biederman
6394420bf21SKirill Tkhai up_read(&pernet_ops_rwsem);
6405f256becSEric W. Biederman
6415f256becSEric W. Biederman /* Ensure there are no outstanding rcu callbacks using this
6425f256becSEric W. Biederman * network namespace.
6435f256becSEric W. Biederman */
6445f256becSEric W. Biederman rcu_barrier();
6455f256becSEric W. Biederman
646*b7a79e51SEric Dumazet net_complete_free();
647*b7a79e51SEric Dumazet
6485f256becSEric W. Biederman /* Finally it is safe to free my network namespace structure */
64972ad937aSEric W. Biederman list_for_each_entry_safe(net, tmp, &net_exit_list, exit_list) {
65072ad937aSEric W. Biederman list_del_init(&net->exit_list);
65170328660SEric W. Biederman dec_net_namespaces(net->ucounts);
652aed0826bSYajun Deng #ifdef CONFIG_KEYS
6539b242610SDavid Howells key_remove_domain(net->key_domain);
654aed0826bSYajun Deng #endif
655038e7332SEric W. Biederman put_user_ns(net->user_ns);
65641467d2fSYajun Deng net_free(net);
6575f256becSEric W. Biederman }
6582b035b39SEric W. Biederman }
6597866cc57SFlorian Westphal
/**
 *	net_ns_barrier - wait until concurrent net_cleanup_work is done
 *
 *	cleanup_net runs from work queue and will first remove namespaces
 *	from the global list, then run net exit functions.
 *
 *	Call this in module exit path to make sure that all netns
 *	->exit ops have been invoked before the function is removed.
 */
void net_ns_barrier(void)
{
	/*
	 * cleanup_net() holds pernet_ops_rwsem for reading for as long as
	 * it runs the pernet pre_exit/exit/free methods.  Taking the
	 * semaphore for writing therefore cannot succeed until a cleanup
	 * that is in progress has released it.  Nothing is done under the
	 * lock: the acquire/release pair itself is the barrier.
	 */
	down_write(&pernet_ops_rwsem);
	up_write(&pernet_ops_rwsem);
}
EXPORT_SYMBOL(net_ns_barrier);
6757866cc57SFlorian Westphal
6762b035b39SEric W. Biederman static DECLARE_WORK(net_cleanup_work, cleanup_net);
6775f256becSEric W. Biederman
__put_net(struct net * net)6785f256becSEric W. Biederman void __put_net(struct net *net)
6795f256becSEric W. Biederman {
6809ba74e6cSEric Dumazet ref_tracker_dir_exit(&net->refcnt_tracker);
6815f256becSEric W. Biederman /* Cleanup the network namespace in process context */
6828349efd9SKirill Tkhai if (llist_add(&net->cleanup_list, &cleanup_list))
6832b035b39SEric W. Biederman queue_work(netns_wq, &net_cleanup_work);
6845f256becSEric W. Biederman }
6855f256becSEric W. Biederman EXPORT_SYMBOL_GPL(__put_net);
6865f256becSEric W. Biederman
687ea6932d7SChangbin Du /**
688ea6932d7SChangbin Du * get_net_ns - increment the refcount of the network namespace
689ea6932d7SChangbin Du * @ns: common namespace (net)
690ea6932d7SChangbin Du *
691ef0394caSYue Haibing * Returns the net's common namespace or ERR_PTR() if ref is zero.
692ea6932d7SChangbin Du */
get_net_ns(struct ns_common * ns)693ea6932d7SChangbin Du struct ns_common *get_net_ns(struct ns_common *ns)
694ea6932d7SChangbin Du {
695ef0394caSYue Haibing struct net *net;
696ef0394caSYue Haibing
697ef0394caSYue Haibing net = maybe_get_net(container_of(ns, struct net, ns));
698ef0394caSYue Haibing if (net)
699ef0394caSYue Haibing return &net->ns;
700ef0394caSYue Haibing return ERR_PTR(-EINVAL);
701ea6932d7SChangbin Du }
702ea6932d7SChangbin Du EXPORT_SYMBOL_GPL(get_net_ns);
703ea6932d7SChangbin Du
get_net_ns_by_fd(int fd)704956c9207SStephen Rothwell struct net *get_net_ns_by_fd(int fd)
705956c9207SStephen Rothwell {
70638e12408SAl Viro struct fd f = fdget(fd);
70738e12408SAl Viro struct net *net = ERR_PTR(-EINVAL);
708956c9207SStephen Rothwell
70938e12408SAl Viro if (!f.file)
71038e12408SAl Viro return ERR_PTR(-EBADF);
711956c9207SStephen Rothwell
71238e12408SAl Viro if (proc_ns_file(f.file)) {
71338e12408SAl Viro struct ns_common *ns = get_proc_ns(file_inode(f.file));
71433c42940SAl Viro if (ns->ops == &netns_operations)
71533c42940SAl Viro net = get_net(container_of(ns, struct net, ns));
71638e12408SAl Viro }
71738e12408SAl Viro fdput(f);
718c316e6a3SAl Viro
719956c9207SStephen Rothwell return net;
720956c9207SStephen Rothwell }
7214b681c82SVadim Kochan EXPORT_SYMBOL_GPL(get_net_ns_by_fd);
722e34492deSChangbin Du #endif
7239dd776b6SEric W. Biederman
/*
 * Look up the network namespace of the task identified by @pid
 * (resolved with find_task_by_vpid()).
 *
 * Returns the namespace with a reference taken via get_net(), or
 * ERR_PTR(-ESRCH) if no such task exists or the task has no nsproxy.
 */
struct net *get_net_ns_by_pid(pid_t pid)
{
	struct net *found = ERR_PTR(-ESRCH);
	struct nsproxy *nsproxy;
	struct task_struct *tsk;

	rcu_read_lock();

	tsk = find_task_by_vpid(pid);
	if (!tsk)
		goto out_unlock;

	/* tsk->nsproxy is sampled and used under task_lock(). */
	task_lock(tsk);
	nsproxy = tsk->nsproxy;
	if (nsproxy)
		found = get_net(nsproxy->net_ns);
	task_unlock(tsk);

out_unlock:
	rcu_read_unlock();
	return found;
}
EXPORT_SYMBOL_GPL(get_net_ns_by_pid);
74530ffee84SJohannes Berg
/*
 * pernet ->init hook of net_ns_ops.
 *
 * With CONFIG_NET_NS, points net->ns.ops at netns_operations (the value
 * get_net_ns_by_fd() compares against), then calls ns_alloc_inum() on
 * net->ns and returns its result.
 */
static __net_init int net_ns_net_init(struct net *net)
{
#ifdef CONFIG_NET_NS
	net->ns.ops = &netns_operations;
#endif
	return ns_alloc_inum(&net->ns);
}
75398f842e6SEric W. Biederman
/* pernet ->exit hook of net_ns_ops: undoes ns_alloc_inum() from ->init. */
static __net_exit void net_ns_net_exit(struct net *net)
{
	ns_free_inum(&net->ns);
}
75898f842e6SEric W. Biederman
/*
 * Per-namespace operations managing net->ns for every network namespace;
 * registered from net_ns_init() with register_pernet_subsys().
 */
static struct pernet_operations __net_initdata net_ns_ops = {
	.init = net_ns_net_init,
	.exit = net_ns_net_exit,
};
76398f842e6SEric W. Biederman
/*
 * Netlink attribute policy shared by the RTM_NEWNSID and RTM_GETNSID
 * request parsers in this file (doit and dump paths).
 */
static const struct nla_policy rtnl_net_policy[NETNSA_MAX + 1] = {
	[NETNSA_NONE]		= { .type = NLA_UNSPEC },
	[NETNSA_NSID]		= { .type = NLA_S32 },
	[NETNSA_PID]		= { .type = NLA_U32 },
	[NETNSA_FD]		= { .type = NLA_U32 },
	[NETNSA_TARGET_NSID]	= { .type = NLA_S32 },
};
7710c7aecd4SNicolas Dichtel
rtnl_net_newid(struct sk_buff * skb,struct nlmsghdr * nlh,struct netlink_ext_ack * extack)772c21ef3e3SDavid Ahern static int rtnl_net_newid(struct sk_buff *skb, struct nlmsghdr *nlh,
773c21ef3e3SDavid Ahern struct netlink_ext_ack *extack)
7740c7aecd4SNicolas Dichtel {
7750c7aecd4SNicolas Dichtel struct net *net = sock_net(skb->sk);
7760c7aecd4SNicolas Dichtel struct nlattr *tb[NETNSA_MAX + 1];
7774a7f7bc6SNicolas Dichtel struct nlattr *nla;
7780c7aecd4SNicolas Dichtel struct net *peer;
7790c7aecd4SNicolas Dichtel int nsid, err;
7800c7aecd4SNicolas Dichtel
7818cb08174SJohannes Berg err = nlmsg_parse_deprecated(nlh, sizeof(struct rtgenmsg), tb,
7828cb08174SJohannes Berg NETNSA_MAX, rtnl_net_policy, extack);
7830c7aecd4SNicolas Dichtel if (err < 0)
7840c7aecd4SNicolas Dichtel return err;
7854a7f7bc6SNicolas Dichtel if (!tb[NETNSA_NSID]) {
7864a7f7bc6SNicolas Dichtel NL_SET_ERR_MSG(extack, "nsid is missing");
7870c7aecd4SNicolas Dichtel return -EINVAL;
7884a7f7bc6SNicolas Dichtel }
7890c7aecd4SNicolas Dichtel nsid = nla_get_s32(tb[NETNSA_NSID]);
7900c7aecd4SNicolas Dichtel
7914a7f7bc6SNicolas Dichtel if (tb[NETNSA_PID]) {
7920c7aecd4SNicolas Dichtel peer = get_net_ns_by_pid(nla_get_u32(tb[NETNSA_PID]));
7934a7f7bc6SNicolas Dichtel nla = tb[NETNSA_PID];
7944a7f7bc6SNicolas Dichtel } else if (tb[NETNSA_FD]) {
7950c7aecd4SNicolas Dichtel peer = get_net_ns_by_fd(nla_get_u32(tb[NETNSA_FD]));
7964a7f7bc6SNicolas Dichtel nla = tb[NETNSA_FD];
7974a7f7bc6SNicolas Dichtel } else {
7984a7f7bc6SNicolas Dichtel NL_SET_ERR_MSG(extack, "Peer netns reference is missing");
7990c7aecd4SNicolas Dichtel return -EINVAL;
8004a7f7bc6SNicolas Dichtel }
8014a7f7bc6SNicolas Dichtel if (IS_ERR(peer)) {
8024a7f7bc6SNicolas Dichtel NL_SET_BAD_ATTR(extack, nla);
8034a7f7bc6SNicolas Dichtel NL_SET_ERR_MSG(extack, "Peer netns reference is invalid");
8040c7aecd4SNicolas Dichtel return PTR_ERR(peer);
8054a7f7bc6SNicolas Dichtel }
8060c7aecd4SNicolas Dichtel
807e1f469cdSTaehee Yoo spin_lock_bh(&net->nsid_lock);
8083138dbf8SNicolas Dichtel if (__peernet2id(net, peer) >= 0) {
809e1f469cdSTaehee Yoo spin_unlock_bh(&net->nsid_lock);
8100c7aecd4SNicolas Dichtel err = -EEXIST;
8114a7f7bc6SNicolas Dichtel NL_SET_BAD_ATTR(extack, nla);
8124a7f7bc6SNicolas Dichtel NL_SET_ERR_MSG(extack,
8134a7f7bc6SNicolas Dichtel "Peer netns already has a nsid assigned");
8140c7aecd4SNicolas Dichtel goto out;
8150c7aecd4SNicolas Dichtel }
8160c7aecd4SNicolas Dichtel
8170c7aecd4SNicolas Dichtel err = alloc_netid(net, peer, nsid);
818e1f469cdSTaehee Yoo spin_unlock_bh(&net->nsid_lock);
8193138dbf8SNicolas Dichtel if (err >= 0) {
820993e4c92SNicolas Dichtel rtnl_net_notifyid(net, RTM_NEWNSID, err, NETLINK_CB(skb).portid,
821d4e4fdf9SGuillaume Nault nlh, GFP_KERNEL);
8220c7aecd4SNicolas Dichtel err = 0;
8234a7f7bc6SNicolas Dichtel } else if (err == -ENOSPC && nsid >= 0) {
82410d486a3SNicolas Dichtel err = -EEXIST;
8254a7f7bc6SNicolas Dichtel NL_SET_BAD_ATTR(extack, tb[NETNSA_NSID]);
8264a7f7bc6SNicolas Dichtel NL_SET_ERR_MSG(extack, "The specified nsid is already used");
8273138dbf8SNicolas Dichtel }
8280c7aecd4SNicolas Dichtel out:
8290c7aecd4SNicolas Dichtel put_net(peer);
8300c7aecd4SNicolas Dichtel return err;
8310c7aecd4SNicolas Dichtel }
8320c7aecd4SNicolas Dichtel
rtnl_net_get_size(void)8330c7aecd4SNicolas Dichtel static int rtnl_net_get_size(void)
8340c7aecd4SNicolas Dichtel {
8350c7aecd4SNicolas Dichtel return NLMSG_ALIGN(sizeof(struct rtgenmsg))
8360c7aecd4SNicolas Dichtel + nla_total_size(sizeof(s32)) /* NETNSA_NSID */
837288f06a0SNicolas Dichtel + nla_total_size(sizeof(s32)) /* NETNSA_CURRENT_NSID */
8380c7aecd4SNicolas Dichtel ;
8390c7aecd4SNicolas Dichtel }
8400c7aecd4SNicolas Dichtel
/* Parameters describing one nsid message built by rtnl_net_fill(). */
struct net_fill_args {
	u32 portid;	/* handed to nlmsg_put() together with seq */
	u32 seq;	/* sequence number, taken from the request's nlmsg_seq */
	int flags;	/* nlmsg_put() flags; dumps set NLM_F_MULTI */
	int cmd;	/* message type handed to nlmsg_put(), e.g. RTM_NEWNSID */
	int nsid;	/* value emitted as NETNSA_NSID */
	/*
	 * When set, NETNSA_CURRENT_NSID is emitted as well.  The getid and
	 * dump handlers also use this flag to remember that they hold a
	 * reference on the target netns which must be dropped on exit.
	 */
	bool add_ref;
	int ref_nsid;	/* value emitted as NETNSA_CURRENT_NSID if add_ref */
};
850a0732ad1SNicolas Dichtel
rtnl_net_fill(struct sk_buff * skb,struct net_fill_args * args)851a0732ad1SNicolas Dichtel static int rtnl_net_fill(struct sk_buff *skb, struct net_fill_args *args)
8520c7aecd4SNicolas Dichtel {
8530c7aecd4SNicolas Dichtel struct nlmsghdr *nlh;
8540c7aecd4SNicolas Dichtel struct rtgenmsg *rth;
8550c7aecd4SNicolas Dichtel
856a0732ad1SNicolas Dichtel nlh = nlmsg_put(skb, args->portid, args->seq, args->cmd, sizeof(*rth),
857a0732ad1SNicolas Dichtel args->flags);
8580c7aecd4SNicolas Dichtel if (!nlh)
8590c7aecd4SNicolas Dichtel return -EMSGSIZE;
8600c7aecd4SNicolas Dichtel
8610c7aecd4SNicolas Dichtel rth = nlmsg_data(nlh);
8620c7aecd4SNicolas Dichtel rth->rtgen_family = AF_UNSPEC;
8630c7aecd4SNicolas Dichtel
864a0732ad1SNicolas Dichtel if (nla_put_s32(skb, NETNSA_NSID, args->nsid))
8650c7aecd4SNicolas Dichtel goto nla_put_failure;
8660c7aecd4SNicolas Dichtel
867288f06a0SNicolas Dichtel if (args->add_ref &&
868288f06a0SNicolas Dichtel nla_put_s32(skb, NETNSA_CURRENT_NSID, args->ref_nsid))
869288f06a0SNicolas Dichtel goto nla_put_failure;
870288f06a0SNicolas Dichtel
8710c7aecd4SNicolas Dichtel nlmsg_end(skb, nlh);
8720c7aecd4SNicolas Dichtel return 0;
8730c7aecd4SNicolas Dichtel
8740c7aecd4SNicolas Dichtel nla_put_failure:
8750c7aecd4SNicolas Dichtel nlmsg_cancel(skb, nlh);
8760c7aecd4SNicolas Dichtel return -EMSGSIZE;
8770c7aecd4SNicolas Dichtel }
8780c7aecd4SNicolas Dichtel
rtnl_net_valid_getid_req(struct sk_buff * skb,const struct nlmsghdr * nlh,struct nlattr ** tb,struct netlink_ext_ack * extack)8794d165f61SJakub Kicinski static int rtnl_net_valid_getid_req(struct sk_buff *skb,
8804d165f61SJakub Kicinski const struct nlmsghdr *nlh,
8814d165f61SJakub Kicinski struct nlattr **tb,
8824d165f61SJakub Kicinski struct netlink_ext_ack *extack)
8834d165f61SJakub Kicinski {
8844d165f61SJakub Kicinski int i, err;
8854d165f61SJakub Kicinski
8864d165f61SJakub Kicinski if (!netlink_strict_get_check(skb))
8878cb08174SJohannes Berg return nlmsg_parse_deprecated(nlh, sizeof(struct rtgenmsg),
8888cb08174SJohannes Berg tb, NETNSA_MAX, rtnl_net_policy,
8898cb08174SJohannes Berg extack);
8904d165f61SJakub Kicinski
8918cb08174SJohannes Berg err = nlmsg_parse_deprecated_strict(nlh, sizeof(struct rtgenmsg), tb,
8928cb08174SJohannes Berg NETNSA_MAX, rtnl_net_policy,
8938cb08174SJohannes Berg extack);
8944d165f61SJakub Kicinski if (err)
8954d165f61SJakub Kicinski return err;
8964d165f61SJakub Kicinski
8974d165f61SJakub Kicinski for (i = 0; i <= NETNSA_MAX; i++) {
8984d165f61SJakub Kicinski if (!tb[i])
8994d165f61SJakub Kicinski continue;
9004d165f61SJakub Kicinski
9014d165f61SJakub Kicinski switch (i) {
9024d165f61SJakub Kicinski case NETNSA_PID:
9034d165f61SJakub Kicinski case NETNSA_FD:
9044d165f61SJakub Kicinski case NETNSA_NSID:
9054d165f61SJakub Kicinski case NETNSA_TARGET_NSID:
9064d165f61SJakub Kicinski break;
9074d165f61SJakub Kicinski default:
9084d165f61SJakub Kicinski NL_SET_ERR_MSG(extack, "Unsupported attribute in peer netns getid request");
9094d165f61SJakub Kicinski return -EINVAL;
9104d165f61SJakub Kicinski }
9114d165f61SJakub Kicinski }
9124d165f61SJakub Kicinski
9134d165f61SJakub Kicinski return 0;
9144d165f61SJakub Kicinski }
9154d165f61SJakub Kicinski
rtnl_net_getid(struct sk_buff * skb,struct nlmsghdr * nlh,struct netlink_ext_ack * extack)916c21ef3e3SDavid Ahern static int rtnl_net_getid(struct sk_buff *skb, struct nlmsghdr *nlh,
917c21ef3e3SDavid Ahern struct netlink_ext_ack *extack)
9180c7aecd4SNicolas Dichtel {
9190c7aecd4SNicolas Dichtel struct net *net = sock_net(skb->sk);
9200c7aecd4SNicolas Dichtel struct nlattr *tb[NETNSA_MAX + 1];
921a0732ad1SNicolas Dichtel struct net_fill_args fillargs = {
922a0732ad1SNicolas Dichtel .portid = NETLINK_CB(skb).portid,
923a0732ad1SNicolas Dichtel .seq = nlh->nlmsg_seq,
924a0732ad1SNicolas Dichtel .cmd = RTM_NEWNSID,
925a0732ad1SNicolas Dichtel };
926cff478b9SNicolas Dichtel struct net *peer, *target = net;
9274a7f7bc6SNicolas Dichtel struct nlattr *nla;
9280c7aecd4SNicolas Dichtel struct sk_buff *msg;
929a0732ad1SNicolas Dichtel int err;
9300c7aecd4SNicolas Dichtel
9314d165f61SJakub Kicinski err = rtnl_net_valid_getid_req(skb, nlh, tb, extack);
9320c7aecd4SNicolas Dichtel if (err < 0)
9330c7aecd4SNicolas Dichtel return err;
9344a7f7bc6SNicolas Dichtel if (tb[NETNSA_PID]) {
9350c7aecd4SNicolas Dichtel peer = get_net_ns_by_pid(nla_get_u32(tb[NETNSA_PID]));
9364a7f7bc6SNicolas Dichtel nla = tb[NETNSA_PID];
9374a7f7bc6SNicolas Dichtel } else if (tb[NETNSA_FD]) {
9380c7aecd4SNicolas Dichtel peer = get_net_ns_by_fd(nla_get_u32(tb[NETNSA_FD]));
9394a7f7bc6SNicolas Dichtel nla = tb[NETNSA_FD];
9403a4f68bfSNicolas Dichtel } else if (tb[NETNSA_NSID]) {
941ecce39ecSGuillaume Nault peer = get_net_ns_by_id(net, nla_get_s32(tb[NETNSA_NSID]));
9423a4f68bfSNicolas Dichtel if (!peer)
9433a4f68bfSNicolas Dichtel peer = ERR_PTR(-ENOENT);
9443a4f68bfSNicolas Dichtel nla = tb[NETNSA_NSID];
9454a7f7bc6SNicolas Dichtel } else {
9464a7f7bc6SNicolas Dichtel NL_SET_ERR_MSG(extack, "Peer netns reference is missing");
9470c7aecd4SNicolas Dichtel return -EINVAL;
9484a7f7bc6SNicolas Dichtel }
9490c7aecd4SNicolas Dichtel
9504a7f7bc6SNicolas Dichtel if (IS_ERR(peer)) {
9514a7f7bc6SNicolas Dichtel NL_SET_BAD_ATTR(extack, nla);
9524a7f7bc6SNicolas Dichtel NL_SET_ERR_MSG(extack, "Peer netns reference is invalid");
9530c7aecd4SNicolas Dichtel return PTR_ERR(peer);
9544a7f7bc6SNicolas Dichtel }
9550c7aecd4SNicolas Dichtel
956cff478b9SNicolas Dichtel if (tb[NETNSA_TARGET_NSID]) {
957cff478b9SNicolas Dichtel int id = nla_get_s32(tb[NETNSA_TARGET_NSID]);
958cff478b9SNicolas Dichtel
959cff478b9SNicolas Dichtel target = rtnl_get_net_ns_capable(NETLINK_CB(skb).sk, id);
960cff478b9SNicolas Dichtel if (IS_ERR(target)) {
961cff478b9SNicolas Dichtel NL_SET_BAD_ATTR(extack, tb[NETNSA_TARGET_NSID]);
962cff478b9SNicolas Dichtel NL_SET_ERR_MSG(extack,
963cff478b9SNicolas Dichtel "Target netns reference is invalid");
964cff478b9SNicolas Dichtel err = PTR_ERR(target);
965cff478b9SNicolas Dichtel goto out;
966cff478b9SNicolas Dichtel }
967288f06a0SNicolas Dichtel fillargs.add_ref = true;
968288f06a0SNicolas Dichtel fillargs.ref_nsid = peernet2id(net, peer);
969cff478b9SNicolas Dichtel }
970cff478b9SNicolas Dichtel
9710c7aecd4SNicolas Dichtel msg = nlmsg_new(rtnl_net_get_size(), GFP_KERNEL);
9720c7aecd4SNicolas Dichtel if (!msg) {
9730c7aecd4SNicolas Dichtel err = -ENOMEM;
9740c7aecd4SNicolas Dichtel goto out;
9750c7aecd4SNicolas Dichtel }
9760c7aecd4SNicolas Dichtel
977cff478b9SNicolas Dichtel fillargs.nsid = peernet2id(target, peer);
978a0732ad1SNicolas Dichtel err = rtnl_net_fill(msg, &fillargs);
9790c7aecd4SNicolas Dichtel if (err < 0)
9800c7aecd4SNicolas Dichtel goto err_out;
9810c7aecd4SNicolas Dichtel
9820c7aecd4SNicolas Dichtel err = rtnl_unicast(msg, net, NETLINK_CB(skb).portid);
9830c7aecd4SNicolas Dichtel goto out;
9840c7aecd4SNicolas Dichtel
9850c7aecd4SNicolas Dichtel err_out:
9860c7aecd4SNicolas Dichtel nlmsg_free(msg);
9870c7aecd4SNicolas Dichtel out:
988288f06a0SNicolas Dichtel if (fillargs.add_ref)
989cff478b9SNicolas Dichtel put_net(target);
9900c7aecd4SNicolas Dichtel put_net(peer);
9910c7aecd4SNicolas Dichtel return err;
9920c7aecd4SNicolas Dichtel }
9930c7aecd4SNicolas Dichtel
/* State shared between rtnl_net_dumpid() and its per-entry callback. */
struct rtnl_net_dump_cb {
	struct net *tgt_net;	/* netns whose netns_ids idr is walked */
	/*
	 * Set by rtnl_valid_dump_net_req() to the previous tgt_net when a
	 * NETNSA_TARGET_NSID attribute redirects the dump; used to compute
	 * fillargs.ref_nsid for each entry.
	 */
	struct net *ref_net;
	struct sk_buff *skb;	/* dump skb the messages are appended to */
	struct net_fill_args fillargs;	/* per-message fill parameters */
	int idx;		/* entries visited so far in this pass */
	int s_idx;		/* first entry to emit, from cb->args[0] */
};
1002a143c40cSNicolas Dichtel
10032dce224fSGuillaume Nault /* Runs in RCU-critical section. */
rtnl_net_dumpid_one(int id,void * peer,void * data)1004a143c40cSNicolas Dichtel static int rtnl_net_dumpid_one(int id, void *peer, void *data)
1005a143c40cSNicolas Dichtel {
1006a143c40cSNicolas Dichtel struct rtnl_net_dump_cb *net_cb = (struct rtnl_net_dump_cb *)data;
1007a143c40cSNicolas Dichtel int ret;
1008a143c40cSNicolas Dichtel
1009a143c40cSNicolas Dichtel if (net_cb->idx < net_cb->s_idx)
1010a143c40cSNicolas Dichtel goto cont;
1011a143c40cSNicolas Dichtel
1012a0732ad1SNicolas Dichtel net_cb->fillargs.nsid = id;
1013288f06a0SNicolas Dichtel if (net_cb->fillargs.add_ref)
1014288f06a0SNicolas Dichtel net_cb->fillargs.ref_nsid = __peernet2id(net_cb->ref_net, peer);
1015a0732ad1SNicolas Dichtel ret = rtnl_net_fill(net_cb->skb, &net_cb->fillargs);
1016a143c40cSNicolas Dichtel if (ret < 0)
1017a143c40cSNicolas Dichtel return ret;
1018a143c40cSNicolas Dichtel
1019a143c40cSNicolas Dichtel cont:
1020a143c40cSNicolas Dichtel net_cb->idx++;
1021a143c40cSNicolas Dichtel return 0;
1022a143c40cSNicolas Dichtel }
1023a143c40cSNicolas Dichtel
rtnl_valid_dump_net_req(const struct nlmsghdr * nlh,struct sock * sk,struct rtnl_net_dump_cb * net_cb,struct netlink_callback * cb)1024cff478b9SNicolas Dichtel static int rtnl_valid_dump_net_req(const struct nlmsghdr *nlh, struct sock *sk,
1025cff478b9SNicolas Dichtel struct rtnl_net_dump_cb *net_cb,
1026cff478b9SNicolas Dichtel struct netlink_callback *cb)
1027cff478b9SNicolas Dichtel {
1028cff478b9SNicolas Dichtel struct netlink_ext_ack *extack = cb->extack;
1029cff478b9SNicolas Dichtel struct nlattr *tb[NETNSA_MAX + 1];
1030cff478b9SNicolas Dichtel int err, i;
1031cff478b9SNicolas Dichtel
10328cb08174SJohannes Berg err = nlmsg_parse_deprecated_strict(nlh, sizeof(struct rtgenmsg), tb,
10338cb08174SJohannes Berg NETNSA_MAX, rtnl_net_policy,
10348cb08174SJohannes Berg extack);
1035cff478b9SNicolas Dichtel if (err < 0)
1036cff478b9SNicolas Dichtel return err;
1037cff478b9SNicolas Dichtel
1038cff478b9SNicolas Dichtel for (i = 0; i <= NETNSA_MAX; i++) {
1039cff478b9SNicolas Dichtel if (!tb[i])
1040cff478b9SNicolas Dichtel continue;
1041cff478b9SNicolas Dichtel
1042cff478b9SNicolas Dichtel if (i == NETNSA_TARGET_NSID) {
1043cff478b9SNicolas Dichtel struct net *net;
1044cff478b9SNicolas Dichtel
1045cff478b9SNicolas Dichtel net = rtnl_get_net_ns_capable(sk, nla_get_s32(tb[i]));
1046cff478b9SNicolas Dichtel if (IS_ERR(net)) {
1047cff478b9SNicolas Dichtel NL_SET_BAD_ATTR(extack, tb[i]);
1048cff478b9SNicolas Dichtel NL_SET_ERR_MSG(extack,
1049cff478b9SNicolas Dichtel "Invalid target network namespace id");
1050cff478b9SNicolas Dichtel return PTR_ERR(net);
1051cff478b9SNicolas Dichtel }
1052288f06a0SNicolas Dichtel net_cb->fillargs.add_ref = true;
1053288f06a0SNicolas Dichtel net_cb->ref_net = net_cb->tgt_net;
1054cff478b9SNicolas Dichtel net_cb->tgt_net = net;
1055cff478b9SNicolas Dichtel } else {
1056cff478b9SNicolas Dichtel NL_SET_BAD_ATTR(extack, tb[i]);
1057cff478b9SNicolas Dichtel NL_SET_ERR_MSG(extack,
1058cff478b9SNicolas Dichtel "Unsupported attribute in dump request");
1059cff478b9SNicolas Dichtel return -EINVAL;
1060cff478b9SNicolas Dichtel }
1061cff478b9SNicolas Dichtel }
1062cff478b9SNicolas Dichtel
1063cff478b9SNicolas Dichtel return 0;
1064cff478b9SNicolas Dichtel }
1065cff478b9SNicolas Dichtel
rtnl_net_dumpid(struct sk_buff * skb,struct netlink_callback * cb)1066a143c40cSNicolas Dichtel static int rtnl_net_dumpid(struct sk_buff *skb, struct netlink_callback *cb)
1067a143c40cSNicolas Dichtel {
1068a143c40cSNicolas Dichtel struct rtnl_net_dump_cb net_cb = {
1069cff478b9SNicolas Dichtel .tgt_net = sock_net(skb->sk),
1070a143c40cSNicolas Dichtel .skb = skb,
1071a0732ad1SNicolas Dichtel .fillargs = {
1072a0732ad1SNicolas Dichtel .portid = NETLINK_CB(cb->skb).portid,
1073a0732ad1SNicolas Dichtel .seq = cb->nlh->nlmsg_seq,
1074a0732ad1SNicolas Dichtel .flags = NLM_F_MULTI,
1075a0732ad1SNicolas Dichtel .cmd = RTM_NEWNSID,
1076a0732ad1SNicolas Dichtel },
1077a143c40cSNicolas Dichtel .idx = 0,
1078a143c40cSNicolas Dichtel .s_idx = cb->args[0],
1079a143c40cSNicolas Dichtel };
1080cff478b9SNicolas Dichtel int err = 0;
1081a143c40cSNicolas Dichtel
1082cff478b9SNicolas Dichtel if (cb->strict_check) {
1083cff478b9SNicolas Dichtel err = rtnl_valid_dump_net_req(cb->nlh, skb->sk, &net_cb, cb);
1084cff478b9SNicolas Dichtel if (err < 0)
1085cff478b9SNicolas Dichtel goto end;
1086f80f14c3SDavid Ahern }
1087f80f14c3SDavid Ahern
10882dce224fSGuillaume Nault rcu_read_lock();
1089cff478b9SNicolas Dichtel idr_for_each(&net_cb.tgt_net->netns_ids, rtnl_net_dumpid_one, &net_cb);
10902dce224fSGuillaume Nault rcu_read_unlock();
1091a143c40cSNicolas Dichtel
1092a143c40cSNicolas Dichtel cb->args[0] = net_cb.idx;
1093cff478b9SNicolas Dichtel end:
1094288f06a0SNicolas Dichtel if (net_cb.fillargs.add_ref)
1095cff478b9SNicolas Dichtel put_net(net_cb.tgt_net);
1096cff478b9SNicolas Dichtel return err < 0 ? err : skb->len;
1097a143c40cSNicolas Dichtel }
1098a143c40cSNicolas Dichtel
/*
 * Send an nsid notification of type @cmd for @id to the RTNLGRP_NSID
 * group of @net.
 *
 * @nlh may be NULL, in which case the sequence number is 0.  If the
 * message cannot be allocated or filled, the error is reported to the
 * group's listeners through rtnl_set_sk_err().
 */
static void rtnl_net_notifyid(struct net *net, int cmd, int id, u32 portid,
			      struct nlmsghdr *nlh, gfp_t gfp)
{
	struct net_fill_args fillargs = {
		.portid = portid,
		.seq = nlh ? nlh->nlmsg_seq : 0,
		.cmd = cmd,
		.nsid = id,
	};
	struct sk_buff *msg;
	int err;

	msg = nlmsg_new(rtnl_net_get_size(), gfp);
	if (!msg) {
		err = -ENOMEM;
		goto report;
	}

	err = rtnl_net_fill(msg, &fillargs);
	if (err < 0) {
		nlmsg_free(msg);
		goto report;
	}

	rtnl_notify(msg, net, portid, RTNLGRP_NSID, nlh, gfp);
	return;

report:
	rtnl_set_sk_err(net, RTNLGRP_NSID, err);
}
11279a963454SNicolas Dichtel
/*
 * Boot-time initialisation of the network namespace infrastructure.
 *
 * Sets up the allocation cache and cleanup workqueue (CONFIG_NET_NS
 * only), brings up init_net, registers net_ns_ops and installs the
 * rtnetlink handlers for RTM_NEWNSID / RTM_GETNSID.  Every failure that
 * is checked here ends in panic().
 */
void __init net_ns_init(void)
{
	struct net_generic *ng;

#ifdef CONFIG_NET_NS
	net_cachep = kmem_cache_create("net_namespace", sizeof(struct net),
					SMP_CACHE_BYTES,
					SLAB_PANIC|SLAB_ACCOUNT, NULL);

	/* Create workqueue for cleanup */
	netns_wq = create_singlethread_workqueue("netns");
	if (!netns_wq)
		panic("Could not create netns workq");
#endif

	/* Give init_net its generic pointer array before setting it up. */
	ng = net_alloc_generic();
	if (!ng)
		panic("Could not allocate generic netns");

	rcu_assign_pointer(init_net.gen, ng);

#ifdef CONFIG_KEYS
	init_net.key_domain = &init_net_key_domain;
#endif
	/* init_net is set up with pernet_ops_rwsem held for writing. */
	down_write(&pernet_ops_rwsem);
	preinit_net(&init_net);
	if (setup_net(&init_net, &init_user_ns))
		panic("Could not setup the initial network namespace");

	/*
	 * From here on, the !CONFIG_NET_NS __register_pernet_operations()
	 * initialises new ops against init_net directly instead of only
	 * queueing them on the list.
	 */
	init_net_initialized = true;
	up_write(&pernet_ops_rwsem);

	if (register_pernet_subsys(&net_ns_ops))
		panic("Could not register network namespace subsystems");

	/* Both doit handlers are registered with RTNL_FLAG_DOIT_UNLOCKED. */
	rtnl_register(PF_UNSPEC, RTM_NEWNSID, rtnl_net_newid, NULL,
		      RTNL_FLAG_DOIT_UNLOCKED);
	rtnl_register(PF_UNSPEC, RTM_GETNSID, rtnl_net_getid, rtnl_net_dumpid,
		      RTNL_FLAG_DOIT_UNLOCKED);
}
11685f256becSEric W. Biederman
/*
 * Run the complete teardown sequence of a single @ops over every
 * namespace queued on @net_exit_list: pre_exit methods, an RCU grace
 * period, exit methods, and finally release of the per-net data.
 * This is the same order cleanup_net() applies across all registered ops.
 */
static void free_exit_list(struct pernet_operations *ops, struct list_head *net_exit_list)
{
	ops_pre_exit_list(ops, net_exit_list);
	/* Wait for concurrent RCU readers before the exit methods run. */
	synchronize_rcu();
	ops_exit_list(ops, net_exit_list);
	ops_free_list(ops, net_exit_list);
}
117641467d2fSYajun Deng
1177ed160e83SDenis V. Lunev #ifdef CONFIG_NET_NS
__register_pernet_operations(struct list_head * list,struct pernet_operations * ops)1178f875bae0SEric W. Biederman static int __register_pernet_operations(struct list_head *list,
11795f256becSEric W. Biederman struct pernet_operations *ops)
11805f256becSEric W. Biederman {
118172ad937aSEric W. Biederman struct net *net;
11825f256becSEric W. Biederman int error;
118372ad937aSEric W. Biederman LIST_HEAD(net_exit_list);
11845f256becSEric W. Biederman
11855f256becSEric W. Biederman list_add_tail(&ops->list, list);
1186f875bae0SEric W. Biederman if (ops->init || (ops->id && ops->size)) {
1187f0b07bb1SKirill Tkhai /* We held write locked pernet_ops_rwsem, and parallel
1188f0b07bb1SKirill Tkhai * setup_net() and cleanup_net() are not possible.
1189f0b07bb1SKirill Tkhai */
11901dba323bSPavel Emelyanov for_each_net(net) {
1191f875bae0SEric W. Biederman error = ops_init(ops, net);
11925f256becSEric W. Biederman if (error)
11935f256becSEric W. Biederman goto out_undo;
119472ad937aSEric W. Biederman list_add_tail(&net->exit_list, &net_exit_list);
11955f256becSEric W. Biederman }
11965f256becSEric W. Biederman }
11971dba323bSPavel Emelyanov return 0;
11985f256becSEric W. Biederman
11995f256becSEric W. Biederman out_undo:
12005f256becSEric W. Biederman /* If I have an error cleanup all namespaces I initialized */
12015f256becSEric W. Biederman list_del(&ops->list);
120241467d2fSYajun Deng free_exit_list(ops, &net_exit_list);
12031dba323bSPavel Emelyanov return error;
12045f256becSEric W. Biederman }
12055f256becSEric W. Biederman
/*
 * CONFIG_NET_NS variant: unlink @ops and run its teardown sequence in
 * every namespace visited by for_each_net().
 */
static void __unregister_pernet_operations(struct pernet_operations *ops)
{
	struct net *net;
	LIST_HEAD(net_exit_list);

	list_del(&ops->list);
	/* See comment in __register_pernet_operations() */
	for_each_net(net)
		list_add_tail(&net->exit_list, &net_exit_list);

	free_exit_list(ops, &net_exit_list);
}
12185f256becSEric W. Biederman
1219ed160e83SDenis V. Lunev #else
1220ed160e83SDenis V. Lunev
__register_pernet_operations(struct list_head * list,struct pernet_operations * ops)1221f875bae0SEric W. Biederman static int __register_pernet_operations(struct list_head *list,
1222f875bae0SEric W. Biederman struct pernet_operations *ops)
1223f875bae0SEric W. Biederman {
1224f8c46cb3SDmitry Torokhov if (!init_net_initialized) {
1225f8c46cb3SDmitry Torokhov list_add_tail(&ops->list, list);
1226f8c46cb3SDmitry Torokhov return 0;
1227f8c46cb3SDmitry Torokhov }
1228f8c46cb3SDmitry Torokhov
1229b922934dSJulian Anastasov return ops_init(ops, &init_net);
1230f875bae0SEric W. Biederman }
1231f875bae0SEric W. Biederman
__unregister_pernet_operations(struct pernet_operations * ops)1232f875bae0SEric W. Biederman static void __unregister_pernet_operations(struct pernet_operations *ops)
1233f875bae0SEric W. Biederman {
1234f8c46cb3SDmitry Torokhov if (!init_net_initialized) {
1235f8c46cb3SDmitry Torokhov list_del(&ops->list);
1236f8c46cb3SDmitry Torokhov } else {
123772ad937aSEric W. Biederman LIST_HEAD(net_exit_list);
123872ad937aSEric W. Biederman list_add(&init_net.exit_list, &net_exit_list);
123941467d2fSYajun Deng free_exit_list(ops, &net_exit_list);
1240f875bae0SEric W. Biederman }
1241f8c46cb3SDmitry Torokhov }
1242f875bae0SEric W. Biederman
1243f875bae0SEric W. Biederman #endif /* CONFIG_NET_NS */
1244f875bae0SEric W. Biederman
1245f875bae0SEric W. Biederman static DEFINE_IDA(net_generic_ids);
1246f875bae0SEric W. Biederman
register_pernet_operations(struct list_head * list,struct pernet_operations * ops)1247ed160e83SDenis V. Lunev static int register_pernet_operations(struct list_head *list,
1248ed160e83SDenis V. Lunev struct pernet_operations *ops)
1249ed160e83SDenis V. Lunev {
1250f875bae0SEric W. Biederman int error;
1251f875bae0SEric W. Biederman
1252f875bae0SEric W. Biederman if (ops->id) {
12536e77cc47SMatthew Wilcox error = ida_alloc_min(&net_generic_ids, MIN_PERNET_OPS_ID,
12546e77cc47SMatthew Wilcox GFP_KERNEL);
12556e77cc47SMatthew Wilcox if (error < 0)
1256f875bae0SEric W. Biederman return error;
12576e77cc47SMatthew Wilcox *ops->id = error;
1258b6dbfd5bSThadeu Lima de Souza Cascardo /* This does not require READ_ONCE as writers already hold
1259b6dbfd5bSThadeu Lima de Souza Cascardo * pernet_ops_rwsem. But WRITE_ONCE is needed to protect
1260b6dbfd5bSThadeu Lima de Souza Cascardo * net_alloc_generic.
1261b6dbfd5bSThadeu Lima de Souza Cascardo */
1262b6dbfd5bSThadeu Lima de Souza Cascardo WRITE_ONCE(max_gen_ptrs, max(max_gen_ptrs, *ops->id + 1));
1263f875bae0SEric W. Biederman }
1264f875bae0SEric W. Biederman error = __register_pernet_operations(list, ops);
12653a765edaSEric W. Biederman if (error) {
12663a765edaSEric W. Biederman rcu_barrier();
12673a765edaSEric W. Biederman if (ops->id)
12686e77cc47SMatthew Wilcox ida_free(&net_generic_ids, *ops->id);
12693a765edaSEric W. Biederman }
1270f875bae0SEric W. Biederman
1271f875bae0SEric W. Biederman return error;
1272ed160e83SDenis V. Lunev }
1273ed160e83SDenis V. Lunev
unregister_pernet_operations(struct pernet_operations * ops)1274ed160e83SDenis V. Lunev static void unregister_pernet_operations(struct pernet_operations *ops)
1275ed160e83SDenis V. Lunev {
1276f875bae0SEric W. Biederman __unregister_pernet_operations(ops);
12773a765edaSEric W. Biederman rcu_barrier();
1278f875bae0SEric W. Biederman if (ops->id)
12796e77cc47SMatthew Wilcox ida_free(&net_generic_ids, *ops->id);
1280f875bae0SEric W. Biederman }
1281c93cf61fSPavel Emelyanov
12825f256becSEric W. Biederman /**
12835f256becSEric W. Biederman * register_pernet_subsys - register a network namespace subsystem
12845f256becSEric W. Biederman * @ops: pernet operations structure for the subsystem
12855f256becSEric W. Biederman *
12865f256becSEric W. Biederman * Register a subsystem which has init and exit functions
12875f256becSEric W. Biederman * that are called when network namespaces are created and
12885f256becSEric W. Biederman * destroyed respectively.
12895f256becSEric W. Biederman *
12905f256becSEric W. Biederman * When registered all network namespace init functions are
12915f256becSEric W. Biederman * called for every existing network namespace. Allowing kernel
12925f256becSEric W. Biederman * modules to have a race free view of the set of network namespaces.
12935f256becSEric W. Biederman *
12945f256becSEric W. Biederman * When a new network namespace is created all of the init
12955f256becSEric W. Biederman * methods are called in the order in which they were registered.
12965f256becSEric W. Biederman *
12975f256becSEric W. Biederman * When a network namespace is destroyed all of the exit methods
12985f256becSEric W. Biederman * are called in the reverse of the order with which they were
12995f256becSEric W. Biederman * registered.
13005f256becSEric W. Biederman */
register_pernet_subsys(struct pernet_operations * ops)13015f256becSEric W. Biederman int register_pernet_subsys(struct pernet_operations *ops)
13025f256becSEric W. Biederman {
13035f256becSEric W. Biederman int error;
13044420bf21SKirill Tkhai down_write(&pernet_ops_rwsem);
13055f256becSEric W. Biederman error = register_pernet_operations(first_device, ops);
13064420bf21SKirill Tkhai up_write(&pernet_ops_rwsem);
13075f256becSEric W. Biederman return error;
13085f256becSEric W. Biederman }
13095f256becSEric W. Biederman EXPORT_SYMBOL_GPL(register_pernet_subsys);
13105f256becSEric W. Biederman
13115f256becSEric W. Biederman /**
13125f256becSEric W. Biederman * unregister_pernet_subsys - unregister a network namespace subsystem
13135f256becSEric W. Biederman * @ops: pernet operations structure to manipulate
13145f256becSEric W. Biederman *
13155f256becSEric W. Biederman * Remove the pernet operations structure from the list to be
131653379e57SOliver Pinter * used when network namespaces are created or destroyed. In
13175f256becSEric W. Biederman * addition run the exit method for all existing network
13185f256becSEric W. Biederman * namespaces.
13195f256becSEric W. Biederman */
unregister_pernet_subsys(struct pernet_operations * ops)1320b3c981d2SJiri Pirko void unregister_pernet_subsys(struct pernet_operations *ops)
13215f256becSEric W. Biederman {
13224420bf21SKirill Tkhai down_write(&pernet_ops_rwsem);
1323b3c981d2SJiri Pirko unregister_pernet_operations(ops);
13244420bf21SKirill Tkhai up_write(&pernet_ops_rwsem);
13255f256becSEric W. Biederman }
13265f256becSEric W. Biederman EXPORT_SYMBOL_GPL(unregister_pernet_subsys);
13275f256becSEric W. Biederman
13285f256becSEric W. Biederman /**
13295f256becSEric W. Biederman * register_pernet_device - register a network namespace device
13305f256becSEric W. Biederman * @ops: pernet operations structure for the subsystem
13315f256becSEric W. Biederman *
13325f256becSEric W. Biederman * Register a device which has init and exit functions
13335f256becSEric W. Biederman * that are called when network namespaces are created and
13345f256becSEric W. Biederman * destroyed respectively.
13355f256becSEric W. Biederman *
13365f256becSEric W. Biederman * When registered all network namespace init functions are
13375f256becSEric W. Biederman * called for every existing network namespace. Allowing kernel
13385f256becSEric W. Biederman * modules to have a race free view of the set of network namespaces.
13395f256becSEric W. Biederman *
13405f256becSEric W. Biederman * When a new network namespace is created all of the init
13415f256becSEric W. Biederman * methods are called in the order in which they were registered.
13425f256becSEric W. Biederman *
13435f256becSEric W. Biederman * When a network namespace is destroyed all of the exit methods
13445f256becSEric W. Biederman * are called in the reverse of the order with which they were
13455f256becSEric W. Biederman * registered.
13465f256becSEric W. Biederman */
register_pernet_device(struct pernet_operations * ops)13475f256becSEric W. Biederman int register_pernet_device(struct pernet_operations *ops)
13485f256becSEric W. Biederman {
13495f256becSEric W. Biederman int error;
13504420bf21SKirill Tkhai down_write(&pernet_ops_rwsem);
13515f256becSEric W. Biederman error = register_pernet_operations(&pernet_list, ops);
13525f256becSEric W. Biederman if (!error && (first_device == &pernet_list))
13535f256becSEric W. Biederman first_device = &ops->list;
13544420bf21SKirill Tkhai up_write(&pernet_ops_rwsem);
13555f256becSEric W. Biederman return error;
13565f256becSEric W. Biederman }
13575f256becSEric W. Biederman EXPORT_SYMBOL_GPL(register_pernet_device);
13585f256becSEric W. Biederman
13595f256becSEric W. Biederman /**
13605f256becSEric W. Biederman * unregister_pernet_device - unregister a network namespace netdevice
13615f256becSEric W. Biederman * @ops: pernet operations structure to manipulate
13625f256becSEric W. Biederman *
13635f256becSEric W. Biederman * Remove the pernet operations structure from the list to be
136453379e57SOliver Pinter * used when network namespaces are created or destroyed. In
13655f256becSEric W. Biederman * addition run the exit method for all existing network
13665f256becSEric W. Biederman * namespaces.
13675f256becSEric W. Biederman */
unregister_pernet_device(struct pernet_operations * ops)13685f256becSEric W. Biederman void unregister_pernet_device(struct pernet_operations *ops)
13695f256becSEric W. Biederman {
13704420bf21SKirill Tkhai down_write(&pernet_ops_rwsem);
13715f256becSEric W. Biederman if (&ops->list == first_device)
13725f256becSEric W. Biederman first_device = first_device->next;
13735f256becSEric W. Biederman unregister_pernet_operations(ops);
13744420bf21SKirill Tkhai up_write(&pernet_ops_rwsem);
13755f256becSEric W. Biederman }
13765f256becSEric W. Biederman EXPORT_SYMBOL_GPL(unregister_pernet_device);
137713b6f576SEric W. Biederman
137813b6f576SEric W. Biederman #ifdef CONFIG_NET_NS
netns_get(struct task_struct * task)137964964528SAl Viro static struct ns_common *netns_get(struct task_struct *task)
138013b6f576SEric W. Biederman {
1381f0630529SEric W. Biederman struct net *net = NULL;
1382f0630529SEric W. Biederman struct nsproxy *nsproxy;
1383f0630529SEric W. Biederman
1384728dba3aSEric W. Biederman task_lock(task);
1385728dba3aSEric W. Biederman nsproxy = task->nsproxy;
1386f0630529SEric W. Biederman if (nsproxy)
1387f0630529SEric W. Biederman net = get_net(nsproxy->net_ns);
1388728dba3aSEric W. Biederman task_unlock(task);
1389f0630529SEric W. Biederman
1390ff24870fSAl Viro return net ? &net->ns : NULL;
1391ff24870fSAl Viro }
1392ff24870fSAl Viro
to_net_ns(struct ns_common * ns)1393ff24870fSAl Viro static inline struct net *to_net_ns(struct ns_common *ns)
1394ff24870fSAl Viro {
1395ff24870fSAl Viro return container_of(ns, struct net, ns);
139613b6f576SEric W. Biederman }
139713b6f576SEric W. Biederman
/* proc_ns ->put hook: call put_net() on the net that contains @ns. */
static void netns_put(struct ns_common *ns)
{
	struct net *net = to_net_ns(ns);

	put_net(net);
}
140213b6f576SEric W. Biederman
netns_install(struct nsset * nsset,struct ns_common * ns)1403f2a8d52eSChristian Brauner static int netns_install(struct nsset *nsset, struct ns_common *ns)
140413b6f576SEric W. Biederman {
1405f2a8d52eSChristian Brauner struct nsproxy *nsproxy = nsset->nsproxy;
1406ff24870fSAl Viro struct net *net = to_net_ns(ns);
1407142e1d1dSEric W. Biederman
14085e4a0847SEric W. Biederman if (!ns_capable(net->user_ns, CAP_SYS_ADMIN) ||
1409f2a8d52eSChristian Brauner !ns_capable(nsset->cred->user_ns, CAP_SYS_ADMIN))
1410142e1d1dSEric W. Biederman return -EPERM;
1411142e1d1dSEric W. Biederman
141213b6f576SEric W. Biederman put_net(nsproxy->net_ns);
1413142e1d1dSEric W. Biederman nsproxy->net_ns = get_net(net);
141413b6f576SEric W. Biederman return 0;
141513b6f576SEric W. Biederman }
141613b6f576SEric W. Biederman
netns_owner(struct ns_common * ns)1417bcac25a5SAndrey Vagin static struct user_namespace *netns_owner(struct ns_common *ns)
1418bcac25a5SAndrey Vagin {
1419bcac25a5SAndrey Vagin return to_net_ns(ns)->user_ns;
1420bcac25a5SAndrey Vagin }
1421bcac25a5SAndrey Vagin
142213b6f576SEric W. Biederman const struct proc_ns_operations netns_operations = {
142313b6f576SEric W. Biederman .name = "net",
142413b6f576SEric W. Biederman .type = CLONE_NEWNET,
142513b6f576SEric W. Biederman .get = netns_get,
142613b6f576SEric W. Biederman .put = netns_put,
142713b6f576SEric W. Biederman .install = netns_install,
1428bcac25a5SAndrey Vagin .owner = netns_owner,
142913b6f576SEric W. Biederman };
143013b6f576SEric W. Biederman #endif
1431