xref: /openbmc/linux/kernel/nsproxy.c (revision 2ddd3cac)
// SPDX-License-Identifier: GPL-2.0-only
/*
 *  Copyright (C) 2006 IBM Corporation
 *
 *  Author: Serge Hallyn <serue@us.ibm.com>
 *
 *  Jun 2006 - namespaces support
 *             OpenVZ, SWsoft Inc.
 *             Pavel Emelianov <xemul@openvz.org>
 */
11ab516013SSerge E. Hallyn 
125a0e3ad6STejun Heo #include <linux/slab.h>
139984de1aSPaul Gortmaker #include <linux/export.h>
14ab516013SSerge E. Hallyn #include <linux/nsproxy.h>
150437eb59SSerge E. Hallyn #include <linux/init_task.h>
166b3286edSKirill Korotaev #include <linux/mnt_namespace.h>
174865ecf1SSerge E. Hallyn #include <linux/utsname.h>
189a575a92SCedric Le Goater #include <linux/pid_namespace.h>
199dd776b6SEric W. Biederman #include <net/net_namespace.h>
20ae5e1b22SPavel Emelyanov #include <linux/ipc_namespace.h>
21769071acSAndrei Vagin #include <linux/time_namespace.h>
22f2a8d52eSChristian Brauner #include <linux/fs_struct.h>
23303cc571SChristian Brauner #include <linux/proc_fs.h>
240bb80f24SDavid Howells #include <linux/proc_ns.h>
250663c6f8SEric W. Biederman #include <linux/file.h>
260663c6f8SEric W. Biederman #include <linux/syscalls.h>
27a79a908fSAditya Kali #include <linux/cgroup.h>
28e4222673SHari Bathini #include <linux/perf_event.h>
290437eb59SSerge E. Hallyn 
3098c0d07cSCedric Le Goater static struct kmem_cache *nsproxy_cachep;
3198c0d07cSCedric Le Goater 
328467005dSAlexey Dobriyan struct nsproxy init_nsproxy = {
33*2ddd3cacSElena Reshetova 	.count			= REFCOUNT_INIT(1),
348467005dSAlexey Dobriyan 	.uts_ns			= &init_uts_ns,
358467005dSAlexey Dobriyan #if defined(CONFIG_POSIX_MQUEUE) || defined(CONFIG_SYSVIPC)
368467005dSAlexey Dobriyan 	.ipc_ns			= &init_ipc_ns,
378467005dSAlexey Dobriyan #endif
388467005dSAlexey Dobriyan 	.mnt_ns			= NULL,
39c2b1df2eSAndy Lutomirski 	.pid_ns_for_children	= &init_pid_ns,
408467005dSAlexey Dobriyan #ifdef CONFIG_NET
418467005dSAlexey Dobriyan 	.net_ns			= &init_net,
428467005dSAlexey Dobriyan #endif
43a79a908fSAditya Kali #ifdef CONFIG_CGROUPS
44a79a908fSAditya Kali 	.cgroup_ns		= &init_cgroup_ns,
45a79a908fSAditya Kali #endif
46769071acSAndrei Vagin #ifdef CONFIG_TIME_NS
47769071acSAndrei Vagin 	.time_ns		= &init_time_ns,
48769071acSAndrei Vagin 	.time_ns_for_children	= &init_time_ns,
49769071acSAndrei Vagin #endif
508467005dSAlexey Dobriyan };
51ab516013SSerge E. Hallyn 
create_nsproxy(void)5290af90d7SAlexey Dobriyan static inline struct nsproxy *create_nsproxy(void)
53ab516013SSerge E. Hallyn {
5490af90d7SAlexey Dobriyan 	struct nsproxy *nsproxy;
55ab516013SSerge E. Hallyn 
5690af90d7SAlexey Dobriyan 	nsproxy = kmem_cache_alloc(nsproxy_cachep, GFP_KERNEL);
5790af90d7SAlexey Dobriyan 	if (nsproxy)
58*2ddd3cacSElena Reshetova 		refcount_set(&nsproxy->count, 1);
5990af90d7SAlexey Dobriyan 	return nsproxy;
60ab516013SSerge E. Hallyn }
61ab516013SSerge E. Hallyn 
62ab516013SSerge E. Hallyn /*
63e3222c4eSBadari Pulavarty  * Create new nsproxy and all of its the associated namespaces.
64e3222c4eSBadari Pulavarty  * Return the newly created nsproxy.  Do not attach this to the task,
65e3222c4eSBadari Pulavarty  * leave it to the caller to do proper locking and attach it to task.
66ab516013SSerge E. Hallyn  */
create_new_namespaces(unsigned long flags,struct task_struct * tsk,struct user_namespace * user_ns,struct fs_struct * new_fs)67213dd266SEric W. Biederman static struct nsproxy *create_new_namespaces(unsigned long flags,
68bcf58e72SEric W. Biederman 	struct task_struct *tsk, struct user_namespace *user_ns,
69bcf58e72SEric W. Biederman 	struct fs_struct *new_fs)
70ab516013SSerge E. Hallyn {
71e3222c4eSBadari Pulavarty 	struct nsproxy *new_nsp;
72467e9f4bSCedric Le Goater 	int err;
73ab516013SSerge E. Hallyn 
7490af90d7SAlexey Dobriyan 	new_nsp = create_nsproxy();
75e3222c4eSBadari Pulavarty 	if (!new_nsp)
76e3222c4eSBadari Pulavarty 		return ERR_PTR(-ENOMEM);
771651e14eSSerge E. Hallyn 
78bcf58e72SEric W. Biederman 	new_nsp->mnt_ns = copy_mnt_ns(flags, tsk->nsproxy->mnt_ns, user_ns, new_fs);
79467e9f4bSCedric Le Goater 	if (IS_ERR(new_nsp->mnt_ns)) {
80467e9f4bSCedric Le Goater 		err = PTR_ERR(new_nsp->mnt_ns);
81e3222c4eSBadari Pulavarty 		goto out_ns;
82467e9f4bSCedric Le Goater 	}
83e3222c4eSBadari Pulavarty 
84bcf58e72SEric W. Biederman 	new_nsp->uts_ns = copy_utsname(flags, user_ns, tsk->nsproxy->uts_ns);
85467e9f4bSCedric Le Goater 	if (IS_ERR(new_nsp->uts_ns)) {
86467e9f4bSCedric Le Goater 		err = PTR_ERR(new_nsp->uts_ns);
87e3222c4eSBadari Pulavarty 		goto out_uts;
88467e9f4bSCedric Le Goater 	}
89e3222c4eSBadari Pulavarty 
90bcf58e72SEric W. Biederman 	new_nsp->ipc_ns = copy_ipcs(flags, user_ns, tsk->nsproxy->ipc_ns);
91467e9f4bSCedric Le Goater 	if (IS_ERR(new_nsp->ipc_ns)) {
92467e9f4bSCedric Le Goater 		err = PTR_ERR(new_nsp->ipc_ns);
93e3222c4eSBadari Pulavarty 		goto out_ipc;
94467e9f4bSCedric Le Goater 	}
95e3222c4eSBadari Pulavarty 
96c2b1df2eSAndy Lutomirski 	new_nsp->pid_ns_for_children =
97c2b1df2eSAndy Lutomirski 		copy_pid_ns(flags, user_ns, tsk->nsproxy->pid_ns_for_children);
98c2b1df2eSAndy Lutomirski 	if (IS_ERR(new_nsp->pid_ns_for_children)) {
99c2b1df2eSAndy Lutomirski 		err = PTR_ERR(new_nsp->pid_ns_for_children);
100e3222c4eSBadari Pulavarty 		goto out_pid;
101467e9f4bSCedric Le Goater 	}
102e3222c4eSBadari Pulavarty 
103a79a908fSAditya Kali 	new_nsp->cgroup_ns = copy_cgroup_ns(flags, user_ns,
104a79a908fSAditya Kali 					    tsk->nsproxy->cgroup_ns);
105a79a908fSAditya Kali 	if (IS_ERR(new_nsp->cgroup_ns)) {
106a79a908fSAditya Kali 		err = PTR_ERR(new_nsp->cgroup_ns);
107a79a908fSAditya Kali 		goto out_cgroup;
108a79a908fSAditya Kali 	}
109a79a908fSAditya Kali 
110bcf58e72SEric W. Biederman 	new_nsp->net_ns = copy_net_ns(flags, user_ns, tsk->nsproxy->net_ns);
1119dd776b6SEric W. Biederman 	if (IS_ERR(new_nsp->net_ns)) {
1129dd776b6SEric W. Biederman 		err = PTR_ERR(new_nsp->net_ns);
1139dd776b6SEric W. Biederman 		goto out_net;
1149dd776b6SEric W. Biederman 	}
1159dd776b6SEric W. Biederman 
116769071acSAndrei Vagin 	new_nsp->time_ns_for_children = copy_time_ns(flags, user_ns,
117769071acSAndrei Vagin 					tsk->nsproxy->time_ns_for_children);
118769071acSAndrei Vagin 	if (IS_ERR(new_nsp->time_ns_for_children)) {
119769071acSAndrei Vagin 		err = PTR_ERR(new_nsp->time_ns_for_children);
120769071acSAndrei Vagin 		goto out_time;
121769071acSAndrei Vagin 	}
122769071acSAndrei Vagin 	new_nsp->time_ns = get_time_ns(tsk->nsproxy->time_ns);
123769071acSAndrei Vagin 
124e3222c4eSBadari Pulavarty 	return new_nsp;
125e3222c4eSBadari Pulavarty 
126769071acSAndrei Vagin out_time:
127769071acSAndrei Vagin 	put_net(new_nsp->net_ns);
1289dd776b6SEric W. Biederman out_net:
129a79a908fSAditya Kali 	put_cgroup_ns(new_nsp->cgroup_ns);
130a79a908fSAditya Kali out_cgroup:
131c2b1df2eSAndy Lutomirski 	if (new_nsp->pid_ns_for_children)
132c2b1df2eSAndy Lutomirski 		put_pid_ns(new_nsp->pid_ns_for_children);
133e3222c4eSBadari Pulavarty out_pid:
134e3222c4eSBadari Pulavarty 	if (new_nsp->ipc_ns)
135e3222c4eSBadari Pulavarty 		put_ipc_ns(new_nsp->ipc_ns);
136e3222c4eSBadari Pulavarty out_ipc:
137e3222c4eSBadari Pulavarty 	if (new_nsp->uts_ns)
138e3222c4eSBadari Pulavarty 		put_uts_ns(new_nsp->uts_ns);
139e3222c4eSBadari Pulavarty out_uts:
140e3222c4eSBadari Pulavarty 	if (new_nsp->mnt_ns)
141e3222c4eSBadari Pulavarty 		put_mnt_ns(new_nsp->mnt_ns);
142e3222c4eSBadari Pulavarty out_ns:
14398c0d07cSCedric Le Goater 	kmem_cache_free(nsproxy_cachep, new_nsp);
144467e9f4bSCedric Le Goater 	return ERR_PTR(err);
145ab516013SSerge E. Hallyn }
146ab516013SSerge E. Hallyn 
147ab516013SSerge E. Hallyn /*
148ab516013SSerge E. Hallyn  * called from clone.  This now handles copy for nsproxy and all
149ab516013SSerge E. Hallyn  * namespaces therein.
150ab516013SSerge E. Hallyn  */
copy_namespaces(unsigned long flags,struct task_struct * tsk)151213dd266SEric W. Biederman int copy_namespaces(unsigned long flags, struct task_struct *tsk)
152ab516013SSerge E. Hallyn {
153ab516013SSerge E. Hallyn 	struct nsproxy *old_ns = tsk->nsproxy;
154b33c77efSEric W. Biederman 	struct user_namespace *user_ns = task_cred_xxx(tsk, user_ns);
1551651e14eSSerge E. Hallyn 	struct nsproxy *new_ns;
156ab516013SSerge E. Hallyn 
157dbef0c1cSEric W. Biederman 	if (likely(!(flags & (CLONE_NEWNS | CLONE_NEWUTS | CLONE_NEWIPC |
158a79a908fSAditya Kali 			      CLONE_NEWPID | CLONE_NEWNET |
159769071acSAndrei Vagin 			      CLONE_NEWCGROUP | CLONE_NEWTIME)))) {
1602b5f9dadSAndrei Vagin 		if ((flags & CLONE_VM) ||
1612b5f9dadSAndrei Vagin 		    likely(old_ns->time_ns_for_children == old_ns->time_ns)) {
162ab516013SSerge E. Hallyn 			get_nsproxy(old_ns);
163ab516013SSerge E. Hallyn 			return 0;
164e3222c4eSBadari Pulavarty 		}
165769071acSAndrei Vagin 	} else if (!ns_capable(user_ns, CAP_SYS_ADMIN))
166dbef0c1cSEric W. Biederman 		return -EPERM;
167dbef0c1cSEric W. Biederman 
16802fdb36aSSerge E. Hallyn 	/*
16902fdb36aSSerge E. Hallyn 	 * CLONE_NEWIPC must detach from the undolist: after switching
17002fdb36aSSerge E. Hallyn 	 * to a new ipc namespace, the semaphore arrays from the old
17102fdb36aSSerge E. Hallyn 	 * namespace are unreachable.  In clone parlance, CLONE_SYSVSEM
17202fdb36aSSerge E. Hallyn 	 * means share undolist with parent, so we must forbid using
17302fdb36aSSerge E. Hallyn 	 * it along with CLONE_NEWIPC.
17402fdb36aSSerge E. Hallyn 	 */
17521e85194SRaphael S.Carvalho 	if ((flags & (CLONE_NEWIPC | CLONE_SYSVSEM)) ==
176dbef0c1cSEric W. Biederman 		(CLONE_NEWIPC | CLONE_SYSVSEM))
177dbef0c1cSEric W. Biederman 		return -EINVAL;
17802fdb36aSSerge E. Hallyn 
179d7d48f62SYuanhan Liu 	new_ns = create_new_namespaces(flags, tsk, user_ns, tsk->fs);
180dbef0c1cSEric W. Biederman 	if (IS_ERR(new_ns))
181dbef0c1cSEric W. Biederman 		return  PTR_ERR(new_ns);
1821651e14eSSerge E. Hallyn 
1832b5f9dadSAndrei Vagin 	if ((flags & CLONE_VM) == 0)
1845c62634fSHui Su 		timens_on_fork(new_ns, tsk);
185769071acSAndrei Vagin 
1861651e14eSSerge E. Hallyn 	tsk->nsproxy = new_ns;
187dbef0c1cSEric W. Biederman 	return 0;
188ab516013SSerge E. Hallyn }
189ab516013SSerge E. Hallyn 
free_nsproxy(struct nsproxy * ns)190ab516013SSerge E. Hallyn void free_nsproxy(struct nsproxy *ns)
191ab516013SSerge E. Hallyn {
1926b3286edSKirill Korotaev 	if (ns->mnt_ns)
1936b3286edSKirill Korotaev 		put_mnt_ns(ns->mnt_ns);
1944865ecf1SSerge E. Hallyn 	if (ns->uts_ns)
1954865ecf1SSerge E. Hallyn 		put_uts_ns(ns->uts_ns);
19625b21cb2SKirill Korotaev 	if (ns->ipc_ns)
19725b21cb2SKirill Korotaev 		put_ipc_ns(ns->ipc_ns);
198c2b1df2eSAndy Lutomirski 	if (ns->pid_ns_for_children)
199c2b1df2eSAndy Lutomirski 		put_pid_ns(ns->pid_ns_for_children);
200769071acSAndrei Vagin 	if (ns->time_ns)
201769071acSAndrei Vagin 		put_time_ns(ns->time_ns);
202769071acSAndrei Vagin 	if (ns->time_ns_for_children)
203769071acSAndrei Vagin 		put_time_ns(ns->time_ns_for_children);
204a79a908fSAditya Kali 	put_cgroup_ns(ns->cgroup_ns);
2059dd776b6SEric W. Biederman 	put_net(ns->net_ns);
20698c0d07cSCedric Le Goater 	kmem_cache_free(nsproxy_cachep, ns);
207ab516013SSerge E. Hallyn }
208e3222c4eSBadari Pulavarty 
209e3222c4eSBadari Pulavarty /*
210e3222c4eSBadari Pulavarty  * Called from unshare. Unshare all the namespaces part of nsproxy.
2114e71e474SCedric Le Goater  * On success, returns the new nsproxy.
212e3222c4eSBadari Pulavarty  */
unshare_nsproxy_namespaces(unsigned long unshare_flags,struct nsproxy ** new_nsp,struct cred * new_cred,struct fs_struct * new_fs)213e3222c4eSBadari Pulavarty int unshare_nsproxy_namespaces(unsigned long unshare_flags,
214b2e0d987SEric W. Biederman 	struct nsproxy **new_nsp, struct cred *new_cred, struct fs_struct *new_fs)
215e3222c4eSBadari Pulavarty {
216bcf58e72SEric W. Biederman 	struct user_namespace *user_ns;
217e3222c4eSBadari Pulavarty 	int err = 0;
218e3222c4eSBadari Pulavarty 
21977ec739dSSerge E. Hallyn 	if (!(unshare_flags & (CLONE_NEWNS | CLONE_NEWUTS | CLONE_NEWIPC |
220769071acSAndrei Vagin 			       CLONE_NEWNET | CLONE_NEWPID | CLONE_NEWCGROUP |
221769071acSAndrei Vagin 			       CLONE_NEWTIME)))
222e3222c4eSBadari Pulavarty 		return 0;
223e3222c4eSBadari Pulavarty 
224b2e0d987SEric W. Biederman 	user_ns = new_cred ? new_cred->user_ns : current_user_ns();
225b2e0d987SEric W. Biederman 	if (!ns_capable(user_ns, CAP_SYS_ADMIN))
226e3222c4eSBadari Pulavarty 		return -EPERM;
227e3222c4eSBadari Pulavarty 
228bcf58e72SEric W. Biederman 	*new_nsp = create_new_namespaces(unshare_flags, current, user_ns,
229e3222c4eSBadari Pulavarty 					 new_fs ? new_fs : current->fs);
230858d72eaSSerge E. Hallyn 	if (IS_ERR(*new_nsp)) {
231e3222c4eSBadari Pulavarty 		err = PTR_ERR(*new_nsp);
232858d72eaSSerge E. Hallyn 		goto out;
233858d72eaSSerge E. Hallyn 	}
234858d72eaSSerge E. Hallyn 
235858d72eaSSerge E. Hallyn out:
236e3222c4eSBadari Pulavarty 	return err;
237e3222c4eSBadari Pulavarty }
23898c0d07cSCedric Le Goater 
switch_task_namespaces(struct task_struct * p,struct nsproxy * new)239cf7b708cSPavel Emelyanov void switch_task_namespaces(struct task_struct *p, struct nsproxy *new)
240cf7b708cSPavel Emelyanov {
241cf7b708cSPavel Emelyanov 	struct nsproxy *ns;
242cf7b708cSPavel Emelyanov 
243cf7b708cSPavel Emelyanov 	might_sleep();
244cf7b708cSPavel Emelyanov 
245728dba3aSEric W. Biederman 	task_lock(p);
246cf7b708cSPavel Emelyanov 	ns = p->nsproxy;
247728dba3aSEric W. Biederman 	p->nsproxy = new;
248728dba3aSEric W. Biederman 	task_unlock(p);
249cf7b708cSPavel Emelyanov 
250aabe19b8SHui Su 	if (ns)
251aabe19b8SHui Su 		put_nsproxy(ns);
252cf7b708cSPavel Emelyanov }
253cf7b708cSPavel Emelyanov 
exit_task_namespaces(struct task_struct * p)254cf7b708cSPavel Emelyanov void exit_task_namespaces(struct task_struct *p)
255cf7b708cSPavel Emelyanov {
256cf7b708cSPavel Emelyanov 	switch_task_namespaces(p, NULL);
257cf7b708cSPavel Emelyanov }
258cf7b708cSPavel Emelyanov 
exec_task_namespaces(void)2592b5f9dadSAndrei Vagin int exec_task_namespaces(void)
2602b5f9dadSAndrei Vagin {
2612b5f9dadSAndrei Vagin 	struct task_struct *tsk = current;
2622b5f9dadSAndrei Vagin 	struct nsproxy *new;
2632b5f9dadSAndrei Vagin 
2642b5f9dadSAndrei Vagin 	if (tsk->nsproxy->time_ns_for_children == tsk->nsproxy->time_ns)
2652b5f9dadSAndrei Vagin 		return 0;
2662b5f9dadSAndrei Vagin 
2672b5f9dadSAndrei Vagin 	new = create_new_namespaces(0, tsk, current_user_ns(), tsk->fs);
2682b5f9dadSAndrei Vagin 	if (IS_ERR(new))
2692b5f9dadSAndrei Vagin 		return PTR_ERR(new);
2702b5f9dadSAndrei Vagin 
2712b5f9dadSAndrei Vagin 	timens_on_fork(new, tsk);
2722b5f9dadSAndrei Vagin 	switch_task_namespaces(tsk, new);
2732b5f9dadSAndrei Vagin 	return 0;
2742b5f9dadSAndrei Vagin }
2752b5f9dadSAndrei Vagin 
/*
 * Validate the @flags of a setns() request made through a pidfd.
 *
 * @flags must be non-zero and consist solely of CLONE_NEW* namespace
 * flags; a flag whose namespace type is compiled out is refused as well.
 * Returns 0 if @flags is acceptable and -EINVAL otherwise.
 */
static int check_setns_flags(unsigned long flags)
{
	const unsigned long known = CLONE_NEWNS | CLONE_NEWUTS | CLONE_NEWIPC |
				    CLONE_NEWNET | CLONE_NEWTIME |
				    CLONE_NEWUSER | CLONE_NEWPID |
				    CLONE_NEWCGROUP;
	unsigned long disabled = 0;

	if (!flags || (flags & ~known))
		return -EINVAL;

	/* Collect the flags whose namespace support is not built in. */
#ifndef CONFIG_USER_NS
	disabled |= CLONE_NEWUSER;
#endif
#ifndef CONFIG_PID_NS
	disabled |= CLONE_NEWPID;
#endif
#ifndef CONFIG_UTS_NS
	disabled |= CLONE_NEWUTS;
#endif
#ifndef CONFIG_IPC_NS
	disabled |= CLONE_NEWIPC;
#endif
#ifndef CONFIG_CGROUPS
	disabled |= CLONE_NEWCGROUP;
#endif
#ifndef CONFIG_NET_NS
	disabled |= CLONE_NEWNET;
#endif
#ifndef CONFIG_TIME_NS
	disabled |= CLONE_NEWTIME;
#endif

	if (flags & disabled)
		return -EINVAL;

	return 0;
}
314303cc571SChristian Brauner 
put_nsset(struct nsset * nsset)315f2a8d52eSChristian Brauner static void put_nsset(struct nsset *nsset)
316f2a8d52eSChristian Brauner {
317f2a8d52eSChristian Brauner 	unsigned flags = nsset->flags;
318f2a8d52eSChristian Brauner 
319f2a8d52eSChristian Brauner 	if (flags & CLONE_NEWUSER)
320f2a8d52eSChristian Brauner 		put_cred(nsset_cred(nsset));
321303cc571SChristian Brauner 	/*
322303cc571SChristian Brauner 	 * We only created a temporary copy if we attached to more than just
323303cc571SChristian Brauner 	 * the mount namespace.
324303cc571SChristian Brauner 	 */
325303cc571SChristian Brauner 	if (nsset->fs && (flags & CLONE_NEWNS) && (flags & ~CLONE_NEWNS))
326303cc571SChristian Brauner 		free_fs_struct(nsset->fs);
327f2a8d52eSChristian Brauner 	if (nsset->nsproxy)
328f2a8d52eSChristian Brauner 		free_nsproxy(nsset->nsproxy);
329f2a8d52eSChristian Brauner }
330f2a8d52eSChristian Brauner 
prepare_nsset(unsigned flags,struct nsset * nsset)331303cc571SChristian Brauner static int prepare_nsset(unsigned flags, struct nsset *nsset)
332f2a8d52eSChristian Brauner {
333f2a8d52eSChristian Brauner 	struct task_struct *me = current;
334f2a8d52eSChristian Brauner 
335f2a8d52eSChristian Brauner 	nsset->nsproxy = create_new_namespaces(0, me, current_user_ns(), me->fs);
336f2a8d52eSChristian Brauner 	if (IS_ERR(nsset->nsproxy))
337f2a8d52eSChristian Brauner 		return PTR_ERR(nsset->nsproxy);
338f2a8d52eSChristian Brauner 
339303cc571SChristian Brauner 	if (flags & CLONE_NEWUSER)
340f2a8d52eSChristian Brauner 		nsset->cred = prepare_creds();
341f2a8d52eSChristian Brauner 	else
342f2a8d52eSChristian Brauner 		nsset->cred = current_cred();
343f2a8d52eSChristian Brauner 	if (!nsset->cred)
344f2a8d52eSChristian Brauner 		goto out;
345f2a8d52eSChristian Brauner 
346303cc571SChristian Brauner 	/* Only create a temporary copy of fs_struct if we really need to. */
347303cc571SChristian Brauner 	if (flags == CLONE_NEWNS) {
348f2a8d52eSChristian Brauner 		nsset->fs = me->fs;
349303cc571SChristian Brauner 	} else if (flags & CLONE_NEWNS) {
350303cc571SChristian Brauner 		nsset->fs = copy_fs_struct(me->fs);
351303cc571SChristian Brauner 		if (!nsset->fs)
352303cc571SChristian Brauner 			goto out;
353303cc571SChristian Brauner 	}
354f2a8d52eSChristian Brauner 
355303cc571SChristian Brauner 	nsset->flags = flags;
356f2a8d52eSChristian Brauner 	return 0;
357f2a8d52eSChristian Brauner 
358f2a8d52eSChristian Brauner out:
359f2a8d52eSChristian Brauner 	put_nsset(nsset);
360f2a8d52eSChristian Brauner 	return -ENOMEM;
361f2a8d52eSChristian Brauner }
362f2a8d52eSChristian Brauner 
validate_ns(struct nsset * nsset,struct ns_common * ns)363303cc571SChristian Brauner static inline int validate_ns(struct nsset *nsset, struct ns_common *ns)
364303cc571SChristian Brauner {
365303cc571SChristian Brauner 	return ns->ops->install(nsset, ns);
366303cc571SChristian Brauner }
367303cc571SChristian Brauner 
368303cc571SChristian Brauner /*
369303cc571SChristian Brauner  * This is the inverse operation to unshare().
370303cc571SChristian Brauner  * Ordering is equivalent to the standard ordering used everywhere else
371303cc571SChristian Brauner  * during unshare and process creation. The switch to the new set of
372303cc571SChristian Brauner  * namespaces occurs at the point of no return after installation of
373303cc571SChristian Brauner  * all requested namespaces was successful in commit_nsset().
374303cc571SChristian Brauner  */
validate_nsset(struct nsset * nsset,struct pid * pid)375303cc571SChristian Brauner static int validate_nsset(struct nsset *nsset, struct pid *pid)
376303cc571SChristian Brauner {
377303cc571SChristian Brauner 	int ret = 0;
378303cc571SChristian Brauner 	unsigned flags = nsset->flags;
379303cc571SChristian Brauner 	struct user_namespace *user_ns = NULL;
380303cc571SChristian Brauner 	struct pid_namespace *pid_ns = NULL;
381303cc571SChristian Brauner 	struct nsproxy *nsp;
382303cc571SChristian Brauner 	struct task_struct *tsk;
383303cc571SChristian Brauner 
384303cc571SChristian Brauner 	/* Take a "snapshot" of the target task's namespaces. */
385303cc571SChristian Brauner 	rcu_read_lock();
386303cc571SChristian Brauner 	tsk = pid_task(pid, PIDTYPE_PID);
387303cc571SChristian Brauner 	if (!tsk) {
388303cc571SChristian Brauner 		rcu_read_unlock();
389303cc571SChristian Brauner 		return -ESRCH;
390303cc571SChristian Brauner 	}
391303cc571SChristian Brauner 
392303cc571SChristian Brauner 	if (!ptrace_may_access(tsk, PTRACE_MODE_READ_REALCREDS)) {
393303cc571SChristian Brauner 		rcu_read_unlock();
394303cc571SChristian Brauner 		return -EPERM;
395303cc571SChristian Brauner 	}
396303cc571SChristian Brauner 
397303cc571SChristian Brauner 	task_lock(tsk);
398303cc571SChristian Brauner 	nsp = tsk->nsproxy;
399303cc571SChristian Brauner 	if (nsp)
400303cc571SChristian Brauner 		get_nsproxy(nsp);
401303cc571SChristian Brauner 	task_unlock(tsk);
402303cc571SChristian Brauner 	if (!nsp) {
403303cc571SChristian Brauner 		rcu_read_unlock();
404303cc571SChristian Brauner 		return -ESRCH;
405303cc571SChristian Brauner 	}
406303cc571SChristian Brauner 
407303cc571SChristian Brauner #ifdef CONFIG_PID_NS
408303cc571SChristian Brauner 	if (flags & CLONE_NEWPID) {
409303cc571SChristian Brauner 		pid_ns = task_active_pid_ns(tsk);
410303cc571SChristian Brauner 		if (unlikely(!pid_ns)) {
411303cc571SChristian Brauner 			rcu_read_unlock();
412303cc571SChristian Brauner 			ret = -ESRCH;
413303cc571SChristian Brauner 			goto out;
414303cc571SChristian Brauner 		}
415303cc571SChristian Brauner 		get_pid_ns(pid_ns);
416303cc571SChristian Brauner 	}
417303cc571SChristian Brauner #endif
418303cc571SChristian Brauner 
419303cc571SChristian Brauner #ifdef CONFIG_USER_NS
420303cc571SChristian Brauner 	if (flags & CLONE_NEWUSER)
421303cc571SChristian Brauner 		user_ns = get_user_ns(__task_cred(tsk)->user_ns);
422303cc571SChristian Brauner #endif
423303cc571SChristian Brauner 	rcu_read_unlock();
424303cc571SChristian Brauner 
425303cc571SChristian Brauner 	/*
426303cc571SChristian Brauner 	 * Install requested namespaces. The caller will have
427303cc571SChristian Brauner 	 * verified earlier that the requested namespaces are
428303cc571SChristian Brauner 	 * supported on this kernel. We don't report errors here
429303cc571SChristian Brauner 	 * if a namespace is requested that isn't supported.
430303cc571SChristian Brauner 	 */
431303cc571SChristian Brauner #ifdef CONFIG_USER_NS
432303cc571SChristian Brauner 	if (flags & CLONE_NEWUSER) {
433303cc571SChristian Brauner 		ret = validate_ns(nsset, &user_ns->ns);
434303cc571SChristian Brauner 		if (ret)
435303cc571SChristian Brauner 			goto out;
436303cc571SChristian Brauner 	}
437303cc571SChristian Brauner #endif
438303cc571SChristian Brauner 
439303cc571SChristian Brauner 	if (flags & CLONE_NEWNS) {
440303cc571SChristian Brauner 		ret = validate_ns(nsset, from_mnt_ns(nsp->mnt_ns));
441303cc571SChristian Brauner 		if (ret)
442303cc571SChristian Brauner 			goto out;
443303cc571SChristian Brauner 	}
444303cc571SChristian Brauner 
445303cc571SChristian Brauner #ifdef CONFIG_UTS_NS
446303cc571SChristian Brauner 	if (flags & CLONE_NEWUTS) {
447303cc571SChristian Brauner 		ret = validate_ns(nsset, &nsp->uts_ns->ns);
448303cc571SChristian Brauner 		if (ret)
449303cc571SChristian Brauner 			goto out;
450303cc571SChristian Brauner 	}
451303cc571SChristian Brauner #endif
452303cc571SChristian Brauner 
453303cc571SChristian Brauner #ifdef CONFIG_IPC_NS
454303cc571SChristian Brauner 	if (flags & CLONE_NEWIPC) {
455303cc571SChristian Brauner 		ret = validate_ns(nsset, &nsp->ipc_ns->ns);
456303cc571SChristian Brauner 		if (ret)
457303cc571SChristian Brauner 			goto out;
458303cc571SChristian Brauner 	}
459303cc571SChristian Brauner #endif
460303cc571SChristian Brauner 
461303cc571SChristian Brauner #ifdef CONFIG_PID_NS
462303cc571SChristian Brauner 	if (flags & CLONE_NEWPID) {
463303cc571SChristian Brauner 		ret = validate_ns(nsset, &pid_ns->ns);
464303cc571SChristian Brauner 		if (ret)
465303cc571SChristian Brauner 			goto out;
466303cc571SChristian Brauner 	}
467303cc571SChristian Brauner #endif
468303cc571SChristian Brauner 
469303cc571SChristian Brauner #ifdef CONFIG_CGROUPS
470303cc571SChristian Brauner 	if (flags & CLONE_NEWCGROUP) {
471303cc571SChristian Brauner 		ret = validate_ns(nsset, &nsp->cgroup_ns->ns);
472303cc571SChristian Brauner 		if (ret)
473303cc571SChristian Brauner 			goto out;
474303cc571SChristian Brauner 	}
475303cc571SChristian Brauner #endif
476303cc571SChristian Brauner 
477303cc571SChristian Brauner #ifdef CONFIG_NET_NS
478303cc571SChristian Brauner 	if (flags & CLONE_NEWNET) {
479303cc571SChristian Brauner 		ret = validate_ns(nsset, &nsp->net_ns->ns);
480303cc571SChristian Brauner 		if (ret)
481303cc571SChristian Brauner 			goto out;
482303cc571SChristian Brauner 	}
483303cc571SChristian Brauner #endif
484303cc571SChristian Brauner 
48576c12881SChristian Brauner #ifdef CONFIG_TIME_NS
48676c12881SChristian Brauner 	if (flags & CLONE_NEWTIME) {
48776c12881SChristian Brauner 		ret = validate_ns(nsset, &nsp->time_ns->ns);
48876c12881SChristian Brauner 		if (ret)
48976c12881SChristian Brauner 			goto out;
49076c12881SChristian Brauner 	}
49176c12881SChristian Brauner #endif
49276c12881SChristian Brauner 
493303cc571SChristian Brauner out:
494303cc571SChristian Brauner 	if (pid_ns)
495303cc571SChristian Brauner 		put_pid_ns(pid_ns);
496303cc571SChristian Brauner 	if (nsp)
497303cc571SChristian Brauner 		put_nsproxy(nsp);
498303cc571SChristian Brauner 	put_user_ns(user_ns);
499303cc571SChristian Brauner 
500303cc571SChristian Brauner 	return ret;
501303cc571SChristian Brauner }
502303cc571SChristian Brauner 
503f2a8d52eSChristian Brauner /*
504f2a8d52eSChristian Brauner  * This is the point of no return. There are just a few namespaces
505f2a8d52eSChristian Brauner  * that do some actual work here and it's sufficiently minimal that
506f2a8d52eSChristian Brauner  * a separate ns_common operation seems unnecessary for now.
507f2a8d52eSChristian Brauner  * Unshare is doing the same thing. If we'll end up needing to do
508f2a8d52eSChristian Brauner  * more in a given namespace or a helper here is ultimately not
509f2a8d52eSChristian Brauner  * exported anymore a simple commit handler for each namespace
510f2a8d52eSChristian Brauner  * should be added to ns_common.
511f2a8d52eSChristian Brauner  */
commit_nsset(struct nsset * nsset)512f2a8d52eSChristian Brauner static void commit_nsset(struct nsset *nsset)
513f2a8d52eSChristian Brauner {
514f2a8d52eSChristian Brauner 	unsigned flags = nsset->flags;
515f2a8d52eSChristian Brauner 	struct task_struct *me = current;
516f2a8d52eSChristian Brauner 
517f2a8d52eSChristian Brauner #ifdef CONFIG_USER_NS
518f2a8d52eSChristian Brauner 	if (flags & CLONE_NEWUSER) {
519f2a8d52eSChristian Brauner 		/* transfer ownership */
520f2a8d52eSChristian Brauner 		commit_creds(nsset_cred(nsset));
521f2a8d52eSChristian Brauner 		nsset->cred = NULL;
522f2a8d52eSChristian Brauner 	}
523f2a8d52eSChristian Brauner #endif
524f2a8d52eSChristian Brauner 
525303cc571SChristian Brauner 	/* We only need to commit if we have used a temporary fs_struct. */
526303cc571SChristian Brauner 	if ((flags & CLONE_NEWNS) && (flags & ~CLONE_NEWNS)) {
527303cc571SChristian Brauner 		set_fs_root(me->fs, &nsset->fs->root);
528303cc571SChristian Brauner 		set_fs_pwd(me->fs, &nsset->fs->pwd);
529303cc571SChristian Brauner 	}
530303cc571SChristian Brauner 
531f2a8d52eSChristian Brauner #ifdef CONFIG_IPC_NS
532f2a8d52eSChristian Brauner 	if (flags & CLONE_NEWIPC)
533f2a8d52eSChristian Brauner 		exit_sem(me);
534f2a8d52eSChristian Brauner #endif
535f2a8d52eSChristian Brauner 
53676c12881SChristian Brauner #ifdef CONFIG_TIME_NS
53776c12881SChristian Brauner 	if (flags & CLONE_NEWTIME)
53876c12881SChristian Brauner 		timens_commit(me, nsset->nsproxy->time_ns);
53976c12881SChristian Brauner #endif
54076c12881SChristian Brauner 
541f2a8d52eSChristian Brauner 	/* transfer ownership */
542f2a8d52eSChristian Brauner 	switch_task_namespaces(me, nsset->nsproxy);
543f2a8d52eSChristian Brauner 	nsset->nsproxy = NULL;
544f2a8d52eSChristian Brauner }
545f2a8d52eSChristian Brauner 
/*
 * setns() system call: attach the calling task to the namespace(s)
 * designated by @fd.
 *
 * If @fd is a namespace file (proc_ns_file()), @flags must be 0 or equal
 * to the type of that namespace, and only that namespace is installed.
 * If @fd is a pidfd, @flags must pass check_setns_flags() and the
 * requested namespaces are taken from the task behind the pidfd.  Any
 * other kind of file is rejected with -EINVAL.
 *
 * Returns 0 on success, -EBADF if fdget() yields no file, or a negative
 * error from the validation, preparation or installation steps.
 */
SYSCALL_DEFINE2(setns, int, fd, int, flags)
{
	struct fd f = fdget(fd);
	struct nsset nsset = {};
	struct ns_common *ns = NULL;
	int via_nsfile;
	int err = 0;

	if (!f.file)
		return -EBADF;

	via_nsfile = proc_ns_file(f.file);
	if (via_nsfile) {
		ns = get_proc_ns(file_inode(f.file));
		if (flags && (ns->ops->type != flags))
			err = -EINVAL;
		flags = ns->ops->type;
	} else if (IS_ERR(pidfd_pid(f.file))) {
		err = -EINVAL;
	} else {
		err = check_setns_flags(flags);
	}
	if (err)
		goto out;

	err = prepare_nsset(flags, &nsset);
	if (err)
		goto out;

	if (via_nsfile)
		err = validate_ns(&nsset, ns);
	else
		err = validate_nsset(&nsset, f.file->private_data);

	/* Only switch namespaces once every install step has succeeded. */
	if (err == 0) {
		commit_nsset(&nsset);
		perf_event_namespaces(current);
	}
	put_nsset(&nsset);
out:
	fdput(f);
	return err;
}
5860663c6f8SEric W. Biederman 
nsproxy_cache_init(void)58766577193SAl Viro int __init nsproxy_cache_init(void)
58898c0d07cSCedric Le Goater {
58930acd0bdSVasily Averin 	nsproxy_cachep = KMEM_CACHE(nsproxy, SLAB_PANIC|SLAB_ACCOUNT);
59098c0d07cSCedric Le Goater 	return 0;
59198c0d07cSCedric Le Goater }
592