// SPDX-License-Identifier: GPL-2.0-only
/*
 *  Copyright (C) 2006 IBM Corporation
 *
 *  Author: Serge Hallyn <serue@us.ibm.com>
 *
 *  Jun 2006 - namespaces support
 *             OpenVZ, SWsoft Inc.
 *             Pavel Emelianov <xemul@openvz.org>
 */
11ab516013SSerge E. Hallyn
125a0e3ad6STejun Heo #include <linux/slab.h>
139984de1aSPaul Gortmaker #include <linux/export.h>
14ab516013SSerge E. Hallyn #include <linux/nsproxy.h>
150437eb59SSerge E. Hallyn #include <linux/init_task.h>
166b3286edSKirill Korotaev #include <linux/mnt_namespace.h>
174865ecf1SSerge E. Hallyn #include <linux/utsname.h>
189a575a92SCedric Le Goater #include <linux/pid_namespace.h>
199dd776b6SEric W. Biederman #include <net/net_namespace.h>
20ae5e1b22SPavel Emelyanov #include <linux/ipc_namespace.h>
21769071acSAndrei Vagin #include <linux/time_namespace.h>
22f2a8d52eSChristian Brauner #include <linux/fs_struct.h>
23303cc571SChristian Brauner #include <linux/proc_fs.h>
240bb80f24SDavid Howells #include <linux/proc_ns.h>
250663c6f8SEric W. Biederman #include <linux/file.h>
260663c6f8SEric W. Biederman #include <linux/syscalls.h>
27a79a908fSAditya Kali #include <linux/cgroup.h>
28e4222673SHari Bathini #include <linux/perf_event.h>
290437eb59SSerge E. Hallyn
3098c0d07cSCedric Le Goater static struct kmem_cache *nsproxy_cachep;
3198c0d07cSCedric Le Goater
328467005dSAlexey Dobriyan struct nsproxy init_nsproxy = {
33*2ddd3cacSElena Reshetova .count = REFCOUNT_INIT(1),
348467005dSAlexey Dobriyan .uts_ns = &init_uts_ns,
358467005dSAlexey Dobriyan #if defined(CONFIG_POSIX_MQUEUE) || defined(CONFIG_SYSVIPC)
368467005dSAlexey Dobriyan .ipc_ns = &init_ipc_ns,
378467005dSAlexey Dobriyan #endif
388467005dSAlexey Dobriyan .mnt_ns = NULL,
39c2b1df2eSAndy Lutomirski .pid_ns_for_children = &init_pid_ns,
408467005dSAlexey Dobriyan #ifdef CONFIG_NET
418467005dSAlexey Dobriyan .net_ns = &init_net,
428467005dSAlexey Dobriyan #endif
43a79a908fSAditya Kali #ifdef CONFIG_CGROUPS
44a79a908fSAditya Kali .cgroup_ns = &init_cgroup_ns,
45a79a908fSAditya Kali #endif
46769071acSAndrei Vagin #ifdef CONFIG_TIME_NS
47769071acSAndrei Vagin .time_ns = &init_time_ns,
48769071acSAndrei Vagin .time_ns_for_children = &init_time_ns,
49769071acSAndrei Vagin #endif
508467005dSAlexey Dobriyan };
51ab516013SSerge E. Hallyn
create_nsproxy(void)5290af90d7SAlexey Dobriyan static inline struct nsproxy *create_nsproxy(void)
53ab516013SSerge E. Hallyn {
5490af90d7SAlexey Dobriyan struct nsproxy *nsproxy;
55ab516013SSerge E. Hallyn
5690af90d7SAlexey Dobriyan nsproxy = kmem_cache_alloc(nsproxy_cachep, GFP_KERNEL);
5790af90d7SAlexey Dobriyan if (nsproxy)
58*2ddd3cacSElena Reshetova refcount_set(&nsproxy->count, 1);
5990af90d7SAlexey Dobriyan return nsproxy;
60ab516013SSerge E. Hallyn }
61ab516013SSerge E. Hallyn
62ab516013SSerge E. Hallyn /*
63e3222c4eSBadari Pulavarty * Create new nsproxy and all of its the associated namespaces.
64e3222c4eSBadari Pulavarty * Return the newly created nsproxy. Do not attach this to the task,
65e3222c4eSBadari Pulavarty * leave it to the caller to do proper locking and attach it to task.
66ab516013SSerge E. Hallyn */
create_new_namespaces(unsigned long flags,struct task_struct * tsk,struct user_namespace * user_ns,struct fs_struct * new_fs)67213dd266SEric W. Biederman static struct nsproxy *create_new_namespaces(unsigned long flags,
68bcf58e72SEric W. Biederman struct task_struct *tsk, struct user_namespace *user_ns,
69bcf58e72SEric W. Biederman struct fs_struct *new_fs)
70ab516013SSerge E. Hallyn {
71e3222c4eSBadari Pulavarty struct nsproxy *new_nsp;
72467e9f4bSCedric Le Goater int err;
73ab516013SSerge E. Hallyn
7490af90d7SAlexey Dobriyan new_nsp = create_nsproxy();
75e3222c4eSBadari Pulavarty if (!new_nsp)
76e3222c4eSBadari Pulavarty return ERR_PTR(-ENOMEM);
771651e14eSSerge E. Hallyn
78bcf58e72SEric W. Biederman new_nsp->mnt_ns = copy_mnt_ns(flags, tsk->nsproxy->mnt_ns, user_ns, new_fs);
79467e9f4bSCedric Le Goater if (IS_ERR(new_nsp->mnt_ns)) {
80467e9f4bSCedric Le Goater err = PTR_ERR(new_nsp->mnt_ns);
81e3222c4eSBadari Pulavarty goto out_ns;
82467e9f4bSCedric Le Goater }
83e3222c4eSBadari Pulavarty
84bcf58e72SEric W. Biederman new_nsp->uts_ns = copy_utsname(flags, user_ns, tsk->nsproxy->uts_ns);
85467e9f4bSCedric Le Goater if (IS_ERR(new_nsp->uts_ns)) {
86467e9f4bSCedric Le Goater err = PTR_ERR(new_nsp->uts_ns);
87e3222c4eSBadari Pulavarty goto out_uts;
88467e9f4bSCedric Le Goater }
89e3222c4eSBadari Pulavarty
90bcf58e72SEric W. Biederman new_nsp->ipc_ns = copy_ipcs(flags, user_ns, tsk->nsproxy->ipc_ns);
91467e9f4bSCedric Le Goater if (IS_ERR(new_nsp->ipc_ns)) {
92467e9f4bSCedric Le Goater err = PTR_ERR(new_nsp->ipc_ns);
93e3222c4eSBadari Pulavarty goto out_ipc;
94467e9f4bSCedric Le Goater }
95e3222c4eSBadari Pulavarty
96c2b1df2eSAndy Lutomirski new_nsp->pid_ns_for_children =
97c2b1df2eSAndy Lutomirski copy_pid_ns(flags, user_ns, tsk->nsproxy->pid_ns_for_children);
98c2b1df2eSAndy Lutomirski if (IS_ERR(new_nsp->pid_ns_for_children)) {
99c2b1df2eSAndy Lutomirski err = PTR_ERR(new_nsp->pid_ns_for_children);
100e3222c4eSBadari Pulavarty goto out_pid;
101467e9f4bSCedric Le Goater }
102e3222c4eSBadari Pulavarty
103a79a908fSAditya Kali new_nsp->cgroup_ns = copy_cgroup_ns(flags, user_ns,
104a79a908fSAditya Kali tsk->nsproxy->cgroup_ns);
105a79a908fSAditya Kali if (IS_ERR(new_nsp->cgroup_ns)) {
106a79a908fSAditya Kali err = PTR_ERR(new_nsp->cgroup_ns);
107a79a908fSAditya Kali goto out_cgroup;
108a79a908fSAditya Kali }
109a79a908fSAditya Kali
110bcf58e72SEric W. Biederman new_nsp->net_ns = copy_net_ns(flags, user_ns, tsk->nsproxy->net_ns);
1119dd776b6SEric W. Biederman if (IS_ERR(new_nsp->net_ns)) {
1129dd776b6SEric W. Biederman err = PTR_ERR(new_nsp->net_ns);
1139dd776b6SEric W. Biederman goto out_net;
1149dd776b6SEric W. Biederman }
1159dd776b6SEric W. Biederman
116769071acSAndrei Vagin new_nsp->time_ns_for_children = copy_time_ns(flags, user_ns,
117769071acSAndrei Vagin tsk->nsproxy->time_ns_for_children);
118769071acSAndrei Vagin if (IS_ERR(new_nsp->time_ns_for_children)) {
119769071acSAndrei Vagin err = PTR_ERR(new_nsp->time_ns_for_children);
120769071acSAndrei Vagin goto out_time;
121769071acSAndrei Vagin }
122769071acSAndrei Vagin new_nsp->time_ns = get_time_ns(tsk->nsproxy->time_ns);
123769071acSAndrei Vagin
124e3222c4eSBadari Pulavarty return new_nsp;
125e3222c4eSBadari Pulavarty
126769071acSAndrei Vagin out_time:
127769071acSAndrei Vagin put_net(new_nsp->net_ns);
1289dd776b6SEric W. Biederman out_net:
129a79a908fSAditya Kali put_cgroup_ns(new_nsp->cgroup_ns);
130a79a908fSAditya Kali out_cgroup:
131c2b1df2eSAndy Lutomirski if (new_nsp->pid_ns_for_children)
132c2b1df2eSAndy Lutomirski put_pid_ns(new_nsp->pid_ns_for_children);
133e3222c4eSBadari Pulavarty out_pid:
134e3222c4eSBadari Pulavarty if (new_nsp->ipc_ns)
135e3222c4eSBadari Pulavarty put_ipc_ns(new_nsp->ipc_ns);
136e3222c4eSBadari Pulavarty out_ipc:
137e3222c4eSBadari Pulavarty if (new_nsp->uts_ns)
138e3222c4eSBadari Pulavarty put_uts_ns(new_nsp->uts_ns);
139e3222c4eSBadari Pulavarty out_uts:
140e3222c4eSBadari Pulavarty if (new_nsp->mnt_ns)
141e3222c4eSBadari Pulavarty put_mnt_ns(new_nsp->mnt_ns);
142e3222c4eSBadari Pulavarty out_ns:
14398c0d07cSCedric Le Goater kmem_cache_free(nsproxy_cachep, new_nsp);
144467e9f4bSCedric Le Goater return ERR_PTR(err);
145ab516013SSerge E. Hallyn }
146ab516013SSerge E. Hallyn
147ab516013SSerge E. Hallyn /*
148ab516013SSerge E. Hallyn * called from clone. This now handles copy for nsproxy and all
149ab516013SSerge E. Hallyn * namespaces therein.
150ab516013SSerge E. Hallyn */
copy_namespaces(unsigned long flags,struct task_struct * tsk)151213dd266SEric W. Biederman int copy_namespaces(unsigned long flags, struct task_struct *tsk)
152ab516013SSerge E. Hallyn {
153ab516013SSerge E. Hallyn struct nsproxy *old_ns = tsk->nsproxy;
154b33c77efSEric W. Biederman struct user_namespace *user_ns = task_cred_xxx(tsk, user_ns);
1551651e14eSSerge E. Hallyn struct nsproxy *new_ns;
156ab516013SSerge E. Hallyn
157dbef0c1cSEric W. Biederman if (likely(!(flags & (CLONE_NEWNS | CLONE_NEWUTS | CLONE_NEWIPC |
158a79a908fSAditya Kali CLONE_NEWPID | CLONE_NEWNET |
159769071acSAndrei Vagin CLONE_NEWCGROUP | CLONE_NEWTIME)))) {
1602b5f9dadSAndrei Vagin if ((flags & CLONE_VM) ||
1612b5f9dadSAndrei Vagin likely(old_ns->time_ns_for_children == old_ns->time_ns)) {
162ab516013SSerge E. Hallyn get_nsproxy(old_ns);
163ab516013SSerge E. Hallyn return 0;
164e3222c4eSBadari Pulavarty }
165769071acSAndrei Vagin } else if (!ns_capable(user_ns, CAP_SYS_ADMIN))
166dbef0c1cSEric W. Biederman return -EPERM;
167dbef0c1cSEric W. Biederman
16802fdb36aSSerge E. Hallyn /*
16902fdb36aSSerge E. Hallyn * CLONE_NEWIPC must detach from the undolist: after switching
17002fdb36aSSerge E. Hallyn * to a new ipc namespace, the semaphore arrays from the old
17102fdb36aSSerge E. Hallyn * namespace are unreachable. In clone parlance, CLONE_SYSVSEM
17202fdb36aSSerge E. Hallyn * means share undolist with parent, so we must forbid using
17302fdb36aSSerge E. Hallyn * it along with CLONE_NEWIPC.
17402fdb36aSSerge E. Hallyn */
17521e85194SRaphael S.Carvalho if ((flags & (CLONE_NEWIPC | CLONE_SYSVSEM)) ==
176dbef0c1cSEric W. Biederman (CLONE_NEWIPC | CLONE_SYSVSEM))
177dbef0c1cSEric W. Biederman return -EINVAL;
17802fdb36aSSerge E. Hallyn
179d7d48f62SYuanhan Liu new_ns = create_new_namespaces(flags, tsk, user_ns, tsk->fs);
180dbef0c1cSEric W. Biederman if (IS_ERR(new_ns))
181dbef0c1cSEric W. Biederman return PTR_ERR(new_ns);
1821651e14eSSerge E. Hallyn
1832b5f9dadSAndrei Vagin if ((flags & CLONE_VM) == 0)
1845c62634fSHui Su timens_on_fork(new_ns, tsk);
185769071acSAndrei Vagin
1861651e14eSSerge E. Hallyn tsk->nsproxy = new_ns;
187dbef0c1cSEric W. Biederman return 0;
188ab516013SSerge E. Hallyn }
189ab516013SSerge E. Hallyn
free_nsproxy(struct nsproxy * ns)190ab516013SSerge E. Hallyn void free_nsproxy(struct nsproxy *ns)
191ab516013SSerge E. Hallyn {
1926b3286edSKirill Korotaev if (ns->mnt_ns)
1936b3286edSKirill Korotaev put_mnt_ns(ns->mnt_ns);
1944865ecf1SSerge E. Hallyn if (ns->uts_ns)
1954865ecf1SSerge E. Hallyn put_uts_ns(ns->uts_ns);
19625b21cb2SKirill Korotaev if (ns->ipc_ns)
19725b21cb2SKirill Korotaev put_ipc_ns(ns->ipc_ns);
198c2b1df2eSAndy Lutomirski if (ns->pid_ns_for_children)
199c2b1df2eSAndy Lutomirski put_pid_ns(ns->pid_ns_for_children);
200769071acSAndrei Vagin if (ns->time_ns)
201769071acSAndrei Vagin put_time_ns(ns->time_ns);
202769071acSAndrei Vagin if (ns->time_ns_for_children)
203769071acSAndrei Vagin put_time_ns(ns->time_ns_for_children);
204a79a908fSAditya Kali put_cgroup_ns(ns->cgroup_ns);
2059dd776b6SEric W. Biederman put_net(ns->net_ns);
20698c0d07cSCedric Le Goater kmem_cache_free(nsproxy_cachep, ns);
207ab516013SSerge E. Hallyn }
208e3222c4eSBadari Pulavarty
209e3222c4eSBadari Pulavarty /*
210e3222c4eSBadari Pulavarty * Called from unshare. Unshare all the namespaces part of nsproxy.
2114e71e474SCedric Le Goater * On success, returns the new nsproxy.
212e3222c4eSBadari Pulavarty */
unshare_nsproxy_namespaces(unsigned long unshare_flags,struct nsproxy ** new_nsp,struct cred * new_cred,struct fs_struct * new_fs)213e3222c4eSBadari Pulavarty int unshare_nsproxy_namespaces(unsigned long unshare_flags,
214b2e0d987SEric W. Biederman struct nsproxy **new_nsp, struct cred *new_cred, struct fs_struct *new_fs)
215e3222c4eSBadari Pulavarty {
216bcf58e72SEric W. Biederman struct user_namespace *user_ns;
217e3222c4eSBadari Pulavarty int err = 0;
218e3222c4eSBadari Pulavarty
21977ec739dSSerge E. Hallyn if (!(unshare_flags & (CLONE_NEWNS | CLONE_NEWUTS | CLONE_NEWIPC |
220769071acSAndrei Vagin CLONE_NEWNET | CLONE_NEWPID | CLONE_NEWCGROUP |
221769071acSAndrei Vagin CLONE_NEWTIME)))
222e3222c4eSBadari Pulavarty return 0;
223e3222c4eSBadari Pulavarty
224b2e0d987SEric W. Biederman user_ns = new_cred ? new_cred->user_ns : current_user_ns();
225b2e0d987SEric W. Biederman if (!ns_capable(user_ns, CAP_SYS_ADMIN))
226e3222c4eSBadari Pulavarty return -EPERM;
227e3222c4eSBadari Pulavarty
228bcf58e72SEric W. Biederman *new_nsp = create_new_namespaces(unshare_flags, current, user_ns,
229e3222c4eSBadari Pulavarty new_fs ? new_fs : current->fs);
230858d72eaSSerge E. Hallyn if (IS_ERR(*new_nsp)) {
231e3222c4eSBadari Pulavarty err = PTR_ERR(*new_nsp);
232858d72eaSSerge E. Hallyn goto out;
233858d72eaSSerge E. Hallyn }
234858d72eaSSerge E. Hallyn
235858d72eaSSerge E. Hallyn out:
236e3222c4eSBadari Pulavarty return err;
237e3222c4eSBadari Pulavarty }
23898c0d07cSCedric Le Goater
switch_task_namespaces(struct task_struct * p,struct nsproxy * new)239cf7b708cSPavel Emelyanov void switch_task_namespaces(struct task_struct *p, struct nsproxy *new)
240cf7b708cSPavel Emelyanov {
241cf7b708cSPavel Emelyanov struct nsproxy *ns;
242cf7b708cSPavel Emelyanov
243cf7b708cSPavel Emelyanov might_sleep();
244cf7b708cSPavel Emelyanov
245728dba3aSEric W. Biederman task_lock(p);
246cf7b708cSPavel Emelyanov ns = p->nsproxy;
247728dba3aSEric W. Biederman p->nsproxy = new;
248728dba3aSEric W. Biederman task_unlock(p);
249cf7b708cSPavel Emelyanov
250aabe19b8SHui Su if (ns)
251aabe19b8SHui Su put_nsproxy(ns);
252cf7b708cSPavel Emelyanov }
253cf7b708cSPavel Emelyanov
exit_task_namespaces(struct task_struct * p)254cf7b708cSPavel Emelyanov void exit_task_namespaces(struct task_struct *p)
255cf7b708cSPavel Emelyanov {
256cf7b708cSPavel Emelyanov switch_task_namespaces(p, NULL);
257cf7b708cSPavel Emelyanov }
258cf7b708cSPavel Emelyanov
exec_task_namespaces(void)2592b5f9dadSAndrei Vagin int exec_task_namespaces(void)
2602b5f9dadSAndrei Vagin {
2612b5f9dadSAndrei Vagin struct task_struct *tsk = current;
2622b5f9dadSAndrei Vagin struct nsproxy *new;
2632b5f9dadSAndrei Vagin
2642b5f9dadSAndrei Vagin if (tsk->nsproxy->time_ns_for_children == tsk->nsproxy->time_ns)
2652b5f9dadSAndrei Vagin return 0;
2662b5f9dadSAndrei Vagin
2672b5f9dadSAndrei Vagin new = create_new_namespaces(0, tsk, current_user_ns(), tsk->fs);
2682b5f9dadSAndrei Vagin if (IS_ERR(new))
2692b5f9dadSAndrei Vagin return PTR_ERR(new);
2702b5f9dadSAndrei Vagin
2712b5f9dadSAndrei Vagin timens_on_fork(new, tsk);
2722b5f9dadSAndrei Vagin switch_task_namespaces(tsk, new);
2732b5f9dadSAndrei Vagin return 0;
2742b5f9dadSAndrei Vagin }
2752b5f9dadSAndrei Vagin
check_setns_flags(unsigned long flags)276303cc571SChristian Brauner static int check_setns_flags(unsigned long flags)
277303cc571SChristian Brauner {
278303cc571SChristian Brauner if (!flags || (flags & ~(CLONE_NEWNS | CLONE_NEWUTS | CLONE_NEWIPC |
27976c12881SChristian Brauner CLONE_NEWNET | CLONE_NEWTIME | CLONE_NEWUSER |
28076c12881SChristian Brauner CLONE_NEWPID | CLONE_NEWCGROUP)))
281303cc571SChristian Brauner return -EINVAL;
282303cc571SChristian Brauner
283303cc571SChristian Brauner #ifndef CONFIG_USER_NS
284303cc571SChristian Brauner if (flags & CLONE_NEWUSER)
285303cc571SChristian Brauner return -EINVAL;
286303cc571SChristian Brauner #endif
287303cc571SChristian Brauner #ifndef CONFIG_PID_NS
288303cc571SChristian Brauner if (flags & CLONE_NEWPID)
289303cc571SChristian Brauner return -EINVAL;
290303cc571SChristian Brauner #endif
291303cc571SChristian Brauner #ifndef CONFIG_UTS_NS
292303cc571SChristian Brauner if (flags & CLONE_NEWUTS)
293303cc571SChristian Brauner return -EINVAL;
294303cc571SChristian Brauner #endif
295303cc571SChristian Brauner #ifndef CONFIG_IPC_NS
296303cc571SChristian Brauner if (flags & CLONE_NEWIPC)
297303cc571SChristian Brauner return -EINVAL;
298303cc571SChristian Brauner #endif
299303cc571SChristian Brauner #ifndef CONFIG_CGROUPS
300303cc571SChristian Brauner if (flags & CLONE_NEWCGROUP)
301303cc571SChristian Brauner return -EINVAL;
302303cc571SChristian Brauner #endif
303303cc571SChristian Brauner #ifndef CONFIG_NET_NS
304303cc571SChristian Brauner if (flags & CLONE_NEWNET)
305303cc571SChristian Brauner return -EINVAL;
306303cc571SChristian Brauner #endif
30776c12881SChristian Brauner #ifndef CONFIG_TIME_NS
30876c12881SChristian Brauner if (flags & CLONE_NEWTIME)
30976c12881SChristian Brauner return -EINVAL;
31076c12881SChristian Brauner #endif
311303cc571SChristian Brauner
312303cc571SChristian Brauner return 0;
313303cc571SChristian Brauner }
314303cc571SChristian Brauner
put_nsset(struct nsset * nsset)315f2a8d52eSChristian Brauner static void put_nsset(struct nsset *nsset)
316f2a8d52eSChristian Brauner {
317f2a8d52eSChristian Brauner unsigned flags = nsset->flags;
318f2a8d52eSChristian Brauner
319f2a8d52eSChristian Brauner if (flags & CLONE_NEWUSER)
320f2a8d52eSChristian Brauner put_cred(nsset_cred(nsset));
321303cc571SChristian Brauner /*
322303cc571SChristian Brauner * We only created a temporary copy if we attached to more than just
323303cc571SChristian Brauner * the mount namespace.
324303cc571SChristian Brauner */
325303cc571SChristian Brauner if (nsset->fs && (flags & CLONE_NEWNS) && (flags & ~CLONE_NEWNS))
326303cc571SChristian Brauner free_fs_struct(nsset->fs);
327f2a8d52eSChristian Brauner if (nsset->nsproxy)
328f2a8d52eSChristian Brauner free_nsproxy(nsset->nsproxy);
329f2a8d52eSChristian Brauner }
330f2a8d52eSChristian Brauner
prepare_nsset(unsigned flags,struct nsset * nsset)331303cc571SChristian Brauner static int prepare_nsset(unsigned flags, struct nsset *nsset)
332f2a8d52eSChristian Brauner {
333f2a8d52eSChristian Brauner struct task_struct *me = current;
334f2a8d52eSChristian Brauner
335f2a8d52eSChristian Brauner nsset->nsproxy = create_new_namespaces(0, me, current_user_ns(), me->fs);
336f2a8d52eSChristian Brauner if (IS_ERR(nsset->nsproxy))
337f2a8d52eSChristian Brauner return PTR_ERR(nsset->nsproxy);
338f2a8d52eSChristian Brauner
339303cc571SChristian Brauner if (flags & CLONE_NEWUSER)
340f2a8d52eSChristian Brauner nsset->cred = prepare_creds();
341f2a8d52eSChristian Brauner else
342f2a8d52eSChristian Brauner nsset->cred = current_cred();
343f2a8d52eSChristian Brauner if (!nsset->cred)
344f2a8d52eSChristian Brauner goto out;
345f2a8d52eSChristian Brauner
346303cc571SChristian Brauner /* Only create a temporary copy of fs_struct if we really need to. */
347303cc571SChristian Brauner if (flags == CLONE_NEWNS) {
348f2a8d52eSChristian Brauner nsset->fs = me->fs;
349303cc571SChristian Brauner } else if (flags & CLONE_NEWNS) {
350303cc571SChristian Brauner nsset->fs = copy_fs_struct(me->fs);
351303cc571SChristian Brauner if (!nsset->fs)
352303cc571SChristian Brauner goto out;
353303cc571SChristian Brauner }
354f2a8d52eSChristian Brauner
355303cc571SChristian Brauner nsset->flags = flags;
356f2a8d52eSChristian Brauner return 0;
357f2a8d52eSChristian Brauner
358f2a8d52eSChristian Brauner out:
359f2a8d52eSChristian Brauner put_nsset(nsset);
360f2a8d52eSChristian Brauner return -ENOMEM;
361f2a8d52eSChristian Brauner }
362f2a8d52eSChristian Brauner
validate_ns(struct nsset * nsset,struct ns_common * ns)363303cc571SChristian Brauner static inline int validate_ns(struct nsset *nsset, struct ns_common *ns)
364303cc571SChristian Brauner {
365303cc571SChristian Brauner return ns->ops->install(nsset, ns);
366303cc571SChristian Brauner }
367303cc571SChristian Brauner
368303cc571SChristian Brauner /*
369303cc571SChristian Brauner * This is the inverse operation to unshare().
370303cc571SChristian Brauner * Ordering is equivalent to the standard ordering used everywhere else
371303cc571SChristian Brauner * during unshare and process creation. The switch to the new set of
372303cc571SChristian Brauner * namespaces occurs at the point of no return after installation of
373303cc571SChristian Brauner * all requested namespaces was successful in commit_nsset().
374303cc571SChristian Brauner */
validate_nsset(struct nsset * nsset,struct pid * pid)375303cc571SChristian Brauner static int validate_nsset(struct nsset *nsset, struct pid *pid)
376303cc571SChristian Brauner {
377303cc571SChristian Brauner int ret = 0;
378303cc571SChristian Brauner unsigned flags = nsset->flags;
379303cc571SChristian Brauner struct user_namespace *user_ns = NULL;
380303cc571SChristian Brauner struct pid_namespace *pid_ns = NULL;
381303cc571SChristian Brauner struct nsproxy *nsp;
382303cc571SChristian Brauner struct task_struct *tsk;
383303cc571SChristian Brauner
384303cc571SChristian Brauner /* Take a "snapshot" of the target task's namespaces. */
385303cc571SChristian Brauner rcu_read_lock();
386303cc571SChristian Brauner tsk = pid_task(pid, PIDTYPE_PID);
387303cc571SChristian Brauner if (!tsk) {
388303cc571SChristian Brauner rcu_read_unlock();
389303cc571SChristian Brauner return -ESRCH;
390303cc571SChristian Brauner }
391303cc571SChristian Brauner
392303cc571SChristian Brauner if (!ptrace_may_access(tsk, PTRACE_MODE_READ_REALCREDS)) {
393303cc571SChristian Brauner rcu_read_unlock();
394303cc571SChristian Brauner return -EPERM;
395303cc571SChristian Brauner }
396303cc571SChristian Brauner
397303cc571SChristian Brauner task_lock(tsk);
398303cc571SChristian Brauner nsp = tsk->nsproxy;
399303cc571SChristian Brauner if (nsp)
400303cc571SChristian Brauner get_nsproxy(nsp);
401303cc571SChristian Brauner task_unlock(tsk);
402303cc571SChristian Brauner if (!nsp) {
403303cc571SChristian Brauner rcu_read_unlock();
404303cc571SChristian Brauner return -ESRCH;
405303cc571SChristian Brauner }
406303cc571SChristian Brauner
407303cc571SChristian Brauner #ifdef CONFIG_PID_NS
408303cc571SChristian Brauner if (flags & CLONE_NEWPID) {
409303cc571SChristian Brauner pid_ns = task_active_pid_ns(tsk);
410303cc571SChristian Brauner if (unlikely(!pid_ns)) {
411303cc571SChristian Brauner rcu_read_unlock();
412303cc571SChristian Brauner ret = -ESRCH;
413303cc571SChristian Brauner goto out;
414303cc571SChristian Brauner }
415303cc571SChristian Brauner get_pid_ns(pid_ns);
416303cc571SChristian Brauner }
417303cc571SChristian Brauner #endif
418303cc571SChristian Brauner
419303cc571SChristian Brauner #ifdef CONFIG_USER_NS
420303cc571SChristian Brauner if (flags & CLONE_NEWUSER)
421303cc571SChristian Brauner user_ns = get_user_ns(__task_cred(tsk)->user_ns);
422303cc571SChristian Brauner #endif
423303cc571SChristian Brauner rcu_read_unlock();
424303cc571SChristian Brauner
425303cc571SChristian Brauner /*
426303cc571SChristian Brauner * Install requested namespaces. The caller will have
427303cc571SChristian Brauner * verified earlier that the requested namespaces are
428303cc571SChristian Brauner * supported on this kernel. We don't report errors here
429303cc571SChristian Brauner * if a namespace is requested that isn't supported.
430303cc571SChristian Brauner */
431303cc571SChristian Brauner #ifdef CONFIG_USER_NS
432303cc571SChristian Brauner if (flags & CLONE_NEWUSER) {
433303cc571SChristian Brauner ret = validate_ns(nsset, &user_ns->ns);
434303cc571SChristian Brauner if (ret)
435303cc571SChristian Brauner goto out;
436303cc571SChristian Brauner }
437303cc571SChristian Brauner #endif
438303cc571SChristian Brauner
439303cc571SChristian Brauner if (flags & CLONE_NEWNS) {
440303cc571SChristian Brauner ret = validate_ns(nsset, from_mnt_ns(nsp->mnt_ns));
441303cc571SChristian Brauner if (ret)
442303cc571SChristian Brauner goto out;
443303cc571SChristian Brauner }
444303cc571SChristian Brauner
445303cc571SChristian Brauner #ifdef CONFIG_UTS_NS
446303cc571SChristian Brauner if (flags & CLONE_NEWUTS) {
447303cc571SChristian Brauner ret = validate_ns(nsset, &nsp->uts_ns->ns);
448303cc571SChristian Brauner if (ret)
449303cc571SChristian Brauner goto out;
450303cc571SChristian Brauner }
451303cc571SChristian Brauner #endif
452303cc571SChristian Brauner
453303cc571SChristian Brauner #ifdef CONFIG_IPC_NS
454303cc571SChristian Brauner if (flags & CLONE_NEWIPC) {
455303cc571SChristian Brauner ret = validate_ns(nsset, &nsp->ipc_ns->ns);
456303cc571SChristian Brauner if (ret)
457303cc571SChristian Brauner goto out;
458303cc571SChristian Brauner }
459303cc571SChristian Brauner #endif
460303cc571SChristian Brauner
461303cc571SChristian Brauner #ifdef CONFIG_PID_NS
462303cc571SChristian Brauner if (flags & CLONE_NEWPID) {
463303cc571SChristian Brauner ret = validate_ns(nsset, &pid_ns->ns);
464303cc571SChristian Brauner if (ret)
465303cc571SChristian Brauner goto out;
466303cc571SChristian Brauner }
467303cc571SChristian Brauner #endif
468303cc571SChristian Brauner
469303cc571SChristian Brauner #ifdef CONFIG_CGROUPS
470303cc571SChristian Brauner if (flags & CLONE_NEWCGROUP) {
471303cc571SChristian Brauner ret = validate_ns(nsset, &nsp->cgroup_ns->ns);
472303cc571SChristian Brauner if (ret)
473303cc571SChristian Brauner goto out;
474303cc571SChristian Brauner }
475303cc571SChristian Brauner #endif
476303cc571SChristian Brauner
477303cc571SChristian Brauner #ifdef CONFIG_NET_NS
478303cc571SChristian Brauner if (flags & CLONE_NEWNET) {
479303cc571SChristian Brauner ret = validate_ns(nsset, &nsp->net_ns->ns);
480303cc571SChristian Brauner if (ret)
481303cc571SChristian Brauner goto out;
482303cc571SChristian Brauner }
483303cc571SChristian Brauner #endif
484303cc571SChristian Brauner
48576c12881SChristian Brauner #ifdef CONFIG_TIME_NS
48676c12881SChristian Brauner if (flags & CLONE_NEWTIME) {
48776c12881SChristian Brauner ret = validate_ns(nsset, &nsp->time_ns->ns);
48876c12881SChristian Brauner if (ret)
48976c12881SChristian Brauner goto out;
49076c12881SChristian Brauner }
49176c12881SChristian Brauner #endif
49276c12881SChristian Brauner
493303cc571SChristian Brauner out:
494303cc571SChristian Brauner if (pid_ns)
495303cc571SChristian Brauner put_pid_ns(pid_ns);
496303cc571SChristian Brauner if (nsp)
497303cc571SChristian Brauner put_nsproxy(nsp);
498303cc571SChristian Brauner put_user_ns(user_ns);
499303cc571SChristian Brauner
500303cc571SChristian Brauner return ret;
501303cc571SChristian Brauner }
502303cc571SChristian Brauner
503f2a8d52eSChristian Brauner /*
504f2a8d52eSChristian Brauner * This is the point of no return. There are just a few namespaces
505f2a8d52eSChristian Brauner * that do some actual work here and it's sufficiently minimal that
506f2a8d52eSChristian Brauner * a separate ns_common operation seems unnecessary for now.
507f2a8d52eSChristian Brauner * Unshare is doing the same thing. If we'll end up needing to do
508f2a8d52eSChristian Brauner * more in a given namespace or a helper here is ultimately not
509f2a8d52eSChristian Brauner * exported anymore a simple commit handler for each namespace
510f2a8d52eSChristian Brauner * should be added to ns_common.
511f2a8d52eSChristian Brauner */
commit_nsset(struct nsset * nsset)512f2a8d52eSChristian Brauner static void commit_nsset(struct nsset *nsset)
513f2a8d52eSChristian Brauner {
514f2a8d52eSChristian Brauner unsigned flags = nsset->flags;
515f2a8d52eSChristian Brauner struct task_struct *me = current;
516f2a8d52eSChristian Brauner
517f2a8d52eSChristian Brauner #ifdef CONFIG_USER_NS
518f2a8d52eSChristian Brauner if (flags & CLONE_NEWUSER) {
519f2a8d52eSChristian Brauner /* transfer ownership */
520f2a8d52eSChristian Brauner commit_creds(nsset_cred(nsset));
521f2a8d52eSChristian Brauner nsset->cred = NULL;
522f2a8d52eSChristian Brauner }
523f2a8d52eSChristian Brauner #endif
524f2a8d52eSChristian Brauner
525303cc571SChristian Brauner /* We only need to commit if we have used a temporary fs_struct. */
526303cc571SChristian Brauner if ((flags & CLONE_NEWNS) && (flags & ~CLONE_NEWNS)) {
527303cc571SChristian Brauner set_fs_root(me->fs, &nsset->fs->root);
528303cc571SChristian Brauner set_fs_pwd(me->fs, &nsset->fs->pwd);
529303cc571SChristian Brauner }
530303cc571SChristian Brauner
531f2a8d52eSChristian Brauner #ifdef CONFIG_IPC_NS
532f2a8d52eSChristian Brauner if (flags & CLONE_NEWIPC)
533f2a8d52eSChristian Brauner exit_sem(me);
534f2a8d52eSChristian Brauner #endif
535f2a8d52eSChristian Brauner
53676c12881SChristian Brauner #ifdef CONFIG_TIME_NS
53776c12881SChristian Brauner if (flags & CLONE_NEWTIME)
53876c12881SChristian Brauner timens_commit(me, nsset->nsproxy->time_ns);
53976c12881SChristian Brauner #endif
54076c12881SChristian Brauner
541f2a8d52eSChristian Brauner /* transfer ownership */
542f2a8d52eSChristian Brauner switch_task_namespaces(me, nsset->nsproxy);
543f2a8d52eSChristian Brauner nsset->nsproxy = NULL;
544f2a8d52eSChristian Brauner }
545f2a8d52eSChristian Brauner
/*
 * setns(fd, flags): attach the calling task to the namespace(s) that @fd
 * refers to.
 *
 * @fd is either a proc namespace file or a pidfd:
 *  - namespace file: @flags must be 0 or equal to that namespace's type,
 *    and is then replaced by the type;
 *  - pidfd: @flags is checked by check_setns_flags() and selects which of
 *    the target task's namespaces to join.
 *
 * Returns 0 on success, -EBADF for an invalid @fd, -EINVAL for any other
 * kind of file or for unacceptable @flags, or the error from preparing or
 * validating the namespace set.
 */
SYSCALL_DEFINE2(setns, int, fd, int, flags)
{
	struct fd f = fdget(fd);
	struct ns_common *ns = NULL;
	struct nsset nsset = {};
	int err = 0;

	if (!f.file)
		return -EBADF;

	if (proc_ns_file(f.file)) {
		ns = get_proc_ns(file_inode(f.file));
		if (flags && (ns->ops->type != flags))
			err = -EINVAL;
		flags = ns->ops->type;
	} else if (IS_ERR(pidfd_pid(f.file))) {
		/* Neither a namespace file nor a pidfd. */
		err = -EINVAL;
	} else {
		err = check_setns_flags(flags);
	}
	if (err)
		goto out;

	err = prepare_nsset(flags, &nsset);
	if (err)
		goto out;

	if (proc_ns_file(f.file))
		err = validate_ns(&nsset, ns);
	else
		err = validate_nsset(&nsset, f.file->private_data);

	if (!err) {
		commit_nsset(&nsset);
		perf_event_namespaces(current);
	}

	/* Release whatever commit_nsset() did not take over. */
	put_nsset(&nsset);
out:
	fdput(f);
	return err;
}
5860663c6f8SEric W. Biederman
nsproxy_cache_init(void)58766577193SAl Viro int __init nsproxy_cache_init(void)
58898c0d07cSCedric Le Goater {
58930acd0bdSVasily Averin nsproxy_cachep = KMEM_CACHE(nsproxy, SLAB_PANIC|SLAB_ACCOUNT);
59098c0d07cSCedric Le Goater return 0;
59198c0d07cSCedric Le Goater }
592