1ab516013SSerge E. Hallyn /* 2ab516013SSerge E. Hallyn * Copyright (C) 2006 IBM Corporation 3ab516013SSerge E. Hallyn * 4ab516013SSerge E. Hallyn * Author: Serge Hallyn <serue@us.ibm.com> 5ab516013SSerge E. Hallyn * 6ab516013SSerge E. Hallyn * This program is free software; you can redistribute it and/or 7ab516013SSerge E. Hallyn * modify it under the terms of the GNU General Public License as 8ab516013SSerge E. Hallyn * published by the Free Software Foundation, version 2 of the 9ab516013SSerge E. Hallyn * License. 1025b21cb2SKirill Korotaev * 1125b21cb2SKirill Korotaev * Jun 2006 - namespaces support 1225b21cb2SKirill Korotaev * OpenVZ, SWsoft Inc. 1325b21cb2SKirill Korotaev * Pavel Emelianov <xemul@openvz.org> 14ab516013SSerge E. Hallyn */ 15ab516013SSerge E. Hallyn 165a0e3ad6STejun Heo #include <linux/slab.h> 179984de1aSPaul Gortmaker #include <linux/export.h> 18ab516013SSerge E. Hallyn #include <linux/nsproxy.h> 190437eb59SSerge E. Hallyn #include <linux/init_task.h> 206b3286edSKirill Korotaev #include <linux/mnt_namespace.h> 214865ecf1SSerge E. Hallyn #include <linux/utsname.h> 229a575a92SCedric Le Goater #include <linux/pid_namespace.h> 239dd776b6SEric W. Biederman #include <net/net_namespace.h> 24ae5e1b22SPavel Emelyanov #include <linux/ipc_namespace.h> 250bb80f24SDavid Howells #include <linux/proc_ns.h> 260663c6f8SEric W. Biederman #include <linux/file.h> 270663c6f8SEric W. Biederman #include <linux/syscalls.h> 280437eb59SSerge E. Hallyn 2998c0d07cSCedric Le Goater static struct kmem_cache *nsproxy_cachep; 3098c0d07cSCedric Le Goater 318467005dSAlexey Dobriyan struct nsproxy init_nsproxy = { 328467005dSAlexey Dobriyan .count = ATOMIC_INIT(1), 338467005dSAlexey Dobriyan .uts_ns = &init_uts_ns, 348467005dSAlexey Dobriyan #if defined(CONFIG_POSIX_MQUEUE) || defined(CONFIG_SYSVIPC) 358467005dSAlexey Dobriyan .ipc_ns = &init_ipc_ns, 368467005dSAlexey Dobriyan #endif 378467005dSAlexey Dobriyan .mnt_ns = NULL, 38c2b1df2eSAndy Lutomirski .pid_ns_for_children = &init_pid_ns, 398467005dSAlexey Dobriyan #ifdef CONFIG_NET 408467005dSAlexey Dobriyan .net_ns = &init_net, 418467005dSAlexey Dobriyan #endif 428467005dSAlexey Dobriyan }; 43ab516013SSerge E. Hallyn 4490af90d7SAlexey Dobriyan static inline struct nsproxy *create_nsproxy(void) 45ab516013SSerge E. Hallyn { 4690af90d7SAlexey Dobriyan struct nsproxy *nsproxy; 47ab516013SSerge E. Hallyn 4890af90d7SAlexey Dobriyan nsproxy = kmem_cache_alloc(nsproxy_cachep, GFP_KERNEL); 4990af90d7SAlexey Dobriyan if (nsproxy) 5090af90d7SAlexey Dobriyan atomic_set(&nsproxy->count, 1); 5190af90d7SAlexey Dobriyan return nsproxy; 52ab516013SSerge E. Hallyn } 53ab516013SSerge E. Hallyn 54ab516013SSerge E. Hallyn /* 55e3222c4eSBadari Pulavarty * Create new nsproxy and all of its the associated namespaces. 56e3222c4eSBadari Pulavarty * Return the newly created nsproxy. Do not attach this to the task, 57e3222c4eSBadari Pulavarty * leave it to the caller to do proper locking and attach it to task. 58ab516013SSerge E. Hallyn */ 59213dd266SEric W. Biederman static struct nsproxy *create_new_namespaces(unsigned long flags, 60bcf58e72SEric W. Biederman struct task_struct *tsk, struct user_namespace *user_ns, 61bcf58e72SEric W. Biederman struct fs_struct *new_fs) 62ab516013SSerge E. Hallyn { 63e3222c4eSBadari Pulavarty struct nsproxy *new_nsp; 64467e9f4bSCedric Le Goater int err; 65ab516013SSerge E. Hallyn 6690af90d7SAlexey Dobriyan new_nsp = create_nsproxy(); 67e3222c4eSBadari Pulavarty if (!new_nsp) 68e3222c4eSBadari Pulavarty return ERR_PTR(-ENOMEM); 691651e14eSSerge E. Hallyn 70bcf58e72SEric W. Biederman new_nsp->mnt_ns = copy_mnt_ns(flags, tsk->nsproxy->mnt_ns, user_ns, new_fs); 71467e9f4bSCedric Le Goater if (IS_ERR(new_nsp->mnt_ns)) { 72467e9f4bSCedric Le Goater err = PTR_ERR(new_nsp->mnt_ns); 73e3222c4eSBadari Pulavarty goto out_ns; 74467e9f4bSCedric Le Goater } 75e3222c4eSBadari Pulavarty 76bcf58e72SEric W. Biederman new_nsp->uts_ns = copy_utsname(flags, user_ns, tsk->nsproxy->uts_ns); 77467e9f4bSCedric Le Goater if (IS_ERR(new_nsp->uts_ns)) { 78467e9f4bSCedric Le Goater err = PTR_ERR(new_nsp->uts_ns); 79e3222c4eSBadari Pulavarty goto out_uts; 80467e9f4bSCedric Le Goater } 81e3222c4eSBadari Pulavarty 82bcf58e72SEric W. Biederman new_nsp->ipc_ns = copy_ipcs(flags, user_ns, tsk->nsproxy->ipc_ns); 83467e9f4bSCedric Le Goater if (IS_ERR(new_nsp->ipc_ns)) { 84467e9f4bSCedric Le Goater err = PTR_ERR(new_nsp->ipc_ns); 85e3222c4eSBadari Pulavarty goto out_ipc; 86467e9f4bSCedric Le Goater } 87e3222c4eSBadari Pulavarty 88c2b1df2eSAndy Lutomirski new_nsp->pid_ns_for_children = 89c2b1df2eSAndy Lutomirski copy_pid_ns(flags, user_ns, tsk->nsproxy->pid_ns_for_children); 90c2b1df2eSAndy Lutomirski if (IS_ERR(new_nsp->pid_ns_for_children)) { 91c2b1df2eSAndy Lutomirski err = PTR_ERR(new_nsp->pid_ns_for_children); 92e3222c4eSBadari Pulavarty goto out_pid; 93467e9f4bSCedric Le Goater } 94e3222c4eSBadari Pulavarty 95bcf58e72SEric W. Biederman new_nsp->net_ns = copy_net_ns(flags, user_ns, tsk->nsproxy->net_ns); 969dd776b6SEric W. Biederman if (IS_ERR(new_nsp->net_ns)) { 979dd776b6SEric W. Biederman err = PTR_ERR(new_nsp->net_ns); 989dd776b6SEric W. Biederman goto out_net; 999dd776b6SEric W. Biederman } 1009dd776b6SEric W. Biederman 101e3222c4eSBadari Pulavarty return new_nsp; 102e3222c4eSBadari Pulavarty 1039dd776b6SEric W. Biederman out_net: 104c2b1df2eSAndy Lutomirski if (new_nsp->pid_ns_for_children) 105c2b1df2eSAndy Lutomirski put_pid_ns(new_nsp->pid_ns_for_children); 106e3222c4eSBadari Pulavarty out_pid: 107e3222c4eSBadari Pulavarty if (new_nsp->ipc_ns) 108e3222c4eSBadari Pulavarty put_ipc_ns(new_nsp->ipc_ns); 109e3222c4eSBadari Pulavarty out_ipc: 110e3222c4eSBadari Pulavarty if (new_nsp->uts_ns) 111e3222c4eSBadari Pulavarty put_uts_ns(new_nsp->uts_ns); 112e3222c4eSBadari Pulavarty out_uts: 113e3222c4eSBadari Pulavarty if (new_nsp->mnt_ns) 114e3222c4eSBadari Pulavarty put_mnt_ns(new_nsp->mnt_ns); 115e3222c4eSBadari Pulavarty out_ns: 11698c0d07cSCedric Le Goater kmem_cache_free(nsproxy_cachep, new_nsp); 117467e9f4bSCedric Le Goater return ERR_PTR(err); 118ab516013SSerge E. Hallyn } 119ab516013SSerge E. Hallyn 120ab516013SSerge E. Hallyn /* 121ab516013SSerge E. Hallyn * called from clone. This now handles copy for nsproxy and all 122ab516013SSerge E. Hallyn * namespaces therein. 123ab516013SSerge E. Hallyn */ 124213dd266SEric W. Biederman int copy_namespaces(unsigned long flags, struct task_struct *tsk) 125ab516013SSerge E. Hallyn { 126ab516013SSerge E. Hallyn struct nsproxy *old_ns = tsk->nsproxy; 127b33c77efSEric W. Biederman struct user_namespace *user_ns = task_cred_xxx(tsk, user_ns); 1281651e14eSSerge E. Hallyn struct nsproxy *new_ns; 129ab516013SSerge E. Hallyn 130dbef0c1cSEric W. Biederman if (likely(!(flags & (CLONE_NEWNS | CLONE_NEWUTS | CLONE_NEWIPC | 131dbef0c1cSEric W. Biederman CLONE_NEWPID | CLONE_NEWNET)))) { 132ab516013SSerge E. Hallyn get_nsproxy(old_ns); 133ab516013SSerge E. Hallyn return 0; 134e3222c4eSBadari Pulavarty } 135e3222c4eSBadari Pulavarty 136dbef0c1cSEric W. Biederman if (!ns_capable(user_ns, CAP_SYS_ADMIN)) 137dbef0c1cSEric W. Biederman return -EPERM; 138dbef0c1cSEric W. Biederman 13902fdb36aSSerge E. Hallyn /* 14002fdb36aSSerge E. Hallyn * CLONE_NEWIPC must detach from the undolist: after switching 14102fdb36aSSerge E. Hallyn * to a new ipc namespace, the semaphore arrays from the old 14202fdb36aSSerge E. Hallyn * namespace are unreachable. In clone parlance, CLONE_SYSVSEM 14302fdb36aSSerge E. Hallyn * means share undolist with parent, so we must forbid using 14402fdb36aSSerge E. Hallyn * it along with CLONE_NEWIPC. 14502fdb36aSSerge E. Hallyn */ 14621e85194SRaphael S.Carvalho if ((flags & (CLONE_NEWIPC | CLONE_SYSVSEM)) == 147dbef0c1cSEric W. Biederman (CLONE_NEWIPC | CLONE_SYSVSEM)) 148dbef0c1cSEric W. Biederman return -EINVAL; 14902fdb36aSSerge E. Hallyn 150d7d48f62SYuanhan Liu new_ns = create_new_namespaces(flags, tsk, user_ns, tsk->fs); 151dbef0c1cSEric W. Biederman if (IS_ERR(new_ns)) 152dbef0c1cSEric W. Biederman return PTR_ERR(new_ns); 1531651e14eSSerge E. Hallyn 1541651e14eSSerge E. Hallyn tsk->nsproxy = new_ns; 155dbef0c1cSEric W. Biederman return 0; 156ab516013SSerge E. Hallyn } 157ab516013SSerge E. Hallyn 158ab516013SSerge E. Hallyn void free_nsproxy(struct nsproxy *ns) 159ab516013SSerge E. Hallyn { 1606b3286edSKirill Korotaev if (ns->mnt_ns) 1616b3286edSKirill Korotaev put_mnt_ns(ns->mnt_ns); 1624865ecf1SSerge E. Hallyn if (ns->uts_ns) 1634865ecf1SSerge E. Hallyn put_uts_ns(ns->uts_ns); 16425b21cb2SKirill Korotaev if (ns->ipc_ns) 16525b21cb2SKirill Korotaev put_ipc_ns(ns->ipc_ns); 166c2b1df2eSAndy Lutomirski if (ns->pid_ns_for_children) 167c2b1df2eSAndy Lutomirski put_pid_ns(ns->pid_ns_for_children); 1689dd776b6SEric W. Biederman put_net(ns->net_ns); 16998c0d07cSCedric Le Goater kmem_cache_free(nsproxy_cachep, ns); 170ab516013SSerge E. Hallyn } 171e3222c4eSBadari Pulavarty 172e3222c4eSBadari Pulavarty /* 173e3222c4eSBadari Pulavarty * Called from unshare. Unshare all the namespaces part of nsproxy. 1744e71e474SCedric Le Goater * On success, returns the new nsproxy. 175e3222c4eSBadari Pulavarty */ 176e3222c4eSBadari Pulavarty int unshare_nsproxy_namespaces(unsigned long unshare_flags, 177b2e0d987SEric W. Biederman struct nsproxy **new_nsp, struct cred *new_cred, struct fs_struct *new_fs) 178e3222c4eSBadari Pulavarty { 179bcf58e72SEric W. Biederman struct user_namespace *user_ns; 180e3222c4eSBadari Pulavarty int err = 0; 181e3222c4eSBadari Pulavarty 18277ec739dSSerge E. Hallyn if (!(unshare_flags & (CLONE_NEWNS | CLONE_NEWUTS | CLONE_NEWIPC | 18350804fe3SEric W. Biederman CLONE_NEWNET | CLONE_NEWPID))) 184e3222c4eSBadari Pulavarty return 0; 185e3222c4eSBadari Pulavarty 186b2e0d987SEric W. Biederman user_ns = new_cred ? new_cred->user_ns : current_user_ns(); 187b2e0d987SEric W. Biederman if (!ns_capable(user_ns, CAP_SYS_ADMIN)) 188e3222c4eSBadari Pulavarty return -EPERM; 189e3222c4eSBadari Pulavarty 190bcf58e72SEric W. Biederman *new_nsp = create_new_namespaces(unshare_flags, current, user_ns, 191e3222c4eSBadari Pulavarty new_fs ? new_fs : current->fs); 192858d72eaSSerge E. Hallyn if (IS_ERR(*new_nsp)) { 193e3222c4eSBadari Pulavarty err = PTR_ERR(*new_nsp); 194858d72eaSSerge E. Hallyn goto out; 195858d72eaSSerge E. Hallyn } 196858d72eaSSerge E. Hallyn 197858d72eaSSerge E. Hallyn out: 198e3222c4eSBadari Pulavarty return err; 199e3222c4eSBadari Pulavarty } 20098c0d07cSCedric Le Goater 201cf7b708cSPavel Emelyanov void switch_task_namespaces(struct task_struct *p, struct nsproxy *new) 202cf7b708cSPavel Emelyanov { 203cf7b708cSPavel Emelyanov struct nsproxy *ns; 204cf7b708cSPavel Emelyanov 205cf7b708cSPavel Emelyanov might_sleep(); 206cf7b708cSPavel Emelyanov 207728dba3aSEric W. Biederman task_lock(p); 208cf7b708cSPavel Emelyanov ns = p->nsproxy; 209728dba3aSEric W. Biederman p->nsproxy = new; 210728dba3aSEric W. Biederman task_unlock(p); 211cf7b708cSPavel Emelyanov 212728dba3aSEric W. Biederman if (ns && atomic_dec_and_test(&ns->count)) 213cf7b708cSPavel Emelyanov free_nsproxy(ns); 214cf7b708cSPavel Emelyanov } 215cf7b708cSPavel Emelyanov 216cf7b708cSPavel Emelyanov void exit_task_namespaces(struct task_struct *p) 217cf7b708cSPavel Emelyanov { 218cf7b708cSPavel Emelyanov switch_task_namespaces(p, NULL); 219cf7b708cSPavel Emelyanov } 220cf7b708cSPavel Emelyanov 2210663c6f8SEric W. Biederman SYSCALL_DEFINE2(setns, int, fd, int, nstype) 2220663c6f8SEric W. Biederman { 2230663c6f8SEric W. Biederman const struct proc_ns_operations *ops; 2240663c6f8SEric W. Biederman struct task_struct *tsk = current; 2250663c6f8SEric W. Biederman struct nsproxy *new_nsproxy; 2260bb80f24SDavid Howells struct proc_ns *ei; 2270663c6f8SEric W. Biederman struct file *file; 2280663c6f8SEric W. Biederman int err; 2290663c6f8SEric W. Biederman 2300663c6f8SEric W. Biederman file = proc_ns_fget(fd); 2310663c6f8SEric W. Biederman if (IS_ERR(file)) 2320663c6f8SEric W. Biederman return PTR_ERR(file); 2330663c6f8SEric W. Biederman 2340663c6f8SEric W. Biederman err = -EINVAL; 2350bb80f24SDavid Howells ei = get_proc_ns(file_inode(file)); 2360663c6f8SEric W. Biederman ops = ei->ns_ops; 2370663c6f8SEric W. Biederman if (nstype && (ops->type != nstype)) 2380663c6f8SEric W. Biederman goto out; 2390663c6f8SEric W. Biederman 240bcf58e72SEric W. Biederman new_nsproxy = create_new_namespaces(0, tsk, current_user_ns(), tsk->fs); 2410663c6f8SEric W. Biederman if (IS_ERR(new_nsproxy)) { 2420663c6f8SEric W. Biederman err = PTR_ERR(new_nsproxy); 2430663c6f8SEric W. Biederman goto out; 2440663c6f8SEric W. Biederman } 2450663c6f8SEric W. Biederman 2460663c6f8SEric W. Biederman err = ops->install(new_nsproxy, ei->ns); 2470663c6f8SEric W. Biederman if (err) { 2480663c6f8SEric W. Biederman free_nsproxy(new_nsproxy); 2490663c6f8SEric W. Biederman goto out; 2500663c6f8SEric W. Biederman } 2510663c6f8SEric W. Biederman switch_task_namespaces(tsk, new_nsproxy); 2520663c6f8SEric W. Biederman out: 2530663c6f8SEric W. Biederman fput(file); 2540663c6f8SEric W. Biederman return err; 2550663c6f8SEric W. Biederman } 2560663c6f8SEric W. Biederman 25766577193SAl Viro int __init nsproxy_cache_init(void) 25898c0d07cSCedric Le Goater { 259db8906daSPavel Emelyanov nsproxy_cachep = KMEM_CACHE(nsproxy, SLAB_PANIC); 26098c0d07cSCedric Le Goater return 0; 26198c0d07cSCedric Le Goater } 262