1ab516013SSerge E. Hallyn /* 2ab516013SSerge E. Hallyn * Copyright (C) 2006 IBM Corporation 3ab516013SSerge E. Hallyn * 4ab516013SSerge E. Hallyn * Author: Serge Hallyn <serue@us.ibm.com> 5ab516013SSerge E. Hallyn * 6ab516013SSerge E. Hallyn * This program is free software; you can redistribute it and/or 7ab516013SSerge E. Hallyn * modify it under the terms of the GNU General Public License as 8ab516013SSerge E. Hallyn * published by the Free Software Foundation, version 2 of the 9ab516013SSerge E. Hallyn * License. 1025b21cb2SKirill Korotaev * 1125b21cb2SKirill Korotaev * Jun 2006 - namespaces support 1225b21cb2SKirill Korotaev * OpenVZ, SWsoft Inc. 1325b21cb2SKirill Korotaev * Pavel Emelianov <xemul@openvz.org> 14ab516013SSerge E. Hallyn */ 15ab516013SSerge E. Hallyn 165a0e3ad6STejun Heo #include <linux/slab.h> 179984de1aSPaul Gortmaker #include <linux/export.h> 18ab516013SSerge E. Hallyn #include <linux/nsproxy.h> 190437eb59SSerge E. Hallyn #include <linux/init_task.h> 206b3286edSKirill Korotaev #include <linux/mnt_namespace.h> 214865ecf1SSerge E. Hallyn #include <linux/utsname.h> 229a575a92SCedric Le Goater #include <linux/pid_namespace.h> 239dd776b6SEric W. Biederman #include <net/net_namespace.h> 24ae5e1b22SPavel Emelyanov #include <linux/ipc_namespace.h> 250663c6f8SEric W. Biederman #include <linux/proc_fs.h> 260663c6f8SEric W. Biederman #include <linux/file.h> 270663c6f8SEric W. Biederman #include <linux/syscalls.h> 280437eb59SSerge E. Hallyn 2998c0d07cSCedric Le Goater static struct kmem_cache *nsproxy_cachep; 3098c0d07cSCedric Le Goater 318467005dSAlexey Dobriyan struct nsproxy init_nsproxy = { 328467005dSAlexey Dobriyan .count = ATOMIC_INIT(1), 338467005dSAlexey Dobriyan .uts_ns = &init_uts_ns, 348467005dSAlexey Dobriyan #if defined(CONFIG_POSIX_MQUEUE) || defined(CONFIG_SYSVIPC) 358467005dSAlexey Dobriyan .ipc_ns = &init_ipc_ns, 368467005dSAlexey Dobriyan #endif 378467005dSAlexey Dobriyan .mnt_ns = NULL, 388467005dSAlexey Dobriyan .pid_ns = &init_pid_ns, 398467005dSAlexey Dobriyan #ifdef CONFIG_NET 408467005dSAlexey Dobriyan .net_ns = &init_net, 418467005dSAlexey Dobriyan #endif 428467005dSAlexey Dobriyan }; 43ab516013SSerge E. Hallyn 4490af90d7SAlexey Dobriyan static inline struct nsproxy *create_nsproxy(void) 45ab516013SSerge E. Hallyn { 4690af90d7SAlexey Dobriyan struct nsproxy *nsproxy; 47ab516013SSerge E. Hallyn 4890af90d7SAlexey Dobriyan nsproxy = kmem_cache_alloc(nsproxy_cachep, GFP_KERNEL); 4990af90d7SAlexey Dobriyan if (nsproxy) 5090af90d7SAlexey Dobriyan atomic_set(&nsproxy->count, 1); 5190af90d7SAlexey Dobriyan return nsproxy; 52ab516013SSerge E. Hallyn } 53ab516013SSerge E. Hallyn 54ab516013SSerge E. Hallyn /* 55e3222c4eSBadari Pulavarty * Create new nsproxy and all of its the associated namespaces. 56e3222c4eSBadari Pulavarty * Return the newly created nsproxy. Do not attach this to the task, 57e3222c4eSBadari Pulavarty * leave it to the caller to do proper locking and attach it to task. 58ab516013SSerge E. Hallyn */ 59213dd266SEric W. Biederman static struct nsproxy *create_new_namespaces(unsigned long flags, 60213dd266SEric W. Biederman struct task_struct *tsk, struct fs_struct *new_fs) 61ab516013SSerge E. Hallyn { 62e3222c4eSBadari Pulavarty struct nsproxy *new_nsp; 63467e9f4bSCedric Le Goater int err; 64ab516013SSerge E. Hallyn 6590af90d7SAlexey Dobriyan new_nsp = create_nsproxy(); 66e3222c4eSBadari Pulavarty if (!new_nsp) 67e3222c4eSBadari Pulavarty return ERR_PTR(-ENOMEM); 681651e14eSSerge E. Hallyn 69e3222c4eSBadari Pulavarty new_nsp->mnt_ns = copy_mnt_ns(flags, tsk->nsproxy->mnt_ns, new_fs); 70467e9f4bSCedric Le Goater if (IS_ERR(new_nsp->mnt_ns)) { 71467e9f4bSCedric Le Goater err = PTR_ERR(new_nsp->mnt_ns); 72e3222c4eSBadari Pulavarty goto out_ns; 73467e9f4bSCedric Le Goater } 74e3222c4eSBadari Pulavarty 75bb96a6f5SSerge E. Hallyn new_nsp->uts_ns = copy_utsname(flags, tsk); 76467e9f4bSCedric Le Goater if (IS_ERR(new_nsp->uts_ns)) { 77467e9f4bSCedric Le Goater err = PTR_ERR(new_nsp->uts_ns); 78e3222c4eSBadari Pulavarty goto out_uts; 79467e9f4bSCedric Le Goater } 80e3222c4eSBadari Pulavarty 81b0e77598SSerge E. Hallyn new_nsp->ipc_ns = copy_ipcs(flags, tsk); 82467e9f4bSCedric Le Goater if (IS_ERR(new_nsp->ipc_ns)) { 83467e9f4bSCedric Le Goater err = PTR_ERR(new_nsp->ipc_ns); 84e3222c4eSBadari Pulavarty goto out_ipc; 85467e9f4bSCedric Le Goater } 86e3222c4eSBadari Pulavarty 8717cf22c3SEric W. Biederman new_nsp->pid_ns = copy_pid_ns(flags, task_cred_xxx(tsk, user_ns), tsk->nsproxy->pid_ns); 88467e9f4bSCedric Le Goater if (IS_ERR(new_nsp->pid_ns)) { 89467e9f4bSCedric Le Goater err = PTR_ERR(new_nsp->pid_ns); 90e3222c4eSBadari Pulavarty goto out_pid; 91467e9f4bSCedric Le Goater } 92e3222c4eSBadari Pulavarty 93038e7332SEric W. Biederman new_nsp->net_ns = copy_net_ns(flags, task_cred_xxx(tsk, user_ns), tsk->nsproxy->net_ns); 949dd776b6SEric W. Biederman if (IS_ERR(new_nsp->net_ns)) { 959dd776b6SEric W. Biederman err = PTR_ERR(new_nsp->net_ns); 969dd776b6SEric W. Biederman goto out_net; 979dd776b6SEric W. Biederman } 989dd776b6SEric W. Biederman 99e3222c4eSBadari Pulavarty return new_nsp; 100e3222c4eSBadari Pulavarty 1019dd776b6SEric W. Biederman out_net: 102acce292cSCedric Le Goater if (new_nsp->pid_ns) 103acce292cSCedric Le Goater put_pid_ns(new_nsp->pid_ns); 104e3222c4eSBadari Pulavarty out_pid: 105e3222c4eSBadari Pulavarty if (new_nsp->ipc_ns) 106e3222c4eSBadari Pulavarty put_ipc_ns(new_nsp->ipc_ns); 107e3222c4eSBadari Pulavarty out_ipc: 108e3222c4eSBadari Pulavarty if (new_nsp->uts_ns) 109e3222c4eSBadari Pulavarty put_uts_ns(new_nsp->uts_ns); 110e3222c4eSBadari Pulavarty out_uts: 111e3222c4eSBadari Pulavarty if (new_nsp->mnt_ns) 112e3222c4eSBadari Pulavarty put_mnt_ns(new_nsp->mnt_ns); 113e3222c4eSBadari Pulavarty out_ns: 11498c0d07cSCedric Le Goater kmem_cache_free(nsproxy_cachep, new_nsp); 115467e9f4bSCedric Le Goater return ERR_PTR(err); 116ab516013SSerge E. Hallyn } 117ab516013SSerge E. Hallyn 118ab516013SSerge E. Hallyn /* 119ab516013SSerge E. Hallyn * called from clone. This now handles copy for nsproxy and all 120ab516013SSerge E. Hallyn * namespaces therein. 121ab516013SSerge E. Hallyn */ 122213dd266SEric W. Biederman int copy_namespaces(unsigned long flags, struct task_struct *tsk) 123ab516013SSerge E. Hallyn { 124ab516013SSerge E. Hallyn struct nsproxy *old_ns = tsk->nsproxy; 1251651e14eSSerge E. Hallyn struct nsproxy *new_ns; 1261651e14eSSerge E. Hallyn int err = 0; 127ab516013SSerge E. Hallyn 128ab516013SSerge E. Hallyn if (!old_ns) 129ab516013SSerge E. Hallyn return 0; 130ab516013SSerge E. Hallyn 131ab516013SSerge E. Hallyn get_nsproxy(old_ns); 132ab516013SSerge E. Hallyn 13330e49c26SPavel Emelyanov if (!(flags & (CLONE_NEWNS | CLONE_NEWUTS | CLONE_NEWIPC | 13418b6e041SSerge Hallyn CLONE_NEWPID | CLONE_NEWNET))) 135ab516013SSerge E. Hallyn return 0; 1361651e14eSSerge E. Hallyn 137e3222c4eSBadari Pulavarty if (!capable(CAP_SYS_ADMIN)) { 138e3222c4eSBadari Pulavarty err = -EPERM; 139e3222c4eSBadari Pulavarty goto out; 140e3222c4eSBadari Pulavarty } 141e3222c4eSBadari Pulavarty 14202fdb36aSSerge E. Hallyn /* 14302fdb36aSSerge E. Hallyn * CLONE_NEWIPC must detach from the undolist: after switching 14402fdb36aSSerge E. Hallyn * to a new ipc namespace, the semaphore arrays from the old 14502fdb36aSSerge E. Hallyn * namespace are unreachable. In clone parlance, CLONE_SYSVSEM 14602fdb36aSSerge E. Hallyn * means share undolist with parent, so we must forbid using 14702fdb36aSSerge E. Hallyn * it along with CLONE_NEWIPC. 14802fdb36aSSerge E. Hallyn */ 14902fdb36aSSerge E. Hallyn if ((flags & CLONE_NEWIPC) && (flags & CLONE_SYSVSEM)) { 15002fdb36aSSerge E. Hallyn err = -EINVAL; 15102fdb36aSSerge E. Hallyn goto out; 15202fdb36aSSerge E. Hallyn } 15302fdb36aSSerge E. Hallyn 154e3222c4eSBadari Pulavarty new_ns = create_new_namespaces(flags, tsk, tsk->fs); 155e3222c4eSBadari Pulavarty if (IS_ERR(new_ns)) { 156e3222c4eSBadari Pulavarty err = PTR_ERR(new_ns); 1571651e14eSSerge E. Hallyn goto out; 1581651e14eSSerge E. Hallyn } 1591651e14eSSerge E. Hallyn 1601651e14eSSerge E. Hallyn tsk->nsproxy = new_ns; 161858d72eaSSerge E. Hallyn 1621651e14eSSerge E. Hallyn out: 163444f378bSLinus Torvalds put_nsproxy(old_ns); 1641651e14eSSerge E. Hallyn return err; 165ab516013SSerge E. Hallyn } 166ab516013SSerge E. Hallyn 167ab516013SSerge E. Hallyn void free_nsproxy(struct nsproxy *ns) 168ab516013SSerge E. Hallyn { 1696b3286edSKirill Korotaev if (ns->mnt_ns) 1706b3286edSKirill Korotaev put_mnt_ns(ns->mnt_ns); 1714865ecf1SSerge E. Hallyn if (ns->uts_ns) 1724865ecf1SSerge E. Hallyn put_uts_ns(ns->uts_ns); 17325b21cb2SKirill Korotaev if (ns->ipc_ns) 17425b21cb2SKirill Korotaev put_ipc_ns(ns->ipc_ns); 1759a575a92SCedric Le Goater if (ns->pid_ns) 1769a575a92SCedric Le Goater put_pid_ns(ns->pid_ns); 1779dd776b6SEric W. Biederman put_net(ns->net_ns); 17898c0d07cSCedric Le Goater kmem_cache_free(nsproxy_cachep, ns); 179ab516013SSerge E. Hallyn } 180e3222c4eSBadari Pulavarty 181e3222c4eSBadari Pulavarty /* 182e3222c4eSBadari Pulavarty * Called from unshare. Unshare all the namespaces part of nsproxy. 1834e71e474SCedric Le Goater * On success, returns the new nsproxy. 184e3222c4eSBadari Pulavarty */ 185e3222c4eSBadari Pulavarty int unshare_nsproxy_namespaces(unsigned long unshare_flags, 186e3222c4eSBadari Pulavarty struct nsproxy **new_nsp, struct fs_struct *new_fs) 187e3222c4eSBadari Pulavarty { 188e3222c4eSBadari Pulavarty int err = 0; 189e3222c4eSBadari Pulavarty 19077ec739dSSerge E. Hallyn if (!(unshare_flags & (CLONE_NEWNS | CLONE_NEWUTS | CLONE_NEWIPC | 19150804fe3SEric W. Biederman CLONE_NEWNET | CLONE_NEWPID))) 192e3222c4eSBadari Pulavarty return 0; 193e3222c4eSBadari Pulavarty 194e3222c4eSBadari Pulavarty if (!capable(CAP_SYS_ADMIN)) 195e3222c4eSBadari Pulavarty return -EPERM; 196e3222c4eSBadari Pulavarty 197e3222c4eSBadari Pulavarty *new_nsp = create_new_namespaces(unshare_flags, current, 198e3222c4eSBadari Pulavarty new_fs ? new_fs : current->fs); 199858d72eaSSerge E. Hallyn if (IS_ERR(*new_nsp)) { 200e3222c4eSBadari Pulavarty err = PTR_ERR(*new_nsp); 201858d72eaSSerge E. Hallyn goto out; 202858d72eaSSerge E. Hallyn } 203858d72eaSSerge E. Hallyn 204858d72eaSSerge E. Hallyn out: 205e3222c4eSBadari Pulavarty return err; 206e3222c4eSBadari Pulavarty } 20798c0d07cSCedric Le Goater 208cf7b708cSPavel Emelyanov void switch_task_namespaces(struct task_struct *p, struct nsproxy *new) 209cf7b708cSPavel Emelyanov { 210cf7b708cSPavel Emelyanov struct nsproxy *ns; 211cf7b708cSPavel Emelyanov 212cf7b708cSPavel Emelyanov might_sleep(); 213cf7b708cSPavel Emelyanov 214cf7b708cSPavel Emelyanov ns = p->nsproxy; 215cf7b708cSPavel Emelyanov 216cf7b708cSPavel Emelyanov rcu_assign_pointer(p->nsproxy, new); 217cf7b708cSPavel Emelyanov 218cf7b708cSPavel Emelyanov if (ns && atomic_dec_and_test(&ns->count)) { 219cf7b708cSPavel Emelyanov /* 220cf7b708cSPavel Emelyanov * wait for others to get what they want from this nsproxy. 221cf7b708cSPavel Emelyanov * 222cf7b708cSPavel Emelyanov * cannot release this nsproxy via the call_rcu() since 223cf7b708cSPavel Emelyanov * put_mnt_ns() will want to sleep 224cf7b708cSPavel Emelyanov */ 225cf7b708cSPavel Emelyanov synchronize_rcu(); 226cf7b708cSPavel Emelyanov free_nsproxy(ns); 227cf7b708cSPavel Emelyanov } 228cf7b708cSPavel Emelyanov } 229cf7b708cSPavel Emelyanov 230cf7b708cSPavel Emelyanov void exit_task_namespaces(struct task_struct *p) 231cf7b708cSPavel Emelyanov { 232cf7b708cSPavel Emelyanov switch_task_namespaces(p, NULL); 233cf7b708cSPavel Emelyanov } 234cf7b708cSPavel Emelyanov 2350663c6f8SEric W. Biederman SYSCALL_DEFINE2(setns, int, fd, int, nstype) 2360663c6f8SEric W. Biederman { 2370663c6f8SEric W. Biederman const struct proc_ns_operations *ops; 2380663c6f8SEric W. Biederman struct task_struct *tsk = current; 2390663c6f8SEric W. Biederman struct nsproxy *new_nsproxy; 2400663c6f8SEric W. Biederman struct proc_inode *ei; 2410663c6f8SEric W. Biederman struct file *file; 2420663c6f8SEric W. Biederman int err; 2430663c6f8SEric W. Biederman 2440663c6f8SEric W. Biederman if (!capable(CAP_SYS_ADMIN)) 2450663c6f8SEric W. Biederman return -EPERM; 2460663c6f8SEric W. Biederman 2470663c6f8SEric W. Biederman file = proc_ns_fget(fd); 2480663c6f8SEric W. Biederman if (IS_ERR(file)) 2490663c6f8SEric W. Biederman return PTR_ERR(file); 2500663c6f8SEric W. Biederman 2510663c6f8SEric W. Biederman err = -EINVAL; 2520663c6f8SEric W. Biederman ei = PROC_I(file->f_dentry->d_inode); 2530663c6f8SEric W. Biederman ops = ei->ns_ops; 2540663c6f8SEric W. Biederman if (nstype && (ops->type != nstype)) 2550663c6f8SEric W. Biederman goto out; 2560663c6f8SEric W. Biederman 2570663c6f8SEric W. Biederman new_nsproxy = create_new_namespaces(0, tsk, tsk->fs); 2580663c6f8SEric W. Biederman if (IS_ERR(new_nsproxy)) { 2590663c6f8SEric W. Biederman err = PTR_ERR(new_nsproxy); 2600663c6f8SEric W. Biederman goto out; 2610663c6f8SEric W. Biederman } 2620663c6f8SEric W. Biederman 2630663c6f8SEric W. Biederman err = ops->install(new_nsproxy, ei->ns); 2640663c6f8SEric W. Biederman if (err) { 2650663c6f8SEric W. Biederman free_nsproxy(new_nsproxy); 2660663c6f8SEric W. Biederman goto out; 2670663c6f8SEric W. Biederman } 2680663c6f8SEric W. Biederman switch_task_namespaces(tsk, new_nsproxy); 2690663c6f8SEric W. Biederman out: 2700663c6f8SEric W. Biederman fput(file); 2710663c6f8SEric W. Biederman return err; 2720663c6f8SEric W. Biederman } 2730663c6f8SEric W. Biederman 27466577193SAl Viro int __init nsproxy_cache_init(void) 27598c0d07cSCedric Le Goater { 276db8906daSPavel Emelyanov nsproxy_cachep = KMEM_CACHE(nsproxy, SLAB_PANIC); 27798c0d07cSCedric Le Goater return 0; 27898c0d07cSCedric Le Goater } 279