1ab516013SSerge E. Hallyn /* 2ab516013SSerge E. Hallyn * Copyright (C) 2006 IBM Corporation 3ab516013SSerge E. Hallyn * 4ab516013SSerge E. Hallyn * Author: Serge Hallyn <serue@us.ibm.com> 5ab516013SSerge E. Hallyn * 6ab516013SSerge E. Hallyn * This program is free software; you can redistribute it and/or 7ab516013SSerge E. Hallyn * modify it under the terms of the GNU General Public License as 8ab516013SSerge E. Hallyn * published by the Free Software Foundation, version 2 of the 9ab516013SSerge E. Hallyn * License. 1025b21cb2SKirill Korotaev * 1125b21cb2SKirill Korotaev * Jun 2006 - namespaces support 1225b21cb2SKirill Korotaev * OpenVZ, SWsoft Inc. 1325b21cb2SKirill Korotaev * Pavel Emelianov <xemul@openvz.org> 14ab516013SSerge E. Hallyn */ 15ab516013SSerge E. Hallyn 16ab516013SSerge E. Hallyn #include <linux/module.h> 17ab516013SSerge E. Hallyn #include <linux/nsproxy.h> 180437eb59SSerge E. Hallyn #include <linux/init_task.h> 196b3286edSKirill Korotaev #include <linux/mnt_namespace.h> 204865ecf1SSerge E. Hallyn #include <linux/utsname.h> 219a575a92SCedric Le Goater #include <linux/pid_namespace.h> 229dd776b6SEric W. Biederman #include <net/net_namespace.h> 23ae5e1b22SPavel Emelyanov #include <linux/ipc_namespace.h> 240437eb59SSerge E. Hallyn 2598c0d07cSCedric Le Goater static struct kmem_cache *nsproxy_cachep; 2698c0d07cSCedric Le Goater 278467005dSAlexey Dobriyan struct nsproxy init_nsproxy = { 288467005dSAlexey Dobriyan .count = ATOMIC_INIT(1), 298467005dSAlexey Dobriyan .uts_ns = &init_uts_ns, 308467005dSAlexey Dobriyan #if defined(CONFIG_POSIX_MQUEUE) || defined(CONFIG_SYSVIPC) 318467005dSAlexey Dobriyan .ipc_ns = &init_ipc_ns, 328467005dSAlexey Dobriyan #endif 338467005dSAlexey Dobriyan .mnt_ns = NULL, 348467005dSAlexey Dobriyan .pid_ns = &init_pid_ns, 358467005dSAlexey Dobriyan #ifdef CONFIG_NET 368467005dSAlexey Dobriyan .net_ns = &init_net, 378467005dSAlexey Dobriyan #endif 388467005dSAlexey Dobriyan }; 39ab516013SSerge E. Hallyn 4090af90d7SAlexey Dobriyan static inline struct nsproxy *create_nsproxy(void) 41ab516013SSerge E. Hallyn { 4290af90d7SAlexey Dobriyan struct nsproxy *nsproxy; 43ab516013SSerge E. Hallyn 4490af90d7SAlexey Dobriyan nsproxy = kmem_cache_alloc(nsproxy_cachep, GFP_KERNEL); 4590af90d7SAlexey Dobriyan if (nsproxy) 4690af90d7SAlexey Dobriyan atomic_set(&nsproxy->count, 1); 4790af90d7SAlexey Dobriyan return nsproxy; 48ab516013SSerge E. Hallyn } 49ab516013SSerge E. Hallyn 50ab516013SSerge E. Hallyn /* 51e3222c4eSBadari Pulavarty * Create new nsproxy and all of its the associated namespaces. 52e3222c4eSBadari Pulavarty * Return the newly created nsproxy. Do not attach this to the task, 53e3222c4eSBadari Pulavarty * leave it to the caller to do proper locking and attach it to task. 54ab516013SSerge E. Hallyn */ 55213dd266SEric W. Biederman static struct nsproxy *create_new_namespaces(unsigned long flags, 56213dd266SEric W. Biederman struct task_struct *tsk, struct fs_struct *new_fs) 57ab516013SSerge E. Hallyn { 58e3222c4eSBadari Pulavarty struct nsproxy *new_nsp; 59467e9f4bSCedric Le Goater int err; 60ab516013SSerge E. Hallyn 6190af90d7SAlexey Dobriyan new_nsp = create_nsproxy(); 62e3222c4eSBadari Pulavarty if (!new_nsp) 63e3222c4eSBadari Pulavarty return ERR_PTR(-ENOMEM); 641651e14eSSerge E. Hallyn 65e3222c4eSBadari Pulavarty new_nsp->mnt_ns = copy_mnt_ns(flags, tsk->nsproxy->mnt_ns, new_fs); 66467e9f4bSCedric Le Goater if (IS_ERR(new_nsp->mnt_ns)) { 67467e9f4bSCedric Le Goater err = PTR_ERR(new_nsp->mnt_ns); 68e3222c4eSBadari Pulavarty goto out_ns; 69467e9f4bSCedric Le Goater } 70e3222c4eSBadari Pulavarty 71e3222c4eSBadari Pulavarty new_nsp->uts_ns = copy_utsname(flags, tsk->nsproxy->uts_ns); 72467e9f4bSCedric Le Goater if (IS_ERR(new_nsp->uts_ns)) { 73467e9f4bSCedric Le Goater err = PTR_ERR(new_nsp->uts_ns); 74e3222c4eSBadari Pulavarty goto out_uts; 75467e9f4bSCedric Le Goater } 76e3222c4eSBadari Pulavarty 77e3222c4eSBadari Pulavarty new_nsp->ipc_ns = copy_ipcs(flags, tsk->nsproxy->ipc_ns); 78467e9f4bSCedric Le Goater if (IS_ERR(new_nsp->ipc_ns)) { 79467e9f4bSCedric Le Goater err = PTR_ERR(new_nsp->ipc_ns); 80e3222c4eSBadari Pulavarty goto out_ipc; 81467e9f4bSCedric Le Goater } 82e3222c4eSBadari Pulavarty 832894d650SSukadev Bhattiprolu new_nsp->pid_ns = copy_pid_ns(flags, task_active_pid_ns(tsk)); 84467e9f4bSCedric Le Goater if (IS_ERR(new_nsp->pid_ns)) { 85467e9f4bSCedric Le Goater err = PTR_ERR(new_nsp->pid_ns); 86e3222c4eSBadari Pulavarty goto out_pid; 87467e9f4bSCedric Le Goater } 88e3222c4eSBadari Pulavarty 899dd776b6SEric W. Biederman new_nsp->net_ns = copy_net_ns(flags, tsk->nsproxy->net_ns); 909dd776b6SEric W. Biederman if (IS_ERR(new_nsp->net_ns)) { 919dd776b6SEric W. Biederman err = PTR_ERR(new_nsp->net_ns); 929dd776b6SEric W. Biederman goto out_net; 939dd776b6SEric W. Biederman } 949dd776b6SEric W. Biederman 95e3222c4eSBadari Pulavarty return new_nsp; 96e3222c4eSBadari Pulavarty 979dd776b6SEric W. Biederman out_net: 98acce292cSCedric Le Goater if (new_nsp->pid_ns) 99acce292cSCedric Le Goater put_pid_ns(new_nsp->pid_ns); 100e3222c4eSBadari Pulavarty out_pid: 101e3222c4eSBadari Pulavarty if (new_nsp->ipc_ns) 102e3222c4eSBadari Pulavarty put_ipc_ns(new_nsp->ipc_ns); 103e3222c4eSBadari Pulavarty out_ipc: 104e3222c4eSBadari Pulavarty if (new_nsp->uts_ns) 105e3222c4eSBadari Pulavarty put_uts_ns(new_nsp->uts_ns); 106e3222c4eSBadari Pulavarty out_uts: 107e3222c4eSBadari Pulavarty if (new_nsp->mnt_ns) 108e3222c4eSBadari Pulavarty put_mnt_ns(new_nsp->mnt_ns); 109e3222c4eSBadari Pulavarty out_ns: 11098c0d07cSCedric Le Goater kmem_cache_free(nsproxy_cachep, new_nsp); 111467e9f4bSCedric Le Goater return ERR_PTR(err); 112ab516013SSerge E. Hallyn } 113ab516013SSerge E. Hallyn 114ab516013SSerge E. Hallyn /* 115ab516013SSerge E. Hallyn * called from clone. This now handles copy for nsproxy and all 116ab516013SSerge E. Hallyn * namespaces therein. 117ab516013SSerge E. Hallyn */ 118213dd266SEric W. Biederman int copy_namespaces(unsigned long flags, struct task_struct *tsk) 119ab516013SSerge E. Hallyn { 120ab516013SSerge E. Hallyn struct nsproxy *old_ns = tsk->nsproxy; 1211651e14eSSerge E. Hallyn struct nsproxy *new_ns; 1221651e14eSSerge E. Hallyn int err = 0; 123ab516013SSerge E. Hallyn 124ab516013SSerge E. Hallyn if (!old_ns) 125ab516013SSerge E. Hallyn return 0; 126ab516013SSerge E. Hallyn 127ab516013SSerge E. Hallyn get_nsproxy(old_ns); 128ab516013SSerge E. Hallyn 12930e49c26SPavel Emelyanov if (!(flags & (CLONE_NEWNS | CLONE_NEWUTS | CLONE_NEWIPC | 13018b6e041SSerge Hallyn CLONE_NEWPID | CLONE_NEWNET))) 131ab516013SSerge E. Hallyn return 0; 1321651e14eSSerge E. Hallyn 133e3222c4eSBadari Pulavarty if (!capable(CAP_SYS_ADMIN)) { 134e3222c4eSBadari Pulavarty err = -EPERM; 135e3222c4eSBadari Pulavarty goto out; 136e3222c4eSBadari Pulavarty } 137e3222c4eSBadari Pulavarty 13802fdb36aSSerge E. Hallyn /* 13902fdb36aSSerge E. Hallyn * CLONE_NEWIPC must detach from the undolist: after switching 14002fdb36aSSerge E. Hallyn * to a new ipc namespace, the semaphore arrays from the old 14102fdb36aSSerge E. Hallyn * namespace are unreachable. In clone parlance, CLONE_SYSVSEM 14202fdb36aSSerge E. Hallyn * means share undolist with parent, so we must forbid using 14302fdb36aSSerge E. Hallyn * it along with CLONE_NEWIPC. 14402fdb36aSSerge E. Hallyn */ 14502fdb36aSSerge E. Hallyn if ((flags & CLONE_NEWIPC) && (flags & CLONE_SYSVSEM)) { 14602fdb36aSSerge E. Hallyn err = -EINVAL; 14702fdb36aSSerge E. Hallyn goto out; 14802fdb36aSSerge E. Hallyn } 14902fdb36aSSerge E. Hallyn 150e3222c4eSBadari Pulavarty new_ns = create_new_namespaces(flags, tsk, tsk->fs); 151e3222c4eSBadari Pulavarty if (IS_ERR(new_ns)) { 152e3222c4eSBadari Pulavarty err = PTR_ERR(new_ns); 1531651e14eSSerge E. Hallyn goto out; 1541651e14eSSerge E. Hallyn } 1551651e14eSSerge E. Hallyn 1561651e14eSSerge E. Hallyn tsk->nsproxy = new_ns; 157858d72eaSSerge E. Hallyn 1581651e14eSSerge E. Hallyn out: 159444f378bSLinus Torvalds put_nsproxy(old_ns); 1601651e14eSSerge E. Hallyn return err; 161ab516013SSerge E. Hallyn } 162ab516013SSerge E. Hallyn 163ab516013SSerge E. Hallyn void free_nsproxy(struct nsproxy *ns) 164ab516013SSerge E. Hallyn { 1656b3286edSKirill Korotaev if (ns->mnt_ns) 1666b3286edSKirill Korotaev put_mnt_ns(ns->mnt_ns); 1674865ecf1SSerge E. Hallyn if (ns->uts_ns) 1684865ecf1SSerge E. Hallyn put_uts_ns(ns->uts_ns); 16925b21cb2SKirill Korotaev if (ns->ipc_ns) 17025b21cb2SKirill Korotaev put_ipc_ns(ns->ipc_ns); 1719a575a92SCedric Le Goater if (ns->pid_ns) 1729a575a92SCedric Le Goater put_pid_ns(ns->pid_ns); 1739dd776b6SEric W. Biederman put_net(ns->net_ns); 17498c0d07cSCedric Le Goater kmem_cache_free(nsproxy_cachep, ns); 175ab516013SSerge E. Hallyn } 176e3222c4eSBadari Pulavarty 177e3222c4eSBadari Pulavarty /* 178e3222c4eSBadari Pulavarty * Called from unshare. Unshare all the namespaces part of nsproxy. 1794e71e474SCedric Le Goater * On success, returns the new nsproxy. 180e3222c4eSBadari Pulavarty */ 181e3222c4eSBadari Pulavarty int unshare_nsproxy_namespaces(unsigned long unshare_flags, 182e3222c4eSBadari Pulavarty struct nsproxy **new_nsp, struct fs_struct *new_fs) 183e3222c4eSBadari Pulavarty { 184e3222c4eSBadari Pulavarty int err = 0; 185e3222c4eSBadari Pulavarty 18677ec739dSSerge E. Hallyn if (!(unshare_flags & (CLONE_NEWNS | CLONE_NEWUTS | CLONE_NEWIPC | 18718b6e041SSerge Hallyn CLONE_NEWNET))) 188e3222c4eSBadari Pulavarty return 0; 189e3222c4eSBadari Pulavarty 190e3222c4eSBadari Pulavarty if (!capable(CAP_SYS_ADMIN)) 191e3222c4eSBadari Pulavarty return -EPERM; 192e3222c4eSBadari Pulavarty 193e3222c4eSBadari Pulavarty *new_nsp = create_new_namespaces(unshare_flags, current, 194e3222c4eSBadari Pulavarty new_fs ? new_fs : current->fs); 195858d72eaSSerge E. Hallyn if (IS_ERR(*new_nsp)) { 196e3222c4eSBadari Pulavarty err = PTR_ERR(*new_nsp); 197858d72eaSSerge E. Hallyn goto out; 198858d72eaSSerge E. Hallyn } 199858d72eaSSerge E. Hallyn 200e885dcdeSSerge E. Hallyn err = ns_cgroup_clone(current, task_pid(current)); 201858d72eaSSerge E. Hallyn if (err) 202858d72eaSSerge E. Hallyn put_nsproxy(*new_nsp); 203858d72eaSSerge E. Hallyn 204858d72eaSSerge E. Hallyn out: 205e3222c4eSBadari Pulavarty return err; 206e3222c4eSBadari Pulavarty } 20798c0d07cSCedric Le Goater 208cf7b708cSPavel Emelyanov void switch_task_namespaces(struct task_struct *p, struct nsproxy *new) 209cf7b708cSPavel Emelyanov { 210cf7b708cSPavel Emelyanov struct nsproxy *ns; 211cf7b708cSPavel Emelyanov 212cf7b708cSPavel Emelyanov might_sleep(); 213cf7b708cSPavel Emelyanov 214cf7b708cSPavel Emelyanov ns = p->nsproxy; 215cf7b708cSPavel Emelyanov 216cf7b708cSPavel Emelyanov rcu_assign_pointer(p->nsproxy, new); 217cf7b708cSPavel Emelyanov 218cf7b708cSPavel Emelyanov if (ns && atomic_dec_and_test(&ns->count)) { 219cf7b708cSPavel Emelyanov /* 220cf7b708cSPavel Emelyanov * wait for others to get what they want from this nsproxy. 221cf7b708cSPavel Emelyanov * 222cf7b708cSPavel Emelyanov * cannot release this nsproxy via the call_rcu() since 223cf7b708cSPavel Emelyanov * put_mnt_ns() will want to sleep 224cf7b708cSPavel Emelyanov */ 225cf7b708cSPavel Emelyanov synchronize_rcu(); 226cf7b708cSPavel Emelyanov free_nsproxy(ns); 227cf7b708cSPavel Emelyanov } 228cf7b708cSPavel Emelyanov } 229cf7b708cSPavel Emelyanov 230cf7b708cSPavel Emelyanov void exit_task_namespaces(struct task_struct *p) 231cf7b708cSPavel Emelyanov { 232cf7b708cSPavel Emelyanov switch_task_namespaces(p, NULL); 233cf7b708cSPavel Emelyanov } 234cf7b708cSPavel Emelyanov 23598c0d07cSCedric Le Goater static int __init nsproxy_cache_init(void) 23698c0d07cSCedric Le Goater { 237db8906daSPavel Emelyanov nsproxy_cachep = KMEM_CACHE(nsproxy, SLAB_PANIC); 23898c0d07cSCedric Le Goater return 0; 23998c0d07cSCedric Le Goater } 24098c0d07cSCedric Le Goater 24198c0d07cSCedric Le Goater module_init(nsproxy_cache_init); 242