1ab516013SSerge E. Hallyn /* 2ab516013SSerge E. Hallyn * Copyright (C) 2006 IBM Corporation 3ab516013SSerge E. Hallyn * 4ab516013SSerge E. Hallyn * Author: Serge Hallyn <serue@us.ibm.com> 5ab516013SSerge E. Hallyn * 6ab516013SSerge E. Hallyn * This program is free software; you can redistribute it and/or 7ab516013SSerge E. Hallyn * modify it under the terms of the GNU General Public License as 8ab516013SSerge E. Hallyn * published by the Free Software Foundation, version 2 of the 9ab516013SSerge E. Hallyn * License. 1025b21cb2SKirill Korotaev * 1125b21cb2SKirill Korotaev * Jun 2006 - namespaces support 1225b21cb2SKirill Korotaev * OpenVZ, SWsoft Inc. 1325b21cb2SKirill Korotaev * Pavel Emelianov <xemul@openvz.org> 14ab516013SSerge E. Hallyn */ 15ab516013SSerge E. Hallyn 165a0e3ad6STejun Heo #include <linux/slab.h> 17ab516013SSerge E. Hallyn #include <linux/module.h> 18ab516013SSerge E. Hallyn #include <linux/nsproxy.h> 190437eb59SSerge E. Hallyn #include <linux/init_task.h> 206b3286edSKirill Korotaev #include <linux/mnt_namespace.h> 214865ecf1SSerge E. Hallyn #include <linux/utsname.h> 229a575a92SCedric Le Goater #include <linux/pid_namespace.h> 239dd776b6SEric W. Biederman #include <net/net_namespace.h> 24ae5e1b22SPavel Emelyanov #include <linux/ipc_namespace.h> 250437eb59SSerge E. Hallyn 2698c0d07cSCedric Le Goater static struct kmem_cache *nsproxy_cachep; 2798c0d07cSCedric Le Goater 288467005dSAlexey Dobriyan struct nsproxy init_nsproxy = { 298467005dSAlexey Dobriyan .count = ATOMIC_INIT(1), 308467005dSAlexey Dobriyan .uts_ns = &init_uts_ns, 318467005dSAlexey Dobriyan #if defined(CONFIG_POSIX_MQUEUE) || defined(CONFIG_SYSVIPC) 328467005dSAlexey Dobriyan .ipc_ns = &init_ipc_ns, 338467005dSAlexey Dobriyan #endif 348467005dSAlexey Dobriyan .mnt_ns = NULL, 358467005dSAlexey Dobriyan .pid_ns = &init_pid_ns, 368467005dSAlexey Dobriyan #ifdef CONFIG_NET 378467005dSAlexey Dobriyan .net_ns = &init_net, 388467005dSAlexey Dobriyan #endif 398467005dSAlexey Dobriyan }; 40ab516013SSerge E. Hallyn 4190af90d7SAlexey Dobriyan static inline struct nsproxy *create_nsproxy(void) 42ab516013SSerge E. Hallyn { 4390af90d7SAlexey Dobriyan struct nsproxy *nsproxy; 44ab516013SSerge E. Hallyn 4590af90d7SAlexey Dobriyan nsproxy = kmem_cache_alloc(nsproxy_cachep, GFP_KERNEL); 4690af90d7SAlexey Dobriyan if (nsproxy) 4790af90d7SAlexey Dobriyan atomic_set(&nsproxy->count, 1); 4890af90d7SAlexey Dobriyan return nsproxy; 49ab516013SSerge E. Hallyn } 50ab516013SSerge E. Hallyn 51ab516013SSerge E. Hallyn /* 52e3222c4eSBadari Pulavarty * Create new nsproxy and all of its the associated namespaces. 53e3222c4eSBadari Pulavarty * Return the newly created nsproxy. Do not attach this to the task, 54e3222c4eSBadari Pulavarty * leave it to the caller to do proper locking and attach it to task. 55ab516013SSerge E. Hallyn */ 56213dd266SEric W. Biederman static struct nsproxy *create_new_namespaces(unsigned long flags, 57213dd266SEric W. Biederman struct task_struct *tsk, struct fs_struct *new_fs) 58ab516013SSerge E. Hallyn { 59e3222c4eSBadari Pulavarty struct nsproxy *new_nsp; 60467e9f4bSCedric Le Goater int err; 61ab516013SSerge E. Hallyn 6290af90d7SAlexey Dobriyan new_nsp = create_nsproxy(); 63e3222c4eSBadari Pulavarty if (!new_nsp) 64e3222c4eSBadari Pulavarty return ERR_PTR(-ENOMEM); 651651e14eSSerge E. Hallyn 66e3222c4eSBadari Pulavarty new_nsp->mnt_ns = copy_mnt_ns(flags, tsk->nsproxy->mnt_ns, new_fs); 67467e9f4bSCedric Le Goater if (IS_ERR(new_nsp->mnt_ns)) { 68467e9f4bSCedric Le Goater err = PTR_ERR(new_nsp->mnt_ns); 69e3222c4eSBadari Pulavarty goto out_ns; 70467e9f4bSCedric Le Goater } 71e3222c4eSBadari Pulavarty 72bb96a6f5SSerge E. Hallyn new_nsp->uts_ns = copy_utsname(flags, tsk); 73467e9f4bSCedric Le Goater if (IS_ERR(new_nsp->uts_ns)) { 74467e9f4bSCedric Le Goater err = PTR_ERR(new_nsp->uts_ns); 75e3222c4eSBadari Pulavarty goto out_uts; 76467e9f4bSCedric Le Goater } 77e3222c4eSBadari Pulavarty 78e3222c4eSBadari Pulavarty new_nsp->ipc_ns = copy_ipcs(flags, tsk->nsproxy->ipc_ns); 79467e9f4bSCedric Le Goater if (IS_ERR(new_nsp->ipc_ns)) { 80467e9f4bSCedric Le Goater err = PTR_ERR(new_nsp->ipc_ns); 81e3222c4eSBadari Pulavarty goto out_ipc; 82467e9f4bSCedric Le Goater } 83e3222c4eSBadari Pulavarty 842894d650SSukadev Bhattiprolu new_nsp->pid_ns = copy_pid_ns(flags, task_active_pid_ns(tsk)); 85467e9f4bSCedric Le Goater if (IS_ERR(new_nsp->pid_ns)) { 86467e9f4bSCedric Le Goater err = PTR_ERR(new_nsp->pid_ns); 87e3222c4eSBadari Pulavarty goto out_pid; 88467e9f4bSCedric Le Goater } 89e3222c4eSBadari Pulavarty 909dd776b6SEric W. Biederman new_nsp->net_ns = copy_net_ns(flags, tsk->nsproxy->net_ns); 919dd776b6SEric W. Biederman if (IS_ERR(new_nsp->net_ns)) { 929dd776b6SEric W. Biederman err = PTR_ERR(new_nsp->net_ns); 939dd776b6SEric W. Biederman goto out_net; 949dd776b6SEric W. Biederman } 959dd776b6SEric W. Biederman 96e3222c4eSBadari Pulavarty return new_nsp; 97e3222c4eSBadari Pulavarty 989dd776b6SEric W. Biederman out_net: 99acce292cSCedric Le Goater if (new_nsp->pid_ns) 100acce292cSCedric Le Goater put_pid_ns(new_nsp->pid_ns); 101e3222c4eSBadari Pulavarty out_pid: 102e3222c4eSBadari Pulavarty if (new_nsp->ipc_ns) 103e3222c4eSBadari Pulavarty put_ipc_ns(new_nsp->ipc_ns); 104e3222c4eSBadari Pulavarty out_ipc: 105e3222c4eSBadari Pulavarty if (new_nsp->uts_ns) 106e3222c4eSBadari Pulavarty put_uts_ns(new_nsp->uts_ns); 107e3222c4eSBadari Pulavarty out_uts: 108e3222c4eSBadari Pulavarty if (new_nsp->mnt_ns) 109e3222c4eSBadari Pulavarty put_mnt_ns(new_nsp->mnt_ns); 110e3222c4eSBadari Pulavarty out_ns: 11198c0d07cSCedric Le Goater kmem_cache_free(nsproxy_cachep, new_nsp); 112467e9f4bSCedric Le Goater return ERR_PTR(err); 113ab516013SSerge E. Hallyn } 114ab516013SSerge E. Hallyn 115ab516013SSerge E. Hallyn /* 116ab516013SSerge E. Hallyn * called from clone. This now handles copy for nsproxy and all 117ab516013SSerge E. Hallyn * namespaces therein. 118ab516013SSerge E. Hallyn */ 119213dd266SEric W. Biederman int copy_namespaces(unsigned long flags, struct task_struct *tsk) 120ab516013SSerge E. Hallyn { 121ab516013SSerge E. Hallyn struct nsproxy *old_ns = tsk->nsproxy; 1221651e14eSSerge E. Hallyn struct nsproxy *new_ns; 1231651e14eSSerge E. Hallyn int err = 0; 124ab516013SSerge E. Hallyn 125ab516013SSerge E. Hallyn if (!old_ns) 126ab516013SSerge E. Hallyn return 0; 127ab516013SSerge E. Hallyn 128ab516013SSerge E. Hallyn get_nsproxy(old_ns); 129ab516013SSerge E. Hallyn 13030e49c26SPavel Emelyanov if (!(flags & (CLONE_NEWNS | CLONE_NEWUTS | CLONE_NEWIPC | 13118b6e041SSerge Hallyn CLONE_NEWPID | CLONE_NEWNET))) 132ab516013SSerge E. Hallyn return 0; 1331651e14eSSerge E. Hallyn 134e3222c4eSBadari Pulavarty if (!capable(CAP_SYS_ADMIN)) { 135e3222c4eSBadari Pulavarty err = -EPERM; 136e3222c4eSBadari Pulavarty goto out; 137e3222c4eSBadari Pulavarty } 138e3222c4eSBadari Pulavarty 13902fdb36aSSerge E. Hallyn /* 14002fdb36aSSerge E. Hallyn * CLONE_NEWIPC must detach from the undolist: after switching 14102fdb36aSSerge E. Hallyn * to a new ipc namespace, the semaphore arrays from the old 14202fdb36aSSerge E. Hallyn * namespace are unreachable. In clone parlance, CLONE_SYSVSEM 14302fdb36aSSerge E. Hallyn * means share undolist with parent, so we must forbid using 14402fdb36aSSerge E. Hallyn * it along with CLONE_NEWIPC. 14502fdb36aSSerge E. Hallyn */ 14602fdb36aSSerge E. Hallyn if ((flags & CLONE_NEWIPC) && (flags & CLONE_SYSVSEM)) { 14702fdb36aSSerge E. Hallyn err = -EINVAL; 14802fdb36aSSerge E. Hallyn goto out; 14902fdb36aSSerge E. Hallyn } 15002fdb36aSSerge E. Hallyn 151e3222c4eSBadari Pulavarty new_ns = create_new_namespaces(flags, tsk, tsk->fs); 152e3222c4eSBadari Pulavarty if (IS_ERR(new_ns)) { 153e3222c4eSBadari Pulavarty err = PTR_ERR(new_ns); 1541651e14eSSerge E. Hallyn goto out; 1551651e14eSSerge E. Hallyn } 1561651e14eSSerge E. Hallyn 1571651e14eSSerge E. Hallyn tsk->nsproxy = new_ns; 158858d72eaSSerge E. Hallyn 1591651e14eSSerge E. Hallyn out: 160444f378bSLinus Torvalds put_nsproxy(old_ns); 1611651e14eSSerge E. Hallyn return err; 162ab516013SSerge E. Hallyn } 163ab516013SSerge E. Hallyn 164ab516013SSerge E. Hallyn void free_nsproxy(struct nsproxy *ns) 165ab516013SSerge E. Hallyn { 1666b3286edSKirill Korotaev if (ns->mnt_ns) 1676b3286edSKirill Korotaev put_mnt_ns(ns->mnt_ns); 1684865ecf1SSerge E. Hallyn if (ns->uts_ns) 1694865ecf1SSerge E. Hallyn put_uts_ns(ns->uts_ns); 17025b21cb2SKirill Korotaev if (ns->ipc_ns) 17125b21cb2SKirill Korotaev put_ipc_ns(ns->ipc_ns); 1729a575a92SCedric Le Goater if (ns->pid_ns) 1739a575a92SCedric Le Goater put_pid_ns(ns->pid_ns); 1749dd776b6SEric W. Biederman put_net(ns->net_ns); 17598c0d07cSCedric Le Goater kmem_cache_free(nsproxy_cachep, ns); 176ab516013SSerge E. Hallyn } 177e3222c4eSBadari Pulavarty 178e3222c4eSBadari Pulavarty /* 179e3222c4eSBadari Pulavarty * Called from unshare. Unshare all the namespaces part of nsproxy. 1804e71e474SCedric Le Goater * On success, returns the new nsproxy. 181e3222c4eSBadari Pulavarty */ 182e3222c4eSBadari Pulavarty int unshare_nsproxy_namespaces(unsigned long unshare_flags, 183e3222c4eSBadari Pulavarty struct nsproxy **new_nsp, struct fs_struct *new_fs) 184e3222c4eSBadari Pulavarty { 185e3222c4eSBadari Pulavarty int err = 0; 186e3222c4eSBadari Pulavarty 18777ec739dSSerge E. Hallyn if (!(unshare_flags & (CLONE_NEWNS | CLONE_NEWUTS | CLONE_NEWIPC | 18818b6e041SSerge Hallyn CLONE_NEWNET))) 189e3222c4eSBadari Pulavarty return 0; 190e3222c4eSBadari Pulavarty 191e3222c4eSBadari Pulavarty if (!capable(CAP_SYS_ADMIN)) 192e3222c4eSBadari Pulavarty return -EPERM; 193e3222c4eSBadari Pulavarty 194e3222c4eSBadari Pulavarty *new_nsp = create_new_namespaces(unshare_flags, current, 195e3222c4eSBadari Pulavarty new_fs ? new_fs : current->fs); 196858d72eaSSerge E. Hallyn if (IS_ERR(*new_nsp)) { 197e3222c4eSBadari Pulavarty err = PTR_ERR(*new_nsp); 198858d72eaSSerge E. Hallyn goto out; 199858d72eaSSerge E. Hallyn } 200858d72eaSSerge E. Hallyn 201e885dcdeSSerge E. Hallyn err = ns_cgroup_clone(current, task_pid(current)); 202858d72eaSSerge E. Hallyn if (err) 203858d72eaSSerge E. Hallyn put_nsproxy(*new_nsp); 204858d72eaSSerge E. Hallyn 205858d72eaSSerge E. Hallyn out: 206e3222c4eSBadari Pulavarty return err; 207e3222c4eSBadari Pulavarty } 20898c0d07cSCedric Le Goater 209cf7b708cSPavel Emelyanov void switch_task_namespaces(struct task_struct *p, struct nsproxy *new) 210cf7b708cSPavel Emelyanov { 211cf7b708cSPavel Emelyanov struct nsproxy *ns; 212cf7b708cSPavel Emelyanov 213cf7b708cSPavel Emelyanov might_sleep(); 214cf7b708cSPavel Emelyanov 215cf7b708cSPavel Emelyanov ns = p->nsproxy; 216cf7b708cSPavel Emelyanov 217cf7b708cSPavel Emelyanov rcu_assign_pointer(p->nsproxy, new); 218cf7b708cSPavel Emelyanov 219cf7b708cSPavel Emelyanov if (ns && atomic_dec_and_test(&ns->count)) { 220cf7b708cSPavel Emelyanov /* 221cf7b708cSPavel Emelyanov * wait for others to get what they want from this nsproxy. 222cf7b708cSPavel Emelyanov * 223cf7b708cSPavel Emelyanov * cannot release this nsproxy via the call_rcu() since 224cf7b708cSPavel Emelyanov * put_mnt_ns() will want to sleep 225cf7b708cSPavel Emelyanov */ 226cf7b708cSPavel Emelyanov synchronize_rcu(); 227cf7b708cSPavel Emelyanov free_nsproxy(ns); 228cf7b708cSPavel Emelyanov } 229cf7b708cSPavel Emelyanov } 230cf7b708cSPavel Emelyanov 231cf7b708cSPavel Emelyanov void exit_task_namespaces(struct task_struct *p) 232cf7b708cSPavel Emelyanov { 233cf7b708cSPavel Emelyanov switch_task_namespaces(p, NULL); 234cf7b708cSPavel Emelyanov } 235cf7b708cSPavel Emelyanov 23698c0d07cSCedric Le Goater static int __init nsproxy_cache_init(void) 23798c0d07cSCedric Le Goater { 238db8906daSPavel Emelyanov nsproxy_cachep = KMEM_CACHE(nsproxy, SLAB_PANIC); 23998c0d07cSCedric Le Goater return 0; 24098c0d07cSCedric Le Goater } 24198c0d07cSCedric Le Goater 24298c0d07cSCedric Le Goater module_init(nsproxy_cache_init); 243