// SPDX-License-Identifier: GPL-2.0-only
/*
 * Generic pidhash and scalable, time-bounded PID allocator
 *
 * (C) 2002-2003 Nadia Yvette Chambers, IBM
 * (C) 2004 Nadia Yvette Chambers, Oracle
 * (C) 2002-2004 Ingo Molnar, Red Hat
 *
 * pid-structures are backing objects for tasks sharing a given ID to chain
 * against. There is very little to them aside from hashing them and
 * parking tasks using given ID's on a list.
 *
 * The hash is always changed with the tasklist_lock write-acquired,
 * and the hash is only accessed with the tasklist_lock at least
 * read-acquired, so there's no additional SMP locking needed here.
 *
 * We have a list of bitmap pages, which bitmaps represent the PID space.
 * Allocating and freeing PIDs is completely lockless. The worst-case
 * allocation scenario when all but one out of 1 million PIDs possible are
 * allocated already: the scanning of 32 list entries and at most PAGE_SIZE
 * bytes. The typical fastpath is a single successful setbit. Freeing is O(1).
 *
 * Pid namespaces:
 *    (C) 2007 Pavel Emelyanov <xemul@openvz.org>, OpenVZ, SWsoft Inc.
 *    (C) 2007 Sukadev Bhattiprolu <sukadev@us.ibm.com>, IBM
 *     Many thanks to Oleg Nesterov for comments and help
 *
 */
291da177e4SLinus Torvalds
301da177e4SLinus Torvalds #include <linux/mm.h>
319984de1aSPaul Gortmaker #include <linux/export.h>
321da177e4SLinus Torvalds #include <linux/slab.h>
331da177e4SLinus Torvalds #include <linux/init.h>
3482524746SFranck Bui-Huu #include <linux/rculist.h>
3557c8a661SMike Rapoport #include <linux/memblock.h>
3661a58c6cSSukadev Bhattiprolu #include <linux/pid_namespace.h>
37820e45dbSSukadev Bhattiprolu #include <linux/init_task.h>
383eb07c8cSSukadev Bhattiprolu #include <linux/syscalls.h>
390bb80f24SDavid Howells #include <linux/proc_ns.h>
40f57e515aSJoel Fernandes (Google) #include <linux/refcount.h>
4132fcb426SChristian Brauner #include <linux/anon_inodes.h>
4232fcb426SChristian Brauner #include <linux/sched/signal.h>
4329930025SIngo Molnar #include <linux/sched/task.h>
4495846ecfSGargi Sharma #include <linux/idr.h>
454969f8a0SKees Cook #include <net/sock.h>
466da73d15SChristian Brauner #include <uapi/linux/pidfd.h>
471da177e4SLinus Torvalds
48e1e871afSDavid Howells struct pid init_struct_pid = {
49f57e515aSJoel Fernandes (Google) .count = REFCOUNT_INIT(1),
50e1e871afSDavid Howells .tasks = {
51e1e871afSDavid Howells { .first = NULL },
52e1e871afSDavid Howells { .first = NULL },
53e1e871afSDavid Howells { .first = NULL },
54e1e871afSDavid Howells },
55e1e871afSDavid Howells .level = 0,
56e1e871afSDavid Howells .numbers = { {
57e1e871afSDavid Howells .nr = 0,
58e1e871afSDavid Howells .ns = &init_pid_ns,
59e1e871afSDavid Howells }, }
60e1e871afSDavid Howells };
611da177e4SLinus Torvalds
621da177e4SLinus Torvalds int pid_max = PID_MAX_DEFAULT;
631da177e4SLinus Torvalds
641da177e4SLinus Torvalds #define RESERVED_PIDS 300
651da177e4SLinus Torvalds
661da177e4SLinus Torvalds int pid_max_min = RESERVED_PIDS + 1;
671da177e4SLinus Torvalds int pid_max_max = PID_MAX_LIMIT;
681da177e4SLinus Torvalds
691da177e4SLinus Torvalds /*
701da177e4SLinus Torvalds * PID-map pages start out as NULL, they get allocated upon
711da177e4SLinus Torvalds * first use and are never deallocated. This way a low pid_max
721da177e4SLinus Torvalds * value does not cause lots of bitmaps to be allocated, but
731da177e4SLinus Torvalds * the scheme scales to up to 4 million PIDs, runtime.
741da177e4SLinus Torvalds */
7561a58c6cSSukadev Bhattiprolu struct pid_namespace init_pid_ns = {
768eb71d95SKirill Tkhai .ns.count = REFCOUNT_INIT(2),
77f6bb2a2cSMatthew Wilcox .idr = IDR_INIT(init_pid_ns.idr),
78e8cfbc24SGargi Sharma .pid_allocated = PIDNS_ADDING,
79faacbfd3SPavel Emelyanov .level = 0,
80faacbfd3SPavel Emelyanov .child_reaper = &init_task,
8149f4d8b9SEric W. Biederman .user_ns = &init_user_ns,
82435d5f4bSAl Viro .ns.inum = PROC_PID_INIT_INO,
8333c42940SAl Viro #ifdef CONFIG_PID_NS
8433c42940SAl Viro .ns.ops = &pidns_operations,
8533c42940SAl Viro #endif
863fbc9648SSukadev Bhattiprolu #if defined(CONFIG_SYSCTL) && defined(CONFIG_MEMFD_CREATE)
87198fe21bSPavel Emelyanov .memfd_noexec_scope = MEMFD_NOEXEC_SCOPE_EXEC,
881da177e4SLinus Torvalds #endif
8992476d7fSEric W. Biederman };
9092476d7fSEric W. Biederman EXPORT_SYMBOL_GPL(init_pid_ns);
9192476d7fSEric W. Biederman
9292476d7fSEric W. Biederman /*
9392476d7fSEric W. Biederman * Note: disable interrupts while the pidmap_lock is held as an
9492476d7fSEric W. Biederman * interrupt might come in and do read_lock(&tasklist_lock).
9592476d7fSEric W. Biederman *
9692476d7fSEric W. Biederman * If we don't disable interrupts there is a nasty deadlock between
9792476d7fSEric W. Biederman * detach_pid()->free_pid() and another cpu that does
9892476d7fSEric W. Biederman * spin_lock(&pidmap_lock) followed by an interrupt routine that does
9992476d7fSEric W. Biederman * read_lock(&tasklist_lock);
10092476d7fSEric W. Biederman *
10192476d7fSEric W. Biederman * After we clean up the tasklist_lock and know there are no
1023fbc9648SSukadev Bhattiprolu * irq handlers that take it we can leave the interrupts enabled.
1031da177e4SLinus Torvalds * For now it is easier to be safe than to prove it can't happen.
1041da177e4SLinus Torvalds */
1057ad5b3a5SHarvey Harrison
10692476d7fSEric W. Biederman static __cacheline_aligned_in_smp DEFINE_SPINLOCK(pidmap_lock);
107baf8f0f8SPavel Emelianov
put_pid(struct pid * pid)108baf8f0f8SPavel Emelianov void put_pid(struct pid *pid)
10992476d7fSEric W. Biederman {
11092476d7fSEric W. Biederman struct pid_namespace *ns;
111baf8f0f8SPavel Emelianov
1128ef047aaSPavel Emelyanov if (!pid)
113f57e515aSJoel Fernandes (Google) return;
114baf8f0f8SPavel Emelianov
1158ef047aaSPavel Emelyanov ns = pid->numbers[pid->level].ns;
1168ef047aaSPavel Emelyanov if (refcount_dec_and_test(&pid->count)) {
11792476d7fSEric W. Biederman kmem_cache_free(ns->pid_cachep, pid);
118bbf73147SEric W. Biederman put_pid_ns(ns);
11992476d7fSEric W. Biederman }
12092476d7fSEric W. Biederman }
12192476d7fSEric W. Biederman EXPORT_SYMBOL_GPL(put_pid);
12292476d7fSEric W. Biederman
delayed_put_pid(struct rcu_head * rhp)12392476d7fSEric W. Biederman static void delayed_put_pid(struct rcu_head *rhp)
12492476d7fSEric W. Biederman {
12592476d7fSEric W. Biederman struct pid *pid = container_of(rhp, struct pid, rcu);
1267ad5b3a5SHarvey Harrison put_pid(pid);
12792476d7fSEric W. Biederman }
12892476d7fSEric W. Biederman
free_pid(struct pid * pid)1298ef047aaSPavel Emelyanov void free_pid(struct pid *pid)
13092476d7fSEric W. Biederman {
13192476d7fSEric W. Biederman /* We can be called with write_lock_irq(&tasklist_lock) held */
13292476d7fSEric W. Biederman int i;
1330a01f2ccSEric W. Biederman unsigned long flags;
1340a01f2ccSEric W. Biederman
135af4b8a83SEric W. Biederman spin_lock_irqsave(&pidmap_lock, flags);
136e8cfbc24SGargi Sharma for (i = 0; i <= pid->level; i++) {
137a6064885SEric W. Biederman struct upid *upid = pid->numbers + i;
138af4b8a83SEric W. Biederman struct pid_namespace *ns = upid->ns;
139af4b8a83SEric W. Biederman switch (--ns->pid_allocated) {
140af4b8a83SEric W. Biederman case 2:
141af4b8a83SEric W. Biederman case 1:
142af4b8a83SEric W. Biederman /* When all that is left in the pid namespace
143af4b8a83SEric W. Biederman * is the reaper wake up the reaper. The reaper
144af4b8a83SEric W. Biederman * may be sleeping in zap_pid_ns_processes().
145e8cfbc24SGargi Sharma */
146314a8ad0SOleg Nesterov wake_up_process(ns->child_reaper);
147314a8ad0SOleg Nesterov break;
148e8cfbc24SGargi Sharma case PIDNS_ADDING:
149af4b8a83SEric W. Biederman /* Handle a fork failure of the first process */
1500a01f2ccSEric W. Biederman WARN_ON(ns->child_reaper);
15195846ecfSGargi Sharma ns->pid_allocated = 0;
15295846ecfSGargi Sharma break;
1535e1182deSEric W. Biederman }
15492476d7fSEric W. Biederman
15592476d7fSEric W. Biederman idr_remove(&ns->idr, upid->nr);
15692476d7fSEric W. Biederman }
15792476d7fSEric W. Biederman spin_unlock_irqrestore(&pidmap_lock, flags);
15892476d7fSEric W. Biederman
15949cb2fc4SAdrian Reber call_rcu(&pid->rcu, delayed_put_pid);
16049cb2fc4SAdrian Reber }
16192476d7fSEric W. Biederman
/**
 * alloc_pid() - allocate a struct pid with a number in @ns and all its ancestors
 * @ns:           most nested pid namespace the new pid lives in
 * @set_tid:      optional array of requested PID numbers, starting with the
 *                one for @ns and continuing towards its ancestors
 * @set_tid_size: number of entries in @set_tid; 0 means "pick any free PID"
 *
 * Numbers are reserved in each namespace's IDR with a NULL pointer first,
 * and only replaced by the new pid once it is fully initialised.
 *
 * Return: the new pid holding one reference, or an ERR_PTR():
 *   -EINVAL  @set_tid_size exceeds @ns->level + 1, a requested number is
 *            outside [1, pid_max), or a number other than 1 is requested
 *            in a namespace without a child reaper
 *   -EPERM   the caller fails checkpoint_restore_ns_capable() for a
 *            namespace in which a number was requested
 *   -EEXIST  a requested number is already in use
 *   -EAGAIN  no free number was found in a namespace
 *   -ENOMEM  allocation failure, or @ns no longer has PIDNS_ADDING set
 */
struct pid *alloc_pid(struct pid_namespace *ns, pid_t *set_tid,
		      size_t set_tid_size)
{
	struct pid *pid;
	enum pid_type type;
	int i, nr;
	struct pid_namespace *tmp;
	struct upid *upid;
	int retval = -ENOMEM;

	/*
	 * set_tid_size contains the size of the set_tid array. Starting at
	 * the most nested currently active PID namespace it tells alloc_pid()
	 * which PID to set for a process in that most nested PID namespace
	 * up to set_tid_size PID namespaces. It does not have to set the PID
	 * for a process in all nested PID namespaces but set_tid_size must
	 * never be greater than the current ns->level + 1.
	 */
	if (set_tid_size > ns->level + 1)
		return ERR_PTR(-EINVAL);

	pid = kmem_cache_alloc(ns->pid_cachep, GFP_KERNEL);
	if (!pid)
		return ERR_PTR(retval);

	tmp = ns;
	pid->level = ns->level;

	/*
	 * Walk from @ns up to the level 0 namespace, reserving one number per
	 * level. On any failure, levels i + 1 .. ns->level already hold a
	 * reservation; out_free relies on the value of i to undo them.
	 */
	for (i = ns->level; i >= 0; i--) {
		int tid = 0;

		if (set_tid_size) {
			tid = set_tid[ns->level - i];

			retval = -EINVAL;
			if (tid < 1 || tid >= pid_max)
				goto out_free;
			/*
			 * Also fail if a PID != 1 is requested and
			 * no PID 1 exists.
			 */
			if (tid != 1 && !tmp->child_reaper)
				goto out_free;
			retval = -EPERM;
			if (!checkpoint_restore_ns_capable(tmp->user_ns))
				goto out_free;
			set_tid_size--;
		}

		idr_preload(GFP_KERNEL);
		spin_lock_irq(&pidmap_lock);

		if (tid) {
			/* The range [tid, tid + 1) allows exactly the requested number. */
			nr = idr_alloc(&tmp->idr, NULL, tid,
				       tid + 1, GFP_ATOMIC);
			/*
			 * If ENOSPC is returned it means that the PID is
			 * already in use. Return EEXIST in that case.
			 */
			if (nr == -ENOSPC)
				nr = -EEXIST;
		} else {
			int pid_min = 1;
			/*
			 * init really needs pid 1, but after reaching the
			 * maximum wrap back to RESERVED_PIDS
			 */
			if (idr_get_cursor(&tmp->idr) > RESERVED_PIDS)
				pid_min = RESERVED_PIDS;

			/*
			 * Store a null pointer so find_pid_ns does not find
			 * a partially initialized PID (see below).
			 */
			nr = idr_alloc_cyclic(&tmp->idr, NULL, pid_min,
					      pid_max, GFP_ATOMIC);
		}
		spin_unlock_irq(&pidmap_lock);
		idr_preload_end();

		if (nr < 0) {
			/* A full number space is reported to the caller as EAGAIN. */
			retval = (nr == -ENOSPC) ? -EAGAIN : nr;
			goto out_free;
		}

		pid->numbers[i].nr = nr;
		pid->numbers[i].ns = tmp;
		tmp = tmp->parent;
	}

	/*
	 * ENOMEM is not the most obvious choice especially for the case
	 * where the child subreaper has already exited and the pid
	 * namespace denies the creation of any new processes. But ENOMEM
	 * is what we have exposed to userspace for a long time and it is
	 * documented behavior for pid namespaces. So we can't easily
	 * change it even if there were an error code better suited.
	 */
	retval = -ENOMEM;

	/* Paired with put_pid_ns() in put_pid(), or in out_unlock below. */
	get_pid_ns(ns);
	refcount_set(&pid->count, 1);
	spin_lock_init(&pid->lock);
	for (type = 0; type < PIDTYPE_MAX; ++type)
		INIT_HLIST_HEAD(&pid->tasks[type]);

	init_waitqueue_head(&pid->wait_pidfd);
	INIT_HLIST_HEAD(&pid->inodes);

	upid = pid->numbers + ns->level;
	spin_lock_irq(&pidmap_lock);
	/* disable_pid_allocation() clears PIDNS_ADDING under this same lock. */
	if (!(ns->pid_allocated & PIDNS_ADDING))
		goto out_unlock;
	for ( ; upid >= pid->numbers; --upid) {
		/* Make the PID visible to find_pid_ns. */
		idr_replace(&upid->ns->idr, pid, upid->nr);
		upid->ns->pid_allocated++;
	}
	spin_unlock_irq(&pidmap_lock);

	return pid;

out_unlock:
	spin_unlock_irq(&pidmap_lock);
	put_pid_ns(ns);

out_free:
	spin_lock_irq(&pidmap_lock);
	/*
	 * i is the level that failed, or -1 when coming from out_unlock, so
	 * the pre-increment visits exactly the levels that hold a reservation.
	 */
	while (++i <= ns->level) {
		upid = pid->numbers + i;
		idr_remove(&upid->ns->idr, upid->nr);
	}

	/* On failure to allocate the first pid, reset the state */
	if (ns->pid_allocated == PIDNS_ADDING)
		idr_set_cursor(&ns->idr, 0);

	spin_unlock_irq(&pidmap_lock);

	kmem_cache_free(ns->pid_cachep, pid);
	return ERR_PTR(retval);
}
304c876ad76SEric W. Biederman
disable_pid_allocation(struct pid_namespace * ns)305e8cfbc24SGargi Sharma void disable_pid_allocation(struct pid_namespace *ns)
306c876ad76SEric W. Biederman {
307c876ad76SEric W. Biederman spin_lock_irq(&pidmap_lock);
308c876ad76SEric W. Biederman ns->pid_allocated &= ~PIDNS_ADDING;
3097ad5b3a5SHarvey Harrison spin_unlock_irq(&pidmap_lock);
3101da177e4SLinus Torvalds }
311e8cfbc24SGargi Sharma
find_pid_ns(int nr,struct pid_namespace * ns)3121da177e4SLinus Torvalds struct pid *find_pid_ns(int nr, struct pid_namespace *ns)
313198fe21bSPavel Emelyanov {
3141da177e4SLinus Torvalds return idr_find(&ns->idr, nr);
3158990571eSPavel Emelyanov }
3168990571eSPavel Emelyanov EXPORT_SYMBOL_GPL(find_pid_ns);
31717cf22c3SEric W. Biederman
find_vpid(int nr)3188990571eSPavel Emelyanov struct pid *find_vpid(int nr)
3198990571eSPavel Emelyanov {
3208990571eSPavel Emelyanov return find_pid_ns(nr, task_active_pid_ns(current));
3212c470475SEric W. Biederman }
3222c470475SEric W. Biederman EXPORT_SYMBOL_GPL(find_vpid);
3232c470475SEric W. Biederman
task_pid_ptr(struct task_struct * task,enum pid_type type)3242c470475SEric W. Biederman static struct pid **task_pid_ptr(struct task_struct *task, enum pid_type type)
3252c470475SEric W. Biederman {
3262c470475SEric W. Biederman return (type == PIDTYPE_PID) ?
3272c470475SEric W. Biederman &task->thread_pid :
328e713d0daSSukadev Bhattiprolu &task->signal->pids[type];
329e713d0daSSukadev Bhattiprolu }
330e713d0daSSukadev Bhattiprolu
33181907739SOleg Nesterov /*
3321da177e4SLinus Torvalds * attach_pid() must be called with the tasklist_lock write-held.
3332c470475SEric W. Biederman */
attach_pid(struct task_struct * task,enum pid_type type)3342c470475SEric W. Biederman void attach_pid(struct task_struct *task, enum pid_type type)
3351da177e4SLinus Torvalds {
3361da177e4SLinus Torvalds struct pid *pid = *task_pid_ptr(task, type);
33724336eaeSOleg Nesterov hlist_add_head_rcu(&task->pid_links[type], &pid->tasks[type]);
33824336eaeSOleg Nesterov }
3391da177e4SLinus Torvalds
__change_pid(struct task_struct * task,enum pid_type type,struct pid * new)3402c470475SEric W. Biederman static void __change_pid(struct task_struct *task, enum pid_type type,
34192476d7fSEric W. Biederman struct pid *new)
34292476d7fSEric W. Biederman {
3431da177e4SLinus Torvalds struct pid **pid_ptr = task_pid_ptr(task, type);
3442c470475SEric W. Biederman struct pid *pid;
34592476d7fSEric W. Biederman int tmp;
3462c470475SEric W. Biederman
3472c470475SEric W. Biederman pid = *pid_ptr;
3481da177e4SLinus Torvalds
3491da177e4SLinus Torvalds hlist_del_rcu(&task->pid_links[type]);
3501d416a11SChristian Brauner *pid_ptr = new;
3511da177e4SLinus Torvalds
3521da177e4SLinus Torvalds for (tmp = PIDTYPE_MAX; --tmp >= 0; )
35392476d7fSEric W. Biederman if (pid_has_task(pid, tmp))
3541da177e4SLinus Torvalds return;
3551da177e4SLinus Torvalds
35624336eaeSOleg Nesterov free_pid(pid);
35724336eaeSOleg Nesterov }
35824336eaeSOleg Nesterov
detach_pid(struct task_struct * task,enum pid_type type)35924336eaeSOleg Nesterov void detach_pid(struct task_struct *task, enum pid_type type)
36024336eaeSOleg Nesterov {
36124336eaeSOleg Nesterov __change_pid(task, type, NULL);
36224336eaeSOleg Nesterov }
36324336eaeSOleg Nesterov
/*
 * Move @task from its current pid of the given @type over to @pid: the old
 * link is dropped first, then the task is attached to the new pid's list.
 */
void change_pid(struct task_struct *task, enum pid_type type,
		struct pid *pid)
{
	__change_pid(task, type, pid);
	attach_pid(task, type);
}
3706b03d130SEric W. Biederman
exchange_tids(struct task_struct * left,struct task_struct * right)3716b03d130SEric W. Biederman void exchange_tids(struct task_struct *left, struct task_struct *right)
3726b03d130SEric W. Biederman {
3736b03d130SEric W. Biederman struct pid *pid1 = left->thread_pid;
3746b03d130SEric W. Biederman struct pid *pid2 = right->thread_pid;
3756b03d130SEric W. Biederman struct hlist_head *head1 = &pid1->tasks[PIDTYPE_PID];
3766b03d130SEric W. Biederman struct hlist_head *head2 = &pid2->tasks[PIDTYPE_PID];
3776b03d130SEric W. Biederman
3786b03d130SEric W. Biederman /* Swap the single entry tid lists */
3796b03d130SEric W. Biederman hlists_swap_heads_rcu(head1, head2);
3806b03d130SEric W. Biederman
3816b03d130SEric W. Biederman /* Swap the per task_struct pid */
3826b03d130SEric W. Biederman rcu_assign_pointer(left->thread_pid, pid2);
3836b03d130SEric W. Biederman rcu_assign_pointer(right->thread_pid, pid1);
3846b03d130SEric W. Biederman
3856b03d130SEric W. Biederman /* Swap the cached value */
3866b03d130SEric W. Biederman WRITE_ONCE(left->pid, pid_nr(pid2));
387c18258c6SEric W. Biederman WRITE_ONCE(right->pid, pid_nr(pid1));
3887ad5b3a5SHarvey Harrison }
389c18258c6SEric W. Biederman
390c18258c6SEric W. Biederman /* transfer_pid is an optimization of attach_pid(new), detach_pid(old) */
transfer_pid(struct task_struct * old,struct task_struct * new,enum pid_type type)3912c470475SEric W. Biederman void transfer_pid(struct task_struct *old, struct task_struct *new,
3922c470475SEric W. Biederman enum pid_type type)
3932c470475SEric W. Biederman {
394c18258c6SEric W. Biederman if (type == PIDTYPE_PID)
395c18258c6SEric W. Biederman new->thread_pid = old->thread_pid;
3967ad5b3a5SHarvey Harrison hlist_replace_rcu(&old->pid_links[type], &new->pid_links[type]);
39792476d7fSEric W. Biederman }
39892476d7fSEric W. Biederman
pid_task(struct pid * pid,enum pid_type type)39992476d7fSEric W. Biederman struct task_struct *pid_task(struct pid *pid, enum pid_type type)
40092476d7fSEric W. Biederman {
40167bdbffdSArnd Bergmann struct task_struct *result = NULL;
402db1466b3SPaul E. McKenney if (pid) {
40392476d7fSEric W. Biederman struct hlist_node *first;
4042c470475SEric W. Biederman first = rcu_dereference_check(hlist_first_rcu(&pid->tasks[type]),
40592476d7fSEric W. Biederman lockdep_tasklist_lock_is_held());
40692476d7fSEric W. Biederman if (first)
40792476d7fSEric W. Biederman result = hlist_entry(first, struct task_struct, pid_links[(type)]);
408eccba068SPavel Emelyanov }
40992476d7fSEric W. Biederman return result;
41092476d7fSEric W. Biederman }
4119728e5d6STetsuo Handa EXPORT_SYMBOL(pid_task);
41292476d7fSEric W. Biederman
41317f98dcfSChristoph Hellwig /*
4141da177e4SLinus Torvalds * Must be called under rcu_read_lock().
415f78f5b90SPaul E. McKenney */
find_task_by_pid_ns(pid_t nr,struct pid_namespace * ns)416f78f5b90SPaul E. McKenney struct task_struct *find_task_by_pid_ns(pid_t nr, struct pid_namespace *ns)
41717f98dcfSChristoph Hellwig {
4181da177e4SLinus Torvalds RCU_LOCKDEP_WARN(!rcu_read_lock_held(),
4191da177e4SLinus Torvalds "find_task_by_pid_ns() needs rcu_read_lock() protection");
420228ebcbeSPavel Emelyanov return pid_task(find_pid_ns(nr, ns), PIDTYPE_PID);
421228ebcbeSPavel Emelyanov }
42217cf22c3SEric W. Biederman
/* Like find_task_by_pid_ns(), using the current task's active pid namespace. */
struct task_struct *find_task_by_vpid(pid_t vnr)
{
	struct pid_namespace *active = task_active_pid_ns(current);

	return find_task_by_pid_ns(vnr, active);
}
4272ee08260SMike Rapoport
/*
 * Look up the task with number @nr in the current task's active pid
 * namespace and take a reference on it. Returns NULL if there is no such
 * task. The lookup runs inside its own RCU read-side section.
 */
struct task_struct *find_get_task_by_vpid(pid_t nr)
{
	struct task_struct *found;

	rcu_read_lock();
	found = find_task_by_vpid(nr);
	if (found != NULL)
		get_task_struct(found);
	rcu_read_unlock();

	return found;
}
4401a657f78SOleg Nesterov
/*
 * Read @task's pid of the given @type under rcu_read_lock() and pass it
 * through get_pid(); the result of get_pid() is returned.
 */
struct pid *get_task_pid(struct task_struct *task, enum pid_type type)
{
	struct pid *ref;

	rcu_read_lock();
	ref = rcu_dereference(*task_pid_ptr(task, type));
	ref = get_pid(ref);
	rcu_read_unlock();

	return ref;
}
EXPORT_SYMBOL_GPL(get_task_pid);
45092476d7fSEric W. Biederman
get_pid_task(struct pid * pid,enum pid_type type)45192476d7fSEric W. Biederman struct task_struct *get_pid_task(struct pid *pid, enum pid_type type)
45292476d7fSEric W. Biederman {
45392476d7fSEric W. Biederman struct task_struct *result;
45492476d7fSEric W. Biederman rcu_read_lock();
45592476d7fSEric W. Biederman result = pid_task(pid, type);
45692476d7fSEric W. Biederman if (result)
45792476d7fSEric W. Biederman get_task_struct(result);
45877c100c8SRik van Riel rcu_read_unlock();
45992476d7fSEric W. Biederman return result;
46092476d7fSEric W. Biederman }
46192476d7fSEric W. Biederman EXPORT_SYMBOL_GPL(get_pid_task);
46292476d7fSEric W. Biederman
/*
 * Look up number @nr in the current task's active pid namespace under
 * rcu_read_lock() and return the result of get_pid() on what was found.
 */
struct pid *find_get_pid(pid_t nr)
{
	struct pid *found;

	rcu_read_lock();
	found = find_vpid(nr);
	found = get_pid(found);
	rcu_read_unlock();

	return found;
}
EXPORT_SYMBOL_GPL(find_get_pid);
4747af57294SPavel Emelyanov
pid_nr_ns(struct pid * pid,struct pid_namespace * ns)4757af57294SPavel Emelyanov pid_t pid_nr_ns(struct pid *pid, struct pid_namespace *ns)
4767af57294SPavel Emelyanov {
4777af57294SPavel Emelyanov struct upid *upid;
4787af57294SPavel Emelyanov pid_t nr = 0;
4797af57294SPavel Emelyanov
4807af57294SPavel Emelyanov if (pid && ns->level <= pid->level) {
4817af57294SPavel Emelyanov upid = &pid->numbers[ns->level];
4827af57294SPavel Emelyanov if (upid->ns == ns)
4837af57294SPavel Emelyanov nr = upid->nr;
4844f82f457SEric W. Biederman }
4857af57294SPavel Emelyanov return nr;
48644c4e1b2SEric W. Biederman }
48744c4e1b2SEric W. Biederman EXPORT_SYMBOL_GPL(pid_nr_ns);
48817cf22c3SEric W. Biederman
pid_vnr(struct pid * pid)48944c4e1b2SEric W. Biederman pid_t pid_vnr(struct pid *pid)
49044c4e1b2SEric W. Biederman {
49144c4e1b2SEric W. Biederman return pid_nr_ns(pid, task_active_pid_ns(current));
49252ee2dfdSOleg Nesterov }
49352ee2dfdSOleg Nesterov EXPORT_SYMBOL_GPL(pid_vnr);
4942f2a3a46SPavel Emelyanov
__task_pid_nr_ns(struct task_struct * task,enum pid_type type,struct pid_namespace * ns)49552ee2dfdSOleg Nesterov pid_t __task_pid_nr_ns(struct task_struct *task, enum pid_type type,
49652ee2dfdSOleg Nesterov struct pid_namespace *ns)
49752ee2dfdSOleg Nesterov {
49852ee2dfdSOleg Nesterov pid_t nr = 0;
49917cf22c3SEric W. Biederman
5002c470475SEric W. Biederman rcu_read_lock();
50152ee2dfdSOleg Nesterov if (!ns)
50252ee2dfdSOleg Nesterov ns = task_active_pid_ns(current);
50352ee2dfdSOleg Nesterov nr = pid_nr_ns(rcu_dereference(*task_pid_ptr(task, type)), ns);
50452ee2dfdSOleg Nesterov rcu_read_unlock();
50552ee2dfdSOleg Nesterov
5062f2a3a46SPavel Emelyanov return nr;
50761bce0f1SEric W. Biederman }
50861bce0f1SEric W. Biederman EXPORT_SYMBOL(__task_pid_nr_ns);
50961bce0f1SEric W. Biederman
/* Pid namespace of @tsk, obtained as ns_of_pid() of the task's pid. */
struct pid_namespace *task_active_pid_ns(struct task_struct *tsk)
{
	struct pid *own = task_pid(tsk);

	return ns_of_pid(own);
}
EXPORT_SYMBOL_GPL(task_active_pid_ns);
5150804ef4bSEric W. Biederman
516e49859e7SPavel Emelyanov /*
5170804ef4bSEric W. Biederman * Used by proc to find the first pid that is greater than or equal to nr.
518198fe21bSPavel Emelyanov *
5190804ef4bSEric W. Biederman * If there is a pid at nr this function is exactly the same as find_pid_ns.
52095846ecfSGargi Sharma */
find_ge_pid(int nr,struct pid_namespace * ns)5210804ef4bSEric W. Biederman struct pid *find_ge_pid(int nr, struct pid_namespace *ns)
522*4480c27cSAndreas Gruenbacher {
5230804ef4bSEric W. Biederman return idr_get_next(&ns->idr, &nr);
5241aa92cd3SMinchan Kim }
5251aa92cd3SMinchan Kim EXPORT_SYMBOL_GPL(find_ge_pid);
5261aa92cd3SMinchan Kim
pidfd_get_pid(unsigned int fd,unsigned int * flags)5271aa92cd3SMinchan Kim struct pid *pidfd_get_pid(unsigned int fd, unsigned int *flags)
5281aa92cd3SMinchan Kim {
5291aa92cd3SMinchan Kim struct fd f;
5301aa92cd3SMinchan Kim struct pid *pid;
5311aa92cd3SMinchan Kim
5321aa92cd3SMinchan Kim f = fdget(fd);
5331aa92cd3SMinchan Kim if (!f.file)
5341aa92cd3SMinchan Kim return ERR_PTR(-EBADF);
5351aa92cd3SMinchan Kim
5361aa92cd3SMinchan Kim pid = pidfd_pid(f.file);
5371aa92cd3SMinchan Kim if (!IS_ERR(pid)) {
5381aa92cd3SMinchan Kim get_pid(pid);
5391aa92cd3SMinchan Kim *flags = f.file->f_flags;
5401aa92cd3SMinchan Kim }
5411aa92cd3SMinchan Kim
5421aa92cd3SMinchan Kim fdput(f);
54332fcb426SChristian Brauner return pid;
544e9bdcdbfSChristian Brauner }
545e9bdcdbfSChristian Brauner
546e9bdcdbfSChristian Brauner /**
547e9bdcdbfSChristian Brauner * pidfd_get_task() - Get the task associated with a pidfd
548e9bdcdbfSChristian Brauner *
549e9bdcdbfSChristian Brauner * @pidfd: pidfd for which to get the task
550e9bdcdbfSChristian Brauner * @flags: flags associated with this pidfd
551e9bdcdbfSChristian Brauner *
552e9bdcdbfSChristian Brauner * Return the task associated with @pidfd. The function takes a reference on
553e9bdcdbfSChristian Brauner * the returned task. The caller is responsible for releasing that reference.
554e9bdcdbfSChristian Brauner *
555e9bdcdbfSChristian Brauner * Currently, the process identified by @pidfd is always a thread-group leader.
556e9bdcdbfSChristian Brauner * This restriction currently exists for all aspects of pidfds including pidfd
557e9bdcdbfSChristian Brauner * creation (CLONE_PIDFD cannot be used with CLONE_THREAD) and pidfd polling
558e9bdcdbfSChristian Brauner * (only supports thread group leaders).
559e9bdcdbfSChristian Brauner *
560e9bdcdbfSChristian Brauner * Return: On success, the task_struct associated with the pidfd.
561e9bdcdbfSChristian Brauner * On error, a negative errno number will be returned.
562e9bdcdbfSChristian Brauner */
pidfd_get_task(int pidfd,unsigned int * flags)563e9bdcdbfSChristian Brauner struct task_struct *pidfd_get_task(int pidfd, unsigned int *flags)
564e9bdcdbfSChristian Brauner {
565e9bdcdbfSChristian Brauner unsigned int f_flags;
566e9bdcdbfSChristian Brauner struct pid *pid;
567e9bdcdbfSChristian Brauner struct task_struct *task;
568e9bdcdbfSChristian Brauner
569e9bdcdbfSChristian Brauner pid = pidfd_get_pid(pidfd, &f_flags);
570e9bdcdbfSChristian Brauner if (IS_ERR(pid))
571e9bdcdbfSChristian Brauner return ERR_CAST(pid);
572e9bdcdbfSChristian Brauner
573e9bdcdbfSChristian Brauner task = get_pid_task(pid, PIDTYPE_TGID);
574e9bdcdbfSChristian Brauner put_pid(pid);
575e9bdcdbfSChristian Brauner if (!task)
576e9bdcdbfSChristian Brauner return ERR_PTR(-ESRCH);
577e9bdcdbfSChristian Brauner
578e9bdcdbfSChristian Brauner *flags = f_flags;
579e9bdcdbfSChristian Brauner return task;
58032fcb426SChristian Brauner }
58132fcb426SChristian Brauner
/**
 * pidfd_create() - Create a new pid file descriptor.
 *
 * @pid:   struct pid that the pidfd will reference
 * @flags: flags to pass
 *
 * This creates a new pid file descriptor with the O_CLOEXEC flag set.
 *
 * Note, that this function can only be called after the fd table has
 * been unshared to avoid leaking the pidfd to the new process.
 *
 * This symbol should not be explicitly exported to loadable modules.
 *
 * Return: On success, a cloexec pidfd is returned.
 *         On error, a negative errno number will be returned.
 */
int pidfd_create(struct pid *pid, unsigned int flags)
{
	struct file *file;
	int fd = pidfd_prepare(pid, flags, &file);

	/* Only a successfully prepared descriptor is installed in the fd table. */
	if (fd >= 0)
		fd_install(fd, file);

	return fd;
}
61032fcb426SChristian Brauner
61132fcb426SChristian Brauner /**
61232fcb426SChristian Brauner * sys_pidfd_open() - Open new pid file descriptor.
61332fcb426SChristian Brauner *
61432fcb426SChristian Brauner * @pid: pid for which to retrieve a pidfd
61532fcb426SChristian Brauner * @flags: flags to pass
61632fcb426SChristian Brauner *
61732fcb426SChristian Brauner * This creates a new pid file descriptor with the O_CLOEXEC flag set for
61832fcb426SChristian Brauner * the process identified by @pid. Currently, the process identified by
61932fcb426SChristian Brauner * @pid must be a thread-group leader. This restriction currently exists
62032fcb426SChristian Brauner * for all aspects of pidfds including pidfd creation (CLONE_PIDFD cannot
62132fcb426SChristian Brauner * be used with CLONE_THREAD) and pidfd polling (only supports thread group
62232fcb426SChristian Brauner * leaders).
62332fcb426SChristian Brauner *
62432fcb426SChristian Brauner * Return: On success, a cloexec pidfd is returned.
62532fcb426SChristian Brauner * On error, a negative errno number will be returned.
62632fcb426SChristian Brauner */
SYSCALL_DEFINE2(pidfd_open,pid_t,pid,unsigned int,flags)62732fcb426SChristian Brauner SYSCALL_DEFINE2(pidfd_open, pid_t, pid, unsigned int, flags)
62832fcb426SChristian Brauner {
62932fcb426SChristian Brauner int fd;
63032fcb426SChristian Brauner struct pid *p;
6311e1d0f0bSChristian Brauner
63232fcb426SChristian Brauner if (flags & ~PIDFD_NONBLOCK)
63332fcb426SChristian Brauner return -EINVAL;
6346da73d15SChristian Brauner
63532fcb426SChristian Brauner if (pid <= 0)
63632fcb426SChristian Brauner return -EINVAL;
63732fcb426SChristian Brauner
63832fcb426SChristian Brauner p = find_get_pid(pid);
63932fcb426SChristian Brauner if (!p)
64032fcb426SChristian Brauner return -ESRCH;
64132fcb426SChristian Brauner
64232fcb426SChristian Brauner fd = pidfd_create(p, flags);
64332fcb426SChristian Brauner
6446da73d15SChristian Brauner put_pid(p);
64532fcb426SChristian Brauner return fd;
64632fcb426SChristian Brauner }
64732fcb426SChristian Brauner
pid_idr_init(void)64832fcb426SChristian Brauner void __init pid_idr_init(void)
64932fcb426SChristian Brauner {
65095846ecfSGargi Sharma /* Verify no one has done anything silly: */
6511da177e4SLinus Torvalds BUILD_BUG_ON(PID_MAX_LIMIT >= PIDNS_ADDING);
652840d6fe7SZhen Lei
653e8cfbc24SGargi Sharma /* bump default and minimum pid_max based on number of cpus */
654c876ad76SEric W. Biederman pid_max = min(pid_max_max, max_t(int, pid_max,
65572680a19SHedi Berriche PIDS_PER_CPU_DEFAULT * num_possible_cpus()));
65672680a19SHedi Berriche pid_max_min = max_t(int, pid_max_min,
65772680a19SHedi Berriche PIDS_PER_CPU_MIN * num_possible_cpus());
65872680a19SHedi Berriche pr_info("pid_max: default: %u minimum: %u\n", pid_max, pid_max_min);
65972680a19SHedi Berriche
66072680a19SHedi Berriche idr_init(&init_pid_ns.idr);
66172680a19SHedi Berriche
66295846ecfSGargi Sharma init_pid_ns.pid_cachep = kmem_cache_create("pid",
66392476d7fSEric W. Biederman struct_size_t(struct pid, numbers, 1),
66474bd59bbSPavel Emelyanov __alignof__(struct pid),
6655d097056SVladimir Davydov SLAB_HWCACHE_ALIGN | SLAB_PANIC | SLAB_ACCOUNT,
6661da177e4SLinus Torvalds NULL);
6678649c322SSargun Dhillon }
6688649c322SSargun Dhillon
__pidfd_fget(struct task_struct * task,int fd)6698649c322SSargun Dhillon static struct file *__pidfd_fget(struct task_struct *task, int fd)
6708649c322SSargun Dhillon {
6718649c322SSargun Dhillon struct file *file;
6728649c322SSargun Dhillon int ret;
673f7cfd871SEric W. Biederman
6748649c322SSargun Dhillon ret = down_read_killable(&task->signal->exec_update_lock);
6758649c322SSargun Dhillon if (ret)
6768649c322SSargun Dhillon return ERR_PTR(ret);
6778649c322SSargun Dhillon
6788649c322SSargun Dhillon if (ptrace_may_access(task, PTRACE_MODE_ATTACH_REALCREDS))
6798649c322SSargun Dhillon file = fget_task(task, fd);
6808649c322SSargun Dhillon else
6818649c322SSargun Dhillon file = ERR_PTR(-EPERM);
682f7cfd871SEric W. Biederman
6838649c322SSargun Dhillon up_read(&task->signal->exec_update_lock);
6848649c322SSargun Dhillon
6858649c322SSargun Dhillon return file ?: ERR_PTR(-EBADF);
6868649c322SSargun Dhillon }
6878649c322SSargun Dhillon
pidfd_getfd(struct pid * pid,int fd)6888649c322SSargun Dhillon static int pidfd_getfd(struct pid *pid, int fd)
6898649c322SSargun Dhillon {
6908649c322SSargun Dhillon struct task_struct *task;
6918649c322SSargun Dhillon struct file *file;
6928649c322SSargun Dhillon int ret;
6938649c322SSargun Dhillon
6948649c322SSargun Dhillon task = get_pid_task(pid, PIDTYPE_PID);
6958649c322SSargun Dhillon if (!task)
6968649c322SSargun Dhillon return -ESRCH;
6978649c322SSargun Dhillon
6988649c322SSargun Dhillon file = __pidfd_fget(task, fd);
6998649c322SSargun Dhillon put_task_struct(task);
7008649c322SSargun Dhillon if (IS_ERR(file))
7018649c322SSargun Dhillon return PTR_ERR(file);
702910d2f16SKees Cook
7038649c322SSargun Dhillon ret = receive_fd(file, O_CLOEXEC);
7048649c322SSargun Dhillon fput(file);
7058649c322SSargun Dhillon
7068649c322SSargun Dhillon return ret;
7078649c322SSargun Dhillon }
7088649c322SSargun Dhillon
7098649c322SSargun Dhillon /**
7108649c322SSargun Dhillon * sys_pidfd_getfd() - Get a file descriptor from another process
7118649c322SSargun Dhillon *
7128649c322SSargun Dhillon * @pidfd: the pidfd file descriptor of the process
7138649c322SSargun Dhillon * @fd: the file descriptor number to get
7148649c322SSargun Dhillon * @flags: flags on how to get the fd (reserved)
7158649c322SSargun Dhillon *
7168649c322SSargun Dhillon * This syscall gets a copy of a file descriptor from another process
7178649c322SSargun Dhillon * based on the pidfd, and file descriptor number. It requires that
7188649c322SSargun Dhillon * the calling process has the ability to ptrace the process represented
7198649c322SSargun Dhillon * by the pidfd. The process which is having its file descriptor copied
7208649c322SSargun Dhillon * is otherwise unaffected.
7218649c322SSargun Dhillon *
7228649c322SSargun Dhillon * Return: On success, a cloexec file descriptor is returned.
7238649c322SSargun Dhillon * On error, a negative errno number will be returned.
7248649c322SSargun Dhillon */
SYSCALL_DEFINE3(pidfd_getfd,int,pidfd,int,fd,unsigned int,flags)7258649c322SSargun Dhillon SYSCALL_DEFINE3(pidfd_getfd, int, pidfd, int, fd,
7268649c322SSargun Dhillon unsigned int, flags)
7278649c322SSargun Dhillon {
7288649c322SSargun Dhillon struct pid *pid;
7298649c322SSargun Dhillon struct fd f;
7308649c322SSargun Dhillon int ret;
7318649c322SSargun Dhillon
7328649c322SSargun Dhillon /* flags is currently unused - make sure it's unset */
7338649c322SSargun Dhillon if (flags)
7348649c322SSargun Dhillon return -EINVAL;
7358649c322SSargun Dhillon
7368649c322SSargun Dhillon f = fdget(pidfd);
7378649c322SSargun Dhillon if (!f.file)
7388649c322SSargun Dhillon return -EBADF;
7398649c322SSargun Dhillon
7408649c322SSargun Dhillon pid = pidfd_pid(f.file);
7418649c322SSargun Dhillon if (IS_ERR(pid))
7428649c322SSargun Dhillon ret = PTR_ERR(pid);
7438649c322SSargun Dhillon else
7448649c322SSargun Dhillon ret = pidfd_getfd(pid, fd);
7458649c322SSargun Dhillon
7468649c322SSargun Dhillon fdput(f);
7478649c322SSargun Dhillon return ret;
748 }
749