xref: /openbmc/linux/kernel/pid.c (revision 4f2c0a4acffbec01079c28f839422e64ddeff004)
1457c8996SThomas Gleixner // SPDX-License-Identifier: GPL-2.0-only
21da177e4SLinus Torvalds /*
31da177e4SLinus Torvalds  * Generic pidhash and scalable, time-bounded PID allocator
41da177e4SLinus Torvalds  *
56d49e352SNadia Yvette Chambers  * (C) 2002-2003 Nadia Yvette Chambers, IBM
66d49e352SNadia Yvette Chambers  * (C) 2004 Nadia Yvette Chambers, Oracle
71da177e4SLinus Torvalds  * (C) 2002-2004 Ingo Molnar, Red Hat
81da177e4SLinus Torvalds  *
91da177e4SLinus Torvalds  * pid-structures are backing objects for tasks sharing a given ID to chain
101da177e4SLinus Torvalds  * against. There is very little to them aside from hashing them and
111da177e4SLinus Torvalds  * parking tasks using given ID's on a list.
121da177e4SLinus Torvalds  *
131da177e4SLinus Torvalds  * The hash is always changed with the tasklist_lock write-acquired,
141da177e4SLinus Torvalds  * and the hash is only accessed with the tasklist_lock at least
151da177e4SLinus Torvalds  * read-acquired, so there's no additional SMP locking needed here.
161da177e4SLinus Torvalds  *
171da177e4SLinus Torvalds  * We have a list of bitmap pages, which bitmaps represent the PID space.
181da177e4SLinus Torvalds  * Allocating and freeing PIDs is completely lockless. The worst-case
191da177e4SLinus Torvalds  * allocation scenario when all but one out of 1 million PIDs possible are
201da177e4SLinus Torvalds  * allocated already: the scanning of 32 list entries and at most PAGE_SIZE
211da177e4SLinus Torvalds  * bytes. The typical fastpath is a single successful setbit. Freeing is O(1).
2230e49c26SPavel Emelyanov  *
2330e49c26SPavel Emelyanov  * Pid namespaces:
2430e49c26SPavel Emelyanov  *    (C) 2007 Pavel Emelyanov <xemul@openvz.org>, OpenVZ, SWsoft Inc.
2530e49c26SPavel Emelyanov  *    (C) 2007 Sukadev Bhattiprolu <sukadev@us.ibm.com>, IBM
2630e49c26SPavel Emelyanov  *     Many thanks to Oleg Nesterov for comments and help
2730e49c26SPavel Emelyanov  *
281da177e4SLinus Torvalds  */
291da177e4SLinus Torvalds 
301da177e4SLinus Torvalds #include <linux/mm.h>
319984de1aSPaul Gortmaker #include <linux/export.h>
321da177e4SLinus Torvalds #include <linux/slab.h>
331da177e4SLinus Torvalds #include <linux/init.h>
3482524746SFranck Bui-Huu #include <linux/rculist.h>
3557c8a661SMike Rapoport #include <linux/memblock.h>
3661a58c6cSSukadev Bhattiprolu #include <linux/pid_namespace.h>
37820e45dbSSukadev Bhattiprolu #include <linux/init_task.h>
383eb07c8cSSukadev Bhattiprolu #include <linux/syscalls.h>
390bb80f24SDavid Howells #include <linux/proc_ns.h>
40f57e515aSJoel Fernandes (Google) #include <linux/refcount.h>
4132fcb426SChristian Brauner #include <linux/anon_inodes.h>
4232fcb426SChristian Brauner #include <linux/sched/signal.h>
4329930025SIngo Molnar #include <linux/sched/task.h>
4495846ecfSGargi Sharma #include <linux/idr.h>
454969f8a0SKees Cook #include <net/sock.h>
466da73d15SChristian Brauner #include <uapi/linux/pidfd.h>
471da177e4SLinus Torvalds 
48e1e871afSDavid Howells struct pid init_struct_pid = {
49f57e515aSJoel Fernandes (Google) 	.count		= REFCOUNT_INIT(1),
50e1e871afSDavid Howells 	.tasks		= {
51e1e871afSDavid Howells 		{ .first = NULL },
52e1e871afSDavid Howells 		{ .first = NULL },
53e1e871afSDavid Howells 		{ .first = NULL },
54e1e871afSDavid Howells 	},
55e1e871afSDavid Howells 	.level		= 0,
56e1e871afSDavid Howells 	.numbers	= { {
57e1e871afSDavid Howells 		.nr		= 0,
58e1e871afSDavid Howells 		.ns		= &init_pid_ns,
59e1e871afSDavid Howells 	}, }
60e1e871afSDavid Howells };
611da177e4SLinus Torvalds 
621da177e4SLinus Torvalds int pid_max = PID_MAX_DEFAULT;
631da177e4SLinus Torvalds 
641da177e4SLinus Torvalds #define RESERVED_PIDS		300
651da177e4SLinus Torvalds 
661da177e4SLinus Torvalds int pid_max_min = RESERVED_PIDS + 1;
671da177e4SLinus Torvalds int pid_max_max = PID_MAX_LIMIT;
681da177e4SLinus Torvalds 
691da177e4SLinus Torvalds /*
701da177e4SLinus Torvalds  * PID-map pages start out as NULL, they get allocated upon
711da177e4SLinus Torvalds  * first use and are never deallocated. This way a low pid_max
721da177e4SLinus Torvalds  * value does not cause lots of bitmaps to be allocated, but
731da177e4SLinus Torvalds  * the scheme scales to up to 4 million PIDs, runtime.
741da177e4SLinus Torvalds  */
7561a58c6cSSukadev Bhattiprolu struct pid_namespace init_pid_ns = {
768eb71d95SKirill Tkhai 	.ns.count = REFCOUNT_INIT(2),
77f6bb2a2cSMatthew Wilcox 	.idr = IDR_INIT(init_pid_ns.idr),
78e8cfbc24SGargi Sharma 	.pid_allocated = PIDNS_ADDING,
79faacbfd3SPavel Emelyanov 	.level = 0,
80faacbfd3SPavel Emelyanov 	.child_reaper = &init_task,
8149f4d8b9SEric W. Biederman 	.user_ns = &init_user_ns,
82435d5f4bSAl Viro 	.ns.inum = PROC_PID_INIT_INO,
8333c42940SAl Viro #ifdef CONFIG_PID_NS
8433c42940SAl Viro 	.ns.ops = &pidns_operations,
8533c42940SAl Viro #endif
863fbc9648SSukadev Bhattiprolu #if defined(CONFIG_SYSCTL) && defined(CONFIG_MEMFD_CREATE)
87198fe21bSPavel Emelyanov 	.memfd_noexec_scope = MEMFD_NOEXEC_SCOPE_EXEC,
881da177e4SLinus Torvalds #endif
8992476d7fSEric W. Biederman };
9092476d7fSEric W. Biederman EXPORT_SYMBOL_GPL(init_pid_ns);
9192476d7fSEric W. Biederman 
9292476d7fSEric W. Biederman /*
9392476d7fSEric W. Biederman  * Note: disable interrupts while the pidmap_lock is held as an
9492476d7fSEric W. Biederman  * interrupt might come in and do read_lock(&tasklist_lock).
9592476d7fSEric W. Biederman  *
9692476d7fSEric W. Biederman  * If we don't disable interrupts there is a nasty deadlock between
9792476d7fSEric W. Biederman  * detach_pid()->free_pid() and another cpu that does
9892476d7fSEric W. Biederman  * spin_lock(&pidmap_lock) followed by an interrupt routine that does
9992476d7fSEric W. Biederman  * read_lock(&tasklist_lock);
10092476d7fSEric W. Biederman  *
10192476d7fSEric W. Biederman  * After we clean up the tasklist_lock and know there are no
1023fbc9648SSukadev Bhattiprolu  * irq handlers that take it we can leave the interrupts enabled.
1031da177e4SLinus Torvalds  * For now it is easier to be safe than to prove it can't happen.
1041da177e4SLinus Torvalds  */
1057ad5b3a5SHarvey Harrison 
10692476d7fSEric W. Biederman static  __cacheline_aligned_in_smp DEFINE_SPINLOCK(pidmap_lock);
107baf8f0f8SPavel Emelianov 
put_pid(struct pid * pid)108baf8f0f8SPavel Emelianov void put_pid(struct pid *pid)
10992476d7fSEric W. Biederman {
11092476d7fSEric W. Biederman 	struct pid_namespace *ns;
111baf8f0f8SPavel Emelianov 
1128ef047aaSPavel Emelyanov 	if (!pid)
113f57e515aSJoel Fernandes (Google) 		return;
114baf8f0f8SPavel Emelianov 
1158ef047aaSPavel Emelyanov 	ns = pid->numbers[pid->level].ns;
1168ef047aaSPavel Emelyanov 	if (refcount_dec_and_test(&pid->count)) {
11792476d7fSEric W. Biederman 		kmem_cache_free(ns->pid_cachep, pid);
118bbf73147SEric W. Biederman 		put_pid_ns(ns);
11992476d7fSEric W. Biederman 	}
12092476d7fSEric W. Biederman }
12192476d7fSEric W. Biederman EXPORT_SYMBOL_GPL(put_pid);
12292476d7fSEric W. Biederman 
delayed_put_pid(struct rcu_head * rhp)12392476d7fSEric W. Biederman static void delayed_put_pid(struct rcu_head *rhp)
12492476d7fSEric W. Biederman {
12592476d7fSEric W. Biederman 	struct pid *pid = container_of(rhp, struct pid, rcu);
1267ad5b3a5SHarvey Harrison 	put_pid(pid);
12792476d7fSEric W. Biederman }
12892476d7fSEric W. Biederman 
free_pid(struct pid * pid)1298ef047aaSPavel Emelyanov void free_pid(struct pid *pid)
13092476d7fSEric W. Biederman {
13192476d7fSEric W. Biederman 	/* We can be called with write_lock_irq(&tasklist_lock) held */
13292476d7fSEric W. Biederman 	int i;
1330a01f2ccSEric W. Biederman 	unsigned long flags;
1340a01f2ccSEric W. Biederman 
135af4b8a83SEric W. Biederman 	spin_lock_irqsave(&pidmap_lock, flags);
136e8cfbc24SGargi Sharma 	for (i = 0; i <= pid->level; i++) {
137a6064885SEric W. Biederman 		struct upid *upid = pid->numbers + i;
138af4b8a83SEric W. Biederman 		struct pid_namespace *ns = upid->ns;
139af4b8a83SEric W. Biederman 		switch (--ns->pid_allocated) {
140af4b8a83SEric W. Biederman 		case 2:
141af4b8a83SEric W. Biederman 		case 1:
142af4b8a83SEric W. Biederman 			/* When all that is left in the pid namespace
143af4b8a83SEric W. Biederman 			 * is the reaper wake up the reaper.  The reaper
144af4b8a83SEric W. Biederman 			 * may be sleeping in zap_pid_ns_processes().
145e8cfbc24SGargi Sharma 			 */
146314a8ad0SOleg Nesterov 			wake_up_process(ns->child_reaper);
147314a8ad0SOleg Nesterov 			break;
148e8cfbc24SGargi Sharma 		case PIDNS_ADDING:
149af4b8a83SEric W. Biederman 			/* Handle a fork failure of the first process */
1500a01f2ccSEric W. Biederman 			WARN_ON(ns->child_reaper);
15195846ecfSGargi Sharma 			ns->pid_allocated = 0;
15295846ecfSGargi Sharma 			break;
1535e1182deSEric W. Biederman 		}
15492476d7fSEric W. Biederman 
15592476d7fSEric W. Biederman 		idr_remove(&ns->idr, upid->nr);
15692476d7fSEric W. Biederman 	}
15792476d7fSEric W. Biederman 	spin_unlock_irqrestore(&pidmap_lock, flags);
15892476d7fSEric W. Biederman 
15949cb2fc4SAdrian Reber 	call_rcu(&pid->rcu, delayed_put_pid);
16049cb2fc4SAdrian Reber }
16192476d7fSEric W. Biederman 
/*
 * alloc_pid - allocate a struct pid with a number in @ns and every ancestor.
 *
 * @ns:           most nested pid namespace the new pid lives in
 * @set_tid:      optional array of requested PID numbers; entry 0 is for
 *                @ns, entry 1 for its parent, and so on
 * @set_tid_size: number of valid entries in @set_tid (0: nothing requested)
 *
 * Numbers are reserved from @ns up to the level 0 namespace with a NULL
 * pointer stored in the IDR, then the pid is published in all of them in
 * one go under pidmap_lock.
 *
 * Returns the new pid holding one reference, or an ERR_PTR():
 *   -EINVAL  @set_tid_size is too large, a requested number is outside
 *            1..pid_max-1, or a number != 1 is requested in a namespace
 *            that has no child reaper
 *   -EPERM   checkpoint_restore_ns_capable() failed for a namespace a number
 *            was requested in
 *   -EEXIST  a requested number is already in use
 *   -EAGAIN  no free number is left in the cyclic range
 *   -ENOMEM  allocation failure, or pid allocation has been disabled in @ns
 */
struct pid *alloc_pid(struct pid_namespace *ns, pid_t *set_tid,
		      size_t set_tid_size)
{
	struct pid_namespace *cur_ns;
	enum pid_type type;
	struct upid *upid;
	struct pid *pid;
	int retval = -ENOMEM;
	int i, nr;

	/*
	 * set_tid_size is the number of entries in set_tid[].  Starting at
	 * the most nested currently active PID namespace, set_tid[] names the
	 * PID to use in that namespace and then in up to set_tid_size - 1
	 * ancestors.  Not every nesting level has to be covered, but
	 * set_tid_size can never exceed ns->level + 1.
	 */
	if (set_tid_size > ns->level + 1)
		return ERR_PTR(-EINVAL);

	pid = kmem_cache_alloc(ns->pid_cachep, GFP_KERNEL);
	if (pid == NULL)
		return ERR_PTR(retval);

	pid->level = ns->level;
	cur_ns = ns;

	/* Reserve a number per level, walking from @ns towards the root. */
	for (i = ns->level; i >= 0; i--) {
		int tid = 0;

		if (set_tid_size) {
			tid = set_tid[ns->level - i];

			retval = -EINVAL;
			if (tid < 1 || tid >= pid_max)
				goto out_free;
			/*
			 * A PID other than 1 cannot be requested while the
			 * namespace has no PID 1 yet.
			 */
			if (tid != 1 && !cur_ns->child_reaper)
				goto out_free;

			retval = -EPERM;
			if (!checkpoint_restore_ns_capable(cur_ns->user_ns))
				goto out_free;
			set_tid_size--;
		}

		idr_preload(GFP_KERNEL);
		spin_lock_irq(&pidmap_lock);

		if (tid) {
			/* Exactly @tid or nothing: the range is [tid, tid + 1). */
			nr = idr_alloc(&cur_ns->idr, NULL, tid,
				       tid + 1, GFP_ATOMIC);
			/*
			 * ENOSPC here means the requested PID is already in
			 * use; report that as EEXIST.
			 */
			if (nr == -ENOSPC)
				nr = -EEXIST;
		} else {
			/*
			 * init really needs pid 1, but after reaching the
			 * maximum the allocator wraps back to RESERVED_PIDS.
			 */
			int pid_min =
				idr_get_cursor(&cur_ns->idr) > RESERVED_PIDS ?
				RESERVED_PIDS : 1;

			/*
			 * A NULL pointer is stored so that find_pid_ns() does
			 * not find a partially initialized PID; the real
			 * pointer is installed by idr_replace() further down.
			 */
			nr = idr_alloc_cyclic(&cur_ns->idr, NULL, pid_min,
					      pid_max, GFP_ATOMIC);
		}
		spin_unlock_irq(&pidmap_lock);
		idr_preload_end();

		if (nr < 0) {
			if (nr == -ENOSPC)
				retval = -EAGAIN;
			else
				retval = nr;
			goto out_free;
		}

		pid->numbers[i].nr = nr;
		pid->numbers[i].ns = cur_ns;
		cur_ns = cur_ns->parent;
	}

	/*
	 * ENOMEM is not the most obvious choice, especially when the child
	 * subreaper has already exited and the pid namespace refuses to
	 * create any new process.  It is, however, what has been exposed to
	 * userspace for a long time and it is documented behavior for pid
	 * namespaces, so it cannot easily be changed even if a better suited
	 * error code existed.
	 */
	retval = -ENOMEM;

	get_pid_ns(ns);
	refcount_set(&pid->count, 1);
	spin_lock_init(&pid->lock);
	for (type = 0; type < PIDTYPE_MAX; ++type)
		INIT_HLIST_HEAD(&pid->tasks[type]);

	init_waitqueue_head(&pid->wait_pidfd);
	INIT_HLIST_HEAD(&pid->inodes);

	upid = &pid->numbers[ns->level];
	spin_lock_irq(&pidmap_lock);
	if (!(ns->pid_allocated & PIDNS_ADDING))
		goto out_unlock;
	while (upid >= pid->numbers) {
		/* Make the PID visible to find_pid_ns. */
		idr_replace(&upid->ns->idr, pid, upid->nr);
		upid->ns->pid_allocated++;
		upid--;
	}
	spin_unlock_irq(&pidmap_lock);

	return pid;

out_unlock:
	spin_unlock_irq(&pidmap_lock);
	put_pid_ns(ns);
	/* The loop above left i at -1, so every level is released below. */

out_free:
	/* Levels above i already hold a reserved number; give them back. */
	spin_lock_irq(&pidmap_lock);
	for (i++; i <= ns->level; i++) {
		upid = &pid->numbers[i];
		idr_remove(&upid->ns->idr, upid->nr);
	}

	/* On failure to allocate the first pid, reset the state */
	if (ns->pid_allocated == PIDNS_ADDING)
		idr_set_cursor(&ns->idr, 0);

	spin_unlock_irq(&pidmap_lock);

	kmem_cache_free(ns->pid_cachep, pid);
	return ERR_PTR(retval);
}
304c876ad76SEric W. Biederman 
disable_pid_allocation(struct pid_namespace * ns)305e8cfbc24SGargi Sharma void disable_pid_allocation(struct pid_namespace *ns)
306c876ad76SEric W. Biederman {
307c876ad76SEric W. Biederman 	spin_lock_irq(&pidmap_lock);
308c876ad76SEric W. Biederman 	ns->pid_allocated &= ~PIDNS_ADDING;
3097ad5b3a5SHarvey Harrison 	spin_unlock_irq(&pidmap_lock);
3101da177e4SLinus Torvalds }
311e8cfbc24SGargi Sharma 
find_pid_ns(int nr,struct pid_namespace * ns)3121da177e4SLinus Torvalds struct pid *find_pid_ns(int nr, struct pid_namespace *ns)
313198fe21bSPavel Emelyanov {
3141da177e4SLinus Torvalds 	return idr_find(&ns->idr, nr);
3158990571eSPavel Emelyanov }
3168990571eSPavel Emelyanov EXPORT_SYMBOL_GPL(find_pid_ns);
31717cf22c3SEric W. Biederman 
find_vpid(int nr)3188990571eSPavel Emelyanov struct pid *find_vpid(int nr)
3198990571eSPavel Emelyanov {
3208990571eSPavel Emelyanov 	return find_pid_ns(nr, task_active_pid_ns(current));
3212c470475SEric W. Biederman }
3222c470475SEric W. Biederman EXPORT_SYMBOL_GPL(find_vpid);
3232c470475SEric W. Biederman 
task_pid_ptr(struct task_struct * task,enum pid_type type)3242c470475SEric W. Biederman static struct pid **task_pid_ptr(struct task_struct *task, enum pid_type type)
3252c470475SEric W. Biederman {
3262c470475SEric W. Biederman 	return (type == PIDTYPE_PID) ?
3272c470475SEric W. Biederman 		&task->thread_pid :
328e713d0daSSukadev Bhattiprolu 		&task->signal->pids[type];
329e713d0daSSukadev Bhattiprolu }
330e713d0daSSukadev Bhattiprolu 
33181907739SOleg Nesterov /*
3321da177e4SLinus Torvalds  * attach_pid() must be called with the tasklist_lock write-held.
3332c470475SEric W. Biederman  */
attach_pid(struct task_struct * task,enum pid_type type)3342c470475SEric W. Biederman void attach_pid(struct task_struct *task, enum pid_type type)
3351da177e4SLinus Torvalds {
3361da177e4SLinus Torvalds 	struct pid *pid = *task_pid_ptr(task, type);
33724336eaeSOleg Nesterov 	hlist_add_head_rcu(&task->pid_links[type], &pid->tasks[type]);
33824336eaeSOleg Nesterov }
3391da177e4SLinus Torvalds 
__change_pid(struct task_struct * task,enum pid_type type,struct pid * new)3402c470475SEric W. Biederman static void __change_pid(struct task_struct *task, enum pid_type type,
34192476d7fSEric W. Biederman 			struct pid *new)
34292476d7fSEric W. Biederman {
3431da177e4SLinus Torvalds 	struct pid **pid_ptr = task_pid_ptr(task, type);
3442c470475SEric W. Biederman 	struct pid *pid;
34592476d7fSEric W. Biederman 	int tmp;
3462c470475SEric W. Biederman 
3472c470475SEric W. Biederman 	pid = *pid_ptr;
3481da177e4SLinus Torvalds 
3491da177e4SLinus Torvalds 	hlist_del_rcu(&task->pid_links[type]);
3501d416a11SChristian Brauner 	*pid_ptr = new;
3511da177e4SLinus Torvalds 
3521da177e4SLinus Torvalds 	for (tmp = PIDTYPE_MAX; --tmp >= 0; )
35392476d7fSEric W. Biederman 		if (pid_has_task(pid, tmp))
3541da177e4SLinus Torvalds 			return;
3551da177e4SLinus Torvalds 
35624336eaeSOleg Nesterov 	free_pid(pid);
35724336eaeSOleg Nesterov }
35824336eaeSOleg Nesterov 
detach_pid(struct task_struct * task,enum pid_type type)35924336eaeSOleg Nesterov void detach_pid(struct task_struct *task, enum pid_type type)
36024336eaeSOleg Nesterov {
36124336eaeSOleg Nesterov 	__change_pid(task, type, NULL);
36224336eaeSOleg Nesterov }
36324336eaeSOleg Nesterov 
/*
 * change_pid - move @task from its current pid of the given @type to @pid.
 *
 * The task is unlinked from the old pid (which is freed if nothing uses it
 * any more) and then linked onto @pid's list for @type.
 */
void change_pid(struct task_struct *task, enum pid_type type,
		struct pid *pid)
{
	/* Drop the old association and record @pid in the task ... */
	__change_pid(task, type, pid);
	/* ... then hook the task onto the new pid's list. */
	attach_pid(task, type);
}
3706b03d130SEric W. Biederman 
exchange_tids(struct task_struct * left,struct task_struct * right)3716b03d130SEric W. Biederman void exchange_tids(struct task_struct *left, struct task_struct *right)
3726b03d130SEric W. Biederman {
3736b03d130SEric W. Biederman 	struct pid *pid1 = left->thread_pid;
3746b03d130SEric W. Biederman 	struct pid *pid2 = right->thread_pid;
3756b03d130SEric W. Biederman 	struct hlist_head *head1 = &pid1->tasks[PIDTYPE_PID];
3766b03d130SEric W. Biederman 	struct hlist_head *head2 = &pid2->tasks[PIDTYPE_PID];
3776b03d130SEric W. Biederman 
3786b03d130SEric W. Biederman 	/* Swap the single entry tid lists */
3796b03d130SEric W. Biederman 	hlists_swap_heads_rcu(head1, head2);
3806b03d130SEric W. Biederman 
3816b03d130SEric W. Biederman 	/* Swap the per task_struct pid */
3826b03d130SEric W. Biederman 	rcu_assign_pointer(left->thread_pid, pid2);
3836b03d130SEric W. Biederman 	rcu_assign_pointer(right->thread_pid, pid1);
3846b03d130SEric W. Biederman 
3856b03d130SEric W. Biederman 	/* Swap the cached value */
3866b03d130SEric W. Biederman 	WRITE_ONCE(left->pid, pid_nr(pid2));
387c18258c6SEric W. Biederman 	WRITE_ONCE(right->pid, pid_nr(pid1));
3887ad5b3a5SHarvey Harrison }
389c18258c6SEric W. Biederman 
390c18258c6SEric W. Biederman /* transfer_pid is an optimization of attach_pid(new), detach_pid(old) */
transfer_pid(struct task_struct * old,struct task_struct * new,enum pid_type type)3912c470475SEric W. Biederman void transfer_pid(struct task_struct *old, struct task_struct *new,
3922c470475SEric W. Biederman 			   enum pid_type type)
3932c470475SEric W. Biederman {
394c18258c6SEric W. Biederman 	if (type == PIDTYPE_PID)
395c18258c6SEric W. Biederman 		new->thread_pid = old->thread_pid;
3967ad5b3a5SHarvey Harrison 	hlist_replace_rcu(&old->pid_links[type], &new->pid_links[type]);
39792476d7fSEric W. Biederman }
39892476d7fSEric W. Biederman 
pid_task(struct pid * pid,enum pid_type type)39992476d7fSEric W. Biederman struct task_struct *pid_task(struct pid *pid, enum pid_type type)
40092476d7fSEric W. Biederman {
40167bdbffdSArnd Bergmann 	struct task_struct *result = NULL;
402db1466b3SPaul E. McKenney 	if (pid) {
40392476d7fSEric W. Biederman 		struct hlist_node *first;
4042c470475SEric W. Biederman 		first = rcu_dereference_check(hlist_first_rcu(&pid->tasks[type]),
40592476d7fSEric W. Biederman 					      lockdep_tasklist_lock_is_held());
40692476d7fSEric W. Biederman 		if (first)
40792476d7fSEric W. Biederman 			result = hlist_entry(first, struct task_struct, pid_links[(type)]);
408eccba068SPavel Emelyanov 	}
40992476d7fSEric W. Biederman 	return result;
41092476d7fSEric W. Biederman }
4119728e5d6STetsuo Handa EXPORT_SYMBOL(pid_task);
41292476d7fSEric W. Biederman 
41317f98dcfSChristoph Hellwig /*
4141da177e4SLinus Torvalds  * Must be called under rcu_read_lock().
415f78f5b90SPaul E. McKenney  */
find_task_by_pid_ns(pid_t nr,struct pid_namespace * ns)416f78f5b90SPaul E. McKenney struct task_struct *find_task_by_pid_ns(pid_t nr, struct pid_namespace *ns)
41717f98dcfSChristoph Hellwig {
4181da177e4SLinus Torvalds 	RCU_LOCKDEP_WARN(!rcu_read_lock_held(),
4191da177e4SLinus Torvalds 			 "find_task_by_pid_ns() needs rcu_read_lock() protection");
420228ebcbeSPavel Emelyanov 	return pid_task(find_pid_ns(nr, ns), PIDTYPE_PID);
421228ebcbeSPavel Emelyanov }
42217cf22c3SEric W. Biederman 
/*
 * find_task_by_vpid - find_task_by_pid_ns() in the active pid namespace of
 * the current task.  The same locking rule applies.
 */
struct task_struct *find_task_by_vpid(pid_t vnr)
{
	struct pid_namespace *active_ns = task_active_pid_ns(current);

	return find_task_by_pid_ns(vnr, active_ns);
}
4272ee08260SMike Rapoport 
/*
 * find_get_task_by_vpid - look up a task by number in the current task's
 * active pid namespace and take a reference on it.
 *
 * The RCU read lock is taken internally.  Returns NULL if no task is found;
 * otherwise the caller owns a task_struct reference.
 */
struct task_struct *find_get_task_by_vpid(pid_t nr)
{
	struct task_struct *found;

	rcu_read_lock();
	found = find_task_by_vpid(nr);
	if (found != NULL)
		get_task_struct(found);
	rcu_read_unlock();

	return found;
}
4401a657f78SOleg Nesterov 
/*
 * get_task_pid - return @task's pid of the given @type, passed through
 * get_pid() so the caller gets its own reference.
 *
 * The pid pointer is read and handed to get_pid() inside an RCU read-side
 * section.
 */
struct pid *get_task_pid(struct task_struct *task, enum pid_type type)
{
	struct pid *ref;

	rcu_read_lock();
	ref = get_pid(rcu_dereference(*task_pid_ptr(task, type)));
	rcu_read_unlock();

	return ref;
}
EXPORT_SYMBOL_GPL(get_task_pid);
45092476d7fSEric W. Biederman 
get_pid_task(struct pid * pid,enum pid_type type)45192476d7fSEric W. Biederman struct task_struct *get_pid_task(struct pid *pid, enum pid_type type)
45292476d7fSEric W. Biederman {
45392476d7fSEric W. Biederman 	struct task_struct *result;
45492476d7fSEric W. Biederman 	rcu_read_lock();
45592476d7fSEric W. Biederman 	result = pid_task(pid, type);
45692476d7fSEric W. Biederman 	if (result)
45792476d7fSEric W. Biederman 		get_task_struct(result);
45877c100c8SRik van Riel 	rcu_read_unlock();
45992476d7fSEric W. Biederman 	return result;
46092476d7fSEric W. Biederman }
46192476d7fSEric W. Biederman EXPORT_SYMBOL_GPL(get_pid_task);
46292476d7fSEric W. Biederman 
/*
 * find_get_pid - look up number @nr in the current task's active pid
 * namespace and pass the result through get_pid().
 *
 * The lookup and get_pid() both happen inside an RCU read-side section.
 */
struct pid *find_get_pid(pid_t nr)
{
	struct pid *ref;

	rcu_read_lock();
	ref = get_pid(find_vpid(nr));
	rcu_read_unlock();

	return ref;
}
EXPORT_SYMBOL_GPL(find_get_pid);
4747af57294SPavel Emelyanov 
pid_nr_ns(struct pid * pid,struct pid_namespace * ns)4757af57294SPavel Emelyanov pid_t pid_nr_ns(struct pid *pid, struct pid_namespace *ns)
4767af57294SPavel Emelyanov {
4777af57294SPavel Emelyanov 	struct upid *upid;
4787af57294SPavel Emelyanov 	pid_t nr = 0;
4797af57294SPavel Emelyanov 
4807af57294SPavel Emelyanov 	if (pid && ns->level <= pid->level) {
4817af57294SPavel Emelyanov 		upid = &pid->numbers[ns->level];
4827af57294SPavel Emelyanov 		if (upid->ns == ns)
4837af57294SPavel Emelyanov 			nr = upid->nr;
4844f82f457SEric W. Biederman 	}
4857af57294SPavel Emelyanov 	return nr;
48644c4e1b2SEric W. Biederman }
48744c4e1b2SEric W. Biederman EXPORT_SYMBOL_GPL(pid_nr_ns);
48817cf22c3SEric W. Biederman 
pid_vnr(struct pid * pid)48944c4e1b2SEric W. Biederman pid_t pid_vnr(struct pid *pid)
49044c4e1b2SEric W. Biederman {
49144c4e1b2SEric W. Biederman 	return pid_nr_ns(pid, task_active_pid_ns(current));
49252ee2dfdSOleg Nesterov }
49352ee2dfdSOleg Nesterov EXPORT_SYMBOL_GPL(pid_vnr);
4942f2a3a46SPavel Emelyanov 
__task_pid_nr_ns(struct task_struct * task,enum pid_type type,struct pid_namespace * ns)49552ee2dfdSOleg Nesterov pid_t __task_pid_nr_ns(struct task_struct *task, enum pid_type type,
49652ee2dfdSOleg Nesterov 			struct pid_namespace *ns)
49752ee2dfdSOleg Nesterov {
49852ee2dfdSOleg Nesterov 	pid_t nr = 0;
49917cf22c3SEric W. Biederman 
5002c470475SEric W. Biederman 	rcu_read_lock();
50152ee2dfdSOleg Nesterov 	if (!ns)
50252ee2dfdSOleg Nesterov 		ns = task_active_pid_ns(current);
50352ee2dfdSOleg Nesterov 	nr = pid_nr_ns(rcu_dereference(*task_pid_ptr(task, type)), ns);
50452ee2dfdSOleg Nesterov 	rcu_read_unlock();
50552ee2dfdSOleg Nesterov 
5062f2a3a46SPavel Emelyanov 	return nr;
50761bce0f1SEric W. Biederman }
50861bce0f1SEric W. Biederman EXPORT_SYMBOL(__task_pid_nr_ns);
50961bce0f1SEric W. Biederman 
/*
 * task_active_pid_ns - the pid namespace of @tsk's pid, obtained by passing
 * task_pid(@tsk) to ns_of_pid().
 */
struct pid_namespace *task_active_pid_ns(struct task_struct *tsk)
{
	struct pid *pid = task_pid(tsk);

	return ns_of_pid(pid);
}
EXPORT_SYMBOL_GPL(task_active_pid_ns);
5150804ef4bSEric W. Biederman 
516e49859e7SPavel Emelyanov /*
5170804ef4bSEric W. Biederman  * Used by proc to find the first pid that is greater than or equal to nr.
518198fe21bSPavel Emelyanov  *
5190804ef4bSEric W. Biederman  * If there is a pid at nr this function is exactly the same as find_pid_ns.
52095846ecfSGargi Sharma  */
find_ge_pid(int nr,struct pid_namespace * ns)5210804ef4bSEric W. Biederman struct pid *find_ge_pid(int nr, struct pid_namespace *ns)
522*4480c27cSAndreas Gruenbacher {
5230804ef4bSEric W. Biederman 	return idr_get_next(&ns->idr, &nr);
5241aa92cd3SMinchan Kim }
5251aa92cd3SMinchan Kim EXPORT_SYMBOL_GPL(find_ge_pid);
5261aa92cd3SMinchan Kim 
pidfd_get_pid(unsigned int fd,unsigned int * flags)5271aa92cd3SMinchan Kim struct pid *pidfd_get_pid(unsigned int fd, unsigned int *flags)
5281aa92cd3SMinchan Kim {
5291aa92cd3SMinchan Kim 	struct fd f;
5301aa92cd3SMinchan Kim 	struct pid *pid;
5311aa92cd3SMinchan Kim 
5321aa92cd3SMinchan Kim 	f = fdget(fd);
5331aa92cd3SMinchan Kim 	if (!f.file)
5341aa92cd3SMinchan Kim 		return ERR_PTR(-EBADF);
5351aa92cd3SMinchan Kim 
5361aa92cd3SMinchan Kim 	pid = pidfd_pid(f.file);
5371aa92cd3SMinchan Kim 	if (!IS_ERR(pid)) {
5381aa92cd3SMinchan Kim 		get_pid(pid);
5391aa92cd3SMinchan Kim 		*flags = f.file->f_flags;
5401aa92cd3SMinchan Kim 	}
5411aa92cd3SMinchan Kim 
5421aa92cd3SMinchan Kim 	fdput(f);
54332fcb426SChristian Brauner 	return pid;
544e9bdcdbfSChristian Brauner }
545e9bdcdbfSChristian Brauner 
546e9bdcdbfSChristian Brauner /**
547e9bdcdbfSChristian Brauner  * pidfd_get_task() - Get the task associated with a pidfd
548e9bdcdbfSChristian Brauner  *
549e9bdcdbfSChristian Brauner  * @pidfd: pidfd for which to get the task
550e9bdcdbfSChristian Brauner  * @flags: flags associated with this pidfd
551e9bdcdbfSChristian Brauner  *
552e9bdcdbfSChristian Brauner  * Return the task associated with @pidfd. The function takes a reference on
553e9bdcdbfSChristian Brauner  * the returned task. The caller is responsible for releasing that reference.
554e9bdcdbfSChristian Brauner  *
555e9bdcdbfSChristian Brauner  * Currently, the process identified by @pidfd is always a thread-group leader.
556e9bdcdbfSChristian Brauner  * This restriction currently exists for all aspects of pidfds including pidfd
557e9bdcdbfSChristian Brauner  * creation (CLONE_PIDFD cannot be used with CLONE_THREAD) and pidfd polling
558e9bdcdbfSChristian Brauner  * (only supports thread group leaders).
559e9bdcdbfSChristian Brauner  *
560e9bdcdbfSChristian Brauner  * Return: On success, the task_struct associated with the pidfd.
561e9bdcdbfSChristian Brauner  *	   On error, a negative errno number will be returned.
562e9bdcdbfSChristian Brauner  */
pidfd_get_task(int pidfd,unsigned int * flags)563e9bdcdbfSChristian Brauner struct task_struct *pidfd_get_task(int pidfd, unsigned int *flags)
564e9bdcdbfSChristian Brauner {
565e9bdcdbfSChristian Brauner 	unsigned int f_flags;
566e9bdcdbfSChristian Brauner 	struct pid *pid;
567e9bdcdbfSChristian Brauner 	struct task_struct *task;
568e9bdcdbfSChristian Brauner 
569e9bdcdbfSChristian Brauner 	pid = pidfd_get_pid(pidfd, &f_flags);
570e9bdcdbfSChristian Brauner 	if (IS_ERR(pid))
571e9bdcdbfSChristian Brauner 		return ERR_CAST(pid);
572e9bdcdbfSChristian Brauner 
573e9bdcdbfSChristian Brauner 	task = get_pid_task(pid, PIDTYPE_TGID);
574e9bdcdbfSChristian Brauner 	put_pid(pid);
575e9bdcdbfSChristian Brauner 	if (!task)
576e9bdcdbfSChristian Brauner 		return ERR_PTR(-ESRCH);
577e9bdcdbfSChristian Brauner 
578e9bdcdbfSChristian Brauner 	*flags = f_flags;
579e9bdcdbfSChristian Brauner 	return task;
58032fcb426SChristian Brauner }
58132fcb426SChristian Brauner 
/**
 * pidfd_create() - Create a new pid file descriptor.
 *
 * @pid:   struct pid the new pidfd will refer to
 * @flags: flags to pass
 *
 * Create a new pid file descriptor with the O_CLOEXEC flag set. The
 * descriptor number and its file come from pidfd_prepare(); the file is
 * installed into the fd table with fd_install() only if that step
 * succeeded.
 *
 * Note that this function may only be called after the fd table has been
 * unshared, so that the pidfd is not leaked to the new process.
 *
 * This symbol should not be explicitly exported to loadable modules.
 *
 * Return: On success, a cloexec pidfd is returned.
 *         On error, a negative errno number will be returned.
 */
int pidfd_create(struct pid *pid, unsigned int flags)
{
	struct file *file;
	int fd = pidfd_prepare(pid, flags, &file);

	/* on failure fd already holds the negative errno to hand back */
	if (fd >= 0)
		fd_install(fd, file);

	return fd;
}
61032fcb426SChristian Brauner 
61132fcb426SChristian Brauner /**
61232fcb426SChristian Brauner  * sys_pidfd_open() - Open new pid file descriptor.
61332fcb426SChristian Brauner  *
61432fcb426SChristian Brauner  * @pid:   pid for which to retrieve a pidfd
61532fcb426SChristian Brauner  * @flags: flags to pass
61632fcb426SChristian Brauner  *
61732fcb426SChristian Brauner  * This creates a new pid file descriptor with the O_CLOEXEC flag set for
61832fcb426SChristian Brauner  * the process identified by @pid. Currently, the process identified by
61932fcb426SChristian Brauner  * @pid must be a thread-group leader. This restriction currently exists
62032fcb426SChristian Brauner  * for all aspects of pidfds including pidfd creation (CLONE_PIDFD cannot
62132fcb426SChristian Brauner  * be used with CLONE_THREAD) and pidfd polling (only supports thread group
62232fcb426SChristian Brauner  * leaders).
62332fcb426SChristian Brauner  *
62432fcb426SChristian Brauner  * Return: On success, a cloexec pidfd is returned.
62532fcb426SChristian Brauner  *         On error, a negative errno number will be returned.
62632fcb426SChristian Brauner  */
SYSCALL_DEFINE2(pidfd_open,pid_t,pid,unsigned int,flags)62732fcb426SChristian Brauner SYSCALL_DEFINE2(pidfd_open, pid_t, pid, unsigned int, flags)
62832fcb426SChristian Brauner {
62932fcb426SChristian Brauner 	int fd;
63032fcb426SChristian Brauner 	struct pid *p;
6311e1d0f0bSChristian Brauner 
63232fcb426SChristian Brauner 	if (flags & ~PIDFD_NONBLOCK)
63332fcb426SChristian Brauner 		return -EINVAL;
6346da73d15SChristian Brauner 
63532fcb426SChristian Brauner 	if (pid <= 0)
63632fcb426SChristian Brauner 		return -EINVAL;
63732fcb426SChristian Brauner 
63832fcb426SChristian Brauner 	p = find_get_pid(pid);
63932fcb426SChristian Brauner 	if (!p)
64032fcb426SChristian Brauner 		return -ESRCH;
64132fcb426SChristian Brauner 
64232fcb426SChristian Brauner 	fd = pidfd_create(p, flags);
64332fcb426SChristian Brauner 
6446da73d15SChristian Brauner 	put_pid(p);
64532fcb426SChristian Brauner 	return fd;
64632fcb426SChristian Brauner }
64732fcb426SChristian Brauner 
pid_idr_init(void)64832fcb426SChristian Brauner void __init pid_idr_init(void)
64932fcb426SChristian Brauner {
65095846ecfSGargi Sharma 	/* Verify no one has done anything silly: */
6511da177e4SLinus Torvalds 	BUILD_BUG_ON(PID_MAX_LIMIT >= PIDNS_ADDING);
652840d6fe7SZhen Lei 
653e8cfbc24SGargi Sharma 	/* bump default and minimum pid_max based on number of cpus */
654c876ad76SEric W. Biederman 	pid_max = min(pid_max_max, max_t(int, pid_max,
65572680a19SHedi Berriche 				PIDS_PER_CPU_DEFAULT * num_possible_cpus()));
65672680a19SHedi Berriche 	pid_max_min = max_t(int, pid_max_min,
65772680a19SHedi Berriche 				PIDS_PER_CPU_MIN * num_possible_cpus());
65872680a19SHedi Berriche 	pr_info("pid_max: default: %u minimum: %u\n", pid_max, pid_max_min);
65972680a19SHedi Berriche 
66072680a19SHedi Berriche 	idr_init(&init_pid_ns.idr);
66172680a19SHedi Berriche 
66295846ecfSGargi Sharma 	init_pid_ns.pid_cachep = kmem_cache_create("pid",
66392476d7fSEric W. Biederman 			struct_size_t(struct pid, numbers, 1),
66474bd59bbSPavel Emelyanov 			__alignof__(struct pid),
6655d097056SVladimir Davydov 			SLAB_HWCACHE_ALIGN | SLAB_PANIC | SLAB_ACCOUNT,
6661da177e4SLinus Torvalds 			NULL);
6678649c322SSargun Dhillon }
6688649c322SSargun Dhillon 
__pidfd_fget(struct task_struct * task,int fd)6698649c322SSargun Dhillon static struct file *__pidfd_fget(struct task_struct *task, int fd)
6708649c322SSargun Dhillon {
6718649c322SSargun Dhillon 	struct file *file;
6728649c322SSargun Dhillon 	int ret;
673f7cfd871SEric W. Biederman 
6748649c322SSargun Dhillon 	ret = down_read_killable(&task->signal->exec_update_lock);
6758649c322SSargun Dhillon 	if (ret)
6768649c322SSargun Dhillon 		return ERR_PTR(ret);
6778649c322SSargun Dhillon 
6788649c322SSargun Dhillon 	if (ptrace_may_access(task, PTRACE_MODE_ATTACH_REALCREDS))
6798649c322SSargun Dhillon 		file = fget_task(task, fd);
6808649c322SSargun Dhillon 	else
6818649c322SSargun Dhillon 		file = ERR_PTR(-EPERM);
682f7cfd871SEric W. Biederman 
6838649c322SSargun Dhillon 	up_read(&task->signal->exec_update_lock);
6848649c322SSargun Dhillon 
6858649c322SSargun Dhillon 	return file ?: ERR_PTR(-EBADF);
6868649c322SSargun Dhillon }
6878649c322SSargun Dhillon 
pidfd_getfd(struct pid * pid,int fd)6888649c322SSargun Dhillon static int pidfd_getfd(struct pid *pid, int fd)
6898649c322SSargun Dhillon {
6908649c322SSargun Dhillon 	struct task_struct *task;
6918649c322SSargun Dhillon 	struct file *file;
6928649c322SSargun Dhillon 	int ret;
6938649c322SSargun Dhillon 
6948649c322SSargun Dhillon 	task = get_pid_task(pid, PIDTYPE_PID);
6958649c322SSargun Dhillon 	if (!task)
6968649c322SSargun Dhillon 		return -ESRCH;
6978649c322SSargun Dhillon 
6988649c322SSargun Dhillon 	file = __pidfd_fget(task, fd);
6998649c322SSargun Dhillon 	put_task_struct(task);
7008649c322SSargun Dhillon 	if (IS_ERR(file))
7018649c322SSargun Dhillon 		return PTR_ERR(file);
702910d2f16SKees Cook 
7038649c322SSargun Dhillon 	ret = receive_fd(file, O_CLOEXEC);
7048649c322SSargun Dhillon 	fput(file);
7058649c322SSargun Dhillon 
7068649c322SSargun Dhillon 	return ret;
7078649c322SSargun Dhillon }
7088649c322SSargun Dhillon 
7098649c322SSargun Dhillon /**
7108649c322SSargun Dhillon  * sys_pidfd_getfd() - Get a file descriptor from another process
7118649c322SSargun Dhillon  *
7128649c322SSargun Dhillon  * @pidfd:	the pidfd file descriptor of the process
7138649c322SSargun Dhillon  * @fd:		the file descriptor number to get
7148649c322SSargun Dhillon  * @flags:	flags on how to get the fd (reserved)
7158649c322SSargun Dhillon  *
7168649c322SSargun Dhillon  * This syscall gets a copy of a file descriptor from another process
7178649c322SSargun Dhillon  * based on the pidfd, and file descriptor number. It requires that
7188649c322SSargun Dhillon  * the calling process has the ability to ptrace the process represented
7198649c322SSargun Dhillon  * by the pidfd. The process which is having its file descriptor copied
7208649c322SSargun Dhillon  * is otherwise unaffected.
7218649c322SSargun Dhillon  *
7228649c322SSargun Dhillon  * Return: On success, a cloexec file descriptor is returned.
7238649c322SSargun Dhillon  *         On error, a negative errno number will be returned.
7248649c322SSargun Dhillon  */
SYSCALL_DEFINE3(pidfd_getfd,int,pidfd,int,fd,unsigned int,flags)7258649c322SSargun Dhillon SYSCALL_DEFINE3(pidfd_getfd, int, pidfd, int, fd,
7268649c322SSargun Dhillon 		unsigned int, flags)
7278649c322SSargun Dhillon {
7288649c322SSargun Dhillon 	struct pid *pid;
7298649c322SSargun Dhillon 	struct fd f;
7308649c322SSargun Dhillon 	int ret;
7318649c322SSargun Dhillon 
7328649c322SSargun Dhillon 	/* flags is currently unused - make sure it's unset */
7338649c322SSargun Dhillon 	if (flags)
7348649c322SSargun Dhillon 		return -EINVAL;
7358649c322SSargun Dhillon 
7368649c322SSargun Dhillon 	f = fdget(pidfd);
7378649c322SSargun Dhillon 	if (!f.file)
7388649c322SSargun Dhillon 		return -EBADF;
7398649c322SSargun Dhillon 
7408649c322SSargun Dhillon 	pid = pidfd_pid(f.file);
7418649c322SSargun Dhillon 	if (IS_ERR(pid))
7428649c322SSargun Dhillon 		ret = PTR_ERR(pid);
7438649c322SSargun Dhillon 	else
7448649c322SSargun Dhillon 		ret = pidfd_getfd(pid, fd);
7458649c322SSargun Dhillon 
7468649c322SSargun Dhillon 	fdput(f);
7478649c322SSargun Dhillon 	return ret;
748 }
749