xref: /openbmc/linux/security/commoncap.c (revision c900529f3d9161bfde5cca0754f83b4d3c3e0220)
12874c5fdSThomas Gleixner // SPDX-License-Identifier: GPL-2.0-or-later
23e1c2515SJames Morris /* Common capabilities, needed by capability.o.
31da177e4SLinus Torvalds  */
41da177e4SLinus Torvalds 
5c59ede7bSRandy.Dunlap #include <linux/capability.h>
63fc689e9SEric Paris #include <linux/audit.h>
71da177e4SLinus Torvalds #include <linux/init.h>
81da177e4SLinus Torvalds #include <linux/kernel.h>
9b1d9e6b0SCasey Schaufler #include <linux/lsm_hooks.h>
101da177e4SLinus Torvalds #include <linux/file.h>
111da177e4SLinus Torvalds #include <linux/mm.h>
121da177e4SLinus Torvalds #include <linux/mman.h>
131da177e4SLinus Torvalds #include <linux/pagemap.h>
141da177e4SLinus Torvalds #include <linux/swap.h>
151da177e4SLinus Torvalds #include <linux/skbuff.h>
161da177e4SLinus Torvalds #include <linux/netlink.h>
171da177e4SLinus Torvalds #include <linux/ptrace.h>
181da177e4SLinus Torvalds #include <linux/xattr.h>
191da177e4SLinus Torvalds #include <linux/hugetlb.h>
20b5376771SSerge E. Hallyn #include <linux/mount.h>
21b460cbc5SSerge E. Hallyn #include <linux/sched.h>
223898b1b4SAndrew G. Morgan #include <linux/prctl.h>
233898b1b4SAndrew G. Morgan #include <linux/securebits.h>
243486740aSSerge E. Hallyn #include <linux/user_namespace.h>
2540401530SAl Viro #include <linux/binfmts.h>
2651b79beeSJonghwan Choi #include <linux/personality.h>
27a793d79eSChristian Brauner #include <linux/mnt_idmapping.h>
2872c2d582SAndrew Morgan 
29b5f22a59SSerge E. Hallyn /*
30b5f22a59SSerge E. Hallyn  * If a non-root user executes a setuid-root binary in
31b5f22a59SSerge E. Hallyn  * !secure(SECURE_NOROOT) mode, then we raise capabilities.
32b5f22a59SSerge E. Hallyn  * However if fE is also set, then the intent is for only
33b5f22a59SSerge E. Hallyn  * the file capabilities to be applied, and the setuid-root
34b5f22a59SSerge E. Hallyn  * bit is left on either to change the uid (plausible) or
35b5f22a59SSerge E. Hallyn  * to get full privilege on a kernel without file capabilities
36b5f22a59SSerge E. Hallyn  * support.  So in that case we do not raise capabilities.
37b5f22a59SSerge E. Hallyn  *
38b5f22a59SSerge E. Hallyn  * Warn if that happens, once per boot.
39b5f22a59SSerge E. Hallyn  */
warn_setuid_and_fcaps_mixed(const char * fname)40d7627467SDavid Howells static void warn_setuid_and_fcaps_mixed(const char *fname)
41b5f22a59SSerge E. Hallyn {
42b5f22a59SSerge E. Hallyn 	static int warned;
43b5f22a59SSerge E. Hallyn 	if (!warned) {
44b5f22a59SSerge E. Hallyn 		printk(KERN_INFO "warning: `%s' has both setuid-root and"
45b5f22a59SSerge E. Hallyn 			" effective capabilities. Therefore not raising all"
46b5f22a59SSerge E. Hallyn 			" capabilities.\n", fname);
47b5f22a59SSerge E. Hallyn 		warned = 1;
48b5f22a59SSerge E. Hallyn 	}
49b5f22a59SSerge E. Hallyn }
50b5f22a59SSerge E. Hallyn 
511d045980SDavid Howells /**
521d045980SDavid Howells  * cap_capable - Determine whether a task has a particular effective capability
533699c53cSDavid Howells  * @cred: The credentials to use
54049ae601SRandy Dunlap  * @targ_ns:  The user namespace in which we need the capability
551d045980SDavid Howells  * @cap: The capability to check for
56e88ed488SMicah Morton  * @opts: Bitmask of options defined in include/linux/security.h
571d045980SDavid Howells  *
581d045980SDavid Howells  * Determine whether the nominated task has the specified capability amongst
591d045980SDavid Howells  * its effective set, returning 0 if it does, -ve if it does not.
601d045980SDavid Howells  *
613699c53cSDavid Howells  * NOTE WELL: cap_has_capability() cannot be used like the kernel's capable()
623699c53cSDavid Howells  * and has_capability() functions.  That is, it has the reverse semantics:
633699c53cSDavid Howells  * cap_has_capability() returns 0 when a task has a capability, but the
643699c53cSDavid Howells  * kernel's capable() and has_capability() returns 1 for this case.
65a6dbb1efSAndrew G. Morgan  */
cap_capable(const struct cred * cred,struct user_namespace * targ_ns,int cap,unsigned int opts)666a9de491SEric Paris int cap_capable(const struct cred *cred, struct user_namespace *targ_ns,
67c1a85a00SMicah Morton 		int cap, unsigned int opts)
681da177e4SLinus Torvalds {
69520d9eabSEric W. Biederman 	struct user_namespace *ns = targ_ns;
703486740aSSerge E. Hallyn 
71520d9eabSEric W. Biederman 	/* See if cred has the capability in the target user namespace
72520d9eabSEric W. Biederman 	 * by examining the target user namespace and all of the target
73520d9eabSEric W. Biederman 	 * user namespace's parents.
74520d9eabSEric W. Biederman 	 */
75520d9eabSEric W. Biederman 	for (;;) {
763486740aSSerge E. Hallyn 		/* Do we have the necessary capabilities? */
77520d9eabSEric W. Biederman 		if (ns == cred->user_ns)
783699c53cSDavid Howells 			return cap_raised(cred->cap_effective, cap) ? 0 : -EPERM;
793486740aSSerge E. Hallyn 
8064db4c7fSKirill Tkhai 		/*
8164db4c7fSKirill Tkhai 		 * If we're already at a lower level than we're looking for,
8264db4c7fSKirill Tkhai 		 * we're done searching.
8364db4c7fSKirill Tkhai 		 */
8464db4c7fSKirill Tkhai 		if (ns->level <= cred->user_ns->level)
853486740aSSerge E. Hallyn 			return -EPERM;
863486740aSSerge E. Hallyn 
873486740aSSerge E. Hallyn 		/*
88520d9eabSEric W. Biederman 		 * The owner of the user namespace in the parent of the
89520d9eabSEric W. Biederman 		 * user namespace has all caps.
90520d9eabSEric W. Biederman 		 */
91520d9eabSEric W. Biederman 		if ((ns->parent == cred->user_ns) && uid_eq(ns->owner, cred->euid))
92520d9eabSEric W. Biederman 			return 0;
93520d9eabSEric W. Biederman 
94520d9eabSEric W. Biederman 		/*
953486740aSSerge E. Hallyn 		 * If you have a capability in a parent user ns, then you have
963486740aSSerge E. Hallyn 		 * it over all children user namespaces as well.
973486740aSSerge E. Hallyn 		 */
98520d9eabSEric W. Biederman 		ns = ns->parent;
993486740aSSerge E. Hallyn 	}
1003486740aSSerge E. Hallyn 
1013486740aSSerge E. Hallyn 	/* We never get here */
1021da177e4SLinus Torvalds }
1031da177e4SLinus Torvalds 
1041d045980SDavid Howells /**
1051d045980SDavid Howells  * cap_settime - Determine whether the current process may set the system clock
1061d045980SDavid Howells  * @ts: The time to set
1071d045980SDavid Howells  * @tz: The timezone to set
1081d045980SDavid Howells  *
1091d045980SDavid Howells  * Determine whether the current process may set the system clock and timezone
1101d045980SDavid Howells  * information, returning 0 if permission granted, -ve if denied.
1111d045980SDavid Howells  */
cap_settime(const struct timespec64 * ts,const struct timezone * tz)112457db29bSBaolin Wang int cap_settime(const struct timespec64 *ts, const struct timezone *tz)
1131da177e4SLinus Torvalds {
1141da177e4SLinus Torvalds 	if (!capable(CAP_SYS_TIME))
1151da177e4SLinus Torvalds 		return -EPERM;
1161da177e4SLinus Torvalds 	return 0;
1171da177e4SLinus Torvalds }
1181da177e4SLinus Torvalds 
1191d045980SDavid Howells /**
1209e48858fSIngo Molnar  * cap_ptrace_access_check - Determine whether the current process may access
1211d045980SDavid Howells  *			   another
1221d045980SDavid Howells  * @child: The process to be accessed
1231d045980SDavid Howells  * @mode: The mode of attachment.
1241d045980SDavid Howells  *
1258409cca7SSerge E. Hallyn  * If we are in the same or an ancestor user_ns and have all the target
1268409cca7SSerge E. Hallyn  * task's capabilities, then ptrace access is allowed.
1278409cca7SSerge E. Hallyn  * If we have the ptrace capability to the target user_ns, then ptrace
1288409cca7SSerge E. Hallyn  * access is allowed.
1298409cca7SSerge E. Hallyn  * Else denied.
1308409cca7SSerge E. Hallyn  *
1311d045980SDavid Howells  * Determine whether a process may access another, returning 0 if permission
1321d045980SDavid Howells  * granted, -ve if denied.
1331d045980SDavid Howells  */
cap_ptrace_access_check(struct task_struct * child,unsigned int mode)1349e48858fSIngo Molnar int cap_ptrace_access_check(struct task_struct *child, unsigned int mode)
1351da177e4SLinus Torvalds {
136c69e8d9cSDavid Howells 	int ret = 0;
1378409cca7SSerge E. Hallyn 	const struct cred *cred, *child_cred;
138caaee623SJann Horn 	const kernel_cap_t *caller_caps;
139c69e8d9cSDavid Howells 
140c69e8d9cSDavid Howells 	rcu_read_lock();
1418409cca7SSerge E. Hallyn 	cred = current_cred();
1428409cca7SSerge E. Hallyn 	child_cred = __task_cred(child);
143caaee623SJann Horn 	if (mode & PTRACE_MODE_FSCREDS)
144caaee623SJann Horn 		caller_caps = &cred->cap_effective;
145caaee623SJann Horn 	else
146caaee623SJann Horn 		caller_caps = &cred->cap_permitted;
147c4a4d603SEric W. Biederman 	if (cred->user_ns == child_cred->user_ns &&
148caaee623SJann Horn 	    cap_issubset(child_cred->cap_permitted, *caller_caps))
1498409cca7SSerge E. Hallyn 		goto out;
150c4a4d603SEric W. Biederman 	if (ns_capable(child_cred->user_ns, CAP_SYS_PTRACE))
1518409cca7SSerge E. Hallyn 		goto out;
152c69e8d9cSDavid Howells 	ret = -EPERM;
1538409cca7SSerge E. Hallyn out:
154c69e8d9cSDavid Howells 	rcu_read_unlock();
155c69e8d9cSDavid Howells 	return ret;
1565cd9c58fSDavid Howells }
1575cd9c58fSDavid Howells 
1581d045980SDavid Howells /**
1591d045980SDavid Howells  * cap_ptrace_traceme - Determine whether another process may trace the current
1601d045980SDavid Howells  * @parent: The task proposed to be the tracer
1611d045980SDavid Howells  *
1628409cca7SSerge E. Hallyn  * If parent is in the same or an ancestor user_ns and has all current's
1638409cca7SSerge E. Hallyn  * capabilities, then ptrace access is allowed.
1648409cca7SSerge E. Hallyn  * If parent has the ptrace capability to current's user_ns, then ptrace
1658409cca7SSerge E. Hallyn  * access is allowed.
1668409cca7SSerge E. Hallyn  * Else denied.
1678409cca7SSerge E. Hallyn  *
1681d045980SDavid Howells  * Determine whether the nominated task is permitted to trace the current
1691d045980SDavid Howells  * process, returning 0 if permission is granted, -ve if denied.
1701d045980SDavid Howells  */
cap_ptrace_traceme(struct task_struct * parent)1715cd9c58fSDavid Howells int cap_ptrace_traceme(struct task_struct *parent)
1725cd9c58fSDavid Howells {
173c69e8d9cSDavid Howells 	int ret = 0;
1748409cca7SSerge E. Hallyn 	const struct cred *cred, *child_cred;
175c69e8d9cSDavid Howells 
176c69e8d9cSDavid Howells 	rcu_read_lock();
1778409cca7SSerge E. Hallyn 	cred = __task_cred(parent);
1788409cca7SSerge E. Hallyn 	child_cred = current_cred();
179c4a4d603SEric W. Biederman 	if (cred->user_ns == child_cred->user_ns &&
1808409cca7SSerge E. Hallyn 	    cap_issubset(child_cred->cap_permitted, cred->cap_permitted))
1818409cca7SSerge E. Hallyn 		goto out;
182c4a4d603SEric W. Biederman 	if (has_ns_capability(parent, child_cred->user_ns, CAP_SYS_PTRACE))
1838409cca7SSerge E. Hallyn 		goto out;
184c69e8d9cSDavid Howells 	ret = -EPERM;
1858409cca7SSerge E. Hallyn out:
186c69e8d9cSDavid Howells 	rcu_read_unlock();
187c69e8d9cSDavid Howells 	return ret;
1881da177e4SLinus Torvalds }
1891da177e4SLinus Torvalds 
1901d045980SDavid Howells /**
1911d045980SDavid Howells  * cap_capget - Retrieve a task's capability sets
1921d045980SDavid Howells  * @target: The task from which to retrieve the capability sets
1931d045980SDavid Howells  * @effective: The place to record the effective set
1941d045980SDavid Howells  * @inheritable: The place to record the inheritable set
1951d045980SDavid Howells  * @permitted: The place to record the permitted set
1961d045980SDavid Howells  *
1971d045980SDavid Howells  * This function retrieves the capabilities of the nominated task and returns
1981d045980SDavid Howells  * them to the caller.
1991d045980SDavid Howells  */
cap_capget(const struct task_struct * target,kernel_cap_t * effective,kernel_cap_t * inheritable,kernel_cap_t * permitted)200*6672efbbSKhadija Kamran int cap_capget(const struct task_struct *target, kernel_cap_t *effective,
2011da177e4SLinus Torvalds 	       kernel_cap_t *inheritable, kernel_cap_t *permitted)
2021da177e4SLinus Torvalds {
203c69e8d9cSDavid Howells 	const struct cred *cred;
204b6dff3ecSDavid Howells 
2051da177e4SLinus Torvalds 	/* Derived from kernel/capability.c:sys_capget. */
206c69e8d9cSDavid Howells 	rcu_read_lock();
207c69e8d9cSDavid Howells 	cred = __task_cred(target);
208b6dff3ecSDavid Howells 	*effective   = cred->cap_effective;
209b6dff3ecSDavid Howells 	*inheritable = cred->cap_inheritable;
210b6dff3ecSDavid Howells 	*permitted   = cred->cap_permitted;
211c69e8d9cSDavid Howells 	rcu_read_unlock();
2121da177e4SLinus Torvalds 	return 0;
2131da177e4SLinus Torvalds }
2141da177e4SLinus Torvalds 
2151d045980SDavid Howells /*
2161d045980SDavid Howells  * Determine whether the inheritable capabilities are limited to the old
2171d045980SDavid Howells  * permitted set.  Returns 1 if they are limited, 0 if they are not.
2181d045980SDavid Howells  */
cap_inh_is_capped(void)21972c2d582SAndrew Morgan static inline int cap_inh_is_capped(void)
22072c2d582SAndrew Morgan {
2211d045980SDavid Howells 	/* they are so limited unless the current task has the CAP_SETPCAP
2221d045980SDavid Howells 	 * capability
22372c2d582SAndrew Morgan 	 */
224c4a4d603SEric W. Biederman 	if (cap_capable(current_cred(), current_cred()->user_ns,
225c1a85a00SMicah Morton 			CAP_SETPCAP, CAP_OPT_NONE) == 0)
2261d045980SDavid Howells 		return 0;
2271d045980SDavid Howells 	return 1;
22872c2d582SAndrew Morgan }
22972c2d582SAndrew Morgan 
2301d045980SDavid Howells /**
2311d045980SDavid Howells  * cap_capset - Validate and apply proposed changes to current's capabilities
2321d045980SDavid Howells  * @new: The proposed new credentials; alterations should be made here
2331d045980SDavid Howells  * @old: The current task's current credentials
2341d045980SDavid Howells  * @effective: A pointer to the proposed new effective capabilities set
2351d045980SDavid Howells  * @inheritable: A pointer to the proposed new inheritable capabilities set
2361d045980SDavid Howells  * @permitted: A pointer to the proposed new permitted capabilities set
2371d045980SDavid Howells  *
2381d045980SDavid Howells  * This function validates and applies a proposed mass change to the current
2391d045980SDavid Howells  * process's capability sets.  The changes are made to the proposed new
2401d045980SDavid Howells  * credentials, and assuming no error, will be committed by the caller of LSM.
2411d045980SDavid Howells  */
cap_capset(struct cred * new,const struct cred * old,const kernel_cap_t * effective,const kernel_cap_t * inheritable,const kernel_cap_t * permitted)242d84f4f99SDavid Howells int cap_capset(struct cred *new,
243d84f4f99SDavid Howells 	       const struct cred *old,
244d84f4f99SDavid Howells 	       const kernel_cap_t *effective,
24515a2460eSDavid Howells 	       const kernel_cap_t *inheritable,
24615a2460eSDavid Howells 	       const kernel_cap_t *permitted)
2471da177e4SLinus Torvalds {
248d84f4f99SDavid Howells 	if (cap_inh_is_capped() &&
249d84f4f99SDavid Howells 	    !cap_issubset(*inheritable,
250d84f4f99SDavid Howells 			  cap_combine(old->cap_inheritable,
251d84f4f99SDavid Howells 				      old->cap_permitted)))
25272c2d582SAndrew Morgan 		/* incapable of using this inheritable set */
2531da177e4SLinus Torvalds 		return -EPERM;
254d84f4f99SDavid Howells 
2553b7391deSSerge E. Hallyn 	if (!cap_issubset(*inheritable,
256d84f4f99SDavid Howells 			  cap_combine(old->cap_inheritable,
257d84f4f99SDavid Howells 				      old->cap_bset)))
2583b7391deSSerge E. Hallyn 		/* no new pI capabilities outside bounding set */
2593b7391deSSerge E. Hallyn 		return -EPERM;
2601da177e4SLinus Torvalds 
2611da177e4SLinus Torvalds 	/* verify restrictions on target's new Permitted set */
262d84f4f99SDavid Howells 	if (!cap_issubset(*permitted, old->cap_permitted))
2631da177e4SLinus Torvalds 		return -EPERM;
2641da177e4SLinus Torvalds 
2651da177e4SLinus Torvalds 	/* verify the _new_Effective_ is a subset of the _new_Permitted_ */
266d84f4f99SDavid Howells 	if (!cap_issubset(*effective, *permitted))
2671da177e4SLinus Torvalds 		return -EPERM;
2681da177e4SLinus Torvalds 
269d84f4f99SDavid Howells 	new->cap_effective   = *effective;
270d84f4f99SDavid Howells 	new->cap_inheritable = *inheritable;
271d84f4f99SDavid Howells 	new->cap_permitted   = *permitted;
27258319057SAndy Lutomirski 
27358319057SAndy Lutomirski 	/*
27458319057SAndy Lutomirski 	 * Mask off ambient bits that are no longer both permitted and
27558319057SAndy Lutomirski 	 * inheritable.
27658319057SAndy Lutomirski 	 */
27758319057SAndy Lutomirski 	new->cap_ambient = cap_intersect(new->cap_ambient,
27858319057SAndy Lutomirski 					 cap_intersect(*permitted,
27958319057SAndy Lutomirski 						       *inheritable));
28058319057SAndy Lutomirski 	if (WARN_ON(!cap_ambient_invariant_ok(new)))
28158319057SAndy Lutomirski 		return -EINVAL;
2821da177e4SLinus Torvalds 	return 0;
2831da177e4SLinus Torvalds }
2841da177e4SLinus Torvalds 
2851d045980SDavid Howells /**
2861d045980SDavid Howells  * cap_inode_need_killpriv - Determine if inode change affects privileges
2871d045980SDavid Howells  * @dentry: The inode/dentry in being changed with change marked ATTR_KILL_PRIV
2881d045980SDavid Howells  *
2891d045980SDavid Howells  * Determine if an inode having a change applied that's marked ATTR_KILL_PRIV
2901d045980SDavid Howells  * affects the security markings on that inode, and if it is, should
291ab5348c9SStefan Berger  * inode_killpriv() be invoked or the change rejected.
2921d045980SDavid Howells  *
293049ae601SRandy Dunlap  * Return: 1 if security.capability has a value, meaning inode_killpriv()
294ab5348c9SStefan Berger  * is required, 0 otherwise, meaning inode_killpriv() is not required.
2951d045980SDavid Howells  */
cap_inode_need_killpriv(struct dentry * dentry)296b5376771SSerge E. Hallyn int cap_inode_need_killpriv(struct dentry *dentry)
297b5376771SSerge E. Hallyn {
298c6f493d6SDavid Howells 	struct inode *inode = d_backing_inode(dentry);
299b5376771SSerge E. Hallyn 	int error;
300b5376771SSerge E. Hallyn 
3015d6c3191SAndreas Gruenbacher 	error = __vfs_getxattr(dentry, inode, XATTR_NAME_CAPS, NULL, 0);
3025d6c3191SAndreas Gruenbacher 	return error > 0;
303b5376771SSerge E. Hallyn }
304b5376771SSerge E. Hallyn 
3051d045980SDavid Howells /**
3061d045980SDavid Howells  * cap_inode_killpriv - Erase the security markings on an inode
30771bc356fSChristian Brauner  *
30839f60c1cSChristian Brauner  * @idmap:	idmap of the mount the inode was found from
3091d045980SDavid Howells  * @dentry:	The inode/dentry to alter
3101d045980SDavid Howells  *
3111d045980SDavid Howells  * Erase the privilege-enhancing security markings on an inode.
3121d045980SDavid Howells  *
31339f60c1cSChristian Brauner  * If the inode has been found through an idmapped mount the idmap of
31439f60c1cSChristian Brauner  * the vfsmount must be passed through @idmap. This function will then
31539f60c1cSChristian Brauner  * take care to map the inode according to @idmap before checking
31671bc356fSChristian Brauner  * permissions. On non-idmapped mounts or if permission checking is to be
3174432b507SPaul Moore  * performed on the raw inode simply pass @nop_mnt_idmap.
31871bc356fSChristian Brauner  *
319049ae601SRandy Dunlap  * Return: 0 if successful, -ve on error.
3201d045980SDavid Howells  */
cap_inode_killpriv(struct mnt_idmap * idmap,struct dentry * dentry)32139f60c1cSChristian Brauner int cap_inode_killpriv(struct mnt_idmap *idmap, struct dentry *dentry)
322b5376771SSerge E. Hallyn {
3235d6c3191SAndreas Gruenbacher 	int error;
324b5376771SSerge E. Hallyn 
32539f60c1cSChristian Brauner 	error = __vfs_removexattr(idmap, dentry, XATTR_NAME_CAPS);
3265d6c3191SAndreas Gruenbacher 	if (error == -EOPNOTSUPP)
3275d6c3191SAndreas Gruenbacher 		error = 0;
3285d6c3191SAndreas Gruenbacher 	return error;
329b5376771SSerge E. Hallyn }
330b5376771SSerge E. Hallyn 
rootid_owns_currentns(vfsuid_t rootvfsuid)331b7c9b675SChristian Brauner static bool rootid_owns_currentns(vfsuid_t rootvfsuid)
3328db6c34fSSerge E. Hallyn {
3338db6c34fSSerge E. Hallyn 	struct user_namespace *ns;
334b7c9b675SChristian Brauner 	kuid_t kroot;
3358db6c34fSSerge E. Hallyn 
336b7c9b675SChristian Brauner 	if (!vfsuid_valid(rootvfsuid))
3378db6c34fSSerge E. Hallyn 		return false;
3388db6c34fSSerge E. Hallyn 
339b7c9b675SChristian Brauner 	kroot = vfsuid_into_kuid(rootvfsuid);
3408db6c34fSSerge E. Hallyn 	for (ns = current_user_ns();; ns = ns->parent) {
3418db6c34fSSerge E. Hallyn 		if (from_kuid(ns, kroot) == 0)
3428db6c34fSSerge E. Hallyn 			return true;
3438db6c34fSSerge E. Hallyn 		if (ns == &init_user_ns)
3448db6c34fSSerge E. Hallyn 			break;
3458db6c34fSSerge E. Hallyn 	}
3468db6c34fSSerge E. Hallyn 
3478db6c34fSSerge E. Hallyn 	return false;
3488db6c34fSSerge E. Hallyn }
3498db6c34fSSerge E. Hallyn 
/* Return @m with the VFS_CAP_FLAGS_EFFECTIVE bit(s) cleared. */
static __u32 sansflags(__u32 m)
{
	const __u32 flag_bits = VFS_CAP_FLAGS_EFFECTIVE;

	return m & ~flag_bits;
}
3548db6c34fSSerge E. Hallyn 
is_v2header(int size,const struct vfs_cap_data * cap)355f6fbd8cbSPaul Moore static bool is_v2header(int size, const struct vfs_cap_data *cap)
3568db6c34fSSerge E. Hallyn {
3578db6c34fSSerge E. Hallyn 	if (size != XATTR_CAPS_SZ_2)
3588db6c34fSSerge E. Hallyn 		return false;
359dc32b5c3SEric Biggers 	return sansflags(le32_to_cpu(cap->magic_etc)) == VFS_CAP_REVISION_2;
3608db6c34fSSerge E. Hallyn }
3618db6c34fSSerge E. Hallyn 
is_v3header(int size,const struct vfs_cap_data * cap)362f6fbd8cbSPaul Moore static bool is_v3header(int size, const struct vfs_cap_data *cap)
3638db6c34fSSerge E. Hallyn {
3648db6c34fSSerge E. Hallyn 	if (size != XATTR_CAPS_SZ_3)
3658db6c34fSSerge E. Hallyn 		return false;
366dc32b5c3SEric Biggers 	return sansflags(le32_to_cpu(cap->magic_etc)) == VFS_CAP_REVISION_3;
3678db6c34fSSerge E. Hallyn }
3688db6c34fSSerge E. Hallyn 
3698db6c34fSSerge E. Hallyn /*
3708db6c34fSSerge E. Hallyn  * getsecurity: We are called for security.* before any attempt to read the
3718db6c34fSSerge E. Hallyn  * xattr from the inode itself.
3728db6c34fSSerge E. Hallyn  *
3738db6c34fSSerge E. Hallyn  * This gives us a chance to read the on-disk value and convert it.  If we
3748db6c34fSSerge E. Hallyn  * return -EOPNOTSUPP, then vfs_getxattr() will call the i_op handler.
3758db6c34fSSerge E. Hallyn  *
3768db6c34fSSerge E. Hallyn  * Note we are not called by vfs_getxattr_alloc(), but that is only called
3778db6c34fSSerge E. Hallyn  * by the integrity subsystem, which really wants the unconverted values -
3788db6c34fSSerge E. Hallyn  * so that's good.
3798db6c34fSSerge E. Hallyn  */
cap_inode_getsecurity(struct mnt_idmap * idmap,struct inode * inode,const char * name,void ** buffer,bool alloc)3804609e1f1SChristian Brauner int cap_inode_getsecurity(struct mnt_idmap *idmap,
38171bc356fSChristian Brauner 			  struct inode *inode, const char *name, void **buffer,
3828db6c34fSSerge E. Hallyn 			  bool alloc)
3838db6c34fSSerge E. Hallyn {
384f6fbd8cbSPaul Moore 	int size;
3858db6c34fSSerge E. Hallyn 	kuid_t kroot;
386b7c9b675SChristian Brauner 	vfsuid_t vfsroot;
387f2b00be4SMiklos Szeredi 	u32 nsmagic, magic;
3888db6c34fSSerge E. Hallyn 	uid_t root, mappedroot;
3898db6c34fSSerge E. Hallyn 	char *tmpbuf = NULL;
3908db6c34fSSerge E. Hallyn 	struct vfs_cap_data *cap;
391f2b00be4SMiklos Szeredi 	struct vfs_ns_cap_data *nscap = NULL;
3928db6c34fSSerge E. Hallyn 	struct dentry *dentry;
3938db6c34fSSerge E. Hallyn 	struct user_namespace *fs_ns;
3948db6c34fSSerge E. Hallyn 
3958db6c34fSSerge E. Hallyn 	if (strcmp(name, "capability") != 0)
3968db6c34fSSerge E. Hallyn 		return -EOPNOTSUPP;
3978db6c34fSSerge E. Hallyn 
398355139a8SEddie.Horng 	dentry = d_find_any_alias(inode);
3998db6c34fSSerge E. Hallyn 	if (!dentry)
4008db6c34fSSerge E. Hallyn 		return -EINVAL;
4014609e1f1SChristian Brauner 	size = vfs_getxattr_alloc(idmap, dentry, XATTR_NAME_CAPS, &tmpbuf,
402f6fbd8cbSPaul Moore 				  sizeof(struct vfs_ns_cap_data), GFP_NOFS);
4038db6c34fSSerge E. Hallyn 	dput(dentry);
404f6fbd8cbSPaul Moore 	/* gcc11 complains if we don't check for !tmpbuf */
405f6fbd8cbSPaul Moore 	if (size < 0 || !tmpbuf)
4068cf0a1bcSGaosheng Cui 		goto out_free;
4078db6c34fSSerge E. Hallyn 
4088db6c34fSSerge E. Hallyn 	fs_ns = inode->i_sb->s_user_ns;
4098db6c34fSSerge E. Hallyn 	cap = (struct vfs_cap_data *) tmpbuf;
410f6fbd8cbSPaul Moore 	if (is_v2header(size, cap)) {
411f2b00be4SMiklos Szeredi 		root = 0;
412f6fbd8cbSPaul Moore 	} else if (is_v3header(size, cap)) {
4138db6c34fSSerge E. Hallyn 		nscap = (struct vfs_ns_cap_data *) tmpbuf;
4148db6c34fSSerge E. Hallyn 		root = le32_to_cpu(nscap->rootid);
415f2b00be4SMiklos Szeredi 	} else {
416f2b00be4SMiklos Szeredi 		size = -EINVAL;
417f2b00be4SMiklos Szeredi 		goto out_free;
418f2b00be4SMiklos Szeredi 	}
419f2b00be4SMiklos Szeredi 
4208db6c34fSSerge E. Hallyn 	kroot = make_kuid(fs_ns, root);
4218db6c34fSSerge E. Hallyn 
42271bc356fSChristian Brauner 	/* If this is an idmapped mount shift the kuid. */
4234d7ca409SChristian Brauner 	vfsroot = make_vfsuid(idmap, fs_ns, kroot);
42471bc356fSChristian Brauner 
4258db6c34fSSerge E. Hallyn 	/* If the root kuid maps to a valid uid in current ns, then return
4268db6c34fSSerge E. Hallyn 	 * this as a nscap. */
427b7c9b675SChristian Brauner 	mappedroot = from_kuid(current_user_ns(), vfsuid_into_kuid(vfsroot));
4288db6c34fSSerge E. Hallyn 	if (mappedroot != (uid_t)-1 && mappedroot != (uid_t)0) {
429f2b00be4SMiklos Szeredi 		size = sizeof(struct vfs_ns_cap_data);
4308db6c34fSSerge E. Hallyn 		if (alloc) {
431f2b00be4SMiklos Szeredi 			if (!nscap) {
432f2b00be4SMiklos Szeredi 				/* v2 -> v3 conversion */
433f2b00be4SMiklos Szeredi 				nscap = kzalloc(size, GFP_ATOMIC);
434f2b00be4SMiklos Szeredi 				if (!nscap) {
435f2b00be4SMiklos Szeredi 					size = -ENOMEM;
436f2b00be4SMiklos Szeredi 					goto out_free;
437f2b00be4SMiklos Szeredi 				}
438f2b00be4SMiklos Szeredi 				nsmagic = VFS_CAP_REVISION_3;
439f2b00be4SMiklos Szeredi 				magic = le32_to_cpu(cap->magic_etc);
440f2b00be4SMiklos Szeredi 				if (magic & VFS_CAP_FLAGS_EFFECTIVE)
441f2b00be4SMiklos Szeredi 					nsmagic |= VFS_CAP_FLAGS_EFFECTIVE;
442f2b00be4SMiklos Szeredi 				memcpy(&nscap->data, &cap->data, sizeof(__le32) * 2 * VFS_CAP_U32);
443f2b00be4SMiklos Szeredi 				nscap->magic_etc = cpu_to_le32(nsmagic);
444f2b00be4SMiklos Szeredi 			} else {
445f2b00be4SMiklos Szeredi 				/* use allocated v3 buffer */
446f2b00be4SMiklos Szeredi 				tmpbuf = NULL;
447f2b00be4SMiklos Szeredi 			}
4488db6c34fSSerge E. Hallyn 			nscap->rootid = cpu_to_le32(mappedroot);
449f2b00be4SMiklos Szeredi 			*buffer = nscap;
450f2b00be4SMiklos Szeredi 		}
451f2b00be4SMiklos Szeredi 		goto out_free;
4528db6c34fSSerge E. Hallyn 	}
4538db6c34fSSerge E. Hallyn 
454b7c9b675SChristian Brauner 	if (!rootid_owns_currentns(vfsroot)) {
455f2b00be4SMiklos Szeredi 		size = -EOVERFLOW;
456f2b00be4SMiklos Szeredi 		goto out_free;
4578db6c34fSSerge E. Hallyn 	}
4588db6c34fSSerge E. Hallyn 
4598db6c34fSSerge E. Hallyn 	/* This comes from a parent namespace.  Return as a v2 capability */
4608db6c34fSSerge E. Hallyn 	size = sizeof(struct vfs_cap_data);
4618db6c34fSSerge E. Hallyn 	if (alloc) {
462f2b00be4SMiklos Szeredi 		if (nscap) {
463f2b00be4SMiklos Szeredi 			/* v3 -> v2 conversion */
464f2b00be4SMiklos Szeredi 			cap = kzalloc(size, GFP_ATOMIC);
465f2b00be4SMiklos Szeredi 			if (!cap) {
466f2b00be4SMiklos Szeredi 				size = -ENOMEM;
467f2b00be4SMiklos Szeredi 				goto out_free;
468f2b00be4SMiklos Szeredi 			}
4698db6c34fSSerge E. Hallyn 			magic = VFS_CAP_REVISION_2;
4708db6c34fSSerge E. Hallyn 			nsmagic = le32_to_cpu(nscap->magic_etc);
4718db6c34fSSerge E. Hallyn 			if (nsmagic & VFS_CAP_FLAGS_EFFECTIVE)
4728db6c34fSSerge E. Hallyn 				magic |= VFS_CAP_FLAGS_EFFECTIVE;
4738db6c34fSSerge E. Hallyn 			memcpy(&cap->data, &nscap->data, sizeof(__le32) * 2 * VFS_CAP_U32);
4748db6c34fSSerge E. Hallyn 			cap->magic_etc = cpu_to_le32(magic);
4751f578172STetsuo Handa 		} else {
476f2b00be4SMiklos Szeredi 			/* use unconverted v2 */
477f2b00be4SMiklos Szeredi 			tmpbuf = NULL;
4788db6c34fSSerge E. Hallyn 		}
479f2b00be4SMiklos Szeredi 		*buffer = cap;
4808db6c34fSSerge E. Hallyn 	}
481f2b00be4SMiklos Szeredi out_free:
4828db6c34fSSerge E. Hallyn 	kfree(tmpbuf);
4838db6c34fSSerge E. Hallyn 	return size;
4848db6c34fSSerge E. Hallyn }
4858db6c34fSSerge E. Hallyn 
486e65ce2a5SChristian Brauner /**
487e65ce2a5SChristian Brauner  * rootid_from_xattr - translate root uid of vfs caps
488e65ce2a5SChristian Brauner  *
489e65ce2a5SChristian Brauner  * @value:	vfs caps value which may be modified by this function
490e65ce2a5SChristian Brauner  * @size:	size of @ivalue
491e65ce2a5SChristian Brauner  * @task_ns:	user namespace of the caller
492e65ce2a5SChristian Brauner  */
rootid_from_xattr(const void * value,size_t size,struct user_namespace * task_ns)493b7c9b675SChristian Brauner static vfsuid_t rootid_from_xattr(const void *value, size_t size,
494b7c9b675SChristian Brauner 				  struct user_namespace *task_ns)
4958db6c34fSSerge E. Hallyn {
4968db6c34fSSerge E. Hallyn 	const struct vfs_ns_cap_data *nscap = value;
4978db6c34fSSerge E. Hallyn 	uid_t rootid = 0;
4988db6c34fSSerge E. Hallyn 
4998db6c34fSSerge E. Hallyn 	if (size == XATTR_CAPS_SZ_3)
5008db6c34fSSerge E. Hallyn 		rootid = le32_to_cpu(nscap->rootid);
5018db6c34fSSerge E. Hallyn 
502b7c9b675SChristian Brauner 	return VFSUIDT_INIT(make_kuid(task_ns, rootid));
5038db6c34fSSerge E. Hallyn }
5048db6c34fSSerge E. Hallyn 
validheader(size_t size,const struct vfs_cap_data * cap)505dc32b5c3SEric Biggers static bool validheader(size_t size, const struct vfs_cap_data *cap)
5068db6c34fSSerge E. Hallyn {
507dc32b5c3SEric Biggers 	return is_v2header(size, cap) || is_v3header(size, cap);
5088db6c34fSSerge E. Hallyn }
5098db6c34fSSerge E. Hallyn 
510e65ce2a5SChristian Brauner /**
511e65ce2a5SChristian Brauner  * cap_convert_nscap - check vfs caps
512e65ce2a5SChristian Brauner  *
51339f60c1cSChristian Brauner  * @idmap:	idmap of the mount the inode was found from
514e65ce2a5SChristian Brauner  * @dentry:	used to retrieve inode to check permissions on
515e65ce2a5SChristian Brauner  * @ivalue:	vfs caps value which may be modified by this function
516e65ce2a5SChristian Brauner  * @size:	size of @ivalue
517e65ce2a5SChristian Brauner  *
5188db6c34fSSerge E. Hallyn  * User requested a write of security.capability.  If needed, update the
5198db6c34fSSerge E. Hallyn  * xattr to change from v2 to v3, or to fixup the v3 rootid.
5208db6c34fSSerge E. Hallyn  *
52139f60c1cSChristian Brauner  * If the inode has been found through an idmapped mount the idmap of
52239f60c1cSChristian Brauner  * the vfsmount must be passed through @idmap. This function will then
52339f60c1cSChristian Brauner  * take care to map the inode according to @idmap before checking
524e65ce2a5SChristian Brauner  * permissions. On non-idmapped mounts or if permission checking is to be
5254432b507SPaul Moore  * performed on the raw inode simply pass @nop_mnt_idmap.
526e65ce2a5SChristian Brauner  *
527049ae601SRandy Dunlap  * Return: On success, return the new size; on error, return < 0.
5288db6c34fSSerge E. Hallyn  */
int cap_convert_nscap(struct mnt_idmap *idmap, struct dentry *dentry,
		      const void **ivalue, size_t size)
{
	const struct vfs_cap_data *in = *ivalue;
	struct inode *inode = d_backing_inode(dentry);
	struct user_namespace *caller_ns = current_user_ns();
	struct user_namespace *sb_ns = inode->i_sb->s_user_ns;
	struct vfs_ns_cap_data *out;
	vfsuid_t root_vfsuid;
	kuid_t root_kuid;
	uid_t disk_rootid;
	__u32 out_magic;

	/* Reject a missing value or a header validheader() does not accept. */
	if (!*ivalue || !validheader(size, in))
		return -EINVAL;

	if (!capable_wrt_inode_uidgid(idmap, inode, CAP_SETFCAP))
		return -EPERM;

	/*
	 * A v2-sized value on a non-idmapped mount is written unchanged when
	 * the caller holds CAP_SETFCAP in the superblock's user namespace.
	 */
	if (size == XATTR_CAPS_SZ_2 && idmap == &nop_mnt_idmap &&
	    ns_capable(sb_ns, CAP_SETFCAP))
		return size;

	/* Resolve the rootid named by the xattr, as seen from the caller. */
	root_vfsuid = rootid_from_xattr(*ivalue, size, caller_ns);
	if (!vfsuid_valid(root_vfsuid))
		return -EINVAL;

	/* Undo the mount's idmapping to obtain the filesystem-side kuid. */
	root_kuid = from_vfsuid(idmap, sb_ns, root_vfsuid);
	if (!uid_valid(root_kuid))
		return -EINVAL;

	/* The value stored on disk is relative to the superblock's userns. */
	disk_rootid = from_kuid(sb_ns, root_kuid);
	if (disk_rootid == (uid_t)-1)
		return -EINVAL;

	out = kmalloc(sizeof(*out), GFP_ATOMIC);
	if (!out)
		return -ENOMEM;

	/* Always emit a v3 header, carrying over only the effective flag. */
	out_magic = VFS_CAP_REVISION_3;
	if (le32_to_cpu(in->magic_etc) & VFS_CAP_FLAGS_EFFECTIVE)
		out_magic |= VFS_CAP_FLAGS_EFFECTIVE;

	out->magic_etc = cpu_to_le32(out_magic);
	memcpy(&out->data, &in->data, sizeof(__le32) * 2 * VFS_CAP_U32);
	out->rootid = cpu_to_le32(disk_rootid);

	/*
	 * Hand the freshly allocated v3 blob back through @ivalue.
	 * NOTE(review): presumably the caller is responsible for freeing it;
	 * confirm against the setxattr call path.
	 */
	*ivalue = out;
	return sizeof(*out);
}
5818db6c34fSSerge E. Hallyn 
5821d045980SDavid Howells /*
5831d045980SDavid Howells  * Calculate the new process capability sets from the capability sets attached
5841d045980SDavid Howells  * to a file.
5851d045980SDavid Howells  */
bprm_caps_from_vfs_caps(struct cpu_vfs_cap_data * caps,struct linux_binprm * bprm,bool * effective,bool * has_fcap)586c0b00441SEric Paris static inline int bprm_caps_from_vfs_caps(struct cpu_vfs_cap_data *caps,
587a6f76f23SDavid Howells 					  struct linux_binprm *bprm,
5884d49f671SZhi Li 					  bool *effective,
589fc7eadf7SRichard Guy Briggs 					  bool *has_fcap)
590b5376771SSerge E. Hallyn {
591a6f76f23SDavid Howells 	struct cred *new = bprm->cred;
592c0b00441SEric Paris 	int ret = 0;
593c0b00441SEric Paris 
594c0b00441SEric Paris 	if (caps->magic_etc & VFS_CAP_FLAGS_EFFECTIVE)
595a6f76f23SDavid Howells 		*effective = true;
596c0b00441SEric Paris 
5974d49f671SZhi Li 	if (caps->magic_etc & VFS_CAP_REVISION_MASK)
598fc7eadf7SRichard Guy Briggs 		*has_fcap = true;
5994d49f671SZhi Li 
600c0b00441SEric Paris 	/*
601c0b00441SEric Paris 	 * pP' = (X & fP) | (pI & fI)
60258319057SAndy Lutomirski 	 * The addition of pA' is handled later.
603c0b00441SEric Paris 	 */
604f122a08bSLinus Torvalds 	new->cap_permitted.val =
605f122a08bSLinus Torvalds 		(new->cap_bset.val & caps->permitted.val) |
606f122a08bSLinus Torvalds 		(new->cap_inheritable.val & caps->inheritable.val);
607c0b00441SEric Paris 
608f122a08bSLinus Torvalds 	if (caps->permitted.val & ~new->cap_permitted.val)
609a6f76f23SDavid Howells 		/* insufficient to execute correctly */
610c0b00441SEric Paris 		ret = -EPERM;
611c0b00441SEric Paris 
612c0b00441SEric Paris 	/*
613c0b00441SEric Paris 	 * For legacy apps, with no internal support for recognizing they
614c0b00441SEric Paris 	 * do not have enough capabilities, we return an error if they are
615c0b00441SEric Paris 	 * missing some "forced" (aka file-permitted) capabilities.
616c0b00441SEric Paris 	 */
617a6f76f23SDavid Howells 	return *effective ? ret : 0;
618c0b00441SEric Paris }
619c0b00441SEric Paris 
62071bc356fSChristian Brauner /**
62171bc356fSChristian Brauner  * get_vfs_caps_from_disk - retrieve vfs caps from disk
62271bc356fSChristian Brauner  *
62339f60c1cSChristian Brauner  * @idmap:	idmap of the mount the inode was found from
62471bc356fSChristian Brauner  * @dentry:	dentry from which @inode is retrieved
62571bc356fSChristian Brauner  * @cpu_caps:	vfs capabilities
62671bc356fSChristian Brauner  *
6271d045980SDavid Howells  * Extract the on-exec-apply capability sets for an executable file.
62871bc356fSChristian Brauner  *
62939f60c1cSChristian Brauner  * If the inode has been found through an idmapped mount the idmap of
63039f60c1cSChristian Brauner  * the vfsmount must be passed through @idmap. This function will then
63139f60c1cSChristian Brauner  * take care to map the inode according to @idmap before checking
63271bc356fSChristian Brauner  * permissions. On non-idmapped mounts or if permission checking is to be
6334432b507SPaul Moore  * performed on the raw inode simply pass @nop_mnt_idmap.
6341d045980SDavid Howells  */
get_vfs_caps_from_disk(struct mnt_idmap * idmap,const struct dentry * dentry,struct cpu_vfs_cap_data * cpu_caps)63539f60c1cSChristian Brauner int get_vfs_caps_from_disk(struct mnt_idmap *idmap,
63671bc356fSChristian Brauner 			   const struct dentry *dentry,
63771bc356fSChristian Brauner 			   struct cpu_vfs_cap_data *cpu_caps)
638c0b00441SEric Paris {
639c6f493d6SDavid Howells 	struct inode *inode = d_backing_inode(dentry);
640b5376771SSerge E. Hallyn 	__u32 magic_etc;
641c0b00441SEric Paris 	int size;
6428db6c34fSSerge E. Hallyn 	struct vfs_ns_cap_data data, *nscaps = &data;
6438db6c34fSSerge E. Hallyn 	struct vfs_cap_data *caps = (struct vfs_cap_data *) &data;
6448db6c34fSSerge E. Hallyn 	kuid_t rootkuid;
645b7c9b675SChristian Brauner 	vfsuid_t rootvfsuid;
64676ba89c7SColin Ian King 	struct user_namespace *fs_ns;
647c0b00441SEric Paris 
648c0b00441SEric Paris 	memset(cpu_caps, 0, sizeof(struct cpu_vfs_cap_data));
649c0b00441SEric Paris 
6505d6c3191SAndreas Gruenbacher 	if (!inode)
651c0b00441SEric Paris 		return -ENODATA;
652c0b00441SEric Paris 
65376ba89c7SColin Ian King 	fs_ns = inode->i_sb->s_user_ns;
6545d6c3191SAndreas Gruenbacher 	size = __vfs_getxattr((struct dentry *)dentry, inode,
6558db6c34fSSerge E. Hallyn 			      XATTR_NAME_CAPS, &data, XATTR_CAPS_SZ);
656a6f76f23SDavid Howells 	if (size == -ENODATA || size == -EOPNOTSUPP)
657c0b00441SEric Paris 		/* no data, that's ok */
658c0b00441SEric Paris 		return -ENODATA;
6598db6c34fSSerge E. Hallyn 
660c0b00441SEric Paris 	if (size < 0)
661c0b00441SEric Paris 		return size;
662b5376771SSerge E. Hallyn 
663e338d263SAndrew Morgan 	if (size < sizeof(magic_etc))
664b5376771SSerge E. Hallyn 		return -EINVAL;
665b5376771SSerge E. Hallyn 
6668db6c34fSSerge E. Hallyn 	cpu_caps->magic_etc = magic_etc = le32_to_cpu(caps->magic_etc);
667b5376771SSerge E. Hallyn 
6688db6c34fSSerge E. Hallyn 	rootkuid = make_kuid(fs_ns, 0);
669a6f76f23SDavid Howells 	switch (magic_etc & VFS_CAP_REVISION_MASK) {
670e338d263SAndrew Morgan 	case VFS_CAP_REVISION_1:
671e338d263SAndrew Morgan 		if (size != XATTR_CAPS_SZ_1)
672e338d263SAndrew Morgan 			return -EINVAL;
673e338d263SAndrew Morgan 		break;
674e338d263SAndrew Morgan 	case VFS_CAP_REVISION_2:
675e338d263SAndrew Morgan 		if (size != XATTR_CAPS_SZ_2)
676e338d263SAndrew Morgan 			return -EINVAL;
677e338d263SAndrew Morgan 		break;
6788db6c34fSSerge E. Hallyn 	case VFS_CAP_REVISION_3:
6798db6c34fSSerge E. Hallyn 		if (size != XATTR_CAPS_SZ_3)
6808db6c34fSSerge E. Hallyn 			return -EINVAL;
6818db6c34fSSerge E. Hallyn 		rootkuid = make_kuid(fs_ns, le32_to_cpu(nscaps->rootid));
6828db6c34fSSerge E. Hallyn 		break;
6838db6c34fSSerge E. Hallyn 
684b5376771SSerge E. Hallyn 	default:
685b5376771SSerge E. Hallyn 		return -EINVAL;
686b5376771SSerge E. Hallyn 	}
687b7c9b675SChristian Brauner 
6884d7ca409SChristian Brauner 	rootvfsuid = make_vfsuid(idmap, fs_ns, rootkuid);
689b7c9b675SChristian Brauner 	if (!vfsuid_valid(rootvfsuid))
690b7c9b675SChristian Brauner 		return -ENODATA;
691b7c9b675SChristian Brauner 
6928db6c34fSSerge E. Hallyn 	/* Limit the caps to the mounter of the filesystem
6938db6c34fSSerge E. Hallyn 	 * or the more limited uid specified in the xattr.
6948db6c34fSSerge E. Hallyn 	 */
695b7c9b675SChristian Brauner 	if (!rootid_owns_currentns(rootvfsuid))
6968db6c34fSSerge E. Hallyn 		return -ENODATA;
697e338d263SAndrew Morgan 
698f122a08bSLinus Torvalds 	cpu_caps->permitted.val = le32_to_cpu(caps->data[0].permitted);
699f122a08bSLinus Torvalds 	cpu_caps->inheritable.val = le32_to_cpu(caps->data[0].inheritable);
700f122a08bSLinus Torvalds 
701f122a08bSLinus Torvalds 	/*
702f122a08bSLinus Torvalds 	 * Rev1 had just a single 32-bit word, later expanded
703f122a08bSLinus Torvalds 	 * to a second one for the high bits
704f122a08bSLinus Torvalds 	 */
705f122a08bSLinus Torvalds 	if ((magic_etc & VFS_CAP_REVISION_MASK) != VFS_CAP_REVISION_1) {
706f122a08bSLinus Torvalds 		cpu_caps->permitted.val += (u64)le32_to_cpu(caps->data[1].permitted) << 32;
707f122a08bSLinus Torvalds 		cpu_caps->inheritable.val += (u64)le32_to_cpu(caps->data[1].inheritable) << 32;
708e338d263SAndrew Morgan 	}
709a6f76f23SDavid Howells 
710f122a08bSLinus Torvalds 	cpu_caps->permitted.val &= CAP_VALID_MASK;
711f122a08bSLinus Torvalds 	cpu_caps->inheritable.val &= CAP_VALID_MASK;
7127d8b6c63SEric Paris 
713b7c9b675SChristian Brauner 	cpu_caps->rootid = vfsuid_into_kuid(rootvfsuid);
7142fec30e2SRichard Guy Briggs 
715c0b00441SEric Paris 	return 0;
716b5376771SSerge E. Hallyn }
717b5376771SSerge E. Hallyn 
7181d045980SDavid Howells /*
7191d045980SDavid Howells  * Attempt to get the on-exec apply capability sets for an executable file from
7201d045980SDavid Howells  * its xattrs and, if present, apply them to the proposed credentials being
7211d045980SDavid Howells  * constructed by execve().
7221d045980SDavid Howells  */
get_file_caps(struct linux_binprm * bprm,struct file * file,bool * effective,bool * has_fcap)72356305aa9SEric W. Biederman static int get_file_caps(struct linux_binprm *bprm, struct file *file,
72456305aa9SEric W. Biederman 			 bool *effective, bool *has_fcap)
725b5376771SSerge E. Hallyn {
726b5376771SSerge E. Hallyn 	int rc = 0;
727c0b00441SEric Paris 	struct cpu_vfs_cap_data vcaps;
728b5376771SSerge E. Hallyn 
729ee67ae7eSKees Cook 	cap_clear(bprm->cred->cap_permitted);
7303318a386SSerge Hallyn 
7311f29fae2SSerge E. Hallyn 	if (!file_caps_enabled)
7321f29fae2SSerge E. Hallyn 		return 0;
7331f29fae2SSerge E. Hallyn 
73456305aa9SEric W. Biederman 	if (!mnt_may_suid(file->f_path.mnt))
735b5376771SSerge E. Hallyn 		return 0;
736380cf5baSAndy Lutomirski 
737380cf5baSAndy Lutomirski 	/*
738380cf5baSAndy Lutomirski 	 * This check is redundant with mnt_may_suid() but is kept to make
739380cf5baSAndy Lutomirski 	 * explicit that capability bits are limited to s_user_ns and its
740380cf5baSAndy Lutomirski 	 * descendants.
741380cf5baSAndy Lutomirski 	 */
74256305aa9SEric W. Biederman 	if (!current_in_userns(file->f_path.mnt->mnt_sb->s_user_ns))
743d07b846fSSeth Forshee 		return 0;
744b5376771SSerge E. Hallyn 
74539f60c1cSChristian Brauner 	rc = get_vfs_caps_from_disk(file_mnt_idmap(file),
74671bc356fSChristian Brauner 				    file->f_path.dentry, &vcaps);
747c0b00441SEric Paris 	if (rc < 0) {
748c0b00441SEric Paris 		if (rc == -EINVAL)
7498db6c34fSSerge E. Hallyn 			printk(KERN_NOTICE "Invalid argument reading file caps for %s\n",
7508db6c34fSSerge E. Hallyn 					bprm->filename);
751c0b00441SEric Paris 		else if (rc == -ENODATA)
752b5376771SSerge E. Hallyn 			rc = 0;
753b5376771SSerge E. Hallyn 		goto out;
754b5376771SSerge E. Hallyn 	}
755b5376771SSerge E. Hallyn 
756fc7eadf7SRichard Guy Briggs 	rc = bprm_caps_from_vfs_caps(&vcaps, bprm, effective, has_fcap);
757b5376771SSerge E. Hallyn 
758b5376771SSerge E. Hallyn out:
759b5376771SSerge E. Hallyn 	if (rc)
760ee67ae7eSKees Cook 		cap_clear(bprm->cred->cap_permitted);
761b5376771SSerge E. Hallyn 
762b5376771SSerge E. Hallyn 	return rc;
763b5376771SSerge E. Hallyn }
764b5376771SSerge E. Hallyn 
root_privileged(void)7659304b46cSRichard Guy Briggs static inline bool root_privileged(void) { return !issecure(SECURE_NOROOT); }
7669304b46cSRichard Guy Briggs 
__is_real(kuid_t uid,struct cred * cred)76781a6a012SRichard Guy Briggs static inline bool __is_real(kuid_t uid, struct cred *cred)
76881a6a012SRichard Guy Briggs { return uid_eq(cred->uid, uid); }
76981a6a012SRichard Guy Briggs 
__is_eff(kuid_t uid,struct cred * cred)77081a6a012SRichard Guy Briggs static inline bool __is_eff(kuid_t uid, struct cred *cred)
77181a6a012SRichard Guy Briggs { return uid_eq(cred->euid, uid); }
77281a6a012SRichard Guy Briggs 
__is_suid(kuid_t uid,struct cred * cred)77381a6a012SRichard Guy Briggs static inline bool __is_suid(kuid_t uid, struct cred *cred)
77481a6a012SRichard Guy Briggs { return !__is_real(uid, cred) && __is_eff(uid, cred); }
77581a6a012SRichard Guy Briggs 
776db1a8922SRichard Guy Briggs /*
777db1a8922SRichard Guy Briggs  * handle_privileged_root - Handle case of privileged root
778db1a8922SRichard Guy Briggs  * @bprm: The execution parameters, including the proposed creds
779db1a8922SRichard Guy Briggs  * @has_fcap: Are any file capabilities set?
780db1a8922SRichard Guy Briggs  * @effective: Do we have effective root privilege?
781db1a8922SRichard Guy Briggs  * @root_uid: This namespace' root UID WRT initial USER namespace
782db1a8922SRichard Guy Briggs  *
783db1a8922SRichard Guy Briggs  * Handle the case where root is privileged and hasn't been neutered by
784db1a8922SRichard Guy Briggs  * SECURE_NOROOT.  If file capabilities are set, they won't be combined with
785db1a8922SRichard Guy Briggs  * set UID root and nothing is changed.  If we are root, cap_permitted is
786db1a8922SRichard Guy Briggs  * updated.  If we have become set UID root, the effective bit is set.
787db1a8922SRichard Guy Briggs  */
handle_privileged_root(struct linux_binprm * bprm,bool has_fcap,bool * effective,kuid_t root_uid)788fc7eadf7SRichard Guy Briggs static void handle_privileged_root(struct linux_binprm *bprm, bool has_fcap,
789db1a8922SRichard Guy Briggs 				   bool *effective, kuid_t root_uid)
790db1a8922SRichard Guy Briggs {
791db1a8922SRichard Guy Briggs 	const struct cred *old = current_cred();
792db1a8922SRichard Guy Briggs 	struct cred *new = bprm->cred;
793db1a8922SRichard Guy Briggs 
7949304b46cSRichard Guy Briggs 	if (!root_privileged())
795db1a8922SRichard Guy Briggs 		return;
796db1a8922SRichard Guy Briggs 	/*
797db1a8922SRichard Guy Briggs 	 * If the legacy file capability is set, then don't set privs
798db1a8922SRichard Guy Briggs 	 * for a setuid root binary run by a non-root user.  Do set it
799db1a8922SRichard Guy Briggs 	 * for a root user just to cause least surprise to an admin.
800db1a8922SRichard Guy Briggs 	 */
80181a6a012SRichard Guy Briggs 	if (has_fcap && __is_suid(root_uid, new)) {
802db1a8922SRichard Guy Briggs 		warn_setuid_and_fcaps_mixed(bprm->filename);
803db1a8922SRichard Guy Briggs 		return;
804db1a8922SRichard Guy Briggs 	}
805db1a8922SRichard Guy Briggs 	/*
806db1a8922SRichard Guy Briggs 	 * To support inheritance of root-permissions and suid-root
807db1a8922SRichard Guy Briggs 	 * executables under compatibility mode, we override the
808db1a8922SRichard Guy Briggs 	 * capability sets for the file.
809db1a8922SRichard Guy Briggs 	 */
81081a6a012SRichard Guy Briggs 	if (__is_eff(root_uid, new) || __is_real(root_uid, new)) {
811db1a8922SRichard Guy Briggs 		/* pP' = (cap_bset & ~0) | (pI & ~0) */
812db1a8922SRichard Guy Briggs 		new->cap_permitted = cap_combine(old->cap_bset,
813db1a8922SRichard Guy Briggs 						 old->cap_inheritable);
814db1a8922SRichard Guy Briggs 	}
815db1a8922SRichard Guy Briggs 	/*
816db1a8922SRichard Guy Briggs 	 * If only the real uid is 0, we do not set the effective bit.
817db1a8922SRichard Guy Briggs 	 */
81881a6a012SRichard Guy Briggs 	if (__is_eff(root_uid, new))
819db1a8922SRichard Guy Briggs 		*effective = true;
820db1a8922SRichard Guy Briggs }
821db1a8922SRichard Guy Briggs 
/*
 * Capability-set comparison helpers.
 *
 * __cap_gained(field, target, source): cap_<field> of @target is not a
 *	subset of cap_<field> of @source.
 * __cap_grew(target, source, cred): within @cred, cap_<target> is not a
 *	subset of cap_<source>.
 * __cap_full(field, cred): cap_<field> of @cred contains CAP_FULL_SET.
 *
 * Pointer arguments and the whole expansion are parenthesized so the macros
 * stay correct when handed an expression or used inside a larger one.
 */
#define __cap_gained(field, target, source) \
	(!cap_issubset((target)->cap_##field, (source)->cap_##field))
#define __cap_grew(target, source, cred) \
	(!cap_issubset((cred)->cap_##target, (cred)->cap_##source))
#define __cap_full(field, cred) \
	(cap_issubset(CAP_FULL_SET, (cred)->cap_##field))
82881a6a012SRichard Guy Briggs 
__is_setuid(struct cred * new,const struct cred * old)82981a6a012SRichard Guy Briggs static inline bool __is_setuid(struct cred *new, const struct cred *old)
83081a6a012SRichard Guy Briggs { return !uid_eq(new->euid, old->uid); }
83181a6a012SRichard Guy Briggs 
__is_setgid(struct cred * new,const struct cred * old)83281a6a012SRichard Guy Briggs static inline bool __is_setgid(struct cred *new, const struct cred *old)
83381a6a012SRichard Guy Briggs { return !gid_eq(new->egid, old->gid); }
83481a6a012SRichard Guy Briggs 
8359fbc2c79SRichard Guy Briggs /*
836dbbbe110SRichard Guy Briggs  * 1) Audit candidate if current->cap_effective is set
8379fbc2c79SRichard Guy Briggs  *
8389fbc2c79SRichard Guy Briggs  * We do not bother to audit if 3 things are true:
8399fbc2c79SRichard Guy Briggs  *   1) cap_effective has all caps
840588fb2c7SRichard Guy Briggs  *   2) we became root *OR* are were already root
8419fbc2c79SRichard Guy Briggs  *   3) root is supposed to have all caps (SECURE_NOROOT)
8429fbc2c79SRichard Guy Briggs  * Since this is just a normal root execing a process.
8439fbc2c79SRichard Guy Briggs  *
8449fbc2c79SRichard Guy Briggs  * Number 1 above might fail if you don't have a full bset, but I think
8459fbc2c79SRichard Guy Briggs  * that is interesting information to audit.
846dbbbe110SRichard Guy Briggs  *
847dbbbe110SRichard Guy Briggs  * A number of other conditions require logging:
848dbbbe110SRichard Guy Briggs  * 2) something prevented setuid root getting all caps
849dbbbe110SRichard Guy Briggs  * 3) non-setuid root gets fcaps
850dbbbe110SRichard Guy Briggs  * 4) non-setuid root gets ambient
8519fbc2c79SRichard Guy Briggs  */
nonroot_raised_pE(struct cred * new,const struct cred * old,kuid_t root,bool has_fcap)852dbbbe110SRichard Guy Briggs static inline bool nonroot_raised_pE(struct cred *new, const struct cred *old,
853dbbbe110SRichard Guy Briggs 				     kuid_t root, bool has_fcap)
8549fbc2c79SRichard Guy Briggs {
8559fbc2c79SRichard Guy Briggs 	bool ret = false;
8569fbc2c79SRichard Guy Briggs 
857dbbbe110SRichard Guy Briggs 	if ((__cap_grew(effective, ambient, new) &&
858dbbbe110SRichard Guy Briggs 	     !(__cap_full(effective, new) &&
859dbbbe110SRichard Guy Briggs 	       (__is_eff(root, new) || __is_real(root, new)) &&
860dbbbe110SRichard Guy Briggs 	       root_privileged())) ||
861dbbbe110SRichard Guy Briggs 	    (root_privileged() &&
862dbbbe110SRichard Guy Briggs 	     __is_suid(root, new) &&
863dbbbe110SRichard Guy Briggs 	     !__cap_full(effective, new)) ||
864dbbbe110SRichard Guy Briggs 	    (!__is_setuid(new, old) &&
865dbbbe110SRichard Guy Briggs 	     ((has_fcap &&
866dbbbe110SRichard Guy Briggs 	       __cap_gained(permitted, new, old)) ||
867dbbbe110SRichard Guy Briggs 	      __cap_gained(ambient, new, old))))
868dbbbe110SRichard Guy Briggs 
8699fbc2c79SRichard Guy Briggs 		ret = true;
870dbbbe110SRichard Guy Briggs 
8719fbc2c79SRichard Guy Briggs 	return ret;
8729fbc2c79SRichard Guy Briggs }
8739fbc2c79SRichard Guy Briggs 
8741d045980SDavid Howells /**
87556305aa9SEric W. Biederman  * cap_bprm_creds_from_file - Set up the proposed credentials for execve().
8761d045980SDavid Howells  * @bprm: The execution parameters, including the proposed creds
87756305aa9SEric W. Biederman  * @file: The file to pull the credentials from
8781d045980SDavid Howells  *
8791d045980SDavid Howells  * Set up the proposed credentials for a new execution context being
8801d045980SDavid Howells  * constructed by execve().  The proposed creds in @bprm->cred is altered,
881049ae601SRandy Dunlap  * which won't take effect immediately.
882049ae601SRandy Dunlap  *
883049ae601SRandy Dunlap  * Return: 0 if successful, -ve on error.
884a6f76f23SDavid Howells  */
cap_bprm_creds_from_file(struct linux_binprm * bprm,struct file * file)88556305aa9SEric W. Biederman int cap_bprm_creds_from_file(struct linux_binprm *bprm, struct file *file)
886b5376771SSerge E. Hallyn {
88756305aa9SEric W. Biederman 	/* Process setpcap binaries and capabilities for uid 0 */
888a6f76f23SDavid Howells 	const struct cred *old = current_cred();
889a6f76f23SDavid Howells 	struct cred *new = bprm->cred;
890fc7eadf7SRichard Guy Briggs 	bool effective = false, has_fcap = false, is_setid;
891b5376771SSerge E. Hallyn 	int ret;
89218815a18SEric W. Biederman 	kuid_t root_uid;
893b5376771SSerge E. Hallyn 
89458319057SAndy Lutomirski 	if (WARN_ON(!cap_ambient_invariant_ok(old)))
89558319057SAndy Lutomirski 		return -EPERM;
89658319057SAndy Lutomirski 
89756305aa9SEric W. Biederman 	ret = get_file_caps(bprm, file, &effective, &has_fcap);
898a6f76f23SDavid Howells 	if (ret < 0)
899a6f76f23SDavid Howells 		return ret;
9001da177e4SLinus Torvalds 
90118815a18SEric W. Biederman 	root_uid = make_kuid(new->user_ns, 0);
90218815a18SEric W. Biederman 
903fc7eadf7SRichard Guy Briggs 	handle_privileged_root(bprm, has_fcap, &effective, root_uid);
904b5376771SSerge E. Hallyn 
905d52fc5ddSEric Paris 	/* if we have fs caps, clear dangerous personality flags */
9064c7e715fSRichard Guy Briggs 	if (__cap_gained(permitted, new, old))
90756305aa9SEric W. Biederman 		bprm->per_clear |= PER_CLEAR_ON_SETID;
908d52fc5ddSEric Paris 
909a6f76f23SDavid Howells 	/* Don't let someone trace a set[ug]id/setpcap binary with the revised
910259e5e6cSAndy Lutomirski 	 * credentials unless they have the appropriate permit.
911259e5e6cSAndy Lutomirski 	 *
912259e5e6cSAndy Lutomirski 	 * In addition, if NO_NEW_PRIVS, then ensure we get no new privs.
913a6f76f23SDavid Howells 	 */
91481a6a012SRichard Guy Briggs 	is_setid = __is_setuid(new, old) || __is_setgid(new, old);
91558319057SAndy Lutomirski 
9164c7e715fSRichard Guy Briggs 	if ((is_setid || __cap_gained(permitted, new, old)) &&
9179227dd2aSEric W. Biederman 	    ((bprm->unsafe & ~LSM_UNSAFE_PTRACE) ||
91820523132SEric W. Biederman 	     !ptracer_capable(current, new->user_ns))) {
919a6f76f23SDavid Howells 		/* downgrade; they get no more than they had, and maybe less */
92070169420SEric W. Biederman 		if (!ns_capable(new->user_ns, CAP_SETUID) ||
921259e5e6cSAndy Lutomirski 		    (bprm->unsafe & LSM_UNSAFE_NO_NEW_PRIVS)) {
922a6f76f23SDavid Howells 			new->euid = new->uid;
923a6f76f23SDavid Howells 			new->egid = new->gid;
9241da177e4SLinus Torvalds 		}
925a6f76f23SDavid Howells 		new->cap_permitted = cap_intersect(new->cap_permitted,
926a6f76f23SDavid Howells 						   old->cap_permitted);
9271da177e4SLinus Torvalds 	}
9281da177e4SLinus Torvalds 
929a6f76f23SDavid Howells 	new->suid = new->fsuid = new->euid;
930a6f76f23SDavid Howells 	new->sgid = new->fsgid = new->egid;
9311da177e4SLinus Torvalds 
93258319057SAndy Lutomirski 	/* File caps or setid cancels ambient. */
933fc7eadf7SRichard Guy Briggs 	if (has_fcap || is_setid)
93458319057SAndy Lutomirski 		cap_clear(new->cap_ambient);
93558319057SAndy Lutomirski 
93658319057SAndy Lutomirski 	/*
93758319057SAndy Lutomirski 	 * Now that we've computed pA', update pP' to give:
93858319057SAndy Lutomirski 	 *   pP' = (X & fP) | (pI & fI) | pA'
93958319057SAndy Lutomirski 	 */
94058319057SAndy Lutomirski 	new->cap_permitted = cap_combine(new->cap_permitted, new->cap_ambient);
94158319057SAndy Lutomirski 
94258319057SAndy Lutomirski 	/*
94358319057SAndy Lutomirski 	 * Set pE' = (fE ? pP' : pA').  Because pA' is zero if fE is set,
94458319057SAndy Lutomirski 	 * this is the same as pE' = (fE ? pP' : 0) | pA'.
94558319057SAndy Lutomirski 	 */
946a6f76f23SDavid Howells 	if (effective)
947a6f76f23SDavid Howells 		new->cap_effective = new->cap_permitted;
948e338d263SAndrew Morgan 	else
94958319057SAndy Lutomirski 		new->cap_effective = new->cap_ambient;
95058319057SAndy Lutomirski 
95158319057SAndy Lutomirski 	if (WARN_ON(!cap_ambient_invariant_ok(new)))
95258319057SAndy Lutomirski 		return -EPERM;
95358319057SAndy Lutomirski 
954dbbbe110SRichard Guy Briggs 	if (nonroot_raised_pE(new, old, root_uid, has_fcap)) {
955a6f76f23SDavid Howells 		ret = audit_log_bprm_fcaps(bprm, new, old);
956a6f76f23SDavid Howells 		if (ret < 0)
957a6f76f23SDavid Howells 			return ret;
958a6f76f23SDavid Howells 	}
9591da177e4SLinus Torvalds 
960d84f4f99SDavid Howells 	new->securebits &= ~issecure_mask(SECURE_KEEP_CAPS);
96158319057SAndy Lutomirski 
96258319057SAndy Lutomirski 	if (WARN_ON(!cap_ambient_invariant_ok(new)))
96358319057SAndy Lutomirski 		return -EPERM;
96458319057SAndy Lutomirski 
96546d98eb4SKees Cook 	/* Check for privilege-elevated exec. */
96602ebbaf4SRichard Guy Briggs 	if (is_setid ||
96702ebbaf4SRichard Guy Briggs 	    (!__is_real(root_uid, new) &&
96802ebbaf4SRichard Guy Briggs 	     (effective ||
96902ebbaf4SRichard Guy Briggs 	      __cap_grew(permitted, ambient, new))))
97056305aa9SEric W. Biederman 		bprm->secureexec = 1;
97146d98eb4SKees Cook 
972a6f76f23SDavid Howells 	return 0;
9731da177e4SLinus Torvalds }
9741da177e4SLinus Torvalds 
9751d045980SDavid Howells /**
9761d045980SDavid Howells  * cap_inode_setxattr - Determine whether an xattr may be altered
9771d045980SDavid Howells  * @dentry: The inode/dentry being altered
9781d045980SDavid Howells  * @name: The name of the xattr to be changed
9791d045980SDavid Howells  * @value: The value that the xattr will be changed to
9801d045980SDavid Howells  * @size: The size of value
9811d045980SDavid Howells  * @flags: The replacement flag
9821d045980SDavid Howells  *
9831d045980SDavid Howells  * Determine whether an xattr may be altered or set on an inode, returning 0 if
9841d045980SDavid Howells  * permission is granted, -ve if denied.
9851d045980SDavid Howells  *
9861d045980SDavid Howells  * This is used to make sure security xattrs don't get updated or set by those
9871d045980SDavid Howells  * who aren't privileged to do so.
9881d045980SDavid Howells  */
cap_inode_setxattr(struct dentry * dentry,const char * name,const void * value,size_t size,int flags)9898f0cfa52SDavid Howells int cap_inode_setxattr(struct dentry *dentry, const char *name,
9908f0cfa52SDavid Howells 		       const void *value, size_t size, int flags)
9911da177e4SLinus Torvalds {
992b1d749c5SEric W. Biederman 	struct user_namespace *user_ns = dentry->d_sb->s_user_ns;
993b1d749c5SEric W. Biederman 
9948db6c34fSSerge E. Hallyn 	/* Ignore non-security xattrs */
9958db6c34fSSerge E. Hallyn 	if (strncmp(name, XATTR_SECURITY_PREFIX,
996c5eaab1dSCarmeli Tamir 			XATTR_SECURITY_PREFIX_LEN) != 0)
997b5376771SSerge E. Hallyn 		return 0;
9981d045980SDavid Howells 
9998db6c34fSSerge E. Hallyn 	/*
10008db6c34fSSerge E. Hallyn 	 * For XATTR_NAME_CAPS the check will be done in
10018db6c34fSSerge E. Hallyn 	 * cap_convert_nscap(), called by setxattr()
10028db6c34fSSerge E. Hallyn 	 */
10038db6c34fSSerge E. Hallyn 	if (strcmp(name, XATTR_NAME_CAPS) == 0)
10048db6c34fSSerge E. Hallyn 		return 0;
10058db6c34fSSerge E. Hallyn 
1006b1d749c5SEric W. Biederman 	if (!ns_capable(user_ns, CAP_SYS_ADMIN))
10071da177e4SLinus Torvalds 		return -EPERM;
10081da177e4SLinus Torvalds 	return 0;
10091da177e4SLinus Torvalds }
10101da177e4SLinus Torvalds 
10111d045980SDavid Howells /**
10121d045980SDavid Howells  * cap_inode_removexattr - Determine whether an xattr may be removed
101371bc356fSChristian Brauner  *
101439f60c1cSChristian Brauner  * @idmap:	idmap of the mount the inode was found from
10151d045980SDavid Howells  * @dentry:	The inode/dentry being altered
10161d045980SDavid Howells  * @name:	The name of the xattr to be changed
10171d045980SDavid Howells  *
10181d045980SDavid Howells  * Determine whether an xattr may be removed from an inode, returning 0 if
10191d045980SDavid Howells  * permission is granted, -ve if denied.
10201d045980SDavid Howells  *
102139f60c1cSChristian Brauner  * If the inode has been found through an idmapped mount the idmap of
102239f60c1cSChristian Brauner  * the vfsmount must be passed through @idmap. This function will then
102339f60c1cSChristian Brauner  * take care to map the inode according to @idmap before checking
102471bc356fSChristian Brauner  * permissions. On non-idmapped mounts or if permission checking is to be
102539f60c1cSChristian Brauner  * performed on the raw inode simply pass @nop_mnt_idmap.
102671bc356fSChristian Brauner  *
10271d045980SDavid Howells  * This is used to make sure security xattrs don't get removed by those who
10281d045980SDavid Howells  * aren't privileged to remove them.
10291d045980SDavid Howells  */
cap_inode_removexattr(struct mnt_idmap * idmap,struct dentry * dentry,const char * name)103039f60c1cSChristian Brauner int cap_inode_removexattr(struct mnt_idmap *idmap,
103171bc356fSChristian Brauner 			  struct dentry *dentry, const char *name)
10321da177e4SLinus Torvalds {
1033b1d749c5SEric W. Biederman 	struct user_namespace *user_ns = dentry->d_sb->s_user_ns;
1034b1d749c5SEric W. Biederman 
10358db6c34fSSerge E. Hallyn 	/* Ignore non-security xattrs */
10368db6c34fSSerge E. Hallyn 	if (strncmp(name, XATTR_SECURITY_PREFIX,
1037c5eaab1dSCarmeli Tamir 			XATTR_SECURITY_PREFIX_LEN) != 0)
10388db6c34fSSerge E. Hallyn 		return 0;
10398db6c34fSSerge E. Hallyn 
10408db6c34fSSerge E. Hallyn 	if (strcmp(name, XATTR_NAME_CAPS) == 0) {
10418db6c34fSSerge E. Hallyn 		/* security.capability gets namespaced */
10428db6c34fSSerge E. Hallyn 		struct inode *inode = d_backing_inode(dentry);
10438db6c34fSSerge E. Hallyn 		if (!inode)
10448db6c34fSSerge E. Hallyn 			return -EINVAL;
10459452e93eSChristian Brauner 		if (!capable_wrt_inode_uidgid(idmap, inode, CAP_SETFCAP))
1046b5376771SSerge E. Hallyn 			return -EPERM;
1047b5376771SSerge E. Hallyn 		return 0;
10481d045980SDavid Howells 	}
10491d045980SDavid Howells 
1050b1d749c5SEric W. Biederman 	if (!ns_capable(user_ns, CAP_SYS_ADMIN))
10511da177e4SLinus Torvalds 		return -EPERM;
10521da177e4SLinus Torvalds 	return 0;
10531da177e4SLinus Torvalds }
10541da177e4SLinus Torvalds 
10551da177e4SLinus Torvalds /*
10561da177e4SLinus Torvalds  * cap_emulate_setxuid() fixes the effective / permitted capabilities of
10571da177e4SLinus Torvalds  * a process after a call to setuid, setreuid, or setresuid.
10581da177e4SLinus Torvalds  *
10591da177e4SLinus Torvalds  *  1) When set*uiding _from_ one of {r,e,s}uid == 0 _to_ all of
10601da177e4SLinus Torvalds  *  {r,e,s}uid != 0, the permitted and effective capabilities are
10611da177e4SLinus Torvalds  *  cleared.
10621da177e4SLinus Torvalds  *
10631da177e4SLinus Torvalds  *  2) When set*uiding _from_ euid == 0 _to_ euid != 0, the effective
10641da177e4SLinus Torvalds  *  capabilities of the process are cleared.
10651da177e4SLinus Torvalds  *
10661da177e4SLinus Torvalds  *  3) When set*uiding _from_ euid != 0 _to_ euid == 0, the effective
10671da177e4SLinus Torvalds  *  capabilities are set to the permitted capabilities.
10681da177e4SLinus Torvalds  *
10691da177e4SLinus Torvalds  *  fsuid is handled elsewhere. fsuid == 0 and {r,e,s}uid!= 0 should
10701da177e4SLinus Torvalds  *  never happen.
10711da177e4SLinus Torvalds  *
10721da177e4SLinus Torvalds  *  -astor
10731da177e4SLinus Torvalds  *
10741da177e4SLinus Torvalds  * cevans - New behaviour, Oct '99
10751da177e4SLinus Torvalds  * A process may, via prctl(), elect to keep its capabilities when it
10761da177e4SLinus Torvalds  * calls setuid() and switches away from uid==0. Both permitted and
10771da177e4SLinus Torvalds  * effective sets will be retained.
10781da177e4SLinus Torvalds  * Without this change, it was impossible for a daemon to drop only some
10791da177e4SLinus Torvalds  * of its privilege. The call to setuid(!=0) would drop all privileges!
10801da177e4SLinus Torvalds  * Keeping uid 0 is not an option because uid 0 owns too many vital
10811da177e4SLinus Torvalds  * files..
10821da177e4SLinus Torvalds  * Thanks to Olaf Kirch and Peter Benie for spotting this.
10831da177e4SLinus Torvalds  */
cap_emulate_setxuid(struct cred * new,const struct cred * old)1084d84f4f99SDavid Howells static inline void cap_emulate_setxuid(struct cred *new, const struct cred *old)
10851da177e4SLinus Torvalds {
108618815a18SEric W. Biederman 	kuid_t root_uid = make_kuid(old->user_ns, 0);
108718815a18SEric W. Biederman 
108818815a18SEric W. Biederman 	if ((uid_eq(old->uid, root_uid) ||
108918815a18SEric W. Biederman 	     uid_eq(old->euid, root_uid) ||
109018815a18SEric W. Biederman 	     uid_eq(old->suid, root_uid)) &&
109118815a18SEric W. Biederman 	    (!uid_eq(new->uid, root_uid) &&
109218815a18SEric W. Biederman 	     !uid_eq(new->euid, root_uid) &&
109358319057SAndy Lutomirski 	     !uid_eq(new->suid, root_uid))) {
109458319057SAndy Lutomirski 		if (!issecure(SECURE_KEEP_CAPS)) {
1095d84f4f99SDavid Howells 			cap_clear(new->cap_permitted);
1096d84f4f99SDavid Howells 			cap_clear(new->cap_effective);
10971da177e4SLinus Torvalds 		}
109858319057SAndy Lutomirski 
109958319057SAndy Lutomirski 		/*
110058319057SAndy Lutomirski 		 * Pre-ambient programs expect setresuid to nonroot followed
110158319057SAndy Lutomirski 		 * by exec to drop capabilities.  We should make sure that
110258319057SAndy Lutomirski 		 * this remains the case.
110358319057SAndy Lutomirski 		 */
110458319057SAndy Lutomirski 		cap_clear(new->cap_ambient);
110558319057SAndy Lutomirski 	}
110618815a18SEric W. Biederman 	if (uid_eq(old->euid, root_uid) && !uid_eq(new->euid, root_uid))
1107d84f4f99SDavid Howells 		cap_clear(new->cap_effective);
110818815a18SEric W. Biederman 	if (!uid_eq(old->euid, root_uid) && uid_eq(new->euid, root_uid))
1109d84f4f99SDavid Howells 		new->cap_effective = new->cap_permitted;
11101da177e4SLinus Torvalds }
11111da177e4SLinus Torvalds 
11121d045980SDavid Howells /**
11131d045980SDavid Howells  * cap_task_fix_setuid - Fix up the results of setuid() call
11141d045980SDavid Howells  * @new: The proposed credentials
11151d045980SDavid Howells  * @old: The current task's current credentials
11161d045980SDavid Howells  * @flags: Indications of what has changed
11171d045980SDavid Howells  *
11181d045980SDavid Howells  * Fix up the results of setuid() call before the credential changes are
1119049ae601SRandy Dunlap  * actually applied.
1120049ae601SRandy Dunlap  *
1121049ae601SRandy Dunlap  * Return: 0 to grant the changes, -ve to deny them.
11221d045980SDavid Howells  */
cap_task_fix_setuid(struct cred * new,const struct cred * old,int flags)1123d84f4f99SDavid Howells int cap_task_fix_setuid(struct cred *new, const struct cred *old, int flags)
11241da177e4SLinus Torvalds {
11251da177e4SLinus Torvalds 	switch (flags) {
11261da177e4SLinus Torvalds 	case LSM_SETID_RE:
11271da177e4SLinus Torvalds 	case LSM_SETID_ID:
11281da177e4SLinus Torvalds 	case LSM_SETID_RES:
11291d045980SDavid Howells 		/* juggle the capabilities to follow [RES]UID changes unless
11301d045980SDavid Howells 		 * otherwise suppressed */
1131d84f4f99SDavid Howells 		if (!issecure(SECURE_NO_SETUID_FIXUP))
1132d84f4f99SDavid Howells 			cap_emulate_setxuid(new, old);
11331da177e4SLinus Torvalds 		break;
11341da177e4SLinus Torvalds 
11351d045980SDavid Howells 	case LSM_SETID_FS:
11364432b507SPaul Moore 		/* juggle the capabilities to follow FSUID changes, unless
11371d045980SDavid Howells 		 * otherwise suppressed
11381d045980SDavid Howells 		 *
11391da177e4SLinus Torvalds 		 * FIXME - is fsuser used for all CAP_FS_MASK capabilities?
11401da177e4SLinus Torvalds 		 *          if not, we might be a bit too harsh here.
11411da177e4SLinus Torvalds 		 */
11421da177e4SLinus Torvalds 		if (!issecure(SECURE_NO_SETUID_FIXUP)) {
114318815a18SEric W. Biederman 			kuid_t root_uid = make_kuid(old->user_ns, 0);
114418815a18SEric W. Biederman 			if (uid_eq(old->fsuid, root_uid) && !uid_eq(new->fsuid, root_uid))
1145d84f4f99SDavid Howells 				new->cap_effective =
1146d84f4f99SDavid Howells 					cap_drop_fs_set(new->cap_effective);
11471d045980SDavid Howells 
114818815a18SEric W. Biederman 			if (!uid_eq(old->fsuid, root_uid) && uid_eq(new->fsuid, root_uid))
1149d84f4f99SDavid Howells 				new->cap_effective =
1150d84f4f99SDavid Howells 					cap_raise_fs_set(new->cap_effective,
1151d84f4f99SDavid Howells 							 new->cap_permitted);
11521da177e4SLinus Torvalds 		}
11531da177e4SLinus Torvalds 		break;
11541d045980SDavid Howells 
11551da177e4SLinus Torvalds 	default:
11561da177e4SLinus Torvalds 		return -EINVAL;
11571da177e4SLinus Torvalds 	}
11581da177e4SLinus Torvalds 
11591da177e4SLinus Torvalds 	return 0;
11601da177e4SLinus Torvalds }
11611da177e4SLinus Torvalds 
1162b5376771SSerge E. Hallyn /*
1163b5376771SSerge E. Hallyn  * Rationale: code calling task_setscheduler, task_setioprio, and
1164b5376771SSerge E. Hallyn  * task_setnice, assumes that
1165b5376771SSerge E. Hallyn  *   . if capable(cap_sys_nice), then those actions should be allowed
1166b5376771SSerge E. Hallyn  *   . if not capable(cap_sys_nice), but acting on your own processes,
1167b5376771SSerge E. Hallyn  *   	then those actions should be allowed
1168b5376771SSerge E. Hallyn  * This is insufficient now since you can call code without suid, but
1169b5376771SSerge E. Hallyn  * yet with increased caps.
1170b5376771SSerge E. Hallyn  * So we check for increased caps on the target process.
1171b5376771SSerge E. Hallyn  */
cap_safe_nice(struct task_struct * p)1172de45e806SSerge E. Hallyn static int cap_safe_nice(struct task_struct *p)
1173b5376771SSerge E. Hallyn {
1174f54fb863SSerge Hallyn 	int is_subset, ret = 0;
1175c69e8d9cSDavid Howells 
1176c69e8d9cSDavid Howells 	rcu_read_lock();
1177c69e8d9cSDavid Howells 	is_subset = cap_issubset(__task_cred(p)->cap_permitted,
1178c69e8d9cSDavid Howells 				 current_cred()->cap_permitted);
1179f54fb863SSerge Hallyn 	if (!is_subset && !ns_capable(__task_cred(p)->user_ns, CAP_SYS_NICE))
1180f54fb863SSerge Hallyn 		ret = -EPERM;
1181c69e8d9cSDavid Howells 	rcu_read_unlock();
1182c69e8d9cSDavid Howells 
1183f54fb863SSerge Hallyn 	return ret;
1184b5376771SSerge E. Hallyn }
1185b5376771SSerge E. Hallyn 
/**
 * cap_task_setscheduler - Determine if scheduler policy change is permitted
 * @p: The task to affect
 *
 * Apply the cap_safe_nice() check to decide whether the scheduler policy of
 * @p may be changed by the current task.
 *
 * Return: 0 if permission is granted, -ve if denied.
 */
int cap_task_setscheduler(struct task_struct *p)
{
	return cap_safe_nice(p);
}
1199b5376771SSerge E. Hallyn 
/**
 * cap_task_setioprio - Determine if I/O priority change is permitted
 * @p: The task to affect
 * @ioprio: The I/O priority to set (not consulted by this check)
 *
 * Apply the cap_safe_nice() check to decide whether the I/O priority of @p
 * may be changed by the current task.
 *
 * Return: 0 if permission is granted, -ve if denied.
 */
int cap_task_setioprio(struct task_struct *p, int ioprio)
{
	return cap_safe_nice(p);
}
1214b5376771SSerge E. Hallyn 
/**
 * cap_task_setnice - Determine if task priority change is permitted
 * @p: The task to affect
 * @nice: The nice value to set (not consulted by this check)
 *
 * Apply the cap_safe_nice() check to decide whether the priority of @p may
 * be changed by the current task.
 *
 * Return: 0 if permission is granted, -ve if denied.
 */
int cap_task_setnice(struct task_struct *p, int nice)
{
	return cap_safe_nice(p);
}
1229b5376771SSerge E. Hallyn 
12303b7391deSSerge E. Hallyn /*
12311d045980SDavid Howells  * Implement PR_CAPBSET_DROP.  Attempt to remove the specified capability from
12321d045980SDavid Howells  * the current task's bounding set.  Returns 0 on success, -ve on error.
12333b7391deSSerge E. Hallyn  */
cap_prctl_drop(unsigned long cap)12346d6f3328STetsuo Handa static int cap_prctl_drop(unsigned long cap)
12353b7391deSSerge E. Hallyn {
12366d6f3328STetsuo Handa 	struct cred *new;
12376d6f3328STetsuo Handa 
1238160da84dSEric W. Biederman 	if (!ns_capable(current_user_ns(), CAP_SETPCAP))
12393b7391deSSerge E. Hallyn 		return -EPERM;
12403b7391deSSerge E. Hallyn 	if (!cap_valid(cap))
12413b7391deSSerge E. Hallyn 		return -EINVAL;
1242d84f4f99SDavid Howells 
12436d6f3328STetsuo Handa 	new = prepare_creds();
12446d6f3328STetsuo Handa 	if (!new)
12456d6f3328STetsuo Handa 		return -ENOMEM;
1246d84f4f99SDavid Howells 	cap_lower(new->cap_bset, cap);
12476d6f3328STetsuo Handa 	return commit_creds(new);
12483b7391deSSerge E. Hallyn }
12493898b1b4SAndrew G. Morgan 
12501d045980SDavid Howells /**
12511d045980SDavid Howells  * cap_task_prctl - Implement process control functions for this security module
12521d045980SDavid Howells  * @option: The process control function requested
1253049ae601SRandy Dunlap  * @arg2: The argument data for this function
1254049ae601SRandy Dunlap  * @arg3: The argument data for this function
1255049ae601SRandy Dunlap  * @arg4: The argument data for this function
1256049ae601SRandy Dunlap  * @arg5: The argument data for this function
12571d045980SDavid Howells  *
12581d045980SDavid Howells  * Allow process control functions (sys_prctl()) to alter capabilities; may
12591d045980SDavid Howells  * also deny access to other functions not otherwise implemented here.
12601d045980SDavid Howells  *
1261049ae601SRandy Dunlap  * Return: 0 or +ve on success, -ENOSYS if this function is not implemented
12621d045980SDavid Howells  * here, other -ve on error.  If -ENOSYS is returned, sys_prctl() and other LSM
12631d045980SDavid Howells  * modules will consider performing the function.
12641d045980SDavid Howells  */
cap_task_prctl(int option,unsigned long arg2,unsigned long arg3,unsigned long arg4,unsigned long arg5)12653898b1b4SAndrew G. Morgan int cap_task_prctl(int option, unsigned long arg2, unsigned long arg3,
1266d84f4f99SDavid Howells 		   unsigned long arg4, unsigned long arg5)
12673898b1b4SAndrew G. Morgan {
12686d6f3328STetsuo Handa 	const struct cred *old = current_cred();
1269d84f4f99SDavid Howells 	struct cred *new;
1270d84f4f99SDavid Howells 
12713898b1b4SAndrew G. Morgan 	switch (option) {
12723898b1b4SAndrew G. Morgan 	case PR_CAPBSET_READ:
1273d84f4f99SDavid Howells 		if (!cap_valid(arg2))
12746d6f3328STetsuo Handa 			return -EINVAL;
12756d6f3328STetsuo Handa 		return !!cap_raised(old->cap_bset, arg2);
1276d84f4f99SDavid Howells 
12773898b1b4SAndrew G. Morgan 	case PR_CAPBSET_DROP:
12786d6f3328STetsuo Handa 		return cap_prctl_drop(arg2);
12793898b1b4SAndrew G. Morgan 
12803898b1b4SAndrew G. Morgan 	/*
12813898b1b4SAndrew G. Morgan 	 * The next four prctl's remain to assist with transitioning a
12823898b1b4SAndrew G. Morgan 	 * system from legacy UID=0 based privilege (when filesystem
12833898b1b4SAndrew G. Morgan 	 * capabilities are not in use) to a system using filesystem
12843898b1b4SAndrew G. Morgan 	 * capabilities only - as the POSIX.1e draft intended.
12853898b1b4SAndrew G. Morgan 	 *
12863898b1b4SAndrew G. Morgan 	 * Note:
12873898b1b4SAndrew G. Morgan 	 *
12883898b1b4SAndrew G. Morgan 	 *  PR_SET_SECUREBITS =
12893898b1b4SAndrew G. Morgan 	 *      issecure_mask(SECURE_KEEP_CAPS_LOCKED)
12903898b1b4SAndrew G. Morgan 	 *    | issecure_mask(SECURE_NOROOT)
12913898b1b4SAndrew G. Morgan 	 *    | issecure_mask(SECURE_NOROOT_LOCKED)
12923898b1b4SAndrew G. Morgan 	 *    | issecure_mask(SECURE_NO_SETUID_FIXUP)
12933898b1b4SAndrew G. Morgan 	 *    | issecure_mask(SECURE_NO_SETUID_FIXUP_LOCKED)
12943898b1b4SAndrew G. Morgan 	 *
12953898b1b4SAndrew G. Morgan 	 * will ensure that the current process and all of its
12963898b1b4SAndrew G. Morgan 	 * children will be locked into a pure
12973898b1b4SAndrew G. Morgan 	 * capability-based-privilege environment.
12983898b1b4SAndrew G. Morgan 	 */
12993898b1b4SAndrew G. Morgan 	case PR_SET_SECUREBITS:
13006d6f3328STetsuo Handa 		if ((((old->securebits & SECURE_ALL_LOCKS) >> 1)
13016d6f3328STetsuo Handa 		     & (old->securebits ^ arg2))			/*[1]*/
13026d6f3328STetsuo Handa 		    || ((old->securebits & SECURE_ALL_LOCKS & ~arg2))	/*[2]*/
13033898b1b4SAndrew G. Morgan 		    || (arg2 & ~(SECURE_ALL_LOCKS | SECURE_ALL_BITS))	/*[3]*/
13046a9de491SEric Paris 		    || (cap_capable(current_cred(),
1305c1a85a00SMicah Morton 				    current_cred()->user_ns,
1306c1a85a00SMicah Morton 				    CAP_SETPCAP,
1307c1a85a00SMicah Morton 				    CAP_OPT_NONE) != 0)			/*[4]*/
13083898b1b4SAndrew G. Morgan 			/*
13093898b1b4SAndrew G. Morgan 			 * [1] no changing of bits that are locked
13103898b1b4SAndrew G. Morgan 			 * [2] no unlocking of locks
13113898b1b4SAndrew G. Morgan 			 * [3] no setting of unsupported bits
13123898b1b4SAndrew G. Morgan 			 * [4] doing anything requires privilege (go read about
13133898b1b4SAndrew G. Morgan 			 *     the "sendmail capabilities bug")
13143898b1b4SAndrew G. Morgan 			 */
1315d84f4f99SDavid Howells 		    )
1316d84f4f99SDavid Howells 			/* cannot change a locked bit */
13176d6f3328STetsuo Handa 			return -EPERM;
13186d6f3328STetsuo Handa 
13196d6f3328STetsuo Handa 		new = prepare_creds();
13206d6f3328STetsuo Handa 		if (!new)
13216d6f3328STetsuo Handa 			return -ENOMEM;
1322d84f4f99SDavid Howells 		new->securebits = arg2;
13236d6f3328STetsuo Handa 		return commit_creds(new);
1324d84f4f99SDavid Howells 
13253898b1b4SAndrew G. Morgan 	case PR_GET_SECUREBITS:
13266d6f3328STetsuo Handa 		return old->securebits;
13273898b1b4SAndrew G. Morgan 
13283898b1b4SAndrew G. Morgan 	case PR_GET_KEEPCAPS:
13296d6f3328STetsuo Handa 		return !!issecure(SECURE_KEEP_CAPS);
1330d84f4f99SDavid Howells 
13313898b1b4SAndrew G. Morgan 	case PR_SET_KEEPCAPS:
1332d84f4f99SDavid Howells 		if (arg2 > 1) /* Note, we rely on arg2 being unsigned here */
13336d6f3328STetsuo Handa 			return -EINVAL;
1334d84f4f99SDavid Howells 		if (issecure(SECURE_KEEP_CAPS_LOCKED))
13356d6f3328STetsuo Handa 			return -EPERM;
13366d6f3328STetsuo Handa 
13376d6f3328STetsuo Handa 		new = prepare_creds();
13386d6f3328STetsuo Handa 		if (!new)
13396d6f3328STetsuo Handa 			return -ENOMEM;
1340d84f4f99SDavid Howells 		if (arg2)
1341d84f4f99SDavid Howells 			new->securebits |= issecure_mask(SECURE_KEEP_CAPS);
13423898b1b4SAndrew G. Morgan 		else
1343d84f4f99SDavid Howells 			new->securebits &= ~issecure_mask(SECURE_KEEP_CAPS);
13446d6f3328STetsuo Handa 		return commit_creds(new);
13453898b1b4SAndrew G. Morgan 
134658319057SAndy Lutomirski 	case PR_CAP_AMBIENT:
134758319057SAndy Lutomirski 		if (arg2 == PR_CAP_AMBIENT_CLEAR_ALL) {
134858319057SAndy Lutomirski 			if (arg3 | arg4 | arg5)
134958319057SAndy Lutomirski 				return -EINVAL;
135058319057SAndy Lutomirski 
135158319057SAndy Lutomirski 			new = prepare_creds();
135258319057SAndy Lutomirski 			if (!new)
135358319057SAndy Lutomirski 				return -ENOMEM;
135458319057SAndy Lutomirski 			cap_clear(new->cap_ambient);
135558319057SAndy Lutomirski 			return commit_creds(new);
135658319057SAndy Lutomirski 		}
135758319057SAndy Lutomirski 
135858319057SAndy Lutomirski 		if (((!cap_valid(arg3)) | arg4 | arg5))
135958319057SAndy Lutomirski 			return -EINVAL;
136058319057SAndy Lutomirski 
136158319057SAndy Lutomirski 		if (arg2 == PR_CAP_AMBIENT_IS_SET) {
136258319057SAndy Lutomirski 			return !!cap_raised(current_cred()->cap_ambient, arg3);
136358319057SAndy Lutomirski 		} else if (arg2 != PR_CAP_AMBIENT_RAISE &&
136458319057SAndy Lutomirski 			   arg2 != PR_CAP_AMBIENT_LOWER) {
136558319057SAndy Lutomirski 			return -EINVAL;
136658319057SAndy Lutomirski 		} else {
136758319057SAndy Lutomirski 			if (arg2 == PR_CAP_AMBIENT_RAISE &&
136858319057SAndy Lutomirski 			    (!cap_raised(current_cred()->cap_permitted, arg3) ||
136958319057SAndy Lutomirski 			     !cap_raised(current_cred()->cap_inheritable,
1370746bf6d6SAndy Lutomirski 					 arg3) ||
1371746bf6d6SAndy Lutomirski 			     issecure(SECURE_NO_CAP_AMBIENT_RAISE)))
137258319057SAndy Lutomirski 				return -EPERM;
137358319057SAndy Lutomirski 
137458319057SAndy Lutomirski 			new = prepare_creds();
137558319057SAndy Lutomirski 			if (!new)
137658319057SAndy Lutomirski 				return -ENOMEM;
137758319057SAndy Lutomirski 			if (arg2 == PR_CAP_AMBIENT_RAISE)
137858319057SAndy Lutomirski 				cap_raise(new->cap_ambient, arg3);
137958319057SAndy Lutomirski 			else
138058319057SAndy Lutomirski 				cap_lower(new->cap_ambient, arg3);
138158319057SAndy Lutomirski 			return commit_creds(new);
138258319057SAndy Lutomirski 		}
138358319057SAndy Lutomirski 
13843898b1b4SAndrew G. Morgan 	default:
13853898b1b4SAndrew G. Morgan 		/* No functionality available - continue with default */
13866d6f3328STetsuo Handa 		return -ENOSYS;
13873898b1b4SAndrew G. Morgan 	}
13881da177e4SLinus Torvalds }
13891da177e4SLinus Torvalds 
13901d045980SDavid Howells /**
13911d045980SDavid Howells  * cap_vm_enough_memory - Determine whether a new virtual mapping is permitted
13921d045980SDavid Howells  * @mm: The VM space in which the new mapping is to be made
13931d045980SDavid Howells  * @pages: The size of the mapping
13941d045980SDavid Howells  *
13951d045980SDavid Howells  * Determine whether the allocation of a new virtual mapping by the current
1396049ae601SRandy Dunlap  * task is permitted.
1397049ae601SRandy Dunlap  *
1398049ae601SRandy Dunlap  * Return: 1 if permission is granted, 0 if not.
13991d045980SDavid Howells  */
cap_vm_enough_memory(struct mm_struct * mm,long pages)140034b4e4aaSAlan Cox int cap_vm_enough_memory(struct mm_struct *mm, long pages)
14011da177e4SLinus Torvalds {
14021da177e4SLinus Torvalds 	int cap_sys_admin = 0;
14031da177e4SLinus Torvalds 
1404c1a85a00SMicah Morton 	if (cap_capable(current_cred(), &init_user_ns,
1405c1a85a00SMicah Morton 				CAP_SYS_ADMIN, CAP_OPT_NOAUDIT) == 0)
14061da177e4SLinus Torvalds 		cap_sys_admin = 1;
1407c1a85a00SMicah Morton 
1408b1d9e6b0SCasey Schaufler 	return cap_sys_admin;
14091da177e4SLinus Torvalds }
14107c73875eSEric Paris 
1411049ae601SRandy Dunlap /**
1412d007794aSAl Viro  * cap_mmap_addr - check if able to map given addr
1413d007794aSAl Viro  * @addr: address attempting to be mapped
1414d007794aSAl Viro  *
1415d007794aSAl Viro  * If the process is attempting to map memory below dac_mmap_min_addr they need
1416d007794aSAl Viro  * CAP_SYS_RAWIO.  The other parameters to this function are unused by the
1417049ae601SRandy Dunlap  * capability security module.
1418049ae601SRandy Dunlap  *
1419049ae601SRandy Dunlap  * Return: 0 if this mapping should be allowed or -EPERM if not.
1420d007794aSAl Viro  */
cap_mmap_addr(unsigned long addr)1421d007794aSAl Viro int cap_mmap_addr(unsigned long addr)
1422d007794aSAl Viro {
1423d007794aSAl Viro 	int ret = 0;
1424d007794aSAl Viro 
1425d007794aSAl Viro 	if (addr < dac_mmap_min_addr) {
1426d007794aSAl Viro 		ret = cap_capable(current_cred(), &init_user_ns, CAP_SYS_RAWIO,
1427c1a85a00SMicah Morton 				  CAP_OPT_NONE);
1428d007794aSAl Viro 		/* set PF_SUPERPRIV if it turns out we allow the low mmap */
1429d007794aSAl Viro 		if (ret == 0)
1430d007794aSAl Viro 			current->flags |= PF_SUPERPRIV;
1431d007794aSAl Viro 	}
1432d007794aSAl Viro 	return ret;
1433d007794aSAl Viro }
1434d007794aSAl Viro 
/*
 * cap_mmap_file - mmap_file hook of the capability module
 *
 * None of the arguments is examined; the mapping is always allowed and 0 is
 * returned.
 */
int cap_mmap_file(struct file *file, unsigned long reqprot,
		  unsigned long prot, unsigned long flags)
{
	return 0;
}
1440b1d9e6b0SCasey Schaufler 
1441b1d9e6b0SCasey Schaufler #ifdef CONFIG_SECURITY
1442b1d9e6b0SCasey Schaufler 
1443f22f9aafSPaul Moore static struct security_hook_list capability_hooks[] __ro_after_init = {
1444b1d9e6b0SCasey Schaufler 	LSM_HOOK_INIT(capable, cap_capable),
1445b1d9e6b0SCasey Schaufler 	LSM_HOOK_INIT(settime, cap_settime),
1446b1d9e6b0SCasey Schaufler 	LSM_HOOK_INIT(ptrace_access_check, cap_ptrace_access_check),
1447b1d9e6b0SCasey Schaufler 	LSM_HOOK_INIT(ptrace_traceme, cap_ptrace_traceme),
1448b1d9e6b0SCasey Schaufler 	LSM_HOOK_INIT(capget, cap_capget),
1449b1d9e6b0SCasey Schaufler 	LSM_HOOK_INIT(capset, cap_capset),
145056305aa9SEric W. Biederman 	LSM_HOOK_INIT(bprm_creds_from_file, cap_bprm_creds_from_file),
1451b1d9e6b0SCasey Schaufler 	LSM_HOOK_INIT(inode_need_killpriv, cap_inode_need_killpriv),
1452b1d9e6b0SCasey Schaufler 	LSM_HOOK_INIT(inode_killpriv, cap_inode_killpriv),
14538db6c34fSSerge E. Hallyn 	LSM_HOOK_INIT(inode_getsecurity, cap_inode_getsecurity),
1454b1d9e6b0SCasey Schaufler 	LSM_HOOK_INIT(mmap_addr, cap_mmap_addr),
1455b1d9e6b0SCasey Schaufler 	LSM_HOOK_INIT(mmap_file, cap_mmap_file),
1456b1d9e6b0SCasey Schaufler 	LSM_HOOK_INIT(task_fix_setuid, cap_task_fix_setuid),
1457b1d9e6b0SCasey Schaufler 	LSM_HOOK_INIT(task_prctl, cap_task_prctl),
1458b1d9e6b0SCasey Schaufler 	LSM_HOOK_INIT(task_setscheduler, cap_task_setscheduler),
1459b1d9e6b0SCasey Schaufler 	LSM_HOOK_INIT(task_setioprio, cap_task_setioprio),
1460b1d9e6b0SCasey Schaufler 	LSM_HOOK_INIT(task_setnice, cap_task_setnice),
1461b1d9e6b0SCasey Schaufler 	LSM_HOOK_INIT(vm_enough_memory, cap_vm_enough_memory),
1462b1d9e6b0SCasey Schaufler };
1463b1d9e6b0SCasey Schaufler 
capability_init(void)1464d117a154SKees Cook static int __init capability_init(void)
1465b1d9e6b0SCasey Schaufler {
1466d69dece5SCasey Schaufler 	security_add_hooks(capability_hooks, ARRAY_SIZE(capability_hooks),
1467d69dece5SCasey Schaufler 				"capability");
1468d117a154SKees Cook 	return 0;
1469b1d9e6b0SCasey Schaufler }
1470b1d9e6b0SCasey Schaufler 
1471d117a154SKees Cook DEFINE_LSM(capability) = {
1472d117a154SKees Cook 	.name = "capability",
1473d117a154SKees Cook 	.order = LSM_ORDER_FIRST,
1474d117a154SKees Cook 	.init = capability_init,
1475d117a154SKees Cook };
1476d117a154SKees Cook 
1477b1d9e6b0SCasey Schaufler #endif /* CONFIG_SECURITY */
1478