xref: /linux-6.15/security/commoncap.c (revision 4ae89b1f)
12874c5fdSThomas Gleixner // SPDX-License-Identifier: GPL-2.0-or-later
23e1c2515SJames Morris /* Common capabilities, needed by capability.o.
31da177e4SLinus Torvalds  */
41da177e4SLinus Torvalds 
5c59ede7bSRandy.Dunlap #include <linux/capability.h>
63fc689e9SEric Paris #include <linux/audit.h>
71da177e4SLinus Torvalds #include <linux/init.h>
81da177e4SLinus Torvalds #include <linux/kernel.h>
9b1d9e6b0SCasey Schaufler #include <linux/lsm_hooks.h>
101da177e4SLinus Torvalds #include <linux/file.h>
111da177e4SLinus Torvalds #include <linux/mm.h>
121da177e4SLinus Torvalds #include <linux/mman.h>
131da177e4SLinus Torvalds #include <linux/pagemap.h>
141da177e4SLinus Torvalds #include <linux/swap.h>
151da177e4SLinus Torvalds #include <linux/skbuff.h>
161da177e4SLinus Torvalds #include <linux/netlink.h>
171da177e4SLinus Torvalds #include <linux/ptrace.h>
181da177e4SLinus Torvalds #include <linux/xattr.h>
191da177e4SLinus Torvalds #include <linux/hugetlb.h>
20b5376771SSerge E. Hallyn #include <linux/mount.h>
21b460cbc5SSerge E. Hallyn #include <linux/sched.h>
223898b1b4SAndrew G. Morgan #include <linux/prctl.h>
233898b1b4SAndrew G. Morgan #include <linux/securebits.h>
243486740aSSerge E. Hallyn #include <linux/user_namespace.h>
2540401530SAl Viro #include <linux/binfmts.h>
2651b79beeSJonghwan Choi #include <linux/personality.h>
27a793d79eSChristian Brauner #include <linux/mnt_idmapping.h>
28f3b8788cSCasey Schaufler #include <uapi/linux/lsm.h>
2972c2d582SAndrew Morgan 
30d48da4d5SJordan Rome #define CREATE_TRACE_POINTS
31d48da4d5SJordan Rome #include <trace/events/capability.h>
32d48da4d5SJordan Rome 
33b5f22a59SSerge E. Hallyn /*
34b5f22a59SSerge E. Hallyn  * If a non-root user executes a setuid-root binary in
35b5f22a59SSerge E. Hallyn  * !secure(SECURE_NOROOT) mode, then we raise capabilities.
36b5f22a59SSerge E. Hallyn  * However if fE is also set, then the intent is for only
37b5f22a59SSerge E. Hallyn  * the file capabilities to be applied, and the setuid-root
38b5f22a59SSerge E. Hallyn  * bit is left on either to change the uid (plausible) or
39b5f22a59SSerge E. Hallyn  * to get full privilege on a kernel without file capabilities
40b5f22a59SSerge E. Hallyn  * support.  So in that case we do not raise capabilities.
41b5f22a59SSerge E. Hallyn  *
42b5f22a59SSerge E. Hallyn  * Warn if that happens, once per boot.
43b5f22a59SSerge E. Hallyn  */
warn_setuid_and_fcaps_mixed(const char * fname)44d7627467SDavid Howells static void warn_setuid_and_fcaps_mixed(const char *fname)
45b5f22a59SSerge E. Hallyn {
46b5f22a59SSerge E. Hallyn 	static int warned;
47b5f22a59SSerge E. Hallyn 	if (!warned) {
48b5f22a59SSerge E. Hallyn 		printk(KERN_INFO "warning: `%s' has both setuid-root and"
49b5f22a59SSerge E. Hallyn 			" effective capabilities. Therefore not raising all"
50b5f22a59SSerge E. Hallyn 			" capabilities.\n", fname);
51b5f22a59SSerge E. Hallyn 		warned = 1;
52b5f22a59SSerge E. Hallyn 	}
53b5f22a59SSerge E. Hallyn }
54b5f22a59SSerge E. Hallyn 
551d045980SDavid Howells /**
56d48da4d5SJordan Rome  * cap_capable_helper - Determine whether a task has a particular effective
57d48da4d5SJordan Rome  * capability.
583699c53cSDavid Howells  * @cred: The credentials to use
59d48da4d5SJordan Rome  * @target_ns:  The user namespace of the resource being accessed
60d48da4d5SJordan Rome  * @cred_ns:  The user namespace of the credentials
611d045980SDavid Howells  * @cap: The capability to check for
621d045980SDavid Howells  *
631d045980SDavid Howells  * Determine whether the nominated task has the specified capability amongst
641d045980SDavid Howells  * its effective set, returning 0 if it does, -ve if it does not.
651d045980SDavid Howells  *
66d48da4d5SJordan Rome  * See cap_capable for more details.
67a6dbb1efSAndrew G. Morgan  */
cap_capable_helper(const struct cred * cred,struct user_namespace * target_ns,const struct user_namespace * cred_ns,int cap)68d48da4d5SJordan Rome static inline int cap_capable_helper(const struct cred *cred,
69d48da4d5SJordan Rome 				     struct user_namespace *target_ns,
70d48da4d5SJordan Rome 				     const struct user_namespace *cred_ns,
71d48da4d5SJordan Rome 				     int cap)
721da177e4SLinus Torvalds {
73d48da4d5SJordan Rome 	struct user_namespace *ns = target_ns;
743486740aSSerge E. Hallyn 
75520d9eabSEric W. Biederman 	/* See if cred has the capability in the target user namespace
76520d9eabSEric W. Biederman 	 * by examining the target user namespace and all of the target
77520d9eabSEric W. Biederman 	 * user namespace's parents.
78520d9eabSEric W. Biederman 	 */
79520d9eabSEric W. Biederman 	for (;;) {
803486740aSSerge E. Hallyn 		/* Do we have the necessary capabilities? */
81d48da4d5SJordan Rome 		if (likely(ns == cred_ns))
823699c53cSDavid Howells 			return cap_raised(cred->cap_effective, cap) ? 0 : -EPERM;
833486740aSSerge E. Hallyn 
8464db4c7fSKirill Tkhai 		/*
8564db4c7fSKirill Tkhai 		 * If we're already at a lower level than we're looking for,
8664db4c7fSKirill Tkhai 		 * we're done searching.
8764db4c7fSKirill Tkhai 		 */
88d48da4d5SJordan Rome 		if (ns->level <= cred_ns->level)
893486740aSSerge E. Hallyn 			return -EPERM;
903486740aSSerge E. Hallyn 
913486740aSSerge E. Hallyn 		/*
92520d9eabSEric W. Biederman 		 * The owner of the user namespace in the parent of the
93520d9eabSEric W. Biederman 		 * user namespace has all caps.
94520d9eabSEric W. Biederman 		 */
95d48da4d5SJordan Rome 		if ((ns->parent == cred_ns) && uid_eq(ns->owner, cred->euid))
96520d9eabSEric W. Biederman 			return 0;
97520d9eabSEric W. Biederman 
98520d9eabSEric W. Biederman 		/*
993486740aSSerge E. Hallyn 		 * If you have a capability in a parent user ns, then you have
1003486740aSSerge E. Hallyn 		 * it over all children user namespaces as well.
1013486740aSSerge E. Hallyn 		 */
102520d9eabSEric W. Biederman 		ns = ns->parent;
1033486740aSSerge E. Hallyn 	}
1043486740aSSerge E. Hallyn 
1053486740aSSerge E. Hallyn 	/* We never get here */
1061da177e4SLinus Torvalds }
1071da177e4SLinus Torvalds 
1081d045980SDavid Howells /**
109d48da4d5SJordan Rome  * cap_capable - Determine whether a task has a particular effective capability
110d48da4d5SJordan Rome  * @cred: The credentials to use
111d48da4d5SJordan Rome  * @target_ns:  The user namespace of the resource being accessed
112d48da4d5SJordan Rome  * @cap: The capability to check for
113d48da4d5SJordan Rome  * @opts: Bitmask of options defined in include/linux/security.h (unused)
114d48da4d5SJordan Rome  *
115d48da4d5SJordan Rome  * Determine whether the nominated task has the specified capability amongst
116d48da4d5SJordan Rome  * its effective set, returning 0 if it does, -ve if it does not.
117d48da4d5SJordan Rome  *
118*4ae89b1fSDr. David Alan Gilbert  * NOTE WELL: cap_capable() has reverse semantics to the capable() call
119*4ae89b1fSDr. David Alan Gilbert  * and friends. That is cap_capable() returns an int 0 when a task has
120*4ae89b1fSDr. David Alan Gilbert  * a capability, while the kernel's capable(), has_ns_capability(),
121*4ae89b1fSDr. David Alan Gilbert  * has_ns_capability_noaudit(), and has_capability_noaudit() return a
122*4ae89b1fSDr. David Alan Gilbert  * bool true (1) for this case.
123d48da4d5SJordan Rome  */
cap_capable(const struct cred * cred,struct user_namespace * target_ns,int cap,unsigned int opts)124d48da4d5SJordan Rome int cap_capable(const struct cred *cred, struct user_namespace *target_ns,
125d48da4d5SJordan Rome 		int cap, unsigned int opts)
126d48da4d5SJordan Rome {
127d48da4d5SJordan Rome 	const struct user_namespace *cred_ns = cred->user_ns;
128d48da4d5SJordan Rome 	int ret = cap_capable_helper(cred, target_ns, cred_ns, cap);
129d48da4d5SJordan Rome 
130d48da4d5SJordan Rome 	trace_cap_capable(cred, target_ns, cred_ns, cap, ret);
131d48da4d5SJordan Rome 	return ret;
132d48da4d5SJordan Rome }
133d48da4d5SJordan Rome 
134d48da4d5SJordan Rome /**
1351d045980SDavid Howells  * cap_settime - Determine whether the current process may set the system clock
1361d045980SDavid Howells  * @ts: The time to set
1371d045980SDavid Howells  * @tz: The timezone to set
1381d045980SDavid Howells  *
1391d045980SDavid Howells  * Determine whether the current process may set the system clock and timezone
1401d045980SDavid Howells  * information, returning 0 if permission granted, -ve if denied.
1411d045980SDavid Howells  */
cap_settime(const struct timespec64 * ts,const struct timezone * tz)142457db29bSBaolin Wang int cap_settime(const struct timespec64 *ts, const struct timezone *tz)
1431da177e4SLinus Torvalds {
1441da177e4SLinus Torvalds 	if (!capable(CAP_SYS_TIME))
1451da177e4SLinus Torvalds 		return -EPERM;
1461da177e4SLinus Torvalds 	return 0;
1471da177e4SLinus Torvalds }
1481da177e4SLinus Torvalds 
1491d045980SDavid Howells /**
1509e48858fSIngo Molnar  * cap_ptrace_access_check - Determine whether the current process may access
1511d045980SDavid Howells  *			   another
1521d045980SDavid Howells  * @child: The process to be accessed
1531d045980SDavid Howells  * @mode: The mode of attachment.
1541d045980SDavid Howells  *
1558409cca7SSerge E. Hallyn  * If we are in the same or an ancestor user_ns and have all the target
1568409cca7SSerge E. Hallyn  * task's capabilities, then ptrace access is allowed.
1578409cca7SSerge E. Hallyn  * If we have the ptrace capability to the target user_ns, then ptrace
1588409cca7SSerge E. Hallyn  * access is allowed.
1598409cca7SSerge E. Hallyn  * Else denied.
1608409cca7SSerge E. Hallyn  *
1611d045980SDavid Howells  * Determine whether a process may access another, returning 0 if permission
1621d045980SDavid Howells  * granted, -ve if denied.
1631d045980SDavid Howells  */
cap_ptrace_access_check(struct task_struct * child,unsigned int mode)1649e48858fSIngo Molnar int cap_ptrace_access_check(struct task_struct *child, unsigned int mode)
1651da177e4SLinus Torvalds {
166c69e8d9cSDavid Howells 	int ret = 0;
1678409cca7SSerge E. Hallyn 	const struct cred *cred, *child_cred;
168caaee623SJann Horn 	const kernel_cap_t *caller_caps;
169c69e8d9cSDavid Howells 
170c69e8d9cSDavid Howells 	rcu_read_lock();
1718409cca7SSerge E. Hallyn 	cred = current_cred();
1728409cca7SSerge E. Hallyn 	child_cred = __task_cred(child);
173caaee623SJann Horn 	if (mode & PTRACE_MODE_FSCREDS)
174caaee623SJann Horn 		caller_caps = &cred->cap_effective;
175caaee623SJann Horn 	else
176caaee623SJann Horn 		caller_caps = &cred->cap_permitted;
177c4a4d603SEric W. Biederman 	if (cred->user_ns == child_cred->user_ns &&
178caaee623SJann Horn 	    cap_issubset(child_cred->cap_permitted, *caller_caps))
1798409cca7SSerge E. Hallyn 		goto out;
180c4a4d603SEric W. Biederman 	if (ns_capable(child_cred->user_ns, CAP_SYS_PTRACE))
1818409cca7SSerge E. Hallyn 		goto out;
182c69e8d9cSDavid Howells 	ret = -EPERM;
1838409cca7SSerge E. Hallyn out:
184c69e8d9cSDavid Howells 	rcu_read_unlock();
185c69e8d9cSDavid Howells 	return ret;
1865cd9c58fSDavid Howells }
1875cd9c58fSDavid Howells 
1881d045980SDavid Howells /**
1891d045980SDavid Howells  * cap_ptrace_traceme - Determine whether another process may trace the current
1901d045980SDavid Howells  * @parent: The task proposed to be the tracer
1911d045980SDavid Howells  *
1928409cca7SSerge E. Hallyn  * If parent is in the same or an ancestor user_ns and has all current's
1938409cca7SSerge E. Hallyn  * capabilities, then ptrace access is allowed.
1948409cca7SSerge E. Hallyn  * If parent has the ptrace capability to current's user_ns, then ptrace
1958409cca7SSerge E. Hallyn  * access is allowed.
1968409cca7SSerge E. Hallyn  * Else denied.
1978409cca7SSerge E. Hallyn  *
1981d045980SDavid Howells  * Determine whether the nominated task is permitted to trace the current
1991d045980SDavid Howells  * process, returning 0 if permission is granted, -ve if denied.
2001d045980SDavid Howells  */
cap_ptrace_traceme(struct task_struct * parent)2015cd9c58fSDavid Howells int cap_ptrace_traceme(struct task_struct *parent)
2025cd9c58fSDavid Howells {
203c69e8d9cSDavid Howells 	int ret = 0;
2048409cca7SSerge E. Hallyn 	const struct cred *cred, *child_cred;
205c69e8d9cSDavid Howells 
206c69e8d9cSDavid Howells 	rcu_read_lock();
2078409cca7SSerge E. Hallyn 	cred = __task_cred(parent);
2088409cca7SSerge E. Hallyn 	child_cred = current_cred();
209c4a4d603SEric W. Biederman 	if (cred->user_ns == child_cred->user_ns &&
2108409cca7SSerge E. Hallyn 	    cap_issubset(child_cred->cap_permitted, cred->cap_permitted))
2118409cca7SSerge E. Hallyn 		goto out;
212c4a4d603SEric W. Biederman 	if (has_ns_capability(parent, child_cred->user_ns, CAP_SYS_PTRACE))
2138409cca7SSerge E. Hallyn 		goto out;
214c69e8d9cSDavid Howells 	ret = -EPERM;
2158409cca7SSerge E. Hallyn out:
216c69e8d9cSDavid Howells 	rcu_read_unlock();
217c69e8d9cSDavid Howells 	return ret;
2181da177e4SLinus Torvalds }
2191da177e4SLinus Torvalds 
2201d045980SDavid Howells /**
2211d045980SDavid Howells  * cap_capget - Retrieve a task's capability sets
2221d045980SDavid Howells  * @target: The task from which to retrieve the capability sets
2231d045980SDavid Howells  * @effective: The place to record the effective set
2241d045980SDavid Howells  * @inheritable: The place to record the inheritable set
2251d045980SDavid Howells  * @permitted: The place to record the permitted set
2261d045980SDavid Howells  *
2271d045980SDavid Howells  * This function retrieves the capabilities of the nominated task and returns
2281d045980SDavid Howells  * them to the caller.
2291d045980SDavid Howells  */
cap_capget(const struct task_struct * target,kernel_cap_t * effective,kernel_cap_t * inheritable,kernel_cap_t * permitted)2306672efbbSKhadija Kamran int cap_capget(const struct task_struct *target, kernel_cap_t *effective,
2311da177e4SLinus Torvalds 	       kernel_cap_t *inheritable, kernel_cap_t *permitted)
2321da177e4SLinus Torvalds {
233c69e8d9cSDavid Howells 	const struct cred *cred;
234b6dff3ecSDavid Howells 
2351da177e4SLinus Torvalds 	/* Derived from kernel/capability.c:sys_capget. */
236c69e8d9cSDavid Howells 	rcu_read_lock();
237c69e8d9cSDavid Howells 	cred = __task_cred(target);
238b6dff3ecSDavid Howells 	*effective   = cred->cap_effective;
239b6dff3ecSDavid Howells 	*inheritable = cred->cap_inheritable;
240b6dff3ecSDavid Howells 	*permitted   = cred->cap_permitted;
241c69e8d9cSDavid Howells 	rcu_read_unlock();
2421da177e4SLinus Torvalds 	return 0;
2431da177e4SLinus Torvalds }
2441da177e4SLinus Torvalds 
2451d045980SDavid Howells /*
2461d045980SDavid Howells  * Determine whether the inheritable capabilities are limited to the old
2471d045980SDavid Howells  * permitted set.  Returns 1 if they are limited, 0 if they are not.
2481d045980SDavid Howells  */
cap_inh_is_capped(void)24972c2d582SAndrew Morgan static inline int cap_inh_is_capped(void)
25072c2d582SAndrew Morgan {
2511d045980SDavid Howells 	/* they are so limited unless the current task has the CAP_SETPCAP
2521d045980SDavid Howells 	 * capability
25372c2d582SAndrew Morgan 	 */
254c4a4d603SEric W. Biederman 	if (cap_capable(current_cred(), current_cred()->user_ns,
255c1a85a00SMicah Morton 			CAP_SETPCAP, CAP_OPT_NONE) == 0)
2561d045980SDavid Howells 		return 0;
2571d045980SDavid Howells 	return 1;
25872c2d582SAndrew Morgan }
25972c2d582SAndrew Morgan 
2601d045980SDavid Howells /**
2611d045980SDavid Howells  * cap_capset - Validate and apply proposed changes to current's capabilities
2621d045980SDavid Howells  * @new: The proposed new credentials; alterations should be made here
2631d045980SDavid Howells  * @old: The current task's current credentials
2641d045980SDavid Howells  * @effective: A pointer to the proposed new effective capabilities set
2651d045980SDavid Howells  * @inheritable: A pointer to the proposed new inheritable capabilities set
2661d045980SDavid Howells  * @permitted: A pointer to the proposed new permitted capabilities set
2671d045980SDavid Howells  *
2681d045980SDavid Howells  * This function validates and applies a proposed mass change to the current
2691d045980SDavid Howells  * process's capability sets.  The changes are made to the proposed new
2701d045980SDavid Howells  * credentials, and assuming no error, will be committed by the caller of LSM.
2711d045980SDavid Howells  */
cap_capset(struct cred * new,const struct cred * old,const kernel_cap_t * effective,const kernel_cap_t * inheritable,const kernel_cap_t * permitted)272d84f4f99SDavid Howells int cap_capset(struct cred *new,
273d84f4f99SDavid Howells 	       const struct cred *old,
274d84f4f99SDavid Howells 	       const kernel_cap_t *effective,
27515a2460eSDavid Howells 	       const kernel_cap_t *inheritable,
27615a2460eSDavid Howells 	       const kernel_cap_t *permitted)
2771da177e4SLinus Torvalds {
278d84f4f99SDavid Howells 	if (cap_inh_is_capped() &&
279d84f4f99SDavid Howells 	    !cap_issubset(*inheritable,
280d84f4f99SDavid Howells 			  cap_combine(old->cap_inheritable,
281d84f4f99SDavid Howells 				      old->cap_permitted)))
28272c2d582SAndrew Morgan 		/* incapable of using this inheritable set */
2831da177e4SLinus Torvalds 		return -EPERM;
284d84f4f99SDavid Howells 
2853b7391deSSerge E. Hallyn 	if (!cap_issubset(*inheritable,
286d84f4f99SDavid Howells 			  cap_combine(old->cap_inheritable,
287d84f4f99SDavid Howells 				      old->cap_bset)))
2883b7391deSSerge E. Hallyn 		/* no new pI capabilities outside bounding set */
2893b7391deSSerge E. Hallyn 		return -EPERM;
2901da177e4SLinus Torvalds 
2911da177e4SLinus Torvalds 	/* verify restrictions on target's new Permitted set */
292d84f4f99SDavid Howells 	if (!cap_issubset(*permitted, old->cap_permitted))
2931da177e4SLinus Torvalds 		return -EPERM;
2941da177e4SLinus Torvalds 
2951da177e4SLinus Torvalds 	/* verify the _new_Effective_ is a subset of the _new_Permitted_ */
296d84f4f99SDavid Howells 	if (!cap_issubset(*effective, *permitted))
2971da177e4SLinus Torvalds 		return -EPERM;
2981da177e4SLinus Torvalds 
299d84f4f99SDavid Howells 	new->cap_effective   = *effective;
300d84f4f99SDavid Howells 	new->cap_inheritable = *inheritable;
301d84f4f99SDavid Howells 	new->cap_permitted   = *permitted;
30258319057SAndy Lutomirski 
30358319057SAndy Lutomirski 	/*
30458319057SAndy Lutomirski 	 * Mask off ambient bits that are no longer both permitted and
30558319057SAndy Lutomirski 	 * inheritable.
30658319057SAndy Lutomirski 	 */
30758319057SAndy Lutomirski 	new->cap_ambient = cap_intersect(new->cap_ambient,
30858319057SAndy Lutomirski 					 cap_intersect(*permitted,
30958319057SAndy Lutomirski 						       *inheritable));
31058319057SAndy Lutomirski 	if (WARN_ON(!cap_ambient_invariant_ok(new)))
31158319057SAndy Lutomirski 		return -EINVAL;
3121da177e4SLinus Torvalds 	return 0;
3131da177e4SLinus Torvalds }
3141da177e4SLinus Torvalds 
3151d045980SDavid Howells /**
3161d045980SDavid Howells  * cap_inode_need_killpriv - Determine if inode change affects privileges
3171d045980SDavid Howells  * @dentry: The inode/dentry in being changed with change marked ATTR_KILL_PRIV
3181d045980SDavid Howells  *
3191d045980SDavid Howells  * Determine if an inode having a change applied that's marked ATTR_KILL_PRIV
3201d045980SDavid Howells  * affects the security markings on that inode, and if it is, should
321ab5348c9SStefan Berger  * inode_killpriv() be invoked or the change rejected.
3221d045980SDavid Howells  *
323049ae601SRandy Dunlap  * Return: 1 if security.capability has a value, meaning inode_killpriv()
324ab5348c9SStefan Berger  * is required, 0 otherwise, meaning inode_killpriv() is not required.
3251d045980SDavid Howells  */
cap_inode_need_killpriv(struct dentry * dentry)326b5376771SSerge E. Hallyn int cap_inode_need_killpriv(struct dentry *dentry)
327b5376771SSerge E. Hallyn {
328c6f493d6SDavid Howells 	struct inode *inode = d_backing_inode(dentry);
329b5376771SSerge E. Hallyn 	int error;
330b5376771SSerge E. Hallyn 
3315d6c3191SAndreas Gruenbacher 	error = __vfs_getxattr(dentry, inode, XATTR_NAME_CAPS, NULL, 0);
3325d6c3191SAndreas Gruenbacher 	return error > 0;
333b5376771SSerge E. Hallyn }
334b5376771SSerge E. Hallyn 
3351d045980SDavid Howells /**
3361d045980SDavid Howells  * cap_inode_killpriv - Erase the security markings on an inode
33771bc356fSChristian Brauner  *
33839f60c1cSChristian Brauner  * @idmap:	idmap of the mount the inode was found from
3391d045980SDavid Howells  * @dentry:	The inode/dentry to alter
3401d045980SDavid Howells  *
3411d045980SDavid Howells  * Erase the privilege-enhancing security markings on an inode.
3421d045980SDavid Howells  *
34339f60c1cSChristian Brauner  * If the inode has been found through an idmapped mount the idmap of
34439f60c1cSChristian Brauner  * the vfsmount must be passed through @idmap. This function will then
34539f60c1cSChristian Brauner  * take care to map the inode according to @idmap before checking
34671bc356fSChristian Brauner  * permissions. On non-idmapped mounts or if permission checking is to be
3474432b507SPaul Moore  * performed on the raw inode simply pass @nop_mnt_idmap.
34871bc356fSChristian Brauner  *
349049ae601SRandy Dunlap  * Return: 0 if successful, -ve on error.
3501d045980SDavid Howells  */
cap_inode_killpriv(struct mnt_idmap * idmap,struct dentry * dentry)35139f60c1cSChristian Brauner int cap_inode_killpriv(struct mnt_idmap *idmap, struct dentry *dentry)
352b5376771SSerge E. Hallyn {
3535d6c3191SAndreas Gruenbacher 	int error;
354b5376771SSerge E. Hallyn 
35539f60c1cSChristian Brauner 	error = __vfs_removexattr(idmap, dentry, XATTR_NAME_CAPS);
3565d6c3191SAndreas Gruenbacher 	if (error == -EOPNOTSUPP)
3575d6c3191SAndreas Gruenbacher 		error = 0;
3585d6c3191SAndreas Gruenbacher 	return error;
359b5376771SSerge E. Hallyn }
360b5376771SSerge E. Hallyn 
/*
 * Report whether @rootvfsuid maps to uid 0 in the current user namespace
 * or in any of its ancestors, up to and including init_user_ns.
 * An invalid @rootvfsuid never matches.
 */
static bool rootid_owns_currentns(vfsuid_t rootvfsuid)
{
	struct user_namespace *ns;
	kuid_t kroot;

	if (!vfsuid_valid(rootvfsuid))
		return false;

	kroot = vfsuid_into_kuid(rootvfsuid);
	ns = current_user_ns();

	/* Climb towards init_user_ns until some namespace sees kroot as 0. */
	while (from_kuid(ns, kroot) != 0) {
		if (ns == &init_user_ns)
			return false;
		ns = ns->parent;
	}

	return true;
}
3798db6c34fSSerge E. Hallyn 
/* Return @m with the VFS_CAP_FLAGS_EFFECTIVE bits cleared. */
static __u32 sansflags(__u32 m)
{
	__u32 stripped = m;

	stripped &= ~VFS_CAP_FLAGS_EFFECTIVE;
	return stripped;
}
3848db6c34fSSerge E. Hallyn 
is_v2header(int size,const struct vfs_cap_data * cap)385f6fbd8cbSPaul Moore static bool is_v2header(int size, const struct vfs_cap_data *cap)
3868db6c34fSSerge E. Hallyn {
3878db6c34fSSerge E. Hallyn 	if (size != XATTR_CAPS_SZ_2)
3888db6c34fSSerge E. Hallyn 		return false;
389dc32b5c3SEric Biggers 	return sansflags(le32_to_cpu(cap->magic_etc)) == VFS_CAP_REVISION_2;
3908db6c34fSSerge E. Hallyn }
3918db6c34fSSerge E. Hallyn 
is_v3header(int size,const struct vfs_cap_data * cap)392f6fbd8cbSPaul Moore static bool is_v3header(int size, const struct vfs_cap_data *cap)
3938db6c34fSSerge E. Hallyn {
3948db6c34fSSerge E. Hallyn 	if (size != XATTR_CAPS_SZ_3)
3958db6c34fSSerge E. Hallyn 		return false;
396dc32b5c3SEric Biggers 	return sansflags(le32_to_cpu(cap->magic_etc)) == VFS_CAP_REVISION_3;
3978db6c34fSSerge E. Hallyn }
3988db6c34fSSerge E. Hallyn 
3998db6c34fSSerge E. Hallyn /*
4008db6c34fSSerge E. Hallyn  * getsecurity: We are called for security.* before any attempt to read the
4018db6c34fSSerge E. Hallyn  * xattr from the inode itself.
4028db6c34fSSerge E. Hallyn  *
4038db6c34fSSerge E. Hallyn  * This gives us a chance to read the on-disk value and convert it.  If we
4048db6c34fSSerge E. Hallyn  * return -EOPNOTSUPP, then vfs_getxattr() will call the i_op handler.
4058db6c34fSSerge E. Hallyn  *
4068db6c34fSSerge E. Hallyn  * Note we are not called by vfs_getxattr_alloc(), but that is only called
4078db6c34fSSerge E. Hallyn  * by the integrity subsystem, which really wants the unconverted values -
4088db6c34fSSerge E. Hallyn  * so that's good.
4098db6c34fSSerge E. Hallyn  */
cap_inode_getsecurity(struct mnt_idmap * idmap,struct inode * inode,const char * name,void ** buffer,bool alloc)4104609e1f1SChristian Brauner int cap_inode_getsecurity(struct mnt_idmap *idmap,
41171bc356fSChristian Brauner 			  struct inode *inode, const char *name, void **buffer,
4128db6c34fSSerge E. Hallyn 			  bool alloc)
4138db6c34fSSerge E. Hallyn {
414f6fbd8cbSPaul Moore 	int size;
4158db6c34fSSerge E. Hallyn 	kuid_t kroot;
416b7c9b675SChristian Brauner 	vfsuid_t vfsroot;
417f2b00be4SMiklos Szeredi 	u32 nsmagic, magic;
4188db6c34fSSerge E. Hallyn 	uid_t root, mappedroot;
4198db6c34fSSerge E. Hallyn 	char *tmpbuf = NULL;
4208db6c34fSSerge E. Hallyn 	struct vfs_cap_data *cap;
421f2b00be4SMiklos Szeredi 	struct vfs_ns_cap_data *nscap = NULL;
4228db6c34fSSerge E. Hallyn 	struct dentry *dentry;
4238db6c34fSSerge E. Hallyn 	struct user_namespace *fs_ns;
4248db6c34fSSerge E. Hallyn 
4258db6c34fSSerge E. Hallyn 	if (strcmp(name, "capability") != 0)
4268db6c34fSSerge E. Hallyn 		return -EOPNOTSUPP;
4278db6c34fSSerge E. Hallyn 
428355139a8SEddie.Horng 	dentry = d_find_any_alias(inode);
4298db6c34fSSerge E. Hallyn 	if (!dentry)
4308db6c34fSSerge E. Hallyn 		return -EINVAL;
4314609e1f1SChristian Brauner 	size = vfs_getxattr_alloc(idmap, dentry, XATTR_NAME_CAPS, &tmpbuf,
432f6fbd8cbSPaul Moore 				  sizeof(struct vfs_ns_cap_data), GFP_NOFS);
4338db6c34fSSerge E. Hallyn 	dput(dentry);
434f6fbd8cbSPaul Moore 	/* gcc11 complains if we don't check for !tmpbuf */
435f6fbd8cbSPaul Moore 	if (size < 0 || !tmpbuf)
4368cf0a1bcSGaosheng Cui 		goto out_free;
4378db6c34fSSerge E. Hallyn 
4388db6c34fSSerge E. Hallyn 	fs_ns = inode->i_sb->s_user_ns;
4398db6c34fSSerge E. Hallyn 	cap = (struct vfs_cap_data *) tmpbuf;
440f6fbd8cbSPaul Moore 	if (is_v2header(size, cap)) {
441f2b00be4SMiklos Szeredi 		root = 0;
442f6fbd8cbSPaul Moore 	} else if (is_v3header(size, cap)) {
4438db6c34fSSerge E. Hallyn 		nscap = (struct vfs_ns_cap_data *) tmpbuf;
4448db6c34fSSerge E. Hallyn 		root = le32_to_cpu(nscap->rootid);
445f2b00be4SMiklos Szeredi 	} else {
446f2b00be4SMiklos Szeredi 		size = -EINVAL;
447f2b00be4SMiklos Szeredi 		goto out_free;
448f2b00be4SMiklos Szeredi 	}
449f2b00be4SMiklos Szeredi 
4508db6c34fSSerge E. Hallyn 	kroot = make_kuid(fs_ns, root);
4518db6c34fSSerge E. Hallyn 
45271bc356fSChristian Brauner 	/* If this is an idmapped mount shift the kuid. */
4534d7ca409SChristian Brauner 	vfsroot = make_vfsuid(idmap, fs_ns, kroot);
45471bc356fSChristian Brauner 
4558db6c34fSSerge E. Hallyn 	/* If the root kuid maps to a valid uid in current ns, then return
4568db6c34fSSerge E. Hallyn 	 * this as a nscap. */
457b7c9b675SChristian Brauner 	mappedroot = from_kuid(current_user_ns(), vfsuid_into_kuid(vfsroot));
4588db6c34fSSerge E. Hallyn 	if (mappedroot != (uid_t)-1 && mappedroot != (uid_t)0) {
459f2b00be4SMiklos Szeredi 		size = sizeof(struct vfs_ns_cap_data);
4608db6c34fSSerge E. Hallyn 		if (alloc) {
461f2b00be4SMiklos Szeredi 			if (!nscap) {
462f2b00be4SMiklos Szeredi 				/* v2 -> v3 conversion */
463f2b00be4SMiklos Szeredi 				nscap = kzalloc(size, GFP_ATOMIC);
464f2b00be4SMiklos Szeredi 				if (!nscap) {
465f2b00be4SMiklos Szeredi 					size = -ENOMEM;
466f2b00be4SMiklos Szeredi 					goto out_free;
467f2b00be4SMiklos Szeredi 				}
468f2b00be4SMiklos Szeredi 				nsmagic = VFS_CAP_REVISION_3;
469f2b00be4SMiklos Szeredi 				magic = le32_to_cpu(cap->magic_etc);
470f2b00be4SMiklos Szeredi 				if (magic & VFS_CAP_FLAGS_EFFECTIVE)
471f2b00be4SMiklos Szeredi 					nsmagic |= VFS_CAP_FLAGS_EFFECTIVE;
472f2b00be4SMiklos Szeredi 				memcpy(&nscap->data, &cap->data, sizeof(__le32) * 2 * VFS_CAP_U32);
473f2b00be4SMiklos Szeredi 				nscap->magic_etc = cpu_to_le32(nsmagic);
474f2b00be4SMiklos Szeredi 			} else {
475f2b00be4SMiklos Szeredi 				/* use allocated v3 buffer */
476f2b00be4SMiklos Szeredi 				tmpbuf = NULL;
477f2b00be4SMiklos Szeredi 			}
4788db6c34fSSerge E. Hallyn 			nscap->rootid = cpu_to_le32(mappedroot);
479f2b00be4SMiklos Szeredi 			*buffer = nscap;
480f2b00be4SMiklos Szeredi 		}
481f2b00be4SMiklos Szeredi 		goto out_free;
4828db6c34fSSerge E. Hallyn 	}
4838db6c34fSSerge E. Hallyn 
484b7c9b675SChristian Brauner 	if (!rootid_owns_currentns(vfsroot)) {
485f2b00be4SMiklos Szeredi 		size = -EOVERFLOW;
486f2b00be4SMiklos Szeredi 		goto out_free;
4878db6c34fSSerge E. Hallyn 	}
4888db6c34fSSerge E. Hallyn 
4898db6c34fSSerge E. Hallyn 	/* This comes from a parent namespace.  Return as a v2 capability */
4908db6c34fSSerge E. Hallyn 	size = sizeof(struct vfs_cap_data);
4918db6c34fSSerge E. Hallyn 	if (alloc) {
492f2b00be4SMiklos Szeredi 		if (nscap) {
493f2b00be4SMiklos Szeredi 			/* v3 -> v2 conversion */
494f2b00be4SMiklos Szeredi 			cap = kzalloc(size, GFP_ATOMIC);
495f2b00be4SMiklos Szeredi 			if (!cap) {
496f2b00be4SMiklos Szeredi 				size = -ENOMEM;
497f2b00be4SMiklos Szeredi 				goto out_free;
498f2b00be4SMiklos Szeredi 			}
4998db6c34fSSerge E. Hallyn 			magic = VFS_CAP_REVISION_2;
5008db6c34fSSerge E. Hallyn 			nsmagic = le32_to_cpu(nscap->magic_etc);
5018db6c34fSSerge E. Hallyn 			if (nsmagic & VFS_CAP_FLAGS_EFFECTIVE)
5028db6c34fSSerge E. Hallyn 				magic |= VFS_CAP_FLAGS_EFFECTIVE;
5038db6c34fSSerge E. Hallyn 			memcpy(&cap->data, &nscap->data, sizeof(__le32) * 2 * VFS_CAP_U32);
5048db6c34fSSerge E. Hallyn 			cap->magic_etc = cpu_to_le32(magic);
5051f578172STetsuo Handa 		} else {
506f2b00be4SMiklos Szeredi 			/* use unconverted v2 */
507f2b00be4SMiklos Szeredi 			tmpbuf = NULL;
5088db6c34fSSerge E. Hallyn 		}
509f2b00be4SMiklos Szeredi 		*buffer = cap;
5108db6c34fSSerge E. Hallyn 	}
511f2b00be4SMiklos Szeredi out_free:
5128db6c34fSSerge E. Hallyn 	kfree(tmpbuf);
5138db6c34fSSerge E. Hallyn 	return size;
5148db6c34fSSerge E. Hallyn }
5158db6c34fSSerge E. Hallyn 
516e65ce2a5SChristian Brauner /**
517e65ce2a5SChristian Brauner  * rootid_from_xattr - translate root uid of vfs caps
518e65ce2a5SChristian Brauner  *
519e65ce2a5SChristian Brauner  * @value:	vfs caps value which may be modified by this function
520e65ce2a5SChristian Brauner  * @size:	size of @ivalue
521e65ce2a5SChristian Brauner  * @task_ns:	user namespace of the caller
522e65ce2a5SChristian Brauner  */
rootid_from_xattr(const void * value,size_t size,struct user_namespace * task_ns)523b7c9b675SChristian Brauner static vfsuid_t rootid_from_xattr(const void *value, size_t size,
524b7c9b675SChristian Brauner 				  struct user_namespace *task_ns)
5258db6c34fSSerge E. Hallyn {
5268db6c34fSSerge E. Hallyn 	const struct vfs_ns_cap_data *nscap = value;
5278db6c34fSSerge E. Hallyn 	uid_t rootid = 0;
5288db6c34fSSerge E. Hallyn 
5298db6c34fSSerge E. Hallyn 	if (size == XATTR_CAPS_SZ_3)
5308db6c34fSSerge E. Hallyn 		rootid = le32_to_cpu(nscap->rootid);
5318db6c34fSSerge E. Hallyn 
532b7c9b675SChristian Brauner 	return VFSUIDT_INIT(make_kuid(task_ns, rootid));
5338db6c34fSSerge E. Hallyn }
5348db6c34fSSerge E. Hallyn 
/* True when @size and @cap form either a v2 or a v3 vfs caps header. */
static bool validheader(size_t size, const struct vfs_cap_data *cap)
{
	if (is_v2header(size, cap))
		return true;

	return is_v3header(size, cap);
}
5398db6c34fSSerge E. Hallyn 
540e65ce2a5SChristian Brauner /**
541e65ce2a5SChristian Brauner  * cap_convert_nscap - check vfs caps
542e65ce2a5SChristian Brauner  *
54339f60c1cSChristian Brauner  * @idmap:	idmap of the mount the inode was found from
544e65ce2a5SChristian Brauner  * @dentry:	used to retrieve inode to check permissions on
545e65ce2a5SChristian Brauner  * @ivalue:	vfs caps value which may be modified by this function
546e65ce2a5SChristian Brauner  * @size:	size of @ivalue
547e65ce2a5SChristian Brauner  *
5488db6c34fSSerge E. Hallyn  * User requested a write of security.capability.  If needed, update the
5498db6c34fSSerge E. Hallyn  * xattr to change from v2 to v3, or to fixup the v3 rootid.
5508db6c34fSSerge E. Hallyn  *
55139f60c1cSChristian Brauner  * If the inode has been found through an idmapped mount the idmap of
55239f60c1cSChristian Brauner  * the vfsmount must be passed through @idmap. This function will then
55339f60c1cSChristian Brauner  * take care to map the inode according to @idmap before checking
554e65ce2a5SChristian Brauner  * permissions. On non-idmapped mounts or if permission checking is to be
5554432b507SPaul Moore  * performed on the raw inode simply pass @nop_mnt_idmap.
556e65ce2a5SChristian Brauner  *
557049ae601SRandy Dunlap  * Return: On success, return the new size; on error, return < 0.
5588db6c34fSSerge E. Hallyn  */
cap_convert_nscap(struct mnt_idmap * idmap,struct dentry * dentry,const void ** ivalue,size_t size)55939f60c1cSChristian Brauner int cap_convert_nscap(struct mnt_idmap *idmap, struct dentry *dentry,
560e65ce2a5SChristian Brauner 		      const void **ivalue, size_t size)
5618db6c34fSSerge E. Hallyn {
5628db6c34fSSerge E. Hallyn 	struct vfs_ns_cap_data *nscap;
5638db6c34fSSerge E. Hallyn 	uid_t nsrootid;
5648db6c34fSSerge E. Hallyn 	const struct vfs_cap_data *cap = *ivalue;
5658db6c34fSSerge E. Hallyn 	__u32 magic, nsmagic;
5668db6c34fSSerge E. Hallyn 	struct inode *inode = d_backing_inode(dentry);
5678db6c34fSSerge E. Hallyn 	struct user_namespace *task_ns = current_user_ns(),
5683b0c2d3eSEric W. Biederman 		*fs_ns = inode->i_sb->s_user_ns;
5698db6c34fSSerge E. Hallyn 	kuid_t rootid;
570b7c9b675SChristian Brauner 	vfsuid_t vfsrootid;
5718db6c34fSSerge E. Hallyn 	size_t newsize;
5728db6c34fSSerge E. Hallyn 
5738db6c34fSSerge E. Hallyn 	if (!*ivalue)
5748db6c34fSSerge E. Hallyn 		return -EINVAL;
575dc32b5c3SEric Biggers 	if (!validheader(size, cap))
5768db6c34fSSerge E. Hallyn 		return -EINVAL;
5779452e93eSChristian Brauner 	if (!capable_wrt_inode_uidgid(idmap, inode, CAP_SETFCAP))
5788db6c34fSSerge E. Hallyn 		return -EPERM;
57939f60c1cSChristian Brauner 	if (size == XATTR_CAPS_SZ_2 && (idmap == &nop_mnt_idmap))
5808db6c34fSSerge E. Hallyn 		if (ns_capable(inode->i_sb->s_user_ns, CAP_SETFCAP))
5818db6c34fSSerge E. Hallyn 			/* user is privileged, just write the v2 */
5828db6c34fSSerge E. Hallyn 			return size;
5838db6c34fSSerge E. Hallyn 
584b7c9b675SChristian Brauner 	vfsrootid = rootid_from_xattr(*ivalue, size, task_ns);
585b7c9b675SChristian Brauner 	if (!vfsuid_valid(vfsrootid))
586b7c9b675SChristian Brauner 		return -EINVAL;
587b7c9b675SChristian Brauner 
5884d7ca409SChristian Brauner 	rootid = from_vfsuid(idmap, fs_ns, vfsrootid);
5898db6c34fSSerge E. Hallyn 	if (!uid_valid(rootid))
5908db6c34fSSerge E. Hallyn 		return -EINVAL;
5918db6c34fSSerge E. Hallyn 
5928db6c34fSSerge E. Hallyn 	nsrootid = from_kuid(fs_ns, rootid);
5938db6c34fSSerge E. Hallyn 	if (nsrootid == -1)
5948db6c34fSSerge E. Hallyn 		return -EINVAL;
5958db6c34fSSerge E. Hallyn 
5968db6c34fSSerge E. Hallyn 	newsize = sizeof(struct vfs_ns_cap_data);
5978db6c34fSSerge E. Hallyn 	nscap = kmalloc(newsize, GFP_ATOMIC);
5988db6c34fSSerge E. Hallyn 	if (!nscap)
5998db6c34fSSerge E. Hallyn 		return -ENOMEM;
6008db6c34fSSerge E. Hallyn 	nscap->rootid = cpu_to_le32(nsrootid);
6018db6c34fSSerge E. Hallyn 	nsmagic = VFS_CAP_REVISION_3;
6028db6c34fSSerge E. Hallyn 	magic = le32_to_cpu(cap->magic_etc);
6038db6c34fSSerge E. Hallyn 	if (magic & VFS_CAP_FLAGS_EFFECTIVE)
6048db6c34fSSerge E. Hallyn 		nsmagic |= VFS_CAP_FLAGS_EFFECTIVE;
6058db6c34fSSerge E. Hallyn 	nscap->magic_etc = cpu_to_le32(nsmagic);
6068db6c34fSSerge E. Hallyn 	memcpy(&nscap->data, &cap->data, sizeof(__le32) * 2 * VFS_CAP_U32);
6078db6c34fSSerge E. Hallyn 
6088db6c34fSSerge E. Hallyn 	*ivalue = nscap;
6098db6c34fSSerge E. Hallyn 	return newsize;
6108db6c34fSSerge E. Hallyn }
6118db6c34fSSerge E. Hallyn 
6121d045980SDavid Howells /*
6131d045980SDavid Howells  * Calculate the new process capability sets from the capability sets attached
6141d045980SDavid Howells  * to a file.
6151d045980SDavid Howells  */
bprm_caps_from_vfs_caps(struct cpu_vfs_cap_data * caps,struct linux_binprm * bprm,bool * effective,bool * has_fcap)616c0b00441SEric Paris static inline int bprm_caps_from_vfs_caps(struct cpu_vfs_cap_data *caps,
617a6f76f23SDavid Howells 					  struct linux_binprm *bprm,
6184d49f671SZhi Li 					  bool *effective,
619fc7eadf7SRichard Guy Briggs 					  bool *has_fcap)
620b5376771SSerge E. Hallyn {
621a6f76f23SDavid Howells 	struct cred *new = bprm->cred;
622c0b00441SEric Paris 	int ret = 0;
623c0b00441SEric Paris 
624c0b00441SEric Paris 	if (caps->magic_etc & VFS_CAP_FLAGS_EFFECTIVE)
625a6f76f23SDavid Howells 		*effective = true;
626c0b00441SEric Paris 
6274d49f671SZhi Li 	if (caps->magic_etc & VFS_CAP_REVISION_MASK)
628fc7eadf7SRichard Guy Briggs 		*has_fcap = true;
6294d49f671SZhi Li 
630c0b00441SEric Paris 	/*
631c0b00441SEric Paris 	 * pP' = (X & fP) | (pI & fI)
63258319057SAndy Lutomirski 	 * The addition of pA' is handled later.
633c0b00441SEric Paris 	 */
634f122a08bSLinus Torvalds 	new->cap_permitted.val =
635f122a08bSLinus Torvalds 		(new->cap_bset.val & caps->permitted.val) |
636f122a08bSLinus Torvalds 		(new->cap_inheritable.val & caps->inheritable.val);
637c0b00441SEric Paris 
638f122a08bSLinus Torvalds 	if (caps->permitted.val & ~new->cap_permitted.val)
639a6f76f23SDavid Howells 		/* insufficient to execute correctly */
640c0b00441SEric Paris 		ret = -EPERM;
641c0b00441SEric Paris 
642c0b00441SEric Paris 	/*
643c0b00441SEric Paris 	 * For legacy apps, with no internal support for recognizing they
644c0b00441SEric Paris 	 * do not have enough capabilities, we return an error if they are
645c0b00441SEric Paris 	 * missing some "forced" (aka file-permitted) capabilities.
646c0b00441SEric Paris 	 */
647a6f76f23SDavid Howells 	return *effective ? ret : 0;
648c0b00441SEric Paris }
649c0b00441SEric Paris 
65071bc356fSChristian Brauner /**
65171bc356fSChristian Brauner  * get_vfs_caps_from_disk - retrieve vfs caps from disk
65271bc356fSChristian Brauner  *
65339f60c1cSChristian Brauner  * @idmap:	idmap of the mount the inode was found from
65471bc356fSChristian Brauner  * @dentry:	dentry from which @inode is retrieved
65571bc356fSChristian Brauner  * @cpu_caps:	vfs capabilities
65671bc356fSChristian Brauner  *
6571d045980SDavid Howells  * Extract the on-exec-apply capability sets for an executable file.
65871bc356fSChristian Brauner  *
65939f60c1cSChristian Brauner  * If the inode has been found through an idmapped mount the idmap of
66039f60c1cSChristian Brauner  * the vfsmount must be passed through @idmap. This function will then
66139f60c1cSChristian Brauner  * take care to map the inode according to @idmap before checking
66271bc356fSChristian Brauner  * permissions. On non-idmapped mounts or if permission checking is to be
6634432b507SPaul Moore  * performed on the raw inode simply pass @nop_mnt_idmap.
6641d045980SDavid Howells  */
get_vfs_caps_from_disk(struct mnt_idmap * idmap,const struct dentry * dentry,struct cpu_vfs_cap_data * cpu_caps)66539f60c1cSChristian Brauner int get_vfs_caps_from_disk(struct mnt_idmap *idmap,
66671bc356fSChristian Brauner 			   const struct dentry *dentry,
66771bc356fSChristian Brauner 			   struct cpu_vfs_cap_data *cpu_caps)
668c0b00441SEric Paris {
669c6f493d6SDavid Howells 	struct inode *inode = d_backing_inode(dentry);
670b5376771SSerge E. Hallyn 	__u32 magic_etc;
671c0b00441SEric Paris 	int size;
6728db6c34fSSerge E. Hallyn 	struct vfs_ns_cap_data data, *nscaps = &data;
6738db6c34fSSerge E. Hallyn 	struct vfs_cap_data *caps = (struct vfs_cap_data *) &data;
6748db6c34fSSerge E. Hallyn 	kuid_t rootkuid;
675b7c9b675SChristian Brauner 	vfsuid_t rootvfsuid;
67676ba89c7SColin Ian King 	struct user_namespace *fs_ns;
677c0b00441SEric Paris 
678c0b00441SEric Paris 	memset(cpu_caps, 0, sizeof(struct cpu_vfs_cap_data));
679c0b00441SEric Paris 
6805d6c3191SAndreas Gruenbacher 	if (!inode)
681c0b00441SEric Paris 		return -ENODATA;
682c0b00441SEric Paris 
68376ba89c7SColin Ian King 	fs_ns = inode->i_sb->s_user_ns;
6845d6c3191SAndreas Gruenbacher 	size = __vfs_getxattr((struct dentry *)dentry, inode,
6858db6c34fSSerge E. Hallyn 			      XATTR_NAME_CAPS, &data, XATTR_CAPS_SZ);
686a6f76f23SDavid Howells 	if (size == -ENODATA || size == -EOPNOTSUPP)
687c0b00441SEric Paris 		/* no data, that's ok */
688c0b00441SEric Paris 		return -ENODATA;
6898db6c34fSSerge E. Hallyn 
690c0b00441SEric Paris 	if (size < 0)
691c0b00441SEric Paris 		return size;
692b5376771SSerge E. Hallyn 
693e338d263SAndrew Morgan 	if (size < sizeof(magic_etc))
694b5376771SSerge E. Hallyn 		return -EINVAL;
695b5376771SSerge E. Hallyn 
6968db6c34fSSerge E. Hallyn 	cpu_caps->magic_etc = magic_etc = le32_to_cpu(caps->magic_etc);
697b5376771SSerge E. Hallyn 
6988db6c34fSSerge E. Hallyn 	rootkuid = make_kuid(fs_ns, 0);
699a6f76f23SDavid Howells 	switch (magic_etc & VFS_CAP_REVISION_MASK) {
700e338d263SAndrew Morgan 	case VFS_CAP_REVISION_1:
701e338d263SAndrew Morgan 		if (size != XATTR_CAPS_SZ_1)
702e338d263SAndrew Morgan 			return -EINVAL;
703e338d263SAndrew Morgan 		break;
704e338d263SAndrew Morgan 	case VFS_CAP_REVISION_2:
705e338d263SAndrew Morgan 		if (size != XATTR_CAPS_SZ_2)
706e338d263SAndrew Morgan 			return -EINVAL;
707e338d263SAndrew Morgan 		break;
7088db6c34fSSerge E. Hallyn 	case VFS_CAP_REVISION_3:
7098db6c34fSSerge E. Hallyn 		if (size != XATTR_CAPS_SZ_3)
7108db6c34fSSerge E. Hallyn 			return -EINVAL;
7118db6c34fSSerge E. Hallyn 		rootkuid = make_kuid(fs_ns, le32_to_cpu(nscaps->rootid));
7128db6c34fSSerge E. Hallyn 		break;
7138db6c34fSSerge E. Hallyn 
714b5376771SSerge E. Hallyn 	default:
715b5376771SSerge E. Hallyn 		return -EINVAL;
716b5376771SSerge E. Hallyn 	}
717b7c9b675SChristian Brauner 
7184d7ca409SChristian Brauner 	rootvfsuid = make_vfsuid(idmap, fs_ns, rootkuid);
719b7c9b675SChristian Brauner 	if (!vfsuid_valid(rootvfsuid))
720b7c9b675SChristian Brauner 		return -ENODATA;
721b7c9b675SChristian Brauner 
7228db6c34fSSerge E. Hallyn 	/* Limit the caps to the mounter of the filesystem
7238db6c34fSSerge E. Hallyn 	 * or the more limited uid specified in the xattr.
7248db6c34fSSerge E. Hallyn 	 */
725b7c9b675SChristian Brauner 	if (!rootid_owns_currentns(rootvfsuid))
7268db6c34fSSerge E. Hallyn 		return -ENODATA;
727e338d263SAndrew Morgan 
728f122a08bSLinus Torvalds 	cpu_caps->permitted.val = le32_to_cpu(caps->data[0].permitted);
729f122a08bSLinus Torvalds 	cpu_caps->inheritable.val = le32_to_cpu(caps->data[0].inheritable);
730f122a08bSLinus Torvalds 
731f122a08bSLinus Torvalds 	/*
732f122a08bSLinus Torvalds 	 * Rev1 had just a single 32-bit word, later expanded
733f122a08bSLinus Torvalds 	 * to a second one for the high bits
734f122a08bSLinus Torvalds 	 */
735f122a08bSLinus Torvalds 	if ((magic_etc & VFS_CAP_REVISION_MASK) != VFS_CAP_REVISION_1) {
736f122a08bSLinus Torvalds 		cpu_caps->permitted.val += (u64)le32_to_cpu(caps->data[1].permitted) << 32;
737f122a08bSLinus Torvalds 		cpu_caps->inheritable.val += (u64)le32_to_cpu(caps->data[1].inheritable) << 32;
738e338d263SAndrew Morgan 	}
739a6f76f23SDavid Howells 
740f122a08bSLinus Torvalds 	cpu_caps->permitted.val &= CAP_VALID_MASK;
741f122a08bSLinus Torvalds 	cpu_caps->inheritable.val &= CAP_VALID_MASK;
7427d8b6c63SEric Paris 
743b7c9b675SChristian Brauner 	cpu_caps->rootid = vfsuid_into_kuid(rootvfsuid);
7442fec30e2SRichard Guy Briggs 
745c0b00441SEric Paris 	return 0;
746b5376771SSerge E. Hallyn }
747b5376771SSerge E. Hallyn 
7481d045980SDavid Howells /*
7491d045980SDavid Howells  * Attempt to get the on-exec apply capability sets for an executable file from
7501d045980SDavid Howells  * its xattrs and, if present, apply them to the proposed credentials being
7511d045980SDavid Howells  * constructed by execve().
7521d045980SDavid Howells  */
get_file_caps(struct linux_binprm * bprm,const struct file * file,bool * effective,bool * has_fcap)7534a00c673SKhadija Kamran static int get_file_caps(struct linux_binprm *bprm, const struct file *file,
75456305aa9SEric W. Biederman 			 bool *effective, bool *has_fcap)
755b5376771SSerge E. Hallyn {
756b5376771SSerge E. Hallyn 	int rc = 0;
757c0b00441SEric Paris 	struct cpu_vfs_cap_data vcaps;
758b5376771SSerge E. Hallyn 
759ee67ae7eSKees Cook 	cap_clear(bprm->cred->cap_permitted);
7603318a386SSerge Hallyn 
7611f29fae2SSerge E. Hallyn 	if (!file_caps_enabled)
7621f29fae2SSerge E. Hallyn 		return 0;
7631f29fae2SSerge E. Hallyn 
76456305aa9SEric W. Biederman 	if (!mnt_may_suid(file->f_path.mnt))
765b5376771SSerge E. Hallyn 		return 0;
766380cf5baSAndy Lutomirski 
767380cf5baSAndy Lutomirski 	/*
768380cf5baSAndy Lutomirski 	 * This check is redundant with mnt_may_suid() but is kept to make
769380cf5baSAndy Lutomirski 	 * explicit that capability bits are limited to s_user_ns and its
770380cf5baSAndy Lutomirski 	 * descendants.
771380cf5baSAndy Lutomirski 	 */
77256305aa9SEric W. Biederman 	if (!current_in_userns(file->f_path.mnt->mnt_sb->s_user_ns))
773d07b846fSSeth Forshee 		return 0;
774b5376771SSerge E. Hallyn 
77539f60c1cSChristian Brauner 	rc = get_vfs_caps_from_disk(file_mnt_idmap(file),
77671bc356fSChristian Brauner 				    file->f_path.dentry, &vcaps);
777c0b00441SEric Paris 	if (rc < 0) {
778c0b00441SEric Paris 		if (rc == -EINVAL)
7798db6c34fSSerge E. Hallyn 			printk(KERN_NOTICE "Invalid argument reading file caps for %s\n",
7808db6c34fSSerge E. Hallyn 					bprm->filename);
781c0b00441SEric Paris 		else if (rc == -ENODATA)
782b5376771SSerge E. Hallyn 			rc = 0;
783b5376771SSerge E. Hallyn 		goto out;
784b5376771SSerge E. Hallyn 	}
785b5376771SSerge E. Hallyn 
786fc7eadf7SRichard Guy Briggs 	rc = bprm_caps_from_vfs_caps(&vcaps, bprm, effective, has_fcap);
787b5376771SSerge E. Hallyn 
788b5376771SSerge E. Hallyn out:
789b5376771SSerge E. Hallyn 	if (rc)
790ee67ae7eSKees Cook 		cap_clear(bprm->cred->cap_permitted);
791b5376771SSerge E. Hallyn 
792b5376771SSerge E. Hallyn 	return rc;
793b5376771SSerge E. Hallyn }
794b5376771SSerge E. Hallyn 
root_privileged(void)7959304b46cSRichard Guy Briggs static inline bool root_privileged(void) { return !issecure(SECURE_NOROOT); }
7969304b46cSRichard Guy Briggs 
/* True when @uid is the real uid of @cred. */
static inline bool __is_real(kuid_t uid, struct cred *cred)
{
	return uid_eq(cred->uid, uid);
}
79981a6a012SRichard Guy Briggs 
/* True when @uid is the effective uid of @cred. */
static inline bool __is_eff(kuid_t uid, struct cred *cred)
{
	return uid_eq(cred->euid, uid);
}
80281a6a012SRichard Guy Briggs 
/* True when @uid is the effective uid of @cred but not its real uid. */
static inline bool __is_suid(kuid_t uid, struct cred *cred)
{
	if (uid_eq(cred->uid, uid))
		return false;

	return uid_eq(cred->euid, uid);
}
80581a6a012SRichard Guy Briggs 
806db1a8922SRichard Guy Briggs /*
807db1a8922SRichard Guy Briggs  * handle_privileged_root - Handle case of privileged root
808db1a8922SRichard Guy Briggs  * @bprm: The execution parameters, including the proposed creds
809db1a8922SRichard Guy Briggs  * @has_fcap: Are any file capabilities set?
810db1a8922SRichard Guy Briggs  * @effective: Do we have effective root privilege?
811db1a8922SRichard Guy Briggs  * @root_uid: This namespace' root UID WRT initial USER namespace
812db1a8922SRichard Guy Briggs  *
813db1a8922SRichard Guy Briggs  * Handle the case where root is privileged and hasn't been neutered by
814db1a8922SRichard Guy Briggs  * SECURE_NOROOT.  If file capabilities are set, they won't be combined with
815db1a8922SRichard Guy Briggs  * set UID root and nothing is changed.  If we are root, cap_permitted is
816db1a8922SRichard Guy Briggs  * updated.  If we have become set UID root, the effective bit is set.
817db1a8922SRichard Guy Briggs  */
handle_privileged_root(struct linux_binprm * bprm,bool has_fcap,bool * effective,kuid_t root_uid)818fc7eadf7SRichard Guy Briggs static void handle_privileged_root(struct linux_binprm *bprm, bool has_fcap,
819db1a8922SRichard Guy Briggs 				   bool *effective, kuid_t root_uid)
820db1a8922SRichard Guy Briggs {
821db1a8922SRichard Guy Briggs 	const struct cred *old = current_cred();
822db1a8922SRichard Guy Briggs 	struct cred *new = bprm->cred;
823db1a8922SRichard Guy Briggs 
8249304b46cSRichard Guy Briggs 	if (!root_privileged())
825db1a8922SRichard Guy Briggs 		return;
826db1a8922SRichard Guy Briggs 	/*
827db1a8922SRichard Guy Briggs 	 * If the legacy file capability is set, then don't set privs
828db1a8922SRichard Guy Briggs 	 * for a setuid root binary run by a non-root user.  Do set it
829db1a8922SRichard Guy Briggs 	 * for a root user just to cause least surprise to an admin.
830db1a8922SRichard Guy Briggs 	 */
83181a6a012SRichard Guy Briggs 	if (has_fcap && __is_suid(root_uid, new)) {
832db1a8922SRichard Guy Briggs 		warn_setuid_and_fcaps_mixed(bprm->filename);
833db1a8922SRichard Guy Briggs 		return;
834db1a8922SRichard Guy Briggs 	}
835db1a8922SRichard Guy Briggs 	/*
836db1a8922SRichard Guy Briggs 	 * To support inheritance of root-permissions and suid-root
837db1a8922SRichard Guy Briggs 	 * executables under compatibility mode, we override the
838db1a8922SRichard Guy Briggs 	 * capability sets for the file.
839db1a8922SRichard Guy Briggs 	 */
84081a6a012SRichard Guy Briggs 	if (__is_eff(root_uid, new) || __is_real(root_uid, new)) {
841db1a8922SRichard Guy Briggs 		/* pP' = (cap_bset & ~0) | (pI & ~0) */
842db1a8922SRichard Guy Briggs 		new->cap_permitted = cap_combine(old->cap_bset,
843db1a8922SRichard Guy Briggs 						 old->cap_inheritable);
844db1a8922SRichard Guy Briggs 	}
845db1a8922SRichard Guy Briggs 	/*
846db1a8922SRichard Guy Briggs 	 * If only the real uid is 0, we do not set the effective bit.
847db1a8922SRichard Guy Briggs 	 */
84881a6a012SRichard Guy Briggs 	if (__is_eff(root_uid, new))
849db1a8922SRichard Guy Briggs 		*effective = true;
850db1a8922SRichard Guy Briggs }
851db1a8922SRichard Guy Briggs 
8524c7e715fSRichard Guy Briggs #define __cap_gained(field, target, source) \
8534c7e715fSRichard Guy Briggs 	!cap_issubset(target->cap_##field, source->cap_##field)
8544c7e715fSRichard Guy Briggs #define __cap_grew(target, source, cred) \
8554c7e715fSRichard Guy Briggs 	!cap_issubset(cred->cap_##target, cred->cap_##source)
8564c7e715fSRichard Guy Briggs #define __cap_full(field, cred) \
8574c7e715fSRichard Guy Briggs 	cap_issubset(CAP_FULL_SET, cred->cap_##field)
85881a6a012SRichard Guy Briggs 
__is_setuid(struct cred * new,const struct cred * old)85981a6a012SRichard Guy Briggs static inline bool __is_setuid(struct cred *new, const struct cred *old)
86081a6a012SRichard Guy Briggs { return !uid_eq(new->euid, old->uid); }
86181a6a012SRichard Guy Briggs 
__is_setgid(struct cred * new,const struct cred * old)86281a6a012SRichard Guy Briggs static inline bool __is_setgid(struct cred *new, const struct cred *old)
86381a6a012SRichard Guy Briggs { return !gid_eq(new->egid, old->gid); }
86481a6a012SRichard Guy Briggs 
8659fbc2c79SRichard Guy Briggs /*
866dbbbe110SRichard Guy Briggs  * 1) Audit candidate if current->cap_effective is set
8679fbc2c79SRichard Guy Briggs  *
8689fbc2c79SRichard Guy Briggs  * We do not bother to audit if 3 things are true:
8699fbc2c79SRichard Guy Briggs  *   1) cap_effective has all caps
870588fb2c7SRichard Guy Briggs  *   2) we became root *OR* are were already root
8719fbc2c79SRichard Guy Briggs  *   3) root is supposed to have all caps (SECURE_NOROOT)
8729fbc2c79SRichard Guy Briggs  * Since this is just a normal root execing a process.
8739fbc2c79SRichard Guy Briggs  *
8749fbc2c79SRichard Guy Briggs  * Number 1 above might fail if you don't have a full bset, but I think
8759fbc2c79SRichard Guy Briggs  * that is interesting information to audit.
876dbbbe110SRichard Guy Briggs  *
877dbbbe110SRichard Guy Briggs  * A number of other conditions require logging:
878dbbbe110SRichard Guy Briggs  * 2) something prevented setuid root getting all caps
879dbbbe110SRichard Guy Briggs  * 3) non-setuid root gets fcaps
880dbbbe110SRichard Guy Briggs  * 4) non-setuid root gets ambient
8819fbc2c79SRichard Guy Briggs  */
nonroot_raised_pE(struct cred * new,const struct cred * old,kuid_t root,bool has_fcap)882dbbbe110SRichard Guy Briggs static inline bool nonroot_raised_pE(struct cred *new, const struct cred *old,
883dbbbe110SRichard Guy Briggs 				     kuid_t root, bool has_fcap)
8849fbc2c79SRichard Guy Briggs {
8859fbc2c79SRichard Guy Briggs 	bool ret = false;
8869fbc2c79SRichard Guy Briggs 
887dbbbe110SRichard Guy Briggs 	if ((__cap_grew(effective, ambient, new) &&
888dbbbe110SRichard Guy Briggs 	     !(__cap_full(effective, new) &&
889dbbbe110SRichard Guy Briggs 	       (__is_eff(root, new) || __is_real(root, new)) &&
890dbbbe110SRichard Guy Briggs 	       root_privileged())) ||
891dbbbe110SRichard Guy Briggs 	    (root_privileged() &&
892dbbbe110SRichard Guy Briggs 	     __is_suid(root, new) &&
893dbbbe110SRichard Guy Briggs 	     !__cap_full(effective, new)) ||
894dbbbe110SRichard Guy Briggs 	    (!__is_setuid(new, old) &&
895dbbbe110SRichard Guy Briggs 	     ((has_fcap &&
896dbbbe110SRichard Guy Briggs 	       __cap_gained(permitted, new, old)) ||
897dbbbe110SRichard Guy Briggs 	      __cap_gained(ambient, new, old))))
898dbbbe110SRichard Guy Briggs 
8999fbc2c79SRichard Guy Briggs 		ret = true;
900dbbbe110SRichard Guy Briggs 
9019fbc2c79SRichard Guy Briggs 	return ret;
9029fbc2c79SRichard Guy Briggs }
9039fbc2c79SRichard Guy Briggs 
9041d045980SDavid Howells /**
90556305aa9SEric W. Biederman  * cap_bprm_creds_from_file - Set up the proposed credentials for execve().
9061d045980SDavid Howells  * @bprm: The execution parameters, including the proposed creds
90756305aa9SEric W. Biederman  * @file: The file to pull the credentials from
9081d045980SDavid Howells  *
9091d045980SDavid Howells  * Set up the proposed credentials for a new execution context being
9101d045980SDavid Howells  * constructed by execve().  The proposed creds in @bprm->cred is altered,
911049ae601SRandy Dunlap  * which won't take effect immediately.
912049ae601SRandy Dunlap  *
913049ae601SRandy Dunlap  * Return: 0 if successful, -ve on error.
914a6f76f23SDavid Howells  */
cap_bprm_creds_from_file(struct linux_binprm * bprm,const struct file * file)9154a00c673SKhadija Kamran int cap_bprm_creds_from_file(struct linux_binprm *bprm, const struct file *file)
916b5376771SSerge E. Hallyn {
91756305aa9SEric W. Biederman 	/* Process setpcap binaries and capabilities for uid 0 */
918a6f76f23SDavid Howells 	const struct cred *old = current_cred();
919a6f76f23SDavid Howells 	struct cred *new = bprm->cred;
920fc7eadf7SRichard Guy Briggs 	bool effective = false, has_fcap = false, is_setid;
921b5376771SSerge E. Hallyn 	int ret;
92218815a18SEric W. Biederman 	kuid_t root_uid;
923b5376771SSerge E. Hallyn 
92458319057SAndy Lutomirski 	if (WARN_ON(!cap_ambient_invariant_ok(old)))
92558319057SAndy Lutomirski 		return -EPERM;
92658319057SAndy Lutomirski 
92756305aa9SEric W. Biederman 	ret = get_file_caps(bprm, file, &effective, &has_fcap);
928a6f76f23SDavid Howells 	if (ret < 0)
929a6f76f23SDavid Howells 		return ret;
9301da177e4SLinus Torvalds 
93118815a18SEric W. Biederman 	root_uid = make_kuid(new->user_ns, 0);
93218815a18SEric W. Biederman 
933fc7eadf7SRichard Guy Briggs 	handle_privileged_root(bprm, has_fcap, &effective, root_uid);
934b5376771SSerge E. Hallyn 
935d52fc5ddSEric Paris 	/* if we have fs caps, clear dangerous personality flags */
9364c7e715fSRichard Guy Briggs 	if (__cap_gained(permitted, new, old))
93756305aa9SEric W. Biederman 		bprm->per_clear |= PER_CLEAR_ON_SETID;
938d52fc5ddSEric Paris 
939a6f76f23SDavid Howells 	/* Don't let someone trace a set[ug]id/setpcap binary with the revised
940259e5e6cSAndy Lutomirski 	 * credentials unless they have the appropriate permit.
941259e5e6cSAndy Lutomirski 	 *
942259e5e6cSAndy Lutomirski 	 * In addition, if NO_NEW_PRIVS, then ensure we get no new privs.
943a6f76f23SDavid Howells 	 */
94481a6a012SRichard Guy Briggs 	is_setid = __is_setuid(new, old) || __is_setgid(new, old);
94558319057SAndy Lutomirski 
9464c7e715fSRichard Guy Briggs 	if ((is_setid || __cap_gained(permitted, new, old)) &&
9479227dd2aSEric W. Biederman 	    ((bprm->unsafe & ~LSM_UNSAFE_PTRACE) ||
94820523132SEric W. Biederman 	     !ptracer_capable(current, new->user_ns))) {
949a6f76f23SDavid Howells 		/* downgrade; they get no more than they had, and maybe less */
95070169420SEric W. Biederman 		if (!ns_capable(new->user_ns, CAP_SETUID) ||
951259e5e6cSAndy Lutomirski 		    (bprm->unsafe & LSM_UNSAFE_NO_NEW_PRIVS)) {
952a6f76f23SDavid Howells 			new->euid = new->uid;
953a6f76f23SDavid Howells 			new->egid = new->gid;
9541da177e4SLinus Torvalds 		}
955a6f76f23SDavid Howells 		new->cap_permitted = cap_intersect(new->cap_permitted,
956a6f76f23SDavid Howells 						   old->cap_permitted);
9571da177e4SLinus Torvalds 	}
9581da177e4SLinus Torvalds 
959a6f76f23SDavid Howells 	new->suid = new->fsuid = new->euid;
960a6f76f23SDavid Howells 	new->sgid = new->fsgid = new->egid;
9611da177e4SLinus Torvalds 
96258319057SAndy Lutomirski 	/* File caps or setid cancels ambient. */
963fc7eadf7SRichard Guy Briggs 	if (has_fcap || is_setid)
96458319057SAndy Lutomirski 		cap_clear(new->cap_ambient);
96558319057SAndy Lutomirski 
96658319057SAndy Lutomirski 	/*
96758319057SAndy Lutomirski 	 * Now that we've computed pA', update pP' to give:
96858319057SAndy Lutomirski 	 *   pP' = (X & fP) | (pI & fI) | pA'
96958319057SAndy Lutomirski 	 */
97058319057SAndy Lutomirski 	new->cap_permitted = cap_combine(new->cap_permitted, new->cap_ambient);
97158319057SAndy Lutomirski 
97258319057SAndy Lutomirski 	/*
97358319057SAndy Lutomirski 	 * Set pE' = (fE ? pP' : pA').  Because pA' is zero if fE is set,
97458319057SAndy Lutomirski 	 * this is the same as pE' = (fE ? pP' : 0) | pA'.
97558319057SAndy Lutomirski 	 */
976a6f76f23SDavid Howells 	if (effective)
977a6f76f23SDavid Howells 		new->cap_effective = new->cap_permitted;
978e338d263SAndrew Morgan 	else
97958319057SAndy Lutomirski 		new->cap_effective = new->cap_ambient;
98058319057SAndy Lutomirski 
98158319057SAndy Lutomirski 	if (WARN_ON(!cap_ambient_invariant_ok(new)))
98258319057SAndy Lutomirski 		return -EPERM;
98358319057SAndy Lutomirski 
984dbbbe110SRichard Guy Briggs 	if (nonroot_raised_pE(new, old, root_uid, has_fcap)) {
985a6f76f23SDavid Howells 		ret = audit_log_bprm_fcaps(bprm, new, old);
986a6f76f23SDavid Howells 		if (ret < 0)
987a6f76f23SDavid Howells 			return ret;
988a6f76f23SDavid Howells 	}
9891da177e4SLinus Torvalds 
990d84f4f99SDavid Howells 	new->securebits &= ~issecure_mask(SECURE_KEEP_CAPS);
99158319057SAndy Lutomirski 
99258319057SAndy Lutomirski 	if (WARN_ON(!cap_ambient_invariant_ok(new)))
99358319057SAndy Lutomirski 		return -EPERM;
99458319057SAndy Lutomirski 
99546d98eb4SKees Cook 	/* Check for privilege-elevated exec. */
99602ebbaf4SRichard Guy Briggs 	if (is_setid ||
99702ebbaf4SRichard Guy Briggs 	    (!__is_real(root_uid, new) &&
99802ebbaf4SRichard Guy Briggs 	     (effective ||
99902ebbaf4SRichard Guy Briggs 	      __cap_grew(permitted, ambient, new))))
100056305aa9SEric W. Biederman 		bprm->secureexec = 1;
100146d98eb4SKees Cook 
1002a6f76f23SDavid Howells 	return 0;
10031da177e4SLinus Torvalds }
10041da177e4SLinus Torvalds 
10051d045980SDavid Howells /**
10061d045980SDavid Howells  * cap_inode_setxattr - Determine whether an xattr may be altered
10071d045980SDavid Howells  * @dentry: The inode/dentry being altered
10081d045980SDavid Howells  * @name: The name of the xattr to be changed
10091d045980SDavid Howells  * @value: The value that the xattr will be changed to
10101d045980SDavid Howells  * @size: The size of value
10111d045980SDavid Howells  * @flags: The replacement flag
10121d045980SDavid Howells  *
10131d045980SDavid Howells  * Determine whether an xattr may be altered or set on an inode, returning 0 if
10141d045980SDavid Howells  * permission is granted, -ve if denied.
10151d045980SDavid Howells  *
10161d045980SDavid Howells  * This is used to make sure security xattrs don't get updated or set by those
10171d045980SDavid Howells  * who aren't privileged to do so.
10181d045980SDavid Howells  */
cap_inode_setxattr(struct dentry * dentry,const char * name,const void * value,size_t size,int flags)10198f0cfa52SDavid Howells int cap_inode_setxattr(struct dentry *dentry, const char *name,
10208f0cfa52SDavid Howells 		       const void *value, size_t size, int flags)
10211da177e4SLinus Torvalds {
1022b1d749c5SEric W. Biederman 	struct user_namespace *user_ns = dentry->d_sb->s_user_ns;
1023b1d749c5SEric W. Biederman 
10248db6c34fSSerge E. Hallyn 	/* Ignore non-security xattrs */
10258db6c34fSSerge E. Hallyn 	if (strncmp(name, XATTR_SECURITY_PREFIX,
1026c5eaab1dSCarmeli Tamir 			XATTR_SECURITY_PREFIX_LEN) != 0)
1027b5376771SSerge E. Hallyn 		return 0;
10281d045980SDavid Howells 
10298db6c34fSSerge E. Hallyn 	/*
10308db6c34fSSerge E. Hallyn 	 * For XATTR_NAME_CAPS the check will be done in
10318db6c34fSSerge E. Hallyn 	 * cap_convert_nscap(), called by setxattr()
10328db6c34fSSerge E. Hallyn 	 */
10338db6c34fSSerge E. Hallyn 	if (strcmp(name, XATTR_NAME_CAPS) == 0)
10348db6c34fSSerge E. Hallyn 		return 0;
10358db6c34fSSerge E. Hallyn 
1036b1d749c5SEric W. Biederman 	if (!ns_capable(user_ns, CAP_SYS_ADMIN))
10371da177e4SLinus Torvalds 		return -EPERM;
10381da177e4SLinus Torvalds 	return 0;
10391da177e4SLinus Torvalds }
10401da177e4SLinus Torvalds 
10411d045980SDavid Howells /**
10421d045980SDavid Howells  * cap_inode_removexattr - Determine whether an xattr may be removed
104371bc356fSChristian Brauner  *
104439f60c1cSChristian Brauner  * @idmap:	idmap of the mount the inode was found from
10451d045980SDavid Howells  * @dentry:	The inode/dentry being altered
10461d045980SDavid Howells  * @name:	The name of the xattr to be changed
10471d045980SDavid Howells  *
10481d045980SDavid Howells  * Determine whether an xattr may be removed from an inode, returning 0 if
10491d045980SDavid Howells  * permission is granted, -ve if denied.
10501d045980SDavid Howells  *
105139f60c1cSChristian Brauner  * If the inode has been found through an idmapped mount the idmap of
105239f60c1cSChristian Brauner  * the vfsmount must be passed through @idmap. This function will then
105339f60c1cSChristian Brauner  * take care to map the inode according to @idmap before checking
105471bc356fSChristian Brauner  * permissions. On non-idmapped mounts or if permission checking is to be
105539f60c1cSChristian Brauner  * performed on the raw inode simply pass @nop_mnt_idmap.
105671bc356fSChristian Brauner  *
10571d045980SDavid Howells  * This is used to make sure security xattrs don't get removed by those who
10581d045980SDavid Howells  * aren't privileged to remove them.
10591d045980SDavid Howells  */
cap_inode_removexattr(struct mnt_idmap * idmap,struct dentry * dentry,const char * name)106039f60c1cSChristian Brauner int cap_inode_removexattr(struct mnt_idmap *idmap,
106171bc356fSChristian Brauner 			  struct dentry *dentry, const char *name)
10621da177e4SLinus Torvalds {
1063b1d749c5SEric W. Biederman 	struct user_namespace *user_ns = dentry->d_sb->s_user_ns;
1064b1d749c5SEric W. Biederman 
10658db6c34fSSerge E. Hallyn 	/* Ignore non-security xattrs */
10668db6c34fSSerge E. Hallyn 	if (strncmp(name, XATTR_SECURITY_PREFIX,
1067c5eaab1dSCarmeli Tamir 			XATTR_SECURITY_PREFIX_LEN) != 0)
10688db6c34fSSerge E. Hallyn 		return 0;
10698db6c34fSSerge E. Hallyn 
10708db6c34fSSerge E. Hallyn 	if (strcmp(name, XATTR_NAME_CAPS) == 0) {
10718db6c34fSSerge E. Hallyn 		/* security.capability gets namespaced */
10728db6c34fSSerge E. Hallyn 		struct inode *inode = d_backing_inode(dentry);
10738db6c34fSSerge E. Hallyn 		if (!inode)
10748db6c34fSSerge E. Hallyn 			return -EINVAL;
10759452e93eSChristian Brauner 		if (!capable_wrt_inode_uidgid(idmap, inode, CAP_SETFCAP))
1076b5376771SSerge E. Hallyn 			return -EPERM;
1077b5376771SSerge E. Hallyn 		return 0;
10781d045980SDavid Howells 	}
10791d045980SDavid Howells 
1080b1d749c5SEric W. Biederman 	if (!ns_capable(user_ns, CAP_SYS_ADMIN))
10811da177e4SLinus Torvalds 		return -EPERM;
10821da177e4SLinus Torvalds 	return 0;
10831da177e4SLinus Torvalds }
10841da177e4SLinus Torvalds 
10851da177e4SLinus Torvalds /*
10861da177e4SLinus Torvalds  * cap_emulate_setxuid() fixes the effective / permitted capabilities of
10871da177e4SLinus Torvalds  * a process after a call to setuid, setreuid, or setresuid.
10881da177e4SLinus Torvalds  *
10891da177e4SLinus Torvalds  *  1) When set*uiding _from_ one of {r,e,s}uid == 0 _to_ all of
10901da177e4SLinus Torvalds  *  {r,e,s}uid != 0, the permitted and effective capabilities are
10911da177e4SLinus Torvalds  *  cleared.
10921da177e4SLinus Torvalds  *
10931da177e4SLinus Torvalds  *  2) When set*uiding _from_ euid == 0 _to_ euid != 0, the effective
10941da177e4SLinus Torvalds  *  capabilities of the process are cleared.
10951da177e4SLinus Torvalds  *
10961da177e4SLinus Torvalds  *  3) When set*uiding _from_ euid != 0 _to_ euid == 0, the effective
10971da177e4SLinus Torvalds  *  capabilities are set to the permitted capabilities.
10981da177e4SLinus Torvalds  *
10991da177e4SLinus Torvalds  *  fsuid is handled elsewhere. fsuid == 0 and {r,e,s}uid!= 0 should
11001da177e4SLinus Torvalds  *  never happen.
11011da177e4SLinus Torvalds  *
11021da177e4SLinus Torvalds  *  -astor
11031da177e4SLinus Torvalds  *
11041da177e4SLinus Torvalds  * cevans - New behaviour, Oct '99
11051da177e4SLinus Torvalds  * A process may, via prctl(), elect to keep its capabilities when it
11061da177e4SLinus Torvalds  * calls setuid() and switches away from uid==0. Both permitted and
11071da177e4SLinus Torvalds  * effective sets will be retained.
11081da177e4SLinus Torvalds  * Without this change, it was impossible for a daemon to drop only some
11091da177e4SLinus Torvalds  * of its privilege. The call to setuid(!=0) would drop all privileges!
11101da177e4SLinus Torvalds  * Keeping uid 0 is not an option because uid 0 owns too many vital
11111da177e4SLinus Torvalds  * files..
11121da177e4SLinus Torvalds  * Thanks to Olaf Kirch and Peter Benie for spotting this.
11131da177e4SLinus Torvalds  */
cap_emulate_setxuid(struct cred * new,const struct cred * old)1114d84f4f99SDavid Howells static inline void cap_emulate_setxuid(struct cred *new, const struct cred *old)
11151da177e4SLinus Torvalds {
111618815a18SEric W. Biederman 	kuid_t root_uid = make_kuid(old->user_ns, 0);
111718815a18SEric W. Biederman 
111818815a18SEric W. Biederman 	if ((uid_eq(old->uid, root_uid) ||
111918815a18SEric W. Biederman 	     uid_eq(old->euid, root_uid) ||
112018815a18SEric W. Biederman 	     uid_eq(old->suid, root_uid)) &&
112118815a18SEric W. Biederman 	    (!uid_eq(new->uid, root_uid) &&
112218815a18SEric W. Biederman 	     !uid_eq(new->euid, root_uid) &&
112358319057SAndy Lutomirski 	     !uid_eq(new->suid, root_uid))) {
112458319057SAndy Lutomirski 		if (!issecure(SECURE_KEEP_CAPS)) {
1125d84f4f99SDavid Howells 			cap_clear(new->cap_permitted);
1126d84f4f99SDavid Howells 			cap_clear(new->cap_effective);
11271da177e4SLinus Torvalds 		}
112858319057SAndy Lutomirski 
112958319057SAndy Lutomirski 		/*
113058319057SAndy Lutomirski 		 * Pre-ambient programs expect setresuid to nonroot followed
113158319057SAndy Lutomirski 		 * by exec to drop capabilities.  We should make sure that
113258319057SAndy Lutomirski 		 * this remains the case.
113358319057SAndy Lutomirski 		 */
113458319057SAndy Lutomirski 		cap_clear(new->cap_ambient);
113558319057SAndy Lutomirski 	}
113618815a18SEric W. Biederman 	if (uid_eq(old->euid, root_uid) && !uid_eq(new->euid, root_uid))
1137d84f4f99SDavid Howells 		cap_clear(new->cap_effective);
113818815a18SEric W. Biederman 	if (!uid_eq(old->euid, root_uid) && uid_eq(new->euid, root_uid))
1139d84f4f99SDavid Howells 		new->cap_effective = new->cap_permitted;
11401da177e4SLinus Torvalds }
11411da177e4SLinus Torvalds 
11421d045980SDavid Howells /**
11431d045980SDavid Howells  * cap_task_fix_setuid - Fix up the results of setuid() call
11441d045980SDavid Howells  * @new: The proposed credentials
11451d045980SDavid Howells  * @old: The current task's current credentials
11461d045980SDavid Howells  * @flags: Indications of what has changed
11471d045980SDavid Howells  *
11481d045980SDavid Howells  * Fix up the results of setuid() call before the credential changes are
1149049ae601SRandy Dunlap  * actually applied.
1150049ae601SRandy Dunlap  *
1151049ae601SRandy Dunlap  * Return: 0 to grant the changes, -ve to deny them.
11521d045980SDavid Howells  */
cap_task_fix_setuid(struct cred * new,const struct cred * old,int flags)1153d84f4f99SDavid Howells int cap_task_fix_setuid(struct cred *new, const struct cred *old, int flags)
11541da177e4SLinus Torvalds {
11551da177e4SLinus Torvalds 	switch (flags) {
11561da177e4SLinus Torvalds 	case LSM_SETID_RE:
11571da177e4SLinus Torvalds 	case LSM_SETID_ID:
11581da177e4SLinus Torvalds 	case LSM_SETID_RES:
11591d045980SDavid Howells 		/* juggle the capabilities to follow [RES]UID changes unless
11601d045980SDavid Howells 		 * otherwise suppressed */
1161d84f4f99SDavid Howells 		if (!issecure(SECURE_NO_SETUID_FIXUP))
1162d84f4f99SDavid Howells 			cap_emulate_setxuid(new, old);
11631da177e4SLinus Torvalds 		break;
11641da177e4SLinus Torvalds 
11651d045980SDavid Howells 	case LSM_SETID_FS:
11664432b507SPaul Moore 		/* juggle the capabilities to follow FSUID changes, unless
11671d045980SDavid Howells 		 * otherwise suppressed
11681d045980SDavid Howells 		 *
11691da177e4SLinus Torvalds 		 * FIXME - is fsuser used for all CAP_FS_MASK capabilities?
11701da177e4SLinus Torvalds 		 *          if not, we might be a bit too harsh here.
11711da177e4SLinus Torvalds 		 */
11721da177e4SLinus Torvalds 		if (!issecure(SECURE_NO_SETUID_FIXUP)) {
117318815a18SEric W. Biederman 			kuid_t root_uid = make_kuid(old->user_ns, 0);
117418815a18SEric W. Biederman 			if (uid_eq(old->fsuid, root_uid) && !uid_eq(new->fsuid, root_uid))
1175d84f4f99SDavid Howells 				new->cap_effective =
1176d84f4f99SDavid Howells 					cap_drop_fs_set(new->cap_effective);
11771d045980SDavid Howells 
117818815a18SEric W. Biederman 			if (!uid_eq(old->fsuid, root_uid) && uid_eq(new->fsuid, root_uid))
1179d84f4f99SDavid Howells 				new->cap_effective =
1180d84f4f99SDavid Howells 					cap_raise_fs_set(new->cap_effective,
1181d84f4f99SDavid Howells 							 new->cap_permitted);
11821da177e4SLinus Torvalds 		}
11831da177e4SLinus Torvalds 		break;
11841d045980SDavid Howells 
11851da177e4SLinus Torvalds 	default:
11861da177e4SLinus Torvalds 		return -EINVAL;
11871da177e4SLinus Torvalds 	}
11881da177e4SLinus Torvalds 
11891da177e4SLinus Torvalds 	return 0;
11901da177e4SLinus Torvalds }
11911da177e4SLinus Torvalds 
1192b5376771SSerge E. Hallyn /*
1193b5376771SSerge E. Hallyn  * Rationale: code calling task_setscheduler, task_setioprio, and
1194b5376771SSerge E. Hallyn  * task_setnice, assumes that
1195b5376771SSerge E. Hallyn  *   . if capable(cap_sys_nice), then those actions should be allowed
1196b5376771SSerge E. Hallyn  *   . if not capable(cap_sys_nice), but acting on your own processes,
1197b5376771SSerge E. Hallyn  *   	then those actions should be allowed
1198b5376771SSerge E. Hallyn  * This is insufficient now since you can call code without suid, but
1199b5376771SSerge E. Hallyn  * yet with increased caps.
1200b5376771SSerge E. Hallyn  * So we check for increased caps on the target process.
1201b5376771SSerge E. Hallyn  */
cap_safe_nice(struct task_struct * p)1202de45e806SSerge E. Hallyn static int cap_safe_nice(struct task_struct *p)
1203b5376771SSerge E. Hallyn {
1204f54fb863SSerge Hallyn 	int is_subset, ret = 0;
1205c69e8d9cSDavid Howells 
1206c69e8d9cSDavid Howells 	rcu_read_lock();
1207c69e8d9cSDavid Howells 	is_subset = cap_issubset(__task_cred(p)->cap_permitted,
1208c69e8d9cSDavid Howells 				 current_cred()->cap_permitted);
1209f54fb863SSerge Hallyn 	if (!is_subset && !ns_capable(__task_cred(p)->user_ns, CAP_SYS_NICE))
1210f54fb863SSerge Hallyn 		ret = -EPERM;
1211c69e8d9cSDavid Howells 	rcu_read_unlock();
1212c69e8d9cSDavid Howells 
1213f54fb863SSerge Hallyn 	return ret;
1214b5376771SSerge E. Hallyn }
1215b5376771SSerge E. Hallyn 
/**
 * cap_task_setscheduler - Determine if scheduler policy change is permitted
 * @p: The task to affect
 *
 * Check whether the caller may change the scheduler policy of @p.  The
 * decision is delegated entirely to cap_safe_nice().
 *
 * Return: 0 if permission is granted, -ve if denied.
 */
int cap_task_setscheduler(struct task_struct *p)
{
	/* Same capability test as for nice and I/O priority changes. */
	return cap_safe_nice(p);
}
1229b5376771SSerge E. Hallyn 
/**
 * cap_task_setioprio - Determine if I/O priority change is permitted
 * @p: The task to affect
 * @ioprio: The I/O priority to set
 *
 * Check whether the caller may change the I/O priority of @p.  The value
 * of @ioprio is not examined; the decision is delegated entirely to
 * cap_safe_nice().
 *
 * Return: 0 if permission is granted, -ve if denied.
 */
int cap_task_setioprio(struct task_struct *p, int ioprio)
{
	/* Same capability test as for scheduler and nice changes. */
	return cap_safe_nice(p);
}
1244b5376771SSerge E. Hallyn 
/**
 * cap_task_setnice - Determine if task priority change is permitted
 * @p: The task to affect
 * @nice: The nice value to set
 *
 * Check whether the caller may change the priority of @p.  The value of
 * @nice is not examined; the decision is delegated entirely to
 * cap_safe_nice().
 *
 * Return: 0 if permission is granted, -ve if denied.
 */
int cap_task_setnice(struct task_struct *p, int nice)
{
	/* Same capability test as for scheduler and I/O priority changes. */
	return cap_safe_nice(p);
}
1259b5376771SSerge E. Hallyn 
12603b7391deSSerge E. Hallyn /*
12611d045980SDavid Howells  * Implement PR_CAPBSET_DROP.  Attempt to remove the specified capability from
12621d045980SDavid Howells  * the current task's bounding set.  Returns 0 on success, -ve on error.
12633b7391deSSerge E. Hallyn  */
cap_prctl_drop(unsigned long cap)12646d6f3328STetsuo Handa static int cap_prctl_drop(unsigned long cap)
12653b7391deSSerge E. Hallyn {
12666d6f3328STetsuo Handa 	struct cred *new;
12676d6f3328STetsuo Handa 
1268160da84dSEric W. Biederman 	if (!ns_capable(current_user_ns(), CAP_SETPCAP))
12693b7391deSSerge E. Hallyn 		return -EPERM;
12703b7391deSSerge E. Hallyn 	if (!cap_valid(cap))
12713b7391deSSerge E. Hallyn 		return -EINVAL;
1272d84f4f99SDavid Howells 
12736d6f3328STetsuo Handa 	new = prepare_creds();
12746d6f3328STetsuo Handa 	if (!new)
12756d6f3328STetsuo Handa 		return -ENOMEM;
1276d84f4f99SDavid Howells 	cap_lower(new->cap_bset, cap);
12776d6f3328STetsuo Handa 	return commit_creds(new);
12783b7391deSSerge E. Hallyn }
12793898b1b4SAndrew G. Morgan 
12801d045980SDavid Howells /**
12811d045980SDavid Howells  * cap_task_prctl - Implement process control functions for this security module
12821d045980SDavid Howells  * @option: The process control function requested
1283049ae601SRandy Dunlap  * @arg2: The argument data for this function
1284049ae601SRandy Dunlap  * @arg3: The argument data for this function
1285049ae601SRandy Dunlap  * @arg4: The argument data for this function
1286049ae601SRandy Dunlap  * @arg5: The argument data for this function
12871d045980SDavid Howells  *
12881d045980SDavid Howells  * Allow process control functions (sys_prctl()) to alter capabilities; may
12891d045980SDavid Howells  * also deny access to other functions not otherwise implemented here.
12901d045980SDavid Howells  *
1291049ae601SRandy Dunlap  * Return: 0 or +ve on success, -ENOSYS if this function is not implemented
12921d045980SDavid Howells  * here, other -ve on error.  If -ENOSYS is returned, sys_prctl() and other LSM
12931d045980SDavid Howells  * modules will consider performing the function.
12941d045980SDavid Howells  */
cap_task_prctl(int option,unsigned long arg2,unsigned long arg3,unsigned long arg4,unsigned long arg5)12953898b1b4SAndrew G. Morgan int cap_task_prctl(int option, unsigned long arg2, unsigned long arg3,
1296d84f4f99SDavid Howells 		   unsigned long arg4, unsigned long arg5)
12973898b1b4SAndrew G. Morgan {
12986d6f3328STetsuo Handa 	const struct cred *old = current_cred();
1299d84f4f99SDavid Howells 	struct cred *new;
1300d84f4f99SDavid Howells 
13013898b1b4SAndrew G. Morgan 	switch (option) {
13023898b1b4SAndrew G. Morgan 	case PR_CAPBSET_READ:
1303d84f4f99SDavid Howells 		if (!cap_valid(arg2))
13046d6f3328STetsuo Handa 			return -EINVAL;
13056d6f3328STetsuo Handa 		return !!cap_raised(old->cap_bset, arg2);
1306d84f4f99SDavid Howells 
13073898b1b4SAndrew G. Morgan 	case PR_CAPBSET_DROP:
13086d6f3328STetsuo Handa 		return cap_prctl_drop(arg2);
13093898b1b4SAndrew G. Morgan 
13103898b1b4SAndrew G. Morgan 	/*
13113898b1b4SAndrew G. Morgan 	 * The next four prctl's remain to assist with transitioning a
13123898b1b4SAndrew G. Morgan 	 * system from legacy UID=0 based privilege (when filesystem
13133898b1b4SAndrew G. Morgan 	 * capabilities are not in use) to a system using filesystem
13143898b1b4SAndrew G. Morgan 	 * capabilities only - as the POSIX.1e draft intended.
13153898b1b4SAndrew G. Morgan 	 *
13163898b1b4SAndrew G. Morgan 	 * Note:
13173898b1b4SAndrew G. Morgan 	 *
13183898b1b4SAndrew G. Morgan 	 *  PR_SET_SECUREBITS =
13193898b1b4SAndrew G. Morgan 	 *      issecure_mask(SECURE_KEEP_CAPS_LOCKED)
13203898b1b4SAndrew G. Morgan 	 *    | issecure_mask(SECURE_NOROOT)
13213898b1b4SAndrew G. Morgan 	 *    | issecure_mask(SECURE_NOROOT_LOCKED)
13223898b1b4SAndrew G. Morgan 	 *    | issecure_mask(SECURE_NO_SETUID_FIXUP)
13233898b1b4SAndrew G. Morgan 	 *    | issecure_mask(SECURE_NO_SETUID_FIXUP_LOCKED)
13243898b1b4SAndrew G. Morgan 	 *
13253898b1b4SAndrew G. Morgan 	 * will ensure that the current process and all of its
13263898b1b4SAndrew G. Morgan 	 * children will be locked into a pure
13273898b1b4SAndrew G. Morgan 	 * capability-based-privilege environment.
13283898b1b4SAndrew G. Morgan 	 */
13293898b1b4SAndrew G. Morgan 	case PR_SET_SECUREBITS:
13306d6f3328STetsuo Handa 		if ((((old->securebits & SECURE_ALL_LOCKS) >> 1)
13316d6f3328STetsuo Handa 		     & (old->securebits ^ arg2))			/*[1]*/
13326d6f3328STetsuo Handa 		    || ((old->securebits & SECURE_ALL_LOCKS & ~arg2))	/*[2]*/
13333898b1b4SAndrew G. Morgan 		    || (arg2 & ~(SECURE_ALL_LOCKS | SECURE_ALL_BITS))	/*[3]*/
13343898b1b4SAndrew G. Morgan 			/*
13353898b1b4SAndrew G. Morgan 			 * [1] no changing of bits that are locked
13363898b1b4SAndrew G. Morgan 			 * [2] no unlocking of locks
13373898b1b4SAndrew G. Morgan 			 * [3] no setting of unsupported bits
13383898b1b4SAndrew G. Morgan 			 */
1339d84f4f99SDavid Howells 		    )
1340d84f4f99SDavid Howells 			/* cannot change a locked bit */
13416d6f3328STetsuo Handa 			return -EPERM;
13426d6f3328STetsuo Handa 
1343a0623b2aSMickaël Salaün 		/*
1344a0623b2aSMickaël Salaün 		 * Doing anything requires privilege (go read about the
1345a0623b2aSMickaël Salaün 		 * "sendmail capabilities bug"), except for unprivileged bits.
1346a0623b2aSMickaël Salaün 		 * Indeed, the SECURE_ALL_UNPRIVILEGED bits are not
1347a0623b2aSMickaël Salaün 		 * restrictions enforced by the kernel but by user space on
1348a0623b2aSMickaël Salaün 		 * itself.
1349a0623b2aSMickaël Salaün 		 */
1350a0623b2aSMickaël Salaün 		if (cap_capable(current_cred(), current_cred()->user_ns,
1351a0623b2aSMickaël Salaün 				CAP_SETPCAP, CAP_OPT_NONE) != 0) {
1352a0623b2aSMickaël Salaün 			const unsigned long unpriv_and_locks =
1353a0623b2aSMickaël Salaün 				SECURE_ALL_UNPRIVILEGED |
1354a0623b2aSMickaël Salaün 				SECURE_ALL_UNPRIVILEGED << 1;
1355a0623b2aSMickaël Salaün 			const unsigned long changed = old->securebits ^ arg2;
1356a0623b2aSMickaël Salaün 
1357a0623b2aSMickaël Salaün 			/* For legacy reason, denies non-change. */
1358a0623b2aSMickaël Salaün 			if (!changed)
1359a0623b2aSMickaël Salaün 				return -EPERM;
1360a0623b2aSMickaël Salaün 
1361a0623b2aSMickaël Salaün 			/* Denies privileged changes. */
1362a0623b2aSMickaël Salaün 			if (changed & ~unpriv_and_locks)
1363a0623b2aSMickaël Salaün 				return -EPERM;
1364a0623b2aSMickaël Salaün 		}
1365a0623b2aSMickaël Salaün 
13666d6f3328STetsuo Handa 		new = prepare_creds();
13676d6f3328STetsuo Handa 		if (!new)
13686d6f3328STetsuo Handa 			return -ENOMEM;
1369d84f4f99SDavid Howells 		new->securebits = arg2;
13706d6f3328STetsuo Handa 		return commit_creds(new);
1371d84f4f99SDavid Howells 
13723898b1b4SAndrew G. Morgan 	case PR_GET_SECUREBITS:
13736d6f3328STetsuo Handa 		return old->securebits;
13743898b1b4SAndrew G. Morgan 
13753898b1b4SAndrew G. Morgan 	case PR_GET_KEEPCAPS:
13766d6f3328STetsuo Handa 		return !!issecure(SECURE_KEEP_CAPS);
1377d84f4f99SDavid Howells 
13783898b1b4SAndrew G. Morgan 	case PR_SET_KEEPCAPS:
1379d84f4f99SDavid Howells 		if (arg2 > 1) /* Note, we rely on arg2 being unsigned here */
13806d6f3328STetsuo Handa 			return -EINVAL;
1381d84f4f99SDavid Howells 		if (issecure(SECURE_KEEP_CAPS_LOCKED))
13826d6f3328STetsuo Handa 			return -EPERM;
13836d6f3328STetsuo Handa 
13846d6f3328STetsuo Handa 		new = prepare_creds();
13856d6f3328STetsuo Handa 		if (!new)
13866d6f3328STetsuo Handa 			return -ENOMEM;
1387d84f4f99SDavid Howells 		if (arg2)
1388d84f4f99SDavid Howells 			new->securebits |= issecure_mask(SECURE_KEEP_CAPS);
13893898b1b4SAndrew G. Morgan 		else
1390d84f4f99SDavid Howells 			new->securebits &= ~issecure_mask(SECURE_KEEP_CAPS);
13916d6f3328STetsuo Handa 		return commit_creds(new);
13923898b1b4SAndrew G. Morgan 
139358319057SAndy Lutomirski 	case PR_CAP_AMBIENT:
139458319057SAndy Lutomirski 		if (arg2 == PR_CAP_AMBIENT_CLEAR_ALL) {
139558319057SAndy Lutomirski 			if (arg3 | arg4 | arg5)
139658319057SAndy Lutomirski 				return -EINVAL;
139758319057SAndy Lutomirski 
139858319057SAndy Lutomirski 			new = prepare_creds();
139958319057SAndy Lutomirski 			if (!new)
140058319057SAndy Lutomirski 				return -ENOMEM;
140158319057SAndy Lutomirski 			cap_clear(new->cap_ambient);
140258319057SAndy Lutomirski 			return commit_creds(new);
140358319057SAndy Lutomirski 		}
140458319057SAndy Lutomirski 
140558319057SAndy Lutomirski 		if (((!cap_valid(arg3)) | arg4 | arg5))
140658319057SAndy Lutomirski 			return -EINVAL;
140758319057SAndy Lutomirski 
140858319057SAndy Lutomirski 		if (arg2 == PR_CAP_AMBIENT_IS_SET) {
140958319057SAndy Lutomirski 			return !!cap_raised(current_cred()->cap_ambient, arg3);
141058319057SAndy Lutomirski 		} else if (arg2 != PR_CAP_AMBIENT_RAISE &&
141158319057SAndy Lutomirski 			   arg2 != PR_CAP_AMBIENT_LOWER) {
141258319057SAndy Lutomirski 			return -EINVAL;
141358319057SAndy Lutomirski 		} else {
141458319057SAndy Lutomirski 			if (arg2 == PR_CAP_AMBIENT_RAISE &&
141558319057SAndy Lutomirski 			    (!cap_raised(current_cred()->cap_permitted, arg3) ||
141658319057SAndy Lutomirski 			     !cap_raised(current_cred()->cap_inheritable,
1417746bf6d6SAndy Lutomirski 					 arg3) ||
1418746bf6d6SAndy Lutomirski 			     issecure(SECURE_NO_CAP_AMBIENT_RAISE)))
141958319057SAndy Lutomirski 				return -EPERM;
142058319057SAndy Lutomirski 
142158319057SAndy Lutomirski 			new = prepare_creds();
142258319057SAndy Lutomirski 			if (!new)
142358319057SAndy Lutomirski 				return -ENOMEM;
142458319057SAndy Lutomirski 			if (arg2 == PR_CAP_AMBIENT_RAISE)
142558319057SAndy Lutomirski 				cap_raise(new->cap_ambient, arg3);
142658319057SAndy Lutomirski 			else
142758319057SAndy Lutomirski 				cap_lower(new->cap_ambient, arg3);
142858319057SAndy Lutomirski 			return commit_creds(new);
142958319057SAndy Lutomirski 		}
143058319057SAndy Lutomirski 
14313898b1b4SAndrew G. Morgan 	default:
14323898b1b4SAndrew G. Morgan 		/* No functionality available - continue with default */
14336d6f3328STetsuo Handa 		return -ENOSYS;
14343898b1b4SAndrew G. Morgan 	}
14351da177e4SLinus Torvalds }
14361da177e4SLinus Torvalds 
14371d045980SDavid Howells /**
14381d045980SDavid Howells  * cap_vm_enough_memory - Determine whether a new virtual mapping is permitted
14391d045980SDavid Howells  * @mm: The VM space in which the new mapping is to be made
14401d045980SDavid Howells  * @pages: The size of the mapping
14411d045980SDavid Howells  *
14421d045980SDavid Howells  * Determine whether the allocation of a new virtual mapping by the current
1443049ae601SRandy Dunlap  * task is permitted.
1444049ae601SRandy Dunlap  *
1445be72a575SXu Kuohai  * Return: 0 if permission granted, negative error code if not.
14461d045980SDavid Howells  */
cap_vm_enough_memory(struct mm_struct * mm,long pages)144734b4e4aaSAlan Cox int cap_vm_enough_memory(struct mm_struct *mm, long pages)
14481da177e4SLinus Torvalds {
1449be72a575SXu Kuohai 	return cap_capable(current_cred(), &init_user_ns, CAP_SYS_ADMIN,
1450be72a575SXu Kuohai 			   CAP_OPT_NOAUDIT);
14511da177e4SLinus Torvalds }
14527c73875eSEric Paris 
1453049ae601SRandy Dunlap /**
1454d007794aSAl Viro  * cap_mmap_addr - check if able to map given addr
1455d007794aSAl Viro  * @addr: address attempting to be mapped
1456d007794aSAl Viro  *
1457d007794aSAl Viro  * If the process is attempting to map memory below dac_mmap_min_addr they need
1458d007794aSAl Viro  * CAP_SYS_RAWIO.  The other parameters to this function are unused by the
1459049ae601SRandy Dunlap  * capability security module.
1460049ae601SRandy Dunlap  *
1461049ae601SRandy Dunlap  * Return: 0 if this mapping should be allowed or -EPERM if not.
1462d007794aSAl Viro  */
cap_mmap_addr(unsigned long addr)1463d007794aSAl Viro int cap_mmap_addr(unsigned long addr)
1464d007794aSAl Viro {
1465d007794aSAl Viro 	int ret = 0;
1466d007794aSAl Viro 
1467d007794aSAl Viro 	if (addr < dac_mmap_min_addr) {
1468d007794aSAl Viro 		ret = cap_capable(current_cred(), &init_user_ns, CAP_SYS_RAWIO,
1469c1a85a00SMicah Morton 				  CAP_OPT_NONE);
1470d007794aSAl Viro 		/* set PF_SUPERPRIV if it turns out we allow the low mmap */
1471d007794aSAl Viro 		if (ret == 0)
1472d007794aSAl Viro 			current->flags |= PF_SUPERPRIV;
1473d007794aSAl Viro 	}
1474d007794aSAl Viro 	return ret;
1475d007794aSAl Viro }
1476d007794aSAl Viro 
1477b1d9e6b0SCasey Schaufler #ifdef CONFIG_SECURITY
1478b1d9e6b0SCasey Schaufler 
1479b1a867eeSPaul Moore static const struct lsm_id capability_lsmid = {
1480f3b8788cSCasey Schaufler 	.name = "capability",
1481f3b8788cSCasey Schaufler 	.id = LSM_ID_CAPABILITY,
1482f3b8788cSCasey Schaufler };
1483f3b8788cSCasey Schaufler 
1484f22f9aafSPaul Moore static struct security_hook_list capability_hooks[] __ro_after_init = {
1485b1d9e6b0SCasey Schaufler 	LSM_HOOK_INIT(capable, cap_capable),
1486b1d9e6b0SCasey Schaufler 	LSM_HOOK_INIT(settime, cap_settime),
1487b1d9e6b0SCasey Schaufler 	LSM_HOOK_INIT(ptrace_access_check, cap_ptrace_access_check),
1488b1d9e6b0SCasey Schaufler 	LSM_HOOK_INIT(ptrace_traceme, cap_ptrace_traceme),
1489b1d9e6b0SCasey Schaufler 	LSM_HOOK_INIT(capget, cap_capget),
1490b1d9e6b0SCasey Schaufler 	LSM_HOOK_INIT(capset, cap_capset),
149156305aa9SEric W. Biederman 	LSM_HOOK_INIT(bprm_creds_from_file, cap_bprm_creds_from_file),
1492b1d9e6b0SCasey Schaufler 	LSM_HOOK_INIT(inode_need_killpriv, cap_inode_need_killpriv),
1493b1d9e6b0SCasey Schaufler 	LSM_HOOK_INIT(inode_killpriv, cap_inode_killpriv),
14948db6c34fSSerge E. Hallyn 	LSM_HOOK_INIT(inode_getsecurity, cap_inode_getsecurity),
1495b1d9e6b0SCasey Schaufler 	LSM_HOOK_INIT(mmap_addr, cap_mmap_addr),
1496b1d9e6b0SCasey Schaufler 	LSM_HOOK_INIT(task_fix_setuid, cap_task_fix_setuid),
1497b1d9e6b0SCasey Schaufler 	LSM_HOOK_INIT(task_prctl, cap_task_prctl),
1498b1d9e6b0SCasey Schaufler 	LSM_HOOK_INIT(task_setscheduler, cap_task_setscheduler),
1499b1d9e6b0SCasey Schaufler 	LSM_HOOK_INIT(task_setioprio, cap_task_setioprio),
1500b1d9e6b0SCasey Schaufler 	LSM_HOOK_INIT(task_setnice, cap_task_setnice),
1501b1d9e6b0SCasey Schaufler 	LSM_HOOK_INIT(vm_enough_memory, cap_vm_enough_memory),
1502b1d9e6b0SCasey Schaufler };
1503b1d9e6b0SCasey Schaufler 
capability_init(void)1504d117a154SKees Cook static int __init capability_init(void)
1505b1d9e6b0SCasey Schaufler {
1506d69dece5SCasey Schaufler 	security_add_hooks(capability_hooks, ARRAY_SIZE(capability_hooks),
1507f3b8788cSCasey Schaufler 			   &capability_lsmid);
1508d117a154SKees Cook 	return 0;
1509b1d9e6b0SCasey Schaufler }
1510b1d9e6b0SCasey Schaufler 
1511d117a154SKees Cook DEFINE_LSM(capability) = {
1512d117a154SKees Cook 	.name = "capability",
1513d117a154SKees Cook 	.order = LSM_ORDER_FIRST,
1514d117a154SKees Cook 	.init = capability_init,
1515d117a154SKees Cook };
1516d117a154SKees Cook 
1517b1d9e6b0SCasey Schaufler #endif /* CONFIG_SECURITY */
1518