xref: /openbmc/linux/fs/xattr.c (revision 831be973)
1 // SPDX-License-Identifier: GPL-2.0-only
2 /*
3   File: fs/xattr.c
4 
5   Extended attribute handling.
6 
7   Copyright (C) 2001 by Andreas Gruenbacher <a.gruenbacher@computer.org>
8   Copyright (C) 2001 SGI - Silicon Graphics, Inc <linux-xfs@oss.sgi.com>
9   Copyright (c) 2004 Red Hat, Inc., James Morris <jmorris@redhat.com>
10  */
11 #include <linux/fs.h>
12 #include <linux/filelock.h>
13 #include <linux/slab.h>
14 #include <linux/file.h>
15 #include <linux/xattr.h>
16 #include <linux/mount.h>
17 #include <linux/namei.h>
18 #include <linux/security.h>
19 #include <linux/evm.h>
20 #include <linux/syscalls.h>
21 #include <linux/export.h>
22 #include <linux/fsnotify.h>
23 #include <linux/audit.h>
24 #include <linux/vmalloc.h>
25 #include <linux/posix_acl_xattr.h>
26 
27 #include <linux/uaccess.h>
28 
29 #include "internal.h"
30 
/*
 * Check whether @a starts with @a_prefix.
 *
 * Returns a pointer to the first character of @a following the prefix when
 * every character of @a_prefix matched (for an empty prefix this is @a
 * itself), or NULL as soon as a character differs.
 */
static const char *
strcmp_prefix(const char *a, const char *a_prefix)
{
	for (; *a_prefix != '\0'; a++, a_prefix++) {
		if (*a != *a_prefix)
			return NULL;
	}
	return a;
}
40 
/*
 * In order to implement different sets of xattr operations for each xattr
 * prefix, a filesystem should create a null-terminated array of struct
 * xattr_handler (one for each prefix) and hang a pointer to it off of the
 * s_xattr field of the superblock.
 *
 * for_each_xattr_handler() walks such an array: @handlers is a cursor that
 * is advanced past each visited entry and @handler receives the current
 * entry.  A NULL @handlers is treated as an empty array.
 *
 * The macro expands to a single for statement (no leading "if"), so it can
 * be used safely as the body of an if/else without the else binding to a
 * hidden conditional.  Note that @handlers is evaluated more than once.
 */
#define for_each_xattr_handler(handlers, handler)			\
	for ((handler) = (handlers) ? *(handlers)++ : NULL;		\
	     (handler) != NULL;						\
	     (handler) = *(handlers)++)
52 
53 /*
54  * Find the xattr_handler with the matching prefix.
55  */
56 static const struct xattr_handler *
57 xattr_resolve_name(struct inode *inode, const char **name)
58 {
59 	const struct xattr_handler **handlers = inode->i_sb->s_xattr;
60 	const struct xattr_handler *handler;
61 
62 	if (!(inode->i_opflags & IOP_XATTR)) {
63 		if (unlikely(is_bad_inode(inode)))
64 			return ERR_PTR(-EIO);
65 		return ERR_PTR(-EOPNOTSUPP);
66 	}
67 	for_each_xattr_handler(handlers, handler) {
68 		const char *n;
69 
70 		n = strcmp_prefix(*name, xattr_prefix(handler));
71 		if (n) {
72 			if (!handler->prefix ^ !*n) {
73 				if (*n)
74 					continue;
75 				return ERR_PTR(-EINVAL);
76 			}
77 			*name = n;
78 			return handler;
79 		}
80 	}
81 	return ERR_PTR(-EOPNOTSUPP);
82 }
83 
84 /**
85  * may_write_xattr - check whether inode allows writing xattr
86  * @idmap: idmap of the mount the inode was found from
87  * @inode: the inode on which to set an xattr
88  *
89  * Check whether the inode allows writing xattrs. Specifically, we can never
90  * set or remove an extended attribute on a read-only filesystem  or on an
91  * immutable / append-only inode.
92  *
93  * We also need to ensure that the inode has a mapping in the mount to
94  * not risk writing back invalid i_{g,u}id values.
95  *
96  * Return: On success zero is returned. On error a negative errno is returned.
97  */
98 int may_write_xattr(struct mnt_idmap *idmap, struct inode *inode)
99 {
100 	if (IS_IMMUTABLE(inode))
101 		return -EPERM;
102 	if (IS_APPEND(inode))
103 		return -EPERM;
104 	if (HAS_UNMAPPED_ID(idmap, inode))
105 		return -EPERM;
106 	return 0;
107 }
108 
109 /*
110  * Check permissions for extended attribute access.  This is a bit complicated
111  * because different namespaces have very different rules.
112  */
113 static int
114 xattr_permission(struct mnt_idmap *idmap, struct inode *inode,
115 		 const char *name, int mask)
116 {
117 	if (mask & MAY_WRITE) {
118 		int ret;
119 
120 		ret = may_write_xattr(idmap, inode);
121 		if (ret)
122 			return ret;
123 	}
124 
125 	/*
126 	 * No restriction for security.* and system.* from the VFS.  Decision
127 	 * on these is left to the underlying filesystem / security module.
128 	 */
129 	if (!strncmp(name, XATTR_SECURITY_PREFIX, XATTR_SECURITY_PREFIX_LEN) ||
130 	    !strncmp(name, XATTR_SYSTEM_PREFIX, XATTR_SYSTEM_PREFIX_LEN))
131 		return 0;
132 
133 	/*
134 	 * The trusted.* namespace can only be accessed by privileged users.
135 	 */
136 	if (!strncmp(name, XATTR_TRUSTED_PREFIX, XATTR_TRUSTED_PREFIX_LEN)) {
137 		if (!capable(CAP_SYS_ADMIN))
138 			return (mask & MAY_WRITE) ? -EPERM : -ENODATA;
139 		return 0;
140 	}
141 
142 	/*
143 	 * In the user.* namespace, only regular files and directories can have
144 	 * extended attributes. For sticky directories, only the owner and
145 	 * privileged users can write attributes.
146 	 */
147 	if (!strncmp(name, XATTR_USER_PREFIX, XATTR_USER_PREFIX_LEN)) {
148 		if (!S_ISREG(inode->i_mode) && !S_ISDIR(inode->i_mode))
149 			return (mask & MAY_WRITE) ? -EPERM : -ENODATA;
150 		if (S_ISDIR(inode->i_mode) && (inode->i_mode & S_ISVTX) &&
151 		    (mask & MAY_WRITE) &&
152 		    !inode_owner_or_capable(idmap, inode))
153 			return -EPERM;
154 	}
155 
156 	return inode_permission(idmap, inode, mask);
157 }
158 
159 /*
160  * Look for any handler that deals with the specified namespace.
161  */
162 int
163 xattr_supports_user_prefix(struct inode *inode)
164 {
165 	const struct xattr_handler **handlers = inode->i_sb->s_xattr;
166 	const struct xattr_handler *handler;
167 
168 	if (!(inode->i_opflags & IOP_XATTR)) {
169 		if (unlikely(is_bad_inode(inode)))
170 			return -EIO;
171 		return -EOPNOTSUPP;
172 	}
173 
174 	for_each_xattr_handler(handlers, handler) {
175 		if (!strncmp(xattr_prefix(handler), XATTR_USER_PREFIX,
176 			     XATTR_USER_PREFIX_LEN))
177 			return 0;
178 	}
179 
180 	return -EOPNOTSUPP;
181 }
182 EXPORT_SYMBOL(xattr_supports_user_prefix);
183 
184 int
185 __vfs_setxattr(struct mnt_idmap *idmap, struct dentry *dentry,
186 	       struct inode *inode, const char *name, const void *value,
187 	       size_t size, int flags)
188 {
189 	const struct xattr_handler *handler;
190 
191 	if (is_posix_acl_xattr(name))
192 		return -EOPNOTSUPP;
193 
194 	handler = xattr_resolve_name(inode, &name);
195 	if (IS_ERR(handler))
196 		return PTR_ERR(handler);
197 	if (!handler->set)
198 		return -EOPNOTSUPP;
199 	if (size == 0)
200 		value = "";  /* empty EA, do not remove */
201 	return handler->set(handler, idmap, dentry, inode, name, value,
202 			    size, flags);
203 }
204 EXPORT_SYMBOL(__vfs_setxattr);
205 
206 /**
207  *  __vfs_setxattr_noperm - perform setxattr operation without performing
208  *  permission checks.
209  *
210  *  @idmap: idmap of the mount the inode was found from
211  *  @dentry: object to perform setxattr on
212  *  @name: xattr name to set
213  *  @value: value to set @name to
214  *  @size: size of @value
215  *  @flags: flags to pass into filesystem operations
216  *
217  *  returns the result of the internal setxattr or setsecurity operations.
218  *
219  *  This function requires the caller to lock the inode's i_mutex before it
220  *  is executed. It also assumes that the caller will make the appropriate
221  *  permission checks.
222  */
223 int __vfs_setxattr_noperm(struct mnt_idmap *idmap,
224 			  struct dentry *dentry, const char *name,
225 			  const void *value, size_t size, int flags)
226 {
227 	struct inode *inode = dentry->d_inode;
228 	int error = -EAGAIN;
229 	int issec = !strncmp(name, XATTR_SECURITY_PREFIX,
230 				   XATTR_SECURITY_PREFIX_LEN);
231 
232 	if (issec)
233 		inode->i_flags &= ~S_NOSEC;
234 	if (inode->i_opflags & IOP_XATTR) {
235 		error = __vfs_setxattr(idmap, dentry, inode, name, value,
236 				       size, flags);
237 		if (!error) {
238 			fsnotify_xattr(dentry);
239 			security_inode_post_setxattr(dentry, name, value,
240 						     size, flags);
241 		}
242 	} else {
243 		if (unlikely(is_bad_inode(inode)))
244 			return -EIO;
245 	}
246 	if (error == -EAGAIN) {
247 		error = -EOPNOTSUPP;
248 
249 		if (issec) {
250 			const char *suffix = name + XATTR_SECURITY_PREFIX_LEN;
251 
252 			error = security_inode_setsecurity(inode, suffix, value,
253 							   size, flags);
254 			if (!error)
255 				fsnotify_xattr(dentry);
256 		}
257 	}
258 
259 	return error;
260 }
261 
262 /**
263  * __vfs_setxattr_locked - set an extended attribute while holding the inode
264  * lock
265  *
266  *  @idmap: idmap of the mount of the target inode
267  *  @dentry: object to perform setxattr on
268  *  @name: xattr name to set
269  *  @value: value to set @name to
270  *  @size: size of @value
271  *  @flags: flags to pass into filesystem operations
272  *  @delegated_inode: on return, will contain an inode pointer that
273  *  a delegation was broken on, NULL if none.
274  */
275 int
276 __vfs_setxattr_locked(struct mnt_idmap *idmap, struct dentry *dentry,
277 		      const char *name, const void *value, size_t size,
278 		      int flags, struct inode **delegated_inode)
279 {
280 	struct inode *inode = dentry->d_inode;
281 	int error;
282 
283 	error = xattr_permission(idmap, inode, name, MAY_WRITE);
284 	if (error)
285 		return error;
286 
287 	error = security_inode_setxattr(idmap, dentry, name, value, size,
288 					flags);
289 	if (error)
290 		goto out;
291 
292 	error = try_break_deleg(inode, delegated_inode);
293 	if (error)
294 		goto out;
295 
296 	error = __vfs_setxattr_noperm(idmap, dentry, name, value,
297 				      size, flags);
298 
299 out:
300 	return error;
301 }
302 EXPORT_SYMBOL_GPL(__vfs_setxattr_locked);
303 
304 int
305 vfs_setxattr(struct mnt_idmap *idmap, struct dentry *dentry,
306 	     const char *name, const void *value, size_t size, int flags)
307 {
308 	struct inode *inode = dentry->d_inode;
309 	struct inode *delegated_inode = NULL;
310 	const void  *orig_value = value;
311 	int error;
312 
313 	if (size && strcmp(name, XATTR_NAME_CAPS) == 0) {
314 		error = cap_convert_nscap(idmap, dentry, &value, size);
315 		if (error < 0)
316 			return error;
317 		size = error;
318 	}
319 
320 retry_deleg:
321 	inode_lock(inode);
322 	error = __vfs_setxattr_locked(idmap, dentry, name, value, size,
323 				      flags, &delegated_inode);
324 	inode_unlock(inode);
325 
326 	if (delegated_inode) {
327 		error = break_deleg_wait(&delegated_inode);
328 		if (!error)
329 			goto retry_deleg;
330 	}
331 	if (value != orig_value)
332 		kfree(value);
333 
334 	return error;
335 }
336 EXPORT_SYMBOL_GPL(vfs_setxattr);
337 
338 static ssize_t
339 xattr_getsecurity(struct mnt_idmap *idmap, struct inode *inode,
340 		  const char *name, void *value, size_t size)
341 {
342 	void *buffer = NULL;
343 	ssize_t len;
344 
345 	if (!value || !size) {
346 		len = security_inode_getsecurity(idmap, inode, name,
347 						 &buffer, false);
348 		goto out_noalloc;
349 	}
350 
351 	len = security_inode_getsecurity(idmap, inode, name, &buffer,
352 					 true);
353 	if (len < 0)
354 		return len;
355 	if (size < len) {
356 		len = -ERANGE;
357 		goto out;
358 	}
359 	memcpy(value, buffer, len);
360 out:
361 	kfree(buffer);
362 out_noalloc:
363 	return len;
364 }
365 
366 /*
367  * vfs_getxattr_alloc - allocate memory, if necessary, before calling getxattr
368  *
369  * Allocate memory, if not already allocated, or re-allocate correct size,
370  * before retrieving the extended attribute.  The xattr value buffer should
371  * always be freed by the caller, even on error.
372  *
373  * Returns the result of alloc, if failed, or the getxattr operation.
374  */
375 int
376 vfs_getxattr_alloc(struct mnt_idmap *idmap, struct dentry *dentry,
377 		   const char *name, char **xattr_value, size_t xattr_size,
378 		   gfp_t flags)
379 {
380 	const struct xattr_handler *handler;
381 	struct inode *inode = dentry->d_inode;
382 	char *value = *xattr_value;
383 	int error;
384 
385 	error = xattr_permission(idmap, inode, name, MAY_READ);
386 	if (error)
387 		return error;
388 
389 	handler = xattr_resolve_name(inode, &name);
390 	if (IS_ERR(handler))
391 		return PTR_ERR(handler);
392 	if (!handler->get)
393 		return -EOPNOTSUPP;
394 	error = handler->get(handler, dentry, inode, name, NULL, 0);
395 	if (error < 0)
396 		return error;
397 
398 	if (!value || (error > xattr_size)) {
399 		value = krealloc(*xattr_value, error + 1, flags);
400 		if (!value)
401 			return -ENOMEM;
402 		memset(value, 0, error + 1);
403 	}
404 
405 	error = handler->get(handler, dentry, inode, name, value, error);
406 	*xattr_value = value;
407 	return error;
408 }
409 
410 ssize_t
411 __vfs_getxattr(struct dentry *dentry, struct inode *inode, const char *name,
412 	       void *value, size_t size)
413 {
414 	const struct xattr_handler *handler;
415 
416 	if (is_posix_acl_xattr(name))
417 		return -EOPNOTSUPP;
418 
419 	handler = xattr_resolve_name(inode, &name);
420 	if (IS_ERR(handler))
421 		return PTR_ERR(handler);
422 	if (!handler->get)
423 		return -EOPNOTSUPP;
424 	return handler->get(handler, dentry, inode, name, value, size);
425 }
426 EXPORT_SYMBOL(__vfs_getxattr);
427 
428 ssize_t
429 vfs_getxattr(struct mnt_idmap *idmap, struct dentry *dentry,
430 	     const char *name, void *value, size_t size)
431 {
432 	struct inode *inode = dentry->d_inode;
433 	int error;
434 
435 	error = xattr_permission(idmap, inode, name, MAY_READ);
436 	if (error)
437 		return error;
438 
439 	error = security_inode_getxattr(dentry, name);
440 	if (error)
441 		return error;
442 
443 	if (!strncmp(name, XATTR_SECURITY_PREFIX,
444 				XATTR_SECURITY_PREFIX_LEN)) {
445 		const char *suffix = name + XATTR_SECURITY_PREFIX_LEN;
446 		int ret = xattr_getsecurity(idmap, inode, suffix, value,
447 					    size);
448 		/*
449 		 * Only overwrite the return value if a security module
450 		 * is actually active.
451 		 */
452 		if (ret == -EOPNOTSUPP)
453 			goto nolsm;
454 		return ret;
455 	}
456 nolsm:
457 	return __vfs_getxattr(dentry, inode, name, value, size);
458 }
459 EXPORT_SYMBOL_GPL(vfs_getxattr);
460 
461 ssize_t
462 vfs_listxattr(struct dentry *dentry, char *list, size_t size)
463 {
464 	struct inode *inode = d_inode(dentry);
465 	ssize_t error;
466 
467 	error = security_inode_listxattr(dentry);
468 	if (error)
469 		return error;
470 	if (inode->i_op->listxattr && (inode->i_opflags & IOP_XATTR)) {
471 		error = inode->i_op->listxattr(dentry, list, size);
472 	} else {
473 		error = security_inode_listsecurity(inode, list, size);
474 		if (size && error > size)
475 			error = -ERANGE;
476 	}
477 	return error;
478 }
479 EXPORT_SYMBOL_GPL(vfs_listxattr);
480 
481 int
482 __vfs_removexattr(struct mnt_idmap *idmap, struct dentry *dentry,
483 		  const char *name)
484 {
485 	struct inode *inode = d_inode(dentry);
486 	const struct xattr_handler *handler;
487 
488 	if (is_posix_acl_xattr(name))
489 		return -EOPNOTSUPP;
490 
491 	handler = xattr_resolve_name(inode, &name);
492 	if (IS_ERR(handler))
493 		return PTR_ERR(handler);
494 	if (!handler->set)
495 		return -EOPNOTSUPP;
496 	return handler->set(handler, idmap, dentry, inode, name, NULL, 0,
497 			    XATTR_REPLACE);
498 }
499 EXPORT_SYMBOL(__vfs_removexattr);
500 
501 /**
502  * __vfs_removexattr_locked - set an extended attribute while holding the inode
503  * lock
504  *
505  *  @idmap: idmap of the mount of the target inode
506  *  @dentry: object to perform setxattr on
507  *  @name: name of xattr to remove
508  *  @delegated_inode: on return, will contain an inode pointer that
509  *  a delegation was broken on, NULL if none.
510  */
511 int
512 __vfs_removexattr_locked(struct mnt_idmap *idmap,
513 			 struct dentry *dentry, const char *name,
514 			 struct inode **delegated_inode)
515 {
516 	struct inode *inode = dentry->d_inode;
517 	int error;
518 
519 	error = xattr_permission(idmap, inode, name, MAY_WRITE);
520 	if (error)
521 		return error;
522 
523 	error = security_inode_removexattr(idmap, dentry, name);
524 	if (error)
525 		goto out;
526 
527 	error = try_break_deleg(inode, delegated_inode);
528 	if (error)
529 		goto out;
530 
531 	error = __vfs_removexattr(idmap, dentry, name);
532 
533 	if (!error) {
534 		fsnotify_xattr(dentry);
535 		evm_inode_post_removexattr(dentry, name);
536 	}
537 
538 out:
539 	return error;
540 }
541 EXPORT_SYMBOL_GPL(__vfs_removexattr_locked);
542 
543 int
544 vfs_removexattr(struct mnt_idmap *idmap, struct dentry *dentry,
545 		const char *name)
546 {
547 	struct inode *inode = dentry->d_inode;
548 	struct inode *delegated_inode = NULL;
549 	int error;
550 
551 retry_deleg:
552 	inode_lock(inode);
553 	error = __vfs_removexattr_locked(idmap, dentry,
554 					 name, &delegated_inode);
555 	inode_unlock(inode);
556 
557 	if (delegated_inode) {
558 		error = break_deleg_wait(&delegated_inode);
559 		if (!error)
560 			goto retry_deleg;
561 	}
562 
563 	return error;
564 }
565 EXPORT_SYMBOL_GPL(vfs_removexattr);
566 
567 /*
568  * Extended attribute SET operations
569  */
570 
571 int setxattr_copy(const char __user *name, struct xattr_ctx *ctx)
572 {
573 	int error;
574 
575 	if (ctx->flags & ~(XATTR_CREATE|XATTR_REPLACE))
576 		return -EINVAL;
577 
578 	error = strncpy_from_user(ctx->kname->name, name,
579 				sizeof(ctx->kname->name));
580 	if (error == 0 || error == sizeof(ctx->kname->name))
581 		return  -ERANGE;
582 	if (error < 0)
583 		return error;
584 
585 	error = 0;
586 	if (ctx->size) {
587 		if (ctx->size > XATTR_SIZE_MAX)
588 			return -E2BIG;
589 
590 		ctx->kvalue = vmemdup_user(ctx->cvalue, ctx->size);
591 		if (IS_ERR(ctx->kvalue)) {
592 			error = PTR_ERR(ctx->kvalue);
593 			ctx->kvalue = NULL;
594 		}
595 	}
596 
597 	return error;
598 }
599 
600 int do_setxattr(struct mnt_idmap *idmap, struct dentry *dentry,
601 		struct xattr_ctx *ctx)
602 {
603 	if (is_posix_acl_xattr(ctx->kname->name))
604 		return do_set_acl(idmap, dentry, ctx->kname->name,
605 				  ctx->kvalue, ctx->size);
606 
607 	return vfs_setxattr(idmap, dentry, ctx->kname->name,
608 			ctx->kvalue, ctx->size, ctx->flags);
609 }
610 
611 static long
612 setxattr(struct mnt_idmap *idmap, struct dentry *d,
613 	const char __user *name, const void __user *value, size_t size,
614 	int flags)
615 {
616 	struct xattr_name kname;
617 	struct xattr_ctx ctx = {
618 		.cvalue   = value,
619 		.kvalue   = NULL,
620 		.size     = size,
621 		.kname    = &kname,
622 		.flags    = flags,
623 	};
624 	int error;
625 
626 	error = setxattr_copy(name, &ctx);
627 	if (error)
628 		return error;
629 
630 	error = do_setxattr(idmap, d, &ctx);
631 
632 	kvfree(ctx.kvalue);
633 	return error;
634 }
635 
636 static int path_setxattr(const char __user *pathname,
637 			 const char __user *name, const void __user *value,
638 			 size_t size, int flags, unsigned int lookup_flags)
639 {
640 	struct path path;
641 	int error;
642 
643 retry:
644 	error = user_path_at(AT_FDCWD, pathname, lookup_flags, &path);
645 	if (error)
646 		return error;
647 	error = mnt_want_write(path.mnt);
648 	if (!error) {
649 		error = setxattr(mnt_idmap(path.mnt), path.dentry, name,
650 				 value, size, flags);
651 		mnt_drop_write(path.mnt);
652 	}
653 	path_put(&path);
654 	if (retry_estale(error, lookup_flags)) {
655 		lookup_flags |= LOOKUP_REVAL;
656 		goto retry;
657 	}
658 	return error;
659 }
660 
661 SYSCALL_DEFINE5(setxattr, const char __user *, pathname,
662 		const char __user *, name, const void __user *, value,
663 		size_t, size, int, flags)
664 {
665 	return path_setxattr(pathname, name, value, size, flags, LOOKUP_FOLLOW);
666 }
667 
668 SYSCALL_DEFINE5(lsetxattr, const char __user *, pathname,
669 		const char __user *, name, const void __user *, value,
670 		size_t, size, int, flags)
671 {
672 	return path_setxattr(pathname, name, value, size, flags, 0);
673 }
674 
675 SYSCALL_DEFINE5(fsetxattr, int, fd, const char __user *, name,
676 		const void __user *,value, size_t, size, int, flags)
677 {
678 	struct fd f = fdget(fd);
679 	int error = -EBADF;
680 
681 	if (!f.file)
682 		return error;
683 	audit_file(f.file);
684 	error = mnt_want_write_file(f.file);
685 	if (!error) {
686 		error = setxattr(file_mnt_idmap(f.file),
687 				 f.file->f_path.dentry, name,
688 				 value, size, flags);
689 		mnt_drop_write_file(f.file);
690 	}
691 	fdput(f);
692 	return error;
693 }
694 
695 /*
696  * Extended attribute GET operations
697  */
698 ssize_t
699 do_getxattr(struct mnt_idmap *idmap, struct dentry *d,
700 	struct xattr_ctx *ctx)
701 {
702 	ssize_t error;
703 	char *kname = ctx->kname->name;
704 
705 	if (ctx->size) {
706 		if (ctx->size > XATTR_SIZE_MAX)
707 			ctx->size = XATTR_SIZE_MAX;
708 		ctx->kvalue = kvzalloc(ctx->size, GFP_KERNEL);
709 		if (!ctx->kvalue)
710 			return -ENOMEM;
711 	}
712 
713 	if (is_posix_acl_xattr(ctx->kname->name))
714 		error = do_get_acl(idmap, d, kname, ctx->kvalue, ctx->size);
715 	else
716 		error = vfs_getxattr(idmap, d, kname, ctx->kvalue, ctx->size);
717 	if (error > 0) {
718 		if (ctx->size && copy_to_user(ctx->value, ctx->kvalue, error))
719 			error = -EFAULT;
720 	} else if (error == -ERANGE && ctx->size >= XATTR_SIZE_MAX) {
721 		/* The file system tried to returned a value bigger
722 		   than XATTR_SIZE_MAX bytes. Not possible. */
723 		error = -E2BIG;
724 	}
725 
726 	return error;
727 }
728 
729 static ssize_t
730 getxattr(struct mnt_idmap *idmap, struct dentry *d,
731 	 const char __user *name, void __user *value, size_t size)
732 {
733 	ssize_t error;
734 	struct xattr_name kname;
735 	struct xattr_ctx ctx = {
736 		.value    = value,
737 		.kvalue   = NULL,
738 		.size     = size,
739 		.kname    = &kname,
740 		.flags    = 0,
741 	};
742 
743 	error = strncpy_from_user(kname.name, name, sizeof(kname.name));
744 	if (error == 0 || error == sizeof(kname.name))
745 		error = -ERANGE;
746 	if (error < 0)
747 		return error;
748 
749 	error =  do_getxattr(idmap, d, &ctx);
750 
751 	kvfree(ctx.kvalue);
752 	return error;
753 }
754 
755 static ssize_t path_getxattr(const char __user *pathname,
756 			     const char __user *name, void __user *value,
757 			     size_t size, unsigned int lookup_flags)
758 {
759 	struct path path;
760 	ssize_t error;
761 retry:
762 	error = user_path_at(AT_FDCWD, pathname, lookup_flags, &path);
763 	if (error)
764 		return error;
765 	error = getxattr(mnt_idmap(path.mnt), path.dentry, name, value, size);
766 	path_put(&path);
767 	if (retry_estale(error, lookup_flags)) {
768 		lookup_flags |= LOOKUP_REVAL;
769 		goto retry;
770 	}
771 	return error;
772 }
773 
774 SYSCALL_DEFINE4(getxattr, const char __user *, pathname,
775 		const char __user *, name, void __user *, value, size_t, size)
776 {
777 	return path_getxattr(pathname, name, value, size, LOOKUP_FOLLOW);
778 }
779 
780 SYSCALL_DEFINE4(lgetxattr, const char __user *, pathname,
781 		const char __user *, name, void __user *, value, size_t, size)
782 {
783 	return path_getxattr(pathname, name, value, size, 0);
784 }
785 
786 SYSCALL_DEFINE4(fgetxattr, int, fd, const char __user *, name,
787 		void __user *, value, size_t, size)
788 {
789 	struct fd f = fdget(fd);
790 	ssize_t error = -EBADF;
791 
792 	if (!f.file)
793 		return error;
794 	audit_file(f.file);
795 	error = getxattr(file_mnt_idmap(f.file), f.file->f_path.dentry,
796 			 name, value, size);
797 	fdput(f);
798 	return error;
799 }
800 
801 /*
802  * Extended attribute LIST operations
803  */
804 static ssize_t
805 listxattr(struct dentry *d, char __user *list, size_t size)
806 {
807 	ssize_t error;
808 	char *klist = NULL;
809 
810 	if (size) {
811 		if (size > XATTR_LIST_MAX)
812 			size = XATTR_LIST_MAX;
813 		klist = kvmalloc(size, GFP_KERNEL);
814 		if (!klist)
815 			return -ENOMEM;
816 	}
817 
818 	error = vfs_listxattr(d, klist, size);
819 	if (error > 0) {
820 		if (size && copy_to_user(list, klist, error))
821 			error = -EFAULT;
822 	} else if (error == -ERANGE && size >= XATTR_LIST_MAX) {
823 		/* The file system tried to returned a list bigger
824 		   than XATTR_LIST_MAX bytes. Not possible. */
825 		error = -E2BIG;
826 	}
827 
828 	kvfree(klist);
829 
830 	return error;
831 }
832 
833 static ssize_t path_listxattr(const char __user *pathname, char __user *list,
834 			      size_t size, unsigned int lookup_flags)
835 {
836 	struct path path;
837 	ssize_t error;
838 retry:
839 	error = user_path_at(AT_FDCWD, pathname, lookup_flags, &path);
840 	if (error)
841 		return error;
842 	error = listxattr(path.dentry, list, size);
843 	path_put(&path);
844 	if (retry_estale(error, lookup_flags)) {
845 		lookup_flags |= LOOKUP_REVAL;
846 		goto retry;
847 	}
848 	return error;
849 }
850 
851 SYSCALL_DEFINE3(listxattr, const char __user *, pathname, char __user *, list,
852 		size_t, size)
853 {
854 	return path_listxattr(pathname, list, size, LOOKUP_FOLLOW);
855 }
856 
857 SYSCALL_DEFINE3(llistxattr, const char __user *, pathname, char __user *, list,
858 		size_t, size)
859 {
860 	return path_listxattr(pathname, list, size, 0);
861 }
862 
863 SYSCALL_DEFINE3(flistxattr, int, fd, char __user *, list, size_t, size)
864 {
865 	struct fd f = fdget(fd);
866 	ssize_t error = -EBADF;
867 
868 	if (!f.file)
869 		return error;
870 	audit_file(f.file);
871 	error = listxattr(f.file->f_path.dentry, list, size);
872 	fdput(f);
873 	return error;
874 }
875 
876 /*
877  * Extended attribute REMOVE operations
878  */
879 static long
880 removexattr(struct mnt_idmap *idmap, struct dentry *d,
881 	    const char __user *name)
882 {
883 	int error;
884 	char kname[XATTR_NAME_MAX + 1];
885 
886 	error = strncpy_from_user(kname, name, sizeof(kname));
887 	if (error == 0 || error == sizeof(kname))
888 		error = -ERANGE;
889 	if (error < 0)
890 		return error;
891 
892 	if (is_posix_acl_xattr(kname))
893 		return vfs_remove_acl(idmap, d, kname);
894 
895 	return vfs_removexattr(idmap, d, kname);
896 }
897 
898 static int path_removexattr(const char __user *pathname,
899 			    const char __user *name, unsigned int lookup_flags)
900 {
901 	struct path path;
902 	int error;
903 retry:
904 	error = user_path_at(AT_FDCWD, pathname, lookup_flags, &path);
905 	if (error)
906 		return error;
907 	error = mnt_want_write(path.mnt);
908 	if (!error) {
909 		error = removexattr(mnt_idmap(path.mnt), path.dentry, name);
910 		mnt_drop_write(path.mnt);
911 	}
912 	path_put(&path);
913 	if (retry_estale(error, lookup_flags)) {
914 		lookup_flags |= LOOKUP_REVAL;
915 		goto retry;
916 	}
917 	return error;
918 }
919 
920 SYSCALL_DEFINE2(removexattr, const char __user *, pathname,
921 		const char __user *, name)
922 {
923 	return path_removexattr(pathname, name, LOOKUP_FOLLOW);
924 }
925 
926 SYSCALL_DEFINE2(lremovexattr, const char __user *, pathname,
927 		const char __user *, name)
928 {
929 	return path_removexattr(pathname, name, 0);
930 }
931 
932 SYSCALL_DEFINE2(fremovexattr, int, fd, const char __user *, name)
933 {
934 	struct fd f = fdget(fd);
935 	int error = -EBADF;
936 
937 	if (!f.file)
938 		return error;
939 	audit_file(f.file);
940 	error = mnt_want_write_file(f.file);
941 	if (!error) {
942 		error = removexattr(file_mnt_idmap(f.file),
943 				    f.file->f_path.dentry, name);
944 		mnt_drop_write_file(f.file);
945 	}
946 	fdput(f);
947 	return error;
948 }
949 
/*
 * Account for one attribute name (including its terminating NUL) in a
 * listxattr result.
 *
 * With a non-NULL *@buffer the name is copied there and *@buffer advanced;
 * -ERANGE is returned, with nothing modified, if the name does not fit in
 * *@remaining_size.  With a NULL *@buffer nothing is copied.  In both
 * successful cases *@remaining_size is reduced by the name's length and 0
 * is returned.
 */
int xattr_list_one(char **buffer, ssize_t *remaining_size, const char *name)
{
	const size_t needed = strlen(name) + 1;
	char *dst = *buffer;

	if (dst != NULL) {
		if (*remaining_size < needed)
			return -ERANGE;
		memcpy(dst, name, needed);
		*buffer = dst + needed;
	}

	*remaining_size -= needed;
	return 0;
}
964 
965 /*
966  * Combine the results of the list() operation from every xattr_handler in the
967  * list.
968  */
969 ssize_t
970 generic_listxattr(struct dentry *dentry, char *buffer, size_t buffer_size)
971 {
972 	const struct xattr_handler *handler, **handlers = dentry->d_sb->s_xattr;
973 	ssize_t remaining_size = buffer_size;
974 	int err = 0;
975 
976 	err = posix_acl_listxattr(d_inode(dentry), &buffer, &remaining_size);
977 	if (err)
978 		return err;
979 
980 	for_each_xattr_handler(handlers, handler) {
981 		if (!handler->name || (handler->list && !handler->list(dentry)))
982 			continue;
983 		err = xattr_list_one(&buffer, &remaining_size, handler->name);
984 		if (err)
985 			return err;
986 	}
987 
988 	return err ? err : buffer_size - remaining_size;
989 }
990 EXPORT_SYMBOL(generic_listxattr);
991 
/**
 * xattr_full_name  -  Compute full attribute name from suffix
 *
 * @handler:	handler of the xattr_handler operation
 * @name:	name passed to the xattr_handler operation
 *
 * The get and set xattr handler operations are called with the remainder of
 * the attribute name after skipping the handler's prefix: for example, "foo"
 * is passed to the get operation of a handler with prefix "user." to get
 * attribute "user.foo".  The prefix still sits in memory directly in front
 * of @name, so the full name is found by stepping back over it.
 *
 * Note: the list xattr handler operation when called from the vfs is passed a
 * NULL name; some file systems use this operation internally, with varying
 * semantics.
 */
const char *xattr_full_name(const struct xattr_handler *handler,
			    const char *name)
{
	return name - strlen(xattr_prefix(handler));
}
EXPORT_SYMBOL(xattr_full_name);
1015 
1016 /**
1017  * free_simple_xattr - free an xattr object
1018  * @xattr: the xattr object
1019  *
1020  * Free the xattr object. Can handle @xattr being NULL.
1021  */
1022 static inline void free_simple_xattr(struct simple_xattr *xattr)
1023 {
1024 	if (xattr)
1025 		kfree(xattr->name);
1026 	kvfree(xattr);
1027 }
1028 
1029 /**
1030  * simple_xattr_alloc - allocate new xattr object
1031  * @value: value of the xattr object
1032  * @size: size of @value
1033  *
1034  * Allocate a new xattr object and initialize respective members. The caller is
1035  * responsible for handling the name of the xattr.
1036  *
1037  * Return: On success a new xattr object is returned. On failure NULL is
1038  * returned.
1039  */
1040 struct simple_xattr *simple_xattr_alloc(const void *value, size_t size)
1041 {
1042 	struct simple_xattr *new_xattr;
1043 	size_t len;
1044 
1045 	/* wrap around? */
1046 	len = sizeof(*new_xattr) + size;
1047 	if (len < sizeof(*new_xattr))
1048 		return NULL;
1049 
1050 	new_xattr = kvmalloc(len, GFP_KERNEL);
1051 	if (!new_xattr)
1052 		return NULL;
1053 
1054 	new_xattr->size = size;
1055 	memcpy(new_xattr->value, value, size);
1056 	return new_xattr;
1057 }
1058 
1059 /**
1060  * rbtree_simple_xattr_cmp - compare xattr name with current rbtree xattr entry
1061  * @key: xattr name
1062  * @node: current node
1063  *
1064  * Compare the xattr name with the xattr name attached to @node in the rbtree.
1065  *
1066  * Return: Negative value if continuing left, positive if continuing right, 0
1067  * if the xattr attached to @node matches @key.
1068  */
1069 static int rbtree_simple_xattr_cmp(const void *key, const struct rb_node *node)
1070 {
1071 	const char *xattr_name = key;
1072 	const struct simple_xattr *xattr;
1073 
1074 	xattr = rb_entry(node, struct simple_xattr, rb_node);
1075 	return strcmp(xattr->name, xattr_name);
1076 }
1077 
1078 /**
1079  * rbtree_simple_xattr_node_cmp - compare two xattr rbtree nodes
1080  * @new_node: new node
1081  * @node: current node
1082  *
1083  * Compare the xattr attached to @new_node with the xattr attached to @node.
1084  *
1085  * Return: Negative value if continuing left, positive if continuing right, 0
1086  * if the xattr attached to @new_node matches the xattr attached to @node.
1087  */
1088 static int rbtree_simple_xattr_node_cmp(struct rb_node *new_node,
1089 					const struct rb_node *node)
1090 {
1091 	struct simple_xattr *xattr;
1092 	xattr = rb_entry(new_node, struct simple_xattr, rb_node);
1093 	return rbtree_simple_xattr_cmp(xattr->name, node);
1094 }
1095 
1096 /**
1097  * simple_xattr_get - get an xattr object
1098  * @xattrs: the header of the xattr object
1099  * @name: the name of the xattr to retrieve
1100  * @buffer: the buffer to store the value into
1101  * @size: the size of @buffer
1102  *
1103  * Try to find and retrieve the xattr object associated with @name.
1104  * If @buffer is provided store the value of @xattr in @buffer
1105  * otherwise just return the length. The size of @buffer is limited
1106  * to XATTR_SIZE_MAX which currently is 65536.
1107  *
1108  * Return: On success the length of the xattr value is returned. On error a
1109  * negative error code is returned.
1110  */
1111 int simple_xattr_get(struct simple_xattrs *xattrs, const char *name,
1112 		     void *buffer, size_t size)
1113 {
1114 	struct simple_xattr *xattr = NULL;
1115 	struct rb_node *rbp;
1116 	int ret = -ENODATA;
1117 
1118 	read_lock(&xattrs->lock);
1119 	rbp = rb_find(name, &xattrs->rb_root, rbtree_simple_xattr_cmp);
1120 	if (rbp) {
1121 		xattr = rb_entry(rbp, struct simple_xattr, rb_node);
1122 		ret = xattr->size;
1123 		if (buffer) {
1124 			if (size < xattr->size)
1125 				ret = -ERANGE;
1126 			else
1127 				memcpy(buffer, xattr->value, xattr->size);
1128 		}
1129 	}
1130 	read_unlock(&xattrs->lock);
1131 	return ret;
1132 }
1133 
1134 /**
1135  * simple_xattr_set - set an xattr object
1136  * @xattrs: the header of the xattr object
1137  * @name: the name of the xattr to retrieve
1138  * @value: the value to store along the xattr
1139  * @size: the size of @value
1140  * @flags: the flags determining how to set the xattr
1141  * @removed_size: the size of the removed xattr
1142  *
1143  * Set a new xattr object.
1144  * If @value is passed a new xattr object will be allocated. If XATTR_REPLACE
1145  * is specified in @flags a matching xattr object for @name must already exist.
1146  * If it does it will be replaced with the new xattr object. If it doesn't we
1147  * fail. If XATTR_CREATE is specified and a matching xattr does already exist
1148  * we fail. If it doesn't we create a new xattr. If @flags is zero we simply
1149  * insert the new xattr replacing any existing one.
1150  *
1151  * If @value is empty and a matching xattr object is found we delete it if
1152  * XATTR_REPLACE is specified in @flags or @flags is zero.
1153  *
1154  * If @value is empty and no matching xattr object for @name is found we do
1155  * nothing if XATTR_CREATE is specified in @flags or @flags is zero. For
1156  * XATTR_REPLACE we fail as mentioned above.
1157  *
1158  * Return: On success zero and on error a negative error code is returned.
1159  */
1160 int simple_xattr_set(struct simple_xattrs *xattrs, const char *name,
1161 		     const void *value, size_t size, int flags,
1162 		     ssize_t *removed_size)
1163 {
1164 	struct simple_xattr *xattr = NULL, *new_xattr = NULL;
1165 	struct rb_node *parent = NULL, **rbp;
1166 	int err = 0, ret;
1167 
1168 	if (removed_size)
1169 		*removed_size = -1;
1170 
1171 	/* value == NULL means remove */
1172 	if (value) {
1173 		new_xattr = simple_xattr_alloc(value, size);
1174 		if (!new_xattr)
1175 			return -ENOMEM;
1176 
1177 		new_xattr->name = kstrdup(name, GFP_KERNEL);
1178 		if (!new_xattr->name) {
1179 			free_simple_xattr(new_xattr);
1180 			return -ENOMEM;
1181 		}
1182 	}
1183 
1184 	write_lock(&xattrs->lock);
1185 	rbp = &xattrs->rb_root.rb_node;
1186 	while (*rbp) {
1187 		parent = *rbp;
1188 		ret = rbtree_simple_xattr_cmp(name, *rbp);
1189 		if (ret < 0)
1190 			rbp = &(*rbp)->rb_left;
1191 		else if (ret > 0)
1192 			rbp = &(*rbp)->rb_right;
1193 		else
1194 			xattr = rb_entry(*rbp, struct simple_xattr, rb_node);
1195 		if (xattr)
1196 			break;
1197 	}
1198 
1199 	if (xattr) {
1200 		/* Fail if XATTR_CREATE is requested and the xattr exists. */
1201 		if (flags & XATTR_CREATE) {
1202 			err = -EEXIST;
1203 			goto out_unlock;
1204 		}
1205 
1206 		if (new_xattr)
1207 			rb_replace_node(&xattr->rb_node, &new_xattr->rb_node,
1208 					&xattrs->rb_root);
1209 		else
1210 			rb_erase(&xattr->rb_node, &xattrs->rb_root);
1211 		if (!err && removed_size)
1212 			*removed_size = xattr->size;
1213 	} else {
1214 		/* Fail if XATTR_REPLACE is requested but no xattr is found. */
1215 		if (flags & XATTR_REPLACE) {
1216 			err = -ENODATA;
1217 			goto out_unlock;
1218 		}
1219 
1220 		/*
1221 		 * If XATTR_CREATE or no flags are specified together with a
1222 		 * new value simply insert it.
1223 		 */
1224 		if (new_xattr) {
1225 			rb_link_node(&new_xattr->rb_node, parent, rbp);
1226 			rb_insert_color(&new_xattr->rb_node, &xattrs->rb_root);
1227 		}
1228 
1229 		/*
1230 		 * If XATTR_CREATE or no flags are specified and neither an
1231 		 * old or new xattr exist then we don't need to do anything.
1232 		 */
1233 	}
1234 
1235 out_unlock:
1236 	write_unlock(&xattrs->lock);
1237 	if (err)
1238 		free_simple_xattr(new_xattr);
1239 	else
1240 		free_simple_xattr(xattr);
1241 	return err;
1242 
1243 }
1244 
1245 static bool xattr_is_trusted(const char *name)
1246 {
1247 	return !strncmp(name, XATTR_TRUSTED_PREFIX, XATTR_TRUSTED_PREFIX_LEN);
1248 }
1249 
1250 /**
1251  * simple_xattr_list - list all xattr objects
1252  * @inode: inode from which to get the xattrs
1253  * @xattrs: the header of the xattr object
1254  * @buffer: the buffer to store all xattrs into
1255  * @size: the size of @buffer
1256  *
1257  * List all xattrs associated with @inode. If @buffer is NULL we returned
1258  * the required size of the buffer. If @buffer is provided we store the
1259  * xattrs value into it provided it is big enough.
1260  *
1261  * Note, the number of xattr names that can be listed with listxattr(2) is
1262  * limited to XATTR_LIST_MAX aka 65536 bytes. If a larger buffer is passed
1263  * then vfs_listxattr() caps it to XATTR_LIST_MAX and if more xattr names
1264  * are found it will return -E2BIG.
1265  *
1266  * Return: On success the required size or the size of the copied xattrs is
1267  * returned. On error a negative error code is returned.
1268  */
1269 ssize_t simple_xattr_list(struct inode *inode, struct simple_xattrs *xattrs,
1270 			  char *buffer, size_t size)
1271 {
1272 	bool trusted = ns_capable_noaudit(&init_user_ns, CAP_SYS_ADMIN);
1273 	struct simple_xattr *xattr;
1274 	struct rb_node *rbp;
1275 	ssize_t remaining_size = size;
1276 	int err = 0;
1277 
1278 	err = posix_acl_listxattr(inode, &buffer, &remaining_size);
1279 	if (err)
1280 		return err;
1281 
1282 	read_lock(&xattrs->lock);
1283 	for (rbp = rb_first(&xattrs->rb_root); rbp; rbp = rb_next(rbp)) {
1284 		xattr = rb_entry(rbp, struct simple_xattr, rb_node);
1285 
1286 		/* skip "trusted." attributes for unprivileged callers */
1287 		if (!trusted && xattr_is_trusted(xattr->name))
1288 			continue;
1289 
1290 		err = xattr_list_one(&buffer, &remaining_size, xattr->name);
1291 		if (err)
1292 			break;
1293 	}
1294 	read_unlock(&xattrs->lock);
1295 
1296 	return err ? err : size - remaining_size;
1297 }
1298 
/**
 * rbtree_simple_xattr_less - rb_add() ordering callback for xattr nodes
 * @new_node: node that is being inserted
 * @node: node currently being visited in the rbtree
 *
 * Reduce the three-way result of rbtree_simple_xattr_node_cmp() to the
 * boolean that rb_add() expects. A result of zero, i.e. an equal name, is
 * not treated as an error, so this function technically tolerates duplicate
 * entries.
 *
 * Return: True if rbtree_simple_xattr_node_cmp() orders @new_node to the
 * left of @node, false otherwise.
 */
static bool rbtree_simple_xattr_less(struct rb_node *new_node,
				     const struct rb_node *node)
{
	int order = rbtree_simple_xattr_node_cmp(new_node, node);

	return order < 0;
}
1314 
1315 /**
1316  * simple_xattr_add - add xattr objects
1317  * @xattrs: the header of the xattr object
1318  * @new_xattr: the xattr object to add
1319  *
1320  * Add an xattr object to @xattrs. This assumes no replacement or removal
1321  * of matching xattrs is wanted. Should only be called during inode
1322  * initialization when a few distinct initial xattrs are supposed to be set.
1323  */
1324 void simple_xattr_add(struct simple_xattrs *xattrs,
1325 		      struct simple_xattr *new_xattr)
1326 {
1327 	write_lock(&xattrs->lock);
1328 	rb_add(&new_xattr->rb_node, &xattrs->rb_root, rbtree_simple_xattr_less);
1329 	write_unlock(&xattrs->lock);
1330 }
1331 
1332 /**
1333  * simple_xattrs_init - initialize new xattr header
1334  * @xattrs: header to initialize
1335  *
1336  * Initialize relevant fields of a an xattr header.
1337  */
1338 void simple_xattrs_init(struct simple_xattrs *xattrs)
1339 {
1340 	xattrs->rb_root = RB_ROOT;
1341 	rwlock_init(&xattrs->lock);
1342 }
1343 
1344 /**
1345  * simple_xattrs_free - free xattrs
1346  * @xattrs: xattr header whose xattrs to destroy
1347  *
1348  * Destroy all xattrs in @xattr. When this is called no one can hold a
1349  * reference to any of the xattrs anymore.
1350  */
1351 void simple_xattrs_free(struct simple_xattrs *xattrs)
1352 {
1353 	struct rb_node *rbp;
1354 
1355 	rbp = rb_first(&xattrs->rb_root);
1356 	while (rbp) {
1357 		struct simple_xattr *xattr;
1358 		struct rb_node *rbp_next;
1359 
1360 		rbp_next = rb_next(rbp);
1361 		xattr = rb_entry(rbp, struct simple_xattr, rb_node);
1362 		rb_erase(&xattr->rb_node, &xattrs->rb_root);
1363 		free_simple_xattr(xattr);
1364 		rbp = rbp_next;
1365 	}
1366 }
1367