xref: /openbmc/linux/fs/ceph/xattr.c (revision 910499e1)
1 // SPDX-License-Identifier: GPL-2.0
2 #include <linux/ceph/ceph_debug.h>
3 #include <linux/ceph/pagelist.h>
4 
5 #include "super.h"
6 #include "mds_client.h"
7 
8 #include <linux/ceph/decode.h>
9 
10 #include <linux/xattr.h>
11 #include <linux/security.h>
12 #include <linux/posix_acl_xattr.h>
13 #include <linux/slab.h>
14 
/* Name prefix shared by all Ceph-specific extended attributes. */
#define XATTR_CEPH_PREFIX "ceph."
/* Length of XATTR_CEPH_PREFIX without its terminating '\0'. */
#define XATTR_CEPH_PREFIX_LEN (sizeof (XATTR_CEPH_PREFIX) - 1)
17 
/* Forward declaration: __set_xattr() calls this before it is defined. */
static int __remove_xattr(struct ceph_inode_info *ci,
			  struct ceph_inode_xattr *xattr);
20 
21 static bool ceph_is_valid_xattr(const char *name)
22 {
23 	return !strncmp(name, XATTR_SECURITY_PREFIX, XATTR_SECURITY_PREFIX_LEN) ||
24 	       !strncmp(name, XATTR_CEPH_PREFIX, XATTR_CEPH_PREFIX_LEN) ||
25 	       !strncmp(name, XATTR_TRUSTED_PREFIX, XATTR_TRUSTED_PREFIX_LEN) ||
26 	       !strncmp(name, XATTR_USER_PREFIX, XATTR_USER_PREFIX_LEN);
27 }
28 
/*
 * These define virtual xattrs exposing the recursive directory
 * statistics and layout metadata.
 *
 * One instance describes a single virtual attribute; instances are grouped
 * in tables terminated by an entry whose name is NULL.
 */
struct ceph_vxattr {
	/* full attribute name, e.g. "ceph.dir.layout"; NULL ends a table */
	char *name;
	size_t name_size;	/* strlen(name) + 1 (for '\0') */
	/*
	 * Writes the attribute value into @val (capacity @size) and returns
	 * the value length, or a negative errno.
	 */
	ssize_t (*getxattr_cb)(struct ceph_inode_info *ci, char *val,
			       size_t size);
	/*
	 * Optional.  When set and it returns false, __ceph_getxattr() reports
	 * the attribute as absent (-ENODATA) without calling getxattr_cb.
	 */
	bool (*exists_cb)(struct ceph_inode_info *ci);
	/* bitmask of VXATTR_FLAG_* */
	unsigned int flags;
};
41 
/* __ceph_setxattr() rejects writes to the attribute with -EOPNOTSUPP */
#define VXATTR_FLAG_READONLY		(1<<0)
/* NOTE(review): not consumed in the visible part of this file; presumably
 * keeps the attribute out of listings -- confirm against the users. */
#define VXATTR_FLAG_HIDDEN		(1<<1)
/* reading the attribute makes __ceph_getxattr() request CEPH_STAT_RSTAT */
#define VXATTR_FLAG_RSTAT		(1<<2)
/* reading the attribute makes __ceph_getxattr() request CEPH_CAP_FILE_SHARED */
#define VXATTR_FLAG_DIRSTAT		(1<<3)
46 
47 /* layouts */
48 
49 static bool ceph_vxattrcb_layout_exists(struct ceph_inode_info *ci)
50 {
51 	struct ceph_file_layout *fl = &ci->i_layout;
52 	return (fl->stripe_unit > 0 || fl->stripe_count > 0 ||
53 		fl->object_size > 0 || fl->pool_id >= 0 ||
54 		rcu_dereference_raw(fl->pool_ns) != NULL);
55 }
56 
57 static ssize_t ceph_vxattrcb_layout(struct ceph_inode_info *ci, char *val,
58 				    size_t size)
59 {
60 	struct ceph_fs_client *fsc = ceph_sb_to_client(ci->vfs_inode.i_sb);
61 	struct ceph_osd_client *osdc = &fsc->client->osdc;
62 	struct ceph_string *pool_ns;
63 	s64 pool = ci->i_layout.pool_id;
64 	const char *pool_name;
65 	const char *ns_field = " pool_namespace=";
66 	char buf[128];
67 	size_t len, total_len = 0;
68 	ssize_t ret;
69 
70 	pool_ns = ceph_try_get_string(ci->i_layout.pool_ns);
71 
72 	dout("ceph_vxattrcb_layout %p\n", &ci->vfs_inode);
73 	down_read(&osdc->lock);
74 	pool_name = ceph_pg_pool_name_by_id(osdc->osdmap, pool);
75 	if (pool_name) {
76 		len = snprintf(buf, sizeof(buf),
77 		"stripe_unit=%u stripe_count=%u object_size=%u pool=",
78 		ci->i_layout.stripe_unit, ci->i_layout.stripe_count,
79 	        ci->i_layout.object_size);
80 		total_len = len + strlen(pool_name);
81 	} else {
82 		len = snprintf(buf, sizeof(buf),
83 		"stripe_unit=%u stripe_count=%u object_size=%u pool=%lld",
84 		ci->i_layout.stripe_unit, ci->i_layout.stripe_count,
85 		ci->i_layout.object_size, pool);
86 		total_len = len;
87 	}
88 
89 	if (pool_ns)
90 		total_len += strlen(ns_field) + pool_ns->len;
91 
92 	ret = total_len;
93 	if (size >= total_len) {
94 		memcpy(val, buf, len);
95 		ret = len;
96 		if (pool_name) {
97 			len = strlen(pool_name);
98 			memcpy(val + ret, pool_name, len);
99 			ret += len;
100 		}
101 		if (pool_ns) {
102 			len = strlen(ns_field);
103 			memcpy(val + ret, ns_field, len);
104 			ret += len;
105 			memcpy(val + ret, pool_ns->str, pool_ns->len);
106 			ret += pool_ns->len;
107 		}
108 	}
109 	up_read(&osdc->lock);
110 	ceph_put_string(pool_ns);
111 	return ret;
112 }
113 
114 /*
115  * The convention with strings in xattrs is that they should not be NULL
116  * terminated, since we're returning the length with them. snprintf always
117  * NULL terminates however, so call it on a temporary buffer and then memcpy
118  * the result into place.
119  */
120 static __printf(3, 4)
121 int ceph_fmt_xattr(char *val, size_t size, const char *fmt, ...)
122 {
123 	int ret;
124 	va_list args;
125 	char buf[96]; /* NB: reevaluate size if new vxattrs are added */
126 
127 	va_start(args, fmt);
128 	ret = vsnprintf(buf, size ? sizeof(buf) : 0, fmt, args);
129 	va_end(args);
130 
131 	/* Sanity check */
132 	if (size && ret + 1 > sizeof(buf)) {
133 		WARN_ONCE(true, "Returned length too big (%d)", ret);
134 		return -E2BIG;
135 	}
136 
137 	if (ret <= size)
138 		memcpy(val, buf, ret);
139 	return ret;
140 }
141 
142 static ssize_t ceph_vxattrcb_layout_stripe_unit(struct ceph_inode_info *ci,
143 						char *val, size_t size)
144 {
145 	return ceph_fmt_xattr(val, size, "%u", ci->i_layout.stripe_unit);
146 }
147 
148 static ssize_t ceph_vxattrcb_layout_stripe_count(struct ceph_inode_info *ci,
149 						 char *val, size_t size)
150 {
151 	return ceph_fmt_xattr(val, size, "%u", ci->i_layout.stripe_count);
152 }
153 
154 static ssize_t ceph_vxattrcb_layout_object_size(struct ceph_inode_info *ci,
155 						char *val, size_t size)
156 {
157 	return ceph_fmt_xattr(val, size, "%u", ci->i_layout.object_size);
158 }
159 
160 static ssize_t ceph_vxattrcb_layout_pool(struct ceph_inode_info *ci,
161 					 char *val, size_t size)
162 {
163 	ssize_t ret;
164 	struct ceph_fs_client *fsc = ceph_sb_to_client(ci->vfs_inode.i_sb);
165 	struct ceph_osd_client *osdc = &fsc->client->osdc;
166 	s64 pool = ci->i_layout.pool_id;
167 	const char *pool_name;
168 
169 	down_read(&osdc->lock);
170 	pool_name = ceph_pg_pool_name_by_id(osdc->osdmap, pool);
171 	if (pool_name) {
172 		ret = strlen(pool_name);
173 		if (ret <= size)
174 			memcpy(val, pool_name, ret);
175 	} else {
176 		ret = ceph_fmt_xattr(val, size, "%lld", pool);
177 	}
178 	up_read(&osdc->lock);
179 	return ret;
180 }
181 
182 static ssize_t ceph_vxattrcb_layout_pool_namespace(struct ceph_inode_info *ci,
183 						   char *val, size_t size)
184 {
185 	ssize_t ret = 0;
186 	struct ceph_string *ns = ceph_try_get_string(ci->i_layout.pool_ns);
187 
188 	if (ns) {
189 		ret = ns->len;
190 		if (ret <= size)
191 			memcpy(val, ns->str, ret);
192 		ceph_put_string(ns);
193 	}
194 	return ret;
195 }
196 
197 /* directories */
198 
199 static ssize_t ceph_vxattrcb_dir_entries(struct ceph_inode_info *ci, char *val,
200 					 size_t size)
201 {
202 	return ceph_fmt_xattr(val, size, "%lld", ci->i_files + ci->i_subdirs);
203 }
204 
205 static ssize_t ceph_vxattrcb_dir_files(struct ceph_inode_info *ci, char *val,
206 				       size_t size)
207 {
208 	return ceph_fmt_xattr(val, size, "%lld", ci->i_files);
209 }
210 
211 static ssize_t ceph_vxattrcb_dir_subdirs(struct ceph_inode_info *ci, char *val,
212 					 size_t size)
213 {
214 	return ceph_fmt_xattr(val, size, "%lld", ci->i_subdirs);
215 }
216 
217 static ssize_t ceph_vxattrcb_dir_rentries(struct ceph_inode_info *ci, char *val,
218 					  size_t size)
219 {
220 	return ceph_fmt_xattr(val, size, "%lld",
221 				ci->i_rfiles + ci->i_rsubdirs);
222 }
223 
224 static ssize_t ceph_vxattrcb_dir_rfiles(struct ceph_inode_info *ci, char *val,
225 					size_t size)
226 {
227 	return ceph_fmt_xattr(val, size, "%lld", ci->i_rfiles);
228 }
229 
230 static ssize_t ceph_vxattrcb_dir_rsubdirs(struct ceph_inode_info *ci, char *val,
231 					  size_t size)
232 {
233 	return ceph_fmt_xattr(val, size, "%lld", ci->i_rsubdirs);
234 }
235 
236 static ssize_t ceph_vxattrcb_dir_rbytes(struct ceph_inode_info *ci, char *val,
237 					size_t size)
238 {
239 	return ceph_fmt_xattr(val, size, "%lld", ci->i_rbytes);
240 }
241 
242 static ssize_t ceph_vxattrcb_dir_rctime(struct ceph_inode_info *ci, char *val,
243 					size_t size)
244 {
245 	return ceph_fmt_xattr(val, size, "%lld.%09ld", ci->i_rctime.tv_sec,
246 				ci->i_rctime.tv_nsec);
247 }
248 
249 /* dir pin */
250 static bool ceph_vxattrcb_dir_pin_exists(struct ceph_inode_info *ci)
251 {
252 	return ci->i_dir_pin != -ENODATA;
253 }
254 
255 static ssize_t ceph_vxattrcb_dir_pin(struct ceph_inode_info *ci, char *val,
256 				     size_t size)
257 {
258 	return ceph_fmt_xattr(val, size, "%d", (int)ci->i_dir_pin);
259 }
260 
261 /* quotas */
262 static bool ceph_vxattrcb_quota_exists(struct ceph_inode_info *ci)
263 {
264 	bool ret = false;
265 	spin_lock(&ci->i_ceph_lock);
266 	if ((ci->i_max_files || ci->i_max_bytes) &&
267 	    ci->i_vino.snap == CEPH_NOSNAP &&
268 	    ci->i_snap_realm &&
269 	    ci->i_snap_realm->ino == ci->i_vino.ino)
270 		ret = true;
271 	spin_unlock(&ci->i_ceph_lock);
272 	return ret;
273 }
274 
275 static ssize_t ceph_vxattrcb_quota(struct ceph_inode_info *ci, char *val,
276 				   size_t size)
277 {
278 	return ceph_fmt_xattr(val, size, "max_bytes=%llu max_files=%llu",
279 				ci->i_max_bytes, ci->i_max_files);
280 }
281 
282 static ssize_t ceph_vxattrcb_quota_max_bytes(struct ceph_inode_info *ci,
283 					     char *val, size_t size)
284 {
285 	return ceph_fmt_xattr(val, size, "%llu", ci->i_max_bytes);
286 }
287 
288 static ssize_t ceph_vxattrcb_quota_max_files(struct ceph_inode_info *ci,
289 					     char *val, size_t size)
290 {
291 	return ceph_fmt_xattr(val, size, "%llu", ci->i_max_files);
292 }
293 
294 /* snapshots */
295 static bool ceph_vxattrcb_snap_btime_exists(struct ceph_inode_info *ci)
296 {
297 	return (ci->i_snap_btime.tv_sec != 0 || ci->i_snap_btime.tv_nsec != 0);
298 }
299 
300 static ssize_t ceph_vxattrcb_snap_btime(struct ceph_inode_info *ci, char *val,
301 					size_t size)
302 {
303 	return ceph_fmt_xattr(val, size, "%lld.%09ld", ci->i_snap_btime.tv_sec,
304 				ci->i_snap_btime.tv_nsec);
305 }
306 
307 static ssize_t ceph_vxattrcb_cluster_fsid(struct ceph_inode_info *ci,
308 					  char *val, size_t size)
309 {
310 	struct ceph_fs_client *fsc = ceph_sb_to_client(ci->vfs_inode.i_sb);
311 
312 	return ceph_fmt_xattr(val, size, "%pU", &fsc->client->fsid);
313 }
314 
315 static ssize_t ceph_vxattrcb_client_id(struct ceph_inode_info *ci,
316 				       char *val, size_t size)
317 {
318 	struct ceph_fs_client *fsc = ceph_sb_to_client(ci->vfs_inode.i_sb);
319 
320 	return ceph_fmt_xattr(val, size, "client%lld",
321 			      ceph_client_gid(fsc->client));
322 }
323 
324 static ssize_t ceph_vxattrcb_caps(struct ceph_inode_info *ci, char *val,
325 					size_t size)
326 {
327 	int issued;
328 
329 	spin_lock(&ci->i_ceph_lock);
330 	issued = __ceph_caps_issued(ci, NULL);
331 	spin_unlock(&ci->i_ceph_lock);
332 
333 	return ceph_fmt_xattr(val, size, "%s/0x%x",
334 			      ceph_cap_string(issued), issued);
335 }
336 
/* Build the string literal "ceph.<_type>.<_name>". */
#define CEPH_XATTR_NAME(_type, _name)	XATTR_CEPH_PREFIX #_type "." #_name
/* Build the string literal "ceph.<_type>.<_name>.<_name2>". */
#define CEPH_XATTR_NAME2(_type, _name, _name2)	\
	XATTR_CEPH_PREFIX #_type "." #_name "." #_name2

/*
 * Table entry for "ceph.<_type>.<_name>": always read-only (plus any extra
 * _flags), no exists callback, value produced by
 * ceph_vxattrcb_<_type>_<_name>().
 */
#define XATTR_NAME_CEPH(_type, _name, _flags)				\
	{								\
		.name = CEPH_XATTR_NAME(_type, _name),			\
		.name_size = sizeof (CEPH_XATTR_NAME(_type, _name)), \
		.getxattr_cb = ceph_vxattrcb_ ## _type ## _ ## _name, \
		.exists_cb = NULL,					\
		.flags = (VXATTR_FLAG_READONLY | _flags),		\
	}
/* Read-only entry that additionally carries VXATTR_FLAG_RSTAT. */
#define XATTR_RSTAT_FIELD(_type, _name)			\
	XATTR_NAME_CEPH(_type, _name, VXATTR_FLAG_RSTAT)
/*
 * Table entry for "ceph.<_type>.<_name>.<_field>".  The getter is
 * ceph_vxattrcb_<_name>_<_field>() (note: _type is not part of the callback
 * name), existence follows ceph_vxattrcb_layout_exists(), and the entry is
 * flagged hidden.
 */
#define XATTR_LAYOUT_FIELD(_type, _name, _field)			\
	{								\
		.name = CEPH_XATTR_NAME2(_type, _name, _field),	\
		.name_size = sizeof (CEPH_XATTR_NAME2(_type, _name, _field)), \
		.getxattr_cb = ceph_vxattrcb_ ## _name ## _ ## _field, \
		.exists_cb = ceph_vxattrcb_layout_exists,	\
		.flags = VXATTR_FLAG_HIDDEN,			\
	}
/*
 * Table entry for "ceph.<_type>.<_name>" whose existence follows
 * ceph_vxattrcb_quota_exists(); flagged hidden and not read-only.
 */
#define XATTR_QUOTA_FIELD(_type, _name)					\
	{								\
		.name = CEPH_XATTR_NAME(_type, _name),			\
		.name_size = sizeof(CEPH_XATTR_NAME(_type, _name)),	\
		.getxattr_cb = ceph_vxattrcb_ ## _type ## _ ## _name,	\
		.exists_cb = ceph_vxattrcb_quota_exists,		\
		.flags = VXATTR_FLAG_HIDDEN,				\
	}
367 
/*
 * Virtual xattrs offered on directories (selected by ceph_inode_vxattrs()
 * for S_ISDIR inodes).  Lookup walks the table until the NULL-named entry.
 */
static struct ceph_vxattr ceph_dir_vxattrs[] = {
	/* combined layout string */
	{
		.name = "ceph.dir.layout",
		.name_size = sizeof("ceph.dir.layout"),
		.getxattr_cb = ceph_vxattrcb_layout,
		.exists_cb = ceph_vxattrcb_layout_exists,
		.flags = VXATTR_FLAG_HIDDEN,
	},
	/* individual layout fields */
	XATTR_LAYOUT_FIELD(dir, layout, stripe_unit),
	XATTR_LAYOUT_FIELD(dir, layout, stripe_count),
	XATTR_LAYOUT_FIELD(dir, layout, object_size),
	XATTR_LAYOUT_FIELD(dir, layout, pool),
	XATTR_LAYOUT_FIELD(dir, layout, pool_namespace),
	/* per-directory counters */
	XATTR_NAME_CEPH(dir, entries, VXATTR_FLAG_DIRSTAT),
	XATTR_NAME_CEPH(dir, files, VXATTR_FLAG_DIRSTAT),
	XATTR_NAME_CEPH(dir, subdirs, VXATTR_FLAG_DIRSTAT),
	/* recursive statistics */
	XATTR_RSTAT_FIELD(dir, rentries),
	XATTR_RSTAT_FIELD(dir, rfiles),
	XATTR_RSTAT_FIELD(dir, rsubdirs),
	XATTR_RSTAT_FIELD(dir, rbytes),
	XATTR_RSTAT_FIELD(dir, rctime),
	{
		.name = "ceph.dir.pin",
		.name_size = sizeof("ceph.dir.pin"),
		.getxattr_cb = ceph_vxattrcb_dir_pin,
		.exists_cb = ceph_vxattrcb_dir_pin_exists,
		.flags = VXATTR_FLAG_HIDDEN,
	},
	/* combined quota string, then the individual limits */
	{
		.name = "ceph.quota",
		.name_size = sizeof("ceph.quota"),
		.getxattr_cb = ceph_vxattrcb_quota,
		.exists_cb = ceph_vxattrcb_quota_exists,
		.flags = VXATTR_FLAG_HIDDEN,
	},
	XATTR_QUOTA_FIELD(quota, max_bytes),
	XATTR_QUOTA_FIELD(quota, max_files),
	{
		.name = "ceph.snap.btime",
		.name_size = sizeof("ceph.snap.btime"),
		.getxattr_cb = ceph_vxattrcb_snap_btime,
		.exists_cb = ceph_vxattrcb_snap_btime_exists,
		.flags = VXATTR_FLAG_READONLY,
	},
	{
		.name = "ceph.caps",
		.name_size = sizeof("ceph.caps"),
		.getxattr_cb = ceph_vxattrcb_caps,
		.exists_cb = NULL,
		.flags = VXATTR_FLAG_HIDDEN,
	},
	{ .name = NULL, 0 }	/* Required table terminator */
};
421 
422 /* files */
423 
/*
 * Virtual xattrs offered on regular files (selected by ceph_inode_vxattrs()
 * for S_ISREG inodes).  Lookup walks the table until the NULL-named entry.
 */
static struct ceph_vxattr ceph_file_vxattrs[] = {
	/* combined layout string */
	{
		.name = "ceph.file.layout",
		.name_size = sizeof("ceph.file.layout"),
		.getxattr_cb = ceph_vxattrcb_layout,
		.exists_cb = ceph_vxattrcb_layout_exists,
		.flags = VXATTR_FLAG_HIDDEN,
	},
	/* individual layout fields */
	XATTR_LAYOUT_FIELD(file, layout, stripe_unit),
	XATTR_LAYOUT_FIELD(file, layout, stripe_count),
	XATTR_LAYOUT_FIELD(file, layout, object_size),
	XATTR_LAYOUT_FIELD(file, layout, pool),
	XATTR_LAYOUT_FIELD(file, layout, pool_namespace),
	{
		.name = "ceph.snap.btime",
		.name_size = sizeof("ceph.snap.btime"),
		.getxattr_cb = ceph_vxattrcb_snap_btime,
		.exists_cb = ceph_vxattrcb_snap_btime_exists,
		.flags = VXATTR_FLAG_READONLY,
	},
	{
		.name = "ceph.caps",
		.name_size = sizeof("ceph.caps"),
		.getxattr_cb = ceph_vxattrcb_caps,
		.exists_cb = NULL,
		.flags = VXATTR_FLAG_HIDDEN,
	},
	{ .name = NULL, 0 }	/* Required table terminator */
};
453 
/*
 * Virtual xattrs available on every inode type; ceph_match_vxattr()
 * consults this table after the type-specific one.
 */
static struct ceph_vxattr ceph_common_vxattrs[] = {
	{
		.name = "ceph.cluster_fsid",
		.name_size = sizeof("ceph.cluster_fsid"),
		.getxattr_cb = ceph_vxattrcb_cluster_fsid,
		.exists_cb = NULL,
		.flags = VXATTR_FLAG_READONLY,
	},
	{
		.name = "ceph.client_id",
		.name_size = sizeof("ceph.client_id"),
		.getxattr_cb = ceph_vxattrcb_client_id,
		.exists_cb = NULL,
		.flags = VXATTR_FLAG_READONLY,
	},
	{ .name = NULL, 0 }	/* Required table terminator */
};
471 
472 static struct ceph_vxattr *ceph_inode_vxattrs(struct inode *inode)
473 {
474 	if (S_ISDIR(inode->i_mode))
475 		return ceph_dir_vxattrs;
476 	else if (S_ISREG(inode->i_mode))
477 		return ceph_file_vxattrs;
478 	return NULL;
479 }
480 
481 static struct ceph_vxattr *ceph_match_vxattr(struct inode *inode,
482 						const char *name)
483 {
484 	struct ceph_vxattr *vxattr = ceph_inode_vxattrs(inode);
485 
486 	if (vxattr) {
487 		while (vxattr->name) {
488 			if (!strcmp(vxattr->name, name))
489 				return vxattr;
490 			vxattr++;
491 		}
492 	}
493 
494 	vxattr = ceph_common_vxattrs;
495 	while (vxattr->name) {
496 		if (!strcmp(vxattr->name, name))
497 			return vxattr;
498 		vxattr++;
499 	}
500 
501 	return NULL;
502 }
503 
504 static int __set_xattr(struct ceph_inode_info *ci,
505 			   const char *name, int name_len,
506 			   const char *val, int val_len,
507 			   int flags, int update_xattr,
508 			   struct ceph_inode_xattr **newxattr)
509 {
510 	struct rb_node **p;
511 	struct rb_node *parent = NULL;
512 	struct ceph_inode_xattr *xattr = NULL;
513 	int c;
514 	int new = 0;
515 
516 	p = &ci->i_xattrs.index.rb_node;
517 	while (*p) {
518 		parent = *p;
519 		xattr = rb_entry(parent, struct ceph_inode_xattr, node);
520 		c = strncmp(name, xattr->name, min(name_len, xattr->name_len));
521 		if (c < 0)
522 			p = &(*p)->rb_left;
523 		else if (c > 0)
524 			p = &(*p)->rb_right;
525 		else {
526 			if (name_len == xattr->name_len)
527 				break;
528 			else if (name_len < xattr->name_len)
529 				p = &(*p)->rb_left;
530 			else
531 				p = &(*p)->rb_right;
532 		}
533 		xattr = NULL;
534 	}
535 
536 	if (update_xattr) {
537 		int err = 0;
538 
539 		if (xattr && (flags & XATTR_CREATE))
540 			err = -EEXIST;
541 		else if (!xattr && (flags & XATTR_REPLACE))
542 			err = -ENODATA;
543 		if (err) {
544 			kfree(name);
545 			kfree(val);
546 			kfree(*newxattr);
547 			return err;
548 		}
549 		if (update_xattr < 0) {
550 			if (xattr)
551 				__remove_xattr(ci, xattr);
552 			kfree(name);
553 			kfree(*newxattr);
554 			return 0;
555 		}
556 	}
557 
558 	if (!xattr) {
559 		new = 1;
560 		xattr = *newxattr;
561 		xattr->name = name;
562 		xattr->name_len = name_len;
563 		xattr->should_free_name = update_xattr;
564 
565 		ci->i_xattrs.count++;
566 		dout("__set_xattr count=%d\n", ci->i_xattrs.count);
567 	} else {
568 		kfree(*newxattr);
569 		*newxattr = NULL;
570 		if (xattr->should_free_val)
571 			kfree(xattr->val);
572 
573 		if (update_xattr) {
574 			kfree(name);
575 			name = xattr->name;
576 		}
577 		ci->i_xattrs.names_size -= xattr->name_len;
578 		ci->i_xattrs.vals_size -= xattr->val_len;
579 	}
580 	ci->i_xattrs.names_size += name_len;
581 	ci->i_xattrs.vals_size += val_len;
582 	if (val)
583 		xattr->val = val;
584 	else
585 		xattr->val = "";
586 
587 	xattr->val_len = val_len;
588 	xattr->dirty = update_xattr;
589 	xattr->should_free_val = (val && update_xattr);
590 
591 	if (new) {
592 		rb_link_node(&xattr->node, parent, p);
593 		rb_insert_color(&xattr->node, &ci->i_xattrs.index);
594 		dout("__set_xattr_val p=%p\n", p);
595 	}
596 
597 	dout("__set_xattr_val added %llx.%llx xattr %p %.*s=%.*s\n",
598 	     ceph_vinop(&ci->vfs_inode), xattr, name_len, name, val_len, val);
599 
600 	return 0;
601 }
602 
603 static struct ceph_inode_xattr *__get_xattr(struct ceph_inode_info *ci,
604 			   const char *name)
605 {
606 	struct rb_node **p;
607 	struct rb_node *parent = NULL;
608 	struct ceph_inode_xattr *xattr = NULL;
609 	int name_len = strlen(name);
610 	int c;
611 
612 	p = &ci->i_xattrs.index.rb_node;
613 	while (*p) {
614 		parent = *p;
615 		xattr = rb_entry(parent, struct ceph_inode_xattr, node);
616 		c = strncmp(name, xattr->name, xattr->name_len);
617 		if (c == 0 && name_len > xattr->name_len)
618 			c = 1;
619 		if (c < 0)
620 			p = &(*p)->rb_left;
621 		else if (c > 0)
622 			p = &(*p)->rb_right;
623 		else {
624 			dout("__get_xattr %s: found %.*s\n", name,
625 			     xattr->val_len, xattr->val);
626 			return xattr;
627 		}
628 	}
629 
630 	dout("__get_xattr %s: not found\n", name);
631 
632 	return NULL;
633 }
634 
635 static void __free_xattr(struct ceph_inode_xattr *xattr)
636 {
637 	BUG_ON(!xattr);
638 
639 	if (xattr->should_free_name)
640 		kfree(xattr->name);
641 	if (xattr->should_free_val)
642 		kfree(xattr->val);
643 
644 	kfree(xattr);
645 }
646 
647 static int __remove_xattr(struct ceph_inode_info *ci,
648 			  struct ceph_inode_xattr *xattr)
649 {
650 	if (!xattr)
651 		return -ENODATA;
652 
653 	rb_erase(&xattr->node, &ci->i_xattrs.index);
654 
655 	if (xattr->should_free_name)
656 		kfree(xattr->name);
657 	if (xattr->should_free_val)
658 		kfree(xattr->val);
659 
660 	ci->i_xattrs.names_size -= xattr->name_len;
661 	ci->i_xattrs.vals_size -= xattr->val_len;
662 	ci->i_xattrs.count--;
663 	kfree(xattr);
664 
665 	return 0;
666 }
667 
668 static char *__copy_xattr_names(struct ceph_inode_info *ci,
669 				char *dest)
670 {
671 	struct rb_node *p;
672 	struct ceph_inode_xattr *xattr = NULL;
673 
674 	p = rb_first(&ci->i_xattrs.index);
675 	dout("__copy_xattr_names count=%d\n", ci->i_xattrs.count);
676 
677 	while (p) {
678 		xattr = rb_entry(p, struct ceph_inode_xattr, node);
679 		memcpy(dest, xattr->name, xattr->name_len);
680 		dest[xattr->name_len] = '\0';
681 
682 		dout("dest=%s %p (%s) (%d/%d)\n", dest, xattr, xattr->name,
683 		     xattr->name_len, ci->i_xattrs.names_size);
684 
685 		dest += xattr->name_len + 1;
686 		p = rb_next(p);
687 	}
688 
689 	return dest;
690 }
691 
692 void __ceph_destroy_xattrs(struct ceph_inode_info *ci)
693 {
694 	struct rb_node *p, *tmp;
695 	struct ceph_inode_xattr *xattr = NULL;
696 
697 	p = rb_first(&ci->i_xattrs.index);
698 
699 	dout("__ceph_destroy_xattrs p=%p\n", p);
700 
701 	while (p) {
702 		xattr = rb_entry(p, struct ceph_inode_xattr, node);
703 		tmp = p;
704 		p = rb_next(tmp);
705 		dout("__ceph_destroy_xattrs next p=%p (%.*s)\n", p,
706 		     xattr->name_len, xattr->name);
707 		rb_erase(tmp, &ci->i_xattrs.index);
708 
709 		__free_xattr(xattr);
710 	}
711 
712 	ci->i_xattrs.names_size = 0;
713 	ci->i_xattrs.vals_size = 0;
714 	ci->i_xattrs.index_version = 0;
715 	ci->i_xattrs.count = 0;
716 	ci->i_xattrs.index = RB_ROOT;
717 }
718 
719 static int __build_xattrs(struct inode *inode)
720 	__releases(ci->i_ceph_lock)
721 	__acquires(ci->i_ceph_lock)
722 {
723 	u32 namelen;
724 	u32 numattr = 0;
725 	void *p, *end;
726 	u32 len;
727 	const char *name, *val;
728 	struct ceph_inode_info *ci = ceph_inode(inode);
729 	u64 xattr_version;
730 	struct ceph_inode_xattr **xattrs = NULL;
731 	int err = 0;
732 	int i;
733 
734 	dout("__build_xattrs() len=%d\n",
735 	     ci->i_xattrs.blob ? (int)ci->i_xattrs.blob->vec.iov_len : 0);
736 
737 	if (ci->i_xattrs.index_version >= ci->i_xattrs.version)
738 		return 0; /* already built */
739 
740 	__ceph_destroy_xattrs(ci);
741 
742 start:
743 	/* updated internal xattr rb tree */
744 	if (ci->i_xattrs.blob && ci->i_xattrs.blob->vec.iov_len > 4) {
745 		p = ci->i_xattrs.blob->vec.iov_base;
746 		end = p + ci->i_xattrs.blob->vec.iov_len;
747 		ceph_decode_32_safe(&p, end, numattr, bad);
748 		xattr_version = ci->i_xattrs.version;
749 		spin_unlock(&ci->i_ceph_lock);
750 
751 		xattrs = kcalloc(numattr, sizeof(struct ceph_inode_xattr *),
752 				 GFP_NOFS);
753 		err = -ENOMEM;
754 		if (!xattrs)
755 			goto bad_lock;
756 
757 		for (i = 0; i < numattr; i++) {
758 			xattrs[i] = kmalloc(sizeof(struct ceph_inode_xattr),
759 					    GFP_NOFS);
760 			if (!xattrs[i])
761 				goto bad_lock;
762 		}
763 
764 		spin_lock(&ci->i_ceph_lock);
765 		if (ci->i_xattrs.version != xattr_version) {
766 			/* lost a race, retry */
767 			for (i = 0; i < numattr; i++)
768 				kfree(xattrs[i]);
769 			kfree(xattrs);
770 			xattrs = NULL;
771 			goto start;
772 		}
773 		err = -EIO;
774 		while (numattr--) {
775 			ceph_decode_32_safe(&p, end, len, bad);
776 			namelen = len;
777 			name = p;
778 			p += len;
779 			ceph_decode_32_safe(&p, end, len, bad);
780 			val = p;
781 			p += len;
782 
783 			err = __set_xattr(ci, name, namelen, val, len,
784 					  0, 0, &xattrs[numattr]);
785 
786 			if (err < 0)
787 				goto bad;
788 		}
789 		kfree(xattrs);
790 	}
791 	ci->i_xattrs.index_version = ci->i_xattrs.version;
792 	ci->i_xattrs.dirty = false;
793 
794 	return err;
795 bad_lock:
796 	spin_lock(&ci->i_ceph_lock);
797 bad:
798 	if (xattrs) {
799 		for (i = 0; i < numattr; i++)
800 			kfree(xattrs[i]);
801 		kfree(xattrs);
802 	}
803 	ci->i_xattrs.names_size = 0;
804 	return err;
805 }
806 
807 static int __get_required_blob_size(struct ceph_inode_info *ci, int name_size,
808 				    int val_size)
809 {
810 	/*
811 	 * 4 bytes for the length, and additional 4 bytes per each xattr name,
812 	 * 4 bytes per each value
813 	 */
814 	int size = 4 + ci->i_xattrs.count*(4 + 4) +
815 			     ci->i_xattrs.names_size +
816 			     ci->i_xattrs.vals_size;
817 	dout("__get_required_blob_size c=%d names.size=%d vals.size=%d\n",
818 	     ci->i_xattrs.count, ci->i_xattrs.names_size,
819 	     ci->i_xattrs.vals_size);
820 
821 	if (name_size)
822 		size += 4 + 4 + name_size + val_size;
823 
824 	return size;
825 }
826 
827 /*
828  * If there are dirty xattrs, reencode xattrs into the prealloc_blob
829  * and swap into place.  It returns the old i_xattrs.blob (or NULL) so
830  * that it can be freed by the caller as the i_ceph_lock is likely to be
831  * held.
832  */
833 struct ceph_buffer *__ceph_build_xattrs_blob(struct ceph_inode_info *ci)
834 {
835 	struct rb_node *p;
836 	struct ceph_inode_xattr *xattr = NULL;
837 	struct ceph_buffer *old_blob = NULL;
838 	void *dest;
839 
840 	dout("__build_xattrs_blob %p\n", &ci->vfs_inode);
841 	if (ci->i_xattrs.dirty) {
842 		int need = __get_required_blob_size(ci, 0, 0);
843 
844 		BUG_ON(need > ci->i_xattrs.prealloc_blob->alloc_len);
845 
846 		p = rb_first(&ci->i_xattrs.index);
847 		dest = ci->i_xattrs.prealloc_blob->vec.iov_base;
848 
849 		ceph_encode_32(&dest, ci->i_xattrs.count);
850 		while (p) {
851 			xattr = rb_entry(p, struct ceph_inode_xattr, node);
852 
853 			ceph_encode_32(&dest, xattr->name_len);
854 			memcpy(dest, xattr->name, xattr->name_len);
855 			dest += xattr->name_len;
856 			ceph_encode_32(&dest, xattr->val_len);
857 			memcpy(dest, xattr->val, xattr->val_len);
858 			dest += xattr->val_len;
859 
860 			p = rb_next(p);
861 		}
862 
863 		/* adjust buffer len; it may be larger than we need */
864 		ci->i_xattrs.prealloc_blob->vec.iov_len =
865 			dest - ci->i_xattrs.prealloc_blob->vec.iov_base;
866 
867 		if (ci->i_xattrs.blob)
868 			old_blob = ci->i_xattrs.blob;
869 		ci->i_xattrs.blob = ci->i_xattrs.prealloc_blob;
870 		ci->i_xattrs.prealloc_blob = NULL;
871 		ci->i_xattrs.dirty = false;
872 		ci->i_xattrs.version++;
873 	}
874 
875 	return old_blob;
876 }
877 
878 static inline int __get_request_mask(struct inode *in) {
879 	struct ceph_mds_request *req = current->journal_info;
880 	int mask = 0;
881 	if (req && req->r_target_inode == in) {
882 		if (req->r_op == CEPH_MDS_OP_LOOKUP ||
883 		    req->r_op == CEPH_MDS_OP_LOOKUPINO ||
884 		    req->r_op == CEPH_MDS_OP_LOOKUPPARENT ||
885 		    req->r_op == CEPH_MDS_OP_GETATTR) {
886 			mask = le32_to_cpu(req->r_args.getattr.mask);
887 		} else if (req->r_op == CEPH_MDS_OP_OPEN ||
888 			   req->r_op == CEPH_MDS_OP_CREATE) {
889 			mask = le32_to_cpu(req->r_args.open.mask);
890 		}
891 	}
892 	return mask;
893 }
894 
895 ssize_t __ceph_getxattr(struct inode *inode, const char *name, void *value,
896 		      size_t size)
897 {
898 	struct ceph_inode_info *ci = ceph_inode(inode);
899 	struct ceph_inode_xattr *xattr;
900 	struct ceph_vxattr *vxattr = NULL;
901 	int req_mask;
902 	ssize_t err;
903 
904 	/* let's see if a virtual xattr was requested */
905 	vxattr = ceph_match_vxattr(inode, name);
906 	if (vxattr) {
907 		int mask = 0;
908 		if (vxattr->flags & VXATTR_FLAG_RSTAT)
909 			mask |= CEPH_STAT_RSTAT;
910 		if (vxattr->flags & VXATTR_FLAG_DIRSTAT)
911 			mask |= CEPH_CAP_FILE_SHARED;
912 		err = ceph_do_getattr(inode, mask, true);
913 		if (err)
914 			return err;
915 		err = -ENODATA;
916 		if (!(vxattr->exists_cb && !vxattr->exists_cb(ci))) {
917 			err = vxattr->getxattr_cb(ci, value, size);
918 			if (size && size < err)
919 				err = -ERANGE;
920 		}
921 		return err;
922 	}
923 
924 	req_mask = __get_request_mask(inode);
925 
926 	spin_lock(&ci->i_ceph_lock);
927 	dout("getxattr %p name '%s' ver=%lld index_ver=%lld\n", inode, name,
928 	     ci->i_xattrs.version, ci->i_xattrs.index_version);
929 
930 	if (ci->i_xattrs.version == 0 ||
931 	    !((req_mask & CEPH_CAP_XATTR_SHARED) ||
932 	      __ceph_caps_issued_mask_metric(ci, CEPH_CAP_XATTR_SHARED, 1))) {
933 		spin_unlock(&ci->i_ceph_lock);
934 
935 		/* security module gets xattr while filling trace */
936 		if (current->journal_info) {
937 			pr_warn_ratelimited("sync getxattr %p "
938 					    "during filling trace\n", inode);
939 			return -EBUSY;
940 		}
941 
942 		/* get xattrs from mds (if we don't already have them) */
943 		err = ceph_do_getattr(inode, CEPH_STAT_CAP_XATTR, true);
944 		if (err)
945 			return err;
946 		spin_lock(&ci->i_ceph_lock);
947 	}
948 
949 	err = __build_xattrs(inode);
950 	if (err < 0)
951 		goto out;
952 
953 	err = -ENODATA;  /* == ENOATTR */
954 	xattr = __get_xattr(ci, name);
955 	if (!xattr)
956 		goto out;
957 
958 	err = -ERANGE;
959 	if (size && size < xattr->val_len)
960 		goto out;
961 
962 	err = xattr->val_len;
963 	if (size == 0)
964 		goto out;
965 
966 	memcpy(value, xattr->val, xattr->val_len);
967 
968 	if (current->journal_info &&
969 	    !strncmp(name, XATTR_SECURITY_PREFIX, XATTR_SECURITY_PREFIX_LEN) &&
970 	    security_ismaclabel(name + XATTR_SECURITY_PREFIX_LEN))
971 		ci->i_ceph_flags |= CEPH_I_SEC_INITED;
972 out:
973 	spin_unlock(&ci->i_ceph_lock);
974 	return err;
975 }
976 
977 ssize_t ceph_listxattr(struct dentry *dentry, char *names, size_t size)
978 {
979 	struct inode *inode = d_inode(dentry);
980 	struct ceph_inode_info *ci = ceph_inode(inode);
981 	bool len_only = (size == 0);
982 	u32 namelen;
983 	int err;
984 
985 	spin_lock(&ci->i_ceph_lock);
986 	dout("listxattr %p ver=%lld index_ver=%lld\n", inode,
987 	     ci->i_xattrs.version, ci->i_xattrs.index_version);
988 
989 	if (ci->i_xattrs.version == 0 ||
990 	    !__ceph_caps_issued_mask_metric(ci, CEPH_CAP_XATTR_SHARED, 1)) {
991 		spin_unlock(&ci->i_ceph_lock);
992 		err = ceph_do_getattr(inode, CEPH_STAT_CAP_XATTR, true);
993 		if (err)
994 			return err;
995 		spin_lock(&ci->i_ceph_lock);
996 	}
997 
998 	err = __build_xattrs(inode);
999 	if (err < 0)
1000 		goto out;
1001 
1002 	/* add 1 byte for each xattr due to the null termination */
1003 	namelen = ci->i_xattrs.names_size + ci->i_xattrs.count;
1004 	if (!len_only) {
1005 		if (namelen > size) {
1006 			err = -ERANGE;
1007 			goto out;
1008 		}
1009 		names = __copy_xattr_names(ci, names);
1010 		size -= namelen;
1011 	}
1012 	err = namelen;
1013 out:
1014 	spin_unlock(&ci->i_ceph_lock);
1015 	return err;
1016 }
1017 
1018 static int ceph_sync_setxattr(struct inode *inode, const char *name,
1019 			      const char *value, size_t size, int flags)
1020 {
1021 	struct ceph_fs_client *fsc = ceph_sb_to_client(inode->i_sb);
1022 	struct ceph_inode_info *ci = ceph_inode(inode);
1023 	struct ceph_mds_request *req;
1024 	struct ceph_mds_client *mdsc = fsc->mdsc;
1025 	struct ceph_osd_client *osdc = &fsc->client->osdc;
1026 	struct ceph_pagelist *pagelist = NULL;
1027 	int op = CEPH_MDS_OP_SETXATTR;
1028 	int err;
1029 
1030 	if (size > 0) {
1031 		/* copy value into pagelist */
1032 		pagelist = ceph_pagelist_alloc(GFP_NOFS);
1033 		if (!pagelist)
1034 			return -ENOMEM;
1035 
1036 		err = ceph_pagelist_append(pagelist, value, size);
1037 		if (err)
1038 			goto out;
1039 	} else if (!value) {
1040 		if (flags & CEPH_XATTR_REPLACE)
1041 			op = CEPH_MDS_OP_RMXATTR;
1042 		else
1043 			flags |= CEPH_XATTR_REMOVE;
1044 	}
1045 
1046 	dout("setxattr value=%.*s\n", (int)size, value);
1047 
1048 	/* do request */
1049 	req = ceph_mdsc_create_request(mdsc, op, USE_AUTH_MDS);
1050 	if (IS_ERR(req)) {
1051 		err = PTR_ERR(req);
1052 		goto out;
1053 	}
1054 
1055 	req->r_path2 = kstrdup(name, GFP_NOFS);
1056 	if (!req->r_path2) {
1057 		ceph_mdsc_put_request(req);
1058 		err = -ENOMEM;
1059 		goto out;
1060 	}
1061 
1062 	if (op == CEPH_MDS_OP_SETXATTR) {
1063 		req->r_args.setxattr.flags = cpu_to_le32(flags);
1064 		req->r_args.setxattr.osdmap_epoch =
1065 			cpu_to_le32(osdc->osdmap->epoch);
1066 		req->r_pagelist = pagelist;
1067 		pagelist = NULL;
1068 	}
1069 
1070 	req->r_inode = inode;
1071 	ihold(inode);
1072 	req->r_num_caps = 1;
1073 	req->r_inode_drop = CEPH_CAP_XATTR_SHARED;
1074 
1075 	dout("xattr.ver (before): %lld\n", ci->i_xattrs.version);
1076 	err = ceph_mdsc_do_request(mdsc, NULL, req);
1077 	ceph_mdsc_put_request(req);
1078 	dout("xattr.ver (after): %lld\n", ci->i_xattrs.version);
1079 
1080 out:
1081 	if (pagelist)
1082 		ceph_pagelist_release(pagelist);
1083 	return err;
1084 }
1085 
1086 int __ceph_setxattr(struct inode *inode, const char *name,
1087 			const void *value, size_t size, int flags)
1088 {
1089 	struct ceph_vxattr *vxattr;
1090 	struct ceph_inode_info *ci = ceph_inode(inode);
1091 	struct ceph_mds_client *mdsc = ceph_sb_to_client(inode->i_sb)->mdsc;
1092 	struct ceph_cap_flush *prealloc_cf = NULL;
1093 	struct ceph_buffer *old_blob = NULL;
1094 	int issued;
1095 	int err;
1096 	int dirty = 0;
1097 	int name_len = strlen(name);
1098 	int val_len = size;
1099 	char *newname = NULL;
1100 	char *newval = NULL;
1101 	struct ceph_inode_xattr *xattr = NULL;
1102 	int required_blob_size;
1103 	bool check_realm = false;
1104 	bool lock_snap_rwsem = false;
1105 
1106 	if (ceph_snap(inode) != CEPH_NOSNAP)
1107 		return -EROFS;
1108 
1109 	vxattr = ceph_match_vxattr(inode, name);
1110 	if (vxattr) {
1111 		if (vxattr->flags & VXATTR_FLAG_READONLY)
1112 			return -EOPNOTSUPP;
1113 		if (value && !strncmp(vxattr->name, "ceph.quota", 10))
1114 			check_realm = true;
1115 	}
1116 
1117 	/* pass any unhandled ceph.* xattrs through to the MDS */
1118 	if (!strncmp(name, XATTR_CEPH_PREFIX, XATTR_CEPH_PREFIX_LEN))
1119 		goto do_sync_unlocked;
1120 
1121 	/* preallocate memory for xattr name, value, index node */
1122 	err = -ENOMEM;
1123 	newname = kmemdup(name, name_len + 1, GFP_NOFS);
1124 	if (!newname)
1125 		goto out;
1126 
1127 	if (val_len) {
1128 		newval = kmemdup(value, val_len, GFP_NOFS);
1129 		if (!newval)
1130 			goto out;
1131 	}
1132 
1133 	xattr = kmalloc(sizeof(struct ceph_inode_xattr), GFP_NOFS);
1134 	if (!xattr)
1135 		goto out;
1136 
1137 	prealloc_cf = ceph_alloc_cap_flush();
1138 	if (!prealloc_cf)
1139 		goto out;
1140 
1141 	spin_lock(&ci->i_ceph_lock);
1142 retry:
1143 	issued = __ceph_caps_issued(ci, NULL);
1144 	if (ci->i_xattrs.version == 0 || !(issued & CEPH_CAP_XATTR_EXCL))
1145 		goto do_sync;
1146 
1147 	if (!lock_snap_rwsem && !ci->i_head_snapc) {
1148 		lock_snap_rwsem = true;
1149 		if (!down_read_trylock(&mdsc->snap_rwsem)) {
1150 			spin_unlock(&ci->i_ceph_lock);
1151 			down_read(&mdsc->snap_rwsem);
1152 			spin_lock(&ci->i_ceph_lock);
1153 			goto retry;
1154 		}
1155 	}
1156 
1157 	dout("setxattr %p name '%s' issued %s\n", inode, name,
1158 	     ceph_cap_string(issued));
1159 	__build_xattrs(inode);
1160 
1161 	required_blob_size = __get_required_blob_size(ci, name_len, val_len);
1162 
1163 	if (!ci->i_xattrs.prealloc_blob ||
1164 	    required_blob_size > ci->i_xattrs.prealloc_blob->alloc_len) {
1165 		struct ceph_buffer *blob;
1166 
1167 		spin_unlock(&ci->i_ceph_lock);
1168 		ceph_buffer_put(old_blob); /* Shouldn't be required */
1169 		dout(" pre-allocating new blob size=%d\n", required_blob_size);
1170 		blob = ceph_buffer_new(required_blob_size, GFP_NOFS);
1171 		if (!blob)
1172 			goto do_sync_unlocked;
1173 		spin_lock(&ci->i_ceph_lock);
1174 		/* prealloc_blob can't be released while holding i_ceph_lock */
1175 		if (ci->i_xattrs.prealloc_blob)
1176 			old_blob = ci->i_xattrs.prealloc_blob;
1177 		ci->i_xattrs.prealloc_blob = blob;
1178 		goto retry;
1179 	}
1180 
1181 	err = __set_xattr(ci, newname, name_len, newval, val_len,
1182 			  flags, value ? 1 : -1, &xattr);
1183 
1184 	if (!err) {
1185 		dirty = __ceph_mark_dirty_caps(ci, CEPH_CAP_XATTR_EXCL,
1186 					       &prealloc_cf);
1187 		ci->i_xattrs.dirty = true;
1188 		inode->i_ctime = current_time(inode);
1189 	}
1190 
1191 	spin_unlock(&ci->i_ceph_lock);
1192 	ceph_buffer_put(old_blob);
1193 	if (lock_snap_rwsem)
1194 		up_read(&mdsc->snap_rwsem);
1195 	if (dirty)
1196 		__mark_inode_dirty(inode, dirty);
1197 	ceph_free_cap_flush(prealloc_cf);
1198 	return err;
1199 
1200 do_sync:
1201 	spin_unlock(&ci->i_ceph_lock);
1202 do_sync_unlocked:
1203 	if (lock_snap_rwsem)
1204 		up_read(&mdsc->snap_rwsem);
1205 
1206 	/* security module set xattr while filling trace */
1207 	if (current->journal_info) {
1208 		pr_warn_ratelimited("sync setxattr %p "
1209 				    "during filling trace\n", inode);
1210 		err = -EBUSY;
1211 	} else {
1212 		err = ceph_sync_setxattr(inode, name, value, size, flags);
1213 		if (err >= 0 && check_realm) {
1214 			/* check if snaprealm was created for quota inode */
1215 			spin_lock(&ci->i_ceph_lock);
1216 			if ((ci->i_max_files || ci->i_max_bytes) &&
1217 			    !(ci->i_snap_realm &&
1218 			      ci->i_snap_realm->ino == ci->i_vino.ino))
1219 				err = -EOPNOTSUPP;
1220 			spin_unlock(&ci->i_ceph_lock);
1221 		}
1222 	}
1223 out:
1224 	ceph_free_cap_flush(prealloc_cf);
1225 	kfree(newname);
1226 	kfree(newval);
1227 	kfree(xattr);
1228 	return err;
1229 }
1230 
1231 static int ceph_get_xattr_handler(const struct xattr_handler *handler,
1232 				  struct dentry *dentry, struct inode *inode,
1233 				  const char *name, void *value, size_t size)
1234 {
1235 	if (!ceph_is_valid_xattr(name))
1236 		return -EOPNOTSUPP;
1237 	return __ceph_getxattr(inode, name, value, size);
1238 }
1239 
1240 static int ceph_set_xattr_handler(const struct xattr_handler *handler,
1241 				  struct user_namespace *mnt_userns,
1242 				  struct dentry *unused, struct inode *inode,
1243 				  const char *name, const void *value,
1244 				  size_t size, int flags)
1245 {
1246 	if (!ceph_is_valid_xattr(name))
1247 		return -EOPNOTSUPP;
1248 	return __ceph_setxattr(inode, name, value, size, flags);
1249 }
1250 
1251 static const struct xattr_handler ceph_other_xattr_handler = {
1252 	.prefix = "",  /* match any name => handlers called with full name */
1253 	.get = ceph_get_xattr_handler,
1254 	.set = ceph_set_xattr_handler,
1255 };
1256 
1257 #ifdef CONFIG_SECURITY
1258 bool ceph_security_xattr_wanted(struct inode *in)
1259 {
1260 	return in->i_security != NULL;
1261 }
1262 
1263 bool ceph_security_xattr_deadlock(struct inode *in)
1264 {
1265 	struct ceph_inode_info *ci;
1266 	bool ret;
1267 	if (!in->i_security)
1268 		return false;
1269 	ci = ceph_inode(in);
1270 	spin_lock(&ci->i_ceph_lock);
1271 	ret = !(ci->i_ceph_flags & CEPH_I_SEC_INITED) &&
1272 	      !(ci->i_xattrs.version > 0 &&
1273 		__ceph_caps_issued_mask(ci, CEPH_CAP_XATTR_SHARED, 0));
1274 	spin_unlock(&ci->i_ceph_lock);
1275 	return ret;
1276 }
1277 
1278 #ifdef CONFIG_CEPH_FS_SECURITY_LABEL
1279 int ceph_security_init_secctx(struct dentry *dentry, umode_t mode,
1280 			   struct ceph_acl_sec_ctx *as_ctx)
1281 {
1282 	struct ceph_pagelist *pagelist = as_ctx->pagelist;
1283 	const char *name;
1284 	size_t name_len;
1285 	int err;
1286 
1287 	err = security_dentry_init_security(dentry, mode, &dentry->d_name,
1288 					    &as_ctx->sec_ctx,
1289 					    &as_ctx->sec_ctxlen);
1290 	if (err < 0) {
1291 		WARN_ON_ONCE(err != -EOPNOTSUPP);
1292 		err = 0; /* do nothing */
1293 		goto out;
1294 	}
1295 
1296 	err = -ENOMEM;
1297 	if (!pagelist) {
1298 		pagelist = ceph_pagelist_alloc(GFP_KERNEL);
1299 		if (!pagelist)
1300 			goto out;
1301 		err = ceph_pagelist_reserve(pagelist, PAGE_SIZE);
1302 		if (err)
1303 			goto out;
1304 		ceph_pagelist_encode_32(pagelist, 1);
1305 	}
1306 
1307 	/*
1308 	 * FIXME: Make security_dentry_init_security() generic. Currently
1309 	 * It only supports single security module and only selinux has
1310 	 * dentry_init_security hook.
1311 	 */
1312 	name = XATTR_NAME_SELINUX;
1313 	name_len = strlen(name);
1314 	err = ceph_pagelist_reserve(pagelist,
1315 				    4 * 2 + name_len + as_ctx->sec_ctxlen);
1316 	if (err)
1317 		goto out;
1318 
1319 	if (as_ctx->pagelist) {
1320 		/* update count of KV pairs */
1321 		BUG_ON(pagelist->length <= sizeof(__le32));
1322 		if (list_is_singular(&pagelist->head)) {
1323 			le32_add_cpu((__le32*)pagelist->mapped_tail, 1);
1324 		} else {
1325 			struct page *page = list_first_entry(&pagelist->head,
1326 							     struct page, lru);
1327 			void *addr = kmap_atomic(page);
1328 			le32_add_cpu((__le32*)addr, 1);
1329 			kunmap_atomic(addr);
1330 		}
1331 	} else {
1332 		as_ctx->pagelist = pagelist;
1333 	}
1334 
1335 	ceph_pagelist_encode_32(pagelist, name_len);
1336 	ceph_pagelist_append(pagelist, name, name_len);
1337 
1338 	ceph_pagelist_encode_32(pagelist, as_ctx->sec_ctxlen);
1339 	ceph_pagelist_append(pagelist, as_ctx->sec_ctx, as_ctx->sec_ctxlen);
1340 
1341 	err = 0;
1342 out:
1343 	if (pagelist && !as_ctx->pagelist)
1344 		ceph_pagelist_release(pagelist);
1345 	return err;
1346 }
1347 #endif /* CONFIG_CEPH_FS_SECURITY_LABEL */
1348 #endif /* CONFIG_SECURITY */
1349 
1350 void ceph_release_acl_sec_ctx(struct ceph_acl_sec_ctx *as_ctx)
1351 {
1352 #ifdef CONFIG_CEPH_FS_POSIX_ACL
1353 	posix_acl_release(as_ctx->acl);
1354 	posix_acl_release(as_ctx->default_acl);
1355 #endif
1356 #ifdef CONFIG_CEPH_FS_SECURITY_LABEL
1357 	security_release_secctx(as_ctx->sec_ctx, as_ctx->sec_ctxlen);
1358 #endif
1359 	if (as_ctx->pagelist)
1360 		ceph_pagelist_release(as_ctx->pagelist);
1361 }
1362 
1363 /*
1364  * List of handlers for synthetic system.* attributes. Other
1365  * attributes are handled directly.
1366  */
1367 const struct xattr_handler *ceph_xattr_handlers[] = {
1368 #ifdef CONFIG_CEPH_FS_POSIX_ACL
1369 	&posix_acl_access_xattr_handler,
1370 	&posix_acl_default_xattr_handler,
1371 #endif
1372 	&ceph_other_xattr_handler,
1373 	NULL,
1374 };
1375