1 /*
2  * linux/fs/ext4/xattr.c
3  *
4  * Copyright (C) 2001-2003 Andreas Gruenbacher, <agruen@suse.de>
5  *
6  * Fix by Harrison Xing <harrison@mountainviewdata.com>.
7  * Ext4 code with a lot of help from Eric Jarman <ejarman@acm.org>.
8  * Extended attributes for symlinks and special files added per
9  *  suggestion of Luka Renko <luka.renko@hermes.si>.
10  * xattr consolidation Copyright (c) 2004 James Morris <jmorris@redhat.com>,
11  *  Red Hat Inc.
12  * ea-in-inode support by Alex Tomas <alex@clusterfs.com> aka bzzz
13  *  and Andreas Gruenbacher <agruen@suse.de>.
14  */
15 
16 /*
17  * Extended attributes are stored directly in inodes (on file systems with
18  * inodes bigger than 128 bytes) and on additional disk blocks. The i_file_acl
19  * field contains the block number if an inode uses an additional block. All
20  * attributes must fit in the inode and one additional block. Blocks that
21  * contain the identical set of attributes may be shared among several inodes.
22  * Identical blocks are detected by keeping a cache of blocks that have
23  * recently been accessed.
24  *
25  * In-inode and on-block attributes use different headers, but the entry
26  * descriptors themselves are stored in the same format:
27  *
28  *   +------------------+
29  *   | header           |
30  *   | entry 1          | |
31  *   | entry 2          | | growing downwards
32  *   | entry 3          | v
33  *   | four null bytes  |
34  *   | . . .            |
35  *   | value 1          | ^
36  *   | value 3          | | growing upwards
37  *   | value 2          | |
38  *   +------------------+
39  *
40  * The header is followed by multiple entry descriptors. In disk blocks, the
41  * entry descriptors are kept sorted. In inodes, they are unsorted. The
42  * attribute values are aligned to the end of the block in no specific order.
43  *
44  * Locking strategy
45  * ----------------
46  * EXT4_I(inode)->i_file_acl is protected by EXT4_I(inode)->xattr_sem.
47  * EA blocks are only changed if they are exclusive to an inode, so
48  * holding xattr_sem also means that nothing but the EA block's reference
49  * count can change. Multiple writers to the same block are synchronized
50  * by the buffer lock.
51  */
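/*
 * Illustrative walk over the layout above (a minimal sketch, not a helper
 * used elsewhere in this file): "base" is the origin that e_value_offs is
 * relative to, i.e. bh->b_data for an EA block or IFIRST(header) for
 * in-inode attributes, and "entry" starts at the first descriptor:
 *
 *	for (; !IS_LAST_ENTRY(entry); entry = EXT4_XATTR_NEXT(entry)) {
 *		void *value = base + le16_to_cpu(entry->e_value_offs);
 *		size_t len = le32_to_cpu(entry->e_value_size);
 *		...
 *	}
 *
 * The four null bytes following the last descriptor are what
 * IS_LAST_ENTRY() tests for.
 */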
52 
53 #include <linux/init.h>
54 #include <linux/fs.h>
55 #include <linux/slab.h>
56 #include <linux/mbcache.h>
57 #include <linux/quotaops.h>
58 #include <linux/rwsem.h>
59 #include "ext4_jbd2.h"
60 #include "ext4.h"
61 #include "xattr.h"
62 #include "acl.h"
63 
64 #define BHDR(bh) ((struct ext4_xattr_header *)((bh)->b_data))
65 #define ENTRY(ptr) ((struct ext4_xattr_entry *)(ptr))
66 #define BFIRST(bh) ENTRY(BHDR(bh)+1)
67 #define IS_LAST_ENTRY(entry) (*(__u32 *)(entry) == 0)
68 
69 #ifdef EXT4_XATTR_DEBUG
70 # define ea_idebug(inode, f...) do { \
71 		printk(KERN_DEBUG "inode %s:%lu: ", \
72 			inode->i_sb->s_id, inode->i_ino); \
73 		printk(f); \
74 		printk("\n"); \
75 	} while (0)
76 # define ea_bdebug(bh, f...) do { \
77 		char b[BDEVNAME_SIZE]; \
78 		printk(KERN_DEBUG "block %s:%lu: ", \
79 			bdevname(bh->b_bdev, b), \
80 			(unsigned long) bh->b_blocknr); \
81 		printk(f); \
82 		printk("\n"); \
83 	} while (0)
84 #else
85 # define ea_idebug(inode, fmt, ...)	no_printk(fmt, ##__VA_ARGS__)
86 # define ea_bdebug(bh, fmt, ...)	no_printk(fmt, ##__VA_ARGS__)
87 #endif
88 
89 static void ext4_xattr_cache_insert(struct buffer_head *);
90 static struct buffer_head *ext4_xattr_cache_find(struct inode *,
91 						 struct ext4_xattr_header *,
92 						 struct mb_cache_entry **);
93 static void ext4_xattr_rehash(struct ext4_xattr_header *,
94 			      struct ext4_xattr_entry *);
95 static int ext4_xattr_list(struct dentry *dentry, char *buffer,
96 			   size_t buffer_size);
97 
98 static struct mb_cache *ext4_xattr_cache;
99 
100 static const struct xattr_handler *ext4_xattr_handler_map[] = {
101 	[EXT4_XATTR_INDEX_USER]		     = &ext4_xattr_user_handler,
102 #ifdef CONFIG_EXT4_FS_POSIX_ACL
103 	[EXT4_XATTR_INDEX_POSIX_ACL_ACCESS]  = &ext4_xattr_acl_access_handler,
104 	[EXT4_XATTR_INDEX_POSIX_ACL_DEFAULT] = &ext4_xattr_acl_default_handler,
105 #endif
106 	[EXT4_XATTR_INDEX_TRUSTED]	     = &ext4_xattr_trusted_handler,
107 #ifdef CONFIG_EXT4_FS_SECURITY
108 	[EXT4_XATTR_INDEX_SECURITY]	     = &ext4_xattr_security_handler,
109 #endif
110 };
111 
112 const struct xattr_handler *ext4_xattr_handlers[] = {
113 	&ext4_xattr_user_handler,
114 	&ext4_xattr_trusted_handler,
115 #ifdef CONFIG_EXT4_FS_POSIX_ACL
116 	&ext4_xattr_acl_access_handler,
117 	&ext4_xattr_acl_default_handler,
118 #endif
119 #ifdef CONFIG_EXT4_FS_SECURITY
120 	&ext4_xattr_security_handler,
121 #endif
122 	NULL
123 };
124 
125 static inline const struct xattr_handler *
126 ext4_xattr_handler(int name_index)
127 {
128 	const struct xattr_handler *handler = NULL;
129 
130 	if (name_index > 0 && name_index < ARRAY_SIZE(ext4_xattr_handler_map))
131 		handler = ext4_xattr_handler_map[name_index];
132 	return handler;
133 }
134 
135 /*
136  * Inode operation listxattr()
137  *
138  * dentry->d_inode->i_mutex: don't care
139  */
140 ssize_t
141 ext4_listxattr(struct dentry *dentry, char *buffer, size_t size)
142 {
143 	return ext4_xattr_list(dentry, buffer, size);
144 }
145 
146 static int
147 ext4_xattr_check_names(struct ext4_xattr_entry *entry, void *end)
148 {
149 	while (!IS_LAST_ENTRY(entry)) {
150 		struct ext4_xattr_entry *next = EXT4_XATTR_NEXT(entry);
151 		if ((void *)next >= end)
152 			return -EIO;
153 		entry = next;
154 	}
155 	return 0;
156 }
157 
158 static inline int
159 ext4_xattr_check_block(struct buffer_head *bh)
160 {
161 	if (BHDR(bh)->h_magic != cpu_to_le32(EXT4_XATTR_MAGIC) ||
162 	    BHDR(bh)->h_blocks != cpu_to_le32(1))
163 		return -EIO;
164 	return ext4_xattr_check_names(BFIRST(bh), bh->b_data + bh->b_size);
165 }
166 
167 static inline int
168 ext4_xattr_check_entry(struct ext4_xattr_entry *entry, size_t size)
169 {
170 	size_t value_size = le32_to_cpu(entry->e_value_size);
171 
172 	if (entry->e_value_block != 0 || value_size > size ||
173 	    le16_to_cpu(entry->e_value_offs) + value_size > size)
174 		return -EIO;
175 	return 0;
176 }
177 
178 static int
179 ext4_xattr_find_entry(struct ext4_xattr_entry **pentry, int name_index,
180 		      const char *name, size_t size, int sorted)
181 {
182 	struct ext4_xattr_entry *entry;
183 	size_t name_len;
184 	int cmp = 1;
185 
186 	if (name == NULL)
187 		return -EINVAL;
188 	name_len = strlen(name);
189 	entry = *pentry;
190 	for (; !IS_LAST_ENTRY(entry); entry = EXT4_XATTR_NEXT(entry)) {
191 		cmp = name_index - entry->e_name_index;
192 		if (!cmp)
193 			cmp = name_len - entry->e_name_len;
194 		if (!cmp)
195 			cmp = memcmp(name, entry->e_name, name_len);
196 		if (cmp <= 0 && (sorted || cmp == 0))
197 			break;
198 	}
199 	*pentry = entry;
200 	if (!cmp && ext4_xattr_check_entry(entry, size))
201 		return -EIO;
202 	return cmp ? -ENODATA : 0;
203 }
204 
205 static int
206 ext4_xattr_block_get(struct inode *inode, int name_index, const char *name,
207 		     void *buffer, size_t buffer_size)
208 {
209 	struct buffer_head *bh = NULL;
210 	struct ext4_xattr_entry *entry;
211 	size_t size;
212 	int error;
213 
214 	ea_idebug(inode, "name=%d.%s, buffer=%p, buffer_size=%ld",
215 		  name_index, name, buffer, (long)buffer_size);
216 
217 	error = -ENODATA;
218 	if (!EXT4_I(inode)->i_file_acl)
219 		goto cleanup;
220 	ea_idebug(inode, "reading block %llu",
221 		  (unsigned long long)EXT4_I(inode)->i_file_acl);
222 	bh = sb_bread(inode->i_sb, EXT4_I(inode)->i_file_acl);
223 	if (!bh)
224 		goto cleanup;
225 	ea_bdebug(bh, "b_count=%d, refcount=%d",
226 		atomic_read(&(bh->b_count)), le32_to_cpu(BHDR(bh)->h_refcount));
227 	if (ext4_xattr_check_block(bh)) {
228 bad_block:
229 		EXT4_ERROR_INODE(inode, "bad block %llu",
230 				 EXT4_I(inode)->i_file_acl);
231 		error = -EIO;
232 		goto cleanup;
233 	}
234 	ext4_xattr_cache_insert(bh);
235 	entry = BFIRST(bh);
236 	error = ext4_xattr_find_entry(&entry, name_index, name, bh->b_size, 1);
237 	if (error == -EIO)
238 		goto bad_block;
239 	if (error)
240 		goto cleanup;
241 	size = le32_to_cpu(entry->e_value_size);
242 	if (buffer) {
243 		error = -ERANGE;
244 		if (size > buffer_size)
245 			goto cleanup;
246 		memcpy(buffer, bh->b_data + le16_to_cpu(entry->e_value_offs),
247 		       size);
248 	}
249 	error = size;
250 
251 cleanup:
252 	brelse(bh);
253 	return error;
254 }
255 
256 static int
257 ext4_xattr_ibody_get(struct inode *inode, int name_index, const char *name,
258 		     void *buffer, size_t buffer_size)
259 {
260 	struct ext4_xattr_ibody_header *header;
261 	struct ext4_xattr_entry *entry;
262 	struct ext4_inode *raw_inode;
263 	struct ext4_iloc iloc;
264 	size_t size;
265 	void *end;
266 	int error;
267 
268 	if (!ext4_test_inode_state(inode, EXT4_STATE_XATTR))
269 		return -ENODATA;
270 	error = ext4_get_inode_loc(inode, &iloc);
271 	if (error)
272 		return error;
273 	raw_inode = ext4_raw_inode(&iloc);
274 	header = IHDR(inode, raw_inode);
275 	entry = IFIRST(header);
276 	end = (void *)raw_inode + EXT4_SB(inode->i_sb)->s_inode_size;
277 	error = ext4_xattr_check_names(entry, end);
278 	if (error)
279 		goto cleanup;
280 	error = ext4_xattr_find_entry(&entry, name_index, name,
281 				      end - (void *)entry, 0);
282 	if (error)
283 		goto cleanup;
284 	size = le32_to_cpu(entry->e_value_size);
285 	if (buffer) {
286 		error = -ERANGE;
287 		if (size > buffer_size)
288 			goto cleanup;
289 		memcpy(buffer, (void *)IFIRST(header) +
290 		       le16_to_cpu(entry->e_value_offs), size);
291 	}
292 	error = size;
293 
294 cleanup:
295 	brelse(iloc.bh);
296 	return error;
297 }
298 
299 /*
300  * ext4_xattr_get()
301  *
302  * Copy an extended attribute into the buffer
303  * provided, or compute the buffer size required.
304  * Pass a NULL buffer to compute the required size only.
305  *
306  * Returns a negative error number on failure, or the number of bytes
307  * used / required on success.
308  */
309 int
310 ext4_xattr_get(struct inode *inode, int name_index, const char *name,
311 	       void *buffer, size_t buffer_size)
312 {
313 	int error;
314 
315 	down_read(&EXT4_I(inode)->xattr_sem);
316 	error = ext4_xattr_ibody_get(inode, name_index, name, buffer,
317 				     buffer_size);
318 	if (error == -ENODATA)
319 		error = ext4_xattr_block_get(inode, name_index, name, buffer,
320 					     buffer_size);
321 	up_read(&EXT4_I(inode)->xattr_sem);
322 	return error;
323 }
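/*
 * Typical calling pattern (a sketch only; the attribute name "foo" is just
 * an example): probe for the size with a NULL buffer, then fetch into an
 * allocation of that size.
 *
 *	int len = ext4_xattr_get(inode, EXT4_XATTR_INDEX_USER, "foo", NULL, 0);
 *	if (len > 0) {
 *		char *buf = kmalloc(len, GFP_NOFS);
 *		if (buf) {
 *			len = ext4_xattr_get(inode, EXT4_XATTR_INDEX_USER,
 *					     "foo", buf, len);
 *			... use buf ...
 *			kfree(buf);
 *		}
 *	}
 *
 * The second call can still return -ERANGE if the attribute grew in the
 * meantime.
 */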
324 
325 static int
326 ext4_xattr_list_entries(struct dentry *dentry, struct ext4_xattr_entry *entry,
327 			char *buffer, size_t buffer_size)
328 {
329 	size_t rest = buffer_size;
330 
331 	for (; !IS_LAST_ENTRY(entry); entry = EXT4_XATTR_NEXT(entry)) {
332 		const struct xattr_handler *handler =
333 			ext4_xattr_handler(entry->e_name_index);
334 
335 		if (handler) {
336 			size_t size = handler->list(dentry, buffer, rest,
337 						    entry->e_name,
338 						    entry->e_name_len,
339 						    handler->flags);
340 			if (buffer) {
341 				if (size > rest)
342 					return -ERANGE;
343 				buffer += size;
344 			}
345 			rest -= size;
346 		}
347 	}
348 	return buffer_size - rest;
349 }
350 
351 static int
352 ext4_xattr_block_list(struct dentry *dentry, char *buffer, size_t buffer_size)
353 {
354 	struct inode *inode = dentry->d_inode;
355 	struct buffer_head *bh = NULL;
356 	int error;
357 
358 	ea_idebug(inode, "buffer=%p, buffer_size=%ld",
359 		  buffer, (long)buffer_size);
360 
361 	error = 0;
362 	if (!EXT4_I(inode)->i_file_acl)
363 		goto cleanup;
364 	ea_idebug(inode, "reading block %llu",
365 		  (unsigned long long)EXT4_I(inode)->i_file_acl);
366 	bh = sb_bread(inode->i_sb, EXT4_I(inode)->i_file_acl);
367 	error = -EIO;
368 	if (!bh)
369 		goto cleanup;
370 	ea_bdebug(bh, "b_count=%d, refcount=%d",
371 		atomic_read(&(bh->b_count)), le32_to_cpu(BHDR(bh)->h_refcount));
372 	if (ext4_xattr_check_block(bh)) {
373 		EXT4_ERROR_INODE(inode, "bad block %llu",
374 				 EXT4_I(inode)->i_file_acl);
375 		error = -EIO;
376 		goto cleanup;
377 	}
378 	ext4_xattr_cache_insert(bh);
379 	error = ext4_xattr_list_entries(dentry, BFIRST(bh), buffer, buffer_size);
380 
381 cleanup:
382 	brelse(bh);
383 
384 	return error;
385 }
386 
387 static int
388 ext4_xattr_ibody_list(struct dentry *dentry, char *buffer, size_t buffer_size)
389 {
390 	struct inode *inode = dentry->d_inode;
391 	struct ext4_xattr_ibody_header *header;
392 	struct ext4_inode *raw_inode;
393 	struct ext4_iloc iloc;
394 	void *end;
395 	int error;
396 
397 	if (!ext4_test_inode_state(inode, EXT4_STATE_XATTR))
398 		return 0;
399 	error = ext4_get_inode_loc(inode, &iloc);
400 	if (error)
401 		return error;
402 	raw_inode = ext4_raw_inode(&iloc);
403 	header = IHDR(inode, raw_inode);
404 	end = (void *)raw_inode + EXT4_SB(inode->i_sb)->s_inode_size;
405 	error = ext4_xattr_check_names(IFIRST(header), end);
406 	if (error)
407 		goto cleanup;
408 	error = ext4_xattr_list_entries(dentry, IFIRST(header),
409 					buffer, buffer_size);
410 
411 cleanup:
412 	brelse(iloc.bh);
413 	return error;
414 }
415 
416 /*
417  * ext4_xattr_list()
418  *
419  * Copy a list of attribute names into the buffer
420  * provided, or compute the buffer size required.
421  * Pass a NULL buffer to compute the required size only.
422  *
423  * Returns a negative error number on failure, or the number of bytes
424  * used / required on success.
425  */
426 static int
427 ext4_xattr_list(struct dentry *dentry, char *buffer, size_t buffer_size)
428 {
429 	int ret, ret2;
430 
431 	down_read(&EXT4_I(dentry->d_inode)->xattr_sem);
432 	ret = ret2 = ext4_xattr_ibody_list(dentry, buffer, buffer_size);
433 	if (ret < 0)
434 		goto errout;
435 	if (buffer) {
436 		buffer += ret;
437 		buffer_size -= ret;
438 	}
439 	ret = ext4_xattr_block_list(dentry, buffer, buffer_size);
440 	if (ret < 0)
441 		goto errout;
442 	ret += ret2;
443 errout:
444 	up_read(&EXT4_I(dentry->d_inode)->xattr_sem);
445 	return ret;
446 }
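/*
 * Note on the result format: as with listxattr(2) in general, a successful
 * call leaves the buffer holding the attribute names as consecutive
 * NUL-terminated strings (e.g. "user.foo\0security.selinux\0"), with each
 * handler's ->list callback prepending its namespace prefix; with a NULL
 * buffer only the total length required is returned.
 */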
447 
448 /*
449  * If the EXT4_FEATURE_COMPAT_EXT_ATTR feature of this file system is
450  * not set, set it.
451  */
452 static void ext4_xattr_update_super_block(handle_t *handle,
453 					  struct super_block *sb)
454 {
455 	if (EXT4_HAS_COMPAT_FEATURE(sb, EXT4_FEATURE_COMPAT_EXT_ATTR))
456 		return;
457 
458 	if (ext4_journal_get_write_access(handle, EXT4_SB(sb)->s_sbh) == 0) {
459 		EXT4_SET_COMPAT_FEATURE(sb, EXT4_FEATURE_COMPAT_EXT_ATTR);
460 		ext4_handle_dirty_super(handle, sb);
461 	}
462 }
463 
464 /*
465  * Release the xattr block BH: If the reference count is > 1, decrement
466  * it; otherwise free the block.
467  */
468 static void
469 ext4_xattr_release_block(handle_t *handle, struct inode *inode,
470 			 struct buffer_head *bh)
471 {
472 	struct mb_cache_entry *ce = NULL;
473 	int error = 0;
474 
475 	ce = mb_cache_entry_get(ext4_xattr_cache, bh->b_bdev, bh->b_blocknr);
476 	error = ext4_journal_get_write_access(handle, bh);
477 	if (error)
478 		goto out;
479 
480 	lock_buffer(bh);
481 	if (BHDR(bh)->h_refcount == cpu_to_le32(1)) {
482 		ea_bdebug(bh, "refcount now=0; freeing");
483 		if (ce)
484 			mb_cache_entry_free(ce);
485 		get_bh(bh);
486 		ext4_free_blocks(handle, inode, bh, 0, 1,
487 				 EXT4_FREE_BLOCKS_METADATA |
488 				 EXT4_FREE_BLOCKS_FORGET);
489 		unlock_buffer(bh);
490 	} else {
491 		le32_add_cpu(&BHDR(bh)->h_refcount, -1);
492 		if (ce)
493 			mb_cache_entry_release(ce);
494 		unlock_buffer(bh);
495 		error = ext4_handle_dirty_metadata(handle, inode, bh);
496 		if (IS_SYNC(inode))
497 			ext4_handle_sync(handle);
498 		dquot_free_block(inode, 1);
499 		ea_bdebug(bh, "refcount now=%d; releasing",
500 			  le32_to_cpu(BHDR(bh)->h_refcount));
501 	}
502 out:
503 	ext4_std_error(inode->i_sb, error);
504 	return;
505 }
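/*
 * Note: the refcount manipulated here is the on-disk sharing count in the
 * block header (BHDR(bh)->h_refcount), not the in-memory bh->b_count;
 * several inodes with identical attribute sets may point at the same EA
 * block, and only the release that drops the count from 1 actually frees
 * the block on disk.
 */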
506 
507 /*
508  * Compute the free space available for EAs. The number of bytes used by the
509  * EA entry descriptors is accumulated into *total.
510  */
511 static size_t ext4_xattr_free_space(struct ext4_xattr_entry *last,
512 				    size_t *min_offs, void *base, int *total)
513 {
514 	for (; !IS_LAST_ENTRY(last); last = EXT4_XATTR_NEXT(last)) {
515 		*total += EXT4_XATTR_LEN(last->e_name_len);
516 		if (!last->e_value_block && last->e_value_size) {
517 			size_t offs = le16_to_cpu(last->e_value_offs);
518 			if (offs < *min_offs)
519 				*min_offs = offs;
520 		}
521 	}
522 	return (*min_offs - ((void *)last - base) - sizeof(__u32));
523 }
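/*
 * Worked example, assuming the usual 32-byte ext4_xattr_header and 16-byte
 * fixed part of ext4_xattr_entry: a 4096-byte EA block holding a single
 * attribute with a 3-byte name and a 100-byte value gives
 *
 *	min_offs = 4096 - EXT4_XATTR_SIZE(100) = 3996
 *	descriptors end at 32 + EXT4_XATTR_LEN(3) = 52
 *	free = 3996 - 52 - sizeof(__u32) = 3940
 *
 * i.e. the gap between the descriptor array growing downwards and the
 * value area growing upwards, minus the four terminating null bytes.
 */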
524 
525 struct ext4_xattr_info {
526 	int name_index;
527 	const char *name;
528 	const void *value;
529 	size_t value_len;
530 };
531 
532 struct ext4_xattr_search {
533 	struct ext4_xattr_entry *first;
534 	void *base;
535 	void *end;
536 	struct ext4_xattr_entry *here;
537 	int not_found;
538 };
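/*
 * Informal field guide: ext4_xattr_info describes the attribute being set
 * (namespace index, name, and the new value, or a NULL value for removal);
 * ext4_xattr_search is a cursor over one storage area, where "first" is the
 * first entry descriptor, "base" and "end" bound the area (base is also the
 * origin for e_value_offs), "here" is the matching entry or the insertion
 * point, and "not_found" caches the lookup result (-ENODATA when absent).
 */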
539 
540 static int
541 ext4_xattr_set_entry(struct ext4_xattr_info *i, struct ext4_xattr_search *s)
542 {
543 	struct ext4_xattr_entry *last;
544 	size_t free, min_offs = s->end - s->base, name_len = strlen(i->name);
545 
546 	/* Compute min_offs and last. */
547 	last = s->first;
548 	for (; !IS_LAST_ENTRY(last); last = EXT4_XATTR_NEXT(last)) {
549 		if (!last->e_value_block && last->e_value_size) {
550 			size_t offs = le16_to_cpu(last->e_value_offs);
551 			if (offs < min_offs)
552 				min_offs = offs;
553 		}
554 	}
555 	free = min_offs - ((void *)last - s->base) - sizeof(__u32);
556 	if (!s->not_found) {
557 		if (!s->here->e_value_block && s->here->e_value_size) {
558 			size_t size = le32_to_cpu(s->here->e_value_size);
559 			free += EXT4_XATTR_SIZE(size);
560 		}
561 		free += EXT4_XATTR_LEN(name_len);
562 	}
563 	if (i->value) {
564 		if (free < EXT4_XATTR_SIZE(i->value_len) ||
565 		    free < EXT4_XATTR_LEN(name_len) +
566 			   EXT4_XATTR_SIZE(i->value_len))
567 			return -ENOSPC;
568 	}
569 
570 	if (i->value && s->not_found) {
571 		/* Insert the new name. */
572 		size_t size = EXT4_XATTR_LEN(name_len);
573 		size_t rest = (void *)last - (void *)s->here + sizeof(__u32);
574 		memmove((void *)s->here + size, s->here, rest);
575 		memset(s->here, 0, size);
576 		s->here->e_name_index = i->name_index;
577 		s->here->e_name_len = name_len;
578 		memcpy(s->here->e_name, i->name, name_len);
579 	} else {
580 		if (!s->here->e_value_block && s->here->e_value_size) {
581 			void *first_val = s->base + min_offs;
582 			size_t offs = le16_to_cpu(s->here->e_value_offs);
583 			void *val = s->base + offs;
584 			size_t size = EXT4_XATTR_SIZE(
585 				le32_to_cpu(s->here->e_value_size));
586 
587 			if (i->value && size == EXT4_XATTR_SIZE(i->value_len)) {
588 				/* The old and the new value have the same
589 				   size. Just replace. */
590 				s->here->e_value_size =
591 					cpu_to_le32(i->value_len);
592 				memset(val + size - EXT4_XATTR_PAD, 0,
593 				       EXT4_XATTR_PAD); /* Clear pad bytes. */
594 				memcpy(val, i->value, i->value_len);
595 				return 0;
596 			}
597 
598 			/* Remove the old value. */
599 			memmove(first_val + size, first_val, val - first_val);
600 			memset(first_val, 0, size);
601 			s->here->e_value_size = 0;
602 			s->here->e_value_offs = 0;
603 			min_offs += size;
604 
605 			/* Adjust all value offsets. */
606 			last = s->first;
607 			while (!IS_LAST_ENTRY(last)) {
608 				size_t o = le16_to_cpu(last->e_value_offs);
609 				if (!last->e_value_block &&
610 				    last->e_value_size && o < offs)
611 					last->e_value_offs =
612 						cpu_to_le16(o + size);
613 				last = EXT4_XATTR_NEXT(last);
614 			}
615 		}
616 		if (!i->value) {
617 			/* Remove the old name. */
618 			size_t size = EXT4_XATTR_LEN(name_len);
619 			last = ENTRY((void *)last - size);
620 			memmove(s->here, (void *)s->here + size,
621 				(void *)last - (void *)s->here + sizeof(__u32));
622 			memset(last, 0, size);
623 		}
624 	}
625 
626 	if (i->value) {
627 		/* Insert the new value. */
628 		s->here->e_value_size = cpu_to_le32(i->value_len);
629 		if (i->value_len) {
630 			size_t size = EXT4_XATTR_SIZE(i->value_len);
631 			void *val = s->base + min_offs - size;
632 			s->here->e_value_offs = cpu_to_le16(min_offs - size);
633 			memset(val + size - EXT4_XATTR_PAD, 0,
634 			       EXT4_XATTR_PAD); /* Clear the pad bytes. */
635 			memcpy(val, i->value, i->value_len);
636 		}
637 	}
638 	return 0;
639 }
640 
641 struct ext4_xattr_block_find {
642 	struct ext4_xattr_search s;
643 	struct buffer_head *bh;
644 };
645 
646 static int
647 ext4_xattr_block_find(struct inode *inode, struct ext4_xattr_info *i,
648 		      struct ext4_xattr_block_find *bs)
649 {
650 	struct super_block *sb = inode->i_sb;
651 	int error;
652 
653 	ea_idebug(inode, "name=%d.%s, value=%p, value_len=%ld",
654 		  i->name_index, i->name, i->value, (long)i->value_len);
655 
656 	if (EXT4_I(inode)->i_file_acl) {
657 		/* The inode already has an extended attribute block. */
658 		bs->bh = sb_bread(sb, EXT4_I(inode)->i_file_acl);
659 		error = -EIO;
660 		if (!bs->bh)
661 			goto cleanup;
662 		ea_bdebug(bs->bh, "b_count=%d, refcount=%d",
663 			atomic_read(&(bs->bh->b_count)),
664 			le32_to_cpu(BHDR(bs->bh)->h_refcount));
665 		if (ext4_xattr_check_block(bs->bh)) {
666 			EXT4_ERROR_INODE(inode, "bad block %llu",
667 					 EXT4_I(inode)->i_file_acl);
668 			error = -EIO;
669 			goto cleanup;
670 		}
671 		/* Find the named attribute. */
672 		bs->s.base = BHDR(bs->bh);
673 		bs->s.first = BFIRST(bs->bh);
674 		bs->s.end = bs->bh->b_data + bs->bh->b_size;
675 		bs->s.here = bs->s.first;
676 		error = ext4_xattr_find_entry(&bs->s.here, i->name_index,
677 					      i->name, bs->bh->b_size, 1);
678 		if (error && error != -ENODATA)
679 			goto cleanup;
680 		bs->s.not_found = error;
681 	}
682 	error = 0;
683 
684 cleanup:
685 	return error;
686 }
687 
688 static int
689 ext4_xattr_block_set(handle_t *handle, struct inode *inode,
690 		     struct ext4_xattr_info *i,
691 		     struct ext4_xattr_block_find *bs)
692 {
693 	struct super_block *sb = inode->i_sb;
694 	struct buffer_head *new_bh = NULL;
695 	struct ext4_xattr_search *s = &bs->s;
696 	struct mb_cache_entry *ce = NULL;
697 	int error = 0;
698 
699 #define header(x) ((struct ext4_xattr_header *)(x))
700 
701 	if (i->value && i->value_len > sb->s_blocksize)
702 		return -ENOSPC;
703 	if (s->base) {
704 		ce = mb_cache_entry_get(ext4_xattr_cache, bs->bh->b_bdev,
705 					bs->bh->b_blocknr);
706 		error = ext4_journal_get_write_access(handle, bs->bh);
707 		if (error)
708 			goto cleanup;
709 		lock_buffer(bs->bh);
710 
711 		if (header(s->base)->h_refcount == cpu_to_le32(1)) {
712 			if (ce) {
713 				mb_cache_entry_free(ce);
714 				ce = NULL;
715 			}
716 			ea_bdebug(bs->bh, "modifying in-place");
717 			error = ext4_xattr_set_entry(i, s);
718 			if (!error) {
719 				if (!IS_LAST_ENTRY(s->first))
720 					ext4_xattr_rehash(header(s->base),
721 							  s->here);
722 				ext4_xattr_cache_insert(bs->bh);
723 			}
724 			unlock_buffer(bs->bh);
725 			if (error == -EIO)
726 				goto bad_block;
727 			if (!error)
728 				error = ext4_handle_dirty_metadata(handle,
729 								   inode,
730 								   bs->bh);
731 			if (error)
732 				goto cleanup;
733 			goto inserted;
734 		} else {
735 			int offset = (char *)s->here - bs->bh->b_data;
736 
737 			unlock_buffer(bs->bh);
738 			ext4_handle_release_buffer(handle, bs->bh);
739 			if (ce) {
740 				mb_cache_entry_release(ce);
741 				ce = NULL;
742 			}
743 			ea_bdebug(bs->bh, "cloning");
744 			s->base = kmalloc(bs->bh->b_size, GFP_NOFS);
745 			error = -ENOMEM;
746 			if (s->base == NULL)
747 				goto cleanup;
748 			memcpy(s->base, BHDR(bs->bh), bs->bh->b_size);
749 			s->first = ENTRY(header(s->base)+1);
750 			header(s->base)->h_refcount = cpu_to_le32(1);
751 			s->here = ENTRY(s->base + offset);
752 			s->end = s->base + bs->bh->b_size;
753 		}
754 	} else {
755 		/* Allocate a buffer where we construct the new block. */
756 		s->base = kzalloc(sb->s_blocksize, GFP_NOFS);
757 		/* assert(header == s->base) */
758 		error = -ENOMEM;
759 		if (s->base == NULL)
760 			goto cleanup;
761 		header(s->base)->h_magic = cpu_to_le32(EXT4_XATTR_MAGIC);
762 		header(s->base)->h_blocks = cpu_to_le32(1);
763 		header(s->base)->h_refcount = cpu_to_le32(1);
764 		s->first = ENTRY(header(s->base)+1);
765 		s->here = ENTRY(header(s->base)+1);
766 		s->end = s->base + sb->s_blocksize;
767 	}
768 
769 	error = ext4_xattr_set_entry(i, s);
770 	if (error == -EIO)
771 		goto bad_block;
772 	if (error)
773 		goto cleanup;
774 	if (!IS_LAST_ENTRY(s->first))
775 		ext4_xattr_rehash(header(s->base), s->here);
776 
777 inserted:
778 	if (!IS_LAST_ENTRY(s->first)) {
779 		new_bh = ext4_xattr_cache_find(inode, header(s->base), &ce);
780 		if (new_bh) {
781 			/* We found an identical block in the cache. */
782 			if (new_bh == bs->bh)
783 				ea_bdebug(new_bh, "keeping");
784 			else {
785 				/* The old block is released after updating
786 				   the inode. */
787 				error = dquot_alloc_block(inode, 1);
788 				if (error)
789 					goto cleanup;
790 				error = ext4_journal_get_write_access(handle,
791 								      new_bh);
792 				if (error)
793 					goto cleanup_dquot;
794 				lock_buffer(new_bh);
795 				le32_add_cpu(&BHDR(new_bh)->h_refcount, 1);
796 				ea_bdebug(new_bh, "reusing; refcount now=%d",
797 					le32_to_cpu(BHDR(new_bh)->h_refcount));
798 				unlock_buffer(new_bh);
799 				error = ext4_handle_dirty_metadata(handle,
800 								   inode,
801 								   new_bh);
802 				if (error)
803 					goto cleanup_dquot;
804 			}
805 			mb_cache_entry_release(ce);
806 			ce = NULL;
807 		} else if (bs->bh && s->base == bs->bh->b_data) {
808 			/* We were modifying this block in-place. */
809 			ea_bdebug(bs->bh, "keeping this block");
810 			new_bh = bs->bh;
811 			get_bh(new_bh);
812 		} else {
813 			/* We need to allocate a new block */
814 			ext4_fsblk_t goal, block;
815 
816 			goal = ext4_group_first_block_no(sb,
817 						EXT4_I(inode)->i_block_group);
818 
819 			/* non-extent files can't have physical blocks past 2^32 */
820 			if (!(ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)))
821 				goal = goal & EXT4_MAX_BLOCK_FILE_PHYS;
822 
823 			/*
824 			 * take i_data_sem because we will test
825 			 * i_delalloc_reserved_flag in ext4_mb_new_blocks
826 			 */
827 			down_read((&EXT4_I(inode)->i_data_sem));
828 			block = ext4_new_meta_blocks(handle, inode, goal, 0,
829 						     NULL, &error);
830 			up_read((&EXT4_I(inode)->i_data_sem));
831 			if (error)
832 				goto cleanup;
833 
834 			if (!(ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)))
835 				BUG_ON(block > EXT4_MAX_BLOCK_FILE_PHYS);
836 
837 			ea_idebug(inode, "creating block %llu",
838 				  (unsigned long long)block);
839 
840 			new_bh = sb_getblk(sb, block);
841 			if (!new_bh) {
842 getblk_failed:
843 				ext4_free_blocks(handle, inode, NULL, block, 1,
844 						 EXT4_FREE_BLOCKS_METADATA);
845 				error = -EIO;
846 				goto cleanup;
847 			}
848 			lock_buffer(new_bh);
849 			error = ext4_journal_get_create_access(handle, new_bh);
850 			if (error) {
851 				unlock_buffer(new_bh);
852 				goto getblk_failed;
853 			}
854 			memcpy(new_bh->b_data, s->base, new_bh->b_size);
855 			set_buffer_uptodate(new_bh);
856 			unlock_buffer(new_bh);
857 			ext4_xattr_cache_insert(new_bh);
858 			error = ext4_handle_dirty_metadata(handle,
859 							   inode, new_bh);
860 			if (error)
861 				goto cleanup;
862 		}
863 	}
864 
865 	/* Update the inode. */
866 	EXT4_I(inode)->i_file_acl = new_bh ? new_bh->b_blocknr : 0;
867 
868 	/* Drop the previous xattr block. */
869 	if (bs->bh && bs->bh != new_bh)
870 		ext4_xattr_release_block(handle, inode, bs->bh);
871 	error = 0;
872 
873 cleanup:
874 	if (ce)
875 		mb_cache_entry_release(ce);
876 	brelse(new_bh);
877 	if (!(bs->bh && s->base == bs->bh->b_data))
878 		kfree(s->base);
879 
880 	return error;
881 
882 cleanup_dquot:
883 	dquot_free_block(inode, 1);
884 	goto cleanup;
885 
886 bad_block:
887 	EXT4_ERROR_INODE(inode, "bad block %llu",
888 			 EXT4_I(inode)->i_file_acl);
889 	goto cleanup;
890 
891 #undef header
892 }
893 
894 struct ext4_xattr_ibody_find {
895 	struct ext4_xattr_search s;
896 	struct ext4_iloc iloc;
897 };
898 
899 static int
900 ext4_xattr_ibody_find(struct inode *inode, struct ext4_xattr_info *i,
901 		      struct ext4_xattr_ibody_find *is)
902 {
903 	struct ext4_xattr_ibody_header *header;
904 	struct ext4_inode *raw_inode;
905 	int error;
906 
907 	if (EXT4_I(inode)->i_extra_isize == 0)
908 		return 0;
909 	raw_inode = ext4_raw_inode(&is->iloc);
910 	header = IHDR(inode, raw_inode);
911 	is->s.base = is->s.first = IFIRST(header);
912 	is->s.here = is->s.first;
913 	is->s.end = (void *)raw_inode + EXT4_SB(inode->i_sb)->s_inode_size;
914 	if (ext4_test_inode_state(inode, EXT4_STATE_XATTR)) {
915 		error = ext4_xattr_check_names(IFIRST(header), is->s.end);
916 		if (error)
917 			return error;
918 		/* Find the named attribute. */
919 		error = ext4_xattr_find_entry(&is->s.here, i->name_index,
920 					      i->name, is->s.end -
921 					      (void *)is->s.base, 0);
922 		if (error && error != -ENODATA)
923 			return error;
924 		is->s.not_found = error;
925 	}
926 	return 0;
927 }
928 
929 static int
930 ext4_xattr_ibody_set(handle_t *handle, struct inode *inode,
931 		     struct ext4_xattr_info *i,
932 		     struct ext4_xattr_ibody_find *is)
933 {
934 	struct ext4_xattr_ibody_header *header;
935 	struct ext4_xattr_search *s = &is->s;
936 	int error;
937 
938 	if (EXT4_I(inode)->i_extra_isize == 0)
939 		return -ENOSPC;
940 	error = ext4_xattr_set_entry(i, s);
941 	if (error)
942 		return error;
943 	header = IHDR(inode, ext4_raw_inode(&is->iloc));
944 	if (!IS_LAST_ENTRY(s->first)) {
945 		header->h_magic = cpu_to_le32(EXT4_XATTR_MAGIC);
946 		ext4_set_inode_state(inode, EXT4_STATE_XATTR);
947 	} else {
948 		header->h_magic = cpu_to_le32(0);
949 		ext4_clear_inode_state(inode, EXT4_STATE_XATTR);
950 	}
951 	return 0;
952 }
953 
954 /*
955  * ext4_xattr_set_handle()
956  *
957  * Create, replace or remove an extended attribute for this inode.  Value
958  * is NULL to remove an existing extended attribute, and non-NULL to
959  * either replace an existing extended attribute, or create a new extended
960  * attribute. The flags XATTR_REPLACE and XATTR_CREATE
961  * specify that an extended attribute must exist and must not exist
962  * prior to the call, respectively.
963  *
964  * Returns 0, or a negative error number on failure.
965  */
966 int
967 ext4_xattr_set_handle(handle_t *handle, struct inode *inode, int name_index,
968 		      const char *name, const void *value, size_t value_len,
969 		      int flags)
970 {
971 	struct ext4_xattr_info i = {
972 		.name_index = name_index,
973 		.name = name,
974 		.value = value,
975 		.value_len = value_len,
977 	};
978 	struct ext4_xattr_ibody_find is = {
979 		.s = { .not_found = -ENODATA, },
980 	};
981 	struct ext4_xattr_block_find bs = {
982 		.s = { .not_found = -ENODATA, },
983 	};
984 	unsigned long no_expand;
985 	int error;
986 
987 	if (!name)
988 		return -EINVAL;
989 	if (strlen(name) > 255)
990 		return -ERANGE;
991 	down_write(&EXT4_I(inode)->xattr_sem);
992 	no_expand = ext4_test_inode_state(inode, EXT4_STATE_NO_EXPAND);
993 	ext4_set_inode_state(inode, EXT4_STATE_NO_EXPAND);
994 
995 	error = ext4_reserve_inode_write(handle, inode, &is.iloc);
996 	if (error)
997 		goto cleanup;
998 
999 	if (ext4_test_inode_state(inode, EXT4_STATE_NEW)) {
1000 		struct ext4_inode *raw_inode = ext4_raw_inode(&is.iloc);
1001 		memset(raw_inode, 0, EXT4_SB(inode->i_sb)->s_inode_size);
1002 		ext4_clear_inode_state(inode, EXT4_STATE_NEW);
1003 	}
1004 
1005 	error = ext4_xattr_ibody_find(inode, &i, &is);
1006 	if (error)
1007 		goto cleanup;
1008 	if (is.s.not_found)
1009 		error = ext4_xattr_block_find(inode, &i, &bs);
1010 	if (error)
1011 		goto cleanup;
1012 	if (is.s.not_found && bs.s.not_found) {
1013 		error = -ENODATA;
1014 		if (flags & XATTR_REPLACE)
1015 			goto cleanup;
1016 		error = 0;
1017 		if (!value)
1018 			goto cleanup;
1019 	} else {
1020 		error = -EEXIST;
1021 		if (flags & XATTR_CREATE)
1022 			goto cleanup;
1023 	}
1024 	if (!value) {
1025 		if (!is.s.not_found)
1026 			error = ext4_xattr_ibody_set(handle, inode, &i, &is);
1027 		else if (!bs.s.not_found)
1028 			error = ext4_xattr_block_set(handle, inode, &i, &bs);
1029 	} else {
1030 		error = ext4_xattr_ibody_set(handle, inode, &i, &is);
1031 		if (!error && !bs.s.not_found) {
1032 			i.value = NULL;
1033 			error = ext4_xattr_block_set(handle, inode, &i, &bs);
1034 		} else if (error == -ENOSPC) {
1035 			if (EXT4_I(inode)->i_file_acl && !bs.s.base) {
1036 				error = ext4_xattr_block_find(inode, &i, &bs);
1037 				if (error)
1038 					goto cleanup;
1039 			}
1040 			error = ext4_xattr_block_set(handle, inode, &i, &bs);
1041 			if (error)
1042 				goto cleanup;
1043 			if (!is.s.not_found) {
1044 				i.value = NULL;
1045 				error = ext4_xattr_ibody_set(handle, inode, &i,
1046 							     &is);
1047 			}
1048 		}
1049 	}
1050 	if (!error) {
1051 		ext4_xattr_update_super_block(handle, inode->i_sb);
1052 		inode->i_ctime = ext4_current_time(inode);
1053 		if (!value)
1054 			ext4_clear_inode_state(inode, EXT4_STATE_NO_EXPAND);
1055 		error = ext4_mark_iloc_dirty(handle, inode, &is.iloc);
1056 		/*
1057 		 * The bh is consumed by ext4_mark_iloc_dirty, even with
1058 		 * error != 0.
1059 		 */
1060 		is.iloc.bh = NULL;
1061 		if (IS_SYNC(inode))
1062 			ext4_handle_sync(handle);
1063 	}
1064 
1065 cleanup:
1066 	brelse(is.iloc.bh);
1067 	brelse(bs.bh);
1068 	if (no_expand == 0)
1069 		ext4_clear_inode_state(inode, EXT4_STATE_NO_EXPAND);
1070 	up_write(&EXT4_I(inode)->xattr_sem);
1071 	return error;
1072 }
1073 
1074 /*
1075  * ext4_xattr_set()
1076  *
1077  * Like ext4_xattr_set_handle, but starts its own journal handle, so the
1078  * extended attribute modification is a filesystem transaction by itself.
1079  *
1080  * Returns 0, or a negative error number on failure.
1081  */
1082 int
1083 ext4_xattr_set(struct inode *inode, int name_index, const char *name,
1084 	       const void *value, size_t value_len, int flags)
1085 {
1086 	handle_t *handle;
1087 	int error, retries = 0;
1088 
1089 retry:
1090 	handle = ext4_journal_start(inode, EXT4_DATA_TRANS_BLOCKS(inode->i_sb));
1091 	if (IS_ERR(handle)) {
1092 		error = PTR_ERR(handle);
1093 	} else {
1094 		int error2;
1095 
1096 		error = ext4_xattr_set_handle(handle, inode, name_index, name,
1097 					      value, value_len, flags);
1098 		error2 = ext4_journal_stop(handle);
1099 		if (error == -ENOSPC &&
1100 		    ext4_should_retry_alloc(inode->i_sb, &retries))
1101 			goto retry;
1102 		if (error == 0)
1103 			error = error2;
1104 	}
1105 
1106 	return error;
1107 }
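/*
 * Usage sketch (the name "foo" and the values are only examples):
 *
 *	Create, failing with -EEXIST if "foo" already exists:
 *		error = ext4_xattr_set(inode, EXT4_XATTR_INDEX_USER, "foo",
 *				       "bar", 3, XATTR_CREATE);
 *	Replace, failing with -ENODATA if it does not exist yet:
 *		error = ext4_xattr_set(inode, EXT4_XATTR_INDEX_USER, "foo",
 *				       "baz", 3, XATTR_REPLACE);
 *	Remove (a NULL value); with flags == 0 a missing attribute is not an
 *	error:
 *		error = ext4_xattr_set(inode, EXT4_XATTR_INDEX_USER, "foo",
 *				       NULL, 0, 0);
 */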
1108 
1109 /*
1110  * Shift the EA entries in the inode to create space for the increased
1111  * i_extra_isize.
1112  */
1113 static void ext4_xattr_shift_entries(struct ext4_xattr_entry *entry,
1114 				     int value_offs_shift, void *to,
1115 				     void *from, size_t n, int blocksize)
1116 {
1117 	struct ext4_xattr_entry *last = entry;
1118 	int new_offs;
1119 
1120 	/* Adjust the value offsets of the entries */
1121 	for (; !IS_LAST_ENTRY(last); last = EXT4_XATTR_NEXT(last)) {
1122 		if (!last->e_value_block && last->e_value_size) {
1123 			new_offs = le16_to_cpu(last->e_value_offs) +
1124 							value_offs_shift;
1125 			BUG_ON(new_offs + le32_to_cpu(last->e_value_size)
1126 				 > blocksize);
1127 			last->e_value_offs = cpu_to_le16(new_offs);
1128 		}
1129 	}
1130 	/* Shift the entries by n bytes */
1131 	memmove(to, from, n);
1132 }
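/*
 * Note: only the ibody header and the descriptors are moved; the values at
 * the far end of the inode body stay in place, which is why e_value_offs
 * (relative to the start of the descriptors) must be adjusted by
 * value_offs_shift before the memmove.
 */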
1133 
1134 /*
1135  * Expand an inode by new_extra_isize bytes when EAs are present.
1136  * Returns 0 on success or negative error number on failure.
1137  */
1138 int ext4_expand_extra_isize_ea(struct inode *inode, int new_extra_isize,
1139 			       struct ext4_inode *raw_inode, handle_t *handle)
1140 {
1141 	struct ext4_xattr_ibody_header *header;
1142 	struct ext4_xattr_entry *entry, *last, *first;
1143 	struct buffer_head *bh = NULL;
1144 	struct ext4_xattr_ibody_find *is = NULL;
1145 	struct ext4_xattr_block_find *bs = NULL;
1146 	char *buffer = NULL, *b_entry_name = NULL;
1147 	size_t min_offs, free;
1148 	int total_ino, total_blk;
1149 	void *base, *start, *end;
1150 	int extra_isize = 0, error = 0, tried_min_extra_isize = 0;
1151 	int s_min_extra_isize = le16_to_cpu(EXT4_SB(inode->i_sb)->s_es->s_min_extra_isize);
1152 
1153 	down_write(&EXT4_I(inode)->xattr_sem);
1154 retry:
1155 	if (EXT4_I(inode)->i_extra_isize >= new_extra_isize) {
1156 		up_write(&EXT4_I(inode)->xattr_sem);
1157 		return 0;
1158 	}
1159 
1160 	header = IHDR(inode, raw_inode);
1161 	entry = IFIRST(header);
1162 
1163 	/*
1164 	 * Check if enough free space is available in the inode to shift the
1165 	 * entries ahead by new_extra_isize.
1166 	 */
1167 
1168 	base = start = entry;
1169 	end = (void *)raw_inode + EXT4_SB(inode->i_sb)->s_inode_size;
1170 	min_offs = end - base;
1171 	last = entry;
1172 	total_ino = sizeof(struct ext4_xattr_ibody_header);
1173 
1174 	free = ext4_xattr_free_space(last, &min_offs, base, &total_ino);
1175 	if (free >= new_extra_isize) {
1176 		entry = IFIRST(header);
1177 		ext4_xattr_shift_entries(entry,	EXT4_I(inode)->i_extra_isize
1178 				- new_extra_isize, (void *)raw_inode +
1179 				EXT4_GOOD_OLD_INODE_SIZE + new_extra_isize,
1180 				(void *)header, total_ino,
1181 				inode->i_sb->s_blocksize);
1182 		EXT4_I(inode)->i_extra_isize = new_extra_isize;
1183 		error = 0;
1184 		goto cleanup;
1185 	}
1186 
1187 	/*
1188 	 * Enough free space isn't available in the inode, check if
1189 	 * EA block can hold new_extra_isize bytes.
1190 	 */
1191 	if (EXT4_I(inode)->i_file_acl) {
1192 		bh = sb_bread(inode->i_sb, EXT4_I(inode)->i_file_acl);
1193 		error = -EIO;
1194 		if (!bh)
1195 			goto cleanup;
1196 		if (ext4_xattr_check_block(bh)) {
1197 			EXT4_ERROR_INODE(inode, "bad block %llu",
1198 					 EXT4_I(inode)->i_file_acl);
1199 			error = -EIO;
1200 			goto cleanup;
1201 		}
1202 		base = BHDR(bh);
1203 		first = BFIRST(bh);
1204 		end = bh->b_data + bh->b_size;
1205 		min_offs = end - base;
1206 		free = ext4_xattr_free_space(first, &min_offs, base,
1207 					     &total_blk);
1208 		if (free < new_extra_isize) {
1209 			if (!tried_min_extra_isize && s_min_extra_isize) {
1210 				tried_min_extra_isize++;
1211 				new_extra_isize = s_min_extra_isize;
1212 				brelse(bh);
1213 				goto retry;
1214 			}
1215 			error = -1;
1216 			goto cleanup;
1217 		}
1218 	} else {
1219 		free = inode->i_sb->s_blocksize;
1220 	}
1221 
1222 	while (new_extra_isize > 0) {
1223 		size_t offs, size, entry_size;
1224 		struct ext4_xattr_entry *small_entry = NULL;
1225 		struct ext4_xattr_info i = {
1226 			.value = NULL,
1227 			.value_len = 0,
1228 		};
1229 		unsigned int total_size;  /* EA entry size + value size */
1230 		unsigned int shift_bytes; /* No. of bytes to shift EAs by */
1231 		unsigned int min_total_size = ~0U;
1232 
1233 		is = kzalloc(sizeof(struct ext4_xattr_ibody_find), GFP_NOFS);
1234 		bs = kzalloc(sizeof(struct ext4_xattr_block_find), GFP_NOFS);
1235 		if (!is || !bs) {
1236 			error = -ENOMEM;
1237 			goto cleanup;
1238 		}
1239 
1240 		is->s.not_found = -ENODATA;
1241 		bs->s.not_found = -ENODATA;
1242 		is->iloc.bh = NULL;
1243 		bs->bh = NULL;
1244 
1245 		last = IFIRST(header);
1246 		/* Find the entry best suited to be pushed into EA block */
1247 		entry = NULL;
1248 		for (; !IS_LAST_ENTRY(last); last = EXT4_XATTR_NEXT(last)) {
1249 			total_size =
1250 			EXT4_XATTR_SIZE(le32_to_cpu(last->e_value_size)) +
1251 					EXT4_XATTR_LEN(last->e_name_len);
1252 			if (total_size <= free && total_size < min_total_size) {
1253 				if (total_size < new_extra_isize) {
1254 					small_entry = last;
1255 				} else {
1256 					entry = last;
1257 					min_total_size = total_size;
1258 				}
1259 			}
1260 		}
1261 
1262 		if (entry == NULL) {
1263 			if (small_entry) {
1264 				entry = small_entry;
1265 			} else {
1266 				if (!tried_min_extra_isize &&
1267 				    s_min_extra_isize) {
1268 					tried_min_extra_isize++;
1269 					new_extra_isize = s_min_extra_isize;
1270 					goto retry;
1271 				}
1272 				error = -1;
1273 				goto cleanup;
1274 			}
1275 		}
1276 		offs = le16_to_cpu(entry->e_value_offs);
1277 		size = le32_to_cpu(entry->e_value_size);
1278 		entry_size = EXT4_XATTR_LEN(entry->e_name_len);
1279 		i.name_index = entry->e_name_index;
1280 		buffer = kmalloc(EXT4_XATTR_SIZE(size), GFP_NOFS);
1281 		b_entry_name = kmalloc(entry->e_name_len + 1, GFP_NOFS);
1282 		if (!buffer || !b_entry_name) {
1283 			error = -ENOMEM;
1284 			goto cleanup;
1285 		}
1286 		/* Save the entry name and the entry value */
1287 		memcpy(buffer, (void *)IFIRST(header) + offs,
1288 		       EXT4_XATTR_SIZE(size));
1289 		memcpy(b_entry_name, entry->e_name, entry->e_name_len);
1290 		b_entry_name[entry->e_name_len] = '\0';
1291 		i.name = b_entry_name;
1292 
1293 		error = ext4_get_inode_loc(inode, &is->iloc);
1294 		if (error)
1295 			goto cleanup;
1296 
1297 		error = ext4_xattr_ibody_find(inode, &i, is);
1298 		if (error)
1299 			goto cleanup;
1300 
1301 		/* Remove the chosen entry from the inode */
1302 		error = ext4_xattr_ibody_set(handle, inode, &i, is);
1303 		if (error)
1304 			goto cleanup;
1305 
1306 		entry = IFIRST(header);
1307 		if (entry_size + EXT4_XATTR_SIZE(size) >= new_extra_isize)
1308 			shift_bytes = new_extra_isize;
1309 		else
1310 			shift_bytes = entry_size + size;
1311 		/* Adjust the offsets and shift the remaining entries ahead */
1312 		ext4_xattr_shift_entries(entry, EXT4_I(inode)->i_extra_isize -
1313 			shift_bytes, (void *)raw_inode +
1314 			EXT4_GOOD_OLD_INODE_SIZE + extra_isize + shift_bytes,
1315 			(void *)header, total_ino - entry_size,
1316 			inode->i_sb->s_blocksize);
1317 
1318 		extra_isize += shift_bytes;
1319 		new_extra_isize -= shift_bytes;
1320 		EXT4_I(inode)->i_extra_isize = extra_isize;
1321 
1322 		i.name = b_entry_name;
1323 		i.value = buffer;
1324 		i.value_len = size;
1325 		error = ext4_xattr_block_find(inode, &i, bs);
1326 		if (error)
1327 			goto cleanup;
1328 
1329 		/* Add entry which was removed from the inode into the block */
1330 		error = ext4_xattr_block_set(handle, inode, &i, bs);
1331 		if (error)
1332 			goto cleanup;
1333 		kfree(b_entry_name);
1334 		kfree(buffer);
1335 		b_entry_name = NULL;
1336 		buffer = NULL;
1337 		brelse(is->iloc.bh);
1338 		kfree(is);
1339 		kfree(bs);
1340 	}
1341 	brelse(bh);
1342 	up_write(&EXT4_I(inode)->xattr_sem);
1343 	return 0;
1344 
1345 cleanup:
1346 	kfree(b_entry_name);
1347 	kfree(buffer);
1348 	if (is)
1349 		brelse(is->iloc.bh);
1350 	kfree(is);
1351 	kfree(bs);
1352 	brelse(bh);
1353 	up_write(&EXT4_I(inode)->xattr_sem);
1354 	return error;
1355 }
1356 
1357 
1358 
1359 /*
1360  * ext4_xattr_delete_inode()
1361  *
1362  * Free extended attribute resources associated with this inode. This
1363  * is called immediately before an inode is freed. We have exclusive
1364  * access to the inode.
1365  */
1366 void
1367 ext4_xattr_delete_inode(handle_t *handle, struct inode *inode)
1368 {
1369 	struct buffer_head *bh = NULL;
1370 
1371 	if (!EXT4_I(inode)->i_file_acl)
1372 		goto cleanup;
1373 	bh = sb_bread(inode->i_sb, EXT4_I(inode)->i_file_acl);
1374 	if (!bh) {
1375 		EXT4_ERROR_INODE(inode, "block %llu read error",
1376 				 EXT4_I(inode)->i_file_acl);
1377 		goto cleanup;
1378 	}
1379 	if (BHDR(bh)->h_magic != cpu_to_le32(EXT4_XATTR_MAGIC) ||
1380 	    BHDR(bh)->h_blocks != cpu_to_le32(1)) {
1381 		EXT4_ERROR_INODE(inode, "bad block %llu",
1382 				 EXT4_I(inode)->i_file_acl);
1383 		goto cleanup;
1384 	}
1385 	ext4_xattr_release_block(handle, inode, bh);
1386 	EXT4_I(inode)->i_file_acl = 0;
1387 
1388 cleanup:
1389 	brelse(bh);
1390 }
1391 
1392 /*
1393  * ext4_xattr_put_super()
1394  *
1395  * This is called when a file system is unmounted.
1396  */
1397 void
1398 ext4_xattr_put_super(struct super_block *sb)
1399 {
1400 	mb_cache_shrink(sb->s_bdev);
1401 }
1402 
1403 /*
1404  * ext4_xattr_cache_insert()
1405  *
1406  * Create a new entry in the extended attribute cache, and insert
1407  * it unless such an entry is already in the cache.
1408  *
1409  * Returns 0, or a negative error number on failure.
1410  */
1411 static void
1412 ext4_xattr_cache_insert(struct buffer_head *bh)
1413 {
1414 	__u32 hash = le32_to_cpu(BHDR(bh)->h_hash);
1415 	struct mb_cache_entry *ce;
1416 	int error;
1417 
1418 	ce = mb_cache_entry_alloc(ext4_xattr_cache, GFP_NOFS);
1419 	if (!ce) {
1420 		ea_bdebug(bh, "out of memory");
1421 		return;
1422 	}
1423 	error = mb_cache_entry_insert(ce, bh->b_bdev, bh->b_blocknr, hash);
1424 	if (error) {
1425 		mb_cache_entry_free(ce);
1426 		if (error == -EBUSY) {
1427 			ea_bdebug(bh, "already in cache");
1428 			error = 0;
1429 		}
1430 	} else {
1431 		ea_bdebug(bh, "inserting [%x]", (int)hash);
1432 		mb_cache_entry_release(ce);
1433 	}
1434 }
1435 
1436 /*
1437  * ext4_xattr_cmp()
1438  *
1439  * Compare two extended attribute blocks for equality.
1440  *
1441  * Returns 0 if the blocks are equal, 1 if they differ, and
1442  * a negative error number on errors.
1443  */
1444 static int
1445 ext4_xattr_cmp(struct ext4_xattr_header *header1,
1446 	       struct ext4_xattr_header *header2)
1447 {
1448 	struct ext4_xattr_entry *entry1, *entry2;
1449 
1450 	entry1 = ENTRY(header1+1);
1451 	entry2 = ENTRY(header2+1);
1452 	while (!IS_LAST_ENTRY(entry1)) {
1453 		if (IS_LAST_ENTRY(entry2))
1454 			return 1;
1455 		if (entry1->e_hash != entry2->e_hash ||
1456 		    entry1->e_name_index != entry2->e_name_index ||
1457 		    entry1->e_name_len != entry2->e_name_len ||
1458 		    entry1->e_value_size != entry2->e_value_size ||
1459 		    memcmp(entry1->e_name, entry2->e_name, entry1->e_name_len))
1460 			return 1;
1461 		if (entry1->e_value_block != 0 || entry2->e_value_block != 0)
1462 			return -EIO;
1463 		if (memcmp((char *)header1 + le16_to_cpu(entry1->e_value_offs),
1464 			   (char *)header2 + le16_to_cpu(entry2->e_value_offs),
1465 			   le32_to_cpu(entry1->e_value_size)))
1466 			return 1;
1467 
1468 		entry1 = EXT4_XATTR_NEXT(entry1);
1469 		entry2 = EXT4_XATTR_NEXT(entry2);
1470 	}
1471 	if (!IS_LAST_ENTRY(entry2))
1472 		return 1;
1473 	return 0;
1474 }
1475 
1476 /*
1477  * ext4_xattr_cache_find()
1478  *
1479  * Find an identical extended attribute block.
1480  *
1481  * Returns a pointer to the block found, or NULL if such a block was
1482  * not found or an error occurred.
1483  */
1484 static struct buffer_head *
1485 ext4_xattr_cache_find(struct inode *inode, struct ext4_xattr_header *header,
1486 		      struct mb_cache_entry **pce)
1487 {
1488 	__u32 hash = le32_to_cpu(header->h_hash);
1489 	struct mb_cache_entry *ce;
1490 
1491 	if (!header->h_hash)
1492 		return NULL;  /* never share */
1493 	ea_idebug(inode, "looking for cached blocks [%x]", (int)hash);
1494 again:
1495 	ce = mb_cache_entry_find_first(ext4_xattr_cache, inode->i_sb->s_bdev,
1496 				       hash);
1497 	while (ce) {
1498 		struct buffer_head *bh;
1499 
1500 		if (IS_ERR(ce)) {
1501 			if (PTR_ERR(ce) == -EAGAIN)
1502 				goto again;
1503 			break;
1504 		}
1505 		bh = sb_bread(inode->i_sb, ce->e_block);
1506 		if (!bh) {
1507 			EXT4_ERROR_INODE(inode, "block %lu read error",
1508 					 (unsigned long) ce->e_block);
1509 		} else if (le32_to_cpu(BHDR(bh)->h_refcount) >=
1510 				EXT4_XATTR_REFCOUNT_MAX) {
1511 			ea_idebug(inode, "block %lu refcount %d>=%d",
1512 				  (unsigned long) ce->e_block,
1513 				  le32_to_cpu(BHDR(bh)->h_refcount),
1514 					  EXT4_XATTR_REFCOUNT_MAX);
1515 		} else if (ext4_xattr_cmp(header, BHDR(bh)) == 0) {
1516 			*pce = ce;
1517 			return bh;
1518 		}
1519 		brelse(bh);
1520 		ce = mb_cache_entry_find_next(ce, inode->i_sb->s_bdev, hash);
1521 	}
1522 	return NULL;
1523 }
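/*
 * Note: candidate blocks come from the mbcache keyed by the block header
 * hash; ext4_xattr_cmp() then confirms a byte-for-byte match, and blocks
 * whose h_refcount already reached EXT4_XATTR_REFCOUNT_MAX are skipped to
 * bound how widely a single block is shared.
 */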
1524 
1525 #define NAME_HASH_SHIFT 5
1526 #define VALUE_HASH_SHIFT 16
1527 
1528 /*
1529  * ext4_xattr_hash_entry()
1530  *
1531  * Compute the hash of an extended attribute.
1532  */
1533 static inline void ext4_xattr_hash_entry(struct ext4_xattr_header *header,
1534 					 struct ext4_xattr_entry *entry)
1535 {
1536 	__u32 hash = 0;
1537 	char *name = entry->e_name;
1538 	int n;
1539 
1540 	for (n = 0; n < entry->e_name_len; n++) {
1541 		hash = (hash << NAME_HASH_SHIFT) ^
1542 		       (hash >> (8*sizeof(hash) - NAME_HASH_SHIFT)) ^
1543 		       *name++;
1544 	}
1545 
1546 	if (entry->e_value_block == 0 && entry->e_value_size != 0) {
1547 		__le32 *value = (__le32 *)((char *)header +
1548 			le16_to_cpu(entry->e_value_offs));
1549 		for (n = (le32_to_cpu(entry->e_value_size) +
1550 		     EXT4_XATTR_ROUND) >> EXT4_XATTR_PAD_BITS; n; n--) {
1551 			hash = (hash << VALUE_HASH_SHIFT) ^
1552 			       (hash >> (8*sizeof(hash) - VALUE_HASH_SHIFT)) ^
1553 			       le32_to_cpu(*value++);
1554 		}
1555 	}
1556 	entry->e_hash = cpu_to_le32(hash);
1557 }
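/*
 * In other words, for each name byte c the entry hash is the rotate-and-xor
 * fold
 *
 *	hash = (hash << 5) ^ (hash >> 27) ^ c
 *
 * followed by the same fold with a 16-bit shift over the little-endian
 * value words, so identical (name, value) pairs hash identically no matter
 * where the value happens to sit in the block.
 */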
1558 
1559 #undef NAME_HASH_SHIFT
1560 #undef VALUE_HASH_SHIFT
1561 
1562 #define BLOCK_HASH_SHIFT 16
1563 
1564 /*
1565  * ext4_xattr_rehash()
1566  *
1567  * Re-compute the extended attribute hash value after an entry has changed.
1568  */
1569 static void ext4_xattr_rehash(struct ext4_xattr_header *header,
1570 			      struct ext4_xattr_entry *entry)
1571 {
1572 	struct ext4_xattr_entry *here;
1573 	__u32 hash = 0;
1574 
1575 	ext4_xattr_hash_entry(header, entry);
1576 	here = ENTRY(header+1);
1577 	while (!IS_LAST_ENTRY(here)) {
1578 		if (!here->e_hash) {
1579 			/* Block is not shared if an entry's hash value == 0 */
1580 			hash = 0;
1581 			break;
1582 		}
1583 		hash = (hash << BLOCK_HASH_SHIFT) ^
1584 		       (hash >> (8*sizeof(hash) - BLOCK_HASH_SHIFT)) ^
1585 		       le32_to_cpu(here->e_hash);
1586 		here = EXT4_XATTR_NEXT(here);
1587 	}
1588 	header->h_hash = cpu_to_le32(hash);
1589 }
1590 
1591 #undef BLOCK_HASH_SHIFT
1592 
1593 int __init
1594 ext4_init_xattr(void)
1595 {
1596 	ext4_xattr_cache = mb_cache_create("ext4_xattr", 6);
1597 	if (!ext4_xattr_cache)
1598 		return -ENOMEM;
1599 	return 0;
1600 }
1601 
1602 void
1603 ext4_exit_xattr(void)
1604 {
1605 	if (ext4_xattr_cache)
1606 		mb_cache_destroy(ext4_xattr_cache);
1607 	ext4_xattr_cache = NULL;
1608 }
1609