xref: /openbmc/linux/fs/ext4/namei.c (revision 84764a41)
1 /*
2  *  linux/fs/ext4/namei.c
3  *
4  * Copyright (C) 1992, 1993, 1994, 1995
5  * Remy Card (card@masi.ibp.fr)
6  * Laboratoire MASI - Institut Blaise Pascal
7  * Universite Pierre et Marie Curie (Paris VI)
8  *
9  *  from
10  *
11  *  linux/fs/minix/namei.c
12  *
13  *  Copyright (C) 1991, 1992  Linus Torvalds
14  *
15  *  Big-endian to little-endian byte-swapping/bitmaps by
16  *        David S. Miller (davem@caip.rutgers.edu), 1995
17  *  Directory entry file type support and forward compatibility hooks
18  *	for B-tree directories by Theodore Ts'o (tytso@mit.edu), 1998
19  *  Hash Tree Directory indexing (c)
20  *	Daniel Phillips, 2001
21  *  Hash Tree Directory indexing porting
22  *	Christopher Li, 2002
23  *  Hash Tree Directory indexing cleanup
24  *	Theodore Ts'o, 2002
25  */
26 
27 #include <linux/fs.h>
28 #include <linux/pagemap.h>
29 #include <linux/jbd2.h>
30 #include <linux/time.h>
31 #include <linux/fcntl.h>
32 #include <linux/stat.h>
33 #include <linux/string.h>
34 #include <linux/quotaops.h>
35 #include <linux/buffer_head.h>
36 #include <linux/bio.h>
37 #include "ext4.h"
38 #include "ext4_jbd2.h"
39 
40 #include "xattr.h"
41 #include "acl.h"
42 
43 #include <trace/events/ext4.h>
/*
 * How far ahead to read directories while searching them: the read-ahead
 * window is NAMEI_RA_CHUNKS chunks of NAMEI_RA_BLOCKS blocks each.
 */
#define NAMEI_RA_CHUNKS		2
#define NAMEI_RA_BLOCKS		4
#define NAMEI_RA_SIZE		(NAMEI_RA_CHUNKS * NAMEI_RA_BLOCKS)
/* Flat slot number of block 'blk' inside chunk 'chunk'. */
#define NAMEI_RA_INDEX(chunk, blk) (((chunk) * NAMEI_RA_BLOCKS) + (blk))
51 
52 static struct buffer_head *ext4_append(handle_t *handle,
53 					struct inode *inode,
54 					ext4_lblk_t *block, int *err)
55 {
56 	struct buffer_head *bh;
57 
58 	if (unlikely(EXT4_SB(inode->i_sb)->s_max_dir_size_kb &&
59 		     ((inode->i_size >> 10) >=
60 		      EXT4_SB(inode->i_sb)->s_max_dir_size_kb))) {
61 		*err = -ENOSPC;
62 		return NULL;
63 	}
64 
65 	*block = inode->i_size >> inode->i_sb->s_blocksize_bits;
66 
67 	bh = ext4_bread(handle, inode, *block, 1, err);
68 	if (bh) {
69 		inode->i_size += inode->i_sb->s_blocksize;
70 		EXT4_I(inode)->i_disksize = inode->i_size;
71 		*err = ext4_journal_get_write_access(handle, bh);
72 		if (*err) {
73 			brelse(bh);
74 			bh = NULL;
75 		}
76 	}
77 	if (!bh && !(*err)) {
78 		*err = -EIO;
79 		ext4_error(inode->i_sb,
80 			   "Directory hole detected on inode %lu\n",
81 			   inode->i_ino);
82 	}
83 	return bh;
84 }
85 
/* Route assert() to J_ASSERT() unless something already defined it. */
#ifndef assert
#define assert(cond) J_ASSERT(cond)
#endif

/* dxtrace(stmt) expands to stmt in DX_DEBUG builds and to nothing otherwise. */
#ifdef DX_DEBUG
#define dxtrace(stmt) stmt
#else
#define dxtrace(stmt)
#endif
95 
96 struct fake_dirent
97 {
98 	__le32 inode;
99 	__le16 rec_len;
100 	u8 name_len;
101 	u8 file_type;
102 };
103 
104 struct dx_countlimit
105 {
106 	__le16 limit;
107 	__le16 count;
108 };
109 
110 struct dx_entry
111 {
112 	__le32 hash;
113 	__le32 block;
114 };
115 
116 /*
117  * dx_root_info is laid out so that if it should somehow get overlaid by a
118  * dirent the two low bits of the hash version will be zero.  Therefore, the
119  * hash version mod 4 should never be 0.  Sincerely, the paranoia department.
120  */
121 
122 struct dx_root
123 {
124 	struct fake_dirent dot;
125 	char dot_name[4];
126 	struct fake_dirent dotdot;
127 	char dotdot_name[4];
128 	struct dx_root_info
129 	{
130 		__le32 reserved_zero;
131 		u8 hash_version;
132 		u8 info_length; /* 8 */
133 		u8 indirect_levels;
134 		u8 unused_flags;
135 	}
136 	info;
137 	struct dx_entry	entries[0];
138 };
139 
140 struct dx_node
141 {
142 	struct fake_dirent fake;
143 	struct dx_entry	entries[0];
144 };
145 
146 
/*
 * Position within one index level: the buffer holding the block, the start
 * of its dx_entry array, and the entry currently selected.
 */
struct dx_frame {
	struct buffer_head *bh;
	struct dx_entry *entries;
	struct dx_entry *at;
};
153 
154 struct dx_map_entry
155 {
156 	u32 hash;
157 	u16 offs;
158 	u16 size;
159 };
160 
161 /*
162  * This goes at the end of each htree block.
163  */
164 struct dx_tail {
165 	u32 dt_reserved;
166 	__le32 dt_checksum;	/* crc32c(uuid+inum+dirblock) */
167 };
168 
169 static inline ext4_lblk_t dx_get_block(struct dx_entry *entry);
170 static void dx_set_block(struct dx_entry *entry, ext4_lblk_t value);
171 static inline unsigned dx_get_hash(struct dx_entry *entry);
172 static void dx_set_hash(struct dx_entry *entry, unsigned value);
173 static unsigned dx_get_count(struct dx_entry *entries);
174 static unsigned dx_get_limit(struct dx_entry *entries);
175 static void dx_set_count(struct dx_entry *entries, unsigned value);
176 static void dx_set_limit(struct dx_entry *entries, unsigned value);
177 static unsigned dx_root_limit(struct inode *dir, unsigned infosize);
178 static unsigned dx_node_limit(struct inode *dir);
179 static struct dx_frame *dx_probe(const struct qstr *d_name,
180 				 struct inode *dir,
181 				 struct dx_hash_info *hinfo,
182 				 struct dx_frame *frame,
183 				 int *err);
184 static void dx_release(struct dx_frame *frames);
185 static int dx_make_map(struct ext4_dir_entry_2 *de, unsigned blocksize,
186 		       struct dx_hash_info *hinfo, struct dx_map_entry map[]);
187 static void dx_sort_map(struct dx_map_entry *map, unsigned count);
188 static struct ext4_dir_entry_2 *dx_move_dirents(char *from, char *to,
189 		struct dx_map_entry *offsets, int count, unsigned blocksize);
190 static struct ext4_dir_entry_2* dx_pack_dirents(char *base, unsigned blocksize);
191 static void dx_insert_block(struct dx_frame *frame,
192 					u32 hash, ext4_lblk_t block);
193 static int ext4_htree_next_block(struct inode *dir, __u32 hash,
194 				 struct dx_frame *frame,
195 				 struct dx_frame *frames,
196 				 __u32 *start_hash);
197 static struct buffer_head * ext4_dx_find_entry(struct inode *dir,
198 		const struct qstr *d_name,
199 		struct ext4_dir_entry_2 **res_dir,
200 		int *err);
201 static int ext4_dx_add_entry(handle_t *handle, struct dentry *dentry,
202 			     struct inode *inode);
203 
/* checksumming functions */

/*
 * Address of the ext4_dir_entry_tail that occupies the last bytes of a
 * directory block of the given size.
 */
#define EXT4_DIRENT_TAIL(block, blocksize) \
	((struct ext4_dir_entry_tail *)((void *)(block) + \
		((blocksize) - sizeof(struct ext4_dir_entry_tail))))
209 
210 static void initialize_dirent_tail(struct ext4_dir_entry_tail *t,
211 				   unsigned int blocksize)
212 {
213 	memset(t, 0, sizeof(struct ext4_dir_entry_tail));
214 	t->det_rec_len = ext4_rec_len_to_disk(
215 			sizeof(struct ext4_dir_entry_tail), blocksize);
216 	t->det_reserved_ft = EXT4_FT_DIR_CSUM;
217 }
218 
219 /* Walk through a dirent block to find a checksum "dirent" at the tail */
220 static struct ext4_dir_entry_tail *get_dirent_tail(struct inode *inode,
221 						   struct ext4_dir_entry *de)
222 {
223 	struct ext4_dir_entry_tail *t;
224 
225 #ifdef PARANOID
226 	struct ext4_dir_entry *d, *top;
227 
228 	d = de;
229 	top = (struct ext4_dir_entry *)(((void *)de) +
230 		(EXT4_BLOCK_SIZE(inode->i_sb) -
231 		sizeof(struct ext4_dir_entry_tail)));
232 	while (d < top && d->rec_len)
233 		d = (struct ext4_dir_entry *)(((void *)d) +
234 		    le16_to_cpu(d->rec_len));
235 
236 	if (d != top)
237 		return NULL;
238 
239 	t = (struct ext4_dir_entry_tail *)d;
240 #else
241 	t = EXT4_DIRENT_TAIL(de, EXT4_BLOCK_SIZE(inode->i_sb));
242 #endif
243 
244 	if (t->det_reserved_zero1 ||
245 	    le16_to_cpu(t->det_rec_len) != sizeof(struct ext4_dir_entry_tail) ||
246 	    t->det_reserved_zero2 ||
247 	    t->det_reserved_ft != EXT4_FT_DIR_CSUM)
248 		return NULL;
249 
250 	return t;
251 }
252 
253 static __le32 ext4_dirent_csum(struct inode *inode,
254 			       struct ext4_dir_entry *dirent, int size)
255 {
256 	struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
257 	struct ext4_inode_info *ei = EXT4_I(inode);
258 	__u32 csum;
259 
260 	csum = ext4_chksum(sbi, ei->i_csum_seed, (__u8 *)dirent, size);
261 	return cpu_to_le32(csum);
262 }
263 
264 int ext4_dirent_csum_verify(struct inode *inode, struct ext4_dir_entry *dirent)
265 {
266 	struct ext4_dir_entry_tail *t;
267 
268 	if (!EXT4_HAS_RO_COMPAT_FEATURE(inode->i_sb,
269 					EXT4_FEATURE_RO_COMPAT_METADATA_CSUM))
270 		return 1;
271 
272 	t = get_dirent_tail(inode, dirent);
273 	if (!t) {
274 		EXT4_ERROR_INODE(inode, "metadata_csum set but no space in dir "
275 				 "leaf for checksum.  Please run e2fsck -D.");
276 		return 0;
277 	}
278 
279 	if (t->det_checksum != ext4_dirent_csum(inode, dirent,
280 						(void *)t - (void *)dirent))
281 		return 0;
282 
283 	return 1;
284 }
285 
286 static void ext4_dirent_csum_set(struct inode *inode,
287 				 struct ext4_dir_entry *dirent)
288 {
289 	struct ext4_dir_entry_tail *t;
290 
291 	if (!EXT4_HAS_RO_COMPAT_FEATURE(inode->i_sb,
292 					EXT4_FEATURE_RO_COMPAT_METADATA_CSUM))
293 		return;
294 
295 	t = get_dirent_tail(inode, dirent);
296 	if (!t) {
297 		EXT4_ERROR_INODE(inode, "metadata_csum set but no space in dir "
298 				 "leaf for checksum.  Please run e2fsck -D.");
299 		return;
300 	}
301 
302 	t->det_checksum = ext4_dirent_csum(inode, dirent,
303 					   (void *)t - (void *)dirent);
304 }
305 
306 static inline int ext4_handle_dirty_dirent_node(handle_t *handle,
307 						struct inode *inode,
308 						struct buffer_head *bh)
309 {
310 	ext4_dirent_csum_set(inode, (struct ext4_dir_entry *)bh->b_data);
311 	return ext4_handle_dirty_metadata(handle, inode, bh);
312 }
313 
314 static struct dx_countlimit *get_dx_countlimit(struct inode *inode,
315 					       struct ext4_dir_entry *dirent,
316 					       int *offset)
317 {
318 	struct ext4_dir_entry *dp;
319 	struct dx_root_info *root;
320 	int count_offset;
321 
322 	if (le16_to_cpu(dirent->rec_len) == EXT4_BLOCK_SIZE(inode->i_sb))
323 		count_offset = 8;
324 	else if (le16_to_cpu(dirent->rec_len) == 12) {
325 		dp = (struct ext4_dir_entry *)(((void *)dirent) + 12);
326 		if (le16_to_cpu(dp->rec_len) !=
327 		    EXT4_BLOCK_SIZE(inode->i_sb) - 12)
328 			return NULL;
329 		root = (struct dx_root_info *)(((void *)dp + 12));
330 		if (root->reserved_zero ||
331 		    root->info_length != sizeof(struct dx_root_info))
332 			return NULL;
333 		count_offset = 32;
334 	} else
335 		return NULL;
336 
337 	if (offset)
338 		*offset = count_offset;
339 	return (struct dx_countlimit *)(((void *)dirent) + count_offset);
340 }
341 
342 static __le32 ext4_dx_csum(struct inode *inode, struct ext4_dir_entry *dirent,
343 			   int count_offset, int count, struct dx_tail *t)
344 {
345 	struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
346 	struct ext4_inode_info *ei = EXT4_I(inode);
347 	__u32 csum, old_csum;
348 	int size;
349 
350 	size = count_offset + (count * sizeof(struct dx_entry));
351 	old_csum = t->dt_checksum;
352 	t->dt_checksum = 0;
353 	csum = ext4_chksum(sbi, ei->i_csum_seed, (__u8 *)dirent, size);
354 	csum = ext4_chksum(sbi, csum, (__u8 *)t, sizeof(struct dx_tail));
355 	t->dt_checksum = old_csum;
356 
357 	return cpu_to_le32(csum);
358 }
359 
360 static int ext4_dx_csum_verify(struct inode *inode,
361 			       struct ext4_dir_entry *dirent)
362 {
363 	struct dx_countlimit *c;
364 	struct dx_tail *t;
365 	int count_offset, limit, count;
366 
367 	if (!EXT4_HAS_RO_COMPAT_FEATURE(inode->i_sb,
368 					EXT4_FEATURE_RO_COMPAT_METADATA_CSUM))
369 		return 1;
370 
371 	c = get_dx_countlimit(inode, dirent, &count_offset);
372 	if (!c) {
373 		EXT4_ERROR_INODE(inode, "dir seems corrupt?  Run e2fsck -D.");
374 		return 1;
375 	}
376 	limit = le16_to_cpu(c->limit);
377 	count = le16_to_cpu(c->count);
378 	if (count_offset + (limit * sizeof(struct dx_entry)) >
379 	    EXT4_BLOCK_SIZE(inode->i_sb) - sizeof(struct dx_tail)) {
380 		EXT4_ERROR_INODE(inode, "metadata_csum set but no space for "
381 				 "tree checksum found.  Run e2fsck -D.");
382 		return 1;
383 	}
384 	t = (struct dx_tail *)(((struct dx_entry *)c) + limit);
385 
386 	if (t->dt_checksum != ext4_dx_csum(inode, dirent, count_offset,
387 					    count, t))
388 		return 0;
389 	return 1;
390 }
391 
392 static void ext4_dx_csum_set(struct inode *inode, struct ext4_dir_entry *dirent)
393 {
394 	struct dx_countlimit *c;
395 	struct dx_tail *t;
396 	int count_offset, limit, count;
397 
398 	if (!EXT4_HAS_RO_COMPAT_FEATURE(inode->i_sb,
399 					EXT4_FEATURE_RO_COMPAT_METADATA_CSUM))
400 		return;
401 
402 	c = get_dx_countlimit(inode, dirent, &count_offset);
403 	if (!c) {
404 		EXT4_ERROR_INODE(inode, "dir seems corrupt?  Run e2fsck -D.");
405 		return;
406 	}
407 	limit = le16_to_cpu(c->limit);
408 	count = le16_to_cpu(c->count);
409 	if (count_offset + (limit * sizeof(struct dx_entry)) >
410 	    EXT4_BLOCK_SIZE(inode->i_sb) - sizeof(struct dx_tail)) {
411 		EXT4_ERROR_INODE(inode, "metadata_csum set but no space for "
412 				 "tree checksum.  Run e2fsck -D.");
413 		return;
414 	}
415 	t = (struct dx_tail *)(((struct dx_entry *)c) + limit);
416 
417 	t->dt_checksum = ext4_dx_csum(inode, dirent, count_offset, count, t);
418 }
419 
420 static inline int ext4_handle_dirty_dx_node(handle_t *handle,
421 					    struct inode *inode,
422 					    struct buffer_head *bh)
423 {
424 	ext4_dx_csum_set(inode, (struct ext4_dir_entry *)bh->b_data);
425 	return ext4_handle_dirty_metadata(handle, inode, bh);
426 }
427 
428 /*
429  * p is at least 6 bytes before the end of page
430  */
431 static inline struct ext4_dir_entry_2 *
432 ext4_next_entry(struct ext4_dir_entry_2 *p, unsigned long blocksize)
433 {
434 	return (struct ext4_dir_entry_2 *)((char *)p +
435 		ext4_rec_len_from_disk(p->rec_len, blocksize));
436 }
437 
438 /*
439  * Future: use high four bits of block for coalesce-on-delete flags
440  * Mask them off for now.
441  */
442 
443 static inline ext4_lblk_t dx_get_block(struct dx_entry *entry)
444 {
445 	return le32_to_cpu(entry->block) & 0x00ffffff;
446 }
447 
448 static inline void dx_set_block(struct dx_entry *entry, ext4_lblk_t value)
449 {
450 	entry->block = cpu_to_le32(value);
451 }
452 
453 static inline unsigned dx_get_hash(struct dx_entry *entry)
454 {
455 	return le32_to_cpu(entry->hash);
456 }
457 
458 static inline void dx_set_hash(struct dx_entry *entry, unsigned value)
459 {
460 	entry->hash = cpu_to_le32(value);
461 }
462 
463 static inline unsigned dx_get_count(struct dx_entry *entries)
464 {
465 	return le16_to_cpu(((struct dx_countlimit *) entries)->count);
466 }
467 
468 static inline unsigned dx_get_limit(struct dx_entry *entries)
469 {
470 	return le16_to_cpu(((struct dx_countlimit *) entries)->limit);
471 }
472 
473 static inline void dx_set_count(struct dx_entry *entries, unsigned value)
474 {
475 	((struct dx_countlimit *) entries)->count = cpu_to_le16(value);
476 }
477 
478 static inline void dx_set_limit(struct dx_entry *entries, unsigned value)
479 {
480 	((struct dx_countlimit *) entries)->limit = cpu_to_le16(value);
481 }
482 
483 static inline unsigned dx_root_limit(struct inode *dir, unsigned infosize)
484 {
485 	unsigned entry_space = dir->i_sb->s_blocksize - EXT4_DIR_REC_LEN(1) -
486 		EXT4_DIR_REC_LEN(2) - infosize;
487 
488 	if (EXT4_HAS_RO_COMPAT_FEATURE(dir->i_sb,
489 				       EXT4_FEATURE_RO_COMPAT_METADATA_CSUM))
490 		entry_space -= sizeof(struct dx_tail);
491 	return entry_space / sizeof(struct dx_entry);
492 }
493 
494 static inline unsigned dx_node_limit(struct inode *dir)
495 {
496 	unsigned entry_space = dir->i_sb->s_blocksize - EXT4_DIR_REC_LEN(0);
497 
498 	if (EXT4_HAS_RO_COMPAT_FEATURE(dir->i_sb,
499 				       EXT4_FEATURE_RO_COMPAT_METADATA_CSUM))
500 		entry_space -= sizeof(struct dx_tail);
501 	return entry_space / sizeof(struct dx_entry);
502 }
503 
504 /*
505  * Debug
506  */
507 #ifdef DX_DEBUG
508 static void dx_show_index(char * label, struct dx_entry *entries)
509 {
510 	int i, n = dx_get_count (entries);
511 	printk(KERN_DEBUG "%s index ", label);
512 	for (i = 0; i < n; i++) {
513 		printk("%x->%lu ", i ? dx_get_hash(entries + i) :
514 				0, (unsigned long)dx_get_block(entries + i));
515 	}
516 	printk("\n");
517 }
518 
/* Totals gathered by the dx_show_* debug helpers. */
struct stats {
	unsigned names;		/* entries with a non-zero inode */
	unsigned space;		/* sum of EXT4_DIR_REC_LEN() over those entries */
	unsigned bcount;	/* leaf blocks visited */
};
525 
526 static struct stats dx_show_leaf(struct dx_hash_info *hinfo, struct ext4_dir_entry_2 *de,
527 				 int size, int show_names)
528 {
529 	unsigned names = 0, space = 0;
530 	char *base = (char *) de;
531 	struct dx_hash_info h = *hinfo;
532 
533 	printk("names: ");
534 	while ((char *) de < base + size)
535 	{
536 		if (de->inode)
537 		{
538 			if (show_names)
539 			{
540 				int len = de->name_len;
541 				char *name = de->name;
542 				while (len--) printk("%c", *name++);
543 				ext4fs_dirhash(de->name, de->name_len, &h);
544 				printk(":%x.%u ", h.hash,
545 				       (unsigned) ((char *) de - base));
546 			}
547 			space += EXT4_DIR_REC_LEN(de->name_len);
548 			names++;
549 		}
550 		de = ext4_next_entry(de, size);
551 	}
552 	printk("(%i)\n", names);
553 	return (struct stats) { names, space, 1 };
554 }
555 
556 struct stats dx_show_entries(struct dx_hash_info *hinfo, struct inode *dir,
557 			     struct dx_entry *entries, int levels)
558 {
559 	unsigned blocksize = dir->i_sb->s_blocksize;
560 	unsigned count = dx_get_count(entries), names = 0, space = 0, i;
561 	unsigned bcount = 0;
562 	struct buffer_head *bh;
563 	int err;
564 	printk("%i indexed blocks...\n", count);
565 	for (i = 0; i < count; i++, entries++)
566 	{
567 		ext4_lblk_t block = dx_get_block(entries);
568 		ext4_lblk_t hash  = i ? dx_get_hash(entries): 0;
569 		u32 range = i < count - 1? (dx_get_hash(entries + 1) - hash): ~hash;
570 		struct stats stats;
571 		printk("%s%3u:%03u hash %8x/%8x ",levels?"":"   ", i, block, hash, range);
572 		if (!(bh = ext4_bread (NULL,dir, block, 0,&err))) continue;
573 		stats = levels?
574 		   dx_show_entries(hinfo, dir, ((struct dx_node *) bh->b_data)->entries, levels - 1):
575 		   dx_show_leaf(hinfo, (struct ext4_dir_entry_2 *) bh->b_data, blocksize, 0);
576 		names += stats.names;
577 		space += stats.space;
578 		bcount += stats.bcount;
579 		brelse(bh);
580 	}
581 	if (bcount)
582 		printk(KERN_DEBUG "%snames %u, fullness %u (%u%%)\n",
583 		       levels ? "" : "   ", names, space/bcount,
584 		       (space/bcount)*100/blocksize);
585 	return (struct stats) { names, space, bcount};
586 }
587 #endif /* DX_DEBUG */
588 
589 /*
590  * Probe for a directory leaf block to search.
591  *
592  * dx_probe can return ERR_BAD_DX_DIR, which means there was a format
593  * error in the directory index, and the caller should fall back to
594  * searching the directory normally.  The callers of dx_probe **MUST**
595  * check for this error code, and make sure it never gets reflected
596  * back to userspace.
597  */
598 static struct dx_frame *
599 dx_probe(const struct qstr *d_name, struct inode *dir,
600 	 struct dx_hash_info *hinfo, struct dx_frame *frame_in, int *err)
601 {
602 	unsigned count, indirect;
603 	struct dx_entry *at, *entries, *p, *q, *m;
604 	struct dx_root *root;
605 	struct buffer_head *bh;
606 	struct dx_frame *frame = frame_in;
607 	u32 hash;
608 
609 	frame->bh = NULL;
610 	if (!(bh = ext4_bread(NULL, dir, 0, 0, err))) {
611 		if (*err == 0)
612 			*err = ERR_BAD_DX_DIR;
613 		goto fail;
614 	}
615 	root = (struct dx_root *) bh->b_data;
616 	if (root->info.hash_version != DX_HASH_TEA &&
617 	    root->info.hash_version != DX_HASH_HALF_MD4 &&
618 	    root->info.hash_version != DX_HASH_LEGACY) {
619 		ext4_warning(dir->i_sb, "Unrecognised inode hash code %d",
620 			     root->info.hash_version);
621 		brelse(bh);
622 		*err = ERR_BAD_DX_DIR;
623 		goto fail;
624 	}
625 	hinfo->hash_version = root->info.hash_version;
626 	if (hinfo->hash_version <= DX_HASH_TEA)
627 		hinfo->hash_version += EXT4_SB(dir->i_sb)->s_hash_unsigned;
628 	hinfo->seed = EXT4_SB(dir->i_sb)->s_hash_seed;
629 	if (d_name)
630 		ext4fs_dirhash(d_name->name, d_name->len, hinfo);
631 	hash = hinfo->hash;
632 
633 	if (root->info.unused_flags & 1) {
634 		ext4_warning(dir->i_sb, "Unimplemented inode hash flags: %#06x",
635 			     root->info.unused_flags);
636 		brelse(bh);
637 		*err = ERR_BAD_DX_DIR;
638 		goto fail;
639 	}
640 
641 	if ((indirect = root->info.indirect_levels) > 1) {
642 		ext4_warning(dir->i_sb, "Unimplemented inode hash depth: %#06x",
643 			     root->info.indirect_levels);
644 		brelse(bh);
645 		*err = ERR_BAD_DX_DIR;
646 		goto fail;
647 	}
648 
649 	if (!buffer_verified(bh) &&
650 	    !ext4_dx_csum_verify(dir, (struct ext4_dir_entry *)bh->b_data)) {
651 		ext4_warning(dir->i_sb, "Root failed checksum");
652 		brelse(bh);
653 		*err = ERR_BAD_DX_DIR;
654 		goto fail;
655 	}
656 	set_buffer_verified(bh);
657 
658 	entries = (struct dx_entry *) (((char *)&root->info) +
659 				       root->info.info_length);
660 
661 	if (dx_get_limit(entries) != dx_root_limit(dir,
662 						   root->info.info_length)) {
663 		ext4_warning(dir->i_sb, "dx entry: limit != root limit");
664 		brelse(bh);
665 		*err = ERR_BAD_DX_DIR;
666 		goto fail;
667 	}
668 
669 	dxtrace(printk("Look up %x", hash));
670 	while (1)
671 	{
672 		count = dx_get_count(entries);
673 		if (!count || count > dx_get_limit(entries)) {
674 			ext4_warning(dir->i_sb,
675 				     "dx entry: no count or count > limit");
676 			brelse(bh);
677 			*err = ERR_BAD_DX_DIR;
678 			goto fail2;
679 		}
680 
681 		p = entries + 1;
682 		q = entries + count - 1;
683 		while (p <= q)
684 		{
685 			m = p + (q - p)/2;
686 			dxtrace(printk("."));
687 			if (dx_get_hash(m) > hash)
688 				q = m - 1;
689 			else
690 				p = m + 1;
691 		}
692 
693 		if (0) // linear search cross check
694 		{
695 			unsigned n = count - 1;
696 			at = entries;
697 			while (n--)
698 			{
699 				dxtrace(printk(","));
700 				if (dx_get_hash(++at) > hash)
701 				{
702 					at--;
703 					break;
704 				}
705 			}
706 			assert (at == p - 1);
707 		}
708 
709 		at = p - 1;
710 		dxtrace(printk(" %x->%u\n", at == entries? 0: dx_get_hash(at), dx_get_block(at)));
711 		frame->bh = bh;
712 		frame->entries = entries;
713 		frame->at = at;
714 		if (!indirect--) return frame;
715 		if (!(bh = ext4_bread(NULL, dir, dx_get_block(at), 0, err))) {
716 			if (!(*err))
717 				*err = ERR_BAD_DX_DIR;
718 			goto fail2;
719 		}
720 		at = entries = ((struct dx_node *) bh->b_data)->entries;
721 
722 		if (!buffer_verified(bh) &&
723 		    !ext4_dx_csum_verify(dir,
724 					 (struct ext4_dir_entry *)bh->b_data)) {
725 			ext4_warning(dir->i_sb, "Node failed checksum");
726 			brelse(bh);
727 			*err = ERR_BAD_DX_DIR;
728 			goto fail;
729 		}
730 		set_buffer_verified(bh);
731 
732 		if (dx_get_limit(entries) != dx_node_limit (dir)) {
733 			ext4_warning(dir->i_sb,
734 				     "dx entry: limit != node limit");
735 			brelse(bh);
736 			*err = ERR_BAD_DX_DIR;
737 			goto fail2;
738 		}
739 		frame++;
740 		frame->bh = NULL;
741 	}
742 fail2:
743 	while (frame >= frame_in) {
744 		brelse(frame->bh);
745 		frame--;
746 	}
747 fail:
748 	if (*err == ERR_BAD_DX_DIR)
749 		ext4_warning(dir->i_sb,
750 			     "Corrupt dir inode %lu, running e2fsck is "
751 			     "recommended.", dir->i_ino);
752 	return NULL;
753 }
754 
755 static void dx_release (struct dx_frame *frames)
756 {
757 	if (frames[0].bh == NULL)
758 		return;
759 
760 	if (((struct dx_root *) frames[0].bh->b_data)->info.indirect_levels)
761 		brelse(frames[1].bh);
762 	brelse(frames[0].bh);
763 }
764 
765 /*
766  * This function increments the frame pointer to search the next leaf
767  * block, and reads in the necessary intervening nodes if the search
768  * should be necessary.  Whether or not the search is necessary is
769  * controlled by the hash parameter.  If the hash value is even, then
770  * the search is only continued if the next block starts with that
771  * hash value.  This is used if we are searching for a specific file.
772  *
773  * If the hash value is HASH_NB_ALWAYS, then always go to the next block.
774  *
775  * This function returns 1 if the caller should continue to search,
776  * or 0 if it should not.  If there is an error reading one of the
777  * index blocks, it will a negative error code.
778  *
779  * If start_hash is non-null, it will be filled in with the starting
780  * hash of the next page.
781  */
782 static int ext4_htree_next_block(struct inode *dir, __u32 hash,
783 				 struct dx_frame *frame,
784 				 struct dx_frame *frames,
785 				 __u32 *start_hash)
786 {
787 	struct dx_frame *p;
788 	struct buffer_head *bh;
789 	int err, num_frames = 0;
790 	__u32 bhash;
791 
792 	p = frame;
793 	/*
794 	 * Find the next leaf page by incrementing the frame pointer.
795 	 * If we run out of entries in the interior node, loop around and
796 	 * increment pointer in the parent node.  When we break out of
797 	 * this loop, num_frames indicates the number of interior
798 	 * nodes need to be read.
799 	 */
800 	while (1) {
801 		if (++(p->at) < p->entries + dx_get_count(p->entries))
802 			break;
803 		if (p == frames)
804 			return 0;
805 		num_frames++;
806 		p--;
807 	}
808 
809 	/*
810 	 * If the hash is 1, then continue only if the next page has a
811 	 * continuation hash of any value.  This is used for readdir
812 	 * handling.  Otherwise, check to see if the hash matches the
813 	 * desired contiuation hash.  If it doesn't, return since
814 	 * there's no point to read in the successive index pages.
815 	 */
816 	bhash = dx_get_hash(p->at);
817 	if (start_hash)
818 		*start_hash = bhash;
819 	if ((hash & 1) == 0) {
820 		if ((bhash & ~1) != hash)
821 			return 0;
822 	}
823 	/*
824 	 * If the hash is HASH_NB_ALWAYS, we always go to the next
825 	 * block so no check is necessary
826 	 */
827 	while (num_frames--) {
828 		if (!(bh = ext4_bread(NULL, dir, dx_get_block(p->at),
829 				      0, &err))) {
830 			if (!err) {
831 				ext4_error(dir->i_sb,
832 					   "Directory hole detected on inode %lu\n",
833 					   dir->i_ino);
834 				return -EIO;
835 			}
836 			return err; /* Failure */
837 		}
838 
839 		if (!buffer_verified(bh) &&
840 		    !ext4_dx_csum_verify(dir,
841 					 (struct ext4_dir_entry *)bh->b_data)) {
842 			ext4_warning(dir->i_sb, "Node failed checksum");
843 			return -EIO;
844 		}
845 		set_buffer_verified(bh);
846 
847 		p++;
848 		brelse(p->bh);
849 		p->bh = bh;
850 		p->at = p->entries = ((struct dx_node *) bh->b_data)->entries;
851 	}
852 	return 1;
853 }
854 
855 
856 /*
857  * This function fills a red-black tree with information from a
858  * directory block.  It returns the number directory entries loaded
859  * into the tree.  If there is an error it is returned in err.
860  */
861 static int htree_dirblock_to_tree(struct file *dir_file,
862 				  struct inode *dir, ext4_lblk_t block,
863 				  struct dx_hash_info *hinfo,
864 				  __u32 start_hash, __u32 start_minor_hash)
865 {
866 	struct buffer_head *bh;
867 	struct ext4_dir_entry_2 *de, *top;
868 	int err = 0, count = 0;
869 
870 	dxtrace(printk(KERN_INFO "In htree dirblock_to_tree: block %lu\n",
871 							(unsigned long)block));
872 	if (!(bh = ext4_bread(NULL, dir, block, 0, &err))) {
873 		if (!err) {
874 			err = -EIO;
875 			ext4_error(dir->i_sb,
876 				   "Directory hole detected on inode %lu\n",
877 				   dir->i_ino);
878 		}
879 		return err;
880 	}
881 
882 	if (!buffer_verified(bh) &&
883 	    !ext4_dirent_csum_verify(dir, (struct ext4_dir_entry *)bh->b_data))
884 		return -EIO;
885 	set_buffer_verified(bh);
886 
887 	de = (struct ext4_dir_entry_2 *) bh->b_data;
888 	top = (struct ext4_dir_entry_2 *) ((char *) de +
889 					   dir->i_sb->s_blocksize -
890 					   EXT4_DIR_REC_LEN(0));
891 	for (; de < top; de = ext4_next_entry(de, dir->i_sb->s_blocksize)) {
892 		if (ext4_check_dir_entry(dir, NULL, de, bh,
893 				(block<<EXT4_BLOCK_SIZE_BITS(dir->i_sb))
894 					 + ((char *)de - bh->b_data))) {
895 			/* On error, skip the f_pos to the next block. */
896 			dir_file->f_pos = (dir_file->f_pos |
897 					(dir->i_sb->s_blocksize - 1)) + 1;
898 			brelse(bh);
899 			return count;
900 		}
901 		ext4fs_dirhash(de->name, de->name_len, hinfo);
902 		if ((hinfo->hash < start_hash) ||
903 		    ((hinfo->hash == start_hash) &&
904 		     (hinfo->minor_hash < start_minor_hash)))
905 			continue;
906 		if (de->inode == 0)
907 			continue;
908 		if ((err = ext4_htree_store_dirent(dir_file,
909 				   hinfo->hash, hinfo->minor_hash, de)) != 0) {
910 			brelse(bh);
911 			return err;
912 		}
913 		count++;
914 	}
915 	brelse(bh);
916 	return count;
917 }
918 
919 
920 /*
921  * This function fills a red-black tree with information from a
922  * directory.  We start scanning the directory in hash order, starting
923  * at start_hash and start_minor_hash.
924  *
925  * This function returns the number of entries inserted into the tree,
926  * or a negative error code.
927  */
928 int ext4_htree_fill_tree(struct file *dir_file, __u32 start_hash,
929 			 __u32 start_minor_hash, __u32 *next_hash)
930 {
931 	struct dx_hash_info hinfo;
932 	struct ext4_dir_entry_2 *de;
933 	struct dx_frame frames[2], *frame;
934 	struct inode *dir;
935 	ext4_lblk_t block;
936 	int count = 0;
937 	int ret, err;
938 	__u32 hashval;
939 
940 	dxtrace(printk(KERN_DEBUG "In htree_fill_tree, start hash: %x:%x\n",
941 		       start_hash, start_minor_hash));
942 	dir = dir_file->f_path.dentry->d_inode;
943 	if (!(ext4_test_inode_flag(dir, EXT4_INODE_INDEX))) {
944 		hinfo.hash_version = EXT4_SB(dir->i_sb)->s_def_hash_version;
945 		if (hinfo.hash_version <= DX_HASH_TEA)
946 			hinfo.hash_version +=
947 				EXT4_SB(dir->i_sb)->s_hash_unsigned;
948 		hinfo.seed = EXT4_SB(dir->i_sb)->s_hash_seed;
949 		count = htree_dirblock_to_tree(dir_file, dir, 0, &hinfo,
950 					       start_hash, start_minor_hash);
951 		*next_hash = ~0;
952 		return count;
953 	}
954 	hinfo.hash = start_hash;
955 	hinfo.minor_hash = 0;
956 	frame = dx_probe(NULL, dir, &hinfo, frames, &err);
957 	if (!frame)
958 		return err;
959 
960 	/* Add '.' and '..' from the htree header */
961 	if (!start_hash && !start_minor_hash) {
962 		de = (struct ext4_dir_entry_2 *) frames[0].bh->b_data;
963 		if ((err = ext4_htree_store_dirent(dir_file, 0, 0, de)) != 0)
964 			goto errout;
965 		count++;
966 	}
967 	if (start_hash < 2 || (start_hash ==2 && start_minor_hash==0)) {
968 		de = (struct ext4_dir_entry_2 *) frames[0].bh->b_data;
969 		de = ext4_next_entry(de, dir->i_sb->s_blocksize);
970 		if ((err = ext4_htree_store_dirent(dir_file, 2, 0, de)) != 0)
971 			goto errout;
972 		count++;
973 	}
974 
975 	while (1) {
976 		block = dx_get_block(frame->at);
977 		ret = htree_dirblock_to_tree(dir_file, dir, block, &hinfo,
978 					     start_hash, start_minor_hash);
979 		if (ret < 0) {
980 			err = ret;
981 			goto errout;
982 		}
983 		count += ret;
984 		hashval = ~0;
985 		ret = ext4_htree_next_block(dir, HASH_NB_ALWAYS,
986 					    frame, frames, &hashval);
987 		*next_hash = hashval;
988 		if (ret < 0) {
989 			err = ret;
990 			goto errout;
991 		}
992 		/*
993 		 * Stop if:  (a) there are no more entries, or
994 		 * (b) we have inserted at least one entry and the
995 		 * next hash value is not a continuation
996 		 */
997 		if ((ret == 0) ||
998 		    (count && ((hashval & 1) == 0)))
999 			break;
1000 	}
1001 	dx_release(frames);
1002 	dxtrace(printk(KERN_DEBUG "Fill tree: returned %d entries, "
1003 		       "next hash: %x\n", count, *next_hash));
1004 	return count;
1005 errout:
1006 	dx_release(frames);
1007 	return (err);
1008 }
1009 
1010 
1011 /*
1012  * Directory block splitting, compacting
1013  */
1014 
1015 /*
1016  * Create map of hash values, offsets, and sizes, stored at end of block.
1017  * Returns number of entries mapped.
1018  */
1019 static int dx_make_map(struct ext4_dir_entry_2 *de, unsigned blocksize,
1020 		       struct dx_hash_info *hinfo,
1021 		       struct dx_map_entry *map_tail)
1022 {
1023 	int count = 0;
1024 	char *base = (char *) de;
1025 	struct dx_hash_info h = *hinfo;
1026 
1027 	while ((char *) de < base + blocksize) {
1028 		if (de->name_len && de->inode) {
1029 			ext4fs_dirhash(de->name, de->name_len, &h);
1030 			map_tail--;
1031 			map_tail->hash = h.hash;
1032 			map_tail->offs = ((char *) de - base)>>2;
1033 			map_tail->size = le16_to_cpu(de->rec_len);
1034 			count++;
1035 			cond_resched();
1036 		}
1037 		/* XXX: do we need to check rec_len == 0 case? -Chris */
1038 		de = ext4_next_entry(de, blocksize);
1039 	}
1040 	return count;
1041 }
1042 
1043 /* Sort map by hash value */
1044 static void dx_sort_map (struct dx_map_entry *map, unsigned count)
1045 {
1046 	struct dx_map_entry *p, *q, *top = map + count - 1;
1047 	int more;
1048 	/* Combsort until bubble sort doesn't suck */
1049 	while (count > 2) {
1050 		count = count*10/13;
1051 		if (count - 9 < 2) /* 9, 10 -> 11 */
1052 			count = 11;
1053 		for (p = top, q = p - count; q >= map; p--, q--)
1054 			if (p->hash < q->hash)
1055 				swap(*p, *q);
1056 	}
1057 	/* Garden variety bubble sort */
1058 	do {
1059 		more = 0;
1060 		q = top;
1061 		while (q-- > map) {
1062 			if (q[1].hash >= q[0].hash)
1063 				continue;
1064 			swap(*(q+1), *q);
1065 			more = 1;
1066 		}
1067 	} while(more);
1068 }
1069 
1070 static void dx_insert_block(struct dx_frame *frame, u32 hash, ext4_lblk_t block)
1071 {
1072 	struct dx_entry *entries = frame->entries;
1073 	struct dx_entry *old = frame->at, *new = old + 1;
1074 	int count = dx_get_count(entries);
1075 
1076 	assert(count < dx_get_limit(entries));
1077 	assert(old < entries + count);
1078 	memmove(new + 1, new, (char *)(entries + count) - (char *)(new));
1079 	dx_set_hash(new, hash);
1080 	dx_set_block(new, block);
1081 	dx_set_count(entries, count + 1);
1082 }
1083 
1084 static void ext4_update_dx_flag(struct inode *inode)
1085 {
1086 	if (!EXT4_HAS_COMPAT_FEATURE(inode->i_sb,
1087 				     EXT4_FEATURE_COMPAT_DIR_INDEX))
1088 		ext4_clear_inode_flag(inode, EXT4_INODE_INDEX);
1089 }
1090 
1091 /*
1092  * NOTE! unlike strncmp, ext4_match returns 1 for success, 0 for failure.
1093  *
1094  * `len <= EXT4_NAME_LEN' is guaranteed by caller.
1095  * `de != NULL' is guaranteed by caller.
1096  */
1097 static inline int ext4_match (int len, const char * const name,
1098 			      struct ext4_dir_entry_2 * de)
1099 {
1100 	if (len != de->name_len)
1101 		return 0;
1102 	if (!de->inode)
1103 		return 0;
1104 	return !memcmp(name, de->name, len);
1105 }
1106 
1107 /*
1108  * Returns 0 if not found, -1 on failure, and 1 on success
1109  */
1110 static inline int search_dirblock(struct buffer_head *bh,
1111 				  struct inode *dir,
1112 				  const struct qstr *d_name,
1113 				  unsigned int offset,
1114 				  struct ext4_dir_entry_2 ** res_dir)
1115 {
1116 	struct ext4_dir_entry_2 * de;
1117 	char * dlimit;
1118 	int de_len;
1119 	const char *name = d_name->name;
1120 	int namelen = d_name->len;
1121 
1122 	de = (struct ext4_dir_entry_2 *) bh->b_data;
1123 	dlimit = bh->b_data + dir->i_sb->s_blocksize;
1124 	while ((char *) de < dlimit) {
1125 		/* this code is executed quadratically often */
1126 		/* do minimal checking `by hand' */
1127 
1128 		if ((char *) de + namelen <= dlimit &&
1129 		    ext4_match (namelen, name, de)) {
1130 			/* found a match - just to be sure, do a full check */
1131 			if (ext4_check_dir_entry(dir, NULL, de, bh, offset))
1132 				return -1;
1133 			*res_dir = de;
1134 			return 1;
1135 		}
1136 		/* prevent looping on a bad block */
1137 		de_len = ext4_rec_len_from_disk(de->rec_len,
1138 						dir->i_sb->s_blocksize);
1139 		if (de_len <= 0)
1140 			return -1;
1141 		offset += de_len;
1142 		de = (struct ext4_dir_entry_2 *) ((char *) de + de_len);
1143 	}
1144 	return 0;
1145 }
1146 
1147 
/*
 *	ext4_find_entry()
 *
 * Finds the entry named d_name in the directory dir.  On success it
 * returns the buffer_head of the block holding the entry and stores a
 * pointer to the entry itself in *res_dir.  It does NOT read the inode of
 * the entry - you'll have to do that yourself if you want to.
 *
 * Returns NULL (with *res_dir set to NULL) when the name is longer than
 * EXT4_NAME_LEN, when it is not found, or when search_dirblock() reports
 * a bad block.
 *
 * For indexed directories ext4_dx_find_entry() is tried first; the linear
 * scan below is used only when that reports ERR_BAD_DX_DIR, or for
 * non-indexed directories and the "." / ".." names.
 *
 * The returned buffer_head has ->b_count elevated.  The caller is expected
 * to brelse() it when appropriate.
 */
static struct buffer_head * ext4_find_entry (struct inode *dir,
					const struct qstr *d_name,
					struct ext4_dir_entry_2 ** res_dir)
{
	struct super_block *sb;
	struct buffer_head *bh_use[NAMEI_RA_SIZE];
	struct buffer_head *bh, *ret = NULL;
	ext4_lblk_t start, block, b;
	const u8 *name = d_name->name;
	int ra_max = 0;		/* Number of bh's in the readahead
				   buffer, bh_use[] */
	int ra_ptr = 0;		/* Current index into readahead
				   buffer */
	int num = 0;		/* Number of blocks handed to the
				   readahead code so far */
	ext4_lblk_t  nblocks;
	int i, err;
	int namelen;

	*res_dir = NULL;
	sb = dir->i_sb;
	namelen = d_name->len;
	if (namelen > EXT4_NAME_LEN)
		return NULL;
	if ((namelen <= 2) && (name[0] == '.') &&
	    (name[1] == '.' || name[1] == '\0')) {
		/*
		 * "." or ".." will only be in the first block
		 * NFS may look up ".."; "." should be handled by the VFS
		 */
		block = start = 0;
		nblocks = 1;
		goto restart;
	}
	if (is_dx(dir)) {
		bh = ext4_dx_find_entry(dir, d_name, res_dir, &err);
		/*
		 * On success, or if the error was file not found,
		 * return.  Otherwise, fall back to doing a search the
		 * old fashioned way.
		 */
		if (bh || (err != ERR_BAD_DX_DIR))
			return bh;
		dxtrace(printk(KERN_DEBUG "ext4_find_entry: dx failed, "
			       "falling back\n"));
	}
	/*
	 * Linear scan: begin at the block where the previous lookup in this
	 * directory succeeded and wrap around to cover every block once.
	 */
	nblocks = dir->i_size >> EXT4_BLOCK_SIZE_BITS(sb);
	start = EXT4_I(dir)->i_dir_start_lookup;
	if (start >= nblocks)
		start = 0;
	block = start;
restart:
	do {
		/*
		 * We deal with the read-ahead logic here.
		 */
		if (ra_ptr >= ra_max) {
			/* Refill the readahead buffer */
			ra_ptr = 0;
			b = block;
			for (ra_max = 0; ra_max < NAMEI_RA_SIZE; ra_max++) {
				/*
				 * Terminate if we reach the end of the
				 * directory and must wrap, or if our
				 * search has finished at this block.
				 */
				if (b >= nblocks || (num && block == start)) {
					bh_use[ra_max] = NULL;
					break;
				}
				num++;
				/*
				 * err is not examined here; a NULL bh is
				 * simply skipped by the consumer below.
				 */
				bh = ext4_getblk(NULL, dir, b++, 0, &err);
				bh_use[ra_max] = bh;
				if (bh)
					ll_rw_block(READ | REQ_META | REQ_PRIO,
						    1, &bh);
			}
		}
		/* Take the next buffer out of the readahead window */
		if ((bh = bh_use[ra_ptr++]) == NULL)
			goto next;
		wait_on_buffer(bh);
		if (!buffer_uptodate(bh)) {
			/* read error, skip block & hope for the best */
			EXT4_ERROR_INODE(dir, "reading directory lblock %lu",
					 (unsigned long) block);
			brelse(bh);
			goto next;
		}
		/* A block that fails its checksum is reported and skipped */
		if (!buffer_verified(bh) &&
		    !ext4_dirent_csum_verify(dir,
				(struct ext4_dir_entry *)bh->b_data)) {
			EXT4_ERROR_INODE(dir, "checksumming directory "
					 "block %lu", (unsigned long)block);
			brelse(bh);
			goto next;
		}
		set_buffer_verified(bh);
		i = search_dirblock(bh, dir, d_name,
			    block << EXT4_BLOCK_SIZE_BITS(sb), res_dir);
		if (i == 1) {
			/* Hit: remember the block as the next starting point */
			EXT4_I(dir)->i_dir_start_lookup = block;
			ret = bh;
			goto cleanup_and_exit;
		} else {
			brelse(bh);
			/* A negative result ends the search with ret == NULL */
			if (i < 0)
				goto cleanup_and_exit;
		}
	next:
		/* Advance, wrapping from the last block back to block 0 */
		if (++block >= nblocks)
			block = 0;
	} while (block != start);

	/*
	 * If the directory has grown while we were searching, then
	 * search the last part of the directory before giving up.
	 */
	block = nblocks;
	nblocks = dir->i_size >> EXT4_BLOCK_SIZE_BITS(sb);
	if (block < nblocks) {
		start = 0;
		goto restart;
	}

cleanup_and_exit:
	/* Clean up the read-ahead blocks */
	for (; ra_ptr < ra_max; ra_ptr++)
		brelse(bh_use[ra_ptr]);
	return ret;
}
1288 
1289 static struct buffer_head * ext4_dx_find_entry(struct inode *dir, const struct qstr *d_name,
1290 		       struct ext4_dir_entry_2 **res_dir, int *err)
1291 {
1292 	struct super_block * sb = dir->i_sb;
1293 	struct dx_hash_info	hinfo;
1294 	struct dx_frame frames[2], *frame;
1295 	struct buffer_head *bh;
1296 	ext4_lblk_t block;
1297 	int retval;
1298 
1299 	if (!(frame = dx_probe(d_name, dir, &hinfo, frames, err)))
1300 		return NULL;
1301 	do {
1302 		block = dx_get_block(frame->at);
1303 		if (!(bh = ext4_bread(NULL, dir, block, 0, err))) {
1304 			if (!(*err)) {
1305 				*err = -EIO;
1306 				ext4_error(dir->i_sb,
1307 					   "Directory hole detected on inode %lu\n",
1308 					   dir->i_ino);
1309 			}
1310 			goto errout;
1311 		}
1312 
1313 		if (!buffer_verified(bh) &&
1314 		    !ext4_dirent_csum_verify(dir,
1315 				(struct ext4_dir_entry *)bh->b_data)) {
1316 			EXT4_ERROR_INODE(dir, "checksumming directory "
1317 					 "block %lu", (unsigned long)block);
1318 			brelse(bh);
1319 			*err = -EIO;
1320 			goto errout;
1321 		}
1322 		set_buffer_verified(bh);
1323 		retval = search_dirblock(bh, dir, d_name,
1324 					 block << EXT4_BLOCK_SIZE_BITS(sb),
1325 					 res_dir);
1326 		if (retval == 1) { 	/* Success! */
1327 			dx_release(frames);
1328 			return bh;
1329 		}
1330 		brelse(bh);
1331 		if (retval == -1) {
1332 			*err = ERR_BAD_DX_DIR;
1333 			goto errout;
1334 		}
1335 
1336 		/* Check to see if we should continue to search */
1337 		retval = ext4_htree_next_block(dir, hinfo.hash, frame,
1338 					       frames, NULL);
1339 		if (retval < 0) {
1340 			ext4_warning(sb,
1341 			     "error reading index page in directory #%lu",
1342 			     dir->i_ino);
1343 			*err = retval;
1344 			goto errout;
1345 		}
1346 	} while (retval == 1);
1347 
1348 	*err = -ENOENT;
1349 errout:
1350 	dxtrace(printk(KERN_DEBUG "%s not found\n", d_name->name));
1351 	dx_release (frames);
1352 	return NULL;
1353 }
1354 
1355 static struct dentry *ext4_lookup(struct inode *dir, struct dentry *dentry, unsigned int flags)
1356 {
1357 	struct inode *inode;
1358 	struct ext4_dir_entry_2 *de;
1359 	struct buffer_head *bh;
1360 
1361 	if (dentry->d_name.len > EXT4_NAME_LEN)
1362 		return ERR_PTR(-ENAMETOOLONG);
1363 
1364 	bh = ext4_find_entry(dir, &dentry->d_name, &de);
1365 	inode = NULL;
1366 	if (bh) {
1367 		__u32 ino = le32_to_cpu(de->inode);
1368 		brelse(bh);
1369 		if (!ext4_valid_inum(dir->i_sb, ino)) {
1370 			EXT4_ERROR_INODE(dir, "bad inode number: %u", ino);
1371 			return ERR_PTR(-EIO);
1372 		}
1373 		if (unlikely(ino == dir->i_ino)) {
1374 			EXT4_ERROR_INODE(dir, "'%.*s' linked to parent dir",
1375 					 dentry->d_name.len,
1376 					 dentry->d_name.name);
1377 			return ERR_PTR(-EIO);
1378 		}
1379 		inode = ext4_iget(dir->i_sb, ino);
1380 		if (inode == ERR_PTR(-ESTALE)) {
1381 			EXT4_ERROR_INODE(dir,
1382 					 "deleted inode referenced: %u",
1383 					 ino);
1384 			return ERR_PTR(-EIO);
1385 		}
1386 	}
1387 	return d_splice_alias(inode, dentry);
1388 }
1389 
1390 
1391 struct dentry *ext4_get_parent(struct dentry *child)
1392 {
1393 	__u32 ino;
1394 	static const struct qstr dotdot = QSTR_INIT("..", 2);
1395 	struct ext4_dir_entry_2 * de;
1396 	struct buffer_head *bh;
1397 
1398 	bh = ext4_find_entry(child->d_inode, &dotdot, &de);
1399 	if (!bh)
1400 		return ERR_PTR(-ENOENT);
1401 	ino = le32_to_cpu(de->inode);
1402 	brelse(bh);
1403 
1404 	if (!ext4_valid_inum(child->d_inode->i_sb, ino)) {
1405 		EXT4_ERROR_INODE(child->d_inode,
1406 				 "bad parent inode number: %u", ino);
1407 		return ERR_PTR(-EIO);
1408 	}
1409 
1410 	return d_obtain_alias(ext4_iget(child->d_inode->i_sb, ino));
1411 }
1412 
1413 #define S_SHIFT 12
1414 static unsigned char ext4_type_by_mode[S_IFMT >> S_SHIFT] = {
1415 	[S_IFREG >> S_SHIFT]	= EXT4_FT_REG_FILE,
1416 	[S_IFDIR >> S_SHIFT]	= EXT4_FT_DIR,
1417 	[S_IFCHR >> S_SHIFT]	= EXT4_FT_CHRDEV,
1418 	[S_IFBLK >> S_SHIFT]	= EXT4_FT_BLKDEV,
1419 	[S_IFIFO >> S_SHIFT]	= EXT4_FT_FIFO,
1420 	[S_IFSOCK >> S_SHIFT]	= EXT4_FT_SOCK,
1421 	[S_IFLNK >> S_SHIFT]	= EXT4_FT_SYMLINK,
1422 };
1423 
1424 static inline void ext4_set_de_type(struct super_block *sb,
1425 				struct ext4_dir_entry_2 *de,
1426 				umode_t mode) {
1427 	if (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_FILETYPE))
1428 		de->file_type = ext4_type_by_mode[(mode & S_IFMT)>>S_SHIFT];
1429 }
1430 
1431 /*
1432  * Move count entries from end of map between two memory locations.
1433  * Returns pointer to last entry moved.
1434  */
1435 static struct ext4_dir_entry_2 *
1436 dx_move_dirents(char *from, char *to, struct dx_map_entry *map, int count,
1437 		unsigned blocksize)
1438 {
1439 	unsigned rec_len = 0;
1440 
1441 	while (count--) {
1442 		struct ext4_dir_entry_2 *de = (struct ext4_dir_entry_2 *)
1443 						(from + (map->offs<<2));
1444 		rec_len = EXT4_DIR_REC_LEN(de->name_len);
1445 		memcpy (to, de, rec_len);
1446 		((struct ext4_dir_entry_2 *) to)->rec_len =
1447 				ext4_rec_len_to_disk(rec_len, blocksize);
1448 		de->inode = 0;
1449 		map++;
1450 		to += rec_len;
1451 	}
1452 	return (struct ext4_dir_entry_2 *) (to - rec_len);
1453 }
1454 
1455 /*
1456  * Compact each dir entry in the range to the minimal rec_len.
1457  * Returns pointer to last entry in range.
1458  */
1459 static struct ext4_dir_entry_2* dx_pack_dirents(char *base, unsigned blocksize)
1460 {
1461 	struct ext4_dir_entry_2 *next, *to, *prev, *de = (struct ext4_dir_entry_2 *) base;
1462 	unsigned rec_len = 0;
1463 
1464 	prev = to = de;
1465 	while ((char*)de < base + blocksize) {
1466 		next = ext4_next_entry(de, blocksize);
1467 		if (de->inode && de->name_len) {
1468 			rec_len = EXT4_DIR_REC_LEN(de->name_len);
1469 			if (de > to)
1470 				memmove(to, de, rec_len);
1471 			to->rec_len = ext4_rec_len_to_disk(rec_len, blocksize);
1472 			prev = to;
1473 			to = (struct ext4_dir_entry_2 *) (((char *) to) + rec_len);
1474 		}
1475 		de = next;
1476 	}
1477 	return prev;
1478 }
1479 
1480 /*
1481  * Split a full leaf block to make room for a new dir entry.
1482  * Allocate a new block, and move entries so that they are approx. equally full.
1483  * Returns pointer to de in block into which the new entry will be inserted.
1484  */
1485 static struct ext4_dir_entry_2 *do_split(handle_t *handle, struct inode *dir,
1486 			struct buffer_head **bh,struct dx_frame *frame,
1487 			struct dx_hash_info *hinfo, int *error)
1488 {
1489 	unsigned blocksize = dir->i_sb->s_blocksize;
1490 	unsigned count, continued;
1491 	struct buffer_head *bh2;
1492 	ext4_lblk_t newblock;
1493 	u32 hash2;
1494 	struct dx_map_entry *map;
1495 	char *data1 = (*bh)->b_data, *data2;
1496 	unsigned split, move, size;
1497 	struct ext4_dir_entry_2 *de = NULL, *de2;
1498 	struct ext4_dir_entry_tail *t;
1499 	int	csum_size = 0;
1500 	int	err = 0, i;
1501 
1502 	if (EXT4_HAS_RO_COMPAT_FEATURE(dir->i_sb,
1503 				       EXT4_FEATURE_RO_COMPAT_METADATA_CSUM))
1504 		csum_size = sizeof(struct ext4_dir_entry_tail);
1505 
1506 	bh2 = ext4_append (handle, dir, &newblock, &err);
1507 	if (!(bh2)) {
1508 		brelse(*bh);
1509 		*bh = NULL;
1510 		goto errout;
1511 	}
1512 
1513 	BUFFER_TRACE(*bh, "get_write_access");
1514 	err = ext4_journal_get_write_access(handle, *bh);
1515 	if (err)
1516 		goto journal_error;
1517 
1518 	BUFFER_TRACE(frame->bh, "get_write_access");
1519 	err = ext4_journal_get_write_access(handle, frame->bh);
1520 	if (err)
1521 		goto journal_error;
1522 
1523 	data2 = bh2->b_data;
1524 
1525 	/* create map in the end of data2 block */
1526 	map = (struct dx_map_entry *) (data2 + blocksize);
1527 	count = dx_make_map((struct ext4_dir_entry_2 *) data1,
1528 			     blocksize, hinfo, map);
1529 	map -= count;
1530 	dx_sort_map(map, count);
1531 	/* Split the existing block in the middle, size-wise */
1532 	size = 0;
1533 	move = 0;
1534 	for (i = count-1; i >= 0; i--) {
1535 		/* is more than half of this entry in 2nd half of the block? */
1536 		if (size + map[i].size/2 > blocksize/2)
1537 			break;
1538 		size += map[i].size;
1539 		move++;
1540 	}
1541 	/* map index at which we will split */
1542 	split = count - move;
1543 	hash2 = map[split].hash;
1544 	continued = hash2 == map[split - 1].hash;
1545 	dxtrace(printk(KERN_INFO "Split block %lu at %x, %i/%i\n",
1546 			(unsigned long)dx_get_block(frame->at),
1547 					hash2, split, count-split));
1548 
1549 	/* Fancy dance to stay within two buffers */
1550 	de2 = dx_move_dirents(data1, data2, map + split, count - split, blocksize);
1551 	de = dx_pack_dirents(data1, blocksize);
1552 	de->rec_len = ext4_rec_len_to_disk(data1 + (blocksize - csum_size) -
1553 					   (char *) de,
1554 					   blocksize);
1555 	de2->rec_len = ext4_rec_len_to_disk(data2 + (blocksize - csum_size) -
1556 					    (char *) de2,
1557 					    blocksize);
1558 	if (csum_size) {
1559 		t = EXT4_DIRENT_TAIL(data2, blocksize);
1560 		initialize_dirent_tail(t, blocksize);
1561 
1562 		t = EXT4_DIRENT_TAIL(data1, blocksize);
1563 		initialize_dirent_tail(t, blocksize);
1564 	}
1565 
1566 	dxtrace(dx_show_leaf (hinfo, (struct ext4_dir_entry_2 *) data1, blocksize, 1));
1567 	dxtrace(dx_show_leaf (hinfo, (struct ext4_dir_entry_2 *) data2, blocksize, 1));
1568 
1569 	/* Which block gets the new entry? */
1570 	if (hinfo->hash >= hash2)
1571 	{
1572 		swap(*bh, bh2);
1573 		de = de2;
1574 	}
1575 	dx_insert_block(frame, hash2 + continued, newblock);
1576 	err = ext4_handle_dirty_dirent_node(handle, dir, bh2);
1577 	if (err)
1578 		goto journal_error;
1579 	err = ext4_handle_dirty_dx_node(handle, dir, frame->bh);
1580 	if (err)
1581 		goto journal_error;
1582 	brelse(bh2);
1583 	dxtrace(dx_show_index("frame", frame->entries));
1584 	return de;
1585 
1586 journal_error:
1587 	brelse(*bh);
1588 	brelse(bh2);
1589 	*bh = NULL;
1590 	ext4_std_error(dir->i_sb, err);
1591 errout:
1592 	*error = err;
1593 	return NULL;
1594 }
1595 
1596 /*
1597  * Add a new entry into a directory (leaf) block.  If de is non-NULL,
1598  * it points to a directory entry which is guaranteed to be large
1599  * enough for new directory entry.  If de is NULL, then
1600  * add_dirent_to_buf will attempt search the directory block for
1601  * space.  It will return -ENOSPC if no space is available, and -EIO
1602  * and -EEXIST if directory entry already exists.
1603  */
1604 static int add_dirent_to_buf(handle_t *handle, struct dentry *dentry,
1605 			     struct inode *inode, struct ext4_dir_entry_2 *de,
1606 			     struct buffer_head *bh)
1607 {
1608 	struct inode	*dir = dentry->d_parent->d_inode;
1609 	const char	*name = dentry->d_name.name;
1610 	int		namelen = dentry->d_name.len;
1611 	unsigned int	offset = 0;
1612 	unsigned int	blocksize = dir->i_sb->s_blocksize;
1613 	unsigned short	reclen;
1614 	int		nlen, rlen, err;
1615 	char		*top;
1616 	int		csum_size = 0;
1617 
1618 	if (EXT4_HAS_RO_COMPAT_FEATURE(inode->i_sb,
1619 				       EXT4_FEATURE_RO_COMPAT_METADATA_CSUM))
1620 		csum_size = sizeof(struct ext4_dir_entry_tail);
1621 
1622 	reclen = EXT4_DIR_REC_LEN(namelen);
1623 	if (!de) {
1624 		de = (struct ext4_dir_entry_2 *)bh->b_data;
1625 		top = bh->b_data + (blocksize - csum_size) - reclen;
1626 		while ((char *) de <= top) {
1627 			if (ext4_check_dir_entry(dir, NULL, de, bh, offset))
1628 				return -EIO;
1629 			if (ext4_match(namelen, name, de))
1630 				return -EEXIST;
1631 			nlen = EXT4_DIR_REC_LEN(de->name_len);
1632 			rlen = ext4_rec_len_from_disk(de->rec_len, blocksize);
1633 			if ((de->inode? rlen - nlen: rlen) >= reclen)
1634 				break;
1635 			de = (struct ext4_dir_entry_2 *)((char *)de + rlen);
1636 			offset += rlen;
1637 		}
1638 		if ((char *) de > top)
1639 			return -ENOSPC;
1640 	}
1641 	BUFFER_TRACE(bh, "get_write_access");
1642 	err = ext4_journal_get_write_access(handle, bh);
1643 	if (err) {
1644 		ext4_std_error(dir->i_sb, err);
1645 		return err;
1646 	}
1647 
1648 	/* By now the buffer is marked for journaling */
1649 	nlen = EXT4_DIR_REC_LEN(de->name_len);
1650 	rlen = ext4_rec_len_from_disk(de->rec_len, blocksize);
1651 	if (de->inode) {
1652 		struct ext4_dir_entry_2 *de1 = (struct ext4_dir_entry_2 *)((char *)de + nlen);
1653 		de1->rec_len = ext4_rec_len_to_disk(rlen - nlen, blocksize);
1654 		de->rec_len = ext4_rec_len_to_disk(nlen, blocksize);
1655 		de = de1;
1656 	}
1657 	de->file_type = EXT4_FT_UNKNOWN;
1658 	de->inode = cpu_to_le32(inode->i_ino);
1659 	ext4_set_de_type(dir->i_sb, de, inode->i_mode);
1660 	de->name_len = namelen;
1661 	memcpy(de->name, name, namelen);
1662 	/*
1663 	 * XXX shouldn't update any times until successful
1664 	 * completion of syscall, but too many callers depend
1665 	 * on this.
1666 	 *
1667 	 * XXX similarly, too many callers depend on
1668 	 * ext4_new_inode() setting the times, but error
1669 	 * recovery deletes the inode, so the worst that can
1670 	 * happen is that the times are slightly out of date
1671 	 * and/or different from the directory change time.
1672 	 */
1673 	dir->i_mtime = dir->i_ctime = ext4_current_time(dir);
1674 	ext4_update_dx_flag(dir);
1675 	dir->i_version++;
1676 	ext4_mark_inode_dirty(handle, dir);
1677 	BUFFER_TRACE(bh, "call ext4_handle_dirty_metadata");
1678 	err = ext4_handle_dirty_dirent_node(handle, dir, bh);
1679 	if (err)
1680 		ext4_std_error(dir->i_sb, err);
1681 	return 0;
1682 }
1683 
1684 /*
1685  * This converts a one block unindexed directory to a 3 block indexed
1686  * directory, and adds the dentry to the indexed directory.
1687  */
1688 static int make_indexed_dir(handle_t *handle, struct dentry *dentry,
1689 			    struct inode *inode, struct buffer_head *bh)
1690 {
1691 	struct inode	*dir = dentry->d_parent->d_inode;
1692 	const char	*name = dentry->d_name.name;
1693 	int		namelen = dentry->d_name.len;
1694 	struct buffer_head *bh2;
1695 	struct dx_root	*root;
1696 	struct dx_frame	frames[2], *frame;
1697 	struct dx_entry *entries;
1698 	struct ext4_dir_entry_2	*de, *de2;
1699 	struct ext4_dir_entry_tail *t;
1700 	char		*data1, *top;
1701 	unsigned	len;
1702 	int		retval;
1703 	unsigned	blocksize;
1704 	struct dx_hash_info hinfo;
1705 	ext4_lblk_t  block;
1706 	struct fake_dirent *fde;
1707 	int		csum_size = 0;
1708 
1709 	if (EXT4_HAS_RO_COMPAT_FEATURE(inode->i_sb,
1710 				       EXT4_FEATURE_RO_COMPAT_METADATA_CSUM))
1711 		csum_size = sizeof(struct ext4_dir_entry_tail);
1712 
1713 	blocksize =  dir->i_sb->s_blocksize;
1714 	dxtrace(printk(KERN_DEBUG "Creating index: inode %lu\n", dir->i_ino));
1715 	retval = ext4_journal_get_write_access(handle, bh);
1716 	if (retval) {
1717 		ext4_std_error(dir->i_sb, retval);
1718 		brelse(bh);
1719 		return retval;
1720 	}
1721 	root = (struct dx_root *) bh->b_data;
1722 
1723 	/* The 0th block becomes the root, move the dirents out */
1724 	fde = &root->dotdot;
1725 	de = (struct ext4_dir_entry_2 *)((char *)fde +
1726 		ext4_rec_len_from_disk(fde->rec_len, blocksize));
1727 	if ((char *) de >= (((char *) root) + blocksize)) {
1728 		EXT4_ERROR_INODE(dir, "invalid rec_len for '..'");
1729 		brelse(bh);
1730 		return -EIO;
1731 	}
1732 	len = ((char *) root) + (blocksize - csum_size) - (char *) de;
1733 
1734 	/* Allocate new block for the 0th block's dirents */
1735 	bh2 = ext4_append(handle, dir, &block, &retval);
1736 	if (!(bh2)) {
1737 		brelse(bh);
1738 		return retval;
1739 	}
1740 	ext4_set_inode_flag(dir, EXT4_INODE_INDEX);
1741 	data1 = bh2->b_data;
1742 
1743 	memcpy (data1, de, len);
1744 	de = (struct ext4_dir_entry_2 *) data1;
1745 	top = data1 + len;
1746 	while ((char *)(de2 = ext4_next_entry(de, blocksize)) < top)
1747 		de = de2;
1748 	de->rec_len = ext4_rec_len_to_disk(data1 + (blocksize - csum_size) -
1749 					   (char *) de,
1750 					   blocksize);
1751 
1752 	if (csum_size) {
1753 		t = EXT4_DIRENT_TAIL(data1, blocksize);
1754 		initialize_dirent_tail(t, blocksize);
1755 	}
1756 
1757 	/* Initialize the root; the dot dirents already exist */
1758 	de = (struct ext4_dir_entry_2 *) (&root->dotdot);
1759 	de->rec_len = ext4_rec_len_to_disk(blocksize - EXT4_DIR_REC_LEN(2),
1760 					   blocksize);
1761 	memset (&root->info, 0, sizeof(root->info));
1762 	root->info.info_length = sizeof(root->info);
1763 	root->info.hash_version = EXT4_SB(dir->i_sb)->s_def_hash_version;
1764 	entries = root->entries;
1765 	dx_set_block(entries, 1);
1766 	dx_set_count(entries, 1);
1767 	dx_set_limit(entries, dx_root_limit(dir, sizeof(root->info)));
1768 
1769 	/* Initialize as for dx_probe */
1770 	hinfo.hash_version = root->info.hash_version;
1771 	if (hinfo.hash_version <= DX_HASH_TEA)
1772 		hinfo.hash_version += EXT4_SB(dir->i_sb)->s_hash_unsigned;
1773 	hinfo.seed = EXT4_SB(dir->i_sb)->s_hash_seed;
1774 	ext4fs_dirhash(name, namelen, &hinfo);
1775 	frame = frames;
1776 	frame->entries = entries;
1777 	frame->at = entries;
1778 	frame->bh = bh;
1779 	bh = bh2;
1780 
1781 	ext4_handle_dirty_dx_node(handle, dir, frame->bh);
1782 	ext4_handle_dirty_dirent_node(handle, dir, bh);
1783 
1784 	de = do_split(handle,dir, &bh, frame, &hinfo, &retval);
1785 	if (!de) {
1786 		/*
1787 		 * Even if the block split failed, we have to properly write
1788 		 * out all the changes we did so far. Otherwise we can end up
1789 		 * with corrupted filesystem.
1790 		 */
1791 		ext4_mark_inode_dirty(handle, dir);
1792 		dx_release(frames);
1793 		return retval;
1794 	}
1795 	dx_release(frames);
1796 
1797 	retval = add_dirent_to_buf(handle, dentry, inode, de, bh);
1798 	brelse(bh);
1799 	return retval;
1800 }
1801 
1802 /*
1803  *	ext4_add_entry()
1804  *
1805  * adds a file entry to the specified directory, using the same
1806  * semantics as ext4_find_entry(). It returns NULL if it failed.
1807  *
1808  * NOTE!! The inode part of 'de' is left at 0 - which means you
1809  * may not sleep between calling this and putting something into
1810  * the entry, as someone else might have used it while you slept.
1811  */
1812 static int ext4_add_entry(handle_t *handle, struct dentry *dentry,
1813 			  struct inode *inode)
1814 {
1815 	struct inode *dir = dentry->d_parent->d_inode;
1816 	struct buffer_head *bh;
1817 	struct ext4_dir_entry_2 *de;
1818 	struct ext4_dir_entry_tail *t;
1819 	struct super_block *sb;
1820 	int	retval;
1821 	int	dx_fallback=0;
1822 	unsigned blocksize;
1823 	ext4_lblk_t block, blocks;
1824 	int	csum_size = 0;
1825 
1826 	if (EXT4_HAS_RO_COMPAT_FEATURE(inode->i_sb,
1827 				       EXT4_FEATURE_RO_COMPAT_METADATA_CSUM))
1828 		csum_size = sizeof(struct ext4_dir_entry_tail);
1829 
1830 	sb = dir->i_sb;
1831 	blocksize = sb->s_blocksize;
1832 	if (!dentry->d_name.len)
1833 		return -EINVAL;
1834 	if (is_dx(dir)) {
1835 		retval = ext4_dx_add_entry(handle, dentry, inode);
1836 		if (!retval || (retval != ERR_BAD_DX_DIR))
1837 			return retval;
1838 		ext4_clear_inode_flag(dir, EXT4_INODE_INDEX);
1839 		dx_fallback++;
1840 		ext4_mark_inode_dirty(handle, dir);
1841 	}
1842 	blocks = dir->i_size >> sb->s_blocksize_bits;
1843 	for (block = 0; block < blocks; block++) {
1844 		if (!(bh = ext4_bread(handle, dir, block, 0, &retval))) {
1845 			if (!retval) {
1846 				retval = -EIO;
1847 				ext4_error(inode->i_sb,
1848 					   "Directory hole detected on inode %lu\n",
1849 					   inode->i_ino);
1850 			}
1851 			return retval;
1852 		}
1853 		if (!buffer_verified(bh) &&
1854 		    !ext4_dirent_csum_verify(dir,
1855 				(struct ext4_dir_entry *)bh->b_data))
1856 			return -EIO;
1857 		set_buffer_verified(bh);
1858 		retval = add_dirent_to_buf(handle, dentry, inode, NULL, bh);
1859 		if (retval != -ENOSPC) {
1860 			brelse(bh);
1861 			return retval;
1862 		}
1863 
1864 		if (blocks == 1 && !dx_fallback &&
1865 		    EXT4_HAS_COMPAT_FEATURE(sb, EXT4_FEATURE_COMPAT_DIR_INDEX))
1866 			return make_indexed_dir(handle, dentry, inode, bh);
1867 		brelse(bh);
1868 	}
1869 	bh = ext4_append(handle, dir, &block, &retval);
1870 	if (!bh)
1871 		return retval;
1872 	de = (struct ext4_dir_entry_2 *) bh->b_data;
1873 	de->inode = 0;
1874 	de->rec_len = ext4_rec_len_to_disk(blocksize - csum_size, blocksize);
1875 
1876 	if (csum_size) {
1877 		t = EXT4_DIRENT_TAIL(bh->b_data, blocksize);
1878 		initialize_dirent_tail(t, blocksize);
1879 	}
1880 
1881 	retval = add_dirent_to_buf(handle, dentry, inode, de, bh);
1882 	brelse(bh);
1883 	if (retval == 0)
1884 		ext4_set_inode_state(inode, EXT4_STATE_NEWENTRY);
1885 	return retval;
1886 }
1887 
1888 /*
1889  * Returns 0 for success, or a negative error value
1890  */
1891 static int ext4_dx_add_entry(handle_t *handle, struct dentry *dentry,
1892 			     struct inode *inode)
1893 {
1894 	struct dx_frame frames[2], *frame;
1895 	struct dx_entry *entries, *at;
1896 	struct dx_hash_info hinfo;
1897 	struct buffer_head *bh;
1898 	struct inode *dir = dentry->d_parent->d_inode;
1899 	struct super_block *sb = dir->i_sb;
1900 	struct ext4_dir_entry_2 *de;
1901 	int err;
1902 
1903 	frame = dx_probe(&dentry->d_name, dir, &hinfo, frames, &err);
1904 	if (!frame)
1905 		return err;
1906 	entries = frame->entries;
1907 	at = frame->at;
1908 
1909 	if (!(bh = ext4_bread(handle, dir, dx_get_block(frame->at), 0, &err))) {
1910 		if (!err) {
1911 			err = -EIO;
1912 			ext4_error(dir->i_sb,
1913 				   "Directory hole detected on inode %lu\n",
1914 				   dir->i_ino);
1915 		}
1916 		goto cleanup;
1917 	}
1918 
1919 	if (!buffer_verified(bh) &&
1920 	    !ext4_dirent_csum_verify(dir, (struct ext4_dir_entry *)bh->b_data))
1921 		goto journal_error;
1922 	set_buffer_verified(bh);
1923 
1924 	BUFFER_TRACE(bh, "get_write_access");
1925 	err = ext4_journal_get_write_access(handle, bh);
1926 	if (err)
1927 		goto journal_error;
1928 
1929 	err = add_dirent_to_buf(handle, dentry, inode, NULL, bh);
1930 	if (err != -ENOSPC)
1931 		goto cleanup;
1932 
1933 	/* Block full, should compress but for now just split */
1934 	dxtrace(printk(KERN_DEBUG "using %u of %u node entries\n",
1935 		       dx_get_count(entries), dx_get_limit(entries)));
1936 	/* Need to split index? */
1937 	if (dx_get_count(entries) == dx_get_limit(entries)) {
1938 		ext4_lblk_t newblock;
1939 		unsigned icount = dx_get_count(entries);
1940 		int levels = frame - frames;
1941 		struct dx_entry *entries2;
1942 		struct dx_node *node2;
1943 		struct buffer_head *bh2;
1944 
1945 		if (levels && (dx_get_count(frames->entries) ==
1946 			       dx_get_limit(frames->entries))) {
1947 			ext4_warning(sb, "Directory index full!");
1948 			err = -ENOSPC;
1949 			goto cleanup;
1950 		}
1951 		bh2 = ext4_append (handle, dir, &newblock, &err);
1952 		if (!(bh2))
1953 			goto cleanup;
1954 		node2 = (struct dx_node *)(bh2->b_data);
1955 		entries2 = node2->entries;
1956 		memset(&node2->fake, 0, sizeof(struct fake_dirent));
1957 		node2->fake.rec_len = ext4_rec_len_to_disk(sb->s_blocksize,
1958 							   sb->s_blocksize);
1959 		BUFFER_TRACE(frame->bh, "get_write_access");
1960 		err = ext4_journal_get_write_access(handle, frame->bh);
1961 		if (err)
1962 			goto journal_error;
1963 		if (levels) {
1964 			unsigned icount1 = icount/2, icount2 = icount - icount1;
1965 			unsigned hash2 = dx_get_hash(entries + icount1);
1966 			dxtrace(printk(KERN_DEBUG "Split index %i/%i\n",
1967 				       icount1, icount2));
1968 
1969 			BUFFER_TRACE(frame->bh, "get_write_access"); /* index root */
1970 			err = ext4_journal_get_write_access(handle,
1971 							     frames[0].bh);
1972 			if (err)
1973 				goto journal_error;
1974 
1975 			memcpy((char *) entries2, (char *) (entries + icount1),
1976 			       icount2 * sizeof(struct dx_entry));
1977 			dx_set_count(entries, icount1);
1978 			dx_set_count(entries2, icount2);
1979 			dx_set_limit(entries2, dx_node_limit(dir));
1980 
1981 			/* Which index block gets the new entry? */
1982 			if (at - entries >= icount1) {
1983 				frame->at = at = at - entries - icount1 + entries2;
1984 				frame->entries = entries = entries2;
1985 				swap(frame->bh, bh2);
1986 			}
1987 			dx_insert_block(frames + 0, hash2, newblock);
1988 			dxtrace(dx_show_index("node", frames[1].entries));
1989 			dxtrace(dx_show_index("node",
1990 			       ((struct dx_node *) bh2->b_data)->entries));
1991 			err = ext4_handle_dirty_dx_node(handle, dir, bh2);
1992 			if (err)
1993 				goto journal_error;
1994 			brelse (bh2);
1995 		} else {
1996 			dxtrace(printk(KERN_DEBUG
1997 				       "Creating second level index...\n"));
1998 			memcpy((char *) entries2, (char *) entries,
1999 			       icount * sizeof(struct dx_entry));
2000 			dx_set_limit(entries2, dx_node_limit(dir));
2001 
2002 			/* Set up root */
2003 			dx_set_count(entries, 1);
2004 			dx_set_block(entries + 0, newblock);
2005 			((struct dx_root *) frames[0].bh->b_data)->info.indirect_levels = 1;
2006 
2007 			/* Add new access path frame */
2008 			frame = frames + 1;
2009 			frame->at = at = at - entries + entries2;
2010 			frame->entries = entries = entries2;
2011 			frame->bh = bh2;
2012 			err = ext4_journal_get_write_access(handle,
2013 							     frame->bh);
2014 			if (err)
2015 				goto journal_error;
2016 		}
2017 		err = ext4_handle_dirty_dx_node(handle, dir, frames[0].bh);
2018 		if (err) {
2019 			ext4_std_error(inode->i_sb, err);
2020 			goto cleanup;
2021 		}
2022 	}
2023 	de = do_split(handle, dir, &bh, frame, &hinfo, &err);
2024 	if (!de)
2025 		goto cleanup;
2026 	err = add_dirent_to_buf(handle, dentry, inode, de, bh);
2027 	goto cleanup;
2028 
2029 journal_error:
2030 	ext4_std_error(dir->i_sb, err);
2031 cleanup:
2032 	if (bh)
2033 		brelse(bh);
2034 	dx_release(frames);
2035 	return err;
2036 }
2037 
2038 /*
2039  * ext4_delete_entry deletes a directory entry by merging it with the
2040  * previous entry
2041  */
2042 static int ext4_delete_entry(handle_t *handle,
2043 			     struct inode *dir,
2044 			     struct ext4_dir_entry_2 *de_del,
2045 			     struct buffer_head *bh)
2046 {
2047 	struct ext4_dir_entry_2 *de, *pde;
2048 	unsigned int blocksize = dir->i_sb->s_blocksize;
2049 	int csum_size = 0;
2050 	int i, err;
2051 
2052 	if (EXT4_HAS_RO_COMPAT_FEATURE(dir->i_sb,
2053 				       EXT4_FEATURE_RO_COMPAT_METADATA_CSUM))
2054 		csum_size = sizeof(struct ext4_dir_entry_tail);
2055 
2056 	i = 0;
2057 	pde = NULL;
2058 	de = (struct ext4_dir_entry_2 *) bh->b_data;
2059 	while (i < bh->b_size - csum_size) {
2060 		if (ext4_check_dir_entry(dir, NULL, de, bh, i))
2061 			return -EIO;
2062 		if (de == de_del)  {
2063 			BUFFER_TRACE(bh, "get_write_access");
2064 			err = ext4_journal_get_write_access(handle, bh);
2065 			if (unlikely(err)) {
2066 				ext4_std_error(dir->i_sb, err);
2067 				return err;
2068 			}
2069 			if (pde)
2070 				pde->rec_len = ext4_rec_len_to_disk(
2071 					ext4_rec_len_from_disk(pde->rec_len,
2072 							       blocksize) +
2073 					ext4_rec_len_from_disk(de->rec_len,
2074 							       blocksize),
2075 					blocksize);
2076 			else
2077 				de->inode = 0;
2078 			dir->i_version++;
2079 			BUFFER_TRACE(bh, "call ext4_handle_dirty_metadata");
2080 			err = ext4_handle_dirty_dirent_node(handle, dir, bh);
2081 			if (unlikely(err)) {
2082 				ext4_std_error(dir->i_sb, err);
2083 				return err;
2084 			}
2085 			return 0;
2086 		}
2087 		i += ext4_rec_len_from_disk(de->rec_len, blocksize);
2088 		pde = de;
2089 		de = ext4_next_entry(de, blocksize);
2090 	}
2091 	return -ENOENT;
2092 }
2093 
2094 /*
2095  * DIR_NLINK feature is set if 1) nlinks > EXT4_LINK_MAX or 2) nlinks == 2,
2096  * since this indicates that nlinks count was previously 1.
2097  */
2098 static void ext4_inc_count(handle_t *handle, struct inode *inode)
2099 {
2100 	inc_nlink(inode);
2101 	if (is_dx(inode) && inode->i_nlink > 1) {
2102 		/* limit is 16-bit i_links_count */
2103 		if (inode->i_nlink >= EXT4_LINK_MAX || inode->i_nlink == 2) {
2104 			set_nlink(inode, 1);
2105 			EXT4_SET_RO_COMPAT_FEATURE(inode->i_sb,
2106 					      EXT4_FEATURE_RO_COMPAT_DIR_NLINK);
2107 		}
2108 	}
2109 }
2110 
2111 /*
2112  * If a directory had nlink == 1, then we should let it be 1. This indicates
2113  * directory has >EXT4_LINK_MAX subdirs.
2114  */
2115 static void ext4_dec_count(handle_t *handle, struct inode *inode)
2116 {
2117 	if (!S_ISDIR(inode->i_mode) || inode->i_nlink > 2)
2118 		drop_nlink(inode);
2119 }
2120 
2121 
2122 static int ext4_add_nondir(handle_t *handle,
2123 		struct dentry *dentry, struct inode *inode)
2124 {
2125 	int err = ext4_add_entry(handle, dentry, inode);
2126 	if (!err) {
2127 		ext4_mark_inode_dirty(handle, inode);
2128 		unlock_new_inode(inode);
2129 		d_instantiate(dentry, inode);
2130 		return 0;
2131 	}
2132 	drop_nlink(inode);
2133 	unlock_new_inode(inode);
2134 	iput(inode);
2135 	return err;
2136 }
2137 
2138 /*
2139  * By the time this is called, we already have created
2140  * the directory cache entry for the new file, but it
2141  * is so far negative - it has no inode.
2142  *
2143  * If the create succeeds, we fill in the inode information
2144  * with d_instantiate().
2145  */
2146 static int ext4_create(struct inode *dir, struct dentry *dentry, umode_t mode,
2147 		       bool excl)
2148 {
2149 	handle_t *handle;
2150 	struct inode *inode;
2151 	int err, retries = 0;
2152 
2153 	dquot_initialize(dir);
2154 
2155 retry:
2156 	handle = ext4_journal_start(dir, EXT4_DATA_TRANS_BLOCKS(dir->i_sb) +
2157 					EXT4_INDEX_EXTRA_TRANS_BLOCKS + 3 +
2158 					EXT4_MAXQUOTAS_INIT_BLOCKS(dir->i_sb));
2159 	if (IS_ERR(handle))
2160 		return PTR_ERR(handle);
2161 
2162 	if (IS_DIRSYNC(dir))
2163 		ext4_handle_sync(handle);
2164 
2165 	inode = ext4_new_inode(handle, dir, mode, &dentry->d_name, 0, NULL);
2166 	err = PTR_ERR(inode);
2167 	if (!IS_ERR(inode)) {
2168 		inode->i_op = &ext4_file_inode_operations;
2169 		inode->i_fop = &ext4_file_operations;
2170 		ext4_set_aops(inode);
2171 		err = ext4_add_nondir(handle, dentry, inode);
2172 	}
2173 	ext4_journal_stop(handle);
2174 	if (err == -ENOSPC && ext4_should_retry_alloc(dir->i_sb, &retries))
2175 		goto retry;
2176 	return err;
2177 }
2178 
2179 static int ext4_mknod(struct inode *dir, struct dentry *dentry,
2180 		      umode_t mode, dev_t rdev)
2181 {
2182 	handle_t *handle;
2183 	struct inode *inode;
2184 	int err, retries = 0;
2185 
2186 	if (!new_valid_dev(rdev))
2187 		return -EINVAL;
2188 
2189 	dquot_initialize(dir);
2190 
2191 retry:
2192 	handle = ext4_journal_start(dir, EXT4_DATA_TRANS_BLOCKS(dir->i_sb) +
2193 					EXT4_INDEX_EXTRA_TRANS_BLOCKS + 3 +
2194 					EXT4_MAXQUOTAS_INIT_BLOCKS(dir->i_sb));
2195 	if (IS_ERR(handle))
2196 		return PTR_ERR(handle);
2197 
2198 	if (IS_DIRSYNC(dir))
2199 		ext4_handle_sync(handle);
2200 
2201 	inode = ext4_new_inode(handle, dir, mode, &dentry->d_name, 0, NULL);
2202 	err = PTR_ERR(inode);
2203 	if (!IS_ERR(inode)) {
2204 		init_special_inode(inode, inode->i_mode, rdev);
2205 		inode->i_op = &ext4_special_inode_operations;
2206 		err = ext4_add_nondir(handle, dentry, inode);
2207 	}
2208 	ext4_journal_stop(handle);
2209 	if (err == -ENOSPC && ext4_should_retry_alloc(dir->i_sb, &retries))
2210 		goto retry;
2211 	return err;
2212 }
2213 
2214 static int ext4_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode)
2215 {
2216 	handle_t *handle;
2217 	struct inode *inode;
2218 	struct buffer_head *dir_block = NULL;
2219 	struct ext4_dir_entry_2 *de;
2220 	struct ext4_dir_entry_tail *t;
2221 	unsigned int blocksize = dir->i_sb->s_blocksize;
2222 	int csum_size = 0;
2223 	int err, retries = 0;
2224 
2225 	if (EXT4_HAS_RO_COMPAT_FEATURE(dir->i_sb,
2226 				       EXT4_FEATURE_RO_COMPAT_METADATA_CSUM))
2227 		csum_size = sizeof(struct ext4_dir_entry_tail);
2228 
2229 	if (EXT4_DIR_LINK_MAX(dir))
2230 		return -EMLINK;
2231 
2232 	dquot_initialize(dir);
2233 
2234 retry:
2235 	handle = ext4_journal_start(dir, EXT4_DATA_TRANS_BLOCKS(dir->i_sb) +
2236 					EXT4_INDEX_EXTRA_TRANS_BLOCKS + 3 +
2237 					EXT4_MAXQUOTAS_INIT_BLOCKS(dir->i_sb));
2238 	if (IS_ERR(handle))
2239 		return PTR_ERR(handle);
2240 
2241 	if (IS_DIRSYNC(dir))
2242 		ext4_handle_sync(handle);
2243 
2244 	inode = ext4_new_inode(handle, dir, S_IFDIR | mode,
2245 			       &dentry->d_name, 0, NULL);
2246 	err = PTR_ERR(inode);
2247 	if (IS_ERR(inode))
2248 		goto out_stop;
2249 
2250 	inode->i_op = &ext4_dir_inode_operations;
2251 	inode->i_fop = &ext4_dir_operations;
2252 	inode->i_size = EXT4_I(inode)->i_disksize = inode->i_sb->s_blocksize;
2253 	if (!(dir_block = ext4_bread(handle, inode, 0, 1, &err))) {
2254 		if (!err) {
2255 			err = -EIO;
2256 			ext4_error(inode->i_sb,
2257 				   "Directory hole detected on inode %lu\n",
2258 				   inode->i_ino);
2259 		}
2260 		goto out_clear_inode;
2261 	}
2262 	BUFFER_TRACE(dir_block, "get_write_access");
2263 	err = ext4_journal_get_write_access(handle, dir_block);
2264 	if (err)
2265 		goto out_clear_inode;
2266 	de = (struct ext4_dir_entry_2 *) dir_block->b_data;
2267 	de->inode = cpu_to_le32(inode->i_ino);
2268 	de->name_len = 1;
2269 	de->rec_len = ext4_rec_len_to_disk(EXT4_DIR_REC_LEN(de->name_len),
2270 					   blocksize);
2271 	strcpy(de->name, ".");
2272 	ext4_set_de_type(dir->i_sb, de, S_IFDIR);
2273 	de = ext4_next_entry(de, blocksize);
2274 	de->inode = cpu_to_le32(dir->i_ino);
2275 	de->rec_len = ext4_rec_len_to_disk(blocksize -
2276 					   (csum_size + EXT4_DIR_REC_LEN(1)),
2277 					   blocksize);
2278 	de->name_len = 2;
2279 	strcpy(de->name, "..");
2280 	ext4_set_de_type(dir->i_sb, de, S_IFDIR);
2281 	set_nlink(inode, 2);
2282 
2283 	if (csum_size) {
2284 		t = EXT4_DIRENT_TAIL(dir_block->b_data, blocksize);
2285 		initialize_dirent_tail(t, blocksize);
2286 	}
2287 
2288 	BUFFER_TRACE(dir_block, "call ext4_handle_dirty_metadata");
2289 	err = ext4_handle_dirty_dirent_node(handle, inode, dir_block);
2290 	if (err)
2291 		goto out_clear_inode;
2292 	set_buffer_verified(dir_block);
2293 	err = ext4_mark_inode_dirty(handle, inode);
2294 	if (!err)
2295 		err = ext4_add_entry(handle, dentry, inode);
2296 	if (err) {
2297 out_clear_inode:
2298 		clear_nlink(inode);
2299 		unlock_new_inode(inode);
2300 		ext4_mark_inode_dirty(handle, inode);
2301 		iput(inode);
2302 		goto out_stop;
2303 	}
2304 	ext4_inc_count(handle, dir);
2305 	ext4_update_dx_flag(dir);
2306 	err = ext4_mark_inode_dirty(handle, dir);
2307 	if (err)
2308 		goto out_clear_inode;
2309 	unlock_new_inode(inode);
2310 	d_instantiate(dentry, inode);
2311 out_stop:
2312 	brelse(dir_block);
2313 	ext4_journal_stop(handle);
2314 	if (err == -ENOSPC && ext4_should_retry_alloc(dir->i_sb, &retries))
2315 		goto retry;
2316 	return err;
2317 }
2318 
2319 /*
2320  * routine to check that the specified directory is empty (for rmdir)
2321  */
2322 static int empty_dir(struct inode *inode)
2323 {
2324 	unsigned int offset;
2325 	struct buffer_head *bh;
2326 	struct ext4_dir_entry_2 *de, *de1;
2327 	struct super_block *sb;
2328 	int err = 0;
2329 
2330 	sb = inode->i_sb;
2331 	if (inode->i_size < EXT4_DIR_REC_LEN(1) + EXT4_DIR_REC_LEN(2) ||
2332 	    !(bh = ext4_bread(NULL, inode, 0, 0, &err))) {
2333 		if (err)
2334 			EXT4_ERROR_INODE(inode,
2335 				"error %d reading directory lblock 0", err);
2336 		else
2337 			ext4_warning(inode->i_sb,
2338 				     "bad directory (dir #%lu) - no data block",
2339 				     inode->i_ino);
2340 		return 1;
2341 	}
2342 	if (!buffer_verified(bh) &&
2343 	    !ext4_dirent_csum_verify(inode,
2344 			(struct ext4_dir_entry *)bh->b_data)) {
2345 		EXT4_ERROR_INODE(inode, "checksum error reading directory "
2346 				 "lblock 0");
2347 		return -EIO;
2348 	}
2349 	set_buffer_verified(bh);
2350 	de = (struct ext4_dir_entry_2 *) bh->b_data;
2351 	de1 = ext4_next_entry(de, sb->s_blocksize);
2352 	if (le32_to_cpu(de->inode) != inode->i_ino ||
2353 			!le32_to_cpu(de1->inode) ||
2354 			strcmp(".", de->name) ||
2355 			strcmp("..", de1->name)) {
2356 		ext4_warning(inode->i_sb,
2357 			     "bad directory (dir #%lu) - no `.' or `..'",
2358 			     inode->i_ino);
2359 		brelse(bh);
2360 		return 1;
2361 	}
2362 	offset = ext4_rec_len_from_disk(de->rec_len, sb->s_blocksize) +
2363 		 ext4_rec_len_from_disk(de1->rec_len, sb->s_blocksize);
2364 	de = ext4_next_entry(de1, sb->s_blocksize);
2365 	while (offset < inode->i_size) {
2366 		if (!bh ||
2367 		    (void *) de >= (void *) (bh->b_data+sb->s_blocksize)) {
2368 			unsigned int lblock;
2369 			err = 0;
2370 			brelse(bh);
2371 			lblock = offset >> EXT4_BLOCK_SIZE_BITS(sb);
2372 			bh = ext4_bread(NULL, inode, lblock, 0, &err);
2373 			if (!bh) {
2374 				if (err)
2375 					EXT4_ERROR_INODE(inode,
2376 						"error %d reading directory "
2377 						"lblock %u", err, lblock);
2378 				else
2379 					ext4_warning(inode->i_sb,
2380 						"bad directory (dir #%lu) - no data block",
2381 						inode->i_ino);
2382 
2383 				offset += sb->s_blocksize;
2384 				continue;
2385 			}
2386 			if (!buffer_verified(bh) &&
2387 			    !ext4_dirent_csum_verify(inode,
2388 					(struct ext4_dir_entry *)bh->b_data)) {
2389 				EXT4_ERROR_INODE(inode, "checksum error "
2390 						 "reading directory lblock 0");
2391 				return -EIO;
2392 			}
2393 			set_buffer_verified(bh);
2394 			de = (struct ext4_dir_entry_2 *) bh->b_data;
2395 		}
2396 		if (ext4_check_dir_entry(inode, NULL, de, bh, offset)) {
2397 			de = (struct ext4_dir_entry_2 *)(bh->b_data +
2398 							 sb->s_blocksize);
2399 			offset = (offset | (sb->s_blocksize - 1)) + 1;
2400 			continue;
2401 		}
2402 		if (le32_to_cpu(de->inode)) {
2403 			brelse(bh);
2404 			return 0;
2405 		}
2406 		offset += ext4_rec_len_from_disk(de->rec_len, sb->s_blocksize);
2407 		de = ext4_next_entry(de, sb->s_blocksize);
2408 	}
2409 	brelse(bh);
2410 	return 1;
2411 }
2412 
2413 /* ext4_orphan_add() links an unlinked or truncated inode into a list of
2414  * such inodes, starting at the superblock, in case we crash before the
2415  * file is closed/deleted, or in case the inode truncate spans multiple
2416  * transactions and the last transaction is not recovered after a crash.
2417  *
2418  * At filesystem recovery time, we walk this list deleting unlinked
2419  * inodes and truncating linked inodes in ext4_orphan_cleanup().
2420  */
2421 int ext4_orphan_add(handle_t *handle, struct inode *inode)
2422 {
2423 	struct super_block *sb = inode->i_sb;
2424 	struct ext4_iloc iloc;
2425 	int err = 0, rc;
2426 
2427 	if (!EXT4_SB(sb)->s_journal)
2428 		return 0;
2429 
2430 	mutex_lock(&EXT4_SB(sb)->s_orphan_lock);
2431 	if (!list_empty(&EXT4_I(inode)->i_orphan))
2432 		goto out_unlock;
2433 
2434 	/*
2435 	 * Orphan handling is only valid for files with data blocks
2436 	 * being truncated, or files being unlinked. Note that we either
2437 	 * hold i_mutex, or the inode can not be referenced from outside,
2438 	 * so i_nlink should not be bumped due to race
2439 	 */
2440 	J_ASSERT((S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode) ||
2441 		  S_ISLNK(inode->i_mode)) || inode->i_nlink == 0);
2442 
2443 	BUFFER_TRACE(EXT4_SB(sb)->s_sbh, "get_write_access");
2444 	err = ext4_journal_get_write_access(handle, EXT4_SB(sb)->s_sbh);
2445 	if (err)
2446 		goto out_unlock;
2447 
2448 	err = ext4_reserve_inode_write(handle, inode, &iloc);
2449 	if (err)
2450 		goto out_unlock;
2451 	/*
2452 	 * Due to previous errors inode may be already a part of on-disk
2453 	 * orphan list. If so skip on-disk list modification.
2454 	 */
2455 	if (NEXT_ORPHAN(inode) && NEXT_ORPHAN(inode) <=
2456 		(le32_to_cpu(EXT4_SB(sb)->s_es->s_inodes_count)))
2457 			goto mem_insert;
2458 
2459 	/* Insert this inode at the head of the on-disk orphan list... */
2460 	NEXT_ORPHAN(inode) = le32_to_cpu(EXT4_SB(sb)->s_es->s_last_orphan);
2461 	EXT4_SB(sb)->s_es->s_last_orphan = cpu_to_le32(inode->i_ino);
2462 	err = ext4_handle_dirty_super(handle, sb);
2463 	rc = ext4_mark_iloc_dirty(handle, inode, &iloc);
2464 	if (!err)
2465 		err = rc;
2466 
2467 	/* Only add to the head of the in-memory list if all the
2468 	 * previous operations succeeded.  If the orphan_add is going to
2469 	 * fail (possibly taking the journal offline), we can't risk
2470 	 * leaving the inode on the orphan list: stray orphan-list
2471 	 * entries can cause panics at unmount time.
2472 	 *
2473 	 * This is safe: on error we're going to ignore the orphan list
2474 	 * anyway on the next recovery. */
2475 mem_insert:
2476 	if (!err)
2477 		list_add(&EXT4_I(inode)->i_orphan, &EXT4_SB(sb)->s_orphan);
2478 
2479 	jbd_debug(4, "superblock will point to %lu\n", inode->i_ino);
2480 	jbd_debug(4, "orphan inode %lu will point to %d\n",
2481 			inode->i_ino, NEXT_ORPHAN(inode));
2482 out_unlock:
2483 	mutex_unlock(&EXT4_SB(sb)->s_orphan_lock);
2484 	ext4_std_error(inode->i_sb, err);
2485 	return err;
2486 }
2487 
2488 /*
2489  * ext4_orphan_del() removes an unlinked or truncated inode from the list
2490  * of such inodes stored on disk, because it is finally being cleaned up.
2491  */
2492 int ext4_orphan_del(handle_t *handle, struct inode *inode)
2493 {
2494 	struct list_head *prev;
2495 	struct ext4_inode_info *ei = EXT4_I(inode);
2496 	struct ext4_sb_info *sbi;
2497 	__u32 ino_next;
2498 	struct ext4_iloc iloc;
2499 	int err = 0;
2500 
2501 	if (!EXT4_SB(inode->i_sb)->s_journal)
2502 		return 0;
2503 
2504 	mutex_lock(&EXT4_SB(inode->i_sb)->s_orphan_lock);
2505 	if (list_empty(&ei->i_orphan))
2506 		goto out;
2507 
2508 	ino_next = NEXT_ORPHAN(inode);
2509 	prev = ei->i_orphan.prev;
2510 	sbi = EXT4_SB(inode->i_sb);
2511 
2512 	jbd_debug(4, "remove inode %lu from orphan list\n", inode->i_ino);
2513 
2514 	list_del_init(&ei->i_orphan);
2515 
2516 	/* If we're on an error path, we may not have a valid
2517 	 * transaction handle with which to update the orphan list on
2518 	 * disk, but we still need to remove the inode from the linked
2519 	 * list in memory. */
2520 	if (!handle)
2521 		goto out;
2522 
2523 	err = ext4_reserve_inode_write(handle, inode, &iloc);
2524 	if (err)
2525 		goto out_err;
2526 
2527 	if (prev == &sbi->s_orphan) {
2528 		jbd_debug(4, "superblock will point to %u\n", ino_next);
2529 		BUFFER_TRACE(sbi->s_sbh, "get_write_access");
2530 		err = ext4_journal_get_write_access(handle, sbi->s_sbh);
2531 		if (err)
2532 			goto out_brelse;
2533 		sbi->s_es->s_last_orphan = cpu_to_le32(ino_next);
2534 		err = ext4_handle_dirty_super(handle, inode->i_sb);
2535 	} else {
2536 		struct ext4_iloc iloc2;
2537 		struct inode *i_prev =
2538 			&list_entry(prev, struct ext4_inode_info, i_orphan)->vfs_inode;
2539 
2540 		jbd_debug(4, "orphan inode %lu will point to %u\n",
2541 			  i_prev->i_ino, ino_next);
2542 		err = ext4_reserve_inode_write(handle, i_prev, &iloc2);
2543 		if (err)
2544 			goto out_brelse;
2545 		NEXT_ORPHAN(i_prev) = ino_next;
2546 		err = ext4_mark_iloc_dirty(handle, i_prev, &iloc2);
2547 	}
2548 	if (err)
2549 		goto out_brelse;
2550 	NEXT_ORPHAN(inode) = 0;
2551 	err = ext4_mark_iloc_dirty(handle, inode, &iloc);
2552 
2553 out_err:
2554 	ext4_std_error(inode->i_sb, err);
2555 out:
2556 	mutex_unlock(&EXT4_SB(inode->i_sb)->s_orphan_lock);
2557 	return err;
2558 
2559 out_brelse:
2560 	brelse(iloc.bh);
2561 	goto out_err;
2562 }
2563 
2564 static int ext4_rmdir(struct inode *dir, struct dentry *dentry)
2565 {
2566 	int retval;
2567 	struct inode *inode;
2568 	struct buffer_head *bh;
2569 	struct ext4_dir_entry_2 *de;
2570 	handle_t *handle;
2571 
2572 	/* Initialize quotas before so that eventual writes go in
2573 	 * separate transaction */
2574 	dquot_initialize(dir);
2575 	dquot_initialize(dentry->d_inode);
2576 
2577 	handle = ext4_journal_start(dir, EXT4_DELETE_TRANS_BLOCKS(dir->i_sb));
2578 	if (IS_ERR(handle))
2579 		return PTR_ERR(handle);
2580 
2581 	retval = -ENOENT;
2582 	bh = ext4_find_entry(dir, &dentry->d_name, &de);
2583 	if (!bh)
2584 		goto end_rmdir;
2585 
2586 	if (IS_DIRSYNC(dir))
2587 		ext4_handle_sync(handle);
2588 
2589 	inode = dentry->d_inode;
2590 
2591 	retval = -EIO;
2592 	if (le32_to_cpu(de->inode) != inode->i_ino)
2593 		goto end_rmdir;
2594 
2595 	retval = -ENOTEMPTY;
2596 	if (!empty_dir(inode))
2597 		goto end_rmdir;
2598 
2599 	retval = ext4_delete_entry(handle, dir, de, bh);
2600 	if (retval)
2601 		goto end_rmdir;
2602 	if (!EXT4_DIR_LINK_EMPTY(inode))
2603 		ext4_warning(inode->i_sb,
2604 			     "empty directory has too many links (%d)",
2605 			     inode->i_nlink);
2606 	inode->i_version++;
2607 	clear_nlink(inode);
2608 	/* There's no need to set i_disksize: the fact that i_nlink is
2609 	 * zero will ensure that the right thing happens during any
2610 	 * recovery. */
2611 	inode->i_size = 0;
2612 	ext4_orphan_add(handle, inode);
2613 	inode->i_ctime = dir->i_ctime = dir->i_mtime = ext4_current_time(inode);
2614 	ext4_mark_inode_dirty(handle, inode);
2615 	ext4_dec_count(handle, dir);
2616 	ext4_update_dx_flag(dir);
2617 	ext4_mark_inode_dirty(handle, dir);
2618 
2619 end_rmdir:
2620 	ext4_journal_stop(handle);
2621 	brelse(bh);
2622 	return retval;
2623 }
2624 
2625 static int ext4_unlink(struct inode *dir, struct dentry *dentry)
2626 {
2627 	int retval;
2628 	struct inode *inode;
2629 	struct buffer_head *bh;
2630 	struct ext4_dir_entry_2 *de;
2631 	handle_t *handle;
2632 
2633 	trace_ext4_unlink_enter(dir, dentry);
2634 	/* Initialize quotas before so that eventual writes go
2635 	 * in separate transaction */
2636 	dquot_initialize(dir);
2637 	dquot_initialize(dentry->d_inode);
2638 
2639 	handle = ext4_journal_start(dir, EXT4_DELETE_TRANS_BLOCKS(dir->i_sb));
2640 	if (IS_ERR(handle))
2641 		return PTR_ERR(handle);
2642 
2643 	if (IS_DIRSYNC(dir))
2644 		ext4_handle_sync(handle);
2645 
2646 	retval = -ENOENT;
2647 	bh = ext4_find_entry(dir, &dentry->d_name, &de);
2648 	if (!bh)
2649 		goto end_unlink;
2650 
2651 	inode = dentry->d_inode;
2652 
2653 	retval = -EIO;
2654 	if (le32_to_cpu(de->inode) != inode->i_ino)
2655 		goto end_unlink;
2656 
2657 	if (!inode->i_nlink) {
2658 		ext4_warning(inode->i_sb,
2659 			     "Deleting nonexistent file (%lu), %d",
2660 			     inode->i_ino, inode->i_nlink);
2661 		set_nlink(inode, 1);
2662 	}
2663 	retval = ext4_delete_entry(handle, dir, de, bh);
2664 	if (retval)
2665 		goto end_unlink;
2666 	dir->i_ctime = dir->i_mtime = ext4_current_time(dir);
2667 	ext4_update_dx_flag(dir);
2668 	ext4_mark_inode_dirty(handle, dir);
2669 	drop_nlink(inode);
2670 	if (!inode->i_nlink)
2671 		ext4_orphan_add(handle, inode);
2672 	inode->i_ctime = ext4_current_time(inode);
2673 	ext4_mark_inode_dirty(handle, inode);
2674 	retval = 0;
2675 
2676 end_unlink:
2677 	ext4_journal_stop(handle);
2678 	brelse(bh);
2679 	trace_ext4_unlink_exit(dentry, retval);
2680 	return retval;
2681 }
2682 
2683 static int ext4_symlink(struct inode *dir,
2684 			struct dentry *dentry, const char *symname)
2685 {
2686 	handle_t *handle;
2687 	struct inode *inode;
2688 	int l, err, retries = 0;
2689 	int credits;
2690 
2691 	l = strlen(symname)+1;
2692 	if (l > dir->i_sb->s_blocksize)
2693 		return -ENAMETOOLONG;
2694 
2695 	dquot_initialize(dir);
2696 
2697 	if (l > EXT4_N_BLOCKS * 4) {
2698 		/*
2699 		 * For non-fast symlinks, we just allocate inode and put it on
2700 		 * orphan list in the first transaction => we need bitmap,
2701 		 * group descriptor, sb, inode block, quota blocks, and
2702 		 * possibly selinux xattr blocks.
2703 		 */
2704 		credits = 4 + EXT4_MAXQUOTAS_INIT_BLOCKS(dir->i_sb) +
2705 			  EXT4_XATTR_TRANS_BLOCKS;
2706 	} else {
2707 		/*
2708 		 * Fast symlink. We have to add entry to directory
2709 		 * (EXT4_DATA_TRANS_BLOCKS + EXT4_INDEX_EXTRA_TRANS_BLOCKS),
2710 		 * allocate new inode (bitmap, group descriptor, inode block,
2711 		 * quota blocks, sb is already counted in previous macros).
2712 		 */
2713 		credits = EXT4_DATA_TRANS_BLOCKS(dir->i_sb) +
2714 			  EXT4_INDEX_EXTRA_TRANS_BLOCKS + 3 +
2715 			  EXT4_MAXQUOTAS_INIT_BLOCKS(dir->i_sb);
2716 	}
2717 retry:
2718 	handle = ext4_journal_start(dir, credits);
2719 	if (IS_ERR(handle))
2720 		return PTR_ERR(handle);
2721 
2722 	if (IS_DIRSYNC(dir))
2723 		ext4_handle_sync(handle);
2724 
2725 	inode = ext4_new_inode(handle, dir, S_IFLNK|S_IRWXUGO,
2726 			       &dentry->d_name, 0, NULL);
2727 	err = PTR_ERR(inode);
2728 	if (IS_ERR(inode))
2729 		goto out_stop;
2730 
2731 	if (l > EXT4_N_BLOCKS * 4) {
2732 		inode->i_op = &ext4_symlink_inode_operations;
2733 		ext4_set_aops(inode);
2734 		/*
2735 		 * We cannot call page_symlink() with transaction started
2736 		 * because it calls into ext4_write_begin() which can wait
2737 		 * for transaction commit if we are running out of space
2738 		 * and thus we deadlock. So we have to stop transaction now
2739 		 * and restart it when symlink contents is written.
2740 		 *
2741 		 * To keep fs consistent in case of crash, we have to put inode
2742 		 * to orphan list in the mean time.
2743 		 */
2744 		drop_nlink(inode);
2745 		err = ext4_orphan_add(handle, inode);
2746 		ext4_journal_stop(handle);
2747 		if (err)
2748 			goto err_drop_inode;
2749 		err = __page_symlink(inode, symname, l, 1);
2750 		if (err)
2751 			goto err_drop_inode;
2752 		/*
2753 		 * Now inode is being linked into dir (EXT4_DATA_TRANS_BLOCKS
2754 		 * + EXT4_INDEX_EXTRA_TRANS_BLOCKS), inode is also modified
2755 		 */
2756 		handle = ext4_journal_start(dir,
2757 				EXT4_DATA_TRANS_BLOCKS(dir->i_sb) +
2758 				EXT4_INDEX_EXTRA_TRANS_BLOCKS + 1);
2759 		if (IS_ERR(handle)) {
2760 			err = PTR_ERR(handle);
2761 			goto err_drop_inode;
2762 		}
2763 		set_nlink(inode, 1);
2764 		err = ext4_orphan_del(handle, inode);
2765 		if (err) {
2766 			ext4_journal_stop(handle);
2767 			clear_nlink(inode);
2768 			goto err_drop_inode;
2769 		}
2770 	} else {
2771 		/* clear the extent format for fast symlink */
2772 		ext4_clear_inode_flag(inode, EXT4_INODE_EXTENTS);
2773 		inode->i_op = &ext4_fast_symlink_inode_operations;
2774 		memcpy((char *)&EXT4_I(inode)->i_data, symname, l);
2775 		inode->i_size = l-1;
2776 	}
2777 	EXT4_I(inode)->i_disksize = inode->i_size;
2778 	err = ext4_add_nondir(handle, dentry, inode);
2779 out_stop:
2780 	ext4_journal_stop(handle);
2781 	if (err == -ENOSPC && ext4_should_retry_alloc(dir->i_sb, &retries))
2782 		goto retry;
2783 	return err;
2784 err_drop_inode:
2785 	unlock_new_inode(inode);
2786 	iput(inode);
2787 	return err;
2788 }
2789 
2790 static int ext4_link(struct dentry *old_dentry,
2791 		     struct inode *dir, struct dentry *dentry)
2792 {
2793 	handle_t *handle;
2794 	struct inode *inode = old_dentry->d_inode;
2795 	int err, retries = 0;
2796 
2797 	if (inode->i_nlink >= EXT4_LINK_MAX)
2798 		return -EMLINK;
2799 
2800 	dquot_initialize(dir);
2801 
2802 retry:
2803 	handle = ext4_journal_start(dir, EXT4_DATA_TRANS_BLOCKS(dir->i_sb) +
2804 					EXT4_INDEX_EXTRA_TRANS_BLOCKS);
2805 	if (IS_ERR(handle))
2806 		return PTR_ERR(handle);
2807 
2808 	if (IS_DIRSYNC(dir))
2809 		ext4_handle_sync(handle);
2810 
2811 	inode->i_ctime = ext4_current_time(inode);
2812 	ext4_inc_count(handle, inode);
2813 	ihold(inode);
2814 
2815 	err = ext4_add_entry(handle, dentry, inode);
2816 	if (!err) {
2817 		ext4_mark_inode_dirty(handle, inode);
2818 		d_instantiate(dentry, inode);
2819 	} else {
2820 		drop_nlink(inode);
2821 		iput(inode);
2822 	}
2823 	ext4_journal_stop(handle);
2824 	if (err == -ENOSPC && ext4_should_retry_alloc(dir->i_sb, &retries))
2825 		goto retry;
2826 	return err;
2827 }
2828 
/*
 * Inode number field of the second directory entry in the block starting at
 * @blk (the entry following the first one).  Expands to an lvalue holding
 * the raw on-disk value; @blksz is the directory block size.
 */
#define PARENT_INO(blk, blksz) \
	(ext4_next_entry((struct ext4_dir_entry_2 *)(blk), blksz)->inode)
2831 
2832 /*
2833  * Anybody can rename anything with this: the permission checks are left to the
2834  * higher-level routines.
2835  */
2836 static int ext4_rename(struct inode *old_dir, struct dentry *old_dentry,
2837 		       struct inode *new_dir, struct dentry *new_dentry)
2838 {
2839 	handle_t *handle;
2840 	struct inode *old_inode, *new_inode;
2841 	struct buffer_head *old_bh, *new_bh, *dir_bh;
2842 	struct ext4_dir_entry_2 *old_de, *new_de;
2843 	int retval, force_da_alloc = 0;
2844 
2845 	dquot_initialize(old_dir);
2846 	dquot_initialize(new_dir);
2847 
2848 	old_bh = new_bh = dir_bh = NULL;
2849 
2850 	/* Initialize quotas before so that eventual writes go
2851 	 * in separate transaction */
2852 	if (new_dentry->d_inode)
2853 		dquot_initialize(new_dentry->d_inode);
2854 	handle = ext4_journal_start(old_dir, 2 *
2855 					EXT4_DATA_TRANS_BLOCKS(old_dir->i_sb) +
2856 					EXT4_INDEX_EXTRA_TRANS_BLOCKS + 2);
2857 	if (IS_ERR(handle))
2858 		return PTR_ERR(handle);
2859 
2860 	if (IS_DIRSYNC(old_dir) || IS_DIRSYNC(new_dir))
2861 		ext4_handle_sync(handle);
2862 
2863 	old_bh = ext4_find_entry(old_dir, &old_dentry->d_name, &old_de);
2864 	/*
2865 	 *  Check for inode number is _not_ due to possible IO errors.
2866 	 *  We might rmdir the source, keep it as pwd of some process
2867 	 *  and merrily kill the link to whatever was created under the
2868 	 *  same name. Goodbye sticky bit ;-<
2869 	 */
2870 	old_inode = old_dentry->d_inode;
2871 	retval = -ENOENT;
2872 	if (!old_bh || le32_to_cpu(old_de->inode) != old_inode->i_ino)
2873 		goto end_rename;
2874 
2875 	new_inode = new_dentry->d_inode;
2876 	new_bh = ext4_find_entry(new_dir, &new_dentry->d_name, &new_de);
2877 	if (new_bh) {
2878 		if (!new_inode) {
2879 			brelse(new_bh);
2880 			new_bh = NULL;
2881 		}
2882 	}
2883 	if (S_ISDIR(old_inode->i_mode)) {
2884 		if (new_inode) {
2885 			retval = -ENOTEMPTY;
2886 			if (!empty_dir(new_inode))
2887 				goto end_rename;
2888 		}
2889 		retval = -EIO;
2890 		if (!(dir_bh = ext4_bread(handle, old_inode, 0, 0, &retval))) {
2891 			if (!retval) {
2892 				retval = -EIO;
2893 				ext4_error(old_inode->i_sb,
2894 					   "Directory hole detected on inode %lu\n",
2895 					   old_inode->i_ino);
2896 			}
2897 			goto end_rename;
2898 		}
2899 		if (!buffer_verified(dir_bh) &&
2900 		    !ext4_dirent_csum_verify(old_inode,
2901 				(struct ext4_dir_entry *)dir_bh->b_data))
2902 			goto end_rename;
2903 		set_buffer_verified(dir_bh);
2904 		if (le32_to_cpu(PARENT_INO(dir_bh->b_data,
2905 				old_dir->i_sb->s_blocksize)) != old_dir->i_ino)
2906 			goto end_rename;
2907 		retval = -EMLINK;
2908 		if (!new_inode && new_dir != old_dir &&
2909 		    EXT4_DIR_LINK_MAX(new_dir))
2910 			goto end_rename;
2911 		BUFFER_TRACE(dir_bh, "get_write_access");
2912 		retval = ext4_journal_get_write_access(handle, dir_bh);
2913 		if (retval)
2914 			goto end_rename;
2915 	}
2916 	if (!new_bh) {
2917 		retval = ext4_add_entry(handle, new_dentry, old_inode);
2918 		if (retval)
2919 			goto end_rename;
2920 	} else {
2921 		BUFFER_TRACE(new_bh, "get write access");
2922 		retval = ext4_journal_get_write_access(handle, new_bh);
2923 		if (retval)
2924 			goto end_rename;
2925 		new_de->inode = cpu_to_le32(old_inode->i_ino);
2926 		if (EXT4_HAS_INCOMPAT_FEATURE(new_dir->i_sb,
2927 					      EXT4_FEATURE_INCOMPAT_FILETYPE))
2928 			new_de->file_type = old_de->file_type;
2929 		new_dir->i_version++;
2930 		new_dir->i_ctime = new_dir->i_mtime =
2931 					ext4_current_time(new_dir);
2932 		ext4_mark_inode_dirty(handle, new_dir);
2933 		BUFFER_TRACE(new_bh, "call ext4_handle_dirty_metadata");
2934 		retval = ext4_handle_dirty_dirent_node(handle, new_dir, new_bh);
2935 		if (unlikely(retval)) {
2936 			ext4_std_error(new_dir->i_sb, retval);
2937 			goto end_rename;
2938 		}
2939 		brelse(new_bh);
2940 		new_bh = NULL;
2941 	}
2942 
2943 	/*
2944 	 * Like most other Unix systems, set the ctime for inodes on a
2945 	 * rename.
2946 	 */
2947 	old_inode->i_ctime = ext4_current_time(old_inode);
2948 	ext4_mark_inode_dirty(handle, old_inode);
2949 
2950 	/*
2951 	 * ok, that's it
2952 	 */
2953 	if (le32_to_cpu(old_de->inode) != old_inode->i_ino ||
2954 	    old_de->name_len != old_dentry->d_name.len ||
2955 	    strncmp(old_de->name, old_dentry->d_name.name, old_de->name_len) ||
2956 	    (retval = ext4_delete_entry(handle, old_dir,
2957 					old_de, old_bh)) == -ENOENT) {
2958 		/* old_de could have moved from under us during htree split, so
2959 		 * make sure that we are deleting the right entry.  We might
2960 		 * also be pointing to a stale entry in the unused part of
2961 		 * old_bh so just checking inum and the name isn't enough. */
2962 		struct buffer_head *old_bh2;
2963 		struct ext4_dir_entry_2 *old_de2;
2964 
2965 		old_bh2 = ext4_find_entry(old_dir, &old_dentry->d_name, &old_de2);
2966 		if (old_bh2) {
2967 			retval = ext4_delete_entry(handle, old_dir,
2968 						   old_de2, old_bh2);
2969 			brelse(old_bh2);
2970 		}
2971 	}
2972 	if (retval) {
2973 		ext4_warning(old_dir->i_sb,
2974 				"Deleting old file (%lu), %d, error=%d",
2975 				old_dir->i_ino, old_dir->i_nlink, retval);
2976 	}
2977 
2978 	if (new_inode) {
2979 		ext4_dec_count(handle, new_inode);
2980 		new_inode->i_ctime = ext4_current_time(new_inode);
2981 	}
2982 	old_dir->i_ctime = old_dir->i_mtime = ext4_current_time(old_dir);
2983 	ext4_update_dx_flag(old_dir);
2984 	if (dir_bh) {
2985 		PARENT_INO(dir_bh->b_data, new_dir->i_sb->s_blocksize) =
2986 						cpu_to_le32(new_dir->i_ino);
2987 		BUFFER_TRACE(dir_bh, "call ext4_handle_dirty_metadata");
2988 		if (is_dx(old_inode)) {
2989 			retval = ext4_handle_dirty_dx_node(handle,
2990 							   old_inode,
2991 							   dir_bh);
2992 		} else {
2993 			retval = ext4_handle_dirty_dirent_node(handle,
2994 							       old_inode,
2995 							       dir_bh);
2996 		}
2997 		if (retval) {
2998 			ext4_std_error(old_dir->i_sb, retval);
2999 			goto end_rename;
3000 		}
3001 		ext4_dec_count(handle, old_dir);
3002 		if (new_inode) {
3003 			/* checked empty_dir above, can't have another parent,
3004 			 * ext4_dec_count() won't work for many-linked dirs */
3005 			clear_nlink(new_inode);
3006 		} else {
3007 			ext4_inc_count(handle, new_dir);
3008 			ext4_update_dx_flag(new_dir);
3009 			ext4_mark_inode_dirty(handle, new_dir);
3010 		}
3011 	}
3012 	ext4_mark_inode_dirty(handle, old_dir);
3013 	if (new_inode) {
3014 		ext4_mark_inode_dirty(handle, new_inode);
3015 		if (!new_inode->i_nlink)
3016 			ext4_orphan_add(handle, new_inode);
3017 		if (!test_opt(new_dir->i_sb, NO_AUTO_DA_ALLOC))
3018 			force_da_alloc = 1;
3019 	}
3020 	retval = 0;
3021 
3022 end_rename:
3023 	brelse(dir_bh);
3024 	brelse(old_bh);
3025 	brelse(new_bh);
3026 	ext4_journal_stop(handle);
3027 	if (retval == 0 && force_da_alloc)
3028 		ext4_alloc_da_blocks(old_inode);
3029 	return retval;
3030 }
3031 
3032 /*
3033  * directories can handle most operations...
3034  */
3035 const struct inode_operations ext4_dir_inode_operations = {
	/*
	 * Operation table for ext4 directory inodes.  Each designated
	 * initializer binds one inode_operations slot to its handler;
	 * any slot not named here is zero-initialized by the compiler.
	 */
3036 	.create		= ext4_create,
3037 	.lookup		= ext4_lookup,
3038 	.link		= ext4_link,
3039 	.unlink		= ext4_unlink,
3040 	.symlink	= ext4_symlink,
3041 	.mkdir		= ext4_mkdir,
3042 	.rmdir		= ext4_rmdir,
3043 	.mknod		= ext4_mknod,
3044 	.rename		= ext4_rename,
3045 	.setattr	= ext4_setattr,
3046 #ifdef CONFIG_EXT4_FS_XATTR
	/*
	 * Extended-attribute handlers: only present in the table when
	 * CONFIG_EXT4_FS_XATTR is defined.  Three of the four slots use
	 * the generic_* helpers; listxattr uses the ext4-specific one.
	 */
3047 	.setxattr	= generic_setxattr,
3048 	.getxattr	= generic_getxattr,
3049 	.listxattr	= ext4_listxattr,
3050 	.removexattr	= generic_removexattr,
3051 #endif
	/* Set unconditionally, i.e. outside the xattr config guard. */
3052 	.get_acl	= ext4_get_acl,
3053 	.fiemap         = ext4_fiemap,
3054 };
3055 
/*
 * Reduced operation table: it binds only attribute-related slots
 * (setattr, the xattr handlers, get_acl) and none of the namespace
 * operations such as create/lookup/rename.
 * NOTE(review): presumably installed on special inodes (device nodes,
 * FIFOs, sockets) -- confirm against the code that assigns it.
 */
3056 const struct inode_operations ext4_special_inode_operations = {
3057 	.setattr	= ext4_setattr,
3058 #ifdef CONFIG_EXT4_FS_XATTR
	/* Extended-attribute handlers; compiled in only with CONFIG_EXT4_FS_XATTR. */
3059 	.setxattr	= generic_setxattr,
3060 	.getxattr	= generic_getxattr,
3061 	.listxattr	= ext4_listxattr,
3062 	.removexattr	= generic_removexattr,
3063 #endif
	/* Set unconditionally, i.e. outside the xattr config guard. */
3064 	.get_acl	= ext4_get_acl,
3065 };
3066