xref: /openbmc/linux/fs/ext4/namei.c (revision a6978d1b7bb8f3a25305e8ff7d367f7289614c5d)
1  // SPDX-License-Identifier: GPL-2.0
2  /*
3   *  linux/fs/ext4/namei.c
4   *
5   * Copyright (C) 1992, 1993, 1994, 1995
6   * Remy Card (card@masi.ibp.fr)
7   * Laboratoire MASI - Institut Blaise Pascal
8   * Universite Pierre et Marie Curie (Paris VI)
9   *
10   *  from
11   *
12   *  linux/fs/minix/namei.c
13   *
14   *  Copyright (C) 1991, 1992  Linus Torvalds
15   *
16   *  Big-endian to little-endian byte-swapping/bitmaps by
17   *        David S. Miller (davem@caip.rutgers.edu), 1995
18   *  Directory entry file type support and forward compatibility hooks
19   *	for B-tree directories by Theodore Ts'o (tytso@mit.edu), 1998
20   *  Hash Tree Directory indexing (c)
21   *	Daniel Phillips, 2001
22   *  Hash Tree Directory indexing porting
23   *	Christopher Li, 2002
24   *  Hash Tree Directory indexing cleanup
25   *	Theodore Ts'o, 2002
26   */
27  
28  #include <linux/fs.h>
29  #include <linux/pagemap.h>
30  #include <linux/time.h>
31  #include <linux/fcntl.h>
32  #include <linux/stat.h>
33  #include <linux/string.h>
34  #include <linux/quotaops.h>
35  #include <linux/buffer_head.h>
36  #include <linux/bio.h>
37  #include <linux/iversion.h>
38  #include <linux/unicode.h>
39  #include "ext4.h"
40  #include "ext4_jbd2.h"
41  
42  #include "xattr.h"
43  #include "acl.h"
44  
45  #include <trace/events/ext4.h>
/*
 * Read-ahead tuning for directory searches: NAMEI_RA_SIZE is the number
 * of chunks multiplied by the number of blocks per chunk.
 */
#define NAMEI_RA_BLOCKS		4
#define NAMEI_RA_CHUNKS		2
#define NAMEI_RA_SIZE		(NAMEI_RA_CHUNKS * NAMEI_RA_BLOCKS)
52  
53  static struct buffer_head *ext4_append(handle_t *handle,
54  					struct inode *inode,
55  					ext4_lblk_t *block)
56  {
57  	struct ext4_map_blocks map;
58  	struct buffer_head *bh;
59  	int err;
60  
61  	if (unlikely(EXT4_SB(inode->i_sb)->s_max_dir_size_kb &&
62  		     ((inode->i_size >> 10) >=
63  		      EXT4_SB(inode->i_sb)->s_max_dir_size_kb)))
64  		return ERR_PTR(-ENOSPC);
65  
66  	*block = inode->i_size >> inode->i_sb->s_blocksize_bits;
67  	map.m_lblk = *block;
68  	map.m_len = 1;
69  
70  	/*
71  	 * We're appending new directory block. Make sure the block is not
72  	 * allocated yet, otherwise we will end up corrupting the
73  	 * directory.
74  	 */
75  	err = ext4_map_blocks(NULL, inode, &map, 0);
76  	if (err < 0)
77  		return ERR_PTR(err);
78  	if (err) {
79  		EXT4_ERROR_INODE(inode, "Logical block already allocated");
80  		return ERR_PTR(-EFSCORRUPTED);
81  	}
82  
83  	bh = ext4_bread(handle, inode, *block, EXT4_GET_BLOCKS_CREATE);
84  	if (IS_ERR(bh))
85  		return bh;
86  	inode->i_size += inode->i_sb->s_blocksize;
87  	EXT4_I(inode)->i_disksize = inode->i_size;
88  	err = ext4_mark_inode_dirty(handle, inode);
89  	if (err)
90  		goto out;
91  	BUFFER_TRACE(bh, "get_write_access");
92  	err = ext4_journal_get_write_access(handle, inode->i_sb, bh,
93  					    EXT4_JTR_NONE);
94  	if (err)
95  		goto out;
96  	return bh;
97  
98  out:
99  	brelse(bh);
100  	ext4_std_error(inode->i_sb, err);
101  	return ERR_PTR(err);
102  }
103  
/* Defined below; needed early by __ext4_read_dirblock(). */
static int ext4_dx_csum_verify(struct inode *inode,
			       struct ext4_dir_entry *dirent);
106  
/*
 * Hints to ext4_read_dirblock regarding whether we expect a directory
 * block being read to be an index block, or a block containing
 * directory entries (and if the latter, whether it was found via a
 * logical block in an htree index block).  This is used to control
 * what sort of sanity checking ext4_read_dirblock() will do on the
 * directory block read from the storage device.  EITHER means
 * the caller doesn't know what kind of directory block will be read,
 * so no specific verification will be done.
 */
typedef enum {
	EITHER,		/* caller does not know what kind of block it is */
	INDEX,		/* caller expects an htree index block */
	DIRENT,		/* caller expects a block of directory entries */
	DIRENT_HTREE	/* dirent block that was reached via an htree index */
} dirblock_type_t;
120  
121  #define ext4_read_dirblock(inode, block, type) \
122  	__ext4_read_dirblock((inode), (block), (type), __func__, __LINE__)
123  
124  static struct buffer_head *__ext4_read_dirblock(struct inode *inode,
125  						ext4_lblk_t block,
126  						dirblock_type_t type,
127  						const char *func,
128  						unsigned int line)
129  {
130  	struct buffer_head *bh;
131  	struct ext4_dir_entry *dirent;
132  	int is_dx_block = 0;
133  
134  	if (block >= inode->i_size >> inode->i_blkbits) {
135  		ext4_error_inode(inode, func, line, block,
136  		       "Attempting to read directory block (%u) that is past i_size (%llu)",
137  		       block, inode->i_size);
138  		return ERR_PTR(-EFSCORRUPTED);
139  	}
140  
141  	if (ext4_simulate_fail(inode->i_sb, EXT4_SIM_DIRBLOCK_EIO))
142  		bh = ERR_PTR(-EIO);
143  	else
144  		bh = ext4_bread(NULL, inode, block, 0);
145  	if (IS_ERR(bh)) {
146  		__ext4_warning(inode->i_sb, func, line,
147  			       "inode #%lu: lblock %lu: comm %s: "
148  			       "error %ld reading directory block",
149  			       inode->i_ino, (unsigned long)block,
150  			       current->comm, PTR_ERR(bh));
151  
152  		return bh;
153  	}
154  	/* The first directory block must not be a hole. */
155  	if (!bh && (type == INDEX || type == DIRENT_HTREE || block == 0)) {
156  		ext4_error_inode(inode, func, line, block,
157  				 "Directory hole found for htree %s block %u",
158  				 (type == INDEX) ? "index" : "leaf", block);
159  		return ERR_PTR(-EFSCORRUPTED);
160  	}
161  	if (!bh)
162  		return NULL;
163  	dirent = (struct ext4_dir_entry *) bh->b_data;
164  	/* Determine whether or not we have an index block */
165  	if (is_dx(inode)) {
166  		if (block == 0)
167  			is_dx_block = 1;
168  		else if (ext4_rec_len_from_disk(dirent->rec_len,
169  						inode->i_sb->s_blocksize) ==
170  			 inode->i_sb->s_blocksize)
171  			is_dx_block = 1;
172  	}
173  	if (!is_dx_block && type == INDEX) {
174  		ext4_error_inode(inode, func, line, block,
175  		       "directory leaf block found instead of index block");
176  		brelse(bh);
177  		return ERR_PTR(-EFSCORRUPTED);
178  	}
179  	if (!ext4_has_metadata_csum(inode->i_sb) ||
180  	    buffer_verified(bh))
181  		return bh;
182  
183  	/*
184  	 * An empty leaf block can get mistaken for a index block; for
185  	 * this reason, we can only check the index checksum when the
186  	 * caller is sure it should be an index block.
187  	 */
188  	if (is_dx_block && type == INDEX) {
189  		if (ext4_dx_csum_verify(inode, dirent) &&
190  		    !ext4_simulate_fail(inode->i_sb, EXT4_SIM_DIRBLOCK_CRC))
191  			set_buffer_verified(bh);
192  		else {
193  			ext4_error_inode_err(inode, func, line, block,
194  					     EFSBADCRC,
195  					     "Directory index failed checksum");
196  			brelse(bh);
197  			return ERR_PTR(-EFSBADCRC);
198  		}
199  	}
200  	if (!is_dx_block) {
201  		if (ext4_dirblock_csum_verify(inode, bh) &&
202  		    !ext4_simulate_fail(inode->i_sb, EXT4_SIM_DIRBLOCK_CRC))
203  			set_buffer_verified(bh);
204  		else {
205  			ext4_error_inode_err(inode, func, line, block,
206  					     EFSBADCRC,
207  					     "Directory block failed checksum");
208  			brelse(bh);
209  			return ERR_PTR(-EFSBADCRC);
210  		}
211  	}
212  	return bh;
213  }
214  
/* dxtrace(stmt) emits stmt only when DX_DEBUG is defined. */
#ifndef DX_DEBUG
#define dxtrace(command)
#else
#define dxtrace(command) command
#endif
220  
221  struct fake_dirent
222  {
223  	__le32 inode;
224  	__le16 rec_len;
225  	u8 name_len;
226  	u8 file_type;
227  };
228  
229  struct dx_countlimit
230  {
231  	__le16 limit;
232  	__le16 count;
233  };
234  
235  struct dx_entry
236  {
237  	__le32 hash;
238  	__le32 block;
239  };
240  
241  /*
242   * dx_root_info is laid out so that if it should somehow get overlaid by a
243   * dirent the two low bits of the hash version will be zero.  Therefore, the
244   * hash version mod 4 should never be 0.  Sincerely, the paranoia department.
245   */
246  
247  struct dx_root
248  {
249  	struct fake_dirent dot;
250  	char dot_name[4];
251  	struct fake_dirent dotdot;
252  	char dotdot_name[4];
253  	struct dx_root_info
254  	{
255  		__le32 reserved_zero;
256  		u8 hash_version;
257  		u8 info_length; /* 8 */
258  		u8 indirect_levels;
259  		u8 unused_flags;
260  	}
261  	info;
262  	struct dx_entry	entries[];
263  };
264  
265  struct dx_node
266  {
267  	struct fake_dirent fake;
268  	struct dx_entry	entries[];
269  };
270  
271  
/*
 * One level of an index lookup: the buffer holding the index block, the
 * start of its entry array, and the entry currently selected in it.
 */
struct dx_frame {
	struct buffer_head *bh;
	struct dx_entry *entries;
	struct dx_entry *at;
};
278  
279  struct dx_map_entry
280  {
281  	u32 hash;
282  	u16 offs;
283  	u16 size;
284  };
285  
286  /*
287   * This goes at the end of each htree block.
288   */
289  struct dx_tail {
290  	u32 dt_reserved;
291  	__le32 dt_checksum;	/* crc32c(uuid+inum+dirblock) */
292  };
293  
294  static inline ext4_lblk_t dx_get_block(struct dx_entry *entry);
295  static void dx_set_block(struct dx_entry *entry, ext4_lblk_t value);
296  static inline unsigned dx_get_hash(struct dx_entry *entry);
297  static void dx_set_hash(struct dx_entry *entry, unsigned value);
298  static unsigned dx_get_count(struct dx_entry *entries);
299  static unsigned dx_get_limit(struct dx_entry *entries);
300  static void dx_set_count(struct dx_entry *entries, unsigned value);
301  static void dx_set_limit(struct dx_entry *entries, unsigned value);
302  static unsigned dx_root_limit(struct inode *dir, unsigned infosize);
303  static unsigned dx_node_limit(struct inode *dir);
304  static struct dx_frame *dx_probe(struct ext4_filename *fname,
305  				 struct inode *dir,
306  				 struct dx_hash_info *hinfo,
307  				 struct dx_frame *frame);
308  static void dx_release(struct dx_frame *frames);
309  static int dx_make_map(struct inode *dir, struct buffer_head *bh,
310  		       struct dx_hash_info *hinfo,
311  		       struct dx_map_entry *map_tail);
312  static void dx_sort_map(struct dx_map_entry *map, unsigned count);
313  static struct ext4_dir_entry_2 *dx_move_dirents(struct inode *dir, char *from,
314  					char *to, struct dx_map_entry *offsets,
315  					int count, unsigned int blocksize);
316  static struct ext4_dir_entry_2 *dx_pack_dirents(struct inode *dir, char *base,
317  						unsigned int blocksize);
318  static void dx_insert_block(struct dx_frame *frame,
319  					u32 hash, ext4_lblk_t block);
320  static int ext4_htree_next_block(struct inode *dir, __u32 hash,
321  				 struct dx_frame *frame,
322  				 struct dx_frame *frames,
323  				 __u32 *start_hash);
324  static struct buffer_head * ext4_dx_find_entry(struct inode *dir,
325  		struct ext4_filename *fname,
326  		struct ext4_dir_entry_2 **res_dir);
327  static int ext4_dx_add_entry(handle_t *handle, struct ext4_filename *fname,
328  			     struct inode *dir, struct inode *inode);
329  
330  /* checksumming functions */
331  void ext4_initialize_dirent_tail(struct buffer_head *bh,
332  				 unsigned int blocksize)
333  {
334  	struct ext4_dir_entry_tail *t = EXT4_DIRENT_TAIL(bh->b_data, blocksize);
335  
336  	memset(t, 0, sizeof(struct ext4_dir_entry_tail));
337  	t->det_rec_len = ext4_rec_len_to_disk(
338  			sizeof(struct ext4_dir_entry_tail), blocksize);
339  	t->det_reserved_ft = EXT4_FT_DIR_CSUM;
340  }
341  
342  /* Walk through a dirent block to find a checksum "dirent" at the tail */
343  static struct ext4_dir_entry_tail *get_dirent_tail(struct inode *inode,
344  						   struct buffer_head *bh)
345  {
346  	struct ext4_dir_entry_tail *t;
347  	int blocksize = EXT4_BLOCK_SIZE(inode->i_sb);
348  
349  #ifdef PARANOID
350  	struct ext4_dir_entry *d, *top;
351  
352  	d = (struct ext4_dir_entry *)bh->b_data;
353  	top = (struct ext4_dir_entry *)(bh->b_data +
354  		(blocksize - sizeof(struct ext4_dir_entry_tail)));
355  	while (d < top && ext4_rec_len_from_disk(d->rec_len, blocksize))
356  		d = (struct ext4_dir_entry *)(((void *)d) +
357  		    ext4_rec_len_from_disk(d->rec_len, blocksize));
358  
359  	if (d != top)
360  		return NULL;
361  
362  	t = (struct ext4_dir_entry_tail *)d;
363  #else
364  	t = EXT4_DIRENT_TAIL(bh->b_data, EXT4_BLOCK_SIZE(inode->i_sb));
365  #endif
366  
367  	if (t->det_reserved_zero1 ||
368  	    (ext4_rec_len_from_disk(t->det_rec_len, blocksize) !=
369  	     sizeof(struct ext4_dir_entry_tail)) ||
370  	    t->det_reserved_zero2 ||
371  	    t->det_reserved_ft != EXT4_FT_DIR_CSUM)
372  		return NULL;
373  
374  	return t;
375  }
376  
377  static __le32 ext4_dirblock_csum(struct inode *inode, void *dirent, int size)
378  {
379  	struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
380  	struct ext4_inode_info *ei = EXT4_I(inode);
381  	__u32 csum;
382  
383  	csum = ext4_chksum(sbi, ei->i_csum_seed, (__u8 *)dirent, size);
384  	return cpu_to_le32(csum);
385  }
386  
/* Warn, tagged with the call site, that a leaf has no room for a checksum. */
#define warn_no_space_for_csum(inode)					\
	__warn_no_space_for_csum((inode), __func__, __LINE__)

/*
 * Log a warning against @inode, attributed to @func and @line, that the
 * directory leaf block has no space for its checksum.
 */
static void __warn_no_space_for_csum(struct inode *inode, const char *func,
				     unsigned int line)
{
	__ext4_warning_inode(inode, func, line,
		"No space for directory leaf checksum. Please run e2fsck -D.");
}
396  
397  int ext4_dirblock_csum_verify(struct inode *inode, struct buffer_head *bh)
398  {
399  	struct ext4_dir_entry_tail *t;
400  
401  	if (!ext4_has_metadata_csum(inode->i_sb))
402  		return 1;
403  
404  	t = get_dirent_tail(inode, bh);
405  	if (!t) {
406  		warn_no_space_for_csum(inode);
407  		return 0;
408  	}
409  
410  	if (t->det_checksum != ext4_dirblock_csum(inode, bh->b_data,
411  						  (char *)t - bh->b_data))
412  		return 0;
413  
414  	return 1;
415  }
416  
417  static void ext4_dirblock_csum_set(struct inode *inode,
418  				 struct buffer_head *bh)
419  {
420  	struct ext4_dir_entry_tail *t;
421  
422  	if (!ext4_has_metadata_csum(inode->i_sb))
423  		return;
424  
425  	t = get_dirent_tail(inode, bh);
426  	if (!t) {
427  		warn_no_space_for_csum(inode);
428  		return;
429  	}
430  
431  	t->det_checksum = ext4_dirblock_csum(inode, bh->b_data,
432  					     (char *)t - bh->b_data);
433  }
434  
435  int ext4_handle_dirty_dirblock(handle_t *handle,
436  			       struct inode *inode,
437  			       struct buffer_head *bh)
438  {
439  	ext4_dirblock_csum_set(inode, bh);
440  	return ext4_handle_dirty_metadata(handle, inode, bh);
441  }
442  
443  static struct dx_countlimit *get_dx_countlimit(struct inode *inode,
444  					       struct ext4_dir_entry *dirent,
445  					       int *offset)
446  {
447  	struct ext4_dir_entry *dp;
448  	struct dx_root_info *root;
449  	int count_offset;
450  	int blocksize = EXT4_BLOCK_SIZE(inode->i_sb);
451  	unsigned int rlen = ext4_rec_len_from_disk(dirent->rec_len, blocksize);
452  
453  	if (rlen == blocksize)
454  		count_offset = 8;
455  	else if (rlen == 12) {
456  		dp = (struct ext4_dir_entry *)(((void *)dirent) + 12);
457  		if (ext4_rec_len_from_disk(dp->rec_len, blocksize) != blocksize - 12)
458  			return NULL;
459  		root = (struct dx_root_info *)(((void *)dp + 12));
460  		if (root->reserved_zero ||
461  		    root->info_length != sizeof(struct dx_root_info))
462  			return NULL;
463  		count_offset = 32;
464  	} else
465  		return NULL;
466  
467  	if (offset)
468  		*offset = count_offset;
469  	return (struct dx_countlimit *)(((void *)dirent) + count_offset);
470  }
471  
472  static __le32 ext4_dx_csum(struct inode *inode, struct ext4_dir_entry *dirent,
473  			   int count_offset, int count, struct dx_tail *t)
474  {
475  	struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
476  	struct ext4_inode_info *ei = EXT4_I(inode);
477  	__u32 csum;
478  	int size;
479  	__u32 dummy_csum = 0;
480  	int offset = offsetof(struct dx_tail, dt_checksum);
481  
482  	size = count_offset + (count * sizeof(struct dx_entry));
483  	csum = ext4_chksum(sbi, ei->i_csum_seed, (__u8 *)dirent, size);
484  	csum = ext4_chksum(sbi, csum, (__u8 *)t, offset);
485  	csum = ext4_chksum(sbi, csum, (__u8 *)&dummy_csum, sizeof(dummy_csum));
486  
487  	return cpu_to_le32(csum);
488  }
489  
490  static int ext4_dx_csum_verify(struct inode *inode,
491  			       struct ext4_dir_entry *dirent)
492  {
493  	struct dx_countlimit *c;
494  	struct dx_tail *t;
495  	int count_offset, limit, count;
496  
497  	if (!ext4_has_metadata_csum(inode->i_sb))
498  		return 1;
499  
500  	c = get_dx_countlimit(inode, dirent, &count_offset);
501  	if (!c) {
502  		EXT4_ERROR_INODE(inode, "dir seems corrupt?  Run e2fsck -D.");
503  		return 0;
504  	}
505  	limit = le16_to_cpu(c->limit);
506  	count = le16_to_cpu(c->count);
507  	if (count_offset + (limit * sizeof(struct dx_entry)) >
508  	    EXT4_BLOCK_SIZE(inode->i_sb) - sizeof(struct dx_tail)) {
509  		warn_no_space_for_csum(inode);
510  		return 0;
511  	}
512  	t = (struct dx_tail *)(((struct dx_entry *)c) + limit);
513  
514  	if (t->dt_checksum != ext4_dx_csum(inode, dirent, count_offset,
515  					    count, t))
516  		return 0;
517  	return 1;
518  }
519  
520  static void ext4_dx_csum_set(struct inode *inode, struct ext4_dir_entry *dirent)
521  {
522  	struct dx_countlimit *c;
523  	struct dx_tail *t;
524  	int count_offset, limit, count;
525  
526  	if (!ext4_has_metadata_csum(inode->i_sb))
527  		return;
528  
529  	c = get_dx_countlimit(inode, dirent, &count_offset);
530  	if (!c) {
531  		EXT4_ERROR_INODE(inode, "dir seems corrupt?  Run e2fsck -D.");
532  		return;
533  	}
534  	limit = le16_to_cpu(c->limit);
535  	count = le16_to_cpu(c->count);
536  	if (count_offset + (limit * sizeof(struct dx_entry)) >
537  	    EXT4_BLOCK_SIZE(inode->i_sb) - sizeof(struct dx_tail)) {
538  		warn_no_space_for_csum(inode);
539  		return;
540  	}
541  	t = (struct dx_tail *)(((struct dx_entry *)c) + limit);
542  
543  	t->dt_checksum = ext4_dx_csum(inode, dirent, count_offset, count, t);
544  }
545  
546  static inline int ext4_handle_dirty_dx_node(handle_t *handle,
547  					    struct inode *inode,
548  					    struct buffer_head *bh)
549  {
550  	ext4_dx_csum_set(inode, (struct ext4_dir_entry *)bh->b_data);
551  	return ext4_handle_dirty_metadata(handle, inode, bh);
552  }
553  
554  /*
555   * p is at least 6 bytes before the end of page
556   */
557  static inline struct ext4_dir_entry_2 *
558  ext4_next_entry(struct ext4_dir_entry_2 *p, unsigned long blocksize)
559  {
560  	return (struct ext4_dir_entry_2 *)((char *)p +
561  		ext4_rec_len_from_disk(p->rec_len, blocksize));
562  }
563  
564  /*
565   * Future: use high four bits of block for coalesce-on-delete flags
566   * Mask them off for now.
567   */
568  
569  static inline ext4_lblk_t dx_get_block(struct dx_entry *entry)
570  {
571  	return le32_to_cpu(entry->block) & 0x0fffffff;
572  }
573  
574  static inline void dx_set_block(struct dx_entry *entry, ext4_lblk_t value)
575  {
576  	entry->block = cpu_to_le32(value);
577  }
578  
579  static inline unsigned dx_get_hash(struct dx_entry *entry)
580  {
581  	return le32_to_cpu(entry->hash);
582  }
583  
584  static inline void dx_set_hash(struct dx_entry *entry, unsigned value)
585  {
586  	entry->hash = cpu_to_le32(value);
587  }
588  
589  static inline unsigned dx_get_count(struct dx_entry *entries)
590  {
591  	return le16_to_cpu(((struct dx_countlimit *) entries)->count);
592  }
593  
594  static inline unsigned dx_get_limit(struct dx_entry *entries)
595  {
596  	return le16_to_cpu(((struct dx_countlimit *) entries)->limit);
597  }
598  
599  static inline void dx_set_count(struct dx_entry *entries, unsigned value)
600  {
601  	((struct dx_countlimit *) entries)->count = cpu_to_le16(value);
602  }
603  
604  static inline void dx_set_limit(struct dx_entry *entries, unsigned value)
605  {
606  	((struct dx_countlimit *) entries)->limit = cpu_to_le16(value);
607  }
608  
609  static inline unsigned dx_root_limit(struct inode *dir, unsigned infosize)
610  {
611  	unsigned int entry_space = dir->i_sb->s_blocksize -
612  			ext4_dir_rec_len(1, NULL) -
613  			ext4_dir_rec_len(2, NULL) - infosize;
614  
615  	if (ext4_has_metadata_csum(dir->i_sb))
616  		entry_space -= sizeof(struct dx_tail);
617  	return entry_space / sizeof(struct dx_entry);
618  }
619  
620  static inline unsigned dx_node_limit(struct inode *dir)
621  {
622  	unsigned int entry_space = dir->i_sb->s_blocksize -
623  			ext4_dir_rec_len(0, dir);
624  
625  	if (ext4_has_metadata_csum(dir->i_sb))
626  		entry_space -= sizeof(struct dx_tail);
627  	return entry_space / sizeof(struct dx_entry);
628  }
629  
630  /*
631   * Debug
632   */
633  #ifdef DX_DEBUG
634  static void dx_show_index(char * label, struct dx_entry *entries)
635  {
636  	int i, n = dx_get_count (entries);
637  	printk(KERN_DEBUG "%s index", label);
638  	for (i = 0; i < n; i++) {
639  		printk(KERN_CONT " %x->%lu",
640  		       i ? dx_get_hash(entries + i) : 0,
641  		       (unsigned long)dx_get_block(entries + i));
642  	}
643  	printk(KERN_CONT "\n");
644  }
645  
/* Totals gathered by the debug dump helpers. */
struct stats {
	unsigned names;		/* number of in-use entries seen */
	unsigned space;		/* sum of their record lengths */
	unsigned bcount;	/* number of leaf blocks visited */
};
652  
653  static struct stats dx_show_leaf(struct inode *dir,
654  				struct dx_hash_info *hinfo,
655  				struct ext4_dir_entry_2 *de,
656  				int size, int show_names)
657  {
658  	unsigned names = 0, space = 0;
659  	char *base = (char *) de;
660  	struct dx_hash_info h = *hinfo;
661  
662  	printk("names: ");
663  	while ((char *) de < base + size)
664  	{
665  		if (de->inode)
666  		{
667  			if (show_names)
668  			{
669  #ifdef CONFIG_FS_ENCRYPTION
670  				int len;
671  				char *name;
672  				struct fscrypt_str fname_crypto_str =
673  					FSTR_INIT(NULL, 0);
674  				int res = 0;
675  
676  				name  = de->name;
677  				len = de->name_len;
678  				if (!IS_ENCRYPTED(dir)) {
679  					/* Directory is not encrypted */
680  					(void) ext4fs_dirhash(dir, de->name,
681  						de->name_len, &h);
682  					printk("%*.s:(U)%x.%u ", len,
683  					       name, h.hash,
684  					       (unsigned) ((char *) de
685  							   - base));
686  				} else {
687  					struct fscrypt_str de_name =
688  						FSTR_INIT(name, len);
689  
690  					/* Directory is encrypted */
691  					res = fscrypt_fname_alloc_buffer(
692  						len, &fname_crypto_str);
693  					if (res)
694  						printk(KERN_WARNING "Error "
695  							"allocating crypto "
696  							"buffer--skipping "
697  							"crypto\n");
698  					res = fscrypt_fname_disk_to_usr(dir,
699  						0, 0, &de_name,
700  						&fname_crypto_str);
701  					if (res) {
702  						printk(KERN_WARNING "Error "
703  							"converting filename "
704  							"from disk to usr"
705  							"\n");
706  						name = "??";
707  						len = 2;
708  					} else {
709  						name = fname_crypto_str.name;
710  						len = fname_crypto_str.len;
711  					}
712  					if (IS_CASEFOLDED(dir))
713  						h.hash = EXT4_DIRENT_HASH(de);
714  					else
715  						(void) ext4fs_dirhash(dir,
716  							de->name,
717  							de->name_len, &h);
718  					printk("%*.s:(E)%x.%u ", len, name,
719  					       h.hash, (unsigned) ((char *) de
720  								   - base));
721  					fscrypt_fname_free_buffer(
722  							&fname_crypto_str);
723  				}
724  #else
725  				int len = de->name_len;
726  				char *name = de->name;
727  				(void) ext4fs_dirhash(dir, de->name,
728  						      de->name_len, &h);
729  				printk("%*.s:%x.%u ", len, name, h.hash,
730  				       (unsigned) ((char *) de - base));
731  #endif
732  			}
733  			space += ext4_dir_rec_len(de->name_len, dir);
734  			names++;
735  		}
736  		de = ext4_next_entry(de, size);
737  	}
738  	printk(KERN_CONT "(%i)\n", names);
739  	return (struct stats) { names, space, 1 };
740  }
741  
742  struct stats dx_show_entries(struct dx_hash_info *hinfo, struct inode *dir,
743  			     struct dx_entry *entries, int levels)
744  {
745  	unsigned blocksize = dir->i_sb->s_blocksize;
746  	unsigned count = dx_get_count(entries), names = 0, space = 0, i;
747  	unsigned bcount = 0;
748  	struct buffer_head *bh;
749  	printk("%i indexed blocks...\n", count);
750  	for (i = 0; i < count; i++, entries++)
751  	{
752  		ext4_lblk_t block = dx_get_block(entries);
753  		ext4_lblk_t hash  = i ? dx_get_hash(entries): 0;
754  		u32 range = i < count - 1? (dx_get_hash(entries + 1) - hash): ~hash;
755  		struct stats stats;
756  		printk("%s%3u:%03u hash %8x/%8x ",levels?"":"   ", i, block, hash, range);
757  		bh = ext4_bread(NULL,dir, block, 0);
758  		if (!bh || IS_ERR(bh))
759  			continue;
760  		stats = levels?
761  		   dx_show_entries(hinfo, dir, ((struct dx_node *) bh->b_data)->entries, levels - 1):
762  		   dx_show_leaf(dir, hinfo, (struct ext4_dir_entry_2 *)
763  			bh->b_data, blocksize, 0);
764  		names += stats.names;
765  		space += stats.space;
766  		bcount += stats.bcount;
767  		brelse(bh);
768  	}
769  	if (bcount)
770  		printk(KERN_DEBUG "%snames %u, fullness %u (%u%%)\n",
771  		       levels ? "" : "   ", names, space/bcount,
772  		       (space/bcount)*100/blocksize);
773  	return (struct stats) { names, space, bcount};
774  }
775  
776  /*
777   * Linear search cross check
778   */
779  static inline void htree_rep_invariant_check(struct dx_entry *at,
780  					     struct dx_entry *target,
781  					     u32 hash, unsigned int n)
782  {
783  	while (n--) {
784  		dxtrace(printk(KERN_CONT ","));
785  		if (dx_get_hash(++at) > hash) {
786  			at--;
787  			break;
788  		}
789  	}
790  	ASSERT(at == target - 1);
791  }
792  #else /* DX_DEBUG */
793  static inline void htree_rep_invariant_check(struct dx_entry *at,
794  					     struct dx_entry *target,
795  					     u32 hash, unsigned int n)
796  {
797  }
798  #endif /* DX_DEBUG */
799  
800  /*
801   * Probe for a directory leaf block to search.
802   *
803   * dx_probe can return ERR_BAD_DX_DIR, which means there was a format
804   * error in the directory index, and the caller should fall back to
805   * searching the directory normally.  The callers of dx_probe **MUST**
806   * check for this error code, and make sure it never gets reflected
807   * back to userspace.
808   */
809  static struct dx_frame *
810  dx_probe(struct ext4_filename *fname, struct inode *dir,
811  	 struct dx_hash_info *hinfo, struct dx_frame *frame_in)
812  {
813  	unsigned count, indirect, level, i;
814  	struct dx_entry *at, *entries, *p, *q, *m;
815  	struct dx_root *root;
816  	struct dx_frame *frame = frame_in;
817  	struct dx_frame *ret_err = ERR_PTR(ERR_BAD_DX_DIR);
818  	u32 hash;
819  	ext4_lblk_t block;
820  	ext4_lblk_t blocks[EXT4_HTREE_LEVEL];
821  
822  	memset(frame_in, 0, EXT4_HTREE_LEVEL * sizeof(frame_in[0]));
823  	frame->bh = ext4_read_dirblock(dir, 0, INDEX);
824  	if (IS_ERR(frame->bh))
825  		return (struct dx_frame *) frame->bh;
826  
827  	root = (struct dx_root *) frame->bh->b_data;
828  	if (root->info.hash_version != DX_HASH_TEA &&
829  	    root->info.hash_version != DX_HASH_HALF_MD4 &&
830  	    root->info.hash_version != DX_HASH_LEGACY &&
831  	    root->info.hash_version != DX_HASH_SIPHASH) {
832  		ext4_warning_inode(dir, "Unrecognised inode hash code %u",
833  				   root->info.hash_version);
834  		goto fail;
835  	}
836  	if (ext4_hash_in_dirent(dir)) {
837  		if (root->info.hash_version != DX_HASH_SIPHASH) {
838  			ext4_warning_inode(dir,
839  				"Hash in dirent, but hash is not SIPHASH");
840  			goto fail;
841  		}
842  	} else {
843  		if (root->info.hash_version == DX_HASH_SIPHASH) {
844  			ext4_warning_inode(dir,
845  				"Hash code is SIPHASH, but hash not in dirent");
846  			goto fail;
847  		}
848  	}
849  	if (fname)
850  		hinfo = &fname->hinfo;
851  	hinfo->hash_version = root->info.hash_version;
852  	if (hinfo->hash_version <= DX_HASH_TEA)
853  		hinfo->hash_version += EXT4_SB(dir->i_sb)->s_hash_unsigned;
854  	hinfo->seed = EXT4_SB(dir->i_sb)->s_hash_seed;
855  	/* hash is already computed for encrypted casefolded directory */
856  	if (fname && fname_name(fname) &&
857  	    !(IS_ENCRYPTED(dir) && IS_CASEFOLDED(dir))) {
858  		int ret = ext4fs_dirhash(dir, fname_name(fname),
859  					 fname_len(fname), hinfo);
860  		if (ret < 0) {
861  			ret_err = ERR_PTR(ret);
862  			goto fail;
863  		}
864  	}
865  	hash = hinfo->hash;
866  
867  	if (root->info.unused_flags & 1) {
868  		ext4_warning_inode(dir, "Unimplemented hash flags: %#06x",
869  				   root->info.unused_flags);
870  		goto fail;
871  	}
872  
873  	indirect = root->info.indirect_levels;
874  	if (indirect >= ext4_dir_htree_level(dir->i_sb)) {
875  		ext4_warning(dir->i_sb,
876  			     "Directory (ino: %lu) htree depth %#06x exceed"
877  			     "supported value", dir->i_ino,
878  			     ext4_dir_htree_level(dir->i_sb));
879  		if (ext4_dir_htree_level(dir->i_sb) < EXT4_HTREE_LEVEL) {
880  			ext4_warning(dir->i_sb, "Enable large directory "
881  						"feature to access it");
882  		}
883  		goto fail;
884  	}
885  
886  	entries = (struct dx_entry *)(((char *)&root->info) +
887  				      root->info.info_length);
888  
889  	if (dx_get_limit(entries) != dx_root_limit(dir,
890  						   root->info.info_length)) {
891  		ext4_warning_inode(dir, "dx entry: limit %u != root limit %u",
892  				   dx_get_limit(entries),
893  				   dx_root_limit(dir, root->info.info_length));
894  		goto fail;
895  	}
896  
897  	dxtrace(printk("Look up %x", hash));
898  	level = 0;
899  	blocks[0] = 0;
900  	while (1) {
901  		count = dx_get_count(entries);
902  		if (!count || count > dx_get_limit(entries)) {
903  			ext4_warning_inode(dir,
904  					   "dx entry: count %u beyond limit %u",
905  					   count, dx_get_limit(entries));
906  			goto fail;
907  		}
908  
909  		p = entries + 1;
910  		q = entries + count - 1;
911  		while (p <= q) {
912  			m = p + (q - p) / 2;
913  			dxtrace(printk(KERN_CONT "."));
914  			if (dx_get_hash(m) > hash)
915  				q = m - 1;
916  			else
917  				p = m + 1;
918  		}
919  
920  		htree_rep_invariant_check(entries, p, hash, count - 1);
921  
922  		at = p - 1;
923  		dxtrace(printk(KERN_CONT " %x->%u\n",
924  			       at == entries ? 0 : dx_get_hash(at),
925  			       dx_get_block(at)));
926  		frame->entries = entries;
927  		frame->at = at;
928  
929  		block = dx_get_block(at);
930  		for (i = 0; i <= level; i++) {
931  			if (blocks[i] == block) {
932  				ext4_warning_inode(dir,
933  					"dx entry: tree cycle block %u points back to block %u",
934  					blocks[level], block);
935  				goto fail;
936  			}
937  		}
938  		if (++level > indirect)
939  			return frame;
940  		blocks[level] = block;
941  		frame++;
942  		frame->bh = ext4_read_dirblock(dir, block, INDEX);
943  		if (IS_ERR(frame->bh)) {
944  			ret_err = (struct dx_frame *) frame->bh;
945  			frame->bh = NULL;
946  			goto fail;
947  		}
948  
949  		entries = ((struct dx_node *) frame->bh->b_data)->entries;
950  
951  		if (dx_get_limit(entries) != dx_node_limit(dir)) {
952  			ext4_warning_inode(dir,
953  				"dx entry: limit %u != node limit %u",
954  				dx_get_limit(entries), dx_node_limit(dir));
955  			goto fail;
956  		}
957  	}
958  fail:
959  	while (frame >= frame_in) {
960  		brelse(frame->bh);
961  		frame--;
962  	}
963  
964  	if (ret_err == ERR_PTR(ERR_BAD_DX_DIR))
965  		ext4_warning_inode(dir,
966  			"Corrupt directory, running e2fsck is recommended");
967  	return ret_err;
968  }
969  
970  static void dx_release(struct dx_frame *frames)
971  {
972  	struct dx_root_info *info;
973  	int i;
974  	unsigned int indirect_levels;
975  
976  	if (frames[0].bh == NULL)
977  		return;
978  
979  	info = &((struct dx_root *)frames[0].bh->b_data)->info;
980  	/* save local copy, "info" may be freed after brelse() */
981  	indirect_levels = info->indirect_levels;
982  	for (i = 0; i <= indirect_levels; i++) {
983  		if (frames[i].bh == NULL)
984  			break;
985  		brelse(frames[i].bh);
986  		frames[i].bh = NULL;
987  	}
988  }
989  
990  /*
991   * This function increments the frame pointer to search the next leaf
992   * block, and reads in the necessary intervening nodes if the search
993   * should be necessary.  Whether or not the search is necessary is
994   * controlled by the hash parameter.  If the hash value is even, then
995   * the search is only continued if the next block starts with that
996   * hash value.  This is used if we are searching for a specific file.
997   *
998   * If the hash value is HASH_NB_ALWAYS, then always go to the next block.
999   *
 * This function returns 1 if the caller should continue to search,
 * or 0 if it should not.  If there is an error reading one of the
 * index blocks, it will return a negative error code.
1003   *
1004   * If start_hash is non-null, it will be filled in with the starting
1005   * hash of the next page.
1006   */
1007  static int ext4_htree_next_block(struct inode *dir, __u32 hash,
1008  				 struct dx_frame *frame,
1009  				 struct dx_frame *frames,
1010  				 __u32 *start_hash)
1011  {
1012  	struct dx_frame *p;
1013  	struct buffer_head *bh;
1014  	int num_frames = 0;
1015  	__u32 bhash;
1016  
1017  	p = frame;
1018  	/*
1019  	 * Find the next leaf page by incrementing the frame pointer.
1020  	 * If we run out of entries in the interior node, loop around and
1021  	 * increment pointer in the parent node.  When we break out of
1022  	 * this loop, num_frames indicates the number of interior
1023  	 * nodes need to be read.
1024  	 */
1025  	while (1) {
1026  		if (++(p->at) < p->entries + dx_get_count(p->entries))
1027  			break;
1028  		if (p == frames)
1029  			return 0;
1030  		num_frames++;
1031  		p--;
1032  	}
1033  
1034  	/*
1035  	 * If the hash is 1, then continue only if the next page has a
1036  	 * continuation hash of any value.  This is used for readdir
1037  	 * handling.  Otherwise, check to see if the hash matches the
1038  	 * desired continuation hash.  If it doesn't, return since
1039  	 * there's no point to read in the successive index pages.
1040  	 */
1041  	bhash = dx_get_hash(p->at);
1042  	if (start_hash)
1043  		*start_hash = bhash;
1044  	if ((hash & 1) == 0) {
1045  		if ((bhash & ~1) != hash)
1046  			return 0;
1047  	}
1048  	/*
1049  	 * If the hash is HASH_NB_ALWAYS, we always go to the next
1050  	 * block so no check is necessary
1051  	 */
1052  	while (num_frames--) {
1053  		bh = ext4_read_dirblock(dir, dx_get_block(p->at), INDEX);
1054  		if (IS_ERR(bh))
1055  			return PTR_ERR(bh);
1056  		p++;
1057  		brelse(p->bh);
1058  		p->bh = bh;
1059  		p->at = p->entries = ((struct dx_node *) bh->b_data)->entries;
1060  	}
1061  	return 1;
1062  }
1063  
1064  
1065  /*
 * This function fills a red-black tree with information from a
 * directory block.  It returns the number of directory entries loaded
 * into the tree.  If there is an error, a negative error code is
 * returned instead.
1069   */
static int htree_dirblock_to_tree(struct file *dir_file,
				  struct inode *dir, ext4_lblk_t block,
				  struct dx_hash_info *hinfo,
				  __u32 start_hash, __u32 start_minor_hash)
{
	struct buffer_head *bh;
	struct ext4_dir_entry_2 *de, *top;
	/* count doubles as the return value: entries stored, or a negative error */
	int err = 0, count = 0;
	struct fscrypt_str fname_crypto_str = FSTR_INIT(NULL, 0), tmp_str;
	int csum = ext4_has_metadata_csum(dir->i_sb);

	dxtrace(printk(KERN_INFO "In htree dirblock_to_tree: block %lu\n",
							(unsigned long)block));
	bh = ext4_read_dirblock(dir, block, DIRENT_HTREE);
	if (IS_ERR(bh))
		return PTR_ERR(bh);

	de = (struct ext4_dir_entry_2 *) bh->b_data;
	/* csum entries are not larger in the casefolded encrypted case */
	/*
	 * top is the scan limit: one minimal record length short of the end
	 * of the block.
	 */
	top = (struct ext4_dir_entry_2 *) ((char *) de +
					   dir->i_sb->s_blocksize -
					   ext4_dir_rec_len(0,
							   csum ? NULL : dir));
	/* Check if the directory is encrypted */
	if (IS_ENCRYPTED(dir)) {
		err = fscrypt_prepare_readdir(dir);
		if (err < 0) {
			brelse(bh);
			return err;
		}
		/* scratch buffer reused for every entry name in this block */
		err = fscrypt_fname_alloc_buffer(EXT4_NAME_LEN,
						 &fname_crypto_str);
		if (err < 0) {
			brelse(bh);
			return err;
		}
	}

	for (; de < top; de = ext4_next_entry(de, dir->i_sb->s_blocksize)) {
		if (ext4_check_dir_entry(dir, NULL, de, bh,
				bh->b_data, bh->b_size,
				(block<<EXT4_BLOCK_SIZE_BITS(dir->i_sb))
					 + ((char *)de - bh->b_data))) {
			/* silently ignore the rest of the block */
			break;
		}
		/*
		 * Obtain the entry's hash: either read it from the dirent
		 * itself (zero for unused entries) or compute it from the
		 * on-disk name.
		 */
		if (ext4_hash_in_dirent(dir)) {
			if (de->name_len && de->inode) {
				hinfo->hash = EXT4_DIRENT_HASH(de);
				hinfo->minor_hash = EXT4_DIRENT_MINOR_HASH(de);
			} else {
				hinfo->hash = 0;
				hinfo->minor_hash = 0;
			}
		} else {
			err = ext4fs_dirhash(dir, de->name,
					     de->name_len, hinfo);
			if (err < 0) {
				count = err;
				goto errout;
			}
		}
		/* skip entries that sort before (start_hash, start_minor_hash) */
		if ((hinfo->hash < start_hash) ||
		    ((hinfo->hash == start_hash) &&
		     (hinfo->minor_hash < start_minor_hash)))
			continue;
		/* skip unused entries */
		if (de->inode == 0)
			continue;
		if (!IS_ENCRYPTED(dir)) {
			/* plain directory: store the on-disk name as is */
			tmp_str.name = de->name;
			tmp_str.len = de->name_len;
			err = ext4_htree_store_dirent(dir_file,
				   hinfo->hash, hinfo->minor_hash, de,
				   &tmp_str);
		} else {
			/*
			 * Remember the scratch buffer's length so it can be
			 * restored after this entry; presumably the
			 * conversion below overwrites .len with the length
			 * of the converted name.
			 */
			int save_len = fname_crypto_str.len;
			struct fscrypt_str de_name = FSTR_INIT(de->name,
								de->name_len);

			/* Directory is encrypted */
			err = fscrypt_fname_disk_to_usr(dir, hinfo->hash,
					hinfo->minor_hash, &de_name,
					&fname_crypto_str);
			if (err) {
				count = err;
				goto errout;
			}
			err = ext4_htree_store_dirent(dir_file,
				   hinfo->hash, hinfo->minor_hash, de,
					&fname_crypto_str);
			fname_crypto_str.len = save_len;
		}
		if (err != 0) {
			count = err;
			goto errout;
		}
		count++;
	}
errout:
	/* common exit: also reached when the loop finishes normally */
	brelse(bh);
	fscrypt_fname_free_buffer(&fname_crypto_str);
	return count;
}
1173  
1174  
1175  /*
1176   * This function fills a red-black tree with information from a
1177   * directory.  We start scanning the directory in hash order, starting
1178   * at start_hash and start_minor_hash.
1179   *
1180   * This function returns the number of entries inserted into the tree,
1181   * or a negative error code.
1182   */
int ext4_htree_fill_tree(struct file *dir_file, __u32 start_hash,
			 __u32 start_minor_hash, __u32 *next_hash)
{
	struct dx_hash_info hinfo;
	struct ext4_dir_entry_2 *de;
	struct dx_frame frames[EXT4_HTREE_LEVEL], *frame;
	struct inode *dir;
	ext4_lblk_t block;
	int count = 0;
	int ret, err;
	__u32 hashval;
	struct fscrypt_str tmp_str;

	dxtrace(printk(KERN_DEBUG "In htree_fill_tree, start hash: %x:%x\n",
		       start_hash, start_minor_hash));
	dir = file_inode(dir_file);
	/*
	 * Directory without the index flag: there is no tree to walk, so
	 * load either the inline data or block 0 and report ~0 as the next
	 * hash.
	 */
	if (!(ext4_test_inode_flag(dir, EXT4_INODE_INDEX))) {
		if (ext4_hash_in_dirent(dir))
			hinfo.hash_version = DX_HASH_SIPHASH;
		else
			hinfo.hash_version =
					EXT4_SB(dir->i_sb)->s_def_hash_version;
		if (hinfo.hash_version <= DX_HASH_TEA)
			hinfo.hash_version +=
				EXT4_SB(dir->i_sb)->s_hash_unsigned;
		hinfo.seed = EXT4_SB(dir->i_sb)->s_hash_seed;
		if (ext4_has_inline_data(dir)) {
			int has_inline_data = 1;
			count = ext4_inlinedir_to_tree(dir_file, dir, 0,
						       &hinfo, start_hash,
						       start_minor_hash,
						       &has_inline_data);
			/*
			 * If the helper cleared has_inline_data, fall
			 * through and read block 0 instead.
			 */
			if (has_inline_data) {
				*next_hash = ~0;
				return count;
			}
		}
		count = htree_dirblock_to_tree(dir_file, dir, 0, &hinfo,
					       start_hash, start_minor_hash);
		*next_hash = ~0;
		return count;
	}
	/* Indexed directory: locate the leaf covering start_hash */
	hinfo.hash = start_hash;
	hinfo.minor_hash = 0;
	frame = dx_probe(NULL, dir, &hinfo, frames);
	if (IS_ERR(frame))
		return PTR_ERR(frame);

	/* Add '.' and '..' from the htree header */
	/* the first entry of the root block is stored under hash 0 */
	if (!start_hash && !start_minor_hash) {
		de = (struct ext4_dir_entry_2 *) frames[0].bh->b_data;
		tmp_str.name = de->name;
		tmp_str.len = de->name_len;
		err = ext4_htree_store_dirent(dir_file, 0, 0,
					      de, &tmp_str);
		if (err != 0)
			goto errout;
		count++;
	}
	/* the second entry of the root block is stored under hash 2 */
	if (start_hash < 2 || (start_hash ==2 && start_minor_hash==0)) {
		de = (struct ext4_dir_entry_2 *) frames[0].bh->b_data;
		de = ext4_next_entry(de, dir->i_sb->s_blocksize);
		tmp_str.name = de->name;
		tmp_str.len = de->name_len;
		err = ext4_htree_store_dirent(dir_file, 2, 0,
					      de, &tmp_str);
		if (err != 0)
			goto errout;
		count++;
	}

	/* Load leaf blocks one at a time, advancing through the index */
	while (1) {
		if (fatal_signal_pending(current)) {
			err = -ERESTARTSYS;
			goto errout;
		}
		cond_resched();
		block = dx_get_block(frame->at);
		ret = htree_dirblock_to_tree(dir_file, dir, block, &hinfo,
					     start_hash, start_minor_hash);
		if (ret < 0) {
			err = ret;
			goto errout;
		}
		count += ret;
		/* default next hash, kept if next_block does not overwrite it */
		hashval = ~0;
		ret = ext4_htree_next_block(dir, HASH_NB_ALWAYS,
					    frame, frames, &hashval);
		/* published before the error check, so it is set on failure too */
		*next_hash = hashval;
		if (ret < 0) {
			err = ret;
			goto errout;
		}
		/*
		 * Stop if:  (a) there are no more entries, or
		 * (b) we have inserted at least one entry and the
		 * next hash value is not a continuation
		 */
		if ((ret == 0) ||
		    (count && ((hashval & 1) == 0)))
			break;
	}
	dx_release(frames);
	dxtrace(printk(KERN_DEBUG "Fill tree: returned %d entries, "
		       "next hash: %x\n", count, *next_hash));
	return count;
errout:
	/* error exit: drop the index frames and return the negative code */
	dx_release(frames);
	return (err);
}
1293  
1294  static inline int search_dirblock(struct buffer_head *bh,
1295  				  struct inode *dir,
1296  				  struct ext4_filename *fname,
1297  				  unsigned int offset,
1298  				  struct ext4_dir_entry_2 **res_dir)
1299  {
1300  	return ext4_search_dir(bh, bh->b_data, dir->i_sb->s_blocksize, dir,
1301  			       fname, offset, res_dir);
1302  }
1303  
1304  /*
1305   * Directory block splitting, compacting
1306   */
1307  
1308  /*
1309   * Create map of hash values, offsets, and sizes, stored at end of block.
1310   * Returns number of entries mapped.
1311   */
1312  static int dx_make_map(struct inode *dir, struct buffer_head *bh,
1313  		       struct dx_hash_info *hinfo,
1314  		       struct dx_map_entry *map_tail)
1315  {
1316  	int count = 0;
1317  	struct ext4_dir_entry_2 *de = (struct ext4_dir_entry_2 *)bh->b_data;
1318  	unsigned int buflen = bh->b_size;
1319  	char *base = bh->b_data;
1320  	struct dx_hash_info h = *hinfo;
1321  	int blocksize = EXT4_BLOCK_SIZE(dir->i_sb);
1322  
1323  	if (ext4_has_metadata_csum(dir->i_sb))
1324  		buflen -= sizeof(struct ext4_dir_entry_tail);
1325  
1326  	while ((char *) de < base + buflen) {
1327  		if (ext4_check_dir_entry(dir, NULL, de, bh, base, buflen,
1328  					 ((char *)de) - base))
1329  			return -EFSCORRUPTED;
1330  		if (de->name_len && de->inode) {
1331  			if (ext4_hash_in_dirent(dir))
1332  				h.hash = EXT4_DIRENT_HASH(de);
1333  			else {
1334  				int err = ext4fs_dirhash(dir, de->name,
1335  						     de->name_len, &h);
1336  				if (err < 0)
1337  					return err;
1338  			}
1339  			map_tail--;
1340  			map_tail->hash = h.hash;
1341  			map_tail->offs = ((char *) de - base)>>2;
1342  			map_tail->size = ext4_rec_len_from_disk(de->rec_len,
1343  								blocksize);
1344  			count++;
1345  			cond_resched();
1346  		}
1347  		de = ext4_next_entry(de, blocksize);
1348  	}
1349  	return count;
1350  }
1351  
1352  /* Sort map by hash value */
1353  static void dx_sort_map (struct dx_map_entry *map, unsigned count)
1354  {
1355  	struct dx_map_entry *p, *q, *top = map + count - 1;
1356  	int more;
1357  	/* Combsort until bubble sort doesn't suck */
1358  	while (count > 2) {
1359  		count = count*10/13;
1360  		if (count - 9 < 2) /* 9, 10 -> 11 */
1361  			count = 11;
1362  		for (p = top, q = p - count; q >= map; p--, q--)
1363  			if (p->hash < q->hash)
1364  				swap(*p, *q);
1365  	}
1366  	/* Garden variety bubble sort */
1367  	do {
1368  		more = 0;
1369  		q = top;
1370  		while (q-- > map) {
1371  			if (q[1].hash >= q[0].hash)
1372  				continue;
1373  			swap(*(q+1), *q);
1374  			more = 1;
1375  		}
1376  	} while(more);
1377  }
1378  
1379  static void dx_insert_block(struct dx_frame *frame, u32 hash, ext4_lblk_t block)
1380  {
1381  	struct dx_entry *entries = frame->entries;
1382  	struct dx_entry *old = frame->at, *new = old + 1;
1383  	int count = dx_get_count(entries);
1384  
1385  	ASSERT(count < dx_get_limit(entries));
1386  	ASSERT(old < entries + count);
1387  	memmove(new + 1, new, (char *)(entries + count) - (char *)(new));
1388  	dx_set_hash(new, hash);
1389  	dx_set_block(new, block);
1390  	dx_set_count(entries, count + 1);
1391  }
1392  
1393  #if IS_ENABLED(CONFIG_UNICODE)
1394  /*
1395   * Test whether a case-insensitive directory entry matches the filename
1396   * being searched for.  If quick is set, assume the name being looked up
1397   * is already in the casefolded form.
1398   *
1399   * Returns: 0 if the directory entry matches, more than 0 if it
1400   * doesn't match or less than zero on error.
1401   */
1402  static int ext4_ci_compare(const struct inode *parent, const struct qstr *name,
1403  			   u8 *de_name, size_t de_name_len, bool quick)
1404  {
1405  	const struct super_block *sb = parent->i_sb;
1406  	const struct unicode_map *um = sb->s_encoding;
1407  	struct fscrypt_str decrypted_name = FSTR_INIT(NULL, de_name_len);
1408  	struct qstr entry = QSTR_INIT(de_name, de_name_len);
1409  	int ret;
1410  
1411  	if (IS_ENCRYPTED(parent)) {
1412  		const struct fscrypt_str encrypted_name =
1413  				FSTR_INIT(de_name, de_name_len);
1414  
1415  		decrypted_name.name = kmalloc(de_name_len, GFP_KERNEL);
1416  		if (!decrypted_name.name)
1417  			return -ENOMEM;
1418  		ret = fscrypt_fname_disk_to_usr(parent, 0, 0, &encrypted_name,
1419  						&decrypted_name);
1420  		if (ret < 0)
1421  			goto out;
1422  		entry.name = decrypted_name.name;
1423  		entry.len = decrypted_name.len;
1424  	}
1425  
1426  	if (quick)
1427  		ret = utf8_strncasecmp_folded(um, name, &entry);
1428  	else
1429  		ret = utf8_strncasecmp(um, name, &entry);
1430  	if (ret < 0) {
1431  		/* Handle invalid character sequence as either an error
1432  		 * or as an opaque byte sequence.
1433  		 */
1434  		if (sb_has_strict_encoding(sb))
1435  			ret = -EINVAL;
1436  		else if (name->len != entry.len)
1437  			ret = 1;
1438  		else
1439  			ret = !!memcmp(name->name, entry.name, entry.len);
1440  	}
1441  out:
1442  	kfree(decrypted_name.name);
1443  	return ret;
1444  }
1445  
1446  int ext4_fname_setup_ci_filename(struct inode *dir, const struct qstr *iname,
1447  				  struct ext4_filename *name)
1448  {
1449  	struct fscrypt_str *cf_name = &name->cf_name;
1450  	struct dx_hash_info *hinfo = &name->hinfo;
1451  	int len;
1452  
1453  	if (!IS_CASEFOLDED(dir) ||
1454  	    (IS_ENCRYPTED(dir) && !fscrypt_has_encryption_key(dir))) {
1455  		cf_name->name = NULL;
1456  		return 0;
1457  	}
1458  
1459  	cf_name->name = kmalloc(EXT4_NAME_LEN, GFP_NOFS);
1460  	if (!cf_name->name)
1461  		return -ENOMEM;
1462  
1463  	len = utf8_casefold(dir->i_sb->s_encoding,
1464  			    iname, cf_name->name,
1465  			    EXT4_NAME_LEN);
1466  	if (len <= 0) {
1467  		kfree(cf_name->name);
1468  		cf_name->name = NULL;
1469  	}
1470  	cf_name->len = (unsigned) len;
1471  	if (!IS_ENCRYPTED(dir))
1472  		return 0;
1473  
1474  	hinfo->hash_version = DX_HASH_SIPHASH;
1475  	hinfo->seed = NULL;
1476  	if (cf_name->name)
1477  		return ext4fs_dirhash(dir, cf_name->name, cf_name->len, hinfo);
1478  	else
1479  		return ext4fs_dirhash(dir, iname->name, iname->len, hinfo);
1480  }
1481  #endif
1482  
1483  /*
1484   * Test whether a directory entry matches the filename being searched for.
1485   *
1486   * Return: %true if the directory entry matches, otherwise %false.
1487   */
1488  static bool ext4_match(struct inode *parent,
1489  			      const struct ext4_filename *fname,
1490  			      struct ext4_dir_entry_2 *de)
1491  {
1492  	struct fscrypt_name f;
1493  
1494  	if (!de->inode)
1495  		return false;
1496  
1497  	f.usr_fname = fname->usr_fname;
1498  	f.disk_name = fname->disk_name;
1499  #ifdef CONFIG_FS_ENCRYPTION
1500  	f.crypto_buf = fname->crypto_buf;
1501  #endif
1502  
1503  #if IS_ENABLED(CONFIG_UNICODE)
1504  	if (IS_CASEFOLDED(parent) &&
1505  	    (!IS_ENCRYPTED(parent) || fscrypt_has_encryption_key(parent))) {
1506  		if (fname->cf_name.name) {
1507  			struct qstr cf = {.name = fname->cf_name.name,
1508  					  .len = fname->cf_name.len};
1509  			if (IS_ENCRYPTED(parent)) {
1510  				if (fname->hinfo.hash != EXT4_DIRENT_HASH(de) ||
1511  					fname->hinfo.minor_hash !=
1512  						EXT4_DIRENT_MINOR_HASH(de)) {
1513  
1514  					return false;
1515  				}
1516  			}
1517  			return !ext4_ci_compare(parent, &cf, de->name,
1518  							de->name_len, true);
1519  		}
1520  		return !ext4_ci_compare(parent, fname->usr_fname, de->name,
1521  						de->name_len, false);
1522  	}
1523  #endif
1524  
1525  	return fscrypt_match_name(&f, de->name, de->name_len);
1526  }
1527  
1528  /*
1529   * Returns 0 if not found, -1 on failure, and 1 on success
1530   */
1531  int ext4_search_dir(struct buffer_head *bh, char *search_buf, int buf_size,
1532  		    struct inode *dir, struct ext4_filename *fname,
1533  		    unsigned int offset, struct ext4_dir_entry_2 **res_dir)
1534  {
1535  	struct ext4_dir_entry_2 * de;
1536  	char * dlimit;
1537  	int de_len;
1538  
1539  	de = (struct ext4_dir_entry_2 *)search_buf;
1540  	dlimit = search_buf + buf_size;
1541  	while ((char *) de < dlimit - EXT4_BASE_DIR_LEN) {
1542  		/* this code is executed quadratically often */
1543  		/* do minimal checking `by hand' */
1544  		if (de->name + de->name_len <= dlimit &&
1545  		    ext4_match(dir, fname, de)) {
1546  			/* found a match - just to be sure, do
1547  			 * a full check */
1548  			if (ext4_check_dir_entry(dir, NULL, de, bh, search_buf,
1549  						 buf_size, offset))
1550  				return -1;
1551  			*res_dir = de;
1552  			return 1;
1553  		}
1554  		/* prevent looping on a bad block */
1555  		de_len = ext4_rec_len_from_disk(de->rec_len,
1556  						dir->i_sb->s_blocksize);
1557  		if (de_len <= 0)
1558  			return -1;
1559  		offset += de_len;
1560  		de = (struct ext4_dir_entry_2 *) ((char *) de + de_len);
1561  	}
1562  	return 0;
1563  }
1564  
1565  static int is_dx_internal_node(struct inode *dir, ext4_lblk_t block,
1566  			       struct ext4_dir_entry *de)
1567  {
1568  	struct super_block *sb = dir->i_sb;
1569  
1570  	if (!is_dx(dir))
1571  		return 0;
1572  	if (block == 0)
1573  		return 1;
1574  	if (de->inode == 0 &&
1575  	    ext4_rec_len_from_disk(de->rec_len, sb->s_blocksize) ==
1576  			sb->s_blocksize)
1577  		return 1;
1578  	return 0;
1579  }
1580  
1581  /*
1582   *	__ext4_find_entry()
1583   *
1584   * finds an entry in the specified directory with the wanted name. It
1585   * returns the cache buffer in which the entry was found, and the entry
1586   * itself (as a parameter - res_dir). It does NOT read the inode of the
1587   * entry - you'll have to do that yourself if you want to.
1588   *
1589   * The returned buffer_head has ->b_count elevated.  The caller is expected
1590   * to brelse() it when appropriate.
1591   */
static struct buffer_head *__ext4_find_entry(struct inode *dir,
					     struct ext4_filename *fname,
					     struct ext4_dir_entry_2 **res_dir,
					     int *inlined)
{
	struct super_block *sb;
	struct buffer_head *bh_use[NAMEI_RA_SIZE];
	struct buffer_head *bh, *ret = NULL;
	ext4_lblk_t start, block;
	const u8 *name = fname->usr_fname->name;
	size_t ra_max = 0;	/* Number of bh's in the readahead
				   buffer, bh_use[] */
	size_t ra_ptr = 0;	/* Current index into readahead
				   buffer */
	ext4_lblk_t  nblocks;
	int i, namelen, retval;

	*res_dir = NULL;
	sb = dir->i_sb;
	namelen = fname->usr_fname->len;
	/* over-long names are reported as "not found" */
	if (namelen > EXT4_NAME_LEN)
		return NULL;

	/*
	 * Inline-data directory: the helper does the search.  If it clears
	 * has_inline_data, continue with the block-based search below.
	 */
	if (ext4_has_inline_data(dir)) {
		int has_inline_data = 1;
		ret = ext4_find_inline_entry(dir, fname, res_dir,
					     &has_inline_data);
		if (inlined)
			*inlined = has_inline_data;
		if (has_inline_data)
			goto cleanup_and_exit;
	}

	if ((namelen <= 2) && (name[0] == '.') &&
	    (name[1] == '.' || name[1] == '\0')) {
		/*
		 * "." or ".." will only be in the first block
		 * NFS may look up ".."; "." should be handled by the VFS
		 */
		block = start = 0;
		nblocks = 1;
		goto restart;
	}
	if (is_dx(dir)) {
		ret = ext4_dx_find_entry(dir, fname, res_dir);
		/*
		 * On success, or if the error was file not found,
		 * return.  Otherwise, fall back to doing a search the
		 * old fashioned way.
		 */
		if (!IS_ERR(ret) || PTR_ERR(ret) != ERR_BAD_DX_DIR)
			goto cleanup_and_exit;
		dxtrace(printk(KERN_DEBUG "ext4_find_entry: dx failed, "
			       "falling back\n"));
		ret = NULL;
	}
	/* Linear search over all blocks covered by i_size */
	nblocks = dir->i_size >> EXT4_BLOCK_SIZE_BITS(sb);
	if (!nblocks) {
		ret = NULL;
		goto cleanup_and_exit;
	}
	/* begin at the block where the previous lookup found its entry */
	start = EXT4_I(dir)->i_dir_start_lookup;
	if (start >= nblocks)
		start = 0;
	block = start;
restart:
	do {
		/*
		 * We deal with the read-ahead logic here.
		 */
		cond_resched();
		if (ra_ptr >= ra_max) {
			/* Refill the readahead buffer */
			ra_ptr = 0;
			/*
			 * Read up to the wrap-around point (start) or up to
			 * the end of the directory, capped at the size of
			 * bh_use[].
			 */
			if (block < start)
				ra_max = start - block;
			else
				ra_max = nblocks - block;
			ra_max = min(ra_max, ARRAY_SIZE(bh_use));
			retval = ext4_bread_batch(dir, block, ra_max,
						  false /* wait */, bh_use);
			if (retval) {
				ret = ERR_PTR(retval);
				/* nothing in bh_use[] to release on exit */
				ra_max = 0;
				goto cleanup_and_exit;
			}
		}
		/* a NULL slot is skipped and the search moves to the next block */
		if ((bh = bh_use[ra_ptr++]) == NULL)
			goto next;
		wait_on_buffer(bh);
		if (!buffer_uptodate(bh)) {
			EXT4_ERROR_INODE_ERR(dir, EIO,
					     "reading directory lblock %lu",
					     (unsigned long) block);
			brelse(bh);
			ret = ERR_PTR(-EIO);
			goto cleanup_and_exit;
		}
		/*
		 * Verify the checksum once per buffer; blocks identified as
		 * htree index nodes are not checked as directory leaf blocks.
		 */
		if (!buffer_verified(bh) &&
		    !is_dx_internal_node(dir, block,
					 (struct ext4_dir_entry *)bh->b_data) &&
		    !ext4_dirblock_csum_verify(dir, bh)) {
			EXT4_ERROR_INODE_ERR(dir, EFSBADCRC,
					     "checksumming directory "
					     "block %lu", (unsigned long)block);
			brelse(bh);
			ret = ERR_PTR(-EFSBADCRC);
			goto cleanup_and_exit;
		}
		set_buffer_verified(bh);
		i = search_dirblock(bh, dir, fname,
			    block << EXT4_BLOCK_SIZE_BITS(sb), res_dir);
		if (i == 1) {
			/* found: remember this block for the next lookup */
			EXT4_I(dir)->i_dir_start_lookup = block;
			ret = bh;
			goto cleanup_and_exit;
		} else {
			brelse(bh);
			/*
			 * NOTE(review): ret is not updated on this path, so
			 * a negative search result appears to reach the
			 * caller the same way as "not found" - confirm this
			 * is intended.
			 */
			if (i < 0)
				goto cleanup_and_exit;
		}
	next:
		/* advance, wrapping to block 0 at the end of the directory */
		if (++block >= nblocks)
			block = 0;
	} while (block != start);

	/*
	 * If the directory has grown while we were searching, then
	 * search the last part of the directory before giving up.
	 */
	block = nblocks;
	nblocks = dir->i_size >> EXT4_BLOCK_SIZE_BITS(sb);
	if (block < nblocks) {
		start = 0;
		goto restart;
	}

cleanup_and_exit:
	/* Clean up the read-ahead blocks */
	for (; ra_ptr < ra_max; ra_ptr++)
		brelse(bh_use[ra_ptr]);
	return ret;
}
1735  
1736  static struct buffer_head *ext4_find_entry(struct inode *dir,
1737  					   const struct qstr *d_name,
1738  					   struct ext4_dir_entry_2 **res_dir,
1739  					   int *inlined)
1740  {
1741  	int err;
1742  	struct ext4_filename fname;
1743  	struct buffer_head *bh;
1744  
1745  	err = ext4_fname_setup_filename(dir, d_name, 1, &fname);
1746  	if (err == -ENOENT)
1747  		return NULL;
1748  	if (err)
1749  		return ERR_PTR(err);
1750  
1751  	bh = __ext4_find_entry(dir, &fname, res_dir, inlined);
1752  
1753  	ext4_fname_free_filename(&fname);
1754  	return bh;
1755  }
1756  
1757  static struct buffer_head *ext4_lookup_entry(struct inode *dir,
1758  					     struct dentry *dentry,
1759  					     struct ext4_dir_entry_2 **res_dir)
1760  {
1761  	int err;
1762  	struct ext4_filename fname;
1763  	struct buffer_head *bh;
1764  
1765  	err = ext4_fname_prepare_lookup(dir, dentry, &fname);
1766  	generic_set_encrypted_ci_d_ops(dentry);
1767  	if (err == -ENOENT)
1768  		return NULL;
1769  	if (err)
1770  		return ERR_PTR(err);
1771  
1772  	bh = __ext4_find_entry(dir, &fname, res_dir, NULL);
1773  
1774  	ext4_fname_free_filename(&fname);
1775  	return bh;
1776  }
1777  
1778  static struct buffer_head * ext4_dx_find_entry(struct inode *dir,
1779  			struct ext4_filename *fname,
1780  			struct ext4_dir_entry_2 **res_dir)
1781  {
1782  	struct super_block * sb = dir->i_sb;
1783  	struct dx_frame frames[EXT4_HTREE_LEVEL], *frame;
1784  	struct buffer_head *bh;
1785  	ext4_lblk_t block;
1786  	int retval;
1787  
1788  #ifdef CONFIG_FS_ENCRYPTION
1789  	*res_dir = NULL;
1790  #endif
1791  	frame = dx_probe(fname, dir, NULL, frames);
1792  	if (IS_ERR(frame))
1793  		return (struct buffer_head *) frame;
1794  	do {
1795  		block = dx_get_block(frame->at);
1796  		bh = ext4_read_dirblock(dir, block, DIRENT_HTREE);
1797  		if (IS_ERR(bh))
1798  			goto errout;
1799  
1800  		retval = search_dirblock(bh, dir, fname,
1801  					 block << EXT4_BLOCK_SIZE_BITS(sb),
1802  					 res_dir);
1803  		if (retval == 1)
1804  			goto success;
1805  		brelse(bh);
1806  		if (retval == -1) {
1807  			bh = ERR_PTR(ERR_BAD_DX_DIR);
1808  			goto errout;
1809  		}
1810  
1811  		/* Check to see if we should continue to search */
1812  		retval = ext4_htree_next_block(dir, fname->hinfo.hash, frame,
1813  					       frames, NULL);
1814  		if (retval < 0) {
1815  			ext4_warning_inode(dir,
1816  				"error %d reading directory index block",
1817  				retval);
1818  			bh = ERR_PTR(retval);
1819  			goto errout;
1820  		}
1821  	} while (retval == 1);
1822  
1823  	bh = NULL;
1824  errout:
1825  	dxtrace(printk(KERN_DEBUG "%s not found\n", fname->usr_fname->name));
1826  success:
1827  	dx_release(frames);
1828  	return bh;
1829  }
1830  
1831  static struct dentry *ext4_lookup(struct inode *dir, struct dentry *dentry, unsigned int flags)
1832  {
1833  	struct inode *inode;
1834  	struct ext4_dir_entry_2 *de;
1835  	struct buffer_head *bh;
1836  
1837  	if (dentry->d_name.len > EXT4_NAME_LEN)
1838  		return ERR_PTR(-ENAMETOOLONG);
1839  
1840  	bh = ext4_lookup_entry(dir, dentry, &de);
1841  	if (IS_ERR(bh))
1842  		return ERR_CAST(bh);
1843  	inode = NULL;
1844  	if (bh) {
1845  		__u32 ino = le32_to_cpu(de->inode);
1846  		brelse(bh);
1847  		if (!ext4_valid_inum(dir->i_sb, ino)) {
1848  			EXT4_ERROR_INODE(dir, "bad inode number: %u", ino);
1849  			return ERR_PTR(-EFSCORRUPTED);
1850  		}
1851  		if (unlikely(ino == dir->i_ino)) {
1852  			EXT4_ERROR_INODE(dir, "'%pd' linked to parent dir",
1853  					 dentry);
1854  			return ERR_PTR(-EFSCORRUPTED);
1855  		}
1856  		inode = ext4_iget(dir->i_sb, ino, EXT4_IGET_NORMAL);
1857  		if (inode == ERR_PTR(-ESTALE)) {
1858  			EXT4_ERROR_INODE(dir,
1859  					 "deleted inode referenced: %u",
1860  					 ino);
1861  			return ERR_PTR(-EFSCORRUPTED);
1862  		}
1863  		if (!IS_ERR(inode) && IS_ENCRYPTED(dir) &&
1864  		    (S_ISDIR(inode->i_mode) || S_ISLNK(inode->i_mode)) &&
1865  		    !fscrypt_has_permitted_context(dir, inode)) {
1866  			ext4_warning(inode->i_sb,
1867  				     "Inconsistent encryption contexts: %lu/%lu",
1868  				     dir->i_ino, inode->i_ino);
1869  			iput(inode);
1870  			return ERR_PTR(-EPERM);
1871  		}
1872  	}
1873  
1874  #if IS_ENABLED(CONFIG_UNICODE)
1875  	if (!inode && IS_CASEFOLDED(dir)) {
1876  		/* Eventually we want to call d_add_ci(dentry, NULL)
1877  		 * for negative dentries in the encoding case as
1878  		 * well.  For now, prevent the negative dentry
1879  		 * from being cached.
1880  		 */
1881  		return NULL;
1882  	}
1883  #endif
1884  	return d_splice_alias(inode, dentry);
1885  }
1886  
1887  
1888  struct dentry *ext4_get_parent(struct dentry *child)
1889  {
1890  	__u32 ino;
1891  	struct ext4_dir_entry_2 * de;
1892  	struct buffer_head *bh;
1893  
1894  	bh = ext4_find_entry(d_inode(child), &dotdot_name, &de, NULL);
1895  	if (IS_ERR(bh))
1896  		return ERR_CAST(bh);
1897  	if (!bh)
1898  		return ERR_PTR(-ENOENT);
1899  	ino = le32_to_cpu(de->inode);
1900  	brelse(bh);
1901  
1902  	if (!ext4_valid_inum(child->d_sb, ino)) {
1903  		EXT4_ERROR_INODE(d_inode(child),
1904  				 "bad parent inode number: %u", ino);
1905  		return ERR_PTR(-EFSCORRUPTED);
1906  	}
1907  
1908  	return d_obtain_alias(ext4_iget(child->d_sb, ino, EXT4_IGET_NORMAL));
1909  }
1910  
1911  /*
1912   * Move count entries from end of map between two memory locations.
1913   * Returns pointer to last entry moved.
1914   */
1915  static struct ext4_dir_entry_2 *
1916  dx_move_dirents(struct inode *dir, char *from, char *to,
1917  		struct dx_map_entry *map, int count,
1918  		unsigned blocksize)
1919  {
1920  	unsigned rec_len = 0;
1921  
1922  	while (count--) {
1923  		struct ext4_dir_entry_2 *de = (struct ext4_dir_entry_2 *)
1924  						(from + (map->offs<<2));
1925  		rec_len = ext4_dir_rec_len(de->name_len, dir);
1926  
1927  		memcpy (to, de, rec_len);
1928  		((struct ext4_dir_entry_2 *) to)->rec_len =
1929  				ext4_rec_len_to_disk(rec_len, blocksize);
1930  
1931  		/* wipe dir_entry excluding the rec_len field */
1932  		de->inode = 0;
1933  		memset(&de->name_len, 0, ext4_rec_len_from_disk(de->rec_len,
1934  								blocksize) -
1935  					 offsetof(struct ext4_dir_entry_2,
1936  								name_len));
1937  
1938  		map++;
1939  		to += rec_len;
1940  	}
1941  	return (struct ext4_dir_entry_2 *) (to - rec_len);
1942  }
1943  
1944  /*
1945   * Compact each dir entry in the range to the minimal rec_len.
1946   * Returns pointer to last entry in range.
1947   */
1948  static struct ext4_dir_entry_2 *dx_pack_dirents(struct inode *dir, char *base,
1949  							unsigned int blocksize)
1950  {
1951  	struct ext4_dir_entry_2 *next, *to, *prev, *de = (struct ext4_dir_entry_2 *) base;
1952  	unsigned rec_len = 0;
1953  
1954  	prev = to = de;
1955  	while ((char*)de < base + blocksize) {
1956  		next = ext4_next_entry(de, blocksize);
1957  		if (de->inode && de->name_len) {
1958  			rec_len = ext4_dir_rec_len(de->name_len, dir);
1959  			if (de > to)
1960  				memmove(to, de, rec_len);
1961  			to->rec_len = ext4_rec_len_to_disk(rec_len, blocksize);
1962  			prev = to;
1963  			to = (struct ext4_dir_entry_2 *) (((char *) to) + rec_len);
1964  		}
1965  		de = next;
1966  	}
1967  	return prev;
1968  }
1969  
1970  /*
1971   * Split a full leaf block to make room for a new dir entry.
1972   * Allocate a new block, and move entries so that they are approx. equally full.
1973   * Returns pointer to de in block into which the new entry will be inserted.
1974   */
1975  static struct ext4_dir_entry_2 *do_split(handle_t *handle, struct inode *dir,
1976  			struct buffer_head **bh,struct dx_frame *frame,
1977  			struct dx_hash_info *hinfo)
1978  {
1979  	unsigned blocksize = dir->i_sb->s_blocksize;
1980  	unsigned continued;
1981  	int count;
1982  	struct buffer_head *bh2;
1983  	ext4_lblk_t newblock;
1984  	u32 hash2;
1985  	struct dx_map_entry *map;
1986  	char *data1 = (*bh)->b_data, *data2;
1987  	unsigned split, move, size;
1988  	struct ext4_dir_entry_2 *de = NULL, *de2;
1989  	int	csum_size = 0;
1990  	int	err = 0, i;
1991  
1992  	if (ext4_has_metadata_csum(dir->i_sb))
1993  		csum_size = sizeof(struct ext4_dir_entry_tail);
1994  
1995  	bh2 = ext4_append(handle, dir, &newblock);
1996  	if (IS_ERR(bh2)) {
1997  		brelse(*bh);
1998  		*bh = NULL;
1999  		return (struct ext4_dir_entry_2 *) bh2;
2000  	}
2001  
2002  	BUFFER_TRACE(*bh, "get_write_access");
2003  	err = ext4_journal_get_write_access(handle, dir->i_sb, *bh,
2004  					    EXT4_JTR_NONE);
2005  	if (err)
2006  		goto journal_error;
2007  
2008  	BUFFER_TRACE(frame->bh, "get_write_access");
2009  	err = ext4_journal_get_write_access(handle, dir->i_sb, frame->bh,
2010  					    EXT4_JTR_NONE);
2011  	if (err)
2012  		goto journal_error;
2013  
2014  	data2 = bh2->b_data;
2015  
2016  	/* create map in the end of data2 block */
2017  	map = (struct dx_map_entry *) (data2 + blocksize);
2018  	count = dx_make_map(dir, *bh, hinfo, map);
2019  	if (count < 0) {
2020  		err = count;
2021  		goto journal_error;
2022  	}
2023  	map -= count;
2024  	dx_sort_map(map, count);
2025  	/* Ensure that neither split block is over half full */
2026  	size = 0;
2027  	move = 0;
2028  	for (i = count-1; i >= 0; i--) {
2029  		/* is more than half of this entry in 2nd half of the block? */
2030  		if (size + map[i].size/2 > blocksize/2)
2031  			break;
2032  		size += map[i].size;
2033  		move++;
2034  	}
2035  	/*
2036  	 * map index at which we will split
2037  	 *
2038  	 * If the sum of active entries didn't exceed half the block size, just
2039  	 * split it in half by count; each resulting block will have at least
2040  	 * half the space free.
2041  	 */
2042  	if (i > 0)
2043  		split = count - move;
2044  	else
2045  		split = count/2;
2046  
2047  	hash2 = map[split].hash;
2048  	continued = hash2 == map[split - 1].hash;
2049  	dxtrace(printk(KERN_INFO "Split block %lu at %x, %i/%i\n",
2050  			(unsigned long)dx_get_block(frame->at),
2051  					hash2, split, count-split));
2052  
2053  	/* Fancy dance to stay within two buffers */
2054  	de2 = dx_move_dirents(dir, data1, data2, map + split, count - split,
2055  			      blocksize);
2056  	de = dx_pack_dirents(dir, data1, blocksize);
2057  	de->rec_len = ext4_rec_len_to_disk(data1 + (blocksize - csum_size) -
2058  					   (char *) de,
2059  					   blocksize);
2060  	de2->rec_len = ext4_rec_len_to_disk(data2 + (blocksize - csum_size) -
2061  					    (char *) de2,
2062  					    blocksize);
2063  	if (csum_size) {
2064  		ext4_initialize_dirent_tail(*bh, blocksize);
2065  		ext4_initialize_dirent_tail(bh2, blocksize);
2066  	}
2067  
2068  	dxtrace(dx_show_leaf(dir, hinfo, (struct ext4_dir_entry_2 *) data1,
2069  			blocksize, 1));
2070  	dxtrace(dx_show_leaf(dir, hinfo, (struct ext4_dir_entry_2 *) data2,
2071  			blocksize, 1));
2072  
2073  	/* Which block gets the new entry? */
2074  	if (hinfo->hash >= hash2) {
2075  		swap(*bh, bh2);
2076  		de = de2;
2077  	}
2078  	dx_insert_block(frame, hash2 + continued, newblock);
2079  	err = ext4_handle_dirty_dirblock(handle, dir, bh2);
2080  	if (err)
2081  		goto journal_error;
2082  	err = ext4_handle_dirty_dx_node(handle, dir, frame->bh);
2083  	if (err)
2084  		goto journal_error;
2085  	brelse(bh2);
2086  	dxtrace(dx_show_index("frame", frame->entries));
2087  	return de;
2088  
2089  journal_error:
2090  	brelse(*bh);
2091  	brelse(bh2);
2092  	*bh = NULL;
2093  	ext4_std_error(dir->i_sb, err);
2094  	return ERR_PTR(err);
2095  }
2096  
2097  int ext4_find_dest_de(struct inode *dir, struct inode *inode,
2098  		      struct buffer_head *bh,
2099  		      void *buf, int buf_size,
2100  		      struct ext4_filename *fname,
2101  		      struct ext4_dir_entry_2 **dest_de)
2102  {
2103  	struct ext4_dir_entry_2 *de;
2104  	unsigned short reclen = ext4_dir_rec_len(fname_len(fname), dir);
2105  	int nlen, rlen;
2106  	unsigned int offset = 0;
2107  	char *top;
2108  
2109  	de = buf;
2110  	top = buf + buf_size - reclen;
2111  	while ((char *) de <= top) {
2112  		if (ext4_check_dir_entry(dir, NULL, de, bh,
2113  					 buf, buf_size, offset))
2114  			return -EFSCORRUPTED;
2115  		if (ext4_match(dir, fname, de))
2116  			return -EEXIST;
2117  		nlen = ext4_dir_rec_len(de->name_len, dir);
2118  		rlen = ext4_rec_len_from_disk(de->rec_len, buf_size);
2119  		if ((de->inode ? rlen - nlen : rlen) >= reclen)
2120  			break;
2121  		de = (struct ext4_dir_entry_2 *)((char *)de + rlen);
2122  		offset += rlen;
2123  	}
2124  	if ((char *) de > top)
2125  		return -ENOSPC;
2126  
2127  	*dest_de = de;
2128  	return 0;
2129  }
2130  
2131  void ext4_insert_dentry(struct inode *dir,
2132  			struct inode *inode,
2133  			struct ext4_dir_entry_2 *de,
2134  			int buf_size,
2135  			struct ext4_filename *fname)
2136  {
2137  
2138  	int nlen, rlen;
2139  
2140  	nlen = ext4_dir_rec_len(de->name_len, dir);
2141  	rlen = ext4_rec_len_from_disk(de->rec_len, buf_size);
2142  	if (de->inode) {
2143  		struct ext4_dir_entry_2 *de1 =
2144  			(struct ext4_dir_entry_2 *)((char *)de + nlen);
2145  		de1->rec_len = ext4_rec_len_to_disk(rlen - nlen, buf_size);
2146  		de->rec_len = ext4_rec_len_to_disk(nlen, buf_size);
2147  		de = de1;
2148  	}
2149  	de->file_type = EXT4_FT_UNKNOWN;
2150  	de->inode = cpu_to_le32(inode->i_ino);
2151  	ext4_set_de_type(inode->i_sb, de, inode->i_mode);
2152  	de->name_len = fname_len(fname);
2153  	memcpy(de->name, fname_name(fname), fname_len(fname));
2154  	if (ext4_hash_in_dirent(dir)) {
2155  		struct dx_hash_info *hinfo = &fname->hinfo;
2156  
2157  		EXT4_DIRENT_HASHES(de)->hash = cpu_to_le32(hinfo->hash);
2158  		EXT4_DIRENT_HASHES(de)->minor_hash =
2159  						cpu_to_le32(hinfo->minor_hash);
2160  	}
2161  }
2162  
2163  /*
2164   * Add a new entry into a directory (leaf) block.  If de is non-NULL,
2165   * it points to a directory entry which is guaranteed to be large
2166   * enough for new directory entry.  If de is NULL, then
2167   * add_dirent_to_buf will attempt search the directory block for
2168   * space.  It will return -ENOSPC if no space is available, and -EIO
2169   * and -EEXIST if directory entry already exists.
2170   */
2171  static int add_dirent_to_buf(handle_t *handle, struct ext4_filename *fname,
2172  			     struct inode *dir,
2173  			     struct inode *inode, struct ext4_dir_entry_2 *de,
2174  			     struct buffer_head *bh)
2175  {
2176  	unsigned int	blocksize = dir->i_sb->s_blocksize;
2177  	int		csum_size = 0;
2178  	int		err, err2;
2179  
2180  	if (ext4_has_metadata_csum(inode->i_sb))
2181  		csum_size = sizeof(struct ext4_dir_entry_tail);
2182  
2183  	if (!de) {
2184  		err = ext4_find_dest_de(dir, inode, bh, bh->b_data,
2185  					blocksize - csum_size, fname, &de);
2186  		if (err)
2187  			return err;
2188  	}
2189  	BUFFER_TRACE(bh, "get_write_access");
2190  	err = ext4_journal_get_write_access(handle, dir->i_sb, bh,
2191  					    EXT4_JTR_NONE);
2192  	if (err) {
2193  		ext4_std_error(dir->i_sb, err);
2194  		return err;
2195  	}
2196  
2197  	/* By now the buffer is marked for journaling */
2198  	ext4_insert_dentry(dir, inode, de, blocksize, fname);
2199  
2200  	/*
2201  	 * XXX shouldn't update any times until successful
2202  	 * completion of syscall, but too many callers depend
2203  	 * on this.
2204  	 *
2205  	 * XXX similarly, too many callers depend on
2206  	 * ext4_new_inode() setting the times, but error
2207  	 * recovery deletes the inode, so the worst that can
2208  	 * happen is that the times are slightly out of date
2209  	 * and/or different from the directory change time.
2210  	 */
2211  	dir->i_mtime = inode_set_ctime_current(dir);
2212  	ext4_update_dx_flag(dir);
2213  	inode_inc_iversion(dir);
2214  	err2 = ext4_mark_inode_dirty(handle, dir);
2215  	BUFFER_TRACE(bh, "call ext4_handle_dirty_metadata");
2216  	err = ext4_handle_dirty_dirblock(handle, dir, bh);
2217  	if (err)
2218  		ext4_std_error(dir->i_sb, err);
2219  	return err ? err : err2;
2220  }
2221  
2222  static bool ext4_check_dx_root(struct inode *dir, struct dx_root *root)
2223  {
2224  	struct fake_dirent *fde;
2225  	const char *error_msg;
2226  	unsigned int rlen;
2227  	unsigned int blocksize = dir->i_sb->s_blocksize;
2228  	char *blockend = (char *)root + dir->i_sb->s_blocksize;
2229  
2230  	fde = &root->dot;
2231  	if (unlikely(fde->name_len != 1)) {
2232  		error_msg = "invalid name_len for '.'";
2233  		goto corrupted;
2234  	}
2235  	if (unlikely(strncmp(root->dot_name, ".", fde->name_len))) {
2236  		error_msg = "invalid name for '.'";
2237  		goto corrupted;
2238  	}
2239  	rlen = ext4_rec_len_from_disk(fde->rec_len, blocksize);
2240  	if (unlikely((char *)fde + rlen >= blockend)) {
2241  		error_msg = "invalid rec_len for '.'";
2242  		goto corrupted;
2243  	}
2244  
2245  	fde = &root->dotdot;
2246  	if (unlikely(fde->name_len != 2)) {
2247  		error_msg = "invalid name_len for '..'";
2248  		goto corrupted;
2249  	}
2250  	if (unlikely(strncmp(root->dotdot_name, "..", fde->name_len))) {
2251  		error_msg = "invalid name for '..'";
2252  		goto corrupted;
2253  	}
2254  	rlen = ext4_rec_len_from_disk(fde->rec_len, blocksize);
2255  	if (unlikely((char *)fde + rlen >= blockend)) {
2256  		error_msg = "invalid rec_len for '..'";
2257  		goto corrupted;
2258  	}
2259  
2260  	return true;
2261  
2262  corrupted:
2263  	EXT4_ERROR_INODE(dir, "Corrupt dir, %s, running e2fsck is recommended",
2264  			 error_msg);
2265  	return false;
2266  }
2267  
2268  /*
2269   * This converts a one block unindexed directory to a 3 block indexed
2270   * directory, and adds the dentry to the indexed directory.
2271   */
2272  static int make_indexed_dir(handle_t *handle, struct ext4_filename *fname,
2273  			    struct inode *dir,
2274  			    struct inode *inode, struct buffer_head *bh)
2275  {
2276  	struct buffer_head *bh2;
2277  	struct dx_root	*root;
2278  	struct dx_frame	frames[EXT4_HTREE_LEVEL], *frame;
2279  	struct dx_entry *entries;
2280  	struct ext4_dir_entry_2	*de, *de2;
2281  	char		*data2, *top;
2282  	unsigned	len;
2283  	int		retval;
2284  	unsigned	blocksize;
2285  	ext4_lblk_t  block;
2286  	struct fake_dirent *fde;
2287  	int csum_size = 0;
2288  
2289  	if (ext4_has_metadata_csum(inode->i_sb))
2290  		csum_size = sizeof(struct ext4_dir_entry_tail);
2291  
2292  	blocksize =  dir->i_sb->s_blocksize;
2293  	dxtrace(printk(KERN_DEBUG "Creating index: inode %lu\n", dir->i_ino));
2294  	BUFFER_TRACE(bh, "get_write_access");
2295  	retval = ext4_journal_get_write_access(handle, dir->i_sb, bh,
2296  					       EXT4_JTR_NONE);
2297  	if (retval) {
2298  		ext4_std_error(dir->i_sb, retval);
2299  		brelse(bh);
2300  		return retval;
2301  	}
2302  
2303  	root = (struct dx_root *) bh->b_data;
2304  	if (!ext4_check_dx_root(dir, root)) {
2305  		brelse(bh);
2306  		return -EFSCORRUPTED;
2307  	}
2308  
2309  	/* The 0th block becomes the root, move the dirents out */
2310  	fde = &root->dotdot;
2311  	de = (struct ext4_dir_entry_2 *)((char *)fde +
2312  		ext4_rec_len_from_disk(fde->rec_len, blocksize));
2313  	len = ((char *) root) + (blocksize - csum_size) - (char *) de;
2314  
2315  	/* Allocate new block for the 0th block's dirents */
2316  	bh2 = ext4_append(handle, dir, &block);
2317  	if (IS_ERR(bh2)) {
2318  		brelse(bh);
2319  		return PTR_ERR(bh2);
2320  	}
2321  	ext4_set_inode_flag(dir, EXT4_INODE_INDEX);
2322  	data2 = bh2->b_data;
2323  
2324  	memcpy(data2, de, len);
2325  	memset(de, 0, len); /* wipe old data */
2326  	de = (struct ext4_dir_entry_2 *) data2;
2327  	top = data2 + len;
2328  	while ((char *)(de2 = ext4_next_entry(de, blocksize)) < top) {
2329  		if (ext4_check_dir_entry(dir, NULL, de, bh2, data2, len,
2330  					 (data2 + (blocksize - csum_size) -
2331  					  (char *) de))) {
2332  			brelse(bh2);
2333  			brelse(bh);
2334  			return -EFSCORRUPTED;
2335  		}
2336  		de = de2;
2337  	}
2338  	de->rec_len = ext4_rec_len_to_disk(data2 + (blocksize - csum_size) -
2339  					   (char *) de, blocksize);
2340  
2341  	if (csum_size)
2342  		ext4_initialize_dirent_tail(bh2, blocksize);
2343  
2344  	/* Initialize the root; the dot dirents already exist */
2345  	de = (struct ext4_dir_entry_2 *) (&root->dotdot);
2346  	de->rec_len = ext4_rec_len_to_disk(
2347  			blocksize - ext4_dir_rec_len(2, NULL), blocksize);
2348  	memset (&root->info, 0, sizeof(root->info));
2349  	root->info.info_length = sizeof(root->info);
2350  	if (ext4_hash_in_dirent(dir))
2351  		root->info.hash_version = DX_HASH_SIPHASH;
2352  	else
2353  		root->info.hash_version =
2354  				EXT4_SB(dir->i_sb)->s_def_hash_version;
2355  
2356  	entries = root->entries;
2357  	dx_set_block(entries, 1);
2358  	dx_set_count(entries, 1);
2359  	dx_set_limit(entries, dx_root_limit(dir, sizeof(root->info)));
2360  
2361  	/* Initialize as for dx_probe */
2362  	fname->hinfo.hash_version = root->info.hash_version;
2363  	if (fname->hinfo.hash_version <= DX_HASH_TEA)
2364  		fname->hinfo.hash_version += EXT4_SB(dir->i_sb)->s_hash_unsigned;
2365  	fname->hinfo.seed = EXT4_SB(dir->i_sb)->s_hash_seed;
2366  
2367  	/* casefolded encrypted hashes are computed on fname setup */
2368  	if (!ext4_hash_in_dirent(dir)) {
2369  		int err = ext4fs_dirhash(dir, fname_name(fname),
2370  					 fname_len(fname), &fname->hinfo);
2371  		if (err < 0) {
2372  			brelse(bh2);
2373  			brelse(bh);
2374  			return err;
2375  		}
2376  	}
2377  	memset(frames, 0, sizeof(frames));
2378  	frame = frames;
2379  	frame->entries = entries;
2380  	frame->at = entries;
2381  	frame->bh = bh;
2382  
2383  	retval = ext4_handle_dirty_dx_node(handle, dir, frame->bh);
2384  	if (retval)
2385  		goto out_frames;
2386  	retval = ext4_handle_dirty_dirblock(handle, dir, bh2);
2387  	if (retval)
2388  		goto out_frames;
2389  
2390  	de = do_split(handle,dir, &bh2, frame, &fname->hinfo);
2391  	if (IS_ERR(de)) {
2392  		retval = PTR_ERR(de);
2393  		goto out_frames;
2394  	}
2395  
2396  	retval = add_dirent_to_buf(handle, fname, dir, inode, de, bh2);
2397  out_frames:
2398  	/*
2399  	 * Even if the block split failed, we have to properly write
2400  	 * out all the changes we did so far. Otherwise we can end up
2401  	 * with corrupted filesystem.
2402  	 */
2403  	if (retval)
2404  		ext4_mark_inode_dirty(handle, dir);
2405  	dx_release(frames);
2406  	brelse(bh2);
2407  	return retval;
2408  }
2409  
2410  /*
2411   *	ext4_add_entry()
2412   *
2413   * adds a file entry to the specified directory, using the same
2414   * semantics as ext4_find_entry(). It returns NULL if it failed.
2415   *
2416   * NOTE!! The inode part of 'de' is left at 0 - which means you
2417   * may not sleep between calling this and putting something into
2418   * the entry, as someone else might have used it while you slept.
2419   */
2420  static int ext4_add_entry(handle_t *handle, struct dentry *dentry,
2421  			  struct inode *inode)
2422  {
2423  	struct inode *dir = d_inode(dentry->d_parent);
2424  	struct buffer_head *bh = NULL;
2425  	struct ext4_dir_entry_2 *de;
2426  	struct super_block *sb;
2427  	struct ext4_filename fname;
2428  	int	retval;
2429  	int	dx_fallback=0;
2430  	unsigned blocksize;
2431  	ext4_lblk_t block, blocks;
2432  	int	csum_size = 0;
2433  
2434  	if (ext4_has_metadata_csum(inode->i_sb))
2435  		csum_size = sizeof(struct ext4_dir_entry_tail);
2436  
2437  	sb = dir->i_sb;
2438  	blocksize = sb->s_blocksize;
2439  	if (!dentry->d_name.len)
2440  		return -EINVAL;
2441  
2442  	if (fscrypt_is_nokey_name(dentry))
2443  		return -ENOKEY;
2444  
2445  #if IS_ENABLED(CONFIG_UNICODE)
2446  	if (sb_has_strict_encoding(sb) && IS_CASEFOLDED(dir) &&
2447  	    utf8_validate(sb->s_encoding, &dentry->d_name))
2448  		return -EINVAL;
2449  #endif
2450  
2451  	retval = ext4_fname_setup_filename(dir, &dentry->d_name, 0, &fname);
2452  	if (retval)
2453  		return retval;
2454  
2455  	if (ext4_has_inline_data(dir)) {
2456  		retval = ext4_try_add_inline_entry(handle, &fname, dir, inode);
2457  		if (retval < 0)
2458  			goto out;
2459  		if (retval == 1) {
2460  			retval = 0;
2461  			goto out;
2462  		}
2463  	}
2464  
2465  	if (is_dx(dir)) {
2466  		retval = ext4_dx_add_entry(handle, &fname, dir, inode);
2467  		if (!retval || (retval != ERR_BAD_DX_DIR))
2468  			goto out;
2469  		/* Can we just ignore htree data? */
2470  		if (ext4_has_metadata_csum(sb)) {
2471  			EXT4_ERROR_INODE(dir,
2472  				"Directory has corrupted htree index.");
2473  			retval = -EFSCORRUPTED;
2474  			goto out;
2475  		}
2476  		ext4_clear_inode_flag(dir, EXT4_INODE_INDEX);
2477  		dx_fallback++;
2478  		retval = ext4_mark_inode_dirty(handle, dir);
2479  		if (unlikely(retval))
2480  			goto out;
2481  	}
2482  	blocks = dir->i_size >> sb->s_blocksize_bits;
2483  	for (block = 0; block < blocks; block++) {
2484  		bh = ext4_read_dirblock(dir, block, DIRENT);
2485  		if (bh == NULL) {
2486  			bh = ext4_bread(handle, dir, block,
2487  					EXT4_GET_BLOCKS_CREATE);
2488  			goto add_to_new_block;
2489  		}
2490  		if (IS_ERR(bh)) {
2491  			retval = PTR_ERR(bh);
2492  			bh = NULL;
2493  			goto out;
2494  		}
2495  		retval = add_dirent_to_buf(handle, &fname, dir, inode,
2496  					   NULL, bh);
2497  		if (retval != -ENOSPC)
2498  			goto out;
2499  
2500  		if (blocks == 1 && !dx_fallback &&
2501  		    ext4_has_feature_dir_index(sb)) {
2502  			retval = make_indexed_dir(handle, &fname, dir,
2503  						  inode, bh);
2504  			bh = NULL; /* make_indexed_dir releases bh */
2505  			goto out;
2506  		}
2507  		brelse(bh);
2508  	}
2509  	bh = ext4_append(handle, dir, &block);
2510  add_to_new_block:
2511  	if (IS_ERR(bh)) {
2512  		retval = PTR_ERR(bh);
2513  		bh = NULL;
2514  		goto out;
2515  	}
2516  	de = (struct ext4_dir_entry_2 *) bh->b_data;
2517  	de->inode = 0;
2518  	de->rec_len = ext4_rec_len_to_disk(blocksize - csum_size, blocksize);
2519  
2520  	if (csum_size)
2521  		ext4_initialize_dirent_tail(bh, blocksize);
2522  
2523  	retval = add_dirent_to_buf(handle, &fname, dir, inode, de, bh);
2524  out:
2525  	ext4_fname_free_filename(&fname);
2526  	brelse(bh);
2527  	if (retval == 0)
2528  		ext4_set_inode_state(inode, EXT4_STATE_NEWENTRY);
2529  	return retval;
2530  }
2531  
2532  /*
2533   * Returns 0 for success, or a negative error value
2534   */
2535  static int ext4_dx_add_entry(handle_t *handle, struct ext4_filename *fname,
2536  			     struct inode *dir, struct inode *inode)
2537  {
2538  	struct dx_frame frames[EXT4_HTREE_LEVEL], *frame;
2539  	struct dx_entry *entries, *at;
2540  	struct buffer_head *bh;
2541  	struct super_block *sb = dir->i_sb;
2542  	struct ext4_dir_entry_2 *de;
2543  	int restart;
2544  	int err;
2545  
2546  again:
2547  	restart = 0;
2548  	frame = dx_probe(fname, dir, NULL, frames);
2549  	if (IS_ERR(frame))
2550  		return PTR_ERR(frame);
2551  	entries = frame->entries;
2552  	at = frame->at;
2553  	bh = ext4_read_dirblock(dir, dx_get_block(frame->at), DIRENT_HTREE);
2554  	if (IS_ERR(bh)) {
2555  		err = PTR_ERR(bh);
2556  		bh = NULL;
2557  		goto cleanup;
2558  	}
2559  
2560  	BUFFER_TRACE(bh, "get_write_access");
2561  	err = ext4_journal_get_write_access(handle, sb, bh, EXT4_JTR_NONE);
2562  	if (err)
2563  		goto journal_error;
2564  
2565  	err = add_dirent_to_buf(handle, fname, dir, inode, NULL, bh);
2566  	if (err != -ENOSPC)
2567  		goto cleanup;
2568  
2569  	err = 0;
2570  	/* Block full, should compress but for now just split */
2571  	dxtrace(printk(KERN_DEBUG "using %u of %u node entries\n",
2572  		       dx_get_count(entries), dx_get_limit(entries)));
2573  	/* Need to split index? */
2574  	if (dx_get_count(entries) == dx_get_limit(entries)) {
2575  		ext4_lblk_t newblock;
2576  		int levels = frame - frames + 1;
2577  		unsigned int icount;
2578  		int add_level = 1;
2579  		struct dx_entry *entries2;
2580  		struct dx_node *node2;
2581  		struct buffer_head *bh2;
2582  
2583  		while (frame > frames) {
2584  			if (dx_get_count((frame - 1)->entries) <
2585  			    dx_get_limit((frame - 1)->entries)) {
2586  				add_level = 0;
2587  				break;
2588  			}
2589  			frame--; /* split higher index block */
2590  			at = frame->at;
2591  			entries = frame->entries;
2592  			restart = 1;
2593  		}
2594  		if (add_level && levels == ext4_dir_htree_level(sb)) {
2595  			ext4_warning(sb, "Directory (ino: %lu) index full, "
2596  					 "reach max htree level :%d",
2597  					 dir->i_ino, levels);
2598  			if (ext4_dir_htree_level(sb) < EXT4_HTREE_LEVEL) {
2599  				ext4_warning(sb, "Large directory feature is "
2600  						 "not enabled on this "
2601  						 "filesystem");
2602  			}
2603  			err = -ENOSPC;
2604  			goto cleanup;
2605  		}
2606  		icount = dx_get_count(entries);
2607  		bh2 = ext4_append(handle, dir, &newblock);
2608  		if (IS_ERR(bh2)) {
2609  			err = PTR_ERR(bh2);
2610  			goto cleanup;
2611  		}
2612  		node2 = (struct dx_node *)(bh2->b_data);
2613  		entries2 = node2->entries;
2614  		memset(&node2->fake, 0, sizeof(struct fake_dirent));
2615  		node2->fake.rec_len = ext4_rec_len_to_disk(sb->s_blocksize,
2616  							   sb->s_blocksize);
2617  		BUFFER_TRACE(frame->bh, "get_write_access");
2618  		err = ext4_journal_get_write_access(handle, sb, frame->bh,
2619  						    EXT4_JTR_NONE);
2620  		if (err)
2621  			goto journal_error;
2622  		if (!add_level) {
2623  			unsigned icount1 = icount/2, icount2 = icount - icount1;
2624  			unsigned hash2 = dx_get_hash(entries + icount1);
2625  			dxtrace(printk(KERN_DEBUG "Split index %i/%i\n",
2626  				       icount1, icount2));
2627  
2628  			BUFFER_TRACE(frame->bh, "get_write_access"); /* index root */
2629  			err = ext4_journal_get_write_access(handle, sb,
2630  							    (frame - 1)->bh,
2631  							    EXT4_JTR_NONE);
2632  			if (err)
2633  				goto journal_error;
2634  
2635  			memcpy((char *) entries2, (char *) (entries + icount1),
2636  			       icount2 * sizeof(struct dx_entry));
2637  			dx_set_count(entries, icount1);
2638  			dx_set_count(entries2, icount2);
2639  			dx_set_limit(entries2, dx_node_limit(dir));
2640  
2641  			/* Which index block gets the new entry? */
2642  			if (at - entries >= icount1) {
2643  				frame->at = at - entries - icount1 + entries2;
2644  				frame->entries = entries = entries2;
2645  				swap(frame->bh, bh2);
2646  			}
2647  			dx_insert_block((frame - 1), hash2, newblock);
2648  			dxtrace(dx_show_index("node", frame->entries));
2649  			dxtrace(dx_show_index("node",
2650  			       ((struct dx_node *) bh2->b_data)->entries));
2651  			err = ext4_handle_dirty_dx_node(handle, dir, bh2);
2652  			if (err)
2653  				goto journal_error;
2654  			brelse (bh2);
2655  			err = ext4_handle_dirty_dx_node(handle, dir,
2656  						   (frame - 1)->bh);
2657  			if (err)
2658  				goto journal_error;
2659  			err = ext4_handle_dirty_dx_node(handle, dir,
2660  							frame->bh);
2661  			if (restart || err)
2662  				goto journal_error;
2663  		} else {
2664  			struct dx_root *dxroot;
2665  			memcpy((char *) entries2, (char *) entries,
2666  			       icount * sizeof(struct dx_entry));
2667  			dx_set_limit(entries2, dx_node_limit(dir));
2668  
2669  			/* Set up root */
2670  			dx_set_count(entries, 1);
2671  			dx_set_block(entries + 0, newblock);
2672  			dxroot = (struct dx_root *)frames[0].bh->b_data;
2673  			dxroot->info.indirect_levels += 1;
2674  			dxtrace(printk(KERN_DEBUG
2675  				       "Creating %d level index...\n",
2676  				       dxroot->info.indirect_levels));
2677  			err = ext4_handle_dirty_dx_node(handle, dir, frame->bh);
2678  			if (err)
2679  				goto journal_error;
2680  			err = ext4_handle_dirty_dx_node(handle, dir, bh2);
2681  			brelse(bh2);
2682  			restart = 1;
2683  			goto journal_error;
2684  		}
2685  	}
2686  	de = do_split(handle, dir, &bh, frame, &fname->hinfo);
2687  	if (IS_ERR(de)) {
2688  		err = PTR_ERR(de);
2689  		goto cleanup;
2690  	}
2691  	err = add_dirent_to_buf(handle, fname, dir, inode, de, bh);
2692  	goto cleanup;
2693  
2694  journal_error:
2695  	ext4_std_error(dir->i_sb, err); /* this is a no-op if err == 0 */
2696  cleanup:
2697  	brelse(bh);
2698  	dx_release(frames);
2699  	/* @restart is true means htree-path has been changed, we need to
2700  	 * repeat dx_probe() to find out valid htree-path
2701  	 */
2702  	if (restart && err == 0)
2703  		goto again;
2704  	return err;
2705  }
2706  
2707  /*
2708   * ext4_generic_delete_entry deletes a directory entry by merging it
2709   * with the previous entry
2710   */
2711  int ext4_generic_delete_entry(struct inode *dir,
2712  			      struct ext4_dir_entry_2 *de_del,
2713  			      struct buffer_head *bh,
2714  			      void *entry_buf,
2715  			      int buf_size,
2716  			      int csum_size)
2717  {
2718  	struct ext4_dir_entry_2 *de, *pde;
2719  	unsigned int blocksize = dir->i_sb->s_blocksize;
2720  	int i;
2721  
2722  	i = 0;
2723  	pde = NULL;
2724  	de = entry_buf;
2725  	while (i < buf_size - csum_size) {
2726  		if (ext4_check_dir_entry(dir, NULL, de, bh,
2727  					 entry_buf, buf_size, i))
2728  			return -EFSCORRUPTED;
2729  		if (de == de_del)  {
2730  			if (pde) {
2731  				pde->rec_len = ext4_rec_len_to_disk(
2732  					ext4_rec_len_from_disk(pde->rec_len,
2733  							       blocksize) +
2734  					ext4_rec_len_from_disk(de->rec_len,
2735  							       blocksize),
2736  					blocksize);
2737  
2738  				/* wipe entire dir_entry */
2739  				memset(de, 0, ext4_rec_len_from_disk(de->rec_len,
2740  								blocksize));
2741  			} else {
2742  				/* wipe dir_entry excluding the rec_len field */
2743  				de->inode = 0;
2744  				memset(&de->name_len, 0,
2745  					ext4_rec_len_from_disk(de->rec_len,
2746  								blocksize) -
2747  					offsetof(struct ext4_dir_entry_2,
2748  								name_len));
2749  			}
2750  
2751  			inode_inc_iversion(dir);
2752  			return 0;
2753  		}
2754  		i += ext4_rec_len_from_disk(de->rec_len, blocksize);
2755  		pde = de;
2756  		de = ext4_next_entry(de, blocksize);
2757  	}
2758  	return -ENOENT;
2759  }
2760  
2761  static int ext4_delete_entry(handle_t *handle,
2762  			     struct inode *dir,
2763  			     struct ext4_dir_entry_2 *de_del,
2764  			     struct buffer_head *bh)
2765  {
2766  	int err, csum_size = 0;
2767  
2768  	if (ext4_has_inline_data(dir)) {
2769  		int has_inline_data = 1;
2770  		err = ext4_delete_inline_entry(handle, dir, de_del, bh,
2771  					       &has_inline_data);
2772  		if (has_inline_data)
2773  			return err;
2774  	}
2775  
2776  	if (ext4_has_metadata_csum(dir->i_sb))
2777  		csum_size = sizeof(struct ext4_dir_entry_tail);
2778  
2779  	BUFFER_TRACE(bh, "get_write_access");
2780  	err = ext4_journal_get_write_access(handle, dir->i_sb, bh,
2781  					    EXT4_JTR_NONE);
2782  	if (unlikely(err))
2783  		goto out;
2784  
2785  	err = ext4_generic_delete_entry(dir, de_del, bh, bh->b_data,
2786  					dir->i_sb->s_blocksize, csum_size);
2787  	if (err)
2788  		goto out;
2789  
2790  	BUFFER_TRACE(bh, "call ext4_handle_dirty_metadata");
2791  	err = ext4_handle_dirty_dirblock(handle, dir, bh);
2792  	if (unlikely(err))
2793  		goto out;
2794  
2795  	return 0;
2796  out:
2797  	if (err != -ENOENT)
2798  		ext4_std_error(dir->i_sb, err);
2799  	return err;
2800  }
2801  
2802  /*
2803   * Set directory link count to 1 if nlinks > EXT4_LINK_MAX, or if nlinks == 2
2804   * since this indicates that nlinks count was previously 1 to avoid overflowing
2805   * the 16-bit i_links_count field on disk.  Directories with i_nlink == 1 mean
2806   * that subdirectory link counts are not being maintained accurately.
2807   *
2808   * The caller has already checked for i_nlink overflow in case the DIR_LINK
2809   * feature is not enabled and returned -EMLINK.  The is_dx() check is a proxy
2810   * for checking S_ISDIR(inode) (since the INODE_INDEX feature will not be set
2811   * on regular files) and to avoid creating huge/slow non-HTREE directories.
2812   */
2813  static void ext4_inc_count(struct inode *inode)
2814  {
2815  	inc_nlink(inode);
2816  	if (is_dx(inode) &&
2817  	    (inode->i_nlink > EXT4_LINK_MAX || inode->i_nlink == 2))
2818  		set_nlink(inode, 1);
2819  }
2820  
2821  /*
2822   * If a directory had nlink == 1, then we should let it be 1. This indicates
2823   * directory has >EXT4_LINK_MAX subdirs.
2824   */
2825  static void ext4_dec_count(struct inode *inode)
2826  {
2827  	if (!S_ISDIR(inode->i_mode) || inode->i_nlink > 2)
2828  		drop_nlink(inode);
2829  }
2830  
2831  
2832  /*
2833   * Add non-directory inode to a directory. On success, the inode reference is
2834   * consumed by dentry is instantiation. This is also indicated by clearing of
2835   * *inodep pointer. On failure, the caller is responsible for dropping the
2836   * inode reference in the safe context.
2837   */
2838  static int ext4_add_nondir(handle_t *handle,
2839  		struct dentry *dentry, struct inode **inodep)
2840  {
2841  	struct inode *dir = d_inode(dentry->d_parent);
2842  	struct inode *inode = *inodep;
2843  	int err = ext4_add_entry(handle, dentry, inode);
2844  	if (!err) {
2845  		err = ext4_mark_inode_dirty(handle, inode);
2846  		if (IS_DIRSYNC(dir))
2847  			ext4_handle_sync(handle);
2848  		d_instantiate_new(dentry, inode);
2849  		*inodep = NULL;
2850  		return err;
2851  	}
2852  	drop_nlink(inode);
2853  	ext4_mark_inode_dirty(handle, inode);
2854  	ext4_orphan_add(handle, inode);
2855  	unlock_new_inode(inode);
2856  	return err;
2857  }
2858  
2859  /*
2860   * By the time this is called, we already have created
2861   * the directory cache entry for the new file, but it
2862   * is so far negative - it has no inode.
2863   *
2864   * If the create succeeds, we fill in the inode information
2865   * with d_instantiate().
2866   */
2867  static int ext4_create(struct mnt_idmap *idmap, struct inode *dir,
2868  		       struct dentry *dentry, umode_t mode, bool excl)
2869  {
2870  	handle_t *handle;
2871  	struct inode *inode;
2872  	int err, credits, retries = 0;
2873  
2874  	err = dquot_initialize(dir);
2875  	if (err)
2876  		return err;
2877  
2878  	credits = (EXT4_DATA_TRANS_BLOCKS(dir->i_sb) +
2879  		   EXT4_INDEX_EXTRA_TRANS_BLOCKS + 3);
2880  retry:
2881  	inode = ext4_new_inode_start_handle(idmap, dir, mode, &dentry->d_name,
2882  					    0, NULL, EXT4_HT_DIR, credits);
2883  	handle = ext4_journal_current_handle();
2884  	err = PTR_ERR(inode);
2885  	if (!IS_ERR(inode)) {
2886  		inode->i_op = &ext4_file_inode_operations;
2887  		inode->i_fop = &ext4_file_operations;
2888  		ext4_set_aops(inode);
2889  		err = ext4_add_nondir(handle, dentry, &inode);
2890  		if (!err)
2891  			ext4_fc_track_create(handle, dentry);
2892  	}
2893  	if (handle)
2894  		ext4_journal_stop(handle);
2895  	if (!IS_ERR_OR_NULL(inode))
2896  		iput(inode);
2897  	if (err == -ENOSPC && ext4_should_retry_alloc(dir->i_sb, &retries))
2898  		goto retry;
2899  	return err;
2900  }
2901  
2902  static int ext4_mknod(struct mnt_idmap *idmap, struct inode *dir,
2903  		      struct dentry *dentry, umode_t mode, dev_t rdev)
2904  {
2905  	handle_t *handle;
2906  	struct inode *inode;
2907  	int err, credits, retries = 0;
2908  
2909  	err = dquot_initialize(dir);
2910  	if (err)
2911  		return err;
2912  
2913  	credits = (EXT4_DATA_TRANS_BLOCKS(dir->i_sb) +
2914  		   EXT4_INDEX_EXTRA_TRANS_BLOCKS + 3);
2915  retry:
2916  	inode = ext4_new_inode_start_handle(idmap, dir, mode, &dentry->d_name,
2917  					    0, NULL, EXT4_HT_DIR, credits);
2918  	handle = ext4_journal_current_handle();
2919  	err = PTR_ERR(inode);
2920  	if (!IS_ERR(inode)) {
2921  		init_special_inode(inode, inode->i_mode, rdev);
2922  		inode->i_op = &ext4_special_inode_operations;
2923  		err = ext4_add_nondir(handle, dentry, &inode);
2924  		if (!err)
2925  			ext4_fc_track_create(handle, dentry);
2926  	}
2927  	if (handle)
2928  		ext4_journal_stop(handle);
2929  	if (!IS_ERR_OR_NULL(inode))
2930  		iput(inode);
2931  	if (err == -ENOSPC && ext4_should_retry_alloc(dir->i_sb, &retries))
2932  		goto retry;
2933  	return err;
2934  }
2935  
2936  static int ext4_tmpfile(struct mnt_idmap *idmap, struct inode *dir,
2937  			struct file *file, umode_t mode)
2938  {
2939  	handle_t *handle;
2940  	struct inode *inode;
2941  	int err, retries = 0;
2942  
2943  	err = dquot_initialize(dir);
2944  	if (err)
2945  		return err;
2946  
2947  retry:
2948  	inode = ext4_new_inode_start_handle(idmap, dir, mode,
2949  					    NULL, 0, NULL,
2950  					    EXT4_HT_DIR,
2951  			EXT4_MAXQUOTAS_TRANS_BLOCKS(dir->i_sb) +
2952  			  4 + EXT4_XATTR_TRANS_BLOCKS);
2953  	handle = ext4_journal_current_handle();
2954  	err = PTR_ERR(inode);
2955  	if (!IS_ERR(inode)) {
2956  		inode->i_op = &ext4_file_inode_operations;
2957  		inode->i_fop = &ext4_file_operations;
2958  		ext4_set_aops(inode);
2959  		d_tmpfile(file, inode);
2960  		err = ext4_orphan_add(handle, inode);
2961  		if (err)
2962  			goto err_unlock_inode;
2963  		mark_inode_dirty(inode);
2964  		unlock_new_inode(inode);
2965  	}
2966  	if (handle)
2967  		ext4_journal_stop(handle);
2968  	if (err == -ENOSPC && ext4_should_retry_alloc(dir->i_sb, &retries))
2969  		goto retry;
2970  	return finish_open_simple(file, err);
2971  err_unlock_inode:
2972  	ext4_journal_stop(handle);
2973  	unlock_new_inode(inode);
2974  	return err;
2975  }
2976  
2977  struct ext4_dir_entry_2 *ext4_init_dot_dotdot(struct inode *inode,
2978  			  struct ext4_dir_entry_2 *de,
2979  			  int blocksize, int csum_size,
2980  			  unsigned int parent_ino, int dotdot_real_len)
2981  {
2982  	de->inode = cpu_to_le32(inode->i_ino);
2983  	de->name_len = 1;
2984  	de->rec_len = ext4_rec_len_to_disk(ext4_dir_rec_len(de->name_len, NULL),
2985  					   blocksize);
2986  	strcpy(de->name, ".");
2987  	ext4_set_de_type(inode->i_sb, de, S_IFDIR);
2988  
2989  	de = ext4_next_entry(de, blocksize);
2990  	de->inode = cpu_to_le32(parent_ino);
2991  	de->name_len = 2;
2992  	if (!dotdot_real_len)
2993  		de->rec_len = ext4_rec_len_to_disk(blocksize -
2994  					(csum_size + ext4_dir_rec_len(1, NULL)),
2995  					blocksize);
2996  	else
2997  		de->rec_len = ext4_rec_len_to_disk(
2998  					ext4_dir_rec_len(de->name_len, NULL),
2999  					blocksize);
3000  	strcpy(de->name, "..");
3001  	ext4_set_de_type(inode->i_sb, de, S_IFDIR);
3002  
3003  	return ext4_next_entry(de, blocksize);
3004  }
3005  
3006  int ext4_init_new_dir(handle_t *handle, struct inode *dir,
3007  			     struct inode *inode)
3008  {
3009  	struct buffer_head *dir_block = NULL;
3010  	struct ext4_dir_entry_2 *de;
3011  	ext4_lblk_t block = 0;
3012  	unsigned int blocksize = dir->i_sb->s_blocksize;
3013  	int csum_size = 0;
3014  	int err;
3015  
3016  	if (ext4_has_metadata_csum(dir->i_sb))
3017  		csum_size = sizeof(struct ext4_dir_entry_tail);
3018  
3019  	if (ext4_test_inode_state(inode, EXT4_STATE_MAY_INLINE_DATA)) {
3020  		err = ext4_try_create_inline_dir(handle, dir, inode);
3021  		if (err < 0 && err != -ENOSPC)
3022  			goto out;
3023  		if (!err)
3024  			goto out;
3025  	}
3026  
3027  	inode->i_size = 0;
3028  	dir_block = ext4_append(handle, inode, &block);
3029  	if (IS_ERR(dir_block))
3030  		return PTR_ERR(dir_block);
3031  	de = (struct ext4_dir_entry_2 *)dir_block->b_data;
3032  	ext4_init_dot_dotdot(inode, de, blocksize, csum_size, dir->i_ino, 0);
3033  	set_nlink(inode, 2);
3034  	if (csum_size)
3035  		ext4_initialize_dirent_tail(dir_block, blocksize);
3036  
3037  	BUFFER_TRACE(dir_block, "call ext4_handle_dirty_metadata");
3038  	err = ext4_handle_dirty_dirblock(handle, inode, dir_block);
3039  	if (err)
3040  		goto out;
3041  	set_buffer_verified(dir_block);
3042  out:
3043  	brelse(dir_block);
3044  	return err;
3045  }
3046  
/*
 * ext4_mkdir - create the directory named by @dentry inside @dir.
 *
 * Sequence: allocate the inode, install the directory operations, build the
 * initial directory contents with ext4_init_new_dir(), mark the new inode
 * dirty, add the name to @dir, then bump @dir's link count and mark @dir
 * dirty.  Any failure after the inode exists goes through the
 * out_clear_inode cleanup.  The whole attempt is repeated while it ends in
 * -ENOSPC and ext4_should_retry_alloc() allows it.
 *
 * Returns 0 on success, -EMLINK when EXT4_DIR_LINK_MAX(dir) is true, or
 * another negative errno.
 */
3047  static int ext4_mkdir(struct mnt_idmap *idmap, struct inode *dir,
3048  		      struct dentry *dentry, umode_t mode)
3049  {
3050  	handle_t *handle;
3051  	struct inode *inode;
3052  	int err, err2 = 0, credits, retries = 0;
3053  
3054  	if (EXT4_DIR_LINK_MAX(dir))
3055  		return -EMLINK;
3056  
3057  	err = dquot_initialize(dir);
3058  	if (err)
3059  		return err;
3060  
3061  	credits = (EXT4_DATA_TRANS_BLOCKS(dir->i_sb) +
3062  		   EXT4_INDEX_EXTRA_TRANS_BLOCKS + 3);
3063  retry:
3064  	inode = ext4_new_inode_start_handle(idmap, dir, S_IFDIR | mode,
3065  					    &dentry->d_name,
3066  					    0, NULL, EXT4_HT_DIR, credits);
	/* Fetched even on failure so that out_stop can stop it if non-NULL. */
3067  	handle = ext4_journal_current_handle();
3068  	err = PTR_ERR(inode);
3069  	if (IS_ERR(inode))
3070  		goto out_stop;
3071  
3072  	inode->i_op = &ext4_dir_inode_operations;
3073  	inode->i_fop = &ext4_dir_operations;
3074  	err = ext4_init_new_dir(handle, dir, inode);
3075  	if (err)
3076  		goto out_clear_inode;
3077  	err = ext4_mark_inode_dirty(handle, inode);
3078  	if (!err)
3079  		err = ext4_add_entry(handle, dentry, inode);
3080  	if (err) {
	/*
	 * The label sits inside the if-body so that the goto's above and
	 * below share this cleanup: zero the link count, put the inode on
	 * the orphan list, unlock it, mark it dirty (a failure there
	 * replaces err), stop the handle and drop the inode reference.
	 */
3081  out_clear_inode:
3082  		clear_nlink(inode);
3083  		ext4_orphan_add(handle, inode);
3084  		unlock_new_inode(inode);
3085  		err2 = ext4_mark_inode_dirty(handle, inode);
3086  		if (unlikely(err2))
3087  			err = err2;
3088  		ext4_journal_stop(handle);
3089  		iput(inode);
		/* The handle is already stopped, so skip out_stop. */
3090  		goto out_retry;
3091  	}
3092  	ext4_inc_count(dir);
3093  
3094  	ext4_update_dx_flag(dir);
3095  	err = ext4_mark_inode_dirty(handle, dir);
3096  	if (err)
3097  		goto out_clear_inode;
3098  	d_instantiate_new(dentry, inode);
3099  	ext4_fc_track_create(handle, dentry);
3100  	if (IS_DIRSYNC(dir))
3101  		ext4_handle_sync(handle);
3102  
3103  out_stop:
3104  	if (handle)
3105  		ext4_journal_stop(handle);
3106  out_retry:
3107  	if (err == -ENOSPC && ext4_should_retry_alloc(dir->i_sb, &retries))
3108  		goto retry;
3109  	return err;
3110  }
3111  
3112  /*
3113   * routine to check that the specified directory is empty (for rmdir)
 *
 * For an inline-data directory the answer comes from empty_inline_dir(), as
 * long as that call leaves has_inline_data set.  Otherwise block 0 is read
 * and must start with valid "." and ".." entries; every later entry up to
 * i_size must pass ext4_check_dir_entry() and carry inode number 0.
 *
 * Returns true only if all of that holds.  A too-small i_size, a read error
 * or a failed entry check all yield false, i.e. "not empty".
3114   */
3115  bool ext4_empty_dir(struct inode *inode)
3116  {
3117  	unsigned int offset;
3118  	struct buffer_head *bh;
3119  	struct ext4_dir_entry_2 *de;
3120  	struct super_block *sb;
3121  
3122  	if (ext4_has_inline_data(inode)) {
3123  		int has_inline_data = 1;
3124  		int ret;
3125  
3126  		ret = empty_inline_dir(inode, &has_inline_data);
		/* Fall through to the block path if the flag was cleared. */
3127  		if (has_inline_data)
3128  			return ret;
3129  	}
3130  
3131  	sb = inode->i_sb;
	/* The directory must at least have room for "." and "..". */
3132  	if (inode->i_size < ext4_dir_rec_len(1, NULL) +
3133  					ext4_dir_rec_len(2, NULL)) {
3134  		EXT4_ERROR_INODE(inode, "invalid size");
3135  		return false;
3136  	}
3137  	bh = ext4_read_dirblock(inode, 0, EITHER);
3138  	if (IS_ERR(bh))
3139  		return false;
3140  
	/* First entry: must be "." and point back at this inode. */
3141  	de = (struct ext4_dir_entry_2 *) bh->b_data;
3142  	if (ext4_check_dir_entry(inode, NULL, de, bh, bh->b_data, bh->b_size,
3143  				 0) ||
3144  	    le32_to_cpu(de->inode) != inode->i_ino || strcmp(".", de->name)) {
3145  		ext4_warning_inode(inode, "directory missing '.'");
3146  		brelse(bh);
3147  		return false;
3148  	}
	/* Second entry: must be ".." with a non-zero inode number. */
3149  	offset = ext4_rec_len_from_disk(de->rec_len, sb->s_blocksize);
3150  	de = ext4_next_entry(de, sb->s_blocksize);
3151  	if (ext4_check_dir_entry(inode, NULL, de, bh, bh->b_data, bh->b_size,
3152  				 offset) ||
3153  	    le32_to_cpu(de->inode) == 0 || strcmp("..", de->name)) {
3154  		ext4_warning_inode(inode, "directory missing '..'");
3155  		brelse(bh);
3156  		return false;
3157  	}
3158  	offset += ext4_rec_len_from_disk(de->rec_len, sb->s_blocksize);
	/* Walk the remaining entries; offset is a byte position in the dir. */
3159  	while (offset < inode->i_size) {
		/* Crossing into a new block: swap the buffer. */
3160  		if (!(offset & (sb->s_blocksize - 1))) {
3161  			unsigned int lblock;
3162  			brelse(bh);
3163  			lblock = offset >> EXT4_BLOCK_SIZE_BITS(sb);
3164  			bh = ext4_read_dirblock(inode, lblock, EITHER);
			/* No buffer for this block: skip the whole block. */
3165  			if (bh == NULL) {
3166  				offset += sb->s_blocksize;
3167  				continue;
3168  			}
3169  			if (IS_ERR(bh))
3170  				return false;
3171  		}
3172  		de = (struct ext4_dir_entry_2 *) (bh->b_data +
3173  					(offset & (sb->s_blocksize - 1)));
		/* A bad entry or any in-use entry means "not empty". */
3174  		if (ext4_check_dir_entry(inode, NULL, de, bh,
3175  					 bh->b_data, bh->b_size, offset) ||
3176  		    le32_to_cpu(de->inode)) {
3177  			brelse(bh);
3178  			return false;
3179  		}
3180  		offset += ext4_rec_len_from_disk(de->rec_len, sb->s_blocksize);
3181  	}
3182  	brelse(bh);
3183  	return true;
3184  }
3185  
/*
 * ext4_rmdir - remove the directory named by @dentry from @dir.
 *
 * The entry is looked up and checked before any journal handle is started.
 * Inside the handle the entry is deleted, the victim's link count and size
 * are zeroed, it is put on the orphan list, and @dir's timestamps and link
 * count are updated.
 *
 * Returns 0 on success or a negative errno, in particular:
 *   -EIO           the filesystem was forcibly shut down
 *   -ENOENT        ext4_find_entry() found no entry
 *   -EFSCORRUPTED  the entry's inode number is not the dentry's inode
 *   -ENOTEMPTY     ext4_empty_dir() returned false
 */
3186  static int ext4_rmdir(struct inode *dir, struct dentry *dentry)
3187  {
3188  	int retval;
3189  	struct inode *inode;
3190  	struct buffer_head *bh;
3191  	struct ext4_dir_entry_2 *de;
3192  	handle_t *handle = NULL;
3193  
3194  	if (unlikely(ext4_forced_shutdown(dir->i_sb)))
3195  		return -EIO;
3196  
3197  	/* Initialize quotas before so that eventual writes go in
3198  	 * separate transaction */
3199  	retval = dquot_initialize(dir);
3200  	if (retval)
3201  		return retval;
3202  	retval = dquot_initialize(d_inode(dentry));
3203  	if (retval)
3204  		return retval;
3205  
	/* retval is preset before each check; the goto keeps that value. */
3206  	retval = -ENOENT;
3207  	bh = ext4_find_entry(dir, &dentry->d_name, &de, NULL);
3208  	if (IS_ERR(bh))
3209  		return PTR_ERR(bh);
3210  	if (!bh)
3211  		goto end_rmdir;
3212  
3213  	inode = d_inode(dentry);
3214  
3215  	retval = -EFSCORRUPTED;
3216  	if (le32_to_cpu(de->inode) != inode->i_ino)
3217  		goto end_rmdir;
3218  
3219  	retval = -ENOTEMPTY;
3220  	if (!ext4_empty_dir(inode))
3221  		goto end_rmdir;
3222  
3223  	handle = ext4_journal_start(dir, EXT4_HT_DIR,
3224  				    EXT4_DATA_TRANS_BLOCKS(dir->i_sb));
3225  	if (IS_ERR(handle)) {
3226  		retval = PTR_ERR(handle);
		/* end_rmdir must not try to stop an error pointer. */
3227  		handle = NULL;
3228  		goto end_rmdir;
3229  	}
3230  
3231  	if (IS_DIRSYNC(dir))
3232  		ext4_handle_sync(handle);
3233  
3234  	retval = ext4_delete_entry(handle, dir, de, bh);
3235  	if (retval)
3236  		goto end_rmdir;
	/* Only warns; the removal carries on regardless. */
3237  	if (!EXT4_DIR_LINK_EMPTY(inode))
3238  		ext4_warning_inode(inode,
3239  			     "empty directory '%.*s' has too many links (%u)",
3240  			     dentry->d_name.len, dentry->d_name.name,
3241  			     inode->i_nlink);
3242  	inode_inc_iversion(inode);
3243  	clear_nlink(inode);
3244  	/* There's no need to set i_disksize: the fact that i_nlink is
3245  	 * zero will ensure that the right thing happens during any
3246  	 * recovery. */
3247  	inode->i_size = 0;
3248  	ext4_orphan_add(handle, inode);
3249  	dir->i_mtime = inode_set_ctime_current(dir);
3250  	inode_set_ctime_current(inode);
3251  	retval = ext4_mark_inode_dirty(handle, inode);
3252  	if (retval)
3253  		goto end_rmdir;
3254  	ext4_dec_count(dir);
3255  	ext4_update_dx_flag(dir);
3256  	ext4_fc_track_unlink(handle, dentry);
3257  	retval = ext4_mark_inode_dirty(handle, dir);
3258  
3259  #if IS_ENABLED(CONFIG_UNICODE)
3260  	/* VFS negative dentries are incompatible with Encoding and
3261  	 * Case-insensitiveness. Eventually we'll want avoid
3262  	 * invalidating the dentries here, alongside with returning the
3263  	 * negative dentries at ext4_lookup(), when it is better
3264  	 * supported by the VFS for the CI case.
3265  	 */
3266  	if (IS_CASEFOLDED(dir))
3267  		d_invalidate(dentry);
3268  #endif
3269  
3270  end_rmdir:
3271  	brelse(bh);
3272  	if (handle)
3273  		ext4_journal_stop(handle);
3274  	return retval;
3275  }
3276  
/*
 * __ext4_unlink - remove the name @d_name from @dir and drop one link on
 * @inode.
 *
 * @dentry is only used for fast-commit tracking and may be NULL (see the
 * parameter comment below).
 *
 * When the directory entry found for @d_name does not refer to @inode, the
 * call fails with -ENOENT unless EXT4_FC_REPLAY is set in s_mount_state; in
 * that case the entry is left alone and only the inode's link count is
 * handled.
 *
 * Returns 0 on success or a negative errno.
 */
3277  int __ext4_unlink(struct inode *dir, const struct qstr *d_name,
3278  		  struct inode *inode,
3279  		  struct dentry *dentry /* NULL during fast_commit recovery */)
3280  {
3281  	int retval = -ENOENT;
3282  	struct buffer_head *bh;
3283  	struct ext4_dir_entry_2 *de;
3284  	handle_t *handle;
3285  	int skip_remove_dentry = 0;
3286  
3287  	/*
3288  	 * Keep this outside the transaction; it may have to set up the
3289  	 * directory's encryption key, which isn't GFP_NOFS-safe.
3290  	 */
3291  	bh = ext4_find_entry(dir, d_name, &de, NULL);
3292  	if (IS_ERR(bh))
3293  		return PTR_ERR(bh);
3294  
3295  	if (!bh)
3296  		return -ENOENT;
3297  
3298  	if (le32_to_cpu(de->inode) != inode->i_ino) {
3299  		/*
3300  		 * It's okay if we don't find a dentry which matches
3301  		 * the inode. That's because it might have gotten
3302  		 * renamed to a different inode number
3303  		 */
3304  		if (EXT4_SB(inode->i_sb)->s_mount_state & EXT4_FC_REPLAY)
3305  			skip_remove_dentry = 1;
3306  		else
			/* retval still holds its initial -ENOENT here. */
3307  			goto out_bh;
3308  	}
3309  
3310  	handle = ext4_journal_start(dir, EXT4_HT_DIR,
3311  				    EXT4_DATA_TRANS_BLOCKS(dir->i_sb));
3312  	if (IS_ERR(handle)) {
3313  		retval = PTR_ERR(handle);
3314  		goto out_bh;
3315  	}
3316  
3317  	if (IS_DIRSYNC(dir))
3318  		ext4_handle_sync(handle);
3319  
3320  	if (!skip_remove_dentry) {
3321  		retval = ext4_delete_entry(handle, dir, de, bh);
3322  		if (retval)
3323  			goto out_handle;
3324  		dir->i_mtime = inode_set_ctime_current(dir);
3325  		ext4_update_dx_flag(dir);
3326  		retval = ext4_mark_inode_dirty(handle, dir);
3327  		if (retval)
3328  			goto out_handle;
3329  	} else {
3330  		retval = 0;
3331  	}
	/* An inode that already has no links is only warned about. */
3332  	if (inode->i_nlink == 0)
3333  		ext4_warning_inode(inode, "Deleting file '%.*s' with no links",
3334  				   d_name->len, d_name->name);
3335  	else
3336  		drop_nlink(inode);
	/* Last link gone: put the inode on the orphan list. */
3337  	if (!inode->i_nlink)
3338  		ext4_orphan_add(handle, inode);
3339  	inode_set_ctime_current(inode);
3340  	retval = ext4_mark_inode_dirty(handle, inode);
3341  	if (dentry && !retval)
3342  		ext4_fc_track_unlink(handle, dentry);
3343  out_handle:
3344  	ext4_journal_stop(handle);
3345  out_bh:
3346  	brelse(bh);
3347  	return retval;
3348  }
3349  
3350  static int ext4_unlink(struct inode *dir, struct dentry *dentry)
3351  {
3352  	int retval;
3353  
3354  	if (unlikely(ext4_forced_shutdown(dir->i_sb)))
3355  		return -EIO;
3356  
3357  	trace_ext4_unlink_enter(dir, dentry);
3358  	/*
3359  	 * Initialize quotas before so that eventual writes go
3360  	 * in separate transaction
3361  	 */
3362  	retval = dquot_initialize(dir);
3363  	if (retval)
3364  		goto out_trace;
3365  	retval = dquot_initialize(d_inode(dentry));
3366  	if (retval)
3367  		goto out_trace;
3368  
3369  	retval = __ext4_unlink(dir, &dentry->d_name, d_inode(dentry), dentry);
3370  #if IS_ENABLED(CONFIG_UNICODE)
3371  	/* VFS negative dentries are incompatible with Encoding and
3372  	 * Case-insensitiveness. Eventually we'll want avoid
3373  	 * invalidating the dentries here, alongside with returning the
3374  	 * negative dentries at ext4_lookup(), when it is  better
3375  	 * supported by the VFS for the CI case.
3376  	 */
3377  	if (IS_CASEFOLDED(dir))
3378  		d_invalidate(dentry);
3379  #endif
3380  
3381  out_trace:
3382  	trace_ext4_unlink_exit(dentry, retval);
3383  	return retval;
3384  }
3385  
3386  static int ext4_init_symlink_block(handle_t *handle, struct inode *inode,
3387  				   struct fscrypt_str *disk_link)
3388  {
3389  	struct buffer_head *bh;
3390  	char *kaddr;
3391  	int err = 0;
3392  
3393  	bh = ext4_bread(handle, inode, 0, EXT4_GET_BLOCKS_CREATE);
3394  	if (IS_ERR(bh))
3395  		return PTR_ERR(bh);
3396  
3397  	BUFFER_TRACE(bh, "get_write_access");
3398  	err = ext4_journal_get_write_access(handle, inode->i_sb, bh, EXT4_JTR_NONE);
3399  	if (err)
3400  		goto out;
3401  
3402  	kaddr = (char *)bh->b_data;
3403  	memcpy(kaddr, disk_link->name, disk_link->len);
3404  	inode->i_size = disk_link->len - 1;
3405  	EXT4_I(inode)->i_disksize = inode->i_size;
3406  	err = ext4_handle_dirty_metadata(handle, inode, bh);
3407  out:
3408  	brelse(bh);
3409  	return err;
3410  }
3411  
/*
 * ext4_symlink - create the symbolic link @dentry in @dir pointing at
 * @symname.
 *
 * Targets with disk_link.len above EXT4_N_BLOCKS * 4 are written to a data
 * block by ext4_init_symlink_block(); shorter ones are copied straight into
 * the inode's i_data area.  Encrypted inodes get their target from
 * fscrypt_encrypt_symlink() first.  The allocation is retried while it ends
 * in -ENOSPC and ext4_should_retry_alloc() allows it.
 *
 * Returns 0 on success, -EIO after a forced shutdown, or another negative
 * errno.
 */
3412  static int ext4_symlink(struct mnt_idmap *idmap, struct inode *dir,
3413  			struct dentry *dentry, const char *symname)
3414  {
3415  	handle_t *handle;
3416  	struct inode *inode;
3417  	int err, len = strlen(symname);
3418  	int credits;
3419  	struct fscrypt_str disk_link;
3420  	int retries = 0;
3421  
3422  	if (unlikely(ext4_forced_shutdown(dir->i_sb)))
3423  		return -EIO;
3424  
3425  	err = fscrypt_prepare_symlink(dir, symname, len, dir->i_sb->s_blocksize,
3426  				      &disk_link);
3427  	if (err)
3428  		return err;
3429  
3430  	err = dquot_initialize(dir);
3431  	if (err)
3432  		return err;
3433  
3434  	/*
3435  	 * EXT4_INDEX_EXTRA_TRANS_BLOCKS for addition of entry into the
3436  	 * directory. +3 for inode, inode bitmap, group descriptor allocation.
3437  	 * EXT4_DATA_TRANS_BLOCKS for the data block allocation and
3438  	 * modification.
3439  	 */
3440  	credits = EXT4_DATA_TRANS_BLOCKS(dir->i_sb) +
3441  		  EXT4_INDEX_EXTRA_TRANS_BLOCKS + 3;
3442  retry:
3443  	inode = ext4_new_inode_start_handle(idmap, dir, S_IFLNK|S_IRWXUGO,
3444  					    &dentry->d_name, 0, NULL,
3445  					    EXT4_HT_DIR, credits);
3446  	handle = ext4_journal_current_handle();
3447  	if (IS_ERR(inode)) {
3448  		if (handle)
3449  			ext4_journal_stop(handle);
3450  		err = PTR_ERR(inode);
3451  		goto out_retry;
3452  	}
3453  
	/* Choose the inode operations that match how the target is stored. */
3454  	if (IS_ENCRYPTED(inode)) {
3455  		err = fscrypt_encrypt_symlink(inode, symname, len, &disk_link);
3456  		if (err)
3457  			goto err_drop_inode;
3458  		inode->i_op = &ext4_encrypted_symlink_inode_operations;
3459  	} else {
3460  		if ((disk_link.len > EXT4_N_BLOCKS * 4)) {
3461  			inode->i_op = &ext4_symlink_inode_operations;
3462  		} else {
3463  			inode->i_op = &ext4_fast_symlink_inode_operations;
3464  			inode->i_link = (char *)&EXT4_I(inode)->i_data;
3465  		}
3466  	}
3467  
3468  	if ((disk_link.len > EXT4_N_BLOCKS * 4)) {
3469  		/* alloc symlink block and fill it */
3470  		err = ext4_init_symlink_block(handle, inode, &disk_link);
3471  		if (err)
3472  			goto err_drop_inode;
3473  	} else {
3474  		/* clear the extent format for fast symlink */
3475  		ext4_clear_inode_flag(inode, EXT4_INODE_EXTENTS);
3476  		memcpy((char *)&EXT4_I(inode)->i_data, disk_link.name,
3477  		       disk_link.len);
		/* NOTE(review): presumably len counts a trailing NUL - confirm. */
3478  		inode->i_size = disk_link.len - 1;
3479  		EXT4_I(inode)->i_disksize = inode->i_size;
3480  	}
3481  	err = ext4_add_nondir(handle, dentry, &inode);
3482  	if (handle)
3483  		ext4_journal_stop(handle);
3484  	iput(inode);
3485  	goto out_retry;
3486  
	/*
	 * Failure after the inode exists: zero its link count, mark it
	 * dirty, put it on the orphan list, unlock it and drop it.
	 */
3487  err_drop_inode:
3488  	clear_nlink(inode);
3489  	ext4_mark_inode_dirty(handle, inode);
3490  	ext4_orphan_add(handle, inode);
3491  	unlock_new_inode(inode);
3492  	if (handle)
3493  		ext4_journal_stop(handle);
3494  	iput(inode);
3495  out_retry:
3496  	if (err == -ENOSPC && ext4_should_retry_alloc(dir->i_sb, &retries))
3497  		goto retry;
	/* Free the name only when it is not the caller's own @symname. */
3498  	if (disk_link.name != (unsigned char *)symname)
3499  		kfree(disk_link.name);
3500  	return err;
3501  }
3502  
3503  int __ext4_link(struct inode *dir, struct inode *inode, struct dentry *dentry)
3504  {
3505  	handle_t *handle;
3506  	int err, retries = 0;
3507  retry:
3508  	handle = ext4_journal_start(dir, EXT4_HT_DIR,
3509  		(EXT4_DATA_TRANS_BLOCKS(dir->i_sb) +
3510  		 EXT4_INDEX_EXTRA_TRANS_BLOCKS) + 1);
3511  	if (IS_ERR(handle))
3512  		return PTR_ERR(handle);
3513  
3514  	if (IS_DIRSYNC(dir))
3515  		ext4_handle_sync(handle);
3516  
3517  	inode_set_ctime_current(inode);
3518  	ext4_inc_count(inode);
3519  	ihold(inode);
3520  
3521  	err = ext4_add_entry(handle, dentry, inode);
3522  	if (!err) {
3523  		err = ext4_mark_inode_dirty(handle, inode);
3524  		/* this can happen only for tmpfile being
3525  		 * linked the first time
3526  		 */
3527  		if (inode->i_nlink == 1)
3528  			ext4_orphan_del(handle, inode);
3529  		d_instantiate(dentry, inode);
3530  		ext4_fc_track_link(handle, dentry);
3531  	} else {
3532  		drop_nlink(inode);
3533  		iput(inode);
3534  	}
3535  	ext4_journal_stop(handle);
3536  	if (err == -ENOSPC && ext4_should_retry_alloc(dir->i_sb, &retries))
3537  		goto retry;
3538  	return err;
3539  }
3540  
3541  static int ext4_link(struct dentry *old_dentry,
3542  		     struct inode *dir, struct dentry *dentry)
3543  {
3544  	struct inode *inode = d_inode(old_dentry);
3545  	int err;
3546  
3547  	if (inode->i_nlink >= EXT4_LINK_MAX)
3548  		return -EMLINK;
3549  
3550  	err = fscrypt_prepare_link(old_dentry, dir, dentry);
3551  	if (err)
3552  		return err;
3553  
3554  	if ((ext4_test_inode_flag(dir, EXT4_INODE_PROJINHERIT)) &&
3555  	    (!projid_eq(EXT4_I(dir)->i_projid,
3556  			EXT4_I(old_dentry->d_inode)->i_projid)))
3557  		return -EXDEV;
3558  
3559  	err = dquot_initialize(dir);
3560  	if (err)
3561  		return err;
3562  	return __ext4_link(dir, inode, dentry);
3563  }
3564  
3565  /*
3566   * Try to find buffer head where contains the parent block.
3567   * It should be the inode block if it is inlined or the 1st block
3568   * if it is a normal dir.
3569   */
3570  static struct buffer_head *ext4_get_first_dir_block(handle_t *handle,
3571  					struct inode *inode,
3572  					int *retval,
3573  					struct ext4_dir_entry_2 **parent_de,
3574  					int *inlined)
3575  {
3576  	struct buffer_head *bh;
3577  
3578  	if (!ext4_has_inline_data(inode)) {
3579  		struct ext4_dir_entry_2 *de;
3580  		unsigned int offset;
3581  
3582  		bh = ext4_read_dirblock(inode, 0, EITHER);
3583  		if (IS_ERR(bh)) {
3584  			*retval = PTR_ERR(bh);
3585  			return NULL;
3586  		}
3587  
3588  		de = (struct ext4_dir_entry_2 *) bh->b_data;
3589  		if (ext4_check_dir_entry(inode, NULL, de, bh, bh->b_data,
3590  					 bh->b_size, 0) ||
3591  		    le32_to_cpu(de->inode) != inode->i_ino ||
3592  		    strcmp(".", de->name)) {
3593  			EXT4_ERROR_INODE(inode, "directory missing '.'");
3594  			brelse(bh);
3595  			*retval = -EFSCORRUPTED;
3596  			return NULL;
3597  		}
3598  		offset = ext4_rec_len_from_disk(de->rec_len,
3599  						inode->i_sb->s_blocksize);
3600  		de = ext4_next_entry(de, inode->i_sb->s_blocksize);
3601  		if (ext4_check_dir_entry(inode, NULL, de, bh, bh->b_data,
3602  					 bh->b_size, offset) ||
3603  		    le32_to_cpu(de->inode) == 0 || strcmp("..", de->name)) {
3604  			EXT4_ERROR_INODE(inode, "directory missing '..'");
3605  			brelse(bh);
3606  			*retval = -EFSCORRUPTED;
3607  			return NULL;
3608  		}
3609  		*parent_de = de;
3610  
3611  		return bh;
3612  	}
3613  
3614  	*inlined = 1;
3615  	return ext4_get_first_inline_block(inode, parent_de, retval);
3616  }
3617  
/*
 * One side (source or destination) of a rename, bundled so the rename
 * helpers can take a single argument.
 */
3618  struct ext4_renament {
	/* directory that holds the entry */
3619  	struct inode *dir;
	/* dentry whose d_name names the entry */
3620  	struct dentry *dentry;
	/* inode the entry is expected to refer to */
3621  	struct inode *inode;
	/* NOTE(review): not used in the helpers shown here - confirm meaning */
3622  	bool is_dir;
	/*
	 * Link count change for "dir" applied by ext4_update_dir_count():
	 * 0 leaves it alone, -1 decrements, any other value increments.
	 */
3623  	int dir_nlink_delta;
3624  
3625  	/* entry for "dentry" */
3626  	struct buffer_head *bh;
3627  	struct ext4_dir_entry_2 *de;
	/* filled in by ext4_find_entry(); non-zero skips dirtying bh */
3628  	int inlined;
3629  
3630  	/* entry for ".." in inode if it's a directory */
3631  	struct buffer_head *dir_bh;
3632  	struct ext4_dir_entry_2 *parent_de;
	/* set to 1 by ext4_get_first_dir_block() for inline-data inodes */
3633  	int dir_inlined;
3634  };
3635  
3636  static int ext4_rename_dir_prepare(handle_t *handle, struct ext4_renament *ent)
3637  {
3638  	int retval;
3639  
3640  	ent->dir_bh = ext4_get_first_dir_block(handle, ent->inode,
3641  					      &retval, &ent->parent_de,
3642  					      &ent->dir_inlined);
3643  	if (!ent->dir_bh)
3644  		return retval;
3645  	if (le32_to_cpu(ent->parent_de->inode) != ent->dir->i_ino)
3646  		return -EFSCORRUPTED;
3647  	BUFFER_TRACE(ent->dir_bh, "get_write_access");
3648  	return ext4_journal_get_write_access(handle, ent->dir->i_sb,
3649  					     ent->dir_bh, EXT4_JTR_NONE);
3650  }
3651  
3652  static int ext4_rename_dir_finish(handle_t *handle, struct ext4_renament *ent,
3653  				  unsigned dir_ino)
3654  {
3655  	int retval;
3656  
3657  	ent->parent_de->inode = cpu_to_le32(dir_ino);
3658  	BUFFER_TRACE(ent->dir_bh, "call ext4_handle_dirty_metadata");
3659  	if (!ent->dir_inlined) {
3660  		if (is_dx(ent->inode)) {
3661  			retval = ext4_handle_dirty_dx_node(handle,
3662  							   ent->inode,
3663  							   ent->dir_bh);
3664  		} else {
3665  			retval = ext4_handle_dirty_dirblock(handle, ent->inode,
3666  							    ent->dir_bh);
3667  		}
3668  	} else {
3669  		retval = ext4_mark_inode_dirty(handle, ent->inode);
3670  	}
3671  	if (retval) {
3672  		ext4_std_error(ent->dir->i_sb, retval);
3673  		return retval;
3674  	}
3675  	return 0;
3676  }
3677  
3678  static int ext4_setent(handle_t *handle, struct ext4_renament *ent,
3679  		       unsigned ino, unsigned file_type)
3680  {
3681  	int retval, retval2;
3682  
3683  	BUFFER_TRACE(ent->bh, "get write access");
3684  	retval = ext4_journal_get_write_access(handle, ent->dir->i_sb, ent->bh,
3685  					       EXT4_JTR_NONE);
3686  	if (retval)
3687  		return retval;
3688  	ent->de->inode = cpu_to_le32(ino);
3689  	if (ext4_has_feature_filetype(ent->dir->i_sb))
3690  		ent->de->file_type = file_type;
3691  	inode_inc_iversion(ent->dir);
3692  	ent->dir->i_mtime = inode_set_ctime_current(ent->dir);
3693  	retval = ext4_mark_inode_dirty(handle, ent->dir);
3694  	BUFFER_TRACE(ent->bh, "call ext4_handle_dirty_metadata");
3695  	if (!ent->inlined) {
3696  		retval2 = ext4_handle_dirty_dirblock(handle, ent->dir, ent->bh);
3697  		if (unlikely(retval2)) {
3698  			ext4_std_error(ent->dir->i_sb, retval2);
3699  			return retval2;
3700  		}
3701  	}
3702  	return retval;
3703  }
3704  
3705  static void ext4_resetent(handle_t *handle, struct ext4_renament *ent,
3706  			  unsigned ino, unsigned file_type)
3707  {
3708  	struct ext4_renament old = *ent;
3709  	int retval = 0;
3710  
3711  	/*
3712  	 * old->de could have moved from under us during make indexed dir,
3713  	 * so the old->de may no longer valid and need to find it again
3714  	 * before reset old inode info.
3715  	 */
3716  	old.bh = ext4_find_entry(old.dir, &old.dentry->d_name, &old.de,
3717  				 &old.inlined);
3718  	if (IS_ERR(old.bh))
3719  		retval = PTR_ERR(old.bh);
3720  	if (!old.bh)
3721  		retval = -ENOENT;
3722  	if (retval) {
3723  		ext4_std_error(old.dir->i_sb, retval);
3724  		return;
3725  	}
3726  
3727  	ext4_setent(handle, &old, ino, file_type);
3728  	brelse(old.bh);
3729  }
3730  
3731  static int ext4_find_delete_entry(handle_t *handle, struct inode *dir,
3732  				  const struct qstr *d_name)
3733  {
3734  	int retval = -ENOENT;
3735  	struct buffer_head *bh;
3736  	struct ext4_dir_entry_2 *de;
3737  
3738  	bh = ext4_find_entry(dir, d_name, &de, NULL);
3739  	if (IS_ERR(bh))
3740  		return PTR_ERR(bh);
3741  	if (bh) {
3742  		retval = ext4_delete_entry(handle, dir, de, bh);
3743  		brelse(bh);
3744  	}
3745  	return retval;
3746  }
3747  
3748  static void ext4_rename_delete(handle_t *handle, struct ext4_renament *ent,
3749  			       int force_reread)
3750  {
3751  	int retval;
3752  	/*
3753  	 * ent->de could have moved from under us during htree split, so make
3754  	 * sure that we are deleting the right entry.  We might also be pointing
3755  	 * to a stale entry in the unused part of ent->bh so just checking inum
3756  	 * and the name isn't enough.
3757  	 */
3758  	if (le32_to_cpu(ent->de->inode) != ent->inode->i_ino ||
3759  	    ent->de->name_len != ent->dentry->d_name.len ||
3760  	    strncmp(ent->de->name, ent->dentry->d_name.name,
3761  		    ent->de->name_len) ||
3762  	    force_reread) {
3763  		retval = ext4_find_delete_entry(handle, ent->dir,
3764  						&ent->dentry->d_name);
3765  	} else {
3766  		retval = ext4_delete_entry(handle, ent->dir, ent->de, ent->bh);
3767  		if (retval == -ENOENT) {
3768  			retval = ext4_find_delete_entry(handle, ent->dir,
3769  							&ent->dentry->d_name);
3770  		}
3771  	}
3772  
3773  	if (retval) {
3774  		ext4_warning_inode(ent->dir,
3775  				   "Deleting old file: nlink %d, error=%d",
3776  				   ent->dir->i_nlink, retval);
3777  	}
3778  }
3779  
3780  static void ext4_update_dir_count(handle_t *handle, struct ext4_renament *ent)
3781  {
3782  	if (ent->dir_nlink_delta) {
3783  		if (ent->dir_nlink_delta == -1)
3784  			ext4_dec_count(ent->dir);
3785  		else
3786  			ext4_inc_count(ent->dir);
3787  		ext4_mark_inode_dirty(handle, ent->dir);
3788  	}
3789  }
3790  
3791  static struct inode *ext4_whiteout_for_rename(struct mnt_idmap *idmap,
3792  					      struct ext4_renament *ent,
3793  					      int credits, handle_t **h)
3794  {
3795  	struct inode *wh;
3796  	handle_t *handle;
3797  	int retries = 0;
3798  
3799  	/*
3800  	 * for inode block, sb block, group summaries,
3801  	 * and inode bitmap
3802  	 */
3803  	credits += (EXT4_MAXQUOTAS_TRANS_BLOCKS(ent->dir->i_sb) +
3804  		    EXT4_XATTR_TRANS_BLOCKS + 4);
3805  retry:
3806  	wh = ext4_new_inode_start_handle(idmap, ent->dir,
3807  					 S_IFCHR | WHITEOUT_MODE,
3808  					 &ent->dentry->d_name, 0, NULL,
3809  					 EXT4_HT_DIR, credits);
3810  
3811  	handle = ext4_journal_current_handle();
3812  	if (IS_ERR(wh)) {
3813  		if (handle)
3814  			ext4_journal_stop(handle);
3815  		if (PTR_ERR(wh) == -ENOSPC &&
3816  		    ext4_should_retry_alloc(ent->dir->i_sb, &retries))
3817  			goto retry;
3818  	} else {
3819  		*h = handle;
3820  		init_special_inode(wh, wh->i_mode, WHITEOUT_DEV);
3821  		wh->i_op = &ext4_special_inode_operations;
3822  	}
3823  	return wh;
3824  }
3825  
3826  /*
3827   * Anybody can rename anything with this: the permission checks are left to the
3828   * higher-level routines.
3829   *
3830   * n.b.  old_{dentry,inode} refers to the source dentry/inode
3831   * while new_{dentry,inode} refers to the destination dentry/inode
3832   * This comes from rename(const char *oldpath, const char *newpath)
3833   */
3834  static int ext4_rename(struct mnt_idmap *idmap, struct inode *old_dir,
3835  		       struct dentry *old_dentry, struct inode *new_dir,
3836  		       struct dentry *new_dentry, unsigned int flags)
3837  {
3838  	handle_t *handle = NULL;
3839  	struct ext4_renament old = {
3840  		.dir = old_dir,
3841  		.dentry = old_dentry,
3842  		.inode = d_inode(old_dentry),
3843  	};
3844  	struct ext4_renament new = {
3845  		.dir = new_dir,
3846  		.dentry = new_dentry,
3847  		.inode = d_inode(new_dentry),
3848  	};
3849  	int force_reread;
3850  	int retval;
3851  	struct inode *whiteout = NULL;
3852  	int credits;
3853  	u8 old_file_type;
3854  
3855  	if (new.inode && new.inode->i_nlink == 0) {
3856  		EXT4_ERROR_INODE(new.inode,
3857  				 "target of rename is already freed");
3858  		return -EFSCORRUPTED;
3859  	}
3860  
3861  	if ((ext4_test_inode_flag(new_dir, EXT4_INODE_PROJINHERIT)) &&
3862  	    (!projid_eq(EXT4_I(new_dir)->i_projid,
3863  			EXT4_I(old_dentry->d_inode)->i_projid)))
3864  		return -EXDEV;
3865  
3866  	retval = dquot_initialize(old.dir);
3867  	if (retval)
3868  		return retval;
3869  	retval = dquot_initialize(old.inode);
3870  	if (retval)
3871  		return retval;
3872  	retval = dquot_initialize(new.dir);
3873  	if (retval)
3874  		return retval;
3875  
3876  	/* Initialize quotas before so that eventual writes go
3877  	 * in separate transaction */
3878  	if (new.inode) {
3879  		retval = dquot_initialize(new.inode);
3880  		if (retval)
3881  			return retval;
3882  	}
3883  
3884  	old.bh = ext4_find_entry(old.dir, &old.dentry->d_name, &old.de,
3885  				 &old.inlined);
3886  	if (IS_ERR(old.bh))
3887  		return PTR_ERR(old.bh);
3888  
3889  	/*
3890  	 *  Check for inode number is _not_ due to possible IO errors.
3891  	 *  We might rmdir the source, keep it as pwd of some process
3892  	 *  and merrily kill the link to whatever was created under the
3893  	 *  same name. Goodbye sticky bit ;-<
3894  	 */
3895  	retval = -ENOENT;
3896  	if (!old.bh || le32_to_cpu(old.de->inode) != old.inode->i_ino)
3897  		goto release_bh;
3898  
3899  	new.bh = ext4_find_entry(new.dir, &new.dentry->d_name,
3900  				 &new.de, &new.inlined);
3901  	if (IS_ERR(new.bh)) {
3902  		retval = PTR_ERR(new.bh);
3903  		new.bh = NULL;
3904  		goto release_bh;
3905  	}
3906  	if (new.bh) {
3907  		if (!new.inode) {
3908  			brelse(new.bh);
3909  			new.bh = NULL;
3910  		}
3911  	}
3912  	if (new.inode && !test_opt(new.dir->i_sb, NO_AUTO_DA_ALLOC))
3913  		ext4_alloc_da_blocks(old.inode);
3914  
3915  	credits = (2 * EXT4_DATA_TRANS_BLOCKS(old.dir->i_sb) +
3916  		   EXT4_INDEX_EXTRA_TRANS_BLOCKS + 2);
3917  	if (!(flags & RENAME_WHITEOUT)) {
3918  		handle = ext4_journal_start(old.dir, EXT4_HT_DIR, credits);
3919  		if (IS_ERR(handle)) {
3920  			retval = PTR_ERR(handle);
3921  			goto release_bh;
3922  		}
3923  	} else {
3924  		whiteout = ext4_whiteout_for_rename(idmap, &old, credits, &handle);
3925  		if (IS_ERR(whiteout)) {
3926  			retval = PTR_ERR(whiteout);
3927  			goto release_bh;
3928  		}
3929  	}
3930  
3931  	old_file_type = old.de->file_type;
3932  	if (IS_DIRSYNC(old.dir) || IS_DIRSYNC(new.dir))
3933  		ext4_handle_sync(handle);
3934  
3935  	if (S_ISDIR(old.inode->i_mode)) {
3936  		if (new.inode) {
3937  			retval = -ENOTEMPTY;
3938  			if (!ext4_empty_dir(new.inode))
3939  				goto end_rename;
3940  		} else {
3941  			retval = -EMLINK;
3942  			if (new.dir != old.dir && EXT4_DIR_LINK_MAX(new.dir))
3943  				goto end_rename;
3944  		}
3945  		retval = ext4_rename_dir_prepare(handle, &old);
3946  		if (retval)
3947  			goto end_rename;
3948  	}
3949  	/*
3950  	 * If we're renaming a file within an inline_data dir and adding or
3951  	 * setting the new dirent causes a conversion from inline_data to
3952  	 * extents/blockmap, we need to force the dirent delete code to
3953  	 * re-read the directory, or else we end up trying to delete a dirent
3954  	 * from what is now the extent tree root (or a block map).
3955  	 */
3956  	force_reread = (new.dir->i_ino == old.dir->i_ino &&
3957  			ext4_test_inode_flag(new.dir, EXT4_INODE_INLINE_DATA));
3958  
3959  	if (whiteout) {
3960  		/*
3961  		 * Do this before adding a new entry, so the old entry is sure
3962  		 * to be still pointing to the valid old entry.
3963  		 */
3964  		retval = ext4_setent(handle, &old, whiteout->i_ino,
3965  				     EXT4_FT_CHRDEV);
3966  		if (retval)
3967  			goto end_rename;
3968  		retval = ext4_mark_inode_dirty(handle, whiteout);
3969  		if (unlikely(retval))
3970  			goto end_rename;
3971  
3972  	}
3973  	if (!new.bh) {
3974  		retval = ext4_add_entry(handle, new.dentry, old.inode);
3975  		if (retval)
3976  			goto end_rename;
3977  	} else {
3978  		retval = ext4_setent(handle, &new,
3979  				     old.inode->i_ino, old_file_type);
3980  		if (retval)
3981  			goto end_rename;
3982  	}
3983  	if (force_reread)
3984  		force_reread = !ext4_test_inode_flag(new.dir,
3985  						     EXT4_INODE_INLINE_DATA);
3986  
3987  	/*
3988  	 * Like most other Unix systems, set the ctime for inodes on a
3989  	 * rename.
3990  	 */
3991  	inode_set_ctime_current(old.inode);
3992  	retval = ext4_mark_inode_dirty(handle, old.inode);
3993  	if (unlikely(retval))
3994  		goto end_rename;
3995  
3996  	if (!whiteout) {
3997  		/*
3998  		 * ok, that's it
3999  		 */
4000  		ext4_rename_delete(handle, &old, force_reread);
4001  	}
4002  
4003  	if (new.inode) {
4004  		ext4_dec_count(new.inode);
4005  		inode_set_ctime_current(new.inode);
4006  	}
4007  	old.dir->i_mtime = inode_set_ctime_current(old.dir);
4008  	ext4_update_dx_flag(old.dir);
4009  	if (old.dir_bh) {
4010  		retval = ext4_rename_dir_finish(handle, &old, new.dir->i_ino);
4011  		if (retval)
4012  			goto end_rename;
4013  
4014  		ext4_dec_count(old.dir);
4015  		if (new.inode) {
4016  			/* checked ext4_empty_dir above, can't have another
4017  			 * parent, ext4_dec_count() won't work for many-linked
4018  			 * dirs */
4019  			clear_nlink(new.inode);
4020  		} else {
4021  			ext4_inc_count(new.dir);
4022  			ext4_update_dx_flag(new.dir);
4023  			retval = ext4_mark_inode_dirty(handle, new.dir);
4024  			if (unlikely(retval))
4025  				goto end_rename;
4026  		}
4027  	}
4028  	retval = ext4_mark_inode_dirty(handle, old.dir);
4029  	if (unlikely(retval))
4030  		goto end_rename;
4031  
4032  	if (S_ISDIR(old.inode->i_mode)) {
4033  		/*
4034  		 * We disable fast commits here that's because the
4035  		 * replay code is not yet capable of changing dot dot
4036  		 * dirents in directories.
4037  		 */
4038  		ext4_fc_mark_ineligible(old.inode->i_sb,
4039  			EXT4_FC_REASON_RENAME_DIR, handle);
4040  	} else {
4041  		struct super_block *sb = old.inode->i_sb;
4042  
4043  		if (new.inode)
4044  			ext4_fc_track_unlink(handle, new.dentry);
4045  		if (test_opt2(sb, JOURNAL_FAST_COMMIT) &&
4046  		    !(EXT4_SB(sb)->s_mount_state & EXT4_FC_REPLAY) &&
4047  		    !(ext4_test_mount_flag(sb, EXT4_MF_FC_INELIGIBLE))) {
4048  			__ext4_fc_track_link(handle, old.inode, new.dentry);
4049  			__ext4_fc_track_unlink(handle, old.inode, old.dentry);
4050  			if (whiteout)
4051  				__ext4_fc_track_create(handle, whiteout,
4052  						       old.dentry);
4053  		}
4054  	}
4055  
4056  	if (new.inode) {
4057  		retval = ext4_mark_inode_dirty(handle, new.inode);
4058  		if (unlikely(retval))
4059  			goto end_rename;
4060  		if (!new.inode->i_nlink)
4061  			ext4_orphan_add(handle, new.inode);
4062  	}
4063  	retval = 0;
4064  
4065  end_rename:
4066  	if (whiteout) {
4067  		if (retval) {
4068  			ext4_resetent(handle, &old,
4069  				      old.inode->i_ino, old_file_type);
4070  			drop_nlink(whiteout);
4071  			ext4_mark_inode_dirty(handle, whiteout);
4072  			ext4_orphan_add(handle, whiteout);
4073  		}
4074  		unlock_new_inode(whiteout);
4075  		ext4_journal_stop(handle);
4076  		iput(whiteout);
4077  	} else {
4078  		ext4_journal_stop(handle);
4079  	}
4080  release_bh:
4081  	brelse(old.dir_bh);
4082  	brelse(old.bh);
4083  	brelse(new.bh);
4084  
4085  	return retval;
4086  }
4087  
4088  static int ext4_cross_rename(struct inode *old_dir, struct dentry *old_dentry,
4089  			     struct inode *new_dir, struct dentry *new_dentry)
4090  {
4091  	handle_t *handle = NULL;
4092  	struct ext4_renament old = {
4093  		.dir = old_dir,
4094  		.dentry = old_dentry,
4095  		.inode = d_inode(old_dentry),
4096  	};
4097  	struct ext4_renament new = {
4098  		.dir = new_dir,
4099  		.dentry = new_dentry,
4100  		.inode = d_inode(new_dentry),
4101  	};
4102  	u8 new_file_type;
4103  	int retval;
4104  
4105  	if ((ext4_test_inode_flag(new_dir, EXT4_INODE_PROJINHERIT) &&
4106  	     !projid_eq(EXT4_I(new_dir)->i_projid,
4107  			EXT4_I(old_dentry->d_inode)->i_projid)) ||
4108  	    (ext4_test_inode_flag(old_dir, EXT4_INODE_PROJINHERIT) &&
4109  	     !projid_eq(EXT4_I(old_dir)->i_projid,
4110  			EXT4_I(new_dentry->d_inode)->i_projid)))
4111  		return -EXDEV;
4112  
4113  	retval = dquot_initialize(old.dir);
4114  	if (retval)
4115  		return retval;
4116  	retval = dquot_initialize(new.dir);
4117  	if (retval)
4118  		return retval;
4119  
4120  	old.bh = ext4_find_entry(old.dir, &old.dentry->d_name,
4121  				 &old.de, &old.inlined);
4122  	if (IS_ERR(old.bh))
4123  		return PTR_ERR(old.bh);
4124  	/*
4125  	 *  Check for inode number is _not_ due to possible IO errors.
4126  	 *  We might rmdir the source, keep it as pwd of some process
4127  	 *  and merrily kill the link to whatever was created under the
4128  	 *  same name. Goodbye sticky bit ;-<
4129  	 */
4130  	retval = -ENOENT;
4131  	if (!old.bh || le32_to_cpu(old.de->inode) != old.inode->i_ino)
4132  		goto end_rename;
4133  
4134  	new.bh = ext4_find_entry(new.dir, &new.dentry->d_name,
4135  				 &new.de, &new.inlined);
4136  	if (IS_ERR(new.bh)) {
4137  		retval = PTR_ERR(new.bh);
4138  		new.bh = NULL;
4139  		goto end_rename;
4140  	}
4141  
4142  	/* RENAME_EXCHANGE case: old *and* new must both exist */
4143  	if (!new.bh || le32_to_cpu(new.de->inode) != new.inode->i_ino)
4144  		goto end_rename;
4145  
4146  	handle = ext4_journal_start(old.dir, EXT4_HT_DIR,
4147  		(2 * EXT4_DATA_TRANS_BLOCKS(old.dir->i_sb) +
4148  		 2 * EXT4_INDEX_EXTRA_TRANS_BLOCKS + 2));
4149  	if (IS_ERR(handle)) {
4150  		retval = PTR_ERR(handle);
4151  		handle = NULL;
4152  		goto end_rename;
4153  	}
4154  
4155  	if (IS_DIRSYNC(old.dir) || IS_DIRSYNC(new.dir))
4156  		ext4_handle_sync(handle);
4157  
4158  	if (S_ISDIR(old.inode->i_mode)) {
4159  		old.is_dir = true;
4160  		retval = ext4_rename_dir_prepare(handle, &old);
4161  		if (retval)
4162  			goto end_rename;
4163  	}
4164  	if (S_ISDIR(new.inode->i_mode)) {
4165  		new.is_dir = true;
4166  		retval = ext4_rename_dir_prepare(handle, &new);
4167  		if (retval)
4168  			goto end_rename;
4169  	}
4170  
4171  	/*
4172  	 * Other than the special case of overwriting a directory, parents'
4173  	 * nlink only needs to be modified if this is a cross directory rename.
4174  	 */
4175  	if (old.dir != new.dir && old.is_dir != new.is_dir) {
4176  		old.dir_nlink_delta = old.is_dir ? -1 : 1;
4177  		new.dir_nlink_delta = -old.dir_nlink_delta;
4178  		retval = -EMLINK;
4179  		if ((old.dir_nlink_delta > 0 && EXT4_DIR_LINK_MAX(old.dir)) ||
4180  		    (new.dir_nlink_delta > 0 && EXT4_DIR_LINK_MAX(new.dir)))
4181  			goto end_rename;
4182  	}
4183  
4184  	new_file_type = new.de->file_type;
4185  	retval = ext4_setent(handle, &new, old.inode->i_ino, old.de->file_type);
4186  	if (retval)
4187  		goto end_rename;
4188  
4189  	retval = ext4_setent(handle, &old, new.inode->i_ino, new_file_type);
4190  	if (retval)
4191  		goto end_rename;
4192  
4193  	/*
4194  	 * Like most other Unix systems, set the ctime for inodes on a
4195  	 * rename.
4196  	 */
4197  	inode_set_ctime_current(old.inode);
4198  	inode_set_ctime_current(new.inode);
4199  	retval = ext4_mark_inode_dirty(handle, old.inode);
4200  	if (unlikely(retval))
4201  		goto end_rename;
4202  	retval = ext4_mark_inode_dirty(handle, new.inode);
4203  	if (unlikely(retval))
4204  		goto end_rename;
4205  	ext4_fc_mark_ineligible(new.inode->i_sb,
4206  				EXT4_FC_REASON_CROSS_RENAME, handle);
4207  	if (old.dir_bh) {
4208  		retval = ext4_rename_dir_finish(handle, &old, new.dir->i_ino);
4209  		if (retval)
4210  			goto end_rename;
4211  	}
4212  	if (new.dir_bh) {
4213  		retval = ext4_rename_dir_finish(handle, &new, old.dir->i_ino);
4214  		if (retval)
4215  			goto end_rename;
4216  	}
4217  	ext4_update_dir_count(handle, &old);
4218  	ext4_update_dir_count(handle, &new);
4219  	retval = 0;
4220  
4221  end_rename:
4222  	brelse(old.dir_bh);
4223  	brelse(new.dir_bh);
4224  	brelse(old.bh);
4225  	brelse(new.bh);
4226  	if (handle)
4227  		ext4_journal_stop(handle);
4228  	return retval;
4229  }
4230  
4231  static int ext4_rename2(struct mnt_idmap *idmap,
4232  			struct inode *old_dir, struct dentry *old_dentry,
4233  			struct inode *new_dir, struct dentry *new_dentry,
4234  			unsigned int flags)
4235  {
4236  	int err;
4237  
4238  	if (unlikely(ext4_forced_shutdown(old_dir->i_sb)))
4239  		return -EIO;
4240  
4241  	if (flags & ~(RENAME_NOREPLACE | RENAME_EXCHANGE | RENAME_WHITEOUT))
4242  		return -EINVAL;
4243  
4244  	err = fscrypt_prepare_rename(old_dir, old_dentry, new_dir, new_dentry,
4245  				     flags);
4246  	if (err)
4247  		return err;
4248  
4249  	if (flags & RENAME_EXCHANGE) {
4250  		return ext4_cross_rename(old_dir, old_dentry,
4251  					 new_dir, new_dentry);
4252  	}
4253  
4254  	return ext4_rename(idmap, old_dir, old_dentry, new_dir, new_dentry, flags);
4255  }
4256  
4257  /*
4258   * directories can handle most operations...
4259   */
4260  const struct inode_operations ext4_dir_inode_operations = {
4261  	.create		= ext4_create,
4262  	.lookup		= ext4_lookup,
4263  	.link		= ext4_link,
4264  	.unlink		= ext4_unlink,
4265  	.symlink	= ext4_symlink,
4266  	.mkdir		= ext4_mkdir,
4267  	.rmdir		= ext4_rmdir,
4268  	.mknod		= ext4_mknod,
4269  	.tmpfile	= ext4_tmpfile,
4270  	.rename		= ext4_rename2,
4271  	.setattr	= ext4_setattr,
4272  	.getattr	= ext4_getattr,
4273  	.listxattr	= ext4_listxattr,
4274  	.get_inode_acl	= ext4_get_acl,
4275  	.set_acl	= ext4_set_acl,
4276  	.fiemap         = ext4_fiemap,
4277  	.fileattr_get	= ext4_fileattr_get,
4278  	.fileattr_set	= ext4_fileattr_set,
4279  };
4280  
4281  const struct inode_operations ext4_special_inode_operations = {
4282  	.setattr	= ext4_setattr,
4283  	.getattr	= ext4_getattr,
4284  	.listxattr	= ext4_listxattr,
4285  	.get_inode_acl	= ext4_get_acl,
4286  	.set_acl	= ext4_set_acl,
4287  };
4288