xref: /openbmc/linux/fs/ext4/namei.c (revision 0030d7d6)
1  // SPDX-License-Identifier: GPL-2.0
2  /*
3   *  linux/fs/ext4/namei.c
4   *
5   * Copyright (C) 1992, 1993, 1994, 1995
6   * Remy Card (card@masi.ibp.fr)
7   * Laboratoire MASI - Institut Blaise Pascal
8   * Universite Pierre et Marie Curie (Paris VI)
9   *
10   *  from
11   *
12   *  linux/fs/minix/namei.c
13   *
14   *  Copyright (C) 1991, 1992  Linus Torvalds
15   *
16   *  Big-endian to little-endian byte-swapping/bitmaps by
17   *        David S. Miller (davem@caip.rutgers.edu), 1995
18   *  Directory entry file type support and forward compatibility hooks
19   *	for B-tree directories by Theodore Ts'o (tytso@mit.edu), 1998
20   *  Hash Tree Directory indexing (c)
21   *	Daniel Phillips, 2001
22   *  Hash Tree Directory indexing porting
23   *	Christopher Li, 2002
24   *  Hash Tree Directory indexing cleanup
25   *	Theodore Ts'o, 2002
26   */
27  
28  #include <linux/fs.h>
29  #include <linux/pagemap.h>
30  #include <linux/time.h>
31  #include <linux/fcntl.h>
32  #include <linux/stat.h>
33  #include <linux/string.h>
34  #include <linux/quotaops.h>
35  #include <linux/buffer_head.h>
36  #include <linux/bio.h>
37  #include <linux/iversion.h>
38  #include <linux/unicode.h>
39  #include "ext4.h"
40  #include "ext4_jbd2.h"
41  
42  #include "xattr.h"
43  #include "acl.h"
44  
45  #include <trace/events/ext4.h>
46  /*
47   * define how far ahead to read directories while searching them.
48   */
#define NAMEI_RA_CHUNKS		2
#define NAMEI_RA_BLOCKS		4
/* Product of the two values above. */
#define NAMEI_RA_SIZE		(NAMEI_RA_CHUNKS * NAMEI_RA_BLOCKS)
52  
53  static struct buffer_head *ext4_append(handle_t *handle,
54  					struct inode *inode,
55  					ext4_lblk_t *block)
56  {
57  	struct ext4_map_blocks map;
58  	struct buffer_head *bh;
59  	int err;
60  
61  	if (unlikely(EXT4_SB(inode->i_sb)->s_max_dir_size_kb &&
62  		     ((inode->i_size >> 10) >=
63  		      EXT4_SB(inode->i_sb)->s_max_dir_size_kb)))
64  		return ERR_PTR(-ENOSPC);
65  
66  	*block = inode->i_size >> inode->i_sb->s_blocksize_bits;
67  	map.m_lblk = *block;
68  	map.m_len = 1;
69  
70  	/*
71  	 * We're appending new directory block. Make sure the block is not
72  	 * allocated yet, otherwise we will end up corrupting the
73  	 * directory.
74  	 */
75  	err = ext4_map_blocks(NULL, inode, &map, 0);
76  	if (err < 0)
77  		return ERR_PTR(err);
78  	if (err) {
79  		EXT4_ERROR_INODE(inode, "Logical block already allocated");
80  		return ERR_PTR(-EFSCORRUPTED);
81  	}
82  
83  	bh = ext4_bread(handle, inode, *block, EXT4_GET_BLOCKS_CREATE);
84  	if (IS_ERR(bh))
85  		return bh;
86  	inode->i_size += inode->i_sb->s_blocksize;
87  	EXT4_I(inode)->i_disksize = inode->i_size;
88  	err = ext4_mark_inode_dirty(handle, inode);
89  	if (err)
90  		goto out;
91  	BUFFER_TRACE(bh, "get_write_access");
92  	err = ext4_journal_get_write_access(handle, inode->i_sb, bh,
93  					    EXT4_JTR_NONE);
94  	if (err)
95  		goto out;
96  	return bh;
97  
98  out:
99  	brelse(bh);
100  	ext4_std_error(inode->i_sb, err);
101  	return ERR_PTR(err);
102  }
103  
104  static int ext4_dx_csum_verify(struct inode *inode,
105  			       struct ext4_dir_entry *dirent);
106  
/*
 * Hints to ext4_read_dirblock regarding whether we expect a directory
 * block being read to be an index block, or a block containing
 * directory entries (and if the latter, whether it was found via a
 * logical block in an htree index block).  This is used to control
 * what sort of sanity checking ext4_read_dirblock() will do on the
 * directory block read from the storage device.  EITHER means
 * the caller doesn't know what kind of directory block will be read,
 * so no specific verification will be done.
 */
typedef enum {
	EITHER,		/* caller does not know what kind of block it is */
	INDEX,		/* caller expects an htree index block */
	DIRENT,		/* caller expects a block of directory entries */
	DIRENT_HTREE	/* dirent block located through an htree index block */
} dirblock_type_t;
120  
121  #define ext4_read_dirblock(inode, block, type) \
122  	__ext4_read_dirblock((inode), (block), (type), __func__, __LINE__)
123  
124  static struct buffer_head *__ext4_read_dirblock(struct inode *inode,
125  						ext4_lblk_t block,
126  						dirblock_type_t type,
127  						const char *func,
128  						unsigned int line)
129  {
130  	struct buffer_head *bh;
131  	struct ext4_dir_entry *dirent;
132  	int is_dx_block = 0;
133  
134  	if (block >= inode->i_size >> inode->i_blkbits) {
135  		ext4_error_inode(inode, func, line, block,
136  		       "Attempting to read directory block (%u) that is past i_size (%llu)",
137  		       block, inode->i_size);
138  		return ERR_PTR(-EFSCORRUPTED);
139  	}
140  
141  	if (ext4_simulate_fail(inode->i_sb, EXT4_SIM_DIRBLOCK_EIO))
142  		bh = ERR_PTR(-EIO);
143  	else
144  		bh = ext4_bread(NULL, inode, block, 0);
145  	if (IS_ERR(bh)) {
146  		__ext4_warning(inode->i_sb, func, line,
147  			       "inode #%lu: lblock %lu: comm %s: "
148  			       "error %ld reading directory block",
149  			       inode->i_ino, (unsigned long)block,
150  			       current->comm, PTR_ERR(bh));
151  
152  		return bh;
153  	}
154  	if (!bh && (type == INDEX || type == DIRENT_HTREE)) {
155  		ext4_error_inode(inode, func, line, block,
156  				 "Directory hole found for htree %s block",
157  				 (type == INDEX) ? "index" : "leaf");
158  		return ERR_PTR(-EFSCORRUPTED);
159  	}
160  	if (!bh)
161  		return NULL;
162  	dirent = (struct ext4_dir_entry *) bh->b_data;
163  	/* Determine whether or not we have an index block */
164  	if (is_dx(inode)) {
165  		if (block == 0)
166  			is_dx_block = 1;
167  		else if (ext4_rec_len_from_disk(dirent->rec_len,
168  						inode->i_sb->s_blocksize) ==
169  			 inode->i_sb->s_blocksize)
170  			is_dx_block = 1;
171  	}
172  	if (!is_dx_block && type == INDEX) {
173  		ext4_error_inode(inode, func, line, block,
174  		       "directory leaf block found instead of index block");
175  		brelse(bh);
176  		return ERR_PTR(-EFSCORRUPTED);
177  	}
178  	if (!ext4_has_metadata_csum(inode->i_sb) ||
179  	    buffer_verified(bh))
180  		return bh;
181  
182  	/*
183  	 * An empty leaf block can get mistaken for a index block; for
184  	 * this reason, we can only check the index checksum when the
185  	 * caller is sure it should be an index block.
186  	 */
187  	if (is_dx_block && type == INDEX) {
188  		if (ext4_dx_csum_verify(inode, dirent) &&
189  		    !ext4_simulate_fail(inode->i_sb, EXT4_SIM_DIRBLOCK_CRC))
190  			set_buffer_verified(bh);
191  		else {
192  			ext4_error_inode_err(inode, func, line, block,
193  					     EFSBADCRC,
194  					     "Directory index failed checksum");
195  			brelse(bh);
196  			return ERR_PTR(-EFSBADCRC);
197  		}
198  	}
199  	if (!is_dx_block) {
200  		if (ext4_dirblock_csum_verify(inode, bh) &&
201  		    !ext4_simulate_fail(inode->i_sb, EXT4_SIM_DIRBLOCK_CRC))
202  			set_buffer_verified(bh);
203  		else {
204  			ext4_error_inode_err(inode, func, line, block,
205  					     EFSBADCRC,
206  					     "Directory block failed checksum");
207  			brelse(bh);
208  			return ERR_PTR(-EFSBADCRC);
209  		}
210  	}
211  	return bh;
212  }
213  
/* dxtrace(cmd) expands to cmd only when DX_DEBUG is defined. */
#ifndef DX_DEBUG
#define dxtrace(command)
#else
#define dxtrace(command) command
#endif
219  
220  struct fake_dirent
221  {
222  	__le32 inode;
223  	__le16 rec_len;
224  	u8 name_len;
225  	u8 file_type;
226  };
227  
228  struct dx_countlimit
229  {
230  	__le16 limit;
231  	__le16 count;
232  };
233  
234  struct dx_entry
235  {
236  	__le32 hash;
237  	__le32 block;
238  };
239  
240  /*
241   * dx_root_info is laid out so that if it should somehow get overlaid by a
242   * dirent the two low bits of the hash version will be zero.  Therefore, the
243   * hash version mod 4 should never be 0.  Sincerely, the paranoia department.
244   */
245  
246  struct dx_root
247  {
248  	struct fake_dirent dot;
249  	char dot_name[4];
250  	struct fake_dirent dotdot;
251  	char dotdot_name[4];
252  	struct dx_root_info
253  	{
254  		__le32 reserved_zero;
255  		u8 hash_version;
256  		u8 info_length; /* 8 */
257  		u8 indirect_levels;
258  		u8 unused_flags;
259  	}
260  	info;
261  	struct dx_entry	entries[];
262  };
263  
264  struct dx_node
265  {
266  	struct fake_dirent fake;
267  	struct dx_entry	entries[];
268  };
269  
270  
/* Per-level lookup state recorded by dx_probe(). */
struct dx_frame {
	struct buffer_head *bh;		/* buffer holding this index block */
	struct dx_entry *entries;	/* start of the entries array */
	struct dx_entry *at;		/* entry selected at this level */
};
277  
278  struct dx_map_entry
279  {
280  	u32 hash;
281  	u16 offs;
282  	u16 size;
283  };
284  
285  /*
286   * This goes at the end of each htree block.
287   */
288  struct dx_tail {
289  	u32 dt_reserved;
290  	__le32 dt_checksum;	/* crc32c(uuid+inum+dirblock) */
291  };
292  
293  static inline ext4_lblk_t dx_get_block(struct dx_entry *entry);
294  static void dx_set_block(struct dx_entry *entry, ext4_lblk_t value);
295  static inline unsigned dx_get_hash(struct dx_entry *entry);
296  static void dx_set_hash(struct dx_entry *entry, unsigned value);
297  static unsigned dx_get_count(struct dx_entry *entries);
298  static unsigned dx_get_limit(struct dx_entry *entries);
299  static void dx_set_count(struct dx_entry *entries, unsigned value);
300  static void dx_set_limit(struct dx_entry *entries, unsigned value);
301  static unsigned dx_root_limit(struct inode *dir, unsigned infosize);
302  static unsigned dx_node_limit(struct inode *dir);
303  static struct dx_frame *dx_probe(struct ext4_filename *fname,
304  				 struct inode *dir,
305  				 struct dx_hash_info *hinfo,
306  				 struct dx_frame *frame);
307  static void dx_release(struct dx_frame *frames);
308  static int dx_make_map(struct inode *dir, struct buffer_head *bh,
309  		       struct dx_hash_info *hinfo,
310  		       struct dx_map_entry *map_tail);
311  static void dx_sort_map(struct dx_map_entry *map, unsigned count);
312  static struct ext4_dir_entry_2 *dx_move_dirents(struct inode *dir, char *from,
313  					char *to, struct dx_map_entry *offsets,
314  					int count, unsigned int blocksize);
315  static struct ext4_dir_entry_2 *dx_pack_dirents(struct inode *dir, char *base,
316  						unsigned int blocksize);
317  static void dx_insert_block(struct dx_frame *frame,
318  					u32 hash, ext4_lblk_t block);
319  static int ext4_htree_next_block(struct inode *dir, __u32 hash,
320  				 struct dx_frame *frame,
321  				 struct dx_frame *frames,
322  				 __u32 *start_hash);
323  static struct buffer_head * ext4_dx_find_entry(struct inode *dir,
324  		struct ext4_filename *fname,
325  		struct ext4_dir_entry_2 **res_dir);
326  static int ext4_dx_add_entry(handle_t *handle, struct ext4_filename *fname,
327  			     struct inode *dir, struct inode *inode);
328  
329  /* checksumming functions */
330  void ext4_initialize_dirent_tail(struct buffer_head *bh,
331  				 unsigned int blocksize)
332  {
333  	struct ext4_dir_entry_tail *t = EXT4_DIRENT_TAIL(bh->b_data, blocksize);
334  
335  	memset(t, 0, sizeof(struct ext4_dir_entry_tail));
336  	t->det_rec_len = ext4_rec_len_to_disk(
337  			sizeof(struct ext4_dir_entry_tail), blocksize);
338  	t->det_reserved_ft = EXT4_FT_DIR_CSUM;
339  }
340  
341  /* Walk through a dirent block to find a checksum "dirent" at the tail */
342  static struct ext4_dir_entry_tail *get_dirent_tail(struct inode *inode,
343  						   struct buffer_head *bh)
344  {
345  	struct ext4_dir_entry_tail *t;
346  
347  #ifdef PARANOID
348  	struct ext4_dir_entry *d, *top;
349  
350  	d = (struct ext4_dir_entry *)bh->b_data;
351  	top = (struct ext4_dir_entry *)(bh->b_data +
352  		(EXT4_BLOCK_SIZE(inode->i_sb) -
353  		 sizeof(struct ext4_dir_entry_tail)));
354  	while (d < top && d->rec_len)
355  		d = (struct ext4_dir_entry *)(((void *)d) +
356  		    le16_to_cpu(d->rec_len));
357  
358  	if (d != top)
359  		return NULL;
360  
361  	t = (struct ext4_dir_entry_tail *)d;
362  #else
363  	t = EXT4_DIRENT_TAIL(bh->b_data, EXT4_BLOCK_SIZE(inode->i_sb));
364  #endif
365  
366  	if (t->det_reserved_zero1 ||
367  	    le16_to_cpu(t->det_rec_len) != sizeof(struct ext4_dir_entry_tail) ||
368  	    t->det_reserved_zero2 ||
369  	    t->det_reserved_ft != EXT4_FT_DIR_CSUM)
370  		return NULL;
371  
372  	return t;
373  }
374  
375  static __le32 ext4_dirblock_csum(struct inode *inode, void *dirent, int size)
376  {
377  	struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
378  	struct ext4_inode_info *ei = EXT4_I(inode);
379  	__u32 csum;
380  
381  	csum = ext4_chksum(sbi, ei->i_csum_seed, (__u8 *)dirent, size);
382  	return cpu_to_le32(csum);
383  }
384  
/* Wrapper that passes the calling function and line along. */
#define warn_no_space_for_csum(inode)					\
	__warn_no_space_for_csum((inode), __func__, __LINE__)

/*
 * Log a warning that a directory leaf block of @inode has no room for a
 * checksum, attributed to the caller identified by @func and @line.
 */
static void __warn_no_space_for_csum(struct inode *inode, const char *func,
				     unsigned int line)
{
	__ext4_warning_inode(inode, func, line,
		"No space for directory leaf checksum. Please run e2fsck -D.");
}
394  
395  int ext4_dirblock_csum_verify(struct inode *inode, struct buffer_head *bh)
396  {
397  	struct ext4_dir_entry_tail *t;
398  
399  	if (!ext4_has_metadata_csum(inode->i_sb))
400  		return 1;
401  
402  	t = get_dirent_tail(inode, bh);
403  	if (!t) {
404  		warn_no_space_for_csum(inode);
405  		return 0;
406  	}
407  
408  	if (t->det_checksum != ext4_dirblock_csum(inode, bh->b_data,
409  						  (char *)t - bh->b_data))
410  		return 0;
411  
412  	return 1;
413  }
414  
415  static void ext4_dirblock_csum_set(struct inode *inode,
416  				 struct buffer_head *bh)
417  {
418  	struct ext4_dir_entry_tail *t;
419  
420  	if (!ext4_has_metadata_csum(inode->i_sb))
421  		return;
422  
423  	t = get_dirent_tail(inode, bh);
424  	if (!t) {
425  		warn_no_space_for_csum(inode);
426  		return;
427  	}
428  
429  	t->det_checksum = ext4_dirblock_csum(inode, bh->b_data,
430  					     (char *)t - bh->b_data);
431  }
432  
433  int ext4_handle_dirty_dirblock(handle_t *handle,
434  			       struct inode *inode,
435  			       struct buffer_head *bh)
436  {
437  	ext4_dirblock_csum_set(inode, bh);
438  	return ext4_handle_dirty_metadata(handle, inode, bh);
439  }
440  
441  static struct dx_countlimit *get_dx_countlimit(struct inode *inode,
442  					       struct ext4_dir_entry *dirent,
443  					       int *offset)
444  {
445  	struct ext4_dir_entry *dp;
446  	struct dx_root_info *root;
447  	int count_offset;
448  
449  	if (le16_to_cpu(dirent->rec_len) == EXT4_BLOCK_SIZE(inode->i_sb))
450  		count_offset = 8;
451  	else if (le16_to_cpu(dirent->rec_len) == 12) {
452  		dp = (struct ext4_dir_entry *)(((void *)dirent) + 12);
453  		if (le16_to_cpu(dp->rec_len) !=
454  		    EXT4_BLOCK_SIZE(inode->i_sb) - 12)
455  			return NULL;
456  		root = (struct dx_root_info *)(((void *)dp + 12));
457  		if (root->reserved_zero ||
458  		    root->info_length != sizeof(struct dx_root_info))
459  			return NULL;
460  		count_offset = 32;
461  	} else
462  		return NULL;
463  
464  	if (offset)
465  		*offset = count_offset;
466  	return (struct dx_countlimit *)(((void *)dirent) + count_offset);
467  }
468  
469  static __le32 ext4_dx_csum(struct inode *inode, struct ext4_dir_entry *dirent,
470  			   int count_offset, int count, struct dx_tail *t)
471  {
472  	struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
473  	struct ext4_inode_info *ei = EXT4_I(inode);
474  	__u32 csum;
475  	int size;
476  	__u32 dummy_csum = 0;
477  	int offset = offsetof(struct dx_tail, dt_checksum);
478  
479  	size = count_offset + (count * sizeof(struct dx_entry));
480  	csum = ext4_chksum(sbi, ei->i_csum_seed, (__u8 *)dirent, size);
481  	csum = ext4_chksum(sbi, csum, (__u8 *)t, offset);
482  	csum = ext4_chksum(sbi, csum, (__u8 *)&dummy_csum, sizeof(dummy_csum));
483  
484  	return cpu_to_le32(csum);
485  }
486  
487  static int ext4_dx_csum_verify(struct inode *inode,
488  			       struct ext4_dir_entry *dirent)
489  {
490  	struct dx_countlimit *c;
491  	struct dx_tail *t;
492  	int count_offset, limit, count;
493  
494  	if (!ext4_has_metadata_csum(inode->i_sb))
495  		return 1;
496  
497  	c = get_dx_countlimit(inode, dirent, &count_offset);
498  	if (!c) {
499  		EXT4_ERROR_INODE(inode, "dir seems corrupt?  Run e2fsck -D.");
500  		return 0;
501  	}
502  	limit = le16_to_cpu(c->limit);
503  	count = le16_to_cpu(c->count);
504  	if (count_offset + (limit * sizeof(struct dx_entry)) >
505  	    EXT4_BLOCK_SIZE(inode->i_sb) - sizeof(struct dx_tail)) {
506  		warn_no_space_for_csum(inode);
507  		return 0;
508  	}
509  	t = (struct dx_tail *)(((struct dx_entry *)c) + limit);
510  
511  	if (t->dt_checksum != ext4_dx_csum(inode, dirent, count_offset,
512  					    count, t))
513  		return 0;
514  	return 1;
515  }
516  
517  static void ext4_dx_csum_set(struct inode *inode, struct ext4_dir_entry *dirent)
518  {
519  	struct dx_countlimit *c;
520  	struct dx_tail *t;
521  	int count_offset, limit, count;
522  
523  	if (!ext4_has_metadata_csum(inode->i_sb))
524  		return;
525  
526  	c = get_dx_countlimit(inode, dirent, &count_offset);
527  	if (!c) {
528  		EXT4_ERROR_INODE(inode, "dir seems corrupt?  Run e2fsck -D.");
529  		return;
530  	}
531  	limit = le16_to_cpu(c->limit);
532  	count = le16_to_cpu(c->count);
533  	if (count_offset + (limit * sizeof(struct dx_entry)) >
534  	    EXT4_BLOCK_SIZE(inode->i_sb) - sizeof(struct dx_tail)) {
535  		warn_no_space_for_csum(inode);
536  		return;
537  	}
538  	t = (struct dx_tail *)(((struct dx_entry *)c) + limit);
539  
540  	t->dt_checksum = ext4_dx_csum(inode, dirent, count_offset, count, t);
541  }
542  
543  static inline int ext4_handle_dirty_dx_node(handle_t *handle,
544  					    struct inode *inode,
545  					    struct buffer_head *bh)
546  {
547  	ext4_dx_csum_set(inode, (struct ext4_dir_entry *)bh->b_data);
548  	return ext4_handle_dirty_metadata(handle, inode, bh);
549  }
550  
551  /*
552   * p is at least 6 bytes before the end of page
553   */
554  static inline struct ext4_dir_entry_2 *
555  ext4_next_entry(struct ext4_dir_entry_2 *p, unsigned long blocksize)
556  {
557  	return (struct ext4_dir_entry_2 *)((char *)p +
558  		ext4_rec_len_from_disk(p->rec_len, blocksize));
559  }
560  
561  /*
562   * Future: use high four bits of block for coalesce-on-delete flags
563   * Mask them off for now.
564   */
565  
566  static inline ext4_lblk_t dx_get_block(struct dx_entry *entry)
567  {
568  	return le32_to_cpu(entry->block) & 0x0fffffff;
569  }
570  
571  static inline void dx_set_block(struct dx_entry *entry, ext4_lblk_t value)
572  {
573  	entry->block = cpu_to_le32(value);
574  }
575  
576  static inline unsigned dx_get_hash(struct dx_entry *entry)
577  {
578  	return le32_to_cpu(entry->hash);
579  }
580  
581  static inline void dx_set_hash(struct dx_entry *entry, unsigned value)
582  {
583  	entry->hash = cpu_to_le32(value);
584  }
585  
586  static inline unsigned dx_get_count(struct dx_entry *entries)
587  {
588  	return le16_to_cpu(((struct dx_countlimit *) entries)->count);
589  }
590  
591  static inline unsigned dx_get_limit(struct dx_entry *entries)
592  {
593  	return le16_to_cpu(((struct dx_countlimit *) entries)->limit);
594  }
595  
596  static inline void dx_set_count(struct dx_entry *entries, unsigned value)
597  {
598  	((struct dx_countlimit *) entries)->count = cpu_to_le16(value);
599  }
600  
601  static inline void dx_set_limit(struct dx_entry *entries, unsigned value)
602  {
603  	((struct dx_countlimit *) entries)->limit = cpu_to_le16(value);
604  }
605  
606  static inline unsigned dx_root_limit(struct inode *dir, unsigned infosize)
607  {
608  	unsigned int entry_space = dir->i_sb->s_blocksize -
609  			ext4_dir_rec_len(1, NULL) -
610  			ext4_dir_rec_len(2, NULL) - infosize;
611  
612  	if (ext4_has_metadata_csum(dir->i_sb))
613  		entry_space -= sizeof(struct dx_tail);
614  	return entry_space / sizeof(struct dx_entry);
615  }
616  
617  static inline unsigned dx_node_limit(struct inode *dir)
618  {
619  	unsigned int entry_space = dir->i_sb->s_blocksize -
620  			ext4_dir_rec_len(0, dir);
621  
622  	if (ext4_has_metadata_csum(dir->i_sb))
623  		entry_space -= sizeof(struct dx_tail);
624  	return entry_space / sizeof(struct dx_entry);
625  }
626  
627  /*
628   * Debug
629   */
630  #ifdef DX_DEBUG
631  static void dx_show_index(char * label, struct dx_entry *entries)
632  {
633  	int i, n = dx_get_count (entries);
634  	printk(KERN_DEBUG "%s index", label);
635  	for (i = 0; i < n; i++) {
636  		printk(KERN_CONT " %x->%lu",
637  		       i ? dx_get_hash(entries + i) : 0,
638  		       (unsigned long)dx_get_block(entries + i));
639  	}
640  	printk(KERN_CONT "\n");
641  }
642  
/* Totals gathered by the dx_show_*() debug helpers. */
struct stats {
	unsigned names;		/* entries with a non-zero inode */
	unsigned space;		/* sum of ext4_dir_rec_len() of those entries */
	unsigned bcount;	/* number of leaf blocks visited */
};
649  
650  static struct stats dx_show_leaf(struct inode *dir,
651  				struct dx_hash_info *hinfo,
652  				struct ext4_dir_entry_2 *de,
653  				int size, int show_names)
654  {
655  	unsigned names = 0, space = 0;
656  	char *base = (char *) de;
657  	struct dx_hash_info h = *hinfo;
658  
659  	printk("names: ");
660  	while ((char *) de < base + size)
661  	{
662  		if (de->inode)
663  		{
664  			if (show_names)
665  			{
666  #ifdef CONFIG_FS_ENCRYPTION
667  				int len;
668  				char *name;
669  				struct fscrypt_str fname_crypto_str =
670  					FSTR_INIT(NULL, 0);
671  				int res = 0;
672  
673  				name  = de->name;
674  				len = de->name_len;
675  				if (!IS_ENCRYPTED(dir)) {
676  					/* Directory is not encrypted */
677  					ext4fs_dirhash(dir, de->name,
678  						de->name_len, &h);
679  					printk("%*.s:(U)%x.%u ", len,
680  					       name, h.hash,
681  					       (unsigned) ((char *) de
682  							   - base));
683  				} else {
684  					struct fscrypt_str de_name =
685  						FSTR_INIT(name, len);
686  
687  					/* Directory is encrypted */
688  					res = fscrypt_fname_alloc_buffer(
689  						len, &fname_crypto_str);
690  					if (res)
691  						printk(KERN_WARNING "Error "
692  							"allocating crypto "
693  							"buffer--skipping "
694  							"crypto\n");
695  					res = fscrypt_fname_disk_to_usr(dir,
696  						0, 0, &de_name,
697  						&fname_crypto_str);
698  					if (res) {
699  						printk(KERN_WARNING "Error "
700  							"converting filename "
701  							"from disk to usr"
702  							"\n");
703  						name = "??";
704  						len = 2;
705  					} else {
706  						name = fname_crypto_str.name;
707  						len = fname_crypto_str.len;
708  					}
709  					if (IS_CASEFOLDED(dir))
710  						h.hash = EXT4_DIRENT_HASH(de);
711  					else
712  						ext4fs_dirhash(dir, de->name,
713  						       de->name_len, &h);
714  					printk("%*.s:(E)%x.%u ", len, name,
715  					       h.hash, (unsigned) ((char *) de
716  								   - base));
717  					fscrypt_fname_free_buffer(
718  							&fname_crypto_str);
719  				}
720  #else
721  				int len = de->name_len;
722  				char *name = de->name;
723  				ext4fs_dirhash(dir, de->name, de->name_len, &h);
724  				printk("%*.s:%x.%u ", len, name, h.hash,
725  				       (unsigned) ((char *) de - base));
726  #endif
727  			}
728  			space += ext4_dir_rec_len(de->name_len, dir);
729  			names++;
730  		}
731  		de = ext4_next_entry(de, size);
732  	}
733  	printk(KERN_CONT "(%i)\n", names);
734  	return (struct stats) { names, space, 1 };
735  }
736  
737  struct stats dx_show_entries(struct dx_hash_info *hinfo, struct inode *dir,
738  			     struct dx_entry *entries, int levels)
739  {
740  	unsigned blocksize = dir->i_sb->s_blocksize;
741  	unsigned count = dx_get_count(entries), names = 0, space = 0, i;
742  	unsigned bcount = 0;
743  	struct buffer_head *bh;
744  	printk("%i indexed blocks...\n", count);
745  	for (i = 0; i < count; i++, entries++)
746  	{
747  		ext4_lblk_t block = dx_get_block(entries);
748  		ext4_lblk_t hash  = i ? dx_get_hash(entries): 0;
749  		u32 range = i < count - 1? (dx_get_hash(entries + 1) - hash): ~hash;
750  		struct stats stats;
751  		printk("%s%3u:%03u hash %8x/%8x ",levels?"":"   ", i, block, hash, range);
752  		bh = ext4_bread(NULL,dir, block, 0);
753  		if (!bh || IS_ERR(bh))
754  			continue;
755  		stats = levels?
756  		   dx_show_entries(hinfo, dir, ((struct dx_node *) bh->b_data)->entries, levels - 1):
757  		   dx_show_leaf(dir, hinfo, (struct ext4_dir_entry_2 *)
758  			bh->b_data, blocksize, 0);
759  		names += stats.names;
760  		space += stats.space;
761  		bcount += stats.bcount;
762  		brelse(bh);
763  	}
764  	if (bcount)
765  		printk(KERN_DEBUG "%snames %u, fullness %u (%u%%)\n",
766  		       levels ? "" : "   ", names, space/bcount,
767  		       (space/bcount)*100/blocksize);
768  	return (struct stats) { names, space, bcount};
769  }
770  
771  /*
772   * Linear search cross check
773   */
774  static inline void htree_rep_invariant_check(struct dx_entry *at,
775  					     struct dx_entry *target,
776  					     u32 hash, unsigned int n)
777  {
778  	while (n--) {
779  		dxtrace(printk(KERN_CONT ","));
780  		if (dx_get_hash(++at) > hash) {
781  			at--;
782  			break;
783  		}
784  	}
785  	ASSERT(at == target - 1);
786  }
787  #else /* DX_DEBUG */
788  static inline void htree_rep_invariant_check(struct dx_entry *at,
789  					     struct dx_entry *target,
790  					     u32 hash, unsigned int n)
791  {
792  }
793  #endif /* DX_DEBUG */
794  
795  /*
796   * Probe for a directory leaf block to search.
797   *
798   * dx_probe can return ERR_BAD_DX_DIR, which means there was a format
799   * error in the directory index, and the caller should fall back to
800   * searching the directory normally.  The callers of dx_probe **MUST**
801   * check for this error code, and make sure it never gets reflected
802   * back to userspace.
803   */
804  static struct dx_frame *
805  dx_probe(struct ext4_filename *fname, struct inode *dir,
806  	 struct dx_hash_info *hinfo, struct dx_frame *frame_in)
807  {
808  	unsigned count, indirect, level, i;
809  	struct dx_entry *at, *entries, *p, *q, *m;
810  	struct dx_root *root;
811  	struct dx_frame *frame = frame_in;
812  	struct dx_frame *ret_err = ERR_PTR(ERR_BAD_DX_DIR);
813  	u32 hash;
814  	ext4_lblk_t block;
815  	ext4_lblk_t blocks[EXT4_HTREE_LEVEL];
816  
817  	memset(frame_in, 0, EXT4_HTREE_LEVEL * sizeof(frame_in[0]));
818  	frame->bh = ext4_read_dirblock(dir, 0, INDEX);
819  	if (IS_ERR(frame->bh))
820  		return (struct dx_frame *) frame->bh;
821  
822  	root = (struct dx_root *) frame->bh->b_data;
823  	if (root->info.hash_version != DX_HASH_TEA &&
824  	    root->info.hash_version != DX_HASH_HALF_MD4 &&
825  	    root->info.hash_version != DX_HASH_LEGACY &&
826  	    root->info.hash_version != DX_HASH_SIPHASH) {
827  		ext4_warning_inode(dir, "Unrecognised inode hash code %u",
828  				   root->info.hash_version);
829  		goto fail;
830  	}
831  	if (ext4_hash_in_dirent(dir)) {
832  		if (root->info.hash_version != DX_HASH_SIPHASH) {
833  			ext4_warning_inode(dir,
834  				"Hash in dirent, but hash is not SIPHASH");
835  			goto fail;
836  		}
837  	} else {
838  		if (root->info.hash_version == DX_HASH_SIPHASH) {
839  			ext4_warning_inode(dir,
840  				"Hash code is SIPHASH, but hash not in dirent");
841  			goto fail;
842  		}
843  	}
844  	if (fname)
845  		hinfo = &fname->hinfo;
846  	hinfo->hash_version = root->info.hash_version;
847  	if (hinfo->hash_version <= DX_HASH_TEA)
848  		hinfo->hash_version += EXT4_SB(dir->i_sb)->s_hash_unsigned;
849  	hinfo->seed = EXT4_SB(dir->i_sb)->s_hash_seed;
850  	/* hash is already computed for encrypted casefolded directory */
851  	if (fname && fname_name(fname) &&
852  				!(IS_ENCRYPTED(dir) && IS_CASEFOLDED(dir)))
853  		ext4fs_dirhash(dir, fname_name(fname), fname_len(fname), hinfo);
854  	hash = hinfo->hash;
855  
856  	if (root->info.unused_flags & 1) {
857  		ext4_warning_inode(dir, "Unimplemented hash flags: %#06x",
858  				   root->info.unused_flags);
859  		goto fail;
860  	}
861  
862  	indirect = root->info.indirect_levels;
863  	if (indirect >= ext4_dir_htree_level(dir->i_sb)) {
864  		ext4_warning(dir->i_sb,
865  			     "Directory (ino: %lu) htree depth %#06x exceed"
866  			     "supported value", dir->i_ino,
867  			     ext4_dir_htree_level(dir->i_sb));
868  		if (ext4_dir_htree_level(dir->i_sb) < EXT4_HTREE_LEVEL) {
869  			ext4_warning(dir->i_sb, "Enable large directory "
870  						"feature to access it");
871  		}
872  		goto fail;
873  	}
874  
875  	entries = (struct dx_entry *)(((char *)&root->info) +
876  				      root->info.info_length);
877  
878  	if (dx_get_limit(entries) != dx_root_limit(dir,
879  						   root->info.info_length)) {
880  		ext4_warning_inode(dir, "dx entry: limit %u != root limit %u",
881  				   dx_get_limit(entries),
882  				   dx_root_limit(dir, root->info.info_length));
883  		goto fail;
884  	}
885  
886  	dxtrace(printk("Look up %x", hash));
887  	level = 0;
888  	blocks[0] = 0;
889  	while (1) {
890  		count = dx_get_count(entries);
891  		if (!count || count > dx_get_limit(entries)) {
892  			ext4_warning_inode(dir,
893  					   "dx entry: count %u beyond limit %u",
894  					   count, dx_get_limit(entries));
895  			goto fail;
896  		}
897  
898  		p = entries + 1;
899  		q = entries + count - 1;
900  		while (p <= q) {
901  			m = p + (q - p) / 2;
902  			dxtrace(printk(KERN_CONT "."));
903  			if (dx_get_hash(m) > hash)
904  				q = m - 1;
905  			else
906  				p = m + 1;
907  		}
908  
909  		htree_rep_invariant_check(entries, p, hash, count - 1);
910  
911  		at = p - 1;
912  		dxtrace(printk(KERN_CONT " %x->%u\n",
913  			       at == entries ? 0 : dx_get_hash(at),
914  			       dx_get_block(at)));
915  		frame->entries = entries;
916  		frame->at = at;
917  
918  		block = dx_get_block(at);
919  		for (i = 0; i <= level; i++) {
920  			if (blocks[i] == block) {
921  				ext4_warning_inode(dir,
922  					"dx entry: tree cycle block %u points back to block %u",
923  					blocks[level], block);
924  				goto fail;
925  			}
926  		}
927  		if (++level > indirect)
928  			return frame;
929  		blocks[level] = block;
930  		frame++;
931  		frame->bh = ext4_read_dirblock(dir, block, INDEX);
932  		if (IS_ERR(frame->bh)) {
933  			ret_err = (struct dx_frame *) frame->bh;
934  			frame->bh = NULL;
935  			goto fail;
936  		}
937  
938  		entries = ((struct dx_node *) frame->bh->b_data)->entries;
939  
940  		if (dx_get_limit(entries) != dx_node_limit(dir)) {
941  			ext4_warning_inode(dir,
942  				"dx entry: limit %u != node limit %u",
943  				dx_get_limit(entries), dx_node_limit(dir));
944  			goto fail;
945  		}
946  	}
947  fail:
948  	while (frame >= frame_in) {
949  		brelse(frame->bh);
950  		frame--;
951  	}
952  
953  	if (ret_err == ERR_PTR(ERR_BAD_DX_DIR))
954  		ext4_warning_inode(dir,
955  			"Corrupt directory, running e2fsck is recommended");
956  	return ret_err;
957  }
958  
959  static void dx_release(struct dx_frame *frames)
960  {
961  	struct dx_root_info *info;
962  	int i;
963  	unsigned int indirect_levels;
964  
965  	if (frames[0].bh == NULL)
966  		return;
967  
968  	info = &((struct dx_root *)frames[0].bh->b_data)->info;
969  	/* save local copy, "info" may be freed after brelse() */
970  	indirect_levels = info->indirect_levels;
971  	for (i = 0; i <= indirect_levels; i++) {
972  		if (frames[i].bh == NULL)
973  			break;
974  		brelse(frames[i].bh);
975  		frames[i].bh = NULL;
976  	}
977  }
978  
/*
 * This function increments the frame pointer to search the next leaf
 * block, and reads in the necessary intervening nodes if the search
 * should be necessary.  Whether or not the search is necessary is
 * controlled by the hash parameter.  If the hash value is even, then
 * the search is only continued if the next block starts with that
 * hash value.  This is used if we are searching for a specific file.
 *
 * If the hash value is HASH_NB_ALWAYS, then always go to the next block.
 *
 * This function returns 1 if the caller should continue to search,
 * or 0 if it should not.  If there is an error reading one of the
 * index blocks, it will return a negative error code.
 *
 * If start_hash is non-null, it will be filled in with the starting
 * hash of the next page.
 */
995   */
996  static int ext4_htree_next_block(struct inode *dir, __u32 hash,
997  				 struct dx_frame *frame,
998  				 struct dx_frame *frames,
999  				 __u32 *start_hash)
1000  {
1001  	struct dx_frame *p;
1002  	struct buffer_head *bh;
1003  	int num_frames = 0;
1004  	__u32 bhash;
1005  
1006  	p = frame;
1007  	/*
1008  	 * Find the next leaf page by incrementing the frame pointer.
1009  	 * If we run out of entries in the interior node, loop around and
1010  	 * increment pointer in the parent node.  When we break out of
1011  	 * this loop, num_frames indicates the number of interior
1012  	 * nodes need to be read.
1013  	 */
1014  	while (1) {
1015  		if (++(p->at) < p->entries + dx_get_count(p->entries))
1016  			break;
1017  		if (p == frames)
1018  			return 0;
1019  		num_frames++;
1020  		p--;
1021  	}
1022  
1023  	/*
1024  	 * If the hash is 1, then continue only if the next page has a
1025  	 * continuation hash of any value.  This is used for readdir
1026  	 * handling.  Otherwise, check to see if the hash matches the
1027  	 * desired continuation hash.  If it doesn't, return since
1028  	 * there's no point to read in the successive index pages.
1029  	 */
1030  	bhash = dx_get_hash(p->at);
1031  	if (start_hash)
1032  		*start_hash = bhash;
1033  	if ((hash & 1) == 0) {
1034  		if ((bhash & ~1) != hash)
1035  			return 0;
1036  	}
1037  	/*
1038  	 * If the hash is HASH_NB_ALWAYS, we always go to the next
1039  	 * block so no check is necessary
1040  	 */
1041  	while (num_frames--) {
1042  		bh = ext4_read_dirblock(dir, dx_get_block(p->at), INDEX);
1043  		if (IS_ERR(bh))
1044  			return PTR_ERR(bh);
1045  		p++;
1046  		brelse(p->bh);
1047  		p->bh = bh;
1048  		p->at = p->entries = ((struct dx_node *) bh->b_data)->entries;
1049  	}
1050  	return 1;
1051  }
1052  
1053  
1054  /*
1055   * This function fills a red-black tree with information from a
1056   * directory block.  It returns the number directory entries loaded
1057   * into the tree.  If there is an error it is returned in err.
1058   */
1059  static int htree_dirblock_to_tree(struct file *dir_file,
1060  				  struct inode *dir, ext4_lblk_t block,
1061  				  struct dx_hash_info *hinfo,
1062  				  __u32 start_hash, __u32 start_minor_hash)
1063  {
1064  	struct buffer_head *bh;
1065  	struct ext4_dir_entry_2 *de, *top;
1066  	int err = 0, count = 0;
1067  	struct fscrypt_str fname_crypto_str = FSTR_INIT(NULL, 0), tmp_str;
1068  	int csum = ext4_has_metadata_csum(dir->i_sb);
1069  
1070  	dxtrace(printk(KERN_INFO "In htree dirblock_to_tree: block %lu\n",
1071  							(unsigned long)block));
1072  	bh = ext4_read_dirblock(dir, block, DIRENT_HTREE);
1073  	if (IS_ERR(bh))
1074  		return PTR_ERR(bh);
1075  
1076  	de = (struct ext4_dir_entry_2 *) bh->b_data;
1077  	/* csum entries are not larger in the casefolded encrypted case */
1078  	top = (struct ext4_dir_entry_2 *) ((char *) de +
1079  					   dir->i_sb->s_blocksize -
1080  					   ext4_dir_rec_len(0,
1081  							   csum ? NULL : dir));
1082  	/* Check if the directory is encrypted */
1083  	if (IS_ENCRYPTED(dir)) {
1084  		err = fscrypt_prepare_readdir(dir);
1085  		if (err < 0) {
1086  			brelse(bh);
1087  			return err;
1088  		}
1089  		err = fscrypt_fname_alloc_buffer(EXT4_NAME_LEN,
1090  						 &fname_crypto_str);
1091  		if (err < 0) {
1092  			brelse(bh);
1093  			return err;
1094  		}
1095  	}
1096  
1097  	for (; de < top; de = ext4_next_entry(de, dir->i_sb->s_blocksize)) {
1098  		if (ext4_check_dir_entry(dir, NULL, de, bh,
1099  				bh->b_data, bh->b_size,
1100  				(block<<EXT4_BLOCK_SIZE_BITS(dir->i_sb))
1101  					 + ((char *)de - bh->b_data))) {
1102  			/* silently ignore the rest of the block */
1103  			break;
1104  		}
1105  		if (ext4_hash_in_dirent(dir)) {
1106  			if (de->name_len && de->inode) {
1107  				hinfo->hash = EXT4_DIRENT_HASH(de);
1108  				hinfo->minor_hash = EXT4_DIRENT_MINOR_HASH(de);
1109  			} else {
1110  				hinfo->hash = 0;
1111  				hinfo->minor_hash = 0;
1112  			}
1113  		} else {
1114  			ext4fs_dirhash(dir, de->name, de->name_len, hinfo);
1115  		}
1116  		if ((hinfo->hash < start_hash) ||
1117  		    ((hinfo->hash == start_hash) &&
1118  		     (hinfo->minor_hash < start_minor_hash)))
1119  			continue;
1120  		if (de->inode == 0)
1121  			continue;
1122  		if (!IS_ENCRYPTED(dir)) {
1123  			tmp_str.name = de->name;
1124  			tmp_str.len = de->name_len;
1125  			err = ext4_htree_store_dirent(dir_file,
1126  				   hinfo->hash, hinfo->minor_hash, de,
1127  				   &tmp_str);
1128  		} else {
1129  			int save_len = fname_crypto_str.len;
1130  			struct fscrypt_str de_name = FSTR_INIT(de->name,
1131  								de->name_len);
1132  
1133  			/* Directory is encrypted */
1134  			err = fscrypt_fname_disk_to_usr(dir, hinfo->hash,
1135  					hinfo->minor_hash, &de_name,
1136  					&fname_crypto_str);
1137  			if (err) {
1138  				count = err;
1139  				goto errout;
1140  			}
1141  			err = ext4_htree_store_dirent(dir_file,
1142  				   hinfo->hash, hinfo->minor_hash, de,
1143  					&fname_crypto_str);
1144  			fname_crypto_str.len = save_len;
1145  		}
1146  		if (err != 0) {
1147  			count = err;
1148  			goto errout;
1149  		}
1150  		count++;
1151  	}
1152  errout:
1153  	brelse(bh);
1154  	fscrypt_fname_free_buffer(&fname_crypto_str);
1155  	return count;
1156  }
1157  
1158  
1159  /*
1160   * This function fills a red-black tree with information from a
1161   * directory.  We start scanning the directory in hash order, starting
1162   * at start_hash and start_minor_hash.
1163   *
1164   * This function returns the number of entries inserted into the tree,
1165   * or a negative error code.
1166   */
1167  int ext4_htree_fill_tree(struct file *dir_file, __u32 start_hash,
1168  			 __u32 start_minor_hash, __u32 *next_hash)
1169  {
1170  	struct dx_hash_info hinfo;
1171  	struct ext4_dir_entry_2 *de;
1172  	struct dx_frame frames[EXT4_HTREE_LEVEL], *frame;
1173  	struct inode *dir;
1174  	ext4_lblk_t block;
1175  	int count = 0;
1176  	int ret, err;
1177  	__u32 hashval;
1178  	struct fscrypt_str tmp_str;
1179  
1180  	dxtrace(printk(KERN_DEBUG "In htree_fill_tree, start hash: %x:%x\n",
1181  		       start_hash, start_minor_hash));
1182  	dir = file_inode(dir_file);
1183  	if (!(ext4_test_inode_flag(dir, EXT4_INODE_INDEX))) {
1184  		if (ext4_hash_in_dirent(dir))
1185  			hinfo.hash_version = DX_HASH_SIPHASH;
1186  		else
1187  			hinfo.hash_version =
1188  					EXT4_SB(dir->i_sb)->s_def_hash_version;
1189  		if (hinfo.hash_version <= DX_HASH_TEA)
1190  			hinfo.hash_version +=
1191  				EXT4_SB(dir->i_sb)->s_hash_unsigned;
1192  		hinfo.seed = EXT4_SB(dir->i_sb)->s_hash_seed;
1193  		if (ext4_has_inline_data(dir)) {
1194  			int has_inline_data = 1;
1195  			count = ext4_inlinedir_to_tree(dir_file, dir, 0,
1196  						       &hinfo, start_hash,
1197  						       start_minor_hash,
1198  						       &has_inline_data);
1199  			if (has_inline_data) {
1200  				*next_hash = ~0;
1201  				return count;
1202  			}
1203  		}
1204  		count = htree_dirblock_to_tree(dir_file, dir, 0, &hinfo,
1205  					       start_hash, start_minor_hash);
1206  		*next_hash = ~0;
1207  		return count;
1208  	}
1209  	hinfo.hash = start_hash;
1210  	hinfo.minor_hash = 0;
1211  	frame = dx_probe(NULL, dir, &hinfo, frames);
1212  	if (IS_ERR(frame))
1213  		return PTR_ERR(frame);
1214  
1215  	/* Add '.' and '..' from the htree header */
1216  	if (!start_hash && !start_minor_hash) {
1217  		de = (struct ext4_dir_entry_2 *) frames[0].bh->b_data;
1218  		tmp_str.name = de->name;
1219  		tmp_str.len = de->name_len;
1220  		err = ext4_htree_store_dirent(dir_file, 0, 0,
1221  					      de, &tmp_str);
1222  		if (err != 0)
1223  			goto errout;
1224  		count++;
1225  	}
1226  	if (start_hash < 2 || (start_hash ==2 && start_minor_hash==0)) {
1227  		de = (struct ext4_dir_entry_2 *) frames[0].bh->b_data;
1228  		de = ext4_next_entry(de, dir->i_sb->s_blocksize);
1229  		tmp_str.name = de->name;
1230  		tmp_str.len = de->name_len;
1231  		err = ext4_htree_store_dirent(dir_file, 2, 0,
1232  					      de, &tmp_str);
1233  		if (err != 0)
1234  			goto errout;
1235  		count++;
1236  	}
1237  
1238  	while (1) {
1239  		if (fatal_signal_pending(current)) {
1240  			err = -ERESTARTSYS;
1241  			goto errout;
1242  		}
1243  		cond_resched();
1244  		block = dx_get_block(frame->at);
1245  		ret = htree_dirblock_to_tree(dir_file, dir, block, &hinfo,
1246  					     start_hash, start_minor_hash);
1247  		if (ret < 0) {
1248  			err = ret;
1249  			goto errout;
1250  		}
1251  		count += ret;
1252  		hashval = ~0;
1253  		ret = ext4_htree_next_block(dir, HASH_NB_ALWAYS,
1254  					    frame, frames, &hashval);
1255  		*next_hash = hashval;
1256  		if (ret < 0) {
1257  			err = ret;
1258  			goto errout;
1259  		}
1260  		/*
1261  		 * Stop if:  (a) there are no more entries, or
1262  		 * (b) we have inserted at least one entry and the
1263  		 * next hash value is not a continuation
1264  		 */
1265  		if ((ret == 0) ||
1266  		    (count && ((hashval & 1) == 0)))
1267  			break;
1268  	}
1269  	dx_release(frames);
1270  	dxtrace(printk(KERN_DEBUG "Fill tree: returned %d entries, "
1271  		       "next hash: %x\n", count, *next_hash));
1272  	return count;
1273  errout:
1274  	dx_release(frames);
1275  	return (err);
1276  }
1277  
1278  static inline int search_dirblock(struct buffer_head *bh,
1279  				  struct inode *dir,
1280  				  struct ext4_filename *fname,
1281  				  unsigned int offset,
1282  				  struct ext4_dir_entry_2 **res_dir)
1283  {
1284  	return ext4_search_dir(bh, bh->b_data, dir->i_sb->s_blocksize, dir,
1285  			       fname, offset, res_dir);
1286  }
1287  
1288  /*
1289   * Directory block splitting, compacting
1290   */
1291  
1292  /*
1293   * Create map of hash values, offsets, and sizes, stored at end of block.
1294   * Returns number of entries mapped.
1295   */
1296  static int dx_make_map(struct inode *dir, struct buffer_head *bh,
1297  		       struct dx_hash_info *hinfo,
1298  		       struct dx_map_entry *map_tail)
1299  {
1300  	int count = 0;
1301  	struct ext4_dir_entry_2 *de = (struct ext4_dir_entry_2 *)bh->b_data;
1302  	unsigned int buflen = bh->b_size;
1303  	char *base = bh->b_data;
1304  	struct dx_hash_info h = *hinfo;
1305  
1306  	if (ext4_has_metadata_csum(dir->i_sb))
1307  		buflen -= sizeof(struct ext4_dir_entry_tail);
1308  
1309  	while ((char *) de < base + buflen) {
1310  		if (ext4_check_dir_entry(dir, NULL, de, bh, base, buflen,
1311  					 ((char *)de) - base))
1312  			return -EFSCORRUPTED;
1313  		if (de->name_len && de->inode) {
1314  			if (ext4_hash_in_dirent(dir))
1315  				h.hash = EXT4_DIRENT_HASH(de);
1316  			else
1317  				ext4fs_dirhash(dir, de->name, de->name_len, &h);
1318  			map_tail--;
1319  			map_tail->hash = h.hash;
1320  			map_tail->offs = ((char *) de - base)>>2;
1321  			map_tail->size = le16_to_cpu(de->rec_len);
1322  			count++;
1323  			cond_resched();
1324  		}
1325  		de = ext4_next_entry(de, dir->i_sb->s_blocksize);
1326  	}
1327  	return count;
1328  }
1329  
1330  /* Sort map by hash value */
1331  static void dx_sort_map (struct dx_map_entry *map, unsigned count)
1332  {
1333  	struct dx_map_entry *p, *q, *top = map + count - 1;
1334  	int more;
1335  	/* Combsort until bubble sort doesn't suck */
1336  	while (count > 2) {
1337  		count = count*10/13;
1338  		if (count - 9 < 2) /* 9, 10 -> 11 */
1339  			count = 11;
1340  		for (p = top, q = p - count; q >= map; p--, q--)
1341  			if (p->hash < q->hash)
1342  				swap(*p, *q);
1343  	}
1344  	/* Garden variety bubble sort */
1345  	do {
1346  		more = 0;
1347  		q = top;
1348  		while (q-- > map) {
1349  			if (q[1].hash >= q[0].hash)
1350  				continue;
1351  			swap(*(q+1), *q);
1352  			more = 1;
1353  		}
1354  	} while(more);
1355  }
1356  
1357  static void dx_insert_block(struct dx_frame *frame, u32 hash, ext4_lblk_t block)
1358  {
1359  	struct dx_entry *entries = frame->entries;
1360  	struct dx_entry *old = frame->at, *new = old + 1;
1361  	int count = dx_get_count(entries);
1362  
1363  	ASSERT(count < dx_get_limit(entries));
1364  	ASSERT(old < entries + count);
1365  	memmove(new + 1, new, (char *)(entries + count) - (char *)(new));
1366  	dx_set_hash(new, hash);
1367  	dx_set_block(new, block);
1368  	dx_set_count(entries, count + 1);
1369  }
1370  
1371  #if IS_ENABLED(CONFIG_UNICODE)
1372  /*
1373   * Test whether a case-insensitive directory entry matches the filename
1374   * being searched for.  If quick is set, assume the name being looked up
1375   * is already in the casefolded form.
1376   *
1377   * Returns: 0 if the directory entry matches, more than 0 if it
1378   * doesn't match or less than zero on error.
1379   */
1380  static int ext4_ci_compare(const struct inode *parent, const struct qstr *name,
1381  			   u8 *de_name, size_t de_name_len, bool quick)
1382  {
1383  	const struct super_block *sb = parent->i_sb;
1384  	const struct unicode_map *um = sb->s_encoding;
1385  	struct fscrypt_str decrypted_name = FSTR_INIT(NULL, de_name_len);
1386  	struct qstr entry = QSTR_INIT(de_name, de_name_len);
1387  	int ret;
1388  
1389  	if (IS_ENCRYPTED(parent)) {
1390  		const struct fscrypt_str encrypted_name =
1391  				FSTR_INIT(de_name, de_name_len);
1392  
1393  		decrypted_name.name = kmalloc(de_name_len, GFP_KERNEL);
1394  		if (!decrypted_name.name)
1395  			return -ENOMEM;
1396  		ret = fscrypt_fname_disk_to_usr(parent, 0, 0, &encrypted_name,
1397  						&decrypted_name);
1398  		if (ret < 0)
1399  			goto out;
1400  		entry.name = decrypted_name.name;
1401  		entry.len = decrypted_name.len;
1402  	}
1403  
1404  	if (quick)
1405  		ret = utf8_strncasecmp_folded(um, name, &entry);
1406  	else
1407  		ret = utf8_strncasecmp(um, name, &entry);
1408  	if (ret < 0) {
1409  		/* Handle invalid character sequence as either an error
1410  		 * or as an opaque byte sequence.
1411  		 */
1412  		if (sb_has_strict_encoding(sb))
1413  			ret = -EINVAL;
1414  		else if (name->len != entry.len)
1415  			ret = 1;
1416  		else
1417  			ret = !!memcmp(name->name, entry.name, entry.len);
1418  	}
1419  out:
1420  	kfree(decrypted_name.name);
1421  	return ret;
1422  }
1423  
1424  int ext4_fname_setup_ci_filename(struct inode *dir, const struct qstr *iname,
1425  				  struct ext4_filename *name)
1426  {
1427  	struct fscrypt_str *cf_name = &name->cf_name;
1428  	struct dx_hash_info *hinfo = &name->hinfo;
1429  	int len;
1430  
1431  	if (!IS_CASEFOLDED(dir) || !dir->i_sb->s_encoding ||
1432  	    (IS_ENCRYPTED(dir) && !fscrypt_has_encryption_key(dir))) {
1433  		cf_name->name = NULL;
1434  		return 0;
1435  	}
1436  
1437  	cf_name->name = kmalloc(EXT4_NAME_LEN, GFP_NOFS);
1438  	if (!cf_name->name)
1439  		return -ENOMEM;
1440  
1441  	len = utf8_casefold(dir->i_sb->s_encoding,
1442  			    iname, cf_name->name,
1443  			    EXT4_NAME_LEN);
1444  	if (len <= 0) {
1445  		kfree(cf_name->name);
1446  		cf_name->name = NULL;
1447  	}
1448  	cf_name->len = (unsigned) len;
1449  	if (!IS_ENCRYPTED(dir))
1450  		return 0;
1451  
1452  	hinfo->hash_version = DX_HASH_SIPHASH;
1453  	hinfo->seed = NULL;
1454  	if (cf_name->name)
1455  		ext4fs_dirhash(dir, cf_name->name, cf_name->len, hinfo);
1456  	else
1457  		ext4fs_dirhash(dir, iname->name, iname->len, hinfo);
1458  	return 0;
1459  }
1460  #endif
1461  
1462  /*
1463   * Test whether a directory entry matches the filename being searched for.
1464   *
1465   * Return: %true if the directory entry matches, otherwise %false.
1466   */
1467  static bool ext4_match(struct inode *parent,
1468  			      const struct ext4_filename *fname,
1469  			      struct ext4_dir_entry_2 *de)
1470  {
1471  	struct fscrypt_name f;
1472  
1473  	if (!de->inode)
1474  		return false;
1475  
1476  	f.usr_fname = fname->usr_fname;
1477  	f.disk_name = fname->disk_name;
1478  #ifdef CONFIG_FS_ENCRYPTION
1479  	f.crypto_buf = fname->crypto_buf;
1480  #endif
1481  
1482  #if IS_ENABLED(CONFIG_UNICODE)
1483  	if (parent->i_sb->s_encoding && IS_CASEFOLDED(parent) &&
1484  	    (!IS_ENCRYPTED(parent) || fscrypt_has_encryption_key(parent))) {
1485  		if (fname->cf_name.name) {
1486  			struct qstr cf = {.name = fname->cf_name.name,
1487  					  .len = fname->cf_name.len};
1488  			if (IS_ENCRYPTED(parent)) {
1489  				if (fname->hinfo.hash != EXT4_DIRENT_HASH(de) ||
1490  					fname->hinfo.minor_hash !=
1491  						EXT4_DIRENT_MINOR_HASH(de)) {
1492  
1493  					return false;
1494  				}
1495  			}
1496  			return !ext4_ci_compare(parent, &cf, de->name,
1497  							de->name_len, true);
1498  		}
1499  		return !ext4_ci_compare(parent, fname->usr_fname, de->name,
1500  						de->name_len, false);
1501  	}
1502  #endif
1503  
1504  	return fscrypt_match_name(&f, de->name, de->name_len);
1505  }
1506  
1507  /*
1508   * Returns 0 if not found, -1 on failure, and 1 on success
1509   */
1510  int ext4_search_dir(struct buffer_head *bh, char *search_buf, int buf_size,
1511  		    struct inode *dir, struct ext4_filename *fname,
1512  		    unsigned int offset, struct ext4_dir_entry_2 **res_dir)
1513  {
1514  	struct ext4_dir_entry_2 * de;
1515  	char * dlimit;
1516  	int de_len;
1517  
1518  	de = (struct ext4_dir_entry_2 *)search_buf;
1519  	dlimit = search_buf + buf_size;
1520  	while ((char *) de < dlimit - EXT4_BASE_DIR_LEN) {
1521  		/* this code is executed quadratically often */
1522  		/* do minimal checking `by hand' */
1523  		if (de->name + de->name_len <= dlimit &&
1524  		    ext4_match(dir, fname, de)) {
1525  			/* found a match - just to be sure, do
1526  			 * a full check */
1527  			if (ext4_check_dir_entry(dir, NULL, de, bh, search_buf,
1528  						 buf_size, offset))
1529  				return -1;
1530  			*res_dir = de;
1531  			return 1;
1532  		}
1533  		/* prevent looping on a bad block */
1534  		de_len = ext4_rec_len_from_disk(de->rec_len,
1535  						dir->i_sb->s_blocksize);
1536  		if (de_len <= 0)
1537  			return -1;
1538  		offset += de_len;
1539  		de = (struct ext4_dir_entry_2 *) ((char *) de + de_len);
1540  	}
1541  	return 0;
1542  }
1543  
1544  static int is_dx_internal_node(struct inode *dir, ext4_lblk_t block,
1545  			       struct ext4_dir_entry *de)
1546  {
1547  	struct super_block *sb = dir->i_sb;
1548  
1549  	if (!is_dx(dir))
1550  		return 0;
1551  	if (block == 0)
1552  		return 1;
1553  	if (de->inode == 0 &&
1554  	    ext4_rec_len_from_disk(de->rec_len, sb->s_blocksize) ==
1555  			sb->s_blocksize)
1556  		return 1;
1557  	return 0;
1558  }
1559  
1560  /*
1561   *	__ext4_find_entry()
1562   *
1563   * finds an entry in the specified directory with the wanted name. It
1564   * returns the cache buffer in which the entry was found, and the entry
1565   * itself (as a parameter - res_dir). It does NOT read the inode of the
1566   * entry - you'll have to do that yourself if you want to.
1567   *
1568   * The returned buffer_head has ->b_count elevated.  The caller is expected
1569   * to brelse() it when appropriate.
1570   */
1571  static struct buffer_head *__ext4_find_entry(struct inode *dir,
1572  					     struct ext4_filename *fname,
1573  					     struct ext4_dir_entry_2 **res_dir,
1574  					     int *inlined)
1575  {
1576  	struct super_block *sb;
1577  	struct buffer_head *bh_use[NAMEI_RA_SIZE];
1578  	struct buffer_head *bh, *ret = NULL;
1579  	ext4_lblk_t start, block;
1580  	const u8 *name = fname->usr_fname->name;
1581  	size_t ra_max = 0;	/* Number of bh's in the readahead
1582  				   buffer, bh_use[] */
1583  	size_t ra_ptr = 0;	/* Current index into readahead
1584  				   buffer */
1585  	ext4_lblk_t  nblocks;
1586  	int i, namelen, retval;
1587  
1588  	*res_dir = NULL;
1589  	sb = dir->i_sb;
1590  	namelen = fname->usr_fname->len;
1591  	if (namelen > EXT4_NAME_LEN)
1592  		return NULL;
1593  
1594  	if (ext4_has_inline_data(dir)) {
1595  		int has_inline_data = 1;
1596  		ret = ext4_find_inline_entry(dir, fname, res_dir,
1597  					     &has_inline_data);
1598  		if (has_inline_data) {
1599  			if (inlined)
1600  				*inlined = 1;
1601  			goto cleanup_and_exit;
1602  		}
1603  	}
1604  
1605  	if ((namelen <= 2) && (name[0] == '.') &&
1606  	    (name[1] == '.' || name[1] == '\0')) {
1607  		/*
1608  		 * "." or ".." will only be in the first block
1609  		 * NFS may look up ".."; "." should be handled by the VFS
1610  		 */
1611  		block = start = 0;
1612  		nblocks = 1;
1613  		goto restart;
1614  	}
1615  	if (is_dx(dir)) {
1616  		ret = ext4_dx_find_entry(dir, fname, res_dir);
1617  		/*
1618  		 * On success, or if the error was file not found,
1619  		 * return.  Otherwise, fall back to doing a search the
1620  		 * old fashioned way.
1621  		 */
1622  		if (!IS_ERR(ret) || PTR_ERR(ret) != ERR_BAD_DX_DIR)
1623  			goto cleanup_and_exit;
1624  		dxtrace(printk(KERN_DEBUG "ext4_find_entry: dx failed, "
1625  			       "falling back\n"));
1626  		ret = NULL;
1627  	}
1628  	nblocks = dir->i_size >> EXT4_BLOCK_SIZE_BITS(sb);
1629  	if (!nblocks) {
1630  		ret = NULL;
1631  		goto cleanup_and_exit;
1632  	}
1633  	start = EXT4_I(dir)->i_dir_start_lookup;
1634  	if (start >= nblocks)
1635  		start = 0;
1636  	block = start;
1637  restart:
1638  	do {
1639  		/*
1640  		 * We deal with the read-ahead logic here.
1641  		 */
1642  		cond_resched();
1643  		if (ra_ptr >= ra_max) {
1644  			/* Refill the readahead buffer */
1645  			ra_ptr = 0;
1646  			if (block < start)
1647  				ra_max = start - block;
1648  			else
1649  				ra_max = nblocks - block;
1650  			ra_max = min(ra_max, ARRAY_SIZE(bh_use));
1651  			retval = ext4_bread_batch(dir, block, ra_max,
1652  						  false /* wait */, bh_use);
1653  			if (retval) {
1654  				ret = ERR_PTR(retval);
1655  				ra_max = 0;
1656  				goto cleanup_and_exit;
1657  			}
1658  		}
1659  		if ((bh = bh_use[ra_ptr++]) == NULL)
1660  			goto next;
1661  		wait_on_buffer(bh);
1662  		if (!buffer_uptodate(bh)) {
1663  			EXT4_ERROR_INODE_ERR(dir, EIO,
1664  					     "reading directory lblock %lu",
1665  					     (unsigned long) block);
1666  			brelse(bh);
1667  			ret = ERR_PTR(-EIO);
1668  			goto cleanup_and_exit;
1669  		}
1670  		if (!buffer_verified(bh) &&
1671  		    !is_dx_internal_node(dir, block,
1672  					 (struct ext4_dir_entry *)bh->b_data) &&
1673  		    !ext4_dirblock_csum_verify(dir, bh)) {
1674  			EXT4_ERROR_INODE_ERR(dir, EFSBADCRC,
1675  					     "checksumming directory "
1676  					     "block %lu", (unsigned long)block);
1677  			brelse(bh);
1678  			ret = ERR_PTR(-EFSBADCRC);
1679  			goto cleanup_and_exit;
1680  		}
1681  		set_buffer_verified(bh);
1682  		i = search_dirblock(bh, dir, fname,
1683  			    block << EXT4_BLOCK_SIZE_BITS(sb), res_dir);
1684  		if (i == 1) {
1685  			EXT4_I(dir)->i_dir_start_lookup = block;
1686  			ret = bh;
1687  			goto cleanup_and_exit;
1688  		} else {
1689  			brelse(bh);
1690  			if (i < 0)
1691  				goto cleanup_and_exit;
1692  		}
1693  	next:
1694  		if (++block >= nblocks)
1695  			block = 0;
1696  	} while (block != start);
1697  
1698  	/*
1699  	 * If the directory has grown while we were searching, then
1700  	 * search the last part of the directory before giving up.
1701  	 */
1702  	block = nblocks;
1703  	nblocks = dir->i_size >> EXT4_BLOCK_SIZE_BITS(sb);
1704  	if (block < nblocks) {
1705  		start = 0;
1706  		goto restart;
1707  	}
1708  
1709  cleanup_and_exit:
1710  	/* Clean up the read-ahead blocks */
1711  	for (; ra_ptr < ra_max; ra_ptr++)
1712  		brelse(bh_use[ra_ptr]);
1713  	return ret;
1714  }
1715  
1716  static struct buffer_head *ext4_find_entry(struct inode *dir,
1717  					   const struct qstr *d_name,
1718  					   struct ext4_dir_entry_2 **res_dir,
1719  					   int *inlined)
1720  {
1721  	int err;
1722  	struct ext4_filename fname;
1723  	struct buffer_head *bh;
1724  
1725  	err = ext4_fname_setup_filename(dir, d_name, 1, &fname);
1726  	if (err == -ENOENT)
1727  		return NULL;
1728  	if (err)
1729  		return ERR_PTR(err);
1730  
1731  	bh = __ext4_find_entry(dir, &fname, res_dir, inlined);
1732  
1733  	ext4_fname_free_filename(&fname);
1734  	return bh;
1735  }
1736  
1737  static struct buffer_head *ext4_lookup_entry(struct inode *dir,
1738  					     struct dentry *dentry,
1739  					     struct ext4_dir_entry_2 **res_dir)
1740  {
1741  	int err;
1742  	struct ext4_filename fname;
1743  	struct buffer_head *bh;
1744  
1745  	err = ext4_fname_prepare_lookup(dir, dentry, &fname);
1746  	generic_set_encrypted_ci_d_ops(dentry);
1747  	if (err == -ENOENT)
1748  		return NULL;
1749  	if (err)
1750  		return ERR_PTR(err);
1751  
1752  	bh = __ext4_find_entry(dir, &fname, res_dir, NULL);
1753  
1754  	ext4_fname_free_filename(&fname);
1755  	return bh;
1756  }
1757  
1758  static struct buffer_head * ext4_dx_find_entry(struct inode *dir,
1759  			struct ext4_filename *fname,
1760  			struct ext4_dir_entry_2 **res_dir)
1761  {
1762  	struct super_block * sb = dir->i_sb;
1763  	struct dx_frame frames[EXT4_HTREE_LEVEL], *frame;
1764  	struct buffer_head *bh;
1765  	ext4_lblk_t block;
1766  	int retval;
1767  
1768  #ifdef CONFIG_FS_ENCRYPTION
1769  	*res_dir = NULL;
1770  #endif
1771  	frame = dx_probe(fname, dir, NULL, frames);
1772  	if (IS_ERR(frame))
1773  		return (struct buffer_head *) frame;
1774  	do {
1775  		block = dx_get_block(frame->at);
1776  		bh = ext4_read_dirblock(dir, block, DIRENT_HTREE);
1777  		if (IS_ERR(bh))
1778  			goto errout;
1779  
1780  		retval = search_dirblock(bh, dir, fname,
1781  					 block << EXT4_BLOCK_SIZE_BITS(sb),
1782  					 res_dir);
1783  		if (retval == 1)
1784  			goto success;
1785  		brelse(bh);
1786  		if (retval == -1) {
1787  			bh = ERR_PTR(ERR_BAD_DX_DIR);
1788  			goto errout;
1789  		}
1790  
1791  		/* Check to see if we should continue to search */
1792  		retval = ext4_htree_next_block(dir, fname->hinfo.hash, frame,
1793  					       frames, NULL);
1794  		if (retval < 0) {
1795  			ext4_warning_inode(dir,
1796  				"error %d reading directory index block",
1797  				retval);
1798  			bh = ERR_PTR(retval);
1799  			goto errout;
1800  		}
1801  	} while (retval == 1);
1802  
1803  	bh = NULL;
1804  errout:
1805  	dxtrace(printk(KERN_DEBUG "%s not found\n", fname->usr_fname->name));
1806  success:
1807  	dx_release(frames);
1808  	return bh;
1809  }
1810  
1811  static struct dentry *ext4_lookup(struct inode *dir, struct dentry *dentry, unsigned int flags)
1812  {
1813  	struct inode *inode;
1814  	struct ext4_dir_entry_2 *de;
1815  	struct buffer_head *bh;
1816  
1817  	if (dentry->d_name.len > EXT4_NAME_LEN)
1818  		return ERR_PTR(-ENAMETOOLONG);
1819  
1820  	bh = ext4_lookup_entry(dir, dentry, &de);
1821  	if (IS_ERR(bh))
1822  		return ERR_CAST(bh);
1823  	inode = NULL;
1824  	if (bh) {
1825  		__u32 ino = le32_to_cpu(de->inode);
1826  		brelse(bh);
1827  		if (!ext4_valid_inum(dir->i_sb, ino)) {
1828  			EXT4_ERROR_INODE(dir, "bad inode number: %u", ino);
1829  			return ERR_PTR(-EFSCORRUPTED);
1830  		}
1831  		if (unlikely(ino == dir->i_ino)) {
1832  			EXT4_ERROR_INODE(dir, "'%pd' linked to parent dir",
1833  					 dentry);
1834  			return ERR_PTR(-EFSCORRUPTED);
1835  		}
1836  		inode = ext4_iget(dir->i_sb, ino, EXT4_IGET_NORMAL);
1837  		if (inode == ERR_PTR(-ESTALE)) {
1838  			EXT4_ERROR_INODE(dir,
1839  					 "deleted inode referenced: %u",
1840  					 ino);
1841  			return ERR_PTR(-EFSCORRUPTED);
1842  		}
1843  		if (!IS_ERR(inode) && IS_ENCRYPTED(dir) &&
1844  		    (S_ISDIR(inode->i_mode) || S_ISLNK(inode->i_mode)) &&
1845  		    !fscrypt_has_permitted_context(dir, inode)) {
1846  			ext4_warning(inode->i_sb,
1847  				     "Inconsistent encryption contexts: %lu/%lu",
1848  				     dir->i_ino, inode->i_ino);
1849  			iput(inode);
1850  			return ERR_PTR(-EPERM);
1851  		}
1852  	}
1853  
1854  #if IS_ENABLED(CONFIG_UNICODE)
1855  	if (!inode && IS_CASEFOLDED(dir)) {
1856  		/* Eventually we want to call d_add_ci(dentry, NULL)
1857  		 * for negative dentries in the encoding case as
1858  		 * well.  For now, prevent the negative dentry
1859  		 * from being cached.
1860  		 */
1861  		return NULL;
1862  	}
1863  #endif
1864  	return d_splice_alias(inode, dentry);
1865  }
1866  
1867  
1868  struct dentry *ext4_get_parent(struct dentry *child)
1869  {
1870  	__u32 ino;
1871  	struct ext4_dir_entry_2 * de;
1872  	struct buffer_head *bh;
1873  
1874  	bh = ext4_find_entry(d_inode(child), &dotdot_name, &de, NULL);
1875  	if (IS_ERR(bh))
1876  		return ERR_CAST(bh);
1877  	if (!bh)
1878  		return ERR_PTR(-ENOENT);
1879  	ino = le32_to_cpu(de->inode);
1880  	brelse(bh);
1881  
1882  	if (!ext4_valid_inum(child->d_sb, ino)) {
1883  		EXT4_ERROR_INODE(d_inode(child),
1884  				 "bad parent inode number: %u", ino);
1885  		return ERR_PTR(-EFSCORRUPTED);
1886  	}
1887  
1888  	return d_obtain_alias(ext4_iget(child->d_sb, ino, EXT4_IGET_NORMAL));
1889  }
1890  
1891  /*
1892   * Move count entries from end of map between two memory locations.
1893   * Returns pointer to last entry moved.
1894   */
1895  static struct ext4_dir_entry_2 *
1896  dx_move_dirents(struct inode *dir, char *from, char *to,
1897  		struct dx_map_entry *map, int count,
1898  		unsigned blocksize)
1899  {
1900  	unsigned rec_len = 0;
1901  
1902  	while (count--) {
1903  		struct ext4_dir_entry_2 *de = (struct ext4_dir_entry_2 *)
1904  						(from + (map->offs<<2));
1905  		rec_len = ext4_dir_rec_len(de->name_len, dir);
1906  
1907  		memcpy (to, de, rec_len);
1908  		((struct ext4_dir_entry_2 *) to)->rec_len =
1909  				ext4_rec_len_to_disk(rec_len, blocksize);
1910  
1911  		/* wipe dir_entry excluding the rec_len field */
1912  		de->inode = 0;
1913  		memset(&de->name_len, 0, ext4_rec_len_from_disk(de->rec_len,
1914  								blocksize) -
1915  					 offsetof(struct ext4_dir_entry_2,
1916  								name_len));
1917  
1918  		map++;
1919  		to += rec_len;
1920  	}
1921  	return (struct ext4_dir_entry_2 *) (to - rec_len);
1922  }
1923  
1924  /*
1925   * Compact each dir entry in the range to the minimal rec_len.
1926   * Returns pointer to last entry in range.
1927   */
1928  static struct ext4_dir_entry_2 *dx_pack_dirents(struct inode *dir, char *base,
1929  							unsigned int blocksize)
1930  {
1931  	struct ext4_dir_entry_2 *next, *to, *prev, *de = (struct ext4_dir_entry_2 *) base;
1932  	unsigned rec_len = 0;
1933  
1934  	prev = to = de;
1935  	while ((char*)de < base + blocksize) {
1936  		next = ext4_next_entry(de, blocksize);
1937  		if (de->inode && de->name_len) {
1938  			rec_len = ext4_dir_rec_len(de->name_len, dir);
1939  			if (de > to)
1940  				memmove(to, de, rec_len);
1941  			to->rec_len = ext4_rec_len_to_disk(rec_len, blocksize);
1942  			prev = to;
1943  			to = (struct ext4_dir_entry_2 *) (((char *) to) + rec_len);
1944  		}
1945  		de = next;
1946  	}
1947  	return prev;
1948  }
1949  
1950  /*
1951   * Split a full leaf block to make room for a new dir entry.
1952   * Allocate a new block, and move entries so that they are approx. equally full.
1953   * Returns pointer to de in block into which the new entry will be inserted.
1954   */
1955  static struct ext4_dir_entry_2 *do_split(handle_t *handle, struct inode *dir,
1956  			struct buffer_head **bh,struct dx_frame *frame,
1957  			struct dx_hash_info *hinfo)
1958  {
1959  	unsigned blocksize = dir->i_sb->s_blocksize;
1960  	unsigned continued;
1961  	int count;
1962  	struct buffer_head *bh2;
1963  	ext4_lblk_t newblock;
1964  	u32 hash2;
1965  	struct dx_map_entry *map;
1966  	char *data1 = (*bh)->b_data, *data2;
1967  	unsigned split, move, size;
1968  	struct ext4_dir_entry_2 *de = NULL, *de2;
1969  	int	csum_size = 0;
1970  	int	err = 0, i;
1971  
1972  	if (ext4_has_metadata_csum(dir->i_sb))
1973  		csum_size = sizeof(struct ext4_dir_entry_tail);
1974  
1975  	bh2 = ext4_append(handle, dir, &newblock);
1976  	if (IS_ERR(bh2)) {
1977  		brelse(*bh);
1978  		*bh = NULL;
1979  		return (struct ext4_dir_entry_2 *) bh2;
1980  	}
1981  
1982  	BUFFER_TRACE(*bh, "get_write_access");
1983  	err = ext4_journal_get_write_access(handle, dir->i_sb, *bh,
1984  					    EXT4_JTR_NONE);
1985  	if (err)
1986  		goto journal_error;
1987  
1988  	BUFFER_TRACE(frame->bh, "get_write_access");
1989  	err = ext4_journal_get_write_access(handle, dir->i_sb, frame->bh,
1990  					    EXT4_JTR_NONE);
1991  	if (err)
1992  		goto journal_error;
1993  
1994  	data2 = bh2->b_data;
1995  
1996  	/* create map in the end of data2 block */
1997  	map = (struct dx_map_entry *) (data2 + blocksize);
1998  	count = dx_make_map(dir, *bh, hinfo, map);
1999  	if (count < 0) {
2000  		err = count;
2001  		goto journal_error;
2002  	}
2003  	map -= count;
2004  	dx_sort_map(map, count);
2005  	/* Ensure that neither split block is over half full */
2006  	size = 0;
2007  	move = 0;
2008  	for (i = count-1; i >= 0; i--) {
2009  		/* is more than half of this entry in 2nd half of the block? */
2010  		if (size + map[i].size/2 > blocksize/2)
2011  			break;
2012  		size += map[i].size;
2013  		move++;
2014  	}
2015  	/*
2016  	 * map index at which we will split
2017  	 *
2018  	 * If the sum of active entries didn't exceed half the block size, just
2019  	 * split it in half by count; each resulting block will have at least
2020  	 * half the space free.
2021  	 */
2022  	if (i > 0)
2023  		split = count - move;
2024  	else
2025  		split = count/2;
2026  
2027  	hash2 = map[split].hash;
2028  	continued = hash2 == map[split - 1].hash;
2029  	dxtrace(printk(KERN_INFO "Split block %lu at %x, %i/%i\n",
2030  			(unsigned long)dx_get_block(frame->at),
2031  					hash2, split, count-split));
2032  
2033  	/* Fancy dance to stay within two buffers */
2034  	de2 = dx_move_dirents(dir, data1, data2, map + split, count - split,
2035  			      blocksize);
2036  	de = dx_pack_dirents(dir, data1, blocksize);
2037  	de->rec_len = ext4_rec_len_to_disk(data1 + (blocksize - csum_size) -
2038  					   (char *) de,
2039  					   blocksize);
2040  	de2->rec_len = ext4_rec_len_to_disk(data2 + (blocksize - csum_size) -
2041  					    (char *) de2,
2042  					    blocksize);
2043  	if (csum_size) {
2044  		ext4_initialize_dirent_tail(*bh, blocksize);
2045  		ext4_initialize_dirent_tail(bh2, blocksize);
2046  	}
2047  
2048  	dxtrace(dx_show_leaf(dir, hinfo, (struct ext4_dir_entry_2 *) data1,
2049  			blocksize, 1));
2050  	dxtrace(dx_show_leaf(dir, hinfo, (struct ext4_dir_entry_2 *) data2,
2051  			blocksize, 1));
2052  
2053  	/* Which block gets the new entry? */
2054  	if (hinfo->hash >= hash2) {
2055  		swap(*bh, bh2);
2056  		de = de2;
2057  	}
2058  	dx_insert_block(frame, hash2 + continued, newblock);
2059  	err = ext4_handle_dirty_dirblock(handle, dir, bh2);
2060  	if (err)
2061  		goto journal_error;
2062  	err = ext4_handle_dirty_dx_node(handle, dir, frame->bh);
2063  	if (err)
2064  		goto journal_error;
2065  	brelse(bh2);
2066  	dxtrace(dx_show_index("frame", frame->entries));
2067  	return de;
2068  
2069  journal_error:
2070  	brelse(*bh);
2071  	brelse(bh2);
2072  	*bh = NULL;
2073  	ext4_std_error(dir->i_sb, err);
2074  	return ERR_PTR(err);
2075  }
2076  
2077  int ext4_find_dest_de(struct inode *dir, struct inode *inode,
2078  		      struct buffer_head *bh,
2079  		      void *buf, int buf_size,
2080  		      struct ext4_filename *fname,
2081  		      struct ext4_dir_entry_2 **dest_de)
2082  {
2083  	struct ext4_dir_entry_2 *de;
2084  	unsigned short reclen = ext4_dir_rec_len(fname_len(fname), dir);
2085  	int nlen, rlen;
2086  	unsigned int offset = 0;
2087  	char *top;
2088  
2089  	de = buf;
2090  	top = buf + buf_size - reclen;
2091  	while ((char *) de <= top) {
2092  		if (ext4_check_dir_entry(dir, NULL, de, bh,
2093  					 buf, buf_size, offset))
2094  			return -EFSCORRUPTED;
2095  		if (ext4_match(dir, fname, de))
2096  			return -EEXIST;
2097  		nlen = ext4_dir_rec_len(de->name_len, dir);
2098  		rlen = ext4_rec_len_from_disk(de->rec_len, buf_size);
2099  		if ((de->inode ? rlen - nlen : rlen) >= reclen)
2100  			break;
2101  		de = (struct ext4_dir_entry_2 *)((char *)de + rlen);
2102  		offset += rlen;
2103  	}
2104  	if ((char *) de > top)
2105  		return -ENOSPC;
2106  
2107  	*dest_de = de;
2108  	return 0;
2109  }
2110  
2111  void ext4_insert_dentry(struct inode *dir,
2112  			struct inode *inode,
2113  			struct ext4_dir_entry_2 *de,
2114  			int buf_size,
2115  			struct ext4_filename *fname)
2116  {
2117  
2118  	int nlen, rlen;
2119  
2120  	nlen = ext4_dir_rec_len(de->name_len, dir);
2121  	rlen = ext4_rec_len_from_disk(de->rec_len, buf_size);
2122  	if (de->inode) {
2123  		struct ext4_dir_entry_2 *de1 =
2124  			(struct ext4_dir_entry_2 *)((char *)de + nlen);
2125  		de1->rec_len = ext4_rec_len_to_disk(rlen - nlen, buf_size);
2126  		de->rec_len = ext4_rec_len_to_disk(nlen, buf_size);
2127  		de = de1;
2128  	}
2129  	de->file_type = EXT4_FT_UNKNOWN;
2130  	de->inode = cpu_to_le32(inode->i_ino);
2131  	ext4_set_de_type(inode->i_sb, de, inode->i_mode);
2132  	de->name_len = fname_len(fname);
2133  	memcpy(de->name, fname_name(fname), fname_len(fname));
2134  	if (ext4_hash_in_dirent(dir)) {
2135  		struct dx_hash_info *hinfo = &fname->hinfo;
2136  
2137  		EXT4_DIRENT_HASHES(de)->hash = cpu_to_le32(hinfo->hash);
2138  		EXT4_DIRENT_HASHES(de)->minor_hash =
2139  						cpu_to_le32(hinfo->minor_hash);
2140  	}
2141  }
2142  
2143  /*
2144   * Add a new entry into a directory (leaf) block.  If de is non-NULL,
2145   * it points to a directory entry which is guaranteed to be large
2146   * enough for new directory entry.  If de is NULL, then
2147   * add_dirent_to_buf will attempt search the directory block for
2148   * space.  It will return -ENOSPC if no space is available, and -EIO
2149   * and -EEXIST if directory entry already exists.
2150   */
2151  static int add_dirent_to_buf(handle_t *handle, struct ext4_filename *fname,
2152  			     struct inode *dir,
2153  			     struct inode *inode, struct ext4_dir_entry_2 *de,
2154  			     struct buffer_head *bh)
2155  {
2156  	unsigned int	blocksize = dir->i_sb->s_blocksize;
2157  	int		csum_size = 0;
2158  	int		err, err2;
2159  
2160  	if (ext4_has_metadata_csum(inode->i_sb))
2161  		csum_size = sizeof(struct ext4_dir_entry_tail);
2162  
2163  	if (!de) {
2164  		err = ext4_find_dest_de(dir, inode, bh, bh->b_data,
2165  					blocksize - csum_size, fname, &de);
2166  		if (err)
2167  			return err;
2168  	}
2169  	BUFFER_TRACE(bh, "get_write_access");
2170  	err = ext4_journal_get_write_access(handle, dir->i_sb, bh,
2171  					    EXT4_JTR_NONE);
2172  	if (err) {
2173  		ext4_std_error(dir->i_sb, err);
2174  		return err;
2175  	}
2176  
2177  	/* By now the buffer is marked for journaling */
2178  	ext4_insert_dentry(dir, inode, de, blocksize, fname);
2179  
2180  	/*
2181  	 * XXX shouldn't update any times until successful
2182  	 * completion of syscall, but too many callers depend
2183  	 * on this.
2184  	 *
2185  	 * XXX similarly, too many callers depend on
2186  	 * ext4_new_inode() setting the times, but error
2187  	 * recovery deletes the inode, so the worst that can
2188  	 * happen is that the times are slightly out of date
2189  	 * and/or different from the directory change time.
2190  	 */
2191  	dir->i_mtime = dir->i_ctime = current_time(dir);
2192  	ext4_update_dx_flag(dir);
2193  	inode_inc_iversion(dir);
2194  	err2 = ext4_mark_inode_dirty(handle, dir);
2195  	BUFFER_TRACE(bh, "call ext4_handle_dirty_metadata");
2196  	err = ext4_handle_dirty_dirblock(handle, dir, bh);
2197  	if (err)
2198  		ext4_std_error(dir->i_sb, err);
2199  	return err ? err : err2;
2200  }
2201  
2202  /*
2203   * This converts a one block unindexed directory to a 3 block indexed
2204   * directory, and adds the dentry to the indexed directory.
2205   */
2206  static int make_indexed_dir(handle_t *handle, struct ext4_filename *fname,
2207  			    struct inode *dir,
2208  			    struct inode *inode, struct buffer_head *bh)
2209  {
2210  	struct buffer_head *bh2;
2211  	struct dx_root	*root;
2212  	struct dx_frame	frames[EXT4_HTREE_LEVEL], *frame;
2213  	struct dx_entry *entries;
2214  	struct ext4_dir_entry_2	*de, *de2;
2215  	char		*data2, *top;
2216  	unsigned	len;
2217  	int		retval;
2218  	unsigned	blocksize;
2219  	ext4_lblk_t  block;
2220  	struct fake_dirent *fde;
2221  	int csum_size = 0;
2222  
2223  	if (ext4_has_metadata_csum(inode->i_sb))
2224  		csum_size = sizeof(struct ext4_dir_entry_tail);
2225  
2226  	blocksize =  dir->i_sb->s_blocksize;
2227  	dxtrace(printk(KERN_DEBUG "Creating index: inode %lu\n", dir->i_ino));
2228  	BUFFER_TRACE(bh, "get_write_access");
2229  	retval = ext4_journal_get_write_access(handle, dir->i_sb, bh,
2230  					       EXT4_JTR_NONE);
2231  	if (retval) {
2232  		ext4_std_error(dir->i_sb, retval);
2233  		brelse(bh);
2234  		return retval;
2235  	}
2236  	root = (struct dx_root *) bh->b_data;
2237  
2238  	/* The 0th block becomes the root, move the dirents out */
2239  	fde = &root->dotdot;
2240  	de = (struct ext4_dir_entry_2 *)((char *)fde +
2241  		ext4_rec_len_from_disk(fde->rec_len, blocksize));
2242  	if ((char *) de >= (((char *) root) + blocksize)) {
2243  		EXT4_ERROR_INODE(dir, "invalid rec_len for '..'");
2244  		brelse(bh);
2245  		return -EFSCORRUPTED;
2246  	}
2247  	len = ((char *) root) + (blocksize - csum_size) - (char *) de;
2248  
2249  	/* Allocate new block for the 0th block's dirents */
2250  	bh2 = ext4_append(handle, dir, &block);
2251  	if (IS_ERR(bh2)) {
2252  		brelse(bh);
2253  		return PTR_ERR(bh2);
2254  	}
2255  	ext4_set_inode_flag(dir, EXT4_INODE_INDEX);
2256  	data2 = bh2->b_data;
2257  
2258  	memcpy(data2, de, len);
2259  	memset(de, 0, len); /* wipe old data */
2260  	de = (struct ext4_dir_entry_2 *) data2;
2261  	top = data2 + len;
2262  	while ((char *)(de2 = ext4_next_entry(de, blocksize)) < top) {
2263  		if (ext4_check_dir_entry(dir, NULL, de, bh2, data2, len,
2264  					 (data2 + (blocksize - csum_size) -
2265  					  (char *) de))) {
2266  			brelse(bh2);
2267  			brelse(bh);
2268  			return -EFSCORRUPTED;
2269  		}
2270  		de = de2;
2271  	}
2272  	de->rec_len = ext4_rec_len_to_disk(data2 + (blocksize - csum_size) -
2273  					   (char *) de, blocksize);
2274  
2275  	if (csum_size)
2276  		ext4_initialize_dirent_tail(bh2, blocksize);
2277  
2278  	/* Initialize the root; the dot dirents already exist */
2279  	de = (struct ext4_dir_entry_2 *) (&root->dotdot);
2280  	de->rec_len = ext4_rec_len_to_disk(
2281  			blocksize - ext4_dir_rec_len(2, NULL), blocksize);
2282  	memset (&root->info, 0, sizeof(root->info));
2283  	root->info.info_length = sizeof(root->info);
2284  	if (ext4_hash_in_dirent(dir))
2285  		root->info.hash_version = DX_HASH_SIPHASH;
2286  	else
2287  		root->info.hash_version =
2288  				EXT4_SB(dir->i_sb)->s_def_hash_version;
2289  
2290  	entries = root->entries;
2291  	dx_set_block(entries, 1);
2292  	dx_set_count(entries, 1);
2293  	dx_set_limit(entries, dx_root_limit(dir, sizeof(root->info)));
2294  
2295  	/* Initialize as for dx_probe */
2296  	fname->hinfo.hash_version = root->info.hash_version;
2297  	if (fname->hinfo.hash_version <= DX_HASH_TEA)
2298  		fname->hinfo.hash_version += EXT4_SB(dir->i_sb)->s_hash_unsigned;
2299  	fname->hinfo.seed = EXT4_SB(dir->i_sb)->s_hash_seed;
2300  
2301  	/* casefolded encrypted hashes are computed on fname setup */
2302  	if (!ext4_hash_in_dirent(dir))
2303  		ext4fs_dirhash(dir, fname_name(fname),
2304  				fname_len(fname), &fname->hinfo);
2305  
2306  	memset(frames, 0, sizeof(frames));
2307  	frame = frames;
2308  	frame->entries = entries;
2309  	frame->at = entries;
2310  	frame->bh = bh;
2311  
2312  	retval = ext4_handle_dirty_dx_node(handle, dir, frame->bh);
2313  	if (retval)
2314  		goto out_frames;
2315  	retval = ext4_handle_dirty_dirblock(handle, dir, bh2);
2316  	if (retval)
2317  		goto out_frames;
2318  
2319  	de = do_split(handle,dir, &bh2, frame, &fname->hinfo);
2320  	if (IS_ERR(de)) {
2321  		retval = PTR_ERR(de);
2322  		goto out_frames;
2323  	}
2324  
2325  	retval = add_dirent_to_buf(handle, fname, dir, inode, de, bh2);
2326  out_frames:
2327  	/*
2328  	 * Even if the block split failed, we have to properly write
2329  	 * out all the changes we did so far. Otherwise we can end up
2330  	 * with corrupted filesystem.
2331  	 */
2332  	if (retval)
2333  		ext4_mark_inode_dirty(handle, dir);
2334  	dx_release(frames);
2335  	brelse(bh2);
2336  	return retval;
2337  }
2338  
/*
 *	ext4_add_entry()
 *
 * Adds an entry for @inode, named by @dentry, to the parent directory of
 * @dentry.
 *
 * The strategies are tried in this order: inline data, htree index, linear
 * scan of the existing blocks (converting a full single-block directory to
 * an indexed one when the dir_index feature is enabled), and finally
 * appending a fresh block.
 *
 * Returns 0 on success or a negative errno.  On success the
 * EXT4_STATE_NEWENTRY state is set on @inode.
 */
static int ext4_add_entry(handle_t *handle, struct dentry *dentry,
			  struct inode *inode)
{
	struct inode *dir = d_inode(dentry->d_parent);
	struct buffer_head *bh = NULL;
	struct ext4_dir_entry_2 *de;
	struct super_block *sb;
	struct ext4_filename fname;
	int	retval;
	int	dx_fallback=0;
	unsigned blocksize;
	ext4_lblk_t block, blocks;
	int	csum_size = 0;

	if (ext4_has_metadata_csum(inode->i_sb))
		csum_size = sizeof(struct ext4_dir_entry_tail);

	sb = dir->i_sb;
	blocksize = sb->s_blocksize;
	if (!dentry->d_name.len)
		return -EINVAL;

	if (fscrypt_is_nokey_name(dentry))
		return -ENOKEY;

#if IS_ENABLED(CONFIG_UNICODE)
	/* Strict-encoding casefolded directories reject names that fail validation */
	if (sb_has_strict_encoding(sb) && IS_CASEFOLDED(dir) &&
	    sb->s_encoding && utf8_validate(sb->s_encoding, &dentry->d_name))
		return -EINVAL;
#endif

	/* From here on, every exit must go through "out" to free fname */
	retval = ext4_fname_setup_filename(dir, &dentry->d_name, 0, &fname);
	if (retval)
		return retval;

	if (ext4_has_inline_data(dir)) {
		retval = ext4_try_add_inline_entry(handle, &fname, dir, inode);
		if (retval < 0)
			goto out;
		/* 1 is treated as "entry added"; other values fall through */
		if (retval == 1) {
			retval = 0;
			goto out;
		}
	}

	if (is_dx(dir)) {
		retval = ext4_dx_add_entry(handle, &fname, dir, inode);
		/* Only ERR_BAD_DX_DIR falls through to the linear path */
		if (!retval || (retval != ERR_BAD_DX_DIR))
			goto out;
		/* Can we just ignore htree data? */
		if (ext4_has_metadata_csum(sb)) {
			EXT4_ERROR_INODE(dir,
				"Directory has corrupted htree index.");
			retval = -EFSCORRUPTED;
			goto out;
		}
		/* Drop the index flag and remember not to re-index below */
		ext4_clear_inode_flag(dir, EXT4_INODE_INDEX);
		dx_fallback++;
		retval = ext4_mark_inode_dirty(handle, dir);
		if (unlikely(retval))
			goto out;
	}
	blocks = dir->i_size >> sb->s_blocksize_bits;
	for (block = 0; block < blocks; block++) {
		bh = ext4_read_dirblock(dir, block, DIRENT);
		if (bh == NULL) {
			/*
			 * NOTE(review): NULL here presumably means a hole in
			 * the directory; the block is allocated and then
			 * initialised as an empty block below.
			 */
			bh = ext4_bread(handle, dir, block,
					EXT4_GET_BLOCKS_CREATE);
			goto add_to_new_block;
		}
		if (IS_ERR(bh)) {
			retval = PTR_ERR(bh);
			bh = NULL;
			goto out;
		}
		retval = add_dirent_to_buf(handle, &fname, dir, inode,
					   NULL, bh);
		/* Anything but "block full" (success included) ends the search */
		if (retval != -ENOSPC)
			goto out;

		if (blocks == 1 && !dx_fallback &&
		    ext4_has_feature_dir_index(sb)) {
			retval = make_indexed_dir(handle, &fname, dir,
						  inode, bh);
			bh = NULL; /* make_indexed_dir releases bh */
			goto out;
		}
		brelse(bh);
	}
	/* Every existing block is full: grow the directory by one block */
	bh = ext4_append(handle, dir, &block);
add_to_new_block:
	if (IS_ERR(bh)) {
		retval = PTR_ERR(bh);
		bh = NULL;
		goto out;
	}
	/* Set up one empty entry spanning the block (minus any csum tail) */
	de = (struct ext4_dir_entry_2 *) bh->b_data;
	de->inode = 0;
	de->rec_len = ext4_rec_len_to_disk(blocksize - csum_size, blocksize);

	if (csum_size)
		ext4_initialize_dirent_tail(bh, blocksize);

	retval = add_dirent_to_buf(handle, &fname, dir, inode, de, bh);
out:
	ext4_fname_free_filename(&fname);
	brelse(bh);
	if (retval == 0)
		ext4_set_inode_state(inode, EXT4_STATE_NEWENTRY);
	return retval;
}
2460  
2461  /*
2462   * Returns 0 for success, or a negative error value
2463   */
2464  static int ext4_dx_add_entry(handle_t *handle, struct ext4_filename *fname,
2465  			     struct inode *dir, struct inode *inode)
2466  {
2467  	struct dx_frame frames[EXT4_HTREE_LEVEL], *frame;
2468  	struct dx_entry *entries, *at;
2469  	struct buffer_head *bh;
2470  	struct super_block *sb = dir->i_sb;
2471  	struct ext4_dir_entry_2 *de;
2472  	int restart;
2473  	int err;
2474  
2475  again:
2476  	restart = 0;
2477  	frame = dx_probe(fname, dir, NULL, frames);
2478  	if (IS_ERR(frame))
2479  		return PTR_ERR(frame);
2480  	entries = frame->entries;
2481  	at = frame->at;
2482  	bh = ext4_read_dirblock(dir, dx_get_block(frame->at), DIRENT_HTREE);
2483  	if (IS_ERR(bh)) {
2484  		err = PTR_ERR(bh);
2485  		bh = NULL;
2486  		goto cleanup;
2487  	}
2488  
2489  	BUFFER_TRACE(bh, "get_write_access");
2490  	err = ext4_journal_get_write_access(handle, sb, bh, EXT4_JTR_NONE);
2491  	if (err)
2492  		goto journal_error;
2493  
2494  	err = add_dirent_to_buf(handle, fname, dir, inode, NULL, bh);
2495  	if (err != -ENOSPC)
2496  		goto cleanup;
2497  
2498  	err = 0;
2499  	/* Block full, should compress but for now just split */
2500  	dxtrace(printk(KERN_DEBUG "using %u of %u node entries\n",
2501  		       dx_get_count(entries), dx_get_limit(entries)));
2502  	/* Need to split index? */
2503  	if (dx_get_count(entries) == dx_get_limit(entries)) {
2504  		ext4_lblk_t newblock;
2505  		int levels = frame - frames + 1;
2506  		unsigned int icount;
2507  		int add_level = 1;
2508  		struct dx_entry *entries2;
2509  		struct dx_node *node2;
2510  		struct buffer_head *bh2;
2511  
2512  		while (frame > frames) {
2513  			if (dx_get_count((frame - 1)->entries) <
2514  			    dx_get_limit((frame - 1)->entries)) {
2515  				add_level = 0;
2516  				break;
2517  			}
2518  			frame--; /* split higher index block */
2519  			at = frame->at;
2520  			entries = frame->entries;
2521  			restart = 1;
2522  		}
2523  		if (add_level && levels == ext4_dir_htree_level(sb)) {
2524  			ext4_warning(sb, "Directory (ino: %lu) index full, "
2525  					 "reach max htree level :%d",
2526  					 dir->i_ino, levels);
2527  			if (ext4_dir_htree_level(sb) < EXT4_HTREE_LEVEL) {
2528  				ext4_warning(sb, "Large directory feature is "
2529  						 "not enabled on this "
2530  						 "filesystem");
2531  			}
2532  			err = -ENOSPC;
2533  			goto cleanup;
2534  		}
2535  		icount = dx_get_count(entries);
2536  		bh2 = ext4_append(handle, dir, &newblock);
2537  		if (IS_ERR(bh2)) {
2538  			err = PTR_ERR(bh2);
2539  			goto cleanup;
2540  		}
2541  		node2 = (struct dx_node *)(bh2->b_data);
2542  		entries2 = node2->entries;
2543  		memset(&node2->fake, 0, sizeof(struct fake_dirent));
2544  		node2->fake.rec_len = ext4_rec_len_to_disk(sb->s_blocksize,
2545  							   sb->s_blocksize);
2546  		BUFFER_TRACE(frame->bh, "get_write_access");
2547  		err = ext4_journal_get_write_access(handle, sb, frame->bh,
2548  						    EXT4_JTR_NONE);
2549  		if (err)
2550  			goto journal_error;
2551  		if (!add_level) {
2552  			unsigned icount1 = icount/2, icount2 = icount - icount1;
2553  			unsigned hash2 = dx_get_hash(entries + icount1);
2554  			dxtrace(printk(KERN_DEBUG "Split index %i/%i\n",
2555  				       icount1, icount2));
2556  
2557  			BUFFER_TRACE(frame->bh, "get_write_access"); /* index root */
2558  			err = ext4_journal_get_write_access(handle, sb,
2559  							    (frame - 1)->bh,
2560  							    EXT4_JTR_NONE);
2561  			if (err)
2562  				goto journal_error;
2563  
2564  			memcpy((char *) entries2, (char *) (entries + icount1),
2565  			       icount2 * sizeof(struct dx_entry));
2566  			dx_set_count(entries, icount1);
2567  			dx_set_count(entries2, icount2);
2568  			dx_set_limit(entries2, dx_node_limit(dir));
2569  
2570  			/* Which index block gets the new entry? */
2571  			if (at - entries >= icount1) {
2572  				frame->at = at - entries - icount1 + entries2;
2573  				frame->entries = entries = entries2;
2574  				swap(frame->bh, bh2);
2575  			}
2576  			dx_insert_block((frame - 1), hash2, newblock);
2577  			dxtrace(dx_show_index("node", frame->entries));
2578  			dxtrace(dx_show_index("node",
2579  			       ((struct dx_node *) bh2->b_data)->entries));
2580  			err = ext4_handle_dirty_dx_node(handle, dir, bh2);
2581  			if (err)
2582  				goto journal_error;
2583  			brelse (bh2);
2584  			err = ext4_handle_dirty_dx_node(handle, dir,
2585  						   (frame - 1)->bh);
2586  			if (err)
2587  				goto journal_error;
2588  			err = ext4_handle_dirty_dx_node(handle, dir,
2589  							frame->bh);
2590  			if (restart || err)
2591  				goto journal_error;
2592  		} else {
2593  			struct dx_root *dxroot;
2594  			memcpy((char *) entries2, (char *) entries,
2595  			       icount * sizeof(struct dx_entry));
2596  			dx_set_limit(entries2, dx_node_limit(dir));
2597  
2598  			/* Set up root */
2599  			dx_set_count(entries, 1);
2600  			dx_set_block(entries + 0, newblock);
2601  			dxroot = (struct dx_root *)frames[0].bh->b_data;
2602  			dxroot->info.indirect_levels += 1;
2603  			dxtrace(printk(KERN_DEBUG
2604  				       "Creating %d level index...\n",
2605  				       dxroot->info.indirect_levels));
2606  			err = ext4_handle_dirty_dx_node(handle, dir, frame->bh);
2607  			if (err)
2608  				goto journal_error;
2609  			err = ext4_handle_dirty_dx_node(handle, dir, bh2);
2610  			brelse(bh2);
2611  			restart = 1;
2612  			goto journal_error;
2613  		}
2614  	}
2615  	de = do_split(handle, dir, &bh, frame, &fname->hinfo);
2616  	if (IS_ERR(de)) {
2617  		err = PTR_ERR(de);
2618  		goto cleanup;
2619  	}
2620  	err = add_dirent_to_buf(handle, fname, dir, inode, de, bh);
2621  	goto cleanup;
2622  
2623  journal_error:
2624  	ext4_std_error(dir->i_sb, err); /* this is a no-op if err == 0 */
2625  cleanup:
2626  	brelse(bh);
2627  	dx_release(frames);
2628  	/* @restart is true means htree-path has been changed, we need to
2629  	 * repeat dx_probe() to find out valid htree-path
2630  	 */
2631  	if (restart && err == 0)
2632  		goto again;
2633  	return err;
2634  }
2635  
2636  /*
2637   * ext4_generic_delete_entry deletes a directory entry by merging it
2638   * with the previous entry
2639   */
2640  int ext4_generic_delete_entry(struct inode *dir,
2641  			      struct ext4_dir_entry_2 *de_del,
2642  			      struct buffer_head *bh,
2643  			      void *entry_buf,
2644  			      int buf_size,
2645  			      int csum_size)
2646  {
2647  	struct ext4_dir_entry_2 *de, *pde;
2648  	unsigned int blocksize = dir->i_sb->s_blocksize;
2649  	int i;
2650  
2651  	i = 0;
2652  	pde = NULL;
2653  	de = entry_buf;
2654  	while (i < buf_size - csum_size) {
2655  		if (ext4_check_dir_entry(dir, NULL, de, bh,
2656  					 entry_buf, buf_size, i))
2657  			return -EFSCORRUPTED;
2658  		if (de == de_del)  {
2659  			if (pde) {
2660  				pde->rec_len = ext4_rec_len_to_disk(
2661  					ext4_rec_len_from_disk(pde->rec_len,
2662  							       blocksize) +
2663  					ext4_rec_len_from_disk(de->rec_len,
2664  							       blocksize),
2665  					blocksize);
2666  
2667  				/* wipe entire dir_entry */
2668  				memset(de, 0, ext4_rec_len_from_disk(de->rec_len,
2669  								blocksize));
2670  			} else {
2671  				/* wipe dir_entry excluding the rec_len field */
2672  				de->inode = 0;
2673  				memset(&de->name_len, 0,
2674  					ext4_rec_len_from_disk(de->rec_len,
2675  								blocksize) -
2676  					offsetof(struct ext4_dir_entry_2,
2677  								name_len));
2678  			}
2679  
2680  			inode_inc_iversion(dir);
2681  			return 0;
2682  		}
2683  		i += ext4_rec_len_from_disk(de->rec_len, blocksize);
2684  		pde = de;
2685  		de = ext4_next_entry(de, blocksize);
2686  	}
2687  	return -ENOENT;
2688  }
2689  
2690  static int ext4_delete_entry(handle_t *handle,
2691  			     struct inode *dir,
2692  			     struct ext4_dir_entry_2 *de_del,
2693  			     struct buffer_head *bh)
2694  {
2695  	int err, csum_size = 0;
2696  
2697  	if (ext4_has_inline_data(dir)) {
2698  		int has_inline_data = 1;
2699  		err = ext4_delete_inline_entry(handle, dir, de_del, bh,
2700  					       &has_inline_data);
2701  		if (has_inline_data)
2702  			return err;
2703  	}
2704  
2705  	if (ext4_has_metadata_csum(dir->i_sb))
2706  		csum_size = sizeof(struct ext4_dir_entry_tail);
2707  
2708  	BUFFER_TRACE(bh, "get_write_access");
2709  	err = ext4_journal_get_write_access(handle, dir->i_sb, bh,
2710  					    EXT4_JTR_NONE);
2711  	if (unlikely(err))
2712  		goto out;
2713  
2714  	err = ext4_generic_delete_entry(dir, de_del, bh, bh->b_data,
2715  					dir->i_sb->s_blocksize, csum_size);
2716  	if (err)
2717  		goto out;
2718  
2719  	BUFFER_TRACE(bh, "call ext4_handle_dirty_metadata");
2720  	err = ext4_handle_dirty_dirblock(handle, dir, bh);
2721  	if (unlikely(err))
2722  		goto out;
2723  
2724  	return 0;
2725  out:
2726  	if (err != -ENOENT)
2727  		ext4_std_error(dir->i_sb, err);
2728  	return err;
2729  }
2730  
2731  /*
2732   * Set directory link count to 1 if nlinks > EXT4_LINK_MAX, or if nlinks == 2
2733   * since this indicates that nlinks count was previously 1 to avoid overflowing
2734   * the 16-bit i_links_count field on disk.  Directories with i_nlink == 1 mean
2735   * that subdirectory link counts are not being maintained accurately.
2736   *
2737   * The caller has already checked for i_nlink overflow in case the DIR_LINK
2738   * feature is not enabled and returned -EMLINK.  The is_dx() check is a proxy
2739   * for checking S_ISDIR(inode) (since the INODE_INDEX feature will not be set
2740   * on regular files) and to avoid creating huge/slow non-HTREE directories.
2741   */
2742  static void ext4_inc_count(struct inode *inode)
2743  {
2744  	inc_nlink(inode);
2745  	if (is_dx(inode) &&
2746  	    (inode->i_nlink > EXT4_LINK_MAX || inode->i_nlink == 2))
2747  		set_nlink(inode, 1);
2748  }
2749  
2750  /*
2751   * If a directory had nlink == 1, then we should let it be 1. This indicates
2752   * directory has >EXT4_LINK_MAX subdirs.
2753   */
2754  static void ext4_dec_count(struct inode *inode)
2755  {
2756  	if (!S_ISDIR(inode->i_mode) || inode->i_nlink > 2)
2757  		drop_nlink(inode);
2758  }
2759  
2760  
2761  /*
2762   * Add non-directory inode to a directory. On success, the inode reference is
2763   * consumed by dentry is instantiation. This is also indicated by clearing of
2764   * *inodep pointer. On failure, the caller is responsible for dropping the
2765   * inode reference in the safe context.
2766   */
2767  static int ext4_add_nondir(handle_t *handle,
2768  		struct dentry *dentry, struct inode **inodep)
2769  {
2770  	struct inode *dir = d_inode(dentry->d_parent);
2771  	struct inode *inode = *inodep;
2772  	int err = ext4_add_entry(handle, dentry, inode);
2773  	if (!err) {
2774  		err = ext4_mark_inode_dirty(handle, inode);
2775  		if (IS_DIRSYNC(dir))
2776  			ext4_handle_sync(handle);
2777  		d_instantiate_new(dentry, inode);
2778  		*inodep = NULL;
2779  		return err;
2780  	}
2781  	drop_nlink(inode);
2782  	ext4_orphan_add(handle, inode);
2783  	unlock_new_inode(inode);
2784  	return err;
2785  }
2786  
2787  /*
2788   * By the time this is called, we already have created
2789   * the directory cache entry for the new file, but it
2790   * is so far negative - it has no inode.
2791   *
2792   * If the create succeeds, we fill in the inode information
2793   * with d_instantiate().
2794   */
2795  static int ext4_create(struct user_namespace *mnt_userns, struct inode *dir,
2796  		       struct dentry *dentry, umode_t mode, bool excl)
2797  {
2798  	handle_t *handle;
2799  	struct inode *inode;
2800  	int err, credits, retries = 0;
2801  
2802  	err = dquot_initialize(dir);
2803  	if (err)
2804  		return err;
2805  
2806  	credits = (EXT4_DATA_TRANS_BLOCKS(dir->i_sb) +
2807  		   EXT4_INDEX_EXTRA_TRANS_BLOCKS + 3);
2808  retry:
2809  	inode = ext4_new_inode_start_handle(mnt_userns, dir, mode, &dentry->d_name,
2810  					    0, NULL, EXT4_HT_DIR, credits);
2811  	handle = ext4_journal_current_handle();
2812  	err = PTR_ERR(inode);
2813  	if (!IS_ERR(inode)) {
2814  		inode->i_op = &ext4_file_inode_operations;
2815  		inode->i_fop = &ext4_file_operations;
2816  		ext4_set_aops(inode);
2817  		err = ext4_add_nondir(handle, dentry, &inode);
2818  		if (!err)
2819  			ext4_fc_track_create(handle, dentry);
2820  	}
2821  	if (handle)
2822  		ext4_journal_stop(handle);
2823  	if (!IS_ERR_OR_NULL(inode))
2824  		iput(inode);
2825  	if (err == -ENOSPC && ext4_should_retry_alloc(dir->i_sb, &retries))
2826  		goto retry;
2827  	return err;
2828  }
2829  
2830  static int ext4_mknod(struct user_namespace *mnt_userns, struct inode *dir,
2831  		      struct dentry *dentry, umode_t mode, dev_t rdev)
2832  {
2833  	handle_t *handle;
2834  	struct inode *inode;
2835  	int err, credits, retries = 0;
2836  
2837  	err = dquot_initialize(dir);
2838  	if (err)
2839  		return err;
2840  
2841  	credits = (EXT4_DATA_TRANS_BLOCKS(dir->i_sb) +
2842  		   EXT4_INDEX_EXTRA_TRANS_BLOCKS + 3);
2843  retry:
2844  	inode = ext4_new_inode_start_handle(mnt_userns, dir, mode, &dentry->d_name,
2845  					    0, NULL, EXT4_HT_DIR, credits);
2846  	handle = ext4_journal_current_handle();
2847  	err = PTR_ERR(inode);
2848  	if (!IS_ERR(inode)) {
2849  		init_special_inode(inode, inode->i_mode, rdev);
2850  		inode->i_op = &ext4_special_inode_operations;
2851  		err = ext4_add_nondir(handle, dentry, &inode);
2852  		if (!err)
2853  			ext4_fc_track_create(handle, dentry);
2854  	}
2855  	if (handle)
2856  		ext4_journal_stop(handle);
2857  	if (!IS_ERR_OR_NULL(inode))
2858  		iput(inode);
2859  	if (err == -ENOSPC && ext4_should_retry_alloc(dir->i_sb, &retries))
2860  		goto retry;
2861  	return err;
2862  }
2863  
2864  static int ext4_tmpfile(struct user_namespace *mnt_userns, struct inode *dir,
2865  			struct file *file, umode_t mode)
2866  {
2867  	handle_t *handle;
2868  	struct inode *inode;
2869  	int err, retries = 0;
2870  
2871  	err = dquot_initialize(dir);
2872  	if (err)
2873  		return err;
2874  
2875  retry:
2876  	inode = ext4_new_inode_start_handle(mnt_userns, dir, mode,
2877  					    NULL, 0, NULL,
2878  					    EXT4_HT_DIR,
2879  			EXT4_MAXQUOTAS_INIT_BLOCKS(dir->i_sb) +
2880  			  4 + EXT4_XATTR_TRANS_BLOCKS);
2881  	handle = ext4_journal_current_handle();
2882  	err = PTR_ERR(inode);
2883  	if (!IS_ERR(inode)) {
2884  		inode->i_op = &ext4_file_inode_operations;
2885  		inode->i_fop = &ext4_file_operations;
2886  		ext4_set_aops(inode);
2887  		d_tmpfile(file, inode);
2888  		err = ext4_orphan_add(handle, inode);
2889  		if (err)
2890  			goto err_unlock_inode;
2891  		mark_inode_dirty(inode);
2892  		unlock_new_inode(inode);
2893  	}
2894  	if (handle)
2895  		ext4_journal_stop(handle);
2896  	if (err == -ENOSPC && ext4_should_retry_alloc(dir->i_sb, &retries))
2897  		goto retry;
2898  	return finish_open_simple(file, err);
2899  err_unlock_inode:
2900  	ext4_journal_stop(handle);
2901  	unlock_new_inode(inode);
2902  	return err;
2903  }
2904  
2905  struct ext4_dir_entry_2 *ext4_init_dot_dotdot(struct inode *inode,
2906  			  struct ext4_dir_entry_2 *de,
2907  			  int blocksize, int csum_size,
2908  			  unsigned int parent_ino, int dotdot_real_len)
2909  {
2910  	de->inode = cpu_to_le32(inode->i_ino);
2911  	de->name_len = 1;
2912  	de->rec_len = ext4_rec_len_to_disk(ext4_dir_rec_len(de->name_len, NULL),
2913  					   blocksize);
2914  	strcpy(de->name, ".");
2915  	ext4_set_de_type(inode->i_sb, de, S_IFDIR);
2916  
2917  	de = ext4_next_entry(de, blocksize);
2918  	de->inode = cpu_to_le32(parent_ino);
2919  	de->name_len = 2;
2920  	if (!dotdot_real_len)
2921  		de->rec_len = ext4_rec_len_to_disk(blocksize -
2922  					(csum_size + ext4_dir_rec_len(1, NULL)),
2923  					blocksize);
2924  	else
2925  		de->rec_len = ext4_rec_len_to_disk(
2926  					ext4_dir_rec_len(de->name_len, NULL),
2927  					blocksize);
2928  	strcpy(de->name, "..");
2929  	ext4_set_de_type(inode->i_sb, de, S_IFDIR);
2930  
2931  	return ext4_next_entry(de, blocksize);
2932  }
2933  
2934  int ext4_init_new_dir(handle_t *handle, struct inode *dir,
2935  			     struct inode *inode)
2936  {
2937  	struct buffer_head *dir_block = NULL;
2938  	struct ext4_dir_entry_2 *de;
2939  	ext4_lblk_t block = 0;
2940  	unsigned int blocksize = dir->i_sb->s_blocksize;
2941  	int csum_size = 0;
2942  	int err;
2943  
2944  	if (ext4_has_metadata_csum(dir->i_sb))
2945  		csum_size = sizeof(struct ext4_dir_entry_tail);
2946  
2947  	if (ext4_test_inode_state(inode, EXT4_STATE_MAY_INLINE_DATA)) {
2948  		err = ext4_try_create_inline_dir(handle, dir, inode);
2949  		if (err < 0 && err != -ENOSPC)
2950  			goto out;
2951  		if (!err)
2952  			goto out;
2953  	}
2954  
2955  	inode->i_size = 0;
2956  	dir_block = ext4_append(handle, inode, &block);
2957  	if (IS_ERR(dir_block))
2958  		return PTR_ERR(dir_block);
2959  	de = (struct ext4_dir_entry_2 *)dir_block->b_data;
2960  	ext4_init_dot_dotdot(inode, de, blocksize, csum_size, dir->i_ino, 0);
2961  	set_nlink(inode, 2);
2962  	if (csum_size)
2963  		ext4_initialize_dirent_tail(dir_block, blocksize);
2964  
2965  	BUFFER_TRACE(dir_block, "call ext4_handle_dirty_metadata");
2966  	err = ext4_handle_dirty_dirblock(handle, inode, dir_block);
2967  	if (err)
2968  		goto out;
2969  	set_buffer_verified(dir_block);
2970  out:
2971  	brelse(dir_block);
2972  	return err;
2973  }
2974  
2975  static int ext4_mkdir(struct user_namespace *mnt_userns, struct inode *dir,
2976  		      struct dentry *dentry, umode_t mode)
2977  {
2978  	handle_t *handle;
2979  	struct inode *inode;
2980  	int err, err2 = 0, credits, retries = 0;
2981  
2982  	if (EXT4_DIR_LINK_MAX(dir))
2983  		return -EMLINK;
2984  
2985  	err = dquot_initialize(dir);
2986  	if (err)
2987  		return err;
2988  
2989  	credits = (EXT4_DATA_TRANS_BLOCKS(dir->i_sb) +
2990  		   EXT4_INDEX_EXTRA_TRANS_BLOCKS + 3);
2991  retry:
2992  	inode = ext4_new_inode_start_handle(mnt_userns, dir, S_IFDIR | mode,
2993  					    &dentry->d_name,
2994  					    0, NULL, EXT4_HT_DIR, credits);
2995  	handle = ext4_journal_current_handle();
2996  	err = PTR_ERR(inode);
2997  	if (IS_ERR(inode))
2998  		goto out_stop;
2999  
3000  	inode->i_op = &ext4_dir_inode_operations;
3001  	inode->i_fop = &ext4_dir_operations;
3002  	err = ext4_init_new_dir(handle, dir, inode);
3003  	if (err)
3004  		goto out_clear_inode;
3005  	err = ext4_mark_inode_dirty(handle, inode);
3006  	if (!err)
3007  		err = ext4_add_entry(handle, dentry, inode);
3008  	if (err) {
3009  out_clear_inode:
3010  		clear_nlink(inode);
3011  		ext4_orphan_add(handle, inode);
3012  		unlock_new_inode(inode);
3013  		err2 = ext4_mark_inode_dirty(handle, inode);
3014  		if (unlikely(err2))
3015  			err = err2;
3016  		ext4_journal_stop(handle);
3017  		iput(inode);
3018  		goto out_retry;
3019  	}
3020  	ext4_inc_count(dir);
3021  
3022  	ext4_update_dx_flag(dir);
3023  	err = ext4_mark_inode_dirty(handle, dir);
3024  	if (err)
3025  		goto out_clear_inode;
3026  	d_instantiate_new(dentry, inode);
3027  	ext4_fc_track_create(handle, dentry);
3028  	if (IS_DIRSYNC(dir))
3029  		ext4_handle_sync(handle);
3030  
3031  out_stop:
3032  	if (handle)
3033  		ext4_journal_stop(handle);
3034  out_retry:
3035  	if (err == -ENOSPC && ext4_should_retry_alloc(dir->i_sb, &retries))
3036  		goto retry;
3037  	return err;
3038  }
3039  
3040  /*
3041   * routine to check that the specified directory is empty (for rmdir)
3042   */
3043  bool ext4_empty_dir(struct inode *inode)
3044  {
3045  	unsigned int offset;
3046  	struct buffer_head *bh;
3047  	struct ext4_dir_entry_2 *de;
3048  	struct super_block *sb;
3049  
3050  	if (ext4_has_inline_data(inode)) {
3051  		int has_inline_data = 1;
3052  		int ret;
3053  
3054  		ret = empty_inline_dir(inode, &has_inline_data);
3055  		if (has_inline_data)
3056  			return ret;
3057  	}
3058  
3059  	sb = inode->i_sb;
3060  	if (inode->i_size < ext4_dir_rec_len(1, NULL) +
3061  					ext4_dir_rec_len(2, NULL)) {
3062  		EXT4_ERROR_INODE(inode, "invalid size");
3063  		return false;
3064  	}
3065  	/* The first directory block must not be a hole,
3066  	 * so treat it as DIRENT_HTREE
3067  	 */
3068  	bh = ext4_read_dirblock(inode, 0, DIRENT_HTREE);
3069  	if (IS_ERR(bh))
3070  		return false;
3071  
3072  	de = (struct ext4_dir_entry_2 *) bh->b_data;
3073  	if (ext4_check_dir_entry(inode, NULL, de, bh, bh->b_data, bh->b_size,
3074  				 0) ||
3075  	    le32_to_cpu(de->inode) != inode->i_ino || strcmp(".", de->name)) {
3076  		ext4_warning_inode(inode, "directory missing '.'");
3077  		brelse(bh);
3078  		return false;
3079  	}
3080  	offset = ext4_rec_len_from_disk(de->rec_len, sb->s_blocksize);
3081  	de = ext4_next_entry(de, sb->s_blocksize);
3082  	if (ext4_check_dir_entry(inode, NULL, de, bh, bh->b_data, bh->b_size,
3083  				 offset) ||
3084  	    le32_to_cpu(de->inode) == 0 || strcmp("..", de->name)) {
3085  		ext4_warning_inode(inode, "directory missing '..'");
3086  		brelse(bh);
3087  		return false;
3088  	}
3089  	offset += ext4_rec_len_from_disk(de->rec_len, sb->s_blocksize);
3090  	while (offset < inode->i_size) {
3091  		if (!(offset & (sb->s_blocksize - 1))) {
3092  			unsigned int lblock;
3093  			brelse(bh);
3094  			lblock = offset >> EXT4_BLOCK_SIZE_BITS(sb);
3095  			bh = ext4_read_dirblock(inode, lblock, EITHER);
3096  			if (bh == NULL) {
3097  				offset += sb->s_blocksize;
3098  				continue;
3099  			}
3100  			if (IS_ERR(bh))
3101  				return false;
3102  		}
3103  		de = (struct ext4_dir_entry_2 *) (bh->b_data +
3104  					(offset & (sb->s_blocksize - 1)));
3105  		if (ext4_check_dir_entry(inode, NULL, de, bh,
3106  					 bh->b_data, bh->b_size, offset) ||
3107  		    le32_to_cpu(de->inode)) {
3108  			brelse(bh);
3109  			return false;
3110  		}
3111  		offset += ext4_rec_len_from_disk(de->rec_len, sb->s_blocksize);
3112  	}
3113  	brelse(bh);
3114  	return true;
3115  }
3116  
3117  static int ext4_rmdir(struct inode *dir, struct dentry *dentry)
3118  {
3119  	int retval;
3120  	struct inode *inode;
3121  	struct buffer_head *bh;
3122  	struct ext4_dir_entry_2 *de;
3123  	handle_t *handle = NULL;
3124  
3125  	if (unlikely(ext4_forced_shutdown(EXT4_SB(dir->i_sb))))
3126  		return -EIO;
3127  
3128  	/* Initialize quotas before so that eventual writes go in
3129  	 * separate transaction */
3130  	retval = dquot_initialize(dir);
3131  	if (retval)
3132  		return retval;
3133  	retval = dquot_initialize(d_inode(dentry));
3134  	if (retval)
3135  		return retval;
3136  
3137  	retval = -ENOENT;
3138  	bh = ext4_find_entry(dir, &dentry->d_name, &de, NULL);
3139  	if (IS_ERR(bh))
3140  		return PTR_ERR(bh);
3141  	if (!bh)
3142  		goto end_rmdir;
3143  
3144  	inode = d_inode(dentry);
3145  
3146  	retval = -EFSCORRUPTED;
3147  	if (le32_to_cpu(de->inode) != inode->i_ino)
3148  		goto end_rmdir;
3149  
3150  	retval = -ENOTEMPTY;
3151  	if (!ext4_empty_dir(inode))
3152  		goto end_rmdir;
3153  
3154  	handle = ext4_journal_start(dir, EXT4_HT_DIR,
3155  				    EXT4_DATA_TRANS_BLOCKS(dir->i_sb));
3156  	if (IS_ERR(handle)) {
3157  		retval = PTR_ERR(handle);
3158  		handle = NULL;
3159  		goto end_rmdir;
3160  	}
3161  
3162  	if (IS_DIRSYNC(dir))
3163  		ext4_handle_sync(handle);
3164  
3165  	retval = ext4_delete_entry(handle, dir, de, bh);
3166  	if (retval)
3167  		goto end_rmdir;
3168  	if (!EXT4_DIR_LINK_EMPTY(inode))
3169  		ext4_warning_inode(inode,
3170  			     "empty directory '%.*s' has too many links (%u)",
3171  			     dentry->d_name.len, dentry->d_name.name,
3172  			     inode->i_nlink);
3173  	inode_inc_iversion(inode);
3174  	clear_nlink(inode);
3175  	/* There's no need to set i_disksize: the fact that i_nlink is
3176  	 * zero will ensure that the right thing happens during any
3177  	 * recovery. */
3178  	inode->i_size = 0;
3179  	ext4_orphan_add(handle, inode);
3180  	inode->i_ctime = dir->i_ctime = dir->i_mtime = current_time(inode);
3181  	retval = ext4_mark_inode_dirty(handle, inode);
3182  	if (retval)
3183  		goto end_rmdir;
3184  	ext4_dec_count(dir);
3185  	ext4_update_dx_flag(dir);
3186  	ext4_fc_track_unlink(handle, dentry);
3187  	retval = ext4_mark_inode_dirty(handle, dir);
3188  
3189  #if IS_ENABLED(CONFIG_UNICODE)
3190  	/* VFS negative dentries are incompatible with Encoding and
3191  	 * Case-insensitiveness. Eventually we'll want avoid
3192  	 * invalidating the dentries here, alongside with returning the
3193  	 * negative dentries at ext4_lookup(), when it is better
3194  	 * supported by the VFS for the CI case.
3195  	 */
3196  	if (IS_CASEFOLDED(dir))
3197  		d_invalidate(dentry);
3198  #endif
3199  
3200  end_rmdir:
3201  	brelse(bh);
3202  	if (handle)
3203  		ext4_journal_stop(handle);
3204  	return retval;
3205  }
3206  
3207  int __ext4_unlink(struct inode *dir, const struct qstr *d_name,
3208  		  struct inode *inode,
3209  		  struct dentry *dentry /* NULL during fast_commit recovery */)
3210  {
3211  	int retval = -ENOENT;
3212  	struct buffer_head *bh;
3213  	struct ext4_dir_entry_2 *de;
3214  	handle_t *handle;
3215  	int skip_remove_dentry = 0;
3216  
3217  	/*
3218  	 * Keep this outside the transaction; it may have to set up the
3219  	 * directory's encryption key, which isn't GFP_NOFS-safe.
3220  	 */
3221  	bh = ext4_find_entry(dir, d_name, &de, NULL);
3222  	if (IS_ERR(bh))
3223  		return PTR_ERR(bh);
3224  
3225  	if (!bh)
3226  		return -ENOENT;
3227  
3228  	if (le32_to_cpu(de->inode) != inode->i_ino) {
3229  		/*
3230  		 * It's okay if we find dont find dentry which matches
3231  		 * the inode. That's because it might have gotten
3232  		 * renamed to a different inode number
3233  		 */
3234  		if (EXT4_SB(inode->i_sb)->s_mount_state & EXT4_FC_REPLAY)
3235  			skip_remove_dentry = 1;
3236  		else
3237  			goto out_bh;
3238  	}
3239  
3240  	handle = ext4_journal_start(dir, EXT4_HT_DIR,
3241  				    EXT4_DATA_TRANS_BLOCKS(dir->i_sb));
3242  	if (IS_ERR(handle)) {
3243  		retval = PTR_ERR(handle);
3244  		goto out_bh;
3245  	}
3246  
3247  	if (IS_DIRSYNC(dir))
3248  		ext4_handle_sync(handle);
3249  
3250  	if (!skip_remove_dentry) {
3251  		retval = ext4_delete_entry(handle, dir, de, bh);
3252  		if (retval)
3253  			goto out_handle;
3254  		dir->i_ctime = dir->i_mtime = current_time(dir);
3255  		ext4_update_dx_flag(dir);
3256  		retval = ext4_mark_inode_dirty(handle, dir);
3257  		if (retval)
3258  			goto out_handle;
3259  	} else {
3260  		retval = 0;
3261  	}
3262  	if (inode->i_nlink == 0)
3263  		ext4_warning_inode(inode, "Deleting file '%.*s' with no links",
3264  				   d_name->len, d_name->name);
3265  	else
3266  		drop_nlink(inode);
3267  	if (!inode->i_nlink)
3268  		ext4_orphan_add(handle, inode);
3269  	inode->i_ctime = current_time(inode);
3270  	retval = ext4_mark_inode_dirty(handle, inode);
3271  	if (dentry && !retval)
3272  		ext4_fc_track_unlink(handle, dentry);
3273  out_handle:
3274  	ext4_journal_stop(handle);
3275  out_bh:
3276  	brelse(bh);
3277  	return retval;
3278  }
3279  
3280  static int ext4_unlink(struct inode *dir, struct dentry *dentry)
3281  {
3282  	int retval;
3283  
3284  	if (unlikely(ext4_forced_shutdown(EXT4_SB(dir->i_sb))))
3285  		return -EIO;
3286  
3287  	trace_ext4_unlink_enter(dir, dentry);
3288  	/*
3289  	 * Initialize quotas before so that eventual writes go
3290  	 * in separate transaction
3291  	 */
3292  	retval = dquot_initialize(dir);
3293  	if (retval)
3294  		goto out_trace;
3295  	retval = dquot_initialize(d_inode(dentry));
3296  	if (retval)
3297  		goto out_trace;
3298  
3299  	retval = __ext4_unlink(dir, &dentry->d_name, d_inode(dentry), dentry);
3300  #if IS_ENABLED(CONFIG_UNICODE)
3301  	/* VFS negative dentries are incompatible with Encoding and
3302  	 * Case-insensitiveness. Eventually we'll want avoid
3303  	 * invalidating the dentries here, alongside with returning the
3304  	 * negative dentries at ext4_lookup(), when it is  better
3305  	 * supported by the VFS for the CI case.
3306  	 */
3307  	if (IS_CASEFOLDED(dir))
3308  		d_invalidate(dentry);
3309  #endif
3310  
3311  out_trace:
3312  	trace_ext4_unlink_exit(dentry, retval);
3313  	return retval;
3314  }
3315  
3316  static int ext4_init_symlink_block(handle_t *handle, struct inode *inode,
3317  				   struct fscrypt_str *disk_link)
3318  {
3319  	struct buffer_head *bh;
3320  	char *kaddr;
3321  	int err = 0;
3322  
3323  	bh = ext4_bread(handle, inode, 0, EXT4_GET_BLOCKS_CREATE);
3324  	if (IS_ERR(bh))
3325  		return PTR_ERR(bh);
3326  
3327  	BUFFER_TRACE(bh, "get_write_access");
3328  	err = ext4_journal_get_write_access(handle, inode->i_sb, bh, EXT4_JTR_NONE);
3329  	if (err)
3330  		goto out;
3331  
3332  	kaddr = (char *)bh->b_data;
3333  	memcpy(kaddr, disk_link->name, disk_link->len);
3334  	inode->i_size = disk_link->len - 1;
3335  	EXT4_I(inode)->i_disksize = inode->i_size;
3336  	err = ext4_handle_dirty_metadata(handle, inode, bh);
3337  out:
3338  	brelse(bh);
3339  	return err;
3340  }
3341  
3342  static int ext4_symlink(struct user_namespace *mnt_userns, struct inode *dir,
3343  			struct dentry *dentry, const char *symname)
3344  {
3345  	handle_t *handle;
3346  	struct inode *inode;
3347  	int err, len = strlen(symname);
3348  	int credits;
3349  	struct fscrypt_str disk_link;
3350  	int retries = 0;
3351  
3352  	if (unlikely(ext4_forced_shutdown(EXT4_SB(dir->i_sb))))
3353  		return -EIO;
3354  
3355  	err = fscrypt_prepare_symlink(dir, symname, len, dir->i_sb->s_blocksize,
3356  				      &disk_link);
3357  	if (err)
3358  		return err;
3359  
3360  	err = dquot_initialize(dir);
3361  	if (err)
3362  		return err;
3363  
3364  	/*
3365  	 * EXT4_INDEX_EXTRA_TRANS_BLOCKS for addition of entry into the
3366  	 * directory. +3 for inode, inode bitmap, group descriptor allocation.
3367  	 * EXT4_DATA_TRANS_BLOCKS for the data block allocation and
3368  	 * modification.
3369  	 */
3370  	credits = EXT4_DATA_TRANS_BLOCKS(dir->i_sb) +
3371  		  EXT4_INDEX_EXTRA_TRANS_BLOCKS + 3;
3372  retry:
3373  	inode = ext4_new_inode_start_handle(mnt_userns, dir, S_IFLNK|S_IRWXUGO,
3374  					    &dentry->d_name, 0, NULL,
3375  					    EXT4_HT_DIR, credits);
3376  	handle = ext4_journal_current_handle();
3377  	if (IS_ERR(inode)) {
3378  		if (handle)
3379  			ext4_journal_stop(handle);
3380  		err = PTR_ERR(inode);
3381  		goto out_retry;
3382  	}
3383  
3384  	if (IS_ENCRYPTED(inode)) {
3385  		err = fscrypt_encrypt_symlink(inode, symname, len, &disk_link);
3386  		if (err)
3387  			goto err_drop_inode;
3388  		inode->i_op = &ext4_encrypted_symlink_inode_operations;
3389  	} else {
3390  		if ((disk_link.len > EXT4_N_BLOCKS * 4)) {
3391  			inode->i_op = &ext4_symlink_inode_operations;
3392  		} else {
3393  			inode->i_op = &ext4_fast_symlink_inode_operations;
3394  			inode->i_link = (char *)&EXT4_I(inode)->i_data;
3395  		}
3396  	}
3397  
3398  	if ((disk_link.len > EXT4_N_BLOCKS * 4)) {
3399  		/* alloc symlink block and fill it */
3400  		err = ext4_init_symlink_block(handle, inode, &disk_link);
3401  		if (err)
3402  			goto err_drop_inode;
3403  	} else {
3404  		/* clear the extent format for fast symlink */
3405  		ext4_clear_inode_flag(inode, EXT4_INODE_EXTENTS);
3406  		memcpy((char *)&EXT4_I(inode)->i_data, disk_link.name,
3407  		       disk_link.len);
3408  		inode->i_size = disk_link.len - 1;
3409  		EXT4_I(inode)->i_disksize = inode->i_size;
3410  	}
3411  	err = ext4_add_nondir(handle, dentry, &inode);
3412  	if (handle)
3413  		ext4_journal_stop(handle);
3414  	iput(inode);
3415  	goto out_retry;
3416  
3417  err_drop_inode:
3418  	clear_nlink(inode);
3419  	ext4_orphan_add(handle, inode);
3420  	unlock_new_inode(inode);
3421  	if (handle)
3422  		ext4_journal_stop(handle);
3423  	iput(inode);
3424  out_retry:
3425  	if (err == -ENOSPC && ext4_should_retry_alloc(dir->i_sb, &retries))
3426  		goto retry;
3427  	if (disk_link.name != (unsigned char *)symname)
3428  		kfree(disk_link.name);
3429  	return err;
3430  }
3431  
3432  int __ext4_link(struct inode *dir, struct inode *inode, struct dentry *dentry)
3433  {
3434  	handle_t *handle;
3435  	int err, retries = 0;
3436  retry:
3437  	handle = ext4_journal_start(dir, EXT4_HT_DIR,
3438  		(EXT4_DATA_TRANS_BLOCKS(dir->i_sb) +
3439  		 EXT4_INDEX_EXTRA_TRANS_BLOCKS) + 1);
3440  	if (IS_ERR(handle))
3441  		return PTR_ERR(handle);
3442  
3443  	if (IS_DIRSYNC(dir))
3444  		ext4_handle_sync(handle);
3445  
3446  	inode->i_ctime = current_time(inode);
3447  	ext4_inc_count(inode);
3448  	ihold(inode);
3449  
3450  	err = ext4_add_entry(handle, dentry, inode);
3451  	if (!err) {
3452  		err = ext4_mark_inode_dirty(handle, inode);
3453  		/* this can happen only for tmpfile being
3454  		 * linked the first time
3455  		 */
3456  		if (inode->i_nlink == 1)
3457  			ext4_orphan_del(handle, inode);
3458  		d_instantiate(dentry, inode);
3459  		ext4_fc_track_link(handle, dentry);
3460  	} else {
3461  		drop_nlink(inode);
3462  		iput(inode);
3463  	}
3464  	ext4_journal_stop(handle);
3465  	if (err == -ENOSPC && ext4_should_retry_alloc(dir->i_sb, &retries))
3466  		goto retry;
3467  	return err;
3468  }
3469  
3470  static int ext4_link(struct dentry *old_dentry,
3471  		     struct inode *dir, struct dentry *dentry)
3472  {
3473  	struct inode *inode = d_inode(old_dentry);
3474  	int err;
3475  
3476  	if (inode->i_nlink >= EXT4_LINK_MAX)
3477  		return -EMLINK;
3478  
3479  	err = fscrypt_prepare_link(old_dentry, dir, dentry);
3480  	if (err)
3481  		return err;
3482  
3483  	if ((ext4_test_inode_flag(dir, EXT4_INODE_PROJINHERIT)) &&
3484  	    (!projid_eq(EXT4_I(dir)->i_projid,
3485  			EXT4_I(old_dentry->d_inode)->i_projid)))
3486  		return -EXDEV;
3487  
3488  	err = dquot_initialize(dir);
3489  	if (err)
3490  		return err;
3491  	return __ext4_link(dir, inode, dentry);
3492  }
3493  
3494  /*
3495   * Try to find buffer head where contains the parent block.
3496   * It should be the inode block if it is inlined or the 1st block
3497   * if it is a normal dir.
3498   */
3499  static struct buffer_head *ext4_get_first_dir_block(handle_t *handle,
3500  					struct inode *inode,
3501  					int *retval,
3502  					struct ext4_dir_entry_2 **parent_de,
3503  					int *inlined)
3504  {
3505  	struct buffer_head *bh;
3506  
3507  	if (!ext4_has_inline_data(inode)) {
3508  		struct ext4_dir_entry_2 *de;
3509  		unsigned int offset;
3510  
3511  		/* The first directory block must not be a hole, so
3512  		 * treat it as DIRENT_HTREE
3513  		 */
3514  		bh = ext4_read_dirblock(inode, 0, DIRENT_HTREE);
3515  		if (IS_ERR(bh)) {
3516  			*retval = PTR_ERR(bh);
3517  			return NULL;
3518  		}
3519  
3520  		de = (struct ext4_dir_entry_2 *) bh->b_data;
3521  		if (ext4_check_dir_entry(inode, NULL, de, bh, bh->b_data,
3522  					 bh->b_size, 0) ||
3523  		    le32_to_cpu(de->inode) != inode->i_ino ||
3524  		    strcmp(".", de->name)) {
3525  			EXT4_ERROR_INODE(inode, "directory missing '.'");
3526  			brelse(bh);
3527  			*retval = -EFSCORRUPTED;
3528  			return NULL;
3529  		}
3530  		offset = ext4_rec_len_from_disk(de->rec_len,
3531  						inode->i_sb->s_blocksize);
3532  		de = ext4_next_entry(de, inode->i_sb->s_blocksize);
3533  		if (ext4_check_dir_entry(inode, NULL, de, bh, bh->b_data,
3534  					 bh->b_size, offset) ||
3535  		    le32_to_cpu(de->inode) == 0 || strcmp("..", de->name)) {
3536  			EXT4_ERROR_INODE(inode, "directory missing '..'");
3537  			brelse(bh);
3538  			*retval = -EFSCORRUPTED;
3539  			return NULL;
3540  		}
3541  		*parent_de = de;
3542  
3543  		return bh;
3544  	}
3545  
3546  	*inlined = 1;
3547  	return ext4_get_first_inline_block(inode, parent_de, retval);
3548  }
3549  
/*
 * One side (source or target) of a rename, as used by the rename helpers
 * in this file.
 */
3550  struct ext4_renament {
	/* directory that contains the entry */
3551  	struct inode *dir;
	/* dentry naming the entry; its d_name is used for lookups in dir */
3552  	struct dentry *dentry;
	/* inode of dentry; ext4_rename() checks it for NULL on the target */
3553  	struct inode *inode;
	/* NOTE(review): not referenced in the visible part of the file */
3554  	bool is_dir;
	/*
	 * Link-count change applied to dir by ext4_update_dir_count():
	 * -1 decrements, any other non-zero value increments.
	 */
3555  	int dir_nlink_delta;
3556  
3557  	/* entry for "dentry" */
3558  	struct buffer_head *bh;
3559  	struct ext4_dir_entry_2 *de;
	/* when set, ext4_setent() skips ext4_handle_dirty_dirblock() */
3560  	int inlined;
3561  
3562  	/* entry for ".." in inode if it's a directory */
3563  	struct buffer_head *dir_bh;
3564  	struct ext4_dir_entry_2 *parent_de;
	/* set to 1 by ext4_get_first_dir_block() for inline-data directories */
3565  	int dir_inlined;
3566  };
3567  
3568  static int ext4_rename_dir_prepare(handle_t *handle, struct ext4_renament *ent)
3569  {
3570  	int retval;
3571  
3572  	ent->dir_bh = ext4_get_first_dir_block(handle, ent->inode,
3573  					      &retval, &ent->parent_de,
3574  					      &ent->dir_inlined);
3575  	if (!ent->dir_bh)
3576  		return retval;
3577  	if (le32_to_cpu(ent->parent_de->inode) != ent->dir->i_ino)
3578  		return -EFSCORRUPTED;
3579  	BUFFER_TRACE(ent->dir_bh, "get_write_access");
3580  	return ext4_journal_get_write_access(handle, ent->dir->i_sb,
3581  					     ent->dir_bh, EXT4_JTR_NONE);
3582  }
3583  
3584  static int ext4_rename_dir_finish(handle_t *handle, struct ext4_renament *ent,
3585  				  unsigned dir_ino)
3586  {
3587  	int retval;
3588  
3589  	ent->parent_de->inode = cpu_to_le32(dir_ino);
3590  	BUFFER_TRACE(ent->dir_bh, "call ext4_handle_dirty_metadata");
3591  	if (!ent->dir_inlined) {
3592  		if (is_dx(ent->inode)) {
3593  			retval = ext4_handle_dirty_dx_node(handle,
3594  							   ent->inode,
3595  							   ent->dir_bh);
3596  		} else {
3597  			retval = ext4_handle_dirty_dirblock(handle, ent->inode,
3598  							    ent->dir_bh);
3599  		}
3600  	} else {
3601  		retval = ext4_mark_inode_dirty(handle, ent->inode);
3602  	}
3603  	if (retval) {
3604  		ext4_std_error(ent->dir->i_sb, retval);
3605  		return retval;
3606  	}
3607  	return 0;
3608  }
3609  
3610  static int ext4_setent(handle_t *handle, struct ext4_renament *ent,
3611  		       unsigned ino, unsigned file_type)
3612  {
3613  	int retval, retval2;
3614  
3615  	BUFFER_TRACE(ent->bh, "get write access");
3616  	retval = ext4_journal_get_write_access(handle, ent->dir->i_sb, ent->bh,
3617  					       EXT4_JTR_NONE);
3618  	if (retval)
3619  		return retval;
3620  	ent->de->inode = cpu_to_le32(ino);
3621  	if (ext4_has_feature_filetype(ent->dir->i_sb))
3622  		ent->de->file_type = file_type;
3623  	inode_inc_iversion(ent->dir);
3624  	ent->dir->i_ctime = ent->dir->i_mtime =
3625  		current_time(ent->dir);
3626  	retval = ext4_mark_inode_dirty(handle, ent->dir);
3627  	BUFFER_TRACE(ent->bh, "call ext4_handle_dirty_metadata");
3628  	if (!ent->inlined) {
3629  		retval2 = ext4_handle_dirty_dirblock(handle, ent->dir, ent->bh);
3630  		if (unlikely(retval2)) {
3631  			ext4_std_error(ent->dir->i_sb, retval2);
3632  			return retval2;
3633  		}
3634  	}
3635  	return retval;
3636  }
3637  
3638  static void ext4_resetent(handle_t *handle, struct ext4_renament *ent,
3639  			  unsigned ino, unsigned file_type)
3640  {
3641  	struct ext4_renament old = *ent;
3642  	int retval = 0;
3643  
3644  	/*
3645  	 * old->de could have moved from under us during make indexed dir,
3646  	 * so the old->de may no longer valid and need to find it again
3647  	 * before reset old inode info.
3648  	 */
3649  	old.bh = ext4_find_entry(old.dir, &old.dentry->d_name, &old.de, NULL);
3650  	if (IS_ERR(old.bh))
3651  		retval = PTR_ERR(old.bh);
3652  	if (!old.bh)
3653  		retval = -ENOENT;
3654  	if (retval) {
3655  		ext4_std_error(old.dir->i_sb, retval);
3656  		return;
3657  	}
3658  
3659  	ext4_setent(handle, &old, ino, file_type);
3660  	brelse(old.bh);
3661  }
3662  
3663  static int ext4_find_delete_entry(handle_t *handle, struct inode *dir,
3664  				  const struct qstr *d_name)
3665  {
3666  	int retval = -ENOENT;
3667  	struct buffer_head *bh;
3668  	struct ext4_dir_entry_2 *de;
3669  
3670  	bh = ext4_find_entry(dir, d_name, &de, NULL);
3671  	if (IS_ERR(bh))
3672  		return PTR_ERR(bh);
3673  	if (bh) {
3674  		retval = ext4_delete_entry(handle, dir, de, bh);
3675  		brelse(bh);
3676  	}
3677  	return retval;
3678  }
3679  
3680  static void ext4_rename_delete(handle_t *handle, struct ext4_renament *ent,
3681  			       int force_reread)
3682  {
3683  	int retval;
3684  	/*
3685  	 * ent->de could have moved from under us during htree split, so make
3686  	 * sure that we are deleting the right entry.  We might also be pointing
3687  	 * to a stale entry in the unused part of ent->bh so just checking inum
3688  	 * and the name isn't enough.
3689  	 */
3690  	if (le32_to_cpu(ent->de->inode) != ent->inode->i_ino ||
3691  	    ent->de->name_len != ent->dentry->d_name.len ||
3692  	    strncmp(ent->de->name, ent->dentry->d_name.name,
3693  		    ent->de->name_len) ||
3694  	    force_reread) {
3695  		retval = ext4_find_delete_entry(handle, ent->dir,
3696  						&ent->dentry->d_name);
3697  	} else {
3698  		retval = ext4_delete_entry(handle, ent->dir, ent->de, ent->bh);
3699  		if (retval == -ENOENT) {
3700  			retval = ext4_find_delete_entry(handle, ent->dir,
3701  							&ent->dentry->d_name);
3702  		}
3703  	}
3704  
3705  	if (retval) {
3706  		ext4_warning_inode(ent->dir,
3707  				   "Deleting old file: nlink %d, error=%d",
3708  				   ent->dir->i_nlink, retval);
3709  	}
3710  }
3711  
3712  static void ext4_update_dir_count(handle_t *handle, struct ext4_renament *ent)
3713  {
3714  	if (ent->dir_nlink_delta) {
3715  		if (ent->dir_nlink_delta == -1)
3716  			ext4_dec_count(ent->dir);
3717  		else
3718  			ext4_inc_count(ent->dir);
3719  		ext4_mark_inode_dirty(handle, ent->dir);
3720  	}
3721  }
3722  
3723  static struct inode *ext4_whiteout_for_rename(struct user_namespace *mnt_userns,
3724  					      struct ext4_renament *ent,
3725  					      int credits, handle_t **h)
3726  {
3727  	struct inode *wh;
3728  	handle_t *handle;
3729  	int retries = 0;
3730  
3731  	/*
3732  	 * for inode block, sb block, group summaries,
3733  	 * and inode bitmap
3734  	 */
3735  	credits += (EXT4_MAXQUOTAS_TRANS_BLOCKS(ent->dir->i_sb) +
3736  		    EXT4_XATTR_TRANS_BLOCKS + 4);
3737  retry:
3738  	wh = ext4_new_inode_start_handle(mnt_userns, ent->dir,
3739  					 S_IFCHR | WHITEOUT_MODE,
3740  					 &ent->dentry->d_name, 0, NULL,
3741  					 EXT4_HT_DIR, credits);
3742  
3743  	handle = ext4_journal_current_handle();
3744  	if (IS_ERR(wh)) {
3745  		if (handle)
3746  			ext4_journal_stop(handle);
3747  		if (PTR_ERR(wh) == -ENOSPC &&
3748  		    ext4_should_retry_alloc(ent->dir->i_sb, &retries))
3749  			goto retry;
3750  	} else {
3751  		*h = handle;
3752  		init_special_inode(wh, wh->i_mode, WHITEOUT_DEV);
3753  		wh->i_op = &ext4_special_inode_operations;
3754  	}
3755  	return wh;
3756  }
3757  
3758  /*
3759   * Anybody can rename anything with this: the permission checks are left to the
3760   * higher-level routines.
3761   *
3762   * n.b.  old_{dentry,inode} refers to the source dentry/inode
3763   * while new_{dentry,inode} refers to the destination dentry/inode
3764   * This comes from rename(const char *oldpath, const char *newpath)
3765   */
3766  static int ext4_rename(struct user_namespace *mnt_userns, struct inode *old_dir,
3767  		       struct dentry *old_dentry, struct inode *new_dir,
3768  		       struct dentry *new_dentry, unsigned int flags)
3769  {
3770  	handle_t *handle = NULL;
3771  	struct ext4_renament old = {
3772  		.dir = old_dir,
3773  		.dentry = old_dentry,
3774  		.inode = d_inode(old_dentry),
3775  	};
3776  	struct ext4_renament new = {
3777  		.dir = new_dir,
3778  		.dentry = new_dentry,
3779  		.inode = d_inode(new_dentry),
3780  	};
3781  	int force_reread;
3782  	int retval;
3783  	struct inode *whiteout = NULL;
3784  	int credits;
3785  	u8 old_file_type;
3786  
3787  	if (new.inode && new.inode->i_nlink == 0) {
3788  		EXT4_ERROR_INODE(new.inode,
3789  				 "target of rename is already freed");
3790  		return -EFSCORRUPTED;
3791  	}
3792  
3793  	if ((ext4_test_inode_flag(new_dir, EXT4_INODE_PROJINHERIT)) &&
3794  	    (!projid_eq(EXT4_I(new_dir)->i_projid,
3795  			EXT4_I(old_dentry->d_inode)->i_projid)))
3796  		return -EXDEV;
3797  
3798  	retval = dquot_initialize(old.dir);
3799  	if (retval)
3800  		return retval;
3801  	retval = dquot_initialize(old.inode);
3802  	if (retval)
3803  		return retval;
3804  	retval = dquot_initialize(new.dir);
3805  	if (retval)
3806  		return retval;
3807  
3808  	/* Initialize quotas before so that eventual writes go
3809  	 * in separate transaction */
3810  	if (new.inode) {
3811  		retval = dquot_initialize(new.inode);
3812  		if (retval)
3813  			return retval;
3814  	}
3815  
3816  	old.bh = ext4_find_entry(old.dir, &old.dentry->d_name, &old.de, NULL);
3817  	if (IS_ERR(old.bh))
3818  		return PTR_ERR(old.bh);
3819  	/*
3820  	 *  Check for inode number is _not_ due to possible IO errors.
3821  	 *  We might rmdir the source, keep it as pwd of some process
3822  	 *  and merrily kill the link to whatever was created under the
3823  	 *  same name. Goodbye sticky bit ;-<
3824  	 */
3825  	retval = -ENOENT;
3826  	if (!old.bh || le32_to_cpu(old.de->inode) != old.inode->i_ino)
3827  		goto release_bh;
3828  
3829  	new.bh = ext4_find_entry(new.dir, &new.dentry->d_name,
3830  				 &new.de, &new.inlined);
3831  	if (IS_ERR(new.bh)) {
3832  		retval = PTR_ERR(new.bh);
3833  		new.bh = NULL;
3834  		goto release_bh;
3835  	}
3836  	if (new.bh) {
3837  		if (!new.inode) {
3838  			brelse(new.bh);
3839  			new.bh = NULL;
3840  		}
3841  	}
3842  	if (new.inode && !test_opt(new.dir->i_sb, NO_AUTO_DA_ALLOC))
3843  		ext4_alloc_da_blocks(old.inode);
3844  
3845  	credits = (2 * EXT4_DATA_TRANS_BLOCKS(old.dir->i_sb) +
3846  		   EXT4_INDEX_EXTRA_TRANS_BLOCKS + 2);
3847  	if (!(flags & RENAME_WHITEOUT)) {
3848  		handle = ext4_journal_start(old.dir, EXT4_HT_DIR, credits);
3849  		if (IS_ERR(handle)) {
3850  			retval = PTR_ERR(handle);
3851  			goto release_bh;
3852  		}
3853  	} else {
3854  		whiteout = ext4_whiteout_for_rename(mnt_userns, &old, credits, &handle);
3855  		if (IS_ERR(whiteout)) {
3856  			retval = PTR_ERR(whiteout);
3857  			goto release_bh;
3858  		}
3859  	}
3860  
3861  	old_file_type = old.de->file_type;
3862  	if (IS_DIRSYNC(old.dir) || IS_DIRSYNC(new.dir))
3863  		ext4_handle_sync(handle);
3864  
3865  	if (S_ISDIR(old.inode->i_mode)) {
3866  		if (new.inode) {
3867  			retval = -ENOTEMPTY;
3868  			if (!ext4_empty_dir(new.inode))
3869  				goto end_rename;
3870  		} else {
3871  			retval = -EMLINK;
3872  			if (new.dir != old.dir && EXT4_DIR_LINK_MAX(new.dir))
3873  				goto end_rename;
3874  		}
3875  		retval = ext4_rename_dir_prepare(handle, &old);
3876  		if (retval)
3877  			goto end_rename;
3878  	}
3879  	/*
3880  	 * If we're renaming a file within an inline_data dir and adding or
3881  	 * setting the new dirent causes a conversion from inline_data to
3882  	 * extents/blockmap, we need to force the dirent delete code to
3883  	 * re-read the directory, or else we end up trying to delete a dirent
3884  	 * from what is now the extent tree root (or a block map).
3885  	 */
3886  	force_reread = (new.dir->i_ino == old.dir->i_ino &&
3887  			ext4_test_inode_flag(new.dir, EXT4_INODE_INLINE_DATA));
3888  
3889  	if (whiteout) {
3890  		/*
3891  		 * Do this before adding a new entry, so the old entry is sure
3892  		 * to be still pointing to the valid old entry.
3893  		 */
3894  		retval = ext4_setent(handle, &old, whiteout->i_ino,
3895  				     EXT4_FT_CHRDEV);
3896  		if (retval)
3897  			goto end_rename;
3898  		retval = ext4_mark_inode_dirty(handle, whiteout);
3899  		if (unlikely(retval))
3900  			goto end_rename;
3901  
3902  	}
3903  	if (!new.bh) {
3904  		retval = ext4_add_entry(handle, new.dentry, old.inode);
3905  		if (retval)
3906  			goto end_rename;
3907  	} else {
3908  		retval = ext4_setent(handle, &new,
3909  				     old.inode->i_ino, old_file_type);
3910  		if (retval)
3911  			goto end_rename;
3912  	}
3913  	if (force_reread)
3914  		force_reread = !ext4_test_inode_flag(new.dir,
3915  						     EXT4_INODE_INLINE_DATA);
3916  
3917  	/*
3918  	 * Like most other Unix systems, set the ctime for inodes on a
3919  	 * rename.
3920  	 */
3921  	old.inode->i_ctime = current_time(old.inode);
3922  	retval = ext4_mark_inode_dirty(handle, old.inode);
3923  	if (unlikely(retval))
3924  		goto end_rename;
3925  
3926  	if (!whiteout) {
3927  		/*
3928  		 * ok, that's it
3929  		 */
3930  		ext4_rename_delete(handle, &old, force_reread);
3931  	}
3932  
3933  	if (new.inode) {
3934  		ext4_dec_count(new.inode);
3935  		new.inode->i_ctime = current_time(new.inode);
3936  	}
3937  	old.dir->i_ctime = old.dir->i_mtime = current_time(old.dir);
3938  	ext4_update_dx_flag(old.dir);
3939  	if (old.dir_bh) {
3940  		retval = ext4_rename_dir_finish(handle, &old, new.dir->i_ino);
3941  		if (retval)
3942  			goto end_rename;
3943  
3944  		ext4_dec_count(old.dir);
3945  		if (new.inode) {
3946  			/* checked ext4_empty_dir above, can't have another
3947  			 * parent, ext4_dec_count() won't work for many-linked
3948  			 * dirs */
3949  			clear_nlink(new.inode);
3950  		} else {
3951  			ext4_inc_count(new.dir);
3952  			ext4_update_dx_flag(new.dir);
3953  			retval = ext4_mark_inode_dirty(handle, new.dir);
3954  			if (unlikely(retval))
3955  				goto end_rename;
3956  		}
3957  	}
3958  	retval = ext4_mark_inode_dirty(handle, old.dir);
3959  	if (unlikely(retval))
3960  		goto end_rename;
3961  
3962  	if (S_ISDIR(old.inode->i_mode)) {
3963  		/*
3964  		 * We disable fast commits here that's because the
3965  		 * replay code is not yet capable of changing dot dot
3966  		 * dirents in directories.
3967  		 */
3968  		ext4_fc_mark_ineligible(old.inode->i_sb,
3969  			EXT4_FC_REASON_RENAME_DIR, handle);
3970  	} else {
3971  		struct super_block *sb = old.inode->i_sb;
3972  
3973  		if (new.inode)
3974  			ext4_fc_track_unlink(handle, new.dentry);
3975  		if (test_opt2(sb, JOURNAL_FAST_COMMIT) &&
3976  		    !(EXT4_SB(sb)->s_mount_state & EXT4_FC_REPLAY) &&
3977  		    !(ext4_test_mount_flag(sb, EXT4_MF_FC_INELIGIBLE))) {
3978  			__ext4_fc_track_link(handle, old.inode, new.dentry);
3979  			__ext4_fc_track_unlink(handle, old.inode, old.dentry);
3980  			if (whiteout)
3981  				__ext4_fc_track_create(handle, whiteout,
3982  						       old.dentry);
3983  		}
3984  	}
3985  
3986  	if (new.inode) {
3987  		retval = ext4_mark_inode_dirty(handle, new.inode);
3988  		if (unlikely(retval))
3989  			goto end_rename;
3990  		if (!new.inode->i_nlink)
3991  			ext4_orphan_add(handle, new.inode);
3992  	}
3993  	retval = 0;
3994  
3995  end_rename:
3996  	if (whiteout) {
3997  		if (retval) {
3998  			ext4_resetent(handle, &old,
3999  				      old.inode->i_ino, old_file_type);
4000  			drop_nlink(whiteout);
4001  			ext4_orphan_add(handle, whiteout);
4002  		}
4003  		unlock_new_inode(whiteout);
4004  		ext4_journal_stop(handle);
4005  		iput(whiteout);
4006  	} else {
4007  		ext4_journal_stop(handle);
4008  	}
4009  release_bh:
4010  	brelse(old.dir_bh);
4011  	brelse(old.bh);
4012  	brelse(new.bh);
4013  	return retval;
4014  }
4015  
4016  static int ext4_cross_rename(struct inode *old_dir, struct dentry *old_dentry,
4017  			     struct inode *new_dir, struct dentry *new_dentry)
4018  {
4019  	handle_t *handle = NULL;
4020  	struct ext4_renament old = {
4021  		.dir = old_dir,
4022  		.dentry = old_dentry,
4023  		.inode = d_inode(old_dentry),
4024  	};
4025  	struct ext4_renament new = {
4026  		.dir = new_dir,
4027  		.dentry = new_dentry,
4028  		.inode = d_inode(new_dentry),
4029  	};
4030  	u8 new_file_type;
4031  	int retval;
4032  	struct timespec64 ctime;
4033  
4034  	if ((ext4_test_inode_flag(new_dir, EXT4_INODE_PROJINHERIT) &&
4035  	     !projid_eq(EXT4_I(new_dir)->i_projid,
4036  			EXT4_I(old_dentry->d_inode)->i_projid)) ||
4037  	    (ext4_test_inode_flag(old_dir, EXT4_INODE_PROJINHERIT) &&
4038  	     !projid_eq(EXT4_I(old_dir)->i_projid,
4039  			EXT4_I(new_dentry->d_inode)->i_projid)))
4040  		return -EXDEV;
4041  
4042  	retval = dquot_initialize(old.dir);
4043  	if (retval)
4044  		return retval;
4045  	retval = dquot_initialize(new.dir);
4046  	if (retval)
4047  		return retval;
4048  
4049  	old.bh = ext4_find_entry(old.dir, &old.dentry->d_name,
4050  				 &old.de, &old.inlined);
4051  	if (IS_ERR(old.bh))
4052  		return PTR_ERR(old.bh);
4053  	/*
4054  	 *  Check for inode number is _not_ due to possible IO errors.
4055  	 *  We might rmdir the source, keep it as pwd of some process
4056  	 *  and merrily kill the link to whatever was created under the
4057  	 *  same name. Goodbye sticky bit ;-<
4058  	 */
4059  	retval = -ENOENT;
4060  	if (!old.bh || le32_to_cpu(old.de->inode) != old.inode->i_ino)
4061  		goto end_rename;
4062  
4063  	new.bh = ext4_find_entry(new.dir, &new.dentry->d_name,
4064  				 &new.de, &new.inlined);
4065  	if (IS_ERR(new.bh)) {
4066  		retval = PTR_ERR(new.bh);
4067  		new.bh = NULL;
4068  		goto end_rename;
4069  	}
4070  
4071  	/* RENAME_EXCHANGE case: old *and* new must both exist */
4072  	if (!new.bh || le32_to_cpu(new.de->inode) != new.inode->i_ino)
4073  		goto end_rename;
4074  
4075  	handle = ext4_journal_start(old.dir, EXT4_HT_DIR,
4076  		(2 * EXT4_DATA_TRANS_BLOCKS(old.dir->i_sb) +
4077  		 2 * EXT4_INDEX_EXTRA_TRANS_BLOCKS + 2));
4078  	if (IS_ERR(handle)) {
4079  		retval = PTR_ERR(handle);
4080  		handle = NULL;
4081  		goto end_rename;
4082  	}
4083  
4084  	if (IS_DIRSYNC(old.dir) || IS_DIRSYNC(new.dir))
4085  		ext4_handle_sync(handle);
4086  
4087  	if (S_ISDIR(old.inode->i_mode)) {
4088  		old.is_dir = true;
4089  		retval = ext4_rename_dir_prepare(handle, &old);
4090  		if (retval)
4091  			goto end_rename;
4092  	}
4093  	if (S_ISDIR(new.inode->i_mode)) {
4094  		new.is_dir = true;
4095  		retval = ext4_rename_dir_prepare(handle, &new);
4096  		if (retval)
4097  			goto end_rename;
4098  	}
4099  
4100  	/*
4101  	 * Other than the special case of overwriting a directory, parents'
4102  	 * nlink only needs to be modified if this is a cross directory rename.
4103  	 */
4104  	if (old.dir != new.dir && old.is_dir != new.is_dir) {
4105  		old.dir_nlink_delta = old.is_dir ? -1 : 1;
4106  		new.dir_nlink_delta = -old.dir_nlink_delta;
4107  		retval = -EMLINK;
4108  		if ((old.dir_nlink_delta > 0 && EXT4_DIR_LINK_MAX(old.dir)) ||
4109  		    (new.dir_nlink_delta > 0 && EXT4_DIR_LINK_MAX(new.dir)))
4110  			goto end_rename;
4111  	}
4112  
4113  	new_file_type = new.de->file_type;
4114  	retval = ext4_setent(handle, &new, old.inode->i_ino, old.de->file_type);
4115  	if (retval)
4116  		goto end_rename;
4117  
4118  	retval = ext4_setent(handle, &old, new.inode->i_ino, new_file_type);
4119  	if (retval)
4120  		goto end_rename;
4121  
4122  	/*
4123  	 * Like most other Unix systems, set the ctime for inodes on a
4124  	 * rename.
4125  	 */
4126  	ctime = current_time(old.inode);
4127  	old.inode->i_ctime = ctime;
4128  	new.inode->i_ctime = ctime;
4129  	retval = ext4_mark_inode_dirty(handle, old.inode);
4130  	if (unlikely(retval))
4131  		goto end_rename;
4132  	retval = ext4_mark_inode_dirty(handle, new.inode);
4133  	if (unlikely(retval))
4134  		goto end_rename;
4135  	ext4_fc_mark_ineligible(new.inode->i_sb,
4136  				EXT4_FC_REASON_CROSS_RENAME, handle);
4137  	if (old.dir_bh) {
4138  		retval = ext4_rename_dir_finish(handle, &old, new.dir->i_ino);
4139  		if (retval)
4140  			goto end_rename;
4141  	}
4142  	if (new.dir_bh) {
4143  		retval = ext4_rename_dir_finish(handle, &new, old.dir->i_ino);
4144  		if (retval)
4145  			goto end_rename;
4146  	}
4147  	ext4_update_dir_count(handle, &old);
4148  	ext4_update_dir_count(handle, &new);
4149  	retval = 0;
4150  
4151  end_rename:
4152  	brelse(old.dir_bh);
4153  	brelse(new.dir_bh);
4154  	brelse(old.bh);
4155  	brelse(new.bh);
4156  	if (handle)
4157  		ext4_journal_stop(handle);
4158  	return retval;
4159  }
4160  
4161  static int ext4_rename2(struct user_namespace *mnt_userns,
4162  			struct inode *old_dir, struct dentry *old_dentry,
4163  			struct inode *new_dir, struct dentry *new_dentry,
4164  			unsigned int flags)
4165  {
4166  	int err;
4167  
4168  	if (unlikely(ext4_forced_shutdown(EXT4_SB(old_dir->i_sb))))
4169  		return -EIO;
4170  
4171  	if (flags & ~(RENAME_NOREPLACE | RENAME_EXCHANGE | RENAME_WHITEOUT))
4172  		return -EINVAL;
4173  
4174  	err = fscrypt_prepare_rename(old_dir, old_dentry, new_dir, new_dentry,
4175  				     flags);
4176  	if (err)
4177  		return err;
4178  
4179  	if (flags & RENAME_EXCHANGE) {
4180  		return ext4_cross_rename(old_dir, old_dentry,
4181  					 new_dir, new_dentry);
4182  	}
4183  
4184  	return ext4_rename(mnt_userns, old_dir, old_dentry, new_dir, new_dentry, flags);
4185  }
4186  
4187  /*
4188   * directories can handle most operations...
4189   */
4190  const struct inode_operations ext4_dir_inode_operations = {
4191  	.create		= ext4_create,
4192  	.lookup		= ext4_lookup,
4193  	.link		= ext4_link,
4194  	.unlink		= ext4_unlink,
4195  	.symlink	= ext4_symlink,
4196  	.mkdir		= ext4_mkdir,
4197  	.rmdir		= ext4_rmdir,
4198  	.mknod		= ext4_mknod,
4199  	.tmpfile	= ext4_tmpfile,
4200  	.rename		= ext4_rename2,
4201  	.setattr	= ext4_setattr,
4202  	.getattr	= ext4_getattr,
4203  	.listxattr	= ext4_listxattr,
4204  	.get_inode_acl	= ext4_get_acl,
4205  	.set_acl	= ext4_set_acl,
4206  	.fiemap         = ext4_fiemap,
4207  	.fileattr_get	= ext4_fileattr_get,
4208  	.fileattr_set	= ext4_fileattr_set,
4209  };
4210  
4211  const struct inode_operations ext4_special_inode_operations = {
4212  	.setattr	= ext4_setattr,
4213  	.getattr	= ext4_getattr,
4214  	.listxattr	= ext4_listxattr,
4215  	.get_inode_acl	= ext4_get_acl,
4216  	.set_acl	= ext4_set_acl,
4217  };
4218