xref: /openbmc/linux/fs/ocfs2/dir.c (revision 6861de97)
1328970deSThomas Gleixner // SPDX-License-Identifier: GPL-2.0-or-later
2fa60ce2cSMasahiro Yamada /*
3ccd979bdSMark Fasheh  * dir.c
4ccd979bdSMark Fasheh  *
5ccd979bdSMark Fasheh  * Creates, reads, walks and deletes directory-nodes
6ccd979bdSMark Fasheh  *
7ccd979bdSMark Fasheh  * Copyright (C) 2002, 2004 Oracle.  All rights reserved.
8ccd979bdSMark Fasheh  *
9ccd979bdSMark Fasheh  *  Portions of this code from linux/fs/ext3/dir.c
10ccd979bdSMark Fasheh  *
11ccd979bdSMark Fasheh  *  Copyright (C) 1992, 1993, 1994, 1995
12ccd979bdSMark Fasheh  *  Remy Card (card@masi.ibp.fr)
13ccd979bdSMark Fasheh  *  Laboratoire MASI - Institut Blaise pascal
14ccd979bdSMark Fasheh  *  Universite Pierre et Marie Curie (Paris VI)
15ccd979bdSMark Fasheh  *
16ccd979bdSMark Fasheh  *   from
17ccd979bdSMark Fasheh  *
18ccd979bdSMark Fasheh  *   linux/fs/minix/dir.c
19ccd979bdSMark Fasheh  *
20762515a8SJakub Wilk  *   Copyright (C) 1991, 1992 Linus Torvalds
21ccd979bdSMark Fasheh  */
22ccd979bdSMark Fasheh 
23ccd979bdSMark Fasheh #include <linux/fs.h>
24ccd979bdSMark Fasheh #include <linux/types.h>
25ccd979bdSMark Fasheh #include <linux/slab.h>
26ccd979bdSMark Fasheh #include <linux/highmem.h>
27a90714c1SJan Kara #include <linux/quotaops.h>
289b7895efSMark Fasheh #include <linux/sort.h>
29cc56c33eSJeff Layton #include <linux/iversion.h>
30ccd979bdSMark Fasheh 
31ccd979bdSMark Fasheh #include <cluster/masklog.h>
32ccd979bdSMark Fasheh 
33ccd979bdSMark Fasheh #include "ocfs2.h"
34ccd979bdSMark Fasheh 
35ccd979bdSMark Fasheh #include "alloc.h"
36c175a518SJoel Becker #include "blockcheck.h"
37ccd979bdSMark Fasheh #include "dir.h"
38ccd979bdSMark Fasheh #include "dlmglue.h"
39ccd979bdSMark Fasheh #include "extent_map.h"
40ccd979bdSMark Fasheh #include "file.h"
41ccd979bdSMark Fasheh #include "inode.h"
42ccd979bdSMark Fasheh #include "journal.h"
43ccd979bdSMark Fasheh #include "namei.h"
44ccd979bdSMark Fasheh #include "suballoc.h"
45316f4b9fSMark Fasheh #include "super.h"
469b7895efSMark Fasheh #include "sysfile.h"
47ccd979bdSMark Fasheh #include "uptodate.h"
48f1088d47STao Ma #include "ocfs2_trace.h"
49ccd979bdSMark Fasheh 
50ccd979bdSMark Fasheh #include "buffer_head_io.h"
51ccd979bdSMark Fasheh 
/*
 * NAMEI_RA_SIZE is the product of the chunk count and the number of
 * blocks per chunk (presumably read-ahead sizing for name lookups;
 * the users are outside this part of the file).
 */
#define NAMEI_RA_CHUNKS		2
#define NAMEI_RA_BLOCKS		4
#define NAMEI_RA_SIZE		(NAMEI_RA_CHUNKS * NAMEI_RA_BLOCKS)
55316f4b9fSMark Fasheh 
56316f4b9fSMark Fasheh static int ocfs2_do_extend_dir(struct super_block *sb,
57316f4b9fSMark Fasheh 			       handle_t *handle,
58316f4b9fSMark Fasheh 			       struct inode *dir,
59316f4b9fSMark Fasheh 			       struct buffer_head *parent_fe_bh,
60316f4b9fSMark Fasheh 			       struct ocfs2_alloc_context *data_ac,
61316f4b9fSMark Fasheh 			       struct ocfs2_alloc_context *meta_ac,
62316f4b9fSMark Fasheh 			       struct buffer_head **new_bh);
63e7c17e43SMark Fasheh static int ocfs2_dir_indexed(struct inode *inode);
64316f4b9fSMark Fasheh 
6523193e51SMark Fasheh /*
6687d35a74SMark Fasheh  * These are distinct checks because future versions of the file system will
6787d35a74SMark Fasheh  * want to have a trailing dirent structure independent of indexing.
6887d35a74SMark Fasheh  */
ocfs2_supports_dir_trailer(struct inode * dir)69e7c17e43SMark Fasheh static int ocfs2_supports_dir_trailer(struct inode *dir)
7087d35a74SMark Fasheh {
71e7c17e43SMark Fasheh 	struct ocfs2_super *osb = OCFS2_SB(dir->i_sb);
72e7c17e43SMark Fasheh 
7387d35a74SMark Fasheh 	if (OCFS2_I(dir)->ip_dyn_features & OCFS2_INLINE_DATA_FL)
7487d35a74SMark Fasheh 		return 0;
7587d35a74SMark Fasheh 
76e7c17e43SMark Fasheh 	return ocfs2_meta_ecc(osb) || ocfs2_dir_indexed(dir);
7787d35a74SMark Fasheh }
7887d35a74SMark Fasheh 
79e7c17e43SMark Fasheh /*
80e7c17e43SMark Fasheh  * "new' here refers to the point at which we're creating a new
81e7c17e43SMark Fasheh  * directory via "mkdir()", but also when we're expanding an inline
82e7c17e43SMark Fasheh  * directory. In either case, we don't yet have the indexing bit set
83e7c17e43SMark Fasheh  * on the directory, so the standard checks will fail in when metaecc
84e7c17e43SMark Fasheh  * is turned off. Only directory-initialization type functions should
85e7c17e43SMark Fasheh  * use this then. Everything else wants ocfs2_supports_dir_trailer()
86e7c17e43SMark Fasheh  */
ocfs2_new_dir_wants_trailer(struct inode * dir)87e7c17e43SMark Fasheh static int ocfs2_new_dir_wants_trailer(struct inode *dir)
8887d35a74SMark Fasheh {
89e7c17e43SMark Fasheh 	struct ocfs2_super *osb = OCFS2_SB(dir->i_sb);
90e7c17e43SMark Fasheh 
91e7c17e43SMark Fasheh 	return ocfs2_meta_ecc(osb) ||
92e7c17e43SMark Fasheh 		ocfs2_supports_indexed_dirs(osb);
9387d35a74SMark Fasheh }
9487d35a74SMark Fasheh 
ocfs2_dir_trailer_blk_off(struct super_block * sb)9587d35a74SMark Fasheh static inline unsigned int ocfs2_dir_trailer_blk_off(struct super_block *sb)
9687d35a74SMark Fasheh {
9787d35a74SMark Fasheh 	return sb->s_blocksize - sizeof(struct ocfs2_dir_block_trailer);
9887d35a74SMark Fasheh }
9987d35a74SMark Fasheh 
/*
 * Locate the directory block trailer inside buffer_head _bh: it lives
 * ocfs2_dir_trailer_blk_off(_sb) bytes into the buffer's data.
 */
#define ocfs2_trailer_from_bh(_bh, _sb)					\
	((struct ocfs2_dir_block_trailer *)				\
	 ((_bh)->b_data + ocfs2_dir_trailer_blk_off((_sb))))
10187d35a74SMark Fasheh 
102c175a518SJoel Becker /* XXX ocfs2_block_dqtrailer() is similar but not quite - can we make
103c175a518SJoel Becker  * them more consistent? */
ocfs2_dir_trailer_from_size(int blocksize,void * data)104c175a518SJoel Becker struct ocfs2_dir_block_trailer *ocfs2_dir_trailer_from_size(int blocksize,
105c175a518SJoel Becker 							    void *data)
106c175a518SJoel Becker {
107c175a518SJoel Becker 	char *p = data;
108c175a518SJoel Becker 
109c175a518SJoel Becker 	p += blocksize - sizeof(struct ocfs2_dir_block_trailer);
110c175a518SJoel Becker 	return (struct ocfs2_dir_block_trailer *)p;
111c175a518SJoel Becker }
112c175a518SJoel Becker 
11387d35a74SMark Fasheh /*
11487d35a74SMark Fasheh  * XXX: This is executed once on every dirent. We should consider optimizing
11587d35a74SMark Fasheh  * it.
11687d35a74SMark Fasheh  */
ocfs2_skip_dir_trailer(struct inode * dir,struct ocfs2_dir_entry * de,unsigned long offset,unsigned long blklen)11787d35a74SMark Fasheh static int ocfs2_skip_dir_trailer(struct inode *dir,
11887d35a74SMark Fasheh 				  struct ocfs2_dir_entry *de,
11987d35a74SMark Fasheh 				  unsigned long offset,
12087d35a74SMark Fasheh 				  unsigned long blklen)
12187d35a74SMark Fasheh {
12287d35a74SMark Fasheh 	unsigned long toff = blklen - sizeof(struct ocfs2_dir_block_trailer);
12387d35a74SMark Fasheh 
124e7c17e43SMark Fasheh 	if (!ocfs2_supports_dir_trailer(dir))
12587d35a74SMark Fasheh 		return 0;
12687d35a74SMark Fasheh 
12787d35a74SMark Fasheh 	if (offset != toff)
12887d35a74SMark Fasheh 		return 0;
12987d35a74SMark Fasheh 
13087d35a74SMark Fasheh 	return 1;
13187d35a74SMark Fasheh }
13287d35a74SMark Fasheh 
/*
 * Fill in the trailer at the end of directory block 'bh' belonging to
 * 'inode': signature, the trailer's own size as its compat rec_len, the
 * owning dinode's block number, this block's number, and 'rec_len' as
 * the recorded free record length.
 */
static void ocfs2_init_dir_trailer(struct inode *inode,
				   struct buffer_head *bh, u16 rec_len)
{
	struct ocfs2_dir_block_trailer *t =
		ocfs2_trailer_from_bh(bh, inode->i_sb);

	strcpy(t->db_signature, OCFS2_DIR_TRAILER_SIGNATURE);
	t->db_compat_rec_len =
		cpu_to_le16(sizeof(struct ocfs2_dir_block_trailer));
	t->db_parent_dinode = cpu_to_le64(OCFS2_I(inode)->ip_blkno);
	t->db_blkno = cpu_to_le64(bh->b_blocknr);
	t->db_free_rec_len = cpu_to_le16(rec_len);
}
146e7c17e43SMark Fasheh /*
147e7c17e43SMark Fasheh  * Link an unindexed block with a dir trailer structure into the index free
148e7c17e43SMark Fasheh  * list. This function will modify dirdata_bh, but assumes you've already
149e7c17e43SMark Fasheh  * passed it to the journal.
150e7c17e43SMark Fasheh  */
ocfs2_dx_dir_link_trailer(struct inode * dir,handle_t * handle,struct buffer_head * dx_root_bh,struct buffer_head * dirdata_bh)151e7c17e43SMark Fasheh static int ocfs2_dx_dir_link_trailer(struct inode *dir, handle_t *handle,
152e7c17e43SMark Fasheh 				     struct buffer_head *dx_root_bh,
153e7c17e43SMark Fasheh 				     struct buffer_head *dirdata_bh)
154e7c17e43SMark Fasheh {
155e7c17e43SMark Fasheh 	int ret;
156e7c17e43SMark Fasheh 	struct ocfs2_dx_root_block *dx_root;
157e7c17e43SMark Fasheh 	struct ocfs2_dir_block_trailer *trailer;
158e7c17e43SMark Fasheh 
1590cf2f763SJoel Becker 	ret = ocfs2_journal_access_dr(handle, INODE_CACHE(dir), dx_root_bh,
160e7c17e43SMark Fasheh 				      OCFS2_JOURNAL_ACCESS_WRITE);
161e7c17e43SMark Fasheh 	if (ret) {
162e7c17e43SMark Fasheh 		mlog_errno(ret);
163e7c17e43SMark Fasheh 		goto out;
164e7c17e43SMark Fasheh 	}
165e7c17e43SMark Fasheh 	trailer = ocfs2_trailer_from_bh(dirdata_bh, dir->i_sb);
166e7c17e43SMark Fasheh 	dx_root = (struct ocfs2_dx_root_block *)dx_root_bh->b_data;
167e7c17e43SMark Fasheh 
168e7c17e43SMark Fasheh 	trailer->db_free_next = dx_root->dr_free_blk;
169e7c17e43SMark Fasheh 	dx_root->dr_free_blk = cpu_to_le64(dirdata_bh->b_blocknr);
170e7c17e43SMark Fasheh 
171e7c17e43SMark Fasheh 	ocfs2_journal_dirty(handle, dx_root_bh);
172e7c17e43SMark Fasheh 
173e7c17e43SMark Fasheh out:
174e7c17e43SMark Fasheh 	return ret;
175e7c17e43SMark Fasheh }
176e7c17e43SMark Fasheh 
ocfs2_free_list_at_root(struct ocfs2_dir_lookup_result * res)177e7c17e43SMark Fasheh static int ocfs2_free_list_at_root(struct ocfs2_dir_lookup_result *res)
178e7c17e43SMark Fasheh {
179e7c17e43SMark Fasheh 	return res->dl_prev_leaf_bh == NULL;
18087d35a74SMark Fasheh }
18187d35a74SMark Fasheh 
ocfs2_free_dir_lookup_result(struct ocfs2_dir_lookup_result * res)1824a12ca3aSMark Fasheh void ocfs2_free_dir_lookup_result(struct ocfs2_dir_lookup_result *res)
1834a12ca3aSMark Fasheh {
1844ed8a6bbSMark Fasheh 	brelse(res->dl_dx_root_bh);
1854a12ca3aSMark Fasheh 	brelse(res->dl_leaf_bh);
1869b7895efSMark Fasheh 	brelse(res->dl_dx_leaf_bh);
187e7c17e43SMark Fasheh 	brelse(res->dl_prev_leaf_bh);
1889b7895efSMark Fasheh }
1899b7895efSMark Fasheh 
ocfs2_dir_indexed(struct inode * inode)1909b7895efSMark Fasheh static int ocfs2_dir_indexed(struct inode *inode)
1919b7895efSMark Fasheh {
1929b7895efSMark Fasheh 	if (OCFS2_I(inode)->ip_dyn_features & OCFS2_INDEXED_DIR_FL)
1939b7895efSMark Fasheh 		return 1;
1949b7895efSMark Fasheh 	return 0;
1959b7895efSMark Fasheh }
1969b7895efSMark Fasheh 
ocfs2_dx_root_inline(struct ocfs2_dx_root_block * dx_root)1974ed8a6bbSMark Fasheh static inline int ocfs2_dx_root_inline(struct ocfs2_dx_root_block *dx_root)
1984ed8a6bbSMark Fasheh {
1994ed8a6bbSMark Fasheh 	return dx_root->dr_flags & OCFS2_DX_FLAG_INLINE;
2004ed8a6bbSMark Fasheh }
2014ed8a6bbSMark Fasheh 
/*
 * Hashing code adapted from ext3
 */
#define DELTA 0x9E3779B9

/*
 * Mix the four key words in[0..3] into the running hash state in buf[0]
 * and buf[1] using 16 rounds. Only buf[0] and buf[1] are read or written.
 */
static void TEA_transform(__u32 buf[4], __u32 const in[])
{
	__u32 x = buf[0], y = buf[1];
	__u32 k0 = in[0], k1 = in[1], k2 = in[2], k3 = in[3];
	__u32 sum = 0;
	int round;

	for (round = 0; round < 16; round++) {
		sum += DELTA;
		x += ((y << 4) + k0) ^ (y + sum) ^ ((y >> 5) + k1);
		y += ((x << 4) + k2) ^ (x + sum) ^ ((x >> 5) + k3);
	}

	buf[0] += x;
	buf[1] += y;
}
2239b7895efSMark Fasheh 
/*
 * Pack up to num * 4 bytes of msg into num 32-bit words in buf, most
 * recent byte in the low bits of each word.
 *
 * A padding word is built by replicating the low byte of the original
 * (unclamped) len into all four bytes. It seeds every word before bytes
 * are shifted in, and fills any words left over once msg is exhausted.
 * Exactly num words are written when num > 0.
 */
static void str2hashbuf(const char *msg, int len, __u32 *buf, int num)
{
	__u32 fill, word;
	int pos;

	fill = (__u32)len | ((__u32)len << 8);
	fill |= fill << 16;

	if (len > num * 4)
		len = num * 4;

	word = fill;
	for (pos = 0; pos < len; pos++) {
		if ((pos & 3) == 0)
			word = fill;
		word = msg[pos] + (word << 8);
		if ((pos & 3) == 3) {
			/* A full word has been assembled; emit it. */
			*buf++ = word;
			word = fill;
			num--;
		}
	}

	/* Emit the partially filled word (or one pad word), then pad out. */
	num--;
	if (num >= 0)
		*buf++ = word;
	for (num--; num >= 0; num--)
		*buf++ = fill;
}
2509b7895efSMark Fasheh 
ocfs2_dx_dir_name_hash(struct inode * dir,const char * name,int len,struct ocfs2_dx_hinfo * hinfo)2519b7895efSMark Fasheh static void ocfs2_dx_dir_name_hash(struct inode *dir, const char *name, int len,
2529b7895efSMark Fasheh 				   struct ocfs2_dx_hinfo *hinfo)
2539b7895efSMark Fasheh {
2549b7895efSMark Fasheh 	struct ocfs2_super *osb = OCFS2_SB(dir->i_sb);
2559b7895efSMark Fasheh 	const char	*p;
2569b7895efSMark Fasheh 	__u32		in[8], buf[4];
2579b7895efSMark Fasheh 
2589b7895efSMark Fasheh 	/*
2599b7895efSMark Fasheh 	 * XXX: Is this really necessary, if the index is never looked
2609b7895efSMark Fasheh 	 * at by readdir? Is a hash value of '0' a bad idea?
2619b7895efSMark Fasheh 	 */
2629b7895efSMark Fasheh 	if ((len == 1 && !strncmp(".", name, 1)) ||
2639b7895efSMark Fasheh 	    (len == 2 && !strncmp("..", name, 2))) {
2649b7895efSMark Fasheh 		buf[0] = buf[1] = 0;
2659b7895efSMark Fasheh 		goto out;
2669b7895efSMark Fasheh 	}
2679b7895efSMark Fasheh 
2689b7895efSMark Fasheh #ifdef OCFS2_DEBUG_DX_DIRS
2699b7895efSMark Fasheh 	/*
2709b7895efSMark Fasheh 	 * This makes it very easy to debug indexing problems. We
2719b7895efSMark Fasheh 	 * should never allow this to be selected without hand editing
2729b7895efSMark Fasheh 	 * this file though.
2739b7895efSMark Fasheh 	 */
2749b7895efSMark Fasheh 	buf[0] = buf[1] = len;
2759b7895efSMark Fasheh 	goto out;
2769b7895efSMark Fasheh #endif
2779b7895efSMark Fasheh 
2789b7895efSMark Fasheh 	memcpy(buf, osb->osb_dx_seed, sizeof(buf));
2799b7895efSMark Fasheh 
2809b7895efSMark Fasheh 	p = name;
2819b7895efSMark Fasheh 	while (len > 0) {
2829b7895efSMark Fasheh 		str2hashbuf(p, len, in, 4);
2839b7895efSMark Fasheh 		TEA_transform(buf, in);
2849b7895efSMark Fasheh 		len -= 16;
2859b7895efSMark Fasheh 		p += 16;
2869b7895efSMark Fasheh 	}
2879b7895efSMark Fasheh 
2889b7895efSMark Fasheh out:
2899b7895efSMark Fasheh 	hinfo->major_hash = buf[0];
2909b7895efSMark Fasheh 	hinfo->minor_hash = buf[1];
2914a12ca3aSMark Fasheh }
2924a12ca3aSMark Fasheh 
29387d35a74SMark Fasheh /*
29423193e51SMark Fasheh  * bh passed here can be an inode block or a dir data block, depending
29523193e51SMark Fasheh  * on the inode inline data flag.
29623193e51SMark Fasheh  */
ocfs2_check_dir_entry(struct inode * dir,struct ocfs2_dir_entry * de,struct buffer_head * bh,unsigned long offset)2975eae5b96SMark Fasheh static int ocfs2_check_dir_entry(struct inode * dir,
298316f4b9fSMark Fasheh 				 struct ocfs2_dir_entry * de,
299316f4b9fSMark Fasheh 				 struct buffer_head * bh,
300316f4b9fSMark Fasheh 				 unsigned long offset)
301316f4b9fSMark Fasheh {
302316f4b9fSMark Fasheh 	const char *error_msg = NULL;
303316f4b9fSMark Fasheh 	const int rlen = le16_to_cpu(de->rec_len);
304316f4b9fSMark Fasheh 
3051dd9ffc8STao Ma 	if (unlikely(rlen < OCFS2_DIR_REC_LEN(1)))
306316f4b9fSMark Fasheh 		error_msg = "rec_len is smaller than minimal";
3071dd9ffc8STao Ma 	else if (unlikely(rlen % 4 != 0))
308316f4b9fSMark Fasheh 		error_msg = "rec_len % 4 != 0";
3091dd9ffc8STao Ma 	else if (unlikely(rlen < OCFS2_DIR_REC_LEN(de->name_len)))
310316f4b9fSMark Fasheh 		error_msg = "rec_len is too small for name_len";
3111dd9ffc8STao Ma 	else if (unlikely(
3121dd9ffc8STao Ma 		 ((char *) de - bh->b_data) + rlen > dir->i_sb->s_blocksize))
313316f4b9fSMark Fasheh 		error_msg = "directory entry across blocks";
314316f4b9fSMark Fasheh 
3151dd9ffc8STao Ma 	if (unlikely(error_msg != NULL))
316316f4b9fSMark Fasheh 		mlog(ML_ERROR, "bad entry in directory #%llu: %s - "
317316f4b9fSMark Fasheh 		     "offset=%lu, inode=%llu, rec_len=%d, name_len=%d\n",
318316f4b9fSMark Fasheh 		     (unsigned long long)OCFS2_I(dir)->ip_blkno, error_msg,
319316f4b9fSMark Fasheh 		     offset, (unsigned long long)le64_to_cpu(de->inode), rlen,
320316f4b9fSMark Fasheh 		     de->name_len);
3211dd9ffc8STao Ma 
322316f4b9fSMark Fasheh 	return error_msg == NULL ? 1 : 0;
323316f4b9fSMark Fasheh }
324316f4b9fSMark Fasheh 
ocfs2_match(int len,const char * const name,struct ocfs2_dir_entry * de)325316f4b9fSMark Fasheh static inline int ocfs2_match(int len,
326316f4b9fSMark Fasheh 			      const char * const name,
327316f4b9fSMark Fasheh 			      struct ocfs2_dir_entry *de)
328316f4b9fSMark Fasheh {
329316f4b9fSMark Fasheh 	if (len != de->name_len)
330316f4b9fSMark Fasheh 		return 0;
331316f4b9fSMark Fasheh 	if (!de->inode)
332316f4b9fSMark Fasheh 		return 0;
333316f4b9fSMark Fasheh 	return !memcmp(name, de->name, len);
334316f4b9fSMark Fasheh }
335316f4b9fSMark Fasheh 
336316f4b9fSMark Fasheh /*
337316f4b9fSMark Fasheh  * Returns 0 if not found, -1 on failure, and 1 on success
338316f4b9fSMark Fasheh  */
ocfs2_search_dirblock(struct buffer_head * bh,struct inode * dir,const char * name,int namelen,unsigned long offset,char * first_de,unsigned int bytes,struct ocfs2_dir_entry ** res_dir)33942b16b3fSJesper Juhl static inline int ocfs2_search_dirblock(struct buffer_head *bh,
340316f4b9fSMark Fasheh 					struct inode *dir,
341316f4b9fSMark Fasheh 					const char *name, int namelen,
342316f4b9fSMark Fasheh 					unsigned long offset,
34323193e51SMark Fasheh 					char *first_de,
34423193e51SMark Fasheh 					unsigned int bytes,
345316f4b9fSMark Fasheh 					struct ocfs2_dir_entry **res_dir)
346316f4b9fSMark Fasheh {
347316f4b9fSMark Fasheh 	struct ocfs2_dir_entry *de;
348316f4b9fSMark Fasheh 	char *dlimit, *de_buf;
349316f4b9fSMark Fasheh 	int de_len;
350316f4b9fSMark Fasheh 	int ret = 0;
351316f4b9fSMark Fasheh 
35223193e51SMark Fasheh 	de_buf = first_de;
35323193e51SMark Fasheh 	dlimit = de_buf + bytes;
354316f4b9fSMark Fasheh 
355316f4b9fSMark Fasheh 	while (de_buf < dlimit) {
356316f4b9fSMark Fasheh 		/* this code is executed quadratically often */
357316f4b9fSMark Fasheh 		/* do minimal checking `by hand' */
358316f4b9fSMark Fasheh 
359316f4b9fSMark Fasheh 		de = (struct ocfs2_dir_entry *) de_buf;
360316f4b9fSMark Fasheh 
361316f4b9fSMark Fasheh 		if (de_buf + namelen <= dlimit &&
362316f4b9fSMark Fasheh 		    ocfs2_match(namelen, name, de)) {
363316f4b9fSMark Fasheh 			/* found a match - just to be sure, do a full check */
364316f4b9fSMark Fasheh 			if (!ocfs2_check_dir_entry(dir, de, bh, offset)) {
365316f4b9fSMark Fasheh 				ret = -1;
366316f4b9fSMark Fasheh 				goto bail;
367316f4b9fSMark Fasheh 			}
368316f4b9fSMark Fasheh 			*res_dir = de;
369316f4b9fSMark Fasheh 			ret = 1;
370316f4b9fSMark Fasheh 			goto bail;
371316f4b9fSMark Fasheh 		}
372316f4b9fSMark Fasheh 
373316f4b9fSMark Fasheh 		/* prevent looping on a bad block */
374316f4b9fSMark Fasheh 		de_len = le16_to_cpu(de->rec_len);
375316f4b9fSMark Fasheh 		if (de_len <= 0) {
376316f4b9fSMark Fasheh 			ret = -1;
377316f4b9fSMark Fasheh 			goto bail;
378316f4b9fSMark Fasheh 		}
379316f4b9fSMark Fasheh 
380316f4b9fSMark Fasheh 		de_buf += de_len;
381316f4b9fSMark Fasheh 		offset += de_len;
382316f4b9fSMark Fasheh 	}
383316f4b9fSMark Fasheh 
384316f4b9fSMark Fasheh bail:
385f1088d47STao Ma 	trace_ocfs2_search_dirblock(ret);
386316f4b9fSMark Fasheh 	return ret;
387316f4b9fSMark Fasheh }
388316f4b9fSMark Fasheh 
ocfs2_find_entry_id(const char * name,int namelen,struct inode * dir,struct ocfs2_dir_entry ** res_dir)38923193e51SMark Fasheh static struct buffer_head *ocfs2_find_entry_id(const char *name,
39023193e51SMark Fasheh 					       int namelen,
39123193e51SMark Fasheh 					       struct inode *dir,
39223193e51SMark Fasheh 					       struct ocfs2_dir_entry **res_dir)
39323193e51SMark Fasheh {
39423193e51SMark Fasheh 	int ret, found;
39523193e51SMark Fasheh 	struct buffer_head *di_bh = NULL;
39623193e51SMark Fasheh 	struct ocfs2_dinode *di;
39723193e51SMark Fasheh 	struct ocfs2_inline_data *data;
39823193e51SMark Fasheh 
399b657c95cSJoel Becker 	ret = ocfs2_read_inode_block(dir, &di_bh);
40023193e51SMark Fasheh 	if (ret) {
40123193e51SMark Fasheh 		mlog_errno(ret);
40223193e51SMark Fasheh 		goto out;
40323193e51SMark Fasheh 	}
40423193e51SMark Fasheh 
40523193e51SMark Fasheh 	di = (struct ocfs2_dinode *)di_bh->b_data;
40623193e51SMark Fasheh 	data = &di->id2.i_data;
40723193e51SMark Fasheh 
40823193e51SMark Fasheh 	found = ocfs2_search_dirblock(di_bh, dir, name, namelen, 0,
40923193e51SMark Fasheh 				      data->id_data, i_size_read(dir), res_dir);
41023193e51SMark Fasheh 	if (found == 1)
41123193e51SMark Fasheh 		return di_bh;
41223193e51SMark Fasheh 
41323193e51SMark Fasheh 	brelse(di_bh);
41423193e51SMark Fasheh out:
41523193e51SMark Fasheh 	return NULL;
41623193e51SMark Fasheh }
41723193e51SMark Fasheh 
ocfs2_validate_dir_block(struct super_block * sb,struct buffer_head * bh)418a22305ccSJoel Becker static int ocfs2_validate_dir_block(struct super_block *sb,
419a22305ccSJoel Becker 				    struct buffer_head *bh)
420a22305ccSJoel Becker {
421c175a518SJoel Becker 	int rc;
422c175a518SJoel Becker 	struct ocfs2_dir_block_trailer *trailer =
423c175a518SJoel Becker 		ocfs2_trailer_from_bh(bh, sb);
424c175a518SJoel Becker 
425c175a518SJoel Becker 
426a22305ccSJoel Becker 	/*
427c175a518SJoel Becker 	 * We don't validate dirents here, that's handled
428a22305ccSJoel Becker 	 * in-place when the code walks them.
429a22305ccSJoel Becker 	 */
430f1088d47STao Ma 	trace_ocfs2_validate_dir_block((unsigned long long)bh->b_blocknr);
431a22305ccSJoel Becker 
432c175a518SJoel Becker 	BUG_ON(!buffer_uptodate(bh));
433c175a518SJoel Becker 
434c175a518SJoel Becker 	/*
435c175a518SJoel Becker 	 * If the ecc fails, we return the error but otherwise
436c175a518SJoel Becker 	 * leave the filesystem running.  We know any error is
437c175a518SJoel Becker 	 * local to this block.
438c175a518SJoel Becker 	 *
439c175a518SJoel Becker 	 * Note that we are safe to call this even if the directory
440c175a518SJoel Becker 	 * doesn't have a trailer.  Filesystems without metaecc will do
441c175a518SJoel Becker 	 * nothing, and filesystems with it will have one.
442c175a518SJoel Becker 	 */
443c175a518SJoel Becker 	rc = ocfs2_validate_meta_ecc(sb, bh->b_data, &trailer->db_check);
444c175a518SJoel Becker 	if (rc)
445c175a518SJoel Becker 		mlog(ML_ERROR, "Checksum failed for dinode %llu\n",
446c175a518SJoel Becker 		     (unsigned long long)bh->b_blocknr);
447c175a518SJoel Becker 
448c175a518SJoel Becker 	return rc;
449a22305ccSJoel Becker }
450a22305ccSJoel Becker 
451a22305ccSJoel Becker /*
4529b7895efSMark Fasheh  * Validate a directory trailer.
4539b7895efSMark Fasheh  *
4549b7895efSMark Fasheh  * We check the trailer here rather than in ocfs2_validate_dir_block()
4559b7895efSMark Fasheh  * because that function doesn't have the inode to test.
4569b7895efSMark Fasheh  */
ocfs2_check_dir_trailer(struct inode * dir,struct buffer_head * bh)4579b7895efSMark Fasheh static int ocfs2_check_dir_trailer(struct inode *dir, struct buffer_head *bh)
4589b7895efSMark Fasheh {
4599b7895efSMark Fasheh 	int rc = 0;
4609b7895efSMark Fasheh 	struct ocfs2_dir_block_trailer *trailer;
4619b7895efSMark Fasheh 
4629b7895efSMark Fasheh 	trailer = ocfs2_trailer_from_bh(bh, dir->i_sb);
4639b7895efSMark Fasheh 	if (!OCFS2_IS_VALID_DIR_TRAILER(trailer)) {
46417a5b9abSGoldwyn Rodrigues 		rc = ocfs2_error(dir->i_sb,
4657ecef14aSJoe Perches 				 "Invalid dirblock #%llu: signature = %.*s\n",
4669b7895efSMark Fasheh 				 (unsigned long long)bh->b_blocknr, 7,
4679b7895efSMark Fasheh 				 trailer->db_signature);
4689b7895efSMark Fasheh 		goto out;
4699b7895efSMark Fasheh 	}
4709b7895efSMark Fasheh 	if (le64_to_cpu(trailer->db_blkno) != bh->b_blocknr) {
47117a5b9abSGoldwyn Rodrigues 		rc = ocfs2_error(dir->i_sb,
4727ecef14aSJoe Perches 				 "Directory block #%llu has an invalid db_blkno of %llu\n",
4739b7895efSMark Fasheh 				 (unsigned long long)bh->b_blocknr,
4749b7895efSMark Fasheh 				 (unsigned long long)le64_to_cpu(trailer->db_blkno));
4759b7895efSMark Fasheh 		goto out;
4769b7895efSMark Fasheh 	}
4779b7895efSMark Fasheh 	if (le64_to_cpu(trailer->db_parent_dinode) !=
4789b7895efSMark Fasheh 	    OCFS2_I(dir)->ip_blkno) {
47917a5b9abSGoldwyn Rodrigues 		rc = ocfs2_error(dir->i_sb,
4807ecef14aSJoe Perches 				 "Directory block #%llu on dinode #%llu has an invalid parent_dinode of %llu\n",
4819b7895efSMark Fasheh 				 (unsigned long long)bh->b_blocknr,
4829b7895efSMark Fasheh 				 (unsigned long long)OCFS2_I(dir)->ip_blkno,
4839b7895efSMark Fasheh 				 (unsigned long long)le64_to_cpu(trailer->db_blkno));
4849b7895efSMark Fasheh 		goto out;
4859b7895efSMark Fasheh 	}
4869b7895efSMark Fasheh out:
4879b7895efSMark Fasheh 	return rc;
4889b7895efSMark Fasheh }
4899b7895efSMark Fasheh 
4909b7895efSMark Fasheh /*
491a22305ccSJoel Becker  * This function forces all errors to -EIO for consistency with its
492a22305ccSJoel Becker  * predecessor, ocfs2_bread().  We haven't audited what returning the
493a22305ccSJoel Becker  * real error codes would do to callers.  We log the real codes with
494a22305ccSJoel Becker  * mlog_errno() before we squash them.
495a22305ccSJoel Becker  */
ocfs2_read_dir_block(struct inode * inode,u64 v_block,struct buffer_head ** bh,int flags)496a22305ccSJoel Becker static int ocfs2_read_dir_block(struct inode *inode, u64 v_block,
497a22305ccSJoel Becker 				struct buffer_head **bh, int flags)
498a22305ccSJoel Becker {
499a22305ccSJoel Becker 	int rc = 0;
500a22305ccSJoel Becker 	struct buffer_head *tmp = *bh;
501a22305ccSJoel Becker 
502511308d9SJoel Becker 	rc = ocfs2_read_virt_blocks(inode, v_block, 1, &tmp, flags,
503970e4936SJoel Becker 				    ocfs2_validate_dir_block);
50487d35a74SMark Fasheh 	if (rc) {
505a22305ccSJoel Becker 		mlog_errno(rc);
50687d35a74SMark Fasheh 		goto out;
50787d35a74SMark Fasheh 	}
50887d35a74SMark Fasheh 
50987d35a74SMark Fasheh 	if (!(flags & OCFS2_BH_READAHEAD) &&
510e7c17e43SMark Fasheh 	    ocfs2_supports_dir_trailer(inode)) {
5119b7895efSMark Fasheh 		rc = ocfs2_check_dir_trailer(inode, tmp);
5129b7895efSMark Fasheh 		if (rc) {
5139b7895efSMark Fasheh 			if (!*bh)
5149b7895efSMark Fasheh 				brelse(tmp);
5159b7895efSMark Fasheh 			mlog_errno(rc);
51687d35a74SMark Fasheh 			goto out;
51787d35a74SMark Fasheh 		}
51887d35a74SMark Fasheh 	}
519a22305ccSJoel Becker 
520511308d9SJoel Becker 	/* If ocfs2_read_virt_blocks() got us a new bh, pass it up. */
52187d35a74SMark Fasheh 	if (!*bh)
522a22305ccSJoel Becker 		*bh = tmp;
523a22305ccSJoel Becker 
52487d35a74SMark Fasheh out:
525a22305ccSJoel Becker 	return rc ? -EIO : 0;
526a22305ccSJoel Becker }
527a22305ccSJoel Becker 
5289b7895efSMark Fasheh /*
5299b7895efSMark Fasheh  * Read the block at 'phys' which belongs to this directory
5309b7895efSMark Fasheh  * inode. This function does no virtual->physical block translation -
5319b7895efSMark Fasheh  * what's passed in is assumed to be a valid directory block.
5329b7895efSMark Fasheh  */
ocfs2_read_dir_block_direct(struct inode * dir,u64 phys,struct buffer_head ** bh)5339b7895efSMark Fasheh static int ocfs2_read_dir_block_direct(struct inode *dir, u64 phys,
5349b7895efSMark Fasheh 				       struct buffer_head **bh)
5359b7895efSMark Fasheh {
5369b7895efSMark Fasheh 	int ret;
5379b7895efSMark Fasheh 	struct buffer_head *tmp = *bh;
5389b7895efSMark Fasheh 
5398cb471e8SJoel Becker 	ret = ocfs2_read_block(INODE_CACHE(dir), phys, &tmp,
5408cb471e8SJoel Becker 			       ocfs2_validate_dir_block);
5419b7895efSMark Fasheh 	if (ret) {
5429b7895efSMark Fasheh 		mlog_errno(ret);
5439b7895efSMark Fasheh 		goto out;
5449b7895efSMark Fasheh 	}
5459b7895efSMark Fasheh 
5469b7895efSMark Fasheh 	if (ocfs2_supports_dir_trailer(dir)) {
5479b7895efSMark Fasheh 		ret = ocfs2_check_dir_trailer(dir, tmp);
5489b7895efSMark Fasheh 		if (ret) {
5499b7895efSMark Fasheh 			if (!*bh)
5509b7895efSMark Fasheh 				brelse(tmp);
5519b7895efSMark Fasheh 			mlog_errno(ret);
5529b7895efSMark Fasheh 			goto out;
5539b7895efSMark Fasheh 		}
5549b7895efSMark Fasheh 	}
5559b7895efSMark Fasheh 
5569b7895efSMark Fasheh 	if (!ret && !*bh)
5579b7895efSMark Fasheh 		*bh = tmp;
5589b7895efSMark Fasheh out:
5599b7895efSMark Fasheh 	return ret;
5609b7895efSMark Fasheh }
5619b7895efSMark Fasheh 
ocfs2_validate_dx_root(struct super_block * sb,struct buffer_head * bh)5629b7895efSMark Fasheh static int ocfs2_validate_dx_root(struct super_block *sb,
5639b7895efSMark Fasheh 				  struct buffer_head *bh)
5649b7895efSMark Fasheh {
5659b7895efSMark Fasheh 	int ret;
5669b7895efSMark Fasheh 	struct ocfs2_dx_root_block *dx_root;
5679b7895efSMark Fasheh 
5689b7895efSMark Fasheh 	BUG_ON(!buffer_uptodate(bh));
5699b7895efSMark Fasheh 
5709b7895efSMark Fasheh 	dx_root = (struct ocfs2_dx_root_block *) bh->b_data;
5719b7895efSMark Fasheh 
5729b7895efSMark Fasheh 	ret = ocfs2_validate_meta_ecc(sb, bh->b_data, &dx_root->dr_check);
5739b7895efSMark Fasheh 	if (ret) {
5749b7895efSMark Fasheh 		mlog(ML_ERROR,
5759b7895efSMark Fasheh 		     "Checksum failed for dir index root block %llu\n",
5769b7895efSMark Fasheh 		     (unsigned long long)bh->b_blocknr);
5779b7895efSMark Fasheh 		return ret;
5789b7895efSMark Fasheh 	}
5799b7895efSMark Fasheh 
5809b7895efSMark Fasheh 	if (!OCFS2_IS_VALID_DX_ROOT(dx_root)) {
58117a5b9abSGoldwyn Rodrigues 		ret = ocfs2_error(sb,
5827ecef14aSJoe Perches 				  "Dir Index Root # %llu has bad signature %.*s\n",
5839b7895efSMark Fasheh 				  (unsigned long long)le64_to_cpu(dx_root->dr_blkno),
5849b7895efSMark Fasheh 				  7, dx_root->dr_signature);
5859b7895efSMark Fasheh 	}
5869b7895efSMark Fasheh 
58717a5b9abSGoldwyn Rodrigues 	return ret;
5889b7895efSMark Fasheh }
5899b7895efSMark Fasheh 
ocfs2_read_dx_root(struct inode * dir,struct ocfs2_dinode * di,struct buffer_head ** dx_root_bh)5909b7895efSMark Fasheh static int ocfs2_read_dx_root(struct inode *dir, struct ocfs2_dinode *di,
5919b7895efSMark Fasheh 			      struct buffer_head **dx_root_bh)
5929b7895efSMark Fasheh {
5939b7895efSMark Fasheh 	int ret;
5949b7895efSMark Fasheh 	u64 blkno = le64_to_cpu(di->i_dx_root);
5959b7895efSMark Fasheh 	struct buffer_head *tmp = *dx_root_bh;
5969b7895efSMark Fasheh 
5978cb471e8SJoel Becker 	ret = ocfs2_read_block(INODE_CACHE(dir), blkno, &tmp,
5988cb471e8SJoel Becker 			       ocfs2_validate_dx_root);
5999b7895efSMark Fasheh 
6009b7895efSMark Fasheh 	/* If ocfs2_read_block() got us a new bh, pass it up. */
6019b7895efSMark Fasheh 	if (!ret && !*dx_root_bh)
6029b7895efSMark Fasheh 		*dx_root_bh = tmp;
6039b7895efSMark Fasheh 
6049b7895efSMark Fasheh 	return ret;
6059b7895efSMark Fasheh }
6069b7895efSMark Fasheh 
ocfs2_validate_dx_leaf(struct super_block * sb,struct buffer_head * bh)6079b7895efSMark Fasheh static int ocfs2_validate_dx_leaf(struct super_block *sb,
6089b7895efSMark Fasheh 				  struct buffer_head *bh)
6099b7895efSMark Fasheh {
6109b7895efSMark Fasheh 	int ret;
6119b7895efSMark Fasheh 	struct ocfs2_dx_leaf *dx_leaf = (struct ocfs2_dx_leaf *)bh->b_data;
6129b7895efSMark Fasheh 
6139b7895efSMark Fasheh 	BUG_ON(!buffer_uptodate(bh));
6149b7895efSMark Fasheh 
6159b7895efSMark Fasheh 	ret = ocfs2_validate_meta_ecc(sb, bh->b_data, &dx_leaf->dl_check);
6169b7895efSMark Fasheh 	if (ret) {
6179b7895efSMark Fasheh 		mlog(ML_ERROR,
6189b7895efSMark Fasheh 		     "Checksum failed for dir index leaf block %llu\n",
6199b7895efSMark Fasheh 		     (unsigned long long)bh->b_blocknr);
6209b7895efSMark Fasheh 		return ret;
6219b7895efSMark Fasheh 	}
6229b7895efSMark Fasheh 
6239b7895efSMark Fasheh 	if (!OCFS2_IS_VALID_DX_LEAF(dx_leaf)) {
6247ecef14aSJoe Perches 		ret = ocfs2_error(sb, "Dir Index Leaf has bad signature %.*s\n",
6259b7895efSMark Fasheh 				  7, dx_leaf->dl_signature);
6269b7895efSMark Fasheh 	}
6279b7895efSMark Fasheh 
62817a5b9abSGoldwyn Rodrigues 	return ret;
6299b7895efSMark Fasheh }
6309b7895efSMark Fasheh 
/*
 * Read the single directory index leaf block at 'blkno', using
 * ocfs2_validate_dx_leaf() as the validation callback.
 *
 * Whatever *dx_leaf_bh holds on entry is handed to ocfs2_read_block().
 * If it was NULL and the read succeeds, the buffer_head obtained by the
 * read is stored in *dx_leaf_bh; otherwise *dx_leaf_bh is left alone.
 *
 * Returns the status of ocfs2_read_block().
 */
static int ocfs2_read_dx_leaf(struct inode *dir, u64 blkno,
			      struct buffer_head **dx_leaf_bh)
{
	struct buffer_head *bh = *dx_leaf_bh;
	int status;

	status = ocfs2_read_block(INODE_CACHE(dir), blkno, &bh,
				  ocfs2_validate_dx_leaf);
	if (status)
		return status;

	/* Only pass a buffer up if the caller did not supply one. */
	if (*dx_leaf_bh == NULL)
		*dx_leaf_bh = bh;

	return status;
}
6469b7895efSMark Fasheh 
6479b7895efSMark Fasheh /*
6489b7895efSMark Fasheh  * Read a series of dx_leaf blocks. This expects all buffer_head
6499b7895efSMark Fasheh  * pointers to be NULL on function entry.
6509b7895efSMark Fasheh  */
ocfs2_read_dx_leaves(struct inode * dir,u64 start,int num,struct buffer_head ** dx_leaf_bhs)6519b7895efSMark Fasheh static int ocfs2_read_dx_leaves(struct inode *dir, u64 start, int num,
6529b7895efSMark Fasheh 				struct buffer_head **dx_leaf_bhs)
6539b7895efSMark Fasheh {
6549b7895efSMark Fasheh 	int ret;
6559b7895efSMark Fasheh 
6568cb471e8SJoel Becker 	ret = ocfs2_read_blocks(INODE_CACHE(dir), start, num, dx_leaf_bhs, 0,
6579b7895efSMark Fasheh 				ocfs2_validate_dx_leaf);
6589b7895efSMark Fasheh 	if (ret)
6599b7895efSMark Fasheh 		mlog_errno(ret);
6609b7895efSMark Fasheh 
6619b7895efSMark Fasheh 	return ret;
6629b7895efSMark Fasheh }
6639b7895efSMark Fasheh 
ocfs2_find_entry_el(const char * name,int namelen,struct inode * dir,struct ocfs2_dir_entry ** res_dir)6640af4bd38SAdrian Bunk static struct buffer_head *ocfs2_find_entry_el(const char *name, int namelen,
665316f4b9fSMark Fasheh 					       struct inode *dir,
666316f4b9fSMark Fasheh 					       struct ocfs2_dir_entry **res_dir)
667316f4b9fSMark Fasheh {
668316f4b9fSMark Fasheh 	struct super_block *sb;
669316f4b9fSMark Fasheh 	struct buffer_head *bh_use[NAMEI_RA_SIZE];
670316f4b9fSMark Fasheh 	struct buffer_head *bh, *ret = NULL;
671316f4b9fSMark Fasheh 	unsigned long start, block, b;
672316f4b9fSMark Fasheh 	int ra_max = 0;		/* Number of bh's in the readahead
673316f4b9fSMark Fasheh 				   buffer, bh_use[] */
674316f4b9fSMark Fasheh 	int ra_ptr = 0;		/* Current index into readahead
675316f4b9fSMark Fasheh 				   buffer */
676316f4b9fSMark Fasheh 	int num = 0;
6771a5692e4SAlex Shi 	int nblocks, i;
678316f4b9fSMark Fasheh 
679316f4b9fSMark Fasheh 	sb = dir->i_sb;
680316f4b9fSMark Fasheh 
681316f4b9fSMark Fasheh 	nblocks = i_size_read(dir) >> sb->s_blocksize_bits;
682316f4b9fSMark Fasheh 	start = OCFS2_I(dir)->ip_dir_start_lookup;
683316f4b9fSMark Fasheh 	if (start >= nblocks)
684316f4b9fSMark Fasheh 		start = 0;
685316f4b9fSMark Fasheh 	block = start;
686316f4b9fSMark Fasheh 
687316f4b9fSMark Fasheh restart:
688316f4b9fSMark Fasheh 	do {
689316f4b9fSMark Fasheh 		/*
690316f4b9fSMark Fasheh 		 * We deal with the read-ahead logic here.
691316f4b9fSMark Fasheh 		 */
692316f4b9fSMark Fasheh 		if (ra_ptr >= ra_max) {
693316f4b9fSMark Fasheh 			/* Refill the readahead buffer */
694316f4b9fSMark Fasheh 			ra_ptr = 0;
695316f4b9fSMark Fasheh 			b = block;
696316f4b9fSMark Fasheh 			for (ra_max = 0; ra_max < NAMEI_RA_SIZE; ra_max++) {
697316f4b9fSMark Fasheh 				/*
698316f4b9fSMark Fasheh 				 * Terminate if we reach the end of the
699316f4b9fSMark Fasheh 				 * directory and must wrap, or if our
700316f4b9fSMark Fasheh 				 * search has finished at this block.
701316f4b9fSMark Fasheh 				 */
702316f4b9fSMark Fasheh 				if (b >= nblocks || (num && block == start)) {
703316f4b9fSMark Fasheh 					bh_use[ra_max] = NULL;
704316f4b9fSMark Fasheh 					break;
705316f4b9fSMark Fasheh 				}
706316f4b9fSMark Fasheh 				num++;
707316f4b9fSMark Fasheh 
708a22305ccSJoel Becker 				bh = NULL;
7091a5692e4SAlex Shi 				ocfs2_read_dir_block(dir, b++, &bh,
710a22305ccSJoel Becker 							   OCFS2_BH_READAHEAD);
711316f4b9fSMark Fasheh 				bh_use[ra_max] = bh;
712316f4b9fSMark Fasheh 			}
713316f4b9fSMark Fasheh 		}
714316f4b9fSMark Fasheh 		if ((bh = bh_use[ra_ptr++]) == NULL)
715316f4b9fSMark Fasheh 			goto next;
716a22305ccSJoel Becker 		if (ocfs2_read_dir_block(dir, block, &bh, 0)) {
7175e0b3decSJoel Becker 			/* read error, skip block & hope for the best.
718a22305ccSJoel Becker 			 * ocfs2_read_dir_block() has released the bh. */
71961fb9ea4Sjiangyiwen 			mlog(ML_ERROR, "reading directory %llu, "
720316f4b9fSMark Fasheh 				    "offset %lu\n",
721316f4b9fSMark Fasheh 				    (unsigned long long)OCFS2_I(dir)->ip_blkno,
722316f4b9fSMark Fasheh 				    block);
723316f4b9fSMark Fasheh 			goto next;
724316f4b9fSMark Fasheh 		}
725316f4b9fSMark Fasheh 		i = ocfs2_search_dirblock(bh, dir, name, namelen,
726316f4b9fSMark Fasheh 					  block << sb->s_blocksize_bits,
72723193e51SMark Fasheh 					  bh->b_data, sb->s_blocksize,
728316f4b9fSMark Fasheh 					  res_dir);
729316f4b9fSMark Fasheh 		if (i == 1) {
730316f4b9fSMark Fasheh 			OCFS2_I(dir)->ip_dir_start_lookup = block;
731316f4b9fSMark Fasheh 			ret = bh;
732316f4b9fSMark Fasheh 			goto cleanup_and_exit;
733316f4b9fSMark Fasheh 		} else {
734316f4b9fSMark Fasheh 			brelse(bh);
735316f4b9fSMark Fasheh 			if (i < 0)
736316f4b9fSMark Fasheh 				goto cleanup_and_exit;
737316f4b9fSMark Fasheh 		}
738316f4b9fSMark Fasheh 	next:
739316f4b9fSMark Fasheh 		if (++block >= nblocks)
740316f4b9fSMark Fasheh 			block = 0;
741316f4b9fSMark Fasheh 	} while (block != start);
742316f4b9fSMark Fasheh 
743316f4b9fSMark Fasheh 	/*
744316f4b9fSMark Fasheh 	 * If the directory has grown while we were searching, then
745316f4b9fSMark Fasheh 	 * search the last part of the directory before giving up.
746316f4b9fSMark Fasheh 	 */
747316f4b9fSMark Fasheh 	block = nblocks;
748316f4b9fSMark Fasheh 	nblocks = i_size_read(dir) >> sb->s_blocksize_bits;
749316f4b9fSMark Fasheh 	if (block < nblocks) {
750316f4b9fSMark Fasheh 		start = 0;
751316f4b9fSMark Fasheh 		goto restart;
752316f4b9fSMark Fasheh 	}
753316f4b9fSMark Fasheh 
754316f4b9fSMark Fasheh cleanup_and_exit:
755316f4b9fSMark Fasheh 	/* Clean up the read-ahead blocks */
756316f4b9fSMark Fasheh 	for (; ra_ptr < ra_max; ra_ptr++)
757316f4b9fSMark Fasheh 		brelse(bh_use[ra_ptr]);
758316f4b9fSMark Fasheh 
759f1088d47STao Ma 	trace_ocfs2_find_entry_el(ret);
760316f4b9fSMark Fasheh 	return ret;
761316f4b9fSMark Fasheh }
762316f4b9fSMark Fasheh 
/*
 * Find the extent record of a directory index btree that covers
 * 'major_hash'.
 *
 * If 'el' is not a leaf list (l_tree_depth != 0), ocfs2_find_leaf() is
 * used to fetch the leaf extent block and its list is searched instead;
 * a leaf block that itself claims a non-zero depth is reported via
 * ocfs2_error().  The records are then scanned from the last used one
 * backwards for the first whose e_cpos is <= major_hash.
 *
 * On success, each non-NULL output pointer is filled in from that
 * record: *ret_phys_blkno = e_blkno, *ret_cpos = e_cpos and
 * *ret_clen = e_leaf_clusters.
 *
 * Returns 0 on success, the error from ocfs2_find_leaf(), or the value
 * returned by ocfs2_error() when the tree looks corrupted.
 */
static int ocfs2_dx_dir_lookup_rec(struct inode *inode,
				   struct ocfs2_extent_list *el,
				   u32 major_hash,
				   u32 *ret_cpos,
				   u64 *ret_phys_blkno,
				   unsigned int *ret_clen)
{
	int ret = 0, i, found;
	struct buffer_head *eb_bh = NULL;
	struct ocfs2_extent_block *eb;
	struct ocfs2_extent_rec *rec = NULL;

	if (el->l_tree_depth) {
		ret = ocfs2_find_leaf(INODE_CACHE(inode), el, major_hash,
				      &eb_bh);
		if (ret) {
			mlog_errno(ret);
			goto out;
		}

		eb = (struct ocfs2_extent_block *) eb_bh->b_data;
		el = &eb->h_list;

		if (el->l_tree_depth) {
			ret = ocfs2_error(inode->i_sb,
					  "Inode %lu has non zero tree depth in btree tree block %llu\n",
					  inode->i_ino,
					  (unsigned long long)eb_bh->b_blocknr);
			goto out;
		}
	}

	found = 0;
	for (i = le16_to_cpu(el->l_next_free_rec) - 1; i >= 0; i--) {
		rec = &el->l_recs[i];

		if (le32_to_cpu(rec->e_cpos) <= major_hash) {
			found = 1;
			break;
		}
	}

	if (!found) {
		u32 bad_cpos = 0;
		unsigned int bad_clusters = 0;

		/*
		 * An extent list with no used records never enters the
		 * loop above and leaves 'rec' NULL; only look at the
		 * record when there actually is one.
		 */
		if (rec) {
			bad_cpos = le32_to_cpu(rec->e_cpos);
			bad_clusters = ocfs2_rec_clusters(el, rec);
		}

		ret = ocfs2_error(inode->i_sb,
				  "Inode %lu has bad extent record (%u, %u, 0) in btree\n",
				  inode->i_ino,
				  bad_cpos,
				  bad_clusters);
		goto out;
	}

	if (ret_phys_blkno)
		*ret_phys_blkno = le64_to_cpu(rec->e_blkno);
	if (ret_cpos)
		*ret_cpos = le32_to_cpu(rec->e_cpos);
	if (ret_clen)
		*ret_clen = le16_to_cpu(rec->e_leaf_clusters);

out:
	/* eb_bh is NULL unless a leaf extent block was read above. */
	brelse(eb_bh);
	return ret;
}
8259b7895efSMark Fasheh 
8269b7895efSMark Fasheh /*
8279b7895efSMark Fasheh  * Returns the block index, from the start of the cluster which this
8289b7895efSMark Fasheh  * hash belongs too.
8299b7895efSMark Fasheh  */
__ocfs2_dx_dir_hash_idx(struct ocfs2_super * osb,u32 minor_hash)8304ed8a6bbSMark Fasheh static inline unsigned int __ocfs2_dx_dir_hash_idx(struct ocfs2_super *osb,
8314ed8a6bbSMark Fasheh 						   u32 minor_hash)
8324ed8a6bbSMark Fasheh {
8334ed8a6bbSMark Fasheh 	return minor_hash & osb->osb_dx_mask;
8344ed8a6bbSMark Fasheh }
8354ed8a6bbSMark Fasheh 
ocfs2_dx_dir_hash_idx(struct ocfs2_super * osb,struct ocfs2_dx_hinfo * hinfo)8364ed8a6bbSMark Fasheh static inline unsigned int ocfs2_dx_dir_hash_idx(struct ocfs2_super *osb,
8379b7895efSMark Fasheh 					  struct ocfs2_dx_hinfo *hinfo)
8389b7895efSMark Fasheh {
8394ed8a6bbSMark Fasheh 	return __ocfs2_dx_dir_hash_idx(osb, hinfo->minor_hash);
8409b7895efSMark Fasheh }
8419b7895efSMark Fasheh 
/*
 * Translate the hash in 'hinfo' into the index leaf block to search.
 *
 * ocfs2_dx_dir_lookup_rec() supplies the extent record covering the
 * major hash.  Within that record we pick the cluster at offset
 * (major_hash - e_cpos), or the record's last cluster when the hash
 * lies at or beyond its end, and then the block inside that cluster
 * selected by ocfs2_dx_dir_hash_idx().
 *
 * On success *ret_phys_blkno (if non-NULL) receives the resulting block
 * number and *ret_cpos (if non-NULL) the cluster position that was
 * chosen.  Returns 0, or the error from ocfs2_dx_dir_lookup_rec().
 */
static int ocfs2_dx_dir_lookup(struct inode *inode,
			       struct ocfs2_extent_list *el,
			       struct ocfs2_dx_hinfo *hinfo,
			       u32 *ret_cpos,
			       u64 *ret_phys_blkno)
{
	u32 major = hinfo->major_hash;
	u32 rec_cpos;
	u64 target_blk;
	unsigned int rec_clen, rec_end, cluster_off;
	int status;

	status = ocfs2_dx_dir_lookup_rec(inode, el, major, &rec_cpos,
					 &target_blk, &rec_clen);
	if (status) {
		mlog_errno(status);
		return status;
	}

	rec_end = rec_cpos + rec_clen;
	if (major >= rec_end)
		cluster_off = rec_clen - 1;	/* We want the last cluster */
	else
		cluster_off = major - rec_cpos;

	target_blk += ocfs2_clusters_to_blocks(inode->i_sb, cluster_off);
	rec_cpos += cluster_off;

	/*
	 * We now have the cluster which should hold our entry.  The
	 * lower bits of the hash select the block within it.
	 */
	target_blk += ocfs2_dx_dir_hash_idx(OCFS2_SB(inode->i_sb), hinfo);

	if (ret_phys_blkno)
		*ret_phys_blkno = target_blk;
	if (ret_cpos)
		*ret_cpos = rec_cpos;

	return status;
}
8889b7895efSMark Fasheh 
ocfs2_dx_dir_search(const char * name,int namelen,struct inode * dir,struct ocfs2_dx_root_block * dx_root,struct ocfs2_dir_lookup_result * res)8899b7895efSMark Fasheh static int ocfs2_dx_dir_search(const char *name, int namelen,
8909b7895efSMark Fasheh 			       struct inode *dir,
8914ed8a6bbSMark Fasheh 			       struct ocfs2_dx_root_block *dx_root,
8929b7895efSMark Fasheh 			       struct ocfs2_dir_lookup_result *res)
8939b7895efSMark Fasheh {
8949b7895efSMark Fasheh 	int ret, i, found;
8953f649ab7SKees Cook 	u64 phys;
8969b7895efSMark Fasheh 	struct buffer_head *dx_leaf_bh = NULL;
8979b7895efSMark Fasheh 	struct ocfs2_dx_leaf *dx_leaf;
8989b7895efSMark Fasheh 	struct ocfs2_dx_entry *dx_entry = NULL;
8999b7895efSMark Fasheh 	struct buffer_head *dir_ent_bh = NULL;
9009b7895efSMark Fasheh 	struct ocfs2_dir_entry *dir_ent = NULL;
9019b7895efSMark Fasheh 	struct ocfs2_dx_hinfo *hinfo = &res->dl_hinfo;
9024ed8a6bbSMark Fasheh 	struct ocfs2_extent_list *dr_el;
9034ed8a6bbSMark Fasheh 	struct ocfs2_dx_entry_list *entry_list;
9049b7895efSMark Fasheh 
9059b7895efSMark Fasheh 	ocfs2_dx_dir_name_hash(dir, name, namelen, &res->dl_hinfo);
9069b7895efSMark Fasheh 
9074ed8a6bbSMark Fasheh 	if (ocfs2_dx_root_inline(dx_root)) {
9084ed8a6bbSMark Fasheh 		entry_list = &dx_root->dr_entries;
9094ed8a6bbSMark Fasheh 		goto search;
9104ed8a6bbSMark Fasheh 	}
9114ed8a6bbSMark Fasheh 
9124ed8a6bbSMark Fasheh 	dr_el = &dx_root->dr_list;
9134ed8a6bbSMark Fasheh 
9149b7895efSMark Fasheh 	ret = ocfs2_dx_dir_lookup(dir, dr_el, hinfo, NULL, &phys);
9159b7895efSMark Fasheh 	if (ret) {
9169b7895efSMark Fasheh 		mlog_errno(ret);
9179b7895efSMark Fasheh 		goto out;
9189b7895efSMark Fasheh 	}
9199b7895efSMark Fasheh 
920f1088d47STao Ma 	trace_ocfs2_dx_dir_search((unsigned long long)OCFS2_I(dir)->ip_blkno,
921f1088d47STao Ma 				  namelen, name, hinfo->major_hash,
922f1088d47STao Ma 				  hinfo->minor_hash, (unsigned long long)phys);
9239b7895efSMark Fasheh 
9249b7895efSMark Fasheh 	ret = ocfs2_read_dx_leaf(dir, phys, &dx_leaf_bh);
9259b7895efSMark Fasheh 	if (ret) {
9269b7895efSMark Fasheh 		mlog_errno(ret);
9279b7895efSMark Fasheh 		goto out;
9289b7895efSMark Fasheh 	}
9299b7895efSMark Fasheh 
9309b7895efSMark Fasheh 	dx_leaf = (struct ocfs2_dx_leaf *) dx_leaf_bh->b_data;
9319b7895efSMark Fasheh 
932f1088d47STao Ma 	trace_ocfs2_dx_dir_search_leaf_info(
9339b7895efSMark Fasheh 			le16_to_cpu(dx_leaf->dl_list.de_num_used),
9349b7895efSMark Fasheh 			le16_to_cpu(dx_leaf->dl_list.de_count));
9359b7895efSMark Fasheh 
9364ed8a6bbSMark Fasheh 	entry_list = &dx_leaf->dl_list;
9374ed8a6bbSMark Fasheh 
9384ed8a6bbSMark Fasheh search:
9399b7895efSMark Fasheh 	/*
9409b7895efSMark Fasheh 	 * Empty leaf is legal, so no need to check for that.
9419b7895efSMark Fasheh 	 */
9429b7895efSMark Fasheh 	found = 0;
9434ed8a6bbSMark Fasheh 	for (i = 0; i < le16_to_cpu(entry_list->de_num_used); i++) {
9444ed8a6bbSMark Fasheh 		dx_entry = &entry_list->de_entries[i];
9459b7895efSMark Fasheh 
9469b7895efSMark Fasheh 		if (hinfo->major_hash != le32_to_cpu(dx_entry->dx_major_hash)
9479b7895efSMark Fasheh 		    || hinfo->minor_hash != le32_to_cpu(dx_entry->dx_minor_hash))
9489b7895efSMark Fasheh 			continue;
9499b7895efSMark Fasheh 
9509b7895efSMark Fasheh 		/*
9519b7895efSMark Fasheh 		 * Search unindexed leaf block now. We're not
9529b7895efSMark Fasheh 		 * guaranteed to find anything.
9539b7895efSMark Fasheh 		 */
9549b7895efSMark Fasheh 		ret = ocfs2_read_dir_block_direct(dir,
9559b7895efSMark Fasheh 					  le64_to_cpu(dx_entry->dx_dirent_blk),
9569b7895efSMark Fasheh 					  &dir_ent_bh);
9579b7895efSMark Fasheh 		if (ret) {
9589b7895efSMark Fasheh 			mlog_errno(ret);
9599b7895efSMark Fasheh 			goto out;
9609b7895efSMark Fasheh 		}
9619b7895efSMark Fasheh 
9629b7895efSMark Fasheh 		/*
9639b7895efSMark Fasheh 		 * XXX: We should check the unindexed block here,
9649b7895efSMark Fasheh 		 * before using it.
9659b7895efSMark Fasheh 		 */
9669b7895efSMark Fasheh 
9679b7895efSMark Fasheh 		found = ocfs2_search_dirblock(dir_ent_bh, dir, name, namelen,
9689b7895efSMark Fasheh 					      0, dir_ent_bh->b_data,
9699b7895efSMark Fasheh 					      dir->i_sb->s_blocksize, &dir_ent);
9709b7895efSMark Fasheh 		if (found == 1)
9719b7895efSMark Fasheh 			break;
9729b7895efSMark Fasheh 
9739b7895efSMark Fasheh 		if (found == -1) {
9749b7895efSMark Fasheh 			/* This means we found a bad directory entry. */
9759b7895efSMark Fasheh 			ret = -EIO;
9769b7895efSMark Fasheh 			mlog_errno(ret);
9779b7895efSMark Fasheh 			goto out;
9789b7895efSMark Fasheh 		}
9799b7895efSMark Fasheh 
9809b7895efSMark Fasheh 		brelse(dir_ent_bh);
9819b7895efSMark Fasheh 		dir_ent_bh = NULL;
9829b7895efSMark Fasheh 	}
9839b7895efSMark Fasheh 
9849b7895efSMark Fasheh 	if (found <= 0) {
9859b7895efSMark Fasheh 		ret = -ENOENT;
9869b7895efSMark Fasheh 		goto out;
9879b7895efSMark Fasheh 	}
9889b7895efSMark Fasheh 
9899b7895efSMark Fasheh 	res->dl_leaf_bh = dir_ent_bh;
9909b7895efSMark Fasheh 	res->dl_entry = dir_ent;
9919b7895efSMark Fasheh 	res->dl_dx_leaf_bh = dx_leaf_bh;
9929b7895efSMark Fasheh 	res->dl_dx_entry = dx_entry;
9939b7895efSMark Fasheh 
9949b7895efSMark Fasheh 	ret = 0;
9959b7895efSMark Fasheh out:
9969b7895efSMark Fasheh 	if (ret) {
9979b7895efSMark Fasheh 		brelse(dx_leaf_bh);
9989b7895efSMark Fasheh 		brelse(dir_ent_bh);
9999b7895efSMark Fasheh 	}
10009b7895efSMark Fasheh 	return ret;
10019b7895efSMark Fasheh }
10029b7895efSMark Fasheh 
ocfs2_find_entry_dx(const char * name,int namelen,struct inode * dir,struct ocfs2_dir_lookup_result * lookup)10039b7895efSMark Fasheh static int ocfs2_find_entry_dx(const char *name, int namelen,
10049b7895efSMark Fasheh 			       struct inode *dir,
10059b7895efSMark Fasheh 			       struct ocfs2_dir_lookup_result *lookup)
10069b7895efSMark Fasheh {
10079b7895efSMark Fasheh 	int ret;
10089b7895efSMark Fasheh 	struct buffer_head *di_bh = NULL;
10099b7895efSMark Fasheh 	struct ocfs2_dinode *di;
10109b7895efSMark Fasheh 	struct buffer_head *dx_root_bh = NULL;
10119b7895efSMark Fasheh 	struct ocfs2_dx_root_block *dx_root;
10129b7895efSMark Fasheh 
10139b7895efSMark Fasheh 	ret = ocfs2_read_inode_block(dir, &di_bh);
10149b7895efSMark Fasheh 	if (ret) {
10159b7895efSMark Fasheh 		mlog_errno(ret);
10169b7895efSMark Fasheh 		goto out;
10179b7895efSMark Fasheh 	}
10189b7895efSMark Fasheh 
10199b7895efSMark Fasheh 	di = (struct ocfs2_dinode *)di_bh->b_data;
10209b7895efSMark Fasheh 
10219b7895efSMark Fasheh 	ret = ocfs2_read_dx_root(dir, di, &dx_root_bh);
10229b7895efSMark Fasheh 	if (ret) {
10239b7895efSMark Fasheh 		mlog_errno(ret);
10249b7895efSMark Fasheh 		goto out;
10259b7895efSMark Fasheh 	}
10269b7895efSMark Fasheh 	dx_root = (struct ocfs2_dx_root_block *) dx_root_bh->b_data;
10279b7895efSMark Fasheh 
10284ed8a6bbSMark Fasheh 	ret = ocfs2_dx_dir_search(name, namelen, dir, dx_root, lookup);
10299b7895efSMark Fasheh 	if (ret) {
10309b7895efSMark Fasheh 		if (ret != -ENOENT)
10319b7895efSMark Fasheh 			mlog_errno(ret);
10329b7895efSMark Fasheh 		goto out;
10339b7895efSMark Fasheh 	}
10349b7895efSMark Fasheh 
10354ed8a6bbSMark Fasheh 	lookup->dl_dx_root_bh = dx_root_bh;
10364ed8a6bbSMark Fasheh 	dx_root_bh = NULL;
10379b7895efSMark Fasheh out:
10389b7895efSMark Fasheh 	brelse(di_bh);
10399b7895efSMark Fasheh 	brelse(dx_root_bh);
10409b7895efSMark Fasheh 	return ret;
10419b7895efSMark Fasheh }
10429b7895efSMark Fasheh 
104323193e51SMark Fasheh /*
104423193e51SMark Fasheh  * Try to find an entry of the provided name within 'dir'.
104523193e51SMark Fasheh  *
10464a12ca3aSMark Fasheh  * If nothing was found, -ENOENT is returned. Otherwise, zero is
10474a12ca3aSMark Fasheh  * returned and the struct 'res' will contain information useful to
10484a12ca3aSMark Fasheh  * other directory manipulation functions.
104923193e51SMark Fasheh  *
105023193e51SMark Fasheh  * Caller can NOT assume anything about the contents of the
10519b7895efSMark Fasheh  * buffer_heads - they are passed back only so that it can be passed
10529b7895efSMark Fasheh  * into any one of the manipulation functions (add entry, delete
10539b7895efSMark Fasheh  * entry, etc). As an example, bh in the extent directory case is a
10549b7895efSMark Fasheh  * data block, in the inline-data case it actually points to an inode,
10559b7895efSMark Fasheh  * in the indexed directory case, multiple buffers are involved.
105623193e51SMark Fasheh  */
ocfs2_find_entry(const char * name,int namelen,struct inode * dir,struct ocfs2_dir_lookup_result * lookup)10574a12ca3aSMark Fasheh int ocfs2_find_entry(const char *name, int namelen,
10584a12ca3aSMark Fasheh 		     struct inode *dir, struct ocfs2_dir_lookup_result *lookup)
105923193e51SMark Fasheh {
10604a12ca3aSMark Fasheh 	struct buffer_head *bh;
10614a12ca3aSMark Fasheh 	struct ocfs2_dir_entry *res_dir = NULL;
106223193e51SMark Fasheh 
10639b7895efSMark Fasheh 	if (ocfs2_dir_indexed(dir))
10649b7895efSMark Fasheh 		return ocfs2_find_entry_dx(name, namelen, dir, lookup);
10659b7895efSMark Fasheh 
10669b7895efSMark Fasheh 	/*
10679b7895efSMark Fasheh 	 * The unindexed dir code only uses part of the lookup
10689b7895efSMark Fasheh 	 * structure, so there's no reason to push it down further
10699b7895efSMark Fasheh 	 * than this.
10709b7895efSMark Fasheh 	 */
107123193e51SMark Fasheh 	if (OCFS2_I(dir)->ip_dyn_features & OCFS2_INLINE_DATA_FL)
10724a12ca3aSMark Fasheh 		bh = ocfs2_find_entry_id(name, namelen, dir, &res_dir);
10734a12ca3aSMark Fasheh 	else
10744a12ca3aSMark Fasheh 		bh = ocfs2_find_entry_el(name, namelen, dir, &res_dir);
107523193e51SMark Fasheh 
10764a12ca3aSMark Fasheh 	if (bh == NULL)
10774a12ca3aSMark Fasheh 		return -ENOENT;
10784a12ca3aSMark Fasheh 
10794a12ca3aSMark Fasheh 	lookup->dl_leaf_bh = bh;
10804a12ca3aSMark Fasheh 	lookup->dl_entry = res_dir;
10814a12ca3aSMark Fasheh 	return 0;
108223193e51SMark Fasheh }
108323193e51SMark Fasheh 
10845b6a3a2bSMark Fasheh /*
10855b6a3a2bSMark Fasheh  * Update inode number and type of a previously found directory entry.
10865b6a3a2bSMark Fasheh  */
ocfs2_update_entry(struct inode * dir,handle_t * handle,struct ocfs2_dir_lookup_result * res,struct inode * new_entry_inode)108738760e24SMark Fasheh int ocfs2_update_entry(struct inode *dir, handle_t *handle,
10884a12ca3aSMark Fasheh 		       struct ocfs2_dir_lookup_result *res,
108938760e24SMark Fasheh 		       struct inode *new_entry_inode)
109038760e24SMark Fasheh {
109138760e24SMark Fasheh 	int ret;
109213723d00SJoel Becker 	ocfs2_journal_access_func access = ocfs2_journal_access_db;
10934a12ca3aSMark Fasheh 	struct ocfs2_dir_entry *de = res->dl_entry;
10944a12ca3aSMark Fasheh 	struct buffer_head *de_bh = res->dl_leaf_bh;
109538760e24SMark Fasheh 
10965b6a3a2bSMark Fasheh 	/*
10975b6a3a2bSMark Fasheh 	 * The same code works fine for both inline-data and extent
109813723d00SJoel Becker 	 * based directories, so no need to split this up.  The only
109913723d00SJoel Becker 	 * difference is the journal_access function.
11005b6a3a2bSMark Fasheh 	 */
11015b6a3a2bSMark Fasheh 
110213723d00SJoel Becker 	if (OCFS2_I(dir)->ip_dyn_features & OCFS2_INLINE_DATA_FL)
110313723d00SJoel Becker 		access = ocfs2_journal_access_di;
110413723d00SJoel Becker 
11050cf2f763SJoel Becker 	ret = access(handle, INODE_CACHE(dir), de_bh,
11060cf2f763SJoel Becker 		     OCFS2_JOURNAL_ACCESS_WRITE);
110738760e24SMark Fasheh 	if (ret) {
110838760e24SMark Fasheh 		mlog_errno(ret);
110938760e24SMark Fasheh 		goto out;
111038760e24SMark Fasheh 	}
111138760e24SMark Fasheh 
111238760e24SMark Fasheh 	de->inode = cpu_to_le64(OCFS2_I(new_entry_inode)->ip_blkno);
111338760e24SMark Fasheh 	ocfs2_set_de_type(de, new_entry_inode->i_mode);
111438760e24SMark Fasheh 
111538760e24SMark Fasheh 	ocfs2_journal_dirty(handle, de_bh);
111638760e24SMark Fasheh 
111738760e24SMark Fasheh out:
111838760e24SMark Fasheh 	return ret;
111938760e24SMark Fasheh }
112038760e24SMark Fasheh 
11219b7895efSMark Fasheh /*
11229b7895efSMark Fasheh  * __ocfs2_delete_entry deletes a directory entry by merging it with the
11239b7895efSMark Fasheh  * previous entry
11249b7895efSMark Fasheh  */
__ocfs2_delete_entry(handle_t * handle,struct inode * dir,struct ocfs2_dir_entry * de_del,struct buffer_head * bh,char * first_de,unsigned int bytes)11255b6a3a2bSMark Fasheh static int __ocfs2_delete_entry(handle_t *handle, struct inode *dir,
1126316f4b9fSMark Fasheh 				struct ocfs2_dir_entry *de_del,
11275b6a3a2bSMark Fasheh 				struct buffer_head *bh, char *first_de,
11285b6a3a2bSMark Fasheh 				unsigned int bytes)
1129316f4b9fSMark Fasheh {
1130316f4b9fSMark Fasheh 	struct ocfs2_dir_entry *de, *pde;
1131316f4b9fSMark Fasheh 	int i, status = -ENOENT;
113213723d00SJoel Becker 	ocfs2_journal_access_func access = ocfs2_journal_access_db;
1133316f4b9fSMark Fasheh 
113413723d00SJoel Becker 	if (OCFS2_I(dir)->ip_dyn_features & OCFS2_INLINE_DATA_FL)
113513723d00SJoel Becker 		access = ocfs2_journal_access_di;
113613723d00SJoel Becker 
1137316f4b9fSMark Fasheh 	i = 0;
1138316f4b9fSMark Fasheh 	pde = NULL;
11395b6a3a2bSMark Fasheh 	de = (struct ocfs2_dir_entry *) first_de;
11405b6a3a2bSMark Fasheh 	while (i < bytes) {
1141316f4b9fSMark Fasheh 		if (!ocfs2_check_dir_entry(dir, de, bh, i)) {
1142316f4b9fSMark Fasheh 			status = -EIO;
1143316f4b9fSMark Fasheh 			mlog_errno(status);
1144316f4b9fSMark Fasheh 			goto bail;
1145316f4b9fSMark Fasheh 		}
1146316f4b9fSMark Fasheh 		if (de == de_del)  {
11470cf2f763SJoel Becker 			status = access(handle, INODE_CACHE(dir), bh,
1148316f4b9fSMark Fasheh 					OCFS2_JOURNAL_ACCESS_WRITE);
1149316f4b9fSMark Fasheh 			if (status < 0) {
1150316f4b9fSMark Fasheh 				status = -EIO;
1151316f4b9fSMark Fasheh 				mlog_errno(status);
1152316f4b9fSMark Fasheh 				goto bail;
1153316f4b9fSMark Fasheh 			}
1154316f4b9fSMark Fasheh 			if (pde)
11550dd3256eSMarcin Slusarz 				le16_add_cpu(&pde->rec_len,
1156316f4b9fSMark Fasheh 						le16_to_cpu(de->rec_len));
1157316f4b9fSMark Fasheh 			de->inode = 0;
1158cc56c33eSJeff Layton 			inode_inc_iversion(dir);
1159ec20cec7SJoel Becker 			ocfs2_journal_dirty(handle, bh);
1160316f4b9fSMark Fasheh 			goto bail;
1161316f4b9fSMark Fasheh 		}
1162316f4b9fSMark Fasheh 		i += le16_to_cpu(de->rec_len);
1163316f4b9fSMark Fasheh 		pde = de;
1164316f4b9fSMark Fasheh 		de = (struct ocfs2_dir_entry *)((char *)de + le16_to_cpu(de->rec_len));
1165316f4b9fSMark Fasheh 	}
1166316f4b9fSMark Fasheh bail:
1167316f4b9fSMark Fasheh 	return status;
1168316f4b9fSMark Fasheh }
1169316f4b9fSMark Fasheh 
ocfs2_figure_dirent_hole(struct ocfs2_dir_entry * de)1170e7c17e43SMark Fasheh static unsigned int ocfs2_figure_dirent_hole(struct ocfs2_dir_entry *de)
1171e7c17e43SMark Fasheh {
1172e7c17e43SMark Fasheh 	unsigned int hole;
1173e7c17e43SMark Fasheh 
1174e7c17e43SMark Fasheh 	if (le64_to_cpu(de->inode) == 0)
1175e7c17e43SMark Fasheh 		hole = le16_to_cpu(de->rec_len);
1176e7c17e43SMark Fasheh 	else
1177e7c17e43SMark Fasheh 		hole = le16_to_cpu(de->rec_len) -
1178e7c17e43SMark Fasheh 			OCFS2_DIR_REC_LEN(de->name_len);
1179e7c17e43SMark Fasheh 
1180e7c17e43SMark Fasheh 	return hole;
1181e7c17e43SMark Fasheh }
1182e7c17e43SMark Fasheh 
ocfs2_find_max_rec_len(struct super_block * sb,struct buffer_head * dirblock_bh)1183e7c17e43SMark Fasheh static int ocfs2_find_max_rec_len(struct super_block *sb,
1184e7c17e43SMark Fasheh 				  struct buffer_head *dirblock_bh)
1185e7c17e43SMark Fasheh {
1186e7c17e43SMark Fasheh 	int size, this_hole, largest_hole = 0;
1187e7c17e43SMark Fasheh 	char *trailer, *de_buf, *limit, *start = dirblock_bh->b_data;
1188e7c17e43SMark Fasheh 	struct ocfs2_dir_entry *de;
1189e7c17e43SMark Fasheh 
1190e7c17e43SMark Fasheh 	trailer = (char *)ocfs2_trailer_from_bh(dirblock_bh, sb);
1191e7c17e43SMark Fasheh 	size = ocfs2_dir_trailer_blk_off(sb);
1192e7c17e43SMark Fasheh 	limit = start + size;
1193e7c17e43SMark Fasheh 	de_buf = start;
1194e7c17e43SMark Fasheh 	de = (struct ocfs2_dir_entry *)de_buf;
1195e7c17e43SMark Fasheh 	do {
1196e7c17e43SMark Fasheh 		if (de_buf != trailer) {
1197e7c17e43SMark Fasheh 			this_hole = ocfs2_figure_dirent_hole(de);
1198e7c17e43SMark Fasheh 			if (this_hole > largest_hole)
1199e7c17e43SMark Fasheh 				largest_hole = this_hole;
1200e7c17e43SMark Fasheh 		}
1201e7c17e43SMark Fasheh 
1202e7c17e43SMark Fasheh 		de_buf += le16_to_cpu(de->rec_len);
1203e7c17e43SMark Fasheh 		de = (struct ocfs2_dir_entry *)de_buf;
1204e7c17e43SMark Fasheh 	} while (de_buf < limit);
1205e7c17e43SMark Fasheh 
1206e7c17e43SMark Fasheh 	if (largest_hole >= OCFS2_DIR_MIN_REC_LEN)
1207e7c17e43SMark Fasheh 		return largest_hole;
1208e7c17e43SMark Fasheh 	return 0;
1209e7c17e43SMark Fasheh }
1210e7c17e43SMark Fasheh 
ocfs2_dx_list_remove_entry(struct ocfs2_dx_entry_list * entry_list,int index)12114ed8a6bbSMark Fasheh static void ocfs2_dx_list_remove_entry(struct ocfs2_dx_entry_list *entry_list,
12124ed8a6bbSMark Fasheh 				       int index)
12139b7895efSMark Fasheh {
12144ed8a6bbSMark Fasheh 	int num_used = le16_to_cpu(entry_list->de_num_used);
12159b7895efSMark Fasheh 
12169b7895efSMark Fasheh 	if (num_used == 1 || index == (num_used - 1))
12179b7895efSMark Fasheh 		goto clear;
12189b7895efSMark Fasheh 
12194ed8a6bbSMark Fasheh 	memmove(&entry_list->de_entries[index],
12204ed8a6bbSMark Fasheh 		&entry_list->de_entries[index + 1],
12219b7895efSMark Fasheh 		(num_used - index - 1)*sizeof(struct ocfs2_dx_entry));
12229b7895efSMark Fasheh clear:
12239b7895efSMark Fasheh 	num_used--;
12244ed8a6bbSMark Fasheh 	memset(&entry_list->de_entries[num_used], 0,
12259b7895efSMark Fasheh 	       sizeof(struct ocfs2_dx_entry));
12264ed8a6bbSMark Fasheh 	entry_list->de_num_used = cpu_to_le16(num_used);
12279b7895efSMark Fasheh }
12289b7895efSMark Fasheh 
/*
 * Delete a name from an indexed directory.
 *
 * 'lookup' must describe the entry being removed: dl_entry/dl_leaf_bh
 * locate the unindexed dirent, dl_dx_entry locates the matching index
 * entry, and dl_dx_root_bh (plus dl_dx_leaf_bh when the index is not
 * stored inline in the root) are the index blocks.
 *
 * The dirent is removed from its leaf block, the leaf's free space
 * accounting is refreshed (linking the leaf at the head of the root's
 * free list if it was not previously on it), the root's entry count is
 * decremented and the index entry is removed from its list.
 *
 * Returns 0 on success, -EIO if lookup->dl_dx_entry does not point into
 * the used part of the entry list, or the error returned by the journal
 * access helpers or __ocfs2_delete_entry().
 */
static int ocfs2_delete_entry_dx(handle_t *handle, struct inode *dir,
				 struct ocfs2_dir_lookup_result *lookup)
{
	int ret, index, max_rec_len, add_to_free_list = 0;
	ptrdiff_t entry_off;
	struct buffer_head *dx_root_bh = lookup->dl_dx_root_bh;
	struct buffer_head *leaf_bh = lookup->dl_leaf_bh;
	struct ocfs2_dx_leaf *dx_leaf;
	struct ocfs2_dx_entry *dx_entry = lookup->dl_dx_entry;
	struct ocfs2_dir_block_trailer *trailer;
	struct ocfs2_dx_root_block *dx_root;
	struct ocfs2_dx_entry_list *entry_list;

	/*
	 * This function gets a bit messy because we might have to
	 * modify the root block, regardless of whether the indexed
	 * entries are stored inline.
	 */

	/*
	 * *Only* set 'entry_list' here, based on where we're looking
	 * for the indexed entries. Later, we might still want to
	 * journal both blocks, based on free list state.
	 */
	dx_root = (struct ocfs2_dx_root_block *)dx_root_bh->b_data;
	if (ocfs2_dx_root_inline(dx_root)) {
		entry_list = &dx_root->dr_entries;
	} else {
		dx_leaf = (struct ocfs2_dx_leaf *) lookup->dl_dx_leaf_bh->b_data;
		entry_list = &dx_leaf->dl_list;
	}

	/* Neither of these are a disk corruption - that should have
	 * been caught by lookup, before we got here. */
	BUG_ON(le16_to_cpu(entry_list->de_count) <= 0);
	BUG_ON(le16_to_cpu(entry_list->de_num_used) <= 0);

	/*
	 * Work out which slot dx_entry occupies. The byte offset is kept
	 * in a signed pointer-difference type and all arithmetic on it is
	 * signed: dividing an int by sizeof() would be carried out
	 * unsigned, which can turn a pointer that lies *before* the array
	 * into a small negative index that slips past the upper-bound
	 * check below.
	 */
	entry_off = (char *)dx_entry - (char *)entry_list->de_entries;
	if (entry_off < 0 ||
	    entry_off / (ptrdiff_t)sizeof(*dx_entry) >=
	    le16_to_cpu(entry_list->de_num_used)) {
		mlog(ML_ERROR, "Dir %llu: Bad dx_entry ptr idx %d, (%p, %p)\n",
		     (unsigned long long)OCFS2_I(dir)->ip_blkno,
		     (int)(entry_off / (ptrdiff_t)sizeof(*dx_entry)),
		     entry_list, dx_entry);
		return -EIO;
	}
	index = entry_off / (ptrdiff_t)sizeof(*dx_entry);

	/*
	 * We know that removal of this dirent will leave enough room
	 * for a new one, so add this block to the free list if it
	 * isn't already there.
	 */
	trailer = ocfs2_trailer_from_bh(leaf_bh, dir->i_sb);
	if (trailer->db_free_rec_len == 0)
		add_to_free_list = 1;

	/*
	 * Add the block holding our index into the journal before
	 * removing the unindexed entry. If we get an error return
	 * from __ocfs2_delete_entry(), then it hasn't removed the
	 * entry yet. Likewise, successful return means we *must*
	 * remove the indexed entry.
	 *
	 * We're also careful to journal the root tree block here as
	 * the entry count needs to be updated. Also, we might be
	 * adding to the start of the free list.
	 */
	ret = ocfs2_journal_access_dr(handle, INODE_CACHE(dir), dx_root_bh,
				      OCFS2_JOURNAL_ACCESS_WRITE);
	if (ret) {
		mlog_errno(ret);
		goto out;
	}

	if (!ocfs2_dx_root_inline(dx_root)) {
		ret = ocfs2_journal_access_dl(handle, INODE_CACHE(dir),
					      lookup->dl_dx_leaf_bh,
					      OCFS2_JOURNAL_ACCESS_WRITE);
		if (ret) {
			mlog_errno(ret);
			goto out;
		}
	}

	trace_ocfs2_delete_entry_dx((unsigned long long)OCFS2_I(dir)->ip_blkno,
				    index);

	ret = __ocfs2_delete_entry(handle, dir, lookup->dl_entry,
				   leaf_bh, leaf_bh->b_data, leaf_bh->b_size);
	if (ret) {
		mlog_errno(ret);
		goto out;
	}

	/* Refresh the leaf's largest-free-record hint after the removal. */
	max_rec_len = ocfs2_find_max_rec_len(dir->i_sb, leaf_bh);
	trailer->db_free_rec_len = cpu_to_le16(max_rec_len);
	if (add_to_free_list) {
		/* Push this leaf onto the head of the root's free list. */
		trailer->db_free_next = dx_root->dr_free_blk;
		dx_root->dr_free_blk = cpu_to_le64(leaf_bh->b_blocknr);
		ocfs2_journal_dirty(handle, dx_root_bh);
	}

	/* leaf_bh was journal_accessed for us in __ocfs2_delete_entry */
	ocfs2_journal_dirty(handle, leaf_bh);

	le32_add_cpu(&dx_root->dr_num_entries, -1);
	ocfs2_journal_dirty(handle, dx_root_bh);

	ocfs2_dx_list_remove_entry(entry_list, index);

	/* With an inline index the list lives in the root, dirtied above. */
	if (!ocfs2_dx_root_inline(dx_root))
		ocfs2_journal_dirty(handle, lookup->dl_dx_leaf_bh);

out:
	return ret;
}
13449b7895efSMark Fasheh 
/*
 * Delete 'de_del' from a directory whose entries are stored inline in
 * the inode block. The inode block is read to locate the inline data
 * area, which is then searched over i_size_read(dir) bytes.
 *
 * Returns 0 on success or the error from ocfs2_read_inode_block() /
 * __ocfs2_delete_entry().
 */
static inline int ocfs2_delete_entry_id(handle_t *handle,
					struct inode *dir,
					struct ocfs2_dir_entry *de_del,
					struct buffer_head *bh)
{
	int status;
	struct buffer_head *inode_bh = NULL;
	struct ocfs2_dinode *dinode;

	status = ocfs2_read_inode_block(dir, &inode_bh);
	if (status) {
		mlog_errno(status);
		return status;
	}

	dinode = (struct ocfs2_dinode *)inode_bh->b_data;

	status = __ocfs2_delete_entry(handle, dir, de_del, bh,
				      dinode->id2.i_data.id_data,
				      i_size_read(dir));

	/* Drop the reference taken by ocfs2_read_inode_block(). */
	brelse(inode_bh);

	return status;
}
13715b6a3a2bSMark Fasheh 
/*
 * Delete 'de_del' from a directory block: the search area is the whole
 * of 'bh' (b_data, b_size bytes).
 */
static inline int ocfs2_delete_entry_el(handle_t *handle,
					struct inode *dir,
					struct ocfs2_dir_entry *de_del,
					struct buffer_head *bh)
{
	char *block_start = bh->b_data;
	size_t block_bytes = bh->b_size;

	return __ocfs2_delete_entry(handle, dir, de_del, bh, block_start,
				    block_bytes);
}
13805b6a3a2bSMark Fasheh 
13815b6a3a2bSMark Fasheh /*
13829b7895efSMark Fasheh  * Delete a directory entry. Hide the details of directory
13839b7895efSMark Fasheh  * implementation from the caller.
13845b6a3a2bSMark Fasheh  */
ocfs2_delete_entry(handle_t * handle,struct inode * dir,struct ocfs2_dir_lookup_result * res)13855b6a3a2bSMark Fasheh int ocfs2_delete_entry(handle_t *handle,
13865b6a3a2bSMark Fasheh 		       struct inode *dir,
13874a12ca3aSMark Fasheh 		       struct ocfs2_dir_lookup_result *res)
13885b6a3a2bSMark Fasheh {
13899b7895efSMark Fasheh 	if (ocfs2_dir_indexed(dir))
13909b7895efSMark Fasheh 		return ocfs2_delete_entry_dx(handle, dir, res);
13919b7895efSMark Fasheh 
13925b6a3a2bSMark Fasheh 	if (OCFS2_I(dir)->ip_dyn_features & OCFS2_INLINE_DATA_FL)
13934a12ca3aSMark Fasheh 		return ocfs2_delete_entry_id(handle, dir, res->dl_entry,
13944a12ca3aSMark Fasheh 					     res->dl_leaf_bh);
13955b6a3a2bSMark Fasheh 
13964a12ca3aSMark Fasheh 	return ocfs2_delete_entry_el(handle, dir, res->dl_entry,
13974a12ca3aSMark Fasheh 				     res->dl_leaf_bh);
13985b6a3a2bSMark Fasheh }
13995b6a3a2bSMark Fasheh 
14008553cf4fSMark Fasheh /*
14018553cf4fSMark Fasheh  * Check whether 'de' has enough room to hold an entry of
14028553cf4fSMark Fasheh  * 'new_rec_len' bytes.
14038553cf4fSMark Fasheh  */
ocfs2_dirent_would_fit(struct ocfs2_dir_entry * de,unsigned int new_rec_len)14048553cf4fSMark Fasheh static inline int ocfs2_dirent_would_fit(struct ocfs2_dir_entry *de,
14058553cf4fSMark Fasheh 					 unsigned int new_rec_len)
14068553cf4fSMark Fasheh {
14078553cf4fSMark Fasheh 	unsigned int de_really_used;
14088553cf4fSMark Fasheh 
14098553cf4fSMark Fasheh 	/* Check whether this is an empty record with enough space */
14108553cf4fSMark Fasheh 	if (le64_to_cpu(de->inode) == 0 &&
14118553cf4fSMark Fasheh 	    le16_to_cpu(de->rec_len) >= new_rec_len)
14128553cf4fSMark Fasheh 		return 1;
14138553cf4fSMark Fasheh 
14148553cf4fSMark Fasheh 	/*
14158553cf4fSMark Fasheh 	 * Record might have free space at the end which we can
14168553cf4fSMark Fasheh 	 * use.
14178553cf4fSMark Fasheh 	 */
14188553cf4fSMark Fasheh 	de_really_used = OCFS2_DIR_REC_LEN(de->name_len);
14198553cf4fSMark Fasheh 	if (le16_to_cpu(de->rec_len) >= (de_really_used + new_rec_len))
14208553cf4fSMark Fasheh 	    return 1;
14218553cf4fSMark Fasheh 
14228553cf4fSMark Fasheh 	return 0;
14238553cf4fSMark Fasheh }
14248553cf4fSMark Fasheh 
ocfs2_dx_dir_leaf_insert_tail(struct ocfs2_dx_leaf * dx_leaf,struct ocfs2_dx_entry * dx_new_entry)14259b7895efSMark Fasheh static void ocfs2_dx_dir_leaf_insert_tail(struct ocfs2_dx_leaf *dx_leaf,
14269b7895efSMark Fasheh 					  struct ocfs2_dx_entry *dx_new_entry)
14279b7895efSMark Fasheh {
14289b7895efSMark Fasheh 	int i;
14299b7895efSMark Fasheh 
14309b7895efSMark Fasheh 	i = le16_to_cpu(dx_leaf->dl_list.de_num_used);
14319b7895efSMark Fasheh 	dx_leaf->dl_list.de_entries[i] = *dx_new_entry;
14329b7895efSMark Fasheh 
14339b7895efSMark Fasheh 	le16_add_cpu(&dx_leaf->dl_list.de_num_used, 1);
14349b7895efSMark Fasheh }
14359b7895efSMark Fasheh 
/*
 * Append an index entry for the name hashed in 'hinfo', pointing at
 * directory block 'dirent_blk', to the end of 'entry_list'.
 *
 * The slot at de_num_used is zeroed, filled in (little-endian on disk)
 * and de_num_used is incremented. No capacity check is made here.
 */
static void ocfs2_dx_entry_list_insert(struct ocfs2_dx_entry_list *entry_list,
				       struct ocfs2_dx_hinfo *hinfo,
				       u64 dirent_blk)
{
	int slot = le16_to_cpu(entry_list->de_num_used);
	struct ocfs2_dx_entry *new_entry = &entry_list->de_entries[slot];

	memset(new_entry, 0, sizeof(*new_entry));

	new_entry->dx_dirent_blk = cpu_to_le64(dirent_blk);
	new_entry->dx_major_hash = cpu_to_le32(hinfo->major_hash);
	new_entry->dx_minor_hash = cpu_to_le32(hinfo->minor_hash);

	le16_add_cpu(&entry_list->de_num_used, 1);
}
14534ed8a6bbSMark Fasheh 
/*
 * Journal 'dx_leaf_bh' for writing, append an index entry for
 * (hinfo, dirent_blk) to the leaf's entry list and mark the buffer
 * dirty in the journal.
 *
 * Returns 0 on success or the error from ocfs2_journal_access_dl(),
 * in which case the leaf is left untouched.
 */
static int __ocfs2_dx_dir_leaf_insert(struct inode *dir, handle_t *handle,
				      struct ocfs2_dx_hinfo *hinfo,
				      u64 dirent_blk,
				      struct buffer_head *dx_leaf_bh)
{
	struct ocfs2_dx_leaf *leaf;
	int status;

	status = ocfs2_journal_access_dl(handle, INODE_CACHE(dir), dx_leaf_bh,
					 OCFS2_JOURNAL_ACCESS_WRITE);
	if (status) {
		mlog_errno(status);
		return status;
	}

	leaf = (struct ocfs2_dx_leaf *)dx_leaf_bh->b_data;
	ocfs2_dx_entry_list_insert(&leaf->dl_list, hinfo, dirent_blk);
	ocfs2_journal_dirty(handle, dx_leaf_bh);

	return status;
}
14769b7895efSMark Fasheh 
/*
 * Append an index entry for (hinfo, dirent_blk) to the entry list
 * embedded in the dx root block. 'dir' and 'handle' are not used here;
 * this function performs no journaling itself.
 */
static void ocfs2_dx_inline_root_insert(struct inode *dir, handle_t *handle,
					struct ocfs2_dx_hinfo *hinfo,
					u64 dirent_blk,
					struct ocfs2_dx_root_block *dx_root)
{
	struct ocfs2_dx_entry_list *root_list = &dx_root->dr_entries;

	ocfs2_dx_entry_list_insert(root_list, hinfo, dirent_blk);
}
1484e3a93c2dSMark Fasheh 
/*
 * Add an index entry for the name described by 'lookup' (dl_hinfo),
 * pointing at the block number of lookup->dl_leaf_bh.
 *
 * The dx root block is journaled for writing first. The entry then
 * goes either into the root's inline list or into lookup->dl_dx_leaf_bh,
 * after which the root's dr_num_entries is incremented and the root is
 * marked dirty.
 *
 * Returns 0 on success or the error from the journal access helpers;
 * on error the root's entry count is not changed.
 */
static int ocfs2_dx_dir_insert(struct inode *dir, handle_t *handle,
			       struct ocfs2_dir_lookup_result *lookup)
{
	int status;
	u64 dirent_blk;
	struct buffer_head *root_bh = lookup->dl_dx_root_bh;
	struct ocfs2_dx_root_block *root;

	status = ocfs2_journal_access_dr(handle, INODE_CACHE(dir), root_bh,
					 OCFS2_JOURNAL_ACCESS_WRITE);
	if (status) {
		mlog_errno(status);
		return status;
	}

	root = (struct ocfs2_dx_root_block *)root_bh->b_data;
	dirent_blk = lookup->dl_leaf_bh->b_blocknr;

	if (!ocfs2_dx_root_inline(root)) {
		status = __ocfs2_dx_dir_leaf_insert(dir, handle,
						    &lookup->dl_hinfo,
						    dirent_blk,
						    lookup->dl_dx_leaf_bh);
		if (status)
			return status;
	} else {
		ocfs2_dx_inline_root_insert(dir, handle, &lookup->dl_hinfo,
					    dirent_blk, root);
	}

	le32_add_cpu(&root->dr_num_entries, 1);
	ocfs2_journal_dirty(handle, root_bh);

	return status;
}
15194ed8a6bbSMark Fasheh 
/*
 * Unlink lookup->dl_leaf_bh from the directory's free-space list.
 *
 * Whatever pointed at this leaf - the dx root's dr_free_blk when
 * ocfs2_free_list_at_root() says so, otherwise the trailer of
 * lookup->dl_prev_leaf_bh - is redirected to the leaf's successor.
 * The leaf's own free-list fields are then cleared and both buffers
 * are marked dirty in the journal.
 */
static void ocfs2_remove_block_from_free_list(struct inode *dir,
				       handle_t *handle,
				       struct ocfs2_dir_lookup_result *lookup)
{
	struct buffer_head *leaf_bh = lookup->dl_leaf_bh;
	struct buffer_head *pred_bh;
	struct ocfs2_dir_block_trailer *leaf_trailer;

	leaf_trailer = ocfs2_trailer_from_bh(leaf_bh, dir->i_sb);

	if (!ocfs2_free_list_at_root(lookup)) {
		struct ocfs2_dir_block_trailer *pred_trailer;

		pred_bh = lookup->dl_prev_leaf_bh;
		pred_trailer = ocfs2_trailer_from_bh(pred_bh, dir->i_sb);
		pred_trailer->db_free_next = leaf_trailer->db_free_next;
	} else {
		struct ocfs2_dx_root_block *root;

		pred_bh = lookup->dl_dx_root_bh;
		root = (struct ocfs2_dx_root_block *)pred_bh->b_data;
		root->dr_free_blk = leaf_trailer->db_free_next;
	}

	/* The leaf is off the list: no free record, no successor. */
	leaf_trailer->db_free_rec_len = cpu_to_le16(0);
	leaf_trailer->db_free_next = cpu_to_le64(0);

	ocfs2_journal_dirty(handle, pred_bh);
	ocfs2_journal_dirty(handle, leaf_bh);
}
1546e7c17e43SMark Fasheh 
1547e7c17e43SMark Fasheh /*
1548e7c17e43SMark Fasheh  * This expects that a journal write has been reserved on
1549e7c17e43SMark Fasheh  * lookup->dl_prev_leaf_bh or lookup->dl_dx_root_bh
1550e7c17e43SMark Fasheh  */
ocfs2_recalc_free_list(struct inode * dir,handle_t * handle,struct ocfs2_dir_lookup_result * lookup)1551e7c17e43SMark Fasheh static void ocfs2_recalc_free_list(struct inode *dir, handle_t *handle,
1552e7c17e43SMark Fasheh 				   struct ocfs2_dir_lookup_result *lookup)
1553e7c17e43SMark Fasheh {
1554e7c17e43SMark Fasheh 	int max_rec_len;
1555e7c17e43SMark Fasheh 	struct ocfs2_dir_block_trailer *trailer;
1556e7c17e43SMark Fasheh 
1557e7c17e43SMark Fasheh 	/* Walk dl_leaf_bh to figure out what the new free rec_len is. */
1558e7c17e43SMark Fasheh 	max_rec_len = ocfs2_find_max_rec_len(dir->i_sb, lookup->dl_leaf_bh);
1559e7c17e43SMark Fasheh 	if (max_rec_len) {
1560e7c17e43SMark Fasheh 		/*
1561e7c17e43SMark Fasheh 		 * There's still room in this block, so no need to remove it
1562e7c17e43SMark Fasheh 		 * from the free list. In this case, we just want to update
1563e7c17e43SMark Fasheh 		 * the rec len accounting.
1564e7c17e43SMark Fasheh 		 */
1565e7c17e43SMark Fasheh 		trailer = ocfs2_trailer_from_bh(lookup->dl_leaf_bh, dir->i_sb);
1566e7c17e43SMark Fasheh 		trailer->db_free_rec_len = cpu_to_le16(max_rec_len);
1567e7c17e43SMark Fasheh 		ocfs2_journal_dirty(handle, lookup->dl_leaf_bh);
1568e7c17e43SMark Fasheh 	} else {
1569e7c17e43SMark Fasheh 		ocfs2_remove_block_from_free_list(dir, handle, lookup);
1570e7c17e43SMark Fasheh 	}
1571e7c17e43SMark Fasheh }
1572e7c17e43SMark Fasheh 
1573316f4b9fSMark Fasheh /* we don't always have a dentry for what we want to add, so people
1574316f4b9fSMark Fasheh  * like orphan dir can call this instead.
1575316f4b9fSMark Fasheh  *
15764a12ca3aSMark Fasheh  * The lookup context must have been filled from
15774a12ca3aSMark Fasheh  * ocfs2_prepare_dir_for_insert.
 *
 * Scans the dirents in lookup->dl_leaf_bh (or, for an unindexed
 * inline-data directory, the inline data area of parent_fe_bh, over
 * i_size_read(dir) bytes) for a record the new name fits into, and
 * fills that record in. If the chosen record is in use, the new entry
 * is carved out of the slack space behind its name.
 *
 * Returns 0 on success, -EINVAL if namelen is 0, -ENOENT if a dirent
 * fails ocfs2_check_dir_entry(), -EEXIST if the name is already
 * present in the scanned area, or the error returned by the
 * inode-dirty / journal-access / index-insert helpers.
1578316f4b9fSMark Fasheh  */
__ocfs2_add_entry(handle_t * handle,struct inode * dir,const char * name,int namelen,struct inode * inode,u64 blkno,struct buffer_head * parent_fe_bh,struct ocfs2_dir_lookup_result * lookup)1579316f4b9fSMark Fasheh int __ocfs2_add_entry(handle_t *handle,
1580316f4b9fSMark Fasheh 		      struct inode *dir,
1581316f4b9fSMark Fasheh 		      const char *name, int namelen,
1582316f4b9fSMark Fasheh 		      struct inode *inode, u64 blkno,
1583316f4b9fSMark Fasheh 		      struct buffer_head *parent_fe_bh,
15844a12ca3aSMark Fasheh 		      struct ocfs2_dir_lookup_result *lookup)
1585316f4b9fSMark Fasheh {
1586316f4b9fSMark Fasheh 	unsigned long offset;
1587316f4b9fSMark Fasheh 	unsigned short rec_len;
1588316f4b9fSMark Fasheh 	struct ocfs2_dir_entry *de, *de1;
15895b6a3a2bSMark Fasheh 	struct ocfs2_dinode *di = (struct ocfs2_dinode *)parent_fe_bh->b_data;
15905b6a3a2bSMark Fasheh 	struct super_block *sb = dir->i_sb;
15912e173152SDaeseok Youn 	int retval;
	/* Default search area: the whole of the leaf block. */
15925b6a3a2bSMark Fasheh 	unsigned int size = sb->s_blocksize;
15934a12ca3aSMark Fasheh 	struct buffer_head *insert_bh = lookup->dl_leaf_bh;
15945b6a3a2bSMark Fasheh 	char *data_start = insert_bh->b_data;
1595316f4b9fSMark Fasheh 
1596316f4b9fSMark Fasheh 	if (!namelen)
1597316f4b9fSMark Fasheh 		return -EINVAL;
1598316f4b9fSMark Fasheh 
1599e7c17e43SMark Fasheh 	if (ocfs2_dir_indexed(dir)) {
1600e7c17e43SMark Fasheh 		struct buffer_head *bh;
1601e7c17e43SMark Fasheh 
1602e7c17e43SMark Fasheh 		/*
1603e7c17e43SMark Fasheh 		 * An indexed dir may require that we update the free space
1604e7c17e43SMark Fasheh 		 * list. Reserve a write to the previous node in the list so
1605e7c17e43SMark Fasheh 		 * that we don't fail later.
1606e7c17e43SMark Fasheh 		 *
1607e7c17e43SMark Fasheh 		 * XXX: This can be either a dx_root_block, or an unindexed
1608e7c17e43SMark Fasheh 		 * directory tree leaf block.
1609e7c17e43SMark Fasheh 		 */
1610e7c17e43SMark Fasheh 		if (ocfs2_free_list_at_root(lookup)) {
1611e7c17e43SMark Fasheh 			bh = lookup->dl_dx_root_bh;
16120cf2f763SJoel Becker 			retval = ocfs2_journal_access_dr(handle,
16130cf2f763SJoel Becker 						 INODE_CACHE(dir), bh,
1614e7c17e43SMark Fasheh 						 OCFS2_JOURNAL_ACCESS_WRITE);
1615e7c17e43SMark Fasheh 		} else {
1616e7c17e43SMark Fasheh 			bh = lookup->dl_prev_leaf_bh;
16170cf2f763SJoel Becker 			retval = ocfs2_journal_access_db(handle,
16180cf2f763SJoel Becker 						 INODE_CACHE(dir), bh,
1619e7c17e43SMark Fasheh 						 OCFS2_JOURNAL_ACCESS_WRITE);
1620e7c17e43SMark Fasheh 		}
1621e7c17e43SMark Fasheh 		if (retval) {
1622e7c17e43SMark Fasheh 			mlog_errno(retval);
1623e7c17e43SMark Fasheh 			return retval;
1624e7c17e43SMark Fasheh 		}
1625e7c17e43SMark Fasheh 	} else if (OCFS2_I(dir)->ip_dyn_features & OCFS2_INLINE_DATA_FL) {
		/* Inline-data dir: search the inode's inline area instead. */
16265b6a3a2bSMark Fasheh 		data_start = di->id2.i_data.id_data;
16275b6a3a2bSMark Fasheh 		size = i_size_read(dir);
16285b6a3a2bSMark Fasheh 
16295b6a3a2bSMark Fasheh 		BUG_ON(insert_bh != parent_fe_bh);
16305b6a3a2bSMark Fasheh 	}
16315b6a3a2bSMark Fasheh 
	/* rec_len is the space the new entry needs for a name of namelen. */
1632316f4b9fSMark Fasheh 	rec_len = OCFS2_DIR_REC_LEN(namelen);
1633316f4b9fSMark Fasheh 	offset = 0;
16345b6a3a2bSMark Fasheh 	de = (struct ocfs2_dir_entry *) data_start;
	/*
	 * This loop has no 'break'; it is left only through 'goto bail'
	 * (or the BUG_ON/mlog_bug_on_msg assertions).
	 */
1635316f4b9fSMark Fasheh 	while (1) {
16365b6a3a2bSMark Fasheh 		BUG_ON((char *)de >= (size + data_start));
16375b6a3a2bSMark Fasheh 
1638316f4b9fSMark Fasheh 		/* These checks should've already been passed by the
1639316f4b9fSMark Fasheh 		 * prepare function, but I guess we can leave them
1640316f4b9fSMark Fasheh 		 * here anyway. */
1641316f4b9fSMark Fasheh 		if (!ocfs2_check_dir_entry(dir, de, insert_bh, offset)) {
1642316f4b9fSMark Fasheh 			retval = -ENOENT;
1643316f4b9fSMark Fasheh 			goto bail;
1644316f4b9fSMark Fasheh 		}
1645316f4b9fSMark Fasheh 		if (ocfs2_match(namelen, name, de)) {
1646316f4b9fSMark Fasheh 			retval = -EEXIST;
1647316f4b9fSMark Fasheh 			goto bail;
1648316f4b9fSMark Fasheh 		}
16498553cf4fSMark Fasheh 
165087d35a74SMark Fasheh 		/* We're guaranteed that we should have space, so we
165187d35a74SMark Fasheh 		 * can't possibly have hit the trailer...right? */
165287d35a74SMark Fasheh 		mlog_bug_on_msg(ocfs2_skip_dir_trailer(dir, de, offset, size),
165387d35a74SMark Fasheh 				"Hit dir trailer trying to insert %.*s "
165487d35a74SMark Fasheh 			        "(namelen %d) into directory %llu.  "
165587d35a74SMark Fasheh 				"offset is %lu, trailer offset is %d\n",
165687d35a74SMark Fasheh 				namelen, name, namelen,
165787d35a74SMark Fasheh 				(unsigned long long)parent_fe_bh->b_blocknr,
165887d35a74SMark Fasheh 				offset, ocfs2_dir_trailer_blk_off(dir->i_sb));
165987d35a74SMark Fasheh 
16608553cf4fSMark Fasheh 		if (ocfs2_dirent_would_fit(de, rec_len)) {
			/*
			 * Note: the in-memory timestamps are updated here,
			 * before journal access to insert_bh is obtained.
			 */
1661*6861de97SJeff Layton 			dir->i_mtime = inode_set_ctime_current(dir);
1662316f4b9fSMark Fasheh 			retval = ocfs2_mark_inode_dirty(handle, dir, parent_fe_bh);
1663316f4b9fSMark Fasheh 			if (retval < 0) {
1664316f4b9fSMark Fasheh 				mlog_errno(retval);
1665316f4b9fSMark Fasheh 				goto bail;
1666316f4b9fSMark Fasheh 			}
1667316f4b9fSMark Fasheh 
			/*
			 * insert_bh is journaled as an inode block when it is
			 * parent_fe_bh, otherwise as a directory block. Only
			 * in the latter case can an index entry be added.
			 */
166813723d00SJoel Becker 			if (insert_bh == parent_fe_bh)
16692e173152SDaeseok Youn 				retval = ocfs2_journal_access_di(handle,
16700cf2f763SJoel Becker 								 INODE_CACHE(dir),
167113723d00SJoel Becker 								 insert_bh,
167213723d00SJoel Becker 								 OCFS2_JOURNAL_ACCESS_WRITE);
16739b7895efSMark Fasheh 			else {
16742e173152SDaeseok Youn 				retval = ocfs2_journal_access_db(handle,
16750cf2f763SJoel Becker 								 INODE_CACHE(dir),
167613723d00SJoel Becker 								 insert_bh,
1677316f4b9fSMark Fasheh 					      OCFS2_JOURNAL_ACCESS_WRITE);
16784ed8a6bbSMark Fasheh 
16792e173152SDaeseok Youn 				if (!retval && ocfs2_dir_indexed(dir))
16802e173152SDaeseok Youn 					retval = ocfs2_dx_dir_insert(dir,
16819b7895efSMark Fasheh 								handle,
16829b7895efSMark Fasheh 								lookup);
16832e173152SDaeseok Youn 			}
16842e173152SDaeseok Youn 
16852e173152SDaeseok Youn 			if (retval) {
16862e173152SDaeseok Youn 				mlog_errno(retval);
16879b7895efSMark Fasheh 				goto bail;
16889b7895efSMark Fasheh 			}
16899b7895efSMark Fasheh 
1690316f4b9fSMark Fasheh 			/* By now the buffer is marked for journaling */
1691316f4b9fSMark Fasheh 			offset += le16_to_cpu(de->rec_len);
			/*
			 * 'de' is in use: shrink it to the length its own
			 * name needs and let 'de1', placed right behind it,
			 * take over the remainder of the original rec_len.
			 * An unused 'de' (inode == 0) is reused in place.
			 */
1692316f4b9fSMark Fasheh 			if (le64_to_cpu(de->inode)) {
1693316f4b9fSMark Fasheh 				de1 = (struct ocfs2_dir_entry *)((char *) de +
1694316f4b9fSMark Fasheh 					OCFS2_DIR_REC_LEN(de->name_len));
1695316f4b9fSMark Fasheh 				de1->rec_len =
1696316f4b9fSMark Fasheh 					cpu_to_le16(le16_to_cpu(de->rec_len) -
1697316f4b9fSMark Fasheh 					OCFS2_DIR_REC_LEN(de->name_len));
1698316f4b9fSMark Fasheh 				de->rec_len = cpu_to_le16(OCFS2_DIR_REC_LEN(de->name_len));
1699316f4b9fSMark Fasheh 				de = de1;
1700316f4b9fSMark Fasheh 			}
			/*
			 * With blkno == 0 the entry keeps inode 0 and
			 * FT_UNKNOWN; 'inode' is only dereferenced when
			 * blkno is non-zero.
			 */
17019dc2108dSPhillip Potter 			de->file_type = FT_UNKNOWN;
1702316f4b9fSMark Fasheh 			if (blkno) {
1703316f4b9fSMark Fasheh 				de->inode = cpu_to_le64(blkno);
1704316f4b9fSMark Fasheh 				ocfs2_set_de_type(de, inode->i_mode);
1705316f4b9fSMark Fasheh 			} else
1706316f4b9fSMark Fasheh 				de->inode = 0;
1707316f4b9fSMark Fasheh 			de->name_len = namelen;
1708316f4b9fSMark Fasheh 			memcpy(de->name, name, namelen);
1709316f4b9fSMark Fasheh 
			/* The leaf's free space changed; update the free list. */
1710e7c17e43SMark Fasheh 			if (ocfs2_dir_indexed(dir))
1711e7c17e43SMark Fasheh 				ocfs2_recalc_free_list(dir, handle, lookup);
1712e7c17e43SMark Fasheh 
1713cc56c33eSJeff Layton 			inode_inc_iversion(dir);
1714ec20cec7SJoel Becker 			ocfs2_journal_dirty(handle, insert_bh);
1715316f4b9fSMark Fasheh 			retval = 0;
1716316f4b9fSMark Fasheh 			goto bail;
1717316f4b9fSMark Fasheh 		}
171887d35a74SMark Fasheh 
		/* No room here: step to the next dirent via rec_len. */
1719316f4b9fSMark Fasheh 		offset += le16_to_cpu(de->rec_len);
1720316f4b9fSMark Fasheh 		de = (struct ocfs2_dir_entry *) ((char *) de + le16_to_cpu(de->rec_len));
1721316f4b9fSMark Fasheh 	}
1722316f4b9fSMark Fasheh 
1723316f4b9fSMark Fasheh 	/* when you think about it, the assert above should prevent us
1724316f4b9fSMark Fasheh 	 * from ever getting here. */
1725316f4b9fSMark Fasheh 	retval = -ENOSPC;
1726316f4b9fSMark Fasheh bail:
	/*
	 * Every non-zero retval is logged here, so error paths above that
	 * already called mlog_errno() are logged a second time.
	 */
1727c1e8d35eSTao Ma 	if (retval)
1728c1e8d35eSTao Ma 		mlog_errno(retval);
1729316f4b9fSMark Fasheh 
1730316f4b9fSMark Fasheh 	return retval;
1731316f4b9fSMark Fasheh }
1732316f4b9fSMark Fasheh 
/*
 * Emit the entries of an inline-data directory to 'ctx', starting at
 * ctx->pos and stopping at i_size_read(inode) or when dir_emit()
 * refuses an entry.
 *
 * '*f_version' is compared against the inode's i_version; on mismatch
 * the position is re-validated by walking the records from offset 0,
 * and '*f_version' is refreshed.
 *
 * Always returns 0, including when the inode block cannot be read
 * (that failure is only logged).
 */
ocfs2_dir_foreach_blk_id(struct inode * inode,u64 * f_version,struct dir_context * ctx)173323193e51SMark Fasheh static int ocfs2_dir_foreach_blk_id(struct inode *inode,
17342b47c361SMathieu Desnoyers 				    u64 *f_version,
17353704412bSAl Viro 				    struct dir_context *ctx)
173623193e51SMark Fasheh {
17373704412bSAl Viro 	int ret, i;
	/* 'offset' tracks ctx->pos locally; it bounds the rescan below. */
17383704412bSAl Viro 	unsigned long offset = ctx->pos;
173923193e51SMark Fasheh 	struct buffer_head *di_bh = NULL;
174023193e51SMark Fasheh 	struct ocfs2_dinode *di;
174123193e51SMark Fasheh 	struct ocfs2_inline_data *data;
174223193e51SMark Fasheh 	struct ocfs2_dir_entry *de;
174323193e51SMark Fasheh 
1744b657c95cSJoel Becker 	ret = ocfs2_read_inode_block(inode, &di_bh);
174523193e51SMark Fasheh 	if (ret) {
174623193e51SMark Fasheh 		mlog(ML_ERROR, "Unable to read inode block for dir %llu\n",
174723193e51SMark Fasheh 		     (unsigned long long)OCFS2_I(inode)->ip_blkno);
		/*
		 * NOTE(review): 'ret' is not propagated - 'out' returns 0,
		 * so the caller sees success with nothing emitted. Confirm
		 * this is intentional before changing it.
		 */
174823193e51SMark Fasheh 		goto out;
174923193e51SMark Fasheh 	}
175023193e51SMark Fasheh 
175123193e51SMark Fasheh 	di = (struct ocfs2_dinode *)di_bh->b_data;
175223193e51SMark Fasheh 	data = &di->id2.i_data;
175323193e51SMark Fasheh 
17543704412bSAl Viro 	while (ctx->pos < i_size_read(inode)) {
175523193e51SMark Fasheh 		/* If the dir block has changed since the last call to
175623193e51SMark Fasheh 		 * readdir(2), then we might be pointing to an invalid
175723193e51SMark Fasheh 		 * dirent right now.  Scan from the start of the block
175823193e51SMark Fasheh 		 * to make sure. */
1759c472c07bSGoffredo Baroncelli 		if (!inode_eq_iversion(inode, *f_version)) {
			/*
			 * Hop record to record from 0 until reaching or
			 * passing 'offset', or hitting a record too short
			 * to be valid.
			 */
176023193e51SMark Fasheh 			for (i = 0; i < i_size_read(inode) && i < offset; ) {
176123193e51SMark Fasheh 				de = (struct ocfs2_dir_entry *)
176223193e51SMark Fasheh 					(data->id_data + i);
176323193e51SMark Fasheh 				/* It's too expensive to do a full
176423193e51SMark Fasheh 				 * dirent test each time round this
176523193e51SMark Fasheh 				 * loop, but we do have to test at
176623193e51SMark Fasheh 				 * least that it is non-zero.  A
176723193e51SMark Fasheh 				 * failure will be detected in the
176823193e51SMark Fasheh 				 * dirent test below. */
176923193e51SMark Fasheh 				if (le16_to_cpu(de->rec_len) <
177023193e51SMark Fasheh 				    OCFS2_DIR_REC_LEN(1))
177123193e51SMark Fasheh 					break;
177223193e51SMark Fasheh 				i += le16_to_cpu(de->rec_len);
177323193e51SMark Fasheh 			}
			/* Resume from where the rescan stopped. */
17743704412bSAl Viro 			ctx->pos = offset = i;
1775cc56c33eSJeff Layton 			*f_version = inode_query_iversion(inode);
177623193e51SMark Fasheh 		}
177723193e51SMark Fasheh 
17783704412bSAl Viro 		de = (struct ocfs2_dir_entry *) (data->id_data + ctx->pos);
17793704412bSAl Viro 		if (!ocfs2_check_dir_entry(inode, de, di_bh, ctx->pos)) {
178023193e51SMark Fasheh 			/* On error, skip the f_pos to the end. */
17813704412bSAl Viro 			ctx->pos = i_size_read(inode);
17823704412bSAl Viro 			break;
178323193e51SMark Fasheh 		}
178423193e51SMark Fasheh 		offset += le16_to_cpu(de->rec_len);
		/* Records with inode == 0 are unused and are not emitted. */
178523193e51SMark Fasheh 		if (le64_to_cpu(de->inode)) {
			/*
			 * If dir_emit() returns false, stop without
			 * advancing ctx->pos past this entry.
			 */
17863704412bSAl Viro 			if (!dir_emit(ctx, de->name, de->name_len,
17879dc2108dSPhillip Potter 				      le64_to_cpu(de->inode),
17889dc2108dSPhillip Potter 				      fs_ftype_to_dtype(de->file_type)))
17893704412bSAl Viro 				goto out;
1790e7b34019SMark Fasheh 		}
17913704412bSAl Viro 		ctx->pos += le16_to_cpu(de->rec_len);
179223193e51SMark Fasheh 	}
179323193e51SMark Fasheh out:
	/* di_bh is still NULL here if the inode block read failed. */
179423193e51SMark Fasheh 	brelse(di_bh);
179523193e51SMark Fasheh 	return 0;
179623193e51SMark Fasheh }
179723193e51SMark Fasheh 
17989b7895efSMark Fasheh /*
17999b7895efSMark Fasheh  * NOTE: This function can be called against unindexed directories,
18009b7895efSMark Fasheh  * and indexed ones.
18019b7895efSMark Fasheh  */
ocfs2_dir_foreach_blk_el(struct inode * inode,u64 * f_version,struct dir_context * ctx,bool persist)180223193e51SMark Fasheh static int ocfs2_dir_foreach_blk_el(struct inode *inode,
18032b47c361SMathieu Desnoyers 				    u64 *f_version,
18043704412bSAl Viro 				    struct dir_context *ctx,
18053704412bSAl Viro 				    bool persist)
1806ccd979bdSMark Fasheh {
1807aa958874SMark Fasheh 	unsigned long offset, blk, last_ra_blk = 0;
18083704412bSAl Viro 	int i;
1809ccd979bdSMark Fasheh 	struct buffer_head * bh, * tmp;
1810ccd979bdSMark Fasheh 	struct ocfs2_dir_entry * de;
1811ccd979bdSMark Fasheh 	struct super_block * sb = inode->i_sb;
1812aa958874SMark Fasheh 	unsigned int ra_sectors = 16;
18133704412bSAl Viro 	int stored = 0;
1814ccd979bdSMark Fasheh 
1815ccd979bdSMark Fasheh 	bh = NULL;
1816ccd979bdSMark Fasheh 
18173704412bSAl Viro 	offset = ctx->pos & (sb->s_blocksize - 1);
1818ccd979bdSMark Fasheh 
18193704412bSAl Viro 	while (ctx->pos < i_size_read(inode)) {
18203704412bSAl Viro 		blk = ctx->pos >> sb->s_blocksize_bits;
1821a22305ccSJoel Becker 		if (ocfs2_read_dir_block(inode, blk, &bh, 0)) {
1822a22305ccSJoel Becker 			/* Skip the corrupt dirblock and keep trying */
18233704412bSAl Viro 			ctx->pos += sb->s_blocksize - offset;
1824ccd979bdSMark Fasheh 			continue;
1825ccd979bdSMark Fasheh 		}
1826ccd979bdSMark Fasheh 
1827aa958874SMark Fasheh 		/* The idea here is to begin with 8k read-ahead and to stay
1828aa958874SMark Fasheh 		 * 4k ahead of our current position.
1829aa958874SMark Fasheh 		 *
1830aa958874SMark Fasheh 		 * TODO: Use the pagecache for this. We just need to
1831aa958874SMark Fasheh 		 * make sure it's cluster-safe... */
1832aa958874SMark Fasheh 		if (!last_ra_blk
1833aa958874SMark Fasheh 		    || (((last_ra_blk - blk) << 9) <= (ra_sectors / 2))) {
1834aa958874SMark Fasheh 			for (i = ra_sectors >> (sb->s_blocksize_bits - 9);
1835ccd979bdSMark Fasheh 			     i > 0; i--) {
1836a22305ccSJoel Becker 				tmp = NULL;
1837a22305ccSJoel Becker 				if (!ocfs2_read_dir_block(inode, ++blk, &tmp,
1838a22305ccSJoel Becker 							  OCFS2_BH_READAHEAD))
1839ccd979bdSMark Fasheh 					brelse(tmp);
1840ccd979bdSMark Fasheh 			}
1841aa958874SMark Fasheh 			last_ra_blk = blk;
1842aa958874SMark Fasheh 			ra_sectors = 8;
1843ccd979bdSMark Fasheh 		}
1844ccd979bdSMark Fasheh 
1845ccd979bdSMark Fasheh 		/* If the dir block has changed since the last call to
1846ccd979bdSMark Fasheh 		 * readdir(2), then we might be pointing to an invalid
1847ccd979bdSMark Fasheh 		 * dirent right now.  Scan from the start of the block
1848ccd979bdSMark Fasheh 		 * to make sure. */
1849c472c07bSGoffredo Baroncelli 		if (!inode_eq_iversion(inode, *f_version)) {
1850ccd979bdSMark Fasheh 			for (i = 0; i < sb->s_blocksize && i < offset; ) {
1851ccd979bdSMark Fasheh 				de = (struct ocfs2_dir_entry *) (bh->b_data + i);
1852ccd979bdSMark Fasheh 				/* It's too expensive to do a full
1853ccd979bdSMark Fasheh 				 * dirent test each time round this
1854ccd979bdSMark Fasheh 				 * loop, but we do have to test at
1855ccd979bdSMark Fasheh 				 * least that it is non-zero.  A
1856ccd979bdSMark Fasheh 				 * failure will be detected in the
1857ccd979bdSMark Fasheh 				 * dirent test below. */
1858ccd979bdSMark Fasheh 				if (le16_to_cpu(de->rec_len) <
1859ccd979bdSMark Fasheh 				    OCFS2_DIR_REC_LEN(1))
1860ccd979bdSMark Fasheh 					break;
1861ccd979bdSMark Fasheh 				i += le16_to_cpu(de->rec_len);
1862ccd979bdSMark Fasheh 			}
1863ccd979bdSMark Fasheh 			offset = i;
18643704412bSAl Viro 			ctx->pos = (ctx->pos & ~(sb->s_blocksize - 1))
1865ccd979bdSMark Fasheh 				| offset;
1866cc56c33eSJeff Layton 			*f_version = inode_query_iversion(inode);
1867ccd979bdSMark Fasheh 		}
1868ccd979bdSMark Fasheh 
18693704412bSAl Viro 		while (ctx->pos < i_size_read(inode)
1870ccd979bdSMark Fasheh 		       && offset < sb->s_blocksize) {
1871ccd979bdSMark Fasheh 			de = (struct ocfs2_dir_entry *) (bh->b_data + offset);
1872ccd979bdSMark Fasheh 			if (!ocfs2_check_dir_entry(inode, de, bh, offset)) {
1873ccd979bdSMark Fasheh 				/* On error, skip the f_pos to the
1874ccd979bdSMark Fasheh 				   next block. */
18753704412bSAl Viro 				ctx->pos = (ctx->pos | (sb->s_blocksize - 1)) + 1;
187629aa3016SChangwei Ge 				break;
1877ccd979bdSMark Fasheh 			}
1878ccd979bdSMark Fasheh 			if (le64_to_cpu(de->inode)) {
18793704412bSAl Viro 				if (!dir_emit(ctx, de->name,
1880ccd979bdSMark Fasheh 						de->name_len,
18817e853679SMark Fasheh 						le64_to_cpu(de->inode),
18829dc2108dSPhillip Potter 					fs_ftype_to_dtype(de->file_type))) {
18833704412bSAl Viro 					brelse(bh);
18843704412bSAl Viro 					return 0;
1885e7b34019SMark Fasheh 				}
1886ccd979bdSMark Fasheh 				stored++;
1887ccd979bdSMark Fasheh 			}
18883704412bSAl Viro 			offset += le16_to_cpu(de->rec_len);
18893704412bSAl Viro 			ctx->pos += le16_to_cpu(de->rec_len);
1890ccd979bdSMark Fasheh 		}
1891ccd979bdSMark Fasheh 		offset = 0;
1892ccd979bdSMark Fasheh 		brelse(bh);
1893a22305ccSJoel Becker 		bh = NULL;
18943704412bSAl Viro 		if (!persist && stored)
18953704412bSAl Viro 			break;
1896ccd979bdSMark Fasheh 	}
18973704412bSAl Viro 	return 0;
1898b8bc5f4fSMark Fasheh }
1899b8bc5f4fSMark Fasheh 
/*
 * Route a directory walk to the inline-data walker or to the extent-based
 * walker, depending on whether OCFS2_INLINE_DATA_FL is set in the inode's
 * dynamic features.  'persist' is only passed on to the extent-based walker.
 */
static int ocfs2_dir_foreach_blk(struct inode *inode, u64 *f_version,
				 struct dir_context *ctx,
				 bool persist)
{
	int ret;

	if (OCFS2_I(inode)->ip_dyn_features & OCFS2_INLINE_DATA_FL)
		ret = ocfs2_dir_foreach_blk_id(inode, f_version, ctx);
	else
		ret = ocfs2_dir_foreach_blk_el(inode, f_version, ctx, persist);

	return ret;
}
190823193e51SMark Fasheh 
1909b8bc5f4fSMark Fasheh /*
19105eae5b96SMark Fasheh  * This is intended to be called from inside other kernel functions,
19115eae5b96SMark Fasheh  * so we fake some arguments.
19125eae5b96SMark Fasheh  */
ocfs2_dir_foreach(struct inode * inode,struct dir_context * ctx)19133704412bSAl Viro int ocfs2_dir_foreach(struct inode *inode, struct dir_context *ctx)
19145eae5b96SMark Fasheh {
1915cc56c33eSJeff Layton 	u64 version = inode_query_iversion(inode);
19163704412bSAl Viro 	ocfs2_dir_foreach_blk(inode, &version, ctx, true);
19175eae5b96SMark Fasheh 	return 0;
19185eae5b96SMark Fasheh }
19195eae5b96SMark Fasheh 
19205eae5b96SMark Fasheh /*
1921b8bc5f4fSMark Fasheh  * ocfs2_readdir()
1922b8bc5f4fSMark Fasheh  *
1923b8bc5f4fSMark Fasheh  */
ocfs2_readdir(struct file * file,struct dir_context * ctx)19243704412bSAl Viro int ocfs2_readdir(struct file *file, struct dir_context *ctx)
1925b8bc5f4fSMark Fasheh {
1926b8bc5f4fSMark Fasheh 	int error = 0;
19273704412bSAl Viro 	struct inode *inode = file_inode(file);
1928b8bc5f4fSMark Fasheh 	int lock_level = 0;
1929b8bc5f4fSMark Fasheh 
1930f1088d47STao Ma 	trace_ocfs2_readdir((unsigned long long)OCFS2_I(inode)->ip_blkno);
1931b8bc5f4fSMark Fasheh 
1932c4c2416aSGang He 	error = ocfs2_inode_lock_atime(inode, file->f_path.mnt, &lock_level, 1);
1933b8bc5f4fSMark Fasheh 	if (lock_level && error >= 0) {
1934b8bc5f4fSMark Fasheh 		/* We release EX lock which used to update atime
1935b8bc5f4fSMark Fasheh 		 * and get PR lock again to reduce contention
1936b8bc5f4fSMark Fasheh 		 * on commonly accessed directories. */
1937e63aecb6SMark Fasheh 		ocfs2_inode_unlock(inode, 1);
1938b8bc5f4fSMark Fasheh 		lock_level = 0;
1939e63aecb6SMark Fasheh 		error = ocfs2_inode_lock(inode, NULL, 0);
1940b8bc5f4fSMark Fasheh 	}
1941b8bc5f4fSMark Fasheh 	if (error < 0) {
1942b8bc5f4fSMark Fasheh 		if (error != -ENOENT)
1943b8bc5f4fSMark Fasheh 			mlog_errno(error);
1944b8bc5f4fSMark Fasheh 		/* we haven't got any yet, so propagate the error. */
1945b8bc5f4fSMark Fasheh 		goto bail_nolock;
1946b8bc5f4fSMark Fasheh 	}
1947b8bc5f4fSMark Fasheh 
19483704412bSAl Viro 	error = ocfs2_dir_foreach_blk(inode, &file->f_version, ctx, false);
1949b8bc5f4fSMark Fasheh 
1950e63aecb6SMark Fasheh 	ocfs2_inode_unlock(inode, lock_level);
1951c1e8d35eSTao Ma 	if (error)
1952c1e8d35eSTao Ma 		mlog_errno(error);
1953ccd979bdSMark Fasheh 
1954aa958874SMark Fasheh bail_nolock:
1955ccd979bdSMark Fasheh 
1956b8bc5f4fSMark Fasheh 	return error;
1957ccd979bdSMark Fasheh }
1958ccd979bdSMark Fasheh 
1959ccd979bdSMark Fasheh /*
1960137cebf9Shongnanli  * NOTE: this should always be called with parent dir i_rwsem taken.
1961ccd979bdSMark Fasheh  */
ocfs2_find_files_on_disk(const char * name,int namelen,u64 * blkno,struct inode * inode,struct ocfs2_dir_lookup_result * lookup)1962ccd979bdSMark Fasheh int ocfs2_find_files_on_disk(const char *name,
1963ccd979bdSMark Fasheh 			     int namelen,
1964ccd979bdSMark Fasheh 			     u64 *blkno,
1965ccd979bdSMark Fasheh 			     struct inode *inode,
19664a12ca3aSMark Fasheh 			     struct ocfs2_dir_lookup_result *lookup)
1967ccd979bdSMark Fasheh {
1968ccd979bdSMark Fasheh 	int status = -ENOENT;
1969ccd979bdSMark Fasheh 
1970f1088d47STao Ma 	trace_ocfs2_find_files_on_disk(namelen, name, blkno,
19714a12ca3aSMark Fasheh 				(unsigned long long)OCFS2_I(inode)->ip_blkno);
1972ccd979bdSMark Fasheh 
19734a12ca3aSMark Fasheh 	status = ocfs2_find_entry(name, namelen, inode, lookup);
19744a12ca3aSMark Fasheh 	if (status)
1975ccd979bdSMark Fasheh 		goto leave;
1976ccd979bdSMark Fasheh 
19774a12ca3aSMark Fasheh 	*blkno = le64_to_cpu(lookup->dl_entry->inode);
1978ccd979bdSMark Fasheh 
1979ccd979bdSMark Fasheh 	status = 0;
1980ccd979bdSMark Fasheh leave:
1981ccd979bdSMark Fasheh 
1982ccd979bdSMark Fasheh 	return status;
1983ccd979bdSMark Fasheh }
1984ccd979bdSMark Fasheh 
1985be94d117SMark Fasheh /*
1986be94d117SMark Fasheh  * Convenience function for callers which just want the block number
1987be94d117SMark Fasheh  * mapped to a name and don't require the full dirent info, etc.
1988be94d117SMark Fasheh  */
ocfs2_lookup_ino_from_name(struct inode * dir,const char * name,int namelen,u64 * blkno)1989be94d117SMark Fasheh int ocfs2_lookup_ino_from_name(struct inode *dir, const char *name,
1990be94d117SMark Fasheh 			       int namelen, u64 *blkno)
1991be94d117SMark Fasheh {
1992be94d117SMark Fasheh 	int ret;
19934a12ca3aSMark Fasheh 	struct ocfs2_dir_lookup_result lookup = { NULL, };
1994be94d117SMark Fasheh 
19954a12ca3aSMark Fasheh 	ret = ocfs2_find_files_on_disk(name, namelen, blkno, dir, &lookup);
19964a12ca3aSMark Fasheh 	ocfs2_free_dir_lookup_result(&lookup);
1997be94d117SMark Fasheh 
1998be94d117SMark Fasheh 	return ret;
1999be94d117SMark Fasheh }
2000be94d117SMark Fasheh 
2001ccd979bdSMark Fasheh /* Check for a name within a directory.
2002ccd979bdSMark Fasheh  *
2003ccd979bdSMark Fasheh  * Return 0 if the name does not exist
2004ccd979bdSMark Fasheh  * Return -EEXIST if the directory contains the name
2005ccd979bdSMark Fasheh  *
2006137cebf9Shongnanli  * Callers should have i_rwsem + a cluster lock on dir
2007ccd979bdSMark Fasheh  */
ocfs2_check_dir_for_entry(struct inode * dir,const char * name,int namelen)2008ccd979bdSMark Fasheh int ocfs2_check_dir_for_entry(struct inode *dir,
2009ccd979bdSMark Fasheh 			      const char *name,
2010ccd979bdSMark Fasheh 			      int namelen)
2011ccd979bdSMark Fasheh {
20127c01ad8fSDaeseok Youn 	int ret = 0;
20134a12ca3aSMark Fasheh 	struct ocfs2_dir_lookup_result lookup = { NULL, };
2014ccd979bdSMark Fasheh 
2015f1088d47STao Ma 	trace_ocfs2_check_dir_for_entry(
2016b0697053SMark Fasheh 		(unsigned long long)OCFS2_I(dir)->ip_blkno, namelen, name);
2017ccd979bdSMark Fasheh 
20187c01ad8fSDaeseok Youn 	if (ocfs2_find_entry(name, namelen, dir, &lookup) == 0) {
2019ccd979bdSMark Fasheh 		ret = -EEXIST;
20207c01ad8fSDaeseok Youn 		mlog_errno(ret);
20217c01ad8fSDaeseok Youn 	}
2022ccd979bdSMark Fasheh 
20234a12ca3aSMark Fasheh 	ocfs2_free_dir_lookup_result(&lookup);
2024ccd979bdSMark Fasheh 
2025ccd979bdSMark Fasheh 	return ret;
2026ccd979bdSMark Fasheh }
2027ccd979bdSMark Fasheh 
20280bfbbf62SMark Fasheh struct ocfs2_empty_dir_priv {
20293704412bSAl Viro 	struct dir_context ctx;
20300bfbbf62SMark Fasheh 	unsigned seen_dot;
20310bfbbf62SMark Fasheh 	unsigned seen_dot_dot;
20320bfbbf62SMark Fasheh 	unsigned seen_other;
2033e3a93c2dSMark Fasheh 	unsigned dx_dir;
20340bfbbf62SMark Fasheh };
ocfs2_empty_dir_filldir(struct dir_context * ctx,const char * name,int name_len,loff_t pos,u64 ino,unsigned type)203525885a35SAl Viro static bool ocfs2_empty_dir_filldir(struct dir_context *ctx, const char *name,
2036ac7576f4SMiklos Szeredi 				   int name_len, loff_t pos, u64 ino,
2037ac7576f4SMiklos Szeredi 				   unsigned type)
20380bfbbf62SMark Fasheh {
2039ac7576f4SMiklos Szeredi 	struct ocfs2_empty_dir_priv *p =
2040ac7576f4SMiklos Szeredi 		container_of(ctx, struct ocfs2_empty_dir_priv, ctx);
20410bfbbf62SMark Fasheh 
20420bfbbf62SMark Fasheh 	/*
20430bfbbf62SMark Fasheh 	 * Check the positions of "." and ".." records to be sure
20440bfbbf62SMark Fasheh 	 * they're in the correct place.
2045e3a93c2dSMark Fasheh 	 *
2046e3a93c2dSMark Fasheh 	 * Indexed directories don't need to proceed past the first
2047e3a93c2dSMark Fasheh 	 * two entries, so we end the scan after seeing '..'. Despite
2048e3a93c2dSMark Fasheh 	 * that, we allow the scan to proceed In the event that we
2049e3a93c2dSMark Fasheh 	 * have a corrupted indexed directory (no dot or dot dot
2050e3a93c2dSMark Fasheh 	 * entries). This allows us to double check for existing
2051e3a93c2dSMark Fasheh 	 * entries which might not have been found in the index.
20520bfbbf62SMark Fasheh 	 */
20530bfbbf62SMark Fasheh 	if (name_len == 1 && !strncmp(".", name, 1) && pos == 0) {
20540bfbbf62SMark Fasheh 		p->seen_dot = 1;
205525885a35SAl Viro 		return true;
20560bfbbf62SMark Fasheh 	}
20570bfbbf62SMark Fasheh 
20580bfbbf62SMark Fasheh 	if (name_len == 2 && !strncmp("..", name, 2) &&
20590bfbbf62SMark Fasheh 	    pos == OCFS2_DIR_REC_LEN(1)) {
20600bfbbf62SMark Fasheh 		p->seen_dot_dot = 1;
2061e3a93c2dSMark Fasheh 
2062e3a93c2dSMark Fasheh 		if (p->dx_dir && p->seen_dot)
206325885a35SAl Viro 			return false;
2064e3a93c2dSMark Fasheh 
206525885a35SAl Viro 		return true;
20660bfbbf62SMark Fasheh 	}
20670bfbbf62SMark Fasheh 
20680bfbbf62SMark Fasheh 	p->seen_other = 1;
206925885a35SAl Viro 	return false;
20700bfbbf62SMark Fasheh }
2071e3a93c2dSMark Fasheh 
ocfs2_empty_dir_dx(struct inode * inode,struct ocfs2_empty_dir_priv * priv)2072e3a93c2dSMark Fasheh static int ocfs2_empty_dir_dx(struct inode *inode,
2073e3a93c2dSMark Fasheh 			      struct ocfs2_empty_dir_priv *priv)
2074e3a93c2dSMark Fasheh {
2075e3a93c2dSMark Fasheh 	int ret;
2076e3a93c2dSMark Fasheh 	struct buffer_head *di_bh = NULL;
2077e3a93c2dSMark Fasheh 	struct buffer_head *dx_root_bh = NULL;
2078e3a93c2dSMark Fasheh 	struct ocfs2_dinode *di;
2079e3a93c2dSMark Fasheh 	struct ocfs2_dx_root_block *dx_root;
2080e3a93c2dSMark Fasheh 
2081e3a93c2dSMark Fasheh 	priv->dx_dir = 1;
2082e3a93c2dSMark Fasheh 
2083e3a93c2dSMark Fasheh 	ret = ocfs2_read_inode_block(inode, &di_bh);
2084e3a93c2dSMark Fasheh 	if (ret) {
2085e3a93c2dSMark Fasheh 		mlog_errno(ret);
2086e3a93c2dSMark Fasheh 		goto out;
2087e3a93c2dSMark Fasheh 	}
2088e3a93c2dSMark Fasheh 	di = (struct ocfs2_dinode *)di_bh->b_data;
2089e3a93c2dSMark Fasheh 
2090e3a93c2dSMark Fasheh 	ret = ocfs2_read_dx_root(inode, di, &dx_root_bh);
2091e3a93c2dSMark Fasheh 	if (ret) {
2092e3a93c2dSMark Fasheh 		mlog_errno(ret);
2093e3a93c2dSMark Fasheh 		goto out;
2094e3a93c2dSMark Fasheh 	}
2095e3a93c2dSMark Fasheh 	dx_root = (struct ocfs2_dx_root_block *)dx_root_bh->b_data;
2096e3a93c2dSMark Fasheh 
2097e3a93c2dSMark Fasheh 	if (le32_to_cpu(dx_root->dr_num_entries) != 2)
2098e3a93c2dSMark Fasheh 		priv->seen_other = 1;
2099e3a93c2dSMark Fasheh 
2100e3a93c2dSMark Fasheh out:
2101e3a93c2dSMark Fasheh 	brelse(di_bh);
2102e3a93c2dSMark Fasheh 	brelse(dx_root_bh);
2103e3a93c2dSMark Fasheh 	return ret;
2104e3a93c2dSMark Fasheh }
2105e3a93c2dSMark Fasheh 
2106ccd979bdSMark Fasheh /*
2107ccd979bdSMark Fasheh  * routine to check that the specified directory is empty (for rmdir)
21080bfbbf62SMark Fasheh  *
21090bfbbf62SMark Fasheh  * Returns 1 if dir is empty, zero otherwise.
21109b7895efSMark Fasheh  *
2111e3a93c2dSMark Fasheh  * XXX: This is a performance problem for unindexed directories.
2112ccd979bdSMark Fasheh  */
ocfs2_empty_dir(struct inode * inode)2113ccd979bdSMark Fasheh int ocfs2_empty_dir(struct inode *inode)
2114ccd979bdSMark Fasheh {
21150bfbbf62SMark Fasheh 	int ret;
21163704412bSAl Viro 	struct ocfs2_empty_dir_priv priv = {
2117d6394b59SJeff Liu 		.ctx.actor = ocfs2_empty_dir_filldir,
21183704412bSAl Viro 	};
2119ccd979bdSMark Fasheh 
2120e3a93c2dSMark Fasheh 	if (ocfs2_dir_indexed(inode)) {
2121e3a93c2dSMark Fasheh 		ret = ocfs2_empty_dir_dx(inode, &priv);
2122e3a93c2dSMark Fasheh 		if (ret)
2123e3a93c2dSMark Fasheh 			mlog_errno(ret);
2124e3a93c2dSMark Fasheh 		/*
2125e3a93c2dSMark Fasheh 		 * We still run ocfs2_dir_foreach to get the checks
2126e3a93c2dSMark Fasheh 		 * for "." and "..".
2127e3a93c2dSMark Fasheh 		 */
2128e3a93c2dSMark Fasheh 	}
2129e3a93c2dSMark Fasheh 
21303704412bSAl Viro 	ret = ocfs2_dir_foreach(inode, &priv.ctx);
21310bfbbf62SMark Fasheh 	if (ret)
21320bfbbf62SMark Fasheh 		mlog_errno(ret);
21330bfbbf62SMark Fasheh 
21340bfbbf62SMark Fasheh 	if (!priv.seen_dot || !priv.seen_dot_dot) {
2135b0697053SMark Fasheh 		mlog(ML_ERROR, "bad directory (dir #%llu) - no `.' or `..'\n",
2136b0697053SMark Fasheh 		     (unsigned long long)OCFS2_I(inode)->ip_blkno);
21370bfbbf62SMark Fasheh 		/*
21380bfbbf62SMark Fasheh 		 * XXX: Is it really safe to allow an unlink to continue?
21390bfbbf62SMark Fasheh 		 */
2140ccd979bdSMark Fasheh 		return 1;
2141ccd979bdSMark Fasheh 	}
21420bfbbf62SMark Fasheh 
21430bfbbf62SMark Fasheh 	return !priv.seen_other;
2144ccd979bdSMark Fasheh }
2145ccd979bdSMark Fasheh 
214687d35a74SMark Fasheh /*
214787d35a74SMark Fasheh  * Fills "." and ".." dirents in a new directory block. Returns dirent for
214887d35a74SMark Fasheh  * "..", which might be used during creation of a directory with a trailing
214987d35a74SMark Fasheh  * header. It is otherwise safe to ignore the return code.
215087d35a74SMark Fasheh  */
ocfs2_fill_initial_dirents(struct inode * inode,struct inode * parent,char * start,unsigned int size)215187d35a74SMark Fasheh static struct ocfs2_dir_entry *ocfs2_fill_initial_dirents(struct inode *inode,
21525b6a3a2bSMark Fasheh 							  struct inode *parent,
215387d35a74SMark Fasheh 							  char *start,
215487d35a74SMark Fasheh 							  unsigned int size)
21555b6a3a2bSMark Fasheh {
21565b6a3a2bSMark Fasheh 	struct ocfs2_dir_entry *de = (struct ocfs2_dir_entry *)start;
21575b6a3a2bSMark Fasheh 
21585b6a3a2bSMark Fasheh 	de->inode = cpu_to_le64(OCFS2_I(inode)->ip_blkno);
21595b6a3a2bSMark Fasheh 	de->name_len = 1;
21605b6a3a2bSMark Fasheh 	de->rec_len =
21615b6a3a2bSMark Fasheh 		cpu_to_le16(OCFS2_DIR_REC_LEN(de->name_len));
21625b6a3a2bSMark Fasheh 	strcpy(de->name, ".");
21635b6a3a2bSMark Fasheh 	ocfs2_set_de_type(de, S_IFDIR);
21645b6a3a2bSMark Fasheh 
21655b6a3a2bSMark Fasheh 	de = (struct ocfs2_dir_entry *) ((char *)de + le16_to_cpu(de->rec_len));
21665b6a3a2bSMark Fasheh 	de->inode = cpu_to_le64(OCFS2_I(parent)->ip_blkno);
21675b6a3a2bSMark Fasheh 	de->rec_len = cpu_to_le16(size - OCFS2_DIR_REC_LEN(1));
21685b6a3a2bSMark Fasheh 	de->name_len = 2;
21695b6a3a2bSMark Fasheh 	strcpy(de->name, "..");
21705b6a3a2bSMark Fasheh 	ocfs2_set_de_type(de, S_IFDIR);
217187d35a74SMark Fasheh 
217287d35a74SMark Fasheh 	return de;
21735b6a3a2bSMark Fasheh }
21745b6a3a2bSMark Fasheh 
21755b6a3a2bSMark Fasheh /*
21765b6a3a2bSMark Fasheh  * This works together with code in ocfs2_mknod_locked() which sets
21775b6a3a2bSMark Fasheh  * the inline-data flag and initializes the inline-data section.
21785b6a3a2bSMark Fasheh  */
ocfs2_fill_new_dir_id(struct ocfs2_super * osb,handle_t * handle,struct inode * parent,struct inode * inode,struct buffer_head * di_bh)21795b6a3a2bSMark Fasheh static int ocfs2_fill_new_dir_id(struct ocfs2_super *osb,
21805b6a3a2bSMark Fasheh 				 handle_t *handle,
21815b6a3a2bSMark Fasheh 				 struct inode *parent,
21825b6a3a2bSMark Fasheh 				 struct inode *inode,
21835b6a3a2bSMark Fasheh 				 struct buffer_head *di_bh)
21845b6a3a2bSMark Fasheh {
21855b6a3a2bSMark Fasheh 	int ret;
21865b6a3a2bSMark Fasheh 	struct ocfs2_dinode *di = (struct ocfs2_dinode *)di_bh->b_data;
21875b6a3a2bSMark Fasheh 	struct ocfs2_inline_data *data = &di->id2.i_data;
21885b6a3a2bSMark Fasheh 	unsigned int size = le16_to_cpu(data->id_count);
21895b6a3a2bSMark Fasheh 
21900cf2f763SJoel Becker 	ret = ocfs2_journal_access_di(handle, INODE_CACHE(inode), di_bh,
21915b6a3a2bSMark Fasheh 				      OCFS2_JOURNAL_ACCESS_WRITE);
21925b6a3a2bSMark Fasheh 	if (ret) {
21935b6a3a2bSMark Fasheh 		mlog_errno(ret);
21945b6a3a2bSMark Fasheh 		goto out;
21955b6a3a2bSMark Fasheh 	}
21965b6a3a2bSMark Fasheh 
21975b6a3a2bSMark Fasheh 	ocfs2_fill_initial_dirents(inode, parent, data->id_data, size);
21985b6a3a2bSMark Fasheh 	ocfs2_journal_dirty(handle, di_bh);
21995b6a3a2bSMark Fasheh 
22005b6a3a2bSMark Fasheh 	i_size_write(inode, size);
2201bfe86848SMiklos Szeredi 	set_nlink(inode, 2);
22025b6a3a2bSMark Fasheh 	inode->i_blocks = ocfs2_inode_sector_count(inode);
22035b6a3a2bSMark Fasheh 
22045b6a3a2bSMark Fasheh 	ret = ocfs2_mark_inode_dirty(handle, inode, di_bh);
22055b6a3a2bSMark Fasheh 	if (ret < 0)
22065b6a3a2bSMark Fasheh 		mlog_errno(ret);
22075b6a3a2bSMark Fasheh 
22085b6a3a2bSMark Fasheh out:
22095b6a3a2bSMark Fasheh 	return ret;
22105b6a3a2bSMark Fasheh }
22115b6a3a2bSMark Fasheh 
/*
 * Initialize a new extent-based directory: extend it by its first block,
 * zero that block, write "." and ".." (plus a block trailer when
 * ocfs2_new_dir_wants_trailer() says so), then set i_size to one block,
 * nlink to 2 and refresh i_blocks.
 *
 * If 'ret_new_bh' is non-NULL, the reference on the new block's
 * buffer_head is handed to the caller on success; otherwise it is dropped
 * here.  Returns 0 on success or a negative error.
 */
static int ocfs2_fill_new_dir_el(struct ocfs2_super *osb,
				 handle_t *handle,
				 struct inode *parent,
				 struct inode *inode,
				 struct buffer_head *fe_bh,
				 struct ocfs2_alloc_context *data_ac,
				 struct buffer_head **ret_new_bh)
{
	struct buffer_head *dirblk_bh = NULL;
	struct ocfs2_dir_entry *dotdot;
	unsigned int dirent_area = osb->sb->s_blocksize;
	int status;

	/* With a trailer, the dirents stop where the trailer begins. */
	if (ocfs2_new_dir_wants_trailer(inode))
		dirent_area = ocfs2_dir_trailer_blk_off(parent->i_sb);

	status = ocfs2_do_extend_dir(osb->sb, handle, inode, fe_bh,
				     data_ac, NULL, &dirblk_bh);
	if (status < 0) {
		mlog_errno(status);
		goto bail;
	}

	ocfs2_set_new_buffer_uptodate(INODE_CACHE(inode), dirblk_bh);

	status = ocfs2_journal_access_db(handle, INODE_CACHE(inode), dirblk_bh,
					 OCFS2_JOURNAL_ACCESS_CREATE);
	if (status < 0) {
		mlog_errno(status);
		goto bail;
	}
	memset(dirblk_bh->b_data, 0, osb->sb->s_blocksize);

	dotdot = ocfs2_fill_initial_dirents(inode, parent, dirblk_bh->b_data,
					    dirent_area);
	if (ocfs2_new_dir_wants_trailer(inode)) {
		int hole = le16_to_cpu(dotdot->rec_len);

		/*
		 * Figure out the size of the hole left over after
		 * insertion of '.' and '..'. The trailer wants this
		 * information.
		 */
		hole -= OCFS2_DIR_REC_LEN(2);
		hole -= sizeof(struct ocfs2_dir_block_trailer);

		ocfs2_init_dir_trailer(inode, dirblk_bh, hole);
	}

	ocfs2_journal_dirty(handle, dirblk_bh);

	i_size_write(inode, inode->i_sb->s_blocksize);
	set_nlink(inode, 2);
	inode->i_blocks = ocfs2_inode_sector_count(inode);
	status = ocfs2_mark_inode_dirty(handle, inode, fe_bh);
	if (status < 0) {
		mlog_errno(status);
		goto bail;
	}

	status = 0;
	if (ret_new_bh) {
		/* Ownership moves to the caller; don't release it below. */
		*ret_new_bh = dirblk_bh;
		dirblk_bh = NULL;
	}
bail:
	brelse(dirblk_bh);

	return status;
}
2281316f4b9fSMark Fasheh 
/*
 * Allocate and initialize a dx root block for 'dir' and attach it to the
 * directory's dinode: di->i_dx_root is pointed at the new block and
 * OCFS2_INDEXED_DIR_FL is set both in memory and on disk.
 *
 * 'dx_inline' selects whether the root is set up to hold index entries
 * directly (OCFS2_DX_FLAG_INLINE) or an extent list.  dr_free_blk is set
 * to the block of 'dirdata_bh' when its trailer reports free space,
 * otherwise to 0.
 *
 * On success *ret_dx_root_bh receives the new root's buffer_head (the
 * caller owns the reference) and 0 is returned; on failure a non-zero
 * error is returned and *ret_dx_root_bh is not written.
 */
static int ocfs2_dx_dir_attach_index(struct ocfs2_super *osb,
				     handle_t *handle, struct inode *dir,
				     struct buffer_head *di_bh,
				     struct buffer_head *dirdata_bh,
				     struct ocfs2_alloc_context *meta_ac,
				     int dx_inline, u32 num_entries,
				     struct buffer_head **ret_dx_root_bh)
{
	struct ocfs2_dinode *di = (struct ocfs2_dinode *) di_bh->b_data;
	struct ocfs2_dir_block_trailer *trailer =
		ocfs2_trailer_from_bh(dirdata_bh, dir->i_sb);
	struct ocfs2_dx_root_block *dx_root;
	struct buffer_head *root_bh = NULL;
	u64 suballoc_loc, root_blkno;
	u16 suballoc_bit;
	unsigned int num_bits;
	int ret;

	ret = ocfs2_claim_metadata(handle, meta_ac, 1, &suballoc_loc,
				   &suballoc_bit, &num_bits, &root_blkno);
	if (ret) {
		mlog_errno(ret);
		goto out;
	}

	trace_ocfs2_dx_dir_attach_index(
				(unsigned long long)OCFS2_I(dir)->ip_blkno,
				(unsigned long long)root_blkno);

	root_bh = sb_getblk(osb->sb, root_blkno);
	if (!root_bh) {
		ret = -ENOMEM;
		goto out;
	}
	ocfs2_set_new_buffer_uptodate(INODE_CACHE(dir), root_bh);

	ret = ocfs2_journal_access_dr(handle, INODE_CACHE(dir), root_bh,
				      OCFS2_JOURNAL_ACCESS_CREATE);
	if (ret < 0) {
		mlog_errno(ret);
		goto out;
	}

	/* Start from an all-zero block and fill in the root header. */
	dx_root = (struct ocfs2_dx_root_block *)root_bh->b_data;
	memset(dx_root, 0, osb->sb->s_blocksize);
	strcpy(dx_root->dr_signature, OCFS2_DX_ROOT_SIGNATURE);
	dx_root->dr_suballoc_slot = cpu_to_le16(meta_ac->ac_alloc_slot);
	dx_root->dr_suballoc_loc = cpu_to_le64(suballoc_loc);
	dx_root->dr_suballoc_bit = cpu_to_le16(suballoc_bit);
	dx_root->dr_fs_generation = cpu_to_le32(osb->fs_generation);
	dx_root->dr_blkno = cpu_to_le64(root_blkno);
	dx_root->dr_dir_blkno = cpu_to_le64(OCFS2_I(dir)->ip_blkno);
	dx_root->dr_num_entries = cpu_to_le32(num_entries);
	dx_root->dr_free_blk = le16_to_cpu(trailer->db_free_rec_len) ?
		cpu_to_le64(dirdata_bh->b_blocknr) : cpu_to_le64(0);

	if (dx_inline) {
		dx_root->dr_flags |= OCFS2_DX_FLAG_INLINE;
		dx_root->dr_entries.de_count =
			cpu_to_le16(ocfs2_dx_entries_per_root(osb->sb));
	} else {
		dx_root->dr_list.l_count =
			cpu_to_le16(ocfs2_extent_recs_per_dx_root(osb->sb));
	}
	ocfs2_journal_dirty(handle, root_bh);

	/*
	 * NOTE(review): di_bh is journaled with OCFS2_JOURNAL_ACCESS_CREATE
	 * although only two fields of the dinode are updated below -
	 * confirm that CREATE rather than WRITE is intended here.
	 */
	ret = ocfs2_journal_access_di(handle, INODE_CACHE(dir), di_bh,
				      OCFS2_JOURNAL_ACCESS_CREATE);
	if (ret) {
		mlog_errno(ret);
		goto out;
	}

	di->i_dx_root = cpu_to_le64(root_blkno);

	/* Update the in-memory and on-disk feature flags under ip_lock. */
	spin_lock(&OCFS2_I(dir)->ip_lock);
	OCFS2_I(dir)->ip_dyn_features |= OCFS2_INDEXED_DIR_FL;
	di->i_dyn_features = cpu_to_le16(OCFS2_I(dir)->ip_dyn_features);
	spin_unlock(&OCFS2_I(dir)->ip_lock);

	ocfs2_journal_dirty(handle, di_bh);

	/* Hand the reference to the caller; nothing left to release. */
	*ret_dx_root_bh = root_bh;
	root_bh = NULL;

out:
	brelse(root_bh);
	return ret;
}
23739b7895efSMark Fasheh 
/*
 * Format 'num_dx_leaves' consecutive blocks starting at 'start_blk' as
 * empty dx leaf blocks.  Each block's buffer_head is stored in
 * dx_leaves[i] as soon as it has been obtained, so on failure the entries
 * filled in so far still hold their references; this function does not
 * release them.
 *
 * Returns 0 on success, -ENOMEM if a buffer could not be obtained, or the
 * error from ocfs2_journal_access_dl().
 */
static int ocfs2_dx_dir_format_cluster(struct ocfs2_super *osb,
				       handle_t *handle, struct inode *dir,
				       struct buffer_head **dx_leaves,
				       int num_dx_leaves, u64 start_blk)
{
	int i;

	for (i = 0; i < num_dx_leaves; i++) {
		struct buffer_head *leaf_bh = sb_getblk(osb->sb, start_blk + i);
		struct ocfs2_dx_leaf *leaf;
		int ret;

		if (!leaf_bh)
			return -ENOMEM;
		dx_leaves[i] = leaf_bh;

		ocfs2_set_new_buffer_uptodate(INODE_CACHE(dir), leaf_bh);

		ret = ocfs2_journal_access_dl(handle, INODE_CACHE(dir), leaf_bh,
					      OCFS2_JOURNAL_ACCESS_CREATE);
		if (ret < 0) {
			mlog_errno(ret);
			return ret;
		}

		/* Zero the block, then write the leaf header. */
		leaf = (struct ocfs2_dx_leaf *) leaf_bh->b_data;
		memset(leaf, 0, osb->sb->s_blocksize);
		strcpy(leaf->dl_signature, OCFS2_DX_LEAF_SIGNATURE);
		leaf->dl_fs_generation = cpu_to_le32(osb->fs_generation);
		leaf->dl_blkno = cpu_to_le64(leaf_bh->b_blocknr);
		leaf->dl_list.de_count =
			cpu_to_le16(ocfs2_dx_entries_per_leaf(osb->sb));

		trace_ocfs2_dx_dir_format_cluster(
				(unsigned long long)OCFS2_I(dir)->ip_blkno,
				(unsigned long long)leaf_bh->b_blocknr,
				le16_to_cpu(leaf->dl_list.de_count));

		ocfs2_journal_dirty(handle, leaf_bh);
	}

	return 0;
}
24219b7895efSMark Fasheh 
24229b7895efSMark Fasheh /*
24239b7895efSMark Fasheh  * Allocates and formats a new cluster for use in an indexed dir
24249b7895efSMark Fasheh  * leaf. This version will not do the extent insert, so that it can be
24259b7895efSMark Fasheh  * used by operations which need careful ordering.
24269b7895efSMark Fasheh  */
__ocfs2_dx_dir_new_cluster(struct inode * dir,u32 cpos,handle_t * handle,struct ocfs2_alloc_context * data_ac,struct buffer_head ** dx_leaves,int num_dx_leaves,u64 * ret_phys_blkno)24279b7895efSMark Fasheh static int __ocfs2_dx_dir_new_cluster(struct inode *dir,
24289b7895efSMark Fasheh 				      u32 cpos, handle_t *handle,
24299b7895efSMark Fasheh 				      struct ocfs2_alloc_context *data_ac,
24309b7895efSMark Fasheh 				      struct buffer_head **dx_leaves,
24319b7895efSMark Fasheh 				      int num_dx_leaves, u64 *ret_phys_blkno)
24329b7895efSMark Fasheh {
24339b7895efSMark Fasheh 	int ret;
24349b7895efSMark Fasheh 	u32 phys, num;
24359b7895efSMark Fasheh 	u64 phys_blkno;
24369b7895efSMark Fasheh 	struct ocfs2_super *osb = OCFS2_SB(dir->i_sb);
24379b7895efSMark Fasheh 
24389b7895efSMark Fasheh 	/*
24399b7895efSMark Fasheh 	 * XXX: For create, this should claim cluster for the index
24409b7895efSMark Fasheh 	 * *before* the unindexed insert so that we have a better
24419b7895efSMark Fasheh 	 * chance of contiguousness as the directory grows in number
24429b7895efSMark Fasheh 	 * of entries.
24439b7895efSMark Fasheh 	 */
24441ed9b777SJoel Becker 	ret = __ocfs2_claim_clusters(handle, data_ac, 1, 1, &phys, &num);
24459b7895efSMark Fasheh 	if (ret) {
24469b7895efSMark Fasheh 		mlog_errno(ret);
24479b7895efSMark Fasheh 		goto out;
24489b7895efSMark Fasheh 	}
24499b7895efSMark Fasheh 
24509b7895efSMark Fasheh 	/*
24519b7895efSMark Fasheh 	 * Format the new cluster first. That way, we're inserting
24529b7895efSMark Fasheh 	 * valid data.
24539b7895efSMark Fasheh 	 */
24549b7895efSMark Fasheh 	phys_blkno = ocfs2_clusters_to_blocks(osb->sb, phys);
24559b7895efSMark Fasheh 	ret = ocfs2_dx_dir_format_cluster(osb, handle, dir, dx_leaves,
24569b7895efSMark Fasheh 					  num_dx_leaves, phys_blkno);
24579b7895efSMark Fasheh 	if (ret) {
24589b7895efSMark Fasheh 		mlog_errno(ret);
24599b7895efSMark Fasheh 		goto out;
24609b7895efSMark Fasheh 	}
24619b7895efSMark Fasheh 
24629b7895efSMark Fasheh 	*ret_phys_blkno = phys_blkno;
24639b7895efSMark Fasheh out:
24649b7895efSMark Fasheh 	return ret;
24659b7895efSMark Fasheh }
24669b7895efSMark Fasheh 
/*
 * Allocate and format a new index leaf cluster, then insert it as a
 * one-cluster extent at logical cluster cpos of the extent tree et.
 *
 * Returns 0 on success or the error from either step.
 */
static int ocfs2_dx_dir_new_cluster(struct inode *dir,
				    struct ocfs2_extent_tree *et,
				    u32 cpos, handle_t *handle,
				    struct ocfs2_alloc_context *data_ac,
				    struct ocfs2_alloc_context *meta_ac,
				    struct buffer_head **dx_leaves,
				    int num_dx_leaves)
{
	u64 first_blk;
	int status;

	status = __ocfs2_dx_dir_new_cluster(dir, cpos, handle, data_ac,
					    dx_leaves, num_dx_leaves,
					    &first_blk);
	if (status) {
		mlog_errno(status);
		return status;
	}

	status = ocfs2_insert_extent(handle, et, cpos, first_blk, 1, 0,
				     meta_ac);
	if (status)
		mlog_errno(status);

	return status;
}
24929b7895efSMark Fasheh 
ocfs2_dx_dir_kmalloc_leaves(struct super_block * sb,int * ret_num_leaves)24939b7895efSMark Fasheh static struct buffer_head **ocfs2_dx_dir_kmalloc_leaves(struct super_block *sb,
24949b7895efSMark Fasheh 							int *ret_num_leaves)
24959b7895efSMark Fasheh {
24969b7895efSMark Fasheh 	int num_dx_leaves = ocfs2_clusters_to_blocks(sb, 1);
24979b7895efSMark Fasheh 	struct buffer_head **dx_leaves;
24989b7895efSMark Fasheh 
24999b7895efSMark Fasheh 	dx_leaves = kcalloc(num_dx_leaves, sizeof(struct buffer_head *),
25009b7895efSMark Fasheh 			    GFP_NOFS);
25019b7895efSMark Fasheh 	if (dx_leaves && ret_num_leaves)
25029b7895efSMark Fasheh 		*ret_num_leaves = num_dx_leaves;
25039b7895efSMark Fasheh 
25049b7895efSMark Fasheh 	return dx_leaves;
25059b7895efSMark Fasheh }
25069b7895efSMark Fasheh 
/*
 * Populate a freshly created directory and give it an index.
 *
 * The directory is first built as if it were unindexed, and an index
 * root is attached afterwards. A new directory only holds "." and
 * "..", both in its first dirent block, so indexing it amounts to
 * hashing those two names and recording them in the root's entry list.
 *
 * Returns 0 on success or the error from either setup step. Both
 * buffers obtained along the way are released before returning.
 */
static int ocfs2_fill_new_dir_dx(struct ocfs2_super *osb,
				 handle_t *handle,
				 struct inode *parent,
				 struct inode *inode,
				 struct buffer_head *di_bh,
				 struct ocfs2_alloc_context *data_ac,
				 struct ocfs2_alloc_context *meta_ac)
{
	struct buffer_head *dirent_bh = NULL;
	struct buffer_head *root_bh = NULL;
	struct ocfs2_dx_entry_list *entries;
	struct ocfs2_dx_root_block *root;
	struct ocfs2_dx_hinfo hash;
	int status;

	status = ocfs2_fill_new_dir_el(osb, handle, parent, inode, di_bh,
				       data_ac, &dirent_bh);
	if (status) {
		mlog_errno(status);
		goto release;
	}

	/*
	 * The literal 1 and 2 sit in the argument slots that
	 * ocfs2_expand_inline_dir() fills with dx_inline and
	 * num_dx_entries.
	 */
	status = ocfs2_dx_dir_attach_index(osb, handle, inode, di_bh,
					   dirent_bh, meta_ac, 1, 2, &root_bh);
	if (status) {
		mlog_errno(status);
		goto release;
	}

	root = (struct ocfs2_dx_root_block *)root_bh->b_data;
	entries = &root->dr_entries;

	/* Buffer has been journaled for us by ocfs2_dx_dir_attach_index */
	ocfs2_dx_dir_name_hash(inode, ".", 1, &hash);
	ocfs2_dx_entry_list_insert(entries, &hash, dirent_bh->b_blocknr);

	ocfs2_dx_dir_name_hash(inode, "..", 2, &hash);
	ocfs2_dx_entry_list_insert(entries, &hash, dirent_bh->b_blocknr);

release:
	brelse(root_bh);
	brelse(dirent_bh);
	return status;
}
25619b7895efSMark Fasheh 
/*
 * Fill in the initial contents of a new directory, picking the layout
 * from the inode's flags and the filesystem's features:
 *
 *  - inode flagged OCFS2_INLINE_DATA_FL: inline-data directory;
 *  - otherwise, indexed directories supported: extent directory with
 *    an index attached;
 *  - otherwise: plain extent directory.
 *
 * data_ac may only be NULL when the filesystem supports inline data;
 * any other combination trips the BUG_ON below.
 */
int ocfs2_fill_new_dir(struct ocfs2_super *osb,
		       handle_t *handle,
		       struct inode *parent,
		       struct inode *inode,
		       struct buffer_head *fe_bh,
		       struct ocfs2_alloc_context *data_ac,
		       struct ocfs2_alloc_context *meta_ac)
{
	BUG_ON(!(ocfs2_supports_inline_data(osb) || data_ac != NULL));

	if (OCFS2_I(inode)->ip_dyn_features & OCFS2_INLINE_DATA_FL)
		return ocfs2_fill_new_dir_id(osb, handle, parent, inode, fe_bh);

	if (!ocfs2_supports_indexed_dirs(osb))
		return ocfs2_fill_new_dir_el(osb, handle, parent, inode, fe_bh,
					     data_ac, NULL);

	return ocfs2_fill_new_dir_dx(osb, handle, parent, inode, fe_bh,
				     data_ac, meta_ac);
}
25839b7895efSMark Fasheh 
/*
 * Walk every dirent in dirent_bh and insert each live one into the
 * index leaf selected by its name hash.
 *
 * Records with a zero name_len or a zero inode are skipped. For every
 * record that is indexed, *num_dx_entries is incremented, so the
 * caller must initialise it.
 *
 * Returns 0 on success, or the first error from
 * __ocfs2_dx_dir_leaf_insert(); in that case records indexed before
 * the failure stay counted in *num_dx_entries.
 *
 * NOTE(review): the walk advances purely by de->rec_len and does not
 * validate it here - presumably the block contents are trusted at this
 * point; confirm against the caller.
 */
static int ocfs2_dx_dir_index_block(struct inode *dir,
				    handle_t *handle,
				    struct buffer_head **dx_leaves,
				    int num_dx_leaves,
				    u32 *num_dx_entries,
				    struct buffer_head *dirent_bh)
{
	u64 dirent_blk = dirent_bh->b_blocknr;
	char *pos = dirent_bh->b_data;
	char *end = pos + dir->i_sb->s_blocksize;
	struct ocfs2_dir_entry *de;
	struct ocfs2_dx_hinfo hash;
	int status, leaf_idx;

	for (; pos < end; pos += le16_to_cpu(de->rec_len)) {
		de = (struct ocfs2_dir_entry *)pos;

		/* Nothing to index for unused records. */
		if (!de->name_len || !de->inode)
			continue;

		ocfs2_dx_dir_name_hash(dir, de->name, de->name_len, &hash);

		leaf_idx = ocfs2_dx_dir_hash_idx(OCFS2_SB(dir->i_sb), &hash);

		status = __ocfs2_dx_dir_leaf_insert(dir, handle, &hash,
						    dirent_blk,
						    dx_leaves[leaf_idx]);
		if (status) {
			mlog_errno(status);
			return status;
		}

		(*num_dx_entries)++;
	}

	return 0;
}
2629e7c17e43SMark Fasheh 
26304ed8a6bbSMark Fasheh /*
26314ed8a6bbSMark Fasheh  * XXX: This expects dx_root_bh to already be part of the transaction.
26324ed8a6bbSMark Fasheh  */
ocfs2_dx_dir_index_root_block(struct inode * dir,struct buffer_head * dx_root_bh,struct buffer_head * dirent_bh)26334ed8a6bbSMark Fasheh static void ocfs2_dx_dir_index_root_block(struct inode *dir,
26344ed8a6bbSMark Fasheh 					 struct buffer_head *dx_root_bh,
26354ed8a6bbSMark Fasheh 					 struct buffer_head *dirent_bh)
26364ed8a6bbSMark Fasheh {
26374ed8a6bbSMark Fasheh 	char *de_buf, *limit;
26384ed8a6bbSMark Fasheh 	struct ocfs2_dx_root_block *dx_root;
26394ed8a6bbSMark Fasheh 	struct ocfs2_dir_entry *de;
26404ed8a6bbSMark Fasheh 	struct ocfs2_dx_hinfo hinfo;
26414ed8a6bbSMark Fasheh 	u64 dirent_blk = dirent_bh->b_blocknr;
26424ed8a6bbSMark Fasheh 
26434ed8a6bbSMark Fasheh 	dx_root = (struct ocfs2_dx_root_block *)dx_root_bh->b_data;
26444ed8a6bbSMark Fasheh 
26454ed8a6bbSMark Fasheh 	de_buf = dirent_bh->b_data;
26464ed8a6bbSMark Fasheh 	limit = de_buf + dir->i_sb->s_blocksize;
26474ed8a6bbSMark Fasheh 
26484ed8a6bbSMark Fasheh 	while (de_buf < limit) {
26494ed8a6bbSMark Fasheh 		de = (struct ocfs2_dir_entry *)de_buf;
26504ed8a6bbSMark Fasheh 
26514ed8a6bbSMark Fasheh 		if (!de->name_len || !de->inode)
26524ed8a6bbSMark Fasheh 			goto inc;
26534ed8a6bbSMark Fasheh 
26544ed8a6bbSMark Fasheh 		ocfs2_dx_dir_name_hash(dir, de->name, de->name_len, &hinfo);
26554ed8a6bbSMark Fasheh 
2656f1088d47STao Ma 		trace_ocfs2_dx_dir_index_root_block(
2657f1088d47STao Ma 				(unsigned long long)dir->i_ino,
2658f1088d47STao Ma 				hinfo.major_hash, hinfo.minor_hash,
2659f1088d47STao Ma 				de->name_len, de->name,
2660f1088d47STao Ma 				le16_to_cpu(dx_root->dr_entries.de_num_used));
26614ed8a6bbSMark Fasheh 
26624ed8a6bbSMark Fasheh 		ocfs2_dx_entry_list_insert(&dx_root->dr_entries, &hinfo,
26634ed8a6bbSMark Fasheh 					   dirent_blk);
2664e3a93c2dSMark Fasheh 
2665e3a93c2dSMark Fasheh 		le32_add_cpu(&dx_root->dr_num_entries, 1);
26664ed8a6bbSMark Fasheh inc:
26674ed8a6bbSMark Fasheh 		de_buf += le16_to_cpu(de->rec_len);
26684ed8a6bbSMark Fasheh 	}
26694ed8a6bbSMark Fasheh }
26704ed8a6bbSMark Fasheh 
26714ed8a6bbSMark Fasheh /*
26724ed8a6bbSMark Fasheh  * Count the number of inline directory entries in di_bh and compare
26734ed8a6bbSMark Fasheh  * them against the number of entries we can hold in an inline dx root
26744ed8a6bbSMark Fasheh  * block.
26754ed8a6bbSMark Fasheh  */
ocfs2_new_dx_should_be_inline(struct inode * dir,struct buffer_head * di_bh)26764ed8a6bbSMark Fasheh static int ocfs2_new_dx_should_be_inline(struct inode *dir,
26774ed8a6bbSMark Fasheh 					 struct buffer_head *di_bh)
26784ed8a6bbSMark Fasheh {
26794ed8a6bbSMark Fasheh 	int dirent_count = 0;
26804ed8a6bbSMark Fasheh 	char *de_buf, *limit;
26814ed8a6bbSMark Fasheh 	struct ocfs2_dir_entry *de;
26824ed8a6bbSMark Fasheh 	struct ocfs2_dinode *di = (struct ocfs2_dinode *)di_bh->b_data;
26834ed8a6bbSMark Fasheh 
26844ed8a6bbSMark Fasheh 	de_buf = di->id2.i_data.id_data;
26854ed8a6bbSMark Fasheh 	limit = de_buf + i_size_read(dir);
26864ed8a6bbSMark Fasheh 
26874ed8a6bbSMark Fasheh 	while (de_buf < limit) {
26884ed8a6bbSMark Fasheh 		de = (struct ocfs2_dir_entry *)de_buf;
26894ed8a6bbSMark Fasheh 
26904ed8a6bbSMark Fasheh 		if (de->name_len && de->inode)
26914ed8a6bbSMark Fasheh 			dirent_count++;
26924ed8a6bbSMark Fasheh 
26934ed8a6bbSMark Fasheh 		de_buf += le16_to_cpu(de->rec_len);
26944ed8a6bbSMark Fasheh 	}
26954ed8a6bbSMark Fasheh 
26964ed8a6bbSMark Fasheh 	/* We are careful to leave room for one extra record. */
26974ed8a6bbSMark Fasheh 	return dirent_count < ocfs2_dx_entries_per_root(dir->i_sb);
26984ed8a6bbSMark Fasheh }
26995b6a3a2bSMark Fasheh 
270087d35a74SMark Fasheh /*
270187d35a74SMark Fasheh  * Expand rec_len of the rightmost dirent in a directory block so that it
270287d35a74SMark Fasheh  * contains the end of our valid space for dirents. We do this during
270387d35a74SMark Fasheh  * expansion from an inline directory to one with extents. The first dir block
270487d35a74SMark Fasheh  * in that case is taken from the inline data portion of the inode block.
270587d35a74SMark Fasheh  *
2706e7c17e43SMark Fasheh  * This will also return the largest amount of contiguous space for a dirent
2707e7c17e43SMark Fasheh  * in the block. That value is *not* necessarily the last dirent, even after
2708e7c17e43SMark Fasheh  * expansion. The directory indexing code wants this value for free space
2709e7c17e43SMark Fasheh  * accounting. We do this here since we're already walking the entire dir
2710e7c17e43SMark Fasheh  * block.
2711e7c17e43SMark Fasheh  *
271287d35a74SMark Fasheh  * We add the dir trailer if this filesystem wants it.
271387d35a74SMark Fasheh  */
ocfs2_expand_last_dirent(char * start,unsigned int old_size,struct inode * dir)2714e7c17e43SMark Fasheh static unsigned int ocfs2_expand_last_dirent(char *start, unsigned int old_size,
2715e7c17e43SMark Fasheh 					     struct inode *dir)
27165b6a3a2bSMark Fasheh {
2717e7c17e43SMark Fasheh 	struct super_block *sb = dir->i_sb;
27185b6a3a2bSMark Fasheh 	struct ocfs2_dir_entry *de;
27195b6a3a2bSMark Fasheh 	struct ocfs2_dir_entry *prev_de;
27205b6a3a2bSMark Fasheh 	char *de_buf, *limit;
272187d35a74SMark Fasheh 	unsigned int new_size = sb->s_blocksize;
2722e7c17e43SMark Fasheh 	unsigned int bytes, this_hole;
2723e7c17e43SMark Fasheh 	unsigned int largest_hole = 0;
272487d35a74SMark Fasheh 
2725e7c17e43SMark Fasheh 	if (ocfs2_new_dir_wants_trailer(dir))
272687d35a74SMark Fasheh 		new_size = ocfs2_dir_trailer_blk_off(sb);
272787d35a74SMark Fasheh 
272887d35a74SMark Fasheh 	bytes = new_size - old_size;
27295b6a3a2bSMark Fasheh 
27305b6a3a2bSMark Fasheh 	limit = start + old_size;
27315b6a3a2bSMark Fasheh 	de_buf = start;
27325b6a3a2bSMark Fasheh 	de = (struct ocfs2_dir_entry *)de_buf;
27335b6a3a2bSMark Fasheh 	do {
2734e7c17e43SMark Fasheh 		this_hole = ocfs2_figure_dirent_hole(de);
2735e7c17e43SMark Fasheh 		if (this_hole > largest_hole)
2736e7c17e43SMark Fasheh 			largest_hole = this_hole;
2737e7c17e43SMark Fasheh 
27385b6a3a2bSMark Fasheh 		prev_de = de;
27395b6a3a2bSMark Fasheh 		de_buf += le16_to_cpu(de->rec_len);
27405b6a3a2bSMark Fasheh 		de = (struct ocfs2_dir_entry *)de_buf;
27415b6a3a2bSMark Fasheh 	} while (de_buf < limit);
27425b6a3a2bSMark Fasheh 
27435b6a3a2bSMark Fasheh 	le16_add_cpu(&prev_de->rec_len, bytes);
2744e7c17e43SMark Fasheh 
2745e7c17e43SMark Fasheh 	/* We need to double check this after modification of the final
2746e7c17e43SMark Fasheh 	 * dirent. */
2747e7c17e43SMark Fasheh 	this_hole = ocfs2_figure_dirent_hole(prev_de);
2748e7c17e43SMark Fasheh 	if (this_hole > largest_hole)
2749e7c17e43SMark Fasheh 		largest_hole = this_hole;
2750e7c17e43SMark Fasheh 
2751e7c17e43SMark Fasheh 	if (largest_hole >= OCFS2_DIR_MIN_REC_LEN)
2752e7c17e43SMark Fasheh 		return largest_hole;
2753e7c17e43SMark Fasheh 	return 0;
27545b6a3a2bSMark Fasheh }
27555b6a3a2bSMark Fasheh 
27565b6a3a2bSMark Fasheh /*
27575b6a3a2bSMark Fasheh  * We allocate enough clusters to fulfill "blocks_wanted", but set
27585b6a3a2bSMark Fasheh  * i_size to exactly one block. Ocfs2_extend_dir() will handle the
27595b6a3a2bSMark Fasheh  * rest automatically for us.
27605b6a3a2bSMark Fasheh  *
27615b6a3a2bSMark Fasheh  * *first_block_bh is a pointer to the 1st data block allocated to the
27625b6a3a2bSMark Fasheh  *  directory.
27635b6a3a2bSMark Fasheh  */
ocfs2_expand_inline_dir(struct inode * dir,struct buffer_head * di_bh,unsigned int blocks_wanted,struct ocfs2_dir_lookup_result * lookup,struct buffer_head ** first_block_bh)27645b6a3a2bSMark Fasheh static int ocfs2_expand_inline_dir(struct inode *dir, struct buffer_head *di_bh,
27655b6a3a2bSMark Fasheh 				   unsigned int blocks_wanted,
27669b7895efSMark Fasheh 				   struct ocfs2_dir_lookup_result *lookup,
27675b6a3a2bSMark Fasheh 				   struct buffer_head **first_block_bh)
27685b6a3a2bSMark Fasheh {
2769e3a93c2dSMark Fasheh 	u32 alloc, dx_alloc, bit_off, len, num_dx_entries = 0;
27705b6a3a2bSMark Fasheh 	struct super_block *sb = dir->i_sb;
27714ed8a6bbSMark Fasheh 	int ret, i, num_dx_leaves = 0, dx_inline = 0,
27729b7895efSMark Fasheh 		credits = ocfs2_inline_to_extents_credits(sb);
27739b7895efSMark Fasheh 	u64 dx_insert_blkno, blkno,
27749b7895efSMark Fasheh 		bytes = blocks_wanted << sb->s_blocksize_bits;
27755b6a3a2bSMark Fasheh 	struct ocfs2_super *osb = OCFS2_SB(dir->i_sb);
27765b6a3a2bSMark Fasheh 	struct ocfs2_inode_info *oi = OCFS2_I(dir);
27775d44670fSMarcus Meissner 	struct ocfs2_alloc_context *data_ac = NULL;
27789b7895efSMark Fasheh 	struct ocfs2_alloc_context *meta_ac = NULL;
27795b6a3a2bSMark Fasheh 	struct buffer_head *dirdata_bh = NULL;
27809b7895efSMark Fasheh 	struct buffer_head *dx_root_bh = NULL;
27819b7895efSMark Fasheh 	struct buffer_head **dx_leaves = NULL;
27825b6a3a2bSMark Fasheh 	struct ocfs2_dinode *di = (struct ocfs2_dinode *)di_bh->b_data;
27835b6a3a2bSMark Fasheh 	handle_t *handle;
2784f99b9b7cSJoel Becker 	struct ocfs2_extent_tree et;
27859b7895efSMark Fasheh 	struct ocfs2_extent_tree dx_et;
27869b7895efSMark Fasheh 	int did_quota = 0, bytes_allocated = 0;
2787f99b9b7cSJoel Becker 
27885e404e9eSJoel Becker 	ocfs2_init_dinode_extent_tree(&et, INODE_CACHE(dir), di_bh);
27895b6a3a2bSMark Fasheh 
27905b6a3a2bSMark Fasheh 	alloc = ocfs2_clusters_for_bytes(sb, bytes);
27919b7895efSMark Fasheh 	dx_alloc = 0;
27929b7895efSMark Fasheh 
2793edd45c08SJan Kara 	down_write(&oi->ip_alloc_sem);
2794edd45c08SJan Kara 
27959b7895efSMark Fasheh 	if (ocfs2_supports_indexed_dirs(osb)) {
27969b7895efSMark Fasheh 		credits += ocfs2_add_dir_index_credits(sb);
27979b7895efSMark Fasheh 
27984ed8a6bbSMark Fasheh 		dx_inline = ocfs2_new_dx_should_be_inline(dir, di_bh);
27994ed8a6bbSMark Fasheh 		if (!dx_inline) {
28004ed8a6bbSMark Fasheh 			/* Add one more cluster for an index leaf */
28014ed8a6bbSMark Fasheh 			dx_alloc++;
28024ed8a6bbSMark Fasheh 			dx_leaves = ocfs2_dx_dir_kmalloc_leaves(sb,
28034ed8a6bbSMark Fasheh 								&num_dx_leaves);
28049b7895efSMark Fasheh 			if (!dx_leaves) {
28059b7895efSMark Fasheh 				ret = -ENOMEM;
28069b7895efSMark Fasheh 				mlog_errno(ret);
28079b7895efSMark Fasheh 				goto out;
28089b7895efSMark Fasheh 			}
28094ed8a6bbSMark Fasheh 		}
28109b7895efSMark Fasheh 
28119b7895efSMark Fasheh 		/* This gets us the dx_root */
28129b7895efSMark Fasheh 		ret = ocfs2_reserve_new_metadata_blocks(osb, 1, &meta_ac);
28139b7895efSMark Fasheh 		if (ret) {
28149b7895efSMark Fasheh 			mlog_errno(ret);
28159b7895efSMark Fasheh 			goto out;
28169b7895efSMark Fasheh 		}
28179b7895efSMark Fasheh 	}
28185b6a3a2bSMark Fasheh 
28195b6a3a2bSMark Fasheh 	/*
28209b7895efSMark Fasheh 	 * We should never need more than 2 clusters for the unindexed
28219b7895efSMark Fasheh 	 * tree - maximum dirent size is far less than one block. In
28229b7895efSMark Fasheh 	 * fact, the only time we'd need more than one cluster is if
28235b6a3a2bSMark Fasheh 	 * blocksize == clustersize and the dirent won't fit in the
28245b6a3a2bSMark Fasheh 	 * extra space that the expansion to a single block gives. As
28255b6a3a2bSMark Fasheh 	 * of today, that only happens on 4k/4k file systems.
28265b6a3a2bSMark Fasheh 	 */
28275b6a3a2bSMark Fasheh 	BUG_ON(alloc > 2);
28285b6a3a2bSMark Fasheh 
2829035a5711STao Ma 	ret = ocfs2_reserve_clusters(osb, alloc + dx_alloc, &data_ac);
28305b6a3a2bSMark Fasheh 	if (ret) {
28315b6a3a2bSMark Fasheh 		mlog_errno(ret);
28325b6a3a2bSMark Fasheh 		goto out;
28335b6a3a2bSMark Fasheh 	}
28345b6a3a2bSMark Fasheh 
28355b6a3a2bSMark Fasheh 	/*
2836c78bad11SJoe Perches 	 * Prepare for worst case allocation scenario of two separate
28379b7895efSMark Fasheh 	 * extents in the unindexed tree.
28385b6a3a2bSMark Fasheh 	 */
28395b6a3a2bSMark Fasheh 	if (alloc == 2)
28405b6a3a2bSMark Fasheh 		credits += OCFS2_SUBALLOC_ALLOC;
28415b6a3a2bSMark Fasheh 
28425b6a3a2bSMark Fasheh 	handle = ocfs2_start_trans(osb, credits);
28435b6a3a2bSMark Fasheh 	if (IS_ERR(handle)) {
28445b6a3a2bSMark Fasheh 		ret = PTR_ERR(handle);
28455b6a3a2bSMark Fasheh 		mlog_errno(ret);
2846edd45c08SJan Kara 		goto out;
28475b6a3a2bSMark Fasheh 	}
28485b6a3a2bSMark Fasheh 
28495dd4056dSChristoph Hellwig 	ret = dquot_alloc_space_nodirty(dir,
28505dd4056dSChristoph Hellwig 		ocfs2_clusters_to_bytes(osb->sb, alloc + dx_alloc));
28515dd4056dSChristoph Hellwig 	if (ret)
2852a90714c1SJan Kara 		goto out_commit;
2853a90714c1SJan Kara 	did_quota = 1;
28549b7895efSMark Fasheh 
28554ed8a6bbSMark Fasheh 	if (ocfs2_supports_indexed_dirs(osb) && !dx_inline) {
28569b7895efSMark Fasheh 		/*
28579b7895efSMark Fasheh 		 * Allocate our index cluster first, to maximize the
28589b7895efSMark Fasheh 		 * possibility that unindexed leaves grow
28599b7895efSMark Fasheh 		 * contiguously.
28609b7895efSMark Fasheh 		 */
28619b7895efSMark Fasheh 		ret = __ocfs2_dx_dir_new_cluster(dir, 0, handle, data_ac,
28629b7895efSMark Fasheh 						 dx_leaves, num_dx_leaves,
28639b7895efSMark Fasheh 						 &dx_insert_blkno);
28649b7895efSMark Fasheh 		if (ret) {
28659b7895efSMark Fasheh 			mlog_errno(ret);
28669b7895efSMark Fasheh 			goto out_commit;
28679b7895efSMark Fasheh 		}
28689b7895efSMark Fasheh 		bytes_allocated += ocfs2_clusters_to_bytes(dir->i_sb, 1);
28699b7895efSMark Fasheh 	}
28709b7895efSMark Fasheh 
28715b6a3a2bSMark Fasheh 	/*
28725b6a3a2bSMark Fasheh 	 * Try to claim as many clusters as the bitmap can give though
28735b6a3a2bSMark Fasheh 	 * if we only get one now, that's enough to continue. The rest
28745b6a3a2bSMark Fasheh 	 * will be claimed after the conversion to extents.
28755b6a3a2bSMark Fasheh 	 */
287683f92318SMark Fasheh 	if (ocfs2_dir_resv_allowed(osb))
2877e3b4a97dSMark Fasheh 		data_ac->ac_resv = &oi->ip_la_data_resv;
28781ed9b777SJoel Becker 	ret = ocfs2_claim_clusters(handle, data_ac, 1, &bit_off, &len);
28795b6a3a2bSMark Fasheh 	if (ret) {
28805b6a3a2bSMark Fasheh 		mlog_errno(ret);
28815b6a3a2bSMark Fasheh 		goto out_commit;
28825b6a3a2bSMark Fasheh 	}
28839b7895efSMark Fasheh 	bytes_allocated += ocfs2_clusters_to_bytes(dir->i_sb, 1);
28845b6a3a2bSMark Fasheh 
28855b6a3a2bSMark Fasheh 	/*
28865b6a3a2bSMark Fasheh 	 * Operations are carefully ordered so that we set up the new
28875b6a3a2bSMark Fasheh 	 * data block first. The conversion from inline data to
28885b6a3a2bSMark Fasheh 	 * extents follows.
28895b6a3a2bSMark Fasheh 	 */
28905b6a3a2bSMark Fasheh 	blkno = ocfs2_clusters_to_blocks(dir->i_sb, bit_off);
28915b6a3a2bSMark Fasheh 	dirdata_bh = sb_getblk(sb, blkno);
28925b6a3a2bSMark Fasheh 	if (!dirdata_bh) {
28937391a294SRui Xiang 		ret = -ENOMEM;
28945b6a3a2bSMark Fasheh 		mlog_errno(ret);
28955b6a3a2bSMark Fasheh 		goto out_commit;
28965b6a3a2bSMark Fasheh 	}
28975b6a3a2bSMark Fasheh 
28988cb471e8SJoel Becker 	ocfs2_set_new_buffer_uptodate(INODE_CACHE(dir), dirdata_bh);
28995b6a3a2bSMark Fasheh 
29000cf2f763SJoel Becker 	ret = ocfs2_journal_access_db(handle, INODE_CACHE(dir), dirdata_bh,
29015b6a3a2bSMark Fasheh 				      OCFS2_JOURNAL_ACCESS_CREATE);
29025b6a3a2bSMark Fasheh 	if (ret) {
29035b6a3a2bSMark Fasheh 		mlog_errno(ret);
29045b6a3a2bSMark Fasheh 		goto out_commit;
29055b6a3a2bSMark Fasheh 	}
29065b6a3a2bSMark Fasheh 
29075b6a3a2bSMark Fasheh 	memcpy(dirdata_bh->b_data, di->id2.i_data.id_data, i_size_read(dir));
29085b6a3a2bSMark Fasheh 	memset(dirdata_bh->b_data + i_size_read(dir), 0,
29095b6a3a2bSMark Fasheh 	       sb->s_blocksize - i_size_read(dir));
2910e7c17e43SMark Fasheh 	i = ocfs2_expand_last_dirent(dirdata_bh->b_data, i_size_read(dir), dir);
2911e7c17e43SMark Fasheh 	if (ocfs2_new_dir_wants_trailer(dir)) {
2912e7c17e43SMark Fasheh 		/*
2913e7c17e43SMark Fasheh 		 * Prepare the dir trailer up front. It will otherwise look
2914e7c17e43SMark Fasheh 		 * like a valid dirent. Even if inserting the index fails
2915e7c17e43SMark Fasheh 		 * (unlikely), then all we'll have done is given first dir
2916e7c17e43SMark Fasheh 		 * block a small amount of fragmentation.
2917e7c17e43SMark Fasheh 		 */
2918e7c17e43SMark Fasheh 		ocfs2_init_dir_trailer(dir, dirdata_bh, i);
2919e7c17e43SMark Fasheh 	}
29205b6a3a2bSMark Fasheh 
29212931cdcbSDarrick J. Wong 	ocfs2_update_inode_fsync_trans(handle, dir, 1);
2922ec20cec7SJoel Becker 	ocfs2_journal_dirty(handle, dirdata_bh);
29235b6a3a2bSMark Fasheh 
29244ed8a6bbSMark Fasheh 	if (ocfs2_supports_indexed_dirs(osb) && !dx_inline) {
29254ed8a6bbSMark Fasheh 		/*
29264ed8a6bbSMark Fasheh 		 * Dx dirs with an external cluster need to do this up
29274ed8a6bbSMark Fasheh 		 * front. Inline dx root's get handled later, after
2928e3a93c2dSMark Fasheh 		 * we've allocated our root block. We get passed back
2929e3a93c2dSMark Fasheh 		 * a total number of items so that dr_num_entries can
2930e3a93c2dSMark Fasheh 		 * be correctly set once the dx_root has been
2931e3a93c2dSMark Fasheh 		 * allocated.
29324ed8a6bbSMark Fasheh 		 */
29339b7895efSMark Fasheh 		ret = ocfs2_dx_dir_index_block(dir, handle, dx_leaves,
2934e3a93c2dSMark Fasheh 					       num_dx_leaves, &num_dx_entries,
2935e3a93c2dSMark Fasheh 					       dirdata_bh);
29369b7895efSMark Fasheh 		if (ret) {
29379b7895efSMark Fasheh 			mlog_errno(ret);
29389b7895efSMark Fasheh 			goto out_commit;
29399b7895efSMark Fasheh 		}
29409b7895efSMark Fasheh 	}
29419b7895efSMark Fasheh 
29425b6a3a2bSMark Fasheh 	/*
29435b6a3a2bSMark Fasheh 	 * Set extent, i_size, etc on the directory. After this, the
29445b6a3a2bSMark Fasheh 	 * inode should contain the same exact dirents as before and
29455b6a3a2bSMark Fasheh 	 * be fully accessible from system calls.
29465b6a3a2bSMark Fasheh 	 *
29475b6a3a2bSMark Fasheh 	 * We let the later dirent insert modify c/mtime - to the user
29485b6a3a2bSMark Fasheh 	 * the data hasn't changed.
29495b6a3a2bSMark Fasheh 	 */
29500cf2f763SJoel Becker 	ret = ocfs2_journal_access_di(handle, INODE_CACHE(dir), di_bh,
29515b6a3a2bSMark Fasheh 				      OCFS2_JOURNAL_ACCESS_CREATE);
29525b6a3a2bSMark Fasheh 	if (ret) {
29535b6a3a2bSMark Fasheh 		mlog_errno(ret);
29545b6a3a2bSMark Fasheh 		goto out_commit;
29555b6a3a2bSMark Fasheh 	}
29565b6a3a2bSMark Fasheh 
29575b6a3a2bSMark Fasheh 	spin_lock(&oi->ip_lock);
29585b6a3a2bSMark Fasheh 	oi->ip_dyn_features &= ~OCFS2_INLINE_DATA_FL;
29595b6a3a2bSMark Fasheh 	di->i_dyn_features = cpu_to_le16(oi->ip_dyn_features);
29605b6a3a2bSMark Fasheh 	spin_unlock(&oi->ip_lock);
29615b6a3a2bSMark Fasheh 
29625b6a3a2bSMark Fasheh 	ocfs2_dinode_new_extent_list(dir, di);
29635b6a3a2bSMark Fasheh 
29645b6a3a2bSMark Fasheh 	i_size_write(dir, sb->s_blocksize);
2965*6861de97SJeff Layton 	dir->i_mtime = inode_set_ctime_current(dir);
29665b6a3a2bSMark Fasheh 
29675b6a3a2bSMark Fasheh 	di->i_size = cpu_to_le64(sb->s_blocksize);
2968*6861de97SJeff Layton 	di->i_ctime = di->i_mtime = cpu_to_le64(inode_get_ctime(dir).tv_sec);
2969*6861de97SJeff Layton 	di->i_ctime_nsec = di->i_mtime_nsec = cpu_to_le32(inode_get_ctime(dir).tv_nsec);
29706fdb702dSDarrick J. Wong 	ocfs2_update_inode_fsync_trans(handle, dir, 1);
29715b6a3a2bSMark Fasheh 
29725b6a3a2bSMark Fasheh 	/*
29735b6a3a2bSMark Fasheh 	 * This should never fail as our extent list is empty and all
29745b6a3a2bSMark Fasheh 	 * related blocks have been journaled already.
29755b6a3a2bSMark Fasheh 	 */
2976cc79d8c1SJoel Becker 	ret = ocfs2_insert_extent(handle, &et, 0, blkno, len,
2977f99b9b7cSJoel Becker 				  0, NULL);
29785b6a3a2bSMark Fasheh 	if (ret) {
29795b6a3a2bSMark Fasheh 		mlog_errno(ret);
298083cab533STao Ma 		goto out_commit;
29815b6a3a2bSMark Fasheh 	}
29825b6a3a2bSMark Fasheh 
29839780eb6cSMark Fasheh 	/*
29849780eb6cSMark Fasheh 	 * Set i_blocks after the extent insert for the most up to
29859780eb6cSMark Fasheh 	 * date ip_clusters value.
29869780eb6cSMark Fasheh 	 */
29879780eb6cSMark Fasheh 	dir->i_blocks = ocfs2_inode_sector_count(dir);
29889780eb6cSMark Fasheh 
2989ec20cec7SJoel Becker 	ocfs2_journal_dirty(handle, di_bh);
29905b6a3a2bSMark Fasheh 
29919b7895efSMark Fasheh 	if (ocfs2_supports_indexed_dirs(osb)) {
29929b7895efSMark Fasheh 		ret = ocfs2_dx_dir_attach_index(osb, handle, dir, di_bh,
2993e7c17e43SMark Fasheh 						dirdata_bh, meta_ac, dx_inline,
2994e3a93c2dSMark Fasheh 						num_dx_entries, &dx_root_bh);
29959b7895efSMark Fasheh 		if (ret) {
29969b7895efSMark Fasheh 			mlog_errno(ret);
29979b7895efSMark Fasheh 			goto out_commit;
29989b7895efSMark Fasheh 		}
29999b7895efSMark Fasheh 
30004ed8a6bbSMark Fasheh 		if (dx_inline) {
30014ed8a6bbSMark Fasheh 			ocfs2_dx_dir_index_root_block(dir, dx_root_bh,
30024ed8a6bbSMark Fasheh 						      dirdata_bh);
30034ed8a6bbSMark Fasheh 		} else {
30045e404e9eSJoel Becker 			ocfs2_init_dx_root_extent_tree(&dx_et,
30055e404e9eSJoel Becker 						       INODE_CACHE(dir),
30065e404e9eSJoel Becker 						       dx_root_bh);
3007cc79d8c1SJoel Becker 			ret = ocfs2_insert_extent(handle, &dx_et, 0,
30089b7895efSMark Fasheh 						  dx_insert_blkno, 1, 0, NULL);
30099b7895efSMark Fasheh 			if (ret)
30109b7895efSMark Fasheh 				mlog_errno(ret);
30119b7895efSMark Fasheh 		}
30124ed8a6bbSMark Fasheh 	}
30139b7895efSMark Fasheh 
30145b6a3a2bSMark Fasheh 	/*
30155b6a3a2bSMark Fasheh 	 * We asked for two clusters, but only got one in the 1st
30165b6a3a2bSMark Fasheh 	 * pass. Claim the 2nd cluster as a separate extent.
30175b6a3a2bSMark Fasheh 	 */
30185b6a3a2bSMark Fasheh 	if (alloc > len) {
30191ed9b777SJoel Becker 		ret = ocfs2_claim_clusters(handle, data_ac, 1, &bit_off,
30205b6a3a2bSMark Fasheh 					   &len);
30215b6a3a2bSMark Fasheh 		if (ret) {
30225b6a3a2bSMark Fasheh 			mlog_errno(ret);
30235b6a3a2bSMark Fasheh 			goto out_commit;
30245b6a3a2bSMark Fasheh 		}
30255b6a3a2bSMark Fasheh 		blkno = ocfs2_clusters_to_blocks(dir->i_sb, bit_off);
30265b6a3a2bSMark Fasheh 
3027cc79d8c1SJoel Becker 		ret = ocfs2_insert_extent(handle, &et, 1,
3028f56654c4STao Ma 					  blkno, len, 0, NULL);
30295b6a3a2bSMark Fasheh 		if (ret) {
30305b6a3a2bSMark Fasheh 			mlog_errno(ret);
303183cab533STao Ma 			goto out_commit;
30325b6a3a2bSMark Fasheh 		}
30339b7895efSMark Fasheh 		bytes_allocated += ocfs2_clusters_to_bytes(dir->i_sb, 1);
30345b6a3a2bSMark Fasheh 	}
30355b6a3a2bSMark Fasheh 
30365b6a3a2bSMark Fasheh 	*first_block_bh = dirdata_bh;
30375b6a3a2bSMark Fasheh 	dirdata_bh = NULL;
30389b7895efSMark Fasheh 	if (ocfs2_supports_indexed_dirs(osb)) {
30399b7895efSMark Fasheh 		unsigned int off;
30409b7895efSMark Fasheh 
30414ed8a6bbSMark Fasheh 		if (!dx_inline) {
30429b7895efSMark Fasheh 			/*
30439b7895efSMark Fasheh 			 * We need to return the correct block within the
30449b7895efSMark Fasheh 			 * cluster which should hold our entry.
30459b7895efSMark Fasheh 			 */
30461119d3c0Spiaojun 			off = ocfs2_dx_dir_hash_idx(osb,
30479b7895efSMark Fasheh 						    &lookup->dl_hinfo);
30489b7895efSMark Fasheh 			get_bh(dx_leaves[off]);
30499b7895efSMark Fasheh 			lookup->dl_dx_leaf_bh = dx_leaves[off];
30509b7895efSMark Fasheh 		}
30514ed8a6bbSMark Fasheh 		lookup->dl_dx_root_bh = dx_root_bh;
30524ed8a6bbSMark Fasheh 		dx_root_bh = NULL;
30534ed8a6bbSMark Fasheh 	}
30545b6a3a2bSMark Fasheh 
30555b6a3a2bSMark Fasheh out_commit:
3056a90714c1SJan Kara 	if (ret < 0 && did_quota)
30575dd4056dSChristoph Hellwig 		dquot_free_space_nodirty(dir, bytes_allocated);
30589b7895efSMark Fasheh 
30595b6a3a2bSMark Fasheh 	ocfs2_commit_trans(osb, handle);
30605b6a3a2bSMark Fasheh 
30615b6a3a2bSMark Fasheh out:
3062edd45c08SJan Kara 	up_write(&oi->ip_alloc_sem);
30635b6a3a2bSMark Fasheh 	if (data_ac)
30645b6a3a2bSMark Fasheh 		ocfs2_free_alloc_context(data_ac);
30659b7895efSMark Fasheh 	if (meta_ac)
30669b7895efSMark Fasheh 		ocfs2_free_alloc_context(meta_ac);
30679b7895efSMark Fasheh 
30689b7895efSMark Fasheh 	if (dx_leaves) {
30699b7895efSMark Fasheh 		for (i = 0; i < num_dx_leaves; i++)
30709b7895efSMark Fasheh 			brelse(dx_leaves[i]);
30719b7895efSMark Fasheh 		kfree(dx_leaves);
30729b7895efSMark Fasheh 	}
30735b6a3a2bSMark Fasheh 
30745b6a3a2bSMark Fasheh 	brelse(dirdata_bh);
30759b7895efSMark Fasheh 	brelse(dx_root_bh);
30765b6a3a2bSMark Fasheh 
30775b6a3a2bSMark Fasheh 	return ret;
30785b6a3a2bSMark Fasheh }
30795b6a3a2bSMark Fasheh 
3080ccd979bdSMark Fasheh /* returns a bh of the 1st new block in the allocation. */
ocfs2_do_extend_dir(struct super_block * sb,handle_t * handle,struct inode * dir,struct buffer_head * parent_fe_bh,struct ocfs2_alloc_context * data_ac,struct ocfs2_alloc_context * meta_ac,struct buffer_head ** new_bh)3081316f4b9fSMark Fasheh static int ocfs2_do_extend_dir(struct super_block *sb,
30821fabe148SMark Fasheh 			       handle_t *handle,
3083ccd979bdSMark Fasheh 			       struct inode *dir,
3084ccd979bdSMark Fasheh 			       struct buffer_head *parent_fe_bh,
3085ccd979bdSMark Fasheh 			       struct ocfs2_alloc_context *data_ac,
3086ccd979bdSMark Fasheh 			       struct ocfs2_alloc_context *meta_ac,
3087ccd979bdSMark Fasheh 			       struct buffer_head **new_bh)
3088ccd979bdSMark Fasheh {
3089ccd979bdSMark Fasheh 	int status;
3090a90714c1SJan Kara 	int extend, did_quota = 0;
30918110b073SMark Fasheh 	u64 p_blkno, v_blkno;
3092ccd979bdSMark Fasheh 
3093ccd979bdSMark Fasheh 	spin_lock(&OCFS2_I(dir)->ip_lock);
3094ccd979bdSMark Fasheh 	extend = (i_size_read(dir) == ocfs2_clusters_to_bytes(sb, OCFS2_I(dir)->ip_clusters));
3095ccd979bdSMark Fasheh 	spin_unlock(&OCFS2_I(dir)->ip_lock);
3096ccd979bdSMark Fasheh 
3097ccd979bdSMark Fasheh 	if (extend) {
3098dcd0538fSMark Fasheh 		u32 offset = OCFS2_I(dir)->ip_clusters;
3099dcd0538fSMark Fasheh 
31005dd4056dSChristoph Hellwig 		status = dquot_alloc_space_nodirty(dir,
31015dd4056dSChristoph Hellwig 					ocfs2_clusters_to_bytes(sb, 1));
31025dd4056dSChristoph Hellwig 		if (status)
3103a90714c1SJan Kara 			goto bail;
3104a90714c1SJan Kara 		did_quota = 1;
3105a90714c1SJan Kara 
31060eb8d47eSTao Ma 		status = ocfs2_add_inode_data(OCFS2_SB(sb), dir, &offset,
31072ae99a60SMark Fasheh 					      1, 0, parent_fe_bh, handle,
3108ccd979bdSMark Fasheh 					      data_ac, meta_ac, NULL);
3109ccd979bdSMark Fasheh 		BUG_ON(status == -EAGAIN);
3110ccd979bdSMark Fasheh 		if (status < 0) {
3111ccd979bdSMark Fasheh 			mlog_errno(status);
3112ccd979bdSMark Fasheh 			goto bail;
3113ccd979bdSMark Fasheh 		}
3114ccd979bdSMark Fasheh 	}
3115ccd979bdSMark Fasheh 
31168110b073SMark Fasheh 	v_blkno = ocfs2_blocks_for_bytes(sb, i_size_read(dir));
31178110b073SMark Fasheh 	status = ocfs2_extent_map_get_blocks(dir, v_blkno, &p_blkno, NULL, NULL);
3118ccd979bdSMark Fasheh 	if (status < 0) {
3119ccd979bdSMark Fasheh 		mlog_errno(status);
3120ccd979bdSMark Fasheh 		goto bail;
3121ccd979bdSMark Fasheh 	}
3122ccd979bdSMark Fasheh 
3123ccd979bdSMark Fasheh 	*new_bh = sb_getblk(sb, p_blkno);
3124ccd979bdSMark Fasheh 	if (!*new_bh) {
31257391a294SRui Xiang 		status = -ENOMEM;
3126ccd979bdSMark Fasheh 		mlog_errno(status);
3127ccd979bdSMark Fasheh 		goto bail;
3128ccd979bdSMark Fasheh 	}
3129ccd979bdSMark Fasheh 	status = 0;
3130ccd979bdSMark Fasheh bail:
3131a90714c1SJan Kara 	if (did_quota && status < 0)
31325dd4056dSChristoph Hellwig 		dquot_free_space_nodirty(dir, ocfs2_clusters_to_bytes(sb, 1));
3133ccd979bdSMark Fasheh 	return status;
3134ccd979bdSMark Fasheh }
3135ccd979bdSMark Fasheh 
31365b6a3a2bSMark Fasheh /*
31375b6a3a2bSMark Fasheh  * Assumes you already have a cluster lock on the directory.
31385b6a3a2bSMark Fasheh  *
31395b6a3a2bSMark Fasheh  * 'blocks_wanted' is only used if we have an inline directory which
31405b6a3a2bSMark Fasheh  * is to be turned into an extent based one. The size of the dirent to
31415b6a3a2bSMark Fasheh  * insert might be larger than the space gained by growing to just one
31425b6a3a2bSMark Fasheh  * block, so we may have to grow the inode by two blocks in that case.
3143e7c17e43SMark Fasheh  *
3144e7c17e43SMark Fasheh  * If the directory is already indexed, dx_root_bh must be provided.
31455b6a3a2bSMark Fasheh  */
ocfs2_extend_dir(struct ocfs2_super * osb,struct inode * dir,struct buffer_head * parent_fe_bh,unsigned int blocks_wanted,struct ocfs2_dir_lookup_result * lookup,struct buffer_head ** new_de_bh)3146ccd979bdSMark Fasheh static int ocfs2_extend_dir(struct ocfs2_super *osb,
3147ccd979bdSMark Fasheh 			    struct inode *dir,
3148ccd979bdSMark Fasheh 			    struct buffer_head *parent_fe_bh,
31495b6a3a2bSMark Fasheh 			    unsigned int blocks_wanted,
31509b7895efSMark Fasheh 			    struct ocfs2_dir_lookup_result *lookup,
3151ccd979bdSMark Fasheh 			    struct buffer_head **new_de_bh)
3152ccd979bdSMark Fasheh {
3153ccd979bdSMark Fasheh 	int status = 0;
3154ee19a779SJoel Becker 	int credits, num_free_extents, drop_alloc_sem = 0;
3155ccd979bdSMark Fasheh 	loff_t dir_i_size;
3156ccd979bdSMark Fasheh 	struct ocfs2_dinode *fe = (struct ocfs2_dinode *) parent_fe_bh->b_data;
3157811f933dSTao Ma 	struct ocfs2_extent_list *el = &fe->id2.i_list;
3158ccd979bdSMark Fasheh 	struct ocfs2_alloc_context *data_ac = NULL;
3159ccd979bdSMark Fasheh 	struct ocfs2_alloc_context *meta_ac = NULL;
31601fabe148SMark Fasheh 	handle_t *handle = NULL;
3161ccd979bdSMark Fasheh 	struct buffer_head *new_bh = NULL;
3162ccd979bdSMark Fasheh 	struct ocfs2_dir_entry * de;
3163ccd979bdSMark Fasheh 	struct super_block *sb = osb->sb;
3164f99b9b7cSJoel Becker 	struct ocfs2_extent_tree et;
3165e7c17e43SMark Fasheh 	struct buffer_head *dx_root_bh = lookup->dl_dx_root_bh;
3166ccd979bdSMark Fasheh 
31675b6a3a2bSMark Fasheh 	if (OCFS2_I(dir)->ip_dyn_features & OCFS2_INLINE_DATA_FL) {
3168e7c17e43SMark Fasheh 		/*
3169e7c17e43SMark Fasheh 		 * This would be a code error as an inline directory should
3170e7c17e43SMark Fasheh 		 * never have an index root.
3171e7c17e43SMark Fasheh 		 */
3172e7c17e43SMark Fasheh 		BUG_ON(dx_root_bh);
3173e7c17e43SMark Fasheh 
31745b6a3a2bSMark Fasheh 		status = ocfs2_expand_inline_dir(dir, parent_fe_bh,
31759b7895efSMark Fasheh 						 blocks_wanted, lookup,
31769b7895efSMark Fasheh 						 &new_bh);
31775b6a3a2bSMark Fasheh 		if (status) {
31785b6a3a2bSMark Fasheh 			mlog_errno(status);
31795b6a3a2bSMark Fasheh 			goto bail;
31805b6a3a2bSMark Fasheh 		}
31815b6a3a2bSMark Fasheh 
3182e7c17e43SMark Fasheh 		/* Expansion from inline to an indexed directory will
3183e7c17e43SMark Fasheh 		 * have given us this. */
3184e7c17e43SMark Fasheh 		dx_root_bh = lookup->dl_dx_root_bh;
3185e7c17e43SMark Fasheh 
31865b6a3a2bSMark Fasheh 		if (blocks_wanted == 1) {
31875b6a3a2bSMark Fasheh 			/*
31885b6a3a2bSMark Fasheh 			 * If the new dirent will fit inside the space
31895b6a3a2bSMark Fasheh 			 * created by pushing out to one block, then
31905b6a3a2bSMark Fasheh 			 * we can complete the operation
31915b6a3a2bSMark Fasheh 			 * here. Otherwise we have to expand i_size
31925b6a3a2bSMark Fasheh 			 * and format the 2nd block below.
31935b6a3a2bSMark Fasheh 			 */
31945b6a3a2bSMark Fasheh 			BUG_ON(new_bh == NULL);
31955b6a3a2bSMark Fasheh 			goto bail_bh;
31965b6a3a2bSMark Fasheh 		}
31975b6a3a2bSMark Fasheh 
31985b6a3a2bSMark Fasheh 		/*
31995b6a3a2bSMark Fasheh 		 * Get rid of 'new_bh' - we want to format the 2nd
32005b6a3a2bSMark Fasheh 		 * data block and return that instead.
32015b6a3a2bSMark Fasheh 		 */
32025b6a3a2bSMark Fasheh 		brelse(new_bh);
32035b6a3a2bSMark Fasheh 		new_bh = NULL;
32045b6a3a2bSMark Fasheh 
3205edd45c08SJan Kara 		down_write(&OCFS2_I(dir)->ip_alloc_sem);
3206edd45c08SJan Kara 		drop_alloc_sem = 1;
32075b6a3a2bSMark Fasheh 		dir_i_size = i_size_read(dir);
32085b6a3a2bSMark Fasheh 		credits = OCFS2_SIMPLE_DIR_EXTEND_CREDITS;
32095b6a3a2bSMark Fasheh 		goto do_extend;
32105b6a3a2bSMark Fasheh 	}
32115b6a3a2bSMark Fasheh 
3212edd45c08SJan Kara 	down_write(&OCFS2_I(dir)->ip_alloc_sem);
3213edd45c08SJan Kara 	drop_alloc_sem = 1;
3214ccd979bdSMark Fasheh 	dir_i_size = i_size_read(dir);
3215f1088d47STao Ma 	trace_ocfs2_extend_dir((unsigned long long)OCFS2_I(dir)->ip_blkno,
3216f1088d47STao Ma 			       dir_i_size);
3217ccd979bdSMark Fasheh 
3218ccd979bdSMark Fasheh 	/* dir->i_size is always block aligned. */
3219ccd979bdSMark Fasheh 	spin_lock(&OCFS2_I(dir)->ip_lock);
3220ccd979bdSMark Fasheh 	if (dir_i_size == ocfs2_clusters_to_bytes(sb, OCFS2_I(dir)->ip_clusters)) {
3221ccd979bdSMark Fasheh 		spin_unlock(&OCFS2_I(dir)->ip_lock);
32225e404e9eSJoel Becker 		ocfs2_init_dinode_extent_tree(&et, INODE_CACHE(dir),
32235e404e9eSJoel Becker 					      parent_fe_bh);
3224964f14a0SJun Piao 		num_free_extents = ocfs2_num_free_extents(&et);
3225ccd979bdSMark Fasheh 		if (num_free_extents < 0) {
3226ccd979bdSMark Fasheh 			status = num_free_extents;
3227ccd979bdSMark Fasheh 			mlog_errno(status);
3228ccd979bdSMark Fasheh 			goto bail;
3229ccd979bdSMark Fasheh 		}
3230ccd979bdSMark Fasheh 
3231ccd979bdSMark Fasheh 		if (!num_free_extents) {
3232811f933dSTao Ma 			status = ocfs2_reserve_new_metadata(osb, el, &meta_ac);
3233ccd979bdSMark Fasheh 			if (status < 0) {
3234ccd979bdSMark Fasheh 				if (status != -ENOSPC)
3235ccd979bdSMark Fasheh 					mlog_errno(status);
3236ccd979bdSMark Fasheh 				goto bail;
3237ccd979bdSMark Fasheh 			}
3238ccd979bdSMark Fasheh 		}
3239ccd979bdSMark Fasheh 
3240da5cbf2fSMark Fasheh 		status = ocfs2_reserve_clusters(osb, 1, &data_ac);
3241ccd979bdSMark Fasheh 		if (status < 0) {
3242ccd979bdSMark Fasheh 			if (status != -ENOSPC)
3243ccd979bdSMark Fasheh 				mlog_errno(status);
3244ccd979bdSMark Fasheh 			goto bail;
3245ccd979bdSMark Fasheh 		}
3246ccd979bdSMark Fasheh 
324783f92318SMark Fasheh 		if (ocfs2_dir_resv_allowed(osb))
3248e3b4a97dSMark Fasheh 			data_ac->ac_resv = &OCFS2_I(dir)->ip_la_data_resv;
3249e3b4a97dSMark Fasheh 
325006f9da6eSGoldwyn Rodrigues 		credits = ocfs2_calc_extend_credits(sb, el);
3251ccd979bdSMark Fasheh 	} else {
3252ccd979bdSMark Fasheh 		spin_unlock(&OCFS2_I(dir)->ip_lock);
3253ccd979bdSMark Fasheh 		credits = OCFS2_SIMPLE_DIR_EXTEND_CREDITS;
3254ccd979bdSMark Fasheh 	}
3255ccd979bdSMark Fasheh 
32565b6a3a2bSMark Fasheh do_extend:
3257e7c17e43SMark Fasheh 	if (ocfs2_dir_indexed(dir))
3258e7c17e43SMark Fasheh 		credits++; /* For attaching the new dirent block to the
3259e7c17e43SMark Fasheh 			    * dx_root */
3260e7c17e43SMark Fasheh 
326165eff9ccSMark Fasheh 	handle = ocfs2_start_trans(osb, credits);
3262ccd979bdSMark Fasheh 	if (IS_ERR(handle)) {
3263ccd979bdSMark Fasheh 		status = PTR_ERR(handle);
3264ccd979bdSMark Fasheh 		handle = NULL;
3265ccd979bdSMark Fasheh 		mlog_errno(status);
3266ccd979bdSMark Fasheh 		goto bail;
3267ccd979bdSMark Fasheh 	}
3268ccd979bdSMark Fasheh 
3269ccd979bdSMark Fasheh 	status = ocfs2_do_extend_dir(osb->sb, handle, dir, parent_fe_bh,
3270ccd979bdSMark Fasheh 				     data_ac, meta_ac, &new_bh);
3271ccd979bdSMark Fasheh 	if (status < 0) {
3272ccd979bdSMark Fasheh 		mlog_errno(status);
3273ccd979bdSMark Fasheh 		goto bail;
3274ccd979bdSMark Fasheh 	}
3275ccd979bdSMark Fasheh 
32768cb471e8SJoel Becker 	ocfs2_set_new_buffer_uptodate(INODE_CACHE(dir), new_bh);
3277ccd979bdSMark Fasheh 
32780cf2f763SJoel Becker 	status = ocfs2_journal_access_db(handle, INODE_CACHE(dir), new_bh,
3279ccd979bdSMark Fasheh 					 OCFS2_JOURNAL_ACCESS_CREATE);
3280ccd979bdSMark Fasheh 	if (status < 0) {
3281ccd979bdSMark Fasheh 		mlog_errno(status);
3282ccd979bdSMark Fasheh 		goto bail;
3283ccd979bdSMark Fasheh 	}
3284ccd979bdSMark Fasheh 	memset(new_bh->b_data, 0, sb->s_blocksize);
328587d35a74SMark Fasheh 
3286ccd979bdSMark Fasheh 	de = (struct ocfs2_dir_entry *) new_bh->b_data;
3287ccd979bdSMark Fasheh 	de->inode = 0;
3288e7c17e43SMark Fasheh 	if (ocfs2_supports_dir_trailer(dir)) {
328987d35a74SMark Fasheh 		de->rec_len = cpu_to_le16(ocfs2_dir_trailer_blk_off(sb));
3290e7c17e43SMark Fasheh 
3291e7c17e43SMark Fasheh 		ocfs2_init_dir_trailer(dir, new_bh, le16_to_cpu(de->rec_len));
3292e7c17e43SMark Fasheh 
3293e7c17e43SMark Fasheh 		if (ocfs2_dir_indexed(dir)) {
3294e7c17e43SMark Fasheh 			status = ocfs2_dx_dir_link_trailer(dir, handle,
3295e7c17e43SMark Fasheh 							   dx_root_bh, new_bh);
3296e7c17e43SMark Fasheh 			if (status) {
3297e7c17e43SMark Fasheh 				mlog_errno(status);
3298e7c17e43SMark Fasheh 				goto bail;
3299e7c17e43SMark Fasheh 			}
3300e7c17e43SMark Fasheh 		}
330187d35a74SMark Fasheh 	} else {
3302ccd979bdSMark Fasheh 		de->rec_len = cpu_to_le16(sb->s_blocksize);
330387d35a74SMark Fasheh 	}
33042931cdcbSDarrick J. Wong 	ocfs2_update_inode_fsync_trans(handle, dir, 1);
3305ec20cec7SJoel Becker 	ocfs2_journal_dirty(handle, new_bh);
3306ccd979bdSMark Fasheh 
3307ccd979bdSMark Fasheh 	dir_i_size += dir->i_sb->s_blocksize;
3308ccd979bdSMark Fasheh 	i_size_write(dir, dir_i_size);
33098110b073SMark Fasheh 	dir->i_blocks = ocfs2_inode_sector_count(dir);
3310ccd979bdSMark Fasheh 	status = ocfs2_mark_inode_dirty(handle, dir, parent_fe_bh);
3311ccd979bdSMark Fasheh 	if (status < 0) {
3312ccd979bdSMark Fasheh 		mlog_errno(status);
3313ccd979bdSMark Fasheh 		goto bail;
3314ccd979bdSMark Fasheh 	}
3315ccd979bdSMark Fasheh 
33165b6a3a2bSMark Fasheh bail_bh:
3317ccd979bdSMark Fasheh 	*new_de_bh = new_bh;
3318ccd979bdSMark Fasheh 	get_bh(*new_de_bh);
3319ccd979bdSMark Fasheh bail:
3320ccd979bdSMark Fasheh 	if (handle)
332102dc1af4SMark Fasheh 		ocfs2_commit_trans(osb, handle);
3322edd45c08SJan Kara 	if (drop_alloc_sem)
3323edd45c08SJan Kara 		up_write(&OCFS2_I(dir)->ip_alloc_sem);
3324ccd979bdSMark Fasheh 
3325ccd979bdSMark Fasheh 	if (data_ac)
3326ccd979bdSMark Fasheh 		ocfs2_free_alloc_context(data_ac);
3327ccd979bdSMark Fasheh 	if (meta_ac)
3328ccd979bdSMark Fasheh 		ocfs2_free_alloc_context(meta_ac);
3329ccd979bdSMark Fasheh 
3330ccd979bdSMark Fasheh 	brelse(new_bh);
3331ccd979bdSMark Fasheh 
3332ccd979bdSMark Fasheh 	return status;
3333ccd979bdSMark Fasheh }
3334ccd979bdSMark Fasheh 
ocfs2_find_dir_space_id(struct inode * dir,struct buffer_head * di_bh,const char * name,int namelen,struct buffer_head ** ret_de_bh,unsigned int * blocks_wanted)33355b6a3a2bSMark Fasheh static int ocfs2_find_dir_space_id(struct inode *dir, struct buffer_head *di_bh,
33365b6a3a2bSMark Fasheh 				   const char *name, int namelen,
33375b6a3a2bSMark Fasheh 				   struct buffer_head **ret_de_bh,
33385b6a3a2bSMark Fasheh 				   unsigned int *blocks_wanted)
33395b6a3a2bSMark Fasheh {
33405b6a3a2bSMark Fasheh 	int ret;
334187d35a74SMark Fasheh 	struct super_block *sb = dir->i_sb;
33425b6a3a2bSMark Fasheh 	struct ocfs2_dinode *di = (struct ocfs2_dinode *)di_bh->b_data;
33435b6a3a2bSMark Fasheh 	struct ocfs2_dir_entry *de, *last_de = NULL;
33445b6a3a2bSMark Fasheh 	char *de_buf, *limit;
33455b6a3a2bSMark Fasheh 	unsigned long offset = 0;
33469a25d051SColin Ian King 	unsigned int rec_len, new_rec_len, free_space;
334787d35a74SMark Fasheh 
334887d35a74SMark Fasheh 	/*
334987d35a74SMark Fasheh 	 * This calculates how many free bytes we'd have in block zero, should
335087d35a74SMark Fasheh 	 * this function force expansion to an extent tree.
335187d35a74SMark Fasheh 	 */
3352e7c17e43SMark Fasheh 	if (ocfs2_new_dir_wants_trailer(dir))
335387d35a74SMark Fasheh 		free_space = ocfs2_dir_trailer_blk_off(sb) - i_size_read(dir);
335487d35a74SMark Fasheh 	else
335587d35a74SMark Fasheh 		free_space = dir->i_sb->s_blocksize - i_size_read(dir);
33565b6a3a2bSMark Fasheh 
33575b6a3a2bSMark Fasheh 	de_buf = di->id2.i_data.id_data;
33585b6a3a2bSMark Fasheh 	limit = de_buf + i_size_read(dir);
33595b6a3a2bSMark Fasheh 	rec_len = OCFS2_DIR_REC_LEN(namelen);
33605b6a3a2bSMark Fasheh 
33615b6a3a2bSMark Fasheh 	while (de_buf < limit) {
33625b6a3a2bSMark Fasheh 		de = (struct ocfs2_dir_entry *)de_buf;
33635b6a3a2bSMark Fasheh 
33645b6a3a2bSMark Fasheh 		if (!ocfs2_check_dir_entry(dir, de, di_bh, offset)) {
33655b6a3a2bSMark Fasheh 			ret = -ENOENT;
33665b6a3a2bSMark Fasheh 			goto out;
33675b6a3a2bSMark Fasheh 		}
33685b6a3a2bSMark Fasheh 		if (ocfs2_match(namelen, name, de)) {
33695b6a3a2bSMark Fasheh 			ret = -EEXIST;
33705b6a3a2bSMark Fasheh 			goto out;
33715b6a3a2bSMark Fasheh 		}
337287d35a74SMark Fasheh 		/*
337387d35a74SMark Fasheh 		 * No need to check for a trailing dirent record here as
337487d35a74SMark Fasheh 		 * they're not used for inline dirs.
337587d35a74SMark Fasheh 		 */
337687d35a74SMark Fasheh 
33775b6a3a2bSMark Fasheh 		if (ocfs2_dirent_would_fit(de, rec_len)) {
33785b6a3a2bSMark Fasheh 			/* Ok, we found a spot. Return this bh and let
33795b6a3a2bSMark Fasheh 			 * the caller actually fill it in. */
33805b6a3a2bSMark Fasheh 			*ret_de_bh = di_bh;
33815b6a3a2bSMark Fasheh 			get_bh(*ret_de_bh);
33825b6a3a2bSMark Fasheh 			ret = 0;
33835b6a3a2bSMark Fasheh 			goto out;
33845b6a3a2bSMark Fasheh 		}
33855b6a3a2bSMark Fasheh 
33865b6a3a2bSMark Fasheh 		last_de = de;
33875b6a3a2bSMark Fasheh 		de_buf += le16_to_cpu(de->rec_len);
33885b6a3a2bSMark Fasheh 		offset += le16_to_cpu(de->rec_len);
33895b6a3a2bSMark Fasheh 	}
33905b6a3a2bSMark Fasheh 
3391ccd979bdSMark Fasheh 	/*
33925b6a3a2bSMark Fasheh 	 * We're going to require expansion of the directory - figure
33935b6a3a2bSMark Fasheh 	 * out how many blocks we'll need so that a place for the
33945b6a3a2bSMark Fasheh 	 * dirent can be found.
3395ccd979bdSMark Fasheh 	 */
33965b6a3a2bSMark Fasheh 	*blocks_wanted = 1;
339787d35a74SMark Fasheh 	new_rec_len = le16_to_cpu(last_de->rec_len) + free_space;
33985b6a3a2bSMark Fasheh 	if (new_rec_len < (rec_len + OCFS2_DIR_REC_LEN(last_de->name_len)))
33995b6a3a2bSMark Fasheh 		*blocks_wanted = 2;
34005b6a3a2bSMark Fasheh 
34015b6a3a2bSMark Fasheh 	ret = -ENOSPC;
34025b6a3a2bSMark Fasheh out:
34035b6a3a2bSMark Fasheh 	return ret;
34045b6a3a2bSMark Fasheh }
34055b6a3a2bSMark Fasheh 
ocfs2_find_dir_space_el(struct inode * dir,const char * name,int namelen,struct buffer_head ** ret_de_bh)34065b6a3a2bSMark Fasheh static int ocfs2_find_dir_space_el(struct inode *dir, const char *name,
34075b6a3a2bSMark Fasheh 				   int namelen, struct buffer_head **ret_de_bh)
3408ccd979bdSMark Fasheh {
3409ccd979bdSMark Fasheh 	unsigned long offset;
3410ccd979bdSMark Fasheh 	struct buffer_head *bh = NULL;
3411ccd979bdSMark Fasheh 	unsigned short rec_len;
3412ccd979bdSMark Fasheh 	struct ocfs2_dir_entry *de;
34135b6a3a2bSMark Fasheh 	struct super_block *sb = dir->i_sb;
3414ccd979bdSMark Fasheh 	int status;
341587d35a74SMark Fasheh 	int blocksize = dir->i_sb->s_blocksize;
3416ccd979bdSMark Fasheh 
3417a22305ccSJoel Becker 	status = ocfs2_read_dir_block(dir, 0, &bh, 0);
34189b572691SDaeseok Youn 	if (status)
3419ccd979bdSMark Fasheh 		goto bail;
3420ccd979bdSMark Fasheh 
3421ccd979bdSMark Fasheh 	rec_len = OCFS2_DIR_REC_LEN(namelen);
3422ccd979bdSMark Fasheh 	offset = 0;
3423ccd979bdSMark Fasheh 	de = (struct ocfs2_dir_entry *) bh->b_data;
3424ccd979bdSMark Fasheh 	while (1) {
3425ccd979bdSMark Fasheh 		if ((char *)de >= sb->s_blocksize + bh->b_data) {
3426ccd979bdSMark Fasheh 			brelse(bh);
3427ccd979bdSMark Fasheh 			bh = NULL;
3428ccd979bdSMark Fasheh 
3429ccd979bdSMark Fasheh 			if (i_size_read(dir) <= offset) {
34305b6a3a2bSMark Fasheh 				/*
34315b6a3a2bSMark Fasheh 				 * Caller will have to expand this
34325b6a3a2bSMark Fasheh 				 * directory.
34335b6a3a2bSMark Fasheh 				 */
34345b6a3a2bSMark Fasheh 				status = -ENOSPC;
3435ccd979bdSMark Fasheh 				goto bail;
3436ccd979bdSMark Fasheh 			}
3437a22305ccSJoel Becker 			status = ocfs2_read_dir_block(dir,
3438ccd979bdSMark Fasheh 					     offset >> sb->s_blocksize_bits,
3439a22305ccSJoel Becker 					     &bh, 0);
34409b572691SDaeseok Youn 			if (status)
3441ccd979bdSMark Fasheh 				goto bail;
34429b572691SDaeseok Youn 
3443ccd979bdSMark Fasheh 			/* move to next block */
3444ccd979bdSMark Fasheh 			de = (struct ocfs2_dir_entry *) bh->b_data;
3445ccd979bdSMark Fasheh 		}
3446ccd979bdSMark Fasheh 		if (!ocfs2_check_dir_entry(dir, de, bh, offset)) {
3447ccd979bdSMark Fasheh 			status = -ENOENT;
3448ccd979bdSMark Fasheh 			goto bail;
3449ccd979bdSMark Fasheh 		}
3450ccd979bdSMark Fasheh 		if (ocfs2_match(namelen, name, de)) {
3451ccd979bdSMark Fasheh 			status = -EEXIST;
3452ccd979bdSMark Fasheh 			goto bail;
3453ccd979bdSMark Fasheh 		}
345487d35a74SMark Fasheh 
345587d35a74SMark Fasheh 		if (ocfs2_skip_dir_trailer(dir, de, offset % blocksize,
345687d35a74SMark Fasheh 					   blocksize))
345787d35a74SMark Fasheh 			goto next;
345887d35a74SMark Fasheh 
34598553cf4fSMark Fasheh 		if (ocfs2_dirent_would_fit(de, rec_len)) {
3460ccd979bdSMark Fasheh 			/* Ok, we found a spot. Return this bh and let
3461ccd979bdSMark Fasheh 			 * the caller actually fill it in. */
3462ccd979bdSMark Fasheh 			*ret_de_bh = bh;
3463ccd979bdSMark Fasheh 			get_bh(*ret_de_bh);
3464ccd979bdSMark Fasheh 			status = 0;
3465ccd979bdSMark Fasheh 			goto bail;
3466ccd979bdSMark Fasheh 		}
346787d35a74SMark Fasheh next:
3468ccd979bdSMark Fasheh 		offset += le16_to_cpu(de->rec_len);
3469ccd979bdSMark Fasheh 		de = (struct ocfs2_dir_entry *)((char *) de + le16_to_cpu(de->rec_len));
3470ccd979bdSMark Fasheh 	}
3471ccd979bdSMark Fasheh 
3472ccd979bdSMark Fasheh bail:
3473ccd979bdSMark Fasheh 	brelse(bh);
3474c1e8d35eSTao Ma 	if (status)
3475c1e8d35eSTao Ma 		mlog_errno(status);
3476ccd979bdSMark Fasheh 
3477ccd979bdSMark Fasheh 	return status;
3478ccd979bdSMark Fasheh }
34795b6a3a2bSMark Fasheh 
dx_leaf_sort_cmp(const void * a,const void * b)34809b7895efSMark Fasheh static int dx_leaf_sort_cmp(const void *a, const void *b)
34819b7895efSMark Fasheh {
34829b7895efSMark Fasheh 	const struct ocfs2_dx_entry *entry1 = a;
34839b7895efSMark Fasheh 	const struct ocfs2_dx_entry *entry2 = b;
34849b7895efSMark Fasheh 	u32 major_hash1 = le32_to_cpu(entry1->dx_major_hash);
34859b7895efSMark Fasheh 	u32 major_hash2 = le32_to_cpu(entry2->dx_major_hash);
34869b7895efSMark Fasheh 	u32 minor_hash1 = le32_to_cpu(entry1->dx_minor_hash);
34879b7895efSMark Fasheh 	u32 minor_hash2 = le32_to_cpu(entry2->dx_minor_hash);
34889b7895efSMark Fasheh 
34899b7895efSMark Fasheh 	if (major_hash1 > major_hash2)
34909b7895efSMark Fasheh 		return 1;
34919b7895efSMark Fasheh 	if (major_hash1 < major_hash2)
34929b7895efSMark Fasheh 		return -1;
34939b7895efSMark Fasheh 
34949b7895efSMark Fasheh 	/*
34959b7895efSMark Fasheh 	 * It is not strictly necessary to sort by minor
34969b7895efSMark Fasheh 	 */
34979b7895efSMark Fasheh 	if (minor_hash1 > minor_hash2)
34989b7895efSMark Fasheh 		return 1;
34999b7895efSMark Fasheh 	if (minor_hash1 < minor_hash2)
35009b7895efSMark Fasheh 		return -1;
35019b7895efSMark Fasheh 	return 0;
35029b7895efSMark Fasheh }
35039b7895efSMark Fasheh 
dx_leaf_sort_swap(void * a,void * b,int size)35049b7895efSMark Fasheh static void dx_leaf_sort_swap(void *a, void *b, int size)
35059b7895efSMark Fasheh {
35069b7895efSMark Fasheh 	struct ocfs2_dx_entry *entry1 = a;
35079b7895efSMark Fasheh 	struct ocfs2_dx_entry *entry2 = b;
35089b7895efSMark Fasheh 
35099b7895efSMark Fasheh 	BUG_ON(size != sizeof(*entry1));
35109b7895efSMark Fasheh 
35112a28f98cSFabian Frederick 	swap(*entry1, *entry2);
35129b7895efSMark Fasheh }
35139b7895efSMark Fasheh 
ocfs2_dx_leaf_same_major(struct ocfs2_dx_leaf * dx_leaf)35149b7895efSMark Fasheh static int ocfs2_dx_leaf_same_major(struct ocfs2_dx_leaf *dx_leaf)
35159b7895efSMark Fasheh {
35169b7895efSMark Fasheh 	struct ocfs2_dx_entry_list *dl_list = &dx_leaf->dl_list;
35179b7895efSMark Fasheh 	int i, num = le16_to_cpu(dl_list->de_num_used);
35189b7895efSMark Fasheh 
35199b7895efSMark Fasheh 	for (i = 0; i < (num - 1); i++) {
35209b7895efSMark Fasheh 		if (le32_to_cpu(dl_list->de_entries[i].dx_major_hash) !=
35219b7895efSMark Fasheh 		    le32_to_cpu(dl_list->de_entries[i + 1].dx_major_hash))
35229b7895efSMark Fasheh 			return 0;
35239b7895efSMark Fasheh 	}
35249b7895efSMark Fasheh 
35259b7895efSMark Fasheh 	return 1;
35269b7895efSMark Fasheh }
35279b7895efSMark Fasheh 
35289b7895efSMark Fasheh /*
35299b7895efSMark Fasheh  * Find the optimal value to split this leaf on. This expects the leaf
35309b7895efSMark Fasheh  * entries to be in sorted order.
35319b7895efSMark Fasheh  *
35329b7895efSMark Fasheh  * leaf_cpos is the cpos of the leaf we're splitting. insert_hash is
35339b7895efSMark Fasheh  * the hash we want to insert.
35349b7895efSMark Fasheh  *
35359b7895efSMark Fasheh  * This function is only concerned with the major hash - that which
35369b7895efSMark Fasheh  * determines which cluster an item belongs to.
35379b7895efSMark Fasheh  */
ocfs2_dx_dir_find_leaf_split(struct ocfs2_dx_leaf * dx_leaf,u32 leaf_cpos,u32 insert_hash,u32 * split_hash)35389b7895efSMark Fasheh static int ocfs2_dx_dir_find_leaf_split(struct ocfs2_dx_leaf *dx_leaf,
35399b7895efSMark Fasheh 					u32 leaf_cpos, u32 insert_hash,
35409b7895efSMark Fasheh 					u32 *split_hash)
35419b7895efSMark Fasheh {
35429b7895efSMark Fasheh 	struct ocfs2_dx_entry_list *dl_list = &dx_leaf->dl_list;
35439b7895efSMark Fasheh 	int i, num_used = le16_to_cpu(dl_list->de_num_used);
35449b7895efSMark Fasheh 	int allsame;
35459b7895efSMark Fasheh 
35469b7895efSMark Fasheh 	/*
35479b7895efSMark Fasheh 	 * There's a couple rare, but nasty corner cases we have to
35489b7895efSMark Fasheh 	 * check for here. All of them involve a leaf where all value
35499b7895efSMark Fasheh 	 * have the same hash, which is what we look for first.
35509b7895efSMark Fasheh 	 *
35519b7895efSMark Fasheh 	 * Most of the time, all of the above is false, and we simply
35529b7895efSMark Fasheh 	 * pick the median value for a split.
35539b7895efSMark Fasheh 	 */
35549b7895efSMark Fasheh 	allsame = ocfs2_dx_leaf_same_major(dx_leaf);
35559b7895efSMark Fasheh 	if (allsame) {
35569b7895efSMark Fasheh 		u32 val = le32_to_cpu(dl_list->de_entries[0].dx_major_hash);
35579b7895efSMark Fasheh 
35589b7895efSMark Fasheh 		if (val == insert_hash) {
35599b7895efSMark Fasheh 			/*
35609b7895efSMark Fasheh 			 * No matter where we would choose to split,
35619b7895efSMark Fasheh 			 * the new entry would want to occupy the same
35629b7895efSMark Fasheh 			 * block as these. Since there's no space left
35639b7895efSMark Fasheh 			 * in their existing block, we know there
35649b7895efSMark Fasheh 			 * won't be space after the split.
35659b7895efSMark Fasheh 			 */
35669b7895efSMark Fasheh 			return -ENOSPC;
35679b7895efSMark Fasheh 		}
35689b7895efSMark Fasheh 
35699b7895efSMark Fasheh 		if (val == leaf_cpos) {
35709b7895efSMark Fasheh 			/*
35719b7895efSMark Fasheh 			 * Because val is the same as leaf_cpos (which
35729b7895efSMark Fasheh 			 * is the smallest value this leaf can have),
35739b7895efSMark Fasheh 			 * yet is not equal to insert_hash, then we
35749b7895efSMark Fasheh 			 * know that insert_hash *must* be larger than
35759b7895efSMark Fasheh 			 * val (and leaf_cpos). At least cpos+1 in value.
35769b7895efSMark Fasheh 			 *
35779b7895efSMark Fasheh 			 * We also know then, that there cannot be an
35789b7895efSMark Fasheh 			 * adjacent extent (otherwise we'd be looking
35799b7895efSMark Fasheh 			 * at it). Choosing this value gives us a
35809b7895efSMark Fasheh 			 * chance to get some contiguousness.
35819b7895efSMark Fasheh 			 */
35829b7895efSMark Fasheh 			*split_hash = leaf_cpos + 1;
35839b7895efSMark Fasheh 			return 0;
35849b7895efSMark Fasheh 		}
35859b7895efSMark Fasheh 
35869b7895efSMark Fasheh 		if (val > insert_hash) {
35879b7895efSMark Fasheh 			/*
35889b7895efSMark Fasheh 			 * val can not be the same as insert hash, and
35899b7895efSMark Fasheh 			 * also must be larger than leaf_cpos. Also,
35909b7895efSMark Fasheh 			 * we know that there can't be a leaf between
35919b7895efSMark Fasheh 			 * cpos and val, otherwise the entries with
35929b7895efSMark Fasheh 			 * hash 'val' would be there.
35939b7895efSMark Fasheh 			 */
35949b7895efSMark Fasheh 			*split_hash = val;
35959b7895efSMark Fasheh 			return 0;
35969b7895efSMark Fasheh 		}
35979b7895efSMark Fasheh 
35989b7895efSMark Fasheh 		*split_hash = insert_hash;
35999b7895efSMark Fasheh 		return 0;
36009b7895efSMark Fasheh 	}
36019b7895efSMark Fasheh 
36029b7895efSMark Fasheh 	/*
36039b7895efSMark Fasheh 	 * Since the records are sorted and the checks above
36049b7895efSMark Fasheh 	 * guaranteed that not all records in this block are the same,
36059b7895efSMark Fasheh 	 * we simple travel forward, from the median, and pick the 1st
36069b7895efSMark Fasheh 	 * record whose value is larger than leaf_cpos.
36079b7895efSMark Fasheh 	 */
36089b7895efSMark Fasheh 	for (i = (num_used / 2); i < num_used; i++)
36099b7895efSMark Fasheh 		if (le32_to_cpu(dl_list->de_entries[i].dx_major_hash) >
36109b7895efSMark Fasheh 		    leaf_cpos)
36119b7895efSMark Fasheh 			break;
36129b7895efSMark Fasheh 
36139b7895efSMark Fasheh 	BUG_ON(i == num_used); /* Should be impossible */
36149b7895efSMark Fasheh 	*split_hash = le32_to_cpu(dl_list->de_entries[i].dx_major_hash);
36159b7895efSMark Fasheh 	return 0;
36169b7895efSMark Fasheh }
36179b7895efSMark Fasheh 
36189b7895efSMark Fasheh /*
36199b7895efSMark Fasheh  * Transfer all entries in orig_dx_leaves whose major hash is equal to or
36209b7895efSMark Fasheh  * larger than split_hash into new_dx_leaves. We use a temporary
36219b7895efSMark Fasheh  * buffer (tmp_dx_leaf) to make the changes to the original leaf blocks.
36229b7895efSMark Fasheh  *
36239b7895efSMark Fasheh  * Since the block offset inside a leaf (cluster) is a constant mask
36249b7895efSMark Fasheh  * of minor_hash, we can optimize - an item at block offset X within
36259b7895efSMark Fasheh  * the original cluster, will be at offset X within the new cluster.
36269b7895efSMark Fasheh  */
ocfs2_dx_dir_transfer_leaf(struct inode * dir,u32 split_hash,handle_t * handle,struct ocfs2_dx_leaf * tmp_dx_leaf,struct buffer_head ** orig_dx_leaves,struct buffer_head ** new_dx_leaves,int num_dx_leaves)36279b7895efSMark Fasheh static void ocfs2_dx_dir_transfer_leaf(struct inode *dir, u32 split_hash,
36289b7895efSMark Fasheh 				       handle_t *handle,
36299b7895efSMark Fasheh 				       struct ocfs2_dx_leaf *tmp_dx_leaf,
36309b7895efSMark Fasheh 				       struct buffer_head **orig_dx_leaves,
36319b7895efSMark Fasheh 				       struct buffer_head **new_dx_leaves,
36329b7895efSMark Fasheh 				       int num_dx_leaves)
36339b7895efSMark Fasheh {
36349b7895efSMark Fasheh 	int i, j, num_used;
36359b7895efSMark Fasheh 	u32 major_hash;
36369b7895efSMark Fasheh 	struct ocfs2_dx_leaf *orig_dx_leaf, *new_dx_leaf;
363777461ba1Szhengbin 	struct ocfs2_dx_entry_list *orig_list, *tmp_list;
36389b7895efSMark Fasheh 	struct ocfs2_dx_entry *dx_entry;
36399b7895efSMark Fasheh 
36409b7895efSMark Fasheh 	tmp_list = &tmp_dx_leaf->dl_list;
36419b7895efSMark Fasheh 
36429b7895efSMark Fasheh 	for (i = 0; i < num_dx_leaves; i++) {
36439b7895efSMark Fasheh 		orig_dx_leaf = (struct ocfs2_dx_leaf *) orig_dx_leaves[i]->b_data;
36449b7895efSMark Fasheh 		orig_list = &orig_dx_leaf->dl_list;
36459b7895efSMark Fasheh 		new_dx_leaf = (struct ocfs2_dx_leaf *) new_dx_leaves[i]->b_data;
36469b7895efSMark Fasheh 
36479b7895efSMark Fasheh 		num_used = le16_to_cpu(orig_list->de_num_used);
36489b7895efSMark Fasheh 
36499b7895efSMark Fasheh 		memcpy(tmp_dx_leaf, orig_dx_leaf, dir->i_sb->s_blocksize);
36509b7895efSMark Fasheh 		tmp_list->de_num_used = cpu_to_le16(0);
36519b7895efSMark Fasheh 		memset(&tmp_list->de_entries, 0, sizeof(*dx_entry)*num_used);
36529b7895efSMark Fasheh 
36539b7895efSMark Fasheh 		for (j = 0; j < num_used; j++) {
36549b7895efSMark Fasheh 			dx_entry = &orig_list->de_entries[j];
36559b7895efSMark Fasheh 			major_hash = le32_to_cpu(dx_entry->dx_major_hash);
36569b7895efSMark Fasheh 			if (major_hash >= split_hash)
36579b7895efSMark Fasheh 				ocfs2_dx_dir_leaf_insert_tail(new_dx_leaf,
36589b7895efSMark Fasheh 							      dx_entry);
36599b7895efSMark Fasheh 			else
36609b7895efSMark Fasheh 				ocfs2_dx_dir_leaf_insert_tail(tmp_dx_leaf,
36619b7895efSMark Fasheh 							      dx_entry);
36629b7895efSMark Fasheh 		}
36639b7895efSMark Fasheh 		memcpy(orig_dx_leaf, tmp_dx_leaf, dir->i_sb->s_blocksize);
36649b7895efSMark Fasheh 
36659b7895efSMark Fasheh 		ocfs2_journal_dirty(handle, orig_dx_leaves[i]);
36669b7895efSMark Fasheh 		ocfs2_journal_dirty(handle, new_dx_leaves[i]);
36679b7895efSMark Fasheh 	}
36689b7895efSMark Fasheh }
36699b7895efSMark Fasheh 
ocfs2_dx_dir_rebalance_credits(struct ocfs2_super * osb,struct ocfs2_dx_root_block * dx_root)36709b7895efSMark Fasheh static int ocfs2_dx_dir_rebalance_credits(struct ocfs2_super *osb,
36719b7895efSMark Fasheh 					  struct ocfs2_dx_root_block *dx_root)
36729b7895efSMark Fasheh {
3673d006c71fSJunxiao Bi 	int credits = ocfs2_clusters_to_blocks(osb->sb, 3);
36749b7895efSMark Fasheh 
367506f9da6eSGoldwyn Rodrigues 	credits += ocfs2_calc_extend_credits(osb->sb, &dx_root->dr_list);
36769b7895efSMark Fasheh 	credits += ocfs2_quota_trans_credits(osb->sb);
36779b7895efSMark Fasheh 	return credits;
36789b7895efSMark Fasheh }
36799b7895efSMark Fasheh 
36809b7895efSMark Fasheh /*
36819b7895efSMark Fasheh  * Find the median value in dx_leaf_bh and allocate a new leaf to move
36829b7895efSMark Fasheh  * half our entries into.
36839b7895efSMark Fasheh  */
ocfs2_dx_dir_rebalance(struct ocfs2_super * osb,struct inode * dir,struct buffer_head * dx_root_bh,struct buffer_head * dx_leaf_bh,struct ocfs2_dx_hinfo * hinfo,u32 leaf_cpos,u64 leaf_blkno)36849b7895efSMark Fasheh static int ocfs2_dx_dir_rebalance(struct ocfs2_super *osb, struct inode *dir,
36859b7895efSMark Fasheh 				  struct buffer_head *dx_root_bh,
36869b7895efSMark Fasheh 				  struct buffer_head *dx_leaf_bh,
36879b7895efSMark Fasheh 				  struct ocfs2_dx_hinfo *hinfo, u32 leaf_cpos,
36889b7895efSMark Fasheh 				  u64 leaf_blkno)
36899b7895efSMark Fasheh {
36909b7895efSMark Fasheh 	struct ocfs2_dx_leaf *dx_leaf = (struct ocfs2_dx_leaf *)dx_leaf_bh->b_data;
36919b7895efSMark Fasheh 	int credits, ret, i, num_used, did_quota = 0;
36929b7895efSMark Fasheh 	u32 cpos, split_hash, insert_hash = hinfo->major_hash;
36939b7895efSMark Fasheh 	u64 orig_leaves_start;
36949b7895efSMark Fasheh 	int num_dx_leaves;
36959b7895efSMark Fasheh 	struct buffer_head **orig_dx_leaves = NULL;
36969b7895efSMark Fasheh 	struct buffer_head **new_dx_leaves = NULL;
36979b7895efSMark Fasheh 	struct ocfs2_alloc_context *data_ac = NULL, *meta_ac = NULL;
36989b7895efSMark Fasheh 	struct ocfs2_extent_tree et;
36999b7895efSMark Fasheh 	handle_t *handle = NULL;
37009b7895efSMark Fasheh 	struct ocfs2_dx_root_block *dx_root;
37019b7895efSMark Fasheh 	struct ocfs2_dx_leaf *tmp_dx_leaf = NULL;
37029b7895efSMark Fasheh 
3703f1088d47STao Ma 	trace_ocfs2_dx_dir_rebalance((unsigned long long)OCFS2_I(dir)->ip_blkno,
3704f1088d47STao Ma 				     (unsigned long long)leaf_blkno,
3705f1088d47STao Ma 				     insert_hash);
37069b7895efSMark Fasheh 
37075e404e9eSJoel Becker 	ocfs2_init_dx_root_extent_tree(&et, INODE_CACHE(dir), dx_root_bh);
37089b7895efSMark Fasheh 
37099b7895efSMark Fasheh 	dx_root = (struct ocfs2_dx_root_block *)dx_root_bh->b_data;
37109b7895efSMark Fasheh 	/*
37119b7895efSMark Fasheh 	 * XXX: This is a rather large limit. We should use a more
37129b7895efSMark Fasheh 	 * realistic value.
37139b7895efSMark Fasheh 	 */
37149b7895efSMark Fasheh 	if (le32_to_cpu(dx_root->dr_clusters) == UINT_MAX)
37159b7895efSMark Fasheh 		return -ENOSPC;
37169b7895efSMark Fasheh 
37179b7895efSMark Fasheh 	num_used = le16_to_cpu(dx_leaf->dl_list.de_num_used);
37189b7895efSMark Fasheh 	if (num_used < le16_to_cpu(dx_leaf->dl_list.de_count)) {
37199b7895efSMark Fasheh 		mlog(ML_ERROR, "DX Dir: %llu, Asked to rebalance empty leaf: "
37209b7895efSMark Fasheh 		     "%llu, %d\n", (unsigned long long)OCFS2_I(dir)->ip_blkno,
37219b7895efSMark Fasheh 		     (unsigned long long)leaf_blkno, num_used);
37229b7895efSMark Fasheh 		ret = -EIO;
37239b7895efSMark Fasheh 		goto out;
37249b7895efSMark Fasheh 	}
37259b7895efSMark Fasheh 
37269b7895efSMark Fasheh 	orig_dx_leaves = ocfs2_dx_dir_kmalloc_leaves(osb->sb, &num_dx_leaves);
37279b7895efSMark Fasheh 	if (!orig_dx_leaves) {
37289b7895efSMark Fasheh 		ret = -ENOMEM;
37299b7895efSMark Fasheh 		mlog_errno(ret);
37309b7895efSMark Fasheh 		goto out;
37319b7895efSMark Fasheh 	}
37329b7895efSMark Fasheh 
37339b7895efSMark Fasheh 	new_dx_leaves = ocfs2_dx_dir_kmalloc_leaves(osb->sb, NULL);
37349b7895efSMark Fasheh 	if (!new_dx_leaves) {
37359b7895efSMark Fasheh 		ret = -ENOMEM;
37369b7895efSMark Fasheh 		mlog_errno(ret);
37379b7895efSMark Fasheh 		goto out;
37389b7895efSMark Fasheh 	}
37399b7895efSMark Fasheh 
37409b7895efSMark Fasheh 	ret = ocfs2_lock_allocators(dir, &et, 1, 0, &data_ac, &meta_ac);
37419b7895efSMark Fasheh 	if (ret) {
37429b7895efSMark Fasheh 		if (ret != -ENOSPC)
37439b7895efSMark Fasheh 			mlog_errno(ret);
37449b7895efSMark Fasheh 		goto out;
37459b7895efSMark Fasheh 	}
37469b7895efSMark Fasheh 
37479b7895efSMark Fasheh 	credits = ocfs2_dx_dir_rebalance_credits(osb, dx_root);
37489b7895efSMark Fasheh 	handle = ocfs2_start_trans(osb, credits);
37499b7895efSMark Fasheh 	if (IS_ERR(handle)) {
37509b7895efSMark Fasheh 		ret = PTR_ERR(handle);
37519b7895efSMark Fasheh 		handle = NULL;
37529b7895efSMark Fasheh 		mlog_errno(ret);
37539b7895efSMark Fasheh 		goto out;
37549b7895efSMark Fasheh 	}
37559b7895efSMark Fasheh 
37565dd4056dSChristoph Hellwig 	ret = dquot_alloc_space_nodirty(dir,
37575dd4056dSChristoph Hellwig 				       ocfs2_clusters_to_bytes(dir->i_sb, 1));
37585dd4056dSChristoph Hellwig 	if (ret)
37599b7895efSMark Fasheh 		goto out_commit;
37609b7895efSMark Fasheh 	did_quota = 1;
37619b7895efSMark Fasheh 
37620cf2f763SJoel Becker 	ret = ocfs2_journal_access_dl(handle, INODE_CACHE(dir), dx_leaf_bh,
37639b7895efSMark Fasheh 				      OCFS2_JOURNAL_ACCESS_WRITE);
37649b7895efSMark Fasheh 	if (ret) {
37659b7895efSMark Fasheh 		mlog_errno(ret);
37669b7895efSMark Fasheh 		goto out_commit;
37679b7895efSMark Fasheh 	}
37689b7895efSMark Fasheh 
37699b7895efSMark Fasheh 	/*
37709b7895efSMark Fasheh 	 * This block is changing anyway, so we can sort it in place.
37719b7895efSMark Fasheh 	 */
37729b7895efSMark Fasheh 	sort(dx_leaf->dl_list.de_entries, num_used,
37739b7895efSMark Fasheh 	     sizeof(struct ocfs2_dx_entry), dx_leaf_sort_cmp,
37749b7895efSMark Fasheh 	     dx_leaf_sort_swap);
37759b7895efSMark Fasheh 
3776ec20cec7SJoel Becker 	ocfs2_journal_dirty(handle, dx_leaf_bh);
37779b7895efSMark Fasheh 
37789b7895efSMark Fasheh 	ret = ocfs2_dx_dir_find_leaf_split(dx_leaf, leaf_cpos, insert_hash,
37799b7895efSMark Fasheh 					   &split_hash);
37809b7895efSMark Fasheh 	if (ret) {
37819b7895efSMark Fasheh 		mlog_errno(ret);
37829b7895efSMark Fasheh 		goto  out_commit;
37839b7895efSMark Fasheh 	}
37849b7895efSMark Fasheh 
3785f1088d47STao Ma 	trace_ocfs2_dx_dir_rebalance_split(leaf_cpos, split_hash, insert_hash);
37869b7895efSMark Fasheh 
37879b7895efSMark Fasheh 	/*
37889b7895efSMark Fasheh 	 * We have to carefully order operations here. There are items
37899b7895efSMark Fasheh 	 * which want to be in the new cluster before insert, but in
37909b7895efSMark Fasheh 	 * order to put those items in the new cluster, we alter the
37919b7895efSMark Fasheh 	 * old cluster. A failure to insert gets nasty.
37929b7895efSMark Fasheh 	 *
37939b7895efSMark Fasheh 	 * So, start by reserving writes to the old
37949b7895efSMark Fasheh 	 * cluster. ocfs2_dx_dir_new_cluster will reserve writes on
37959b7895efSMark Fasheh 	 * the new cluster for us, before inserting it. The insert
37969b7895efSMark Fasheh 	 * won't happen if there's an error before that. Once the
37979b7895efSMark Fasheh 	 * insert is done then, we can transfer from one leaf into the
37989b7895efSMark Fasheh 	 * other without fear of hitting any error.
37999b7895efSMark Fasheh 	 */
38009b7895efSMark Fasheh 
38019b7895efSMark Fasheh 	/*
38029b7895efSMark Fasheh 	 * The leaf transfer wants some scratch space so that we don't
38039b7895efSMark Fasheh 	 * wind up doing a bunch of expensive memmove().
38049b7895efSMark Fasheh 	 */
38059b7895efSMark Fasheh 	tmp_dx_leaf = kmalloc(osb->sb->s_blocksize, GFP_NOFS);
38069b7895efSMark Fasheh 	if (!tmp_dx_leaf) {
38079b7895efSMark Fasheh 		ret = -ENOMEM;
38089b7895efSMark Fasheh 		mlog_errno(ret);
38099b7895efSMark Fasheh 		goto out_commit;
38109b7895efSMark Fasheh 	}
38119b7895efSMark Fasheh 
38121d46dc08SMark Fasheh 	orig_leaves_start = ocfs2_block_to_cluster_start(dir->i_sb, leaf_blkno);
38139b7895efSMark Fasheh 	ret = ocfs2_read_dx_leaves(dir, orig_leaves_start, num_dx_leaves,
38149b7895efSMark Fasheh 				   orig_dx_leaves);
38159b7895efSMark Fasheh 	if (ret) {
38169b7895efSMark Fasheh 		mlog_errno(ret);
38179b7895efSMark Fasheh 		goto out_commit;
38189b7895efSMark Fasheh 	}
38199b7895efSMark Fasheh 
38200f4da216STristan Ye 	cpos = split_hash;
38210f4da216STristan Ye 	ret = ocfs2_dx_dir_new_cluster(dir, &et, cpos, handle,
38220f4da216STristan Ye 				       data_ac, meta_ac, new_dx_leaves,
38230f4da216STristan Ye 				       num_dx_leaves);
38240f4da216STristan Ye 	if (ret) {
38250f4da216STristan Ye 		mlog_errno(ret);
38260f4da216STristan Ye 		goto out_commit;
38270f4da216STristan Ye 	}
38280f4da216STristan Ye 
38299b7895efSMark Fasheh 	for (i = 0; i < num_dx_leaves; i++) {
38300cf2f763SJoel Becker 		ret = ocfs2_journal_access_dl(handle, INODE_CACHE(dir),
38310cf2f763SJoel Becker 					      orig_dx_leaves[i],
38329b7895efSMark Fasheh 					      OCFS2_JOURNAL_ACCESS_WRITE);
38339b7895efSMark Fasheh 		if (ret) {
38349b7895efSMark Fasheh 			mlog_errno(ret);
38359b7895efSMark Fasheh 			goto out_commit;
38369b7895efSMark Fasheh 		}
38379b7895efSMark Fasheh 
38380f4da216STristan Ye 		ret = ocfs2_journal_access_dl(handle, INODE_CACHE(dir),
38390f4da216STristan Ye 					      new_dx_leaves[i],
38400f4da216STristan Ye 					      OCFS2_JOURNAL_ACCESS_WRITE);
38419b7895efSMark Fasheh 		if (ret) {
38429b7895efSMark Fasheh 			mlog_errno(ret);
38439b7895efSMark Fasheh 			goto out_commit;
38449b7895efSMark Fasheh 		}
38450f4da216STristan Ye 	}
38469b7895efSMark Fasheh 
38479b7895efSMark Fasheh 	ocfs2_dx_dir_transfer_leaf(dir, split_hash, handle, tmp_dx_leaf,
38489b7895efSMark Fasheh 				   orig_dx_leaves, new_dx_leaves, num_dx_leaves);
38499b7895efSMark Fasheh 
38509b7895efSMark Fasheh out_commit:
38519b7895efSMark Fasheh 	if (ret < 0 && did_quota)
38525dd4056dSChristoph Hellwig 		dquot_free_space_nodirty(dir,
38539b7895efSMark Fasheh 				ocfs2_clusters_to_bytes(dir->i_sb, 1));
38549b7895efSMark Fasheh 
38552931cdcbSDarrick J. Wong 	ocfs2_update_inode_fsync_trans(handle, dir, 1);
38569b7895efSMark Fasheh 	ocfs2_commit_trans(osb, handle);
38579b7895efSMark Fasheh 
38589b7895efSMark Fasheh out:
38599b7895efSMark Fasheh 	if (orig_dx_leaves || new_dx_leaves) {
38609b7895efSMark Fasheh 		for (i = 0; i < num_dx_leaves; i++) {
38619b7895efSMark Fasheh 			if (orig_dx_leaves)
38629b7895efSMark Fasheh 				brelse(orig_dx_leaves[i]);
38639b7895efSMark Fasheh 			if (new_dx_leaves)
38649b7895efSMark Fasheh 				brelse(new_dx_leaves[i]);
38659b7895efSMark Fasheh 		}
38669b7895efSMark Fasheh 		kfree(orig_dx_leaves);
38679b7895efSMark Fasheh 		kfree(new_dx_leaves);
38689b7895efSMark Fasheh 	}
38699b7895efSMark Fasheh 
38709b7895efSMark Fasheh 	if (meta_ac)
38719b7895efSMark Fasheh 		ocfs2_free_alloc_context(meta_ac);
38729b7895efSMark Fasheh 	if (data_ac)
38739b7895efSMark Fasheh 		ocfs2_free_alloc_context(data_ac);
38749b7895efSMark Fasheh 
38759b7895efSMark Fasheh 	kfree(tmp_dx_leaf);
38769b7895efSMark Fasheh 	return ret;
38779b7895efSMark Fasheh }
38789b7895efSMark Fasheh 
ocfs2_find_dir_space_dx(struct ocfs2_super * osb,struct inode * dir,struct buffer_head * di_bh,struct buffer_head * dx_root_bh,const char * name,int namelen,struct ocfs2_dir_lookup_result * lookup)3879e7c17e43SMark Fasheh static int ocfs2_find_dir_space_dx(struct ocfs2_super *osb, struct inode *dir,
3880e7c17e43SMark Fasheh 				   struct buffer_head *di_bh,
3881e7c17e43SMark Fasheh 				   struct buffer_head *dx_root_bh,
3882e7c17e43SMark Fasheh 				   const char *name, int namelen,
3883e7c17e43SMark Fasheh 				   struct ocfs2_dir_lookup_result *lookup)
3884e7c17e43SMark Fasheh {
3885e7c17e43SMark Fasheh 	int ret, rebalanced = 0;
3886e7c17e43SMark Fasheh 	struct ocfs2_dx_root_block *dx_root;
3887e7c17e43SMark Fasheh 	struct buffer_head *dx_leaf_bh = NULL;
3888e7c17e43SMark Fasheh 	struct ocfs2_dx_leaf *dx_leaf;
3889e7c17e43SMark Fasheh 	u64 blkno;
3890e7c17e43SMark Fasheh 	u32 leaf_cpos;
3891e7c17e43SMark Fasheh 
3892e7c17e43SMark Fasheh 	dx_root = (struct ocfs2_dx_root_block *)dx_root_bh->b_data;
3893e7c17e43SMark Fasheh 
3894e7c17e43SMark Fasheh restart_search:
3895e7c17e43SMark Fasheh 	ret = ocfs2_dx_dir_lookup(dir, &dx_root->dr_list, &lookup->dl_hinfo,
3896e7c17e43SMark Fasheh 				  &leaf_cpos, &blkno);
3897e7c17e43SMark Fasheh 	if (ret) {
3898e7c17e43SMark Fasheh 		mlog_errno(ret);
3899e7c17e43SMark Fasheh 		goto out;
3900e7c17e43SMark Fasheh 	}
3901e7c17e43SMark Fasheh 
3902e7c17e43SMark Fasheh 	ret = ocfs2_read_dx_leaf(dir, blkno, &dx_leaf_bh);
3903e7c17e43SMark Fasheh 	if (ret) {
3904e7c17e43SMark Fasheh 		mlog_errno(ret);
3905e7c17e43SMark Fasheh 		goto out;
3906e7c17e43SMark Fasheh 	}
3907e7c17e43SMark Fasheh 
3908e7c17e43SMark Fasheh 	dx_leaf = (struct ocfs2_dx_leaf *)dx_leaf_bh->b_data;
3909e7c17e43SMark Fasheh 
3910e7c17e43SMark Fasheh 	if (le16_to_cpu(dx_leaf->dl_list.de_num_used) >=
3911e7c17e43SMark Fasheh 	    le16_to_cpu(dx_leaf->dl_list.de_count)) {
3912e7c17e43SMark Fasheh 		if (rebalanced) {
3913e7c17e43SMark Fasheh 			/*
3914e7c17e43SMark Fasheh 			 * Rebalancing should have provided us with
3915e7c17e43SMark Fasheh 			 * space in an appropriate leaf.
3916e7c17e43SMark Fasheh 			 *
3917e7c17e43SMark Fasheh 			 * XXX: Is this an abnormal condition then?
3918e7c17e43SMark Fasheh 			 * Should we print a message here?
3919e7c17e43SMark Fasheh 			 */
3920e7c17e43SMark Fasheh 			ret = -ENOSPC;
3921e7c17e43SMark Fasheh 			goto out;
3922e7c17e43SMark Fasheh 		}
3923e7c17e43SMark Fasheh 
3924e7c17e43SMark Fasheh 		ret = ocfs2_dx_dir_rebalance(osb, dir, dx_root_bh, dx_leaf_bh,
3925e7c17e43SMark Fasheh 					     &lookup->dl_hinfo, leaf_cpos,
3926e7c17e43SMark Fasheh 					     blkno);
3927e7c17e43SMark Fasheh 		if (ret) {
3928e7c17e43SMark Fasheh 			if (ret != -ENOSPC)
3929e7c17e43SMark Fasheh 				mlog_errno(ret);
3930e7c17e43SMark Fasheh 			goto out;
3931e7c17e43SMark Fasheh 		}
3932e7c17e43SMark Fasheh 
3933e7c17e43SMark Fasheh 		/*
3934e7c17e43SMark Fasheh 		 * Restart the lookup. The rebalance might have
3935e7c17e43SMark Fasheh 		 * changed which block our item fits into. Mark our
3936e7c17e43SMark Fasheh 		 * progress, so we only execute this once.
3937e7c17e43SMark Fasheh 		 */
3938e7c17e43SMark Fasheh 		brelse(dx_leaf_bh);
3939e7c17e43SMark Fasheh 		dx_leaf_bh = NULL;
3940e7c17e43SMark Fasheh 		rebalanced = 1;
3941e7c17e43SMark Fasheh 		goto restart_search;
3942e7c17e43SMark Fasheh 	}
3943e7c17e43SMark Fasheh 
3944e7c17e43SMark Fasheh 	lookup->dl_dx_leaf_bh = dx_leaf_bh;
3945e7c17e43SMark Fasheh 	dx_leaf_bh = NULL;
3946e7c17e43SMark Fasheh 
3947e7c17e43SMark Fasheh out:
3948e7c17e43SMark Fasheh 	brelse(dx_leaf_bh);
3949e7c17e43SMark Fasheh 	return ret;
3950e7c17e43SMark Fasheh }
3951e7c17e43SMark Fasheh 
ocfs2_search_dx_free_list(struct inode * dir,struct buffer_head * dx_root_bh,int namelen,struct ocfs2_dir_lookup_result * lookup)3952e7c17e43SMark Fasheh static int ocfs2_search_dx_free_list(struct inode *dir,
3953e7c17e43SMark Fasheh 				     struct buffer_head *dx_root_bh,
3954e7c17e43SMark Fasheh 				     int namelen,
3955e7c17e43SMark Fasheh 				     struct ocfs2_dir_lookup_result *lookup)
3956e7c17e43SMark Fasheh {
3957e7c17e43SMark Fasheh 	int ret = -ENOSPC;
3958e7c17e43SMark Fasheh 	struct buffer_head *leaf_bh = NULL, *prev_leaf_bh = NULL;
3959e7c17e43SMark Fasheh 	struct ocfs2_dir_block_trailer *db;
3960e7c17e43SMark Fasheh 	u64 next_block;
3961e7c17e43SMark Fasheh 	int rec_len = OCFS2_DIR_REC_LEN(namelen);
3962e7c17e43SMark Fasheh 	struct ocfs2_dx_root_block *dx_root;
3963e7c17e43SMark Fasheh 
3964e7c17e43SMark Fasheh 	dx_root = (struct ocfs2_dx_root_block *)dx_root_bh->b_data;
3965e7c17e43SMark Fasheh 	next_block = le64_to_cpu(dx_root->dr_free_blk);
3966e7c17e43SMark Fasheh 
3967e7c17e43SMark Fasheh 	while (next_block) {
3968e7c17e43SMark Fasheh 		brelse(prev_leaf_bh);
3969e7c17e43SMark Fasheh 		prev_leaf_bh = leaf_bh;
3970e7c17e43SMark Fasheh 		leaf_bh = NULL;
3971e7c17e43SMark Fasheh 
3972e7c17e43SMark Fasheh 		ret = ocfs2_read_dir_block_direct(dir, next_block, &leaf_bh);
3973e7c17e43SMark Fasheh 		if (ret) {
3974e7c17e43SMark Fasheh 			mlog_errno(ret);
3975e7c17e43SMark Fasheh 			goto out;
3976e7c17e43SMark Fasheh 		}
3977e7c17e43SMark Fasheh 
3978e7c17e43SMark Fasheh 		db = ocfs2_trailer_from_bh(leaf_bh, dir->i_sb);
3979e7c17e43SMark Fasheh 		if (rec_len <= le16_to_cpu(db->db_free_rec_len)) {
3980e7c17e43SMark Fasheh 			lookup->dl_leaf_bh = leaf_bh;
3981e7c17e43SMark Fasheh 			lookup->dl_prev_leaf_bh = prev_leaf_bh;
3982e7c17e43SMark Fasheh 			leaf_bh = NULL;
3983e7c17e43SMark Fasheh 			prev_leaf_bh = NULL;
3984e7c17e43SMark Fasheh 			break;
3985e7c17e43SMark Fasheh 		}
3986e7c17e43SMark Fasheh 
3987e7c17e43SMark Fasheh 		next_block = le64_to_cpu(db->db_free_next);
3988e7c17e43SMark Fasheh 	}
3989e7c17e43SMark Fasheh 
3990e7c17e43SMark Fasheh 	if (!next_block)
3991e7c17e43SMark Fasheh 		ret = -ENOSPC;
3992e7c17e43SMark Fasheh 
3993e7c17e43SMark Fasheh out:
3994e7c17e43SMark Fasheh 
3995e7c17e43SMark Fasheh 	brelse(leaf_bh);
3996e7c17e43SMark Fasheh 	brelse(prev_leaf_bh);
3997e7c17e43SMark Fasheh 	return ret;
3998e7c17e43SMark Fasheh }
3999e7c17e43SMark Fasheh 
ocfs2_expand_inline_dx_root(struct inode * dir,struct buffer_head * dx_root_bh)40004ed8a6bbSMark Fasheh static int ocfs2_expand_inline_dx_root(struct inode *dir,
40014ed8a6bbSMark Fasheh 				       struct buffer_head *dx_root_bh)
40024ed8a6bbSMark Fasheh {
40034ed8a6bbSMark Fasheh 	int ret, num_dx_leaves, i, j, did_quota = 0;
40044ed8a6bbSMark Fasheh 	struct buffer_head **dx_leaves = NULL;
40054ed8a6bbSMark Fasheh 	struct ocfs2_extent_tree et;
40064ed8a6bbSMark Fasheh 	u64 insert_blkno;
40074ed8a6bbSMark Fasheh 	struct ocfs2_alloc_context *data_ac = NULL;
40084ed8a6bbSMark Fasheh 	struct ocfs2_super *osb = OCFS2_SB(dir->i_sb);
40094ed8a6bbSMark Fasheh 	handle_t *handle = NULL;
40104ed8a6bbSMark Fasheh 	struct ocfs2_dx_root_block *dx_root;
40114ed8a6bbSMark Fasheh 	struct ocfs2_dx_entry_list *entry_list;
40124ed8a6bbSMark Fasheh 	struct ocfs2_dx_entry *dx_entry;
40134ed8a6bbSMark Fasheh 	struct ocfs2_dx_leaf *target_leaf;
40144ed8a6bbSMark Fasheh 
40154ed8a6bbSMark Fasheh 	ret = ocfs2_reserve_clusters(osb, 1, &data_ac);
40164ed8a6bbSMark Fasheh 	if (ret) {
40174ed8a6bbSMark Fasheh 		mlog_errno(ret);
40184ed8a6bbSMark Fasheh 		goto out;
40194ed8a6bbSMark Fasheh 	}
40204ed8a6bbSMark Fasheh 
40214ed8a6bbSMark Fasheh 	dx_leaves = ocfs2_dx_dir_kmalloc_leaves(osb->sb, &num_dx_leaves);
40224ed8a6bbSMark Fasheh 	if (!dx_leaves) {
40234ed8a6bbSMark Fasheh 		ret = -ENOMEM;
40244ed8a6bbSMark Fasheh 		mlog_errno(ret);
40254ed8a6bbSMark Fasheh 		goto out;
40264ed8a6bbSMark Fasheh 	}
40274ed8a6bbSMark Fasheh 
40284ed8a6bbSMark Fasheh 	handle = ocfs2_start_trans(osb, ocfs2_calc_dxi_expand_credits(osb->sb));
40294ed8a6bbSMark Fasheh 	if (IS_ERR(handle)) {
40304ed8a6bbSMark Fasheh 		ret = PTR_ERR(handle);
40314ed8a6bbSMark Fasheh 		mlog_errno(ret);
40324ed8a6bbSMark Fasheh 		goto out;
40334ed8a6bbSMark Fasheh 	}
40344ed8a6bbSMark Fasheh 
40355dd4056dSChristoph Hellwig 	ret = dquot_alloc_space_nodirty(dir,
40365dd4056dSChristoph Hellwig 				       ocfs2_clusters_to_bytes(osb->sb, 1));
40375dd4056dSChristoph Hellwig 	if (ret)
40384ed8a6bbSMark Fasheh 		goto out_commit;
40394ed8a6bbSMark Fasheh 	did_quota = 1;
40404ed8a6bbSMark Fasheh 
40414ed8a6bbSMark Fasheh 	/*
40424ed8a6bbSMark Fasheh 	 * We do this up front, before the allocation, so that a
40434ed8a6bbSMark Fasheh 	 * failure to add the dx_root_bh to the journal won't result
40444ed8a6bbSMark Fasheh 	 * us losing clusters.
40454ed8a6bbSMark Fasheh 	 */
40460cf2f763SJoel Becker 	ret = ocfs2_journal_access_dr(handle, INODE_CACHE(dir), dx_root_bh,
40474ed8a6bbSMark Fasheh 				      OCFS2_JOURNAL_ACCESS_WRITE);
40484ed8a6bbSMark Fasheh 	if (ret) {
40494ed8a6bbSMark Fasheh 		mlog_errno(ret);
40504ed8a6bbSMark Fasheh 		goto out_commit;
40514ed8a6bbSMark Fasheh 	}
40524ed8a6bbSMark Fasheh 
40534ed8a6bbSMark Fasheh 	ret = __ocfs2_dx_dir_new_cluster(dir, 0, handle, data_ac, dx_leaves,
40544ed8a6bbSMark Fasheh 					 num_dx_leaves, &insert_blkno);
40554ed8a6bbSMark Fasheh 	if (ret) {
40564ed8a6bbSMark Fasheh 		mlog_errno(ret);
40574ed8a6bbSMark Fasheh 		goto out_commit;
40584ed8a6bbSMark Fasheh 	}
40594ed8a6bbSMark Fasheh 
40604ed8a6bbSMark Fasheh 	/*
40614ed8a6bbSMark Fasheh 	 * Transfer the entries from our dx_root into the appropriate
40624ed8a6bbSMark Fasheh 	 * block
40634ed8a6bbSMark Fasheh 	 */
40644ed8a6bbSMark Fasheh 	dx_root = (struct ocfs2_dx_root_block *) dx_root_bh->b_data;
40654ed8a6bbSMark Fasheh 	entry_list = &dx_root->dr_entries;
40664ed8a6bbSMark Fasheh 
40674ed8a6bbSMark Fasheh 	for (i = 0; i < le16_to_cpu(entry_list->de_num_used); i++) {
40684ed8a6bbSMark Fasheh 		dx_entry = &entry_list->de_entries[i];
40694ed8a6bbSMark Fasheh 
40704ed8a6bbSMark Fasheh 		j = __ocfs2_dx_dir_hash_idx(osb,
40714ed8a6bbSMark Fasheh 					    le32_to_cpu(dx_entry->dx_minor_hash));
40724ed8a6bbSMark Fasheh 		target_leaf = (struct ocfs2_dx_leaf *)dx_leaves[j]->b_data;
40734ed8a6bbSMark Fasheh 
40744ed8a6bbSMark Fasheh 		ocfs2_dx_dir_leaf_insert_tail(target_leaf, dx_entry);
40754ed8a6bbSMark Fasheh 
40764ed8a6bbSMark Fasheh 		/* Each leaf has been passed to the journal already
40774ed8a6bbSMark Fasheh 		 * via __ocfs2_dx_dir_new_cluster() */
40784ed8a6bbSMark Fasheh 	}
40794ed8a6bbSMark Fasheh 
40804ed8a6bbSMark Fasheh 	dx_root->dr_flags &= ~OCFS2_DX_FLAG_INLINE;
40814ed8a6bbSMark Fasheh 	memset(&dx_root->dr_list, 0, osb->sb->s_blocksize -
40824ed8a6bbSMark Fasheh 	       offsetof(struct ocfs2_dx_root_block, dr_list));
40834ed8a6bbSMark Fasheh 	dx_root->dr_list.l_count =
40844ed8a6bbSMark Fasheh 		cpu_to_le16(ocfs2_extent_recs_per_dx_root(osb->sb));
40854ed8a6bbSMark Fasheh 
40864ed8a6bbSMark Fasheh 	/* This should never fail considering we start with an empty
40874ed8a6bbSMark Fasheh 	 * dx_root. */
40885e404e9eSJoel Becker 	ocfs2_init_dx_root_extent_tree(&et, INODE_CACHE(dir), dx_root_bh);
4089cc79d8c1SJoel Becker 	ret = ocfs2_insert_extent(handle, &et, 0, insert_blkno, 1, 0, NULL);
40904ed8a6bbSMark Fasheh 	if (ret)
40914ed8a6bbSMark Fasheh 		mlog_errno(ret);
40924ed8a6bbSMark Fasheh 	did_quota = 0;
40934ed8a6bbSMark Fasheh 
40942931cdcbSDarrick J. Wong 	ocfs2_update_inode_fsync_trans(handle, dir, 1);
40954ed8a6bbSMark Fasheh 	ocfs2_journal_dirty(handle, dx_root_bh);
40964ed8a6bbSMark Fasheh 
40974ed8a6bbSMark Fasheh out_commit:
40984ed8a6bbSMark Fasheh 	if (ret < 0 && did_quota)
40995dd4056dSChristoph Hellwig 		dquot_free_space_nodirty(dir,
41004ed8a6bbSMark Fasheh 					  ocfs2_clusters_to_bytes(dir->i_sb, 1));
41014ed8a6bbSMark Fasheh 
41024ed8a6bbSMark Fasheh 	ocfs2_commit_trans(osb, handle);
41034ed8a6bbSMark Fasheh 
41044ed8a6bbSMark Fasheh out:
41054ed8a6bbSMark Fasheh 	if (data_ac)
41064ed8a6bbSMark Fasheh 		ocfs2_free_alloc_context(data_ac);
41074ed8a6bbSMark Fasheh 
41084ed8a6bbSMark Fasheh 	if (dx_leaves) {
41094ed8a6bbSMark Fasheh 		for (i = 0; i < num_dx_leaves; i++)
41104ed8a6bbSMark Fasheh 			brelse(dx_leaves[i]);
41114ed8a6bbSMark Fasheh 		kfree(dx_leaves);
41124ed8a6bbSMark Fasheh 	}
41134ed8a6bbSMark Fasheh 	return ret;
41144ed8a6bbSMark Fasheh }
41154ed8a6bbSMark Fasheh 
ocfs2_inline_dx_has_space(struct buffer_head * dx_root_bh)41164ed8a6bbSMark Fasheh static int ocfs2_inline_dx_has_space(struct buffer_head *dx_root_bh)
41174ed8a6bbSMark Fasheh {
41184ed8a6bbSMark Fasheh 	struct ocfs2_dx_root_block *dx_root;
41194ed8a6bbSMark Fasheh 	struct ocfs2_dx_entry_list *entry_list;
41204ed8a6bbSMark Fasheh 
41214ed8a6bbSMark Fasheh 	dx_root = (struct ocfs2_dx_root_block *) dx_root_bh->b_data;
41224ed8a6bbSMark Fasheh 	entry_list = &dx_root->dr_entries;
41234ed8a6bbSMark Fasheh 
41244ed8a6bbSMark Fasheh 	if (le16_to_cpu(entry_list->de_num_used) >=
41254ed8a6bbSMark Fasheh 	    le16_to_cpu(entry_list->de_count))
41264ed8a6bbSMark Fasheh 		return -ENOSPC;
41274ed8a6bbSMark Fasheh 
41284ed8a6bbSMark Fasheh 	return 0;
41294ed8a6bbSMark Fasheh }
41304ed8a6bbSMark Fasheh 
ocfs2_prepare_dx_dir_for_insert(struct inode * dir,struct buffer_head * di_bh,const char * name,int namelen,struct ocfs2_dir_lookup_result * lookup)4131e7c17e43SMark Fasheh static int ocfs2_prepare_dx_dir_for_insert(struct inode *dir,
4132e7c17e43SMark Fasheh 					   struct buffer_head *di_bh,
4133e7c17e43SMark Fasheh 					   const char *name,
41349b7895efSMark Fasheh 					   int namelen,
41359b7895efSMark Fasheh 					   struct ocfs2_dir_lookup_result *lookup)
41369b7895efSMark Fasheh {
4137e7c17e43SMark Fasheh 	int ret, free_dx_root = 1;
4138e7c17e43SMark Fasheh 	struct ocfs2_super *osb = OCFS2_SB(dir->i_sb);
41399b7895efSMark Fasheh 	struct buffer_head *dx_root_bh = NULL;
4140e7c17e43SMark Fasheh 	struct buffer_head *leaf_bh = NULL;
41419b7895efSMark Fasheh 	struct ocfs2_dinode *di = (struct ocfs2_dinode *)di_bh->b_data;
4142e7c17e43SMark Fasheh 	struct ocfs2_dx_root_block *dx_root;
41439b7895efSMark Fasheh 
41449b7895efSMark Fasheh 	ret = ocfs2_read_dx_root(dir, di, &dx_root_bh);
41459b7895efSMark Fasheh 	if (ret) {
41469b7895efSMark Fasheh 		mlog_errno(ret);
41479b7895efSMark Fasheh 		goto out;
41489b7895efSMark Fasheh 	}
41499b7895efSMark Fasheh 
41509b7895efSMark Fasheh 	dx_root = (struct ocfs2_dx_root_block *)dx_root_bh->b_data;
4151e3a93c2dSMark Fasheh 	if (le32_to_cpu(dx_root->dr_num_entries) == OCFS2_DX_ENTRIES_MAX) {
4152e3a93c2dSMark Fasheh 		ret = -ENOSPC;
4153e3a93c2dSMark Fasheh 		mlog_errno(ret);
4154e3a93c2dSMark Fasheh 		goto out;
4155e3a93c2dSMark Fasheh 	}
4156e3a93c2dSMark Fasheh 
41574ed8a6bbSMark Fasheh 	if (ocfs2_dx_root_inline(dx_root)) {
41584ed8a6bbSMark Fasheh 		ret = ocfs2_inline_dx_has_space(dx_root_bh);
41594ed8a6bbSMark Fasheh 
41604ed8a6bbSMark Fasheh 		if (ret == 0)
41614ed8a6bbSMark Fasheh 			goto search_el;
41624ed8a6bbSMark Fasheh 
41634ed8a6bbSMark Fasheh 		/*
41644ed8a6bbSMark Fasheh 		 * We ran out of room in the root block. Expand it to
41654ed8a6bbSMark Fasheh 		 * an extent, then allow ocfs2_find_dir_space_dx to do
41664ed8a6bbSMark Fasheh 		 * the rest.
41674ed8a6bbSMark Fasheh 		 */
41684ed8a6bbSMark Fasheh 		ret = ocfs2_expand_inline_dx_root(dir, dx_root_bh);
41694ed8a6bbSMark Fasheh 		if (ret) {
41704ed8a6bbSMark Fasheh 			mlog_errno(ret);
41714ed8a6bbSMark Fasheh 			goto out;
41724ed8a6bbSMark Fasheh 		}
41734ed8a6bbSMark Fasheh 	}
41749b7895efSMark Fasheh 
41759b7895efSMark Fasheh 	/*
4176e7c17e43SMark Fasheh 	 * Insert preparation for an indexed directory is split into two
4177e7c17e43SMark Fasheh 	 * steps. The call to find_dir_space_dx reserves room in the index for
4178e7c17e43SMark Fasheh 	 * an additional item. If we run out of space there, it's a real error
4179e7c17e43SMark Fasheh 	 * we can't continue on.
41809b7895efSMark Fasheh 	 */
4181e7c17e43SMark Fasheh 	ret = ocfs2_find_dir_space_dx(osb, dir, di_bh, dx_root_bh, name,
4182e7c17e43SMark Fasheh 				      namelen, lookup);
41839b7895efSMark Fasheh 	if (ret) {
41849b7895efSMark Fasheh 		mlog_errno(ret);
41859b7895efSMark Fasheh 		goto out;
41869b7895efSMark Fasheh 	}
41879b7895efSMark Fasheh 
41884ed8a6bbSMark Fasheh search_el:
4189e7c17e43SMark Fasheh 	/*
4190e7c17e43SMark Fasheh 	 * Next, we need to find space in the unindexed tree. This call
4191e7c17e43SMark Fasheh 	 * searches using the free space linked list. If the unindexed tree
4192e7c17e43SMark Fasheh 	 * lacks sufficient space, we'll expand it below. The expansion code
4193e7c17e43SMark Fasheh 	 * is smart enough to add any new blocks to the free space list.
4194e7c17e43SMark Fasheh 	 */
4195e7c17e43SMark Fasheh 	ret = ocfs2_search_dx_free_list(dir, dx_root_bh, namelen, lookup);
4196e7c17e43SMark Fasheh 	if (ret && ret != -ENOSPC) {
4197e7c17e43SMark Fasheh 		mlog_errno(ret);
4198e7c17e43SMark Fasheh 		goto out;
4199e7c17e43SMark Fasheh 	}
4200e7c17e43SMark Fasheh 
4201e7c17e43SMark Fasheh 	/* Do this up here - ocfs2_extend_dir might need the dx_root */
42024ed8a6bbSMark Fasheh 	lookup->dl_dx_root_bh = dx_root_bh;
4203e7c17e43SMark Fasheh 	free_dx_root = 0;
4204e7c17e43SMark Fasheh 
4205e7c17e43SMark Fasheh 	if (ret == -ENOSPC) {
4206e7c17e43SMark Fasheh 		ret = ocfs2_extend_dir(osb, dir, di_bh, 1, lookup, &leaf_bh);
4207e7c17e43SMark Fasheh 
4208e7c17e43SMark Fasheh 		if (ret) {
4209e7c17e43SMark Fasheh 			mlog_errno(ret);
4210e7c17e43SMark Fasheh 			goto out;
4211e7c17e43SMark Fasheh 		}
4212e7c17e43SMark Fasheh 
4213e7c17e43SMark Fasheh 		/*
4214e7c17e43SMark Fasheh 		 * We make the assumption here that new leaf blocks are added
4215e7c17e43SMark Fasheh 		 * to the front of our free list.
4216e7c17e43SMark Fasheh 		 */
4217e7c17e43SMark Fasheh 		lookup->dl_prev_leaf_bh = NULL;
4218e7c17e43SMark Fasheh 		lookup->dl_leaf_bh = leaf_bh;
4219e7c17e43SMark Fasheh 	}
42209b7895efSMark Fasheh 
42219b7895efSMark Fasheh out:
4222e7c17e43SMark Fasheh 	if (free_dx_root)
42239b7895efSMark Fasheh 		brelse(dx_root_bh);
42249b7895efSMark Fasheh 	return ret;
42259b7895efSMark Fasheh }
42269b7895efSMark Fasheh 
42274a12ca3aSMark Fasheh /*
42284a12ca3aSMark Fasheh  * Get a directory ready for insert. Any directory allocation required
42294a12ca3aSMark Fasheh  * happens here. Success returns zero, and enough context in the dir
42304a12ca3aSMark Fasheh  * lookup result that ocfs2_add_entry() will be able complete the task
42314a12ca3aSMark Fasheh  * with minimal performance impact.
42324a12ca3aSMark Fasheh  */
ocfs2_prepare_dir_for_insert(struct ocfs2_super * osb,struct inode * dir,struct buffer_head * parent_fe_bh,const char * name,int namelen,struct ocfs2_dir_lookup_result * lookup)42335b6a3a2bSMark Fasheh int ocfs2_prepare_dir_for_insert(struct ocfs2_super *osb,
42345b6a3a2bSMark Fasheh 				 struct inode *dir,
42355b6a3a2bSMark Fasheh 				 struct buffer_head *parent_fe_bh,
42365b6a3a2bSMark Fasheh 				 const char *name,
42375b6a3a2bSMark Fasheh 				 int namelen,
42384a12ca3aSMark Fasheh 				 struct ocfs2_dir_lookup_result *lookup)
42395b6a3a2bSMark Fasheh {
42405b6a3a2bSMark Fasheh 	int ret;
42415b6a3a2bSMark Fasheh 	unsigned int blocks_wanted = 1;
42425b6a3a2bSMark Fasheh 	struct buffer_head *bh = NULL;
42435b6a3a2bSMark Fasheh 
4244f1088d47STao Ma 	trace_ocfs2_prepare_dir_for_insert(
4245f1088d47STao Ma 		(unsigned long long)OCFS2_I(dir)->ip_blkno, namelen);
42465b6a3a2bSMark Fasheh 
42475b6a3a2bSMark Fasheh 	if (!namelen) {
42485b6a3a2bSMark Fasheh 		ret = -EINVAL;
42495b6a3a2bSMark Fasheh 		mlog_errno(ret);
42505b6a3a2bSMark Fasheh 		goto out;
42515b6a3a2bSMark Fasheh 	}
42525b6a3a2bSMark Fasheh 
42539b7895efSMark Fasheh 	/*
42549b7895efSMark Fasheh 	 * Do this up front to reduce confusion.
42559b7895efSMark Fasheh 	 *
42569b7895efSMark Fasheh 	 * The directory might start inline, then be turned into an
42579b7895efSMark Fasheh 	 * indexed one, in which case we'd need to hash deep inside
42589b7895efSMark Fasheh 	 * ocfs2_find_dir_space_id(). Since
42599b7895efSMark Fasheh 	 * ocfs2_prepare_dx_dir_for_insert() also needs this hash
42609b7895efSMark Fasheh 	 * done, there seems no point in spreading out the calls. We
42619b7895efSMark Fasheh 	 * can optimize away the case where the file system doesn't
42629b7895efSMark Fasheh 	 * support indexing.
42639b7895efSMark Fasheh 	 */
42649b7895efSMark Fasheh 	if (ocfs2_supports_indexed_dirs(osb))
42659b7895efSMark Fasheh 		ocfs2_dx_dir_name_hash(dir, name, namelen, &lookup->dl_hinfo);
42669b7895efSMark Fasheh 
42679b7895efSMark Fasheh 	if (ocfs2_dir_indexed(dir)) {
4268e7c17e43SMark Fasheh 		ret = ocfs2_prepare_dx_dir_for_insert(dir, parent_fe_bh,
4269e7c17e43SMark Fasheh 						      name, namelen, lookup);
4270e7c17e43SMark Fasheh 		if (ret)
42719b7895efSMark Fasheh 			mlog_errno(ret);
42729b7895efSMark Fasheh 		goto out;
42739b7895efSMark Fasheh 	}
42749b7895efSMark Fasheh 
42755b6a3a2bSMark Fasheh 	if (OCFS2_I(dir)->ip_dyn_features & OCFS2_INLINE_DATA_FL) {
42765b6a3a2bSMark Fasheh 		ret = ocfs2_find_dir_space_id(dir, parent_fe_bh, name,
42775b6a3a2bSMark Fasheh 					      namelen, &bh, &blocks_wanted);
42785b6a3a2bSMark Fasheh 	} else
42795b6a3a2bSMark Fasheh 		ret = ocfs2_find_dir_space_el(dir, name, namelen, &bh);
42805b6a3a2bSMark Fasheh 
42815b6a3a2bSMark Fasheh 	if (ret && ret != -ENOSPC) {
42825b6a3a2bSMark Fasheh 		mlog_errno(ret);
42835b6a3a2bSMark Fasheh 		goto out;
42845b6a3a2bSMark Fasheh 	}
42855b6a3a2bSMark Fasheh 
42865b6a3a2bSMark Fasheh 	if (ret == -ENOSPC) {
42875b6a3a2bSMark Fasheh 		/*
42885b6a3a2bSMark Fasheh 		 * We have to expand the directory to add this name.
42895b6a3a2bSMark Fasheh 		 */
42905b6a3a2bSMark Fasheh 		BUG_ON(bh);
42915b6a3a2bSMark Fasheh 
42925b6a3a2bSMark Fasheh 		ret = ocfs2_extend_dir(osb, dir, parent_fe_bh, blocks_wanted,
42939b7895efSMark Fasheh 				       lookup, &bh);
42945b6a3a2bSMark Fasheh 		if (ret) {
42955b6a3a2bSMark Fasheh 			if (ret != -ENOSPC)
42965b6a3a2bSMark Fasheh 				mlog_errno(ret);
42975b6a3a2bSMark Fasheh 			goto out;
42985b6a3a2bSMark Fasheh 		}
42995b6a3a2bSMark Fasheh 
43005b6a3a2bSMark Fasheh 		BUG_ON(!bh);
43015b6a3a2bSMark Fasheh 	}
43025b6a3a2bSMark Fasheh 
43034a12ca3aSMark Fasheh 	lookup->dl_leaf_bh = bh;
43045b6a3a2bSMark Fasheh 	bh = NULL;
43055b6a3a2bSMark Fasheh out:
43065b6a3a2bSMark Fasheh 	brelse(bh);
43075b6a3a2bSMark Fasheh 	return ret;
43085b6a3a2bSMark Fasheh }
43099b7895efSMark Fasheh 
ocfs2_dx_dir_remove_index(struct inode * dir,struct buffer_head * di_bh,struct buffer_head * dx_root_bh)43109b7895efSMark Fasheh static int ocfs2_dx_dir_remove_index(struct inode *dir,
43119b7895efSMark Fasheh 				     struct buffer_head *di_bh,
43129b7895efSMark Fasheh 				     struct buffer_head *dx_root_bh)
43139b7895efSMark Fasheh {
43149b7895efSMark Fasheh 	int ret;
43159b7895efSMark Fasheh 	struct ocfs2_super *osb = OCFS2_SB(dir->i_sb);
43169b7895efSMark Fasheh 	struct ocfs2_dinode *di = (struct ocfs2_dinode *)di_bh->b_data;
43179b7895efSMark Fasheh 	struct ocfs2_dx_root_block *dx_root;
43189b7895efSMark Fasheh 	struct inode *dx_alloc_inode = NULL;
43199b7895efSMark Fasheh 	struct buffer_head *dx_alloc_bh = NULL;
43209b7895efSMark Fasheh 	handle_t *handle;
43219b7895efSMark Fasheh 	u64 blk;
43229b7895efSMark Fasheh 	u16 bit;
43239b7895efSMark Fasheh 	u64 bg_blkno;
43249b7895efSMark Fasheh 
43259b7895efSMark Fasheh 	dx_root = (struct ocfs2_dx_root_block *) dx_root_bh->b_data;
43269b7895efSMark Fasheh 
43279b7895efSMark Fasheh 	dx_alloc_inode = ocfs2_get_system_file_inode(osb,
43289b7895efSMark Fasheh 					EXTENT_ALLOC_SYSTEM_INODE,
43299b7895efSMark Fasheh 					le16_to_cpu(dx_root->dr_suballoc_slot));
43309b7895efSMark Fasheh 	if (!dx_alloc_inode) {
43319b7895efSMark Fasheh 		ret = -ENOMEM;
43329b7895efSMark Fasheh 		mlog_errno(ret);
43339b7895efSMark Fasheh 		goto out;
43349b7895efSMark Fasheh 	}
43355955102cSAl Viro 	inode_lock(dx_alloc_inode);
43369b7895efSMark Fasheh 
43379b7895efSMark Fasheh 	ret = ocfs2_inode_lock(dx_alloc_inode, &dx_alloc_bh, 1);
43389b7895efSMark Fasheh 	if (ret) {
43399b7895efSMark Fasheh 		mlog_errno(ret);
43409b7895efSMark Fasheh 		goto out_mutex;
43419b7895efSMark Fasheh 	}
43429b7895efSMark Fasheh 
43439b7895efSMark Fasheh 	handle = ocfs2_start_trans(osb, OCFS2_DX_ROOT_REMOVE_CREDITS);
43449b7895efSMark Fasheh 	if (IS_ERR(handle)) {
43459b7895efSMark Fasheh 		ret = PTR_ERR(handle);
43469b7895efSMark Fasheh 		mlog_errno(ret);
43479b7895efSMark Fasheh 		goto out_unlock;
43489b7895efSMark Fasheh 	}
43499b7895efSMark Fasheh 
43500cf2f763SJoel Becker 	ret = ocfs2_journal_access_di(handle, INODE_CACHE(dir), di_bh,
43519b7895efSMark Fasheh 				      OCFS2_JOURNAL_ACCESS_WRITE);
43529b7895efSMark Fasheh 	if (ret) {
43539b7895efSMark Fasheh 		mlog_errno(ret);
43549b7895efSMark Fasheh 		goto out_commit;
43559b7895efSMark Fasheh 	}
43569b7895efSMark Fasheh 
43578ac33dc8STao Ma 	spin_lock(&OCFS2_I(dir)->ip_lock);
43589b7895efSMark Fasheh 	OCFS2_I(dir)->ip_dyn_features &= ~OCFS2_INDEXED_DIR_FL;
43599b7895efSMark Fasheh 	di->i_dyn_features = cpu_to_le16(OCFS2_I(dir)->ip_dyn_features);
43608ac33dc8STao Ma 	spin_unlock(&OCFS2_I(dir)->ip_lock);
43619b7895efSMark Fasheh 	di->i_dx_root = cpu_to_le64(0ULL);
43626fdb702dSDarrick J. Wong 	ocfs2_update_inode_fsync_trans(handle, dir, 1);
43639b7895efSMark Fasheh 
43649b7895efSMark Fasheh 	ocfs2_journal_dirty(handle, di_bh);
43659b7895efSMark Fasheh 
43669b7895efSMark Fasheh 	blk = le64_to_cpu(dx_root->dr_blkno);
43679b7895efSMark Fasheh 	bit = le16_to_cpu(dx_root->dr_suballoc_bit);
436874380c47STao Ma 	if (dx_root->dr_suballoc_loc)
436974380c47STao Ma 		bg_blkno = le64_to_cpu(dx_root->dr_suballoc_loc);
437074380c47STao Ma 	else
43719b7895efSMark Fasheh 		bg_blkno = ocfs2_which_suballoc_group(blk, bit);
43729b7895efSMark Fasheh 	ret = ocfs2_free_suballoc_bits(handle, dx_alloc_inode, dx_alloc_bh,
43739b7895efSMark Fasheh 				       bit, bg_blkno, 1);
43749b7895efSMark Fasheh 	if (ret)
43759b7895efSMark Fasheh 		mlog_errno(ret);
43769b7895efSMark Fasheh 
43779b7895efSMark Fasheh out_commit:
43789b7895efSMark Fasheh 	ocfs2_commit_trans(osb, handle);
43799b7895efSMark Fasheh 
43809b7895efSMark Fasheh out_unlock:
43819b7895efSMark Fasheh 	ocfs2_inode_unlock(dx_alloc_inode, 1);
43829b7895efSMark Fasheh 
43839b7895efSMark Fasheh out_mutex:
43845955102cSAl Viro 	inode_unlock(dx_alloc_inode);
43859b7895efSMark Fasheh 	brelse(dx_alloc_bh);
43869b7895efSMark Fasheh out:
43879b7895efSMark Fasheh 	iput(dx_alloc_inode);
43889b7895efSMark Fasheh 	return ret;
43899b7895efSMark Fasheh }
43909b7895efSMark Fasheh 
ocfs2_dx_dir_truncate(struct inode * dir,struct buffer_head * di_bh)43919b7895efSMark Fasheh int ocfs2_dx_dir_truncate(struct inode *dir, struct buffer_head *di_bh)
43929b7895efSMark Fasheh {
43939b7895efSMark Fasheh 	int ret;
43943f649ab7SKees Cook 	unsigned int clen;
43953f649ab7SKees Cook 	u32 major_hash = UINT_MAX, p_cpos, cpos;
43963f649ab7SKees Cook 	u64 blkno;
43979b7895efSMark Fasheh 	struct ocfs2_super *osb = OCFS2_SB(dir->i_sb);
43989b7895efSMark Fasheh 	struct buffer_head *dx_root_bh = NULL;
43999b7895efSMark Fasheh 	struct ocfs2_dx_root_block *dx_root;
44009b7895efSMark Fasheh 	struct ocfs2_dinode *di = (struct ocfs2_dinode *)di_bh->b_data;
44019b7895efSMark Fasheh 	struct ocfs2_cached_dealloc_ctxt dealloc;
44029b7895efSMark Fasheh 	struct ocfs2_extent_tree et;
44039b7895efSMark Fasheh 
44049b7895efSMark Fasheh 	ocfs2_init_dealloc_ctxt(&dealloc);
44059b7895efSMark Fasheh 
44069b7895efSMark Fasheh 	if (!ocfs2_dir_indexed(dir))
44079b7895efSMark Fasheh 		return 0;
44089b7895efSMark Fasheh 
44099b7895efSMark Fasheh 	ret = ocfs2_read_dx_root(dir, di, &dx_root_bh);
44109b7895efSMark Fasheh 	if (ret) {
44119b7895efSMark Fasheh 		mlog_errno(ret);
44129b7895efSMark Fasheh 		goto out;
44139b7895efSMark Fasheh 	}
44144ed8a6bbSMark Fasheh 	dx_root = (struct ocfs2_dx_root_block *)dx_root_bh->b_data;
44154ed8a6bbSMark Fasheh 
44164ed8a6bbSMark Fasheh 	if (ocfs2_dx_root_inline(dx_root))
44174ed8a6bbSMark Fasheh 		goto remove_index;
44189b7895efSMark Fasheh 
44195e404e9eSJoel Becker 	ocfs2_init_dx_root_extent_tree(&et, INODE_CACHE(dir), dx_root_bh);
44209b7895efSMark Fasheh 
44219b7895efSMark Fasheh 	/* XXX: What if dr_clusters is too large? */
44229b7895efSMark Fasheh 	while (le32_to_cpu(dx_root->dr_clusters)) {
44239b7895efSMark Fasheh 		ret = ocfs2_dx_dir_lookup_rec(dir, &dx_root->dr_list,
44249b7895efSMark Fasheh 					      major_hash, &cpos, &blkno, &clen);
44259b7895efSMark Fasheh 		if (ret) {
44269b7895efSMark Fasheh 			mlog_errno(ret);
44279b7895efSMark Fasheh 			goto out;
44289b7895efSMark Fasheh 		}
44299b7895efSMark Fasheh 
44309b7895efSMark Fasheh 		p_cpos = ocfs2_blocks_to_clusters(dir->i_sb, blkno);
44319b7895efSMark Fasheh 
443278f94673STristan Ye 		ret = ocfs2_remove_btree_range(dir, &et, cpos, p_cpos, clen, 0,
4433f62f12b3SJunxiao Bi 					       &dealloc, 0, false);
44349b7895efSMark Fasheh 		if (ret) {
44359b7895efSMark Fasheh 			mlog_errno(ret);
44369b7895efSMark Fasheh 			goto out;
44379b7895efSMark Fasheh 		}
44389b7895efSMark Fasheh 
44399b7895efSMark Fasheh 		if (cpos == 0)
44409b7895efSMark Fasheh 			break;
44419b7895efSMark Fasheh 
44429b7895efSMark Fasheh 		major_hash = cpos - 1;
44439b7895efSMark Fasheh 	}
44449b7895efSMark Fasheh 
44454ed8a6bbSMark Fasheh remove_index:
44469b7895efSMark Fasheh 	ret = ocfs2_dx_dir_remove_index(dir, di_bh, dx_root_bh);
44479b7895efSMark Fasheh 	if (ret) {
44489b7895efSMark Fasheh 		mlog_errno(ret);
44499b7895efSMark Fasheh 		goto out;
44509b7895efSMark Fasheh 	}
44519b7895efSMark Fasheh 
44528cb471e8SJoel Becker 	ocfs2_remove_from_cache(INODE_CACHE(dir), dx_root_bh);
44539b7895efSMark Fasheh out:
44549b7895efSMark Fasheh 	ocfs2_schedule_truncate_log_flush(osb, 1);
44559b7895efSMark Fasheh 	ocfs2_run_deallocs(osb, &dealloc);
44569b7895efSMark Fasheh 
44579b7895efSMark Fasheh 	brelse(dx_root_bh);
44589b7895efSMark Fasheh 	return ret;
44599b7895efSMark Fasheh }
4460