// SPDX-License-Identifier: GPL-2.0-or-later
/*
 * dir.c
 *
 * Creates, reads, walks and deletes directory-nodes
 *
 * Copyright (C) 2002, 2004 Oracle.  All rights reserved.
 *
 *  Portions of this code from linux/fs/ext3/dir.c
 *
 *  Copyright (C) 1992, 1993, 1994, 1995
 *  Remy Card (card@masi.ibp.fr)
 *  Laboratoire MASI - Institut Blaise Pascal
 *  Universite Pierre et Marie Curie (Paris VI)
 *
 *   from
 *
 *   linux/fs/minix/dir.c
 *
 *   Copyright (C) 1991, 1992  Linus Torvalds
 */
22ccd979bdSMark Fasheh
23ccd979bdSMark Fasheh #include <linux/fs.h>
24ccd979bdSMark Fasheh #include <linux/types.h>
25ccd979bdSMark Fasheh #include <linux/slab.h>
26ccd979bdSMark Fasheh #include <linux/highmem.h>
27a90714c1SJan Kara #include <linux/quotaops.h>
289b7895efSMark Fasheh #include <linux/sort.h>
29cc56c33eSJeff Layton #include <linux/iversion.h>
30ccd979bdSMark Fasheh
31ccd979bdSMark Fasheh #include <cluster/masklog.h>
32ccd979bdSMark Fasheh
33ccd979bdSMark Fasheh #include "ocfs2.h"
34ccd979bdSMark Fasheh
35ccd979bdSMark Fasheh #include "alloc.h"
36c175a518SJoel Becker #include "blockcheck.h"
37ccd979bdSMark Fasheh #include "dir.h"
38ccd979bdSMark Fasheh #include "dlmglue.h"
39ccd979bdSMark Fasheh #include "extent_map.h"
40ccd979bdSMark Fasheh #include "file.h"
41ccd979bdSMark Fasheh #include "inode.h"
42ccd979bdSMark Fasheh #include "journal.h"
43ccd979bdSMark Fasheh #include "namei.h"
44ccd979bdSMark Fasheh #include "suballoc.h"
45316f4b9fSMark Fasheh #include "super.h"
469b7895efSMark Fasheh #include "sysfile.h"
47ccd979bdSMark Fasheh #include "uptodate.h"
48f1088d47STao Ma #include "ocfs2_trace.h"
49ccd979bdSMark Fasheh
50ccd979bdSMark Fasheh #include "buffer_head_io.h"
51ccd979bdSMark Fasheh
52316f4b9fSMark Fasheh #define NAMEI_RA_CHUNKS 2
53316f4b9fSMark Fasheh #define NAMEI_RA_BLOCKS 4
54316f4b9fSMark Fasheh #define NAMEI_RA_SIZE (NAMEI_RA_CHUNKS * NAMEI_RA_BLOCKS)
55316f4b9fSMark Fasheh
56316f4b9fSMark Fasheh static int ocfs2_do_extend_dir(struct super_block *sb,
57316f4b9fSMark Fasheh handle_t *handle,
58316f4b9fSMark Fasheh struct inode *dir,
59316f4b9fSMark Fasheh struct buffer_head *parent_fe_bh,
60316f4b9fSMark Fasheh struct ocfs2_alloc_context *data_ac,
61316f4b9fSMark Fasheh struct ocfs2_alloc_context *meta_ac,
62316f4b9fSMark Fasheh struct buffer_head **new_bh);
63e7c17e43SMark Fasheh static int ocfs2_dir_indexed(struct inode *inode);
64316f4b9fSMark Fasheh
6523193e51SMark Fasheh /*
6687d35a74SMark Fasheh * These are distinct checks because future versions of the file system will
6787d35a74SMark Fasheh * want to have a trailing dirent structure independent of indexing.
6887d35a74SMark Fasheh */
ocfs2_supports_dir_trailer(struct inode * dir)69e7c17e43SMark Fasheh static int ocfs2_supports_dir_trailer(struct inode *dir)
7087d35a74SMark Fasheh {
71e7c17e43SMark Fasheh struct ocfs2_super *osb = OCFS2_SB(dir->i_sb);
72e7c17e43SMark Fasheh
7387d35a74SMark Fasheh if (OCFS2_I(dir)->ip_dyn_features & OCFS2_INLINE_DATA_FL)
7487d35a74SMark Fasheh return 0;
7587d35a74SMark Fasheh
76e7c17e43SMark Fasheh return ocfs2_meta_ecc(osb) || ocfs2_dir_indexed(dir);
7787d35a74SMark Fasheh }
7887d35a74SMark Fasheh
79e7c17e43SMark Fasheh /*
80e7c17e43SMark Fasheh * "new' here refers to the point at which we're creating a new
81e7c17e43SMark Fasheh * directory via "mkdir()", but also when we're expanding an inline
82e7c17e43SMark Fasheh * directory. In either case, we don't yet have the indexing bit set
83e7c17e43SMark Fasheh * on the directory, so the standard checks will fail in when metaecc
84e7c17e43SMark Fasheh * is turned off. Only directory-initialization type functions should
85e7c17e43SMark Fasheh * use this then. Everything else wants ocfs2_supports_dir_trailer()
86e7c17e43SMark Fasheh */
ocfs2_new_dir_wants_trailer(struct inode * dir)87e7c17e43SMark Fasheh static int ocfs2_new_dir_wants_trailer(struct inode *dir)
8887d35a74SMark Fasheh {
89e7c17e43SMark Fasheh struct ocfs2_super *osb = OCFS2_SB(dir->i_sb);
90e7c17e43SMark Fasheh
91e7c17e43SMark Fasheh return ocfs2_meta_ecc(osb) ||
92e7c17e43SMark Fasheh ocfs2_supports_indexed_dirs(osb);
9387d35a74SMark Fasheh }
9487d35a74SMark Fasheh
ocfs2_dir_trailer_blk_off(struct super_block * sb)9587d35a74SMark Fasheh static inline unsigned int ocfs2_dir_trailer_blk_off(struct super_block *sb)
9687d35a74SMark Fasheh {
9787d35a74SMark Fasheh return sb->s_blocksize - sizeof(struct ocfs2_dir_block_trailer);
9887d35a74SMark Fasheh }
9987d35a74SMark Fasheh
/*
 * Return a pointer to the directory block trailer inside the data of
 * buffer_head _bh, using superblock _sb to find the trailer offset.
 */
#define ocfs2_trailer_from_bh(_bh, _sb)					\
	((struct ocfs2_dir_block_trailer *)				\
	 ((_bh)->b_data + ocfs2_dir_trailer_blk_off((_sb))))
10187d35a74SMark Fasheh
102c175a518SJoel Becker /* XXX ocfs2_block_dqtrailer() is similar but not quite - can we make
103c175a518SJoel Becker * them more consistent? */
ocfs2_dir_trailer_from_size(int blocksize,void * data)104c175a518SJoel Becker struct ocfs2_dir_block_trailer *ocfs2_dir_trailer_from_size(int blocksize,
105c175a518SJoel Becker void *data)
106c175a518SJoel Becker {
107c175a518SJoel Becker char *p = data;
108c175a518SJoel Becker
109c175a518SJoel Becker p += blocksize - sizeof(struct ocfs2_dir_block_trailer);
110c175a518SJoel Becker return (struct ocfs2_dir_block_trailer *)p;
111c175a518SJoel Becker }
112c175a518SJoel Becker
11387d35a74SMark Fasheh /*
11487d35a74SMark Fasheh * XXX: This is executed once on every dirent. We should consider optimizing
11587d35a74SMark Fasheh * it.
11687d35a74SMark Fasheh */
ocfs2_skip_dir_trailer(struct inode * dir,struct ocfs2_dir_entry * de,unsigned long offset,unsigned long blklen)11787d35a74SMark Fasheh static int ocfs2_skip_dir_trailer(struct inode *dir,
11887d35a74SMark Fasheh struct ocfs2_dir_entry *de,
11987d35a74SMark Fasheh unsigned long offset,
12087d35a74SMark Fasheh unsigned long blklen)
12187d35a74SMark Fasheh {
12287d35a74SMark Fasheh unsigned long toff = blklen - sizeof(struct ocfs2_dir_block_trailer);
12387d35a74SMark Fasheh
124e7c17e43SMark Fasheh if (!ocfs2_supports_dir_trailer(dir))
12587d35a74SMark Fasheh return 0;
12687d35a74SMark Fasheh
12787d35a74SMark Fasheh if (offset != toff)
12887d35a74SMark Fasheh return 0;
12987d35a74SMark Fasheh
13087d35a74SMark Fasheh return 1;
13187d35a74SMark Fasheh }
13287d35a74SMark Fasheh
/*
 * Fill in the trailer of directory block 'bh', which belongs to 'inode':
 * signature, compat record length (the size of the trailer itself), the
 * owning dinode's block number, this block's own number and the free
 * record length supplied by the caller.  All multi-byte fields are stored
 * little-endian.
 */
static void ocfs2_init_dir_trailer(struct inode *inode,
				   struct buffer_head *bh, u16 rec_len)
{
	struct ocfs2_dir_block_trailer *t =
		ocfs2_trailer_from_bh(bh, inode->i_sb);

	strcpy(t->db_signature, OCFS2_DIR_TRAILER_SIGNATURE);
	t->db_compat_rec_len = cpu_to_le16(sizeof(*t));
	t->db_parent_dinode = cpu_to_le64(OCFS2_I(inode)->ip_blkno);
	t->db_blkno = cpu_to_le64(bh->b_blocknr);
	t->db_free_rec_len = cpu_to_le16(rec_len);
}
146e7c17e43SMark Fasheh /*
147e7c17e43SMark Fasheh * Link an unindexed block with a dir trailer structure into the index free
148e7c17e43SMark Fasheh * list. This function will modify dirdata_bh, but assumes you've already
149e7c17e43SMark Fasheh * passed it to the journal.
150e7c17e43SMark Fasheh */
ocfs2_dx_dir_link_trailer(struct inode * dir,handle_t * handle,struct buffer_head * dx_root_bh,struct buffer_head * dirdata_bh)151e7c17e43SMark Fasheh static int ocfs2_dx_dir_link_trailer(struct inode *dir, handle_t *handle,
152e7c17e43SMark Fasheh struct buffer_head *dx_root_bh,
153e7c17e43SMark Fasheh struct buffer_head *dirdata_bh)
154e7c17e43SMark Fasheh {
155e7c17e43SMark Fasheh int ret;
156e7c17e43SMark Fasheh struct ocfs2_dx_root_block *dx_root;
157e7c17e43SMark Fasheh struct ocfs2_dir_block_trailer *trailer;
158e7c17e43SMark Fasheh
1590cf2f763SJoel Becker ret = ocfs2_journal_access_dr(handle, INODE_CACHE(dir), dx_root_bh,
160e7c17e43SMark Fasheh OCFS2_JOURNAL_ACCESS_WRITE);
161e7c17e43SMark Fasheh if (ret) {
162e7c17e43SMark Fasheh mlog_errno(ret);
163e7c17e43SMark Fasheh goto out;
164e7c17e43SMark Fasheh }
165e7c17e43SMark Fasheh trailer = ocfs2_trailer_from_bh(dirdata_bh, dir->i_sb);
166e7c17e43SMark Fasheh dx_root = (struct ocfs2_dx_root_block *)dx_root_bh->b_data;
167e7c17e43SMark Fasheh
168e7c17e43SMark Fasheh trailer->db_free_next = dx_root->dr_free_blk;
169e7c17e43SMark Fasheh dx_root->dr_free_blk = cpu_to_le64(dirdata_bh->b_blocknr);
170e7c17e43SMark Fasheh
171e7c17e43SMark Fasheh ocfs2_journal_dirty(handle, dx_root_bh);
172e7c17e43SMark Fasheh
173e7c17e43SMark Fasheh out:
174e7c17e43SMark Fasheh return ret;
175e7c17e43SMark Fasheh }
176e7c17e43SMark Fasheh
ocfs2_free_list_at_root(struct ocfs2_dir_lookup_result * res)177e7c17e43SMark Fasheh static int ocfs2_free_list_at_root(struct ocfs2_dir_lookup_result *res)
178e7c17e43SMark Fasheh {
179e7c17e43SMark Fasheh return res->dl_prev_leaf_bh == NULL;
18087d35a74SMark Fasheh }
18187d35a74SMark Fasheh
ocfs2_free_dir_lookup_result(struct ocfs2_dir_lookup_result * res)1824a12ca3aSMark Fasheh void ocfs2_free_dir_lookup_result(struct ocfs2_dir_lookup_result *res)
1834a12ca3aSMark Fasheh {
1844ed8a6bbSMark Fasheh brelse(res->dl_dx_root_bh);
1854a12ca3aSMark Fasheh brelse(res->dl_leaf_bh);
1869b7895efSMark Fasheh brelse(res->dl_dx_leaf_bh);
187e7c17e43SMark Fasheh brelse(res->dl_prev_leaf_bh);
1889b7895efSMark Fasheh }
1899b7895efSMark Fasheh
ocfs2_dir_indexed(struct inode * inode)1909b7895efSMark Fasheh static int ocfs2_dir_indexed(struct inode *inode)
1919b7895efSMark Fasheh {
1929b7895efSMark Fasheh if (OCFS2_I(inode)->ip_dyn_features & OCFS2_INDEXED_DIR_FL)
1939b7895efSMark Fasheh return 1;
1949b7895efSMark Fasheh return 0;
1959b7895efSMark Fasheh }
1969b7895efSMark Fasheh
ocfs2_dx_root_inline(struct ocfs2_dx_root_block * dx_root)1974ed8a6bbSMark Fasheh static inline int ocfs2_dx_root_inline(struct ocfs2_dx_root_block *dx_root)
1984ed8a6bbSMark Fasheh {
1994ed8a6bbSMark Fasheh return dx_root->dr_flags & OCFS2_DX_FLAG_INLINE;
2004ed8a6bbSMark Fasheh }
2014ed8a6bbSMark Fasheh
/*
 * Hashing code adapted from ext3
 */
#define DELTA 0x9E3779B9

/*
 * Mix four 32-bit input words into the running hash state.
 *
 * Runs 16 rounds starting from buf[0]/buf[1], keyed by in[0..3], and then
 * adds the result back into buf[0] and buf[1].  buf[2] and buf[3] are not
 * touched.
 */
static void TEA_transform(__u32 buf[4], __u32 const in[])
{
	__u32 x = buf[0];
	__u32 y = buf[1];
	__u32 sum = 0;
	int round;

	for (round = 0; round < 16; round++) {
		sum += DELTA;
		x += ((y << 4) + in[0]) ^ (y + sum) ^ ((y >> 5) + in[1]);
		y += ((x << 4) + in[2]) ^ (x + sum) ^ ((x >> 5) + in[3]);
	}

	buf[0] += x;
	buf[1] += y;
}
2239b7895efSMark Fasheh
/*
 * Pack up to num * 4 bytes of 'msg' into 'num' 32-bit words for hashing.
 *
 * Bytes are shifted in most-significant first, four per word.  A padding
 * word, built by replicating the low byte of the original 'len' into all
 * four byte lanes, seeds every word and fills any words left over once the
 * input runs out.  Exactly 'num' words are written to 'buf' (none if num
 * is not positive).
 */
static void str2hashbuf(const char *msg, int len, __u32 *buf, int num)
{
	__u32 fill, word;
	int pos;

	/* The pad is derived from the length before it is clamped below. */
	fill = (__u32)len | ((__u32)len << 8);
	fill |= fill << 16;

	if (len > num * 4)
		len = num * 4;

	word = fill;
	for (pos = 0; pos < len; pos++) {
		if ((pos & 3) == 0)
			word = fill;
		word = msg[pos] + (word << 8);
		if ((pos & 3) == 3) {
			/* A full word has been assembled; emit it. */
			*buf++ = word;
			word = fill;
			num--;
		}
	}

	/* Emit the partially filled word, then pad out what remains. */
	if (--num >= 0)
		*buf++ = word;
	while (--num >= 0)
		*buf++ = fill;
}
2509b7895efSMark Fasheh
ocfs2_dx_dir_name_hash(struct inode * dir,const char * name,int len,struct ocfs2_dx_hinfo * hinfo)2519b7895efSMark Fasheh static void ocfs2_dx_dir_name_hash(struct inode *dir, const char *name, int len,
2529b7895efSMark Fasheh struct ocfs2_dx_hinfo *hinfo)
2539b7895efSMark Fasheh {
2549b7895efSMark Fasheh struct ocfs2_super *osb = OCFS2_SB(dir->i_sb);
2559b7895efSMark Fasheh const char *p;
2569b7895efSMark Fasheh __u32 in[8], buf[4];
2579b7895efSMark Fasheh
2589b7895efSMark Fasheh /*
2599b7895efSMark Fasheh * XXX: Is this really necessary, if the index is never looked
2609b7895efSMark Fasheh * at by readdir? Is a hash value of '0' a bad idea?
2619b7895efSMark Fasheh */
2629b7895efSMark Fasheh if ((len == 1 && !strncmp(".", name, 1)) ||
2639b7895efSMark Fasheh (len == 2 && !strncmp("..", name, 2))) {
2649b7895efSMark Fasheh buf[0] = buf[1] = 0;
2659b7895efSMark Fasheh goto out;
2669b7895efSMark Fasheh }
2679b7895efSMark Fasheh
2689b7895efSMark Fasheh #ifdef OCFS2_DEBUG_DX_DIRS
2699b7895efSMark Fasheh /*
2709b7895efSMark Fasheh * This makes it very easy to debug indexing problems. We
2719b7895efSMark Fasheh * should never allow this to be selected without hand editing
2729b7895efSMark Fasheh * this file though.
2739b7895efSMark Fasheh */
2749b7895efSMark Fasheh buf[0] = buf[1] = len;
2759b7895efSMark Fasheh goto out;
2769b7895efSMark Fasheh #endif
2779b7895efSMark Fasheh
2789b7895efSMark Fasheh memcpy(buf, osb->osb_dx_seed, sizeof(buf));
2799b7895efSMark Fasheh
2809b7895efSMark Fasheh p = name;
2819b7895efSMark Fasheh while (len > 0) {
2829b7895efSMark Fasheh str2hashbuf(p, len, in, 4);
2839b7895efSMark Fasheh TEA_transform(buf, in);
2849b7895efSMark Fasheh len -= 16;
2859b7895efSMark Fasheh p += 16;
2869b7895efSMark Fasheh }
2879b7895efSMark Fasheh
2889b7895efSMark Fasheh out:
2899b7895efSMark Fasheh hinfo->major_hash = buf[0];
2909b7895efSMark Fasheh hinfo->minor_hash = buf[1];
2914a12ca3aSMark Fasheh }
2924a12ca3aSMark Fasheh
29387d35a74SMark Fasheh /*
29423193e51SMark Fasheh * bh passed here can be an inode block or a dir data block, depending
29523193e51SMark Fasheh * on the inode inline data flag.
29623193e51SMark Fasheh */
ocfs2_check_dir_entry(struct inode * dir,struct ocfs2_dir_entry * de,struct buffer_head * bh,char * buf,unsigned int size,unsigned long offset)2975eae5b96SMark Fasheh static int ocfs2_check_dir_entry(struct inode *dir,
298316f4b9fSMark Fasheh struct ocfs2_dir_entry *de,
299316f4b9fSMark Fasheh struct buffer_head *bh,
300e05a2428Slei lu char *buf,
301e05a2428Slei lu unsigned int size,
302316f4b9fSMark Fasheh unsigned long offset)
303316f4b9fSMark Fasheh {
304316f4b9fSMark Fasheh const char *error_msg = NULL;
305316f4b9fSMark Fasheh const int rlen = le16_to_cpu(de->rec_len);
306e05a2428Slei lu const unsigned long next_offset = ((char *) de - buf) + rlen;
307316f4b9fSMark Fasheh
3081dd9ffc8STao Ma if (unlikely(rlen < OCFS2_DIR_REC_LEN(1)))
309316f4b9fSMark Fasheh error_msg = "rec_len is smaller than minimal";
3101dd9ffc8STao Ma else if (unlikely(rlen % 4 != 0))
311316f4b9fSMark Fasheh error_msg = "rec_len % 4 != 0";
3121dd9ffc8STao Ma else if (unlikely(rlen < OCFS2_DIR_REC_LEN(de->name_len)))
313316f4b9fSMark Fasheh error_msg = "rec_len is too small for name_len";
314e05a2428Slei lu else if (unlikely(next_offset > size))
315e05a2428Slei lu error_msg = "directory entry overrun";
316e05a2428Slei lu else if (unlikely(next_offset > size - OCFS2_DIR_REC_LEN(1)) &&
317e05a2428Slei lu next_offset != size)
318e05a2428Slei lu error_msg = "directory entry too close to end";
319316f4b9fSMark Fasheh
3201dd9ffc8STao Ma if (unlikely(error_msg != NULL))
321316f4b9fSMark Fasheh mlog(ML_ERROR, "bad entry in directory #%llu: %s - "
322316f4b9fSMark Fasheh "offset=%lu, inode=%llu, rec_len=%d, name_len=%d\n",
323316f4b9fSMark Fasheh (unsigned long long)OCFS2_I(dir)->ip_blkno, error_msg,
324316f4b9fSMark Fasheh offset, (unsigned long long)le64_to_cpu(de->inode), rlen,
325316f4b9fSMark Fasheh de->name_len);
3261dd9ffc8STao Ma
327316f4b9fSMark Fasheh return error_msg == NULL ? 1 : 0;
328316f4b9fSMark Fasheh }
329316f4b9fSMark Fasheh
ocfs2_match(int len,const char * const name,struct ocfs2_dir_entry * de)330316f4b9fSMark Fasheh static inline int ocfs2_match(int len,
331316f4b9fSMark Fasheh const char * const name,
332316f4b9fSMark Fasheh struct ocfs2_dir_entry *de)
333316f4b9fSMark Fasheh {
334316f4b9fSMark Fasheh if (len != de->name_len)
335316f4b9fSMark Fasheh return 0;
336316f4b9fSMark Fasheh if (!de->inode)
337316f4b9fSMark Fasheh return 0;
338316f4b9fSMark Fasheh return !memcmp(name, de->name, len);
339316f4b9fSMark Fasheh }
340316f4b9fSMark Fasheh
341316f4b9fSMark Fasheh /*
342316f4b9fSMark Fasheh * Returns 0 if not found, -1 on failure, and 1 on success
343316f4b9fSMark Fasheh */
ocfs2_search_dirblock(struct buffer_head * bh,struct inode * dir,const char * name,int namelen,unsigned long offset,char * first_de,unsigned int bytes,struct ocfs2_dir_entry ** res_dir)34442b16b3fSJesper Juhl static inline int ocfs2_search_dirblock(struct buffer_head *bh,
345316f4b9fSMark Fasheh struct inode *dir,
346316f4b9fSMark Fasheh const char *name, int namelen,
347316f4b9fSMark Fasheh unsigned long offset,
34823193e51SMark Fasheh char *first_de,
34923193e51SMark Fasheh unsigned int bytes,
350316f4b9fSMark Fasheh struct ocfs2_dir_entry **res_dir)
351316f4b9fSMark Fasheh {
352316f4b9fSMark Fasheh struct ocfs2_dir_entry *de;
353316f4b9fSMark Fasheh char *dlimit, *de_buf;
354316f4b9fSMark Fasheh int de_len;
355316f4b9fSMark Fasheh int ret = 0;
356316f4b9fSMark Fasheh
35723193e51SMark Fasheh de_buf = first_de;
35823193e51SMark Fasheh dlimit = de_buf + bytes;
359316f4b9fSMark Fasheh
360e05a2428Slei lu while (de_buf < dlimit - OCFS2_DIR_MEMBER_LEN) {
361316f4b9fSMark Fasheh /* this code is executed quadratically often */
362316f4b9fSMark Fasheh /* do minimal checking `by hand' */
363316f4b9fSMark Fasheh
364316f4b9fSMark Fasheh de = (struct ocfs2_dir_entry *) de_buf;
365316f4b9fSMark Fasheh
366e05a2428Slei lu if (de->name + namelen <= dlimit &&
367316f4b9fSMark Fasheh ocfs2_match(namelen, name, de)) {
368316f4b9fSMark Fasheh /* found a match - just to be sure, do a full check */
369e05a2428Slei lu if (!ocfs2_check_dir_entry(dir, de, bh, first_de,
370e05a2428Slei lu bytes, offset)) {
371316f4b9fSMark Fasheh ret = -1;
372316f4b9fSMark Fasheh goto bail;
373316f4b9fSMark Fasheh }
374316f4b9fSMark Fasheh *res_dir = de;
375316f4b9fSMark Fasheh ret = 1;
376316f4b9fSMark Fasheh goto bail;
377316f4b9fSMark Fasheh }
378316f4b9fSMark Fasheh
379316f4b9fSMark Fasheh /* prevent looping on a bad block */
380316f4b9fSMark Fasheh de_len = le16_to_cpu(de->rec_len);
381316f4b9fSMark Fasheh if (de_len <= 0) {
382316f4b9fSMark Fasheh ret = -1;
383316f4b9fSMark Fasheh goto bail;
384316f4b9fSMark Fasheh }
385316f4b9fSMark Fasheh
386316f4b9fSMark Fasheh de_buf += de_len;
387316f4b9fSMark Fasheh offset += de_len;
388316f4b9fSMark Fasheh }
389316f4b9fSMark Fasheh
390316f4b9fSMark Fasheh bail:
391f1088d47STao Ma trace_ocfs2_search_dirblock(ret);
392316f4b9fSMark Fasheh return ret;
393316f4b9fSMark Fasheh }
394316f4b9fSMark Fasheh
ocfs2_find_entry_id(const char * name,int namelen,struct inode * dir,struct ocfs2_dir_entry ** res_dir)39523193e51SMark Fasheh static struct buffer_head *ocfs2_find_entry_id(const char *name,
39623193e51SMark Fasheh int namelen,
39723193e51SMark Fasheh struct inode *dir,
39823193e51SMark Fasheh struct ocfs2_dir_entry **res_dir)
39923193e51SMark Fasheh {
40023193e51SMark Fasheh int ret, found;
40123193e51SMark Fasheh struct buffer_head *di_bh = NULL;
40223193e51SMark Fasheh struct ocfs2_dinode *di;
40323193e51SMark Fasheh struct ocfs2_inline_data *data;
40423193e51SMark Fasheh
405b657c95cSJoel Becker ret = ocfs2_read_inode_block(dir, &di_bh);
40623193e51SMark Fasheh if (ret) {
40723193e51SMark Fasheh mlog_errno(ret);
40823193e51SMark Fasheh goto out;
40923193e51SMark Fasheh }
41023193e51SMark Fasheh
41123193e51SMark Fasheh di = (struct ocfs2_dinode *)di_bh->b_data;
41223193e51SMark Fasheh data = &di->id2.i_data;
41323193e51SMark Fasheh
41423193e51SMark Fasheh found = ocfs2_search_dirblock(di_bh, dir, name, namelen, 0,
41523193e51SMark Fasheh data->id_data, i_size_read(dir), res_dir);
41623193e51SMark Fasheh if (found == 1)
41723193e51SMark Fasheh return di_bh;
41823193e51SMark Fasheh
41923193e51SMark Fasheh brelse(di_bh);
42023193e51SMark Fasheh out:
42123193e51SMark Fasheh return NULL;
42223193e51SMark Fasheh }
42323193e51SMark Fasheh
ocfs2_validate_dir_block(struct super_block * sb,struct buffer_head * bh)424a22305ccSJoel Becker static int ocfs2_validate_dir_block(struct super_block *sb,
425a22305ccSJoel Becker struct buffer_head *bh)
426a22305ccSJoel Becker {
427c175a518SJoel Becker int rc;
428c175a518SJoel Becker struct ocfs2_dir_block_trailer *trailer =
429c175a518SJoel Becker ocfs2_trailer_from_bh(bh, sb);
430c175a518SJoel Becker
431c175a518SJoel Becker
432a22305ccSJoel Becker /*
433c175a518SJoel Becker * We don't validate dirents here, that's handled
434a22305ccSJoel Becker * in-place when the code walks them.
435a22305ccSJoel Becker */
436f1088d47STao Ma trace_ocfs2_validate_dir_block((unsigned long long)bh->b_blocknr);
437a22305ccSJoel Becker
438c175a518SJoel Becker BUG_ON(!buffer_uptodate(bh));
439c175a518SJoel Becker
440c175a518SJoel Becker /*
441c175a518SJoel Becker * If the ecc fails, we return the error but otherwise
442c175a518SJoel Becker * leave the filesystem running. We know any error is
443c175a518SJoel Becker * local to this block.
444c175a518SJoel Becker *
445c175a518SJoel Becker * Note that we are safe to call this even if the directory
446c175a518SJoel Becker * doesn't have a trailer. Filesystems without metaecc will do
447c175a518SJoel Becker * nothing, and filesystems with it will have one.
448c175a518SJoel Becker */
449c175a518SJoel Becker rc = ocfs2_validate_meta_ecc(sb, bh->b_data, &trailer->db_check);
450c175a518SJoel Becker if (rc)
451c175a518SJoel Becker mlog(ML_ERROR, "Checksum failed for dinode %llu\n",
452c175a518SJoel Becker (unsigned long long)bh->b_blocknr);
453c175a518SJoel Becker
454c175a518SJoel Becker return rc;
455a22305ccSJoel Becker }
456a22305ccSJoel Becker
457a22305ccSJoel Becker /*
4589b7895efSMark Fasheh * Validate a directory trailer.
4599b7895efSMark Fasheh *
4609b7895efSMark Fasheh * We check the trailer here rather than in ocfs2_validate_dir_block()
4619b7895efSMark Fasheh * because that function doesn't have the inode to test.
4629b7895efSMark Fasheh */
ocfs2_check_dir_trailer(struct inode * dir,struct buffer_head * bh)4639b7895efSMark Fasheh static int ocfs2_check_dir_trailer(struct inode *dir, struct buffer_head *bh)
4649b7895efSMark Fasheh {
4659b7895efSMark Fasheh int rc = 0;
4669b7895efSMark Fasheh struct ocfs2_dir_block_trailer *trailer;
4679b7895efSMark Fasheh
4689b7895efSMark Fasheh trailer = ocfs2_trailer_from_bh(bh, dir->i_sb);
4699b7895efSMark Fasheh if (!OCFS2_IS_VALID_DIR_TRAILER(trailer)) {
47017a5b9abSGoldwyn Rodrigues rc = ocfs2_error(dir->i_sb,
4717ecef14aSJoe Perches "Invalid dirblock #%llu: signature = %.*s\n",
4729b7895efSMark Fasheh (unsigned long long)bh->b_blocknr, 7,
4739b7895efSMark Fasheh trailer->db_signature);
4749b7895efSMark Fasheh goto out;
4759b7895efSMark Fasheh }
4769b7895efSMark Fasheh if (le64_to_cpu(trailer->db_blkno) != bh->b_blocknr) {
47717a5b9abSGoldwyn Rodrigues rc = ocfs2_error(dir->i_sb,
4787ecef14aSJoe Perches "Directory block #%llu has an invalid db_blkno of %llu\n",
4799b7895efSMark Fasheh (unsigned long long)bh->b_blocknr,
4809b7895efSMark Fasheh (unsigned long long)le64_to_cpu(trailer->db_blkno));
4819b7895efSMark Fasheh goto out;
4829b7895efSMark Fasheh }
4839b7895efSMark Fasheh if (le64_to_cpu(trailer->db_parent_dinode) !=
4849b7895efSMark Fasheh OCFS2_I(dir)->ip_blkno) {
48517a5b9abSGoldwyn Rodrigues rc = ocfs2_error(dir->i_sb,
4867ecef14aSJoe Perches "Directory block #%llu on dinode #%llu has an invalid parent_dinode of %llu\n",
4879b7895efSMark Fasheh (unsigned long long)bh->b_blocknr,
4889b7895efSMark Fasheh (unsigned long long)OCFS2_I(dir)->ip_blkno,
4899b7895efSMark Fasheh (unsigned long long)le64_to_cpu(trailer->db_blkno));
4909b7895efSMark Fasheh goto out;
4919b7895efSMark Fasheh }
4929b7895efSMark Fasheh out:
4939b7895efSMark Fasheh return rc;
4949b7895efSMark Fasheh }
4959b7895efSMark Fasheh
4969b7895efSMark Fasheh /*
497a22305ccSJoel Becker * This function forces all errors to -EIO for consistency with its
498a22305ccSJoel Becker * predecessor, ocfs2_bread(). We haven't audited what returning the
499a22305ccSJoel Becker * real error codes would do to callers. We log the real codes with
500a22305ccSJoel Becker * mlog_errno() before we squash them.
501a22305ccSJoel Becker */
ocfs2_read_dir_block(struct inode * inode,u64 v_block,struct buffer_head ** bh,int flags)502a22305ccSJoel Becker static int ocfs2_read_dir_block(struct inode *inode, u64 v_block,
503a22305ccSJoel Becker struct buffer_head **bh, int flags)
504a22305ccSJoel Becker {
505a22305ccSJoel Becker int rc = 0;
506a22305ccSJoel Becker struct buffer_head *tmp = *bh;
507a22305ccSJoel Becker
508511308d9SJoel Becker rc = ocfs2_read_virt_blocks(inode, v_block, 1, &tmp, flags,
509970e4936SJoel Becker ocfs2_validate_dir_block);
51087d35a74SMark Fasheh if (rc) {
511a22305ccSJoel Becker mlog_errno(rc);
51287d35a74SMark Fasheh goto out;
51387d35a74SMark Fasheh }
51487d35a74SMark Fasheh
51587d35a74SMark Fasheh if (!(flags & OCFS2_BH_READAHEAD) &&
516e7c17e43SMark Fasheh ocfs2_supports_dir_trailer(inode)) {
5179b7895efSMark Fasheh rc = ocfs2_check_dir_trailer(inode, tmp);
5189b7895efSMark Fasheh if (rc) {
5199b7895efSMark Fasheh if (!*bh)
5209b7895efSMark Fasheh brelse(tmp);
5219b7895efSMark Fasheh mlog_errno(rc);
52287d35a74SMark Fasheh goto out;
52387d35a74SMark Fasheh }
52487d35a74SMark Fasheh }
525a22305ccSJoel Becker
526511308d9SJoel Becker /* If ocfs2_read_virt_blocks() got us a new bh, pass it up. */
52787d35a74SMark Fasheh if (!*bh)
528a22305ccSJoel Becker *bh = tmp;
529a22305ccSJoel Becker
53087d35a74SMark Fasheh out:
531a22305ccSJoel Becker return rc ? -EIO : 0;
532a22305ccSJoel Becker }
533a22305ccSJoel Becker
5349b7895efSMark Fasheh /*
5359b7895efSMark Fasheh * Read the block at 'phys' which belongs to this directory
5369b7895efSMark Fasheh * inode. This function does no virtual->physical block translation -
5379b7895efSMark Fasheh * what's passed in is assumed to be a valid directory block.
5389b7895efSMark Fasheh */
ocfs2_read_dir_block_direct(struct inode * dir,u64 phys,struct buffer_head ** bh)5399b7895efSMark Fasheh static int ocfs2_read_dir_block_direct(struct inode *dir, u64 phys,
5409b7895efSMark Fasheh struct buffer_head **bh)
5419b7895efSMark Fasheh {
5429b7895efSMark Fasheh int ret;
5439b7895efSMark Fasheh struct buffer_head *tmp = *bh;
5449b7895efSMark Fasheh
5458cb471e8SJoel Becker ret = ocfs2_read_block(INODE_CACHE(dir), phys, &tmp,
5468cb471e8SJoel Becker ocfs2_validate_dir_block);
5479b7895efSMark Fasheh if (ret) {
5489b7895efSMark Fasheh mlog_errno(ret);
5499b7895efSMark Fasheh goto out;
5509b7895efSMark Fasheh }
5519b7895efSMark Fasheh
5529b7895efSMark Fasheh if (ocfs2_supports_dir_trailer(dir)) {
5539b7895efSMark Fasheh ret = ocfs2_check_dir_trailer(dir, tmp);
5549b7895efSMark Fasheh if (ret) {
5559b7895efSMark Fasheh if (!*bh)
5569b7895efSMark Fasheh brelse(tmp);
5579b7895efSMark Fasheh mlog_errno(ret);
5589b7895efSMark Fasheh goto out;
5599b7895efSMark Fasheh }
5609b7895efSMark Fasheh }
5619b7895efSMark Fasheh
5629b7895efSMark Fasheh if (!ret && !*bh)
5639b7895efSMark Fasheh *bh = tmp;
5649b7895efSMark Fasheh out:
5659b7895efSMark Fasheh return ret;
5669b7895efSMark Fasheh }
5679b7895efSMark Fasheh
ocfs2_validate_dx_root(struct super_block * sb,struct buffer_head * bh)5689b7895efSMark Fasheh static int ocfs2_validate_dx_root(struct super_block *sb,
5699b7895efSMark Fasheh struct buffer_head *bh)
5709b7895efSMark Fasheh {
5719b7895efSMark Fasheh int ret;
5729b7895efSMark Fasheh struct ocfs2_dx_root_block *dx_root;
5739b7895efSMark Fasheh
5749b7895efSMark Fasheh BUG_ON(!buffer_uptodate(bh));
5759b7895efSMark Fasheh
5769b7895efSMark Fasheh dx_root = (struct ocfs2_dx_root_block *) bh->b_data;
5779b7895efSMark Fasheh
5789b7895efSMark Fasheh ret = ocfs2_validate_meta_ecc(sb, bh->b_data, &dx_root->dr_check);
5799b7895efSMark Fasheh if (ret) {
5809b7895efSMark Fasheh mlog(ML_ERROR,
5819b7895efSMark Fasheh "Checksum failed for dir index root block %llu\n",
5829b7895efSMark Fasheh (unsigned long long)bh->b_blocknr);
5839b7895efSMark Fasheh return ret;
5849b7895efSMark Fasheh }
5859b7895efSMark Fasheh
5869b7895efSMark Fasheh if (!OCFS2_IS_VALID_DX_ROOT(dx_root)) {
58717a5b9abSGoldwyn Rodrigues ret = ocfs2_error(sb,
5887ecef14aSJoe Perches "Dir Index Root # %llu has bad signature %.*s\n",
5899b7895efSMark Fasheh (unsigned long long)le64_to_cpu(dx_root->dr_blkno),
5909b7895efSMark Fasheh 7, dx_root->dr_signature);
5919b7895efSMark Fasheh }
5929b7895efSMark Fasheh
59317a5b9abSGoldwyn Rodrigues return ret;
5949b7895efSMark Fasheh }
5959b7895efSMark Fasheh
ocfs2_read_dx_root(struct inode * dir,struct ocfs2_dinode * di,struct buffer_head ** dx_root_bh)5969b7895efSMark Fasheh static int ocfs2_read_dx_root(struct inode *dir, struct ocfs2_dinode *di,
5979b7895efSMark Fasheh struct buffer_head **dx_root_bh)
5989b7895efSMark Fasheh {
5999b7895efSMark Fasheh int ret;
6009b7895efSMark Fasheh u64 blkno = le64_to_cpu(di->i_dx_root);
6019b7895efSMark Fasheh struct buffer_head *tmp = *dx_root_bh;
6029b7895efSMark Fasheh
6038cb471e8SJoel Becker ret = ocfs2_read_block(INODE_CACHE(dir), blkno, &tmp,
6048cb471e8SJoel Becker ocfs2_validate_dx_root);
6059b7895efSMark Fasheh
6069b7895efSMark Fasheh /* If ocfs2_read_block() got us a new bh, pass it up. */
6079b7895efSMark Fasheh if (!ret && !*dx_root_bh)
6089b7895efSMark Fasheh *dx_root_bh = tmp;
6099b7895efSMark Fasheh
6109b7895efSMark Fasheh return ret;
6119b7895efSMark Fasheh }
6129b7895efSMark Fasheh
ocfs2_validate_dx_leaf(struct super_block * sb,struct buffer_head * bh)6139b7895efSMark Fasheh static int ocfs2_validate_dx_leaf(struct super_block *sb,
6149b7895efSMark Fasheh struct buffer_head *bh)
6159b7895efSMark Fasheh {
6169b7895efSMark Fasheh int ret;
6179b7895efSMark Fasheh struct ocfs2_dx_leaf *dx_leaf = (struct ocfs2_dx_leaf *)bh->b_data;
6189b7895efSMark Fasheh
6199b7895efSMark Fasheh BUG_ON(!buffer_uptodate(bh));
6209b7895efSMark Fasheh
6219b7895efSMark Fasheh ret = ocfs2_validate_meta_ecc(sb, bh->b_data, &dx_leaf->dl_check);
6229b7895efSMark Fasheh if (ret) {
6239b7895efSMark Fasheh mlog(ML_ERROR,
6249b7895efSMark Fasheh "Checksum failed for dir index leaf block %llu\n",
6259b7895efSMark Fasheh (unsigned long long)bh->b_blocknr);
6269b7895efSMark Fasheh return ret;
6279b7895efSMark Fasheh }
6289b7895efSMark Fasheh
6299b7895efSMark Fasheh if (!OCFS2_IS_VALID_DX_LEAF(dx_leaf)) {
6307ecef14aSJoe Perches ret = ocfs2_error(sb, "Dir Index Leaf has bad signature %.*s\n",
6319b7895efSMark Fasheh 7, dx_leaf->dl_signature);
6329b7895efSMark Fasheh }
6339b7895efSMark Fasheh
63417a5b9abSGoldwyn Rodrigues return ret;
6359b7895efSMark Fasheh }
6369b7895efSMark Fasheh
/*
 * Read a single directory index leaf block at 'blkno', validating it with
 * ocfs2_validate_dx_leaf().
 *
 * If *dx_leaf_bh is NULL, the buffer obtained is returned through it on
 * success.  Returns 0 on success or the error from ocfs2_read_block().
 */
static int ocfs2_read_dx_leaf(struct inode *dir, u64 blkno,
			      struct buffer_head **dx_leaf_bh)
{
	struct buffer_head *leaf_bh = *dx_leaf_bh;
	int status;

	status = ocfs2_read_block(INODE_CACHE(dir), blkno, &leaf_bh,
				  ocfs2_validate_dx_leaf);
	if (status)
		return status;

	/* If ocfs2_read_block() got us a new bh, pass it up. */
	if (!*dx_leaf_bh)
		*dx_leaf_bh = leaf_bh;

	return 0;
}
6529b7895efSMark Fasheh
6539b7895efSMark Fasheh /*
6549b7895efSMark Fasheh * Read a series of dx_leaf blocks. This expects all buffer_head
6559b7895efSMark Fasheh * pointers to be NULL on function entry.
6569b7895efSMark Fasheh */
ocfs2_read_dx_leaves(struct inode * dir,u64 start,int num,struct buffer_head ** dx_leaf_bhs)6579b7895efSMark Fasheh static int ocfs2_read_dx_leaves(struct inode *dir, u64 start, int num,
6589b7895efSMark Fasheh struct buffer_head **dx_leaf_bhs)
6599b7895efSMark Fasheh {
6609b7895efSMark Fasheh int ret;
6619b7895efSMark Fasheh
6628cb471e8SJoel Becker ret = ocfs2_read_blocks(INODE_CACHE(dir), start, num, dx_leaf_bhs, 0,
6639b7895efSMark Fasheh ocfs2_validate_dx_leaf);
6649b7895efSMark Fasheh if (ret)
6659b7895efSMark Fasheh mlog_errno(ret);
6669b7895efSMark Fasheh
6679b7895efSMark Fasheh return ret;
6689b7895efSMark Fasheh }
6699b7895efSMark Fasheh
ocfs2_find_entry_el(const char * name,int namelen,struct inode * dir,struct ocfs2_dir_entry ** res_dir)6700af4bd38SAdrian Bunk static struct buffer_head *ocfs2_find_entry_el(const char *name, int namelen,
671316f4b9fSMark Fasheh struct inode *dir,
672316f4b9fSMark Fasheh struct ocfs2_dir_entry **res_dir)
673316f4b9fSMark Fasheh {
674316f4b9fSMark Fasheh struct super_block *sb;
675316f4b9fSMark Fasheh struct buffer_head *bh_use[NAMEI_RA_SIZE];
676316f4b9fSMark Fasheh struct buffer_head *bh, *ret = NULL;
677316f4b9fSMark Fasheh unsigned long start, block, b;
678316f4b9fSMark Fasheh int ra_max = 0; /* Number of bh's in the readahead
679316f4b9fSMark Fasheh buffer, bh_use[] */
680316f4b9fSMark Fasheh int ra_ptr = 0; /* Current index into readahead
681316f4b9fSMark Fasheh buffer */
682316f4b9fSMark Fasheh int num = 0;
6831a5692e4SAlex Shi int nblocks, i;
684316f4b9fSMark Fasheh
685316f4b9fSMark Fasheh sb = dir->i_sb;
686316f4b9fSMark Fasheh
687316f4b9fSMark Fasheh nblocks = i_size_read(dir) >> sb->s_blocksize_bits;
688316f4b9fSMark Fasheh start = OCFS2_I(dir)->ip_dir_start_lookup;
689316f4b9fSMark Fasheh if (start >= nblocks)
690316f4b9fSMark Fasheh start = 0;
691316f4b9fSMark Fasheh block = start;
692316f4b9fSMark Fasheh
693316f4b9fSMark Fasheh restart:
694316f4b9fSMark Fasheh do {
695316f4b9fSMark Fasheh /*
696316f4b9fSMark Fasheh * We deal with the read-ahead logic here.
697316f4b9fSMark Fasheh */
698316f4b9fSMark Fasheh if (ra_ptr >= ra_max) {
699316f4b9fSMark Fasheh /* Refill the readahead buffer */
700316f4b9fSMark Fasheh ra_ptr = 0;
701316f4b9fSMark Fasheh b = block;
702316f4b9fSMark Fasheh for (ra_max = 0; ra_max < NAMEI_RA_SIZE; ra_max++) {
703316f4b9fSMark Fasheh /*
704316f4b9fSMark Fasheh * Terminate if we reach the end of the
705316f4b9fSMark Fasheh * directory and must wrap, or if our
706316f4b9fSMark Fasheh * search has finished at this block.
707316f4b9fSMark Fasheh */
708316f4b9fSMark Fasheh if (b >= nblocks || (num && block == start)) {
709316f4b9fSMark Fasheh bh_use[ra_max] = NULL;
710316f4b9fSMark Fasheh break;
711316f4b9fSMark Fasheh }
712316f4b9fSMark Fasheh num++;
713316f4b9fSMark Fasheh
714a22305ccSJoel Becker bh = NULL;
7151a5692e4SAlex Shi ocfs2_read_dir_block(dir, b++, &bh,
716a22305ccSJoel Becker OCFS2_BH_READAHEAD);
717316f4b9fSMark Fasheh bh_use[ra_max] = bh;
718316f4b9fSMark Fasheh }
719316f4b9fSMark Fasheh }
720316f4b9fSMark Fasheh if ((bh = bh_use[ra_ptr++]) == NULL)
721316f4b9fSMark Fasheh goto next;
722a22305ccSJoel Becker if (ocfs2_read_dir_block(dir, block, &bh, 0)) {
7235e0b3decSJoel Becker /* read error, skip block & hope for the best.
724a22305ccSJoel Becker * ocfs2_read_dir_block() has released the bh. */
72561fb9ea4Sjiangyiwen mlog(ML_ERROR, "reading directory %llu, "
726316f4b9fSMark Fasheh "offset %lu\n",
727316f4b9fSMark Fasheh (unsigned long long)OCFS2_I(dir)->ip_blkno,
728316f4b9fSMark Fasheh block);
729316f4b9fSMark Fasheh goto next;
730316f4b9fSMark Fasheh }
731316f4b9fSMark Fasheh i = ocfs2_search_dirblock(bh, dir, name, namelen,
732316f4b9fSMark Fasheh block << sb->s_blocksize_bits,
73323193e51SMark Fasheh bh->b_data, sb->s_blocksize,
734316f4b9fSMark Fasheh res_dir);
735316f4b9fSMark Fasheh if (i == 1) {
736316f4b9fSMark Fasheh OCFS2_I(dir)->ip_dir_start_lookup = block;
737316f4b9fSMark Fasheh ret = bh;
738316f4b9fSMark Fasheh goto cleanup_and_exit;
739316f4b9fSMark Fasheh } else {
740316f4b9fSMark Fasheh brelse(bh);
741316f4b9fSMark Fasheh if (i < 0)
742316f4b9fSMark Fasheh goto cleanup_and_exit;
743316f4b9fSMark Fasheh }
744316f4b9fSMark Fasheh next:
745316f4b9fSMark Fasheh if (++block >= nblocks)
746316f4b9fSMark Fasheh block = 0;
747316f4b9fSMark Fasheh } while (block != start);
748316f4b9fSMark Fasheh
749316f4b9fSMark Fasheh /*
750316f4b9fSMark Fasheh * If the directory has grown while we were searching, then
751316f4b9fSMark Fasheh * search the last part of the directory before giving up.
752316f4b9fSMark Fasheh */
753316f4b9fSMark Fasheh block = nblocks;
754316f4b9fSMark Fasheh nblocks = i_size_read(dir) >> sb->s_blocksize_bits;
755316f4b9fSMark Fasheh if (block < nblocks) {
756316f4b9fSMark Fasheh start = 0;
757316f4b9fSMark Fasheh goto restart;
758316f4b9fSMark Fasheh }
759316f4b9fSMark Fasheh
760316f4b9fSMark Fasheh cleanup_and_exit:
761316f4b9fSMark Fasheh /* Clean up the read-ahead blocks */
762316f4b9fSMark Fasheh for (; ra_ptr < ra_max; ra_ptr++)
763316f4b9fSMark Fasheh brelse(bh_use[ra_ptr]);
764316f4b9fSMark Fasheh
765f1088d47STao Ma trace_ocfs2_find_entry_el(ret);
766316f4b9fSMark Fasheh return ret;
767316f4b9fSMark Fasheh }
768316f4b9fSMark Fasheh
/*
 * Find the extent record in the directory index tree that covers
 * 'major_hash'.
 *
 * 'el' is the extent list to start from.  If it is not a leaf list
 * (l_tree_depth != 0), ocfs2_find_leaf() is used to locate the leaf
 * extent block and its list is searched instead.  The records are
 * scanned from the last used one backwards and the first record whose
 * e_cpos is <= major_hash is taken.
 *
 * On success the optional out parameters receive the record's
 * e_blkno ('ret_phys_blkno'), e_cpos ('ret_cpos') and e_leaf_clusters
 * ('ret_clen'); any of them may be NULL.
 *
 * Returns 0 on success, the error from ocfs2_find_leaf(), or the value
 * returned by ocfs2_error() when the tree is found to be inconsistent.
 */
static int ocfs2_dx_dir_lookup_rec(struct inode *inode,
				   struct ocfs2_extent_list *el,
				   u32 major_hash,
				   u32 *ret_cpos,
				   u64 *ret_phys_blkno,
				   unsigned int *ret_clen)
{
	int ret = 0, i, found;
	struct buffer_head *eb_bh = NULL;
	struct ocfs2_extent_block *eb;
	struct ocfs2_extent_rec *rec = NULL;

	if (el->l_tree_depth) {
		ret = ocfs2_find_leaf(INODE_CACHE(inode), el, major_hash,
				      &eb_bh);
		if (ret) {
			mlog_errno(ret);
			goto out;
		}

		eb = (struct ocfs2_extent_block *) eb_bh->b_data;
		el = &eb->h_list;

		/* The block we were handed must be a leaf. */
		if (el->l_tree_depth) {
			ret = ocfs2_error(inode->i_sb,
					  "Inode %lu has non zero tree depth in btree tree block %llu\n",
					  inode->i_ino,
					  (unsigned long long)eb_bh->b_blocknr);
			goto out;
		}
	}

	found = 0;
	for (i = le16_to_cpu(el->l_next_free_rec) - 1; i >= 0; i--) {
		rec = &el->l_recs[i];

		if (le32_to_cpu(rec->e_cpos) <= major_hash) {
			found = 1;
			break;
		}
	}

	if (!found) {
		/*
		 * 'rec' is still NULL if the list had no records at all
		 * (l_next_free_rec == 0), so it must not be dereferenced
		 * unconditionally when building the error message.
		 */
		ret = ocfs2_error(inode->i_sb,
				  "Inode %lu has bad extent record (%u, %u, 0) in btree\n",
				  inode->i_ino,
				  rec ? le32_to_cpu(rec->e_cpos) : 0,
				  rec ? ocfs2_rec_clusters(el, rec) : 0);
		goto out;
	}

	if (ret_phys_blkno)
		*ret_phys_blkno = le64_to_cpu(rec->e_blkno);
	if (ret_cpos)
		*ret_cpos = le32_to_cpu(rec->e_cpos);
	if (ret_clen)
		*ret_clen = le16_to_cpu(rec->e_leaf_clusters);

out:
	brelse(eb_bh);
	return ret;
}
8319b7895efSMark Fasheh
8329b7895efSMark Fasheh /*
8339b7895efSMark Fasheh * Returns the block index, from the start of the cluster which this
8349b7895efSMark Fasheh * hash belongs too.
8359b7895efSMark Fasheh */
__ocfs2_dx_dir_hash_idx(struct ocfs2_super * osb,u32 minor_hash)8364ed8a6bbSMark Fasheh static inline unsigned int __ocfs2_dx_dir_hash_idx(struct ocfs2_super *osb,
8374ed8a6bbSMark Fasheh u32 minor_hash)
8384ed8a6bbSMark Fasheh {
8394ed8a6bbSMark Fasheh return minor_hash & osb->osb_dx_mask;
8404ed8a6bbSMark Fasheh }
8414ed8a6bbSMark Fasheh
ocfs2_dx_dir_hash_idx(struct ocfs2_super * osb,struct ocfs2_dx_hinfo * hinfo)8424ed8a6bbSMark Fasheh static inline unsigned int ocfs2_dx_dir_hash_idx(struct ocfs2_super *osb,
8439b7895efSMark Fasheh struct ocfs2_dx_hinfo *hinfo)
8449b7895efSMark Fasheh {
8454ed8a6bbSMark Fasheh return __ocfs2_dx_dir_hash_idx(osb, hinfo->minor_hash);
8469b7895efSMark Fasheh }
8479b7895efSMark Fasheh
/*
 * Translate the hash in 'hinfo' into the physical block of the dx leaf
 * that should hold the entry.
 *
 * The extent record covering the major hash is looked up first.  The
 * major hash then selects a cluster within that record (the last
 * cluster if the hash lies at or beyond the record's end), and the
 * minor hash selects the block within that cluster.
 *
 * 'ret_cpos' and 'ret_phys_blkno' are optional; when non-NULL they
 * receive the chosen cluster position and physical block number.
 *
 * Returns 0 on success or the error from ocfs2_dx_dir_lookup_rec().
 */
static int ocfs2_dx_dir_lookup(struct inode *inode,
			       struct ocfs2_extent_list *el,
			       struct ocfs2_dx_hinfo *hinfo,
			       u32 *ret_cpos,
			       u64 *ret_phys_blkno)
{
	int status;
	unsigned int clen, cend;
	u32 cpos;
	u64 blkno;
	u32 major = hinfo->major_hash;

	status = ocfs2_dx_dir_lookup_rec(inode, el, major, &cpos, &blkno,
					 &clen);
	if (status) {
		mlog_errno(status);
		return status;
	}

	cend = cpos + clen;
	if (major < cend) {
		/* Hash falls inside the record: offset by whole clusters. */
		blkno += ocfs2_clusters_to_blocks(inode->i_sb,
						  major - cpos);
		cpos = major;
	} else {
		/* We want the last cluster */
		blkno += ocfs2_clusters_to_blocks(inode->i_sb, clen - 1);
		cpos += clen - 1;
	}

	/*
	 * We now have the cluster which should hold our entry. To
	 * find the exact block from the start of the cluster to
	 * search, we take the lower bits of the hash.
	 */
	blkno += ocfs2_dx_dir_hash_idx(OCFS2_SB(inode->i_sb), hinfo);

	if (ret_phys_blkno)
		*ret_phys_blkno = blkno;
	if (ret_cpos)
		*ret_cpos = cpos;

	return 0;
}
8949b7895efSMark Fasheh
ocfs2_dx_dir_search(const char * name,int namelen,struct inode * dir,struct ocfs2_dx_root_block * dx_root,struct ocfs2_dir_lookup_result * res)8959b7895efSMark Fasheh static int ocfs2_dx_dir_search(const char *name, int namelen,
8969b7895efSMark Fasheh struct inode *dir,
8974ed8a6bbSMark Fasheh struct ocfs2_dx_root_block *dx_root,
8989b7895efSMark Fasheh struct ocfs2_dir_lookup_result *res)
8999b7895efSMark Fasheh {
9009b7895efSMark Fasheh int ret, i, found;
9013f649ab7SKees Cook u64 phys;
9029b7895efSMark Fasheh struct buffer_head *dx_leaf_bh = NULL;
9039b7895efSMark Fasheh struct ocfs2_dx_leaf *dx_leaf;
9049b7895efSMark Fasheh struct ocfs2_dx_entry *dx_entry = NULL;
9059b7895efSMark Fasheh struct buffer_head *dir_ent_bh = NULL;
9069b7895efSMark Fasheh struct ocfs2_dir_entry *dir_ent = NULL;
9079b7895efSMark Fasheh struct ocfs2_dx_hinfo *hinfo = &res->dl_hinfo;
9084ed8a6bbSMark Fasheh struct ocfs2_extent_list *dr_el;
9094ed8a6bbSMark Fasheh struct ocfs2_dx_entry_list *entry_list;
9109b7895efSMark Fasheh
9119b7895efSMark Fasheh ocfs2_dx_dir_name_hash(dir, name, namelen, &res->dl_hinfo);
9129b7895efSMark Fasheh
9134ed8a6bbSMark Fasheh if (ocfs2_dx_root_inline(dx_root)) {
9144ed8a6bbSMark Fasheh entry_list = &dx_root->dr_entries;
9154ed8a6bbSMark Fasheh goto search;
9164ed8a6bbSMark Fasheh }
9174ed8a6bbSMark Fasheh
9184ed8a6bbSMark Fasheh dr_el = &dx_root->dr_list;
9194ed8a6bbSMark Fasheh
9209b7895efSMark Fasheh ret = ocfs2_dx_dir_lookup(dir, dr_el, hinfo, NULL, &phys);
9219b7895efSMark Fasheh if (ret) {
9229b7895efSMark Fasheh mlog_errno(ret);
9239b7895efSMark Fasheh goto out;
9249b7895efSMark Fasheh }
9259b7895efSMark Fasheh
926f1088d47STao Ma trace_ocfs2_dx_dir_search((unsigned long long)OCFS2_I(dir)->ip_blkno,
927f1088d47STao Ma namelen, name, hinfo->major_hash,
928f1088d47STao Ma hinfo->minor_hash, (unsigned long long)phys);
9299b7895efSMark Fasheh
9309b7895efSMark Fasheh ret = ocfs2_read_dx_leaf(dir, phys, &dx_leaf_bh);
9319b7895efSMark Fasheh if (ret) {
9329b7895efSMark Fasheh mlog_errno(ret);
9339b7895efSMark Fasheh goto out;
9349b7895efSMark Fasheh }
9359b7895efSMark Fasheh
9369b7895efSMark Fasheh dx_leaf = (struct ocfs2_dx_leaf *) dx_leaf_bh->b_data;
9379b7895efSMark Fasheh
938f1088d47STao Ma trace_ocfs2_dx_dir_search_leaf_info(
9399b7895efSMark Fasheh le16_to_cpu(dx_leaf->dl_list.de_num_used),
9409b7895efSMark Fasheh le16_to_cpu(dx_leaf->dl_list.de_count));
9419b7895efSMark Fasheh
9424ed8a6bbSMark Fasheh entry_list = &dx_leaf->dl_list;
9434ed8a6bbSMark Fasheh
9444ed8a6bbSMark Fasheh search:
9459b7895efSMark Fasheh /*
9469b7895efSMark Fasheh * Empty leaf is legal, so no need to check for that.
9479b7895efSMark Fasheh */
9489b7895efSMark Fasheh found = 0;
9494ed8a6bbSMark Fasheh for (i = 0; i < le16_to_cpu(entry_list->de_num_used); i++) {
9504ed8a6bbSMark Fasheh dx_entry = &entry_list->de_entries[i];
9519b7895efSMark Fasheh
9529b7895efSMark Fasheh if (hinfo->major_hash != le32_to_cpu(dx_entry->dx_major_hash)
9539b7895efSMark Fasheh || hinfo->minor_hash != le32_to_cpu(dx_entry->dx_minor_hash))
9549b7895efSMark Fasheh continue;
9559b7895efSMark Fasheh
9569b7895efSMark Fasheh /*
9579b7895efSMark Fasheh * Search unindexed leaf block now. We're not
9589b7895efSMark Fasheh * guaranteed to find anything.
9599b7895efSMark Fasheh */
9609b7895efSMark Fasheh ret = ocfs2_read_dir_block_direct(dir,
9619b7895efSMark Fasheh le64_to_cpu(dx_entry->dx_dirent_blk),
9629b7895efSMark Fasheh &dir_ent_bh);
9639b7895efSMark Fasheh if (ret) {
9649b7895efSMark Fasheh mlog_errno(ret);
9659b7895efSMark Fasheh goto out;
9669b7895efSMark Fasheh }
9679b7895efSMark Fasheh
9689b7895efSMark Fasheh /*
9699b7895efSMark Fasheh * XXX: We should check the unindexed block here,
9709b7895efSMark Fasheh * before using it.
9719b7895efSMark Fasheh */
9729b7895efSMark Fasheh
9739b7895efSMark Fasheh found = ocfs2_search_dirblock(dir_ent_bh, dir, name, namelen,
9749b7895efSMark Fasheh 0, dir_ent_bh->b_data,
9759b7895efSMark Fasheh dir->i_sb->s_blocksize, &dir_ent);
9769b7895efSMark Fasheh if (found == 1)
9779b7895efSMark Fasheh break;
9789b7895efSMark Fasheh
9799b7895efSMark Fasheh if (found == -1) {
9809b7895efSMark Fasheh /* This means we found a bad directory entry. */
9819b7895efSMark Fasheh ret = -EIO;
9829b7895efSMark Fasheh mlog_errno(ret);
9839b7895efSMark Fasheh goto out;
9849b7895efSMark Fasheh }
9859b7895efSMark Fasheh
9869b7895efSMark Fasheh brelse(dir_ent_bh);
9879b7895efSMark Fasheh dir_ent_bh = NULL;
9889b7895efSMark Fasheh }
9899b7895efSMark Fasheh
9909b7895efSMark Fasheh if (found <= 0) {
9919b7895efSMark Fasheh ret = -ENOENT;
9929b7895efSMark Fasheh goto out;
9939b7895efSMark Fasheh }
9949b7895efSMark Fasheh
9959b7895efSMark Fasheh res->dl_leaf_bh = dir_ent_bh;
9969b7895efSMark Fasheh res->dl_entry = dir_ent;
9979b7895efSMark Fasheh res->dl_dx_leaf_bh = dx_leaf_bh;
9989b7895efSMark Fasheh res->dl_dx_entry = dx_entry;
9999b7895efSMark Fasheh
10009b7895efSMark Fasheh ret = 0;
10019b7895efSMark Fasheh out:
10029b7895efSMark Fasheh if (ret) {
10039b7895efSMark Fasheh brelse(dx_leaf_bh);
10049b7895efSMark Fasheh brelse(dir_ent_bh);
10059b7895efSMark Fasheh }
10069b7895efSMark Fasheh return ret;
10079b7895efSMark Fasheh }
10089b7895efSMark Fasheh
ocfs2_find_entry_dx(const char * name,int namelen,struct inode * dir,struct ocfs2_dir_lookup_result * lookup)10099b7895efSMark Fasheh static int ocfs2_find_entry_dx(const char *name, int namelen,
10109b7895efSMark Fasheh struct inode *dir,
10119b7895efSMark Fasheh struct ocfs2_dir_lookup_result *lookup)
10129b7895efSMark Fasheh {
10139b7895efSMark Fasheh int ret;
10149b7895efSMark Fasheh struct buffer_head *di_bh = NULL;
10159b7895efSMark Fasheh struct ocfs2_dinode *di;
10169b7895efSMark Fasheh struct buffer_head *dx_root_bh = NULL;
10179b7895efSMark Fasheh struct ocfs2_dx_root_block *dx_root;
10189b7895efSMark Fasheh
10199b7895efSMark Fasheh ret = ocfs2_read_inode_block(dir, &di_bh);
10209b7895efSMark Fasheh if (ret) {
10219b7895efSMark Fasheh mlog_errno(ret);
10229b7895efSMark Fasheh goto out;
10239b7895efSMark Fasheh }
10249b7895efSMark Fasheh
10259b7895efSMark Fasheh di = (struct ocfs2_dinode *)di_bh->b_data;
10269b7895efSMark Fasheh
10279b7895efSMark Fasheh ret = ocfs2_read_dx_root(dir, di, &dx_root_bh);
10289b7895efSMark Fasheh if (ret) {
10299b7895efSMark Fasheh mlog_errno(ret);
10309b7895efSMark Fasheh goto out;
10319b7895efSMark Fasheh }
10329b7895efSMark Fasheh dx_root = (struct ocfs2_dx_root_block *) dx_root_bh->b_data;
10339b7895efSMark Fasheh
10344ed8a6bbSMark Fasheh ret = ocfs2_dx_dir_search(name, namelen, dir, dx_root, lookup);
10359b7895efSMark Fasheh if (ret) {
10369b7895efSMark Fasheh if (ret != -ENOENT)
10379b7895efSMark Fasheh mlog_errno(ret);
10389b7895efSMark Fasheh goto out;
10399b7895efSMark Fasheh }
10409b7895efSMark Fasheh
10414ed8a6bbSMark Fasheh lookup->dl_dx_root_bh = dx_root_bh;
10424ed8a6bbSMark Fasheh dx_root_bh = NULL;
10439b7895efSMark Fasheh out:
10449b7895efSMark Fasheh brelse(di_bh);
10459b7895efSMark Fasheh brelse(dx_root_bh);
10469b7895efSMark Fasheh return ret;
10479b7895efSMark Fasheh }
10489b7895efSMark Fasheh
104923193e51SMark Fasheh /*
105023193e51SMark Fasheh * Try to find an entry of the provided name within 'dir'.
105123193e51SMark Fasheh *
10524a12ca3aSMark Fasheh * If nothing was found, -ENOENT is returned. Otherwise, zero is
10534a12ca3aSMark Fasheh * returned and the struct 'res' will contain information useful to
10544a12ca3aSMark Fasheh * other directory manipulation functions.
105523193e51SMark Fasheh *
105623193e51SMark Fasheh * Caller can NOT assume anything about the contents of the
10579b7895efSMark Fasheh * buffer_heads - they are passed back only so that it can be passed
10589b7895efSMark Fasheh * into any one of the manipulation functions (add entry, delete
10599b7895efSMark Fasheh * entry, etc). As an example, bh in the extent directory case is a
10609b7895efSMark Fasheh * data block, in the inline-data case it actually points to an inode,
10619b7895efSMark Fasheh * in the indexed directory case, multiple buffers are involved.
106223193e51SMark Fasheh */
ocfs2_find_entry(const char * name,int namelen,struct inode * dir,struct ocfs2_dir_lookup_result * lookup)10634a12ca3aSMark Fasheh int ocfs2_find_entry(const char *name, int namelen,
10644a12ca3aSMark Fasheh struct inode *dir, struct ocfs2_dir_lookup_result *lookup)
106523193e51SMark Fasheh {
10664a12ca3aSMark Fasheh struct buffer_head *bh;
10674a12ca3aSMark Fasheh struct ocfs2_dir_entry *res_dir = NULL;
1068*94459962SSu Yue int ret = 0;
106923193e51SMark Fasheh
10709b7895efSMark Fasheh if (ocfs2_dir_indexed(dir))
10719b7895efSMark Fasheh return ocfs2_find_entry_dx(name, namelen, dir, lookup);
10729b7895efSMark Fasheh
1073*94459962SSu Yue if (unlikely(i_size_read(dir) <= 0)) {
1074*94459962SSu Yue ret = -EFSCORRUPTED;
1075*94459962SSu Yue mlog_errno(ret);
1076*94459962SSu Yue goto out;
1077*94459962SSu Yue }
10789b7895efSMark Fasheh /*
10799b7895efSMark Fasheh * The unindexed dir code only uses part of the lookup
10809b7895efSMark Fasheh * structure, so there's no reason to push it down further
10819b7895efSMark Fasheh * than this.
10829b7895efSMark Fasheh */
1083*94459962SSu Yue if (OCFS2_I(dir)->ip_dyn_features & OCFS2_INLINE_DATA_FL) {
1084*94459962SSu Yue if (unlikely(i_size_read(dir) > dir->i_sb->s_blocksize)) {
1085*94459962SSu Yue ret = -EFSCORRUPTED;
1086*94459962SSu Yue mlog_errno(ret);
1087*94459962SSu Yue goto out;
1088*94459962SSu Yue }
10894a12ca3aSMark Fasheh bh = ocfs2_find_entry_id(name, namelen, dir, &res_dir);
1090*94459962SSu Yue } else {
10914a12ca3aSMark Fasheh bh = ocfs2_find_entry_el(name, namelen, dir, &res_dir);
1092*94459962SSu Yue }
109323193e51SMark Fasheh
10944a12ca3aSMark Fasheh if (bh == NULL)
10954a12ca3aSMark Fasheh return -ENOENT;
10964a12ca3aSMark Fasheh
10974a12ca3aSMark Fasheh lookup->dl_leaf_bh = bh;
10984a12ca3aSMark Fasheh lookup->dl_entry = res_dir;
1099*94459962SSu Yue out:
1100*94459962SSu Yue return ret;
110123193e51SMark Fasheh }
110223193e51SMark Fasheh
11035b6a3a2bSMark Fasheh /*
11045b6a3a2bSMark Fasheh * Update inode number and type of a previously found directory entry.
11055b6a3a2bSMark Fasheh */
ocfs2_update_entry(struct inode * dir,handle_t * handle,struct ocfs2_dir_lookup_result * res,struct inode * new_entry_inode)110638760e24SMark Fasheh int ocfs2_update_entry(struct inode *dir, handle_t *handle,
11074a12ca3aSMark Fasheh struct ocfs2_dir_lookup_result *res,
110838760e24SMark Fasheh struct inode *new_entry_inode)
110938760e24SMark Fasheh {
111038760e24SMark Fasheh int ret;
111113723d00SJoel Becker ocfs2_journal_access_func access = ocfs2_journal_access_db;
11124a12ca3aSMark Fasheh struct ocfs2_dir_entry *de = res->dl_entry;
11134a12ca3aSMark Fasheh struct buffer_head *de_bh = res->dl_leaf_bh;
111438760e24SMark Fasheh
11155b6a3a2bSMark Fasheh /*
11165b6a3a2bSMark Fasheh * The same code works fine for both inline-data and extent
111713723d00SJoel Becker * based directories, so no need to split this up. The only
111813723d00SJoel Becker * difference is the journal_access function.
11195b6a3a2bSMark Fasheh */
11205b6a3a2bSMark Fasheh
112113723d00SJoel Becker if (OCFS2_I(dir)->ip_dyn_features & OCFS2_INLINE_DATA_FL)
112213723d00SJoel Becker access = ocfs2_journal_access_di;
112313723d00SJoel Becker
11240cf2f763SJoel Becker ret = access(handle, INODE_CACHE(dir), de_bh,
11250cf2f763SJoel Becker OCFS2_JOURNAL_ACCESS_WRITE);
112638760e24SMark Fasheh if (ret) {
112738760e24SMark Fasheh mlog_errno(ret);
112838760e24SMark Fasheh goto out;
112938760e24SMark Fasheh }
113038760e24SMark Fasheh
113138760e24SMark Fasheh de->inode = cpu_to_le64(OCFS2_I(new_entry_inode)->ip_blkno);
113238760e24SMark Fasheh ocfs2_set_de_type(de, new_entry_inode->i_mode);
113338760e24SMark Fasheh
113438760e24SMark Fasheh ocfs2_journal_dirty(handle, de_bh);
113538760e24SMark Fasheh
113638760e24SMark Fasheh out:
113738760e24SMark Fasheh return ret;
113838760e24SMark Fasheh }
113938760e24SMark Fasheh
11409b7895efSMark Fasheh /*
11419b7895efSMark Fasheh * __ocfs2_delete_entry deletes a directory entry by merging it with the
11429b7895efSMark Fasheh * previous entry
11439b7895efSMark Fasheh */
__ocfs2_delete_entry(handle_t * handle,struct inode * dir,struct ocfs2_dir_entry * de_del,struct buffer_head * bh,char * first_de,unsigned int bytes)11445b6a3a2bSMark Fasheh static int __ocfs2_delete_entry(handle_t *handle, struct inode *dir,
1145316f4b9fSMark Fasheh struct ocfs2_dir_entry *de_del,
11465b6a3a2bSMark Fasheh struct buffer_head *bh, char *first_de,
11475b6a3a2bSMark Fasheh unsigned int bytes)
1148316f4b9fSMark Fasheh {
1149316f4b9fSMark Fasheh struct ocfs2_dir_entry *de, *pde;
1150316f4b9fSMark Fasheh int i, status = -ENOENT;
115113723d00SJoel Becker ocfs2_journal_access_func access = ocfs2_journal_access_db;
1152316f4b9fSMark Fasheh
115313723d00SJoel Becker if (OCFS2_I(dir)->ip_dyn_features & OCFS2_INLINE_DATA_FL)
115413723d00SJoel Becker access = ocfs2_journal_access_di;
115513723d00SJoel Becker
1156316f4b9fSMark Fasheh i = 0;
1157316f4b9fSMark Fasheh pde = NULL;
11585b6a3a2bSMark Fasheh de = (struct ocfs2_dir_entry *) first_de;
11595b6a3a2bSMark Fasheh while (i < bytes) {
1160e05a2428Slei lu if (!ocfs2_check_dir_entry(dir, de, bh, first_de, bytes, i)) {
1161316f4b9fSMark Fasheh status = -EIO;
1162316f4b9fSMark Fasheh mlog_errno(status);
1163316f4b9fSMark Fasheh goto bail;
1164316f4b9fSMark Fasheh }
1165316f4b9fSMark Fasheh if (de == de_del) {
11660cf2f763SJoel Becker status = access(handle, INODE_CACHE(dir), bh,
1167316f4b9fSMark Fasheh OCFS2_JOURNAL_ACCESS_WRITE);
1168316f4b9fSMark Fasheh if (status < 0) {
1169316f4b9fSMark Fasheh status = -EIO;
1170316f4b9fSMark Fasheh mlog_errno(status);
1171316f4b9fSMark Fasheh goto bail;
1172316f4b9fSMark Fasheh }
1173316f4b9fSMark Fasheh if (pde)
11740dd3256eSMarcin Slusarz le16_add_cpu(&pde->rec_len,
1175316f4b9fSMark Fasheh le16_to_cpu(de->rec_len));
1176316f4b9fSMark Fasheh de->inode = 0;
1177cc56c33eSJeff Layton inode_inc_iversion(dir);
1178ec20cec7SJoel Becker ocfs2_journal_dirty(handle, bh);
1179316f4b9fSMark Fasheh goto bail;
1180316f4b9fSMark Fasheh }
1181316f4b9fSMark Fasheh i += le16_to_cpu(de->rec_len);
1182316f4b9fSMark Fasheh pde = de;
1183316f4b9fSMark Fasheh de = (struct ocfs2_dir_entry *)((char *)de + le16_to_cpu(de->rec_len));
1184316f4b9fSMark Fasheh }
1185316f4b9fSMark Fasheh bail:
1186316f4b9fSMark Fasheh return status;
1187316f4b9fSMark Fasheh }
1188316f4b9fSMark Fasheh
ocfs2_figure_dirent_hole(struct ocfs2_dir_entry * de)1189e7c17e43SMark Fasheh static unsigned int ocfs2_figure_dirent_hole(struct ocfs2_dir_entry *de)
1190e7c17e43SMark Fasheh {
1191e7c17e43SMark Fasheh unsigned int hole;
1192e7c17e43SMark Fasheh
1193e7c17e43SMark Fasheh if (le64_to_cpu(de->inode) == 0)
1194e7c17e43SMark Fasheh hole = le16_to_cpu(de->rec_len);
1195e7c17e43SMark Fasheh else
1196e7c17e43SMark Fasheh hole = le16_to_cpu(de->rec_len) -
1197e7c17e43SMark Fasheh OCFS2_DIR_REC_LEN(de->name_len);
1198e7c17e43SMark Fasheh
1199e7c17e43SMark Fasheh return hole;
1200e7c17e43SMark Fasheh }
1201e7c17e43SMark Fasheh
ocfs2_find_max_rec_len(struct super_block * sb,struct buffer_head * dirblock_bh)1202e7c17e43SMark Fasheh static int ocfs2_find_max_rec_len(struct super_block *sb,
1203e7c17e43SMark Fasheh struct buffer_head *dirblock_bh)
1204e7c17e43SMark Fasheh {
1205e7c17e43SMark Fasheh int size, this_hole, largest_hole = 0;
1206e7c17e43SMark Fasheh char *trailer, *de_buf, *limit, *start = dirblock_bh->b_data;
1207e7c17e43SMark Fasheh struct ocfs2_dir_entry *de;
1208e7c17e43SMark Fasheh
1209e7c17e43SMark Fasheh trailer = (char *)ocfs2_trailer_from_bh(dirblock_bh, sb);
1210e7c17e43SMark Fasheh size = ocfs2_dir_trailer_blk_off(sb);
1211e7c17e43SMark Fasheh limit = start + size;
1212e7c17e43SMark Fasheh de_buf = start;
1213e7c17e43SMark Fasheh de = (struct ocfs2_dir_entry *)de_buf;
1214e7c17e43SMark Fasheh do {
1215e7c17e43SMark Fasheh if (de_buf != trailer) {
1216e7c17e43SMark Fasheh this_hole = ocfs2_figure_dirent_hole(de);
1217e7c17e43SMark Fasheh if (this_hole > largest_hole)
1218e7c17e43SMark Fasheh largest_hole = this_hole;
1219e7c17e43SMark Fasheh }
1220e7c17e43SMark Fasheh
1221e7c17e43SMark Fasheh de_buf += le16_to_cpu(de->rec_len);
1222e7c17e43SMark Fasheh de = (struct ocfs2_dir_entry *)de_buf;
1223e7c17e43SMark Fasheh } while (de_buf < limit);
1224e7c17e43SMark Fasheh
1225e7c17e43SMark Fasheh if (largest_hole >= OCFS2_DIR_MIN_REC_LEN)
1226e7c17e43SMark Fasheh return largest_hole;
1227e7c17e43SMark Fasheh return 0;
1228e7c17e43SMark Fasheh }
1229e7c17e43SMark Fasheh
ocfs2_dx_list_remove_entry(struct ocfs2_dx_entry_list * entry_list,int index)12304ed8a6bbSMark Fasheh static void ocfs2_dx_list_remove_entry(struct ocfs2_dx_entry_list *entry_list,
12314ed8a6bbSMark Fasheh int index)
12329b7895efSMark Fasheh {
12334ed8a6bbSMark Fasheh int num_used = le16_to_cpu(entry_list->de_num_used);
12349b7895efSMark Fasheh
12359b7895efSMark Fasheh if (num_used == 1 || index == (num_used - 1))
12369b7895efSMark Fasheh goto clear;
12379b7895efSMark Fasheh
12384ed8a6bbSMark Fasheh memmove(&entry_list->de_entries[index],
12394ed8a6bbSMark Fasheh &entry_list->de_entries[index + 1],
12409b7895efSMark Fasheh (num_used - index - 1)*sizeof(struct ocfs2_dx_entry));
12419b7895efSMark Fasheh clear:
12429b7895efSMark Fasheh num_used--;
12434ed8a6bbSMark Fasheh memset(&entry_list->de_entries[num_used], 0,
12449b7895efSMark Fasheh sizeof(struct ocfs2_dx_entry));
12454ed8a6bbSMark Fasheh entry_list->de_num_used = cpu_to_le16(num_used);
12469b7895efSMark Fasheh }
12479b7895efSMark Fasheh
ocfs2_delete_entry_dx(handle_t * handle,struct inode * dir,struct ocfs2_dir_lookup_result * lookup)12489b7895efSMark Fasheh static int ocfs2_delete_entry_dx(handle_t *handle, struct inode *dir,
12499b7895efSMark Fasheh struct ocfs2_dir_lookup_result *lookup)
12509b7895efSMark Fasheh {
1251e7c17e43SMark Fasheh int ret, index, max_rec_len, add_to_free_list = 0;
12524ed8a6bbSMark Fasheh struct buffer_head *dx_root_bh = lookup->dl_dx_root_bh;
12539b7895efSMark Fasheh struct buffer_head *leaf_bh = lookup->dl_leaf_bh;
12549b7895efSMark Fasheh struct ocfs2_dx_leaf *dx_leaf;
12559b7895efSMark Fasheh struct ocfs2_dx_entry *dx_entry = lookup->dl_dx_entry;
1256e7c17e43SMark Fasheh struct ocfs2_dir_block_trailer *trailer;
12574ed8a6bbSMark Fasheh struct ocfs2_dx_root_block *dx_root;
12584ed8a6bbSMark Fasheh struct ocfs2_dx_entry_list *entry_list;
12599b7895efSMark Fasheh
1260e7c17e43SMark Fasheh /*
1261e7c17e43SMark Fasheh * This function gets a bit messy because we might have to
1262e7c17e43SMark Fasheh * modify the root block, regardless of whether the indexed
1263e7c17e43SMark Fasheh * entries are stored inline.
1264e7c17e43SMark Fasheh */
1265e7c17e43SMark Fasheh
1266e7c17e43SMark Fasheh /*
1267e7c17e43SMark Fasheh * *Only* set 'entry_list' here, based on where we're looking
1268e7c17e43SMark Fasheh * for the indexed entries. Later, we might still want to
1269e7c17e43SMark Fasheh * journal both blocks, based on free list state.
1270e7c17e43SMark Fasheh */
12714ed8a6bbSMark Fasheh dx_root = (struct ocfs2_dx_root_block *)dx_root_bh->b_data;
12724ed8a6bbSMark Fasheh if (ocfs2_dx_root_inline(dx_root)) {
12734ed8a6bbSMark Fasheh entry_list = &dx_root->dr_entries;
12744ed8a6bbSMark Fasheh } else {
12759b7895efSMark Fasheh dx_leaf = (struct ocfs2_dx_leaf *) lookup->dl_dx_leaf_bh->b_data;
12764ed8a6bbSMark Fasheh entry_list = &dx_leaf->dl_list;
12774ed8a6bbSMark Fasheh }
12784ed8a6bbSMark Fasheh
12799b7895efSMark Fasheh /* Neither of these are a disk corruption - that should have
12809b7895efSMark Fasheh * been caught by lookup, before we got here. */
12814ed8a6bbSMark Fasheh BUG_ON(le16_to_cpu(entry_list->de_count) <= 0);
12824ed8a6bbSMark Fasheh BUG_ON(le16_to_cpu(entry_list->de_num_used) <= 0);
12839b7895efSMark Fasheh
12844ed8a6bbSMark Fasheh index = (char *)dx_entry - (char *)entry_list->de_entries;
12859b7895efSMark Fasheh index /= sizeof(*dx_entry);
12869b7895efSMark Fasheh
12874ed8a6bbSMark Fasheh if (index >= le16_to_cpu(entry_list->de_num_used)) {
12889b7895efSMark Fasheh mlog(ML_ERROR, "Dir %llu: Bad dx_entry ptr idx %d, (%p, %p)\n",
12894ed8a6bbSMark Fasheh (unsigned long long)OCFS2_I(dir)->ip_blkno, index,
12904ed8a6bbSMark Fasheh entry_list, dx_entry);
12919b7895efSMark Fasheh return -EIO;
12929b7895efSMark Fasheh }
12939b7895efSMark Fasheh
12949b7895efSMark Fasheh /*
1295e7c17e43SMark Fasheh * We know that removal of this dirent will leave enough room
1296e7c17e43SMark Fasheh * for a new one, so add this block to the free list if it
1297e7c17e43SMark Fasheh * isn't already there.
1298e7c17e43SMark Fasheh */
1299e7c17e43SMark Fasheh trailer = ocfs2_trailer_from_bh(leaf_bh, dir->i_sb);
1300e7c17e43SMark Fasheh if (trailer->db_free_rec_len == 0)
1301e7c17e43SMark Fasheh add_to_free_list = 1;
1302e7c17e43SMark Fasheh
1303e7c17e43SMark Fasheh /*
13044ed8a6bbSMark Fasheh * Add the block holding our index into the journal before
13054ed8a6bbSMark Fasheh * removing the unindexed entry. If we get an error return
13064ed8a6bbSMark Fasheh * from __ocfs2_delete_entry(), then it hasn't removed the
13074ed8a6bbSMark Fasheh * entry yet. Likewise, successful return means we *must*
13084ed8a6bbSMark Fasheh * remove the indexed entry.
13094ed8a6bbSMark Fasheh *
1310e3a93c2dSMark Fasheh * We're also careful to journal the root tree block here as
1311e3a93c2dSMark Fasheh * the entry count needs to be updated. Also, we might be
1312e3a93c2dSMark Fasheh * adding to the start of the free list.
13139b7895efSMark Fasheh */
13140cf2f763SJoel Becker ret = ocfs2_journal_access_dr(handle, INODE_CACHE(dir), dx_root_bh,
13159b7895efSMark Fasheh OCFS2_JOURNAL_ACCESS_WRITE);
13169b7895efSMark Fasheh if (ret) {
13179b7895efSMark Fasheh mlog_errno(ret);
13189b7895efSMark Fasheh goto out;
13199b7895efSMark Fasheh }
1320e7c17e43SMark Fasheh
1321e7c17e43SMark Fasheh if (!ocfs2_dx_root_inline(dx_root)) {
13220cf2f763SJoel Becker ret = ocfs2_journal_access_dl(handle, INODE_CACHE(dir),
13234ed8a6bbSMark Fasheh lookup->dl_dx_leaf_bh,
13244ed8a6bbSMark Fasheh OCFS2_JOURNAL_ACCESS_WRITE);
13254ed8a6bbSMark Fasheh if (ret) {
13264ed8a6bbSMark Fasheh mlog_errno(ret);
13274ed8a6bbSMark Fasheh goto out;
13284ed8a6bbSMark Fasheh }
13294ed8a6bbSMark Fasheh }
13304ed8a6bbSMark Fasheh
1331f1088d47STao Ma trace_ocfs2_delete_entry_dx((unsigned long long)OCFS2_I(dir)->ip_blkno,
1332f1088d47STao Ma index);
13339b7895efSMark Fasheh
13349b7895efSMark Fasheh ret = __ocfs2_delete_entry(handle, dir, lookup->dl_entry,
13359b7895efSMark Fasheh leaf_bh, leaf_bh->b_data, leaf_bh->b_size);
13369b7895efSMark Fasheh if (ret) {
13379b7895efSMark Fasheh mlog_errno(ret);
13389b7895efSMark Fasheh goto out;
13399b7895efSMark Fasheh }
13409b7895efSMark Fasheh
1341e7c17e43SMark Fasheh max_rec_len = ocfs2_find_max_rec_len(dir->i_sb, leaf_bh);
1342e7c17e43SMark Fasheh trailer->db_free_rec_len = cpu_to_le16(max_rec_len);
1343e7c17e43SMark Fasheh if (add_to_free_list) {
1344e7c17e43SMark Fasheh trailer->db_free_next = dx_root->dr_free_blk;
1345e7c17e43SMark Fasheh dx_root->dr_free_blk = cpu_to_le64(leaf_bh->b_blocknr);
1346e7c17e43SMark Fasheh ocfs2_journal_dirty(handle, dx_root_bh);
1347e7c17e43SMark Fasheh }
1348e7c17e43SMark Fasheh
1349e7c17e43SMark Fasheh /* leaf_bh was journal_accessed for us in __ocfs2_delete_entry */
1350e7c17e43SMark Fasheh ocfs2_journal_dirty(handle, leaf_bh);
1351e7c17e43SMark Fasheh
1352e3a93c2dSMark Fasheh le32_add_cpu(&dx_root->dr_num_entries, -1);
1353e3a93c2dSMark Fasheh ocfs2_journal_dirty(handle, dx_root_bh);
1354e3a93c2dSMark Fasheh
13554ed8a6bbSMark Fasheh ocfs2_dx_list_remove_entry(entry_list, index);
13569b7895efSMark Fasheh
1357e3a93c2dSMark Fasheh if (!ocfs2_dx_root_inline(dx_root))
13589b7895efSMark Fasheh ocfs2_journal_dirty(handle, lookup->dl_dx_leaf_bh);
13599b7895efSMark Fasheh
13609b7895efSMark Fasheh out:
13619b7895efSMark Fasheh return ret;
13629b7895efSMark Fasheh }
13639b7895efSMark Fasheh
/*
 * Delete 'de_del' from a directory whose entries are stored inline in
 * the inode block.
 *
 * The inode block is read to find the inline data area; that area,
 * bounded by the directory's i_size, is the region handed to
 * __ocfs2_delete_entry().  'bh' is passed through unchanged.
 *
 * Returns the result of __ocfs2_delete_entry(), or the error from
 * reading the inode block.
 */
static inline int ocfs2_delete_entry_id(handle_t *handle,
					struct inode *dir,
					struct ocfs2_dir_entry *de_del,
					struct buffer_head *bh)
{
	struct buffer_head *inode_bh = NULL;
	struct ocfs2_dinode *dinode;
	int status;

	status = ocfs2_read_inode_block(dir, &inode_bh);
	if (status) {
		mlog_errno(status);
		return status;
	}

	dinode = (struct ocfs2_dinode *)inode_bh->b_data;
	status = __ocfs2_delete_entry(handle, dir, de_del, bh,
				      dinode->id2.i_data.id_data,
				      i_size_read(dir));

	/* Release the inode block buffer obtained above. */
	brelse(inode_bh);
	return status;
}
13905b6a3a2bSMark Fasheh
/*
 * Delete 'de_del' from the directory block held in 'bh'.  The search
 * region given to __ocfs2_delete_entry() is the entire buffer
 * (b_data for b_size bytes).
 */
static inline int ocfs2_delete_entry_el(handle_t *handle,
					struct inode *dir,
					struct ocfs2_dir_entry *de_del,
					struct buffer_head *bh)
{
	char *block_start = bh->b_data;

	return __ocfs2_delete_entry(handle, dir, de_del, bh,
				    block_start, bh->b_size);
}
13995b6a3a2bSMark Fasheh
14005b6a3a2bSMark Fasheh /*
14019b7895efSMark Fasheh * Delete a directory entry. Hide the details of directory
14029b7895efSMark Fasheh * implementation from the caller.
14035b6a3a2bSMark Fasheh */
ocfs2_delete_entry(handle_t * handle,struct inode * dir,struct ocfs2_dir_lookup_result * res)14045b6a3a2bSMark Fasheh int ocfs2_delete_entry(handle_t *handle,
14055b6a3a2bSMark Fasheh struct inode *dir,
14064a12ca3aSMark Fasheh struct ocfs2_dir_lookup_result *res)
14075b6a3a2bSMark Fasheh {
14089b7895efSMark Fasheh if (ocfs2_dir_indexed(dir))
14099b7895efSMark Fasheh return ocfs2_delete_entry_dx(handle, dir, res);
14109b7895efSMark Fasheh
14115b6a3a2bSMark Fasheh if (OCFS2_I(dir)->ip_dyn_features & OCFS2_INLINE_DATA_FL)
14124a12ca3aSMark Fasheh return ocfs2_delete_entry_id(handle, dir, res->dl_entry,
14134a12ca3aSMark Fasheh res->dl_leaf_bh);
14145b6a3a2bSMark Fasheh
14154a12ca3aSMark Fasheh return ocfs2_delete_entry_el(handle, dir, res->dl_entry,
14164a12ca3aSMark Fasheh res->dl_leaf_bh);
14175b6a3a2bSMark Fasheh }
14185b6a3a2bSMark Fasheh
14198553cf4fSMark Fasheh /*
14208553cf4fSMark Fasheh * Check whether 'de' has enough room to hold an entry of
14218553cf4fSMark Fasheh * 'new_rec_len' bytes.
14228553cf4fSMark Fasheh */
ocfs2_dirent_would_fit(struct ocfs2_dir_entry * de,unsigned int new_rec_len)14238553cf4fSMark Fasheh static inline int ocfs2_dirent_would_fit(struct ocfs2_dir_entry *de,
14248553cf4fSMark Fasheh unsigned int new_rec_len)
14258553cf4fSMark Fasheh {
14268553cf4fSMark Fasheh unsigned int de_really_used;
14278553cf4fSMark Fasheh
14288553cf4fSMark Fasheh /* Check whether this is an empty record with enough space */
14298553cf4fSMark Fasheh if (le64_to_cpu(de->inode) == 0 &&
14308553cf4fSMark Fasheh le16_to_cpu(de->rec_len) >= new_rec_len)
14318553cf4fSMark Fasheh return 1;
14328553cf4fSMark Fasheh
14338553cf4fSMark Fasheh /*
14348553cf4fSMark Fasheh * Record might have free space at the end which we can
14358553cf4fSMark Fasheh * use.
14368553cf4fSMark Fasheh */
14378553cf4fSMark Fasheh de_really_used = OCFS2_DIR_REC_LEN(de->name_len);
14388553cf4fSMark Fasheh if (le16_to_cpu(de->rec_len) >= (de_really_used + new_rec_len))
14398553cf4fSMark Fasheh return 1;
14408553cf4fSMark Fasheh
14418553cf4fSMark Fasheh return 0;
14428553cf4fSMark Fasheh }
14438553cf4fSMark Fasheh
ocfs2_dx_dir_leaf_insert_tail(struct ocfs2_dx_leaf * dx_leaf,struct ocfs2_dx_entry * dx_new_entry)14449b7895efSMark Fasheh static void ocfs2_dx_dir_leaf_insert_tail(struct ocfs2_dx_leaf *dx_leaf,
14459b7895efSMark Fasheh struct ocfs2_dx_entry *dx_new_entry)
14469b7895efSMark Fasheh {
14479b7895efSMark Fasheh int i;
14489b7895efSMark Fasheh
14499b7895efSMark Fasheh i = le16_to_cpu(dx_leaf->dl_list.de_num_used);
14509b7895efSMark Fasheh dx_leaf->dl_list.de_entries[i] = *dx_new_entry;
14519b7895efSMark Fasheh
14529b7895efSMark Fasheh le16_add_cpu(&dx_leaf->dl_list.de_num_used, 1);
14539b7895efSMark Fasheh }
14549b7895efSMark Fasheh
/*
 * Append a new index entry to 'entry_list'.
 *
 * The entry is built in the first unused slot: it is zeroed, then
 * filled with the major/minor hash from 'hinfo' and the block number
 * 'dirent_blk', all stored little-endian.  The used count is then
 * incremented.  No capacity check is performed here.
 */
static void ocfs2_dx_entry_list_insert(struct ocfs2_dx_entry_list *entry_list,
				       struct ocfs2_dx_hinfo *hinfo,
				       u64 dirent_blk)
{
	struct ocfs2_dx_entry *slot;

	slot = &entry_list->de_entries[le16_to_cpu(entry_list->de_num_used)];

	memset(slot, 0, sizeof(*slot));
	slot->dx_dirent_blk = cpu_to_le64(dirent_blk);
	slot->dx_major_hash = cpu_to_le32(hinfo->major_hash);
	slot->dx_minor_hash = cpu_to_le32(hinfo->minor_hash);

	le16_add_cpu(&entry_list->de_num_used, 1);
}
14724ed8a6bbSMark Fasheh
/*
 * Add an index entry for (hinfo, dirent_blk) to the dx leaf held in
 * 'dx_leaf_bh'.
 *
 * Journal write access to the leaf is requested first; on failure the
 * error is logged and returned without touching the leaf.  Otherwise
 * the entry is appended and the buffer is marked dirty in the journal.
 *
 * Returns 0 on success or the error from ocfs2_journal_access_dl().
 */
static int __ocfs2_dx_dir_leaf_insert(struct inode *dir, handle_t *handle,
				      struct ocfs2_dx_hinfo *hinfo,
				      u64 dirent_blk,
				      struct buffer_head *dx_leaf_bh)
{
	struct ocfs2_dx_leaf *leaf;
	int status;

	status = ocfs2_journal_access_dl(handle, INODE_CACHE(dir), dx_leaf_bh,
					 OCFS2_JOURNAL_ACCESS_WRITE);
	if (status) {
		mlog_errno(status);
		return status;
	}

	leaf = (struct ocfs2_dx_leaf *)dx_leaf_bh->b_data;
	ocfs2_dx_entry_list_insert(&leaf->dl_list, hinfo, dirent_blk);
	ocfs2_journal_dirty(handle, dx_leaf_bh);

	return 0;
}
14959b7895efSMark Fasheh
/*
 * Add an index entry for (hinfo, dirent_blk) to the entry list stored
 * inside the dx root block itself.
 *
 * 'dir' and 'handle' are not used here; this function performs no
 * journal calls of its own.
 */
static void ocfs2_dx_inline_root_insert(struct inode *dir, handle_t *handle,
					struct ocfs2_dx_hinfo *hinfo,
					u64 dirent_blk,
					struct ocfs2_dx_root_block *dx_root)
{
	struct ocfs2_dx_entry_list *inline_list = &dx_root->dr_entries;

	ocfs2_dx_entry_list_insert(inline_list, hinfo, dirent_blk);
}
1503e3a93c2dSMark Fasheh
/*
 * Record a new index entry for the dirent block in lookup->dl_leaf_bh,
 * using the hash in lookup->dl_hinfo.
 *
 * Journal write access to the dx root block is taken first.  The entry
 * then goes either into the root's inline list or into the dx leaf in
 * lookup->dl_dx_leaf_bh, depending on ocfs2_dx_root_inline().  On
 * success the root's dr_num_entries is incremented and the root buffer
 * is marked dirty.
 *
 * Returns 0 on success, or the error from the journal access / leaf
 * insert (in which case the entry count is left untouched).
 */
static int ocfs2_dx_dir_insert(struct inode *dir, handle_t *handle,
			       struct ocfs2_dir_lookup_result *lookup)
{
	struct buffer_head *dx_root_bh = lookup->dl_dx_root_bh;
	struct ocfs2_dx_root_block *dx_root;
	u64 dirent_blk;
	int status;

	status = ocfs2_journal_access_dr(handle, INODE_CACHE(dir), dx_root_bh,
					 OCFS2_JOURNAL_ACCESS_WRITE);
	if (status) {
		mlog_errno(status);
		return status;
	}

	dx_root = (struct ocfs2_dx_root_block *)dx_root_bh->b_data;
	dirent_blk = lookup->dl_leaf_bh->b_blocknr;

	if (ocfs2_dx_root_inline(dx_root)) {
		ocfs2_dx_inline_root_insert(dir, handle, &lookup->dl_hinfo,
					    dirent_blk, dx_root);
	} else {
		status = __ocfs2_dx_dir_leaf_insert(dir, handle,
						    &lookup->dl_hinfo,
						    dirent_blk,
						    lookup->dl_dx_leaf_bh);
		if (status)
			return status;
	}

	le32_add_cpu(&dx_root->dr_num_entries, 1);
	ocfs2_journal_dirty(handle, dx_root_bh);

	return 0;
}
15384ed8a6bbSMark Fasheh
/*
 * Unlink the block in lookup->dl_leaf_bh from the directory free list.
 *
 * Whatever precedes the block in the list -- the dx root when
 * ocfs2_free_list_at_root() says so, otherwise the trailer of
 * lookup->dl_prev_leaf_bh -- is pointed at this block's successor.
 * The block's own trailer then has its free record length and next
 * pointer cleared.  Both modified buffers are marked dirty; this
 * function itself makes no journal-access calls.
 */
static void ocfs2_remove_block_from_free_list(struct inode *dir,
				       handle_t *handle,
				       struct ocfs2_dir_lookup_result *lookup)
{
	struct buffer_head *leaf_bh = lookup->dl_leaf_bh;
	struct buffer_head *pred_bh;
	struct ocfs2_dir_block_trailer *trailer;

	trailer = ocfs2_trailer_from_bh(leaf_bh, dir->i_sb);

	if (ocfs2_free_list_at_root(lookup)) {
		struct ocfs2_dx_root_block *dx_root;

		pred_bh = lookup->dl_dx_root_bh;
		dx_root = (struct ocfs2_dx_root_block *)pred_bh->b_data;
		dx_root->dr_free_blk = trailer->db_free_next;
	} else {
		struct ocfs2_dir_block_trailer *prev_trailer;

		pred_bh = lookup->dl_prev_leaf_bh;
		prev_trailer = ocfs2_trailer_from_bh(pred_bh, dir->i_sb);
		prev_trailer->db_free_next = trailer->db_free_next;
	}

	trailer->db_free_rec_len = cpu_to_le16(0);
	trailer->db_free_next = cpu_to_le64(0);

	ocfs2_journal_dirty(handle, pred_bh);
	ocfs2_journal_dirty(handle, leaf_bh);
}
1565e7c17e43SMark Fasheh
1566e7c17e43SMark Fasheh /*
1567e7c17e43SMark Fasheh * This expects that a journal write has been reserved on
1568e7c17e43SMark Fasheh * lookup->dl_prev_leaf_bh or lookup->dl_dx_root_bh
1569e7c17e43SMark Fasheh */
ocfs2_recalc_free_list(struct inode * dir,handle_t * handle,struct ocfs2_dir_lookup_result * lookup)1570e7c17e43SMark Fasheh static void ocfs2_recalc_free_list(struct inode *dir, handle_t *handle,
1571e7c17e43SMark Fasheh struct ocfs2_dir_lookup_result *lookup)
1572e7c17e43SMark Fasheh {
1573e7c17e43SMark Fasheh int max_rec_len;
1574e7c17e43SMark Fasheh struct ocfs2_dir_block_trailer *trailer;
1575e7c17e43SMark Fasheh
1576e7c17e43SMark Fasheh /* Walk dl_leaf_bh to figure out what the new free rec_len is. */
1577e7c17e43SMark Fasheh max_rec_len = ocfs2_find_max_rec_len(dir->i_sb, lookup->dl_leaf_bh);
1578e7c17e43SMark Fasheh if (max_rec_len) {
1579e7c17e43SMark Fasheh /*
1580e7c17e43SMark Fasheh * There's still room in this block, so no need to remove it
1581e7c17e43SMark Fasheh * from the free list. In this case, we just want to update
1582e7c17e43SMark Fasheh * the rec len accounting.
1583e7c17e43SMark Fasheh */
1584e7c17e43SMark Fasheh trailer = ocfs2_trailer_from_bh(lookup->dl_leaf_bh, dir->i_sb);
1585e7c17e43SMark Fasheh trailer->db_free_rec_len = cpu_to_le16(max_rec_len);
1586e7c17e43SMark Fasheh ocfs2_journal_dirty(handle, lookup->dl_leaf_bh);
1587e7c17e43SMark Fasheh } else {
1588e7c17e43SMark Fasheh ocfs2_remove_block_from_free_list(dir, handle, lookup);
1589e7c17e43SMark Fasheh }
1590e7c17e43SMark Fasheh }
1591e7c17e43SMark Fasheh
1592316f4b9fSMark Fasheh /* we don't always have a dentry for what we want to add, so people
1593316f4b9fSMark Fasheh * like orphan dir can call this instead.
1594316f4b9fSMark Fasheh *
15954a12ca3aSMark Fasheh * The lookup context must have been filled from
15964a12ca3aSMark Fasheh * ocfs2_prepare_dir_for_insert.
1597316f4b9fSMark Fasheh */
__ocfs2_add_entry(handle_t * handle,struct inode * dir,const char * name,int namelen,struct inode * inode,u64 blkno,struct buffer_head * parent_fe_bh,struct ocfs2_dir_lookup_result * lookup)1598316f4b9fSMark Fasheh int __ocfs2_add_entry(handle_t *handle,
1599316f4b9fSMark Fasheh struct inode *dir,
1600316f4b9fSMark Fasheh const char *name, int namelen,
1601316f4b9fSMark Fasheh struct inode *inode, u64 blkno,
1602316f4b9fSMark Fasheh struct buffer_head *parent_fe_bh,
16034a12ca3aSMark Fasheh struct ocfs2_dir_lookup_result *lookup)
1604316f4b9fSMark Fasheh {
1605316f4b9fSMark Fasheh unsigned long offset;
1606316f4b9fSMark Fasheh unsigned short rec_len;
1607316f4b9fSMark Fasheh struct ocfs2_dir_entry *de, *de1;
16085b6a3a2bSMark Fasheh struct ocfs2_dinode *di = (struct ocfs2_dinode *)parent_fe_bh->b_data;
16095b6a3a2bSMark Fasheh struct super_block *sb = dir->i_sb;
16102e173152SDaeseok Youn int retval;
16115b6a3a2bSMark Fasheh unsigned int size = sb->s_blocksize;
16124a12ca3aSMark Fasheh struct buffer_head *insert_bh = lookup->dl_leaf_bh;
16135b6a3a2bSMark Fasheh char *data_start = insert_bh->b_data;
1614316f4b9fSMark Fasheh
1615316f4b9fSMark Fasheh if (!namelen)
1616316f4b9fSMark Fasheh return -EINVAL;
1617316f4b9fSMark Fasheh
1618e7c17e43SMark Fasheh if (ocfs2_dir_indexed(dir)) {
1619e7c17e43SMark Fasheh struct buffer_head *bh;
1620e7c17e43SMark Fasheh
1621e7c17e43SMark Fasheh /*
1622e7c17e43SMark Fasheh * An indexed dir may require that we update the free space
1623e7c17e43SMark Fasheh * list. Reserve a write to the previous node in the list so
1624e7c17e43SMark Fasheh * that we don't fail later.
1625e7c17e43SMark Fasheh *
1626e7c17e43SMark Fasheh * XXX: This can be either a dx_root_block, or an unindexed
1627e7c17e43SMark Fasheh * directory tree leaf block.
1628e7c17e43SMark Fasheh */
1629e7c17e43SMark Fasheh if (ocfs2_free_list_at_root(lookup)) {
1630e7c17e43SMark Fasheh bh = lookup->dl_dx_root_bh;
16310cf2f763SJoel Becker retval = ocfs2_journal_access_dr(handle,
16320cf2f763SJoel Becker INODE_CACHE(dir), bh,
1633e7c17e43SMark Fasheh OCFS2_JOURNAL_ACCESS_WRITE);
1634e7c17e43SMark Fasheh } else {
1635e7c17e43SMark Fasheh bh = lookup->dl_prev_leaf_bh;
16360cf2f763SJoel Becker retval = ocfs2_journal_access_db(handle,
16370cf2f763SJoel Becker INODE_CACHE(dir), bh,
1638e7c17e43SMark Fasheh OCFS2_JOURNAL_ACCESS_WRITE);
1639e7c17e43SMark Fasheh }
1640e7c17e43SMark Fasheh if (retval) {
1641e7c17e43SMark Fasheh mlog_errno(retval);
1642e7c17e43SMark Fasheh return retval;
1643e7c17e43SMark Fasheh }
1644e7c17e43SMark Fasheh } else if (OCFS2_I(dir)->ip_dyn_features & OCFS2_INLINE_DATA_FL) {
16455b6a3a2bSMark Fasheh data_start = di->id2.i_data.id_data;
16465b6a3a2bSMark Fasheh size = i_size_read(dir);
16475b6a3a2bSMark Fasheh
16485b6a3a2bSMark Fasheh BUG_ON(insert_bh != parent_fe_bh);
16495b6a3a2bSMark Fasheh }
16505b6a3a2bSMark Fasheh
1651316f4b9fSMark Fasheh rec_len = OCFS2_DIR_REC_LEN(namelen);
1652316f4b9fSMark Fasheh offset = 0;
16535b6a3a2bSMark Fasheh de = (struct ocfs2_dir_entry *) data_start;
1654316f4b9fSMark Fasheh while (1) {
16555b6a3a2bSMark Fasheh BUG_ON((char *)de >= (size + data_start));
16565b6a3a2bSMark Fasheh
1657316f4b9fSMark Fasheh /* These checks should've already been passed by the
1658316f4b9fSMark Fasheh * prepare function, but I guess we can leave them
1659316f4b9fSMark Fasheh * here anyway. */
1660e05a2428Slei lu if (!ocfs2_check_dir_entry(dir, de, insert_bh, data_start,
1661e05a2428Slei lu size, offset)) {
1662316f4b9fSMark Fasheh retval = -ENOENT;
1663316f4b9fSMark Fasheh goto bail;
1664316f4b9fSMark Fasheh }
1665316f4b9fSMark Fasheh if (ocfs2_match(namelen, name, de)) {
1666316f4b9fSMark Fasheh retval = -EEXIST;
1667316f4b9fSMark Fasheh goto bail;
1668316f4b9fSMark Fasheh }
16698553cf4fSMark Fasheh
167087d35a74SMark Fasheh /* We're guaranteed that we should have space, so we
167187d35a74SMark Fasheh * can't possibly have hit the trailer...right? */
167287d35a74SMark Fasheh mlog_bug_on_msg(ocfs2_skip_dir_trailer(dir, de, offset, size),
167387d35a74SMark Fasheh "Hit dir trailer trying to insert %.*s "
167487d35a74SMark Fasheh "(namelen %d) into directory %llu. "
167587d35a74SMark Fasheh "offset is %lu, trailer offset is %d\n",
167687d35a74SMark Fasheh namelen, name, namelen,
167787d35a74SMark Fasheh (unsigned long long)parent_fe_bh->b_blocknr,
167887d35a74SMark Fasheh offset, ocfs2_dir_trailer_blk_off(dir->i_sb));
167987d35a74SMark Fasheh
16808553cf4fSMark Fasheh if (ocfs2_dirent_would_fit(de, rec_len)) {
168110fc3a18SJeff Layton inode_set_mtime_to_ts(dir,
168210fc3a18SJeff Layton inode_set_ctime_current(dir));
1683316f4b9fSMark Fasheh retval = ocfs2_mark_inode_dirty(handle, dir, parent_fe_bh);
1684316f4b9fSMark Fasheh if (retval < 0) {
1685316f4b9fSMark Fasheh mlog_errno(retval);
1686316f4b9fSMark Fasheh goto bail;
1687316f4b9fSMark Fasheh }
1688316f4b9fSMark Fasheh
168913723d00SJoel Becker if (insert_bh == parent_fe_bh)
16902e173152SDaeseok Youn retval = ocfs2_journal_access_di(handle,
16910cf2f763SJoel Becker INODE_CACHE(dir),
169213723d00SJoel Becker insert_bh,
169313723d00SJoel Becker OCFS2_JOURNAL_ACCESS_WRITE);
16949b7895efSMark Fasheh else {
16952e173152SDaeseok Youn retval = ocfs2_journal_access_db(handle,
16960cf2f763SJoel Becker INODE_CACHE(dir),
169713723d00SJoel Becker insert_bh,
1698316f4b9fSMark Fasheh OCFS2_JOURNAL_ACCESS_WRITE);
16994ed8a6bbSMark Fasheh
17002e173152SDaeseok Youn if (!retval && ocfs2_dir_indexed(dir))
17012e173152SDaeseok Youn retval = ocfs2_dx_dir_insert(dir,
17029b7895efSMark Fasheh handle,
17039b7895efSMark Fasheh lookup);
17042e173152SDaeseok Youn }
17052e173152SDaeseok Youn
17062e173152SDaeseok Youn if (retval) {
17072e173152SDaeseok Youn mlog_errno(retval);
17089b7895efSMark Fasheh goto bail;
17099b7895efSMark Fasheh }
17109b7895efSMark Fasheh
1711316f4b9fSMark Fasheh /* By now the buffer is marked for journaling */
1712316f4b9fSMark Fasheh offset += le16_to_cpu(de->rec_len);
1713316f4b9fSMark Fasheh if (le64_to_cpu(de->inode)) {
1714316f4b9fSMark Fasheh de1 = (struct ocfs2_dir_entry *)((char *) de +
1715316f4b9fSMark Fasheh OCFS2_DIR_REC_LEN(de->name_len));
1716316f4b9fSMark Fasheh de1->rec_len =
1717316f4b9fSMark Fasheh cpu_to_le16(le16_to_cpu(de->rec_len) -
1718316f4b9fSMark Fasheh OCFS2_DIR_REC_LEN(de->name_len));
1719316f4b9fSMark Fasheh de->rec_len = cpu_to_le16(OCFS2_DIR_REC_LEN(de->name_len));
1720316f4b9fSMark Fasheh de = de1;
1721316f4b9fSMark Fasheh }
17229dc2108dSPhillip Potter de->file_type = FT_UNKNOWN;
1723316f4b9fSMark Fasheh if (blkno) {
1724316f4b9fSMark Fasheh de->inode = cpu_to_le64(blkno);
1725316f4b9fSMark Fasheh ocfs2_set_de_type(de, inode->i_mode);
1726316f4b9fSMark Fasheh } else
1727316f4b9fSMark Fasheh de->inode = 0;
1728316f4b9fSMark Fasheh de->name_len = namelen;
1729316f4b9fSMark Fasheh memcpy(de->name, name, namelen);
1730316f4b9fSMark Fasheh
1731e7c17e43SMark Fasheh if (ocfs2_dir_indexed(dir))
1732e7c17e43SMark Fasheh ocfs2_recalc_free_list(dir, handle, lookup);
1733e7c17e43SMark Fasheh
1734cc56c33eSJeff Layton inode_inc_iversion(dir);
1735ec20cec7SJoel Becker ocfs2_journal_dirty(handle, insert_bh);
1736316f4b9fSMark Fasheh retval = 0;
1737316f4b9fSMark Fasheh goto bail;
1738316f4b9fSMark Fasheh }
173987d35a74SMark Fasheh
1740316f4b9fSMark Fasheh offset += le16_to_cpu(de->rec_len);
1741316f4b9fSMark Fasheh de = (struct ocfs2_dir_entry *) ((char *) de + le16_to_cpu(de->rec_len));
1742316f4b9fSMark Fasheh }
1743316f4b9fSMark Fasheh
1744316f4b9fSMark Fasheh /* when you think about it, the assert above should prevent us
1745316f4b9fSMark Fasheh * from ever getting here. */
1746316f4b9fSMark Fasheh retval = -ENOSPC;
1747316f4b9fSMark Fasheh bail:
1748c1e8d35eSTao Ma if (retval)
1749c1e8d35eSTao Ma mlog_errno(retval);
1750316f4b9fSMark Fasheh
1751316f4b9fSMark Fasheh return retval;
1752316f4b9fSMark Fasheh }
1753316f4b9fSMark Fasheh
/*
 * Emit the entries of an inline-data directory through 'ctx',
 * starting at ctx->pos and advancing it past each record handled.
 *
 * '*f_version' is compared with the inode's i_version; when they
 * differ the position is re-validated by walking the records from the
 * start of the inline area, and '*f_version' is refreshed.
 *
 * Always returns 0, including when the inode block cannot be read
 * (that case is only logged).
 */
static int ocfs2_dir_foreach_blk_id(struct inode *inode,
				    u64 *f_version,
				    struct dir_context *ctx)
{
	unsigned long offset = ctx->pos;
	struct buffer_head *di_bh = NULL;
	struct ocfs2_inline_data *data;
	struct ocfs2_dir_entry *de;
	int i;

	if (ocfs2_read_inode_block(inode, &di_bh)) {
		mlog(ML_ERROR, "Unable to read inode block for dir %llu\n",
		     (unsigned long long)OCFS2_I(inode)->ip_blkno);
		goto out;
	}

	data = &((struct ocfs2_dinode *)di_bh->b_data)->id2.i_data;

	while (ctx->pos < i_size_read(inode)) {
		/* If the dir block has changed since the last call to
		 * readdir(2), then we might be pointing to an invalid
		 * dirent right now. Scan from the start of the block
		 * to make sure. */
		if (!inode_eq_iversion(inode, *f_version)) {
			i = 0;
			while (i < i_size_read(inode) && i < offset) {
				de = (struct ocfs2_dir_entry *)
					(data->id_data + i);
				/* It's too expensive to do a full
				 * dirent test each time round this
				 * loop, but we do have to test at
				 * least that it is non-zero. A
				 * failure will be detected in the
				 * dirent test below. */
				if (le16_to_cpu(de->rec_len) <
				    OCFS2_DIR_REC_LEN(1))
					break;
				i += le16_to_cpu(de->rec_len);
			}
			offset = i;
			ctx->pos = offset;
			*f_version = inode_query_iversion(inode);
		}

		de = (struct ocfs2_dir_entry *)(data->id_data + ctx->pos);
		if (!ocfs2_check_dir_entry(inode, de, di_bh,
					   (char *)data->id_data,
					   i_size_read(inode), ctx->pos)) {
			/* On error, skip the f_pos to the end. */
			ctx->pos = i_size_read(inode);
			break;
		}

		offset += le16_to_cpu(de->rec_len);

		/* Records with inode == 0 are unused and are not emitted. */
		if (le64_to_cpu(de->inode) &&
		    !dir_emit(ctx, de->name, de->name_len,
			      le64_to_cpu(de->inode),
			      fs_ftype_to_dtype(de->file_type)))
			break;

		ctx->pos += le16_to_cpu(de->rec_len);
	}
out:
	brelse(di_bh);
	return 0;
}
181923193e51SMark Fasheh
18209b7895efSMark Fasheh /*
18219b7895efSMark Fasheh * NOTE: This function can be called against unindexed directories,
18229b7895efSMark Fasheh * and indexed ones.
18239b7895efSMark Fasheh */
ocfs2_dir_foreach_blk_el(struct inode * inode,u64 * f_version,struct dir_context * ctx,bool persist)182423193e51SMark Fasheh static int ocfs2_dir_foreach_blk_el(struct inode *inode,
18252b47c361SMathieu Desnoyers u64 *f_version,
18263704412bSAl Viro struct dir_context *ctx,
18273704412bSAl Viro bool persist)
1828ccd979bdSMark Fasheh {
1829aa958874SMark Fasheh unsigned long offset, blk, last_ra_blk = 0;
18303704412bSAl Viro int i;
1831ccd979bdSMark Fasheh struct buffer_head * bh, * tmp;
1832ccd979bdSMark Fasheh struct ocfs2_dir_entry * de;
1833ccd979bdSMark Fasheh struct super_block * sb = inode->i_sb;
1834aa958874SMark Fasheh unsigned int ra_sectors = 16;
18353704412bSAl Viro int stored = 0;
1836ccd979bdSMark Fasheh
1837ccd979bdSMark Fasheh bh = NULL;
1838ccd979bdSMark Fasheh
18393704412bSAl Viro offset = ctx->pos & (sb->s_blocksize - 1);
1840ccd979bdSMark Fasheh
18413704412bSAl Viro while (ctx->pos < i_size_read(inode)) {
18423704412bSAl Viro blk = ctx->pos >> sb->s_blocksize_bits;
1843a22305ccSJoel Becker if (ocfs2_read_dir_block(inode, blk, &bh, 0)) {
1844a22305ccSJoel Becker /* Skip the corrupt dirblock and keep trying */
18453704412bSAl Viro ctx->pos += sb->s_blocksize - offset;
1846ccd979bdSMark Fasheh continue;
1847ccd979bdSMark Fasheh }
1848ccd979bdSMark Fasheh
1849aa958874SMark Fasheh /* The idea here is to begin with 8k read-ahead and to stay
1850aa958874SMark Fasheh * 4k ahead of our current position.
1851aa958874SMark Fasheh *
1852aa958874SMark Fasheh * TODO: Use the pagecache for this. We just need to
1853aa958874SMark Fasheh * make sure it's cluster-safe... */
1854aa958874SMark Fasheh if (!last_ra_blk
1855aa958874SMark Fasheh || (((last_ra_blk - blk) << 9) <= (ra_sectors / 2))) {
1856aa958874SMark Fasheh for (i = ra_sectors >> (sb->s_blocksize_bits - 9);
1857ccd979bdSMark Fasheh i > 0; i--) {
1858a22305ccSJoel Becker tmp = NULL;
1859a22305ccSJoel Becker if (!ocfs2_read_dir_block(inode, ++blk, &tmp,
1860a22305ccSJoel Becker OCFS2_BH_READAHEAD))
1861ccd979bdSMark Fasheh brelse(tmp);
1862ccd979bdSMark Fasheh }
1863aa958874SMark Fasheh last_ra_blk = blk;
1864aa958874SMark Fasheh ra_sectors = 8;
1865ccd979bdSMark Fasheh }
1866ccd979bdSMark Fasheh
1867ccd979bdSMark Fasheh /* If the dir block has changed since the last call to
1868ccd979bdSMark Fasheh * readdir(2), then we might be pointing to an invalid
1869ccd979bdSMark Fasheh * dirent right now. Scan from the start of the block
1870ccd979bdSMark Fasheh * to make sure. */
1871c472c07bSGoffredo Baroncelli if (!inode_eq_iversion(inode, *f_version)) {
1872ccd979bdSMark Fasheh for (i = 0; i < sb->s_blocksize && i < offset; ) {
1873ccd979bdSMark Fasheh de = (struct ocfs2_dir_entry *) (bh->b_data + i);
1874ccd979bdSMark Fasheh /* It's too expensive to do a full
1875ccd979bdSMark Fasheh * dirent test each time round this
1876ccd979bdSMark Fasheh * loop, but we do have to test at
1877ccd979bdSMark Fasheh * least that it is non-zero. A
1878ccd979bdSMark Fasheh * failure will be detected in the
1879ccd979bdSMark Fasheh * dirent test below. */
1880ccd979bdSMark Fasheh if (le16_to_cpu(de->rec_len) <
1881ccd979bdSMark Fasheh OCFS2_DIR_REC_LEN(1))
1882ccd979bdSMark Fasheh break;
1883ccd979bdSMark Fasheh i += le16_to_cpu(de->rec_len);
1884ccd979bdSMark Fasheh }
1885ccd979bdSMark Fasheh offset = i;
18863704412bSAl Viro ctx->pos = (ctx->pos & ~(sb->s_blocksize - 1))
1887ccd979bdSMark Fasheh | offset;
1888cc56c33eSJeff Layton *f_version = inode_query_iversion(inode);
1889ccd979bdSMark Fasheh }
1890ccd979bdSMark Fasheh
18913704412bSAl Viro while (ctx->pos < i_size_read(inode)
1892ccd979bdSMark Fasheh && offset < sb->s_blocksize) {
1893ccd979bdSMark Fasheh de = (struct ocfs2_dir_entry *) (bh->b_data + offset);
1894e05a2428Slei lu if (!ocfs2_check_dir_entry(inode, de, bh, bh->b_data,
1895e05a2428Slei lu sb->s_blocksize, offset)) {
1896ccd979bdSMark Fasheh /* On error, skip the f_pos to the
1897ccd979bdSMark Fasheh next block. */
18983704412bSAl Viro ctx->pos = (ctx->pos | (sb->s_blocksize - 1)) + 1;
189929aa3016SChangwei Ge break;
1900ccd979bdSMark Fasheh }
1901ccd979bdSMark Fasheh if (le64_to_cpu(de->inode)) {
19023704412bSAl Viro if (!dir_emit(ctx, de->name,
1903ccd979bdSMark Fasheh de->name_len,
19047e853679SMark Fasheh le64_to_cpu(de->inode),
19059dc2108dSPhillip Potter fs_ftype_to_dtype(de->file_type))) {
19063704412bSAl Viro brelse(bh);
19073704412bSAl Viro return 0;
1908e7b34019SMark Fasheh }
1909ccd979bdSMark Fasheh stored++;
1910ccd979bdSMark Fasheh }
19113704412bSAl Viro offset += le16_to_cpu(de->rec_len);
19123704412bSAl Viro ctx->pos += le16_to_cpu(de->rec_len);
1913ccd979bdSMark Fasheh }
1914ccd979bdSMark Fasheh offset = 0;
1915ccd979bdSMark Fasheh brelse(bh);
1916a22305ccSJoel Becker bh = NULL;
19173704412bSAl Viro if (!persist && stored)
19183704412bSAl Viro break;
1919ccd979bdSMark Fasheh }
19203704412bSAl Viro return 0;
1921b8bc5f4fSMark Fasheh }
1922b8bc5f4fSMark Fasheh
/*
 * Walk the entries of @inode, picking the walker that matches how the
 * directory is stored: the inline-data walker when OCFS2_INLINE_DATA_FL
 * is set in the inode's dynamic features, the extent-list walker
 * otherwise.  @persist is only forwarded to the extent-list walker.
 *
 * Returns whatever the selected walker returns.
 */
static int ocfs2_dir_foreach_blk(struct inode *inode, u64 *f_version,
				 struct dir_context *ctx,
				 bool persist)
{
	if (!(OCFS2_I(inode)->ip_dyn_features & OCFS2_INLINE_DATA_FL))
		return ocfs2_dir_foreach_blk_el(inode, f_version, ctx,
						persist);

	return ocfs2_dir_foreach_blk_id(inode, f_version, ctx);
}
193123193e51SMark Fasheh
1932b8bc5f4fSMark Fasheh /*
19335eae5b96SMark Fasheh * This is intended to be called from inside other kernel functions,
19345eae5b96SMark Fasheh * so we fake some arguments.
19355eae5b96SMark Fasheh */
ocfs2_dir_foreach(struct inode * inode,struct dir_context * ctx)19363704412bSAl Viro int ocfs2_dir_foreach(struct inode *inode, struct dir_context *ctx)
19375eae5b96SMark Fasheh {
1938cc56c33eSJeff Layton u64 version = inode_query_iversion(inode);
19393704412bSAl Viro ocfs2_dir_foreach_blk(inode, &version, ctx, true);
19405eae5b96SMark Fasheh return 0;
19415eae5b96SMark Fasheh }
19425eae5b96SMark Fasheh
19435eae5b96SMark Fasheh /*
1944b8bc5f4fSMark Fasheh * ocfs2_readdir()
1945b8bc5f4fSMark Fasheh *
1946b8bc5f4fSMark Fasheh */
ocfs2_readdir(struct file * file,struct dir_context * ctx)19473704412bSAl Viro int ocfs2_readdir(struct file *file, struct dir_context *ctx)
1948b8bc5f4fSMark Fasheh {
1949b8bc5f4fSMark Fasheh int error = 0;
19503704412bSAl Viro struct inode *inode = file_inode(file);
1951b8bc5f4fSMark Fasheh int lock_level = 0;
1952b8bc5f4fSMark Fasheh
1953f1088d47STao Ma trace_ocfs2_readdir((unsigned long long)OCFS2_I(inode)->ip_blkno);
1954b8bc5f4fSMark Fasheh
1955c4c2416aSGang He error = ocfs2_inode_lock_atime(inode, file->f_path.mnt, &lock_level, 1);
1956b8bc5f4fSMark Fasheh if (lock_level && error >= 0) {
1957b8bc5f4fSMark Fasheh /* We release EX lock which used to update atime
1958b8bc5f4fSMark Fasheh * and get PR lock again to reduce contention
1959b8bc5f4fSMark Fasheh * on commonly accessed directories. */
1960e63aecb6SMark Fasheh ocfs2_inode_unlock(inode, 1);
1961b8bc5f4fSMark Fasheh lock_level = 0;
1962e63aecb6SMark Fasheh error = ocfs2_inode_lock(inode, NULL, 0);
1963b8bc5f4fSMark Fasheh }
1964b8bc5f4fSMark Fasheh if (error < 0) {
1965b8bc5f4fSMark Fasheh if (error != -ENOENT)
1966b8bc5f4fSMark Fasheh mlog_errno(error);
1967b8bc5f4fSMark Fasheh /* we haven't got any yet, so propagate the error. */
1968b8bc5f4fSMark Fasheh goto bail_nolock;
1969b8bc5f4fSMark Fasheh }
1970b8bc5f4fSMark Fasheh
19713704412bSAl Viro error = ocfs2_dir_foreach_blk(inode, &file->f_version, ctx, false);
1972b8bc5f4fSMark Fasheh
1973e63aecb6SMark Fasheh ocfs2_inode_unlock(inode, lock_level);
1974c1e8d35eSTao Ma if (error)
1975c1e8d35eSTao Ma mlog_errno(error);
1976ccd979bdSMark Fasheh
1977aa958874SMark Fasheh bail_nolock:
1978ccd979bdSMark Fasheh
1979b8bc5f4fSMark Fasheh return error;
1980ccd979bdSMark Fasheh }
1981ccd979bdSMark Fasheh
1982ccd979bdSMark Fasheh /*
1983137cebf9Shongnanli * NOTE: this should always be called with parent dir i_rwsem taken.
1984ccd979bdSMark Fasheh */
ocfs2_find_files_on_disk(const char * name,int namelen,u64 * blkno,struct inode * inode,struct ocfs2_dir_lookup_result * lookup)1985ccd979bdSMark Fasheh int ocfs2_find_files_on_disk(const char *name,
1986ccd979bdSMark Fasheh int namelen,
1987ccd979bdSMark Fasheh u64 *blkno,
1988ccd979bdSMark Fasheh struct inode *inode,
19894a12ca3aSMark Fasheh struct ocfs2_dir_lookup_result *lookup)
1990ccd979bdSMark Fasheh {
1991ccd979bdSMark Fasheh int status = -ENOENT;
1992ccd979bdSMark Fasheh
1993f1088d47STao Ma trace_ocfs2_find_files_on_disk(namelen, name, blkno,
19944a12ca3aSMark Fasheh (unsigned long long)OCFS2_I(inode)->ip_blkno);
1995ccd979bdSMark Fasheh
19964a12ca3aSMark Fasheh status = ocfs2_find_entry(name, namelen, inode, lookup);
19974a12ca3aSMark Fasheh if (status)
1998ccd979bdSMark Fasheh goto leave;
1999ccd979bdSMark Fasheh
20004a12ca3aSMark Fasheh *blkno = le64_to_cpu(lookup->dl_entry->inode);
2001ccd979bdSMark Fasheh
2002ccd979bdSMark Fasheh status = 0;
2003ccd979bdSMark Fasheh leave:
2004ccd979bdSMark Fasheh
2005ccd979bdSMark Fasheh return status;
2006ccd979bdSMark Fasheh }
2007ccd979bdSMark Fasheh
2008be94d117SMark Fasheh /*
2009be94d117SMark Fasheh * Convenience function for callers which just want the block number
2010be94d117SMark Fasheh * mapped to a name and don't require the full dirent info, etc.
2011be94d117SMark Fasheh */
ocfs2_lookup_ino_from_name(struct inode * dir,const char * name,int namelen,u64 * blkno)2012be94d117SMark Fasheh int ocfs2_lookup_ino_from_name(struct inode *dir, const char *name,
2013be94d117SMark Fasheh int namelen, u64 *blkno)
2014be94d117SMark Fasheh {
2015be94d117SMark Fasheh int ret;
20164a12ca3aSMark Fasheh struct ocfs2_dir_lookup_result lookup = { NULL, };
2017be94d117SMark Fasheh
20184a12ca3aSMark Fasheh ret = ocfs2_find_files_on_disk(name, namelen, blkno, dir, &lookup);
20194a12ca3aSMark Fasheh ocfs2_free_dir_lookup_result(&lookup);
2020be94d117SMark Fasheh
2021be94d117SMark Fasheh return ret;
2022be94d117SMark Fasheh }
2023be94d117SMark Fasheh
2024ccd979bdSMark Fasheh /* Check for a name within a directory.
2025ccd979bdSMark Fasheh *
2026ccd979bdSMark Fasheh * Return 0 if the name does not exist
2027ccd979bdSMark Fasheh * Return -EEXIST if the directory contains the name
2028*94459962SSu Yue * Return -EFSCORRUPTED if found corruption
2029ccd979bdSMark Fasheh *
2030137cebf9Shongnanli * Callers should have i_rwsem + a cluster lock on dir
2031ccd979bdSMark Fasheh */
ocfs2_check_dir_for_entry(struct inode * dir,const char * name,int namelen)2032ccd979bdSMark Fasheh int ocfs2_check_dir_for_entry(struct inode *dir,
2033ccd979bdSMark Fasheh const char *name,
2034ccd979bdSMark Fasheh int namelen)
2035ccd979bdSMark Fasheh {
20367c01ad8fSDaeseok Youn int ret = 0;
20374a12ca3aSMark Fasheh struct ocfs2_dir_lookup_result lookup = { NULL, };
2038ccd979bdSMark Fasheh
2039f1088d47STao Ma trace_ocfs2_check_dir_for_entry(
2040b0697053SMark Fasheh (unsigned long long)OCFS2_I(dir)->ip_blkno, namelen, name);
2041ccd979bdSMark Fasheh
2042*94459962SSu Yue ret = ocfs2_find_entry(name, namelen, dir, &lookup);
2043*94459962SSu Yue if (ret == 0) {
2044ccd979bdSMark Fasheh ret = -EEXIST;
20457c01ad8fSDaeseok Youn mlog_errno(ret);
2046*94459962SSu Yue } else if (ret == -ENOENT) {
2047*94459962SSu Yue ret = 0;
20487c01ad8fSDaeseok Youn }
2049ccd979bdSMark Fasheh
20504a12ca3aSMark Fasheh ocfs2_free_dir_lookup_result(&lookup);
2051ccd979bdSMark Fasheh
2052ccd979bdSMark Fasheh return ret;
2053ccd979bdSMark Fasheh }
2054ccd979bdSMark Fasheh
20550bfbbf62SMark Fasheh struct ocfs2_empty_dir_priv {
20563704412bSAl Viro struct dir_context ctx;
20570bfbbf62SMark Fasheh unsigned seen_dot;
20580bfbbf62SMark Fasheh unsigned seen_dot_dot;
20590bfbbf62SMark Fasheh unsigned seen_other;
2060e3a93c2dSMark Fasheh unsigned dx_dir;
20610bfbbf62SMark Fasheh };
ocfs2_empty_dir_filldir(struct dir_context * ctx,const char * name,int name_len,loff_t pos,u64 ino,unsigned type)206225885a35SAl Viro static bool ocfs2_empty_dir_filldir(struct dir_context *ctx, const char *name,
2063ac7576f4SMiklos Szeredi int name_len, loff_t pos, u64 ino,
2064ac7576f4SMiklos Szeredi unsigned type)
20650bfbbf62SMark Fasheh {
2066ac7576f4SMiklos Szeredi struct ocfs2_empty_dir_priv *p =
2067ac7576f4SMiklos Szeredi container_of(ctx, struct ocfs2_empty_dir_priv, ctx);
20680bfbbf62SMark Fasheh
20690bfbbf62SMark Fasheh /*
20700bfbbf62SMark Fasheh * Check the positions of "." and ".." records to be sure
20710bfbbf62SMark Fasheh * they're in the correct place.
2072e3a93c2dSMark Fasheh *
2073e3a93c2dSMark Fasheh * Indexed directories don't need to proceed past the first
2074e3a93c2dSMark Fasheh * two entries, so we end the scan after seeing '..'. Despite
2075e3a93c2dSMark Fasheh * that, we allow the scan to proceed In the event that we
2076e3a93c2dSMark Fasheh * have a corrupted indexed directory (no dot or dot dot
2077e3a93c2dSMark Fasheh * entries). This allows us to double check for existing
2078e3a93c2dSMark Fasheh * entries which might not have been found in the index.
20790bfbbf62SMark Fasheh */
20800bfbbf62SMark Fasheh if (name_len == 1 && !strncmp(".", name, 1) && pos == 0) {
20810bfbbf62SMark Fasheh p->seen_dot = 1;
208225885a35SAl Viro return true;
20830bfbbf62SMark Fasheh }
20840bfbbf62SMark Fasheh
20850bfbbf62SMark Fasheh if (name_len == 2 && !strncmp("..", name, 2) &&
20860bfbbf62SMark Fasheh pos == OCFS2_DIR_REC_LEN(1)) {
20870bfbbf62SMark Fasheh p->seen_dot_dot = 1;
2088e3a93c2dSMark Fasheh
2089e3a93c2dSMark Fasheh if (p->dx_dir && p->seen_dot)
209025885a35SAl Viro return false;
2091e3a93c2dSMark Fasheh
209225885a35SAl Viro return true;
20930bfbbf62SMark Fasheh }
20940bfbbf62SMark Fasheh
20950bfbbf62SMark Fasheh p->seen_other = 1;
209625885a35SAl Viro return false;
20970bfbbf62SMark Fasheh }
2098e3a93c2dSMark Fasheh
ocfs2_empty_dir_dx(struct inode * inode,struct ocfs2_empty_dir_priv * priv)2099e3a93c2dSMark Fasheh static int ocfs2_empty_dir_dx(struct inode *inode,
2100e3a93c2dSMark Fasheh struct ocfs2_empty_dir_priv *priv)
2101e3a93c2dSMark Fasheh {
2102e3a93c2dSMark Fasheh int ret;
2103e3a93c2dSMark Fasheh struct buffer_head *di_bh = NULL;
2104e3a93c2dSMark Fasheh struct buffer_head *dx_root_bh = NULL;
2105e3a93c2dSMark Fasheh struct ocfs2_dinode *di;
2106e3a93c2dSMark Fasheh struct ocfs2_dx_root_block *dx_root;
2107e3a93c2dSMark Fasheh
2108e3a93c2dSMark Fasheh priv->dx_dir = 1;
2109e3a93c2dSMark Fasheh
2110e3a93c2dSMark Fasheh ret = ocfs2_read_inode_block(inode, &di_bh);
2111e3a93c2dSMark Fasheh if (ret) {
2112e3a93c2dSMark Fasheh mlog_errno(ret);
2113e3a93c2dSMark Fasheh goto out;
2114e3a93c2dSMark Fasheh }
2115e3a93c2dSMark Fasheh di = (struct ocfs2_dinode *)di_bh->b_data;
2116e3a93c2dSMark Fasheh
2117e3a93c2dSMark Fasheh ret = ocfs2_read_dx_root(inode, di, &dx_root_bh);
2118e3a93c2dSMark Fasheh if (ret) {
2119e3a93c2dSMark Fasheh mlog_errno(ret);
2120e3a93c2dSMark Fasheh goto out;
2121e3a93c2dSMark Fasheh }
2122e3a93c2dSMark Fasheh dx_root = (struct ocfs2_dx_root_block *)dx_root_bh->b_data;
2123e3a93c2dSMark Fasheh
2124e3a93c2dSMark Fasheh if (le32_to_cpu(dx_root->dr_num_entries) != 2)
2125e3a93c2dSMark Fasheh priv->seen_other = 1;
2126e3a93c2dSMark Fasheh
2127e3a93c2dSMark Fasheh out:
2128e3a93c2dSMark Fasheh brelse(di_bh);
2129e3a93c2dSMark Fasheh brelse(dx_root_bh);
2130e3a93c2dSMark Fasheh return ret;
2131e3a93c2dSMark Fasheh }
2132e3a93c2dSMark Fasheh
2133ccd979bdSMark Fasheh /*
2134ccd979bdSMark Fasheh * routine to check that the specified directory is empty (for rmdir)
21350bfbbf62SMark Fasheh *
21360bfbbf62SMark Fasheh * Returns 1 if dir is empty, zero otherwise.
21379b7895efSMark Fasheh *
2138e3a93c2dSMark Fasheh * XXX: This is a performance problem for unindexed directories.
2139ccd979bdSMark Fasheh */
ocfs2_empty_dir(struct inode * inode)2140ccd979bdSMark Fasheh int ocfs2_empty_dir(struct inode *inode)
2141ccd979bdSMark Fasheh {
21420bfbbf62SMark Fasheh int ret;
21433704412bSAl Viro struct ocfs2_empty_dir_priv priv = {
2144d6394b59SJeff Liu .ctx.actor = ocfs2_empty_dir_filldir,
21453704412bSAl Viro };
2146ccd979bdSMark Fasheh
2147e3a93c2dSMark Fasheh if (ocfs2_dir_indexed(inode)) {
2148e3a93c2dSMark Fasheh ret = ocfs2_empty_dir_dx(inode, &priv);
2149e3a93c2dSMark Fasheh if (ret)
2150e3a93c2dSMark Fasheh mlog_errno(ret);
2151e3a93c2dSMark Fasheh /*
2152e3a93c2dSMark Fasheh * We still run ocfs2_dir_foreach to get the checks
2153e3a93c2dSMark Fasheh * for "." and "..".
2154e3a93c2dSMark Fasheh */
2155e3a93c2dSMark Fasheh }
2156e3a93c2dSMark Fasheh
21573704412bSAl Viro ret = ocfs2_dir_foreach(inode, &priv.ctx);
21580bfbbf62SMark Fasheh if (ret)
21590bfbbf62SMark Fasheh mlog_errno(ret);
21600bfbbf62SMark Fasheh
21610bfbbf62SMark Fasheh if (!priv.seen_dot || !priv.seen_dot_dot) {
2162b0697053SMark Fasheh mlog(ML_ERROR, "bad directory (dir #%llu) - no `.' or `..'\n",
2163b0697053SMark Fasheh (unsigned long long)OCFS2_I(inode)->ip_blkno);
21640bfbbf62SMark Fasheh /*
21650bfbbf62SMark Fasheh * XXX: Is it really safe to allow an unlink to continue?
21660bfbbf62SMark Fasheh */
2167ccd979bdSMark Fasheh return 1;
2168ccd979bdSMark Fasheh }
21690bfbbf62SMark Fasheh
21700bfbbf62SMark Fasheh return !priv.seen_other;
2171ccd979bdSMark Fasheh }
2172ccd979bdSMark Fasheh
217387d35a74SMark Fasheh /*
217487d35a74SMark Fasheh * Fills "." and ".." dirents in a new directory block. Returns dirent for
217587d35a74SMark Fasheh * "..", which might be used during creation of a directory with a trailing
217687d35a74SMark Fasheh * header. It is otherwise safe to ignore the return code.
217787d35a74SMark Fasheh */
ocfs2_fill_initial_dirents(struct inode * inode,struct inode * parent,char * start,unsigned int size)217887d35a74SMark Fasheh static struct ocfs2_dir_entry *ocfs2_fill_initial_dirents(struct inode *inode,
21795b6a3a2bSMark Fasheh struct inode *parent,
218087d35a74SMark Fasheh char *start,
218187d35a74SMark Fasheh unsigned int size)
21825b6a3a2bSMark Fasheh {
21835b6a3a2bSMark Fasheh struct ocfs2_dir_entry *de = (struct ocfs2_dir_entry *)start;
21845b6a3a2bSMark Fasheh
21855b6a3a2bSMark Fasheh de->inode = cpu_to_le64(OCFS2_I(inode)->ip_blkno);
21865b6a3a2bSMark Fasheh de->name_len = 1;
21875b6a3a2bSMark Fasheh de->rec_len =
21885b6a3a2bSMark Fasheh cpu_to_le16(OCFS2_DIR_REC_LEN(de->name_len));
21895b6a3a2bSMark Fasheh strcpy(de->name, ".");
21905b6a3a2bSMark Fasheh ocfs2_set_de_type(de, S_IFDIR);
21915b6a3a2bSMark Fasheh
21925b6a3a2bSMark Fasheh de = (struct ocfs2_dir_entry *) ((char *)de + le16_to_cpu(de->rec_len));
21935b6a3a2bSMark Fasheh de->inode = cpu_to_le64(OCFS2_I(parent)->ip_blkno);
21945b6a3a2bSMark Fasheh de->rec_len = cpu_to_le16(size - OCFS2_DIR_REC_LEN(1));
21955b6a3a2bSMark Fasheh de->name_len = 2;
21965b6a3a2bSMark Fasheh strcpy(de->name, "..");
21975b6a3a2bSMark Fasheh ocfs2_set_de_type(de, S_IFDIR);
219887d35a74SMark Fasheh
219987d35a74SMark Fasheh return de;
22005b6a3a2bSMark Fasheh }
22015b6a3a2bSMark Fasheh
22025b6a3a2bSMark Fasheh /*
22035b6a3a2bSMark Fasheh * This works together with code in ocfs2_mknod_locked() which sets
22045b6a3a2bSMark Fasheh * the inline-data flag and initializes the inline-data section.
22055b6a3a2bSMark Fasheh */
ocfs2_fill_new_dir_id(struct ocfs2_super * osb,handle_t * handle,struct inode * parent,struct inode * inode,struct buffer_head * di_bh)22065b6a3a2bSMark Fasheh static int ocfs2_fill_new_dir_id(struct ocfs2_super *osb,
22075b6a3a2bSMark Fasheh handle_t *handle,
22085b6a3a2bSMark Fasheh struct inode *parent,
22095b6a3a2bSMark Fasheh struct inode *inode,
22105b6a3a2bSMark Fasheh struct buffer_head *di_bh)
22115b6a3a2bSMark Fasheh {
22125b6a3a2bSMark Fasheh int ret;
22135b6a3a2bSMark Fasheh struct ocfs2_dinode *di = (struct ocfs2_dinode *)di_bh->b_data;
22145b6a3a2bSMark Fasheh struct ocfs2_inline_data *data = &di->id2.i_data;
22155b6a3a2bSMark Fasheh unsigned int size = le16_to_cpu(data->id_count);
22165b6a3a2bSMark Fasheh
22170cf2f763SJoel Becker ret = ocfs2_journal_access_di(handle, INODE_CACHE(inode), di_bh,
22185b6a3a2bSMark Fasheh OCFS2_JOURNAL_ACCESS_WRITE);
22195b6a3a2bSMark Fasheh if (ret) {
22205b6a3a2bSMark Fasheh mlog_errno(ret);
22215b6a3a2bSMark Fasheh goto out;
22225b6a3a2bSMark Fasheh }
22235b6a3a2bSMark Fasheh
22245b6a3a2bSMark Fasheh ocfs2_fill_initial_dirents(inode, parent, data->id_data, size);
22255b6a3a2bSMark Fasheh ocfs2_journal_dirty(handle, di_bh);
22265b6a3a2bSMark Fasheh
22275b6a3a2bSMark Fasheh i_size_write(inode, size);
2228bfe86848SMiklos Szeredi set_nlink(inode, 2);
22295b6a3a2bSMark Fasheh inode->i_blocks = ocfs2_inode_sector_count(inode);
22305b6a3a2bSMark Fasheh
22315b6a3a2bSMark Fasheh ret = ocfs2_mark_inode_dirty(handle, inode, di_bh);
22325b6a3a2bSMark Fasheh if (ret < 0)
22335b6a3a2bSMark Fasheh mlog_errno(ret);
22345b6a3a2bSMark Fasheh
22355b6a3a2bSMark Fasheh out:
22365b6a3a2bSMark Fasheh return ret;
22375b6a3a2bSMark Fasheh }
22385b6a3a2bSMark Fasheh
/*
 * Initialise a new extent-based directory: extend it by one block, zero
 * that block, write "." and ".." into it and, when the directory wants a
 * block trailer, initialise the trailer too.  The inode size is set to
 * one block and the link count to 2.
 *
 * On success, if @ret_new_bh is non-NULL the buffer head of the new
 * block is handed over through it instead of being released here.
 *
 * Returns 0 on success or a negative error (logged).
 */
static int ocfs2_fill_new_dir_el(struct ocfs2_super *osb,
				 handle_t *handle,
				 struct inode *parent,
				 struct inode *inode,
				 struct buffer_head *fe_bh,
				 struct ocfs2_alloc_context *data_ac,
				 struct buffer_head **ret_new_bh)
{
	int status;
	/* Bytes of the block available to dirents. */
	unsigned int dirent_space = osb->sb->s_blocksize;
	struct buffer_head *blk_bh = NULL;
	struct ocfs2_dir_entry *dotdot;

	if (ocfs2_new_dir_wants_trailer(inode))
		dirent_space = ocfs2_dir_trailer_blk_off(parent->i_sb);

	status = ocfs2_do_extend_dir(osb->sb, handle, inode, fe_bh,
				     data_ac, NULL, &blk_bh);
	if (status < 0) {
		mlog_errno(status);
		goto bail;
	}

	ocfs2_set_new_buffer_uptodate(INODE_CACHE(inode), blk_bh);

	status = ocfs2_journal_access_db(handle, INODE_CACHE(inode), blk_bh,
					 OCFS2_JOURNAL_ACCESS_CREATE);
	if (status < 0) {
		mlog_errno(status);
		goto bail;
	}
	memset(blk_bh->b_data, 0, osb->sb->s_blocksize);

	dotdot = ocfs2_fill_initial_dirents(inode, parent, blk_bh->b_data,
					    dirent_space);
	if (ocfs2_new_dir_wants_trailer(inode)) {
		int hole = le16_to_cpu(dotdot->rec_len);

		/*
		 * Figure out the size of the hole left over after
		 * insertion of '.' and '..'. The trailer wants this
		 * information.
		 */
		hole -= OCFS2_DIR_REC_LEN(2);
		hole -= sizeof(struct ocfs2_dir_block_trailer);

		ocfs2_init_dir_trailer(inode, blk_bh, hole);
	}

	ocfs2_journal_dirty(handle, blk_bh);

	i_size_write(inode, inode->i_sb->s_blocksize);
	set_nlink(inode, 2);
	inode->i_blocks = ocfs2_inode_sector_count(inode);
	status = ocfs2_mark_inode_dirty(handle, inode, fe_bh);
	if (status < 0) {
		mlog_errno(status);
		goto bail;
	}

	status = 0;
	if (ret_new_bh) {
		/* Hand the reference to the caller; skip the brelse below. */
		*ret_new_bh = blk_bh;
		blk_bh = NULL;
	}
bail:
	brelse(blk_bh);

	return status;
}
2308316f4b9fSMark Fasheh
/*
 * Allocate and format a dx root block for @dir and attach it to the
 * directory's dinode.
 *
 * One metadata block is claimed from @meta_ac, zeroed and filled in as a
 * dx root (inline entry list when @dx_inline is set, extent list
 * otherwise).  The dinode in @di_bh is then pointed at the new root and
 * OCFS2_INDEXED_DIR_FL is set on both the in-memory and on-disk dynamic
 * features.  dr_free_blk is set to @dirdata_bh's block number if that
 * block's trailer reports free space, and to 0 otherwise.
 *
 * On success the new root's buffer head is passed back through
 * @ret_dx_root_bh.  Returns 0 on success or a negative error.
 */
static int ocfs2_dx_dir_attach_index(struct ocfs2_super *osb,
				     handle_t *handle, struct inode *dir,
				     struct buffer_head *di_bh,
				     struct buffer_head *dirdata_bh,
				     struct ocfs2_alloc_context *meta_ac,
				     int dx_inline, u32 num_entries,
				     struct buffer_head **ret_dx_root_bh)
{
	int status;
	struct ocfs2_dinode *dinode = (struct ocfs2_dinode *) di_bh->b_data;
	u16 suballoc_bit;
	u64 suballoc_loc, root_blkno;
	unsigned int num_bits;
	struct buffer_head *root_bh = NULL;
	struct ocfs2_dx_root_block *root;
	struct ocfs2_dir_block_trailer *trailer =
		ocfs2_trailer_from_bh(dirdata_bh, dir->i_sb);

	status = ocfs2_claim_metadata(handle, meta_ac, 1, &suballoc_loc,
				      &suballoc_bit, &num_bits, &root_blkno);
	if (status) {
		mlog_errno(status);
		goto out;
	}

	trace_ocfs2_dx_dir_attach_index(
				(unsigned long long)OCFS2_I(dir)->ip_blkno,
				(unsigned long long)root_blkno);

	root_bh = sb_getblk(osb->sb, root_blkno);
	if (!root_bh) {
		status = -ENOMEM;
		goto out;
	}
	ocfs2_set_new_buffer_uptodate(INODE_CACHE(dir), root_bh);

	status = ocfs2_journal_access_dr(handle, INODE_CACHE(dir), root_bh,
					 OCFS2_JOURNAL_ACCESS_CREATE);
	if (status < 0) {
		mlog_errno(status);
		goto out;
	}

	/* Format the freshly claimed block as a dx root. */
	root = (struct ocfs2_dx_root_block *)root_bh->b_data;
	memset(root, 0, osb->sb->s_blocksize);
	strcpy(root->dr_signature, OCFS2_DX_ROOT_SIGNATURE);
	root->dr_suballoc_slot = cpu_to_le16(meta_ac->ac_alloc_slot);
	root->dr_suballoc_loc = cpu_to_le64(suballoc_loc);
	root->dr_suballoc_bit = cpu_to_le16(suballoc_bit);
	root->dr_fs_generation = cpu_to_le32(osb->fs_generation);
	root->dr_blkno = cpu_to_le64(root_blkno);
	root->dr_dir_blkno = cpu_to_le64(OCFS2_I(dir)->ip_blkno);
	root->dr_num_entries = cpu_to_le32(num_entries);
	if (le16_to_cpu(trailer->db_free_rec_len) != 0)
		root->dr_free_blk = cpu_to_le64(dirdata_bh->b_blocknr);
	else
		root->dr_free_blk = cpu_to_le64(0);

	if (!dx_inline) {
		root->dr_list.l_count =
			cpu_to_le16(ocfs2_extent_recs_per_dx_root(osb->sb));
	} else {
		root->dr_flags |= OCFS2_DX_FLAG_INLINE;
		root->dr_entries.de_count =
			cpu_to_le16(ocfs2_dx_entries_per_root(osb->sb));
	}
	ocfs2_journal_dirty(handle, root_bh);

	/* Now hook the root into the directory's dinode. */
	status = ocfs2_journal_access_di(handle, INODE_CACHE(dir), di_bh,
					 OCFS2_JOURNAL_ACCESS_CREATE);
	if (status) {
		mlog_errno(status);
		goto out;
	}

	dinode->i_dx_root = cpu_to_le64(root_blkno);

	spin_lock(&OCFS2_I(dir)->ip_lock);
	OCFS2_I(dir)->ip_dyn_features |= OCFS2_INDEXED_DIR_FL;
	dinode->i_dyn_features = cpu_to_le16(OCFS2_I(dir)->ip_dyn_features);
	spin_unlock(&OCFS2_I(dir)->ip_lock);

	ocfs2_journal_dirty(handle, di_bh);

	/* Pass the reference to the caller; skip the brelse below. */
	*ret_dx_root_bh = root_bh;
	root_bh = NULL;

out:
	brelse(root_bh);
	return status;
}
24009b7895efSMark Fasheh
/*
 * Format @num_dx_leaves consecutive blocks, starting at @start_blk, as
 * empty dx leaves.  Each block's buffer head is stored in @dx_leaves[]
 * as soon as it is obtained; this function does not release them, even
 * when it fails part-way through.
 *
 * Returns 0 on success, -ENOMEM if a buffer head could not be obtained,
 * or the (logged) error from the journal access call.
 */
static int ocfs2_dx_dir_format_cluster(struct ocfs2_super *osb,
				       handle_t *handle, struct inode *dir,
				       struct buffer_head **dx_leaves,
				       int num_dx_leaves, u64 start_blk)
{
	int idx;

	for (idx = 0; idx < num_dx_leaves; idx++) {
		struct ocfs2_dx_leaf *leaf;
		struct buffer_head *leaf_bh;
		int status;

		leaf_bh = sb_getblk(osb->sb, start_blk + idx);
		if (!leaf_bh)
			return -ENOMEM;
		dx_leaves[idx] = leaf_bh;

		ocfs2_set_new_buffer_uptodate(INODE_CACHE(dir), leaf_bh);

		status = ocfs2_journal_access_dl(handle, INODE_CACHE(dir),
						 leaf_bh,
						 OCFS2_JOURNAL_ACCESS_CREATE);
		if (status < 0) {
			mlog_errno(status);
			return status;
		}

		leaf = (struct ocfs2_dx_leaf *) leaf_bh->b_data;

		memset(leaf, 0, osb->sb->s_blocksize);
		strcpy(leaf->dl_signature, OCFS2_DX_LEAF_SIGNATURE);
		leaf->dl_fs_generation = cpu_to_le32(osb->fs_generation);
		leaf->dl_blkno = cpu_to_le64(leaf_bh->b_blocknr);
		leaf->dl_list.de_count =
			cpu_to_le16(ocfs2_dx_entries_per_leaf(osb->sb));

		trace_ocfs2_dx_dir_format_cluster(
				(unsigned long long)OCFS2_I(dir)->ip_blkno,
				(unsigned long long)leaf_bh->b_blocknr,
				le16_to_cpu(leaf->dl_list.de_count));

		ocfs2_journal_dirty(handle, leaf_bh);
	}

	return 0;
}
24489b7895efSMark Fasheh
24499b7895efSMark Fasheh /*
24509b7895efSMark Fasheh * Allocates and formats a new cluster for use in an indexed dir
24519b7895efSMark Fasheh * leaf. This version will not do the extent insert, so that it can be
24529b7895efSMark Fasheh * used by operations which need careful ordering.
24539b7895efSMark Fasheh */
__ocfs2_dx_dir_new_cluster(struct inode * dir,u32 cpos,handle_t * handle,struct ocfs2_alloc_context * data_ac,struct buffer_head ** dx_leaves,int num_dx_leaves,u64 * ret_phys_blkno)24549b7895efSMark Fasheh static int __ocfs2_dx_dir_new_cluster(struct inode *dir,
24559b7895efSMark Fasheh u32 cpos, handle_t *handle,
24569b7895efSMark Fasheh struct ocfs2_alloc_context *data_ac,
24579b7895efSMark Fasheh struct buffer_head **dx_leaves,
24589b7895efSMark Fasheh int num_dx_leaves, u64 *ret_phys_blkno)
24599b7895efSMark Fasheh {
24609b7895efSMark Fasheh int ret;
24619b7895efSMark Fasheh u32 phys, num;
24629b7895efSMark Fasheh u64 phys_blkno;
24639b7895efSMark Fasheh struct ocfs2_super *osb = OCFS2_SB(dir->i_sb);
24649b7895efSMark Fasheh
24659b7895efSMark Fasheh /*
24669b7895efSMark Fasheh * XXX: For create, this should claim cluster for the index
24679b7895efSMark Fasheh * *before* the unindexed insert so that we have a better
24689b7895efSMark Fasheh * chance of contiguousness as the directory grows in number
24699b7895efSMark Fasheh * of entries.
24709b7895efSMark Fasheh */
24711ed9b777SJoel Becker ret = __ocfs2_claim_clusters(handle, data_ac, 1, 1, &phys, &num);
24729b7895efSMark Fasheh if (ret) {
24739b7895efSMark Fasheh mlog_errno(ret);
24749b7895efSMark Fasheh goto out;
24759b7895efSMark Fasheh }
24769b7895efSMark Fasheh
24779b7895efSMark Fasheh /*
24789b7895efSMark Fasheh * Format the new cluster first. That way, we're inserting
24799b7895efSMark Fasheh * valid data.
24809b7895efSMark Fasheh */
24819b7895efSMark Fasheh phys_blkno = ocfs2_clusters_to_blocks(osb->sb, phys);
24829b7895efSMark Fasheh ret = ocfs2_dx_dir_format_cluster(osb, handle, dir, dx_leaves,
24839b7895efSMark Fasheh num_dx_leaves, phys_blkno);
24849b7895efSMark Fasheh if (ret) {
24859b7895efSMark Fasheh mlog_errno(ret);
24869b7895efSMark Fasheh goto out;
24879b7895efSMark Fasheh }
24889b7895efSMark Fasheh
24899b7895efSMark Fasheh *ret_phys_blkno = phys_blkno;
24909b7895efSMark Fasheh out:
24919b7895efSMark Fasheh return ret;
24929b7895efSMark Fasheh }
24939b7895efSMark Fasheh
/*
 * Allocate and format a new index cluster via
 * __ocfs2_dx_dir_new_cluster(), then insert it into the extent tree @et
 * at logical cluster @cpos as a one-cluster extent.
 *
 * Returns 0 on success or a negative error (logged).
 */
static int ocfs2_dx_dir_new_cluster(struct inode *dir,
				    struct ocfs2_extent_tree *et,
				    u32 cpos, handle_t *handle,
				    struct ocfs2_alloc_context *data_ac,
				    struct ocfs2_alloc_context *meta_ac,
				    struct buffer_head **dx_leaves,
				    int num_dx_leaves)
{
	u64 first_blk;
	int status;

	status = __ocfs2_dx_dir_new_cluster(dir, cpos, handle, data_ac,
					    dx_leaves, num_dx_leaves,
					    &first_blk);
	if (status) {
		mlog_errno(status);
		return status;
	}

	status = ocfs2_insert_extent(handle, et, cpos, first_blk, 1, 0,
				     meta_ac);
	if (status)
		mlog_errno(status);

	return status;
}
25199b7895efSMark Fasheh
ocfs2_dx_dir_kmalloc_leaves(struct super_block * sb,int * ret_num_leaves)25209b7895efSMark Fasheh static struct buffer_head **ocfs2_dx_dir_kmalloc_leaves(struct super_block *sb,
25219b7895efSMark Fasheh int *ret_num_leaves)
25229b7895efSMark Fasheh {
25239b7895efSMark Fasheh int num_dx_leaves = ocfs2_clusters_to_blocks(sb, 1);
25249b7895efSMark Fasheh struct buffer_head **dx_leaves;
25259b7895efSMark Fasheh
25269b7895efSMark Fasheh dx_leaves = kcalloc(num_dx_leaves, sizeof(struct buffer_head *),
25279b7895efSMark Fasheh GFP_NOFS);
25289b7895efSMark Fasheh if (dx_leaves && ret_num_leaves)
25299b7895efSMark Fasheh *ret_num_leaves = num_dx_leaves;
25309b7895efSMark Fasheh
25319b7895efSMark Fasheh return dx_leaves;
25329b7895efSMark Fasheh }
25339b7895efSMark Fasheh
/*
 * Fill in a brand new directory and give it an index.
 *
 * The directory is first built exactly as an unindexed one would be,
 * and the index root is attached afterwards. A new directory only holds
 * "." and "..", both living in the first directory block, so indexing
 * it amounts to inserting those two names into the root's entry list.
 *
 * Returns 0 on success or the error from the step that failed.
 */
static int ocfs2_fill_new_dir_dx(struct ocfs2_super *osb,
				 handle_t *handle,
				 struct inode *parent,
				 struct inode *inode,
				 struct buffer_head *di_bh,
				 struct ocfs2_alloc_context *data_ac,
				 struct ocfs2_alloc_context *meta_ac)
{
	struct buffer_head *first_blk_bh = NULL;
	struct buffer_head *root_bh = NULL;
	struct ocfs2_dx_root_block *root;
	struct ocfs2_dx_hinfo hash;
	int status;

	/* Step 1: lay the directory out as if it had no index. */
	status = ocfs2_fill_new_dir_el(osb, handle, parent, inode, di_bh,
				       data_ac, &first_blk_bh);
	if (status) {
		mlog_errno(status);
		goto release;
	}

	/*
	 * Step 2: attach the index root. The literal arguments 1 and 2
	 * sit where ocfs2_expand_inline_dir() passes dx_inline and
	 * num_dx_entries: an inline root holding two entries.
	 */
	status = ocfs2_dx_dir_attach_index(osb, handle, inode, di_bh,
					   first_blk_bh, meta_ac, 1, 2,
					   &root_bh);
	if (status) {
		mlog_errno(status);
		goto release;
	}

	root = (struct ocfs2_dx_root_block *)root_bh->b_data;

	/*
	 * Step 3: index "." and "..". The root buffer has been journaled
	 * for us by ocfs2_dx_dir_attach_index.
	 */
	ocfs2_dx_dir_name_hash(inode, ".", 1, &hash);
	ocfs2_dx_entry_list_insert(&root->dr_entries, &hash,
				   first_blk_bh->b_blocknr);

	ocfs2_dx_dir_name_hash(inode, "..", 2, &hash);
	ocfs2_dx_entry_list_insert(&root->dr_entries, &hash,
				   first_blk_bh->b_blocknr);

release:
	brelse(root_bh);
	brelse(first_blk_bh);
	return status;
}
25889b7895efSMark Fasheh
/*
 * Initialise the contents of a newly created directory, choosing the
 * on-disk layout from the inode and filesystem features:
 *
 *   - inode flagged OCFS2_INLINE_DATA_FL  -> inline-data directory
 *   - filesystem supports indexed dirs    -> extent directory + index
 *   - otherwise                           -> plain extent directory
 *
 * A data allocation context is mandatory unless the filesystem supports
 * inline data.
 */
int ocfs2_fill_new_dir(struct ocfs2_super *osb,
		       handle_t *handle,
		       struct inode *parent,
		       struct inode *inode,
		       struct buffer_head *fe_bh,
		       struct ocfs2_alloc_context *data_ac,
		       struct ocfs2_alloc_context *meta_ac)
{
	BUG_ON(data_ac == NULL && !ocfs2_supports_inline_data(osb));

	if (OCFS2_I(inode)->ip_dyn_features & OCFS2_INLINE_DATA_FL)
		return ocfs2_fill_new_dir_id(osb, handle, parent, inode, fe_bh);

	if (!ocfs2_supports_indexed_dirs(osb))
		return ocfs2_fill_new_dir_el(osb, handle, parent, inode,
					     fe_bh, data_ac, NULL);

	return ocfs2_fill_new_dir_dx(osb, handle, parent, inode, fe_bh,
				     data_ac, meta_ac);
}
26109b7895efSMark Fasheh
/*
 * Walk every dirent in the directory block 'dirent_bh' and insert each
 * live one (non-zero name_len and inode) into the dx leaf chosen by its
 * name hash.
 *
 * '*num_dx_entries' is incremented once per entry inserted; the caller
 * supplies its starting value.
 *
 * Returns 0 on success, or the first error from
 * __ocfs2_dx_dir_leaf_insert(), at which point the walk stops.
 */
static int ocfs2_dx_dir_index_block(struct inode *dir,
				    handle_t *handle,
				    struct buffer_head **dx_leaves,
				    int num_dx_leaves,
				    u32 *num_dx_entries,
				    struct buffer_head *dirent_bh)
{
	u64 blkno = dirent_bh->b_blocknr;
	char *pos = dirent_bh->b_data;
	char *end = pos + dir->i_sb->s_blocksize;
	struct ocfs2_dir_entry *de;
	struct ocfs2_dx_hinfo hash;
	int status, name_len, leaf_idx;

	/* 'de' is set at the top of each pass, before the step runs. */
	for (; pos < end; pos += le16_to_cpu(de->rec_len)) {
		de = (struct ocfs2_dir_entry *)pos;

		name_len = de->name_len;
		if (!name_len || !de->inode)
			continue;	/* unused slot */

		ocfs2_dx_dir_name_hash(dir, de->name, name_len, &hash);

		/* The hash selects which leaf block of the cluster to use. */
		leaf_idx = ocfs2_dx_dir_hash_idx(OCFS2_SB(dir->i_sb), &hash);

		status = __ocfs2_dx_dir_leaf_insert(dir, handle, &hash, blkno,
						    dx_leaves[leaf_idx]);
		if (status) {
			mlog_errno(status);
			return status;
		}

		*num_dx_entries = *num_dx_entries + 1;
	}

	return 0;
}
2656e7c17e43SMark Fasheh
26574ed8a6bbSMark Fasheh /*
26584ed8a6bbSMark Fasheh * XXX: This expects dx_root_bh to already be part of the transaction.
26594ed8a6bbSMark Fasheh */
ocfs2_dx_dir_index_root_block(struct inode * dir,struct buffer_head * dx_root_bh,struct buffer_head * dirent_bh)26604ed8a6bbSMark Fasheh static void ocfs2_dx_dir_index_root_block(struct inode *dir,
26614ed8a6bbSMark Fasheh struct buffer_head *dx_root_bh,
26624ed8a6bbSMark Fasheh struct buffer_head *dirent_bh)
26634ed8a6bbSMark Fasheh {
26644ed8a6bbSMark Fasheh char *de_buf, *limit;
26654ed8a6bbSMark Fasheh struct ocfs2_dx_root_block *dx_root;
26664ed8a6bbSMark Fasheh struct ocfs2_dir_entry *de;
26674ed8a6bbSMark Fasheh struct ocfs2_dx_hinfo hinfo;
26684ed8a6bbSMark Fasheh u64 dirent_blk = dirent_bh->b_blocknr;
26694ed8a6bbSMark Fasheh
26704ed8a6bbSMark Fasheh dx_root = (struct ocfs2_dx_root_block *)dx_root_bh->b_data;
26714ed8a6bbSMark Fasheh
26724ed8a6bbSMark Fasheh de_buf = dirent_bh->b_data;
26734ed8a6bbSMark Fasheh limit = de_buf + dir->i_sb->s_blocksize;
26744ed8a6bbSMark Fasheh
26754ed8a6bbSMark Fasheh while (de_buf < limit) {
26764ed8a6bbSMark Fasheh de = (struct ocfs2_dir_entry *)de_buf;
26774ed8a6bbSMark Fasheh
26784ed8a6bbSMark Fasheh if (!de->name_len || !de->inode)
26794ed8a6bbSMark Fasheh goto inc;
26804ed8a6bbSMark Fasheh
26814ed8a6bbSMark Fasheh ocfs2_dx_dir_name_hash(dir, de->name, de->name_len, &hinfo);
26824ed8a6bbSMark Fasheh
2683f1088d47STao Ma trace_ocfs2_dx_dir_index_root_block(
2684f1088d47STao Ma (unsigned long long)dir->i_ino,
2685f1088d47STao Ma hinfo.major_hash, hinfo.minor_hash,
2686f1088d47STao Ma de->name_len, de->name,
2687f1088d47STao Ma le16_to_cpu(dx_root->dr_entries.de_num_used));
26884ed8a6bbSMark Fasheh
26894ed8a6bbSMark Fasheh ocfs2_dx_entry_list_insert(&dx_root->dr_entries, &hinfo,
26904ed8a6bbSMark Fasheh dirent_blk);
2691e3a93c2dSMark Fasheh
2692e3a93c2dSMark Fasheh le32_add_cpu(&dx_root->dr_num_entries, 1);
26934ed8a6bbSMark Fasheh inc:
26944ed8a6bbSMark Fasheh de_buf += le16_to_cpu(de->rec_len);
26954ed8a6bbSMark Fasheh }
26964ed8a6bbSMark Fasheh }
26974ed8a6bbSMark Fasheh
26984ed8a6bbSMark Fasheh /*
26994ed8a6bbSMark Fasheh * Count the number of inline directory entries in di_bh and compare
27004ed8a6bbSMark Fasheh * them against the number of entries we can hold in an inline dx root
27014ed8a6bbSMark Fasheh * block.
27024ed8a6bbSMark Fasheh */
ocfs2_new_dx_should_be_inline(struct inode * dir,struct buffer_head * di_bh)27034ed8a6bbSMark Fasheh static int ocfs2_new_dx_should_be_inline(struct inode *dir,
27044ed8a6bbSMark Fasheh struct buffer_head *di_bh)
27054ed8a6bbSMark Fasheh {
27064ed8a6bbSMark Fasheh int dirent_count = 0;
27074ed8a6bbSMark Fasheh char *de_buf, *limit;
27084ed8a6bbSMark Fasheh struct ocfs2_dir_entry *de;
27094ed8a6bbSMark Fasheh struct ocfs2_dinode *di = (struct ocfs2_dinode *)di_bh->b_data;
27104ed8a6bbSMark Fasheh
27114ed8a6bbSMark Fasheh de_buf = di->id2.i_data.id_data;
27124ed8a6bbSMark Fasheh limit = de_buf + i_size_read(dir);
27134ed8a6bbSMark Fasheh
27144ed8a6bbSMark Fasheh while (de_buf < limit) {
27154ed8a6bbSMark Fasheh de = (struct ocfs2_dir_entry *)de_buf;
27164ed8a6bbSMark Fasheh
27174ed8a6bbSMark Fasheh if (de->name_len && de->inode)
27184ed8a6bbSMark Fasheh dirent_count++;
27194ed8a6bbSMark Fasheh
27204ed8a6bbSMark Fasheh de_buf += le16_to_cpu(de->rec_len);
27214ed8a6bbSMark Fasheh }
27224ed8a6bbSMark Fasheh
27234ed8a6bbSMark Fasheh /* We are careful to leave room for one extra record. */
27244ed8a6bbSMark Fasheh return dirent_count < ocfs2_dx_entries_per_root(dir->i_sb);
27254ed8a6bbSMark Fasheh }
27265b6a3a2bSMark Fasheh
272787d35a74SMark Fasheh /*
272887d35a74SMark Fasheh * Expand rec_len of the rightmost dirent in a directory block so that it
272987d35a74SMark Fasheh * contains the end of our valid space for dirents. We do this during
273087d35a74SMark Fasheh * expansion from an inline directory to one with extents. The first dir block
273187d35a74SMark Fasheh * in that case is taken from the inline data portion of the inode block.
273287d35a74SMark Fasheh *
2733e7c17e43SMark Fasheh * This will also return the largest amount of contiguous space for a dirent
2734e7c17e43SMark Fasheh * in the block. That value is *not* necessarily the last dirent, even after
2735e7c17e43SMark Fasheh * expansion. The directory indexing code wants this value for free space
2736e7c17e43SMark Fasheh * accounting. We do this here since we're already walking the entire dir
2737e7c17e43SMark Fasheh * block.
2738e7c17e43SMark Fasheh *
273987d35a74SMark Fasheh * We add the dir trailer if this filesystem wants it.
274087d35a74SMark Fasheh */
ocfs2_expand_last_dirent(char * start,unsigned int old_size,struct inode * dir)2741e7c17e43SMark Fasheh static unsigned int ocfs2_expand_last_dirent(char *start, unsigned int old_size,
2742e7c17e43SMark Fasheh struct inode *dir)
27435b6a3a2bSMark Fasheh {
2744e7c17e43SMark Fasheh struct super_block *sb = dir->i_sb;
27455b6a3a2bSMark Fasheh struct ocfs2_dir_entry *de;
27465b6a3a2bSMark Fasheh struct ocfs2_dir_entry *prev_de;
27475b6a3a2bSMark Fasheh char *de_buf, *limit;
274887d35a74SMark Fasheh unsigned int new_size = sb->s_blocksize;
2749e7c17e43SMark Fasheh unsigned int bytes, this_hole;
2750e7c17e43SMark Fasheh unsigned int largest_hole = 0;
275187d35a74SMark Fasheh
2752e7c17e43SMark Fasheh if (ocfs2_new_dir_wants_trailer(dir))
275387d35a74SMark Fasheh new_size = ocfs2_dir_trailer_blk_off(sb);
275487d35a74SMark Fasheh
275587d35a74SMark Fasheh bytes = new_size - old_size;
27565b6a3a2bSMark Fasheh
27575b6a3a2bSMark Fasheh limit = start + old_size;
27585b6a3a2bSMark Fasheh de_buf = start;
27595b6a3a2bSMark Fasheh de = (struct ocfs2_dir_entry *)de_buf;
27605b6a3a2bSMark Fasheh do {
2761e7c17e43SMark Fasheh this_hole = ocfs2_figure_dirent_hole(de);
2762e7c17e43SMark Fasheh if (this_hole > largest_hole)
2763e7c17e43SMark Fasheh largest_hole = this_hole;
2764e7c17e43SMark Fasheh
27655b6a3a2bSMark Fasheh prev_de = de;
27665b6a3a2bSMark Fasheh de_buf += le16_to_cpu(de->rec_len);
27675b6a3a2bSMark Fasheh de = (struct ocfs2_dir_entry *)de_buf;
27685b6a3a2bSMark Fasheh } while (de_buf < limit);
27695b6a3a2bSMark Fasheh
27705b6a3a2bSMark Fasheh le16_add_cpu(&prev_de->rec_len, bytes);
2771e7c17e43SMark Fasheh
2772e7c17e43SMark Fasheh /* We need to double check this after modification of the final
2773e7c17e43SMark Fasheh * dirent. */
2774e7c17e43SMark Fasheh this_hole = ocfs2_figure_dirent_hole(prev_de);
2775e7c17e43SMark Fasheh if (this_hole > largest_hole)
2776e7c17e43SMark Fasheh largest_hole = this_hole;
2777e7c17e43SMark Fasheh
2778e7c17e43SMark Fasheh if (largest_hole >= OCFS2_DIR_MIN_REC_LEN)
2779e7c17e43SMark Fasheh return largest_hole;
2780e7c17e43SMark Fasheh return 0;
27815b6a3a2bSMark Fasheh }
27825b6a3a2bSMark Fasheh
27835b6a3a2bSMark Fasheh /*
27845b6a3a2bSMark Fasheh * We allocate enough clusters to fulfill "blocks_wanted", but set
27855b6a3a2bSMark Fasheh * i_size to exactly one block. Ocfs2_extend_dir() will handle the
27865b6a3a2bSMark Fasheh * rest automatically for us.
27875b6a3a2bSMark Fasheh *
27885b6a3a2bSMark Fasheh * *first_block_bh is a pointer to the 1st data block allocated to the
27895b6a3a2bSMark Fasheh * directory.
27905b6a3a2bSMark Fasheh */
ocfs2_expand_inline_dir(struct inode * dir,struct buffer_head * di_bh,unsigned int blocks_wanted,struct ocfs2_dir_lookup_result * lookup,struct buffer_head ** first_block_bh)27915b6a3a2bSMark Fasheh static int ocfs2_expand_inline_dir(struct inode *dir, struct buffer_head *di_bh,
27925b6a3a2bSMark Fasheh unsigned int blocks_wanted,
27939b7895efSMark Fasheh struct ocfs2_dir_lookup_result *lookup,
27945b6a3a2bSMark Fasheh struct buffer_head **first_block_bh)
27955b6a3a2bSMark Fasheh {
2796e3a93c2dSMark Fasheh u32 alloc, dx_alloc, bit_off, len, num_dx_entries = 0;
27975b6a3a2bSMark Fasheh struct super_block *sb = dir->i_sb;
27984ed8a6bbSMark Fasheh int ret, i, num_dx_leaves = 0, dx_inline = 0,
27999b7895efSMark Fasheh credits = ocfs2_inline_to_extents_credits(sb);
28009b7895efSMark Fasheh u64 dx_insert_blkno, blkno,
28019b7895efSMark Fasheh bytes = blocks_wanted << sb->s_blocksize_bits;
28025b6a3a2bSMark Fasheh struct ocfs2_super *osb = OCFS2_SB(dir->i_sb);
28035b6a3a2bSMark Fasheh struct ocfs2_inode_info *oi = OCFS2_I(dir);
28045d44670fSMarcus Meissner struct ocfs2_alloc_context *data_ac = NULL;
28059b7895efSMark Fasheh struct ocfs2_alloc_context *meta_ac = NULL;
28065b6a3a2bSMark Fasheh struct buffer_head *dirdata_bh = NULL;
28079b7895efSMark Fasheh struct buffer_head *dx_root_bh = NULL;
28089b7895efSMark Fasheh struct buffer_head **dx_leaves = NULL;
28095b6a3a2bSMark Fasheh struct ocfs2_dinode *di = (struct ocfs2_dinode *)di_bh->b_data;
28105b6a3a2bSMark Fasheh handle_t *handle;
2811f99b9b7cSJoel Becker struct ocfs2_extent_tree et;
28129b7895efSMark Fasheh struct ocfs2_extent_tree dx_et;
28139b7895efSMark Fasheh int did_quota = 0, bytes_allocated = 0;
2814f99b9b7cSJoel Becker
28155e404e9eSJoel Becker ocfs2_init_dinode_extent_tree(&et, INODE_CACHE(dir), di_bh);
28165b6a3a2bSMark Fasheh
28175b6a3a2bSMark Fasheh alloc = ocfs2_clusters_for_bytes(sb, bytes);
28189b7895efSMark Fasheh dx_alloc = 0;
28199b7895efSMark Fasheh
2820edd45c08SJan Kara down_write(&oi->ip_alloc_sem);
2821edd45c08SJan Kara
28229b7895efSMark Fasheh if (ocfs2_supports_indexed_dirs(osb)) {
28239b7895efSMark Fasheh credits += ocfs2_add_dir_index_credits(sb);
28249b7895efSMark Fasheh
28254ed8a6bbSMark Fasheh dx_inline = ocfs2_new_dx_should_be_inline(dir, di_bh);
28264ed8a6bbSMark Fasheh if (!dx_inline) {
28274ed8a6bbSMark Fasheh /* Add one more cluster for an index leaf */
28284ed8a6bbSMark Fasheh dx_alloc++;
28294ed8a6bbSMark Fasheh dx_leaves = ocfs2_dx_dir_kmalloc_leaves(sb,
28304ed8a6bbSMark Fasheh &num_dx_leaves);
28319b7895efSMark Fasheh if (!dx_leaves) {
28329b7895efSMark Fasheh ret = -ENOMEM;
28339b7895efSMark Fasheh mlog_errno(ret);
28349b7895efSMark Fasheh goto out;
28359b7895efSMark Fasheh }
28364ed8a6bbSMark Fasheh }
28379b7895efSMark Fasheh
28389b7895efSMark Fasheh /* This gets us the dx_root */
28399b7895efSMark Fasheh ret = ocfs2_reserve_new_metadata_blocks(osb, 1, &meta_ac);
28409b7895efSMark Fasheh if (ret) {
28419b7895efSMark Fasheh mlog_errno(ret);
28429b7895efSMark Fasheh goto out;
28439b7895efSMark Fasheh }
28449b7895efSMark Fasheh }
28455b6a3a2bSMark Fasheh
28465b6a3a2bSMark Fasheh /*
28479b7895efSMark Fasheh * We should never need more than 2 clusters for the unindexed
28489b7895efSMark Fasheh * tree - maximum dirent size is far less than one block. In
28499b7895efSMark Fasheh * fact, the only time we'd need more than one cluster is if
28505b6a3a2bSMark Fasheh * blocksize == clustersize and the dirent won't fit in the
28515b6a3a2bSMark Fasheh * extra space that the expansion to a single block gives. As
28525b6a3a2bSMark Fasheh * of today, that only happens on 4k/4k file systems.
28535b6a3a2bSMark Fasheh */
28545b6a3a2bSMark Fasheh BUG_ON(alloc > 2);
28555b6a3a2bSMark Fasheh
2856035a5711STao Ma ret = ocfs2_reserve_clusters(osb, alloc + dx_alloc, &data_ac);
28575b6a3a2bSMark Fasheh if (ret) {
28585b6a3a2bSMark Fasheh mlog_errno(ret);
28595b6a3a2bSMark Fasheh goto out;
28605b6a3a2bSMark Fasheh }
28615b6a3a2bSMark Fasheh
28625b6a3a2bSMark Fasheh /*
2863c78bad11SJoe Perches * Prepare for worst case allocation scenario of two separate
28649b7895efSMark Fasheh * extents in the unindexed tree.
28655b6a3a2bSMark Fasheh */
28665b6a3a2bSMark Fasheh if (alloc == 2)
28675b6a3a2bSMark Fasheh credits += OCFS2_SUBALLOC_ALLOC;
28685b6a3a2bSMark Fasheh
28695b6a3a2bSMark Fasheh handle = ocfs2_start_trans(osb, credits);
28705b6a3a2bSMark Fasheh if (IS_ERR(handle)) {
28715b6a3a2bSMark Fasheh ret = PTR_ERR(handle);
28725b6a3a2bSMark Fasheh mlog_errno(ret);
2873edd45c08SJan Kara goto out;
28745b6a3a2bSMark Fasheh }
28755b6a3a2bSMark Fasheh
28765dd4056dSChristoph Hellwig ret = dquot_alloc_space_nodirty(dir,
28775dd4056dSChristoph Hellwig ocfs2_clusters_to_bytes(osb->sb, alloc + dx_alloc));
28785dd4056dSChristoph Hellwig if (ret)
2879a90714c1SJan Kara goto out_commit;
2880a90714c1SJan Kara did_quota = 1;
28819b7895efSMark Fasheh
28824ed8a6bbSMark Fasheh if (ocfs2_supports_indexed_dirs(osb) && !dx_inline) {
28839b7895efSMark Fasheh /*
28849b7895efSMark Fasheh * Allocate our index cluster first, to maximize the
28859b7895efSMark Fasheh * possibility that unindexed leaves grow
28869b7895efSMark Fasheh * contiguously.
28879b7895efSMark Fasheh */
28889b7895efSMark Fasheh ret = __ocfs2_dx_dir_new_cluster(dir, 0, handle, data_ac,
28899b7895efSMark Fasheh dx_leaves, num_dx_leaves,
28909b7895efSMark Fasheh &dx_insert_blkno);
28919b7895efSMark Fasheh if (ret) {
28929b7895efSMark Fasheh mlog_errno(ret);
28939b7895efSMark Fasheh goto out_commit;
28949b7895efSMark Fasheh }
28959b7895efSMark Fasheh bytes_allocated += ocfs2_clusters_to_bytes(dir->i_sb, 1);
28969b7895efSMark Fasheh }
28979b7895efSMark Fasheh
28985b6a3a2bSMark Fasheh /*
28995b6a3a2bSMark Fasheh * Try to claim as many clusters as the bitmap can give though
29005b6a3a2bSMark Fasheh * if we only get one now, that's enough to continue. The rest
29015b6a3a2bSMark Fasheh * will be claimed after the conversion to extents.
29025b6a3a2bSMark Fasheh */
290383f92318SMark Fasheh if (ocfs2_dir_resv_allowed(osb))
2904e3b4a97dSMark Fasheh data_ac->ac_resv = &oi->ip_la_data_resv;
29051ed9b777SJoel Becker ret = ocfs2_claim_clusters(handle, data_ac, 1, &bit_off, &len);
29065b6a3a2bSMark Fasheh if (ret) {
29075b6a3a2bSMark Fasheh mlog_errno(ret);
29085b6a3a2bSMark Fasheh goto out_commit;
29095b6a3a2bSMark Fasheh }
29109b7895efSMark Fasheh bytes_allocated += ocfs2_clusters_to_bytes(dir->i_sb, 1);
29115b6a3a2bSMark Fasheh
29125b6a3a2bSMark Fasheh /*
29135b6a3a2bSMark Fasheh * Operations are carefully ordered so that we set up the new
29145b6a3a2bSMark Fasheh * data block first. The conversion from inline data to
29155b6a3a2bSMark Fasheh * extents follows.
29165b6a3a2bSMark Fasheh */
29175b6a3a2bSMark Fasheh blkno = ocfs2_clusters_to_blocks(dir->i_sb, bit_off);
29185b6a3a2bSMark Fasheh dirdata_bh = sb_getblk(sb, blkno);
29195b6a3a2bSMark Fasheh if (!dirdata_bh) {
29207391a294SRui Xiang ret = -ENOMEM;
29215b6a3a2bSMark Fasheh mlog_errno(ret);
29225b6a3a2bSMark Fasheh goto out_commit;
29235b6a3a2bSMark Fasheh }
29245b6a3a2bSMark Fasheh
29258cb471e8SJoel Becker ocfs2_set_new_buffer_uptodate(INODE_CACHE(dir), dirdata_bh);
29265b6a3a2bSMark Fasheh
29270cf2f763SJoel Becker ret = ocfs2_journal_access_db(handle, INODE_CACHE(dir), dirdata_bh,
29285b6a3a2bSMark Fasheh OCFS2_JOURNAL_ACCESS_CREATE);
29295b6a3a2bSMark Fasheh if (ret) {
29305b6a3a2bSMark Fasheh mlog_errno(ret);
29315b6a3a2bSMark Fasheh goto out_commit;
29325b6a3a2bSMark Fasheh }
29335b6a3a2bSMark Fasheh
29345b6a3a2bSMark Fasheh memcpy(dirdata_bh->b_data, di->id2.i_data.id_data, i_size_read(dir));
29355b6a3a2bSMark Fasheh memset(dirdata_bh->b_data + i_size_read(dir), 0,
29365b6a3a2bSMark Fasheh sb->s_blocksize - i_size_read(dir));
2937e7c17e43SMark Fasheh i = ocfs2_expand_last_dirent(dirdata_bh->b_data, i_size_read(dir), dir);
2938e7c17e43SMark Fasheh if (ocfs2_new_dir_wants_trailer(dir)) {
2939e7c17e43SMark Fasheh /*
2940e7c17e43SMark Fasheh * Prepare the dir trailer up front. It will otherwise look
2941e7c17e43SMark Fasheh * like a valid dirent. Even if inserting the index fails
2942e7c17e43SMark Fasheh * (unlikely), then all we'll have done is given first dir
2943e7c17e43SMark Fasheh * block a small amount of fragmentation.
2944e7c17e43SMark Fasheh */
2945e7c17e43SMark Fasheh ocfs2_init_dir_trailer(dir, dirdata_bh, i);
2946e7c17e43SMark Fasheh }
29475b6a3a2bSMark Fasheh
29482931cdcbSDarrick J. Wong ocfs2_update_inode_fsync_trans(handle, dir, 1);
2949ec20cec7SJoel Becker ocfs2_journal_dirty(handle, dirdata_bh);
29505b6a3a2bSMark Fasheh
29514ed8a6bbSMark Fasheh if (ocfs2_supports_indexed_dirs(osb) && !dx_inline) {
29524ed8a6bbSMark Fasheh /*
29534ed8a6bbSMark Fasheh * Dx dirs with an external cluster need to do this up
29544ed8a6bbSMark Fasheh * front. Inline dx root's get handled later, after
2955e3a93c2dSMark Fasheh * we've allocated our root block. We get passed back
2956e3a93c2dSMark Fasheh * a total number of items so that dr_num_entries can
2957e3a93c2dSMark Fasheh * be correctly set once the dx_root has been
2958e3a93c2dSMark Fasheh * allocated.
29594ed8a6bbSMark Fasheh */
29609b7895efSMark Fasheh ret = ocfs2_dx_dir_index_block(dir, handle, dx_leaves,
2961e3a93c2dSMark Fasheh num_dx_leaves, &num_dx_entries,
2962e3a93c2dSMark Fasheh dirdata_bh);
29639b7895efSMark Fasheh if (ret) {
29649b7895efSMark Fasheh mlog_errno(ret);
29659b7895efSMark Fasheh goto out_commit;
29669b7895efSMark Fasheh }
29679b7895efSMark Fasheh }
29689b7895efSMark Fasheh
29695b6a3a2bSMark Fasheh /*
29705b6a3a2bSMark Fasheh * Set extent, i_size, etc on the directory. After this, the
29715b6a3a2bSMark Fasheh * inode should contain the same exact dirents as before and
29725b6a3a2bSMark Fasheh * be fully accessible from system calls.
29735b6a3a2bSMark Fasheh *
29745b6a3a2bSMark Fasheh * We let the later dirent insert modify c/mtime - to the user
29755b6a3a2bSMark Fasheh * the data hasn't changed.
29765b6a3a2bSMark Fasheh */
29770cf2f763SJoel Becker ret = ocfs2_journal_access_di(handle, INODE_CACHE(dir), di_bh,
29785b6a3a2bSMark Fasheh OCFS2_JOURNAL_ACCESS_CREATE);
29795b6a3a2bSMark Fasheh if (ret) {
29805b6a3a2bSMark Fasheh mlog_errno(ret);
29815b6a3a2bSMark Fasheh goto out_commit;
29825b6a3a2bSMark Fasheh }
29835b6a3a2bSMark Fasheh
29845b6a3a2bSMark Fasheh spin_lock(&oi->ip_lock);
29855b6a3a2bSMark Fasheh oi->ip_dyn_features &= ~OCFS2_INLINE_DATA_FL;
29865b6a3a2bSMark Fasheh di->i_dyn_features = cpu_to_le16(oi->ip_dyn_features);
29875b6a3a2bSMark Fasheh spin_unlock(&oi->ip_lock);
29885b6a3a2bSMark Fasheh
29895b6a3a2bSMark Fasheh ocfs2_dinode_new_extent_list(dir, di);
29905b6a3a2bSMark Fasheh
29915b6a3a2bSMark Fasheh i_size_write(dir, sb->s_blocksize);
299210fc3a18SJeff Layton inode_set_mtime_to_ts(dir, inode_set_ctime_current(dir));
29935b6a3a2bSMark Fasheh
29945b6a3a2bSMark Fasheh di->i_size = cpu_to_le64(sb->s_blocksize);
299510fc3a18SJeff Layton di->i_ctime = di->i_mtime = cpu_to_le64(inode_get_ctime_sec(dir));
299610fc3a18SJeff Layton di->i_ctime_nsec = di->i_mtime_nsec = cpu_to_le32(inode_get_ctime_nsec(dir));
29976fdb702dSDarrick J. Wong ocfs2_update_inode_fsync_trans(handle, dir, 1);
29985b6a3a2bSMark Fasheh
29995b6a3a2bSMark Fasheh /*
30005b6a3a2bSMark Fasheh * This should never fail as our extent list is empty and all
30015b6a3a2bSMark Fasheh * related blocks have been journaled already.
30025b6a3a2bSMark Fasheh */
3003cc79d8c1SJoel Becker ret = ocfs2_insert_extent(handle, &et, 0, blkno, len,
3004f99b9b7cSJoel Becker 0, NULL);
30055b6a3a2bSMark Fasheh if (ret) {
30065b6a3a2bSMark Fasheh mlog_errno(ret);
300783cab533STao Ma goto out_commit;
30085b6a3a2bSMark Fasheh }
30095b6a3a2bSMark Fasheh
30109780eb6cSMark Fasheh /*
30119780eb6cSMark Fasheh * Set i_blocks after the extent insert for the most up to
30129780eb6cSMark Fasheh * date ip_clusters value.
30139780eb6cSMark Fasheh */
30149780eb6cSMark Fasheh dir->i_blocks = ocfs2_inode_sector_count(dir);
30159780eb6cSMark Fasheh
3016ec20cec7SJoel Becker ocfs2_journal_dirty(handle, di_bh);
30175b6a3a2bSMark Fasheh
30189b7895efSMark Fasheh if (ocfs2_supports_indexed_dirs(osb)) {
30199b7895efSMark Fasheh ret = ocfs2_dx_dir_attach_index(osb, handle, dir, di_bh,
3020e7c17e43SMark Fasheh dirdata_bh, meta_ac, dx_inline,
3021e3a93c2dSMark Fasheh num_dx_entries, &dx_root_bh);
30229b7895efSMark Fasheh if (ret) {
30239b7895efSMark Fasheh mlog_errno(ret);
30249b7895efSMark Fasheh goto out_commit;
30259b7895efSMark Fasheh }
30269b7895efSMark Fasheh
30274ed8a6bbSMark Fasheh if (dx_inline) {
30284ed8a6bbSMark Fasheh ocfs2_dx_dir_index_root_block(dir, dx_root_bh,
30294ed8a6bbSMark Fasheh dirdata_bh);
30304ed8a6bbSMark Fasheh } else {
30315e404e9eSJoel Becker ocfs2_init_dx_root_extent_tree(&dx_et,
30325e404e9eSJoel Becker INODE_CACHE(dir),
30335e404e9eSJoel Becker dx_root_bh);
3034cc79d8c1SJoel Becker ret = ocfs2_insert_extent(handle, &dx_et, 0,
30359b7895efSMark Fasheh dx_insert_blkno, 1, 0, NULL);
30369b7895efSMark Fasheh if (ret)
30379b7895efSMark Fasheh mlog_errno(ret);
30389b7895efSMark Fasheh }
30394ed8a6bbSMark Fasheh }
30409b7895efSMark Fasheh
30415b6a3a2bSMark Fasheh /*
30425b6a3a2bSMark Fasheh * We asked for two clusters, but only got one in the 1st
30435b6a3a2bSMark Fasheh * pass. Claim the 2nd cluster as a separate extent.
30445b6a3a2bSMark Fasheh */
30455b6a3a2bSMark Fasheh if (alloc > len) {
30461ed9b777SJoel Becker ret = ocfs2_claim_clusters(handle, data_ac, 1, &bit_off,
30475b6a3a2bSMark Fasheh &len);
30485b6a3a2bSMark Fasheh if (ret) {
30495b6a3a2bSMark Fasheh mlog_errno(ret);
30505b6a3a2bSMark Fasheh goto out_commit;
30515b6a3a2bSMark Fasheh }
30525b6a3a2bSMark Fasheh blkno = ocfs2_clusters_to_blocks(dir->i_sb, bit_off);
30535b6a3a2bSMark Fasheh
3054cc79d8c1SJoel Becker ret = ocfs2_insert_extent(handle, &et, 1,
3055f56654c4STao Ma blkno, len, 0, NULL);
30565b6a3a2bSMark Fasheh if (ret) {
30575b6a3a2bSMark Fasheh mlog_errno(ret);
305883cab533STao Ma goto out_commit;
30595b6a3a2bSMark Fasheh }
30609b7895efSMark Fasheh bytes_allocated += ocfs2_clusters_to_bytes(dir->i_sb, 1);
30615b6a3a2bSMark Fasheh }
30625b6a3a2bSMark Fasheh
30635b6a3a2bSMark Fasheh *first_block_bh = dirdata_bh;
30645b6a3a2bSMark Fasheh dirdata_bh = NULL;
30659b7895efSMark Fasheh if (ocfs2_supports_indexed_dirs(osb)) {
30669b7895efSMark Fasheh unsigned int off;
30679b7895efSMark Fasheh
30684ed8a6bbSMark Fasheh if (!dx_inline) {
30699b7895efSMark Fasheh /*
30709b7895efSMark Fasheh * We need to return the correct block within the
30719b7895efSMark Fasheh * cluster which should hold our entry.
30729b7895efSMark Fasheh */
30731119d3c0Spiaojun off = ocfs2_dx_dir_hash_idx(osb,
30749b7895efSMark Fasheh &lookup->dl_hinfo);
30759b7895efSMark Fasheh get_bh(dx_leaves[off]);
30769b7895efSMark Fasheh lookup->dl_dx_leaf_bh = dx_leaves[off];
30779b7895efSMark Fasheh }
30784ed8a6bbSMark Fasheh lookup->dl_dx_root_bh = dx_root_bh;
30794ed8a6bbSMark Fasheh dx_root_bh = NULL;
30804ed8a6bbSMark Fasheh }
30815b6a3a2bSMark Fasheh
30825b6a3a2bSMark Fasheh out_commit:
3083a90714c1SJan Kara if (ret < 0 && did_quota)
30845dd4056dSChristoph Hellwig dquot_free_space_nodirty(dir, bytes_allocated);
30859b7895efSMark Fasheh
30865b6a3a2bSMark Fasheh ocfs2_commit_trans(osb, handle);
30875b6a3a2bSMark Fasheh
30885b6a3a2bSMark Fasheh out:
3089edd45c08SJan Kara up_write(&oi->ip_alloc_sem);
30905b6a3a2bSMark Fasheh if (data_ac)
30915b6a3a2bSMark Fasheh ocfs2_free_alloc_context(data_ac);
30929b7895efSMark Fasheh if (meta_ac)
30939b7895efSMark Fasheh ocfs2_free_alloc_context(meta_ac);
30949b7895efSMark Fasheh
30959b7895efSMark Fasheh if (dx_leaves) {
30969b7895efSMark Fasheh for (i = 0; i < num_dx_leaves; i++)
30979b7895efSMark Fasheh brelse(dx_leaves[i]);
30989b7895efSMark Fasheh kfree(dx_leaves);
30999b7895efSMark Fasheh }
31005b6a3a2bSMark Fasheh
31015b6a3a2bSMark Fasheh brelse(dirdata_bh);
31029b7895efSMark Fasheh brelse(dx_root_bh);
31035b6a3a2bSMark Fasheh
31045b6a3a2bSMark Fasheh return ret;
31055b6a3a2bSMark Fasheh }
31065b6a3a2bSMark Fasheh
3107ccd979bdSMark Fasheh /* returns a bh of the 1st new block in the allocation. */
ocfs2_do_extend_dir(struct super_block * sb,handle_t * handle,struct inode * dir,struct buffer_head * parent_fe_bh,struct ocfs2_alloc_context * data_ac,struct ocfs2_alloc_context * meta_ac,struct buffer_head ** new_bh)3108316f4b9fSMark Fasheh static int ocfs2_do_extend_dir(struct super_block *sb,
31091fabe148SMark Fasheh handle_t *handle,
3110ccd979bdSMark Fasheh struct inode *dir,
3111ccd979bdSMark Fasheh struct buffer_head *parent_fe_bh,
3112ccd979bdSMark Fasheh struct ocfs2_alloc_context *data_ac,
3113ccd979bdSMark Fasheh struct ocfs2_alloc_context *meta_ac,
3114ccd979bdSMark Fasheh struct buffer_head **new_bh)
3115ccd979bdSMark Fasheh {
3116ccd979bdSMark Fasheh int status;
3117a90714c1SJan Kara int extend, did_quota = 0;
31188110b073SMark Fasheh u64 p_blkno, v_blkno;
3119ccd979bdSMark Fasheh
3120ccd979bdSMark Fasheh spin_lock(&OCFS2_I(dir)->ip_lock);
3121ccd979bdSMark Fasheh extend = (i_size_read(dir) == ocfs2_clusters_to_bytes(sb, OCFS2_I(dir)->ip_clusters));
3122ccd979bdSMark Fasheh spin_unlock(&OCFS2_I(dir)->ip_lock);
3123ccd979bdSMark Fasheh
3124ccd979bdSMark Fasheh if (extend) {
3125dcd0538fSMark Fasheh u32 offset = OCFS2_I(dir)->ip_clusters;
3126dcd0538fSMark Fasheh
31275dd4056dSChristoph Hellwig status = dquot_alloc_space_nodirty(dir,
31285dd4056dSChristoph Hellwig ocfs2_clusters_to_bytes(sb, 1));
31295dd4056dSChristoph Hellwig if (status)
3130a90714c1SJan Kara goto bail;
3131a90714c1SJan Kara did_quota = 1;
3132a90714c1SJan Kara
31330eb8d47eSTao Ma status = ocfs2_add_inode_data(OCFS2_SB(sb), dir, &offset,
31342ae99a60SMark Fasheh 1, 0, parent_fe_bh, handle,
3135ccd979bdSMark Fasheh data_ac, meta_ac, NULL);
3136ccd979bdSMark Fasheh BUG_ON(status == -EAGAIN);
3137ccd979bdSMark Fasheh if (status < 0) {
3138ccd979bdSMark Fasheh mlog_errno(status);
3139ccd979bdSMark Fasheh goto bail;
3140ccd979bdSMark Fasheh }
3141ccd979bdSMark Fasheh }
3142ccd979bdSMark Fasheh
31438110b073SMark Fasheh v_blkno = ocfs2_blocks_for_bytes(sb, i_size_read(dir));
31448110b073SMark Fasheh status = ocfs2_extent_map_get_blocks(dir, v_blkno, &p_blkno, NULL, NULL);
3145ccd979bdSMark Fasheh if (status < 0) {
3146ccd979bdSMark Fasheh mlog_errno(status);
3147ccd979bdSMark Fasheh goto bail;
3148ccd979bdSMark Fasheh }
3149ccd979bdSMark Fasheh
3150ccd979bdSMark Fasheh *new_bh = sb_getblk(sb, p_blkno);
3151ccd979bdSMark Fasheh if (!*new_bh) {
31527391a294SRui Xiang status = -ENOMEM;
3153ccd979bdSMark Fasheh mlog_errno(status);
3154ccd979bdSMark Fasheh goto bail;
3155ccd979bdSMark Fasheh }
3156ccd979bdSMark Fasheh status = 0;
3157ccd979bdSMark Fasheh bail:
3158a90714c1SJan Kara if (did_quota && status < 0)
31595dd4056dSChristoph Hellwig dquot_free_space_nodirty(dir, ocfs2_clusters_to_bytes(sb, 1));
3160ccd979bdSMark Fasheh return status;
3161ccd979bdSMark Fasheh }
3162ccd979bdSMark Fasheh
31635b6a3a2bSMark Fasheh /*
31645b6a3a2bSMark Fasheh * Assumes you already have a cluster lock on the directory.
31655b6a3a2bSMark Fasheh *
31665b6a3a2bSMark Fasheh * 'blocks_wanted' is only used if we have an inline directory which
31675b6a3a2bSMark Fasheh * is to be turned into an extent based one. The size of the dirent to
31685b6a3a2bSMark Fasheh * insert might be larger than the space gained by growing to just one
31695b6a3a2bSMark Fasheh * block, so we may have to grow the inode by two blocks in that case.
3170e7c17e43SMark Fasheh *
3171e7c17e43SMark Fasheh * If the directory is already indexed, dx_root_bh must be provided.
31725b6a3a2bSMark Fasheh */
ocfs2_extend_dir(struct ocfs2_super * osb,struct inode * dir,struct buffer_head * parent_fe_bh,unsigned int blocks_wanted,struct ocfs2_dir_lookup_result * lookup,struct buffer_head ** new_de_bh)3173ccd979bdSMark Fasheh static int ocfs2_extend_dir(struct ocfs2_super *osb,
3174ccd979bdSMark Fasheh struct inode *dir,
3175ccd979bdSMark Fasheh struct buffer_head *parent_fe_bh,
31765b6a3a2bSMark Fasheh unsigned int blocks_wanted,
31779b7895efSMark Fasheh struct ocfs2_dir_lookup_result *lookup,
3178ccd979bdSMark Fasheh struct buffer_head **new_de_bh)
3179ccd979bdSMark Fasheh {
3180ccd979bdSMark Fasheh int status = 0;
3181ee19a779SJoel Becker int credits, num_free_extents, drop_alloc_sem = 0;
3182ccd979bdSMark Fasheh loff_t dir_i_size;
3183ccd979bdSMark Fasheh struct ocfs2_dinode *fe = (struct ocfs2_dinode *) parent_fe_bh->b_data;
3184811f933dSTao Ma struct ocfs2_extent_list *el = &fe->id2.i_list;
3185ccd979bdSMark Fasheh struct ocfs2_alloc_context *data_ac = NULL;
3186ccd979bdSMark Fasheh struct ocfs2_alloc_context *meta_ac = NULL;
31871fabe148SMark Fasheh handle_t *handle = NULL;
3188ccd979bdSMark Fasheh struct buffer_head *new_bh = NULL;
3189ccd979bdSMark Fasheh struct ocfs2_dir_entry * de;
3190ccd979bdSMark Fasheh struct super_block *sb = osb->sb;
3191f99b9b7cSJoel Becker struct ocfs2_extent_tree et;
3192e7c17e43SMark Fasheh struct buffer_head *dx_root_bh = lookup->dl_dx_root_bh;
3193ccd979bdSMark Fasheh
31945b6a3a2bSMark Fasheh if (OCFS2_I(dir)->ip_dyn_features & OCFS2_INLINE_DATA_FL) {
3195e7c17e43SMark Fasheh /*
3196e7c17e43SMark Fasheh * This would be a code error as an inline directory should
3197e7c17e43SMark Fasheh * never have an index root.
3198e7c17e43SMark Fasheh */
3199e7c17e43SMark Fasheh BUG_ON(dx_root_bh);
3200e7c17e43SMark Fasheh
32015b6a3a2bSMark Fasheh status = ocfs2_expand_inline_dir(dir, parent_fe_bh,
32029b7895efSMark Fasheh blocks_wanted, lookup,
32039b7895efSMark Fasheh &new_bh);
32045b6a3a2bSMark Fasheh if (status) {
32055b6a3a2bSMark Fasheh mlog_errno(status);
32065b6a3a2bSMark Fasheh goto bail;
32075b6a3a2bSMark Fasheh }
32085b6a3a2bSMark Fasheh
3209e7c17e43SMark Fasheh /* Expansion from inline to an indexed directory will
3210e7c17e43SMark Fasheh * have given us this. */
3211e7c17e43SMark Fasheh dx_root_bh = lookup->dl_dx_root_bh;
3212e7c17e43SMark Fasheh
32135b6a3a2bSMark Fasheh if (blocks_wanted == 1) {
32145b6a3a2bSMark Fasheh /*
32155b6a3a2bSMark Fasheh * If the new dirent will fit inside the space
32165b6a3a2bSMark Fasheh * created by pushing out to one block, then
32175b6a3a2bSMark Fasheh * we can complete the operation
32185b6a3a2bSMark Fasheh * here. Otherwise we have to expand i_size
32195b6a3a2bSMark Fasheh * and format the 2nd block below.
32205b6a3a2bSMark Fasheh */
32215b6a3a2bSMark Fasheh BUG_ON(new_bh == NULL);
32225b6a3a2bSMark Fasheh goto bail_bh;
32235b6a3a2bSMark Fasheh }
32245b6a3a2bSMark Fasheh
32255b6a3a2bSMark Fasheh /*
32265b6a3a2bSMark Fasheh * Get rid of 'new_bh' - we want to format the 2nd
32275b6a3a2bSMark Fasheh * data block and return that instead.
32285b6a3a2bSMark Fasheh */
32295b6a3a2bSMark Fasheh brelse(new_bh);
32305b6a3a2bSMark Fasheh new_bh = NULL;
32315b6a3a2bSMark Fasheh
3232edd45c08SJan Kara down_write(&OCFS2_I(dir)->ip_alloc_sem);
3233edd45c08SJan Kara drop_alloc_sem = 1;
32345b6a3a2bSMark Fasheh dir_i_size = i_size_read(dir);
32355b6a3a2bSMark Fasheh credits = OCFS2_SIMPLE_DIR_EXTEND_CREDITS;
32365b6a3a2bSMark Fasheh goto do_extend;
32375b6a3a2bSMark Fasheh }
32385b6a3a2bSMark Fasheh
3239edd45c08SJan Kara down_write(&OCFS2_I(dir)->ip_alloc_sem);
3240edd45c08SJan Kara drop_alloc_sem = 1;
3241ccd979bdSMark Fasheh dir_i_size = i_size_read(dir);
3242f1088d47STao Ma trace_ocfs2_extend_dir((unsigned long long)OCFS2_I(dir)->ip_blkno,
3243f1088d47STao Ma dir_i_size);
3244ccd979bdSMark Fasheh
3245ccd979bdSMark Fasheh /* dir->i_size is always block aligned. */
3246ccd979bdSMark Fasheh spin_lock(&OCFS2_I(dir)->ip_lock);
3247ccd979bdSMark Fasheh if (dir_i_size == ocfs2_clusters_to_bytes(sb, OCFS2_I(dir)->ip_clusters)) {
3248ccd979bdSMark Fasheh spin_unlock(&OCFS2_I(dir)->ip_lock);
32495e404e9eSJoel Becker ocfs2_init_dinode_extent_tree(&et, INODE_CACHE(dir),
32505e404e9eSJoel Becker parent_fe_bh);
3251964f14a0SJun Piao num_free_extents = ocfs2_num_free_extents(&et);
3252ccd979bdSMark Fasheh if (num_free_extents < 0) {
3253ccd979bdSMark Fasheh status = num_free_extents;
3254ccd979bdSMark Fasheh mlog_errno(status);
3255ccd979bdSMark Fasheh goto bail;
3256ccd979bdSMark Fasheh }
3257ccd979bdSMark Fasheh
3258ccd979bdSMark Fasheh if (!num_free_extents) {
3259811f933dSTao Ma status = ocfs2_reserve_new_metadata(osb, el, &meta_ac);
3260ccd979bdSMark Fasheh if (status < 0) {
3261ccd979bdSMark Fasheh if (status != -ENOSPC)
3262ccd979bdSMark Fasheh mlog_errno(status);
3263ccd979bdSMark Fasheh goto bail;
3264ccd979bdSMark Fasheh }
3265ccd979bdSMark Fasheh }
3266ccd979bdSMark Fasheh
3267da5cbf2fSMark Fasheh status = ocfs2_reserve_clusters(osb, 1, &data_ac);
3268ccd979bdSMark Fasheh if (status < 0) {
3269ccd979bdSMark Fasheh if (status != -ENOSPC)
3270ccd979bdSMark Fasheh mlog_errno(status);
3271ccd979bdSMark Fasheh goto bail;
3272ccd979bdSMark Fasheh }
3273ccd979bdSMark Fasheh
327483f92318SMark Fasheh if (ocfs2_dir_resv_allowed(osb))
3275e3b4a97dSMark Fasheh data_ac->ac_resv = &OCFS2_I(dir)->ip_la_data_resv;
3276e3b4a97dSMark Fasheh
327706f9da6eSGoldwyn Rodrigues credits = ocfs2_calc_extend_credits(sb, el);
3278ccd979bdSMark Fasheh } else {
3279ccd979bdSMark Fasheh spin_unlock(&OCFS2_I(dir)->ip_lock);
3280ccd979bdSMark Fasheh credits = OCFS2_SIMPLE_DIR_EXTEND_CREDITS;
3281ccd979bdSMark Fasheh }
3282ccd979bdSMark Fasheh
32835b6a3a2bSMark Fasheh do_extend:
3284e7c17e43SMark Fasheh if (ocfs2_dir_indexed(dir))
3285e7c17e43SMark Fasheh credits++; /* For attaching the new dirent block to the
3286e7c17e43SMark Fasheh * dx_root */
3287e7c17e43SMark Fasheh
328865eff9ccSMark Fasheh handle = ocfs2_start_trans(osb, credits);
3289ccd979bdSMark Fasheh if (IS_ERR(handle)) {
3290ccd979bdSMark Fasheh status = PTR_ERR(handle);
3291ccd979bdSMark Fasheh handle = NULL;
3292ccd979bdSMark Fasheh mlog_errno(status);
3293ccd979bdSMark Fasheh goto bail;
3294ccd979bdSMark Fasheh }
3295ccd979bdSMark Fasheh
3296ccd979bdSMark Fasheh status = ocfs2_do_extend_dir(osb->sb, handle, dir, parent_fe_bh,
3297ccd979bdSMark Fasheh data_ac, meta_ac, &new_bh);
3298ccd979bdSMark Fasheh if (status < 0) {
3299ccd979bdSMark Fasheh mlog_errno(status);
3300ccd979bdSMark Fasheh goto bail;
3301ccd979bdSMark Fasheh }
3302ccd979bdSMark Fasheh
33038cb471e8SJoel Becker ocfs2_set_new_buffer_uptodate(INODE_CACHE(dir), new_bh);
3304ccd979bdSMark Fasheh
33050cf2f763SJoel Becker status = ocfs2_journal_access_db(handle, INODE_CACHE(dir), new_bh,
3306ccd979bdSMark Fasheh OCFS2_JOURNAL_ACCESS_CREATE);
3307ccd979bdSMark Fasheh if (status < 0) {
3308ccd979bdSMark Fasheh mlog_errno(status);
3309ccd979bdSMark Fasheh goto bail;
3310ccd979bdSMark Fasheh }
3311ccd979bdSMark Fasheh memset(new_bh->b_data, 0, sb->s_blocksize);
331287d35a74SMark Fasheh
3313ccd979bdSMark Fasheh de = (struct ocfs2_dir_entry *) new_bh->b_data;
3314ccd979bdSMark Fasheh de->inode = 0;
3315e7c17e43SMark Fasheh if (ocfs2_supports_dir_trailer(dir)) {
331687d35a74SMark Fasheh de->rec_len = cpu_to_le16(ocfs2_dir_trailer_blk_off(sb));
3317e7c17e43SMark Fasheh
3318e7c17e43SMark Fasheh ocfs2_init_dir_trailer(dir, new_bh, le16_to_cpu(de->rec_len));
3319e7c17e43SMark Fasheh
3320e7c17e43SMark Fasheh if (ocfs2_dir_indexed(dir)) {
3321e7c17e43SMark Fasheh status = ocfs2_dx_dir_link_trailer(dir, handle,
3322e7c17e43SMark Fasheh dx_root_bh, new_bh);
3323e7c17e43SMark Fasheh if (status) {
3324e7c17e43SMark Fasheh mlog_errno(status);
3325e7c17e43SMark Fasheh goto bail;
3326e7c17e43SMark Fasheh }
3327e7c17e43SMark Fasheh }
332887d35a74SMark Fasheh } else {
3329ccd979bdSMark Fasheh de->rec_len = cpu_to_le16(sb->s_blocksize);
333087d35a74SMark Fasheh }
33312931cdcbSDarrick J. Wong ocfs2_update_inode_fsync_trans(handle, dir, 1);
3332ec20cec7SJoel Becker ocfs2_journal_dirty(handle, new_bh);
3333ccd979bdSMark Fasheh
3334ccd979bdSMark Fasheh dir_i_size += dir->i_sb->s_blocksize;
3335ccd979bdSMark Fasheh i_size_write(dir, dir_i_size);
33368110b073SMark Fasheh dir->i_blocks = ocfs2_inode_sector_count(dir);
3337ccd979bdSMark Fasheh status = ocfs2_mark_inode_dirty(handle, dir, parent_fe_bh);
3338ccd979bdSMark Fasheh if (status < 0) {
3339ccd979bdSMark Fasheh mlog_errno(status);
3340ccd979bdSMark Fasheh goto bail;
3341ccd979bdSMark Fasheh }
3342ccd979bdSMark Fasheh
33435b6a3a2bSMark Fasheh bail_bh:
3344ccd979bdSMark Fasheh *new_de_bh = new_bh;
3345ccd979bdSMark Fasheh get_bh(*new_de_bh);
3346ccd979bdSMark Fasheh bail:
3347ccd979bdSMark Fasheh if (handle)
334802dc1af4SMark Fasheh ocfs2_commit_trans(osb, handle);
3349edd45c08SJan Kara if (drop_alloc_sem)
3350edd45c08SJan Kara up_write(&OCFS2_I(dir)->ip_alloc_sem);
3351ccd979bdSMark Fasheh
3352ccd979bdSMark Fasheh if (data_ac)
3353ccd979bdSMark Fasheh ocfs2_free_alloc_context(data_ac);
3354ccd979bdSMark Fasheh if (meta_ac)
3355ccd979bdSMark Fasheh ocfs2_free_alloc_context(meta_ac);
3356ccd979bdSMark Fasheh
3357ccd979bdSMark Fasheh brelse(new_bh);
3358ccd979bdSMark Fasheh
3359ccd979bdSMark Fasheh return status;
3360ccd979bdSMark Fasheh }
3361ccd979bdSMark Fasheh
ocfs2_find_dir_space_id(struct inode * dir,struct buffer_head * di_bh,const char * name,int namelen,struct buffer_head ** ret_de_bh,unsigned int * blocks_wanted)33625b6a3a2bSMark Fasheh static int ocfs2_find_dir_space_id(struct inode *dir, struct buffer_head *di_bh,
33635b6a3a2bSMark Fasheh const char *name, int namelen,
33645b6a3a2bSMark Fasheh struct buffer_head **ret_de_bh,
33655b6a3a2bSMark Fasheh unsigned int *blocks_wanted)
33665b6a3a2bSMark Fasheh {
33675b6a3a2bSMark Fasheh int ret;
336887d35a74SMark Fasheh struct super_block *sb = dir->i_sb;
33695b6a3a2bSMark Fasheh struct ocfs2_dinode *di = (struct ocfs2_dinode *)di_bh->b_data;
33705b6a3a2bSMark Fasheh struct ocfs2_dir_entry *de, *last_de = NULL;
3371e05a2428Slei lu char *first_de, *de_buf, *limit;
33725b6a3a2bSMark Fasheh unsigned long offset = 0;
33739a25d051SColin Ian King unsigned int rec_len, new_rec_len, free_space;
337487d35a74SMark Fasheh
337587d35a74SMark Fasheh /*
337687d35a74SMark Fasheh * This calculates how many free bytes we'd have in block zero, should
337787d35a74SMark Fasheh * this function force expansion to an extent tree.
337887d35a74SMark Fasheh */
3379e7c17e43SMark Fasheh if (ocfs2_new_dir_wants_trailer(dir))
338087d35a74SMark Fasheh free_space = ocfs2_dir_trailer_blk_off(sb) - i_size_read(dir);
338187d35a74SMark Fasheh else
338287d35a74SMark Fasheh free_space = dir->i_sb->s_blocksize - i_size_read(dir);
33835b6a3a2bSMark Fasheh
3384e05a2428Slei lu first_de = di->id2.i_data.id_data;
3385e05a2428Slei lu de_buf = first_de;
33865b6a3a2bSMark Fasheh limit = de_buf + i_size_read(dir);
33875b6a3a2bSMark Fasheh rec_len = OCFS2_DIR_REC_LEN(namelen);
33885b6a3a2bSMark Fasheh
33895b6a3a2bSMark Fasheh while (de_buf < limit) {
33905b6a3a2bSMark Fasheh de = (struct ocfs2_dir_entry *)de_buf;
33915b6a3a2bSMark Fasheh
3392e05a2428Slei lu if (!ocfs2_check_dir_entry(dir, de, di_bh, first_de,
3393e05a2428Slei lu i_size_read(dir), offset)) {
33945b6a3a2bSMark Fasheh ret = -ENOENT;
33955b6a3a2bSMark Fasheh goto out;
33965b6a3a2bSMark Fasheh }
33975b6a3a2bSMark Fasheh if (ocfs2_match(namelen, name, de)) {
33985b6a3a2bSMark Fasheh ret = -EEXIST;
33995b6a3a2bSMark Fasheh goto out;
34005b6a3a2bSMark Fasheh }
340187d35a74SMark Fasheh /*
340287d35a74SMark Fasheh * No need to check for a trailing dirent record here as
340387d35a74SMark Fasheh * they're not used for inline dirs.
340487d35a74SMark Fasheh */
340587d35a74SMark Fasheh
34065b6a3a2bSMark Fasheh if (ocfs2_dirent_would_fit(de, rec_len)) {
34075b6a3a2bSMark Fasheh /* Ok, we found a spot. Return this bh and let
34085b6a3a2bSMark Fasheh * the caller actually fill it in. */
34095b6a3a2bSMark Fasheh *ret_de_bh = di_bh;
34105b6a3a2bSMark Fasheh get_bh(*ret_de_bh);
34115b6a3a2bSMark Fasheh ret = 0;
34125b6a3a2bSMark Fasheh goto out;
34135b6a3a2bSMark Fasheh }
34145b6a3a2bSMark Fasheh
34155b6a3a2bSMark Fasheh last_de = de;
34165b6a3a2bSMark Fasheh de_buf += le16_to_cpu(de->rec_len);
34175b6a3a2bSMark Fasheh offset += le16_to_cpu(de->rec_len);
34185b6a3a2bSMark Fasheh }
34195b6a3a2bSMark Fasheh
3420ccd979bdSMark Fasheh /*
34215b6a3a2bSMark Fasheh * We're going to require expansion of the directory - figure
34225b6a3a2bSMark Fasheh * out how many blocks we'll need so that a place for the
34235b6a3a2bSMark Fasheh * dirent can be found.
3424ccd979bdSMark Fasheh */
34255b6a3a2bSMark Fasheh *blocks_wanted = 1;
342687d35a74SMark Fasheh new_rec_len = le16_to_cpu(last_de->rec_len) + free_space;
34275b6a3a2bSMark Fasheh if (new_rec_len < (rec_len + OCFS2_DIR_REC_LEN(last_de->name_len)))
34285b6a3a2bSMark Fasheh *blocks_wanted = 2;
34295b6a3a2bSMark Fasheh
34305b6a3a2bSMark Fasheh ret = -ENOSPC;
34315b6a3a2bSMark Fasheh out:
34325b6a3a2bSMark Fasheh return ret;
34335b6a3a2bSMark Fasheh }
34345b6a3a2bSMark Fasheh
ocfs2_find_dir_space_el(struct inode * dir,const char * name,int namelen,struct buffer_head ** ret_de_bh)34355b6a3a2bSMark Fasheh static int ocfs2_find_dir_space_el(struct inode *dir, const char *name,
34365b6a3a2bSMark Fasheh int namelen, struct buffer_head **ret_de_bh)
3437ccd979bdSMark Fasheh {
3438ccd979bdSMark Fasheh unsigned long offset;
3439ccd979bdSMark Fasheh struct buffer_head *bh = NULL;
3440ccd979bdSMark Fasheh unsigned short rec_len;
3441ccd979bdSMark Fasheh struct ocfs2_dir_entry *de;
34425b6a3a2bSMark Fasheh struct super_block *sb = dir->i_sb;
3443ccd979bdSMark Fasheh int status;
344487d35a74SMark Fasheh int blocksize = dir->i_sb->s_blocksize;
3445ccd979bdSMark Fasheh
3446a22305ccSJoel Becker status = ocfs2_read_dir_block(dir, 0, &bh, 0);
34479b572691SDaeseok Youn if (status)
3448ccd979bdSMark Fasheh goto bail;
3449ccd979bdSMark Fasheh
3450ccd979bdSMark Fasheh rec_len = OCFS2_DIR_REC_LEN(namelen);
3451ccd979bdSMark Fasheh offset = 0;
3452ccd979bdSMark Fasheh de = (struct ocfs2_dir_entry *) bh->b_data;
3453ccd979bdSMark Fasheh while (1) {
3454ccd979bdSMark Fasheh if ((char *)de >= sb->s_blocksize + bh->b_data) {
3455ccd979bdSMark Fasheh brelse(bh);
3456ccd979bdSMark Fasheh bh = NULL;
3457ccd979bdSMark Fasheh
3458ccd979bdSMark Fasheh if (i_size_read(dir) <= offset) {
34595b6a3a2bSMark Fasheh /*
34605b6a3a2bSMark Fasheh * Caller will have to expand this
34615b6a3a2bSMark Fasheh * directory.
34625b6a3a2bSMark Fasheh */
34635b6a3a2bSMark Fasheh status = -ENOSPC;
3464ccd979bdSMark Fasheh goto bail;
3465ccd979bdSMark Fasheh }
3466a22305ccSJoel Becker status = ocfs2_read_dir_block(dir,
3467ccd979bdSMark Fasheh offset >> sb->s_blocksize_bits,
3468a22305ccSJoel Becker &bh, 0);
34699b572691SDaeseok Youn if (status)
3470ccd979bdSMark Fasheh goto bail;
34719b572691SDaeseok Youn
3472ccd979bdSMark Fasheh /* move to next block */
3473ccd979bdSMark Fasheh de = (struct ocfs2_dir_entry *) bh->b_data;
3474ccd979bdSMark Fasheh }
3475e05a2428Slei lu if (!ocfs2_check_dir_entry(dir, de, bh, bh->b_data, blocksize,
3476e05a2428Slei lu offset)) {
3477ccd979bdSMark Fasheh status = -ENOENT;
3478ccd979bdSMark Fasheh goto bail;
3479ccd979bdSMark Fasheh }
3480ccd979bdSMark Fasheh if (ocfs2_match(namelen, name, de)) {
3481ccd979bdSMark Fasheh status = -EEXIST;
3482ccd979bdSMark Fasheh goto bail;
3483ccd979bdSMark Fasheh }
348487d35a74SMark Fasheh
348587d35a74SMark Fasheh if (ocfs2_skip_dir_trailer(dir, de, offset % blocksize,
348687d35a74SMark Fasheh blocksize))
348787d35a74SMark Fasheh goto next;
348887d35a74SMark Fasheh
34898553cf4fSMark Fasheh if (ocfs2_dirent_would_fit(de, rec_len)) {
3490ccd979bdSMark Fasheh /* Ok, we found a spot. Return this bh and let
3491ccd979bdSMark Fasheh * the caller actually fill it in. */
3492ccd979bdSMark Fasheh *ret_de_bh = bh;
3493ccd979bdSMark Fasheh get_bh(*ret_de_bh);
3494ccd979bdSMark Fasheh status = 0;
3495ccd979bdSMark Fasheh goto bail;
3496ccd979bdSMark Fasheh }
349787d35a74SMark Fasheh next:
3498ccd979bdSMark Fasheh offset += le16_to_cpu(de->rec_len);
3499ccd979bdSMark Fasheh de = (struct ocfs2_dir_entry *)((char *) de + le16_to_cpu(de->rec_len));
3500ccd979bdSMark Fasheh }
3501ccd979bdSMark Fasheh
3502ccd979bdSMark Fasheh bail:
3503ccd979bdSMark Fasheh brelse(bh);
3504c1e8d35eSTao Ma if (status)
3505c1e8d35eSTao Ma mlog_errno(status);
3506ccd979bdSMark Fasheh
3507ccd979bdSMark Fasheh return status;
3508ccd979bdSMark Fasheh }
35095b6a3a2bSMark Fasheh
dx_leaf_sort_cmp(const void * a,const void * b)35109b7895efSMark Fasheh static int dx_leaf_sort_cmp(const void *a, const void *b)
35119b7895efSMark Fasheh {
35129b7895efSMark Fasheh const struct ocfs2_dx_entry *entry1 = a;
35139b7895efSMark Fasheh const struct ocfs2_dx_entry *entry2 = b;
35149b7895efSMark Fasheh u32 major_hash1 = le32_to_cpu(entry1->dx_major_hash);
35159b7895efSMark Fasheh u32 major_hash2 = le32_to_cpu(entry2->dx_major_hash);
35169b7895efSMark Fasheh u32 minor_hash1 = le32_to_cpu(entry1->dx_minor_hash);
35179b7895efSMark Fasheh u32 minor_hash2 = le32_to_cpu(entry2->dx_minor_hash);
35189b7895efSMark Fasheh
35199b7895efSMark Fasheh if (major_hash1 > major_hash2)
35209b7895efSMark Fasheh return 1;
35219b7895efSMark Fasheh if (major_hash1 < major_hash2)
35229b7895efSMark Fasheh return -1;
35239b7895efSMark Fasheh
35249b7895efSMark Fasheh /*
35259b7895efSMark Fasheh * It is not strictly necessary to sort by minor
35269b7895efSMark Fasheh */
35279b7895efSMark Fasheh if (minor_hash1 > minor_hash2)
35289b7895efSMark Fasheh return 1;
35299b7895efSMark Fasheh if (minor_hash1 < minor_hash2)
35309b7895efSMark Fasheh return -1;
35319b7895efSMark Fasheh return 0;
35329b7895efSMark Fasheh }
35339b7895efSMark Fasheh
dx_leaf_sort_swap(void * a,void * b,int size)35349b7895efSMark Fasheh static void dx_leaf_sort_swap(void *a, void *b, int size)
35359b7895efSMark Fasheh {
35369b7895efSMark Fasheh struct ocfs2_dx_entry *entry1 = a;
35379b7895efSMark Fasheh struct ocfs2_dx_entry *entry2 = b;
35389b7895efSMark Fasheh
35399b7895efSMark Fasheh BUG_ON(size != sizeof(*entry1));
35409b7895efSMark Fasheh
35412a28f98cSFabian Frederick swap(*entry1, *entry2);
35429b7895efSMark Fasheh }
35439b7895efSMark Fasheh
ocfs2_dx_leaf_same_major(struct ocfs2_dx_leaf * dx_leaf)35449b7895efSMark Fasheh static int ocfs2_dx_leaf_same_major(struct ocfs2_dx_leaf *dx_leaf)
35459b7895efSMark Fasheh {
35469b7895efSMark Fasheh struct ocfs2_dx_entry_list *dl_list = &dx_leaf->dl_list;
35479b7895efSMark Fasheh int i, num = le16_to_cpu(dl_list->de_num_used);
35489b7895efSMark Fasheh
35499b7895efSMark Fasheh for (i = 0; i < (num - 1); i++) {
35509b7895efSMark Fasheh if (le32_to_cpu(dl_list->de_entries[i].dx_major_hash) !=
35519b7895efSMark Fasheh le32_to_cpu(dl_list->de_entries[i + 1].dx_major_hash))
35529b7895efSMark Fasheh return 0;
35539b7895efSMark Fasheh }
35549b7895efSMark Fasheh
35559b7895efSMark Fasheh return 1;
35569b7895efSMark Fasheh }
35579b7895efSMark Fasheh
35589b7895efSMark Fasheh /*
35599b7895efSMark Fasheh * Find the optimal value to split this leaf on. This expects the leaf
35609b7895efSMark Fasheh * entries to be in sorted order.
35619b7895efSMark Fasheh *
35629b7895efSMark Fasheh * leaf_cpos is the cpos of the leaf we're splitting. insert_hash is
35639b7895efSMark Fasheh * the hash we want to insert.
35649b7895efSMark Fasheh *
35659b7895efSMark Fasheh * This function is only concerned with the major hash - that which
35669b7895efSMark Fasheh * determines which cluster an item belongs to.
35679b7895efSMark Fasheh */
ocfs2_dx_dir_find_leaf_split(struct ocfs2_dx_leaf * dx_leaf,u32 leaf_cpos,u32 insert_hash,u32 * split_hash)35689b7895efSMark Fasheh static int ocfs2_dx_dir_find_leaf_split(struct ocfs2_dx_leaf *dx_leaf,
35699b7895efSMark Fasheh u32 leaf_cpos, u32 insert_hash,
35709b7895efSMark Fasheh u32 *split_hash)
35719b7895efSMark Fasheh {
35729b7895efSMark Fasheh struct ocfs2_dx_entry_list *dl_list = &dx_leaf->dl_list;
35739b7895efSMark Fasheh int i, num_used = le16_to_cpu(dl_list->de_num_used);
35749b7895efSMark Fasheh int allsame;
35759b7895efSMark Fasheh
35769b7895efSMark Fasheh /*
35779b7895efSMark Fasheh * There's a couple rare, but nasty corner cases we have to
35789b7895efSMark Fasheh * check for here. All of them involve a leaf where all value
35799b7895efSMark Fasheh * have the same hash, which is what we look for first.
35809b7895efSMark Fasheh *
35819b7895efSMark Fasheh * Most of the time, all of the above is false, and we simply
35829b7895efSMark Fasheh * pick the median value for a split.
35839b7895efSMark Fasheh */
35849b7895efSMark Fasheh allsame = ocfs2_dx_leaf_same_major(dx_leaf);
35859b7895efSMark Fasheh if (allsame) {
35869b7895efSMark Fasheh u32 val = le32_to_cpu(dl_list->de_entries[0].dx_major_hash);
35879b7895efSMark Fasheh
35889b7895efSMark Fasheh if (val == insert_hash) {
35899b7895efSMark Fasheh /*
35909b7895efSMark Fasheh * No matter where we would choose to split,
35919b7895efSMark Fasheh * the new entry would want to occupy the same
35929b7895efSMark Fasheh * block as these. Since there's no space left
35939b7895efSMark Fasheh * in their existing block, we know there
35949b7895efSMark Fasheh * won't be space after the split.
35959b7895efSMark Fasheh */
35969b7895efSMark Fasheh return -ENOSPC;
35979b7895efSMark Fasheh }
35989b7895efSMark Fasheh
35999b7895efSMark Fasheh if (val == leaf_cpos) {
36009b7895efSMark Fasheh /*
36019b7895efSMark Fasheh * Because val is the same as leaf_cpos (which
36029b7895efSMark Fasheh * is the smallest value this leaf can have),
36039b7895efSMark Fasheh * yet is not equal to insert_hash, then we
36049b7895efSMark Fasheh * know that insert_hash *must* be larger than
36059b7895efSMark Fasheh * val (and leaf_cpos). At least cpos+1 in value.
36069b7895efSMark Fasheh *
36079b7895efSMark Fasheh * We also know then, that there cannot be an
36089b7895efSMark Fasheh * adjacent extent (otherwise we'd be looking
36099b7895efSMark Fasheh * at it). Choosing this value gives us a
36109b7895efSMark Fasheh * chance to get some contiguousness.
36119b7895efSMark Fasheh */
36129b7895efSMark Fasheh *split_hash = leaf_cpos + 1;
36139b7895efSMark Fasheh return 0;
36149b7895efSMark Fasheh }
36159b7895efSMark Fasheh
36169b7895efSMark Fasheh if (val > insert_hash) {
36179b7895efSMark Fasheh /*
36189b7895efSMark Fasheh * val can not be the same as insert hash, and
36199b7895efSMark Fasheh * also must be larger than leaf_cpos. Also,
36209b7895efSMark Fasheh * we know that there can't be a leaf between
36219b7895efSMark Fasheh * cpos and val, otherwise the entries with
36229b7895efSMark Fasheh * hash 'val' would be there.
36239b7895efSMark Fasheh */
36249b7895efSMark Fasheh *split_hash = val;
36259b7895efSMark Fasheh return 0;
36269b7895efSMark Fasheh }
36279b7895efSMark Fasheh
36289b7895efSMark Fasheh *split_hash = insert_hash;
36299b7895efSMark Fasheh return 0;
36309b7895efSMark Fasheh }
36319b7895efSMark Fasheh
36329b7895efSMark Fasheh /*
36339b7895efSMark Fasheh * Since the records are sorted and the checks above
36349b7895efSMark Fasheh * guaranteed that not all records in this block are the same,
36359b7895efSMark Fasheh * we simple travel forward, from the median, and pick the 1st
36369b7895efSMark Fasheh * record whose value is larger than leaf_cpos.
36379b7895efSMark Fasheh */
36389b7895efSMark Fasheh for (i = (num_used / 2); i < num_used; i++)
36399b7895efSMark Fasheh if (le32_to_cpu(dl_list->de_entries[i].dx_major_hash) >
36409b7895efSMark Fasheh leaf_cpos)
36419b7895efSMark Fasheh break;
36429b7895efSMark Fasheh
36439b7895efSMark Fasheh BUG_ON(i == num_used); /* Should be impossible */
36449b7895efSMark Fasheh *split_hash = le32_to_cpu(dl_list->de_entries[i].dx_major_hash);
36459b7895efSMark Fasheh return 0;
36469b7895efSMark Fasheh }
36479b7895efSMark Fasheh
36489b7895efSMark Fasheh /*
36499b7895efSMark Fasheh * Transfer all entries in orig_dx_leaves whose major hash is equal to or
36509b7895efSMark Fasheh * larger than split_hash into new_dx_leaves. We use a temporary
36519b7895efSMark Fasheh * buffer (tmp_dx_leaf) to make the changes to the original leaf blocks.
36529b7895efSMark Fasheh *
36539b7895efSMark Fasheh * Since the block offset inside a leaf (cluster) is a constant mask
36549b7895efSMark Fasheh * of minor_hash, we can optimize - an item at block offset X within
36559b7895efSMark Fasheh * the original cluster, will be at offset X within the new cluster.
36569b7895efSMark Fasheh */
ocfs2_dx_dir_transfer_leaf(struct inode * dir,u32 split_hash,handle_t * handle,struct ocfs2_dx_leaf * tmp_dx_leaf,struct buffer_head ** orig_dx_leaves,struct buffer_head ** new_dx_leaves,int num_dx_leaves)36579b7895efSMark Fasheh static void ocfs2_dx_dir_transfer_leaf(struct inode *dir, u32 split_hash,
36589b7895efSMark Fasheh handle_t *handle,
36599b7895efSMark Fasheh struct ocfs2_dx_leaf *tmp_dx_leaf,
36609b7895efSMark Fasheh struct buffer_head **orig_dx_leaves,
36619b7895efSMark Fasheh struct buffer_head **new_dx_leaves,
36629b7895efSMark Fasheh int num_dx_leaves)
36639b7895efSMark Fasheh {
36649b7895efSMark Fasheh int i, j, num_used;
36659b7895efSMark Fasheh u32 major_hash;
36669b7895efSMark Fasheh struct ocfs2_dx_leaf *orig_dx_leaf, *new_dx_leaf;
366777461ba1Szhengbin struct ocfs2_dx_entry_list *orig_list, *tmp_list;
36689b7895efSMark Fasheh struct ocfs2_dx_entry *dx_entry;
36699b7895efSMark Fasheh
36709b7895efSMark Fasheh tmp_list = &tmp_dx_leaf->dl_list;
36719b7895efSMark Fasheh
36729b7895efSMark Fasheh for (i = 0; i < num_dx_leaves; i++) {
36739b7895efSMark Fasheh orig_dx_leaf = (struct ocfs2_dx_leaf *) orig_dx_leaves[i]->b_data;
36749b7895efSMark Fasheh orig_list = &orig_dx_leaf->dl_list;
36759b7895efSMark Fasheh new_dx_leaf = (struct ocfs2_dx_leaf *) new_dx_leaves[i]->b_data;
36769b7895efSMark Fasheh
36779b7895efSMark Fasheh num_used = le16_to_cpu(orig_list->de_num_used);
36789b7895efSMark Fasheh
36799b7895efSMark Fasheh memcpy(tmp_dx_leaf, orig_dx_leaf, dir->i_sb->s_blocksize);
36809b7895efSMark Fasheh tmp_list->de_num_used = cpu_to_le16(0);
36819b7895efSMark Fasheh memset(&tmp_list->de_entries, 0, sizeof(*dx_entry)*num_used);
36829b7895efSMark Fasheh
36839b7895efSMark Fasheh for (j = 0; j < num_used; j++) {
36849b7895efSMark Fasheh dx_entry = &orig_list->de_entries[j];
36859b7895efSMark Fasheh major_hash = le32_to_cpu(dx_entry->dx_major_hash);
36869b7895efSMark Fasheh if (major_hash >= split_hash)
36879b7895efSMark Fasheh ocfs2_dx_dir_leaf_insert_tail(new_dx_leaf,
36889b7895efSMark Fasheh dx_entry);
36899b7895efSMark Fasheh else
36909b7895efSMark Fasheh ocfs2_dx_dir_leaf_insert_tail(tmp_dx_leaf,
36919b7895efSMark Fasheh dx_entry);
36929b7895efSMark Fasheh }
36939b7895efSMark Fasheh memcpy(orig_dx_leaf, tmp_dx_leaf, dir->i_sb->s_blocksize);
36949b7895efSMark Fasheh
36959b7895efSMark Fasheh ocfs2_journal_dirty(handle, orig_dx_leaves[i]);
36969b7895efSMark Fasheh ocfs2_journal_dirty(handle, new_dx_leaves[i]);
36979b7895efSMark Fasheh }
36989b7895efSMark Fasheh }
36999b7895efSMark Fasheh
ocfs2_dx_dir_rebalance_credits(struct ocfs2_super * osb,struct ocfs2_dx_root_block * dx_root)37009b7895efSMark Fasheh static int ocfs2_dx_dir_rebalance_credits(struct ocfs2_super *osb,
37019b7895efSMark Fasheh struct ocfs2_dx_root_block *dx_root)
37029b7895efSMark Fasheh {
3703d006c71fSJunxiao Bi int credits = ocfs2_clusters_to_blocks(osb->sb, 3);
37049b7895efSMark Fasheh
370506f9da6eSGoldwyn Rodrigues credits += ocfs2_calc_extend_credits(osb->sb, &dx_root->dr_list);
37069b7895efSMark Fasheh credits += ocfs2_quota_trans_credits(osb->sb);
37079b7895efSMark Fasheh return credits;
37089b7895efSMark Fasheh }
37099b7895efSMark Fasheh
37109b7895efSMark Fasheh /*
37119b7895efSMark Fasheh * Find the median value in dx_leaf_bh and allocate a new leaf to move
37129b7895efSMark Fasheh * half our entries into.
37139b7895efSMark Fasheh */
ocfs2_dx_dir_rebalance(struct ocfs2_super * osb,struct inode * dir,struct buffer_head * dx_root_bh,struct buffer_head * dx_leaf_bh,struct ocfs2_dx_hinfo * hinfo,u32 leaf_cpos,u64 leaf_blkno)37149b7895efSMark Fasheh static int ocfs2_dx_dir_rebalance(struct ocfs2_super *osb, struct inode *dir,
37159b7895efSMark Fasheh struct buffer_head *dx_root_bh,
37169b7895efSMark Fasheh struct buffer_head *dx_leaf_bh,
37179b7895efSMark Fasheh struct ocfs2_dx_hinfo *hinfo, u32 leaf_cpos,
37189b7895efSMark Fasheh u64 leaf_blkno)
37199b7895efSMark Fasheh {
37209b7895efSMark Fasheh struct ocfs2_dx_leaf *dx_leaf = (struct ocfs2_dx_leaf *)dx_leaf_bh->b_data;
37219b7895efSMark Fasheh int credits, ret, i, num_used, did_quota = 0;
37229b7895efSMark Fasheh u32 cpos, split_hash, insert_hash = hinfo->major_hash;
37239b7895efSMark Fasheh u64 orig_leaves_start;
37249b7895efSMark Fasheh int num_dx_leaves;
37259b7895efSMark Fasheh struct buffer_head **orig_dx_leaves = NULL;
37269b7895efSMark Fasheh struct buffer_head **new_dx_leaves = NULL;
37279b7895efSMark Fasheh struct ocfs2_alloc_context *data_ac = NULL, *meta_ac = NULL;
37289b7895efSMark Fasheh struct ocfs2_extent_tree et;
37299b7895efSMark Fasheh handle_t *handle = NULL;
37309b7895efSMark Fasheh struct ocfs2_dx_root_block *dx_root;
37319b7895efSMark Fasheh struct ocfs2_dx_leaf *tmp_dx_leaf = NULL;
37329b7895efSMark Fasheh
3733f1088d47STao Ma trace_ocfs2_dx_dir_rebalance((unsigned long long)OCFS2_I(dir)->ip_blkno,
3734f1088d47STao Ma (unsigned long long)leaf_blkno,
3735f1088d47STao Ma insert_hash);
37369b7895efSMark Fasheh
37375e404e9eSJoel Becker ocfs2_init_dx_root_extent_tree(&et, INODE_CACHE(dir), dx_root_bh);
37389b7895efSMark Fasheh
37399b7895efSMark Fasheh dx_root = (struct ocfs2_dx_root_block *)dx_root_bh->b_data;
37409b7895efSMark Fasheh /*
37419b7895efSMark Fasheh * XXX: This is a rather large limit. We should use a more
37429b7895efSMark Fasheh * realistic value.
37439b7895efSMark Fasheh */
37449b7895efSMark Fasheh if (le32_to_cpu(dx_root->dr_clusters) == UINT_MAX)
37459b7895efSMark Fasheh return -ENOSPC;
37469b7895efSMark Fasheh
37479b7895efSMark Fasheh num_used = le16_to_cpu(dx_leaf->dl_list.de_num_used);
37489b7895efSMark Fasheh if (num_used < le16_to_cpu(dx_leaf->dl_list.de_count)) {
37499b7895efSMark Fasheh mlog(ML_ERROR, "DX Dir: %llu, Asked to rebalance empty leaf: "
37509b7895efSMark Fasheh "%llu, %d\n", (unsigned long long)OCFS2_I(dir)->ip_blkno,
37519b7895efSMark Fasheh (unsigned long long)leaf_blkno, num_used);
37529b7895efSMark Fasheh ret = -EIO;
37539b7895efSMark Fasheh goto out;
37549b7895efSMark Fasheh }
37559b7895efSMark Fasheh
37569b7895efSMark Fasheh orig_dx_leaves = ocfs2_dx_dir_kmalloc_leaves(osb->sb, &num_dx_leaves);
37579b7895efSMark Fasheh if (!orig_dx_leaves) {
37589b7895efSMark Fasheh ret = -ENOMEM;
37599b7895efSMark Fasheh mlog_errno(ret);
37609b7895efSMark Fasheh goto out;
37619b7895efSMark Fasheh }
37629b7895efSMark Fasheh
37639b7895efSMark Fasheh new_dx_leaves = ocfs2_dx_dir_kmalloc_leaves(osb->sb, NULL);
37649b7895efSMark Fasheh if (!new_dx_leaves) {
37659b7895efSMark Fasheh ret = -ENOMEM;
37669b7895efSMark Fasheh mlog_errno(ret);
37679b7895efSMark Fasheh goto out;
37689b7895efSMark Fasheh }
37699b7895efSMark Fasheh
37709b7895efSMark Fasheh ret = ocfs2_lock_allocators(dir, &et, 1, 0, &data_ac, &meta_ac);
37719b7895efSMark Fasheh if (ret) {
37729b7895efSMark Fasheh if (ret != -ENOSPC)
37739b7895efSMark Fasheh mlog_errno(ret);
37749b7895efSMark Fasheh goto out;
37759b7895efSMark Fasheh }
37769b7895efSMark Fasheh
37779b7895efSMark Fasheh credits = ocfs2_dx_dir_rebalance_credits(osb, dx_root);
37789b7895efSMark Fasheh handle = ocfs2_start_trans(osb, credits);
37799b7895efSMark Fasheh if (IS_ERR(handle)) {
37809b7895efSMark Fasheh ret = PTR_ERR(handle);
37819b7895efSMark Fasheh handle = NULL;
37829b7895efSMark Fasheh mlog_errno(ret);
37839b7895efSMark Fasheh goto out;
37849b7895efSMark Fasheh }
37859b7895efSMark Fasheh
37865dd4056dSChristoph Hellwig ret = dquot_alloc_space_nodirty(dir,
37875dd4056dSChristoph Hellwig ocfs2_clusters_to_bytes(dir->i_sb, 1));
37885dd4056dSChristoph Hellwig if (ret)
37899b7895efSMark Fasheh goto out_commit;
37909b7895efSMark Fasheh did_quota = 1;
37919b7895efSMark Fasheh
37920cf2f763SJoel Becker ret = ocfs2_journal_access_dl(handle, INODE_CACHE(dir), dx_leaf_bh,
37939b7895efSMark Fasheh OCFS2_JOURNAL_ACCESS_WRITE);
37949b7895efSMark Fasheh if (ret) {
37959b7895efSMark Fasheh mlog_errno(ret);
37969b7895efSMark Fasheh goto out_commit;
37979b7895efSMark Fasheh }
37989b7895efSMark Fasheh
37999b7895efSMark Fasheh /*
38009b7895efSMark Fasheh * This block is changing anyway, so we can sort it in place.
38019b7895efSMark Fasheh */
38029b7895efSMark Fasheh sort(dx_leaf->dl_list.de_entries, num_used,
38039b7895efSMark Fasheh sizeof(struct ocfs2_dx_entry), dx_leaf_sort_cmp,
38049b7895efSMark Fasheh dx_leaf_sort_swap);
38059b7895efSMark Fasheh
3806ec20cec7SJoel Becker ocfs2_journal_dirty(handle, dx_leaf_bh);
38079b7895efSMark Fasheh
38089b7895efSMark Fasheh ret = ocfs2_dx_dir_find_leaf_split(dx_leaf, leaf_cpos, insert_hash,
38099b7895efSMark Fasheh &split_hash);
38109b7895efSMark Fasheh if (ret) {
38119b7895efSMark Fasheh mlog_errno(ret);
38129b7895efSMark Fasheh goto out_commit;
38139b7895efSMark Fasheh }
38149b7895efSMark Fasheh
3815f1088d47STao Ma trace_ocfs2_dx_dir_rebalance_split(leaf_cpos, split_hash, insert_hash);
38169b7895efSMark Fasheh
38179b7895efSMark Fasheh /*
38189b7895efSMark Fasheh * We have to carefully order operations here. There are items
38199b7895efSMark Fasheh * which want to be in the new cluster before insert, but in
38209b7895efSMark Fasheh * order to put those items in the new cluster, we alter the
38219b7895efSMark Fasheh * old cluster. A failure to insert gets nasty.
38229b7895efSMark Fasheh *
38239b7895efSMark Fasheh * So, start by reserving writes to the old
38249b7895efSMark Fasheh * cluster. ocfs2_dx_dir_new_cluster will reserve writes on
38259b7895efSMark Fasheh * the new cluster for us, before inserting it. The insert
38269b7895efSMark Fasheh * won't happen if there's an error before that. Once the
38279b7895efSMark Fasheh * insert is done then, we can transfer from one leaf into the
38289b7895efSMark Fasheh * other without fear of hitting any error.
38299b7895efSMark Fasheh */
38309b7895efSMark Fasheh
38319b7895efSMark Fasheh /*
38329b7895efSMark Fasheh * The leaf transfer wants some scratch space so that we don't
38339b7895efSMark Fasheh * wind up doing a bunch of expensive memmove().
38349b7895efSMark Fasheh */
38359b7895efSMark Fasheh tmp_dx_leaf = kmalloc(osb->sb->s_blocksize, GFP_NOFS);
38369b7895efSMark Fasheh if (!tmp_dx_leaf) {
38379b7895efSMark Fasheh ret = -ENOMEM;
38389b7895efSMark Fasheh mlog_errno(ret);
38399b7895efSMark Fasheh goto out_commit;
38409b7895efSMark Fasheh }
38419b7895efSMark Fasheh
38421d46dc08SMark Fasheh orig_leaves_start = ocfs2_block_to_cluster_start(dir->i_sb, leaf_blkno);
38439b7895efSMark Fasheh ret = ocfs2_read_dx_leaves(dir, orig_leaves_start, num_dx_leaves,
38449b7895efSMark Fasheh orig_dx_leaves);
38459b7895efSMark Fasheh if (ret) {
38469b7895efSMark Fasheh mlog_errno(ret);
38479b7895efSMark Fasheh goto out_commit;
38489b7895efSMark Fasheh }
38499b7895efSMark Fasheh
38500f4da216STristan Ye cpos = split_hash;
38510f4da216STristan Ye ret = ocfs2_dx_dir_new_cluster(dir, &et, cpos, handle,
38520f4da216STristan Ye data_ac, meta_ac, new_dx_leaves,
38530f4da216STristan Ye num_dx_leaves);
38540f4da216STristan Ye if (ret) {
38550f4da216STristan Ye mlog_errno(ret);
38560f4da216STristan Ye goto out_commit;
38570f4da216STristan Ye }
38580f4da216STristan Ye
38599b7895efSMark Fasheh for (i = 0; i < num_dx_leaves; i++) {
38600cf2f763SJoel Becker ret = ocfs2_journal_access_dl(handle, INODE_CACHE(dir),
38610cf2f763SJoel Becker orig_dx_leaves[i],
38629b7895efSMark Fasheh OCFS2_JOURNAL_ACCESS_WRITE);
38639b7895efSMark Fasheh if (ret) {
38649b7895efSMark Fasheh mlog_errno(ret);
38659b7895efSMark Fasheh goto out_commit;
38669b7895efSMark Fasheh }
38679b7895efSMark Fasheh
38680f4da216STristan Ye ret = ocfs2_journal_access_dl(handle, INODE_CACHE(dir),
38690f4da216STristan Ye new_dx_leaves[i],
38700f4da216STristan Ye OCFS2_JOURNAL_ACCESS_WRITE);
38719b7895efSMark Fasheh if (ret) {
38729b7895efSMark Fasheh mlog_errno(ret);
38739b7895efSMark Fasheh goto out_commit;
38749b7895efSMark Fasheh }
38750f4da216STristan Ye }
38769b7895efSMark Fasheh
38779b7895efSMark Fasheh ocfs2_dx_dir_transfer_leaf(dir, split_hash, handle, tmp_dx_leaf,
38789b7895efSMark Fasheh orig_dx_leaves, new_dx_leaves, num_dx_leaves);
38799b7895efSMark Fasheh
38809b7895efSMark Fasheh out_commit:
38819b7895efSMark Fasheh if (ret < 0 && did_quota)
38825dd4056dSChristoph Hellwig dquot_free_space_nodirty(dir,
38839b7895efSMark Fasheh ocfs2_clusters_to_bytes(dir->i_sb, 1));
38849b7895efSMark Fasheh
38852931cdcbSDarrick J. Wong ocfs2_update_inode_fsync_trans(handle, dir, 1);
38869b7895efSMark Fasheh ocfs2_commit_trans(osb, handle);
38879b7895efSMark Fasheh
38889b7895efSMark Fasheh out:
38899b7895efSMark Fasheh if (orig_dx_leaves || new_dx_leaves) {
38909b7895efSMark Fasheh for (i = 0; i < num_dx_leaves; i++) {
38919b7895efSMark Fasheh if (orig_dx_leaves)
38929b7895efSMark Fasheh brelse(orig_dx_leaves[i]);
38939b7895efSMark Fasheh if (new_dx_leaves)
38949b7895efSMark Fasheh brelse(new_dx_leaves[i]);
38959b7895efSMark Fasheh }
38969b7895efSMark Fasheh kfree(orig_dx_leaves);
38979b7895efSMark Fasheh kfree(new_dx_leaves);
38989b7895efSMark Fasheh }
38999b7895efSMark Fasheh
39009b7895efSMark Fasheh if (meta_ac)
39019b7895efSMark Fasheh ocfs2_free_alloc_context(meta_ac);
39029b7895efSMark Fasheh if (data_ac)
39039b7895efSMark Fasheh ocfs2_free_alloc_context(data_ac);
39049b7895efSMark Fasheh
39059b7895efSMark Fasheh kfree(tmp_dx_leaf);
39069b7895efSMark Fasheh return ret;
39079b7895efSMark Fasheh }
39089b7895efSMark Fasheh
ocfs2_find_dir_space_dx(struct ocfs2_super * osb,struct inode * dir,struct buffer_head * di_bh,struct buffer_head * dx_root_bh,const char * name,int namelen,struct ocfs2_dir_lookup_result * lookup)3909e7c17e43SMark Fasheh static int ocfs2_find_dir_space_dx(struct ocfs2_super *osb, struct inode *dir,
3910e7c17e43SMark Fasheh struct buffer_head *di_bh,
3911e7c17e43SMark Fasheh struct buffer_head *dx_root_bh,
3912e7c17e43SMark Fasheh const char *name, int namelen,
3913e7c17e43SMark Fasheh struct ocfs2_dir_lookup_result *lookup)
3914e7c17e43SMark Fasheh {
3915e7c17e43SMark Fasheh int ret, rebalanced = 0;
3916e7c17e43SMark Fasheh struct ocfs2_dx_root_block *dx_root;
3917e7c17e43SMark Fasheh struct buffer_head *dx_leaf_bh = NULL;
3918e7c17e43SMark Fasheh struct ocfs2_dx_leaf *dx_leaf;
3919e7c17e43SMark Fasheh u64 blkno;
3920e7c17e43SMark Fasheh u32 leaf_cpos;
3921e7c17e43SMark Fasheh
3922e7c17e43SMark Fasheh dx_root = (struct ocfs2_dx_root_block *)dx_root_bh->b_data;
3923e7c17e43SMark Fasheh
3924e7c17e43SMark Fasheh restart_search:
3925e7c17e43SMark Fasheh ret = ocfs2_dx_dir_lookup(dir, &dx_root->dr_list, &lookup->dl_hinfo,
3926e7c17e43SMark Fasheh &leaf_cpos, &blkno);
3927e7c17e43SMark Fasheh if (ret) {
3928e7c17e43SMark Fasheh mlog_errno(ret);
3929e7c17e43SMark Fasheh goto out;
3930e7c17e43SMark Fasheh }
3931e7c17e43SMark Fasheh
3932e7c17e43SMark Fasheh ret = ocfs2_read_dx_leaf(dir, blkno, &dx_leaf_bh);
3933e7c17e43SMark Fasheh if (ret) {
3934e7c17e43SMark Fasheh mlog_errno(ret);
3935e7c17e43SMark Fasheh goto out;
3936e7c17e43SMark Fasheh }
3937e7c17e43SMark Fasheh
3938e7c17e43SMark Fasheh dx_leaf = (struct ocfs2_dx_leaf *)dx_leaf_bh->b_data;
3939e7c17e43SMark Fasheh
3940e7c17e43SMark Fasheh if (le16_to_cpu(dx_leaf->dl_list.de_num_used) >=
3941e7c17e43SMark Fasheh le16_to_cpu(dx_leaf->dl_list.de_count)) {
3942e7c17e43SMark Fasheh if (rebalanced) {
3943e7c17e43SMark Fasheh /*
3944e7c17e43SMark Fasheh * Rebalancing should have provided us with
3945e7c17e43SMark Fasheh * space in an appropriate leaf.
3946e7c17e43SMark Fasheh *
3947e7c17e43SMark Fasheh * XXX: Is this an abnormal condition then?
3948e7c17e43SMark Fasheh * Should we print a message here?
3949e7c17e43SMark Fasheh */
3950e7c17e43SMark Fasheh ret = -ENOSPC;
3951e7c17e43SMark Fasheh goto out;
3952e7c17e43SMark Fasheh }
3953e7c17e43SMark Fasheh
3954e7c17e43SMark Fasheh ret = ocfs2_dx_dir_rebalance(osb, dir, dx_root_bh, dx_leaf_bh,
3955e7c17e43SMark Fasheh &lookup->dl_hinfo, leaf_cpos,
3956e7c17e43SMark Fasheh blkno);
3957e7c17e43SMark Fasheh if (ret) {
3958e7c17e43SMark Fasheh if (ret != -ENOSPC)
3959e7c17e43SMark Fasheh mlog_errno(ret);
3960e7c17e43SMark Fasheh goto out;
3961e7c17e43SMark Fasheh }
3962e7c17e43SMark Fasheh
3963e7c17e43SMark Fasheh /*
3964e7c17e43SMark Fasheh * Restart the lookup. The rebalance might have
3965e7c17e43SMark Fasheh * changed which block our item fits into. Mark our
3966e7c17e43SMark Fasheh * progress, so we only execute this once.
3967e7c17e43SMark Fasheh */
3968e7c17e43SMark Fasheh brelse(dx_leaf_bh);
3969e7c17e43SMark Fasheh dx_leaf_bh = NULL;
3970e7c17e43SMark Fasheh rebalanced = 1;
3971e7c17e43SMark Fasheh goto restart_search;
3972e7c17e43SMark Fasheh }
3973e7c17e43SMark Fasheh
3974e7c17e43SMark Fasheh lookup->dl_dx_leaf_bh = dx_leaf_bh;
3975e7c17e43SMark Fasheh dx_leaf_bh = NULL;
3976e7c17e43SMark Fasheh
3977e7c17e43SMark Fasheh out:
3978e7c17e43SMark Fasheh brelse(dx_leaf_bh);
3979e7c17e43SMark Fasheh return ret;
3980e7c17e43SMark Fasheh }
3981e7c17e43SMark Fasheh
ocfs2_search_dx_free_list(struct inode * dir,struct buffer_head * dx_root_bh,int namelen,struct ocfs2_dir_lookup_result * lookup)3982e7c17e43SMark Fasheh static int ocfs2_search_dx_free_list(struct inode *dir,
3983e7c17e43SMark Fasheh struct buffer_head *dx_root_bh,
3984e7c17e43SMark Fasheh int namelen,
3985e7c17e43SMark Fasheh struct ocfs2_dir_lookup_result *lookup)
3986e7c17e43SMark Fasheh {
3987e7c17e43SMark Fasheh int ret = -ENOSPC;
3988e7c17e43SMark Fasheh struct buffer_head *leaf_bh = NULL, *prev_leaf_bh = NULL;
3989e7c17e43SMark Fasheh struct ocfs2_dir_block_trailer *db;
3990e7c17e43SMark Fasheh u64 next_block;
3991e7c17e43SMark Fasheh int rec_len = OCFS2_DIR_REC_LEN(namelen);
3992e7c17e43SMark Fasheh struct ocfs2_dx_root_block *dx_root;
3993e7c17e43SMark Fasheh
3994e7c17e43SMark Fasheh dx_root = (struct ocfs2_dx_root_block *)dx_root_bh->b_data;
3995e7c17e43SMark Fasheh next_block = le64_to_cpu(dx_root->dr_free_blk);
3996e7c17e43SMark Fasheh
3997e7c17e43SMark Fasheh while (next_block) {
3998e7c17e43SMark Fasheh brelse(prev_leaf_bh);
3999e7c17e43SMark Fasheh prev_leaf_bh = leaf_bh;
4000e7c17e43SMark Fasheh leaf_bh = NULL;
4001e7c17e43SMark Fasheh
4002e7c17e43SMark Fasheh ret = ocfs2_read_dir_block_direct(dir, next_block, &leaf_bh);
4003e7c17e43SMark Fasheh if (ret) {
4004e7c17e43SMark Fasheh mlog_errno(ret);
4005e7c17e43SMark Fasheh goto out;
4006e7c17e43SMark Fasheh }
4007e7c17e43SMark Fasheh
4008e7c17e43SMark Fasheh db = ocfs2_trailer_from_bh(leaf_bh, dir->i_sb);
4009e7c17e43SMark Fasheh if (rec_len <= le16_to_cpu(db->db_free_rec_len)) {
4010e7c17e43SMark Fasheh lookup->dl_leaf_bh = leaf_bh;
4011e7c17e43SMark Fasheh lookup->dl_prev_leaf_bh = prev_leaf_bh;
4012e7c17e43SMark Fasheh leaf_bh = NULL;
4013e7c17e43SMark Fasheh prev_leaf_bh = NULL;
4014e7c17e43SMark Fasheh break;
4015e7c17e43SMark Fasheh }
4016e7c17e43SMark Fasheh
4017e7c17e43SMark Fasheh next_block = le64_to_cpu(db->db_free_next);
4018e7c17e43SMark Fasheh }
4019e7c17e43SMark Fasheh
4020e7c17e43SMark Fasheh if (!next_block)
4021e7c17e43SMark Fasheh ret = -ENOSPC;
4022e7c17e43SMark Fasheh
4023e7c17e43SMark Fasheh out:
4024e7c17e43SMark Fasheh
4025e7c17e43SMark Fasheh brelse(leaf_bh);
4026e7c17e43SMark Fasheh brelse(prev_leaf_bh);
4027e7c17e43SMark Fasheh return ret;
4028e7c17e43SMark Fasheh }
4029e7c17e43SMark Fasheh
ocfs2_expand_inline_dx_root(struct inode * dir,struct buffer_head * dx_root_bh)40304ed8a6bbSMark Fasheh static int ocfs2_expand_inline_dx_root(struct inode *dir,
40314ed8a6bbSMark Fasheh struct buffer_head *dx_root_bh)
40324ed8a6bbSMark Fasheh {
40334ed8a6bbSMark Fasheh int ret, num_dx_leaves, i, j, did_quota = 0;
40344ed8a6bbSMark Fasheh struct buffer_head **dx_leaves = NULL;
40354ed8a6bbSMark Fasheh struct ocfs2_extent_tree et;
40364ed8a6bbSMark Fasheh u64 insert_blkno;
40374ed8a6bbSMark Fasheh struct ocfs2_alloc_context *data_ac = NULL;
40384ed8a6bbSMark Fasheh struct ocfs2_super *osb = OCFS2_SB(dir->i_sb);
40394ed8a6bbSMark Fasheh handle_t *handle = NULL;
40404ed8a6bbSMark Fasheh struct ocfs2_dx_root_block *dx_root;
40414ed8a6bbSMark Fasheh struct ocfs2_dx_entry_list *entry_list;
40424ed8a6bbSMark Fasheh struct ocfs2_dx_entry *dx_entry;
40434ed8a6bbSMark Fasheh struct ocfs2_dx_leaf *target_leaf;
40444ed8a6bbSMark Fasheh
40454ed8a6bbSMark Fasheh ret = ocfs2_reserve_clusters(osb, 1, &data_ac);
40464ed8a6bbSMark Fasheh if (ret) {
40474ed8a6bbSMark Fasheh mlog_errno(ret);
40484ed8a6bbSMark Fasheh goto out;
40494ed8a6bbSMark Fasheh }
40504ed8a6bbSMark Fasheh
40514ed8a6bbSMark Fasheh dx_leaves = ocfs2_dx_dir_kmalloc_leaves(osb->sb, &num_dx_leaves);
40524ed8a6bbSMark Fasheh if (!dx_leaves) {
40534ed8a6bbSMark Fasheh ret = -ENOMEM;
40544ed8a6bbSMark Fasheh mlog_errno(ret);
40554ed8a6bbSMark Fasheh goto out;
40564ed8a6bbSMark Fasheh }
40574ed8a6bbSMark Fasheh
40584ed8a6bbSMark Fasheh handle = ocfs2_start_trans(osb, ocfs2_calc_dxi_expand_credits(osb->sb));
40594ed8a6bbSMark Fasheh if (IS_ERR(handle)) {
40604ed8a6bbSMark Fasheh ret = PTR_ERR(handle);
40614ed8a6bbSMark Fasheh mlog_errno(ret);
40624ed8a6bbSMark Fasheh goto out;
40634ed8a6bbSMark Fasheh }
40644ed8a6bbSMark Fasheh
40655dd4056dSChristoph Hellwig ret = dquot_alloc_space_nodirty(dir,
40665dd4056dSChristoph Hellwig ocfs2_clusters_to_bytes(osb->sb, 1));
40675dd4056dSChristoph Hellwig if (ret)
40684ed8a6bbSMark Fasheh goto out_commit;
40694ed8a6bbSMark Fasheh did_quota = 1;
40704ed8a6bbSMark Fasheh
40714ed8a6bbSMark Fasheh /*
40724ed8a6bbSMark Fasheh * We do this up front, before the allocation, so that a
40734ed8a6bbSMark Fasheh * failure to add the dx_root_bh to the journal won't result
40744ed8a6bbSMark Fasheh * us losing clusters.
40754ed8a6bbSMark Fasheh */
40760cf2f763SJoel Becker ret = ocfs2_journal_access_dr(handle, INODE_CACHE(dir), dx_root_bh,
40774ed8a6bbSMark Fasheh OCFS2_JOURNAL_ACCESS_WRITE);
40784ed8a6bbSMark Fasheh if (ret) {
40794ed8a6bbSMark Fasheh mlog_errno(ret);
40804ed8a6bbSMark Fasheh goto out_commit;
40814ed8a6bbSMark Fasheh }
40824ed8a6bbSMark Fasheh
40834ed8a6bbSMark Fasheh ret = __ocfs2_dx_dir_new_cluster(dir, 0, handle, data_ac, dx_leaves,
40844ed8a6bbSMark Fasheh num_dx_leaves, &insert_blkno);
40854ed8a6bbSMark Fasheh if (ret) {
40864ed8a6bbSMark Fasheh mlog_errno(ret);
40874ed8a6bbSMark Fasheh goto out_commit;
40884ed8a6bbSMark Fasheh }
40894ed8a6bbSMark Fasheh
40904ed8a6bbSMark Fasheh /*
40914ed8a6bbSMark Fasheh * Transfer the entries from our dx_root into the appropriate
40924ed8a6bbSMark Fasheh * block
40934ed8a6bbSMark Fasheh */
40944ed8a6bbSMark Fasheh dx_root = (struct ocfs2_dx_root_block *) dx_root_bh->b_data;
40954ed8a6bbSMark Fasheh entry_list = &dx_root->dr_entries;
40964ed8a6bbSMark Fasheh
40974ed8a6bbSMark Fasheh for (i = 0; i < le16_to_cpu(entry_list->de_num_used); i++) {
40984ed8a6bbSMark Fasheh dx_entry = &entry_list->de_entries[i];
40994ed8a6bbSMark Fasheh
41004ed8a6bbSMark Fasheh j = __ocfs2_dx_dir_hash_idx(osb,
41014ed8a6bbSMark Fasheh le32_to_cpu(dx_entry->dx_minor_hash));
41024ed8a6bbSMark Fasheh target_leaf = (struct ocfs2_dx_leaf *)dx_leaves[j]->b_data;
41034ed8a6bbSMark Fasheh
41044ed8a6bbSMark Fasheh ocfs2_dx_dir_leaf_insert_tail(target_leaf, dx_entry);
41054ed8a6bbSMark Fasheh
41064ed8a6bbSMark Fasheh /* Each leaf has been passed to the journal already
41074ed8a6bbSMark Fasheh * via __ocfs2_dx_dir_new_cluster() */
41084ed8a6bbSMark Fasheh }
41094ed8a6bbSMark Fasheh
41104ed8a6bbSMark Fasheh dx_root->dr_flags &= ~OCFS2_DX_FLAG_INLINE;
41114ed8a6bbSMark Fasheh memset(&dx_root->dr_list, 0, osb->sb->s_blocksize -
41124ed8a6bbSMark Fasheh offsetof(struct ocfs2_dx_root_block, dr_list));
41134ed8a6bbSMark Fasheh dx_root->dr_list.l_count =
41144ed8a6bbSMark Fasheh cpu_to_le16(ocfs2_extent_recs_per_dx_root(osb->sb));
41154ed8a6bbSMark Fasheh
41164ed8a6bbSMark Fasheh /* This should never fail considering we start with an empty
41174ed8a6bbSMark Fasheh * dx_root. */
41185e404e9eSJoel Becker ocfs2_init_dx_root_extent_tree(&et, INODE_CACHE(dir), dx_root_bh);
4119cc79d8c1SJoel Becker ret = ocfs2_insert_extent(handle, &et, 0, insert_blkno, 1, 0, NULL);
41204ed8a6bbSMark Fasheh if (ret)
41214ed8a6bbSMark Fasheh mlog_errno(ret);
41224ed8a6bbSMark Fasheh did_quota = 0;
41234ed8a6bbSMark Fasheh
41242931cdcbSDarrick J. Wong ocfs2_update_inode_fsync_trans(handle, dir, 1);
41254ed8a6bbSMark Fasheh ocfs2_journal_dirty(handle, dx_root_bh);
41264ed8a6bbSMark Fasheh
41274ed8a6bbSMark Fasheh out_commit:
41284ed8a6bbSMark Fasheh if (ret < 0 && did_quota)
41295dd4056dSChristoph Hellwig dquot_free_space_nodirty(dir,
41304ed8a6bbSMark Fasheh ocfs2_clusters_to_bytes(dir->i_sb, 1));
41314ed8a6bbSMark Fasheh
41324ed8a6bbSMark Fasheh ocfs2_commit_trans(osb, handle);
41334ed8a6bbSMark Fasheh
41344ed8a6bbSMark Fasheh out:
41354ed8a6bbSMark Fasheh if (data_ac)
41364ed8a6bbSMark Fasheh ocfs2_free_alloc_context(data_ac);
41374ed8a6bbSMark Fasheh
41384ed8a6bbSMark Fasheh if (dx_leaves) {
41394ed8a6bbSMark Fasheh for (i = 0; i < num_dx_leaves; i++)
41404ed8a6bbSMark Fasheh brelse(dx_leaves[i]);
41414ed8a6bbSMark Fasheh kfree(dx_leaves);
41424ed8a6bbSMark Fasheh }
41434ed8a6bbSMark Fasheh return ret;
41444ed8a6bbSMark Fasheh }
41454ed8a6bbSMark Fasheh
ocfs2_inline_dx_has_space(struct buffer_head * dx_root_bh)41464ed8a6bbSMark Fasheh static int ocfs2_inline_dx_has_space(struct buffer_head *dx_root_bh)
41474ed8a6bbSMark Fasheh {
41484ed8a6bbSMark Fasheh struct ocfs2_dx_root_block *dx_root;
41494ed8a6bbSMark Fasheh struct ocfs2_dx_entry_list *entry_list;
41504ed8a6bbSMark Fasheh
41514ed8a6bbSMark Fasheh dx_root = (struct ocfs2_dx_root_block *) dx_root_bh->b_data;
41524ed8a6bbSMark Fasheh entry_list = &dx_root->dr_entries;
41534ed8a6bbSMark Fasheh
41544ed8a6bbSMark Fasheh if (le16_to_cpu(entry_list->de_num_used) >=
41554ed8a6bbSMark Fasheh le16_to_cpu(entry_list->de_count))
41564ed8a6bbSMark Fasheh return -ENOSPC;
41574ed8a6bbSMark Fasheh
41584ed8a6bbSMark Fasheh return 0;
41594ed8a6bbSMark Fasheh }
41604ed8a6bbSMark Fasheh
ocfs2_prepare_dx_dir_for_insert(struct inode * dir,struct buffer_head * di_bh,const char * name,int namelen,struct ocfs2_dir_lookup_result * lookup)4161e7c17e43SMark Fasheh static int ocfs2_prepare_dx_dir_for_insert(struct inode *dir,
4162e7c17e43SMark Fasheh struct buffer_head *di_bh,
4163e7c17e43SMark Fasheh const char *name,
41649b7895efSMark Fasheh int namelen,
41659b7895efSMark Fasheh struct ocfs2_dir_lookup_result *lookup)
41669b7895efSMark Fasheh {
4167e7c17e43SMark Fasheh int ret, free_dx_root = 1;
4168e7c17e43SMark Fasheh struct ocfs2_super *osb = OCFS2_SB(dir->i_sb);
41699b7895efSMark Fasheh struct buffer_head *dx_root_bh = NULL;
4170e7c17e43SMark Fasheh struct buffer_head *leaf_bh = NULL;
41719b7895efSMark Fasheh struct ocfs2_dinode *di = (struct ocfs2_dinode *)di_bh->b_data;
4172e7c17e43SMark Fasheh struct ocfs2_dx_root_block *dx_root;
41739b7895efSMark Fasheh
41749b7895efSMark Fasheh ret = ocfs2_read_dx_root(dir, di, &dx_root_bh);
41759b7895efSMark Fasheh if (ret) {
41769b7895efSMark Fasheh mlog_errno(ret);
41779b7895efSMark Fasheh goto out;
41789b7895efSMark Fasheh }
41799b7895efSMark Fasheh
41809b7895efSMark Fasheh dx_root = (struct ocfs2_dx_root_block *)dx_root_bh->b_data;
4181e3a93c2dSMark Fasheh if (le32_to_cpu(dx_root->dr_num_entries) == OCFS2_DX_ENTRIES_MAX) {
4182e3a93c2dSMark Fasheh ret = -ENOSPC;
4183e3a93c2dSMark Fasheh mlog_errno(ret);
4184e3a93c2dSMark Fasheh goto out;
4185e3a93c2dSMark Fasheh }
4186e3a93c2dSMark Fasheh
41874ed8a6bbSMark Fasheh if (ocfs2_dx_root_inline(dx_root)) {
41884ed8a6bbSMark Fasheh ret = ocfs2_inline_dx_has_space(dx_root_bh);
41894ed8a6bbSMark Fasheh
41904ed8a6bbSMark Fasheh if (ret == 0)
41914ed8a6bbSMark Fasheh goto search_el;
41924ed8a6bbSMark Fasheh
41934ed8a6bbSMark Fasheh /*
41944ed8a6bbSMark Fasheh * We ran out of room in the root block. Expand it to
41954ed8a6bbSMark Fasheh * an extent, then allow ocfs2_find_dir_space_dx to do
41964ed8a6bbSMark Fasheh * the rest.
41974ed8a6bbSMark Fasheh */
41984ed8a6bbSMark Fasheh ret = ocfs2_expand_inline_dx_root(dir, dx_root_bh);
41994ed8a6bbSMark Fasheh if (ret) {
42004ed8a6bbSMark Fasheh mlog_errno(ret);
42014ed8a6bbSMark Fasheh goto out;
42024ed8a6bbSMark Fasheh }
42034ed8a6bbSMark Fasheh }
42049b7895efSMark Fasheh
42059b7895efSMark Fasheh /*
4206e7c17e43SMark Fasheh * Insert preparation for an indexed directory is split into two
4207e7c17e43SMark Fasheh * steps. The call to find_dir_space_dx reserves room in the index for
4208e7c17e43SMark Fasheh * an additional item. If we run out of space there, it's a real error
4209e7c17e43SMark Fasheh * we can't continue on.
42109b7895efSMark Fasheh */
4211e7c17e43SMark Fasheh ret = ocfs2_find_dir_space_dx(osb, dir, di_bh, dx_root_bh, name,
4212e7c17e43SMark Fasheh namelen, lookup);
42139b7895efSMark Fasheh if (ret) {
42149b7895efSMark Fasheh mlog_errno(ret);
42159b7895efSMark Fasheh goto out;
42169b7895efSMark Fasheh }
42179b7895efSMark Fasheh
42184ed8a6bbSMark Fasheh search_el:
4219e7c17e43SMark Fasheh /*
4220e7c17e43SMark Fasheh * Next, we need to find space in the unindexed tree. This call
4221e7c17e43SMark Fasheh * searches using the free space linked list. If the unindexed tree
4222e7c17e43SMark Fasheh * lacks sufficient space, we'll expand it below. The expansion code
4223e7c17e43SMark Fasheh * is smart enough to add any new blocks to the free space list.
4224e7c17e43SMark Fasheh */
4225e7c17e43SMark Fasheh ret = ocfs2_search_dx_free_list(dir, dx_root_bh, namelen, lookup);
4226e7c17e43SMark Fasheh if (ret && ret != -ENOSPC) {
4227e7c17e43SMark Fasheh mlog_errno(ret);
4228e7c17e43SMark Fasheh goto out;
4229e7c17e43SMark Fasheh }
4230e7c17e43SMark Fasheh
4231e7c17e43SMark Fasheh /* Do this up here - ocfs2_extend_dir might need the dx_root */
42324ed8a6bbSMark Fasheh lookup->dl_dx_root_bh = dx_root_bh;
4233e7c17e43SMark Fasheh free_dx_root = 0;
4234e7c17e43SMark Fasheh
4235e7c17e43SMark Fasheh if (ret == -ENOSPC) {
4236e7c17e43SMark Fasheh ret = ocfs2_extend_dir(osb, dir, di_bh, 1, lookup, &leaf_bh);
4237e7c17e43SMark Fasheh
4238e7c17e43SMark Fasheh if (ret) {
4239e7c17e43SMark Fasheh mlog_errno(ret);
4240e7c17e43SMark Fasheh goto out;
4241e7c17e43SMark Fasheh }
4242e7c17e43SMark Fasheh
4243e7c17e43SMark Fasheh /*
4244e7c17e43SMark Fasheh * We make the assumption here that new leaf blocks are added
4245e7c17e43SMark Fasheh * to the front of our free list.
4246e7c17e43SMark Fasheh */
4247e7c17e43SMark Fasheh lookup->dl_prev_leaf_bh = NULL;
4248e7c17e43SMark Fasheh lookup->dl_leaf_bh = leaf_bh;
4249e7c17e43SMark Fasheh }
42509b7895efSMark Fasheh
42519b7895efSMark Fasheh out:
4252e7c17e43SMark Fasheh if (free_dx_root)
42539b7895efSMark Fasheh brelse(dx_root_bh);
42549b7895efSMark Fasheh return ret;
42559b7895efSMark Fasheh }
42569b7895efSMark Fasheh
42574a12ca3aSMark Fasheh /*
42584a12ca3aSMark Fasheh * Get a directory ready for insert. Any directory allocation required
42594a12ca3aSMark Fasheh * happens here. Success returns zero, and enough context in the dir
42604a12ca3aSMark Fasheh * lookup result that ocfs2_add_entry() will be able complete the task
42614a12ca3aSMark Fasheh * with minimal performance impact.
42624a12ca3aSMark Fasheh */
ocfs2_prepare_dir_for_insert(struct ocfs2_super * osb,struct inode * dir,struct buffer_head * parent_fe_bh,const char * name,int namelen,struct ocfs2_dir_lookup_result * lookup)42635b6a3a2bSMark Fasheh int ocfs2_prepare_dir_for_insert(struct ocfs2_super *osb,
42645b6a3a2bSMark Fasheh struct inode *dir,
42655b6a3a2bSMark Fasheh struct buffer_head *parent_fe_bh,
42665b6a3a2bSMark Fasheh const char *name,
42675b6a3a2bSMark Fasheh int namelen,
42684a12ca3aSMark Fasheh struct ocfs2_dir_lookup_result *lookup)
42695b6a3a2bSMark Fasheh {
42705b6a3a2bSMark Fasheh int ret;
42715b6a3a2bSMark Fasheh unsigned int blocks_wanted = 1;
42725b6a3a2bSMark Fasheh struct buffer_head *bh = NULL;
42735b6a3a2bSMark Fasheh
4274f1088d47STao Ma trace_ocfs2_prepare_dir_for_insert(
4275f1088d47STao Ma (unsigned long long)OCFS2_I(dir)->ip_blkno, namelen);
42765b6a3a2bSMark Fasheh
42775b6a3a2bSMark Fasheh if (!namelen) {
42785b6a3a2bSMark Fasheh ret = -EINVAL;
42795b6a3a2bSMark Fasheh mlog_errno(ret);
42805b6a3a2bSMark Fasheh goto out;
42815b6a3a2bSMark Fasheh }
42825b6a3a2bSMark Fasheh
42839b7895efSMark Fasheh /*
42849b7895efSMark Fasheh * Do this up front to reduce confusion.
42859b7895efSMark Fasheh *
42869b7895efSMark Fasheh * The directory might start inline, then be turned into an
42879b7895efSMark Fasheh * indexed one, in which case we'd need to hash deep inside
42889b7895efSMark Fasheh * ocfs2_find_dir_space_id(). Since
42899b7895efSMark Fasheh * ocfs2_prepare_dx_dir_for_insert() also needs this hash
42909b7895efSMark Fasheh * done, there seems no point in spreading out the calls. We
42919b7895efSMark Fasheh * can optimize away the case where the file system doesn't
42929b7895efSMark Fasheh * support indexing.
42939b7895efSMark Fasheh */
42949b7895efSMark Fasheh if (ocfs2_supports_indexed_dirs(osb))
42959b7895efSMark Fasheh ocfs2_dx_dir_name_hash(dir, name, namelen, &lookup->dl_hinfo);
42969b7895efSMark Fasheh
42979b7895efSMark Fasheh if (ocfs2_dir_indexed(dir)) {
4298e7c17e43SMark Fasheh ret = ocfs2_prepare_dx_dir_for_insert(dir, parent_fe_bh,
4299e7c17e43SMark Fasheh name, namelen, lookup);
4300e7c17e43SMark Fasheh if (ret)
43019b7895efSMark Fasheh mlog_errno(ret);
43029b7895efSMark Fasheh goto out;
43039b7895efSMark Fasheh }
43049b7895efSMark Fasheh
43055b6a3a2bSMark Fasheh if (OCFS2_I(dir)->ip_dyn_features & OCFS2_INLINE_DATA_FL) {
43065b6a3a2bSMark Fasheh ret = ocfs2_find_dir_space_id(dir, parent_fe_bh, name,
43075b6a3a2bSMark Fasheh namelen, &bh, &blocks_wanted);
43085b6a3a2bSMark Fasheh } else
43095b6a3a2bSMark Fasheh ret = ocfs2_find_dir_space_el(dir, name, namelen, &bh);
43105b6a3a2bSMark Fasheh
43115b6a3a2bSMark Fasheh if (ret && ret != -ENOSPC) {
43125b6a3a2bSMark Fasheh mlog_errno(ret);
43135b6a3a2bSMark Fasheh goto out;
43145b6a3a2bSMark Fasheh }
43155b6a3a2bSMark Fasheh
43165b6a3a2bSMark Fasheh if (ret == -ENOSPC) {
43175b6a3a2bSMark Fasheh /*
43185b6a3a2bSMark Fasheh * We have to expand the directory to add this name.
43195b6a3a2bSMark Fasheh */
43205b6a3a2bSMark Fasheh BUG_ON(bh);
43215b6a3a2bSMark Fasheh
43225b6a3a2bSMark Fasheh ret = ocfs2_extend_dir(osb, dir, parent_fe_bh, blocks_wanted,
43239b7895efSMark Fasheh lookup, &bh);
43245b6a3a2bSMark Fasheh if (ret) {
43255b6a3a2bSMark Fasheh if (ret != -ENOSPC)
43265b6a3a2bSMark Fasheh mlog_errno(ret);
43275b6a3a2bSMark Fasheh goto out;
43285b6a3a2bSMark Fasheh }
43295b6a3a2bSMark Fasheh
43305b6a3a2bSMark Fasheh BUG_ON(!bh);
43315b6a3a2bSMark Fasheh }
43325b6a3a2bSMark Fasheh
43334a12ca3aSMark Fasheh lookup->dl_leaf_bh = bh;
43345b6a3a2bSMark Fasheh bh = NULL;
43355b6a3a2bSMark Fasheh out:
43365b6a3a2bSMark Fasheh brelse(bh);
43375b6a3a2bSMark Fasheh return ret;
43385b6a3a2bSMark Fasheh }
43399b7895efSMark Fasheh
ocfs2_dx_dir_remove_index(struct inode * dir,struct buffer_head * di_bh,struct buffer_head * dx_root_bh)43409b7895efSMark Fasheh static int ocfs2_dx_dir_remove_index(struct inode *dir,
43419b7895efSMark Fasheh struct buffer_head *di_bh,
43429b7895efSMark Fasheh struct buffer_head *dx_root_bh)
43439b7895efSMark Fasheh {
43449b7895efSMark Fasheh int ret;
43459b7895efSMark Fasheh struct ocfs2_super *osb = OCFS2_SB(dir->i_sb);
43469b7895efSMark Fasheh struct ocfs2_dinode *di = (struct ocfs2_dinode *)di_bh->b_data;
43479b7895efSMark Fasheh struct ocfs2_dx_root_block *dx_root;
43489b7895efSMark Fasheh struct inode *dx_alloc_inode = NULL;
43499b7895efSMark Fasheh struct buffer_head *dx_alloc_bh = NULL;
43509b7895efSMark Fasheh handle_t *handle;
43519b7895efSMark Fasheh u64 blk;
43529b7895efSMark Fasheh u16 bit;
43539b7895efSMark Fasheh u64 bg_blkno;
43549b7895efSMark Fasheh
43559b7895efSMark Fasheh dx_root = (struct ocfs2_dx_root_block *) dx_root_bh->b_data;
43569b7895efSMark Fasheh
43579b7895efSMark Fasheh dx_alloc_inode = ocfs2_get_system_file_inode(osb,
43589b7895efSMark Fasheh EXTENT_ALLOC_SYSTEM_INODE,
43599b7895efSMark Fasheh le16_to_cpu(dx_root->dr_suballoc_slot));
43609b7895efSMark Fasheh if (!dx_alloc_inode) {
43619b7895efSMark Fasheh ret = -ENOMEM;
43629b7895efSMark Fasheh mlog_errno(ret);
43639b7895efSMark Fasheh goto out;
43649b7895efSMark Fasheh }
43655955102cSAl Viro inode_lock(dx_alloc_inode);
43669b7895efSMark Fasheh
43679b7895efSMark Fasheh ret = ocfs2_inode_lock(dx_alloc_inode, &dx_alloc_bh, 1);
43689b7895efSMark Fasheh if (ret) {
43699b7895efSMark Fasheh mlog_errno(ret);
43709b7895efSMark Fasheh goto out_mutex;
43719b7895efSMark Fasheh }
43729b7895efSMark Fasheh
43739b7895efSMark Fasheh handle = ocfs2_start_trans(osb, OCFS2_DX_ROOT_REMOVE_CREDITS);
43749b7895efSMark Fasheh if (IS_ERR(handle)) {
43759b7895efSMark Fasheh ret = PTR_ERR(handle);
43769b7895efSMark Fasheh mlog_errno(ret);
43779b7895efSMark Fasheh goto out_unlock;
43789b7895efSMark Fasheh }
43799b7895efSMark Fasheh
43800cf2f763SJoel Becker ret = ocfs2_journal_access_di(handle, INODE_CACHE(dir), di_bh,
43819b7895efSMark Fasheh OCFS2_JOURNAL_ACCESS_WRITE);
43829b7895efSMark Fasheh if (ret) {
43839b7895efSMark Fasheh mlog_errno(ret);
43849b7895efSMark Fasheh goto out_commit;
43859b7895efSMark Fasheh }
43869b7895efSMark Fasheh
43878ac33dc8STao Ma spin_lock(&OCFS2_I(dir)->ip_lock);
43889b7895efSMark Fasheh OCFS2_I(dir)->ip_dyn_features &= ~OCFS2_INDEXED_DIR_FL;
43899b7895efSMark Fasheh di->i_dyn_features = cpu_to_le16(OCFS2_I(dir)->ip_dyn_features);
43908ac33dc8STao Ma spin_unlock(&OCFS2_I(dir)->ip_lock);
43919b7895efSMark Fasheh di->i_dx_root = cpu_to_le64(0ULL);
43926fdb702dSDarrick J. Wong ocfs2_update_inode_fsync_trans(handle, dir, 1);
43939b7895efSMark Fasheh
43949b7895efSMark Fasheh ocfs2_journal_dirty(handle, di_bh);
43959b7895efSMark Fasheh
43969b7895efSMark Fasheh blk = le64_to_cpu(dx_root->dr_blkno);
43979b7895efSMark Fasheh bit = le16_to_cpu(dx_root->dr_suballoc_bit);
439874380c47STao Ma if (dx_root->dr_suballoc_loc)
439974380c47STao Ma bg_blkno = le64_to_cpu(dx_root->dr_suballoc_loc);
440074380c47STao Ma else
44019b7895efSMark Fasheh bg_blkno = ocfs2_which_suballoc_group(blk, bit);
44029b7895efSMark Fasheh ret = ocfs2_free_suballoc_bits(handle, dx_alloc_inode, dx_alloc_bh,
44039b7895efSMark Fasheh bit, bg_blkno, 1);
44049b7895efSMark Fasheh if (ret)
44059b7895efSMark Fasheh mlog_errno(ret);
44069b7895efSMark Fasheh
44079b7895efSMark Fasheh out_commit:
44089b7895efSMark Fasheh ocfs2_commit_trans(osb, handle);
44099b7895efSMark Fasheh
44109b7895efSMark Fasheh out_unlock:
44119b7895efSMark Fasheh ocfs2_inode_unlock(dx_alloc_inode, 1);
44129b7895efSMark Fasheh
44139b7895efSMark Fasheh out_mutex:
44145955102cSAl Viro inode_unlock(dx_alloc_inode);
44159b7895efSMark Fasheh brelse(dx_alloc_bh);
44169b7895efSMark Fasheh out:
44179b7895efSMark Fasheh iput(dx_alloc_inode);
44189b7895efSMark Fasheh return ret;
44199b7895efSMark Fasheh }
44209b7895efSMark Fasheh
ocfs2_dx_dir_truncate(struct inode * dir,struct buffer_head * di_bh)44219b7895efSMark Fasheh int ocfs2_dx_dir_truncate(struct inode *dir, struct buffer_head *di_bh)
44229b7895efSMark Fasheh {
44239b7895efSMark Fasheh int ret;
44243f649ab7SKees Cook unsigned int clen;
44253f649ab7SKees Cook u32 major_hash = UINT_MAX, p_cpos, cpos;
44263f649ab7SKees Cook u64 blkno;
44279b7895efSMark Fasheh struct ocfs2_super *osb = OCFS2_SB(dir->i_sb);
44289b7895efSMark Fasheh struct buffer_head *dx_root_bh = NULL;
44299b7895efSMark Fasheh struct ocfs2_dx_root_block *dx_root;
44309b7895efSMark Fasheh struct ocfs2_dinode *di = (struct ocfs2_dinode *)di_bh->b_data;
44319b7895efSMark Fasheh struct ocfs2_cached_dealloc_ctxt dealloc;
44329b7895efSMark Fasheh struct ocfs2_extent_tree et;
44339b7895efSMark Fasheh
44349b7895efSMark Fasheh ocfs2_init_dealloc_ctxt(&dealloc);
44359b7895efSMark Fasheh
44369b7895efSMark Fasheh if (!ocfs2_dir_indexed(dir))
44379b7895efSMark Fasheh return 0;
44389b7895efSMark Fasheh
44399b7895efSMark Fasheh ret = ocfs2_read_dx_root(dir, di, &dx_root_bh);
44409b7895efSMark Fasheh if (ret) {
44419b7895efSMark Fasheh mlog_errno(ret);
44429b7895efSMark Fasheh goto out;
44439b7895efSMark Fasheh }
44444ed8a6bbSMark Fasheh dx_root = (struct ocfs2_dx_root_block *)dx_root_bh->b_data;
44454ed8a6bbSMark Fasheh
44464ed8a6bbSMark Fasheh if (ocfs2_dx_root_inline(dx_root))
44474ed8a6bbSMark Fasheh goto remove_index;
44489b7895efSMark Fasheh
44495e404e9eSJoel Becker ocfs2_init_dx_root_extent_tree(&et, INODE_CACHE(dir), dx_root_bh);
44509b7895efSMark Fasheh
44519b7895efSMark Fasheh /* XXX: What if dr_clusters is too large? */
44529b7895efSMark Fasheh while (le32_to_cpu(dx_root->dr_clusters)) {
44539b7895efSMark Fasheh ret = ocfs2_dx_dir_lookup_rec(dir, &dx_root->dr_list,
44549b7895efSMark Fasheh major_hash, &cpos, &blkno, &clen);
44559b7895efSMark Fasheh if (ret) {
44569b7895efSMark Fasheh mlog_errno(ret);
44579b7895efSMark Fasheh goto out;
44589b7895efSMark Fasheh }
44599b7895efSMark Fasheh
44609b7895efSMark Fasheh p_cpos = ocfs2_blocks_to_clusters(dir->i_sb, blkno);
44619b7895efSMark Fasheh
446278f94673STristan Ye ret = ocfs2_remove_btree_range(dir, &et, cpos, p_cpos, clen, 0,
4463f62f12b3SJunxiao Bi &dealloc, 0, false);
44649b7895efSMark Fasheh if (ret) {
44659b7895efSMark Fasheh mlog_errno(ret);
44669b7895efSMark Fasheh goto out;
44679b7895efSMark Fasheh }
44689b7895efSMark Fasheh
44699b7895efSMark Fasheh if (cpos == 0)
44709b7895efSMark Fasheh break;
44719b7895efSMark Fasheh
44729b7895efSMark Fasheh major_hash = cpos - 1;
44739b7895efSMark Fasheh }
44749b7895efSMark Fasheh
44754ed8a6bbSMark Fasheh remove_index:
44769b7895efSMark Fasheh ret = ocfs2_dx_dir_remove_index(dir, di_bh, dx_root_bh);
44779b7895efSMark Fasheh if (ret) {
44789b7895efSMark Fasheh mlog_errno(ret);
44799b7895efSMark Fasheh goto out;
44809b7895efSMark Fasheh }
44819b7895efSMark Fasheh
44828cb471e8SJoel Becker ocfs2_remove_from_cache(INODE_CACHE(dir), dx_root_bh);
44839b7895efSMark Fasheh out:
44849b7895efSMark Fasheh ocfs2_schedule_truncate_log_flush(osb, 1);
44859b7895efSMark Fasheh ocfs2_run_deallocs(osb, &dealloc);
44869b7895efSMark Fasheh
44879b7895efSMark Fasheh brelse(dx_root_bh);
44889b7895efSMark Fasheh return ret;
44899b7895efSMark Fasheh }
4490