xref: /openbmc/linux/fs/ocfs2/namei.c (revision 606d099c)
1 /* -*- mode: c; c-basic-offset: 8; -*-
2  * vim: noexpandtab sw=8 ts=8 sts=0:
3  *
4  * namei.c
5  *
6  * Create and rename file, directory, symlinks
7  *
8  * Copyright (C) 2002, 2004 Oracle.  All rights reserved.
9  *
10  *  Portions of this code from linux/fs/ext3/dir.c
11  *
12  *  Copyright (C) 1992, 1993, 1994, 1995
13  *  Remy Card (card@masi.ibp.fr)
14  *  Laboratoire MASI - Institut Blaise pascal
15  *  Universite Pierre et Marie Curie (Paris VI)
16  *
17  *   from
18  *
19  *   linux/fs/minix/dir.c
20  *
21  *   Copyright (C) 1991, 1992 Linus Torvalds
22  *
23  * This program is free software; you can redistribute it and/or
24  * modify it under the terms of the GNU General Public
25  * License as published by the Free Software Foundation; either
26  * version 2 of the License, or (at your option) any later version.
27  *
28  * This program is distributed in the hope that it will be useful,
29  * but WITHOUT ANY WARRANTY; without even the implied warranty of
30  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
31  * General Public License for more details.
32  *
33  * You should have received a copy of the GNU General Public
34  * License along with this program; if not, write to the
35  * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
36  * Boston, MA 02111-1307, USA.
37  */
38 
39 #include <linux/fs.h>
40 #include <linux/types.h>
41 #include <linux/slab.h>
42 #include <linux/highmem.h>
43 
44 #define MLOG_MASK_PREFIX ML_NAMEI
45 #include <cluster/masklog.h>
46 
47 #include "ocfs2.h"
48 
49 #include "alloc.h"
50 #include "dcache.h"
51 #include "dir.h"
52 #include "dlmglue.h"
53 #include "extent_map.h"
54 #include "file.h"
55 #include "inode.h"
56 #include "journal.h"
57 #include "namei.h"
58 #include "suballoc.h"
59 #include "super.h"
60 #include "symlink.h"
61 #include "sysfile.h"
62 #include "uptodate.h"
63 #include "vote.h"
64 
65 #include "buffer_head_io.h"
66 
67 #define NAMEI_RA_CHUNKS  2
68 #define NAMEI_RA_BLOCKS  4
69 #define NAMEI_RA_SIZE        (NAMEI_RA_CHUNKS * NAMEI_RA_BLOCKS)
70 #define NAMEI_RA_INDEX(c,b)  (((c) * NAMEI_RA_BLOCKS) + (b))
71 
72 static int inline ocfs2_search_dirblock(struct buffer_head *bh,
73 					struct inode *dir,
74 					const char *name, int namelen,
75 					unsigned long offset,
76 					struct ocfs2_dir_entry **res_dir);
77 
78 static int ocfs2_delete_entry(handle_t *handle,
79 			      struct inode *dir,
80 			      struct ocfs2_dir_entry *de_del,
81 			      struct buffer_head *bh);
82 
83 static int __ocfs2_add_entry(handle_t *handle,
84 			     struct inode *dir,
85 			     const char *name, int namelen,
86 			     struct inode *inode, u64 blkno,
87 			     struct buffer_head *parent_fe_bh,
88 			     struct buffer_head *insert_bh);
89 
90 static int ocfs2_mknod_locked(struct ocfs2_super *osb,
91 			      struct inode *dir,
92 			      struct dentry *dentry, int mode,
93 			      dev_t dev,
94 			      struct buffer_head **new_fe_bh,
95 			      struct buffer_head *parent_fe_bh,
96 			      handle_t *handle,
97 			      struct inode **ret_inode,
98 			      struct ocfs2_alloc_context *inode_ac);
99 
100 static int ocfs2_fill_new_dir(struct ocfs2_super *osb,
101 			      handle_t *handle,
102 			      struct inode *parent,
103 			      struct inode *inode,
104 			      struct buffer_head *fe_bh,
105 			      struct ocfs2_alloc_context *data_ac);
106 
107 static int ocfs2_prepare_orphan_dir(struct ocfs2_super *osb,
108 				    struct inode **ret_orphan_dir,
109 				    struct inode *inode,
110 				    char *name,
111 				    struct buffer_head **de_bh);
112 
113 static int ocfs2_orphan_add(struct ocfs2_super *osb,
114 			    handle_t *handle,
115 			    struct inode *inode,
116 			    struct ocfs2_dinode *fe,
117 			    char *name,
118 			    struct buffer_head *de_bh,
119 			    struct inode *orphan_dir_inode);
120 
121 static int ocfs2_create_symlink_data(struct ocfs2_super *osb,
122 				     handle_t *handle,
123 				     struct inode *inode,
124 				     const char *symname);
125 
126 static inline int ocfs2_add_entry(handle_t *handle,
127 				  struct dentry *dentry,
128 				  struct inode *inode, u64 blkno,
129 				  struct buffer_head *parent_fe_bh,
130 				  struct buffer_head *insert_bh)
131 {
132 	return __ocfs2_add_entry(handle, dentry->d_parent->d_inode,
133 				 dentry->d_name.name, dentry->d_name.len,
134 				 inode, blkno, parent_fe_bh, insert_bh);
135 }
136 
137 /* An orphan dir name is an 8 byte value, printed as a hex string */
138 #define OCFS2_ORPHAN_NAMELEN ((int)(2 * sizeof(u64)))
139 
140 static struct dentry *ocfs2_lookup(struct inode *dir, struct dentry *dentry,
141 				   struct nameidata *nd)
142 {
143 	int status;
144 	u64 blkno;
145 	struct buffer_head *dirent_bh = NULL;
146 	struct inode *inode = NULL;
147 	struct dentry *ret;
148 	struct ocfs2_dir_entry *dirent;
149 	struct ocfs2_inode_info *oi;
150 
151 	mlog_entry("(0x%p, 0x%p, '%.*s')\n", dir, dentry,
152 		   dentry->d_name.len, dentry->d_name.name);
153 
154 	if (dentry->d_name.len > OCFS2_MAX_FILENAME_LEN) {
155 		ret = ERR_PTR(-ENAMETOOLONG);
156 		goto bail;
157 	}
158 
159 	mlog(0, "find name %.*s in directory %llu\n", dentry->d_name.len,
160 	     dentry->d_name.name, (unsigned long long)OCFS2_I(dir)->ip_blkno);
161 
162 	status = ocfs2_meta_lock(dir, NULL, 0);
163 	if (status < 0) {
164 		if (status != -ENOENT)
165 			mlog_errno(status);
166 		ret = ERR_PTR(status);
167 		goto bail;
168 	}
169 
170 	status = ocfs2_find_files_on_disk(dentry->d_name.name,
171 					  dentry->d_name.len, &blkno,
172 					  dir, &dirent_bh, &dirent);
173 	if (status < 0)
174 		goto bail_add;
175 
176 	inode = ocfs2_iget(OCFS2_SB(dir->i_sb), blkno, 0);
177 	if (IS_ERR(inode)) {
178 		mlog(ML_ERROR, "Unable to create inode %llu\n",
179 		     (unsigned long long)blkno);
180 		ret = ERR_PTR(-EACCES);
181 		goto bail_unlock;
182 	}
183 
184 	oi = OCFS2_I(inode);
185 	/* Clear any orphaned state... If we were able to look up the
186 	 * inode from a directory, it certainly can't be orphaned. We
187 	 * might have the bad state from a node which intended to
188 	 * orphan this inode but crashed before it could commit the
189 	 * unlink. */
190 	spin_lock(&oi->ip_lock);
191 	oi->ip_flags &= ~OCFS2_INODE_MAYBE_ORPHANED;
192 	oi->ip_orphaned_slot = OCFS2_INVALID_SLOT;
193 	spin_unlock(&oi->ip_lock);
194 
195 bail_add:
196 	dentry->d_op = &ocfs2_dentry_ops;
197 	ret = d_splice_alias(inode, dentry);
198 
199 	if (inode) {
200 		/*
201 		 * If d_splice_alias() finds a DCACHE_DISCONNECTED
202 		 * dentry, it will d_move() it on top of ourse. The
203 		 * return value will indicate this however, so in
204 		 * those cases, we switch them around for the locking
205 		 * code.
206 		 *
207 		 * NOTE: This dentry already has ->d_op set from
208 		 * ocfs2_get_parent() and ocfs2_get_dentry()
209 		 */
210 		if (ret)
211 			dentry = ret;
212 
213 		status = ocfs2_dentry_attach_lock(dentry, inode,
214 						  OCFS2_I(dir)->ip_blkno);
215 		if (status) {
216 			mlog_errno(status);
217 			ret = ERR_PTR(status);
218 			goto bail_unlock;
219 		}
220 	}
221 
222 bail_unlock:
223 	/* Don't drop the cluster lock until *after* the d_add --
224 	 * unlink on another node will message us to remove that
225 	 * dentry under this lock so otherwise we can race this with
226 	 * the vote thread and have a stale dentry. */
227 	ocfs2_meta_unlock(dir, 0);
228 
229 bail:
230 	if (dirent_bh)
231 		brelse(dirent_bh);
232 
233 	mlog_exit_ptr(ret);
234 
235 	return ret;
236 }
237 
238 static int ocfs2_fill_new_dir(struct ocfs2_super *osb,
239 			      handle_t *handle,
240 			      struct inode *parent,
241 			      struct inode *inode,
242 			      struct buffer_head *fe_bh,
243 			      struct ocfs2_alloc_context *data_ac)
244 {
245 	int status;
246 	struct buffer_head *new_bh = NULL;
247 	struct ocfs2_dir_entry *de = NULL;
248 
249 	mlog_entry_void();
250 
251 	status = ocfs2_do_extend_dir(osb->sb, handle, inode, fe_bh,
252 				     data_ac, NULL, &new_bh);
253 	if (status < 0) {
254 		mlog_errno(status);
255 		goto bail;
256 	}
257 
258 	ocfs2_set_new_buffer_uptodate(inode, new_bh);
259 
260 	status = ocfs2_journal_access(handle, inode, new_bh,
261 				      OCFS2_JOURNAL_ACCESS_CREATE);
262 	if (status < 0) {
263 		mlog_errno(status);
264 		goto bail;
265 	}
266 	memset(new_bh->b_data, 0, osb->sb->s_blocksize);
267 
268 	de = (struct ocfs2_dir_entry *) new_bh->b_data;
269 	de->inode = cpu_to_le64(OCFS2_I(inode)->ip_blkno);
270 	de->name_len = 1;
271 	de->rec_len =
272 		cpu_to_le16(OCFS2_DIR_REC_LEN(de->name_len));
273 	strcpy(de->name, ".");
274 	ocfs2_set_de_type(de, S_IFDIR);
275 	de = (struct ocfs2_dir_entry *) ((char *)de + le16_to_cpu(de->rec_len));
276 	de->inode = cpu_to_le64(OCFS2_I(parent)->ip_blkno);
277 	de->rec_len = cpu_to_le16(inode->i_sb->s_blocksize -
278 				  OCFS2_DIR_REC_LEN(1));
279 	de->name_len = 2;
280 	strcpy(de->name, "..");
281 	ocfs2_set_de_type(de, S_IFDIR);
282 
283 	status = ocfs2_journal_dirty(handle, new_bh);
284 	if (status < 0) {
285 		mlog_errno(status);
286 		goto bail;
287 	}
288 
289 	i_size_write(inode, inode->i_sb->s_blocksize);
290 	inode->i_nlink = 2;
291 	inode->i_blocks = ocfs2_align_bytes_to_sectors(inode->i_sb->s_blocksize);
292 	status = ocfs2_mark_inode_dirty(handle, inode, fe_bh);
293 	if (status < 0) {
294 		mlog_errno(status);
295 		goto bail;
296 	}
297 
298 	status = 0;
299 bail:
300 	if (new_bh)
301 		brelse(new_bh);
302 
303 	mlog_exit(status);
304 	return status;
305 }
306 
307 static int ocfs2_mknod(struct inode *dir,
308 		       struct dentry *dentry,
309 		       int mode,
310 		       dev_t dev)
311 {
312 	int status = 0;
313 	struct buffer_head *parent_fe_bh = NULL;
314 	handle_t *handle = NULL;
315 	struct ocfs2_super *osb;
316 	struct ocfs2_dinode *dirfe;
317 	struct buffer_head *new_fe_bh = NULL;
318 	struct buffer_head *de_bh = NULL;
319 	struct inode *inode = NULL;
320 	struct ocfs2_alloc_context *inode_ac = NULL;
321 	struct ocfs2_alloc_context *data_ac = NULL;
322 
323 	mlog_entry("(0x%p, 0x%p, %d, %lu, '%.*s')\n", dir, dentry, mode,
324 		   (unsigned long)dev, dentry->d_name.len,
325 		   dentry->d_name.name);
326 
327 	/* get our super block */
328 	osb = OCFS2_SB(dir->i_sb);
329 
330 	status = ocfs2_meta_lock(dir, &parent_fe_bh, 1);
331 	if (status < 0) {
332 		if (status != -ENOENT)
333 			mlog_errno(status);
334 		return status;
335 	}
336 
337 	if (S_ISDIR(mode) && (dir->i_nlink >= OCFS2_LINK_MAX)) {
338 		status = -EMLINK;
339 		goto leave;
340 	}
341 
342 	dirfe = (struct ocfs2_dinode *) parent_fe_bh->b_data;
343 	if (!dirfe->i_links_count) {
344 		/* can't make a file in a deleted directory. */
345 		status = -ENOENT;
346 		goto leave;
347 	}
348 
349 	status = ocfs2_check_dir_for_entry(dir, dentry->d_name.name,
350 					   dentry->d_name.len);
351 	if (status)
352 		goto leave;
353 
354 	/* get a spot inside the dir. */
355 	status = ocfs2_prepare_dir_for_insert(osb, dir, parent_fe_bh,
356 					      dentry->d_name.name,
357 					      dentry->d_name.len, &de_bh);
358 	if (status < 0) {
359 		mlog_errno(status);
360 		goto leave;
361 	}
362 
363 	/* reserve an inode spot */
364 	status = ocfs2_reserve_new_inode(osb, &inode_ac);
365 	if (status < 0) {
366 		if (status != -ENOSPC)
367 			mlog_errno(status);
368 		goto leave;
369 	}
370 
371 	/* are we making a directory? If so, reserve a cluster for his
372 	 * 1st extent. */
373 	if (S_ISDIR(mode)) {
374 		status = ocfs2_reserve_clusters(osb, 1, &data_ac);
375 		if (status < 0) {
376 			if (status != -ENOSPC)
377 				mlog_errno(status);
378 			goto leave;
379 		}
380 	}
381 
382 	handle = ocfs2_start_trans(osb, OCFS2_MKNOD_CREDITS);
383 	if (IS_ERR(handle)) {
384 		status = PTR_ERR(handle);
385 		handle = NULL;
386 		mlog_errno(status);
387 		goto leave;
388 	}
389 
390 	/* do the real work now. */
391 	status = ocfs2_mknod_locked(osb, dir, dentry, mode, dev,
392 				    &new_fe_bh, parent_fe_bh, handle,
393 				    &inode, inode_ac);
394 	if (status < 0) {
395 		mlog_errno(status);
396 		goto leave;
397 	}
398 
399 	if (S_ISDIR(mode)) {
400 		status = ocfs2_fill_new_dir(osb, handle, dir, inode,
401 					    new_fe_bh, data_ac);
402 		if (status < 0) {
403 			mlog_errno(status);
404 			goto leave;
405 		}
406 
407 		status = ocfs2_journal_access(handle, dir, parent_fe_bh,
408 					      OCFS2_JOURNAL_ACCESS_WRITE);
409 		if (status < 0) {
410 			mlog_errno(status);
411 			goto leave;
412 		}
413 		le16_add_cpu(&dirfe->i_links_count, 1);
414 		status = ocfs2_journal_dirty(handle, parent_fe_bh);
415 		if (status < 0) {
416 			mlog_errno(status);
417 			goto leave;
418 		}
419 		inc_nlink(dir);
420 	}
421 
422 	status = ocfs2_add_entry(handle, dentry, inode,
423 				 OCFS2_I(inode)->ip_blkno, parent_fe_bh,
424 				 de_bh);
425 	if (status < 0) {
426 		mlog_errno(status);
427 		goto leave;
428 	}
429 
430 	status = ocfs2_dentry_attach_lock(dentry, inode,
431 					  OCFS2_I(dir)->ip_blkno);
432 	if (status) {
433 		mlog_errno(status);
434 		goto leave;
435 	}
436 
437 	insert_inode_hash(inode);
438 	dentry->d_op = &ocfs2_dentry_ops;
439 	d_instantiate(dentry, inode);
440 	status = 0;
441 leave:
442 	if (handle)
443 		ocfs2_commit_trans(osb, handle);
444 
445 	ocfs2_meta_unlock(dir, 1);
446 
447 	if (status == -ENOSPC)
448 		mlog(0, "Disk is full\n");
449 
450 	if (new_fe_bh)
451 		brelse(new_fe_bh);
452 
453 	if (de_bh)
454 		brelse(de_bh);
455 
456 	if (parent_fe_bh)
457 		brelse(parent_fe_bh);
458 
459 	if ((status < 0) && inode)
460 		iput(inode);
461 
462 	if (inode_ac)
463 		ocfs2_free_alloc_context(inode_ac);
464 
465 	if (data_ac)
466 		ocfs2_free_alloc_context(data_ac);
467 
468 	mlog_exit(status);
469 
470 	return status;
471 }
472 
473 static int ocfs2_mknod_locked(struct ocfs2_super *osb,
474 			      struct inode *dir,
475 			      struct dentry *dentry, int mode,
476 			      dev_t dev,
477 			      struct buffer_head **new_fe_bh,
478 			      struct buffer_head *parent_fe_bh,
479 			      handle_t *handle,
480 			      struct inode **ret_inode,
481 			      struct ocfs2_alloc_context *inode_ac)
482 {
483 	int status = 0;
484 	struct ocfs2_dinode *fe = NULL;
485 	struct ocfs2_extent_list *fel;
486 	u64 fe_blkno = 0;
487 	u16 suballoc_bit;
488 	struct inode *inode = NULL;
489 
490 	mlog_entry("(0x%p, 0x%p, %d, %lu, '%.*s')\n", dir, dentry, mode,
491 		   (unsigned long)dev, dentry->d_name.len,
492 		   dentry->d_name.name);
493 
494 	*new_fe_bh = NULL;
495 	*ret_inode = NULL;
496 
497 	status = ocfs2_claim_new_inode(osb, handle, inode_ac, &suballoc_bit,
498 				       &fe_blkno);
499 	if (status < 0) {
500 		mlog_errno(status);
501 		goto leave;
502 	}
503 
504 	inode = new_inode(dir->i_sb);
505 	if (IS_ERR(inode)) {
506 		status = PTR_ERR(inode);
507 		mlog(ML_ERROR, "new_inode failed!\n");
508 		goto leave;
509 	}
510 
511 	/* populate as many fields early on as possible - many of
512 	 * these are used by the support functions here and in
513 	 * callers. */
514 	inode->i_ino = ino_from_blkno(osb->sb, fe_blkno);
515 	OCFS2_I(inode)->ip_blkno = fe_blkno;
516 	if (S_ISDIR(mode))
517 		inode->i_nlink = 2;
518 	else
519 		inode->i_nlink = 1;
520 	inode->i_mode = mode;
521 	spin_lock(&osb->osb_lock);
522 	inode->i_generation = osb->s_next_generation++;
523 	spin_unlock(&osb->osb_lock);
524 
525 	*new_fe_bh = sb_getblk(osb->sb, fe_blkno);
526 	if (!*new_fe_bh) {
527 		status = -EIO;
528 		mlog_errno(status);
529 		goto leave;
530 	}
531 	ocfs2_set_new_buffer_uptodate(inode, *new_fe_bh);
532 
533 	status = ocfs2_journal_access(handle, inode, *new_fe_bh,
534 				      OCFS2_JOURNAL_ACCESS_CREATE);
535 	if (status < 0) {
536 		mlog_errno(status);
537 		goto leave;
538 	}
539 
540 	fe = (struct ocfs2_dinode *) (*new_fe_bh)->b_data;
541 	memset(fe, 0, osb->sb->s_blocksize);
542 
543 	fe->i_generation = cpu_to_le32(inode->i_generation);
544 	fe->i_fs_generation = cpu_to_le32(osb->fs_generation);
545 	fe->i_blkno = cpu_to_le64(fe_blkno);
546 	fe->i_suballoc_bit = cpu_to_le16(suballoc_bit);
547 	fe->i_suballoc_slot = cpu_to_le16(osb->slot_num);
548 	fe->i_uid = cpu_to_le32(current->fsuid);
549 	if (dir->i_mode & S_ISGID) {
550 		fe->i_gid = cpu_to_le32(dir->i_gid);
551 		if (S_ISDIR(mode))
552 			mode |= S_ISGID;
553 	} else
554 		fe->i_gid = cpu_to_le32(current->fsgid);
555 	fe->i_mode = cpu_to_le16(mode);
556 	if (S_ISCHR(mode) || S_ISBLK(mode))
557 		fe->id1.dev1.i_rdev = cpu_to_le64(huge_encode_dev(dev));
558 
559 	fe->i_links_count = cpu_to_le16(inode->i_nlink);
560 
561 	fe->i_last_eb_blk = 0;
562 	strcpy(fe->i_signature, OCFS2_INODE_SIGNATURE);
563 	le32_add_cpu(&fe->i_flags, OCFS2_VALID_FL);
564 	fe->i_atime = fe->i_ctime = fe->i_mtime =
565 		cpu_to_le64(CURRENT_TIME.tv_sec);
566 	fe->i_mtime_nsec = fe->i_ctime_nsec = fe->i_atime_nsec =
567 		cpu_to_le32(CURRENT_TIME.tv_nsec);
568 	fe->i_dtime = 0;
569 
570 	fel = &fe->id2.i_list;
571 	fel->l_tree_depth = 0;
572 	fel->l_next_free_rec = 0;
573 	fel->l_count = cpu_to_le16(ocfs2_extent_recs_per_inode(osb->sb));
574 
575 	status = ocfs2_journal_dirty(handle, *new_fe_bh);
576 	if (status < 0) {
577 		mlog_errno(status);
578 		goto leave;
579 	}
580 
581 	if (ocfs2_populate_inode(inode, fe, 1) < 0) {
582 		mlog(ML_ERROR, "populate inode failed! bh->b_blocknr=%llu, "
583 		     "i_blkno=%llu, i_ino=%lu\n",
584 		     (unsigned long long) (*new_fe_bh)->b_blocknr,
585 		     (unsigned long long)fe->i_blkno, inode->i_ino);
586 		BUG();
587 	}
588 
589 	ocfs2_inode_set_new(osb, inode);
590 	status = ocfs2_create_new_inode_locks(inode);
591 	if (status < 0)
592 		mlog_errno(status);
593 
594 	status = 0; /* error in ocfs2_create_new_inode_locks is not
595 		     * critical */
596 
597 	*ret_inode = inode;
598 leave:
599 	if (status < 0) {
600 		if (*new_fe_bh) {
601 			brelse(*new_fe_bh);
602 			*new_fe_bh = NULL;
603 		}
604 		if (inode)
605 			iput(inode);
606 	}
607 
608 	mlog_exit(status);
609 	return status;
610 }
611 
612 static int ocfs2_mkdir(struct inode *dir,
613 		       struct dentry *dentry,
614 		       int mode)
615 {
616 	int ret;
617 
618 	mlog_entry("(0x%p, 0x%p, %d, '%.*s')\n", dir, dentry, mode,
619 		   dentry->d_name.len, dentry->d_name.name);
620 	ret = ocfs2_mknod(dir, dentry, mode | S_IFDIR, 0);
621 	mlog_exit(ret);
622 
623 	return ret;
624 }
625 
626 static int ocfs2_create(struct inode *dir,
627 			struct dentry *dentry,
628 			int mode,
629 			struct nameidata *nd)
630 {
631 	int ret;
632 
633 	mlog_entry("(0x%p, 0x%p, %d, '%.*s')\n", dir, dentry, mode,
634 		   dentry->d_name.len, dentry->d_name.name);
635 	ret = ocfs2_mknod(dir, dentry, mode | S_IFREG, 0);
636 	mlog_exit(ret);
637 
638 	return ret;
639 }
640 
641 static int ocfs2_link(struct dentry *old_dentry,
642 		      struct inode *dir,
643 		      struct dentry *dentry)
644 {
645 	handle_t *handle;
646 	struct inode *inode = old_dentry->d_inode;
647 	int err;
648 	struct buffer_head *fe_bh = NULL;
649 	struct buffer_head *parent_fe_bh = NULL;
650 	struct buffer_head *de_bh = NULL;
651 	struct ocfs2_dinode *fe = NULL;
652 	struct ocfs2_super *osb = OCFS2_SB(dir->i_sb);
653 
654 	mlog_entry("(inode=%lu, old='%.*s' new='%.*s')\n", inode->i_ino,
655 		   old_dentry->d_name.len, old_dentry->d_name.name,
656 		   dentry->d_name.len, dentry->d_name.name);
657 
658 	if (S_ISDIR(inode->i_mode))
659 		return -EPERM;
660 
661 	err = ocfs2_meta_lock(dir, &parent_fe_bh, 1);
662 	if (err < 0) {
663 		if (err != -ENOENT)
664 			mlog_errno(err);
665 		return err;
666 	}
667 
668 	if (!dir->i_nlink) {
669 		err = -ENOENT;
670 		goto out;
671 	}
672 
673 	err = ocfs2_check_dir_for_entry(dir, dentry->d_name.name,
674 					dentry->d_name.len);
675 	if (err)
676 		goto out;
677 
678 	err = ocfs2_prepare_dir_for_insert(osb, dir, parent_fe_bh,
679 					   dentry->d_name.name,
680 					   dentry->d_name.len, &de_bh);
681 	if (err < 0) {
682 		mlog_errno(err);
683 		goto out;
684 	}
685 
686 	err = ocfs2_meta_lock(inode, &fe_bh, 1);
687 	if (err < 0) {
688 		if (err != -ENOENT)
689 			mlog_errno(err);
690 		goto out;
691 	}
692 
693 	fe = (struct ocfs2_dinode *) fe_bh->b_data;
694 	if (le16_to_cpu(fe->i_links_count) >= OCFS2_LINK_MAX) {
695 		err = -EMLINK;
696 		goto out_unlock_inode;
697 	}
698 
699 	handle = ocfs2_start_trans(osb, OCFS2_LINK_CREDITS);
700 	if (IS_ERR(handle)) {
701 		err = PTR_ERR(handle);
702 		handle = NULL;
703 		mlog_errno(err);
704 		goto out_unlock_inode;
705 	}
706 
707 	err = ocfs2_journal_access(handle, inode, fe_bh,
708 				   OCFS2_JOURNAL_ACCESS_WRITE);
709 	if (err < 0) {
710 		mlog_errno(err);
711 		goto out_commit;
712 	}
713 
714 	inc_nlink(inode);
715 	inode->i_ctime = CURRENT_TIME;
716 	fe->i_links_count = cpu_to_le16(inode->i_nlink);
717 	fe->i_ctime = cpu_to_le64(inode->i_ctime.tv_sec);
718 	fe->i_ctime_nsec = cpu_to_le32(inode->i_ctime.tv_nsec);
719 
720 	err = ocfs2_journal_dirty(handle, fe_bh);
721 	if (err < 0) {
722 		le16_add_cpu(&fe->i_links_count, -1);
723 		drop_nlink(inode);
724 		mlog_errno(err);
725 		goto out_commit;
726 	}
727 
728 	err = ocfs2_add_entry(handle, dentry, inode,
729 			      OCFS2_I(inode)->ip_blkno,
730 			      parent_fe_bh, de_bh);
731 	if (err) {
732 		le16_add_cpu(&fe->i_links_count, -1);
733 		drop_nlink(inode);
734 		mlog_errno(err);
735 		goto out_commit;
736 	}
737 
738 	err = ocfs2_dentry_attach_lock(dentry, inode, OCFS2_I(dir)->ip_blkno);
739 	if (err) {
740 		mlog_errno(err);
741 		goto out_commit;
742 	}
743 
744 	atomic_inc(&inode->i_count);
745 	dentry->d_op = &ocfs2_dentry_ops;
746 	d_instantiate(dentry, inode);
747 
748 out_commit:
749 	ocfs2_commit_trans(osb, handle);
750 out_unlock_inode:
751 	ocfs2_meta_unlock(inode, 1);
752 
753 out:
754 	ocfs2_meta_unlock(dir, 1);
755 
756 	if (de_bh)
757 		brelse(de_bh);
758 	if (fe_bh)
759 		brelse(fe_bh);
760 	if (parent_fe_bh)
761 		brelse(parent_fe_bh);
762 
763 	mlog_exit(err);
764 
765 	return err;
766 }
767 
/*
 * Acquire and immediately release an exclusive lock on @dentry, which
 * forces other nodes to drop it.
 *
 * Returns the result of ocfs2_dentry_lock(); a non-zero result is
 * logged and the unlock is skipped.
 */
static int ocfs2_remote_dentry_delete(struct dentry *dentry)
{
	int status = ocfs2_dentry_lock(dentry, 1);

	if (status) {
		mlog_errno(status);
		return status;
	}

	ocfs2_dentry_unlock(dentry, 1);
	return status;
}
784 
785 static inline int inode_is_unlinkable(struct inode *inode)
786 {
787 	if (S_ISDIR(inode->i_mode)) {
788 		if (inode->i_nlink == 2)
789 			return 1;
790 		return 0;
791 	}
792 
793 	if (inode->i_nlink == 1)
794 		return 1;
795 	return 0;
796 }
797 
798 static int ocfs2_unlink(struct inode *dir,
799 			struct dentry *dentry)
800 {
801 	int status;
802 	int child_locked = 0;
803 	struct inode *inode = dentry->d_inode;
804 	struct inode *orphan_dir = NULL;
805 	struct ocfs2_super *osb = OCFS2_SB(dir->i_sb);
806 	u64 blkno;
807 	struct ocfs2_dinode *fe = NULL;
808 	struct buffer_head *fe_bh = NULL;
809 	struct buffer_head *parent_node_bh = NULL;
810 	handle_t *handle = NULL;
811 	struct ocfs2_dir_entry *dirent = NULL;
812 	struct buffer_head *dirent_bh = NULL;
813 	char orphan_name[OCFS2_ORPHAN_NAMELEN + 1];
814 	struct buffer_head *orphan_entry_bh = NULL;
815 
816 	mlog_entry("(0x%p, 0x%p, '%.*s')\n", dir, dentry,
817 		   dentry->d_name.len, dentry->d_name.name);
818 
819 	BUG_ON(dentry->d_parent->d_inode != dir);
820 
821 	mlog(0, "ino = %llu\n", (unsigned long long)OCFS2_I(inode)->ip_blkno);
822 
823 	if (inode == osb->root_inode) {
824 		mlog(0, "Cannot delete the root directory\n");
825 		return -EPERM;
826 	}
827 
828 	status = ocfs2_meta_lock(dir, &parent_node_bh, 1);
829 	if (status < 0) {
830 		if (status != -ENOENT)
831 			mlog_errno(status);
832 		return status;
833 	}
834 
835 	status = ocfs2_find_files_on_disk(dentry->d_name.name,
836 					  dentry->d_name.len, &blkno,
837 					  dir, &dirent_bh, &dirent);
838 	if (status < 0) {
839 		if (status != -ENOENT)
840 			mlog_errno(status);
841 		goto leave;
842 	}
843 
844 	if (OCFS2_I(inode)->ip_blkno != blkno) {
845 		status = -ENOENT;
846 
847 		mlog(0, "ip_blkno %llu != dirent blkno %llu ip_flags = %x\n",
848 		     (unsigned long long)OCFS2_I(inode)->ip_blkno,
849 		     (unsigned long long)blkno, OCFS2_I(inode)->ip_flags);
850 		goto leave;
851 	}
852 
853 	status = ocfs2_meta_lock(inode, &fe_bh, 1);
854 	if (status < 0) {
855 		if (status != -ENOENT)
856 			mlog_errno(status);
857 		goto leave;
858 	}
859 	child_locked = 1;
860 
861 	if (S_ISDIR(inode->i_mode)) {
862 	       	if (!ocfs2_empty_dir(inode)) {
863 			status = -ENOTEMPTY;
864 			goto leave;
865 		} else if (inode->i_nlink != 2) {
866 			status = -ENOTEMPTY;
867 			goto leave;
868 		}
869 	}
870 
871 	status = ocfs2_remote_dentry_delete(dentry);
872 	if (status < 0) {
873 		/* This vote should succeed under all normal
874 		 * circumstances. */
875 		mlog_errno(status);
876 		goto leave;
877 	}
878 
879 	if (inode_is_unlinkable(inode)) {
880 		status = ocfs2_prepare_orphan_dir(osb, &orphan_dir, inode,
881 						  orphan_name,
882 						  &orphan_entry_bh);
883 		if (status < 0) {
884 			mlog_errno(status);
885 			goto leave;
886 		}
887 	}
888 
889 	handle = ocfs2_start_trans(osb, OCFS2_UNLINK_CREDITS);
890 	if (IS_ERR(handle)) {
891 		status = PTR_ERR(handle);
892 		handle = NULL;
893 		mlog_errno(status);
894 		goto leave;
895 	}
896 
897 	status = ocfs2_journal_access(handle, inode, fe_bh,
898 				      OCFS2_JOURNAL_ACCESS_WRITE);
899 	if (status < 0) {
900 		mlog_errno(status);
901 		goto leave;
902 	}
903 
904 	fe = (struct ocfs2_dinode *) fe_bh->b_data;
905 
906 	if (inode_is_unlinkable(inode)) {
907 		status = ocfs2_orphan_add(osb, handle, inode, fe, orphan_name,
908 					  orphan_entry_bh, orphan_dir);
909 		if (status < 0) {
910 			mlog_errno(status);
911 			goto leave;
912 		}
913 	}
914 
915 	/* delete the name from the parent dir */
916 	status = ocfs2_delete_entry(handle, dir, dirent, dirent_bh);
917 	if (status < 0) {
918 		mlog_errno(status);
919 		goto leave;
920 	}
921 
922 	if (S_ISDIR(inode->i_mode))
923 		drop_nlink(inode);
924 	drop_nlink(inode);
925 	fe->i_links_count = cpu_to_le16(inode->i_nlink);
926 
927 	status = ocfs2_journal_dirty(handle, fe_bh);
928 	if (status < 0) {
929 		mlog_errno(status);
930 		goto leave;
931 	}
932 
933 	if (S_ISDIR(inode->i_mode)) {
934 		drop_nlink(dir);
935 		status = ocfs2_mark_inode_dirty(handle, dir,
936 						parent_node_bh);
937 		if (status < 0) {
938 			mlog_errno(status);
939 			inc_nlink(dir);
940 		}
941 	}
942 
943 leave:
944 	if (handle)
945 		ocfs2_commit_trans(osb, handle);
946 
947 	if (child_locked)
948 		ocfs2_meta_unlock(inode, 1);
949 
950 	ocfs2_meta_unlock(dir, 1);
951 
952 	if (orphan_dir) {
953 		/* This was locked for us in ocfs2_prepare_orphan_dir() */
954 		ocfs2_meta_unlock(orphan_dir, 1);
955 		mutex_unlock(&orphan_dir->i_mutex);
956 		iput(orphan_dir);
957 	}
958 
959 	if (fe_bh)
960 		brelse(fe_bh);
961 
962 	if (dirent_bh)
963 		brelse(dirent_bh);
964 
965 	if (parent_node_bh)
966 		brelse(parent_node_bh);
967 
968 	if (orphan_entry_bh)
969 		brelse(orphan_entry_bh);
970 
971 	mlog_exit(status);
972 
973 	return status;
974 }
975 
976 /*
977  * The only place this should be used is rename!
978  * if they have the same id, then the 1st one is the only one locked.
979  */
980 static int ocfs2_double_lock(struct ocfs2_super *osb,
981 			     struct buffer_head **bh1,
982 			     struct inode *inode1,
983 			     struct buffer_head **bh2,
984 			     struct inode *inode2)
985 {
986 	int status;
987 	struct ocfs2_inode_info *oi1 = OCFS2_I(inode1);
988 	struct ocfs2_inode_info *oi2 = OCFS2_I(inode2);
989 	struct buffer_head **tmpbh;
990 	struct inode *tmpinode;
991 
992 	mlog_entry("(inode1 = %llu, inode2 = %llu)\n",
993 		   (unsigned long long)oi1->ip_blkno,
994 		   (unsigned long long)oi2->ip_blkno);
995 
996 	if (*bh1)
997 		*bh1 = NULL;
998 	if (*bh2)
999 		*bh2 = NULL;
1000 
1001 	/* we always want to lock the one with the lower lockid first. */
1002 	if (oi1->ip_blkno != oi2->ip_blkno) {
1003 		if (oi1->ip_blkno < oi2->ip_blkno) {
1004 			/* switch id1 and id2 around */
1005 			mlog(0, "switching them around...\n");
1006 			tmpbh = bh2;
1007 			bh2 = bh1;
1008 			bh1 = tmpbh;
1009 
1010 			tmpinode = inode2;
1011 			inode2 = inode1;
1012 			inode1 = tmpinode;
1013 		}
1014 		/* lock id2 */
1015 		status = ocfs2_meta_lock(inode2, bh2, 1);
1016 		if (status < 0) {
1017 			if (status != -ENOENT)
1018 				mlog_errno(status);
1019 			goto bail;
1020 		}
1021 	}
1022 
1023 	/* lock id1 */
1024 	status = ocfs2_meta_lock(inode1, bh1, 1);
1025 	if (status < 0) {
1026 		/*
1027 		 * An error return must mean that no cluster locks
1028 		 * were held on function exit.
1029 		 */
1030 		if (oi1->ip_blkno != oi2->ip_blkno)
1031 			ocfs2_meta_unlock(inode2, 1);
1032 
1033 		if (status != -ENOENT)
1034 			mlog_errno(status);
1035 	}
1036 
1037 bail:
1038 	mlog_exit(status);
1039 	return status;
1040 }
1041 
/*
 * Release the exclusive meta locks on @inode1 and @inode2.  When both
 * arguments are the same inode it is unlocked only once.
 */
static void ocfs2_double_unlock(struct inode *inode1, struct inode *inode2)
{
	int same_inode = (inode1 == inode2);

	ocfs2_meta_unlock(inode1, 1);

	if (!same_inode)
		ocfs2_meta_unlock(inode2, 1);
}
1049 
/*
 * Given a pointer to the start of a directory block, yield the (still
 * little-endian) inode field of the second directory entry, i.e. the
 * entry found rec_len bytes after the first one.  The expansion is an
 * lvalue, so it can be read or assigned.
 *
 * The argument and the whole expansion are parenthesised so that
 * arbitrary expressions can be passed safely.  Note that @buffer is
 * still evaluated twice -- do not pass an expression with side effects.
 */
#define PARENT_INO(buffer) \
	(((struct ocfs2_dir_entry *) \
	  ((char *)(buffer) + \
	   le16_to_cpu(((struct ocfs2_dir_entry *)(buffer))->rec_len)))->inode)
1054 
1055 static int ocfs2_rename(struct inode *old_dir,
1056 			struct dentry *old_dentry,
1057 			struct inode *new_dir,
1058 			struct dentry *new_dentry)
1059 {
1060 	int status = 0, rename_lock = 0, parents_locked = 0;
1061 	int old_child_locked = 0, new_child_locked = 0;
1062 	struct inode *old_inode = old_dentry->d_inode;
1063 	struct inode *new_inode = new_dentry->d_inode;
1064 	struct inode *orphan_dir = NULL;
1065 	struct ocfs2_dinode *newfe = NULL;
1066 	char orphan_name[OCFS2_ORPHAN_NAMELEN + 1];
1067 	struct buffer_head *orphan_entry_bh = NULL;
1068 	struct buffer_head *newfe_bh = NULL;
1069 	struct buffer_head *insert_entry_bh = NULL;
1070 	struct ocfs2_super *osb = NULL;
1071 	u64 newfe_blkno;
1072 	handle_t *handle = NULL;
1073 	struct buffer_head *old_dir_bh = NULL;
1074 	struct buffer_head *new_dir_bh = NULL;
1075 	struct ocfs2_dir_entry *old_de = NULL, *new_de = NULL; // dirent for old_dentry
1076 							       // and new_dentry
1077 	struct buffer_head *new_de_bh = NULL, *old_de_bh = NULL; // bhs for above
1078 	struct buffer_head *old_inode_de_bh = NULL; // if old_dentry is a dir,
1079 						    // this is the 1st dirent bh
1080 	nlink_t old_dir_nlink = old_dir->i_nlink, new_dir_nlink = new_dir->i_nlink;
1081 
1082 	/* At some point it might be nice to break this function up a
1083 	 * bit. */
1084 
1085 	mlog_entry("(0x%p, 0x%p, 0x%p, 0x%p, from='%.*s' to='%.*s')\n",
1086 		   old_dir, old_dentry, new_dir, new_dentry,
1087 		   old_dentry->d_name.len, old_dentry->d_name.name,
1088 		   new_dentry->d_name.len, new_dentry->d_name.name);
1089 
1090 	osb = OCFS2_SB(old_dir->i_sb);
1091 
1092 	if (new_inode) {
1093 		if (!igrab(new_inode))
1094 			BUG();
1095 	}
1096 
1097 	/* Assume a directory hierarchy thusly:
1098 	 * a/b/c
1099 	 * a/d
1100 	 * a,b,c, and d are all directories.
1101 	 *
1102 	 * from cwd of 'a' on both nodes:
1103 	 * node1: mv b/c d
1104 	 * node2: mv d   b/c
1105 	 *
1106 	 * And that's why, just like the VFS, we need a file system
1107 	 * rename lock. */
1108 	if (old_dentry != new_dentry) {
1109 		status = ocfs2_rename_lock(osb);
1110 		if (status < 0) {
1111 			mlog_errno(status);
1112 			goto bail;
1113 		}
1114 		rename_lock = 1;
1115 	}
1116 
1117 	/* if old and new are the same, this'll just do one lock. */
1118 	status = ocfs2_double_lock(osb, &old_dir_bh, old_dir,
1119 				   &new_dir_bh, new_dir);
1120 	if (status < 0) {
1121 		mlog_errno(status);
1122 		goto bail;
1123 	}
1124 	parents_locked = 1;
1125 
1126 	/* make sure both dirs have bhs
1127 	 * get an extra ref on old_dir_bh if old==new */
1128 	if (!new_dir_bh) {
1129 		if (old_dir_bh) {
1130 			new_dir_bh = old_dir_bh;
1131 			get_bh(new_dir_bh);
1132 		} else {
1133 			mlog(ML_ERROR, "no old_dir_bh!\n");
1134 			status = -EIO;
1135 			goto bail;
1136 		}
1137 	}
1138 
1139 	/*
1140 	 * Though we don't require an inode meta data update if
1141 	 * old_inode is not a directory, we lock anyway here to ensure
1142 	 * the vote thread on other nodes won't have to concurrently
1143 	 * downconvert the inode and the dentry locks.
1144 	 */
1145 	status = ocfs2_meta_lock(old_inode, NULL, 1);
1146 	if (status < 0) {
1147 		if (status != -ENOENT)
1148 			mlog_errno(status);
1149 		goto bail;
1150 	}
1151 	old_child_locked = 1;
1152 
1153 	status = ocfs2_remote_dentry_delete(old_dentry);
1154 	if (status < 0) {
1155 		mlog_errno(status);
1156 		goto bail;
1157 	}
1158 
1159 	if (S_ISDIR(old_inode->i_mode)) {
1160 		status = -EIO;
1161 		old_inode_de_bh = ocfs2_bread(old_inode, 0, &status, 0);
1162 		if (!old_inode_de_bh)
1163 			goto bail;
1164 
1165 		status = -EIO;
1166 		if (le64_to_cpu(PARENT_INO(old_inode_de_bh->b_data)) !=
1167 		    OCFS2_I(old_dir)->ip_blkno)
1168 			goto bail;
1169 		status = -EMLINK;
1170 		if (!new_inode && new_dir!=old_dir &&
1171 		    new_dir->i_nlink >= OCFS2_LINK_MAX)
1172 			goto bail;
1173 	}
1174 
1175 	status = -ENOENT;
1176 	old_de_bh = ocfs2_find_entry(old_dentry->d_name.name,
1177 				     old_dentry->d_name.len,
1178 				     old_dir, &old_de);
1179 	if (!old_de_bh)
1180 		goto bail;
1181 
1182 	/*
1183 	 *  Check for inode number is _not_ due to possible IO errors.
1184 	 *  We might rmdir the source, keep it as pwd of some process
1185 	 *  and merrily kill the link to whatever was created under the
1186 	 *  same name. Goodbye sticky bit ;-<
1187 	 */
1188 	if (le64_to_cpu(old_de->inode) != OCFS2_I(old_inode)->ip_blkno)
1189 		goto bail;
1190 
1191 	/* check if the target already exists (in which case we need
1192 	 * to delete it */
1193 	status = ocfs2_find_files_on_disk(new_dentry->d_name.name,
1194 					  new_dentry->d_name.len,
1195 					  &newfe_blkno, new_dir, &new_de_bh,
1196 					  &new_de);
1197 	/* The only error we allow here is -ENOENT because the new
1198 	 * file not existing is perfectly valid. */
1199 	if ((status < 0) && (status != -ENOENT)) {
1200 		/* If we cannot find the file specified we should just */
1201 		/* return the error... */
1202 		mlog_errno(status);
1203 		goto bail;
1204 	}
1205 
1206 	if (!new_de && new_inode)
1207 		mlog(ML_ERROR, "inode %lu does not exist in it's parent "
1208 		     "directory!", new_inode->i_ino);
1209 
1210 	/* In case we need to overwrite an existing file, we blow it
1211 	 * away first */
1212 	if (new_de) {
1213 		/* VFS didn't think there existed an inode here, but
1214 		 * someone else in the cluster must have raced our
1215 		 * rename to create one. Today we error cleanly, in
1216 		 * the future we should consider calling iget to build
1217 		 * a new struct inode for this entry. */
1218 		if (!new_inode) {
1219 			status = -EACCES;
1220 
1221 			mlog(0, "We found an inode for name %.*s but VFS "
1222 			     "didn't give us one.\n", new_dentry->d_name.len,
1223 			     new_dentry->d_name.name);
1224 			goto bail;
1225 		}
1226 
1227 		if (OCFS2_I(new_inode)->ip_blkno != newfe_blkno) {
1228 			status = -EACCES;
1229 
1230 			mlog(0, "Inode %llu and dir %llu disagree. flags = %x\n",
1231 			     (unsigned long long)OCFS2_I(new_inode)->ip_blkno,
1232 			     (unsigned long long)newfe_blkno,
1233 			     OCFS2_I(new_inode)->ip_flags);
1234 			goto bail;
1235 		}
1236 
1237 		status = ocfs2_meta_lock(new_inode, &newfe_bh, 1);
1238 		if (status < 0) {
1239 			if (status != -ENOENT)
1240 				mlog_errno(status);
1241 			goto bail;
1242 		}
1243 		new_child_locked = 1;
1244 
1245 		status = ocfs2_remote_dentry_delete(new_dentry);
1246 		if (status < 0) {
1247 			mlog_errno(status);
1248 			goto bail;
1249 		}
1250 
1251 		newfe = (struct ocfs2_dinode *) newfe_bh->b_data;
1252 
1253 		mlog(0, "aha rename over existing... new_de=%p new_blkno=%llu "
1254 		     "newfebh=%p bhblocknr=%llu\n", new_de,
1255 		     (unsigned long long)newfe_blkno, newfe_bh, newfe_bh ?
1256 		     (unsigned long long)newfe_bh->b_blocknr : 0ULL);
1257 
1258 		if (S_ISDIR(new_inode->i_mode) || (new_inode->i_nlink == 1)) {
1259 			status = ocfs2_prepare_orphan_dir(osb, &orphan_dir,
1260 							  new_inode,
1261 							  orphan_name,
1262 							  &orphan_entry_bh);
1263 			if (status < 0) {
1264 				mlog_errno(status);
1265 				goto bail;
1266 			}
1267 		}
1268 	} else {
1269 		BUG_ON(new_dentry->d_parent->d_inode != new_dir);
1270 
1271 		status = ocfs2_check_dir_for_entry(new_dir,
1272 						   new_dentry->d_name.name,
1273 						   new_dentry->d_name.len);
1274 		if (status)
1275 			goto bail;
1276 
1277 		status = ocfs2_prepare_dir_for_insert(osb, new_dir, new_dir_bh,
1278 						      new_dentry->d_name.name,
1279 						      new_dentry->d_name.len,
1280 						      &insert_entry_bh);
1281 		if (status < 0) {
1282 			mlog_errno(status);
1283 			goto bail;
1284 		}
1285 	}
1286 
1287 	handle = ocfs2_start_trans(osb, OCFS2_RENAME_CREDITS);
1288 	if (IS_ERR(handle)) {
1289 		status = PTR_ERR(handle);
1290 		handle = NULL;
1291 		mlog_errno(status);
1292 		goto bail;
1293 	}
1294 
1295 	if (new_de) {
1296 		if (S_ISDIR(new_inode->i_mode)) {
1297 			if (!ocfs2_empty_dir(new_inode) ||
1298 			    new_inode->i_nlink != 2) {
1299 				status = -ENOTEMPTY;
1300 				goto bail;
1301 			}
1302 		}
1303 		status = ocfs2_journal_access(handle, new_inode, newfe_bh,
1304 					      OCFS2_JOURNAL_ACCESS_WRITE);
1305 		if (status < 0) {
1306 			mlog_errno(status);
1307 			goto bail;
1308 		}
1309 
1310 		if (S_ISDIR(new_inode->i_mode) ||
1311 		    (newfe->i_links_count == cpu_to_le16(1))){
1312 			status = ocfs2_orphan_add(osb, handle, new_inode,
1313 						  newfe, orphan_name,
1314 						  orphan_entry_bh, orphan_dir);
1315 			if (status < 0) {
1316 				mlog_errno(status);
1317 				goto bail;
1318 			}
1319 		}
1320 
1321 		/* change the dirent to point to the correct inode */
1322 		status = ocfs2_journal_access(handle, new_dir, new_de_bh,
1323 					      OCFS2_JOURNAL_ACCESS_WRITE);
1324 		if (status < 0) {
1325 			mlog_errno(status);
1326 			goto bail;
1327 		}
1328 		new_de->inode = cpu_to_le64(OCFS2_I(old_inode)->ip_blkno);
1329 		new_de->file_type = old_de->file_type;
1330 		new_dir->i_version++;
1331 		status = ocfs2_journal_dirty(handle, new_de_bh);
1332 		if (status < 0) {
1333 			mlog_errno(status);
1334 			goto bail;
1335 		}
1336 
1337 		if (S_ISDIR(new_inode->i_mode))
1338 			newfe->i_links_count = 0;
1339 		else
1340 			le16_add_cpu(&newfe->i_links_count, -1);
1341 
1342 		status = ocfs2_journal_dirty(handle, newfe_bh);
1343 		if (status < 0) {
1344 			mlog_errno(status);
1345 			goto bail;
1346 		}
1347 	} else {
1348 		/* if the name was not found in new_dir, add it now */
1349 		status = ocfs2_add_entry(handle, new_dentry, old_inode,
1350 					 OCFS2_I(old_inode)->ip_blkno,
1351 					 new_dir_bh, insert_entry_bh);
1352 	}
1353 
1354 	old_inode->i_ctime = CURRENT_TIME;
1355 	mark_inode_dirty(old_inode);
1356 
1357 	/* now that the name has been added to new_dir, remove the old name */
1358 	status = ocfs2_delete_entry(handle, old_dir, old_de, old_de_bh);
1359 	if (status < 0) {
1360 		mlog_errno(status);
1361 		goto bail;
1362 	}
1363 
1364 	if (new_inode) {
1365 		new_inode->i_nlink--;
1366 		new_inode->i_ctime = CURRENT_TIME;
1367 	}
1368 	old_dir->i_ctime = old_dir->i_mtime = CURRENT_TIME;
1369 	if (old_inode_de_bh) {
1370 		status = ocfs2_journal_access(handle, old_inode,
1371 					     old_inode_de_bh,
1372 					     OCFS2_JOURNAL_ACCESS_WRITE);
1373 		PARENT_INO(old_inode_de_bh->b_data) =
1374 			cpu_to_le64(OCFS2_I(new_dir)->ip_blkno);
1375 		status = ocfs2_journal_dirty(handle, old_inode_de_bh);
1376 		old_dir->i_nlink--;
1377 		if (new_inode) {
1378 			new_inode->i_nlink--;
1379 		} else {
1380 			inc_nlink(new_dir);
1381 			mark_inode_dirty(new_dir);
1382 		}
1383 	}
1384 	mark_inode_dirty(old_dir);
1385 	if (new_inode)
1386 		mark_inode_dirty(new_inode);
1387 
1388 	if (old_dir != new_dir)
1389 		if (new_dir_nlink != new_dir->i_nlink) {
1390 			if (!new_dir_bh) {
1391 				mlog(ML_ERROR, "need to change nlink for new "
1392 				     "dir %llu from %d to %d but bh is NULL\n",
1393 				     (unsigned long long)OCFS2_I(new_dir)->ip_blkno,
1394 				     (int)new_dir_nlink, new_dir->i_nlink);
1395 			} else {
1396 				struct ocfs2_dinode *fe;
1397 				status = ocfs2_journal_access(handle,
1398 							      new_dir,
1399 							      new_dir_bh,
1400 							      OCFS2_JOURNAL_ACCESS_WRITE);
1401 				fe = (struct ocfs2_dinode *) new_dir_bh->b_data;
1402 				fe->i_links_count = cpu_to_le16(new_dir->i_nlink);
1403 				status = ocfs2_journal_dirty(handle, new_dir_bh);
1404 			}
1405 		}
1406 
1407 	if (old_dir_nlink != old_dir->i_nlink) {
1408 		if (!old_dir_bh) {
1409 			mlog(ML_ERROR, "need to change nlink for old dir "
1410 			     "%llu from %d to %d but bh is NULL!\n",
1411 			     (unsigned long long)OCFS2_I(old_dir)->ip_blkno,
1412 			     (int)old_dir_nlink, old_dir->i_nlink);
1413 		} else {
1414 			struct ocfs2_dinode *fe;
1415 			status = ocfs2_journal_access(handle, old_dir,
1416 						      old_dir_bh,
1417 						      OCFS2_JOURNAL_ACCESS_WRITE);
1418 			fe = (struct ocfs2_dinode *) old_dir_bh->b_data;
1419 			fe->i_links_count = cpu_to_le16(old_dir->i_nlink);
1420 			status = ocfs2_journal_dirty(handle, old_dir_bh);
1421 		}
1422 	}
1423 
1424 	ocfs2_dentry_move(old_dentry, new_dentry, old_dir, new_dir);
1425 	status = 0;
1426 bail:
1427 	if (rename_lock)
1428 		ocfs2_rename_unlock(osb);
1429 
1430 	if (handle)
1431 		ocfs2_commit_trans(osb, handle);
1432 
1433 	if (parents_locked)
1434 		ocfs2_double_unlock(old_dir, new_dir);
1435 
1436 	if (old_child_locked)
1437 		ocfs2_meta_unlock(old_inode, 1);
1438 
1439 	if (new_child_locked)
1440 		ocfs2_meta_unlock(new_inode, 1);
1441 
1442 	if (orphan_dir) {
1443 		/* This was locked for us in ocfs2_prepare_orphan_dir() */
1444 		ocfs2_meta_unlock(orphan_dir, 1);
1445 		mutex_unlock(&orphan_dir->i_mutex);
1446 		iput(orphan_dir);
1447 	}
1448 
1449 	if (new_inode)
1450 		sync_mapping_buffers(old_inode->i_mapping);
1451 
1452 	if (new_inode)
1453 		iput(new_inode);
1454 	if (newfe_bh)
1455 		brelse(newfe_bh);
1456 	if (old_dir_bh)
1457 		brelse(old_dir_bh);
1458 	if (new_dir_bh)
1459 		brelse(new_dir_bh);
1460 	if (new_de_bh)
1461 		brelse(new_de_bh);
1462 	if (old_de_bh)
1463 		brelse(old_de_bh);
1464 	if (old_inode_de_bh)
1465 		brelse(old_inode_de_bh);
1466 	if (orphan_entry_bh)
1467 		brelse(orphan_entry_bh);
1468 	if (insert_entry_bh)
1469 		brelse(insert_entry_bh);
1470 
1471 	mlog_exit(status);
1472 
1473 	return status;
1474 }
1475 
1476 /*
1477  * we expect i_size = strlen(symname). Copy symname into the file
1478  * data, including the null terminator.
1479  */
1480 static int ocfs2_create_symlink_data(struct ocfs2_super *osb,
1481 				     handle_t *handle,
1482 				     struct inode *inode,
1483 				     const char *symname)
1484 {
1485 	struct buffer_head **bhs = NULL;
1486 	const char *c;
1487 	struct super_block *sb = osb->sb;
1488 	u64 p_blkno;
1489 	int p_blocks;
1490 	int virtual, blocks, status, i, bytes_left;
1491 
1492 	bytes_left = i_size_read(inode) + 1;
1493 	/* we can't trust i_blocks because we're actually going to
1494 	 * write i_size + 1 bytes. */
1495 	blocks = (bytes_left + sb->s_blocksize - 1) >> sb->s_blocksize_bits;
1496 
1497 	mlog_entry("i_blocks = %llu, i_size = %llu, blocks = %d\n",
1498 			(unsigned long long)inode->i_blocks,
1499 			i_size_read(inode), blocks);
1500 
1501 	/* Sanity check -- make sure we're going to fit. */
1502 	if (bytes_left >
1503 	    ocfs2_clusters_to_bytes(sb, OCFS2_I(inode)->ip_clusters)) {
1504 		status = -EIO;
1505 		mlog_errno(status);
1506 		goto bail;
1507 	}
1508 
1509 	bhs = kcalloc(blocks, sizeof(struct buffer_head *), GFP_KERNEL);
1510 	if (!bhs) {
1511 		status = -ENOMEM;
1512 		mlog_errno(status);
1513 		goto bail;
1514 	}
1515 
1516 	status = ocfs2_extent_map_get_blocks(inode, 0, 1, &p_blkno,
1517 					     &p_blocks);
1518 	if (status < 0) {
1519 		mlog_errno(status);
1520 		goto bail;
1521 	}
1522 
1523 	/* links can never be larger than one cluster so we know this
1524 	 * is all going to be contiguous, but do a sanity check
1525 	 * anyway. */
1526 	if ((p_blocks << sb->s_blocksize_bits) < bytes_left) {
1527 		status = -EIO;
1528 		mlog_errno(status);
1529 		goto bail;
1530 	}
1531 
1532 	virtual = 0;
1533 	while(bytes_left > 0) {
1534 		c = &symname[virtual * sb->s_blocksize];
1535 
1536 		bhs[virtual] = sb_getblk(sb, p_blkno);
1537 		if (!bhs[virtual]) {
1538 			status = -ENOMEM;
1539 			mlog_errno(status);
1540 			goto bail;
1541 		}
1542 		ocfs2_set_new_buffer_uptodate(inode, bhs[virtual]);
1543 
1544 		status = ocfs2_journal_access(handle, inode, bhs[virtual],
1545 					      OCFS2_JOURNAL_ACCESS_CREATE);
1546 		if (status < 0) {
1547 			mlog_errno(status);
1548 			goto bail;
1549 		}
1550 
1551 		memset(bhs[virtual]->b_data, 0, sb->s_blocksize);
1552 
1553 		memcpy(bhs[virtual]->b_data, c,
1554 		       (bytes_left > sb->s_blocksize) ? sb->s_blocksize :
1555 		       bytes_left);
1556 
1557 		status = ocfs2_journal_dirty(handle, bhs[virtual]);
1558 		if (status < 0) {
1559 			mlog_errno(status);
1560 			goto bail;
1561 		}
1562 
1563 		virtual++;
1564 		p_blkno++;
1565 		bytes_left -= sb->s_blocksize;
1566 	}
1567 
1568 	status = 0;
1569 bail:
1570 
1571 	if (bhs) {
1572 		for(i = 0; i < blocks; i++)
1573 			if (bhs[i])
1574 				brelse(bhs[i]);
1575 		kfree(bhs);
1576 	}
1577 
1578 	mlog_exit(status);
1579 	return status;
1580 }
1581 
1582 static int ocfs2_symlink(struct inode *dir,
1583 			 struct dentry *dentry,
1584 			 const char *symname)
1585 {
1586 	int status, l, credits;
1587 	u64 newsize;
1588 	struct ocfs2_super *osb = NULL;
1589 	struct inode *inode = NULL;
1590 	struct super_block *sb;
1591 	struct buffer_head *new_fe_bh = NULL;
1592 	struct buffer_head *de_bh = NULL;
1593 	struct buffer_head *parent_fe_bh = NULL;
1594 	struct ocfs2_dinode *fe = NULL;
1595 	struct ocfs2_dinode *dirfe;
1596 	handle_t *handle = NULL;
1597 	struct ocfs2_alloc_context *inode_ac = NULL;
1598 	struct ocfs2_alloc_context *data_ac = NULL;
1599 
1600 	mlog_entry("(0x%p, 0x%p, symname='%s' actual='%.*s')\n", dir,
1601 		   dentry, symname, dentry->d_name.len, dentry->d_name.name);
1602 
1603 	sb = dir->i_sb;
1604 	osb = OCFS2_SB(sb);
1605 
1606 	l = strlen(symname) + 1;
1607 
1608 	credits = ocfs2_calc_symlink_credits(sb);
1609 
1610 	/* lock the parent directory */
1611 	status = ocfs2_meta_lock(dir, &parent_fe_bh, 1);
1612 	if (status < 0) {
1613 		if (status != -ENOENT)
1614 			mlog_errno(status);
1615 		return status;
1616 	}
1617 
1618 	dirfe = (struct ocfs2_dinode *) parent_fe_bh->b_data;
1619 	if (!dirfe->i_links_count) {
1620 		/* can't make a file in a deleted directory. */
1621 		status = -ENOENT;
1622 		goto bail;
1623 	}
1624 
1625 	status = ocfs2_check_dir_for_entry(dir, dentry->d_name.name,
1626 					   dentry->d_name.len);
1627 	if (status)
1628 		goto bail;
1629 
1630 	status = ocfs2_prepare_dir_for_insert(osb, dir, parent_fe_bh,
1631 					      dentry->d_name.name,
1632 					      dentry->d_name.len, &de_bh);
1633 	if (status < 0) {
1634 		mlog_errno(status);
1635 		goto bail;
1636 	}
1637 
1638 	status = ocfs2_reserve_new_inode(osb, &inode_ac);
1639 	if (status < 0) {
1640 		if (status != -ENOSPC)
1641 			mlog_errno(status);
1642 		goto bail;
1643 	}
1644 
1645 	/* don't reserve bitmap space for fast symlinks. */
1646 	if (l > ocfs2_fast_symlink_chars(sb)) {
1647 		status = ocfs2_reserve_clusters(osb, 1, &data_ac);
1648 		if (status < 0) {
1649 			if (status != -ENOSPC)
1650 				mlog_errno(status);
1651 			goto bail;
1652 		}
1653 	}
1654 
1655 	handle = ocfs2_start_trans(osb, credits);
1656 	if (IS_ERR(handle)) {
1657 		status = PTR_ERR(handle);
1658 		handle = NULL;
1659 		mlog_errno(status);
1660 		goto bail;
1661 	}
1662 
1663 	status = ocfs2_mknod_locked(osb, dir, dentry,
1664 				    S_IFLNK | S_IRWXUGO, 0,
1665 				    &new_fe_bh, parent_fe_bh, handle,
1666 				    &inode, inode_ac);
1667 	if (status < 0) {
1668 		mlog_errno(status);
1669 		goto bail;
1670 	}
1671 
1672 	fe = (struct ocfs2_dinode *) new_fe_bh->b_data;
1673 	inode->i_rdev = 0;
1674 	newsize = l - 1;
1675 	if (l > ocfs2_fast_symlink_chars(sb)) {
1676 		inode->i_op = &ocfs2_symlink_inode_operations;
1677 		status = ocfs2_do_extend_allocation(osb, inode, 1, new_fe_bh,
1678 						    handle, data_ac, NULL,
1679 						    NULL);
1680 		if (status < 0) {
1681 			if (status != -ENOSPC && status != -EINTR) {
1682 				mlog(ML_ERROR,
1683 				     "Failed to extend file to %llu\n",
1684 				     (unsigned long long)newsize);
1685 				mlog_errno(status);
1686 				status = -ENOSPC;
1687 			}
1688 			goto bail;
1689 		}
1690 		i_size_write(inode, newsize);
1691 		inode->i_blocks = ocfs2_align_bytes_to_sectors(newsize);
1692 	} else {
1693 		inode->i_op = &ocfs2_fast_symlink_inode_operations;
1694 		memcpy((char *) fe->id2.i_symlink, symname, l);
1695 		i_size_write(inode, newsize);
1696 		inode->i_blocks = 0;
1697 	}
1698 
1699 	status = ocfs2_mark_inode_dirty(handle, inode, new_fe_bh);
1700 	if (status < 0) {
1701 		mlog_errno(status);
1702 		goto bail;
1703 	}
1704 
1705 	if (!ocfs2_inode_is_fast_symlink(inode)) {
1706 		status = ocfs2_create_symlink_data(osb, handle, inode,
1707 						   symname);
1708 		if (status < 0) {
1709 			mlog_errno(status);
1710 			goto bail;
1711 		}
1712 	}
1713 
1714 	status = ocfs2_add_entry(handle, dentry, inode,
1715 				 le64_to_cpu(fe->i_blkno), parent_fe_bh,
1716 				 de_bh);
1717 	if (status < 0) {
1718 		mlog_errno(status);
1719 		goto bail;
1720 	}
1721 
1722 	status = ocfs2_dentry_attach_lock(dentry, inode, OCFS2_I(dir)->ip_blkno);
1723 	if (status) {
1724 		mlog_errno(status);
1725 		goto bail;
1726 	}
1727 
1728 	insert_inode_hash(inode);
1729 	dentry->d_op = &ocfs2_dentry_ops;
1730 	d_instantiate(dentry, inode);
1731 bail:
1732 	if (handle)
1733 		ocfs2_commit_trans(osb, handle);
1734 
1735 	ocfs2_meta_unlock(dir, 1);
1736 
1737 	if (new_fe_bh)
1738 		brelse(new_fe_bh);
1739 	if (parent_fe_bh)
1740 		brelse(parent_fe_bh);
1741 	if (de_bh)
1742 		brelse(de_bh);
1743 	if (inode_ac)
1744 		ocfs2_free_alloc_context(inode_ac);
1745 	if (data_ac)
1746 		ocfs2_free_alloc_context(data_ac);
1747 	if ((status < 0) && inode)
1748 		iput(inode);
1749 
1750 	mlog_exit(status);
1751 
1752 	return status;
1753 }
1754 
1755 int ocfs2_check_dir_entry(struct inode * dir,
1756 			  struct ocfs2_dir_entry * de,
1757 			  struct buffer_head * bh,
1758 			  unsigned long offset)
1759 {
1760 	const char *error_msg = NULL;
1761 	const int rlen = le16_to_cpu(de->rec_len);
1762 
1763 	if (rlen < OCFS2_DIR_REC_LEN(1))
1764 		error_msg = "rec_len is smaller than minimal";
1765 	else if (rlen % 4 != 0)
1766 		error_msg = "rec_len % 4 != 0";
1767 	else if (rlen < OCFS2_DIR_REC_LEN(de->name_len))
1768 		error_msg = "rec_len is too small for name_len";
1769 	else if (((char *) de - bh->b_data) + rlen > dir->i_sb->s_blocksize)
1770 		error_msg = "directory entry across blocks";
1771 
1772 	if (error_msg != NULL)
1773 		mlog(ML_ERROR, "bad entry in directory #%llu: %s - "
1774 		     "offset=%lu, inode=%llu, rec_len=%d, name_len=%d\n",
1775 		     (unsigned long long)OCFS2_I(dir)->ip_blkno, error_msg,
1776 		     offset, (unsigned long long)le64_to_cpu(de->inode), rlen,
1777 		     de->name_len);
1778 	return error_msg == NULL ? 1 : 0;
1779 }
1780 
1781 /* we don't always have a dentry for what we want to add, so people
1782  * like orphan dir can call this instead.
1783  *
1784  * If you pass me insert_bh, I'll skip the search of the other dir
1785  * blocks and put the record in there.
1786  */
1787 static int __ocfs2_add_entry(handle_t *handle,
1788 			     struct inode *dir,
1789 			     const char *name, int namelen,
1790 			     struct inode *inode, u64 blkno,
1791 			     struct buffer_head *parent_fe_bh,
1792 			     struct buffer_head *insert_bh)
1793 {
1794 	unsigned long offset;
1795 	unsigned short rec_len;
1796 	struct ocfs2_dir_entry *de, *de1;
1797 	struct super_block *sb;
1798 	int retval, status;
1799 
1800 	mlog_entry_void();
1801 
1802 	sb = dir->i_sb;
1803 
1804 	if (!namelen)
1805 		return -EINVAL;
1806 
1807 	rec_len = OCFS2_DIR_REC_LEN(namelen);
1808 	offset = 0;
1809 	de = (struct ocfs2_dir_entry *) insert_bh->b_data;
1810 	while (1) {
1811 		BUG_ON((char *)de >= sb->s_blocksize + insert_bh->b_data);
1812 		/* These checks should've already been passed by the
1813 		 * prepare function, but I guess we can leave them
1814 		 * here anyway. */
1815 		if (!ocfs2_check_dir_entry(dir, de, insert_bh, offset)) {
1816 			retval = -ENOENT;
1817 			goto bail;
1818 		}
1819 		if (ocfs2_match(namelen, name, de)) {
1820 			retval = -EEXIST;
1821 			goto bail;
1822 		}
1823 		if (((le64_to_cpu(de->inode) == 0) &&
1824 		     (le16_to_cpu(de->rec_len) >= rec_len)) ||
1825 		    (le16_to_cpu(de->rec_len) >=
1826 		     (OCFS2_DIR_REC_LEN(de->name_len) + rec_len))) {
1827 			status = ocfs2_journal_access(handle, dir, insert_bh,
1828 						      OCFS2_JOURNAL_ACCESS_WRITE);
1829 			/* By now the buffer is marked for journaling */
1830 			offset += le16_to_cpu(de->rec_len);
1831 			if (le64_to_cpu(de->inode)) {
1832 				de1 = (struct ocfs2_dir_entry *)((char *) de +
1833 					OCFS2_DIR_REC_LEN(de->name_len));
1834 				de1->rec_len =
1835 					cpu_to_le16(le16_to_cpu(de->rec_len) -
1836 					OCFS2_DIR_REC_LEN(de->name_len));
1837 				de->rec_len = cpu_to_le16(OCFS2_DIR_REC_LEN(de->name_len));
1838 				de = de1;
1839 			}
1840 			de->file_type = OCFS2_FT_UNKNOWN;
1841 			if (blkno) {
1842 				de->inode = cpu_to_le64(blkno);
1843 				ocfs2_set_de_type(de, inode->i_mode);
1844 			} else
1845 				de->inode = 0;
1846 			de->name_len = namelen;
1847 			memcpy(de->name, name, namelen);
1848 
1849 			dir->i_mtime = dir->i_ctime = CURRENT_TIME;
1850 			dir->i_version++;
1851 			status = ocfs2_journal_dirty(handle, insert_bh);
1852 			retval = 0;
1853 			goto bail;
1854 		}
1855 		offset += le16_to_cpu(de->rec_len);
1856 		de = (struct ocfs2_dir_entry *) ((char *) de + le16_to_cpu(de->rec_len));
1857 	}
1858 
1859 	/* when you think about it, the assert above should prevent us
1860 	 * from ever getting here. */
1861 	retval = -ENOSPC;
1862 bail:
1863 
1864 	mlog_exit(retval);
1865 	return retval;
1866 }
1867 
1868 
1869 /*
1870  * ocfs2_delete_entry deletes a directory entry by merging it with the
1871  * previous entry
1872  */
1873 static int ocfs2_delete_entry(handle_t *handle,
1874 			      struct inode *dir,
1875 			      struct ocfs2_dir_entry *de_del,
1876 			      struct buffer_head *bh)
1877 {
1878 	struct ocfs2_dir_entry *de, *pde;
1879 	int i, status = -ENOENT;
1880 
1881 	mlog_entry("(0x%p, 0x%p, 0x%p, 0x%p)\n", handle, dir, de_del, bh);
1882 
1883 	i = 0;
1884 	pde = NULL;
1885 	de = (struct ocfs2_dir_entry *) bh->b_data;
1886 	while (i < bh->b_size) {
1887 		if (!ocfs2_check_dir_entry(dir, de, bh, i)) {
1888 			status = -EIO;
1889 			mlog_errno(status);
1890 			goto bail;
1891 		}
1892 		if (de == de_del)  {
1893 			status = ocfs2_journal_access(handle, dir, bh,
1894 						      OCFS2_JOURNAL_ACCESS_WRITE);
1895 			if (status < 0) {
1896 				status = -EIO;
1897 				mlog_errno(status);
1898 				goto bail;
1899 			}
1900 			if (pde)
1901 				pde->rec_len =
1902 					cpu_to_le16(le16_to_cpu(pde->rec_len) +
1903 						    le16_to_cpu(de->rec_len));
1904 			else
1905 				de->inode = 0;
1906 			dir->i_version++;
1907 			status = ocfs2_journal_dirty(handle, bh);
1908 			goto bail;
1909 		}
1910 		i += le16_to_cpu(de->rec_len);
1911 		pde = de;
1912 		de = (struct ocfs2_dir_entry *)((char *)de + le16_to_cpu(de->rec_len));
1913 	}
1914 bail:
1915 	mlog_exit(status);
1916 	return status;
1917 }
1918 
1919 /*
1920  * Returns 0 if not found, -1 on failure, and 1 on success
1921  */
1922 static int inline ocfs2_search_dirblock(struct buffer_head *bh,
1923 					struct inode *dir,
1924 					const char *name, int namelen,
1925 					unsigned long offset,
1926 					struct ocfs2_dir_entry **res_dir)
1927 {
1928 	struct ocfs2_dir_entry *de;
1929 	char *dlimit, *de_buf;
1930 	int de_len;
1931 	int ret = 0;
1932 
1933 	mlog_entry_void();
1934 
1935 	de_buf = bh->b_data;
1936 	dlimit = de_buf + dir->i_sb->s_blocksize;
1937 
1938 	while (de_buf < dlimit) {
1939 		/* this code is executed quadratically often */
1940 		/* do minimal checking `by hand' */
1941 
1942 		de = (struct ocfs2_dir_entry *) de_buf;
1943 
1944 		if (de_buf + namelen <= dlimit &&
1945 		    ocfs2_match(namelen, name, de)) {
1946 			/* found a match - just to be sure, do a full check */
1947 			if (!ocfs2_check_dir_entry(dir, de, bh, offset)) {
1948 				ret = -1;
1949 				goto bail;
1950 			}
1951 			*res_dir = de;
1952 			ret = 1;
1953 			goto bail;
1954 		}
1955 
1956 		/* prevent looping on a bad block */
1957 		de_len = le16_to_cpu(de->rec_len);
1958 		if (de_len <= 0) {
1959 			ret = -1;
1960 			goto bail;
1961 		}
1962 
1963 		de_buf += de_len;
1964 		offset += de_len;
1965 	}
1966 
1967 bail:
1968 	mlog_exit(ret);
1969 	return ret;
1970 }
1971 
1972 struct buffer_head *ocfs2_find_entry(const char *name, int namelen,
1973 				     struct inode *dir,
1974 				     struct ocfs2_dir_entry **res_dir)
1975 {
1976 	struct super_block *sb;
1977 	struct buffer_head *bh_use[NAMEI_RA_SIZE];
1978 	struct buffer_head *bh, *ret = NULL;
1979 	unsigned long start, block, b;
1980 	int ra_max = 0;		/* Number of bh's in the readahead
1981 				   buffer, bh_use[] */
1982 	int ra_ptr = 0;		/* Current index into readahead
1983 				   buffer */
1984 	int num = 0;
1985 	int nblocks, i, err;
1986 
1987 	mlog_entry_void();
1988 
1989 	*res_dir = NULL;
1990 	sb = dir->i_sb;
1991 
1992 	nblocks = i_size_read(dir) >> sb->s_blocksize_bits;
1993 	start = OCFS2_I(dir)->ip_dir_start_lookup;
1994 	if (start >= nblocks)
1995 		start = 0;
1996 	block = start;
1997 
1998 restart:
1999 	do {
2000 		/*
2001 		 * We deal with the read-ahead logic here.
2002 		 */
2003 		if (ra_ptr >= ra_max) {
2004 			/* Refill the readahead buffer */
2005 			ra_ptr = 0;
2006 			b = block;
2007 			for (ra_max = 0; ra_max < NAMEI_RA_SIZE; ra_max++) {
2008 				/*
2009 				 * Terminate if we reach the end of the
2010 				 * directory and must wrap, or if our
2011 				 * search has finished at this block.
2012 				 */
2013 				if (b >= nblocks || (num && block == start)) {
2014 					bh_use[ra_max] = NULL;
2015 					break;
2016 				}
2017 				num++;
2018 
2019 				bh = ocfs2_bread(dir, b++, &err, 1);
2020 				bh_use[ra_max] = bh;
2021 			}
2022 		}
2023 		if ((bh = bh_use[ra_ptr++]) == NULL)
2024 			goto next;
2025 		wait_on_buffer(bh);
2026 		if (!buffer_uptodate(bh)) {
2027 			/* read error, skip block & hope for the best */
2028 			ocfs2_error(dir->i_sb, "reading directory %llu, "
2029 				    "offset %lu\n",
2030 				    (unsigned long long)OCFS2_I(dir)->ip_blkno,
2031 				    block);
2032 			brelse(bh);
2033 			goto next;
2034 		}
2035 		i = ocfs2_search_dirblock(bh, dir, name, namelen,
2036 					  block << sb->s_blocksize_bits,
2037 					  res_dir);
2038 		if (i == 1) {
2039 			OCFS2_I(dir)->ip_dir_start_lookup = block;
2040 			ret = bh;
2041 			goto cleanup_and_exit;
2042 		} else {
2043 			brelse(bh);
2044 			if (i < 0)
2045 				goto cleanup_and_exit;
2046 		}
2047 	next:
2048 		if (++block >= nblocks)
2049 			block = 0;
2050 	} while (block != start);
2051 
2052 	/*
2053 	 * If the directory has grown while we were searching, then
2054 	 * search the last part of the directory before giving up.
2055 	 */
2056 	block = nblocks;
2057 	nblocks = i_size_read(dir) >> sb->s_blocksize_bits;
2058 	if (block < nblocks) {
2059 		start = 0;
2060 		goto restart;
2061 	}
2062 
2063 cleanup_and_exit:
2064 	/* Clean up the read-ahead blocks */
2065 	for (; ra_ptr < ra_max; ra_ptr++)
2066 		brelse(bh_use[ra_ptr]);
2067 
2068 	mlog_exit_ptr(ret);
2069 	return ret;
2070 }
2071 
2072 static int ocfs2_blkno_stringify(u64 blkno, char *name)
2073 {
2074 	int status, namelen;
2075 
2076 	mlog_entry_void();
2077 
2078 	namelen = snprintf(name, OCFS2_ORPHAN_NAMELEN + 1, "%016llx",
2079 			   (long long)blkno);
2080 	if (namelen <= 0) {
2081 		if (namelen)
2082 			status = namelen;
2083 		else
2084 			status = -EINVAL;
2085 		mlog_errno(status);
2086 		goto bail;
2087 	}
2088 	if (namelen != OCFS2_ORPHAN_NAMELEN) {
2089 		status = -EINVAL;
2090 		mlog_errno(status);
2091 		goto bail;
2092 	}
2093 
2094 	mlog(0, "built filename '%s' for orphan dir (len=%d)\n", name,
2095 	     namelen);
2096 
2097 	status = 0;
2098 bail:
2099 	mlog_exit(status);
2100 	return status;
2101 }
2102 
2103 static int ocfs2_prepare_orphan_dir(struct ocfs2_super *osb,
2104 				    struct inode **ret_orphan_dir,
2105 				    struct inode *inode,
2106 				    char *name,
2107 				    struct buffer_head **de_bh)
2108 {
2109 	struct inode *orphan_dir_inode;
2110 	struct buffer_head *orphan_dir_bh = NULL;
2111 	int status = 0;
2112 
2113 	status = ocfs2_blkno_stringify(OCFS2_I(inode)->ip_blkno, name);
2114 	if (status < 0) {
2115 		mlog_errno(status);
2116 		return status;
2117 	}
2118 
2119 	orphan_dir_inode = ocfs2_get_system_file_inode(osb,
2120 						       ORPHAN_DIR_SYSTEM_INODE,
2121 						       osb->slot_num);
2122 	if (!orphan_dir_inode) {
2123 		status = -ENOENT;
2124 		mlog_errno(status);
2125 		return status;
2126 	}
2127 
2128 	mutex_lock(&orphan_dir_inode->i_mutex);
2129 
2130 	status = ocfs2_meta_lock(orphan_dir_inode, &orphan_dir_bh, 1);
2131 	if (status < 0) {
2132 		mlog_errno(status);
2133 		goto leave;
2134 	}
2135 
2136 	status = ocfs2_prepare_dir_for_insert(osb, orphan_dir_inode,
2137 					      orphan_dir_bh, name,
2138 					      OCFS2_ORPHAN_NAMELEN, de_bh);
2139 	if (status < 0) {
2140 		ocfs2_meta_unlock(orphan_dir_inode, 1);
2141 
2142 		mlog_errno(status);
2143 		goto leave;
2144 	}
2145 
2146 	*ret_orphan_dir = orphan_dir_inode;
2147 
2148 leave:
2149 	if (status) {
2150 		mutex_unlock(&orphan_dir_inode->i_mutex);
2151 		iput(orphan_dir_inode);
2152 	}
2153 
2154 	if (orphan_dir_bh)
2155 		brelse(orphan_dir_bh);
2156 
2157 	mlog_exit(status);
2158 	return status;
2159 }
2160 
2161 static int ocfs2_orphan_add(struct ocfs2_super *osb,
2162 			    handle_t *handle,
2163 			    struct inode *inode,
2164 			    struct ocfs2_dinode *fe,
2165 			    char *name,
2166 			    struct buffer_head *de_bh,
2167 			    struct inode *orphan_dir_inode)
2168 {
2169 	struct buffer_head *orphan_dir_bh = NULL;
2170 	int status = 0;
2171 	struct ocfs2_dinode *orphan_fe;
2172 
2173 	mlog_entry("(inode->i_ino = %lu)\n", inode->i_ino);
2174 
2175 	status = ocfs2_read_block(osb,
2176 				  OCFS2_I(orphan_dir_inode)->ip_blkno,
2177 				  &orphan_dir_bh, OCFS2_BH_CACHED,
2178 				  orphan_dir_inode);
2179 	if (status < 0) {
2180 		mlog_errno(status);
2181 		goto leave;
2182 	}
2183 
2184 	status = ocfs2_journal_access(handle, orphan_dir_inode, orphan_dir_bh,
2185 				      OCFS2_JOURNAL_ACCESS_WRITE);
2186 	if (status < 0) {
2187 		mlog_errno(status);
2188 		goto leave;
2189 	}
2190 
2191 	/* we're a cluster, and nlink can change on disk from
2192 	 * underneath us... */
2193 	orphan_fe = (struct ocfs2_dinode *) orphan_dir_bh->b_data;
2194 	if (S_ISDIR(inode->i_mode))
2195 		le16_add_cpu(&orphan_fe->i_links_count, 1);
2196 	orphan_dir_inode->i_nlink = le16_to_cpu(orphan_fe->i_links_count);
2197 
2198 	status = ocfs2_journal_dirty(handle, orphan_dir_bh);
2199 	if (status < 0) {
2200 		mlog_errno(status);
2201 		goto leave;
2202 	}
2203 
2204 	status = __ocfs2_add_entry(handle, orphan_dir_inode, name,
2205 				   OCFS2_ORPHAN_NAMELEN, inode,
2206 				   OCFS2_I(inode)->ip_blkno,
2207 				   orphan_dir_bh, de_bh);
2208 	if (status < 0) {
2209 		mlog_errno(status);
2210 		goto leave;
2211 	}
2212 
2213 	le32_add_cpu(&fe->i_flags, OCFS2_ORPHANED_FL);
2214 
2215 	/* Record which orphan dir our inode now resides
2216 	 * in. delete_inode will use this to determine which orphan
2217 	 * dir to lock. */
2218 	spin_lock(&OCFS2_I(inode)->ip_lock);
2219 	OCFS2_I(inode)->ip_orphaned_slot = osb->slot_num;
2220 	spin_unlock(&OCFS2_I(inode)->ip_lock);
2221 
2222 	mlog(0, "Inode %llu orphaned in slot %d\n",
2223 	     (unsigned long long)OCFS2_I(inode)->ip_blkno, osb->slot_num);
2224 
2225 leave:
2226 	if (orphan_dir_bh)
2227 		brelse(orphan_dir_bh);
2228 
2229 	mlog_exit(status);
2230 	return status;
2231 }
2232 
2233 /* unlike orphan_add, we expect the orphan dir to already be locked here. */
2234 int ocfs2_orphan_del(struct ocfs2_super *osb,
2235 		     handle_t *handle,
2236 		     struct inode *orphan_dir_inode,
2237 		     struct inode *inode,
2238 		     struct buffer_head *orphan_dir_bh)
2239 {
2240 	char name[OCFS2_ORPHAN_NAMELEN + 1];
2241 	struct ocfs2_dinode *orphan_fe;
2242 	int status = 0;
2243 	struct buffer_head *target_de_bh = NULL;
2244 	struct ocfs2_dir_entry *target_de = NULL;
2245 
2246 	mlog_entry_void();
2247 
2248 	status = ocfs2_blkno_stringify(OCFS2_I(inode)->ip_blkno, name);
2249 	if (status < 0) {
2250 		mlog_errno(status);
2251 		goto leave;
2252 	}
2253 
2254 	mlog(0, "removing '%s' from orphan dir %llu (namelen=%d)\n",
2255 	     name, (unsigned long long)OCFS2_I(orphan_dir_inode)->ip_blkno,
2256 	     OCFS2_ORPHAN_NAMELEN);
2257 
2258 	/* find it's spot in the orphan directory */
2259 	target_de_bh = ocfs2_find_entry(name, OCFS2_ORPHAN_NAMELEN,
2260 					orphan_dir_inode, &target_de);
2261 	if (!target_de_bh) {
2262 		status = -ENOENT;
2263 		mlog_errno(status);
2264 		goto leave;
2265 	}
2266 
2267 	/* remove it from the orphan directory */
2268 	status = ocfs2_delete_entry(handle, orphan_dir_inode, target_de,
2269 				    target_de_bh);
2270 	if (status < 0) {
2271 		mlog_errno(status);
2272 		goto leave;
2273 	}
2274 
2275 	status = ocfs2_journal_access(handle,orphan_dir_inode,  orphan_dir_bh,
2276 				      OCFS2_JOURNAL_ACCESS_WRITE);
2277 	if (status < 0) {
2278 		mlog_errno(status);
2279 		goto leave;
2280 	}
2281 
2282 	/* do the i_nlink dance! :) */
2283 	orphan_fe = (struct ocfs2_dinode *) orphan_dir_bh->b_data;
2284 	if (S_ISDIR(inode->i_mode))
2285 		le16_add_cpu(&orphan_fe->i_links_count, -1);
2286 	orphan_dir_inode->i_nlink = le16_to_cpu(orphan_fe->i_links_count);
2287 
2288 	status = ocfs2_journal_dirty(handle, orphan_dir_bh);
2289 	if (status < 0) {
2290 		mlog_errno(status);
2291 		goto leave;
2292 	}
2293 
2294 leave:
2295 	if (target_de_bh)
2296 		brelse(target_de_bh);
2297 
2298 	mlog_exit(status);
2299 	return status;
2300 }
2301 
2302 struct inode_operations ocfs2_dir_iops = {
2303 	.create		= ocfs2_create,
2304 	.lookup		= ocfs2_lookup,
2305 	.link		= ocfs2_link,
2306 	.unlink		= ocfs2_unlink,
2307 	.rmdir		= ocfs2_unlink,
2308 	.symlink	= ocfs2_symlink,
2309 	.mkdir		= ocfs2_mkdir,
2310 	.mknod		= ocfs2_mknod,
2311 	.rename		= ocfs2_rename,
2312 	.setattr	= ocfs2_setattr,
2313 	.getattr	= ocfs2_getattr,
2314 	.permission	= ocfs2_permission,
2315 };
2316