xref: /openbmc/linux/fs/gfs2/inode.c (revision c752666c)
1 /*
2  * Copyright (C) Sistina Software, Inc.  1997-2003 All rights reserved.
3  * Copyright (C) 2004-2005 Red Hat, Inc.  All rights reserved.
4  *
5  * This copyrighted material is made available to anyone wishing to use,
6  * modify, copy, or redistribute it subject to the terms and conditions
7  * of the GNU General Public License v.2.
8  */
9 
10 #include <linux/sched.h>
11 #include <linux/slab.h>
12 #include <linux/spinlock.h>
13 #include <linux/completion.h>
14 #include <linux/buffer_head.h>
15 #include <linux/posix_acl.h>
16 #include <linux/sort.h>
17 #include <linux/gfs2_ondisk.h>
18 #include <asm/semaphore.h>
19 
20 #include "gfs2.h"
21 #include "lm_interface.h"
22 #include "incore.h"
23 #include "acl.h"
24 #include "bmap.h"
25 #include "dir.h"
26 #include "eattr.h"
27 #include "glock.h"
28 #include "glops.h"
29 #include "inode.h"
30 #include "log.h"
31 #include "meta_io.h"
32 #include "ops_address.h"
33 #include "ops_file.h"
34 #include "ops_inode.h"
35 #include "quota.h"
36 #include "rgrp.h"
37 #include "trans.h"
38 #include "unlinked.h"
39 #include "util.h"
40 
41 /**
42  * inode_attr_in - Copy attributes from the dinode into the VFS inode
43  * @ip: The GFS2 inode (with embedded disk inode data)
44  * @inode:  The Linux VFS inode
45  *
46  */
47 
48 static void inode_attr_in(struct gfs2_inode *ip, struct inode *inode)
49 {
50 	inode->i_ino = ip->i_num.no_formal_ino;
51 
52 	switch (ip->i_di.di_mode & S_IFMT) {
53 	case S_IFBLK:
54 	case S_IFCHR:
55 		inode->i_rdev = MKDEV(ip->i_di.di_major, ip->i_di.di_minor);
56 		break;
57 	default:
58 		inode->i_rdev = 0;
59 		break;
60 	};
61 
62 	inode->i_mode = ip->i_di.di_mode;
63 	inode->i_nlink = ip->i_di.di_nlink;
64 	inode->i_uid = ip->i_di.di_uid;
65 	inode->i_gid = ip->i_di.di_gid;
66 	i_size_write(inode, ip->i_di.di_size);
67 	inode->i_atime.tv_sec = ip->i_di.di_atime;
68 	inode->i_mtime.tv_sec = ip->i_di.di_mtime;
69 	inode->i_ctime.tv_sec = ip->i_di.di_ctime;
70 	inode->i_atime.tv_nsec = 0;
71 	inode->i_mtime.tv_nsec = 0;
72 	inode->i_ctime.tv_nsec = 0;
73 	inode->i_blksize = PAGE_SIZE;
74 	inode->i_blocks = ip->i_di.di_blocks <<
75 		(ip->i_sbd->sd_sb.sb_bsize_shift - GFS2_BASIC_BLOCK_SHIFT);
76 
77 	if (ip->i_di.di_flags & GFS2_DIF_IMMUTABLE)
78 		inode->i_flags |= S_IMMUTABLE;
79 	else
80 		inode->i_flags &= ~S_IMMUTABLE;
81 
82 	if (ip->i_di.di_flags & GFS2_DIF_APPENDONLY)
83 		inode->i_flags |= S_APPEND;
84 	else
85 		inode->i_flags &= ~S_APPEND;
86 }
87 
/**
 * gfs2_inode_attr_in - Copy attributes from the dinode into the VFS inode
 * @ip: The GFS2 inode (with embedded disk inode data)
 *
 * Does nothing if gfs2_ip2v_lookup() finds no VFS inode for @ip.
 */

void gfs2_inode_attr_in(struct gfs2_inode *ip)
{
	struct inode *inode = gfs2_ip2v_lookup(ip);

	if (!inode)
		return;

	inode_attr_in(ip, inode);
	/* Drop the reference returned by the lookup */
	iput(inode);
}
104 
105 /**
106  * gfs2_inode_attr_out - Copy attributes from VFS inode into the dinode
107  * @ip: The GFS2 inode
108  *
109  * Only copy out the attributes that we want the VFS layer
110  * to be able to modify.
111  */
112 
113 void gfs2_inode_attr_out(struct gfs2_inode *ip)
114 {
115 	struct inode *inode = ip->i_vnode;
116 
117 	gfs2_assert_withdraw(ip->i_sbd,
118 		(ip->i_di.di_mode & S_IFMT) == (inode->i_mode & S_IFMT));
119 	ip->i_di.di_mode = inode->i_mode;
120 	ip->i_di.di_uid = inode->i_uid;
121 	ip->i_di.di_gid = inode->i_gid;
122 	ip->i_di.di_atime = inode->i_atime.tv_sec;
123 	ip->i_di.di_mtime = inode->i_mtime.tv_sec;
124 	ip->i_di.di_ctime = inode->i_ctime.tv_sec;
125 }
126 
127 /**
128  * gfs2_ip2v_lookup - Get the struct inode for a struct gfs2_inode
129  * @ip: the struct gfs2_inode to get the struct inode for
130  *
131  * Returns: A VFS inode, or NULL if none
132  */
133 
134 struct inode *gfs2_ip2v_lookup(struct gfs2_inode *ip)
135 {
136 	struct inode *inode = NULL;
137 
138 	gfs2_assert_warn(ip->i_sbd, test_bit(GIF_MIN_INIT, &ip->i_flags));
139 
140 	spin_lock(&ip->i_spin);
141 	if (ip->i_vnode)
142 		inode = igrab(ip->i_vnode);
143 	spin_unlock(&ip->i_spin);
144 
145 	return inode;
146 }
147 
148 /**
149  * gfs2_ip2v - Get/Create a struct inode for a struct gfs2_inode
150  * @ip: the struct gfs2_inode to get the struct inode for
151  *
152  * Returns: A VFS inode, or NULL if no mem
153  */
154 
155 struct inode *gfs2_ip2v(struct gfs2_inode *ip)
156 {
157 	struct inode *inode, *tmp;
158 
159 	inode = gfs2_ip2v_lookup(ip);
160 	if (inode)
161 		return inode;
162 
163 	tmp = new_inode(ip->i_sbd->sd_vfs);
164 	if (!tmp)
165 		return NULL;
166 
167 	inode_attr_in(ip, tmp);
168 
169 	if (S_ISREG(ip->i_di.di_mode)) {
170 		tmp->i_op = &gfs2_file_iops;
171 		tmp->i_fop = &gfs2_file_fops;
172 		tmp->i_mapping->a_ops = &gfs2_file_aops;
173 	} else if (S_ISDIR(ip->i_di.di_mode)) {
174 		tmp->i_op = &gfs2_dir_iops;
175 		tmp->i_fop = &gfs2_dir_fops;
176 	} else if (S_ISLNK(ip->i_di.di_mode)) {
177 		tmp->i_op = &gfs2_symlink_iops;
178 	} else {
179 		tmp->i_op = &gfs2_dev_iops;
180 		init_special_inode(tmp, tmp->i_mode, tmp->i_rdev);
181 	}
182 
183 	tmp->u.generic_ip = NULL;
184 
185 	for (;;) {
186 		spin_lock(&ip->i_spin);
187 		if (!ip->i_vnode)
188 			break;
189 		inode = igrab(ip->i_vnode);
190 		spin_unlock(&ip->i_spin);
191 
192 		if (inode) {
193 			iput(tmp);
194 			return inode;
195 		}
196 		yield();
197 	}
198 
199 	inode = tmp;
200 
201 	gfs2_inode_hold(ip);
202 	ip->i_vnode = inode;
203 	inode->u.generic_ip = ip;
204 
205 	spin_unlock(&ip->i_spin);
206 
207 	insert_inode_hash(inode);
208 
209 	return inode;
210 }
211 
212 static int iget_test(struct inode *inode, void *opaque)
213 {
214 	struct gfs2_inode *ip = inode->u.generic_ip;
215 	struct gfs2_inum *inum = (struct gfs2_inum *)opaque;
216 
217 	if (ip && ip->i_num.no_addr == inum->no_addr)
218 		return 1;
219 
220 	return 0;
221 }
222 
223 struct inode *gfs2_iget(struct super_block *sb, struct gfs2_inum *inum)
224 {
225 	return ilookup5(sb, (unsigned long)inum->no_formal_ino,
226 			iget_test, inum);
227 }
228 
229 void gfs2_inode_min_init(struct gfs2_inode *ip, unsigned int type)
230 {
231 	if (!test_and_set_bit(GIF_MIN_INIT, &ip->i_flags)) {
232 		ip->i_di.di_nlink = 1;
233 		ip->i_di.di_mode = DT2IF(type);
234 	}
235 }
236 
237 /**
238  * gfs2_inode_refresh - Refresh the incore copy of the dinode
239  * @ip: The GFS2 inode
240  *
241  * Returns: errno
242  */
243 
244 int gfs2_inode_refresh(struct gfs2_inode *ip)
245 {
246 	struct buffer_head *dibh;
247 	int error;
248 
249 	error = gfs2_meta_inode_buffer(ip, &dibh);
250 	if (error)
251 		return error;
252 
253 	if (gfs2_metatype_check(ip->i_sbd, dibh, GFS2_METATYPE_DI)) {
254 		brelse(dibh);
255 		return -EIO;
256 	}
257 
258 	gfs2_dinode_in(&ip->i_di, dibh->b_data);
259 	set_bit(GIF_MIN_INIT, &ip->i_flags);
260 
261 	brelse(dibh);
262 
263 	if (ip->i_num.no_addr != ip->i_di.di_num.no_addr) {
264 		if (gfs2_consist_inode(ip))
265 			gfs2_dinode_print(&ip->i_di);
266 		return -EIO;
267 	}
268 	if (ip->i_num.no_formal_ino != ip->i_di.di_num.no_formal_ino)
269 		return -ESTALE;
270 
271 	ip->i_vn = ip->i_gl->gl_vn;
272 
273 	return 0;
274 }
275 
276 /**
277  * inode_create - create a struct gfs2_inode
278  * @i_gl: The glock covering the inode
279  * @inum: The inode number
280  * @io_gl: the iopen glock to acquire/hold (using holder in new gfs2_inode)
281  * @io_state: the state the iopen glock should be acquired in
282  * @ipp: pointer to put the returned inode in
283  *
284  * Returns: errno
285  */
286 
287 static int inode_create(struct gfs2_glock *i_gl, const struct gfs2_inum *inum,
288 			struct gfs2_glock *io_gl, unsigned int io_state,
289 			struct gfs2_inode **ipp)
290 {
291 	struct gfs2_sbd *sdp = i_gl->gl_sbd;
292 	struct gfs2_inode *ip;
293 	int error = 0;
294 
295 	ip = kmem_cache_alloc(gfs2_inode_cachep, GFP_KERNEL);
296 	if (!ip)
297 		return -ENOMEM;
298 	memset(ip, 0, sizeof(struct gfs2_inode));
299 
300 	ip->i_num = *inum;
301 
302 	atomic_set(&ip->i_count, 1);
303 
304 	ip->i_vn = i_gl->gl_vn - 1;
305 
306 	ip->i_gl = i_gl;
307 	ip->i_sbd = sdp;
308 
309 	spin_lock_init(&ip->i_spin);
310 	init_rwsem(&ip->i_rw_mutex);
311 
312 	ip->i_greedy = gfs2_tune_get(sdp, gt_greedy_default);
313 
314 	error = gfs2_glock_nq_init(io_gl,
315 				   io_state, GL_LOCAL_EXCL | GL_EXACT,
316 				   &ip->i_iopen_gh);
317 	if (error)
318 		goto fail;
319 	ip->i_iopen_gh.gh_owner = NULL;
320 
321 	spin_lock(&io_gl->gl_spin);
322 	gfs2_glock_hold(i_gl);
323 	io_gl->gl_object = i_gl;
324 	spin_unlock(&io_gl->gl_spin);
325 
326 	gfs2_glock_hold(i_gl);
327 	i_gl->gl_object = ip;
328 
329 	atomic_inc(&sdp->sd_inode_count);
330 
331 	*ipp = ip;
332 
333 	return 0;
334 
335  fail:
336 	gfs2_meta_cache_flush(ip);
337 	kmem_cache_free(gfs2_inode_cachep, ip);
338 	*ipp = NULL;
339 
340 	return error;
341 }
342 
343 /**
344  * gfs2_inode_get - Create or get a reference on an inode
345  * @i_gl: The glock covering the inode
346  * @inum: The inode number
347  * @create:
348  * @ipp: pointer to put the returned inode in
349  *
350  * Returns: errno
351  */
352 
353 int gfs2_inode_get(struct gfs2_glock *i_gl, const struct gfs2_inum *inum,
354 		   int create, struct gfs2_inode **ipp)
355 {
356 	struct gfs2_sbd *sdp = i_gl->gl_sbd;
357 	struct gfs2_glock *io_gl;
358 	int error = 0;
359 
360 	gfs2_glmutex_lock(i_gl);
361 
362 	*ipp = i_gl->gl_object;
363 	if (*ipp) {
364 		error = -ESTALE;
365 		if ((*ipp)->i_num.no_formal_ino != inum->no_formal_ino)
366 			goto out;
367 		atomic_inc(&(*ipp)->i_count);
368 		error = 0;
369 		goto out;
370 	}
371 
372 	if (!create)
373 		goto out;
374 
375 	error = gfs2_glock_get(sdp, inum->no_addr, &gfs2_iopen_glops,
376 			       CREATE, &io_gl);
377 	if (!error) {
378 		error = inode_create(i_gl, inum, io_gl, LM_ST_SHARED, ipp);
379 		gfs2_glock_put(io_gl);
380 	}
381 
382  out:
383 	gfs2_glmutex_unlock(i_gl);
384 
385 	return error;
386 }
387 
388 void gfs2_inode_hold(struct gfs2_inode *ip)
389 {
390 	gfs2_assert(ip->i_sbd, atomic_read(&ip->i_count) > 0);
391 	atomic_inc(&ip->i_count);
392 }
393 
394 void gfs2_inode_put(struct gfs2_inode *ip)
395 {
396 	gfs2_assert(ip->i_sbd, atomic_read(&ip->i_count) > 0);
397 	atomic_dec(&ip->i_count);
398 }
399 
400 void gfs2_inode_destroy(struct gfs2_inode *ip)
401 {
402 	struct gfs2_sbd *sdp = ip->i_sbd;
403 	struct gfs2_glock *io_gl = ip->i_iopen_gh.gh_gl;
404 	struct gfs2_glock *i_gl = ip->i_gl;
405 
406 	gfs2_assert_warn(sdp, !atomic_read(&ip->i_count));
407 	gfs2_assert(sdp, io_gl->gl_object == i_gl);
408 
409 	spin_lock(&io_gl->gl_spin);
410 	io_gl->gl_object = NULL;
411 	gfs2_glock_put(i_gl);
412 	spin_unlock(&io_gl->gl_spin);
413 
414 	gfs2_glock_dq_uninit(&ip->i_iopen_gh);
415 
416 	gfs2_meta_cache_flush(ip);
417 	kmem_cache_free(gfs2_inode_cachep, ip);
418 
419 	i_gl->gl_object = NULL;
420 	gfs2_glock_put(i_gl);
421 
422 	atomic_dec(&sdp->sd_inode_count);
423 }
424 
425 static int dinode_dealloc(struct gfs2_inode *ip, struct gfs2_unlinked *ul)
426 {
427 	struct gfs2_sbd *sdp = ip->i_sbd;
428 	struct gfs2_alloc *al;
429 	struct gfs2_rgrpd *rgd;
430 	int error;
431 
432 	if (ip->i_di.di_blocks != 1) {
433 		if (gfs2_consist_inode(ip))
434 			gfs2_dinode_print(&ip->i_di);
435 		return -EIO;
436 	}
437 
438 	al = gfs2_alloc_get(ip);
439 
440 	error = gfs2_quota_hold(ip, NO_QUOTA_CHANGE, NO_QUOTA_CHANGE);
441 	if (error)
442 		goto out;
443 
444 	error = gfs2_rindex_hold(sdp, &al->al_ri_gh);
445 	if (error)
446 		goto out_qs;
447 
448 	rgd = gfs2_blk2rgrpd(sdp, ip->i_num.no_addr);
449 	if (!rgd) {
450 		gfs2_consist_inode(ip);
451 		error = -EIO;
452 		goto out_rindex_relse;
453 	}
454 
455 	error = gfs2_glock_nq_init(rgd->rd_gl, LM_ST_EXCLUSIVE, 0,
456 				   &al->al_rgd_gh);
457 	if (error)
458 		goto out_rindex_relse;
459 
460 	error = gfs2_trans_begin(sdp, RES_RG_BIT + RES_UNLINKED +
461 				 RES_STATFS + RES_QUOTA, 1);
462 	if (error)
463 		goto out_rg_gunlock;
464 
465 	gfs2_trans_add_gl(ip->i_gl);
466 
467 	gfs2_free_di(rgd, ip);
468 
469 	error = gfs2_unlinked_ondisk_rm(sdp, ul);
470 
471 	gfs2_trans_end(sdp);
472 	clear_bit(GLF_STICKY, &ip->i_gl->gl_flags);
473 
474  out_rg_gunlock:
475 	gfs2_glock_dq_uninit(&al->al_rgd_gh);
476 
477  out_rindex_relse:
478 	gfs2_glock_dq_uninit(&al->al_ri_gh);
479 
480  out_qs:
481 	gfs2_quota_unhold(ip);
482 
483  out:
484 	gfs2_alloc_put(ip);
485 
486 	return error;
487 }
488 
489 /**
490  * inode_dealloc - Deallocate all on-disk blocks for an inode (dinode)
491  * @sdp: the filesystem
492  * @inum: the inode number to deallocate
493  * @io_gh: a holder for the iopen glock for this inode
494  *
495  * Returns: errno
496  */
497 
498 static int inode_dealloc(struct gfs2_sbd *sdp, struct gfs2_unlinked *ul,
499 			 struct gfs2_holder *io_gh)
500 {
501 	struct gfs2_inode *ip;
502 	struct gfs2_holder i_gh;
503 	int error;
504 
505 	error = gfs2_glock_nq_num(sdp,
506 				  ul->ul_ut.ut_inum.no_addr, &gfs2_inode_glops,
507 				  LM_ST_EXCLUSIVE, 0, &i_gh);
508 	if (error)
509 		return error;
510 
511 	/* We reacquire the iopen lock here to avoid a race with the NFS server
512 	   calling gfs2_read_inode() with the inode number of a inode we're in
513 	   the process of deallocating.  And we can't keep our hold on the lock
514 	   from inode_dealloc_init() for deadlock reasons. */
515 
516 	gfs2_holder_reinit(LM_ST_EXCLUSIVE, LM_FLAG_TRY, io_gh);
517 	error = gfs2_glock_nq(io_gh);
518 	switch (error) {
519 	case 0:
520 		break;
521 	case GLR_TRYFAILED:
522 		error = 1;
523 	default:
524 		goto out;
525 	}
526 
527 	gfs2_assert_warn(sdp, !i_gh.gh_gl->gl_object);
528 	error = inode_create(i_gh.gh_gl, &ul->ul_ut.ut_inum, io_gh->gh_gl,
529 			     LM_ST_EXCLUSIVE, &ip);
530 
531 	gfs2_glock_dq(io_gh);
532 
533 	if (error)
534 		goto out;
535 
536 	error = gfs2_inode_refresh(ip);
537 	if (error)
538 		goto out_iput;
539 
540 	if (ip->i_di.di_nlink) {
541 		if (gfs2_consist_inode(ip))
542 			gfs2_dinode_print(&ip->i_di);
543 		error = -EIO;
544 		goto out_iput;
545 	}
546 
547 	if (S_ISDIR(ip->i_di.di_mode) &&
548 	    (ip->i_di.di_flags & GFS2_DIF_EXHASH)) {
549 		error = gfs2_dir_exhash_dealloc(ip);
550 		if (error)
551 			goto out_iput;
552 	}
553 
554 	if (ip->i_di.di_eattr) {
555 		error = gfs2_ea_dealloc(ip);
556 		if (error)
557 			goto out_iput;
558 	}
559 
560 	if (!gfs2_is_stuffed(ip)) {
561 		error = gfs2_file_dealloc(ip);
562 		if (error)
563 			goto out_iput;
564 	}
565 
566 	error = dinode_dealloc(ip, ul);
567 	if (error)
568 		goto out_iput;
569 
570  out_iput:
571 	gfs2_glmutex_lock(i_gh.gh_gl);
572 	gfs2_inode_put(ip);
573 	gfs2_inode_destroy(ip);
574 	gfs2_glmutex_unlock(i_gh.gh_gl);
575 
576  out:
577 	gfs2_glock_dq_uninit(&i_gh);
578 
579 	return error;
580 }
581 
582 /**
583  * try_inode_dealloc - Try to deallocate an inode and all its blocks
584  * @sdp: the filesystem
585  *
586  * Returns: 0 on success, -errno on error, 1 on busy (inode open)
587  */
588 
589 static int try_inode_dealloc(struct gfs2_sbd *sdp, struct gfs2_unlinked *ul)
590 {
591 	struct gfs2_holder io_gh;
592 	int error = 0;
593 
594 	gfs2_try_toss_inode(sdp, &ul->ul_ut.ut_inum);
595 
596 	error = gfs2_glock_nq_num(sdp,
597 				  ul->ul_ut.ut_inum.no_addr, &gfs2_iopen_glops,
598 				  LM_ST_EXCLUSIVE, LM_FLAG_TRY_1CB, &io_gh);
599 	switch (error) {
600 	case 0:
601 		break;
602 	case GLR_TRYFAILED:
603 		return 1;
604 	default:
605 		return error;
606 	}
607 
608 	gfs2_glock_dq(&io_gh);
609 	error = inode_dealloc(sdp, ul, &io_gh);
610 	gfs2_holder_uninit(&io_gh);
611 
612 	return error;
613 }
614 
615 static int inode_dealloc_uninit(struct gfs2_sbd *sdp, struct gfs2_unlinked *ul)
616 {
617 	struct gfs2_rgrpd *rgd;
618 	struct gfs2_holder ri_gh, rgd_gh;
619 	int error;
620 
621 	error = gfs2_rindex_hold(sdp, &ri_gh);
622 	if (error)
623 		return error;
624 
625 	rgd = gfs2_blk2rgrpd(sdp, ul->ul_ut.ut_inum.no_addr);
626 	if (!rgd) {
627 		gfs2_consist(sdp);
628 		error = -EIO;
629 		goto out;
630 	}
631 
632 	error = gfs2_glock_nq_init(rgd->rd_gl, LM_ST_EXCLUSIVE, 0, &rgd_gh);
633 	if (error)
634 		goto out;
635 
636 	error = gfs2_trans_begin(sdp,
637 				 RES_RG_BIT + RES_UNLINKED + RES_STATFS,
638 				 0);
639 	if (error)
640 		goto out_gunlock;
641 
642 	gfs2_free_uninit_di(rgd, ul->ul_ut.ut_inum.no_addr);
643 	gfs2_unlinked_ondisk_rm(sdp, ul);
644 
645 	gfs2_trans_end(sdp);
646 
647  out_gunlock:
648 	gfs2_glock_dq_uninit(&rgd_gh);
649  out:
650 	gfs2_glock_dq_uninit(&ri_gh);
651 
652 	return error;
653 }
654 
655 int gfs2_inode_dealloc(struct gfs2_sbd *sdp, struct gfs2_unlinked *ul)
656 {
657 	if (ul->ul_ut.ut_flags & GFS2_UTF_UNINIT)
658 		return inode_dealloc_uninit(sdp, ul);
659 	else
660 		return try_inode_dealloc(sdp, ul);
661 }
662 
663 /**
664  * gfs2_change_nlink - Change nlink count on inode
665  * @ip: The GFS2 inode
666  * @diff: The change in the nlink count required
667  *
668  * Returns: errno
669  */
670 
671 int gfs2_change_nlink(struct gfs2_inode *ip, int diff)
672 {
673 	struct buffer_head *dibh;
674 	uint32_t nlink;
675 	int error;
676 
677 	nlink = ip->i_di.di_nlink + diff;
678 
679 	/* If we are reducing the nlink count, but the new value ends up being
680 	   bigger than the old one, we must have underflowed. */
681 	if (diff < 0 && nlink > ip->i_di.di_nlink) {
682 		if (gfs2_consist_inode(ip))
683 			gfs2_dinode_print(&ip->i_di);
684 		return -EIO;
685 	}
686 
687 	error = gfs2_meta_inode_buffer(ip, &dibh);
688 	if (error)
689 		return error;
690 
691 	ip->i_di.di_nlink = nlink;
692 	ip->i_di.di_ctime = get_seconds();
693 
694 	gfs2_trans_add_bh(ip->i_gl, dibh, 1);
695 	gfs2_dinode_out(&ip->i_di, dibh->b_data);
696 	brelse(dibh);
697 
698 	return 0;
699 }
700 
701 struct inode *gfs2_lookup_simple(struct inode *dip, const char *name)
702 {
703 	struct qstr qstr;
704 	qstr.name = name;
705 	qstr.len = strlen(name);
706 	qstr.hash = gfs2_disk_hash(qstr.name, qstr.len);
707 	return gfs2_lookupi(dip, &qstr, 1, NULL);
708 }
709 
710 
711 /**
712  * gfs2_lookupi - Look up a filename in a directory and return its inode
713  * @d_gh: An initialized holder for the directory glock
714  * @name: The name of the inode to look for
715  * @is_root: If 1, ignore the caller's permissions
716  * @i_gh: An uninitialized holder for the new inode glock
717  *
718  * There will always be a vnode (Linux VFS inode) for the d_gh inode unless
719  * @is_root is true.
720  *
721  * Returns: errno
722  */
723 
724 struct inode *gfs2_lookupi(struct inode *dir, struct qstr *name, int is_root,
725 			   struct nameidata *nd)
726 
727 {
728 	struct super_block *sb = dir->i_sb;
729 	struct gfs2_inode *ipp;
730 	struct gfs2_inode *dip = dir->u.generic_ip;
731 	struct gfs2_sbd *sdp = dip->i_sbd;
732 	struct gfs2_holder d_gh;
733 	struct gfs2_inum inum;
734 	unsigned int type;
735 	struct gfs2_glock *gl;
736 	int error = 0;
737 	struct inode *inode = NULL;
738 
739 	if (!name->len || name->len > GFS2_FNAMESIZE)
740 		return ERR_PTR(-ENAMETOOLONG);
741 
742 	if ((name->len == 1 && memcmp(name->name, ".", 1) == 0) ||
743 	    (name->len == 2 && memcmp(name->name, "..", 2) == 0 &&
744 	     dir == sb->s_root->d_inode)) {
745 		gfs2_inode_hold(dip);
746 		ipp = dip;
747 		goto done;
748 	}
749 
750 	error = gfs2_glock_nq_init(dip->i_gl, LM_ST_SHARED, 0, &d_gh);
751 	if (error)
752 		return ERR_PTR(error);
753 
754 	if (!is_root) {
755 		error = gfs2_repermission(dip->i_vnode, MAY_EXEC, NULL);
756 		if (error)
757 			goto out;
758 	}
759 
760 	error = gfs2_dir_search(dir, name, &inum, &type);
761 	if (error)
762 		goto out;
763 
764 	error = gfs2_glock_get(sdp, inum.no_addr, &gfs2_inode_glops,
765 			       CREATE, &gl);
766 	if (error)
767 		goto out;
768 
769 	error = gfs2_inode_get(gl, &inum, CREATE, &ipp);
770 	if (!error)
771 		gfs2_inode_min_init(ipp, type);
772 
773 	gfs2_glock_put(gl);
774 
775 out:
776 	gfs2_glock_dq_uninit(&d_gh);
777 done:
778 	if (error == -ENOENT)
779 		return NULL;
780 	if (error == 0) {
781 		inode = gfs2_ip2v(ipp);
782 		gfs2_inode_put(ipp);
783 		if (!inode)
784 			return ERR_PTR(-ENOMEM);
785 		return inode;
786 	}
787 	return ERR_PTR(error);
788 }
789 
790 static int pick_formal_ino_1(struct gfs2_sbd *sdp, uint64_t *formal_ino)
791 {
792 	struct gfs2_inode *ip = sdp->sd_ir_inode->u.generic_ip;
793 	struct buffer_head *bh;
794 	struct gfs2_inum_range ir;
795 	int error;
796 
797 	error = gfs2_trans_begin(sdp, RES_DINODE, 0);
798 	if (error)
799 		return error;
800 	mutex_lock(&sdp->sd_inum_mutex);
801 
802 	error = gfs2_meta_inode_buffer(ip, &bh);
803 	if (error) {
804 		mutex_unlock(&sdp->sd_inum_mutex);
805 		gfs2_trans_end(sdp);
806 		return error;
807 	}
808 
809 	gfs2_inum_range_in(&ir, bh->b_data + sizeof(struct gfs2_dinode));
810 
811 	if (ir.ir_length) {
812 		*formal_ino = ir.ir_start++;
813 		ir.ir_length--;
814 		gfs2_trans_add_bh(ip->i_gl, bh, 1);
815 		gfs2_inum_range_out(&ir,
816 				    bh->b_data + sizeof(struct gfs2_dinode));
817 		brelse(bh);
818 		mutex_unlock(&sdp->sd_inum_mutex);
819 		gfs2_trans_end(sdp);
820 		return 0;
821 	}
822 
823 	brelse(bh);
824 
825 	mutex_unlock(&sdp->sd_inum_mutex);
826 	gfs2_trans_end(sdp);
827 
828 	return 1;
829 }
830 
831 static int pick_formal_ino_2(struct gfs2_sbd *sdp, uint64_t *formal_ino)
832 {
833 	struct gfs2_inode *ip = sdp->sd_ir_inode->u.generic_ip;
834 	struct gfs2_inode *m_ip = sdp->sd_inum_inode->u.generic_ip;
835 	struct gfs2_holder gh;
836 	struct buffer_head *bh;
837 	struct gfs2_inum_range ir;
838 	int error;
839 
840 	error = gfs2_glock_nq_init(m_ip->i_gl, LM_ST_EXCLUSIVE, 0, &gh);
841 	if (error)
842 		return error;
843 
844 	error = gfs2_trans_begin(sdp, 2 * RES_DINODE, 0);
845 	if (error)
846 		goto out;
847 	mutex_lock(&sdp->sd_inum_mutex);
848 
849 	error = gfs2_meta_inode_buffer(ip, &bh);
850 	if (error)
851 		goto out_end_trans;
852 
853 	gfs2_inum_range_in(&ir, bh->b_data + sizeof(struct gfs2_dinode));
854 
855 	if (!ir.ir_length) {
856 		struct buffer_head *m_bh;
857 		uint64_t x, y;
858 
859 		error = gfs2_meta_inode_buffer(m_ip, &m_bh);
860 		if (error)
861 			goto out_brelse;
862 
863 		x = *(uint64_t *)(m_bh->b_data + sizeof(struct gfs2_dinode));
864 		x = y = be64_to_cpu(x);
865 		ir.ir_start = x;
866 		ir.ir_length = GFS2_INUM_QUANTUM;
867 		x += GFS2_INUM_QUANTUM;
868 		if (x < y)
869 			gfs2_consist_inode(m_ip);
870 		x = cpu_to_be64(x);
871 		gfs2_trans_add_bh(m_ip->i_gl, m_bh, 1);
872 		*(uint64_t *)(m_bh->b_data + sizeof(struct gfs2_dinode)) = x;
873 
874 		brelse(m_bh);
875 	}
876 
877 	*formal_ino = ir.ir_start++;
878 	ir.ir_length--;
879 
880 	gfs2_trans_add_bh(ip->i_gl, bh, 1);
881 	gfs2_inum_range_out(&ir, bh->b_data + sizeof(struct gfs2_dinode));
882 
883  out_brelse:
884 	brelse(bh);
885 
886  out_end_trans:
887 	mutex_unlock(&sdp->sd_inum_mutex);
888 	gfs2_trans_end(sdp);
889 
890  out:
891 	gfs2_glock_dq_uninit(&gh);
892 
893 	return error;
894 }
895 
896 static int pick_formal_ino(struct gfs2_sbd *sdp, uint64_t *inum)
897 {
898 	int error;
899 
900 	error = pick_formal_ino_1(sdp, inum);
901 	if (error <= 0)
902 		return error;
903 
904 	error = pick_formal_ino_2(sdp, inum);
905 
906 	return error;
907 }
908 
909 /**
910  * create_ok - OK to create a new on-disk inode here?
911  * @dip:  Directory in which dinode is to be created
912  * @name:  Name of new dinode
913  * @mode:
914  *
915  * Returns: errno
916  */
917 
918 static int create_ok(struct gfs2_inode *dip, struct qstr *name,
919 		     unsigned int mode)
920 {
921 	int error;
922 
923 	error = gfs2_repermission(dip->i_vnode, MAY_WRITE | MAY_EXEC, NULL);
924 	if (error)
925 		return error;
926 
927 	/*  Don't create entries in an unlinked directory  */
928 	if (!dip->i_di.di_nlink)
929 		return -EPERM;
930 
931 	error = gfs2_dir_search(dip->i_vnode, name, NULL, NULL);
932 	switch (error) {
933 	case -ENOENT:
934 		error = 0;
935 		break;
936 	case 0:
937 		return -EEXIST;
938 	default:
939 		return error;
940 	}
941 
942 	if (dip->i_di.di_entries == (uint32_t)-1)
943 		return -EFBIG;
944 	if (S_ISDIR(mode) && dip->i_di.di_nlink == (uint32_t)-1)
945 		return -EMLINK;
946 
947 	return 0;
948 }
949 
950 static void munge_mode_uid_gid(struct gfs2_inode *dip, unsigned int *mode,
951 			       unsigned int *uid, unsigned int *gid)
952 {
953 	if (dip->i_sbd->sd_args.ar_suiddir &&
954 	    (dip->i_di.di_mode & S_ISUID) &&
955 	    dip->i_di.di_uid) {
956 		if (S_ISDIR(*mode))
957 			*mode |= S_ISUID;
958 		else if (dip->i_di.di_uid != current->fsuid)
959 			*mode &= ~07111;
960 		*uid = dip->i_di.di_uid;
961 	} else
962 		*uid = current->fsuid;
963 
964 	if (dip->i_di.di_mode & S_ISGID) {
965 		if (S_ISDIR(*mode))
966 			*mode |= S_ISGID;
967 		*gid = dip->i_di.di_gid;
968 	} else
969 		*gid = current->fsgid;
970 }
971 
972 static int alloc_dinode(struct gfs2_inode *dip, struct gfs2_unlinked *ul)
973 {
974 	struct gfs2_sbd *sdp = dip->i_sbd;
975 	int error;
976 
977 	gfs2_alloc_get(dip);
978 
979 	dip->i_alloc.al_requested = RES_DINODE;
980 	error = gfs2_inplace_reserve(dip);
981 	if (error)
982 		goto out;
983 
984 	error = gfs2_trans_begin(sdp, RES_RG_BIT + RES_UNLINKED +
985 				 RES_STATFS, 0);
986 	if (error)
987 		goto out_ipreserv;
988 
989 	ul->ul_ut.ut_inum.no_addr = gfs2_alloc_di(dip);
990 
991 	ul->ul_ut.ut_flags = GFS2_UTF_UNINIT;
992 	error = gfs2_unlinked_ondisk_add(sdp, ul);
993 
994 	gfs2_trans_end(sdp);
995 
996  out_ipreserv:
997 	gfs2_inplace_release(dip);
998 
999  out:
1000 	gfs2_alloc_put(dip);
1001 
1002 	return error;
1003 }
1004 
1005 /**
1006  * init_dinode - Fill in a new dinode structure
1007  * @dip: the directory this inode is being created in
1008  * @gl: The glock covering the new inode
1009  * @inum: the inode number
1010  * @mode: the file permissions
1011  * @uid:
1012  * @gid:
1013  *
1014  */
1015 
1016 static void init_dinode(struct gfs2_inode *dip, struct gfs2_glock *gl,
1017 			struct gfs2_inum *inum, unsigned int mode,
1018 			unsigned int uid, unsigned int gid)
1019 {
1020 	struct gfs2_sbd *sdp = dip->i_sbd;
1021 	struct gfs2_dinode *di;
1022 	struct buffer_head *dibh;
1023 
1024 	dibh = gfs2_meta_new(gl, inum->no_addr);
1025 	gfs2_trans_add_bh(gl, dibh, 1);
1026 	gfs2_metatype_set(dibh, GFS2_METATYPE_DI, GFS2_FORMAT_DI);
1027 	gfs2_buffer_clear_tail(dibh, sizeof(struct gfs2_dinode));
1028 	di = (struct gfs2_dinode *)dibh->b_data;
1029 
1030 	di->di_num.no_formal_ino = cpu_to_be64(inum->no_formal_ino);
1031 	di->di_num.no_addr = cpu_to_be64(inum->no_addr);
1032 	di->di_mode = cpu_to_be32(mode);
1033 	di->di_uid = cpu_to_be32(uid);
1034 	di->di_gid = cpu_to_be32(gid);
1035 	di->di_nlink = cpu_to_be32(0);
1036 	di->di_size = cpu_to_be64(0);
1037 	di->di_blocks = cpu_to_be64(1);
1038 	di->di_atime = di->di_mtime = di->di_ctime = cpu_to_be64(get_seconds());
1039 	di->di_major = di->di_minor = cpu_to_be32(0);
1040 	di->di_goal_meta = di->di_goal_data = cpu_to_be64(inum->no_addr);
1041 	di->__pad[0] = di->__pad[1] = 0;
1042 	di->di_flags = cpu_to_be32(0);
1043 
1044 	if (S_ISREG(mode)) {
1045 		if ((dip->i_di.di_flags & GFS2_DIF_INHERIT_JDATA) ||
1046 		    gfs2_tune_get(sdp, gt_new_files_jdata))
1047 			di->di_flags |= cpu_to_be32(GFS2_DIF_JDATA);
1048 		if ((dip->i_di.di_flags & GFS2_DIF_INHERIT_DIRECTIO) ||
1049 		    gfs2_tune_get(sdp, gt_new_files_directio))
1050 			di->di_flags |= cpu_to_be32(GFS2_DIF_DIRECTIO);
1051 	} else if (S_ISDIR(mode)) {
1052 		di->di_flags |= cpu_to_be32(dip->i_di.di_flags &
1053 					    GFS2_DIF_INHERIT_DIRECTIO);
1054 		di->di_flags |= cpu_to_be32(dip->i_di.di_flags &
1055 					    GFS2_DIF_INHERIT_JDATA);
1056 	}
1057 
1058 	di->__pad1 = 0;
1059 	di->di_height = cpu_to_be32(0);
1060 	di->__pad2 = 0;
1061 	di->__pad3 = 0;
1062 	di->di_depth = cpu_to_be16(0);
1063 	di->di_entries = cpu_to_be32(0);
1064 	memset(&di->__pad4, 0, sizeof(di->__pad4));
1065 	di->di_eattr = cpu_to_be64(0);
1066 	memset(&di->di_reserved, 0, sizeof(di->di_reserved));
1067 
1068 	brelse(dibh);
1069 }
1070 
1071 static int make_dinode(struct gfs2_inode *dip, struct gfs2_glock *gl,
1072 		       unsigned int mode, struct gfs2_unlinked *ul)
1073 {
1074 	struct gfs2_sbd *sdp = dip->i_sbd;
1075 	unsigned int uid, gid;
1076 	int error;
1077 
1078 	munge_mode_uid_gid(dip, &mode, &uid, &gid);
1079 
1080 	gfs2_alloc_get(dip);
1081 
1082 	error = gfs2_quota_lock(dip, uid, gid);
1083 	if (error)
1084 		goto out;
1085 
1086 	error = gfs2_quota_check(dip, uid, gid);
1087 	if (error)
1088 		goto out_quota;
1089 
1090 	error = gfs2_trans_begin(sdp, RES_DINODE + RES_UNLINKED +
1091 				 RES_QUOTA, 0);
1092 	if (error)
1093 		goto out_quota;
1094 
1095 	ul->ul_ut.ut_flags = 0;
1096 	error = gfs2_unlinked_ondisk_munge(sdp, ul);
1097 
1098 	init_dinode(dip, gl, &ul->ul_ut.ut_inum,
1099 		     mode, uid, gid);
1100 
1101 	gfs2_quota_change(dip, +1, uid, gid);
1102 
1103 	gfs2_trans_end(sdp);
1104 
1105  out_quota:
1106 	gfs2_quota_unlock(dip);
1107 
1108  out:
1109 	gfs2_alloc_put(dip);
1110 
1111 	return error;
1112 }
1113 
1114 static int link_dinode(struct gfs2_inode *dip, struct qstr *name,
1115 		       struct gfs2_inode *ip, struct gfs2_unlinked *ul)
1116 {
1117 	struct gfs2_sbd *sdp = dip->i_sbd;
1118 	struct gfs2_alloc *al;
1119 	int alloc_required;
1120 	struct buffer_head *dibh;
1121 	int error;
1122 
1123 	al = gfs2_alloc_get(dip);
1124 
1125 	error = gfs2_quota_lock(dip, NO_QUOTA_CHANGE, NO_QUOTA_CHANGE);
1126 	if (error)
1127 		goto fail;
1128 
1129 	error = alloc_required = gfs2_diradd_alloc_required(dip->i_vnode, name);
1130 	if (alloc_required < 0)
1131 		goto fail;
1132 	if (alloc_required) {
1133 		error = gfs2_quota_check(dip, dip->i_di.di_uid,
1134 					 dip->i_di.di_gid);
1135 		if (error)
1136 			goto fail_quota_locks;
1137 
1138 		al->al_requested = sdp->sd_max_dirres;
1139 
1140 		error = gfs2_inplace_reserve(dip);
1141 		if (error)
1142 			goto fail_quota_locks;
1143 
1144 		error = gfs2_trans_begin(sdp,
1145 					 sdp->sd_max_dirres +
1146 					 al->al_rgd->rd_ri.ri_length +
1147 					 2 * RES_DINODE + RES_UNLINKED +
1148 					 RES_STATFS + RES_QUOTA, 0);
1149 		if (error)
1150 			goto fail_ipreserv;
1151 	} else {
1152 		error = gfs2_trans_begin(sdp,
1153 					 RES_LEAF +
1154 					 2 * RES_DINODE +
1155 					 RES_UNLINKED, 0);
1156 		if (error)
1157 			goto fail_quota_locks;
1158 	}
1159 
1160 	error = gfs2_dir_add(dip->i_vnode, name, &ip->i_num, IF2DT(ip->i_di.di_mode));
1161 	if (error)
1162 		goto fail_end_trans;
1163 
1164 	error = gfs2_meta_inode_buffer(ip, &dibh);
1165 	if (error)
1166 		goto fail_end_trans;
1167 	ip->i_di.di_nlink = 1;
1168 	gfs2_trans_add_bh(ip->i_gl, dibh, 1);
1169 	gfs2_dinode_out(&ip->i_di, dibh->b_data);
1170 	brelse(dibh);
1171 
1172 	error = gfs2_unlinked_ondisk_rm(sdp, ul);
1173 	if (error)
1174 		goto fail_end_trans;
1175 
1176 	return 0;
1177 
1178  fail_end_trans:
1179 	gfs2_trans_end(sdp);
1180 
1181  fail_ipreserv:
1182 	if (dip->i_alloc.al_rgd)
1183 		gfs2_inplace_release(dip);
1184 
1185  fail_quota_locks:
1186 	gfs2_quota_unlock(dip);
1187 
1188  fail:
1189 	gfs2_alloc_put(dip);
1190 
1191 	return error;
1192 }
1193 
1194 /**
1195  * gfs2_createi - Create a new inode
1196  * @ghs: An array of two holders
1197  * @name: The name of the new file
1198  * @mode: the permissions on the new inode
1199  *
1200  * @ghs[0] is an initialized holder for the directory
1201  * @ghs[1] is the holder for the inode lock
1202  *
1203  * If the return value is not NULL, the glocks on both the directory and the new
1204  * file are held.  A transaction has been started and an inplace reservation
1205  * is held, as well.
1206  *
1207  * Returns: An inode
1208  */
1209 
1210 struct inode *gfs2_createi(struct gfs2_holder *ghs, struct qstr *name,
1211 			   unsigned int mode)
1212 {
1213 	struct inode *inode;
1214 	struct gfs2_inode *dip = ghs->gh_gl->gl_object;
1215 	struct gfs2_sbd *sdp = dip->i_sbd;
1216 	struct gfs2_unlinked *ul;
1217 	struct gfs2_inode *ip;
1218 	int error;
1219 
1220 	if (!name->len || name->len > GFS2_FNAMESIZE)
1221 		return ERR_PTR(-ENAMETOOLONG);
1222 
1223 	error = gfs2_unlinked_get(sdp, &ul);
1224 	if (error)
1225 		return ERR_PTR(error);
1226 
1227 	gfs2_holder_reinit(LM_ST_EXCLUSIVE, 0, ghs);
1228 	error = gfs2_glock_nq(ghs);
1229 	if (error)
1230 		goto fail;
1231 
1232 	error = create_ok(dip, name, mode);
1233 	if (error)
1234 		goto fail_gunlock;
1235 
1236 	error = pick_formal_ino(sdp, &ul->ul_ut.ut_inum.no_formal_ino);
1237 	if (error)
1238 		goto fail_gunlock;
1239 
1240 	error = alloc_dinode(dip, ul);
1241 	if (error)
1242 		goto fail_gunlock;
1243 
1244 	if (ul->ul_ut.ut_inum.no_addr < dip->i_num.no_addr) {
1245 		gfs2_glock_dq(ghs);
1246 
1247 		error = gfs2_glock_nq_num(sdp,
1248 					  ul->ul_ut.ut_inum.no_addr,
1249 					  &gfs2_inode_glops,
1250 					  LM_ST_EXCLUSIVE, GL_SKIP,
1251 					  ghs + 1);
1252 		if (error) {
1253 			gfs2_unlinked_put(sdp, ul);
1254 			return ERR_PTR(error);
1255 		}
1256 
1257 		gfs2_holder_reinit(LM_ST_EXCLUSIVE, 0, ghs);
1258 		error = gfs2_glock_nq(ghs);
1259 		if (error) {
1260 			gfs2_glock_dq_uninit(ghs + 1);
1261 			gfs2_unlinked_put(sdp, ul);
1262 			return ERR_PTR(error);
1263 		}
1264 
1265 		error = create_ok(dip, name, mode);
1266 		if (error)
1267 			goto fail_gunlock2;
1268 	} else {
1269 		error = gfs2_glock_nq_num(sdp,
1270 					  ul->ul_ut.ut_inum.no_addr,
1271 					  &gfs2_inode_glops,
1272 					  LM_ST_EXCLUSIVE, GL_SKIP,
1273 					  ghs + 1);
1274 		if (error)
1275 			goto fail_gunlock;
1276 	}
1277 
1278 	error = make_dinode(dip, ghs[1].gh_gl, mode, ul);
1279 	if (error)
1280 		goto fail_gunlock2;
1281 
1282 	error = gfs2_inode_get(ghs[1].gh_gl, &ul->ul_ut.ut_inum, CREATE, &ip);
1283 	if (error)
1284 		goto fail_gunlock2;
1285 
1286 	error = gfs2_inode_refresh(ip);
1287 	if (error)
1288 		goto fail_iput;
1289 
1290 	error = gfs2_acl_create(dip, ip);
1291 	if (error)
1292 		goto fail_iput;
1293 
1294 	error = link_dinode(dip, name, ip, ul);
1295 	if (error)
1296 		goto fail_iput;
1297 
1298 	gfs2_unlinked_put(sdp, ul);
1299 
1300 	inode = gfs2_ip2v(ip);
1301 	gfs2_inode_put(ip);
1302 	if (!inode)
1303 		return ERR_PTR(-ENOMEM);
1304 	return inode;
1305 
1306  fail_iput:
1307 	gfs2_inode_put(ip);
1308 
1309  fail_gunlock2:
1310 	gfs2_glock_dq_uninit(ghs + 1);
1311 
1312  fail_gunlock:
1313 	gfs2_glock_dq(ghs);
1314 
1315  fail:
1316 	gfs2_unlinked_put(sdp, ul);
1317 
1318 	return ERR_PTR(error);
1319 }
1320 
1321 /**
1322  * gfs2_unlinki - Unlink a file
1323  * @dip: The inode of the directory
1324  * @name: The name of the file to be unlinked
1325  * @ip: The inode of the file to be removed
1326  *
1327  * Assumes Glocks on both dip and ip are held.
1328  *
1329  * Returns: errno
1330  */
1331 
1332 int gfs2_unlinki(struct gfs2_inode *dip, struct qstr *name,
1333 		 struct gfs2_inode *ip, struct gfs2_unlinked *ul)
1334 {
1335 	struct gfs2_sbd *sdp = dip->i_sbd;
1336 	int error;
1337 
1338 	error = gfs2_dir_del(dip, name);
1339 	if (error)
1340 		return error;
1341 
1342 	error = gfs2_change_nlink(ip, -1);
1343 	if (error)
1344 		return error;
1345 
1346 	/* If this inode is being unlinked from the directory structure,
1347 	   we need to mark that in the log so that it isn't lost during
1348 	   a crash. */
1349 
1350 	if (!ip->i_di.di_nlink) {
1351 		ul->ul_ut.ut_inum = ip->i_num;
1352 		error = gfs2_unlinked_ondisk_add(sdp, ul);
1353 		if (!error)
1354 			set_bit(GLF_STICKY, &ip->i_gl->gl_flags);
1355 	}
1356 
1357 	return error;
1358 }
1359 
1360 /**
1361  * gfs2_rmdiri - Remove a directory
1362  * @dip: The parent directory of the directory to be removed
1363  * @name: The name of the directory to be removed
1364  * @ip: The GFS2 inode of the directory to be removed
1365  *
1366  * Assumes Glocks on dip and ip are held
1367  *
1368  * Returns: errno
1369  */
1370 
1371 int gfs2_rmdiri(struct gfs2_inode *dip, struct qstr *name,
1372 		struct gfs2_inode *ip, struct gfs2_unlinked *ul)
1373 {
1374 	struct gfs2_sbd *sdp = dip->i_sbd;
1375 	struct qstr dotname;
1376 	int error;
1377 
1378 	if (ip->i_di.di_entries != 2) {
1379 		if (gfs2_consist_inode(ip))
1380 			gfs2_dinode_print(&ip->i_di);
1381 		return -EIO;
1382 	}
1383 
1384 	error = gfs2_dir_del(dip, name);
1385 	if (error)
1386 		return error;
1387 
1388 	error = gfs2_change_nlink(dip, -1);
1389 	if (error)
1390 		return error;
1391 
1392 	dotname.len = 1;
1393 	dotname.name = ".";
1394 	dotname.hash = gfs2_disk_hash(dotname.name, dotname.len);
1395 	error = gfs2_dir_del(ip, &dotname);
1396 	if (error)
1397 		return error;
1398 
1399 	dotname.len = 2;
1400 	dotname.name = "..";
1401 	dotname.hash = gfs2_disk_hash(dotname.name, dotname.len);
1402 	error = gfs2_dir_del(ip, &dotname);
1403 	if (error)
1404 		return error;
1405 
1406 	error = gfs2_change_nlink(ip, -2);
1407 	if (error)
1408 		return error;
1409 
1410 	/* This inode is being unlinked from the directory structure and
1411 	   we need to mark that in the log so that it isn't lost during
1412 	   a crash. */
1413 
1414 	ul->ul_ut.ut_inum = ip->i_num;
1415 	error = gfs2_unlinked_ondisk_add(sdp, ul);
1416 	if (!error)
1417 		set_bit(GLF_STICKY, &ip->i_gl->gl_flags);
1418 
1419 	return error;
1420 }
1421 
1422 /*
1423  * gfs2_unlink_ok - check to see that a inode is still in a directory
1424  * @dip: the directory
1425  * @name: the name of the file
1426  * @ip: the inode
1427  *
1428  * Assumes that the lock on (at least) @dip is held.
1429  *
1430  * Returns: 0 if the parent/child relationship is correct, errno if it isn't
1431  */
1432 
1433 int gfs2_unlink_ok(struct gfs2_inode *dip, struct qstr *name,
1434 		   struct gfs2_inode *ip)
1435 {
1436 	struct gfs2_inum inum;
1437 	unsigned int type;
1438 	int error;
1439 
1440 	if (IS_IMMUTABLE(ip->i_vnode) || IS_APPEND(ip->i_vnode))
1441 		return -EPERM;
1442 
1443 	if ((dip->i_di.di_mode & S_ISVTX) &&
1444 	    dip->i_di.di_uid != current->fsuid &&
1445 	    ip->i_di.di_uid != current->fsuid &&
1446 	    !capable(CAP_FOWNER))
1447 		return -EPERM;
1448 
1449 	if (IS_APPEND(dip->i_vnode))
1450 		return -EPERM;
1451 
1452 	error = gfs2_repermission(dip->i_vnode, MAY_WRITE | MAY_EXEC, NULL);
1453 	if (error)
1454 		return error;
1455 
1456 	error = gfs2_dir_search(dip->i_vnode, name, &inum, &type);
1457 	if (error)
1458 		return error;
1459 
1460 	if (!gfs2_inum_equal(&inum, &ip->i_num))
1461 		return -ENOENT;
1462 
1463 	if (IF2DT(ip->i_di.di_mode) != type) {
1464 		gfs2_consist_inode(dip);
1465 		return -EIO;
1466 	}
1467 
1468 	return 0;
1469 }
1470 
1471 /*
1472  * gfs2_ok_to_move - check if it's ok to move a directory to another directory
1473  * @this: move this
1474  * @to: to here
1475  *
1476  * Follow @to back to the root and make sure we don't encounter @this
1477  * Assumes we already hold the rename lock.
1478  *
1479  * Returns: errno
1480  */
1481 
1482 int gfs2_ok_to_move(struct gfs2_inode *this, struct gfs2_inode *to)
1483 {
1484 	struct inode *dir = to->i_vnode;
1485 	struct super_block *sb = dir->i_sb;
1486 	struct inode *tmp;
1487 	struct qstr dotdot;
1488 	int error = 0;
1489 
1490 	memset(&dotdot, 0, sizeof(struct qstr));
1491 	dotdot.name = "..";
1492 	dotdot.len = 2;
1493 	dotdot.hash = gfs2_disk_hash(dotdot.name, dotdot.len);
1494 
1495 	igrab(dir);
1496 
1497 	for (;;) {
1498 		if (dir == this->i_vnode) {
1499 			error = -EINVAL;
1500 			break;
1501 		}
1502 		if (dir == sb->s_root->d_inode) {
1503 			error = 0;
1504 			break;
1505 		}
1506 
1507 		tmp = gfs2_lookupi(dir, &dotdot, 1, NULL);
1508 		if (IS_ERR(tmp)) {
1509 			error = PTR_ERR(tmp);
1510 			break;
1511 		}
1512 
1513 		iput(dir);
1514 		dir = tmp;
1515 	}
1516 
1517 	iput(dir);
1518 
1519 	return error;
1520 }
1521 
1522 /**
1523  * gfs2_readlinki - return the contents of a symlink
1524  * @ip: the symlink's inode
1525  * @buf: a pointer to the buffer to be filled
1526  * @len: a pointer to the length of @buf
1527  *
1528  * If @buf is too small, a piece of memory is kmalloc()ed and needs
1529  * to be freed by the caller.
1530  *
1531  * Returns: errno
1532  */
1533 
1534 int gfs2_readlinki(struct gfs2_inode *ip, char **buf, unsigned int *len)
1535 {
1536 	struct gfs2_holder i_gh;
1537 	struct buffer_head *dibh;
1538 	unsigned int x;
1539 	int error;
1540 
1541 	gfs2_holder_init(ip->i_gl, LM_ST_SHARED, GL_ATIME, &i_gh);
1542 	error = gfs2_glock_nq_atime(&i_gh);
1543 	if (error) {
1544 		gfs2_holder_uninit(&i_gh);
1545 		return error;
1546 	}
1547 
1548 	if (!ip->i_di.di_size) {
1549 		gfs2_consist_inode(ip);
1550 		error = -EIO;
1551 		goto out;
1552 	}
1553 
1554 	error = gfs2_meta_inode_buffer(ip, &dibh);
1555 	if (error)
1556 		goto out;
1557 
1558 	x = ip->i_di.di_size + 1;
1559 	if (x > *len) {
1560 		*buf = kmalloc(x, GFP_KERNEL);
1561 		if (!*buf) {
1562 			error = -ENOMEM;
1563 			goto out_brelse;
1564 		}
1565 	}
1566 
1567 	memcpy(*buf, dibh->b_data + sizeof(struct gfs2_dinode), x);
1568 	*len = x;
1569 
1570  out_brelse:
1571 	brelse(dibh);
1572 
1573  out:
1574 	gfs2_glock_dq_uninit(&i_gh);
1575 
1576 	return error;
1577 }
1578 
1579 /**
1580  * gfs2_glock_nq_atime - Acquire a hold on an inode's glock, and
1581  *       conditionally update the inode's atime
1582  * @gh: the holder to acquire
1583  *
1584  * Tests atime (access time) for gfs2_read, gfs2_readdir and gfs2_mmap
1585  * Update if the difference between the current time and the inode's current
1586  * atime is greater than an interval specified at mount.
1587  *
1588  * Returns: errno
1589  */
1590 
1591 int gfs2_glock_nq_atime(struct gfs2_holder *gh)
1592 {
1593 	struct gfs2_glock *gl = gh->gh_gl;
1594 	struct gfs2_sbd *sdp = gl->gl_sbd;
1595 	struct gfs2_inode *ip = gl->gl_object;
1596 	int64_t curtime, quantum = gfs2_tune_get(sdp, gt_atime_quantum);
1597 	unsigned int state;
1598 	int flags;
1599 	int error;
1600 
1601 	if (gfs2_assert_warn(sdp, gh->gh_flags & GL_ATIME) ||
1602 	    gfs2_assert_warn(sdp, !(gh->gh_flags & GL_ASYNC)) ||
1603 	    gfs2_assert_warn(sdp, gl->gl_ops == &gfs2_inode_glops))
1604 		return -EINVAL;
1605 
1606 	state = gh->gh_state;
1607 	flags = gh->gh_flags;
1608 
1609 	error = gfs2_glock_nq(gh);
1610 	if (error)
1611 		return error;
1612 
1613 	if (test_bit(SDF_NOATIME, &sdp->sd_flags) ||
1614 	    (sdp->sd_vfs->s_flags & MS_RDONLY))
1615 		return 0;
1616 
1617 	curtime = get_seconds();
1618 	if (curtime - ip->i_di.di_atime >= quantum) {
1619 		gfs2_glock_dq(gh);
1620 		gfs2_holder_reinit(LM_ST_EXCLUSIVE,
1621 				  gh->gh_flags & ~LM_FLAG_ANY,
1622 				  gh);
1623 		error = gfs2_glock_nq(gh);
1624 		if (error)
1625 			return error;
1626 
1627 		/* Verify that atime hasn't been updated while we were
1628 		   trying to get exclusive lock. */
1629 
1630 		curtime = get_seconds();
1631 		if (curtime - ip->i_di.di_atime >= quantum) {
1632 			struct buffer_head *dibh;
1633 
1634 			error = gfs2_trans_begin(sdp, RES_DINODE, 0);
1635 			if (error == -EROFS)
1636 				return 0;
1637 			if (error)
1638 				goto fail;
1639 
1640 			error = gfs2_meta_inode_buffer(ip, &dibh);
1641 			if (error)
1642 				goto fail_end_trans;
1643 
1644 			ip->i_di.di_atime = curtime;
1645 
1646 			gfs2_trans_add_bh(ip->i_gl, dibh, 1);
1647 			gfs2_dinode_out(&ip->i_di, dibh->b_data);
1648 			brelse(dibh);
1649 
1650 			gfs2_trans_end(sdp);
1651 		}
1652 
1653 		/* If someone else has asked for the glock,
1654 		   unlock and let them have it. Then reacquire
1655 		   in the original state. */
1656 		if (gfs2_glock_is_blocking(gl)) {
1657 			gfs2_glock_dq(gh);
1658 			gfs2_holder_reinit(state, flags, gh);
1659 			return gfs2_glock_nq(gh);
1660 		}
1661 	}
1662 
1663 	return 0;
1664 
1665  fail_end_trans:
1666 	gfs2_trans_end(sdp);
1667 
1668  fail:
1669 	gfs2_glock_dq(gh);
1670 
1671 	return error;
1672 }
1673 
1674 /**
1675  * glock_compare_atime - Compare two struct gfs2_glock structures for sort
1676  * @arg_a: the first structure
1677  * @arg_b: the second structure
1678  *
1679  * Returns: 1 if A > B
1680  *         -1 if A < B
1681  *          0 if A = B
1682  */
1683 
1684 static int glock_compare_atime(const void *arg_a, const void *arg_b)
1685 {
1686 	struct gfs2_holder *gh_a = *(struct gfs2_holder **)arg_a;
1687 	struct gfs2_holder *gh_b = *(struct gfs2_holder **)arg_b;
1688 	struct lm_lockname *a = &gh_a->gh_gl->gl_name;
1689 	struct lm_lockname *b = &gh_b->gh_gl->gl_name;
1690 	int ret = 0;
1691 
1692 	if (a->ln_number > b->ln_number)
1693 		ret = 1;
1694 	else if (a->ln_number < b->ln_number)
1695 		ret = -1;
1696 	else {
1697 		if (gh_a->gh_state == LM_ST_SHARED &&
1698 		    gh_b->gh_state == LM_ST_EXCLUSIVE)
1699 			ret = 1;
1700 		else if (gh_a->gh_state == LM_ST_SHARED &&
1701 			 (gh_b->gh_flags & GL_ATIME))
1702 			ret = 1;
1703 	}
1704 
1705 	return ret;
1706 }
1707 
1708 /**
1709  * gfs2_glock_nq_m_atime - acquire multiple glocks where one may need an
1710  *      atime update
1711  * @num_gh: the number of structures
1712  * @ghs: an array of struct gfs2_holder structures
1713  *
1714  * Returns: 0 on success (all glocks acquired),
1715  *          errno on failure (no glocks acquired)
1716  */
1717 
1718 int gfs2_glock_nq_m_atime(unsigned int num_gh, struct gfs2_holder *ghs)
1719 {
1720 	struct gfs2_holder **p;
1721 	unsigned int x;
1722 	int error = 0;
1723 
1724 	if (!num_gh)
1725 		return 0;
1726 
1727 	if (num_gh == 1) {
1728 		ghs->gh_flags &= ~(LM_FLAG_TRY | GL_ASYNC);
1729 		if (ghs->gh_flags & GL_ATIME)
1730 			error = gfs2_glock_nq_atime(ghs);
1731 		else
1732 			error = gfs2_glock_nq(ghs);
1733 		return error;
1734 	}
1735 
1736 	p = kcalloc(num_gh, sizeof(struct gfs2_holder *), GFP_KERNEL);
1737 	if (!p)
1738 		return -ENOMEM;
1739 
1740 	for (x = 0; x < num_gh; x++)
1741 		p[x] = &ghs[x];
1742 
1743 	sort(p, num_gh, sizeof(struct gfs2_holder *), glock_compare_atime,NULL);
1744 
1745 	for (x = 0; x < num_gh; x++) {
1746 		p[x]->gh_flags &= ~(LM_FLAG_TRY | GL_ASYNC);
1747 
1748 		if (p[x]->gh_flags & GL_ATIME)
1749 			error = gfs2_glock_nq_atime(p[x]);
1750 		else
1751 			error = gfs2_glock_nq(p[x]);
1752 
1753 		if (error) {
1754 			while (x--)
1755 				gfs2_glock_dq(p[x]);
1756 			break;
1757 		}
1758 	}
1759 
1760 	kfree(p);
1761 
1762 	return error;
1763 }
1764 
1765 /**
1766  * gfs2_try_toss_vnode - See if we can toss a vnode from memory
1767  * @ip: the inode
1768  *
1769  * Returns:  1 if the vnode was tossed
1770  */
1771 
1772 void gfs2_try_toss_vnode(struct gfs2_inode *ip)
1773 {
1774 	struct inode *inode;
1775 
1776 	inode = gfs2_ip2v_lookup(ip);
1777 	if (!inode)
1778 		return;
1779 
1780 	d_prune_aliases(inode);
1781 
1782 	if (S_ISDIR(ip->i_di.di_mode)) {
1783 		struct list_head *head = &inode->i_dentry;
1784 		struct dentry *d = NULL;
1785 
1786 		spin_lock(&dcache_lock);
1787 		if (list_empty(head))
1788 			spin_unlock(&dcache_lock);
1789 		else {
1790 			d = list_entry(head->next, struct dentry, d_alias);
1791 			dget_locked(d);
1792 			spin_unlock(&dcache_lock);
1793 
1794 			if (have_submounts(d))
1795 				dput(d);
1796 			else {
1797 				shrink_dcache_parent(d);
1798 				dput(d);
1799 				d_prune_aliases(inode);
1800 			}
1801 		}
1802 	}
1803 
1804 	inode->i_nlink = 0;
1805 	iput(inode);
1806 }
1807 
1808 
1809 static int
1810 __gfs2_setattr_simple(struct gfs2_inode *ip, struct iattr *attr)
1811 {
1812 	struct buffer_head *dibh;
1813 	int error;
1814 
1815 	error = gfs2_meta_inode_buffer(ip, &dibh);
1816 	if (!error) {
1817 		error = inode_setattr(ip->i_vnode, attr);
1818 		gfs2_assert_warn(ip->i_sbd, !error);
1819 		gfs2_inode_attr_out(ip);
1820 
1821 		gfs2_trans_add_bh(ip->i_gl, dibh, 1);
1822 		gfs2_dinode_out(&ip->i_di, dibh->b_data);
1823 		brelse(dibh);
1824 	}
1825 	return error;
1826 }
1827 
1828 /**
1829  * gfs2_setattr_simple -
1830  * @ip:
1831  * @attr:
1832  *
1833  * Called with a reference on the vnode.
1834  *
1835  * Returns: errno
1836  */
1837 
1838 int gfs2_setattr_simple(struct gfs2_inode *ip, struct iattr *attr)
1839 {
1840 	int error;
1841 
1842 	if (current->journal_info)
1843 		return __gfs2_setattr_simple(ip, attr);
1844 
1845 	error = gfs2_trans_begin(ip->i_sbd, RES_DINODE, 0);
1846 	if (error)
1847 		return error;
1848 
1849 	error = __gfs2_setattr_simple(ip, attr);
1850 
1851 	gfs2_trans_end(ip->i_sbd);
1852 
1853 	return error;
1854 }
1855 
/*
 * gfs2_repermission - forward a permission check to the VFS
 * @inode: the inode to check
 * @mask: the access mask being requested
 * @nd: nameidata passed through to permission(); may be NULL
 *
 * Returns: whatever permission() returns
 */

int gfs2_repermission(struct inode *inode, int mask, struct nameidata *nd)
{
	int error;

	error = permission(inode, mask, nd);

	return error;
}
1860 
1861