xref: /openbmc/linux/fs/jfs/jfs_imap.c (revision 34d6f206a88c2651d216bd3487ac956a40b2ba8e)
1  // SPDX-License-Identifier: GPL-2.0-or-later
2  /*
3   *   Copyright (C) International Business Machines Corp., 2000-2004
4   */
5  
6  /*
7   *	jfs_imap.c: inode allocation map manager
8   *
9   * Serialization:
10   *   Each AG has a simple lock which is used to control the serialization of
11   *	the AG level lists.  This lock should be taken first whenever an AG
12   *	level list will be modified or accessed.
13   *
14   *   Each IAG is locked by obtaining the buffer for the IAG page.
15   *
16   *   There is also an inode lock for the inode map inode.  A read lock needs to
17   *	be taken whenever an IAG is read from the map or the global level
18   *	information is read.  A write lock needs to be taken whenever the global
19   *	level information is modified or an atomic operation needs to be used.
20   *
21   *	If more than one IAG is read at one time, the read lock may not
22   *	be given up until all of the IAG's are read.  Otherwise, a deadlock
23   *	may occur when trying to obtain the read lock while another thread
24   *	holding the read lock is waiting on the IAG already being held.
25   *
26   *   The control page of the inode map is read into memory by diMount().
27   *	Thereafter it should only be modified in memory and then it will be
28   *	written out when the filesystem is unmounted by diUnmount().
29   */
30  
31  #include <linux/fs.h>
32  #include <linux/buffer_head.h>
33  #include <linux/pagemap.h>
34  #include <linux/quotaops.h>
35  #include <linux/slab.h>
36  
37  #include "jfs_incore.h"
38  #include "jfs_inode.h"
39  #include "jfs_filsys.h"
40  #include "jfs_dinode.h"
41  #include "jfs_dmap.h"
42  #include "jfs_imap.h"
43  #include "jfs_metapage.h"
44  #include "jfs_superblock.h"
45  #include "jfs_debug.h"
46  
/*
 * imap locks
 *
 * Every macro parameter is parenthesized where it is used, so that an
 * argument which is an expression rather than a plain identifier
 * (e.g. "base + 1") still selects the intended member.
 */
/* iag free list lock */
#define IAGFREE_LOCK_INIT(imap)		mutex_init(&(imap)->im_freelock)
#define IAGFREE_LOCK(imap)		mutex_lock(&(imap)->im_freelock)
#define IAGFREE_UNLOCK(imap)		mutex_unlock(&(imap)->im_freelock)

/* per ag iag list locks */
#define AG_LOCK_INIT(imap,index)	mutex_init(&((imap)->im_aglock[(index)]))
#define AG_LOCK(imap,agno)		mutex_lock(&(imap)->im_aglock[(agno)])
#define AG_UNLOCK(imap,agno)		mutex_unlock(&(imap)->im_aglock[(agno)])
59  
60  /*
61   * forward references
62   */
63  static int diAllocAG(struct inomap *, int, bool, struct inode *);
64  static int diAllocAny(struct inomap *, int, bool, struct inode *);
65  static int diAllocBit(struct inomap *, struct iag *, int);
66  static int diAllocExt(struct inomap *, int, struct inode *);
67  static int diAllocIno(struct inomap *, int, struct inode *);
68  static int diFindFree(u32, int);
69  static int diNewExt(struct inomap *, struct iag *, int);
70  static int diNewIAG(struct inomap *, int *, int, struct metapage **);
71  static void duplicateIXtree(struct super_block *, s64, int, s64 *);
72  
73  static int diIAGRead(struct inomap * imap, int, struct metapage **);
74  static int copy_from_dinode(struct dinode *, struct inode *);
75  static void copy_to_dinode(struct dinode *, struct inode *);
76  
77  /*
78   * NAME:	diMount()
79   *
80   * FUNCTION:	initialize the incore inode map control structures for
81   *		a fileset or aggregate init time.
82   *
83   *		the inode map's control structure (dinomap) is
84   *		brought in from disk and placed in virtual memory.
85   *
86   * PARAMETERS:
87   *	ipimap	- pointer to inode map inode for the aggregate or fileset.
88   *
89   * RETURN VALUES:
90   *	0	- success
91   *	-ENOMEM	- insufficient free virtual memory.
92   *	-EIO	- i/o error.
93   */
diMount(struct inode * ipimap)94  int diMount(struct inode *ipimap)
95  {
96  	struct inomap *imap;
97  	struct metapage *mp;
98  	int index;
99  	struct dinomap_disk *dinom_le;
100  
101  	/*
102  	 * allocate/initialize the in-memory inode map control structure
103  	 */
104  	/* allocate the in-memory inode map control structure. */
105  	imap = kmalloc(sizeof(struct inomap), GFP_KERNEL);
106  	if (imap == NULL)
107  		return -ENOMEM;
108  
109  	/* read the on-disk inode map control structure. */
110  
111  	mp = read_metapage(ipimap,
112  			   IMAPBLKNO << JFS_SBI(ipimap->i_sb)->l2nbperpage,
113  			   PSIZE, 0);
114  	if (mp == NULL) {
115  		kfree(imap);
116  		return -EIO;
117  	}
118  
119  	/* copy the on-disk version to the in-memory version. */
120  	dinom_le = (struct dinomap_disk *) mp->data;
121  	imap->im_freeiag = le32_to_cpu(dinom_le->in_freeiag);
122  	imap->im_nextiag = le32_to_cpu(dinom_le->in_nextiag);
123  	atomic_set(&imap->im_numinos, le32_to_cpu(dinom_le->in_numinos));
124  	atomic_set(&imap->im_numfree, le32_to_cpu(dinom_le->in_numfree));
125  	imap->im_nbperiext = le32_to_cpu(dinom_le->in_nbperiext);
126  	imap->im_l2nbperiext = le32_to_cpu(dinom_le->in_l2nbperiext);
127  	for (index = 0; index < MAXAG; index++) {
128  		imap->im_agctl[index].inofree =
129  		    le32_to_cpu(dinom_le->in_agctl[index].inofree);
130  		imap->im_agctl[index].extfree =
131  		    le32_to_cpu(dinom_le->in_agctl[index].extfree);
132  		imap->im_agctl[index].numinos =
133  		    le32_to_cpu(dinom_le->in_agctl[index].numinos);
134  		imap->im_agctl[index].numfree =
135  		    le32_to_cpu(dinom_le->in_agctl[index].numfree);
136  	}
137  
138  	/* release the buffer. */
139  	release_metapage(mp);
140  
141  	/*
142  	 * allocate/initialize inode allocation map locks
143  	 */
144  	/* allocate and init iag free list lock */
145  	IAGFREE_LOCK_INIT(imap);
146  
147  	/* allocate and init ag list locks */
148  	for (index = 0; index < MAXAG; index++) {
149  		AG_LOCK_INIT(imap, index);
150  	}
151  
152  	/* bind the inode map inode and inode map control structure
153  	 * to each other.
154  	 */
155  	imap->im_ipimap = ipimap;
156  	JFS_IP(ipimap)->i_imap = imap;
157  
158  	return (0);
159  }
160  
161  
162  /*
163   * NAME:	diUnmount()
164   *
165   * FUNCTION:	write to disk the incore inode map control structures for
166   *		a fileset or aggregate at unmount time.
167   *
168   * PARAMETERS:
169   *	ipimap	- pointer to inode map inode for the aggregate or fileset.
170   *
171   * RETURN VALUES:
172   *	0	- success
173   *	-ENOMEM	- insufficient free virtual memory.
174   *	-EIO	- i/o error.
175   */
diUnmount(struct inode * ipimap,int mounterror)176  int diUnmount(struct inode *ipimap, int mounterror)
177  {
178  	struct inomap *imap = JFS_IP(ipimap)->i_imap;
179  
180  	/*
181  	 * update the on-disk inode map control structure
182  	 */
183  
184  	if (!(mounterror || isReadOnly(ipimap)))
185  		diSync(ipimap);
186  
187  	/*
188  	 * Invalidate the page cache buffers
189  	 */
190  	truncate_inode_pages(ipimap->i_mapping, 0);
191  
192  	/*
193  	 * free in-memory control structure
194  	 */
195  	kfree(imap);
196  	JFS_IP(ipimap)->i_imap = NULL;
197  
198  	return (0);
199  }
200  
201  
202  /*
203   *	diSync()
204   */
diSync(struct inode * ipimap)205  int diSync(struct inode *ipimap)
206  {
207  	struct dinomap_disk *dinom_le;
208  	struct inomap *imp = JFS_IP(ipimap)->i_imap;
209  	struct metapage *mp;
210  	int index;
211  
212  	/*
213  	 * write imap global conrol page
214  	 */
215  	/* read the on-disk inode map control structure */
216  	mp = get_metapage(ipimap,
217  			  IMAPBLKNO << JFS_SBI(ipimap->i_sb)->l2nbperpage,
218  			  PSIZE, 0);
219  	if (mp == NULL) {
220  		jfs_err("diSync: get_metapage failed!");
221  		return -EIO;
222  	}
223  
224  	/* copy the in-memory version to the on-disk version */
225  	dinom_le = (struct dinomap_disk *) mp->data;
226  	dinom_le->in_freeiag = cpu_to_le32(imp->im_freeiag);
227  	dinom_le->in_nextiag = cpu_to_le32(imp->im_nextiag);
228  	dinom_le->in_numinos = cpu_to_le32(atomic_read(&imp->im_numinos));
229  	dinom_le->in_numfree = cpu_to_le32(atomic_read(&imp->im_numfree));
230  	dinom_le->in_nbperiext = cpu_to_le32(imp->im_nbperiext);
231  	dinom_le->in_l2nbperiext = cpu_to_le32(imp->im_l2nbperiext);
232  	for (index = 0; index < MAXAG; index++) {
233  		dinom_le->in_agctl[index].inofree =
234  		    cpu_to_le32(imp->im_agctl[index].inofree);
235  		dinom_le->in_agctl[index].extfree =
236  		    cpu_to_le32(imp->im_agctl[index].extfree);
237  		dinom_le->in_agctl[index].numinos =
238  		    cpu_to_le32(imp->im_agctl[index].numinos);
239  		dinom_le->in_agctl[index].numfree =
240  		    cpu_to_le32(imp->im_agctl[index].numfree);
241  	}
242  
243  	/* write out the control structure */
244  	write_metapage(mp);
245  
246  	/*
247  	 * write out dirty pages of imap
248  	 */
249  	filemap_write_and_wait(ipimap->i_mapping);
250  
251  	diWriteSpecial(ipimap, 0);
252  
253  	return (0);
254  }
255  
256  
257  /*
258   * NAME:	diRead()
259   *
260   * FUNCTION:	initialize an incore inode from disk.
261   *
262   *		on entry, the specifed incore inode should itself
263   *		specify the disk inode number corresponding to the
264   *		incore inode (i.e. i_number should be initialized).
265   *
266   *		this routine handles incore inode initialization for
267   *		both "special" and "regular" inodes.  special inodes
268   *		are those required early in the mount process and
269   *		require special handling since much of the file system
270   *		is not yet initialized.  these "special" inodes are
271   *		identified by a NULL inode map inode pointer and are
272   *		actually initialized by a call to diReadSpecial().
273   *
274   *		for regular inodes, the iag describing the disk inode
275   *		is read from disk to determine the inode extent address
276   *		for the disk inode.  with the inode extent address in
277   *		hand, the page of the extent that contains the disk
278   *		inode is read and the disk inode is copied to the
279   *		incore inode.
280   *
281   * PARAMETERS:
282   *	ip	-  pointer to incore inode to be initialized from disk.
283   *
284   * RETURN VALUES:
285   *	0	- success
286   *	-EIO	- i/o error.
287   *	-ENOMEM	- insufficient memory
288   *
289   */
diRead(struct inode * ip)290  int diRead(struct inode *ip)
291  {
292  	struct jfs_sb_info *sbi = JFS_SBI(ip->i_sb);
293  	int iagno, ino, extno, rc, agno;
294  	struct inode *ipimap;
295  	struct dinode *dp;
296  	struct iag *iagp;
297  	struct metapage *mp;
298  	s64 blkno, agstart;
299  	struct inomap *imap;
300  	int block_offset;
301  	int inodes_left;
302  	unsigned long pageno;
303  	int rel_inode;
304  
305  	jfs_info("diRead: ino = %ld", ip->i_ino);
306  
307  	ipimap = sbi->ipimap;
308  	JFS_IP(ip)->ipimap = ipimap;
309  
310  	/* determine the iag number for this inode (number) */
311  	iagno = INOTOIAG(ip->i_ino);
312  
313  	/* read the iag */
314  	IREAD_LOCK(ipimap, RDWRLOCK_IMAP);
315  	imap = JFS_IP(ipimap)->i_imap;
316  	rc = diIAGRead(imap, iagno, &mp);
317  	IREAD_UNLOCK(ipimap);
318  	if (rc) {
319  		jfs_err("diRead: diIAGRead returned %d", rc);
320  		return (rc);
321  	}
322  
323  	iagp = (struct iag *) mp->data;
324  
325  	/* determine inode extent that holds the disk inode */
326  	ino = ip->i_ino & (INOSPERIAG - 1);
327  	extno = ino >> L2INOSPEREXT;
328  
329  	if ((lengthPXD(&iagp->inoext[extno]) != imap->im_nbperiext) ||
330  	    (addressPXD(&iagp->inoext[extno]) == 0)) {
331  		release_metapage(mp);
332  		return -ESTALE;
333  	}
334  
335  	/* get disk block number of the page within the inode extent
336  	 * that holds the disk inode.
337  	 */
338  	blkno = INOPBLK(&iagp->inoext[extno], ino, sbi->l2nbperpage);
339  
340  	/* get the ag for the iag */
341  	agstart = le64_to_cpu(iagp->agstart);
342  	agno = BLKTOAG(agstart, JFS_SBI(ip->i_sb));
343  
344  	release_metapage(mp);
345  	if (agno >= MAXAG || agno < 0)
346  		return -EIO;
347  
348  	rel_inode = (ino & (INOSPERPAGE - 1));
349  	pageno = blkno >> sbi->l2nbperpage;
350  
351  	if ((block_offset = ((u32) blkno & (sbi->nbperpage - 1)))) {
352  		/*
353  		 * OS/2 didn't always align inode extents on page boundaries
354  		 */
355  		inodes_left =
356  		     (sbi->nbperpage - block_offset) << sbi->l2niperblk;
357  
358  		if (rel_inode < inodes_left)
359  			rel_inode += block_offset << sbi->l2niperblk;
360  		else {
361  			pageno += 1;
362  			rel_inode -= inodes_left;
363  		}
364  	}
365  
366  	/* read the page of disk inode */
367  	mp = read_metapage(ipimap, pageno << sbi->l2nbperpage, PSIZE, 1);
368  	if (!mp) {
369  		jfs_err("diRead: read_metapage failed");
370  		return -EIO;
371  	}
372  
373  	/* locate the disk inode requested */
374  	dp = (struct dinode *) mp->data;
375  	dp += rel_inode;
376  
377  	if (ip->i_ino != le32_to_cpu(dp->di_number)) {
378  		jfs_error(ip->i_sb, "i_ino != di_number\n");
379  		rc = -EIO;
380  	} else if (le32_to_cpu(dp->di_nlink) == 0)
381  		rc = -ESTALE;
382  	else
383  		/* copy the disk inode to the in-memory inode */
384  		rc = copy_from_dinode(dp, ip);
385  
386  	release_metapage(mp);
387  
388  	/* set the ag for the inode */
389  	JFS_IP(ip)->agstart = agstart;
390  	JFS_IP(ip)->active_ag = -1;
391  
392  	return (rc);
393  }
394  
395  
396  /*
397   * NAME:	diReadSpecial()
398   *
399   * FUNCTION:	initialize a 'special' inode from disk.
400   *
401   *		this routines handles aggregate level inodes.  The
402   *		inode cache cannot differentiate between the
403   *		aggregate inodes and the filesystem inodes, so we
404   *		handle these here.  We don't actually use the aggregate
405   *		inode map, since these inodes are at a fixed location
406   *		and in some cases the aggregate inode map isn't initialized
407   *		yet.
408   *
409   * PARAMETERS:
410   *	sb - filesystem superblock
411   *	inum - aggregate inode number
412   *	secondary - 1 if secondary aggregate inode table
413   *
414   * RETURN VALUES:
415   *	new inode	- success
416   *	NULL		- i/o error.
417   */
diReadSpecial(struct super_block * sb,ino_t inum,int secondary)418  struct inode *diReadSpecial(struct super_block *sb, ino_t inum, int secondary)
419  {
420  	struct jfs_sb_info *sbi = JFS_SBI(sb);
421  	uint address;
422  	struct dinode *dp;
423  	struct inode *ip;
424  	struct metapage *mp;
425  
426  	ip = new_inode(sb);
427  	if (ip == NULL) {
428  		jfs_err("diReadSpecial: new_inode returned NULL!");
429  		return ip;
430  	}
431  
432  	if (secondary) {
433  		address = addressPXD(&sbi->ait2) >> sbi->l2nbperpage;
434  		JFS_IP(ip)->ipimap = sbi->ipaimap2;
435  	} else {
436  		address = AITBL_OFF >> L2PSIZE;
437  		JFS_IP(ip)->ipimap = sbi->ipaimap;
438  	}
439  
440  	ASSERT(inum < INOSPEREXT);
441  
442  	ip->i_ino = inum;
443  
444  	address += inum >> 3;	/* 8 inodes per 4K page */
445  
446  	/* read the page of fixed disk inode (AIT) in raw mode */
447  	mp = read_metapage(ip, address << sbi->l2nbperpage, PSIZE, 1);
448  	if (mp == NULL) {
449  		set_nlink(ip, 1);	/* Don't want iput() deleting it */
450  		iput(ip);
451  		return (NULL);
452  	}
453  
454  	/* get the pointer to the disk inode of interest */
455  	dp = (struct dinode *) (mp->data);
456  	dp += inum % 8;		/* 8 inodes per 4K page */
457  
458  	/* copy on-disk inode to in-memory inode */
459  	if ((copy_from_dinode(dp, ip)) != 0) {
460  		/* handle bad return by returning NULL for ip */
461  		set_nlink(ip, 1);	/* Don't want iput() deleting it */
462  		iput(ip);
463  		/* release the page */
464  		release_metapage(mp);
465  		return (NULL);
466  
467  	}
468  
469  	ip->i_mapping->a_ops = &jfs_metapage_aops;
470  	mapping_set_gfp_mask(ip->i_mapping, GFP_NOFS);
471  
472  	/* Allocations to metadata inodes should not affect quotas */
473  	ip->i_flags |= S_NOQUOTA;
474  
475  	if ((inum == FILESYSTEM_I) && (JFS_IP(ip)->ipimap == sbi->ipaimap)) {
476  		sbi->gengen = le32_to_cpu(dp->di_gengen);
477  		sbi->inostamp = le32_to_cpu(dp->di_inostamp);
478  	}
479  
480  	/* release the page */
481  	release_metapage(mp);
482  
483  	inode_fake_hash(ip);
484  
485  	return (ip);
486  }
487  
488  /*
489   * NAME:	diWriteSpecial()
490   *
491   * FUNCTION:	Write the special inode to disk
492   *
493   * PARAMETERS:
494   *	ip - special inode
495   *	secondary - 1 if secondary aggregate inode table
496   *
497   * RETURN VALUES: none
498   */
499  
diWriteSpecial(struct inode * ip,int secondary)500  void diWriteSpecial(struct inode *ip, int secondary)
501  {
502  	struct jfs_sb_info *sbi = JFS_SBI(ip->i_sb);
503  	uint address;
504  	struct dinode *dp;
505  	ino_t inum = ip->i_ino;
506  	struct metapage *mp;
507  
508  	if (secondary)
509  		address = addressPXD(&sbi->ait2) >> sbi->l2nbperpage;
510  	else
511  		address = AITBL_OFF >> L2PSIZE;
512  
513  	ASSERT(inum < INOSPEREXT);
514  
515  	address += inum >> 3;	/* 8 inodes per 4K page */
516  
517  	/* read the page of fixed disk inode (AIT) in raw mode */
518  	mp = read_metapage(ip, address << sbi->l2nbperpage, PSIZE, 1);
519  	if (mp == NULL) {
520  		jfs_err("diWriteSpecial: failed to read aggregate inode extent!");
521  		return;
522  	}
523  
524  	/* get the pointer to the disk inode of interest */
525  	dp = (struct dinode *) (mp->data);
526  	dp += inum % 8;		/* 8 inodes per 4K page */
527  
528  	/* copy on-disk inode to in-memory inode */
529  	copy_to_dinode(dp, ip);
530  	memcpy(&dp->di_xtroot, &JFS_IP(ip)->i_xtroot, 288);
531  
532  	if (inum == FILESYSTEM_I)
533  		dp->di_gengen = cpu_to_le32(sbi->gengen);
534  
535  	/* write the page */
536  	write_metapage(mp);
537  }
538  
539  /*
540   * NAME:	diFreeSpecial()
541   *
542   * FUNCTION:	Free allocated space for special inode
543   */
diFreeSpecial(struct inode * ip)544  void diFreeSpecial(struct inode *ip)
545  {
546  	if (ip == NULL) {
547  		jfs_err("diFreeSpecial called with NULL ip!");
548  		return;
549  	}
550  	filemap_write_and_wait(ip->i_mapping);
551  	truncate_inode_pages(ip->i_mapping, 0);
552  	iput(ip);
553  }
554  
555  
556  
557  /*
558   * NAME:	diWrite()
559   *
560   * FUNCTION:	write the on-disk inode portion of the in-memory inode
561   *		to its corresponding on-disk inode.
562   *
563   *		on entry, the specifed incore inode should itself
564   *		specify the disk inode number corresponding to the
565   *		incore inode (i.e. i_number should be initialized).
566   *
567   *		the inode contains the inode extent address for the disk
568   *		inode.  with the inode extent address in hand, the
569   *		page of the extent that contains the disk inode is
570   *		read and the disk inode portion of the incore inode
571   *		is copied to the disk inode.
572   *
573   * PARAMETERS:
574   *	tid -  transacation id
575   *	ip  -  pointer to incore inode to be written to the inode extent.
576   *
577   * RETURN VALUES:
578   *	0	- success
579   *	-EIO	- i/o error.
580   */
diWrite(tid_t tid,struct inode * ip)581  int diWrite(tid_t tid, struct inode *ip)
582  {
583  	struct jfs_sb_info *sbi = JFS_SBI(ip->i_sb);
584  	struct jfs_inode_info *jfs_ip = JFS_IP(ip);
585  	int rc = 0;
586  	s32 ino;
587  	struct dinode *dp;
588  	s64 blkno;
589  	int block_offset;
590  	int inodes_left;
591  	struct metapage *mp;
592  	unsigned long pageno;
593  	int rel_inode;
594  	int dioffset;
595  	struct inode *ipimap;
596  	uint type;
597  	lid_t lid;
598  	struct tlock *ditlck, *tlck;
599  	struct linelock *dilinelock, *ilinelock;
600  	struct lv *lv;
601  	int n;
602  
603  	ipimap = jfs_ip->ipimap;
604  
605  	ino = ip->i_ino & (INOSPERIAG - 1);
606  
607  	if (!addressPXD(&(jfs_ip->ixpxd)) ||
608  	    (lengthPXD(&(jfs_ip->ixpxd)) !=
609  	     JFS_IP(ipimap)->i_imap->im_nbperiext)) {
610  		jfs_error(ip->i_sb, "ixpxd invalid\n");
611  		return -EIO;
612  	}
613  
614  	/*
615  	 * read the page of disk inode containing the specified inode:
616  	 */
617  	/* compute the block address of the page */
618  	blkno = INOPBLK(&(jfs_ip->ixpxd), ino, sbi->l2nbperpage);
619  
620  	rel_inode = (ino & (INOSPERPAGE - 1));
621  	pageno = blkno >> sbi->l2nbperpage;
622  
623  	if ((block_offset = ((u32) blkno & (sbi->nbperpage - 1)))) {
624  		/*
625  		 * OS/2 didn't always align inode extents on page boundaries
626  		 */
627  		inodes_left =
628  		    (sbi->nbperpage - block_offset) << sbi->l2niperblk;
629  
630  		if (rel_inode < inodes_left)
631  			rel_inode += block_offset << sbi->l2niperblk;
632  		else {
633  			pageno += 1;
634  			rel_inode -= inodes_left;
635  		}
636  	}
637  	/* read the page of disk inode */
638        retry:
639  	mp = read_metapage(ipimap, pageno << sbi->l2nbperpage, PSIZE, 1);
640  	if (!mp)
641  		return -EIO;
642  
643  	/* get the pointer to the disk inode */
644  	dp = (struct dinode *) mp->data;
645  	dp += rel_inode;
646  
647  	dioffset = (ino & (INOSPERPAGE - 1)) << L2DISIZE;
648  
649  	/*
650  	 * acquire transaction lock on the on-disk inode;
651  	 * N.B. tlock is acquired on ipimap not ip;
652  	 */
653  	if ((ditlck =
654  	     txLock(tid, ipimap, mp, tlckINODE | tlckENTRY)) == NULL)
655  		goto retry;
656  	dilinelock = (struct linelock *) & ditlck->lock;
657  
658  	/*
659  	 * copy btree root from in-memory inode to on-disk inode
660  	 *
661  	 * (tlock is taken from inline B+-tree root in in-memory
662  	 * inode when the B+-tree root is updated, which is pointed
663  	 * by jfs_ip->blid as well as being on tx tlock list)
664  	 *
665  	 * further processing of btree root is based on the copy
666  	 * in in-memory inode, where txLog() will log from, and,
667  	 * for xtree root, txUpdateMap() will update map and reset
668  	 * XAD_NEW bit;
669  	 */
670  
671  	if (S_ISDIR(ip->i_mode) && (lid = jfs_ip->xtlid)) {
672  		/*
673  		 * This is the special xtree inside the directory for storing
674  		 * the directory table
675  		 */
676  		xtroot_t *p, *xp;
677  		xad_t *xad;
678  
679  		jfs_ip->xtlid = 0;
680  		tlck = lid_to_tlock(lid);
681  		assert(tlck->type & tlckXTREE);
682  		tlck->type |= tlckBTROOT;
683  		tlck->mp = mp;
684  		ilinelock = (struct linelock *) & tlck->lock;
685  
686  		/*
687  		 * copy xtree root from inode to dinode:
688  		 */
689  		p = &jfs_ip->i_xtroot;
690  		xp = (xtroot_t *) &dp->di_dirtable;
691  		lv = ilinelock->lv;
692  		for (n = 0; n < ilinelock->index; n++, lv++) {
693  			memcpy(&xp->xad[lv->offset], &p->xad[lv->offset],
694  			       lv->length << L2XTSLOTSIZE);
695  		}
696  
697  		/* reset on-disk (metadata page) xtree XAD_NEW bit */
698  		xad = &xp->xad[XTENTRYSTART];
699  		for (n = XTENTRYSTART;
700  		     n < le16_to_cpu(xp->header.nextindex); n++, xad++)
701  			if (xad->flag & (XAD_NEW | XAD_EXTENDED))
702  				xad->flag &= ~(XAD_NEW | XAD_EXTENDED);
703  	}
704  
705  	if ((lid = jfs_ip->blid) == 0)
706  		goto inlineData;
707  	jfs_ip->blid = 0;
708  
709  	tlck = lid_to_tlock(lid);
710  	type = tlck->type;
711  	tlck->type |= tlckBTROOT;
712  	tlck->mp = mp;
713  	ilinelock = (struct linelock *) & tlck->lock;
714  
715  	/*
716  	 *	regular file: 16 byte (XAD slot) granularity
717  	 */
718  	if (type & tlckXTREE) {
719  		xtroot_t *p, *xp;
720  		xad_t *xad;
721  
722  		/*
723  		 * copy xtree root from inode to dinode:
724  		 */
725  		p = &jfs_ip->i_xtroot;
726  		xp = &dp->di_xtroot;
727  		lv = ilinelock->lv;
728  		for (n = 0; n < ilinelock->index; n++, lv++) {
729  			memcpy(&xp->xad[lv->offset], &p->xad[lv->offset],
730  			       lv->length << L2XTSLOTSIZE);
731  		}
732  
733  		/* reset on-disk (metadata page) xtree XAD_NEW bit */
734  		xad = &xp->xad[XTENTRYSTART];
735  		for (n = XTENTRYSTART;
736  		     n < le16_to_cpu(xp->header.nextindex); n++, xad++)
737  			if (xad->flag & (XAD_NEW | XAD_EXTENDED))
738  				xad->flag &= ~(XAD_NEW | XAD_EXTENDED);
739  	}
740  	/*
741  	 *	directory: 32 byte (directory entry slot) granularity
742  	 */
743  	else if (type & tlckDTREE) {
744  		dtpage_t *p, *xp;
745  
746  		/*
747  		 * copy dtree root from inode to dinode:
748  		 */
749  		p = (dtpage_t *) &jfs_ip->i_dtroot;
750  		xp = (dtpage_t *) & dp->di_dtroot;
751  		lv = ilinelock->lv;
752  		for (n = 0; n < ilinelock->index; n++, lv++) {
753  			memcpy(&xp->slot[lv->offset], &p->slot[lv->offset],
754  			       lv->length << L2DTSLOTSIZE);
755  		}
756  	} else {
757  		jfs_err("diWrite: UFO tlock");
758  	}
759  
760        inlineData:
761  	/*
762  	 * copy inline symlink from in-memory inode to on-disk inode
763  	 */
764  	if (S_ISLNK(ip->i_mode) && ip->i_size < IDATASIZE) {
765  		lv = & dilinelock->lv[dilinelock->index];
766  		lv->offset = (dioffset + 2 * 128) >> L2INODESLOTSIZE;
767  		lv->length = 2;
768  		memcpy(&dp->di_inline_all, jfs_ip->i_inline_all, IDATASIZE);
769  		dilinelock->index++;
770  	}
771  	/*
772  	 * copy inline data from in-memory inode to on-disk inode:
773  	 * 128 byte slot granularity
774  	 */
775  	if (test_cflag(COMMIT_Inlineea, ip)) {
776  		lv = & dilinelock->lv[dilinelock->index];
777  		lv->offset = (dioffset + 3 * 128) >> L2INODESLOTSIZE;
778  		lv->length = 1;
779  		memcpy(&dp->di_inlineea, jfs_ip->i_inline_ea, INODESLOTSIZE);
780  		dilinelock->index++;
781  
782  		clear_cflag(COMMIT_Inlineea, ip);
783  	}
784  
785  	/*
786  	 *	lock/copy inode base: 128 byte slot granularity
787  	 */
788  	lv = & dilinelock->lv[dilinelock->index];
789  	lv->offset = dioffset >> L2INODESLOTSIZE;
790  	copy_to_dinode(dp, ip);
791  	if (test_and_clear_cflag(COMMIT_Dirtable, ip)) {
792  		lv->length = 2;
793  		memcpy(&dp->di_dirtable, &jfs_ip->i_dirtable, 96);
794  	} else
795  		lv->length = 1;
796  	dilinelock->index++;
797  
798  	/* release the buffer holding the updated on-disk inode.
799  	 * the buffer will be later written by commit processing.
800  	 */
801  	write_metapage(mp);
802  
803  	return (rc);
804  }
805  
806  
807  /*
808   * NAME:	diFree(ip)
809   *
810   * FUNCTION:	free a specified inode from the inode working map
811   *		for a fileset or aggregate.
812   *
813   *		if the inode to be freed represents the first (only)
814   *		free inode within the iag, the iag will be placed on
815   *		the ag free inode list.
816   *
817   *		freeing the inode will cause the inode extent to be
818   *		freed if the inode is the only allocated inode within
819   *		the extent.  in this case all the disk resource backing
820   *		up the inode extent will be freed. in addition, the iag
821   *		will be placed on the ag extent free list if the extent
822   *		is the first free extent in the iag.  if freeing the
823   *		extent also means that no free inodes will exist for
824   *		the iag, the iag will also be removed from the ag free
825   *		inode list.
826   *
827   *		the iag describing the inode will be freed if the extent
828   *		is to be freed and it is the only backed extent within
829   *		the iag.  in this case, the iag will be removed from the
830   *		ag free extent list and ag free inode list and placed on
831   *		the inode map's free iag list.
832   *
833   *		a careful update approach is used to provide consistency
834   *		in the face of updates to multiple buffers.  under this
835   *		approach, all required buffers are obtained before making
836   *		any updates and are held until all updates are complete.
837   *
838   * PARAMETERS:
839   *	ip	- inode to be freed.
840   *
841   * RETURN VALUES:
842   *	0	- success
843   *	-EIO	- i/o error.
844   */
diFree(struct inode * ip)845  int diFree(struct inode *ip)
846  {
847  	int rc;
848  	ino_t inum = ip->i_ino;
849  	struct iag *iagp, *aiagp, *biagp, *ciagp, *diagp;
850  	struct metapage *mp, *amp, *bmp, *cmp, *dmp;
851  	int iagno, ino, extno, bitno, sword, agno;
852  	int back, fwd;
853  	u32 bitmap, mask;
854  	struct inode *ipimap = JFS_SBI(ip->i_sb)->ipimap;
855  	struct inomap *imap = JFS_IP(ipimap)->i_imap;
856  	pxd_t freepxd;
857  	tid_t tid;
858  	struct inode *iplist[3];
859  	struct tlock *tlck;
860  	struct pxd_lock *pxdlock;
861  
862  	/*
863  	 * This is just to suppress compiler warnings.  The same logic that
864  	 * references these variables is used to initialize them.
865  	 */
866  	aiagp = biagp = ciagp = diagp = NULL;
867  
868  	/* get the iag number containing the inode.
869  	 */
870  	iagno = INOTOIAG(inum);
871  
872  	/* make sure that the iag is contained within
873  	 * the map.
874  	 */
875  	if (iagno >= imap->im_nextiag) {
876  		print_hex_dump(KERN_ERR, "imap: ", DUMP_PREFIX_ADDRESS, 16, 4,
877  			       imap, 32, 0);
878  		jfs_error(ip->i_sb, "inum = %d, iagno = %d, nextiag = %d\n",
879  			  (uint) inum, iagno, imap->im_nextiag);
880  		return -EIO;
881  	}
882  
883  	/* get the allocation group for this ino.
884  	 */
885  	agno = BLKTOAG(JFS_IP(ip)->agstart, JFS_SBI(ip->i_sb));
886  
887  	/* Lock the AG specific inode map information
888  	 */
889  	AG_LOCK(imap, agno);
890  
891  	/* Obtain read lock in imap inode.  Don't release it until we have
892  	 * read all of the IAG's that we are going to.
893  	 */
894  	IREAD_LOCK(ipimap, RDWRLOCK_IMAP);
895  
896  	/* read the iag.
897  	 */
898  	if ((rc = diIAGRead(imap, iagno, &mp))) {
899  		IREAD_UNLOCK(ipimap);
900  		AG_UNLOCK(imap, agno);
901  		return (rc);
902  	}
903  	iagp = (struct iag *) mp->data;
904  
905  	/* get the inode number and extent number of the inode within
906  	 * the iag and the inode number within the extent.
907  	 */
908  	ino = inum & (INOSPERIAG - 1);
909  	extno = ino >> L2INOSPEREXT;
910  	bitno = ino & (INOSPEREXT - 1);
911  	mask = HIGHORDER >> bitno;
912  
913  	if (!(le32_to_cpu(iagp->wmap[extno]) & mask)) {
914  		jfs_error(ip->i_sb, "wmap shows inode already free\n");
915  	}
916  
917  	if (!addressPXD(&iagp->inoext[extno])) {
918  		release_metapage(mp);
919  		IREAD_UNLOCK(ipimap);
920  		AG_UNLOCK(imap, agno);
921  		jfs_error(ip->i_sb, "invalid inoext\n");
922  		return -EIO;
923  	}
924  
925  	/* compute the bitmap for the extent reflecting the freed inode.
926  	 */
927  	bitmap = le32_to_cpu(iagp->wmap[extno]) & ~mask;
928  
929  	if (imap->im_agctl[agno].numfree > imap->im_agctl[agno].numinos) {
930  		release_metapage(mp);
931  		IREAD_UNLOCK(ipimap);
932  		AG_UNLOCK(imap, agno);
933  		jfs_error(ip->i_sb, "numfree > numinos\n");
934  		return -EIO;
935  	}
936  	/*
937  	 *	inode extent still has some inodes or below low water mark:
938  	 *	keep the inode extent;
939  	 */
940  	if (bitmap ||
941  	    imap->im_agctl[agno].numfree < 96 ||
942  	    (imap->im_agctl[agno].numfree < 288 &&
943  	     (((imap->im_agctl[agno].numfree * 100) /
944  	       imap->im_agctl[agno].numinos) <= 25))) {
945  		/* if the iag currently has no free inodes (i.e.,
946  		 * the inode being freed is the first free inode of iag),
947  		 * insert the iag at head of the inode free list for the ag.
948  		 */
949  		if (iagp->nfreeinos == 0) {
950  			/* check if there are any iags on the ag inode
951  			 * free list.  if so, read the first one so that
952  			 * we can link the current iag onto the list at
953  			 * the head.
954  			 */
955  			if ((fwd = imap->im_agctl[agno].inofree) >= 0) {
956  				/* read the iag that currently is the head
957  				 * of the list.
958  				 */
959  				if ((rc = diIAGRead(imap, fwd, &amp))) {
960  					IREAD_UNLOCK(ipimap);
961  					AG_UNLOCK(imap, agno);
962  					release_metapage(mp);
963  					return (rc);
964  				}
965  				aiagp = (struct iag *) amp->data;
966  
967  				/* make current head point back to the iag.
968  				 */
969  				aiagp->inofreeback = cpu_to_le32(iagno);
970  
971  				write_metapage(amp);
972  			}
973  
974  			/* iag points forward to current head and iag
975  			 * becomes the new head of the list.
976  			 */
977  			iagp->inofreefwd =
978  			    cpu_to_le32(imap->im_agctl[agno].inofree);
979  			iagp->inofreeback = cpu_to_le32(-1);
980  			imap->im_agctl[agno].inofree = iagno;
981  		}
982  		IREAD_UNLOCK(ipimap);
983  
984  		/* update the free inode summary map for the extent if
985  		 * freeing the inode means the extent will now have free
986  		 * inodes (i.e., the inode being freed is the first free
987  		 * inode of extent),
988  		 */
989  		if (iagp->wmap[extno] == cpu_to_le32(ONES)) {
990  			sword = extno >> L2EXTSPERSUM;
991  			bitno = extno & (EXTSPERSUM - 1);
992  			iagp->inosmap[sword] &=
993  			    cpu_to_le32(~(HIGHORDER >> bitno));
994  		}
995  
996  		/* update the bitmap.
997  		 */
998  		iagp->wmap[extno] = cpu_to_le32(bitmap);
999  
1000  		/* update the free inode counts at the iag, ag and
1001  		 * map level.
1002  		 */
1003  		le32_add_cpu(&iagp->nfreeinos, 1);
1004  		imap->im_agctl[agno].numfree += 1;
1005  		atomic_inc(&imap->im_numfree);
1006  
1007  		/* release the AG inode map lock
1008  		 */
1009  		AG_UNLOCK(imap, agno);
1010  
1011  		/* write the iag */
1012  		write_metapage(mp);
1013  
1014  		return (0);
1015  	}
1016  
1017  
1018  	/*
1019  	 *	inode extent has become free and above low water mark:
1020  	 *	free the inode extent;
1021  	 */
1022  
1023  	/*
1024  	 *	prepare to update iag list(s) (careful update step 1)
1025  	 */
1026  	amp = bmp = cmp = dmp = NULL;
1027  	fwd = back = -1;
1028  
1029  	/* check if the iag currently has no free extents.  if so,
1030  	 * it will be placed on the head of the ag extent free list.
1031  	 */
1032  	if (iagp->nfreeexts == 0) {
1033  		/* check if the ag extent free list has any iags.
1034  		 * if so, read the iag at the head of the list now.
1035  		 * this (head) iag will be updated later to reflect
1036  		 * the addition of the current iag at the head of
1037  		 * the list.
1038  		 */
1039  		if ((fwd = imap->im_agctl[agno].extfree) >= 0) {
1040  			if ((rc = diIAGRead(imap, fwd, &amp)))
1041  				goto error_out;
1042  			aiagp = (struct iag *) amp->data;
1043  		}
1044  	} else {
1045  		/* iag has free extents. check if the addition of a free
1046  		 * extent will cause all extents to be free within this
1047  		 * iag.  if so, the iag will be removed from the ag extent
1048  		 * free list and placed on the inode map's free iag list.
1049  		 */
1050  		if (iagp->nfreeexts == cpu_to_le32(EXTSPERIAG - 1)) {
1051  			/* in preparation for removing the iag from the
1052  			 * ag extent free list, read the iags preceding
1053  			 * and following the iag on the ag extent free
1054  			 * list.
1055  			 */
1056  			if ((fwd = le32_to_cpu(iagp->extfreefwd)) >= 0) {
1057  				if ((rc = diIAGRead(imap, fwd, &amp)))
1058  					goto error_out;
1059  				aiagp = (struct iag *) amp->data;
1060  			}
1061  
1062  			if ((back = le32_to_cpu(iagp->extfreeback)) >= 0) {
1063  				if ((rc = diIAGRead(imap, back, &bmp)))
1064  					goto error_out;
1065  				biagp = (struct iag *) bmp->data;
1066  			}
1067  		}
1068  	}
1069  
1070  	/* remove the iag from the ag inode free list if freeing
1071  	 * this extent cause the iag to have no free inodes.
1072  	 */
1073  	if (iagp->nfreeinos == cpu_to_le32(INOSPEREXT - 1)) {
1074  		int inofreeback = le32_to_cpu(iagp->inofreeback);
1075  		int inofreefwd = le32_to_cpu(iagp->inofreefwd);
1076  
1077  		/* in preparation for removing the iag from the
1078  		 * ag inode free list, read the iags preceding
1079  		 * and following the iag on the ag inode free
1080  		 * list.  before reading these iags, we must make
1081  		 * sure that we already don't have them in hand
1082  		 * from up above, since re-reading an iag (buffer)
1083  		 * we are currently holding would cause a deadlock.
1084  		 */
1085  		if (inofreefwd >= 0) {
1086  
1087  			if (inofreefwd == fwd)
1088  				ciagp = (struct iag *) amp->data;
1089  			else if (inofreefwd == back)
1090  				ciagp = (struct iag *) bmp->data;
1091  			else {
1092  				if ((rc =
1093  				     diIAGRead(imap, inofreefwd, &cmp)))
1094  					goto error_out;
1095  				ciagp = (struct iag *) cmp->data;
1096  			}
1097  			assert(ciagp != NULL);
1098  		}
1099  
1100  		if (inofreeback >= 0) {
1101  			if (inofreeback == fwd)
1102  				diagp = (struct iag *) amp->data;
1103  			else if (inofreeback == back)
1104  				diagp = (struct iag *) bmp->data;
1105  			else {
1106  				if ((rc =
1107  				     diIAGRead(imap, inofreeback, &dmp)))
1108  					goto error_out;
1109  				diagp = (struct iag *) dmp->data;
1110  			}
1111  			assert(diagp != NULL);
1112  		}
1113  	}
1114  
1115  	IREAD_UNLOCK(ipimap);
1116  
1117  	/*
1118  	 * invalidate any page of the inode extent freed from buffer cache;
1119  	 */
1120  	freepxd = iagp->inoext[extno];
1121  	invalidate_pxd_metapages(ip, freepxd);
1122  
1123  	/*
1124  	 *	update iag list(s) (careful update step 2)
1125  	 */
1126  	/* add the iag to the ag extent free list if this is the
1127  	 * first free extent for the iag.
1128  	 */
1129  	if (iagp->nfreeexts == 0) {
1130  		if (fwd >= 0)
1131  			aiagp->extfreeback = cpu_to_le32(iagno);
1132  
1133  		iagp->extfreefwd =
1134  		    cpu_to_le32(imap->im_agctl[agno].extfree);
1135  		iagp->extfreeback = cpu_to_le32(-1);
1136  		imap->im_agctl[agno].extfree = iagno;
1137  	} else {
1138  		/* remove the iag from the ag extent list if all extents
1139  		 * are now free and place it on the inode map iag free list.
1140  		 */
1141  		if (iagp->nfreeexts == cpu_to_le32(EXTSPERIAG - 1)) {
1142  			if (fwd >= 0)
1143  				aiagp->extfreeback = iagp->extfreeback;
1144  
1145  			if (back >= 0)
1146  				biagp->extfreefwd = iagp->extfreefwd;
1147  			else
1148  				imap->im_agctl[agno].extfree =
1149  				    le32_to_cpu(iagp->extfreefwd);
1150  
1151  			iagp->extfreefwd = iagp->extfreeback = cpu_to_le32(-1);
1152  
1153  			IAGFREE_LOCK(imap);
1154  			iagp->iagfree = cpu_to_le32(imap->im_freeiag);
1155  			imap->im_freeiag = iagno;
1156  			IAGFREE_UNLOCK(imap);
1157  		}
1158  	}
1159  
1160  	/* remove the iag from the ag inode free list if freeing
1161  	 * this extent causes the iag to have no free inodes.
1162  	 */
1163  	if (iagp->nfreeinos == cpu_to_le32(INOSPEREXT - 1)) {
1164  		if ((int) le32_to_cpu(iagp->inofreefwd) >= 0)
1165  			ciagp->inofreeback = iagp->inofreeback;
1166  
1167  		if ((int) le32_to_cpu(iagp->inofreeback) >= 0)
1168  			diagp->inofreefwd = iagp->inofreefwd;
1169  		else
1170  			imap->im_agctl[agno].inofree =
1171  			    le32_to_cpu(iagp->inofreefwd);
1172  
1173  		iagp->inofreefwd = iagp->inofreeback = cpu_to_le32(-1);
1174  	}
1175  
1176  	/* update the inode extent address and working map
1177  	 * to reflect the free extent.
1178  	 * the permanent map should have been updated already
1179  	 * for the inode being freed.
1180  	 */
1181  	if (iagp->pmap[extno] != 0) {
1182  		jfs_error(ip->i_sb, "the pmap does not show inode free\n");
1183  	}
1184  	iagp->wmap[extno] = 0;
1185  	PXDlength(&iagp->inoext[extno], 0);
1186  	PXDaddress(&iagp->inoext[extno], 0);
1187  
1188  	/* update the free extent and free inode summary maps
1189  	 * to reflect the freed extent.
1190  	 * the inode summary map is marked to indicate no inodes
1191  	 * available for the freed extent.
1192  	 */
1193  	sword = extno >> L2EXTSPERSUM;
1194  	bitno = extno & (EXTSPERSUM - 1);
1195  	mask = HIGHORDER >> bitno;
1196  	iagp->inosmap[sword] |= cpu_to_le32(mask);
1197  	iagp->extsmap[sword] &= cpu_to_le32(~mask);
1198  
1199  	/* update the number of free inodes and number of free extents
1200  	 * for the iag.
1201  	 */
1202  	le32_add_cpu(&iagp->nfreeinos, -(INOSPEREXT - 1));
1203  	le32_add_cpu(&iagp->nfreeexts, 1);
1204  
1205  	/* update the number of free inodes and backed inodes
1206  	 * at the ag and inode map level.
1207  	 */
1208  	imap->im_agctl[agno].numfree -= (INOSPEREXT - 1);
1209  	imap->im_agctl[agno].numinos -= INOSPEREXT;
1210  	atomic_sub(INOSPEREXT - 1, &imap->im_numfree);
1211  	atomic_sub(INOSPEREXT, &imap->im_numinos);
1212  
1213  	if (amp)
1214  		write_metapage(amp);
1215  	if (bmp)
1216  		write_metapage(bmp);
1217  	if (cmp)
1218  		write_metapage(cmp);
1219  	if (dmp)
1220  		write_metapage(dmp);
1221  
1222  	/*
1223  	 * start transaction to update block allocation map
1224  	 * for the inode extent freed;
1225  	 *
1226  	 * N.B. AG_LOCK is released and iag will be released below, and
1227  	 * other thread may allocate inode from/reusing the ixad freed
1228  	 * BUT with new/different backing inode extent from the extent
1229  	 * to be freed by the transaction;
1230  	 */
1231  	tid = txBegin(ipimap->i_sb, COMMIT_FORCE);
1232  	mutex_lock(&JFS_IP(ipimap)->commit_mutex);
1233  
1234  	/* acquire tlock of the iag page of the freed ixad
1235  	 * to force the page NOHOMEOK (even though no data is
1236  	 * logged from the iag page) until NOREDOPAGE|FREEXTENT log
1237  	 * for the free of the extent is committed;
1238  	 * write FREEXTENT|NOREDOPAGE log record
1239  	 * N.B. linelock is overlaid as freed extent descriptor;
1240  	 */
1241  	tlck = txLock(tid, ipimap, mp, tlckINODE | tlckFREE);
1242  	pxdlock = (struct pxd_lock *) & tlck->lock;
1243  	pxdlock->flag = mlckFREEPXD;
1244  	pxdlock->pxd = freepxd;
1245  	pxdlock->index = 1;
1246  
1247  	write_metapage(mp);
1248  
1249  	iplist[0] = ipimap;
1250  
1251  	/*
1252  	 * logredo needs the IAG number and IAG extent index in order
1253  	 * to ensure that the IMap is consistent.  The least disruptive
1254  	 * way to pass these values through  to the transaction manager
1255  	 * is in the iplist array.
1256  	 *
1257  	 * It's not pretty, but it works.
1258  	 */
1259  	iplist[1] = (struct inode *) (size_t)iagno;
1260  	iplist[2] = (struct inode *) (size_t)extno;
1261  
1262  	rc = txCommit(tid, 1, &iplist[0], COMMIT_FORCE);
1263  
1264  	txEnd(tid);
1265  	mutex_unlock(&JFS_IP(ipimap)->commit_mutex);
1266  
1267  	/* unlock the AG inode map information */
1268  	AG_UNLOCK(imap, agno);
1269  
1270  	return (0);
1271  
1272        error_out:
1273  	IREAD_UNLOCK(ipimap);
1274  
1275  	if (amp)
1276  		release_metapage(amp);
1277  	if (bmp)
1278  		release_metapage(bmp);
1279  	if (cmp)
1280  		release_metapage(cmp);
1281  	if (dmp)
1282  		release_metapage(dmp);
1283  
1284  	AG_UNLOCK(imap, agno);
1285  
1286  	release_metapage(mp);
1287  
1288  	return (rc);
1289  }
1290  
1291  /*
1292   * There are several places in the diAlloc* routines where we initialize
1293   * the inode.
1294   */
1295  static inline void
diInitInode(struct inode * ip,int iagno,int ino,int extno,struct iag * iagp)1296  diInitInode(struct inode *ip, int iagno, int ino, int extno, struct iag * iagp)
1297  {
1298  	struct jfs_inode_info *jfs_ip = JFS_IP(ip);
1299  
1300  	ip->i_ino = (iagno << L2INOSPERIAG) + ino;
1301  	jfs_ip->ixpxd = iagp->inoext[extno];
1302  	jfs_ip->agstart = le64_to_cpu(iagp->agstart);
1303  	jfs_ip->active_ag = -1;
1304  }
1305  
1306  
1307  /*
1308   * NAME:	diAlloc(pip,dir,ip)
1309   *
1310   * FUNCTION:	allocate a disk inode from the inode working map
1311   *		for a fileset or aggregate.
1312   *
1313   * PARAMETERS:
1314   *	pip	- pointer to incore inode for the parent inode.
1315   *	dir	- 'true' if the new disk inode is for a directory.
1316   *	ip	- pointer to a new inode
1317   *
1318   * RETURN VALUES:
1319   *	0	- success.
1320   *	-ENOSPC	- insufficient disk resources.
1321   *	-EIO	- i/o error.
1322   */
diAlloc(struct inode * pip,bool dir,struct inode * ip)1323  int diAlloc(struct inode *pip, bool dir, struct inode *ip)
1324  {
1325  	int rc, ino, iagno, addext, extno, bitno, sword;
1326  	int nwords, rem, i, agno, dn_numag;
1327  	u32 mask, inosmap, extsmap;
1328  	struct inode *ipimap;
1329  	struct metapage *mp;
1330  	ino_t inum;
1331  	struct iag *iagp;
1332  	struct inomap *imap;
1333  
1334  	/* get the pointers to the inode map inode and the
1335  	 * corresponding imap control structure.
1336  	 */
1337  	ipimap = JFS_SBI(pip->i_sb)->ipimap;
1338  	imap = JFS_IP(ipimap)->i_imap;
1339  	JFS_IP(ip)->ipimap = ipimap;
1340  	JFS_IP(ip)->fileset = FILESYSTEM_I;
1341  
1342  	/* for a directory, the allocation policy is to start
1343  	 * at the ag level using the preferred ag.
1344  	 */
1345  	if (dir) {
1346  		agno = dbNextAG(JFS_SBI(pip->i_sb)->ipbmap);
1347  		AG_LOCK(imap, agno);
1348  		goto tryag;
1349  	}
1350  
1351  	/* for files, the policy starts off by trying to allocate from
1352  	 * the same iag containing the parent disk inode:
1353  	 * try to allocate the new disk inode close to the parent disk
1354  	 * inode, using parent disk inode number + 1 as the allocation
1355  	 * hint.  (we use a left-to-right policy to attempt to avoid
1356  	 * moving backward on the disk.)  compute the hint within the
1357  	 * file system and the iag.
1358  	 */
1359  
1360  	/* get the ag number of this iag */
1361  	agno = BLKTOAG(JFS_IP(pip)->agstart, JFS_SBI(pip->i_sb));
1362  	dn_numag = JFS_SBI(pip->i_sb)->bmap->db_numag;
1363  	if (agno < 0 || agno > dn_numag || agno >= MAXAG)
1364  		return -EIO;
1365  
1366  	if (atomic_read(&JFS_SBI(pip->i_sb)->bmap->db_active[agno])) {
1367  		/*
1368  		 * There is an open file actively growing.  We want to
1369  		 * allocate new inodes from a different ag to avoid
1370  		 * fragmentation problems.
1371  		 */
1372  		agno = dbNextAG(JFS_SBI(pip->i_sb)->ipbmap);
1373  		AG_LOCK(imap, agno);
1374  		goto tryag;
1375  	}
1376  
1377  	inum = pip->i_ino + 1;
1378  	ino = inum & (INOSPERIAG - 1);
1379  
1380  	/* back off the hint if it is outside of the iag */
1381  	if (ino == 0)
1382  		inum = pip->i_ino;
1383  
1384  	/* lock the AG inode map information */
1385  	AG_LOCK(imap, agno);
1386  
1387  	/* Get read lock on imap inode */
1388  	IREAD_LOCK(ipimap, RDWRLOCK_IMAP);
1389  
1390  	/* get the iag number and read the iag */
1391  	iagno = INOTOIAG(inum);
1392  	if ((rc = diIAGRead(imap, iagno, &mp))) {
1393  		IREAD_UNLOCK(ipimap);
1394  		AG_UNLOCK(imap, agno);
1395  		return (rc);
1396  	}
1397  	iagp = (struct iag *) mp->data;
1398  
1399  	/* determine if new inode extent is allowed to be added to the iag.
1400  	 * new inode extent can be added to the iag if the ag
1401  	 * has less than 32 free disk inodes and the iag has free extents.
1402  	 */
1403  	addext = (imap->im_agctl[agno].numfree < 32 && iagp->nfreeexts);
1404  
1405  	/*
1406  	 *	try to allocate from the IAG
1407  	 */
1408  	/* check if the inode may be allocated from the iag
1409  	 * (i.e. the inode has free inodes or new extent can be added).
1410  	 */
1411  	if (iagp->nfreeinos || addext) {
1412  		/* determine the extent number of the hint.
1413  		 */
1414  		extno = ino >> L2INOSPEREXT;
1415  
1416  		/* check if the extent containing the hint has backed
1417  		 * inodes.  if so, try to allocate within this extent.
1418  		 */
1419  		if (addressPXD(&iagp->inoext[extno])) {
1420  			bitno = ino & (INOSPEREXT - 1);
1421  			if ((bitno =
1422  			     diFindFree(le32_to_cpu(iagp->wmap[extno]),
1423  					bitno))
1424  			    < INOSPEREXT) {
1425  				ino = (extno << L2INOSPEREXT) + bitno;
1426  
1427  				/* a free inode (bit) was found within this
1428  				 * extent, so allocate it.
1429  				 */
1430  				rc = diAllocBit(imap, iagp, ino);
1431  				IREAD_UNLOCK(ipimap);
1432  				if (rc) {
1433  					assert(rc == -EIO);
1434  				} else {
1435  					/* set the results of the allocation
1436  					 * and write the iag.
1437  					 */
1438  					diInitInode(ip, iagno, ino, extno,
1439  						    iagp);
1440  					mark_metapage_dirty(mp);
1441  				}
1442  				release_metapage(mp);
1443  
1444  				/* free the AG lock and return.
1445  				 */
1446  				AG_UNLOCK(imap, agno);
1447  				return (rc);
1448  			}
1449  
1450  			if (!addext)
1451  				extno =
1452  				    (extno ==
1453  				     EXTSPERIAG - 1) ? 0 : extno + 1;
1454  		}
1455  
1456  		/*
1457  		 * no free inodes within the extent containing the hint.
1458  		 *
1459  		 * try to allocate from the backed extents following
1460  		 * hint or, if appropriate (i.e. addext is true), allocate
1461  		 * an extent of free inodes at or following the extent
1462  		 * containing the hint.
1463  		 *
1464  		 * the free inode and free extent summary maps are used
1465  		 * here, so determine the starting summary map position
1466  		 * and the number of words we'll have to examine.  again,
1467  		 * the approach is to allocate following the hint, so we
1468  		 * might have to initially ignore prior bits of the summary
1469  		 * map that represent extents prior to the extent containing
1470  		 * the hint and later revisit these bits.
1471  		 */
1472  		bitno = extno & (EXTSPERSUM - 1);
1473  		nwords = (bitno == 0) ? SMAPSZ : SMAPSZ + 1;
1474  		sword = extno >> L2EXTSPERSUM;
1475  
1476  		/* mask any prior bits for the starting words of the
1477  		 * summary map.
1478  		 */
1479  		mask = (bitno == 0) ? 0 : (ONES << (EXTSPERSUM - bitno));
1480  		inosmap = le32_to_cpu(iagp->inosmap[sword]) | mask;
1481  		extsmap = le32_to_cpu(iagp->extsmap[sword]) | mask;
1482  
1483  		/* scan the free inode and free extent summary maps for
1484  		 * free resources.
1485  		 */
1486  		for (i = 0; i < nwords; i++) {
1487  			/* check if this word of the free inode summary
1488  			 * map describes an extent with free inodes.
1489  			 */
1490  			if (~inosmap) {
1491  				/* an extent with free inodes has been
1492  				 * found. determine the extent number
1493  				 * and the inode number within the extent.
1494  				 */
1495  				rem = diFindFree(inosmap, 0);
1496  				extno = (sword << L2EXTSPERSUM) + rem;
1497  				rem = diFindFree(le32_to_cpu(iagp->wmap[extno]),
1498  						 0);
1499  				if (rem >= INOSPEREXT) {
1500  					IREAD_UNLOCK(ipimap);
1501  					release_metapage(mp);
1502  					AG_UNLOCK(imap, agno);
1503  					jfs_error(ip->i_sb,
1504  						  "can't find free bit in wmap\n");
1505  					return -EIO;
1506  				}
1507  
1508  				/* determine the inode number within the
1509  				 * iag and allocate the inode from the
1510  				 * map.
1511  				 */
1512  				ino = (extno << L2INOSPEREXT) + rem;
1513  				rc = diAllocBit(imap, iagp, ino);
1514  				IREAD_UNLOCK(ipimap);
1515  				if (rc)
1516  					assert(rc == -EIO);
1517  				else {
1518  					/* set the results of the allocation
1519  					 * and write the iag.
1520  					 */
1521  					diInitInode(ip, iagno, ino, extno,
1522  						    iagp);
1523  					mark_metapage_dirty(mp);
1524  				}
1525  				release_metapage(mp);
1526  
1527  				/* free the AG lock and return.
1528  				 */
1529  				AG_UNLOCK(imap, agno);
1530  				return (rc);
1531  
1532  			}
1533  
1534  			/* check if we may allocate an extent of free
1535  			 * inodes and whether this word of the free
1536  			 * extents summary map describes a free extent.
1537  			 */
1538  			if (addext && ~extsmap) {
1539  				/* a free extent has been found.  determine
1540  				 * the extent number.
1541  				 */
1542  				rem = diFindFree(extsmap, 0);
1543  				extno = (sword << L2EXTSPERSUM) + rem;
1544  
1545  				/* allocate an extent of free inodes.
1546  				 */
1547  				if ((rc = diNewExt(imap, iagp, extno))) {
1548  					/* if there is no disk space for a
1549  					 * new extent, try to allocate the
1550  					 * disk inode from somewhere else.
1551  					 */
1552  					if (rc == -ENOSPC)
1553  						break;
1554  
1555  					assert(rc == -EIO);
1556  				} else {
1557  					/* set the results of the allocation
1558  					 * and write the iag.
1559  					 */
1560  					diInitInode(ip, iagno,
1561  						    extno << L2INOSPEREXT,
1562  						    extno, iagp);
1563  					mark_metapage_dirty(mp);
1564  				}
1565  				release_metapage(mp);
1566  				/* free the imap inode & the AG lock & return.
1567  				 */
1568  				IREAD_UNLOCK(ipimap);
1569  				AG_UNLOCK(imap, agno);
1570  				return (rc);
1571  			}
1572  
1573  			/* move on to the next set of summary map words.
1574  			 */
1575  			sword = (sword == SMAPSZ - 1) ? 0 : sword + 1;
1576  			inosmap = le32_to_cpu(iagp->inosmap[sword]);
1577  			extsmap = le32_to_cpu(iagp->extsmap[sword]);
1578  		}
1579  	}
1580  	/* unlock imap inode */
1581  	IREAD_UNLOCK(ipimap);
1582  
1583  	/* nothing doing in this iag, so release it. */
1584  	release_metapage(mp);
1585  
1586        tryag:
1587  	/*
1588  	 * try to allocate anywhere within the same AG as the parent inode.
1589  	 */
1590  	rc = diAllocAG(imap, agno, dir, ip);
1591  
1592  	AG_UNLOCK(imap, agno);
1593  
1594  	if (rc != -ENOSPC)
1595  		return (rc);
1596  
1597  	/*
1598  	 * try to allocate in any AG.
1599  	 */
1600  	return (diAllocAny(imap, agno, dir, ip));
1601  }
1602  
1603  
1604  /*
1605   * NAME:	diAllocAG(imap,agno,dir,ip)
1606   *
1607   * FUNCTION:	allocate a disk inode from the allocation group.
1608   *
1609   *		this routine first determines if a new extent of free
1610   *		inodes should be added for the allocation group, with
1611   *		the current request satisfied from this extent. if this
1612   *		is the case, an attempt will be made to do just that.  if
1613   *		this attempt fails or it has been determined that a new
1614   *		extent should not be added, an attempt is made to satisfy
1615   *		the request by allocating an existing (backed) free inode
1616   *		from the allocation group.
1617   *
1618   * PRE CONDITION: Already have the AG lock for this AG.
1619   *
1620   * PARAMETERS:
1621   *	imap	- pointer to inode map control structure.
1622   *	agno	- allocation group to allocate from.
1623   *	dir	- 'true' if the new disk inode is for a directory.
1624   *	ip	- pointer to the new inode to be filled in on successful return
1625   *		  with the disk inode number allocated, its extent address
1626   *		  and the start of the ag.
1627   *
1628   * RETURN VALUES:
1629   *	0	- success.
1630   *	-ENOSPC	- insufficient disk resources.
1631   *	-EIO	- i/o error.
1632   */
1633  static int
diAllocAG(struct inomap * imap,int agno,bool dir,struct inode * ip)1634  diAllocAG(struct inomap * imap, int agno, bool dir, struct inode *ip)
1635  {
1636  	int rc, addext, numfree, numinos;
1637  
1638  	/* get the number of free and the number of backed disk
1639  	 * inodes currently within the ag.
1640  	 */
1641  	numfree = imap->im_agctl[agno].numfree;
1642  	numinos = imap->im_agctl[agno].numinos;
1643  
1644  	if (numfree > numinos) {
1645  		jfs_error(ip->i_sb, "numfree > numinos\n");
1646  		return -EIO;
1647  	}
1648  
1649  	/* determine if we should allocate a new extent of free inodes
1650  	 * within the ag: for directory inodes, add a new extent
1651  	 * if there are a small number of free inodes or number of free
1652  	 * inodes is a small percentage of the number of backed inodes.
1653  	 */
1654  	if (dir)
1655  		addext = (numfree < 64 ||
1656  			  (numfree < 256
1657  			   && ((numfree * 100) / numinos) <= 20));
1658  	else
1659  		addext = (numfree == 0);
1660  
1661  	/*
1662  	 * try to allocate a new extent of free inodes.
1663  	 */
1664  	if (addext) {
1665  		/* if free space is not available for this new extent, try
1666  		 * below to allocate a free and existing (already backed)
1667  		 * inode from the ag.
1668  		 */
1669  		if ((rc = diAllocExt(imap, agno, ip)) != -ENOSPC)
1670  			return (rc);
1671  	}
1672  
1673  	/*
1674  	 * try to allocate an existing free inode from the ag.
1675  	 */
1676  	return (diAllocIno(imap, agno, ip));
1677  }
1678  
1679  
1680  /*
1681   * NAME:	diAllocAny(imap,agno,dir,iap)
1682   *
1683   * FUNCTION:	allocate a disk inode from any other allocation group.
1684   *
1685   *		this routine is called when an allocation attempt within
1686   *		the primary allocation group has failed. if attempts to
1687   *		allocate an inode from any allocation group other than the
1688   *		specified primary group.
1689   *
1690   * PARAMETERS:
1691   *	imap	- pointer to inode map control structure.
1692   *	agno	- primary allocation group (to avoid).
1693   *	dir	- 'true' if the new disk inode is for a directory.
1694   *	ip	- pointer to a new inode to be filled in on successful return
1695   *		  with the disk inode number allocated, its extent address
1696   *		  and the start of the ag.
1697   *
1698   * RETURN VALUES:
1699   *	0	- success.
1700   *	-ENOSPC	- insufficient disk resources.
1701   *	-EIO	- i/o error.
1702   */
1703  static int
diAllocAny(struct inomap * imap,int agno,bool dir,struct inode * ip)1704  diAllocAny(struct inomap * imap, int agno, bool dir, struct inode *ip)
1705  {
1706  	int ag, rc;
1707  	int maxag = JFS_SBI(imap->im_ipimap->i_sb)->bmap->db_maxag;
1708  
1709  
1710  	/* try to allocate from the ags following agno up to
1711  	 * the maximum ag number.
1712  	 */
1713  	for (ag = agno + 1; ag <= maxag; ag++) {
1714  		AG_LOCK(imap, ag);
1715  
1716  		rc = diAllocAG(imap, ag, dir, ip);
1717  
1718  		AG_UNLOCK(imap, ag);
1719  
1720  		if (rc != -ENOSPC)
1721  			return (rc);
1722  	}
1723  
1724  	/* try to allocate from the ags in front of agno.
1725  	 */
1726  	for (ag = 0; ag < agno; ag++) {
1727  		AG_LOCK(imap, ag);
1728  
1729  		rc = diAllocAG(imap, ag, dir, ip);
1730  
1731  		AG_UNLOCK(imap, ag);
1732  
1733  		if (rc != -ENOSPC)
1734  			return (rc);
1735  	}
1736  
1737  	/* no free disk inodes.
1738  	 */
1739  	return -ENOSPC;
1740  }
1741  
1742  
1743  /*
1744   * NAME:	diAllocIno(imap,agno,ip)
1745   *
1746   * FUNCTION:	allocate a disk inode from the allocation group's free
1747   *		inode list, returning an error if this free list is
1748   *		empty (i.e. no iags on the list).
1749   *
1750   *		allocation occurs from the first iag on the list using
1751   *		the iag's free inode summary map to find the leftmost
1752   *		free inode in the iag.
1753   *
1754   * PRE CONDITION: Already have AG lock for this AG.
1755   *
1756   * PARAMETERS:
1757   *	imap	- pointer to inode map control structure.
1758   *	agno	- allocation group.
1759   *	ip	- pointer to new inode to be filled in on successful return
1760   *		  with the disk inode number allocated, its extent address
1761   *		  and the start of the ag.
1762   *
1763   * RETURN VALUES:
1764   *	0	- success.
1765   *	-ENOSPC	- insufficient disk resources.
1766   *	-EIO	- i/o error.
1767   */
diAllocIno(struct inomap * imap,int agno,struct inode * ip)1768  static int diAllocIno(struct inomap * imap, int agno, struct inode *ip)
1769  {
1770  	int iagno, ino, rc, rem, extno, sword;
1771  	struct metapage *mp;
1772  	struct iag *iagp;
1773  
1774  	/* check if there are iags on the ag's free inode list.
1775  	 */
1776  	if ((iagno = imap->im_agctl[agno].inofree) < 0)
1777  		return -ENOSPC;
1778  
1779  	/* obtain read lock on imap inode */
1780  	IREAD_LOCK(imap->im_ipimap, RDWRLOCK_IMAP);
1781  
1782  	/* read the iag at the head of the list.
1783  	 */
1784  	if ((rc = diIAGRead(imap, iagno, &mp))) {
1785  		IREAD_UNLOCK(imap->im_ipimap);
1786  		return (rc);
1787  	}
1788  	iagp = (struct iag *) mp->data;
1789  
1790  	/* better be free inodes in this iag if it is on the
1791  	 * list.
1792  	 */
1793  	if (!iagp->nfreeinos) {
1794  		IREAD_UNLOCK(imap->im_ipimap);
1795  		release_metapage(mp);
1796  		jfs_error(ip->i_sb, "nfreeinos = 0, but iag on freelist\n");
1797  		return -EIO;
1798  	}
1799  
1800  	/* scan the free inode summary map to find an extent
1801  	 * with free inodes.
1802  	 */
1803  	for (sword = 0;; sword++) {
1804  		if (sword >= SMAPSZ) {
1805  			IREAD_UNLOCK(imap->im_ipimap);
1806  			release_metapage(mp);
1807  			jfs_error(ip->i_sb,
1808  				  "free inode not found in summary map\n");
1809  			return -EIO;
1810  		}
1811  
1812  		if (~iagp->inosmap[sword])
1813  			break;
1814  	}
1815  
1816  	/* found a extent with free inodes. determine
1817  	 * the extent number.
1818  	 */
1819  	rem = diFindFree(le32_to_cpu(iagp->inosmap[sword]), 0);
1820  	if (rem >= EXTSPERSUM) {
1821  		IREAD_UNLOCK(imap->im_ipimap);
1822  		release_metapage(mp);
1823  		jfs_error(ip->i_sb, "no free extent found\n");
1824  		return -EIO;
1825  	}
1826  	extno = (sword << L2EXTSPERSUM) + rem;
1827  
1828  	/* find the first free inode in the extent.
1829  	 */
1830  	rem = diFindFree(le32_to_cpu(iagp->wmap[extno]), 0);
1831  	if (rem >= INOSPEREXT) {
1832  		IREAD_UNLOCK(imap->im_ipimap);
1833  		release_metapage(mp);
1834  		jfs_error(ip->i_sb, "free inode not found\n");
1835  		return -EIO;
1836  	}
1837  
1838  	/* compute the inode number within the iag.
1839  	 */
1840  	ino = (extno << L2INOSPEREXT) + rem;
1841  
1842  	/* allocate the inode.
1843  	 */
1844  	rc = diAllocBit(imap, iagp, ino);
1845  	IREAD_UNLOCK(imap->im_ipimap);
1846  	if (rc) {
1847  		release_metapage(mp);
1848  		return (rc);
1849  	}
1850  
1851  	/* set the results of the allocation and write the iag.
1852  	 */
1853  	diInitInode(ip, iagno, ino, extno, iagp);
1854  	write_metapage(mp);
1855  
1856  	return (0);
1857  }
1858  
1859  
1860  /*
1861   * NAME:	diAllocExt(imap,agno,ip)
1862   *
1863   * FUNCTION:	add a new extent of free inodes to an iag, allocating
1864   *		an inode from this extent to satisfy the current allocation
1865   *		request.
1866   *
1867   *		this routine first tries to find an existing iag with free
1868   *		extents through the ag free extent list.  if list is not
1869   *		empty, the head of the list will be selected as the home
1870   *		of the new extent of free inodes.  otherwise (the list is
1871   *		empty), a new iag will be allocated for the ag to contain
1872   *		the extent.
1873   *
1874   *		once an iag has been selected, the free extent summary map
1875   *		is used to locate a free extent within the iag and diNewExt()
1876   *		is called to initialize the extent, with initialization
1877   *		including the allocation of the first inode of the extent
1878   *		for the purpose of satisfying this request.
1879   *
1880   * PARAMETERS:
1881   *	imap	- pointer to inode map control structure.
1882   *	agno	- allocation group number.
1883   *	ip	- pointer to new inode to be filled in on successful return
1884   *		  with the disk inode number allocated, its extent address
1885   *		  and the start of the ag.
1886   *
1887   * RETURN VALUES:
1888   *	0	- success.
1889   *	-ENOSPC	- insufficient disk resources.
1890   *	-EIO	- i/o error.
1891   */
diAllocExt(struct inomap * imap,int agno,struct inode * ip)1892  static int diAllocExt(struct inomap * imap, int agno, struct inode *ip)
1893  {
1894  	int rem, iagno, sword, extno, rc;
1895  	struct metapage *mp;
1896  	struct iag *iagp;
1897  
1898  	/* check if the ag has any iags with free extents.  if not,
1899  	 * allocate a new iag for the ag.
1900  	 */
1901  	if ((iagno = imap->im_agctl[agno].extfree) < 0) {
1902  		/* If successful, diNewIAG will obtain the read lock on the
1903  		 * imap inode.
1904  		 */
1905  		if ((rc = diNewIAG(imap, &iagno, agno, &mp))) {
1906  			return (rc);
1907  		}
1908  		iagp = (struct iag *) mp->data;
1909  
1910  		/* set the ag number if this a brand new iag
1911  		 */
1912  		iagp->agstart =
1913  		    cpu_to_le64(AGTOBLK(agno, imap->im_ipimap));
1914  	} else {
1915  		/* read the iag.
1916  		 */
1917  		IREAD_LOCK(imap->im_ipimap, RDWRLOCK_IMAP);
1918  		if ((rc = diIAGRead(imap, iagno, &mp))) {
1919  			IREAD_UNLOCK(imap->im_ipimap);
1920  			jfs_error(ip->i_sb, "error reading iag\n");
1921  			return rc;
1922  		}
1923  		iagp = (struct iag *) mp->data;
1924  	}
1925  
1926  	/* using the free extent summary map, find a free extent.
1927  	 */
1928  	for (sword = 0;; sword++) {
1929  		if (sword >= SMAPSZ) {
1930  			release_metapage(mp);
1931  			IREAD_UNLOCK(imap->im_ipimap);
1932  			jfs_error(ip->i_sb, "free ext summary map not found\n");
1933  			return -EIO;
1934  		}
1935  		if (~iagp->extsmap[sword])
1936  			break;
1937  	}
1938  
1939  	/* determine the extent number of the free extent.
1940  	 */
1941  	rem = diFindFree(le32_to_cpu(iagp->extsmap[sword]), 0);
1942  	if (rem >= EXTSPERSUM) {
1943  		release_metapage(mp);
1944  		IREAD_UNLOCK(imap->im_ipimap);
1945  		jfs_error(ip->i_sb, "free extent not found\n");
1946  		return -EIO;
1947  	}
1948  	extno = (sword << L2EXTSPERSUM) + rem;
1949  
1950  	/* initialize the new extent.
1951  	 */
1952  	rc = diNewExt(imap, iagp, extno);
1953  	IREAD_UNLOCK(imap->im_ipimap);
1954  	if (rc) {
1955  		/* something bad happened.  if a new iag was allocated,
1956  		 * place it back on the inode map's iag free list, and
1957  		 * clear the ag number information.
1958  		 */
1959  		if (iagp->nfreeexts == cpu_to_le32(EXTSPERIAG)) {
1960  			IAGFREE_LOCK(imap);
1961  			iagp->iagfree = cpu_to_le32(imap->im_freeiag);
1962  			imap->im_freeiag = iagno;
1963  			IAGFREE_UNLOCK(imap);
1964  		}
1965  		write_metapage(mp);
1966  		return (rc);
1967  	}
1968  
1969  	/* set the results of the allocation and write the iag.
1970  	 */
1971  	diInitInode(ip, iagno, extno << L2INOSPEREXT, extno, iagp);
1972  
1973  	write_metapage(mp);
1974  
1975  	return (0);
1976  }
1977  
1978  
1979  /*
1980   * NAME:	diAllocBit(imap,iagp,ino)
1981   *
1982   * FUNCTION:	allocate a backed inode from an iag.
1983   *
1984   *		this routine performs the mechanics of allocating a
1985   *		specified inode from a backed extent.
1986   *
1987   *		if the inode to be allocated represents the last free
1988   *		inode within the iag, the iag will be removed from the
1989   *		ag free inode list.
1990   *
1991   *		a careful update approach is used to provide consistency
1992   *		in the face of updates to multiple buffers.  under this
1993   *		approach, all required buffers are obtained before making
1994   *		any updates and are held all are updates are complete.
1995   *
1996   * PRE CONDITION: Already have buffer lock on iagp.  Already have AG lock on
1997   *	this AG.  Must have read lock on imap inode.
1998   *
1999   * PARAMETERS:
2000   *	imap	- pointer to inode map control structure.
2001   *	iagp	- pointer to iag.
2002   *	ino	- inode number to be allocated within the iag.
2003   *
2004   * RETURN VALUES:
2005   *	0	- success.
2006   *	-ENOSPC	- insufficient disk resources.
2007   *	-EIO	- i/o error.
2008   */
diAllocBit(struct inomap * imap,struct iag * iagp,int ino)2009  static int diAllocBit(struct inomap * imap, struct iag * iagp, int ino)
2010  {
2011  	int extno, bitno, agno, sword, rc;
2012  	struct metapage *amp = NULL, *bmp = NULL;
2013  	struct iag *aiagp = NULL, *biagp = NULL;
2014  	u32 mask;
2015  
2016  	/* check if this is the last free inode within the iag.
2017  	 * if so, it will have to be removed from the ag free
2018  	 * inode list, so get the iags preceding and following
2019  	 * it on the list.
2020  	 */
2021  	if (iagp->nfreeinos == cpu_to_le32(1)) {
2022  		if ((int) le32_to_cpu(iagp->inofreefwd) >= 0) {
2023  			if ((rc =
2024  			     diIAGRead(imap, le32_to_cpu(iagp->inofreefwd),
2025  				       &amp)))
2026  				return (rc);
2027  			aiagp = (struct iag *) amp->data;
2028  		}
2029  
2030  		if ((int) le32_to_cpu(iagp->inofreeback) >= 0) {
2031  			if ((rc =
2032  			     diIAGRead(imap,
2033  				       le32_to_cpu(iagp->inofreeback),
2034  				       &bmp))) {
2035  				if (amp)
2036  					release_metapage(amp);
2037  				return (rc);
2038  			}
2039  			biagp = (struct iag *) bmp->data;
2040  		}
2041  	}
2042  
2043  	/* get the ag number, extent number, inode number within
2044  	 * the extent.
2045  	 */
2046  	agno = BLKTOAG(le64_to_cpu(iagp->agstart), JFS_SBI(imap->im_ipimap->i_sb));
2047  	extno = ino >> L2INOSPEREXT;
2048  	bitno = ino & (INOSPEREXT - 1);
2049  
2050  	/* compute the mask for setting the map.
2051  	 */
2052  	mask = HIGHORDER >> bitno;
2053  
2054  	/* the inode should be free and backed.
2055  	 */
2056  	if (((le32_to_cpu(iagp->pmap[extno]) & mask) != 0) ||
2057  	    ((le32_to_cpu(iagp->wmap[extno]) & mask) != 0) ||
2058  	    (addressPXD(&iagp->inoext[extno]) == 0)) {
2059  		if (amp)
2060  			release_metapage(amp);
2061  		if (bmp)
2062  			release_metapage(bmp);
2063  
2064  		jfs_error(imap->im_ipimap->i_sb, "iag inconsistent\n");
2065  		return -EIO;
2066  	}
2067  
2068  	/* mark the inode as allocated in the working map.
2069  	 */
2070  	iagp->wmap[extno] |= cpu_to_le32(mask);
2071  
2072  	/* check if all inodes within the extent are now
2073  	 * allocated.  if so, update the free inode summary
2074  	 * map to reflect this.
2075  	 */
2076  	if (iagp->wmap[extno] == cpu_to_le32(ONES)) {
2077  		sword = extno >> L2EXTSPERSUM;
2078  		bitno = extno & (EXTSPERSUM - 1);
2079  		iagp->inosmap[sword] |= cpu_to_le32(HIGHORDER >> bitno);
2080  	}
2081  
2082  	/* if this was the last free inode in the iag, remove the
2083  	 * iag from the ag free inode list.
2084  	 */
2085  	if (iagp->nfreeinos == cpu_to_le32(1)) {
2086  		if (amp) {
2087  			aiagp->inofreeback = iagp->inofreeback;
2088  			write_metapage(amp);
2089  		}
2090  
2091  		if (bmp) {
2092  			biagp->inofreefwd = iagp->inofreefwd;
2093  			write_metapage(bmp);
2094  		} else {
2095  			imap->im_agctl[agno].inofree =
2096  			    le32_to_cpu(iagp->inofreefwd);
2097  		}
2098  		iagp->inofreefwd = iagp->inofreeback = cpu_to_le32(-1);
2099  	}
2100  
2101  	/* update the free inode count at the iag, ag, inode
2102  	 * map levels.
2103  	 */
2104  	le32_add_cpu(&iagp->nfreeinos, -1);
2105  	imap->im_agctl[agno].numfree -= 1;
2106  	atomic_dec(&imap->im_numfree);
2107  
2108  	return (0);
2109  }
2110  
2111  
2112  /*
2113   * NAME:	diNewExt(imap,iagp,extno)
2114   *
2115   * FUNCTION:	initialize a new extent of inodes for an iag, allocating
2116   *		the first inode of the extent for use for the current
2117   *		allocation request.
2118   *
2119   *		disk resources are allocated for the new extent of inodes
2120   *		and the inodes themselves are initialized to reflect their
2121   *		existence within the extent (i.e. their inode numbers and
2122   *		inode extent addresses are set) and their initial state
2123   *		(mode and link count are set to zero).
2124   *
2125   *		if the iag is new, it is not yet on an ag extent free list
2126   *		but will now be placed on this list.
2127   *
2128   *		if the allocation of the new extent causes the iag to
2129   *		have no free extent, the iag will be removed from the
2130   *		ag extent free list.
2131   *
2132   *		if the iag has no free backed inodes, it will be placed
2133   *		on the ag free inode list, since the addition of the new
2134   *		extent will now cause it to have free inodes.
2135   *
2136   *		a careful update approach is used to provide consistency
2137   *		(i.e. list consistency) in the face of updates to multiple
2138   *		buffers.  under this approach, all required buffers are
2139   *		obtained before making any updates and are held until all
2140   *		updates are complete.
2141   *
2142   * PRE CONDITION: Already have buffer lock on iagp.  Already have AG lock on
2143   *	this AG.  Must have read lock on imap inode.
2144   *
2145   * PARAMETERS:
2146   *	imap	- pointer to inode map control structure.
2147   *	iagp	- pointer to iag.
2148   *	extno	- extent number.
2149   *
2150   * RETURN VALUES:
2151   *	0	- success.
2152   *	-ENOSPC	- insufficient disk resources.
2153   *	-EIO	- i/o error.
2154   */
diNewExt(struct inomap * imap,struct iag * iagp,int extno)2155  static int diNewExt(struct inomap * imap, struct iag * iagp, int extno)
2156  {
2157  	int agno, iagno, fwd, back, freei = 0, sword, rc;
2158  	struct iag *aiagp = NULL, *biagp = NULL, *ciagp = NULL;
2159  	struct metapage *amp, *bmp, *cmp, *dmp;
2160  	struct inode *ipimap;
2161  	s64 blkno, hint;
2162  	int i, j;
2163  	u32 mask;
2164  	ino_t ino;
2165  	struct dinode *dp;
2166  	struct jfs_sb_info *sbi;
2167  
2168  	/* better have free extents.
2169  	 */
2170  	if (!iagp->nfreeexts) {
2171  		jfs_error(imap->im_ipimap->i_sb, "no free extents\n");
2172  		return -EIO;
2173  	}
2174  
2175  	/* get the inode map inode.
2176  	 */
2177  	ipimap = imap->im_ipimap;
2178  	sbi = JFS_SBI(ipimap->i_sb);
2179  
2180  	amp = bmp = cmp = NULL;
2181  
2182  	/* get the ag and iag numbers for this iag.
2183  	 */
2184  	agno = BLKTOAG(le64_to_cpu(iagp->agstart), sbi);
2185  	if (agno >= MAXAG || agno < 0)
2186  		return -EIO;
2187  
2188  	iagno = le32_to_cpu(iagp->iagnum);
2189  
2190  	/* check if this is the last free extent within the
2191  	 * iag.  if so, the iag must be removed from the ag
2192  	 * free extent list, so get the iags preceding and
2193  	 * following the iag on this list.
2194  	 */
2195  	if (iagp->nfreeexts == cpu_to_le32(1)) {
2196  		if ((fwd = le32_to_cpu(iagp->extfreefwd)) >= 0) {
2197  			if ((rc = diIAGRead(imap, fwd, &amp)))
2198  				return (rc);
2199  			aiagp = (struct iag *) amp->data;
2200  		}
2201  
2202  		if ((back = le32_to_cpu(iagp->extfreeback)) >= 0) {
2203  			if ((rc = diIAGRead(imap, back, &bmp)))
2204  				goto error_out;
2205  			biagp = (struct iag *) bmp->data;
2206  		}
2207  	} else {
2208  		/* the iag has free extents.  if all extents are free
2209  		 * (as is the case for a newly allocated iag), the iag
2210  		 * must be added to the ag free extent list, so get
2211  		 * the iag at the head of the list in preparation for
2212  		 * adding this iag to this list.
2213  		 */
2214  		fwd = back = -1;
2215  		if (iagp->nfreeexts == cpu_to_le32(EXTSPERIAG)) {
2216  			if ((fwd = imap->im_agctl[agno].extfree) >= 0) {
2217  				if ((rc = diIAGRead(imap, fwd, &amp)))
2218  					goto error_out;
2219  				aiagp = (struct iag *) amp->data;
2220  			}
2221  		}
2222  	}
2223  
2224  	/* check if the iag has no free inodes.  if so, the iag
2225  	 * will have to be added to the ag free inode list, so get
2226  	 * the iag at the head of the list in preparation for
2227  	 * adding this iag to this list.  in doing this, we must
2228  	 * check if we already have the iag at the head of
2229  	 * the list in hand.
2230  	 */
2231  	if (iagp->nfreeinos == 0) {
2232  		freei = imap->im_agctl[agno].inofree;
2233  
2234  		if (freei >= 0) {
2235  			if (freei == fwd) {
2236  				ciagp = aiagp;
2237  			} else if (freei == back) {
2238  				ciagp = biagp;
2239  			} else {
2240  				if ((rc = diIAGRead(imap, freei, &cmp)))
2241  					goto error_out;
2242  				ciagp = (struct iag *) cmp->data;
2243  			}
2244  			if (ciagp == NULL) {
2245  				jfs_error(imap->im_ipimap->i_sb,
2246  					  "ciagp == NULL\n");
2247  				rc = -EIO;
2248  				goto error_out;
2249  			}
2250  		}
2251  	}
2252  
2253  	/* allocate disk space for the inode extent.
2254  	 */
2255  	if ((extno == 0) || (addressPXD(&iagp->inoext[extno - 1]) == 0))
2256  		hint = ((s64) agno << sbi->bmap->db_agl2size) - 1;
2257  	else
2258  		hint = addressPXD(&iagp->inoext[extno - 1]) +
2259  		    lengthPXD(&iagp->inoext[extno - 1]) - 1;
2260  
2261  	if ((rc = dbAlloc(ipimap, hint, (s64) imap->im_nbperiext, &blkno)))
2262  		goto error_out;
2263  
2264  	/* compute the inode number of the first inode within the
2265  	 * extent.
2266  	 */
2267  	ino = (iagno << L2INOSPERIAG) + (extno << L2INOSPEREXT);
2268  
2269  	/* initialize the inodes within the newly allocated extent a
2270  	 * page at a time.
2271  	 */
2272  	for (i = 0; i < imap->im_nbperiext; i += sbi->nbperpage) {
2273  		/* get a buffer for this page of disk inodes.
2274  		 */
2275  		dmp = get_metapage(ipimap, blkno + i, PSIZE, 1);
2276  		if (dmp == NULL) {
2277  			rc = -EIO;
2278  			goto error_out;
2279  		}
2280  		dp = (struct dinode *) dmp->data;
2281  
2282  		/* initialize the inode number, mode, link count and
2283  		 * inode extent address.
2284  		 */
2285  		for (j = 0; j < INOSPERPAGE; j++, dp++, ino++) {
2286  			dp->di_inostamp = cpu_to_le32(sbi->inostamp);
2287  			dp->di_number = cpu_to_le32(ino);
2288  			dp->di_fileset = cpu_to_le32(FILESYSTEM_I);
2289  			dp->di_mode = 0;
2290  			dp->di_nlink = 0;
2291  			PXDaddress(&(dp->di_ixpxd), blkno);
2292  			PXDlength(&(dp->di_ixpxd), imap->im_nbperiext);
2293  		}
2294  		write_metapage(dmp);
2295  	}
2296  
2297  	/* if this is the last free extent within the iag, remove the
2298  	 * iag from the ag free extent list.
2299  	 */
2300  	if (iagp->nfreeexts == cpu_to_le32(1)) {
2301  		if (fwd >= 0)
2302  			aiagp->extfreeback = iagp->extfreeback;
2303  
2304  		if (back >= 0)
2305  			biagp->extfreefwd = iagp->extfreefwd;
2306  		else
2307  			imap->im_agctl[agno].extfree =
2308  			    le32_to_cpu(iagp->extfreefwd);
2309  
2310  		iagp->extfreefwd = iagp->extfreeback = cpu_to_le32(-1);
2311  	} else {
2312  		/* if the iag has all free extents (newly allocated iag),
2313  		 * add the iag to the ag free extent list.
2314  		 */
2315  		if (iagp->nfreeexts == cpu_to_le32(EXTSPERIAG)) {
2316  			if (fwd >= 0)
2317  				aiagp->extfreeback = cpu_to_le32(iagno);
2318  
2319  			iagp->extfreefwd = cpu_to_le32(fwd);
2320  			iagp->extfreeback = cpu_to_le32(-1);
2321  			imap->im_agctl[agno].extfree = iagno;
2322  		}
2323  	}
2324  
2325  	/* if the iag has no free inodes, add the iag to the
2326  	 * ag free inode list.
2327  	 */
2328  	if (iagp->nfreeinos == 0) {
2329  		if (freei >= 0)
2330  			ciagp->inofreeback = cpu_to_le32(iagno);
2331  
2332  		iagp->inofreefwd =
2333  		    cpu_to_le32(imap->im_agctl[agno].inofree);
2334  		iagp->inofreeback = cpu_to_le32(-1);
2335  		imap->im_agctl[agno].inofree = iagno;
2336  	}
2337  
2338  	/* initialize the extent descriptor of the extent. */
2339  	PXDlength(&iagp->inoext[extno], imap->im_nbperiext);
2340  	PXDaddress(&iagp->inoext[extno], blkno);
2341  
2342  	/* initialize the working and persistent map of the extent.
2343  	 * the working map will be initialized such that
2344  	 * it indicates the first inode of the extent is allocated.
2345  	 */
2346  	iagp->wmap[extno] = cpu_to_le32(HIGHORDER);
2347  	iagp->pmap[extno] = 0;
2348  
2349  	/* update the free inode and free extent summary maps
2350  	 * for the extent to indicate the extent has free inodes
2351  	 * and no longer represents a free extent.
2352  	 */
2353  	sword = extno >> L2EXTSPERSUM;
2354  	mask = HIGHORDER >> (extno & (EXTSPERSUM - 1));
2355  	iagp->extsmap[sword] |= cpu_to_le32(mask);
2356  	iagp->inosmap[sword] &= cpu_to_le32(~mask);
2357  
2358  	/* update the free inode and free extent counts for the
2359  	 * iag.
2360  	 */
2361  	le32_add_cpu(&iagp->nfreeinos, (INOSPEREXT - 1));
2362  	le32_add_cpu(&iagp->nfreeexts, -1);
2363  
2364  	/* update the free and backed inode counts for the ag.
2365  	 */
2366  	imap->im_agctl[agno].numfree += (INOSPEREXT - 1);
2367  	imap->im_agctl[agno].numinos += INOSPEREXT;
2368  
2369  	/* update the free and backed inode counts for the inode map.
2370  	 */
2371  	atomic_add(INOSPEREXT - 1, &imap->im_numfree);
2372  	atomic_add(INOSPEREXT, &imap->im_numinos);
2373  
2374  	/* write the iags.
2375  	 */
2376  	if (amp)
2377  		write_metapage(amp);
2378  	if (bmp)
2379  		write_metapage(bmp);
2380  	if (cmp)
2381  		write_metapage(cmp);
2382  
2383  	return (0);
2384  
2385        error_out:
2386  
2387  	/* release the iags.
2388  	 */
2389  	if (amp)
2390  		release_metapage(amp);
2391  	if (bmp)
2392  		release_metapage(bmp);
2393  	if (cmp)
2394  		release_metapage(cmp);
2395  
2396  	return (rc);
2397  }
2398  
2399  
2400  /*
2401   * NAME:	diNewIAG(imap,iagnop,agno)
2402   *
2403   * FUNCTION:	allocate a new iag for an allocation group.
2404   *
2405   *		first tries to allocate the iag from the inode map
2406   *		iagfree list:
2407   *		if the list has free iags, the head of the list is removed
2408   *		and returned to satisfy the request.
2409   *		if the inode map's iag free list is empty, the inode map
2410   *		is extended to hold a new iag. this new iag is initialized
2411   *		and returned to satisfy the request.
2412   *
2413   * PARAMETERS:
2414   *	imap	- pointer to inode map control structure.
2415   *	iagnop	- pointer to an iag number set with the number of the
2416   *		  newly allocated iag upon successful return.
2417   *	agno	- allocation group number.
2418   *	bpp	- Buffer pointer to be filled in with new IAG's buffer
2419   *
2420   * RETURN VALUES:
2421   *	0	- success.
2422   *	-ENOSPC	- insufficient disk resources.
2423   *	-EIO	- i/o error.
2424   *
2425   * serialization:
2426   *	AG lock held on entry/exit;
2427   *	write lock on the map is held inside;
2428   *	read lock on the map is held on successful completion;
2429   *
2430   * note: new iag transaction:
2431   * . synchronously write iag;
2432   * . write log of xtree and inode of imap;
2433   * . commit;
2434   * . synchronous write of xtree (right to left, bottom to top);
2435   * . at start of logredo(): init in-memory imap with one additional iag page;
2436   * . at end of logredo(): re-read imap inode to determine
2437   *   new imap size;
2438   */
2439  static int
diNewIAG(struct inomap * imap,int * iagnop,int agno,struct metapage ** mpp)2440  diNewIAG(struct inomap * imap, int *iagnop, int agno, struct metapage ** mpp)
2441  {
2442  	int rc;
2443  	int iagno, i, xlen;
2444  	struct inode *ipimap;
2445  	struct super_block *sb;
2446  	struct jfs_sb_info *sbi;
2447  	struct metapage *mp;
2448  	struct iag *iagp;
2449  	s64 xaddr = 0;
2450  	s64 blkno;
2451  	tid_t tid;
2452  	struct inode *iplist[1];
2453  
2454  	/* pick up pointers to the inode map and mount inodes */
2455  	ipimap = imap->im_ipimap;
2456  	sb = ipimap->i_sb;
2457  	sbi = JFS_SBI(sb);
2458  
2459  	/* acquire the free iag lock */
2460  	IAGFREE_LOCK(imap);
2461  
2462  	/* if there are any iags on the inode map free iag list,
2463  	 * allocate the iag from the head of the list.
2464  	 */
2465  	if (imap->im_freeiag >= 0) {
2466  		/* pick up the iag number at the head of the list */
2467  		iagno = imap->im_freeiag;
2468  
2469  		/* determine the logical block number of the iag */
2470  		blkno = IAGTOLBLK(iagno, sbi->l2nbperpage);
2471  	} else {
2472  		/* no free iags. the inode map will have to be extented
2473  		 * to include a new iag.
2474  		 */
2475  
2476  		/* acquire inode map lock */
2477  		IWRITE_LOCK(ipimap, RDWRLOCK_IMAP);
2478  
2479  		if (ipimap->i_size >> L2PSIZE != imap->im_nextiag + 1) {
2480  			IWRITE_UNLOCK(ipimap);
2481  			IAGFREE_UNLOCK(imap);
2482  			jfs_error(imap->im_ipimap->i_sb,
2483  				  "ipimap->i_size is wrong\n");
2484  			return -EIO;
2485  		}
2486  
2487  
2488  		/* get the next available iag number */
2489  		iagno = imap->im_nextiag;
2490  
2491  		/* make sure that we have not exceeded the maximum inode
2492  		 * number limit.
2493  		 */
2494  		if (iagno > (MAXIAGS - 1)) {
2495  			/* release the inode map lock */
2496  			IWRITE_UNLOCK(ipimap);
2497  
2498  			rc = -ENOSPC;
2499  			goto out;
2500  		}
2501  
2502  		/*
2503  		 * synchronously append new iag page.
2504  		 */
2505  		/* determine the logical address of iag page to append */
2506  		blkno = IAGTOLBLK(iagno, sbi->l2nbperpage);
2507  
2508  		/* Allocate extent for new iag page */
2509  		xlen = sbi->nbperpage;
2510  		if ((rc = dbAlloc(ipimap, 0, (s64) xlen, &xaddr))) {
2511  			/* release the inode map lock */
2512  			IWRITE_UNLOCK(ipimap);
2513  
2514  			goto out;
2515  		}
2516  
2517  		/*
2518  		 * start transaction of update of the inode map
2519  		 * addressing structure pointing to the new iag page;
2520  		 */
2521  		tid = txBegin(sb, COMMIT_FORCE);
2522  		mutex_lock(&JFS_IP(ipimap)->commit_mutex);
2523  
2524  		/* update the inode map addressing structure to point to it */
2525  		if ((rc =
2526  		     xtInsert(tid, ipimap, 0, blkno, xlen, &xaddr, 0))) {
2527  			txEnd(tid);
2528  			mutex_unlock(&JFS_IP(ipimap)->commit_mutex);
2529  			/* Free the blocks allocated for the iag since it was
2530  			 * not successfully added to the inode map
2531  			 */
2532  			dbFree(ipimap, xaddr, (s64) xlen);
2533  
2534  			/* release the inode map lock */
2535  			IWRITE_UNLOCK(ipimap);
2536  
2537  			goto out;
2538  		}
2539  
2540  		/* update the inode map's inode to reflect the extension */
2541  		ipimap->i_size += PSIZE;
2542  		inode_add_bytes(ipimap, PSIZE);
2543  
2544  		/* assign a buffer for the page */
2545  		mp = get_metapage(ipimap, blkno, PSIZE, 0);
2546  		if (!mp) {
2547  			/*
2548  			 * This is very unlikely since we just created the
2549  			 * extent, but let's try to handle it correctly
2550  			 */
2551  			xtTruncate(tid, ipimap, ipimap->i_size - PSIZE,
2552  				   COMMIT_PWMAP);
2553  
2554  			txAbort(tid, 0);
2555  			txEnd(tid);
2556  			mutex_unlock(&JFS_IP(ipimap)->commit_mutex);
2557  
2558  			/* release the inode map lock */
2559  			IWRITE_UNLOCK(ipimap);
2560  
2561  			rc = -EIO;
2562  			goto out;
2563  		}
2564  		iagp = (struct iag *) mp->data;
2565  
2566  		/* init the iag */
2567  		memset(iagp, 0, sizeof(struct iag));
2568  		iagp->iagnum = cpu_to_le32(iagno);
2569  		iagp->inofreefwd = iagp->inofreeback = cpu_to_le32(-1);
2570  		iagp->extfreefwd = iagp->extfreeback = cpu_to_le32(-1);
2571  		iagp->iagfree = cpu_to_le32(-1);
2572  		iagp->nfreeinos = 0;
2573  		iagp->nfreeexts = cpu_to_le32(EXTSPERIAG);
2574  
2575  		/* initialize the free inode summary map (free extent
2576  		 * summary map initialization handled by bzero).
2577  		 */
2578  		for (i = 0; i < SMAPSZ; i++)
2579  			iagp->inosmap[i] = cpu_to_le32(ONES);
2580  
2581  		/*
2582  		 * Write and sync the metapage
2583  		 */
2584  		flush_metapage(mp);
2585  
2586  		/*
2587  		 * txCommit(COMMIT_FORCE) will synchronously write address
2588  		 * index pages and inode after commit in careful update order
2589  		 * of address index pages (right to left, bottom up);
2590  		 */
2591  		iplist[0] = ipimap;
2592  		rc = txCommit(tid, 1, &iplist[0], COMMIT_FORCE);
2593  
2594  		txEnd(tid);
2595  		mutex_unlock(&JFS_IP(ipimap)->commit_mutex);
2596  
2597  		duplicateIXtree(sb, blkno, xlen, &xaddr);
2598  
2599  		/* update the next available iag number */
2600  		imap->im_nextiag += 1;
2601  
2602  		/* Add the iag to the iag free list so we don't lose the iag
2603  		 * if a failure happens now.
2604  		 */
2605  		imap->im_freeiag = iagno;
2606  
2607  		/* Until we have logredo working, we want the imap inode &
2608  		 * control page to be up to date.
2609  		 */
2610  		diSync(ipimap);
2611  
2612  		/* release the inode map lock */
2613  		IWRITE_UNLOCK(ipimap);
2614  	}
2615  
2616  	/* obtain read lock on map */
2617  	IREAD_LOCK(ipimap, RDWRLOCK_IMAP);
2618  
2619  	/* read the iag */
2620  	if ((rc = diIAGRead(imap, iagno, &mp))) {
2621  		IREAD_UNLOCK(ipimap);
2622  		rc = -EIO;
2623  		goto out;
2624  	}
2625  	iagp = (struct iag *) mp->data;
2626  
2627  	/* remove the iag from the iag free list */
2628  	imap->im_freeiag = le32_to_cpu(iagp->iagfree);
2629  	iagp->iagfree = cpu_to_le32(-1);
2630  
2631  	/* set the return iag number and buffer pointer */
2632  	*iagnop = iagno;
2633  	*mpp = mp;
2634  
2635        out:
2636  	/* release the iag free lock */
2637  	IAGFREE_UNLOCK(imap);
2638  
2639  	return (rc);
2640  }
2641  
2642  /*
2643   * NAME:	diIAGRead()
2644   *
2645   * FUNCTION:	get the buffer for the specified iag within a fileset
2646   *		or aggregate inode map.
2647   *
2648   * PARAMETERS:
2649   *	imap	- pointer to inode map control structure.
2650   *	iagno	- iag number.
2651   *	bpp	- point to buffer pointer to be filled in on successful
2652   *		  exit.
2653   *
2654   * SERIALIZATION:
2655   *	must have read lock on imap inode
2656   *	(When called by diExtendFS, the filesystem is quiesced, therefore
2657   *	 the read lock is unnecessary.)
2658   *
2659   * RETURN VALUES:
2660   *	0	- success.
2661   *	-EIO	- i/o error.
2662   */
diIAGRead(struct inomap * imap,int iagno,struct metapage ** mpp)2663  static int diIAGRead(struct inomap * imap, int iagno, struct metapage ** mpp)
2664  {
2665  	struct inode *ipimap = imap->im_ipimap;
2666  	s64 blkno;
2667  
2668  	/* compute the logical block number of the iag. */
2669  	blkno = IAGTOLBLK(iagno, JFS_SBI(ipimap->i_sb)->l2nbperpage);
2670  
2671  	/* read the iag. */
2672  	*mpp = read_metapage(ipimap, blkno, PSIZE, 0);
2673  	if (*mpp == NULL) {
2674  		return -EIO;
2675  	}
2676  
2677  	return (0);
2678  }
2679  
2680  /*
2681   * NAME:	diFindFree()
2682   *
2683   * FUNCTION:	find the first free bit in a word starting at
2684   *		the specified bit position.
2685   *
2686   * PARAMETERS:
2687   *	word	- word to be examined.
2688   *	start	- starting bit position.
2689   *
2690   * RETURN VALUES:
2691   *	bit position of first free bit in the word or 32 if
2692   *	no free bits were found.
2693   */
diFindFree(u32 word,int start)2694  static int diFindFree(u32 word, int start)
2695  {
2696  	int bitno;
2697  	assert(start < 32);
2698  	/* scan the word for the first free bit. */
2699  	for (word <<= start, bitno = start; bitno < 32;
2700  	     bitno++, word <<= 1) {
2701  		if ((word & HIGHORDER) == 0)
2702  			break;
2703  	}
2704  	return (bitno);
2705  }
2706  
2707  /*
2708   * NAME:	diUpdatePMap()
2709   *
2710   * FUNCTION: Update the persistent map in an IAG for the allocation or
2711   *	freeing of the specified inode.
2712   *
2713   * PRE CONDITIONS: Working map has already been updated for allocate.
2714   *
2715   * PARAMETERS:
2716   *	ipimap	- Incore inode map inode
2717   *	inum	- Number of inode to mark in permanent map
2718   *	is_free	- If 'true' indicates inode should be marked freed, otherwise
2719   *		  indicates inode should be marked allocated.
2720   *
2721   * RETURN VALUES:
2722   *		0 for success
2723   */
2724  int
diUpdatePMap(struct inode * ipimap,unsigned long inum,bool is_free,struct tblock * tblk)2725  diUpdatePMap(struct inode *ipimap,
2726  	     unsigned long inum, bool is_free, struct tblock * tblk)
2727  {
2728  	int rc;
2729  	struct iag *iagp;
2730  	struct metapage *mp;
2731  	int iagno, ino, extno, bitno;
2732  	struct inomap *imap;
2733  	u32 mask;
2734  	struct jfs_log *log;
2735  	int lsn, difft, diffp;
2736  	unsigned long flags;
2737  
2738  	imap = JFS_IP(ipimap)->i_imap;
2739  	/* get the iag number containing the inode */
2740  	iagno = INOTOIAG(inum);
2741  	/* make sure that the iag is contained within the map */
2742  	if (iagno >= imap->im_nextiag) {
2743  		jfs_error(ipimap->i_sb, "the iag is outside the map\n");
2744  		return -EIO;
2745  	}
2746  	/* read the iag */
2747  	IREAD_LOCK(ipimap, RDWRLOCK_IMAP);
2748  	rc = diIAGRead(imap, iagno, &mp);
2749  	IREAD_UNLOCK(ipimap);
2750  	if (rc)
2751  		return (rc);
2752  	metapage_wait_for_io(mp);
2753  	iagp = (struct iag *) mp->data;
2754  	/* get the inode number and extent number of the inode within
2755  	 * the iag and the inode number within the extent.
2756  	 */
2757  	ino = inum & (INOSPERIAG - 1);
2758  	extno = ino >> L2INOSPEREXT;
2759  	bitno = ino & (INOSPEREXT - 1);
2760  	mask = HIGHORDER >> bitno;
2761  	/*
2762  	 * mark the inode free in persistent map:
2763  	 */
2764  	if (is_free) {
2765  		/* The inode should have been allocated both in working
2766  		 * map and in persistent map;
2767  		 * the inode will be freed from working map at the release
2768  		 * of last reference release;
2769  		 */
2770  		if (!(le32_to_cpu(iagp->wmap[extno]) & mask)) {
2771  			jfs_error(ipimap->i_sb,
2772  				  "inode %ld not marked as allocated in wmap!\n",
2773  				  inum);
2774  		}
2775  		if (!(le32_to_cpu(iagp->pmap[extno]) & mask)) {
2776  			jfs_error(ipimap->i_sb,
2777  				  "inode %ld not marked as allocated in pmap!\n",
2778  				  inum);
2779  		}
2780  		/* update the bitmap for the extent of the freed inode */
2781  		iagp->pmap[extno] &= cpu_to_le32(~mask);
2782  	}
2783  	/*
2784  	 * mark the inode allocated in persistent map:
2785  	 */
2786  	else {
2787  		/* The inode should be already allocated in the working map
2788  		 * and should be free in persistent map;
2789  		 */
2790  		if (!(le32_to_cpu(iagp->wmap[extno]) & mask)) {
2791  			release_metapage(mp);
2792  			jfs_error(ipimap->i_sb,
2793  				  "the inode is not allocated in the working map\n");
2794  			return -EIO;
2795  		}
2796  		if ((le32_to_cpu(iagp->pmap[extno]) & mask) != 0) {
2797  			release_metapage(mp);
2798  			jfs_error(ipimap->i_sb,
2799  				  "the inode is not free in the persistent map\n");
2800  			return -EIO;
2801  		}
2802  		/* update the bitmap for the extent of the allocated inode */
2803  		iagp->pmap[extno] |= cpu_to_le32(mask);
2804  	}
2805  	/*
2806  	 * update iag lsn
2807  	 */
2808  	lsn = tblk->lsn;
2809  	log = JFS_SBI(tblk->sb)->log;
2810  	LOGSYNC_LOCK(log, flags);
2811  	if (mp->lsn != 0) {
2812  		/* inherit older/smaller lsn */
2813  		logdiff(difft, lsn, log);
2814  		logdiff(diffp, mp->lsn, log);
2815  		if (difft < diffp) {
2816  			mp->lsn = lsn;
2817  			/* move mp after tblock in logsync list */
2818  			list_move(&mp->synclist, &tblk->synclist);
2819  		}
2820  		/* inherit younger/larger clsn */
2821  		assert(mp->clsn);
2822  		logdiff(difft, tblk->clsn, log);
2823  		logdiff(diffp, mp->clsn, log);
2824  		if (difft > diffp)
2825  			mp->clsn = tblk->clsn;
2826  	} else {
2827  		mp->log = log;
2828  		mp->lsn = lsn;
2829  		/* insert mp after tblock in logsync list */
2830  		log->count++;
2831  		list_add(&mp->synclist, &tblk->synclist);
2832  		mp->clsn = tblk->clsn;
2833  	}
2834  	LOGSYNC_UNLOCK(log, flags);
2835  	write_metapage(mp);
2836  	return (0);
2837  }
2838  
2839  /*
2840   *	diExtendFS()
2841   *
2842   * function: update imap for extendfs();
2843   *
2844   * note: AG size has been increased s.t. each k old contiguous AGs are
2845   * coalesced into a new AG;
2846   */
diExtendFS(struct inode * ipimap,struct inode * ipbmap)2847  int diExtendFS(struct inode *ipimap, struct inode *ipbmap)
2848  {
2849  	int rc, rcx = 0;
2850  	struct inomap *imap = JFS_IP(ipimap)->i_imap;
2851  	struct iag *iagp = NULL, *hiagp = NULL;
2852  	struct bmap *mp = JFS_SBI(ipbmap->i_sb)->bmap;
2853  	struct metapage *bp, *hbp;
2854  	int i, n, head;
2855  	int numinos, xnuminos = 0, xnumfree = 0;
2856  	s64 agstart;
2857  
2858  	jfs_info("diExtendFS: nextiag:%d numinos:%d numfree:%d",
2859  		   imap->im_nextiag, atomic_read(&imap->im_numinos),
2860  		   atomic_read(&imap->im_numfree));
2861  
2862  	/*
2863  	 *	reconstruct imap
2864  	 *
2865  	 * coalesce contiguous k (newAGSize/oldAGSize) AGs;
2866  	 * i.e., (AGi, ..., AGj) where i = k*n and j = k*(n+1) - 1 to AGn;
2867  	 * note: new AG size = old AG size * (2**x).
2868  	 */
2869  
2870  	/* init per AG control information im_agctl[] */
2871  	for (i = 0; i < MAXAG; i++) {
2872  		imap->im_agctl[i].inofree = -1;
2873  		imap->im_agctl[i].extfree = -1;
2874  		imap->im_agctl[i].numinos = 0;	/* number of backed inodes */
2875  		imap->im_agctl[i].numfree = 0;	/* number of free backed inodes */
2876  	}
2877  
2878  	/*
2879  	 *	process each iag page of the map.
2880  	 *
2881  	 * rebuild AG Free Inode List, AG Free Inode Extent List;
2882  	 */
2883  	for (i = 0; i < imap->im_nextiag; i++) {
2884  		if ((rc = diIAGRead(imap, i, &bp))) {
2885  			rcx = rc;
2886  			continue;
2887  		}
2888  		iagp = (struct iag *) bp->data;
2889  		if (le32_to_cpu(iagp->iagnum) != i) {
2890  			release_metapage(bp);
2891  			jfs_error(ipimap->i_sb, "unexpected value of iagnum\n");
2892  			return -EIO;
2893  		}
2894  
2895  		/* leave free iag in the free iag list */
2896  		if (iagp->nfreeexts == cpu_to_le32(EXTSPERIAG)) {
2897  			release_metapage(bp);
2898  			continue;
2899  		}
2900  
2901  		agstart = le64_to_cpu(iagp->agstart);
2902  		n = agstart >> mp->db_agl2size;
2903  		iagp->agstart = cpu_to_le64((s64)n << mp->db_agl2size);
2904  
2905  		/* compute backed inodes */
2906  		numinos = (EXTSPERIAG - le32_to_cpu(iagp->nfreeexts))
2907  		    << L2INOSPEREXT;
2908  		if (numinos > 0) {
2909  			/* merge AG backed inodes */
2910  			imap->im_agctl[n].numinos += numinos;
2911  			xnuminos += numinos;
2912  		}
2913  
2914  		/* if any backed free inodes, insert at AG free inode list */
2915  		if ((int) le32_to_cpu(iagp->nfreeinos) > 0) {
2916  			if ((head = imap->im_agctl[n].inofree) == -1) {
2917  				iagp->inofreefwd = cpu_to_le32(-1);
2918  				iagp->inofreeback = cpu_to_le32(-1);
2919  			} else {
2920  				if ((rc = diIAGRead(imap, head, &hbp))) {
2921  					rcx = rc;
2922  					goto nextiag;
2923  				}
2924  				hiagp = (struct iag *) hbp->data;
2925  				hiagp->inofreeback = iagp->iagnum;
2926  				iagp->inofreefwd = cpu_to_le32(head);
2927  				iagp->inofreeback = cpu_to_le32(-1);
2928  				write_metapage(hbp);
2929  			}
2930  
2931  			imap->im_agctl[n].inofree =
2932  			    le32_to_cpu(iagp->iagnum);
2933  
2934  			/* merge AG backed free inodes */
2935  			imap->im_agctl[n].numfree +=
2936  			    le32_to_cpu(iagp->nfreeinos);
2937  			xnumfree += le32_to_cpu(iagp->nfreeinos);
2938  		}
2939  
2940  		/* if any free extents, insert at AG free extent list */
2941  		if (le32_to_cpu(iagp->nfreeexts) > 0) {
2942  			if ((head = imap->im_agctl[n].extfree) == -1) {
2943  				iagp->extfreefwd = cpu_to_le32(-1);
2944  				iagp->extfreeback = cpu_to_le32(-1);
2945  			} else {
2946  				if ((rc = diIAGRead(imap, head, &hbp))) {
2947  					rcx = rc;
2948  					goto nextiag;
2949  				}
2950  				hiagp = (struct iag *) hbp->data;
2951  				hiagp->extfreeback = iagp->iagnum;
2952  				iagp->extfreefwd = cpu_to_le32(head);
2953  				iagp->extfreeback = cpu_to_le32(-1);
2954  				write_metapage(hbp);
2955  			}
2956  
2957  			imap->im_agctl[n].extfree =
2958  			    le32_to_cpu(iagp->iagnum);
2959  		}
2960  
2961  	      nextiag:
2962  		write_metapage(bp);
2963  	}
2964  
2965  	if (xnuminos != atomic_read(&imap->im_numinos) ||
2966  	    xnumfree != atomic_read(&imap->im_numfree)) {
2967  		jfs_error(ipimap->i_sb, "numinos or numfree incorrect\n");
2968  		return -EIO;
2969  	}
2970  
2971  	return rcx;
2972  }
2973  
2974  
2975  /*
2976   *	duplicateIXtree()
2977   *
2978   * serialization: IWRITE_LOCK held on entry/exit
2979   *
2980   * note: shadow page with regular inode (rel.2);
2981   */
duplicateIXtree(struct super_block * sb,s64 blkno,int xlen,s64 * xaddr)2982  static void duplicateIXtree(struct super_block *sb, s64 blkno,
2983  			    int xlen, s64 *xaddr)
2984  {
2985  	struct jfs_superblock *j_sb;
2986  	struct buffer_head *bh;
2987  	struct inode *ip;
2988  	tid_t tid;
2989  
2990  	/* if AIT2 ipmap2 is bad, do not try to update it */
2991  	if (JFS_SBI(sb)->mntflag & JFS_BAD_SAIT)	/* s_flag */
2992  		return;
2993  	ip = diReadSpecial(sb, FILESYSTEM_I, 1);
2994  	if (ip == NULL) {
2995  		JFS_SBI(sb)->mntflag |= JFS_BAD_SAIT;
2996  		if (readSuper(sb, &bh))
2997  			return;
2998  		j_sb = (struct jfs_superblock *)bh->b_data;
2999  		j_sb->s_flag |= cpu_to_le32(JFS_BAD_SAIT);
3000  
3001  		mark_buffer_dirty(bh);
3002  		sync_dirty_buffer(bh);
3003  		brelse(bh);
3004  		return;
3005  	}
3006  
3007  	/* start transaction */
3008  	tid = txBegin(sb, COMMIT_FORCE);
3009  	/* update the inode map addressing structure to point to it */
3010  	if (xtInsert(tid, ip, 0, blkno, xlen, xaddr, 0)) {
3011  		JFS_SBI(sb)->mntflag |= JFS_BAD_SAIT;
3012  		txAbort(tid, 1);
3013  		goto cleanup;
3014  
3015  	}
3016  	/* update the inode map's inode to reflect the extension */
3017  	ip->i_size += PSIZE;
3018  	inode_add_bytes(ip, PSIZE);
3019  	txCommit(tid, 1, &ip, COMMIT_FORCE);
3020        cleanup:
3021  	txEnd(tid);
3022  	diFreeSpecial(ip);
3023  }
3024  
3025  /*
3026   * NAME:	copy_from_dinode()
3027   *
3028   * FUNCTION:	Copies inode info from disk inode to in-memory inode
3029   *
3030   * RETURN VALUES:
3031   *	0	- success
3032   *	-ENOMEM	- insufficient memory
3033   */
copy_from_dinode(struct dinode * dip,struct inode * ip)3034  static int copy_from_dinode(struct dinode * dip, struct inode *ip)
3035  {
3036  	struct jfs_inode_info *jfs_ip = JFS_IP(ip);
3037  	struct jfs_sb_info *sbi = JFS_SBI(ip->i_sb);
3038  
3039  	jfs_ip->fileset = le32_to_cpu(dip->di_fileset);
3040  	jfs_ip->mode2 = le32_to_cpu(dip->di_mode);
3041  	jfs_set_inode_flags(ip);
3042  
3043  	ip->i_mode = le32_to_cpu(dip->di_mode) & 0xffff;
3044  	if (sbi->umask != -1) {
3045  		ip->i_mode = (ip->i_mode & ~0777) | (0777 & ~sbi->umask);
3046  		/* For directories, add x permission if r is allowed by umask */
3047  		if (S_ISDIR(ip->i_mode)) {
3048  			if (ip->i_mode & 0400)
3049  				ip->i_mode |= 0100;
3050  			if (ip->i_mode & 0040)
3051  				ip->i_mode |= 0010;
3052  			if (ip->i_mode & 0004)
3053  				ip->i_mode |= 0001;
3054  		}
3055  	}
3056  	set_nlink(ip, le32_to_cpu(dip->di_nlink));
3057  
3058  	jfs_ip->saved_uid = make_kuid(&init_user_ns, le32_to_cpu(dip->di_uid));
3059  	if (!uid_valid(sbi->uid))
3060  		ip->i_uid = jfs_ip->saved_uid;
3061  	else {
3062  		ip->i_uid = sbi->uid;
3063  	}
3064  
3065  	jfs_ip->saved_gid = make_kgid(&init_user_ns, le32_to_cpu(dip->di_gid));
3066  	if (!gid_valid(sbi->gid))
3067  		ip->i_gid = jfs_ip->saved_gid;
3068  	else {
3069  		ip->i_gid = sbi->gid;
3070  	}
3071  
3072  	ip->i_size = le64_to_cpu(dip->di_size);
3073  	ip->i_atime.tv_sec = le32_to_cpu(dip->di_atime.tv_sec);
3074  	ip->i_atime.tv_nsec = le32_to_cpu(dip->di_atime.tv_nsec);
3075  	ip->i_mtime.tv_sec = le32_to_cpu(dip->di_mtime.tv_sec);
3076  	ip->i_mtime.tv_nsec = le32_to_cpu(dip->di_mtime.tv_nsec);
3077  	inode_set_ctime(ip, le32_to_cpu(dip->di_ctime.tv_sec),
3078  			le32_to_cpu(dip->di_ctime.tv_nsec));
3079  	ip->i_blocks = LBLK2PBLK(ip->i_sb, le64_to_cpu(dip->di_nblocks));
3080  	ip->i_generation = le32_to_cpu(dip->di_gen);
3081  
3082  	jfs_ip->ixpxd = dip->di_ixpxd;	/* in-memory pxd's are little-endian */
3083  	jfs_ip->acl = dip->di_acl;	/* as are dxd's */
3084  	jfs_ip->ea = dip->di_ea;
3085  	jfs_ip->next_index = le32_to_cpu(dip->di_next_index);
3086  	jfs_ip->otime = le32_to_cpu(dip->di_otime.tv_sec);
3087  	jfs_ip->acltype = le32_to_cpu(dip->di_acltype);
3088  
3089  	if (S_ISCHR(ip->i_mode) || S_ISBLK(ip->i_mode)) {
3090  		jfs_ip->dev = le32_to_cpu(dip->di_rdev);
3091  		ip->i_rdev = new_decode_dev(jfs_ip->dev);
3092  	}
3093  
3094  	if (S_ISDIR(ip->i_mode)) {
3095  		memcpy(&jfs_ip->u.dir, &dip->u._dir, 384);
3096  	} else if (S_ISREG(ip->i_mode) || S_ISLNK(ip->i_mode)) {
3097  		memcpy(&jfs_ip->i_xtroot, &dip->di_xtroot, 288);
3098  	} else
3099  		memcpy(&jfs_ip->i_inline_ea, &dip->di_inlineea, 128);
3100  
3101  	/* Zero the in-memory-only stuff */
3102  	jfs_ip->cflag = 0;
3103  	jfs_ip->btindex = 0;
3104  	jfs_ip->btorder = 0;
3105  	jfs_ip->bxflag = 0;
3106  	jfs_ip->blid = 0;
3107  	jfs_ip->atlhead = 0;
3108  	jfs_ip->atltail = 0;
3109  	jfs_ip->xtlid = 0;
3110  	return (0);
3111  }
3112  
3113  /*
3114   * NAME:	copy_to_dinode()
3115   *
3116   * FUNCTION:	Copies inode info from in-memory inode to disk inode
3117   */
copy_to_dinode(struct dinode * dip,struct inode * ip)3118  static void copy_to_dinode(struct dinode * dip, struct inode *ip)
3119  {
3120  	struct jfs_inode_info *jfs_ip = JFS_IP(ip);
3121  	struct jfs_sb_info *sbi = JFS_SBI(ip->i_sb);
3122  
3123  	dip->di_fileset = cpu_to_le32(jfs_ip->fileset);
3124  	dip->di_inostamp = cpu_to_le32(sbi->inostamp);
3125  	dip->di_number = cpu_to_le32(ip->i_ino);
3126  	dip->di_gen = cpu_to_le32(ip->i_generation);
3127  	dip->di_size = cpu_to_le64(ip->i_size);
3128  	dip->di_nblocks = cpu_to_le64(PBLK2LBLK(ip->i_sb, ip->i_blocks));
3129  	dip->di_nlink = cpu_to_le32(ip->i_nlink);
3130  	if (!uid_valid(sbi->uid))
3131  		dip->di_uid = cpu_to_le32(i_uid_read(ip));
3132  	else
3133  		dip->di_uid =cpu_to_le32(from_kuid(&init_user_ns,
3134  						   jfs_ip->saved_uid));
3135  	if (!gid_valid(sbi->gid))
3136  		dip->di_gid = cpu_to_le32(i_gid_read(ip));
3137  	else
3138  		dip->di_gid = cpu_to_le32(from_kgid(&init_user_ns,
3139  						    jfs_ip->saved_gid));
3140  	/*
3141  	 * mode2 is only needed for storing the higher order bits.
3142  	 * Trust i_mode for the lower order ones
3143  	 */
3144  	if (sbi->umask == -1)
3145  		dip->di_mode = cpu_to_le32((jfs_ip->mode2 & 0xffff0000) |
3146  					   ip->i_mode);
3147  	else /* Leave the original permissions alone */
3148  		dip->di_mode = cpu_to_le32(jfs_ip->mode2);
3149  
3150  	dip->di_atime.tv_sec = cpu_to_le32(ip->i_atime.tv_sec);
3151  	dip->di_atime.tv_nsec = cpu_to_le32(ip->i_atime.tv_nsec);
3152  	dip->di_ctime.tv_sec = cpu_to_le32(inode_get_ctime(ip).tv_sec);
3153  	dip->di_ctime.tv_nsec = cpu_to_le32(inode_get_ctime(ip).tv_nsec);
3154  	dip->di_mtime.tv_sec = cpu_to_le32(ip->i_mtime.tv_sec);
3155  	dip->di_mtime.tv_nsec = cpu_to_le32(ip->i_mtime.tv_nsec);
3156  	dip->di_ixpxd = jfs_ip->ixpxd;	/* in-memory pxd's are little-endian */
3157  	dip->di_acl = jfs_ip->acl;	/* as are dxd's */
3158  	dip->di_ea = jfs_ip->ea;
3159  	dip->di_next_index = cpu_to_le32(jfs_ip->next_index);
3160  	dip->di_otime.tv_sec = cpu_to_le32(jfs_ip->otime);
3161  	dip->di_otime.tv_nsec = 0;
3162  	dip->di_acltype = cpu_to_le32(jfs_ip->acltype);
3163  	if (S_ISCHR(ip->i_mode) || S_ISBLK(ip->i_mode))
3164  		dip->di_rdev = cpu_to_le32(jfs_ip->dev);
3165  }
3166