xref: /openbmc/linux/fs/jfs/jfs_dmap.c (revision 278002edb19bce2c628fafb0af936e77000f3a5b)
// SPDX-License-Identifier: GPL-2.0-or-later
/*
 *   Copyright (C) International Business Machines Corp., 2000-2004
 *   Portions Copyright (C) Tino Reichardt, 2012
 */
61da177e4SLinus Torvalds 
71da177e4SLinus Torvalds #include <linux/fs.h>
85a0e3ad6STejun Heo #include <linux/slab.h>
91da177e4SLinus Torvalds #include "jfs_incore.h"
101da177e4SLinus Torvalds #include "jfs_superblock.h"
111da177e4SLinus Torvalds #include "jfs_dmap.h"
121da177e4SLinus Torvalds #include "jfs_imap.h"
131da177e4SLinus Torvalds #include "jfs_lock.h"
141da177e4SLinus Torvalds #include "jfs_metapage.h"
151da177e4SLinus Torvalds #include "jfs_debug.h"
16b40c2e66STino Reichardt #include "jfs_discard.h"
171da177e4SLinus Torvalds 
/*
 *	SERIALIZATION of the Block Allocation Map.
 *
 *	the working state of the block allocation map is accessed in
 *	two directions:
 *
 *	1) allocation and free requests that start at the dmap
 *	   level and move up through the dmap control pages (i.e.
 *	   the vast majority of requests).
 *
 *	2) allocation requests that start at dmap control page
 *	   level and work down towards the dmaps.
 *
 *	the serialization scheme used here is as follows.
 *
 *	requests which start at the bottom are serialized against each
 *	other through buffers, and each request holds onto its buffers
 *	as it works its way up from a single dmap to the required level
 *	of dmap control page.
 *	requests that start at the top are serialized against each other
 *	and against requests that start from the bottom by the multiple
 *	read/single write inode lock of the bmap inode. requests starting
 *	at the top take this lock in write mode while requests starting at
 *	the bottom take the lock in read mode.  a single top-down request
 *	may proceed exclusively while multiple bottom-up requests may
 *	proceed simultaneously (under the protection of busy buffers).
 *
 *	in addition to information found in dmaps and dmap control pages,
 *	the working state of the block allocation map also includes read/
 *	write information maintained in the bmap descriptor (i.e. total
 *	free block count, allocation group level free block counts).
 *	a single exclusive lock (BMAP_LOCK) is used to guard this information
 *	in the face of multiple bottom-up requests.
 *	(lock ordering: IREAD_LOCK, BMAP_LOCK);
 *
 *	accesses to the persistent state of the block allocation map (limited
 *	to the persistent bitmaps in dmaps) are guarded by (busy) buffers.
 */
561da177e4SLinus Torvalds 
/*
 * Wrappers around the mutex embedded in the bmap descriptor (db_bmaplock).
 * The argument is parenthesized so that any pointer-valued expression
 * (e.g. "base + 1") expands correctly, not only a plain identifier.
 */
#define BMAP_LOCK_INIT(bmp)	mutex_init(&(bmp)->db_bmaplock)
#define BMAP_LOCK(bmp)		mutex_lock(&(bmp)->db_bmaplock)
#define BMAP_UNLOCK(bmp)	mutex_unlock(&(bmp)->db_bmaplock)
601da177e4SLinus Torvalds 
611da177e4SLinus Torvalds /*
621da177e4SLinus Torvalds  * forward references
631da177e4SLinus Torvalds  */
641da177e4SLinus Torvalds static void dbAllocBits(struct bmap * bmp, struct dmap * dp, s64 blkno,
651da177e4SLinus Torvalds 			int nblocks);
662e16a138SManas Ghandat static void dbSplit(dmtree_t *tp, int leafno, int splitsz, int newval, bool is_ctl);
672e16a138SManas Ghandat static int dbBackSplit(dmtree_t *tp, int leafno, bool is_ctl);
682e16a138SManas Ghandat static int dbJoin(dmtree_t *tp, int leafno, int newval, bool is_ctl);
692e16a138SManas Ghandat static void dbAdjTree(dmtree_t *tp, int leafno, int newval, bool is_ctl);
701da177e4SLinus Torvalds static int dbAdjCtl(struct bmap * bmp, s64 blkno, int newval, int alloc,
711da177e4SLinus Torvalds 		    int level);
721da177e4SLinus Torvalds static int dbAllocAny(struct bmap * bmp, s64 nblocks, int l2nb, s64 * results);
731da177e4SLinus Torvalds static int dbAllocNext(struct bmap * bmp, struct dmap * dp, s64 blkno,
741da177e4SLinus Torvalds 		       int nblocks);
751da177e4SLinus Torvalds static int dbAllocNear(struct bmap * bmp, struct dmap * dp, s64 blkno,
761da177e4SLinus Torvalds 		       int nblocks,
771da177e4SLinus Torvalds 		       int l2nb, s64 * results);
781da177e4SLinus Torvalds static int dbAllocDmap(struct bmap * bmp, struct dmap * dp, s64 blkno,
791da177e4SLinus Torvalds 		       int nblocks);
801da177e4SLinus Torvalds static int dbAllocDmapLev(struct bmap * bmp, struct dmap * dp, int nblocks,
811da177e4SLinus Torvalds 			  int l2nb,
821da177e4SLinus Torvalds 			  s64 * results);
831da177e4SLinus Torvalds static int dbAllocAG(struct bmap * bmp, int agno, s64 nblocks, int l2nb,
841da177e4SLinus Torvalds 		     s64 * results);
851da177e4SLinus Torvalds static int dbAllocCtl(struct bmap * bmp, s64 nblocks, int l2nb, s64 blkno,
861da177e4SLinus Torvalds 		      s64 * results);
871da177e4SLinus Torvalds static int dbExtend(struct inode *ip, s64 blkno, s64 nblocks, s64 addnblocks);
881da177e4SLinus Torvalds static int dbFindBits(u32 word, int l2nb);
891da177e4SLinus Torvalds static int dbFindCtl(struct bmap * bmp, int l2nb, int level, s64 * blkno);
9087c681abSManas Ghandat static int dbFindLeaf(dmtree_t *tp, int l2nb, int *leafidx, bool is_ctl);
9156d12549SDave Kleikamp static int dbFreeBits(struct bmap * bmp, struct dmap * dp, s64 blkno,
921da177e4SLinus Torvalds 		      int nblocks);
931da177e4SLinus Torvalds static int dbFreeDmap(struct bmap * bmp, struct dmap * dp, s64 blkno,
941da177e4SLinus Torvalds 		      int nblocks);
951da177e4SLinus Torvalds static int dbMaxBud(u8 * cp);
961da177e4SLinus Torvalds static int blkstol2(s64 nb);
971da177e4SLinus Torvalds 
981da177e4SLinus Torvalds static int cntlz(u32 value);
991da177e4SLinus Torvalds static int cnttz(u32 word);
1001da177e4SLinus Torvalds 
1011da177e4SLinus Torvalds static int dbAllocDmapBU(struct bmap * bmp, struct dmap * dp, s64 blkno,
1021da177e4SLinus Torvalds 			 int nblocks);
1031da177e4SLinus Torvalds static int dbInitDmap(struct dmap * dp, s64 blkno, int nblocks);
1041da177e4SLinus Torvalds static int dbInitDmapTree(struct dmap * dp);
1051da177e4SLinus Torvalds static int dbInitTree(struct dmaptree * dtp);
1061da177e4SLinus Torvalds static int dbInitDmapCtl(struct dmapctl * dcp, int level, int i);
1071da177e4SLinus Torvalds static int dbGetL2AGSize(s64 nblocks);
1081da177e4SLinus Torvalds 
1091da177e4SLinus Torvalds /*
1101da177e4SLinus Torvalds  *	buddy table
1111da177e4SLinus Torvalds  *
1121da177e4SLinus Torvalds  * table used for determining buddy sizes within characters of
1131da177e4SLinus Torvalds  * dmap bitmap words.  the characters themselves serve as indexes
1141da177e4SLinus Torvalds  * into the table, with the table elements yielding the maximum
1151da177e4SLinus Torvalds  * binary buddy of free bits within the character.
1161da177e4SLinus Torvalds  */
1174d5dbd09SArjan van de Ven static const s8 budtab[256] = {
1181da177e4SLinus Torvalds 	3, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
1191da177e4SLinus Torvalds 	2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1201da177e4SLinus Torvalds 	2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1211da177e4SLinus Torvalds 	2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1221da177e4SLinus Torvalds 	2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1231da177e4SLinus Torvalds 	2, 1, 1, 1, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0,
1241da177e4SLinus Torvalds 	2, 1, 1, 1, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0,
1251da177e4SLinus Torvalds 	2, 1, 1, 1, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0,
1261da177e4SLinus Torvalds 	2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1271da177e4SLinus Torvalds 	2, 1, 1, 1, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0,
1281da177e4SLinus Torvalds 	2, 1, 1, 1, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0,
1291da177e4SLinus Torvalds 	2, 1, 1, 1, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0,
1301da177e4SLinus Torvalds 	2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1311da177e4SLinus Torvalds 	2, 1, 1, 1, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0,
1321da177e4SLinus Torvalds 	2, 1, 1, 1, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0,
1331da177e4SLinus Torvalds 	2, 1, 1, 1, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, -1
1341da177e4SLinus Torvalds };
1351da177e4SLinus Torvalds 
1361da177e4SLinus Torvalds /*
1371da177e4SLinus Torvalds  * NAME:	dbMount()
1381da177e4SLinus Torvalds  *
1391da177e4SLinus Torvalds  * FUNCTION:	initializate the block allocation map.
1401da177e4SLinus Torvalds  *
1411da177e4SLinus Torvalds  *		memory is allocated for the in-core bmap descriptor and
1421da177e4SLinus Torvalds  *		the in-core descriptor is initialized from disk.
1431da177e4SLinus Torvalds  *
1441da177e4SLinus Torvalds  * PARAMETERS:
1451da177e4SLinus Torvalds  *	ipbmap	- pointer to in-core inode for the block map.
1461da177e4SLinus Torvalds  *
1471da177e4SLinus Torvalds  * RETURN VALUES:
1481da177e4SLinus Torvalds  *	0	- success
1491da177e4SLinus Torvalds  *	-ENOMEM	- insufficient memory
1501da177e4SLinus Torvalds  *	-EIO	- i/o error
1512cc7cc01SPavel Skripkin  *	-EINVAL - wrong bmap data
1521da177e4SLinus Torvalds  */
dbMount(struct inode * ipbmap)1531da177e4SLinus Torvalds int dbMount(struct inode *ipbmap)
1541da177e4SLinus Torvalds {
1551da177e4SLinus Torvalds 	struct bmap *bmp;
1561da177e4SLinus Torvalds 	struct dbmap_disk *dbmp_le;
1571da177e4SLinus Torvalds 	struct metapage *mp;
158898f7066SDongliang Mu 	int i, err;
1591da177e4SLinus Torvalds 
1601da177e4SLinus Torvalds 	/*
1611da177e4SLinus Torvalds 	 * allocate/initialize the in-memory bmap descriptor
1621da177e4SLinus Torvalds 	 */
1631da177e4SLinus Torvalds 	/* allocate memory for the in-memory bmap descriptor */
1641da177e4SLinus Torvalds 	bmp = kmalloc(sizeof(struct bmap), GFP_KERNEL);
1651da177e4SLinus Torvalds 	if (bmp == NULL)
1661da177e4SLinus Torvalds 		return -ENOMEM;
1671da177e4SLinus Torvalds 
1681da177e4SLinus Torvalds 	/* read the on-disk bmap descriptor. */
1691da177e4SLinus Torvalds 	mp = read_metapage(ipbmap,
1701da177e4SLinus Torvalds 			   BMAPBLKNO << JFS_SBI(ipbmap->i_sb)->l2nbperpage,
1711da177e4SLinus Torvalds 			   PSIZE, 0);
1721da177e4SLinus Torvalds 	if (mp == NULL) {
173898f7066SDongliang Mu 		err = -EIO;
174898f7066SDongliang Mu 		goto err_kfree_bmp;
1751da177e4SLinus Torvalds 	}
1761da177e4SLinus Torvalds 
1771da177e4SLinus Torvalds 	/* copy the on-disk bmap descriptor to its in-memory version. */
1781da177e4SLinus Torvalds 	dbmp_le = (struct dbmap_disk *) mp->data;
1791da177e4SLinus Torvalds 	bmp->db_mapsize = le64_to_cpu(dbmp_le->dn_mapsize);
1801da177e4SLinus Torvalds 	bmp->db_nfree = le64_to_cpu(dbmp_le->dn_nfree);
18111509910SSiddh Raman Pant 
1821da177e4SLinus Torvalds 	bmp->db_l2nbperpage = le32_to_cpu(dbmp_le->dn_l2nbperpage);
1831a7c53fdSJuntong Deng 	if (bmp->db_l2nbperpage > L2PSIZE - L2MINBLOCKSIZE ||
1841a7c53fdSJuntong Deng 		bmp->db_l2nbperpage < 0) {
18511509910SSiddh Raman Pant 		err = -EINVAL;
18611509910SSiddh Raman Pant 		goto err_release_metapage;
18711509910SSiddh Raman Pant 	}
18811509910SSiddh Raman Pant 
1891da177e4SLinus Torvalds 	bmp->db_numag = le32_to_cpu(dbmp_le->dn_numag);
190f475d8a0SDave Kleikamp 	if (!bmp->db_numag || bmp->db_numag > MAXAG) {
191898f7066SDongliang Mu 		err = -EINVAL;
192898f7066SDongliang Mu 		goto err_release_metapage;
1932cc7cc01SPavel Skripkin 	}
1942cc7cc01SPavel Skripkin 
1951da177e4SLinus Torvalds 	bmp->db_maxlevel = le32_to_cpu(dbmp_le->dn_maxlevel);
1961da177e4SLinus Torvalds 	bmp->db_maxag = le32_to_cpu(dbmp_le->dn_maxag);
1971da177e4SLinus Torvalds 	bmp->db_agpref = le32_to_cpu(dbmp_le->dn_agpref);
1982323de34SJuntong Deng 	if (bmp->db_maxag >= MAXAG || bmp->db_maxag < 0 ||
1992323de34SJuntong Deng 		bmp->db_agpref >= MAXAG || bmp->db_agpref < 0) {
2002323de34SJuntong Deng 		err = -EINVAL;
2012323de34SJuntong Deng 		goto err_release_metapage;
2022323de34SJuntong Deng 	}
2032323de34SJuntong Deng 
2041da177e4SLinus Torvalds 	bmp->db_aglevel = le32_to_cpu(dbmp_le->dn_aglevel);
205d7eecb48SDaniel Mack 	bmp->db_agheight = le32_to_cpu(dbmp_le->dn_agheight);
2061da177e4SLinus Torvalds 	bmp->db_agwidth = le32_to_cpu(dbmp_le->dn_agwidth);
2071da177e4SLinus Torvalds 	bmp->db_agstart = le32_to_cpu(dbmp_le->dn_agstart);
2081da177e4SLinus Torvalds 	bmp->db_agl2size = le32_to_cpu(dbmp_le->dn_agl2size);
209fad376fcSLiu Shixin via Jfs-discussion 	if (bmp->db_agl2size > L2MAXL2SIZE - L2MAXAG ||
210fad376fcSLiu Shixin via Jfs-discussion 	    bmp->db_agl2size < 0) {
211898f7066SDongliang Mu 		err = -EINVAL;
212898f7066SDongliang Mu 		goto err_release_metapage;
213898f7066SDongliang Mu 	}
214898f7066SDongliang Mu 
21525e70c61SHoi Pok Wu 	if (((bmp->db_mapsize - 1) >> bmp->db_agl2size) > MAXAG) {
21625e70c61SHoi Pok Wu 		err = -EINVAL;
21725e70c61SHoi Pok Wu 		goto err_release_metapage;
21825e70c61SHoi Pok Wu 	}
21925e70c61SHoi Pok Wu 
2201da177e4SLinus Torvalds 	for (i = 0; i < MAXAG; i++)
2211da177e4SLinus Torvalds 		bmp->db_agfree[i] = le64_to_cpu(dbmp_le->dn_agfree[i]);
2221da177e4SLinus Torvalds 	bmp->db_agsize = le64_to_cpu(dbmp_le->dn_agsize);
2231da177e4SLinus Torvalds 	bmp->db_maxfreebud = dbmp_le->dn_maxfreebud;
2241da177e4SLinus Torvalds 
2251da177e4SLinus Torvalds 	/* release the buffer. */
2261da177e4SLinus Torvalds 	release_metapage(mp);
2271da177e4SLinus Torvalds 
2281da177e4SLinus Torvalds 	/* bind the bmap inode and the bmap descriptor to each other. */
2291da177e4SLinus Torvalds 	bmp->db_ipbmap = ipbmap;
2301da177e4SLinus Torvalds 	JFS_SBI(ipbmap->i_sb)->bmap = bmp;
2311da177e4SLinus Torvalds 
2321da177e4SLinus Torvalds 	memset(bmp->db_active, 0, sizeof(bmp->db_active));
2331da177e4SLinus Torvalds 
2341da177e4SLinus Torvalds 	/*
2351da177e4SLinus Torvalds 	 * allocate/initialize the bmap lock
2361da177e4SLinus Torvalds 	 */
2371da177e4SLinus Torvalds 	BMAP_LOCK_INIT(bmp);
2381da177e4SLinus Torvalds 
2391da177e4SLinus Torvalds 	return (0);
240898f7066SDongliang Mu 
241898f7066SDongliang Mu err_release_metapage:
242898f7066SDongliang Mu 	release_metapage(mp);
243898f7066SDongliang Mu err_kfree_bmp:
244898f7066SDongliang Mu 	kfree(bmp);
245898f7066SDongliang Mu 	return err;
2461da177e4SLinus Torvalds }
2471da177e4SLinus Torvalds 
2481da177e4SLinus Torvalds 
2491da177e4SLinus Torvalds /*
2501da177e4SLinus Torvalds  * NAME:	dbUnmount()
2511da177e4SLinus Torvalds  *
2521da177e4SLinus Torvalds  * FUNCTION:	terminate the block allocation map in preparation for
2531da177e4SLinus Torvalds  *		file system unmount.
2541da177e4SLinus Torvalds  *
2551da177e4SLinus Torvalds  *		the in-core bmap descriptor is written to disk and
2561da177e4SLinus Torvalds  *		the memory for this descriptor is freed.
2571da177e4SLinus Torvalds  *
2581da177e4SLinus Torvalds  * PARAMETERS:
2591da177e4SLinus Torvalds  *	ipbmap	- pointer to in-core inode for the block map.
2601da177e4SLinus Torvalds  *
2611da177e4SLinus Torvalds  * RETURN VALUES:
2621da177e4SLinus Torvalds  *	0	- success
2631da177e4SLinus Torvalds  *	-EIO	- i/o error
2641da177e4SLinus Torvalds  */
dbUnmount(struct inode * ipbmap,int mounterror)2651da177e4SLinus Torvalds int dbUnmount(struct inode *ipbmap, int mounterror)
2661da177e4SLinus Torvalds {
2671da177e4SLinus Torvalds 	struct bmap *bmp = JFS_SBI(ipbmap->i_sb)->bmap;
2681da177e4SLinus Torvalds 
2691da177e4SLinus Torvalds 	if (!(mounterror || isReadOnly(ipbmap)))
2701da177e4SLinus Torvalds 		dbSync(ipbmap);
2711da177e4SLinus Torvalds 
2721da177e4SLinus Torvalds 	/*
2731da177e4SLinus Torvalds 	 * Invalidate the page cache buffers
2741da177e4SLinus Torvalds 	 */
2751da177e4SLinus Torvalds 	truncate_inode_pages(ipbmap->i_mapping, 0);
2761da177e4SLinus Torvalds 
2771da177e4SLinus Torvalds 	/* free the memory for the in-memory bmap. */
2781da177e4SLinus Torvalds 	kfree(bmp);
279cade5397SAndrew Kanner 	JFS_SBI(ipbmap->i_sb)->bmap = NULL;
2801da177e4SLinus Torvalds 
2811da177e4SLinus Torvalds 	return (0);
2821da177e4SLinus Torvalds }
2831da177e4SLinus Torvalds 
2841da177e4SLinus Torvalds /*
2851da177e4SLinus Torvalds  *	dbSync()
2861da177e4SLinus Torvalds  */
dbSync(struct inode * ipbmap)2871da177e4SLinus Torvalds int dbSync(struct inode *ipbmap)
2881da177e4SLinus Torvalds {
2891da177e4SLinus Torvalds 	struct dbmap_disk *dbmp_le;
2901da177e4SLinus Torvalds 	struct bmap *bmp = JFS_SBI(ipbmap->i_sb)->bmap;
2911da177e4SLinus Torvalds 	struct metapage *mp;
2921da177e4SLinus Torvalds 	int i;
2931da177e4SLinus Torvalds 
2941da177e4SLinus Torvalds 	/*
2951da177e4SLinus Torvalds 	 * write bmap global control page
2961da177e4SLinus Torvalds 	 */
2971da177e4SLinus Torvalds 	/* get the buffer for the on-disk bmap descriptor. */
2981da177e4SLinus Torvalds 	mp = read_metapage(ipbmap,
2991da177e4SLinus Torvalds 			   BMAPBLKNO << JFS_SBI(ipbmap->i_sb)->l2nbperpage,
3001da177e4SLinus Torvalds 			   PSIZE, 0);
3011da177e4SLinus Torvalds 	if (mp == NULL) {
3021da177e4SLinus Torvalds 		jfs_err("dbSync: read_metapage failed!");
3031da177e4SLinus Torvalds 		return -EIO;
3041da177e4SLinus Torvalds 	}
3051da177e4SLinus Torvalds 	/* copy the in-memory version of the bmap to the on-disk version */
3061da177e4SLinus Torvalds 	dbmp_le = (struct dbmap_disk *) mp->data;
3071da177e4SLinus Torvalds 	dbmp_le->dn_mapsize = cpu_to_le64(bmp->db_mapsize);
3081da177e4SLinus Torvalds 	dbmp_le->dn_nfree = cpu_to_le64(bmp->db_nfree);
3091da177e4SLinus Torvalds 	dbmp_le->dn_l2nbperpage = cpu_to_le32(bmp->db_l2nbperpage);
3101da177e4SLinus Torvalds 	dbmp_le->dn_numag = cpu_to_le32(bmp->db_numag);
3111da177e4SLinus Torvalds 	dbmp_le->dn_maxlevel = cpu_to_le32(bmp->db_maxlevel);
3121da177e4SLinus Torvalds 	dbmp_le->dn_maxag = cpu_to_le32(bmp->db_maxag);
3131da177e4SLinus Torvalds 	dbmp_le->dn_agpref = cpu_to_le32(bmp->db_agpref);
3141da177e4SLinus Torvalds 	dbmp_le->dn_aglevel = cpu_to_le32(bmp->db_aglevel);
315d7eecb48SDaniel Mack 	dbmp_le->dn_agheight = cpu_to_le32(bmp->db_agheight);
3161da177e4SLinus Torvalds 	dbmp_le->dn_agwidth = cpu_to_le32(bmp->db_agwidth);
3171da177e4SLinus Torvalds 	dbmp_le->dn_agstart = cpu_to_le32(bmp->db_agstart);
3181da177e4SLinus Torvalds 	dbmp_le->dn_agl2size = cpu_to_le32(bmp->db_agl2size);
3191da177e4SLinus Torvalds 	for (i = 0; i < MAXAG; i++)
3201da177e4SLinus Torvalds 		dbmp_le->dn_agfree[i] = cpu_to_le64(bmp->db_agfree[i]);
3211da177e4SLinus Torvalds 	dbmp_le->dn_agsize = cpu_to_le64(bmp->db_agsize);
3221da177e4SLinus Torvalds 	dbmp_le->dn_maxfreebud = bmp->db_maxfreebud;
3231da177e4SLinus Torvalds 
3241da177e4SLinus Torvalds 	/* write the buffer */
3251da177e4SLinus Torvalds 	write_metapage(mp);
3261da177e4SLinus Torvalds 
3271da177e4SLinus Torvalds 	/*
3281da177e4SLinus Torvalds 	 * write out dirty pages of bmap
3291da177e4SLinus Torvalds 	 */
33028fd1298SOGAWA Hirofumi 	filemap_write_and_wait(ipbmap->i_mapping);
3311da177e4SLinus Torvalds 
3321da177e4SLinus Torvalds 	diWriteSpecial(ipbmap, 0);
3331da177e4SLinus Torvalds 
3341da177e4SLinus Torvalds 	return (0);
3351da177e4SLinus Torvalds }
3361da177e4SLinus Torvalds 
3371da177e4SLinus Torvalds /*
3381da177e4SLinus Torvalds  * NAME:	dbFree()
3391da177e4SLinus Torvalds  *
3401da177e4SLinus Torvalds  * FUNCTION:	free the specified block range from the working block
3411da177e4SLinus Torvalds  *		allocation map.
3421da177e4SLinus Torvalds  *
3431da177e4SLinus Torvalds  *		the blocks will be free from the working map one dmap
3441da177e4SLinus Torvalds  *		at a time.
3451da177e4SLinus Torvalds  *
3461da177e4SLinus Torvalds  * PARAMETERS:
3471da177e4SLinus Torvalds  *	ip	- pointer to in-core inode;
3481da177e4SLinus Torvalds  *	blkno	- starting block number to be freed.
3491da177e4SLinus Torvalds  *	nblocks	- number of blocks to be freed.
3501da177e4SLinus Torvalds  *
3511da177e4SLinus Torvalds  * RETURN VALUES:
3521da177e4SLinus Torvalds  *	0	- success
3531da177e4SLinus Torvalds  *	-EIO	- i/o error
3541da177e4SLinus Torvalds  */
dbFree(struct inode * ip,s64 blkno,s64 nblocks)3551da177e4SLinus Torvalds int dbFree(struct inode *ip, s64 blkno, s64 nblocks)
3561da177e4SLinus Torvalds {
3571da177e4SLinus Torvalds 	struct metapage *mp;
3581da177e4SLinus Torvalds 	struct dmap *dp;
3591da177e4SLinus Torvalds 	int nb, rc;
3601da177e4SLinus Torvalds 	s64 lblkno, rem;
3611da177e4SLinus Torvalds 	struct inode *ipbmap = JFS_SBI(ip->i_sb)->ipbmap;
3621da177e4SLinus Torvalds 	struct bmap *bmp = JFS_SBI(ip->i_sb)->bmap;
363b40c2e66STino Reichardt 	struct super_block *sb = ipbmap->i_sb;
3641da177e4SLinus Torvalds 
36582d5b9a7SDave Kleikamp 	IREAD_LOCK(ipbmap, RDWRLOCK_DMAP);
3661da177e4SLinus Torvalds 
3671da177e4SLinus Torvalds 	/* block to be freed better be within the mapsize. */
3681da177e4SLinus Torvalds 	if (unlikely((blkno == 0) || (blkno + nblocks > bmp->db_mapsize))) {
3691da177e4SLinus Torvalds 		IREAD_UNLOCK(ipbmap);
3701da177e4SLinus Torvalds 		printk(KERN_ERR "blkno = %Lx, nblocks = %Lx\n",
3711da177e4SLinus Torvalds 		       (unsigned long long) blkno,
3721da177e4SLinus Torvalds 		       (unsigned long long) nblocks);
373eb8630d7SJoe Perches 		jfs_error(ip->i_sb, "block to be freed is outside the map\n");
3741da177e4SLinus Torvalds 		return -EIO;
3751da177e4SLinus Torvalds 	}
3761da177e4SLinus Torvalds 
377b40c2e66STino Reichardt 	/**
378b40c2e66STino Reichardt 	 * TRIM the blocks, when mounted with discard option
379b40c2e66STino Reichardt 	 */
380b40c2e66STino Reichardt 	if (JFS_SBI(sb)->flag & JFS_DISCARD)
381b40c2e66STino Reichardt 		if (JFS_SBI(sb)->minblks_trim <= nblocks)
382b40c2e66STino Reichardt 			jfs_issue_discard(ipbmap, blkno, nblocks);
383b40c2e66STino Reichardt 
3841da177e4SLinus Torvalds 	/*
3851da177e4SLinus Torvalds 	 * free the blocks a dmap at a time.
3861da177e4SLinus Torvalds 	 */
3871da177e4SLinus Torvalds 	mp = NULL;
3881da177e4SLinus Torvalds 	for (rem = nblocks; rem > 0; rem -= nb, blkno += nb) {
3891da177e4SLinus Torvalds 		/* release previous dmap if any */
3901da177e4SLinus Torvalds 		if (mp) {
3911da177e4SLinus Torvalds 			write_metapage(mp);
3921da177e4SLinus Torvalds 		}
3931da177e4SLinus Torvalds 
3941da177e4SLinus Torvalds 		/* get the buffer for the current dmap. */
3951da177e4SLinus Torvalds 		lblkno = BLKTODMAP(blkno, bmp->db_l2nbperpage);
3961da177e4SLinus Torvalds 		mp = read_metapage(ipbmap, lblkno, PSIZE, 0);
3971da177e4SLinus Torvalds 		if (mp == NULL) {
3981da177e4SLinus Torvalds 			IREAD_UNLOCK(ipbmap);
3991da177e4SLinus Torvalds 			return -EIO;
4001da177e4SLinus Torvalds 		}
4011da177e4SLinus Torvalds 		dp = (struct dmap *) mp->data;
4021da177e4SLinus Torvalds 
4031da177e4SLinus Torvalds 		/* determine the number of blocks to be freed from
4041da177e4SLinus Torvalds 		 * this dmap.
4051da177e4SLinus Torvalds 		 */
4061da177e4SLinus Torvalds 		nb = min(rem, BPERDMAP - (blkno & (BPERDMAP - 1)));
4071da177e4SLinus Torvalds 
4081da177e4SLinus Torvalds 		/* free the blocks. */
4091da177e4SLinus Torvalds 		if ((rc = dbFreeDmap(bmp, dp, blkno, nb))) {
410eb8630d7SJoe Perches 			jfs_error(ip->i_sb, "error in block map\n");
4111da177e4SLinus Torvalds 			release_metapage(mp);
4121da177e4SLinus Torvalds 			IREAD_UNLOCK(ipbmap);
4131da177e4SLinus Torvalds 			return (rc);
4141da177e4SLinus Torvalds 		}
4151da177e4SLinus Torvalds 	}
4161da177e4SLinus Torvalds 
4171da177e4SLinus Torvalds 	/* write the last buffer. */
4180d4837fdSZixuan Fu 	if (mp)
4191da177e4SLinus Torvalds 		write_metapage(mp);
4201da177e4SLinus Torvalds 
4211da177e4SLinus Torvalds 	IREAD_UNLOCK(ipbmap);
4221da177e4SLinus Torvalds 
4231da177e4SLinus Torvalds 	return (0);
4241da177e4SLinus Torvalds }
4251da177e4SLinus Torvalds 
4261da177e4SLinus Torvalds 
4271da177e4SLinus Torvalds /*
4281da177e4SLinus Torvalds  * NAME:	dbUpdatePMap()
4291da177e4SLinus Torvalds  *
4301da177e4SLinus Torvalds  * FUNCTION:	update the allocation state (free or allocate) of the
4311da177e4SLinus Torvalds  *		specified block range in the persistent block allocation map.
4321da177e4SLinus Torvalds  *
4331da177e4SLinus Torvalds  *		the blocks will be updated in the persistent map one
4341da177e4SLinus Torvalds  *		dmap at a time.
4351da177e4SLinus Torvalds  *
4361da177e4SLinus Torvalds  * PARAMETERS:
4371da177e4SLinus Torvalds  *	ipbmap	- pointer to in-core inode for the block map.
4384d81715fSRichard Knutsson  *	free	- 'true' if block range is to be freed from the persistent
4394d81715fSRichard Knutsson  *		  map; 'false' if it is to be allocated.
4401da177e4SLinus Torvalds  *	blkno	- starting block number of the range.
4411da177e4SLinus Torvalds  *	nblocks	- number of contiguous blocks in the range.
4421da177e4SLinus Torvalds  *	tblk	- transaction block;
4431da177e4SLinus Torvalds  *
4441da177e4SLinus Torvalds  * RETURN VALUES:
4451da177e4SLinus Torvalds  *	0	- success
4461da177e4SLinus Torvalds  *	-EIO	- i/o error
4471da177e4SLinus Torvalds  */
4481da177e4SLinus Torvalds int
dbUpdatePMap(struct inode * ipbmap,int free,s64 blkno,s64 nblocks,struct tblock * tblk)4491da177e4SLinus Torvalds dbUpdatePMap(struct inode *ipbmap,
4501da177e4SLinus Torvalds 	     int free, s64 blkno, s64 nblocks, struct tblock * tblk)
4511da177e4SLinus Torvalds {
4521da177e4SLinus Torvalds 	int nblks, dbitno, wbitno, rbits;
4531da177e4SLinus Torvalds 	int word, nbits, nwords;
4541da177e4SLinus Torvalds 	struct bmap *bmp = JFS_SBI(ipbmap->i_sb)->bmap;
4551da177e4SLinus Torvalds 	s64 lblkno, rem, lastlblkno;
4561da177e4SLinus Torvalds 	u32 mask;
4571da177e4SLinus Torvalds 	struct dmap *dp;
4581da177e4SLinus Torvalds 	struct metapage *mp;
4591da177e4SLinus Torvalds 	struct jfs_log *log;
4601da177e4SLinus Torvalds 	int lsn, difft, diffp;
4617fab479bSDave Kleikamp 	unsigned long flags;
4621da177e4SLinus Torvalds 
4631da177e4SLinus Torvalds 	/* the blocks better be within the mapsize. */
4641da177e4SLinus Torvalds 	if (blkno + nblocks > bmp->db_mapsize) {
4651da177e4SLinus Torvalds 		printk(KERN_ERR "blkno = %Lx, nblocks = %Lx\n",
4661da177e4SLinus Torvalds 		       (unsigned long long) blkno,
4671da177e4SLinus Torvalds 		       (unsigned long long) nblocks);
468eb8630d7SJoe Perches 		jfs_error(ipbmap->i_sb, "blocks are outside the map\n");
4691da177e4SLinus Torvalds 		return -EIO;
4701da177e4SLinus Torvalds 	}
4711da177e4SLinus Torvalds 
4721da177e4SLinus Torvalds 	/* compute delta of transaction lsn from log syncpt */
4731da177e4SLinus Torvalds 	lsn = tblk->lsn;
4741da177e4SLinus Torvalds 	log = (struct jfs_log *) JFS_SBI(tblk->sb)->log;
4751da177e4SLinus Torvalds 	logdiff(difft, lsn, log);
4761da177e4SLinus Torvalds 
4771da177e4SLinus Torvalds 	/*
4781da177e4SLinus Torvalds 	 * update the block state a dmap at a time.
4791da177e4SLinus Torvalds 	 */
4801da177e4SLinus Torvalds 	mp = NULL;
4811da177e4SLinus Torvalds 	lastlblkno = 0;
4821da177e4SLinus Torvalds 	for (rem = nblocks; rem > 0; rem -= nblks, blkno += nblks) {
4831da177e4SLinus Torvalds 		/* get the buffer for the current dmap. */
4841da177e4SLinus Torvalds 		lblkno = BLKTODMAP(blkno, bmp->db_l2nbperpage);
4851da177e4SLinus Torvalds 		if (lblkno != lastlblkno) {
4861da177e4SLinus Torvalds 			if (mp) {
4871da177e4SLinus Torvalds 				write_metapage(mp);
4881da177e4SLinus Torvalds 			}
4891da177e4SLinus Torvalds 
4901da177e4SLinus Torvalds 			mp = read_metapage(bmp->db_ipbmap, lblkno, PSIZE,
4911da177e4SLinus Torvalds 					   0);
4921da177e4SLinus Torvalds 			if (mp == NULL)
4931da177e4SLinus Torvalds 				return -EIO;
4947fab479bSDave Kleikamp 			metapage_wait_for_io(mp);
4951da177e4SLinus Torvalds 		}
4961da177e4SLinus Torvalds 		dp = (struct dmap *) mp->data;
4971da177e4SLinus Torvalds 
4981da177e4SLinus Torvalds 		/* determine the bit number and word within the dmap of
4991da177e4SLinus Torvalds 		 * the starting block.  also determine how many blocks
5001da177e4SLinus Torvalds 		 * are to be updated within this dmap.
5011da177e4SLinus Torvalds 		 */
5021da177e4SLinus Torvalds 		dbitno = blkno & (BPERDMAP - 1);
5031da177e4SLinus Torvalds 		word = dbitno >> L2DBWORD;
5041da177e4SLinus Torvalds 		nblks = min(rem, (s64)BPERDMAP - dbitno);
5051da177e4SLinus Torvalds 
5061da177e4SLinus Torvalds 		/* update the bits of the dmap words. the first and last
5071da177e4SLinus Torvalds 		 * words may only have a subset of their bits updated. if
5081da177e4SLinus Torvalds 		 * this is the case, we'll work against that word (i.e.
5091da177e4SLinus Torvalds 		 * partial first and/or last) only in a single pass.  a
5101da177e4SLinus Torvalds 		 * single pass will also be used to update all words that
5111da177e4SLinus Torvalds 		 * are to have all their bits updated.
5121da177e4SLinus Torvalds 		 */
5131da177e4SLinus Torvalds 		for (rbits = nblks; rbits > 0;
5141da177e4SLinus Torvalds 		     rbits -= nbits, dbitno += nbits) {
5151da177e4SLinus Torvalds 			/* determine the bit number within the word and
5161da177e4SLinus Torvalds 			 * the number of bits within the word.
5171da177e4SLinus Torvalds 			 */
5181da177e4SLinus Torvalds 			wbitno = dbitno & (DBWORD - 1);
5191da177e4SLinus Torvalds 			nbits = min(rbits, DBWORD - wbitno);
5201da177e4SLinus Torvalds 
5211da177e4SLinus Torvalds 			/* check if only part of the word is to be updated. */
5221da177e4SLinus Torvalds 			if (nbits < DBWORD) {
5231da177e4SLinus Torvalds 				/* update (free or allocate) the bits
5241da177e4SLinus Torvalds 				 * in this word.
5251da177e4SLinus Torvalds 				 */
5261da177e4SLinus Torvalds 				mask =
5271da177e4SLinus Torvalds 				    (ONES << (DBWORD - nbits) >> wbitno);
5281da177e4SLinus Torvalds 				if (free)
5291da177e4SLinus Torvalds 					dp->pmap[word] &=
5301da177e4SLinus Torvalds 					    cpu_to_le32(~mask);
5311da177e4SLinus Torvalds 				else
5321da177e4SLinus Torvalds 					dp->pmap[word] |=
5331da177e4SLinus Torvalds 					    cpu_to_le32(mask);
5341da177e4SLinus Torvalds 
5351da177e4SLinus Torvalds 				word += 1;
5361da177e4SLinus Torvalds 			} else {
5371da177e4SLinus Torvalds 				/* one or more words are to have all
5381da177e4SLinus Torvalds 				 * their bits updated.  determine how
5391da177e4SLinus Torvalds 				 * many words and how many bits.
5401da177e4SLinus Torvalds 				 */
5411da177e4SLinus Torvalds 				nwords = rbits >> L2DBWORD;
5421da177e4SLinus Torvalds 				nbits = nwords << L2DBWORD;
5431da177e4SLinus Torvalds 
5441da177e4SLinus Torvalds 				/* update (free or allocate) the bits
5451da177e4SLinus Torvalds 				 * in these words.
5461da177e4SLinus Torvalds 				 */
5471da177e4SLinus Torvalds 				if (free)
5481da177e4SLinus Torvalds 					memset(&dp->pmap[word], 0,
5491da177e4SLinus Torvalds 					       nwords * 4);
5501da177e4SLinus Torvalds 				else
5511da177e4SLinus Torvalds 					memset(&dp->pmap[word], (int) ONES,
5521da177e4SLinus Torvalds 					       nwords * 4);
5531da177e4SLinus Torvalds 
5541da177e4SLinus Torvalds 				word += nwords;
5551da177e4SLinus Torvalds 			}
5561da177e4SLinus Torvalds 		}
5571da177e4SLinus Torvalds 
5581da177e4SLinus Torvalds 		/*
5591da177e4SLinus Torvalds 		 * update dmap lsn
5601da177e4SLinus Torvalds 		 */
5611da177e4SLinus Torvalds 		if (lblkno == lastlblkno)
5621da177e4SLinus Torvalds 			continue;
5631da177e4SLinus Torvalds 
5641da177e4SLinus Torvalds 		lastlblkno = lblkno;
5651da177e4SLinus Torvalds 
566be0bf7daSDave Kleikamp 		LOGSYNC_LOCK(log, flags);
5671da177e4SLinus Torvalds 		if (mp->lsn != 0) {
5681da177e4SLinus Torvalds 			/* inherit older/smaller lsn */
5691da177e4SLinus Torvalds 			logdiff(diffp, mp->lsn, log);
5701da177e4SLinus Torvalds 			if (difft < diffp) {
5711da177e4SLinus Torvalds 				mp->lsn = lsn;
5721da177e4SLinus Torvalds 
5731da177e4SLinus Torvalds 				/* move bp after tblock in logsync list */
5741da177e4SLinus Torvalds 				list_move(&mp->synclist, &tblk->synclist);
5751da177e4SLinus Torvalds 			}
5761da177e4SLinus Torvalds 
5771da177e4SLinus Torvalds 			/* inherit younger/larger clsn */
5781da177e4SLinus Torvalds 			logdiff(difft, tblk->clsn, log);
5791da177e4SLinus Torvalds 			logdiff(diffp, mp->clsn, log);
5801da177e4SLinus Torvalds 			if (difft > diffp)
5811da177e4SLinus Torvalds 				mp->clsn = tblk->clsn;
5821da177e4SLinus Torvalds 		} else {
5831da177e4SLinus Torvalds 			mp->log = log;
5841da177e4SLinus Torvalds 			mp->lsn = lsn;
5851da177e4SLinus Torvalds 
5861da177e4SLinus Torvalds 			/* insert bp after tblock in logsync list */
5871da177e4SLinus Torvalds 			log->count++;
5881da177e4SLinus Torvalds 			list_add(&mp->synclist, &tblk->synclist);
5891da177e4SLinus Torvalds 
5901da177e4SLinus Torvalds 			mp->clsn = tblk->clsn;
5911da177e4SLinus Torvalds 		}
592be0bf7daSDave Kleikamp 		LOGSYNC_UNLOCK(log, flags);
5931da177e4SLinus Torvalds 	}
5941da177e4SLinus Torvalds 
5951da177e4SLinus Torvalds 	/* write the last buffer. */
5961da177e4SLinus Torvalds 	if (mp) {
5971da177e4SLinus Torvalds 		write_metapage(mp);
5981da177e4SLinus Torvalds 	}
5991da177e4SLinus Torvalds 
6001da177e4SLinus Torvalds 	return (0);
6011da177e4SLinus Torvalds }
6021da177e4SLinus Torvalds 
6031da177e4SLinus Torvalds 
6041da177e4SLinus Torvalds /*
6051da177e4SLinus Torvalds  * NAME:	dbNextAG()
6061da177e4SLinus Torvalds  *
6071da177e4SLinus Torvalds  * FUNCTION:	find the preferred allocation group for new allocations.
6081da177e4SLinus Torvalds  *
6091da177e4SLinus Torvalds  *		Within the allocation groups, we maintain a preferred
6101da177e4SLinus Torvalds  *		allocation group which consists of a group with at least
6111da177e4SLinus Torvalds  *		average free space.  It is the preferred group that we target
6121da177e4SLinus Torvalds  *		new inode allocation towards.  The tie-in between inode
6131da177e4SLinus Torvalds  *		allocation and block allocation occurs as we allocate the
6141da177e4SLinus Torvalds  *		first (data) block of an inode and specify the inode (block)
6151da177e4SLinus Torvalds  *		as the allocation hint for this block.
6161da177e4SLinus Torvalds  *
6171da177e4SLinus Torvalds  *		We try to avoid having more than one open file growing in
6181da177e4SLinus Torvalds  *		an allocation group, as this will lead to fragmentation.
6191da177e4SLinus Torvalds  *		This differs from the old OS/2 method of trying to keep
6201da177e4SLinus Torvalds  *		empty ags around for large allocations.
6211da177e4SLinus Torvalds  *
6221da177e4SLinus Torvalds  * PARAMETERS:
6231da177e4SLinus Torvalds  *	ipbmap	- pointer to in-core inode for the block map.
6241da177e4SLinus Torvalds  *
6251da177e4SLinus Torvalds  * RETURN VALUES:
6261da177e4SLinus Torvalds  *	the preferred allocation group number.
6271da177e4SLinus Torvalds  */
dbNextAG(struct inode * ipbmap)6281da177e4SLinus Torvalds int dbNextAG(struct inode *ipbmap)
6291da177e4SLinus Torvalds {
6301da177e4SLinus Torvalds 	s64 avgfree;
6311da177e4SLinus Torvalds 	int agpref;
6321da177e4SLinus Torvalds 	s64 hwm = 0;
6331da177e4SLinus Torvalds 	int i;
6341da177e4SLinus Torvalds 	int next_best = -1;
6351da177e4SLinus Torvalds 	struct bmap *bmp = JFS_SBI(ipbmap->i_sb)->bmap;
6361da177e4SLinus Torvalds 
6371da177e4SLinus Torvalds 	BMAP_LOCK(bmp);
6381da177e4SLinus Torvalds 
6391da177e4SLinus Torvalds 	/* determine the average number of free blocks within the ags. */
6401da177e4SLinus Torvalds 	avgfree = (u32)bmp->db_nfree / bmp->db_numag;
6411da177e4SLinus Torvalds 
6421da177e4SLinus Torvalds 	/*
6431da177e4SLinus Torvalds 	 * if the current preferred ag does not have an active allocator
6441da177e4SLinus Torvalds 	 * and has at least average freespace, return it
6451da177e4SLinus Torvalds 	 */
6461da177e4SLinus Torvalds 	agpref = bmp->db_agpref;
6471da177e4SLinus Torvalds 	if ((atomic_read(&bmp->db_active[agpref]) == 0) &&
6481da177e4SLinus Torvalds 	    (bmp->db_agfree[agpref] >= avgfree))
6491da177e4SLinus Torvalds 		goto unlock;
6501da177e4SLinus Torvalds 
6511da177e4SLinus Torvalds 	/* From the last preferred ag, find the next one with at least
6521da177e4SLinus Torvalds 	 * average free space.
6531da177e4SLinus Torvalds 	 */
6541da177e4SLinus Torvalds 	for (i = 0 ; i < bmp->db_numag; i++, agpref++) {
655c1ba4b8cSJeongjun Park 		if (agpref >= bmp->db_numag)
6561da177e4SLinus Torvalds 			agpref = 0;
6571da177e4SLinus Torvalds 
6581da177e4SLinus Torvalds 		if (atomic_read(&bmp->db_active[agpref]))
6591da177e4SLinus Torvalds 			/* open file is currently growing in this ag */
6601da177e4SLinus Torvalds 			continue;
6611da177e4SLinus Torvalds 		if (bmp->db_agfree[agpref] >= avgfree) {
6621da177e4SLinus Torvalds 			/* Return this one */
6631da177e4SLinus Torvalds 			bmp->db_agpref = agpref;
6641da177e4SLinus Torvalds 			goto unlock;
6651da177e4SLinus Torvalds 		} else if (bmp->db_agfree[agpref] > hwm) {
6661da177e4SLinus Torvalds 			/* Less than avg. freespace, but best so far */
6671da177e4SLinus Torvalds 			hwm = bmp->db_agfree[agpref];
6681da177e4SLinus Torvalds 			next_best = agpref;
6691da177e4SLinus Torvalds 		}
6701da177e4SLinus Torvalds 	}
6711da177e4SLinus Torvalds 
6721da177e4SLinus Torvalds 	/*
6731da177e4SLinus Torvalds 	 * If no inactive ag was found with average freespace, use the
6741da177e4SLinus Torvalds 	 * next best
6751da177e4SLinus Torvalds 	 */
6761da177e4SLinus Torvalds 	if (next_best != -1)
6771da177e4SLinus Torvalds 		bmp->db_agpref = next_best;
6781da177e4SLinus Torvalds 	/* else leave db_agpref unchanged */
6791da177e4SLinus Torvalds unlock:
6801da177e4SLinus Torvalds 	BMAP_UNLOCK(bmp);
6811da177e4SLinus Torvalds 
6821da177e4SLinus Torvalds 	/* return the preferred group.
6831da177e4SLinus Torvalds 	 */
6841da177e4SLinus Torvalds 	return (bmp->db_agpref);
6851da177e4SLinus Torvalds }
6861da177e4SLinus Torvalds 
6871da177e4SLinus Torvalds /*
6881da177e4SLinus Torvalds  * NAME:	dbAlloc()
6891da177e4SLinus Torvalds  *
6901da177e4SLinus Torvalds  * FUNCTION:	attempt to allocate a specified number of contiguous free
6911da177e4SLinus Torvalds  *		blocks from the working allocation block map.
6921da177e4SLinus Torvalds  *
6931da177e4SLinus Torvalds  *		the block allocation policy uses hints and a multi-step
6941da177e4SLinus Torvalds  *		approach.
6951da177e4SLinus Torvalds  *
6961da177e4SLinus Torvalds  *		for allocation requests smaller than the number of blocks
6971da177e4SLinus Torvalds  *		per dmap, we first try to allocate the new blocks
6981da177e4SLinus Torvalds  *		immediately following the hint.  if these blocks are not
6991da177e4SLinus Torvalds  *		available, we try to allocate blocks near the hint.  if
7001da177e4SLinus Torvalds  *		no blocks near the hint are available, we next try to
7011da177e4SLinus Torvalds  *		allocate within the same dmap as contains the hint.
7021da177e4SLinus Torvalds  *
7031da177e4SLinus Torvalds  *		if no blocks are available in the dmap or the allocation
7041da177e4SLinus Torvalds  *		request is larger than the dmap size, we try to allocate
7051da177e4SLinus Torvalds  *		within the same allocation group as contains the hint. if
7061da177e4SLinus Torvalds  *		this does not succeed, we finally try to allocate anywhere
7071da177e4SLinus Torvalds  *		within the aggregate.
7081da177e4SLinus Torvalds  *
709ed1c9a7aSRandy Dunlap  *		we also try to allocate anywhere within the aggregate
7101da177e4SLinus Torvalds  *		for allocation requests larger than the allocation group
7111da177e4SLinus Torvalds  *		size or requests that specify no hint value.
7121da177e4SLinus Torvalds  *
7131da177e4SLinus Torvalds  * PARAMETERS:
7141da177e4SLinus Torvalds  *	ip	- pointer to in-core inode;
7151da177e4SLinus Torvalds  *	hint	- allocation hint.
7161da177e4SLinus Torvalds  *	nblocks	- number of contiguous blocks in the range.
7171da177e4SLinus Torvalds  *	results	- on successful return, set to the starting block number
7181da177e4SLinus Torvalds  *		  of the newly allocated contiguous range.
7191da177e4SLinus Torvalds  *
7201da177e4SLinus Torvalds  * RETURN VALUES:
7211da177e4SLinus Torvalds  *	0	- success
7221da177e4SLinus Torvalds  *	-ENOSPC	- insufficient disk resources
7231da177e4SLinus Torvalds  *	-EIO	- i/o error
7241da177e4SLinus Torvalds  */
dbAlloc(struct inode * ip,s64 hint,s64 nblocks,s64 * results)7251da177e4SLinus Torvalds int dbAlloc(struct inode *ip, s64 hint, s64 nblocks, s64 * results)
7261da177e4SLinus Torvalds {
7271da177e4SLinus Torvalds 	int rc, agno;
7281da177e4SLinus Torvalds 	struct inode *ipbmap = JFS_SBI(ip->i_sb)->ipbmap;
7291da177e4SLinus Torvalds 	struct bmap *bmp;
7301da177e4SLinus Torvalds 	struct metapage *mp;
7311da177e4SLinus Torvalds 	s64 lblkno, blkno;
7321da177e4SLinus Torvalds 	struct dmap *dp;
7331da177e4SLinus Torvalds 	int l2nb;
7341da177e4SLinus Torvalds 	s64 mapSize;
7351da177e4SLinus Torvalds 	int writers;
7361da177e4SLinus Torvalds 
7371da177e4SLinus Torvalds 	/* assert that nblocks is valid */
7381da177e4SLinus Torvalds 	assert(nblocks > 0);
7391da177e4SLinus Torvalds 
7401da177e4SLinus Torvalds 	/* get the log2 number of blocks to be allocated.
7411da177e4SLinus Torvalds 	 * if the number of blocks is not a log2 multiple,
7421da177e4SLinus Torvalds 	 * it will be rounded up to the next log2 multiple.
7431da177e4SLinus Torvalds 	 */
7441da177e4SLinus Torvalds 	l2nb = BLKSTOL2(nblocks);
7451da177e4SLinus Torvalds 
7461da177e4SLinus Torvalds 	bmp = JFS_SBI(ip->i_sb)->bmap;
7471da177e4SLinus Torvalds 
7481da177e4SLinus Torvalds 	mapSize = bmp->db_mapsize;
7491da177e4SLinus Torvalds 
7501da177e4SLinus Torvalds 	/* the hint should be within the map */
7511da177e4SLinus Torvalds 	if (hint >= mapSize) {
752eb8630d7SJoe Perches 		jfs_error(ip->i_sb, "the hint is outside the map\n");
7531da177e4SLinus Torvalds 		return -EIO;
7541da177e4SLinus Torvalds 	}
7551da177e4SLinus Torvalds 
7561da177e4SLinus Torvalds 	/* if the number of blocks to be allocated is greater than the
7571da177e4SLinus Torvalds 	 * allocation group size, try to allocate anywhere.
7581da177e4SLinus Torvalds 	 */
7591da177e4SLinus Torvalds 	if (l2nb > bmp->db_agl2size) {
76082d5b9a7SDave Kleikamp 		IWRITE_LOCK(ipbmap, RDWRLOCK_DMAP);
7611da177e4SLinus Torvalds 
7621da177e4SLinus Torvalds 		rc = dbAllocAny(bmp, nblocks, l2nb, results);
7631da177e4SLinus Torvalds 
7641da177e4SLinus Torvalds 		goto write_unlock;
7651da177e4SLinus Torvalds 	}
7661da177e4SLinus Torvalds 
7671da177e4SLinus Torvalds 	/*
7681da177e4SLinus Torvalds 	 * If no hint, let dbNextAG recommend an allocation group
7691da177e4SLinus Torvalds 	 */
7701da177e4SLinus Torvalds 	if (hint == 0)
7711da177e4SLinus Torvalds 		goto pref_ag;
7721da177e4SLinus Torvalds 
7731da177e4SLinus Torvalds 	/* we would like to allocate close to the hint.  adjust the
7741da177e4SLinus Torvalds 	 * hint to the block following the hint since the allocators
7751da177e4SLinus Torvalds 	 * will start looking for free space starting at this point.
7761da177e4SLinus Torvalds 	 */
7771da177e4SLinus Torvalds 	blkno = hint + 1;
7781da177e4SLinus Torvalds 
7791da177e4SLinus Torvalds 	if (blkno >= bmp->db_mapsize)
7801da177e4SLinus Torvalds 		goto pref_ag;
7811da177e4SLinus Torvalds 
7821da177e4SLinus Torvalds 	agno = blkno >> bmp->db_agl2size;
7831da177e4SLinus Torvalds 
7841da177e4SLinus Torvalds 	/* check if blkno crosses over into a new allocation group.
7851da177e4SLinus Torvalds 	 * if so, check if we should allow allocations within this
7861da177e4SLinus Torvalds 	 * allocation group.
7871da177e4SLinus Torvalds 	 */
7881da177e4SLinus Torvalds 	if ((blkno & (bmp->db_agsize - 1)) == 0)
789af901ca1SAndré Goddard Rosa 		/* check if the AG is currently being written to.
7901da177e4SLinus Torvalds 		 * if so, call dbNextAG() to find a non-busy
7911da177e4SLinus Torvalds 		 * AG with sufficient free space.
7921da177e4SLinus Torvalds 		 */
7931da177e4SLinus Torvalds 		if (atomic_read(&bmp->db_active[agno]))
7941da177e4SLinus Torvalds 			goto pref_ag;
7951da177e4SLinus Torvalds 
7961da177e4SLinus Torvalds 	/* check if the allocation request size can be satisfied from a
7971da177e4SLinus Torvalds 	 * single dmap.  if so, try to allocate from the dmap containing
7981da177e4SLinus Torvalds 	 * the hint using a tiered strategy.
7991da177e4SLinus Torvalds 	 */
8001da177e4SLinus Torvalds 	if (nblocks <= BPERDMAP) {
80182d5b9a7SDave Kleikamp 		IREAD_LOCK(ipbmap, RDWRLOCK_DMAP);
8021da177e4SLinus Torvalds 
8031da177e4SLinus Torvalds 		/* get the buffer for the dmap containing the hint.
8041da177e4SLinus Torvalds 		 */
8051da177e4SLinus Torvalds 		rc = -EIO;
8061da177e4SLinus Torvalds 		lblkno = BLKTODMAP(blkno, bmp->db_l2nbperpage);
8071da177e4SLinus Torvalds 		mp = read_metapage(ipbmap, lblkno, PSIZE, 0);
8081da177e4SLinus Torvalds 		if (mp == NULL)
8091da177e4SLinus Torvalds 			goto read_unlock;
8101da177e4SLinus Torvalds 
8111da177e4SLinus Torvalds 		dp = (struct dmap *) mp->data;
8121da177e4SLinus Torvalds 
8131da177e4SLinus Torvalds 		/* first, try to satisfy the allocation request with the
8141da177e4SLinus Torvalds 		 * blocks beginning at the hint.
8151da177e4SLinus Torvalds 		 */
8161da177e4SLinus Torvalds 		if ((rc = dbAllocNext(bmp, dp, blkno, (int) nblocks))
8171da177e4SLinus Torvalds 		    != -ENOSPC) {
8181da177e4SLinus Torvalds 			if (rc == 0) {
8191da177e4SLinus Torvalds 				*results = blkno;
8201da177e4SLinus Torvalds 				mark_metapage_dirty(mp);
8211da177e4SLinus Torvalds 			}
8221da177e4SLinus Torvalds 
8231da177e4SLinus Torvalds 			release_metapage(mp);
8241da177e4SLinus Torvalds 			goto read_unlock;
8251da177e4SLinus Torvalds 		}
8261da177e4SLinus Torvalds 
8271da177e4SLinus Torvalds 		writers = atomic_read(&bmp->db_active[agno]);
8281da177e4SLinus Torvalds 		if ((writers > 1) ||
8291da177e4SLinus Torvalds 		    ((writers == 1) && (JFS_IP(ip)->active_ag != agno))) {
8301da177e4SLinus Torvalds 			/*
8311da177e4SLinus Torvalds 			 * Someone else is writing in this allocation
8321da177e4SLinus Torvalds 			 * group.  To avoid fragmenting, try another ag
8331da177e4SLinus Torvalds 			 */
8341da177e4SLinus Torvalds 			release_metapage(mp);
8351da177e4SLinus Torvalds 			IREAD_UNLOCK(ipbmap);
8361da177e4SLinus Torvalds 			goto pref_ag;
8371da177e4SLinus Torvalds 		}
8381da177e4SLinus Torvalds 
8391da177e4SLinus Torvalds 		/* next, try to satisfy the allocation request with blocks
8401da177e4SLinus Torvalds 		 * near the hint.
8411da177e4SLinus Torvalds 		 */
8421da177e4SLinus Torvalds 		if ((rc =
8431da177e4SLinus Torvalds 		     dbAllocNear(bmp, dp, blkno, (int) nblocks, l2nb, results))
8441da177e4SLinus Torvalds 		    != -ENOSPC) {
845b38a3ab3SDave Kleikamp 			if (rc == 0)
8461da177e4SLinus Torvalds 				mark_metapage_dirty(mp);
8471da177e4SLinus Torvalds 
8481da177e4SLinus Torvalds 			release_metapage(mp);
8491da177e4SLinus Torvalds 			goto read_unlock;
8501da177e4SLinus Torvalds 		}
8511da177e4SLinus Torvalds 
8521da177e4SLinus Torvalds 		/* try to satisfy the allocation request with blocks within
8531da177e4SLinus Torvalds 		 * the same dmap as the hint.
8541da177e4SLinus Torvalds 		 */
8551da177e4SLinus Torvalds 		if ((rc = dbAllocDmapLev(bmp, dp, (int) nblocks, l2nb, results))
8561da177e4SLinus Torvalds 		    != -ENOSPC) {
857b38a3ab3SDave Kleikamp 			if (rc == 0)
8581da177e4SLinus Torvalds 				mark_metapage_dirty(mp);
8591da177e4SLinus Torvalds 
8601da177e4SLinus Torvalds 			release_metapage(mp);
8611da177e4SLinus Torvalds 			goto read_unlock;
8621da177e4SLinus Torvalds 		}
8631da177e4SLinus Torvalds 
8641da177e4SLinus Torvalds 		release_metapage(mp);
8651da177e4SLinus Torvalds 		IREAD_UNLOCK(ipbmap);
8661da177e4SLinus Torvalds 	}
8671da177e4SLinus Torvalds 
8681da177e4SLinus Torvalds 	/* try to satisfy the allocation request with blocks within
8691da177e4SLinus Torvalds 	 * the same allocation group as the hint.
8701da177e4SLinus Torvalds 	 */
87182d5b9a7SDave Kleikamp 	IWRITE_LOCK(ipbmap, RDWRLOCK_DMAP);
872b38a3ab3SDave Kleikamp 	if ((rc = dbAllocAG(bmp, agno, nblocks, l2nb, results)) != -ENOSPC)
8731da177e4SLinus Torvalds 		goto write_unlock;
874b38a3ab3SDave Kleikamp 
8751da177e4SLinus Torvalds 	IWRITE_UNLOCK(ipbmap);
8761da177e4SLinus Torvalds 
8771da177e4SLinus Torvalds 
8781da177e4SLinus Torvalds       pref_ag:
8791da177e4SLinus Torvalds 	/*
8801da177e4SLinus Torvalds 	 * Let dbNextAG recommend a preferred allocation group
8811da177e4SLinus Torvalds 	 */
8821da177e4SLinus Torvalds 	agno = dbNextAG(ipbmap);
88382d5b9a7SDave Kleikamp 	IWRITE_LOCK(ipbmap, RDWRLOCK_DMAP);
8841da177e4SLinus Torvalds 
8851da177e4SLinus Torvalds 	/* Try to allocate within this allocation group.  if that fails, try to
8861da177e4SLinus Torvalds 	 * allocate anywhere in the map.
8871da177e4SLinus Torvalds 	 */
8881da177e4SLinus Torvalds 	if ((rc = dbAllocAG(bmp, agno, nblocks, l2nb, results)) == -ENOSPC)
8891da177e4SLinus Torvalds 		rc = dbAllocAny(bmp, nblocks, l2nb, results);
8901da177e4SLinus Torvalds 
8911da177e4SLinus Torvalds       write_unlock:
8921da177e4SLinus Torvalds 	IWRITE_UNLOCK(ipbmap);
8931da177e4SLinus Torvalds 
8941da177e4SLinus Torvalds 	return (rc);
8951da177e4SLinus Torvalds 
8961da177e4SLinus Torvalds       read_unlock:
8971da177e4SLinus Torvalds 	IREAD_UNLOCK(ipbmap);
8981da177e4SLinus Torvalds 
8991da177e4SLinus Torvalds 	return (rc);
9001da177e4SLinus Torvalds }
9011da177e4SLinus Torvalds 
9021da177e4SLinus Torvalds /*
9031da177e4SLinus Torvalds  * NAME:	dbReAlloc()
9041da177e4SLinus Torvalds  *
9051da177e4SLinus Torvalds  * FUNCTION:	attempt to extend a current allocation by a specified
9061da177e4SLinus Torvalds  *		number of blocks.
9071da177e4SLinus Torvalds  *
9081da177e4SLinus Torvalds  *		this routine attempts to satisfy the allocation request
9091da177e4SLinus Torvalds  *		by first trying to extend the existing allocation in
9101da177e4SLinus Torvalds  *		place by allocating the additional blocks as the blocks
9111da177e4SLinus Torvalds  *		immediately following the current allocation.  if these
9121da177e4SLinus Torvalds  *		blocks are not available, this routine will attempt to
9131da177e4SLinus Torvalds  *		allocate a new set of contiguous blocks large enough
9141da177e4SLinus Torvalds  *		to cover the existing allocation plus the additional
9151da177e4SLinus Torvalds  *		number of blocks required.
9161da177e4SLinus Torvalds  *
9171da177e4SLinus Torvalds  * PARAMETERS:
9181da177e4SLinus Torvalds  *	ip	    -  pointer to in-core inode requiring allocation.
9191da177e4SLinus Torvalds  *	blkno	    -  starting block of the current allocation.
9201da177e4SLinus Torvalds  *	nblocks	    -  number of contiguous blocks within the current
9211da177e4SLinus Torvalds  *		       allocation.
9221da177e4SLinus Torvalds  *	addnblocks  -  number of blocks to add to the allocation.
9231da177e4SLinus Torvalds  *	results	-      on successful return, set to the starting block number
9241da177e4SLinus Torvalds  *		       of the existing allocation if the existing allocation
9251da177e4SLinus Torvalds  *		       was extended in place or to a newly allocated contiguous
9261da177e4SLinus Torvalds  *		       range if the existing allocation could not be extended
9271da177e4SLinus Torvalds  *		       in place.
9281da177e4SLinus Torvalds  *
9291da177e4SLinus Torvalds  * RETURN VALUES:
9301da177e4SLinus Torvalds  *	0	- success
9311da177e4SLinus Torvalds  *	-ENOSPC	- insufficient disk resources
9321da177e4SLinus Torvalds  *	-EIO	- i/o error
9331da177e4SLinus Torvalds  */
9341da177e4SLinus Torvalds int
dbReAlloc(struct inode * ip,s64 blkno,s64 nblocks,s64 addnblocks,s64 * results)9351da177e4SLinus Torvalds dbReAlloc(struct inode *ip,
9361da177e4SLinus Torvalds 	  s64 blkno, s64 nblocks, s64 addnblocks, s64 * results)
9371da177e4SLinus Torvalds {
9381da177e4SLinus Torvalds 	int rc;
9391da177e4SLinus Torvalds 
9401da177e4SLinus Torvalds 	/* try to extend the allocation in place.
9411da177e4SLinus Torvalds 	 */
9421da177e4SLinus Torvalds 	if ((rc = dbExtend(ip, blkno, nblocks, addnblocks)) == 0) {
9431da177e4SLinus Torvalds 		*results = blkno;
9441da177e4SLinus Torvalds 		return (0);
9451da177e4SLinus Torvalds 	} else {
9461da177e4SLinus Torvalds 		if (rc != -ENOSPC)
9471da177e4SLinus Torvalds 			return (rc);
9481da177e4SLinus Torvalds 	}
9491da177e4SLinus Torvalds 
9501da177e4SLinus Torvalds 	/* could not extend the allocation in place, so allocate a
9511da177e4SLinus Torvalds 	 * new set of blocks for the entire request (i.e. try to get
9521da177e4SLinus Torvalds 	 * a range of contiguous blocks large enough to cover the
9531da177e4SLinus Torvalds 	 * existing allocation plus the additional blocks.)
9541da177e4SLinus Torvalds 	 */
9551da177e4SLinus Torvalds 	return (dbAlloc
9561da177e4SLinus Torvalds 		(ip, blkno + nblocks - 1, addnblocks + nblocks, results));
9571da177e4SLinus Torvalds }
9581da177e4SLinus Torvalds 
9591da177e4SLinus Torvalds 
9601da177e4SLinus Torvalds /*
9611da177e4SLinus Torvalds  * NAME:	dbExtend()
9621da177e4SLinus Torvalds  *
9631da177e4SLinus Torvalds  * FUNCTION:	attempt to extend a current allocation by a specified
9641da177e4SLinus Torvalds  *		number of blocks.
9651da177e4SLinus Torvalds  *
9661da177e4SLinus Torvalds  *		this routine attempts to satisfy the allocation request
9671da177e4SLinus Torvalds  *		by first trying to extend the existing allocation in
9681da177e4SLinus Torvalds  *		place by allocating the additional blocks as the blocks
9691da177e4SLinus Torvalds  *		immediately following the current allocation.
9701da177e4SLinus Torvalds  *
9711da177e4SLinus Torvalds  * PARAMETERS:
9721da177e4SLinus Torvalds  *	ip	    -  pointer to in-core inode requiring allocation.
9731da177e4SLinus Torvalds  *	blkno	    -  starting block of the current allocation.
9741da177e4SLinus Torvalds  *	nblocks	    -  number of contiguous blocks within the current
9751da177e4SLinus Torvalds  *		       allocation.
9761da177e4SLinus Torvalds  *	addnblocks  -  number of blocks to add to the allocation.
9771da177e4SLinus Torvalds  *
9781da177e4SLinus Torvalds  * RETURN VALUES:
9791da177e4SLinus Torvalds  *	0	- success
9801da177e4SLinus Torvalds  *	-ENOSPC	- insufficient disk resources
9811da177e4SLinus Torvalds  *	-EIO	- i/o error
9821da177e4SLinus Torvalds  */
dbExtend(struct inode * ip,s64 blkno,s64 nblocks,s64 addnblocks)9831da177e4SLinus Torvalds static int dbExtend(struct inode *ip, s64 blkno, s64 nblocks, s64 addnblocks)
9841da177e4SLinus Torvalds {
9851da177e4SLinus Torvalds 	struct jfs_sb_info *sbi = JFS_SBI(ip->i_sb);
9861da177e4SLinus Torvalds 	s64 lblkno, lastblkno, extblkno;
9871da177e4SLinus Torvalds 	uint rel_block;
9881da177e4SLinus Torvalds 	struct metapage *mp;
9891da177e4SLinus Torvalds 	struct dmap *dp;
9901da177e4SLinus Torvalds 	int rc;
9911da177e4SLinus Torvalds 	struct inode *ipbmap = sbi->ipbmap;
9921da177e4SLinus Torvalds 	struct bmap *bmp;
9931da177e4SLinus Torvalds 
9941da177e4SLinus Torvalds 	/*
9951da177e4SLinus Torvalds 	 * We don't want a non-aligned extent to cross a page boundary
9961da177e4SLinus Torvalds 	 */
9971da177e4SLinus Torvalds 	if (((rel_block = blkno & (sbi->nbperpage - 1))) &&
9981da177e4SLinus Torvalds 	    (rel_block + nblocks + addnblocks > sbi->nbperpage))
9991da177e4SLinus Torvalds 		return -ENOSPC;
10001da177e4SLinus Torvalds 
10011da177e4SLinus Torvalds 	/* get the last block of the current allocation */
10021da177e4SLinus Torvalds 	lastblkno = blkno + nblocks - 1;
10031da177e4SLinus Torvalds 
10041da177e4SLinus Torvalds 	/* determine the block number of the block following
10051da177e4SLinus Torvalds 	 * the existing allocation.
10061da177e4SLinus Torvalds 	 */
10071da177e4SLinus Torvalds 	extblkno = lastblkno + 1;
10081da177e4SLinus Torvalds 
100982d5b9a7SDave Kleikamp 	IREAD_LOCK(ipbmap, RDWRLOCK_DMAP);
10101da177e4SLinus Torvalds 
10111da177e4SLinus Torvalds 	/* better be within the file system */
10121da177e4SLinus Torvalds 	bmp = sbi->bmap;
10131da177e4SLinus Torvalds 	if (lastblkno < 0 || lastblkno >= bmp->db_mapsize) {
10141da177e4SLinus Torvalds 		IREAD_UNLOCK(ipbmap);
1015eb8630d7SJoe Perches 		jfs_error(ip->i_sb, "the block is outside the filesystem\n");
10161da177e4SLinus Torvalds 		return -EIO;
10171da177e4SLinus Torvalds 	}
10181da177e4SLinus Torvalds 
10191da177e4SLinus Torvalds 	/* we'll attempt to extend the current allocation in place by
10201da177e4SLinus Torvalds 	 * allocating the additional blocks as the blocks immediately
10211da177e4SLinus Torvalds 	 * following the current allocation.  we only try to extend the
10221da177e4SLinus Torvalds 	 * current allocation in place if the number of additional blocks
10231da177e4SLinus Torvalds 	 * can fit into a dmap, the last block of the current allocation
10241da177e4SLinus Torvalds 	 * is not the last block of the file system, and the start of the
10251da177e4SLinus Torvalds 	 * inplace extension is not on an allocation group boundary.
10261da177e4SLinus Torvalds 	 */
10271da177e4SLinus Torvalds 	if (addnblocks > BPERDMAP || extblkno >= bmp->db_mapsize ||
10281da177e4SLinus Torvalds 	    (extblkno & (bmp->db_agsize - 1)) == 0) {
10291da177e4SLinus Torvalds 		IREAD_UNLOCK(ipbmap);
10301da177e4SLinus Torvalds 		return -ENOSPC;
10311da177e4SLinus Torvalds 	}
10321da177e4SLinus Torvalds 
10331da177e4SLinus Torvalds 	/* get the buffer for the dmap containing the first block
10341da177e4SLinus Torvalds 	 * of the extension.
10351da177e4SLinus Torvalds 	 */
10361da177e4SLinus Torvalds 	lblkno = BLKTODMAP(extblkno, bmp->db_l2nbperpage);
10371da177e4SLinus Torvalds 	mp = read_metapage(ipbmap, lblkno, PSIZE, 0);
10381da177e4SLinus Torvalds 	if (mp == NULL) {
10391da177e4SLinus Torvalds 		IREAD_UNLOCK(ipbmap);
10401da177e4SLinus Torvalds 		return -EIO;
10411da177e4SLinus Torvalds 	}
10421da177e4SLinus Torvalds 
10431da177e4SLinus Torvalds 	dp = (struct dmap *) mp->data;
10441da177e4SLinus Torvalds 
10451da177e4SLinus Torvalds 	/* try to allocate the blocks immediately following the
10461da177e4SLinus Torvalds 	 * current allocation.
10471da177e4SLinus Torvalds 	 */
10481da177e4SLinus Torvalds 	rc = dbAllocNext(bmp, dp, extblkno, (int) addnblocks);
10491da177e4SLinus Torvalds 
10501da177e4SLinus Torvalds 	IREAD_UNLOCK(ipbmap);
10511da177e4SLinus Torvalds 
10521da177e4SLinus Torvalds 	/* were we successful ? */
1053b38a3ab3SDave Kleikamp 	if (rc == 0)
10541da177e4SLinus Torvalds 		write_metapage(mp);
1055b38a3ab3SDave Kleikamp 	else
10561da177e4SLinus Torvalds 		/* we were not successful */
10571da177e4SLinus Torvalds 		release_metapage(mp);
10581da177e4SLinus Torvalds 
10591da177e4SLinus Torvalds 	return (rc);
10601da177e4SLinus Torvalds }
10611da177e4SLinus Torvalds 
10621da177e4SLinus Torvalds 
10631da177e4SLinus Torvalds /*
10641da177e4SLinus Torvalds  * NAME:	dbAllocNext()
10651da177e4SLinus Torvalds  *
10661da177e4SLinus Torvalds  * FUNCTION:	attempt to allocate the blocks of the specified block
10671da177e4SLinus Torvalds  *		range within a dmap.
10681da177e4SLinus Torvalds  *
10691da177e4SLinus Torvalds  * PARAMETERS:
10701da177e4SLinus Torvalds  *	bmp	-  pointer to bmap descriptor
10711da177e4SLinus Torvalds  *	dp	-  pointer to dmap.
10721da177e4SLinus Torvalds  *	blkno	-  starting block number of the range.
10731da177e4SLinus Torvalds  *	nblocks	-  number of contiguous free blocks of the range.
10741da177e4SLinus Torvalds  *
10751da177e4SLinus Torvalds  * RETURN VALUES:
10761da177e4SLinus Torvalds  *	0	- success
10771da177e4SLinus Torvalds  *	-ENOSPC	- insufficient disk resources
10781da177e4SLinus Torvalds  *	-EIO	- i/o error
10791da177e4SLinus Torvalds  *
10801da177e4SLinus Torvalds  * serialization: IREAD_LOCK(ipbmap) held on entry/exit;
10811da177e4SLinus Torvalds  */
dbAllocNext(struct bmap * bmp,struct dmap * dp,s64 blkno,int nblocks)10821da177e4SLinus Torvalds static int dbAllocNext(struct bmap * bmp, struct dmap * dp, s64 blkno,
10831da177e4SLinus Torvalds 		       int nblocks)
10841da177e4SLinus Torvalds {
10851da177e4SLinus Torvalds 	int dbitno, word, rembits, nb, nwords, wbitno, nw;
10861da177e4SLinus Torvalds 	int l2size;
10871da177e4SLinus Torvalds 	s8 *leaf;
10881da177e4SLinus Torvalds 	u32 mask;
10891da177e4SLinus Torvalds 
10901da177e4SLinus Torvalds 	if (dp->tree.leafidx != cpu_to_le32(LEAFIND)) {
1091eb8630d7SJoe Perches 		jfs_error(bmp->db_ipbmap->i_sb, "Corrupt dmap page\n");
10921da177e4SLinus Torvalds 		return -EIO;
10931da177e4SLinus Torvalds 	}
10941da177e4SLinus Torvalds 
10951da177e4SLinus Torvalds 	/* pick up a pointer to the leaves of the dmap tree.
10961da177e4SLinus Torvalds 	 */
10971da177e4SLinus Torvalds 	leaf = dp->tree.stree + le32_to_cpu(dp->tree.leafidx);
10981da177e4SLinus Torvalds 
10991da177e4SLinus Torvalds 	/* determine the bit number and word within the dmap of the
11001da177e4SLinus Torvalds 	 * starting block.
11011da177e4SLinus Torvalds 	 */
11021da177e4SLinus Torvalds 	dbitno = blkno & (BPERDMAP - 1);
11031da177e4SLinus Torvalds 	word = dbitno >> L2DBWORD;
11041da177e4SLinus Torvalds 
11051da177e4SLinus Torvalds 	/* check if the specified block range is contained within
11061da177e4SLinus Torvalds 	 * this dmap.
11071da177e4SLinus Torvalds 	 */
11081da177e4SLinus Torvalds 	if (dbitno + nblocks > BPERDMAP)
11091da177e4SLinus Torvalds 		return -ENOSPC;
11101da177e4SLinus Torvalds 
11111da177e4SLinus Torvalds 	/* check if the starting leaf indicates that anything
11121da177e4SLinus Torvalds 	 * is free.
11131da177e4SLinus Torvalds 	 */
11141da177e4SLinus Torvalds 	if (leaf[word] == NOFREE)
11151da177e4SLinus Torvalds 		return -ENOSPC;
11161da177e4SLinus Torvalds 
11171da177e4SLinus Torvalds 	/* check the dmaps words corresponding to block range to see
11181da177e4SLinus Torvalds 	 * if the block range is free.  not all bits of the first and
11191da177e4SLinus Torvalds 	 * last words may be contained within the block range.  if this
11201da177e4SLinus Torvalds 	 * is the case, we'll work against those words (i.e. partial first
11211da177e4SLinus Torvalds 	 * and/or last) on an individual basis (a single pass) and examine
11221da177e4SLinus Torvalds 	 * the actual bits to determine if they are free.  a single pass
11231da177e4SLinus Torvalds 	 * will be used for all dmap words fully contained within the
11241da177e4SLinus Torvalds 	 * specified range.  within this pass, the leaves of the dmap
11251da177e4SLinus Torvalds 	 * tree will be examined to determine if the blocks are free. a
11261da177e4SLinus Torvalds 	 * single leaf may describe the free space of multiple dmap
11271da177e4SLinus Torvalds 	 * words, so we may visit only a subset of the actual leaves
11281da177e4SLinus Torvalds 	 * corresponding to the dmap words of the block range.
11291da177e4SLinus Torvalds 	 */
11301da177e4SLinus Torvalds 	for (rembits = nblocks; rembits > 0; rembits -= nb, dbitno += nb) {
11311da177e4SLinus Torvalds 		/* determine the bit number within the word and
11321da177e4SLinus Torvalds 		 * the number of bits within the word.
11331da177e4SLinus Torvalds 		 */
11341da177e4SLinus Torvalds 		wbitno = dbitno & (DBWORD - 1);
11351da177e4SLinus Torvalds 		nb = min(rembits, DBWORD - wbitno);
11361da177e4SLinus Torvalds 
11371da177e4SLinus Torvalds 		/* check if only part of the word is to be examined.
11381da177e4SLinus Torvalds 		 */
11391da177e4SLinus Torvalds 		if (nb < DBWORD) {
11401da177e4SLinus Torvalds 			/* check if the bits are free.
11411da177e4SLinus Torvalds 			 */
11421da177e4SLinus Torvalds 			mask = (ONES << (DBWORD - nb) >> wbitno);
11431da177e4SLinus Torvalds 			if ((mask & ~le32_to_cpu(dp->wmap[word])) != mask)
11441da177e4SLinus Torvalds 				return -ENOSPC;
11451da177e4SLinus Torvalds 
11461da177e4SLinus Torvalds 			word += 1;
11471da177e4SLinus Torvalds 		} else {
11481da177e4SLinus Torvalds 			/* one or more dmap words are fully contained
11491da177e4SLinus Torvalds 			 * within the block range.  determine how many
11501da177e4SLinus Torvalds 			 * words and how many bits.
11511da177e4SLinus Torvalds 			 */
11521da177e4SLinus Torvalds 			nwords = rembits >> L2DBWORD;
11531da177e4SLinus Torvalds 			nb = nwords << L2DBWORD;
11541da177e4SLinus Torvalds 
11551da177e4SLinus Torvalds 			/* now examine the appropriate leaves to determine
11561da177e4SLinus Torvalds 			 * if the blocks are free.
11571da177e4SLinus Torvalds 			 */
11581da177e4SLinus Torvalds 			while (nwords > 0) {
11591da177e4SLinus Torvalds 				/* does the leaf describe any free space ?
11601da177e4SLinus Torvalds 				 */
11611da177e4SLinus Torvalds 				if (leaf[word] < BUDMIN)
11621da177e4SLinus Torvalds 					return -ENOSPC;
11631da177e4SLinus Torvalds 
11641da177e4SLinus Torvalds 				/* determine the l2 number of bits provided
11651da177e4SLinus Torvalds 				 * by this leaf.
11661da177e4SLinus Torvalds 				 */
11671da177e4SLinus Torvalds 				l2size =
11684f65b6dbSFabian Frederick 				    min_t(int, leaf[word], NLSTOL2BSZ(nwords));
11691da177e4SLinus Torvalds 
11701da177e4SLinus Torvalds 				/* determine how many words were handled.
11711da177e4SLinus Torvalds 				 */
11721da177e4SLinus Torvalds 				nw = BUDSIZE(l2size, BUDMIN);
11731da177e4SLinus Torvalds 
11741da177e4SLinus Torvalds 				nwords -= nw;
11751da177e4SLinus Torvalds 				word += nw;
11761da177e4SLinus Torvalds 			}
11771da177e4SLinus Torvalds 		}
11781da177e4SLinus Torvalds 	}
11791da177e4SLinus Torvalds 
11801da177e4SLinus Torvalds 	/* allocate the blocks.
11811da177e4SLinus Torvalds 	 */
11821da177e4SLinus Torvalds 	return (dbAllocDmap(bmp, dp, blkno, nblocks));
11831da177e4SLinus Torvalds }
11841da177e4SLinus Torvalds 
11851da177e4SLinus Torvalds 
11861da177e4SLinus Torvalds /*
11871da177e4SLinus Torvalds  * NAME:	dbAllocNear()
11881da177e4SLinus Torvalds  *
11891da177e4SLinus Torvalds  * FUNCTION:	attempt to allocate a number of contiguous free blocks near
11901da177e4SLinus Torvalds  *		a specified block (hint) within a dmap.
11911da177e4SLinus Torvalds  *
11921da177e4SLinus Torvalds  *		starting with the dmap leaf that covers the hint, we'll
11931da177e4SLinus Torvalds  *		check the next four contiguous leaves for sufficient free
11941da177e4SLinus Torvalds  *		space.  if sufficient free space is found, we'll allocate
11951da177e4SLinus Torvalds  *		the desired free space.
11961da177e4SLinus Torvalds  *
11971da177e4SLinus Torvalds  * PARAMETERS:
11981da177e4SLinus Torvalds  *	bmp	-  pointer to bmap descriptor
11991da177e4SLinus Torvalds  *	dp	-  pointer to dmap.
12001da177e4SLinus Torvalds  *	blkno	-  block number to allocate near.
12011da177e4SLinus Torvalds  *	nblocks	-  actual number of contiguous free blocks desired.
12021da177e4SLinus Torvalds  *	l2nb	-  log2 number of contiguous free blocks desired.
12031da177e4SLinus Torvalds  *	results	-  on successful return, set to the starting block number
12041da177e4SLinus Torvalds  *		   of the newly allocated range.
12051da177e4SLinus Torvalds  *
12061da177e4SLinus Torvalds  * RETURN VALUES:
12071da177e4SLinus Torvalds  *	0	- success
12081da177e4SLinus Torvalds  *	-ENOSPC	- insufficient disk resources
12091da177e4SLinus Torvalds  *	-EIO	- i/o error
12101da177e4SLinus Torvalds  *
12111da177e4SLinus Torvalds  * serialization: IREAD_LOCK(ipbmap) held on entry/exit;
12121da177e4SLinus Torvalds  */
12131da177e4SLinus Torvalds static int
dbAllocNear(struct bmap * bmp,struct dmap * dp,s64 blkno,int nblocks,int l2nb,s64 * results)12141da177e4SLinus Torvalds dbAllocNear(struct bmap * bmp,
12151da177e4SLinus Torvalds 	    struct dmap * dp, s64 blkno, int nblocks, int l2nb, s64 * results)
12161da177e4SLinus Torvalds {
12171da177e4SLinus Torvalds 	int word, lword, rc;
12181da177e4SLinus Torvalds 	s8 *leaf;
12191da177e4SLinus Torvalds 
12201da177e4SLinus Torvalds 	if (dp->tree.leafidx != cpu_to_le32(LEAFIND)) {
1221eb8630d7SJoe Perches 		jfs_error(bmp->db_ipbmap->i_sb, "Corrupt dmap page\n");
12221da177e4SLinus Torvalds 		return -EIO;
12231da177e4SLinus Torvalds 	}
12241da177e4SLinus Torvalds 
12251da177e4SLinus Torvalds 	leaf = dp->tree.stree + le32_to_cpu(dp->tree.leafidx);
12261da177e4SLinus Torvalds 
12271da177e4SLinus Torvalds 	/* determine the word within the dmap that holds the hint
12281da177e4SLinus Torvalds 	 * (i.e. blkno).  also, determine the last word in the dmap
12291da177e4SLinus Torvalds 	 * that we'll include in our examination.
12301da177e4SLinus Torvalds 	 */
12311da177e4SLinus Torvalds 	word = (blkno & (BPERDMAP - 1)) >> L2DBWORD;
12321da177e4SLinus Torvalds 	lword = min(word + 4, LPERDMAP);
12331da177e4SLinus Torvalds 
12341da177e4SLinus Torvalds 	/* examine the leaves for sufficient free space.
12351da177e4SLinus Torvalds 	 */
12361da177e4SLinus Torvalds 	for (; word < lword; word++) {
12371da177e4SLinus Torvalds 		/* does the leaf describe sufficient free space ?
12381da177e4SLinus Torvalds 		 */
12391da177e4SLinus Torvalds 		if (leaf[word] < l2nb)
12401da177e4SLinus Torvalds 			continue;
12411da177e4SLinus Torvalds 
12421da177e4SLinus Torvalds 		/* determine the block number within the file system
12431da177e4SLinus Torvalds 		 * of the first block described by this dmap word.
12441da177e4SLinus Torvalds 		 */
12451da177e4SLinus Torvalds 		blkno = le64_to_cpu(dp->start) + (word << L2DBWORD);
12461da177e4SLinus Torvalds 
12471da177e4SLinus Torvalds 		/* if not all bits of the dmap word are free, get the
12481da177e4SLinus Torvalds 		 * starting bit number within the dmap word of the required
12491da177e4SLinus Torvalds 		 * string of free bits and adjust the block number with the
12501da177e4SLinus Torvalds 		 * value.
12511da177e4SLinus Torvalds 		 */
12521da177e4SLinus Torvalds 		if (leaf[word] < BUDMIN)
12531da177e4SLinus Torvalds 			blkno +=
12541da177e4SLinus Torvalds 			    dbFindBits(le32_to_cpu(dp->wmap[word]), l2nb);
12551da177e4SLinus Torvalds 
12561da177e4SLinus Torvalds 		/* allocate the blocks.
12571da177e4SLinus Torvalds 		 */
12581da177e4SLinus Torvalds 		if ((rc = dbAllocDmap(bmp, dp, blkno, nblocks)) == 0)
12591da177e4SLinus Torvalds 			*results = blkno;
12601da177e4SLinus Torvalds 
12611da177e4SLinus Torvalds 		return (rc);
12621da177e4SLinus Torvalds 	}
12631da177e4SLinus Torvalds 
12641da177e4SLinus Torvalds 	return -ENOSPC;
12651da177e4SLinus Torvalds }
12661da177e4SLinus Torvalds 
12671da177e4SLinus Torvalds 
12681da177e4SLinus Torvalds /*
12691da177e4SLinus Torvalds  * NAME:	dbAllocAG()
12701da177e4SLinus Torvalds  *
12711da177e4SLinus Torvalds  * FUNCTION:	attempt to allocate the specified number of contiguous
12721da177e4SLinus Torvalds  *		free blocks within the specified allocation group.
12731da177e4SLinus Torvalds  *
12741da177e4SLinus Torvalds  *		unless the allocation group size is equal to the number
12751da177e4SLinus Torvalds  *		of blocks per dmap, the dmap control pages will be used to
12761da177e4SLinus Torvalds  *		find the required free space, if available.  we start the
12771da177e4SLinus Torvalds  *		search at the highest dmap control page level which
12781da177e4SLinus Torvalds  *		distinctly describes the allocation group's free space
12791da177e4SLinus Torvalds  *		(i.e. the highest level at which the allocation group's
12801da177e4SLinus Torvalds  *		free space is not mixed in with that of any other group).
12811da177e4SLinus Torvalds  *		in addition, we start the search within this level at a
12821da177e4SLinus Torvalds  *		height of the dmapctl dmtree at which the nodes distinctly
12831da177e4SLinus Torvalds  *		describe the allocation group's free space.  at this height,
12841da177e4SLinus Torvalds  *		the allocation group's free space may be represented by 1
12851da177e4SLinus Torvalds  *		or two sub-trees, depending on the allocation group size.
12861da177e4SLinus Torvalds  *		we search the top nodes of these subtrees left to right for
12871da177e4SLinus Torvalds  *		sufficient free space.  if sufficient free space is found,
12881da177e4SLinus Torvalds  *		the subtree is searched to find the leftmost leaf that
12891da177e4SLinus Torvalds  *		has free space.  once we have made it to the leaf, we
12901da177e4SLinus Torvalds  *		move the search to the next lower level dmap control page
12911da177e4SLinus Torvalds  *		corresponding to this leaf.  we continue down the dmap control
12921da177e4SLinus Torvalds  *		pages until we find the dmap that contains or starts the
12931da177e4SLinus Torvalds  *		sufficient free space and we allocate at this dmap.
12941da177e4SLinus Torvalds  *
12951da177e4SLinus Torvalds  *		if the allocation group size is equal to the dmap size,
12961da177e4SLinus Torvalds  *		we'll start at the dmap corresponding to the allocation
12971da177e4SLinus Torvalds  *		group and attempt the allocation at this level.
12981da177e4SLinus Torvalds  *
12991da177e4SLinus Torvalds  *		the dmap control page search is also not performed if the
13001da177e4SLinus Torvalds  *		allocation group is completely free and we go to the first
13011da177e4SLinus Torvalds  *		dmap of the allocation group to do the allocation.  this is
13021da177e4SLinus Torvalds  *		done because the allocation group may be part (not the first
13031da177e4SLinus Torvalds  *		part) of a larger binary buddy system, causing the dmap
13041da177e4SLinus Torvalds  *		control pages to indicate no free space (NOFREE) within
13051da177e4SLinus Torvalds  *		the allocation group.
13061da177e4SLinus Torvalds  *
13071da177e4SLinus Torvalds  * PARAMETERS:
13081da177e4SLinus Torvalds  *	bmp	-  pointer to bmap descriptor
13091da177e4SLinus Torvalds  *	agno	- allocation group number.
13101da177e4SLinus Torvalds  *	nblocks	-  actual number of contiguous free blocks desired.
13111da177e4SLinus Torvalds  *	l2nb	-  log2 number of contiguous free blocks desired.
13121da177e4SLinus Torvalds  *	results	-  on successful return, set to the starting block number
13131da177e4SLinus Torvalds  *		   of the newly allocated range.
13141da177e4SLinus Torvalds  *
13151da177e4SLinus Torvalds  * RETURN VALUES:
13161da177e4SLinus Torvalds  *	0	- success
13171da177e4SLinus Torvalds  *	-ENOSPC	- insufficient disk resources
13181da177e4SLinus Torvalds  *	-EIO	- i/o error
13191da177e4SLinus Torvalds  *
13201da177e4SLinus Torvalds  * note: IWRITE_LOCK(ipmap) held on entry/exit;
13211da177e4SLinus Torvalds  */
13221da177e4SLinus Torvalds static int
dbAllocAG(struct bmap * bmp,int agno,s64 nblocks,int l2nb,s64 * results)13231da177e4SLinus Torvalds dbAllocAG(struct bmap * bmp, int agno, s64 nblocks, int l2nb, s64 * results)
13241da177e4SLinus Torvalds {
13251da177e4SLinus Torvalds 	struct metapage *mp;
13261da177e4SLinus Torvalds 	struct dmapctl *dcp;
13271da177e4SLinus Torvalds 	int rc, ti, i, k, m, n, agperlev;
13281da177e4SLinus Torvalds 	s64 blkno, lblkno;
13291da177e4SLinus Torvalds 	int budmin;
13301da177e4SLinus Torvalds 
13311da177e4SLinus Torvalds 	/* allocation request should not be for more than the
13321da177e4SLinus Torvalds 	 * allocation group size.
13331da177e4SLinus Torvalds 	 */
13341da177e4SLinus Torvalds 	if (l2nb > bmp->db_agl2size) {
13351da177e4SLinus Torvalds 		jfs_error(bmp->db_ipbmap->i_sb,
1336eb8630d7SJoe Perches 			  "allocation request is larger than the allocation group size\n");
13371da177e4SLinus Torvalds 		return -EIO;
13381da177e4SLinus Torvalds 	}
13391da177e4SLinus Torvalds 
13401da177e4SLinus Torvalds 	/* determine the starting block number of the allocation
13411da177e4SLinus Torvalds 	 * group.
13421da177e4SLinus Torvalds 	 */
13431da177e4SLinus Torvalds 	blkno = (s64) agno << bmp->db_agl2size;
13441da177e4SLinus Torvalds 
13451da177e4SLinus Torvalds 	/* check if the allocation group size is the minimum allocation
13461da177e4SLinus Torvalds 	 * group size or if the allocation group is completely free. if
13471da177e4SLinus Torvalds 	 * the allocation group size is the minimum size of BPERDMAP (i.e.
13481da177e4SLinus Torvalds 	 * 1 dmap), there is no need to search the dmap control page (below)
13491da177e4SLinus Torvalds 	 * that fully describes the allocation group since the allocation
13501da177e4SLinus Torvalds 	 * group is already fully described by a dmap.  in this case, we
13511da177e4SLinus Torvalds 	 * just call dbAllocCtl() to search the dmap tree and allocate the
13521da177e4SLinus Torvalds 	 * required space if available.
13531da177e4SLinus Torvalds 	 *
13541da177e4SLinus Torvalds 	 * if the allocation group is completely free, dbAllocCtl() is
13551da177e4SLinus Torvalds 	 * also called to allocate the required space.  this is done for
13561da177e4SLinus Torvalds 	 * two reasons.  first, it makes no sense searching the dmap control
13571da177e4SLinus Torvalds 	 * pages for free space when we know that free space exists.  second,
13581da177e4SLinus Torvalds 	 * the dmap control pages may indicate that the allocation group
13591da177e4SLinus Torvalds 	 * has no free space if the allocation group is part (not the first
13601da177e4SLinus Torvalds 	 * part) of a larger binary buddy system.
13611da177e4SLinus Torvalds 	 */
13621da177e4SLinus Torvalds 	if (bmp->db_agsize == BPERDMAP
13631da177e4SLinus Torvalds 	    || bmp->db_agfree[agno] == bmp->db_agsize) {
13641da177e4SLinus Torvalds 		rc = dbAllocCtl(bmp, nblocks, l2nb, blkno, results);
13651da177e4SLinus Torvalds 		if ((rc == -ENOSPC) &&
13661da177e4SLinus Torvalds 		    (bmp->db_agfree[agno] == bmp->db_agsize)) {
13671da177e4SLinus Torvalds 			printk(KERN_ERR "blkno = %Lx, blocks = %Lx\n",
13681da177e4SLinus Torvalds 			       (unsigned long long) blkno,
13691da177e4SLinus Torvalds 			       (unsigned long long) nblocks);
13701da177e4SLinus Torvalds 			jfs_error(bmp->db_ipbmap->i_sb,
1371eb8630d7SJoe Perches 				  "dbAllocCtl failed in free AG\n");
13721da177e4SLinus Torvalds 		}
13731da177e4SLinus Torvalds 		return (rc);
13741da177e4SLinus Torvalds 	}
13751da177e4SLinus Torvalds 
13761da177e4SLinus Torvalds 	/* the buffer for the dmap control page that fully describes the
13771da177e4SLinus Torvalds 	 * allocation group.
13781da177e4SLinus Torvalds 	 */
13791da177e4SLinus Torvalds 	lblkno = BLKTOCTL(blkno, bmp->db_l2nbperpage, bmp->db_aglevel);
13801da177e4SLinus Torvalds 	mp = read_metapage(bmp->db_ipbmap, lblkno, PSIZE, 0);
13811da177e4SLinus Torvalds 	if (mp == NULL)
13821da177e4SLinus Torvalds 		return -EIO;
13831da177e4SLinus Torvalds 	dcp = (struct dmapctl *) mp->data;
13841da177e4SLinus Torvalds 	budmin = dcp->budmin;
13851da177e4SLinus Torvalds 
13861da177e4SLinus Torvalds 	if (dcp->leafidx != cpu_to_le32(CTLLEAFIND)) {
1387eb8630d7SJoe Perches 		jfs_error(bmp->db_ipbmap->i_sb, "Corrupt dmapctl page\n");
13881da177e4SLinus Torvalds 		release_metapage(mp);
13891da177e4SLinus Torvalds 		return -EIO;
13901da177e4SLinus Torvalds 	}
13911da177e4SLinus Torvalds 
13921da177e4SLinus Torvalds 	/* search the subtree(s) of the dmap control page that describes
13931da177e4SLinus Torvalds 	 * the allocation group, looking for sufficient free space.  to begin,
13941da177e4SLinus Torvalds 	 * determine how many allocation groups are represented in a dmap
13951da177e4SLinus Torvalds 	 * control page at the control page level (i.e. L0, L1, L2) that
13961da177e4SLinus Torvalds 	 * fully describes an allocation group. next, determine the starting
13971da177e4SLinus Torvalds 	 * tree index of this allocation group within the control page.
13981da177e4SLinus Torvalds 	 */
13991da177e4SLinus Torvalds 	agperlev =
1400d7eecb48SDaniel Mack 	    (1 << (L2LPERCTL - (bmp->db_agheight << 1))) / bmp->db_agwidth;
14011da177e4SLinus Torvalds 	ti = bmp->db_agstart + bmp->db_agwidth * (agno & (agperlev - 1));
14021da177e4SLinus Torvalds 
14031da177e4SLinus Torvalds 	/* dmap control page trees fan-out by 4 and a single allocation
14041da177e4SLinus Torvalds 	 * group may be described by 1 or 2 subtrees within the ag level
14051da177e4SLinus Torvalds 	 * dmap control page, depending upon the ag size. examine the ag's
14061da177e4SLinus Torvalds 	 * subtrees for sufficient free space, starting with the leftmost
14071da177e4SLinus Torvalds 	 * subtree.
14081da177e4SLinus Torvalds 	 */
14091da177e4SLinus Torvalds 	for (i = 0; i < bmp->db_agwidth; i++, ti++) {
14101da177e4SLinus Torvalds 		/* is there sufficient free space ?
14111da177e4SLinus Torvalds 		 */
14121da177e4SLinus Torvalds 		if (l2nb > dcp->stree[ti])
14131da177e4SLinus Torvalds 			continue;
14141da177e4SLinus Torvalds 
14151da177e4SLinus Torvalds 		/* sufficient free space found in a subtree. now search down
14161da177e4SLinus Torvalds 		 * the subtree to find the leftmost leaf that describes this
14171da177e4SLinus Torvalds 		 * free space.
14181da177e4SLinus Torvalds 		 */
1419d7eecb48SDaniel Mack 		for (k = bmp->db_agheight; k > 0; k--) {
14201da177e4SLinus Torvalds 			for (n = 0, m = (ti << 2) + 1; n < 4; n++) {
14211da177e4SLinus Torvalds 				if (l2nb <= dcp->stree[m + n]) {
14221da177e4SLinus Torvalds 					ti = m + n;
14231da177e4SLinus Torvalds 					break;
14241da177e4SLinus Torvalds 				}
14251da177e4SLinus Torvalds 			}
14261da177e4SLinus Torvalds 			if (n == 4) {
14271da177e4SLinus Torvalds 				jfs_error(bmp->db_ipbmap->i_sb,
1428eb8630d7SJoe Perches 					  "failed descending stree\n");
14291da177e4SLinus Torvalds 				release_metapage(mp);
14301da177e4SLinus Torvalds 				return -EIO;
14311da177e4SLinus Torvalds 			}
14321da177e4SLinus Torvalds 		}
14331da177e4SLinus Torvalds 
14341da177e4SLinus Torvalds 		/* determine the block number within the file system
14351da177e4SLinus Torvalds 		 * that corresponds to this leaf.
14361da177e4SLinus Torvalds 		 */
14371da177e4SLinus Torvalds 		if (bmp->db_aglevel == 2)
14381da177e4SLinus Torvalds 			blkno = 0;
14391da177e4SLinus Torvalds 		else if (bmp->db_aglevel == 1)
14401da177e4SLinus Torvalds 			blkno &= ~(MAXL1SIZE - 1);
14411da177e4SLinus Torvalds 		else		/* bmp->db_aglevel == 0 */
14421da177e4SLinus Torvalds 			blkno &= ~(MAXL0SIZE - 1);
14431da177e4SLinus Torvalds 
14441da177e4SLinus Torvalds 		blkno +=
14451da177e4SLinus Torvalds 		    ((s64) (ti - le32_to_cpu(dcp->leafidx))) << budmin;
14461da177e4SLinus Torvalds 
14471da177e4SLinus Torvalds 		/* release the buffer in preparation for going down
14481da177e4SLinus Torvalds 		 * the next level of dmap control pages.
14491da177e4SLinus Torvalds 		 */
14501da177e4SLinus Torvalds 		release_metapage(mp);
14511da177e4SLinus Torvalds 
14521da177e4SLinus Torvalds 		/* check if we need to continue to search down the lower
14531da177e4SLinus Torvalds 		 * level dmap control pages.  we need to if the number of
14541da177e4SLinus Torvalds 		 * blocks required is less than maximum number of blocks
14551da177e4SLinus Torvalds 		 * described at the next lower level.
14561da177e4SLinus Torvalds 		 */
14571da177e4SLinus Torvalds 		if (l2nb < budmin) {
14581da177e4SLinus Torvalds 
14591da177e4SLinus Torvalds 			/* search the lower level dmap control pages to get
146059c51591SMichael Opdenacker 			 * the starting block number of the dmap that
14611da177e4SLinus Torvalds 			 * contains or starts off the free space.
14621da177e4SLinus Torvalds 			 */
14631da177e4SLinus Torvalds 			if ((rc =
14641da177e4SLinus Torvalds 			     dbFindCtl(bmp, l2nb, bmp->db_aglevel - 1,
14651da177e4SLinus Torvalds 				       &blkno))) {
14661da177e4SLinus Torvalds 				if (rc == -ENOSPC) {
14671da177e4SLinus Torvalds 					jfs_error(bmp->db_ipbmap->i_sb,
1468eb8630d7SJoe Perches 						  "control page inconsistent\n");
14691da177e4SLinus Torvalds 					return -EIO;
14701da177e4SLinus Torvalds 				}
14711da177e4SLinus Torvalds 				return (rc);
14721da177e4SLinus Torvalds 			}
14731da177e4SLinus Torvalds 		}
14741da177e4SLinus Torvalds 
14751da177e4SLinus Torvalds 		/* allocate the blocks.
14761da177e4SLinus Torvalds 		 */
14771da177e4SLinus Torvalds 		rc = dbAllocCtl(bmp, nblocks, l2nb, blkno, results);
14781da177e4SLinus Torvalds 		if (rc == -ENOSPC) {
14791da177e4SLinus Torvalds 			jfs_error(bmp->db_ipbmap->i_sb,
1480eb8630d7SJoe Perches 				  "unable to allocate blocks\n");
14811da177e4SLinus Torvalds 			rc = -EIO;
14821da177e4SLinus Torvalds 		}
14831da177e4SLinus Torvalds 		return (rc);
14841da177e4SLinus Torvalds 	}
14851da177e4SLinus Torvalds 
14861da177e4SLinus Torvalds 	/* no space in the allocation group.  release the buffer and
14871da177e4SLinus Torvalds 	 * return -ENOSPC.
14881da177e4SLinus Torvalds 	 */
14891da177e4SLinus Torvalds 	release_metapage(mp);
14901da177e4SLinus Torvalds 
14911da177e4SLinus Torvalds 	return -ENOSPC;
14921da177e4SLinus Torvalds }
14931da177e4SLinus Torvalds 
14941da177e4SLinus Torvalds 
14951da177e4SLinus Torvalds /*
14961da177e4SLinus Torvalds  * NAME:	dbAllocAny()
14971da177e4SLinus Torvalds  *
14981da177e4SLinus Torvalds  * FUNCTION:	attempt to allocate the specified number of contiguous
14991da177e4SLinus Torvalds  *		free blocks anywhere in the file system.
15001da177e4SLinus Torvalds  *
15011da177e4SLinus Torvalds  *		dbAllocAny() attempts to find the sufficient free space by
15021da177e4SLinus Torvalds  *		searching down the dmap control pages, starting with the
15031da177e4SLinus Torvalds  *		highest level (i.e. L0, L1, L2) control page.  if free space
15041da177e4SLinus Torvalds  *		large enough to satisfy the desired free space is found, the
15051da177e4SLinus Torvalds  *		desired free space is allocated.
15061da177e4SLinus Torvalds  *
15071da177e4SLinus Torvalds  * PARAMETERS:
15081da177e4SLinus Torvalds  *	bmp	-  pointer to bmap descriptor
15091da177e4SLinus Torvalds  *	nblocks	 -  actual number of contiguous free blocks desired.
15101da177e4SLinus Torvalds  *	l2nb	 -  log2 number of contiguous free blocks desired.
15111da177e4SLinus Torvalds  *	results	-  on successful return, set to the starting block number
15121da177e4SLinus Torvalds  *		   of the newly allocated range.
15131da177e4SLinus Torvalds  *
15141da177e4SLinus Torvalds  * RETURN VALUES:
15151da177e4SLinus Torvalds  *	0	- success
15161da177e4SLinus Torvalds  *	-ENOSPC	- insufficient disk resources
15171da177e4SLinus Torvalds  *	-EIO	- i/o error
15181da177e4SLinus Torvalds  *
15191da177e4SLinus Torvalds  * serialization: IWRITE_LOCK(ipbmap) held on entry/exit;
15201da177e4SLinus Torvalds  */
dbAllocAny(struct bmap * bmp,s64 nblocks,int l2nb,s64 * results)15211da177e4SLinus Torvalds static int dbAllocAny(struct bmap * bmp, s64 nblocks, int l2nb, s64 * results)
15221da177e4SLinus Torvalds {
15231da177e4SLinus Torvalds 	int rc;
15241da177e4SLinus Torvalds 	s64 blkno = 0;
15251da177e4SLinus Torvalds 
15261da177e4SLinus Torvalds 	/* starting with the top level dmap control page, search
15271da177e4SLinus Torvalds 	 * down the dmap control levels for sufficient free space.
15281da177e4SLinus Torvalds 	 * if free space is found, dbFindCtl() returns the starting
15291da177e4SLinus Torvalds 	 * block number of the dmap that contains or starts off the
15301da177e4SLinus Torvalds 	 * range of free space.
15311da177e4SLinus Torvalds 	 */
15321da177e4SLinus Torvalds 	if ((rc = dbFindCtl(bmp, l2nb, bmp->db_maxlevel, &blkno)))
15331da177e4SLinus Torvalds 		return (rc);
15341da177e4SLinus Torvalds 
15351da177e4SLinus Torvalds 	/* allocate the blocks.
15361da177e4SLinus Torvalds 	 */
15371da177e4SLinus Torvalds 	rc = dbAllocCtl(bmp, nblocks, l2nb, blkno, results);
15381da177e4SLinus Torvalds 	if (rc == -ENOSPC) {
1539eb8630d7SJoe Perches 		jfs_error(bmp->db_ipbmap->i_sb, "unable to allocate blocks\n");
15401da177e4SLinus Torvalds 		return -EIO;
15411da177e4SLinus Torvalds 	}
15421da177e4SLinus Torvalds 	return (rc);
15431da177e4SLinus Torvalds }
15441da177e4SLinus Torvalds 
15451da177e4SLinus Torvalds 
15461da177e4SLinus Torvalds /*
1547b40c2e66STino Reichardt  * NAME:	dbDiscardAG()
1548b40c2e66STino Reichardt  *
1549b40c2e66STino Reichardt  * FUNCTION:	attempt to discard (TRIM) all free blocks of specific AG
1550b40c2e66STino Reichardt  *
1551b40c2e66STino Reichardt  *		algorithm:
1552b40c2e66STino Reichardt  *		1) allocate blocks, as large as possible and save them
1553b40c2e66STino Reichardt  *		   while holding IWRITE_LOCK on ipbmap
1554b40c2e66STino Reichardt  *		2) trim all these saved block/length values
1555b40c2e66STino Reichardt  *		3) mark the blocks free again
1556b40c2e66STino Reichardt  *
1557b40c2e66STino Reichardt  *		benefit:
1558b40c2e66STino Reichardt  *		- we work only on one ag at some time, minimizing how long we
1559b40c2e66STino Reichardt  *		  need to lock ipbmap
1560b40c2e66STino Reichardt  *		- reading / writing the fs is possible most time, even on
1561b40c2e66STino Reichardt  *		  trimming
1562b40c2e66STino Reichardt  *
1563b40c2e66STino Reichardt  *		downside:
1564b40c2e66STino Reichardt  *		- we write two times to the dmapctl and dmap pages
1565b40c2e66STino Reichardt  *		- but for me, this seems the best way, better ideas?
1566b40c2e66STino Reichardt  *		/TR 2012
1567b40c2e66STino Reichardt  *
1568b40c2e66STino Reichardt  * PARAMETERS:
1569b40c2e66STino Reichardt  *	ip	- pointer to in-core inode
1570b40c2e66STino Reichardt  *	agno	- ag to trim
1571b40c2e66STino Reichardt  *	minlen	- minimum value of contiguous blocks
1572b40c2e66STino Reichardt  *
1573b40c2e66STino Reichardt  * RETURN VALUES:
1574b40c2e66STino Reichardt  *	s64	- actual number of blocks trimmed
1575b40c2e66STino Reichardt  */
dbDiscardAG(struct inode * ip,int agno,s64 minlen)1576b40c2e66STino Reichardt s64 dbDiscardAG(struct inode *ip, int agno, s64 minlen)
1577b40c2e66STino Reichardt {
1578b40c2e66STino Reichardt 	struct inode *ipbmap = JFS_SBI(ip->i_sb)->ipbmap;
1579b40c2e66STino Reichardt 	struct bmap *bmp = JFS_SBI(ip->i_sb)->bmap;
1580b40c2e66STino Reichardt 	s64 nblocks, blkno;
1581b40c2e66STino Reichardt 	u64 trimmed = 0;
1582b40c2e66STino Reichardt 	int rc, l2nb;
1583b40c2e66STino Reichardt 	struct super_block *sb = ipbmap->i_sb;
1584b40c2e66STino Reichardt 
1585b40c2e66STino Reichardt 	struct range2trim {
1586b40c2e66STino Reichardt 		u64 blkno;
1587b40c2e66STino Reichardt 		u64 nblocks;
1588b40c2e66STino Reichardt 	} *totrim, *tt;
1589b40c2e66STino Reichardt 
1590b40c2e66STino Reichardt 	/* max blkno / nblocks pairs to trim */
1591b40c2e66STino Reichardt 	int count = 0, range_cnt;
159284f4141eSDave Kleikamp 	u64 max_ranges;
1593b40c2e66STino Reichardt 
1594b40c2e66STino Reichardt 	/* prevent others from writing new stuff here, while trimming */
1595b40c2e66STino Reichardt 	IWRITE_LOCK(ipbmap, RDWRLOCK_DMAP);
1596b40c2e66STino Reichardt 
1597b40c2e66STino Reichardt 	nblocks = bmp->db_agfree[agno];
159884f4141eSDave Kleikamp 	max_ranges = nblocks;
159984f4141eSDave Kleikamp 	do_div(max_ranges, minlen);
160084f4141eSDave Kleikamp 	range_cnt = min_t(u64, max_ranges + 1, 32 * 1024);
16016da2ec56SKees Cook 	totrim = kmalloc_array(range_cnt, sizeof(struct range2trim), GFP_NOFS);
1602b40c2e66STino Reichardt 	if (totrim == NULL) {
1603eb8630d7SJoe Perches 		jfs_error(bmp->db_ipbmap->i_sb, "no memory for trim array\n");
1604b40c2e66STino Reichardt 		IWRITE_UNLOCK(ipbmap);
1605b40c2e66STino Reichardt 		return 0;
1606b40c2e66STino Reichardt 	}
1607b40c2e66STino Reichardt 
1608b40c2e66STino Reichardt 	tt = totrim;
1609b40c2e66STino Reichardt 	while (nblocks >= minlen) {
1610b40c2e66STino Reichardt 		l2nb = BLKSTOL2(nblocks);
1611b40c2e66STino Reichardt 
1612b40c2e66STino Reichardt 		/* 0 = okay, -EIO = fatal, -ENOSPC -> try smaller block */
1613b40c2e66STino Reichardt 		rc = dbAllocAG(bmp, agno, nblocks, l2nb, &blkno);
1614b40c2e66STino Reichardt 		if (rc == 0) {
1615b40c2e66STino Reichardt 			tt->blkno = blkno;
1616b40c2e66STino Reichardt 			tt->nblocks = nblocks;
1617b40c2e66STino Reichardt 			tt++; count++;
1618b40c2e66STino Reichardt 
1619b40c2e66STino Reichardt 			/* the whole ag is free, trim now */
1620b40c2e66STino Reichardt 			if (bmp->db_agfree[agno] == 0)
1621b40c2e66STino Reichardt 				break;
1622b40c2e66STino Reichardt 
1623b40c2e66STino Reichardt 			/* give a hint for the next while */
1624b40c2e66STino Reichardt 			nblocks = bmp->db_agfree[agno];
1625b40c2e66STino Reichardt 			continue;
1626b40c2e66STino Reichardt 		} else if (rc == -ENOSPC) {
1627b40c2e66STino Reichardt 			/* search for next smaller log2 block */
1628b40c2e66STino Reichardt 			l2nb = BLKSTOL2(nblocks) - 1;
1629f650148bSPei Li 			if (unlikely(l2nb < 0))
1630f650148bSPei Li 				break;
16314208c398SColin Ian King 			nblocks = 1LL << l2nb;
1632b40c2e66STino Reichardt 		} else {
1633b40c2e66STino Reichardt 			/* Trim any already allocated blocks */
1634eb8630d7SJoe Perches 			jfs_error(bmp->db_ipbmap->i_sb, "-EIO\n");
1635b40c2e66STino Reichardt 			break;
1636b40c2e66STino Reichardt 		}
1637b40c2e66STino Reichardt 
1638b40c2e66STino Reichardt 		/* check, if our trim array is full */
1639b40c2e66STino Reichardt 		if (unlikely(count >= range_cnt - 1))
1640b40c2e66STino Reichardt 			break;
1641b40c2e66STino Reichardt 	}
1642b40c2e66STino Reichardt 	IWRITE_UNLOCK(ipbmap);
1643b40c2e66STino Reichardt 
1644b40c2e66STino Reichardt 	tt->nblocks = 0; /* mark the current end */
1645b40c2e66STino Reichardt 	for (tt = totrim; tt->nblocks != 0; tt++) {
1646b40c2e66STino Reichardt 		/* when mounted with online discard, dbFree() will
1647b40c2e66STino Reichardt 		 * call jfs_issue_discard() itself */
1648b40c2e66STino Reichardt 		if (!(JFS_SBI(sb)->flag & JFS_DISCARD))
1649b40c2e66STino Reichardt 			jfs_issue_discard(ip, tt->blkno, tt->nblocks);
1650b40c2e66STino Reichardt 		dbFree(ip, tt->blkno, tt->nblocks);
1651b40c2e66STino Reichardt 		trimmed += tt->nblocks;
1652b40c2e66STino Reichardt 	}
1653b40c2e66STino Reichardt 	kfree(totrim);
1654b40c2e66STino Reichardt 
1655b40c2e66STino Reichardt 	return trimmed;
1656b40c2e66STino Reichardt }
1657b40c2e66STino Reichardt 
1658b40c2e66STino Reichardt /*
16591da177e4SLinus Torvalds  * NAME:	dbFindCtl()
16601da177e4SLinus Torvalds  *
16611da177e4SLinus Torvalds  * FUNCTION:	starting at a specified dmap control page level and block
16621da177e4SLinus Torvalds  *		number, search down the dmap control levels for a range of
16631da177e4SLinus Torvalds  *		contiguous free blocks large enough to satisfy an allocation
16641da177e4SLinus Torvalds  *		request for the specified number of free blocks.
16651da177e4SLinus Torvalds  *
16661da177e4SLinus Torvalds  *		if sufficient contiguous free blocks are found, this routine
16671da177e4SLinus Torvalds  *		returns the starting block number within a dmap page that
16681da177e4SLinus Torvalds  *		contains or starts a range of contiguous free blocks that
16691da177e4SLinus Torvalds  *		is sufficient in size.
16701da177e4SLinus Torvalds  *
16711da177e4SLinus Torvalds  * PARAMETERS:
16721da177e4SLinus Torvalds  *	bmp	-  pointer to bmap descriptor
16731da177e4SLinus Torvalds  *	level	-  starting dmap control page level.
16741da177e4SLinus Torvalds  *	l2nb	-  log2 number of contiguous free blocks desired.
16751da177e4SLinus Torvalds  *	*blkno	-  on entry, starting block number for conducting the search.
16761da177e4SLinus Torvalds  *		   on successful return, the first block within a dmap page
16771da177e4SLinus Torvalds  *		   that contains or starts a range of contiguous free blocks.
16781da177e4SLinus Torvalds  *
16791da177e4SLinus Torvalds  * RETURN VALUES:
16801da177e4SLinus Torvalds  *	0	- success
16811da177e4SLinus Torvalds  *	-ENOSPC	- insufficient disk resources
16821da177e4SLinus Torvalds  *	-EIO	- i/o error
16831da177e4SLinus Torvalds  *
16841da177e4SLinus Torvalds  * serialization: IWRITE_LOCK(ipbmap) held on entry/exit;
16851da177e4SLinus Torvalds  */
static int dbFindCtl(struct bmap * bmp, int l2nb, int level, s64 * blkno)
{
	struct inode *ipbmap = bmp->db_ipbmap;
	s64 cur = *blkno;
	int lev;

	/* walk from the requested dmap control level down towards L0.
	 * at every level the tree of the control page covering the
	 * current block number is searched, and the block number is
	 * advanced to the leaf at which sufficient free space was found.
	 */
	for (lev = level; lev >= 0; lev--) {
		struct metapage *mp;
		struct dmapctl *dcp;
		s64 lblkno;
		int budmin, leafidx, rc;

		/* read the dmap control page for this block number
		 * and level.
		 */
		lblkno = BLKTOCTL(cur, bmp->db_l2nbperpage, lev);
		mp = read_metapage(ipbmap, lblkno, PSIZE, 0);
		if (!mp)
			return -EIO;

		dcp = (struct dmapctl *) mp->data;

		/* budmin is picked up now because it is still needed
		 * after the buffer has been released.
		 */
		budmin = dcp->budmin;

		/* a control page whose leaf index is not CTLLEAFIND
		 * is treated as corrupt.
		 */
		if (dcp->leafidx != cpu_to_le32(CTLLEAFIND)) {
			jfs_error(ipbmap->i_sb, "Corrupt dmapctl page\n");
			release_metapage(mp);
			return -EIO;
		}

		/* search the control page's tree; on success leafidx
		 * is the leaf at which the free space was found.  the
		 * buffer is not needed once the search is done.
		 */
		rc = dbFindLeaf((dmtree_t *) dcp, l2nb, &leafidx, true);
		release_metapage(mp);

		if (rc) {
			/* a miss at the starting level simply means there
			 * is no space.  a miss further down contradicts
			 * what the level above reported.
			 */
			if (lev == level)
				return -ENOSPC;

			jfs_error(ipbmap->i_sb, "dmap inconsistent\n");
			return -EIO;
		}

		/* move the block number to the start of the range
		 * described by the leaf that was found.
		 */
		cur += ((s64) leafidx) << budmin;

		/* no need to descend any further once the request is
		 * at least as large as what a leaf at this level
		 * describes.
		 */
		if (l2nb >= budmin)
			break;
	}

	*blkno = cur;
	return 0;
}
17571da177e4SLinus Torvalds 
17581da177e4SLinus Torvalds 
17591da177e4SLinus Torvalds /*
17601da177e4SLinus Torvalds  * NAME:	dbAllocCtl()
17611da177e4SLinus Torvalds  *
17621da177e4SLinus Torvalds  * FUNCTION:	attempt to allocate a specified number of contiguous
17631da177e4SLinus Torvalds  *		blocks starting within a specific dmap.
17641da177e4SLinus Torvalds  *
17651da177e4SLinus Torvalds  *		this routine is called by higher level routines that search
17661da177e4SLinus Torvalds  *		the dmap control pages above the actual dmaps for contiguous
17671da177e4SLinus Torvalds  *		free space.  the result of successful searches by these
17681da177e4SLinus Torvalds  *		routines are the starting block numbers within dmaps, with
17691da177e4SLinus Torvalds  *		the dmaps themselves containing the desired contiguous free
17701da177e4SLinus Torvalds  *		space or starting a contiguous free space of desired size
17711da177e4SLinus Torvalds  *		that is made up of the blocks of one or more dmaps. these
17721da177e4SLinus Torvalds  *		calls should not fail due to insufficent resources.
17731da177e4SLinus Torvalds  *
17741da177e4SLinus Torvalds  *		this routine is called in some cases where it is not known
17751da177e4SLinus Torvalds  *		whether it will fail due to insufficient resources.  more
17761da177e4SLinus Torvalds  *		specifically, this occurs when allocating from an allocation
17771da177e4SLinus Torvalds  *		group whose size is equal to the number of blocks per dmap.
17781da177e4SLinus Torvalds  *		in this case, the dmap control pages are not examined prior
17791da177e4SLinus Torvalds  *		to calling this routine (to save pathlength) and the call
17801da177e4SLinus Torvalds  *		might fail.
17811da177e4SLinus Torvalds  *
17821da177e4SLinus Torvalds  *		for a request size that fits within a dmap, this routine relies
17831da177e4SLinus Torvalds  *		upon the dmap's dmtree to find the requested contiguous free
17841da177e4SLinus Torvalds  *		space.  for request sizes that are larger than a dmap, the
17851da177e4SLinus Torvalds  *		requested free space will start at the first block of the
17861da177e4SLinus Torvalds  *		first dmap (i.e. blkno).
17871da177e4SLinus Torvalds  *
17881da177e4SLinus Torvalds  * PARAMETERS:
17891da177e4SLinus Torvalds  *	bmp	-  pointer to bmap descriptor
17901da177e4SLinus Torvalds  *	nblocks	 -  actual number of contiguous free blocks to allocate.
17911da177e4SLinus Torvalds  *	l2nb	 -  log2 number of contiguous free blocks to allocate.
17921da177e4SLinus Torvalds  *	blkno	 -  starting block number of the dmap to start the allocation
17931da177e4SLinus Torvalds  *		    from.
17941da177e4SLinus Torvalds  *	results	-  on successful return, set to the starting block number
17951da177e4SLinus Torvalds  *		   of the newly allocated range.
17961da177e4SLinus Torvalds  *
17971da177e4SLinus Torvalds  * RETURN VALUES:
17981da177e4SLinus Torvalds  *	0	- success
17991da177e4SLinus Torvalds  *	-ENOSPC	- insufficient disk resources
18001da177e4SLinus Torvalds  *	-EIO	- i/o error
18011da177e4SLinus Torvalds  *
18021da177e4SLinus Torvalds  * serialization: IWRITE_LOCK(ipbmap) held on entry/exit;
18031da177e4SLinus Torvalds  */
18041da177e4SLinus Torvalds static int
dbAllocCtl(struct bmap * bmp,s64 nblocks,int l2nb,s64 blkno,s64 * results)18051da177e4SLinus Torvalds dbAllocCtl(struct bmap * bmp, s64 nblocks, int l2nb, s64 blkno, s64 * results)
18061da177e4SLinus Torvalds {
18071da177e4SLinus Torvalds 	int rc, nb;
18081da177e4SLinus Torvalds 	s64 b, lblkno, n;
18091da177e4SLinus Torvalds 	struct metapage *mp;
18101da177e4SLinus Torvalds 	struct dmap *dp;
18111da177e4SLinus Torvalds 
18121da177e4SLinus Torvalds 	/* check if the allocation request is confined to a single dmap.
18131da177e4SLinus Torvalds 	 */
18141da177e4SLinus Torvalds 	if (l2nb <= L2BPERDMAP) {
18151da177e4SLinus Torvalds 		/* get the buffer for the dmap.
18161da177e4SLinus Torvalds 		 */
18171da177e4SLinus Torvalds 		lblkno = BLKTODMAP(blkno, bmp->db_l2nbperpage);
18181da177e4SLinus Torvalds 		mp = read_metapage(bmp->db_ipbmap, lblkno, PSIZE, 0);
18191da177e4SLinus Torvalds 		if (mp == NULL)
18201da177e4SLinus Torvalds 			return -EIO;
18211da177e4SLinus Torvalds 		dp = (struct dmap *) mp->data;
18221da177e4SLinus Torvalds 
1823c56245baSGhanshyam Agrawal 		if (dp->tree.budmin < 0)
1824c56245baSGhanshyam Agrawal 			return -EIO;
1825c56245baSGhanshyam Agrawal 
18261da177e4SLinus Torvalds 		/* try to allocate the blocks.
18271da177e4SLinus Torvalds 		 */
18281da177e4SLinus Torvalds 		rc = dbAllocDmapLev(bmp, dp, (int) nblocks, l2nb, results);
18291da177e4SLinus Torvalds 		if (rc == 0)
18301da177e4SLinus Torvalds 			mark_metapage_dirty(mp);
18311da177e4SLinus Torvalds 
18321da177e4SLinus Torvalds 		release_metapage(mp);
18331da177e4SLinus Torvalds 
18341da177e4SLinus Torvalds 		return (rc);
18351da177e4SLinus Torvalds 	}
18361da177e4SLinus Torvalds 
18371da177e4SLinus Torvalds 	/* allocation request involving multiple dmaps. it must start on
18381da177e4SLinus Torvalds 	 * a dmap boundary.
18391da177e4SLinus Torvalds 	 */
18401da177e4SLinus Torvalds 	assert((blkno & (BPERDMAP - 1)) == 0);
18411da177e4SLinus Torvalds 
18421da177e4SLinus Torvalds 	/* allocate the blocks dmap by dmap.
18431da177e4SLinus Torvalds 	 */
18441da177e4SLinus Torvalds 	for (n = nblocks, b = blkno; n > 0; n -= nb, b += nb) {
18451da177e4SLinus Torvalds 		/* get the buffer for the dmap.
18461da177e4SLinus Torvalds 		 */
18471da177e4SLinus Torvalds 		lblkno = BLKTODMAP(b, bmp->db_l2nbperpage);
18481da177e4SLinus Torvalds 		mp = read_metapage(bmp->db_ipbmap, lblkno, PSIZE, 0);
18491da177e4SLinus Torvalds 		if (mp == NULL) {
18501da177e4SLinus Torvalds 			rc = -EIO;
18511da177e4SLinus Torvalds 			goto backout;
18521da177e4SLinus Torvalds 		}
18531da177e4SLinus Torvalds 		dp = (struct dmap *) mp->data;
18541da177e4SLinus Torvalds 
18551da177e4SLinus Torvalds 		/* the dmap better be all free.
18561da177e4SLinus Torvalds 		 */
18571da177e4SLinus Torvalds 		if (dp->tree.stree[ROOT] != L2BPERDMAP) {
18581da177e4SLinus Torvalds 			release_metapage(mp);
18591da177e4SLinus Torvalds 			jfs_error(bmp->db_ipbmap->i_sb,
1860eb8630d7SJoe Perches 				  "the dmap is not all free\n");
18611da177e4SLinus Torvalds 			rc = -EIO;
18621da177e4SLinus Torvalds 			goto backout;
18631da177e4SLinus Torvalds 		}
18641da177e4SLinus Torvalds 
18651da177e4SLinus Torvalds 		/* determine how many blocks to allocate from this dmap.
18661da177e4SLinus Torvalds 		 */
18674f65b6dbSFabian Frederick 		nb = min_t(s64, n, BPERDMAP);
18681da177e4SLinus Torvalds 
18691da177e4SLinus Torvalds 		/* allocate the blocks from the dmap.
18701da177e4SLinus Torvalds 		 */
18711da177e4SLinus Torvalds 		if ((rc = dbAllocDmap(bmp, dp, b, nb))) {
18721da177e4SLinus Torvalds 			release_metapage(mp);
18731da177e4SLinus Torvalds 			goto backout;
18741da177e4SLinus Torvalds 		}
18751da177e4SLinus Torvalds 
18761da177e4SLinus Torvalds 		/* write the buffer.
18771da177e4SLinus Torvalds 		 */
18781da177e4SLinus Torvalds 		write_metapage(mp);
18791da177e4SLinus Torvalds 	}
18801da177e4SLinus Torvalds 
18811da177e4SLinus Torvalds 	/* set the results (starting block number) and return.
18821da177e4SLinus Torvalds 	 */
18831da177e4SLinus Torvalds 	*results = blkno;
18841da177e4SLinus Torvalds 	return (0);
18851da177e4SLinus Torvalds 
18861da177e4SLinus Torvalds 	/* something failed in handling an allocation request involving
18871da177e4SLinus Torvalds 	 * multiple dmaps.  we'll try to clean up by backing out any
18881da177e4SLinus Torvalds 	 * allocation that has already happened for this request.  if
18891da177e4SLinus Torvalds 	 * we fail in backing out the allocation, we'll mark the file
18901da177e4SLinus Torvalds 	 * system to indicate that blocks have been leaked.
18911da177e4SLinus Torvalds 	 */
18921da177e4SLinus Torvalds       backout:
18931da177e4SLinus Torvalds 
18941da177e4SLinus Torvalds 	/* try to backout the allocations dmap by dmap.
18951da177e4SLinus Torvalds 	 */
18961da177e4SLinus Torvalds 	for (n = nblocks - n, b = blkno; n > 0;
18971da177e4SLinus Torvalds 	     n -= BPERDMAP, b += BPERDMAP) {
18981da177e4SLinus Torvalds 		/* get the buffer for this dmap.
18991da177e4SLinus Torvalds 		 */
19001da177e4SLinus Torvalds 		lblkno = BLKTODMAP(b, bmp->db_l2nbperpage);
19011da177e4SLinus Torvalds 		mp = read_metapage(bmp->db_ipbmap, lblkno, PSIZE, 0);
19021da177e4SLinus Torvalds 		if (mp == NULL) {
19031da177e4SLinus Torvalds 			/* could not back out.  mark the file system
19041da177e4SLinus Torvalds 			 * to indicate that we have leaked blocks.
19051da177e4SLinus Torvalds 			 */
19061da177e4SLinus Torvalds 			jfs_error(bmp->db_ipbmap->i_sb,
1907eb8630d7SJoe Perches 				  "I/O Error: Block Leakage\n");
19081da177e4SLinus Torvalds 			continue;
19091da177e4SLinus Torvalds 		}
19101da177e4SLinus Torvalds 		dp = (struct dmap *) mp->data;
19111da177e4SLinus Torvalds 
19121da177e4SLinus Torvalds 		/* free the blocks is this dmap.
19131da177e4SLinus Torvalds 		 */
19141da177e4SLinus Torvalds 		if (dbFreeDmap(bmp, dp, b, BPERDMAP)) {
19151da177e4SLinus Torvalds 			/* could not back out.  mark the file system
19161da177e4SLinus Torvalds 			 * to indicate that we have leaked blocks.
19171da177e4SLinus Torvalds 			 */
19181da177e4SLinus Torvalds 			release_metapage(mp);
1919eb8630d7SJoe Perches 			jfs_error(bmp->db_ipbmap->i_sb, "Block Leakage\n");
19201da177e4SLinus Torvalds 			continue;
19211da177e4SLinus Torvalds 		}
19221da177e4SLinus Torvalds 
19231da177e4SLinus Torvalds 		/* write the buffer.
19241da177e4SLinus Torvalds 		 */
19251da177e4SLinus Torvalds 		write_metapage(mp);
19261da177e4SLinus Torvalds 	}
19271da177e4SLinus Torvalds 
19281da177e4SLinus Torvalds 	return (rc);
19291da177e4SLinus Torvalds }
19301da177e4SLinus Torvalds 
19311da177e4SLinus Torvalds 
19321da177e4SLinus Torvalds /*
19331da177e4SLinus Torvalds  * NAME:	dbAllocDmapLev()
19341da177e4SLinus Torvalds  *
19351da177e4SLinus Torvalds  * FUNCTION:	attempt to allocate a specified number of contiguous blocks
19361da177e4SLinus Torvalds  *		from a specified dmap.
19371da177e4SLinus Torvalds  *
19381da177e4SLinus Torvalds  *		this routine checks if the contiguous blocks are available.
19391da177e4SLinus Torvalds  *		if so, nblocks of blocks are allocated; otherwise, ENOSPC is
19401da177e4SLinus Torvalds  *		returned.
19411da177e4SLinus Torvalds  *
19421da177e4SLinus Torvalds  * PARAMETERS:
19431da177e4SLinus Torvalds  *	mp	-  pointer to bmap descriptor
19441da177e4SLinus Torvalds  *	dp	-  pointer to dmap to attempt to allocate blocks from.
19451da177e4SLinus Torvalds  *	l2nb	-  log2 number of contiguous block desired.
19461da177e4SLinus Torvalds  *	nblocks	-  actual number of contiguous block desired.
19471da177e4SLinus Torvalds  *	results	-  on successful return, set to the starting block number
19481da177e4SLinus Torvalds  *		   of the newly allocated range.
19491da177e4SLinus Torvalds  *
19501da177e4SLinus Torvalds  * RETURN VALUES:
19511da177e4SLinus Torvalds  *	0	- success
19521da177e4SLinus Torvalds  *	-ENOSPC	- insufficient disk resources
19531da177e4SLinus Torvalds  *	-EIO	- i/o error
19541da177e4SLinus Torvalds  *
19551da177e4SLinus Torvalds  * serialization: IREAD_LOCK(ipbmap), e.g., from dbAlloc(), or
19561da177e4SLinus Torvalds  *	IWRITE_LOCK(ipbmap), e.g., dbAllocCtl(), held on entry/exit;
19571da177e4SLinus Torvalds  */
19581da177e4SLinus Torvalds static int
dbAllocDmapLev(struct bmap * bmp,struct dmap * dp,int nblocks,int l2nb,s64 * results)19591da177e4SLinus Torvalds dbAllocDmapLev(struct bmap * bmp,
19601da177e4SLinus Torvalds 	       struct dmap * dp, int nblocks, int l2nb, s64 * results)
19611da177e4SLinus Torvalds {
19621da177e4SLinus Torvalds 	s64 blkno;
19631da177e4SLinus Torvalds 	int leafidx, rc;
19641da177e4SLinus Torvalds 
19651da177e4SLinus Torvalds 	/* can't be more than a dmaps worth of blocks */
19661da177e4SLinus Torvalds 	assert(l2nb <= L2BPERDMAP);
19671da177e4SLinus Torvalds 
19681da177e4SLinus Torvalds 	/* search the tree within the dmap page for sufficient
19691da177e4SLinus Torvalds 	 * free space.  if sufficient free space is found, dbFindLeaf()
19701da177e4SLinus Torvalds 	 * returns the index of the leaf at which free space was found.
19711da177e4SLinus Torvalds 	 */
197287c681abSManas Ghandat 	if (dbFindLeaf((dmtree_t *) &dp->tree, l2nb, &leafidx, false))
19731da177e4SLinus Torvalds 		return -ENOSPC;
19741da177e4SLinus Torvalds 
19754e302336SYogesh 	if (leafidx < 0)
19764e302336SYogesh 		return -EIO;
19774e302336SYogesh 
19781da177e4SLinus Torvalds 	/* determine the block number within the file system corresponding
19791da177e4SLinus Torvalds 	 * to the leaf at which free space was found.
19801da177e4SLinus Torvalds 	 */
19811da177e4SLinus Torvalds 	blkno = le64_to_cpu(dp->start) + (leafidx << L2DBWORD);
19821da177e4SLinus Torvalds 
19831da177e4SLinus Torvalds 	/* if not all bits of the dmap word are free, get the starting
19841da177e4SLinus Torvalds 	 * bit number within the dmap word of the required string of free
19851da177e4SLinus Torvalds 	 * bits and adjust the block number with this value.
19861da177e4SLinus Torvalds 	 */
19871da177e4SLinus Torvalds 	if (dp->tree.stree[leafidx + LEAFIND] < BUDMIN)
19881da177e4SLinus Torvalds 		blkno += dbFindBits(le32_to_cpu(dp->wmap[leafidx]), l2nb);
19891da177e4SLinus Torvalds 
19901da177e4SLinus Torvalds 	/* allocate the blocks */
19911da177e4SLinus Torvalds 	if ((rc = dbAllocDmap(bmp, dp, blkno, nblocks)) == 0)
19921da177e4SLinus Torvalds 		*results = blkno;
19931da177e4SLinus Torvalds 
19941da177e4SLinus Torvalds 	return (rc);
19951da177e4SLinus Torvalds }
19961da177e4SLinus Torvalds 
19971da177e4SLinus Torvalds 
19981da177e4SLinus Torvalds /*
19991da177e4SLinus Torvalds  * NAME:	dbAllocDmap()
20001da177e4SLinus Torvalds  *
20011da177e4SLinus Torvalds  * FUNCTION:	adjust the disk allocation map to reflect the allocation
20021da177e4SLinus Torvalds  *		of a specified block range within a dmap.
20031da177e4SLinus Torvalds  *
20041da177e4SLinus Torvalds  *		this routine allocates the specified blocks from the dmap
20051da177e4SLinus Torvalds  *		through a call to dbAllocBits(). if the allocation of the
20061da177e4SLinus Torvalds  *		block range causes the maximum string of free blocks within
20071da177e4SLinus Torvalds  *		the dmap to change (i.e. the value of the root of the dmap's
20081da177e4SLinus Torvalds  *		dmtree), this routine will cause this change to be reflected
20091da177e4SLinus Torvalds  *		up through the appropriate levels of the dmap control pages
20101da177e4SLinus Torvalds  *		by a call to dbAdjCtl() for the L0 dmap control page that
20111da177e4SLinus Torvalds  *		covers this dmap.
20121da177e4SLinus Torvalds  *
20131da177e4SLinus Torvalds  * PARAMETERS:
20141da177e4SLinus Torvalds  *	bmp	-  pointer to bmap descriptor
20151da177e4SLinus Torvalds  *	dp	-  pointer to dmap to allocate the block range from.
20161da177e4SLinus Torvalds  *	blkno	-  starting block number of the block to be allocated.
20171da177e4SLinus Torvalds  *	nblocks	-  number of blocks to be allocated.
20181da177e4SLinus Torvalds  *
20191da177e4SLinus Torvalds  * RETURN VALUES:
20201da177e4SLinus Torvalds  *	0	- success
20211da177e4SLinus Torvalds  *	-EIO	- i/o error
20221da177e4SLinus Torvalds  *
20231da177e4SLinus Torvalds  * serialization: IREAD_LOCK(ipbmap) or IWRITE_LOCK(ipbmap) held on entry/exit;
20241da177e4SLinus Torvalds  */
dbAllocDmap(struct bmap * bmp,struct dmap * dp,s64 blkno,int nblocks)20251da177e4SLinus Torvalds static int dbAllocDmap(struct bmap * bmp, struct dmap * dp, s64 blkno,
20261da177e4SLinus Torvalds 		       int nblocks)
20271da177e4SLinus Torvalds {
20281da177e4SLinus Torvalds 	s8 oldroot;
20291da177e4SLinus Torvalds 	int rc;
20301da177e4SLinus Torvalds 
20311da177e4SLinus Torvalds 	/* save the current value of the root (i.e. maximum free string)
20321da177e4SLinus Torvalds 	 * of the dmap tree.
20331da177e4SLinus Torvalds 	 */
20341da177e4SLinus Torvalds 	oldroot = dp->tree.stree[ROOT];
20351da177e4SLinus Torvalds 
20361da177e4SLinus Torvalds 	/* allocate the specified (blocks) bits */
20371da177e4SLinus Torvalds 	dbAllocBits(bmp, dp, blkno, nblocks);
20381da177e4SLinus Torvalds 
20391da177e4SLinus Torvalds 	/* if the root has not changed, done. */
20401da177e4SLinus Torvalds 	if (dp->tree.stree[ROOT] == oldroot)
20411da177e4SLinus Torvalds 		return (0);
20421da177e4SLinus Torvalds 
20431da177e4SLinus Torvalds 	/* root changed. bubble the change up to the dmap control pages.
20441da177e4SLinus Torvalds 	 * if the adjustment of the upper level control pages fails,
20451da177e4SLinus Torvalds 	 * backout the bit allocation (thus making everything consistent).
20461da177e4SLinus Torvalds 	 */
20471da177e4SLinus Torvalds 	if ((rc = dbAdjCtl(bmp, blkno, dp->tree.stree[ROOT], 1, 0)))
20481da177e4SLinus Torvalds 		dbFreeBits(bmp, dp, blkno, nblocks);
20491da177e4SLinus Torvalds 
20501da177e4SLinus Torvalds 	return (rc);
20511da177e4SLinus Torvalds }
20521da177e4SLinus Torvalds 
20531da177e4SLinus Torvalds 
20541da177e4SLinus Torvalds /*
20551da177e4SLinus Torvalds  * NAME:	dbFreeDmap()
20561da177e4SLinus Torvalds  *
20571da177e4SLinus Torvalds  * FUNCTION:	adjust the disk allocation map to reflect the allocation
20581da177e4SLinus Torvalds  *		of a specified block range within a dmap.
20591da177e4SLinus Torvalds  *
20601da177e4SLinus Torvalds  *		this routine frees the specified blocks from the dmap through
20611da177e4SLinus Torvalds  *		a call to dbFreeBits(). if the deallocation of the block range
20621da177e4SLinus Torvalds  *		causes the maximum string of free blocks within the dmap to
20631da177e4SLinus Torvalds  *		change (i.e. the value of the root of the dmap's dmtree), this
20641da177e4SLinus Torvalds  *		routine will cause this change to be reflected up through the
20651da177e4SLinus Torvalds  *		appropriate levels of the dmap control pages by a call to
20661da177e4SLinus Torvalds  *		dbAdjCtl() for the L0 dmap control page that covers this dmap.
20671da177e4SLinus Torvalds  *
20681da177e4SLinus Torvalds  * PARAMETERS:
20691da177e4SLinus Torvalds  *	bmp	-  pointer to bmap descriptor
20701da177e4SLinus Torvalds  *	dp	-  pointer to dmap to free the block range from.
20711da177e4SLinus Torvalds  *	blkno	-  starting block number of the block to be freed.
20721da177e4SLinus Torvalds  *	nblocks	-  number of blocks to be freed.
20731da177e4SLinus Torvalds  *
20741da177e4SLinus Torvalds  * RETURN VALUES:
20751da177e4SLinus Torvalds  *	0	- success
20761da177e4SLinus Torvalds  *	-EIO	- i/o error
20771da177e4SLinus Torvalds  *
20781da177e4SLinus Torvalds  * serialization: IREAD_LOCK(ipbmap) or IWRITE_LOCK(ipbmap) held on entry/exit;
20791da177e4SLinus Torvalds  */
dbFreeDmap(struct bmap * bmp,struct dmap * dp,s64 blkno,int nblocks)20801da177e4SLinus Torvalds static int dbFreeDmap(struct bmap * bmp, struct dmap * dp, s64 blkno,
20811da177e4SLinus Torvalds 		      int nblocks)
20821da177e4SLinus Torvalds {
20831da177e4SLinus Torvalds 	s8 oldroot;
208456d12549SDave Kleikamp 	int rc = 0, word;
20851da177e4SLinus Torvalds 
20861da177e4SLinus Torvalds 	/* save the current value of the root (i.e. maximum free string)
20871da177e4SLinus Torvalds 	 * of the dmap tree.
20881da177e4SLinus Torvalds 	 */
20891da177e4SLinus Torvalds 	oldroot = dp->tree.stree[ROOT];
20901da177e4SLinus Torvalds 
20911da177e4SLinus Torvalds 	/* free the specified (blocks) bits */
209256d12549SDave Kleikamp 	rc = dbFreeBits(bmp, dp, blkno, nblocks);
20931da177e4SLinus Torvalds 
209456d12549SDave Kleikamp 	/* if error or the root has not changed, done. */
209556d12549SDave Kleikamp 	if (rc || (dp->tree.stree[ROOT] == oldroot))
209656d12549SDave Kleikamp 		return (rc);
20971da177e4SLinus Torvalds 
20981da177e4SLinus Torvalds 	/* root changed. bubble the change up to the dmap control pages.
20991da177e4SLinus Torvalds 	 * if the adjustment of the upper level control pages fails,
21001da177e4SLinus Torvalds 	 * backout the deallocation.
21011da177e4SLinus Torvalds 	 */
21021da177e4SLinus Torvalds 	if ((rc = dbAdjCtl(bmp, blkno, dp->tree.stree[ROOT], 0, 0))) {
21031da177e4SLinus Torvalds 		word = (blkno & (BPERDMAP - 1)) >> L2DBWORD;
21041da177e4SLinus Torvalds 
21051da177e4SLinus Torvalds 		/* as part of backing out the deallocation, we will have
21061da177e4SLinus Torvalds 		 * to back split the dmap tree if the deallocation caused
21071da177e4SLinus Torvalds 		 * the freed blocks to become part of a larger binary buddy
21081da177e4SLinus Torvalds 		 * system.
21091da177e4SLinus Torvalds 		 */
21101da177e4SLinus Torvalds 		if (dp->tree.stree[word] == NOFREE)
21112e16a138SManas Ghandat 			dbBackSplit((dmtree_t *)&dp->tree, word, false);
21121da177e4SLinus Torvalds 
21131da177e4SLinus Torvalds 		dbAllocBits(bmp, dp, blkno, nblocks);
21141da177e4SLinus Torvalds 	}
21151da177e4SLinus Torvalds 
21161da177e4SLinus Torvalds 	return (rc);
21171da177e4SLinus Torvalds }
21181da177e4SLinus Torvalds 
21191da177e4SLinus Torvalds 
21201da177e4SLinus Torvalds /*
21211da177e4SLinus Torvalds  * NAME:	dbAllocBits()
21221da177e4SLinus Torvalds  *
21231da177e4SLinus Torvalds  * FUNCTION:	allocate a specified block range from a dmap.
21241da177e4SLinus Torvalds  *
21251da177e4SLinus Torvalds  *		this routine updates the dmap to reflect the working
21261da177e4SLinus Torvalds  *		state allocation of the specified block range. it directly
21271da177e4SLinus Torvalds  *		updates the bits of the working map and causes the adjustment
21281da177e4SLinus Torvalds  *		of the binary buddy system described by the dmap's dmtree
21291da177e4SLinus Torvalds  *		leaves to reflect the bits allocated.  it also causes the
21301da177e4SLinus Torvalds  *		dmap's dmtree, as a whole, to reflect the allocated range.
21311da177e4SLinus Torvalds  *
21321da177e4SLinus Torvalds  * PARAMETERS:
21331da177e4SLinus Torvalds  *	bmp	-  pointer to bmap descriptor
21341da177e4SLinus Torvalds  *	dp	-  pointer to dmap to allocate bits from.
21351da177e4SLinus Torvalds  *	blkno	-  starting block number of the bits to be allocated.
21361da177e4SLinus Torvalds  *	nblocks	-  number of bits to be allocated.
21371da177e4SLinus Torvalds  *
21381da177e4SLinus Torvalds  * RETURN VALUES: none
21391da177e4SLinus Torvalds  *
21401da177e4SLinus Torvalds  * serialization: IREAD_LOCK(ipbmap) or IWRITE_LOCK(ipbmap) held on entry/exit;
21411da177e4SLinus Torvalds  */
dbAllocBits(struct bmap * bmp,struct dmap * dp,s64 blkno,int nblocks)21421da177e4SLinus Torvalds static void dbAllocBits(struct bmap * bmp, struct dmap * dp, s64 blkno,
21431da177e4SLinus Torvalds 			int nblocks)
21441da177e4SLinus Torvalds {
21451da177e4SLinus Torvalds 	int dbitno, word, rembits, nb, nwords, wbitno, nw, agno;
21461da177e4SLinus Torvalds 	dmtree_t *tp = (dmtree_t *) & dp->tree;
21471da177e4SLinus Torvalds 	int size;
21481da177e4SLinus Torvalds 	s8 *leaf;
21491da177e4SLinus Torvalds 
21501da177e4SLinus Torvalds 	/* pick up a pointer to the leaves of the dmap tree */
21511da177e4SLinus Torvalds 	leaf = dp->tree.stree + LEAFIND;
21521da177e4SLinus Torvalds 
21531da177e4SLinus Torvalds 	/* determine the bit number and word within the dmap of the
21541da177e4SLinus Torvalds 	 * starting block.
21551da177e4SLinus Torvalds 	 */
21561da177e4SLinus Torvalds 	dbitno = blkno & (BPERDMAP - 1);
21571da177e4SLinus Torvalds 	word = dbitno >> L2DBWORD;
21581da177e4SLinus Torvalds 
21591da177e4SLinus Torvalds 	/* block range better be within the dmap */
21601da177e4SLinus Torvalds 	assert(dbitno + nblocks <= BPERDMAP);
21611da177e4SLinus Torvalds 
21621da177e4SLinus Torvalds 	/* allocate the bits of the dmap's words corresponding to the block
21631da177e4SLinus Torvalds 	 * range. not all bits of the first and last words may be contained
21641da177e4SLinus Torvalds 	 * within the block range.  if this is the case, we'll work against
21651da177e4SLinus Torvalds 	 * those words (i.e. partial first and/or last) on an individual basis
21661da177e4SLinus Torvalds 	 * (a single pass), allocating the bits of interest by hand and
21671da177e4SLinus Torvalds 	 * updating the leaf corresponding to the dmap word. a single pass
21681da177e4SLinus Torvalds 	 * will be used for all dmap words fully contained within the
21691da177e4SLinus Torvalds 	 * specified range.  within this pass, the bits of all fully contained
21701da177e4SLinus Torvalds 	 * dmap words will be marked as free in a single shot and the leaves
21711da177e4SLinus Torvalds 	 * will be updated. a single leaf may describe the free space of
21721da177e4SLinus Torvalds 	 * multiple dmap words, so we may update only a subset of the actual
21731da177e4SLinus Torvalds 	 * leaves corresponding to the dmap words of the block range.
21741da177e4SLinus Torvalds 	 */
21751da177e4SLinus Torvalds 	for (rembits = nblocks; rembits > 0; rembits -= nb, dbitno += nb) {
21761da177e4SLinus Torvalds 		/* determine the bit number within the word and
21771da177e4SLinus Torvalds 		 * the number of bits within the word.
21781da177e4SLinus Torvalds 		 */
21791da177e4SLinus Torvalds 		wbitno = dbitno & (DBWORD - 1);
21801da177e4SLinus Torvalds 		nb = min(rembits, DBWORD - wbitno);
21811da177e4SLinus Torvalds 
21821da177e4SLinus Torvalds 		/* check if only part of a word is to be allocated.
21831da177e4SLinus Torvalds 		 */
21841da177e4SLinus Torvalds 		if (nb < DBWORD) {
21851da177e4SLinus Torvalds 			/* allocate (set to 1) the appropriate bits within
21861da177e4SLinus Torvalds 			 * this dmap word.
21871da177e4SLinus Torvalds 			 */
21881da177e4SLinus Torvalds 			dp->wmap[word] |= cpu_to_le32(ONES << (DBWORD - nb)
21891da177e4SLinus Torvalds 						      >> wbitno);
21901da177e4SLinus Torvalds 
21911da177e4SLinus Torvalds 			/* update the leaf for this dmap word. in addition
21921da177e4SLinus Torvalds 			 * to setting the leaf value to the binary buddy max
21931da177e4SLinus Torvalds 			 * of the updated dmap word, dbSplit() will split
21941da177e4SLinus Torvalds 			 * the binary system of the leaves if need be.
21951da177e4SLinus Torvalds 			 */
21961da177e4SLinus Torvalds 			dbSplit(tp, word, BUDMIN,
21972e16a138SManas Ghandat 				dbMaxBud((u8 *)&dp->wmap[word]), false);
21981da177e4SLinus Torvalds 
21991da177e4SLinus Torvalds 			word += 1;
22001da177e4SLinus Torvalds 		} else {
22011da177e4SLinus Torvalds 			/* one or more dmap words are fully contained
22021da177e4SLinus Torvalds 			 * within the block range.  determine how many
22031da177e4SLinus Torvalds 			 * words and allocate (set to 1) the bits of these
22041da177e4SLinus Torvalds 			 * words.
22051da177e4SLinus Torvalds 			 */
22061da177e4SLinus Torvalds 			nwords = rembits >> L2DBWORD;
22071da177e4SLinus Torvalds 			memset(&dp->wmap[word], (int) ONES, nwords * 4);
22081da177e4SLinus Torvalds 
22091da177e4SLinus Torvalds 			/* determine how many bits.
22101da177e4SLinus Torvalds 			 */
22111da177e4SLinus Torvalds 			nb = nwords << L2DBWORD;
22121da177e4SLinus Torvalds 
22131da177e4SLinus Torvalds 			/* now update the appropriate leaves to reflect
22141da177e4SLinus Torvalds 			 * the allocated words.
22151da177e4SLinus Torvalds 			 */
22161da177e4SLinus Torvalds 			for (; nwords > 0; nwords -= nw) {
22171da177e4SLinus Torvalds 				if (leaf[word] < BUDMIN) {
22181da177e4SLinus Torvalds 					jfs_error(bmp->db_ipbmap->i_sb,
2219eb8630d7SJoe Perches 						  "leaf page corrupt\n");
22201da177e4SLinus Torvalds 					break;
22211da177e4SLinus Torvalds 				}
22221da177e4SLinus Torvalds 
22231da177e4SLinus Torvalds 				/* determine what the leaf value should be
22241da177e4SLinus Torvalds 				 * updated to as the minimum of the l2 number
22251da177e4SLinus Torvalds 				 * of bits being allocated and the l2 number
22261da177e4SLinus Torvalds 				 * of bits currently described by this leaf.
22271da177e4SLinus Torvalds 				 */
22284f65b6dbSFabian Frederick 				size = min_t(int, leaf[word],
22294f65b6dbSFabian Frederick 					     NLSTOL2BSZ(nwords));
22301da177e4SLinus Torvalds 
22311da177e4SLinus Torvalds 				/* update the leaf to reflect the allocation.
22321da177e4SLinus Torvalds 				 * in addition to setting the leaf value to
22331da177e4SLinus Torvalds 				 * NOFREE, dbSplit() will split the binary
22341da177e4SLinus Torvalds 				 * system of the leaves to reflect the current
22351da177e4SLinus Torvalds 				 * allocation (size).
22361da177e4SLinus Torvalds 				 */
22372e16a138SManas Ghandat 				dbSplit(tp, word, size, NOFREE, false);
22381da177e4SLinus Torvalds 
22391da177e4SLinus Torvalds 				/* get the number of dmap words handled */
22401da177e4SLinus Torvalds 				nw = BUDSIZE(size, BUDMIN);
22411da177e4SLinus Torvalds 				word += nw;
22421da177e4SLinus Torvalds 			}
22431da177e4SLinus Torvalds 		}
22441da177e4SLinus Torvalds 	}
22451da177e4SLinus Torvalds 
22461da177e4SLinus Torvalds 	/* update the free count for this dmap */
224789145622SMarcin Slusarz 	le32_add_cpu(&dp->nfree, -nblocks);
22481da177e4SLinus Torvalds 
22491da177e4SLinus Torvalds 	BMAP_LOCK(bmp);
22501da177e4SLinus Torvalds 
22511da177e4SLinus Torvalds 	/* if this allocation group is completely free,
22521da177e4SLinus Torvalds 	 * update the maximum allocation group number if this allocation
22531da177e4SLinus Torvalds 	 * group is the new max.
22541da177e4SLinus Torvalds 	 */
22551da177e4SLinus Torvalds 	agno = blkno >> bmp->db_agl2size;
22561da177e4SLinus Torvalds 	if (agno > bmp->db_maxag)
22571da177e4SLinus Torvalds 		bmp->db_maxag = agno;
22581da177e4SLinus Torvalds 
22591da177e4SLinus Torvalds 	/* update the free count for the allocation group and map */
22601da177e4SLinus Torvalds 	bmp->db_agfree[agno] -= nblocks;
22611da177e4SLinus Torvalds 	bmp->db_nfree -= nblocks;
22621da177e4SLinus Torvalds 
22631da177e4SLinus Torvalds 	BMAP_UNLOCK(bmp);
22641da177e4SLinus Torvalds }
22651da177e4SLinus Torvalds 
22661da177e4SLinus Torvalds 
22671da177e4SLinus Torvalds /*
22681da177e4SLinus Torvalds  * NAME:	dbFreeBits()
22691da177e4SLinus Torvalds  *
22701da177e4SLinus Torvalds  * FUNCTION:	free a specified block range from a dmap.
22711da177e4SLinus Torvalds  *
22721da177e4SLinus Torvalds  *		this routine updates the dmap to reflect the working
22731da177e4SLinus Torvalds  *		state allocation of the specified block range. it directly
22741da177e4SLinus Torvalds  *		updates the bits of the working map and causes the adjustment
22751da177e4SLinus Torvalds  *		of the binary buddy system described by the dmap's dmtree
22761da177e4SLinus Torvalds  *		leaves to reflect the bits freed.  it also causes the dmap's
22771da177e4SLinus Torvalds  *		dmtree, as a whole, to reflect the deallocated range.
22781da177e4SLinus Torvalds  *
22791da177e4SLinus Torvalds  * PARAMETERS:
22801da177e4SLinus Torvalds  *	bmp	-  pointer to bmap descriptor
22811da177e4SLinus Torvalds  *	dp	-  pointer to dmap to free bits from.
22821da177e4SLinus Torvalds  *	blkno	-  starting block number of the bits to be freed.
22831da177e4SLinus Torvalds  *	nblocks	-  number of bits to be freed.
22841da177e4SLinus Torvalds  *
228556d12549SDave Kleikamp  * RETURN VALUES: 0 for success
22861da177e4SLinus Torvalds  *
22871da177e4SLinus Torvalds  * serialization: IREAD_LOCK(ipbmap) or IWRITE_LOCK(ipbmap) held on entry/exit;
22881da177e4SLinus Torvalds  */
dbFreeBits(struct bmap * bmp,struct dmap * dp,s64 blkno,int nblocks)228956d12549SDave Kleikamp static int dbFreeBits(struct bmap * bmp, struct dmap * dp, s64 blkno,
22901da177e4SLinus Torvalds 		       int nblocks)
22911da177e4SLinus Torvalds {
22921da177e4SLinus Torvalds 	int dbitno, word, rembits, nb, nwords, wbitno, nw, agno;
22931da177e4SLinus Torvalds 	dmtree_t *tp = (dmtree_t *) & dp->tree;
229456d12549SDave Kleikamp 	int rc = 0;
22951da177e4SLinus Torvalds 	int size;
22961da177e4SLinus Torvalds 
22971da177e4SLinus Torvalds 	/* determine the bit number and word within the dmap of the
22981da177e4SLinus Torvalds 	 * starting block.
22991da177e4SLinus Torvalds 	 */
23001da177e4SLinus Torvalds 	dbitno = blkno & (BPERDMAP - 1);
23011da177e4SLinus Torvalds 	word = dbitno >> L2DBWORD;
23021da177e4SLinus Torvalds 
23031da177e4SLinus Torvalds 	/* block range better be within the dmap.
23041da177e4SLinus Torvalds 	 */
23051da177e4SLinus Torvalds 	assert(dbitno + nblocks <= BPERDMAP);
23061da177e4SLinus Torvalds 
23071da177e4SLinus Torvalds 	/* free the bits of the dmaps words corresponding to the block range.
23081da177e4SLinus Torvalds 	 * not all bits of the first and last words may be contained within
23091da177e4SLinus Torvalds 	 * the block range.  if this is the case, we'll work against those
23101da177e4SLinus Torvalds 	 * words (i.e. partial first and/or last) on an individual basis
23111da177e4SLinus Torvalds 	 * (a single pass), freeing the bits of interest by hand and updating
23121da177e4SLinus Torvalds 	 * the leaf corresponding to the dmap word. a single pass will be used
23131da177e4SLinus Torvalds 	 * for all dmap words fully contained within the specified range.
23141da177e4SLinus Torvalds 	 * within this pass, the bits of all fully contained dmap words will
23151da177e4SLinus Torvalds 	 * be marked as free in a single shot and the leaves will be updated. a
23161da177e4SLinus Torvalds 	 * single leaf may describe the free space of multiple dmap words,
23171da177e4SLinus Torvalds 	 * so we may update only a subset of the actual leaves corresponding
23181da177e4SLinus Torvalds 	 * to the dmap words of the block range.
23191da177e4SLinus Torvalds 	 *
23201da177e4SLinus Torvalds 	 * dbJoin() is used to update leaf values and will join the binary
23211da177e4SLinus Torvalds 	 * buddy system of the leaves if the new leaf values indicate this
23221da177e4SLinus Torvalds 	 * should be done.
23231da177e4SLinus Torvalds 	 */
23241da177e4SLinus Torvalds 	for (rembits = nblocks; rembits > 0; rembits -= nb, dbitno += nb) {
23251da177e4SLinus Torvalds 		/* determine the bit number within the word and
23261da177e4SLinus Torvalds 		 * the number of bits within the word.
23271da177e4SLinus Torvalds 		 */
23281da177e4SLinus Torvalds 		wbitno = dbitno & (DBWORD - 1);
23291da177e4SLinus Torvalds 		nb = min(rembits, DBWORD - wbitno);
23301da177e4SLinus Torvalds 
23311da177e4SLinus Torvalds 		/* check if only part of a word is to be freed.
23321da177e4SLinus Torvalds 		 */
23331da177e4SLinus Torvalds 		if (nb < DBWORD) {
23341da177e4SLinus Torvalds 			/* free (zero) the appropriate bits within this
23351da177e4SLinus Torvalds 			 * dmap word.
23361da177e4SLinus Torvalds 			 */
23371da177e4SLinus Torvalds 			dp->wmap[word] &=
23381da177e4SLinus Torvalds 			    cpu_to_le32(~(ONES << (DBWORD - nb)
23391da177e4SLinus Torvalds 					  >> wbitno));
23401da177e4SLinus Torvalds 
23411da177e4SLinus Torvalds 			/* update the leaf for this dmap word.
23421da177e4SLinus Torvalds 			 */
234356d12549SDave Kleikamp 			rc = dbJoin(tp, word,
23442e16a138SManas Ghandat 				    dbMaxBud((u8 *)&dp->wmap[word]), false);
234556d12549SDave Kleikamp 			if (rc)
234656d12549SDave Kleikamp 				return rc;
23471da177e4SLinus Torvalds 
23481da177e4SLinus Torvalds 			word += 1;
23491da177e4SLinus Torvalds 		} else {
23501da177e4SLinus Torvalds 			/* one or more dmap words are fully contained
23511da177e4SLinus Torvalds 			 * within the block range.  determine how many
23521da177e4SLinus Torvalds 			 * words and free (zero) the bits of these words.
23531da177e4SLinus Torvalds 			 */
23541da177e4SLinus Torvalds 			nwords = rembits >> L2DBWORD;
23551da177e4SLinus Torvalds 			memset(&dp->wmap[word], 0, nwords * 4);
23561da177e4SLinus Torvalds 
23571da177e4SLinus Torvalds 			/* determine how many bits.
23581da177e4SLinus Torvalds 			 */
23591da177e4SLinus Torvalds 			nb = nwords << L2DBWORD;
23601da177e4SLinus Torvalds 
23611da177e4SLinus Torvalds 			/* now update the appropriate leaves to reflect
23621da177e4SLinus Torvalds 			 * the freed words.
23631da177e4SLinus Torvalds 			 */
23641da177e4SLinus Torvalds 			for (; nwords > 0; nwords -= nw) {
23651da177e4SLinus Torvalds 				/* determine what the leaf value should be
23661da177e4SLinus Torvalds 				 * updated to as the minimum of the l2 number
23671da177e4SLinus Torvalds 				 * of bits being freed and the l2 (max) number
23681da177e4SLinus Torvalds 				 * of bits that can be described by this leaf.
23691da177e4SLinus Torvalds 				 */
23701da177e4SLinus Torvalds 				size =
23711da177e4SLinus Torvalds 				    min(LITOL2BSZ
23721da177e4SLinus Torvalds 					(word, L2LPERDMAP, BUDMIN),
23731da177e4SLinus Torvalds 					NLSTOL2BSZ(nwords));
23741da177e4SLinus Torvalds 
23751da177e4SLinus Torvalds 				/* update the leaf.
23761da177e4SLinus Torvalds 				 */
23772e16a138SManas Ghandat 				rc = dbJoin(tp, word, size, false);
237856d12549SDave Kleikamp 				if (rc)
237956d12549SDave Kleikamp 					return rc;
23801da177e4SLinus Torvalds 
23811da177e4SLinus Torvalds 				/* get the number of dmap words handled.
23821da177e4SLinus Torvalds 				 */
23831da177e4SLinus Torvalds 				nw = BUDSIZE(size, BUDMIN);
23841da177e4SLinus Torvalds 				word += nw;
23851da177e4SLinus Torvalds 			}
23861da177e4SLinus Torvalds 		}
23871da177e4SLinus Torvalds 	}
23881da177e4SLinus Torvalds 
23891da177e4SLinus Torvalds 	/* update the free count for this dmap.
23901da177e4SLinus Torvalds 	 */
239189145622SMarcin Slusarz 	le32_add_cpu(&dp->nfree, nblocks);
23921da177e4SLinus Torvalds 
23931da177e4SLinus Torvalds 	BMAP_LOCK(bmp);
23941da177e4SLinus Torvalds 
23951da177e4SLinus Torvalds 	/* update the free count for the allocation group and
23961da177e4SLinus Torvalds 	 * map.
23971da177e4SLinus Torvalds 	 */
23981da177e4SLinus Torvalds 	agno = blkno >> bmp->db_agl2size;
23991da177e4SLinus Torvalds 	bmp->db_nfree += nblocks;
24001da177e4SLinus Torvalds 	bmp->db_agfree[agno] += nblocks;
24011da177e4SLinus Torvalds 
24021da177e4SLinus Torvalds 	/* check if this allocation group is not completely free and
24031da177e4SLinus Torvalds 	 * if it is currently the maximum (rightmost) allocation group.
24041da177e4SLinus Torvalds 	 * if so, establish the new maximum allocation group number by
24051da177e4SLinus Torvalds 	 * searching left for the first allocation group with allocation.
24061da177e4SLinus Torvalds 	 */
24071da177e4SLinus Torvalds 	if ((bmp->db_agfree[agno] == bmp->db_agsize && agno == bmp->db_maxag) ||
24081da177e4SLinus Torvalds 	    (agno == bmp->db_numag - 1 &&
24091da177e4SLinus Torvalds 	     bmp->db_agfree[agno] == (bmp-> db_mapsize & (BPERDMAP - 1)))) {
24101da177e4SLinus Torvalds 		while (bmp->db_maxag > 0) {
24111da177e4SLinus Torvalds 			bmp->db_maxag -= 1;
24121da177e4SLinus Torvalds 			if (bmp->db_agfree[bmp->db_maxag] !=
24131da177e4SLinus Torvalds 			    bmp->db_agsize)
24141da177e4SLinus Torvalds 				break;
24151da177e4SLinus Torvalds 		}
24161da177e4SLinus Torvalds 
24171da177e4SLinus Torvalds 		/* re-establish the allocation group preference if the
24181da177e4SLinus Torvalds 		 * current preference is right of the maximum allocation
24191da177e4SLinus Torvalds 		 * group.
24201da177e4SLinus Torvalds 		 */
24211da177e4SLinus Torvalds 		if (bmp->db_agpref > bmp->db_maxag)
24221da177e4SLinus Torvalds 			bmp->db_agpref = bmp->db_maxag;
24231da177e4SLinus Torvalds 	}
24241da177e4SLinus Torvalds 
24251da177e4SLinus Torvalds 	BMAP_UNLOCK(bmp);
242656d12549SDave Kleikamp 
242756d12549SDave Kleikamp 	return 0;
24281da177e4SLinus Torvalds }
24291da177e4SLinus Torvalds 
24301da177e4SLinus Torvalds 
24311da177e4SLinus Torvalds /*
24321da177e4SLinus Torvalds  * NAME:	dbAdjCtl()
24331da177e4SLinus Torvalds  *
24341da177e4SLinus Torvalds  * FUNCTION:	adjust a dmap control page at a specified level to reflect
24351da177e4SLinus Torvalds  *		the change in a lower level dmap or dmap control page's
24361da177e4SLinus Torvalds  *		maximum string of free blocks (i.e. a change in the root
24371da177e4SLinus Torvalds  *		of the lower level object's dmtree) due to the allocation
24381da177e4SLinus Torvalds  *		or deallocation of a range of blocks with a single dmap.
24391da177e4SLinus Torvalds  *
24401da177e4SLinus Torvalds  *		on entry, this routine is provided with the new value of
24411da177e4SLinus Torvalds  *		the lower level dmap or dmap control page root and the
24421da177e4SLinus Torvalds  *		starting block number of the block range whose allocation
24431da177e4SLinus Torvalds  *		or deallocation resulted in the root change.  this range
24441da177e4SLinus Torvalds  *		is respresented by a single leaf of the current dmapctl
24451da177e4SLinus Torvalds  *		and the leaf will be updated with this value, possibly
24461da177e4SLinus Torvalds  *		causing a binary buddy system within the leaves to be
24471da177e4SLinus Torvalds  *		split or joined.  the update may also cause the dmapctl's
24481da177e4SLinus Torvalds  *		dmtree to be updated.
24491da177e4SLinus Torvalds  *
24501da177e4SLinus Torvalds  *		if the adjustment of the dmap control page, itself, causes its
24511da177e4SLinus Torvalds  *		root to change, this change will be bubbled up to the next dmap
24521da177e4SLinus Torvalds  *		control level by a recursive call to this routine, specifying
24531da177e4SLinus Torvalds  *		the new root value and the next dmap control page level to
24541da177e4SLinus Torvalds  *		be adjusted.
24551da177e4SLinus Torvalds  * PARAMETERS:
24561da177e4SLinus Torvalds  *	bmp	-  pointer to bmap descriptor
24571da177e4SLinus Torvalds  *	blkno	-  the first block of a block range within a dmap.  it is
24581da177e4SLinus Torvalds  *		   the allocation or deallocation of this block range that
24591da177e4SLinus Torvalds  *		   requires the dmap control page to be adjusted.
24601da177e4SLinus Torvalds  *	newval	-  the new value of the lower level dmap or dmap control
24611da177e4SLinus Torvalds  *		   page root.
24624d81715fSRichard Knutsson  *	alloc	-  'true' if adjustment is due to an allocation.
24631da177e4SLinus Torvalds  *	level	-  current level of dmap control page (i.e. L0, L1, L2) to
24641da177e4SLinus Torvalds  *		   be adjusted.
24651da177e4SLinus Torvalds  *
24661da177e4SLinus Torvalds  * RETURN VALUES:
24671da177e4SLinus Torvalds  *	0	- success
24681da177e4SLinus Torvalds  *	-EIO	- i/o error
24691da177e4SLinus Torvalds  *
24701da177e4SLinus Torvalds  * serialization: IREAD_LOCK(ipbmap) or IWRITE_LOCK(ipbmap) held on entry/exit;
24711da177e4SLinus Torvalds  */
24721da177e4SLinus Torvalds static int
dbAdjCtl(struct bmap * bmp,s64 blkno,int newval,int alloc,int level)24731da177e4SLinus Torvalds dbAdjCtl(struct bmap * bmp, s64 blkno, int newval, int alloc, int level)
24741da177e4SLinus Torvalds {
24751da177e4SLinus Torvalds 	struct metapage *mp;
24761da177e4SLinus Torvalds 	s8 oldroot;
24771da177e4SLinus Torvalds 	int oldval;
24781da177e4SLinus Torvalds 	s64 lblkno;
24791da177e4SLinus Torvalds 	struct dmapctl *dcp;
24801da177e4SLinus Torvalds 	int rc, leafno, ti;
24811da177e4SLinus Torvalds 
24821da177e4SLinus Torvalds 	/* get the buffer for the dmap control page for the specified
24831da177e4SLinus Torvalds 	 * block number and control page level.
24841da177e4SLinus Torvalds 	 */
24851da177e4SLinus Torvalds 	lblkno = BLKTOCTL(blkno, bmp->db_l2nbperpage, level);
24861da177e4SLinus Torvalds 	mp = read_metapage(bmp->db_ipbmap, lblkno, PSIZE, 0);
24871da177e4SLinus Torvalds 	if (mp == NULL)
24881da177e4SLinus Torvalds 		return -EIO;
24891da177e4SLinus Torvalds 	dcp = (struct dmapctl *) mp->data;
24901da177e4SLinus Torvalds 
24911da177e4SLinus Torvalds 	if (dcp->leafidx != cpu_to_le32(CTLLEAFIND)) {
2492eb8630d7SJoe Perches 		jfs_error(bmp->db_ipbmap->i_sb, "Corrupt dmapctl page\n");
24931da177e4SLinus Torvalds 		release_metapage(mp);
24941da177e4SLinus Torvalds 		return -EIO;
24951da177e4SLinus Torvalds 	}
24961da177e4SLinus Torvalds 
24971da177e4SLinus Torvalds 	/* determine the leaf number corresponding to the block and
24981da177e4SLinus Torvalds 	 * the index within the dmap control tree.
24991da177e4SLinus Torvalds 	 */
25001da177e4SLinus Torvalds 	leafno = BLKTOCTLLEAF(blkno, dcp->budmin);
25011da177e4SLinus Torvalds 	ti = leafno + le32_to_cpu(dcp->leafidx);
25021da177e4SLinus Torvalds 
25031da177e4SLinus Torvalds 	/* save the current leaf value and the current root level (i.e.
25041da177e4SLinus Torvalds 	 * maximum l2 free string described by this dmapctl).
25051da177e4SLinus Torvalds 	 */
25061da177e4SLinus Torvalds 	oldval = dcp->stree[ti];
25071da177e4SLinus Torvalds 	oldroot = dcp->stree[ROOT];
25081da177e4SLinus Torvalds 
25091da177e4SLinus Torvalds 	/* check if this is a control page update for an allocation.
25101da177e4SLinus Torvalds 	 * if so, update the leaf to reflect the new leaf value using
251188393161SThomas Weber 	 * dbSplit(); otherwise (deallocation), use dbJoin() to update
25121da177e4SLinus Torvalds 	 * the leaf with the new value.  in addition to updating the
25131da177e4SLinus Torvalds 	 * leaf, dbSplit() will also split the binary buddy system of
25141da177e4SLinus Torvalds 	 * the leaves, if required, and bubble new values within the
25151da177e4SLinus Torvalds 	 * dmapctl tree, if required.  similarly, dbJoin() will join
25161da177e4SLinus Torvalds 	 * the binary buddy system of leaves and bubble new values up
25171da177e4SLinus Torvalds 	 * the dmapctl tree as required by the new leaf value.
25181da177e4SLinus Torvalds 	 */
25191da177e4SLinus Torvalds 	if (alloc) {
25201da177e4SLinus Torvalds 		/* check if we are in the middle of a binary buddy
25211da177e4SLinus Torvalds 		 * system.  this happens when we are performing the
25221da177e4SLinus Torvalds 		 * first allocation out of an allocation group that
25231da177e4SLinus Torvalds 		 * is part (not the first part) of a larger binary
25241da177e4SLinus Torvalds 		 * buddy system.  if we are in the middle, back split
25251da177e4SLinus Torvalds 		 * the system prior to calling dbSplit() which assumes
25261da177e4SLinus Torvalds 		 * that it is at the front of a binary buddy system.
25271da177e4SLinus Torvalds 		 */
25281da177e4SLinus Torvalds 		if (oldval == NOFREE) {
25292e16a138SManas Ghandat 			rc = dbBackSplit((dmtree_t *)dcp, leafno, true);
2530751341b4SDinghao Liu 			if (rc) {
2531751341b4SDinghao Liu 				release_metapage(mp);
2532b6a47fd8SDave Kleikamp 				return rc;
2533751341b4SDinghao Liu 			}
25341da177e4SLinus Torvalds 			oldval = dcp->stree[ti];
25351da177e4SLinus Torvalds 		}
25362e16a138SManas Ghandat 		dbSplit((dmtree_t *) dcp, leafno, dcp->budmin, newval, true);
25371da177e4SLinus Torvalds 	} else {
25382e16a138SManas Ghandat 		rc = dbJoin((dmtree_t *) dcp, leafno, newval, true);
2539751341b4SDinghao Liu 		if (rc) {
2540751341b4SDinghao Liu 			release_metapage(mp);
254156d12549SDave Kleikamp 			return rc;
25421da177e4SLinus Torvalds 		}
2543751341b4SDinghao Liu 	}
25441da177e4SLinus Torvalds 
25451da177e4SLinus Torvalds 	/* check if the root of the current dmap control page changed due
25461da177e4SLinus Torvalds 	 * to the update and if the current dmap control page is not at
25471da177e4SLinus Torvalds 	 * the current top level (i.e. L0, L1, L2) of the map.  if so (i.e.
25481da177e4SLinus Torvalds 	 * root changed and this is not the top level), call this routine
25491da177e4SLinus Torvalds 	 * again (recursion) for the next higher level of the mapping to
25501da177e4SLinus Torvalds 	 * reflect the change in root for the current dmap control page.
25511da177e4SLinus Torvalds 	 */
25521da177e4SLinus Torvalds 	if (dcp->stree[ROOT] != oldroot) {
25531da177e4SLinus Torvalds 		/* are we below the top level of the map.  if so,
25541da177e4SLinus Torvalds 		 * bubble the root up to the next higher level.
25551da177e4SLinus Torvalds 		 */
25561da177e4SLinus Torvalds 		if (level < bmp->db_maxlevel) {
25571da177e4SLinus Torvalds 			/* bubble up the new root of this dmap control page to
25581da177e4SLinus Torvalds 			 * the next level.
25591da177e4SLinus Torvalds 			 */
25601da177e4SLinus Torvalds 			if ((rc =
25611da177e4SLinus Torvalds 			     dbAdjCtl(bmp, blkno, dcp->stree[ROOT], alloc,
25621da177e4SLinus Torvalds 				      level + 1))) {
25631da177e4SLinus Torvalds 				/* something went wrong in bubbling up the new
25641da177e4SLinus Torvalds 				 * root value, so backout the changes to the
25651da177e4SLinus Torvalds 				 * current dmap control page.
25661da177e4SLinus Torvalds 				 */
25671da177e4SLinus Torvalds 				if (alloc) {
25681da177e4SLinus Torvalds 					dbJoin((dmtree_t *) dcp, leafno,
25692e16a138SManas Ghandat 					       oldval, true);
25701da177e4SLinus Torvalds 				} else {
25711da177e4SLinus Torvalds 					/* the dbJoin() above might have
25721da177e4SLinus Torvalds 					 * caused a larger binary buddy system
25731da177e4SLinus Torvalds 					 * to form and we may now be in the
25741da177e4SLinus Torvalds 					 * middle of it.  if this is the case,
25751da177e4SLinus Torvalds 					 * back split the buddies.
25761da177e4SLinus Torvalds 					 */
25771da177e4SLinus Torvalds 					if (dcp->stree[ti] == NOFREE)
25781da177e4SLinus Torvalds 						dbBackSplit((dmtree_t *)
25792e16a138SManas Ghandat 							    dcp, leafno, true);
25801da177e4SLinus Torvalds 					dbSplit((dmtree_t *) dcp, leafno,
25812e16a138SManas Ghandat 						dcp->budmin, oldval, true);
25821da177e4SLinus Torvalds 				}
25831da177e4SLinus Torvalds 
25841da177e4SLinus Torvalds 				/* release the buffer and return the error.
25851da177e4SLinus Torvalds 				 */
25861da177e4SLinus Torvalds 				release_metapage(mp);
25871da177e4SLinus Torvalds 				return (rc);
25881da177e4SLinus Torvalds 			}
25891da177e4SLinus Torvalds 		} else {
25901da177e4SLinus Torvalds 			/* we're at the top level of the map. update
25911da177e4SLinus Torvalds 			 * the bmap control page to reflect the size
25921da177e4SLinus Torvalds 			 * of the maximum free buddy system.
25931da177e4SLinus Torvalds 			 */
25941da177e4SLinus Torvalds 			assert(level == bmp->db_maxlevel);
25951da177e4SLinus Torvalds 			if (bmp->db_maxfreebud != oldroot) {
25961da177e4SLinus Torvalds 				jfs_error(bmp->db_ipbmap->i_sb,
2597eb8630d7SJoe Perches 					  "the maximum free buddy is not the old root\n");
25981da177e4SLinus Torvalds 			}
25991da177e4SLinus Torvalds 			bmp->db_maxfreebud = dcp->stree[ROOT];
26001da177e4SLinus Torvalds 		}
26011da177e4SLinus Torvalds 	}
26021da177e4SLinus Torvalds 
26031da177e4SLinus Torvalds 	/* write the buffer.
26041da177e4SLinus Torvalds 	 */
26051da177e4SLinus Torvalds 	write_metapage(mp);
26061da177e4SLinus Torvalds 
26071da177e4SLinus Torvalds 	return (0);
26081da177e4SLinus Torvalds }
26091da177e4SLinus Torvalds 
26101da177e4SLinus Torvalds 
26111da177e4SLinus Torvalds /*
26121da177e4SLinus Torvalds  * NAME:	dbSplit()
26131da177e4SLinus Torvalds  *
26141da177e4SLinus Torvalds  * FUNCTION:	update the leaf of a dmtree with a new value, splitting
26151da177e4SLinus Torvalds  *		the leaf from the binary buddy system of the dmtree's
26161da177e4SLinus Torvalds  *		leaves, as required.
26171da177e4SLinus Torvalds  *
26181da177e4SLinus Torvalds  * PARAMETERS:
26191da177e4SLinus Torvalds  *	tp	- pointer to the tree containing the leaf.
26201da177e4SLinus Torvalds  *	leafno	- the number of the leaf to be updated.
26211da177e4SLinus Torvalds  *	splitsz	- the size the binary buddy system starting at the leaf
26221da177e4SLinus Torvalds  *		  must be split to, specified as the log2 number of blocks.
26231da177e4SLinus Torvalds  *	newval	- the new value for the leaf.
26241da177e4SLinus Torvalds  *
26251da177e4SLinus Torvalds  * RETURN VALUES: none
26261da177e4SLinus Torvalds  *
26271da177e4SLinus Torvalds  * serialization: IREAD_LOCK(ipbmap) or IWRITE_LOCK(ipbmap) held on entry/exit;
26281da177e4SLinus Torvalds  */
dbSplit(dmtree_t * tp,int leafno,int splitsz,int newval,bool is_ctl)26292e16a138SManas Ghandat static void dbSplit(dmtree_t *tp, int leafno, int splitsz, int newval, bool is_ctl)
26301da177e4SLinus Torvalds {
26311da177e4SLinus Torvalds 	int budsz;
26321da177e4SLinus Torvalds 	int cursz;
26331da177e4SLinus Torvalds 	s8 *leaf = tp->dmt_stree + le32_to_cpu(tp->dmt_leafidx);
26341da177e4SLinus Torvalds 
26351da177e4SLinus Torvalds 	/* check if the leaf needs to be split.
26361da177e4SLinus Torvalds 	 */
26371da177e4SLinus Torvalds 	if (leaf[leafno] > tp->dmt_budmin) {
26381da177e4SLinus Torvalds 		/* the split occurs by cutting the buddy system in half
26391da177e4SLinus Torvalds 		 * at the specified leaf until we reach the specified
26401da177e4SLinus Torvalds 		 * size.  pick up the starting split size (current size
26411da177e4SLinus Torvalds 		 * - 1 in l2) and the corresponding buddy size.
26421da177e4SLinus Torvalds 		 */
26431da177e4SLinus Torvalds 		cursz = leaf[leafno] - 1;
26441da177e4SLinus Torvalds 		budsz = BUDSIZE(cursz, tp->dmt_budmin);
26451da177e4SLinus Torvalds 
26461da177e4SLinus Torvalds 		/* split until we reach the specified size.
26471da177e4SLinus Torvalds 		 */
26481da177e4SLinus Torvalds 		while (cursz >= splitsz) {
26491da177e4SLinus Torvalds 			/* update the buddy's leaf with its new value.
26501da177e4SLinus Torvalds 			 */
26512e16a138SManas Ghandat 			dbAdjTree(tp, leafno ^ budsz, cursz, is_ctl);
26521da177e4SLinus Torvalds 
26531da177e4SLinus Torvalds 			/* on to the next size and buddy.
26541da177e4SLinus Torvalds 			 */
26551da177e4SLinus Torvalds 			cursz -= 1;
26561da177e4SLinus Torvalds 			budsz >>= 1;
26571da177e4SLinus Torvalds 		}
26581da177e4SLinus Torvalds 	}
26591da177e4SLinus Torvalds 
26601da177e4SLinus Torvalds 	/* adjust the dmap tree to reflect the specified leaf's new
26611da177e4SLinus Torvalds 	 * value.
26621da177e4SLinus Torvalds 	 */
26632e16a138SManas Ghandat 	dbAdjTree(tp, leafno, newval, is_ctl);
26641da177e4SLinus Torvalds }
26651da177e4SLinus Torvalds 
26661da177e4SLinus Torvalds 
26671da177e4SLinus Torvalds /*
26681da177e4SLinus Torvalds  * NAME:	dbBackSplit()
26691da177e4SLinus Torvalds  *
26701da177e4SLinus Torvalds  * FUNCTION:	back split the binary buddy system of dmtree leaves
26711da177e4SLinus Torvalds  *		that hold a specified leaf until the specified leaf
26721da177e4SLinus Torvalds  *		starts its own binary buddy system.
26731da177e4SLinus Torvalds  *
26741da177e4SLinus Torvalds  *		the allocators typically perform allocations at the start
26751da177e4SLinus Torvalds  *		of binary buddy systems and dbSplit() is used to accomplish
26761da177e4SLinus Torvalds  *		any required splits.  in some cases, however, allocation
26771da177e4SLinus Torvalds  *		may occur in the middle of a binary system and requires a
26781da177e4SLinus Torvalds  *		back split, with the split proceeding out from the middle of
26791da177e4SLinus Torvalds  *		the system (less efficient) rather than the start of the
26801da177e4SLinus Torvalds  *		system (more efficient).  the cases in which a back split
26811da177e4SLinus Torvalds  *		is required are rare and are limited to the first allocation
26821da177e4SLinus Torvalds  *		within an allocation group which is a part (not first part)
26831da177e4SLinus Torvalds  *		of a larger binary buddy system and a few exception cases
26841da177e4SLinus Torvalds  *		in which a previous join operation must be backed out.
26851da177e4SLinus Torvalds  *
26861da177e4SLinus Torvalds  * PARAMETERS:
26871da177e4SLinus Torvalds  *	tp	- pointer to the tree containing the leaf.
26881da177e4SLinus Torvalds  *	leafno	- the number of the leaf to be updated.
26891da177e4SLinus Torvalds  *
26901da177e4SLinus Torvalds  * RETURN VALUES: none
26911da177e4SLinus Torvalds  *
26921da177e4SLinus Torvalds  * serialization: IREAD_LOCK(ipbmap) or IWRITE_LOCK(ipbmap) held on entry/exit;
26931da177e4SLinus Torvalds  */
dbBackSplit(dmtree_t * tp,int leafno,bool is_ctl)26942e16a138SManas Ghandat static int dbBackSplit(dmtree_t *tp, int leafno, bool is_ctl)
26951da177e4SLinus Torvalds {
26961da177e4SLinus Torvalds 	int budsz, bud, w, bsz, size;
26971da177e4SLinus Torvalds 	int cursz;
26981da177e4SLinus Torvalds 	s8 *leaf = tp->dmt_stree + le32_to_cpu(tp->dmt_leafidx);
26991da177e4SLinus Torvalds 
27001da177e4SLinus Torvalds 	/* leaf should be part (not first part) of a binary
27011da177e4SLinus Torvalds 	 * buddy system.
27021da177e4SLinus Torvalds 	 */
27031da177e4SLinus Torvalds 	assert(leaf[leafno] == NOFREE);
27041da177e4SLinus Torvalds 
27051da177e4SLinus Torvalds 	/* the back split is accomplished by iteratively finding the leaf
27061da177e4SLinus Torvalds 	 * that starts the buddy system that contains the specified leaf and
27071da177e4SLinus Torvalds 	 * splitting that system in two.  this iteration continues until
27081da177e4SLinus Torvalds 	 * the specified leaf becomes the start of a buddy system.
27091da177e4SLinus Torvalds 	 *
27101da177e4SLinus Torvalds 	 * determine maximum possible l2 size for the specified leaf.
27111da177e4SLinus Torvalds 	 */
27121da177e4SLinus Torvalds 	size =
27131da177e4SLinus Torvalds 	    LITOL2BSZ(leafno, le32_to_cpu(tp->dmt_l2nleafs),
27141da177e4SLinus Torvalds 		      tp->dmt_budmin);
27151da177e4SLinus Torvalds 
27161da177e4SLinus Torvalds 	/* determine the number of leaves covered by this size.  this
27171da177e4SLinus Torvalds 	 * is the buddy size that we will start with as we search for
27181da177e4SLinus Torvalds 	 * the buddy system that contains the specified leaf.
27191da177e4SLinus Torvalds 	 */
27201da177e4SLinus Torvalds 	budsz = BUDSIZE(size, tp->dmt_budmin);
27211da177e4SLinus Torvalds 
27221da177e4SLinus Torvalds 	/* back split.
27231da177e4SLinus Torvalds 	 */
27241da177e4SLinus Torvalds 	while (leaf[leafno] == NOFREE) {
27251da177e4SLinus Torvalds 		/* find the leftmost buddy leaf.
27261da177e4SLinus Torvalds 		 */
27271da177e4SLinus Torvalds 		for (w = leafno, bsz = budsz;; bsz <<= 1,
27281da177e4SLinus Torvalds 		     w = (w < bud) ? w : bud) {
2729b6a47fd8SDave Kleikamp 			if (bsz >= le32_to_cpu(tp->dmt_nleafs)) {
2730b6a47fd8SDave Kleikamp 				jfs_err("JFS: block map error in dbBackSplit");
2731b6a47fd8SDave Kleikamp 				return -EIO;
2732b6a47fd8SDave Kleikamp 			}
27331da177e4SLinus Torvalds 
27341da177e4SLinus Torvalds 			/* determine the buddy.
27351da177e4SLinus Torvalds 			 */
27361da177e4SLinus Torvalds 			bud = w ^ bsz;
27371da177e4SLinus Torvalds 
27381da177e4SLinus Torvalds 			/* check if this buddy is the start of the system.
27391da177e4SLinus Torvalds 			 */
27401da177e4SLinus Torvalds 			if (leaf[bud] != NOFREE) {
27411da177e4SLinus Torvalds 				/* split the leaf at the start of the
27421da177e4SLinus Torvalds 				 * system in two.
27431da177e4SLinus Torvalds 				 */
27441da177e4SLinus Torvalds 				cursz = leaf[bud] - 1;
27452e16a138SManas Ghandat 				dbSplit(tp, bud, cursz, cursz, is_ctl);
27461da177e4SLinus Torvalds 				break;
27471da177e4SLinus Torvalds 			}
27481da177e4SLinus Torvalds 		}
27491da177e4SLinus Torvalds 	}
27501da177e4SLinus Torvalds 
2751b6a47fd8SDave Kleikamp 	if (leaf[leafno] != size) {
2752b6a47fd8SDave Kleikamp 		jfs_err("JFS: wrong leaf value in dbBackSplit");
2753b6a47fd8SDave Kleikamp 		return -EIO;
2754b6a47fd8SDave Kleikamp 	}
2755b6a47fd8SDave Kleikamp 	return 0;
27561da177e4SLinus Torvalds }
27571da177e4SLinus Torvalds 
27581da177e4SLinus Torvalds 
27591da177e4SLinus Torvalds /*
27601da177e4SLinus Torvalds  * NAME:	dbJoin()
27611da177e4SLinus Torvalds  *
27621da177e4SLinus Torvalds  * FUNCTION:	update the leaf of a dmtree with a new value, joining
27631da177e4SLinus Torvalds  *		the leaf with other leaves of the dmtree into a multi-leaf
27641da177e4SLinus Torvalds  *		binary buddy system, as required.
27651da177e4SLinus Torvalds  *
27661da177e4SLinus Torvalds  * PARAMETERS:
27671da177e4SLinus Torvalds  *	tp	- pointer to the tree containing the leaf.
27681da177e4SLinus Torvalds  *	leafno	- the number of the leaf to be updated.
27691da177e4SLinus Torvalds  *	newval	- the new value for the leaf.
27701da177e4SLinus Torvalds  *
27716fb93eebSDave Kleikamp  * RETURN VALUES: none
27721da177e4SLinus Torvalds  */
dbJoin(dmtree_t * tp,int leafno,int newval,bool is_ctl)27732e16a138SManas Ghandat static int dbJoin(dmtree_t *tp, int leafno, int newval, bool is_ctl)
27741da177e4SLinus Torvalds {
27751da177e4SLinus Torvalds 	int budsz, buddy;
27761da177e4SLinus Torvalds 	s8 *leaf;
27771da177e4SLinus Torvalds 
27781da177e4SLinus Torvalds 	/* can the new leaf value require a join with other leaves ?
27791da177e4SLinus Torvalds 	 */
27801da177e4SLinus Torvalds 	if (newval >= tp->dmt_budmin) {
27811da177e4SLinus Torvalds 		/* pickup a pointer to the leaves of the tree.
27821da177e4SLinus Torvalds 		 */
27831da177e4SLinus Torvalds 		leaf = tp->dmt_stree + le32_to_cpu(tp->dmt_leafidx);
27841da177e4SLinus Torvalds 
27851da177e4SLinus Torvalds 		/* try to join the specified leaf into a large binary
27861da177e4SLinus Torvalds 		 * buddy system.  the join proceeds by attempting to join
27871da177e4SLinus Torvalds 		 * the specified leafno with its buddy (leaf) at new value.
27881da177e4SLinus Torvalds 		 * if the join occurs, we attempt to join the left leaf
27891da177e4SLinus Torvalds 		 * of the joined buddies with its buddy at new value + 1.
27901da177e4SLinus Torvalds 		 * we continue to join until we find a buddy that cannot be
27911da177e4SLinus Torvalds 		 * joined (does not have a value equal to the size of the
27921da177e4SLinus Torvalds 		 * last join) or until all leaves have been joined into a
27931da177e4SLinus Torvalds 		 * single system.
27941da177e4SLinus Torvalds 		 *
27951da177e4SLinus Torvalds 		 * get the buddy size (number of words covered) of
27961da177e4SLinus Torvalds 		 * the new value.
27971da177e4SLinus Torvalds 		 */
27981da177e4SLinus Torvalds 		budsz = BUDSIZE(newval, tp->dmt_budmin);
27991da177e4SLinus Torvalds 
28001da177e4SLinus Torvalds 		/* try to join.
28011da177e4SLinus Torvalds 		 */
28021da177e4SLinus Torvalds 		while (budsz < le32_to_cpu(tp->dmt_nleafs)) {
28031da177e4SLinus Torvalds 			/* get the buddy leaf.
28041da177e4SLinus Torvalds 			 */
28051da177e4SLinus Torvalds 			buddy = leafno ^ budsz;
28061da177e4SLinus Torvalds 
28071da177e4SLinus Torvalds 			/* if the leaf's new value is greater than its
28081da177e4SLinus Torvalds 			 * buddy's value, we join no more.
28091da177e4SLinus Torvalds 			 */
28101da177e4SLinus Torvalds 			if (newval > leaf[buddy])
28111da177e4SLinus Torvalds 				break;
28121da177e4SLinus Torvalds 
281356d12549SDave Kleikamp 			/* It shouldn't be less */
281456d12549SDave Kleikamp 			if (newval < leaf[buddy])
281556d12549SDave Kleikamp 				return -EIO;
28161da177e4SLinus Torvalds 
28171da177e4SLinus Torvalds 			/* check which (leafno or buddy) is the left buddy.
28181da177e4SLinus Torvalds 			 * the left buddy gets to claim the blocks resulting
28191da177e4SLinus Torvalds 			 * from the join while the right gets to claim none.
282025985edcSLucas De Marchi 			 * the left buddy is also eligible to participate in
28211da177e4SLinus Torvalds 			 * a join at the next higher level while the right
28221da177e4SLinus Torvalds 			 * is not.
28231da177e4SLinus Torvalds 			 *
28241da177e4SLinus Torvalds 			 */
28251da177e4SLinus Torvalds 			if (leafno < buddy) {
28261da177e4SLinus Torvalds 				/* leafno is the left buddy.
28271da177e4SLinus Torvalds 				 */
28282e16a138SManas Ghandat 				dbAdjTree(tp, buddy, NOFREE, is_ctl);
28291da177e4SLinus Torvalds 			} else {
28301da177e4SLinus Torvalds 				/* buddy is the left buddy and becomes
28311da177e4SLinus Torvalds 				 * leafno.
28321da177e4SLinus Torvalds 				 */
28332e16a138SManas Ghandat 				dbAdjTree(tp, leafno, NOFREE, is_ctl);
28341da177e4SLinus Torvalds 				leafno = buddy;
28351da177e4SLinus Torvalds 			}
28361da177e4SLinus Torvalds 
28371da177e4SLinus Torvalds 			/* on to try the next join.
28381da177e4SLinus Torvalds 			 */
28391da177e4SLinus Torvalds 			newval += 1;
28401da177e4SLinus Torvalds 			budsz <<= 1;
28411da177e4SLinus Torvalds 		}
28421da177e4SLinus Torvalds 	}
28431da177e4SLinus Torvalds 
28441da177e4SLinus Torvalds 	/* update the leaf value.
28451da177e4SLinus Torvalds 	 */
28462e16a138SManas Ghandat 	dbAdjTree(tp, leafno, newval, is_ctl);
284756d12549SDave Kleikamp 
284856d12549SDave Kleikamp 	return 0;
28491da177e4SLinus Torvalds }
28501da177e4SLinus Torvalds 
28511da177e4SLinus Torvalds 
28521da177e4SLinus Torvalds /*
28531da177e4SLinus Torvalds  * NAME:	dbAdjTree()
28541da177e4SLinus Torvalds  *
28551da177e4SLinus Torvalds  * FUNCTION:	update a leaf of a dmtree with a new value, adjusting
28561da177e4SLinus Torvalds  *		the dmtree, as required, to reflect the new leaf value.
28571da177e4SLinus Torvalds  *		the combination of any buddies must already be done before
28581da177e4SLinus Torvalds  *		this is called.
28591da177e4SLinus Torvalds  *
28601da177e4SLinus Torvalds  * PARAMETERS:
28611da177e4SLinus Torvalds  *	tp	- pointer to the tree to be adjusted.
28621da177e4SLinus Torvalds  *	leafno	- the number of the leaf to be updated.
28631da177e4SLinus Torvalds  *	newval	- the new value for the leaf.
28641da177e4SLinus Torvalds  *
28651da177e4SLinus Torvalds  * RETURN VALUES: none
28661da177e4SLinus Torvalds  */
dbAdjTree(dmtree_t * tp,int leafno,int newval,bool is_ctl)28672e16a138SManas Ghandat static void dbAdjTree(dmtree_t *tp, int leafno, int newval, bool is_ctl)
28681da177e4SLinus Torvalds {
28691da177e4SLinus Torvalds 	int lp, pp, k;
28702e16a138SManas Ghandat 	int max, size;
28712e16a138SManas Ghandat 
28722e16a138SManas Ghandat 	size = is_ctl ? CTLTREESIZE : TREESIZE;
28731da177e4SLinus Torvalds 
28741da177e4SLinus Torvalds 	/* pick up the index of the leaf for this leafno.
28751da177e4SLinus Torvalds 	 */
28761da177e4SLinus Torvalds 	lp = leafno + le32_to_cpu(tp->dmt_leafidx);
28771da177e4SLinus Torvalds 
28782e16a138SManas Ghandat 	if (WARN_ON_ONCE(lp >= size || lp < 0))
28792e16a138SManas Ghandat 		return;
28802e16a138SManas Ghandat 
28811da177e4SLinus Torvalds 	/* is the current value the same as the old value ?  if so,
28821da177e4SLinus Torvalds 	 * there is nothing to do.
28831da177e4SLinus Torvalds 	 */
28841da177e4SLinus Torvalds 	if (tp->dmt_stree[lp] == newval)
28851da177e4SLinus Torvalds 		return;
28861da177e4SLinus Torvalds 
28871da177e4SLinus Torvalds 	/* set the new value.
28881da177e4SLinus Torvalds 	 */
28891da177e4SLinus Torvalds 	tp->dmt_stree[lp] = newval;
28901da177e4SLinus Torvalds 
28911da177e4SLinus Torvalds 	/* bubble the new value up the tree as required.
28921da177e4SLinus Torvalds 	 */
28931da177e4SLinus Torvalds 	for (k = 0; k < le32_to_cpu(tp->dmt_height); k++) {
2894*3b5d21b5SNihar Chaithanya 		if (lp == 0)
2895*3b5d21b5SNihar Chaithanya 			break;
2896*3b5d21b5SNihar Chaithanya 
28971da177e4SLinus Torvalds 		/* get the index of the first leaf of the 4 leaf
28981da177e4SLinus Torvalds 		 * group containing the specified leaf (leafno).
28991da177e4SLinus Torvalds 		 */
29001da177e4SLinus Torvalds 		lp = ((lp - 1) & ~0x03) + 1;
29011da177e4SLinus Torvalds 
29021da177e4SLinus Torvalds 		/* get the index of the parent of this 4 leaf group.
29031da177e4SLinus Torvalds 		 */
29041da177e4SLinus Torvalds 		pp = (lp - 1) >> 2;
29051da177e4SLinus Torvalds 
29061da177e4SLinus Torvalds 		/* determine the maximum of the 4 leaves.
29071da177e4SLinus Torvalds 		 */
29081da177e4SLinus Torvalds 		max = TREEMAX(&tp->dmt_stree[lp]);
29091da177e4SLinus Torvalds 
29101da177e4SLinus Torvalds 		/* if the maximum of the 4 is the same as the
29111da177e4SLinus Torvalds 		 * parent's value, we're done.
29121da177e4SLinus Torvalds 		 */
29131da177e4SLinus Torvalds 		if (tp->dmt_stree[pp] == max)
29141da177e4SLinus Torvalds 			break;
29151da177e4SLinus Torvalds 
29161da177e4SLinus Torvalds 		/* parent gets new value.
29171da177e4SLinus Torvalds 		 */
29181da177e4SLinus Torvalds 		tp->dmt_stree[pp] = max;
29191da177e4SLinus Torvalds 
29201da177e4SLinus Torvalds 		/* parent becomes leaf for next go-round.
29211da177e4SLinus Torvalds 		 */
29221da177e4SLinus Torvalds 		lp = pp;
29231da177e4SLinus Torvalds 	}
29241da177e4SLinus Torvalds }
29251da177e4SLinus Torvalds 
29261da177e4SLinus Torvalds 
29271da177e4SLinus Torvalds /*
29281da177e4SLinus Torvalds  * NAME:	dbFindLeaf()
29291da177e4SLinus Torvalds  *
29301da177e4SLinus Torvalds  * FUNCTION:	search a dmtree_t for sufficient free blocks, returning
29311da177e4SLinus Torvalds  *		the index of a leaf describing the free blocks if
29321da177e4SLinus Torvalds  *		sufficient free blocks are found.
29331da177e4SLinus Torvalds  *
29341da177e4SLinus Torvalds  *		the search starts at the top of the dmtree_t tree and
29351da177e4SLinus Torvalds  *		proceeds down the tree to the leftmost leaf with sufficient
29361da177e4SLinus Torvalds  *		free space.
29371da177e4SLinus Torvalds  *
29381da177e4SLinus Torvalds  * PARAMETERS:
29391da177e4SLinus Torvalds  *	tp	- pointer to the tree to be searched.
29401da177e4SLinus Torvalds  *	l2nb	- log2 number of free blocks to search for.
29411da177e4SLinus Torvalds  *	leafidx	- return pointer to be set to the index of the leaf
29421da177e4SLinus Torvalds  *		  describing at least l2nb free blocks if sufficient
29431da177e4SLinus Torvalds  *		  free blocks are found.
294487c681abSManas Ghandat  *	is_ctl	- determines if the tree is of type ctl
29451da177e4SLinus Torvalds  *
29461da177e4SLinus Torvalds  * RETURN VALUES:
29471da177e4SLinus Torvalds  *	0	- success
29481da177e4SLinus Torvalds  *	-ENOSPC	- insufficient free blocks.
29491da177e4SLinus Torvalds  */
dbFindLeaf(dmtree_t * tp,int l2nb,int * leafidx,bool is_ctl)295087c681abSManas Ghandat static int dbFindLeaf(dmtree_t *tp, int l2nb, int *leafidx, bool is_ctl)
29511da177e4SLinus Torvalds {
29521da177e4SLinus Torvalds 	int ti, n = 0, k, x = 0;
29537fff9a9fSEdward Adam Davis 	int max_size, max_idx;
295487c681abSManas Ghandat 
295587c681abSManas Ghandat 	max_size = is_ctl ? CTLTREESIZE : TREESIZE;
29567fff9a9fSEdward Adam Davis 	max_idx = is_ctl ? LPERCTL : LPERDMAP;
29571da177e4SLinus Torvalds 
29581da177e4SLinus Torvalds 	/* first check the root of the tree to see if there is
29591da177e4SLinus Torvalds 	 * sufficient free space.
29601da177e4SLinus Torvalds 	 */
29611da177e4SLinus Torvalds 	if (l2nb > tp->dmt_stree[ROOT])
29621da177e4SLinus Torvalds 		return -ENOSPC;
29631da177e4SLinus Torvalds 
29641da177e4SLinus Torvalds 	/* sufficient free space available. now search down the tree
29651da177e4SLinus Torvalds 	 * starting at the next level for the leftmost leaf that
29661da177e4SLinus Torvalds 	 * describes sufficient free space.
29671da177e4SLinus Torvalds 	 */
29681da177e4SLinus Torvalds 	for (k = le32_to_cpu(tp->dmt_height), ti = 1;
29691da177e4SLinus Torvalds 	     k > 0; k--, ti = ((ti + n) << 2) + 1) {
29701da177e4SLinus Torvalds 		/* search the four nodes at this level, starting from
29711da177e4SLinus Torvalds 		 * the left.
29721da177e4SLinus Torvalds 		 */
29731da177e4SLinus Torvalds 		for (x = ti, n = 0; n < 4; n++) {
29741da177e4SLinus Torvalds 			/* sufficient free space found.  move to the next
29751da177e4SLinus Torvalds 			 * level (or quit if this is the last level).
29761da177e4SLinus Torvalds 			 */
297787c681abSManas Ghandat 			if (x + n > max_size)
297887c681abSManas Ghandat 				return -ENOSPC;
29791da177e4SLinus Torvalds 			if (l2nb <= tp->dmt_stree[x + n])
29801da177e4SLinus Torvalds 				break;
29811da177e4SLinus Torvalds 		}
29821da177e4SLinus Torvalds 
29831da177e4SLinus Torvalds 		/* better have found something since the higher
29841da177e4SLinus Torvalds 		 * levels of the tree said it was here.
29851da177e4SLinus Torvalds 		 */
29861da177e4SLinus Torvalds 		assert(n < 4);
29871da177e4SLinus Torvalds 	}
29887fff9a9fSEdward Adam Davis 	if (le32_to_cpu(tp->dmt_leafidx) >= max_idx)
29897fff9a9fSEdward Adam Davis 		return -ENOSPC;
29901da177e4SLinus Torvalds 
29911da177e4SLinus Torvalds 	/* set the return to the leftmost leaf describing sufficient
29921da177e4SLinus Torvalds 	 * free space.
29931da177e4SLinus Torvalds 	 */
29941da177e4SLinus Torvalds 	*leafidx = x + n - le32_to_cpu(tp->dmt_leafidx);
29951da177e4SLinus Torvalds 
29961da177e4SLinus Torvalds 	return (0);
29971da177e4SLinus Torvalds }
29981da177e4SLinus Torvalds 
29991da177e4SLinus Torvalds 
30001da177e4SLinus Torvalds /*
30011da177e4SLinus Torvalds  * NAME:	dbFindBits()
30021da177e4SLinus Torvalds  *
30031da177e4SLinus Torvalds  * FUNCTION:	find a specified number of binary buddy free bits within a
30041da177e4SLinus Torvalds  *		dmap bitmap word value.
30051da177e4SLinus Torvalds  *
30061da177e4SLinus Torvalds  *		this routine searches the bitmap value for (1 << l2nb) free
30071da177e4SLinus Torvalds  *		bits at (1 << l2nb) alignments within the value.
30081da177e4SLinus Torvalds  *
30091da177e4SLinus Torvalds  * PARAMETERS:
30101da177e4SLinus Torvalds  *	word	-  dmap bitmap word value.
30111da177e4SLinus Torvalds  *	l2nb	-  number of free bits specified as a log2 number.
30121da177e4SLinus Torvalds  *
30131da177e4SLinus Torvalds  * RETURN VALUES:
30141da177e4SLinus Torvalds  *	starting bit number of free bits.
30151da177e4SLinus Torvalds  */
dbFindBits(u32 word,int l2nb)30161da177e4SLinus Torvalds static int dbFindBits(u32 word, int l2nb)
30171da177e4SLinus Torvalds {
30181da177e4SLinus Torvalds 	int bitno, nb;
30191da177e4SLinus Torvalds 	u32 mask;
30201da177e4SLinus Torvalds 
30211da177e4SLinus Torvalds 	/* get the number of bits.
30221da177e4SLinus Torvalds 	 */
30231da177e4SLinus Torvalds 	nb = 1 << l2nb;
30241da177e4SLinus Torvalds 	assert(nb <= DBWORD);
30251da177e4SLinus Torvalds 
30261da177e4SLinus Torvalds 	/* complement the word so we can use a mask (i.e. 0s represent
30271da177e4SLinus Torvalds 	 * free bits) and compute the mask.
30281da177e4SLinus Torvalds 	 */
30291da177e4SLinus Torvalds 	word = ~word;
30301da177e4SLinus Torvalds 	mask = ONES << (DBWORD - nb);
30311da177e4SLinus Torvalds 
30321da177e4SLinus Torvalds 	/* scan the word for nb free bits at nb alignments.
30331da177e4SLinus Torvalds 	 */
3034f04925a0SRemington Brasga 	for (bitno = 0; mask != 0; bitno += nb, mask = (mask >> nb)) {
30351da177e4SLinus Torvalds 		if ((mask & word) == mask)
30361da177e4SLinus Torvalds 			break;
30371da177e4SLinus Torvalds 	}
30381da177e4SLinus Torvalds 
30391da177e4SLinus Torvalds 	ASSERT(bitno < 32);
30401da177e4SLinus Torvalds 
30411da177e4SLinus Torvalds 	/* return the bit number.
30421da177e4SLinus Torvalds 	 */
30431da177e4SLinus Torvalds 	return (bitno);
30441da177e4SLinus Torvalds }
30451da177e4SLinus Torvalds 
30461da177e4SLinus Torvalds 
30471da177e4SLinus Torvalds /*
30481da177e4SLinus Torvalds  * NAME:	dbMaxBud(u8 *cp)
30491da177e4SLinus Torvalds  *
30501da177e4SLinus Torvalds  * FUNCTION:	determine the largest binary buddy string of free
30511da177e4SLinus Torvalds  *		bits within 32-bits of the map.
30521da177e4SLinus Torvalds  *
30531da177e4SLinus Torvalds  * PARAMETERS:
30541da177e4SLinus Torvalds  *	cp	-  pointer to the 32-bit value.
30551da177e4SLinus Torvalds  *
30561da177e4SLinus Torvalds  * RETURN VALUES:
30571da177e4SLinus Torvalds  *	largest binary buddy of free bits within a dmap word.
30581da177e4SLinus Torvalds  */
dbMaxBud(u8 * cp)30591da177e4SLinus Torvalds static int dbMaxBud(u8 * cp)
30601da177e4SLinus Torvalds {
30611da177e4SLinus Torvalds 	signed char tmp1, tmp2;
30621da177e4SLinus Torvalds 
30631da177e4SLinus Torvalds 	/* check if the wmap word is all free. if so, the
30641da177e4SLinus Torvalds 	 * free buddy size is BUDMIN.
30651da177e4SLinus Torvalds 	 */
30661da177e4SLinus Torvalds 	if (*((uint *) cp) == 0)
30671da177e4SLinus Torvalds 		return (BUDMIN);
30681da177e4SLinus Torvalds 
30691da177e4SLinus Torvalds 	/* check if the wmap word is half free. if so, the
30701da177e4SLinus Torvalds 	 * free buddy size is BUDMIN-1.
30711da177e4SLinus Torvalds 	 */
30721da177e4SLinus Torvalds 	if (*((u16 *) cp) == 0 || *((u16 *) cp + 1) == 0)
30731da177e4SLinus Torvalds 		return (BUDMIN - 1);
30741da177e4SLinus Torvalds 
30751da177e4SLinus Torvalds 	/* not all free or half free. determine the free buddy
30761da177e4SLinus Torvalds 	 * size thru table lookup using quarters of the wmap word.
30771da177e4SLinus Torvalds 	 */
30781da177e4SLinus Torvalds 	tmp1 = max(budtab[cp[2]], budtab[cp[3]]);
30791da177e4SLinus Torvalds 	tmp2 = max(budtab[cp[0]], budtab[cp[1]]);
30801da177e4SLinus Torvalds 	return (max(tmp1, tmp2));
30811da177e4SLinus Torvalds }
30821da177e4SLinus Torvalds 
30831da177e4SLinus Torvalds 
30841da177e4SLinus Torvalds /*
30851da177e4SLinus Torvalds  * NAME:	cnttz(uint word)
30861da177e4SLinus Torvalds  *
30871da177e4SLinus Torvalds  * FUNCTION:	determine the number of trailing zeros within a 32-bit
30881da177e4SLinus Torvalds  *		value.
30891da177e4SLinus Torvalds  *
30901da177e4SLinus Torvalds  * PARAMETERS:
30911da177e4SLinus Torvalds  *	value	-  32-bit value to be examined.
30921da177e4SLinus Torvalds  *
30931da177e4SLinus Torvalds  * RETURN VALUES:
30941da177e4SLinus Torvalds  *	count of trailing zeros
30951da177e4SLinus Torvalds  */
cnttz(u32 word)30961da177e4SLinus Torvalds static int cnttz(u32 word)
30971da177e4SLinus Torvalds {
30981da177e4SLinus Torvalds 	int n;
30991da177e4SLinus Torvalds 
31001da177e4SLinus Torvalds 	for (n = 0; n < 32; n++, word >>= 1) {
31011da177e4SLinus Torvalds 		if (word & 0x01)
31021da177e4SLinus Torvalds 			break;
31031da177e4SLinus Torvalds 	}
31041da177e4SLinus Torvalds 
31051da177e4SLinus Torvalds 	return (n);
31061da177e4SLinus Torvalds }
31071da177e4SLinus Torvalds 
31081da177e4SLinus Torvalds 
31091da177e4SLinus Torvalds /*
31101da177e4SLinus Torvalds  * NAME:	cntlz(u32 value)
31111da177e4SLinus Torvalds  *
31121da177e4SLinus Torvalds  * FUNCTION:	determine the number of leading zeros within a 32-bit
31131da177e4SLinus Torvalds  *		value.
31141da177e4SLinus Torvalds  *
31151da177e4SLinus Torvalds  * PARAMETERS:
31161da177e4SLinus Torvalds  *	value	-  32-bit value to be examined.
31171da177e4SLinus Torvalds  *
31181da177e4SLinus Torvalds  * RETURN VALUES:
31191da177e4SLinus Torvalds  *	count of leading zeros
31201da177e4SLinus Torvalds  */
cntlz(u32 value)31211da177e4SLinus Torvalds static int cntlz(u32 value)
31221da177e4SLinus Torvalds {
31231da177e4SLinus Torvalds 	int n;
31241da177e4SLinus Torvalds 
31251da177e4SLinus Torvalds 	for (n = 0; n < 32; n++, value <<= 1) {
31261da177e4SLinus Torvalds 		if (value & HIGHORDER)
31271da177e4SLinus Torvalds 			break;
31281da177e4SLinus Torvalds 	}
31291da177e4SLinus Torvalds 	return (n);
31301da177e4SLinus Torvalds }
31311da177e4SLinus Torvalds 
31321da177e4SLinus Torvalds 
31331da177e4SLinus Torvalds /*
31341da177e4SLinus Torvalds  * NAME:	blkstol2(s64 nb)
31351da177e4SLinus Torvalds  *
31361da177e4SLinus Torvalds  * FUNCTION:	convert a block count to its log2 value. if the block
31371da177e4SLinus Torvalds  *		count is not a l2 multiple, it is rounded up to the next
31381da177e4SLinus Torvalds  *		larger l2 multiple.
31391da177e4SLinus Torvalds  *
31401da177e4SLinus Torvalds  * PARAMETERS:
31411da177e4SLinus Torvalds  *	nb	-  number of blocks
31421da177e4SLinus Torvalds  *
31431da177e4SLinus Torvalds  * RETURN VALUES:
31441da177e4SLinus Torvalds  *	log2 number of blocks
31451da177e4SLinus Torvalds  */
blkstol2(s64 nb)31466cb1269bSDave Kleikamp static int blkstol2(s64 nb)
31471da177e4SLinus Torvalds {
31481da177e4SLinus Torvalds 	int l2nb;
31491da177e4SLinus Torvalds 	s64 mask;		/* meant to be signed */
31501da177e4SLinus Torvalds 
31511da177e4SLinus Torvalds 	mask = (s64) 1 << (64 - 1);
31521da177e4SLinus Torvalds 
31531da177e4SLinus Torvalds 	/* count the leading bits.
31541da177e4SLinus Torvalds 	 */
31551da177e4SLinus Torvalds 	for (l2nb = 0; l2nb < 64; l2nb++, mask >>= 1) {
31561da177e4SLinus Torvalds 		/* leading bit found.
31571da177e4SLinus Torvalds 		 */
31581da177e4SLinus Torvalds 		if (nb & mask) {
31591da177e4SLinus Torvalds 			/* determine the l2 value.
31601da177e4SLinus Torvalds 			 */
31611da177e4SLinus Torvalds 			l2nb = (64 - 1) - l2nb;
31621da177e4SLinus Torvalds 
31631da177e4SLinus Torvalds 			/* check if we need to round up.
31641da177e4SLinus Torvalds 			 */
31651da177e4SLinus Torvalds 			if (~mask & nb)
31661da177e4SLinus Torvalds 				l2nb++;
31671da177e4SLinus Torvalds 
31681da177e4SLinus Torvalds 			return (l2nb);
31691da177e4SLinus Torvalds 		}
31701da177e4SLinus Torvalds 	}
31711da177e4SLinus Torvalds 	assert(0);
31721da177e4SLinus Torvalds 	return 0;		/* fix compiler warning */
31731da177e4SLinus Torvalds }
31741da177e4SLinus Torvalds 
31751da177e4SLinus Torvalds 
31761da177e4SLinus Torvalds /*
31771da177e4SLinus Torvalds  * NAME:	dbAllocBottomUp()
31781da177e4SLinus Torvalds  *
31791da177e4SLinus Torvalds  * FUNCTION:	alloc the specified block range from the working block
31801da177e4SLinus Torvalds  *		allocation map.
31811da177e4SLinus Torvalds  *
31821da177e4SLinus Torvalds  *		the blocks will be alloc from the working map one dmap
31831da177e4SLinus Torvalds  *		at a time.
31841da177e4SLinus Torvalds  *
31851da177e4SLinus Torvalds  * PARAMETERS:
31861da177e4SLinus Torvalds  *	ip	-  pointer to in-core inode;
31871da177e4SLinus Torvalds  *	blkno	-  starting block number to be freed.
31881da177e4SLinus Torvalds  *	nblocks	-  number of blocks to be freed.
31891da177e4SLinus Torvalds  *
31901da177e4SLinus Torvalds  * RETURN VALUES:
31911da177e4SLinus Torvalds  *	0	- success
31921da177e4SLinus Torvalds  *	-EIO	- i/o error
31931da177e4SLinus Torvalds  */
dbAllocBottomUp(struct inode * ip,s64 blkno,s64 nblocks)31941da177e4SLinus Torvalds int dbAllocBottomUp(struct inode *ip, s64 blkno, s64 nblocks)
31951da177e4SLinus Torvalds {
31961da177e4SLinus Torvalds 	struct metapage *mp;
31971da177e4SLinus Torvalds 	struct dmap *dp;
31981da177e4SLinus Torvalds 	int nb, rc;
31991da177e4SLinus Torvalds 	s64 lblkno, rem;
32001da177e4SLinus Torvalds 	struct inode *ipbmap = JFS_SBI(ip->i_sb)->ipbmap;
32011da177e4SLinus Torvalds 	struct bmap *bmp = JFS_SBI(ip->i_sb)->bmap;
32021da177e4SLinus Torvalds 
320382d5b9a7SDave Kleikamp 	IREAD_LOCK(ipbmap, RDWRLOCK_DMAP);
32041da177e4SLinus Torvalds 
32051da177e4SLinus Torvalds 	/* block to be allocated better be within the mapsize. */
32061da177e4SLinus Torvalds 	ASSERT(nblocks <= bmp->db_mapsize - blkno);
32071da177e4SLinus Torvalds 
32081da177e4SLinus Torvalds 	/*
32091da177e4SLinus Torvalds 	 * allocate the blocks a dmap at a time.
32101da177e4SLinus Torvalds 	 */
32111da177e4SLinus Torvalds 	mp = NULL;
32121da177e4SLinus Torvalds 	for (rem = nblocks; rem > 0; rem -= nb, blkno += nb) {
32131da177e4SLinus Torvalds 		/* release previous dmap if any */
32141da177e4SLinus Torvalds 		if (mp) {
32151da177e4SLinus Torvalds 			write_metapage(mp);
32161da177e4SLinus Torvalds 		}
32171da177e4SLinus Torvalds 
32181da177e4SLinus Torvalds 		/* get the buffer for the current dmap. */
32191da177e4SLinus Torvalds 		lblkno = BLKTODMAP(blkno, bmp->db_l2nbperpage);
32201da177e4SLinus Torvalds 		mp = read_metapage(ipbmap, lblkno, PSIZE, 0);
32211da177e4SLinus Torvalds 		if (mp == NULL) {
32221da177e4SLinus Torvalds 			IREAD_UNLOCK(ipbmap);
32231da177e4SLinus Torvalds 			return -EIO;
32241da177e4SLinus Torvalds 		}
32251da177e4SLinus Torvalds 		dp = (struct dmap *) mp->data;
32261da177e4SLinus Torvalds 
32271da177e4SLinus Torvalds 		/* determine the number of blocks to be allocated from
32281da177e4SLinus Torvalds 		 * this dmap.
32291da177e4SLinus Torvalds 		 */
32301da177e4SLinus Torvalds 		nb = min(rem, BPERDMAP - (blkno & (BPERDMAP - 1)));
32311da177e4SLinus Torvalds 
32321da177e4SLinus Torvalds 		/* allocate the blocks. */
32331da177e4SLinus Torvalds 		if ((rc = dbAllocDmapBU(bmp, dp, blkno, nb))) {
32341da177e4SLinus Torvalds 			release_metapage(mp);
32351da177e4SLinus Torvalds 			IREAD_UNLOCK(ipbmap);
32361da177e4SLinus Torvalds 			return (rc);
32371da177e4SLinus Torvalds 		}
32381da177e4SLinus Torvalds 	}
32391da177e4SLinus Torvalds 
32401da177e4SLinus Torvalds 	/* write the last buffer. */
32411da177e4SLinus Torvalds 	write_metapage(mp);
32421da177e4SLinus Torvalds 
32431da177e4SLinus Torvalds 	IREAD_UNLOCK(ipbmap);
32441da177e4SLinus Torvalds 
32451da177e4SLinus Torvalds 	return (0);
32461da177e4SLinus Torvalds }
32471da177e4SLinus Torvalds 
32481da177e4SLinus Torvalds 
/*
 * NAME:	dbAllocDmapBU()
 *
 * FUNCTION:	mark a block range as allocated within a single dmap,
 *		rebuild the dmap's summary tree, update the free counts
 *		kept in the dmap and in the bmap, and push a changed
 *		tree root up to the dmap control pages.
 *
 * PARAMETERS:
 *	bmp	-  pointer to bmap descriptor
 *	dp	-  pointer to the dmap covering the block range
 *	blkno	-  starting block number of the range
 *	nblocks	-  number of blocks in the range; the range must not
 *		   extend past the end of the dmap (asserted).
 *
 * RETURN VALUES:
 *	0	- success
 *	other	- error returned by dbAdjCtl(); dbFreeBits() has been
 *		  called on the range before returning.
 */
static int dbAllocDmapBU(struct bmap * bmp, struct dmap * dp, s64 blkno,
			 int nblocks)
{
	struct dmaptree *tree = (struct dmaptree *) & dp->tree;
	/* remember the root (i.e. maximum free string) of the dmap tree
	 * so a change can be detected afterwards.
	 */
	s8 root_before = tree->stree[ROOT];
	/* bit number and word within the dmap of the starting block. */
	int bit = blkno & (BPERDMAP - 1);
	int widx = bit >> L2DBWORD;
	int left = nblocks;
	int off, cnt, full, ag, rc;

	/* block range better be within the dmap */
	assert(bit + nblocks <= BPERDMAP);

	/* set the bits of the dmap words covered by the range.  a first
	 * and/or last word that is only partly covered is handled on its
	 * own, one word per pass, by or-ing in the bits of interest.  all
	 * words that are fully covered are handled together in a single
	 * pass and set to all ones in one shot.  the summary tree is not
	 * maintained here; it is rebuilt from scratch afterwards.
	 */
	while (left > 0) {
		/* bit offset within the current word and the number of
		 * bits of the range that fall into it.
		 */
		off = bit & (DBWORD - 1);
		cnt = min(left, DBWORD - off);

		if (cnt < DBWORD) {
			/* partial word: allocate (set to 1) just the
			 * covered bits.
			 */
			dp->wmap[widx] |= cpu_to_le32(ONES << (DBWORD - cnt)
						      >> off);
			widx++;
		} else {
			/* one or more whole words: allocate (set to 1)
			 * all of their bits at once.
			 */
			full = left >> L2DBWORD;
			memset(&dp->wmap[widx], (int) ONES, full * 4);

			/* number of bits consumed by this pass */
			cnt = full << L2DBWORD;
			widx += full;
		}

		left -= cnt;
		bit += cnt;
	}

	/* update the free count for this dmap */
	le32_add_cpu(&dp->nfree, -nblocks);

	/* reconstruct summary tree */
	dbInitDmapTree(dp);

	BMAP_LOCK(bmp);

	/* raise the highest active allocation group number if the
	 * group containing blkno lies above it.
	 */
	ag = blkno >> bmp->db_agl2size;
	if (bmp->db_maxag < ag)
		bmp->db_maxag = ag;

	/* update the free count for the allocation group and map */
	bmp->db_agfree[ag] -= nblocks;
	bmp->db_nfree -= nblocks;

	BMAP_UNLOCK(bmp);

	/* if the root has not changed, done. */
	if (tree->stree[ROOT] == root_before)
		return 0;

	/* root changed. bubble the change up to the dmap control pages.
	 * if the adjustment of the upper level control pages fails,
	 * back out the bit allocation.
	 */
	rc = dbAdjCtl(bmp, blkno, tree->stree[ROOT], 1, 0);
	if (rc)
		dbFreeBits(bmp, dp, blkno, nblocks);

	return rc;
}
33511da177e4SLinus Torvalds 
33521da177e4SLinus Torvalds 
33531da177e4SLinus Torvalds /*
33541da177e4SLinus Torvalds  * NAME:	dbExtendFS()
33551da177e4SLinus Torvalds  *
33561da177e4SLinus Torvalds  * FUNCTION:	extend bmap from blkno for nblocks;
33571da177e4SLinus Torvalds  *		dbExtendFS() updates bmap ready for dbAllocBottomUp();
33581da177e4SLinus Torvalds  *
33591da177e4SLinus Torvalds  * L2
33601da177e4SLinus Torvalds  *  |
33611da177e4SLinus Torvalds  *   L1---------------------------------L1
33621da177e4SLinus Torvalds  *    |					 |
33631da177e4SLinus Torvalds  *     L0---------L0---------L0		  L0---------L0---------L0
33641da177e4SLinus Torvalds  *      |	   |	      |		   |	      |		 |
33651da177e4SLinus Torvalds  *	 d0,...,dn  d0,...,dn  d0,...,dn    d0,...,dn  d0,...,dn  d0,.,dm;
33661da177e4SLinus Torvalds  * L2L1L0d0,...,dnL0d0,...,dnL0d0,...,dnL1L0d0,...,dnL0d0,...,dnL0d0,..dm
33671da177e4SLinus Torvalds  *
33681da177e4SLinus Torvalds  * <---old---><----------------------------extend----------------------->
33691da177e4SLinus Torvalds  */
int dbExtendFS(struct inode *ipbmap, s64 blkno,	s64 nblocks)
{
	/*
	 * Extend the block allocation map so that it also describes the
	 * blocks [blkno, blkno + nblocks).
	 *
	 * ipbmap  - block map inode; all map pages are read or obtained
	 *	     through it
	 * blkno   - first block of the extension
	 * nblocks - number of blocks being added
	 *
	 * Returns 0 on success.  Returns -EIO when a map page cannot be
	 * read or obtained, when a dmap reports a start block that lies
	 * outside every allocation group, or when the extension does not
	 * end inside the first L1 page of L2 (this function does not
	 * rebuild the L2 summary page).
	 *
	 * The bmap control fields (db_mapsize, db_maxlevel, db_agl2size,
	 * db_agsize, db_numag, db_agfree[], db_maxag) are updated before
	 * any map page is touched and are not rolled back on failure.
	 */
	struct jfs_sb_info *sbi = JFS_SBI(ipbmap->i_sb);
	int nbperpage = sbi->nbperpage;
	/* i0/j0: true until the first (partially covered) L0/L1 page is read */
	int i, i0 = true, j, j0 = true, k, n;
	s64 newsize;
	s64 p;
	struct metapage *mp, *l2mp, *l1mp = NULL, *l0mp = NULL;
	struct dmapctl *l2dcp, *l1dcp, *l0dcp;
	struct dmap *dp;
	s8 *l0leaf, *l1leaf, *l2leaf;
	struct bmap *bmp = sbi->bmap;
	int agno, l2agsize, oldl2agsize;
	s64 ag_rem;

	newsize = blkno + nblocks;

	jfs_info("dbExtendFS: blkno:%Ld nblocks:%Ld newsize:%Ld",
		 (long long) blkno, (long long) nblocks, (long long) newsize);

	/*
	 *	initialize bmap control page.
	 *
	 * all the data in bmap control page should exclude
	 * the mkfs hidden dmap page.
	 */

	/* update mapsize */
	bmp->db_mapsize = newsize;
	bmp->db_maxlevel = BMAPSZTOLEV(bmp->db_mapsize);

	/* compute new AG size */
	l2agsize = dbGetL2AGSize(newsize);
	oldl2agsize = bmp->db_agl2size;

	bmp->db_agl2size = l2agsize;
	/* shift in 64 bits: db_agsize is s64 and l2agsize may exceed 30 */
	bmp->db_agsize = (s64) 1 << l2agsize;

	/* compute new number of AG (round up for a partial last AG) */
	agno = bmp->db_numag;
	bmp->db_numag = newsize >> l2agsize;
	/*
	 * db_agsize is a power of two, so masking yields the remainder
	 * without truncating either operand to 32 bits.
	 */
	if (newsize & (bmp->db_agsize - 1))
		bmp->db_numag++;

	/*
	 *	reconfigure db_agfree[]
	 * from old AG configuration to new AG configuration;
	 *
	 * coalesce contiguous k (newAGSize/oldAGSize) AGs;
	 * i.e., (AGi, ..., AGj) where i = k*n and j = k*(n+1) - 1 to AGn;
	 * note: new AG size = old AG size * (2**x).
	 */
	if (l2agsize == oldl2agsize)
		goto extend;
	k = 1 << (l2agsize - oldl2agsize);
	/*
	 * agfree[0] is both the first source and the first collection
	 * point: it is zeroed before being added to itself, so its old
	 * value is saved here and added back after the merge.
	 */
	ag_rem = bmp->db_agfree[0];	/* save agfree[0] */
	for (i = 0, n = 0; i < agno; n++) {
		bmp->db_agfree[n] = 0;	/* init collection point */

		/* coalesce contiguous k AGs; */
		for (j = 0; j < k && i < agno; j++, i++) {
			/* merge AGi to AGn */
			bmp->db_agfree[n] += bmp->db_agfree[i];
		}
	}
	bmp->db_agfree[0] += ag_rem;	/* restore agfree[0] */

	/* the remaining slots no longer describe any AG */
	for (; n < MAXAG; n++)
		bmp->db_agfree[n] = 0;

	/*
	 * update highest active ag number
	 */

	bmp->db_maxag = bmp->db_maxag / k;

	/*
	 *	extend bmap
	 *
	 * update bit maps and corresponding level control pages;
	 * global control page db_nfree, db_agfree[agno], db_maxfreebud;
	 */
      extend:
	/* get L2 page */
	p = BMAPBLKNO + nbperpage;	/* L2 page */
	l2mp = read_metapage(ipbmap, p, PSIZE, 0);
	if (!l2mp) {
		jfs_error(ipbmap->i_sb, "L2 page could not be read\n");
		return -EIO;
	}
	l2dcp = (struct dmapctl *) l2mp->data;

	/* compute start L1 */
	k = blkno >> L2MAXL1SIZE;
	l2leaf = l2dcp->stree + CTLLEAFIND + k;
	p = BLKTOL1(blkno, sbi->l2nbperpage);	/* L1 page */

	/*
	 * extend each L1 in L2
	 *
	 * NOTE(review): when the L0 loop below runs to completion, p
	 * already appears to address the page following the last dmap;
	 * the extra "p += nbperpage" here would then skip one page
	 * before the next L1 is obtained - confirm.  Extensions that
	 * reach a second L1 page end in the error path below anyway.
	 */
	for (; k < LPERCTL; k++, p += nbperpage) {
		/* get L1 page */
		if (j0) {
			/* read in L1 page: (blkno & (MAXL1SIZE - 1)) */
			l1mp = read_metapage(ipbmap, p, PSIZE, 0);
			if (l1mp == NULL)
				goto errout;
			l1dcp = (struct dmapctl *) l1mp->data;

			/* compute start L0 */
			j = (blkno & (MAXL1SIZE - 1)) >> L2MAXL0SIZE;
			l1leaf = l1dcp->stree + CTLLEAFIND + j;
			p = BLKTOL0(blkno, sbi->l2nbperpage);
			j0 = false;
		} else {
			/* assign/init L1 page */
			l1mp = get_metapage(ipbmap, p, PSIZE, 0);
			if (l1mp == NULL)
				goto errout;

			l1dcp = (struct dmapctl *) l1mp->data;

			/* compute start L0 */
			j = 0;
			l1leaf = l1dcp->stree + CTLLEAFIND;
			p += nbperpage;	/* 1st L0 of L1.k */
		}

		/*
		 * extend each L0 in L1
		 */
		for (; j < LPERCTL; j++) {
			/* get L0 page */
			if (i0) {
				/* read in L0 page: (blkno & (MAXL0SIZE - 1)) */

				l0mp = read_metapage(ipbmap, p, PSIZE, 0);
				if (l0mp == NULL)
					goto errout;
				l0dcp = (struct dmapctl *) l0mp->data;

				/* compute start dmap */
				i = (blkno & (MAXL0SIZE - 1)) >>
				    L2BPERDMAP;
				l0leaf = l0dcp->stree + CTLLEAFIND + i;
				p = BLKTODMAP(blkno,
					      sbi->l2nbperpage);
				i0 = false;
			} else {
				/* assign/init L0 page */
				l0mp = get_metapage(ipbmap, p, PSIZE, 0);
				if (l0mp == NULL)
					goto errout;

				l0dcp = (struct dmapctl *) l0mp->data;

				/* compute start dmap */
				i = 0;
				l0leaf = l0dcp->stree + CTLLEAFIND;
				p += nbperpage;	/* 1st dmap of L0.j */
			}

			/*
			 * extend each dmap in L0
			 */
			for (; i < LPERCTL; i++) {
				/*
				 * reconstruct the dmap page, and
				 * initialize corresponding parent L0 leaf
				 */
				if ((n = blkno & (BPERDMAP - 1))) {
					/*
					 * extension starts inside this dmap:
					 * read in dmap page and cover at most
					 * the rest of it
					 */
					mp = read_metapage(ipbmap, p,
							   PSIZE, 0);
					if (mp == NULL)
						goto errout;
					n = min(nblocks, (s64)BPERDMAP - n);
				} else {
					/* assign/init dmap page */
					mp = read_metapage(ipbmap, p,
							   PSIZE, 0);
					if (mp == NULL)
						goto errout;

					n = min_t(s64, nblocks, BPERDMAP);
				}

				dp = (struct dmap *) mp->data;
				*l0leaf = dbInitDmap(dp, blkno, n);

				/*
				 * dp->start is taken from the page as read
				 * when blkno is not dmap aligned; make sure
				 * it cannot index past db_agfree[].
				 */
				agno = le64_to_cpu(dp->start) >> l2agsize;
				if (agno < 0 || agno >= MAXAG) {
					release_metapage(mp);
					jfs_error(ipbmap->i_sb,
						  "dmap start block is outside the allocation groups\n");
					goto errout;
				}

				bmp->db_nfree += n;
				bmp->db_agfree[agno] += n;

				write_metapage(mp);

				l0leaf++;
				p += nbperpage;

				blkno += n;
				nblocks -= n;
				if (nblocks == 0)
					break;
			}	/* for each dmap in a L0 */

			/*
			 * build current L0 page from its leaves, and
			 * initialize corresponding parent L1 leaf
			 *
			 * ++i is the first L0 leaf not covered by a dmap
			 */
			*l1leaf = dbInitDmapCtl(l0dcp, 0, ++i);
			write_metapage(l0mp);
			l0mp = NULL;	/* written: errout must not release it */

			if (nblocks)
				l1leaf++;	/* continue for next L0 */
			else {
				/* more than 1 L0 ? */
				if (j > 0)
					break;	/* build L1 page */
				else {
					/* summarize in global bmap page */
					bmp->db_maxfreebud = *l1leaf;
					release_metapage(l1mp);
					release_metapage(l2mp);
					goto finalize;
				}
			}
		}		/* for each L0 in a L1 */

		/*
		 * build current L1 page from its leaves, and
		 * initialize corresponding parent L2 leaf
		 *
		 * ++j is the first L1 leaf not covered by an L0 page
		 */
		*l2leaf = dbInitDmapCtl(l1dcp, 1, ++j);
		write_metapage(l1mp);
		l1mp = NULL;	/* written: errout must not release it */

		if (nblocks)
			l2leaf++;	/* continue for next L1 */
		else {
			/* more than 1 L1 ? */
			if (k > 0)
				break;	/* build L2 page */
			else {
				/* summarize in global bmap page */
				bmp->db_maxfreebud = *l2leaf;
				release_metapage(l2mp);
				goto finalize;
			}
		}
	}			/* for each L1 in a L2 */

	/*
	 * Reached when the loop is left by the "k > 0" break or by
	 * exhausting the L1 pages: no code builds the L2 page, so this
	 * is reported as an error and falls through to errout.
	 */
	jfs_error(ipbmap->i_sb, "function has not returned as expected\n");
errout:
	if (l0mp)
		release_metapage(l0mp);
	if (l1mp)
		release_metapage(l1mp);
	release_metapage(l2mp);
	return -EIO;

	/*
	 *	finalize bmap control page
	 */
finalize:

	return 0;
}
36371da177e4SLinus Torvalds 
36381da177e4SLinus Torvalds 
36391da177e4SLinus Torvalds /*
36401da177e4SLinus Torvalds  *	dbFinalizeBmap()
36411da177e4SLinus Torvalds  */
dbFinalizeBmap(struct inode * ipbmap)36421da177e4SLinus Torvalds void dbFinalizeBmap(struct inode *ipbmap)
36431da177e4SLinus Torvalds {
36441da177e4SLinus Torvalds 	struct bmap *bmp = JFS_SBI(ipbmap->i_sb)->bmap;
36451da177e4SLinus Torvalds 	int actags, inactags, l2nl;
36461da177e4SLinus Torvalds 	s64 ag_rem, actfree, inactfree, avgfree;
36471da177e4SLinus Torvalds 	int i, n;
36481da177e4SLinus Torvalds 
36491da177e4SLinus Torvalds 	/*
36501da177e4SLinus Torvalds 	 *	finalize bmap control page
36511da177e4SLinus Torvalds 	 */
36521da177e4SLinus Torvalds //finalize:
36531da177e4SLinus Torvalds 	/*
36541da177e4SLinus Torvalds 	 * compute db_agpref: preferred ag to allocate from
36551da177e4SLinus Torvalds 	 * (the leftmost ag with average free space in it);
36561da177e4SLinus Torvalds 	 */
36571da177e4SLinus Torvalds //agpref:
3658577ebd19Szuoqilin 	/* get the number of active ags and inactive ags */
36591da177e4SLinus Torvalds 	actags = bmp->db_maxag + 1;
36601da177e4SLinus Torvalds 	inactags = bmp->db_numag - actags;
36611da177e4SLinus Torvalds 	ag_rem = bmp->db_mapsize & (bmp->db_agsize - 1);	/* ??? */
36621da177e4SLinus Torvalds 
36631da177e4SLinus Torvalds 	/* determine how many blocks are in the inactive allocation
36641da177e4SLinus Torvalds 	 * groups. in doing this, we must account for the fact that
36651da177e4SLinus Torvalds 	 * the rightmost group might be a partial group (i.e. file
36661da177e4SLinus Torvalds 	 * system size is not a multiple of the group size).
36671da177e4SLinus Torvalds 	 */
36681da177e4SLinus Torvalds 	inactfree = (inactags && ag_rem) ?
36691da177e4SLinus Torvalds 	    ((inactags - 1) << bmp->db_agl2size) + ag_rem
36701da177e4SLinus Torvalds 	    : inactags << bmp->db_agl2size;
36711da177e4SLinus Torvalds 
36721da177e4SLinus Torvalds 	/* determine how many free blocks are in the active
36731da177e4SLinus Torvalds 	 * allocation groups plus the average number of free blocks
36741da177e4SLinus Torvalds 	 * within the active ags.
36751da177e4SLinus Torvalds 	 */
36761da177e4SLinus Torvalds 	actfree = bmp->db_nfree - inactfree;
36771da177e4SLinus Torvalds 	avgfree = (u32) actfree / (u32) actags;
36781da177e4SLinus Torvalds 
36791da177e4SLinus Torvalds 	/* if the preferred allocation group has not average free space.
36801da177e4SLinus Torvalds 	 * re-establish the preferred group as the leftmost
36811da177e4SLinus Torvalds 	 * group with average free space.
36821da177e4SLinus Torvalds 	 */
36831da177e4SLinus Torvalds 	if (bmp->db_agfree[bmp->db_agpref] < avgfree) {
36841da177e4SLinus Torvalds 		for (bmp->db_agpref = 0; bmp->db_agpref < actags;
36851da177e4SLinus Torvalds 		     bmp->db_agpref++) {
36861da177e4SLinus Torvalds 			if (bmp->db_agfree[bmp->db_agpref] >= avgfree)
36871da177e4SLinus Torvalds 				break;
36881da177e4SLinus Torvalds 		}
36891da177e4SLinus Torvalds 		if (bmp->db_agpref >= bmp->db_numag) {
36901da177e4SLinus Torvalds 			jfs_error(ipbmap->i_sb,
3691eb8630d7SJoe Perches 				  "cannot find ag with average freespace\n");
36921da177e4SLinus Torvalds 		}
36931da177e4SLinus Torvalds 	}
36941da177e4SLinus Torvalds 
36951da177e4SLinus Torvalds 	/*
3696d7eecb48SDaniel Mack 	 * compute db_aglevel, db_agheight, db_width, db_agstart:
36971da177e4SLinus Torvalds 	 * an ag is covered in aglevel dmapctl summary tree,
36981da177e4SLinus Torvalds 	 * at agheight level height (from leaf) with agwidth number of nodes
36991da177e4SLinus Torvalds 	 * each, which starts at agstart index node of the smmary tree node
37001da177e4SLinus Torvalds 	 * array;
37011da177e4SLinus Torvalds 	 */
37021da177e4SLinus Torvalds 	bmp->db_aglevel = BMAPSZTOLEV(bmp->db_agsize);
37031da177e4SLinus Torvalds 	l2nl =
37041da177e4SLinus Torvalds 	    bmp->db_agl2size - (L2BPERDMAP + bmp->db_aglevel * L2LPERCTL);
3705d7eecb48SDaniel Mack 	bmp->db_agheight = l2nl >> 1;
3706d7eecb48SDaniel Mack 	bmp->db_agwidth = 1 << (l2nl - (bmp->db_agheight << 1));
3707d7eecb48SDaniel Mack 	for (i = 5 - bmp->db_agheight, bmp->db_agstart = 0, n = 1; i > 0;
37081da177e4SLinus Torvalds 	     i--) {
37091da177e4SLinus Torvalds 		bmp->db_agstart += n;
37101da177e4SLinus Torvalds 		n <<= 2;
37111da177e4SLinus Torvalds 	}
37121da177e4SLinus Torvalds 
37131da177e4SLinus Torvalds }
37141da177e4SLinus Torvalds 
37151da177e4SLinus Torvalds 
37161da177e4SLinus Torvalds /*
37171da177e4SLinus Torvalds  * NAME:	dbInitDmap()/ujfs_idmap_page()
37181da177e4SLinus Torvalds  *
37191da177e4SLinus Torvalds  * FUNCTION:	initialize working/persistent bitmap of the dmap page
37201da177e4SLinus Torvalds  *		for the specified number of blocks:
37211da177e4SLinus Torvalds  *
37221da177e4SLinus Torvalds  *		at entry, the bitmaps had been initialized as free (ZEROS);
37231da177e4SLinus Torvalds  *		The number of blocks will only account for the actually
37241da177e4SLinus Torvalds  *		existing blocks. Blocks which don't actually exist in
37251da177e4SLinus Torvalds  *		the aggregate will be marked as allocated (ONES);
37261da177e4SLinus Torvalds  *
37271da177e4SLinus Torvalds  * PARAMETERS:
37281da177e4SLinus Torvalds  *	dp	- pointer to page of map
37291da177e4SLinus Torvalds  *	nblocks	- number of blocks this page
37301da177e4SLinus Torvalds  *
37311da177e4SLinus Torvalds  * RETURNS: NONE
37321da177e4SLinus Torvalds  */
dbInitDmap(struct dmap * dp,s64 Blkno,int nblocks)37331da177e4SLinus Torvalds static int dbInitDmap(struct dmap * dp, s64 Blkno, int nblocks)
37341da177e4SLinus Torvalds {
37351da177e4SLinus Torvalds 	int blkno, w, b, r, nw, nb, i;
37361da177e4SLinus Torvalds 
37371da177e4SLinus Torvalds 	/* starting block number within the dmap */
37381da177e4SLinus Torvalds 	blkno = Blkno & (BPERDMAP - 1);
37391da177e4SLinus Torvalds 
37401da177e4SLinus Torvalds 	if (blkno == 0) {
37411da177e4SLinus Torvalds 		dp->nblocks = dp->nfree = cpu_to_le32(nblocks);
37421da177e4SLinus Torvalds 		dp->start = cpu_to_le64(Blkno);
37431da177e4SLinus Torvalds 
37441da177e4SLinus Torvalds 		if (nblocks == BPERDMAP) {
37451da177e4SLinus Torvalds 			memset(&dp->wmap[0], 0, LPERDMAP * 4);
37461da177e4SLinus Torvalds 			memset(&dp->pmap[0], 0, LPERDMAP * 4);
37471da177e4SLinus Torvalds 			goto initTree;
37481da177e4SLinus Torvalds 		}
37491da177e4SLinus Torvalds 	} else {
375089145622SMarcin Slusarz 		le32_add_cpu(&dp->nblocks, nblocks);
375189145622SMarcin Slusarz 		le32_add_cpu(&dp->nfree, nblocks);
37521da177e4SLinus Torvalds 	}
37531da177e4SLinus Torvalds 
37541da177e4SLinus Torvalds 	/* word number containing start block number */
37551da177e4SLinus Torvalds 	w = blkno >> L2DBWORD;
37561da177e4SLinus Torvalds 
37571da177e4SLinus Torvalds 	/*
37581da177e4SLinus Torvalds 	 * free the bits corresponding to the block range (ZEROS):
37591da177e4SLinus Torvalds 	 * note: not all bits of the first and last words may be contained
37601da177e4SLinus Torvalds 	 * within the block range.
37611da177e4SLinus Torvalds 	 */
37621da177e4SLinus Torvalds 	for (r = nblocks; r > 0; r -= nb, blkno += nb) {
37631da177e4SLinus Torvalds 		/* number of bits preceding range to be freed in the word */
37641da177e4SLinus Torvalds 		b = blkno & (DBWORD - 1);
37651da177e4SLinus Torvalds 		/* number of bits to free in the word */
37661da177e4SLinus Torvalds 		nb = min(r, DBWORD - b);
37671da177e4SLinus Torvalds 
37681da177e4SLinus Torvalds 		/* is partial word to be freed ? */
37691da177e4SLinus Torvalds 		if (nb < DBWORD) {
37701da177e4SLinus Torvalds 			/* free (set to 0) from the bitmap word */
37711da177e4SLinus Torvalds 			dp->wmap[w] &= cpu_to_le32(~(ONES << (DBWORD - nb)
37721da177e4SLinus Torvalds 						     >> b));
37731da177e4SLinus Torvalds 			dp->pmap[w] &= cpu_to_le32(~(ONES << (DBWORD - nb)
37741da177e4SLinus Torvalds 						     >> b));
37751da177e4SLinus Torvalds 
37761da177e4SLinus Torvalds 			/* skip the word freed */
37771da177e4SLinus Torvalds 			w++;
37781da177e4SLinus Torvalds 		} else {
37791da177e4SLinus Torvalds 			/* free (set to 0) contiguous bitmap words */
37801da177e4SLinus Torvalds 			nw = r >> L2DBWORD;
37811da177e4SLinus Torvalds 			memset(&dp->wmap[w], 0, nw * 4);
37821da177e4SLinus Torvalds 			memset(&dp->pmap[w], 0, nw * 4);
37831da177e4SLinus Torvalds 
37841da177e4SLinus Torvalds 			/* skip the words freed */
37851da177e4SLinus Torvalds 			nb = nw << L2DBWORD;
37861da177e4SLinus Torvalds 			w += nw;
37871da177e4SLinus Torvalds 		}
37881da177e4SLinus Torvalds 	}
37891da177e4SLinus Torvalds 
37901da177e4SLinus Torvalds 	/*
37911da177e4SLinus Torvalds 	 * mark bits following the range to be freed (non-existing
37921da177e4SLinus Torvalds 	 * blocks) as allocated (ONES)
37931da177e4SLinus Torvalds 	 */
37941da177e4SLinus Torvalds 
37951da177e4SLinus Torvalds 	if (blkno == BPERDMAP)
37961da177e4SLinus Torvalds 		goto initTree;
37971da177e4SLinus Torvalds 
37981da177e4SLinus Torvalds 	/* the first word beyond the end of existing blocks */
37991da177e4SLinus Torvalds 	w = blkno >> L2DBWORD;
38001da177e4SLinus Torvalds 
38011da177e4SLinus Torvalds 	/* does nblocks fall on a 32-bit boundary ? */
38021da177e4SLinus Torvalds 	b = blkno & (DBWORD - 1);
38031da177e4SLinus Torvalds 	if (b) {
38041da177e4SLinus Torvalds 		/* mark a partial word allocated */
38051da177e4SLinus Torvalds 		dp->wmap[w] = dp->pmap[w] = cpu_to_le32(ONES >> b);
38061da177e4SLinus Torvalds 		w++;
38071da177e4SLinus Torvalds 	}
38081da177e4SLinus Torvalds 
38091da177e4SLinus Torvalds 	/* set the rest of the words in the page to allocated (ONES) */
38101da177e4SLinus Torvalds 	for (i = w; i < LPERDMAP; i++)
38111da177e4SLinus Torvalds 		dp->pmap[i] = dp->wmap[i] = cpu_to_le32(ONES);
38121da177e4SLinus Torvalds 
38131da177e4SLinus Torvalds 	/*
38141da177e4SLinus Torvalds 	 * init tree
38151da177e4SLinus Torvalds 	 */
38161da177e4SLinus Torvalds       initTree:
38171da177e4SLinus Torvalds 	return (dbInitDmapTree(dp));
38181da177e4SLinus Torvalds }
38191da177e4SLinus Torvalds 
38201da177e4SLinus Torvalds 
38211da177e4SLinus Torvalds /*
38221da177e4SLinus Torvalds  * NAME:	dbInitDmapTree()/ujfs_complete_dmap()
38231da177e4SLinus Torvalds  *
38241da177e4SLinus Torvalds  * FUNCTION:	initialize summary tree of the specified dmap:
38251da177e4SLinus Torvalds  *
38261da177e4SLinus Torvalds  *		at entry, bitmap of the dmap has been initialized;
38271da177e4SLinus Torvalds  *
38281da177e4SLinus Torvalds  * PARAMETERS:
38291da177e4SLinus Torvalds  *	dp	- dmap to complete
38301da177e4SLinus Torvalds  *	blkno	- starting block number for this dmap
38311da177e4SLinus Torvalds  *	treemax	- will be filled in with max free for this dmap
38321da177e4SLinus Torvalds  *
38331da177e4SLinus Torvalds  * RETURNS:	max free string at the root of the tree
38341da177e4SLinus Torvalds  */
dbInitDmapTree(struct dmap * dp)38351da177e4SLinus Torvalds static int dbInitDmapTree(struct dmap * dp)
38361da177e4SLinus Torvalds {
38371da177e4SLinus Torvalds 	struct dmaptree *tp;
38381da177e4SLinus Torvalds 	s8 *cp;
38391da177e4SLinus Torvalds 	int i;
38401da177e4SLinus Torvalds 
38411da177e4SLinus Torvalds 	/* init fixed info of tree */
38421da177e4SLinus Torvalds 	tp = &dp->tree;
38431da177e4SLinus Torvalds 	tp->nleafs = cpu_to_le32(LPERDMAP);
38441da177e4SLinus Torvalds 	tp->l2nleafs = cpu_to_le32(L2LPERDMAP);
38451da177e4SLinus Torvalds 	tp->leafidx = cpu_to_le32(LEAFIND);
38461da177e4SLinus Torvalds 	tp->height = cpu_to_le32(4);
38471da177e4SLinus Torvalds 	tp->budmin = BUDMIN;
38481da177e4SLinus Torvalds 
38491da177e4SLinus Torvalds 	/* init each leaf from corresponding wmap word:
38501da177e4SLinus Torvalds 	 * note: leaf is set to NOFREE(-1) if all blocks of corresponding
38511da177e4SLinus Torvalds 	 * bitmap word are allocated.
38521da177e4SLinus Torvalds 	 */
38531da177e4SLinus Torvalds 	cp = tp->stree + le32_to_cpu(tp->leafidx);
38541da177e4SLinus Torvalds 	for (i = 0; i < LPERDMAP; i++)
38551da177e4SLinus Torvalds 		*cp++ = dbMaxBud((u8 *) & dp->wmap[i]);
38561da177e4SLinus Torvalds 
38571da177e4SLinus Torvalds 	/* build the dmap's binary buddy summary tree */
38581da177e4SLinus Torvalds 	return (dbInitTree(tp));
38591da177e4SLinus Torvalds }
38601da177e4SLinus Torvalds 
38611da177e4SLinus Torvalds 
38621da177e4SLinus Torvalds /*
38631da177e4SLinus Torvalds  * NAME:	dbInitTree()/ujfs_adjtree()
38641da177e4SLinus Torvalds  *
38651da177e4SLinus Torvalds  * FUNCTION:	initialize binary buddy summary tree of a dmap or dmapctl.
38661da177e4SLinus Torvalds  *
38671da177e4SLinus Torvalds  *		at entry, the leaves of the tree has been initialized
38681da177e4SLinus Torvalds  *		from corresponding bitmap word or root of summary tree
38691da177e4SLinus Torvalds  *		of the child control page;
38701da177e4SLinus Torvalds  *		configure binary buddy system at the leaf level, then
38711da177e4SLinus Torvalds  *		bubble up the values of the leaf nodes up the tree.
38721da177e4SLinus Torvalds  *
38731da177e4SLinus Torvalds  * PARAMETERS:
38741da177e4SLinus Torvalds  *	cp	- Pointer to the root of the tree
38751da177e4SLinus Torvalds  *	l2leaves- Number of leaf nodes as a power of 2
38761da177e4SLinus Torvalds  *	l2min	- Number of blocks that can be covered by a leaf
38771da177e4SLinus Torvalds  *		  as a power of 2
38781da177e4SLinus Torvalds  *
38791da177e4SLinus Torvalds  * RETURNS: max free string at the root of the tree
38801da177e4SLinus Torvalds  */
dbInitTree(struct dmaptree * dtp)38811da177e4SLinus Torvalds static int dbInitTree(struct dmaptree * dtp)
38821da177e4SLinus Torvalds {
38831da177e4SLinus Torvalds 	int l2max, l2free, bsize, nextb, i;
38841da177e4SLinus Torvalds 	int child, parent, nparent;
38851da177e4SLinus Torvalds 	s8 *tp, *cp, *cp1;
38861da177e4SLinus Torvalds 
38871da177e4SLinus Torvalds 	tp = dtp->stree;
38881da177e4SLinus Torvalds 
38891da177e4SLinus Torvalds 	/* Determine the maximum free string possible for the leaves */
38901da177e4SLinus Torvalds 	l2max = le32_to_cpu(dtp->l2nleafs) + dtp->budmin;
38911da177e4SLinus Torvalds 
38921da177e4SLinus Torvalds 	/*
3893f3fb4624SWonguk Lee 	 * configure the leaf level into binary buddy system
38941da177e4SLinus Torvalds 	 *
38951da177e4SLinus Torvalds 	 * Try to combine buddies starting with a buddy size of 1
38961da177e4SLinus Torvalds 	 * (i.e. two leaves). At a buddy size of 1 two buddy leaves
38971da177e4SLinus Torvalds 	 * can be combined if both buddies have a maximum free of l2min;
38981da177e4SLinus Torvalds 	 * the combination will result in the left-most buddy leaf having
38991da177e4SLinus Torvalds 	 * a maximum free of l2min+1.
39001da177e4SLinus Torvalds 	 * After processing all buddies for a given size, process buddies
39011da177e4SLinus Torvalds 	 * at the next higher buddy size (i.e. current size * 2) and
39021da177e4SLinus Torvalds 	 * the next maximum free (current free + 1).
39031da177e4SLinus Torvalds 	 * This continues until the maximum possible buddy combination
39041da177e4SLinus Torvalds 	 * yields maximum free.
39051da177e4SLinus Torvalds 	 */
39061da177e4SLinus Torvalds 	for (l2free = dtp->budmin, bsize = 1; l2free < l2max;
39071da177e4SLinus Torvalds 	     l2free++, bsize = nextb) {
39081da177e4SLinus Torvalds 		/* get next buddy size == current buddy pair size */
39091da177e4SLinus Torvalds 		nextb = bsize << 1;
39101da177e4SLinus Torvalds 
39111da177e4SLinus Torvalds 		/* scan each adjacent buddy pair at current buddy size */
39121da177e4SLinus Torvalds 		for (i = 0, cp = tp + le32_to_cpu(dtp->leafidx);
39131da177e4SLinus Torvalds 		     i < le32_to_cpu(dtp->nleafs);
39141da177e4SLinus Torvalds 		     i += nextb, cp += nextb) {
39151da177e4SLinus Torvalds 			/* coalesce if both adjacent buddies are max free */
39161da177e4SLinus Torvalds 			if (*cp == l2free && *(cp + bsize) == l2free) {
39171da177e4SLinus Torvalds 				*cp = l2free + 1;	/* left take right */
39181da177e4SLinus Torvalds 				*(cp + bsize) = -1;	/* right give left */
39191da177e4SLinus Torvalds 			}
39201da177e4SLinus Torvalds 		}
39211da177e4SLinus Torvalds 	}
39221da177e4SLinus Torvalds 
39231da177e4SLinus Torvalds 	/*
39241da177e4SLinus Torvalds 	 * bubble summary information of leaves up the tree.
39251da177e4SLinus Torvalds 	 *
39261da177e4SLinus Torvalds 	 * Starting at the leaf node level, the four nodes described by
39271da177e4SLinus Torvalds 	 * the higher level parent node are compared for a maximum free and
39281da177e4SLinus Torvalds 	 * this maximum becomes the value of the parent node.
39291da177e4SLinus Torvalds 	 * when all lower level nodes are processed in this fashion then
39301da177e4SLinus Torvalds 	 * move up to the next level (parent becomes a lower level node) and
39311da177e4SLinus Torvalds 	 * continue the process for that level.
39321da177e4SLinus Torvalds 	 */
39331da177e4SLinus Torvalds 	for (child = le32_to_cpu(dtp->leafidx),
39341da177e4SLinus Torvalds 	     nparent = le32_to_cpu(dtp->nleafs) >> 2;
39351da177e4SLinus Torvalds 	     nparent > 0; nparent >>= 2, child = parent) {
39361da177e4SLinus Torvalds 		/* get index of 1st node of parent level */
39371da177e4SLinus Torvalds 		parent = (child - 1) >> 2;
39381da177e4SLinus Torvalds 
39391da177e4SLinus Torvalds 		/* set the value of the parent node as the maximum
39401da177e4SLinus Torvalds 		 * of the four nodes of the current level.
39411da177e4SLinus Torvalds 		 */
39421da177e4SLinus Torvalds 		for (i = 0, cp = tp + child, cp1 = tp + parent;
39431da177e4SLinus Torvalds 		     i < nparent; i++, cp += 4, cp1++)
39441da177e4SLinus Torvalds 			*cp1 = TREEMAX(cp);
39451da177e4SLinus Torvalds 	}
39461da177e4SLinus Torvalds 
39471da177e4SLinus Torvalds 	return (*tp);
39481da177e4SLinus Torvalds }
39491da177e4SLinus Torvalds 
39501da177e4SLinus Torvalds 
39511da177e4SLinus Torvalds /*
39521da177e4SLinus Torvalds  *	dbInitDmapCtl()
39531da177e4SLinus Torvalds  *
39541da177e4SLinus Torvalds  * function: initialize dmapctl page
39551da177e4SLinus Torvalds  */
dbInitDmapCtl(struct dmapctl * dcp,int level,int i)39561da177e4SLinus Torvalds static int dbInitDmapCtl(struct dmapctl * dcp, int level, int i)
39571da177e4SLinus Torvalds {				/* start leaf index not covered by range */
39581da177e4SLinus Torvalds 	s8 *cp;
39591da177e4SLinus Torvalds 
39601da177e4SLinus Torvalds 	dcp->nleafs = cpu_to_le32(LPERCTL);
39611da177e4SLinus Torvalds 	dcp->l2nleafs = cpu_to_le32(L2LPERCTL);
39621da177e4SLinus Torvalds 	dcp->leafidx = cpu_to_le32(CTLLEAFIND);
39631da177e4SLinus Torvalds 	dcp->height = cpu_to_le32(5);
39641da177e4SLinus Torvalds 	dcp->budmin = L2BPERDMAP + L2LPERCTL * level;
39651da177e4SLinus Torvalds 
39661da177e4SLinus Torvalds 	/*
39671da177e4SLinus Torvalds 	 * initialize the leaves of current level that were not covered
39681da177e4SLinus Torvalds 	 * by the specified input block range (i.e. the leaves have no
39691da177e4SLinus Torvalds 	 * low level dmapctl or dmap).
39701da177e4SLinus Torvalds 	 */
39711da177e4SLinus Torvalds 	cp = &dcp->stree[CTLLEAFIND + i];
39721da177e4SLinus Torvalds 	for (; i < LPERCTL; i++)
39731da177e4SLinus Torvalds 		*cp++ = NOFREE;
39741da177e4SLinus Torvalds 
39751da177e4SLinus Torvalds 	/* build the dmap's binary buddy summary tree */
39761da177e4SLinus Torvalds 	return (dbInitTree((struct dmaptree *) dcp));
39771da177e4SLinus Torvalds }
39781da177e4SLinus Torvalds 
39791da177e4SLinus Torvalds 
39801da177e4SLinus Torvalds /*
39811da177e4SLinus Torvalds  * NAME:	dbGetL2AGSize()/ujfs_getagl2size()
39821da177e4SLinus Torvalds  *
39831da177e4SLinus Torvalds  * FUNCTION:	Determine log2(allocation group size) from aggregate size
39841da177e4SLinus Torvalds  *
39851da177e4SLinus Torvalds  * PARAMETERS:
39861da177e4SLinus Torvalds  *	nblocks	- Number of blocks in aggregate
39871da177e4SLinus Torvalds  *
39881da177e4SLinus Torvalds  * RETURNS: log2(allocation group size) in aggregate blocks
39891da177e4SLinus Torvalds  */
dbGetL2AGSize(s64 nblocks)39901da177e4SLinus Torvalds static int dbGetL2AGSize(s64 nblocks)
39911da177e4SLinus Torvalds {
39921da177e4SLinus Torvalds 	s64 sz;
39931da177e4SLinus Torvalds 	s64 m;
39941da177e4SLinus Torvalds 	int l2sz;
39951da177e4SLinus Torvalds 
39961da177e4SLinus Torvalds 	if (nblocks < BPERDMAP * MAXAG)
39971da177e4SLinus Torvalds 		return (L2BPERDMAP);
39981da177e4SLinus Torvalds 
39991da177e4SLinus Torvalds 	/* round up aggregate size to power of 2 */
40001da177e4SLinus Torvalds 	m = ((u64) 1 << (64 - 1));
40011da177e4SLinus Torvalds 	for (l2sz = 64; l2sz >= 0; l2sz--, m >>= 1) {
40021da177e4SLinus Torvalds 		if (m & nblocks)
40031da177e4SLinus Torvalds 			break;
40041da177e4SLinus Torvalds 	}
40051da177e4SLinus Torvalds 
40061da177e4SLinus Torvalds 	sz = (s64) 1 << l2sz;
40071da177e4SLinus Torvalds 	if (sz < nblocks)
40081da177e4SLinus Torvalds 		l2sz += 1;
40091da177e4SLinus Torvalds 
40101da177e4SLinus Torvalds 	/* agsize = roundupSize/max_number_of_ag */
40111da177e4SLinus Torvalds 	return (l2sz - L2MAXAG);
40121da177e4SLinus Torvalds }
40131da177e4SLinus Torvalds 
40141da177e4SLinus Torvalds 
40151da177e4SLinus Torvalds /*
40161da177e4SLinus Torvalds  * NAME:	dbMapFileSizeToMapSize()
40171da177e4SLinus Torvalds  *
40181da177e4SLinus Torvalds  * FUNCTION:	compute number of blocks the block allocation map file
40191da177e4SLinus Torvalds  *		can cover from the map file size;
40201da177e4SLinus Torvalds  *
40211da177e4SLinus Torvalds  * RETURNS:	Number of blocks which can be covered by this block map file;
40221da177e4SLinus Torvalds  */
40231da177e4SLinus Torvalds 
/*
 * maximum number of map pages at each level including control pages:
 * one control page plus LPERCTL children of the level below.
 */
#define MAXL0PAGES	(LPERCTL + 1)
#define MAXL1PAGES	(MAXL0PAGES * LPERCTL + 1)

/*
 * convert number of map pages to the zero origin top dmapctl level
 *
 * NOTE(review): the constants 2 and 3 presumably account for the control
 * pages that precede the top level's own pages in the map file - confirm.
 */
#define BMAPPGTOLEV(npages)	\
	(((npages) > 2 + MAXL1PAGES) ? 2 : \
	 ((npages) > 3 + MAXL0PAGES) ? 1 : 0)
40361da177e4SLinus Torvalds 
dbMapFileSizeToMapSize(struct inode * ipbmap)40371da177e4SLinus Torvalds s64 dbMapFileSizeToMapSize(struct inode * ipbmap)
40381da177e4SLinus Torvalds {
40391da177e4SLinus Torvalds 	struct super_block *sb = ipbmap->i_sb;
40401da177e4SLinus Torvalds 	s64 nblocks;
40411da177e4SLinus Torvalds 	s64 npages, ndmaps;
40421da177e4SLinus Torvalds 	int level, i;
40431da177e4SLinus Torvalds 	int complete, factor;
40441da177e4SLinus Torvalds 
40451da177e4SLinus Torvalds 	nblocks = ipbmap->i_size >> JFS_SBI(sb)->l2bsize;
40461da177e4SLinus Torvalds 	npages = nblocks >> JFS_SBI(sb)->l2nbperpage;
40471da177e4SLinus Torvalds 	level = BMAPPGTOLEV(npages);
40481da177e4SLinus Torvalds 
40491da177e4SLinus Torvalds 	/* At each level, accumulate the number of dmap pages covered by
40501da177e4SLinus Torvalds 	 * the number of full child levels below it;
40511da177e4SLinus Torvalds 	 * repeat for the last incomplete child level.
40521da177e4SLinus Torvalds 	 */
40531da177e4SLinus Torvalds 	ndmaps = 0;
40541da177e4SLinus Torvalds 	npages--;		/* skip the first global control page */
40551da177e4SLinus Torvalds 	/* skip higher level control pages above top level covered by map */
40561da177e4SLinus Torvalds 	npages -= (2 - level);
40571da177e4SLinus Torvalds 	npages--;		/* skip top level's control page */
40581da177e4SLinus Torvalds 	for (i = level; i >= 0; i--) {
40591da177e4SLinus Torvalds 		factor =
40601da177e4SLinus Torvalds 		    (i == 2) ? MAXL1PAGES : ((i == 1) ? MAXL0PAGES : 1);
40611da177e4SLinus Torvalds 		complete = (u32) npages / factor;
4062f720e3baSDave Kleikamp 		ndmaps += complete * ((i == 2) ? LPERCTL * LPERCTL :
4063f720e3baSDave Kleikamp 				      ((i == 1) ? LPERCTL : 1));
40641da177e4SLinus Torvalds 
40651da177e4SLinus Torvalds 		/* pages in last/incomplete child */
40661da177e4SLinus Torvalds 		npages = (u32) npages % factor;
40671da177e4SLinus Torvalds 		/* skip incomplete child's level control page */
40681da177e4SLinus Torvalds 		npages--;
40691da177e4SLinus Torvalds 	}
40701da177e4SLinus Torvalds 
40711da177e4SLinus Torvalds 	/* convert the number of dmaps into the number of blocks
40721da177e4SLinus Torvalds 	 * which can be covered by the dmaps;
40731da177e4SLinus Torvalds 	 */
40741da177e4SLinus Torvalds 	nblocks = ndmaps << L2BPERDMAP;
40751da177e4SLinus Torvalds 
40761da177e4SLinus Torvalds 	return (nblocks);
40771da177e4SLinus Torvalds }
4078