xref: /openbmc/linux/fs/jfs/jfs_extent.c (revision 367b8112)
1 /*
2  *   Copyright (C) International Business Machines Corp., 2000-2004
3  *
4  *   This program is free software;  you can redistribute it and/or modify
5  *   it under the terms of the GNU General Public License as published by
6  *   the Free Software Foundation; either version 2 of the License, or
7  *   (at your option) any later version.
8  *
9  *   This program is distributed in the hope that it will be useful,
10  *   but WITHOUT ANY WARRANTY;  without even the implied warranty of
11  *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See
12  *   the GNU General Public License for more details.
13  *
14  *   You should have received a copy of the GNU General Public License
15  *   along with this program;  if not, write to the Free Software
16  *   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
17  */
18 
19 #include <linux/fs.h>
20 #include <linux/quotaops.h>
21 #include "jfs_incore.h"
22 #include "jfs_inode.h"
23 #include "jfs_superblock.h"
24 #include "jfs_dmap.h"
25 #include "jfs_extent.h"
26 #include "jfs_debug.h"
27 
/*
 * forward references
 *
 * file-local helpers that are used before their definitions appear
 * below.  extBrealloc() is only declared (and defined) when _NOTYET
 * is set.
 */
static int extBalloc(struct inode *, s64, s64 *, s64 *);
#ifdef _NOTYET
static int extBrealloc(struct inode *, s64, s64, s64 *, s64 *);
#endif
static s64 extRoundDown(s64 nb);
36 
/*
 * Debug print helpers.
 *
 * The value-printing macros emit the source text of their argument in
 * parentheses followed by its value.  The argument text is produced with
 * the preprocessor '#' operator and passed through "%s" (a macro parameter
 * is never substituted inside a string literal, and passing the text as
 * data keeps any '%' in the expression out of the format string).
 *
 * Macros whose name ends in "1" finish with two spaces instead of a
 * newline so several values can share one output line.
 *
 * Every macro expands to a single parenthesized expression, so it can be
 * used wherever a statement or expression is expected.
 */
#define DPD(a)		(printk("(%s): %d\n", #a, (a)))
#define DPC(a)		(printk("(%s): %c\n", #a, (a)))

/* 64-bit values, printed in hex without leading zeros */
#define DPL1(a)		(printk("(%s): %llx  ", #a, (unsigned long long)(a)))
#define DPL(a)		(printk("(%s): %llx\n", #a, (unsigned long long)(a)))

#define DPD1(a)		(printk("(%s): %d  ", #a, (a)))
#define DPX(a)		(printk("(%s): %08x\n", #a, (a)))
#define DPX1(a)		(printk("(%s): %08x  ", #a, (a)))

/* string helpers: the argument is the string to print */
#define DPS(a)		(printk("%s\n",(a)))
#define DPE(a)		(printk("\nENTERING: %s\n",(a)))
#define DPE1(a)		(printk("\nENTERING: %s",(a)))
#define DPS1(a)		(printk("  %s  ",(a)))
61 
62 
63 /*
64  * NAME:	extAlloc()
65  *
66  * FUNCTION:	allocate an extent for a specified page range within a
67  *		file.
68  *
69  * PARAMETERS:
70  *	ip	- the inode of the file.
71  *	xlen	- requested extent length.
72  *	pno	- the starting page number with the file.
73  *	xp	- pointer to an xad.  on entry, xad describes an
74  *		  extent that is used as an allocation hint if the
75  *		  xaddr of the xad is non-zero.  on successful exit,
76  *		  the xad describes the newly allocated extent.
77  *	abnr	- bool indicating whether the newly allocated extent
78  *		  should be marked as allocated but not recorded.
79  *
80  * RETURN VALUES:
81  *	0	- success
82  *	-EIO	- i/o error.
83  *	-ENOSPC	- insufficient disk resources.
84  */
85 int
86 extAlloc(struct inode *ip, s64 xlen, s64 pno, xad_t * xp, bool abnr)
87 {
88 	struct jfs_sb_info *sbi = JFS_SBI(ip->i_sb);
89 	s64 nxlen, nxaddr, xoff, hint, xaddr = 0;
90 	int rc;
91 	int xflag;
92 
93 	/* This blocks if we are low on resources */
94 	txBeginAnon(ip->i_sb);
95 
96 	/* Avoid race with jfs_commit_inode() */
97 	mutex_lock(&JFS_IP(ip)->commit_mutex);
98 
99 	/* validate extent length */
100 	if (xlen > MAXXLEN)
101 		xlen = MAXXLEN;
102 
103 	/* get the page's starting extent offset */
104 	xoff = pno << sbi->l2nbperpage;
105 
106 	/* check if an allocation hint was provided */
107 	if ((hint = addressXAD(xp))) {
108 		/* get the size of the extent described by the hint */
109 		nxlen = lengthXAD(xp);
110 
111 		/* check if the hint is for the portion of the file
112 		 * immediately previous to the current allocation
113 		 * request and if hint extent has the same abnr
114 		 * value as the current request.  if so, we can
115 		 * extend the hint extent to include the current
116 		 * extent if we can allocate the blocks immediately
117 		 * following the hint extent.
118 		 */
119 		if (offsetXAD(xp) + nxlen == xoff &&
120 		    abnr == ((xp->flag & XAD_NOTRECORDED) ? true : false))
121 			xaddr = hint + nxlen;
122 
123 		/* adjust the hint to the last block of the extent */
124 		hint += (nxlen - 1);
125 	}
126 
127 	/* allocate the disk blocks for the extent.  initially, extBalloc()
128 	 * will try to allocate disk blocks for the requested size (xlen).
129 	 * if this fails (xlen contiguous free blocks not avaliable), it'll
130 	 * try to allocate a smaller number of blocks (producing a smaller
131 	 * extent), with this smaller number of blocks consisting of the
132 	 * requested number of blocks rounded down to the next smaller
133 	 * power of 2 number (i.e. 16 -> 8).  it'll continue to round down
134 	 * and retry the allocation until the number of blocks to allocate
135 	 * is smaller than the number of blocks per page.
136 	 */
137 	nxlen = xlen;
138 	if ((rc = extBalloc(ip, hint ? hint : INOHINT(ip), &nxlen, &nxaddr))) {
139 		mutex_unlock(&JFS_IP(ip)->commit_mutex);
140 		return (rc);
141 	}
142 
143 	/* Allocate blocks to quota. */
144 	if (DQUOT_ALLOC_BLOCK(ip, nxlen)) {
145 		dbFree(ip, nxaddr, (s64) nxlen);
146 		mutex_unlock(&JFS_IP(ip)->commit_mutex);
147 		return -EDQUOT;
148 	}
149 
150 	/* determine the value of the extent flag */
151 	xflag = abnr ? XAD_NOTRECORDED : 0;
152 
153 	/* if we can extend the hint extent to cover the current request,
154 	 * extend it.  otherwise, insert a new extent to
155 	 * cover the current request.
156 	 */
157 	if (xaddr && xaddr == nxaddr)
158 		rc = xtExtend(0, ip, xoff, (int) nxlen, 0);
159 	else
160 		rc = xtInsert(0, ip, xflag, xoff, (int) nxlen, &nxaddr, 0);
161 
162 	/* if the extend or insert failed,
163 	 * free the newly allocated blocks and return the error.
164 	 */
165 	if (rc) {
166 		dbFree(ip, nxaddr, nxlen);
167 		DQUOT_FREE_BLOCK(ip, nxlen);
168 		mutex_unlock(&JFS_IP(ip)->commit_mutex);
169 		return (rc);
170 	}
171 
172 	/* set the results of the extent allocation */
173 	XADaddress(xp, nxaddr);
174 	XADlength(xp, nxlen);
175 	XADoffset(xp, xoff);
176 	xp->flag = xflag;
177 
178 	mark_inode_dirty(ip);
179 
180 	mutex_unlock(&JFS_IP(ip)->commit_mutex);
181 	/*
182 	 * COMMIT_SyncList flags an anonymous tlock on page that is on
183 	 * sync list.
184 	 * We need to commit the inode to get the page written disk.
185 	 */
186 	if (test_and_clear_cflag(COMMIT_Synclist,ip))
187 		jfs_commit_inode(ip, 0);
188 
189 	return (0);
190 }
191 
192 
193 #ifdef _NOTYET
/*
 * NAME:	extRealloc()
 *
 * FUNCTION:	extend the allocation of a file extent containing a
 *		partially backed last page, either in place or by moving
 *		the extent to a new set of blocks.
 *
 * PARAMETERS:
 *	ip	- the inode of the file.
 *	nxlen	- requested size (in blocks) of the resulting extent;
 *		  values above MAXXLEN are reduced to MAXXLEN.
 *	xp	- pointer to an xad.  on entry, it describes the extent
 *		  to be extended.  on successful exit, the xad
 *		  describes the newly allocated extent.
 *	abnr	- bool indicating whether the newly allocated extent
 *		  should be marked as allocated but not recorded.
 *
 * RETURN VALUES:
 *	0	- success
 *	-EDQUOT	- the quota block allocation was refused.
 *	other	- error passed back from xtUpdate(), extBrealloc(),
 *		  xtExtend() or xtTailgate() (e.g. -EIO, -ENOSPC).
 */
int extRealloc(struct inode *ip, s64 nxlen, xad_t * xp, bool abnr)
{
	struct super_block *sb = ip->i_sb;
	s64 xaddr, xlen, nxaddr, delta, xoff;
	s64 ntail, nextend, ninsert;
	int rc, nbperpage = JFS_SBI(sb)->nbperpage;
	int xflag;

	/* This blocks if we are low on resources */
	txBeginAnon(ip->i_sb);

	mutex_lock(&JFS_IP(ip)->commit_mutex);
	/* validate extent length */
	if (nxlen > MAXXLEN)
		nxlen = MAXXLEN;

	/* get the extend (partial) page's disk block address and
	 * number of blocks.
	 */
	xaddr = addressXAD(xp);
	xlen = lengthXAD(xp);
	xoff = offsetXAD(xp);

	/* if the extend page is abnr and if the request is for
	 * the extent to be allocated and recorded,
	 * make the page allocated and recorded.
	 */
	if ((xp->flag & XAD_NOTRECORDED) && !abnr) {
		xp->flag = 0;
		if ((rc = xtUpdate(0, ip, xp)))
			goto exit;
	}

	/* try to allocate the requested number of blocks for the
	 * extent.  extBrealloc() first tries to satisfy the request
	 * by extending the allocation in place. otherwise, it will
	 * try to allocate a new set of blocks large enough for the
	 * request.  in satisfying a request, it may allocate
	 * less than what was requested but will always allocate enough
	 * space as to satisfy the extend page.
	 */
	if ((rc = extBrealloc(ip, xaddr, xlen, &nxlen, &nxaddr)))
		goto exit;

	/* Allocate blocks to quota.
	 *
	 * NOTE(review): the full new length (nxlen) is charged here, not
	 * just the growth (nxlen - xlen) — confirm this is the intended
	 * accounting.
	 * NOTE(review): on failure nxlen blocks starting at nxaddr are
	 * freed.  when extBrealloc() extended in place (nxaddr == xaddr)
	 * that range appears to include the extent's original xlen
	 * blocks — confirm before enabling this code.
	 */
	if (DQUOT_ALLOC_BLOCK(ip, nxlen)) {
		dbFree(ip, nxaddr, (s64) nxlen);
		mutex_unlock(&JFS_IP(ip)->commit_mutex);
		return -EDQUOT;
	}

	/* number of blocks by which the extent grows */
	delta = nxlen - xlen;

	/* check if the extend page is not abnr but the request is abnr
	 * and the allocated disk space is for more than one page.  if this
	 * is the case, there is a mismatch of abnr between the extend page
	 * and the one or more pages following the extend page.  as a result,
	 * two extents will have to be manipulated. the first will be that
	 * of the extent of the extend page and will be manipulated thru
	 * an xtExtend() or an xtTailgate(), depending upon whether the
	 * disk allocation occurred as an inplace extension.  the second
	 * extent will be manipulated (created) through an xtInsert() and
	 * will be for the pages following the extend page.
	 */
	if (abnr && (!(xp->flag & XAD_NOTRECORDED)) && (nxlen > nbperpage)) {
		/* first extent: exactly one page; the rest is inserted */
		ntail = nbperpage;
		nextend = ntail - xlen;
		ninsert = nxlen - nbperpage;

		xflag = XAD_NOTRECORDED;
	} else {
		/* a single extent covers the whole new allocation */
		ntail = nxlen;
		nextend = delta;
		ninsert = 0;

		xflag = xp->flag;
	}

	/* if we were able to extend the disk allocation in place,
	 * extend the extent.  otherwise, move the extent to a
	 * new disk location.
	 */
	if (xaddr == nxaddr) {
		/* extend the extent; on failure release only the blocks
		 * that were added behind the original extent.
		 */
		if ((rc = xtExtend(0, ip, xoff + xlen, (int) nextend, 0))) {
			dbFree(ip, xaddr + xlen, delta);
			DQUOT_FREE_BLOCK(ip, nxlen);
			goto exit;
		}
	} else {
		/*
		 * move the extent to a new location:
		 *
		 * xtTailgate() accounts for relocated tail extent;
		 */
		if ((rc = xtTailgate(0, ip, xoff, (int) ntail, nxaddr, 0))) {
			dbFree(ip, nxaddr, nxlen);
			DQUOT_FREE_BLOCK(ip, nxlen);
			goto exit;
		}
	}


	/* check if we need to also insert a new extent */
	if (ninsert) {
		/* perform the insert.  if it fails, free the blocks
		 * to be inserted and make it appear that we only did
		 * the xtExtend() or xtTailgate() above.  the xtInsert()
		 * error is deliberately not stored in rc, so the function
		 * still returns success in that case.
		 *
		 * NOTE(review): the quota charged for the ninsert blocks
		 * is not released on this path — confirm.
		 */
		xaddr = nxaddr + ntail;
		if (xtInsert (0, ip, xflag, xoff + ntail, (int) ninsert,
			      &xaddr, 0)) {
			dbFree(ip, xaddr, (s64) ninsert);
			delta = nextend;
			nxlen = ntail;
			xflag = 0;
		}
	}

	/* set the return results */
	XADaddress(xp, nxaddr);
	XADlength(xp, nxlen);
	XADoffset(xp, xoff);
	xp->flag = xflag;

	mark_inode_dirty(ip);
exit:
	/* common exit: drop the commit mutex and hand back rc */
	mutex_unlock(&JFS_IP(ip)->commit_mutex);
	return (rc);
}
344 #endif			/* _NOTYET */
345 
346 
347 /*
348  * NAME:	extHint()
349  *
350  * FUNCTION:	produce an extent allocation hint for a file offset.
351  *
352  * PARAMETERS:
353  *	ip	- the inode of the file.
354  *	offset  - file offset for which the hint is needed.
355  *	xp	- pointer to the xad that is to be filled in with
356  *		  the hint.
357  *
358  * RETURN VALUES:
359  *	0	- success
360  *	-EIO	- i/o error.
361  */
362 int extHint(struct inode *ip, s64 offset, xad_t * xp)
363 {
364 	struct super_block *sb = ip->i_sb;
365 	struct xadlist xadl;
366 	struct lxdlist lxdl;
367 	lxd_t lxd;
368 	s64 prev;
369 	int rc, nbperpage = JFS_SBI(sb)->nbperpage;
370 
371 	/* init the hint as "no hint provided" */
372 	XADaddress(xp, 0);
373 
374 	/* determine the starting extent offset of the page previous
375 	 * to the page containing the offset.
376 	 */
377 	prev = ((offset & ~POFFSET) >> JFS_SBI(sb)->l2bsize) - nbperpage;
378 
379 	/* if the offsets in the first page of the file,
380 	 * no hint provided.
381 	 */
382 	if (prev < 0)
383 		return (0);
384 
385 	/* prepare to lookup the previous page's extent info */
386 	lxdl.maxnlxd = 1;
387 	lxdl.nlxd = 1;
388 	lxdl.lxd = &lxd;
389 	LXDoffset(&lxd, prev)
390 	LXDlength(&lxd, nbperpage);
391 
392 	xadl.maxnxad = 1;
393 	xadl.nxad = 0;
394 	xadl.xad = xp;
395 
396 	/* perform the lookup */
397 	if ((rc = xtLookupList(ip, &lxdl, &xadl, 0)))
398 		return (rc);
399 
400 	/* check if no extent exists for the previous page.
401 	 * this is possible for sparse files.
402 	 */
403 	if (xadl.nxad == 0) {
404 //		assert(ISSPARSE(ip));
405 		return (0);
406 	}
407 
408 	/* only preserve the abnr flag within the xad flags
409 	 * of the returned hint.
410 	 */
411 	xp->flag &= XAD_NOTRECORDED;
412 
413 	if(xadl.nxad != 1 || lengthXAD(xp) != nbperpage) {
414 		jfs_error(ip->i_sb, "extHint: corrupt xtree");
415 		return -EIO;
416 	}
417 
418 	return (0);
419 }
420 
421 
422 /*
423  * NAME:	extRecord()
424  *
425  * FUNCTION:	change a page with a file from not recorded to recorded.
426  *
427  * PARAMETERS:
428  *	ip	- inode of the file.
429  *	cp	- cbuf of the file page.
430  *
431  * RETURN VALUES:
432  *	0	- success
433  *	-EIO	- i/o error.
434  *	-ENOSPC	- insufficient disk resources.
435  */
436 int extRecord(struct inode *ip, xad_t * xp)
437 {
438 	int rc;
439 
440 	txBeginAnon(ip->i_sb);
441 
442 	mutex_lock(&JFS_IP(ip)->commit_mutex);
443 
444 	/* update the extent */
445 	rc = xtUpdate(0, ip, xp);
446 
447 	mutex_unlock(&JFS_IP(ip)->commit_mutex);
448 	return rc;
449 }
450 
451 
452 #ifdef _NOTYET
453 /*
454  * NAME:	extFill()
455  *
456  * FUNCTION:	allocate disk space for a file page that represents
457  *		a file hole.
458  *
459  * PARAMETERS:
460  *	ip	- the inode of the file.
461  *	cp	- cbuf of the file page represent the hole.
462  *
463  * RETURN VALUES:
464  *	0	- success
465  *	-EIO	- i/o error.
466  *	-ENOSPC	- insufficient disk resources.
467  */
468 int extFill(struct inode *ip, xad_t * xp)
469 {
470 	int rc, nbperpage = JFS_SBI(ip->i_sb)->nbperpage;
471 	s64 blkno = offsetXAD(xp) >> ip->i_blkbits;
472 
473 //	assert(ISSPARSE(ip));
474 
475 	/* initialize the extent allocation hint */
476 	XADaddress(xp, 0);
477 
478 	/* allocate an extent to fill the hole */
479 	if ((rc = extAlloc(ip, nbperpage, blkno, xp, false)))
480 		return (rc);
481 
482 	assert(lengthPXD(xp) == nbperpage);
483 
484 	return (0);
485 }
486 #endif			/* _NOTYET */
487 
488 
489 /*
490  * NAME:	extBalloc()
491  *
492  * FUNCTION:	allocate disk blocks to form an extent.
493  *
494  *		initially, we will try to allocate disk blocks for the
495  *		requested size (nblocks).  if this fails (nblocks
496  *		contiguous free blocks not avaliable), we'll try to allocate
497  *		a smaller number of blocks (producing a smaller extent), with
498  *		this smaller number of blocks consisting of the requested
499  *		number of blocks rounded down to the next smaller power of 2
500  *		number (i.e. 16 -> 8).  we'll continue to round down and
501  *		retry the allocation until the number of blocks to allocate
502  *		is smaller than the number of blocks per page.
503  *
504  * PARAMETERS:
505  *	ip	 - the inode of the file.
506  *	hint	 - disk block number to be used as an allocation hint.
507  *	*nblocks - pointer to an s64 value.  on entry, this value specifies
508  *		   the desired number of block to be allocated. on successful
509  *		   exit, this value is set to the number of blocks actually
510  *		   allocated.
511  *	blkno	 - pointer to a block address that is filled in on successful
512  *		   return with the starting block number of the newly
513  *		   allocated block range.
514  *
515  * RETURN VALUES:
516  *	0	- success
517  *	-EIO	- i/o error.
518  *	-ENOSPC	- insufficient disk resources.
519  */
520 static int
521 extBalloc(struct inode *ip, s64 hint, s64 * nblocks, s64 * blkno)
522 {
523 	struct jfs_inode_info *ji = JFS_IP(ip);
524 	struct jfs_sb_info *sbi = JFS_SBI(ip->i_sb);
525 	s64 nb, nblks, daddr, max;
526 	int rc, nbperpage = sbi->nbperpage;
527 	struct bmap *bmp = sbi->bmap;
528 	int ag;
529 
530 	/* get the number of blocks to initially attempt to allocate.
531 	 * we'll first try the number of blocks requested unless this
532 	 * number is greater than the maximum number of contiguous free
533 	 * blocks in the map. in that case, we'll start off with the
534 	 * maximum free.
535 	 */
536 	max = (s64) 1 << bmp->db_maxfreebud;
537 	if (*nblocks >= max && *nblocks > nbperpage)
538 		nb = nblks = (max > nbperpage) ? max : nbperpage;
539 	else
540 		nb = nblks = *nblocks;
541 
542 	/* try to allocate blocks */
543 	while ((rc = dbAlloc(ip, hint, nb, &daddr)) != 0) {
544 		/* if something other than an out of space error,
545 		 * stop and return this error.
546 		 */
547 		if (rc != -ENOSPC)
548 			return (rc);
549 
550 		/* decrease the allocation request size */
551 		nb = min(nblks, extRoundDown(nb));
552 
553 		/* give up if we cannot cover a page */
554 		if (nb < nbperpage)
555 			return (rc);
556 	}
557 
558 	*nblocks = nb;
559 	*blkno = daddr;
560 
561 	if (S_ISREG(ip->i_mode) && (ji->fileset == FILESYSTEM_I)) {
562 		ag = BLKTOAG(daddr, sbi);
563 		spin_lock_irq(&ji->ag_lock);
564 		if (ji->active_ag == -1) {
565 			atomic_inc(&bmp->db_active[ag]);
566 			ji->active_ag = ag;
567 		} else if (ji->active_ag != ag) {
568 			atomic_dec(&bmp->db_active[ji->active_ag]);
569 			atomic_inc(&bmp->db_active[ag]);
570 			ji->active_ag = ag;
571 		}
572 		spin_unlock_irq(&ji->ag_lock);
573 	}
574 
575 	return (0);
576 }
577 
578 
579 #ifdef _NOTYET
580 /*
581  * NAME:	extBrealloc()
582  *
583  * FUNCTION:	attempt to extend an extent's allocation.
584  *
585  *		Initially, we will try to extend the extent's allocation
586  *		in place.  If this fails, we'll try to move the extent
587  *		to a new set of blocks.  If moving the extent, we initially
588  *		will try to allocate disk blocks for the requested size
589  *		(newnblks).  if this fails (new contiguous free blocks not
590  *		avaliable), we'll try to allocate a smaller number of
591  *		blocks (producing a smaller extent), with this smaller
592  *		number of blocks consisting of the requested number of
593  *		blocks rounded down to the next smaller power of 2
594  *		number (i.e. 16 -> 8).  We'll continue to round down and
595  *		retry the allocation until the number of blocks to allocate
596  *		is smaller than the number of blocks per page.
597  *
598  * PARAMETERS:
599  *	ip	 - the inode of the file.
600  *	blkno	 - starting block number of the extents current allocation.
601  *	nblks	 - number of blocks within the extents current allocation.
602  *	newnblks - pointer to a s64 value.  on entry, this value is the
603  *		   the new desired extent size (number of blocks).  on
604  *		   successful exit, this value is set to the extent's actual
605  *		   new size (new number of blocks).
606  *	newblkno - the starting block number of the extents new allocation.
607  *
608  * RETURN VALUES:
609  *	0	- success
610  *	-EIO	- i/o error.
611  *	-ENOSPC	- insufficient disk resources.
612  */
613 static int
614 extBrealloc(struct inode *ip,
615 	    s64 blkno, s64 nblks, s64 * newnblks, s64 * newblkno)
616 {
617 	int rc;
618 
619 	/* try to extend in place */
620 	if ((rc = dbExtend(ip, blkno, nblks, *newnblks - nblks)) == 0) {
621 		*newblkno = blkno;
622 		return (0);
623 	} else {
624 		if (rc != -ENOSPC)
625 			return (rc);
626 	}
627 
628 	/* in place extension not possible.
629 	 * try to move the extent to a new set of blocks.
630 	 */
631 	return (extBalloc(ip, blkno, newnblks, newblkno));
632 }
633 #endif			/* _NOTYET */
634 
635 
/*
 * NAME:	extRoundDown()
 *
 * FUNCTION:	round down a specified number of blocks to the next
 *		smallest power of 2 number.  a value that already is a
 *		power of 2 is halved (16 -> 8, 1 -> 0); any other value is
 *		reduced to the power of 2 given by its most significant
 *		set bit (20 -> 16).
 *
 * PARAMETERS:
 *	nb	- the number of blocks to round down.
 *
 * RETURN VALUES:
 *	next smallest power of 2 number; 0 when nb is 0.
 */
static s64 extRoundDown(s64 nb)
{
	u64 k;

	/* zero has no set bit: the scan below would run off the low end
	 * and end up shifting by a negative count.
	 */
	if (nb == 0)
		return 0;

	/* find the most significant set bit of nb */
	for (k = (u64) 1 << 63; !(k & nb); k >>= 1)
		;

	/* nb has no bits below k, i.e. it is exactly k: step down once more */
	if (!((k - 1) & nb))
		k >>= 1;

	return (k);
}
664