xref: /openbmc/linux/fs/xfs/xfs_fsops.c (revision 293d5b43)
1 /*
2  * Copyright (c) 2000-2005 Silicon Graphics, Inc.
3  * All Rights Reserved.
4  *
5  * This program is free software; you can redistribute it and/or
6  * modify it under the terms of the GNU General Public License as
7  * published by the Free Software Foundation.
8  *
9  * This program is distributed in the hope that it would be useful,
10  * but WITHOUT ANY WARRANTY; without even the implied warranty of
11  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12  * GNU General Public License for more details.
13  *
14  * You should have received a copy of the GNU General Public License
15  * along with this program; if not, write the Free Software Foundation,
16  * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
17  */
18 #include "xfs.h"
19 #include "xfs_fs.h"
20 #include "xfs_shared.h"
21 #include "xfs_format.h"
22 #include "xfs_log_format.h"
23 #include "xfs_trans_resv.h"
24 #include "xfs_sb.h"
25 #include "xfs_mount.h"
26 #include "xfs_defer.h"
27 #include "xfs_da_format.h"
28 #include "xfs_da_btree.h"
29 #include "xfs_inode.h"
30 #include "xfs_trans.h"
31 #include "xfs_inode_item.h"
32 #include "xfs_error.h"
33 #include "xfs_btree.h"
34 #include "xfs_alloc_btree.h"
35 #include "xfs_alloc.h"
36 #include "xfs_rmap_btree.h"
37 #include "xfs_ialloc.h"
38 #include "xfs_fsops.h"
39 #include "xfs_itable.h"
40 #include "xfs_trans_space.h"
41 #include "xfs_rtalloc.h"
42 #include "xfs_trace.h"
43 #include "xfs_log.h"
44 #include "xfs_filestream.h"
45 #include "xfs_rmap.h"
46 
47 /*
48  * File system operations
49  */
50 
51 int
52 xfs_fs_geometry(
53 	xfs_mount_t		*mp,
54 	xfs_fsop_geom_t		*geo,
55 	int			new_version)
56 {
57 
58 	memset(geo, 0, sizeof(*geo));
59 
60 	geo->blocksize = mp->m_sb.sb_blocksize;
61 	geo->rtextsize = mp->m_sb.sb_rextsize;
62 	geo->agblocks = mp->m_sb.sb_agblocks;
63 	geo->agcount = mp->m_sb.sb_agcount;
64 	geo->logblocks = mp->m_sb.sb_logblocks;
65 	geo->sectsize = mp->m_sb.sb_sectsize;
66 	geo->inodesize = mp->m_sb.sb_inodesize;
67 	geo->imaxpct = mp->m_sb.sb_imax_pct;
68 	geo->datablocks = mp->m_sb.sb_dblocks;
69 	geo->rtblocks = mp->m_sb.sb_rblocks;
70 	geo->rtextents = mp->m_sb.sb_rextents;
71 	geo->logstart = mp->m_sb.sb_logstart;
72 	ASSERT(sizeof(geo->uuid)==sizeof(mp->m_sb.sb_uuid));
73 	memcpy(geo->uuid, &mp->m_sb.sb_uuid, sizeof(mp->m_sb.sb_uuid));
74 	if (new_version >= 2) {
75 		geo->sunit = mp->m_sb.sb_unit;
76 		geo->swidth = mp->m_sb.sb_width;
77 	}
78 	if (new_version >= 3) {
79 		geo->version = XFS_FSOP_GEOM_VERSION;
80 		geo->flags = XFS_FSOP_GEOM_FLAGS_NLINK |
81 			     XFS_FSOP_GEOM_FLAGS_DIRV2 |
82 			(xfs_sb_version_hasattr(&mp->m_sb) ?
83 				XFS_FSOP_GEOM_FLAGS_ATTR : 0) |
84 			(xfs_sb_version_hasquota(&mp->m_sb) ?
85 				XFS_FSOP_GEOM_FLAGS_QUOTA : 0) |
86 			(xfs_sb_version_hasalign(&mp->m_sb) ?
87 				XFS_FSOP_GEOM_FLAGS_IALIGN : 0) |
88 			(xfs_sb_version_hasdalign(&mp->m_sb) ?
89 				XFS_FSOP_GEOM_FLAGS_DALIGN : 0) |
90 			(xfs_sb_version_hasextflgbit(&mp->m_sb) ?
91 				XFS_FSOP_GEOM_FLAGS_EXTFLG : 0) |
92 			(xfs_sb_version_hassector(&mp->m_sb) ?
93 				XFS_FSOP_GEOM_FLAGS_SECTOR : 0) |
94 			(xfs_sb_version_hasasciici(&mp->m_sb) ?
95 				XFS_FSOP_GEOM_FLAGS_DIRV2CI : 0) |
96 			(xfs_sb_version_haslazysbcount(&mp->m_sb) ?
97 				XFS_FSOP_GEOM_FLAGS_LAZYSB : 0) |
98 			(xfs_sb_version_hasattr2(&mp->m_sb) ?
99 				XFS_FSOP_GEOM_FLAGS_ATTR2 : 0) |
100 			(xfs_sb_version_hasprojid32bit(&mp->m_sb) ?
101 				XFS_FSOP_GEOM_FLAGS_PROJID32 : 0) |
102 			(xfs_sb_version_hascrc(&mp->m_sb) ?
103 				XFS_FSOP_GEOM_FLAGS_V5SB : 0) |
104 			(xfs_sb_version_hasftype(&mp->m_sb) ?
105 				XFS_FSOP_GEOM_FLAGS_FTYPE : 0) |
106 			(xfs_sb_version_hasfinobt(&mp->m_sb) ?
107 				XFS_FSOP_GEOM_FLAGS_FINOBT : 0) |
108 			(xfs_sb_version_hassparseinodes(&mp->m_sb) ?
109 				XFS_FSOP_GEOM_FLAGS_SPINODES : 0) |
110 			(xfs_sb_version_hasrmapbt(&mp->m_sb) ?
111 				XFS_FSOP_GEOM_FLAGS_RMAPBT : 0);
112 		geo->logsectsize = xfs_sb_version_hassector(&mp->m_sb) ?
113 				mp->m_sb.sb_logsectsize : BBSIZE;
114 		geo->rtsectsize = mp->m_sb.sb_blocksize;
115 		geo->dirblocksize = mp->m_dir_geo->blksize;
116 	}
117 	if (new_version >= 4) {
118 		geo->flags |=
119 			(xfs_sb_version_haslogv2(&mp->m_sb) ?
120 				XFS_FSOP_GEOM_FLAGS_LOGV2 : 0);
121 		geo->logsunit = mp->m_sb.sb_logsunit;
122 	}
123 	return 0;
124 }
125 
126 static struct xfs_buf *
127 xfs_growfs_get_hdr_buf(
128 	struct xfs_mount	*mp,
129 	xfs_daddr_t		blkno,
130 	size_t			numblks,
131 	int			flags,
132 	const struct xfs_buf_ops *ops)
133 {
134 	struct xfs_buf		*bp;
135 
136 	bp = xfs_buf_get_uncached(mp->m_ddev_targp, numblks, flags);
137 	if (!bp)
138 		return NULL;
139 
140 	xfs_buf_zero(bp, 0, BBTOB(bp->b_length));
141 	bp->b_bn = blkno;
142 	bp->b_maps[0].bm_bn = blkno;
143 	bp->b_ops = ops;
144 
145 	return bp;
146 }
147 
/*
 * Grow the data section of the filesystem to in->newblocks filesystem blocks
 * and set the maximum inode percentage to in->imaxpct.
 *
 * Steps, in order:
 *  - validate the request: the new size must not be smaller than the current
 *    sb_dblocks, imaxpct must be within 0..100, the block count must pass
 *    xfs_sb_validate_fsb_count(), and the last sector of the new size must be
 *    readable from the data device;
 *  - work out the new AG count, dropping a trailing partial AG that would be
 *    smaller than XFS_MIN_AG_BLOCKS;
 *  - write AGF, AGFL, AGI and btree root blocks for every new AG through
 *    uncached buffers and xfs_bwrite();
 *  - in a single transaction, extend the old last AG (if it gained blocks)
 *    and apply the superblock deltas;
 *  - recompute the in-core limits that depend on the size and rewrite every
 *    secondary superblock.
 *
 * Returns 0 or a negative errno.  An error from the secondary superblock
 * pass is returned after the transaction has already committed.
 */
148 static int
149 xfs_growfs_data_private(
150 	xfs_mount_t		*mp,		/* mount point for filesystem */
151 	xfs_growfs_data_t	*in)		/* growfs data input struct */
152 {
153 	xfs_agf_t		*agf;
154 	struct xfs_agfl		*agfl;
155 	xfs_agi_t		*agi;
156 	xfs_agnumber_t		agno;
157 	xfs_extlen_t		agsize;
158 	xfs_extlen_t		tmpsize;
159 	xfs_alloc_rec_t		*arec;
160 	xfs_buf_t		*bp;
161 	int			bucket;
162 	int			dpct;
163 	int			error, saved_error = 0;
164 	xfs_agnumber_t		nagcount;
165 	xfs_agnumber_t		nagimax = 0;
166 	xfs_rfsblock_t		nb, nb_mod;
167 	xfs_rfsblock_t		new;
168 	xfs_rfsblock_t		nfree;
169 	xfs_agnumber_t		oagcount;
170 	int			pct;
171 	xfs_trans_t		*tp;
172
173 	nb = in->newblocks;
174 	pct = in->imaxpct;
	/* Shrinking and out-of-range inode percentages are rejected. */
175 	if (nb < mp->m_sb.sb_dblocks || pct < 0 || pct > 100)
176 		return -EINVAL;
177 	if ((error = xfs_sb_validate_fsb_count(&mp->m_sb, nb)))
178 		return error;
	/* Change in sb_imax_pct, applied in the transaction below. */
179 	dpct = pct - mp->m_sb.sb_imax_pct;
	/*
	 * Read the last sector of the requested size and drop the buffer
	 * straight away; only the success of the read matters (presumably
	 * to check that the device really extends that far).
	 */
180 	error = xfs_buf_read_uncached(mp->m_ddev_targp,
181 				XFS_FSB_TO_BB(mp, nb) - XFS_FSS_TO_BB(mp, 1),
182 				XFS_FSS_TO_BB(mp, 1), 0, &bp, NULL);
183 	if (error)
184 		return error;
185 	xfs_buf_relse(bp);
186
	/*
	 * Split nb into a whole number of AGs (left in "new") and a
	 * remainder (nb_mod).  A trailing partial AG smaller than
	 * XFS_MIN_AG_BLOCKS is dropped and nb is rounded down accordingly.
	 */
187 	new = nb;	/* use new as a temporary here */
188 	nb_mod = do_div(new, mp->m_sb.sb_agblocks);
189 	nagcount = new + (nb_mod != 0);
190 	if (nb_mod && nb_mod < XFS_MIN_AG_BLOCKS) {
191 		nagcount--;
192 		nb = (xfs_rfsblock_t)nagcount * mp->m_sb.sb_agblocks;
193 		if (nb < mp->m_sb.sb_dblocks)
194 			return -EINVAL;
195 	}
	/* From here on "new" counts the blocks still to be accounted for. */
196 	new = nb - mp->m_sb.sb_dblocks;
197 	oagcount = mp->m_sb.sb_agcount;
198
199 	/* allocate the new per-ag structures */
	/*
	 * NOTE(review): neither the early return after xfs_trans_alloc() nor
	 * the error0 path undoes xfs_initialize_perag() - confirm whether
	 * that is handled elsewhere.
	 */
200 	if (nagcount > oagcount) {
201 		error = xfs_initialize_perag(mp, nagcount, &nagimax);
202 		if (error)
203 			return error;
204 	}
205
206 	error = xfs_trans_alloc(mp, &M_RES(mp)->tr_growdata,
207 			XFS_GROWFS_SPACE_RES(mp), 0, XFS_TRANS_RESERVE, &tp);
208 	if (error)
209 		return error;
210
211 	/*
212 	 * Write new AG headers to disk. Non-transactional, but written
213 	 * synchronously so they are completed prior to the growfs transaction
214 	 * being logged.
215 	 */
	/*
	 * The new AGs are walked from the last one down to oagcount.  Each
	 * pass subtracts that AG's size from "new", so after the loop "new"
	 * holds only the blocks added to the old last AG and agno is
	 * oagcount - 1.
	 */
216 	nfree = 0;
217 	for (agno = nagcount - 1; agno >= oagcount; agno--, new -= agsize) {
218 		__be32	*agfl_bno;
219
220 		/*
221 		 * AG freespace header block
222 		 */
223 		bp = xfs_growfs_get_hdr_buf(mp,
224 				XFS_AG_DADDR(mp, agno, XFS_AGF_DADDR(mp)),
225 				XFS_FSS_TO_BB(mp, 1), 0,
226 				&xfs_agf_buf_ops);
227 		if (!bp) {
228 			error = -ENOMEM;
229 			goto error0;
230 		}
231
232 		agf = XFS_BUF_TO_AGF(bp);
233 		agf->agf_magicnum = cpu_to_be32(XFS_AGF_MAGIC);
234 		agf->agf_versionnum = cpu_to_be32(XFS_AGF_VERSION);
235 		agf->agf_seqno = cpu_to_be32(agno);
		/* The last AG takes whatever remains; all others are full size. */
236 		if (agno == nagcount - 1)
237 			agsize =
238 				nb -
239 				(agno * (xfs_rfsblock_t)mp->m_sb.sb_agblocks);
240 		else
241 			agsize = mp->m_sb.sb_agblocks;
242 		agf->agf_length = cpu_to_be32(agsize);
243 		agf->agf_roots[XFS_BTNUM_BNOi] = cpu_to_be32(XFS_BNO_BLOCK(mp));
244 		agf->agf_roots[XFS_BTNUM_CNTi] = cpu_to_be32(XFS_CNT_BLOCK(mp));
245 		agf->agf_levels[XFS_BTNUM_BNOi] = cpu_to_be32(1);
246 		agf->agf_levels[XFS_BTNUM_CNTi] = cpu_to_be32(1);
247 		if (xfs_sb_version_hasrmapbt(&mp->m_sb)) {
248 			agf->agf_roots[XFS_BTNUM_RMAPi] =
249 						cpu_to_be32(XFS_RMAP_BLOCK(mp));
250 			agf->agf_levels[XFS_BTNUM_RMAPi] = cpu_to_be32(1);
251 		}
252
		/* The free list starts out with no entries (flcount == 0). */
253 		agf->agf_flfirst = cpu_to_be32(1);
254 		agf->agf_fllast = 0;
255 		agf->agf_flcount = 0;
		/* Everything past the preallocated header blocks is free. */
256 		tmpsize = agsize - mp->m_ag_prealloc_blocks;
257 		agf->agf_freeblks = cpu_to_be32(tmpsize);
258 		agf->agf_longest = cpu_to_be32(tmpsize);
259 		if (xfs_sb_version_hascrc(&mp->m_sb))
260 			uuid_copy(&agf->agf_uuid, &mp->m_sb.sb_meta_uuid);
261
262 		error = xfs_bwrite(bp);
263 		xfs_buf_relse(bp);
264 		if (error)
265 			goto error0;
266
267 		/*
268 		 * AG freelist header block
269 		 */
270 		bp = xfs_growfs_get_hdr_buf(mp,
271 				XFS_AG_DADDR(mp, agno, XFS_AGFL_DADDR(mp)),
272 				XFS_FSS_TO_BB(mp, 1), 0,
273 				&xfs_agfl_buf_ops);
274 		if (!bp) {
275 			error = -ENOMEM;
276 			goto error0;
277 		}
278
279 		agfl = XFS_BUF_TO_AGFL(bp);
		/* The AGFL header fields are only written on CRC filesystems. */
280 		if (xfs_sb_version_hascrc(&mp->m_sb)) {
281 			agfl->agfl_magicnum = cpu_to_be32(XFS_AGFL_MAGIC);
282 			agfl->agfl_seqno = cpu_to_be32(agno);
283 			uuid_copy(&agfl->agfl_uuid, &mp->m_sb.sb_meta_uuid);
284 		}
285
286 		agfl_bno = XFS_BUF_TO_AGFL_BNO(mp, bp);
287 		for (bucket = 0; bucket < XFS_AGFL_SIZE(mp); bucket++)
288 			agfl_bno[bucket] = cpu_to_be32(NULLAGBLOCK);
289
290 		error = xfs_bwrite(bp);
291 		xfs_buf_relse(bp);
292 		if (error)
293 			goto error0;
294
295 		/*
296 		 * AG inode header block
297 		 */
298 		bp = xfs_growfs_get_hdr_buf(mp,
299 				XFS_AG_DADDR(mp, agno, XFS_AGI_DADDR(mp)),
300 				XFS_FSS_TO_BB(mp, 1), 0,
301 				&xfs_agi_buf_ops);
302 		if (!bp) {
303 			error = -ENOMEM;
304 			goto error0;
305 		}
306
307 		agi = XFS_BUF_TO_AGI(bp);
308 		agi->agi_magicnum = cpu_to_be32(XFS_AGI_MAGIC);
309 		agi->agi_versionnum = cpu_to_be32(XFS_AGI_VERSION);
310 		agi->agi_seqno = cpu_to_be32(agno);
311 		agi->agi_length = cpu_to_be32(agsize);
312 		agi->agi_count = 0;
313 		agi->agi_root = cpu_to_be32(XFS_IBT_BLOCK(mp));
314 		agi->agi_level = cpu_to_be32(1);
315 		agi->agi_freecount = 0;
316 		agi->agi_newino = cpu_to_be32(NULLAGINO);
317 		agi->agi_dirino = cpu_to_be32(NULLAGINO);
318 		if (xfs_sb_version_hascrc(&mp->m_sb))
319 			uuid_copy(&agi->agi_uuid, &mp->m_sb.sb_meta_uuid);
320 		if (xfs_sb_version_hasfinobt(&mp->m_sb)) {
321 			agi->agi_free_root = cpu_to_be32(XFS_FIBT_BLOCK(mp));
322 			agi->agi_free_level = cpu_to_be32(1);
323 		}
324 		for (bucket = 0; bucket < XFS_AGI_UNLINKED_BUCKETS; bucket++)
325 			agi->agi_unlinked[bucket] = cpu_to_be32(NULLAGINO);
326
327 		error = xfs_bwrite(bp);
328 		xfs_buf_relse(bp);
329 		if (error)
330 			goto error0;
331
332 		/*
333 		 * BNO btree root block
334 		 */
335 		bp = xfs_growfs_get_hdr_buf(mp,
336 				XFS_AGB_TO_DADDR(mp, agno, XFS_BNO_BLOCK(mp)),
337 				BTOBB(mp->m_sb.sb_blocksize), 0,
338 				&xfs_allocbt_buf_ops);
339
340 		if (!bp) {
341 			error = -ENOMEM;
342 			goto error0;
343 		}
344
345 		if (xfs_sb_version_hascrc(&mp->m_sb))
346 			xfs_btree_init_block(mp, bp, XFS_ABTB_CRC_MAGIC, 0, 1,
347 						agno, XFS_BTREE_CRC_BLOCKS);
348 		else
349 			xfs_btree_init_block(mp, bp, XFS_ABTB_MAGIC, 0, 1,
350 						agno, 0);
351
		/* One record: the free extent after the preallocated blocks. */
352 		arec = XFS_ALLOC_REC_ADDR(mp, XFS_BUF_TO_BLOCK(bp), 1);
353 		arec->ar_startblock = cpu_to_be32(mp->m_ag_prealloc_blocks);
354 		arec->ar_blockcount = cpu_to_be32(
355 			agsize - be32_to_cpu(arec->ar_startblock));
356
357 		error = xfs_bwrite(bp);
358 		xfs_buf_relse(bp);
359 		if (error)
360 			goto error0;
361
362 		/*
363 		 * CNT btree root block
364 		 */
365 		bp = xfs_growfs_get_hdr_buf(mp,
366 				XFS_AGB_TO_DADDR(mp, agno, XFS_CNT_BLOCK(mp)),
367 				BTOBB(mp->m_sb.sb_blocksize), 0,
368 				&xfs_allocbt_buf_ops);
369 		if (!bp) {
370 			error = -ENOMEM;
371 			goto error0;
372 		}
373
374 		if (xfs_sb_version_hascrc(&mp->m_sb))
375 			xfs_btree_init_block(mp, bp, XFS_ABTC_CRC_MAGIC, 0, 1,
376 						agno, XFS_BTREE_CRC_BLOCKS);
377 		else
378 			xfs_btree_init_block(mp, bp, XFS_ABTC_MAGIC, 0, 1,
379 						agno, 0);
380
381 		arec = XFS_ALLOC_REC_ADDR(mp, XFS_BUF_TO_BLOCK(bp), 1);
382 		arec->ar_startblock = cpu_to_be32(mp->m_ag_prealloc_blocks);
383 		arec->ar_blockcount = cpu_to_be32(
384 			agsize - be32_to_cpu(arec->ar_startblock));
		/* The AG's free space is added to nfree once, from this record. */
385 		nfree += be32_to_cpu(arec->ar_blockcount);
386
387 		error = xfs_bwrite(bp);
388 		xfs_buf_relse(bp);
389 		if (error)
390 			goto error0;
391
392 		/* RMAP btree root block */
393 		if (xfs_sb_version_hasrmapbt(&mp->m_sb)) {
394 			struct xfs_rmap_rec	*rrec;
395 			struct xfs_btree_block	*block;
396
397 			bp = xfs_growfs_get_hdr_buf(mp,
398 				XFS_AGB_TO_DADDR(mp, agno, XFS_RMAP_BLOCK(mp)),
399 				BTOBB(mp->m_sb.sb_blocksize), 0,
400 				&xfs_rmapbt_buf_ops);
401 			if (!bp) {
402 				error = -ENOMEM;
403 				goto error0;
404 			}
405
			/*
			 * The block is initialised with zero records; each
			 * record added below bumps bb_numrecs by one.
			 */
406 			xfs_btree_init_block(mp, bp, XFS_RMAP_CRC_MAGIC, 0, 0,
407 						agno, XFS_BTREE_CRC_BLOCKS);
408 			block = XFS_BUF_TO_BLOCK(bp);
409
410
411 			/*
412 			 * mark the AG header regions as static metadata. The BNO
413 			 * btree block is the first block after the headers, so
414 			 * its location defines the size of region the static
415 			 * metadata consumes.
416 			 *
417 			 * Note: unlike mkfs, we never have to account for log
418 			 * space when growing the data regions
419 			 */
420 			rrec = XFS_RMAP_REC_ADDR(block, 1);
421 			rrec->rm_startblock = 0;
422 			rrec->rm_blockcount = cpu_to_be32(XFS_BNO_BLOCK(mp));
423 			rrec->rm_owner = cpu_to_be64(XFS_RMAP_OWN_FS);
424 			rrec->rm_offset = 0;
425 			be16_add_cpu(&block->bb_numrecs, 1);
426
427 			/* account freespace btree root blocks */
428 			rrec = XFS_RMAP_REC_ADDR(block, 2);
429 			rrec->rm_startblock = cpu_to_be32(XFS_BNO_BLOCK(mp));
430 			rrec->rm_blockcount = cpu_to_be32(2);
431 			rrec->rm_owner = cpu_to_be64(XFS_RMAP_OWN_AG);
432 			rrec->rm_offset = 0;
433 			be16_add_cpu(&block->bb_numrecs, 1);
434
435 			/* account inode btree root blocks */
436 			rrec = XFS_RMAP_REC_ADDR(block, 3);
437 			rrec->rm_startblock = cpu_to_be32(XFS_IBT_BLOCK(mp));
438 			rrec->rm_blockcount = cpu_to_be32(XFS_RMAP_BLOCK(mp) -
439 							XFS_IBT_BLOCK(mp));
440 			rrec->rm_owner = cpu_to_be64(XFS_RMAP_OWN_INOBT);
441 			rrec->rm_offset = 0;
442 			be16_add_cpu(&block->bb_numrecs, 1);
443
444 			/* account for rmap btree root */
445 			rrec = XFS_RMAP_REC_ADDR(block, 4);
446 			rrec->rm_startblock = cpu_to_be32(XFS_RMAP_BLOCK(mp));
447 			rrec->rm_blockcount = cpu_to_be32(1);
448 			rrec->rm_owner = cpu_to_be64(XFS_RMAP_OWN_AG);
449 			rrec->rm_offset = 0;
450 			be16_add_cpu(&block->bb_numrecs, 1);
451
452 			error = xfs_bwrite(bp);
453 			xfs_buf_relse(bp);
454 			if (error)
455 				goto error0;
456 		}
457
458 		/*
459 		 * INO btree root block
460 		 */
461 		bp = xfs_growfs_get_hdr_buf(mp,
462 				XFS_AGB_TO_DADDR(mp, agno, XFS_IBT_BLOCK(mp)),
463 				BTOBB(mp->m_sb.sb_blocksize), 0,
464 				&xfs_inobt_buf_ops);
465 		if (!bp) {
466 			error = -ENOMEM;
467 			goto error0;
468 		}
469
470 		if (xfs_sb_version_hascrc(&mp->m_sb))
471 			xfs_btree_init_block(mp, bp, XFS_IBT_CRC_MAGIC, 0, 0,
472 						agno, XFS_BTREE_CRC_BLOCKS);
473 		else
474 			xfs_btree_init_block(mp, bp, XFS_IBT_MAGIC, 0, 0,
475 						agno, 0);
476
477 		error = xfs_bwrite(bp);
478 		xfs_buf_relse(bp);
479 		if (error)
480 			goto error0;
481
482 		/*
483 		 * FINO btree root block
484 		 */
485 		if (xfs_sb_version_hasfinobt(&mp->m_sb)) {
486 			bp = xfs_growfs_get_hdr_buf(mp,
487 				XFS_AGB_TO_DADDR(mp, agno, XFS_FIBT_BLOCK(mp)),
488 				BTOBB(mp->m_sb.sb_blocksize), 0,
489 				&xfs_inobt_buf_ops);
490 			if (!bp) {
491 				error = -ENOMEM;
492 				goto error0;
493 			}
494
495 			if (xfs_sb_version_hascrc(&mp->m_sb))
496 				xfs_btree_init_block(mp, bp, XFS_FIBT_CRC_MAGIC,
497 						     0, 0, agno,
498 						     XFS_BTREE_CRC_BLOCKS);
499 			else
500 				xfs_btree_init_block(mp, bp, XFS_FIBT_MAGIC, 0,
501 						     0, agno, 0);
502
503 			error = xfs_bwrite(bp);
504 			xfs_buf_relse(bp);
505 			if (error)
506 				goto error0;
507 		}
508
509 	}
510 	xfs_trans_agblocks_delta(tp, nfree);
511 	/*
512 	 * There are new blocks in the old last a.g.
513 	 */
	/* agno is oagcount - 1 here, i.e. the AG that was last before the grow. */
514 	if (new) {
515 		struct xfs_owner_info	oinfo;
516
517 		/*
518 		 * Change the agi length.
519 		 */
520 		error = xfs_ialloc_read_agi(mp, tp, agno, &bp);
521 		if (error) {
522 			goto error0;
523 		}
524 		ASSERT(bp);
525 		agi = XFS_BUF_TO_AGI(bp);
526 		be32_add_cpu(&agi->agi_length, new);
527 		ASSERT(nagcount == oagcount ||
528 		       be32_to_cpu(agi->agi_length) == mp->m_sb.sb_agblocks);
529 		xfs_ialloc_log_agi(tp, bp, XFS_AGI_LENGTH);
530 		/*
531 		 * Change agf length.
532 		 */
533 		error = xfs_alloc_read_agf(mp, tp, agno, 0, &bp);
534 		if (error) {
535 			goto error0;
536 		}
537 		ASSERT(bp);
538 		agf = XFS_BUF_TO_AGF(bp);
539 		be32_add_cpu(&agf->agf_length, new);
540 		ASSERT(be32_to_cpu(agf->agf_length) ==
541 		       be32_to_cpu(agi->agi_length));
542
543 		xfs_alloc_log_agf(tp, bp, XFS_AGF_LENGTH);
544
545 		/*
546 		 * Free the new space.
547 		 *
548 		 * XFS_RMAP_OWN_NULL is used here to tell the rmap btree that
549 		 * this doesn't actually exist in the rmap btree.
550 		 */
551 		xfs_rmap_ag_owner(&oinfo, XFS_RMAP_OWN_NULL);
552 		error = xfs_free_extent(tp,
553 				XFS_AGB_TO_FSB(mp, agno,
554 					be32_to_cpu(agf->agf_length) - new),
555 				new, &oinfo);
556 		if (error)
557 			goto error0;
558 	}
559
560 	/*
561 	 * Update changed superblock fields transactionally. These are not
562 	 * seen by the rest of the world until the transaction commit applies
563 	 * them atomically to the superblock.
564 	 */
565 	if (nagcount > oagcount)
566 		xfs_trans_mod_sb(tp, XFS_TRANS_SB_AGCOUNT, nagcount - oagcount);
567 	if (nb > mp->m_sb.sb_dblocks)
568 		xfs_trans_mod_sb(tp, XFS_TRANS_SB_DBLOCKS,
569 				 nb - mp->m_sb.sb_dblocks);
570 	if (nfree)
571 		xfs_trans_mod_sb(tp, XFS_TRANS_SB_FDBLOCKS, nfree);
572 	if (dpct)
573 		xfs_trans_mod_sb(tp, XFS_TRANS_SB_IMAXPCT, dpct);
574 	xfs_trans_set_sync(tp);
575 	error = xfs_trans_commit(tp);
576 	if (error)
577 		return error;
578
579 	/* New allocation groups fully initialized, so update mount struct */
580 	if (nagimax)
581 		mp->m_maxagi = nagimax;
	/*
	 * Recompute the in-core inode limit from the data block count and
	 * inode percentage; a percentage of zero leaves the limit at 0.
	 */
582 	if (mp->m_sb.sb_imax_pct) {
583 		__uint64_t icount = mp->m_sb.sb_dblocks * mp->m_sb.sb_imax_pct;
584 		do_div(icount, 100);
585 		mp->m_maxicount = icount << mp->m_sb.sb_inopblog;
586 	} else
587 		mp->m_maxicount = 0;
588 	xfs_set_low_space_thresholds(mp);
589 	mp->m_alloc_set_aside = xfs_alloc_set_aside(mp);
590
591 	/* update secondary superblocks. */
592 	for (agno = 1; agno < nagcount; agno++) {
593 		error = 0;
594 		/*
595 		 * new secondary superblocks need to be zeroed, not read from
596 		 * disk as the contents of the new area we are growing into is
597 		 * completely unknown.
598 		 */
599 		if (agno < oagcount) {
600 			error = xfs_trans_read_buf(mp, NULL, mp->m_ddev_targp,
601 				  XFS_AGB_TO_DADDR(mp, agno, XFS_SB_BLOCK(mp)),
602 				  XFS_FSS_TO_BB(mp, 1), 0, &bp,
603 				  &xfs_sb_buf_ops);
604 		} else {
605 			bp = xfs_trans_get_buf(NULL, mp->m_ddev_targp,
606 				  XFS_AGB_TO_DADDR(mp, agno, XFS_SB_BLOCK(mp)),
607 				  XFS_FSS_TO_BB(mp, 1), 0);
608 			if (bp) {
609 				bp->b_ops = &xfs_sb_buf_ops;
610 				xfs_buf_zero(bp, 0, BBTOB(bp->b_length));
611 			} else
612 				error = -ENOMEM;
613 		}
614
615 		/*
616 		 * If we get an error reading or writing alternate superblocks,
617 		 * continue.  xfs_repair chooses the "best" superblock based
618 		 * on most matches; if we break early, we'll leave more
619 		 * superblocks un-updated than updated, and xfs_repair may
620 		 * pick them over the properly-updated primary.
621 		 */
		/*
		 * Note that the "reading" message below is also what gets
		 * logged when xfs_trans_get_buf() fails for a new AG.
		 */
622 		if (error) {
623 			xfs_warn(mp,
624 		"error %d reading secondary superblock for ag %d",
625 				error, agno);
626 			saved_error = error;
627 			continue;
628 		}
629 		xfs_sb_to_disk(XFS_BUF_TO_SBP(bp), &mp->m_sb);
630
631 		error = xfs_bwrite(bp);
632 		xfs_buf_relse(bp);
633 		if (error) {
634 			xfs_warn(mp,
635 		"write error %d updating secondary superblock for ag %d",
636 				error, agno);
637 			saved_error = error;
638 			continue;
639 		}
640 	}
	/* Report the most recent secondary superblock failure, if any. */
641 	return saved_error ? saved_error : error;
642
	/* Failure before commit: cancel the transaction and return the error. */
643  error0:
644 	xfs_trans_cancel(tp);
645 	return error;
646 }
647 
648 static int
649 xfs_growfs_log_private(
650 	xfs_mount_t		*mp,	/* mount point for filesystem */
651 	xfs_growfs_log_t	*in)	/* growfs log input struct */
652 {
653 	xfs_extlen_t		nb;
654 
655 	nb = in->newblocks;
656 	if (nb < XFS_MIN_LOG_BLOCKS || nb < XFS_B_TO_FSB(mp, XFS_MIN_LOG_BYTES))
657 		return -EINVAL;
658 	if (nb == mp->m_sb.sb_logblocks &&
659 	    in->isint == (mp->m_sb.sb_logstart != 0))
660 		return -EINVAL;
661 	/*
662 	 * Moving the log is hard, need new interfaces to sync
663 	 * the log first, hold off all activity while moving it.
664 	 * Can have shorter or longer log in the same space,
665 	 * or transform internal to external log or vice versa.
666 	 */
667 	return -ENOSYS;
668 }
669 
670 /*
671  * protected versions of growfs function acquire and release locks on the mount
672  * point - exported through ioctls: XFS_IOC_FSGROWFSDATA, XFS_IOC_FSGROWFSLOG,
673  * XFS_IOC_FSGROWFSRT
674  */
675 
676 
677 int
678 xfs_growfs_data(
679 	xfs_mount_t		*mp,
680 	xfs_growfs_data_t	*in)
681 {
682 	int error;
683 
684 	if (!capable(CAP_SYS_ADMIN))
685 		return -EPERM;
686 	if (!mutex_trylock(&mp->m_growlock))
687 		return -EWOULDBLOCK;
688 	error = xfs_growfs_data_private(mp, in);
689 	/*
690 	 * Increment the generation unconditionally, the error could be from
691 	 * updating the secondary superblocks, in which case the new size
692 	 * is live already.
693 	 */
694 	mp->m_generation++;
695 	mutex_unlock(&mp->m_growlock);
696 	return error;
697 }
698 
699 int
700 xfs_growfs_log(
701 	xfs_mount_t		*mp,
702 	xfs_growfs_log_t	*in)
703 {
704 	int error;
705 
706 	if (!capable(CAP_SYS_ADMIN))
707 		return -EPERM;
708 	if (!mutex_trylock(&mp->m_growlock))
709 		return -EWOULDBLOCK;
710 	error = xfs_growfs_log_private(mp, in);
711 	mutex_unlock(&mp->m_growlock);
712 	return error;
713 }
714 
715 /*
716  * exported through ioctl XFS_IOC_FSCOUNTS
717  */
718 
719 int
720 xfs_fs_counts(
721 	xfs_mount_t		*mp,
722 	xfs_fsop_counts_t	*cnt)
723 {
724 	cnt->allocino = percpu_counter_read_positive(&mp->m_icount);
725 	cnt->freeino = percpu_counter_read_positive(&mp->m_ifree);
726 	cnt->freedata = percpu_counter_read_positive(&mp->m_fdblocks) -
727 						mp->m_alloc_set_aside;
728 
729 	spin_lock(&mp->m_sb_lock);
730 	cnt->freertx = mp->m_sb.sb_frextents;
731 	spin_unlock(&mp->m_sb_lock);
732 	return 0;
733 }
734 
735 /*
736  * exported through ioctl XFS_IOC_SET_RESBLKS & XFS_IOC_GET_RESBLKS
737  *
738  * xfs_reserve_blocks is called to set m_resblks
739  * in the in-core mount table. The number of unused reserved blocks
740  * is kept in m_resblks_avail.
741  *
742  * Reserve the requested number of blocks if available. Otherwise return
743  * as many as possible to satisfy the request. The actual number
744  * reserved are returned in outval
745  *
746  * A null inval pointer indicates that only the current reserved blocks
747  * available  should  be returned no settings are changed.
748  */
749 
750 int
751 xfs_reserve_blocks(
752 	xfs_mount_t             *mp,
753 	__uint64_t              *inval,
754 	xfs_fsop_resblks_t      *outval)
755 {
756 	__int64_t		lcounter, delta;
757 	__int64_t		fdblks_delta = 0;
758 	__uint64_t		request;
759 	__int64_t		free;
760 	int			error = 0;
761 
762 	/* If inval is null, report current values and return */
763 	if (inval == (__uint64_t *)NULL) {
764 		if (!outval)
765 			return -EINVAL;
766 		outval->resblks = mp->m_resblks;
767 		outval->resblks_avail = mp->m_resblks_avail;
768 		return 0;
769 	}
770 
771 	request = *inval;
772 
773 	/*
774 	 * With per-cpu counters, this becomes an interesting problem. we need
775 	 * to work out if we are freeing or allocation blocks first, then we can
776 	 * do the modification as necessary.
777 	 *
778 	 * We do this under the m_sb_lock so that if we are near ENOSPC, we will
779 	 * hold out any changes while we work out what to do. This means that
780 	 * the amount of free space can change while we do this, so we need to
781 	 * retry if we end up trying to reserve more space than is available.
782 	 */
783 	spin_lock(&mp->m_sb_lock);
784 
785 	/*
786 	 * If our previous reservation was larger than the current value,
787 	 * then move any unused blocks back to the free pool. Modify the resblks
788 	 * counters directly since we shouldn't have any problems unreserving
789 	 * space.
790 	 */
791 	if (mp->m_resblks > request) {
792 		lcounter = mp->m_resblks_avail - request;
793 		if (lcounter  > 0) {		/* release unused blocks */
794 			fdblks_delta = lcounter;
795 			mp->m_resblks_avail -= lcounter;
796 		}
797 		mp->m_resblks = request;
798 		if (fdblks_delta) {
799 			spin_unlock(&mp->m_sb_lock);
800 			error = xfs_mod_fdblocks(mp, fdblks_delta, 0);
801 			spin_lock(&mp->m_sb_lock);
802 		}
803 
804 		goto out;
805 	}
806 
807 	/*
808 	 * If the request is larger than the current reservation, reserve the
809 	 * blocks before we update the reserve counters. Sample m_fdblocks and
810 	 * perform a partial reservation if the request exceeds free space.
811 	 */
812 	error = -ENOSPC;
813 	do {
814 		free = percpu_counter_sum(&mp->m_fdblocks) -
815 						mp->m_alloc_set_aside;
816 		if (!free)
817 			break;
818 
819 		delta = request - mp->m_resblks;
820 		lcounter = free - delta;
821 		if (lcounter < 0)
822 			/* We can't satisfy the request, just get what we can */
823 			fdblks_delta = free;
824 		else
825 			fdblks_delta = delta;
826 
827 		/*
828 		 * We'll either succeed in getting space from the free block
829 		 * count or we'll get an ENOSPC. If we get a ENOSPC, it means
830 		 * things changed while we were calculating fdblks_delta and so
831 		 * we should try again to see if there is anything left to
832 		 * reserve.
833 		 *
834 		 * Don't set the reserved flag here - we don't want to reserve
835 		 * the extra reserve blocks from the reserve.....
836 		 */
837 		spin_unlock(&mp->m_sb_lock);
838 		error = xfs_mod_fdblocks(mp, -fdblks_delta, 0);
839 		spin_lock(&mp->m_sb_lock);
840 	} while (error == -ENOSPC);
841 
842 	/*
843 	 * Update the reserve counters if blocks have been successfully
844 	 * allocated.
845 	 */
846 	if (!error && fdblks_delta) {
847 		mp->m_resblks += fdblks_delta;
848 		mp->m_resblks_avail += fdblks_delta;
849 	}
850 
851 out:
852 	if (outval) {
853 		outval->resblks = mp->m_resblks;
854 		outval->resblks_avail = mp->m_resblks_avail;
855 	}
856 
857 	spin_unlock(&mp->m_sb_lock);
858 	return error;
859 }
860 
861 int
862 xfs_fs_goingdown(
863 	xfs_mount_t	*mp,
864 	__uint32_t	inflags)
865 {
866 	switch (inflags) {
867 	case XFS_FSOP_GOING_FLAGS_DEFAULT: {
868 		struct super_block *sb = freeze_bdev(mp->m_super->s_bdev);
869 
870 		if (sb && !IS_ERR(sb)) {
871 			xfs_force_shutdown(mp, SHUTDOWN_FORCE_UMOUNT);
872 			thaw_bdev(sb->s_bdev, sb);
873 		}
874 
875 		break;
876 	}
877 	case XFS_FSOP_GOING_FLAGS_LOGFLUSH:
878 		xfs_force_shutdown(mp, SHUTDOWN_FORCE_UMOUNT);
879 		break;
880 	case XFS_FSOP_GOING_FLAGS_NOLOGFLUSH:
881 		xfs_force_shutdown(mp,
882 				SHUTDOWN_FORCE_UMOUNT | SHUTDOWN_LOG_IO_ERROR);
883 		break;
884 	default:
885 		return -EINVAL;
886 	}
887 
888 	return 0;
889 }
890 
891 /*
892  * Force a shutdown of the filesystem instantly while keeping the filesystem
893  * consistent. We don't do an unmount here; just shutdown the shop, make sure
894  * that absolutely nothing persistent happens to this filesystem after this
895  * point.
896  */
897 void
898 xfs_do_force_shutdown(
899 	xfs_mount_t	*mp,
900 	int		flags,
901 	char		*fname,
902 	int		lnnum)
903 {
904 	int		logerror;
905 
906 	logerror = flags & SHUTDOWN_LOG_IO_ERROR;
907 
908 	if (!(flags & SHUTDOWN_FORCE_UMOUNT)) {
909 		xfs_notice(mp,
910 	"%s(0x%x) called from line %d of file %s.  Return address = 0x%p",
911 			__func__, flags, lnnum, fname, __return_address);
912 	}
913 	/*
914 	 * No need to duplicate efforts.
915 	 */
916 	if (XFS_FORCED_SHUTDOWN(mp) && !logerror)
917 		return;
918 
919 	/*
920 	 * This flags XFS_MOUNT_FS_SHUTDOWN, makes sure that we don't
921 	 * queue up anybody new on the log reservations, and wakes up
922 	 * everybody who's sleeping on log reservations to tell them
923 	 * the bad news.
924 	 */
925 	if (xfs_log_force_umount(mp, logerror))
926 		return;
927 
928 	if (flags & SHUTDOWN_CORRUPT_INCORE) {
929 		xfs_alert_tag(mp, XFS_PTAG_SHUTDOWN_CORRUPT,
930     "Corruption of in-memory data detected.  Shutting down filesystem");
931 		if (XFS_ERRLEVEL_HIGH <= xfs_error_level)
932 			xfs_stack_trace();
933 	} else if (!(flags & SHUTDOWN_FORCE_UMOUNT)) {
934 		if (logerror) {
935 			xfs_alert_tag(mp, XFS_PTAG_SHUTDOWN_LOGERROR,
936 		"Log I/O Error Detected.  Shutting down filesystem");
937 		} else if (flags & SHUTDOWN_DEVICE_REQ) {
938 			xfs_alert_tag(mp, XFS_PTAG_SHUTDOWN_IOERROR,
939 		"All device paths lost.  Shutting down filesystem");
940 		} else if (!(flags & SHUTDOWN_REMOTE_REQ)) {
941 			xfs_alert_tag(mp, XFS_PTAG_SHUTDOWN_IOERROR,
942 		"I/O Error Detected. Shutting down filesystem");
943 		}
944 	}
945 	if (!(flags & SHUTDOWN_FORCE_UMOUNT)) {
946 		xfs_alert(mp,
947 	"Please umount the filesystem and rectify the problem(s)");
948 	}
949 }
950