xref: /openbmc/linux/fs/xfs/xfs_fsops.c (revision bc5aa3a0)
1 /*
2  * Copyright (c) 2000-2005 Silicon Graphics, Inc.
3  * All Rights Reserved.
4  *
5  * This program is free software; you can redistribute it and/or
6  * modify it under the terms of the GNU General Public License as
7  * published by the Free Software Foundation.
8  *
9  * This program is distributed in the hope that it would be useful,
10  * but WITHOUT ANY WARRANTY; without even the implied warranty of
11  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12  * GNU General Public License for more details.
13  *
14  * You should have received a copy of the GNU General Public License
15  * along with this program; if not, write the Free Software Foundation,
16  * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
17  */
18 #include "xfs.h"
19 #include "xfs_fs.h"
20 #include "xfs_shared.h"
21 #include "xfs_format.h"
22 #include "xfs_log_format.h"
23 #include "xfs_trans_resv.h"
24 #include "xfs_sb.h"
25 #include "xfs_mount.h"
26 #include "xfs_defer.h"
27 #include "xfs_da_format.h"
28 #include "xfs_da_btree.h"
29 #include "xfs_inode.h"
30 #include "xfs_trans.h"
31 #include "xfs_inode_item.h"
32 #include "xfs_error.h"
33 #include "xfs_btree.h"
34 #include "xfs_alloc_btree.h"
35 #include "xfs_alloc.h"
36 #include "xfs_rmap_btree.h"
37 #include "xfs_ialloc.h"
38 #include "xfs_fsops.h"
39 #include "xfs_itable.h"
40 #include "xfs_trans_space.h"
41 #include "xfs_rtalloc.h"
42 #include "xfs_trace.h"
43 #include "xfs_log.h"
44 #include "xfs_filestream.h"
45 #include "xfs_rmap.h"
46 
47 /*
48  * File system operations
49  */
50 
51 int
52 xfs_fs_geometry(
53 	xfs_mount_t		*mp,
54 	xfs_fsop_geom_t		*geo,
55 	int			new_version)
56 {
57 
58 	memset(geo, 0, sizeof(*geo));
59 
60 	geo->blocksize = mp->m_sb.sb_blocksize;
61 	geo->rtextsize = mp->m_sb.sb_rextsize;
62 	geo->agblocks = mp->m_sb.sb_agblocks;
63 	geo->agcount = mp->m_sb.sb_agcount;
64 	geo->logblocks = mp->m_sb.sb_logblocks;
65 	geo->sectsize = mp->m_sb.sb_sectsize;
66 	geo->inodesize = mp->m_sb.sb_inodesize;
67 	geo->imaxpct = mp->m_sb.sb_imax_pct;
68 	geo->datablocks = mp->m_sb.sb_dblocks;
69 	geo->rtblocks = mp->m_sb.sb_rblocks;
70 	geo->rtextents = mp->m_sb.sb_rextents;
71 	geo->logstart = mp->m_sb.sb_logstart;
72 	ASSERT(sizeof(geo->uuid)==sizeof(mp->m_sb.sb_uuid));
73 	memcpy(geo->uuid, &mp->m_sb.sb_uuid, sizeof(mp->m_sb.sb_uuid));
74 	if (new_version >= 2) {
75 		geo->sunit = mp->m_sb.sb_unit;
76 		geo->swidth = mp->m_sb.sb_width;
77 	}
78 	if (new_version >= 3) {
79 		geo->version = XFS_FSOP_GEOM_VERSION;
80 		geo->flags = XFS_FSOP_GEOM_FLAGS_NLINK |
81 			     XFS_FSOP_GEOM_FLAGS_DIRV2 |
82 			(xfs_sb_version_hasattr(&mp->m_sb) ?
83 				XFS_FSOP_GEOM_FLAGS_ATTR : 0) |
84 			(xfs_sb_version_hasquota(&mp->m_sb) ?
85 				XFS_FSOP_GEOM_FLAGS_QUOTA : 0) |
86 			(xfs_sb_version_hasalign(&mp->m_sb) ?
87 				XFS_FSOP_GEOM_FLAGS_IALIGN : 0) |
88 			(xfs_sb_version_hasdalign(&mp->m_sb) ?
89 				XFS_FSOP_GEOM_FLAGS_DALIGN : 0) |
90 			(xfs_sb_version_hasextflgbit(&mp->m_sb) ?
91 				XFS_FSOP_GEOM_FLAGS_EXTFLG : 0) |
92 			(xfs_sb_version_hassector(&mp->m_sb) ?
93 				XFS_FSOP_GEOM_FLAGS_SECTOR : 0) |
94 			(xfs_sb_version_hasasciici(&mp->m_sb) ?
95 				XFS_FSOP_GEOM_FLAGS_DIRV2CI : 0) |
96 			(xfs_sb_version_haslazysbcount(&mp->m_sb) ?
97 				XFS_FSOP_GEOM_FLAGS_LAZYSB : 0) |
98 			(xfs_sb_version_hasattr2(&mp->m_sb) ?
99 				XFS_FSOP_GEOM_FLAGS_ATTR2 : 0) |
100 			(xfs_sb_version_hasprojid32bit(&mp->m_sb) ?
101 				XFS_FSOP_GEOM_FLAGS_PROJID32 : 0) |
102 			(xfs_sb_version_hascrc(&mp->m_sb) ?
103 				XFS_FSOP_GEOM_FLAGS_V5SB : 0) |
104 			(xfs_sb_version_hasftype(&mp->m_sb) ?
105 				XFS_FSOP_GEOM_FLAGS_FTYPE : 0) |
106 			(xfs_sb_version_hasfinobt(&mp->m_sb) ?
107 				XFS_FSOP_GEOM_FLAGS_FINOBT : 0) |
108 			(xfs_sb_version_hassparseinodes(&mp->m_sb) ?
109 				XFS_FSOP_GEOM_FLAGS_SPINODES : 0) |
110 			(xfs_sb_version_hasrmapbt(&mp->m_sb) ?
111 				XFS_FSOP_GEOM_FLAGS_RMAPBT : 0);
112 		geo->logsectsize = xfs_sb_version_hassector(&mp->m_sb) ?
113 				mp->m_sb.sb_logsectsize : BBSIZE;
114 		geo->rtsectsize = mp->m_sb.sb_blocksize;
115 		geo->dirblocksize = mp->m_dir_geo->blksize;
116 	}
117 	if (new_version >= 4) {
118 		geo->flags |=
119 			(xfs_sb_version_haslogv2(&mp->m_sb) ?
120 				XFS_FSOP_GEOM_FLAGS_LOGV2 : 0);
121 		geo->logsunit = mp->m_sb.sb_logsunit;
122 	}
123 	return 0;
124 }
125 
126 static struct xfs_buf *
127 xfs_growfs_get_hdr_buf(
128 	struct xfs_mount	*mp,
129 	xfs_daddr_t		blkno,
130 	size_t			numblks,
131 	int			flags,
132 	const struct xfs_buf_ops *ops)
133 {
134 	struct xfs_buf		*bp;
135 
136 	bp = xfs_buf_get_uncached(mp->m_ddev_targp, numblks, flags);
137 	if (!bp)
138 		return NULL;
139 
140 	xfs_buf_zero(bp, 0, BBTOB(bp->b_length));
141 	bp->b_bn = blkno;
142 	bp->b_maps[0].bm_bn = blkno;
143 	bp->b_ops = ops;
144 
145 	return bp;
146 }
147 
/*
 * Grow the data section of the filesystem to in->newblocks filesystem blocks
 * and set the maximum inode percentage to in->imaxpct.
 *
 * Outline of the work done here:
 *  1. Validate the request and read the last sector of the requested size
 *     (presumably to confirm the device really extends that far).
 *  2. Work out the new AG count, dropping a trailing partial AG that would
 *     be smaller than XFS_MIN_AG_BLOCKS.
 *  3. Write the headers and btree roots of every new AG with synchronous,
 *     non-transactional buffer writes.
 *  4. In one synchronous transaction, extend the old last AG (if it gains
 *     blocks) and apply the superblock counter changes.
 *  5. After the commit, refresh derived mount fields and rewrite every
 *     secondary superblock.
 *
 * Returns 0 on success or a negative errno.  An error from step 5 is
 * returned even though the grow itself has already been committed.
 */
static int
xfs_growfs_data_private(
	xfs_mount_t		*mp,		/* mount point for filesystem */
	xfs_growfs_data_t	*in)		/* growfs data input struct */
{
	xfs_agf_t		*agf;
	struct xfs_agfl		*agfl;
	xfs_agi_t		*agi;
	xfs_agnumber_t		agno;
	xfs_extlen_t		agsize;
	xfs_extlen_t		tmpsize;
	xfs_alloc_rec_t		*arec;
	xfs_buf_t		*bp;
	int			bucket;
	int			dpct;
	int			error, saved_error = 0;
	xfs_agnumber_t		nagcount;
	xfs_agnumber_t		nagimax = 0;
	xfs_rfsblock_t		nb, nb_mod;
	xfs_rfsblock_t		new;
	xfs_rfsblock_t		nfree;
	xfs_agnumber_t		oagcount;
	int			pct;
	xfs_trans_t		*tp;

	nb = in->newblocks;
	pct = in->imaxpct;
	/* Shrinking is rejected, as is an inode percentage outside 0..100. */
	if (nb < mp->m_sb.sb_dblocks || pct < 0 || pct > 100)
		return -EINVAL;
	if ((error = xfs_sb_validate_fsb_count(&mp->m_sb, nb)))
		return error;
	/* Change in imaxpct to apply to the superblock; may be negative. */
	dpct = pct - mp->m_sb.sb_imax_pct;
	/* Read the last sector of the proposed size; fail if it is unreadable. */
	error = xfs_buf_read_uncached(mp->m_ddev_targp,
				XFS_FSB_TO_BB(mp, nb) - XFS_FSS_TO_BB(mp, 1),
				XFS_FSS_TO_BB(mp, 1), 0, &bp, NULL);
	if (error)
		return error;
	xfs_buf_relse(bp);

	new = nb;	/* use new as a temporary here */
	nb_mod = do_div(new, mp->m_sb.sb_agblocks);
	nagcount = new + (nb_mod != 0);
	/*
	 * A trailing partial AG smaller than XFS_MIN_AG_BLOCKS is dropped:
	 * round nb down to a whole number of AGs and re-check that this is
	 * still not a shrink.
	 */
	if (nb_mod && nb_mod < XFS_MIN_AG_BLOCKS) {
		nagcount--;
		nb = (xfs_rfsblock_t)nagcount * mp->m_sb.sb_agblocks;
		if (nb < mp->m_sb.sb_dblocks)
			return -EINVAL;
	}
	/* From here on, new is the number of blocks being added. */
	new = nb - mp->m_sb.sb_dblocks;
	oagcount = mp->m_sb.sb_agcount;

	/* allocate the new per-ag structures */
	if (nagcount > oagcount) {
		error = xfs_initialize_perag(mp, nagcount, &nagimax);
		if (error)
			return error;
	}

	error = xfs_trans_alloc(mp, &M_RES(mp)->tr_growdata,
			XFS_GROWFS_SPACE_RES(mp), 0, XFS_TRANS_RESERVE, &tp);
	if (error)
		return error;

	/*
	 * Write new AG headers to disk. Non-transactional, but written
	 * synchronously so they are completed prior to the growfs transaction
	 * being logged.
	 *
	 * The new AGs are walked from last to first.  Each pass subtracts
	 * that AG's size from new, so when the loop finishes new holds the
	 * number of blocks to be added to the old last AG and agno is
	 * oagcount - 1.
	 */
	nfree = 0;
	for (agno = nagcount - 1; agno >= oagcount; agno--, new -= agsize) {
		__be32	*agfl_bno;

		/*
		 * AG freespace header block
		 */
		bp = xfs_growfs_get_hdr_buf(mp,
				XFS_AG_DADDR(mp, agno, XFS_AGF_DADDR(mp)),
				XFS_FSS_TO_BB(mp, 1), 0,
				&xfs_agf_buf_ops);
		if (!bp) {
			error = -ENOMEM;
			goto error0;
		}

		agf = XFS_BUF_TO_AGF(bp);
		agf->agf_magicnum = cpu_to_be32(XFS_AGF_MAGIC);
		agf->agf_versionnum = cpu_to_be32(XFS_AGF_VERSION);
		agf->agf_seqno = cpu_to_be32(agno);
		/* Only the very last AG may be shorter than sb_agblocks. */
		if (agno == nagcount - 1)
			agsize =
				nb -
				(agno * (xfs_rfsblock_t)mp->m_sb.sb_agblocks);
		else
			agsize = mp->m_sb.sb_agblocks;
		agf->agf_length = cpu_to_be32(agsize);
		agf->agf_roots[XFS_BTNUM_BNOi] = cpu_to_be32(XFS_BNO_BLOCK(mp));
		agf->agf_roots[XFS_BTNUM_CNTi] = cpu_to_be32(XFS_CNT_BLOCK(mp));
		agf->agf_levels[XFS_BTNUM_BNOi] = cpu_to_be32(1);
		agf->agf_levels[XFS_BTNUM_CNTi] = cpu_to_be32(1);
		if (xfs_sb_version_hasrmapbt(&mp->m_sb)) {
			agf->agf_roots[XFS_BTNUM_RMAPi] =
						cpu_to_be32(XFS_RMAP_BLOCK(mp));
			agf->agf_levels[XFS_BTNUM_RMAPi] = cpu_to_be32(1);
			agf->agf_rmap_blocks = cpu_to_be32(1);
		}

		agf->agf_flfirst = cpu_to_be32(1);
		agf->agf_fllast = 0;
		agf->agf_flcount = 0;
		/* Everything past the preallocated header blocks is free. */
		tmpsize = agsize - mp->m_ag_prealloc_blocks;
		agf->agf_freeblks = cpu_to_be32(tmpsize);
		agf->agf_longest = cpu_to_be32(tmpsize);
		if (xfs_sb_version_hascrc(&mp->m_sb))
			uuid_copy(&agf->agf_uuid, &mp->m_sb.sb_meta_uuid);

		error = xfs_bwrite(bp);
		xfs_buf_relse(bp);
		if (error)
			goto error0;

		/*
		 * AG freelist header block
		 */
		bp = xfs_growfs_get_hdr_buf(mp,
				XFS_AG_DADDR(mp, agno, XFS_AGFL_DADDR(mp)),
				XFS_FSS_TO_BB(mp, 1), 0,
				&xfs_agfl_buf_ops);
		if (!bp) {
			error = -ENOMEM;
			goto error0;
		}

		agfl = XFS_BUF_TO_AGFL(bp);
		/* The AGFL header fields are only filled in on CRC filesystems. */
		if (xfs_sb_version_hascrc(&mp->m_sb)) {
			agfl->agfl_magicnum = cpu_to_be32(XFS_AGFL_MAGIC);
			agfl->agfl_seqno = cpu_to_be32(agno);
			uuid_copy(&agfl->agfl_uuid, &mp->m_sb.sb_meta_uuid);
		}

		/* Mark every free list slot as empty. */
		agfl_bno = XFS_BUF_TO_AGFL_BNO(mp, bp);
		for (bucket = 0; bucket < XFS_AGFL_SIZE(mp); bucket++)
			agfl_bno[bucket] = cpu_to_be32(NULLAGBLOCK);

		error = xfs_bwrite(bp);
		xfs_buf_relse(bp);
		if (error)
			goto error0;

		/*
		 * AG inode header block
		 */
		bp = xfs_growfs_get_hdr_buf(mp,
				XFS_AG_DADDR(mp, agno, XFS_AGI_DADDR(mp)),
				XFS_FSS_TO_BB(mp, 1), 0,
				&xfs_agi_buf_ops);
		if (!bp) {
			error = -ENOMEM;
			goto error0;
		}

		agi = XFS_BUF_TO_AGI(bp);
		agi->agi_magicnum = cpu_to_be32(XFS_AGI_MAGIC);
		agi->agi_versionnum = cpu_to_be32(XFS_AGI_VERSION);
		agi->agi_seqno = cpu_to_be32(agno);
		agi->agi_length = cpu_to_be32(agsize);
		agi->agi_count = 0;
		agi->agi_root = cpu_to_be32(XFS_IBT_BLOCK(mp));
		agi->agi_level = cpu_to_be32(1);
		agi->agi_freecount = 0;
		agi->agi_newino = cpu_to_be32(NULLAGINO);
		agi->agi_dirino = cpu_to_be32(NULLAGINO);
		if (xfs_sb_version_hascrc(&mp->m_sb))
			uuid_copy(&agi->agi_uuid, &mp->m_sb.sb_meta_uuid);
		if (xfs_sb_version_hasfinobt(&mp->m_sb)) {
			agi->agi_free_root = cpu_to_be32(XFS_FIBT_BLOCK(mp));
			agi->agi_free_level = cpu_to_be32(1);
		}
		/* No unlinked inodes yet: empty every hash bucket. */
		for (bucket = 0; bucket < XFS_AGI_UNLINKED_BUCKETS; bucket++)
			agi->agi_unlinked[bucket] = cpu_to_be32(NULLAGINO);

		error = xfs_bwrite(bp);
		xfs_buf_relse(bp);
		if (error)
			goto error0;

		/*
		 * BNO btree root block
		 */
		bp = xfs_growfs_get_hdr_buf(mp,
				XFS_AGB_TO_DADDR(mp, agno, XFS_BNO_BLOCK(mp)),
				BTOBB(mp->m_sb.sb_blocksize), 0,
				&xfs_allocbt_buf_ops);

		if (!bp) {
			error = -ENOMEM;
			goto error0;
		}

		if (xfs_sb_version_hascrc(&mp->m_sb))
			xfs_btree_init_block(mp, bp, XFS_ABTB_CRC_MAGIC, 0, 1,
						agno, XFS_BTREE_CRC_BLOCKS);
		else
			xfs_btree_init_block(mp, bp, XFS_ABTB_MAGIC, 0, 1,
						agno, 0);

		/* Single record: one free extent covering the rest of the AG. */
		arec = XFS_ALLOC_REC_ADDR(mp, XFS_BUF_TO_BLOCK(bp), 1);
		arec->ar_startblock = cpu_to_be32(mp->m_ag_prealloc_blocks);
		arec->ar_blockcount = cpu_to_be32(
			agsize - be32_to_cpu(arec->ar_startblock));

		error = xfs_bwrite(bp);
		xfs_buf_relse(bp);
		if (error)
			goto error0;

		/*
		 * CNT btree root block
		 */
		bp = xfs_growfs_get_hdr_buf(mp,
				XFS_AGB_TO_DADDR(mp, agno, XFS_CNT_BLOCK(mp)),
				BTOBB(mp->m_sb.sb_blocksize), 0,
				&xfs_allocbt_buf_ops);
		if (!bp) {
			error = -ENOMEM;
			goto error0;
		}

		if (xfs_sb_version_hascrc(&mp->m_sb))
			xfs_btree_init_block(mp, bp, XFS_ABTC_CRC_MAGIC, 0, 1,
						agno, XFS_BTREE_CRC_BLOCKS);
		else
			xfs_btree_init_block(mp, bp, XFS_ABTC_MAGIC, 0, 1,
						agno, 0);

		/* Same single free extent as in the BNO btree root. */
		arec = XFS_ALLOC_REC_ADDR(mp, XFS_BUF_TO_BLOCK(bp), 1);
		arec->ar_startblock = cpu_to_be32(mp->m_ag_prealloc_blocks);
		arec->ar_blockcount = cpu_to_be32(
			agsize - be32_to_cpu(arec->ar_startblock));
		/* Accumulate the free space contributed by the new AGs. */
		nfree += be32_to_cpu(arec->ar_blockcount);

		error = xfs_bwrite(bp);
		xfs_buf_relse(bp);
		if (error)
			goto error0;

		/* RMAP btree root block */
		if (xfs_sb_version_hasrmapbt(&mp->m_sb)) {
			struct xfs_rmap_rec	*rrec;
			struct xfs_btree_block	*block;

			bp = xfs_growfs_get_hdr_buf(mp,
				XFS_AGB_TO_DADDR(mp, agno, XFS_RMAP_BLOCK(mp)),
				BTOBB(mp->m_sb.sb_blocksize), 0,
				&xfs_rmapbt_buf_ops);
			if (!bp) {
				error = -ENOMEM;
				goto error0;
			}

			/* Start with zero records; each one added below bumps bb_numrecs. */
			xfs_btree_init_block(mp, bp, XFS_RMAP_CRC_MAGIC, 0, 0,
						agno, XFS_BTREE_CRC_BLOCKS);
			block = XFS_BUF_TO_BLOCK(bp);


			/*
			 * Mark the AG header regions as static metadata. The
			 * BNO btree block is the first block after the headers,
			 * so its location defines the size of the region the
			 * static metadata consumes.
			 *
			 * Note: unlike mkfs, we never have to account for log
			 * space when growing the data regions
			 */
			rrec = XFS_RMAP_REC_ADDR(block, 1);
			rrec->rm_startblock = 0;
			rrec->rm_blockcount = cpu_to_be32(XFS_BNO_BLOCK(mp));
			rrec->rm_owner = cpu_to_be64(XFS_RMAP_OWN_FS);
			rrec->rm_offset = 0;
			be16_add_cpu(&block->bb_numrecs, 1);

			/* account freespace btree root blocks */
			rrec = XFS_RMAP_REC_ADDR(block, 2);
			rrec->rm_startblock = cpu_to_be32(XFS_BNO_BLOCK(mp));
			rrec->rm_blockcount = cpu_to_be32(2);
			rrec->rm_owner = cpu_to_be64(XFS_RMAP_OWN_AG);
			rrec->rm_offset = 0;
			be16_add_cpu(&block->bb_numrecs, 1);

			/* account inode btree root blocks */
			rrec = XFS_RMAP_REC_ADDR(block, 3);
			rrec->rm_startblock = cpu_to_be32(XFS_IBT_BLOCK(mp));
			rrec->rm_blockcount = cpu_to_be32(XFS_RMAP_BLOCK(mp) -
							XFS_IBT_BLOCK(mp));
			rrec->rm_owner = cpu_to_be64(XFS_RMAP_OWN_INOBT);
			rrec->rm_offset = 0;
			be16_add_cpu(&block->bb_numrecs, 1);

			/* account for rmap btree root */
			rrec = XFS_RMAP_REC_ADDR(block, 4);
			rrec->rm_startblock = cpu_to_be32(XFS_RMAP_BLOCK(mp));
			rrec->rm_blockcount = cpu_to_be32(1);
			rrec->rm_owner = cpu_to_be64(XFS_RMAP_OWN_AG);
			rrec->rm_offset = 0;
			be16_add_cpu(&block->bb_numrecs, 1);

			error = xfs_bwrite(bp);
			xfs_buf_relse(bp);
			if (error)
				goto error0;
		}

		/*
		 * INO btree root block
		 */
		bp = xfs_growfs_get_hdr_buf(mp,
				XFS_AGB_TO_DADDR(mp, agno, XFS_IBT_BLOCK(mp)),
				BTOBB(mp->m_sb.sb_blocksize), 0,
				&xfs_inobt_buf_ops);
		if (!bp) {
			error = -ENOMEM;
			goto error0;
		}

		if (xfs_sb_version_hascrc(&mp->m_sb))
			xfs_btree_init_block(mp, bp, XFS_IBT_CRC_MAGIC, 0, 0,
						agno, XFS_BTREE_CRC_BLOCKS);
		else
			xfs_btree_init_block(mp, bp, XFS_IBT_MAGIC, 0, 0,
						agno, 0);

		error = xfs_bwrite(bp);
		xfs_buf_relse(bp);
		if (error)
			goto error0;

		/*
		 * FINO btree root block
		 */
		if (xfs_sb_version_hasfinobt(&mp->m_sb)) {
			bp = xfs_growfs_get_hdr_buf(mp,
				XFS_AGB_TO_DADDR(mp, agno, XFS_FIBT_BLOCK(mp)),
				BTOBB(mp->m_sb.sb_blocksize), 0,
				&xfs_inobt_buf_ops);
			if (!bp) {
				error = -ENOMEM;
				goto error0;
			}

			if (xfs_sb_version_hascrc(&mp->m_sb))
				xfs_btree_init_block(mp, bp, XFS_FIBT_CRC_MAGIC,
						     0, 0, agno,
						     XFS_BTREE_CRC_BLOCKS);
			else
				xfs_btree_init_block(mp, bp, XFS_FIBT_MAGIC, 0,
						     0, agno, 0);

			error = xfs_bwrite(bp);
			xfs_buf_relse(bp);
			if (error)
				goto error0;
		}

	}
	xfs_trans_agblocks_delta(tp, nfree);
	/*
	 * There are new blocks in the old last a.g.
	 * (agno is oagcount - 1 here, see the loop above.)
	 */
	if (new) {
		struct xfs_owner_info	oinfo;

		/*
		 * Change the agi length.
		 */
		error = xfs_ialloc_read_agi(mp, tp, agno, &bp);
		if (error) {
			goto error0;
		}
		ASSERT(bp);
		agi = XFS_BUF_TO_AGI(bp);
		be32_add_cpu(&agi->agi_length, new);
		/* If AGs were added, the old last AG must now be full size. */
		ASSERT(nagcount == oagcount ||
		       be32_to_cpu(agi->agi_length) == mp->m_sb.sb_agblocks);
		xfs_ialloc_log_agi(tp, bp, XFS_AGI_LENGTH);
		/*
		 * Change agf length.
		 */
		error = xfs_alloc_read_agf(mp, tp, agno, 0, &bp);
		if (error) {
			goto error0;
		}
		ASSERT(bp);
		agf = XFS_BUF_TO_AGF(bp);
		be32_add_cpu(&agf->agf_length, new);
		ASSERT(be32_to_cpu(agf->agf_length) ==
		       be32_to_cpu(agi->agi_length));

		xfs_alloc_log_agf(tp, bp, XFS_AGF_LENGTH);

		/*
		 * Free the new space.
		 *
		 * XFS_RMAP_OWN_NULL is used here to tell the rmap btree that
		 * this doesn't actually exist in the rmap btree.
		 */
		xfs_rmap_ag_owner(&oinfo, XFS_RMAP_OWN_NULL);
		error = xfs_free_extent(tp,
				XFS_AGB_TO_FSB(mp, agno,
					be32_to_cpu(agf->agf_length) - new),
				new, &oinfo);
		if (error)
			goto error0;
	}

	/*
	 * Update changed superblock fields transactionally. These are not
	 * seen by the rest of the world until the transaction commit applies
	 * them atomically to the superblock.
	 */
	if (nagcount > oagcount)
		xfs_trans_mod_sb(tp, XFS_TRANS_SB_AGCOUNT, nagcount - oagcount);
	if (nb > mp->m_sb.sb_dblocks)
		xfs_trans_mod_sb(tp, XFS_TRANS_SB_DBLOCKS,
				 nb - mp->m_sb.sb_dblocks);
	if (nfree)
		xfs_trans_mod_sb(tp, XFS_TRANS_SB_FDBLOCKS, nfree);
	if (dpct)
		xfs_trans_mod_sb(tp, XFS_TRANS_SB_IMAXPCT, dpct);
	xfs_trans_set_sync(tp);
	error = xfs_trans_commit(tp);
	if (error)
		return error;

	/* New allocation groups fully initialized, so update mount struct */
	if (nagimax)
		mp->m_maxagi = nagimax;
	/* Recompute the inode count limit from the new size and percentage. */
	if (mp->m_sb.sb_imax_pct) {
		__uint64_t icount = mp->m_sb.sb_dblocks * mp->m_sb.sb_imax_pct;
		do_div(icount, 100);
		mp->m_maxicount = icount << mp->m_sb.sb_inopblog;
	} else
		mp->m_maxicount = 0;
	xfs_set_low_space_thresholds(mp);
	mp->m_alloc_set_aside = xfs_alloc_set_aside(mp);

	/* update secondary superblocks. */
	for (agno = 1; agno < nagcount; agno++) {
		error = 0;
		/*
		 * new secondary superblocks need to be zeroed, not read from
		 * disk as the contents of the new area we are growing into is
		 * completely unknown.
		 */
		if (agno < oagcount) {
			error = xfs_trans_read_buf(mp, NULL, mp->m_ddev_targp,
				  XFS_AGB_TO_DADDR(mp, agno, XFS_SB_BLOCK(mp)),
				  XFS_FSS_TO_BB(mp, 1), 0, &bp,
				  &xfs_sb_buf_ops);
		} else {
			bp = xfs_trans_get_buf(NULL, mp->m_ddev_targp,
				  XFS_AGB_TO_DADDR(mp, agno, XFS_SB_BLOCK(mp)),
				  XFS_FSS_TO_BB(mp, 1), 0);
			if (bp) {
				bp->b_ops = &xfs_sb_buf_ops;
				xfs_buf_zero(bp, 0, BBTOB(bp->b_length));
			} else
				error = -ENOMEM;
		}

		/*
		 * If we get an error reading or writing alternate superblocks,
		 * continue.  xfs_repair chooses the "best" superblock based
		 * on most matches; if we break early, we'll leave more
		 * superblocks un-updated than updated, and xfs_repair may
		 * pick them over the properly-updated primary.
		 */
		if (error) {
			xfs_warn(mp,
		"error %d reading secondary superblock for ag %d",
				error, agno);
			saved_error = error;
			continue;
		}
		xfs_sb_to_disk(XFS_BUF_TO_SBP(bp), &mp->m_sb);

		error = xfs_bwrite(bp);
		xfs_buf_relse(bp);
		if (error) {
			xfs_warn(mp,
		"write error %d updating secondary superblock for ag %d",
				error, agno);
			saved_error = error;
			continue;
		}
	}
	/* Report the most recent secondary superblock failure, if any. */
	return saved_error ? saved_error : error;

 error0:
	/* Failure before commit: drop the transaction and its reservation. */
	xfs_trans_cancel(tp);
	return error;
}
648 
649 static int
650 xfs_growfs_log_private(
651 	xfs_mount_t		*mp,	/* mount point for filesystem */
652 	xfs_growfs_log_t	*in)	/* growfs log input struct */
653 {
654 	xfs_extlen_t		nb;
655 
656 	nb = in->newblocks;
657 	if (nb < XFS_MIN_LOG_BLOCKS || nb < XFS_B_TO_FSB(mp, XFS_MIN_LOG_BYTES))
658 		return -EINVAL;
659 	if (nb == mp->m_sb.sb_logblocks &&
660 	    in->isint == (mp->m_sb.sb_logstart != 0))
661 		return -EINVAL;
662 	/*
663 	 * Moving the log is hard, need new interfaces to sync
664 	 * the log first, hold off all activity while moving it.
665 	 * Can have shorter or longer log in the same space,
666 	 * or transform internal to external log or vice versa.
667 	 */
668 	return -ENOSYS;
669 }
670 
/*
 * The protected versions of the growfs functions below take the mount's
 * m_growlock before calling the private implementation and release it
 * afterwards.  They are exported through the ioctls XFS_IOC_FSGROWFSDATA,
 * XFS_IOC_FSGROWFSLOG and XFS_IOC_FSGROWFSRT.
 */
676 
677 
678 int
679 xfs_growfs_data(
680 	xfs_mount_t		*mp,
681 	xfs_growfs_data_t	*in)
682 {
683 	int error;
684 
685 	if (!capable(CAP_SYS_ADMIN))
686 		return -EPERM;
687 	if (!mutex_trylock(&mp->m_growlock))
688 		return -EWOULDBLOCK;
689 	error = xfs_growfs_data_private(mp, in);
690 	/*
691 	 * Increment the generation unconditionally, the error could be from
692 	 * updating the secondary superblocks, in which case the new size
693 	 * is live already.
694 	 */
695 	mp->m_generation++;
696 	mutex_unlock(&mp->m_growlock);
697 	return error;
698 }
699 
700 int
701 xfs_growfs_log(
702 	xfs_mount_t		*mp,
703 	xfs_growfs_log_t	*in)
704 {
705 	int error;
706 
707 	if (!capable(CAP_SYS_ADMIN))
708 		return -EPERM;
709 	if (!mutex_trylock(&mp->m_growlock))
710 		return -EWOULDBLOCK;
711 	error = xfs_growfs_log_private(mp, in);
712 	mutex_unlock(&mp->m_growlock);
713 	return error;
714 }
715 
716 /*
717  * exported through ioctl XFS_IOC_FSCOUNTS
718  */
719 
720 int
721 xfs_fs_counts(
722 	xfs_mount_t		*mp,
723 	xfs_fsop_counts_t	*cnt)
724 {
725 	cnt->allocino = percpu_counter_read_positive(&mp->m_icount);
726 	cnt->freeino = percpu_counter_read_positive(&mp->m_ifree);
727 	cnt->freedata = percpu_counter_read_positive(&mp->m_fdblocks) -
728 						mp->m_alloc_set_aside;
729 
730 	spin_lock(&mp->m_sb_lock);
731 	cnt->freertx = mp->m_sb.sb_frextents;
732 	spin_unlock(&mp->m_sb_lock);
733 	return 0;
734 }
735 
736 /*
737  * exported through ioctl XFS_IOC_SET_RESBLKS & XFS_IOC_GET_RESBLKS
738  *
739  * xfs_reserve_blocks is called to set m_resblks
740  * in the in-core mount table. The number of unused reserved blocks
741  * is kept in m_resblks_avail.
742  *
743  * Reserve the requested number of blocks if available. Otherwise return
744  * as many as possible to satisfy the request. The actual number
745  * reserved are returned in outval
746  *
747  * A null inval pointer indicates that only the current reserved blocks
748  * available  should  be returned no settings are changed.
749  */
750 
751 int
752 xfs_reserve_blocks(
753 	xfs_mount_t             *mp,
754 	__uint64_t              *inval,
755 	xfs_fsop_resblks_t      *outval)
756 {
757 	__int64_t		lcounter, delta;
758 	__int64_t		fdblks_delta = 0;
759 	__uint64_t		request;
760 	__int64_t		free;
761 	int			error = 0;
762 
763 	/* If inval is null, report current values and return */
764 	if (inval == (__uint64_t *)NULL) {
765 		if (!outval)
766 			return -EINVAL;
767 		outval->resblks = mp->m_resblks;
768 		outval->resblks_avail = mp->m_resblks_avail;
769 		return 0;
770 	}
771 
772 	request = *inval;
773 
774 	/*
775 	 * With per-cpu counters, this becomes an interesting problem. we need
776 	 * to work out if we are freeing or allocation blocks first, then we can
777 	 * do the modification as necessary.
778 	 *
779 	 * We do this under the m_sb_lock so that if we are near ENOSPC, we will
780 	 * hold out any changes while we work out what to do. This means that
781 	 * the amount of free space can change while we do this, so we need to
782 	 * retry if we end up trying to reserve more space than is available.
783 	 */
784 	spin_lock(&mp->m_sb_lock);
785 
786 	/*
787 	 * If our previous reservation was larger than the current value,
788 	 * then move any unused blocks back to the free pool. Modify the resblks
789 	 * counters directly since we shouldn't have any problems unreserving
790 	 * space.
791 	 */
792 	if (mp->m_resblks > request) {
793 		lcounter = mp->m_resblks_avail - request;
794 		if (lcounter  > 0) {		/* release unused blocks */
795 			fdblks_delta = lcounter;
796 			mp->m_resblks_avail -= lcounter;
797 		}
798 		mp->m_resblks = request;
799 		if (fdblks_delta) {
800 			spin_unlock(&mp->m_sb_lock);
801 			error = xfs_mod_fdblocks(mp, fdblks_delta, 0);
802 			spin_lock(&mp->m_sb_lock);
803 		}
804 
805 		goto out;
806 	}
807 
808 	/*
809 	 * If the request is larger than the current reservation, reserve the
810 	 * blocks before we update the reserve counters. Sample m_fdblocks and
811 	 * perform a partial reservation if the request exceeds free space.
812 	 */
813 	error = -ENOSPC;
814 	do {
815 		free = percpu_counter_sum(&mp->m_fdblocks) -
816 						mp->m_alloc_set_aside;
817 		if (!free)
818 			break;
819 
820 		delta = request - mp->m_resblks;
821 		lcounter = free - delta;
822 		if (lcounter < 0)
823 			/* We can't satisfy the request, just get what we can */
824 			fdblks_delta = free;
825 		else
826 			fdblks_delta = delta;
827 
828 		/*
829 		 * We'll either succeed in getting space from the free block
830 		 * count or we'll get an ENOSPC. If we get a ENOSPC, it means
831 		 * things changed while we were calculating fdblks_delta and so
832 		 * we should try again to see if there is anything left to
833 		 * reserve.
834 		 *
835 		 * Don't set the reserved flag here - we don't want to reserve
836 		 * the extra reserve blocks from the reserve.....
837 		 */
838 		spin_unlock(&mp->m_sb_lock);
839 		error = xfs_mod_fdblocks(mp, -fdblks_delta, 0);
840 		spin_lock(&mp->m_sb_lock);
841 	} while (error == -ENOSPC);
842 
843 	/*
844 	 * Update the reserve counters if blocks have been successfully
845 	 * allocated.
846 	 */
847 	if (!error && fdblks_delta) {
848 		mp->m_resblks += fdblks_delta;
849 		mp->m_resblks_avail += fdblks_delta;
850 	}
851 
852 out:
853 	if (outval) {
854 		outval->resblks = mp->m_resblks;
855 		outval->resblks_avail = mp->m_resblks_avail;
856 	}
857 
858 	spin_unlock(&mp->m_sb_lock);
859 	return error;
860 }
861 
862 int
863 xfs_fs_goingdown(
864 	xfs_mount_t	*mp,
865 	__uint32_t	inflags)
866 {
867 	switch (inflags) {
868 	case XFS_FSOP_GOING_FLAGS_DEFAULT: {
869 		struct super_block *sb = freeze_bdev(mp->m_super->s_bdev);
870 
871 		if (sb && !IS_ERR(sb)) {
872 			xfs_force_shutdown(mp, SHUTDOWN_FORCE_UMOUNT);
873 			thaw_bdev(sb->s_bdev, sb);
874 		}
875 
876 		break;
877 	}
878 	case XFS_FSOP_GOING_FLAGS_LOGFLUSH:
879 		xfs_force_shutdown(mp, SHUTDOWN_FORCE_UMOUNT);
880 		break;
881 	case XFS_FSOP_GOING_FLAGS_NOLOGFLUSH:
882 		xfs_force_shutdown(mp,
883 				SHUTDOWN_FORCE_UMOUNT | SHUTDOWN_LOG_IO_ERROR);
884 		break;
885 	default:
886 		return -EINVAL;
887 	}
888 
889 	return 0;
890 }
891 
892 /*
893  * Force a shutdown of the filesystem instantly while keeping the filesystem
894  * consistent. We don't do an unmount here; just shutdown the shop, make sure
895  * that absolutely nothing persistent happens to this filesystem after this
896  * point.
897  */
898 void
899 xfs_do_force_shutdown(
900 	xfs_mount_t	*mp,
901 	int		flags,
902 	char		*fname,
903 	int		lnnum)
904 {
905 	int		logerror;
906 
907 	logerror = flags & SHUTDOWN_LOG_IO_ERROR;
908 
909 	if (!(flags & SHUTDOWN_FORCE_UMOUNT)) {
910 		xfs_notice(mp,
911 	"%s(0x%x) called from line %d of file %s.  Return address = 0x%p",
912 			__func__, flags, lnnum, fname, __return_address);
913 	}
914 	/*
915 	 * No need to duplicate efforts.
916 	 */
917 	if (XFS_FORCED_SHUTDOWN(mp) && !logerror)
918 		return;
919 
920 	/*
921 	 * This flags XFS_MOUNT_FS_SHUTDOWN, makes sure that we don't
922 	 * queue up anybody new on the log reservations, and wakes up
923 	 * everybody who's sleeping on log reservations to tell them
924 	 * the bad news.
925 	 */
926 	if (xfs_log_force_umount(mp, logerror))
927 		return;
928 
929 	if (flags & SHUTDOWN_CORRUPT_INCORE) {
930 		xfs_alert_tag(mp, XFS_PTAG_SHUTDOWN_CORRUPT,
931     "Corruption of in-memory data detected.  Shutting down filesystem");
932 		if (XFS_ERRLEVEL_HIGH <= xfs_error_level)
933 			xfs_stack_trace();
934 	} else if (!(flags & SHUTDOWN_FORCE_UMOUNT)) {
935 		if (logerror) {
936 			xfs_alert_tag(mp, XFS_PTAG_SHUTDOWN_LOGERROR,
937 		"Log I/O Error Detected.  Shutting down filesystem");
938 		} else if (flags & SHUTDOWN_DEVICE_REQ) {
939 			xfs_alert_tag(mp, XFS_PTAG_SHUTDOWN_IOERROR,
940 		"All device paths lost.  Shutting down filesystem");
941 		} else if (!(flags & SHUTDOWN_REMOTE_REQ)) {
942 			xfs_alert_tag(mp, XFS_PTAG_SHUTDOWN_IOERROR,
943 		"I/O Error Detected. Shutting down filesystem");
944 		}
945 	}
946 	if (!(flags & SHUTDOWN_FORCE_UMOUNT)) {
947 		xfs_alert(mp,
948 	"Please umount the filesystem and rectify the problem(s)");
949 	}
950 }
951