xref: /openbmc/linux/fs/xfs/xfs_super.c (revision 95e9fd10)
1 /*
2  * Copyright (c) 2000-2006 Silicon Graphics, Inc.
3  * All Rights Reserved.
4  *
5  * This program is free software; you can redistribute it and/or
6  * modify it under the terms of the GNU General Public License as
7  * published by the Free Software Foundation.
8  *
9  * This program is distributed in the hope that it would be useful,
10  * but WITHOUT ANY WARRANTY; without even the implied warranty of
11  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12  * GNU General Public License for more details.
13  *
14  * You should have received a copy of the GNU General Public License
15  * along with this program; if not, write the Free Software Foundation,
16  * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
17  */
18 
19 #include "xfs.h"
20 #include "xfs_log.h"
21 #include "xfs_inum.h"
22 #include "xfs_trans.h"
23 #include "xfs_sb.h"
24 #include "xfs_ag.h"
25 #include "xfs_dir2.h"
26 #include "xfs_alloc.h"
27 #include "xfs_quota.h"
28 #include "xfs_mount.h"
29 #include "xfs_bmap_btree.h"
30 #include "xfs_alloc_btree.h"
31 #include "xfs_ialloc_btree.h"
32 #include "xfs_dinode.h"
33 #include "xfs_inode.h"
34 #include "xfs_btree.h"
35 #include "xfs_ialloc.h"
36 #include "xfs_bmap.h"
37 #include "xfs_rtalloc.h"
38 #include "xfs_error.h"
39 #include "xfs_itable.h"
40 #include "xfs_fsops.h"
41 #include "xfs_attr.h"
42 #include "xfs_buf_item.h"
43 #include "xfs_utils.h"
44 #include "xfs_vnodeops.h"
45 #include "xfs_log_priv.h"
46 #include "xfs_trans_priv.h"
47 #include "xfs_filestream.h"
48 #include "xfs_da_btree.h"
49 #include "xfs_extfree_item.h"
50 #include "xfs_mru_cache.h"
51 #include "xfs_inode_item.h"
52 #include "xfs_sync.h"
53 #include "xfs_trace.h"
54 
55 #include <linux/namei.h>
56 #include <linux/init.h>
57 #include <linux/slab.h>
58 #include <linux/mount.h>
59 #include <linux/mempool.h>
60 #include <linux/writeback.h>
61 #include <linux/kthread.h>
62 #include <linux/freezer.h>
63 #include <linux/parser.h>
64 
/* Forward declaration of the XFS superblock operations table. */
static const struct super_operations xfs_super_operations;
/* File-local kmem zone; presumably backs xfs_ioend_pool -- TODO confirm. */
static kmem_zone_t *xfs_ioend_zone;
/* Non-static mempool handle, so it is visible to other translation units. */
mempool_t *xfs_ioend_pool;
68 
/*
 * Mount option names.  The same strings are matched against the option
 * string at mount time and printed back when the mount options are shown.
 */
#define MNTOPT_LOGBUFS	"logbufs"	/* number of XFS log buffers */
#define MNTOPT_LOGBSIZE	"logbsize"	/* size of XFS log buffers */
#define MNTOPT_LOGDEV	"logdev"	/* log device */
#define MNTOPT_RTDEV	"rtdev"		/* realtime I/O device */
#define MNTOPT_BIOSIZE	"biosize"	/* log2 of preferred buffered io size */
#define MNTOPT_WSYNC	"wsync"		/* safe-mode nfs compatible mount */
#define MNTOPT_NOALIGN	"noalign"	/* turn off stripe alignment */
#define MNTOPT_SWALLOC	"swalloc"	/* turn on stripe width allocation */
#define MNTOPT_SUNIT	"sunit"		/* data volume stripe unit */
#define MNTOPT_SWIDTH	"swidth"	/* data volume stripe width */
#define MNTOPT_NOUUID	"nouuid"	/* ignore filesystem UUID */
#define MNTOPT_MTPT	"mtpt"		/* filesystem mount point */
#define MNTOPT_GRPID	"grpid"		/* group-ID from parent directory */
#define MNTOPT_NOGRPID	"nogrpid"	/* group-ID from current process */
#define MNTOPT_BSDGROUPS    "bsdgroups"    /* group-ID from parent directory */
#define MNTOPT_SYSVGROUPS   "sysvgroups"   /* group-ID from current process */
#define MNTOPT_ALLOCSIZE    "allocsize"    /* preferred allocation size */
#define MNTOPT_NORECOVERY   "norecovery"   /* don't run XFS recovery */
#define MNTOPT_BARRIER	"barrier"	/* use write barriers for log write and
					 * unwritten extent conversion */
#define MNTOPT_NOBARRIER "nobarrier"	/* .. disable */
#define MNTOPT_64BITINODE   "inode64"	/* inodes can be allocated anywhere */
#define MNTOPT_IKEEP	"ikeep"		/* do not free empty inode clusters */
#define MNTOPT_NOIKEEP	"noikeep"	/* free empty inode clusters */
#define MNTOPT_LARGEIO	   "largeio"	/* report large I/O sizes in stat() */
#define MNTOPT_NOLARGEIO   "nolargeio"	/* do not report large I/O sizes
					 * in stat(). */
#define MNTOPT_ATTR2	"attr2"		/* do use attr2 attribute format */
#define MNTOPT_NOATTR2	"noattr2"	/* do not use attr2 attribute format */
#define MNTOPT_FILESTREAM  "filestreams" /* use filestreams allocator */
#define MNTOPT_QUOTA	"quota"		/* disk quotas (user) */
#define MNTOPT_NOQUOTA	"noquota"	/* no quotas */
#define MNTOPT_USRQUOTA	"usrquota"	/* user quota enabled */
#define MNTOPT_GRPQUOTA	"grpquota"	/* group quota enabled */
#define MNTOPT_PRJQUOTA	"prjquota"	/* project quota enabled */
#define MNTOPT_UQUOTA	"uquota"	/* user quota (IRIX variant) */
#define MNTOPT_GQUOTA	"gquota"	/* group quota (IRIX variant) */
#define MNTOPT_PQUOTA	"pquota"	/* project quota (IRIX variant) */
#define MNTOPT_UQUOTANOENF "uqnoenforce"/* user quota, limits not enforced */
#define MNTOPT_GQUOTANOENF "gqnoenforce"/* group quota, limits not enforced */
#define MNTOPT_PQUOTANOENF "pqnoenforce"/* project quota, limits not enforced */
#define MNTOPT_QUOTANOENF  "qnoenforce"	/* same as uqnoenforce */
#define MNTOPT_DELAYLOG    "delaylog"	/* Delayed logging enabled */
#define MNTOPT_NODELAYLOG  "nodelaylog"	/* Delayed logging disabled */
#define MNTOPT_DISCARD	   "discard"	/* Discard unused blocks */
#define MNTOPT_NODISCARD   "nodiscard"	/* Do not discard unused blocks */
115 
/*
 * Table driven mount option parser.
 *
 * Currently only used for remount, but it will be used for mount
 * in the future, too.
 *
 * Opt_err is the sentinel token; its table entry has a NULL pattern and
 * terminates the match table.
 */
enum {
	Opt_barrier, Opt_nobarrier, Opt_err
};

/* Maps option strings to the tokens above. */
static const match_table_t tokens = {
	{Opt_barrier, "barrier"},
	{Opt_nobarrier, "nobarrier"},
	{Opt_err, NULL}
};
131 
132 
133 STATIC unsigned long
134 suffix_strtoul(char *s, char **endp, unsigned int base)
135 {
136 	int	last, shift_left_factor = 0;
137 	char	*value = s;
138 
139 	last = strlen(value) - 1;
140 	if (value[last] == 'K' || value[last] == 'k') {
141 		shift_left_factor = 10;
142 		value[last] = '\0';
143 	}
144 	if (value[last] == 'M' || value[last] == 'm') {
145 		shift_left_factor = 20;
146 		value[last] = '\0';
147 	}
148 	if (value[last] == 'G' || value[last] == 'g') {
149 		shift_left_factor = 30;
150 		value[last] = '\0';
151 	}
152 
153 	return simple_strtoul((const char *)s, endp, base) << shift_left_factor;
154 }
155 
/*
 * This function fills in xfs_mount_t fields based on mount args.
 * Note: the superblock has _not_ yet been read in.
 *
 * Note that this function leaks the various device name allocations on
 * failure.  The caller takes care of them.
 *
 * @mp:      mount structure to fill in; mp->m_super is read here
 * @options: comma-separated option string, split in place by strsep();
 *           may be NULL, in which case only the defaults are applied
 *
 * Returns 0 on success or a positive errno value (ENOMEM, EINVAL).
 */
STATIC int
xfs_parseargs(
	struct xfs_mount	*mp,
	char			*options)
{
	struct super_block	*sb = mp->m_super;
	char			*this_char, *value, *eov;
	int			dsunit = 0;
	int			dswidth = 0;
	int			iosize = 0;
	__uint8_t		iosizelog = 0;

	/*
	 * set up the mount name first so all the errors will refer to the
	 * correct device.
	 */
	mp->m_fsname = kstrndup(sb->s_id, MAXNAMELEN, GFP_KERNEL);
	if (!mp->m_fsname)
		return ENOMEM;
	mp->m_fsname_len = strlen(mp->m_fsname) + 1;

	/*
	 * Copy binary VFS mount flags we are interested in.
	 */
	if (sb->s_flags & MS_RDONLY)
		mp->m_flags |= XFS_MOUNT_RDONLY;
	if (sb->s_flags & MS_DIRSYNC)
		mp->m_flags |= XFS_MOUNT_DIRSYNC;
	if (sb->s_flags & MS_SYNCHRONOUS)
		mp->m_flags |= XFS_MOUNT_WSYNC;

	/*
	 * Set some default flags that could be cleared by the mount option
	 * parsing.
	 */
	mp->m_flags |= XFS_MOUNT_BARRIER;
	mp->m_flags |= XFS_MOUNT_COMPAT_IOSIZE;
	mp->m_flags |= XFS_MOUNT_SMALL_INUMS;

	/*
	 * These can be overridden by the mount option parsing.
	 */
	mp->m_logbufs = -1;
	mp->m_logbsize = -1;

	/* No option string: skip parsing and the cross-option checks. */
	if (!options)
		goto done;

	/*
	 * Walk the comma-separated options.  Each "name=value" is split in
	 * place at the '='; value stays NULL for options given without one.
	 *
	 * NOTE(review): eov receives the end pointer from the numeric
	 * conversions below but is never examined, so trailing non-numeric
	 * characters in a value are not rejected.
	 */
	while ((this_char = strsep(&options, ",")) != NULL) {
		if (!*this_char)
			continue;
		if ((value = strchr(this_char, '=')) != NULL)
			*value++ = 0;

		if (!strcmp(this_char, MNTOPT_LOGBUFS)) {
			if (!value || !*value) {
				xfs_warn(mp, "%s option requires an argument",
					this_char);
				return EINVAL;
			}
			mp->m_logbufs = simple_strtoul(value, &eov, 10);
		} else if (!strcmp(this_char, MNTOPT_LOGBSIZE)) {
			if (!value || !*value) {
				xfs_warn(mp, "%s option requires an argument",
					this_char);
				return EINVAL;
			}
			mp->m_logbsize = suffix_strtoul(value, &eov, 10);
		} else if (!strcmp(this_char, MNTOPT_LOGDEV)) {
			if (!value || !*value) {
				xfs_warn(mp, "%s option requires an argument",
					this_char);
				return EINVAL;
			}
			mp->m_logname = kstrndup(value, MAXNAMELEN, GFP_KERNEL);
			if (!mp->m_logname)
				return ENOMEM;
		} else if (!strcmp(this_char, MNTOPT_MTPT)) {
			xfs_warn(mp, "%s option not allowed on this system",
				this_char);
			return EINVAL;
		} else if (!strcmp(this_char, MNTOPT_RTDEV)) {
			if (!value || !*value) {
				xfs_warn(mp, "%s option requires an argument",
					this_char);
				return EINVAL;
			}
			mp->m_rtname = kstrndup(value, MAXNAMELEN, GFP_KERNEL);
			if (!mp->m_rtname)
				return ENOMEM;
		} else if (!strcmp(this_char, MNTOPT_BIOSIZE)) {
			if (!value || !*value) {
				xfs_warn(mp, "%s option requires an argument",
					this_char);
				return EINVAL;
			}
			/*
			 * NOTE(review): the MNTOPT_BIOSIZE comment describes
			 * the value as a log2, but it is treated here as a
			 * size and converted with ffs() -- confirm intent.
			 */
			iosize = simple_strtoul(value, &eov, 10);
			iosizelog = ffs(iosize) - 1;
		} else if (!strcmp(this_char, MNTOPT_ALLOCSIZE)) {
			if (!value || !*value) {
				xfs_warn(mp, "%s option requires an argument",
					this_char);
				return EINVAL;
			}
			/*
			 * ffs() picks the lowest set bit, so this is the
			 * log2 of the size only for power-of-two values.
			 */
			iosize = suffix_strtoul(value, &eov, 10);
			iosizelog = ffs(iosize) - 1;
		} else if (!strcmp(this_char, MNTOPT_GRPID) ||
			   !strcmp(this_char, MNTOPT_BSDGROUPS)) {
			mp->m_flags |= XFS_MOUNT_GRPID;
		} else if (!strcmp(this_char, MNTOPT_NOGRPID) ||
			   !strcmp(this_char, MNTOPT_SYSVGROUPS)) {
			mp->m_flags &= ~XFS_MOUNT_GRPID;
		} else if (!strcmp(this_char, MNTOPT_WSYNC)) {
			mp->m_flags |= XFS_MOUNT_WSYNC;
		} else if (!strcmp(this_char, MNTOPT_NORECOVERY)) {
			mp->m_flags |= XFS_MOUNT_NORECOVERY;
		} else if (!strcmp(this_char, MNTOPT_NOALIGN)) {
			mp->m_flags |= XFS_MOUNT_NOALIGN;
		} else if (!strcmp(this_char, MNTOPT_SWALLOC)) {
			mp->m_flags |= XFS_MOUNT_SWALLOC;
		} else if (!strcmp(this_char, MNTOPT_SUNIT)) {
			if (!value || !*value) {
				xfs_warn(mp, "%s option requires an argument",
					this_char);
				return EINVAL;
			}
			dsunit = simple_strtoul(value, &eov, 10);
		} else if (!strcmp(this_char, MNTOPT_SWIDTH)) {
			if (!value || !*value) {
				xfs_warn(mp, "%s option requires an argument",
					this_char);
				return EINVAL;
			}
			dswidth = simple_strtoul(value, &eov, 10);
		} else if (!strcmp(this_char, MNTOPT_64BITINODE)) {
			mp->m_flags &= ~XFS_MOUNT_SMALL_INUMS;
#if !XFS_BIG_INUMS
			xfs_warn(mp, "%s option not allowed on this system",
				this_char);
			return EINVAL;
#endif
		} else if (!strcmp(this_char, MNTOPT_NOUUID)) {
			mp->m_flags |= XFS_MOUNT_NOUUID;
		} else if (!strcmp(this_char, MNTOPT_BARRIER)) {
			mp->m_flags |= XFS_MOUNT_BARRIER;
		} else if (!strcmp(this_char, MNTOPT_NOBARRIER)) {
			mp->m_flags &= ~XFS_MOUNT_BARRIER;
		} else if (!strcmp(this_char, MNTOPT_IKEEP)) {
			mp->m_flags |= XFS_MOUNT_IKEEP;
		} else if (!strcmp(this_char, MNTOPT_NOIKEEP)) {
			mp->m_flags &= ~XFS_MOUNT_IKEEP;
		} else if (!strcmp(this_char, MNTOPT_LARGEIO)) {
			mp->m_flags &= ~XFS_MOUNT_COMPAT_IOSIZE;
		} else if (!strcmp(this_char, MNTOPT_NOLARGEIO)) {
			mp->m_flags |= XFS_MOUNT_COMPAT_IOSIZE;
		} else if (!strcmp(this_char, MNTOPT_ATTR2)) {
			mp->m_flags |= XFS_MOUNT_ATTR2;
		} else if (!strcmp(this_char, MNTOPT_NOATTR2)) {
			mp->m_flags &= ~XFS_MOUNT_ATTR2;
			mp->m_flags |= XFS_MOUNT_NOATTR2;
		} else if (!strcmp(this_char, MNTOPT_FILESTREAM)) {
			mp->m_flags |= XFS_MOUNT_FILESTREAMS;
		} else if (!strcmp(this_char, MNTOPT_NOQUOTA)) {
			mp->m_qflags &= ~XFS_ALL_QUOTA_ACCT;
			mp->m_qflags &= ~XFS_ALL_QUOTA_ENFD;
			mp->m_qflags &= ~XFS_ALL_QUOTA_ACTIVE;
		} else if (!strcmp(this_char, MNTOPT_QUOTA) ||
			   !strcmp(this_char, MNTOPT_UQUOTA) ||
			   !strcmp(this_char, MNTOPT_USRQUOTA)) {
			mp->m_qflags |= (XFS_UQUOTA_ACCT | XFS_UQUOTA_ACTIVE |
					 XFS_UQUOTA_ENFD);
		} else if (!strcmp(this_char, MNTOPT_QUOTANOENF) ||
			   !strcmp(this_char, MNTOPT_UQUOTANOENF)) {
			mp->m_qflags |= (XFS_UQUOTA_ACCT | XFS_UQUOTA_ACTIVE);
			mp->m_qflags &= ~XFS_UQUOTA_ENFD;
		} else if (!strcmp(this_char, MNTOPT_PQUOTA) ||
			   !strcmp(this_char, MNTOPT_PRJQUOTA)) {
			mp->m_qflags |= (XFS_PQUOTA_ACCT | XFS_PQUOTA_ACTIVE |
					 XFS_OQUOTA_ENFD);
		} else if (!strcmp(this_char, MNTOPT_PQUOTANOENF)) {
			mp->m_qflags |= (XFS_PQUOTA_ACCT | XFS_PQUOTA_ACTIVE);
			mp->m_qflags &= ~XFS_OQUOTA_ENFD;
		} else if (!strcmp(this_char, MNTOPT_GQUOTA) ||
			   !strcmp(this_char, MNTOPT_GRPQUOTA)) {
			mp->m_qflags |= (XFS_GQUOTA_ACCT | XFS_GQUOTA_ACTIVE |
					 XFS_OQUOTA_ENFD);
		} else if (!strcmp(this_char, MNTOPT_GQUOTANOENF)) {
			mp->m_qflags |= (XFS_GQUOTA_ACCT | XFS_GQUOTA_ACTIVE);
			mp->m_qflags &= ~XFS_OQUOTA_ENFD;
		} else if (!strcmp(this_char, MNTOPT_DELAYLOG)) {
			xfs_warn(mp,
	"delaylog is the default now, option is deprecated.");
		} else if (!strcmp(this_char, MNTOPT_NODELAYLOG)) {
			xfs_warn(mp,
	"nodelaylog support has been removed, option is deprecated.");
		} else if (!strcmp(this_char, MNTOPT_DISCARD)) {
			mp->m_flags |= XFS_MOUNT_DISCARD;
		} else if (!strcmp(this_char, MNTOPT_NODISCARD)) {
			mp->m_flags &= ~XFS_MOUNT_DISCARD;
		} else if (!strcmp(this_char, "ihashsize")) {
			xfs_warn(mp,
	"ihashsize no longer used, option is deprecated.");
		} else if (!strcmp(this_char, "osyncisdsync")) {
			xfs_warn(mp,
	"osyncisdsync has no effect, option is deprecated.");
		} else if (!strcmp(this_char, "osyncisosync")) {
			xfs_warn(mp,
	"osyncisosync has no effect, option is deprecated.");
		} else if (!strcmp(this_char, "irixsgid")) {
			xfs_warn(mp,
	"irixsgid is now a sysctl(2) variable, option is deprecated.");
		} else {
			xfs_warn(mp, "unknown mount option [%s].", this_char);
			return EINVAL;
		}
	}

	/*
	 * no recovery flag requires a read-only mount
	 */
	if ((mp->m_flags & XFS_MOUNT_NORECOVERY) &&
	    !(mp->m_flags & XFS_MOUNT_RDONLY)) {
		xfs_warn(mp, "no-recovery mounts must be read-only.");
		return EINVAL;
	}

	if ((mp->m_flags & XFS_MOUNT_NOALIGN) && (dsunit || dswidth)) {
		xfs_warn(mp,
	"sunit and swidth options incompatible with the noalign option");
		return EINVAL;
	}

#ifndef CONFIG_XFS_QUOTA
	if (XFS_IS_QUOTA_RUNNING(mp)) {
		xfs_warn(mp, "quota support not available in this kernel.");
		return EINVAL;
	}
#endif

	if ((mp->m_qflags & (XFS_GQUOTA_ACCT | XFS_GQUOTA_ACTIVE)) &&
	    (mp->m_qflags & (XFS_PQUOTA_ACCT | XFS_PQUOTA_ACTIVE))) {
		xfs_warn(mp, "cannot mount with both project and group quota");
		return EINVAL;
	}

	if ((dsunit && !dswidth) || (!dsunit && dswidth)) {
		xfs_warn(mp, "sunit and swidth must be specified together");
		return EINVAL;
	}

	if (dsunit && (dswidth % dsunit != 0)) {
		xfs_warn(mp,
	"stripe width (%d) must be a multiple of the stripe unit (%d)",
			dswidth, dsunit);
		return EINVAL;
	}

done:
	if (!(mp->m_flags & XFS_MOUNT_NOALIGN)) {
		/*
		 * At this point the superblock has not been read
		 * in, therefore we do not know the block size.
		 * Before the mount call ends we will convert
		 * these to FSBs.
		 */
		if (dsunit) {
			mp->m_dalign = dsunit;
			mp->m_flags |= XFS_MOUNT_RETERR;
		}

		if (dswidth)
			mp->m_swidth = dswidth;
	}

	/* -1 (unset) and 0 are accepted without range checking. */
	if (mp->m_logbufs != -1 &&
	    mp->m_logbufs != 0 &&
	    (mp->m_logbufs < XLOG_MIN_ICLOGS ||
	     mp->m_logbufs > XLOG_MAX_ICLOGS)) {
		xfs_warn(mp, "invalid logbufs value: %d [not %d-%d]",
			mp->m_logbufs, XLOG_MIN_ICLOGS, XLOG_MAX_ICLOGS);
		return XFS_ERROR(EINVAL);
	}
	/* Same for the log buffer size, which must also be a power of two. */
	if (mp->m_logbsize != -1 &&
	    mp->m_logbsize !=  0 &&
	    (mp->m_logbsize < XLOG_MIN_RECORD_BSIZE ||
	     mp->m_logbsize > XLOG_MAX_RECORD_BSIZE ||
	     !is_power_of_2(mp->m_logbsize))) {
		xfs_warn(mp,
			"invalid logbufsize: %d [not 16k,32k,64k,128k or 256k]",
			mp->m_logbsize);
		return XFS_ERROR(EINVAL);
	}

	/* A zero iosizelog means no I/O size was requested (or it was 1). */
	if (iosizelog) {
		if (iosizelog > XFS_MAX_IO_LOG ||
		    iosizelog < XFS_MIN_IO_LOG) {
			xfs_warn(mp, "invalid log iosize: %d [not %d-%d]",
				iosizelog, XFS_MIN_IO_LOG,
				XFS_MAX_IO_LOG);
			return XFS_ERROR(EINVAL);
		}

		mp->m_flags |= XFS_MOUNT_DFLT_IOSIZE;
		mp->m_readio_log = iosizelog;
		mp->m_writeio_log = iosizelog;
	}

	return 0;
}
472 
/* Pairs a mount flag bit with the option text printed for it. */
struct proc_xfs_info {
	int	flag;	/* bit(s) tested against mp->m_flags */
	char	*str;	/* text to emit, including the leading comma */
};
477 
478 STATIC int
479 xfs_showargs(
480 	struct xfs_mount	*mp,
481 	struct seq_file		*m)
482 {
483 	static struct proc_xfs_info xfs_info_set[] = {
484 		/* the few simple ones we can get from the mount struct */
485 		{ XFS_MOUNT_IKEEP,		"," MNTOPT_IKEEP },
486 		{ XFS_MOUNT_WSYNC,		"," MNTOPT_WSYNC },
487 		{ XFS_MOUNT_NOALIGN,		"," MNTOPT_NOALIGN },
488 		{ XFS_MOUNT_SWALLOC,		"," MNTOPT_SWALLOC },
489 		{ XFS_MOUNT_NOUUID,		"," MNTOPT_NOUUID },
490 		{ XFS_MOUNT_NORECOVERY,		"," MNTOPT_NORECOVERY },
491 		{ XFS_MOUNT_ATTR2,		"," MNTOPT_ATTR2 },
492 		{ XFS_MOUNT_FILESTREAMS,	"," MNTOPT_FILESTREAM },
493 		{ XFS_MOUNT_GRPID,		"," MNTOPT_GRPID },
494 		{ XFS_MOUNT_DISCARD,		"," MNTOPT_DISCARD },
495 		{ 0, NULL }
496 	};
497 	static struct proc_xfs_info xfs_info_unset[] = {
498 		/* the few simple ones we can get from the mount struct */
499 		{ XFS_MOUNT_COMPAT_IOSIZE,	"," MNTOPT_LARGEIO },
500 		{ XFS_MOUNT_BARRIER,		"," MNTOPT_NOBARRIER },
501 		{ XFS_MOUNT_SMALL_INUMS,	"," MNTOPT_64BITINODE },
502 		{ 0, NULL }
503 	};
504 	struct proc_xfs_info	*xfs_infop;
505 
506 	for (xfs_infop = xfs_info_set; xfs_infop->flag; xfs_infop++) {
507 		if (mp->m_flags & xfs_infop->flag)
508 			seq_puts(m, xfs_infop->str);
509 	}
510 	for (xfs_infop = xfs_info_unset; xfs_infop->flag; xfs_infop++) {
511 		if (!(mp->m_flags & xfs_infop->flag))
512 			seq_puts(m, xfs_infop->str);
513 	}
514 
515 	if (mp->m_flags & XFS_MOUNT_DFLT_IOSIZE)
516 		seq_printf(m, "," MNTOPT_ALLOCSIZE "=%dk",
517 				(int)(1 << mp->m_writeio_log) >> 10);
518 
519 	if (mp->m_logbufs > 0)
520 		seq_printf(m, "," MNTOPT_LOGBUFS "=%d", mp->m_logbufs);
521 	if (mp->m_logbsize > 0)
522 		seq_printf(m, "," MNTOPT_LOGBSIZE "=%dk", mp->m_logbsize >> 10);
523 
524 	if (mp->m_logname)
525 		seq_printf(m, "," MNTOPT_LOGDEV "=%s", mp->m_logname);
526 	if (mp->m_rtname)
527 		seq_printf(m, "," MNTOPT_RTDEV "=%s", mp->m_rtname);
528 
529 	if (mp->m_dalign > 0)
530 		seq_printf(m, "," MNTOPT_SUNIT "=%d",
531 				(int)XFS_FSB_TO_BB(mp, mp->m_dalign));
532 	if (mp->m_swidth > 0)
533 		seq_printf(m, "," MNTOPT_SWIDTH "=%d",
534 				(int)XFS_FSB_TO_BB(mp, mp->m_swidth));
535 
536 	if (mp->m_qflags & (XFS_UQUOTA_ACCT|XFS_UQUOTA_ENFD))
537 		seq_puts(m, "," MNTOPT_USRQUOTA);
538 	else if (mp->m_qflags & XFS_UQUOTA_ACCT)
539 		seq_puts(m, "," MNTOPT_UQUOTANOENF);
540 
541 	/* Either project or group quotas can be active, not both */
542 
543 	if (mp->m_qflags & XFS_PQUOTA_ACCT) {
544 		if (mp->m_qflags & XFS_OQUOTA_ENFD)
545 			seq_puts(m, "," MNTOPT_PRJQUOTA);
546 		else
547 			seq_puts(m, "," MNTOPT_PQUOTANOENF);
548 	} else if (mp->m_qflags & XFS_GQUOTA_ACCT) {
549 		if (mp->m_qflags & XFS_OQUOTA_ENFD)
550 			seq_puts(m, "," MNTOPT_GRPQUOTA);
551 		else
552 			seq_puts(m, "," MNTOPT_GQUOTANOENF);
553 	}
554 
555 	if (!(mp->m_qflags & XFS_ALL_QUOTA_ACCT))
556 		seq_puts(m, "," MNTOPT_NOQUOTA);
557 
558 	return 0;
559 }
560 __uint64_t
561 xfs_max_file_offset(
562 	unsigned int		blockshift)
563 {
564 	unsigned int		pagefactor = 1;
565 	unsigned int		bitshift = BITS_PER_LONG - 1;
566 
567 	/* Figure out maximum filesize, on Linux this can depend on
568 	 * the filesystem blocksize (on 32 bit platforms).
569 	 * __block_write_begin does this in an [unsigned] long...
570 	 *      page->index << (PAGE_CACHE_SHIFT - bbits)
571 	 * So, for page sized blocks (4K on 32 bit platforms),
572 	 * this wraps at around 8Tb (hence MAX_LFS_FILESIZE which is
573 	 *      (((u64)PAGE_CACHE_SIZE << (BITS_PER_LONG-1))-1)
574 	 * but for smaller blocksizes it is less (bbits = log2 bsize).
575 	 * Note1: get_block_t takes a long (implicit cast from above)
576 	 * Note2: The Large Block Device (LBD and HAVE_SECTOR_T) patch
577 	 * can optionally convert the [unsigned] long from above into
578 	 * an [unsigned] long long.
579 	 */
580 
581 #if BITS_PER_LONG == 32
582 # if defined(CONFIG_LBDAF)
583 	ASSERT(sizeof(sector_t) == 8);
584 	pagefactor = PAGE_CACHE_SIZE;
585 	bitshift = BITS_PER_LONG;
586 # else
587 	pagefactor = PAGE_CACHE_SIZE >> (PAGE_CACHE_SHIFT - blockshift);
588 # endif
589 #endif
590 
591 	return (((__uint64_t)pagefactor) << bitshift) - 1;
592 }
593 
594 STATIC int
595 xfs_blkdev_get(
596 	xfs_mount_t		*mp,
597 	const char		*name,
598 	struct block_device	**bdevp)
599 {
600 	int			error = 0;
601 
602 	*bdevp = blkdev_get_by_path(name, FMODE_READ|FMODE_WRITE|FMODE_EXCL,
603 				    mp);
604 	if (IS_ERR(*bdevp)) {
605 		error = PTR_ERR(*bdevp);
606 		xfs_warn(mp, "Invalid device [%s], error=%d\n", name, error);
607 	}
608 
609 	return -error;
610 }
611 
612 STATIC void
613 xfs_blkdev_put(
614 	struct block_device	*bdev)
615 {
616 	if (bdev)
617 		blkdev_put(bdev, FMODE_READ|FMODE_WRITE|FMODE_EXCL);
618 }
619 
620 void
621 xfs_blkdev_issue_flush(
622 	xfs_buftarg_t		*buftarg)
623 {
624 	blkdev_issue_flush(buftarg->bt_bdev, GFP_NOFS, NULL);
625 }
626 
627 STATIC void
628 xfs_close_devices(
629 	struct xfs_mount	*mp)
630 {
631 	if (mp->m_logdev_targp && mp->m_logdev_targp != mp->m_ddev_targp) {
632 		struct block_device *logdev = mp->m_logdev_targp->bt_bdev;
633 		xfs_free_buftarg(mp, mp->m_logdev_targp);
634 		xfs_blkdev_put(logdev);
635 	}
636 	if (mp->m_rtdev_targp) {
637 		struct block_device *rtdev = mp->m_rtdev_targp->bt_bdev;
638 		xfs_free_buftarg(mp, mp->m_rtdev_targp);
639 		xfs_blkdev_put(rtdev);
640 	}
641 	xfs_free_buftarg(mp, mp->m_ddev_targp);
642 }
643 
/*
 * The file system configurations are:
 *	(1) device (partition) with data and internal log
 *	(2) logical volume with data and log subvolumes.
 *	(3) logical volume with data, log, and realtime subvolumes.
 *
 * We only have to handle opening the log and realtime volumes here if
 * they are present.  The data subvolume has already been opened by
 * get_sb_bdev() and is stored in sb->s_bdev.
 *
 * On success the data, log and (optional) realtime buffer targets of @mp
 * are set up and 0 is returned.  On failure everything opened or
 * allocated here is released again and a positive errno is returned.
 */
STATIC int
xfs_open_devices(
	struct xfs_mount	*mp)
{
	struct block_device	*ddev = mp->m_super->s_bdev;
	struct block_device	*logdev = NULL, *rtdev = NULL;
	int			error;

	/*
	 * Open real time and log devices - order is important.
	 */
	if (mp->m_logname) {
		error = xfs_blkdev_get(mp, mp->m_logname, &logdev);
		if (error)
			goto out;
	}

	if (mp->m_rtname) {
		error = xfs_blkdev_get(mp, mp->m_rtname, &rtdev);
		if (error)
			goto out_close_logdev;

		/* The realtime device must differ from data and log. */
		if (rtdev == ddev || rtdev == logdev) {
			xfs_warn(mp,
	"Cannot mount filesystem with identical rtdev and ddev/logdev.");
			error = EINVAL;
			goto out_close_rtdev;
		}
	}

	/*
	 * Setup xfs_mount buffer target pointers
	 */
	/* Every failure from here on is an allocation failure. */
	error = ENOMEM;
	mp->m_ddev_targp = xfs_alloc_buftarg(mp, ddev, 0, mp->m_fsname);
	if (!mp->m_ddev_targp)
		goto out_close_rtdev;

	if (rtdev) {
		mp->m_rtdev_targp = xfs_alloc_buftarg(mp, rtdev, 1,
							mp->m_fsname);
		if (!mp->m_rtdev_targp)
			goto out_free_ddev_targ;
	}

	/* Without a separate log device the log shares the data target. */
	if (logdev && logdev != ddev) {
		mp->m_logdev_targp = xfs_alloc_buftarg(mp, logdev, 1,
							mp->m_fsname);
		if (!mp->m_logdev_targp)
			goto out_free_rtdev_targ;
	} else {
		mp->m_logdev_targp = mp->m_ddev_targp;
	}

	return 0;

	/* Unwind in reverse order of setup. */
 out_free_rtdev_targ:
	if (mp->m_rtdev_targp)
		xfs_free_buftarg(mp, mp->m_rtdev_targp);
 out_free_ddev_targ:
	xfs_free_buftarg(mp, mp->m_ddev_targp);
 out_close_rtdev:
	if (rtdev)
		xfs_blkdev_put(rtdev);
 out_close_logdev:
	if (logdev && logdev != ddev)
		xfs_blkdev_put(logdev);
 out:
	return error;
}
724 
725 /*
726  * Setup xfs_mount buffer target pointers based on superblock
727  */
728 STATIC int
729 xfs_setup_devices(
730 	struct xfs_mount	*mp)
731 {
732 	int			error;
733 
734 	error = xfs_setsize_buftarg(mp->m_ddev_targp, mp->m_sb.sb_blocksize,
735 				    mp->m_sb.sb_sectsize);
736 	if (error)
737 		return error;
738 
739 	if (mp->m_logdev_targp && mp->m_logdev_targp != mp->m_ddev_targp) {
740 		unsigned int	log_sector_size = BBSIZE;
741 
742 		if (xfs_sb_version_hassector(&mp->m_sb))
743 			log_sector_size = mp->m_sb.sb_logsectsize;
744 		error = xfs_setsize_buftarg(mp->m_logdev_targp,
745 					    mp->m_sb.sb_blocksize,
746 					    log_sector_size);
747 		if (error)
748 			return error;
749 	}
750 	if (mp->m_rtdev_targp) {
751 		error = xfs_setsize_buftarg(mp->m_rtdev_targp,
752 					    mp->m_sb.sb_blocksize,
753 					    mp->m_sb.sb_sectsize);
754 		if (error)
755 			return error;
756 	}
757 
758 	return 0;
759 }
760 
761 STATIC int
762 xfs_init_mount_workqueues(
763 	struct xfs_mount	*mp)
764 {
765 	mp->m_data_workqueue = alloc_workqueue("xfs-data/%s",
766 			WQ_MEM_RECLAIM, 0, mp->m_fsname);
767 	if (!mp->m_data_workqueue)
768 		goto out;
769 
770 	mp->m_unwritten_workqueue = alloc_workqueue("xfs-conv/%s",
771 			WQ_MEM_RECLAIM, 0, mp->m_fsname);
772 	if (!mp->m_unwritten_workqueue)
773 		goto out_destroy_data_iodone_queue;
774 
775 	mp->m_cil_workqueue = alloc_workqueue("xfs-cil/%s",
776 			WQ_MEM_RECLAIM, 0, mp->m_fsname);
777 	if (!mp->m_cil_workqueue)
778 		goto out_destroy_unwritten;
779 	return 0;
780 
781 out_destroy_unwritten:
782 	destroy_workqueue(mp->m_unwritten_workqueue);
783 out_destroy_data_iodone_queue:
784 	destroy_workqueue(mp->m_data_workqueue);
785 out:
786 	return -ENOMEM;
787 }
788 
/*
 * Destroy the three per-mount workqueues (cil, data, unwritten) of @mp.
 */
STATIC void
xfs_destroy_mount_workqueues(
	struct xfs_mount	*mp)
{
	destroy_workqueue(mp->m_cil_workqueue);
	destroy_workqueue(mp->m_data_workqueue);
	destroy_workqueue(mp->m_unwritten_workqueue);
}
797 
/*
 * Catch misguided souls that try to use this interface on XFS.
 *
 * Calling this is treated as a bug: it hits BUG() unconditionally.  The
 * return statement only satisfies the function signature.
 */
STATIC struct inode *
xfs_fs_alloc_inode(
	struct super_block	*sb)
{
	BUG();
	return NULL;
}
806 
807 /*
808  * Now that the generic code is guaranteed not to be accessing
809  * the linux inode, we can reclaim the inode.
810  */
811 STATIC void
812 xfs_fs_destroy_inode(
813 	struct inode		*inode)
814 {
815 	struct xfs_inode	*ip = XFS_I(inode);
816 
817 	trace_xfs_destroy_inode(ip);
818 
819 	XFS_STATS_INC(vn_reclaim);
820 
821 	/* bad inode, get out here ASAP */
822 	if (is_bad_inode(inode))
823 		goto out_reclaim;
824 
825 	ASSERT(XFS_FORCED_SHUTDOWN(ip->i_mount) || ip->i_delayed_blks == 0);
826 
827 	/*
828 	 * We should never get here with one of the reclaim flags already set.
829 	 */
830 	ASSERT_ALWAYS(!xfs_iflags_test(ip, XFS_IRECLAIMABLE));
831 	ASSERT_ALWAYS(!xfs_iflags_test(ip, XFS_IRECLAIM));
832 
833 	/*
834 	 * We always use background reclaim here because even if the
835 	 * inode is clean, it still may be under IO and hence we have
836 	 * to take the flush lock. The background reclaim path handles
837 	 * this more efficiently than we can here, so simply let background
838 	 * reclaim tear down all inodes.
839 	 */
840 out_reclaim:
841 	xfs_inode_set_reclaim_tag(ip);
842 }
843 
/*
 * Slab object creation initialisation for the XFS inode.
 * This covers only the idempotent fields in the XFS inode;
 * all other fields need to be initialised on allocation
 * from the slab. This avoids the need to repeatedly initialise
 * fields in the xfs inode that are left in the initialised state
 * when freeing the inode.
 *
 * @inode: untyped pointer to the struct xfs_inode being constructed.
 */
STATIC void
xfs_fs_inode_init_once(
	void			*inode)
{
	struct xfs_inode	*ip = inode;

	/* Start from an all-zero object before the one-time setup. */
	memset(ip, 0, sizeof(struct xfs_inode));

	/* vfs inode */
	inode_init_once(VFS_I(ip));

	/* xfs inode */
	atomic_set(&ip->i_pincount, 0);
	spin_lock_init(&ip->i_flags_lock);

	mrlock_init(&ip->i_lock, MRLOCK_ALLOW_EQUAL_PRI|MRLOCK_BARRIER,
		     "xfsino", ip->i_ino);
}
870 
/*
 * Evict @inode: drop its page cache, clear the VFS inode, update the
 * vnode statistics and then pass the XFS inode to xfs_inactive().
 *
 * The iolock is asserted to be unlocked on entry.
 */
STATIC void
xfs_fs_evict_inode(
	struct inode		*inode)
{
	xfs_inode_t		*ip = XFS_I(inode);

	ASSERT(!rwsem_is_locked(&ip->i_iolock.mr_lock));

	trace_xfs_evict_inode(ip);

	/* Remove all cached pages, starting at offset 0. */
	truncate_inode_pages(&inode->i_data, 0);
	clear_inode(inode);
	XFS_STATS_INC(vn_rele);
	XFS_STATS_INC(vn_remove);
	XFS_STATS_DEC(vn_active);

	xfs_inactive(ip);
}
889 
890 /*
891  * We do an unlocked check for XFS_IDONTCACHE here because we are already
892  * serialised against cache hits here via the inode->i_lock and igrab() in
893  * xfs_iget_cache_hit(). Hence a lookup that might clear this flag will not be
894  * racing with us, and it avoids needing to grab a spinlock here for every inode
895  * we drop the final reference on.
896  */
897 STATIC int
898 xfs_fs_drop_inode(
899 	struct inode		*inode)
900 {
901 	struct xfs_inode	*ip = XFS_I(inode);
902 
903 	return generic_drop_inode(inode) || (ip->i_flags & XFS_IDONTCACHE);
904 }
905 
/*
 * Free the filesystem, realtime device and log device name strings of
 * @mp.  The pointers themselves are not reset.
 */
STATIC void
xfs_free_fsname(
	struct xfs_mount	*mp)
{
	kfree(mp->m_fsname);
	kfree(mp->m_rtname);
	kfree(mp->m_logname);
}
914 
/*
 * Tear down the XFS mount attached to @sb and free the mount structure.
 *
 * The calls run in a fixed sequence: filestreams, unmount, sync daemon,
 * superblock buffer, per-cpu counters, workqueues, devices, name strings
 * and finally the xfs_mount itself.
 */
STATIC void
xfs_fs_put_super(
	struct super_block	*sb)
{
	struct xfs_mount	*mp = XFS_M(sb);

	xfs_filestream_unmount(mp);
	xfs_unmountfs(mp);
	xfs_syncd_stop(mp);
	xfs_freesb(mp);
	xfs_icsb_destroy_counters(mp);
	xfs_destroy_mount_workqueues(mp);
	xfs_close_devices(mp);
	xfs_free_fsname(mp);
	/* mp must not be touched after this point. */
	kfree(mp);
}
931 
932 STATIC int
933 xfs_fs_sync_fs(
934 	struct super_block	*sb,
935 	int			wait)
936 {
937 	struct xfs_mount	*mp = XFS_M(sb);
938 	int			error;
939 
940 	/*
941 	 * Doing anything during the async pass would be counterproductive.
942 	 */
943 	if (!wait)
944 		return 0;
945 
946 	error = xfs_quiesce_data(mp);
947 	if (error)
948 		return -error;
949 
950 	if (laptop_mode) {
951 		/*
952 		 * The disk must be active because we're syncing.
953 		 * We schedule xfssyncd now (now that the disk is
954 		 * active) instead of later (when it might not be).
955 		 */
956 		flush_delayed_work_sync(&mp->m_sync_work);
957 	}
958 
959 	return 0;
960 }
961 
/*
 * Fill in @statp with filesystem statistics for the mount that owns
 * @dentry.  Always returns 0.
 *
 * Block and inode counts are read from the in-core superblock under
 * m_sb_lock, after the per-cpu counters have been synced into it.
 */
STATIC int
xfs_fs_statfs(
	struct dentry		*dentry,
	struct kstatfs		*statp)
{
	struct xfs_mount	*mp = XFS_M(dentry->d_sb);
	xfs_sb_t		*sbp = &mp->m_sb;
	struct xfs_inode	*ip = XFS_I(dentry->d_inode);
	__uint64_t		fakeinos, id;
	xfs_extlen_t		lsize;
	__int64_t		ffree;

	statp->f_type = XFS_SB_MAGIC;
	statp->f_namelen = MAXNAMELEN - 1;

	/* The fsid is the encoded data device number, split into halves. */
	id = huge_encode_dev(mp->m_ddev_targp->bt_dev);
	statp->f_fsid.val[0] = (u32)id;
	statp->f_fsid.val[1] = (u32)(id >> 32);

	xfs_icsb_sync_counters(mp, XFS_ICSB_LAZY_COUNT);

	spin_lock(&mp->m_sb_lock);
	statp->f_bsize = sbp->sb_blocksize;
	/* Log blocks are subtracted only when sb_logstart is non-zero. */
	lsize = sbp->sb_logstart ? sbp->sb_logblocks : 0;
	statp->f_blocks = sbp->sb_dblocks - lsize;
	statp->f_bfree = statp->f_bavail =
				sbp->sb_fdblocks - XFS_ALLOC_SET_ASIDE(mp);
	/*
	 * Estimate how many more inodes the free blocks could hold;
	 * sb_inopblog is presumably log2(inodes per block) -- TODO confirm.
	 */
	fakeinos = statp->f_bfree << sbp->sb_inopblog;
	statp->f_files =
	    MIN(sbp->sb_icount + fakeinos, (__uint64_t)XFS_MAXINUMBER);
	/* A non-zero m_maxicount caps the reported inode total. */
	if (mp->m_maxicount)
		statp->f_files = min_t(typeof(statp->f_files),
					statp->f_files,
					mp->m_maxicount);

	/* make sure statp->f_ffree does not underflow */
	ffree = statp->f_files - (sbp->sb_icount - sbp->sb_ifree);
	statp->f_ffree = max_t(__int64_t, ffree, 0);

	spin_unlock(&mp->m_sb_lock);

	/*
	 * Let the quota code adjust the numbers only when the inode has
	 * the project-inherit flag and project quota is both accounted
	 * and enforced.
	 */
	if ((ip->i_d.di_flags & XFS_DIFLAG_PROJINHERIT) &&
	    ((mp->m_qflags & (XFS_PQUOTA_ACCT|XFS_OQUOTA_ENFD))) ==
			      (XFS_PQUOTA_ACCT|XFS_OQUOTA_ENFD))
		xfs_qm_statvfs(ip, statp);
	return 0;
}
1009 
1010 STATIC void
1011 xfs_save_resvblks(struct xfs_mount *mp)
1012 {
1013 	__uint64_t resblks = 0;
1014 
1015 	mp->m_resblks_save = mp->m_resblks;
1016 	xfs_reserve_blocks(mp, &resblks, NULL);
1017 }
1018 
1019 STATIC void
1020 xfs_restore_resvblks(struct xfs_mount *mp)
1021 {
1022 	__uint64_t resblks;
1023 
1024 	if (mp->m_resblks_save) {
1025 		resblks = mp->m_resblks_save;
1026 		mp->m_resblks_save = 0;
1027 	} else
1028 		resblks = xfs_default_resblks(mp);
1029 
1030 	xfs_reserve_blocks(mp, &resblks, NULL);
1031 }
1032 
1033 STATIC int
1034 xfs_fs_remount(
1035 	struct super_block	*sb,
1036 	int			*flags,
1037 	char			*options)
1038 {
1039 	struct xfs_mount	*mp = XFS_M(sb);
1040 	substring_t		args[MAX_OPT_ARGS];
1041 	char			*p;
1042 	int			error;
1043 
1044 	while ((p = strsep(&options, ",")) != NULL) {
1045 		int token;
1046 
1047 		if (!*p)
1048 			continue;
1049 
1050 		token = match_token(p, tokens, args);
1051 		switch (token) {
1052 		case Opt_barrier:
1053 			mp->m_flags |= XFS_MOUNT_BARRIER;
1054 			break;
1055 		case Opt_nobarrier:
1056 			mp->m_flags &= ~XFS_MOUNT_BARRIER;
1057 			break;
1058 		default:
1059 			/*
1060 			 * Logically we would return an error here to prevent
1061 			 * users from believing they might have changed
1062 			 * mount options using remount which can't be changed.
1063 			 *
1064 			 * But unfortunately mount(8) adds all options from
1065 			 * mtab and fstab to the mount arguments in some cases
1066 			 * so we can't blindly reject options, but have to
1067 			 * check for each specified option if it actually
1068 			 * differs from the currently set option and only
1069 			 * reject it if that's the case.
1070 			 *
1071 			 * Until that is implemented we return success for
1072 			 * every remount request, and silently ignore all
1073 			 * options that we can't actually change.
1074 			 */
1075 #if 0
1076 			xfs_info(mp,
1077 		"mount option \"%s\" not supported for remount\n", p);
1078 			return -EINVAL;
1079 #else
1080 			break;
1081 #endif
1082 		}
1083 	}
1084 
1085 	/* ro -> rw */
1086 	if ((mp->m_flags & XFS_MOUNT_RDONLY) && !(*flags & MS_RDONLY)) {
1087 		mp->m_flags &= ~XFS_MOUNT_RDONLY;
1088 
1089 		/*
1090 		 * If this is the first remount to writeable state we
1091 		 * might have some superblock changes to update.
1092 		 */
1093 		if (mp->m_update_flags) {
1094 			error = xfs_mount_log_sb(mp, mp->m_update_flags);
1095 			if (error) {
1096 				xfs_warn(mp, "failed to write sb changes");
1097 				return error;
1098 			}
1099 			mp->m_update_flags = 0;
1100 		}
1101 
1102 		/*
1103 		 * Fill out the reserve pool if it is empty. Use the stashed
1104 		 * value if it is non-zero, otherwise go with the default.
1105 		 */
1106 		xfs_restore_resvblks(mp);
1107 	}
1108 
1109 	/* rw -> ro */
1110 	if (!(mp->m_flags & XFS_MOUNT_RDONLY) && (*flags & MS_RDONLY)) {
1111 		/*
1112 		 * After we have synced the data but before we sync the
1113 		 * metadata, we need to free up the reserve block pool so that
1114 		 * the used block count in the superblock on disk is correct at
1115 		 * the end of the remount. Stash the current reserve pool size
1116 		 * so that if we get remounted rw, we can return it to the same
1117 		 * size.
1118 		 */
1119 
1120 		xfs_quiesce_data(mp);
1121 		xfs_save_resvblks(mp);
1122 		xfs_quiesce_attr(mp);
1123 		mp->m_flags |= XFS_MOUNT_RDONLY;
1124 	}
1125 
1126 	return 0;
1127 }
1128 
1129 /*
1130  * Second stage of a freeze. The data is already frozen so we only
1131  * need to take care of the metadata. Once that's done write a dummy
1132  * record to dirty the log in case of a crash while frozen.
1133  */
1134 STATIC int
1135 xfs_fs_freeze(
1136 	struct super_block	*sb)
1137 {
1138 	struct xfs_mount	*mp = XFS_M(sb);
1139 
1140 	xfs_save_resvblks(mp);
1141 	xfs_quiesce_attr(mp);
1142 	return -xfs_fs_log_dummy(mp);
1143 }
1144 
1145 STATIC int
1146 xfs_fs_unfreeze(
1147 	struct super_block	*sb)
1148 {
1149 	struct xfs_mount	*mp = XFS_M(sb);
1150 
1151 	xfs_restore_resvblks(mp);
1152 	return 0;
1153 }
1154 
1155 STATIC int
1156 xfs_fs_show_options(
1157 	struct seq_file		*m,
1158 	struct dentry		*root)
1159 {
1160 	return -xfs_showargs(XFS_M(root->d_sb), m);
1161 }
1162 
1163 /*
1164  * This function fills in xfs_mount_t fields based on mount args.
1165  * Note: the superblock _has_ now been read in.
1166  */
1167 STATIC int
1168 xfs_finish_flags(
1169 	struct xfs_mount	*mp)
1170 {
1171 	int			ronly = (mp->m_flags & XFS_MOUNT_RDONLY);
1172 
1173 	/* Fail a mount where the logbuf is smaller than the log stripe */
1174 	if (xfs_sb_version_haslogv2(&mp->m_sb)) {
1175 		if (mp->m_logbsize <= 0 &&
1176 		    mp->m_sb.sb_logsunit > XLOG_BIG_RECORD_BSIZE) {
1177 			mp->m_logbsize = mp->m_sb.sb_logsunit;
1178 		} else if (mp->m_logbsize > 0 &&
1179 			   mp->m_logbsize < mp->m_sb.sb_logsunit) {
1180 			xfs_warn(mp,
1181 		"logbuf size must be greater than or equal to log stripe size");
1182 			return XFS_ERROR(EINVAL);
1183 		}
1184 	} else {
1185 		/* Fail a mount if the logbuf is larger than 32K */
1186 		if (mp->m_logbsize > XLOG_BIG_RECORD_BSIZE) {
1187 			xfs_warn(mp,
1188 		"logbuf size for version 1 logs must be 16K or 32K");
1189 			return XFS_ERROR(EINVAL);
1190 		}
1191 	}
1192 
1193 	/*
1194 	 * mkfs'ed attr2 will turn on attr2 mount unless explicitly
1195 	 * told by noattr2 to turn it off
1196 	 */
1197 	if (xfs_sb_version_hasattr2(&mp->m_sb) &&
1198 	    !(mp->m_flags & XFS_MOUNT_NOATTR2))
1199 		mp->m_flags |= XFS_MOUNT_ATTR2;
1200 
1201 	/*
1202 	 * prohibit r/w mounts of read-only filesystems
1203 	 */
1204 	if ((mp->m_sb.sb_flags & XFS_SBF_READONLY) && !ronly) {
1205 		xfs_warn(mp,
1206 			"cannot mount a read-only filesystem as read-write");
1207 		return XFS_ERROR(EROFS);
1208 	}
1209 
1210 	return 0;
1211 }
1212 
1213 STATIC int
1214 xfs_fs_fill_super(
1215 	struct super_block	*sb,
1216 	void			*data,
1217 	int			silent)
1218 {
1219 	struct inode		*root;
1220 	struct xfs_mount	*mp = NULL;
1221 	int			flags = 0, error = ENOMEM;
1222 
1223 	mp = kzalloc(sizeof(struct xfs_mount), GFP_KERNEL);
1224 	if (!mp)
1225 		goto out;
1226 
1227 	spin_lock_init(&mp->m_sb_lock);
1228 	mutex_init(&mp->m_growlock);
1229 	atomic_set(&mp->m_active_trans, 0);
1230 
1231 	mp->m_super = sb;
1232 	sb->s_fs_info = mp;
1233 
1234 	error = xfs_parseargs(mp, (char *)data);
1235 	if (error)
1236 		goto out_free_fsname;
1237 
1238 	sb_min_blocksize(sb, BBSIZE);
1239 	sb->s_xattr = xfs_xattr_handlers;
1240 	sb->s_export_op = &xfs_export_operations;
1241 #ifdef CONFIG_XFS_QUOTA
1242 	sb->s_qcop = &xfs_quotactl_operations;
1243 #endif
1244 	sb->s_op = &xfs_super_operations;
1245 
1246 	if (silent)
1247 		flags |= XFS_MFSI_QUIET;
1248 
1249 	error = xfs_open_devices(mp);
1250 	if (error)
1251 		goto out_free_fsname;
1252 
1253 	error = xfs_init_mount_workqueues(mp);
1254 	if (error)
1255 		goto out_close_devices;
1256 
1257 	error = xfs_icsb_init_counters(mp);
1258 	if (error)
1259 		goto out_destroy_workqueues;
1260 
1261 	error = xfs_readsb(mp, flags);
1262 	if (error)
1263 		goto out_destroy_counters;
1264 
1265 	error = xfs_finish_flags(mp);
1266 	if (error)
1267 		goto out_free_sb;
1268 
1269 	error = xfs_setup_devices(mp);
1270 	if (error)
1271 		goto out_free_sb;
1272 
1273 	error = xfs_filestream_mount(mp);
1274 	if (error)
1275 		goto out_free_sb;
1276 
1277 	/*
1278 	 * we must configure the block size in the superblock before we run the
1279 	 * full mount process as the mount process can lookup and cache inodes.
1280 	 * For the same reason we must also initialise the syncd and register
1281 	 * the inode cache shrinker so that inodes can be reclaimed during
1282 	 * operations like a quotacheck that iterate all inodes in the
1283 	 * filesystem.
1284 	 */
1285 	sb->s_magic = XFS_SB_MAGIC;
1286 	sb->s_blocksize = mp->m_sb.sb_blocksize;
1287 	sb->s_blocksize_bits = ffs(sb->s_blocksize) - 1;
1288 	sb->s_maxbytes = xfs_max_file_offset(sb->s_blocksize_bits);
1289 	sb->s_max_links = XFS_MAXLINK;
1290 	sb->s_time_gran = 1;
1291 	set_posix_acl_flag(sb);
1292 
1293 	error = xfs_syncd_init(mp);
1294 	if (error)
1295 		goto out_filestream_unmount;
1296 
1297 	error = xfs_mountfs(mp);
1298 	if (error)
1299 		goto out_syncd_stop;
1300 
1301 	root = igrab(VFS_I(mp->m_rootip));
1302 	if (!root) {
1303 		error = ENOENT;
1304 		goto out_unmount;
1305 	}
1306 	if (is_bad_inode(root)) {
1307 		error = EINVAL;
1308 		goto out_unmount;
1309 	}
1310 	sb->s_root = d_make_root(root);
1311 	if (!sb->s_root) {
1312 		error = ENOMEM;
1313 		goto out_unmount;
1314 	}
1315 
1316 	return 0;
1317  out_syncd_stop:
1318 	xfs_syncd_stop(mp);
1319  out_filestream_unmount:
1320 	xfs_filestream_unmount(mp);
1321  out_free_sb:
1322 	xfs_freesb(mp);
1323  out_destroy_counters:
1324 	xfs_icsb_destroy_counters(mp);
1325 out_destroy_workqueues:
1326 	xfs_destroy_mount_workqueues(mp);
1327  out_close_devices:
1328 	xfs_close_devices(mp);
1329  out_free_fsname:
1330 	xfs_free_fsname(mp);
1331 	kfree(mp);
1332  out:
1333 	return -error;
1334 
1335  out_unmount:
1336 	xfs_filestream_unmount(mp);
1337 	xfs_unmountfs(mp);
1338 	xfs_syncd_stop(mp);
1339 	goto out_free_sb;
1340 }
1341 
1342 STATIC struct dentry *
1343 xfs_fs_mount(
1344 	struct file_system_type	*fs_type,
1345 	int			flags,
1346 	const char		*dev_name,
1347 	void			*data)
1348 {
1349 	return mount_bdev(fs_type, flags, dev_name, data, xfs_fs_fill_super);
1350 }
1351 
/*
 * ->nr_cached_objects: report the value of xfs_reclaim_inodes_count()
 * for the mount behind @sb.
 */
static int
xfs_fs_nr_cached_objects(
	struct super_block	*sb)
{
	struct xfs_mount	*mp = XFS_M(sb);

	return xfs_reclaim_inodes_count(mp);
}
1358 
/*
 * ->free_cached_objects: pass the scan count @nr_to_scan on to
 * xfs_reclaim_inodes_nr() for the mount behind @sb.
 */
static void
xfs_fs_free_cached_objects(
	struct super_block	*sb,
	int			nr_to_scan)
{
	struct xfs_mount	*mp = XFS_M(sb);

	xfs_reclaim_inodes_nr(mp, nr_to_scan);
}
1366 
1367 static const struct super_operations xfs_super_operations = {
1368 	.alloc_inode		= xfs_fs_alloc_inode,
1369 	.destroy_inode		= xfs_fs_destroy_inode,
1370 	.evict_inode		= xfs_fs_evict_inode,
1371 	.drop_inode		= xfs_fs_drop_inode,
1372 	.put_super		= xfs_fs_put_super,
1373 	.sync_fs		= xfs_fs_sync_fs,
1374 	.freeze_fs		= xfs_fs_freeze,
1375 	.unfreeze_fs		= xfs_fs_unfreeze,
1376 	.statfs			= xfs_fs_statfs,
1377 	.remount_fs		= xfs_fs_remount,
1378 	.show_options		= xfs_fs_show_options,
1379 	.nr_cached_objects	= xfs_fs_nr_cached_objects,
1380 	.free_cached_objects	= xfs_fs_free_cached_objects,
1381 };
1382 
1383 static struct file_system_type xfs_fs_type = {
1384 	.owner			= THIS_MODULE,
1385 	.name			= "xfs",
1386 	.mount			= xfs_fs_mount,
1387 	.kill_sb		= kill_block_super,
1388 	.fs_flags		= FS_REQUIRES_DEV,
1389 };
1390 
1391 STATIC int __init
1392 xfs_init_zones(void)
1393 {
1394 
1395 	xfs_ioend_zone = kmem_zone_init(sizeof(xfs_ioend_t), "xfs_ioend");
1396 	if (!xfs_ioend_zone)
1397 		goto out;
1398 
1399 	xfs_ioend_pool = mempool_create_slab_pool(4 * MAX_BUF_PER_PAGE,
1400 						  xfs_ioend_zone);
1401 	if (!xfs_ioend_pool)
1402 		goto out_destroy_ioend_zone;
1403 
1404 	xfs_log_ticket_zone = kmem_zone_init(sizeof(xlog_ticket_t),
1405 						"xfs_log_ticket");
1406 	if (!xfs_log_ticket_zone)
1407 		goto out_destroy_ioend_pool;
1408 
1409 	xfs_bmap_free_item_zone = kmem_zone_init(sizeof(xfs_bmap_free_item_t),
1410 						"xfs_bmap_free_item");
1411 	if (!xfs_bmap_free_item_zone)
1412 		goto out_destroy_log_ticket_zone;
1413 
1414 	xfs_btree_cur_zone = kmem_zone_init(sizeof(xfs_btree_cur_t),
1415 						"xfs_btree_cur");
1416 	if (!xfs_btree_cur_zone)
1417 		goto out_destroy_bmap_free_item_zone;
1418 
1419 	xfs_da_state_zone = kmem_zone_init(sizeof(xfs_da_state_t),
1420 						"xfs_da_state");
1421 	if (!xfs_da_state_zone)
1422 		goto out_destroy_btree_cur_zone;
1423 
1424 	xfs_ifork_zone = kmem_zone_init(sizeof(xfs_ifork_t), "xfs_ifork");
1425 	if (!xfs_ifork_zone)
1426 		goto out_destroy_da_state_zone;
1427 
1428 	xfs_trans_zone = kmem_zone_init(sizeof(xfs_trans_t), "xfs_trans");
1429 	if (!xfs_trans_zone)
1430 		goto out_destroy_ifork_zone;
1431 
1432 	xfs_log_item_desc_zone =
1433 		kmem_zone_init(sizeof(struct xfs_log_item_desc),
1434 			       "xfs_log_item_desc");
1435 	if (!xfs_log_item_desc_zone)
1436 		goto out_destroy_trans_zone;
1437 
1438 	/*
1439 	 * The size of the zone allocated buf log item is the maximum
1440 	 * size possible under XFS.  This wastes a little bit of memory,
1441 	 * but it is much faster.
1442 	 */
1443 	xfs_buf_item_zone = kmem_zone_init(sizeof(struct xfs_buf_log_item),
1444 					   "xfs_buf_item");
1445 	if (!xfs_buf_item_zone)
1446 		goto out_destroy_log_item_desc_zone;
1447 
1448 	xfs_efd_zone = kmem_zone_init((sizeof(xfs_efd_log_item_t) +
1449 			((XFS_EFD_MAX_FAST_EXTENTS - 1) *
1450 				 sizeof(xfs_extent_t))), "xfs_efd_item");
1451 	if (!xfs_efd_zone)
1452 		goto out_destroy_buf_item_zone;
1453 
1454 	xfs_efi_zone = kmem_zone_init((sizeof(xfs_efi_log_item_t) +
1455 			((XFS_EFI_MAX_FAST_EXTENTS - 1) *
1456 				sizeof(xfs_extent_t))), "xfs_efi_item");
1457 	if (!xfs_efi_zone)
1458 		goto out_destroy_efd_zone;
1459 
1460 	xfs_inode_zone =
1461 		kmem_zone_init_flags(sizeof(xfs_inode_t), "xfs_inode",
1462 			KM_ZONE_HWALIGN | KM_ZONE_RECLAIM | KM_ZONE_SPREAD,
1463 			xfs_fs_inode_init_once);
1464 	if (!xfs_inode_zone)
1465 		goto out_destroy_efi_zone;
1466 
1467 	xfs_ili_zone =
1468 		kmem_zone_init_flags(sizeof(xfs_inode_log_item_t), "xfs_ili",
1469 					KM_ZONE_SPREAD, NULL);
1470 	if (!xfs_ili_zone)
1471 		goto out_destroy_inode_zone;
1472 
1473 	return 0;
1474 
1475  out_destroy_inode_zone:
1476 	kmem_zone_destroy(xfs_inode_zone);
1477  out_destroy_efi_zone:
1478 	kmem_zone_destroy(xfs_efi_zone);
1479  out_destroy_efd_zone:
1480 	kmem_zone_destroy(xfs_efd_zone);
1481  out_destroy_buf_item_zone:
1482 	kmem_zone_destroy(xfs_buf_item_zone);
1483  out_destroy_log_item_desc_zone:
1484 	kmem_zone_destroy(xfs_log_item_desc_zone);
1485  out_destroy_trans_zone:
1486 	kmem_zone_destroy(xfs_trans_zone);
1487  out_destroy_ifork_zone:
1488 	kmem_zone_destroy(xfs_ifork_zone);
1489  out_destroy_da_state_zone:
1490 	kmem_zone_destroy(xfs_da_state_zone);
1491  out_destroy_btree_cur_zone:
1492 	kmem_zone_destroy(xfs_btree_cur_zone);
1493  out_destroy_bmap_free_item_zone:
1494 	kmem_zone_destroy(xfs_bmap_free_item_zone);
1495  out_destroy_log_ticket_zone:
1496 	kmem_zone_destroy(xfs_log_ticket_zone);
1497  out_destroy_ioend_pool:
1498 	mempool_destroy(xfs_ioend_pool);
1499  out_destroy_ioend_zone:
1500 	kmem_zone_destroy(xfs_ioend_zone);
1501  out:
1502 	return -ENOMEM;
1503 }
1504 
1505 STATIC void
1506 xfs_destroy_zones(void)
1507 {
1508 	kmem_zone_destroy(xfs_ili_zone);
1509 	kmem_zone_destroy(xfs_inode_zone);
1510 	kmem_zone_destroy(xfs_efi_zone);
1511 	kmem_zone_destroy(xfs_efd_zone);
1512 	kmem_zone_destroy(xfs_buf_item_zone);
1513 	kmem_zone_destroy(xfs_log_item_desc_zone);
1514 	kmem_zone_destroy(xfs_trans_zone);
1515 	kmem_zone_destroy(xfs_ifork_zone);
1516 	kmem_zone_destroy(xfs_da_state_zone);
1517 	kmem_zone_destroy(xfs_btree_cur_zone);
1518 	kmem_zone_destroy(xfs_bmap_free_item_zone);
1519 	kmem_zone_destroy(xfs_log_ticket_zone);
1520 	mempool_destroy(xfs_ioend_pool);
1521 	kmem_zone_destroy(xfs_ioend_zone);
1522 
1523 }
1524 
1525 STATIC int __init
1526 xfs_init_workqueues(void)
1527 {
1528 	/*
1529 	 * We never want to the same work item to run twice, reclaiming inodes
1530 	 * or idling the log is not going to get any faster by multiple CPUs
1531 	 * competing for ressources.  Use the default large max_active value
1532 	 * so that even lots of filesystems can perform these task in parallel.
1533 	 */
1534 	xfs_syncd_wq = alloc_workqueue("xfssyncd", WQ_NON_REENTRANT, 0);
1535 	if (!xfs_syncd_wq)
1536 		return -ENOMEM;
1537 
1538 	/*
1539 	 * The allocation workqueue can be used in memory reclaim situations
1540 	 * (writepage path), and parallelism is only limited by the number of
1541 	 * AGs in all the filesystems mounted. Hence use the default large
1542 	 * max_active value for this workqueue.
1543 	 */
1544 	xfs_alloc_wq = alloc_workqueue("xfsalloc", WQ_MEM_RECLAIM, 0);
1545 	if (!xfs_alloc_wq)
1546 		goto out_destroy_syncd;
1547 
1548 	return 0;
1549 
1550 out_destroy_syncd:
1551 	destroy_workqueue(xfs_syncd_wq);
1552 	return -ENOMEM;
1553 }
1554 
1555 STATIC void
1556 xfs_destroy_workqueues(void)
1557 {
1558 	destroy_workqueue(xfs_alloc_wq);
1559 	destroy_workqueue(xfs_syncd_wq);
1560 }
1561 
1562 STATIC int __init
1563 init_xfs_fs(void)
1564 {
1565 	int			error;
1566 
1567 	printk(KERN_INFO XFS_VERSION_STRING " with "
1568 			 XFS_BUILD_OPTIONS " enabled\n");
1569 
1570 	xfs_dir_startup();
1571 
1572 	error = xfs_init_zones();
1573 	if (error)
1574 		goto out;
1575 
1576 	error = xfs_init_workqueues();
1577 	if (error)
1578 		goto out_destroy_zones;
1579 
1580 	error = xfs_mru_cache_init();
1581 	if (error)
1582 		goto out_destroy_wq;
1583 
1584 	error = xfs_filestream_init();
1585 	if (error)
1586 		goto out_mru_cache_uninit;
1587 
1588 	error = xfs_buf_init();
1589 	if (error)
1590 		goto out_filestream_uninit;
1591 
1592 	error = xfs_init_procfs();
1593 	if (error)
1594 		goto out_buf_terminate;
1595 
1596 	error = xfs_sysctl_register();
1597 	if (error)
1598 		goto out_cleanup_procfs;
1599 
1600 	error = xfs_qm_init();
1601 	if (error)
1602 		goto out_sysctl_unregister;
1603 
1604 	error = register_filesystem(&xfs_fs_type);
1605 	if (error)
1606 		goto out_qm_exit;
1607 	return 0;
1608 
1609  out_qm_exit:
1610 	xfs_qm_exit();
1611  out_sysctl_unregister:
1612 	xfs_sysctl_unregister();
1613  out_cleanup_procfs:
1614 	xfs_cleanup_procfs();
1615  out_buf_terminate:
1616 	xfs_buf_terminate();
1617  out_filestream_uninit:
1618 	xfs_filestream_uninit();
1619  out_mru_cache_uninit:
1620 	xfs_mru_cache_uninit();
1621  out_destroy_wq:
1622 	xfs_destroy_workqueues();
1623  out_destroy_zones:
1624 	xfs_destroy_zones();
1625  out:
1626 	return error;
1627 }
1628 
1629 STATIC void __exit
1630 exit_xfs_fs(void)
1631 {
1632 	xfs_qm_exit();
1633 	unregister_filesystem(&xfs_fs_type);
1634 	xfs_sysctl_unregister();
1635 	xfs_cleanup_procfs();
1636 	xfs_buf_terminate();
1637 	xfs_filestream_uninit();
1638 	xfs_mru_cache_uninit();
1639 	xfs_destroy_workqueues();
1640 	xfs_destroy_zones();
1641 }
1642 
1643 module_init(init_xfs_fs);
1644 module_exit(exit_xfs_fs);
1645 
1646 MODULE_AUTHOR("Silicon Graphics, Inc.");
1647 MODULE_DESCRIPTION(XFS_VERSION_STRING " with " XFS_BUILD_OPTIONS " enabled");
1648 MODULE_LICENSE("GPL");
1649