xref: /openbmc/linux/fs/xfs/xfs_qm.c (revision 4e5e4705)
1 /*
2  * Copyright (c) 2000-2005 Silicon Graphics, Inc.
3  * All Rights Reserved.
4  *
5  * This program is free software; you can redistribute it and/or
6  * modify it under the terms of the GNU General Public License as
7  * published by the Free Software Foundation.
8  *
9  * This program is distributed in the hope that it would be useful,
10  * but WITHOUT ANY WARRANTY; without even the implied warranty of
11  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12  * GNU General Public License for more details.
13  *
14  * You should have received a copy of the GNU General Public License
15  * along with this program; if not, write the Free Software Foundation,
16  * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
17  */
18 #include "xfs.h"
19 #include "xfs_fs.h"
20 #include "xfs_shared.h"
21 #include "xfs_format.h"
22 #include "xfs_log_format.h"
23 #include "xfs_trans_resv.h"
24 #include "xfs_bit.h"
25 #include "xfs_sb.h"
26 #include "xfs_ag.h"
27 #include "xfs_mount.h"
28 #include "xfs_inode.h"
29 #include "xfs_ialloc.h"
30 #include "xfs_itable.h"
31 #include "xfs_quota.h"
32 #include "xfs_error.h"
33 #include "xfs_bmap.h"
34 #include "xfs_bmap_btree.h"
35 #include "xfs_trans.h"
36 #include "xfs_trans_space.h"
37 #include "xfs_qm.h"
38 #include "xfs_trace.h"
39 #include "xfs_icache.h"
40 #include "xfs_cksum.h"
41 #include "xfs_dinode.h"
42 
43 /*
44  * The global quota manager. There is only one of these for the entire
45  * system, _not_ one per file system. XQM keeps track of the overall
46  * quota functionality, including maintaining the freelist and hash
47  * tables of dquots.
48  */
49 STATIC int	xfs_qm_init_quotainos(xfs_mount_t *);
50 STATIC int	xfs_qm_init_quotainfo(xfs_mount_t *);
51 
52 
53 STATIC void	xfs_qm_dqfree_one(struct xfs_dquot *dqp);
54 /*
55  * We use the batch lookup interface to iterate over the dquots as it
56  * currently is the only interface into the radix tree code that allows
57  * fuzzy lookups instead of exact matches.  Holding the lock over multiple
58  * operations is fine as all callers are used either during mount/umount
59  * or quotaoff.
60  */
61 #define XFS_DQ_LOOKUP_BATCH	32
62 
63 STATIC int
64 xfs_qm_dquot_walk(
65 	struct xfs_mount	*mp,
66 	int			type,
67 	int			(*execute)(struct xfs_dquot *dqp, void *data),
68 	void			*data)
69 {
70 	struct xfs_quotainfo	*qi = mp->m_quotainfo;
71 	struct radix_tree_root	*tree = xfs_dquot_tree(qi, type);
72 	uint32_t		next_index;
73 	int			last_error = 0;
74 	int			skipped;
75 	int			nr_found;
76 
77 restart:
78 	skipped = 0;
79 	next_index = 0;
80 	nr_found = 0;
81 
82 	while (1) {
83 		struct xfs_dquot *batch[XFS_DQ_LOOKUP_BATCH];
84 		int		error = 0;
85 		int		i;
86 
87 		mutex_lock(&qi->qi_tree_lock);
88 		nr_found = radix_tree_gang_lookup(tree, (void **)batch,
89 					next_index, XFS_DQ_LOOKUP_BATCH);
90 		if (!nr_found) {
91 			mutex_unlock(&qi->qi_tree_lock);
92 			break;
93 		}
94 
95 		for (i = 0; i < nr_found; i++) {
96 			struct xfs_dquot *dqp = batch[i];
97 
98 			next_index = be32_to_cpu(dqp->q_core.d_id) + 1;
99 
100 			error = execute(batch[i], data);
101 			if (error == EAGAIN) {
102 				skipped++;
103 				continue;
104 			}
105 			if (error && last_error != EFSCORRUPTED)
106 				last_error = error;
107 		}
108 
109 		mutex_unlock(&qi->qi_tree_lock);
110 
111 		/* bail out if the filesystem is corrupted.  */
112 		if (last_error == EFSCORRUPTED) {
113 			skipped = 0;
114 			break;
115 		}
116 	}
117 
118 	if (skipped) {
119 		delay(1);
120 		goto restart;
121 	}
122 
123 	return last_error;
124 }
125 
126 
127 /*
128  * Purge a dquot from all tracking data structures and free it.
129  */
130 STATIC int
131 xfs_qm_dqpurge(
132 	struct xfs_dquot	*dqp,
133 	void			*data)
134 {
135 	struct xfs_mount	*mp = dqp->q_mount;
136 	struct xfs_quotainfo	*qi = mp->m_quotainfo;
137 	struct xfs_dquot	*gdqp = NULL;
138 	struct xfs_dquot	*pdqp = NULL;
139 
140 	xfs_dqlock(dqp);
141 	if ((dqp->dq_flags & XFS_DQ_FREEING) || dqp->q_nrefs != 0) {
142 		xfs_dqunlock(dqp);
143 		return EAGAIN;
144 	}
145 
146 	/*
147 	 * If this quota has a hint attached, prepare for releasing it now.
148 	 */
149 	gdqp = dqp->q_gdquot;
150 	if (gdqp) {
151 		xfs_dqlock(gdqp);
152 		dqp->q_gdquot = NULL;
153 	}
154 
155 	pdqp = dqp->q_pdquot;
156 	if (pdqp) {
157 		xfs_dqlock(pdqp);
158 		dqp->q_pdquot = NULL;
159 	}
160 
161 	dqp->dq_flags |= XFS_DQ_FREEING;
162 
163 	xfs_dqflock(dqp);
164 
165 	/*
166 	 * If we are turning this type of quotas off, we don't care
167 	 * about the dirty metadata sitting in this dquot. OTOH, if
168 	 * we're unmounting, we do care, so we flush it and wait.
169 	 */
170 	if (XFS_DQ_IS_DIRTY(dqp)) {
171 		struct xfs_buf	*bp = NULL;
172 		int		error;
173 
174 		/*
175 		 * We don't care about getting disk errors here. We need
176 		 * to purge this dquot anyway, so we go ahead regardless.
177 		 */
178 		error = xfs_qm_dqflush(dqp, &bp);
179 		if (error) {
180 			xfs_warn(mp, "%s: dquot %p flush failed",
181 				__func__, dqp);
182 		} else {
183 			error = xfs_bwrite(bp);
184 			xfs_buf_relse(bp);
185 		}
186 		xfs_dqflock(dqp);
187 	}
188 
189 	ASSERT(atomic_read(&dqp->q_pincount) == 0);
190 	ASSERT(XFS_FORCED_SHUTDOWN(mp) ||
191 	       !(dqp->q_logitem.qli_item.li_flags & XFS_LI_IN_AIL));
192 
193 	xfs_dqfunlock(dqp);
194 	xfs_dqunlock(dqp);
195 
196 	radix_tree_delete(xfs_dquot_tree(qi, dqp->q_core.d_flags),
197 			  be32_to_cpu(dqp->q_core.d_id));
198 	qi->qi_dquots--;
199 
200 	/*
201 	 * We move dquots to the freelist as soon as their reference count
202 	 * hits zero, so it really should be on the freelist here.
203 	 */
204 	ASSERT(!list_empty(&dqp->q_lru));
205 	list_lru_del(&qi->qi_lru, &dqp->q_lru);
206 	XFS_STATS_DEC(xs_qm_dquot_unused);
207 
208 	xfs_qm_dqdestroy(dqp);
209 
210 	if (gdqp)
211 		xfs_qm_dqput(gdqp);
212 	if (pdqp)
213 		xfs_qm_dqput(pdqp);
214 	return 0;
215 }
216 
217 /*
218  * Purge the dquot cache.
219  */
220 void
221 xfs_qm_dqpurge_all(
222 	struct xfs_mount	*mp,
223 	uint			flags)
224 {
225 	if (flags & XFS_QMOPT_UQUOTA)
226 		xfs_qm_dquot_walk(mp, XFS_DQ_USER, xfs_qm_dqpurge, NULL);
227 	if (flags & XFS_QMOPT_GQUOTA)
228 		xfs_qm_dquot_walk(mp, XFS_DQ_GROUP, xfs_qm_dqpurge, NULL);
229 	if (flags & XFS_QMOPT_PQUOTA)
230 		xfs_qm_dquot_walk(mp, XFS_DQ_PROJ, xfs_qm_dqpurge, NULL);
231 }
232 
233 /*
234  * Just destroy the quotainfo structure.
235  */
236 void
237 xfs_qm_unmount(
238 	struct xfs_mount	*mp)
239 {
240 	if (mp->m_quotainfo) {
241 		xfs_qm_dqpurge_all(mp, XFS_QMOPT_QUOTALL);
242 		xfs_qm_destroy_quotainfo(mp);
243 	}
244 }
245 
246 
247 /*
248  * This is called from xfs_mountfs to start quotas and initialize all
249  * necessary data structures like quotainfo.  This is also responsible for
250  * running a quotacheck as necessary.  We are guaranteed that the superblock
251  * is consistently read in at this point.
252  *
253  * If we fail here, the mount will continue with quota turned off. We don't
254  * need to inidicate success or failure at all.
255  */
256 void
257 xfs_qm_mount_quotas(
258 	xfs_mount_t	*mp)
259 {
260 	int		error = 0;
261 	uint		sbf;
262 
263 	/*
264 	 * If quotas on realtime volumes is not supported, we disable
265 	 * quotas immediately.
266 	 */
267 	if (mp->m_sb.sb_rextents) {
268 		xfs_notice(mp, "Cannot turn on quotas for realtime filesystem");
269 		mp->m_qflags = 0;
270 		goto write_changes;
271 	}
272 
273 	ASSERT(XFS_IS_QUOTA_RUNNING(mp));
274 
275 	/*
276 	 * Allocate the quotainfo structure inside the mount struct, and
277 	 * create quotainode(s), and change/rev superblock if necessary.
278 	 */
279 	error = xfs_qm_init_quotainfo(mp);
280 	if (error) {
281 		/*
282 		 * We must turn off quotas.
283 		 */
284 		ASSERT(mp->m_quotainfo == NULL);
285 		mp->m_qflags = 0;
286 		goto write_changes;
287 	}
288 	/*
289 	 * If any of the quotas are not consistent, do a quotacheck.
290 	 */
291 	if (XFS_QM_NEED_QUOTACHECK(mp)) {
292 		error = xfs_qm_quotacheck(mp);
293 		if (error) {
294 			/* Quotacheck failed and disabled quotas. */
295 			return;
296 		}
297 	}
298 	/*
299 	 * If one type of quotas is off, then it will lose its
300 	 * quotachecked status, since we won't be doing accounting for
301 	 * that type anymore.
302 	 */
303 	if (!XFS_IS_UQUOTA_ON(mp))
304 		mp->m_qflags &= ~XFS_UQUOTA_CHKD;
305 	if (!XFS_IS_GQUOTA_ON(mp))
306 		mp->m_qflags &= ~XFS_GQUOTA_CHKD;
307 	if (!XFS_IS_PQUOTA_ON(mp))
308 		mp->m_qflags &= ~XFS_PQUOTA_CHKD;
309 
310  write_changes:
311 	/*
312 	 * We actually don't have to acquire the m_sb_lock at all.
313 	 * This can only be called from mount, and that's single threaded. XXX
314 	 */
315 	spin_lock(&mp->m_sb_lock);
316 	sbf = mp->m_sb.sb_qflags;
317 	mp->m_sb.sb_qflags = mp->m_qflags & XFS_MOUNT_QUOTA_ALL;
318 	spin_unlock(&mp->m_sb_lock);
319 
320 	if (sbf != (mp->m_qflags & XFS_MOUNT_QUOTA_ALL)) {
321 		if (xfs_qm_write_sb_changes(mp, XFS_SB_QFLAGS)) {
322 			/*
323 			 * We could only have been turning quotas off.
324 			 * We aren't in very good shape actually because
325 			 * the incore structures are convinced that quotas are
326 			 * off, but the on disk superblock doesn't know that !
327 			 */
328 			ASSERT(!(XFS_IS_QUOTA_RUNNING(mp)));
329 			xfs_alert(mp, "%s: Superblock update failed!",
330 				__func__);
331 		}
332 	}
333 
334 	if (error) {
335 		xfs_warn(mp, "Failed to initialize disk quotas.");
336 		return;
337 	}
338 }
339 
340 /*
341  * Called from the vfsops layer.
342  */
343 void
344 xfs_qm_unmount_quotas(
345 	xfs_mount_t	*mp)
346 {
347 	/*
348 	 * Release the dquots that root inode, et al might be holding,
349 	 * before we flush quotas and blow away the quotainfo structure.
350 	 */
351 	ASSERT(mp->m_rootip);
352 	xfs_qm_dqdetach(mp->m_rootip);
353 	if (mp->m_rbmip)
354 		xfs_qm_dqdetach(mp->m_rbmip);
355 	if (mp->m_rsumip)
356 		xfs_qm_dqdetach(mp->m_rsumip);
357 
358 	/*
359 	 * Release the quota inodes.
360 	 */
361 	if (mp->m_quotainfo) {
362 		if (mp->m_quotainfo->qi_uquotaip) {
363 			IRELE(mp->m_quotainfo->qi_uquotaip);
364 			mp->m_quotainfo->qi_uquotaip = NULL;
365 		}
366 		if (mp->m_quotainfo->qi_gquotaip) {
367 			IRELE(mp->m_quotainfo->qi_gquotaip);
368 			mp->m_quotainfo->qi_gquotaip = NULL;
369 		}
370 		if (mp->m_quotainfo->qi_pquotaip) {
371 			IRELE(mp->m_quotainfo->qi_pquotaip);
372 			mp->m_quotainfo->qi_pquotaip = NULL;
373 		}
374 	}
375 }
376 
377 STATIC int
378 xfs_qm_dqattach_one(
379 	xfs_inode_t	*ip,
380 	xfs_dqid_t	id,
381 	uint		type,
382 	uint		doalloc,
383 	xfs_dquot_t	*udqhint, /* hint */
384 	xfs_dquot_t	**IO_idqpp)
385 {
386 	xfs_dquot_t	*dqp;
387 	int		error;
388 
389 	ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
390 	error = 0;
391 
392 	/*
393 	 * See if we already have it in the inode itself. IO_idqpp is
394 	 * &i_udquot or &i_gdquot. This made the code look weird, but
395 	 * made the logic a lot simpler.
396 	 */
397 	dqp = *IO_idqpp;
398 	if (dqp) {
399 		trace_xfs_dqattach_found(dqp);
400 		return 0;
401 	}
402 
403 	/*
404 	 * udqhint is the i_udquot field in inode, and is non-NULL only
405 	 * when the type arg is group/project. Its purpose is to save a
406 	 * lookup by dqid (xfs_qm_dqget) by caching a group dquot inside
407 	 * the user dquot.
408 	 */
409 	if (udqhint) {
410 		ASSERT(type == XFS_DQ_GROUP || type == XFS_DQ_PROJ);
411 		xfs_dqlock(udqhint);
412 
413 		/*
414 		 * No need to take dqlock to look at the id.
415 		 *
416 		 * The ID can't change until it gets reclaimed, and it won't
417 		 * be reclaimed as long as we have a ref from inode and we
418 		 * hold the ilock.
419 		 */
420 		if (type == XFS_DQ_GROUP)
421 			dqp = udqhint->q_gdquot;
422 		else
423 			dqp = udqhint->q_pdquot;
424 		if (dqp && be32_to_cpu(dqp->q_core.d_id) == id) {
425 			ASSERT(*IO_idqpp == NULL);
426 
427 			*IO_idqpp = xfs_qm_dqhold(dqp);
428 			xfs_dqunlock(udqhint);
429 			return 0;
430 		}
431 
432 		/*
433 		 * We can't hold a dquot lock when we call the dqget code.
434 		 * We'll deadlock in no time, because of (not conforming to)
435 		 * lock ordering - the inodelock comes before any dquot lock,
436 		 * and we may drop and reacquire the ilock in xfs_qm_dqget().
437 		 */
438 		xfs_dqunlock(udqhint);
439 	}
440 
441 	/*
442 	 * Find the dquot from somewhere. This bumps the
443 	 * reference count of dquot and returns it locked.
444 	 * This can return ENOENT if dquot didn't exist on
445 	 * disk and we didn't ask it to allocate;
446 	 * ESRCH if quotas got turned off suddenly.
447 	 */
448 	error = xfs_qm_dqget(ip->i_mount, ip, id, type,
449 			     doalloc | XFS_QMOPT_DOWARN, &dqp);
450 	if (error)
451 		return error;
452 
453 	trace_xfs_dqattach_get(dqp);
454 
455 	/*
456 	 * dqget may have dropped and re-acquired the ilock, but it guarantees
457 	 * that the dquot returned is the one that should go in the inode.
458 	 */
459 	*IO_idqpp = dqp;
460 	xfs_dqunlock(dqp);
461 	return 0;
462 }
463 
464 
465 /*
466  * Given a udquot and group/project type, attach the group/project
467  * dquot pointer to the udquot as a hint for future lookups.
468  */
469 STATIC void
470 xfs_qm_dqattach_hint(
471 	struct xfs_inode	*ip,
472 	int			type)
473 {
474 	struct xfs_dquot **dqhintp;
475 	struct xfs_dquot *dqp;
476 	struct xfs_dquot *udq = ip->i_udquot;
477 
478 	ASSERT(type == XFS_DQ_GROUP || type == XFS_DQ_PROJ);
479 
480 	xfs_dqlock(udq);
481 
482 	if (type == XFS_DQ_GROUP) {
483 		dqp = ip->i_gdquot;
484 		dqhintp = &udq->q_gdquot;
485 	} else {
486 		dqp = ip->i_pdquot;
487 		dqhintp = &udq->q_pdquot;
488 	}
489 
490 	if (*dqhintp) {
491 		struct xfs_dquot *tmp;
492 
493 		if (*dqhintp == dqp)
494 			goto done;
495 
496 		tmp = *dqhintp;
497 		*dqhintp = NULL;
498 		xfs_qm_dqrele(tmp);
499 	}
500 
501 	*dqhintp = xfs_qm_dqhold(dqp);
502 done:
503 	xfs_dqunlock(udq);
504 }
505 
506 static bool
507 xfs_qm_need_dqattach(
508 	struct xfs_inode	*ip)
509 {
510 	struct xfs_mount	*mp = ip->i_mount;
511 
512 	if (!XFS_IS_QUOTA_RUNNING(mp))
513 		return false;
514 	if (!XFS_IS_QUOTA_ON(mp))
515 		return false;
516 	if (!XFS_NOT_DQATTACHED(mp, ip))
517 		return false;
518 	if (xfs_is_quota_inode(&mp->m_sb, ip->i_ino))
519 		return false;
520 	return true;
521 }
522 
523 /*
524  * Given a locked inode, attach dquot(s) to it, taking U/G/P-QUOTAON
525  * into account.
526  * If XFS_QMOPT_DQALLOC, the dquot(s) will be allocated if needed.
527  * Inode may get unlocked and relocked in here, and the caller must deal with
528  * the consequences.
529  */
530 int
531 xfs_qm_dqattach_locked(
532 	xfs_inode_t	*ip,
533 	uint		flags)
534 {
535 	xfs_mount_t	*mp = ip->i_mount;
536 	uint		nquotas = 0;
537 	int		error = 0;
538 
539 	if (!xfs_qm_need_dqattach(ip))
540 		return 0;
541 
542 	ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
543 
544 	if (XFS_IS_UQUOTA_ON(mp)) {
545 		error = xfs_qm_dqattach_one(ip, ip->i_d.di_uid, XFS_DQ_USER,
546 						flags & XFS_QMOPT_DQALLOC,
547 						NULL, &ip->i_udquot);
548 		if (error)
549 			goto done;
550 		nquotas++;
551 	}
552 
553 	ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
554 	if (XFS_IS_GQUOTA_ON(mp)) {
555 		error = xfs_qm_dqattach_one(ip, ip->i_d.di_gid, XFS_DQ_GROUP,
556 						flags & XFS_QMOPT_DQALLOC,
557 						ip->i_udquot, &ip->i_gdquot);
558 		/*
559 		 * Don't worry about the udquot that we may have
560 		 * attached above. It'll get detached, if not already.
561 		 */
562 		if (error)
563 			goto done;
564 		nquotas++;
565 	}
566 
567 	ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
568 	if (XFS_IS_PQUOTA_ON(mp)) {
569 		error = xfs_qm_dqattach_one(ip, xfs_get_projid(ip), XFS_DQ_PROJ,
570 						flags & XFS_QMOPT_DQALLOC,
571 						ip->i_udquot, &ip->i_pdquot);
572 		/*
573 		 * Don't worry about the udquot that we may have
574 		 * attached above. It'll get detached, if not already.
575 		 */
576 		if (error)
577 			goto done;
578 		nquotas++;
579 	}
580 
581 	/*
582 	 * Attach this group/project quota to the user quota as a hint.
583 	 * This WON'T, in general, result in a thrash.
584 	 */
585 	if (nquotas > 1 && ip->i_udquot) {
586 		ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
587 		ASSERT(ip->i_gdquot || !XFS_IS_GQUOTA_ON(mp));
588 		ASSERT(ip->i_pdquot || !XFS_IS_PQUOTA_ON(mp));
589 
590 		/*
591 		 * We do not have i_udquot locked at this point, but this check
592 		 * is OK since we don't depend on the i_gdquot to be accurate
593 		 * 100% all the time. It is just a hint, and this will
594 		 * succeed in general.
595 		 */
596 		if (ip->i_udquot->q_gdquot != ip->i_gdquot)
597 			xfs_qm_dqattach_hint(ip, XFS_DQ_GROUP);
598 
599 		if (ip->i_udquot->q_pdquot != ip->i_pdquot)
600 			xfs_qm_dqattach_hint(ip, XFS_DQ_PROJ);
601 	}
602 
603  done:
604 #ifdef DEBUG
605 	if (!error) {
606 		if (XFS_IS_UQUOTA_ON(mp))
607 			ASSERT(ip->i_udquot);
608 		if (XFS_IS_GQUOTA_ON(mp))
609 			ASSERT(ip->i_gdquot);
610 		if (XFS_IS_PQUOTA_ON(mp))
611 			ASSERT(ip->i_pdquot);
612 	}
613 	ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
614 #endif
615 	return error;
616 }
617 
618 int
619 xfs_qm_dqattach(
620 	struct xfs_inode	*ip,
621 	uint			flags)
622 {
623 	int			error;
624 
625 	if (!xfs_qm_need_dqattach(ip))
626 		return 0;
627 
628 	xfs_ilock(ip, XFS_ILOCK_EXCL);
629 	error = xfs_qm_dqattach_locked(ip, flags);
630 	xfs_iunlock(ip, XFS_ILOCK_EXCL);
631 
632 	return error;
633 }
634 
635 /*
636  * Release dquots (and their references) if any.
637  * The inode should be locked EXCL except when this's called by
638  * xfs_ireclaim.
639  */
640 void
641 xfs_qm_dqdetach(
642 	xfs_inode_t	*ip)
643 {
644 	if (!(ip->i_udquot || ip->i_gdquot || ip->i_pdquot))
645 		return;
646 
647 	trace_xfs_dquot_dqdetach(ip);
648 
649 	ASSERT(!xfs_is_quota_inode(&ip->i_mount->m_sb, ip->i_ino));
650 	if (ip->i_udquot) {
651 		xfs_qm_dqrele(ip->i_udquot);
652 		ip->i_udquot = NULL;
653 	}
654 	if (ip->i_gdquot) {
655 		xfs_qm_dqrele(ip->i_gdquot);
656 		ip->i_gdquot = NULL;
657 	}
658 	if (ip->i_pdquot) {
659 		xfs_qm_dqrele(ip->i_pdquot);
660 		ip->i_pdquot = NULL;
661 	}
662 }
663 
664 struct xfs_qm_isolate {
665 	struct list_head	buffers;
666 	struct list_head	dispose;
667 };
668 
669 static enum lru_status
670 xfs_qm_dquot_isolate(
671 	struct list_head	*item,
672 	spinlock_t		*lru_lock,
673 	void			*arg)
674 {
675 	struct xfs_dquot	*dqp = container_of(item,
676 						struct xfs_dquot, q_lru);
677 	struct xfs_qm_isolate	*isol = arg;
678 
679 	if (!xfs_dqlock_nowait(dqp))
680 		goto out_miss_busy;
681 
682 	/*
683 	 * This dquot has acquired a reference in the meantime remove it from
684 	 * the freelist and try again.
685 	 */
686 	if (dqp->q_nrefs) {
687 		xfs_dqunlock(dqp);
688 		XFS_STATS_INC(xs_qm_dqwants);
689 
690 		trace_xfs_dqreclaim_want(dqp);
691 		list_del_init(&dqp->q_lru);
692 		XFS_STATS_DEC(xs_qm_dquot_unused);
693 		return LRU_REMOVED;
694 	}
695 
696 	/*
697 	 * If the dquot is dirty, flush it. If it's already being flushed, just
698 	 * skip it so there is time for the IO to complete before we try to
699 	 * reclaim it again on the next LRU pass.
700 	 */
701 	if (!xfs_dqflock_nowait(dqp)) {
702 		xfs_dqunlock(dqp);
703 		goto out_miss_busy;
704 	}
705 
706 	if (XFS_DQ_IS_DIRTY(dqp)) {
707 		struct xfs_buf	*bp = NULL;
708 		int		error;
709 
710 		trace_xfs_dqreclaim_dirty(dqp);
711 
712 		/* we have to drop the LRU lock to flush the dquot */
713 		spin_unlock(lru_lock);
714 
715 		error = xfs_qm_dqflush(dqp, &bp);
716 		if (error) {
717 			xfs_warn(dqp->q_mount, "%s: dquot %p flush failed",
718 				 __func__, dqp);
719 			goto out_unlock_dirty;
720 		}
721 
722 		xfs_buf_delwri_queue(bp, &isol->buffers);
723 		xfs_buf_relse(bp);
724 		goto out_unlock_dirty;
725 	}
726 	xfs_dqfunlock(dqp);
727 
728 	/*
729 	 * Prevent lookups now that we are past the point of no return.
730 	 */
731 	dqp->dq_flags |= XFS_DQ_FREEING;
732 	xfs_dqunlock(dqp);
733 
734 	ASSERT(dqp->q_nrefs == 0);
735 	list_move_tail(&dqp->q_lru, &isol->dispose);
736 	XFS_STATS_DEC(xs_qm_dquot_unused);
737 	trace_xfs_dqreclaim_done(dqp);
738 	XFS_STATS_INC(xs_qm_dqreclaims);
739 	return LRU_REMOVED;
740 
741 out_miss_busy:
742 	trace_xfs_dqreclaim_busy(dqp);
743 	XFS_STATS_INC(xs_qm_dqreclaim_misses);
744 	return LRU_SKIP;
745 
746 out_unlock_dirty:
747 	trace_xfs_dqreclaim_busy(dqp);
748 	XFS_STATS_INC(xs_qm_dqreclaim_misses);
749 	xfs_dqunlock(dqp);
750 	spin_lock(lru_lock);
751 	return LRU_RETRY;
752 }
753 
754 static unsigned long
755 xfs_qm_shrink_scan(
756 	struct shrinker		*shrink,
757 	struct shrink_control	*sc)
758 {
759 	struct xfs_quotainfo	*qi = container_of(shrink,
760 					struct xfs_quotainfo, qi_shrinker);
761 	struct xfs_qm_isolate	isol;
762 	unsigned long		freed;
763 	int			error;
764 	unsigned long		nr_to_scan = sc->nr_to_scan;
765 
766 	if ((sc->gfp_mask & (__GFP_FS|__GFP_WAIT)) != (__GFP_FS|__GFP_WAIT))
767 		return 0;
768 
769 	INIT_LIST_HEAD(&isol.buffers);
770 	INIT_LIST_HEAD(&isol.dispose);
771 
772 	freed = list_lru_walk_node(&qi->qi_lru, sc->nid, xfs_qm_dquot_isolate, &isol,
773 					&nr_to_scan);
774 
775 	error = xfs_buf_delwri_submit(&isol.buffers);
776 	if (error)
777 		xfs_warn(NULL, "%s: dquot reclaim failed", __func__);
778 
779 	while (!list_empty(&isol.dispose)) {
780 		struct xfs_dquot	*dqp;
781 
782 		dqp = list_first_entry(&isol.dispose, struct xfs_dquot, q_lru);
783 		list_del_init(&dqp->q_lru);
784 		xfs_qm_dqfree_one(dqp);
785 	}
786 
787 	return freed;
788 }
789 
790 static unsigned long
791 xfs_qm_shrink_count(
792 	struct shrinker		*shrink,
793 	struct shrink_control	*sc)
794 {
795 	struct xfs_quotainfo	*qi = container_of(shrink,
796 					struct xfs_quotainfo, qi_shrinker);
797 
798 	return list_lru_count_node(&qi->qi_lru, sc->nid);
799 }
800 
801 /*
802  * This initializes all the quota information that's kept in the
803  * mount structure
804  */
805 STATIC int
806 xfs_qm_init_quotainfo(
807 	xfs_mount_t	*mp)
808 {
809 	xfs_quotainfo_t *qinf;
810 	int		error;
811 	xfs_dquot_t	*dqp;
812 
813 	ASSERT(XFS_IS_QUOTA_RUNNING(mp));
814 
815 	qinf = mp->m_quotainfo = kmem_zalloc(sizeof(xfs_quotainfo_t), KM_SLEEP);
816 
817 	if ((error = list_lru_init(&qinf->qi_lru))) {
818 		kmem_free(qinf);
819 		mp->m_quotainfo = NULL;
820 		return error;
821 	}
822 
823 	/*
824 	 * See if quotainodes are setup, and if not, allocate them,
825 	 * and change the superblock accordingly.
826 	 */
827 	if ((error = xfs_qm_init_quotainos(mp))) {
828 		list_lru_destroy(&qinf->qi_lru);
829 		kmem_free(qinf);
830 		mp->m_quotainfo = NULL;
831 		return error;
832 	}
833 
834 	INIT_RADIX_TREE(&qinf->qi_uquota_tree, GFP_NOFS);
835 	INIT_RADIX_TREE(&qinf->qi_gquota_tree, GFP_NOFS);
836 	INIT_RADIX_TREE(&qinf->qi_pquota_tree, GFP_NOFS);
837 	mutex_init(&qinf->qi_tree_lock);
838 
839 	/* mutex used to serialize quotaoffs */
840 	mutex_init(&qinf->qi_quotaofflock);
841 
842 	/* Precalc some constants */
843 	qinf->qi_dqchunklen = XFS_FSB_TO_BB(mp, XFS_DQUOT_CLUSTER_SIZE_FSB);
844 	qinf->qi_dqperchunk = xfs_calc_dquots_per_chunk(mp,
845 							qinf->qi_dqchunklen);
846 
847 	mp->m_qflags |= (mp->m_sb.sb_qflags & XFS_ALL_QUOTA_CHKD);
848 
849 	/*
850 	 * We try to get the limits from the superuser's limits fields.
851 	 * This is quite hacky, but it is standard quota practice.
852 	 *
853 	 * We look at the USR dquot with id == 0 first, but if user quotas
854 	 * are not enabled we goto the GRP dquot with id == 0.
855 	 * We don't really care to keep separate default limits for user
856 	 * and group quotas, at least not at this point.
857 	 *
858 	 * Since we may not have done a quotacheck by this point, just read
859 	 * the dquot without attaching it to any hashtables or lists.
860 	 */
861 	error = xfs_qm_dqread(mp, 0,
862 			XFS_IS_UQUOTA_RUNNING(mp) ? XFS_DQ_USER :
863 			 (XFS_IS_GQUOTA_RUNNING(mp) ? XFS_DQ_GROUP :
864 			  XFS_DQ_PROJ),
865 			XFS_QMOPT_DOWARN, &dqp);
866 	if (!error) {
867 		xfs_disk_dquot_t	*ddqp = &dqp->q_core;
868 
869 		/*
870 		 * The warnings and timers set the grace period given to
871 		 * a user or group before he or she can not perform any
872 		 * more writing. If it is zero, a default is used.
873 		 */
874 		qinf->qi_btimelimit = ddqp->d_btimer ?
875 			be32_to_cpu(ddqp->d_btimer) : XFS_QM_BTIMELIMIT;
876 		qinf->qi_itimelimit = ddqp->d_itimer ?
877 			be32_to_cpu(ddqp->d_itimer) : XFS_QM_ITIMELIMIT;
878 		qinf->qi_rtbtimelimit = ddqp->d_rtbtimer ?
879 			be32_to_cpu(ddqp->d_rtbtimer) : XFS_QM_RTBTIMELIMIT;
880 		qinf->qi_bwarnlimit = ddqp->d_bwarns ?
881 			be16_to_cpu(ddqp->d_bwarns) : XFS_QM_BWARNLIMIT;
882 		qinf->qi_iwarnlimit = ddqp->d_iwarns ?
883 			be16_to_cpu(ddqp->d_iwarns) : XFS_QM_IWARNLIMIT;
884 		qinf->qi_rtbwarnlimit = ddqp->d_rtbwarns ?
885 			be16_to_cpu(ddqp->d_rtbwarns) : XFS_QM_RTBWARNLIMIT;
886 		qinf->qi_bhardlimit = be64_to_cpu(ddqp->d_blk_hardlimit);
887 		qinf->qi_bsoftlimit = be64_to_cpu(ddqp->d_blk_softlimit);
888 		qinf->qi_ihardlimit = be64_to_cpu(ddqp->d_ino_hardlimit);
889 		qinf->qi_isoftlimit = be64_to_cpu(ddqp->d_ino_softlimit);
890 		qinf->qi_rtbhardlimit = be64_to_cpu(ddqp->d_rtb_hardlimit);
891 		qinf->qi_rtbsoftlimit = be64_to_cpu(ddqp->d_rtb_softlimit);
892 
893 		xfs_qm_dqdestroy(dqp);
894 	} else {
895 		qinf->qi_btimelimit = XFS_QM_BTIMELIMIT;
896 		qinf->qi_itimelimit = XFS_QM_ITIMELIMIT;
897 		qinf->qi_rtbtimelimit = XFS_QM_RTBTIMELIMIT;
898 		qinf->qi_bwarnlimit = XFS_QM_BWARNLIMIT;
899 		qinf->qi_iwarnlimit = XFS_QM_IWARNLIMIT;
900 		qinf->qi_rtbwarnlimit = XFS_QM_RTBWARNLIMIT;
901 	}
902 
903 	qinf->qi_shrinker.count_objects = xfs_qm_shrink_count;
904 	qinf->qi_shrinker.scan_objects = xfs_qm_shrink_scan;
905 	qinf->qi_shrinker.seeks = DEFAULT_SEEKS;
906 	qinf->qi_shrinker.flags = SHRINKER_NUMA_AWARE;
907 	register_shrinker(&qinf->qi_shrinker);
908 	return 0;
909 }
910 
911 
912 /*
913  * Gets called when unmounting a filesystem or when all quotas get
914  * turned off.
915  * This purges the quota inodes, destroys locks and frees itself.
916  */
917 void
918 xfs_qm_destroy_quotainfo(
919 	xfs_mount_t	*mp)
920 {
921 	xfs_quotainfo_t *qi;
922 
923 	qi = mp->m_quotainfo;
924 	ASSERT(qi != NULL);
925 
926 	unregister_shrinker(&qi->qi_shrinker);
927 	list_lru_destroy(&qi->qi_lru);
928 
929 	if (qi->qi_uquotaip) {
930 		IRELE(qi->qi_uquotaip);
931 		qi->qi_uquotaip = NULL; /* paranoia */
932 	}
933 	if (qi->qi_gquotaip) {
934 		IRELE(qi->qi_gquotaip);
935 		qi->qi_gquotaip = NULL;
936 	}
937 	if (qi->qi_pquotaip) {
938 		IRELE(qi->qi_pquotaip);
939 		qi->qi_pquotaip = NULL;
940 	}
941 	mutex_destroy(&qi->qi_quotaofflock);
942 	kmem_free(qi);
943 	mp->m_quotainfo = NULL;
944 }
945 
946 /*
947  * Create an inode and return with a reference already taken, but unlocked
948  * This is how we create quota inodes
949  */
950 STATIC int
951 xfs_qm_qino_alloc(
952 	xfs_mount_t	*mp,
953 	xfs_inode_t	**ip,
954 	__int64_t	sbfields,
955 	uint		flags)
956 {
957 	xfs_trans_t	*tp;
958 	int		error;
959 	int		committed;
960 
961 	*ip = NULL;
962 	/*
963 	 * With superblock that doesn't have separate pquotino, we
964 	 * share an inode between gquota and pquota. If the on-disk
965 	 * superblock has GQUOTA and the filesystem is now mounted
966 	 * with PQUOTA, just use sb_gquotino for sb_pquotino and
967 	 * vice-versa.
968 	 */
969 	if (!xfs_sb_version_has_pquotino(&mp->m_sb) &&
970 			(flags & (XFS_QMOPT_PQUOTA|XFS_QMOPT_GQUOTA))) {
971 		xfs_ino_t ino = NULLFSINO;
972 
973 		if ((flags & XFS_QMOPT_PQUOTA) &&
974 			     (mp->m_sb.sb_gquotino != NULLFSINO)) {
975 			ino = mp->m_sb.sb_gquotino;
976 			ASSERT(mp->m_sb.sb_pquotino == NULLFSINO);
977 		} else if ((flags & XFS_QMOPT_GQUOTA) &&
978 			     (mp->m_sb.sb_pquotino != NULLFSINO)) {
979 			ino = mp->m_sb.sb_pquotino;
980 			ASSERT(mp->m_sb.sb_gquotino == NULLFSINO);
981 		}
982 		if (ino != NULLFSINO) {
983 			error = xfs_iget(mp, NULL, ino, 0, 0, ip);
984 			if (error)
985 				return error;
986 			mp->m_sb.sb_gquotino = NULLFSINO;
987 			mp->m_sb.sb_pquotino = NULLFSINO;
988 		}
989 	}
990 
991 	tp = xfs_trans_alloc(mp, XFS_TRANS_QM_QINOCREATE);
992 	error = xfs_trans_reserve(tp, &M_RES(mp)->tr_create,
993 				  XFS_QM_QINOCREATE_SPACE_RES(mp), 0);
994 	if (error) {
995 		xfs_trans_cancel(tp, 0);
996 		return error;
997 	}
998 
999 	if (!*ip) {
1000 		error = xfs_dir_ialloc(&tp, NULL, S_IFREG, 1, 0, 0, 1, ip,
1001 								&committed);
1002 		if (error) {
1003 			xfs_trans_cancel(tp, XFS_TRANS_RELEASE_LOG_RES |
1004 					 XFS_TRANS_ABORT);
1005 			return error;
1006 		}
1007 	}
1008 
1009 	/*
1010 	 * Make the changes in the superblock, and log those too.
1011 	 * sbfields arg may contain fields other than *QUOTINO;
1012 	 * VERSIONNUM for example.
1013 	 */
1014 	spin_lock(&mp->m_sb_lock);
1015 	if (flags & XFS_QMOPT_SBVERSION) {
1016 		ASSERT(!xfs_sb_version_hasquota(&mp->m_sb));
1017 		ASSERT((sbfields & (XFS_SB_VERSIONNUM | XFS_SB_UQUOTINO |
1018 			XFS_SB_GQUOTINO | XFS_SB_PQUOTINO | XFS_SB_QFLAGS)) ==
1019 				(XFS_SB_VERSIONNUM | XFS_SB_UQUOTINO |
1020 				 XFS_SB_GQUOTINO | XFS_SB_PQUOTINO |
1021 				 XFS_SB_QFLAGS));
1022 
1023 		xfs_sb_version_addquota(&mp->m_sb);
1024 		mp->m_sb.sb_uquotino = NULLFSINO;
1025 		mp->m_sb.sb_gquotino = NULLFSINO;
1026 		mp->m_sb.sb_pquotino = NULLFSINO;
1027 
1028 		/* qflags will get updated fully _after_ quotacheck */
1029 		mp->m_sb.sb_qflags = mp->m_qflags & XFS_ALL_QUOTA_ACCT;
1030 	}
1031 	if (flags & XFS_QMOPT_UQUOTA)
1032 		mp->m_sb.sb_uquotino = (*ip)->i_ino;
1033 	else if (flags & XFS_QMOPT_GQUOTA)
1034 		mp->m_sb.sb_gquotino = (*ip)->i_ino;
1035 	else
1036 		mp->m_sb.sb_pquotino = (*ip)->i_ino;
1037 	spin_unlock(&mp->m_sb_lock);
1038 	xfs_mod_sb(tp, sbfields);
1039 
1040 	if ((error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES))) {
1041 		xfs_alert(mp, "%s failed (error %d)!", __func__, error);
1042 		return error;
1043 	}
1044 	return 0;
1045 }
1046 
1047 
1048 STATIC void
1049 xfs_qm_reset_dqcounts(
1050 	xfs_mount_t	*mp,
1051 	xfs_buf_t	*bp,
1052 	xfs_dqid_t	id,
1053 	uint		type)
1054 {
1055 	struct xfs_dqblk	*dqb;
1056 	int			j;
1057 
1058 	trace_xfs_reset_dqcounts(bp, _RET_IP_);
1059 
1060 	/*
1061 	 * Reset all counters and timers. They'll be
1062 	 * started afresh by xfs_qm_quotacheck.
1063 	 */
1064 #ifdef DEBUG
1065 	j = XFS_FSB_TO_B(mp, XFS_DQUOT_CLUSTER_SIZE_FSB);
1066 	do_div(j, sizeof(xfs_dqblk_t));
1067 	ASSERT(mp->m_quotainfo->qi_dqperchunk == j);
1068 #endif
1069 	dqb = bp->b_addr;
1070 	for (j = 0; j < mp->m_quotainfo->qi_dqperchunk; j++) {
1071 		struct xfs_disk_dquot	*ddq;
1072 
1073 		ddq = (struct xfs_disk_dquot *)&dqb[j];
1074 
1075 		/*
1076 		 * Do a sanity check, and if needed, repair the dqblk. Don't
1077 		 * output any warnings because it's perfectly possible to
1078 		 * find uninitialised dquot blks. See comment in xfs_dqcheck.
1079 		 */
1080 		xfs_dqcheck(mp, ddq, id+j, type, XFS_QMOPT_DQREPAIR,
1081 			    "xfs_quotacheck");
1082 		ddq->d_bcount = 0;
1083 		ddq->d_icount = 0;
1084 		ddq->d_rtbcount = 0;
1085 		ddq->d_btimer = 0;
1086 		ddq->d_itimer = 0;
1087 		ddq->d_rtbtimer = 0;
1088 		ddq->d_bwarns = 0;
1089 		ddq->d_iwarns = 0;
1090 		ddq->d_rtbwarns = 0;
1091 
1092 		if (xfs_sb_version_hascrc(&mp->m_sb)) {
1093 			xfs_update_cksum((char *)&dqb[j],
1094 					 sizeof(struct xfs_dqblk),
1095 					 XFS_DQUOT_CRC_OFF);
1096 		}
1097 	}
1098 }
1099 
1100 STATIC int
1101 xfs_qm_dqiter_bufs(
1102 	struct xfs_mount	*mp,
1103 	xfs_dqid_t		firstid,
1104 	xfs_fsblock_t		bno,
1105 	xfs_filblks_t		blkcnt,
1106 	uint			flags,
1107 	struct list_head	*buffer_list)
1108 {
1109 	struct xfs_buf		*bp;
1110 	int			error;
1111 	int			type;
1112 
1113 	ASSERT(blkcnt > 0);
1114 	type = flags & XFS_QMOPT_UQUOTA ? XFS_DQ_USER :
1115 		(flags & XFS_QMOPT_PQUOTA ? XFS_DQ_PROJ : XFS_DQ_GROUP);
1116 	error = 0;
1117 
1118 	/*
1119 	 * Blkcnt arg can be a very big number, and might even be
1120 	 * larger than the log itself. So, we have to break it up into
1121 	 * manageable-sized transactions.
1122 	 * Note that we don't start a permanent transaction here; we might
1123 	 * not be able to get a log reservation for the whole thing up front,
1124 	 * and we don't really care to either, because we just discard
1125 	 * everything if we were to crash in the middle of this loop.
1126 	 */
1127 	while (blkcnt--) {
1128 		error = xfs_trans_read_buf(mp, NULL, mp->m_ddev_targp,
1129 			      XFS_FSB_TO_DADDR(mp, bno),
1130 			      mp->m_quotainfo->qi_dqchunklen, 0, &bp,
1131 			      &xfs_dquot_buf_ops);
1132 
1133 		/*
1134 		 * CRC and validation errors will return a EFSCORRUPTED here. If
1135 		 * this occurs, re-read without CRC validation so that we can
1136 		 * repair the damage via xfs_qm_reset_dqcounts(). This process
1137 		 * will leave a trace in the log indicating corruption has
1138 		 * been detected.
1139 		 */
1140 		if (error == EFSCORRUPTED) {
1141 			error = xfs_trans_read_buf(mp, NULL, mp->m_ddev_targp,
1142 				      XFS_FSB_TO_DADDR(mp, bno),
1143 				      mp->m_quotainfo->qi_dqchunklen, 0, &bp,
1144 				      NULL);
1145 		}
1146 
1147 		if (error)
1148 			break;
1149 
1150 		xfs_qm_reset_dqcounts(mp, bp, firstid, type);
1151 		xfs_buf_delwri_queue(bp, buffer_list);
1152 		xfs_buf_relse(bp);
1153 
1154 		/* goto the next block. */
1155 		bno++;
1156 		firstid += mp->m_quotainfo->qi_dqperchunk;
1157 	}
1158 
1159 	return error;
1160 }
1161 
1162 /*
1163  * Iterate over all allocated USR/GRP/PRJ dquots in the system, calling a
1164  * caller supplied function for every chunk of dquots that we find.
1165  */
1166 STATIC int
1167 xfs_qm_dqiterate(
1168 	struct xfs_mount	*mp,
1169 	struct xfs_inode	*qip,
1170 	uint			flags,
1171 	struct list_head	*buffer_list)
1172 {
1173 	struct xfs_bmbt_irec	*map;
1174 	int			i, nmaps;	/* number of map entries */
1175 	int			error;		/* return value */
1176 	xfs_fileoff_t		lblkno;
1177 	xfs_filblks_t		maxlblkcnt;
1178 	xfs_dqid_t		firstid;
1179 	xfs_fsblock_t		rablkno;
1180 	xfs_filblks_t		rablkcnt;
1181 
1182 	error = 0;
1183 	/*
1184 	 * This looks racy, but we can't keep an inode lock across a
1185 	 * trans_reserve. But, this gets called during quotacheck, and that
1186 	 * happens only at mount time which is single threaded.
1187 	 */
1188 	if (qip->i_d.di_nblocks == 0)
1189 		return 0;
1190 
1191 	map = kmem_alloc(XFS_DQITER_MAP_SIZE * sizeof(*map), KM_SLEEP);
1192 
1193 	lblkno = 0;
1194 	maxlblkcnt = XFS_B_TO_FSB(mp, mp->m_super->s_maxbytes);
1195 	do {
1196 		nmaps = XFS_DQITER_MAP_SIZE;
1197 		/*
1198 		 * We aren't changing the inode itself. Just changing
1199 		 * some of its data. No new blocks are added here, and
1200 		 * the inode is never added to the transaction.
1201 		 */
1202 		xfs_ilock(qip, XFS_ILOCK_SHARED);
1203 		error = xfs_bmapi_read(qip, lblkno, maxlblkcnt - lblkno,
1204 				       map, &nmaps, 0);
1205 		xfs_iunlock(qip, XFS_ILOCK_SHARED);
1206 		if (error)
1207 			break;
1208 
1209 		ASSERT(nmaps <= XFS_DQITER_MAP_SIZE);
1210 		for (i = 0; i < nmaps; i++) {
1211 			ASSERT(map[i].br_startblock != DELAYSTARTBLOCK);
1212 			ASSERT(map[i].br_blockcount);
1213 
1214 
1215 			lblkno += map[i].br_blockcount;
1216 
1217 			if (map[i].br_startblock == HOLESTARTBLOCK)
1218 				continue;
1219 
1220 			firstid = (xfs_dqid_t) map[i].br_startoff *
1221 				mp->m_quotainfo->qi_dqperchunk;
1222 			/*
1223 			 * Do a read-ahead on the next extent.
1224 			 */
1225 			if ((i+1 < nmaps) &&
1226 			    (map[i+1].br_startblock != HOLESTARTBLOCK)) {
1227 				rablkcnt =  map[i+1].br_blockcount;
1228 				rablkno = map[i+1].br_startblock;
1229 				while (rablkcnt--) {
1230 					xfs_buf_readahead(mp->m_ddev_targp,
1231 					       XFS_FSB_TO_DADDR(mp, rablkno),
1232 					       mp->m_quotainfo->qi_dqchunklen,
1233 					       NULL);
1234 					rablkno++;
1235 				}
1236 			}
1237 			/*
1238 			 * Iterate thru all the blks in the extent and
1239 			 * reset the counters of all the dquots inside them.
1240 			 */
1241 			error = xfs_qm_dqiter_bufs(mp, firstid,
1242 						   map[i].br_startblock,
1243 						   map[i].br_blockcount,
1244 						   flags, buffer_list);
1245 			if (error)
1246 				goto out;
1247 		}
1248 	} while (nmaps > 0);
1249 
1250 out:
1251 	kmem_free(map);
1252 	return error;
1253 }
1254 
1255 /*
1256  * Called by dqusage_adjust in doing a quotacheck.
1257  *
1258  * Given the inode, and a dquot id this updates both the incore dqout as well
1259  * as the buffer copy. This is so that once the quotacheck is done, we can
1260  * just log all the buffers, as opposed to logging numerous updates to
1261  * individual dquots.
1262  */
1263 STATIC int
1264 xfs_qm_quotacheck_dqadjust(
1265 	struct xfs_inode	*ip,
1266 	xfs_dqid_t		id,
1267 	uint			type,
1268 	xfs_qcnt_t		nblks,
1269 	xfs_qcnt_t		rtblks)
1270 {
1271 	struct xfs_mount	*mp = ip->i_mount;
1272 	struct xfs_dquot	*dqp;
1273 	int			error;
1274 
1275 	error = xfs_qm_dqget(mp, ip, id, type,
1276 			     XFS_QMOPT_DQALLOC | XFS_QMOPT_DOWARN, &dqp);
1277 	if (error) {
1278 		/*
1279 		 * Shouldn't be able to turn off quotas here.
1280 		 */
1281 		ASSERT(error != ESRCH);
1282 		ASSERT(error != ENOENT);
1283 		return error;
1284 	}
1285 
1286 	trace_xfs_dqadjust(dqp);
1287 
1288 	/*
1289 	 * Adjust the inode count and the block count to reflect this inode's
1290 	 * resource usage.
1291 	 */
1292 	be64_add_cpu(&dqp->q_core.d_icount, 1);
1293 	dqp->q_res_icount++;
1294 	if (nblks) {
1295 		be64_add_cpu(&dqp->q_core.d_bcount, nblks);
1296 		dqp->q_res_bcount += nblks;
1297 	}
1298 	if (rtblks) {
1299 		be64_add_cpu(&dqp->q_core.d_rtbcount, rtblks);
1300 		dqp->q_res_rtbcount += rtblks;
1301 	}
1302 
1303 	/*
1304 	 * Set default limits, adjust timers (since we changed usages)
1305 	 *
1306 	 * There are no timers for the default values set in the root dquot.
1307 	 */
1308 	if (dqp->q_core.d_id) {
1309 		xfs_qm_adjust_dqlimits(mp, dqp);
1310 		xfs_qm_adjust_dqtimers(mp, &dqp->q_core);
1311 	}
1312 
1313 	dqp->dq_flags |= XFS_DQ_DIRTY;
1314 	xfs_qm_dqput(dqp);
1315 	return 0;
1316 }
1317 
1318 STATIC int
1319 xfs_qm_get_rtblks(
1320 	xfs_inode_t	*ip,
1321 	xfs_qcnt_t	*O_rtblks)
1322 {
1323 	xfs_filblks_t	rtblks;			/* total rt blks */
1324 	xfs_extnum_t	idx;			/* extent record index */
1325 	xfs_ifork_t	*ifp;			/* inode fork pointer */
1326 	xfs_extnum_t	nextents;		/* number of extent entries */
1327 	int		error;
1328 
1329 	ASSERT(XFS_IS_REALTIME_INODE(ip));
1330 	ifp = XFS_IFORK_PTR(ip, XFS_DATA_FORK);
1331 	if (!(ifp->if_flags & XFS_IFEXTENTS)) {
1332 		if ((error = xfs_iread_extents(NULL, ip, XFS_DATA_FORK)))
1333 			return error;
1334 	}
1335 	rtblks = 0;
1336 	nextents = ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t);
1337 	for (idx = 0; idx < nextents; idx++)
1338 		rtblks += xfs_bmbt_get_blockcount(xfs_iext_get_ext(ifp, idx));
1339 	*O_rtblks = (xfs_qcnt_t)rtblks;
1340 	return 0;
1341 }
1342 
1343 /*
1344  * callback routine supplied to bulkstat(). Given an inumber, find its
1345  * dquots and update them to account for resources taken by that inode.
1346  */
1347 /* ARGSUSED */
1348 STATIC int
1349 xfs_qm_dqusage_adjust(
1350 	xfs_mount_t	*mp,		/* mount point for filesystem */
1351 	xfs_ino_t	ino,		/* inode number to get data for */
1352 	void		__user *buffer,	/* not used */
1353 	int		ubsize,		/* not used */
1354 	int		*ubused,	/* not used */
1355 	int		*res)		/* result code value */
1356 {
1357 	xfs_inode_t	*ip;
1358 	xfs_qcnt_t	nblks, rtblks = 0;
1359 	int		error;
1360 
1361 	ASSERT(XFS_IS_QUOTA_RUNNING(mp));
1362 
1363 	/*
1364 	 * rootino must have its resources accounted for, not so with the quota
1365 	 * inodes.
1366 	 */
1367 	if (xfs_is_quota_inode(&mp->m_sb, ino)) {
1368 		*res = BULKSTAT_RV_NOTHING;
1369 		return XFS_ERROR(EINVAL);
1370 	}
1371 
1372 	/*
1373 	 * We don't _need_ to take the ilock EXCL. However, the xfs_qm_dqget
1374 	 * interface expects the inode to be exclusively locked because that's
1375 	 * the case in all other instances. It's OK that we do this because
1376 	 * quotacheck is done only at mount time.
1377 	 */
1378 	error = xfs_iget(mp, NULL, ino, 0, XFS_ILOCK_EXCL, &ip);
1379 	if (error) {
1380 		*res = BULKSTAT_RV_NOTHING;
1381 		return error;
1382 	}
1383 
1384 	ASSERT(ip->i_delayed_blks == 0);
1385 
1386 	if (XFS_IS_REALTIME_INODE(ip)) {
1387 		/*
1388 		 * Walk thru the extent list and count the realtime blocks.
1389 		 */
1390 		error = xfs_qm_get_rtblks(ip, &rtblks);
1391 		if (error)
1392 			goto error0;
1393 	}
1394 
1395 	nblks = (xfs_qcnt_t)ip->i_d.di_nblocks - rtblks;
1396 
1397 	/*
1398 	 * Add the (disk blocks and inode) resources occupied by this
1399 	 * inode to its dquots. We do this adjustment in the incore dquot,
1400 	 * and also copy the changes to its buffer.
1401 	 * We don't care about putting these changes in a transaction
1402 	 * envelope because if we crash in the middle of a 'quotacheck'
1403 	 * we have to start from the beginning anyway.
1404 	 * Once we're done, we'll log all the dquot bufs.
1405 	 *
1406 	 * The *QUOTA_ON checks below may look pretty racy, but quotachecks
1407 	 * and quotaoffs don't race. (Quotachecks happen at mount time only).
1408 	 */
1409 	if (XFS_IS_UQUOTA_ON(mp)) {
1410 		error = xfs_qm_quotacheck_dqadjust(ip, ip->i_d.di_uid,
1411 						   XFS_DQ_USER, nblks, rtblks);
1412 		if (error)
1413 			goto error0;
1414 	}
1415 
1416 	if (XFS_IS_GQUOTA_ON(mp)) {
1417 		error = xfs_qm_quotacheck_dqadjust(ip, ip->i_d.di_gid,
1418 						   XFS_DQ_GROUP, nblks, rtblks);
1419 		if (error)
1420 			goto error0;
1421 	}
1422 
1423 	if (XFS_IS_PQUOTA_ON(mp)) {
1424 		error = xfs_qm_quotacheck_dqadjust(ip, xfs_get_projid(ip),
1425 						   XFS_DQ_PROJ, nblks, rtblks);
1426 		if (error)
1427 			goto error0;
1428 	}
1429 
1430 	xfs_iunlock(ip, XFS_ILOCK_EXCL);
1431 	IRELE(ip);
1432 	*res = BULKSTAT_RV_DIDONE;
1433 	return 0;
1434 
1435 error0:
1436 	xfs_iunlock(ip, XFS_ILOCK_EXCL);
1437 	IRELE(ip);
1438 	*res = BULKSTAT_RV_GIVEUP;
1439 	return error;
1440 }
1441 
1442 STATIC int
1443 xfs_qm_flush_one(
1444 	struct xfs_dquot	*dqp,
1445 	void			*data)
1446 {
1447 	struct list_head	*buffer_list = data;
1448 	struct xfs_buf		*bp = NULL;
1449 	int			error = 0;
1450 
1451 	xfs_dqlock(dqp);
1452 	if (dqp->dq_flags & XFS_DQ_FREEING)
1453 		goto out_unlock;
1454 	if (!XFS_DQ_IS_DIRTY(dqp))
1455 		goto out_unlock;
1456 
1457 	xfs_dqflock(dqp);
1458 	error = xfs_qm_dqflush(dqp, &bp);
1459 	if (error)
1460 		goto out_unlock;
1461 
1462 	xfs_buf_delwri_queue(bp, buffer_list);
1463 	xfs_buf_relse(bp);
1464 out_unlock:
1465 	xfs_dqunlock(dqp);
1466 	return error;
1467 }
1468 
1469 /*
1470  * Walk thru all the filesystem inodes and construct a consistent view
1471  * of the disk quota world. If the quotacheck fails, disable quotas.
1472  */
1473 int
1474 xfs_qm_quotacheck(
1475 	xfs_mount_t	*mp)
1476 {
1477 	int			done, count, error, error2;
1478 	xfs_ino_t		lastino;
1479 	size_t			structsz;
1480 	uint			flags;
1481 	LIST_HEAD		(buffer_list);
1482 	struct xfs_inode	*uip = mp->m_quotainfo->qi_uquotaip;
1483 	struct xfs_inode	*gip = mp->m_quotainfo->qi_gquotaip;
1484 	struct xfs_inode	*pip = mp->m_quotainfo->qi_pquotaip;
1485 
1486 	count = INT_MAX;
1487 	structsz = 1;
1488 	lastino = 0;
1489 	flags = 0;
1490 
1491 	ASSERT(uip || gip || pip);
1492 	ASSERT(XFS_IS_QUOTA_RUNNING(mp));
1493 
1494 	xfs_notice(mp, "Quotacheck needed: Please wait.");
1495 
1496 	/*
1497 	 * First we go thru all the dquots on disk, USR and GRP/PRJ, and reset
1498 	 * their counters to zero. We need a clean slate.
1499 	 * We don't log our changes till later.
1500 	 */
1501 	if (uip) {
1502 		error = xfs_qm_dqiterate(mp, uip, XFS_QMOPT_UQUOTA,
1503 					 &buffer_list);
1504 		if (error)
1505 			goto error_return;
1506 		flags |= XFS_UQUOTA_CHKD;
1507 	}
1508 
1509 	if (gip) {
1510 		error = xfs_qm_dqiterate(mp, gip, XFS_QMOPT_GQUOTA,
1511 					 &buffer_list);
1512 		if (error)
1513 			goto error_return;
1514 		flags |= XFS_GQUOTA_CHKD;
1515 	}
1516 
1517 	if (pip) {
1518 		error = xfs_qm_dqiterate(mp, pip, XFS_QMOPT_PQUOTA,
1519 					 &buffer_list);
1520 		if (error)
1521 			goto error_return;
1522 		flags |= XFS_PQUOTA_CHKD;
1523 	}
1524 
1525 	do {
1526 		/*
1527 		 * Iterate thru all the inodes in the file system,
1528 		 * adjusting the corresponding dquot counters in core.
1529 		 */
1530 		error = xfs_bulkstat(mp, &lastino, &count,
1531 				     xfs_qm_dqusage_adjust,
1532 				     structsz, NULL, &done);
1533 		if (error)
1534 			break;
1535 
1536 	} while (!done);
1537 
1538 	/*
1539 	 * We've made all the changes that we need to make incore.  Flush them
1540 	 * down to disk buffers if everything was updated successfully.
1541 	 */
1542 	if (XFS_IS_UQUOTA_ON(mp)) {
1543 		error = xfs_qm_dquot_walk(mp, XFS_DQ_USER, xfs_qm_flush_one,
1544 					  &buffer_list);
1545 	}
1546 	if (XFS_IS_GQUOTA_ON(mp)) {
1547 		error2 = xfs_qm_dquot_walk(mp, XFS_DQ_GROUP, xfs_qm_flush_one,
1548 					   &buffer_list);
1549 		if (!error)
1550 			error = error2;
1551 	}
1552 	if (XFS_IS_PQUOTA_ON(mp)) {
1553 		error2 = xfs_qm_dquot_walk(mp, XFS_DQ_PROJ, xfs_qm_flush_one,
1554 					   &buffer_list);
1555 		if (!error)
1556 			error = error2;
1557 	}
1558 
1559 	error2 = xfs_buf_delwri_submit(&buffer_list);
1560 	if (!error)
1561 		error = error2;
1562 
1563 	/*
1564 	 * We can get this error if we couldn't do a dquot allocation inside
1565 	 * xfs_qm_dqusage_adjust (via bulkstat). We don't care about the
1566 	 * dirty dquots that might be cached, we just want to get rid of them
1567 	 * and turn quotaoff. The dquots won't be attached to any of the inodes
1568 	 * at this point (because we intentionally didn't in dqget_noattach).
1569 	 */
1570 	if (error) {
1571 		xfs_qm_dqpurge_all(mp, XFS_QMOPT_QUOTALL);
1572 		goto error_return;
1573 	}
1574 
1575 	/*
1576 	 * If one type of quotas is off, then it will lose its
1577 	 * quotachecked status, since we won't be doing accounting for
1578 	 * that type anymore.
1579 	 */
1580 	mp->m_qflags &= ~XFS_ALL_QUOTA_CHKD;
1581 	mp->m_qflags |= flags;
1582 
1583  error_return:
1584 	while (!list_empty(&buffer_list)) {
1585 		struct xfs_buf *bp =
1586 			list_first_entry(&buffer_list, struct xfs_buf, b_list);
1587 		list_del_init(&bp->b_list);
1588 		xfs_buf_relse(bp);
1589 	}
1590 
1591 	if (error) {
1592 		xfs_warn(mp,
1593 	"Quotacheck: Unsuccessful (Error %d): Disabling quotas.",
1594 			error);
1595 		/*
1596 		 * We must turn off quotas.
1597 		 */
1598 		ASSERT(mp->m_quotainfo != NULL);
1599 		xfs_qm_destroy_quotainfo(mp);
1600 		if (xfs_mount_reset_sbqflags(mp)) {
1601 			xfs_warn(mp,
1602 				"Quotacheck: Failed to reset quota flags.");
1603 		}
1604 	} else
1605 		xfs_notice(mp, "Quotacheck: Done.");
1606 	return (error);
1607 }
1608 
1609 /*
1610  * This is called after the superblock has been read in and we're ready to
1611  * iget the quota inodes.
1612  */
1613 STATIC int
1614 xfs_qm_init_quotainos(
1615 	xfs_mount_t	*mp)
1616 {
1617 	struct xfs_inode	*uip = NULL;
1618 	struct xfs_inode	*gip = NULL;
1619 	struct xfs_inode	*pip = NULL;
1620 	int			error;
1621 	__int64_t		sbflags = 0;
1622 	uint			flags = 0;
1623 
1624 	ASSERT(mp->m_quotainfo);
1625 
1626 	/*
1627 	 * Get the uquota and gquota inodes
1628 	 */
1629 	if (xfs_sb_version_hasquota(&mp->m_sb)) {
1630 		if (XFS_IS_UQUOTA_ON(mp) &&
1631 		    mp->m_sb.sb_uquotino != NULLFSINO) {
1632 			ASSERT(mp->m_sb.sb_uquotino > 0);
1633 			error = xfs_iget(mp, NULL, mp->m_sb.sb_uquotino,
1634 					     0, 0, &uip);
1635 			if (error)
1636 				return XFS_ERROR(error);
1637 		}
1638 		if (XFS_IS_GQUOTA_ON(mp) &&
1639 		    mp->m_sb.sb_gquotino != NULLFSINO) {
1640 			ASSERT(mp->m_sb.sb_gquotino > 0);
1641 			error = xfs_iget(mp, NULL, mp->m_sb.sb_gquotino,
1642 					     0, 0, &gip);
1643 			if (error)
1644 				goto error_rele;
1645 		}
1646 		if (XFS_IS_PQUOTA_ON(mp) &&
1647 		    mp->m_sb.sb_pquotino != NULLFSINO) {
1648 			ASSERT(mp->m_sb.sb_pquotino > 0);
1649 			error = xfs_iget(mp, NULL, mp->m_sb.sb_pquotino,
1650 					     0, 0, &pip);
1651 			if (error)
1652 				goto error_rele;
1653 		}
1654 	} else {
1655 		flags |= XFS_QMOPT_SBVERSION;
1656 		sbflags |= (XFS_SB_VERSIONNUM | XFS_SB_UQUOTINO |
1657 			    XFS_SB_GQUOTINO | XFS_SB_PQUOTINO |
1658 			    XFS_SB_QFLAGS);
1659 	}
1660 
1661 	/*
1662 	 * Create the three inodes, if they don't exist already. The changes
1663 	 * made above will get added to a transaction and logged in one of
1664 	 * the qino_alloc calls below.  If the device is readonly,
1665 	 * temporarily switch to read-write to do this.
1666 	 */
1667 	if (XFS_IS_UQUOTA_ON(mp) && uip == NULL) {
1668 		error = xfs_qm_qino_alloc(mp, &uip,
1669 					      sbflags | XFS_SB_UQUOTINO,
1670 					      flags | XFS_QMOPT_UQUOTA);
1671 		if (error)
1672 			goto error_rele;
1673 
1674 		flags &= ~XFS_QMOPT_SBVERSION;
1675 	}
1676 	if (XFS_IS_GQUOTA_ON(mp) && gip == NULL) {
1677 		error = xfs_qm_qino_alloc(mp, &gip,
1678 					  sbflags | XFS_SB_GQUOTINO,
1679 					  flags | XFS_QMOPT_GQUOTA);
1680 		if (error)
1681 			goto error_rele;
1682 
1683 		flags &= ~XFS_QMOPT_SBVERSION;
1684 	}
1685 	if (XFS_IS_PQUOTA_ON(mp) && pip == NULL) {
1686 		error = xfs_qm_qino_alloc(mp, &pip,
1687 					  sbflags | XFS_SB_PQUOTINO,
1688 					  flags | XFS_QMOPT_PQUOTA);
1689 		if (error)
1690 			goto error_rele;
1691 	}
1692 
1693 	mp->m_quotainfo->qi_uquotaip = uip;
1694 	mp->m_quotainfo->qi_gquotaip = gip;
1695 	mp->m_quotainfo->qi_pquotaip = pip;
1696 
1697 	return 0;
1698 
1699 error_rele:
1700 	if (uip)
1701 		IRELE(uip);
1702 	if (gip)
1703 		IRELE(gip);
1704 	if (pip)
1705 		IRELE(pip);
1706 	return XFS_ERROR(error);
1707 }
1708 
1709 STATIC void
1710 xfs_qm_dqfree_one(
1711 	struct xfs_dquot	*dqp)
1712 {
1713 	struct xfs_mount	*mp = dqp->q_mount;
1714 	struct xfs_quotainfo	*qi = mp->m_quotainfo;
1715 
1716 	mutex_lock(&qi->qi_tree_lock);
1717 	radix_tree_delete(xfs_dquot_tree(qi, dqp->q_core.d_flags),
1718 			  be32_to_cpu(dqp->q_core.d_id));
1719 
1720 	qi->qi_dquots--;
1721 	mutex_unlock(&qi->qi_tree_lock);
1722 
1723 	xfs_qm_dqdestroy(dqp);
1724 }
1725 
1726 /*
1727  * Start a transaction and write the incore superblock changes to
1728  * disk. flags parameter indicates which fields have changed.
1729  */
1730 int
1731 xfs_qm_write_sb_changes(
1732 	xfs_mount_t	*mp,
1733 	__int64_t	flags)
1734 {
1735 	xfs_trans_t	*tp;
1736 	int		error;
1737 
1738 	tp = xfs_trans_alloc(mp, XFS_TRANS_QM_SBCHANGE);
1739 	error = xfs_trans_reserve(tp, &M_RES(mp)->tr_qm_sbchange, 0, 0);
1740 	if (error) {
1741 		xfs_trans_cancel(tp, 0);
1742 		return error;
1743 	}
1744 
1745 	xfs_mod_sb(tp, flags);
1746 	error = xfs_trans_commit(tp, 0);
1747 
1748 	return error;
1749 }
1750 
1751 
1752 /* --------------- utility functions for vnodeops ---------------- */
1753 
1754 
1755 /*
1756  * Given an inode, a uid, gid and prid make sure that we have
1757  * allocated relevant dquot(s) on disk, and that we won't exceed inode
1758  * quotas by creating this file.
1759  * This also attaches dquot(s) to the given inode after locking it,
1760  * and returns the dquots corresponding to the uid and/or gid.
1761  *
1762  * in	: inode (unlocked)
1763  * out	: udquot, gdquot with references taken and unlocked
1764  */
1765 int
1766 xfs_qm_vop_dqalloc(
1767 	struct xfs_inode	*ip,
1768 	xfs_dqid_t		uid,
1769 	xfs_dqid_t		gid,
1770 	prid_t			prid,
1771 	uint			flags,
1772 	struct xfs_dquot	**O_udqpp,
1773 	struct xfs_dquot	**O_gdqpp,
1774 	struct xfs_dquot	**O_pdqpp)
1775 {
1776 	struct xfs_mount	*mp = ip->i_mount;
1777 	struct xfs_dquot	*uq = NULL;
1778 	struct xfs_dquot	*gq = NULL;
1779 	struct xfs_dquot	*pq = NULL;
1780 	int			error;
1781 	uint			lockflags;
1782 
1783 	if (!XFS_IS_QUOTA_RUNNING(mp) || !XFS_IS_QUOTA_ON(mp))
1784 		return 0;
1785 
1786 	lockflags = XFS_ILOCK_EXCL;
1787 	xfs_ilock(ip, lockflags);
1788 
1789 	if ((flags & XFS_QMOPT_INHERIT) && XFS_INHERIT_GID(ip))
1790 		gid = ip->i_d.di_gid;
1791 
1792 	/*
1793 	 * Attach the dquot(s) to this inode, doing a dquot allocation
1794 	 * if necessary. The dquot(s) will not be locked.
1795 	 */
1796 	if (XFS_NOT_DQATTACHED(mp, ip)) {
1797 		error = xfs_qm_dqattach_locked(ip, XFS_QMOPT_DQALLOC);
1798 		if (error) {
1799 			xfs_iunlock(ip, lockflags);
1800 			return error;
1801 		}
1802 	}
1803 
1804 	if ((flags & XFS_QMOPT_UQUOTA) && XFS_IS_UQUOTA_ON(mp)) {
1805 		if (ip->i_d.di_uid != uid) {
1806 			/*
1807 			 * What we need is the dquot that has this uid, and
1808 			 * if we send the inode to dqget, the uid of the inode
1809 			 * takes priority over what's sent in the uid argument.
1810 			 * We must unlock inode here before calling dqget if
1811 			 * we're not sending the inode, because otherwise
1812 			 * we'll deadlock by doing trans_reserve while
1813 			 * holding ilock.
1814 			 */
1815 			xfs_iunlock(ip, lockflags);
1816 			error = xfs_qm_dqget(mp, NULL, uid,
1817 						 XFS_DQ_USER,
1818 						 XFS_QMOPT_DQALLOC |
1819 						 XFS_QMOPT_DOWARN,
1820 						 &uq);
1821 			if (error) {
1822 				ASSERT(error != ENOENT);
1823 				return error;
1824 			}
1825 			/*
1826 			 * Get the ilock in the right order.
1827 			 */
1828 			xfs_dqunlock(uq);
1829 			lockflags = XFS_ILOCK_SHARED;
1830 			xfs_ilock(ip, lockflags);
1831 		} else {
1832 			/*
1833 			 * Take an extra reference, because we'll return
1834 			 * this to caller
1835 			 */
1836 			ASSERT(ip->i_udquot);
1837 			uq = xfs_qm_dqhold(ip->i_udquot);
1838 		}
1839 	}
1840 	if ((flags & XFS_QMOPT_GQUOTA) && XFS_IS_GQUOTA_ON(mp)) {
1841 		if (ip->i_d.di_gid != gid) {
1842 			xfs_iunlock(ip, lockflags);
1843 			error = xfs_qm_dqget(mp, NULL, gid,
1844 						 XFS_DQ_GROUP,
1845 						 XFS_QMOPT_DQALLOC |
1846 						 XFS_QMOPT_DOWARN,
1847 						 &gq);
1848 			if (error) {
1849 				ASSERT(error != ENOENT);
1850 				goto error_rele;
1851 			}
1852 			xfs_dqunlock(gq);
1853 			lockflags = XFS_ILOCK_SHARED;
1854 			xfs_ilock(ip, lockflags);
1855 		} else {
1856 			ASSERT(ip->i_gdquot);
1857 			gq = xfs_qm_dqhold(ip->i_gdquot);
1858 		}
1859 	}
1860 	if ((flags & XFS_QMOPT_PQUOTA) && XFS_IS_PQUOTA_ON(mp)) {
1861 		if (xfs_get_projid(ip) != prid) {
1862 			xfs_iunlock(ip, lockflags);
1863 			error = xfs_qm_dqget(mp, NULL, (xfs_dqid_t)prid,
1864 						 XFS_DQ_PROJ,
1865 						 XFS_QMOPT_DQALLOC |
1866 						 XFS_QMOPT_DOWARN,
1867 						 &pq);
1868 			if (error) {
1869 				ASSERT(error != ENOENT);
1870 				goto error_rele;
1871 			}
1872 			xfs_dqunlock(pq);
1873 			lockflags = XFS_ILOCK_SHARED;
1874 			xfs_ilock(ip, lockflags);
1875 		} else {
1876 			ASSERT(ip->i_pdquot);
1877 			pq = xfs_qm_dqhold(ip->i_pdquot);
1878 		}
1879 	}
1880 	if (uq)
1881 		trace_xfs_dquot_dqalloc(ip);
1882 
1883 	xfs_iunlock(ip, lockflags);
1884 	if (O_udqpp)
1885 		*O_udqpp = uq;
1886 	else if (uq)
1887 		xfs_qm_dqrele(uq);
1888 	if (O_gdqpp)
1889 		*O_gdqpp = gq;
1890 	else if (gq)
1891 		xfs_qm_dqrele(gq);
1892 	if (O_pdqpp)
1893 		*O_pdqpp = pq;
1894 	else if (pq)
1895 		xfs_qm_dqrele(pq);
1896 	return 0;
1897 
1898 error_rele:
1899 	if (gq)
1900 		xfs_qm_dqrele(gq);
1901 	if (uq)
1902 		xfs_qm_dqrele(uq);
1903 	return error;
1904 }
1905 
1906 /*
1907  * Actually transfer ownership, and do dquot modifications.
1908  * These were already reserved.
1909  */
1910 xfs_dquot_t *
1911 xfs_qm_vop_chown(
1912 	xfs_trans_t	*tp,
1913 	xfs_inode_t	*ip,
1914 	xfs_dquot_t	**IO_olddq,
1915 	xfs_dquot_t	*newdq)
1916 {
1917 	xfs_dquot_t	*prevdq;
1918 	uint		bfield = XFS_IS_REALTIME_INODE(ip) ?
1919 				 XFS_TRANS_DQ_RTBCOUNT : XFS_TRANS_DQ_BCOUNT;
1920 
1921 
1922 	ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
1923 	ASSERT(XFS_IS_QUOTA_RUNNING(ip->i_mount));
1924 
1925 	/* old dquot */
1926 	prevdq = *IO_olddq;
1927 	ASSERT(prevdq);
1928 	ASSERT(prevdq != newdq);
1929 
1930 	xfs_trans_mod_dquot(tp, prevdq, bfield, -(ip->i_d.di_nblocks));
1931 	xfs_trans_mod_dquot(tp, prevdq, XFS_TRANS_DQ_ICOUNT, -1);
1932 
1933 	/* the sparkling new dquot */
1934 	xfs_trans_mod_dquot(tp, newdq, bfield, ip->i_d.di_nblocks);
1935 	xfs_trans_mod_dquot(tp, newdq, XFS_TRANS_DQ_ICOUNT, 1);
1936 
1937 	/*
1938 	 * Take an extra reference, because the inode is going to keep
1939 	 * this dquot pointer even after the trans_commit.
1940 	 */
1941 	*IO_olddq = xfs_qm_dqhold(newdq);
1942 
1943 	return prevdq;
1944 }
1945 
1946 /*
1947  * Quota reservations for setattr(AT_UID|AT_GID|AT_PROJID).
1948  */
1949 int
1950 xfs_qm_vop_chown_reserve(
1951 	struct xfs_trans	*tp,
1952 	struct xfs_inode	*ip,
1953 	struct xfs_dquot	*udqp,
1954 	struct xfs_dquot	*gdqp,
1955 	struct xfs_dquot	*pdqp,
1956 	uint			flags)
1957 {
1958 	struct xfs_mount	*mp = ip->i_mount;
1959 	uint			delblks, blkflags, prjflags = 0;
1960 	struct xfs_dquot	*udq_unres = NULL;
1961 	struct xfs_dquot	*gdq_unres = NULL;
1962 	struct xfs_dquot	*pdq_unres = NULL;
1963 	struct xfs_dquot	*udq_delblks = NULL;
1964 	struct xfs_dquot	*gdq_delblks = NULL;
1965 	struct xfs_dquot	*pdq_delblks = NULL;
1966 	int			error;
1967 
1968 
1969 	ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL|XFS_ILOCK_SHARED));
1970 	ASSERT(XFS_IS_QUOTA_RUNNING(mp));
1971 
1972 	delblks = ip->i_delayed_blks;
1973 	blkflags = XFS_IS_REALTIME_INODE(ip) ?
1974 			XFS_QMOPT_RES_RTBLKS : XFS_QMOPT_RES_REGBLKS;
1975 
1976 	if (XFS_IS_UQUOTA_ON(mp) && udqp &&
1977 	    ip->i_d.di_uid != be32_to_cpu(udqp->q_core.d_id)) {
1978 		udq_delblks = udqp;
1979 		/*
1980 		 * If there are delayed allocation blocks, then we have to
1981 		 * unreserve those from the old dquot, and add them to the
1982 		 * new dquot.
1983 		 */
1984 		if (delblks) {
1985 			ASSERT(ip->i_udquot);
1986 			udq_unres = ip->i_udquot;
1987 		}
1988 	}
1989 	if (XFS_IS_GQUOTA_ON(ip->i_mount) && gdqp &&
1990 	    ip->i_d.di_gid != be32_to_cpu(gdqp->q_core.d_id)) {
1991 		gdq_delblks = gdqp;
1992 		if (delblks) {
1993 			ASSERT(ip->i_gdquot);
1994 			gdq_unres = ip->i_gdquot;
1995 		}
1996 	}
1997 
1998 	if (XFS_IS_PQUOTA_ON(ip->i_mount) && pdqp &&
1999 	    xfs_get_projid(ip) != be32_to_cpu(pdqp->q_core.d_id)) {
2000 		prjflags = XFS_QMOPT_ENOSPC;
2001 		pdq_delblks = pdqp;
2002 		if (delblks) {
2003 			ASSERT(ip->i_pdquot);
2004 			pdq_unres = ip->i_pdquot;
2005 		}
2006 	}
2007 
2008 	error = xfs_trans_reserve_quota_bydquots(tp, ip->i_mount,
2009 				udq_delblks, gdq_delblks, pdq_delblks,
2010 				ip->i_d.di_nblocks, 1,
2011 				flags | blkflags | prjflags);
2012 	if (error)
2013 		return error;
2014 
2015 	/*
2016 	 * Do the delayed blks reservations/unreservations now. Since, these
2017 	 * are done without the help of a transaction, if a reservation fails
2018 	 * its previous reservations won't be automatically undone by trans
2019 	 * code. So, we have to do it manually here.
2020 	 */
2021 	if (delblks) {
2022 		/*
2023 		 * Do the reservations first. Unreservation can't fail.
2024 		 */
2025 		ASSERT(udq_delblks || gdq_delblks || pdq_delblks);
2026 		ASSERT(udq_unres || gdq_unres || pdq_unres);
2027 		error = xfs_trans_reserve_quota_bydquots(NULL, ip->i_mount,
2028 			    udq_delblks, gdq_delblks, pdq_delblks,
2029 			    (xfs_qcnt_t)delblks, 0,
2030 			    flags | blkflags | prjflags);
2031 		if (error)
2032 			return error;
2033 		xfs_trans_reserve_quota_bydquots(NULL, ip->i_mount,
2034 				udq_unres, gdq_unres, pdq_unres,
2035 				-((xfs_qcnt_t)delblks), 0, blkflags);
2036 	}
2037 
2038 	return (0);
2039 }
2040 
2041 int
2042 xfs_qm_vop_rename_dqattach(
2043 	struct xfs_inode	**i_tab)
2044 {
2045 	struct xfs_mount	*mp = i_tab[0]->i_mount;
2046 	int			i;
2047 
2048 	if (!XFS_IS_QUOTA_RUNNING(mp) || !XFS_IS_QUOTA_ON(mp))
2049 		return 0;
2050 
2051 	for (i = 0; (i < 4 && i_tab[i]); i++) {
2052 		struct xfs_inode	*ip = i_tab[i];
2053 		int			error;
2054 
2055 		/*
2056 		 * Watch out for duplicate entries in the table.
2057 		 */
2058 		if (i == 0 || ip != i_tab[i-1]) {
2059 			if (XFS_NOT_DQATTACHED(mp, ip)) {
2060 				error = xfs_qm_dqattach(ip, 0);
2061 				if (error)
2062 					return error;
2063 			}
2064 		}
2065 	}
2066 	return 0;
2067 }
2068 
2069 void
2070 xfs_qm_vop_create_dqattach(
2071 	struct xfs_trans	*tp,
2072 	struct xfs_inode	*ip,
2073 	struct xfs_dquot	*udqp,
2074 	struct xfs_dquot	*gdqp,
2075 	struct xfs_dquot	*pdqp)
2076 {
2077 	struct xfs_mount	*mp = tp->t_mountp;
2078 
2079 	if (!XFS_IS_QUOTA_RUNNING(mp) || !XFS_IS_QUOTA_ON(mp))
2080 		return;
2081 
2082 	ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
2083 	ASSERT(XFS_IS_QUOTA_RUNNING(mp));
2084 
2085 	if (udqp) {
2086 		ASSERT(ip->i_udquot == NULL);
2087 		ASSERT(XFS_IS_UQUOTA_ON(mp));
2088 		ASSERT(ip->i_d.di_uid == be32_to_cpu(udqp->q_core.d_id));
2089 
2090 		ip->i_udquot = xfs_qm_dqhold(udqp);
2091 		xfs_trans_mod_dquot(tp, udqp, XFS_TRANS_DQ_ICOUNT, 1);
2092 	}
2093 	if (gdqp) {
2094 		ASSERT(ip->i_gdquot == NULL);
2095 		ASSERT(XFS_IS_GQUOTA_ON(mp));
2096 		ASSERT(ip->i_d.di_gid == be32_to_cpu(gdqp->q_core.d_id));
2097 		ip->i_gdquot = xfs_qm_dqhold(gdqp);
2098 		xfs_trans_mod_dquot(tp, gdqp, XFS_TRANS_DQ_ICOUNT, 1);
2099 	}
2100 	if (pdqp) {
2101 		ASSERT(ip->i_pdquot == NULL);
2102 		ASSERT(XFS_IS_PQUOTA_ON(mp));
2103 		ASSERT(xfs_get_projid(ip) == be32_to_cpu(pdqp->q_core.d_id));
2104 
2105 		ip->i_pdquot = xfs_qm_dqhold(pdqp);
2106 		xfs_trans_mod_dquot(tp, pdqp, XFS_TRANS_DQ_ICOUNT, 1);
2107 	}
2108 }
2109 
2110