1 // SPDX-License-Identifier: GPL-2.0 2 /* 3 * Copyright (c) 2000-2003 Silicon Graphics, Inc. 4 * All Rights Reserved. 5 */ 6 #include "xfs.h" 7 #include "xfs_fs.h" 8 #include "xfs_format.h" 9 #include "xfs_log_format.h" 10 #include "xfs_shared.h" 11 #include "xfs_trans_resv.h" 12 #include "xfs_bit.h" 13 #include "xfs_mount.h" 14 #include "xfs_defer.h" 15 #include "xfs_inode.h" 16 #include "xfs_bmap.h" 17 #include "xfs_quota.h" 18 #include "xfs_trans.h" 19 #include "xfs_buf_item.h" 20 #include "xfs_trans_space.h" 21 #include "xfs_trans_priv.h" 22 #include "xfs_qm.h" 23 #include "xfs_trace.h" 24 #include "xfs_log.h" 25 #include "xfs_bmap_btree.h" 26 27 /* 28 * Lock order: 29 * 30 * ip->i_lock 31 * qi->qi_tree_lock 32 * dquot->q_qlock (xfs_dqlock() and friends) 33 * dquot->q_flush (xfs_dqflock() and friends) 34 * qi->qi_lru_lock 35 * 36 * If two dquots need to be locked the order is user before group/project, 37 * otherwise by the lowest id first, see xfs_dqlock2. 38 */ 39 40 struct kmem_zone *xfs_qm_dqtrxzone; 41 static struct kmem_zone *xfs_qm_dqzone; 42 43 static struct lock_class_key xfs_dquot_group_class; 44 static struct lock_class_key xfs_dquot_project_class; 45 46 /* 47 * This is called to free all the memory associated with a dquot 48 */ 49 void 50 xfs_qm_dqdestroy( 51 struct xfs_dquot *dqp) 52 { 53 ASSERT(list_empty(&dqp->q_lru)); 54 55 kmem_free(dqp->q_logitem.qli_item.li_lv_shadow); 56 mutex_destroy(&dqp->q_qlock); 57 58 XFS_STATS_DEC(dqp->q_mount, xs_qm_dquot); 59 kmem_cache_free(xfs_qm_dqzone, dqp); 60 } 61 62 /* 63 * If default limits are in force, push them into the dquot now. 64 * We overwrite the dquot limits only if they are zero and this 65 * is not the root dquot. 66 */ 67 void 68 xfs_qm_adjust_dqlimits( 69 struct xfs_mount *mp, 70 struct xfs_dquot *dq) 71 { 72 struct xfs_quotainfo *q = mp->m_quotainfo; 73 struct xfs_disk_dquot *d = &dq->q_core; 74 struct xfs_def_quota *defq; 75 int prealloc = 0; 76 77 ASSERT(d->d_id); 78 defq = xfs_get_defquota(q, xfs_dquot_type(dq)); 79 80 if (defq->bsoftlimit && !d->d_blk_softlimit) { 81 d->d_blk_softlimit = cpu_to_be64(defq->bsoftlimit); 82 prealloc = 1; 83 } 84 if (defq->bhardlimit && !d->d_blk_hardlimit) { 85 d->d_blk_hardlimit = cpu_to_be64(defq->bhardlimit); 86 prealloc = 1; 87 } 88 if (defq->isoftlimit && !d->d_ino_softlimit) 89 d->d_ino_softlimit = cpu_to_be64(defq->isoftlimit); 90 if (defq->ihardlimit && !d->d_ino_hardlimit) 91 d->d_ino_hardlimit = cpu_to_be64(defq->ihardlimit); 92 if (defq->rtbsoftlimit && !d->d_rtb_softlimit) 93 d->d_rtb_softlimit = cpu_to_be64(defq->rtbsoftlimit); 94 if (defq->rtbhardlimit && !d->d_rtb_hardlimit) 95 d->d_rtb_hardlimit = cpu_to_be64(defq->rtbhardlimit); 96 97 if (prealloc) 98 xfs_dquot_set_prealloc_limits(dq); 99 } 100 101 /* 102 * Check the limits and timers of a dquot and start or reset timers 103 * if necessary. 104 * This gets called even when quota enforcement is OFF, which makes our 105 * life a little less complicated. (We just don't reject any quota 106 * reservations in that case, when enforcement is off). 107 * We also return 0 as the values of the timers in Q_GETQUOTA calls, when 108 * enforcement's off. 109 * In contrast, warnings are a little different in that they don't 110 * 'automatically' get started when limits get exceeded. They do 111 * get reset to zero, however, when we find the count to be under 112 * the soft limit (they are only ever set non-zero via userspace). 113 */ 114 void 115 xfs_qm_adjust_dqtimers( 116 struct xfs_mount *mp, 117 struct xfs_dquot *dq) 118 { 119 struct xfs_quotainfo *qi = mp->m_quotainfo; 120 struct xfs_disk_dquot *d = &dq->q_core; 121 struct xfs_def_quota *defq; 122 123 ASSERT(d->d_id); 124 defq = xfs_get_defquota(qi, xfs_dquot_type(dq)); 125 126 #ifdef DEBUG 127 if (d->d_blk_hardlimit) 128 ASSERT(be64_to_cpu(d->d_blk_softlimit) <= 129 be64_to_cpu(d->d_blk_hardlimit)); 130 if (d->d_ino_hardlimit) 131 ASSERT(be64_to_cpu(d->d_ino_softlimit) <= 132 be64_to_cpu(d->d_ino_hardlimit)); 133 if (d->d_rtb_hardlimit) 134 ASSERT(be64_to_cpu(d->d_rtb_softlimit) <= 135 be64_to_cpu(d->d_rtb_hardlimit)); 136 #endif 137 138 if (!d->d_btimer) { 139 if ((d->d_blk_softlimit && 140 (be64_to_cpu(d->d_bcount) > 141 be64_to_cpu(d->d_blk_softlimit))) || 142 (d->d_blk_hardlimit && 143 (be64_to_cpu(d->d_bcount) > 144 be64_to_cpu(d->d_blk_hardlimit)))) { 145 d->d_btimer = cpu_to_be32(ktime_get_real_seconds() + 146 defq->btimelimit); 147 } else { 148 d->d_bwarns = 0; 149 } 150 } else { 151 if ((!d->d_blk_softlimit || 152 (be64_to_cpu(d->d_bcount) <= 153 be64_to_cpu(d->d_blk_softlimit))) && 154 (!d->d_blk_hardlimit || 155 (be64_to_cpu(d->d_bcount) <= 156 be64_to_cpu(d->d_blk_hardlimit)))) { 157 d->d_btimer = 0; 158 } 159 } 160 161 if (!d->d_itimer) { 162 if ((d->d_ino_softlimit && 163 (be64_to_cpu(d->d_icount) > 164 be64_to_cpu(d->d_ino_softlimit))) || 165 (d->d_ino_hardlimit && 166 (be64_to_cpu(d->d_icount) > 167 be64_to_cpu(d->d_ino_hardlimit)))) { 168 d->d_itimer = cpu_to_be32(ktime_get_real_seconds() + 169 defq->itimelimit); 170 } else { 171 d->d_iwarns = 0; 172 } 173 } else { 174 if ((!d->d_ino_softlimit || 175 (be64_to_cpu(d->d_icount) <= 176 be64_to_cpu(d->d_ino_softlimit))) && 177 (!d->d_ino_hardlimit || 178 (be64_to_cpu(d->d_icount) <= 179 be64_to_cpu(d->d_ino_hardlimit)))) { 180 d->d_itimer = 0; 181 } 182 } 183 184 if (!d->d_rtbtimer) { 185 if ((d->d_rtb_softlimit && 186 (be64_to_cpu(d->d_rtbcount) > 187 be64_to_cpu(d->d_rtb_softlimit))) || 188 (d->d_rtb_hardlimit && 189 (be64_to_cpu(d->d_rtbcount) > 190 be64_to_cpu(d->d_rtb_hardlimit)))) { 191 d->d_rtbtimer = cpu_to_be32(ktime_get_real_seconds() + 192 defq->rtbtimelimit); 193 } else { 194 d->d_rtbwarns = 0; 195 } 196 } else { 197 if ((!d->d_rtb_softlimit || 198 (be64_to_cpu(d->d_rtbcount) <= 199 be64_to_cpu(d->d_rtb_softlimit))) && 200 (!d->d_rtb_hardlimit || 201 (be64_to_cpu(d->d_rtbcount) <= 202 be64_to_cpu(d->d_rtb_hardlimit)))) { 203 d->d_rtbtimer = 0; 204 } 205 } 206 } 207 208 /* 209 * initialize a buffer full of dquots and log the whole thing 210 */ 211 STATIC void 212 xfs_qm_init_dquot_blk( 213 struct xfs_trans *tp, 214 struct xfs_mount *mp, 215 xfs_dqid_t id, 216 uint type, 217 struct xfs_buf *bp) 218 { 219 struct xfs_quotainfo *q = mp->m_quotainfo; 220 struct xfs_dqblk *d; 221 xfs_dqid_t curid; 222 unsigned int qflag; 223 unsigned int blftype; 224 int i; 225 226 ASSERT(tp); 227 ASSERT(xfs_buf_islocked(bp)); 228 229 d = bp->b_addr; 230 231 /* 232 * ID of the first dquot in the block - id's are zero based. 233 */ 234 curid = id - (id % q->qi_dqperchunk); 235 memset(d, 0, BBTOB(q->qi_dqchunklen)); 236 for (i = 0; i < q->qi_dqperchunk; i++, d++, curid++) { 237 d->dd_diskdq.d_magic = cpu_to_be16(XFS_DQUOT_MAGIC); 238 d->dd_diskdq.d_version = XFS_DQUOT_VERSION; 239 d->dd_diskdq.d_id = cpu_to_be32(curid); 240 d->dd_diskdq.d_flags = type; 241 if (xfs_sb_version_hascrc(&mp->m_sb)) { 242 uuid_copy(&d->dd_uuid, &mp->m_sb.sb_meta_uuid); 243 xfs_update_cksum((char *)d, sizeof(struct xfs_dqblk), 244 XFS_DQUOT_CRC_OFF); 245 } 246 } 247 248 if (type & XFS_DQ_USER) { 249 qflag = XFS_UQUOTA_CHKD; 250 blftype = XFS_BLF_UDQUOT_BUF; 251 } else if (type & XFS_DQ_PROJ) { 252 qflag = XFS_PQUOTA_CHKD; 253 blftype = XFS_BLF_PDQUOT_BUF; 254 } else { 255 qflag = XFS_GQUOTA_CHKD; 256 blftype = XFS_BLF_GDQUOT_BUF; 257 } 258 259 xfs_trans_dquot_buf(tp, bp, blftype); 260 261 /* 262 * quotacheck uses delayed writes to update all the dquots on disk in an 263 * efficient manner instead of logging the individual dquot changes as 264 * they are made. However if we log the buffer allocated here and crash 265 * after quotacheck while the logged initialisation is still in the 266 * active region of the log, log recovery can replay the dquot buffer 267 * initialisation over the top of the checked dquots and corrupt quota 268 * accounting. 269 * 270 * To avoid this problem, quotacheck cannot log the initialised buffer. 271 * We must still dirty the buffer and write it back before the 272 * allocation transaction clears the log. Therefore, mark the buffer as 273 * ordered instead of logging it directly. This is safe for quotacheck 274 * because it detects and repairs allocated but initialized dquot blocks 275 * in the quota inodes. 276 */ 277 if (!(mp->m_qflags & qflag)) 278 xfs_trans_ordered_buf(tp, bp); 279 else 280 xfs_trans_log_buf(tp, bp, 0, BBTOB(q->qi_dqchunklen) - 1); 281 } 282 283 /* 284 * Initialize the dynamic speculative preallocation thresholds. The lo/hi 285 * watermarks correspond to the soft and hard limits by default. If a soft limit 286 * is not specified, we use 95% of the hard limit. 287 */ 288 void 289 xfs_dquot_set_prealloc_limits(struct xfs_dquot *dqp) 290 { 291 uint64_t space; 292 293 dqp->q_prealloc_hi_wmark = be64_to_cpu(dqp->q_core.d_blk_hardlimit); 294 dqp->q_prealloc_lo_wmark = be64_to_cpu(dqp->q_core.d_blk_softlimit); 295 if (!dqp->q_prealloc_lo_wmark) { 296 dqp->q_prealloc_lo_wmark = dqp->q_prealloc_hi_wmark; 297 do_div(dqp->q_prealloc_lo_wmark, 100); 298 dqp->q_prealloc_lo_wmark *= 95; 299 } 300 301 space = dqp->q_prealloc_hi_wmark; 302 303 do_div(space, 100); 304 dqp->q_low_space[XFS_QLOWSP_1_PCNT] = space; 305 dqp->q_low_space[XFS_QLOWSP_3_PCNT] = space * 3; 306 dqp->q_low_space[XFS_QLOWSP_5_PCNT] = space * 5; 307 } 308 309 /* 310 * Ensure that the given in-core dquot has a buffer on disk backing it, and 311 * return the buffer locked and held. This is called when the bmapi finds a 312 * hole. 313 */ 314 STATIC int 315 xfs_dquot_disk_alloc( 316 struct xfs_trans **tpp, 317 struct xfs_dquot *dqp, 318 struct xfs_buf **bpp) 319 { 320 struct xfs_bmbt_irec map; 321 struct xfs_trans *tp = *tpp; 322 struct xfs_mount *mp = tp->t_mountp; 323 struct xfs_buf *bp; 324 struct xfs_inode *quotip = xfs_quota_inode(mp, dqp->dq_flags); 325 int nmaps = 1; 326 int error; 327 328 trace_xfs_dqalloc(dqp); 329 330 xfs_ilock(quotip, XFS_ILOCK_EXCL); 331 if (!xfs_this_quota_on(dqp->q_mount, dqp->dq_flags)) { 332 /* 333 * Return if this type of quotas is turned off while we didn't 334 * have an inode lock 335 */ 336 xfs_iunlock(quotip, XFS_ILOCK_EXCL); 337 return -ESRCH; 338 } 339 340 /* Create the block mapping. */ 341 xfs_trans_ijoin(tp, quotip, XFS_ILOCK_EXCL); 342 error = xfs_bmapi_write(tp, quotip, dqp->q_fileoffset, 343 XFS_DQUOT_CLUSTER_SIZE_FSB, XFS_BMAPI_METADATA, 0, &map, 344 &nmaps); 345 if (error) 346 return error; 347 ASSERT(map.br_blockcount == XFS_DQUOT_CLUSTER_SIZE_FSB); 348 ASSERT(nmaps == 1); 349 ASSERT((map.br_startblock != DELAYSTARTBLOCK) && 350 (map.br_startblock != HOLESTARTBLOCK)); 351 352 /* 353 * Keep track of the blkno to save a lookup later 354 */ 355 dqp->q_blkno = XFS_FSB_TO_DADDR(mp, map.br_startblock); 356 357 /* now we can just get the buffer (there's nothing to read yet) */ 358 error = xfs_trans_get_buf(tp, mp->m_ddev_targp, dqp->q_blkno, 359 mp->m_quotainfo->qi_dqchunklen, 0, &bp); 360 if (error) 361 return error; 362 bp->b_ops = &xfs_dquot_buf_ops; 363 364 /* 365 * Make a chunk of dquots out of this buffer and log 366 * the entire thing. 367 */ 368 xfs_qm_init_dquot_blk(tp, mp, be32_to_cpu(dqp->q_core.d_id), 369 dqp->dq_flags & XFS_DQ_ALLTYPES, bp); 370 xfs_buf_set_ref(bp, XFS_DQUOT_REF); 371 372 /* 373 * Hold the buffer and join it to the dfops so that we'll still own 374 * the buffer when we return to the caller. The buffer disposal on 375 * error must be paid attention to very carefully, as it has been 376 * broken since commit efa092f3d4c6 "[XFS] Fixes a bug in the quota 377 * code when allocating a new dquot record" in 2005, and the later 378 * conversion to xfs_defer_ops in commit 310a75a3c6c747 failed to keep 379 * the buffer locked across the _defer_finish call. We can now do 380 * this correctly with xfs_defer_bjoin. 381 * 382 * Above, we allocated a disk block for the dquot information and used 383 * get_buf to initialize the dquot. If the _defer_finish fails, the old 384 * transaction is gone but the new buffer is not joined or held to any 385 * transaction, so we must _buf_relse it. 386 * 387 * If everything succeeds, the caller of this function is returned a 388 * buffer that is locked and held to the transaction. The caller 389 * is responsible for unlocking any buffer passed back, either 390 * manually or by committing the transaction. On error, the buffer is 391 * released and not passed back. 392 */ 393 xfs_trans_bhold(tp, bp); 394 error = xfs_defer_finish(tpp); 395 if (error) { 396 xfs_trans_bhold_release(*tpp, bp); 397 xfs_trans_brelse(*tpp, bp); 398 return error; 399 } 400 *bpp = bp; 401 return 0; 402 } 403 404 /* 405 * Read in the in-core dquot's on-disk metadata and return the buffer. 406 * Returns ENOENT to signal a hole. 407 */ 408 STATIC int 409 xfs_dquot_disk_read( 410 struct xfs_mount *mp, 411 struct xfs_dquot *dqp, 412 struct xfs_buf **bpp) 413 { 414 struct xfs_bmbt_irec map; 415 struct xfs_buf *bp; 416 struct xfs_inode *quotip = xfs_quota_inode(mp, dqp->dq_flags); 417 uint lock_mode; 418 int nmaps = 1; 419 int error; 420 421 lock_mode = xfs_ilock_data_map_shared(quotip); 422 if (!xfs_this_quota_on(mp, dqp->dq_flags)) { 423 /* 424 * Return if this type of quotas is turned off while we 425 * didn't have the quota inode lock. 426 */ 427 xfs_iunlock(quotip, lock_mode); 428 return -ESRCH; 429 } 430 431 /* 432 * Find the block map; no allocations yet 433 */ 434 error = xfs_bmapi_read(quotip, dqp->q_fileoffset, 435 XFS_DQUOT_CLUSTER_SIZE_FSB, &map, &nmaps, 0); 436 xfs_iunlock(quotip, lock_mode); 437 if (error) 438 return error; 439 440 ASSERT(nmaps == 1); 441 ASSERT(map.br_blockcount >= 1); 442 ASSERT(map.br_startblock != DELAYSTARTBLOCK); 443 if (map.br_startblock == HOLESTARTBLOCK) 444 return -ENOENT; 445 446 trace_xfs_dqtobp_read(dqp); 447 448 /* 449 * store the blkno etc so that we don't have to do the 450 * mapping all the time 451 */ 452 dqp->q_blkno = XFS_FSB_TO_DADDR(mp, map.br_startblock); 453 454 error = xfs_trans_read_buf(mp, NULL, mp->m_ddev_targp, dqp->q_blkno, 455 mp->m_quotainfo->qi_dqchunklen, 0, &bp, 456 &xfs_dquot_buf_ops); 457 if (error) { 458 ASSERT(bp == NULL); 459 return error; 460 } 461 462 ASSERT(xfs_buf_islocked(bp)); 463 xfs_buf_set_ref(bp, XFS_DQUOT_REF); 464 *bpp = bp; 465 466 return 0; 467 } 468 469 /* Allocate and initialize everything we need for an incore dquot. */ 470 STATIC struct xfs_dquot * 471 xfs_dquot_alloc( 472 struct xfs_mount *mp, 473 xfs_dqid_t id, 474 uint type) 475 { 476 struct xfs_dquot *dqp; 477 478 dqp = kmem_zone_zalloc(xfs_qm_dqzone, 0); 479 480 dqp->dq_flags = type; 481 dqp->q_core.d_id = cpu_to_be32(id); 482 dqp->q_mount = mp; 483 INIT_LIST_HEAD(&dqp->q_lru); 484 mutex_init(&dqp->q_qlock); 485 init_waitqueue_head(&dqp->q_pinwait); 486 dqp->q_fileoffset = (xfs_fileoff_t)id / mp->m_quotainfo->qi_dqperchunk; 487 /* 488 * Offset of dquot in the (fixed sized) dquot chunk. 489 */ 490 dqp->q_bufoffset = (id % mp->m_quotainfo->qi_dqperchunk) * 491 sizeof(xfs_dqblk_t); 492 493 /* 494 * Because we want to use a counting completion, complete 495 * the flush completion once to allow a single access to 496 * the flush completion without blocking. 497 */ 498 init_completion(&dqp->q_flush); 499 complete(&dqp->q_flush); 500 501 /* 502 * Make sure group quotas have a different lock class than user 503 * quotas. 504 */ 505 switch (type) { 506 case XFS_DQ_USER: 507 /* uses the default lock class */ 508 break; 509 case XFS_DQ_GROUP: 510 lockdep_set_class(&dqp->q_qlock, &xfs_dquot_group_class); 511 break; 512 case XFS_DQ_PROJ: 513 lockdep_set_class(&dqp->q_qlock, &xfs_dquot_project_class); 514 break; 515 default: 516 ASSERT(0); 517 break; 518 } 519 520 xfs_qm_dquot_logitem_init(dqp); 521 522 XFS_STATS_INC(mp, xs_qm_dquot); 523 return dqp; 524 } 525 526 /* Copy the in-core quota fields in from the on-disk buffer. */ 527 STATIC void 528 xfs_dquot_from_disk( 529 struct xfs_dquot *dqp, 530 struct xfs_buf *bp) 531 { 532 struct xfs_disk_dquot *ddqp = bp->b_addr + dqp->q_bufoffset; 533 534 /* copy everything from disk dquot to the incore dquot */ 535 memcpy(&dqp->q_core, ddqp, sizeof(struct xfs_disk_dquot)); 536 537 /* 538 * Reservation counters are defined as reservation plus current usage 539 * to avoid having to add every time. 540 */ 541 dqp->q_res_bcount = be64_to_cpu(ddqp->d_bcount); 542 dqp->q_res_icount = be64_to_cpu(ddqp->d_icount); 543 dqp->q_res_rtbcount = be64_to_cpu(ddqp->d_rtbcount); 544 545 /* initialize the dquot speculative prealloc thresholds */ 546 xfs_dquot_set_prealloc_limits(dqp); 547 } 548 549 /* Allocate and initialize the dquot buffer for this in-core dquot. */ 550 static int 551 xfs_qm_dqread_alloc( 552 struct xfs_mount *mp, 553 struct xfs_dquot *dqp, 554 struct xfs_buf **bpp) 555 { 556 struct xfs_trans *tp; 557 int error; 558 559 error = xfs_trans_alloc(mp, &M_RES(mp)->tr_qm_dqalloc, 560 XFS_QM_DQALLOC_SPACE_RES(mp), 0, 0, &tp); 561 if (error) 562 goto err; 563 564 error = xfs_dquot_disk_alloc(&tp, dqp, bpp); 565 if (error) 566 goto err_cancel; 567 568 error = xfs_trans_commit(tp); 569 if (error) { 570 /* 571 * Buffer was held to the transaction, so we have to unlock it 572 * manually here because we're not passing it back. 573 */ 574 xfs_buf_relse(*bpp); 575 *bpp = NULL; 576 goto err; 577 } 578 return 0; 579 580 err_cancel: 581 xfs_trans_cancel(tp); 582 err: 583 return error; 584 } 585 586 /* 587 * Read in the ondisk dquot using dqtobp() then copy it to an incore version, 588 * and release the buffer immediately. If @can_alloc is true, fill any 589 * holes in the on-disk metadata. 590 */ 591 static int 592 xfs_qm_dqread( 593 struct xfs_mount *mp, 594 xfs_dqid_t id, 595 uint type, 596 bool can_alloc, 597 struct xfs_dquot **dqpp) 598 { 599 struct xfs_dquot *dqp; 600 struct xfs_buf *bp; 601 int error; 602 603 dqp = xfs_dquot_alloc(mp, id, type); 604 trace_xfs_dqread(dqp); 605 606 /* Try to read the buffer, allocating if necessary. */ 607 error = xfs_dquot_disk_read(mp, dqp, &bp); 608 if (error == -ENOENT && can_alloc) 609 error = xfs_qm_dqread_alloc(mp, dqp, &bp); 610 if (error) 611 goto err; 612 613 /* 614 * At this point we should have a clean locked buffer. Copy the data 615 * to the incore dquot and release the buffer since the incore dquot 616 * has its own locking protocol so we needn't tie up the buffer any 617 * further. 618 */ 619 ASSERT(xfs_buf_islocked(bp)); 620 xfs_dquot_from_disk(dqp, bp); 621 622 xfs_buf_relse(bp); 623 *dqpp = dqp; 624 return error; 625 626 err: 627 trace_xfs_dqread_fail(dqp); 628 xfs_qm_dqdestroy(dqp); 629 *dqpp = NULL; 630 return error; 631 } 632 633 /* 634 * Advance to the next id in the current chunk, or if at the 635 * end of the chunk, skip ahead to first id in next allocated chunk 636 * using the SEEK_DATA interface. 637 */ 638 static int 639 xfs_dq_get_next_id( 640 struct xfs_mount *mp, 641 uint type, 642 xfs_dqid_t *id) 643 { 644 struct xfs_inode *quotip = xfs_quota_inode(mp, type); 645 xfs_dqid_t next_id = *id + 1; /* simple advance */ 646 uint lock_flags; 647 struct xfs_bmbt_irec got; 648 struct xfs_iext_cursor cur; 649 xfs_fsblock_t start; 650 int error = 0; 651 652 /* If we'd wrap past the max ID, stop */ 653 if (next_id < *id) 654 return -ENOENT; 655 656 /* If new ID is within the current chunk, advancing it sufficed */ 657 if (next_id % mp->m_quotainfo->qi_dqperchunk) { 658 *id = next_id; 659 return 0; 660 } 661 662 /* Nope, next_id is now past the current chunk, so find the next one */ 663 start = (xfs_fsblock_t)next_id / mp->m_quotainfo->qi_dqperchunk; 664 665 lock_flags = xfs_ilock_data_map_shared(quotip); 666 if (!(quotip->i_df.if_flags & XFS_IFEXTENTS)) { 667 error = xfs_iread_extents(NULL, quotip, XFS_DATA_FORK); 668 if (error) 669 return error; 670 } 671 672 if (xfs_iext_lookup_extent(quotip, "ip->i_df, start, &cur, &got)) { 673 /* contiguous chunk, bump startoff for the id calculation */ 674 if (got.br_startoff < start) 675 got.br_startoff = start; 676 *id = got.br_startoff * mp->m_quotainfo->qi_dqperchunk; 677 } else { 678 error = -ENOENT; 679 } 680 681 xfs_iunlock(quotip, lock_flags); 682 683 return error; 684 } 685 686 /* 687 * Look up the dquot in the in-core cache. If found, the dquot is returned 688 * locked and ready to go. 689 */ 690 static struct xfs_dquot * 691 xfs_qm_dqget_cache_lookup( 692 struct xfs_mount *mp, 693 struct xfs_quotainfo *qi, 694 struct radix_tree_root *tree, 695 xfs_dqid_t id) 696 { 697 struct xfs_dquot *dqp; 698 699 restart: 700 mutex_lock(&qi->qi_tree_lock); 701 dqp = radix_tree_lookup(tree, id); 702 if (!dqp) { 703 mutex_unlock(&qi->qi_tree_lock); 704 XFS_STATS_INC(mp, xs_qm_dqcachemisses); 705 return NULL; 706 } 707 708 xfs_dqlock(dqp); 709 if (dqp->dq_flags & XFS_DQ_FREEING) { 710 xfs_dqunlock(dqp); 711 mutex_unlock(&qi->qi_tree_lock); 712 trace_xfs_dqget_freeing(dqp); 713 delay(1); 714 goto restart; 715 } 716 717 dqp->q_nrefs++; 718 mutex_unlock(&qi->qi_tree_lock); 719 720 trace_xfs_dqget_hit(dqp); 721 XFS_STATS_INC(mp, xs_qm_dqcachehits); 722 return dqp; 723 } 724 725 /* 726 * Try to insert a new dquot into the in-core cache. If an error occurs the 727 * caller should throw away the dquot and start over. Otherwise, the dquot 728 * is returned locked (and held by the cache) as if there had been a cache 729 * hit. 730 */ 731 static int 732 xfs_qm_dqget_cache_insert( 733 struct xfs_mount *mp, 734 struct xfs_quotainfo *qi, 735 struct radix_tree_root *tree, 736 xfs_dqid_t id, 737 struct xfs_dquot *dqp) 738 { 739 int error; 740 741 mutex_lock(&qi->qi_tree_lock); 742 error = radix_tree_insert(tree, id, dqp); 743 if (unlikely(error)) { 744 /* Duplicate found! Caller must try again. */ 745 WARN_ON(error != -EEXIST); 746 mutex_unlock(&qi->qi_tree_lock); 747 trace_xfs_dqget_dup(dqp); 748 return error; 749 } 750 751 /* Return a locked dquot to the caller, with a reference taken. */ 752 xfs_dqlock(dqp); 753 dqp->q_nrefs = 1; 754 755 qi->qi_dquots++; 756 mutex_unlock(&qi->qi_tree_lock); 757 758 return 0; 759 } 760 761 /* Check our input parameters. */ 762 static int 763 xfs_qm_dqget_checks( 764 struct xfs_mount *mp, 765 uint type) 766 { 767 if (WARN_ON_ONCE(!XFS_IS_QUOTA_RUNNING(mp))) 768 return -ESRCH; 769 770 switch (type) { 771 case XFS_DQ_USER: 772 if (!XFS_IS_UQUOTA_ON(mp)) 773 return -ESRCH; 774 return 0; 775 case XFS_DQ_GROUP: 776 if (!XFS_IS_GQUOTA_ON(mp)) 777 return -ESRCH; 778 return 0; 779 case XFS_DQ_PROJ: 780 if (!XFS_IS_PQUOTA_ON(mp)) 781 return -ESRCH; 782 return 0; 783 default: 784 WARN_ON_ONCE(0); 785 return -EINVAL; 786 } 787 } 788 789 /* 790 * Given the file system, id, and type (UDQUOT/GDQUOT), return a a locked 791 * dquot, doing an allocation (if requested) as needed. 792 */ 793 int 794 xfs_qm_dqget( 795 struct xfs_mount *mp, 796 xfs_dqid_t id, 797 uint type, 798 bool can_alloc, 799 struct xfs_dquot **O_dqpp) 800 { 801 struct xfs_quotainfo *qi = mp->m_quotainfo; 802 struct radix_tree_root *tree = xfs_dquot_tree(qi, type); 803 struct xfs_dquot *dqp; 804 int error; 805 806 error = xfs_qm_dqget_checks(mp, type); 807 if (error) 808 return error; 809 810 restart: 811 dqp = xfs_qm_dqget_cache_lookup(mp, qi, tree, id); 812 if (dqp) { 813 *O_dqpp = dqp; 814 return 0; 815 } 816 817 error = xfs_qm_dqread(mp, id, type, can_alloc, &dqp); 818 if (error) 819 return error; 820 821 error = xfs_qm_dqget_cache_insert(mp, qi, tree, id, dqp); 822 if (error) { 823 /* 824 * Duplicate found. Just throw away the new dquot and start 825 * over. 826 */ 827 xfs_qm_dqdestroy(dqp); 828 XFS_STATS_INC(mp, xs_qm_dquot_dups); 829 goto restart; 830 } 831 832 trace_xfs_dqget_miss(dqp); 833 *O_dqpp = dqp; 834 return 0; 835 } 836 837 /* 838 * Given a dquot id and type, read and initialize a dquot from the on-disk 839 * metadata. This function is only for use during quota initialization so 840 * it ignores the dquot cache assuming that the dquot shrinker isn't set up. 841 * The caller is responsible for _qm_dqdestroy'ing the returned dquot. 842 */ 843 int 844 xfs_qm_dqget_uncached( 845 struct xfs_mount *mp, 846 xfs_dqid_t id, 847 uint type, 848 struct xfs_dquot **dqpp) 849 { 850 int error; 851 852 error = xfs_qm_dqget_checks(mp, type); 853 if (error) 854 return error; 855 856 return xfs_qm_dqread(mp, id, type, 0, dqpp); 857 } 858 859 /* Return the quota id for a given inode and type. */ 860 xfs_dqid_t 861 xfs_qm_id_for_quotatype( 862 struct xfs_inode *ip, 863 uint type) 864 { 865 switch (type) { 866 case XFS_DQ_USER: 867 return i_uid_read(VFS_I(ip)); 868 case XFS_DQ_GROUP: 869 return i_gid_read(VFS_I(ip)); 870 case XFS_DQ_PROJ: 871 return ip->i_d.di_projid; 872 } 873 ASSERT(0); 874 return 0; 875 } 876 877 /* 878 * Return the dquot for a given inode and type. If @can_alloc is true, then 879 * allocate blocks if needed. The inode's ILOCK must be held and it must not 880 * have already had an inode attached. 881 */ 882 int 883 xfs_qm_dqget_inode( 884 struct xfs_inode *ip, 885 uint type, 886 bool can_alloc, 887 struct xfs_dquot **O_dqpp) 888 { 889 struct xfs_mount *mp = ip->i_mount; 890 struct xfs_quotainfo *qi = mp->m_quotainfo; 891 struct radix_tree_root *tree = xfs_dquot_tree(qi, type); 892 struct xfs_dquot *dqp; 893 xfs_dqid_t id; 894 int error; 895 896 error = xfs_qm_dqget_checks(mp, type); 897 if (error) 898 return error; 899 900 ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL)); 901 ASSERT(xfs_inode_dquot(ip, type) == NULL); 902 903 id = xfs_qm_id_for_quotatype(ip, type); 904 905 restart: 906 dqp = xfs_qm_dqget_cache_lookup(mp, qi, tree, id); 907 if (dqp) { 908 *O_dqpp = dqp; 909 return 0; 910 } 911 912 /* 913 * Dquot cache miss. We don't want to keep the inode lock across 914 * a (potential) disk read. Also we don't want to deal with the lock 915 * ordering between quotainode and this inode. OTOH, dropping the inode 916 * lock here means dealing with a chown that can happen before 917 * we re-acquire the lock. 918 */ 919 xfs_iunlock(ip, XFS_ILOCK_EXCL); 920 error = xfs_qm_dqread(mp, id, type, can_alloc, &dqp); 921 xfs_ilock(ip, XFS_ILOCK_EXCL); 922 if (error) 923 return error; 924 925 /* 926 * A dquot could be attached to this inode by now, since we had 927 * dropped the ilock. 928 */ 929 if (xfs_this_quota_on(mp, type)) { 930 struct xfs_dquot *dqp1; 931 932 dqp1 = xfs_inode_dquot(ip, type); 933 if (dqp1) { 934 xfs_qm_dqdestroy(dqp); 935 dqp = dqp1; 936 xfs_dqlock(dqp); 937 goto dqret; 938 } 939 } else { 940 /* inode stays locked on return */ 941 xfs_qm_dqdestroy(dqp); 942 return -ESRCH; 943 } 944 945 error = xfs_qm_dqget_cache_insert(mp, qi, tree, id, dqp); 946 if (error) { 947 /* 948 * Duplicate found. Just throw away the new dquot and start 949 * over. 950 */ 951 xfs_qm_dqdestroy(dqp); 952 XFS_STATS_INC(mp, xs_qm_dquot_dups); 953 goto restart; 954 } 955 956 dqret: 957 ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL)); 958 trace_xfs_dqget_miss(dqp); 959 *O_dqpp = dqp; 960 return 0; 961 } 962 963 /* 964 * Starting at @id and progressing upwards, look for an initialized incore 965 * dquot, lock it, and return it. 966 */ 967 int 968 xfs_qm_dqget_next( 969 struct xfs_mount *mp, 970 xfs_dqid_t id, 971 uint type, 972 struct xfs_dquot **dqpp) 973 { 974 struct xfs_dquot *dqp; 975 int error = 0; 976 977 *dqpp = NULL; 978 for (; !error; error = xfs_dq_get_next_id(mp, type, &id)) { 979 error = xfs_qm_dqget(mp, id, type, false, &dqp); 980 if (error == -ENOENT) 981 continue; 982 else if (error != 0) 983 break; 984 985 if (!XFS_IS_DQUOT_UNINITIALIZED(dqp)) { 986 *dqpp = dqp; 987 return 0; 988 } 989 990 xfs_qm_dqput(dqp); 991 } 992 993 return error; 994 } 995 996 /* 997 * Release a reference to the dquot (decrement ref-count) and unlock it. 998 * 999 * If there is a group quota attached to this dquot, carefully release that 1000 * too without tripping over deadlocks'n'stuff. 1001 */ 1002 void 1003 xfs_qm_dqput( 1004 struct xfs_dquot *dqp) 1005 { 1006 ASSERT(dqp->q_nrefs > 0); 1007 ASSERT(XFS_DQ_IS_LOCKED(dqp)); 1008 1009 trace_xfs_dqput(dqp); 1010 1011 if (--dqp->q_nrefs == 0) { 1012 struct xfs_quotainfo *qi = dqp->q_mount->m_quotainfo; 1013 trace_xfs_dqput_free(dqp); 1014 1015 if (list_lru_add(&qi->qi_lru, &dqp->q_lru)) 1016 XFS_STATS_INC(dqp->q_mount, xs_qm_dquot_unused); 1017 } 1018 xfs_dqunlock(dqp); 1019 } 1020 1021 /* 1022 * Release a dquot. Flush it if dirty, then dqput() it. 1023 * dquot must not be locked. 1024 */ 1025 void 1026 xfs_qm_dqrele( 1027 struct xfs_dquot *dqp) 1028 { 1029 if (!dqp) 1030 return; 1031 1032 trace_xfs_dqrele(dqp); 1033 1034 xfs_dqlock(dqp); 1035 /* 1036 * We don't care to flush it if the dquot is dirty here. 1037 * That will create stutters that we want to avoid. 1038 * Instead we do a delayed write when we try to reclaim 1039 * a dirty dquot. Also xfs_sync will take part of the burden... 1040 */ 1041 xfs_qm_dqput(dqp); 1042 } 1043 1044 /* 1045 * This is the dquot flushing I/O completion routine. It is called 1046 * from interrupt level when the buffer containing the dquot is 1047 * flushed to disk. It is responsible for removing the dquot logitem 1048 * from the AIL if it has not been re-logged, and unlocking the dquot's 1049 * flush lock. This behavior is very similar to that of inodes.. 1050 */ 1051 STATIC void 1052 xfs_qm_dqflush_done( 1053 struct xfs_buf *bp, 1054 struct xfs_log_item *lip) 1055 { 1056 struct xfs_dq_logitem *qip = (struct xfs_dq_logitem *)lip; 1057 struct xfs_dquot *dqp = qip->qli_dquot; 1058 struct xfs_ail *ailp = lip->li_ailp; 1059 xfs_lsn_t tail_lsn; 1060 1061 /* 1062 * We only want to pull the item from the AIL if its 1063 * location in the log has not changed since we started the flush. 1064 * Thus, we only bother if the dquot's lsn has 1065 * not changed. First we check the lsn outside the lock 1066 * since it's cheaper, and then we recheck while 1067 * holding the lock before removing the dquot from the AIL. 1068 */ 1069 if (test_bit(XFS_LI_IN_AIL, &lip->li_flags) && 1070 ((lip->li_lsn == qip->qli_flush_lsn) || 1071 test_bit(XFS_LI_FAILED, &lip->li_flags))) { 1072 1073 spin_lock(&ailp->ail_lock); 1074 if (lip->li_lsn == qip->qli_flush_lsn) { 1075 /* xfs_ail_update_finish() drops the AIL lock */ 1076 tail_lsn = xfs_ail_delete_one(ailp, lip); 1077 xfs_ail_update_finish(ailp, tail_lsn); 1078 } else { 1079 /* 1080 * Clear the failed state since we are about to drop the 1081 * flush lock 1082 */ 1083 xfs_clear_li_failed(lip); 1084 spin_unlock(&ailp->ail_lock); 1085 } 1086 } 1087 1088 /* 1089 * Release the dq's flush lock since we're done with it. 1090 */ 1091 xfs_dqfunlock(dqp); 1092 } 1093 1094 /* 1095 * Write a modified dquot to disk. 1096 * The dquot must be locked and the flush lock too taken by caller. 1097 * The flush lock will not be unlocked until the dquot reaches the disk, 1098 * but the dquot is free to be unlocked and modified by the caller 1099 * in the interim. Dquot is still locked on return. This behavior is 1100 * identical to that of inodes. 1101 */ 1102 int 1103 xfs_qm_dqflush( 1104 struct xfs_dquot *dqp, 1105 struct xfs_buf **bpp) 1106 { 1107 struct xfs_mount *mp = dqp->q_mount; 1108 struct xfs_log_item *lip = &dqp->q_logitem.qli_item; 1109 struct xfs_buf *bp; 1110 struct xfs_dqblk *dqb; 1111 struct xfs_disk_dquot *ddqp; 1112 xfs_failaddr_t fa; 1113 int error; 1114 1115 ASSERT(XFS_DQ_IS_LOCKED(dqp)); 1116 ASSERT(!completion_done(&dqp->q_flush)); 1117 1118 trace_xfs_dqflush(dqp); 1119 1120 *bpp = NULL; 1121 1122 xfs_qm_dqunpin_wait(dqp); 1123 1124 /* 1125 * Get the buffer containing the on-disk dquot 1126 */ 1127 error = xfs_trans_read_buf(mp, NULL, mp->m_ddev_targp, dqp->q_blkno, 1128 mp->m_quotainfo->qi_dqchunklen, XBF_TRYLOCK, 1129 &bp, &xfs_dquot_buf_ops); 1130 if (error == -EAGAIN) 1131 goto out_unlock; 1132 if (error) 1133 goto out_abort; 1134 1135 /* 1136 * Calculate the location of the dquot inside the buffer. 1137 */ 1138 dqb = bp->b_addr + dqp->q_bufoffset; 1139 ddqp = &dqb->dd_diskdq; 1140 1141 /* sanity check the in-core structure before we flush */ 1142 fa = xfs_dquot_verify(mp, &dqp->q_core, be32_to_cpu(dqp->q_core.d_id), 1143 0); 1144 if (fa) { 1145 xfs_alert(mp, "corrupt dquot ID 0x%x in memory at %pS", 1146 be32_to_cpu(dqp->q_core.d_id), fa); 1147 xfs_buf_relse(bp); 1148 error = -EFSCORRUPTED; 1149 goto out_abort; 1150 } 1151 1152 /* This is the only portion of data that needs to persist */ 1153 memcpy(ddqp, &dqp->q_core, sizeof(struct xfs_disk_dquot)); 1154 1155 /* 1156 * Clear the dirty field and remember the flush lsn for later use. 1157 */ 1158 dqp->dq_flags &= ~XFS_DQ_DIRTY; 1159 1160 xfs_trans_ail_copy_lsn(mp->m_ail, &dqp->q_logitem.qli_flush_lsn, 1161 &dqp->q_logitem.qli_item.li_lsn); 1162 1163 /* 1164 * copy the lsn into the on-disk dquot now while we have the in memory 1165 * dquot here. This can't be done later in the write verifier as we 1166 * can't get access to the log item at that point in time. 1167 * 1168 * We also calculate the CRC here so that the on-disk dquot in the 1169 * buffer always has a valid CRC. This ensures there is no possibility 1170 * of a dquot without an up-to-date CRC getting to disk. 1171 */ 1172 if (xfs_sb_version_hascrc(&mp->m_sb)) { 1173 dqb->dd_lsn = cpu_to_be64(dqp->q_logitem.qli_item.li_lsn); 1174 xfs_update_cksum((char *)dqb, sizeof(struct xfs_dqblk), 1175 XFS_DQUOT_CRC_OFF); 1176 } 1177 1178 /* 1179 * Attach an iodone routine so that we can remove this dquot from the 1180 * AIL and release the flush lock once the dquot is synced to disk. 1181 */ 1182 xfs_buf_attach_iodone(bp, xfs_qm_dqflush_done, 1183 &dqp->q_logitem.qli_item); 1184 1185 /* 1186 * If the buffer is pinned then push on the log so we won't 1187 * get stuck waiting in the write for too long. 1188 */ 1189 if (xfs_buf_ispinned(bp)) { 1190 trace_xfs_dqflush_force(dqp); 1191 xfs_log_force(mp, 0); 1192 } 1193 1194 trace_xfs_dqflush_done(dqp); 1195 *bpp = bp; 1196 return 0; 1197 1198 out_abort: 1199 dqp->dq_flags &= ~XFS_DQ_DIRTY; 1200 xfs_trans_ail_delete(lip, 0); 1201 xfs_force_shutdown(mp, SHUTDOWN_CORRUPT_INCORE); 1202 out_unlock: 1203 xfs_dqfunlock(dqp); 1204 return error; 1205 } 1206 1207 /* 1208 * Lock two xfs_dquot structures. 1209 * 1210 * To avoid deadlocks we always lock the quota structure with 1211 * the lowerd id first. 1212 */ 1213 void 1214 xfs_dqlock2( 1215 struct xfs_dquot *d1, 1216 struct xfs_dquot *d2) 1217 { 1218 if (d1 && d2) { 1219 ASSERT(d1 != d2); 1220 if (be32_to_cpu(d1->q_core.d_id) > 1221 be32_to_cpu(d2->q_core.d_id)) { 1222 mutex_lock(&d2->q_qlock); 1223 mutex_lock_nested(&d1->q_qlock, XFS_QLOCK_NESTED); 1224 } else { 1225 mutex_lock(&d1->q_qlock); 1226 mutex_lock_nested(&d2->q_qlock, XFS_QLOCK_NESTED); 1227 } 1228 } else if (d1) { 1229 mutex_lock(&d1->q_qlock); 1230 } else if (d2) { 1231 mutex_lock(&d2->q_qlock); 1232 } 1233 } 1234 1235 int __init 1236 xfs_qm_init(void) 1237 { 1238 xfs_qm_dqzone = kmem_cache_create("xfs_dquot", 1239 sizeof(struct xfs_dquot), 1240 0, 0, NULL); 1241 if (!xfs_qm_dqzone) 1242 goto out; 1243 1244 xfs_qm_dqtrxzone = kmem_cache_create("xfs_dqtrx", 1245 sizeof(struct xfs_dquot_acct), 1246 0, 0, NULL); 1247 if (!xfs_qm_dqtrxzone) 1248 goto out_free_dqzone; 1249 1250 return 0; 1251 1252 out_free_dqzone: 1253 kmem_cache_destroy(xfs_qm_dqzone); 1254 out: 1255 return -ENOMEM; 1256 } 1257 1258 void 1259 xfs_qm_exit(void) 1260 { 1261 kmem_cache_destroy(xfs_qm_dqtrxzone); 1262 kmem_cache_destroy(xfs_qm_dqzone); 1263 } 1264 1265 /* 1266 * Iterate every dquot of a particular type. The caller must ensure that the 1267 * particular quota type is active. iter_fn can return negative error codes, 1268 * or -ECANCELED to indicate that it wants to stop iterating. 1269 */ 1270 int 1271 xfs_qm_dqiterate( 1272 struct xfs_mount *mp, 1273 uint dqtype, 1274 xfs_qm_dqiterate_fn iter_fn, 1275 void *priv) 1276 { 1277 struct xfs_dquot *dq; 1278 xfs_dqid_t id = 0; 1279 int error; 1280 1281 do { 1282 error = xfs_qm_dqget_next(mp, id, dqtype, &dq); 1283 if (error == -ENOENT) 1284 return 0; 1285 if (error) 1286 return error; 1287 1288 error = iter_fn(dq, dqtype, priv); 1289 id = be32_to_cpu(dq->q_core.d_id); 1290 xfs_qm_dqput(dq); 1291 id++; 1292 } while (error == 0 && id != 0); 1293 1294 return error; 1295 } 1296