// SPDX-License-Identifier: GPL-2.0-only
/*
 * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
 * Copyright (C) 2004-2007 Red Hat, Inc. All rights reserved.
 */

#include <linux/sched.h>
#include <linux/slab.h>
#include <linux/spinlock.h>
#include <linux/completion.h>
#include <linux/buffer_head.h>
#include <linux/gfs2_ondisk.h>
#include <linux/crc32.h>
#include <linux/crc32c.h>
#include <linux/delay.h>
#include <linux/kthread.h>
#include <linux/freezer.h>
#include <linux/bio.h>
#include <linux/blkdev.h>
#include <linux/writeback.h>
#include <linux/list_sort.h>

#include "gfs2.h"
#include "incore.h"
#include "bmap.h"
#include "glock.h"
#include "log.h"
#include "lops.h"
#include "meta_io.h"
#include "util.h"
#include "dir.h"
#include "trace_gfs2.h"

static void gfs2_log_shutdown(struct gfs2_sbd *sdp);

/**
 * gfs2_struct2blk - compute number of blocks needed to write structures
 * @sdp: the filesystem
 * @nstruct: the number of structures
 *
 * Compute the number of log descriptor blocks needed to hold a certain number
 * of structures of a certain size.
 *
 * Returns: the number of blocks needed (minimum is always 1)
 */

unsigned int gfs2_struct2blk(struct gfs2_sbd *sdp, unsigned int nstruct)
{
	unsigned int blks;
	unsigned int first, second;

	blks = 1;
	first = sdp->sd_ldptrs;

	if (nstruct > first) {
		second = sdp->sd_inptrs;
		blks += DIV_ROUND_UP(nstruct - first, second);
	}

	return blks;
}

/**
 * gfs2_remove_from_ail - Remove an entry from the ail lists, updating counters
 * @bd: The gfs2_bufdata to remove
 *
 * The ail lock _must_ be held when calling this function
 */

static void gfs2_remove_from_ail(struct gfs2_bufdata *bd)
{
	bd->bd_tr = NULL;
	list_del_init(&bd->bd_ail_st_list);
	list_del_init(&bd->bd_ail_gl_list);
	atomic_dec(&bd->bd_gl->gl_ail_count);
	brelse(bd->bd_bh);
}

/**
 * gfs2_ail1_start_one - Start I/O on a part of the AIL
 * @sdp: the filesystem
 * @wbc: The writeback control structure
 * @tr: The transaction to start I/O on
 *
 * Returns: 0 on success, -EBUSY to have the caller rescan, or an error
 */

static int gfs2_ail1_start_one(struct gfs2_sbd *sdp,
			       struct writeback_control *wbc,
			       struct gfs2_trans *tr)
__releases(&sdp->sd_ail_lock)
__acquires(&sdp->sd_ail_lock)
{
	struct gfs2_glock *gl = NULL;
	struct address_space *mapping;
	struct gfs2_bufdata *bd, *s;
	struct buffer_head *bh;
	int ret = 0;

	list_for_each_entry_safe_reverse(bd, s, &tr->tr_ail1_list, bd_ail_st_list) {
		bh = bd->bd_bh;

		gfs2_assert(sdp, bd->bd_tr == tr);

		if (!buffer_busy(bh)) {
			if (buffer_uptodate(bh)) {
				list_move(&bd->bd_ail_st_list,
					  &tr->tr_ail2_list);
				continue;
			}
			if (!cmpxchg(&sdp->sd_log_error, 0, -EIO)) {
				gfs2_io_error_bh(sdp, bh);
				gfs2_withdraw_delayed(sdp);
			}
		}

		if (gfs2_withdrawn(sdp)) {
			gfs2_remove_from_ail(bd);
			continue;
		}
		if (!buffer_dirty(bh))
			continue;
		if (gl == bd->bd_gl)
			continue;
		gl = bd->bd_gl;
		list_move(&bd->bd_ail_st_list, &tr->tr_ail1_list);
		mapping = bh->b_page->mapping;
		if (!mapping)
			continue;
		spin_unlock(&sdp->sd_ail_lock);
		ret = generic_writepages(mapping, wbc);
		spin_lock(&sdp->sd_ail_lock);
		if (ret || wbc->nr_to_write <= 0)
			break;
		return -EBUSY;
	}

	return ret;
}
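/*
 * Note on the -EBUSY protocol above (informational only): because
 * gfs2_ail1_start_one() drops sd_ail_lock around generic_writepages(),
 * the ail lists may have changed by the time the lock is re-acquired,
 * so it returns -EBUSY to ask the caller to rescan.  gfs2_ail1_flush()
 * below treats -EBUSY as "goto restart" rather than as an error.
 */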
/**
 * gfs2_ail1_flush - start writeback of some ail1 entries
 * @sdp: The super block
 * @wbc: The writeback control structure
 *
 * Writes back some ail1 entries, according to the limits in the
 * writeback control structure
 */

void gfs2_ail1_flush(struct gfs2_sbd *sdp, struct writeback_control *wbc)
{
	struct list_head *head = &sdp->sd_ail1_list;
	struct gfs2_trans *tr;
	struct blk_plug plug;
	int ret = 0;

	trace_gfs2_ail_flush(sdp, wbc, 1);
	blk_start_plug(&plug);
	spin_lock(&sdp->sd_ail_lock);
restart:
	list_for_each_entry_reverse(tr, head, tr_list) {
		if (wbc->nr_to_write <= 0)
			break;
		ret = gfs2_ail1_start_one(sdp, wbc, tr);
		if (ret) {
			if (ret == -EBUSY)
				goto restart;
			break;
		}
	}
	spin_unlock(&sdp->sd_ail_lock);
	blk_finish_plug(&plug);
	if (ret)
		gfs2_withdraw(sdp);
	trace_gfs2_ail_flush(sdp, wbc, 0);
}

/**
 * gfs2_ail1_start - start writeback of all ail1 entries
 * @sdp: The superblock
 */

static void gfs2_ail1_start(struct gfs2_sbd *sdp)
{
	struct writeback_control wbc = {
		.sync_mode = WB_SYNC_NONE,
		.nr_to_write = LONG_MAX,
		.range_start = 0,
		.range_end = LLONG_MAX,
	};

	return gfs2_ail1_flush(sdp, &wbc);
}

/**
 * gfs2_ail1_empty_one - Check whether or not a trans in the AIL has been synced
 * @sdp: the filesystem
 * @tr: the transaction
 * @max_revokes: If nonzero, issue revokes for the bd items for written buffers
 */

static void gfs2_ail1_empty_one(struct gfs2_sbd *sdp, struct gfs2_trans *tr,
				int *max_revokes)
{
	struct gfs2_bufdata *bd, *s;
	struct buffer_head *bh;

	list_for_each_entry_safe_reverse(bd, s, &tr->tr_ail1_list,
					 bd_ail_st_list) {
		bh = bd->bd_bh;
		gfs2_assert(sdp, bd->bd_tr == tr);
		/*
		 * If another process flagged an io error, e.g. writing to the
		 * journal, error all other bhs and move them off the ail1 to
		 * prevent a tight loop when unmount tries to flush ail1,
		 * regardless of whether they're still busy. If no outside
		 * errors were found and the buffer is busy, move to the next.
		 * If the ail buffer is not busy and caught an error, flag it
		 * for others.
		 */
		if (!sdp->sd_log_error && buffer_busy(bh))
			continue;
		if (!buffer_uptodate(bh) &&
		    !cmpxchg(&sdp->sd_log_error, 0, -EIO)) {
			gfs2_io_error_bh(sdp, bh);
			gfs2_withdraw_delayed(sdp);
		}
		/*
		 * If we have space for revokes and the bd is no longer on any
		 * buf list, we can just add a revoke for it immediately and
		 * avoid having to put it on the ail2 list, where it would need
		 * to be revoked later.
		 */
		if (*max_revokes && list_empty(&bd->bd_list)) {
			gfs2_add_revoke(sdp, bd);
			(*max_revokes)--;
			continue;
		}
		list_move(&bd->bd_ail_st_list, &tr->tr_ail2_list);
	}
}

/**
 * gfs2_ail1_empty - Try to empty the ail1 lists
 * @sdp: The superblock
 * @max_revokes: If non-zero, add revokes where appropriate
 *
 * Tries to empty the ail1 lists, starting with the oldest first
 */

static int gfs2_ail1_empty(struct gfs2_sbd *sdp, int max_revokes)
{
	struct gfs2_trans *tr, *s;
	int oldest_tr = 1;
	int ret;

	spin_lock(&sdp->sd_ail_lock);
	list_for_each_entry_safe_reverse(tr, s, &sdp->sd_ail1_list, tr_list) {
		gfs2_ail1_empty_one(sdp, tr, &max_revokes);
		if (list_empty(&tr->tr_ail1_list) && oldest_tr)
			list_move(&tr->tr_list, &sdp->sd_ail2_list);
		else
			oldest_tr = 0;
	}
	ret = list_empty(&sdp->sd_ail1_list);
	spin_unlock(&sdp->sd_ail_lock);

	if (test_bit(SDF_WITHDRAWING, &sdp->sd_flags)) {
		gfs2_lm(sdp, "fatal: I/O error(s)\n");
		gfs2_withdraw(sdp);
	}

	return ret;
}

static void gfs2_ail1_wait(struct gfs2_sbd *sdp)
{
	struct gfs2_trans *tr;
	struct gfs2_bufdata *bd;
	struct buffer_head *bh;

	spin_lock(&sdp->sd_ail_lock);
	list_for_each_entry_reverse(tr, &sdp->sd_ail1_list, tr_list) {
		list_for_each_entry(bd, &tr->tr_ail1_list, bd_ail_st_list) {
			bh = bd->bd_bh;
			if (!buffer_locked(bh))
				continue;
			get_bh(bh);
			spin_unlock(&sdp->sd_ail_lock);
			wait_on_buffer(bh);
			brelse(bh);
			return;
		}
	}
	spin_unlock(&sdp->sd_ail_lock);
}

/**
 * gfs2_ail_empty_tr - empty one of the ail lists for a transaction
 * @sdp: the filesystem
 * @tr: the transaction
 * @head: the ail list to empty (tr_ail1_list or tr_ail2_list)
 */

static void gfs2_ail_empty_tr(struct gfs2_sbd *sdp, struct gfs2_trans *tr,
			      struct list_head *head)
{
	struct gfs2_bufdata *bd;

	while (!list_empty(head)) {
		bd = list_first_entry(head, struct gfs2_bufdata,
				      bd_ail_st_list);
		gfs2_assert(sdp, bd->bd_tr == tr);
		gfs2_remove_from_ail(bd);
	}
}

static void ail2_empty(struct gfs2_sbd *sdp, unsigned int new_tail)
{
	struct gfs2_trans *tr, *safe;
	unsigned int old_tail = sdp->sd_log_tail;
	int wrap = (new_tail < old_tail);
	int a, b, rm;

	spin_lock(&sdp->sd_ail_lock);

	list_for_each_entry_safe(tr, safe, &sdp->sd_ail2_list, tr_list) {
		a = (old_tail <= tr->tr_first);
		b = (tr->tr_first < new_tail);
		rm = (wrap) ? (a || b) : (a && b);
		if (!rm)
			continue;

		gfs2_ail_empty_tr(sdp, tr, &tr->tr_ail2_list);
		list_del(&tr->tr_list);
		gfs2_assert_warn(sdp, list_empty(&tr->tr_ail1_list));
		gfs2_assert_warn(sdp, list_empty(&tr->tr_ail2_list));
		kfree(tr);
	}

	spin_unlock(&sdp->sd_ail_lock);
}

/**
 * gfs2_log_release - Release a given number of log blocks
 * @sdp: The GFS2 superblock
 * @blks: The number of blocks
 */

void gfs2_log_release(struct gfs2_sbd *sdp, unsigned int blks)
{
	atomic_add(blks, &sdp->sd_log_blks_free);
	trace_gfs2_log_blocks(sdp, blks);
	gfs2_assert_withdraw(sdp, atomic_read(&sdp->sd_log_blks_free) <=
				  sdp->sd_jdesc->jd_blocks);
	up_read(&sdp->sd_log_flush_lock);
}
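/*
 * Pairing note (informational only): gfs2_log_reserve() below takes
 * sd_log_flush_lock for reading on success, and gfs2_log_release()
 * above both returns the blocks and drops that read lock.  The -EROFS
 * path in gfs2_log_reserve() shows the pairing: a reservation that
 * cannot be used is immediately undone with gfs2_log_release().
 */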
/**
 * gfs2_log_reserve - Make a log reservation
 * @sdp: The GFS2 superblock
 * @blks: The number of blocks to reserve
 *
 * Note that we never give out the last few blocks of the journal. That's
 * due to the fact that there is a small number of header blocks
 * associated with each log flush. The exact number can't be known until
 * flush time, so we ensure that we have just enough free blocks at all
 * times to avoid running out during a log flush.
 *
 * We no longer flush the log here, instead we wake up logd to do that
 * for us. To avoid the thundering herd and to ensure that we deal fairly
 * with queued waiters, we use an exclusive wait. This means that when we
 * get woken with enough journal space to get our reservation, we need to
 * wake the next waiter on the list.
 *
 * Returns: errno
 */

int gfs2_log_reserve(struct gfs2_sbd *sdp, unsigned int blks)
{
	int ret = 0;
	unsigned reserved_blks = 7 * (4096 / sdp->sd_vfs->s_blocksize);
	unsigned wanted = blks + reserved_blks;
	DEFINE_WAIT(wait);
	int did_wait = 0;
	unsigned int free_blocks;

	if (gfs2_assert_warn(sdp, blks) ||
	    gfs2_assert_warn(sdp, blks <= sdp->sd_jdesc->jd_blocks))
		return -EINVAL;
	atomic_add(blks, &sdp->sd_log_blks_needed);
retry:
	free_blocks = atomic_read(&sdp->sd_log_blks_free);
	if (unlikely(free_blocks <= wanted)) {
		do {
			prepare_to_wait_exclusive(&sdp->sd_log_waitq, &wait,
						  TASK_UNINTERRUPTIBLE);
			wake_up(&sdp->sd_logd_waitq);
			did_wait = 1;
			if (atomic_read(&sdp->sd_log_blks_free) <= wanted)
				io_schedule();
			free_blocks = atomic_read(&sdp->sd_log_blks_free);
		} while (free_blocks <= wanted);
		finish_wait(&sdp->sd_log_waitq, &wait);
	}
	atomic_inc(&sdp->sd_reserving_log);
	if (atomic_cmpxchg(&sdp->sd_log_blks_free, free_blocks,
			   free_blocks - blks) != free_blocks) {
		if (atomic_dec_and_test(&sdp->sd_reserving_log))
			wake_up(&sdp->sd_reserving_log_wait);
		goto retry;
	}
	atomic_sub(blks, &sdp->sd_log_blks_needed);
	trace_gfs2_log_blocks(sdp, -blks);

	/*
	 * If we waited, then so might others, wake them up _after_ we get
	 * our share of the log.
	 */
	if (unlikely(did_wait))
		wake_up(&sdp->sd_log_waitq);

	down_read(&sdp->sd_log_flush_lock);
	if (unlikely(!test_bit(SDF_JOURNAL_LIVE, &sdp->sd_flags))) {
		gfs2_log_release(sdp, blks);
		ret = -EROFS;
	}
	if (atomic_dec_and_test(&sdp->sd_reserving_log))
		wake_up(&sdp->sd_reserving_log_wait);
	return ret;
}

/**
 * log_distance - Compute distance between two journal blocks
 * @sdp: The GFS2 superblock
 * @newer: The most recent journal block of the pair
 * @older: The older journal block of the pair
 *
 * Compute the distance (in the journal direction) between two
 * blocks in the journal
 *
 * Returns: the distance in blocks
 */

static inline unsigned int log_distance(struct gfs2_sbd *sdp, unsigned int newer,
					unsigned int older)
{
	int dist;

	dist = newer - older;
	if (dist < 0)
		dist += sdp->sd_jdesc->jd_blocks;

	return dist;
}
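/*
 * Worked example (illustrative numbers only): with jd_blocks = 8192,
 * log_distance(sdp, 10, 8000) yields 10 - 8000 = -7990, which is
 * negative because the log has wrapped, so jd_blocks is added back:
 * -7990 + 8192 = 202 blocks.  Without a wrap, e.g.
 * log_distance(sdp, 8000, 10), the result is simply 7990.
 */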
/**
 * calc_reserved - Calculate the number of blocks to reserve when
 *                 refunding a transaction's unused buffers.
 * @sdp: The GFS2 superblock
 *
 * This is complex. We need to reserve room for all our currently used
 * metadata buffers (e.g. normal file I/O rewriting file time stamps) and
 * all our journaled data buffers for journaled files (e.g. files in the
 * meta_fs like rindex, or files for which chattr +j was done.)
 * If we don't reserve enough space, gfs2_log_refund and gfs2_log_flush
 * will count it as free space (sd_log_blks_free) and corruption will follow.
 *
 * We can have metadata bufs and jdata bufs in the same journal. So each
 * type gets its own log header, for which we need to reserve a block.
 * In fact, each type has the potential for needing more than one header
 * in cases where we have more buffers than will fit on a journal page.
 * Metadata journal entries take up half the space of journaled buffer
 * entries. Thus, metadata entries have buf_limit (502 with 4KB blocks)
 * and journaled buffers have databuf_limit (251 with 4KB blocks) before
 * they cause a wrap around.
 *
 * Also, we need to reserve blocks for revoke journal entries and one for an
 * overall header for the lot.
 *
 * Returns: the number of blocks reserved
 */
static unsigned int calc_reserved(struct gfs2_sbd *sdp)
{
	unsigned int reserved = 0;
	unsigned int mbuf;
	unsigned int dbuf;
	struct gfs2_trans *tr = sdp->sd_log_tr;

	if (tr) {
		mbuf = tr->tr_num_buf_new - tr->tr_num_buf_rm;
		dbuf = tr->tr_num_databuf_new - tr->tr_num_databuf_rm;
		reserved = mbuf + dbuf;
		/* Account for header blocks */
		reserved += DIV_ROUND_UP(mbuf, buf_limit(sdp));
		reserved += DIV_ROUND_UP(dbuf, databuf_limit(sdp));
	}

	if (sdp->sd_log_committed_revoke > 0)
		reserved += gfs2_struct2blk(sdp, sdp->sd_log_committed_revoke);
	/* One for the overall header */
	if (reserved)
		reserved++;
	return reserved;
}

static unsigned int current_tail(struct gfs2_sbd *sdp)
{
	struct gfs2_trans *tr;
	unsigned int tail;

	spin_lock(&sdp->sd_ail_lock);

	if (list_empty(&sdp->sd_ail1_list)) {
		tail = sdp->sd_log_head;
	} else {
		tr = list_entry(sdp->sd_ail1_list.prev, struct gfs2_trans,
				tr_list);
		tail = tr->tr_first;
	}

	spin_unlock(&sdp->sd_ail_lock);

	return tail;
}

static void log_pull_tail(struct gfs2_sbd *sdp, unsigned int new_tail)
{
	unsigned int dist = log_distance(sdp, new_tail, sdp->sd_log_tail);

	ail2_empty(sdp, new_tail);

	atomic_add(dist, &sdp->sd_log_blks_free);
	trace_gfs2_log_blocks(sdp, dist);
	gfs2_assert_withdraw(sdp, atomic_read(&sdp->sd_log_blks_free) <=
			     sdp->sd_jdesc->jd_blocks);

	sdp->sd_log_tail = new_tail;
}

void log_flush_wait(struct gfs2_sbd *sdp)
{
	DEFINE_WAIT(wait);

	if (atomic_read(&sdp->sd_log_in_flight)) {
		do {
			prepare_to_wait(&sdp->sd_log_flush_wait, &wait,
					TASK_UNINTERRUPTIBLE);
			if (atomic_read(&sdp->sd_log_in_flight))
				io_schedule();
		} while (atomic_read(&sdp->sd_log_in_flight));
		finish_wait(&sdp->sd_log_flush_wait, &wait);
	}
}

static int ip_cmp(void *priv, struct list_head *a, struct list_head *b)
{
	struct gfs2_inode *ipa, *ipb;

	ipa = list_entry(a, struct gfs2_inode, i_ordered);
	ipb = list_entry(b, struct gfs2_inode, i_ordered);

	if (ipa->i_no_addr < ipb->i_no_addr)
		return -1;
	if (ipa->i_no_addr > ipb->i_no_addr)
		return 1;
	return 0;
}
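/*
 * Note (informational only): ip_cmp() orders inodes by i_no_addr, the
 * inode's disk address, so the list_sort() call in gfs2_ordered_write()
 * below starts ordered-data writeback in roughly ascending disk order,
 * which tends to keep the resulting I/O sequential.
 */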
static void gfs2_ordered_write(struct gfs2_sbd *sdp)
{
	struct gfs2_inode *ip;
	LIST_HEAD(written);

	spin_lock(&sdp->sd_ordered_lock);
	list_sort(NULL, &sdp->sd_log_ordered, &ip_cmp);
	while (!list_empty(&sdp->sd_log_ordered)) {
		ip = list_entry(sdp->sd_log_ordered.next, struct gfs2_inode, i_ordered);
		if (ip->i_inode.i_mapping->nrpages == 0) {
			test_and_clear_bit(GIF_ORDERED, &ip->i_flags);
			list_del(&ip->i_ordered);
			continue;
		}
		list_move(&ip->i_ordered, &written);
		spin_unlock(&sdp->sd_ordered_lock);
		filemap_fdatawrite(ip->i_inode.i_mapping);
		spin_lock(&sdp->sd_ordered_lock);
	}
	list_splice(&written, &sdp->sd_log_ordered);
	spin_unlock(&sdp->sd_ordered_lock);
}

static void gfs2_ordered_wait(struct gfs2_sbd *sdp)
{
	struct gfs2_inode *ip;

	spin_lock(&sdp->sd_ordered_lock);
	while (!list_empty(&sdp->sd_log_ordered)) {
		ip = list_entry(sdp->sd_log_ordered.next, struct gfs2_inode, i_ordered);
		list_del(&ip->i_ordered);
		WARN_ON(!test_and_clear_bit(GIF_ORDERED, &ip->i_flags));
		if (ip->i_inode.i_mapping->nrpages == 0)
			continue;
		spin_unlock(&sdp->sd_ordered_lock);
		filemap_fdatawait(ip->i_inode.i_mapping);
		spin_lock(&sdp->sd_ordered_lock);
	}
	spin_unlock(&sdp->sd_ordered_lock);
}

void gfs2_ordered_del_inode(struct gfs2_inode *ip)
{
	struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);

	spin_lock(&sdp->sd_ordered_lock);
	if (test_and_clear_bit(GIF_ORDERED, &ip->i_flags))
		list_del(&ip->i_ordered);
	spin_unlock(&sdp->sd_ordered_lock);
}

void gfs2_add_revoke(struct gfs2_sbd *sdp, struct gfs2_bufdata *bd)
{
	struct buffer_head *bh = bd->bd_bh;
	struct gfs2_glock *gl = bd->bd_gl;

	bh->b_private = NULL;
	bd->bd_blkno = bh->b_blocknr;
	gfs2_remove_from_ail(bd); /* drops ref on bh */
	bd->bd_bh = NULL;
	sdp->sd_log_num_revoke++;
	if (atomic_inc_return(&gl->gl_revokes) == 1)
		gfs2_glock_hold(gl);
	set_bit(GLF_LFLUSH, &gl->gl_flags);
	list_add(&bd->bd_list, &sdp->sd_log_revokes);
}

void gfs2_glock_remove_revoke(struct gfs2_glock *gl)
{
	if (atomic_dec_return(&gl->gl_revokes) == 0) {
		clear_bit(GLF_LFLUSH, &gl->gl_flags);
		gfs2_glock_queue_put(gl);
	}
}
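/*
 * Worked example for the revoke space calculation in gfs2_write_revokes()
 * below (illustrative, assuming a 4KB block size): the first revoke block
 * is a log descriptor and holds
 * (4096 - sizeof(struct gfs2_log_descriptor)) / sizeof(u64) entries; each
 * further block carries only a meta header and holds
 * (4096 - sizeof(struct gfs2_meta_header)) / sizeof(u64) entries.
 * max_revokes is grown one block at a time until it covers
 * sd_log_num_revoke, and the slack left in the last block is then offered
 * to gfs2_ail1_empty() for additional revokes.
 */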
/**
 * gfs2_write_revokes - Add as many revokes to the system transaction as we can
 * @sdp: The GFS2 superblock
 *
 * Our usual strategy is to defer writing revokes as much as we can in the hope
 * that we'll eventually overwrite the journal, which will make those revokes
 * go away. This changes when we flush the log: at that point, there will
 * likely be some left-over space in the last revoke block of that transaction.
 * We can fill that space with additional revokes for blocks that have already
 * been written back. This will basically come at no cost now, and will save
 * us from having to keep track of those blocks on the AIL2 list later.
 */
void gfs2_write_revokes(struct gfs2_sbd *sdp)
{
	/* number of revokes we still have room for */
	int max_revokes = (sdp->sd_sb.sb_bsize - sizeof(struct gfs2_log_descriptor)) / sizeof(u64);

	gfs2_log_lock(sdp);
	while (sdp->sd_log_num_revoke > max_revokes)
		max_revokes += (sdp->sd_sb.sb_bsize - sizeof(struct gfs2_meta_header)) / sizeof(u64);
	max_revokes -= sdp->sd_log_num_revoke;
	if (!sdp->sd_log_num_revoke) {
		atomic_dec(&sdp->sd_log_blks_free);
		/* If no blocks have been reserved, we need to also
		 * reserve a block for the header */
		if (!sdp->sd_log_blks_reserved)
			atomic_dec(&sdp->sd_log_blks_free);
	}
	gfs2_ail1_empty(sdp, max_revokes);
	gfs2_log_unlock(sdp);

	if (!sdp->sd_log_num_revoke) {
		atomic_inc(&sdp->sd_log_blks_free);
		if (!sdp->sd_log_blks_reserved)
			atomic_inc(&sdp->sd_log_blks_free);
	}
}

/**
 * gfs2_write_log_header - Write a journal log header buffer at lblock
 * @sdp: The GFS2 superblock
 * @jd: journal descriptor of the journal to which we are writing
 * @seq: sequence number
 * @tail: tail of the log
 * @lblock: value for lh_blkno (block number relative to start of journal)
 * @flags: log header flags GFS2_LOG_HEAD_*
 * @op_flags: flags to pass to the bio
 */

void gfs2_write_log_header(struct gfs2_sbd *sdp, struct gfs2_jdesc *jd,
			   u64 seq, u32 tail, u32 lblock, u32 flags,
			   int op_flags)
{
	struct gfs2_log_header *lh;
	u32 hash, crc;
	struct page *page;
	struct gfs2_statfs_change_host *l_sc = &sdp->sd_statfs_local;
	struct timespec64 tv;
	struct super_block *sb = sdp->sd_vfs;
	u64 dblock;

	if (gfs2_withdrawn(sdp))
		goto out;

	page = mempool_alloc(gfs2_page_pool, GFP_NOIO);
	lh = page_address(page);
	clear_page(lh);

	lh->lh_header.mh_magic = cpu_to_be32(GFS2_MAGIC);
	lh->lh_header.mh_type = cpu_to_be32(GFS2_METATYPE_LH);
	lh->lh_header.__pad0 = cpu_to_be64(0);
	lh->lh_header.mh_format = cpu_to_be32(GFS2_FORMAT_LH);
	lh->lh_header.mh_jid = cpu_to_be32(sdp->sd_jdesc->jd_jid);
	lh->lh_sequence = cpu_to_be64(seq);
	lh->lh_flags = cpu_to_be32(flags);
	lh->lh_tail = cpu_to_be32(tail);
	lh->lh_blkno = cpu_to_be32(lblock);
	hash = ~crc32(~0, lh, LH_V1_SIZE);
	lh->lh_hash = cpu_to_be32(hash);

	ktime_get_coarse_real_ts64(&tv);
	lh->lh_nsec = cpu_to_be32(tv.tv_nsec);
	lh->lh_sec = cpu_to_be64(tv.tv_sec);
	if (!list_empty(&jd->extent_list))
		dblock = gfs2_log_bmap(jd, lblock);
	else {
		int ret = gfs2_lblk_to_dblk(jd->jd_inode, lblock, &dblock);
		if (gfs2_assert_withdraw(sdp, ret == 0))
			return;
	}
	lh->lh_addr = cpu_to_be64(dblock);
	lh->lh_jinode = cpu_to_be64(GFS2_I(jd->jd_inode)->i_no_addr);

	/* We may only write local statfs, quota, etc., when writing to our
	   own journal. The values are left 0 when recovering a journal
	   different from our own. */
	if (!(flags & GFS2_LOG_HEAD_RECOVERY)) {
		lh->lh_statfs_addr =
			cpu_to_be64(GFS2_I(sdp->sd_sc_inode)->i_no_addr);
		lh->lh_quota_addr =
			cpu_to_be64(GFS2_I(sdp->sd_qc_inode)->i_no_addr);

		spin_lock(&sdp->sd_statfs_spin);
		lh->lh_local_total = cpu_to_be64(l_sc->sc_total);
		lh->lh_local_free = cpu_to_be64(l_sc->sc_free);
		lh->lh_local_dinodes = cpu_to_be64(l_sc->sc_dinodes);
		spin_unlock(&sdp->sd_statfs_spin);
	}

	BUILD_BUG_ON(offsetof(struct gfs2_log_header, lh_crc) != LH_V1_SIZE);

	crc = crc32c(~0, (void *)lh + LH_V1_SIZE + 4,
		     sb->s_blocksize - LH_V1_SIZE - 4);
	lh->lh_crc = cpu_to_be32(crc);

	gfs2_log_write(sdp, page, sb->s_blocksize, 0, dblock);
	gfs2_log_submit_bio(&sdp->sd_log_bio, REQ_OP_WRITE | op_flags);
out:
	log_flush_wait(sdp);
}

/**
 * log_write_header - Get and initialize a journal header buffer
 * @sdp: The GFS2 superblock
 * @flags: The log header flags, including log header origin
 */

static void log_write_header(struct gfs2_sbd *sdp, u32 flags)
{
	unsigned int tail;
	int op_flags = REQ_PREFLUSH | REQ_FUA | REQ_META | REQ_SYNC;
	enum gfs2_freeze_state state = atomic_read(&sdp->sd_freeze_state);

	gfs2_assert_withdraw(sdp, (state != SFS_FROZEN));
	tail = current_tail(sdp);

	if (test_bit(SDF_NOBARRIERS, &sdp->sd_flags)) {
		gfs2_ordered_wait(sdp);
		log_flush_wait(sdp);
		op_flags = REQ_SYNC | REQ_META | REQ_PRIO;
	}
	sdp->sd_log_idle = (tail == sdp->sd_log_flush_head);
	gfs2_write_log_header(sdp, sdp->sd_jdesc, sdp->sd_log_sequence++, tail,
			      sdp->sd_log_flush_head, flags, op_flags);
	gfs2_log_incr_head(sdp);

	if (sdp->sd_log_tail != tail)
		log_pull_tail(sdp, tail);
}

/**
 * ail_drain - drain the ail lists after a withdraw
 * @sdp: Pointer to GFS2 superblock
 */
static void ail_drain(struct gfs2_sbd *sdp)
{
	struct gfs2_trans *tr;

	spin_lock(&sdp->sd_ail_lock);
	/*
	 * For transactions on the sd_ail1_list we need to drain both the
	 * ail1 and ail2 lists. That's because function gfs2_ail1_start_one
	 * (temporarily) moves items from its tr_ail1 list to tr_ail2 list
	 * before revokes are sent for that block. Items on the sd_ail2_list
	 * should have already gotten beyond that point, so no need.
	 */
	while (!list_empty(&sdp->sd_ail1_list)) {
		tr = list_first_entry(&sdp->sd_ail1_list, struct gfs2_trans,
				      tr_list);
		gfs2_ail_empty_tr(sdp, tr, &tr->tr_ail1_list);
		gfs2_ail_empty_tr(sdp, tr, &tr->tr_ail2_list);
		list_del(&tr->tr_list);
		kfree(tr);
	}
	while (!list_empty(&sdp->sd_ail2_list)) {
		tr = list_first_entry(&sdp->sd_ail2_list, struct gfs2_trans,
				      tr_list);
		gfs2_ail_empty_tr(sdp, tr, &tr->tr_ail2_list);
		list_del(&tr->tr_list);
		kfree(tr);
	}
	spin_unlock(&sdp->sd_ail_lock);
}
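/*
 * Overview of the flush sequence in gfs2_log_flush() below (a summary
 * of the code, not additional logic):
 *
 *  1. write out ordered data (gfs2_ordered_write)
 *  2. write pinned metadata and revokes (lops_before_commit)
 *  3. write the log header, honouring barrier settings (log_write_header)
 *  4. unpin the written buffers onto the ail1 lists (lops_after_commit)
 */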
/**
 * gfs2_log_flush - flush incore transaction(s)
 * @sdp: the filesystem
 * @gl: The glock structure to flush.  If NULL, flush the whole incore log
 * @flags: The log header flags: GFS2_LOG_HEAD_FLUSH_* and debug flags
 */

void gfs2_log_flush(struct gfs2_sbd *sdp, struct gfs2_glock *gl, u32 flags)
{
	struct gfs2_trans *tr = NULL;
	enum gfs2_freeze_state state = atomic_read(&sdp->sd_freeze_state);

	down_write(&sdp->sd_log_flush_lock);

	/*
	 * Do this check while holding the log_flush_lock to prevent new
	 * buffers from being added to the ail via gfs2_pin()
	 */
	if (gfs2_withdrawn(sdp))
		goto out;

	/* Log might have been flushed while we waited for the flush lock */
	if (gl && !test_bit(GLF_LFLUSH, &gl->gl_flags)) {
		up_write(&sdp->sd_log_flush_lock);
		return;
	}
	trace_gfs2_log_flush(sdp, 1, flags);

	if (flags & GFS2_LOG_HEAD_FLUSH_SHUTDOWN)
		clear_bit(SDF_JOURNAL_LIVE, &sdp->sd_flags);

	sdp->sd_log_flush_head = sdp->sd_log_head;
	tr = sdp->sd_log_tr;
	if (tr) {
		sdp->sd_log_tr = NULL;
		INIT_LIST_HEAD(&tr->tr_ail1_list);
		INIT_LIST_HEAD(&tr->tr_ail2_list);
		tr->tr_first = sdp->sd_log_flush_head;
		if (unlikely(state == SFS_FROZEN))
			if (gfs2_assert_withdraw_delayed(sdp,
			       !tr->tr_num_buf_new && !tr->tr_num_databuf_new))
				goto out;
	}

	if (unlikely(state == SFS_FROZEN))
		if (gfs2_assert_withdraw_delayed(sdp, !sdp->sd_log_num_revoke))
			goto out;
	if (gfs2_assert_withdraw_delayed(sdp,
			sdp->sd_log_num_revoke == sdp->sd_log_committed_revoke))
		goto out;

	gfs2_ordered_write(sdp);
	if (gfs2_withdrawn(sdp))
		goto out;
	lops_before_commit(sdp, tr);
	if (gfs2_withdrawn(sdp))
		goto out;
	gfs2_log_submit_bio(&sdp->sd_log_bio, REQ_OP_WRITE);
	if (gfs2_withdrawn(sdp))
		goto out;

	if (sdp->sd_log_head != sdp->sd_log_flush_head) {
		log_flush_wait(sdp);
		log_write_header(sdp, flags);
	} else if (sdp->sd_log_tail != current_tail(sdp) && !sdp->sd_log_idle) {
		atomic_dec(&sdp->sd_log_blks_free); /* Adjust for unreserved buffer */
		trace_gfs2_log_blocks(sdp, -1);
		log_write_header(sdp, flags);
	}
	if (gfs2_withdrawn(sdp))
		goto out;
	lops_after_commit(sdp, tr);

	gfs2_log_lock(sdp);
	sdp->sd_log_head = sdp->sd_log_flush_head;
	sdp->sd_log_blks_reserved = 0;
	sdp->sd_log_committed_revoke = 0;

	spin_lock(&sdp->sd_ail_lock);
	if (tr && !list_empty(&tr->tr_ail1_list)) {
		list_add(&tr->tr_list, &sdp->sd_ail1_list);
		tr = NULL;
	}
	spin_unlock(&sdp->sd_ail_lock);
	gfs2_log_unlock(sdp);

	if (!(flags & GFS2_LOG_HEAD_FLUSH_NORMAL)) {
		if (!sdp->sd_log_idle) {
			for (;;) {
				gfs2_ail1_start(sdp);
				gfs2_ail1_wait(sdp);
				if (gfs2_ail1_empty(sdp, 0))
					break;
			}
			if (gfs2_withdrawn(sdp))
				goto out;
			atomic_dec(&sdp->sd_log_blks_free); /* Adjust for unreserved buffer */
			trace_gfs2_log_blocks(sdp, -1);
			log_write_header(sdp, flags);
			sdp->sd_log_head = sdp->sd_log_flush_head;
		}
		if (flags & (GFS2_LOG_HEAD_FLUSH_SHUTDOWN |
			     GFS2_LOG_HEAD_FLUSH_FREEZE))
			gfs2_log_shutdown(sdp);
		if (flags & GFS2_LOG_HEAD_FLUSH_FREEZE)
			atomic_set(&sdp->sd_freeze_state, SFS_FROZEN);
	}

out:
	if (gfs2_withdrawn(sdp)) {
		ail_drain(sdp); /* frees all transactions */
		tr = NULL;
	}

	trace_gfs2_log_flush(sdp, 0, flags);
	up_write(&sdp->sd_log_flush_lock);

	kfree(tr);
}
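/*
 * Illustrative example for gfs2_merge_trans() below: if the cached
 * transaction has tr_num_buf_new = 3 and a new transaction commits with
 * tr_num_buf_new = 2, the cached transaction ends up with
 * tr_num_buf_new = 5 and both tr_buf lists spliced together; the new
 * transaction's lists are left empty (list_splice_tail_init).
 */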
/**
 * gfs2_merge_trans - Merge a new transaction into a cached transaction
 * @old: Original transaction to be expanded
 * @new: New transaction to be merged
 */

static void gfs2_merge_trans(struct gfs2_trans *old, struct gfs2_trans *new)
{
	WARN_ON_ONCE(!test_bit(TR_ATTACHED, &old->tr_flags));

	old->tr_num_buf_new += new->tr_num_buf_new;
	old->tr_num_databuf_new += new->tr_num_databuf_new;
	old->tr_num_buf_rm += new->tr_num_buf_rm;
	old->tr_num_databuf_rm += new->tr_num_databuf_rm;
	old->tr_num_revoke += new->tr_num_revoke;
	old->tr_num_revoke_rm += new->tr_num_revoke_rm;

	list_splice_tail_init(&new->tr_databuf, &old->tr_databuf);
	list_splice_tail_init(&new->tr_buf, &old->tr_buf);
}

static void log_refund(struct gfs2_sbd *sdp, struct gfs2_trans *tr)
{
	unsigned int reserved;
	unsigned int unused;
	unsigned int maxres;

	gfs2_log_lock(sdp);

	if (sdp->sd_log_tr) {
		gfs2_merge_trans(sdp->sd_log_tr, tr);
	} else if (tr->tr_num_buf_new || tr->tr_num_databuf_new) {
		gfs2_assert_withdraw(sdp, test_bit(TR_ALLOCED, &tr->tr_flags));
		sdp->sd_log_tr = tr;
		set_bit(TR_ATTACHED, &tr->tr_flags);
	}

	sdp->sd_log_committed_revoke += tr->tr_num_revoke - tr->tr_num_revoke_rm;
	reserved = calc_reserved(sdp);
	maxres = sdp->sd_log_blks_reserved + tr->tr_reserved;
	gfs2_assert_withdraw(sdp, maxres >= reserved);
	unused = maxres - reserved;
	atomic_add(unused, &sdp->sd_log_blks_free);
	trace_gfs2_log_blocks(sdp, unused);
	gfs2_assert_withdraw(sdp, atomic_read(&sdp->sd_log_blks_free) <=
			     sdp->sd_jdesc->jd_blocks);
	sdp->sd_log_blks_reserved = reserved;

	gfs2_log_unlock(sdp);
}
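/*
 * Worked example for log_refund() above (illustrative, using the 4KB
 * buf_limit/databuf_limit values quoted in calc_reserved(), and no
 * committed revokes): a transaction that added 10 metadata buffers and
 * 4 journaled data buffers needs 10 + 4 data blocks, plus
 * DIV_ROUND_UP(10, 502) = 1 and DIV_ROUND_UP(4, 251) = 1 header blocks,
 * plus 1 overall header, so calc_reserved() returns 17.  Anything
 * reserved beyond that is refunded to sd_log_blks_free as "unused".
 */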
/**
 * gfs2_log_commit - Commit a transaction to the log
 * @sdp: the filesystem
 * @tr: the transaction
 *
 * We wake up gfs2_logd if the number of pinned blocks exceed thresh1
 * or the total number of used blocks (pinned blocks plus AIL blocks)
 * is greater than thresh2.
 *
 * At mount time thresh1 is 1/3rd of journal size, thresh2 is 2/3rd of
 * journal size.
 */

void gfs2_log_commit(struct gfs2_sbd *sdp, struct gfs2_trans *tr)
{
	log_refund(sdp, tr);

	if (atomic_read(&sdp->sd_log_pinned) > atomic_read(&sdp->sd_log_thresh1) ||
	    ((sdp->sd_jdesc->jd_blocks - atomic_read(&sdp->sd_log_blks_free)) >
	    atomic_read(&sdp->sd_log_thresh2)))
		wake_up(&sdp->sd_logd_waitq);
}

/**
 * gfs2_log_shutdown - write a shutdown header into a journal
 * @sdp: the filesystem
 */

static void gfs2_log_shutdown(struct gfs2_sbd *sdp)
{
	gfs2_assert_withdraw(sdp, !sdp->sd_log_blks_reserved);
	gfs2_assert_withdraw(sdp, !sdp->sd_log_num_revoke);
	gfs2_assert_withdraw(sdp, list_empty(&sdp->sd_ail1_list));

	sdp->sd_log_flush_head = sdp->sd_log_head;

	log_write_header(sdp, GFS2_LOG_HEAD_UNMOUNT | GFS2_LFC_SHUTDOWN);

	gfs2_assert_warn(sdp, sdp->sd_log_head == sdp->sd_log_tail);
	gfs2_assert_warn(sdp, list_empty(&sdp->sd_ail2_list));

	sdp->sd_log_head = sdp->sd_log_flush_head;
	sdp->sd_log_tail = sdp->sd_log_head;
}

static inline int gfs2_jrnl_flush_reqd(struct gfs2_sbd *sdp)
{
	return (atomic_read(&sdp->sd_log_pinned) +
		atomic_read(&sdp->sd_log_blks_needed) >=
		atomic_read(&sdp->sd_log_thresh1));
}

static inline int gfs2_ail_flush_reqd(struct gfs2_sbd *sdp)
{
	unsigned int used_blocks = sdp->sd_jdesc->jd_blocks -
				   atomic_read(&sdp->sd_log_blks_free);

	if (test_and_clear_bit(SDF_FORCE_AIL_FLUSH, &sdp->sd_flags))
		return 1;

	return used_blocks + atomic_read(&sdp->sd_log_blks_needed) >=
		atomic_read(&sdp->sd_log_thresh2);
}

/**
 * gfs2_logd - Update log tail as Active Items get flushed to in-place blocks
 * @sdp: Pointer to GFS2 superblock
 *
 * Also, periodically check to make sure that we're using the most recent
 * journal index.
 */

int gfs2_logd(void *data)
{
	struct gfs2_sbd *sdp = data;
	unsigned long t = 1;
	DEFINE_WAIT(wait);
	bool did_flush;

	while (!kthread_should_stop()) {

		/* Check for errors writing to the journal */
		if (sdp->sd_log_error) {
			gfs2_lm(sdp,
				"GFS2: fsid=%s: error %d: "
				"withdrawing the file system to "
				"prevent further damage.\n",
				sdp->sd_fsname, sdp->sd_log_error);
			gfs2_withdraw(sdp);
		}

		did_flush = false;
		if (gfs2_jrnl_flush_reqd(sdp) || t == 0) {
			gfs2_ail1_empty(sdp, 0);
			gfs2_log_flush(sdp, NULL, GFS2_LOG_HEAD_FLUSH_NORMAL |
						  GFS2_LFC_LOGD_JFLUSH_REQD);
			did_flush = true;
		}

		if (gfs2_ail_flush_reqd(sdp)) {
			gfs2_ail1_start(sdp);
			gfs2_ail1_wait(sdp);
			gfs2_ail1_empty(sdp, 0);
			gfs2_log_flush(sdp, NULL, GFS2_LOG_HEAD_FLUSH_NORMAL |
						  GFS2_LFC_LOGD_AIL_FLUSH_REQD);
			did_flush = true;
		}

		if (!gfs2_ail_flush_reqd(sdp) || did_flush)
			wake_up(&sdp->sd_log_waitq);

		t = gfs2_tune_get(sdp, gt_logd_secs) * HZ;

		try_to_freeze();

		do {
			prepare_to_wait(&sdp->sd_logd_waitq, &wait,
					TASK_INTERRUPTIBLE);
			if (!gfs2_ail_flush_reqd(sdp) &&
			    !gfs2_jrnl_flush_reqd(sdp) &&
			    !kthread_should_stop())
				t = schedule_timeout(t);
		} while (t && !gfs2_ail_flush_reqd(sdp) &&
			 !gfs2_jrnl_flush_reqd(sdp) &&
			 !kthread_should_stop());
		finish_wait(&sdp->sd_logd_waitq, &wait);
	}

	return 0;
}