/*
 * fs/fs-writeback.c
 *
 * Copyright (C) 2002, Linus Torvalds.
 *
 * Contains all the functions related to writing back and waiting
 * upon dirty inodes against superblocks, and writing back dirty
 * pages against inodes.  ie: data writeback.  Writeout of the
 * inode itself is not handled here.
 *
 * 10Apr2002	Andrew Morton
 *		Split out of fs/inode.c
 *		Additions for address_space-based writeback
 */

#include <linux/kernel.h>
#include <linux/export.h>
#include <linux/spinlock.h>
#include <linux/slab.h>
#include <linux/sched.h>
#include <linux/fs.h>
#include <linux/mm.h>
#include <linux/pagemap.h>
#include <linux/kthread.h>
#include <linux/writeback.h>
#include <linux/blkdev.h>
#include <linux/backing-dev.h>
#include <linux/tracepoint.h>
#include "internal.h"

/*
 * 4MB minimal write chunk size
 */
#define MIN_WRITEBACK_PAGES	(4096UL >> (PAGE_CACHE_SHIFT - 10))

/*
 * Passed into wb_writeback(), essentially a subset of writeback_control
 */
struct wb_writeback_work {
	long nr_pages;
	struct super_block *sb;
	unsigned long *older_than_this;
	enum writeback_sync_modes sync_mode;
	unsigned int tagged_writepages:1;
	unsigned int for_kupdate:1;
	unsigned int range_cyclic:1;
	unsigned int for_background:1;
	enum wb_reason reason;		/* why was writeback initiated? */

	struct list_head list;		/* pending work list */
	struct completion *done;	/* set if the caller waits */
};

/**
 * writeback_in_progress - determine whether there is writeback in progress
 * @bdi: the device's backing_dev_info structure.
 *
 * Determine whether there is writeback waiting to be handled against a
 * backing device.
 */
int writeback_in_progress(struct backing_dev_info *bdi)
{
	return test_bit(BDI_writeback_running, &bdi->state);
}
EXPORT_SYMBOL(writeback_in_progress);

static inline struct backing_dev_info *inode_to_bdi(struct inode *inode)
{
	struct super_block *sb = inode->i_sb;

	if (strcmp(sb->s_type->name, "bdev") == 0)
		return inode->i_mapping->backing_dev_info;

	return sb->s_bdi;
}

static inline struct inode *wb_inode(struct list_head *head)
{
	return list_entry(head, struct inode, i_wb_list);
}

/*
 * Include the creation of the trace points after defining the
 * wb_writeback_work structure and inline functions so that the definition
 * remains local to this file.
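 * (Defining CREATE_TRACE_POINTS before pulling in the trace header makes the
 *  include below emit the tracepoint definitions, not just their
 *  declarations, into this compilation unit.)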
 */
#define CREATE_TRACE_POINTS
#include <trace/events/writeback.h>

static void bdi_queue_work(struct backing_dev_info *bdi,
			   struct wb_writeback_work *work)
{
	trace_writeback_queue(bdi, work);

	spin_lock_bh(&bdi->wb_lock);
	list_add_tail(&work->list, &bdi->work_list);
	spin_unlock_bh(&bdi->wb_lock);

	mod_delayed_work(bdi_wq, &bdi->wb.dwork, 0);
}

static void
__bdi_start_writeback(struct backing_dev_info *bdi, long nr_pages,
		      bool range_cyclic, enum wb_reason reason)
{
	struct wb_writeback_work *work;

	/*
	 * This is WB_SYNC_NONE writeback, so if allocation fails just
	 * wake up the thread for old dirty data writeback
	 */
	work = kzalloc(sizeof(*work), GFP_ATOMIC);
	if (!work) {
		trace_writeback_nowork(bdi);
		mod_delayed_work(bdi_wq, &bdi->wb.dwork, 0);
		return;
	}

	work->sync_mode		= WB_SYNC_NONE;
	work->nr_pages		= nr_pages;
	work->range_cyclic	= range_cyclic;
	work->reason		= reason;

	bdi_queue_work(bdi, work);
}

/**
 * bdi_start_writeback - start writeback
 * @bdi: the backing device to write from
 * @nr_pages: the number of pages to write
 * @reason: reason why some writeback work was initiated
 *
 * Description:
 *   This does WB_SYNC_NONE opportunistic writeback. The IO is only
 *   started when this function returns; we make no guarantees on
 *   completion. The caller need not hold the sb s_umount semaphore.
 *
 */
void bdi_start_writeback(struct backing_dev_info *bdi, long nr_pages,
			enum wb_reason reason)
{
	__bdi_start_writeback(bdi, nr_pages, true, reason);
}

/**
 * bdi_start_background_writeback - start background writeback
 * @bdi: the backing device to write from
 *
 * Description:
 *   This makes sure WB_SYNC_NONE background writeback happens. When
 *   this function returns, it is only guaranteed that for given BDI
 *   some IO is happening if we are over background dirty threshold.
 *   The caller need not hold the sb s_umount semaphore.
 */
void bdi_start_background_writeback(struct backing_dev_info *bdi)
{
	/*
	 * We just wake up the flusher thread. It will perform background
	 * writeback as soon as there is no other work to do.
	 */
	trace_writeback_wake_background(bdi);
	mod_delayed_work(bdi_wq, &bdi->wb.dwork, 0);
}

/*
 * Remove the inode from the writeback list it is on.
 */
void inode_wb_list_del(struct inode *inode)
{
	struct backing_dev_info *bdi = inode_to_bdi(inode);

	spin_lock(&bdi->wb.list_lock);
	list_del_init(&inode->i_wb_list);
	spin_unlock(&bdi->wb.list_lock);
}

/*
 * Redirty an inode: set its when-it-was dirtied timestamp and move it to the
 * furthest end of its superblock's dirty-inode list.
 *
 * Before stamping the inode's ->dirtied_when, we check to see whether it is
 * already the most-recently-dirtied inode on the b_dirty list.  If that is
 * the case then the inode must have been redirtied while it was being written
 * out and we don't reset its dirtied_when.
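 *
 * Note on orientation: inodes are queued at the head of b_dirty and expired
 * from its tail, so the head is the most-recently-dirtied end.  The "tail"
 * in this function's name refers to the logical tail of that queue, which is
 * the physical list head.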
 */
static void redirty_tail(struct inode *inode, struct bdi_writeback *wb)
{
	assert_spin_locked(&wb->list_lock);
	if (!list_empty(&wb->b_dirty)) {
		struct inode *tail;

		tail = wb_inode(wb->b_dirty.next);
		if (time_before(inode->dirtied_when, tail->dirtied_when))
			inode->dirtied_when = jiffies;
	}
	list_move(&inode->i_wb_list, &wb->b_dirty);
}

/*
 * requeue inode for re-scanning after bdi->b_io list is exhausted.
 */
static void requeue_io(struct inode *inode, struct bdi_writeback *wb)
{
	assert_spin_locked(&wb->list_lock);
	list_move(&inode->i_wb_list, &wb->b_more_io);
}

static void inode_sync_complete(struct inode *inode)
{
	inode->i_state &= ~I_SYNC;
	/* If inode is clean and unused, put it into LRU now... */
	inode_add_lru(inode);
	/* Waiters must see I_SYNC cleared before being woken up */
	smp_mb();
	wake_up_bit(&inode->i_state, __I_SYNC);
}

static bool inode_dirtied_after(struct inode *inode, unsigned long t)
{
	bool ret = time_after(inode->dirtied_when, t);
#ifndef CONFIG_64BIT
	/*
	 * For inodes being constantly redirtied, dirtied_when can get stuck.
	 * It _appears_ to be in the future, but is actually in distant past.
	 * This test is necessary to prevent such wrapped-around relative times
	 * from permanently stopping the whole bdi writeback.
	 */
	ret = ret && time_before_eq(inode->dirtied_when, jiffies);
#endif
	return ret;
}

/*
 * Move expired (dirtied before work->older_than_this) dirty inodes from
 * @delaying_queue to @dispatch_queue.
 */
static int move_expired_inodes(struct list_head *delaying_queue,
			       struct list_head *dispatch_queue,
			       struct wb_writeback_work *work)
{
	LIST_HEAD(tmp);
	struct list_head *pos, *node;
	struct super_block *sb = NULL;
	struct inode *inode;
	int do_sb_sort = 0;
	int moved = 0;

	while (!list_empty(delaying_queue)) {
		inode = wb_inode(delaying_queue->prev);
		if (work->older_than_this &&
		    inode_dirtied_after(inode, *work->older_than_this))
			break;
		if (sb && sb != inode->i_sb)
			do_sb_sort = 1;
		sb = inode->i_sb;
		list_move(&inode->i_wb_list, &tmp);
		moved++;
	}

	/* just one sb in list, splice to dispatch_queue and we're done */
	if (!do_sb_sort) {
		list_splice(&tmp, dispatch_queue);
		goto out;
	}

	/* Move inodes from one superblock together */
	while (!list_empty(&tmp)) {
		sb = wb_inode(tmp.prev)->i_sb;
		list_for_each_prev_safe(pos, node, &tmp) {
			inode = wb_inode(pos);
			if (inode->i_sb == sb)
				list_move(&inode->i_wb_list, dispatch_queue);
		}
	}
out:
	return moved;
}

/*
 * Queue all expired dirty inodes for io, eldest first.
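 * (In the diagram below each letter stands for one inode; within every list
 *  the head, i.e. the most recently queued entry, is drawn leftmost and
 *  writeback dequeues from the opposite end.)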
 * Before
 *         newly dirtied     b_dirty    b_io    b_more_io
 *         =============>    gf         edc     BA
 * After
 *         newly dirtied     b_dirty    b_io    b_more_io
 *         =============>    g          fBAedc
 *                                      |
 *                                      +--> dequeue for IO
 */
static void queue_io(struct bdi_writeback *wb, struct wb_writeback_work *work)
{
	int moved;
	assert_spin_locked(&wb->list_lock);
	list_splice_init(&wb->b_more_io, &wb->b_io);
	moved = move_expired_inodes(&wb->b_dirty, &wb->b_io, work);
	trace_writeback_queue_io(wb, work, moved);
}

static int write_inode(struct inode *inode, struct writeback_control *wbc)
{
	int ret;

	if (inode->i_sb->s_op->write_inode && !is_bad_inode(inode)) {
		trace_writeback_write_inode_start(inode, wbc);
		ret = inode->i_sb->s_op->write_inode(inode, wbc);
		trace_writeback_write_inode(inode, wbc);
		return ret;
	}
	return 0;
}

/*
 * Wait for writeback on an inode to complete. Called with i_lock held.
 * Caller must make sure inode cannot go away when we drop i_lock.
 */
static void __inode_wait_for_writeback(struct inode *inode)
	__releases(inode->i_lock)
	__acquires(inode->i_lock)
{
	DEFINE_WAIT_BIT(wq, &inode->i_state, __I_SYNC);
	wait_queue_head_t *wqh;

	wqh = bit_waitqueue(&inode->i_state, __I_SYNC);
	while (inode->i_state & I_SYNC) {
		spin_unlock(&inode->i_lock);
		__wait_on_bit(wqh, &wq, inode_wait, TASK_UNINTERRUPTIBLE);
		spin_lock(&inode->i_lock);
	}
}

/*
 * Wait for writeback on an inode to complete. Caller must have inode pinned.
 */
void inode_wait_for_writeback(struct inode *inode)
{
	spin_lock(&inode->i_lock);
	__inode_wait_for_writeback(inode);
	spin_unlock(&inode->i_lock);
}

/*
 * Sleep until I_SYNC is cleared. This function must be called with i_lock
 * held and drops it. It is aimed for callers not holding any inode reference
 * so once i_lock is dropped, inode can go away.
 */
static void inode_sleep_on_writeback(struct inode *inode)
	__releases(inode->i_lock)
{
	DEFINE_WAIT(wait);
	wait_queue_head_t *wqh = bit_waitqueue(&inode->i_state, __I_SYNC);
	int sleep;

	prepare_to_wait(wqh, &wait, TASK_UNINTERRUPTIBLE);
	sleep = inode->i_state & I_SYNC;
	spin_unlock(&inode->i_lock);
	if (sleep)
		schedule();
	finish_wait(wqh, &wait);
}

/*
 * Find proper writeback list for the inode depending on its current state and
 * possibly also change of its state while we were doing writeback.  Here we
 * handle things such as livelock prevention or fairness of writeback among
 * inodes. This function can only be called by the flusher thread - no one else
 * processes all inodes in writeback lists and requeueing inodes behind the
 * flusher thread's back can have unexpected consequences.
 */
static void requeue_inode(struct inode *inode, struct bdi_writeback *wb,
			  struct writeback_control *wbc)
{
	if (inode->i_state & I_FREEING)
		return;

	/*
	 * Sync livelock prevention. Each inode is tagged and synced in one
	 * shot. If still dirty, it will be redirty_tail()'ed below.  Update
	 * the dirty time to prevent enqueue and sync it again.
	 */
	if ((inode->i_state & I_DIRTY) &&
	    (wbc->sync_mode == WB_SYNC_ALL || wbc->tagged_writepages))
		inode->dirtied_when = jiffies;

	if (wbc->pages_skipped) {
		/*
		 * writeback is not making progress due to locked
		 * buffers. Skip this inode for now.
		 */
		redirty_tail(inode, wb);
		return;
	}

	if (mapping_tagged(inode->i_mapping, PAGECACHE_TAG_DIRTY)) {
		/*
		 * We didn't write back all the pages.  nfs_writepages()
		 * sometimes bails out without doing anything.
		 */
		if (wbc->nr_to_write <= 0) {
			/* Slice used up. Queue for next turn. */
			requeue_io(inode, wb);
		} else {
			/*
			 * Writeback blocked by something other than
			 * congestion. Delay the inode for some time to
			 * avoid spinning on the CPU (100% iowait)
			 * retrying writeback of the dirty page/inode
			 * that cannot be performed immediately.
			 */
			redirty_tail(inode, wb);
		}
	} else if (inode->i_state & I_DIRTY) {
		/*
		 * Filesystems can dirty the inode during writeback operations,
		 * such as delayed allocation during submission or metadata
		 * updates after data IO completion.
		 */
		redirty_tail(inode, wb);
	} else {
		/* The inode is clean. Remove from writeback lists. */
		list_del_init(&inode->i_wb_list);
	}
}

/*
 * Write out an inode and its dirty pages. Do not update the writeback list
 * linkage. That is left to the caller. The caller is also responsible for
 * setting the I_SYNC flag and calling inode_sync_complete() to clear it.
 */
static int
__writeback_single_inode(struct inode *inode, struct writeback_control *wbc)
{
	struct address_space *mapping = inode->i_mapping;
	long nr_to_write = wbc->nr_to_write;
	unsigned dirty;
	int ret;

	WARN_ON(!(inode->i_state & I_SYNC));

	trace_writeback_single_inode_start(inode, wbc, nr_to_write);

	ret = do_writepages(mapping, wbc);

	/*
	 * Make sure to wait on the data before writing out the metadata.
	 * This is important for filesystems that modify metadata on data
	 * I/O completion.
	 */
	if (wbc->sync_mode == WB_SYNC_ALL) {
		int err = filemap_fdatawait(mapping);
		if (ret == 0)
			ret = err;
	}

	/*
	 * Some filesystems may redirty the inode during the writeback
	 * due to delalloc, clear dirty metadata flags right before
	 * write_inode()
	 */
	spin_lock(&inode->i_lock);
	/* Clear I_DIRTY_PAGES if we've written out all dirty pages */
	if (!mapping_tagged(mapping, PAGECACHE_TAG_DIRTY))
		inode->i_state &= ~I_DIRTY_PAGES;
	dirty = inode->i_state & I_DIRTY;
	inode->i_state &= ~(I_DIRTY_SYNC | I_DIRTY_DATASYNC);
	spin_unlock(&inode->i_lock);
	/* Don't write the inode if only I_DIRTY_PAGES was set */
	if (dirty & (I_DIRTY_SYNC | I_DIRTY_DATASYNC)) {
		int err = write_inode(inode, wbc);
		if (ret == 0)
			ret = err;
	}
	trace_writeback_single_inode(inode, wbc, nr_to_write);
	return ret;
}

/*
 * Write out an inode's dirty pages. Either the caller has an active reference
 * on the inode or the inode has I_WILL_FREE set.
 *
 * This function is meant for callers that write back a single inode, e.g. the
 * filesystem itself. The flusher thread instead uses __writeback_single_inode()
 * and does more profound writeback list handling in writeback_sb_inodes().
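 * Within this file it is reached via write_inode_now() and sync_inode().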
 */
static int
writeback_single_inode(struct inode *inode, struct bdi_writeback *wb,
		       struct writeback_control *wbc)
{
	int ret = 0;

	spin_lock(&inode->i_lock);
	if (!atomic_read(&inode->i_count))
		WARN_ON(!(inode->i_state & (I_WILL_FREE|I_FREEING)));
	else
		WARN_ON(inode->i_state & I_WILL_FREE);

	if (inode->i_state & I_SYNC) {
		if (wbc->sync_mode != WB_SYNC_ALL)
			goto out;
		/*
		 * It's a data-integrity sync. We must wait. Since callers hold
		 * inode reference or inode has I_WILL_FREE set, it cannot go
		 * away under us.
		 */
		__inode_wait_for_writeback(inode);
	}
	WARN_ON(inode->i_state & I_SYNC);
	/*
	 * Skip inode if it is clean. We don't want to mess with writeback
	 * lists in this function since flusher thread may be doing for example
	 * sync in parallel and if we move the inode, it could get skipped. So
	 * here we make sure inode is on some writeback list and leave it there
	 * unless we have completely cleaned the inode.
	 */
	if (!(inode->i_state & I_DIRTY))
		goto out;
	inode->i_state |= I_SYNC;
	spin_unlock(&inode->i_lock);

	ret = __writeback_single_inode(inode, wbc);

	spin_lock(&wb->list_lock);
	spin_lock(&inode->i_lock);
	/*
	 * If inode is clean, remove it from writeback lists. Otherwise don't
	 * touch it. See comment above for explanation.
	 */
	if (!(inode->i_state & I_DIRTY))
		list_del_init(&inode->i_wb_list);
	spin_unlock(&wb->list_lock);
	inode_sync_complete(inode);
out:
	spin_unlock(&inode->i_lock);
	return ret;
}

static long writeback_chunk_size(struct backing_dev_info *bdi,
				 struct wb_writeback_work *work)
{
	long pages;

	/*
	 * WB_SYNC_ALL mode does livelock avoidance by syncing dirty
	 * inodes/pages in one big loop. Setting wbc.nr_to_write=LONG_MAX
	 * here avoids calling into writeback_inodes_wb() more than once.
	 *
	 * The intended call sequence for WB_SYNC_ALL writeback is:
	 *
	 *      wb_writeback()
	 *          writeback_sb_inodes()       <== called only once
	 *              write_cache_pages()     <== called once for each inode
	 *                  (quickly) tag currently dirty pages
	 *                  (maybe slowly) sync all tagged pages
	 */
	if (work->sync_mode == WB_SYNC_ALL || work->tagged_writepages)
		pages = LONG_MAX;
	else {
		pages = min(bdi->avg_write_bandwidth / 2,
			    global_dirty_limit / DIRTY_SCOPE);
		pages = min(pages, work->nr_pages);
		pages = round_down(pages + MIN_WRITEBACK_PAGES,
				   MIN_WRITEBACK_PAGES);
	}

	return pages;
}

/*
 * Write a portion of b_io inodes which belong to @sb.
 *
 * Return the number of pages and/or inodes written.
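 *
 * The count mixes units: each page written during this call adds one, and
 * every inode that ends up completely clean adds one more, so callers treat
 * it purely as a progress indicator.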
 */
static long writeback_sb_inodes(struct super_block *sb,
				struct bdi_writeback *wb,
				struct wb_writeback_work *work)
{
	struct writeback_control wbc = {
		.sync_mode		= work->sync_mode,
		.tagged_writepages	= work->tagged_writepages,
		.for_kupdate		= work->for_kupdate,
		.for_background		= work->for_background,
		.range_cyclic		= work->range_cyclic,
		.range_start		= 0,
		.range_end		= LLONG_MAX,
	};
	unsigned long start_time = jiffies;
	long write_chunk;
	long wrote = 0;  /* count both pages and inodes */

	while (!list_empty(&wb->b_io)) {
		struct inode *inode = wb_inode(wb->b_io.prev);

		if (inode->i_sb != sb) {
			if (work->sb) {
				/*
				 * We only want to write back data for this
				 * superblock, move all inodes not belonging
				 * to it back onto the dirty list.
				 */
				redirty_tail(inode, wb);
				continue;
			}

			/*
			 * The inode belongs to a different superblock.
			 * Bounce back to the caller to unpin this and
			 * pin the next superblock.
			 */
			break;
		}

		/*
		 * Don't bother with new inodes or inodes being freed: the
		 * former do not need periodic writeout yet, and for the
		 * latter writeout is handled by the freer.
		 */
		spin_lock(&inode->i_lock);
		if (inode->i_state & (I_NEW | I_FREEING | I_WILL_FREE)) {
			spin_unlock(&inode->i_lock);
			redirty_tail(inode, wb);
			continue;
		}
		if ((inode->i_state & I_SYNC) && wbc.sync_mode != WB_SYNC_ALL) {
			/*
			 * If this inode is locked for writeback and we are not
			 * doing writeback-for-data-integrity, move it to
			 * b_more_io so that writeback can proceed with the
			 * other inodes on s_io.
			 *
			 * We'll have another go at writing back this inode
			 * when we have completed a full scan of b_io.
			 */
			spin_unlock(&inode->i_lock);
			requeue_io(inode, wb);
			trace_writeback_sb_inodes_requeue(inode);
			continue;
		}
		spin_unlock(&wb->list_lock);

		/*
		 * We already requeued the inode if it had I_SYNC set and we
		 * are doing WB_SYNC_NONE writeback. So this catches only the
		 * WB_SYNC_ALL case.
		 */
		if (inode->i_state & I_SYNC) {
			/* Wait for I_SYNC. This function drops i_lock... */
			inode_sleep_on_writeback(inode);
			/* Inode may be gone, start again */
			spin_lock(&wb->list_lock);
			continue;
		}
		inode->i_state |= I_SYNC;
		spin_unlock(&inode->i_lock);

		write_chunk = writeback_chunk_size(wb->bdi, work);
		wbc.nr_to_write = write_chunk;
		wbc.pages_skipped = 0;

		/*
		 * We use I_SYNC to pin the inode in memory. While it is set
		 * evict_inode() will wait so the inode cannot be freed.
		 */
		__writeback_single_inode(inode, &wbc);

		work->nr_pages -= write_chunk - wbc.nr_to_write;
		wrote += write_chunk - wbc.nr_to_write;
		spin_lock(&wb->list_lock);
		spin_lock(&inode->i_lock);
		if (!(inode->i_state & I_DIRTY))
			wrote++;
		requeue_inode(inode, wb, &wbc);
		inode_sync_complete(inode);
		spin_unlock(&inode->i_lock);
		cond_resched_lock(&wb->list_lock);
		/*
		 * bail out to wb_writeback() often enough to check
		 * background threshold and other termination conditions.
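		 * In practice that means returning after roughly a tenth of
		 * a second once some progress has been made, or as soon as
		 * the page budget in work->nr_pages is exhausted.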
		 */
		if (wrote) {
			if (time_is_before_jiffies(start_time + HZ / 10UL))
				break;
			if (work->nr_pages <= 0)
				break;
		}
	}
	return wrote;
}

static long __writeback_inodes_wb(struct bdi_writeback *wb,
				  struct wb_writeback_work *work)
{
	unsigned long start_time = jiffies;
	long wrote = 0;

	while (!list_empty(&wb->b_io)) {
		struct inode *inode = wb_inode(wb->b_io.prev);
		struct super_block *sb = inode->i_sb;

		if (!grab_super_passive(sb)) {
			/*
			 * grab_super_passive() may fail consistently due to
			 * s_umount being grabbed by someone else. Don't use
			 * requeue_io() to avoid busy retrying the inode/sb.
			 */
			redirty_tail(inode, wb);
			continue;
		}
		wrote += writeback_sb_inodes(sb, wb, work);
		drop_super(sb);

		/* refer to the same tests at the end of writeback_sb_inodes */
		if (wrote) {
			if (time_is_before_jiffies(start_time + HZ / 10UL))
				break;
			if (work->nr_pages <= 0)
				break;
		}
	}
	/* Leave any unwritten inodes on b_io */
	return wrote;
}

long writeback_inodes_wb(struct bdi_writeback *wb, long nr_pages,
				enum wb_reason reason)
{
	struct wb_writeback_work work = {
		.nr_pages	= nr_pages,
		.sync_mode	= WB_SYNC_NONE,
		.range_cyclic	= 1,
		.reason		= reason,
	};

	spin_lock(&wb->list_lock);
	if (list_empty(&wb->b_io))
		queue_io(wb, &work);
	__writeback_inodes_wb(wb, &work);
	spin_unlock(&wb->list_lock);

	return nr_pages - work.nr_pages;
}

static bool over_bground_thresh(struct backing_dev_info *bdi)
{
	unsigned long background_thresh, dirty_thresh;

	global_dirty_limits(&background_thresh, &dirty_thresh);

	if (global_page_state(NR_FILE_DIRTY) +
	    global_page_state(NR_UNSTABLE_NFS) > background_thresh)
		return true;

	if (bdi_stat(bdi, BDI_RECLAIMABLE) >
				bdi_dirty_limit(bdi, background_thresh))
		return true;

	return false;
}

/*
 * Called under wb->list_lock. If there are multiple wb per bdi,
 * only the flusher working on the first wb should do it.
 */
static void wb_update_bandwidth(struct bdi_writeback *wb,
				unsigned long start_time)
{
	__bdi_update_bandwidth(wb->bdi, 0, 0, 0, 0, 0, start_time);
}

/*
 * Explicit flushing or periodic writeback of "old" data.
 *
 * Define "old": the first time one of an inode's pages is dirtied, we mark the
 * dirtying-time in the inode's address_space.  So this periodic writeback code
 * just walks the superblock inode list, writing back any inodes which are
 * older than a specific point in time.
 *
 * Try to run once per dirty_writeback_interval.  But if a writeback event
 * takes longer than one dirty_writeback_interval, then leave a one-second gap.
 *
 * older_than_this takes precedence over nr_to_write.  So we'll only write back
 * all dirty pages if they are all attached to "old" mappings.
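 *
 * For kupdate-style work the cut-off is dirty_expire_interval, which is kept
 * in centiseconds, hence the "* 10" when converting it to milliseconds below.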
 */
static long wb_writeback(struct bdi_writeback *wb,
			 struct wb_writeback_work *work)
{
	unsigned long wb_start = jiffies;
	long nr_pages = work->nr_pages;
	unsigned long oldest_jif;
	struct inode *inode;
	long progress;

	oldest_jif = jiffies;
	work->older_than_this = &oldest_jif;

	spin_lock(&wb->list_lock);
	for (;;) {
		/*
		 * Stop writeback when nr_pages has been consumed
		 */
		if (work->nr_pages <= 0)
			break;

		/*
		 * Background writeout and kupdate-style writeback may
		 * run forever. Stop them if there is other work to do
		 * so that e.g. sync can proceed. They'll be restarted
		 * after the other works are all done.
		 */
		if ((work->for_background || work->for_kupdate) &&
		    !list_empty(&wb->bdi->work_list))
			break;

		/*
		 * For background writeout, stop when we are below the
		 * background dirty threshold
		 */
		if (work->for_background && !over_bground_thresh(wb->bdi))
			break;

		/*
		 * Kupdate and background works are special and we want to
		 * include all inodes that need writing. Livelock avoidance is
		 * handled by these works yielding to any other work so we are
		 * safe.
		 */
		if (work->for_kupdate) {
			oldest_jif = jiffies -
				msecs_to_jiffies(dirty_expire_interval * 10);
		} else if (work->for_background)
			oldest_jif = jiffies;

		trace_writeback_start(wb->bdi, work);
		if (list_empty(&wb->b_io))
			queue_io(wb, work);
		if (work->sb)
			progress = writeback_sb_inodes(work->sb, wb, work);
		else
			progress = __writeback_inodes_wb(wb, work);
		trace_writeback_written(wb->bdi, work);

		wb_update_bandwidth(wb, wb_start);

		/*
		 * Did we write something? Try for more
		 *
		 * Dirty inodes are moved to b_io for writeback in batches.
		 * The completion of the current batch does not necessarily
		 * mean the overall work is done. So we keep looping as long
		 * as we made some progress on cleaning pages or inodes.
		 */
		if (progress)
			continue;
		/*
		 * No more inodes for IO, bail
		 */
		if (list_empty(&wb->b_more_io))
			break;
		/*
		 * Nothing written. Wait for some inode to
		 * become available for writeback. Otherwise
		 * we'll just busyloop.
		 */
		if (!list_empty(&wb->b_more_io)) {
			trace_writeback_wait(wb->bdi, work);
			inode = wb_inode(wb->b_more_io.prev);
			spin_lock(&inode->i_lock);
			spin_unlock(&wb->list_lock);
			/* This function drops i_lock... */
			inode_sleep_on_writeback(inode);
			spin_lock(&wb->list_lock);
		}
	}
	spin_unlock(&wb->list_lock);

	return nr_pages - work->nr_pages;
}

/*
 * Return the next wb_writeback_work struct that hasn't been processed yet.
 */
static struct wb_writeback_work *
get_next_work_item(struct backing_dev_info *bdi)
{
	struct wb_writeback_work *work = NULL;

	spin_lock_bh(&bdi->wb_lock);
	if (!list_empty(&bdi->work_list)) {
		work = list_entry(bdi->work_list.next,
				  struct wb_writeback_work, list);
		list_del_init(&work->list);
	}
	spin_unlock_bh(&bdi->wb_lock);
	return work;
}

/*
 * Add in the number of potentially dirty inodes, because each inode
 * write can dirty pagecache in the underlying blockdev.
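 * Within this file the result only sizes WB_SYNC_NONE work, so
 * over-estimating is harmless.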
 */
static unsigned long get_nr_dirty_pages(void)
{
	return global_page_state(NR_FILE_DIRTY) +
		global_page_state(NR_UNSTABLE_NFS) +
		get_nr_dirty_inodes();
}

static long wb_check_background_flush(struct bdi_writeback *wb)
{
	if (over_bground_thresh(wb->bdi)) {

		struct wb_writeback_work work = {
			.nr_pages	= LONG_MAX,
			.sync_mode	= WB_SYNC_NONE,
			.for_background	= 1,
			.range_cyclic	= 1,
			.reason		= WB_REASON_BACKGROUND,
		};

		return wb_writeback(wb, &work);
	}

	return 0;
}

static long wb_check_old_data_flush(struct bdi_writeback *wb)
{
	unsigned long expired;
	long nr_pages;

	/*
	 * When set to zero, disable periodic writeback
	 */
	if (!dirty_writeback_interval)
		return 0;

	expired = wb->last_old_flush +
			msecs_to_jiffies(dirty_writeback_interval * 10);
	if (time_before(jiffies, expired))
		return 0;

	wb->last_old_flush = jiffies;
	nr_pages = get_nr_dirty_pages();

	if (nr_pages) {
		struct wb_writeback_work work = {
			.nr_pages	= nr_pages,
			.sync_mode	= WB_SYNC_NONE,
			.for_kupdate	= 1,
			.range_cyclic	= 1,
			.reason		= WB_REASON_PERIODIC,
		};

		return wb_writeback(wb, &work);
	}

	return 0;
}

/*
 * Retrieve work items and do the writeback they describe
 */
long wb_do_writeback(struct bdi_writeback *wb, int force_wait)
{
	struct backing_dev_info *bdi = wb->bdi;
	struct wb_writeback_work *work;
	long wrote = 0;

	set_bit(BDI_writeback_running, &wb->bdi->state);
	while ((work = get_next_work_item(bdi)) != NULL) {
		/*
		 * Override sync mode, in case we must wait for completion
		 * because this thread is exiting now.
		 */
		if (force_wait)
			work->sync_mode = WB_SYNC_ALL;

		trace_writeback_exec(bdi, work);

		wrote += wb_writeback(wb, work);

		/*
		 * Notify the caller of completion if this is a synchronous
		 * work item, otherwise just free it.
		 */
		if (work->done)
			complete(work->done);
		else
			kfree(work);
	}

	/*
	 * Check for periodic writeback, kupdated() style
	 */
	wrote += wb_check_old_data_flush(wb);
	wrote += wb_check_background_flush(wb);
	clear_bit(BDI_writeback_running, &wb->bdi->state);

	return wrote;
}

/*
 * Handle writeback of dirty data for the device backed by this bdi. Also
 * reschedules periodically and does kupdated style flushing.
 */
void bdi_writeback_workfn(struct work_struct *work)
{
	struct bdi_writeback *wb = container_of(to_delayed_work(work),
						struct bdi_writeback, dwork);
	struct backing_dev_info *bdi = wb->bdi;
	long pages_written;

	set_worker_desc("flush-%s", dev_name(bdi->dev));
	current->flags |= PF_SWAPWRITE;

	if (likely(!current_is_workqueue_rescuer() ||
		   list_empty(&bdi->bdi_list))) {
		/*
		 * The normal path.  Keep writing back @bdi until its
		 * work_list is empty.  Note that this path is also taken
		 * if @bdi is shutting down even when we're running off the
		 * rescuer as work_list needs to be drained.
		 */
		do {
			pages_written = wb_do_writeback(wb, 0);
			trace_writeback_pages_written(pages_written);
		} while (!list_empty(&bdi->work_list));
	} else {
		/*
		 * bdi_wq can't get enough workers and we're running off
		 * the emergency worker.  Don't hog it.  Hopefully, 1024 is
		 * enough for efficient IO.
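		 * (current_is_workqueue_rescuer() is true when this work item
		 *  is being run by the workqueue's rescuer thread because new
		 *  workers could not be created, typically under memory
		 *  pressure.)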
		 */
		pages_written = writeback_inodes_wb(&bdi->wb, 1024,
						    WB_REASON_FORKER_THREAD);
		trace_writeback_pages_written(pages_written);
	}

	if (!list_empty(&bdi->work_list) ||
	    (wb_has_dirty_io(wb) && dirty_writeback_interval))
		queue_delayed_work(bdi_wq, &wb->dwork,
			msecs_to_jiffies(dirty_writeback_interval * 10));

	current->flags &= ~PF_SWAPWRITE;
}

/*
 * Start writeback of `nr_pages' pages.  If `nr_pages' is zero, write back
 * the whole world.
 */
void wakeup_flusher_threads(long nr_pages, enum wb_reason reason)
{
	struct backing_dev_info *bdi;

	if (!nr_pages) {
		nr_pages = global_page_state(NR_FILE_DIRTY) +
				global_page_state(NR_UNSTABLE_NFS);
	}

	rcu_read_lock();
	list_for_each_entry_rcu(bdi, &bdi_list, bdi_list) {
		if (!bdi_has_dirty_io(bdi))
			continue;
		__bdi_start_writeback(bdi, nr_pages, false, reason);
	}
	rcu_read_unlock();
}

static noinline void block_dump___mark_inode_dirty(struct inode *inode)
{
	if (inode->i_ino || strcmp(inode->i_sb->s_id, "bdev")) {
		struct dentry *dentry;
		const char *name = "?";

		dentry = d_find_alias(inode);
		if (dentry) {
			spin_lock(&dentry->d_lock);
			name = (const char *) dentry->d_name.name;
		}
		printk(KERN_DEBUG
		       "%s(%d): dirtied inode %lu (%s) on %s\n",
		       current->comm, task_pid_nr(current), inode->i_ino,
		       name, inode->i_sb->s_id);
		if (dentry) {
			spin_unlock(&dentry->d_lock);
			dput(dentry);
		}
	}
}

/**
 * __mark_inode_dirty - internal function
 * @inode: inode to mark
 * @flags: what kind of dirty (i.e. I_DIRTY_SYNC)
 *
 * Mark an inode as dirty. Callers should use mark_inode_dirty or
 * mark_inode_dirty_sync.
 *
 * Put the inode on the super block's dirty list.
 *
 * CAREFUL! We mark it dirty unconditionally, but move it onto the
 * dirty list only if it is hashed or if it refers to a blockdev.
 * If it was not hashed, it will never be added to the dirty list
 * even if it is later hashed, as it will have been marked dirty already.
 *
 * In short, make sure you hash any inodes _before_ you start marking
 * them dirty.
 *
 * Note that for blockdevs, inode->dirtied_when represents the dirtying time of
 * the block-special inode (/dev/hda1) itself.  And the ->dirtied_when field of
 * the kernel-internal blockdev inode represents the dirtying time of the
 * blockdev's pages.  This is why for I_DIRTY_PAGES we always use
 * page->mapping->host, so the page-dirtying time is recorded in the internal
 * blockdev inode.
 */
void __mark_inode_dirty(struct inode *inode, int flags)
{
	struct super_block *sb = inode->i_sb;
	struct backing_dev_info *bdi = NULL;

	/*
	 * Don't do this for I_DIRTY_PAGES - that doesn't actually
	 * dirty the inode itself
	 */
	if (flags & (I_DIRTY_SYNC | I_DIRTY_DATASYNC)) {
		trace_writeback_dirty_inode_start(inode, flags);

		if (sb->s_op->dirty_inode)
			sb->s_op->dirty_inode(inode, flags);

		trace_writeback_dirty_inode(inode, flags);
	}

	/*
	 * make sure that changes are seen by all cpus before we test i_state
	 * -- mikulas
	 */
	smp_mb();

	/* avoid the locking if we can */
	if ((inode->i_state & flags) == flags)
		return;

	if (unlikely(block_dump))
		block_dump___mark_inode_dirty(inode);

	spin_lock(&inode->i_lock);
	if ((inode->i_state & flags) != flags) {
		const int was_dirty = inode->i_state & I_DIRTY;

		inode->i_state |= flags;

		/*
		 * If the inode is being synced, just update its dirty state.
		 * The unlocker will place the inode on the appropriate
		 * superblock list, based upon its state.
		 */
		if (inode->i_state & I_SYNC)
			goto out_unlock_inode;

		/*
		 * Only add valid (hashed) inodes to the superblock's
		 * dirty list.  Add blockdev inodes as well.
		 */
		if (!S_ISBLK(inode->i_mode)) {
			if (inode_unhashed(inode))
				goto out_unlock_inode;
		}
		if (inode->i_state & I_FREEING)
			goto out_unlock_inode;

		/*
		 * If the inode was already on b_dirty/b_io/b_more_io, don't
		 * reposition it (that would break b_dirty time-ordering).
		 */
		if (!was_dirty) {
			bool wakeup_bdi = false;
			bdi = inode_to_bdi(inode);

			if (bdi_cap_writeback_dirty(bdi)) {
				WARN(!test_bit(BDI_registered, &bdi->state),
				     "bdi-%s not registered\n", bdi->name);

				/*
				 * If this is the first dirty inode for this
				 * bdi, we have to wake-up the corresponding
				 * bdi thread to make sure background
				 * write-back happens later.
				 */
				if (!wb_has_dirty_io(&bdi->wb))
					wakeup_bdi = true;
			}

			spin_unlock(&inode->i_lock);
			spin_lock(&bdi->wb.list_lock);
			inode->dirtied_when = jiffies;
			list_move(&inode->i_wb_list, &bdi->wb.b_dirty);
			spin_unlock(&bdi->wb.list_lock);

			if (wakeup_bdi)
				bdi_wakeup_thread_delayed(bdi);
			return;
		}
	}
out_unlock_inode:
	spin_unlock(&inode->i_lock);

}
EXPORT_SYMBOL(__mark_inode_dirty);

static void wait_sb_inodes(struct super_block *sb)
{
	struct inode *inode, *old_inode = NULL;

	/*
	 * We need to be protected against the filesystem going from
	 * r/o to r/w or vice versa.
	 */
	WARN_ON(!rwsem_is_locked(&sb->s_umount));

	spin_lock(&inode_sb_list_lock);

	/*
	 * Data integrity sync. Must wait for all pages under writeback,
	 * because there may have been pages dirtied before our sync
	 * call whose writeout had already been started.  In that case
	 * the inode may no longer be on the dirty list, but
	 * we still have to wait for that writeout.
	 */
	list_for_each_entry(inode, &sb->s_inodes, i_sb_list) {
		struct address_space *mapping = inode->i_mapping;

		spin_lock(&inode->i_lock);
		if ((inode->i_state & (I_FREEING|I_WILL_FREE|I_NEW)) ||
		    (mapping->nrpages == 0)) {
			spin_unlock(&inode->i_lock);
			continue;
		}
		__iget(inode);
		spin_unlock(&inode->i_lock);
		spin_unlock(&inode_sb_list_lock);

		/*
		 * We hold a reference to 'inode' so it couldn't have been
		 * removed from s_inodes list while we dropped the
		 * inode_sb_list_lock.  We cannot iput the inode now as we can
		 * be holding the last reference and we cannot iput it under
		 * inode_sb_list_lock. So we keep the reference and iput it
		 * later.
		 */
		iput(old_inode);
		old_inode = inode;

		filemap_fdatawait(mapping);

		cond_resched();

		spin_lock(&inode_sb_list_lock);
	}
	spin_unlock(&inode_sb_list_lock);
	iput(old_inode);
}

/**
 * writeback_inodes_sb_nr - writeback dirty inodes from given super_block
 * @sb: the superblock
 * @nr: the number of pages to write
 * @reason: reason why some writeback work was initiated
 *
 * Start writeback on some inodes on this super_block. No guarantees are made
 * on how many (if any) will be written, and this function does not wait
 * for IO completion of submitted IO.
 */
void writeback_inodes_sb_nr(struct super_block *sb,
			    unsigned long nr,
			    enum wb_reason reason)
{
	DECLARE_COMPLETION_ONSTACK(done);
	struct wb_writeback_work work = {
		.sb			= sb,
		.sync_mode		= WB_SYNC_NONE,
		.tagged_writepages	= 1,
		.done			= &done,
		.nr_pages		= nr,
		.reason			= reason,
	};

	if (sb->s_bdi == &noop_backing_dev_info)
		return;
	WARN_ON(!rwsem_is_locked(&sb->s_umount));
	bdi_queue_work(sb->s_bdi, &work);
	wait_for_completion(&done);
}
EXPORT_SYMBOL(writeback_inodes_sb_nr);

/**
 * writeback_inodes_sb - writeback dirty inodes from given super_block
 * @sb: the superblock
 * @reason: reason why some writeback work was initiated
 *
 * Start writeback on some inodes on this super_block. No guarantees are made
 * on how many (if any) will be written, and this function does not wait
 * for IO completion of submitted IO.
 */
void writeback_inodes_sb(struct super_block *sb, enum wb_reason reason)
{
	return writeback_inodes_sb_nr(sb, get_nr_dirty_pages(), reason);
}
EXPORT_SYMBOL(writeback_inodes_sb);

/**
 * try_to_writeback_inodes_sb_nr - try to start writeback if none underway
 * @sb: the superblock
 * @nr: the number of pages to write
 * @reason: the reason of writeback
 *
 * Invoke writeback_inodes_sb_nr if no writeback is currently underway.
 * Returns 1 if writeback was started, 0 if not.
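 * Note that 1 is also returned when writeback was already in progress, in
 * which case no new work is queued.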
 */
int try_to_writeback_inodes_sb_nr(struct super_block *sb,
				  unsigned long nr,
				  enum wb_reason reason)
{
	if (writeback_in_progress(sb->s_bdi))
		return 1;

	if (!down_read_trylock(&sb->s_umount))
		return 0;

	writeback_inodes_sb_nr(sb, nr, reason);
	up_read(&sb->s_umount);
	return 1;
}
EXPORT_SYMBOL(try_to_writeback_inodes_sb_nr);

/**
 * try_to_writeback_inodes_sb - try to start writeback if none underway
 * @sb: the superblock
 * @reason: reason why some writeback work was initiated
 *
 * Implemented by try_to_writeback_inodes_sb_nr().
 * Returns 1 if writeback was started, 0 if not.
 */
int try_to_writeback_inodes_sb(struct super_block *sb, enum wb_reason reason)
{
	return try_to_writeback_inodes_sb_nr(sb, get_nr_dirty_pages(), reason);
}
EXPORT_SYMBOL(try_to_writeback_inodes_sb);

/**
 * sync_inodes_sb - sync sb inode pages
 * @sb: the superblock
 *
 * This function writes and waits on any dirty inode belonging to this
 * super_block.
 */
void sync_inodes_sb(struct super_block *sb)
{
	DECLARE_COMPLETION_ONSTACK(done);
	struct wb_writeback_work work = {
		.sb		= sb,
		.sync_mode	= WB_SYNC_ALL,
		.nr_pages	= LONG_MAX,
		.range_cyclic	= 0,
		.done		= &done,
		.reason		= WB_REASON_SYNC,
	};

	/* Nothing to do? */
	if (sb->s_bdi == &noop_backing_dev_info)
		return;
	WARN_ON(!rwsem_is_locked(&sb->s_umount));

	bdi_queue_work(sb->s_bdi, &work);
	wait_for_completion(&done);

	wait_sb_inodes(sb);
}
EXPORT_SYMBOL(sync_inodes_sb);

/**
 * write_inode_now - write an inode to disk
 * @inode: inode to write to disk
 * @sync: whether the write should be synchronous or not
 *
 * This function commits an inode to disk immediately if it is dirty. This is
 * primarily needed by knfsd.
 *
 * The caller must either have a ref on the inode or must have set I_WILL_FREE.
 */
int write_inode_now(struct inode *inode, int sync)
{
	struct bdi_writeback *wb = &inode_to_bdi(inode)->wb;
	struct writeback_control wbc = {
		.nr_to_write = LONG_MAX,
		.sync_mode = sync ? WB_SYNC_ALL : WB_SYNC_NONE,
		.range_start = 0,
		.range_end = LLONG_MAX,
	};

	if (!mapping_cap_writeback_dirty(inode->i_mapping))
		wbc.nr_to_write = 0;

	might_sleep();
	return writeback_single_inode(inode, wb, &wbc);
}
EXPORT_SYMBOL(write_inode_now);

/**
 * sync_inode - write an inode and its pages to disk.
 * @inode: the inode to sync
 * @wbc: controls the writeback mode
 *
 * sync_inode() will write an inode and its pages to disk.  It will also
 * correctly update the inode on its superblock's dirty inode lists and will
 * update inode->i_state.
 *
 * The caller must have a ref on the inode.
 */
int sync_inode(struct inode *inode, struct writeback_control *wbc)
{
	return writeback_single_inode(inode, &inode_to_bdi(inode)->wb, wbc);
}
EXPORT_SYMBOL(sync_inode);

/**
 * sync_inode_metadata - write an inode to disk
 * @inode: the inode to sync
 * @wait: wait for I/O to complete.
 *
 * Write an inode to disk and adjust its dirty state after completion.
 *
 * Note: only writes the actual inode, no associated data or other metadata.
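 *
 * Typically used from ->fsync() paths (e.g. generic_file_fsync()) once the
 * data pages have already been written.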
 */
int sync_inode_metadata(struct inode *inode, int wait)
{
	struct writeback_control wbc = {
		.sync_mode = wait ? WB_SYNC_ALL : WB_SYNC_NONE,
		.nr_to_write = 0, /* metadata-only */
	};

	return sync_inode(inode, &wbc);
}
EXPORT_SYMBOL(sync_inode_metadata);