/*
 * segment.c - NILFS segment constructor.
 *
 * Copyright (C) 2005-2008 Nippon Telegraph and Telephone Corporation.
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
 *
 * Written by Ryusuke Konishi <ryusuke@osrg.net>
 *
 */

#include <linux/pagemap.h>
#include <linux/buffer_head.h>
#include <linux/writeback.h>
#include <linux/bitops.h>
#include <linux/bio.h>
#include <linux/completion.h>
#include <linux/blkdev.h>
#include <linux/backing-dev.h>
#include <linux/freezer.h>
#include <linux/kthread.h>
#include <linux/crc32.h>
#include <linux/pagevec.h>
#include <linux/slab.h>
#include "nilfs.h"
#include "btnode.h"
#include "page.h"
#include "segment.h"
#include "sufile.h"
#include "cpfile.h"
#include "ifile.h"
#include "segbuf.h"


/*
 * Segment constructor
 */
#define SC_N_INODEVEC	16	/* Size of locally allocated inode vector */

#define SC_MAX_SEGDELTA 64	/* Upper limit of the number of segments
				   appended in collection retry loop */

/* Construction mode */
enum {
	SC_LSEG_SR = 1,	/* Make a logical segment having a super root */
	SC_LSEG_DSYNC,	/* Flush data blocks of a given file and make
			   a logical segment without a super root */
	SC_FLUSH_FILE,	/* Flush data files, leads to segment writes without
			   creating a checkpoint */
	SC_FLUSH_DAT,	/* Flush DAT file.  This also creates segments without
			   a checkpoint */
};

/* Stage numbers of dirty block collection */
enum {
	NILFS_ST_INIT = 0,
	NILFS_ST_GC,		/* Collecting dirty blocks for GC */
	NILFS_ST_FILE,
	NILFS_ST_IFILE,
	NILFS_ST_CPFILE,
	NILFS_ST_SUFILE,
	NILFS_ST_DAT,
	NILFS_ST_SR,		/* Super root */
	NILFS_ST_DSYNC,		/* Data sync blocks */
	NILFS_ST_DONE,
};

#define CREATE_TRACE_POINTS
#include <trace/events/nilfs2.h>

/*
 * nilfs_sc_cstage_inc(), nilfs_sc_cstage_set(), nilfs_sc_cstage_get() are
 * wrapper functions for the stage count (nilfs_sc_info->sc_stage.scnt).
 * Users of the variable must use them because every transition of the
 * stage count must emit trace events
 * (trace_nilfs2_collection_stage_transition).
 *
 * nilfs_sc_cstage_get() isn't required for the above purpose because it
 * doesn't produce tracepoint events.  It is provided just for making the
 * intention clear.
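 *
 * For example, advancing the stage from NILFS_ST_IFILE to NILFS_ST_CPFILE
 * should be done with nilfs_sc_cstage_inc(sci) rather than a bare
 * sci->sc_stage.scnt++, so that the transition shows up in the trace log.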
 */
static inline void nilfs_sc_cstage_inc(struct nilfs_sc_info *sci)
{
	sci->sc_stage.scnt++;
	trace_nilfs2_collection_stage_transition(sci);
}

static inline void nilfs_sc_cstage_set(struct nilfs_sc_info *sci, int next_scnt)
{
	sci->sc_stage.scnt = next_scnt;
	trace_nilfs2_collection_stage_transition(sci);
}

static inline int nilfs_sc_cstage_get(struct nilfs_sc_info *sci)
{
	return sci->sc_stage.scnt;
}

/* State flags of collection */
#define NILFS_CF_NODE		0x0001	/* Collecting node blocks */
#define NILFS_CF_IFILE_STARTED	0x0002	/* IFILE stage has started */
#define NILFS_CF_SUFREED	0x0004	/* segment usages have been freed */
#define NILFS_CF_HISTORY_MASK	(NILFS_CF_IFILE_STARTED | NILFS_CF_SUFREED)

/* Operations depending on the construction mode and file type */
struct nilfs_sc_operations {
	int (*collect_data)(struct nilfs_sc_info *, struct buffer_head *,
			    struct inode *);
	int (*collect_node)(struct nilfs_sc_info *, struct buffer_head *,
			    struct inode *);
	int (*collect_bmap)(struct nilfs_sc_info *, struct buffer_head *,
			    struct inode *);
	void (*write_data_binfo)(struct nilfs_sc_info *,
				 struct nilfs_segsum_pointer *,
				 union nilfs_binfo *);
	void (*write_node_binfo)(struct nilfs_sc_info *,
				 struct nilfs_segsum_pointer *,
				 union nilfs_binfo *);
};

/*
 * Other definitions
 */
static void nilfs_segctor_start_timer(struct nilfs_sc_info *);
static void nilfs_segctor_do_flush(struct nilfs_sc_info *, int);
static void nilfs_segctor_do_immediate_flush(struct nilfs_sc_info *);
static void nilfs_dispose_list(struct the_nilfs *, struct list_head *, int);

#define nilfs_cnt32_gt(a, b)						\
	(typecheck(__u32, a) && typecheck(__u32, b) &&			\
	 ((__s32)(b) - (__s32)(a) < 0))
#define nilfs_cnt32_ge(a, b)						\
	(typecheck(__u32, a) && typecheck(__u32, b) &&			\
	 ((__s32)(a) - (__s32)(b) >= 0))
#define nilfs_cnt32_lt(a, b)	nilfs_cnt32_gt(b, a)
#define nilfs_cnt32_le(a, b)	nilfs_cnt32_ge(b, a)

static int nilfs_prepare_segment_lock(struct nilfs_transaction_info *ti)
{
	struct nilfs_transaction_info *cur_ti = current->journal_info;
	void *save = NULL;

	if (cur_ti) {
		if (cur_ti->ti_magic == NILFS_TI_MAGIC)
			return ++cur_ti->ti_count;
		else {
			/*
			 * If the journal_info field is occupied by another
			 * FS, it is saved and will be restored on
			 * nilfs_transaction_commit().
			 */
			printk(KERN_WARNING
			       "NILFS warning: journal info from a different "
			       "FS\n");
			save = current->journal_info;
		}
	}
	if (!ti) {
		ti = kmem_cache_alloc(nilfs_transaction_cachep, GFP_NOFS);
		if (!ti)
			return -ENOMEM;
		ti->ti_flags = NILFS_TI_DYNAMIC_ALLOC;
	} else {
		ti->ti_flags = 0;
	}
	ti->ti_count = 0;
	ti->ti_save = save;
	ti->ti_magic = NILFS_TI_MAGIC;
	current->journal_info = ti;
	return 0;
}

/**
 * nilfs_transaction_begin - start indivisible file operations.
 * @sb: super block
 * @ti: nilfs_transaction_info
 * @vacancy_check: flags for vacancy rate checks
 *
 * nilfs_transaction_begin() acquires a reader/writer semaphore, called
 * the segment semaphore, to make a segment construction and write tasks
 * exclusive.  This function is used in pairs with
 * nilfs_transaction_commit().  The region enclosed by these two functions
 * can be nested.
 * To avoid a deadlock, the semaphore is only acquired or released in the
 * outermost call.
 *
 * This function allocates a nilfs_transaction_info struct to keep context
 * information on it.  It is initialized and hooked onto the current task in
 * the outermost call.  If a pre-allocated struct is given to @ti, it is used
 * instead; otherwise a new struct is assigned from a slab.
 *
 * When the @vacancy_check flag is set, this function will check the amount
 * of free space, and will wait for the GC to reclaim disk space if the
 * capacity is low.
 *
 * Return Value: On success, 0 is returned.  On error, one of the following
 * negative error codes is returned.
 *
 * %-ENOMEM - Insufficient memory available.
 *
 * %-ENOSPC - No space left on device
 */
int nilfs_transaction_begin(struct super_block *sb,
			    struct nilfs_transaction_info *ti,
			    int vacancy_check)
{
	struct the_nilfs *nilfs;
	int ret = nilfs_prepare_segment_lock(ti);
	struct nilfs_transaction_info *trace_ti;

	if (unlikely(ret < 0))
		return ret;
	if (ret > 0) {
		trace_ti = current->journal_info;

		trace_nilfs2_transaction_transition(sb, trace_ti,
				    trace_ti->ti_count, trace_ti->ti_flags,
				    TRACE_NILFS2_TRANSACTION_BEGIN);
		return 0;
	}

	sb_start_intwrite(sb);

	nilfs = sb->s_fs_info;
	down_read(&nilfs->ns_segctor_sem);
	if (vacancy_check && nilfs_near_disk_full(nilfs)) {
		up_read(&nilfs->ns_segctor_sem);
		ret = -ENOSPC;
		goto failed;
	}

	trace_ti = current->journal_info;
	trace_nilfs2_transaction_transition(sb, trace_ti, trace_ti->ti_count,
					    trace_ti->ti_flags,
					    TRACE_NILFS2_TRANSACTION_BEGIN);
	return 0;

 failed:
	ti = current->journal_info;
	current->journal_info = ti->ti_save;
	if (ti->ti_flags & NILFS_TI_DYNAMIC_ALLOC)
		kmem_cache_free(nilfs_transaction_cachep, ti);
	sb_end_intwrite(sb);
	return ret;
}

/**
 * nilfs_transaction_commit - commit indivisible file operations.
 * @sb: super block
 *
 * nilfs_transaction_commit() releases the read semaphore which is
 * acquired by nilfs_transaction_begin().  This is only performed
 * in the outermost call of this function.  If a commit flag is set,
 * nilfs_transaction_commit() sets a timer to start the segment
 * constructor.  If a sync flag is set, it starts construction
 * directly.
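 *
 * A typical caller pairs these functions like the following sketch
 * (assuming the enclosed operation dirties blocks; error handling of
 * that operation is elided):
 *
 *	struct nilfs_transaction_info ti;
 *	int err;
 *
 *	err = nilfs_transaction_begin(sb, &ti, 1);
 *	if (unlikely(err))
 *		return err;
 *	...dirty some blocks...
 *	err = nilfs_transaction_commit(sb);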
 */
int nilfs_transaction_commit(struct super_block *sb)
{
	struct nilfs_transaction_info *ti = current->journal_info;
	struct the_nilfs *nilfs = sb->s_fs_info;
	int err = 0;

	BUG_ON(ti == NULL || ti->ti_magic != NILFS_TI_MAGIC);
	ti->ti_flags |= NILFS_TI_COMMIT;
	if (ti->ti_count > 0) {
		ti->ti_count--;
		trace_nilfs2_transaction_transition(sb, ti, ti->ti_count,
			    ti->ti_flags, TRACE_NILFS2_TRANSACTION_COMMIT);
		return 0;
	}
	if (nilfs->ns_writer) {
		struct nilfs_sc_info *sci = nilfs->ns_writer;

		if (ti->ti_flags & NILFS_TI_COMMIT)
			nilfs_segctor_start_timer(sci);
		if (atomic_read(&nilfs->ns_ndirtyblks) > sci->sc_watermark)
			nilfs_segctor_do_flush(sci, 0);
	}
	up_read(&nilfs->ns_segctor_sem);
	trace_nilfs2_transaction_transition(sb, ti, ti->ti_count,
			    ti->ti_flags, TRACE_NILFS2_TRANSACTION_COMMIT);

	current->journal_info = ti->ti_save;

	if (ti->ti_flags & NILFS_TI_SYNC)
		err = nilfs_construct_segment(sb);
	if (ti->ti_flags & NILFS_TI_DYNAMIC_ALLOC)
		kmem_cache_free(nilfs_transaction_cachep, ti);
	sb_end_intwrite(sb);
	return err;
}

void nilfs_transaction_abort(struct super_block *sb)
{
	struct nilfs_transaction_info *ti = current->journal_info;
	struct the_nilfs *nilfs = sb->s_fs_info;

	BUG_ON(ti == NULL || ti->ti_magic != NILFS_TI_MAGIC);
	if (ti->ti_count > 0) {
		ti->ti_count--;
		trace_nilfs2_transaction_transition(sb, ti, ti->ti_count,
			    ti->ti_flags, TRACE_NILFS2_TRANSACTION_ABORT);
		return;
	}
	up_read(&nilfs->ns_segctor_sem);

	trace_nilfs2_transaction_transition(sb, ti, ti->ti_count,
		    ti->ti_flags, TRACE_NILFS2_TRANSACTION_ABORT);

	current->journal_info = ti->ti_save;
	if (ti->ti_flags & NILFS_TI_DYNAMIC_ALLOC)
		kmem_cache_free(nilfs_transaction_cachep, ti);
	sb_end_intwrite(sb);
}

void nilfs_relax_pressure_in_lock(struct super_block *sb)
{
	struct the_nilfs *nilfs = sb->s_fs_info;
	struct nilfs_sc_info *sci = nilfs->ns_writer;

	if (!sci || !sci->sc_flush_request)
		return;

	set_bit(NILFS_SC_PRIOR_FLUSH, &sci->sc_flags);
	up_read(&nilfs->ns_segctor_sem);

	down_write(&nilfs->ns_segctor_sem);
	if (sci->sc_flush_request &&
	    test_bit(NILFS_SC_PRIOR_FLUSH, &sci->sc_flags)) {
		struct nilfs_transaction_info *ti = current->journal_info;

		ti->ti_flags |= NILFS_TI_WRITER;
		nilfs_segctor_do_immediate_flush(sci);
		ti->ti_flags &= ~NILFS_TI_WRITER;
	}
	downgrade_write(&nilfs->ns_segctor_sem);
}

static void nilfs_transaction_lock(struct super_block *sb,
				   struct nilfs_transaction_info *ti,
				   int gcflag)
{
	struct nilfs_transaction_info *cur_ti = current->journal_info;
	struct the_nilfs *nilfs = sb->s_fs_info;
	struct nilfs_sc_info *sci = nilfs->ns_writer;

	WARN_ON(cur_ti);
	ti->ti_flags = NILFS_TI_WRITER;
	ti->ti_count = 0;
	ti->ti_save = cur_ti;
	ti->ti_magic = NILFS_TI_MAGIC;
	current->journal_info = ti;

	for (;;) {
		trace_nilfs2_transaction_transition(sb, ti, ti->ti_count,
			    ti->ti_flags, TRACE_NILFS2_TRANSACTION_TRYLOCK);

		down_write(&nilfs->ns_segctor_sem);
		if (!test_bit(NILFS_SC_PRIOR_FLUSH, &sci->sc_flags))
			break;

		nilfs_segctor_do_immediate_flush(sci);

		up_write(&nilfs->ns_segctor_sem);
		yield();
	}
	if (gcflag)
		ti->ti_flags |= NILFS_TI_GC;

	trace_nilfs2_transaction_transition(sb, ti, ti->ti_count,
					    ti->ti_flags,
					    TRACE_NILFS2_TRANSACTION_LOCK);
}

static void nilfs_transaction_unlock(struct super_block *sb)
{
	struct nilfs_transaction_info *ti = current->journal_info;
	struct the_nilfs *nilfs = sb->s_fs_info;

	BUG_ON(ti == NULL || ti->ti_magic != NILFS_TI_MAGIC);
	BUG_ON(ti->ti_count > 0);

	up_write(&nilfs->ns_segctor_sem);
	current->journal_info = ti->ti_save;

	trace_nilfs2_transaction_transition(sb, ti, ti->ti_count,
			    ti->ti_flags, TRACE_NILFS2_TRANSACTION_UNLOCK);
}

static void *nilfs_segctor_map_segsum_entry(struct nilfs_sc_info *sci,
					    struct nilfs_segsum_pointer *ssp,
					    unsigned bytes)
{
	struct nilfs_segment_buffer *segbuf = sci->sc_curseg;
	unsigned blocksize = sci->sc_super->s_blocksize;
	void *p;

	if (unlikely(ssp->offset + bytes > blocksize)) {
		ssp->offset = 0;
		BUG_ON(NILFS_SEGBUF_BH_IS_LAST(ssp->bh,
					       &segbuf->sb_segsum_buffers));
		ssp->bh = NILFS_SEGBUF_NEXT_BH(ssp->bh);
	}
	p = ssp->bh->b_data + ssp->offset;
	ssp->offset += bytes;
	return p;
}

/**
 * nilfs_segctor_reset_segment_buffer - reset the current segment buffer
 * @sci: nilfs_sc_info
 */
static int nilfs_segctor_reset_segment_buffer(struct nilfs_sc_info *sci)
{
	struct nilfs_segment_buffer *segbuf = sci->sc_curseg;
	struct buffer_head *sumbh;
	unsigned sumbytes;
	unsigned flags = 0;
	int err;

	if (nilfs_doing_gc())
		flags = NILFS_SS_GC;
	err = nilfs_segbuf_reset(segbuf, flags, sci->sc_seg_ctime, sci->sc_cno);
	if (unlikely(err))
		return err;

	sumbh = NILFS_SEGBUF_FIRST_BH(&segbuf->sb_segsum_buffers);
	sumbytes = segbuf->sb_sum.sumbytes;
	sci->sc_finfo_ptr.bh = sumbh;  sci->sc_finfo_ptr.offset = sumbytes;
	sci->sc_binfo_ptr.bh = sumbh;  sci->sc_binfo_ptr.offset = sumbytes;
	sci->sc_blk_cnt = sci->sc_datablk_cnt = 0;
	return 0;
}

static int nilfs_segctor_feed_segment(struct nilfs_sc_info *sci)
{
	sci->sc_nblk_this_inc += sci->sc_curseg->sb_sum.nblocks;
	if (NILFS_SEGBUF_IS_LAST(sci->sc_curseg, &sci->sc_segbufs))
		return -E2BIG; /* The current segment is filled up
				  (internal code) */
	sci->sc_curseg = NILFS_NEXT_SEGBUF(sci->sc_curseg);
	return nilfs_segctor_reset_segment_buffer(sci);
}

static int nilfs_segctor_add_super_root(struct nilfs_sc_info *sci)
{
	struct nilfs_segment_buffer *segbuf = sci->sc_curseg;
	int err;

	if (segbuf->sb_sum.nblocks >= segbuf->sb_rest_blocks) {
		err = nilfs_segctor_feed_segment(sci);
		if (err)
			return err;
		segbuf = sci->sc_curseg;
	}
	err = nilfs_segbuf_extend_payload(segbuf, &segbuf->sb_super_root);
	if (likely(!err))
		segbuf->sb_sum.flags |= NILFS_SS_SR;
	return err;
}

/*
 * Functions for making segment summary and payloads
 */
static int nilfs_segctor_segsum_block_required(
	struct nilfs_sc_info *sci, const struct nilfs_segsum_pointer *ssp,
	unsigned binfo_size)
{
	unsigned blocksize = sci->sc_super->s_blocksize;
	/* The finfo and binfo sizes are small compared with the blocksize */

	return ssp->offset + binfo_size +
		(!sci->sc_blk_cnt ?
		 sizeof(struct nilfs_finfo) : 0) > blocksize;
}

static void nilfs_segctor_begin_finfo(struct nilfs_sc_info *sci,
				      struct inode *inode)
{
	sci->sc_curseg->sb_sum.nfinfo++;
	sci->sc_binfo_ptr = sci->sc_finfo_ptr;
	nilfs_segctor_map_segsum_entry(
		sci, &sci->sc_binfo_ptr, sizeof(struct nilfs_finfo));

	if (NILFS_I(inode)->i_root &&
	    !test_bit(NILFS_SC_HAVE_DELTA, &sci->sc_flags))
		set_bit(NILFS_SC_HAVE_DELTA, &sci->sc_flags);
	/* skip finfo */
}

static void nilfs_segctor_end_finfo(struct nilfs_sc_info *sci,
				    struct inode *inode)
{
	struct nilfs_finfo *finfo;
	struct nilfs_inode_info *ii;
	struct nilfs_segment_buffer *segbuf;
	__u64 cno;

	if (sci->sc_blk_cnt == 0)
		return;

	ii = NILFS_I(inode);

	if (test_bit(NILFS_I_GCINODE, &ii->i_state))
		cno = ii->i_cno;
	else if (NILFS_ROOT_METADATA_FILE(inode->i_ino))
		cno = 0;
	else
		cno = sci->sc_cno;

	finfo = nilfs_segctor_map_segsum_entry(sci, &sci->sc_finfo_ptr,
					       sizeof(*finfo));
	finfo->fi_ino = cpu_to_le64(inode->i_ino);
	finfo->fi_nblocks = cpu_to_le32(sci->sc_blk_cnt);
	finfo->fi_ndatablk = cpu_to_le32(sci->sc_datablk_cnt);
	finfo->fi_cno = cpu_to_le64(cno);

	segbuf = sci->sc_curseg;
	segbuf->sb_sum.sumbytes = sci->sc_binfo_ptr.offset +
		sci->sc_super->s_blocksize * (segbuf->sb_sum.nsumblk - 1);
	sci->sc_finfo_ptr = sci->sc_binfo_ptr;
	sci->sc_blk_cnt = sci->sc_datablk_cnt = 0;
}

static int nilfs_segctor_add_file_block(struct nilfs_sc_info *sci,
					struct buffer_head *bh,
					struct inode *inode,
					unsigned binfo_size)
{
	struct nilfs_segment_buffer *segbuf;
	int required, err = 0;

 retry:
	segbuf = sci->sc_curseg;
	required = nilfs_segctor_segsum_block_required(
		sci, &sci->sc_binfo_ptr, binfo_size);
	if (segbuf->sb_sum.nblocks + required + 1 > segbuf->sb_rest_blocks) {
		nilfs_segctor_end_finfo(sci, inode);
		err = nilfs_segctor_feed_segment(sci);
		if (err)
			return err;
		goto retry;
	}
	if (unlikely(required)) {
		err = nilfs_segbuf_extend_segsum(segbuf);
		if (unlikely(err))
			goto failed;
	}
	if (sci->sc_blk_cnt == 0)
		nilfs_segctor_begin_finfo(sci, inode);

	nilfs_segctor_map_segsum_entry(sci, &sci->sc_binfo_ptr, binfo_size);
	/* Substitution to vblocknr is delayed until update_blocknr() */
	nilfs_segbuf_add_file_buffer(segbuf, bh);
	sci->sc_blk_cnt++;
 failed:
	return err;
}

/*
 * Callback functions that enumerate, mark, and collect dirty blocks
 */
static int nilfs_collect_file_data(struct nilfs_sc_info *sci,
				   struct buffer_head *bh, struct inode *inode)
{
	int err;

	err = nilfs_bmap_propagate(NILFS_I(inode)->i_bmap, bh);
	if (err < 0)
		return err;

	err = nilfs_segctor_add_file_block(sci, bh, inode,
					   sizeof(struct nilfs_binfo_v));
	if (!err)
		sci->sc_datablk_cnt++;
	return err;
}

static int nilfs_collect_file_node(struct nilfs_sc_info *sci,
				   struct buffer_head *bh,
				   struct inode *inode)
{
	return nilfs_bmap_propagate(NILFS_I(inode)->i_bmap, bh);
}

static int nilfs_collect_file_bmap(struct nilfs_sc_info *sci,
				   struct buffer_head *bh,
				   struct inode *inode)
{
	WARN_ON(!buffer_dirty(bh));
	return nilfs_segctor_add_file_block(sci, bh, inode, sizeof(__le64));
}

static void nilfs_write_file_data_binfo(struct nilfs_sc_info *sci,
					struct nilfs_segsum_pointer *ssp,
					union nilfs_binfo *binfo)
{
	struct nilfs_binfo_v *binfo_v = nilfs_segctor_map_segsum_entry(
		sci, ssp, sizeof(*binfo_v));
	*binfo_v = binfo->bi_v;
}

static void nilfs_write_file_node_binfo(struct nilfs_sc_info *sci,
					struct nilfs_segsum_pointer *ssp,
					union nilfs_binfo *binfo)
{
	__le64 *vblocknr = nilfs_segctor_map_segsum_entry(
		sci, ssp, sizeof(*vblocknr));
	*vblocknr = binfo->bi_v.bi_vblocknr;
}

static struct nilfs_sc_operations nilfs_sc_file_ops = {
	.collect_data = nilfs_collect_file_data,
	.collect_node = nilfs_collect_file_node,
	.collect_bmap = nilfs_collect_file_bmap,
	.write_data_binfo = nilfs_write_file_data_binfo,
	.write_node_binfo = nilfs_write_file_node_binfo,
};

static int nilfs_collect_dat_data(struct nilfs_sc_info *sci,
				  struct buffer_head *bh, struct inode *inode)
{
	int err;

	err = nilfs_bmap_propagate(NILFS_I(inode)->i_bmap, bh);
	if (err < 0)
		return err;

	err = nilfs_segctor_add_file_block(sci, bh, inode, sizeof(__le64));
	if (!err)
		sci->sc_datablk_cnt++;
	return err;
}

static int nilfs_collect_dat_bmap(struct nilfs_sc_info *sci,
				  struct buffer_head *bh, struct inode *inode)
{
	WARN_ON(!buffer_dirty(bh));
	return nilfs_segctor_add_file_block(sci, bh, inode,
					    sizeof(struct nilfs_binfo_dat));
}

static void nilfs_write_dat_data_binfo(struct nilfs_sc_info *sci,
				       struct nilfs_segsum_pointer *ssp,
				       union nilfs_binfo *binfo)
{
	__le64 *blkoff = nilfs_segctor_map_segsum_entry(sci, ssp,
							sizeof(*blkoff));
	*blkoff = binfo->bi_dat.bi_blkoff;
}

static void nilfs_write_dat_node_binfo(struct nilfs_sc_info *sci,
				       struct nilfs_segsum_pointer *ssp,
				       union nilfs_binfo *binfo)
{
	struct nilfs_binfo_dat *binfo_dat =
		nilfs_segctor_map_segsum_entry(sci, ssp, sizeof(*binfo_dat));
	*binfo_dat = binfo->bi_dat;
}

static struct nilfs_sc_operations nilfs_sc_dat_ops = {
	.collect_data = nilfs_collect_dat_data,
	.collect_node = nilfs_collect_file_node,
	.collect_bmap = nilfs_collect_dat_bmap,
	.write_data_binfo = nilfs_write_dat_data_binfo,
	.write_node_binfo = nilfs_write_dat_node_binfo,
};

static struct nilfs_sc_operations nilfs_sc_dsync_ops = {
	.collect_data = nilfs_collect_file_data,
	.collect_node = NULL,
	.collect_bmap = NULL,
	.write_data_binfo = nilfs_write_file_data_binfo,
	.write_node_binfo = NULL,
};

static size_t nilfs_lookup_dirty_data_buffers(struct inode *inode,
					      struct list_head *listp,
					      size_t nlimit,
					      loff_t start, loff_t end)
{
	struct address_space *mapping = inode->i_mapping;
	struct pagevec pvec;
	pgoff_t index = 0, last = ULONG_MAX;
	size_t ndirties = 0;
	int i;

	if (unlikely(start != 0 || end != LLONG_MAX)) {
		/*
		 * A valid range is given for sync-ing data pages.  The
		 * range is rounded to page boundaries; extra dirty buffers
		 * may be included if blocksize < pagesize.
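		 * For example, with 1 KiB blocks in 4 KiB pages, a range
		 * that ends halfway into a page still picks up every dirty
		 * buffer of that page, not only those inside the range.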
		 */
		index = start >> PAGE_SHIFT;
		last = end >> PAGE_SHIFT;
	}
	pagevec_init(&pvec, 0);
 repeat:
	if (unlikely(index > last) ||
	    !pagevec_lookup_tag(&pvec, mapping, &index, PAGECACHE_TAG_DIRTY,
				min_t(pgoff_t, last - index,
				      PAGEVEC_SIZE - 1) + 1))
		return ndirties;

	for (i = 0; i < pagevec_count(&pvec); i++) {
		struct buffer_head *bh, *head;
		struct page *page = pvec.pages[i];

		if (unlikely(page->index > last))
			break;

		lock_page(page);
		if (!page_has_buffers(page))
			create_empty_buffers(page, 1 << inode->i_blkbits, 0);
		unlock_page(page);

		bh = head = page_buffers(page);
		do {
			if (!buffer_dirty(bh) || buffer_async_write(bh))
				continue;
			get_bh(bh);
			list_add_tail(&bh->b_assoc_buffers, listp);
			ndirties++;
			if (unlikely(ndirties >= nlimit)) {
				pagevec_release(&pvec);
				cond_resched();
				return ndirties;
			}
		} while (bh = bh->b_this_page, bh != head);
	}
	pagevec_release(&pvec);
	cond_resched();
	goto repeat;
}

static void nilfs_lookup_dirty_node_buffers(struct inode *inode,
					    struct list_head *listp)
{
	struct nilfs_inode_info *ii = NILFS_I(inode);
	struct address_space *mapping = &ii->i_btnode_cache;
	struct pagevec pvec;
	struct buffer_head *bh, *head;
	unsigned int i;
	pgoff_t index = 0;

	pagevec_init(&pvec, 0);

	while (pagevec_lookup_tag(&pvec, mapping, &index, PAGECACHE_TAG_DIRTY,
				  PAGEVEC_SIZE)) {
		for (i = 0; i < pagevec_count(&pvec); i++) {
			bh = head = page_buffers(pvec.pages[i]);
			do {
				if (buffer_dirty(bh) &&
				    !buffer_async_write(bh)) {
					get_bh(bh);
					list_add_tail(&bh->b_assoc_buffers,
						      listp);
				}
				bh = bh->b_this_page;
			} while (bh != head);
		}
		pagevec_release(&pvec);
		cond_resched();
	}
}

static void nilfs_dispose_list(struct the_nilfs *nilfs,
			       struct list_head *head, int force)
{
	struct nilfs_inode_info *ii, *n;
	struct nilfs_inode_info *ivec[SC_N_INODEVEC], **pii;
	unsigned nv = 0;

	while (!list_empty(head)) {
		spin_lock(&nilfs->ns_inode_lock);
		list_for_each_entry_safe(ii, n, head, i_dirty) {
			list_del_init(&ii->i_dirty);
			if (force) {
				if (unlikely(ii->i_bh)) {
					brelse(ii->i_bh);
					ii->i_bh = NULL;
				}
			} else if (test_bit(NILFS_I_DIRTY, &ii->i_state)) {
				set_bit(NILFS_I_QUEUED, &ii->i_state);
				list_add_tail(&ii->i_dirty,
					      &nilfs->ns_dirty_files);
				continue;
			}
			ivec[nv++] = ii;
			if (nv == SC_N_INODEVEC)
				break;
		}
		spin_unlock(&nilfs->ns_inode_lock);

		for (pii = ivec; nv > 0; pii++, nv--)
			iput(&(*pii)->vfs_inode);
	}
}

static void nilfs_iput_work_func(struct work_struct *work)
{
	struct nilfs_sc_info *sci = container_of(work, struct nilfs_sc_info,
						 sc_iput_work);
	struct the_nilfs *nilfs = sci->sc_super->s_fs_info;

	nilfs_dispose_list(nilfs, &sci->sc_iput_queue, 0);
}

static int nilfs_test_metadata_dirty(struct the_nilfs *nilfs,
				     struct nilfs_root *root)
{
	int ret = 0;

	if (nilfs_mdt_fetch_dirty(root->ifile))
		ret++;
	if (nilfs_mdt_fetch_dirty(nilfs->ns_cpfile))
		ret++;
	if (nilfs_mdt_fetch_dirty(nilfs->ns_sufile))
		ret++;
	if ((ret || nilfs_doing_gc()) && nilfs_mdt_fetch_dirty(nilfs->ns_dat))
		ret++;
	return ret;
}

static int nilfs_segctor_clean(struct nilfs_sc_info *sci)
{
	return list_empty(&sci->sc_dirty_files) &&
		!test_bit(NILFS_SC_DIRTY, &sci->sc_flags) &&
		sci->sc_nfreesegs == 0 &&
		(!nilfs_doing_gc() || list_empty(&sci->sc_gc_inodes));
}

static int nilfs_segctor_confirm(struct nilfs_sc_info *sci)
{
	struct the_nilfs *nilfs = sci->sc_super->s_fs_info;
	int ret = 0;

	if (nilfs_test_metadata_dirty(nilfs, sci->sc_root))
		set_bit(NILFS_SC_DIRTY, &sci->sc_flags);

	spin_lock(&nilfs->ns_inode_lock);
	if (list_empty(&nilfs->ns_dirty_files) && nilfs_segctor_clean(sci))
		ret++;

	spin_unlock(&nilfs->ns_inode_lock);
	return ret;
}

static void nilfs_segctor_clear_metadata_dirty(struct nilfs_sc_info *sci)
{
	struct the_nilfs *nilfs = sci->sc_super->s_fs_info;

	nilfs_mdt_clear_dirty(sci->sc_root->ifile);
	nilfs_mdt_clear_dirty(nilfs->ns_cpfile);
	nilfs_mdt_clear_dirty(nilfs->ns_sufile);
	nilfs_mdt_clear_dirty(nilfs->ns_dat);
}

static int nilfs_segctor_create_checkpoint(struct nilfs_sc_info *sci)
{
	struct the_nilfs *nilfs = sci->sc_super->s_fs_info;
	struct buffer_head *bh_cp;
	struct nilfs_checkpoint *raw_cp;
	int err;

	/* XXX: this interface will be changed */
	err = nilfs_cpfile_get_checkpoint(nilfs->ns_cpfile, nilfs->ns_cno, 1,
					  &raw_cp, &bh_cp);
	if (likely(!err)) {
		/*
		 * This code duplicates part of cpfile, but it is needed
		 * so that the checkpoint is collected even if it was not
		 * newly created.
		 */
		mark_buffer_dirty(bh_cp);
		nilfs_mdt_mark_dirty(nilfs->ns_cpfile);
		nilfs_cpfile_put_checkpoint(
			nilfs->ns_cpfile, nilfs->ns_cno, bh_cp);
	} else
		WARN_ON(err == -EINVAL || err == -ENOENT);

	return err;
}

static int nilfs_segctor_fill_in_checkpoint(struct nilfs_sc_info *sci)
{
	struct the_nilfs *nilfs = sci->sc_super->s_fs_info;
	struct buffer_head *bh_cp;
	struct nilfs_checkpoint *raw_cp;
	int err;

	err = nilfs_cpfile_get_checkpoint(nilfs->ns_cpfile, nilfs->ns_cno, 0,
					  &raw_cp, &bh_cp);
	if (unlikely(err)) {
		WARN_ON(err == -EINVAL || err == -ENOENT);
		goto failed_ibh;
	}
	raw_cp->cp_snapshot_list.ssl_next = 0;
	raw_cp->cp_snapshot_list.ssl_prev = 0;
	raw_cp->cp_inodes_count =
		cpu_to_le64(atomic64_read(&sci->sc_root->inodes_count));
	raw_cp->cp_blocks_count =
		cpu_to_le64(atomic64_read(&sci->sc_root->blocks_count));
	raw_cp->cp_nblk_inc =
		cpu_to_le64(sci->sc_nblk_inc + sci->sc_nblk_this_inc);
	raw_cp->cp_create = cpu_to_le64(sci->sc_seg_ctime);
	raw_cp->cp_cno = cpu_to_le64(nilfs->ns_cno);

	if (test_bit(NILFS_SC_HAVE_DELTA, &sci->sc_flags))
		nilfs_checkpoint_clear_minor(raw_cp);
	else
		nilfs_checkpoint_set_minor(raw_cp);

	nilfs_write_inode_common(sci->sc_root->ifile,
				 &raw_cp->cp_ifile_inode, 1);
	nilfs_cpfile_put_checkpoint(nilfs->ns_cpfile, nilfs->ns_cno, bh_cp);
	return 0;

 failed_ibh:
	return err;
}

static void nilfs_fill_in_file_bmap(struct inode *ifile,
				    struct nilfs_inode_info *ii)
{
	struct buffer_head *ibh;
	struct nilfs_inode *raw_inode;

	if (test_bit(NILFS_I_BMAP, &ii->i_state)) {
		ibh = ii->i_bh;
		BUG_ON(!ibh);
		raw_inode = nilfs_ifile_map_inode(ifile, ii->vfs_inode.i_ino,
						  ibh);
		nilfs_bmap_write(ii->i_bmap, raw_inode);
		nilfs_ifile_unmap_inode(ifile, ii->vfs_inode.i_ino, ibh);
	}
}

static void nilfs_segctor_fill_in_file_bmap(struct nilfs_sc_info *sci)
{
	struct nilfs_inode_info *ii;

	list_for_each_entry(ii,
			    &sci->sc_dirty_files, i_dirty) {
		nilfs_fill_in_file_bmap(sci->sc_root->ifile, ii);
		set_bit(NILFS_I_COLLECTED, &ii->i_state);
	}
}

static void nilfs_segctor_fill_in_super_root(struct nilfs_sc_info *sci,
					     struct the_nilfs *nilfs)
{
	struct buffer_head *bh_sr;
	struct nilfs_super_root *raw_sr;
	unsigned isz, srsz;

	bh_sr = NILFS_LAST_SEGBUF(&sci->sc_segbufs)->sb_super_root;
	raw_sr = (struct nilfs_super_root *)bh_sr->b_data;
	isz = nilfs->ns_inode_size;
	srsz = NILFS_SR_BYTES(isz);

	raw_sr->sr_bytes = cpu_to_le16(srsz);
	raw_sr->sr_nongc_ctime
		= cpu_to_le64(nilfs_doing_gc() ?
			      nilfs->ns_nongc_ctime : sci->sc_seg_ctime);
	raw_sr->sr_flags = 0;

	nilfs_write_inode_common(nilfs->ns_dat, (void *)raw_sr +
				 NILFS_SR_DAT_OFFSET(isz), 1);
	nilfs_write_inode_common(nilfs->ns_cpfile, (void *)raw_sr +
				 NILFS_SR_CPFILE_OFFSET(isz), 1);
	nilfs_write_inode_common(nilfs->ns_sufile, (void *)raw_sr +
				 NILFS_SR_SUFILE_OFFSET(isz), 1);
	memset((void *)raw_sr + srsz, 0, nilfs->ns_blocksize - srsz);
}

static void nilfs_redirty_inodes(struct list_head *head)
{
	struct nilfs_inode_info *ii;

	list_for_each_entry(ii, head, i_dirty) {
		if (test_bit(NILFS_I_COLLECTED, &ii->i_state))
			clear_bit(NILFS_I_COLLECTED, &ii->i_state);
	}
}

static void nilfs_drop_collected_inodes(struct list_head *head)
{
	struct nilfs_inode_info *ii;

	list_for_each_entry(ii, head, i_dirty) {
		if (!test_and_clear_bit(NILFS_I_COLLECTED, &ii->i_state))
			continue;

		clear_bit(NILFS_I_INODE_SYNC, &ii->i_state);
		set_bit(NILFS_I_UPDATED, &ii->i_state);
	}
}

static int nilfs_segctor_apply_buffers(struct nilfs_sc_info *sci,
				       struct inode *inode,
				       struct list_head *listp,
				       int (*collect)(struct nilfs_sc_info *,
						      struct buffer_head *,
						      struct inode *))
{
	struct buffer_head *bh, *n;
	int err = 0;

	if (collect) {
		list_for_each_entry_safe(bh, n, listp, b_assoc_buffers) {
			list_del_init(&bh->b_assoc_buffers);
			err = collect(sci, bh, inode);
			brelse(bh);
			if (unlikely(err))
				goto dispose_buffers;
		}
		return 0;
	}

 dispose_buffers:
	while (!list_empty(listp)) {
		bh = list_first_entry(listp, struct buffer_head,
				      b_assoc_buffers);
		list_del_init(&bh->b_assoc_buffers);
		brelse(bh);
	}
	return err;
}

static size_t nilfs_segctor_buffer_rest(struct nilfs_sc_info *sci)
{
	/* Remaining number of blocks within segment buffer */
	return sci->sc_segbuf_nblocks -
		(sci->sc_nblk_this_inc + sci->sc_curseg->sb_sum.nblocks);
}

static int nilfs_segctor_scan_file(struct nilfs_sc_info *sci,
				   struct inode *inode,
				   struct nilfs_sc_operations *sc_ops)
{
	LIST_HEAD(data_buffers);
	LIST_HEAD(node_buffers);
	int err;

	if (!(sci->sc_stage.flags & NILFS_CF_NODE)) {
		size_t n, rest = nilfs_segctor_buffer_rest(sci);

		n = nilfs_lookup_dirty_data_buffers(
			inode, &data_buffers, rest + 1, 0, LLONG_MAX);
		if (n > rest) {
			err = nilfs_segctor_apply_buffers(
				sci, inode, &data_buffers,
				sc_ops->collect_data);
			BUG_ON(!err); /* always receive -E2BIG or true error */
			goto break_or_fail;
		}
	}
	nilfs_lookup_dirty_node_buffers(inode, &node_buffers);

	if (!(sci->sc_stage.flags & NILFS_CF_NODE)) {
		err = nilfs_segctor_apply_buffers(sci,
			inode, &data_buffers, sc_ops->collect_data);
		if (unlikely(err)) {
			/* dispose node list */
			nilfs_segctor_apply_buffers(
				sci, inode, &node_buffers, NULL);
			goto break_or_fail;
		}
		sci->sc_stage.flags |= NILFS_CF_NODE;
	}
	/* Collect node */
	err = nilfs_segctor_apply_buffers(
		sci, inode, &node_buffers, sc_ops->collect_node);
	if (unlikely(err))
		goto break_or_fail;

	nilfs_bmap_lookup_dirty_buffers(NILFS_I(inode)->i_bmap, &node_buffers);
	err = nilfs_segctor_apply_buffers(
		sci, inode, &node_buffers, sc_ops->collect_bmap);
	if (unlikely(err))
		goto break_or_fail;

	nilfs_segctor_end_finfo(sci, inode);
	sci->sc_stage.flags &= ~NILFS_CF_NODE;

 break_or_fail:
	return err;
}

static int nilfs_segctor_scan_file_dsync(struct nilfs_sc_info *sci,
					 struct inode *inode)
{
	LIST_HEAD(data_buffers);
	size_t n, rest = nilfs_segctor_buffer_rest(sci);
	int err;

	n = nilfs_lookup_dirty_data_buffers(inode, &data_buffers, rest + 1,
					    sci->sc_dsync_start,
					    sci->sc_dsync_end);

	err = nilfs_segctor_apply_buffers(sci, inode, &data_buffers,
					  nilfs_collect_file_data);
	if (!err) {
		nilfs_segctor_end_finfo(sci, inode);
		BUG_ON(n > rest);
		/* always receive -E2BIG or true error if n > rest */
	}
	return err;
}

static int nilfs_segctor_collect_blocks(struct nilfs_sc_info *sci, int mode)
{
	struct the_nilfs *nilfs = sci->sc_super->s_fs_info;
	struct list_head *head;
	struct nilfs_inode_info *ii;
	size_t ndone;
	int err = 0;

	switch (nilfs_sc_cstage_get(sci)) {
	case NILFS_ST_INIT:
		/* Pre-processes */
		sci->sc_stage.flags = 0;

		if (!test_bit(NILFS_SC_UNCLOSED, &sci->sc_flags)) {
			sci->sc_nblk_inc = 0;
			sci->sc_curseg->sb_sum.flags = NILFS_SS_LOGBGN;
			if (mode == SC_LSEG_DSYNC) {
				nilfs_sc_cstage_set(sci, NILFS_ST_DSYNC);
				goto dsync_mode;
			}
		}

		sci->sc_stage.dirty_file_ptr = NULL;
		sci->sc_stage.gc_inode_ptr = NULL;
		if (mode == SC_FLUSH_DAT) {
			nilfs_sc_cstage_set(sci, NILFS_ST_DAT);
			goto dat_stage;
		}
		nilfs_sc_cstage_inc(sci);  /* Fall through */
	case NILFS_ST_GC:
		if (nilfs_doing_gc()) {
			head = &sci->sc_gc_inodes;
			ii = list_prepare_entry(sci->sc_stage.gc_inode_ptr,
						head, i_dirty);
			list_for_each_entry_continue(ii, head, i_dirty) {
				err = nilfs_segctor_scan_file(
					sci, &ii->vfs_inode,
					&nilfs_sc_file_ops);
				if (unlikely(err)) {
					sci->sc_stage.gc_inode_ptr = list_entry(
						ii->i_dirty.prev,
						struct nilfs_inode_info,
						i_dirty);
					goto break_or_fail;
				}
				set_bit(NILFS_I_COLLECTED, &ii->i_state);
			}
			sci->sc_stage.gc_inode_ptr = NULL;
		}
		nilfs_sc_cstage_inc(sci);  /* Fall through */
	case NILFS_ST_FILE:
		head = &sci->sc_dirty_files;
		ii = list_prepare_entry(sci->sc_stage.dirty_file_ptr, head,
					i_dirty);
		list_for_each_entry_continue(ii, head, i_dirty) {
			clear_bit(NILFS_I_DIRTY, &ii->i_state);

			err = nilfs_segctor_scan_file(sci, &ii->vfs_inode,
						      &nilfs_sc_file_ops);
			if (unlikely(err)) {
				sci->sc_stage.dirty_file_ptr =
					list_entry(ii->i_dirty.prev,
						   struct nilfs_inode_info,
						   i_dirty);
				goto break_or_fail;
			}
			/* sci->sc_stage.dirty_file_ptr = NILFS_I(inode); */
			/* XXX: required ?
			 */
		}
		sci->sc_stage.dirty_file_ptr = NULL;
		if (mode == SC_FLUSH_FILE) {
			nilfs_sc_cstage_set(sci, NILFS_ST_DONE);
			return 0;
		}
		nilfs_sc_cstage_inc(sci);
		sci->sc_stage.flags |= NILFS_CF_IFILE_STARTED;
		/* Fall through */
	case NILFS_ST_IFILE:
		err = nilfs_segctor_scan_file(sci, sci->sc_root->ifile,
					      &nilfs_sc_file_ops);
		if (unlikely(err))
			break;
		nilfs_sc_cstage_inc(sci);
		/* Creating a checkpoint */
		err = nilfs_segctor_create_checkpoint(sci);
		if (unlikely(err))
			break;
		/* Fall through */
	case NILFS_ST_CPFILE:
		err = nilfs_segctor_scan_file(sci, nilfs->ns_cpfile,
					      &nilfs_sc_file_ops);
		if (unlikely(err))
			break;
		nilfs_sc_cstage_inc(sci);  /* Fall through */
	case NILFS_ST_SUFILE:
		err = nilfs_sufile_freev(nilfs->ns_sufile, sci->sc_freesegs,
					 sci->sc_nfreesegs, &ndone);
		if (unlikely(err)) {
			nilfs_sufile_cancel_freev(nilfs->ns_sufile,
						  sci->sc_freesegs, ndone,
						  NULL);
			break;
		}
		sci->sc_stage.flags |= NILFS_CF_SUFREED;

		err = nilfs_segctor_scan_file(sci, nilfs->ns_sufile,
					      &nilfs_sc_file_ops);
		if (unlikely(err))
			break;
		nilfs_sc_cstage_inc(sci);  /* Fall through */
	case NILFS_ST_DAT:
 dat_stage:
		err = nilfs_segctor_scan_file(sci, nilfs->ns_dat,
					      &nilfs_sc_dat_ops);
		if (unlikely(err))
			break;
		if (mode == SC_FLUSH_DAT) {
			nilfs_sc_cstage_set(sci, NILFS_ST_DONE);
			return 0;
		}
		nilfs_sc_cstage_inc(sci);  /* Fall through */
	case NILFS_ST_SR:
		if (mode == SC_LSEG_SR) {
			/* Appending a super root */
			err = nilfs_segctor_add_super_root(sci);
			if (unlikely(err))
				break;
		}
		/* End of a logical segment */
		sci->sc_curseg->sb_sum.flags |= NILFS_SS_LOGEND;
		nilfs_sc_cstage_set(sci, NILFS_ST_DONE);
		return 0;
	case NILFS_ST_DSYNC:
 dsync_mode:
		sci->sc_curseg->sb_sum.flags |= NILFS_SS_SYNDT;
		ii = sci->sc_dsync_inode;
		if (!test_bit(NILFS_I_BUSY, &ii->i_state))
			break;

		err = nilfs_segctor_scan_file_dsync(sci, &ii->vfs_inode);
		if (unlikely(err))
			break;
		sci->sc_curseg->sb_sum.flags |= NILFS_SS_LOGEND;
		nilfs_sc_cstage_set(sci, NILFS_ST_DONE);
		return 0;
	case NILFS_ST_DONE:
		return 0;
	default:
		BUG();
	}

 break_or_fail:
	return err;
}

/**
 * nilfs_segctor_begin_construction - setup segment buffer to make a new log
 * @sci: nilfs_sc_info
 * @nilfs: nilfs object
 */
static int nilfs_segctor_begin_construction(struct nilfs_sc_info *sci,
					    struct the_nilfs *nilfs)
{
	struct nilfs_segment_buffer *segbuf, *prev;
	__u64 nextnum;
	int err, alloc = 0;

	segbuf = nilfs_segbuf_new(sci->sc_super);
	if (unlikely(!segbuf))
		return -ENOMEM;

	if (list_empty(&sci->sc_write_logs)) {
		nilfs_segbuf_map(segbuf, nilfs->ns_segnum,
				 nilfs->ns_pseg_offset, nilfs);
		if (segbuf->sb_rest_blocks < NILFS_PSEG_MIN_BLOCKS) {
			nilfs_shift_to_next_segment(nilfs);
			nilfs_segbuf_map(segbuf, nilfs->ns_segnum, 0, nilfs);
		}

		segbuf->sb_sum.seg_seq = nilfs->ns_seg_seq;
		nextnum = nilfs->ns_nextnum;

		if (nilfs->ns_segnum == nilfs->ns_nextnum)
			/* Start from the head of a new full segment */
			alloc++;
	} else {
		/* Continue logs */
		prev = NILFS_LAST_SEGBUF(&sci->sc_write_logs);
		nilfs_segbuf_map_cont(segbuf, prev);
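		/*
		 * The new segment buffer takes over the sequence number and
		 * the next segment number of the last log queued for
		 * writing.
		 */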
		segbuf->sb_sum.seg_seq = prev->sb_sum.seg_seq;
		nextnum = prev->sb_nextnum;

		if (segbuf->sb_rest_blocks < NILFS_PSEG_MIN_BLOCKS) {
			nilfs_segbuf_map(segbuf, prev->sb_nextnum, 0, nilfs);
			segbuf->sb_sum.seg_seq++;
			alloc++;
		}
	}

	err = nilfs_sufile_mark_dirty(nilfs->ns_sufile, segbuf->sb_segnum);
	if (err)
		goto failed;

	if (alloc) {
		err = nilfs_sufile_alloc(nilfs->ns_sufile, &nextnum);
		if (err)
			goto failed;
	}
	nilfs_segbuf_set_next_segnum(segbuf, nextnum, nilfs);

	BUG_ON(!list_empty(&sci->sc_segbufs));
	list_add_tail(&segbuf->sb_list, &sci->sc_segbufs);
	sci->sc_segbuf_nblocks = segbuf->sb_rest_blocks;
	return 0;

 failed:
	nilfs_segbuf_free(segbuf);
	return err;
}

static int nilfs_segctor_extend_segments(struct nilfs_sc_info *sci,
					 struct the_nilfs *nilfs, int nadd)
{
	struct nilfs_segment_buffer *segbuf, *prev;
	struct inode *sufile = nilfs->ns_sufile;
	__u64 nextnextnum;
	LIST_HEAD(list);
	int err, ret, i;

	prev = NILFS_LAST_SEGBUF(&sci->sc_segbufs);
	/*
	 * Since the segment specified with nextnum might be allocated during
	 * the previous construction, the buffer including its segusage may
	 * not be dirty.  The following call ensures that the buffer is dirty
	 * and will pin the buffer on memory until the sufile is written.
	 */
	err = nilfs_sufile_mark_dirty(sufile, prev->sb_nextnum);
	if (unlikely(err))
		return err;

	for (i = 0; i < nadd; i++) {
		/* extend segment info */
		err = -ENOMEM;
		segbuf = nilfs_segbuf_new(sci->sc_super);
		if (unlikely(!segbuf))
			goto failed;

		/* map this buffer to region of segment on-disk */
		nilfs_segbuf_map(segbuf, prev->sb_nextnum, 0, nilfs);
		sci->sc_segbuf_nblocks += segbuf->sb_rest_blocks;

		/* allocate the next next full segment */
		err = nilfs_sufile_alloc(sufile, &nextnextnum);
		if (unlikely(err))
			goto failed_segbuf;

		segbuf->sb_sum.seg_seq = prev->sb_sum.seg_seq + 1;
		nilfs_segbuf_set_next_segnum(segbuf, nextnextnum, nilfs);

		list_add_tail(&segbuf->sb_list, &list);
		prev = segbuf;
	}
	list_splice_tail(&list, &sci->sc_segbufs);
	return 0;

 failed_segbuf:
	nilfs_segbuf_free(segbuf);
 failed:
	list_for_each_entry(segbuf, &list, sb_list) {
		ret = nilfs_sufile_free(sufile, segbuf->sb_nextnum);
		WARN_ON(ret); /* never fails */
	}
	nilfs_destroy_logs(&list);
	return err;
}

static void nilfs_free_incomplete_logs(struct list_head *logs,
				       struct the_nilfs *nilfs)
{
	struct nilfs_segment_buffer *segbuf, *prev;
	struct inode *sufile = nilfs->ns_sufile;
	int ret;

	segbuf = NILFS_FIRST_SEGBUF(logs);
	if (nilfs->ns_nextnum != segbuf->sb_nextnum) {
		ret = nilfs_sufile_free(sufile, segbuf->sb_nextnum);
		WARN_ON(ret); /* never fails */
	}
	if (atomic_read(&segbuf->sb_err)) {
		/* Case 1: The first segment failed */
		if (segbuf->sb_pseg_start != segbuf->sb_fseg_start)
			/*
			 * Case 1a: Partial segment appended into an existing
			 * segment
			 */
			nilfs_terminate_segment(nilfs, segbuf->sb_fseg_start,
						segbuf->sb_fseg_end);
		else /* Case 1b: New full segment */
			set_nilfs_discontinued(nilfs);
	}

	prev = segbuf;
	list_for_each_entry_continue(segbuf, logs, sb_list) {
		if (prev->sb_nextnum != segbuf->sb_nextnum) {
			ret
			    = nilfs_sufile_free(sufile, segbuf->sb_nextnum);
			WARN_ON(ret); /* never fails */
		}
		if (atomic_read(&segbuf->sb_err) &&
		    segbuf->sb_segnum != nilfs->ns_nextnum)
			/* Case 2: extended segment (!= next) failed */
			nilfs_sufile_set_error(sufile, segbuf->sb_segnum);
		prev = segbuf;
	}
}

static void nilfs_segctor_update_segusage(struct nilfs_sc_info *sci,
					  struct inode *sufile)
{
	struct nilfs_segment_buffer *segbuf;
	unsigned long live_blocks;
	int ret;

	list_for_each_entry(segbuf, &sci->sc_segbufs, sb_list) {
		live_blocks = segbuf->sb_sum.nblocks +
			(segbuf->sb_pseg_start - segbuf->sb_fseg_start);
		ret = nilfs_sufile_set_segment_usage(sufile, segbuf->sb_segnum,
						     live_blocks,
						     sci->sc_seg_ctime);
		WARN_ON(ret); /* always succeeds because the segusage is dirty */
	}
}

static void nilfs_cancel_segusage(struct list_head *logs, struct inode *sufile)
{
	struct nilfs_segment_buffer *segbuf;
	int ret;

	segbuf = NILFS_FIRST_SEGBUF(logs);
	ret = nilfs_sufile_set_segment_usage(sufile, segbuf->sb_segnum,
					     segbuf->sb_pseg_start -
					     segbuf->sb_fseg_start, 0);
	WARN_ON(ret); /* always succeeds because the segusage is dirty */

	list_for_each_entry_continue(segbuf, logs, sb_list) {
		ret = nilfs_sufile_set_segment_usage(sufile, segbuf->sb_segnum,
						     0, 0);
		WARN_ON(ret); /* always succeeds */
	}
}

static void nilfs_segctor_truncate_segments(struct nilfs_sc_info *sci,
					    struct nilfs_segment_buffer *last,
					    struct inode *sufile)
{
	struct nilfs_segment_buffer *segbuf = last;
	int ret;

	list_for_each_entry_continue(segbuf, &sci->sc_segbufs, sb_list) {
		sci->sc_segbuf_nblocks -= segbuf->sb_rest_blocks;
		ret = nilfs_sufile_free(sufile, segbuf->sb_nextnum);
		WARN_ON(ret);
	}
	nilfs_truncate_logs(&sci->sc_segbufs, last);
}


static int nilfs_segctor_collect(struct nilfs_sc_info *sci,
				 struct the_nilfs *nilfs, int mode)
{
	struct nilfs_cstage prev_stage = sci->sc_stage;
	int err, nadd = 1;

	/* Collection retry loop */
	for (;;) {
		sci->sc_nblk_this_inc = 0;
		sci->sc_curseg = NILFS_FIRST_SEGBUF(&sci->sc_segbufs);

		err = nilfs_segctor_reset_segment_buffer(sci);
		if (unlikely(err))
			goto failed;

		err = nilfs_segctor_collect_blocks(sci, mode);
		sci->sc_nblk_this_inc += sci->sc_curseg->sb_sum.nblocks;
		if (!err)
			break;

		if (unlikely(err != -E2BIG))
			goto failed;

		/* The current segment is filled up */
		if (mode != SC_LSEG_SR ||
		    nilfs_sc_cstage_get(sci) < NILFS_ST_CPFILE)
			break;

		nilfs_clear_logs(&sci->sc_segbufs);

		if (sci->sc_stage.flags & NILFS_CF_SUFREED) {
			err = nilfs_sufile_cancel_freev(nilfs->ns_sufile,
							sci->sc_freesegs,
							sci->sc_nfreesegs,
							NULL);
			WARN_ON(err); /* does not happen */
			sci->sc_stage.flags &= ~NILFS_CF_SUFREED;
		}

		err = nilfs_segctor_extend_segments(sci, nilfs, nadd);
		if (unlikely(err))
			return err;

		nadd = min_t(int, nadd << 1, SC_MAX_SEGDELTA);
		sci->sc_stage = prev_stage;
	}
	nilfs_segctor_truncate_segments(sci, sci->sc_curseg, nilfs->ns_sufile);
	return 0;

 failed:
	return err;
}

static void nilfs_list_replace_buffer(struct buffer_head *old_bh,
				      struct buffer_head *new_bh)
{
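	/* Let new_bh take over old_bh's place in the payload buffer list */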
	BUG_ON(!list_empty(&new_bh->b_assoc_buffers));

	list_replace_init(&old_bh->b_assoc_buffers, &new_bh->b_assoc_buffers);
	/* The caller must release old_bh */
}

static int
nilfs_segctor_update_payload_blocknr(struct nilfs_sc_info *sci,
				     struct nilfs_segment_buffer *segbuf,
				     int mode)
{
	struct inode *inode = NULL;
	sector_t blocknr;
	unsigned long nfinfo = segbuf->sb_sum.nfinfo;
	unsigned long nblocks = 0, ndatablk = 0;
	struct nilfs_sc_operations *sc_op = NULL;
	struct nilfs_segsum_pointer ssp;
	struct nilfs_finfo *finfo = NULL;
	union nilfs_binfo binfo;
	struct buffer_head *bh, *bh_org;
	ino_t ino = 0;
	int err = 0;

	if (!nfinfo)
		goto out;

	blocknr = segbuf->sb_pseg_start + segbuf->sb_sum.nsumblk;
	ssp.bh = NILFS_SEGBUF_FIRST_BH(&segbuf->sb_segsum_buffers);
	ssp.offset = sizeof(struct nilfs_segment_summary);

	list_for_each_entry(bh, &segbuf->sb_payload_buffers, b_assoc_buffers) {
		if (bh == segbuf->sb_super_root)
			break;
		if (!finfo) {
			finfo = nilfs_segctor_map_segsum_entry(
				sci, &ssp, sizeof(*finfo));
			ino = le64_to_cpu(finfo->fi_ino);
			nblocks = le32_to_cpu(finfo->fi_nblocks);
			ndatablk = le32_to_cpu(finfo->fi_ndatablk);

			inode = bh->b_page->mapping->host;

			if (mode == SC_LSEG_DSYNC)
				sc_op = &nilfs_sc_dsync_ops;
			else if (ino == NILFS_DAT_INO)
				sc_op = &nilfs_sc_dat_ops;
			else /* file blocks */
				sc_op = &nilfs_sc_file_ops;
		}
		bh_org = bh;
		get_bh(bh_org);
		err = nilfs_bmap_assign(NILFS_I(inode)->i_bmap, &bh, blocknr,
					&binfo);
		if (bh != bh_org)
			nilfs_list_replace_buffer(bh_org, bh);
		brelse(bh_org);
		if (unlikely(err))
			goto failed_bmap;

		if (ndatablk > 0)
			sc_op->write_data_binfo(sci, &ssp, &binfo);
		else
			sc_op->write_node_binfo(sci, &ssp, &binfo);

		blocknr++;
		if (--nblocks == 0) {
			finfo = NULL;
			if (--nfinfo == 0)
				break;
		} else if (ndatablk > 0)
			ndatablk--;
	}
 out:
	return 0;

 failed_bmap:
	return err;
}

static int nilfs_segctor_assign(struct nilfs_sc_info *sci, int mode)
{
	struct nilfs_segment_buffer *segbuf;
	int err;

	list_for_each_entry(segbuf, &sci->sc_segbufs, sb_list) {
		err = nilfs_segctor_update_payload_blocknr(sci, segbuf, mode);
		if (unlikely(err))
			return err;
		nilfs_segbuf_fill_in_segsum(segbuf);
	}
	return 0;
}

static void nilfs_begin_page_io(struct page *page)
{
	if (!page || PageWriteback(page))
		/*
		 * For split b-tree node pages, this function may be called
		 * twice.  We ignore the 2nd or later calls by this check.
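		 * A page that is already under writeback was set up by an
		 * earlier call, so nothing is left to do for it here.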
		 */
		return;

	lock_page(page);
	clear_page_dirty_for_io(page);
	set_page_writeback(page);
	unlock_page(page);
}

static void nilfs_segctor_prepare_write(struct nilfs_sc_info *sci)
{
	struct nilfs_segment_buffer *segbuf;
	struct page *bd_page = NULL, *fs_page = NULL;

	list_for_each_entry(segbuf, &sci->sc_segbufs, sb_list) {
		struct buffer_head *bh;

		list_for_each_entry(bh, &segbuf->sb_segsum_buffers,
				    b_assoc_buffers) {
			if (bh->b_page != bd_page) {
				if (bd_page) {
					lock_page(bd_page);
					clear_page_dirty_for_io(bd_page);
					set_page_writeback(bd_page);
					unlock_page(bd_page);
				}
				bd_page = bh->b_page;
			}
		}

		list_for_each_entry(bh, &segbuf->sb_payload_buffers,
				    b_assoc_buffers) {
			set_buffer_async_write(bh);
			if (bh == segbuf->sb_super_root) {
				if (bh->b_page != bd_page) {
					lock_page(bd_page);
					clear_page_dirty_for_io(bd_page);
					set_page_writeback(bd_page);
					unlock_page(bd_page);
					bd_page = bh->b_page;
				}
				break;
			}
			if (bh->b_page != fs_page) {
				nilfs_begin_page_io(fs_page);
				fs_page = bh->b_page;
			}
		}
	}
	if (bd_page) {
		lock_page(bd_page);
		clear_page_dirty_for_io(bd_page);
		set_page_writeback(bd_page);
		unlock_page(bd_page);
	}
	nilfs_begin_page_io(fs_page);
}

static int nilfs_segctor_write(struct nilfs_sc_info *sci,
			       struct the_nilfs *nilfs)
{
	int ret;

	ret = nilfs_write_logs(&sci->sc_segbufs, nilfs);
	list_splice_tail_init(&sci->sc_segbufs, &sci->sc_write_logs);
	return ret;
}

static void nilfs_end_page_io(struct page *page, int err)
{
	if (!page)
		return;

	if (buffer_nilfs_node(page_buffers(page)) && !PageWriteback(page)) {
		/*
		 * For b-tree node pages, this function may be called twice
		 * or more because they might be split in a segment.
		 */
		if (PageDirty(page)) {
			/*
			 * For pages holding split b-tree node buffers, dirty
			 * flag on the buffers may be cleared discretely.
			 * In that case, the page is once redirtied for
			 * remaining buffers, and it must be cancelled if
			 * all the buffers get cleaned later.
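			 * The nilfs_page_buffers_clean() check below
			 * performs that cancellation.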
			 */
			lock_page(page);
			if (nilfs_page_buffers_clean(page))
				__nilfs_clear_page_dirty(page);
			unlock_page(page);
		}
		return;
	}

	if (!err) {
		if (!nilfs_page_buffers_clean(page))
			__set_page_dirty_nobuffers(page);
		ClearPageError(page);
	} else {
		__set_page_dirty_nobuffers(page);
		SetPageError(page);
	}

	end_page_writeback(page);
}

static void nilfs_abort_logs(struct list_head *logs, int err)
{
	struct nilfs_segment_buffer *segbuf;
	struct page *bd_page = NULL, *fs_page = NULL;
	struct buffer_head *bh;

	if (list_empty(logs))
		return;

	list_for_each_entry(segbuf, logs, sb_list) {
		list_for_each_entry(bh, &segbuf->sb_segsum_buffers,
				    b_assoc_buffers) {
			if (bh->b_page != bd_page) {
				if (bd_page)
					end_page_writeback(bd_page);
				bd_page = bh->b_page;
			}
		}

		list_for_each_entry(bh, &segbuf->sb_payload_buffers,
				    b_assoc_buffers) {
			clear_buffer_async_write(bh);
			if (bh == segbuf->sb_super_root) {
				if (bh->b_page != bd_page) {
					end_page_writeback(bd_page);
					bd_page = bh->b_page;
				}
				break;
			}
			if (bh->b_page != fs_page) {
				nilfs_end_page_io(fs_page, err);
				fs_page = bh->b_page;
			}
		}
	}
	if (bd_page)
		end_page_writeback(bd_page);

	nilfs_end_page_io(fs_page, err);
}

static void nilfs_segctor_abort_construction(struct nilfs_sc_info *sci,
					     struct the_nilfs *nilfs, int err)
{
	LIST_HEAD(logs);
	int ret;

	list_splice_tail_init(&sci->sc_write_logs, &logs);
	ret = nilfs_wait_on_logs(&logs);
	nilfs_abort_logs(&logs, ret ? : err);

	list_splice_tail_init(&sci->sc_segbufs, &logs);
	nilfs_cancel_segusage(&logs, nilfs->ns_sufile);
	nilfs_free_incomplete_logs(&logs, nilfs);

	if (sci->sc_stage.flags & NILFS_CF_SUFREED) {
		ret = nilfs_sufile_cancel_freev(nilfs->ns_sufile,
						sci->sc_freesegs,
						sci->sc_nfreesegs,
						NULL);
		WARN_ON(ret); /* does not happen */
	}

	nilfs_destroy_logs(&logs);
}

static void nilfs_set_next_segment(struct the_nilfs *nilfs,
				   struct nilfs_segment_buffer *segbuf)
{
	nilfs->ns_segnum = segbuf->sb_segnum;
	nilfs->ns_nextnum = segbuf->sb_nextnum;
	nilfs->ns_pseg_offset = segbuf->sb_pseg_start - segbuf->sb_fseg_start
		+ segbuf->sb_sum.nblocks;
	nilfs->ns_seg_seq = segbuf->sb_sum.seg_seq;
	nilfs->ns_ctime = segbuf->sb_sum.ctime;
}

static void nilfs_segctor_complete_write(struct nilfs_sc_info *sci)
{
	struct nilfs_segment_buffer *segbuf;
	struct page *bd_page = NULL, *fs_page = NULL;
	struct the_nilfs *nilfs = sci->sc_super->s_fs_info;
	int update_sr = false;

	list_for_each_entry(segbuf, &sci->sc_write_logs, sb_list) {
		struct buffer_head *bh;

		list_for_each_entry(bh, &segbuf->sb_segsum_buffers,
				    b_assoc_buffers) {
			set_buffer_uptodate(bh);
			clear_buffer_dirty(bh);
			if (bh->b_page != bd_page) {
				if (bd_page)
					end_page_writeback(bd_page);
				bd_page = bh->b_page;
			}
		}
		/*
		 * We assume that the buffers which belong to the same page
		 * continue over the buffer list.
		 * Under this assumption, the last BHs of pages are
		 * identifiable by the discontinuity of bh->b_page
		 * (page != fs_page).
		 *
		 * For B-tree node blocks, however, this assumption is not
		 * guaranteed.
		 * The cleanup code of B-tree node pages needs special care.
		 */
		list_for_each_entry(bh, &segbuf->sb_payload_buffers,
				    b_assoc_buffers) {
			const unsigned long set_bits = (1 << BH_Uptodate);
			const unsigned long clear_bits =
				(1 << BH_Dirty | 1 << BH_Async_Write |
				 1 << BH_Delay | 1 << BH_NILFS_Volatile |
				 1 << BH_NILFS_Redirected);

			set_mask_bits(&bh->b_state, clear_bits, set_bits);
			if (bh == segbuf->sb_super_root) {
				if (bh->b_page != bd_page) {
					end_page_writeback(bd_page);
					bd_page = bh->b_page;
				}
				update_sr = true;
				break;
			}
			if (bh->b_page != fs_page) {
				nilfs_end_page_io(fs_page, 0);
				fs_page = bh->b_page;
			}
		}

		if (!nilfs_segbuf_simplex(segbuf)) {
			if (segbuf->sb_sum.flags & NILFS_SS_LOGBGN) {
				set_bit(NILFS_SC_UNCLOSED, &sci->sc_flags);
				sci->sc_lseg_stime = jiffies;
			}
			if (segbuf->sb_sum.flags & NILFS_SS_LOGEND)
				clear_bit(NILFS_SC_UNCLOSED, &sci->sc_flags);
		}
	}
	/*
	 * Since pages may continue over multiple segment buffers,
	 * end of the last page must be checked outside of the loop.
	 */
	if (bd_page)
		end_page_writeback(bd_page);

	nilfs_end_page_io(fs_page, 0);

	nilfs_drop_collected_inodes(&sci->sc_dirty_files);

	if (nilfs_doing_gc())
		nilfs_drop_collected_inodes(&sci->sc_gc_inodes);
	else
		nilfs->ns_nongc_ctime = sci->sc_seg_ctime;

	sci->sc_nblk_inc += sci->sc_nblk_this_inc;

	segbuf = NILFS_LAST_SEGBUF(&sci->sc_write_logs);
	nilfs_set_next_segment(nilfs, segbuf);

	if (update_sr) {
		nilfs->ns_flushed_device = 0;
		nilfs_set_last_segment(nilfs, segbuf->sb_pseg_start,
				       segbuf->sb_sum.seg_seq, nilfs->ns_cno++);

		clear_bit(NILFS_SC_HAVE_DELTA, &sci->sc_flags);
		clear_bit(NILFS_SC_DIRTY, &sci->sc_flags);
		set_bit(NILFS_SC_SUPER_ROOT, &sci->sc_flags);
		nilfs_segctor_clear_metadata_dirty(sci);
	} else
		clear_bit(NILFS_SC_SUPER_ROOT, &sci->sc_flags);
}

static int nilfs_segctor_wait(struct nilfs_sc_info *sci)
{
	int ret;

	ret = nilfs_wait_on_logs(&sci->sc_write_logs);
	if (!ret) {
		nilfs_segctor_complete_write(sci);
		nilfs_destroy_logs(&sci->sc_write_logs);
	}
	return ret;
}

static int nilfs_segctor_collect_dirty_files(struct nilfs_sc_info *sci,
					     struct the_nilfs *nilfs)
{
	struct nilfs_inode_info *ii, *n;
	struct inode *ifile = sci->sc_root->ifile;

	spin_lock(&nilfs->ns_inode_lock);
 retry:
	list_for_each_entry_safe(ii, n, &nilfs->ns_dirty_files, i_dirty) {
		if (!ii->i_bh) {
			struct buffer_head *ibh;
			int err;

			spin_unlock(&nilfs->ns_inode_lock);
			err = nilfs_ifile_get_inode_block(
				ifile, ii->vfs_inode.i_ino, &ibh);
			if (unlikely(err)) {
				nilfs_warning(sci->sc_super, __func__,
					      "failed to get inode block.\n");
				return err;
			}
			mark_buffer_dirty(ibh);
			nilfs_mdt_mark_dirty(ifile);
			spin_lock(&nilfs->ns_inode_lock);
			if (likely(!ii->i_bh))
				ii->i_bh = ibh;
			else
				brelse(ibh);
			goto retry;
		}

		clear_bit(NILFS_I_QUEUED, &ii->i_state);
		set_bit(NILFS_I_BUSY, &ii->i_state);
		list_move_tail(&ii->i_dirty, &sci->sc_dirty_files);
	}
	spin_unlock(&nilfs->ns_inode_lock);

	return 0;
}

static void nilfs_segctor_drop_written_files(struct nilfs_sc_info *sci,
					     struct the_nilfs *nilfs)
{
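	/*
	 * Detach inodes whose blocks have been written out from the dirty
	 * file list.  iput() is deferred to a work item where calling it
	 * directly could deadlock: when i_nlink is zero or while the mount
	 * has not finished yet.
	 */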
static void nilfs_segctor_drop_written_files(struct nilfs_sc_info *sci,
					     struct the_nilfs *nilfs)
{
	struct nilfs_inode_info *ii, *n;
	int during_mount = !(sci->sc_super->s_flags & MS_ACTIVE);
	int defer_iput = false;

	spin_lock(&nilfs->ns_inode_lock);
	list_for_each_entry_safe(ii, n, &sci->sc_dirty_files, i_dirty) {
		if (!test_and_clear_bit(NILFS_I_UPDATED, &ii->i_state) ||
		    test_bit(NILFS_I_DIRTY, &ii->i_state))
			continue;

		clear_bit(NILFS_I_BUSY, &ii->i_state);
		brelse(ii->i_bh);
		ii->i_bh = NULL;
		list_del_init(&ii->i_dirty);
		if (!ii->vfs_inode.i_nlink || during_mount) {
			/*
			 * Defer calling iput() to avoid deadlocks if
			 * i_nlink == 0 or mount is not yet finished.
			 */
			list_add_tail(&ii->i_dirty, &sci->sc_iput_queue);
			defer_iput = true;
		} else {
			spin_unlock(&nilfs->ns_inode_lock);
			iput(&ii->vfs_inode);
			spin_lock(&nilfs->ns_inode_lock);
		}
	}
	spin_unlock(&nilfs->ns_inode_lock);

	if (defer_iput)
		schedule_work(&sci->sc_iput_work);
}

/*
 * Main procedure of segment constructor
 */

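/**
 * nilfs_segctor_do_construct - form logs and write them out
 * @sci: segment constructor object
 * @mode: construction mode (SC_LSEG_SR, SC_LSEG_DSYNC, SC_FLUSH_FILE,
 *        or SC_FLUSH_DAT)
 *
 * Collects dirty files and blocks, assigns new disk block addresses,
 * fills in the checkpoint and super root when @mode requires them, and
 * writes out the resulting logs.  The collect-and-write cycle repeats
 * until the collection stage reaches NILFS_ST_DONE.  On failure, the
 * partially built segments are rolled back through
 * nilfs_segctor_abort_construction().
 *
 * Return Value: 0 on success, or a negative error code on failure.
 */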
static int nilfs_segctor_do_construct(struct nilfs_sc_info *sci, int mode)
{
	struct the_nilfs *nilfs = sci->sc_super->s_fs_info;
	int err;

	nilfs_sc_cstage_set(sci, NILFS_ST_INIT);
	sci->sc_cno = nilfs->ns_cno;

	err = nilfs_segctor_collect_dirty_files(sci, nilfs);
	if (unlikely(err))
		goto out;

	if (nilfs_test_metadata_dirty(nilfs, sci->sc_root))
		set_bit(NILFS_SC_DIRTY, &sci->sc_flags);

	if (nilfs_segctor_clean(sci))
		goto out;

	do {
		sci->sc_stage.flags &= ~NILFS_CF_HISTORY_MASK;

		err = nilfs_segctor_begin_construction(sci, nilfs);
		if (unlikely(err))
			goto out;

		/* Update time stamp */
		sci->sc_seg_ctime = get_seconds();

		err = nilfs_segctor_collect(sci, nilfs, mode);
		if (unlikely(err))
			goto failed;

		/* Avoid empty segment */
		if (nilfs_sc_cstage_get(sci) == NILFS_ST_DONE &&
		    nilfs_segbuf_empty(sci->sc_curseg)) {
			nilfs_segctor_abort_construction(sci, nilfs, 1);
			goto out;
		}

		err = nilfs_segctor_assign(sci, mode);
		if (unlikely(err))
			goto failed;

		if (sci->sc_stage.flags & NILFS_CF_IFILE_STARTED)
			nilfs_segctor_fill_in_file_bmap(sci);

		if (mode == SC_LSEG_SR &&
		    nilfs_sc_cstage_get(sci) >= NILFS_ST_CPFILE) {
			err = nilfs_segctor_fill_in_checkpoint(sci);
			if (unlikely(err))
				goto failed_to_write;

			nilfs_segctor_fill_in_super_root(sci, nilfs);
		}
		nilfs_segctor_update_segusage(sci, nilfs->ns_sufile);

		/* Write partial segments */
		nilfs_segctor_prepare_write(sci);

		nilfs_add_checksums_on_logs(&sci->sc_segbufs,
					    nilfs->ns_crc_seed);

		err = nilfs_segctor_write(sci, nilfs);
		if (unlikely(err))
			goto failed_to_write;

		if (nilfs_sc_cstage_get(sci) == NILFS_ST_DONE ||
		    nilfs->ns_blocksize_bits != PAGE_CACHE_SHIFT) {
			/*
			 * At this point, we avoid double buffering
			 * for blocksize < pagesize because the page dirty
			 * flag is turned off during write, and dirty
			 * buffers are not properly collected for
			 * pages crossing over segments.
			 */
			err = nilfs_segctor_wait(sci);
			if (err)
				goto failed_to_write;
		}
	} while (nilfs_sc_cstage_get(sci) != NILFS_ST_DONE);

out:
	nilfs_segctor_drop_written_files(sci, nilfs);
	return err;

failed_to_write:
	if (sci->sc_stage.flags & NILFS_CF_IFILE_STARTED)
		nilfs_redirty_inodes(&sci->sc_dirty_files);

failed:
	if (nilfs_doing_gc())
		nilfs_redirty_inodes(&sci->sc_gc_inodes);
	nilfs_segctor_abort_construction(sci, nilfs, err);
	goto out;
}

/**
 * nilfs_segctor_start_timer - set timer of background write
 * @sci: nilfs_sc_info
 *
 * If the timer has already been set, this function ignores the new
 * request.  It MUST be called within a section locking the segment
 * semaphore.
 */
static void nilfs_segctor_start_timer(struct nilfs_sc_info *sci)
{
	spin_lock(&sci->sc_state_lock);
	if (!(sci->sc_state & NILFS_SEGCTOR_COMMIT)) {
		sci->sc_timer.expires = jiffies + sci->sc_interval;
		add_timer(&sci->sc_timer);
		sci->sc_state |= NILFS_SEGCTOR_COMMIT;
	}
	spin_unlock(&sci->sc_state_lock);
}

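/**
 * nilfs_segctor_do_flush - request a flush of segments
 * @sci: segment constructor object
 * @bn: inode number of the metadata file to be flushed, or 0 for data
 *      files (bit 0 of sc_flush_request is assigned to data files)
 *
 * Sets the flush request bit for @bn and wakes up the segment
 * constructor thread if this is the first pending request.
 */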
static void nilfs_segctor_do_flush(struct nilfs_sc_info *sci, int bn)
{
	spin_lock(&sci->sc_state_lock);
	if (!(sci->sc_flush_request & (1 << bn))) {
		unsigned long prev_req = sci->sc_flush_request;

		sci->sc_flush_request |= (1 << bn);
		if (!prev_req)
			wake_up(&sci->sc_wait_daemon);
	}
	spin_unlock(&sci->sc_state_lock);
}

/**
 * nilfs_flush_segment - trigger a segment construction for resource control
 * @sb: super block
 * @ino: inode number of the file to be flushed out.
 */
void nilfs_flush_segment(struct super_block *sb, ino_t ino)
{
	struct the_nilfs *nilfs = sb->s_fs_info;
	struct nilfs_sc_info *sci = nilfs->ns_writer;

	if (!sci || nilfs_doing_construction())
		return;
	nilfs_segctor_do_flush(sci, NILFS_MDT_INODE(sb, ino) ? ino : 0);
					/* assign bit 0 to data files */
}

struct nilfs_segctor_wait_request {
	wait_queue_t wq;
	__u32 seq;
	int err;
	atomic_t done;
};

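/**
 * nilfs_segctor_sync - wait for the completion of a segment construction
 * @sci: segment constructor object
 *
 * Registers a wait request carrying a new request sequence number,
 * wakes up the segment constructor thread, and sleeps until the
 * request is marked done or a signal is delivered.
 * nilfs_construct_segment() below is essentially a wrapper around this
 * function.
 *
 * Return Value: 0 on success, the error code recorded by the
 * constructor thread, or %-ERESTARTSYS when interrupted by a signal.
 */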
static int nilfs_segctor_sync(struct nilfs_sc_info *sci)
{
	struct nilfs_segctor_wait_request wait_req;
	int err = 0;

	spin_lock(&sci->sc_state_lock);
	init_wait(&wait_req.wq);
	wait_req.err = 0;
	atomic_set(&wait_req.done, 0);
	wait_req.seq = ++sci->sc_seq_request;
	spin_unlock(&sci->sc_state_lock);

	init_waitqueue_entry(&wait_req.wq, current);
	add_wait_queue(&sci->sc_wait_request, &wait_req.wq);
	set_current_state(TASK_INTERRUPTIBLE);
	wake_up(&sci->sc_wait_daemon);

	for (;;) {
		if (atomic_read(&wait_req.done)) {
			err = wait_req.err;
			break;
		}
		if (!signal_pending(current)) {
			schedule();
			continue;
		}
		err = -ERESTARTSYS;
		break;
	}
	finish_wait(&sci->sc_wait_request, &wait_req.wq);
	return err;
}

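/**
 * nilfs_segctor_wakeup - wake up threads whose requests have completed
 * @sci: segment constructor object
 * @err: error code carried over to the waiters
 *
 * Marks every queued wait request whose sequence number is covered by
 * sc_seq_done as done, records @err in it, and wakes up the waiting
 * thread.
 */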
static void nilfs_segctor_wakeup(struct nilfs_sc_info *sci, int err)
{
	struct nilfs_segctor_wait_request *wrq, *n;
	unsigned long flags;

	spin_lock_irqsave(&sci->sc_wait_request.lock, flags);
	list_for_each_entry_safe(wrq, n, &sci->sc_wait_request.task_list,
				 wq.task_list) {
		if (!atomic_read(&wrq->done) &&
		    nilfs_cnt32_ge(sci->sc_seq_done, wrq->seq)) {
			wrq->err = err;
			atomic_set(&wrq->done, 1);
		}
		if (atomic_read(&wrq->done)) {
			wrq->wq.func(&wrq->wq,
				     TASK_UNINTERRUPTIBLE | TASK_INTERRUPTIBLE,
				     0, NULL);
		}
	}
	spin_unlock_irqrestore(&sci->sc_wait_request.lock, flags);
}

/**
 * nilfs_construct_segment - construct a logical segment
 * @sb: super block
 *
 * Return Value: On success, 0 is returned. On errors, one of the following
 * negative error codes is returned.
 *
 * %-EROFS - Read only filesystem.
 *
 * %-EIO - I/O error
 *
 * %-ENOSPC - No space left on device (only in a panic state).
 *
 * %-ERESTARTSYS - Interrupted.
 *
 * %-ENOMEM - Insufficient memory available.
 */
int nilfs_construct_segment(struct super_block *sb)
{
	struct the_nilfs *nilfs = sb->s_fs_info;
	struct nilfs_sc_info *sci = nilfs->ns_writer;
	struct nilfs_transaction_info *ti;
	int err;

	if (!sci)
		return -EROFS;

	/* A call inside transactions causes a deadlock. */
	BUG_ON((ti = current->journal_info) && ti->ti_magic == NILFS_TI_MAGIC);

	err = nilfs_segctor_sync(sci);
	return err;
}

/**
 * nilfs_construct_dsync_segment - construct a data-only logical segment
 * @sb: super block
 * @inode: inode whose data blocks should be written out
 * @start: start byte offset
 * @end: end byte offset (inclusive)
 *
 * Return Value: On success, 0 is returned. On errors, one of the following
 * negative error codes is returned.
 *
 * %-EROFS - Read only filesystem.
 *
 * %-EIO - I/O error
 *
 * %-ENOSPC - No space left on device (only in a panic state).
 *
 * %-ERESTARTSYS - Interrupted.
 *
 * %-ENOMEM - Insufficient memory available.
 */
int nilfs_construct_dsync_segment(struct super_block *sb, struct inode *inode,
				  loff_t start, loff_t end)
{
	struct the_nilfs *nilfs = sb->s_fs_info;
	struct nilfs_sc_info *sci = nilfs->ns_writer;
	struct nilfs_inode_info *ii;
	struct nilfs_transaction_info ti;
	int err = 0;

	if (!sci)
		return -EROFS;

	nilfs_transaction_lock(sb, &ti, 0);

	ii = NILFS_I(inode);
	if (test_bit(NILFS_I_INODE_SYNC, &ii->i_state) ||
	    nilfs_test_opt(nilfs, STRICT_ORDER) ||
	    test_bit(NILFS_SC_UNCLOSED, &sci->sc_flags) ||
	    nilfs_discontinued(nilfs)) {
		nilfs_transaction_unlock(sb);
		err = nilfs_segctor_sync(sci);
		return err;
	}

	spin_lock(&nilfs->ns_inode_lock);
	if (!test_bit(NILFS_I_QUEUED, &ii->i_state) &&
	    !test_bit(NILFS_I_BUSY, &ii->i_state)) {
		spin_unlock(&nilfs->ns_inode_lock);
		nilfs_transaction_unlock(sb);
		return 0;
	}
	spin_unlock(&nilfs->ns_inode_lock);
	sci->sc_dsync_inode = ii;
	sci->sc_dsync_start = start;
	sci->sc_dsync_end = end;

	err = nilfs_segctor_do_construct(sci, SC_LSEG_DSYNC);
	if (!err)
		nilfs->ns_flushed_device = 0;

	nilfs_transaction_unlock(sb);
	return err;
}

#define FLUSH_FILE_BIT	(0x1) /* data file only */
#define FLUSH_DAT_BIT	(1 << NILFS_DAT_INO) /* DAT only */

/**
 * nilfs_segctor_accept - record accepted sequence count of log-write requests
 * @sci: segment constructor object
 */
static void nilfs_segctor_accept(struct nilfs_sc_info *sci)
{
	spin_lock(&sci->sc_state_lock);
	sci->sc_seq_accepted = sci->sc_seq_request;
	spin_unlock(&sci->sc_state_lock);
	del_timer_sync(&sci->sc_timer);
}

/**
 * nilfs_segctor_notify - notify the result of request to caller threads
 * @sci: segment constructor object
 * @mode: mode of log forming
 * @err: error code to be notified
 */
static void nilfs_segctor_notify(struct nilfs_sc_info *sci, int mode, int err)
{
	/* Clear requests (even when the construction failed) */
	spin_lock(&sci->sc_state_lock);

	if (mode == SC_LSEG_SR) {
		sci->sc_state &= ~NILFS_SEGCTOR_COMMIT;
		sci->sc_seq_done = sci->sc_seq_accepted;
		nilfs_segctor_wakeup(sci, err);
		sci->sc_flush_request = 0;
	} else {
		if (mode == SC_FLUSH_FILE)
			sci->sc_flush_request &= ~FLUSH_FILE_BIT;
		else if (mode == SC_FLUSH_DAT)
			sci->sc_flush_request &= ~FLUSH_DAT_BIT;

		/* re-enable timer if checkpoint creation was not done */
		if ((sci->sc_state & NILFS_SEGCTOR_COMMIT) &&
		    time_before(jiffies, sci->sc_timer.expires))
			add_timer(&sci->sc_timer);
	}
	spin_unlock(&sci->sc_state_lock);
}

/**
 * nilfs_segctor_construct - form logs and write them to disk
 * @sci: segment constructor object
 * @mode: mode of log forming
 */
static int nilfs_segctor_construct(struct nilfs_sc_info *sci, int mode)
{
	struct the_nilfs *nilfs = sci->sc_super->s_fs_info;
	struct nilfs_super_block **sbp;
	int err = 0;

	nilfs_segctor_accept(sci);

	if (nilfs_discontinued(nilfs))
		mode = SC_LSEG_SR;
	if (!nilfs_segctor_confirm(sci))
		err = nilfs_segctor_do_construct(sci, mode);

	if (likely(!err)) {
		if (mode != SC_FLUSH_DAT)
			atomic_set(&nilfs->ns_ndirtyblks, 0);
		if (test_bit(NILFS_SC_SUPER_ROOT, &sci->sc_flags) &&
		    nilfs_discontinued(nilfs)) {
			down_write(&nilfs->ns_sem);
			err = -EIO;
			sbp = nilfs_prepare_super(sci->sc_super,
						  nilfs_sb_will_flip(nilfs));
			if (likely(sbp)) {
				nilfs_set_log_cursor(sbp[0], nilfs);
				err = nilfs_commit_super(sci->sc_super,
							 NILFS_SB_COMMIT);
			}
			up_write(&nilfs->ns_sem);
		}
	}

	nilfs_segctor_notify(sci, mode, err);
	return err;
}

static void nilfs_construction_timeout(unsigned long data)
{
	struct task_struct *p = (struct task_struct *)data;

	wake_up_process(p);
}

static void
nilfs_remove_written_gcinodes(struct the_nilfs *nilfs, struct list_head *head)
{
	struct nilfs_inode_info *ii, *n;

	list_for_each_entry_safe(ii, n, head, i_dirty) {
		if (!test_bit(NILFS_I_UPDATED, &ii->i_state))
			continue;
		list_del_init(&ii->i_dirty);
		truncate_inode_pages(&ii->vfs_inode.i_data, 0);
		nilfs_btnode_cache_clear(&ii->i_btnode_cache);
		iput(&ii->vfs_inode);
	}
}

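/**
 * nilfs_clean_segments - write logs for garbage collection
 * @sb: super block
 * @argv: vectors of arguments passed down from the cleaner ioctl
 * @kbufs: array of buffers holding the argument data
 *
 * Prepares garbage collection by saving the DAT file to its shadow
 * map, moves the GC inodes onto sc_gc_inodes, and retries segment
 * construction until it succeeds.  Segments freed by the cleaner are
 * discarded afterwards when the DISCARD mount option is enabled.
 *
 * Return Value: 0 on success, or a negative error code on failure.
 */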
int nilfs_clean_segments(struct super_block *sb, struct nilfs_argv *argv,
			 void **kbufs)
{
	struct the_nilfs *nilfs = sb->s_fs_info;
	struct nilfs_sc_info *sci = nilfs->ns_writer;
	struct nilfs_transaction_info ti;
	int err;

	if (unlikely(!sci))
		return -EROFS;

	nilfs_transaction_lock(sb, &ti, 1);

	err = nilfs_mdt_save_to_shadow_map(nilfs->ns_dat);
	if (unlikely(err))
		goto out_unlock;

	err = nilfs_ioctl_prepare_clean_segments(nilfs, argv, kbufs);
	if (unlikely(err)) {
		nilfs_mdt_restore_from_shadow_map(nilfs->ns_dat);
		goto out_unlock;
	}

	sci->sc_freesegs = kbufs[4];
	sci->sc_nfreesegs = argv[4].v_nmembs;
	list_splice_tail_init(&nilfs->ns_gc_inodes, &sci->sc_gc_inodes);

	for (;;) {
		err = nilfs_segctor_construct(sci, SC_LSEG_SR);
		nilfs_remove_written_gcinodes(nilfs, &sci->sc_gc_inodes);

		if (likely(!err))
			break;

		nilfs_warning(sb, __func__,
			      "segment construction failed. (err=%d)", err);
		set_current_state(TASK_INTERRUPTIBLE);
		schedule_timeout(sci->sc_interval);
	}
	if (nilfs_test_opt(nilfs, DISCARD)) {
		int ret = nilfs_discard_segments(nilfs, sci->sc_freesegs,
						 sci->sc_nfreesegs);
		if (ret) {
			printk(KERN_WARNING
			       "NILFS warning: error %d on discard request, "
			       "turning discards off for the device\n", ret);
			nilfs_clear_opt(nilfs, DISCARD);
		}
	}

out_unlock:
	sci->sc_freesegs = NULL;
	sci->sc_nfreesegs = 0;
	nilfs_mdt_clear_shadow_map(nilfs->ns_dat);
	nilfs_transaction_unlock(sb);
	return err;
}

static void nilfs_segctor_thread_construct(struct nilfs_sc_info *sci, int mode)
{
	struct nilfs_transaction_info ti;

	nilfs_transaction_lock(sci->sc_super, &ti, 0);
	nilfs_segctor_construct(sci, mode);

	/*
	 * An unclosed segment should be retried.  We do this using
	 * sc_timer.  A timeout of sc_timer invokes a complete
	 * construction, which closes the current logical segment.
	 */
	if (test_bit(NILFS_SC_UNCLOSED, &sci->sc_flags))
		nilfs_segctor_start_timer(sci);

	nilfs_transaction_unlock(sci->sc_super);
}

static void nilfs_segctor_do_immediate_flush(struct nilfs_sc_info *sci)
{
	int mode = 0;

	spin_lock(&sci->sc_state_lock);
	mode = (sci->sc_flush_request & FLUSH_DAT_BIT) ?
		SC_FLUSH_DAT : SC_FLUSH_FILE;
	spin_unlock(&sci->sc_state_lock);

	if (mode) {
		nilfs_segctor_do_construct(sci, mode);

		spin_lock(&sci->sc_state_lock);
		sci->sc_flush_request &= (mode == SC_FLUSH_FILE) ?
			~FLUSH_FILE_BIT : ~FLUSH_DAT_BIT;
		spin_unlock(&sci->sc_state_lock);
	}
	clear_bit(NILFS_SC_PRIOR_FLUSH, &sci->sc_flags);
}

static int nilfs_segctor_flush_mode(struct nilfs_sc_info *sci)
{
	if (!test_bit(NILFS_SC_UNCLOSED, &sci->sc_flags) ||
	    time_before(jiffies, sci->sc_lseg_stime + sci->sc_mjcp_freq)) {
		if (!(sci->sc_flush_request & ~FLUSH_FILE_BIT))
			return SC_FLUSH_FILE;
		else if (!(sci->sc_flush_request & ~FLUSH_DAT_BIT))
			return SC_FLUSH_DAT;
	}
	return SC_LSEG_SR;
}

/**
 * nilfs_segctor_thread - main loop of the segment constructor thread.
 * @arg: pointer to a struct nilfs_sc_info.
 *
 * nilfs_segctor_thread() initializes a timer and serves as a daemon
 * to execute segment constructions.
 */
static int nilfs_segctor_thread(void *arg)
{
	struct nilfs_sc_info *sci = (struct nilfs_sc_info *)arg;
	struct the_nilfs *nilfs = sci->sc_super->s_fs_info;
	int timeout = 0;

	sci->sc_timer.data = (unsigned long)current;
	sci->sc_timer.function = nilfs_construction_timeout;

	/* start sync. */
	sci->sc_task = current;
	wake_up(&sci->sc_wait_task); /* for nilfs_segctor_start_thread() */
	printk(KERN_INFO
	       "segctord starting. Construction interval = %lu seconds, "
	       "CP frequency < %lu seconds\n",
	       sci->sc_interval / HZ, sci->sc_mjcp_freq / HZ);

	spin_lock(&sci->sc_state_lock);
loop:
	for (;;) {
		int mode;

		if (sci->sc_state & NILFS_SEGCTOR_QUIT)
			goto end_thread;

		if (timeout || sci->sc_seq_request != sci->sc_seq_done)
			mode = SC_LSEG_SR;
		else if (!sci->sc_flush_request)
			break;
		else
			mode = nilfs_segctor_flush_mode(sci);

		spin_unlock(&sci->sc_state_lock);
		nilfs_segctor_thread_construct(sci, mode);
		spin_lock(&sci->sc_state_lock);
		timeout = 0;
	}

	if (freezing(current)) {
		spin_unlock(&sci->sc_state_lock);
		try_to_freeze();
		spin_lock(&sci->sc_state_lock);
	} else {
		DEFINE_WAIT(wait);
		int should_sleep = 1;

		prepare_to_wait(&sci->sc_wait_daemon, &wait,
				TASK_INTERRUPTIBLE);

		if (sci->sc_seq_request != sci->sc_seq_done)
			should_sleep = 0;
		else if (sci->sc_flush_request)
			should_sleep = 0;
		else if (sci->sc_state & NILFS_SEGCTOR_COMMIT)
			should_sleep = time_before(jiffies,
						   sci->sc_timer.expires);

		if (should_sleep) {
			spin_unlock(&sci->sc_state_lock);
			schedule();
			spin_lock(&sci->sc_state_lock);
		}
		finish_wait(&sci->sc_wait_daemon, &wait);
		timeout = ((sci->sc_state & NILFS_SEGCTOR_COMMIT) &&
			   time_after_eq(jiffies, sci->sc_timer.expires));

		if (nilfs_sb_dirty(nilfs) && nilfs_sb_need_update(nilfs))
			set_nilfs_discontinued(nilfs);
	}
	goto loop;

end_thread:
	spin_unlock(&sci->sc_state_lock);

	/* end sync. */
	sci->sc_task = NULL;
	wake_up(&sci->sc_wait_task); /* for nilfs_segctor_kill_thread() */
	return 0;
}

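/**
 * nilfs_segctor_start_thread - create and start the segctord thread
 * @sci: segment constructor object
 *
 * Spawns the segctord kernel thread and waits until it has announced
 * itself by setting sci->sc_task.
 *
 * Return Value: 0 on success, or a negative error code if the thread
 * could not be created.
 */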
static int nilfs_segctor_start_thread(struct nilfs_sc_info *sci)
{
	struct task_struct *t;

	t = kthread_run(nilfs_segctor_thread, sci, "segctord");
	if (IS_ERR(t)) {
		int err = PTR_ERR(t);

		printk(KERN_ERR "NILFS: error %d creating segctord thread\n",
		       err);
		return err;
	}
	wait_event(sci->sc_wait_task, sci->sc_task != NULL);
	return 0;
}

static void nilfs_segctor_kill_thread(struct nilfs_sc_info *sci)
	__acquires(&sci->sc_state_lock)
	__releases(&sci->sc_state_lock)
{
	sci->sc_state |= NILFS_SEGCTOR_QUIT;

	while (sci->sc_task) {
		wake_up(&sci->sc_wait_daemon);
		spin_unlock(&sci->sc_state_lock);
		wait_event(sci->sc_wait_task, sci->sc_task == NULL);
		spin_lock(&sci->sc_state_lock);
	}
}

/*
 * Setup & clean-up functions
 */

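/**
 * nilfs_segctor_new - allocate and initialize a segment constructor object
 * @sb: super block instance
 * @root: root object of the current filesystem tree
 *
 * Allocates an nilfs_sc_info struct, takes a reference on @root, and
 * initializes its lists, wait queues, work item, and timer.  The
 * construction interval and watermark fall back to the defaults
 * (NILFS_SC_DEFAULT_TIMEOUT and NILFS_SC_DEFAULT_WATERMARK) unless the
 * nilfs object provides its own values (ns_interval, ns_watermark).
 *
 * Return Value: the new segment constructor object on success, or
 * NULL if memory allocation fails.
 */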
static struct nilfs_sc_info *nilfs_segctor_new(struct super_block *sb,
					       struct nilfs_root *root)
{
	struct the_nilfs *nilfs = sb->s_fs_info;
	struct nilfs_sc_info *sci;

	sci = kzalloc(sizeof(*sci), GFP_KERNEL);
	if (!sci)
		return NULL;

	sci->sc_super = sb;

	nilfs_get_root(root);
	sci->sc_root = root;

	init_waitqueue_head(&sci->sc_wait_request);
	init_waitqueue_head(&sci->sc_wait_daemon);
	init_waitqueue_head(&sci->sc_wait_task);
	spin_lock_init(&sci->sc_state_lock);
	INIT_LIST_HEAD(&sci->sc_dirty_files);
	INIT_LIST_HEAD(&sci->sc_segbufs);
	INIT_LIST_HEAD(&sci->sc_write_logs);
	INIT_LIST_HEAD(&sci->sc_gc_inodes);
	INIT_LIST_HEAD(&sci->sc_iput_queue);
	INIT_WORK(&sci->sc_iput_work, nilfs_iput_work_func);
	init_timer(&sci->sc_timer);

	sci->sc_interval = HZ * NILFS_SC_DEFAULT_TIMEOUT;
	sci->sc_mjcp_freq = HZ * NILFS_SC_DEFAULT_SR_FREQ;
	sci->sc_watermark = NILFS_SC_DEFAULT_WATERMARK;

	if (nilfs->ns_interval)
		sci->sc_interval = HZ * nilfs->ns_interval;
	if (nilfs->ns_watermark)
		sci->sc_watermark = nilfs->ns_watermark;
	return sci;
}

static void nilfs_segctor_write_out(struct nilfs_sc_info *sci)
{
	int ret, retrycount = NILFS_SC_CLEANUP_RETRY;

	/*
	 * The segctord thread was stopped and its timer was removed.
	 * But some tasks remain.
	 */
	do {
		struct nilfs_transaction_info ti;

		nilfs_transaction_lock(sci->sc_super, &ti, 0);
		ret = nilfs_segctor_construct(sci, SC_LSEG_SR);
		nilfs_transaction_unlock(sci->sc_super);

		flush_work(&sci->sc_iput_work);

	} while (ret && retrycount-- > 0);
}

/**
 * nilfs_segctor_destroy - destroy the segment constructor.
 * @sci: nilfs_sc_info
 *
 * nilfs_segctor_destroy() kills the segctord thread and frees
 * the nilfs_sc_info struct.
 * Caller must hold the segment semaphore.
 */
static void nilfs_segctor_destroy(struct nilfs_sc_info *sci)
{
	struct the_nilfs *nilfs = sci->sc_super->s_fs_info;
	int flag;

	up_write(&nilfs->ns_segctor_sem);

	spin_lock(&sci->sc_state_lock);
	nilfs_segctor_kill_thread(sci);
	flag = ((sci->sc_state & NILFS_SEGCTOR_COMMIT) || sci->sc_flush_request
		|| sci->sc_seq_request != sci->sc_seq_done);
	spin_unlock(&sci->sc_state_lock);

	if (flush_work(&sci->sc_iput_work))
		flag = true;

	if (flag || !nilfs_segctor_confirm(sci))
		nilfs_segctor_write_out(sci);

	if (!list_empty(&sci->sc_dirty_files)) {
		nilfs_warning(sci->sc_super, __func__,
			      "dirty file(s) after the final construction\n");
		nilfs_dispose_list(nilfs, &sci->sc_dirty_files, 1);
	}

	if (!list_empty(&sci->sc_iput_queue)) {
		nilfs_warning(sci->sc_super, __func__,
			      "iput queue is not empty\n");
		nilfs_dispose_list(nilfs, &sci->sc_iput_queue, 1);
	}

	WARN_ON(!list_empty(&sci->sc_segbufs));
	WARN_ON(!list_empty(&sci->sc_write_logs));

	nilfs_put_root(sci->sc_root);

	down_write(&nilfs->ns_segctor_sem);

	del_timer_sync(&sci->sc_timer);
	kfree(sci);
}

/**
 * nilfs_attach_log_writer - attach log writer
 * @sb: super block instance
 * @root: root object of the current filesystem tree
 *
 * This allocates a log writer object, initializes it, and starts the
 * log writer.
 *
 * Return Value: On success, 0 is returned. On error, one of the following
 * negative error codes is returned.
 *
 * %-ENOMEM - Insufficient memory available.
 */
int nilfs_attach_log_writer(struct super_block *sb, struct nilfs_root *root)
{
	struct the_nilfs *nilfs = sb->s_fs_info;
	int err;

	if (nilfs->ns_writer) {
		/*
		 * This happens if the filesystem was remounted
		 * read/write after nilfs_error degenerated it into a
		 * read-only mount.
		 */
		nilfs_detach_log_writer(sb);
	}

	nilfs->ns_writer = nilfs_segctor_new(sb, root);
	if (!nilfs->ns_writer)
		return -ENOMEM;

	err = nilfs_segctor_start_thread(nilfs->ns_writer);
	if (err) {
		kfree(nilfs->ns_writer);
		nilfs->ns_writer = NULL;
	}
	return err;
}

/**
 * nilfs_detach_log_writer - destroy log writer
 * @sb: super block instance
 *
 * This kills the log writer daemon, frees the log writer object, and
 * disposes of the list of dirty files.
 */
void nilfs_detach_log_writer(struct super_block *sb)
{
	struct the_nilfs *nilfs = sb->s_fs_info;
	LIST_HEAD(garbage_list);

	down_write(&nilfs->ns_segctor_sem);
	if (nilfs->ns_writer) {
		nilfs_segctor_destroy(nilfs->ns_writer);
		nilfs->ns_writer = NULL;
	}

	/* Force to free the list of dirty files */
	spin_lock(&nilfs->ns_inode_lock);
	if (!list_empty(&nilfs->ns_dirty_files)) {
		list_splice_init(&nilfs->ns_dirty_files, &garbage_list);
		nilfs_warning(sb, __func__,
			      "Hit dirty file after stopped log writer\n");
	}
	spin_unlock(&nilfs->ns_inode_lock);
	up_write(&nilfs->ns_segctor_sem);

	nilfs_dispose_list(nilfs, &garbage_list, 1);
}