1 /* 2 * mdt.c - meta data file for NILFS 3 * 4 * Copyright (C) 2005-2008 Nippon Telegraph and Telephone Corporation. 5 * 6 * This program is free software; you can redistribute it and/or modify 7 * it under the terms of the GNU General Public License as published by 8 * the Free Software Foundation; either version 2 of the License, or 9 * (at your option) any later version. 10 * 11 * This program is distributed in the hope that it will be useful, 12 * but WITHOUT ANY WARRANTY; without even the implied warranty of 13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 14 * GNU General Public License for more details. 15 * 16 * You should have received a copy of the GNU General Public License 17 * along with this program; if not, write to the Free Software 18 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA 19 * 20 * Written by Ryusuke Konishi <ryusuke@osrg.net> 21 */ 22 23 #include <linux/buffer_head.h> 24 #include <linux/mpage.h> 25 #include <linux/mm.h> 26 #include <linux/writeback.h> 27 #include <linux/backing-dev.h> 28 #include <linux/swap.h> 29 #include <linux/slab.h> 30 #include "nilfs.h" 31 #include "btnode.h" 32 #include "segment.h" 33 #include "page.h" 34 #include "mdt.h" 35 36 37 #define NILFS_MDT_MAX_RA_BLOCKS (16 - 1) 38 39 #define INIT_UNUSED_INODE_FIELDS 40 41 static int 42 nilfs_mdt_insert_new_block(struct inode *inode, unsigned long block, 43 struct buffer_head *bh, 44 void (*init_block)(struct inode *, 45 struct buffer_head *, void *)) 46 { 47 struct nilfs_inode_info *ii = NILFS_I(inode); 48 void *kaddr; 49 int ret; 50 51 /* Caller exclude read accesses using page lock */ 52 53 /* set_buffer_new(bh); */ 54 bh->b_blocknr = 0; 55 56 ret = nilfs_bmap_insert(ii->i_bmap, block, (unsigned long)bh); 57 if (unlikely(ret)) 58 return ret; 59 60 set_buffer_mapped(bh); 61 62 kaddr = kmap_atomic(bh->b_page, KM_USER0); 63 memset(kaddr + bh_offset(bh), 0, 1 << inode->i_blkbits); 64 if (init_block) 65 init_block(inode, bh, kaddr); 66 flush_dcache_page(bh->b_page); 67 kunmap_atomic(kaddr, KM_USER0); 68 69 set_buffer_uptodate(bh); 70 nilfs_mark_buffer_dirty(bh); 71 nilfs_mdt_mark_dirty(inode); 72 return 0; 73 } 74 75 static int nilfs_mdt_create_block(struct inode *inode, unsigned long block, 76 struct buffer_head **out_bh, 77 void (*init_block)(struct inode *, 78 struct buffer_head *, 79 void *)) 80 { 81 struct the_nilfs *nilfs = NILFS_MDT(inode)->mi_nilfs; 82 struct super_block *sb = inode->i_sb; 83 struct nilfs_transaction_info ti; 84 struct buffer_head *bh; 85 int err; 86 87 if (!sb) { 88 /* 89 * Make sure this function is not called from any 90 * read-only context. 91 */ 92 if (!nilfs->ns_writer) { 93 WARN_ON(1); 94 err = -EROFS; 95 goto out; 96 } 97 sb = nilfs->ns_writer->s_super; 98 } 99 100 nilfs_transaction_begin(sb, &ti, 0); 101 102 err = -ENOMEM; 103 bh = nilfs_grab_buffer(inode, inode->i_mapping, block, 0); 104 if (unlikely(!bh)) 105 goto failed_unlock; 106 107 err = -EEXIST; 108 if (buffer_uptodate(bh)) 109 goto failed_bh; 110 111 wait_on_buffer(bh); 112 if (buffer_uptodate(bh)) 113 goto failed_bh; 114 115 bh->b_bdev = nilfs->ns_bdev; 116 err = nilfs_mdt_insert_new_block(inode, block, bh, init_block); 117 if (likely(!err)) { 118 get_bh(bh); 119 *out_bh = bh; 120 } 121 122 failed_bh: 123 unlock_page(bh->b_page); 124 page_cache_release(bh->b_page); 125 brelse(bh); 126 127 failed_unlock: 128 if (likely(!err)) 129 err = nilfs_transaction_commit(sb); 130 else 131 nilfs_transaction_abort(sb); 132 out: 133 return err; 134 } 135 136 static int 137 nilfs_mdt_submit_block(struct inode *inode, unsigned long blkoff, 138 int mode, struct buffer_head **out_bh) 139 { 140 struct buffer_head *bh; 141 __u64 blknum = 0; 142 int ret = -ENOMEM; 143 144 bh = nilfs_grab_buffer(inode, inode->i_mapping, blkoff, 0); 145 if (unlikely(!bh)) 146 goto failed; 147 148 ret = -EEXIST; /* internal code */ 149 if (buffer_uptodate(bh)) 150 goto out; 151 152 if (mode == READA) { 153 if (!trylock_buffer(bh)) { 154 ret = -EBUSY; 155 goto failed_bh; 156 } 157 } else /* mode == READ */ 158 lock_buffer(bh); 159 160 if (buffer_uptodate(bh)) { 161 unlock_buffer(bh); 162 goto out; 163 } 164 165 ret = nilfs_bmap_lookup(NILFS_I(inode)->i_bmap, blkoff, &blknum); 166 if (unlikely(ret)) { 167 unlock_buffer(bh); 168 goto failed_bh; 169 } 170 bh->b_bdev = NILFS_MDT(inode)->mi_nilfs->ns_bdev; 171 bh->b_blocknr = (sector_t)blknum; 172 set_buffer_mapped(bh); 173 174 bh->b_end_io = end_buffer_read_sync; 175 get_bh(bh); 176 submit_bh(mode, bh); 177 ret = 0; 178 out: 179 get_bh(bh); 180 *out_bh = bh; 181 182 failed_bh: 183 unlock_page(bh->b_page); 184 page_cache_release(bh->b_page); 185 brelse(bh); 186 failed: 187 return ret; 188 } 189 190 static int nilfs_mdt_read_block(struct inode *inode, unsigned long block, 191 int readahead, struct buffer_head **out_bh) 192 { 193 struct buffer_head *first_bh, *bh; 194 unsigned long blkoff; 195 int i, nr_ra_blocks = NILFS_MDT_MAX_RA_BLOCKS; 196 int err; 197 198 err = nilfs_mdt_submit_block(inode, block, READ, &first_bh); 199 if (err == -EEXIST) /* internal code */ 200 goto out; 201 202 if (unlikely(err)) 203 goto failed; 204 205 if (readahead) { 206 blkoff = block + 1; 207 for (i = 0; i < nr_ra_blocks; i++, blkoff++) { 208 err = nilfs_mdt_submit_block(inode, blkoff, READA, &bh); 209 if (likely(!err || err == -EEXIST)) 210 brelse(bh); 211 else if (err != -EBUSY) 212 break; 213 /* abort readahead if bmap lookup failed */ 214 if (!buffer_locked(first_bh)) 215 goto out_no_wait; 216 } 217 } 218 219 wait_on_buffer(first_bh); 220 221 out_no_wait: 222 err = -EIO; 223 if (!buffer_uptodate(first_bh)) 224 goto failed_bh; 225 out: 226 *out_bh = first_bh; 227 return 0; 228 229 failed_bh: 230 brelse(first_bh); 231 failed: 232 return err; 233 } 234 235 /** 236 * nilfs_mdt_get_block - read or create a buffer on meta data file. 237 * @inode: inode of the meta data file 238 * @blkoff: block offset 239 * @create: create flag 240 * @init_block: initializer used for newly allocated block 241 * @out_bh: output of a pointer to the buffer_head 242 * 243 * nilfs_mdt_get_block() looks up the specified buffer and tries to create 244 * a new buffer if @create is not zero. On success, the returned buffer is 245 * assured to be either existing or formatted using a buffer lock on success. 246 * @out_bh is substituted only when zero is returned. 247 * 248 * Return Value: On success, it returns 0. On error, the following negative 249 * error code is returned. 250 * 251 * %-ENOMEM - Insufficient memory available. 252 * 253 * %-EIO - I/O error 254 * 255 * %-ENOENT - the specified block does not exist (hole block) 256 * 257 * %-EINVAL - bmap is broken. (the caller should call nilfs_error()) 258 * 259 * %-EROFS - Read only filesystem (for create mode) 260 */ 261 int nilfs_mdt_get_block(struct inode *inode, unsigned long blkoff, int create, 262 void (*init_block)(struct inode *, 263 struct buffer_head *, void *), 264 struct buffer_head **out_bh) 265 { 266 int ret; 267 268 /* Should be rewritten with merging nilfs_mdt_read_block() */ 269 retry: 270 ret = nilfs_mdt_read_block(inode, blkoff, !create, out_bh); 271 if (!create || ret != -ENOENT) 272 return ret; 273 274 ret = nilfs_mdt_create_block(inode, blkoff, out_bh, init_block); 275 if (unlikely(ret == -EEXIST)) { 276 /* create = 0; */ /* limit read-create loop retries */ 277 goto retry; 278 } 279 return ret; 280 } 281 282 /** 283 * nilfs_mdt_delete_block - make a hole on the meta data file. 284 * @inode: inode of the meta data file 285 * @block: block offset 286 * 287 * Return Value: On success, zero is returned. 288 * On error, one of the following negative error code is returned. 289 * 290 * %-ENOMEM - Insufficient memory available. 291 * 292 * %-EIO - I/O error 293 * 294 * %-EINVAL - bmap is broken. (the caller should call nilfs_error()) 295 */ 296 int nilfs_mdt_delete_block(struct inode *inode, unsigned long block) 297 { 298 struct nilfs_inode_info *ii = NILFS_I(inode); 299 int err; 300 301 err = nilfs_bmap_delete(ii->i_bmap, block); 302 if (!err || err == -ENOENT) { 303 nilfs_mdt_mark_dirty(inode); 304 nilfs_mdt_forget_block(inode, block); 305 } 306 return err; 307 } 308 309 /** 310 * nilfs_mdt_forget_block - discard dirty state and try to remove the page 311 * @inode: inode of the meta data file 312 * @block: block offset 313 * 314 * nilfs_mdt_forget_block() clears a dirty flag of the specified buffer, and 315 * tries to release the page including the buffer from a page cache. 316 * 317 * Return Value: On success, 0 is returned. On error, one of the following 318 * negative error code is returned. 319 * 320 * %-EBUSY - page has an active buffer. 321 * 322 * %-ENOENT - page cache has no page addressed by the offset. 323 */ 324 int nilfs_mdt_forget_block(struct inode *inode, unsigned long block) 325 { 326 pgoff_t index = (pgoff_t)block >> 327 (PAGE_CACHE_SHIFT - inode->i_blkbits); 328 struct page *page; 329 unsigned long first_block; 330 int ret = 0; 331 int still_dirty; 332 333 page = find_lock_page(inode->i_mapping, index); 334 if (!page) 335 return -ENOENT; 336 337 wait_on_page_writeback(page); 338 339 first_block = (unsigned long)index << 340 (PAGE_CACHE_SHIFT - inode->i_blkbits); 341 if (page_has_buffers(page)) { 342 struct buffer_head *bh; 343 344 bh = nilfs_page_get_nth_block(page, block - first_block); 345 nilfs_forget_buffer(bh); 346 } 347 still_dirty = PageDirty(page); 348 unlock_page(page); 349 page_cache_release(page); 350 351 if (still_dirty || 352 invalidate_inode_pages2_range(inode->i_mapping, index, index) != 0) 353 ret = -EBUSY; 354 return ret; 355 } 356 357 /** 358 * nilfs_mdt_mark_block_dirty - mark a block on the meta data file dirty. 359 * @inode: inode of the meta data file 360 * @block: block offset 361 * 362 * Return Value: On success, it returns 0. On error, the following negative 363 * error code is returned. 364 * 365 * %-ENOMEM - Insufficient memory available. 366 * 367 * %-EIO - I/O error 368 * 369 * %-ENOENT - the specified block does not exist (hole block) 370 * 371 * %-EINVAL - bmap is broken. (the caller should call nilfs_error()) 372 */ 373 int nilfs_mdt_mark_block_dirty(struct inode *inode, unsigned long block) 374 { 375 struct buffer_head *bh; 376 int err; 377 378 err = nilfs_mdt_read_block(inode, block, 0, &bh); 379 if (unlikely(err)) 380 return err; 381 nilfs_mark_buffer_dirty(bh); 382 nilfs_mdt_mark_dirty(inode); 383 brelse(bh); 384 return 0; 385 } 386 387 int nilfs_mdt_fetch_dirty(struct inode *inode) 388 { 389 struct nilfs_inode_info *ii = NILFS_I(inode); 390 391 if (nilfs_bmap_test_and_clear_dirty(ii->i_bmap)) { 392 set_bit(NILFS_I_DIRTY, &ii->i_state); 393 return 1; 394 } 395 return test_bit(NILFS_I_DIRTY, &ii->i_state); 396 } 397 398 static int 399 nilfs_mdt_write_page(struct page *page, struct writeback_control *wbc) 400 { 401 struct inode *inode = container_of(page->mapping, 402 struct inode, i_data); 403 struct super_block *sb = inode->i_sb; 404 struct the_nilfs *nilfs = NILFS_MDT(inode)->mi_nilfs; 405 struct nilfs_sb_info *writer = NULL; 406 int err = 0; 407 408 redirty_page_for_writepage(wbc, page); 409 unlock_page(page); 410 411 if (page->mapping->assoc_mapping) 412 return 0; /* Do not request flush for shadow page cache */ 413 if (!sb) { 414 down_read(&nilfs->ns_writer_sem); 415 writer = nilfs->ns_writer; 416 if (!writer) { 417 up_read(&nilfs->ns_writer_sem); 418 return -EROFS; 419 } 420 sb = writer->s_super; 421 } 422 423 if (wbc->sync_mode == WB_SYNC_ALL) 424 err = nilfs_construct_segment(sb); 425 else if (wbc->for_reclaim) 426 nilfs_flush_segment(sb, inode->i_ino); 427 428 if (writer) 429 up_read(&nilfs->ns_writer_sem); 430 return err; 431 } 432 433 434 static const struct address_space_operations def_mdt_aops = { 435 .writepage = nilfs_mdt_write_page, 436 .sync_page = block_sync_page, 437 }; 438 439 static const struct inode_operations def_mdt_iops; 440 static const struct file_operations def_mdt_fops; 441 442 /* 443 * NILFS2 uses pseudo inodes for meta data files such as DAT, cpfile, sufile, 444 * ifile, or gcinodes. This allows the B-tree code and segment constructor 445 * to treat them like regular files, and this helps to simplify the 446 * implementation. 447 * On the other hand, some of the pseudo inodes have an irregular point: 448 * They don't have valid inode->i_sb pointer because their lifetimes are 449 * longer than those of the super block structs; they may continue for 450 * several consecutive mounts/umounts. This would need discussions. 451 */ 452 /** 453 * nilfs_mdt_new_common - allocate a pseudo inode for metadata file 454 * @nilfs: nilfs object 455 * @sb: super block instance the metadata file belongs to 456 * @ino: inode number 457 * @gfp_mask: gfp mask for data pages 458 * @objsz: size of the private object attached to inode->i_private 459 */ 460 struct inode * 461 nilfs_mdt_new_common(struct the_nilfs *nilfs, struct super_block *sb, 462 ino_t ino, gfp_t gfp_mask, size_t objsz) 463 { 464 struct inode *inode = nilfs_alloc_inode_common(nilfs); 465 466 if (!inode) 467 return NULL; 468 else { 469 struct address_space * const mapping = &inode->i_data; 470 struct nilfs_mdt_info *mi; 471 472 mi = kzalloc(max(sizeof(*mi), objsz), GFP_NOFS); 473 if (!mi) { 474 nilfs_destroy_inode(inode); 475 return NULL; 476 } 477 mi->mi_nilfs = nilfs; 478 init_rwsem(&mi->mi_sem); 479 480 inode->i_sb = sb; /* sb may be NULL for some meta data files */ 481 inode->i_blkbits = nilfs->ns_blocksize_bits; 482 inode->i_flags = 0; 483 atomic_set(&inode->i_count, 1); 484 inode->i_nlink = 1; 485 inode->i_ino = ino; 486 inode->i_mode = S_IFREG; 487 inode->i_private = mi; 488 489 #ifdef INIT_UNUSED_INODE_FIELDS 490 atomic_set(&inode->i_writecount, 0); 491 inode->i_size = 0; 492 inode->i_blocks = 0; 493 inode->i_bytes = 0; 494 inode->i_generation = 0; 495 #ifdef CONFIG_QUOTA 496 memset(&inode->i_dquot, 0, sizeof(inode->i_dquot)); 497 #endif 498 inode->i_pipe = NULL; 499 inode->i_bdev = NULL; 500 inode->i_cdev = NULL; 501 inode->i_rdev = 0; 502 #ifdef CONFIG_SECURITY 503 inode->i_security = NULL; 504 #endif 505 inode->dirtied_when = 0; 506 507 INIT_LIST_HEAD(&inode->i_list); 508 INIT_LIST_HEAD(&inode->i_sb_list); 509 inode->i_state = 0; 510 #endif 511 512 spin_lock_init(&inode->i_lock); 513 mutex_init(&inode->i_mutex); 514 init_rwsem(&inode->i_alloc_sem); 515 516 mapping->host = NULL; /* instead of inode */ 517 mapping->flags = 0; 518 mapping_set_gfp_mask(mapping, gfp_mask); 519 mapping->assoc_mapping = NULL; 520 mapping->backing_dev_info = nilfs->ns_bdi; 521 522 inode->i_mapping = mapping; 523 } 524 525 return inode; 526 } 527 528 struct inode *nilfs_mdt_new(struct the_nilfs *nilfs, struct super_block *sb, 529 ino_t ino, size_t objsz) 530 { 531 struct inode *inode; 532 533 inode = nilfs_mdt_new_common(nilfs, sb, ino, NILFS_MDT_GFP, objsz); 534 if (!inode) 535 return NULL; 536 537 inode->i_op = &def_mdt_iops; 538 inode->i_fop = &def_mdt_fops; 539 inode->i_mapping->a_ops = &def_mdt_aops; 540 return inode; 541 } 542 543 void nilfs_mdt_set_entry_size(struct inode *inode, unsigned entry_size, 544 unsigned header_size) 545 { 546 struct nilfs_mdt_info *mi = NILFS_MDT(inode); 547 548 mi->mi_entry_size = entry_size; 549 mi->mi_entries_per_block = (1 << inode->i_blkbits) / entry_size; 550 mi->mi_first_entry_offset = DIV_ROUND_UP(header_size, entry_size); 551 } 552 553 void nilfs_mdt_set_shadow(struct inode *orig, struct inode *shadow) 554 { 555 shadow->i_mapping->assoc_mapping = orig->i_mapping; 556 NILFS_I(shadow)->i_btnode_cache.assoc_mapping = 557 &NILFS_I(orig)->i_btnode_cache; 558 } 559 560 static void nilfs_mdt_clear(struct inode *inode) 561 { 562 struct nilfs_inode_info *ii = NILFS_I(inode); 563 564 invalidate_mapping_pages(inode->i_mapping, 0, -1); 565 truncate_inode_pages(inode->i_mapping, 0); 566 567 if (test_bit(NILFS_I_BMAP, &ii->i_state)) 568 nilfs_bmap_clear(ii->i_bmap); 569 nilfs_btnode_cache_clear(&ii->i_btnode_cache); 570 } 571 572 void nilfs_mdt_destroy(struct inode *inode) 573 { 574 struct nilfs_mdt_info *mdi = NILFS_MDT(inode); 575 576 if (mdi->mi_palloc_cache) 577 nilfs_palloc_destroy_cache(inode); 578 nilfs_mdt_clear(inode); 579 580 kfree(mdi->mi_bgl); /* kfree(NULL) is safe */ 581 kfree(mdi); 582 nilfs_destroy_inode(inode); 583 } 584