1 /* 2 * mdt.c - meta data file for NILFS 3 * 4 * Copyright (C) 2005-2008 Nippon Telegraph and Telephone Corporation. 5 * 6 * This program is free software; you can redistribute it and/or modify 7 * it under the terms of the GNU General Public License as published by 8 * the Free Software Foundation; either version 2 of the License, or 9 * (at your option) any later version. 10 * 11 * This program is distributed in the hope that it will be useful, 12 * but WITHOUT ANY WARRANTY; without even the implied warranty of 13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 14 * GNU General Public License for more details. 15 * 16 * You should have received a copy of the GNU General Public License 17 * along with this program; if not, write to the Free Software 18 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA 19 * 20 * Written by Ryusuke Konishi <ryusuke@osrg.net> 21 */ 22 23 #include <linux/buffer_head.h> 24 #include <linux/mpage.h> 25 #include <linux/mm.h> 26 #include <linux/writeback.h> 27 #include <linux/backing-dev.h> 28 #include <linux/swap.h> 29 #include "nilfs.h" 30 #include "segment.h" 31 #include "page.h" 32 #include "mdt.h" 33 34 35 #define NILFS_MDT_MAX_RA_BLOCKS (16 - 1) 36 37 #define INIT_UNUSED_INODE_FIELDS 38 39 static int 40 nilfs_mdt_insert_new_block(struct inode *inode, unsigned long block, 41 struct buffer_head *bh, 42 void (*init_block)(struct inode *, 43 struct buffer_head *, void *)) 44 { 45 struct nilfs_inode_info *ii = NILFS_I(inode); 46 void *kaddr; 47 int ret; 48 49 /* Caller exclude read accesses using page lock */ 50 51 /* set_buffer_new(bh); */ 52 bh->b_blocknr = 0; 53 54 ret = nilfs_bmap_insert(ii->i_bmap, block, (unsigned long)bh); 55 if (unlikely(ret)) 56 return ret; 57 58 set_buffer_mapped(bh); 59 60 kaddr = kmap_atomic(bh->b_page, KM_USER0); 61 memset(kaddr + bh_offset(bh), 0, 1 << inode->i_blkbits); 62 if (init_block) 63 init_block(inode, bh, kaddr); 64 flush_dcache_page(bh->b_page); 65 kunmap_atomic(kaddr, KM_USER0); 66 67 set_buffer_uptodate(bh); 68 nilfs_mark_buffer_dirty(bh); 69 nilfs_mdt_mark_dirty(inode); 70 return 0; 71 } 72 73 static int nilfs_mdt_create_block(struct inode *inode, unsigned long block, 74 struct buffer_head **out_bh, 75 void (*init_block)(struct inode *, 76 struct buffer_head *, 77 void *)) 78 { 79 struct the_nilfs *nilfs = NILFS_MDT(inode)->mi_nilfs; 80 struct nilfs_sb_info *writer = NULL; 81 struct super_block *sb = inode->i_sb; 82 struct nilfs_transaction_info ti; 83 struct buffer_head *bh; 84 int err; 85 86 if (!sb) { 87 writer = nilfs_get_writer(nilfs); 88 if (!writer) { 89 err = -EROFS; 90 goto out; 91 } 92 sb = writer->s_super; 93 } 94 95 nilfs_transaction_begin(sb, &ti, 0); 96 97 err = -ENOMEM; 98 bh = nilfs_grab_buffer(inode, inode->i_mapping, block, 0); 99 if (unlikely(!bh)) 100 goto failed_unlock; 101 102 err = -EEXIST; 103 if (buffer_uptodate(bh) || buffer_mapped(bh)) 104 goto failed_bh; 105 #if 0 106 /* The uptodate flag is not protected by the page lock, but 107 the mapped flag is. Thus, we don't have to wait the buffer. */ 108 wait_on_buffer(bh); 109 if (buffer_uptodate(bh)) 110 goto failed_bh; 111 #endif 112 113 bh->b_bdev = nilfs->ns_bdev; 114 err = nilfs_mdt_insert_new_block(inode, block, bh, init_block); 115 if (likely(!err)) { 116 get_bh(bh); 117 *out_bh = bh; 118 } 119 120 failed_bh: 121 unlock_page(bh->b_page); 122 page_cache_release(bh->b_page); 123 brelse(bh); 124 125 failed_unlock: 126 if (likely(!err)) 127 err = nilfs_transaction_commit(sb); 128 else 129 nilfs_transaction_abort(sb); 130 if (writer) 131 nilfs_put_writer(nilfs); 132 out: 133 return err; 134 } 135 136 static int 137 nilfs_mdt_submit_block(struct inode *inode, unsigned long blkoff, 138 int mode, struct buffer_head **out_bh) 139 { 140 struct buffer_head *bh; 141 unsigned long blknum = 0; 142 int ret = -ENOMEM; 143 144 bh = nilfs_grab_buffer(inode, inode->i_mapping, blkoff, 0); 145 if (unlikely(!bh)) 146 goto failed; 147 148 ret = -EEXIST; /* internal code */ 149 if (buffer_uptodate(bh)) 150 goto out; 151 152 if (mode == READA) { 153 if (!trylock_buffer(bh)) { 154 ret = -EBUSY; 155 goto failed_bh; 156 } 157 } else /* mode == READ */ 158 lock_buffer(bh); 159 160 if (buffer_uptodate(bh)) { 161 unlock_buffer(bh); 162 goto out; 163 } 164 if (!buffer_mapped(bh)) { /* unused buffer */ 165 ret = nilfs_bmap_lookup(NILFS_I(inode)->i_bmap, blkoff, 166 &blknum); 167 if (unlikely(ret)) { 168 unlock_buffer(bh); 169 goto failed_bh; 170 } 171 bh->b_bdev = NILFS_MDT(inode)->mi_nilfs->ns_bdev; 172 bh->b_blocknr = blknum; 173 set_buffer_mapped(bh); 174 } 175 176 bh->b_end_io = end_buffer_read_sync; 177 get_bh(bh); 178 submit_bh(mode, bh); 179 ret = 0; 180 out: 181 get_bh(bh); 182 *out_bh = bh; 183 184 failed_bh: 185 unlock_page(bh->b_page); 186 page_cache_release(bh->b_page); 187 brelse(bh); 188 failed: 189 return ret; 190 } 191 192 static int nilfs_mdt_read_block(struct inode *inode, unsigned long block, 193 struct buffer_head **out_bh) 194 { 195 struct buffer_head *first_bh, *bh; 196 unsigned long blkoff; 197 int i, nr_ra_blocks = NILFS_MDT_MAX_RA_BLOCKS; 198 int err; 199 200 err = nilfs_mdt_submit_block(inode, block, READ, &first_bh); 201 if (err == -EEXIST) /* internal code */ 202 goto out; 203 204 if (unlikely(err)) 205 goto failed; 206 207 blkoff = block + 1; 208 for (i = 0; i < nr_ra_blocks; i++, blkoff++) { 209 err = nilfs_mdt_submit_block(inode, blkoff, READA, &bh); 210 if (likely(!err || err == -EEXIST)) 211 brelse(bh); 212 else if (err != -EBUSY) 213 break; /* abort readahead if bmap lookup failed */ 214 215 if (!buffer_locked(first_bh)) 216 goto out_no_wait; 217 } 218 219 wait_on_buffer(first_bh); 220 221 out_no_wait: 222 err = -EIO; 223 if (!buffer_uptodate(first_bh)) 224 goto failed_bh; 225 out: 226 *out_bh = first_bh; 227 return 0; 228 229 failed_bh: 230 brelse(first_bh); 231 failed: 232 return err; 233 } 234 235 /** 236 * nilfs_mdt_get_block - read or create a buffer on meta data file. 237 * @inode: inode of the meta data file 238 * @blkoff: block offset 239 * @create: create flag 240 * @init_block: initializer used for newly allocated block 241 * @out_bh: output of a pointer to the buffer_head 242 * 243 * nilfs_mdt_get_block() looks up the specified buffer and tries to create 244 * a new buffer if @create is not zero. On success, the returned buffer is 245 * assured to be either existing or formatted using a buffer lock on success. 246 * @out_bh is substituted only when zero is returned. 247 * 248 * Return Value: On success, it returns 0. On error, the following negative 249 * error code is returned. 250 * 251 * %-ENOMEM - Insufficient memory available. 252 * 253 * %-EIO - I/O error 254 * 255 * %-ENOENT - the specified block does not exist (hole block) 256 * 257 * %-EINVAL - bmap is broken. (the caller should call nilfs_error()) 258 * 259 * %-EROFS - Read only filesystem (for create mode) 260 */ 261 int nilfs_mdt_get_block(struct inode *inode, unsigned long blkoff, int create, 262 void (*init_block)(struct inode *, 263 struct buffer_head *, void *), 264 struct buffer_head **out_bh) 265 { 266 int ret; 267 268 /* Should be rewritten with merging nilfs_mdt_read_block() */ 269 retry: 270 ret = nilfs_mdt_read_block(inode, blkoff, out_bh); 271 if (!create || ret != -ENOENT) 272 return ret; 273 274 ret = nilfs_mdt_create_block(inode, blkoff, out_bh, init_block); 275 if (unlikely(ret == -EEXIST)) { 276 /* create = 0; */ /* limit read-create loop retries */ 277 goto retry; 278 } 279 return ret; 280 } 281 282 /** 283 * nilfs_mdt_delete_block - make a hole on the meta data file. 284 * @inode: inode of the meta data file 285 * @block: block offset 286 * 287 * Return Value: On success, zero is returned. 288 * On error, one of the following negative error code is returned. 289 * 290 * %-ENOMEM - Insufficient memory available. 291 * 292 * %-EIO - I/O error 293 * 294 * %-EINVAL - bmap is broken. (the caller should call nilfs_error()) 295 */ 296 int nilfs_mdt_delete_block(struct inode *inode, unsigned long block) 297 { 298 struct nilfs_inode_info *ii = NILFS_I(inode); 299 int err; 300 301 err = nilfs_bmap_delete(ii->i_bmap, block); 302 if (likely(!err)) { 303 nilfs_mdt_mark_dirty(inode); 304 nilfs_mdt_forget_block(inode, block); 305 } 306 return err; 307 } 308 309 /** 310 * nilfs_mdt_forget_block - discard dirty state and try to remove the page 311 * @inode: inode of the meta data file 312 * @block: block offset 313 * 314 * nilfs_mdt_forget_block() clears a dirty flag of the specified buffer, and 315 * tries to release the page including the buffer from a page cache. 316 * 317 * Return Value: On success, 0 is returned. On error, one of the following 318 * negative error code is returned. 319 * 320 * %-EBUSY - page has an active buffer. 321 * 322 * %-ENOENT - page cache has no page addressed by the offset. 323 */ 324 int nilfs_mdt_forget_block(struct inode *inode, unsigned long block) 325 { 326 pgoff_t index = (pgoff_t)block >> 327 (PAGE_CACHE_SHIFT - inode->i_blkbits); 328 struct page *page; 329 unsigned long first_block; 330 int ret = 0; 331 int still_dirty; 332 333 page = find_lock_page(inode->i_mapping, index); 334 if (!page) 335 return -ENOENT; 336 337 wait_on_page_writeback(page); 338 339 first_block = (unsigned long)index << 340 (PAGE_CACHE_SHIFT - inode->i_blkbits); 341 if (page_has_buffers(page)) { 342 struct buffer_head *bh; 343 344 bh = nilfs_page_get_nth_block(page, block - first_block); 345 nilfs_forget_buffer(bh); 346 } 347 still_dirty = PageDirty(page); 348 unlock_page(page); 349 page_cache_release(page); 350 351 if (still_dirty || 352 invalidate_inode_pages2_range(inode->i_mapping, index, index) != 0) 353 ret = -EBUSY; 354 return ret; 355 } 356 357 /** 358 * nilfs_mdt_mark_block_dirty - mark a block on the meta data file dirty. 359 * @inode: inode of the meta data file 360 * @block: block offset 361 * 362 * Return Value: On success, it returns 0. On error, the following negative 363 * error code is returned. 364 * 365 * %-ENOMEM - Insufficient memory available. 366 * 367 * %-EIO - I/O error 368 * 369 * %-ENOENT - the specified block does not exist (hole block) 370 * 371 * %-EINVAL - bmap is broken. (the caller should call nilfs_error()) 372 */ 373 int nilfs_mdt_mark_block_dirty(struct inode *inode, unsigned long block) 374 { 375 struct buffer_head *bh; 376 int err; 377 378 err = nilfs_mdt_read_block(inode, block, &bh); 379 if (unlikely(err)) 380 return err; 381 nilfs_mark_buffer_dirty(bh); 382 nilfs_mdt_mark_dirty(inode); 383 brelse(bh); 384 return 0; 385 } 386 387 int nilfs_mdt_fetch_dirty(struct inode *inode) 388 { 389 struct nilfs_inode_info *ii = NILFS_I(inode); 390 391 if (nilfs_bmap_test_and_clear_dirty(ii->i_bmap)) { 392 set_bit(NILFS_I_DIRTY, &ii->i_state); 393 return 1; 394 } 395 return test_bit(NILFS_I_DIRTY, &ii->i_state); 396 } 397 398 static int 399 nilfs_mdt_write_page(struct page *page, struct writeback_control *wbc) 400 { 401 struct inode *inode = container_of(page->mapping, 402 struct inode, i_data); 403 struct super_block *sb = inode->i_sb; 404 struct nilfs_sb_info *writer = NULL; 405 int err = 0; 406 407 redirty_page_for_writepage(wbc, page); 408 unlock_page(page); 409 410 if (page->mapping->assoc_mapping) 411 return 0; /* Do not request flush for shadow page cache */ 412 if (!sb) { 413 writer = nilfs_get_writer(NILFS_MDT(inode)->mi_nilfs); 414 if (!writer) 415 return -EROFS; 416 sb = writer->s_super; 417 } 418 419 if (wbc->sync_mode == WB_SYNC_ALL) 420 err = nilfs_construct_segment(sb); 421 else if (wbc->for_reclaim) 422 nilfs_flush_segment(sb, inode->i_ino); 423 424 if (writer) 425 nilfs_put_writer(NILFS_MDT(inode)->mi_nilfs); 426 return err; 427 } 428 429 430 static struct address_space_operations def_mdt_aops = { 431 .writepage = nilfs_mdt_write_page, 432 }; 433 434 static struct inode_operations def_mdt_iops; 435 static struct file_operations def_mdt_fops; 436 437 /* 438 * NILFS2 uses pseudo inodes for meta data files such as DAT, cpfile, sufile, 439 * ifile, or gcinodes. This allows the B-tree code and segment constructor 440 * to treat them like regular files, and this helps to simplify the 441 * implementation. 442 * On the other hand, some of the pseudo inodes have an irregular point: 443 * They don't have valid inode->i_sb pointer because their lifetimes are 444 * longer than those of the super block structs; they may continue for 445 * several consecutive mounts/umounts. This would need discussions. 446 */ 447 struct inode * 448 nilfs_mdt_new_common(struct the_nilfs *nilfs, struct super_block *sb, 449 ino_t ino, gfp_t gfp_mask) 450 { 451 struct inode *inode = nilfs_alloc_inode(sb); 452 453 if (!inode) 454 return NULL; 455 else { 456 struct address_space * const mapping = &inode->i_data; 457 struct nilfs_mdt_info *mi = kzalloc(sizeof(*mi), GFP_NOFS); 458 459 if (!mi) { 460 nilfs_destroy_inode(inode); 461 return NULL; 462 } 463 mi->mi_nilfs = nilfs; 464 init_rwsem(&mi->mi_sem); 465 466 inode->i_sb = sb; /* sb may be NULL for some meta data files */ 467 inode->i_blkbits = nilfs->ns_blocksize_bits; 468 inode->i_flags = 0; 469 atomic_set(&inode->i_count, 1); 470 inode->i_nlink = 1; 471 inode->i_ino = ino; 472 inode->i_mode = S_IFREG; 473 inode->i_private = mi; 474 475 #ifdef INIT_UNUSED_INODE_FIELDS 476 atomic_set(&inode->i_writecount, 0); 477 inode->i_size = 0; 478 inode->i_blocks = 0; 479 inode->i_bytes = 0; 480 inode->i_generation = 0; 481 #ifdef CONFIG_QUOTA 482 memset(&inode->i_dquot, 0, sizeof(inode->i_dquot)); 483 #endif 484 inode->i_pipe = NULL; 485 inode->i_bdev = NULL; 486 inode->i_cdev = NULL; 487 inode->i_rdev = 0; 488 #ifdef CONFIG_SECURITY 489 inode->i_security = NULL; 490 #endif 491 inode->dirtied_when = 0; 492 493 INIT_LIST_HEAD(&inode->i_list); 494 INIT_LIST_HEAD(&inode->i_sb_list); 495 inode->i_state = 0; 496 #endif 497 498 spin_lock_init(&inode->i_lock); 499 mutex_init(&inode->i_mutex); 500 init_rwsem(&inode->i_alloc_sem); 501 502 mapping->host = NULL; /* instead of inode */ 503 mapping->flags = 0; 504 mapping_set_gfp_mask(mapping, gfp_mask); 505 mapping->assoc_mapping = NULL; 506 mapping->backing_dev_info = nilfs->ns_bdi; 507 508 inode->i_mapping = mapping; 509 } 510 511 return inode; 512 } 513 514 struct inode *nilfs_mdt_new(struct the_nilfs *nilfs, struct super_block *sb, 515 ino_t ino, gfp_t gfp_mask) 516 { 517 struct inode *inode = nilfs_mdt_new_common(nilfs, sb, ino, gfp_mask); 518 519 if (!inode) 520 return NULL; 521 522 inode->i_op = &def_mdt_iops; 523 inode->i_fop = &def_mdt_fops; 524 inode->i_mapping->a_ops = &def_mdt_aops; 525 return inode; 526 } 527 528 void nilfs_mdt_set_entry_size(struct inode *inode, unsigned entry_size, 529 unsigned header_size) 530 { 531 struct nilfs_mdt_info *mi = NILFS_MDT(inode); 532 533 mi->mi_entry_size = entry_size; 534 mi->mi_entries_per_block = (1 << inode->i_blkbits) / entry_size; 535 mi->mi_first_entry_offset = DIV_ROUND_UP(header_size, entry_size); 536 } 537 538 void nilfs_mdt_set_shadow(struct inode *orig, struct inode *shadow) 539 { 540 shadow->i_mapping->assoc_mapping = orig->i_mapping; 541 NILFS_I(shadow)->i_btnode_cache.assoc_mapping = 542 &NILFS_I(orig)->i_btnode_cache; 543 } 544 545 void nilfs_mdt_clear(struct inode *inode) 546 { 547 struct nilfs_inode_info *ii = NILFS_I(inode); 548 549 invalidate_mapping_pages(inode->i_mapping, 0, -1); 550 truncate_inode_pages(inode->i_mapping, 0); 551 552 nilfs_bmap_clear(ii->i_bmap); 553 nilfs_btnode_cache_clear(&ii->i_btnode_cache); 554 } 555 556 void nilfs_mdt_destroy(struct inode *inode) 557 { 558 struct nilfs_mdt_info *mdi = NILFS_MDT(inode); 559 560 kfree(mdi->mi_bgl); /* kfree(NULL) is safe */ 561 kfree(mdi); 562 nilfs_destroy_inode(inode); 563 } 564