1 /* 2 * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. 3 * Copyright (C) 2004-2008 Red Hat, Inc. All rights reserved. 4 * 5 * This copyrighted material is made available to anyone wishing to use, 6 * modify, copy, or redistribute it subject to the terms and conditions 7 * of the GNU General Public License version 2. 8 */ 9 10 #include <linux/sched.h> 11 #include <linux/slab.h> 12 #include <linux/spinlock.h> 13 #include <linux/completion.h> 14 #include <linux/buffer_head.h> 15 #include <linux/mm.h> 16 #include <linux/pagemap.h> 17 #include <linux/writeback.h> 18 #include <linux/swap.h> 19 #include <linux/delay.h> 20 #include <linux/bio.h> 21 #include <linux/gfs2_ondisk.h> 22 23 #include "gfs2.h" 24 #include "incore.h" 25 #include "glock.h" 26 #include "glops.h" 27 #include "inode.h" 28 #include "log.h" 29 #include "lops.h" 30 #include "meta_io.h" 31 #include "rgrp.h" 32 #include "trans.h" 33 #include "util.h" 34 35 static int gfs2_aspace_writepage(struct page *page, struct writeback_control *wbc) 36 { 37 int err; 38 struct buffer_head *bh, *head; 39 int nr_underway = 0; 40 int write_op = (1 << BIO_RW_META) | ((wbc->sync_mode == WB_SYNC_ALL ? 41 WRITE_SYNC_PLUG : WRITE)); 42 43 BUG_ON(!PageLocked(page)); 44 BUG_ON(!page_has_buffers(page)); 45 46 head = page_buffers(page); 47 bh = head; 48 49 do { 50 if (!buffer_mapped(bh)) 51 continue; 52 /* 53 * If it's a fully non-blocking write attempt and we cannot 54 * lock the buffer then redirty the page. Note that this can 55 * potentially cause a busy-wait loop from pdflush and kswapd 56 * activity, but those code paths have their own higher-level 57 * throttling. 58 */ 59 if (wbc->sync_mode != WB_SYNC_NONE || !wbc->nonblocking) { 60 lock_buffer(bh); 61 } else if (!trylock_buffer(bh)) { 62 redirty_page_for_writepage(wbc, page); 63 continue; 64 } 65 if (test_clear_buffer_dirty(bh)) { 66 mark_buffer_async_write(bh); 67 } else { 68 unlock_buffer(bh); 69 } 70 } while ((bh = bh->b_this_page) != head); 71 72 /* 73 * The page and its buffers are protected by PageWriteback(), so we can 74 * drop the bh refcounts early. 75 */ 76 BUG_ON(PageWriteback(page)); 77 set_page_writeback(page); 78 79 do { 80 struct buffer_head *next = bh->b_this_page; 81 if (buffer_async_write(bh)) { 82 submit_bh(write_op, bh); 83 nr_underway++; 84 } 85 bh = next; 86 } while (bh != head); 87 unlock_page(page); 88 89 err = 0; 90 if (nr_underway == 0) 91 end_page_writeback(page); 92 93 return err; 94 } 95 96 static const struct address_space_operations aspace_aops = { 97 .writepage = gfs2_aspace_writepage, 98 .releasepage = gfs2_releasepage, 99 .sync_page = block_sync_page, 100 }; 101 102 /** 103 * gfs2_aspace_get - Create and initialize a struct inode structure 104 * @sdp: the filesystem the aspace is in 105 * 106 * Right now a struct inode is just a struct inode. Maybe Linux 107 * will supply a more lightweight address space construct (that works) 108 * in the future. 109 * 110 * Make sure pages/buffers in this aspace aren't in high memory. 111 * 112 * Returns: the aspace 113 */ 114 115 struct inode *gfs2_aspace_get(struct gfs2_sbd *sdp) 116 { 117 struct inode *aspace; 118 struct gfs2_inode *ip; 119 120 aspace = new_inode(sdp->sd_vfs); 121 if (aspace) { 122 mapping_set_gfp_mask(aspace->i_mapping, GFP_NOFS); 123 aspace->i_mapping->a_ops = &aspace_aops; 124 aspace->i_size = ~0ULL; 125 ip = GFS2_I(aspace); 126 clear_bit(GIF_USER, &ip->i_flags); 127 insert_inode_hash(aspace); 128 } 129 return aspace; 130 } 131 132 void gfs2_aspace_put(struct inode *aspace) 133 { 134 remove_inode_hash(aspace); 135 iput(aspace); 136 } 137 138 /** 139 * gfs2_meta_sync - Sync all buffers associated with a glock 140 * @gl: The glock 141 * 142 */ 143 144 void gfs2_meta_sync(struct gfs2_glock *gl) 145 { 146 struct address_space *mapping = gl->gl_aspace->i_mapping; 147 int error; 148 149 filemap_fdatawrite(mapping); 150 error = filemap_fdatawait(mapping); 151 152 if (error) 153 gfs2_io_error(gl->gl_sbd); 154 } 155 156 /** 157 * gfs2_getbuf - Get a buffer with a given address space 158 * @gl: the glock 159 * @blkno: the block number (filesystem scope) 160 * @create: 1 if the buffer should be created 161 * 162 * Returns: the buffer 163 */ 164 165 struct buffer_head *gfs2_getbuf(struct gfs2_glock *gl, u64 blkno, int create) 166 { 167 struct address_space *mapping = gl->gl_aspace->i_mapping; 168 struct gfs2_sbd *sdp = gl->gl_sbd; 169 struct page *page; 170 struct buffer_head *bh; 171 unsigned int shift; 172 unsigned long index; 173 unsigned int bufnum; 174 175 shift = PAGE_CACHE_SHIFT - sdp->sd_sb.sb_bsize_shift; 176 index = blkno >> shift; /* convert block to page */ 177 bufnum = blkno - (index << shift); /* block buf index within page */ 178 179 if (create) { 180 for (;;) { 181 page = grab_cache_page(mapping, index); 182 if (page) 183 break; 184 yield(); 185 } 186 } else { 187 page = find_lock_page(mapping, index); 188 if (!page) 189 return NULL; 190 } 191 192 if (!page_has_buffers(page)) 193 create_empty_buffers(page, sdp->sd_sb.sb_bsize, 0); 194 195 /* Locate header for our buffer within our page */ 196 for (bh = page_buffers(page); bufnum--; bh = bh->b_this_page) 197 /* Do nothing */; 198 get_bh(bh); 199 200 if (!buffer_mapped(bh)) 201 map_bh(bh, sdp->sd_vfs, blkno); 202 203 unlock_page(page); 204 mark_page_accessed(page); 205 page_cache_release(page); 206 207 return bh; 208 } 209 210 static void meta_prep_new(struct buffer_head *bh) 211 { 212 struct gfs2_meta_header *mh = (struct gfs2_meta_header *)bh->b_data; 213 214 lock_buffer(bh); 215 clear_buffer_dirty(bh); 216 set_buffer_uptodate(bh); 217 unlock_buffer(bh); 218 219 mh->mh_magic = cpu_to_be32(GFS2_MAGIC); 220 } 221 222 /** 223 * gfs2_meta_new - Get a block 224 * @gl: The glock associated with this block 225 * @blkno: The block number 226 * 227 * Returns: The buffer 228 */ 229 230 struct buffer_head *gfs2_meta_new(struct gfs2_glock *gl, u64 blkno) 231 { 232 struct buffer_head *bh; 233 bh = gfs2_getbuf(gl, blkno, CREATE); 234 meta_prep_new(bh); 235 return bh; 236 } 237 238 /** 239 * gfs2_meta_read - Read a block from disk 240 * @gl: The glock covering the block 241 * @blkno: The block number 242 * @flags: flags 243 * @bhp: the place where the buffer is returned (NULL on failure) 244 * 245 * Returns: errno 246 */ 247 248 int gfs2_meta_read(struct gfs2_glock *gl, u64 blkno, int flags, 249 struct buffer_head **bhp) 250 { 251 struct gfs2_sbd *sdp = gl->gl_sbd; 252 struct buffer_head *bh; 253 254 if (unlikely(test_bit(SDF_SHUTDOWN, &sdp->sd_flags))) 255 return -EIO; 256 257 *bhp = bh = gfs2_getbuf(gl, blkno, CREATE); 258 259 lock_buffer(bh); 260 if (buffer_uptodate(bh)) { 261 unlock_buffer(bh); 262 return 0; 263 } 264 bh->b_end_io = end_buffer_read_sync; 265 get_bh(bh); 266 submit_bh(READ_SYNC | (1 << BIO_RW_META), bh); 267 if (!(flags & DIO_WAIT)) 268 return 0; 269 270 wait_on_buffer(bh); 271 if (unlikely(!buffer_uptodate(bh))) { 272 struct gfs2_trans *tr = current->journal_info; 273 if (tr && tr->tr_touched) 274 gfs2_io_error_bh(sdp, bh); 275 brelse(bh); 276 return -EIO; 277 } 278 279 return 0; 280 } 281 282 /** 283 * gfs2_meta_wait - Reread a block from disk 284 * @sdp: the filesystem 285 * @bh: The block to wait for 286 * 287 * Returns: errno 288 */ 289 290 int gfs2_meta_wait(struct gfs2_sbd *sdp, struct buffer_head *bh) 291 { 292 if (unlikely(test_bit(SDF_SHUTDOWN, &sdp->sd_flags))) 293 return -EIO; 294 295 wait_on_buffer(bh); 296 297 if (!buffer_uptodate(bh)) { 298 struct gfs2_trans *tr = current->journal_info; 299 if (tr && tr->tr_touched) 300 gfs2_io_error_bh(sdp, bh); 301 return -EIO; 302 } 303 if (unlikely(test_bit(SDF_SHUTDOWN, &sdp->sd_flags))) 304 return -EIO; 305 306 return 0; 307 } 308 309 /** 310 * gfs2_attach_bufdata - attach a struct gfs2_bufdata structure to a buffer 311 * @gl: the glock the buffer belongs to 312 * @bh: The buffer to be attached to 313 * @meta: Flag to indicate whether its metadata or not 314 */ 315 316 void gfs2_attach_bufdata(struct gfs2_glock *gl, struct buffer_head *bh, 317 int meta) 318 { 319 struct gfs2_bufdata *bd; 320 321 if (meta) 322 lock_page(bh->b_page); 323 324 if (bh->b_private) { 325 if (meta) 326 unlock_page(bh->b_page); 327 return; 328 } 329 330 bd = kmem_cache_zalloc(gfs2_bufdata_cachep, GFP_NOFS | __GFP_NOFAIL); 331 bd->bd_bh = bh; 332 bd->bd_gl = gl; 333 334 INIT_LIST_HEAD(&bd->bd_list_tr); 335 if (meta) 336 lops_init_le(&bd->bd_le, &gfs2_buf_lops); 337 else 338 lops_init_le(&bd->bd_le, &gfs2_databuf_lops); 339 bh->b_private = bd; 340 341 if (meta) 342 unlock_page(bh->b_page); 343 } 344 345 void gfs2_remove_from_journal(struct buffer_head *bh, struct gfs2_trans *tr, int meta) 346 { 347 struct gfs2_sbd *sdp = GFS2_SB(bh->b_page->mapping->host); 348 struct gfs2_bufdata *bd = bh->b_private; 349 if (test_clear_buffer_pinned(bh)) { 350 list_del_init(&bd->bd_le.le_list); 351 if (meta) { 352 gfs2_assert_warn(sdp, sdp->sd_log_num_buf); 353 sdp->sd_log_num_buf--; 354 tr->tr_num_buf_rm++; 355 } else { 356 gfs2_assert_warn(sdp, sdp->sd_log_num_databuf); 357 sdp->sd_log_num_databuf--; 358 tr->tr_num_databuf_rm++; 359 } 360 tr->tr_touched = 1; 361 brelse(bh); 362 } 363 if (bd) { 364 if (bd->bd_ail) { 365 gfs2_remove_from_ail(bd); 366 bh->b_private = NULL; 367 bd->bd_bh = NULL; 368 bd->bd_blkno = bh->b_blocknr; 369 gfs2_trans_add_revoke(sdp, bd); 370 } 371 } 372 clear_buffer_dirty(bh); 373 clear_buffer_uptodate(bh); 374 } 375 376 /** 377 * gfs2_meta_wipe - make inode's buffers so they aren't dirty/pinned anymore 378 * @ip: the inode who owns the buffers 379 * @bstart: the first buffer in the run 380 * @blen: the number of buffers in the run 381 * 382 */ 383 384 void gfs2_meta_wipe(struct gfs2_inode *ip, u64 bstart, u32 blen) 385 { 386 struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode); 387 struct buffer_head *bh; 388 389 while (blen) { 390 bh = gfs2_getbuf(ip->i_gl, bstart, NO_CREATE); 391 if (bh) { 392 lock_buffer(bh); 393 gfs2_log_lock(sdp); 394 gfs2_remove_from_journal(bh, current->journal_info, 1); 395 gfs2_log_unlock(sdp); 396 unlock_buffer(bh); 397 brelse(bh); 398 } 399 400 bstart++; 401 blen--; 402 } 403 } 404 405 /** 406 * gfs2_meta_indirect_buffer - Get a metadata buffer 407 * @ip: The GFS2 inode 408 * @height: The level of this buf in the metadata (indir addr) tree (if any) 409 * @num: The block number (device relative) of the buffer 410 * @new: Non-zero if we may create a new buffer 411 * @bhp: the buffer is returned here 412 * 413 * Returns: errno 414 */ 415 416 int gfs2_meta_indirect_buffer(struct gfs2_inode *ip, int height, u64 num, 417 int new, struct buffer_head **bhp) 418 { 419 struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode); 420 struct gfs2_glock *gl = ip->i_gl; 421 struct buffer_head *bh; 422 int ret = 0; 423 424 if (new) { 425 BUG_ON(height == 0); 426 bh = gfs2_meta_new(gl, num); 427 gfs2_trans_add_bh(ip->i_gl, bh, 1); 428 gfs2_metatype_set(bh, GFS2_METATYPE_IN, GFS2_FORMAT_IN); 429 gfs2_buffer_clear_tail(bh, sizeof(struct gfs2_meta_header)); 430 } else { 431 u32 mtype = height ? GFS2_METATYPE_IN : GFS2_METATYPE_DI; 432 ret = gfs2_meta_read(gl, num, DIO_WAIT, &bh); 433 if (ret == 0 && gfs2_metatype_check(sdp, bh, mtype)) { 434 brelse(bh); 435 ret = -EIO; 436 } 437 } 438 *bhp = bh; 439 return ret; 440 } 441 442 /** 443 * gfs2_meta_ra - start readahead on an extent of a file 444 * @gl: the glock the blocks belong to 445 * @dblock: the starting disk block 446 * @extlen: the number of blocks in the extent 447 * 448 * returns: the first buffer in the extent 449 */ 450 451 struct buffer_head *gfs2_meta_ra(struct gfs2_glock *gl, u64 dblock, u32 extlen) 452 { 453 struct gfs2_sbd *sdp = gl->gl_sbd; 454 struct buffer_head *first_bh, *bh; 455 u32 max_ra = gfs2_tune_get(sdp, gt_max_readahead) >> 456 sdp->sd_sb.sb_bsize_shift; 457 458 BUG_ON(!extlen); 459 460 if (max_ra < 1) 461 max_ra = 1; 462 if (extlen > max_ra) 463 extlen = max_ra; 464 465 first_bh = gfs2_getbuf(gl, dblock, CREATE); 466 467 if (buffer_uptodate(first_bh)) 468 goto out; 469 if (!buffer_locked(first_bh)) 470 ll_rw_block(READ_SYNC | (1 << BIO_RW_META), 1, &first_bh); 471 472 dblock++; 473 extlen--; 474 475 while (extlen) { 476 bh = gfs2_getbuf(gl, dblock, CREATE); 477 478 if (!buffer_uptodate(bh) && !buffer_locked(bh)) 479 ll_rw_block(READA, 1, &bh); 480 brelse(bh); 481 dblock++; 482 extlen--; 483 if (!buffer_locked(first_bh) && buffer_uptodate(first_bh)) 484 goto out; 485 } 486 487 wait_on_buffer(first_bh); 488 out: 489 return first_bh; 490 } 491 492