/*
 * Copyright (C) Sistina Software, Inc.  1997-2003 All rights reserved.
 * Copyright (C) 2004-2006 Red Hat, Inc.  All rights reserved.
 *
 * This copyrighted material is made available to anyone wishing to use,
 * modify, copy, or redistribute it subject to the terms and conditions
 * of the GNU General Public License version 2.
 */

#include <linux/sched.h>
#include <linux/slab.h>
#include <linux/spinlock.h>
#include <linux/completion.h>
#include <linux/buffer_head.h>
#include <linux/mempool.h>
#include <linux/gfs2_ondisk.h>
#include <linux/bio.h>
#include <linux/fs.h>

#include "gfs2.h"
#include "incore.h"
#include "inode.h"
#include "glock.h"
#include "log.h"
#include "lops.h"
#include "meta_io.h"
#include "recovery.h"
#include "rgrp.h"
#include "trans.h"
#include "util.h"
#include "trace_gfs2.h"

/**
 * gfs2_pin - Pin a buffer in memory
 * @sdp: The superblock
 * @bh: The buffer to be pinned
 *
 * The log lock must be held when calling this function
 */
static void gfs2_pin(struct gfs2_sbd *sdp, struct buffer_head *bh)
{
	struct gfs2_bufdata *bd;

	BUG_ON(!current->journal_info);

	clear_buffer_dirty(bh);
	if (test_set_buffer_pinned(bh))
		gfs2_assert_withdraw(sdp, 0);
	if (!buffer_uptodate(bh))
		gfs2_io_error_bh(sdp, bh);
	bd = bh->b_private;
	/* If this buffer is in the AIL and it has already been written
	 * back to its in-place disk block, remove it from the AIL.
	 */
	spin_lock(&sdp->sd_ail_lock);
	if (bd->bd_ail)
		list_move(&bd->bd_ail_st_list, &bd->bd_ail->ai_ail2_list);
	spin_unlock(&sdp->sd_ail_lock);
	get_bh(bh);
	atomic_inc(&sdp->sd_log_pinned);
	trace_gfs2_pin(bd, 1);
}

static bool buffer_is_rgrp(const struct gfs2_bufdata *bd)
{
	return bd->bd_gl->gl_name.ln_type == LM_TYPE_RGRP;
}

/**
 * maybe_release_space - Bring an rgrp's clone bitmap back in sync
 * @bd: The buffer data for the just-unpinned rgrp bitmap block
 *
 * The clone bitmap keeps recently freed blocks out of reach of the
 * allocator until the log has been flushed.  Once the bitmap block has
 * been unpinned, those blocks are safe to reuse, so the clone is
 * refreshed from the real bitmap and the cloned free-block count is
 * reset.  If mounted with -o discard, the released extents are also
 * discarded at this point.
 */
static void maybe_release_space(struct gfs2_bufdata *bd)
{
	struct gfs2_glock *gl = bd->bd_gl;
	struct gfs2_sbd *sdp = gl->gl_sbd;
	struct gfs2_rgrpd *rgd = gl->gl_object;
	unsigned int index = bd->bd_bh->b_blocknr - gl->gl_name.ln_number;
	struct gfs2_bitmap *bi = rgd->rd_bits + index;

	if (bi->bi_clone == 0)
		return;
	if (sdp->sd_args.ar_discard)
		gfs2_rgrp_send_discards(sdp, rgd->rd_data0, bd->bd_bh, bi, 1, NULL);
	memcpy(bi->bi_clone + bi->bi_offset,
	       bd->bd_bh->b_data + bi->bi_offset, bi->bi_len);
	clear_bit(GBF_FULL, &bi->bi_flags);
	rgd->rd_free_clone = rgd->rd_free;
}

/**
 * gfs2_unpin - Unpin a buffer
 * @sdp: the filesystem the buffer belongs to
 * @bh: The buffer to unpin
 * @ai: The AIL list to place the unpinned buffer on
 *
 */

static void gfs2_unpin(struct gfs2_sbd *sdp, struct buffer_head *bh,
		       struct gfs2_ail *ai)
{
	struct gfs2_bufdata *bd = bh->b_private;

	BUG_ON(!buffer_uptodate(bh));
	BUG_ON(!buffer_pinned(bh));

	lock_buffer(bh);
	mark_buffer_dirty(bh);
	clear_buffer_pinned(bh);

	if (buffer_is_rgrp(bd))
		maybe_release_space(bd);

	spin_lock(&sdp->sd_ail_lock);
	if (bd->bd_ail) {
		list_del(&bd->bd_ail_st_list);
		brelse(bh);
	} else {
		struct gfs2_glock *gl = bd->bd_gl;
		list_add(&bd->bd_ail_gl_list, &gl->gl_ail_list);
		atomic_inc(&gl->gl_ail_count);
	}
	bd->bd_ail = ai;
	list_add(&bd->bd_ail_st_list, &ai->ai_ail1_list);
	spin_unlock(&sdp->sd_ail_lock);

	clear_bit(GLF_LFLUSH, &bd->bd_gl->gl_flags);
	trace_gfs2_pin(bd, 0);
	unlock_buffer(bh);
	atomic_dec(&sdp->sd_log_pinned);
}

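/*
 * Overview of the pin/unpin life cycle, as implemented above (a
 * descriptive summary, not a normative statement of the format):
 *
 *   1. gfs2_pin() takes a reference on the buffer, marks it pinned and
 *      clears its dirty bit, so that ordinary writeback leaves the
 *      block alone while it waits to be written to the journal.
 *   2. The log code writes the block to the journal, either directly
 *      or via a fake buffer head aliasing the same page.
 *   3. gfs2_unpin() re-dirties the buffer and moves it onto the new
 *      transaction's ai_ail1_list; from there the AIL machinery writes
 *      it back in place and eventually drops the reference.
 *
 * A buffer that is pinned again while still sitting on an AIL list is
 * moved to its old transaction's ai_ail2_list, since the journal copy
 * about to be written supersedes the pending in-place write.
 */
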
static inline struct gfs2_log_descriptor *bh_log_desc(struct buffer_head *bh)
{
	return (struct gfs2_log_descriptor *)bh->b_data;
}

static inline __be64 *bh_log_ptr(struct buffer_head *bh)
{
	struct gfs2_log_descriptor *ld = bh_log_desc(bh);
	return (__force __be64 *)(ld + 1);
}

static inline __be64 *bh_ptr_end(struct buffer_head *bh)
{
	return (__force __be64 *)(bh->b_data + bh->b_size);
}

/**
 * gfs2_log_write_endio - End of I/O for a log buffer
 * @bh: The buffer head
 * @uptodate: I/O Status
 *
 */

static void gfs2_log_write_endio(struct buffer_head *bh, int uptodate)
{
	struct gfs2_sbd *sdp = bh->b_private;
	bh->b_private = NULL;

	end_buffer_write_sync(bh, uptodate);
	if (atomic_dec_and_test(&sdp->sd_log_in_flight))
		wake_up(&sdp->sd_log_flush_wait);
}

/**
 * gfs2_log_get_buf - Get and initialize a buffer to use for log control data
 * @sdp: The GFS2 superblock
 *
 * Returns: the buffer_head
 */

static struct buffer_head *gfs2_log_get_buf(struct gfs2_sbd *sdp)
{
	u64 blkno = gfs2_log_bmap(sdp, sdp->sd_log_flush_head);
	struct buffer_head *bh;

	bh = sb_getblk(sdp->sd_vfs, blkno);
	lock_buffer(bh);
	memset(bh->b_data, 0, bh->b_size);
	set_buffer_uptodate(bh);
	clear_buffer_dirty(bh);
	gfs2_log_incr_head(sdp);
	atomic_inc(&sdp->sd_log_in_flight);
	bh->b_private = sdp;
	bh->b_end_io = gfs2_log_write_endio;

	return bh;
}

/**
 * gfs2_fake_write_endio - End of I/O for a fake log buffer
 * @bh: The fake buffer head
 * @uptodate: The I/O Status
 *
 * Returns the fake buffer to its mempool, then unlocks and releases
 * the real buffer it was shadowing.
 */

static void gfs2_fake_write_endio(struct buffer_head *bh, int uptodate)
{
	struct buffer_head *real_bh = bh->b_private;
	struct gfs2_bufdata *bd = real_bh->b_private;
	struct gfs2_sbd *sdp = bd->bd_gl->gl_sbd;

	end_buffer_write_sync(bh, uptodate);
	mempool_free(bh, gfs2_bh_pool);
	unlock_buffer(real_bh);
	brelse(real_bh);
	if (atomic_dec_and_test(&sdp->sd_log_in_flight))
		wake_up(&sdp->sd_log_flush_wait);
}

/**
 * gfs2_log_fake_buf - Build a fake buffer head to write metadata buffer to log
 * @sdp: the filesystem
 * @real: the buffer_head whose data the fake buffer should point to
 *
 * Returns: the fake buffer head
 */

static struct buffer_head *gfs2_log_fake_buf(struct gfs2_sbd *sdp,
					     struct buffer_head *real)
{
	u64 blkno = gfs2_log_bmap(sdp, sdp->sd_log_flush_head);
	struct buffer_head *bh;

	bh = mempool_alloc(gfs2_bh_pool, GFP_NOFS);
	atomic_set(&bh->b_count, 1);
	bh->b_state = (1 << BH_Mapped) | (1 << BH_Uptodate) | (1 << BH_Lock);
	set_bh_page(bh, real->b_page, bh_offset(real));
	bh->b_blocknr = blkno;
	bh->b_size = sdp->sd_sb.sb_bsize;
	bh->b_bdev = sdp->sd_vfs->s_bdev;
	bh->b_private = real;
	bh->b_end_io = gfs2_fake_write_endio;

	gfs2_log_incr_head(sdp);
	atomic_inc(&sdp->sd_log_in_flight);

	return bh;
}

static struct buffer_head *gfs2_get_log_desc(struct gfs2_sbd *sdp, u32 ld_type)
{
	struct buffer_head *bh = gfs2_log_get_buf(sdp);
	struct gfs2_log_descriptor *ld = bh_log_desc(bh);
	ld->ld_header.mh_magic = cpu_to_be32(GFS2_MAGIC);
	ld->ld_header.mh_type = cpu_to_be32(GFS2_METATYPE_LD);
	ld->ld_header.mh_format = cpu_to_be32(GFS2_FORMAT_LD);
	ld->ld_type = cpu_to_be32(ld_type);
	ld->ld_length = 0;
	ld->ld_data1 = 0;
	ld->ld_data2 = 0;
	memset(ld->ld_reserved, 0, sizeof(ld->ld_reserved));
	return bh;
}

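/*
 * Layout of a log descriptor block as built by the helpers above (an
 * informal sketch; gfs2_ondisk.h is authoritative for the structs):
 *
 *	+---------------------------------------+  <- bh->b_data
 *	| struct gfs2_log_descriptor            |
 *	|   ld_header (GFS2_METATYPE_LD)        |
 *	|   ld_type   (METADATA/REVOKE/JDATA)   |
 *	|   ld_length (log blocks in the chunk, |
 *	|              including this one)      |
 *	|   ld_data1  (type-specific count)     |
 *	+---------------------------------------+  <- bh_log_ptr(bh)
 *	| __be64 tags: one per described block  |
 *	| for metadata, two (blkno + escape     |
 *	| flag) for journaled data              |
 *	+---------------------------------------+  <- bh_ptr_end(bh)
 */
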
/**
 * buf_lo_add - Add a metadata buffer to the current transaction
 * @sdp: The superblock
 * @le: The log element embedded in the buffer's gfs2_bufdata
 *
 * The buffer is linked into the transaction, pinned, stamped with the
 * journal id of this node, and queued for the next log flush.
 */
static void buf_lo_add(struct gfs2_sbd *sdp, struct gfs2_log_element *le)
{
	struct gfs2_bufdata *bd = container_of(le, struct gfs2_bufdata, bd_le);
	struct gfs2_meta_header *mh;
	struct gfs2_trans *tr;

	lock_buffer(bd->bd_bh);
	gfs2_log_lock(sdp);
	if (!list_empty(&bd->bd_list_tr))
		goto out;
	tr = current->journal_info;
	tr->tr_touched = 1;
	tr->tr_num_buf++;
	list_add(&bd->bd_list_tr, &tr->tr_list_buf);
	if (!list_empty(&le->le_list))
		goto out;
	set_bit(GLF_LFLUSH, &bd->bd_gl->gl_flags);
	set_bit(GLF_DIRTY, &bd->bd_gl->gl_flags);
	gfs2_meta_check(sdp, bd->bd_bh);
	gfs2_pin(sdp, bd->bd_bh);
	mh = (struct gfs2_meta_header *)bd->bd_bh->b_data;
	mh->__pad0 = cpu_to_be64(0);
	mh->mh_jid = cpu_to_be32(sdp->sd_jdesc->jd_jid);
	sdp->sd_log_num_buf++;
	list_add(&le->le_list, &sdp->sd_log_le_buf);
	tr->tr_num_buf_new++;
out:
	gfs2_log_unlock(sdp);
	unlock_buffer(bd->bd_bh);
}

/**
 * buf_lo_before_commit - Write the pinned metadata buffers to the log
 * @sdp: The superblock
 *
 * Each chunk of up to buf_limit(sdp) blocks is preceded by one
 * GFS2_LOG_DESC_METADATA descriptor listing the real (in-place) block
 * number of every block that follows it.
 */
static void buf_lo_before_commit(struct gfs2_sbd *sdp)
{
	struct buffer_head *bh;
	struct gfs2_log_descriptor *ld;
	struct gfs2_bufdata *bd1 = NULL, *bd2;
	unsigned int total;
	unsigned int limit;
	unsigned int num;
	unsigned n;
	__be64 *ptr;

	limit = buf_limit(sdp);
	/* for 4k blocks, limit = 503 */

	gfs2_log_lock(sdp);
	total = sdp->sd_log_num_buf;
	bd1 = bd2 = list_prepare_entry(bd1, &sdp->sd_log_le_buf, bd_le.le_list);
	while (total) {
		num = total;
		if (total > limit)
			num = limit;
		gfs2_log_unlock(sdp);
		bh = gfs2_get_log_desc(sdp, GFS2_LOG_DESC_METADATA);
		gfs2_log_lock(sdp);
		ld = bh_log_desc(bh);
		ptr = bh_log_ptr(bh);
		ld->ld_length = cpu_to_be32(num + 1);
		ld->ld_data1 = cpu_to_be32(num);

		n = 0;
		list_for_each_entry_continue(bd1, &sdp->sd_log_le_buf,
					     bd_le.le_list) {
			*ptr++ = cpu_to_be64(bd1->bd_bh->b_blocknr);
			if (++n >= num)
				break;
		}

		gfs2_log_unlock(sdp);
		submit_bh(WRITE_SYNC, bh);
		gfs2_log_lock(sdp);

		n = 0;
		list_for_each_entry_continue(bd2, &sdp->sd_log_le_buf,
					     bd_le.le_list) {
			get_bh(bd2->bd_bh);
			gfs2_log_unlock(sdp);
			lock_buffer(bd2->bd_bh);
			bh = gfs2_log_fake_buf(sdp, bd2->bd_bh);
			submit_bh(WRITE_SYNC, bh);
			gfs2_log_lock(sdp);
			if (++n >= num)
				break;
		}

		BUG_ON(total < num);
		total -= num;
	}
	gfs2_log_unlock(sdp);
}

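/*
 * Worked example for the "limit = 503" note above, assuming a 4KiB
 * block size and the struct sizes from gfs2_ondisk.h (a 24-byte
 * gfs2_meta_header inside a 72-byte gfs2_log_descriptor):
 *
 *	buf_limit     = (4096 - 72) / sizeof(__be64)       = 503 blocks
 *	databuf_limit = (4096 - 72) / (2 * sizeof(__be64)) = 251 blocks
 *
 * The databuf figure is halved because journaled data descriptors
 * (see below) carry two __be64 tags per block instead of one.
 */
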
static void buf_lo_after_commit(struct gfs2_sbd *sdp, struct gfs2_ail *ai)
{
	struct list_head *head = &sdp->sd_log_le_buf;
	struct gfs2_bufdata *bd;

	while (!list_empty(head)) {
		bd = list_entry(head->next, struct gfs2_bufdata, bd_le.le_list);
		list_del_init(&bd->bd_le.le_list);
		sdp->sd_log_num_buf--;

		gfs2_unpin(sdp, bd->bd_bh, ai);
	}
	gfs2_assert_warn(sdp, !sdp->sd_log_num_buf);
}

static void buf_lo_before_scan(struct gfs2_jdesc *jd,
			       struct gfs2_log_header_host *head, int pass)
{
	struct gfs2_sbd *sdp = GFS2_SB(jd->jd_inode);

	if (pass != 0)
		return;

	sdp->sd_found_blocks = 0;
	sdp->sd_replayed_blocks = 0;
}

static int buf_lo_scan_elements(struct gfs2_jdesc *jd, unsigned int start,
				struct gfs2_log_descriptor *ld, __be64 *ptr,
				int pass)
{
	struct gfs2_inode *ip = GFS2_I(jd->jd_inode);
	struct gfs2_sbd *sdp = GFS2_SB(jd->jd_inode);
	struct gfs2_glock *gl = ip->i_gl;
	unsigned int blks = be32_to_cpu(ld->ld_data1);
	struct buffer_head *bh_log, *bh_ip;
	u64 blkno;
	int error = 0;

	if (pass != 1 || be32_to_cpu(ld->ld_type) != GFS2_LOG_DESC_METADATA)
		return 0;

	gfs2_replay_incr_blk(sdp, &start);

	for (; blks; gfs2_replay_incr_blk(sdp, &start), blks--) {
		blkno = be64_to_cpu(*ptr++);

		sdp->sd_found_blocks++;

		if (gfs2_revoke_check(sdp, blkno, start))
			continue;

		error = gfs2_replay_read_block(jd, start, &bh_log);
		if (error)
			return error;

		bh_ip = gfs2_meta_new(gl, blkno);
		memcpy(bh_ip->b_data, bh_log->b_data, bh_log->b_size);

		if (gfs2_meta_check(sdp, bh_ip))
			error = -EIO;
		else
			mark_buffer_dirty(bh_ip);

		brelse(bh_log);
		brelse(bh_ip);

		if (error)
			break;

		sdp->sd_replayed_blocks++;
	}

	return error;
}

static void buf_lo_after_scan(struct gfs2_jdesc *jd, int error, int pass)
{
	struct gfs2_inode *ip = GFS2_I(jd->jd_inode);
	struct gfs2_sbd *sdp = GFS2_SB(jd->jd_inode);

	if (error) {
		gfs2_meta_sync(ip->i_gl);
		return;
	}
	if (pass != 1)
		return;

	gfs2_meta_sync(ip->i_gl);

	fs_info(sdp, "jid=%u: Replayed %u of %u blocks\n",
		jd->jd_jid, sdp->sd_replayed_blocks, sdp->sd_found_blocks);
}

static void revoke_lo_add(struct gfs2_sbd *sdp, struct gfs2_log_element *le)
{
	struct gfs2_bufdata *bd = container_of(le, struct gfs2_bufdata, bd_le);
	struct gfs2_glock *gl = bd->bd_gl;
	struct gfs2_trans *tr;

	tr = current->journal_info;
	tr->tr_touched = 1;
	tr->tr_num_revoke++;
	sdp->sd_log_num_revoke++;
	atomic_inc(&gl->gl_revokes);
	set_bit(GLF_LFLUSH, &gl->gl_flags);
	list_add(&le->le_list, &sdp->sd_log_le_revoke);
}

static void revoke_lo_before_commit(struct gfs2_sbd *sdp)
{
	struct gfs2_log_descriptor *ld;
	struct gfs2_meta_header *mh;
	struct buffer_head *bh;
	unsigned int offset;
	struct list_head *head = &sdp->sd_log_le_revoke;
	struct gfs2_bufdata *bd;

	if (!sdp->sd_log_num_revoke)
		return;

	bh = gfs2_get_log_desc(sdp, GFS2_LOG_DESC_REVOKE);
	ld = bh_log_desc(bh);
	ld->ld_length = cpu_to_be32(gfs2_struct2blk(sdp, sdp->sd_log_num_revoke,
						    sizeof(u64)));
	ld->ld_data1 = cpu_to_be32(sdp->sd_log_num_revoke);
	offset = sizeof(struct gfs2_log_descriptor);

	list_for_each_entry(bd, head, bd_le.le_list) {
		sdp->sd_log_num_revoke--;

		if (offset + sizeof(u64) > sdp->sd_sb.sb_bsize) {
			submit_bh(WRITE_SYNC, bh);

			bh = gfs2_log_get_buf(sdp);
			mh = (struct gfs2_meta_header *)bh->b_data;
			mh->mh_magic = cpu_to_be32(GFS2_MAGIC);
			mh->mh_type = cpu_to_be32(GFS2_METATYPE_LB);
			mh->mh_format = cpu_to_be32(GFS2_FORMAT_LB);
			offset = sizeof(struct gfs2_meta_header);
		}

		*(__be64 *)(bh->b_data + offset) = cpu_to_be64(bd->bd_blkno);
		offset += sizeof(u64);
	}
	gfs2_assert_withdraw(sdp, !sdp->sd_log_num_revoke);

	submit_bh(WRITE_SYNC, bh);
}

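/*
 * Revoke records are packed as plain __be64 block numbers: the first
 * log block carries the GFS2_LOG_DESC_REVOKE descriptor, and each
 * continuation block carries a bare GFS2_METATYPE_LB header instead,
 * exactly as revoke_lo_scan_elements() below expects when it resets
 * the offset between blocks.  Assuming a 4KiB block size and the
 * struct sizes noted earlier, that works out to (4096 - 72) / 8 = 503
 * revokes in the first block and (4096 - 24) / 8 = 509 in each
 * continuation block; gfs2_struct2blk() does this arithmetic when
 * ld_length is filled in above.
 */
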
static void revoke_lo_after_commit(struct gfs2_sbd *sdp, struct gfs2_ail *ai)
{
	struct list_head *head = &sdp->sd_log_le_revoke;
	struct gfs2_bufdata *bd;
	struct gfs2_glock *gl;

	while (!list_empty(head)) {
		bd = list_entry(head->next, struct gfs2_bufdata, bd_le.le_list);
		list_del_init(&bd->bd_le.le_list);
		gl = bd->bd_gl;
		atomic_dec(&gl->gl_revokes);
		clear_bit(GLF_LFLUSH, &gl->gl_flags);
		kmem_cache_free(gfs2_bufdata_cachep, bd);
	}
}

static void revoke_lo_before_scan(struct gfs2_jdesc *jd,
				  struct gfs2_log_header_host *head, int pass)
{
	struct gfs2_sbd *sdp = GFS2_SB(jd->jd_inode);

	if (pass != 0)
		return;

	sdp->sd_found_revokes = 0;
	sdp->sd_replay_tail = head->lh_tail;
}

static int revoke_lo_scan_elements(struct gfs2_jdesc *jd, unsigned int start,
				   struct gfs2_log_descriptor *ld, __be64 *ptr,
				   int pass)
{
	struct gfs2_sbd *sdp = GFS2_SB(jd->jd_inode);
	unsigned int blks = be32_to_cpu(ld->ld_length);
	unsigned int revokes = be32_to_cpu(ld->ld_data1);
	struct buffer_head *bh;
	unsigned int offset;
	u64 blkno;
	int first = 1;
	int error;

	if (pass != 0 || be32_to_cpu(ld->ld_type) != GFS2_LOG_DESC_REVOKE)
		return 0;

	offset = sizeof(struct gfs2_log_descriptor);

	for (; blks; gfs2_replay_incr_blk(sdp, &start), blks--) {
		error = gfs2_replay_read_block(jd, start, &bh);
		if (error)
			return error;

		if (!first)
			gfs2_metatype_check(sdp, bh, GFS2_METATYPE_LB);

		while (offset + sizeof(u64) <= sdp->sd_sb.sb_bsize) {
			blkno = be64_to_cpu(*(__be64 *)(bh->b_data + offset));

			error = gfs2_revoke_add(sdp, blkno, start);
			if (error < 0) {
				brelse(bh);
				return error;
			} else if (error)
				sdp->sd_found_revokes++;

			if (!--revokes)
				break;
			offset += sizeof(u64);
		}

		brelse(bh);
		offset = sizeof(struct gfs2_meta_header);
		first = 0;
	}

	return 0;
}

static void revoke_lo_after_scan(struct gfs2_jdesc *jd, int error, int pass)
{
	struct gfs2_sbd *sdp = GFS2_SB(jd->jd_inode);

	if (error) {
		gfs2_revoke_clean(sdp);
		return;
	}
	if (pass != 1)
		return;

	fs_info(sdp, "jid=%u: Found %u revoke tags\n",
		jd->jd_jid, sdp->sd_found_revokes);

	gfs2_revoke_clean(sdp);
}

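/*
 * Journal recovery drives the scan hooks above in two passes over the
 * dirty journal (see recovery.c): pass 0 collects revoke tags via
 * revoke_lo_scan_elements(), and pass 1 replays metadata and journaled
 * data blocks, skipping any block covered by a newer revoke.  That is
 * why every scan_elements() hook begins by checking both the pass
 * number and the descriptor type.
 */
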
/**
 * databuf_lo_add - Add a databuf to the transaction.
 * @sdp: The superblock
 * @le: The log element embedded in the buffer's gfs2_bufdata
 *
 * This is used in two distinct cases:
 * i) In ordered write mode
 *    We put the data buffer on a list so that we can ensure that it is
 *    synced to disk at the right time
 * ii) In journaled data mode
 *    We need to journal the data block in the same way as metadata in
 *    the functions above. The difference is that here we have a tag
 *    which is two __be64's: the block number (as for metadata) and a
 *    flag which says whether the data block needs escaping or not.
 *    This means we need a new log descriptor for each 251 or so data
 *    blocks, which isn't an enormous overhead but twice as much as
 *    for normal metadata blocks.
 */
static void databuf_lo_add(struct gfs2_sbd *sdp, struct gfs2_log_element *le)
{
	struct gfs2_bufdata *bd = container_of(le, struct gfs2_bufdata, bd_le);
	struct gfs2_trans *tr = current->journal_info;
	struct address_space *mapping = bd->bd_bh->b_page->mapping;
	struct gfs2_inode *ip = GFS2_I(mapping->host);

	lock_buffer(bd->bd_bh);
	gfs2_log_lock(sdp);
	if (tr) {
		if (!list_empty(&bd->bd_list_tr))
			goto out;
		tr->tr_touched = 1;
		if (gfs2_is_jdata(ip)) {
			tr->tr_num_buf++;
			list_add(&bd->bd_list_tr, &tr->tr_list_buf);
		}
	}
	if (!list_empty(&le->le_list))
		goto out;

	set_bit(GLF_LFLUSH, &bd->bd_gl->gl_flags);
	set_bit(GLF_DIRTY, &bd->bd_gl->gl_flags);
	if (gfs2_is_jdata(ip)) {
		gfs2_pin(sdp, bd->bd_bh);
		tr->tr_num_databuf_new++;
		sdp->sd_log_num_databuf++;
		list_add_tail(&le->le_list, &sdp->sd_log_le_databuf);
	} else {
		list_add_tail(&le->le_list, &sdp->sd_log_le_ordered);
	}
out:
	gfs2_log_unlock(sdp);
	unlock_buffer(bd->bd_bh);
}

static void gfs2_check_magic(struct buffer_head *bh)
{
	void *kaddr;
	__be32 *ptr;

	clear_buffer_escaped(bh);
	kaddr = kmap_atomic(bh->b_page);
	ptr = kaddr + bh_offset(bh);
	if (*ptr == cpu_to_be32(GFS2_MAGIC))
		set_buffer_escaped(bh);
	kunmap_atomic(kaddr);
}

static void gfs2_write_blocks(struct gfs2_sbd *sdp, struct buffer_head *bh,
			      struct list_head *list, struct list_head *done,
			      unsigned int n)
{
	struct buffer_head *bh1;
	struct gfs2_log_descriptor *ld;
	struct gfs2_bufdata *bd;
	__be64 *ptr;

	if (!bh)
		return;

	ld = bh_log_desc(bh);
	ld->ld_length = cpu_to_be32(n + 1);
	ld->ld_data1 = cpu_to_be32(n);

	ptr = bh_log_ptr(bh);

	get_bh(bh);
	submit_bh(WRITE_SYNC, bh);
	gfs2_log_lock(sdp);
	while (!list_empty(list)) {
		bd = list_entry(list->next, struct gfs2_bufdata, bd_le.le_list);
		list_move_tail(&bd->bd_le.le_list, done);
		get_bh(bd->bd_bh);
		while (be64_to_cpu(*ptr) != bd->bd_bh->b_blocknr) {
			gfs2_log_incr_head(sdp);
			ptr += 2;
		}
		gfs2_log_unlock(sdp);
		lock_buffer(bd->bd_bh);
		if (buffer_escaped(bd->bd_bh)) {
			void *kaddr;
			bh1 = gfs2_log_get_buf(sdp);
			kaddr = kmap_atomic(bd->bd_bh->b_page);
			memcpy(bh1->b_data, kaddr + bh_offset(bd->bd_bh),
			       bh1->b_size);
			kunmap_atomic(kaddr);
			*(__be32 *)bh1->b_data = 0;
			clear_buffer_escaped(bd->bd_bh);
			unlock_buffer(bd->bd_bh);
			brelse(bd->bd_bh);
		} else {
			bh1 = gfs2_log_fake_buf(sdp, bd->bd_bh);
		}
		submit_bh(WRITE_SYNC, bh1);
		gfs2_log_lock(sdp);
		ptr += 2;
	}
	gfs2_log_unlock(sdp);
	brelse(bh);
}

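/*
 * Block "escaping", end to end (a descriptive note on the code around
 * it, not new behaviour): gfs2_check_magic() flags any data block
 * whose first word happens to equal GFS2_MAGIC.  When such a block is
 * written to the log by gfs2_write_blocks() above, it is copied into a
 * fresh log buffer with that first word zeroed, and
 * databuf_lo_before_commit() records a 1 in the second __be64 of the
 * block's tag.  On replay, databuf_lo_scan_elements() sees the flag
 * and writes GFS2_MAGIC back into the first word, restoring the
 * original data.  This keeps journal scanning from ever mistaking
 * user data for a metadata header.
 */
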
/**
 * databuf_lo_before_commit - Scan the data buffers, writing as we go
 * @sdp: The superblock
 *
 */

static void databuf_lo_before_commit(struct gfs2_sbd *sdp)
{
	struct gfs2_bufdata *bd = NULL;
	struct buffer_head *bh = NULL;
	unsigned int n = 0;
	__be64 *ptr = NULL, *end = NULL;
	LIST_HEAD(processed);
	LIST_HEAD(in_progress);

	gfs2_log_lock(sdp);
	while (!list_empty(&sdp->sd_log_le_databuf)) {
		if (ptr == end) {
			gfs2_log_unlock(sdp);
			gfs2_write_blocks(sdp, bh, &in_progress, &processed, n);
			n = 0;
			bh = gfs2_get_log_desc(sdp, GFS2_LOG_DESC_JDATA);
			ptr = bh_log_ptr(bh);
			end = bh_ptr_end(bh) - 1; /* room for a full two-word tag */
			gfs2_log_lock(sdp);
			continue;
		}
		bd = list_entry(sdp->sd_log_le_databuf.next, struct gfs2_bufdata, bd_le.le_list);
		list_move_tail(&bd->bd_le.le_list, &in_progress);
		gfs2_check_magic(bd->bd_bh);
		*ptr++ = cpu_to_be64(bd->bd_bh->b_blocknr);
		*ptr++ = cpu_to_be64(buffer_escaped(bd->bd_bh) ? 1 : 0);
		n++;
	}
	gfs2_log_unlock(sdp);
	gfs2_write_blocks(sdp, bh, &in_progress, &processed, n);
	gfs2_log_lock(sdp);
	list_splice(&processed, &sdp->sd_log_le_databuf);
	gfs2_log_unlock(sdp);
}

static int databuf_lo_scan_elements(struct gfs2_jdesc *jd, unsigned int start,
				    struct gfs2_log_descriptor *ld,
				    __be64 *ptr, int pass)
{
	struct gfs2_inode *ip = GFS2_I(jd->jd_inode);
	struct gfs2_sbd *sdp = GFS2_SB(jd->jd_inode);
	struct gfs2_glock *gl = ip->i_gl;
	unsigned int blks = be32_to_cpu(ld->ld_data1);
	struct buffer_head *bh_log, *bh_ip;
	u64 blkno;
	u64 esc;
	int error = 0;

	if (pass != 1 || be32_to_cpu(ld->ld_type) != GFS2_LOG_DESC_JDATA)
		return 0;

	gfs2_replay_incr_blk(sdp, &start);
	for (; blks; gfs2_replay_incr_blk(sdp, &start), blks--) {
		blkno = be64_to_cpu(*ptr++);
		esc = be64_to_cpu(*ptr++);

		sdp->sd_found_blocks++;

		if (gfs2_revoke_check(sdp, blkno, start))
			continue;

		error = gfs2_replay_read_block(jd, start, &bh_log);
		if (error)
			return error;

		bh_ip = gfs2_meta_new(gl, blkno);
		memcpy(bh_ip->b_data, bh_log->b_data, bh_log->b_size);

		/* Unescape */
		if (esc) {
			__be32 *eptr = (__be32 *)bh_ip->b_data;
			*eptr = cpu_to_be32(GFS2_MAGIC);
		}
		mark_buffer_dirty(bh_ip);

		brelse(bh_log);
		brelse(bh_ip);

		sdp->sd_replayed_blocks++;
	}

	return error;
}

/* FIXME: sort out accounting for log blocks etc. */

static void databuf_lo_after_scan(struct gfs2_jdesc *jd, int error, int pass)
{
	struct gfs2_inode *ip = GFS2_I(jd->jd_inode);
	struct gfs2_sbd *sdp = GFS2_SB(jd->jd_inode);

	if (error) {
		gfs2_meta_sync(ip->i_gl);
		return;
	}
	if (pass != 1)
		return;

	/* data sync? */
	gfs2_meta_sync(ip->i_gl);

	fs_info(sdp, "jid=%u: Replayed %u of %u data blocks\n",
		jd->jd_jid, sdp->sd_replayed_blocks, sdp->sd_found_blocks);
}

static void databuf_lo_after_commit(struct gfs2_sbd *sdp, struct gfs2_ail *ai)
{
	struct list_head *head = &sdp->sd_log_le_databuf;
	struct gfs2_bufdata *bd;

	while (!list_empty(head)) {
		bd = list_entry(head->next, struct gfs2_bufdata, bd_le.le_list);
		list_del_init(&bd->bd_le.le_list);
		sdp->sd_log_num_databuf--;
		gfs2_unpin(sdp, bd->bd_bh, ai);
	}
	gfs2_assert_warn(sdp, !sdp->sd_log_num_databuf);
}

const struct gfs2_log_operations gfs2_buf_lops = {
	.lo_add = buf_lo_add,
	.lo_before_commit = buf_lo_before_commit,
	.lo_after_commit = buf_lo_after_commit,
	.lo_before_scan = buf_lo_before_scan,
	.lo_scan_elements = buf_lo_scan_elements,
	.lo_after_scan = buf_lo_after_scan,
	.lo_name = "buf",
};

const struct gfs2_log_operations gfs2_revoke_lops = {
	.lo_add = revoke_lo_add,
	.lo_before_commit = revoke_lo_before_commit,
	.lo_after_commit = revoke_lo_after_commit,
	.lo_before_scan = revoke_lo_before_scan,
	.lo_scan_elements = revoke_lo_scan_elements,
	.lo_after_scan = revoke_lo_after_scan,
	.lo_name = "revoke",
};

const struct gfs2_log_operations gfs2_rg_lops = {
	.lo_name = "rg",
};

const struct gfs2_log_operations gfs2_databuf_lops = {
	.lo_add = databuf_lo_add,
	.lo_before_commit = databuf_lo_before_commit,
	.lo_after_commit = databuf_lo_after_commit,
	.lo_scan_elements = databuf_lo_scan_elements,
	.lo_after_scan = databuf_lo_after_scan,
	.lo_name = "databuf",
};

const struct gfs2_log_operations *gfs2_log_ops[] = {
	&gfs2_databuf_lops,
	&gfs2_buf_lops,
	&gfs2_rg_lops,
	&gfs2_revoke_lops,
	NULL,
};
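
/*
 * For reference, these operation tables are driven by NULL-terminated
 * iteration over gfs2_log_ops[]; the dispatch helpers in lops.h follow
 * this pattern (a minimal sketch, not a copy of that header):
 *
 *	static inline void lops_before_commit(struct gfs2_sbd *sdp)
 *	{
 *		int x;
 *		for (x = 0; gfs2_log_ops[x]; x++)
 *			if (gfs2_log_ops[x]->lo_before_commit)
 *				gfs2_log_ops[x]->lo_before_commit(sdp);
 *	}
 *
 * Entries may leave hooks NULL (gfs2_rg_lops defines only lo_name),
 * so every caller checks the pointer before dispatching.
 */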