/*
 * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
 * Copyright (C) 2004-2006 Red Hat, Inc. All rights reserved.
 *
 * This copyrighted material is made available to anyone wishing to use,
 * modify, copy, or redistribute it subject to the terms and conditions
 * of the GNU General Public License version 2.
 */

#include <linux/sched.h>
#include <linux/slab.h>
#include <linux/spinlock.h>
#include <linux/completion.h>
#include <linux/buffer_head.h>
#include <linux/gfs2_ondisk.h>
#include <linux/bio.h>
#include <linux/fs.h>

#include "gfs2.h"
#include "incore.h"
#include "inode.h"
#include "glock.h"
#include "log.h"
#include "lops.h"
#include "meta_io.h"
#include "recovery.h"
#include "rgrp.h"
#include "trans.h"
#include "util.h"
#include "trace_gfs2.h"

/**
 * gfs2_pin - Pin a buffer in memory
 * @sdp: The superblock
 * @bh: The buffer to be pinned
 *
 * The log lock must be held when calling this function
 */
static void gfs2_pin(struct gfs2_sbd *sdp, struct buffer_head *bh)
{
	struct gfs2_bufdata *bd;

	BUG_ON(!current->journal_info);

	clear_buffer_dirty(bh);
	if (test_set_buffer_pinned(bh))
		gfs2_assert_withdraw(sdp, 0);
	if (!buffer_uptodate(bh))
		gfs2_io_error_bh(sdp, bh);
	bd = bh->b_private;
	/* If this buffer is in the AIL and it has already been written
	 * to the in-place disk block, remove it from the AIL.
	 */
	spin_lock(&sdp->sd_ail_lock);
	if (bd->bd_ail)
		list_move(&bd->bd_ail_st_list, &bd->bd_ail->ai_ail2_list);
	spin_unlock(&sdp->sd_ail_lock);
	get_bh(bh);
	atomic_inc(&sdp->sd_log_pinned);
	trace_gfs2_pin(bd, 1);
}

static bool buffer_is_rgrp(const struct gfs2_bufdata *bd)
{
	return bd->bd_gl->gl_name.ln_type == LM_TYPE_RGRP;
}

static void maybe_release_space(struct gfs2_bufdata *bd)
{
	struct gfs2_glock *gl = bd->bd_gl;
	struct gfs2_sbd *sdp = gl->gl_sbd;
	struct gfs2_rgrpd *rgd = gl->gl_object;
	unsigned int index = bd->bd_bh->b_blocknr - gl->gl_name.ln_number;
	struct gfs2_bitmap *bi = rgd->rd_bits + index;

	if (bi->bi_clone == 0)
		return;
	if (sdp->sd_args.ar_discard)
		gfs2_rgrp_send_discards(sdp, rgd->rd_data0, bd->bd_bh, bi);
	memcpy(bi->bi_clone + bi->bi_offset,
	       bd->bd_bh->b_data + bi->bi_offset, bi->bi_len);
	clear_bit(GBF_FULL, &bi->bi_flags);
	rgd->rd_free_clone = rgd->rd_free;
}

/**
 * gfs2_unpin - Unpin a buffer
 * @sdp: the filesystem the buffer belongs to
 * @bh: The buffer to unpin
 * @ai: The AIL list to add the unpinned buffer to
 *
 */

static void gfs2_unpin(struct gfs2_sbd *sdp, struct buffer_head *bh,
		       struct gfs2_ail *ai)
{
	struct gfs2_bufdata *bd = bh->b_private;

	BUG_ON(!buffer_uptodate(bh));
	BUG_ON(!buffer_pinned(bh));

	lock_buffer(bh);
	mark_buffer_dirty(bh);
	clear_buffer_pinned(bh);

	if (buffer_is_rgrp(bd))
		maybe_release_space(bd);

	spin_lock(&sdp->sd_ail_lock);
	if (bd->bd_ail) {
		list_del(&bd->bd_ail_st_list);
		brelse(bh);
	} else {
		struct gfs2_glock *gl = bd->bd_gl;
		list_add(&bd->bd_ail_gl_list, &gl->gl_ail_list);
		atomic_inc(&gl->gl_ail_count);
	}
	bd->bd_ail = ai;
	list_add(&bd->bd_ail_st_list, &ai->ai_ail1_list);
	spin_unlock(&sdp->sd_ail_lock);

	clear_bit(GLF_LFLUSH, &bd->bd_gl->gl_flags);
	trace_gfs2_pin(bd, 0);
	unlock_buffer(bh);
	atomic_dec(&sdp->sd_log_pinned);
}
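/*
 * Editor's note on the pin/unpin life cycle, inferred from the two helpers
 * above: gfs2_pin() is called while a buffer is part of the current
 * transaction; it clears the dirty bit and takes an extra reference so that
 * ordinary writeback leaves the buffer alone until its journal copy has been
 * written.  gfs2_unpin() runs from the lo_after_commit hooks once the log
 * flush is done; it re-dirties the buffer and queues it on the ai_ail1_list
 * of the just-committed transaction, from which the AIL code later writes it
 * back in place.
 */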
static inline struct gfs2_log_descriptor *bh_log_desc(struct buffer_head *bh)
{
	return (struct gfs2_log_descriptor *)bh->b_data;
}

static inline __be64 *bh_log_ptr(struct buffer_head *bh)
{
	struct gfs2_log_descriptor *ld = bh_log_desc(bh);
	return (__force __be64 *)(ld + 1);
}

static inline __be64 *bh_ptr_end(struct buffer_head *bh)
{
	return (__force __be64 *)(bh->b_data + bh->b_size);
}

/**
 * gfs2_log_write_endio - End of I/O for a log buffer
 * @bh: The buffer head
 * @uptodate: I/O Status
 *
 */

static void gfs2_log_write_endio(struct buffer_head *bh, int uptodate)
{
	struct gfs2_sbd *sdp = bh->b_private;
	bh->b_private = NULL;

	end_buffer_write_sync(bh, uptodate);
	if (atomic_dec_and_test(&sdp->sd_log_in_flight))
		wake_up(&sdp->sd_log_flush_wait);
}

/**
 * gfs2_log_get_buf - Get and initialize a buffer to use for log control data
 * @sdp: The GFS2 superblock
 *
 * Returns: the buffer_head
 */

static struct buffer_head *gfs2_log_get_buf(struct gfs2_sbd *sdp)
{
	u64 blkno = gfs2_log_bmap(sdp, sdp->sd_log_flush_head);
	struct buffer_head *bh;

	bh = sb_getblk(sdp->sd_vfs, blkno);
	lock_buffer(bh);
	memset(bh->b_data, 0, bh->b_size);
	set_buffer_uptodate(bh);
	clear_buffer_dirty(bh);
	gfs2_log_incr_head(sdp);
	atomic_inc(&sdp->sd_log_in_flight);
	bh->b_private = sdp;
	bh->b_end_io = gfs2_log_write_endio;

	return bh;
}

/**
 * gfs2_fake_write_endio - End of I/O for a fake log buffer
 * @bh: The buffer head
 * @uptodate: The I/O Status
 *
 */

static void gfs2_fake_write_endio(struct buffer_head *bh, int uptodate)
{
	struct buffer_head *real_bh = bh->b_private;
	struct gfs2_bufdata *bd = real_bh->b_private;
	struct gfs2_sbd *sdp = bd->bd_gl->gl_sbd;

	end_buffer_write_sync(bh, uptodate);
	free_buffer_head(bh);
	unlock_buffer(real_bh);
	brelse(real_bh);
	if (atomic_dec_and_test(&sdp->sd_log_in_flight))
		wake_up(&sdp->sd_log_flush_wait);
}

/**
 * gfs2_log_fake_buf - Build a fake buffer head to write metadata buffer to log
 * @sdp: the filesystem
 * @real: the buffer head whose data is to be written to the log
 *
 * Returns: the fake buffer head
 */

static struct buffer_head *gfs2_log_fake_buf(struct gfs2_sbd *sdp,
					     struct buffer_head *real)
{
	u64 blkno = gfs2_log_bmap(sdp, sdp->sd_log_flush_head);
	struct buffer_head *bh;

	bh = alloc_buffer_head(GFP_NOFS | __GFP_NOFAIL);
	atomic_set(&bh->b_count, 1);
	bh->b_state = (1 << BH_Mapped) | (1 << BH_Uptodate) | (1 << BH_Lock);
	set_bh_page(bh, real->b_page, bh_offset(real));
	bh->b_blocknr = blkno;
	bh->b_size = sdp->sd_sb.sb_bsize;
	bh->b_bdev = sdp->sd_vfs->s_bdev;
	bh->b_private = real;
	bh->b_end_io = gfs2_fake_write_endio;

	gfs2_log_incr_head(sdp);
	atomic_inc(&sdp->sd_log_in_flight);

	return bh;
}

static struct buffer_head *gfs2_get_log_desc(struct gfs2_sbd *sdp, u32 ld_type)
{
	struct buffer_head *bh = gfs2_log_get_buf(sdp);
	struct gfs2_log_descriptor *ld = bh_log_desc(bh);
	ld->ld_header.mh_magic = cpu_to_be32(GFS2_MAGIC);
	ld->ld_header.mh_type = cpu_to_be32(GFS2_METATYPE_LD);
	ld->ld_header.mh_format = cpu_to_be32(GFS2_FORMAT_LD);
	ld->ld_type = cpu_to_be32(ld_type);
	ld->ld_length = 0;
	ld->ld_data1 = 0;
	ld->ld_data2 = 0;
	memset(ld->ld_reserved, 0, sizeof(ld->ld_reserved));
	return bh;
}
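/*
 * Editor's summary of buf_lo_add() below (the function carries no kernel-doc
 * of its own): a dirty metadata buffer is linked onto the current
 * transaction's tr_list_buf and, the first time it is seen, pinned, stamped
 * with the journal id (mh_jid) and queued on sd_log_le_buf for the next log
 * flush.
 */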
static void buf_lo_add(struct gfs2_sbd *sdp, struct gfs2_log_element *le)
{
	struct gfs2_bufdata *bd = container_of(le, struct gfs2_bufdata, bd_le);
	struct gfs2_meta_header *mh;
	struct gfs2_trans *tr;

	lock_buffer(bd->bd_bh);
	gfs2_log_lock(sdp);
	if (!list_empty(&bd->bd_list_tr))
		goto out;
	tr = current->journal_info;
	tr->tr_touched = 1;
	tr->tr_num_buf++;
	list_add(&bd->bd_list_tr, &tr->tr_list_buf);
	if (!list_empty(&le->le_list))
		goto out;
	set_bit(GLF_LFLUSH, &bd->bd_gl->gl_flags);
	set_bit(GLF_DIRTY, &bd->bd_gl->gl_flags);
	gfs2_meta_check(sdp, bd->bd_bh);
	gfs2_pin(sdp, bd->bd_bh);
	mh = (struct gfs2_meta_header *)bd->bd_bh->b_data;
	mh->__pad0 = cpu_to_be64(0);
	mh->mh_jid = cpu_to_be32(sdp->sd_jdesc->jd_jid);
	sdp->sd_log_num_buf++;
	list_add(&le->le_list, &sdp->sd_log_le_buf);
	tr->tr_num_buf_new++;
out:
	gfs2_log_unlock(sdp);
	unlock_buffer(bd->bd_bh);
}
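/*
 * Editor's note: worked example for the "limit = 503" comment below, using a
 * 4k block size and the descriptor size implied by that figure (72 bytes):
 *
 *   metadata: (4096 - 72) / sizeof(__be64)       = 503 block pointers
 *   jdata:    (4096 - 72) / (2 * sizeof(__be64)) ~= 251 (pointer, escape)
 *             pairs, matching the "251 or so" quoted in the databuf comment
 *             further down.
 *
 * In other words, each log descriptor block is followed by at most that many
 * journaled blocks before a new descriptor is needed.
 */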
static void buf_lo_before_commit(struct gfs2_sbd *sdp)
{
	struct buffer_head *bh;
	struct gfs2_log_descriptor *ld;
	struct gfs2_bufdata *bd1 = NULL, *bd2;
	unsigned int total;
	unsigned int limit;
	unsigned int num;
	unsigned n;
	__be64 *ptr;

	limit = buf_limit(sdp);
	/* for 4k blocks, limit = 503 */

	gfs2_log_lock(sdp);
	total = sdp->sd_log_num_buf;
	bd1 = bd2 = list_prepare_entry(bd1, &sdp->sd_log_le_buf, bd_le.le_list);
	while (total) {
		num = total;
		if (total > limit)
			num = limit;
		gfs2_log_unlock(sdp);
		bh = gfs2_get_log_desc(sdp, GFS2_LOG_DESC_METADATA);
		gfs2_log_lock(sdp);
		ld = bh_log_desc(bh);
		ptr = bh_log_ptr(bh);
		ld->ld_length = cpu_to_be32(num + 1);
		ld->ld_data1 = cpu_to_be32(num);

		n = 0;
		list_for_each_entry_continue(bd1, &sdp->sd_log_le_buf,
					     bd_le.le_list) {
			*ptr++ = cpu_to_be64(bd1->bd_bh->b_blocknr);
			if (++n >= num)
				break;
		}

		gfs2_log_unlock(sdp);
		submit_bh(WRITE_SYNC, bh);
		gfs2_log_lock(sdp);

		n = 0;
		list_for_each_entry_continue(bd2, &sdp->sd_log_le_buf,
					     bd_le.le_list) {
			get_bh(bd2->bd_bh);
			gfs2_log_unlock(sdp);
			lock_buffer(bd2->bd_bh);
			bh = gfs2_log_fake_buf(sdp, bd2->bd_bh);
			submit_bh(WRITE_SYNC, bh);
			gfs2_log_lock(sdp);
			if (++n >= num)
				break;
		}

		BUG_ON(total < num);
		total -= num;
	}
	gfs2_log_unlock(sdp);
}

static void buf_lo_after_commit(struct gfs2_sbd *sdp, struct gfs2_ail *ai)
{
	struct list_head *head = &sdp->sd_log_le_buf;
	struct gfs2_bufdata *bd;

	while (!list_empty(head)) {
		bd = list_entry(head->next, struct gfs2_bufdata, bd_le.le_list);
		list_del_init(&bd->bd_le.le_list);
		sdp->sd_log_num_buf--;

		gfs2_unpin(sdp, bd->bd_bh, ai);
	}
	gfs2_assert_warn(sdp, !sdp->sd_log_num_buf);
}

static void buf_lo_before_scan(struct gfs2_jdesc *jd,
			       struct gfs2_log_header_host *head, int pass)
{
	struct gfs2_sbd *sdp = GFS2_SB(jd->jd_inode);

	if (pass != 0)
		return;

	sdp->sd_found_blocks = 0;
	sdp->sd_replayed_blocks = 0;
}

static int buf_lo_scan_elements(struct gfs2_jdesc *jd, unsigned int start,
				struct gfs2_log_descriptor *ld, __be64 *ptr,
				int pass)
{
	struct gfs2_inode *ip = GFS2_I(jd->jd_inode);
	struct gfs2_sbd *sdp = GFS2_SB(jd->jd_inode);
	struct gfs2_glock *gl = ip->i_gl;
	unsigned int blks = be32_to_cpu(ld->ld_data1);
	struct buffer_head *bh_log, *bh_ip;
	u64 blkno;
	int error = 0;

	if (pass != 1 || be32_to_cpu(ld->ld_type) != GFS2_LOG_DESC_METADATA)
		return 0;

	gfs2_replay_incr_blk(sdp, &start);

	for (; blks; gfs2_replay_incr_blk(sdp, &start), blks--) {
		blkno = be64_to_cpu(*ptr++);

		sdp->sd_found_blocks++;

		if (gfs2_revoke_check(sdp, blkno, start))
			continue;

		error = gfs2_replay_read_block(jd, start, &bh_log);
		if (error)
			return error;

		bh_ip = gfs2_meta_new(gl, blkno);
		memcpy(bh_ip->b_data, bh_log->b_data, bh_log->b_size);

		if (gfs2_meta_check(sdp, bh_ip))
			error = -EIO;
		else
			mark_buffer_dirty(bh_ip);

		brelse(bh_log);
		brelse(bh_ip);

		if (error)
			break;

		sdp->sd_replayed_blocks++;
	}

	return error;
}

static void buf_lo_after_scan(struct gfs2_jdesc *jd, int error, int pass)
{
	struct gfs2_inode *ip = GFS2_I(jd->jd_inode);
	struct gfs2_sbd *sdp = GFS2_SB(jd->jd_inode);

	if (error) {
		gfs2_meta_sync(ip->i_gl);
		return;
	}
	if (pass != 1)
		return;

	gfs2_meta_sync(ip->i_gl);

	fs_info(sdp, "jid=%u: Replayed %u of %u blocks\n",
		jd->jd_jid, sdp->sd_replayed_blocks, sdp->sd_found_blocks);
}
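/*
 * Editor's note on revokes, as used in this file: a revoke entry records the
 * block number of a previously journaled block that must not be replayed.
 * revoke_lo_scan_elements() feeds these numbers to gfs2_revoke_add() during
 * recovery pass 0, and buf_lo_scan_elements()/databuf_lo_scan_elements()
 * skip any block for which gfs2_revoke_check() matches during pass 1.
 */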
static void revoke_lo_add(struct gfs2_sbd *sdp, struct gfs2_log_element *le)
{
	struct gfs2_bufdata *bd = container_of(le, struct gfs2_bufdata, bd_le);
	struct gfs2_glock *gl = bd->bd_gl;
	struct gfs2_trans *tr;

	tr = current->journal_info;
	tr->tr_touched = 1;
	tr->tr_num_revoke++;
	sdp->sd_log_num_revoke++;
	atomic_inc(&gl->gl_revokes);
	set_bit(GLF_LFLUSH, &gl->gl_flags);
	list_add(&le->le_list, &sdp->sd_log_le_revoke);
}

static void revoke_lo_before_commit(struct gfs2_sbd *sdp)
{
	struct gfs2_log_descriptor *ld;
	struct gfs2_meta_header *mh;
	struct buffer_head *bh;
	unsigned int offset;
	struct list_head *head = &sdp->sd_log_le_revoke;
	struct gfs2_bufdata *bd;

	if (!sdp->sd_log_num_revoke)
		return;

	bh = gfs2_get_log_desc(sdp, GFS2_LOG_DESC_REVOKE);
	ld = bh_log_desc(bh);
	ld->ld_length = cpu_to_be32(gfs2_struct2blk(sdp, sdp->sd_log_num_revoke,
						    sizeof(u64)));
	ld->ld_data1 = cpu_to_be32(sdp->sd_log_num_revoke);
	offset = sizeof(struct gfs2_log_descriptor);

	list_for_each_entry(bd, head, bd_le.le_list) {
		sdp->sd_log_num_revoke--;

		if (offset + sizeof(u64) > sdp->sd_sb.sb_bsize) {
			submit_bh(WRITE_SYNC, bh);

			bh = gfs2_log_get_buf(sdp);
			mh = (struct gfs2_meta_header *)bh->b_data;
			mh->mh_magic = cpu_to_be32(GFS2_MAGIC);
			mh->mh_type = cpu_to_be32(GFS2_METATYPE_LB);
			mh->mh_format = cpu_to_be32(GFS2_FORMAT_LB);
			offset = sizeof(struct gfs2_meta_header);
		}

		*(__be64 *)(bh->b_data + offset) = cpu_to_be64(bd->bd_blkno);
		offset += sizeof(u64);
	}
	gfs2_assert_withdraw(sdp, !sdp->sd_log_num_revoke);

	submit_bh(WRITE_SYNC, bh);
}

static void revoke_lo_after_commit(struct gfs2_sbd *sdp, struct gfs2_ail *ai)
{
	struct list_head *head = &sdp->sd_log_le_revoke;
	struct gfs2_bufdata *bd;
	struct gfs2_glock *gl;

	while (!list_empty(head)) {
		bd = list_entry(head->next, struct gfs2_bufdata, bd_le.le_list);
		list_del_init(&bd->bd_le.le_list);
		gl = bd->bd_gl;
		atomic_dec(&gl->gl_revokes);
		clear_bit(GLF_LFLUSH, &gl->gl_flags);
		kmem_cache_free(gfs2_bufdata_cachep, bd);
	}
}

static void revoke_lo_before_scan(struct gfs2_jdesc *jd,
				  struct gfs2_log_header_host *head, int pass)
{
	struct gfs2_sbd *sdp = GFS2_SB(jd->jd_inode);

	if (pass != 0)
		return;

	sdp->sd_found_revokes = 0;
	sdp->sd_replay_tail = head->lh_tail;
}

static int revoke_lo_scan_elements(struct gfs2_jdesc *jd, unsigned int start,
				   struct gfs2_log_descriptor *ld, __be64 *ptr,
				   int pass)
{
	struct gfs2_sbd *sdp = GFS2_SB(jd->jd_inode);
	unsigned int blks = be32_to_cpu(ld->ld_length);
	unsigned int revokes = be32_to_cpu(ld->ld_data1);
	struct buffer_head *bh;
	unsigned int offset;
	u64 blkno;
	int first = 1;
	int error;

	if (pass != 0 || be32_to_cpu(ld->ld_type) != GFS2_LOG_DESC_REVOKE)
		return 0;

	offset = sizeof(struct gfs2_log_descriptor);

	for (; blks; gfs2_replay_incr_blk(sdp, &start), blks--) {
		error = gfs2_replay_read_block(jd, start, &bh);
		if (error)
			return error;

		if (!first)
			gfs2_metatype_check(sdp, bh, GFS2_METATYPE_LB);

		while (offset + sizeof(u64) <= sdp->sd_sb.sb_bsize) {
			blkno = be64_to_cpu(*(__be64 *)(bh->b_data + offset));

			error = gfs2_revoke_add(sdp, blkno, start);
			if (error < 0) {
				brelse(bh);
				return error;
			} else if (error)
				sdp->sd_found_revokes++;

			if (!--revokes)
				break;
			offset += sizeof(u64);
		}

		brelse(bh);
		offset = sizeof(struct gfs2_meta_header);
		first = 0;
	}

	return 0;
}

static void revoke_lo_after_scan(struct gfs2_jdesc *jd, int error, int pass)
{
	struct gfs2_sbd *sdp = GFS2_SB(jd->jd_inode);

	if (error) {
		gfs2_revoke_clean(sdp);
		return;
	}
	if (pass != 1)
		return;

	fs_info(sdp, "jid=%u: Found %u revoke tags\n",
		jd->jd_jid, sdp->sd_found_revokes);

	gfs2_revoke_clean(sdp);
}
/**
 * databuf_lo_add - Add a databuf to the transaction.
 *
 * This is used in two distinct cases:
 * i) In ordered write mode
 *    We put the data buffer on a list so that we can ensure that it's
 *    synced to disk at the right time
 * ii) In journaled data mode
 *    We need to journal the data block in the same way as metadata in
 *    the functions above. The difference is that here we have a tag
 *    which is two __be64's being the block number (as per metadata)
 *    and a flag which says whether the data block needs escaping or
 *    not. This means we need a new log entry for each 251 or so data
 *    blocks, which isn't an enormous overhead but twice as much as
 *    for normal metadata blocks.
 */
static void databuf_lo_add(struct gfs2_sbd *sdp, struct gfs2_log_element *le)
{
	struct gfs2_bufdata *bd = container_of(le, struct gfs2_bufdata, bd_le);
	struct gfs2_trans *tr = current->journal_info;
	struct address_space *mapping = bd->bd_bh->b_page->mapping;
	struct gfs2_inode *ip = GFS2_I(mapping->host);

	lock_buffer(bd->bd_bh);
	gfs2_log_lock(sdp);
	if (tr) {
		if (!list_empty(&bd->bd_list_tr))
			goto out;
		tr->tr_touched = 1;
		if (gfs2_is_jdata(ip)) {
			tr->tr_num_buf++;
			list_add(&bd->bd_list_tr, &tr->tr_list_buf);
		}
	}
	if (!list_empty(&le->le_list))
		goto out;

	set_bit(GLF_LFLUSH, &bd->bd_gl->gl_flags);
	set_bit(GLF_DIRTY, &bd->bd_gl->gl_flags);
	if (gfs2_is_jdata(ip)) {
		gfs2_pin(sdp, bd->bd_bh);
		tr->tr_num_databuf_new++;
		sdp->sd_log_num_databuf++;
		list_add_tail(&le->le_list, &sdp->sd_log_le_databuf);
	} else {
		list_add_tail(&le->le_list, &sdp->sd_log_le_ordered);
	}
out:
	gfs2_log_unlock(sdp);
	unlock_buffer(bd->bd_bh);
}

static void gfs2_check_magic(struct buffer_head *bh)
{
	void *kaddr;
	__be32 *ptr;

	clear_buffer_escaped(bh);
	kaddr = kmap_atomic(bh->b_page, KM_USER0);
	ptr = kaddr + bh_offset(bh);
	if (*ptr == cpu_to_be32(GFS2_MAGIC))
		set_buffer_escaped(bh);
	kunmap_atomic(kaddr, KM_USER0);
}
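/*
 * Editor's note on "escaping", pieced together from gfs2_check_magic() above
 * and gfs2_write_blocks()/databuf_lo_scan_elements() below: a journaled data
 * block whose first word happens to equal GFS2_MAGIC could be mistaken for a
 * metadata header when the journal is scanned.  Such a block is flagged as
 * escaped here; the copy written to the log gets its first __be32 zeroed and
 * the tag's second __be64 set to 1, and replay writes GFS2_MAGIC back into
 * the block before it goes to its in-place location.
 */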
static void gfs2_write_blocks(struct gfs2_sbd *sdp, struct buffer_head *bh,
			      struct list_head *list, struct list_head *done,
			      unsigned int n)
{
	struct buffer_head *bh1;
	struct gfs2_log_descriptor *ld;
	struct gfs2_bufdata *bd;
	__be64 *ptr;

	if (!bh)
		return;

	ld = bh_log_desc(bh);
	ld->ld_length = cpu_to_be32(n + 1);
	ld->ld_data1 = cpu_to_be32(n);

	ptr = bh_log_ptr(bh);

	get_bh(bh);
	submit_bh(WRITE_SYNC, bh);
	gfs2_log_lock(sdp);
	while (!list_empty(list)) {
		bd = list_entry(list->next, struct gfs2_bufdata, bd_le.le_list);
		list_move_tail(&bd->bd_le.le_list, done);
		get_bh(bd->bd_bh);
		while (be64_to_cpu(*ptr) != bd->bd_bh->b_blocknr) {
			gfs2_log_incr_head(sdp);
			ptr += 2;
		}
		gfs2_log_unlock(sdp);
		lock_buffer(bd->bd_bh);
		if (buffer_escaped(bd->bd_bh)) {
			void *kaddr;
			bh1 = gfs2_log_get_buf(sdp);
			kaddr = kmap_atomic(bd->bd_bh->b_page, KM_USER0);
			memcpy(bh1->b_data, kaddr + bh_offset(bd->bd_bh),
			       bh1->b_size);
			kunmap_atomic(kaddr, KM_USER0);
			*(__be32 *)bh1->b_data = 0;
			clear_buffer_escaped(bd->bd_bh);
			unlock_buffer(bd->bd_bh);
			brelse(bd->bd_bh);
		} else {
			bh1 = gfs2_log_fake_buf(sdp, bd->bd_bh);
		}
		submit_bh(WRITE_SYNC, bh1);
		gfs2_log_lock(sdp);
		ptr += 2;
	}
	gfs2_log_unlock(sdp);
	brelse(bh);
}

/**
 * databuf_lo_before_commit - Scan the data buffers, writing as we go
 *
 */

static void databuf_lo_before_commit(struct gfs2_sbd *sdp)
{
	struct gfs2_bufdata *bd = NULL;
	struct buffer_head *bh = NULL;
	unsigned int n = 0;
	__be64 *ptr = NULL, *end = NULL;
	LIST_HEAD(processed);
	LIST_HEAD(in_progress);

	gfs2_log_lock(sdp);
	while (!list_empty(&sdp->sd_log_le_databuf)) {
		if (ptr == end) {
			gfs2_log_unlock(sdp);
			gfs2_write_blocks(sdp, bh, &in_progress, &processed, n);
			n = 0;
			bh = gfs2_get_log_desc(sdp, GFS2_LOG_DESC_JDATA);
			ptr = bh_log_ptr(bh);
			end = bh_ptr_end(bh) - 1;
			gfs2_log_lock(sdp);
			continue;
		}
		bd = list_entry(sdp->sd_log_le_databuf.next,
				struct gfs2_bufdata, bd_le.le_list);
		list_move_tail(&bd->bd_le.le_list, &in_progress);
		gfs2_check_magic(bd->bd_bh);
		*ptr++ = cpu_to_be64(bd->bd_bh->b_blocknr);
		/*
		 * The escape flag must describe the data buffer itself
		 * (bd->bd_bh), not the log descriptor buffer bh.
		 */
		*ptr++ = cpu_to_be64(buffer_escaped(bd->bd_bh) ? 1 : 0);
		n++;
	}
	gfs2_log_unlock(sdp);
	gfs2_write_blocks(sdp, bh, &in_progress, &processed, n);
	gfs2_log_lock(sdp);
	list_splice(&processed, &sdp->sd_log_le_databuf);
	gfs2_log_unlock(sdp);
}

static int databuf_lo_scan_elements(struct gfs2_jdesc *jd, unsigned int start,
				    struct gfs2_log_descriptor *ld,
				    __be64 *ptr, int pass)
{
	struct gfs2_inode *ip = GFS2_I(jd->jd_inode);
	struct gfs2_sbd *sdp = GFS2_SB(jd->jd_inode);
	struct gfs2_glock *gl = ip->i_gl;
	unsigned int blks = be32_to_cpu(ld->ld_data1);
	struct buffer_head *bh_log, *bh_ip;
	u64 blkno;
	u64 esc;
	int error = 0;

	if (pass != 1 || be32_to_cpu(ld->ld_type) != GFS2_LOG_DESC_JDATA)
		return 0;

	gfs2_replay_incr_blk(sdp, &start);
	for (; blks; gfs2_replay_incr_blk(sdp, &start), blks--) {
		blkno = be64_to_cpu(*ptr++);
		esc = be64_to_cpu(*ptr++);

		sdp->sd_found_blocks++;

		if (gfs2_revoke_check(sdp, blkno, start))
			continue;

		error = gfs2_replay_read_block(jd, start, &bh_log);
		if (error)
			return error;

		bh_ip = gfs2_meta_new(gl, blkno);
		memcpy(bh_ip->b_data, bh_log->b_data, bh_log->b_size);

		/* Unescape */
		if (esc) {
			__be32 *eptr = (__be32 *)bh_ip->b_data;
			*eptr = cpu_to_be32(GFS2_MAGIC);
		}
		mark_buffer_dirty(bh_ip);

		brelse(bh_log);
		brelse(bh_ip);

		sdp->sd_replayed_blocks++;
	}

	return error;
}
/* FIXME: sort out accounting for log blocks etc. */

static void databuf_lo_after_scan(struct gfs2_jdesc *jd, int error, int pass)
{
	struct gfs2_inode *ip = GFS2_I(jd->jd_inode);
	struct gfs2_sbd *sdp = GFS2_SB(jd->jd_inode);

	if (error) {
		gfs2_meta_sync(ip->i_gl);
		return;
	}
	if (pass != 1)
		return;

	/* data sync? */
	gfs2_meta_sync(ip->i_gl);

	fs_info(sdp, "jid=%u: Replayed %u of %u data blocks\n",
		jd->jd_jid, sdp->sd_replayed_blocks, sdp->sd_found_blocks);
}

static void databuf_lo_after_commit(struct gfs2_sbd *sdp, struct gfs2_ail *ai)
{
	struct list_head *head = &sdp->sd_log_le_databuf;
	struct gfs2_bufdata *bd;

	while (!list_empty(head)) {
		bd = list_entry(head->next, struct gfs2_bufdata, bd_le.le_list);
		list_del_init(&bd->bd_le.le_list);
		sdp->sd_log_num_databuf--;
		gfs2_unpin(sdp, bd->bd_bh, ai);
	}
	gfs2_assert_warn(sdp, !sdp->sd_log_num_databuf);
}


const struct gfs2_log_operations gfs2_buf_lops = {
	.lo_add = buf_lo_add,
	.lo_before_commit = buf_lo_before_commit,
	.lo_after_commit = buf_lo_after_commit,
	.lo_before_scan = buf_lo_before_scan,
	.lo_scan_elements = buf_lo_scan_elements,
	.lo_after_scan = buf_lo_after_scan,
	.lo_name = "buf",
};

const struct gfs2_log_operations gfs2_revoke_lops = {
	.lo_add = revoke_lo_add,
	.lo_before_commit = revoke_lo_before_commit,
	.lo_after_commit = revoke_lo_after_commit,
	.lo_before_scan = revoke_lo_before_scan,
	.lo_scan_elements = revoke_lo_scan_elements,
	.lo_after_scan = revoke_lo_after_scan,
	.lo_name = "revoke",
};

const struct gfs2_log_operations gfs2_rg_lops = {
	.lo_name = "rg",
};

const struct gfs2_log_operations gfs2_databuf_lops = {
	.lo_add = databuf_lo_add,
	.lo_before_commit = databuf_lo_before_commit,
	.lo_after_commit = databuf_lo_after_commit,
	.lo_scan_elements = databuf_lo_scan_elements,
	.lo_after_scan = databuf_lo_after_scan,
	.lo_name = "databuf",
};

const struct gfs2_log_operations *gfs2_log_ops[] = {
	&gfs2_databuf_lops,
	&gfs2_buf_lops,
	&gfs2_rg_lops,
	&gfs2_revoke_lops,
	NULL,
};
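/*
 * Editor's note: the lops_*() wrappers (in lops.h) walk gfs2_log_ops[] in
 * array order until the NULL terminator, so data buffers are handled before
 * metadata buffers and revokes come last; entries that do not provide a
 * given hook (e.g. gfs2_rg_lops, which only has a name) are skipped for
 * that hook.
 */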