1 /* 2 * Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved. 3 * Copyright (C) 2004-2006 Red Hat, Inc. All rights reserved. 4 * 5 * This copyrighted material is made available to anyone wishing to use, 6 * modify, copy, or redistribute it subject to the terms and conditions 7 * of the GNU General Public License version 2. 8 */ 9 10 #include <linux/sched.h> 11 #include <linux/slab.h> 12 #include <linux/spinlock.h> 13 #include <linux/completion.h> 14 #include <linux/buffer_head.h> 15 #include <linux/gfs2_ondisk.h> 16 #include <linux/lm_interface.h> 17 18 #include "gfs2.h" 19 #include "incore.h" 20 #include "inode.h" 21 #include "glock.h" 22 #include "log.h" 23 #include "lops.h" 24 #include "meta_io.h" 25 #include "recovery.h" 26 #include "rgrp.h" 27 #include "trans.h" 28 #include "util.h" 29 30 static void glock_lo_add(struct gfs2_sbd *sdp, struct gfs2_log_element *le) 31 { 32 struct gfs2_glock *gl; 33 struct gfs2_trans *tr = current->journal_info; 34 35 tr->tr_touched = 1; 36 37 gl = container_of(le, struct gfs2_glock, gl_le); 38 if (gfs2_assert_withdraw(sdp, gfs2_glock_is_held_excl(gl))) 39 return; 40 41 gfs2_log_lock(sdp); 42 if (!list_empty(&le->le_list)){ 43 gfs2_log_unlock(sdp); 44 return; 45 } 46 gfs2_glock_hold(gl); 47 set_bit(GLF_DIRTY, &gl->gl_flags); 48 sdp->sd_log_num_gl++; 49 list_add(&le->le_list, &sdp->sd_log_le_gl); 50 gfs2_log_unlock(sdp); 51 } 52 53 static void glock_lo_after_commit(struct gfs2_sbd *sdp, struct gfs2_ail *ai) 54 { 55 struct list_head *head = &sdp->sd_log_le_gl; 56 struct gfs2_glock *gl; 57 58 while (!list_empty(head)) { 59 gl = list_entry(head->next, struct gfs2_glock, gl_le.le_list); 60 list_del_init(&gl->gl_le.le_list); 61 sdp->sd_log_num_gl--; 62 63 gfs2_assert_withdraw(sdp, gfs2_glock_is_held_excl(gl)); 64 gfs2_glock_put(gl); 65 } 66 gfs2_assert_warn(sdp, !sdp->sd_log_num_gl); 67 } 68 69 static void buf_lo_add(struct gfs2_sbd *sdp, struct gfs2_log_element *le) 70 { 71 struct gfs2_bufdata *bd = container_of(le, struct gfs2_bufdata, bd_le); 72 struct gfs2_trans *tr; 73 74 gfs2_log_lock(sdp); 75 if (!list_empty(&bd->bd_list_tr)) { 76 gfs2_log_unlock(sdp); 77 return; 78 } 79 tr = current->journal_info; 80 tr->tr_touched = 1; 81 tr->tr_num_buf++; 82 list_add(&bd->bd_list_tr, &tr->tr_list_buf); 83 gfs2_log_unlock(sdp); 84 85 if (!list_empty(&le->le_list)) 86 return; 87 88 gfs2_trans_add_gl(bd->bd_gl); 89 90 gfs2_meta_check(sdp, bd->bd_bh); 91 gfs2_pin(sdp, bd->bd_bh); 92 gfs2_log_lock(sdp); 93 sdp->sd_log_num_buf++; 94 list_add(&le->le_list, &sdp->sd_log_le_buf); 95 gfs2_log_unlock(sdp); 96 97 tr->tr_num_buf_new++; 98 } 99 100 static void buf_lo_incore_commit(struct gfs2_sbd *sdp, struct gfs2_trans *tr) 101 { 102 struct list_head *head = &tr->tr_list_buf; 103 struct gfs2_bufdata *bd; 104 105 gfs2_log_lock(sdp); 106 while (!list_empty(head)) { 107 bd = list_entry(head->next, struct gfs2_bufdata, bd_list_tr); 108 list_del_init(&bd->bd_list_tr); 109 tr->tr_num_buf--; 110 } 111 gfs2_log_unlock(sdp); 112 gfs2_assert_warn(sdp, !tr->tr_num_buf); 113 } 114 115 static void buf_lo_before_commit(struct gfs2_sbd *sdp) 116 { 117 struct buffer_head *bh; 118 struct gfs2_log_descriptor *ld; 119 struct gfs2_bufdata *bd1 = NULL, *bd2; 120 unsigned int total = sdp->sd_log_num_buf; 121 unsigned int offset = BUF_OFFSET; 122 unsigned int limit; 123 unsigned int num; 124 unsigned n; 125 __be64 *ptr; 126 127 limit = buf_limit(sdp); 128 /* for 4k blocks, limit = 503 */ 129 130 bd1 = bd2 = list_prepare_entry(bd1, &sdp->sd_log_le_buf, bd_le.le_list); 131 while(total) { 132 num = total; 133 if (total > limit) 134 num = limit; 135 bh = gfs2_log_get_buf(sdp); 136 ld = (struct gfs2_log_descriptor *)bh->b_data; 137 ptr = (__be64 *)(bh->b_data + offset); 138 ld->ld_header.mh_magic = cpu_to_be32(GFS2_MAGIC); 139 ld->ld_header.mh_type = cpu_to_be32(GFS2_METATYPE_LD); 140 ld->ld_header.mh_format = cpu_to_be32(GFS2_FORMAT_LD); 141 ld->ld_type = cpu_to_be32(GFS2_LOG_DESC_METADATA); 142 ld->ld_length = cpu_to_be32(num + 1); 143 ld->ld_data1 = cpu_to_be32(num); 144 ld->ld_data2 = cpu_to_be32(0); 145 memset(ld->ld_reserved, 0, sizeof(ld->ld_reserved)); 146 147 n = 0; 148 list_for_each_entry_continue(bd1, &sdp->sd_log_le_buf, 149 bd_le.le_list) { 150 *ptr++ = cpu_to_be64(bd1->bd_bh->b_blocknr); 151 if (++n >= num) 152 break; 153 } 154 155 set_buffer_dirty(bh); 156 ll_rw_block(WRITE, 1, &bh); 157 158 n = 0; 159 list_for_each_entry_continue(bd2, &sdp->sd_log_le_buf, 160 bd_le.le_list) { 161 bh = gfs2_log_fake_buf(sdp, bd2->bd_bh); 162 set_buffer_dirty(bh); 163 ll_rw_block(WRITE, 1, &bh); 164 if (++n >= num) 165 break; 166 } 167 168 total -= num; 169 } 170 } 171 172 static void buf_lo_after_commit(struct gfs2_sbd *sdp, struct gfs2_ail *ai) 173 { 174 struct list_head *head = &sdp->sd_log_le_buf; 175 struct gfs2_bufdata *bd; 176 177 while (!list_empty(head)) { 178 bd = list_entry(head->next, struct gfs2_bufdata, bd_le.le_list); 179 list_del_init(&bd->bd_le.le_list); 180 sdp->sd_log_num_buf--; 181 182 gfs2_unpin(sdp, bd->bd_bh, ai); 183 } 184 gfs2_assert_warn(sdp, !sdp->sd_log_num_buf); 185 } 186 187 static void buf_lo_before_scan(struct gfs2_jdesc *jd, 188 struct gfs2_log_header_host *head, int pass) 189 { 190 struct gfs2_sbd *sdp = GFS2_SB(jd->jd_inode); 191 192 if (pass != 0) 193 return; 194 195 sdp->sd_found_blocks = 0; 196 sdp->sd_replayed_blocks = 0; 197 } 198 199 static int buf_lo_scan_elements(struct gfs2_jdesc *jd, unsigned int start, 200 struct gfs2_log_descriptor *ld, __be64 *ptr, 201 int pass) 202 { 203 struct gfs2_inode *ip = GFS2_I(jd->jd_inode); 204 struct gfs2_sbd *sdp = GFS2_SB(jd->jd_inode); 205 struct gfs2_glock *gl = ip->i_gl; 206 unsigned int blks = be32_to_cpu(ld->ld_data1); 207 struct buffer_head *bh_log, *bh_ip; 208 u64 blkno; 209 int error = 0; 210 211 if (pass != 1 || be32_to_cpu(ld->ld_type) != GFS2_LOG_DESC_METADATA) 212 return 0; 213 214 gfs2_replay_incr_blk(sdp, &start); 215 216 for (; blks; gfs2_replay_incr_blk(sdp, &start), blks--) { 217 blkno = be64_to_cpu(*ptr++); 218 219 sdp->sd_found_blocks++; 220 221 if (gfs2_revoke_check(sdp, blkno, start)) 222 continue; 223 224 error = gfs2_replay_read_block(jd, start, &bh_log); 225 if (error) 226 return error; 227 228 bh_ip = gfs2_meta_new(gl, blkno); 229 memcpy(bh_ip->b_data, bh_log->b_data, bh_log->b_size); 230 231 if (gfs2_meta_check(sdp, bh_ip)) 232 error = -EIO; 233 else 234 mark_buffer_dirty(bh_ip); 235 236 brelse(bh_log); 237 brelse(bh_ip); 238 239 if (error) 240 break; 241 242 sdp->sd_replayed_blocks++; 243 } 244 245 return error; 246 } 247 248 static void buf_lo_after_scan(struct gfs2_jdesc *jd, int error, int pass) 249 { 250 struct gfs2_inode *ip = GFS2_I(jd->jd_inode); 251 struct gfs2_sbd *sdp = GFS2_SB(jd->jd_inode); 252 253 if (error) { 254 gfs2_meta_sync(ip->i_gl); 255 return; 256 } 257 if (pass != 1) 258 return; 259 260 gfs2_meta_sync(ip->i_gl); 261 262 fs_info(sdp, "jid=%u: Replayed %u of %u blocks\n", 263 jd->jd_jid, sdp->sd_replayed_blocks, sdp->sd_found_blocks); 264 } 265 266 static void revoke_lo_add(struct gfs2_sbd *sdp, struct gfs2_log_element *le) 267 { 268 struct gfs2_trans *tr; 269 270 tr = current->journal_info; 271 tr->tr_touched = 1; 272 tr->tr_num_revoke++; 273 274 gfs2_log_lock(sdp); 275 sdp->sd_log_num_revoke++; 276 list_add(&le->le_list, &sdp->sd_log_le_revoke); 277 gfs2_log_unlock(sdp); 278 } 279 280 static void revoke_lo_before_commit(struct gfs2_sbd *sdp) 281 { 282 struct gfs2_log_descriptor *ld; 283 struct gfs2_meta_header *mh; 284 struct buffer_head *bh; 285 unsigned int offset; 286 struct list_head *head = &sdp->sd_log_le_revoke; 287 struct gfs2_revoke *rv; 288 289 if (!sdp->sd_log_num_revoke) 290 return; 291 292 bh = gfs2_log_get_buf(sdp); 293 ld = (struct gfs2_log_descriptor *)bh->b_data; 294 ld->ld_header.mh_magic = cpu_to_be32(GFS2_MAGIC); 295 ld->ld_header.mh_type = cpu_to_be32(GFS2_METATYPE_LD); 296 ld->ld_header.mh_format = cpu_to_be32(GFS2_FORMAT_LD); 297 ld->ld_type = cpu_to_be32(GFS2_LOG_DESC_REVOKE); 298 ld->ld_length = cpu_to_be32(gfs2_struct2blk(sdp, sdp->sd_log_num_revoke, 299 sizeof(u64))); 300 ld->ld_data1 = cpu_to_be32(sdp->sd_log_num_revoke); 301 ld->ld_data2 = cpu_to_be32(0); 302 memset(ld->ld_reserved, 0, sizeof(ld->ld_reserved)); 303 offset = sizeof(struct gfs2_log_descriptor); 304 305 while (!list_empty(head)) { 306 rv = list_entry(head->next, struct gfs2_revoke, rv_le.le_list); 307 list_del_init(&rv->rv_le.le_list); 308 sdp->sd_log_num_revoke--; 309 310 if (offset + sizeof(u64) > sdp->sd_sb.sb_bsize) { 311 set_buffer_dirty(bh); 312 ll_rw_block(WRITE, 1, &bh); 313 314 bh = gfs2_log_get_buf(sdp); 315 mh = (struct gfs2_meta_header *)bh->b_data; 316 mh->mh_magic = cpu_to_be32(GFS2_MAGIC); 317 mh->mh_type = cpu_to_be32(GFS2_METATYPE_LB); 318 mh->mh_format = cpu_to_be32(GFS2_FORMAT_LB); 319 offset = sizeof(struct gfs2_meta_header); 320 } 321 322 *(__be64 *)(bh->b_data + offset) = cpu_to_be64(rv->rv_blkno); 323 kfree(rv); 324 325 offset += sizeof(u64); 326 } 327 gfs2_assert_withdraw(sdp, !sdp->sd_log_num_revoke); 328 329 set_buffer_dirty(bh); 330 ll_rw_block(WRITE, 1, &bh); 331 } 332 333 static void revoke_lo_before_scan(struct gfs2_jdesc *jd, 334 struct gfs2_log_header_host *head, int pass) 335 { 336 struct gfs2_sbd *sdp = GFS2_SB(jd->jd_inode); 337 338 if (pass != 0) 339 return; 340 341 sdp->sd_found_revokes = 0; 342 sdp->sd_replay_tail = head->lh_tail; 343 } 344 345 static int revoke_lo_scan_elements(struct gfs2_jdesc *jd, unsigned int start, 346 struct gfs2_log_descriptor *ld, __be64 *ptr, 347 int pass) 348 { 349 struct gfs2_sbd *sdp = GFS2_SB(jd->jd_inode); 350 unsigned int blks = be32_to_cpu(ld->ld_length); 351 unsigned int revokes = be32_to_cpu(ld->ld_data1); 352 struct buffer_head *bh; 353 unsigned int offset; 354 u64 blkno; 355 int first = 1; 356 int error; 357 358 if (pass != 0 || be32_to_cpu(ld->ld_type) != GFS2_LOG_DESC_REVOKE) 359 return 0; 360 361 offset = sizeof(struct gfs2_log_descriptor); 362 363 for (; blks; gfs2_replay_incr_blk(sdp, &start), blks--) { 364 error = gfs2_replay_read_block(jd, start, &bh); 365 if (error) 366 return error; 367 368 if (!first) 369 gfs2_metatype_check(sdp, bh, GFS2_METATYPE_LB); 370 371 while (offset + sizeof(u64) <= sdp->sd_sb.sb_bsize) { 372 blkno = be64_to_cpu(*(__be64 *)(bh->b_data + offset)); 373 374 error = gfs2_revoke_add(sdp, blkno, start); 375 if (error < 0) 376 return error; 377 else if (error) 378 sdp->sd_found_revokes++; 379 380 if (!--revokes) 381 break; 382 offset += sizeof(u64); 383 } 384 385 brelse(bh); 386 offset = sizeof(struct gfs2_meta_header); 387 first = 0; 388 } 389 390 return 0; 391 } 392 393 static void revoke_lo_after_scan(struct gfs2_jdesc *jd, int error, int pass) 394 { 395 struct gfs2_sbd *sdp = GFS2_SB(jd->jd_inode); 396 397 if (error) { 398 gfs2_revoke_clean(sdp); 399 return; 400 } 401 if (pass != 1) 402 return; 403 404 fs_info(sdp, "jid=%u: Found %u revoke tags\n", 405 jd->jd_jid, sdp->sd_found_revokes); 406 407 gfs2_revoke_clean(sdp); 408 } 409 410 static void rg_lo_add(struct gfs2_sbd *sdp, struct gfs2_log_element *le) 411 { 412 struct gfs2_rgrpd *rgd; 413 struct gfs2_trans *tr = current->journal_info; 414 415 tr->tr_touched = 1; 416 417 rgd = container_of(le, struct gfs2_rgrpd, rd_le); 418 419 gfs2_log_lock(sdp); 420 if (!list_empty(&le->le_list)){ 421 gfs2_log_unlock(sdp); 422 return; 423 } 424 gfs2_rgrp_bh_hold(rgd); 425 sdp->sd_log_num_rg++; 426 list_add(&le->le_list, &sdp->sd_log_le_rg); 427 gfs2_log_unlock(sdp); 428 } 429 430 static void rg_lo_after_commit(struct gfs2_sbd *sdp, struct gfs2_ail *ai) 431 { 432 struct list_head *head = &sdp->sd_log_le_rg; 433 struct gfs2_rgrpd *rgd; 434 435 while (!list_empty(head)) { 436 rgd = list_entry(head->next, struct gfs2_rgrpd, rd_le.le_list); 437 list_del_init(&rgd->rd_le.le_list); 438 sdp->sd_log_num_rg--; 439 440 gfs2_rgrp_repolish_clones(rgd); 441 gfs2_rgrp_bh_put(rgd); 442 } 443 gfs2_assert_warn(sdp, !sdp->sd_log_num_rg); 444 } 445 446 /** 447 * databuf_lo_add - Add a databuf to the transaction. 448 * 449 * This is used in two distinct cases: 450 * i) In ordered write mode 451 * We put the data buffer on a list so that we can ensure that its 452 * synced to disk at the right time 453 * ii) In journaled data mode 454 * We need to journal the data block in the same way as metadata in 455 * the functions above. The difference is that here we have a tag 456 * which is two __be64's being the block number (as per meta data) 457 * and a flag which says whether the data block needs escaping or 458 * not. This means we need a new log entry for each 251 or so data 459 * blocks, which isn't an enormous overhead but twice as much as 460 * for normal metadata blocks. 461 */ 462 static void databuf_lo_add(struct gfs2_sbd *sdp, struct gfs2_log_element *le) 463 { 464 struct gfs2_bufdata *bd = container_of(le, struct gfs2_bufdata, bd_le); 465 struct gfs2_trans *tr = current->journal_info; 466 struct address_space *mapping = bd->bd_bh->b_page->mapping; 467 struct gfs2_inode *ip = GFS2_I(mapping->host); 468 469 gfs2_log_lock(sdp); 470 if (!list_empty(&bd->bd_list_tr)) { 471 gfs2_log_unlock(sdp); 472 return; 473 } 474 tr->tr_touched = 1; 475 if (gfs2_is_jdata(ip)) { 476 tr->tr_num_buf++; 477 list_add(&bd->bd_list_tr, &tr->tr_list_buf); 478 } 479 gfs2_log_unlock(sdp); 480 if (!list_empty(&le->le_list)) 481 return; 482 483 gfs2_trans_add_gl(bd->bd_gl); 484 if (gfs2_is_jdata(ip)) { 485 sdp->sd_log_num_jdata++; 486 gfs2_pin(sdp, bd->bd_bh); 487 tr->tr_num_databuf_new++; 488 } 489 gfs2_log_lock(sdp); 490 sdp->sd_log_num_databuf++; 491 list_add(&le->le_list, &sdp->sd_log_le_databuf); 492 gfs2_log_unlock(sdp); 493 } 494 495 static int gfs2_check_magic(struct buffer_head *bh) 496 { 497 struct page *page = bh->b_page; 498 void *kaddr; 499 __be32 *ptr; 500 int rv = 0; 501 502 kaddr = kmap_atomic(page, KM_USER0); 503 ptr = kaddr + bh_offset(bh); 504 if (*ptr == cpu_to_be32(GFS2_MAGIC)) 505 rv = 1; 506 kunmap_atomic(kaddr, KM_USER0); 507 508 return rv; 509 } 510 511 /** 512 * databuf_lo_before_commit - Scan the data buffers, writing as we go 513 * 514 * Here we scan through the lists of buffers and make the assumption 515 * that any buffer thats been pinned is being journaled, and that 516 * any unpinned buffer is an ordered write data buffer and therefore 517 * will be written back rather than journaled. 518 */ 519 static void databuf_lo_before_commit(struct gfs2_sbd *sdp) 520 { 521 LIST_HEAD(started); 522 struct gfs2_bufdata *bd1 = NULL, *bd2, *bdt; 523 struct buffer_head *bh = NULL,*bh1 = NULL; 524 struct gfs2_log_descriptor *ld; 525 unsigned int limit; 526 unsigned int total_dbuf; 527 unsigned int total_jdata = sdp->sd_log_num_jdata; 528 unsigned int num, n; 529 __be64 *ptr = NULL; 530 531 limit = databuf_limit(sdp); 532 533 /* 534 * Start writing ordered buffers, write journaled buffers 535 * into the log along with a header 536 */ 537 gfs2_log_lock(sdp); 538 total_dbuf = sdp->sd_log_num_databuf; 539 bd2 = bd1 = list_prepare_entry(bd1, &sdp->sd_log_le_databuf, 540 bd_le.le_list); 541 while(total_dbuf) { 542 num = total_jdata; 543 if (num > limit) 544 num = limit; 545 n = 0; 546 list_for_each_entry_safe_continue(bd1, bdt, 547 &sdp->sd_log_le_databuf, 548 bd_le.le_list) { 549 /* store off the buffer head in a local ptr since 550 * gfs2_bufdata might change when we drop the log lock 551 */ 552 bh1 = bd1->bd_bh; 553 554 /* An ordered write buffer */ 555 if (bh1 && !buffer_pinned(bh1)) { 556 list_move(&bd1->bd_le.le_list, &started); 557 if (bd1 == bd2) { 558 bd2 = NULL; 559 bd2 = list_prepare_entry(bd2, 560 &sdp->sd_log_le_databuf, 561 bd_le.le_list); 562 } 563 total_dbuf--; 564 if (bh1) { 565 if (buffer_dirty(bh1)) { 566 get_bh(bh1); 567 568 gfs2_log_unlock(sdp); 569 570 ll_rw_block(SWRITE, 1, &bh1); 571 brelse(bh1); 572 573 gfs2_log_lock(sdp); 574 } 575 continue; 576 } 577 continue; 578 } else if (bh1) { /* A journaled buffer */ 579 int magic; 580 gfs2_log_unlock(sdp); 581 if (!bh) { 582 bh = gfs2_log_get_buf(sdp); 583 ld = (struct gfs2_log_descriptor *) 584 bh->b_data; 585 ptr = (__be64 *)(bh->b_data + 586 DATABUF_OFFSET); 587 ld->ld_header.mh_magic = 588 cpu_to_be32(GFS2_MAGIC); 589 ld->ld_header.mh_type = 590 cpu_to_be32(GFS2_METATYPE_LD); 591 ld->ld_header.mh_format = 592 cpu_to_be32(GFS2_FORMAT_LD); 593 ld->ld_type = 594 cpu_to_be32(GFS2_LOG_DESC_JDATA); 595 ld->ld_length = cpu_to_be32(num + 1); 596 ld->ld_data1 = cpu_to_be32(num); 597 ld->ld_data2 = cpu_to_be32(0); 598 memset(ld->ld_reserved, 0, sizeof(ld->ld_reserved)); 599 } 600 magic = gfs2_check_magic(bh1); 601 *ptr++ = cpu_to_be64(bh1->b_blocknr); 602 *ptr++ = cpu_to_be64((__u64)magic); 603 clear_buffer_escaped(bh1); 604 if (unlikely(magic != 0)) 605 set_buffer_escaped(bh1); 606 gfs2_log_lock(sdp); 607 if (++n >= num) 608 break; 609 } else if (!bh1) { 610 total_dbuf--; 611 sdp->sd_log_num_databuf--; 612 list_del_init(&bd1->bd_le.le_list); 613 if (bd1 == bd2) { 614 bd2 = NULL; 615 bd2 = list_prepare_entry(bd2, 616 &sdp->sd_log_le_databuf, 617 bd_le.le_list); 618 } 619 kmem_cache_free(gfs2_bufdata_cachep, bd1); 620 } 621 } 622 gfs2_log_unlock(sdp); 623 if (bh) { 624 set_buffer_mapped(bh); 625 set_buffer_dirty(bh); 626 ll_rw_block(WRITE, 1, &bh); 627 bh = NULL; 628 } 629 n = 0; 630 gfs2_log_lock(sdp); 631 list_for_each_entry_continue(bd2, &sdp->sd_log_le_databuf, 632 bd_le.le_list) { 633 if (!bd2->bd_bh) 634 continue; 635 /* copy buffer if it needs escaping */ 636 gfs2_log_unlock(sdp); 637 if (unlikely(buffer_escaped(bd2->bd_bh))) { 638 void *kaddr; 639 struct page *page = bd2->bd_bh->b_page; 640 bh = gfs2_log_get_buf(sdp); 641 kaddr = kmap_atomic(page, KM_USER0); 642 memcpy(bh->b_data, 643 kaddr + bh_offset(bd2->bd_bh), 644 sdp->sd_sb.sb_bsize); 645 kunmap_atomic(kaddr, KM_USER0); 646 *(__be32 *)bh->b_data = 0; 647 } else { 648 bh = gfs2_log_fake_buf(sdp, bd2->bd_bh); 649 } 650 set_buffer_dirty(bh); 651 ll_rw_block(WRITE, 1, &bh); 652 gfs2_log_lock(sdp); 653 if (++n >= num) 654 break; 655 } 656 bh = NULL; 657 BUG_ON(total_dbuf < num); 658 total_dbuf -= num; 659 total_jdata -= num; 660 } 661 gfs2_log_unlock(sdp); 662 663 /* Wait on all ordered buffers */ 664 while (!list_empty(&started)) { 665 gfs2_log_lock(sdp); 666 bd1 = list_entry(started.next, struct gfs2_bufdata, 667 bd_le.le_list); 668 list_del_init(&bd1->bd_le.le_list); 669 sdp->sd_log_num_databuf--; 670 bh = bd1->bd_bh; 671 if (bh) { 672 bh->b_private = NULL; 673 get_bh(bh); 674 gfs2_log_unlock(sdp); 675 wait_on_buffer(bh); 676 brelse(bh); 677 } else 678 gfs2_log_unlock(sdp); 679 680 kmem_cache_free(gfs2_bufdata_cachep, bd1); 681 } 682 683 /* We've removed all the ordered write bufs here, so only jdata left */ 684 gfs2_assert_warn(sdp, sdp->sd_log_num_databuf == sdp->sd_log_num_jdata); 685 } 686 687 static int databuf_lo_scan_elements(struct gfs2_jdesc *jd, unsigned int start, 688 struct gfs2_log_descriptor *ld, 689 __be64 *ptr, int pass) 690 { 691 struct gfs2_inode *ip = GFS2_I(jd->jd_inode); 692 struct gfs2_sbd *sdp = GFS2_SB(jd->jd_inode); 693 struct gfs2_glock *gl = ip->i_gl; 694 unsigned int blks = be32_to_cpu(ld->ld_data1); 695 struct buffer_head *bh_log, *bh_ip; 696 u64 blkno; 697 u64 esc; 698 int error = 0; 699 700 if (pass != 1 || be32_to_cpu(ld->ld_type) != GFS2_LOG_DESC_JDATA) 701 return 0; 702 703 gfs2_replay_incr_blk(sdp, &start); 704 for (; blks; gfs2_replay_incr_blk(sdp, &start), blks--) { 705 blkno = be64_to_cpu(*ptr++); 706 esc = be64_to_cpu(*ptr++); 707 708 sdp->sd_found_blocks++; 709 710 if (gfs2_revoke_check(sdp, blkno, start)) 711 continue; 712 713 error = gfs2_replay_read_block(jd, start, &bh_log); 714 if (error) 715 return error; 716 717 bh_ip = gfs2_meta_new(gl, blkno); 718 memcpy(bh_ip->b_data, bh_log->b_data, bh_log->b_size); 719 720 /* Unescape */ 721 if (esc) { 722 __be32 *eptr = (__be32 *)bh_ip->b_data; 723 *eptr = cpu_to_be32(GFS2_MAGIC); 724 } 725 mark_buffer_dirty(bh_ip); 726 727 brelse(bh_log); 728 brelse(bh_ip); 729 if (error) 730 break; 731 732 sdp->sd_replayed_blocks++; 733 } 734 735 return error; 736 } 737 738 /* FIXME: sort out accounting for log blocks etc. */ 739 740 static void databuf_lo_after_scan(struct gfs2_jdesc *jd, int error, int pass) 741 { 742 struct gfs2_inode *ip = GFS2_I(jd->jd_inode); 743 struct gfs2_sbd *sdp = GFS2_SB(jd->jd_inode); 744 745 if (error) { 746 gfs2_meta_sync(ip->i_gl); 747 return; 748 } 749 if (pass != 1) 750 return; 751 752 /* data sync? */ 753 gfs2_meta_sync(ip->i_gl); 754 755 fs_info(sdp, "jid=%u: Replayed %u of %u data blocks\n", 756 jd->jd_jid, sdp->sd_replayed_blocks, sdp->sd_found_blocks); 757 } 758 759 static void databuf_lo_after_commit(struct gfs2_sbd *sdp, struct gfs2_ail *ai) 760 { 761 struct list_head *head = &sdp->sd_log_le_databuf; 762 struct gfs2_bufdata *bd; 763 764 while (!list_empty(head)) { 765 bd = list_entry(head->next, struct gfs2_bufdata, bd_le.le_list); 766 list_del_init(&bd->bd_le.le_list); 767 sdp->sd_log_num_databuf--; 768 sdp->sd_log_num_jdata--; 769 gfs2_unpin(sdp, bd->bd_bh, ai); 770 } 771 gfs2_assert_warn(sdp, !sdp->sd_log_num_databuf); 772 gfs2_assert_warn(sdp, !sdp->sd_log_num_jdata); 773 } 774 775 776 const struct gfs2_log_operations gfs2_glock_lops = { 777 .lo_add = glock_lo_add, 778 .lo_after_commit = glock_lo_after_commit, 779 .lo_name = "glock", 780 }; 781 782 const struct gfs2_log_operations gfs2_buf_lops = { 783 .lo_add = buf_lo_add, 784 .lo_incore_commit = buf_lo_incore_commit, 785 .lo_before_commit = buf_lo_before_commit, 786 .lo_after_commit = buf_lo_after_commit, 787 .lo_before_scan = buf_lo_before_scan, 788 .lo_scan_elements = buf_lo_scan_elements, 789 .lo_after_scan = buf_lo_after_scan, 790 .lo_name = "buf", 791 }; 792 793 const struct gfs2_log_operations gfs2_revoke_lops = { 794 .lo_add = revoke_lo_add, 795 .lo_before_commit = revoke_lo_before_commit, 796 .lo_before_scan = revoke_lo_before_scan, 797 .lo_scan_elements = revoke_lo_scan_elements, 798 .lo_after_scan = revoke_lo_after_scan, 799 .lo_name = "revoke", 800 }; 801 802 const struct gfs2_log_operations gfs2_rg_lops = { 803 .lo_add = rg_lo_add, 804 .lo_after_commit = rg_lo_after_commit, 805 .lo_name = "rg", 806 }; 807 808 const struct gfs2_log_operations gfs2_databuf_lops = { 809 .lo_add = databuf_lo_add, 810 .lo_incore_commit = buf_lo_incore_commit, 811 .lo_before_commit = databuf_lo_before_commit, 812 .lo_after_commit = databuf_lo_after_commit, 813 .lo_scan_elements = databuf_lo_scan_elements, 814 .lo_after_scan = databuf_lo_after_scan, 815 .lo_name = "databuf", 816 }; 817 818 const struct gfs2_log_operations *gfs2_log_ops[] = { 819 &gfs2_glock_lops, 820 &gfs2_buf_lops, 821 &gfs2_revoke_lops, 822 &gfs2_rg_lops, 823 &gfs2_databuf_lops, 824 NULL, 825 }; 826 827