1 /* 2 * Copyright (C) International Business Machines Corp., 2000-2004 3 * Portions Copyright (C) Christoph Hellwig, 2001-2002 4 * 5 * This program is free software; you can redistribute it and/or modify 6 * it under the terms of the GNU General Public License as published by 7 * the Free Software Foundation; either version 2 of the License, or 8 * (at your option) any later version. 9 * 10 * This program is distributed in the hope that it will be useful, 11 * but WITHOUT ANY WARRANTY; without even the implied warranty of 12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See 13 * the GNU General Public License for more details. 14 * 15 * You should have received a copy of the GNU General Public License 16 * along with this program; if not, write to the Free Software 17 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA 18 */ 19 20 /* 21 * jfs_logmgr.c: log manager 22 * 23 * for related information, see transaction manager (jfs_txnmgr.c), and 24 * recovery manager (jfs_logredo.c). 25 * 26 * note: for detail, RTFS. 27 * 28 * log buffer manager: 29 * special purpose buffer manager supporting log i/o requirements. 30 * per log serial pageout of logpage 31 * queuing i/o requests and redrive i/o at iodone 32 * maintain current logpage buffer 33 * no caching since append only 34 * appropriate jfs buffer cache buffers as needed 35 * 36 * group commit: 37 * transactions which wrote COMMIT records in the same in-memory 38 * log page during the pageout of previous/current log page(s) are 39 * committed together by the pageout of the page. 40 * 41 * TBD lazy commit: 42 * transactions are committed asynchronously when the log page 43 * containing it COMMIT is paged out when it becomes full; 44 * 45 * serialization: 46 * . a per log lock serialize log write. 47 * . a per log lock serialize group commit. 48 * . a per log lock serialize log open/close; 49 * 50 * TBD log integrity: 51 * careful-write (ping-pong) of last logpage to recover from crash 52 * in overwrite. 53 * detection of split (out-of-order) write of physical sectors 54 * of last logpage via timestamp at end of each sector 55 * with its mirror data array at trailer). 56 * 57 * alternatives: 58 * lsn - 64-bit monotonically increasing integer vs 59 * 32-bit lspn and page eor. 60 */ 61 62 #include <linux/fs.h> 63 #include <linux/blkdev.h> 64 #include <linux/interrupt.h> 65 #include <linux/smp_lock.h> 66 #include <linux/completion.h> 67 #include <linux/buffer_head.h> /* for sync_blockdev() */ 68 #include <linux/bio.h> 69 #include <linux/suspend.h> 70 #include <linux/delay.h> 71 #include "jfs_incore.h" 72 #include "jfs_filsys.h" 73 #include "jfs_metapage.h" 74 #include "jfs_txnmgr.h" 75 #include "jfs_debug.h" 76 77 78 /* 79 * lbuf's ready to be redriven. Protected by log_redrive_lock (jfsIO thread) 80 */ 81 static struct lbuf *log_redrive_list; 82 static DEFINE_SPINLOCK(log_redrive_lock); 83 DECLARE_WAIT_QUEUE_HEAD(jfs_IO_thread_wait); 84 85 86 /* 87 * log read/write serialization (per log) 88 */ 89 #define LOG_LOCK_INIT(log) init_MUTEX(&(log)->loglock) 90 #define LOG_LOCK(log) down(&((log)->loglock)) 91 #define LOG_UNLOCK(log) up(&((log)->loglock)) 92 93 94 /* 95 * log group commit serialization (per log) 96 */ 97 98 #define LOGGC_LOCK_INIT(log) spin_lock_init(&(log)->gclock) 99 #define LOGGC_LOCK(log) spin_lock_irq(&(log)->gclock) 100 #define LOGGC_UNLOCK(log) spin_unlock_irq(&(log)->gclock) 101 #define LOGGC_WAKEUP(tblk) wake_up_all(&(tblk)->gcwait) 102 103 /* 104 * log sync serialization (per log) 105 */ 106 #define LOGSYNC_DELTA(logsize) min((logsize)/8, 128*LOGPSIZE) 107 #define LOGSYNC_BARRIER(logsize) ((logsize)/4) 108 /* 109 #define LOGSYNC_DELTA(logsize) min((logsize)/4, 256*LOGPSIZE) 110 #define LOGSYNC_BARRIER(logsize) ((logsize)/2) 111 */ 112 113 114 /* 115 * log buffer cache synchronization 116 */ 117 static DEFINE_SPINLOCK(jfsLCacheLock); 118 119 #define LCACHE_LOCK(flags) spin_lock_irqsave(&jfsLCacheLock, flags) 120 #define LCACHE_UNLOCK(flags) spin_unlock_irqrestore(&jfsLCacheLock, flags) 121 122 /* 123 * See __SLEEP_COND in jfs_locks.h 124 */ 125 #define LCACHE_SLEEP_COND(wq, cond, flags) \ 126 do { \ 127 if (cond) \ 128 break; \ 129 __SLEEP_COND(wq, cond, LCACHE_LOCK(flags), LCACHE_UNLOCK(flags)); \ 130 } while (0) 131 132 #define LCACHE_WAKEUP(event) wake_up(event) 133 134 135 /* 136 * lbuf buffer cache (lCache) control 137 */ 138 /* log buffer manager pageout control (cumulative, inclusive) */ 139 #define lbmREAD 0x0001 140 #define lbmWRITE 0x0002 /* enqueue at tail of write queue; 141 * init pageout if at head of queue; 142 */ 143 #define lbmRELEASE 0x0004 /* remove from write queue 144 * at completion of pageout; 145 * do not free/recycle it yet: 146 * caller will free it; 147 */ 148 #define lbmSYNC 0x0008 /* do not return to freelist 149 * when removed from write queue; 150 */ 151 #define lbmFREE 0x0010 /* return to freelist 152 * at completion of pageout; 153 * the buffer may be recycled; 154 */ 155 #define lbmDONE 0x0020 156 #define lbmERROR 0x0040 157 #define lbmGC 0x0080 /* lbmIODone to perform post-GC processing 158 * of log page 159 */ 160 #define lbmDIRECT 0x0100 161 162 /* 163 * Global list of active external journals 164 */ 165 static LIST_HEAD(jfs_external_logs); 166 static struct jfs_log *dummy_log = NULL; 167 static DECLARE_MUTEX(jfs_log_sem); 168 169 /* 170 * external references 171 */ 172 extern void txLazyUnlock(struct tblock * tblk); 173 extern int jfs_stop_threads; 174 extern struct completion jfsIOwait; 175 extern int jfs_tlocks_low; 176 177 /* 178 * forward references 179 */ 180 static int lmWriteRecord(struct jfs_log * log, struct tblock * tblk, 181 struct lrd * lrd, struct tlock * tlck); 182 183 static int lmNextPage(struct jfs_log * log); 184 static int lmLogFileSystem(struct jfs_log * log, struct jfs_sb_info *sbi, 185 int activate); 186 187 static int open_inline_log(struct super_block *sb); 188 static int open_dummy_log(struct super_block *sb); 189 static int lbmLogInit(struct jfs_log * log); 190 static void lbmLogShutdown(struct jfs_log * log); 191 static struct lbuf *lbmAllocate(struct jfs_log * log, int); 192 static void lbmFree(struct lbuf * bp); 193 static void lbmfree(struct lbuf * bp); 194 static int lbmRead(struct jfs_log * log, int pn, struct lbuf ** bpp); 195 static void lbmWrite(struct jfs_log * log, struct lbuf * bp, int flag, int cant_block); 196 static void lbmDirectWrite(struct jfs_log * log, struct lbuf * bp, int flag); 197 static int lbmIOWait(struct lbuf * bp, int flag); 198 static bio_end_io_t lbmIODone; 199 static void lbmStartIO(struct lbuf * bp); 200 static void lmGCwrite(struct jfs_log * log, int cant_block); 201 static int lmLogSync(struct jfs_log * log, int nosyncwait); 202 203 204 205 /* 206 * statistics 207 */ 208 #ifdef CONFIG_JFS_STATISTICS 209 static struct lmStat { 210 uint commit; /* # of commit */ 211 uint pagedone; /* # of page written */ 212 uint submitted; /* # of pages submitted */ 213 uint full_page; /* # of full pages submitted */ 214 uint partial_page; /* # of partial pages submitted */ 215 } lmStat; 216 #endif 217 218 219 /* 220 * NAME: lmLog() 221 * 222 * FUNCTION: write a log record; 223 * 224 * PARAMETER: 225 * 226 * RETURN: lsn - offset to the next log record to write (end-of-log); 227 * -1 - error; 228 * 229 * note: todo: log error handler 230 */ 231 int lmLog(struct jfs_log * log, struct tblock * tblk, struct lrd * lrd, 232 struct tlock * tlck) 233 { 234 int lsn; 235 int diffp, difft; 236 struct metapage *mp = NULL; 237 238 jfs_info("lmLog: log:0x%p tblk:0x%p, lrd:0x%p tlck:0x%p", 239 log, tblk, lrd, tlck); 240 241 LOG_LOCK(log); 242 243 /* log by (out-of-transaction) JFS ? */ 244 if (tblk == NULL) 245 goto writeRecord; 246 247 /* log from page ? */ 248 if (tlck == NULL || 249 tlck->type & tlckBTROOT || (mp = tlck->mp) == NULL) 250 goto writeRecord; 251 252 /* 253 * initialize/update page/transaction recovery lsn 254 */ 255 lsn = log->lsn; 256 257 LOGSYNC_LOCK(log); 258 259 /* 260 * initialize page lsn if first log write of the page 261 */ 262 if (mp->lsn == 0) { 263 mp->log = log; 264 mp->lsn = lsn; 265 log->count++; 266 267 /* insert page at tail of logsynclist */ 268 list_add_tail(&mp->synclist, &log->synclist); 269 } 270 271 /* 272 * initialize/update lsn of tblock of the page 273 * 274 * transaction inherits oldest lsn of pages associated 275 * with allocation/deallocation of resources (their 276 * log records are used to reconstruct allocation map 277 * at recovery time: inode for inode allocation map, 278 * B+-tree index of extent descriptors for block 279 * allocation map); 280 * allocation map pages inherit transaction lsn at 281 * commit time to allow forwarding log syncpt past log 282 * records associated with allocation/deallocation of 283 * resources only after persistent map of these map pages 284 * have been updated and propagated to home. 285 */ 286 /* 287 * initialize transaction lsn: 288 */ 289 if (tblk->lsn == 0) { 290 /* inherit lsn of its first page logged */ 291 tblk->lsn = mp->lsn; 292 log->count++; 293 294 /* insert tblock after the page on logsynclist */ 295 list_add(&tblk->synclist, &mp->synclist); 296 } 297 /* 298 * update transaction lsn: 299 */ 300 else { 301 /* inherit oldest/smallest lsn of page */ 302 logdiff(diffp, mp->lsn, log); 303 logdiff(difft, tblk->lsn, log); 304 if (diffp < difft) { 305 /* update tblock lsn with page lsn */ 306 tblk->lsn = mp->lsn; 307 308 /* move tblock after page on logsynclist */ 309 list_move(&tblk->synclist, &mp->synclist); 310 } 311 } 312 313 LOGSYNC_UNLOCK(log); 314 315 /* 316 * write the log record 317 */ 318 writeRecord: 319 lsn = lmWriteRecord(log, tblk, lrd, tlck); 320 321 /* 322 * forward log syncpt if log reached next syncpt trigger 323 */ 324 logdiff(diffp, lsn, log); 325 if (diffp >= log->nextsync) 326 lsn = lmLogSync(log, 0); 327 328 /* update end-of-log lsn */ 329 log->lsn = lsn; 330 331 LOG_UNLOCK(log); 332 333 /* return end-of-log address */ 334 return lsn; 335 } 336 337 338 /* 339 * NAME: lmWriteRecord() 340 * 341 * FUNCTION: move the log record to current log page 342 * 343 * PARAMETER: cd - commit descriptor 344 * 345 * RETURN: end-of-log address 346 * 347 * serialization: LOG_LOCK() held on entry/exit 348 */ 349 static int 350 lmWriteRecord(struct jfs_log * log, struct tblock * tblk, struct lrd * lrd, 351 struct tlock * tlck) 352 { 353 int lsn = 0; /* end-of-log address */ 354 struct lbuf *bp; /* dst log page buffer */ 355 struct logpage *lp; /* dst log page */ 356 caddr_t dst; /* destination address in log page */ 357 int dstoffset; /* end-of-log offset in log page */ 358 int freespace; /* free space in log page */ 359 caddr_t p; /* src meta-data page */ 360 caddr_t src; 361 int srclen; 362 int nbytes; /* number of bytes to move */ 363 int i; 364 int len; 365 struct linelock *linelock; 366 struct lv *lv; 367 struct lvd *lvd; 368 int l2linesize; 369 370 len = 0; 371 372 /* retrieve destination log page to write */ 373 bp = (struct lbuf *) log->bp; 374 lp = (struct logpage *) bp->l_ldata; 375 dstoffset = log->eor; 376 377 /* any log data to write ? */ 378 if (tlck == NULL) 379 goto moveLrd; 380 381 /* 382 * move log record data 383 */ 384 /* retrieve source meta-data page to log */ 385 if (tlck->flag & tlckPAGELOCK) { 386 p = (caddr_t) (tlck->mp->data); 387 linelock = (struct linelock *) & tlck->lock; 388 } 389 /* retrieve source in-memory inode to log */ 390 else if (tlck->flag & tlckINODELOCK) { 391 if (tlck->type & tlckDTREE) 392 p = (caddr_t) &JFS_IP(tlck->ip)->i_dtroot; 393 else 394 p = (caddr_t) &JFS_IP(tlck->ip)->i_xtroot; 395 linelock = (struct linelock *) & tlck->lock; 396 } 397 #ifdef _JFS_WIP 398 else if (tlck->flag & tlckINLINELOCK) { 399 400 inlinelock = (struct inlinelock *) & tlck; 401 p = (caddr_t) & inlinelock->pxd; 402 linelock = (struct linelock *) & tlck; 403 } 404 #endif /* _JFS_WIP */ 405 else { 406 jfs_err("lmWriteRecord: UFO tlck:0x%p", tlck); 407 return 0; /* Probably should trap */ 408 } 409 l2linesize = linelock->l2linesize; 410 411 moveData: 412 ASSERT(linelock->index <= linelock->maxcnt); 413 414 lv = linelock->lv; 415 for (i = 0; i < linelock->index; i++, lv++) { 416 if (lv->length == 0) 417 continue; 418 419 /* is page full ? */ 420 if (dstoffset >= LOGPSIZE - LOGPTLRSIZE) { 421 /* page become full: move on to next page */ 422 lmNextPage(log); 423 424 bp = log->bp; 425 lp = (struct logpage *) bp->l_ldata; 426 dstoffset = LOGPHDRSIZE; 427 } 428 429 /* 430 * move log vector data 431 */ 432 src = (u8 *) p + (lv->offset << l2linesize); 433 srclen = lv->length << l2linesize; 434 len += srclen; 435 while (srclen > 0) { 436 freespace = (LOGPSIZE - LOGPTLRSIZE) - dstoffset; 437 nbytes = min(freespace, srclen); 438 dst = (caddr_t) lp + dstoffset; 439 memcpy(dst, src, nbytes); 440 dstoffset += nbytes; 441 442 /* is page not full ? */ 443 if (dstoffset < LOGPSIZE - LOGPTLRSIZE) 444 break; 445 446 /* page become full: move on to next page */ 447 lmNextPage(log); 448 449 bp = (struct lbuf *) log->bp; 450 lp = (struct logpage *) bp->l_ldata; 451 dstoffset = LOGPHDRSIZE; 452 453 srclen -= nbytes; 454 src += nbytes; 455 } 456 457 /* 458 * move log vector descriptor 459 */ 460 len += 4; 461 lvd = (struct lvd *) ((caddr_t) lp + dstoffset); 462 lvd->offset = cpu_to_le16(lv->offset); 463 lvd->length = cpu_to_le16(lv->length); 464 dstoffset += 4; 465 jfs_info("lmWriteRecord: lv offset:%d length:%d", 466 lv->offset, lv->length); 467 } 468 469 if ((i = linelock->next)) { 470 linelock = (struct linelock *) lid_to_tlock(i); 471 goto moveData; 472 } 473 474 /* 475 * move log record descriptor 476 */ 477 moveLrd: 478 lrd->length = cpu_to_le16(len); 479 480 src = (caddr_t) lrd; 481 srclen = LOGRDSIZE; 482 483 while (srclen > 0) { 484 freespace = (LOGPSIZE - LOGPTLRSIZE) - dstoffset; 485 nbytes = min(freespace, srclen); 486 dst = (caddr_t) lp + dstoffset; 487 memcpy(dst, src, nbytes); 488 489 dstoffset += nbytes; 490 srclen -= nbytes; 491 492 /* are there more to move than freespace of page ? */ 493 if (srclen) 494 goto pageFull; 495 496 /* 497 * end of log record descriptor 498 */ 499 500 /* update last log record eor */ 501 log->eor = dstoffset; 502 bp->l_eor = dstoffset; 503 lsn = (log->page << L2LOGPSIZE) + dstoffset; 504 505 if (lrd->type & cpu_to_le16(LOG_COMMIT)) { 506 tblk->clsn = lsn; 507 jfs_info("wr: tclsn:0x%x, beor:0x%x", tblk->clsn, 508 bp->l_eor); 509 510 INCREMENT(lmStat.commit); /* # of commit */ 511 512 /* 513 * enqueue tblock for group commit: 514 * 515 * enqueue tblock of non-trivial/synchronous COMMIT 516 * at tail of group commit queue 517 * (trivial/asynchronous COMMITs are ignored by 518 * group commit.) 519 */ 520 LOGGC_LOCK(log); 521 522 /* init tblock gc state */ 523 tblk->flag = tblkGC_QUEUE; 524 tblk->bp = log->bp; 525 tblk->pn = log->page; 526 tblk->eor = log->eor; 527 528 /* enqueue transaction to commit queue */ 529 list_add_tail(&tblk->cqueue, &log->cqueue); 530 531 LOGGC_UNLOCK(log); 532 } 533 534 jfs_info("lmWriteRecord: lrd:0x%04x bp:0x%p pn:%d eor:0x%x", 535 le16_to_cpu(lrd->type), log->bp, log->page, dstoffset); 536 537 /* page not full ? */ 538 if (dstoffset < LOGPSIZE - LOGPTLRSIZE) 539 return lsn; 540 541 pageFull: 542 /* page become full: move on to next page */ 543 lmNextPage(log); 544 545 bp = (struct lbuf *) log->bp; 546 lp = (struct logpage *) bp->l_ldata; 547 dstoffset = LOGPHDRSIZE; 548 src += nbytes; 549 } 550 551 return lsn; 552 } 553 554 555 /* 556 * NAME: lmNextPage() 557 * 558 * FUNCTION: write current page and allocate next page. 559 * 560 * PARAMETER: log 561 * 562 * RETURN: 0 563 * 564 * serialization: LOG_LOCK() held on entry/exit 565 */ 566 static int lmNextPage(struct jfs_log * log) 567 { 568 struct logpage *lp; 569 int lspn; /* log sequence page number */ 570 int pn; /* current page number */ 571 struct lbuf *bp; 572 struct lbuf *nextbp; 573 struct tblock *tblk; 574 575 /* get current log page number and log sequence page number */ 576 pn = log->page; 577 bp = log->bp; 578 lp = (struct logpage *) bp->l_ldata; 579 lspn = le32_to_cpu(lp->h.page); 580 581 LOGGC_LOCK(log); 582 583 /* 584 * write or queue the full page at the tail of write queue 585 */ 586 /* get the tail tblk on commit queue */ 587 if (list_empty(&log->cqueue)) 588 tblk = NULL; 589 else 590 tblk = list_entry(log->cqueue.prev, struct tblock, cqueue); 591 592 /* every tblk who has COMMIT record on the current page, 593 * and has not been committed, must be on commit queue 594 * since tblk is queued at commit queueu at the time 595 * of writing its COMMIT record on the page before 596 * page becomes full (even though the tblk thread 597 * who wrote COMMIT record may have been suspended 598 * currently); 599 */ 600 601 /* is page bound with outstanding tail tblk ? */ 602 if (tblk && tblk->pn == pn) { 603 /* mark tblk for end-of-page */ 604 tblk->flag |= tblkGC_EOP; 605 606 if (log->cflag & logGC_PAGEOUT) { 607 /* if page is not already on write queue, 608 * just enqueue (no lbmWRITE to prevent redrive) 609 * buffer to wqueue to ensure correct serial order 610 * of the pages since log pages will be added 611 * continuously 612 */ 613 if (bp->l_wqnext == NULL) 614 lbmWrite(log, bp, 0, 0); 615 } else { 616 /* 617 * No current GC leader, initiate group commit 618 */ 619 log->cflag |= logGC_PAGEOUT; 620 lmGCwrite(log, 0); 621 } 622 } 623 /* page is not bound with outstanding tblk: 624 * init write or mark it to be redriven (lbmWRITE) 625 */ 626 else { 627 /* finalize the page */ 628 bp->l_ceor = bp->l_eor; 629 lp->h.eor = lp->t.eor = cpu_to_le16(bp->l_ceor); 630 lbmWrite(log, bp, lbmWRITE | lbmRELEASE | lbmFREE, 0); 631 } 632 LOGGC_UNLOCK(log); 633 634 /* 635 * allocate/initialize next page 636 */ 637 /* if log wraps, the first data page of log is 2 638 * (0 never used, 1 is superblock). 639 */ 640 log->page = (pn == log->size - 1) ? 2 : pn + 1; 641 log->eor = LOGPHDRSIZE; /* ? valid page empty/full at logRedo() */ 642 643 /* allocate/initialize next log page buffer */ 644 nextbp = lbmAllocate(log, log->page); 645 nextbp->l_eor = log->eor; 646 log->bp = nextbp; 647 648 /* initialize next log page */ 649 lp = (struct logpage *) nextbp->l_ldata; 650 lp->h.page = lp->t.page = cpu_to_le32(lspn + 1); 651 lp->h.eor = lp->t.eor = cpu_to_le16(LOGPHDRSIZE); 652 653 return 0; 654 } 655 656 657 /* 658 * NAME: lmGroupCommit() 659 * 660 * FUNCTION: group commit 661 * initiate pageout of the pages with COMMIT in the order of 662 * page number - redrive pageout of the page at the head of 663 * pageout queue until full page has been written. 664 * 665 * RETURN: 666 * 667 * NOTE: 668 * LOGGC_LOCK serializes log group commit queue, and 669 * transaction blocks on the commit queue. 670 * N.B. LOG_LOCK is NOT held during lmGroupCommit(). 671 */ 672 int lmGroupCommit(struct jfs_log * log, struct tblock * tblk) 673 { 674 int rc = 0; 675 676 LOGGC_LOCK(log); 677 678 /* group committed already ? */ 679 if (tblk->flag & tblkGC_COMMITTED) { 680 if (tblk->flag & tblkGC_ERROR) 681 rc = -EIO; 682 683 LOGGC_UNLOCK(log); 684 return rc; 685 } 686 jfs_info("lmGroup Commit: tblk = 0x%p, gcrtc = %d", tblk, log->gcrtc); 687 688 if (tblk->xflag & COMMIT_LAZY) 689 tblk->flag |= tblkGC_LAZY; 690 691 if ((!(log->cflag & logGC_PAGEOUT)) && (!list_empty(&log->cqueue)) && 692 (!(tblk->xflag & COMMIT_LAZY) || test_bit(log_FLUSH, &log->flag) 693 || jfs_tlocks_low)) { 694 /* 695 * No pageout in progress 696 * 697 * start group commit as its group leader. 698 */ 699 log->cflag |= logGC_PAGEOUT; 700 701 lmGCwrite(log, 0); 702 } 703 704 if (tblk->xflag & COMMIT_LAZY) { 705 /* 706 * Lazy transactions can leave now 707 */ 708 LOGGC_UNLOCK(log); 709 return 0; 710 } 711 712 /* lmGCwrite gives up LOGGC_LOCK, check again */ 713 714 if (tblk->flag & tblkGC_COMMITTED) { 715 if (tblk->flag & tblkGC_ERROR) 716 rc = -EIO; 717 718 LOGGC_UNLOCK(log); 719 return rc; 720 } 721 722 /* upcount transaction waiting for completion 723 */ 724 log->gcrtc++; 725 tblk->flag |= tblkGC_READY; 726 727 __SLEEP_COND(tblk->gcwait, (tblk->flag & tblkGC_COMMITTED), 728 LOGGC_LOCK(log), LOGGC_UNLOCK(log)); 729 730 /* removed from commit queue */ 731 if (tblk->flag & tblkGC_ERROR) 732 rc = -EIO; 733 734 LOGGC_UNLOCK(log); 735 return rc; 736 } 737 738 /* 739 * NAME: lmGCwrite() 740 * 741 * FUNCTION: group commit write 742 * initiate write of log page, building a group of all transactions 743 * with commit records on that page. 744 * 745 * RETURN: None 746 * 747 * NOTE: 748 * LOGGC_LOCK must be held by caller. 749 * N.B. LOG_LOCK is NOT held during lmGroupCommit(). 750 */ 751 static void lmGCwrite(struct jfs_log * log, int cant_write) 752 { 753 struct lbuf *bp; 754 struct logpage *lp; 755 int gcpn; /* group commit page number */ 756 struct tblock *tblk; 757 struct tblock *xtblk = NULL; 758 759 /* 760 * build the commit group of a log page 761 * 762 * scan commit queue and make a commit group of all 763 * transactions with COMMIT records on the same log page. 764 */ 765 /* get the head tblk on the commit queue */ 766 gcpn = list_entry(log->cqueue.next, struct tblock, cqueue)->pn; 767 768 list_for_each_entry(tblk, &log->cqueue, cqueue) { 769 if (tblk->pn != gcpn) 770 break; 771 772 xtblk = tblk; 773 774 /* state transition: (QUEUE, READY) -> COMMIT */ 775 tblk->flag |= tblkGC_COMMIT; 776 } 777 tblk = xtblk; /* last tblk of the page */ 778 779 /* 780 * pageout to commit transactions on the log page. 781 */ 782 bp = (struct lbuf *) tblk->bp; 783 lp = (struct logpage *) bp->l_ldata; 784 /* is page already full ? */ 785 if (tblk->flag & tblkGC_EOP) { 786 /* mark page to free at end of group commit of the page */ 787 tblk->flag &= ~tblkGC_EOP; 788 tblk->flag |= tblkGC_FREE; 789 bp->l_ceor = bp->l_eor; 790 lp->h.eor = lp->t.eor = cpu_to_le16(bp->l_ceor); 791 lbmWrite(log, bp, lbmWRITE | lbmRELEASE | lbmGC, 792 cant_write); 793 INCREMENT(lmStat.full_page); 794 } 795 /* page is not yet full */ 796 else { 797 bp->l_ceor = tblk->eor; /* ? bp->l_ceor = bp->l_eor; */ 798 lp->h.eor = lp->t.eor = cpu_to_le16(bp->l_ceor); 799 lbmWrite(log, bp, lbmWRITE | lbmGC, cant_write); 800 INCREMENT(lmStat.partial_page); 801 } 802 } 803 804 /* 805 * NAME: lmPostGC() 806 * 807 * FUNCTION: group commit post-processing 808 * Processes transactions after their commit records have been written 809 * to disk, redriving log I/O if necessary. 810 * 811 * RETURN: None 812 * 813 * NOTE: 814 * This routine is called a interrupt time by lbmIODone 815 */ 816 static void lmPostGC(struct lbuf * bp) 817 { 818 unsigned long flags; 819 struct jfs_log *log = bp->l_log; 820 struct logpage *lp; 821 struct tblock *tblk, *temp; 822 823 //LOGGC_LOCK(log); 824 spin_lock_irqsave(&log->gclock, flags); 825 /* 826 * current pageout of group commit completed. 827 * 828 * remove/wakeup transactions from commit queue who were 829 * group committed with the current log page 830 */ 831 list_for_each_entry_safe(tblk, temp, &log->cqueue, cqueue) { 832 if (!(tblk->flag & tblkGC_COMMIT)) 833 break; 834 /* if transaction was marked GC_COMMIT then 835 * it has been shipped in the current pageout 836 * and made it to disk - it is committed. 837 */ 838 839 if (bp->l_flag & lbmERROR) 840 tblk->flag |= tblkGC_ERROR; 841 842 /* remove it from the commit queue */ 843 list_del(&tblk->cqueue); 844 tblk->flag &= ~tblkGC_QUEUE; 845 846 if (tblk == log->flush_tblk) { 847 /* we can stop flushing the log now */ 848 clear_bit(log_FLUSH, &log->flag); 849 log->flush_tblk = NULL; 850 } 851 852 jfs_info("lmPostGC: tblk = 0x%p, flag = 0x%x", tblk, 853 tblk->flag); 854 855 if (!(tblk->xflag & COMMIT_FORCE)) 856 /* 857 * Hand tblk over to lazy commit thread 858 */ 859 txLazyUnlock(tblk); 860 else { 861 /* state transition: COMMIT -> COMMITTED */ 862 tblk->flag |= tblkGC_COMMITTED; 863 864 if (tblk->flag & tblkGC_READY) 865 log->gcrtc--; 866 867 LOGGC_WAKEUP(tblk); 868 } 869 870 /* was page full before pageout ? 871 * (and this is the last tblk bound with the page) 872 */ 873 if (tblk->flag & tblkGC_FREE) 874 lbmFree(bp); 875 /* did page become full after pageout ? 876 * (and this is the last tblk bound with the page) 877 */ 878 else if (tblk->flag & tblkGC_EOP) { 879 /* finalize the page */ 880 lp = (struct logpage *) bp->l_ldata; 881 bp->l_ceor = bp->l_eor; 882 lp->h.eor = lp->t.eor = cpu_to_le16(bp->l_eor); 883 jfs_info("lmPostGC: calling lbmWrite"); 884 lbmWrite(log, bp, lbmWRITE | lbmRELEASE | lbmFREE, 885 1); 886 } 887 888 } 889 890 /* are there any transactions who have entered lnGroupCommit() 891 * (whose COMMITs are after that of the last log page written. 892 * They are waiting for new group commit (above at (SLEEP 1)) 893 * or lazy transactions are on a full (queued) log page, 894 * select the latest ready transaction as new group leader and 895 * wake her up to lead her group. 896 */ 897 if ((!list_empty(&log->cqueue)) && 898 ((log->gcrtc > 0) || (tblk->bp->l_wqnext != NULL) || 899 test_bit(log_FLUSH, &log->flag) || jfs_tlocks_low)) 900 /* 901 * Call lmGCwrite with new group leader 902 */ 903 lmGCwrite(log, 1); 904 905 /* no transaction are ready yet (transactions are only just 906 * queued (GC_QUEUE) and not entered for group commit yet). 907 * the first transaction entering group commit 908 * will elect herself as new group leader. 909 */ 910 else 911 log->cflag &= ~logGC_PAGEOUT; 912 913 //LOGGC_UNLOCK(log); 914 spin_unlock_irqrestore(&log->gclock, flags); 915 return; 916 } 917 918 /* 919 * NAME: lmLogSync() 920 * 921 * FUNCTION: write log SYNCPT record for specified log 922 * if new sync address is available 923 * (normally the case if sync() is executed by back-ground 924 * process). 925 * if not, explicitly run jfs_blogsync() to initiate 926 * getting of new sync address. 927 * calculate new value of i_nextsync which determines when 928 * this code is called again. 929 * 930 * this is called only from lmLog(). 931 * 932 * PARAMETER: ip - pointer to logs inode. 933 * 934 * RETURN: 0 935 * 936 * serialization: LOG_LOCK() held on entry/exit 937 */ 938 static int lmLogSync(struct jfs_log * log, int nosyncwait) 939 { 940 int logsize; 941 int written; /* written since last syncpt */ 942 int free; /* free space left available */ 943 int delta; /* additional delta to write normally */ 944 int more; /* additional write granted */ 945 struct lrd lrd; 946 int lsn; 947 struct logsyncblk *lp; 948 949 /* 950 * forward syncpt 951 */ 952 /* if last sync is same as last syncpt, 953 * invoke sync point forward processing to update sync. 954 */ 955 956 if (log->sync == log->syncpt) { 957 LOGSYNC_LOCK(log); 958 /* ToDo: push dirty metapages out to disk */ 959 // bmLogSync(log); 960 961 if (list_empty(&log->synclist)) 962 log->sync = log->lsn; 963 else { 964 lp = list_entry(log->synclist.next, 965 struct logsyncblk, synclist); 966 log->sync = lp->lsn; 967 } 968 LOGSYNC_UNLOCK(log); 969 970 } 971 972 /* if sync is different from last syncpt, 973 * write a SYNCPT record with syncpt = sync. 974 * reset syncpt = sync 975 */ 976 if (log->sync != log->syncpt) { 977 struct jfs_sb_info *sbi; 978 979 /* 980 * We need to make sure all of the "written" metapages 981 * actually make it to disk 982 */ 983 list_for_each_entry(sbi, &log->sb_list, log_list) { 984 if (sbi->flag & JFS_NOINTEGRITY) 985 continue; 986 filemap_fdatawrite(sbi->ipbmap->i_mapping); 987 filemap_fdatawrite(sbi->ipimap->i_mapping); 988 filemap_fdatawrite(sbi->sb->s_bdev->bd_inode->i_mapping); 989 } 990 list_for_each_entry(sbi, &log->sb_list, log_list) { 991 if (sbi->flag & JFS_NOINTEGRITY) 992 continue; 993 filemap_fdatawait(sbi->ipbmap->i_mapping); 994 filemap_fdatawait(sbi->ipimap->i_mapping); 995 filemap_fdatawait(sbi->sb->s_bdev->bd_inode->i_mapping); 996 } 997 998 lrd.logtid = 0; 999 lrd.backchain = 0; 1000 lrd.type = cpu_to_le16(LOG_SYNCPT); 1001 lrd.length = 0; 1002 lrd.log.syncpt.sync = cpu_to_le32(log->sync); 1003 lsn = lmWriteRecord(log, NULL, &lrd, NULL); 1004 1005 log->syncpt = log->sync; 1006 } else 1007 lsn = log->lsn; 1008 1009 /* 1010 * setup next syncpt trigger (SWAG) 1011 */ 1012 logsize = log->logsize; 1013 1014 logdiff(written, lsn, log); 1015 free = logsize - written; 1016 delta = LOGSYNC_DELTA(logsize); 1017 more = min(free / 2, delta); 1018 if (more < 2 * LOGPSIZE) { 1019 jfs_warn("\n ... Log Wrap ... Log Wrap ... Log Wrap ...\n"); 1020 /* 1021 * log wrapping 1022 * 1023 * option 1 - panic ? No.! 1024 * option 2 - shutdown file systems 1025 * associated with log ? 1026 * option 3 - extend log ? 1027 */ 1028 /* 1029 * option 4 - second chance 1030 * 1031 * mark log wrapped, and continue. 1032 * when all active transactions are completed, 1033 * mark log vaild for recovery. 1034 * if crashed during invalid state, log state 1035 * implies invald log, forcing fsck(). 1036 */ 1037 /* mark log state log wrap in log superblock */ 1038 /* log->state = LOGWRAP; */ 1039 1040 /* reset sync point computation */ 1041 log->syncpt = log->sync = lsn; 1042 log->nextsync = delta; 1043 } else 1044 /* next syncpt trigger = written + more */ 1045 log->nextsync = written + more; 1046 1047 /* return if lmLogSync() from outside of transaction, e.g., sync() */ 1048 if (nosyncwait) 1049 return lsn; 1050 1051 /* if number of bytes written from last sync point is more 1052 * than 1/4 of the log size, stop new transactions from 1053 * starting until all current transactions are completed 1054 * by setting syncbarrier flag. 1055 */ 1056 if (written > LOGSYNC_BARRIER(logsize) && logsize > 32 * LOGPSIZE) { 1057 set_bit(log_SYNCBARRIER, &log->flag); 1058 jfs_info("log barrier on: lsn=0x%x syncpt=0x%x", lsn, 1059 log->syncpt); 1060 /* 1061 * We may have to initiate group commit 1062 */ 1063 jfs_flush_journal(log, 0); 1064 } 1065 1066 return lsn; 1067 } 1068 1069 1070 /* 1071 * NAME: lmLogOpen() 1072 * 1073 * FUNCTION: open the log on first open; 1074 * insert filesystem in the active list of the log. 1075 * 1076 * PARAMETER: ipmnt - file system mount inode 1077 * iplog - log inode (out) 1078 * 1079 * RETURN: 1080 * 1081 * serialization: 1082 */ 1083 int lmLogOpen(struct super_block *sb) 1084 { 1085 int rc; 1086 struct block_device *bdev; 1087 struct jfs_log *log; 1088 struct jfs_sb_info *sbi = JFS_SBI(sb); 1089 1090 if (sbi->flag & JFS_NOINTEGRITY) 1091 return open_dummy_log(sb); 1092 1093 if (sbi->mntflag & JFS_INLINELOG) 1094 return open_inline_log(sb); 1095 1096 down(&jfs_log_sem); 1097 list_for_each_entry(log, &jfs_external_logs, journal_list) { 1098 if (log->bdev->bd_dev == sbi->logdev) { 1099 if (memcmp(log->uuid, sbi->loguuid, 1100 sizeof(log->uuid))) { 1101 jfs_warn("wrong uuid on JFS journal\n"); 1102 up(&jfs_log_sem); 1103 return -EINVAL; 1104 } 1105 /* 1106 * add file system to log active file system list 1107 */ 1108 if ((rc = lmLogFileSystem(log, sbi, 1))) { 1109 up(&jfs_log_sem); 1110 return rc; 1111 } 1112 goto journal_found; 1113 } 1114 } 1115 1116 if (!(log = kmalloc(sizeof(struct jfs_log), GFP_KERNEL))) { 1117 up(&jfs_log_sem); 1118 return -ENOMEM; 1119 } 1120 memset(log, 0, sizeof(struct jfs_log)); 1121 INIT_LIST_HEAD(&log->sb_list); 1122 init_waitqueue_head(&log->syncwait); 1123 1124 /* 1125 * external log as separate logical volume 1126 * 1127 * file systems to log may have n-to-1 relationship; 1128 */ 1129 1130 bdev = open_by_devnum(sbi->logdev, FMODE_READ|FMODE_WRITE); 1131 if (IS_ERR(bdev)) { 1132 rc = -PTR_ERR(bdev); 1133 goto free; 1134 } 1135 1136 if ((rc = bd_claim(bdev, log))) { 1137 goto close; 1138 } 1139 1140 log->bdev = bdev; 1141 memcpy(log->uuid, sbi->loguuid, sizeof(log->uuid)); 1142 1143 /* 1144 * initialize log: 1145 */ 1146 if ((rc = lmLogInit(log))) 1147 goto unclaim; 1148 1149 list_add(&log->journal_list, &jfs_external_logs); 1150 1151 /* 1152 * add file system to log active file system list 1153 */ 1154 if ((rc = lmLogFileSystem(log, sbi, 1))) 1155 goto shutdown; 1156 1157 journal_found: 1158 LOG_LOCK(log); 1159 list_add(&sbi->log_list, &log->sb_list); 1160 sbi->log = log; 1161 LOG_UNLOCK(log); 1162 1163 up(&jfs_log_sem); 1164 return 0; 1165 1166 /* 1167 * unwind on error 1168 */ 1169 shutdown: /* unwind lbmLogInit() */ 1170 list_del(&log->journal_list); 1171 lbmLogShutdown(log); 1172 1173 unclaim: 1174 bd_release(bdev); 1175 1176 close: /* close external log device */ 1177 blkdev_put(bdev); 1178 1179 free: /* free log descriptor */ 1180 up(&jfs_log_sem); 1181 kfree(log); 1182 1183 jfs_warn("lmLogOpen: exit(%d)", rc); 1184 return rc; 1185 } 1186 1187 static int open_inline_log(struct super_block *sb) 1188 { 1189 struct jfs_log *log; 1190 int rc; 1191 1192 if (!(log = kmalloc(sizeof(struct jfs_log), GFP_KERNEL))) 1193 return -ENOMEM; 1194 memset(log, 0, sizeof(struct jfs_log)); 1195 INIT_LIST_HEAD(&log->sb_list); 1196 init_waitqueue_head(&log->syncwait); 1197 1198 set_bit(log_INLINELOG, &log->flag); 1199 log->bdev = sb->s_bdev; 1200 log->base = addressPXD(&JFS_SBI(sb)->logpxd); 1201 log->size = lengthPXD(&JFS_SBI(sb)->logpxd) >> 1202 (L2LOGPSIZE - sb->s_blocksize_bits); 1203 log->l2bsize = sb->s_blocksize_bits; 1204 ASSERT(L2LOGPSIZE >= sb->s_blocksize_bits); 1205 1206 /* 1207 * initialize log. 1208 */ 1209 if ((rc = lmLogInit(log))) { 1210 kfree(log); 1211 jfs_warn("lmLogOpen: exit(%d)", rc); 1212 return rc; 1213 } 1214 1215 list_add(&JFS_SBI(sb)->log_list, &log->sb_list); 1216 JFS_SBI(sb)->log = log; 1217 1218 return rc; 1219 } 1220 1221 static int open_dummy_log(struct super_block *sb) 1222 { 1223 int rc; 1224 1225 down(&jfs_log_sem); 1226 if (!dummy_log) { 1227 dummy_log = kmalloc(sizeof(struct jfs_log), GFP_KERNEL); 1228 if (!dummy_log) { 1229 up(&jfs_log_sem); 1230 return -ENOMEM; 1231 } 1232 memset(dummy_log, 0, sizeof(struct jfs_log)); 1233 INIT_LIST_HEAD(&dummy_log->sb_list); 1234 init_waitqueue_head(&dummy_log->syncwait); 1235 dummy_log->no_integrity = 1; 1236 /* Make up some stuff */ 1237 dummy_log->base = 0; 1238 dummy_log->size = 1024; 1239 rc = lmLogInit(dummy_log); 1240 if (rc) { 1241 kfree(dummy_log); 1242 dummy_log = NULL; 1243 up(&jfs_log_sem); 1244 return rc; 1245 } 1246 } 1247 1248 LOG_LOCK(dummy_log); 1249 list_add(&JFS_SBI(sb)->log_list, &dummy_log->sb_list); 1250 JFS_SBI(sb)->log = dummy_log; 1251 LOG_UNLOCK(dummy_log); 1252 up(&jfs_log_sem); 1253 1254 return 0; 1255 } 1256 1257 /* 1258 * NAME: lmLogInit() 1259 * 1260 * FUNCTION: log initialization at first log open. 1261 * 1262 * logredo() (or logformat()) should have been run previously. 1263 * initialize the log from log superblock. 1264 * set the log state in the superblock to LOGMOUNT and 1265 * write SYNCPT log record. 1266 * 1267 * PARAMETER: log - log structure 1268 * 1269 * RETURN: 0 - if ok 1270 * -EINVAL - bad log magic number or superblock dirty 1271 * error returned from logwait() 1272 * 1273 * serialization: single first open thread 1274 */ 1275 int lmLogInit(struct jfs_log * log) 1276 { 1277 int rc = 0; 1278 struct lrd lrd; 1279 struct logsuper *logsuper; 1280 struct lbuf *bpsuper; 1281 struct lbuf *bp; 1282 struct logpage *lp; 1283 int lsn = 0; 1284 1285 jfs_info("lmLogInit: log:0x%p", log); 1286 1287 /* initialize the group commit serialization lock */ 1288 LOGGC_LOCK_INIT(log); 1289 1290 /* allocate/initialize the log write serialization lock */ 1291 LOG_LOCK_INIT(log); 1292 1293 LOGSYNC_LOCK_INIT(log); 1294 1295 INIT_LIST_HEAD(&log->synclist); 1296 1297 INIT_LIST_HEAD(&log->cqueue); 1298 log->flush_tblk = NULL; 1299 1300 log->count = 0; 1301 1302 /* 1303 * initialize log i/o 1304 */ 1305 if ((rc = lbmLogInit(log))) 1306 return rc; 1307 1308 if (!test_bit(log_INLINELOG, &log->flag)) 1309 log->l2bsize = L2LOGPSIZE; 1310 1311 /* check for disabled journaling to disk */ 1312 if (log->no_integrity) { 1313 /* 1314 * Journal pages will still be filled. When the time comes 1315 * to actually do the I/O, the write is not done, and the 1316 * endio routine is called directly. 1317 */ 1318 bp = lbmAllocate(log , 0); 1319 log->bp = bp; 1320 bp->l_pn = bp->l_eor = 0; 1321 } else { 1322 /* 1323 * validate log superblock 1324 */ 1325 if ((rc = lbmRead(log, 1, &bpsuper))) 1326 goto errout10; 1327 1328 logsuper = (struct logsuper *) bpsuper->l_ldata; 1329 1330 if (logsuper->magic != cpu_to_le32(LOGMAGIC)) { 1331 jfs_warn("*** Log Format Error ! ***"); 1332 rc = -EINVAL; 1333 goto errout20; 1334 } 1335 1336 /* logredo() should have been run successfully. */ 1337 if (logsuper->state != cpu_to_le32(LOGREDONE)) { 1338 jfs_warn("*** Log Is Dirty ! ***"); 1339 rc = -EINVAL; 1340 goto errout20; 1341 } 1342 1343 /* initialize log from log superblock */ 1344 if (test_bit(log_INLINELOG,&log->flag)) { 1345 if (log->size != le32_to_cpu(logsuper->size)) { 1346 rc = -EINVAL; 1347 goto errout20; 1348 } 1349 jfs_info("lmLogInit: inline log:0x%p base:0x%Lx " 1350 "size:0x%x", log, 1351 (unsigned long long) log->base, log->size); 1352 } else { 1353 if (memcmp(logsuper->uuid, log->uuid, 16)) { 1354 jfs_warn("wrong uuid on JFS log device"); 1355 goto errout20; 1356 } 1357 log->size = le32_to_cpu(logsuper->size); 1358 log->l2bsize = le32_to_cpu(logsuper->l2bsize); 1359 jfs_info("lmLogInit: external log:0x%p base:0x%Lx " 1360 "size:0x%x", log, 1361 (unsigned long long) log->base, log->size); 1362 } 1363 1364 log->page = le32_to_cpu(logsuper->end) / LOGPSIZE; 1365 log->eor = le32_to_cpu(logsuper->end) - (LOGPSIZE * log->page); 1366 1367 /* 1368 * initialize for log append write mode 1369 */ 1370 /* establish current/end-of-log page/buffer */ 1371 if ((rc = lbmRead(log, log->page, &bp))) 1372 goto errout20; 1373 1374 lp = (struct logpage *) bp->l_ldata; 1375 1376 jfs_info("lmLogInit: lsn:0x%x page:%d eor:%d:%d", 1377 le32_to_cpu(logsuper->end), log->page, log->eor, 1378 le16_to_cpu(lp->h.eor)); 1379 1380 log->bp = bp; 1381 bp->l_pn = log->page; 1382 bp->l_eor = log->eor; 1383 1384 /* if current page is full, move on to next page */ 1385 if (log->eor >= LOGPSIZE - LOGPTLRSIZE) 1386 lmNextPage(log); 1387 1388 /* 1389 * initialize log syncpoint 1390 */ 1391 /* 1392 * write the first SYNCPT record with syncpoint = 0 1393 * (i.e., log redo up to HERE !); 1394 * remove current page from lbm write queue at end of pageout 1395 * (to write log superblock update), but do not release to 1396 * freelist; 1397 */ 1398 lrd.logtid = 0; 1399 lrd.backchain = 0; 1400 lrd.type = cpu_to_le16(LOG_SYNCPT); 1401 lrd.length = 0; 1402 lrd.log.syncpt.sync = 0; 1403 lsn = lmWriteRecord(log, NULL, &lrd, NULL); 1404 bp = log->bp; 1405 bp->l_ceor = bp->l_eor; 1406 lp = (struct logpage *) bp->l_ldata; 1407 lp->h.eor = lp->t.eor = cpu_to_le16(bp->l_eor); 1408 lbmWrite(log, bp, lbmWRITE | lbmSYNC, 0); 1409 if ((rc = lbmIOWait(bp, 0))) 1410 goto errout30; 1411 1412 /* 1413 * update/write superblock 1414 */ 1415 logsuper->state = cpu_to_le32(LOGMOUNT); 1416 log->serial = le32_to_cpu(logsuper->serial) + 1; 1417 logsuper->serial = cpu_to_le32(log->serial); 1418 lbmDirectWrite(log, bpsuper, lbmWRITE | lbmRELEASE | lbmSYNC); 1419 if ((rc = lbmIOWait(bpsuper, lbmFREE))) 1420 goto errout30; 1421 } 1422 1423 /* initialize logsync parameters */ 1424 log->logsize = (log->size - 2) << L2LOGPSIZE; 1425 log->lsn = lsn; 1426 log->syncpt = lsn; 1427 log->sync = log->syncpt; 1428 log->nextsync = LOGSYNC_DELTA(log->logsize); 1429 1430 jfs_info("lmLogInit: lsn:0x%x syncpt:0x%x sync:0x%x", 1431 log->lsn, log->syncpt, log->sync); 1432 1433 /* 1434 * initialize for lazy/group commit 1435 */ 1436 log->clsn = lsn; 1437 1438 return 0; 1439 1440 /* 1441 * unwind on error 1442 */ 1443 errout30: /* release log page */ 1444 log->wqueue = NULL; 1445 bp->l_wqnext = NULL; 1446 lbmFree(bp); 1447 1448 errout20: /* release log superblock */ 1449 lbmFree(bpsuper); 1450 1451 errout10: /* unwind lbmLogInit() */ 1452 lbmLogShutdown(log); 1453 1454 jfs_warn("lmLogInit: exit(%d)", rc); 1455 return rc; 1456 } 1457 1458 1459 /* 1460 * NAME: lmLogClose() 1461 * 1462 * FUNCTION: remove file system <ipmnt> from active list of log <iplog> 1463 * and close it on last close. 1464 * 1465 * PARAMETER: sb - superblock 1466 * 1467 * RETURN: errors from subroutines 1468 * 1469 * serialization: 1470 */ 1471 int lmLogClose(struct super_block *sb) 1472 { 1473 struct jfs_sb_info *sbi = JFS_SBI(sb); 1474 struct jfs_log *log = sbi->log; 1475 struct block_device *bdev; 1476 int rc = 0; 1477 1478 jfs_info("lmLogClose: log:0x%p", log); 1479 1480 down(&jfs_log_sem); 1481 LOG_LOCK(log); 1482 list_del(&sbi->log_list); 1483 LOG_UNLOCK(log); 1484 sbi->log = NULL; 1485 1486 /* 1487 * We need to make sure all of the "written" metapages 1488 * actually make it to disk 1489 */ 1490 sync_blockdev(sb->s_bdev); 1491 1492 if (test_bit(log_INLINELOG, &log->flag)) { 1493 /* 1494 * in-line log in host file system 1495 */ 1496 rc = lmLogShutdown(log); 1497 kfree(log); 1498 goto out; 1499 } 1500 1501 if (!log->no_integrity) 1502 lmLogFileSystem(log, sbi, 0); 1503 1504 if (!list_empty(&log->sb_list)) 1505 goto out; 1506 1507 /* 1508 * TODO: ensure that the dummy_log is in a state to allow 1509 * lbmLogShutdown to deallocate all the buffers and call 1510 * kfree against dummy_log. For now, leave dummy_log & its 1511 * buffers in memory, and resuse if another no-integrity mount 1512 * is requested. 1513 */ 1514 if (log->no_integrity) 1515 goto out; 1516 1517 /* 1518 * external log as separate logical volume 1519 */ 1520 list_del(&log->journal_list); 1521 bdev = log->bdev; 1522 rc = lmLogShutdown(log); 1523 1524 bd_release(bdev); 1525 blkdev_put(bdev); 1526 1527 kfree(log); 1528 1529 out: 1530 up(&jfs_log_sem); 1531 jfs_info("lmLogClose: exit(%d)", rc); 1532 return rc; 1533 } 1534 1535 1536 /* 1537 * NAME: jfs_flush_journal() 1538 * 1539 * FUNCTION: initiate write of any outstanding transactions to the journal 1540 * and optionally wait until they are all written to disk 1541 * 1542 * wait == 0 flush until latest txn is committed, don't wait 1543 * wait == 1 flush until latest txn is committed, wait 1544 * wait > 1 flush until all txn's are complete, wait 1545 */ 1546 void jfs_flush_journal(struct jfs_log *log, int wait) 1547 { 1548 int i; 1549 struct tblock *target = NULL; 1550 1551 /* jfs_write_inode may call us during read-only mount */ 1552 if (!log) 1553 return; 1554 1555 jfs_info("jfs_flush_journal: log:0x%p wait=%d", log, wait); 1556 1557 LOGGC_LOCK(log); 1558 1559 if (!list_empty(&log->cqueue)) { 1560 /* 1561 * This ensures that we will keep writing to the journal as long 1562 * as there are unwritten commit records 1563 */ 1564 target = list_entry(log->cqueue.prev, struct tblock, cqueue); 1565 1566 if (test_bit(log_FLUSH, &log->flag)) { 1567 /* 1568 * We're already flushing. 1569 * if flush_tblk is NULL, we are flushing everything, 1570 * so leave it that way. Otherwise, update it to the 1571 * latest transaction 1572 */ 1573 if (log->flush_tblk) 1574 log->flush_tblk = target; 1575 } else { 1576 /* Only flush until latest transaction is committed */ 1577 log->flush_tblk = target; 1578 set_bit(log_FLUSH, &log->flag); 1579 1580 /* 1581 * Initiate I/O on outstanding transactions 1582 */ 1583 if (!(log->cflag & logGC_PAGEOUT)) { 1584 log->cflag |= logGC_PAGEOUT; 1585 lmGCwrite(log, 0); 1586 } 1587 } 1588 } 1589 if ((wait > 1) || test_bit(log_SYNCBARRIER, &log->flag)) { 1590 /* Flush until all activity complete */ 1591 set_bit(log_FLUSH, &log->flag); 1592 log->flush_tblk = NULL; 1593 } 1594 1595 if (wait && target && !(target->flag & tblkGC_COMMITTED)) { 1596 DECLARE_WAITQUEUE(__wait, current); 1597 1598 add_wait_queue(&target->gcwait, &__wait); 1599 set_current_state(TASK_UNINTERRUPTIBLE); 1600 LOGGC_UNLOCK(log); 1601 schedule(); 1602 current->state = TASK_RUNNING; 1603 LOGGC_LOCK(log); 1604 remove_wait_queue(&target->gcwait, &__wait); 1605 } 1606 LOGGC_UNLOCK(log); 1607 1608 if (wait < 2) 1609 return; 1610 1611 /* 1612 * If there was recent activity, we may need to wait 1613 * for the lazycommit thread to catch up 1614 */ 1615 if ((!list_empty(&log->cqueue)) || !list_empty(&log->synclist)) { 1616 for (i = 0; i < 800; i++) { /* Too much? */ 1617 msleep(250); 1618 if (list_empty(&log->cqueue) && 1619 list_empty(&log->synclist)) 1620 break; 1621 } 1622 } 1623 assert(list_empty(&log->cqueue)); 1624 assert(list_empty(&log->synclist)); 1625 clear_bit(log_FLUSH, &log->flag); 1626 } 1627 1628 /* 1629 * NAME: lmLogShutdown() 1630 * 1631 * FUNCTION: log shutdown at last LogClose(). 1632 * 1633 * write log syncpt record. 1634 * update super block to set redone flag to 0. 1635 * 1636 * PARAMETER: log - log inode 1637 * 1638 * RETURN: 0 - success 1639 * 1640 * serialization: single last close thread 1641 */ 1642 int lmLogShutdown(struct jfs_log * log) 1643 { 1644 int rc; 1645 struct lrd lrd; 1646 int lsn; 1647 struct logsuper *logsuper; 1648 struct lbuf *bpsuper; 1649 struct lbuf *bp; 1650 struct logpage *lp; 1651 1652 jfs_info("lmLogShutdown: log:0x%p", log); 1653 1654 jfs_flush_journal(log, 2); 1655 1656 /* 1657 * write the last SYNCPT record with syncpoint = 0 1658 * (i.e., log redo up to HERE !) 1659 */ 1660 lrd.logtid = 0; 1661 lrd.backchain = 0; 1662 lrd.type = cpu_to_le16(LOG_SYNCPT); 1663 lrd.length = 0; 1664 lrd.log.syncpt.sync = 0; 1665 1666 lsn = lmWriteRecord(log, NULL, &lrd, NULL); 1667 bp = log->bp; 1668 lp = (struct logpage *) bp->l_ldata; 1669 lp->h.eor = lp->t.eor = cpu_to_le16(bp->l_eor); 1670 lbmWrite(log, log->bp, lbmWRITE | lbmRELEASE | lbmSYNC, 0); 1671 lbmIOWait(log->bp, lbmFREE); 1672 1673 /* 1674 * synchronous update log superblock 1675 * mark log state as shutdown cleanly 1676 * (i.e., Log does not need to be replayed). 1677 */ 1678 if ((rc = lbmRead(log, 1, &bpsuper))) 1679 goto out; 1680 1681 logsuper = (struct logsuper *) bpsuper->l_ldata; 1682 logsuper->state = cpu_to_le32(LOGREDONE); 1683 logsuper->end = cpu_to_le32(lsn); 1684 lbmDirectWrite(log, bpsuper, lbmWRITE | lbmRELEASE | lbmSYNC); 1685 rc = lbmIOWait(bpsuper, lbmFREE); 1686 1687 jfs_info("lmLogShutdown: lsn:0x%x page:%d eor:%d", 1688 lsn, log->page, log->eor); 1689 1690 out: 1691 /* 1692 * shutdown per log i/o 1693 */ 1694 lbmLogShutdown(log); 1695 1696 if (rc) { 1697 jfs_warn("lmLogShutdown: exit(%d)", rc); 1698 } 1699 return rc; 1700 } 1701 1702 1703 /* 1704 * NAME: lmLogFileSystem() 1705 * 1706 * FUNCTION: insert (<activate> = true)/remove (<activate> = false) 1707 * file system into/from log active file system list. 1708 * 1709 * PARAMETE: log - pointer to logs inode. 1710 * fsdev - kdev_t of filesystem. 1711 * serial - pointer to returned log serial number 1712 * activate - insert/remove device from active list. 1713 * 1714 * RETURN: 0 - success 1715 * errors returned by vms_iowait(). 1716 */ 1717 static int lmLogFileSystem(struct jfs_log * log, struct jfs_sb_info *sbi, 1718 int activate) 1719 { 1720 int rc = 0; 1721 int i; 1722 struct logsuper *logsuper; 1723 struct lbuf *bpsuper; 1724 char *uuid = sbi->uuid; 1725 1726 /* 1727 * insert/remove file system device to log active file system list. 1728 */ 1729 if ((rc = lbmRead(log, 1, &bpsuper))) 1730 return rc; 1731 1732 logsuper = (struct logsuper *) bpsuper->l_ldata; 1733 if (activate) { 1734 for (i = 0; i < MAX_ACTIVE; i++) 1735 if (!memcmp(logsuper->active[i].uuid, NULL_UUID, 16)) { 1736 memcpy(logsuper->active[i].uuid, uuid, 16); 1737 sbi->aggregate = i; 1738 break; 1739 } 1740 if (i == MAX_ACTIVE) { 1741 jfs_warn("Too many file systems sharing journal!"); 1742 lbmFree(bpsuper); 1743 return -EMFILE; /* Is there a better rc? */ 1744 } 1745 } else { 1746 for (i = 0; i < MAX_ACTIVE; i++) 1747 if (!memcmp(logsuper->active[i].uuid, uuid, 16)) { 1748 memcpy(logsuper->active[i].uuid, NULL_UUID, 16); 1749 break; 1750 } 1751 if (i == MAX_ACTIVE) { 1752 jfs_warn("Somebody stomped on the journal!"); 1753 lbmFree(bpsuper); 1754 return -EIO; 1755 } 1756 1757 } 1758 1759 /* 1760 * synchronous write log superblock: 1761 * 1762 * write sidestream bypassing write queue: 1763 * at file system mount, log super block is updated for 1764 * activation of the file system before any log record 1765 * (MOUNT record) of the file system, and at file system 1766 * unmount, all meta data for the file system has been 1767 * flushed before log super block is updated for deactivation 1768 * of the file system. 1769 */ 1770 lbmDirectWrite(log, bpsuper, lbmWRITE | lbmRELEASE | lbmSYNC); 1771 rc = lbmIOWait(bpsuper, lbmFREE); 1772 1773 return rc; 1774 } 1775 1776 /* 1777 * log buffer manager (lbm) 1778 * ------------------------ 1779 * 1780 * special purpose buffer manager supporting log i/o requirements. 1781 * 1782 * per log write queue: 1783 * log pageout occurs in serial order by fifo write queue and 1784 * restricting to a single i/o in pregress at any one time. 1785 * a circular singly-linked list 1786 * (log->wrqueue points to the tail, and buffers are linked via 1787 * bp->wrqueue field), and 1788 * maintains log page in pageout ot waiting for pageout in serial pageout. 1789 */ 1790 1791 /* 1792 * lbmLogInit() 1793 * 1794 * initialize per log I/O setup at lmLogInit() 1795 */ 1796 static int lbmLogInit(struct jfs_log * log) 1797 { /* log inode */ 1798 int i; 1799 struct lbuf *lbuf; 1800 1801 jfs_info("lbmLogInit: log:0x%p", log); 1802 1803 /* initialize current buffer cursor */ 1804 log->bp = NULL; 1805 1806 /* initialize log device write queue */ 1807 log->wqueue = NULL; 1808 1809 /* 1810 * Each log has its own buffer pages allocated to it. These are 1811 * not managed by the page cache. This ensures that a transaction 1812 * writing to the log does not block trying to allocate a page from 1813 * the page cache (for the log). This would be bad, since page 1814 * allocation waits on the kswapd thread that may be committing inodes 1815 * which would cause log activity. Was that clear? I'm trying to 1816 * avoid deadlock here. 1817 */ 1818 init_waitqueue_head(&log->free_wait); 1819 1820 log->lbuf_free = NULL; 1821 1822 for (i = 0; i < LOGPAGES; i++) { 1823 lbuf = kmalloc(sizeof(struct lbuf), GFP_KERNEL); 1824 if (lbuf == 0) 1825 goto error; 1826 lbuf->l_ldata = (char *) get_zeroed_page(GFP_KERNEL); 1827 if (lbuf->l_ldata == 0) { 1828 kfree(lbuf); 1829 goto error; 1830 } 1831 lbuf->l_log = log; 1832 init_waitqueue_head(&lbuf->l_ioevent); 1833 1834 lbuf->l_freelist = log->lbuf_free; 1835 log->lbuf_free = lbuf; 1836 } 1837 1838 return (0); 1839 1840 error: 1841 lbmLogShutdown(log); 1842 return -ENOMEM; 1843 } 1844 1845 1846 /* 1847 * lbmLogShutdown() 1848 * 1849 * finalize per log I/O setup at lmLogShutdown() 1850 */ 1851 static void lbmLogShutdown(struct jfs_log * log) 1852 { 1853 struct lbuf *lbuf; 1854 1855 jfs_info("lbmLogShutdown: log:0x%p", log); 1856 1857 lbuf = log->lbuf_free; 1858 while (lbuf) { 1859 struct lbuf *next = lbuf->l_freelist; 1860 free_page((unsigned long) lbuf->l_ldata); 1861 kfree(lbuf); 1862 lbuf = next; 1863 } 1864 1865 log->bp = NULL; 1866 } 1867 1868 1869 /* 1870 * lbmAllocate() 1871 * 1872 * allocate an empty log buffer 1873 */ 1874 static struct lbuf *lbmAllocate(struct jfs_log * log, int pn) 1875 { 1876 struct lbuf *bp; 1877 unsigned long flags; 1878 1879 /* 1880 * recycle from log buffer freelist if any 1881 */ 1882 LCACHE_LOCK(flags); 1883 LCACHE_SLEEP_COND(log->free_wait, (bp = log->lbuf_free), flags); 1884 log->lbuf_free = bp->l_freelist; 1885 LCACHE_UNLOCK(flags); 1886 1887 bp->l_flag = 0; 1888 1889 bp->l_wqnext = NULL; 1890 bp->l_freelist = NULL; 1891 1892 bp->l_pn = pn; 1893 bp->l_blkno = log->base + (pn << (L2LOGPSIZE - log->l2bsize)); 1894 bp->l_ceor = 0; 1895 1896 return bp; 1897 } 1898 1899 1900 /* 1901 * lbmFree() 1902 * 1903 * release a log buffer to freelist 1904 */ 1905 static void lbmFree(struct lbuf * bp) 1906 { 1907 unsigned long flags; 1908 1909 LCACHE_LOCK(flags); 1910 1911 lbmfree(bp); 1912 1913 LCACHE_UNLOCK(flags); 1914 } 1915 1916 static void lbmfree(struct lbuf * bp) 1917 { 1918 struct jfs_log *log = bp->l_log; 1919 1920 assert(bp->l_wqnext == NULL); 1921 1922 /* 1923 * return the buffer to head of freelist 1924 */ 1925 bp->l_freelist = log->lbuf_free; 1926 log->lbuf_free = bp; 1927 1928 wake_up(&log->free_wait); 1929 return; 1930 } 1931 1932 1933 /* 1934 * NAME: lbmRedrive 1935 * 1936 * FUNCTION: add a log buffer to the the log redrive list 1937 * 1938 * PARAMETER: 1939 * bp - log buffer 1940 * 1941 * NOTES: 1942 * Takes log_redrive_lock. 1943 */ 1944 static inline void lbmRedrive(struct lbuf *bp) 1945 { 1946 unsigned long flags; 1947 1948 spin_lock_irqsave(&log_redrive_lock, flags); 1949 bp->l_redrive_next = log_redrive_list; 1950 log_redrive_list = bp; 1951 spin_unlock_irqrestore(&log_redrive_lock, flags); 1952 1953 wake_up(&jfs_IO_thread_wait); 1954 } 1955 1956 1957 /* 1958 * lbmRead() 1959 */ 1960 static int lbmRead(struct jfs_log * log, int pn, struct lbuf ** bpp) 1961 { 1962 struct bio *bio; 1963 struct lbuf *bp; 1964 1965 /* 1966 * allocate a log buffer 1967 */ 1968 *bpp = bp = lbmAllocate(log, pn); 1969 jfs_info("lbmRead: bp:0x%p pn:0x%x", bp, pn); 1970 1971 bp->l_flag |= lbmREAD; 1972 1973 bio = bio_alloc(GFP_NOFS, 1); 1974 1975 bio->bi_sector = bp->l_blkno << (log->l2bsize - 9); 1976 bio->bi_bdev = log->bdev; 1977 bio->bi_io_vec[0].bv_page = virt_to_page(bp->l_ldata); 1978 bio->bi_io_vec[0].bv_len = LOGPSIZE; 1979 bio->bi_io_vec[0].bv_offset = 0; 1980 1981 bio->bi_vcnt = 1; 1982 bio->bi_idx = 0; 1983 bio->bi_size = LOGPSIZE; 1984 1985 bio->bi_end_io = lbmIODone; 1986 bio->bi_private = bp; 1987 submit_bio(READ_SYNC, bio); 1988 1989 wait_event(bp->l_ioevent, (bp->l_flag != lbmREAD)); 1990 1991 return 0; 1992 } 1993 1994 1995 /* 1996 * lbmWrite() 1997 * 1998 * buffer at head of pageout queue stays after completion of 1999 * partial-page pageout and redriven by explicit initiation of 2000 * pageout by caller until full-page pageout is completed and 2001 * released. 2002 * 2003 * device driver i/o done redrives pageout of new buffer at 2004 * head of pageout queue when current buffer at head of pageout 2005 * queue is released at the completion of its full-page pageout. 2006 * 2007 * LOGGC_LOCK() serializes lbmWrite() by lmNextPage() and lmGroupCommit(). 2008 * LCACHE_LOCK() serializes xflag between lbmWrite() and lbmIODone() 2009 */ 2010 static void lbmWrite(struct jfs_log * log, struct lbuf * bp, int flag, 2011 int cant_block) 2012 { 2013 struct lbuf *tail; 2014 unsigned long flags; 2015 2016 jfs_info("lbmWrite: bp:0x%p flag:0x%x pn:0x%x", bp, flag, bp->l_pn); 2017 2018 /* map the logical block address to physical block address */ 2019 bp->l_blkno = 2020 log->base + (bp->l_pn << (L2LOGPSIZE - log->l2bsize)); 2021 2022 LCACHE_LOCK(flags); /* disable+lock */ 2023 2024 /* 2025 * initialize buffer for device driver 2026 */ 2027 bp->l_flag = flag; 2028 2029 /* 2030 * insert bp at tail of write queue associated with log 2031 * 2032 * (request is either for bp already/currently at head of queue 2033 * or new bp to be inserted at tail) 2034 */ 2035 tail = log->wqueue; 2036 2037 /* is buffer not already on write queue ? */ 2038 if (bp->l_wqnext == NULL) { 2039 /* insert at tail of wqueue */ 2040 if (tail == NULL) { 2041 log->wqueue = bp; 2042 bp->l_wqnext = bp; 2043 } else { 2044 log->wqueue = bp; 2045 bp->l_wqnext = tail->l_wqnext; 2046 tail->l_wqnext = bp; 2047 } 2048 2049 tail = bp; 2050 } 2051 2052 /* is buffer at head of wqueue and for write ? */ 2053 if ((bp != tail->l_wqnext) || !(flag & lbmWRITE)) { 2054 LCACHE_UNLOCK(flags); /* unlock+enable */ 2055 return; 2056 } 2057 2058 LCACHE_UNLOCK(flags); /* unlock+enable */ 2059 2060 if (cant_block) 2061 lbmRedrive(bp); 2062 else if (flag & lbmSYNC) 2063 lbmStartIO(bp); 2064 else { 2065 LOGGC_UNLOCK(log); 2066 lbmStartIO(bp); 2067 LOGGC_LOCK(log); 2068 } 2069 } 2070 2071 2072 /* 2073 * lbmDirectWrite() 2074 * 2075 * initiate pageout bypassing write queue for sidestream 2076 * (e.g., log superblock) write; 2077 */ 2078 static void lbmDirectWrite(struct jfs_log * log, struct lbuf * bp, int flag) 2079 { 2080 jfs_info("lbmDirectWrite: bp:0x%p flag:0x%x pn:0x%x", 2081 bp, flag, bp->l_pn); 2082 2083 /* 2084 * initialize buffer for device driver 2085 */ 2086 bp->l_flag = flag | lbmDIRECT; 2087 2088 /* map the logical block address to physical block address */ 2089 bp->l_blkno = 2090 log->base + (bp->l_pn << (L2LOGPSIZE - log->l2bsize)); 2091 2092 /* 2093 * initiate pageout of the page 2094 */ 2095 lbmStartIO(bp); 2096 } 2097 2098 2099 /* 2100 * NAME: lbmStartIO() 2101 * 2102 * FUNCTION: Interface to DD strategy routine 2103 * 2104 * RETURN: none 2105 * 2106 * serialization: LCACHE_LOCK() is NOT held during log i/o; 2107 */ 2108 static void lbmStartIO(struct lbuf * bp) 2109 { 2110 struct bio *bio; 2111 struct jfs_log *log = bp->l_log; 2112 2113 jfs_info("lbmStartIO\n"); 2114 2115 bio = bio_alloc(GFP_NOFS, 1); 2116 bio->bi_sector = bp->l_blkno << (log->l2bsize - 9); 2117 bio->bi_bdev = log->bdev; 2118 bio->bi_io_vec[0].bv_page = virt_to_page(bp->l_ldata); 2119 bio->bi_io_vec[0].bv_len = LOGPSIZE; 2120 bio->bi_io_vec[0].bv_offset = 0; 2121 2122 bio->bi_vcnt = 1; 2123 bio->bi_idx = 0; 2124 bio->bi_size = LOGPSIZE; 2125 2126 bio->bi_end_io = lbmIODone; 2127 bio->bi_private = bp; 2128 2129 /* check if journaling to disk has been disabled */ 2130 if (!log->no_integrity) { 2131 submit_bio(WRITE_SYNC, bio); 2132 INCREMENT(lmStat.submitted); 2133 } 2134 else { 2135 bio->bi_size = 0; 2136 lbmIODone(bio, 0, 0); /* 2nd argument appears to not be used => 0 2137 * 3rd argument appears to not be used => 0 2138 */ 2139 } 2140 } 2141 2142 2143 /* 2144 * lbmIOWait() 2145 */ 2146 static int lbmIOWait(struct lbuf * bp, int flag) 2147 { 2148 unsigned long flags; 2149 int rc = 0; 2150 2151 jfs_info("lbmIOWait1: bp:0x%p flag:0x%x:0x%x", bp, bp->l_flag, flag); 2152 2153 LCACHE_LOCK(flags); /* disable+lock */ 2154 2155 LCACHE_SLEEP_COND(bp->l_ioevent, (bp->l_flag & lbmDONE), flags); 2156 2157 rc = (bp->l_flag & lbmERROR) ? -EIO : 0; 2158 2159 if (flag & lbmFREE) 2160 lbmfree(bp); 2161 2162 LCACHE_UNLOCK(flags); /* unlock+enable */ 2163 2164 jfs_info("lbmIOWait2: bp:0x%p flag:0x%x:0x%x", bp, bp->l_flag, flag); 2165 return rc; 2166 } 2167 2168 /* 2169 * lbmIODone() 2170 * 2171 * executed at INTIODONE level 2172 */ 2173 static int lbmIODone(struct bio *bio, unsigned int bytes_done, int error) 2174 { 2175 struct lbuf *bp = bio->bi_private; 2176 struct lbuf *nextbp, *tail; 2177 struct jfs_log *log; 2178 unsigned long flags; 2179 2180 if (bio->bi_size) 2181 return 1; 2182 2183 /* 2184 * get back jfs buffer bound to the i/o buffer 2185 */ 2186 jfs_info("lbmIODone: bp:0x%p flag:0x%x", bp, bp->l_flag); 2187 2188 LCACHE_LOCK(flags); /* disable+lock */ 2189 2190 bp->l_flag |= lbmDONE; 2191 2192 if (!test_bit(BIO_UPTODATE, &bio->bi_flags)) { 2193 bp->l_flag |= lbmERROR; 2194 2195 jfs_err("lbmIODone: I/O error in JFS log"); 2196 } 2197 2198 bio_put(bio); 2199 2200 /* 2201 * pagein completion 2202 */ 2203 if (bp->l_flag & lbmREAD) { 2204 bp->l_flag &= ~lbmREAD; 2205 2206 LCACHE_UNLOCK(flags); /* unlock+enable */ 2207 2208 /* wakeup I/O initiator */ 2209 LCACHE_WAKEUP(&bp->l_ioevent); 2210 2211 return 0; 2212 } 2213 2214 /* 2215 * pageout completion 2216 * 2217 * the bp at the head of write queue has completed pageout. 2218 * 2219 * if single-commit/full-page pageout, remove the current buffer 2220 * from head of pageout queue, and redrive pageout with 2221 * the new buffer at head of pageout queue; 2222 * otherwise, the partial-page pageout buffer stays at 2223 * the head of pageout queue to be redriven for pageout 2224 * by lmGroupCommit() until full-page pageout is completed. 2225 */ 2226 bp->l_flag &= ~lbmWRITE; 2227 INCREMENT(lmStat.pagedone); 2228 2229 /* update committed lsn */ 2230 log = bp->l_log; 2231 log->clsn = (bp->l_pn << L2LOGPSIZE) + bp->l_ceor; 2232 2233 if (bp->l_flag & lbmDIRECT) { 2234 LCACHE_WAKEUP(&bp->l_ioevent); 2235 LCACHE_UNLOCK(flags); 2236 return 0; 2237 } 2238 2239 tail = log->wqueue; 2240 2241 /* single element queue */ 2242 if (bp == tail) { 2243 /* remove head buffer of full-page pageout 2244 * from log device write queue 2245 */ 2246 if (bp->l_flag & lbmRELEASE) { 2247 log->wqueue = NULL; 2248 bp->l_wqnext = NULL; 2249 } 2250 } 2251 /* multi element queue */ 2252 else { 2253 /* remove head buffer of full-page pageout 2254 * from log device write queue 2255 */ 2256 if (bp->l_flag & lbmRELEASE) { 2257 nextbp = tail->l_wqnext = bp->l_wqnext; 2258 bp->l_wqnext = NULL; 2259 2260 /* 2261 * redrive pageout of next page at head of write queue: 2262 * redrive next page without any bound tblk 2263 * (i.e., page w/o any COMMIT records), or 2264 * first page of new group commit which has been 2265 * queued after current page (subsequent pageout 2266 * is performed synchronously, except page without 2267 * any COMMITs) by lmGroupCommit() as indicated 2268 * by lbmWRITE flag; 2269 */ 2270 if (nextbp->l_flag & lbmWRITE) { 2271 /* 2272 * We can't do the I/O at interrupt time. 2273 * The jfsIO thread can do it 2274 */ 2275 lbmRedrive(nextbp); 2276 } 2277 } 2278 } 2279 2280 /* 2281 * synchronous pageout: 2282 * 2283 * buffer has not necessarily been removed from write queue 2284 * (e.g., synchronous write of partial-page with COMMIT): 2285 * leave buffer for i/o initiator to dispose 2286 */ 2287 if (bp->l_flag & lbmSYNC) { 2288 LCACHE_UNLOCK(flags); /* unlock+enable */ 2289 2290 /* wakeup I/O initiator */ 2291 LCACHE_WAKEUP(&bp->l_ioevent); 2292 } 2293 2294 /* 2295 * Group Commit pageout: 2296 */ 2297 else if (bp->l_flag & lbmGC) { 2298 LCACHE_UNLOCK(flags); 2299 lmPostGC(bp); 2300 } 2301 2302 /* 2303 * asynchronous pageout: 2304 * 2305 * buffer must have been removed from write queue: 2306 * insert buffer at head of freelist where it can be recycled 2307 */ 2308 else { 2309 assert(bp->l_flag & lbmRELEASE); 2310 assert(bp->l_flag & lbmFREE); 2311 lbmfree(bp); 2312 2313 LCACHE_UNLOCK(flags); /* unlock+enable */ 2314 } 2315 2316 return 0; 2317 } 2318 2319 int jfsIOWait(void *arg) 2320 { 2321 struct lbuf *bp; 2322 2323 daemonize("jfsIO"); 2324 2325 complete(&jfsIOwait); 2326 2327 do { 2328 DECLARE_WAITQUEUE(wq, current); 2329 2330 spin_lock_irq(&log_redrive_lock); 2331 while ((bp = log_redrive_list) != 0) { 2332 log_redrive_list = bp->l_redrive_next; 2333 bp->l_redrive_next = NULL; 2334 spin_unlock_irq(&log_redrive_lock); 2335 lbmStartIO(bp); 2336 spin_lock_irq(&log_redrive_lock); 2337 } 2338 if (current->flags & PF_FREEZE) { 2339 spin_unlock_irq(&log_redrive_lock); 2340 refrigerator(PF_FREEZE); 2341 } else { 2342 add_wait_queue(&jfs_IO_thread_wait, &wq); 2343 set_current_state(TASK_INTERRUPTIBLE); 2344 spin_unlock_irq(&log_redrive_lock); 2345 schedule(); 2346 current->state = TASK_RUNNING; 2347 remove_wait_queue(&jfs_IO_thread_wait, &wq); 2348 } 2349 } while (!jfs_stop_threads); 2350 2351 jfs_info("jfsIOWait being killed!"); 2352 complete_and_exit(&jfsIOwait, 0); 2353 } 2354 2355 /* 2356 * NAME: lmLogFormat()/jfs_logform() 2357 * 2358 * FUNCTION: format file system log 2359 * 2360 * PARAMETERS: 2361 * log - volume log 2362 * logAddress - start address of log space in FS block 2363 * logSize - length of log space in FS block; 2364 * 2365 * RETURN: 0 - success 2366 * -EIO - i/o error 2367 * 2368 * XXX: We're synchronously writing one page at a time. This needs to 2369 * be improved by writing multiple pages at once. 2370 */ 2371 int lmLogFormat(struct jfs_log *log, s64 logAddress, int logSize) 2372 { 2373 int rc = -EIO; 2374 struct jfs_sb_info *sbi; 2375 struct logsuper *logsuper; 2376 struct logpage *lp; 2377 int lspn; /* log sequence page number */ 2378 struct lrd *lrd_ptr; 2379 int npages = 0; 2380 struct lbuf *bp; 2381 2382 jfs_info("lmLogFormat: logAddress:%Ld logSize:%d", 2383 (long long)logAddress, logSize); 2384 2385 sbi = list_entry(log->sb_list.next, struct jfs_sb_info, log_list); 2386 2387 /* allocate a log buffer */ 2388 bp = lbmAllocate(log, 1); 2389 2390 npages = logSize >> sbi->l2nbperpage; 2391 2392 /* 2393 * log space: 2394 * 2395 * page 0 - reserved; 2396 * page 1 - log superblock; 2397 * page 2 - log data page: A SYNC log record is written 2398 * into this page at logform time; 2399 * pages 3-N - log data page: set to empty log data pages; 2400 */ 2401 /* 2402 * init log superblock: log page 1 2403 */ 2404 logsuper = (struct logsuper *) bp->l_ldata; 2405 2406 logsuper->magic = cpu_to_le32(LOGMAGIC); 2407 logsuper->version = cpu_to_le32(LOGVERSION); 2408 logsuper->state = cpu_to_le32(LOGREDONE); 2409 logsuper->flag = cpu_to_le32(sbi->mntflag); /* ? */ 2410 logsuper->size = cpu_to_le32(npages); 2411 logsuper->bsize = cpu_to_le32(sbi->bsize); 2412 logsuper->l2bsize = cpu_to_le32(sbi->l2bsize); 2413 logsuper->end = cpu_to_le32(2 * LOGPSIZE + LOGPHDRSIZE + LOGRDSIZE); 2414 2415 bp->l_flag = lbmWRITE | lbmSYNC | lbmDIRECT; 2416 bp->l_blkno = logAddress + sbi->nbperpage; 2417 lbmStartIO(bp); 2418 if ((rc = lbmIOWait(bp, 0))) 2419 goto exit; 2420 2421 /* 2422 * init pages 2 to npages-1 as log data pages: 2423 * 2424 * log page sequence number (lpsn) initialization: 2425 * 2426 * pn: 0 1 2 3 n-1 2427 * +-----+-----+=====+=====+===.....===+=====+ 2428 * lspn: N-1 0 1 N-2 2429 * <--- N page circular file ----> 2430 * 2431 * the N (= npages-2) data pages of the log is maintained as 2432 * a circular file for the log records; 2433 * lpsn grows by 1 monotonically as each log page is written 2434 * to the circular file of the log; 2435 * and setLogpage() will not reset the page number even if 2436 * the eor is equal to LOGPHDRSIZE. In order for binary search 2437 * still work in find log end process, we have to simulate the 2438 * log wrap situation at the log format time. 2439 * The 1st log page written will have the highest lpsn. Then 2440 * the succeeding log pages will have ascending order of 2441 * the lspn starting from 0, ... (N-2) 2442 */ 2443 lp = (struct logpage *) bp->l_ldata; 2444 /* 2445 * initialize 1st log page to be written: lpsn = N - 1, 2446 * write a SYNCPT log record is written to this page 2447 */ 2448 lp->h.page = lp->t.page = cpu_to_le32(npages - 3); 2449 lp->h.eor = lp->t.eor = cpu_to_le16(LOGPHDRSIZE + LOGRDSIZE); 2450 2451 lrd_ptr = (struct lrd *) &lp->data; 2452 lrd_ptr->logtid = 0; 2453 lrd_ptr->backchain = 0; 2454 lrd_ptr->type = cpu_to_le16(LOG_SYNCPT); 2455 lrd_ptr->length = 0; 2456 lrd_ptr->log.syncpt.sync = 0; 2457 2458 bp->l_blkno += sbi->nbperpage; 2459 bp->l_flag = lbmWRITE | lbmSYNC | lbmDIRECT; 2460 lbmStartIO(bp); 2461 if ((rc = lbmIOWait(bp, 0))) 2462 goto exit; 2463 2464 /* 2465 * initialize succeeding log pages: lpsn = 0, 1, ..., (N-2) 2466 */ 2467 for (lspn = 0; lspn < npages - 3; lspn++) { 2468 lp->h.page = lp->t.page = cpu_to_le32(lspn); 2469 lp->h.eor = lp->t.eor = cpu_to_le16(LOGPHDRSIZE); 2470 2471 bp->l_blkno += sbi->nbperpage; 2472 bp->l_flag = lbmWRITE | lbmSYNC | lbmDIRECT; 2473 lbmStartIO(bp); 2474 if ((rc = lbmIOWait(bp, 0))) 2475 goto exit; 2476 } 2477 2478 rc = 0; 2479 exit: 2480 /* 2481 * finalize log 2482 */ 2483 /* release the buffer */ 2484 lbmFree(bp); 2485 2486 return rc; 2487 } 2488 2489 #ifdef CONFIG_JFS_STATISTICS 2490 int jfs_lmstats_read(char *buffer, char **start, off_t offset, int length, 2491 int *eof, void *data) 2492 { 2493 int len = 0; 2494 off_t begin; 2495 2496 len += sprintf(buffer, 2497 "JFS Logmgr stats\n" 2498 "================\n" 2499 "commits = %d\n" 2500 "writes submitted = %d\n" 2501 "writes completed = %d\n" 2502 "full pages submitted = %d\n" 2503 "partial pages submitted = %d\n", 2504 lmStat.commit, 2505 lmStat.submitted, 2506 lmStat.pagedone, 2507 lmStat.full_page, 2508 lmStat.partial_page); 2509 2510 begin = offset; 2511 *start = buffer + begin; 2512 len -= begin; 2513 2514 if (len > length) 2515 len = length; 2516 else 2517 *eof = 1; 2518 2519 if (len < 0) 2520 len = 0; 2521 2522 return len; 2523 } 2524 #endif /* CONFIG_JFS_STATISTICS */ 2525