1 /* 2 * This file is part of UBIFS. 3 * 4 * Copyright (C) 2006-2008 Nokia Corporation. 5 * 6 * This program is free software; you can redistribute it and/or modify it 7 * under the terms of the GNU General Public License version 2 as published by 8 * the Free Software Foundation. 9 * 10 * This program is distributed in the hope that it will be useful, but WITHOUT 11 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 12 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for 13 * more details. 14 * 15 * You should have received a copy of the GNU General Public License along with 16 * this program; if not, write to the Free Software Foundation, Inc., 51 17 * Franklin St, Fifth Floor, Boston, MA 02110-1301 USA 18 * 19 * Authors: Adrian Hunter 20 * Artem Bityutskiy (Битюцкий Артём) 21 */ 22 23 /* 24 * This file implements functions that manage the running of the commit process. 25 * Each affected module has its own functions to accomplish their part in the 26 * commit and those functions are called here. 27 * 28 * The commit is the process whereby all updates to the index and LEB properties 29 * are written out together and the journal becomes empty. This keeps the 30 * file system consistent - at all times the state can be recreated by reading 31 * the index and LEB properties and then replaying the journal. 32 * 33 * The commit is split into two parts named "commit start" and "commit end". 34 * During commit start, the commit process has exclusive access to the journal 35 * by holding the commit semaphore down for writing. As few I/O operations as 36 * possible are performed during commit start, instead the nodes that are to be 37 * written are merely identified. During commit end, the commit semaphore is no 38 * longer held and the journal is again in operation, allowing users to continue 39 * to use the file system while the bulk of the commit I/O is performed. The 40 * purpose of this two-step approach is to prevent the commit from causing any 41 * latency blips. Note that in any case, the commit does not prevent lookups 42 * (as permitted by the TNC mutex), or access to VFS data structures e.g. page 43 * cache. 44 */ 45 46 #include <linux/freezer.h> 47 #include <linux/kthread.h> 48 #include "ubifs.h" 49 50 /** 51 * do_commit - commit the journal. 52 * @c: UBIFS file-system description object 53 * 54 * This function implements UBIFS commit. It has to be called with commit lock 55 * locked. Returns zero in case of success and a negative error code in case of 56 * failure. 57 */ 58 static int do_commit(struct ubifs_info *c) 59 { 60 int err, new_ltail_lnum, old_ltail_lnum, i; 61 struct ubifs_zbranch zroot; 62 struct ubifs_lp_stats lst; 63 64 dbg_cmt("start"); 65 if (c->ro_media) { 66 err = -EROFS; 67 goto out_up; 68 } 69 70 /* Sync all write buffers (necessary for recovery) */ 71 for (i = 0; i < c->jhead_cnt; i++) { 72 err = ubifs_wbuf_sync(&c->jheads[i].wbuf); 73 if (err) 74 goto out_up; 75 } 76 77 err = ubifs_gc_start_commit(c); 78 if (err) 79 goto out_up; 80 err = dbg_check_lprops(c); 81 if (err) 82 goto out_up; 83 err = ubifs_log_start_commit(c, &new_ltail_lnum); 84 if (err) 85 goto out_up; 86 err = ubifs_tnc_start_commit(c, &zroot); 87 if (err) 88 goto out_up; 89 err = ubifs_lpt_start_commit(c); 90 if (err) 91 goto out_up; 92 err = ubifs_orphan_start_commit(c); 93 if (err) 94 goto out_up; 95 96 ubifs_get_lp_stats(c, &lst); 97 98 up_write(&c->commit_sem); 99 100 err = ubifs_tnc_end_commit(c); 101 if (err) 102 goto out; 103 err = ubifs_lpt_end_commit(c); 104 if (err) 105 goto out; 106 err = ubifs_orphan_end_commit(c); 107 if (err) 108 goto out; 109 old_ltail_lnum = c->ltail_lnum; 110 err = ubifs_log_end_commit(c, new_ltail_lnum); 111 if (err) 112 goto out; 113 err = dbg_check_old_index(c, &zroot); 114 if (err) 115 goto out; 116 117 mutex_lock(&c->mst_mutex); 118 c->mst_node->cmt_no = cpu_to_le64(++c->cmt_no); 119 c->mst_node->log_lnum = cpu_to_le32(new_ltail_lnum); 120 c->mst_node->root_lnum = cpu_to_le32(zroot.lnum); 121 c->mst_node->root_offs = cpu_to_le32(zroot.offs); 122 c->mst_node->root_len = cpu_to_le32(zroot.len); 123 c->mst_node->ihead_lnum = cpu_to_le32(c->ihead_lnum); 124 c->mst_node->ihead_offs = cpu_to_le32(c->ihead_offs); 125 c->mst_node->index_size = cpu_to_le64(c->old_idx_sz); 126 c->mst_node->lpt_lnum = cpu_to_le32(c->lpt_lnum); 127 c->mst_node->lpt_offs = cpu_to_le32(c->lpt_offs); 128 c->mst_node->nhead_lnum = cpu_to_le32(c->nhead_lnum); 129 c->mst_node->nhead_offs = cpu_to_le32(c->nhead_offs); 130 c->mst_node->ltab_lnum = cpu_to_le32(c->ltab_lnum); 131 c->mst_node->ltab_offs = cpu_to_le32(c->ltab_offs); 132 c->mst_node->lsave_lnum = cpu_to_le32(c->lsave_lnum); 133 c->mst_node->lsave_offs = cpu_to_le32(c->lsave_offs); 134 c->mst_node->lscan_lnum = cpu_to_le32(c->lscan_lnum); 135 c->mst_node->empty_lebs = cpu_to_le32(lst.empty_lebs); 136 c->mst_node->idx_lebs = cpu_to_le32(lst.idx_lebs); 137 c->mst_node->total_free = cpu_to_le64(lst.total_free); 138 c->mst_node->total_dirty = cpu_to_le64(lst.total_dirty); 139 c->mst_node->total_used = cpu_to_le64(lst.total_used); 140 c->mst_node->total_dead = cpu_to_le64(lst.total_dead); 141 c->mst_node->total_dark = cpu_to_le64(lst.total_dark); 142 if (c->no_orphs) 143 c->mst_node->flags |= cpu_to_le32(UBIFS_MST_NO_ORPHS); 144 else 145 c->mst_node->flags &= ~cpu_to_le32(UBIFS_MST_NO_ORPHS); 146 err = ubifs_write_master(c); 147 mutex_unlock(&c->mst_mutex); 148 if (err) 149 goto out; 150 151 err = ubifs_log_post_commit(c, old_ltail_lnum); 152 if (err) 153 goto out; 154 err = ubifs_gc_end_commit(c); 155 if (err) 156 goto out; 157 err = ubifs_lpt_post_commit(c); 158 if (err) 159 goto out; 160 161 spin_lock(&c->cs_lock); 162 c->cmt_state = COMMIT_RESTING; 163 wake_up(&c->cmt_wq); 164 dbg_cmt("commit end"); 165 spin_unlock(&c->cs_lock); 166 167 return 0; 168 169 out_up: 170 up_write(&c->commit_sem); 171 out: 172 ubifs_err("commit failed, error %d", err); 173 spin_lock(&c->cs_lock); 174 c->cmt_state = COMMIT_BROKEN; 175 wake_up(&c->cmt_wq); 176 spin_unlock(&c->cs_lock); 177 ubifs_ro_mode(c, err); 178 return err; 179 } 180 181 /** 182 * run_bg_commit - run background commit if it is needed. 183 * @c: UBIFS file-system description object 184 * 185 * This function runs background commit if it is needed. Returns zero in case 186 * of success and a negative error code in case of failure. 187 */ 188 static int run_bg_commit(struct ubifs_info *c) 189 { 190 spin_lock(&c->cs_lock); 191 /* 192 * Run background commit only if background commit was requested or if 193 * commit is required. 194 */ 195 if (c->cmt_state != COMMIT_BACKGROUND && 196 c->cmt_state != COMMIT_REQUIRED) 197 goto out; 198 spin_unlock(&c->cs_lock); 199 200 down_write(&c->commit_sem); 201 spin_lock(&c->cs_lock); 202 if (c->cmt_state == COMMIT_REQUIRED) 203 c->cmt_state = COMMIT_RUNNING_REQUIRED; 204 else if (c->cmt_state == COMMIT_BACKGROUND) 205 c->cmt_state = COMMIT_RUNNING_BACKGROUND; 206 else 207 goto out_cmt_unlock; 208 spin_unlock(&c->cs_lock); 209 210 return do_commit(c); 211 212 out_cmt_unlock: 213 up_write(&c->commit_sem); 214 out: 215 spin_unlock(&c->cs_lock); 216 return 0; 217 } 218 219 /** 220 * ubifs_bg_thread - UBIFS background thread function. 221 * @info: points to the file-system description object 222 * 223 * This function implements various file-system background activities: 224 * o when a write-buffer timer expires it synchronizes the appropriate 225 * write-buffer; 226 * o when the journal is about to be full, it starts in-advance commit. 227 * 228 * Note, other stuff like background garbage collection may be added here in 229 * future. 230 */ 231 int ubifs_bg_thread(void *info) 232 { 233 int err; 234 struct ubifs_info *c = info; 235 236 ubifs_msg("background thread \"%s\" started, PID %d", 237 c->bgt_name, current->pid); 238 set_freezable(); 239 240 while (1) { 241 if (kthread_should_stop()) 242 break; 243 244 if (try_to_freeze()) 245 continue; 246 247 set_current_state(TASK_INTERRUPTIBLE); 248 /* Check if there is something to do */ 249 if (!c->need_bgt) { 250 /* 251 * Nothing prevents us from going sleep now and 252 * be never woken up and block the task which 253 * could wait in 'kthread_stop()' forever. 254 */ 255 if (kthread_should_stop()) 256 break; 257 schedule(); 258 continue; 259 } else 260 __set_current_state(TASK_RUNNING); 261 262 c->need_bgt = 0; 263 err = ubifs_bg_wbufs_sync(c); 264 if (err) 265 ubifs_ro_mode(c, err); 266 267 run_bg_commit(c); 268 cond_resched(); 269 } 270 271 dbg_msg("background thread \"%s\" stops", c->bgt_name); 272 return 0; 273 } 274 275 /** 276 * ubifs_commit_required - set commit state to "required". 277 * @c: UBIFS file-system description object 278 * 279 * This function is called if a commit is required but cannot be done from the 280 * calling function, so it is just flagged instead. 281 */ 282 void ubifs_commit_required(struct ubifs_info *c) 283 { 284 spin_lock(&c->cs_lock); 285 switch (c->cmt_state) { 286 case COMMIT_RESTING: 287 case COMMIT_BACKGROUND: 288 dbg_cmt("old: %s, new: %s", dbg_cstate(c->cmt_state), 289 dbg_cstate(COMMIT_REQUIRED)); 290 c->cmt_state = COMMIT_REQUIRED; 291 break; 292 case COMMIT_RUNNING_BACKGROUND: 293 dbg_cmt("old: %s, new: %s", dbg_cstate(c->cmt_state), 294 dbg_cstate(COMMIT_RUNNING_REQUIRED)); 295 c->cmt_state = COMMIT_RUNNING_REQUIRED; 296 break; 297 case COMMIT_REQUIRED: 298 case COMMIT_RUNNING_REQUIRED: 299 case COMMIT_BROKEN: 300 break; 301 } 302 spin_unlock(&c->cs_lock); 303 } 304 305 /** 306 * ubifs_request_bg_commit - notify the background thread to do a commit. 307 * @c: UBIFS file-system description object 308 * 309 * This function is called if the journal is full enough to make a commit 310 * worthwhile, so background thread is kicked to start it. 311 */ 312 void ubifs_request_bg_commit(struct ubifs_info *c) 313 { 314 spin_lock(&c->cs_lock); 315 if (c->cmt_state == COMMIT_RESTING) { 316 dbg_cmt("old: %s, new: %s", dbg_cstate(c->cmt_state), 317 dbg_cstate(COMMIT_BACKGROUND)); 318 c->cmt_state = COMMIT_BACKGROUND; 319 spin_unlock(&c->cs_lock); 320 ubifs_wake_up_bgt(c); 321 } else 322 spin_unlock(&c->cs_lock); 323 } 324 325 /** 326 * wait_for_commit - wait for commit. 327 * @c: UBIFS file-system description object 328 * 329 * This function sleeps until the commit operation is no longer running. 330 */ 331 static int wait_for_commit(struct ubifs_info *c) 332 { 333 dbg_cmt("pid %d goes sleep", current->pid); 334 335 /* 336 * The following sleeps if the condition is false, and will be woken 337 * when the commit ends. It is possible, although very unlikely, that we 338 * will wake up and see the subsequent commit running, rather than the 339 * one we were waiting for, and go back to sleep. However, we will be 340 * woken again, so there is no danger of sleeping forever. 341 */ 342 wait_event(c->cmt_wq, c->cmt_state != COMMIT_RUNNING_BACKGROUND && 343 c->cmt_state != COMMIT_RUNNING_REQUIRED); 344 dbg_cmt("commit finished, pid %d woke up", current->pid); 345 return 0; 346 } 347 348 /** 349 * ubifs_run_commit - run or wait for commit. 350 * @c: UBIFS file-system description object 351 * 352 * This function runs commit and returns zero in case of success and a negative 353 * error code in case of failure. 354 */ 355 int ubifs_run_commit(struct ubifs_info *c) 356 { 357 int err = 0; 358 359 spin_lock(&c->cs_lock); 360 if (c->cmt_state == COMMIT_BROKEN) { 361 err = -EINVAL; 362 goto out; 363 } 364 365 if (c->cmt_state == COMMIT_RUNNING_BACKGROUND) 366 /* 367 * We set the commit state to 'running required' to indicate 368 * that we want it to complete as quickly as possible. 369 */ 370 c->cmt_state = COMMIT_RUNNING_REQUIRED; 371 372 if (c->cmt_state == COMMIT_RUNNING_REQUIRED) { 373 spin_unlock(&c->cs_lock); 374 return wait_for_commit(c); 375 } 376 spin_unlock(&c->cs_lock); 377 378 /* Ok, the commit is indeed needed */ 379 380 down_write(&c->commit_sem); 381 spin_lock(&c->cs_lock); 382 /* 383 * Since we unlocked 'c->cs_lock', the state may have changed, so 384 * re-check it. 385 */ 386 if (c->cmt_state == COMMIT_BROKEN) { 387 err = -EINVAL; 388 goto out_cmt_unlock; 389 } 390 391 if (c->cmt_state == COMMIT_RUNNING_BACKGROUND) 392 c->cmt_state = COMMIT_RUNNING_REQUIRED; 393 394 if (c->cmt_state == COMMIT_RUNNING_REQUIRED) { 395 up_write(&c->commit_sem); 396 spin_unlock(&c->cs_lock); 397 return wait_for_commit(c); 398 } 399 c->cmt_state = COMMIT_RUNNING_REQUIRED; 400 spin_unlock(&c->cs_lock); 401 402 err = do_commit(c); 403 return err; 404 405 out_cmt_unlock: 406 up_write(&c->commit_sem); 407 out: 408 spin_unlock(&c->cs_lock); 409 return err; 410 } 411 412 /** 413 * ubifs_gc_should_commit - determine if it is time for GC to run commit. 414 * @c: UBIFS file-system description object 415 * 416 * This function is called by garbage collection to determine if commit should 417 * be run. If commit state is @COMMIT_BACKGROUND, which means that the journal 418 * is full enough to start commit, this function returns true. It is not 419 * absolutely necessary to commit yet, but it feels like this should be better 420 * then to keep doing GC. This function returns %1 if GC has to initiate commit 421 * and %0 if not. 422 */ 423 int ubifs_gc_should_commit(struct ubifs_info *c) 424 { 425 int ret = 0; 426 427 spin_lock(&c->cs_lock); 428 if (c->cmt_state == COMMIT_BACKGROUND) { 429 dbg_cmt("commit required now"); 430 c->cmt_state = COMMIT_REQUIRED; 431 } else 432 dbg_cmt("commit not requested"); 433 if (c->cmt_state == COMMIT_REQUIRED) 434 ret = 1; 435 spin_unlock(&c->cs_lock); 436 return ret; 437 } 438 439 #ifdef CONFIG_UBIFS_FS_DEBUG 440 441 /** 442 * struct idx_node - hold index nodes during index tree traversal. 443 * @list: list 444 * @iip: index in parent (slot number of this indexing node in the parent 445 * indexing node) 446 * @upper_key: all keys in this indexing node have to be less or equivalent to 447 * this key 448 * @idx: index node (8-byte aligned because all node structures must be 8-byte 449 * aligned) 450 */ 451 struct idx_node { 452 struct list_head list; 453 int iip; 454 union ubifs_key upper_key; 455 struct ubifs_idx_node idx __attribute__((aligned(8))); 456 }; 457 458 /** 459 * dbg_old_index_check_init - get information for the next old index check. 460 * @c: UBIFS file-system description object 461 * @zroot: root of the index 462 * 463 * This function records information about the index that will be needed for the 464 * next old index check i.e. 'dbg_check_old_index()'. 465 * 466 * This function returns %0 on success and a negative error code on failure. 467 */ 468 int dbg_old_index_check_init(struct ubifs_info *c, struct ubifs_zbranch *zroot) 469 { 470 struct ubifs_idx_node *idx; 471 int lnum, offs, len, err = 0; 472 473 c->old_zroot = *zroot; 474 475 lnum = c->old_zroot.lnum; 476 offs = c->old_zroot.offs; 477 len = c->old_zroot.len; 478 479 idx = kmalloc(c->max_idx_node_sz, GFP_NOFS); 480 if (!idx) 481 return -ENOMEM; 482 483 err = ubifs_read_node(c, idx, UBIFS_IDX_NODE, len, lnum, offs); 484 if (err) 485 goto out; 486 487 c->old_zroot_level = le16_to_cpu(idx->level); 488 c->old_zroot_sqnum = le64_to_cpu(idx->ch.sqnum); 489 out: 490 kfree(idx); 491 return err; 492 } 493 494 /** 495 * dbg_check_old_index - check the old copy of the index. 496 * @c: UBIFS file-system description object 497 * @zroot: root of the new index 498 * 499 * In order to be able to recover from an unclean unmount, a complete copy of 500 * the index must exist on flash. This is the "old" index. The commit process 501 * must write the "new" index to flash without overwriting or destroying any 502 * part of the old index. This function is run at commit end in order to check 503 * that the old index does indeed exist completely intact. 504 * 505 * This function returns %0 on success and a negative error code on failure. 506 */ 507 int dbg_check_old_index(struct ubifs_info *c, struct ubifs_zbranch *zroot) 508 { 509 int lnum, offs, len, err = 0, uninitialized_var(last_level), child_cnt; 510 int first = 1, iip; 511 union ubifs_key lower_key, upper_key, l_key, u_key; 512 unsigned long long uninitialized_var(last_sqnum); 513 struct ubifs_idx_node *idx; 514 struct list_head list; 515 struct idx_node *i; 516 size_t sz; 517 518 if (!(ubifs_chk_flags & UBIFS_CHK_OLD_IDX)) 519 goto out; 520 521 INIT_LIST_HEAD(&list); 522 523 sz = sizeof(struct idx_node) + ubifs_idx_node_sz(c, c->fanout) - 524 UBIFS_IDX_NODE_SZ; 525 526 /* Start at the old zroot */ 527 lnum = c->old_zroot.lnum; 528 offs = c->old_zroot.offs; 529 len = c->old_zroot.len; 530 iip = 0; 531 532 /* 533 * Traverse the index tree preorder depth-first i.e. do a node and then 534 * its subtrees from left to right. 535 */ 536 while (1) { 537 struct ubifs_branch *br; 538 539 /* Get the next index node */ 540 i = kmalloc(sz, GFP_NOFS); 541 if (!i) { 542 err = -ENOMEM; 543 goto out_free; 544 } 545 i->iip = iip; 546 /* Keep the index nodes on our path in a linked list */ 547 list_add_tail(&i->list, &list); 548 /* Read the index node */ 549 idx = &i->idx; 550 err = ubifs_read_node(c, idx, UBIFS_IDX_NODE, len, lnum, offs); 551 if (err) 552 goto out_free; 553 /* Validate index node */ 554 child_cnt = le16_to_cpu(idx->child_cnt); 555 if (child_cnt < 1 || child_cnt > c->fanout) { 556 err = 1; 557 goto out_dump; 558 } 559 if (first) { 560 first = 0; 561 /* Check root level and sqnum */ 562 if (le16_to_cpu(idx->level) != c->old_zroot_level) { 563 err = 2; 564 goto out_dump; 565 } 566 if (le64_to_cpu(idx->ch.sqnum) != c->old_zroot_sqnum) { 567 err = 3; 568 goto out_dump; 569 } 570 /* Set last values as though root had a parent */ 571 last_level = le16_to_cpu(idx->level) + 1; 572 last_sqnum = le64_to_cpu(idx->ch.sqnum) + 1; 573 key_read(c, ubifs_idx_key(c, idx), &lower_key); 574 highest_ino_key(c, &upper_key, INUM_WATERMARK); 575 } 576 key_copy(c, &upper_key, &i->upper_key); 577 if (le16_to_cpu(idx->level) != last_level - 1) { 578 err = 3; 579 goto out_dump; 580 } 581 /* 582 * The index is always written bottom up hence a child's sqnum 583 * is always less than the parents. 584 */ 585 if (le64_to_cpu(idx->ch.sqnum) >= last_sqnum) { 586 err = 4; 587 goto out_dump; 588 } 589 /* Check key range */ 590 key_read(c, ubifs_idx_key(c, idx), &l_key); 591 br = ubifs_idx_branch(c, idx, child_cnt - 1); 592 key_read(c, &br->key, &u_key); 593 if (keys_cmp(c, &lower_key, &l_key) > 0) { 594 err = 5; 595 goto out_dump; 596 } 597 if (keys_cmp(c, &upper_key, &u_key) < 0) { 598 err = 6; 599 goto out_dump; 600 } 601 if (keys_cmp(c, &upper_key, &u_key) == 0) 602 if (!is_hash_key(c, &u_key)) { 603 err = 7; 604 goto out_dump; 605 } 606 /* Go to next index node */ 607 if (le16_to_cpu(idx->level) == 0) { 608 /* At the bottom, so go up until can go right */ 609 while (1) { 610 /* Drop the bottom of the list */ 611 list_del(&i->list); 612 kfree(i); 613 /* No more list means we are done */ 614 if (list_empty(&list)) 615 goto out; 616 /* Look at the new bottom */ 617 i = list_entry(list.prev, struct idx_node, 618 list); 619 idx = &i->idx; 620 /* Can we go right */ 621 if (iip + 1 < le16_to_cpu(idx->child_cnt)) { 622 iip = iip + 1; 623 break; 624 } else 625 /* Nope, so go up again */ 626 iip = i->iip; 627 } 628 } else 629 /* Go down left */ 630 iip = 0; 631 /* 632 * We have the parent in 'idx' and now we set up for reading the 633 * child pointed to by slot 'iip'. 634 */ 635 last_level = le16_to_cpu(idx->level); 636 last_sqnum = le64_to_cpu(idx->ch.sqnum); 637 br = ubifs_idx_branch(c, idx, iip); 638 lnum = le32_to_cpu(br->lnum); 639 offs = le32_to_cpu(br->offs); 640 len = le32_to_cpu(br->len); 641 key_read(c, &br->key, &lower_key); 642 if (iip + 1 < le16_to_cpu(idx->child_cnt)) { 643 br = ubifs_idx_branch(c, idx, iip + 1); 644 key_read(c, &br->key, &upper_key); 645 } else 646 key_copy(c, &i->upper_key, &upper_key); 647 } 648 out: 649 err = dbg_old_index_check_init(c, zroot); 650 if (err) 651 goto out_free; 652 653 return 0; 654 655 out_dump: 656 dbg_err("dumping index node (iip=%d)", i->iip); 657 dbg_dump_node(c, idx); 658 list_del(&i->list); 659 kfree(i); 660 if (!list_empty(&list)) { 661 i = list_entry(list.prev, struct idx_node, list); 662 dbg_err("dumping parent index node"); 663 dbg_dump_node(c, &i->idx); 664 } 665 out_free: 666 while (!list_empty(&list)) { 667 i = list_entry(list.next, struct idx_node, list); 668 list_del(&i->list); 669 kfree(i); 670 } 671 ubifs_err("failed, error %d", err); 672 if (err > 0) 673 err = -EINVAL; 674 return err; 675 } 676 677 #endif /* CONFIG_UBIFS_FS_DEBUG */ 678