1*9eefe2a2SStefan Roese /* 2*9eefe2a2SStefan Roese * This file is part of UBIFS. 3*9eefe2a2SStefan Roese * 4*9eefe2a2SStefan Roese * Copyright (C) 2006-2008 Nokia Corporation. 5*9eefe2a2SStefan Roese * 6*9eefe2a2SStefan Roese * This program is free software; you can redistribute it and/or modify it 7*9eefe2a2SStefan Roese * under the terms of the GNU General Public License version 2 as published by 8*9eefe2a2SStefan Roese * the Free Software Foundation. 9*9eefe2a2SStefan Roese * 10*9eefe2a2SStefan Roese * This program is distributed in the hope that it will be useful, but WITHOUT 11*9eefe2a2SStefan Roese * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 12*9eefe2a2SStefan Roese * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for 13*9eefe2a2SStefan Roese * more details. 14*9eefe2a2SStefan Roese * 15*9eefe2a2SStefan Roese * You should have received a copy of the GNU General Public License along with 16*9eefe2a2SStefan Roese * this program; if not, write to the Free Software Foundation, Inc., 51 17*9eefe2a2SStefan Roese * Franklin St, Fifth Floor, Boston, MA 02110-1301 USA 18*9eefe2a2SStefan Roese * 19*9eefe2a2SStefan Roese * Authors: Adrian Hunter 20*9eefe2a2SStefan Roese * Artem Bityutskiy (Битюцкий Артём) 21*9eefe2a2SStefan Roese */ 22*9eefe2a2SStefan Roese 23*9eefe2a2SStefan Roese /* 24*9eefe2a2SStefan Roese * This file contains journal replay code. It runs when the file-system is being 25*9eefe2a2SStefan Roese * mounted and requires no locking. 26*9eefe2a2SStefan Roese * 27*9eefe2a2SStefan Roese * The larger is the journal, the longer it takes to scan it, so the longer it 28*9eefe2a2SStefan Roese * takes to mount UBIFS. This is why the journal has limited size which may be 29*9eefe2a2SStefan Roese * changed depending on the system requirements. But a larger journal gives 30*9eefe2a2SStefan Roese * faster I/O speed because it writes the index less frequently. So this is a 31*9eefe2a2SStefan Roese * trade-off. Also, the journal is indexed by the in-memory index (TNC), so the 32*9eefe2a2SStefan Roese * larger is the journal, the more memory its index may consume. 33*9eefe2a2SStefan Roese */ 34*9eefe2a2SStefan Roese 35*9eefe2a2SStefan Roese #include "ubifs.h" 36*9eefe2a2SStefan Roese 37*9eefe2a2SStefan Roese /* 38*9eefe2a2SStefan Roese * Replay flags. 39*9eefe2a2SStefan Roese * 40*9eefe2a2SStefan Roese * REPLAY_DELETION: node was deleted 41*9eefe2a2SStefan Roese * REPLAY_REF: node is a reference node 42*9eefe2a2SStefan Roese */ 43*9eefe2a2SStefan Roese enum { 44*9eefe2a2SStefan Roese REPLAY_DELETION = 1, 45*9eefe2a2SStefan Roese REPLAY_REF = 2, 46*9eefe2a2SStefan Roese }; 47*9eefe2a2SStefan Roese 48*9eefe2a2SStefan Roese /** 49*9eefe2a2SStefan Roese * struct replay_entry - replay tree entry. 50*9eefe2a2SStefan Roese * @lnum: logical eraseblock number of the node 51*9eefe2a2SStefan Roese * @offs: node offset 52*9eefe2a2SStefan Roese * @len: node length 53*9eefe2a2SStefan Roese * @sqnum: node sequence number 54*9eefe2a2SStefan Roese * @flags: replay flags 55*9eefe2a2SStefan Roese * @rb: links the replay tree 56*9eefe2a2SStefan Roese * @key: node key 57*9eefe2a2SStefan Roese * @nm: directory entry name 58*9eefe2a2SStefan Roese * @old_size: truncation old size 59*9eefe2a2SStefan Roese * @new_size: truncation new size 60*9eefe2a2SStefan Roese * @free: amount of free space in a bud 61*9eefe2a2SStefan Roese * @dirty: amount of dirty space in a bud from padding and deletion nodes 62*9eefe2a2SStefan Roese * 63*9eefe2a2SStefan Roese * UBIFS journal replay must compare node sequence numbers, which means it must 64*9eefe2a2SStefan Roese * build a tree of node information to insert into the TNC. 65*9eefe2a2SStefan Roese */ 66*9eefe2a2SStefan Roese struct replay_entry { 67*9eefe2a2SStefan Roese int lnum; 68*9eefe2a2SStefan Roese int offs; 69*9eefe2a2SStefan Roese int len; 70*9eefe2a2SStefan Roese unsigned long long sqnum; 71*9eefe2a2SStefan Roese int flags; 72*9eefe2a2SStefan Roese struct rb_node rb; 73*9eefe2a2SStefan Roese union ubifs_key key; 74*9eefe2a2SStefan Roese union { 75*9eefe2a2SStefan Roese struct qstr nm; 76*9eefe2a2SStefan Roese struct { 77*9eefe2a2SStefan Roese loff_t old_size; 78*9eefe2a2SStefan Roese loff_t new_size; 79*9eefe2a2SStefan Roese }; 80*9eefe2a2SStefan Roese struct { 81*9eefe2a2SStefan Roese int free; 82*9eefe2a2SStefan Roese int dirty; 83*9eefe2a2SStefan Roese }; 84*9eefe2a2SStefan Roese }; 85*9eefe2a2SStefan Roese }; 86*9eefe2a2SStefan Roese 87*9eefe2a2SStefan Roese /** 88*9eefe2a2SStefan Roese * struct bud_entry - entry in the list of buds to replay. 89*9eefe2a2SStefan Roese * @list: next bud in the list 90*9eefe2a2SStefan Roese * @bud: bud description object 91*9eefe2a2SStefan Roese * @free: free bytes in the bud 92*9eefe2a2SStefan Roese * @sqnum: reference node sequence number 93*9eefe2a2SStefan Roese */ 94*9eefe2a2SStefan Roese struct bud_entry { 95*9eefe2a2SStefan Roese struct list_head list; 96*9eefe2a2SStefan Roese struct ubifs_bud *bud; 97*9eefe2a2SStefan Roese int free; 98*9eefe2a2SStefan Roese unsigned long long sqnum; 99*9eefe2a2SStefan Roese }; 100*9eefe2a2SStefan Roese 101*9eefe2a2SStefan Roese /** 102*9eefe2a2SStefan Roese * set_bud_lprops - set free and dirty space used by a bud. 103*9eefe2a2SStefan Roese * @c: UBIFS file-system description object 104*9eefe2a2SStefan Roese * @r: replay entry of bud 105*9eefe2a2SStefan Roese */ 106*9eefe2a2SStefan Roese static int set_bud_lprops(struct ubifs_info *c, struct replay_entry *r) 107*9eefe2a2SStefan Roese { 108*9eefe2a2SStefan Roese const struct ubifs_lprops *lp; 109*9eefe2a2SStefan Roese int err = 0, dirty; 110*9eefe2a2SStefan Roese 111*9eefe2a2SStefan Roese ubifs_get_lprops(c); 112*9eefe2a2SStefan Roese 113*9eefe2a2SStefan Roese lp = ubifs_lpt_lookup_dirty(c, r->lnum); 114*9eefe2a2SStefan Roese if (IS_ERR(lp)) { 115*9eefe2a2SStefan Roese err = PTR_ERR(lp); 116*9eefe2a2SStefan Roese goto out; 117*9eefe2a2SStefan Roese } 118*9eefe2a2SStefan Roese 119*9eefe2a2SStefan Roese dirty = lp->dirty; 120*9eefe2a2SStefan Roese if (r->offs == 0 && (lp->free != c->leb_size || lp->dirty != 0)) { 121*9eefe2a2SStefan Roese /* 122*9eefe2a2SStefan Roese * The LEB was added to the journal with a starting offset of 123*9eefe2a2SStefan Roese * zero which means the LEB must have been empty. The LEB 124*9eefe2a2SStefan Roese * property values should be lp->free == c->leb_size and 125*9eefe2a2SStefan Roese * lp->dirty == 0, but that is not the case. The reason is that 126*9eefe2a2SStefan Roese * the LEB was garbage collected. The garbage collector resets 127*9eefe2a2SStefan Roese * the free and dirty space without recording it anywhere except 128*9eefe2a2SStefan Roese * lprops, so if there is not a commit then lprops does not have 129*9eefe2a2SStefan Roese * that information next time the file system is mounted. 130*9eefe2a2SStefan Roese * 131*9eefe2a2SStefan Roese * We do not need to adjust free space because the scan has told 132*9eefe2a2SStefan Roese * us the exact value which is recorded in the replay entry as 133*9eefe2a2SStefan Roese * r->free. 134*9eefe2a2SStefan Roese * 135*9eefe2a2SStefan Roese * However we do need to subtract from the dirty space the 136*9eefe2a2SStefan Roese * amount of space that the garbage collector reclaimed, which 137*9eefe2a2SStefan Roese * is the whole LEB minus the amount of space that was free. 138*9eefe2a2SStefan Roese */ 139*9eefe2a2SStefan Roese dbg_mnt("bud LEB %d was GC'd (%d free, %d dirty)", r->lnum, 140*9eefe2a2SStefan Roese lp->free, lp->dirty); 141*9eefe2a2SStefan Roese dbg_gc("bud LEB %d was GC'd (%d free, %d dirty)", r->lnum, 142*9eefe2a2SStefan Roese lp->free, lp->dirty); 143*9eefe2a2SStefan Roese dirty -= c->leb_size - lp->free; 144*9eefe2a2SStefan Roese /* 145*9eefe2a2SStefan Roese * If the replay order was perfect the dirty space would now be 146*9eefe2a2SStefan Roese * zero. The order is not perfect because the the journal heads 147*9eefe2a2SStefan Roese * race with each other. This is not a problem but is does mean 148*9eefe2a2SStefan Roese * that the dirty space may temporarily exceed c->leb_size 149*9eefe2a2SStefan Roese * during the replay. 150*9eefe2a2SStefan Roese */ 151*9eefe2a2SStefan Roese if (dirty != 0) 152*9eefe2a2SStefan Roese dbg_msg("LEB %d lp: %d free %d dirty " 153*9eefe2a2SStefan Roese "replay: %d free %d dirty", r->lnum, lp->free, 154*9eefe2a2SStefan Roese lp->dirty, r->free, r->dirty); 155*9eefe2a2SStefan Roese } 156*9eefe2a2SStefan Roese lp = ubifs_change_lp(c, lp, r->free, dirty + r->dirty, 157*9eefe2a2SStefan Roese lp->flags | LPROPS_TAKEN, 0); 158*9eefe2a2SStefan Roese if (IS_ERR(lp)) { 159*9eefe2a2SStefan Roese err = PTR_ERR(lp); 160*9eefe2a2SStefan Roese goto out; 161*9eefe2a2SStefan Roese } 162*9eefe2a2SStefan Roese out: 163*9eefe2a2SStefan Roese ubifs_release_lprops(c); 164*9eefe2a2SStefan Roese return err; 165*9eefe2a2SStefan Roese } 166*9eefe2a2SStefan Roese 167*9eefe2a2SStefan Roese /** 168*9eefe2a2SStefan Roese * trun_remove_range - apply a replay entry for a truncation to the TNC. 169*9eefe2a2SStefan Roese * @c: UBIFS file-system description object 170*9eefe2a2SStefan Roese * @r: replay entry of truncation 171*9eefe2a2SStefan Roese */ 172*9eefe2a2SStefan Roese static int trun_remove_range(struct ubifs_info *c, struct replay_entry *r) 173*9eefe2a2SStefan Roese { 174*9eefe2a2SStefan Roese unsigned min_blk, max_blk; 175*9eefe2a2SStefan Roese union ubifs_key min_key, max_key; 176*9eefe2a2SStefan Roese ino_t ino; 177*9eefe2a2SStefan Roese 178*9eefe2a2SStefan Roese min_blk = r->new_size / UBIFS_BLOCK_SIZE; 179*9eefe2a2SStefan Roese if (r->new_size & (UBIFS_BLOCK_SIZE - 1)) 180*9eefe2a2SStefan Roese min_blk += 1; 181*9eefe2a2SStefan Roese 182*9eefe2a2SStefan Roese max_blk = r->old_size / UBIFS_BLOCK_SIZE; 183*9eefe2a2SStefan Roese if ((r->old_size & (UBIFS_BLOCK_SIZE - 1)) == 0) 184*9eefe2a2SStefan Roese max_blk -= 1; 185*9eefe2a2SStefan Roese 186*9eefe2a2SStefan Roese ino = key_inum(c, &r->key); 187*9eefe2a2SStefan Roese 188*9eefe2a2SStefan Roese data_key_init(c, &min_key, ino, min_blk); 189*9eefe2a2SStefan Roese data_key_init(c, &max_key, ino, max_blk); 190*9eefe2a2SStefan Roese 191*9eefe2a2SStefan Roese return ubifs_tnc_remove_range(c, &min_key, &max_key); 192*9eefe2a2SStefan Roese } 193*9eefe2a2SStefan Roese 194*9eefe2a2SStefan Roese /** 195*9eefe2a2SStefan Roese * apply_replay_entry - apply a replay entry to the TNC. 196*9eefe2a2SStefan Roese * @c: UBIFS file-system description object 197*9eefe2a2SStefan Roese * @r: replay entry to apply 198*9eefe2a2SStefan Roese * 199*9eefe2a2SStefan Roese * Apply a replay entry to the TNC. 200*9eefe2a2SStefan Roese */ 201*9eefe2a2SStefan Roese static int apply_replay_entry(struct ubifs_info *c, struct replay_entry *r) 202*9eefe2a2SStefan Roese { 203*9eefe2a2SStefan Roese int err, deletion = ((r->flags & REPLAY_DELETION) != 0); 204*9eefe2a2SStefan Roese 205*9eefe2a2SStefan Roese dbg_mnt("LEB %d:%d len %d flgs %d sqnum %llu %s", r->lnum, 206*9eefe2a2SStefan Roese r->offs, r->len, r->flags, r->sqnum, DBGKEY(&r->key)); 207*9eefe2a2SStefan Roese 208*9eefe2a2SStefan Roese /* Set c->replay_sqnum to help deal with dangling branches. */ 209*9eefe2a2SStefan Roese c->replay_sqnum = r->sqnum; 210*9eefe2a2SStefan Roese 211*9eefe2a2SStefan Roese if (r->flags & REPLAY_REF) 212*9eefe2a2SStefan Roese err = set_bud_lprops(c, r); 213*9eefe2a2SStefan Roese else if (is_hash_key(c, &r->key)) { 214*9eefe2a2SStefan Roese if (deletion) 215*9eefe2a2SStefan Roese err = ubifs_tnc_remove_nm(c, &r->key, &r->nm); 216*9eefe2a2SStefan Roese else 217*9eefe2a2SStefan Roese err = ubifs_tnc_add_nm(c, &r->key, r->lnum, r->offs, 218*9eefe2a2SStefan Roese r->len, &r->nm); 219*9eefe2a2SStefan Roese } else { 220*9eefe2a2SStefan Roese if (deletion) 221*9eefe2a2SStefan Roese switch (key_type(c, &r->key)) { 222*9eefe2a2SStefan Roese case UBIFS_INO_KEY: 223*9eefe2a2SStefan Roese { 224*9eefe2a2SStefan Roese ino_t inum = key_inum(c, &r->key); 225*9eefe2a2SStefan Roese 226*9eefe2a2SStefan Roese err = ubifs_tnc_remove_ino(c, inum); 227*9eefe2a2SStefan Roese break; 228*9eefe2a2SStefan Roese } 229*9eefe2a2SStefan Roese case UBIFS_TRUN_KEY: 230*9eefe2a2SStefan Roese err = trun_remove_range(c, r); 231*9eefe2a2SStefan Roese break; 232*9eefe2a2SStefan Roese default: 233*9eefe2a2SStefan Roese err = ubifs_tnc_remove(c, &r->key); 234*9eefe2a2SStefan Roese break; 235*9eefe2a2SStefan Roese } 236*9eefe2a2SStefan Roese else 237*9eefe2a2SStefan Roese err = ubifs_tnc_add(c, &r->key, r->lnum, r->offs, 238*9eefe2a2SStefan Roese r->len); 239*9eefe2a2SStefan Roese if (err) 240*9eefe2a2SStefan Roese return err; 241*9eefe2a2SStefan Roese 242*9eefe2a2SStefan Roese if (c->need_recovery) 243*9eefe2a2SStefan Roese err = ubifs_recover_size_accum(c, &r->key, deletion, 244*9eefe2a2SStefan Roese r->new_size); 245*9eefe2a2SStefan Roese } 246*9eefe2a2SStefan Roese 247*9eefe2a2SStefan Roese return err; 248*9eefe2a2SStefan Roese } 249*9eefe2a2SStefan Roese 250*9eefe2a2SStefan Roese /** 251*9eefe2a2SStefan Roese * destroy_replay_tree - destroy the replay. 252*9eefe2a2SStefan Roese * @c: UBIFS file-system description object 253*9eefe2a2SStefan Roese * 254*9eefe2a2SStefan Roese * Destroy the replay tree. 255*9eefe2a2SStefan Roese */ 256*9eefe2a2SStefan Roese static void destroy_replay_tree(struct ubifs_info *c) 257*9eefe2a2SStefan Roese { 258*9eefe2a2SStefan Roese struct rb_node *this = c->replay_tree.rb_node; 259*9eefe2a2SStefan Roese struct replay_entry *r; 260*9eefe2a2SStefan Roese 261*9eefe2a2SStefan Roese while (this) { 262*9eefe2a2SStefan Roese if (this->rb_left) { 263*9eefe2a2SStefan Roese this = this->rb_left; 264*9eefe2a2SStefan Roese continue; 265*9eefe2a2SStefan Roese } else if (this->rb_right) { 266*9eefe2a2SStefan Roese this = this->rb_right; 267*9eefe2a2SStefan Roese continue; 268*9eefe2a2SStefan Roese } 269*9eefe2a2SStefan Roese r = rb_entry(this, struct replay_entry, rb); 270*9eefe2a2SStefan Roese this = rb_parent(this); 271*9eefe2a2SStefan Roese if (this) { 272*9eefe2a2SStefan Roese if (this->rb_left == &r->rb) 273*9eefe2a2SStefan Roese this->rb_left = NULL; 274*9eefe2a2SStefan Roese else 275*9eefe2a2SStefan Roese this->rb_right = NULL; 276*9eefe2a2SStefan Roese } 277*9eefe2a2SStefan Roese if (is_hash_key(c, &r->key)) 278*9eefe2a2SStefan Roese kfree((void *)r->nm.name); 279*9eefe2a2SStefan Roese kfree(r); 280*9eefe2a2SStefan Roese } 281*9eefe2a2SStefan Roese c->replay_tree = RB_ROOT; 282*9eefe2a2SStefan Roese } 283*9eefe2a2SStefan Roese 284*9eefe2a2SStefan Roese /** 285*9eefe2a2SStefan Roese * apply_replay_tree - apply the replay tree to the TNC. 286*9eefe2a2SStefan Roese * @c: UBIFS file-system description object 287*9eefe2a2SStefan Roese * 288*9eefe2a2SStefan Roese * Apply the replay tree. 289*9eefe2a2SStefan Roese * Returns zero in case of success and a negative error code in case of 290*9eefe2a2SStefan Roese * failure. 291*9eefe2a2SStefan Roese */ 292*9eefe2a2SStefan Roese static int apply_replay_tree(struct ubifs_info *c) 293*9eefe2a2SStefan Roese { 294*9eefe2a2SStefan Roese struct rb_node *this = rb_first(&c->replay_tree); 295*9eefe2a2SStefan Roese 296*9eefe2a2SStefan Roese while (this) { 297*9eefe2a2SStefan Roese struct replay_entry *r; 298*9eefe2a2SStefan Roese int err; 299*9eefe2a2SStefan Roese 300*9eefe2a2SStefan Roese cond_resched(); 301*9eefe2a2SStefan Roese 302*9eefe2a2SStefan Roese r = rb_entry(this, struct replay_entry, rb); 303*9eefe2a2SStefan Roese err = apply_replay_entry(c, r); 304*9eefe2a2SStefan Roese if (err) 305*9eefe2a2SStefan Roese return err; 306*9eefe2a2SStefan Roese this = rb_next(this); 307*9eefe2a2SStefan Roese } 308*9eefe2a2SStefan Roese return 0; 309*9eefe2a2SStefan Roese } 310*9eefe2a2SStefan Roese 311*9eefe2a2SStefan Roese /** 312*9eefe2a2SStefan Roese * insert_node - insert a node to the replay tree. 313*9eefe2a2SStefan Roese * @c: UBIFS file-system description object 314*9eefe2a2SStefan Roese * @lnum: node logical eraseblock number 315*9eefe2a2SStefan Roese * @offs: node offset 316*9eefe2a2SStefan Roese * @len: node length 317*9eefe2a2SStefan Roese * @key: node key 318*9eefe2a2SStefan Roese * @sqnum: sequence number 319*9eefe2a2SStefan Roese * @deletion: non-zero if this is a deletion 320*9eefe2a2SStefan Roese * @used: number of bytes in use in a LEB 321*9eefe2a2SStefan Roese * @old_size: truncation old size 322*9eefe2a2SStefan Roese * @new_size: truncation new size 323*9eefe2a2SStefan Roese * 324*9eefe2a2SStefan Roese * This function inserts a scanned non-direntry node to the replay tree. The 325*9eefe2a2SStefan Roese * replay tree is an RB-tree containing @struct replay_entry elements which are 326*9eefe2a2SStefan Roese * indexed by the sequence number. The replay tree is applied at the very end 327*9eefe2a2SStefan Roese * of the replay process. Since the tree is sorted in sequence number order, 328*9eefe2a2SStefan Roese * the older modifications are applied first. This function returns zero in 329*9eefe2a2SStefan Roese * case of success and a negative error code in case of failure. 330*9eefe2a2SStefan Roese */ 331*9eefe2a2SStefan Roese static int insert_node(struct ubifs_info *c, int lnum, int offs, int len, 332*9eefe2a2SStefan Roese union ubifs_key *key, unsigned long long sqnum, 333*9eefe2a2SStefan Roese int deletion, int *used, loff_t old_size, 334*9eefe2a2SStefan Roese loff_t new_size) 335*9eefe2a2SStefan Roese { 336*9eefe2a2SStefan Roese struct rb_node **p = &c->replay_tree.rb_node, *parent = NULL; 337*9eefe2a2SStefan Roese struct replay_entry *r; 338*9eefe2a2SStefan Roese 339*9eefe2a2SStefan Roese if (key_inum(c, key) >= c->highest_inum) 340*9eefe2a2SStefan Roese c->highest_inum = key_inum(c, key); 341*9eefe2a2SStefan Roese 342*9eefe2a2SStefan Roese dbg_mnt("add LEB %d:%d, key %s", lnum, offs, DBGKEY(key)); 343*9eefe2a2SStefan Roese while (*p) { 344*9eefe2a2SStefan Roese parent = *p; 345*9eefe2a2SStefan Roese r = rb_entry(parent, struct replay_entry, rb); 346*9eefe2a2SStefan Roese if (sqnum < r->sqnum) { 347*9eefe2a2SStefan Roese p = &(*p)->rb_left; 348*9eefe2a2SStefan Roese continue; 349*9eefe2a2SStefan Roese } else if (sqnum > r->sqnum) { 350*9eefe2a2SStefan Roese p = &(*p)->rb_right; 351*9eefe2a2SStefan Roese continue; 352*9eefe2a2SStefan Roese } 353*9eefe2a2SStefan Roese ubifs_err("duplicate sqnum in replay"); 354*9eefe2a2SStefan Roese return -EINVAL; 355*9eefe2a2SStefan Roese } 356*9eefe2a2SStefan Roese 357*9eefe2a2SStefan Roese r = kzalloc(sizeof(struct replay_entry), GFP_KERNEL); 358*9eefe2a2SStefan Roese if (!r) 359*9eefe2a2SStefan Roese return -ENOMEM; 360*9eefe2a2SStefan Roese 361*9eefe2a2SStefan Roese if (!deletion) 362*9eefe2a2SStefan Roese *used += ALIGN(len, 8); 363*9eefe2a2SStefan Roese r->lnum = lnum; 364*9eefe2a2SStefan Roese r->offs = offs; 365*9eefe2a2SStefan Roese r->len = len; 366*9eefe2a2SStefan Roese r->sqnum = sqnum; 367*9eefe2a2SStefan Roese r->flags = (deletion ? REPLAY_DELETION : 0); 368*9eefe2a2SStefan Roese r->old_size = old_size; 369*9eefe2a2SStefan Roese r->new_size = new_size; 370*9eefe2a2SStefan Roese key_copy(c, key, &r->key); 371*9eefe2a2SStefan Roese 372*9eefe2a2SStefan Roese rb_link_node(&r->rb, parent, p); 373*9eefe2a2SStefan Roese rb_insert_color(&r->rb, &c->replay_tree); 374*9eefe2a2SStefan Roese return 0; 375*9eefe2a2SStefan Roese } 376*9eefe2a2SStefan Roese 377*9eefe2a2SStefan Roese /** 378*9eefe2a2SStefan Roese * insert_dent - insert a directory entry node into the replay tree. 379*9eefe2a2SStefan Roese * @c: UBIFS file-system description object 380*9eefe2a2SStefan Roese * @lnum: node logical eraseblock number 381*9eefe2a2SStefan Roese * @offs: node offset 382*9eefe2a2SStefan Roese * @len: node length 383*9eefe2a2SStefan Roese * @key: node key 384*9eefe2a2SStefan Roese * @name: directory entry name 385*9eefe2a2SStefan Roese * @nlen: directory entry name length 386*9eefe2a2SStefan Roese * @sqnum: sequence number 387*9eefe2a2SStefan Roese * @deletion: non-zero if this is a deletion 388*9eefe2a2SStefan Roese * @used: number of bytes in use in a LEB 389*9eefe2a2SStefan Roese * 390*9eefe2a2SStefan Roese * This function inserts a scanned directory entry node to the replay tree. 391*9eefe2a2SStefan Roese * Returns zero in case of success and a negative error code in case of 392*9eefe2a2SStefan Roese * failure. 393*9eefe2a2SStefan Roese * 394*9eefe2a2SStefan Roese * This function is also used for extended attribute entries because they are 395*9eefe2a2SStefan Roese * implemented as directory entry nodes. 396*9eefe2a2SStefan Roese */ 397*9eefe2a2SStefan Roese static int insert_dent(struct ubifs_info *c, int lnum, int offs, int len, 398*9eefe2a2SStefan Roese union ubifs_key *key, const char *name, int nlen, 399*9eefe2a2SStefan Roese unsigned long long sqnum, int deletion, int *used) 400*9eefe2a2SStefan Roese { 401*9eefe2a2SStefan Roese struct rb_node **p = &c->replay_tree.rb_node, *parent = NULL; 402*9eefe2a2SStefan Roese struct replay_entry *r; 403*9eefe2a2SStefan Roese char *nbuf; 404*9eefe2a2SStefan Roese 405*9eefe2a2SStefan Roese if (key_inum(c, key) >= c->highest_inum) 406*9eefe2a2SStefan Roese c->highest_inum = key_inum(c, key); 407*9eefe2a2SStefan Roese 408*9eefe2a2SStefan Roese dbg_mnt("add LEB %d:%d, key %s", lnum, offs, DBGKEY(key)); 409*9eefe2a2SStefan Roese while (*p) { 410*9eefe2a2SStefan Roese parent = *p; 411*9eefe2a2SStefan Roese r = rb_entry(parent, struct replay_entry, rb); 412*9eefe2a2SStefan Roese if (sqnum < r->sqnum) { 413*9eefe2a2SStefan Roese p = &(*p)->rb_left; 414*9eefe2a2SStefan Roese continue; 415*9eefe2a2SStefan Roese } 416*9eefe2a2SStefan Roese if (sqnum > r->sqnum) { 417*9eefe2a2SStefan Roese p = &(*p)->rb_right; 418*9eefe2a2SStefan Roese continue; 419*9eefe2a2SStefan Roese } 420*9eefe2a2SStefan Roese ubifs_err("duplicate sqnum in replay"); 421*9eefe2a2SStefan Roese return -EINVAL; 422*9eefe2a2SStefan Roese } 423*9eefe2a2SStefan Roese 424*9eefe2a2SStefan Roese r = kzalloc(sizeof(struct replay_entry), GFP_KERNEL); 425*9eefe2a2SStefan Roese if (!r) 426*9eefe2a2SStefan Roese return -ENOMEM; 427*9eefe2a2SStefan Roese nbuf = kmalloc(nlen + 1, GFP_KERNEL); 428*9eefe2a2SStefan Roese if (!nbuf) { 429*9eefe2a2SStefan Roese kfree(r); 430*9eefe2a2SStefan Roese return -ENOMEM; 431*9eefe2a2SStefan Roese } 432*9eefe2a2SStefan Roese 433*9eefe2a2SStefan Roese if (!deletion) 434*9eefe2a2SStefan Roese *used += ALIGN(len, 8); 435*9eefe2a2SStefan Roese r->lnum = lnum; 436*9eefe2a2SStefan Roese r->offs = offs; 437*9eefe2a2SStefan Roese r->len = len; 438*9eefe2a2SStefan Roese r->sqnum = sqnum; 439*9eefe2a2SStefan Roese r->nm.len = nlen; 440*9eefe2a2SStefan Roese memcpy(nbuf, name, nlen); 441*9eefe2a2SStefan Roese nbuf[nlen] = '\0'; 442*9eefe2a2SStefan Roese r->nm.name = nbuf; 443*9eefe2a2SStefan Roese r->flags = (deletion ? REPLAY_DELETION : 0); 444*9eefe2a2SStefan Roese key_copy(c, key, &r->key); 445*9eefe2a2SStefan Roese 446*9eefe2a2SStefan Roese ubifs_assert(!*p); 447*9eefe2a2SStefan Roese rb_link_node(&r->rb, parent, p); 448*9eefe2a2SStefan Roese rb_insert_color(&r->rb, &c->replay_tree); 449*9eefe2a2SStefan Roese return 0; 450*9eefe2a2SStefan Roese } 451*9eefe2a2SStefan Roese 452*9eefe2a2SStefan Roese /** 453*9eefe2a2SStefan Roese * ubifs_validate_entry - validate directory or extended attribute entry node. 454*9eefe2a2SStefan Roese * @c: UBIFS file-system description object 455*9eefe2a2SStefan Roese * @dent: the node to validate 456*9eefe2a2SStefan Roese * 457*9eefe2a2SStefan Roese * This function validates directory or extended attribute entry node @dent. 458*9eefe2a2SStefan Roese * Returns zero if the node is all right and a %-EINVAL if not. 459*9eefe2a2SStefan Roese */ 460*9eefe2a2SStefan Roese int ubifs_validate_entry(struct ubifs_info *c, 461*9eefe2a2SStefan Roese const struct ubifs_dent_node *dent) 462*9eefe2a2SStefan Roese { 463*9eefe2a2SStefan Roese int key_type = key_type_flash(c, dent->key); 464*9eefe2a2SStefan Roese int nlen = le16_to_cpu(dent->nlen); 465*9eefe2a2SStefan Roese 466*9eefe2a2SStefan Roese if (le32_to_cpu(dent->ch.len) != nlen + UBIFS_DENT_NODE_SZ + 1 || 467*9eefe2a2SStefan Roese dent->type >= UBIFS_ITYPES_CNT || 468*9eefe2a2SStefan Roese nlen > UBIFS_MAX_NLEN || dent->name[nlen] != 0 || 469*9eefe2a2SStefan Roese strnlen((char *)dent->name, nlen) != nlen || 470*9eefe2a2SStefan Roese le64_to_cpu(dent->inum) > MAX_INUM) { 471*9eefe2a2SStefan Roese ubifs_err("bad %s node", key_type == UBIFS_DENT_KEY ? 472*9eefe2a2SStefan Roese "directory entry" : "extended attribute entry"); 473*9eefe2a2SStefan Roese return -EINVAL; 474*9eefe2a2SStefan Roese } 475*9eefe2a2SStefan Roese 476*9eefe2a2SStefan Roese if (key_type != UBIFS_DENT_KEY && key_type != UBIFS_XENT_KEY) { 477*9eefe2a2SStefan Roese ubifs_err("bad key type %d", key_type); 478*9eefe2a2SStefan Roese return -EINVAL; 479*9eefe2a2SStefan Roese } 480*9eefe2a2SStefan Roese 481*9eefe2a2SStefan Roese return 0; 482*9eefe2a2SStefan Roese } 483*9eefe2a2SStefan Roese 484*9eefe2a2SStefan Roese /** 485*9eefe2a2SStefan Roese * replay_bud - replay a bud logical eraseblock. 486*9eefe2a2SStefan Roese * @c: UBIFS file-system description object 487*9eefe2a2SStefan Roese * @lnum: bud logical eraseblock number to replay 488*9eefe2a2SStefan Roese * @offs: bud start offset 489*9eefe2a2SStefan Roese * @jhead: journal head to which this bud belongs 490*9eefe2a2SStefan Roese * @free: amount of free space in the bud is returned here 491*9eefe2a2SStefan Roese * @dirty: amount of dirty space from padding and deletion nodes is returned 492*9eefe2a2SStefan Roese * here 493*9eefe2a2SStefan Roese * 494*9eefe2a2SStefan Roese * This function returns zero in case of success and a negative error code in 495*9eefe2a2SStefan Roese * case of failure. 496*9eefe2a2SStefan Roese */ 497*9eefe2a2SStefan Roese static int replay_bud(struct ubifs_info *c, int lnum, int offs, int jhead, 498*9eefe2a2SStefan Roese int *free, int *dirty) 499*9eefe2a2SStefan Roese { 500*9eefe2a2SStefan Roese int err = 0, used = 0; 501*9eefe2a2SStefan Roese struct ubifs_scan_leb *sleb; 502*9eefe2a2SStefan Roese struct ubifs_scan_node *snod; 503*9eefe2a2SStefan Roese struct ubifs_bud *bud; 504*9eefe2a2SStefan Roese 505*9eefe2a2SStefan Roese dbg_mnt("replay bud LEB %d, head %d", lnum, jhead); 506*9eefe2a2SStefan Roese if (c->need_recovery) 507*9eefe2a2SStefan Roese sleb = ubifs_recover_leb(c, lnum, offs, c->sbuf, jhead != GCHD); 508*9eefe2a2SStefan Roese else 509*9eefe2a2SStefan Roese sleb = ubifs_scan(c, lnum, offs, c->sbuf); 510*9eefe2a2SStefan Roese if (IS_ERR(sleb)) 511*9eefe2a2SStefan Roese return PTR_ERR(sleb); 512*9eefe2a2SStefan Roese 513*9eefe2a2SStefan Roese /* 514*9eefe2a2SStefan Roese * The bud does not have to start from offset zero - the beginning of 515*9eefe2a2SStefan Roese * the 'lnum' LEB may contain previously committed data. One of the 516*9eefe2a2SStefan Roese * things we have to do in replay is to correctly update lprops with 517*9eefe2a2SStefan Roese * newer information about this LEB. 518*9eefe2a2SStefan Roese * 519*9eefe2a2SStefan Roese * At this point lprops thinks that this LEB has 'c->leb_size - offs' 520*9eefe2a2SStefan Roese * bytes of free space because it only contain information about 521*9eefe2a2SStefan Roese * committed data. 522*9eefe2a2SStefan Roese * 523*9eefe2a2SStefan Roese * But we know that real amount of free space is 'c->leb_size - 524*9eefe2a2SStefan Roese * sleb->endpt', and the space in the 'lnum' LEB between 'offs' and 525*9eefe2a2SStefan Roese * 'sleb->endpt' is used by bud data. We have to correctly calculate 526*9eefe2a2SStefan Roese * how much of these data are dirty and update lprops with this 527*9eefe2a2SStefan Roese * information. 528*9eefe2a2SStefan Roese * 529*9eefe2a2SStefan Roese * The dirt in that LEB region is comprised of padding nodes, deletion 530*9eefe2a2SStefan Roese * nodes, truncation nodes and nodes which are obsoleted by subsequent 531*9eefe2a2SStefan Roese * nodes in this LEB. So instead of calculating clean space, we 532*9eefe2a2SStefan Roese * calculate used space ('used' variable). 533*9eefe2a2SStefan Roese */ 534*9eefe2a2SStefan Roese 535*9eefe2a2SStefan Roese list_for_each_entry(snod, &sleb->nodes, list) { 536*9eefe2a2SStefan Roese int deletion = 0; 537*9eefe2a2SStefan Roese 538*9eefe2a2SStefan Roese cond_resched(); 539*9eefe2a2SStefan Roese 540*9eefe2a2SStefan Roese if (snod->sqnum >= SQNUM_WATERMARK) { 541*9eefe2a2SStefan Roese ubifs_err("file system's life ended"); 542*9eefe2a2SStefan Roese goto out_dump; 543*9eefe2a2SStefan Roese } 544*9eefe2a2SStefan Roese 545*9eefe2a2SStefan Roese if (snod->sqnum > c->max_sqnum) 546*9eefe2a2SStefan Roese c->max_sqnum = snod->sqnum; 547*9eefe2a2SStefan Roese 548*9eefe2a2SStefan Roese switch (snod->type) { 549*9eefe2a2SStefan Roese case UBIFS_INO_NODE: 550*9eefe2a2SStefan Roese { 551*9eefe2a2SStefan Roese struct ubifs_ino_node *ino = snod->node; 552*9eefe2a2SStefan Roese loff_t new_size = le64_to_cpu(ino->size); 553*9eefe2a2SStefan Roese 554*9eefe2a2SStefan Roese if (le32_to_cpu(ino->nlink) == 0) 555*9eefe2a2SStefan Roese deletion = 1; 556*9eefe2a2SStefan Roese err = insert_node(c, lnum, snod->offs, snod->len, 557*9eefe2a2SStefan Roese &snod->key, snod->sqnum, deletion, 558*9eefe2a2SStefan Roese &used, 0, new_size); 559*9eefe2a2SStefan Roese break; 560*9eefe2a2SStefan Roese } 561*9eefe2a2SStefan Roese case UBIFS_DATA_NODE: 562*9eefe2a2SStefan Roese { 563*9eefe2a2SStefan Roese struct ubifs_data_node *dn = snod->node; 564*9eefe2a2SStefan Roese loff_t new_size = le32_to_cpu(dn->size) + 565*9eefe2a2SStefan Roese key_block(c, &snod->key) * 566*9eefe2a2SStefan Roese UBIFS_BLOCK_SIZE; 567*9eefe2a2SStefan Roese 568*9eefe2a2SStefan Roese err = insert_node(c, lnum, snod->offs, snod->len, 569*9eefe2a2SStefan Roese &snod->key, snod->sqnum, deletion, 570*9eefe2a2SStefan Roese &used, 0, new_size); 571*9eefe2a2SStefan Roese break; 572*9eefe2a2SStefan Roese } 573*9eefe2a2SStefan Roese case UBIFS_DENT_NODE: 574*9eefe2a2SStefan Roese case UBIFS_XENT_NODE: 575*9eefe2a2SStefan Roese { 576*9eefe2a2SStefan Roese struct ubifs_dent_node *dent = snod->node; 577*9eefe2a2SStefan Roese 578*9eefe2a2SStefan Roese err = ubifs_validate_entry(c, dent); 579*9eefe2a2SStefan Roese if (err) 580*9eefe2a2SStefan Roese goto out_dump; 581*9eefe2a2SStefan Roese 582*9eefe2a2SStefan Roese err = insert_dent(c, lnum, snod->offs, snod->len, 583*9eefe2a2SStefan Roese &snod->key, (char *)dent->name, 584*9eefe2a2SStefan Roese le16_to_cpu(dent->nlen), snod->sqnum, 585*9eefe2a2SStefan Roese !le64_to_cpu(dent->inum), &used); 586*9eefe2a2SStefan Roese break; 587*9eefe2a2SStefan Roese } 588*9eefe2a2SStefan Roese case UBIFS_TRUN_NODE: 589*9eefe2a2SStefan Roese { 590*9eefe2a2SStefan Roese struct ubifs_trun_node *trun = snod->node; 591*9eefe2a2SStefan Roese loff_t old_size = le64_to_cpu(trun->old_size); 592*9eefe2a2SStefan Roese loff_t new_size = le64_to_cpu(trun->new_size); 593*9eefe2a2SStefan Roese union ubifs_key key; 594*9eefe2a2SStefan Roese 595*9eefe2a2SStefan Roese /* Validate truncation node */ 596*9eefe2a2SStefan Roese if (old_size < 0 || old_size > c->max_inode_sz || 597*9eefe2a2SStefan Roese new_size < 0 || new_size > c->max_inode_sz || 598*9eefe2a2SStefan Roese old_size <= new_size) { 599*9eefe2a2SStefan Roese ubifs_err("bad truncation node"); 600*9eefe2a2SStefan Roese goto out_dump; 601*9eefe2a2SStefan Roese } 602*9eefe2a2SStefan Roese 603*9eefe2a2SStefan Roese /* 604*9eefe2a2SStefan Roese * Create a fake truncation key just to use the same 605*9eefe2a2SStefan Roese * functions which expect nodes to have keys. 606*9eefe2a2SStefan Roese */ 607*9eefe2a2SStefan Roese trun_key_init(c, &key, le32_to_cpu(trun->inum)); 608*9eefe2a2SStefan Roese err = insert_node(c, lnum, snod->offs, snod->len, 609*9eefe2a2SStefan Roese &key, snod->sqnum, 1, &used, 610*9eefe2a2SStefan Roese old_size, new_size); 611*9eefe2a2SStefan Roese break; 612*9eefe2a2SStefan Roese } 613*9eefe2a2SStefan Roese default: 614*9eefe2a2SStefan Roese ubifs_err("unexpected node type %d in bud LEB %d:%d", 615*9eefe2a2SStefan Roese snod->type, lnum, snod->offs); 616*9eefe2a2SStefan Roese err = -EINVAL; 617*9eefe2a2SStefan Roese goto out_dump; 618*9eefe2a2SStefan Roese } 619*9eefe2a2SStefan Roese if (err) 620*9eefe2a2SStefan Roese goto out; 621*9eefe2a2SStefan Roese } 622*9eefe2a2SStefan Roese 623*9eefe2a2SStefan Roese bud = ubifs_search_bud(c, lnum); 624*9eefe2a2SStefan Roese if (!bud) 625*9eefe2a2SStefan Roese BUG(); 626*9eefe2a2SStefan Roese 627*9eefe2a2SStefan Roese ubifs_assert(sleb->endpt - offs >= used); 628*9eefe2a2SStefan Roese ubifs_assert(sleb->endpt % c->min_io_size == 0); 629*9eefe2a2SStefan Roese 630*9eefe2a2SStefan Roese *dirty = sleb->endpt - offs - used; 631*9eefe2a2SStefan Roese *free = c->leb_size - sleb->endpt; 632*9eefe2a2SStefan Roese 633*9eefe2a2SStefan Roese out: 634*9eefe2a2SStefan Roese ubifs_scan_destroy(sleb); 635*9eefe2a2SStefan Roese return err; 636*9eefe2a2SStefan Roese 637*9eefe2a2SStefan Roese out_dump: 638*9eefe2a2SStefan Roese ubifs_err("bad node is at LEB %d:%d", lnum, snod->offs); 639*9eefe2a2SStefan Roese dbg_dump_node(c, snod->node); 640*9eefe2a2SStefan Roese ubifs_scan_destroy(sleb); 641*9eefe2a2SStefan Roese return -EINVAL; 642*9eefe2a2SStefan Roese } 643*9eefe2a2SStefan Roese 644*9eefe2a2SStefan Roese /** 645*9eefe2a2SStefan Roese * insert_ref_node - insert a reference node to the replay tree. 646*9eefe2a2SStefan Roese * @c: UBIFS file-system description object 647*9eefe2a2SStefan Roese * @lnum: node logical eraseblock number 648*9eefe2a2SStefan Roese * @offs: node offset 649*9eefe2a2SStefan Roese * @sqnum: sequence number 650*9eefe2a2SStefan Roese * @free: amount of free space in bud 651*9eefe2a2SStefan Roese * @dirty: amount of dirty space from padding and deletion nodes 652*9eefe2a2SStefan Roese * 653*9eefe2a2SStefan Roese * This function inserts a reference node to the replay tree and returns zero 654*9eefe2a2SStefan Roese * in case of success or a negative error code in case of failure. 655*9eefe2a2SStefan Roese */ 656*9eefe2a2SStefan Roese static int insert_ref_node(struct ubifs_info *c, int lnum, int offs, 657*9eefe2a2SStefan Roese unsigned long long sqnum, int free, int dirty) 658*9eefe2a2SStefan Roese { 659*9eefe2a2SStefan Roese struct rb_node **p = &c->replay_tree.rb_node, *parent = NULL; 660*9eefe2a2SStefan Roese struct replay_entry *r; 661*9eefe2a2SStefan Roese 662*9eefe2a2SStefan Roese dbg_mnt("add ref LEB %d:%d", lnum, offs); 663*9eefe2a2SStefan Roese while (*p) { 664*9eefe2a2SStefan Roese parent = *p; 665*9eefe2a2SStefan Roese r = rb_entry(parent, struct replay_entry, rb); 666*9eefe2a2SStefan Roese if (sqnum < r->sqnum) { 667*9eefe2a2SStefan Roese p = &(*p)->rb_left; 668*9eefe2a2SStefan Roese continue; 669*9eefe2a2SStefan Roese } else if (sqnum > r->sqnum) { 670*9eefe2a2SStefan Roese p = &(*p)->rb_right; 671*9eefe2a2SStefan Roese continue; 672*9eefe2a2SStefan Roese } 673*9eefe2a2SStefan Roese ubifs_err("duplicate sqnum in replay tree"); 674*9eefe2a2SStefan Roese return -EINVAL; 675*9eefe2a2SStefan Roese } 676*9eefe2a2SStefan Roese 677*9eefe2a2SStefan Roese r = kzalloc(sizeof(struct replay_entry), GFP_KERNEL); 678*9eefe2a2SStefan Roese if (!r) 679*9eefe2a2SStefan Roese return -ENOMEM; 680*9eefe2a2SStefan Roese 681*9eefe2a2SStefan Roese r->lnum = lnum; 682*9eefe2a2SStefan Roese r->offs = offs; 683*9eefe2a2SStefan Roese r->sqnum = sqnum; 684*9eefe2a2SStefan Roese r->flags = REPLAY_REF; 685*9eefe2a2SStefan Roese r->free = free; 686*9eefe2a2SStefan Roese r->dirty = dirty; 687*9eefe2a2SStefan Roese 688*9eefe2a2SStefan Roese rb_link_node(&r->rb, parent, p); 689*9eefe2a2SStefan Roese rb_insert_color(&r->rb, &c->replay_tree); 690*9eefe2a2SStefan Roese return 0; 691*9eefe2a2SStefan Roese } 692*9eefe2a2SStefan Roese 693*9eefe2a2SStefan Roese /** 694*9eefe2a2SStefan Roese * replay_buds - replay all buds. 695*9eefe2a2SStefan Roese * @c: UBIFS file-system description object 696*9eefe2a2SStefan Roese * 697*9eefe2a2SStefan Roese * This function returns zero in case of success and a negative error code in 698*9eefe2a2SStefan Roese * case of failure. 699*9eefe2a2SStefan Roese */ 700*9eefe2a2SStefan Roese static int replay_buds(struct ubifs_info *c) 701*9eefe2a2SStefan Roese { 702*9eefe2a2SStefan Roese struct bud_entry *b; 703*9eefe2a2SStefan Roese int err, uninitialized_var(free), uninitialized_var(dirty); 704*9eefe2a2SStefan Roese 705*9eefe2a2SStefan Roese list_for_each_entry(b, &c->replay_buds, list) { 706*9eefe2a2SStefan Roese err = replay_bud(c, b->bud->lnum, b->bud->start, b->bud->jhead, 707*9eefe2a2SStefan Roese &free, &dirty); 708*9eefe2a2SStefan Roese if (err) 709*9eefe2a2SStefan Roese return err; 710*9eefe2a2SStefan Roese err = insert_ref_node(c, b->bud->lnum, b->bud->start, b->sqnum, 711*9eefe2a2SStefan Roese free, dirty); 712*9eefe2a2SStefan Roese if (err) 713*9eefe2a2SStefan Roese return err; 714*9eefe2a2SStefan Roese } 715*9eefe2a2SStefan Roese 716*9eefe2a2SStefan Roese return 0; 717*9eefe2a2SStefan Roese } 718*9eefe2a2SStefan Roese 719*9eefe2a2SStefan Roese /** 720*9eefe2a2SStefan Roese * destroy_bud_list - destroy the list of buds to replay. 721*9eefe2a2SStefan Roese * @c: UBIFS file-system description object 722*9eefe2a2SStefan Roese */ 723*9eefe2a2SStefan Roese static void destroy_bud_list(struct ubifs_info *c) 724*9eefe2a2SStefan Roese { 725*9eefe2a2SStefan Roese struct bud_entry *b; 726*9eefe2a2SStefan Roese 727*9eefe2a2SStefan Roese while (!list_empty(&c->replay_buds)) { 728*9eefe2a2SStefan Roese b = list_entry(c->replay_buds.next, struct bud_entry, list); 729*9eefe2a2SStefan Roese list_del(&b->list); 730*9eefe2a2SStefan Roese kfree(b); 731*9eefe2a2SStefan Roese } 732*9eefe2a2SStefan Roese } 733*9eefe2a2SStefan Roese 734*9eefe2a2SStefan Roese /** 735*9eefe2a2SStefan Roese * add_replay_bud - add a bud to the list of buds to replay. 736*9eefe2a2SStefan Roese * @c: UBIFS file-system description object 737*9eefe2a2SStefan Roese * @lnum: bud logical eraseblock number to replay 738*9eefe2a2SStefan Roese * @offs: bud start offset 739*9eefe2a2SStefan Roese * @jhead: journal head to which this bud belongs 740*9eefe2a2SStefan Roese * @sqnum: reference node sequence number 741*9eefe2a2SStefan Roese * 742*9eefe2a2SStefan Roese * This function returns zero in case of success and a negative error code in 743*9eefe2a2SStefan Roese * case of failure. 744*9eefe2a2SStefan Roese */ 745*9eefe2a2SStefan Roese static int add_replay_bud(struct ubifs_info *c, int lnum, int offs, int jhead, 746*9eefe2a2SStefan Roese unsigned long long sqnum) 747*9eefe2a2SStefan Roese { 748*9eefe2a2SStefan Roese struct ubifs_bud *bud; 749*9eefe2a2SStefan Roese struct bud_entry *b; 750*9eefe2a2SStefan Roese 751*9eefe2a2SStefan Roese dbg_mnt("add replay bud LEB %d:%d, head %d", lnum, offs, jhead); 752*9eefe2a2SStefan Roese 753*9eefe2a2SStefan Roese bud = kmalloc(sizeof(struct ubifs_bud), GFP_KERNEL); 754*9eefe2a2SStefan Roese if (!bud) 755*9eefe2a2SStefan Roese return -ENOMEM; 756*9eefe2a2SStefan Roese 757*9eefe2a2SStefan Roese b = kmalloc(sizeof(struct bud_entry), GFP_KERNEL); 758*9eefe2a2SStefan Roese if (!b) { 759*9eefe2a2SStefan Roese kfree(bud); 760*9eefe2a2SStefan Roese return -ENOMEM; 761*9eefe2a2SStefan Roese } 762*9eefe2a2SStefan Roese 763*9eefe2a2SStefan Roese bud->lnum = lnum; 764*9eefe2a2SStefan Roese bud->start = offs; 765*9eefe2a2SStefan Roese bud->jhead = jhead; 766*9eefe2a2SStefan Roese ubifs_add_bud(c, bud); 767*9eefe2a2SStefan Roese 768*9eefe2a2SStefan Roese b->bud = bud; 769*9eefe2a2SStefan Roese b->sqnum = sqnum; 770*9eefe2a2SStefan Roese list_add_tail(&b->list, &c->replay_buds); 771*9eefe2a2SStefan Roese 772*9eefe2a2SStefan Roese return 0; 773*9eefe2a2SStefan Roese } 774*9eefe2a2SStefan Roese 775*9eefe2a2SStefan Roese /** 776*9eefe2a2SStefan Roese * validate_ref - validate a reference node. 777*9eefe2a2SStefan Roese * @c: UBIFS file-system description object 778*9eefe2a2SStefan Roese * @ref: the reference node to validate 779*9eefe2a2SStefan Roese * @ref_lnum: LEB number of the reference node 780*9eefe2a2SStefan Roese * @ref_offs: reference node offset 781*9eefe2a2SStefan Roese * 782*9eefe2a2SStefan Roese * This function returns %1 if a bud reference already exists for the LEB. %0 is 783*9eefe2a2SStefan Roese * returned if the reference node is new, otherwise %-EINVAL is returned if 784*9eefe2a2SStefan Roese * validation failed. 785*9eefe2a2SStefan Roese */ 786*9eefe2a2SStefan Roese static int validate_ref(struct ubifs_info *c, const struct ubifs_ref_node *ref) 787*9eefe2a2SStefan Roese { 788*9eefe2a2SStefan Roese struct ubifs_bud *bud; 789*9eefe2a2SStefan Roese int lnum = le32_to_cpu(ref->lnum); 790*9eefe2a2SStefan Roese unsigned int offs = le32_to_cpu(ref->offs); 791*9eefe2a2SStefan Roese unsigned int jhead = le32_to_cpu(ref->jhead); 792*9eefe2a2SStefan Roese 793*9eefe2a2SStefan Roese /* 794*9eefe2a2SStefan Roese * ref->offs may point to the end of LEB when the journal head points 795*9eefe2a2SStefan Roese * to the end of LEB and we write reference node for it during commit. 796*9eefe2a2SStefan Roese * So this is why we require 'offs > c->leb_size'. 797*9eefe2a2SStefan Roese */ 798*9eefe2a2SStefan Roese if (jhead >= c->jhead_cnt || lnum >= c->leb_cnt || 799*9eefe2a2SStefan Roese lnum < c->main_first || offs > c->leb_size || 800*9eefe2a2SStefan Roese offs & (c->min_io_size - 1)) 801*9eefe2a2SStefan Roese return -EINVAL; 802*9eefe2a2SStefan Roese 803*9eefe2a2SStefan Roese /* Make sure we have not already looked at this bud */ 804*9eefe2a2SStefan Roese bud = ubifs_search_bud(c, lnum); 805*9eefe2a2SStefan Roese if (bud) { 806*9eefe2a2SStefan Roese if (bud->jhead == jhead && bud->start <= offs) 807*9eefe2a2SStefan Roese return 1; 808*9eefe2a2SStefan Roese ubifs_err("bud at LEB %d:%d was already referred", lnum, offs); 809*9eefe2a2SStefan Roese return -EINVAL; 810*9eefe2a2SStefan Roese } 811*9eefe2a2SStefan Roese 812*9eefe2a2SStefan Roese return 0; 813*9eefe2a2SStefan Roese } 814*9eefe2a2SStefan Roese 815*9eefe2a2SStefan Roese /** 816*9eefe2a2SStefan Roese * replay_log_leb - replay a log logical eraseblock. 817*9eefe2a2SStefan Roese * @c: UBIFS file-system description object 818*9eefe2a2SStefan Roese * @lnum: log logical eraseblock to replay 819*9eefe2a2SStefan Roese * @offs: offset to start replaying from 820*9eefe2a2SStefan Roese * @sbuf: scan buffer 821*9eefe2a2SStefan Roese * 822*9eefe2a2SStefan Roese * This function replays a log LEB and returns zero in case of success, %1 if 823*9eefe2a2SStefan Roese * this is the last LEB in the log, and a negative error code in case of 824*9eefe2a2SStefan Roese * failure. 825*9eefe2a2SStefan Roese */ 826*9eefe2a2SStefan Roese static int replay_log_leb(struct ubifs_info *c, int lnum, int offs, void *sbuf) 827*9eefe2a2SStefan Roese { 828*9eefe2a2SStefan Roese int err; 829*9eefe2a2SStefan Roese struct ubifs_scan_leb *sleb; 830*9eefe2a2SStefan Roese struct ubifs_scan_node *snod; 831*9eefe2a2SStefan Roese const struct ubifs_cs_node *node; 832*9eefe2a2SStefan Roese 833*9eefe2a2SStefan Roese dbg_mnt("replay log LEB %d:%d", lnum, offs); 834*9eefe2a2SStefan Roese sleb = ubifs_scan(c, lnum, offs, sbuf); 835*9eefe2a2SStefan Roese if (IS_ERR(sleb)) { 836*9eefe2a2SStefan Roese if (c->need_recovery) 837*9eefe2a2SStefan Roese sleb = ubifs_recover_log_leb(c, lnum, offs, sbuf); 838*9eefe2a2SStefan Roese if (IS_ERR(sleb)) 839*9eefe2a2SStefan Roese return PTR_ERR(sleb); 840*9eefe2a2SStefan Roese } 841*9eefe2a2SStefan Roese 842*9eefe2a2SStefan Roese if (sleb->nodes_cnt == 0) { 843*9eefe2a2SStefan Roese err = 1; 844*9eefe2a2SStefan Roese goto out; 845*9eefe2a2SStefan Roese } 846*9eefe2a2SStefan Roese 847*9eefe2a2SStefan Roese node = sleb->buf; 848*9eefe2a2SStefan Roese 849*9eefe2a2SStefan Roese snod = list_entry(sleb->nodes.next, struct ubifs_scan_node, list); 850*9eefe2a2SStefan Roese if (c->cs_sqnum == 0) { 851*9eefe2a2SStefan Roese /* 852*9eefe2a2SStefan Roese * This is the first log LEB we are looking at, make sure that 853*9eefe2a2SStefan Roese * the first node is a commit start node. Also record its 854*9eefe2a2SStefan Roese * sequence number so that UBIFS can determine where the log 855*9eefe2a2SStefan Roese * ends, because all nodes which were have higher sequence 856*9eefe2a2SStefan Roese * numbers. 857*9eefe2a2SStefan Roese */ 858*9eefe2a2SStefan Roese if (snod->type != UBIFS_CS_NODE) { 859*9eefe2a2SStefan Roese dbg_err("first log node at LEB %d:%d is not CS node", 860*9eefe2a2SStefan Roese lnum, offs); 861*9eefe2a2SStefan Roese goto out_dump; 862*9eefe2a2SStefan Roese } 863*9eefe2a2SStefan Roese if (le64_to_cpu(node->cmt_no) != c->cmt_no) { 864*9eefe2a2SStefan Roese dbg_err("first CS node at LEB %d:%d has wrong " 865*9eefe2a2SStefan Roese "commit number %llu expected %llu", 866*9eefe2a2SStefan Roese lnum, offs, 867*9eefe2a2SStefan Roese (unsigned long long)le64_to_cpu(node->cmt_no), 868*9eefe2a2SStefan Roese c->cmt_no); 869*9eefe2a2SStefan Roese goto out_dump; 870*9eefe2a2SStefan Roese } 871*9eefe2a2SStefan Roese 872*9eefe2a2SStefan Roese c->cs_sqnum = le64_to_cpu(node->ch.sqnum); 873*9eefe2a2SStefan Roese dbg_mnt("commit start sqnum %llu", c->cs_sqnum); 874*9eefe2a2SStefan Roese } 875*9eefe2a2SStefan Roese 876*9eefe2a2SStefan Roese if (snod->sqnum < c->cs_sqnum) { 877*9eefe2a2SStefan Roese /* 878*9eefe2a2SStefan Roese * This means that we reached end of log and now 879*9eefe2a2SStefan Roese * look to the older log data, which was already 880*9eefe2a2SStefan Roese * committed but the eraseblock was not erased (UBIFS 881*9eefe2a2SStefan Roese * only un-maps it). So this basically means we have to 882*9eefe2a2SStefan Roese * exit with "end of log" code. 883*9eefe2a2SStefan Roese */ 884*9eefe2a2SStefan Roese err = 1; 885*9eefe2a2SStefan Roese goto out; 886*9eefe2a2SStefan Roese } 887*9eefe2a2SStefan Roese 888*9eefe2a2SStefan Roese /* Make sure the first node sits at offset zero of the LEB */ 889*9eefe2a2SStefan Roese if (snod->offs != 0) { 890*9eefe2a2SStefan Roese dbg_err("first node is not at zero offset"); 891*9eefe2a2SStefan Roese goto out_dump; 892*9eefe2a2SStefan Roese } 893*9eefe2a2SStefan Roese 894*9eefe2a2SStefan Roese list_for_each_entry(snod, &sleb->nodes, list) { 895*9eefe2a2SStefan Roese 896*9eefe2a2SStefan Roese cond_resched(); 897*9eefe2a2SStefan Roese 898*9eefe2a2SStefan Roese if (snod->sqnum >= SQNUM_WATERMARK) { 899*9eefe2a2SStefan Roese ubifs_err("file system's life ended"); 900*9eefe2a2SStefan Roese goto out_dump; 901*9eefe2a2SStefan Roese } 902*9eefe2a2SStefan Roese 903*9eefe2a2SStefan Roese if (snod->sqnum < c->cs_sqnum) { 904*9eefe2a2SStefan Roese dbg_err("bad sqnum %llu, commit sqnum %llu", 905*9eefe2a2SStefan Roese snod->sqnum, c->cs_sqnum); 906*9eefe2a2SStefan Roese goto out_dump; 907*9eefe2a2SStefan Roese } 908*9eefe2a2SStefan Roese 909*9eefe2a2SStefan Roese if (snod->sqnum > c->max_sqnum) 910*9eefe2a2SStefan Roese c->max_sqnum = snod->sqnum; 911*9eefe2a2SStefan Roese 912*9eefe2a2SStefan Roese switch (snod->type) { 913*9eefe2a2SStefan Roese case UBIFS_REF_NODE: { 914*9eefe2a2SStefan Roese const struct ubifs_ref_node *ref = snod->node; 915*9eefe2a2SStefan Roese 916*9eefe2a2SStefan Roese err = validate_ref(c, ref); 917*9eefe2a2SStefan Roese if (err == 1) 918*9eefe2a2SStefan Roese break; /* Already have this bud */ 919*9eefe2a2SStefan Roese if (err) 920*9eefe2a2SStefan Roese goto out_dump; 921*9eefe2a2SStefan Roese 922*9eefe2a2SStefan Roese err = add_replay_bud(c, le32_to_cpu(ref->lnum), 923*9eefe2a2SStefan Roese le32_to_cpu(ref->offs), 924*9eefe2a2SStefan Roese le32_to_cpu(ref->jhead), 925*9eefe2a2SStefan Roese snod->sqnum); 926*9eefe2a2SStefan Roese if (err) 927*9eefe2a2SStefan Roese goto out; 928*9eefe2a2SStefan Roese 929*9eefe2a2SStefan Roese break; 930*9eefe2a2SStefan Roese } 931*9eefe2a2SStefan Roese case UBIFS_CS_NODE: 932*9eefe2a2SStefan Roese /* Make sure it sits at the beginning of LEB */ 933*9eefe2a2SStefan Roese if (snod->offs != 0) { 934*9eefe2a2SStefan Roese ubifs_err("unexpected node in log"); 935*9eefe2a2SStefan Roese goto out_dump; 936*9eefe2a2SStefan Roese } 937*9eefe2a2SStefan Roese break; 938*9eefe2a2SStefan Roese default: 939*9eefe2a2SStefan Roese ubifs_err("unexpected node in log"); 940*9eefe2a2SStefan Roese goto out_dump; 941*9eefe2a2SStefan Roese } 942*9eefe2a2SStefan Roese } 943*9eefe2a2SStefan Roese 944*9eefe2a2SStefan Roese if (sleb->endpt || c->lhead_offs >= c->leb_size) { 945*9eefe2a2SStefan Roese c->lhead_lnum = lnum; 946*9eefe2a2SStefan Roese c->lhead_offs = sleb->endpt; 947*9eefe2a2SStefan Roese } 948*9eefe2a2SStefan Roese 949*9eefe2a2SStefan Roese err = !sleb->endpt; 950*9eefe2a2SStefan Roese out: 951*9eefe2a2SStefan Roese ubifs_scan_destroy(sleb); 952*9eefe2a2SStefan Roese return err; 953*9eefe2a2SStefan Roese 954*9eefe2a2SStefan Roese out_dump: 955*9eefe2a2SStefan Roese ubifs_err("log error detected while replying the log at LEB %d:%d", 956*9eefe2a2SStefan Roese lnum, offs + snod->offs); 957*9eefe2a2SStefan Roese dbg_dump_node(c, snod->node); 958*9eefe2a2SStefan Roese ubifs_scan_destroy(sleb); 959*9eefe2a2SStefan Roese return -EINVAL; 960*9eefe2a2SStefan Roese } 961*9eefe2a2SStefan Roese 962*9eefe2a2SStefan Roese /** 963*9eefe2a2SStefan Roese * take_ihead - update the status of the index head in lprops to 'taken'. 964*9eefe2a2SStefan Roese * @c: UBIFS file-system description object 965*9eefe2a2SStefan Roese * 966*9eefe2a2SStefan Roese * This function returns the amount of free space in the index head LEB or a 967*9eefe2a2SStefan Roese * negative error code. 968*9eefe2a2SStefan Roese */ 969*9eefe2a2SStefan Roese static int take_ihead(struct ubifs_info *c) 970*9eefe2a2SStefan Roese { 971*9eefe2a2SStefan Roese const struct ubifs_lprops *lp; 972*9eefe2a2SStefan Roese int err, free; 973*9eefe2a2SStefan Roese 974*9eefe2a2SStefan Roese ubifs_get_lprops(c); 975*9eefe2a2SStefan Roese 976*9eefe2a2SStefan Roese lp = ubifs_lpt_lookup_dirty(c, c->ihead_lnum); 977*9eefe2a2SStefan Roese if (IS_ERR(lp)) { 978*9eefe2a2SStefan Roese err = PTR_ERR(lp); 979*9eefe2a2SStefan Roese goto out; 980*9eefe2a2SStefan Roese } 981*9eefe2a2SStefan Roese 982*9eefe2a2SStefan Roese free = lp->free; 983*9eefe2a2SStefan Roese 984*9eefe2a2SStefan Roese lp = ubifs_change_lp(c, lp, LPROPS_NC, LPROPS_NC, 985*9eefe2a2SStefan Roese lp->flags | LPROPS_TAKEN, 0); 986*9eefe2a2SStefan Roese if (IS_ERR(lp)) { 987*9eefe2a2SStefan Roese err = PTR_ERR(lp); 988*9eefe2a2SStefan Roese goto out; 989*9eefe2a2SStefan Roese } 990*9eefe2a2SStefan Roese 991*9eefe2a2SStefan Roese err = free; 992*9eefe2a2SStefan Roese out: 993*9eefe2a2SStefan Roese ubifs_release_lprops(c); 994*9eefe2a2SStefan Roese return err; 995*9eefe2a2SStefan Roese } 996*9eefe2a2SStefan Roese 997*9eefe2a2SStefan Roese /** 998*9eefe2a2SStefan Roese * ubifs_replay_journal - replay journal. 999*9eefe2a2SStefan Roese * @c: UBIFS file-system description object 1000*9eefe2a2SStefan Roese * 1001*9eefe2a2SStefan Roese * This function scans the journal, replays and cleans it up. It makes sure all 1002*9eefe2a2SStefan Roese * memory data structures related to uncommitted journal are built (dirty TNC 1003*9eefe2a2SStefan Roese * tree, tree of buds, modified lprops, etc). 1004*9eefe2a2SStefan Roese */ 1005*9eefe2a2SStefan Roese int ubifs_replay_journal(struct ubifs_info *c) 1006*9eefe2a2SStefan Roese { 1007*9eefe2a2SStefan Roese int err, i, lnum, offs, _free; 1008*9eefe2a2SStefan Roese void *sbuf = NULL; 1009*9eefe2a2SStefan Roese 1010*9eefe2a2SStefan Roese BUILD_BUG_ON(UBIFS_TRUN_KEY > 5); 1011*9eefe2a2SStefan Roese 1012*9eefe2a2SStefan Roese /* Update the status of the index head in lprops to 'taken' */ 1013*9eefe2a2SStefan Roese _free = take_ihead(c); 1014*9eefe2a2SStefan Roese if (_free < 0) 1015*9eefe2a2SStefan Roese return _free; /* Error code */ 1016*9eefe2a2SStefan Roese 1017*9eefe2a2SStefan Roese if (c->ihead_offs != c->leb_size - _free) { 1018*9eefe2a2SStefan Roese ubifs_err("bad index head LEB %d:%d", c->ihead_lnum, 1019*9eefe2a2SStefan Roese c->ihead_offs); 1020*9eefe2a2SStefan Roese return -EINVAL; 1021*9eefe2a2SStefan Roese } 1022*9eefe2a2SStefan Roese 1023*9eefe2a2SStefan Roese sbuf = vmalloc(c->leb_size); 1024*9eefe2a2SStefan Roese if (!sbuf) 1025*9eefe2a2SStefan Roese return -ENOMEM; 1026*9eefe2a2SStefan Roese 1027*9eefe2a2SStefan Roese dbg_mnt("start replaying the journal"); 1028*9eefe2a2SStefan Roese 1029*9eefe2a2SStefan Roese c->replaying = 1; 1030*9eefe2a2SStefan Roese 1031*9eefe2a2SStefan Roese lnum = c->ltail_lnum = c->lhead_lnum; 1032*9eefe2a2SStefan Roese offs = c->lhead_offs; 1033*9eefe2a2SStefan Roese 1034*9eefe2a2SStefan Roese for (i = 0; i < c->log_lebs; i++, lnum++) { 1035*9eefe2a2SStefan Roese if (lnum >= UBIFS_LOG_LNUM + c->log_lebs) { 1036*9eefe2a2SStefan Roese /* 1037*9eefe2a2SStefan Roese * The log is logically circular, we reached the last 1038*9eefe2a2SStefan Roese * LEB, switch to the first one. 1039*9eefe2a2SStefan Roese */ 1040*9eefe2a2SStefan Roese lnum = UBIFS_LOG_LNUM; 1041*9eefe2a2SStefan Roese offs = 0; 1042*9eefe2a2SStefan Roese } 1043*9eefe2a2SStefan Roese err = replay_log_leb(c, lnum, offs, sbuf); 1044*9eefe2a2SStefan Roese if (err == 1) 1045*9eefe2a2SStefan Roese /* We hit the end of the log */ 1046*9eefe2a2SStefan Roese break; 1047*9eefe2a2SStefan Roese if (err) 1048*9eefe2a2SStefan Roese goto out; 1049*9eefe2a2SStefan Roese offs = 0; 1050*9eefe2a2SStefan Roese } 1051*9eefe2a2SStefan Roese 1052*9eefe2a2SStefan Roese err = replay_buds(c); 1053*9eefe2a2SStefan Roese if (err) 1054*9eefe2a2SStefan Roese goto out; 1055*9eefe2a2SStefan Roese 1056*9eefe2a2SStefan Roese err = apply_replay_tree(c); 1057*9eefe2a2SStefan Roese if (err) 1058*9eefe2a2SStefan Roese goto out; 1059*9eefe2a2SStefan Roese 1060*9eefe2a2SStefan Roese ubifs_assert(c->bud_bytes <= c->max_bud_bytes || c->need_recovery); 1061*9eefe2a2SStefan Roese dbg_mnt("finished, log head LEB %d:%d, max_sqnum %llu, " 1062*9eefe2a2SStefan Roese "highest_inum %lu", c->lhead_lnum, c->lhead_offs, c->max_sqnum, 1063*9eefe2a2SStefan Roese (unsigned long)c->highest_inum); 1064*9eefe2a2SStefan Roese out: 1065*9eefe2a2SStefan Roese destroy_replay_tree(c); 1066*9eefe2a2SStefan Roese destroy_bud_list(c); 1067*9eefe2a2SStefan Roese vfree(sbuf); 1068*9eefe2a2SStefan Roese c->replaying = 0; 1069*9eefe2a2SStefan Roese return err; 1070*9eefe2a2SStefan Roese } 1071