xref: /openbmc/u-boot/fs/ubifs/tnc.c (revision 522e4fbb)
1  /*
2   * This file is part of UBIFS.
3   *
4   * Copyright (C) 2006-2008 Nokia Corporation.
5   *
6   * SPDX-License-Identifier:	GPL-2.0+
7   *
8   * Authors: Adrian Hunter
9   *          Artem Bityutskiy (Битюцкий Артём)
10   */
11  
12  /*
13   * This file implements TNC (Tree Node Cache) which caches indexing nodes of
14   * the UBIFS B-tree.
15   *
16   * At the moment the locking rules of the TNC tree are quite simple and
17   * straightforward. We just have a mutex and lock it when we traverse the
18   * tree. If a znode is not in memory, we read it from flash while still having
19   * the mutex locked.
20   */
21  
22  #ifndef __UBOOT__
23  #include <linux/crc32.h>
24  #include <linux/slab.h>
25  #else
26  #include <linux/compat.h>
27  #include <linux/err.h>
28  #include <linux/stat.h>
29  #endif
30  #include "ubifs.h"
31  
32  /*
33   * Returned codes of 'matches_name()' and 'fallible_matches_name()' functions.
34   * @NAME_LESS: name corresponding to the first argument is less than second
35   * @NAME_MATCHES: names match
36   * @NAME_GREATER: name corresponding to the second argument is greater than
37   *                first
38   * @NOT_ON_MEDIA: node referred by zbranch does not exist on the media
39   *
40   * These constants were introduce to improve readability.
41   */
42  enum {
43  	NAME_LESS    = 0,
44  	NAME_MATCHES = 1,
45  	NAME_GREATER = 2,
46  	NOT_ON_MEDIA = 3,
47  };
48  
49  /**
50   * insert_old_idx - record an index node obsoleted since the last commit start.
51   * @c: UBIFS file-system description object
52   * @lnum: LEB number of obsoleted index node
53   * @offs: offset of obsoleted index node
54   *
55   * Returns %0 on success, and a negative error code on failure.
56   *
57   * For recovery, there must always be a complete intact version of the index on
58   * flash at all times. That is called the "old index". It is the index as at the
59   * time of the last successful commit. Many of the index nodes in the old index
60   * may be dirty, but they must not be erased until the next successful commit
61   * (at which point that index becomes the old index).
62   *
63   * That means that the garbage collection and the in-the-gaps method of
64   * committing must be able to determine if an index node is in the old index.
65   * Most of the old index nodes can be found by looking up the TNC using the
66   * 'lookup_znode()' function. However, some of the old index nodes may have
67   * been deleted from the current index or may have been changed so much that
68   * they cannot be easily found. In those cases, an entry is added to an RB-tree.
69   * That is what this function does. The RB-tree is ordered by LEB number and
70   * offset because they uniquely identify the old index node.
71   */
72  static int insert_old_idx(struct ubifs_info *c, int lnum, int offs)
73  {
74  	struct ubifs_old_idx *old_idx, *o;
75  	struct rb_node **p, *parent = NULL;
76  
77  	old_idx = kmalloc(sizeof(struct ubifs_old_idx), GFP_NOFS);
78  	if (unlikely(!old_idx))
79  		return -ENOMEM;
80  	old_idx->lnum = lnum;
81  	old_idx->offs = offs;
82  
83  	p = &c->old_idx.rb_node;
84  	while (*p) {
85  		parent = *p;
86  		o = rb_entry(parent, struct ubifs_old_idx, rb);
87  		if (lnum < o->lnum)
88  			p = &(*p)->rb_left;
89  		else if (lnum > o->lnum)
90  			p = &(*p)->rb_right;
91  		else if (offs < o->offs)
92  			p = &(*p)->rb_left;
93  		else if (offs > o->offs)
94  			p = &(*p)->rb_right;
95  		else {
96  			ubifs_err(c, "old idx added twice!");
97  			kfree(old_idx);
98  			return 0;
99  		}
100  	}
101  	rb_link_node(&old_idx->rb, parent, p);
102  	rb_insert_color(&old_idx->rb, &c->old_idx);
103  	return 0;
104  }
105  
106  /**
107   * insert_old_idx_znode - record a znode obsoleted since last commit start.
108   * @c: UBIFS file-system description object
109   * @znode: znode of obsoleted index node
110   *
111   * Returns %0 on success, and a negative error code on failure.
112   */
113  int insert_old_idx_znode(struct ubifs_info *c, struct ubifs_znode *znode)
114  {
115  	if (znode->parent) {
116  		struct ubifs_zbranch *zbr;
117  
118  		zbr = &znode->parent->zbranch[znode->iip];
119  		if (zbr->len)
120  			return insert_old_idx(c, zbr->lnum, zbr->offs);
121  	} else
122  		if (c->zroot.len)
123  			return insert_old_idx(c, c->zroot.lnum,
124  					      c->zroot.offs);
125  	return 0;
126  }
127  
128  /**
129   * ins_clr_old_idx_znode - record a znode obsoleted since last commit start.
130   * @c: UBIFS file-system description object
131   * @znode: znode of obsoleted index node
132   *
133   * Returns %0 on success, and a negative error code on failure.
134   */
135  static int ins_clr_old_idx_znode(struct ubifs_info *c,
136  				 struct ubifs_znode *znode)
137  {
138  	int err;
139  
140  	if (znode->parent) {
141  		struct ubifs_zbranch *zbr;
142  
143  		zbr = &znode->parent->zbranch[znode->iip];
144  		if (zbr->len) {
145  			err = insert_old_idx(c, zbr->lnum, zbr->offs);
146  			if (err)
147  				return err;
148  			zbr->lnum = 0;
149  			zbr->offs = 0;
150  			zbr->len = 0;
151  		}
152  	} else
153  		if (c->zroot.len) {
154  			err = insert_old_idx(c, c->zroot.lnum, c->zroot.offs);
155  			if (err)
156  				return err;
157  			c->zroot.lnum = 0;
158  			c->zroot.offs = 0;
159  			c->zroot.len = 0;
160  		}
161  	return 0;
162  }
163  
164  /**
165   * destroy_old_idx - destroy the old_idx RB-tree.
166   * @c: UBIFS file-system description object
167   *
168   * During start commit, the old_idx RB-tree is used to avoid overwriting index
169   * nodes that were in the index last commit but have since been deleted.  This
170   * is necessary for recovery i.e. the old index must be kept intact until the
171   * new index is successfully written.  The old-idx RB-tree is used for the
172   * in-the-gaps method of writing index nodes and is destroyed every commit.
173   */
174  void destroy_old_idx(struct ubifs_info *c)
175  {
176  	struct ubifs_old_idx *old_idx, *n;
177  
178  	rbtree_postorder_for_each_entry_safe(old_idx, n, &c->old_idx, rb)
179  		kfree(old_idx);
180  
181  	c->old_idx = RB_ROOT;
182  }
183  
184  /**
185   * copy_znode - copy a dirty znode.
186   * @c: UBIFS file-system description object
187   * @znode: znode to copy
188   *
189   * A dirty znode being committed may not be changed, so it is copied.
190   */
191  static struct ubifs_znode *copy_znode(struct ubifs_info *c,
192  				      struct ubifs_znode *znode)
193  {
194  	struct ubifs_znode *zn;
195  
196  	zn = kmalloc(c->max_znode_sz, GFP_NOFS);
197  	if (unlikely(!zn))
198  		return ERR_PTR(-ENOMEM);
199  
200  	memcpy(zn, znode, c->max_znode_sz);
201  	zn->cnext = NULL;
202  	__set_bit(DIRTY_ZNODE, &zn->flags);
203  	__clear_bit(COW_ZNODE, &zn->flags);
204  
205  	ubifs_assert(!ubifs_zn_obsolete(znode));
206  	__set_bit(OBSOLETE_ZNODE, &znode->flags);
207  
208  	if (znode->level != 0) {
209  		int i;
210  		const int n = zn->child_cnt;
211  
212  		/* The children now have new parent */
213  		for (i = 0; i < n; i++) {
214  			struct ubifs_zbranch *zbr = &zn->zbranch[i];
215  
216  			if (zbr->znode)
217  				zbr->znode->parent = zn;
218  		}
219  	}
220  
221  	atomic_long_inc(&c->dirty_zn_cnt);
222  	return zn;
223  }
224  
225  /**
226   * add_idx_dirt - add dirt due to a dirty znode.
227   * @c: UBIFS file-system description object
228   * @lnum: LEB number of index node
229   * @dirt: size of index node
230   *
231   * This function updates lprops dirty space and the new size of the index.
232   */
233  static int add_idx_dirt(struct ubifs_info *c, int lnum, int dirt)
234  {
235  	c->calc_idx_sz -= ALIGN(dirt, 8);
236  	return ubifs_add_dirt(c, lnum, dirt);
237  }
238  
239  /**
240   * dirty_cow_znode - ensure a znode is not being committed.
241   * @c: UBIFS file-system description object
242   * @zbr: branch of znode to check
243   *
244   * Returns dirtied znode on success or negative error code on failure.
245   */
246  static struct ubifs_znode *dirty_cow_znode(struct ubifs_info *c,
247  					   struct ubifs_zbranch *zbr)
248  {
249  	struct ubifs_znode *znode = zbr->znode;
250  	struct ubifs_znode *zn;
251  	int err;
252  
253  	if (!ubifs_zn_cow(znode)) {
254  		/* znode is not being committed */
255  		if (!test_and_set_bit(DIRTY_ZNODE, &znode->flags)) {
256  			atomic_long_inc(&c->dirty_zn_cnt);
257  			atomic_long_dec(&c->clean_zn_cnt);
258  			atomic_long_dec(&ubifs_clean_zn_cnt);
259  			err = add_idx_dirt(c, zbr->lnum, zbr->len);
260  			if (unlikely(err))
261  				return ERR_PTR(err);
262  		}
263  		return znode;
264  	}
265  
266  	zn = copy_znode(c, znode);
267  	if (IS_ERR(zn))
268  		return zn;
269  
270  	if (zbr->len) {
271  		err = insert_old_idx(c, zbr->lnum, zbr->offs);
272  		if (unlikely(err))
273  			return ERR_PTR(err);
274  		err = add_idx_dirt(c, zbr->lnum, zbr->len);
275  	} else
276  		err = 0;
277  
278  	zbr->znode = zn;
279  	zbr->lnum = 0;
280  	zbr->offs = 0;
281  	zbr->len = 0;
282  
283  	if (unlikely(err))
284  		return ERR_PTR(err);
285  	return zn;
286  }
287  
288  /**
289   * lnc_add - add a leaf node to the leaf node cache.
290   * @c: UBIFS file-system description object
291   * @zbr: zbranch of leaf node
292   * @node: leaf node
293   *
294   * Leaf nodes are non-index nodes directory entry nodes or data nodes. The
295   * purpose of the leaf node cache is to save re-reading the same leaf node over
296   * and over again. Most things are cached by VFS, however the file system must
297   * cache directory entries for readdir and for resolving hash collisions. The
298   * present implementation of the leaf node cache is extremely simple, and
299   * allows for error returns that are not used but that may be needed if a more
300   * complex implementation is created.
301   *
302   * Note, this function does not add the @node object to LNC directly, but
303   * allocates a copy of the object and adds the copy to LNC. The reason for this
304   * is that @node has been allocated outside of the TNC subsystem and will be
305   * used with @c->tnc_mutex unlock upon return from the TNC subsystem. But LNC
306   * may be changed at any time, e.g. freed by the shrinker.
307   */
308  static int lnc_add(struct ubifs_info *c, struct ubifs_zbranch *zbr,
309  		   const void *node)
310  {
311  	int err;
312  	void *lnc_node;
313  	const struct ubifs_dent_node *dent = node;
314  
315  	ubifs_assert(!zbr->leaf);
316  	ubifs_assert(zbr->len != 0);
317  	ubifs_assert(is_hash_key(c, &zbr->key));
318  
319  	err = ubifs_validate_entry(c, dent);
320  	if (err) {
321  		dump_stack();
322  		ubifs_dump_node(c, dent);
323  		return err;
324  	}
325  
326  	lnc_node = kmemdup(node, zbr->len, GFP_NOFS);
327  	if (!lnc_node)
328  		/* We don't have to have the cache, so no error */
329  		return 0;
330  
331  	zbr->leaf = lnc_node;
332  	return 0;
333  }
334  
335   /**
336   * lnc_add_directly - add a leaf node to the leaf-node-cache.
337   * @c: UBIFS file-system description object
338   * @zbr: zbranch of leaf node
339   * @node: leaf node
340   *
341   * This function is similar to 'lnc_add()', but it does not create a copy of
342   * @node but inserts @node to TNC directly.
343   */
344  static int lnc_add_directly(struct ubifs_info *c, struct ubifs_zbranch *zbr,
345  			    void *node)
346  {
347  	int err;
348  
349  	ubifs_assert(!zbr->leaf);
350  	ubifs_assert(zbr->len != 0);
351  
352  	err = ubifs_validate_entry(c, node);
353  	if (err) {
354  		dump_stack();
355  		ubifs_dump_node(c, node);
356  		return err;
357  	}
358  
359  	zbr->leaf = node;
360  	return 0;
361  }
362  
363  /**
364   * lnc_free - remove a leaf node from the leaf node cache.
365   * @zbr: zbranch of leaf node
366   * @node: leaf node
367   */
368  static void lnc_free(struct ubifs_zbranch *zbr)
369  {
370  	if (!zbr->leaf)
371  		return;
372  	kfree(zbr->leaf);
373  	zbr->leaf = NULL;
374  }
375  
376  /**
377   * tnc_read_node_nm - read a "hashed" leaf node.
378   * @c: UBIFS file-system description object
379   * @zbr: key and position of the node
380   * @node: node is returned here
381   *
382   * This function reads a "hashed" node defined by @zbr from the leaf node cache
383   * (in it is there) or from the hash media, in which case the node is also
384   * added to LNC. Returns zero in case of success or a negative negative error
385   * code in case of failure.
386   */
387  static int tnc_read_node_nm(struct ubifs_info *c, struct ubifs_zbranch *zbr,
388  			    void *node)
389  {
390  	int err;
391  
392  	ubifs_assert(is_hash_key(c, &zbr->key));
393  
394  	if (zbr->leaf) {
395  		/* Read from the leaf node cache */
396  		ubifs_assert(zbr->len != 0);
397  		memcpy(node, zbr->leaf, zbr->len);
398  		return 0;
399  	}
400  
401  	err = ubifs_tnc_read_node(c, zbr, node);
402  	if (err)
403  		return err;
404  
405  	/* Add the node to the leaf node cache */
406  	err = lnc_add(c, zbr, node);
407  	return err;
408  }
409  
410  /**
411   * try_read_node - read a node if it is a node.
412   * @c: UBIFS file-system description object
413   * @buf: buffer to read to
414   * @type: node type
415   * @len: node length (not aligned)
416   * @lnum: LEB number of node to read
417   * @offs: offset of node to read
418   *
419   * This function tries to read a node of known type and length, checks it and
420   * stores it in @buf. This function returns %1 if a node is present and %0 if
421   * a node is not present. A negative error code is returned for I/O errors.
422   * This function performs that same function as ubifs_read_node except that
423   * it does not require that there is actually a node present and instead
424   * the return code indicates if a node was read.
425   *
426   * Note, this function does not check CRC of data nodes if @c->no_chk_data_crc
427   * is true (it is controlled by corresponding mount option). However, if
428   * @c->mounting or @c->remounting_rw is true (we are mounting or re-mounting to
429   * R/W mode), @c->no_chk_data_crc is ignored and CRC is checked. This is
430   * because during mounting or re-mounting from R/O mode to R/W mode we may read
431   * journal nodes (when replying the journal or doing the recovery) and the
432   * journal nodes may potentially be corrupted, so checking is required.
433   */
434  static int try_read_node(const struct ubifs_info *c, void *buf, int type,
435  			 int len, int lnum, int offs)
436  {
437  	int err, node_len;
438  	struct ubifs_ch *ch = buf;
439  	uint32_t crc, node_crc;
440  
441  	dbg_io("LEB %d:%d, %s, length %d", lnum, offs, dbg_ntype(type), len);
442  
443  	err = ubifs_leb_read(c, lnum, buf, offs, len, 1);
444  	if (err) {
445  		ubifs_err(c, "cannot read node type %d from LEB %d:%d, error %d",
446  			  type, lnum, offs, err);
447  		return err;
448  	}
449  
450  	if (le32_to_cpu(ch->magic) != UBIFS_NODE_MAGIC)
451  		return 0;
452  
453  	if (ch->node_type != type)
454  		return 0;
455  
456  	node_len = le32_to_cpu(ch->len);
457  	if (node_len != len)
458  		return 0;
459  
460  	if (type == UBIFS_DATA_NODE && c->no_chk_data_crc && !c->mounting &&
461  	    !c->remounting_rw)
462  		return 1;
463  
464  	crc = crc32(UBIFS_CRC32_INIT, buf + 8, node_len - 8);
465  	node_crc = le32_to_cpu(ch->crc);
466  	if (crc != node_crc)
467  		return 0;
468  
469  	return 1;
470  }
471  
472  /**
473   * fallible_read_node - try to read a leaf node.
474   * @c: UBIFS file-system description object
475   * @key:  key of node to read
476   * @zbr:  position of node
477   * @node: node returned
478   *
479   * This function tries to read a node and returns %1 if the node is read, %0
480   * if the node is not present, and a negative error code in the case of error.
481   */
482  static int fallible_read_node(struct ubifs_info *c, const union ubifs_key *key,
483  			      struct ubifs_zbranch *zbr, void *node)
484  {
485  	int ret;
486  
487  	dbg_tnck(key, "LEB %d:%d, key ", zbr->lnum, zbr->offs);
488  
489  	ret = try_read_node(c, node, key_type(c, key), zbr->len, zbr->lnum,
490  			    zbr->offs);
491  	if (ret == 1) {
492  		union ubifs_key node_key;
493  		struct ubifs_dent_node *dent = node;
494  
495  		/* All nodes have key in the same place */
496  		key_read(c, &dent->key, &node_key);
497  		if (keys_cmp(c, key, &node_key) != 0)
498  			ret = 0;
499  	}
500  	if (ret == 0 && c->replaying)
501  		dbg_mntk(key, "dangling branch LEB %d:%d len %d, key ",
502  			zbr->lnum, zbr->offs, zbr->len);
503  	return ret;
504  }
505  
506  /**
507   * matches_name - determine if a direntry or xattr entry matches a given name.
508   * @c: UBIFS file-system description object
509   * @zbr: zbranch of dent
510   * @nm: name to match
511   *
512   * This function checks if xentry/direntry referred by zbranch @zbr matches name
513   * @nm. Returns %NAME_MATCHES if it does, %NAME_LESS if the name referred by
514   * @zbr is less than @nm, and %NAME_GREATER if it is greater than @nm. In case
515   * of failure, a negative error code is returned.
516   */
517  static int matches_name(struct ubifs_info *c, struct ubifs_zbranch *zbr,
518  			const struct qstr *nm)
519  {
520  	struct ubifs_dent_node *dent;
521  	int nlen, err;
522  
523  	/* If possible, match against the dent in the leaf node cache */
524  	if (!zbr->leaf) {
525  		dent = kmalloc(zbr->len, GFP_NOFS);
526  		if (!dent)
527  			return -ENOMEM;
528  
529  		err = ubifs_tnc_read_node(c, zbr, dent);
530  		if (err)
531  			goto out_free;
532  
533  		/* Add the node to the leaf node cache */
534  		err = lnc_add_directly(c, zbr, dent);
535  		if (err)
536  			goto out_free;
537  	} else
538  		dent = zbr->leaf;
539  
540  	nlen = le16_to_cpu(dent->nlen);
541  	err = memcmp(dent->name, nm->name, min_t(int, nlen, nm->len));
542  	if (err == 0) {
543  		if (nlen == nm->len)
544  			return NAME_MATCHES;
545  		else if (nlen < nm->len)
546  			return NAME_LESS;
547  		else
548  			return NAME_GREATER;
549  	} else if (err < 0)
550  		return NAME_LESS;
551  	else
552  		return NAME_GREATER;
553  
554  out_free:
555  	kfree(dent);
556  	return err;
557  }
558  
559  /**
560   * get_znode - get a TNC znode that may not be loaded yet.
561   * @c: UBIFS file-system description object
562   * @znode: parent znode
563   * @n: znode branch slot number
564   *
565   * This function returns the znode or a negative error code.
566   */
567  static struct ubifs_znode *get_znode(struct ubifs_info *c,
568  				     struct ubifs_znode *znode, int n)
569  {
570  	struct ubifs_zbranch *zbr;
571  
572  	zbr = &znode->zbranch[n];
573  	if (zbr->znode)
574  		znode = zbr->znode;
575  	else
576  		znode = ubifs_load_znode(c, zbr, znode, n);
577  	return znode;
578  }
579  
580  /**
581   * tnc_next - find next TNC entry.
582   * @c: UBIFS file-system description object
583   * @zn: znode is passed and returned here
584   * @n: znode branch slot number is passed and returned here
585   *
586   * This function returns %0 if the next TNC entry is found, %-ENOENT if there is
587   * no next entry, or a negative error code otherwise.
588   */
589  static int tnc_next(struct ubifs_info *c, struct ubifs_znode **zn, int *n)
590  {
591  	struct ubifs_znode *znode = *zn;
592  	int nn = *n;
593  
594  	nn += 1;
595  	if (nn < znode->child_cnt) {
596  		*n = nn;
597  		return 0;
598  	}
599  	while (1) {
600  		struct ubifs_znode *zp;
601  
602  		zp = znode->parent;
603  		if (!zp)
604  			return -ENOENT;
605  		nn = znode->iip + 1;
606  		znode = zp;
607  		if (nn < znode->child_cnt) {
608  			znode = get_znode(c, znode, nn);
609  			if (IS_ERR(znode))
610  				return PTR_ERR(znode);
611  			while (znode->level != 0) {
612  				znode = get_znode(c, znode, 0);
613  				if (IS_ERR(znode))
614  					return PTR_ERR(znode);
615  			}
616  			nn = 0;
617  			break;
618  		}
619  	}
620  	*zn = znode;
621  	*n = nn;
622  	return 0;
623  }
624  
625  /**
626   * tnc_prev - find previous TNC entry.
627   * @c: UBIFS file-system description object
628   * @zn: znode is returned here
629   * @n: znode branch slot number is passed and returned here
630   *
631   * This function returns %0 if the previous TNC entry is found, %-ENOENT if
632   * there is no next entry, or a negative error code otherwise.
633   */
634  static int tnc_prev(struct ubifs_info *c, struct ubifs_znode **zn, int *n)
635  {
636  	struct ubifs_znode *znode = *zn;
637  	int nn = *n;
638  
639  	if (nn > 0) {
640  		*n = nn - 1;
641  		return 0;
642  	}
643  	while (1) {
644  		struct ubifs_znode *zp;
645  
646  		zp = znode->parent;
647  		if (!zp)
648  			return -ENOENT;
649  		nn = znode->iip - 1;
650  		znode = zp;
651  		if (nn >= 0) {
652  			znode = get_znode(c, znode, nn);
653  			if (IS_ERR(znode))
654  				return PTR_ERR(znode);
655  			while (znode->level != 0) {
656  				nn = znode->child_cnt - 1;
657  				znode = get_znode(c, znode, nn);
658  				if (IS_ERR(znode))
659  					return PTR_ERR(znode);
660  			}
661  			nn = znode->child_cnt - 1;
662  			break;
663  		}
664  	}
665  	*zn = znode;
666  	*n = nn;
667  	return 0;
668  }
669  
670  /**
671   * resolve_collision - resolve a collision.
672   * @c: UBIFS file-system description object
673   * @key: key of a directory or extended attribute entry
674   * @zn: znode is returned here
675   * @n: zbranch number is passed and returned here
676   * @nm: name of the entry
677   *
678   * This function is called for "hashed" keys to make sure that the found key
679   * really corresponds to the looked up node (directory or extended attribute
680   * entry). It returns %1 and sets @zn and @n if the collision is resolved.
681   * %0 is returned if @nm is not found and @zn and @n are set to the previous
682   * entry, i.e. to the entry after which @nm could follow if it were in TNC.
683   * This means that @n may be set to %-1 if the leftmost key in @zn is the
684   * previous one. A negative error code is returned on failures.
685   */
686  static int resolve_collision(struct ubifs_info *c, const union ubifs_key *key,
687  			     struct ubifs_znode **zn, int *n,
688  			     const struct qstr *nm)
689  {
690  	int err;
691  
692  	err = matches_name(c, &(*zn)->zbranch[*n], nm);
693  	if (unlikely(err < 0))
694  		return err;
695  	if (err == NAME_MATCHES)
696  		return 1;
697  
698  	if (err == NAME_GREATER) {
699  		/* Look left */
700  		while (1) {
701  			err = tnc_prev(c, zn, n);
702  			if (err == -ENOENT) {
703  				ubifs_assert(*n == 0);
704  				*n = -1;
705  				return 0;
706  			}
707  			if (err < 0)
708  				return err;
709  			if (keys_cmp(c, &(*zn)->zbranch[*n].key, key)) {
710  				/*
711  				 * We have found the branch after which we would
712  				 * like to insert, but inserting in this znode
713  				 * may still be wrong. Consider the following 3
714  				 * znodes, in the case where we are resolving a
715  				 * collision with Key2.
716  				 *
717  				 *                  znode zp
718  				 *            ----------------------
719  				 * level 1     |  Key0  |  Key1  |
720  				 *            -----------------------
721  				 *                 |            |
722  				 *       znode za  |            |  znode zb
723  				 *          ------------      ------------
724  				 * level 0  |  Key0  |        |  Key2  |
725  				 *          ------------      ------------
726  				 *
727  				 * The lookup finds Key2 in znode zb. Lets say
728  				 * there is no match and the name is greater so
729  				 * we look left. When we find Key0, we end up
730  				 * here. If we return now, we will insert into
731  				 * znode za at slot n = 1.  But that is invalid
732  				 * according to the parent's keys.  Key2 must
733  				 * be inserted into znode zb.
734  				 *
735  				 * Note, this problem is not relevant for the
736  				 * case when we go right, because
737  				 * 'tnc_insert()' would correct the parent key.
738  				 */
739  				if (*n == (*zn)->child_cnt - 1) {
740  					err = tnc_next(c, zn, n);
741  					if (err) {
742  						/* Should be impossible */
743  						ubifs_assert(0);
744  						if (err == -ENOENT)
745  							err = -EINVAL;
746  						return err;
747  					}
748  					ubifs_assert(*n == 0);
749  					*n = -1;
750  				}
751  				return 0;
752  			}
753  			err = matches_name(c, &(*zn)->zbranch[*n], nm);
754  			if (err < 0)
755  				return err;
756  			if (err == NAME_LESS)
757  				return 0;
758  			if (err == NAME_MATCHES)
759  				return 1;
760  			ubifs_assert(err == NAME_GREATER);
761  		}
762  	} else {
763  		int nn = *n;
764  		struct ubifs_znode *znode = *zn;
765  
766  		/* Look right */
767  		while (1) {
768  			err = tnc_next(c, &znode, &nn);
769  			if (err == -ENOENT)
770  				return 0;
771  			if (err < 0)
772  				return err;
773  			if (keys_cmp(c, &znode->zbranch[nn].key, key))
774  				return 0;
775  			err = matches_name(c, &znode->zbranch[nn], nm);
776  			if (err < 0)
777  				return err;
778  			if (err == NAME_GREATER)
779  				return 0;
780  			*zn = znode;
781  			*n = nn;
782  			if (err == NAME_MATCHES)
783  				return 1;
784  			ubifs_assert(err == NAME_LESS);
785  		}
786  	}
787  }
788  
789  /**
790   * fallible_matches_name - determine if a dent matches a given name.
791   * @c: UBIFS file-system description object
792   * @zbr: zbranch of dent
793   * @nm: name to match
794   *
795   * This is a "fallible" version of 'matches_name()' function which does not
796   * panic if the direntry/xentry referred by @zbr does not exist on the media.
797   *
798   * This function checks if xentry/direntry referred by zbranch @zbr matches name
799   * @nm. Returns %NAME_MATCHES it does, %NAME_LESS if the name referred by @zbr
800   * is less than @nm, %NAME_GREATER if it is greater than @nm, and @NOT_ON_MEDIA
801   * if xentry/direntry referred by @zbr does not exist on the media. A negative
802   * error code is returned in case of failure.
803   */
804  static int fallible_matches_name(struct ubifs_info *c,
805  				 struct ubifs_zbranch *zbr,
806  				 const struct qstr *nm)
807  {
808  	struct ubifs_dent_node *dent;
809  	int nlen, err;
810  
811  	/* If possible, match against the dent in the leaf node cache */
812  	if (!zbr->leaf) {
813  		dent = kmalloc(zbr->len, GFP_NOFS);
814  		if (!dent)
815  			return -ENOMEM;
816  
817  		err = fallible_read_node(c, &zbr->key, zbr, dent);
818  		if (err < 0)
819  			goto out_free;
820  		if (err == 0) {
821  			/* The node was not present */
822  			err = NOT_ON_MEDIA;
823  			goto out_free;
824  		}
825  		ubifs_assert(err == 1);
826  
827  		err = lnc_add_directly(c, zbr, dent);
828  		if (err)
829  			goto out_free;
830  	} else
831  		dent = zbr->leaf;
832  
833  	nlen = le16_to_cpu(dent->nlen);
834  	err = memcmp(dent->name, nm->name, min_t(int, nlen, nm->len));
835  	if (err == 0) {
836  		if (nlen == nm->len)
837  			return NAME_MATCHES;
838  		else if (nlen < nm->len)
839  			return NAME_LESS;
840  		else
841  			return NAME_GREATER;
842  	} else if (err < 0)
843  		return NAME_LESS;
844  	else
845  		return NAME_GREATER;
846  
847  out_free:
848  	kfree(dent);
849  	return err;
850  }
851  
852  /**
853   * fallible_resolve_collision - resolve a collision even if nodes are missing.
854   * @c: UBIFS file-system description object
855   * @key: key
856   * @zn: znode is returned here
857   * @n: branch number is passed and returned here
858   * @nm: name of directory entry
859   * @adding: indicates caller is adding a key to the TNC
860   *
861   * This is a "fallible" version of the 'resolve_collision()' function which
862   * does not panic if one of the nodes referred to by TNC does not exist on the
863   * media. This may happen when replaying the journal if a deleted node was
864   * Garbage-collected and the commit was not done. A branch that refers to a node
865   * that is not present is called a dangling branch. The following are the return
866   * codes for this function:
867   *  o if @nm was found, %1 is returned and @zn and @n are set to the found
868   *    branch;
869   *  o if we are @adding and @nm was not found, %0 is returned;
870   *  o if we are not @adding and @nm was not found, but a dangling branch was
871   *    found, then %1 is returned and @zn and @n are set to the dangling branch;
872   *  o a negative error code is returned in case of failure.
873   */
874  static int fallible_resolve_collision(struct ubifs_info *c,
875  				      const union ubifs_key *key,
876  				      struct ubifs_znode **zn, int *n,
877  				      const struct qstr *nm, int adding)
878  {
879  	struct ubifs_znode *o_znode = NULL, *znode = *zn;
880  	int uninitialized_var(o_n), err, cmp, unsure = 0, nn = *n;
881  
882  	cmp = fallible_matches_name(c, &znode->zbranch[nn], nm);
883  	if (unlikely(cmp < 0))
884  		return cmp;
885  	if (cmp == NAME_MATCHES)
886  		return 1;
887  	if (cmp == NOT_ON_MEDIA) {
888  		o_znode = znode;
889  		o_n = nn;
890  		/*
891  		 * We are unlucky and hit a dangling branch straight away.
892  		 * Now we do not really know where to go to find the needed
893  		 * branch - to the left or to the right. Well, let's try left.
894  		 */
895  		unsure = 1;
896  	} else if (!adding)
897  		unsure = 1; /* Remove a dangling branch wherever it is */
898  
899  	if (cmp == NAME_GREATER || unsure) {
900  		/* Look left */
901  		while (1) {
902  			err = tnc_prev(c, zn, n);
903  			if (err == -ENOENT) {
904  				ubifs_assert(*n == 0);
905  				*n = -1;
906  				break;
907  			}
908  			if (err < 0)
909  				return err;
910  			if (keys_cmp(c, &(*zn)->zbranch[*n].key, key)) {
911  				/* See comments in 'resolve_collision()' */
912  				if (*n == (*zn)->child_cnt - 1) {
913  					err = tnc_next(c, zn, n);
914  					if (err) {
915  						/* Should be impossible */
916  						ubifs_assert(0);
917  						if (err == -ENOENT)
918  							err = -EINVAL;
919  						return err;
920  					}
921  					ubifs_assert(*n == 0);
922  					*n = -1;
923  				}
924  				break;
925  			}
926  			err = fallible_matches_name(c, &(*zn)->zbranch[*n], nm);
927  			if (err < 0)
928  				return err;
929  			if (err == NAME_MATCHES)
930  				return 1;
931  			if (err == NOT_ON_MEDIA) {
932  				o_znode = *zn;
933  				o_n = *n;
934  				continue;
935  			}
936  			if (!adding)
937  				continue;
938  			if (err == NAME_LESS)
939  				break;
940  			else
941  				unsure = 0;
942  		}
943  	}
944  
945  	if (cmp == NAME_LESS || unsure) {
946  		/* Look right */
947  		*zn = znode;
948  		*n = nn;
949  		while (1) {
950  			err = tnc_next(c, &znode, &nn);
951  			if (err == -ENOENT)
952  				break;
953  			if (err < 0)
954  				return err;
955  			if (keys_cmp(c, &znode->zbranch[nn].key, key))
956  				break;
957  			err = fallible_matches_name(c, &znode->zbranch[nn], nm);
958  			if (err < 0)
959  				return err;
960  			if (err == NAME_GREATER)
961  				break;
962  			*zn = znode;
963  			*n = nn;
964  			if (err == NAME_MATCHES)
965  				return 1;
966  			if (err == NOT_ON_MEDIA) {
967  				o_znode = znode;
968  				o_n = nn;
969  			}
970  		}
971  	}
972  
973  	/* Never match a dangling branch when adding */
974  	if (adding || !o_znode)
975  		return 0;
976  
977  	dbg_mntk(key, "dangling match LEB %d:%d len %d key ",
978  		o_znode->zbranch[o_n].lnum, o_znode->zbranch[o_n].offs,
979  		o_znode->zbranch[o_n].len);
980  	*zn = o_znode;
981  	*n = o_n;
982  	return 1;
983  }
984  
985  /**
986   * matches_position - determine if a zbranch matches a given position.
987   * @zbr: zbranch of dent
988   * @lnum: LEB number of dent to match
989   * @offs: offset of dent to match
990   *
991   * This function returns %1 if @lnum:@offs matches, and %0 otherwise.
992   */
993  static int matches_position(struct ubifs_zbranch *zbr, int lnum, int offs)
994  {
995  	if (zbr->lnum == lnum && zbr->offs == offs)
996  		return 1;
997  	else
998  		return 0;
999  }
1000  
1001  /**
1002   * resolve_collision_directly - resolve a collision directly.
1003   * @c: UBIFS file-system description object
1004   * @key: key of directory entry
1005   * @zn: znode is passed and returned here
1006   * @n: zbranch number is passed and returned here
1007   * @lnum: LEB number of dent node to match
1008   * @offs: offset of dent node to match
1009   *
1010   * This function is used for "hashed" keys to make sure the found directory or
1011   * extended attribute entry node is what was looked for. It is used when the
1012   * flash address of the right node is known (@lnum:@offs) which makes it much
1013   * easier to resolve collisions (no need to read entries and match full
1014   * names). This function returns %1 and sets @zn and @n if the collision is
1015   * resolved, %0 if @lnum:@offs is not found and @zn and @n are set to the
1016   * previous directory entry. Otherwise a negative error code is returned.
1017   */
1018  static int resolve_collision_directly(struct ubifs_info *c,
1019  				      const union ubifs_key *key,
1020  				      struct ubifs_znode **zn, int *n,
1021  				      int lnum, int offs)
1022  {
1023  	struct ubifs_znode *znode;
1024  	int nn, err;
1025  
1026  	znode = *zn;
1027  	nn = *n;
1028  	if (matches_position(&znode->zbranch[nn], lnum, offs))
1029  		return 1;
1030  
1031  	/* Look left */
1032  	while (1) {
1033  		err = tnc_prev(c, &znode, &nn);
1034  		if (err == -ENOENT)
1035  			break;
1036  		if (err < 0)
1037  			return err;
1038  		if (keys_cmp(c, &znode->zbranch[nn].key, key))
1039  			break;
1040  		if (matches_position(&znode->zbranch[nn], lnum, offs)) {
1041  			*zn = znode;
1042  			*n = nn;
1043  			return 1;
1044  		}
1045  	}
1046  
1047  	/* Look right */
1048  	znode = *zn;
1049  	nn = *n;
1050  	while (1) {
1051  		err = tnc_next(c, &znode, &nn);
1052  		if (err == -ENOENT)
1053  			return 0;
1054  		if (err < 0)
1055  			return err;
1056  		if (keys_cmp(c, &znode->zbranch[nn].key, key))
1057  			return 0;
1058  		*zn = znode;
1059  		*n = nn;
1060  		if (matches_position(&znode->zbranch[nn], lnum, offs))
1061  			return 1;
1062  	}
1063  }
1064  
1065  /**
1066   * dirty_cow_bottom_up - dirty a znode and its ancestors.
1067   * @c: UBIFS file-system description object
1068   * @znode: znode to dirty
1069   *
1070   * If we do not have a unique key that resides in a znode, then we cannot
1071   * dirty that znode from the top down (i.e. by using lookup_level0_dirty)
1072   * This function records the path back to the last dirty ancestor, and then
1073   * dirties the znodes on that path.
1074   */
1075  static struct ubifs_znode *dirty_cow_bottom_up(struct ubifs_info *c,
1076  					       struct ubifs_znode *znode)
1077  {
1078  	struct ubifs_znode *zp;
1079  	int *path = c->bottom_up_buf, p = 0;
1080  
1081  	ubifs_assert(c->zroot.znode);
1082  	ubifs_assert(znode);
1083  	if (c->zroot.znode->level > BOTTOM_UP_HEIGHT) {
1084  		kfree(c->bottom_up_buf);
1085  		c->bottom_up_buf = kmalloc(c->zroot.znode->level * sizeof(int),
1086  					   GFP_NOFS);
1087  		if (!c->bottom_up_buf)
1088  			return ERR_PTR(-ENOMEM);
1089  		path = c->bottom_up_buf;
1090  	}
1091  	if (c->zroot.znode->level) {
1092  		/* Go up until parent is dirty */
1093  		while (1) {
1094  			int n;
1095  
1096  			zp = znode->parent;
1097  			if (!zp)
1098  				break;
1099  			n = znode->iip;
1100  			ubifs_assert(p < c->zroot.znode->level);
1101  			path[p++] = n;
1102  			if (!zp->cnext && ubifs_zn_dirty(znode))
1103  				break;
1104  			znode = zp;
1105  		}
1106  	}
1107  
1108  	/* Come back down, dirtying as we go */
1109  	while (1) {
1110  		struct ubifs_zbranch *zbr;
1111  
1112  		zp = znode->parent;
1113  		if (zp) {
1114  			ubifs_assert(path[p - 1] >= 0);
1115  			ubifs_assert(path[p - 1] < zp->child_cnt);
1116  			zbr = &zp->zbranch[path[--p]];
1117  			znode = dirty_cow_znode(c, zbr);
1118  		} else {
1119  			ubifs_assert(znode == c->zroot.znode);
1120  			znode = dirty_cow_znode(c, &c->zroot);
1121  		}
1122  		if (IS_ERR(znode) || !p)
1123  			break;
1124  		ubifs_assert(path[p - 1] >= 0);
1125  		ubifs_assert(path[p - 1] < znode->child_cnt);
1126  		znode = znode->zbranch[path[p - 1]].znode;
1127  	}
1128  
1129  	return znode;
1130  }
1131  
1132  /**
1133   * ubifs_lookup_level0 - search for zero-level znode.
1134   * @c: UBIFS file-system description object
1135   * @key:  key to lookup
1136   * @zn: znode is returned here
1137   * @n: znode branch slot number is returned here
1138   *
1139   * This function looks up the TNC tree and search for zero-level znode which
1140   * refers key @key. The found zero-level znode is returned in @zn. There are 3
1141   * cases:
1142   *   o exact match, i.e. the found zero-level znode contains key @key, then %1
1143   *     is returned and slot number of the matched branch is stored in @n;
1144   *   o not exact match, which means that zero-level znode does not contain
1145   *     @key, then %0 is returned and slot number of the closest branch is stored
1146   *     in @n;
1147   *   o @key is so small that it is even less than the lowest key of the
1148   *     leftmost zero-level node, then %0 is returned and %0 is stored in @n.
1149   *
1150   * Note, when the TNC tree is traversed, some znodes may be absent, then this
1151   * function reads corresponding indexing nodes and inserts them to TNC. In
1152   * case of failure, a negative error code is returned.
1153   */
1154  int ubifs_lookup_level0(struct ubifs_info *c, const union ubifs_key *key,
1155  			struct ubifs_znode **zn, int *n)
1156  {
1157  	int err, exact;
1158  	struct ubifs_znode *znode;
1159  	unsigned long time = get_seconds();
1160  
1161  	dbg_tnck(key, "search key ");
1162  	ubifs_assert(key_type(c, key) < UBIFS_INVALID_KEY);
1163  
1164  	znode = c->zroot.znode;
1165  	if (unlikely(!znode)) {
1166  		znode = ubifs_load_znode(c, &c->zroot, NULL, 0);
1167  		if (IS_ERR(znode))
1168  			return PTR_ERR(znode);
1169  	}
1170  
1171  	znode->time = time;
1172  
1173  	while (1) {
1174  		struct ubifs_zbranch *zbr;
1175  
1176  		exact = ubifs_search_zbranch(c, znode, key, n);
1177  
1178  		if (znode->level == 0)
1179  			break;
1180  
1181  		if (*n < 0)
1182  			*n = 0;
1183  		zbr = &znode->zbranch[*n];
1184  
1185  		if (zbr->znode) {
1186  			znode->time = time;
1187  			znode = zbr->znode;
1188  			continue;
1189  		}
1190  
1191  		/* znode is not in TNC cache, load it from the media */
1192  		znode = ubifs_load_znode(c, zbr, znode, *n);
1193  		if (IS_ERR(znode))
1194  			return PTR_ERR(znode);
1195  	}
1196  
1197  	*zn = znode;
1198  	if (exact || !is_hash_key(c, key) || *n != -1) {
1199  		dbg_tnc("found %d, lvl %d, n %d", exact, znode->level, *n);
1200  		return exact;
1201  	}
1202  
1203  	/*
1204  	 * Here is a tricky place. We have not found the key and this is a
1205  	 * "hashed" key, which may collide. The rest of the code deals with
1206  	 * situations like this:
1207  	 *
1208  	 *                  | 3 | 5 |
1209  	 *                  /       \
1210  	 *          | 3 | 5 |      | 6 | 7 | (x)
1211  	 *
1212  	 * Or more a complex example:
1213  	 *
1214  	 *                | 1 | 5 |
1215  	 *                /       \
1216  	 *       | 1 | 3 |         | 5 | 8 |
1217  	 *              \           /
1218  	 *          | 5 | 5 |   | 6 | 7 | (x)
1219  	 *
1220  	 * In the examples, if we are looking for key "5", we may reach nodes
1221  	 * marked with "(x)". In this case what we have do is to look at the
1222  	 * left and see if there is "5" key there. If there is, we have to
1223  	 * return it.
1224  	 *
1225  	 * Note, this whole situation is possible because we allow to have
1226  	 * elements which are equivalent to the next key in the parent in the
1227  	 * children of current znode. For example, this happens if we split a
1228  	 * znode like this: | 3 | 5 | 5 | 6 | 7 |, which results in something
1229  	 * like this:
1230  	 *                      | 3 | 5 |
1231  	 *                       /     \
1232  	 *                | 3 | 5 |   | 5 | 6 | 7 |
1233  	 *                              ^
1234  	 * And this becomes what is at the first "picture" after key "5" marked
1235  	 * with "^" is removed. What could be done is we could prohibit
1236  	 * splitting in the middle of the colliding sequence. Also, when
1237  	 * removing the leftmost key, we would have to correct the key of the
1238  	 * parent node, which would introduce additional complications. Namely,
1239  	 * if we changed the leftmost key of the parent znode, the garbage
1240  	 * collector would be unable to find it (GC is doing this when GC'ing
1241  	 * indexing LEBs). Although we already have an additional RB-tree where
1242  	 * we save such changed znodes (see 'ins_clr_old_idx_znode()') until
1243  	 * after the commit. But anyway, this does not look easy to implement
1244  	 * so we did not try this.
1245  	 */
1246  	err = tnc_prev(c, &znode, n);
1247  	if (err == -ENOENT) {
1248  		dbg_tnc("found 0, lvl %d, n -1", znode->level);
1249  		*n = -1;
1250  		return 0;
1251  	}
1252  	if (unlikely(err < 0))
1253  		return err;
1254  	if (keys_cmp(c, key, &znode->zbranch[*n].key)) {
1255  		dbg_tnc("found 0, lvl %d, n -1", znode->level);
1256  		*n = -1;
1257  		return 0;
1258  	}
1259  
1260  	dbg_tnc("found 1, lvl %d, n %d", znode->level, *n);
1261  	*zn = znode;
1262  	return 1;
1263  }
1264  
1265  /**
1266   * lookup_level0_dirty - search for zero-level znode dirtying.
1267   * @c: UBIFS file-system description object
1268   * @key:  key to lookup
1269   * @zn: znode is returned here
1270   * @n: znode branch slot number is returned here
1271   *
1272   * This function looks up the TNC tree and search for zero-level znode which
1273   * refers key @key. The found zero-level znode is returned in @zn. There are 3
1274   * cases:
1275   *   o exact match, i.e. the found zero-level znode contains key @key, then %1
1276   *     is returned and slot number of the matched branch is stored in @n;
1277   *   o not exact match, which means that zero-level znode does not contain @key
1278   *     then %0 is returned and slot number of the closed branch is stored in
1279   *     @n;
1280   *   o @key is so small that it is even less than the lowest key of the
1281   *     leftmost zero-level node, then %0 is returned and %-1 is stored in @n.
1282   *
1283   * Additionally all znodes in the path from the root to the located zero-level
1284   * znode are marked as dirty.
1285   *
1286   * Note, when the TNC tree is traversed, some znodes may be absent, then this
1287   * function reads corresponding indexing nodes and inserts them to TNC. In
1288   * case of failure, a negative error code is returned.
1289   */
1290  static int lookup_level0_dirty(struct ubifs_info *c, const union ubifs_key *key,
1291  			       struct ubifs_znode **zn, int *n)
1292  {
1293  	int err, exact;
1294  	struct ubifs_znode *znode;
1295  	unsigned long time = get_seconds();
1296  
1297  	dbg_tnck(key, "search and dirty key ");
1298  
1299  	znode = c->zroot.znode;
1300  	if (unlikely(!znode)) {
1301  		znode = ubifs_load_znode(c, &c->zroot, NULL, 0);
1302  		if (IS_ERR(znode))
1303  			return PTR_ERR(znode);
1304  	}
1305  
1306  	znode = dirty_cow_znode(c, &c->zroot);
1307  	if (IS_ERR(znode))
1308  		return PTR_ERR(znode);
1309  
1310  	znode->time = time;
1311  
1312  	while (1) {
1313  		struct ubifs_zbranch *zbr;
1314  
1315  		exact = ubifs_search_zbranch(c, znode, key, n);
1316  
1317  		if (znode->level == 0)
1318  			break;
1319  
1320  		if (*n < 0)
1321  			*n = 0;
1322  		zbr = &znode->zbranch[*n];
1323  
1324  		if (zbr->znode) {
1325  			znode->time = time;
1326  			znode = dirty_cow_znode(c, zbr);
1327  			if (IS_ERR(znode))
1328  				return PTR_ERR(znode);
1329  			continue;
1330  		}
1331  
1332  		/* znode is not in TNC cache, load it from the media */
1333  		znode = ubifs_load_znode(c, zbr, znode, *n);
1334  		if (IS_ERR(znode))
1335  			return PTR_ERR(znode);
1336  		znode = dirty_cow_znode(c, zbr);
1337  		if (IS_ERR(znode))
1338  			return PTR_ERR(znode);
1339  	}
1340  
1341  	*zn = znode;
1342  	if (exact || !is_hash_key(c, key) || *n != -1) {
1343  		dbg_tnc("found %d, lvl %d, n %d", exact, znode->level, *n);
1344  		return exact;
1345  	}
1346  
1347  	/*
1348  	 * See huge comment at 'lookup_level0_dirty()' what is the rest of the
1349  	 * code.
1350  	 */
1351  	err = tnc_prev(c, &znode, n);
1352  	if (err == -ENOENT) {
1353  		*n = -1;
1354  		dbg_tnc("found 0, lvl %d, n -1", znode->level);
1355  		return 0;
1356  	}
1357  	if (unlikely(err < 0))
1358  		return err;
1359  	if (keys_cmp(c, key, &znode->zbranch[*n].key)) {
1360  		*n = -1;
1361  		dbg_tnc("found 0, lvl %d, n -1", znode->level);
1362  		return 0;
1363  	}
1364  
1365  	if (znode->cnext || !ubifs_zn_dirty(znode)) {
1366  		znode = dirty_cow_bottom_up(c, znode);
1367  		if (IS_ERR(znode))
1368  			return PTR_ERR(znode);
1369  	}
1370  
1371  	dbg_tnc("found 1, lvl %d, n %d", znode->level, *n);
1372  	*zn = znode;
1373  	return 1;
1374  }
1375  
1376  /**
1377   * maybe_leb_gced - determine if a LEB may have been garbage collected.
1378   * @c: UBIFS file-system description object
1379   * @lnum: LEB number
1380   * @gc_seq1: garbage collection sequence number
1381   *
1382   * This function determines if @lnum may have been garbage collected since
1383   * sequence number @gc_seq1. If it may have been then %1 is returned, otherwise
1384   * %0 is returned.
1385   */
1386  static int maybe_leb_gced(struct ubifs_info *c, int lnum, int gc_seq1)
1387  {
1388  #ifndef __UBOOT__
1389  	int gc_seq2, gced_lnum;
1390  
1391  	gced_lnum = c->gced_lnum;
1392  	smp_rmb();
1393  	gc_seq2 = c->gc_seq;
1394  	/* Same seq means no GC */
1395  	if (gc_seq1 == gc_seq2)
1396  		return 0;
1397  	/* Different by more than 1 means we don't know */
1398  	if (gc_seq1 + 1 != gc_seq2)
1399  		return 1;
1400  	/*
1401  	 * We have seen the sequence number has increased by 1. Now we need to
1402  	 * be sure we read the right LEB number, so read it again.
1403  	 */
1404  	smp_rmb();
1405  	if (gced_lnum != c->gced_lnum)
1406  		return 1;
1407  	/* Finally we can check lnum */
1408  	if (gced_lnum == lnum)
1409  		return 1;
1410  #else
1411  	/* No garbage collection in the read-only U-Boot implementation */
1412  #endif
1413  	return 0;
1414  }
1415  
1416  /**
1417   * ubifs_tnc_locate - look up a file-system node and return it and its location.
1418   * @c: UBIFS file-system description object
1419   * @key: node key to lookup
1420   * @node: the node is returned here
1421   * @lnum: LEB number is returned here
1422   * @offs: offset is returned here
1423   *
1424   * This function looks up and reads node with key @key. The caller has to make
1425   * sure the @node buffer is large enough to fit the node. Returns zero in case
1426   * of success, %-ENOENT if the node was not found, and a negative error code in
1427   * case of failure. The node location can be returned in @lnum and @offs.
1428   */
1429  int ubifs_tnc_locate(struct ubifs_info *c, const union ubifs_key *key,
1430  		     void *node, int *lnum, int *offs)
1431  {
1432  	int found, n, err, safely = 0, gc_seq1;
1433  	struct ubifs_znode *znode;
1434  	struct ubifs_zbranch zbr, *zt;
1435  
1436  again:
1437  	mutex_lock(&c->tnc_mutex);
1438  	found = ubifs_lookup_level0(c, key, &znode, &n);
1439  	if (!found) {
1440  		err = -ENOENT;
1441  		goto out;
1442  	} else if (found < 0) {
1443  		err = found;
1444  		goto out;
1445  	}
1446  	zt = &znode->zbranch[n];
1447  	if (lnum) {
1448  		*lnum = zt->lnum;
1449  		*offs = zt->offs;
1450  	}
1451  	if (is_hash_key(c, key)) {
1452  		/*
1453  		 * In this case the leaf node cache gets used, so we pass the
1454  		 * address of the zbranch and keep the mutex locked
1455  		 */
1456  		err = tnc_read_node_nm(c, zt, node);
1457  		goto out;
1458  	}
1459  	if (safely) {
1460  		err = ubifs_tnc_read_node(c, zt, node);
1461  		goto out;
1462  	}
1463  	/* Drop the TNC mutex prematurely and race with garbage collection */
1464  	zbr = znode->zbranch[n];
1465  	gc_seq1 = c->gc_seq;
1466  	mutex_unlock(&c->tnc_mutex);
1467  
1468  	if (ubifs_get_wbuf(c, zbr.lnum)) {
1469  		/* We do not GC journal heads */
1470  		err = ubifs_tnc_read_node(c, &zbr, node);
1471  		return err;
1472  	}
1473  
1474  	err = fallible_read_node(c, key, &zbr, node);
1475  	if (err <= 0 || maybe_leb_gced(c, zbr.lnum, gc_seq1)) {
1476  		/*
1477  		 * The node may have been GC'ed out from under us so try again
1478  		 * while keeping the TNC mutex locked.
1479  		 */
1480  		safely = 1;
1481  		goto again;
1482  	}
1483  	return 0;
1484  
1485  out:
1486  	mutex_unlock(&c->tnc_mutex);
1487  	return err;
1488  }
1489  
1490  /**
1491   * ubifs_tnc_get_bu_keys - lookup keys for bulk-read.
1492   * @c: UBIFS file-system description object
1493   * @bu: bulk-read parameters and results
1494   *
1495   * Lookup consecutive data node keys for the same inode that reside
1496   * consecutively in the same LEB. This function returns zero in case of success
1497   * and a negative error code in case of failure.
1498   *
1499   * Note, if the bulk-read buffer length (@bu->buf_len) is known, this function
1500   * makes sure bulk-read nodes fit the buffer. Otherwise, this function prepares
1501   * maximum possible amount of nodes for bulk-read.
1502   */
1503  int ubifs_tnc_get_bu_keys(struct ubifs_info *c, struct bu_info *bu)
1504  {
1505  	int n, err = 0, lnum = -1, uninitialized_var(offs);
1506  	int uninitialized_var(len);
1507  	unsigned int block = key_block(c, &bu->key);
1508  	struct ubifs_znode *znode;
1509  
1510  	bu->cnt = 0;
1511  	bu->blk_cnt = 0;
1512  	bu->eof = 0;
1513  
1514  	mutex_lock(&c->tnc_mutex);
1515  	/* Find first key */
1516  	err = ubifs_lookup_level0(c, &bu->key, &znode, &n);
1517  	if (err < 0)
1518  		goto out;
1519  	if (err) {
1520  		/* Key found */
1521  		len = znode->zbranch[n].len;
1522  		/* The buffer must be big enough for at least 1 node */
1523  		if (len > bu->buf_len) {
1524  			err = -EINVAL;
1525  			goto out;
1526  		}
1527  		/* Add this key */
1528  		bu->zbranch[bu->cnt++] = znode->zbranch[n];
1529  		bu->blk_cnt += 1;
1530  		lnum = znode->zbranch[n].lnum;
1531  		offs = ALIGN(znode->zbranch[n].offs + len, 8);
1532  	}
1533  	while (1) {
1534  		struct ubifs_zbranch *zbr;
1535  		union ubifs_key *key;
1536  		unsigned int next_block;
1537  
1538  		/* Find next key */
1539  		err = tnc_next(c, &znode, &n);
1540  		if (err)
1541  			goto out;
1542  		zbr = &znode->zbranch[n];
1543  		key = &zbr->key;
1544  		/* See if there is another data key for this file */
1545  		if (key_inum(c, key) != key_inum(c, &bu->key) ||
1546  		    key_type(c, key) != UBIFS_DATA_KEY) {
1547  			err = -ENOENT;
1548  			goto out;
1549  		}
1550  		if (lnum < 0) {
1551  			/* First key found */
1552  			lnum = zbr->lnum;
1553  			offs = ALIGN(zbr->offs + zbr->len, 8);
1554  			len = zbr->len;
1555  			if (len > bu->buf_len) {
1556  				err = -EINVAL;
1557  				goto out;
1558  			}
1559  		} else {
1560  			/*
1561  			 * The data nodes must be in consecutive positions in
1562  			 * the same LEB.
1563  			 */
1564  			if (zbr->lnum != lnum || zbr->offs != offs)
1565  				goto out;
1566  			offs += ALIGN(zbr->len, 8);
1567  			len = ALIGN(len, 8) + zbr->len;
1568  			/* Must not exceed buffer length */
1569  			if (len > bu->buf_len)
1570  				goto out;
1571  		}
1572  		/* Allow for holes */
1573  		next_block = key_block(c, key);
1574  		bu->blk_cnt += (next_block - block - 1);
1575  		if (bu->blk_cnt >= UBIFS_MAX_BULK_READ)
1576  			goto out;
1577  		block = next_block;
1578  		/* Add this key */
1579  		bu->zbranch[bu->cnt++] = *zbr;
1580  		bu->blk_cnt += 1;
1581  		/* See if we have room for more */
1582  		if (bu->cnt >= UBIFS_MAX_BULK_READ)
1583  			goto out;
1584  		if (bu->blk_cnt >= UBIFS_MAX_BULK_READ)
1585  			goto out;
1586  	}
1587  out:
1588  	if (err == -ENOENT) {
1589  		bu->eof = 1;
1590  		err = 0;
1591  	}
1592  	bu->gc_seq = c->gc_seq;
1593  	mutex_unlock(&c->tnc_mutex);
1594  	if (err)
1595  		return err;
1596  	/*
1597  	 * An enormous hole could cause bulk-read to encompass too many
1598  	 * page cache pages, so limit the number here.
1599  	 */
1600  	if (bu->blk_cnt > UBIFS_MAX_BULK_READ)
1601  		bu->blk_cnt = UBIFS_MAX_BULK_READ;
1602  	/*
1603  	 * Ensure that bulk-read covers a whole number of page cache
1604  	 * pages.
1605  	 */
1606  	if (UBIFS_BLOCKS_PER_PAGE == 1 ||
1607  	    !(bu->blk_cnt & (UBIFS_BLOCKS_PER_PAGE - 1)))
1608  		return 0;
1609  	if (bu->eof) {
1610  		/* At the end of file we can round up */
1611  		bu->blk_cnt += UBIFS_BLOCKS_PER_PAGE - 1;
1612  		return 0;
1613  	}
1614  	/* Exclude data nodes that do not make up a whole page cache page */
1615  	block = key_block(c, &bu->key) + bu->blk_cnt;
1616  	block &= ~(UBIFS_BLOCKS_PER_PAGE - 1);
1617  	while (bu->cnt) {
1618  		if (key_block(c, &bu->zbranch[bu->cnt - 1].key) < block)
1619  			break;
1620  		bu->cnt -= 1;
1621  	}
1622  	return 0;
1623  }
1624  
1625  /**
1626   * read_wbuf - bulk-read from a LEB with a wbuf.
1627   * @wbuf: wbuf that may overlap the read
1628   * @buf: buffer into which to read
1629   * @len: read length
1630   * @lnum: LEB number from which to read
1631   * @offs: offset from which to read
1632   *
1633   * This functions returns %0 on success or a negative error code on failure.
1634   */
1635  static int read_wbuf(struct ubifs_wbuf *wbuf, void *buf, int len, int lnum,
1636  		     int offs)
1637  {
1638  	const struct ubifs_info *c = wbuf->c;
1639  	int rlen, overlap;
1640  
1641  	dbg_io("LEB %d:%d, length %d", lnum, offs, len);
1642  	ubifs_assert(wbuf && lnum >= 0 && lnum < c->leb_cnt && offs >= 0);
1643  	ubifs_assert(!(offs & 7) && offs < c->leb_size);
1644  	ubifs_assert(offs + len <= c->leb_size);
1645  
1646  	spin_lock(&wbuf->lock);
1647  	overlap = (lnum == wbuf->lnum && offs + len > wbuf->offs);
1648  	if (!overlap) {
1649  		/* We may safely unlock the write-buffer and read the data */
1650  		spin_unlock(&wbuf->lock);
1651  		return ubifs_leb_read(c, lnum, buf, offs, len, 0);
1652  	}
1653  
1654  	/* Don't read under wbuf */
1655  	rlen = wbuf->offs - offs;
1656  	if (rlen < 0)
1657  		rlen = 0;
1658  
1659  	/* Copy the rest from the write-buffer */
1660  	memcpy(buf + rlen, wbuf->buf + offs + rlen - wbuf->offs, len - rlen);
1661  	spin_unlock(&wbuf->lock);
1662  
1663  	if (rlen > 0)
1664  		/* Read everything that goes before write-buffer */
1665  		return ubifs_leb_read(c, lnum, buf, offs, rlen, 0);
1666  
1667  	return 0;
1668  }
1669  
1670  /**
1671   * validate_data_node - validate data nodes for bulk-read.
1672   * @c: UBIFS file-system description object
1673   * @buf: buffer containing data node to validate
1674   * @zbr: zbranch of data node to validate
1675   *
1676   * This functions returns %0 on success or a negative error code on failure.
1677   */
1678  static int validate_data_node(struct ubifs_info *c, void *buf,
1679  			      struct ubifs_zbranch *zbr)
1680  {
1681  	union ubifs_key key1;
1682  	struct ubifs_ch *ch = buf;
1683  	int err, len;
1684  
1685  	if (ch->node_type != UBIFS_DATA_NODE) {
1686  		ubifs_err(c, "bad node type (%d but expected %d)",
1687  			  ch->node_type, UBIFS_DATA_NODE);
1688  		goto out_err;
1689  	}
1690  
1691  	err = ubifs_check_node(c, buf, zbr->lnum, zbr->offs, 0, 0);
1692  	if (err) {
1693  		ubifs_err(c, "expected node type %d", UBIFS_DATA_NODE);
1694  		goto out;
1695  	}
1696  
1697  	len = le32_to_cpu(ch->len);
1698  	if (len != zbr->len) {
1699  		ubifs_err(c, "bad node length %d, expected %d", len, zbr->len);
1700  		goto out_err;
1701  	}
1702  
1703  	/* Make sure the key of the read node is correct */
1704  	key_read(c, buf + UBIFS_KEY_OFFSET, &key1);
1705  	if (!keys_eq(c, &zbr->key, &key1)) {
1706  		ubifs_err(c, "bad key in node at LEB %d:%d",
1707  			  zbr->lnum, zbr->offs);
1708  		dbg_tnck(&zbr->key, "looked for key ");
1709  		dbg_tnck(&key1, "found node's key ");
1710  		goto out_err;
1711  	}
1712  
1713  	return 0;
1714  
1715  out_err:
1716  	err = -EINVAL;
1717  out:
1718  	ubifs_err(c, "bad node at LEB %d:%d", zbr->lnum, zbr->offs);
1719  	ubifs_dump_node(c, buf);
1720  	dump_stack();
1721  	return err;
1722  }
1723  
1724  /**
1725   * ubifs_tnc_bulk_read - read a number of data nodes in one go.
1726   * @c: UBIFS file-system description object
1727   * @bu: bulk-read parameters and results
1728   *
1729   * This functions reads and validates the data nodes that were identified by the
1730   * 'ubifs_tnc_get_bu_keys()' function. This functions returns %0 on success,
1731   * -EAGAIN to indicate a race with GC, or another negative error code on
1732   * failure.
1733   */
1734  int ubifs_tnc_bulk_read(struct ubifs_info *c, struct bu_info *bu)
1735  {
1736  	int lnum = bu->zbranch[0].lnum, offs = bu->zbranch[0].offs, len, err, i;
1737  	struct ubifs_wbuf *wbuf;
1738  	void *buf;
1739  
1740  	len = bu->zbranch[bu->cnt - 1].offs;
1741  	len += bu->zbranch[bu->cnt - 1].len - offs;
1742  	if (len > bu->buf_len) {
1743  		ubifs_err(c, "buffer too small %d vs %d", bu->buf_len, len);
1744  		return -EINVAL;
1745  	}
1746  
1747  	/* Do the read */
1748  	wbuf = ubifs_get_wbuf(c, lnum);
1749  	if (wbuf)
1750  		err = read_wbuf(wbuf, bu->buf, len, lnum, offs);
1751  	else
1752  		err = ubifs_leb_read(c, lnum, bu->buf, offs, len, 0);
1753  
1754  	/* Check for a race with GC */
1755  	if (maybe_leb_gced(c, lnum, bu->gc_seq))
1756  		return -EAGAIN;
1757  
1758  	if (err && err != -EBADMSG) {
1759  		ubifs_err(c, "failed to read from LEB %d:%d, error %d",
1760  			  lnum, offs, err);
1761  		dump_stack();
1762  		dbg_tnck(&bu->key, "key ");
1763  		return err;
1764  	}
1765  
1766  	/* Validate the nodes read */
1767  	buf = bu->buf;
1768  	for (i = 0; i < bu->cnt; i++) {
1769  		err = validate_data_node(c, buf, &bu->zbranch[i]);
1770  		if (err)
1771  			return err;
1772  		buf = buf + ALIGN(bu->zbranch[i].len, 8);
1773  	}
1774  
1775  	return 0;
1776  }
1777  
1778  /**
1779   * do_lookup_nm- look up a "hashed" node.
1780   * @c: UBIFS file-system description object
1781   * @key: node key to lookup
1782   * @node: the node is returned here
1783   * @nm: node name
1784   *
1785   * This function look up and reads a node which contains name hash in the key.
1786   * Since the hash may have collisions, there may be many nodes with the same
1787   * key, so we have to sequentially look to all of them until the needed one is
1788   * found. This function returns zero in case of success, %-ENOENT if the node
1789   * was not found, and a negative error code in case of failure.
1790   */
1791  static int do_lookup_nm(struct ubifs_info *c, const union ubifs_key *key,
1792  			void *node, const struct qstr *nm)
1793  {
1794  	int found, n, err;
1795  	struct ubifs_znode *znode;
1796  
1797  	dbg_tnck(key, "name '%.*s' key ", nm->len, nm->name);
1798  	mutex_lock(&c->tnc_mutex);
1799  	found = ubifs_lookup_level0(c, key, &znode, &n);
1800  	if (!found) {
1801  		err = -ENOENT;
1802  		goto out_unlock;
1803  	} else if (found < 0) {
1804  		err = found;
1805  		goto out_unlock;
1806  	}
1807  
1808  	ubifs_assert(n >= 0);
1809  
1810  	err = resolve_collision(c, key, &znode, &n, nm);
1811  	dbg_tnc("rc returned %d, znode %p, n %d", err, znode, n);
1812  	if (unlikely(err < 0))
1813  		goto out_unlock;
1814  	if (err == 0) {
1815  		err = -ENOENT;
1816  		goto out_unlock;
1817  	}
1818  
1819  	err = tnc_read_node_nm(c, &znode->zbranch[n], node);
1820  
1821  out_unlock:
1822  	mutex_unlock(&c->tnc_mutex);
1823  	return err;
1824  }
1825  
1826  /**
1827   * ubifs_tnc_lookup_nm - look up a "hashed" node.
1828   * @c: UBIFS file-system description object
1829   * @key: node key to lookup
1830   * @node: the node is returned here
1831   * @nm: node name
1832   *
1833   * This function look up and reads a node which contains name hash in the key.
1834   * Since the hash may have collisions, there may be many nodes with the same
1835   * key, so we have to sequentially look to all of them until the needed one is
1836   * found. This function returns zero in case of success, %-ENOENT if the node
1837   * was not found, and a negative error code in case of failure.
1838   */
1839  int ubifs_tnc_lookup_nm(struct ubifs_info *c, const union ubifs_key *key,
1840  			void *node, const struct qstr *nm)
1841  {
1842  	int err, len;
1843  	const struct ubifs_dent_node *dent = node;
1844  
1845  	/*
1846  	 * We assume that in most of the cases there are no name collisions and
1847  	 * 'ubifs_tnc_lookup()' returns us the right direntry.
1848  	 */
1849  	err = ubifs_tnc_lookup(c, key, node);
1850  	if (err)
1851  		return err;
1852  
1853  	len = le16_to_cpu(dent->nlen);
1854  	if (nm->len == len && !memcmp(dent->name, nm->name, len))
1855  		return 0;
1856  
1857  	/*
1858  	 * Unluckily, there are hash collisions and we have to iterate over
1859  	 * them look at each direntry with colliding name hash sequentially.
1860  	 */
1861  	return do_lookup_nm(c, key, node, nm);
1862  }
1863  
1864  /**
1865   * correct_parent_keys - correct parent znodes' keys.
1866   * @c: UBIFS file-system description object
1867   * @znode: znode to correct parent znodes for
1868   *
1869   * This is a helper function for 'tnc_insert()'. When the key of the leftmost
1870   * zbranch changes, keys of parent znodes have to be corrected. This helper
1871   * function is called in such situations and corrects the keys if needed.
1872   */
1873  static void correct_parent_keys(const struct ubifs_info *c,
1874  				struct ubifs_znode *znode)
1875  {
1876  	union ubifs_key *key, *key1;
1877  
1878  	ubifs_assert(znode->parent);
1879  	ubifs_assert(znode->iip == 0);
1880  
1881  	key = &znode->zbranch[0].key;
1882  	key1 = &znode->parent->zbranch[0].key;
1883  
1884  	while (keys_cmp(c, key, key1) < 0) {
1885  		key_copy(c, key, key1);
1886  		znode = znode->parent;
1887  		znode->alt = 1;
1888  		if (!znode->parent || znode->iip)
1889  			break;
1890  		key1 = &znode->parent->zbranch[0].key;
1891  	}
1892  }
1893  
1894  /**
1895   * insert_zbranch - insert a zbranch into a znode.
1896   * @znode: znode into which to insert
1897   * @zbr: zbranch to insert
1898   * @n: slot number to insert to
1899   *
1900   * This is a helper function for 'tnc_insert()'. UBIFS does not allow "gaps" in
1901   * znode's array of zbranches and keeps zbranches consolidated, so when a new
1902   * zbranch has to be inserted to the @znode->zbranches[]' array at the @n-th
1903   * slot, zbranches starting from @n have to be moved right.
1904   */
1905  static void insert_zbranch(struct ubifs_znode *znode,
1906  			   const struct ubifs_zbranch *zbr, int n)
1907  {
1908  	int i;
1909  
1910  	ubifs_assert(ubifs_zn_dirty(znode));
1911  
1912  	if (znode->level) {
1913  		for (i = znode->child_cnt; i > n; i--) {
1914  			znode->zbranch[i] = znode->zbranch[i - 1];
1915  			if (znode->zbranch[i].znode)
1916  				znode->zbranch[i].znode->iip = i;
1917  		}
1918  		if (zbr->znode)
1919  			zbr->znode->iip = n;
1920  	} else
1921  		for (i = znode->child_cnt; i > n; i--)
1922  			znode->zbranch[i] = znode->zbranch[i - 1];
1923  
1924  	znode->zbranch[n] = *zbr;
1925  	znode->child_cnt += 1;
1926  
1927  	/*
1928  	 * After inserting at slot zero, the lower bound of the key range of
1929  	 * this znode may have changed. If this znode is subsequently split
1930  	 * then the upper bound of the key range may change, and furthermore
1931  	 * it could change to be lower than the original lower bound. If that
1932  	 * happens, then it will no longer be possible to find this znode in the
1933  	 * TNC using the key from the index node on flash. That is bad because
1934  	 * if it is not found, we will assume it is obsolete and may overwrite
1935  	 * it. Then if there is an unclean unmount, we will start using the
1936  	 * old index which will be broken.
1937  	 *
1938  	 * So we first mark znodes that have insertions at slot zero, and then
1939  	 * if they are split we add their lnum/offs to the old_idx tree.
1940  	 */
1941  	if (n == 0)
1942  		znode->alt = 1;
1943  }
1944  
1945  /**
1946   * tnc_insert - insert a node into TNC.
1947   * @c: UBIFS file-system description object
1948   * @znode: znode to insert into
1949   * @zbr: branch to insert
1950   * @n: slot number to insert new zbranch to
1951   *
1952   * This function inserts a new node described by @zbr into znode @znode. If
1953   * znode does not have a free slot for new zbranch, it is split. Parent znodes
1954   * are splat as well if needed. Returns zero in case of success or a negative
1955   * error code in case of failure.
1956   */
1957  static int tnc_insert(struct ubifs_info *c, struct ubifs_znode *znode,
1958  		      struct ubifs_zbranch *zbr, int n)
1959  {
1960  	struct ubifs_znode *zn, *zi, *zp;
1961  	int i, keep, move, appending = 0;
1962  	union ubifs_key *key = &zbr->key, *key1;
1963  
1964  	ubifs_assert(n >= 0 && n <= c->fanout);
1965  
1966  	/* Implement naive insert for now */
1967  again:
1968  	zp = znode->parent;
1969  	if (znode->child_cnt < c->fanout) {
1970  		ubifs_assert(n != c->fanout);
1971  		dbg_tnck(key, "inserted at %d level %d, key ", n, znode->level);
1972  
1973  		insert_zbranch(znode, zbr, n);
1974  
1975  		/* Ensure parent's key is correct */
1976  		if (n == 0 && zp && znode->iip == 0)
1977  			correct_parent_keys(c, znode);
1978  
1979  		return 0;
1980  	}
1981  
1982  	/*
1983  	 * Unfortunately, @znode does not have more empty slots and we have to
1984  	 * split it.
1985  	 */
1986  	dbg_tnck(key, "splitting level %d, key ", znode->level);
1987  
1988  	if (znode->alt)
1989  		/*
1990  		 * We can no longer be sure of finding this znode by key, so we
1991  		 * record it in the old_idx tree.
1992  		 */
1993  		ins_clr_old_idx_znode(c, znode);
1994  
1995  	zn = kzalloc(c->max_znode_sz, GFP_NOFS);
1996  	if (!zn)
1997  		return -ENOMEM;
1998  	zn->parent = zp;
1999  	zn->level = znode->level;
2000  
2001  	/* Decide where to split */
2002  	if (znode->level == 0 && key_type(c, key) == UBIFS_DATA_KEY) {
2003  		/* Try not to split consecutive data keys */
2004  		if (n == c->fanout) {
2005  			key1 = &znode->zbranch[n - 1].key;
2006  			if (key_inum(c, key1) == key_inum(c, key) &&
2007  			    key_type(c, key1) == UBIFS_DATA_KEY)
2008  				appending = 1;
2009  		} else
2010  			goto check_split;
2011  	} else if (appending && n != c->fanout) {
2012  		/* Try not to split consecutive data keys */
2013  		appending = 0;
2014  check_split:
2015  		if (n >= (c->fanout + 1) / 2) {
2016  			key1 = &znode->zbranch[0].key;
2017  			if (key_inum(c, key1) == key_inum(c, key) &&
2018  			    key_type(c, key1) == UBIFS_DATA_KEY) {
2019  				key1 = &znode->zbranch[n].key;
2020  				if (key_inum(c, key1) != key_inum(c, key) ||
2021  				    key_type(c, key1) != UBIFS_DATA_KEY) {
2022  					keep = n;
2023  					move = c->fanout - keep;
2024  					zi = znode;
2025  					goto do_split;
2026  				}
2027  			}
2028  		}
2029  	}
2030  
2031  	if (appending) {
2032  		keep = c->fanout;
2033  		move = 0;
2034  	} else {
2035  		keep = (c->fanout + 1) / 2;
2036  		move = c->fanout - keep;
2037  	}
2038  
2039  	/*
2040  	 * Although we don't at present, we could look at the neighbors and see
2041  	 * if we can move some zbranches there.
2042  	 */
2043  
2044  	if (n < keep) {
2045  		/* Insert into existing znode */
2046  		zi = znode;
2047  		move += 1;
2048  		keep -= 1;
2049  	} else {
2050  		/* Insert into new znode */
2051  		zi = zn;
2052  		n -= keep;
2053  		/* Re-parent */
2054  		if (zn->level != 0)
2055  			zbr->znode->parent = zn;
2056  	}
2057  
2058  do_split:
2059  
2060  	__set_bit(DIRTY_ZNODE, &zn->flags);
2061  	atomic_long_inc(&c->dirty_zn_cnt);
2062  
2063  	zn->child_cnt = move;
2064  	znode->child_cnt = keep;
2065  
2066  	dbg_tnc("moving %d, keeping %d", move, keep);
2067  
2068  	/* Move zbranch */
2069  	for (i = 0; i < move; i++) {
2070  		zn->zbranch[i] = znode->zbranch[keep + i];
2071  		/* Re-parent */
2072  		if (zn->level != 0)
2073  			if (zn->zbranch[i].znode) {
2074  				zn->zbranch[i].znode->parent = zn;
2075  				zn->zbranch[i].znode->iip = i;
2076  			}
2077  	}
2078  
2079  	/* Insert new key and branch */
2080  	dbg_tnck(key, "inserting at %d level %d, key ", n, zn->level);
2081  
2082  	insert_zbranch(zi, zbr, n);
2083  
2084  	/* Insert new znode (produced by spitting) into the parent */
2085  	if (zp) {
2086  		if (n == 0 && zi == znode && znode->iip == 0)
2087  			correct_parent_keys(c, znode);
2088  
2089  		/* Locate insertion point */
2090  		n = znode->iip + 1;
2091  
2092  		/* Tail recursion */
2093  		zbr->key = zn->zbranch[0].key;
2094  		zbr->znode = zn;
2095  		zbr->lnum = 0;
2096  		zbr->offs = 0;
2097  		zbr->len = 0;
2098  		znode = zp;
2099  
2100  		goto again;
2101  	}
2102  
2103  	/* We have to split root znode */
2104  	dbg_tnc("creating new zroot at level %d", znode->level + 1);
2105  
2106  	zi = kzalloc(c->max_znode_sz, GFP_NOFS);
2107  	if (!zi)
2108  		return -ENOMEM;
2109  
2110  	zi->child_cnt = 2;
2111  	zi->level = znode->level + 1;
2112  
2113  	__set_bit(DIRTY_ZNODE, &zi->flags);
2114  	atomic_long_inc(&c->dirty_zn_cnt);
2115  
2116  	zi->zbranch[0].key = znode->zbranch[0].key;
2117  	zi->zbranch[0].znode = znode;
2118  	zi->zbranch[0].lnum = c->zroot.lnum;
2119  	zi->zbranch[0].offs = c->zroot.offs;
2120  	zi->zbranch[0].len = c->zroot.len;
2121  	zi->zbranch[1].key = zn->zbranch[0].key;
2122  	zi->zbranch[1].znode = zn;
2123  
2124  	c->zroot.lnum = 0;
2125  	c->zroot.offs = 0;
2126  	c->zroot.len = 0;
2127  	c->zroot.znode = zi;
2128  
2129  	zn->parent = zi;
2130  	zn->iip = 1;
2131  	znode->parent = zi;
2132  	znode->iip = 0;
2133  
2134  	return 0;
2135  }
2136  
2137  /**
2138   * ubifs_tnc_add - add a node to TNC.
2139   * @c: UBIFS file-system description object
2140   * @key: key to add
2141   * @lnum: LEB number of node
2142   * @offs: node offset
2143   * @len: node length
2144   *
2145   * This function adds a node with key @key to TNC. The node may be new or it may
2146   * obsolete some existing one. Returns %0 on success or negative error code on
2147   * failure.
2148   */
2149  int ubifs_tnc_add(struct ubifs_info *c, const union ubifs_key *key, int lnum,
2150  		  int offs, int len)
2151  {
2152  	int found, n, err = 0;
2153  	struct ubifs_znode *znode;
2154  
2155  	mutex_lock(&c->tnc_mutex);
2156  	dbg_tnck(key, "%d:%d, len %d, key ", lnum, offs, len);
2157  	found = lookup_level0_dirty(c, key, &znode, &n);
2158  	if (!found) {
2159  		struct ubifs_zbranch zbr;
2160  
2161  		zbr.znode = NULL;
2162  		zbr.lnum = lnum;
2163  		zbr.offs = offs;
2164  		zbr.len = len;
2165  		key_copy(c, key, &zbr.key);
2166  		err = tnc_insert(c, znode, &zbr, n + 1);
2167  	} else if (found == 1) {
2168  		struct ubifs_zbranch *zbr = &znode->zbranch[n];
2169  
2170  		lnc_free(zbr);
2171  		err = ubifs_add_dirt(c, zbr->lnum, zbr->len);
2172  		zbr->lnum = lnum;
2173  		zbr->offs = offs;
2174  		zbr->len = len;
2175  	} else
2176  		err = found;
2177  	if (!err)
2178  		err = dbg_check_tnc(c, 0);
2179  	mutex_unlock(&c->tnc_mutex);
2180  
2181  	return err;
2182  }
2183  
2184  /**
2185   * ubifs_tnc_replace - replace a node in the TNC only if the old node is found.
2186   * @c: UBIFS file-system description object
2187   * @key: key to add
2188   * @old_lnum: LEB number of old node
2189   * @old_offs: old node offset
2190   * @lnum: LEB number of node
2191   * @offs: node offset
2192   * @len: node length
2193   *
2194   * This function replaces a node with key @key in the TNC only if the old node
2195   * is found.  This function is called by garbage collection when node are moved.
2196   * Returns %0 on success or negative error code on failure.
2197   */
2198  int ubifs_tnc_replace(struct ubifs_info *c, const union ubifs_key *key,
2199  		      int old_lnum, int old_offs, int lnum, int offs, int len)
2200  {
2201  	int found, n, err = 0;
2202  	struct ubifs_znode *znode;
2203  
2204  	mutex_lock(&c->tnc_mutex);
2205  	dbg_tnck(key, "old LEB %d:%d, new LEB %d:%d, len %d, key ", old_lnum,
2206  		 old_offs, lnum, offs, len);
2207  	found = lookup_level0_dirty(c, key, &znode, &n);
2208  	if (found < 0) {
2209  		err = found;
2210  		goto out_unlock;
2211  	}
2212  
2213  	if (found == 1) {
2214  		struct ubifs_zbranch *zbr = &znode->zbranch[n];
2215  
2216  		found = 0;
2217  		if (zbr->lnum == old_lnum && zbr->offs == old_offs) {
2218  			lnc_free(zbr);
2219  			err = ubifs_add_dirt(c, zbr->lnum, zbr->len);
2220  			if (err)
2221  				goto out_unlock;
2222  			zbr->lnum = lnum;
2223  			zbr->offs = offs;
2224  			zbr->len = len;
2225  			found = 1;
2226  		} else if (is_hash_key(c, key)) {
2227  			found = resolve_collision_directly(c, key, &znode, &n,
2228  							   old_lnum, old_offs);
2229  			dbg_tnc("rc returned %d, znode %p, n %d, LEB %d:%d",
2230  				found, znode, n, old_lnum, old_offs);
2231  			if (found < 0) {
2232  				err = found;
2233  				goto out_unlock;
2234  			}
2235  
2236  			if (found) {
2237  				/* Ensure the znode is dirtied */
2238  				if (znode->cnext || !ubifs_zn_dirty(znode)) {
2239  					znode = dirty_cow_bottom_up(c, znode);
2240  					if (IS_ERR(znode)) {
2241  						err = PTR_ERR(znode);
2242  						goto out_unlock;
2243  					}
2244  				}
2245  				zbr = &znode->zbranch[n];
2246  				lnc_free(zbr);
2247  				err = ubifs_add_dirt(c, zbr->lnum,
2248  						     zbr->len);
2249  				if (err)
2250  					goto out_unlock;
2251  				zbr->lnum = lnum;
2252  				zbr->offs = offs;
2253  				zbr->len = len;
2254  			}
2255  		}
2256  	}
2257  
2258  	if (!found)
2259  		err = ubifs_add_dirt(c, lnum, len);
2260  
2261  	if (!err)
2262  		err = dbg_check_tnc(c, 0);
2263  
2264  out_unlock:
2265  	mutex_unlock(&c->tnc_mutex);
2266  	return err;
2267  }
2268  
2269  /**
2270   * ubifs_tnc_add_nm - add a "hashed" node to TNC.
2271   * @c: UBIFS file-system description object
2272   * @key: key to add
2273   * @lnum: LEB number of node
2274   * @offs: node offset
2275   * @len: node length
2276   * @nm: node name
2277   *
2278   * This is the same as 'ubifs_tnc_add()' but it should be used with keys which
2279   * may have collisions, like directory entry keys.
2280   */
2281  int ubifs_tnc_add_nm(struct ubifs_info *c, const union ubifs_key *key,
2282  		     int lnum, int offs, int len, const struct qstr *nm)
2283  {
2284  	int found, n, err = 0;
2285  	struct ubifs_znode *znode;
2286  
2287  	mutex_lock(&c->tnc_mutex);
2288  	dbg_tnck(key, "LEB %d:%d, name '%.*s', key ",
2289  		 lnum, offs, nm->len, nm->name);
2290  	found = lookup_level0_dirty(c, key, &znode, &n);
2291  	if (found < 0) {
2292  		err = found;
2293  		goto out_unlock;
2294  	}
2295  
2296  	if (found == 1) {
2297  		if (c->replaying)
2298  			found = fallible_resolve_collision(c, key, &znode, &n,
2299  							   nm, 1);
2300  		else
2301  			found = resolve_collision(c, key, &znode, &n, nm);
2302  		dbg_tnc("rc returned %d, znode %p, n %d", found, znode, n);
2303  		if (found < 0) {
2304  			err = found;
2305  			goto out_unlock;
2306  		}
2307  
2308  		/* Ensure the znode is dirtied */
2309  		if (znode->cnext || !ubifs_zn_dirty(znode)) {
2310  			znode = dirty_cow_bottom_up(c, znode);
2311  			if (IS_ERR(znode)) {
2312  				err = PTR_ERR(znode);
2313  				goto out_unlock;
2314  			}
2315  		}
2316  
2317  		if (found == 1) {
2318  			struct ubifs_zbranch *zbr = &znode->zbranch[n];
2319  
2320  			lnc_free(zbr);
2321  			err = ubifs_add_dirt(c, zbr->lnum, zbr->len);
2322  			zbr->lnum = lnum;
2323  			zbr->offs = offs;
2324  			zbr->len = len;
2325  			goto out_unlock;
2326  		}
2327  	}
2328  
2329  	if (!found) {
2330  		struct ubifs_zbranch zbr;
2331  
2332  		zbr.znode = NULL;
2333  		zbr.lnum = lnum;
2334  		zbr.offs = offs;
2335  		zbr.len = len;
2336  		key_copy(c, key, &zbr.key);
2337  		err = tnc_insert(c, znode, &zbr, n + 1);
2338  		if (err)
2339  			goto out_unlock;
2340  		if (c->replaying) {
2341  			/*
2342  			 * We did not find it in the index so there may be a
2343  			 * dangling branch still in the index. So we remove it
2344  			 * by passing 'ubifs_tnc_remove_nm()' the same key but
2345  			 * an unmatchable name.
2346  			 */
2347  			struct qstr noname = { .name = "" };
2348  
2349  			err = dbg_check_tnc(c, 0);
2350  			mutex_unlock(&c->tnc_mutex);
2351  			if (err)
2352  				return err;
2353  			return ubifs_tnc_remove_nm(c, key, &noname);
2354  		}
2355  	}
2356  
2357  out_unlock:
2358  	if (!err)
2359  		err = dbg_check_tnc(c, 0);
2360  	mutex_unlock(&c->tnc_mutex);
2361  	return err;
2362  }
2363  
2364  /**
2365   * tnc_delete - delete a znode form TNC.
2366   * @c: UBIFS file-system description object
2367   * @znode: znode to delete from
2368   * @n: zbranch slot number to delete
2369   *
2370   * This function deletes a leaf node from @n-th slot of @znode. Returns zero in
2371   * case of success and a negative error code in case of failure.
2372   */
2373  static int tnc_delete(struct ubifs_info *c, struct ubifs_znode *znode, int n)
2374  {
2375  	struct ubifs_zbranch *zbr;
2376  	struct ubifs_znode *zp;
2377  	int i, err;
2378  
2379  	/* Delete without merge for now */
2380  	ubifs_assert(znode->level == 0);
2381  	ubifs_assert(n >= 0 && n < c->fanout);
2382  	dbg_tnck(&znode->zbranch[n].key, "deleting key ");
2383  
2384  	zbr = &znode->zbranch[n];
2385  	lnc_free(zbr);
2386  
2387  	err = ubifs_add_dirt(c, zbr->lnum, zbr->len);
2388  	if (err) {
2389  		ubifs_dump_znode(c, znode);
2390  		return err;
2391  	}
2392  
2393  	/* We do not "gap" zbranch slots */
2394  	for (i = n; i < znode->child_cnt - 1; i++)
2395  		znode->zbranch[i] = znode->zbranch[i + 1];
2396  	znode->child_cnt -= 1;
2397  
2398  	if (znode->child_cnt > 0)
2399  		return 0;
2400  
2401  	/*
2402  	 * This was the last zbranch, we have to delete this znode from the
2403  	 * parent.
2404  	 */
2405  
2406  	do {
2407  		ubifs_assert(!ubifs_zn_obsolete(znode));
2408  		ubifs_assert(ubifs_zn_dirty(znode));
2409  
2410  		zp = znode->parent;
2411  		n = znode->iip;
2412  
2413  		atomic_long_dec(&c->dirty_zn_cnt);
2414  
2415  		err = insert_old_idx_znode(c, znode);
2416  		if (err)
2417  			return err;
2418  
2419  		if (znode->cnext) {
2420  			__set_bit(OBSOLETE_ZNODE, &znode->flags);
2421  			atomic_long_inc(&c->clean_zn_cnt);
2422  			atomic_long_inc(&ubifs_clean_zn_cnt);
2423  		} else
2424  			kfree(znode);
2425  		znode = zp;
2426  	} while (znode->child_cnt == 1); /* while removing last child */
2427  
2428  	/* Remove from znode, entry n - 1 */
2429  	znode->child_cnt -= 1;
2430  	ubifs_assert(znode->level != 0);
2431  	for (i = n; i < znode->child_cnt; i++) {
2432  		znode->zbranch[i] = znode->zbranch[i + 1];
2433  		if (znode->zbranch[i].znode)
2434  			znode->zbranch[i].znode->iip = i;
2435  	}
2436  
2437  	/*
2438  	 * If this is the root and it has only 1 child then
2439  	 * collapse the tree.
2440  	 */
2441  	if (!znode->parent) {
2442  		while (znode->child_cnt == 1 && znode->level != 0) {
2443  			zp = znode;
2444  			zbr = &znode->zbranch[0];
2445  			znode = get_znode(c, znode, 0);
2446  			if (IS_ERR(znode))
2447  				return PTR_ERR(znode);
2448  			znode = dirty_cow_znode(c, zbr);
2449  			if (IS_ERR(znode))
2450  				return PTR_ERR(znode);
2451  			znode->parent = NULL;
2452  			znode->iip = 0;
2453  			if (c->zroot.len) {
2454  				err = insert_old_idx(c, c->zroot.lnum,
2455  						     c->zroot.offs);
2456  				if (err)
2457  					return err;
2458  			}
2459  			c->zroot.lnum = zbr->lnum;
2460  			c->zroot.offs = zbr->offs;
2461  			c->zroot.len = zbr->len;
2462  			c->zroot.znode = znode;
2463  			ubifs_assert(!ubifs_zn_obsolete(zp));
2464  			ubifs_assert(ubifs_zn_dirty(zp));
2465  			atomic_long_dec(&c->dirty_zn_cnt);
2466  
2467  			if (zp->cnext) {
2468  				__set_bit(OBSOLETE_ZNODE, &zp->flags);
2469  				atomic_long_inc(&c->clean_zn_cnt);
2470  				atomic_long_inc(&ubifs_clean_zn_cnt);
2471  			} else
2472  				kfree(zp);
2473  		}
2474  	}
2475  
2476  	return 0;
2477  }
2478  
2479  /**
2480   * ubifs_tnc_remove - remove an index entry of a node.
2481   * @c: UBIFS file-system description object
2482   * @key: key of node
2483   *
2484   * Returns %0 on success or negative error code on failure.
2485   */
2486  int ubifs_tnc_remove(struct ubifs_info *c, const union ubifs_key *key)
2487  {
2488  	int found, n, err = 0;
2489  	struct ubifs_znode *znode;
2490  
2491  	mutex_lock(&c->tnc_mutex);
2492  	dbg_tnck(key, "key ");
2493  	found = lookup_level0_dirty(c, key, &znode, &n);
2494  	if (found < 0) {
2495  		err = found;
2496  		goto out_unlock;
2497  	}
2498  	if (found == 1)
2499  		err = tnc_delete(c, znode, n);
2500  	if (!err)
2501  		err = dbg_check_tnc(c, 0);
2502  
2503  out_unlock:
2504  	mutex_unlock(&c->tnc_mutex);
2505  	return err;
2506  }
2507  
2508  /**
2509   * ubifs_tnc_remove_nm - remove an index entry for a "hashed" node.
2510   * @c: UBIFS file-system description object
2511   * @key: key of node
2512   * @nm: directory entry name
2513   *
2514   * Returns %0 on success or negative error code on failure.
2515   */
2516  int ubifs_tnc_remove_nm(struct ubifs_info *c, const union ubifs_key *key,
2517  			const struct qstr *nm)
2518  {
2519  	int n, err;
2520  	struct ubifs_znode *znode;
2521  
2522  	mutex_lock(&c->tnc_mutex);
2523  	dbg_tnck(key, "%.*s, key ", nm->len, nm->name);
2524  	err = lookup_level0_dirty(c, key, &znode, &n);
2525  	if (err < 0)
2526  		goto out_unlock;
2527  
2528  	if (err) {
2529  		if (c->replaying)
2530  			err = fallible_resolve_collision(c, key, &znode, &n,
2531  							 nm, 0);
2532  		else
2533  			err = resolve_collision(c, key, &znode, &n, nm);
2534  		dbg_tnc("rc returned %d, znode %p, n %d", err, znode, n);
2535  		if (err < 0)
2536  			goto out_unlock;
2537  		if (err) {
2538  			/* Ensure the znode is dirtied */
2539  			if (znode->cnext || !ubifs_zn_dirty(znode)) {
2540  				znode = dirty_cow_bottom_up(c, znode);
2541  				if (IS_ERR(znode)) {
2542  					err = PTR_ERR(znode);
2543  					goto out_unlock;
2544  				}
2545  			}
2546  			err = tnc_delete(c, znode, n);
2547  		}
2548  	}
2549  
2550  out_unlock:
2551  	if (!err)
2552  		err = dbg_check_tnc(c, 0);
2553  	mutex_unlock(&c->tnc_mutex);
2554  	return err;
2555  }
2556  
2557  /**
2558   * key_in_range - determine if a key falls within a range of keys.
2559   * @c: UBIFS file-system description object
2560   * @key: key to check
2561   * @from_key: lowest key in range
2562   * @to_key: highest key in range
2563   *
2564   * This function returns %1 if the key is in range and %0 otherwise.
2565   */
2566  static int key_in_range(struct ubifs_info *c, union ubifs_key *key,
2567  			union ubifs_key *from_key, union ubifs_key *to_key)
2568  {
2569  	if (keys_cmp(c, key, from_key) < 0)
2570  		return 0;
2571  	if (keys_cmp(c, key, to_key) > 0)
2572  		return 0;
2573  	return 1;
2574  }
2575  
2576  /**
2577   * ubifs_tnc_remove_range - remove index entries in range.
2578   * @c: UBIFS file-system description object
2579   * @from_key: lowest key to remove
2580   * @to_key: highest key to remove
2581   *
2582   * This function removes index entries starting at @from_key and ending at
2583   * @to_key.  This function returns zero in case of success and a negative error
2584   * code in case of failure.
2585   */
2586  int ubifs_tnc_remove_range(struct ubifs_info *c, union ubifs_key *from_key,
2587  			   union ubifs_key *to_key)
2588  {
2589  	int i, n, k, err = 0;
2590  	struct ubifs_znode *znode;
2591  	union ubifs_key *key;
2592  
2593  	mutex_lock(&c->tnc_mutex);
2594  	while (1) {
2595  		/* Find first level 0 znode that contains keys to remove */
2596  		err = ubifs_lookup_level0(c, from_key, &znode, &n);
2597  		if (err < 0)
2598  			goto out_unlock;
2599  
2600  		if (err)
2601  			key = from_key;
2602  		else {
2603  			err = tnc_next(c, &znode, &n);
2604  			if (err == -ENOENT) {
2605  				err = 0;
2606  				goto out_unlock;
2607  			}
2608  			if (err < 0)
2609  				goto out_unlock;
2610  			key = &znode->zbranch[n].key;
2611  			if (!key_in_range(c, key, from_key, to_key)) {
2612  				err = 0;
2613  				goto out_unlock;
2614  			}
2615  		}
2616  
2617  		/* Ensure the znode is dirtied */
2618  		if (znode->cnext || !ubifs_zn_dirty(znode)) {
2619  			znode = dirty_cow_bottom_up(c, znode);
2620  			if (IS_ERR(znode)) {
2621  				err = PTR_ERR(znode);
2622  				goto out_unlock;
2623  			}
2624  		}
2625  
2626  		/* Remove all keys in range except the first */
2627  		for (i = n + 1, k = 0; i < znode->child_cnt; i++, k++) {
2628  			key = &znode->zbranch[i].key;
2629  			if (!key_in_range(c, key, from_key, to_key))
2630  				break;
2631  			lnc_free(&znode->zbranch[i]);
2632  			err = ubifs_add_dirt(c, znode->zbranch[i].lnum,
2633  					     znode->zbranch[i].len);
2634  			if (err) {
2635  				ubifs_dump_znode(c, znode);
2636  				goto out_unlock;
2637  			}
2638  			dbg_tnck(key, "removing key ");
2639  		}
2640  		if (k) {
2641  			for (i = n + 1 + k; i < znode->child_cnt; i++)
2642  				znode->zbranch[i - k] = znode->zbranch[i];
2643  			znode->child_cnt -= k;
2644  		}
2645  
2646  		/* Now delete the first */
2647  		err = tnc_delete(c, znode, n);
2648  		if (err)
2649  			goto out_unlock;
2650  	}
2651  
2652  out_unlock:
2653  	if (!err)
2654  		err = dbg_check_tnc(c, 0);
2655  	mutex_unlock(&c->tnc_mutex);
2656  	return err;
2657  }
2658  
2659  /**
2660   * ubifs_tnc_remove_ino - remove an inode from TNC.
2661   * @c: UBIFS file-system description object
2662   * @inum: inode number to remove
2663   *
2664   * This function remove inode @inum and all the extended attributes associated
2665   * with the anode from TNC and returns zero in case of success or a negative
2666   * error code in case of failure.
2667   */
2668  int ubifs_tnc_remove_ino(struct ubifs_info *c, ino_t inum)
2669  {
2670  	union ubifs_key key1, key2;
2671  	struct ubifs_dent_node *xent, *pxent = NULL;
2672  	struct qstr nm = { .name = NULL };
2673  
2674  	dbg_tnc("ino %lu", (unsigned long)inum);
2675  
2676  	/*
2677  	 * Walk all extended attribute entries and remove them together with
2678  	 * corresponding extended attribute inodes.
2679  	 */
2680  	lowest_xent_key(c, &key1, inum);
2681  	while (1) {
2682  		ino_t xattr_inum;
2683  		int err;
2684  
2685  		xent = ubifs_tnc_next_ent(c, &key1, &nm);
2686  		if (IS_ERR(xent)) {
2687  			err = PTR_ERR(xent);
2688  			if (err == -ENOENT)
2689  				break;
2690  			return err;
2691  		}
2692  
2693  		xattr_inum = le64_to_cpu(xent->inum);
2694  		dbg_tnc("xent '%s', ino %lu", xent->name,
2695  			(unsigned long)xattr_inum);
2696  
2697  		nm.name = xent->name;
2698  		nm.len = le16_to_cpu(xent->nlen);
2699  		err = ubifs_tnc_remove_nm(c, &key1, &nm);
2700  		if (err) {
2701  			kfree(xent);
2702  			return err;
2703  		}
2704  
2705  		lowest_ino_key(c, &key1, xattr_inum);
2706  		highest_ino_key(c, &key2, xattr_inum);
2707  		err = ubifs_tnc_remove_range(c, &key1, &key2);
2708  		if (err) {
2709  			kfree(xent);
2710  			return err;
2711  		}
2712  
2713  		kfree(pxent);
2714  		pxent = xent;
2715  		key_read(c, &xent->key, &key1);
2716  	}
2717  
2718  	kfree(pxent);
2719  	lowest_ino_key(c, &key1, inum);
2720  	highest_ino_key(c, &key2, inum);
2721  
2722  	return ubifs_tnc_remove_range(c, &key1, &key2);
2723  }
2724  
2725  /**
2726   * ubifs_tnc_next_ent - walk directory or extended attribute entries.
2727   * @c: UBIFS file-system description object
2728   * @key: key of last entry
2729   * @nm: name of last entry found or %NULL
2730   *
2731   * This function finds and reads the next directory or extended attribute entry
2732   * after the given key (@key) if there is one. @nm is used to resolve
2733   * collisions.
2734   *
2735   * If the name of the current entry is not known and only the key is known,
2736   * @nm->name has to be %NULL. In this case the semantics of this function is a
2737   * little bit different and it returns the entry corresponding to this key, not
2738   * the next one. If the key was not found, the closest "right" entry is
2739   * returned.
2740   *
2741   * If the fist entry has to be found, @key has to contain the lowest possible
2742   * key value for this inode and @name has to be %NULL.
2743   *
2744   * This function returns the found directory or extended attribute entry node
2745   * in case of success, %-ENOENT is returned if no entry was found, and a
2746   * negative error code is returned in case of failure.
2747   */
2748  struct ubifs_dent_node *ubifs_tnc_next_ent(struct ubifs_info *c,
2749  					   union ubifs_key *key,
2750  					   const struct qstr *nm)
2751  {
2752  	int n, err, type = key_type(c, key);
2753  	struct ubifs_znode *znode;
2754  	struct ubifs_dent_node *dent;
2755  	struct ubifs_zbranch *zbr;
2756  	union ubifs_key *dkey;
2757  
2758  	dbg_tnck(key, "%s ", nm->name ? (char *)nm->name : "(lowest)");
2759  	ubifs_assert(is_hash_key(c, key));
2760  
2761  	mutex_lock(&c->tnc_mutex);
2762  	err = ubifs_lookup_level0(c, key, &znode, &n);
2763  	if (unlikely(err < 0))
2764  		goto out_unlock;
2765  
2766  	if (nm->name) {
2767  		if (err) {
2768  			/* Handle collisions */
2769  			err = resolve_collision(c, key, &znode, &n, nm);
2770  			dbg_tnc("rc returned %d, znode %p, n %d",
2771  				err, znode, n);
2772  			if (unlikely(err < 0))
2773  				goto out_unlock;
2774  		}
2775  
2776  		/* Now find next entry */
2777  		err = tnc_next(c, &znode, &n);
2778  		if (unlikely(err))
2779  			goto out_unlock;
2780  	} else {
2781  		/*
2782  		 * The full name of the entry was not given, in which case the
2783  		 * behavior of this function is a little different and it
2784  		 * returns current entry, not the next one.
2785  		 */
2786  		if (!err) {
2787  			/*
2788  			 * However, the given key does not exist in the TNC
2789  			 * tree and @znode/@n variables contain the closest
2790  			 * "preceding" element. Switch to the next one.
2791  			 */
2792  			err = tnc_next(c, &znode, &n);
2793  			if (err)
2794  				goto out_unlock;
2795  		}
2796  	}
2797  
2798  	zbr = &znode->zbranch[n];
2799  	dent = kmalloc(zbr->len, GFP_NOFS);
2800  	if (unlikely(!dent)) {
2801  		err = -ENOMEM;
2802  		goto out_unlock;
2803  	}
2804  
2805  	/*
2806  	 * The above 'tnc_next()' call could lead us to the next inode, check
2807  	 * this.
2808  	 */
2809  	dkey = &zbr->key;
2810  	if (key_inum(c, dkey) != key_inum(c, key) ||
2811  	    key_type(c, dkey) != type) {
2812  		err = -ENOENT;
2813  		goto out_free;
2814  	}
2815  
2816  	err = tnc_read_node_nm(c, zbr, dent);
2817  	if (unlikely(err))
2818  		goto out_free;
2819  
2820  	mutex_unlock(&c->tnc_mutex);
2821  	return dent;
2822  
2823  out_free:
2824  	kfree(dent);
2825  out_unlock:
2826  	mutex_unlock(&c->tnc_mutex);
2827  	return ERR_PTR(err);
2828  }
2829  
2830  /**
2831   * tnc_destroy_cnext - destroy left-over obsolete znodes from a failed commit.
2832   * @c: UBIFS file-system description object
2833   *
2834   * Destroy left-over obsolete znodes from a failed commit.
2835   */
2836  static void tnc_destroy_cnext(struct ubifs_info *c)
2837  {
2838  	struct ubifs_znode *cnext;
2839  
2840  	if (!c->cnext)
2841  		return;
2842  	ubifs_assert(c->cmt_state == COMMIT_BROKEN);
2843  	cnext = c->cnext;
2844  	do {
2845  		struct ubifs_znode *znode = cnext;
2846  
2847  		cnext = cnext->cnext;
2848  		if (ubifs_zn_obsolete(znode))
2849  			kfree(znode);
2850  	} while (cnext && cnext != c->cnext);
2851  }
2852  
2853  /**
2854   * ubifs_tnc_close - close TNC subsystem and free all related resources.
2855   * @c: UBIFS file-system description object
2856   */
2857  void ubifs_tnc_close(struct ubifs_info *c)
2858  {
2859  	tnc_destroy_cnext(c);
2860  	if (c->zroot.znode) {
2861  		long n, freed;
2862  
2863  		n = atomic_long_read(&c->clean_zn_cnt);
2864  		freed = ubifs_destroy_tnc_subtree(c->zroot.znode);
2865  		ubifs_assert(freed == n);
2866  		atomic_long_sub(n, &ubifs_clean_zn_cnt);
2867  	}
2868  	kfree(c->gap_lebs);
2869  	kfree(c->ilebs);
2870  	destroy_old_idx(c);
2871  }
2872  
2873  /**
2874   * left_znode - get the znode to the left.
2875   * @c: UBIFS file-system description object
2876   * @znode: znode
2877   *
2878   * This function returns a pointer to the znode to the left of @znode or NULL if
2879   * there is not one. A negative error code is returned on failure.
2880   */
2881  static struct ubifs_znode *left_znode(struct ubifs_info *c,
2882  				      struct ubifs_znode *znode)
2883  {
2884  	int level = znode->level;
2885  
2886  	while (1) {
2887  		int n = znode->iip - 1;
2888  
2889  		/* Go up until we can go left */
2890  		znode = znode->parent;
2891  		if (!znode)
2892  			return NULL;
2893  		if (n >= 0) {
2894  			/* Now go down the rightmost branch to 'level' */
2895  			znode = get_znode(c, znode, n);
2896  			if (IS_ERR(znode))
2897  				return znode;
2898  			while (znode->level != level) {
2899  				n = znode->child_cnt - 1;
2900  				znode = get_znode(c, znode, n);
2901  				if (IS_ERR(znode))
2902  					return znode;
2903  			}
2904  			break;
2905  		}
2906  	}
2907  	return znode;
2908  }
2909  
2910  /**
2911   * right_znode - get the znode to the right.
2912   * @c: UBIFS file-system description object
2913   * @znode: znode
2914   *
2915   * This function returns a pointer to the znode to the right of @znode or NULL
2916   * if there is not one. A negative error code is returned on failure.
2917   */
2918  static struct ubifs_znode *right_znode(struct ubifs_info *c,
2919  				       struct ubifs_znode *znode)
2920  {
2921  	int level = znode->level;
2922  
2923  	while (1) {
2924  		int n = znode->iip + 1;
2925  
2926  		/* Go up until we can go right */
2927  		znode = znode->parent;
2928  		if (!znode)
2929  			return NULL;
2930  		if (n < znode->child_cnt) {
2931  			/* Now go down the leftmost branch to 'level' */
2932  			znode = get_znode(c, znode, n);
2933  			if (IS_ERR(znode))
2934  				return znode;
2935  			while (znode->level != level) {
2936  				znode = get_znode(c, znode, 0);
2937  				if (IS_ERR(znode))
2938  					return znode;
2939  			}
2940  			break;
2941  		}
2942  	}
2943  	return znode;
2944  }
2945  
2946  /**
2947   * lookup_znode - find a particular indexing node from TNC.
2948   * @c: UBIFS file-system description object
2949   * @key: index node key to lookup
2950   * @level: index node level
2951   * @lnum: index node LEB number
2952   * @offs: index node offset
2953   *
2954   * This function searches an indexing node by its first key @key and its
2955   * address @lnum:@offs. It looks up the indexing tree by pulling all indexing
2956   * nodes it traverses to TNC. This function is called for indexing nodes which
2957   * were found on the media by scanning, for example when garbage-collecting or
2958   * when doing in-the-gaps commit. This means that the indexing node which is
2959   * looked for does not have to have exactly the same leftmost key @key, because
2960   * the leftmost key may have been changed, in which case TNC will contain a
2961   * dirty znode which still refers the same @lnum:@offs. This function is clever
2962   * enough to recognize such indexing nodes.
2963   *
2964   * Note, if a znode was deleted or changed too much, then this function will
2965   * not find it. For situations like this UBIFS has the old index RB-tree
2966   * (indexed by @lnum:@offs).
2967   *
2968   * This function returns a pointer to the znode found or %NULL if it is not
2969   * found. A negative error code is returned on failure.
2970   */
2971  static struct ubifs_znode *lookup_znode(struct ubifs_info *c,
2972  					union ubifs_key *key, int level,
2973  					int lnum, int offs)
2974  {
2975  	struct ubifs_znode *znode, *zn;
2976  	int n, nn;
2977  
2978  	ubifs_assert(key_type(c, key) < UBIFS_INVALID_KEY);
2979  
2980  	/*
2981  	 * The arguments have probably been read off flash, so don't assume
2982  	 * they are valid.
2983  	 */
2984  	if (level < 0)
2985  		return ERR_PTR(-EINVAL);
2986  
2987  	/* Get the root znode */
2988  	znode = c->zroot.znode;
2989  	if (!znode) {
2990  		znode = ubifs_load_znode(c, &c->zroot, NULL, 0);
2991  		if (IS_ERR(znode))
2992  			return znode;
2993  	}
2994  	/* Check if it is the one we are looking for */
2995  	if (c->zroot.lnum == lnum && c->zroot.offs == offs)
2996  		return znode;
2997  	/* Descend to the parent level i.e. (level + 1) */
2998  	if (level >= znode->level)
2999  		return NULL;
3000  	while (1) {
3001  		ubifs_search_zbranch(c, znode, key, &n);
3002  		if (n < 0) {
3003  			/*
3004  			 * We reached a znode where the leftmost key is greater
3005  			 * than the key we are searching for. This is the same
3006  			 * situation as the one described in a huge comment at
3007  			 * the end of the 'ubifs_lookup_level0()' function. And
3008  			 * for exactly the same reasons we have to try to look
3009  			 * left before giving up.
3010  			 */
3011  			znode = left_znode(c, znode);
3012  			if (!znode)
3013  				return NULL;
3014  			if (IS_ERR(znode))
3015  				return znode;
3016  			ubifs_search_zbranch(c, znode, key, &n);
3017  			ubifs_assert(n >= 0);
3018  		}
3019  		if (znode->level == level + 1)
3020  			break;
3021  		znode = get_znode(c, znode, n);
3022  		if (IS_ERR(znode))
3023  			return znode;
3024  	}
3025  	/* Check if the child is the one we are looking for */
3026  	if (znode->zbranch[n].lnum == lnum && znode->zbranch[n].offs == offs)
3027  		return get_znode(c, znode, n);
3028  	/* If the key is unique, there is nowhere else to look */
3029  	if (!is_hash_key(c, key))
3030  		return NULL;
3031  	/*
3032  	 * The key is not unique and so may be also in the znodes to either
3033  	 * side.
3034  	 */
3035  	zn = znode;
3036  	nn = n;
3037  	/* Look left */
3038  	while (1) {
3039  		/* Move one branch to the left */
3040  		if (n)
3041  			n -= 1;
3042  		else {
3043  			znode = left_znode(c, znode);
3044  			if (!znode)
3045  				break;
3046  			if (IS_ERR(znode))
3047  				return znode;
3048  			n = znode->child_cnt - 1;
3049  		}
3050  		/* Check it */
3051  		if (znode->zbranch[n].lnum == lnum &&
3052  		    znode->zbranch[n].offs == offs)
3053  			return get_znode(c, znode, n);
3054  		/* Stop if the key is less than the one we are looking for */
3055  		if (keys_cmp(c, &znode->zbranch[n].key, key) < 0)
3056  			break;
3057  	}
3058  	/* Back to the middle */
3059  	znode = zn;
3060  	n = nn;
3061  	/* Look right */
3062  	while (1) {
3063  		/* Move one branch to the right */
3064  		if (++n >= znode->child_cnt) {
3065  			znode = right_znode(c, znode);
3066  			if (!znode)
3067  				break;
3068  			if (IS_ERR(znode))
3069  				return znode;
3070  			n = 0;
3071  		}
3072  		/* Check it */
3073  		if (znode->zbranch[n].lnum == lnum &&
3074  		    znode->zbranch[n].offs == offs)
3075  			return get_znode(c, znode, n);
3076  		/* Stop if the key is greater than the one we are looking for */
3077  		if (keys_cmp(c, &znode->zbranch[n].key, key) > 0)
3078  			break;
3079  	}
3080  	return NULL;
3081  }
3082  
3083  /**
3084   * is_idx_node_in_tnc - determine if an index node is in the TNC.
3085   * @c: UBIFS file-system description object
3086   * @key: key of index node
3087   * @level: index node level
3088   * @lnum: LEB number of index node
3089   * @offs: offset of index node
3090   *
3091   * This function returns %0 if the index node is not referred to in the TNC, %1
3092   * if the index node is referred to in the TNC and the corresponding znode is
3093   * dirty, %2 if an index node is referred to in the TNC and the corresponding
3094   * znode is clean, and a negative error code in case of failure.
3095   *
3096   * Note, the @key argument has to be the key of the first child. Also note,
3097   * this function relies on the fact that 0:0 is never a valid LEB number and
3098   * offset for a main-area node.
3099   */
3100  int is_idx_node_in_tnc(struct ubifs_info *c, union ubifs_key *key, int level,
3101  		       int lnum, int offs)
3102  {
3103  	struct ubifs_znode *znode;
3104  
3105  	znode = lookup_znode(c, key, level, lnum, offs);
3106  	if (!znode)
3107  		return 0;
3108  	if (IS_ERR(znode))
3109  		return PTR_ERR(znode);
3110  
3111  	return ubifs_zn_dirty(znode) ? 1 : 2;
3112  }
3113  
3114  /**
3115   * is_leaf_node_in_tnc - determine if a non-indexing not is in the TNC.
3116   * @c: UBIFS file-system description object
3117   * @key: node key
3118   * @lnum: node LEB number
3119   * @offs: node offset
3120   *
3121   * This function returns %1 if the node is referred to in the TNC, %0 if it is
3122   * not, and a negative error code in case of failure.
3123   *
3124   * Note, this function relies on the fact that 0:0 is never a valid LEB number
3125   * and offset for a main-area node.
3126   */
3127  static int is_leaf_node_in_tnc(struct ubifs_info *c, union ubifs_key *key,
3128  			       int lnum, int offs)
3129  {
3130  	struct ubifs_zbranch *zbr;
3131  	struct ubifs_znode *znode, *zn;
3132  	int n, found, err, nn;
3133  	const int unique = !is_hash_key(c, key);
3134  
3135  	found = ubifs_lookup_level0(c, key, &znode, &n);
3136  	if (found < 0)
3137  		return found; /* Error code */
3138  	if (!found)
3139  		return 0;
3140  	zbr = &znode->zbranch[n];
3141  	if (lnum == zbr->lnum && offs == zbr->offs)
3142  		return 1; /* Found it */
3143  	if (unique)
3144  		return 0;
3145  	/*
3146  	 * Because the key is not unique, we have to look left
3147  	 * and right as well
3148  	 */
3149  	zn = znode;
3150  	nn = n;
3151  	/* Look left */
3152  	while (1) {
3153  		err = tnc_prev(c, &znode, &n);
3154  		if (err == -ENOENT)
3155  			break;
3156  		if (err)
3157  			return err;
3158  		if (keys_cmp(c, key, &znode->zbranch[n].key))
3159  			break;
3160  		zbr = &znode->zbranch[n];
3161  		if (lnum == zbr->lnum && offs == zbr->offs)
3162  			return 1; /* Found it */
3163  	}
3164  	/* Look right */
3165  	znode = zn;
3166  	n = nn;
3167  	while (1) {
3168  		err = tnc_next(c, &znode, &n);
3169  		if (err) {
3170  			if (err == -ENOENT)
3171  				return 0;
3172  			return err;
3173  		}
3174  		if (keys_cmp(c, key, &znode->zbranch[n].key))
3175  			break;
3176  		zbr = &znode->zbranch[n];
3177  		if (lnum == zbr->lnum && offs == zbr->offs)
3178  			return 1; /* Found it */
3179  	}
3180  	return 0;
3181  }
3182  
3183  /**
3184   * ubifs_tnc_has_node - determine whether a node is in the TNC.
3185   * @c: UBIFS file-system description object
3186   * @key: node key
3187   * @level: index node level (if it is an index node)
3188   * @lnum: node LEB number
3189   * @offs: node offset
3190   * @is_idx: non-zero if the node is an index node
3191   *
3192   * This function returns %1 if the node is in the TNC, %0 if it is not, and a
3193   * negative error code in case of failure. For index nodes, @key has to be the
3194   * key of the first child. An index node is considered to be in the TNC only if
3195   * the corresponding znode is clean or has not been loaded.
3196   */
3197  int ubifs_tnc_has_node(struct ubifs_info *c, union ubifs_key *key, int level,
3198  		       int lnum, int offs, int is_idx)
3199  {
3200  	int err;
3201  
3202  	mutex_lock(&c->tnc_mutex);
3203  	if (is_idx) {
3204  		err = is_idx_node_in_tnc(c, key, level, lnum, offs);
3205  		if (err < 0)
3206  			goto out_unlock;
3207  		if (err == 1)
3208  			/* The index node was found but it was dirty */
3209  			err = 0;
3210  		else if (err == 2)
3211  			/* The index node was found and it was clean */
3212  			err = 1;
3213  		else
3214  			BUG_ON(err != 0);
3215  	} else
3216  		err = is_leaf_node_in_tnc(c, key, lnum, offs);
3217  
3218  out_unlock:
3219  	mutex_unlock(&c->tnc_mutex);
3220  	return err;
3221  }
3222  
3223  /**
3224   * ubifs_dirty_idx_node - dirty an index node.
3225   * @c: UBIFS file-system description object
3226   * @key: index node key
3227   * @level: index node level
3228   * @lnum: index node LEB number
3229   * @offs: index node offset
3230   *
3231   * This function loads and dirties an index node so that it can be garbage
3232   * collected. The @key argument has to be the key of the first child. This
3233   * function relies on the fact that 0:0 is never a valid LEB number and offset
3234   * for a main-area node. Returns %0 on success and a negative error code on
3235   * failure.
3236   */
3237  int ubifs_dirty_idx_node(struct ubifs_info *c, union ubifs_key *key, int level,
3238  			 int lnum, int offs)
3239  {
3240  	struct ubifs_znode *znode;
3241  	int err = 0;
3242  
3243  	mutex_lock(&c->tnc_mutex);
3244  	znode = lookup_znode(c, key, level, lnum, offs);
3245  	if (!znode)
3246  		goto out_unlock;
3247  	if (IS_ERR(znode)) {
3248  		err = PTR_ERR(znode);
3249  		goto out_unlock;
3250  	}
3251  	znode = dirty_cow_bottom_up(c, znode);
3252  	if (IS_ERR(znode)) {
3253  		err = PTR_ERR(znode);
3254  		goto out_unlock;
3255  	}
3256  
3257  out_unlock:
3258  	mutex_unlock(&c->tnc_mutex);
3259  	return err;
3260  }
3261  
3262  /**
3263   * dbg_check_inode_size - check if inode size is correct.
3264   * @c: UBIFS file-system description object
3265   * @inum: inode number
3266   * @size: inode size
3267   *
3268   * This function makes sure that the inode size (@size) is correct and it does
3269   * not have any pages beyond @size. Returns zero if the inode is OK, %-EINVAL
3270   * if it has a data page beyond @size, and other negative error code in case of
3271   * other errors.
3272   */
3273  int dbg_check_inode_size(struct ubifs_info *c, const struct inode *inode,
3274  			 loff_t size)
3275  {
3276  	int err, n;
3277  	union ubifs_key from_key, to_key, *key;
3278  	struct ubifs_znode *znode;
3279  	unsigned int block;
3280  
3281  	if (!S_ISREG(inode->i_mode))
3282  		return 0;
3283  	if (!dbg_is_chk_gen(c))
3284  		return 0;
3285  
3286  	block = (size + UBIFS_BLOCK_SIZE - 1) >> UBIFS_BLOCK_SHIFT;
3287  	data_key_init(c, &from_key, inode->i_ino, block);
3288  	highest_data_key(c, &to_key, inode->i_ino);
3289  
3290  	mutex_lock(&c->tnc_mutex);
3291  	err = ubifs_lookup_level0(c, &from_key, &znode, &n);
3292  	if (err < 0)
3293  		goto out_unlock;
3294  
3295  	if (err) {
3296  		key = &from_key;
3297  		goto out_dump;
3298  	}
3299  
3300  	err = tnc_next(c, &znode, &n);
3301  	if (err == -ENOENT) {
3302  		err = 0;
3303  		goto out_unlock;
3304  	}
3305  	if (err < 0)
3306  		goto out_unlock;
3307  
3308  	ubifs_assert(err == 0);
3309  	key = &znode->zbranch[n].key;
3310  	if (!key_in_range(c, key, &from_key, &to_key))
3311  		goto out_unlock;
3312  
3313  out_dump:
3314  	block = key_block(c, key);
3315  	ubifs_err(c, "inode %lu has size %lld, but there are data at offset %lld",
3316  		  (unsigned long)inode->i_ino, size,
3317  		  ((loff_t)block) << UBIFS_BLOCK_SHIFT);
3318  	mutex_unlock(&c->tnc_mutex);
3319  	ubifs_dump_inode(c, inode);
3320  	dump_stack();
3321  	return -EINVAL;
3322  
3323  out_unlock:
3324  	mutex_unlock(&c->tnc_mutex);
3325  	return err;
3326  }
3327