xref: /openbmc/linux/fs/jffs2/readinode.c (revision 87c2ce3b)
1 /*
2  * JFFS2 -- Journalling Flash File System, Version 2.
3  *
4  * Copyright (C) 2001-2003 Red Hat, Inc.
5  *
6  * Created by David Woodhouse <dwmw2@infradead.org>
7  *
8  * For licensing information, see the file 'LICENCE' in this directory.
9  *
10  * $Id: readinode.c,v 1.143 2005/11/07 11:14:41 gleixner Exp $
11  *
12  */
13 
14 #include <linux/kernel.h>
15 #include <linux/sched.h>
16 #include <linux/slab.h>
17 #include <linux/fs.h>
18 #include <linux/crc32.h>
19 #include <linux/pagemap.h>
20 #include <linux/mtd/mtd.h>
21 #include <linux/compiler.h>
22 #include "nodelist.h"
23 
24 /*
25  * Put a new tmp_dnode_info into the temporaty RB-tree, keeping the list in
26  * order of increasing version.
27  */
28 static void jffs2_add_tn_to_tree(struct jffs2_tmp_dnode_info *tn, struct rb_root *list)
29 {
30 	struct rb_node **p = &list->rb_node;
31 	struct rb_node * parent = NULL;
32 	struct jffs2_tmp_dnode_info *this;
33 
34 	while (*p) {
35 		parent = *p;
36 		this = rb_entry(parent, struct jffs2_tmp_dnode_info, rb);
37 
38 		/* There may actually be a collision here, but it doesn't
39 		   actually matter. As long as the two nodes with the same
40 		   version are together, it's all fine. */
41 		if (tn->version > this->version)
42 			p = &(*p)->rb_left;
43 		else
44 			p = &(*p)->rb_right;
45 	}
46 
47 	rb_link_node(&tn->rb, parent, p);
48 	rb_insert_color(&tn->rb, list);
49 }
50 
51 static void jffs2_free_tmp_dnode_info_list(struct rb_root *list)
52 {
53 	struct rb_node *this;
54 	struct jffs2_tmp_dnode_info *tn;
55 
56 	this = list->rb_node;
57 
58 	/* Now at bottom of tree */
59 	while (this) {
60 		if (this->rb_left)
61 			this = this->rb_left;
62 		else if (this->rb_right)
63 			this = this->rb_right;
64 		else {
65 			tn = rb_entry(this, struct jffs2_tmp_dnode_info, rb);
66 			jffs2_free_full_dnode(tn->fn);
67 			jffs2_free_tmp_dnode_info(tn);
68 
69 			this = this->rb_parent;
70 			if (!this)
71 				break;
72 
73 			if (this->rb_left == &tn->rb)
74 				this->rb_left = NULL;
75 			else if (this->rb_right == &tn->rb)
76 				this->rb_right = NULL;
77 			else BUG();
78 		}
79 	}
80 	list->rb_node = NULL;
81 }
82 
83 static void jffs2_free_full_dirent_list(struct jffs2_full_dirent *fd)
84 {
85 	struct jffs2_full_dirent *next;
86 
87 	while (fd) {
88 		next = fd->next;
89 		jffs2_free_full_dirent(fd);
90 		fd = next;
91 	}
92 }
93 
94 /* Returns first valid node after 'ref'. May return 'ref' */
95 static struct jffs2_raw_node_ref *jffs2_first_valid_node(struct jffs2_raw_node_ref *ref)
96 {
97 	while (ref && ref->next_in_ino) {
98 		if (!ref_obsolete(ref))
99 			return ref;
100 		dbg_noderef("node at 0x%08x is obsoleted. Ignoring.\n", ref_offset(ref));
101 		ref = ref->next_in_ino;
102 	}
103 	return NULL;
104 }
105 
106 /*
107  * Helper function for jffs2_get_inode_nodes().
108  * It is called every time an directory entry node is found.
109  *
110  * Returns: 0 on succes;
111  * 	    1 if the node should be marked obsolete;
112  * 	    negative error code on failure.
113  */
114 static inline int read_direntry(struct jffs2_sb_info *c, struct jffs2_raw_node_ref *ref,
115 				struct jffs2_raw_dirent *rd, uint32_t read, struct jffs2_full_dirent **fdp,
116 				uint32_t *latest_mctime, uint32_t *mctime_ver)
117 {
118 	struct jffs2_full_dirent *fd;
119 
120 	/* The direntry nodes are checked during the flash scanning */
121 	BUG_ON(ref_flags(ref) == REF_UNCHECKED);
122 	/* Obsoleted. This cannot happen, surely? dwmw2 20020308 */
123 	BUG_ON(ref_obsolete(ref));
124 
125 	/* Sanity check */
126 	if (unlikely(PAD((rd->nsize + sizeof(*rd))) != PAD(je32_to_cpu(rd->totlen)))) {
127 		JFFS2_ERROR("illegal nsize in node at %#08x: nsize %#02x, totlen %#04x\n",
128 		       ref_offset(ref), rd->nsize, je32_to_cpu(rd->totlen));
129 		return 1;
130 	}
131 
132 	fd = jffs2_alloc_full_dirent(rd->nsize + 1);
133 	if (unlikely(!fd))
134 		return -ENOMEM;
135 
136 	fd->raw = ref;
137 	fd->version = je32_to_cpu(rd->version);
138 	fd->ino = je32_to_cpu(rd->ino);
139 	fd->type = rd->type;
140 
141 	/* Pick out the mctime of the latest dirent */
142 	if(fd->version > *mctime_ver && je32_to_cpu(rd->mctime)) {
143 		*mctime_ver = fd->version;
144 		*latest_mctime = je32_to_cpu(rd->mctime);
145 	}
146 
147 	/*
148 	 * Copy as much of the name as possible from the raw
149 	 * dirent we've already read from the flash.
150 	 */
151 	if (read > sizeof(*rd))
152 		memcpy(&fd->name[0], &rd->name[0],
153 		       min_t(uint32_t, rd->nsize, (read - sizeof(*rd)) ));
154 
155 	/* Do we need to copy any more of the name directly from the flash? */
156 	if (rd->nsize + sizeof(*rd) > read) {
157 		/* FIXME: point() */
158 		int err;
159 		int already = read - sizeof(*rd);
160 
161 		err = jffs2_flash_read(c, (ref_offset(ref)) + read,
162 				rd->nsize - already, &read, &fd->name[already]);
163 		if (unlikely(read != rd->nsize - already) && likely(!err))
164 			return -EIO;
165 
166 		if (unlikely(err)) {
167 			JFFS2_ERROR("read remainder of name: error %d\n", err);
168 			jffs2_free_full_dirent(fd);
169 			return -EIO;
170 		}
171 	}
172 
173 	fd->nhash = full_name_hash(fd->name, rd->nsize);
174 	fd->next = NULL;
175 	fd->name[rd->nsize] = '\0';
176 
177 	/*
178 	 * Wheee. We now have a complete jffs2_full_dirent structure, with
179 	 * the name in it and everything. Link it into the list
180 	 */
181 	jffs2_add_fd_to_list(c, fd, fdp);
182 
183 	return 0;
184 }
185 
186 /*
187  * Helper function for jffs2_get_inode_nodes().
188  * It is called every time an inode node is found.
189  *
190  * Returns: 0 on succes;
191  * 	    1 if the node should be marked obsolete;
192  * 	    negative error code on failure.
193  */
194 static inline int read_dnode(struct jffs2_sb_info *c, struct jffs2_raw_node_ref *ref,
195 			     struct jffs2_raw_inode *rd, struct rb_root *tnp, int rdlen,
196 			     uint32_t *latest_mctime, uint32_t *mctime_ver)
197 {
198 	struct jffs2_tmp_dnode_info *tn;
199 	uint32_t len, csize;
200 	int ret = 1;
201 
202 	/* Obsoleted. This cannot happen, surely? dwmw2 20020308 */
203 	BUG_ON(ref_obsolete(ref));
204 
205 	tn = jffs2_alloc_tmp_dnode_info();
206 	if (!tn) {
207 		JFFS2_ERROR("failed to allocate tn (%d bytes).\n", sizeof(*tn));
208 		return -ENOMEM;
209 	}
210 
211 	tn->partial_crc = 0;
212 	csize = je32_to_cpu(rd->csize);
213 
214 	/* If we've never checked the CRCs on this node, check them now */
215 	if (ref_flags(ref) == REF_UNCHECKED) {
216 		uint32_t crc;
217 
218 		crc = crc32(0, rd, sizeof(*rd) - 8);
219 		if (unlikely(crc != je32_to_cpu(rd->node_crc))) {
220 			JFFS2_NOTICE("header CRC failed on node at %#08x: read %#08x, calculated %#08x\n",
221 					ref_offset(ref), je32_to_cpu(rd->node_crc), crc);
222 			goto free_out;
223 		}
224 
225 		/* Sanity checks */
226 		if (unlikely(je32_to_cpu(rd->offset) > je32_to_cpu(rd->isize)) ||
227 		    unlikely(PAD(je32_to_cpu(rd->csize) + sizeof(*rd)) != PAD(je32_to_cpu(rd->totlen)))) {
228 				JFFS2_WARNING("inode node header CRC is corrupted at %#08x\n", ref_offset(ref));
229 				jffs2_dbg_dump_node(c, ref_offset(ref));
230 			goto free_out;
231 		}
232 
233 		if (jffs2_is_writebuffered(c) && csize != 0) {
234 			/* At this point we are supposed to check the data CRC
235 			 * of our unchecked node. But thus far, we do not
236 			 * know whether the node is valid or obsolete. To
237 			 * figure this out, we need to walk all the nodes of
238 			 * the inode and build the inode fragtree. We don't
239 			 * want to spend time checking data of nodes which may
240 			 * later be found to be obsolete. So we put off the full
241 			 * data CRC checking until we have read all the inode
242 			 * nodes and have started building the fragtree.
243 			 *
244 			 * The fragtree is being built starting with nodes
245 			 * having the highest version number, so we'll be able
246 			 * to detect whether a node is valid (i.e., it is not
247 			 * overlapped by a node with higher version) or not.
248 			 * And we'll be able to check only those nodes, which
249 			 * are not obsolete.
250 			 *
251 			 * Of course, this optimization only makes sense in case
252 			 * of NAND flashes (or other flashes whith
253 			 * !jffs2_can_mark_obsolete()), since on NOR flashes
254 			 * nodes are marked obsolete physically.
255 			 *
256 			 * Since NAND flashes (or other flashes with
257 			 * jffs2_is_writebuffered(c)) are anyway read by
258 			 * fractions of c->wbuf_pagesize, and we have just read
259 			 * the node header, it is likely that the starting part
260 			 * of the node data is also read when we read the
261 			 * header. So we don't mind to check the CRC of the
262 			 * starting part of the data of the node now, and check
263 			 * the second part later (in jffs2_check_node_data()).
264 			 * Of course, we will not need to re-read and re-check
265 			 * the NAND page which we have just read. This is why we
266 			 * read the whole NAND page at jffs2_get_inode_nodes(),
267 			 * while we needed only the node header.
268 			 */
269 			unsigned char *buf;
270 
271 			/* 'buf' will point to the start of data */
272 			buf = (unsigned char *)rd + sizeof(*rd);
273 			/* len will be the read data length */
274 			len = min_t(uint32_t, rdlen - sizeof(*rd), csize);
275 			tn->partial_crc = crc32(0, buf, len);
276 
277 			dbg_readinode("Calculates CRC (%#08x) for %d bytes, csize %d\n", tn->partial_crc, len, csize);
278 
279 			/* If we actually calculated the whole data CRC
280 			 * and it is wrong, drop the node. */
281 			if (len >= csize && unlikely(tn->partial_crc != je32_to_cpu(rd->data_crc))) {
282 				JFFS2_NOTICE("wrong data CRC in data node at 0x%08x: read %#08x, calculated %#08x.\n",
283 					ref_offset(ref), tn->partial_crc, je32_to_cpu(rd->data_crc));
284 				goto free_out;
285 			}
286 
287 		} else if (csize == 0) {
288 			/*
289 			 * We checked the header CRC. If the node has no data, adjust
290 			 * the space accounting now. For other nodes this will be done
291 			 * later either when the node is marked obsolete or when its
292 			 * data is checked.
293 			 */
294 			struct jffs2_eraseblock *jeb;
295 
296 			dbg_readinode("the node has no data.\n");
297 			jeb = &c->blocks[ref->flash_offset / c->sector_size];
298 			len = ref_totlen(c, jeb, ref);
299 
300 			spin_lock(&c->erase_completion_lock);
301 			jeb->used_size += len;
302 			jeb->unchecked_size -= len;
303 			c->used_size += len;
304 			c->unchecked_size -= len;
305 			ref->flash_offset = ref_offset(ref) | REF_NORMAL;
306 			spin_unlock(&c->erase_completion_lock);
307 		}
308 	}
309 
310 	tn->fn = jffs2_alloc_full_dnode();
311 	if (!tn->fn) {
312 		JFFS2_ERROR("alloc fn failed\n");
313 		ret = -ENOMEM;
314 		goto free_out;
315 	}
316 
317 	tn->version = je32_to_cpu(rd->version);
318 	tn->fn->ofs = je32_to_cpu(rd->offset);
319 	tn->data_crc = je32_to_cpu(rd->data_crc);
320 	tn->csize = csize;
321 	tn->fn->raw = ref;
322 
323 	/* There was a bug where we wrote hole nodes out with
324 	   csize/dsize swapped. Deal with it */
325 	if (rd->compr == JFFS2_COMPR_ZERO && !je32_to_cpu(rd->dsize) && csize)
326 		tn->fn->size = csize;
327 	else // normal case...
328 		tn->fn->size = je32_to_cpu(rd->dsize);
329 
330 	dbg_readinode("dnode @%08x: ver %u, offset %#04x, dsize %#04x, csize %#04x\n",
331 		  ref_offset(ref), je32_to_cpu(rd->version), je32_to_cpu(rd->offset), je32_to_cpu(rd->dsize), csize);
332 
333 	jffs2_add_tn_to_tree(tn, tnp);
334 
335 	return 0;
336 
337 free_out:
338 	jffs2_free_tmp_dnode_info(tn);
339 	return ret;
340 }
341 
342 /*
343  * Helper function for jffs2_get_inode_nodes().
344  * It is called every time an unknown node is found.
345  *
346  * Returns: 0 on succes;
347  * 	    1 if the node should be marked obsolete;
348  * 	    negative error code on failure.
349  */
350 static inline int read_unknown(struct jffs2_sb_info *c, struct jffs2_raw_node_ref *ref, struct jffs2_unknown_node *un)
351 {
352 	/* We don't mark unknown nodes as REF_UNCHECKED */
353 	BUG_ON(ref_flags(ref) == REF_UNCHECKED);
354 
355 	un->nodetype = cpu_to_je16(JFFS2_NODE_ACCURATE | je16_to_cpu(un->nodetype));
356 
357 	if (crc32(0, un, sizeof(struct jffs2_unknown_node) - 4) != je32_to_cpu(un->hdr_crc)) {
358 		/* Hmmm. This should have been caught at scan time. */
359 		JFFS2_NOTICE("node header CRC failed at %#08x. But it must have been OK earlier.\n", ref_offset(ref));
360 		jffs2_dbg_dump_node(c, ref_offset(ref));
361 		return 1;
362 	} else {
363 		switch(je16_to_cpu(un->nodetype) & JFFS2_COMPAT_MASK) {
364 
365 		case JFFS2_FEATURE_INCOMPAT:
366 			JFFS2_ERROR("unknown INCOMPAT nodetype %#04X at %#08x\n",
367 				je16_to_cpu(un->nodetype), ref_offset(ref));
368 			/* EEP */
369 			BUG();
370 			break;
371 
372 		case JFFS2_FEATURE_ROCOMPAT:
373 			JFFS2_ERROR("unknown ROCOMPAT nodetype %#04X at %#08x\n",
374 					je16_to_cpu(un->nodetype), ref_offset(ref));
375 			BUG_ON(!(c->flags & JFFS2_SB_FLAG_RO));
376 			break;
377 
378 		case JFFS2_FEATURE_RWCOMPAT_COPY:
379 			JFFS2_NOTICE("unknown RWCOMPAT_COPY nodetype %#04X at %#08x\n",
380 					je16_to_cpu(un->nodetype), ref_offset(ref));
381 			break;
382 
383 		case JFFS2_FEATURE_RWCOMPAT_DELETE:
384 			JFFS2_NOTICE("unknown RWCOMPAT_DELETE nodetype %#04X at %#08x\n",
385 					je16_to_cpu(un->nodetype), ref_offset(ref));
386 			return 1;
387 		}
388 	}
389 
390 	return 0;
391 }
392 
393 /*
394  * Helper function for jffs2_get_inode_nodes().
395  * The function detects whether more data should be read and reads it if yes.
396  *
397  * Returns: 0 on succes;
398  * 	    negative error code on failure.
399  */
400 static int read_more(struct jffs2_sb_info *c, struct jffs2_raw_node_ref *ref,
401 		     int right_size, int *rdlen, unsigned char *buf, unsigned char *bufstart)
402 {
403 	int right_len, err, len;
404 	size_t retlen;
405 	uint32_t offs;
406 
407 	if (jffs2_is_writebuffered(c)) {
408 		right_len = c->wbuf_pagesize - (bufstart - buf);
409 		if (right_size + (int)(bufstart - buf) > c->wbuf_pagesize)
410 			right_len += c->wbuf_pagesize;
411 	} else
412 		right_len = right_size;
413 
414 	if (*rdlen == right_len)
415 		return 0;
416 
417 	/* We need to read more data */
418 	offs = ref_offset(ref) + *rdlen;
419 	if (jffs2_is_writebuffered(c)) {
420 		bufstart = buf + c->wbuf_pagesize;
421 		len = c->wbuf_pagesize;
422 	} else {
423 		bufstart = buf + *rdlen;
424 		len = right_size - *rdlen;
425 	}
426 
427 	dbg_readinode("read more %d bytes\n", len);
428 
429 	err = jffs2_flash_read(c, offs, len, &retlen, bufstart);
430 	if (err) {
431 		JFFS2_ERROR("can not read %d bytes from 0x%08x, "
432 			"error code: %d.\n", len, offs, err);
433 		return err;
434 	}
435 
436 	if (retlen < len) {
437 		JFFS2_ERROR("short read at %#08x: %d instead of %d.\n",
438 				offs, retlen, len);
439 		return -EIO;
440 	}
441 
442 	*rdlen = right_len;
443 
444 	return 0;
445 }
446 
447 /* Get tmp_dnode_info and full_dirent for all non-obsolete nodes associated
448    with this ino, returning the former in order of version */
449 static int jffs2_get_inode_nodes(struct jffs2_sb_info *c, struct jffs2_inode_info *f,
450 				 struct rb_root *tnp, struct jffs2_full_dirent **fdp,
451 				 uint32_t *highest_version, uint32_t *latest_mctime,
452 				 uint32_t *mctime_ver)
453 {
454 	struct jffs2_raw_node_ref *ref, *valid_ref;
455 	struct rb_root ret_tn = RB_ROOT;
456 	struct jffs2_full_dirent *ret_fd = NULL;
457 	unsigned char *buf = NULL;
458 	union jffs2_node_union *node;
459 	size_t retlen;
460 	int len, err;
461 
462 	*mctime_ver = 0;
463 
464 	dbg_readinode("ino #%u\n", f->inocache->ino);
465 
466 	if (jffs2_is_writebuffered(c)) {
467 		/*
468 		 * If we have the write buffer, we assume the minimal I/O unit
469 		 * is c->wbuf_pagesize. We implement some optimizations which in
470 		 * this case and we need a temporary buffer of size =
471 		 * 2*c->wbuf_pagesize bytes (see comments in read_dnode()).
472 		 * Basically, we want to read not only the node header, but the
473 		 * whole wbuf (NAND page in case of NAND) or 2, if the node
474 		 * header overlaps the border between the 2 wbufs.
475 		 */
476 		len = 2*c->wbuf_pagesize;
477 	} else {
478 		/*
479 		 * When there is no write buffer, the size of the temporary
480 		 * buffer is the size of the larges node header.
481 		 */
482 		len = sizeof(union jffs2_node_union);
483 	}
484 
485 	/* FIXME: in case of NOR and available ->point() this
486 	 * needs to be fixed. */
487 	buf = kmalloc(len, GFP_KERNEL);
488 	if (!buf)
489 		return -ENOMEM;
490 
491 	spin_lock(&c->erase_completion_lock);
492 	valid_ref = jffs2_first_valid_node(f->inocache->nodes);
493 	if (!valid_ref && f->inocache->ino != 1)
494 		JFFS2_WARNING("Eep. No valid nodes for ino #%u.\n", f->inocache->ino);
495 	while (valid_ref) {
496 		unsigned char *bufstart;
497 
498 		/* We can hold a pointer to a non-obsolete node without the spinlock,
499 		   but _obsolete_ nodes may disappear at any time, if the block
500 		   they're in gets erased. So if we mark 'ref' obsolete while we're
501 		   not holding the lock, it can go away immediately. For that reason,
502 		   we find the next valid node first, before processing 'ref'.
503 		*/
504 		ref = valid_ref;
505 		valid_ref = jffs2_first_valid_node(ref->next_in_ino);
506 		spin_unlock(&c->erase_completion_lock);
507 
508 		cond_resched();
509 
510 		/*
511 		 * At this point we don't know the type of the node we're going
512 		 * to read, so we do not know the size of its header. In order
513 		 * to minimize the amount of flash IO we assume the node has
514 		 * size = JFFS2_MIN_NODE_HEADER.
515 		 */
516 		if (jffs2_is_writebuffered(c)) {
517 			/*
518 			 * We treat 'buf' as 2 adjacent wbufs. We want to
519 			 * adjust bufstart such as it points to the
520 			 * beginning of the node within this wbuf.
521 			 */
522 			bufstart = buf + (ref_offset(ref) % c->wbuf_pagesize);
523 			/* We will read either one wbuf or 2 wbufs. */
524 			len = c->wbuf_pagesize - (bufstart - buf);
525 			if (JFFS2_MIN_NODE_HEADER + (int)(bufstart - buf) > c->wbuf_pagesize) {
526 				/* The header spans the border of the first wbuf */
527 				len += c->wbuf_pagesize;
528 			}
529 		} else {
530 			bufstart = buf;
531 			len = JFFS2_MIN_NODE_HEADER;
532 		}
533 
534 		dbg_readinode("read %d bytes at %#08x(%d).\n", len, ref_offset(ref), ref_flags(ref));
535 
536 		/* FIXME: point() */
537 		err = jffs2_flash_read(c, ref_offset(ref), len,
538 				       &retlen, bufstart);
539 		if (err) {
540 			JFFS2_ERROR("can not read %d bytes from 0x%08x, " "error code: %d.\n", len, ref_offset(ref), err);
541 			goto free_out;
542 		}
543 
544 		if (retlen < len) {
545 			JFFS2_ERROR("short read at %#08x: %d instead of %d.\n", ref_offset(ref), retlen, len);
546 			err = -EIO;
547 			goto free_out;
548 		}
549 
550 		node = (union jffs2_node_union *)bufstart;
551 
552 		switch (je16_to_cpu(node->u.nodetype)) {
553 
554 		case JFFS2_NODETYPE_DIRENT:
555 
556 			if (JFFS2_MIN_NODE_HEADER < sizeof(struct jffs2_raw_dirent)) {
557 				err = read_more(c, ref, sizeof(struct jffs2_raw_dirent), &len, buf, bufstart);
558 				if (unlikely(err))
559 					goto free_out;
560 			}
561 
562 			err = read_direntry(c, ref, &node->d, retlen, &ret_fd, latest_mctime, mctime_ver);
563 			if (err == 1) {
564 				jffs2_mark_node_obsolete(c, ref);
565 				break;
566 			} else if (unlikely(err))
567 				goto free_out;
568 
569 			if (je32_to_cpu(node->d.version) > *highest_version)
570 				*highest_version = je32_to_cpu(node->d.version);
571 
572 			break;
573 
574 		case JFFS2_NODETYPE_INODE:
575 
576 			if (JFFS2_MIN_NODE_HEADER < sizeof(struct jffs2_raw_inode)) {
577 				err = read_more(c, ref, sizeof(struct jffs2_raw_inode), &len, buf, bufstart);
578 				if (unlikely(err))
579 					goto free_out;
580 			}
581 
582 			err = read_dnode(c, ref, &node->i, &ret_tn, len, latest_mctime, mctime_ver);
583 			if (err == 1) {
584 				jffs2_mark_node_obsolete(c, ref);
585 				break;
586 			} else if (unlikely(err))
587 				goto free_out;
588 
589 			if (je32_to_cpu(node->i.version) > *highest_version)
590 				*highest_version = je32_to_cpu(node->i.version);
591 
592 			break;
593 
594 		default:
595 			if (JFFS2_MIN_NODE_HEADER < sizeof(struct jffs2_unknown_node)) {
596 				err = read_more(c, ref, sizeof(struct jffs2_unknown_node), &len, buf, bufstart);
597 				if (unlikely(err))
598 					goto free_out;
599 			}
600 
601 			err = read_unknown(c, ref, &node->u);
602 			if (err == 1) {
603 				jffs2_mark_node_obsolete(c, ref);
604 				break;
605 			} else if (unlikely(err))
606 				goto free_out;
607 
608 		}
609 		spin_lock(&c->erase_completion_lock);
610 	}
611 
612 	spin_unlock(&c->erase_completion_lock);
613 	*tnp = ret_tn;
614 	*fdp = ret_fd;
615 	kfree(buf);
616 
617 	dbg_readinode("nodes of inode #%u were read, the highest version is %u, latest_mctime %u, mctime_ver %u.\n",
618 			f->inocache->ino, *highest_version, *latest_mctime, *mctime_ver);
619 	return 0;
620 
621  free_out:
622 	jffs2_free_tmp_dnode_info_list(&ret_tn);
623 	jffs2_free_full_dirent_list(ret_fd);
624 	kfree(buf);
625 	return err;
626 }
627 
628 static int jffs2_do_read_inode_internal(struct jffs2_sb_info *c,
629 					struct jffs2_inode_info *f,
630 					struct jffs2_raw_inode *latest_node)
631 {
632 	struct jffs2_tmp_dnode_info *tn;
633 	struct rb_root tn_list;
634 	struct rb_node *rb, *repl_rb;
635 	struct jffs2_full_dirent *fd_list;
636 	struct jffs2_full_dnode *fn, *first_fn = NULL;
637 	uint32_t crc;
638 	uint32_t latest_mctime, mctime_ver;
639 	size_t retlen;
640 	int ret;
641 
642 	dbg_readinode("ino #%u nlink is %d\n", f->inocache->ino, f->inocache->nlink);
643 
644 	/* Grab all nodes relevant to this ino */
645 	ret = jffs2_get_inode_nodes(c, f, &tn_list, &fd_list, &f->highest_version, &latest_mctime, &mctime_ver);
646 
647 	if (ret) {
648 		JFFS2_ERROR("cannot read nodes for ino %u, returned error is %d\n", f->inocache->ino, ret);
649 		if (f->inocache->state == INO_STATE_READING)
650 			jffs2_set_inocache_state(c, f->inocache, INO_STATE_CHECKEDABSENT);
651 		return ret;
652 	}
653 	f->dents = fd_list;
654 
655 	rb = rb_first(&tn_list);
656 
657 	while (rb) {
658 		cond_resched();
659 		tn = rb_entry(rb, struct jffs2_tmp_dnode_info, rb);
660 		fn = tn->fn;
661 		ret = 1;
662 		dbg_readinode("consider node ver %u, phys offset "
663 			"%#08x(%d), range %u-%u.\n", tn->version,
664 			ref_offset(fn->raw), ref_flags(fn->raw),
665 			fn->ofs, fn->ofs + fn->size);
666 
667 		if (fn->size) {
668 			ret = jffs2_add_older_frag_to_fragtree(c, f, tn);
669 			/* TODO: the error code isn't checked, check it */
670 			jffs2_dbg_fragtree_paranoia_check_nolock(f);
671 			BUG_ON(ret < 0);
672 			if (!first_fn && ret == 0)
673 				first_fn = fn;
674 		} else if (!first_fn) {
675 			first_fn = fn;
676 			f->metadata = fn;
677 			ret = 0; /* Prevent freeing the metadata update node */
678 		} else
679 			jffs2_mark_node_obsolete(c, fn->raw);
680 
681 		BUG_ON(rb->rb_left);
682 		if (rb->rb_parent && rb->rb_parent->rb_left == rb) {
683 			/* We were then left-hand child of our parent. We need
684 			 * to move our own right-hand child into our place. */
685 			repl_rb = rb->rb_right;
686 			if (repl_rb)
687 				repl_rb->rb_parent = rb->rb_parent;
688 		} else
689 			repl_rb = NULL;
690 
691 		rb = rb_next(rb);
692 
693 		/* Remove the spent tn from the tree; don't bother rebalancing
694 		 * but put our right-hand child in our own place. */
695 		if (tn->rb.rb_parent) {
696 			if (tn->rb.rb_parent->rb_left == &tn->rb)
697 				tn->rb.rb_parent->rb_left = repl_rb;
698 			else if (tn->rb.rb_parent->rb_right == &tn->rb)
699 				tn->rb.rb_parent->rb_right = repl_rb;
700 			else BUG();
701 		} else if (tn->rb.rb_right)
702 			tn->rb.rb_right->rb_parent = NULL;
703 
704 		jffs2_free_tmp_dnode_info(tn);
705 		if (ret) {
706 			dbg_readinode("delete dnode %u-%u.\n",
707 				fn->ofs, fn->ofs + fn->size);
708 			jffs2_free_full_dnode(fn);
709 		}
710 	}
711 	jffs2_dbg_fragtree_paranoia_check_nolock(f);
712 
713 	BUG_ON(first_fn && ref_obsolete(first_fn->raw));
714 
715 	fn = first_fn;
716 	if (unlikely(!first_fn)) {
717 		/* No data nodes for this inode. */
718 		if (f->inocache->ino != 1) {
719 			JFFS2_WARNING("no data nodes found for ino #%u\n", f->inocache->ino);
720 			if (!fd_list) {
721 				if (f->inocache->state == INO_STATE_READING)
722 					jffs2_set_inocache_state(c, f->inocache, INO_STATE_CHECKEDABSENT);
723 				return -EIO;
724 			}
725 			JFFS2_NOTICE("but it has children so we fake some modes for it\n");
726 		}
727 		latest_node->mode = cpu_to_jemode(S_IFDIR|S_IRUGO|S_IWUSR|S_IXUGO);
728 		latest_node->version = cpu_to_je32(0);
729 		latest_node->atime = latest_node->ctime = latest_node->mtime = cpu_to_je32(0);
730 		latest_node->isize = cpu_to_je32(0);
731 		latest_node->gid = cpu_to_je16(0);
732 		latest_node->uid = cpu_to_je16(0);
733 		if (f->inocache->state == INO_STATE_READING)
734 			jffs2_set_inocache_state(c, f->inocache, INO_STATE_PRESENT);
735 		return 0;
736 	}
737 
738 	ret = jffs2_flash_read(c, ref_offset(fn->raw), sizeof(*latest_node), &retlen, (void *)latest_node);
739 	if (ret || retlen != sizeof(*latest_node)) {
740 		JFFS2_ERROR("failed to read from flash: error %d, %zd of %zd bytes read\n",
741 			ret, retlen, sizeof(*latest_node));
742 		/* FIXME: If this fails, there seems to be a memory leak. Find it. */
743 		up(&f->sem);
744 		jffs2_do_clear_inode(c, f);
745 		return ret?ret:-EIO;
746 	}
747 
748 	crc = crc32(0, latest_node, sizeof(*latest_node)-8);
749 	if (crc != je32_to_cpu(latest_node->node_crc)) {
750 		JFFS2_ERROR("CRC failed for read_inode of inode %u at physical location 0x%x\n",
751 			f->inocache->ino, ref_offset(fn->raw));
752 		up(&f->sem);
753 		jffs2_do_clear_inode(c, f);
754 		return -EIO;
755 	}
756 
757 	switch(jemode_to_cpu(latest_node->mode) & S_IFMT) {
758 	case S_IFDIR:
759 		if (mctime_ver > je32_to_cpu(latest_node->version)) {
760 			/* The times in the latest_node are actually older than
761 			   mctime in the latest dirent. Cheat. */
762 			latest_node->ctime = latest_node->mtime = cpu_to_je32(latest_mctime);
763 		}
764 		break;
765 
766 
767 	case S_IFREG:
768 		/* If it was a regular file, truncate it to the latest node's isize */
769 		jffs2_truncate_fragtree(c, &f->fragtree, je32_to_cpu(latest_node->isize));
770 		break;
771 
772 	case S_IFLNK:
773 		/* Hack to work around broken isize in old symlink code.
774 		   Remove this when dwmw2 comes to his senses and stops
775 		   symlinks from being an entirely gratuitous special
776 		   case. */
777 		if (!je32_to_cpu(latest_node->isize))
778 			latest_node->isize = latest_node->dsize;
779 
780 		if (f->inocache->state != INO_STATE_CHECKING) {
781 			/* Symlink's inode data is the target path. Read it and
782 			 * keep in RAM to facilitate quick follow symlink
783 			 * operation. */
784 			f->target = kmalloc(je32_to_cpu(latest_node->csize) + 1, GFP_KERNEL);
785 			if (!f->target) {
786 				JFFS2_ERROR("can't allocate %d bytes of memory for the symlink target path cache\n", je32_to_cpu(latest_node->csize));
787 				up(&f->sem);
788 				jffs2_do_clear_inode(c, f);
789 				return -ENOMEM;
790 			}
791 
792 			ret = jffs2_flash_read(c, ref_offset(fn->raw) + sizeof(*latest_node),
793 						je32_to_cpu(latest_node->csize), &retlen, (char *)f->target);
794 
795 			if (ret  || retlen != je32_to_cpu(latest_node->csize)) {
796 				if (retlen != je32_to_cpu(latest_node->csize))
797 					ret = -EIO;
798 				kfree(f->target);
799 				f->target = NULL;
800 				up(&f->sem);
801 				jffs2_do_clear_inode(c, f);
802 				return -ret;
803 			}
804 
805 			f->target[je32_to_cpu(latest_node->csize)] = '\0';
806 			dbg_readinode("symlink's target '%s' cached\n", f->target);
807 		}
808 
809 		/* fall through... */
810 
811 	case S_IFBLK:
812 	case S_IFCHR:
813 		/* Certain inode types should have only one data node, and it's
814 		   kept as the metadata node */
815 		if (f->metadata) {
816 			JFFS2_ERROR("Argh. Special inode #%u with mode 0%o had metadata node\n",
817 			       f->inocache->ino, jemode_to_cpu(latest_node->mode));
818 			up(&f->sem);
819 			jffs2_do_clear_inode(c, f);
820 			return -EIO;
821 		}
822 		if (!frag_first(&f->fragtree)) {
823 			JFFS2_ERROR("Argh. Special inode #%u with mode 0%o has no fragments\n",
824 			       f->inocache->ino, jemode_to_cpu(latest_node->mode));
825 			up(&f->sem);
826 			jffs2_do_clear_inode(c, f);
827 			return -EIO;
828 		}
829 		/* ASSERT: f->fraglist != NULL */
830 		if (frag_next(frag_first(&f->fragtree))) {
831 			JFFS2_ERROR("Argh. Special inode #%u with mode 0x%x had more than one node\n",
832 			       f->inocache->ino, jemode_to_cpu(latest_node->mode));
833 			/* FIXME: Deal with it - check crc32, check for duplicate node, check times and discard the older one */
834 			up(&f->sem);
835 			jffs2_do_clear_inode(c, f);
836 			return -EIO;
837 		}
838 		/* OK. We're happy */
839 		f->metadata = frag_first(&f->fragtree)->node;
840 		jffs2_free_node_frag(frag_first(&f->fragtree));
841 		f->fragtree = RB_ROOT;
842 		break;
843 	}
844 	if (f->inocache->state == INO_STATE_READING)
845 		jffs2_set_inocache_state(c, f->inocache, INO_STATE_PRESENT);
846 
847 	return 0;
848 }
849 
850 /* Scan the list of all nodes present for this ino, build map of versions, etc. */
851 int jffs2_do_read_inode(struct jffs2_sb_info *c, struct jffs2_inode_info *f,
852 			uint32_t ino, struct jffs2_raw_inode *latest_node)
853 {
854 	dbg_readinode("read inode #%u\n", ino);
855 
856  retry_inocache:
857 	spin_lock(&c->inocache_lock);
858 	f->inocache = jffs2_get_ino_cache(c, ino);
859 
860 	if (f->inocache) {
861 		/* Check its state. We may need to wait before we can use it */
862 		switch(f->inocache->state) {
863 		case INO_STATE_UNCHECKED:
864 		case INO_STATE_CHECKEDABSENT:
865 			f->inocache->state = INO_STATE_READING;
866 			break;
867 
868 		case INO_STATE_CHECKING:
869 		case INO_STATE_GC:
870 			/* If it's in either of these states, we need
871 			   to wait for whoever's got it to finish and
872 			   put it back. */
873 			dbg_readinode("waiting for ino #%u in state %d\n", ino, f->inocache->state);
874 			sleep_on_spinunlock(&c->inocache_wq, &c->inocache_lock);
875 			goto retry_inocache;
876 
877 		case INO_STATE_READING:
878 		case INO_STATE_PRESENT:
879 			/* Eep. This should never happen. It can
880 			happen if Linux calls read_inode() again
881 			before clear_inode() has finished though. */
882 			JFFS2_ERROR("Eep. Trying to read_inode #%u when it's already in state %d!\n", ino, f->inocache->state);
883 			/* Fail. That's probably better than allowing it to succeed */
884 			f->inocache = NULL;
885 			break;
886 
887 		default:
888 			BUG();
889 		}
890 	}
891 	spin_unlock(&c->inocache_lock);
892 
893 	if (!f->inocache && ino == 1) {
894 		/* Special case - no root inode on medium */
895 		f->inocache = jffs2_alloc_inode_cache();
896 		if (!f->inocache) {
897 			JFFS2_ERROR("cannot allocate inocache for root inode\n");
898 			return -ENOMEM;
899 		}
900 		dbg_readinode("creating inocache for root inode\n");
901 		memset(f->inocache, 0, sizeof(struct jffs2_inode_cache));
902 		f->inocache->ino = f->inocache->nlink = 1;
903 		f->inocache->nodes = (struct jffs2_raw_node_ref *)f->inocache;
904 		f->inocache->state = INO_STATE_READING;
905 		jffs2_add_ino_cache(c, f->inocache);
906 	}
907 	if (!f->inocache) {
908 		JFFS2_ERROR("requestied to read an nonexistent ino %u\n", ino);
909 		return -ENOENT;
910 	}
911 
912 	return jffs2_do_read_inode_internal(c, f, latest_node);
913 }
914 
915 int jffs2_do_crccheck_inode(struct jffs2_sb_info *c, struct jffs2_inode_cache *ic)
916 {
917 	struct jffs2_raw_inode n;
918 	struct jffs2_inode_info *f = kmalloc(sizeof(*f), GFP_KERNEL);
919 	int ret;
920 
921 	if (!f)
922 		return -ENOMEM;
923 
924 	memset(f, 0, sizeof(*f));
925 	init_MUTEX_LOCKED(&f->sem);
926 	f->inocache = ic;
927 
928 	ret = jffs2_do_read_inode_internal(c, f, &n);
929 	if (!ret) {
930 		up(&f->sem);
931 		jffs2_do_clear_inode(c, f);
932 	}
933 	kfree (f);
934 	return ret;
935 }
936 
937 void jffs2_do_clear_inode(struct jffs2_sb_info *c, struct jffs2_inode_info *f)
938 {
939 	struct jffs2_full_dirent *fd, *fds;
940 	int deleted;
941 
942 	down(&f->sem);
943 	deleted = f->inocache && !f->inocache->nlink;
944 
945 	if (f->inocache && f->inocache->state != INO_STATE_CHECKING)
946 		jffs2_set_inocache_state(c, f->inocache, INO_STATE_CLEARING);
947 
948 	if (f->metadata) {
949 		if (deleted)
950 			jffs2_mark_node_obsolete(c, f->metadata->raw);
951 		jffs2_free_full_dnode(f->metadata);
952 	}
953 
954 	jffs2_kill_fragtree(&f->fragtree, deleted?c:NULL);
955 
956 	if (f->target) {
957 		kfree(f->target);
958 		f->target = NULL;
959 	}
960 
961 	fds = f->dents;
962 	while(fds) {
963 		fd = fds;
964 		fds = fd->next;
965 		jffs2_free_full_dirent(fd);
966 	}
967 
968 	if (f->inocache && f->inocache->state != INO_STATE_CHECKING) {
969 		jffs2_set_inocache_state(c, f->inocache, INO_STATE_CHECKEDABSENT);
970 		if (f->inocache->nodes == (void *)f->inocache)
971 			jffs2_del_ino_cache(c, f->inocache);
972 	}
973 
974 	up(&f->sem);
975 }
976