xref: /openbmc/linux/fs/jffs2/readinode.c (revision b454cc66)
1 /*
2  * JFFS2 -- Journalling Flash File System, Version 2.
3  *
4  * Copyright (C) 2001-2003 Red Hat, Inc.
5  *
6  * Created by David Woodhouse <dwmw2@infradead.org>
7  *
8  * For licensing information, see the file 'LICENCE' in this directory.
9  *
10  * $Id: readinode.c,v 1.143 2005/11/07 11:14:41 gleixner Exp $
11  *
12  */
13 
14 #include <linux/kernel.h>
15 #include <linux/sched.h>
16 #include <linux/slab.h>
17 #include <linux/fs.h>
18 #include <linux/crc32.h>
19 #include <linux/pagemap.h>
20 #include <linux/mtd/mtd.h>
21 #include <linux/compiler.h>
22 #include "nodelist.h"
23 
24 /*
25  * Put a new tmp_dnode_info into the temporaty RB-tree, keeping the list in
26  * order of increasing version.
27  */
28 static void jffs2_add_tn_to_tree(struct jffs2_tmp_dnode_info *tn, struct rb_root *list)
29 {
30 	struct rb_node **p = &list->rb_node;
31 	struct rb_node * parent = NULL;
32 	struct jffs2_tmp_dnode_info *this;
33 
34 	while (*p) {
35 		parent = *p;
36 		this = rb_entry(parent, struct jffs2_tmp_dnode_info, rb);
37 
38 		/* There may actually be a collision here, but it doesn't
39 		   actually matter. As long as the two nodes with the same
40 		   version are together, it's all fine. */
41 		if (tn->version > this->version)
42 			p = &(*p)->rb_left;
43 		else
44 			p = &(*p)->rb_right;
45 	}
46 
47 	rb_link_node(&tn->rb, parent, p);
48 	rb_insert_color(&tn->rb, list);
49 }
50 
51 static void jffs2_free_tmp_dnode_info_list(struct rb_root *list)
52 {
53 	struct rb_node *this;
54 	struct jffs2_tmp_dnode_info *tn;
55 
56 	this = list->rb_node;
57 
58 	/* Now at bottom of tree */
59 	while (this) {
60 		if (this->rb_left)
61 			this = this->rb_left;
62 		else if (this->rb_right)
63 			this = this->rb_right;
64 		else {
65 			tn = rb_entry(this, struct jffs2_tmp_dnode_info, rb);
66 			jffs2_free_full_dnode(tn->fn);
67 			jffs2_free_tmp_dnode_info(tn);
68 
69 			this = rb_parent(this);
70 			if (!this)
71 				break;
72 
73 			if (this->rb_left == &tn->rb)
74 				this->rb_left = NULL;
75 			else if (this->rb_right == &tn->rb)
76 				this->rb_right = NULL;
77 			else BUG();
78 		}
79 	}
80 	list->rb_node = NULL;
81 }
82 
83 static void jffs2_free_full_dirent_list(struct jffs2_full_dirent *fd)
84 {
85 	struct jffs2_full_dirent *next;
86 
87 	while (fd) {
88 		next = fd->next;
89 		jffs2_free_full_dirent(fd);
90 		fd = next;
91 	}
92 }
93 
94 /* Returns first valid node after 'ref'. May return 'ref' */
95 static struct jffs2_raw_node_ref *jffs2_first_valid_node(struct jffs2_raw_node_ref *ref)
96 {
97 	while (ref && ref->next_in_ino) {
98 		if (!ref_obsolete(ref))
99 			return ref;
100 		dbg_noderef("node at 0x%08x is obsoleted. Ignoring.\n", ref_offset(ref));
101 		ref = ref->next_in_ino;
102 	}
103 	return NULL;
104 }
105 
106 /*
107  * Helper function for jffs2_get_inode_nodes().
108  * It is called every time an directory entry node is found.
109  *
110  * Returns: 0 on succes;
111  * 	    1 if the node should be marked obsolete;
112  * 	    negative error code on failure.
113  */
114 static inline int read_direntry(struct jffs2_sb_info *c, struct jffs2_raw_node_ref *ref,
115 				struct jffs2_raw_dirent *rd, size_t read, struct jffs2_full_dirent **fdp,
116 				uint32_t *latest_mctime, uint32_t *mctime_ver)
117 {
118 	struct jffs2_full_dirent *fd;
119 	uint32_t crc;
120 
121 	/* Obsoleted. This cannot happen, surely? dwmw2 20020308 */
122 	BUG_ON(ref_obsolete(ref));
123 
124 	crc = crc32(0, rd, sizeof(*rd) - 8);
125 	if (unlikely(crc != je32_to_cpu(rd->node_crc))) {
126 		JFFS2_NOTICE("header CRC failed on dirent node at %#08x: read %#08x, calculated %#08x\n",
127 			     ref_offset(ref), je32_to_cpu(rd->node_crc), crc);
128 		return 1;
129 	}
130 
131 	/* If we've never checked the CRCs on this node, check them now */
132 	if (ref_flags(ref) == REF_UNCHECKED) {
133 		struct jffs2_eraseblock *jeb;
134 		int len;
135 
136 		/* Sanity check */
137 		if (unlikely(PAD((rd->nsize + sizeof(*rd))) != PAD(je32_to_cpu(rd->totlen)))) {
138 			JFFS2_ERROR("illegal nsize in node at %#08x: nsize %#02x, totlen %#04x\n",
139 				    ref_offset(ref), rd->nsize, je32_to_cpu(rd->totlen));
140 			return 1;
141 		}
142 
143 		jeb = &c->blocks[ref->flash_offset / c->sector_size];
144 		len = ref_totlen(c, jeb, ref);
145 
146 		spin_lock(&c->erase_completion_lock);
147 		jeb->used_size += len;
148 		jeb->unchecked_size -= len;
149 		c->used_size += len;
150 		c->unchecked_size -= len;
151 		ref->flash_offset = ref_offset(ref) | REF_PRISTINE;
152 		spin_unlock(&c->erase_completion_lock);
153 	}
154 
155 	fd = jffs2_alloc_full_dirent(rd->nsize + 1);
156 	if (unlikely(!fd))
157 		return -ENOMEM;
158 
159 	fd->raw = ref;
160 	fd->version = je32_to_cpu(rd->version);
161 	fd->ino = je32_to_cpu(rd->ino);
162 	fd->type = rd->type;
163 
164 	/* Pick out the mctime of the latest dirent */
165 	if(fd->version > *mctime_ver && je32_to_cpu(rd->mctime)) {
166 		*mctime_ver = fd->version;
167 		*latest_mctime = je32_to_cpu(rd->mctime);
168 	}
169 
170 	/*
171 	 * Copy as much of the name as possible from the raw
172 	 * dirent we've already read from the flash.
173 	 */
174 	if (read > sizeof(*rd))
175 		memcpy(&fd->name[0], &rd->name[0],
176 		       min_t(uint32_t, rd->nsize, (read - sizeof(*rd)) ));
177 
178 	/* Do we need to copy any more of the name directly from the flash? */
179 	if (rd->nsize + sizeof(*rd) > read) {
180 		/* FIXME: point() */
181 		int err;
182 		int already = read - sizeof(*rd);
183 
184 		err = jffs2_flash_read(c, (ref_offset(ref)) + read,
185 				rd->nsize - already, &read, &fd->name[already]);
186 		if (unlikely(read != rd->nsize - already) && likely(!err))
187 			return -EIO;
188 
189 		if (unlikely(err)) {
190 			JFFS2_ERROR("read remainder of name: error %d\n", err);
191 			jffs2_free_full_dirent(fd);
192 			return -EIO;
193 		}
194 	}
195 
196 	fd->nhash = full_name_hash(fd->name, rd->nsize);
197 	fd->next = NULL;
198 	fd->name[rd->nsize] = '\0';
199 
200 	/*
201 	 * Wheee. We now have a complete jffs2_full_dirent structure, with
202 	 * the name in it and everything. Link it into the list
203 	 */
204 	jffs2_add_fd_to_list(c, fd, fdp);
205 
206 	return 0;
207 }
208 
209 /*
210  * Helper function for jffs2_get_inode_nodes().
211  * It is called every time an inode node is found.
212  *
213  * Returns: 0 on succes;
214  * 	    1 if the node should be marked obsolete;
215  * 	    negative error code on failure.
216  */
217 static inline int read_dnode(struct jffs2_sb_info *c, struct jffs2_raw_node_ref *ref,
218 			     struct jffs2_raw_inode *rd, struct rb_root *tnp, int rdlen,
219 			     uint32_t *latest_mctime, uint32_t *mctime_ver)
220 {
221 	struct jffs2_tmp_dnode_info *tn;
222 	uint32_t len, csize;
223 	int ret = 1;
224 	uint32_t crc;
225 
226 	/* Obsoleted. This cannot happen, surely? dwmw2 20020308 */
227 	BUG_ON(ref_obsolete(ref));
228 
229 	crc = crc32(0, rd, sizeof(*rd) - 8);
230 	if (unlikely(crc != je32_to_cpu(rd->node_crc))) {
231 		JFFS2_NOTICE("node CRC failed on dnode at %#08x: read %#08x, calculated %#08x\n",
232 			     ref_offset(ref), je32_to_cpu(rd->node_crc), crc);
233 		return 1;
234 	}
235 
236 	tn = jffs2_alloc_tmp_dnode_info();
237 	if (!tn) {
238 		JFFS2_ERROR("failed to allocate tn (%zu bytes).\n", sizeof(*tn));
239 		return -ENOMEM;
240 	}
241 
242 	tn->partial_crc = 0;
243 	csize = je32_to_cpu(rd->csize);
244 
245 	/* If we've never checked the CRCs on this node, check them now */
246 	if (ref_flags(ref) == REF_UNCHECKED) {
247 
248 		/* Sanity checks */
249 		if (unlikely(je32_to_cpu(rd->offset) > je32_to_cpu(rd->isize)) ||
250 		    unlikely(PAD(je32_to_cpu(rd->csize) + sizeof(*rd)) != PAD(je32_to_cpu(rd->totlen)))) {
251 				JFFS2_WARNING("inode node header CRC is corrupted at %#08x\n", ref_offset(ref));
252 				jffs2_dbg_dump_node(c, ref_offset(ref));
253 			goto free_out;
254 		}
255 
256 		if (jffs2_is_writebuffered(c) && csize != 0) {
257 			/* At this point we are supposed to check the data CRC
258 			 * of our unchecked node. But thus far, we do not
259 			 * know whether the node is valid or obsolete. To
260 			 * figure this out, we need to walk all the nodes of
261 			 * the inode and build the inode fragtree. We don't
262 			 * want to spend time checking data of nodes which may
263 			 * later be found to be obsolete. So we put off the full
264 			 * data CRC checking until we have read all the inode
265 			 * nodes and have started building the fragtree.
266 			 *
267 			 * The fragtree is being built starting with nodes
268 			 * having the highest version number, so we'll be able
269 			 * to detect whether a node is valid (i.e., it is not
270 			 * overlapped by a node with higher version) or not.
271 			 * And we'll be able to check only those nodes, which
272 			 * are not obsolete.
273 			 *
274 			 * Of course, this optimization only makes sense in case
275 			 * of NAND flashes (or other flashes whith
276 			 * !jffs2_can_mark_obsolete()), since on NOR flashes
277 			 * nodes are marked obsolete physically.
278 			 *
279 			 * Since NAND flashes (or other flashes with
280 			 * jffs2_is_writebuffered(c)) are anyway read by
281 			 * fractions of c->wbuf_pagesize, and we have just read
282 			 * the node header, it is likely that the starting part
283 			 * of the node data is also read when we read the
284 			 * header. So we don't mind to check the CRC of the
285 			 * starting part of the data of the node now, and check
286 			 * the second part later (in jffs2_check_node_data()).
287 			 * Of course, we will not need to re-read and re-check
288 			 * the NAND page which we have just read. This is why we
289 			 * read the whole NAND page at jffs2_get_inode_nodes(),
290 			 * while we needed only the node header.
291 			 */
292 			unsigned char *buf;
293 
294 			/* 'buf' will point to the start of data */
295 			buf = (unsigned char *)rd + sizeof(*rd);
296 			/* len will be the read data length */
297 			len = min_t(uint32_t, rdlen - sizeof(*rd), csize);
298 			tn->partial_crc = crc32(0, buf, len);
299 
300 			dbg_readinode("Calculates CRC (%#08x) for %d bytes, csize %d\n", tn->partial_crc, len, csize);
301 
302 			/* If we actually calculated the whole data CRC
303 			 * and it is wrong, drop the node. */
304 			if (len >= csize && unlikely(tn->partial_crc != je32_to_cpu(rd->data_crc))) {
305 				JFFS2_NOTICE("wrong data CRC in data node at 0x%08x: read %#08x, calculated %#08x.\n",
306 					ref_offset(ref), tn->partial_crc, je32_to_cpu(rd->data_crc));
307 				goto free_out;
308 			}
309 
310 		} else if (csize == 0) {
311 			/*
312 			 * We checked the header CRC. If the node has no data, adjust
313 			 * the space accounting now. For other nodes this will be done
314 			 * later either when the node is marked obsolete or when its
315 			 * data is checked.
316 			 */
317 			struct jffs2_eraseblock *jeb;
318 
319 			dbg_readinode("the node has no data.\n");
320 			jeb = &c->blocks[ref->flash_offset / c->sector_size];
321 			len = ref_totlen(c, jeb, ref);
322 
323 			spin_lock(&c->erase_completion_lock);
324 			jeb->used_size += len;
325 			jeb->unchecked_size -= len;
326 			c->used_size += len;
327 			c->unchecked_size -= len;
328 			ref->flash_offset = ref_offset(ref) | REF_NORMAL;
329 			spin_unlock(&c->erase_completion_lock);
330 		}
331 	}
332 
333 	tn->fn = jffs2_alloc_full_dnode();
334 	if (!tn->fn) {
335 		JFFS2_ERROR("alloc fn failed\n");
336 		ret = -ENOMEM;
337 		goto free_out;
338 	}
339 
340 	tn->version = je32_to_cpu(rd->version);
341 	tn->fn->ofs = je32_to_cpu(rd->offset);
342 	tn->data_crc = je32_to_cpu(rd->data_crc);
343 	tn->csize = csize;
344 	tn->fn->raw = ref;
345 
346 	/* There was a bug where we wrote hole nodes out with
347 	   csize/dsize swapped. Deal with it */
348 	if (rd->compr == JFFS2_COMPR_ZERO && !je32_to_cpu(rd->dsize) && csize)
349 		tn->fn->size = csize;
350 	else // normal case...
351 		tn->fn->size = je32_to_cpu(rd->dsize);
352 
353 	dbg_readinode("dnode @%08x: ver %u, offset %#04x, dsize %#04x, csize %#04x\n",
354 		  ref_offset(ref), je32_to_cpu(rd->version), je32_to_cpu(rd->offset), je32_to_cpu(rd->dsize), csize);
355 
356 	jffs2_add_tn_to_tree(tn, tnp);
357 
358 	return 0;
359 
360 free_out:
361 	jffs2_free_tmp_dnode_info(tn);
362 	return ret;
363 }
364 
365 /*
366  * Helper function for jffs2_get_inode_nodes().
367  * It is called every time an unknown node is found.
368  *
369  * Returns: 0 on success;
370  * 	    1 if the node should be marked obsolete;
371  * 	    negative error code on failure.
372  */
373 static inline int read_unknown(struct jffs2_sb_info *c, struct jffs2_raw_node_ref *ref, struct jffs2_unknown_node *un)
374 {
375 	/* We don't mark unknown nodes as REF_UNCHECKED */
376 	BUG_ON(ref_flags(ref) == REF_UNCHECKED);
377 
378 	un->nodetype = cpu_to_je16(JFFS2_NODE_ACCURATE | je16_to_cpu(un->nodetype));
379 
380 	switch(je16_to_cpu(un->nodetype) & JFFS2_COMPAT_MASK) {
381 
382 	case JFFS2_FEATURE_INCOMPAT:
383 		JFFS2_ERROR("unknown INCOMPAT nodetype %#04X at %#08x\n",
384 			    je16_to_cpu(un->nodetype), ref_offset(ref));
385 		/* EEP */
386 		BUG();
387 		break;
388 
389 	case JFFS2_FEATURE_ROCOMPAT:
390 		JFFS2_ERROR("unknown ROCOMPAT nodetype %#04X at %#08x\n",
391 			    je16_to_cpu(un->nodetype), ref_offset(ref));
392 		BUG_ON(!(c->flags & JFFS2_SB_FLAG_RO));
393 		break;
394 
395 	case JFFS2_FEATURE_RWCOMPAT_COPY:
396 		JFFS2_NOTICE("unknown RWCOMPAT_COPY nodetype %#04X at %#08x\n",
397 			     je16_to_cpu(un->nodetype), ref_offset(ref));
398 		break;
399 
400 	case JFFS2_FEATURE_RWCOMPAT_DELETE:
401 		JFFS2_NOTICE("unknown RWCOMPAT_DELETE nodetype %#04X at %#08x\n",
402 			     je16_to_cpu(un->nodetype), ref_offset(ref));
403 		return 1;
404 	}
405 
406 	return 0;
407 }
408 
409 /*
410  * Helper function for jffs2_get_inode_nodes().
411  * The function detects whether more data should be read and reads it if yes.
412  *
413  * Returns: 0 on succes;
414  * 	    negative error code on failure.
415  */
416 static int read_more(struct jffs2_sb_info *c, struct jffs2_raw_node_ref *ref,
417 		     int right_size, int *rdlen, unsigned char *buf, unsigned char *bufstart)
418 {
419 	int right_len, err, len;
420 	size_t retlen;
421 	uint32_t offs;
422 
423 	if (jffs2_is_writebuffered(c)) {
424 		right_len = c->wbuf_pagesize - (bufstart - buf);
425 		if (right_size + (int)(bufstart - buf) > c->wbuf_pagesize)
426 			right_len += c->wbuf_pagesize;
427 	} else
428 		right_len = right_size;
429 
430 	if (*rdlen == right_len)
431 		return 0;
432 
433 	/* We need to read more data */
434 	offs = ref_offset(ref) + *rdlen;
435 	if (jffs2_is_writebuffered(c)) {
436 		bufstart = buf + c->wbuf_pagesize;
437 		len = c->wbuf_pagesize;
438 	} else {
439 		bufstart = buf + *rdlen;
440 		len = right_size - *rdlen;
441 	}
442 
443 	dbg_readinode("read more %d bytes\n", len);
444 
445 	err = jffs2_flash_read(c, offs, len, &retlen, bufstart);
446 	if (err) {
447 		JFFS2_ERROR("can not read %d bytes from 0x%08x, "
448 			"error code: %d.\n", len, offs, err);
449 		return err;
450 	}
451 
452 	if (retlen < len) {
453 		JFFS2_ERROR("short read at %#08x: %zu instead of %d.\n",
454 				offs, retlen, len);
455 		return -EIO;
456 	}
457 
458 	*rdlen = right_len;
459 
460 	return 0;
461 }
462 
463 /* Get tmp_dnode_info and full_dirent for all non-obsolete nodes associated
464    with this ino, returning the former in order of version */
465 static int jffs2_get_inode_nodes(struct jffs2_sb_info *c, struct jffs2_inode_info *f,
466 				 struct rb_root *tnp, struct jffs2_full_dirent **fdp,
467 				 uint32_t *highest_version, uint32_t *latest_mctime,
468 				 uint32_t *mctime_ver)
469 {
470 	struct jffs2_raw_node_ref *ref, *valid_ref;
471 	struct rb_root ret_tn = RB_ROOT;
472 	struct jffs2_full_dirent *ret_fd = NULL;
473 	unsigned char *buf = NULL;
474 	union jffs2_node_union *node;
475 	size_t retlen;
476 	int len, err;
477 
478 	*mctime_ver = 0;
479 
480 	dbg_readinode("ino #%u\n", f->inocache->ino);
481 
482 	if (jffs2_is_writebuffered(c)) {
483 		/*
484 		 * If we have the write buffer, we assume the minimal I/O unit
485 		 * is c->wbuf_pagesize. We implement some optimizations which in
486 		 * this case and we need a temporary buffer of size =
487 		 * 2*c->wbuf_pagesize bytes (see comments in read_dnode()).
488 		 * Basically, we want to read not only the node header, but the
489 		 * whole wbuf (NAND page in case of NAND) or 2, if the node
490 		 * header overlaps the border between the 2 wbufs.
491 		 */
492 		len = 2*c->wbuf_pagesize;
493 	} else {
494 		/*
495 		 * When there is no write buffer, the size of the temporary
496 		 * buffer is the size of the larges node header.
497 		 */
498 		len = sizeof(union jffs2_node_union);
499 	}
500 
501 	/* FIXME: in case of NOR and available ->point() this
502 	 * needs to be fixed. */
503 	buf = kmalloc(len, GFP_KERNEL);
504 	if (!buf)
505 		return -ENOMEM;
506 
507 	spin_lock(&c->erase_completion_lock);
508 	valid_ref = jffs2_first_valid_node(f->inocache->nodes);
509 	if (!valid_ref && f->inocache->ino != 1)
510 		JFFS2_WARNING("Eep. No valid nodes for ino #%u.\n", f->inocache->ino);
511 	while (valid_ref) {
512 		unsigned char *bufstart;
513 
514 		/* We can hold a pointer to a non-obsolete node without the spinlock,
515 		   but _obsolete_ nodes may disappear at any time, if the block
516 		   they're in gets erased. So if we mark 'ref' obsolete while we're
517 		   not holding the lock, it can go away immediately. For that reason,
518 		   we find the next valid node first, before processing 'ref'.
519 		*/
520 		ref = valid_ref;
521 		valid_ref = jffs2_first_valid_node(ref->next_in_ino);
522 		spin_unlock(&c->erase_completion_lock);
523 
524 		cond_resched();
525 
526 		/*
527 		 * At this point we don't know the type of the node we're going
528 		 * to read, so we do not know the size of its header. In order
529 		 * to minimize the amount of flash IO we assume the node has
530 		 * size = JFFS2_MIN_NODE_HEADER.
531 		 */
532 		if (jffs2_is_writebuffered(c)) {
533 			/*
534 			 * We treat 'buf' as 2 adjacent wbufs. We want to
535 			 * adjust bufstart such as it points to the
536 			 * beginning of the node within this wbuf.
537 			 */
538 			bufstart = buf + (ref_offset(ref) % c->wbuf_pagesize);
539 			/* We will read either one wbuf or 2 wbufs. */
540 			len = c->wbuf_pagesize - (bufstart - buf);
541 			if (JFFS2_MIN_NODE_HEADER + (int)(bufstart - buf) > c->wbuf_pagesize) {
542 				/* The header spans the border of the first wbuf */
543 				len += c->wbuf_pagesize;
544 			}
545 		} else {
546 			bufstart = buf;
547 			len = JFFS2_MIN_NODE_HEADER;
548 		}
549 
550 		dbg_readinode("read %d bytes at %#08x(%d).\n", len, ref_offset(ref), ref_flags(ref));
551 
552 		/* FIXME: point() */
553 		err = jffs2_flash_read(c, ref_offset(ref), len,
554 				       &retlen, bufstart);
555 		if (err) {
556 			JFFS2_ERROR("can not read %d bytes from 0x%08x, " "error code: %d.\n", len, ref_offset(ref), err);
557 			goto free_out;
558 		}
559 
560 		if (retlen < len) {
561 			JFFS2_ERROR("short read at %#08x: %zu instead of %d.\n", ref_offset(ref), retlen, len);
562 			err = -EIO;
563 			goto free_out;
564 		}
565 
566 		node = (union jffs2_node_union *)bufstart;
567 
568 		/* No need to mask in the valid bit; it shouldn't be invalid */
569 		if (je32_to_cpu(node->u.hdr_crc) != crc32(0, node, sizeof(node->u)-4)) {
570 			JFFS2_NOTICE("Node header CRC failed at %#08x. {%04x,%04x,%08x,%08x}\n",
571 				     ref_offset(ref), je16_to_cpu(node->u.magic),
572 				     je16_to_cpu(node->u.nodetype),
573 				     je32_to_cpu(node->u.totlen),
574 				     je32_to_cpu(node->u.hdr_crc));
575 			jffs2_dbg_dump_node(c, ref_offset(ref));
576 			jffs2_mark_node_obsolete(c, ref);
577 			goto cont;
578 		}
579 
580 		switch (je16_to_cpu(node->u.nodetype)) {
581 
582 		case JFFS2_NODETYPE_DIRENT:
583 
584 			if (JFFS2_MIN_NODE_HEADER < sizeof(struct jffs2_raw_dirent)) {
585 				err = read_more(c, ref, sizeof(struct jffs2_raw_dirent), &len, buf, bufstart);
586 				if (unlikely(err))
587 					goto free_out;
588 			}
589 
590 			err = read_direntry(c, ref, &node->d, retlen, &ret_fd, latest_mctime, mctime_ver);
591 			if (err == 1) {
592 				jffs2_mark_node_obsolete(c, ref);
593 				break;
594 			} else if (unlikely(err))
595 				goto free_out;
596 
597 			if (je32_to_cpu(node->d.version) > *highest_version)
598 				*highest_version = je32_to_cpu(node->d.version);
599 
600 			break;
601 
602 		case JFFS2_NODETYPE_INODE:
603 
604 			if (JFFS2_MIN_NODE_HEADER < sizeof(struct jffs2_raw_inode)) {
605 				err = read_more(c, ref, sizeof(struct jffs2_raw_inode), &len, buf, bufstart);
606 				if (unlikely(err))
607 					goto free_out;
608 			}
609 
610 			err = read_dnode(c, ref, &node->i, &ret_tn, len, latest_mctime, mctime_ver);
611 			if (err == 1) {
612 				jffs2_mark_node_obsolete(c, ref);
613 				break;
614 			} else if (unlikely(err))
615 				goto free_out;
616 
617 			if (je32_to_cpu(node->i.version) > *highest_version)
618 				*highest_version = je32_to_cpu(node->i.version);
619 
620 			break;
621 
622 		default:
623 			if (JFFS2_MIN_NODE_HEADER < sizeof(struct jffs2_unknown_node)) {
624 				err = read_more(c, ref, sizeof(struct jffs2_unknown_node), &len, buf, bufstart);
625 				if (unlikely(err))
626 					goto free_out;
627 			}
628 
629 			err = read_unknown(c, ref, &node->u);
630 			if (err == 1) {
631 				jffs2_mark_node_obsolete(c, ref);
632 				break;
633 			} else if (unlikely(err))
634 				goto free_out;
635 
636 		}
637 	cont:
638 		spin_lock(&c->erase_completion_lock);
639 	}
640 
641 	spin_unlock(&c->erase_completion_lock);
642 	*tnp = ret_tn;
643 	*fdp = ret_fd;
644 	kfree(buf);
645 
646 	dbg_readinode("nodes of inode #%u were read, the highest version is %u, latest_mctime %u, mctime_ver %u.\n",
647 			f->inocache->ino, *highest_version, *latest_mctime, *mctime_ver);
648 	return 0;
649 
650  free_out:
651 	jffs2_free_tmp_dnode_info_list(&ret_tn);
652 	jffs2_free_full_dirent_list(ret_fd);
653 	kfree(buf);
654 	return err;
655 }
656 
657 static int jffs2_do_read_inode_internal(struct jffs2_sb_info *c,
658 					struct jffs2_inode_info *f,
659 					struct jffs2_raw_inode *latest_node)
660 {
661 	struct jffs2_tmp_dnode_info *tn;
662 	struct rb_root tn_list;
663 	struct rb_node *rb, *repl_rb;
664 	struct jffs2_full_dirent *fd_list;
665 	struct jffs2_full_dnode *fn, *first_fn = NULL;
666 	uint32_t crc;
667 	uint32_t latest_mctime, mctime_ver;
668 	size_t retlen;
669 	int ret;
670 
671 	dbg_readinode("ino #%u nlink is %d\n", f->inocache->ino, f->inocache->nlink);
672 
673 	/* Grab all nodes relevant to this ino */
674 	ret = jffs2_get_inode_nodes(c, f, &tn_list, &fd_list, &f->highest_version, &latest_mctime, &mctime_ver);
675 
676 	if (ret) {
677 		JFFS2_ERROR("cannot read nodes for ino %u, returned error is %d\n", f->inocache->ino, ret);
678 		if (f->inocache->state == INO_STATE_READING)
679 			jffs2_set_inocache_state(c, f->inocache, INO_STATE_CHECKEDABSENT);
680 		return ret;
681 	}
682 	f->dents = fd_list;
683 
684 	rb = rb_first(&tn_list);
685 
686 	while (rb) {
687 		cond_resched();
688 		tn = rb_entry(rb, struct jffs2_tmp_dnode_info, rb);
689 		fn = tn->fn;
690 		ret = 1;
691 		dbg_readinode("consider node ver %u, phys offset "
692 			"%#08x(%d), range %u-%u.\n", tn->version,
693 			ref_offset(fn->raw), ref_flags(fn->raw),
694 			fn->ofs, fn->ofs + fn->size);
695 
696 		if (fn->size) {
697 			ret = jffs2_add_older_frag_to_fragtree(c, f, tn);
698 			/* TODO: the error code isn't checked, check it */
699 			jffs2_dbg_fragtree_paranoia_check_nolock(f);
700 			BUG_ON(ret < 0);
701 			if (!first_fn && ret == 0)
702 				first_fn = fn;
703 		} else if (!first_fn) {
704 			first_fn = fn;
705 			f->metadata = fn;
706 			ret = 0; /* Prevent freeing the metadata update node */
707 		} else
708 			jffs2_mark_node_obsolete(c, fn->raw);
709 
710 		BUG_ON(rb->rb_left);
711 		if (rb_parent(rb) && rb_parent(rb)->rb_left == rb) {
712 			/* We were then left-hand child of our parent. We need
713 			 * to move our own right-hand child into our place. */
714 			repl_rb = rb->rb_right;
715 			if (repl_rb)
716 				rb_set_parent(repl_rb, rb_parent(rb));
717 		} else
718 			repl_rb = NULL;
719 
720 		rb = rb_next(rb);
721 
722 		/* Remove the spent tn from the tree; don't bother rebalancing
723 		 * but put our right-hand child in our own place. */
724 		if (rb_parent(&tn->rb)) {
725 			if (rb_parent(&tn->rb)->rb_left == &tn->rb)
726 				rb_parent(&tn->rb)->rb_left = repl_rb;
727 			else if (rb_parent(&tn->rb)->rb_right == &tn->rb)
728 				rb_parent(&tn->rb)->rb_right = repl_rb;
729 			else BUG();
730 		} else if (tn->rb.rb_right)
731 			rb_set_parent(tn->rb.rb_right, NULL);
732 
733 		jffs2_free_tmp_dnode_info(tn);
734 		if (ret) {
735 			dbg_readinode("delete dnode %u-%u.\n",
736 				fn->ofs, fn->ofs + fn->size);
737 			jffs2_free_full_dnode(fn);
738 		}
739 	}
740 	jffs2_dbg_fragtree_paranoia_check_nolock(f);
741 
742 	BUG_ON(first_fn && ref_obsolete(first_fn->raw));
743 
744 	fn = first_fn;
745 	if (unlikely(!first_fn)) {
746 		/* No data nodes for this inode. */
747 		if (f->inocache->ino != 1) {
748 			JFFS2_WARNING("no data nodes found for ino #%u\n", f->inocache->ino);
749 			if (!fd_list) {
750 				if (f->inocache->state == INO_STATE_READING)
751 					jffs2_set_inocache_state(c, f->inocache, INO_STATE_CHECKEDABSENT);
752 				return -EIO;
753 			}
754 			JFFS2_NOTICE("but it has children so we fake some modes for it\n");
755 		}
756 		latest_node->mode = cpu_to_jemode(S_IFDIR|S_IRUGO|S_IWUSR|S_IXUGO);
757 		latest_node->version = cpu_to_je32(0);
758 		latest_node->atime = latest_node->ctime = latest_node->mtime = cpu_to_je32(0);
759 		latest_node->isize = cpu_to_je32(0);
760 		latest_node->gid = cpu_to_je16(0);
761 		latest_node->uid = cpu_to_je16(0);
762 		if (f->inocache->state == INO_STATE_READING)
763 			jffs2_set_inocache_state(c, f->inocache, INO_STATE_PRESENT);
764 		return 0;
765 	}
766 
767 	ret = jffs2_flash_read(c, ref_offset(fn->raw), sizeof(*latest_node), &retlen, (void *)latest_node);
768 	if (ret || retlen != sizeof(*latest_node)) {
769 		JFFS2_ERROR("failed to read from flash: error %d, %zd of %zd bytes read\n",
770 			ret, retlen, sizeof(*latest_node));
771 		/* FIXME: If this fails, there seems to be a memory leak. Find it. */
772 		up(&f->sem);
773 		jffs2_do_clear_inode(c, f);
774 		return ret?ret:-EIO;
775 	}
776 
777 	crc = crc32(0, latest_node, sizeof(*latest_node)-8);
778 	if (crc != je32_to_cpu(latest_node->node_crc)) {
779 		JFFS2_ERROR("CRC failed for read_inode of inode %u at physical location 0x%x\n",
780 			f->inocache->ino, ref_offset(fn->raw));
781 		up(&f->sem);
782 		jffs2_do_clear_inode(c, f);
783 		return -EIO;
784 	}
785 
786 	switch(jemode_to_cpu(latest_node->mode) & S_IFMT) {
787 	case S_IFDIR:
788 		if (mctime_ver > je32_to_cpu(latest_node->version)) {
789 			/* The times in the latest_node are actually older than
790 			   mctime in the latest dirent. Cheat. */
791 			latest_node->ctime = latest_node->mtime = cpu_to_je32(latest_mctime);
792 		}
793 		break;
794 
795 
796 	case S_IFREG:
797 		/* If it was a regular file, truncate it to the latest node's isize */
798 		jffs2_truncate_fragtree(c, &f->fragtree, je32_to_cpu(latest_node->isize));
799 		break;
800 
801 	case S_IFLNK:
802 		/* Hack to work around broken isize in old symlink code.
803 		   Remove this when dwmw2 comes to his senses and stops
804 		   symlinks from being an entirely gratuitous special
805 		   case. */
806 		if (!je32_to_cpu(latest_node->isize))
807 			latest_node->isize = latest_node->dsize;
808 
809 		if (f->inocache->state != INO_STATE_CHECKING) {
810 			/* Symlink's inode data is the target path. Read it and
811 			 * keep in RAM to facilitate quick follow symlink
812 			 * operation. */
813 			f->target = kmalloc(je32_to_cpu(latest_node->csize) + 1, GFP_KERNEL);
814 			if (!f->target) {
815 				JFFS2_ERROR("can't allocate %d bytes of memory for the symlink target path cache\n", je32_to_cpu(latest_node->csize));
816 				up(&f->sem);
817 				jffs2_do_clear_inode(c, f);
818 				return -ENOMEM;
819 			}
820 
821 			ret = jffs2_flash_read(c, ref_offset(fn->raw) + sizeof(*latest_node),
822 						je32_to_cpu(latest_node->csize), &retlen, (char *)f->target);
823 
824 			if (ret  || retlen != je32_to_cpu(latest_node->csize)) {
825 				if (retlen != je32_to_cpu(latest_node->csize))
826 					ret = -EIO;
827 				kfree(f->target);
828 				f->target = NULL;
829 				up(&f->sem);
830 				jffs2_do_clear_inode(c, f);
831 				return -ret;
832 			}
833 
834 			f->target[je32_to_cpu(latest_node->csize)] = '\0';
835 			dbg_readinode("symlink's target '%s' cached\n", f->target);
836 		}
837 
838 		/* fall through... */
839 
840 	case S_IFBLK:
841 	case S_IFCHR:
842 		/* Certain inode types should have only one data node, and it's
843 		   kept as the metadata node */
844 		if (f->metadata) {
845 			JFFS2_ERROR("Argh. Special inode #%u with mode 0%o had metadata node\n",
846 			       f->inocache->ino, jemode_to_cpu(latest_node->mode));
847 			up(&f->sem);
848 			jffs2_do_clear_inode(c, f);
849 			return -EIO;
850 		}
851 		if (!frag_first(&f->fragtree)) {
852 			JFFS2_ERROR("Argh. Special inode #%u with mode 0%o has no fragments\n",
853 			       f->inocache->ino, jemode_to_cpu(latest_node->mode));
854 			up(&f->sem);
855 			jffs2_do_clear_inode(c, f);
856 			return -EIO;
857 		}
858 		/* ASSERT: f->fraglist != NULL */
859 		if (frag_next(frag_first(&f->fragtree))) {
860 			JFFS2_ERROR("Argh. Special inode #%u with mode 0x%x had more than one node\n",
861 			       f->inocache->ino, jemode_to_cpu(latest_node->mode));
862 			/* FIXME: Deal with it - check crc32, check for duplicate node, check times and discard the older one */
863 			up(&f->sem);
864 			jffs2_do_clear_inode(c, f);
865 			return -EIO;
866 		}
867 		/* OK. We're happy */
868 		f->metadata = frag_first(&f->fragtree)->node;
869 		jffs2_free_node_frag(frag_first(&f->fragtree));
870 		f->fragtree = RB_ROOT;
871 		break;
872 	}
873 	if (f->inocache->state == INO_STATE_READING)
874 		jffs2_set_inocache_state(c, f->inocache, INO_STATE_PRESENT);
875 
876 	return 0;
877 }
878 
879 /* Scan the list of all nodes present for this ino, build map of versions, etc. */
880 int jffs2_do_read_inode(struct jffs2_sb_info *c, struct jffs2_inode_info *f,
881 			uint32_t ino, struct jffs2_raw_inode *latest_node)
882 {
883 	dbg_readinode("read inode #%u\n", ino);
884 
885  retry_inocache:
886 	spin_lock(&c->inocache_lock);
887 	f->inocache = jffs2_get_ino_cache(c, ino);
888 
889 	if (f->inocache) {
890 		/* Check its state. We may need to wait before we can use it */
891 		switch(f->inocache->state) {
892 		case INO_STATE_UNCHECKED:
893 		case INO_STATE_CHECKEDABSENT:
894 			f->inocache->state = INO_STATE_READING;
895 			break;
896 
897 		case INO_STATE_CHECKING:
898 		case INO_STATE_GC:
899 			/* If it's in either of these states, we need
900 			   to wait for whoever's got it to finish and
901 			   put it back. */
902 			dbg_readinode("waiting for ino #%u in state %d\n", ino, f->inocache->state);
903 			sleep_on_spinunlock(&c->inocache_wq, &c->inocache_lock);
904 			goto retry_inocache;
905 
906 		case INO_STATE_READING:
907 		case INO_STATE_PRESENT:
908 			/* Eep. This should never happen. It can
909 			happen if Linux calls read_inode() again
910 			before clear_inode() has finished though. */
911 			JFFS2_ERROR("Eep. Trying to read_inode #%u when it's already in state %d!\n", ino, f->inocache->state);
912 			/* Fail. That's probably better than allowing it to succeed */
913 			f->inocache = NULL;
914 			break;
915 
916 		default:
917 			BUG();
918 		}
919 	}
920 	spin_unlock(&c->inocache_lock);
921 
922 	if (!f->inocache && ino == 1) {
923 		/* Special case - no root inode on medium */
924 		f->inocache = jffs2_alloc_inode_cache();
925 		if (!f->inocache) {
926 			JFFS2_ERROR("cannot allocate inocache for root inode\n");
927 			return -ENOMEM;
928 		}
929 		dbg_readinode("creating inocache for root inode\n");
930 		memset(f->inocache, 0, sizeof(struct jffs2_inode_cache));
931 		f->inocache->ino = f->inocache->nlink = 1;
932 		f->inocache->nodes = (struct jffs2_raw_node_ref *)f->inocache;
933 		f->inocache->state = INO_STATE_READING;
934 		jffs2_add_ino_cache(c, f->inocache);
935 	}
936 	if (!f->inocache) {
937 		JFFS2_ERROR("requestied to read an nonexistent ino %u\n", ino);
938 		return -ENOENT;
939 	}
940 
941 	return jffs2_do_read_inode_internal(c, f, latest_node);
942 }
943 
944 int jffs2_do_crccheck_inode(struct jffs2_sb_info *c, struct jffs2_inode_cache *ic)
945 {
946 	struct jffs2_raw_inode n;
947 	struct jffs2_inode_info *f = kzalloc(sizeof(*f), GFP_KERNEL);
948 	int ret;
949 
950 	if (!f)
951 		return -ENOMEM;
952 
953 	init_MUTEX_LOCKED(&f->sem);
954 	f->inocache = ic;
955 
956 	ret = jffs2_do_read_inode_internal(c, f, &n);
957 	if (!ret) {
958 		up(&f->sem);
959 		jffs2_do_clear_inode(c, f);
960 	}
961 	kfree (f);
962 	return ret;
963 }
964 
965 void jffs2_do_clear_inode(struct jffs2_sb_info *c, struct jffs2_inode_info *f)
966 {
967 	struct jffs2_full_dirent *fd, *fds;
968 	int deleted;
969 
970 	jffs2_clear_acl(f);
971 	jffs2_xattr_delete_inode(c, f->inocache);
972 	down(&f->sem);
973 	deleted = f->inocache && !f->inocache->nlink;
974 
975 	if (f->inocache && f->inocache->state != INO_STATE_CHECKING)
976 		jffs2_set_inocache_state(c, f->inocache, INO_STATE_CLEARING);
977 
978 	if (f->metadata) {
979 		if (deleted)
980 			jffs2_mark_node_obsolete(c, f->metadata->raw);
981 		jffs2_free_full_dnode(f->metadata);
982 	}
983 
984 	jffs2_kill_fragtree(&f->fragtree, deleted?c:NULL);
985 
986 	if (f->target) {
987 		kfree(f->target);
988 		f->target = NULL;
989 	}
990 
991 	fds = f->dents;
992 	while(fds) {
993 		fd = fds;
994 		fds = fd->next;
995 		jffs2_free_full_dirent(fd);
996 	}
997 
998 	if (f->inocache && f->inocache->state != INO_STATE_CHECKING) {
999 		jffs2_set_inocache_state(c, f->inocache, INO_STATE_CHECKEDABSENT);
1000 		if (f->inocache->nodes == (void *)f->inocache)
1001 			jffs2_del_ino_cache(c, f->inocache);
1002 	}
1003 
1004 	up(&f->sem);
1005 }
1006