xref: /openbmc/linux/fs/jffs2/gc.c (revision c00c310e)
1 /*
2  * JFFS2 -- Journalling Flash File System, Version 2.
3  *
4  * Copyright © 2001-2007 Red Hat, Inc.
5  *
6  * Created by David Woodhouse <dwmw2@infradead.org>
7  *
8  * For licensing information, see the file 'LICENCE' in this directory.
9  *
10  */
11 
12 #include <linux/kernel.h>
13 #include <linux/mtd/mtd.h>
14 #include <linux/slab.h>
15 #include <linux/pagemap.h>
16 #include <linux/crc32.h>
17 #include <linux/compiler.h>
18 #include <linux/stat.h>
19 #include "nodelist.h"
20 #include "compr.h"
21 
/* Forward declarations of the static GC helpers called from
   jffs2_garbage_collect_pass() and jffs2_garbage_collect_live() below. */
22 static int jffs2_garbage_collect_pristine(struct jffs2_sb_info *c,
23 					  struct jffs2_inode_cache *ic,
24 					  struct jffs2_raw_node_ref *raw);
25 static int jffs2_garbage_collect_metadata(struct jffs2_sb_info *c, struct jffs2_eraseblock *jeb,
26 					struct jffs2_inode_info *f, struct jffs2_full_dnode *fd);
27 static int jffs2_garbage_collect_dirent(struct jffs2_sb_info *c, struct jffs2_eraseblock *jeb,
28 					struct jffs2_inode_info *f, struct jffs2_full_dirent *fd);
29 static int jffs2_garbage_collect_deletion_dirent(struct jffs2_sb_info *c, struct jffs2_eraseblock *jeb,
30 					struct jffs2_inode_info *f, struct jffs2_full_dirent *fd);
31 static int jffs2_garbage_collect_hole(struct jffs2_sb_info *c, struct jffs2_eraseblock *jeb,
32 				      struct jffs2_inode_info *f, struct jffs2_full_dnode *fn,
33 				      uint32_t start, uint32_t end);
34 static int jffs2_garbage_collect_dnode(struct jffs2_sb_info *c, struct jffs2_eraseblock *jeb,
35 				       struct jffs2_inode_info *f, struct jffs2_full_dnode *fn,
36 				       uint32_t start, uint32_t end);
37 static int jffs2_garbage_collect_live(struct jffs2_sb_info *c,  struct jffs2_eraseblock *jeb,
38 			       struct jffs2_raw_node_ref *raw, struct jffs2_inode_info *f);
39 
40 /* Called with erase_completion_lock held */
41 static struct jffs2_eraseblock *jffs2_find_gc_block(struct jffs2_sb_info *c)
42 {
43 	struct jffs2_eraseblock *ret;
44 	struct list_head *nextlist = NULL;
45 	int n = jiffies % 128;
46 
47 	/* Pick an eraseblock to garbage collect next. This is where we'll
48 	   put the clever wear-levelling algorithms. Eventually.  */
49 	/* We possibly want to favour the dirtier blocks more when the
50 	   number of free blocks is low. */
51 again:
52 	if (!list_empty(&c->bad_used_list) && c->nr_free_blocks > c->resv_blocks_gcbad) {
53 		D1(printk(KERN_DEBUG "Picking block from bad_used_list to GC next\n"));
54 		nextlist = &c->bad_used_list;
55 	} else if (n < 50 && !list_empty(&c->erasable_list)) {
56 		/* Note that most of them will have gone directly to be erased.
57 		   So don't favour the erasable_list _too_ much. */
58 		D1(printk(KERN_DEBUG "Picking block from erasable_list to GC next\n"));
59 		nextlist = &c->erasable_list;
60 	} else if (n < 110 && !list_empty(&c->very_dirty_list)) {
61 		/* Most of the time, pick one off the very_dirty list */
62 		D1(printk(KERN_DEBUG "Picking block from very_dirty_list to GC next\n"));
63 		nextlist = &c->very_dirty_list;
64 	} else if (n < 126 && !list_empty(&c->dirty_list)) {
65 		D1(printk(KERN_DEBUG "Picking block from dirty_list to GC next\n"));
66 		nextlist = &c->dirty_list;
67 	} else if (!list_empty(&c->clean_list)) {
68 		D1(printk(KERN_DEBUG "Picking block from clean_list to GC next\n"));
69 		nextlist = &c->clean_list;
70 	} else if (!list_empty(&c->dirty_list)) {
71 		D1(printk(KERN_DEBUG "Picking block from dirty_list to GC next (clean_list was empty)\n"));
72 
73 		nextlist = &c->dirty_list;
74 	} else if (!list_empty(&c->very_dirty_list)) {
75 		D1(printk(KERN_DEBUG "Picking block from very_dirty_list to GC next (clean_list and dirty_list were empty)\n"));
76 		nextlist = &c->very_dirty_list;
77 	} else if (!list_empty(&c->erasable_list)) {
78 		D1(printk(KERN_DEBUG "Picking block from erasable_list to GC next (clean_list and {very_,}dirty_list were empty)\n"));
79 
80 		nextlist = &c->erasable_list;
81 	} else if (!list_empty(&c->erasable_pending_wbuf_list)) {
82 		/* There are blocks are wating for the wbuf sync */
83 		D1(printk(KERN_DEBUG "Synching wbuf in order to reuse erasable_pending_wbuf_list blocks\n"));
84 		spin_unlock(&c->erase_completion_lock);
85 		jffs2_flush_wbuf_pad(c);
86 		spin_lock(&c->erase_completion_lock);
87 		goto again;
88 	} else {
89 		/* Eep. All were empty */
90 		D1(printk(KERN_NOTICE "jffs2: No clean, dirty _or_ erasable blocks to GC from! Where are they all?\n"));
91 		return NULL;
92 	}
93 
94 	ret = list_entry(nextlist->next, struct jffs2_eraseblock, list);
95 	list_del(&ret->list);
96 	c->gcblock = ret;
97 	ret->gc_node = ret->first_node;
98 	if (!ret->gc_node) {
99 		printk(KERN_WARNING "Eep. ret->gc_node for block at 0x%08x is NULL\n", ret->offset);
100 		BUG();
101 	}
102 
103 	/* Have we accidentally picked a clean block with wasted space ? */
104 	if (ret->wasted_size) {
105 		D1(printk(KERN_DEBUG "Converting wasted_size %08x to dirty_size\n", ret->wasted_size));
106 		ret->dirty_size += ret->wasted_size;
107 		c->wasted_size -= ret->wasted_size;
108 		c->dirty_size += ret->wasted_size;
109 		ret->wasted_size = 0;
110 	}
111 
112 	return ret;
113 }
114 
115 /* jffs2_garbage_collect_pass
116  * Make a single attempt to progress GC. Move one node, and possibly
117  * start erasing one eraseblock.
 *
 * Takes c->alloc_sem (interruptibly) and releases it again before every
 * return. While c->unchecked_size is non-zero the call is spent on
 * CRC-checking a single inode instead of moving a node.
 *
 * Returns 0 on success (including the "had to wait, call again" cases),
 * -EINTR if interrupted while waiting for alloc_sem, -ENOSPC if every
 * inode has been checked yet unchecked space remains, -EIO if no block
 * could be chosen, or the error reported by the helper that did the work.
118  */
119 int jffs2_garbage_collect_pass(struct jffs2_sb_info *c)
120 {
121 	struct jffs2_inode_info *f;
122 	struct jffs2_inode_cache *ic;
123 	struct jffs2_eraseblock *jeb;
124 	struct jffs2_raw_node_ref *raw;
125 	int ret = 0, inum, nlink;
126 	int xattr = 0;
127 
128 	if (down_interruptible(&c->alloc_sem))
129 		return -EINTR;
130 
	/* CRC-check phase. The loop is left (via break) with
	   erase_completion_lock held once nothing is left unchecked;
	   otherwise one inode is checked and the function returns. */
131 	for (;;) {
132 		spin_lock(&c->erase_completion_lock);
133 		if (!c->unchecked_size)
134 			break;
135 
136 		/* We can't start doing GC yet. We haven't finished checking
137 		   the node CRCs etc. Do it now. */
138 
139 		/* checked_ino is protected by the alloc_sem */
140 		if (c->checked_ino > c->highest_ino && xattr) {
141 			printk(KERN_CRIT "Checked all inodes but still 0x%x bytes of unchecked space?\n",
142 			       c->unchecked_size);
143 			jffs2_dbg_dump_block_lists_nolock(c);
144 			spin_unlock(&c->erase_completion_lock);
145 			up(&c->alloc_sem);
146 			return -ENOSPC;
147 		}
148 
149 		spin_unlock(&c->erase_completion_lock);
150 
		/* Keep calling jffs2_verify_xattr() on each iteration until it
		   returns non-zero */
151 		if (!xattr)
152 			xattr = jffs2_verify_xattr(c);
153 
154 		spin_lock(&c->inocache_lock);
155 
156 		ic = jffs2_get_ino_cache(c, c->checked_ino++);
157 
158 		if (!ic) {
159 			spin_unlock(&c->inocache_lock);
160 			continue;
161 		}
162 
163 		if (!ic->nlink) {
164 			D1(printk(KERN_DEBUG "Skipping check of ino #%d with nlink zero\n",
165 				  ic->ino));
166 			spin_unlock(&c->inocache_lock);
167 			jffs2_xattr_delete_inode(c, ic);
168 			continue;
169 		}
170 		switch(ic->state) {
171 		case INO_STATE_CHECKEDABSENT:
172 		case INO_STATE_PRESENT:
173 			D1(printk(KERN_DEBUG "Skipping ino #%u already checked\n", ic->ino));
174 			spin_unlock(&c->inocache_lock);
175 			continue;
176 
177 		case INO_STATE_GC:
178 		case INO_STATE_CHECKING:
179 			printk(KERN_WARNING "Inode #%u is in state %d during CRC check phase!\n", ic->ino, ic->state);
180 			spin_unlock(&c->inocache_lock);
181 			BUG();
182 
183 		case INO_STATE_READING:
184 			/* We need to wait for it to finish, lest we move on
185 			   and trigger the BUG() above while we haven't yet
186 			   finished checking all its nodes */
187 			D1(printk(KERN_DEBUG "Waiting for ino #%u to finish reading\n", ic->ino));
188 			/* We need to come back again for the _same_ inode. We've
189 			 made no progress in this case, but that should be OK */
190 			c->checked_ino--;
191 
192 			up(&c->alloc_sem);
193 			sleep_on_spinunlock(&c->inocache_wq, &c->inocache_lock);
194 			return 0;
195 
196 		default:
197 			BUG();
198 
199 		case INO_STATE_UNCHECKED:
200 			;
201 		}
202 		ic->state = INO_STATE_CHECKING;
203 		spin_unlock(&c->inocache_lock);
204 
205 		D1(printk(KERN_DEBUG "jffs2_garbage_collect_pass() triggering inode scan of ino#%u\n", ic->ino));
206 
207 		ret = jffs2_do_crccheck_inode(c, ic);
208 		if (ret)
209 			printk(KERN_WARNING "Returned error for crccheck of ino #%u. Expect badness...\n", ic->ino);
210 
211 		jffs2_set_inocache_state(c, ic, INO_STATE_CHECKEDABSENT);
212 		up(&c->alloc_sem);
213 		return ret;
214 	}
215 
216 	/* First, work out which block we're garbage-collecting */
217 	jeb = c->gcblock;
218 
219 	if (!jeb)
220 		jeb = jffs2_find_gc_block(c);
221 
222 	if (!jeb) {
223 		D1 (printk(KERN_NOTICE "jffs2: Couldn't find erase block to garbage collect!\n"));
224 		spin_unlock(&c->erase_completion_lock);
225 		up(&c->alloc_sem);
226 		return -EIO;
227 	}
228 
229 	D1(printk(KERN_DEBUG "GC from block %08x, used_size %08x, dirty_size %08x, free_size %08x\n", jeb->offset, jeb->used_size, jeb->dirty_size, jeb->free_size));
230 	D1(if (c->nextblock)
231 	   printk(KERN_DEBUG "Nextblock at  %08x, used_size %08x, dirty_size %08x, wasted_size %08x, free_size %08x\n", c->nextblock->offset, c->nextblock->used_size, c->nextblock->dirty_size, c->nextblock->wasted_size, c->nextblock->free_size));
232 
	/* Nothing live left in this block: go straight to queueing it for
	   erase. erase_completion_lock is still held here, as the code at
	   the eraseit label expects. */
233 	if (!jeb->used_size) {
234 		up(&c->alloc_sem);
235 		goto eraseit;
236 	}
237 
238 	raw = jeb->gc_node;
239 
	/* Advance past obsolete refs to the next node that still needs moving */
240 	while(ref_obsolete(raw)) {
241 		D1(printk(KERN_DEBUG "Node at 0x%08x is obsolete... skipping\n", ref_offset(raw)));
242 		raw = ref_next(raw);
243 		if (unlikely(!raw)) {
244 			printk(KERN_WARNING "eep. End of raw list while still supposedly nodes to GC\n");
245 			printk(KERN_WARNING "erase block at 0x%08x. free_size 0x%08x, dirty_size 0x%08x, used_size 0x%08x\n",
246 			       jeb->offset, jeb->free_size, jeb->dirty_size, jeb->used_size);
247 			jeb->gc_node = raw;
248 			spin_unlock(&c->erase_completion_lock);
249 			up(&c->alloc_sem);
250 			BUG();
251 		}
252 	}
253 	jeb->gc_node = raw;
254 
255 	D1(printk(KERN_DEBUG "Going to garbage collect node at 0x%08x\n", ref_offset(raw)));
256 
257 	if (!raw->next_in_ino) {
258 		/* Inode-less node. Clean marker, snapshot or something like that */
259 		spin_unlock(&c->erase_completion_lock);
260 		if (ref_flags(raw) == REF_PRISTINE) {
261 			/* It's an unknown node with JFFS2_FEATURE_RWCOMPAT_COPY */
			/* NOTE(review): the return value is not checked here */
262 			jffs2_garbage_collect_pristine(c, NULL, raw);
263 		} else {
264 			/* Just mark it obsolete */
265 			jffs2_mark_node_obsolete(c, raw);
266 		}
267 		up(&c->alloc_sem);
268 		goto eraseit_lock;
269 	}
270 
271 	ic = jffs2_raw_ref_to_ic(raw);
272 
273 #ifdef CONFIG_JFFS2_FS_XATTR
274 	/* When 'ic' refers xattr_datum/xattr_ref, this node is GCed as xattr.
275 	 * We can decide whether this node is inode or xattr by ic->class.     */
276 	if (ic->class == RAWNODE_CLASS_XATTR_DATUM
277 	    || ic->class == RAWNODE_CLASS_XATTR_REF) {
278 		spin_unlock(&c->erase_completion_lock);
279 
280 		if (ic->class == RAWNODE_CLASS_XATTR_DATUM) {
281 			ret = jffs2_garbage_collect_xattr_datum(c, (struct jffs2_xattr_datum *)ic, raw);
282 		} else {
283 			ret = jffs2_garbage_collect_xattr_ref(c, (struct jffs2_xattr_ref *)ic, raw);
284 		}
285 		goto release_sem;
286 	}
287 #endif
288 
289 	/* We need to hold the inocache. Either the erase_completion_lock or
290 	   the inocache_lock are sufficient; we trade down since the inocache_lock
291 	   causes less contention. */
292 	spin_lock(&c->inocache_lock);
293 
294 	spin_unlock(&c->erase_completion_lock);
295 
296 	D1(printk(KERN_DEBUG "jffs2_garbage_collect_pass collecting from block @0x%08x. Node @0x%08x(%d), ino #%u\n", jeb->offset, ref_offset(raw), ref_flags(raw), ic->ino));
297 
298 	/* Three possibilities:
299 	   1. Inode is already in-core. We must iget it and do proper
300 	      updating to its fragtree, etc.
301 	   2. Inode is not in-core, node is REF_PRISTINE. We lock the
302 	      inocache to prevent a read_inode(), copy the node intact.
303 	   3. Inode is not in-core, node is not pristine. We must iget()
304 	      and take the slow path.
305 	*/
306 
307 	switch(ic->state) {
308 	case INO_STATE_CHECKEDABSENT:
309 		/* It's been checked, but it's not currently in-core.
310 		   We can just copy any pristine nodes, but have
311 		   to prevent anyone else from doing read_inode() while
312 		   we're at it, so we set the state accordingly */
313 		if (ref_flags(raw) == REF_PRISTINE)
314 			ic->state = INO_STATE_GC;
315 		else {
316 			D1(printk(KERN_DEBUG "Ino #%u is absent but node not REF_PRISTINE. Reading.\n",
317 				  ic->ino));
318 		}
319 		break;
320 
321 	case INO_STATE_PRESENT:
322 		/* It's in-core. GC must iget() it. */
323 		break;
324 
325 	case INO_STATE_UNCHECKED:
326 	case INO_STATE_CHECKING:
327 	case INO_STATE_GC:
328 		/* Should never happen. We should have finished checking
329 		   by the time we actually start doing any GC, and since
330 		   we're holding the alloc_sem, no other garbage collection
331 		   can happen.
332 		*/
333 		printk(KERN_CRIT "Inode #%u already in state %d in jffs2_garbage_collect_pass()!\n",
334 		       ic->ino, ic->state);
335 		up(&c->alloc_sem);
336 		spin_unlock(&c->inocache_lock);
337 		BUG();
338 
339 	case INO_STATE_READING:
340 		/* Someone's currently trying to read it. We must wait for
341 		   them to finish and then go through the full iget() route
342 		   to do the GC. However, sometimes read_inode() needs to get
343 		   the alloc_sem() (for marking nodes invalid) so we must
344 		   drop the alloc_sem before sleeping. */
345 
346 		up(&c->alloc_sem);
347 		D1(printk(KERN_DEBUG "jffs2_garbage_collect_pass() waiting for ino #%u in state %d\n",
348 			  ic->ino, ic->state));
349 		sleep_on_spinunlock(&c->inocache_wq, &c->inocache_lock);
350 		/* And because we dropped the alloc_sem we must start again from the
351 		   beginning. Ponder chance of livelock here -- we're returning success
352 		   without actually making any progress.
353 
354 		   Q: What are the chances that the inode is back in INO_STATE_READING
355 		   again by the time we next enter this function? And that this happens
356 		   enough times to cause a real delay?
357 
358 		   A: Small enough that I don't care :)
359 		*/
360 		return 0;
361 	}
362 
363 	/* OK. Now if the inode is in state INO_STATE_GC, we are going to copy the
364 	   node intact, and we don't have to muck about with the fragtree etc.
365 	   because we know it's not in-core. If it _was_ in-core, we go through
366 	   all the iget() crap anyway */
367 
368 	if (ic->state == INO_STATE_GC) {
369 		spin_unlock(&c->inocache_lock);
370 
371 		ret = jffs2_garbage_collect_pristine(c, ic, raw);
372 
		/* Put the state back and wake anyone sleeping on inocache_wq */
373 		spin_lock(&c->inocache_lock);
374 		ic->state = INO_STATE_CHECKEDABSENT;
375 		wake_up(&c->inocache_wq);
376 
		/* -EBADFD is the "could not copy intact" answer; any other
		   result (success or error) ends this pass. */
377 		if (ret != -EBADFD) {
378 			spin_unlock(&c->inocache_lock);
379 			goto release_sem;
380 		}
381 
382 		/* Fall through if it wanted us to, with inocache_lock held */
383 	}
384 
385 	/* Prevent the fairly unlikely race where the gcblock is
386 	   entirely obsoleted by the final close of a file which had
387 	   the only valid nodes in the block, followed by erasure,
388 	   followed by freeing of the ic because the erased block(s)
389 	   held _all_ the nodes of that inode.... never been seen but
390 	   it's vaguely possible. */
391 
	/* Copy what we need out of ic before dropping inocache_lock */
392 	inum = ic->ino;
393 	nlink = ic->nlink;
394 	spin_unlock(&c->inocache_lock);
395 
	/* An ERR_PTR result is propagated as the return value; a NULL
	   result is treated as success with nothing to do. */
396 	f = jffs2_gc_fetch_inode(c, inum, nlink);
397 	if (IS_ERR(f)) {
398 		ret = PTR_ERR(f);
399 		goto release_sem;
400 	}
401 	if (!f) {
402 		ret = 0;
403 		goto release_sem;
404 	}
405 
406 	ret = jffs2_garbage_collect_live(c, jeb, raw, f);
407 
408 	jffs2_gc_release_inode(c, f);
409 
410  release_sem:
411 	up(&c->alloc_sem);
412 
413  eraseit_lock:
414 	/* If we've finished this block, start it erasing */
415 	spin_lock(&c->erase_completion_lock);
416 
417  eraseit:
418 	if (c->gcblock && !c->gcblock->used_size) {
419 		D1(printk(KERN_DEBUG "Block at 0x%08x completely obsoleted by GC. Moving to erase_pending_list\n", c->gcblock->offset));
420 		/* We're GC'ing an empty block? */
421 		list_add_tail(&c->gcblock->list, &c->erase_pending_list);
422 		c->gcblock = NULL;
423 		c->nr_erasing_blocks++;
424 		jffs2_erase_pending_trigger(c);
425 	}
426 	spin_unlock(&c->erase_completion_lock);
427 
428 	return ret;
429 }
430 
431 static int jffs2_garbage_collect_live(struct jffs2_sb_info *c,  struct jffs2_eraseblock *jeb,
432 				      struct jffs2_raw_node_ref *raw, struct jffs2_inode_info *f)
433 {
434 	struct jffs2_node_frag *frag;
435 	struct jffs2_full_dnode *fn = NULL;
436 	struct jffs2_full_dirent *fd;
437 	uint32_t start = 0, end = 0, nrfrags = 0;
438 	int ret = 0;
439 
440 	down(&f->sem);
441 
442 	/* Now we have the lock for this inode. Check that it's still the one at the head
443 	   of the list. */
444 
445 	spin_lock(&c->erase_completion_lock);
446 
447 	if (c->gcblock != jeb) {
448 		spin_unlock(&c->erase_completion_lock);
449 		D1(printk(KERN_DEBUG "GC block is no longer gcblock. Restart\n"));
450 		goto upnout;
451 	}
452 	if (ref_obsolete(raw)) {
453 		spin_unlock(&c->erase_completion_lock);
454 		D1(printk(KERN_DEBUG "node to be GC'd was obsoleted in the meantime.\n"));
455 		/* They'll call again */
456 		goto upnout;
457 	}
458 	spin_unlock(&c->erase_completion_lock);
459 
460 	/* OK. Looks safe. And nobody can get us now because we have the semaphore. Move the block */
461 	if (f->metadata && f->metadata->raw == raw) {
462 		fn = f->metadata;
463 		ret = jffs2_garbage_collect_metadata(c, jeb, f, fn);
464 		goto upnout;
465 	}
466 
467 	/* FIXME. Read node and do lookup? */
468 	for (frag = frag_first(&f->fragtree); frag; frag = frag_next(frag)) {
469 		if (frag->node && frag->node->raw == raw) {
470 			fn = frag->node;
471 			end = frag->ofs + frag->size;
472 			if (!nrfrags++)
473 				start = frag->ofs;
474 			if (nrfrags == frag->node->frags)
475 				break; /* We've found them all */
476 		}
477 	}
478 	if (fn) {
479 		if (ref_flags(raw) == REF_PRISTINE) {
480 			ret = jffs2_garbage_collect_pristine(c, f->inocache, raw);
481 			if (!ret) {
482 				/* Urgh. Return it sensibly. */
483 				frag->node->raw = f->inocache->nodes;
484 			}
485 			if (ret != -EBADFD)
486 				goto upnout;
487 		}
488 		/* We found a datanode. Do the GC */
489 		if((start >> PAGE_CACHE_SHIFT) < ((end-1) >> PAGE_CACHE_SHIFT)) {
490 			/* It crosses a page boundary. Therefore, it must be a hole. */
491 			ret = jffs2_garbage_collect_hole(c, jeb, f, fn, start, end);
492 		} else {
493 			/* It could still be a hole. But we GC the page this way anyway */
494 			ret = jffs2_garbage_collect_dnode(c, jeb, f, fn, start, end);
495 		}
496 		goto upnout;
497 	}
498 
499 	/* Wasn't a dnode. Try dirent */
500 	for (fd = f->dents; fd; fd=fd->next) {
501 		if (fd->raw == raw)
502 			break;
503 	}
504 
505 	if (fd && fd->ino) {
506 		ret = jffs2_garbage_collect_dirent(c, jeb, f, fd);
507 	} else if (fd) {
508 		ret = jffs2_garbage_collect_deletion_dirent(c, jeb, f, fd);
509 	} else {
510 		printk(KERN_WARNING "Raw node at 0x%08x wasn't in node lists for ino #%u\n",
511 		       ref_offset(raw), f->inocache->ino);
512 		if (ref_obsolete(raw)) {
513 			printk(KERN_WARNING "But it's obsolete so we don't mind too much\n");
514 		} else {
515 			jffs2_dbg_dump_node(c, ref_offset(raw));
516 			BUG();
517 		}
518 	}
519  upnout:
520 	up(&f->sem);
521 
522 	return ret;
523 }
524 
525 static int jffs2_garbage_collect_pristine(struct jffs2_sb_info *c,
526 					  struct jffs2_inode_cache *ic,
527 					  struct jffs2_raw_node_ref *raw)
528 {
529 	union jffs2_node_union *node;
530 	size_t retlen;
531 	int ret;
532 	uint32_t phys_ofs, alloclen;
533 	uint32_t crc, rawlen;
534 	int retried = 0;
535 
536 	D1(printk(KERN_DEBUG "Going to GC REF_PRISTINE node at 0x%08x\n", ref_offset(raw)));
537 
538 	alloclen = rawlen = ref_totlen(c, c->gcblock, raw);
539 
540 	/* Ask for a small amount of space (or the totlen if smaller) because we
541 	   don't want to force wastage of the end of a block if splitting would
542 	   work. */
543 	if (ic && alloclen > sizeof(struct jffs2_raw_inode) + JFFS2_MIN_DATA_LEN)
544 		alloclen = sizeof(struct jffs2_raw_inode) + JFFS2_MIN_DATA_LEN;
545 
546 	ret = jffs2_reserve_space_gc(c, alloclen, &alloclen, rawlen);
547 	/* 'rawlen' is not the exact summary size; it is only an upper estimation */
548 
549 	if (ret)
550 		return ret;
551 
552 	if (alloclen < rawlen) {
553 		/* Doesn't fit untouched. We'll go the old route and split it */
554 		return -EBADFD;
555 	}
556 
557 	node = kmalloc(rawlen, GFP_KERNEL);
558 	if (!node)
559                return -ENOMEM;
560 
561 	ret = jffs2_flash_read(c, ref_offset(raw), rawlen, &retlen, (char *)node);
562 	if (!ret && retlen != rawlen)
563 		ret = -EIO;
564 	if (ret)
565 		goto out_node;
566 
567 	crc = crc32(0, node, sizeof(struct jffs2_unknown_node)-4);
568 	if (je32_to_cpu(node->u.hdr_crc) != crc) {
569 		printk(KERN_WARNING "Header CRC failed on REF_PRISTINE node at 0x%08x: Read 0x%08x, calculated 0x%08x\n",
570 		       ref_offset(raw), je32_to_cpu(node->u.hdr_crc), crc);
571 		goto bail;
572 	}
573 
574 	switch(je16_to_cpu(node->u.nodetype)) {
575 	case JFFS2_NODETYPE_INODE:
576 		crc = crc32(0, node, sizeof(node->i)-8);
577 		if (je32_to_cpu(node->i.node_crc) != crc) {
578 			printk(KERN_WARNING "Node CRC failed on REF_PRISTINE data node at 0x%08x: Read 0x%08x, calculated 0x%08x\n",
579 			       ref_offset(raw), je32_to_cpu(node->i.node_crc), crc);
580 			goto bail;
581 		}
582 
583 		if (je32_to_cpu(node->i.dsize)) {
584 			crc = crc32(0, node->i.data, je32_to_cpu(node->i.csize));
585 			if (je32_to_cpu(node->i.data_crc) != crc) {
586 				printk(KERN_WARNING "Data CRC failed on REF_PRISTINE data node at 0x%08x: Read 0x%08x, calculated 0x%08x\n",
587 				       ref_offset(raw), je32_to_cpu(node->i.data_crc), crc);
588 				goto bail;
589 			}
590 		}
591 		break;
592 
593 	case JFFS2_NODETYPE_DIRENT:
594 		crc = crc32(0, node, sizeof(node->d)-8);
595 		if (je32_to_cpu(node->d.node_crc) != crc) {
596 			printk(KERN_WARNING "Node CRC failed on REF_PRISTINE dirent node at 0x%08x: Read 0x%08x, calculated 0x%08x\n",
597 			       ref_offset(raw), je32_to_cpu(node->d.node_crc), crc);
598 			goto bail;
599 		}
600 
601 		if (node->d.nsize) {
602 			crc = crc32(0, node->d.name, node->d.nsize);
603 			if (je32_to_cpu(node->d.name_crc) != crc) {
604 				printk(KERN_WARNING "Name CRC failed on REF_PRISTINE dirent ode at 0x%08x: Read 0x%08x, calculated 0x%08x\n",
605 				       ref_offset(raw), je32_to_cpu(node->d.name_crc), crc);
606 				goto bail;
607 			}
608 		}
609 		break;
610 	default:
611 		/* If it's inode-less, we don't _know_ what it is. Just copy it intact */
612 		if (ic) {
613 			printk(KERN_WARNING "Unknown node type for REF_PRISTINE node at 0x%08x: 0x%04x\n",
614 			       ref_offset(raw), je16_to_cpu(node->u.nodetype));
615 			goto bail;
616 		}
617 	}
618 
619 	/* OK, all the CRCs are good; this node can just be copied as-is. */
620  retry:
621 	phys_ofs = write_ofs(c);
622 
623 	ret = jffs2_flash_write(c, phys_ofs, rawlen, &retlen, (char *)node);
624 
625 	if (ret || (retlen != rawlen)) {
626 		printk(KERN_NOTICE "Write of %d bytes at 0x%08x failed. returned %d, retlen %zd\n",
627                        rawlen, phys_ofs, ret, retlen);
628 		if (retlen) {
629 			jffs2_add_physical_node_ref(c, phys_ofs | REF_OBSOLETE, rawlen, NULL);
630 		} else {
631 			printk(KERN_NOTICE "Not marking the space at 0x%08x as dirty because the flash driver returned retlen zero\n", phys_ofs);
632 		}
633 		if (!retried) {
634 			/* Try to reallocate space and retry */
635 			uint32_t dummy;
636 			struct jffs2_eraseblock *jeb = &c->blocks[phys_ofs / c->sector_size];
637 
638 			retried = 1;
639 
640 			D1(printk(KERN_DEBUG "Retrying failed write of REF_PRISTINE node.\n"));
641 
642 			jffs2_dbg_acct_sanity_check(c,jeb);
643 			jffs2_dbg_acct_paranoia_check(c, jeb);
644 
645 			ret = jffs2_reserve_space_gc(c, rawlen, &dummy, rawlen);
646 						/* this is not the exact summary size of it,
647 							it is only an upper estimation */
648 
649 			if (!ret) {
650 				D1(printk(KERN_DEBUG "Allocated space at 0x%08x to retry failed write.\n", phys_ofs));
651 
652 				jffs2_dbg_acct_sanity_check(c,jeb);
653 				jffs2_dbg_acct_paranoia_check(c, jeb);
654 
655 				goto retry;
656 			}
657 			D1(printk(KERN_DEBUG "Failed to allocate space to retry failed write: %d!\n", ret));
658 		}
659 
660 		if (!ret)
661 			ret = -EIO;
662 		goto out_node;
663 	}
664 	jffs2_add_physical_node_ref(c, phys_ofs | REF_PRISTINE, rawlen, ic);
665 
666 	jffs2_mark_node_obsolete(c, raw);
667 	D1(printk(KERN_DEBUG "WHEEE! GC REF_PRISTINE node at 0x%08x succeeded\n", ref_offset(raw)));
668 
669  out_node:
670 	kfree(node);
671 	return ret;
672  bail:
673 	ret = -EBADFD;
674 	goto out_node;
675 }
676 
677 static int jffs2_garbage_collect_metadata(struct jffs2_sb_info *c, struct jffs2_eraseblock *jeb,
678 					struct jffs2_inode_info *f, struct jffs2_full_dnode *fn)
679 {
680 	struct jffs2_full_dnode *new_fn;
681 	struct jffs2_raw_inode ri;
682 	struct jffs2_node_frag *last_frag;
683 	union jffs2_device_node dev;
684 	char *mdata = NULL, mdatalen = 0;
685 	uint32_t alloclen, ilen;
686 	int ret;
687 
688 	if (S_ISBLK(JFFS2_F_I_MODE(f)) ||
689 	    S_ISCHR(JFFS2_F_I_MODE(f)) ) {
690 		/* For these, we don't actually need to read the old node */
691 		mdatalen = jffs2_encode_dev(&dev, JFFS2_F_I_RDEV(f));
692 		mdata = (char *)&dev;
693 		D1(printk(KERN_DEBUG "jffs2_garbage_collect_metadata(): Writing %d bytes of kdev_t\n", mdatalen));
694 	} else if (S_ISLNK(JFFS2_F_I_MODE(f))) {
695 		mdatalen = fn->size;
696 		mdata = kmalloc(fn->size, GFP_KERNEL);
697 		if (!mdata) {
698 			printk(KERN_WARNING "kmalloc of mdata failed in jffs2_garbage_collect_metadata()\n");
699 			return -ENOMEM;
700 		}
701 		ret = jffs2_read_dnode(c, f, fn, mdata, 0, mdatalen);
702 		if (ret) {
703 			printk(KERN_WARNING "read of old metadata failed in jffs2_garbage_collect_metadata(): %d\n", ret);
704 			kfree(mdata);
705 			return ret;
706 		}
707 		D1(printk(KERN_DEBUG "jffs2_garbage_collect_metadata(): Writing %d bites of symlink target\n", mdatalen));
708 
709 	}
710 
711 	ret = jffs2_reserve_space_gc(c, sizeof(ri) + mdatalen, &alloclen,
712 				JFFS2_SUMMARY_INODE_SIZE);
713 	if (ret) {
714 		printk(KERN_WARNING "jffs2_reserve_space_gc of %zd bytes for garbage_collect_metadata failed: %d\n",
715 		       sizeof(ri)+ mdatalen, ret);
716 		goto out;
717 	}
718 
719 	last_frag = frag_last(&f->fragtree);
720 	if (last_frag)
721 		/* Fetch the inode length from the fragtree rather then
722 		 * from i_size since i_size may have not been updated yet */
723 		ilen = last_frag->ofs + last_frag->size;
724 	else
725 		ilen = JFFS2_F_I_SIZE(f);
726 
727 	memset(&ri, 0, sizeof(ri));
728 	ri.magic = cpu_to_je16(JFFS2_MAGIC_BITMASK);
729 	ri.nodetype = cpu_to_je16(JFFS2_NODETYPE_INODE);
730 	ri.totlen = cpu_to_je32(sizeof(ri) + mdatalen);
731 	ri.hdr_crc = cpu_to_je32(crc32(0, &ri, sizeof(struct jffs2_unknown_node)-4));
732 
733 	ri.ino = cpu_to_je32(f->inocache->ino);
734 	ri.version = cpu_to_je32(++f->highest_version);
735 	ri.mode = cpu_to_jemode(JFFS2_F_I_MODE(f));
736 	ri.uid = cpu_to_je16(JFFS2_F_I_UID(f));
737 	ri.gid = cpu_to_je16(JFFS2_F_I_GID(f));
738 	ri.isize = cpu_to_je32(ilen);
739 	ri.atime = cpu_to_je32(JFFS2_F_I_ATIME(f));
740 	ri.ctime = cpu_to_je32(JFFS2_F_I_CTIME(f));
741 	ri.mtime = cpu_to_je32(JFFS2_F_I_MTIME(f));
742 	ri.offset = cpu_to_je32(0);
743 	ri.csize = cpu_to_je32(mdatalen);
744 	ri.dsize = cpu_to_je32(mdatalen);
745 	ri.compr = JFFS2_COMPR_NONE;
746 	ri.node_crc = cpu_to_je32(crc32(0, &ri, sizeof(ri)-8));
747 	ri.data_crc = cpu_to_je32(crc32(0, mdata, mdatalen));
748 
749 	new_fn = jffs2_write_dnode(c, f, &ri, mdata, mdatalen, ALLOC_GC);
750 
751 	if (IS_ERR(new_fn)) {
752 		printk(KERN_WARNING "Error writing new dnode: %ld\n", PTR_ERR(new_fn));
753 		ret = PTR_ERR(new_fn);
754 		goto out;
755 	}
756 	jffs2_mark_node_obsolete(c, fn->raw);
757 	jffs2_free_full_dnode(fn);
758 	f->metadata = new_fn;
759  out:
760 	if (S_ISLNK(JFFS2_F_I_MODE(f)))
761 		kfree(mdata);
762 	return ret;
763 }
764 
765 static int jffs2_garbage_collect_dirent(struct jffs2_sb_info *c, struct jffs2_eraseblock *jeb,
766 					struct jffs2_inode_info *f, struct jffs2_full_dirent *fd)
767 {
768 	struct jffs2_full_dirent *new_fd;
769 	struct jffs2_raw_dirent rd;
770 	uint32_t alloclen;
771 	int ret;
772 
773 	rd.magic = cpu_to_je16(JFFS2_MAGIC_BITMASK);
774 	rd.nodetype = cpu_to_je16(JFFS2_NODETYPE_DIRENT);
775 	rd.nsize = strlen(fd->name);
776 	rd.totlen = cpu_to_je32(sizeof(rd) + rd.nsize);
777 	rd.hdr_crc = cpu_to_je32(crc32(0, &rd, sizeof(struct jffs2_unknown_node)-4));
778 
779 	rd.pino = cpu_to_je32(f->inocache->ino);
780 	rd.version = cpu_to_je32(++f->highest_version);
781 	rd.ino = cpu_to_je32(fd->ino);
782 	/* If the times on this inode were set by explicit utime() they can be different,
783 	   so refrain from splatting them. */
784 	if (JFFS2_F_I_MTIME(f) == JFFS2_F_I_CTIME(f))
785 		rd.mctime = cpu_to_je32(JFFS2_F_I_MTIME(f));
786 	else
787 		rd.mctime = cpu_to_je32(0);
788 	rd.type = fd->type;
789 	rd.node_crc = cpu_to_je32(crc32(0, &rd, sizeof(rd)-8));
790 	rd.name_crc = cpu_to_je32(crc32(0, fd->name, rd.nsize));
791 
792 	ret = jffs2_reserve_space_gc(c, sizeof(rd)+rd.nsize, &alloclen,
793 				JFFS2_SUMMARY_DIRENT_SIZE(rd.nsize));
794 	if (ret) {
795 		printk(KERN_WARNING "jffs2_reserve_space_gc of %zd bytes for garbage_collect_dirent failed: %d\n",
796 		       sizeof(rd)+rd.nsize, ret);
797 		return ret;
798 	}
799 	new_fd = jffs2_write_dirent(c, f, &rd, fd->name, rd.nsize, ALLOC_GC);
800 
801 	if (IS_ERR(new_fd)) {
802 		printk(KERN_WARNING "jffs2_write_dirent in garbage_collect_dirent failed: %ld\n", PTR_ERR(new_fd));
803 		return PTR_ERR(new_fd);
804 	}
805 	jffs2_add_fd_to_list(c, new_fd, &f->dents);
806 	return 0;
807 }
808 
809 static int jffs2_garbage_collect_deletion_dirent(struct jffs2_sb_info *c, struct jffs2_eraseblock *jeb,
810 					struct jffs2_inode_info *f, struct jffs2_full_dirent *fd)
811 {
812 	struct jffs2_full_dirent **fdp = &f->dents;
813 	int found = 0;
814 
815 	/* On a medium where we can't actually mark nodes obsolete
816 	   permanently, such as NAND flash, we need to work out
817 	   whether this deletion dirent is still needed to actively
818 	   delete a 'real' dirent with the same name that's still
819 	   somewhere else on the flash. */
820 	if (!jffs2_can_mark_obsolete(c)) {
821 		struct jffs2_raw_dirent *rd;
822 		struct jffs2_raw_node_ref *raw;
823 		int ret;
824 		size_t retlen;
825 		int name_len = strlen(fd->name);
826 		uint32_t name_crc = crc32(0, fd->name, name_len);
827 		uint32_t rawlen = ref_totlen(c, jeb, fd->raw);
828 
829 		rd = kmalloc(rawlen, GFP_KERNEL);
830 		if (!rd)
831 			return -ENOMEM;
832 
833 		/* Prevent the erase code from nicking the obsolete node refs while
834 		   we're looking at them. I really don't like this extra lock but
835 		   can't see any alternative. Suggestions on a postcard to... */
836 		down(&c->erase_free_sem);
837 
838 		for (raw = f->inocache->nodes; raw != (void *)f->inocache; raw = raw->next_in_ino) {
839 
840 			cond_resched();
841 
842 			/* We only care about obsolete ones */
843 			if (!(ref_obsolete(raw)))
844 				continue;
845 
846 			/* Any dirent with the same name is going to have the same length... */
847 			if (ref_totlen(c, NULL, raw) != rawlen)
848 				continue;
849 
850 			/* Doesn't matter if there's one in the same erase block. We're going to
851 			   delete it too at the same time. */
852 			if (SECTOR_ADDR(raw->flash_offset) == SECTOR_ADDR(fd->raw->flash_offset))
853 				continue;
854 
855 			D1(printk(KERN_DEBUG "Check potential deletion dirent at %08x\n", ref_offset(raw)));
856 
857 			/* This is an obsolete node belonging to the same directory, and it's of the right
858 			   length. We need to take a closer look...*/
859 			ret = jffs2_flash_read(c, ref_offset(raw), rawlen, &retlen, (char *)rd);
860 			if (ret) {
861 				printk(KERN_WARNING "jffs2_g_c_deletion_dirent(): Read error (%d) reading obsolete node at %08x\n", ret, ref_offset(raw));
862 				/* If we can't read it, we don't need to continue to obsolete it. Continue */
863 				continue;
864 			}
865 			if (retlen != rawlen) {
866 				printk(KERN_WARNING "jffs2_g_c_deletion_dirent(): Short read (%zd not %u) reading header from obsolete node at %08x\n",
867 				       retlen, rawlen, ref_offset(raw));
868 				continue;
869 			}
870 
871 			if (je16_to_cpu(rd->nodetype) != JFFS2_NODETYPE_DIRENT)
872 				continue;
873 
874 			/* If the name CRC doesn't match, skip */
875 			if (je32_to_cpu(rd->name_crc) != name_crc)
876 				continue;
877 
878 			/* If the name length doesn't match, or it's another deletion dirent, skip */
879 			if (rd->nsize != name_len || !je32_to_cpu(rd->ino))
880 				continue;
881 
882 			/* OK, check the actual name now */
883 			if (memcmp(rd->name, fd->name, name_len))
884 				continue;
885 
886 			/* OK. The name really does match. There really is still an older node on
887 			   the flash which our deletion dirent obsoletes. So we have to write out
888 			   a new deletion dirent to replace it */
889 			up(&c->erase_free_sem);
890 
891 			D1(printk(KERN_DEBUG "Deletion dirent at %08x still obsoletes real dirent \"%s\" at %08x for ino #%u\n",
892 				  ref_offset(fd->raw), fd->name, ref_offset(raw), je32_to_cpu(rd->ino)));
893 			kfree(rd);
894 
895 			return jffs2_garbage_collect_dirent(c, jeb, f, fd);
896 		}
897 
898 		up(&c->erase_free_sem);
899 		kfree(rd);
900 	}
901 
902 	/* FIXME: If we're deleting a dirent which contains the current mtime and ctime,
903 	   we should update the metadata node with those times accordingly */
904 
905 	/* No need for it any more. Just mark it obsolete and remove it from the list */
906 	while (*fdp) {
907 		if ((*fdp) == fd) {
908 			found = 1;
909 			*fdp = fd->next;
910 			break;
911 		}
912 		fdp = &(*fdp)->next;
913 	}
914 	if (!found) {
915 		printk(KERN_WARNING "Deletion dirent \"%s\" not found in list for ino #%u\n", fd->name, f->inocache->ino);
916 	}
917 	jffs2_mark_node_obsolete(c, fd->raw);
918 	jffs2_free_full_dirent(fd);
919 	return 0;
920 }
921 
/*
 * jffs2_garbage_collect_hole - write a replacement "hole" node for fn.
 *
 * A hole node here is an inode node with compr == JFFS2_COMPR_ZERO and no
 * data payload (jffs2_write_dnode() is called with a NULL buffer and a
 * length of 0).
 *
 * Two cases:
 *  - fn->frags > 1: the existing header is re-read from flash, validated
 *    and re-used, so the new node keeps the old version, offset and dsize.
 *    The frags that pointed at fn are then re-pointed at the new node and
 *    fn is marked obsolete and freed.
 *  - otherwise (or after a recoverable read/validation failure, via
 *    'goto fill'): a fresh header covering start..end with a new version
 *    (++f->highest_version) is built and the new node is added to the
 *    inode with jffs2_add_full_dnode_to_inode().
 *
 * In both cases mode/uid/gid/isize/atime/ctime/mtime are refreshed from
 * the in-core inode before the node CRC is computed.
 *
 * jeb is not referenced in this function.
 *
 * Returns 0 on success, -EIO if the re-read node has the wrong node type
 * or totlen, or the error reported by jffs2_reserve_space_gc() /
 * jffs2_write_dnode().
 */
922 static int jffs2_garbage_collect_hole(struct jffs2_sb_info *c, struct jffs2_eraseblock *jeb,
923 				      struct jffs2_inode_info *f, struct jffs2_full_dnode *fn,
924 				      uint32_t start, uint32_t end)
925 {
926 	struct jffs2_raw_inode ri;
927 	struct jffs2_node_frag *frag;
928 	struct jffs2_full_dnode *new_fn;
929 	uint32_t alloclen, ilen;
930 	int ret;
931 
932 	D1(printk(KERN_DEBUG "Writing replacement hole node for ino #%u from offset 0x%x to 0x%x\n",
933 		  f->inocache->ino, start, end));
934 
	/* Start from an all-zero header. */
935 	memset(&ri, 0, sizeof(ri));
936 
937 	if(fn->frags > 1) {
938 		size_t readlen;
939 		uint32_t crc;
940 		/* It's partially obsoleted by a later write. So we have to
941 		   write it out again with the _same_ version as before */
942 		ret = jffs2_flash_read(c, ref_offset(fn->raw), sizeof(ri), &readlen, (char *)&ri);
943 		if (readlen != sizeof(ri) || ret) {
944 			printk(KERN_WARNING "Node read failed in jffs2_garbage_collect_hole. Ret %d, retlen %zd. Data will be lost by writing new hole node\n", ret, readlen);
945 			goto fill;
946 		}
		/* Wrong node type or length is treated as fatal: give up with -EIO. */
947 		if (je16_to_cpu(ri.nodetype) != JFFS2_NODETYPE_INODE) {
948 			printk(KERN_WARNING "jffs2_garbage_collect_hole: Node at 0x%08x had node type 0x%04x instead of JFFS2_NODETYPE_INODE(0x%04x)\n",
949 			       ref_offset(fn->raw),
950 			       je16_to_cpu(ri.nodetype), JFFS2_NODETYPE_INODE);
951 			return -EIO;
952 		}
953 		if (je32_to_cpu(ri.totlen) != sizeof(ri)) {
954 			printk(KERN_WARNING "jffs2_garbage_collect_hole: Node at 0x%08x had totlen 0x%x instead of expected 0x%zx\n",
955 			       ref_offset(fn->raw),
956 			       je32_to_cpu(ri.totlen), sizeof(ri));
957 			return -EIO;
958 		}
		/* A bad CRC or a non-hole node is recoverable: warn and fall back
		   to writing a brand-new hole node over start..end. */
959 		crc = crc32(0, &ri, sizeof(ri)-8);
960 		if (crc != je32_to_cpu(ri.node_crc)) {
961 			printk(KERN_WARNING "jffs2_garbage_collect_hole: Node at 0x%08x had CRC 0x%08x which doesn't match calculated CRC 0x%08x\n",
962 			       ref_offset(fn->raw),
963 			       je32_to_cpu(ri.node_crc), crc);
964 			/* FIXME: We could possibly deal with this by writing new holes for each frag */
965 			printk(KERN_WARNING "Data in the range 0x%08x to 0x%08x of inode #%u will be lost\n",
966 			       start, end, f->inocache->ino);
967 			goto fill;
968 		}
969 		if (ri.compr != JFFS2_COMPR_ZERO) {
970 			printk(KERN_WARNING "jffs2_garbage_collect_hole: Node 0x%08x wasn't a hole node!\n", ref_offset(fn->raw));
971 			printk(KERN_WARNING "Data in the range 0x%08x to 0x%08x of inode #%u will be lost\n",
972 			       start, end, f->inocache->ino);
973 			goto fill;
974 		}
975 	} else {
		/* Build a fresh header with a new version number. This is also
		   the landing point for the 'goto fill' fallbacks above.
		   NOTE(review): when reached via goto, ri may still hold what
		   was read from flash; fields not assigned below or after this
		   block (e.g. usercompr) are not re-zeroed -- confirm that is
		   acceptable. */
976 	fill:
977 		ri.magic = cpu_to_je16(JFFS2_MAGIC_BITMASK);
978 		ri.nodetype = cpu_to_je16(JFFS2_NODETYPE_INODE);
979 		ri.totlen = cpu_to_je32(sizeof(ri));
		/* Header CRC covers the three fields set just above, so it must
		   be computed after them. */
980 		ri.hdr_crc = cpu_to_je32(crc32(0, &ri, sizeof(struct jffs2_unknown_node)-4));
981 
982 		ri.ino = cpu_to_je32(f->inocache->ino);
983 		ri.version = cpu_to_je32(++f->highest_version);
984 		ri.offset = cpu_to_je32(start);
985 		ri.dsize = cpu_to_je32(end - start);
986 		ri.csize = cpu_to_je32(0);
987 		ri.compr = JFFS2_COMPR_ZERO;
988 	}
989 
990 	frag = frag_last(&f->fragtree);
991 	if (frag)
992 		/* Fetch the inode length from the fragtree rather than
993 		 * from i_size since i_size may have not been updated yet */
994 		ilen = frag->ofs + frag->size;
995 	else
996 		ilen = JFFS2_F_I_SIZE(f);
997 
	/* Refresh the inode metadata in the header (applies to both the
	   re-used and the freshly built header). */
998 	ri.mode = cpu_to_jemode(JFFS2_F_I_MODE(f));
999 	ri.uid = cpu_to_je16(JFFS2_F_I_UID(f));
1000 	ri.gid = cpu_to_je16(JFFS2_F_I_GID(f));
1001 	ri.isize = cpu_to_je32(ilen);
1002 	ri.atime = cpu_to_je32(JFFS2_F_I_ATIME(f));
1003 	ri.ctime = cpu_to_je32(JFFS2_F_I_CTIME(f));
1004 	ri.mtime = cpu_to_je32(JFFS2_F_I_MTIME(f));
1005 	ri.data_crc = cpu_to_je32(0);
	/* Node CRC is taken over all but the last 8 bytes of ri (presumably
	   the two trailing CRC fields -- confirm against the struct layout),
	   so it must come after every other field has been set. */
1006 	ri.node_crc = cpu_to_je32(crc32(0, &ri, sizeof(ri)-8));
1007 
	/* Only the header is written, so reserve just sizeof(ri). */
1008 	ret = jffs2_reserve_space_gc(c, sizeof(ri), &alloclen,
1009 				     JFFS2_SUMMARY_INODE_SIZE);
1010 	if (ret) {
1011 		printk(KERN_WARNING "jffs2_reserve_space_gc of %zd bytes for garbage_collect_hole failed: %d\n",
1012 		       sizeof(ri), ret);
1013 		return ret;
1014 	}
1015 	new_fn = jffs2_write_dnode(c, f, &ri, NULL, 0, ALLOC_GC);
1016 
1017 	if (IS_ERR(new_fn)) {
1018 		printk(KERN_WARNING "Error writing new hole node: %ld\n", PTR_ERR(new_fn));
1019 		return PTR_ERR(new_fn);
1020 	}
	/* The written node carries the newest version (as it does when the
	   fill path assigned ++f->highest_version): add it to the inode as
	   a new node and drop any separate metadata node.
	   NOTE(review): the return value of jffs2_add_full_dnode_to_inode()
	   is not checked here. */
1021 	if (je32_to_cpu(ri.version) == f->highest_version) {
1022 		jffs2_add_full_dnode_to_inode(c, f, new_fn);
1023 		if (f->metadata) {
1024 			jffs2_mark_node_obsolete(c, f->metadata->raw);
1025 			jffs2_free_full_dnode(f->metadata);
1026 			f->metadata = NULL;
1027 		}
1028 		return 0;
1029 	}
1030 
1031 	/*
1032 	 * We should only get here in the case where the node we are
1033 	 * replacing had more than one frag, so we kept the same version
1034 	 * number as before. (Except in case of error -- see 'goto fill;'
1035 	 * above.)
1036 	 */
1037 	D1(if(unlikely(fn->frags <= 1)) {
1038 		printk(KERN_WARNING "jffs2_garbage_collect_hole: Replacing fn with %d frag(s) but new ver %d != highest_version %d of ino #%d\n",
1039 		       fn->frags, je32_to_cpu(ri.version), f->highest_version,
1040 		       je32_to_cpu(ri.ino));
1041 	});
1042 
1043 	/* This is a partially-overlapped hole node. Mark it REF_NORMAL not REF_PRISTINE */
1044 	mark_ref_normal(new_fn->raw);
1045 
	/* Walk the frags starting at the one found for fn->ofs, stopping once
	   a frag starts beyond the end of fn. Every frag that referenced fn
	   is re-pointed at new_fn and the per-node frag counts are moved
	   across accordingly. */
1046 	for (frag = jffs2_lookup_node_frag(&f->fragtree, fn->ofs);
1047 	     frag; frag = frag_next(frag)) {
1048 		if (frag->ofs > fn->size + fn->ofs)
1049 			break;
1050 		if (frag->node == fn) {
1051 			frag->node = new_fn;
1052 			new_fn->frags++;
1053 			fn->frags--;
1054 		}
1055 	}
	/* Sanity checks: all references must have moved from fn to new_fn. */
1056 	if (fn->frags) {
1057 		printk(KERN_WARNING "jffs2_garbage_collect_hole: Old node still has frags!\n");
1058 		BUG();
1059 	}
1060 	if (!new_fn->frags) {
1061 		printk(KERN_WARNING "jffs2_garbage_collect_hole: New node has no frags!\n");
1062 		BUG();
1063 	}
1064 
	/* The old node is now unreferenced: obsolete it on flash and free it. */
1065 	jffs2_mark_node_obsolete(c, fn->raw);
1066 	jffs2_free_full_dnode(fn);
1067 
1068 	return 0;
1069 }
1070 
/*
 * jffs2_garbage_collect_dnode - rewrite the data in start..end of inode f
 * as one or more new data nodes.
 *
 * Step 1 (optional merging): if c->nr_free_blocks + c->nr_erasing_blocks
 * exceeds c->resv_blocks_gcmerge, the range is widened -- never beyond the
 * PAGE_CACHE_SIZE-aligned window [min, max) containing 'start' -- to cover
 * neighbouring frags, subject to the dirty/clean block rules commented
 * below.
 *
 * Step 2: the page is obtained with jffs2_gc_fetch_page() and the data is
 * compressed and written out in a loop, one new node per iteration, each
 * with a new version number (++f->highest_version).
 *
 * The 'jeb' parameter is shadowed by local declarations in the merging
 * code and 'fn' is not referenced in this function.
 *
 * Returns the PTR_ERR() of a failed page fetch, otherwise the last value
 * stored in 'ret' (0 if the loop body never runs; an error from
 * jffs2_reserve_space_gc()/jffs2_write_dnode() if the loop was aborted;
 * else the result of the final jffs2_add_full_dnode_to_inode() call).
 */
1071 static int jffs2_garbage_collect_dnode(struct jffs2_sb_info *c, struct jffs2_eraseblock *jeb,
1072 				       struct jffs2_inode_info *f, struct jffs2_full_dnode *fn,
1073 				       uint32_t start, uint32_t end)
1074 {
1075 	struct jffs2_full_dnode *new_fn;
1076 	struct jffs2_raw_inode ri;
1077 	uint32_t alloclen, offset, orig_end, orig_start;
1078 	int ret = 0;
1079 	unsigned char *comprbuf = NULL, *writebuf;
1080 	unsigned long pg;
1081 	unsigned char *pg_ptr;
1082 
1083 	memset(&ri, 0, sizeof(ri));
1084 
1085 	D1(printk(KERN_DEBUG "Writing replacement dnode for ino #%u from offset 0x%x to 0x%x\n",
1086 		  f->inocache->ino, start, end));
1087 
	/* Remember the caller's range; start/end may be widened below. */
1088 	orig_end = end;
1089 	orig_start = start;
1090 
1091 	if (c->nr_free_blocks + c->nr_erasing_blocks > c->resv_blocks_gcmerge) {
1092 		/* Attempt to do some merging. But only expand to cover logically
1093 		   adjacent frags if the block containing them is already considered
1094 		   to be dirty. Otherwise we end up with GC just going round in
1095 		   circles dirtying the nodes it already wrote out, especially
1096 		   on NAND where we have small eraseblocks and hence a much higher
1097 		   chance of nodes having to be split to cross boundaries. */
1098 
1099 		struct jffs2_node_frag *frag;
1100 		uint32_t min, max;
1101 
		/* [min, max) is the PAGE_CACHE_SIZE-aligned window containing
		   'start'; the loops below never expand outside it. */
1102 		min = start & ~(PAGE_CACHE_SIZE-1);
1103 		max = min + PAGE_CACHE_SIZE;
1104 
1105 		frag = jffs2_lookup_node_frag(&f->fragtree, start);
1106 
1107 		/* BUG_ON(!frag) but that'll happen anyway... */
1108 
1109 		BUG_ON(frag->ofs != start);
1110 
1111 		/* First grow down... */
1112 		while((frag = frag_prev(frag)) && frag->ofs >= min) {
1113 
1114 			/* If the previous frag doesn't even reach the beginning, there's
1115 			   excessive fragmentation. Just merge. */
1116 			if (frag->ofs > min) {
1117 				D1(printk(KERN_DEBUG "Expanding down to cover partial frag (0x%x-0x%x)\n",
1118 					  frag->ofs, frag->ofs+frag->size));
1119 				start = frag->ofs;
1120 				continue;
1121 			}
1122 			/* OK. This frag holds the first byte of the page. */
1123 			if (!frag->node || !frag->node->raw) {
1124 				D1(printk(KERN_DEBUG "First frag in page is hole (0x%x-0x%x). Not expanding down.\n",
1125 					  frag->ofs, frag->ofs+frag->size));
1126 				break;
1127 			} else {
1128 
1129 				/* OK, it's a frag which extends to the beginning of the page. Does it live
1130 				   in a block which is still considered clean? If so, don't obsolete it.
1131 				   If not, cover it anyway. */
1132 
1133 				struct jffs2_raw_node_ref *raw = frag->node->raw;
				/* Note: this local shadows the function's 'jeb' parameter. */
1134 				struct jffs2_eraseblock *jeb;
1135 
				/* Eraseblock holding this frag's node, indexed by
				   flash offset / sector size. */
1136 				jeb = &c->blocks[raw->flash_offset / c->sector_size];
1137 
1138 				if (jeb == c->gcblock) {
1139 					D1(printk(KERN_DEBUG "Expanding down to cover frag (0x%x-0x%x) in gcblock at %08x\n",
1140 						  frag->ofs, frag->ofs+frag->size, ref_offset(raw)));
1141 					start = frag->ofs;
1142 					break;
1143 				}
1144 				if (!ISDIRTY(jeb->dirty_size + jeb->wasted_size)) {
1145 					D1(printk(KERN_DEBUG "Not expanding down to cover frag (0x%x-0x%x) in clean block %08x\n",
1146 						  frag->ofs, frag->ofs+frag->size, jeb->offset));
1147 					break;
1148 				}
1149 
1150 				D1(printk(KERN_DEBUG "Expanding down to cover frag (0x%x-0x%x) in dirty block %08x\n",
1151 						  frag->ofs, frag->ofs+frag->size, jeb->offset));
1152 				start = frag->ofs;
1153 				break;
1154 			}
1155 		}
1156 
1157 		/* ... then up */
1158 
1159 		/* Find last frag which is actually part of the node we're to GC. */
1160 		frag = jffs2_lookup_node_frag(&f->fragtree, end-1);
1161 
1162 		while((frag = frag_next(frag)) && frag->ofs+frag->size <= max) {
1163 
1164 			/* If this next frag doesn't even reach the end of the page, there's
1165 			   lots of fragmentation. Just merge. */
1166 			if (frag->ofs+frag->size < max) {
1167 				D1(printk(KERN_DEBUG "Expanding up to cover partial frag (0x%x-0x%x)\n",
1168 					  frag->ofs, frag->ofs+frag->size));
1169 				end = frag->ofs + frag->size;
1170 				continue;
1171 			}
1172 
			/* From here on this frag ends exactly at 'max', the end of
			   the page window. */
1173 			if (!frag->node || !frag->node->raw) {
1174 				D1(printk(KERN_DEBUG "Last frag in page is hole (0x%x-0x%x). Not expanding up.\n",
1175 					  frag->ofs, frag->ofs+frag->size));
1176 				break;
1177 			} else {
1178 
1179 				/* OK, it's a frag which extends to the end of the page. Does it live
1180 				   in a block which is still considered clean? If so, don't obsolete it.
1181 				   If not, cover it anyway. */
1182 
1183 				struct jffs2_raw_node_ref *raw = frag->node->raw;
				/* Note: this local shadows the function's 'jeb' parameter. */
1184 				struct jffs2_eraseblock *jeb;
1185 
1186 				jeb = &c->blocks[raw->flash_offset / c->sector_size];
1187 
1188 				if (jeb == c->gcblock) {
1189 					D1(printk(KERN_DEBUG "Expanding up to cover frag (0x%x-0x%x) in gcblock at %08x\n",
1190 						  frag->ofs, frag->ofs+frag->size, ref_offset(raw)));
1191 					end = frag->ofs + frag->size;
1192 					break;
1193 				}
1194 				if (!ISDIRTY(jeb->dirty_size + jeb->wasted_size)) {
1195 					D1(printk(KERN_DEBUG "Not expanding up to cover frag (0x%x-0x%x) in clean block %08x\n",
1196 						  frag->ofs, frag->ofs+frag->size, jeb->offset));
1197 					break;
1198 				}
1199 
1200 				D1(printk(KERN_DEBUG "Expanding up to cover frag (0x%x-0x%x) in dirty block %08x\n",
1201 						  frag->ofs, frag->ofs+frag->size, jeb->offset));
1202 				end = frag->ofs + frag->size;
1203 				break;
1204 			}
1205 		}
1206 		D1(printk(KERN_DEBUG "Expanded dnode to write from (0x%x-0x%x) to (0x%x-0x%x)\n",
1207 			  orig_start, orig_end, start, end));
1208 
		/* Expansion may only ever widen the range, never shrink it. */
1209 		D1(BUG_ON(end > frag_last(&f->fragtree)->ofs + frag_last(&f->fragtree)->size));
1210 		BUG_ON(end < orig_end);
1211 		BUG_ON(start > orig_start);
1212 	}
1213 
1214 	/* First, use readpage() to read the appropriate page into the page cache */
1215 	/* Q: What happens if we actually try to GC the _same_ page for which commit_write()
1216 	 *    triggered garbage collection in the first place?
1217 	 * A: I _think_ it's OK. read_cache_page shouldn't deadlock, we'll write out the
1218 	 *    page OK. We'll actually write it out again in commit_write, which is a little
1219 	 *    suboptimal, but at least we're correct.
1220 	 */
1221 	pg_ptr = jffs2_gc_fetch_page(c, f, start, &pg);
1222 
1223 	if (IS_ERR(pg_ptr)) {
1224 		printk(KERN_WARNING "read_cache_page() returned error: %ld\n", PTR_ERR(pg_ptr));
1225 		return PTR_ERR(pg_ptr);
1226 	}
1227 
	/* Write the range out as one or more nodes. Each pass writes from
	   'offset' towards the (possibly expanded) 'end'; the loop itself
	   stops once 'offset' has reached the caller's original end.
	   NOTE(review): the loop bound is orig_end while the data length is
	   taken from 'end' -- confirm this is intended. */
1228 	offset = start;
1229 	while(offset < orig_end) {
1230 		uint32_t datalen;
1231 		uint32_t cdatalen;
1232 		uint16_t comprtype = JFFS2_COMPR_NONE;
1233 
1234 		ret = jffs2_reserve_space_gc(c, sizeof(ri) + JFFS2_MIN_DATA_LEN,
1235 					&alloclen, JFFS2_SUMMARY_INODE_SIZE);
1236 
1237 		if (ret) {
1238 			printk(KERN_WARNING "jffs2_reserve_space_gc of %zd bytes for garbage_collect_dnode failed: %d\n",
1239 			       sizeof(ri)+ JFFS2_MIN_DATA_LEN, ret);
1240 			break;
1241 		}
		/* Compressed output may not exceed the space left after the
		   header in what was reserved, nor the data remaining. */
1242 		cdatalen = min_t(uint32_t, alloclen - sizeof(ri), end - offset);
1243 		datalen = end - offset;
1244 
		/* Source data lives in the fetched page at offset's position
		   within the page. */
1245 		writebuf = pg_ptr + (offset & (PAGE_CACHE_SIZE -1));
1246 
		/* datalen/cdatalen are passed by pointer; presumably updated to
		   the amount consumed/produced (datalen is what advances
		   'offset' below) -- confirm against jffs2_compress(). */
1247 		comprtype = jffs2_compress(c, f, writebuf, &comprbuf, &datalen, &cdatalen);
1248 
1249 		ri.magic = cpu_to_je16(JFFS2_MAGIC_BITMASK);
1250 		ri.nodetype = cpu_to_je16(JFFS2_NODETYPE_INODE);
1251 		ri.totlen = cpu_to_je32(sizeof(ri) + cdatalen);
1252 		ri.hdr_crc = cpu_to_je32(crc32(0, &ri, sizeof(struct jffs2_unknown_node)-4));
1253 
1254 		ri.ino = cpu_to_je32(f->inocache->ino);
1255 		ri.version = cpu_to_je32(++f->highest_version);
1256 		ri.mode = cpu_to_jemode(JFFS2_F_I_MODE(f));
1257 		ri.uid = cpu_to_je16(JFFS2_F_I_UID(f));
1258 		ri.gid = cpu_to_je16(JFFS2_F_I_GID(f));
1259 		ri.isize = cpu_to_je32(JFFS2_F_I_SIZE(f));
1260 		ri.atime = cpu_to_je32(JFFS2_F_I_ATIME(f));
1261 		ri.ctime = cpu_to_je32(JFFS2_F_I_CTIME(f));
1262 		ri.mtime = cpu_to_je32(JFFS2_F_I_MTIME(f));
1263 		ri.offset = cpu_to_je32(offset);
1264 		ri.csize = cpu_to_je32(cdatalen);
1265 		ri.dsize = cpu_to_je32(datalen);
		/* Low byte of comprtype goes to compr, high byte to usercompr. */
1266 		ri.compr = comprtype & 0xff;
1267 		ri.usercompr = (comprtype >> 8) & 0xff;
		/* node_crc covers all but the last 8 bytes of ri and is
		   computed before data_crc is stored. */
1268 		ri.node_crc = cpu_to_je32(crc32(0, &ri, sizeof(ri)-8));
1269 		ri.data_crc = cpu_to_je32(crc32(0, comprbuf, cdatalen));
1270 
1271 		new_fn = jffs2_write_dnode(c, f, &ri, comprbuf, cdatalen, ALLOC_GC);
1272 
		/* Release the compression buffer whether or not the write
		   succeeded. */
1273 		jffs2_free_comprbuf(comprbuf, writebuf);
1274 
1275 		if (IS_ERR(new_fn)) {
1276 			printk(KERN_WARNING "Error writing new dnode: %ld\n", PTR_ERR(new_fn));
1277 			ret = PTR_ERR(new_fn);
1278 			break;
1279 		}
		/* NOTE(review): 'ret' from this call is not tested here; on any
		   iteration but the last it is overwritten by the next
		   jffs2_reserve_space_gc() call. */
1280 		ret = jffs2_add_full_dnode_to_inode(c, f, new_fn);
1281 		offset += datalen;
		/* Drop any separate metadata node now that a data node has
		   been written. */
1282 		if (f->metadata) {
1283 			jffs2_mark_node_obsolete(c, f->metadata->raw);
1284 			jffs2_free_full_dnode(f->metadata);
1285 			f->metadata = NULL;
1286 		}
1287 	}
1288 
	/* Always release the page obtained from jffs2_gc_fetch_page(). */
1289 	jffs2_gc_release_page(c, pg_ptr, &pg);
1290 	return ret;
1291 }
1292