xref: /openbmc/linux/fs/jffs2/gc.c (revision ae3b6ba0)
1 /*
2  * JFFS2 -- Journalling Flash File System, Version 2.
3  *
4  * Copyright © 2001-2007 Red Hat, Inc.
5  *
6  * Created by David Woodhouse <dwmw2@infradead.org>
7  *
8  * For licensing information, see the file 'LICENCE' in this directory.
9  *
10  */
11 
12 #include <linux/kernel.h>
13 #include <linux/mtd/mtd.h>
14 #include <linux/slab.h>
15 #include <linux/pagemap.h>
16 #include <linux/crc32.h>
17 #include <linux/compiler.h>
18 #include <linux/stat.h>
19 #include "nodelist.h"
20 #include "compr.h"
21 
22 static int jffs2_garbage_collect_pristine(struct jffs2_sb_info *c,
23 					  struct jffs2_inode_cache *ic,
24 					  struct jffs2_raw_node_ref *raw);
25 static int jffs2_garbage_collect_metadata(struct jffs2_sb_info *c, struct jffs2_eraseblock *jeb,
26 					struct jffs2_inode_info *f, struct jffs2_full_dnode *fd);
27 static int jffs2_garbage_collect_dirent(struct jffs2_sb_info *c, struct jffs2_eraseblock *jeb,
28 					struct jffs2_inode_info *f, struct jffs2_full_dirent *fd);
29 static int jffs2_garbage_collect_deletion_dirent(struct jffs2_sb_info *c, struct jffs2_eraseblock *jeb,
30 					struct jffs2_inode_info *f, struct jffs2_full_dirent *fd);
31 static int jffs2_garbage_collect_hole(struct jffs2_sb_info *c, struct jffs2_eraseblock *jeb,
32 				      struct jffs2_inode_info *f, struct jffs2_full_dnode *fn,
33 				      uint32_t start, uint32_t end);
34 static int jffs2_garbage_collect_dnode(struct jffs2_sb_info *c, struct jffs2_eraseblock *jeb,
35 				       struct jffs2_inode_info *f, struct jffs2_full_dnode *fn,
36 				       uint32_t start, uint32_t end);
37 static int jffs2_garbage_collect_live(struct jffs2_sb_info *c,  struct jffs2_eraseblock *jeb,
38 			       struct jffs2_raw_node_ref *raw, struct jffs2_inode_info *f);
39 
40 /* Called with erase_completion_lock held */
41 static struct jffs2_eraseblock *jffs2_find_gc_block(struct jffs2_sb_info *c)
42 {
43 	struct jffs2_eraseblock *ret;
44 	struct list_head *nextlist = NULL;
45 	int n = jiffies % 128;
46 
47 	/* Pick an eraseblock to garbage collect next. This is where we'll
48 	   put the clever wear-levelling algorithms. Eventually.  */
49 	/* We possibly want to favour the dirtier blocks more when the
50 	   number of free blocks is low. */
51 again:
52 	if (!list_empty(&c->bad_used_list) && c->nr_free_blocks > c->resv_blocks_gcbad) {
53 		D1(printk(KERN_DEBUG "Picking block from bad_used_list to GC next\n"));
54 		nextlist = &c->bad_used_list;
55 	} else if (n < 50 && !list_empty(&c->erasable_list)) {
56 		/* Note that most of them will have gone directly to be erased.
57 		   So don't favour the erasable_list _too_ much. */
58 		D1(printk(KERN_DEBUG "Picking block from erasable_list to GC next\n"));
59 		nextlist = &c->erasable_list;
60 	} else if (n < 110 && !list_empty(&c->very_dirty_list)) {
61 		/* Most of the time, pick one off the very_dirty list */
62 		D1(printk(KERN_DEBUG "Picking block from very_dirty_list to GC next\n"));
63 		nextlist = &c->very_dirty_list;
64 	} else if (n < 126 && !list_empty(&c->dirty_list)) {
65 		D1(printk(KERN_DEBUG "Picking block from dirty_list to GC next\n"));
66 		nextlist = &c->dirty_list;
67 	} else if (!list_empty(&c->clean_list)) {
68 		D1(printk(KERN_DEBUG "Picking block from clean_list to GC next\n"));
69 		nextlist = &c->clean_list;
70 	} else if (!list_empty(&c->dirty_list)) {
71 		D1(printk(KERN_DEBUG "Picking block from dirty_list to GC next (clean_list was empty)\n"));
72 
73 		nextlist = &c->dirty_list;
74 	} else if (!list_empty(&c->very_dirty_list)) {
75 		D1(printk(KERN_DEBUG "Picking block from very_dirty_list to GC next (clean_list and dirty_list were empty)\n"));
76 		nextlist = &c->very_dirty_list;
77 	} else if (!list_empty(&c->erasable_list)) {
78 		D1(printk(KERN_DEBUG "Picking block from erasable_list to GC next (clean_list and {very_,}dirty_list were empty)\n"));
79 
80 		nextlist = &c->erasable_list;
81 	} else if (!list_empty(&c->erasable_pending_wbuf_list)) {
82 		/* There are blocks are wating for the wbuf sync */
83 		D1(printk(KERN_DEBUG "Synching wbuf in order to reuse erasable_pending_wbuf_list blocks\n"));
84 		spin_unlock(&c->erase_completion_lock);
85 		jffs2_flush_wbuf_pad(c);
86 		spin_lock(&c->erase_completion_lock);
87 		goto again;
88 	} else {
89 		/* Eep. All were empty */
90 		D1(printk(KERN_NOTICE "jffs2: No clean, dirty _or_ erasable blocks to GC from! Where are they all?\n"));
91 		return NULL;
92 	}
93 
94 	ret = list_entry(nextlist->next, struct jffs2_eraseblock, list);
95 	list_del(&ret->list);
96 	c->gcblock = ret;
97 	ret->gc_node = ret->first_node;
98 	if (!ret->gc_node) {
99 		printk(KERN_WARNING "Eep. ret->gc_node for block at 0x%08x is NULL\n", ret->offset);
100 		BUG();
101 	}
102 
103 	/* Have we accidentally picked a clean block with wasted space ? */
104 	if (ret->wasted_size) {
105 		D1(printk(KERN_DEBUG "Converting wasted_size %08x to dirty_size\n", ret->wasted_size));
106 		ret->dirty_size += ret->wasted_size;
107 		c->wasted_size -= ret->wasted_size;
108 		c->dirty_size += ret->wasted_size;
109 		ret->wasted_size = 0;
110 	}
111 
112 	return ret;
113 }
114 
115 /* jffs2_garbage_collect_pass
116  * Make a single attempt to progress GC. Move one node, and possibly
117  * start erasing one eraseblock.
118  */
119 int jffs2_garbage_collect_pass(struct jffs2_sb_info *c)
120 {
121 	struct jffs2_inode_info *f;
122 	struct jffs2_inode_cache *ic;
123 	struct jffs2_eraseblock *jeb;
124 	struct jffs2_raw_node_ref *raw;
125 	uint32_t gcblock_dirty;
126 	int ret = 0, inum, nlink;
127 	int xattr = 0;
128 
129 	if (mutex_lock_interruptible(&c->alloc_sem))
130 		return -EINTR;
131 
132 	for (;;) {
133 		spin_lock(&c->erase_completion_lock);
134 		if (!c->unchecked_size)
135 			break;
136 
137 		/* We can't start doing GC yet. We haven't finished checking
138 		   the node CRCs etc. Do it now. */
139 
140 		/* checked_ino is protected by the alloc_sem */
141 		if (c->checked_ino > c->highest_ino && xattr) {
142 			printk(KERN_CRIT "Checked all inodes but still 0x%x bytes of unchecked space?\n",
143 			       c->unchecked_size);
144 			jffs2_dbg_dump_block_lists_nolock(c);
145 			spin_unlock(&c->erase_completion_lock);
146 			mutex_unlock(&c->alloc_sem);
147 			return -ENOSPC;
148 		}
149 
150 		spin_unlock(&c->erase_completion_lock);
151 
152 		if (!xattr)
153 			xattr = jffs2_verify_xattr(c);
154 
155 		spin_lock(&c->inocache_lock);
156 
157 		ic = jffs2_get_ino_cache(c, c->checked_ino++);
158 
159 		if (!ic) {
160 			spin_unlock(&c->inocache_lock);
161 			continue;
162 		}
163 
164 		if (!ic->pino_nlink) {
165 			D1(printk(KERN_DEBUG "Skipping check of ino #%d with nlink/pino zero\n",
166 				  ic->ino));
167 			spin_unlock(&c->inocache_lock);
168 			jffs2_xattr_delete_inode(c, ic);
169 			continue;
170 		}
171 		switch(ic->state) {
172 		case INO_STATE_CHECKEDABSENT:
173 		case INO_STATE_PRESENT:
174 			D1(printk(KERN_DEBUG "Skipping ino #%u already checked\n", ic->ino));
175 			spin_unlock(&c->inocache_lock);
176 			continue;
177 
178 		case INO_STATE_GC:
179 		case INO_STATE_CHECKING:
180 			printk(KERN_WARNING "Inode #%u is in state %d during CRC check phase!\n", ic->ino, ic->state);
181 			spin_unlock(&c->inocache_lock);
182 			BUG();
183 
184 		case INO_STATE_READING:
185 			/* We need to wait for it to finish, lest we move on
186 			   and trigger the BUG() above while we haven't yet
187 			   finished checking all its nodes */
188 			D1(printk(KERN_DEBUG "Waiting for ino #%u to finish reading\n", ic->ino));
189 			/* We need to come back again for the _same_ inode. We've
190 			 made no progress in this case, but that should be OK */
191 			c->checked_ino--;
192 
193 			mutex_unlock(&c->alloc_sem);
194 			sleep_on_spinunlock(&c->inocache_wq, &c->inocache_lock);
195 			return 0;
196 
197 		default:
198 			BUG();
199 
200 		case INO_STATE_UNCHECKED:
201 			;
202 		}
203 		ic->state = INO_STATE_CHECKING;
204 		spin_unlock(&c->inocache_lock);
205 
206 		D1(printk(KERN_DEBUG "jffs2_garbage_collect_pass() triggering inode scan of ino#%u\n", ic->ino));
207 
208 		ret = jffs2_do_crccheck_inode(c, ic);
209 		if (ret)
210 			printk(KERN_WARNING "Returned error for crccheck of ino #%u. Expect badness...\n", ic->ino);
211 
212 		jffs2_set_inocache_state(c, ic, INO_STATE_CHECKEDABSENT);
213 		mutex_unlock(&c->alloc_sem);
214 		return ret;
215 	}
216 
217 	/* If there are any blocks which need erasing, erase them now */
218 	if (!list_empty(&c->erase_complete_list) ||
219 	    !list_empty(&c->erase_pending_list)) {
220 		spin_unlock(&c->erase_completion_lock);
221 		D1(printk(KERN_DEBUG "jffs2_garbage_collect_pass() erasing pending blocks\n"));
222 		if (jffs2_erase_pending_blocks(c, 1)) {
223 			mutex_unlock(&c->alloc_sem);
224 			return 0;
225 		}
226 		D1(printk(KERN_DEBUG "No progress from erasing blocks; doing GC anyway\n"));
227 		spin_lock(&c->erase_completion_lock);
228 	}
229 
230 	/* First, work out which block we're garbage-collecting */
231 	jeb = c->gcblock;
232 
233 	if (!jeb)
234 		jeb = jffs2_find_gc_block(c);
235 
236 	if (!jeb) {
237 		/* Couldn't find a free block. But maybe we can just erase one and make 'progress'? */
238 		if (c->nr_erasing_blocks) {
239 			spin_unlock(&c->erase_completion_lock);
240 			mutex_unlock(&c->alloc_sem);
241 			return -EAGAIN;
242 		}
243 		D1(printk(KERN_NOTICE "jffs2: Couldn't find erase block to garbage collect!\n"));
244 		spin_unlock(&c->erase_completion_lock);
245 		mutex_unlock(&c->alloc_sem);
246 		return -EIO;
247 	}
248 
249 	D1(printk(KERN_DEBUG "GC from block %08x, used_size %08x, dirty_size %08x, free_size %08x\n", jeb->offset, jeb->used_size, jeb->dirty_size, jeb->free_size));
250 	D1(if (c->nextblock)
251 	   printk(KERN_DEBUG "Nextblock at  %08x, used_size %08x, dirty_size %08x, wasted_size %08x, free_size %08x\n", c->nextblock->offset, c->nextblock->used_size, c->nextblock->dirty_size, c->nextblock->wasted_size, c->nextblock->free_size));
252 
253 	if (!jeb->used_size) {
254 		mutex_unlock(&c->alloc_sem);
255 		goto eraseit;
256 	}
257 
258 	raw = jeb->gc_node;
259 	gcblock_dirty = jeb->dirty_size;
260 
261 	while(ref_obsolete(raw)) {
262 		D1(printk(KERN_DEBUG "Node at 0x%08x is obsolete... skipping\n", ref_offset(raw)));
263 		raw = ref_next(raw);
264 		if (unlikely(!raw)) {
265 			printk(KERN_WARNING "eep. End of raw list while still supposedly nodes to GC\n");
266 			printk(KERN_WARNING "erase block at 0x%08x. free_size 0x%08x, dirty_size 0x%08x, used_size 0x%08x\n",
267 			       jeb->offset, jeb->free_size, jeb->dirty_size, jeb->used_size);
268 			jeb->gc_node = raw;
269 			spin_unlock(&c->erase_completion_lock);
270 			mutex_unlock(&c->alloc_sem);
271 			BUG();
272 		}
273 	}
274 	jeb->gc_node = raw;
275 
276 	D1(printk(KERN_DEBUG "Going to garbage collect node at 0x%08x\n", ref_offset(raw)));
277 
278 	if (!raw->next_in_ino) {
279 		/* Inode-less node. Clean marker, snapshot or something like that */
280 		spin_unlock(&c->erase_completion_lock);
281 		if (ref_flags(raw) == REF_PRISTINE) {
282 			/* It's an unknown node with JFFS2_FEATURE_RWCOMPAT_COPY */
283 			jffs2_garbage_collect_pristine(c, NULL, raw);
284 		} else {
285 			/* Just mark it obsolete */
286 			jffs2_mark_node_obsolete(c, raw);
287 		}
288 		mutex_unlock(&c->alloc_sem);
289 		goto eraseit_lock;
290 	}
291 
292 	ic = jffs2_raw_ref_to_ic(raw);
293 
294 #ifdef CONFIG_JFFS2_FS_XATTR
295 	/* When 'ic' refers xattr_datum/xattr_ref, this node is GCed as xattr.
296 	 * We can decide whether this node is inode or xattr by ic->class.     */
297 	if (ic->class == RAWNODE_CLASS_XATTR_DATUM
298 	    || ic->class == RAWNODE_CLASS_XATTR_REF) {
299 		spin_unlock(&c->erase_completion_lock);
300 
301 		if (ic->class == RAWNODE_CLASS_XATTR_DATUM) {
302 			ret = jffs2_garbage_collect_xattr_datum(c, (struct jffs2_xattr_datum *)ic, raw);
303 		} else {
304 			ret = jffs2_garbage_collect_xattr_ref(c, (struct jffs2_xattr_ref *)ic, raw);
305 		}
306 		goto test_gcnode;
307 	}
308 #endif
309 
310 	/* We need to hold the inocache. Either the erase_completion_lock or
311 	   the inocache_lock are sufficient; we trade down since the inocache_lock
312 	   causes less contention. */
313 	spin_lock(&c->inocache_lock);
314 
315 	spin_unlock(&c->erase_completion_lock);
316 
317 	D1(printk(KERN_DEBUG "jffs2_garbage_collect_pass collecting from block @0x%08x. Node @0x%08x(%d), ino #%u\n", jeb->offset, ref_offset(raw), ref_flags(raw), ic->ino));
318 
319 	/* Three possibilities:
320 	   1. Inode is already in-core. We must iget it and do proper
321 	      updating to its fragtree, etc.
322 	   2. Inode is not in-core, node is REF_PRISTINE. We lock the
323 	      inocache to prevent a read_inode(), copy the node intact.
324 	   3. Inode is not in-core, node is not pristine. We must iget()
325 	      and take the slow path.
326 	*/
327 
328 	switch(ic->state) {
329 	case INO_STATE_CHECKEDABSENT:
330 		/* It's been checked, but it's not currently in-core.
331 		   We can just copy any pristine nodes, but have
332 		   to prevent anyone else from doing read_inode() while
333 		   we're at it, so we set the state accordingly */
334 		if (ref_flags(raw) == REF_PRISTINE)
335 			ic->state = INO_STATE_GC;
336 		else {
337 			D1(printk(KERN_DEBUG "Ino #%u is absent but node not REF_PRISTINE. Reading.\n",
338 				  ic->ino));
339 		}
340 		break;
341 
342 	case INO_STATE_PRESENT:
343 		/* It's in-core. GC must iget() it. */
344 		break;
345 
346 	case INO_STATE_UNCHECKED:
347 	case INO_STATE_CHECKING:
348 	case INO_STATE_GC:
349 		/* Should never happen. We should have finished checking
350 		   by the time we actually start doing any GC, and since
351 		   we're holding the alloc_sem, no other garbage collection
352 		   can happen.
353 		*/
354 		printk(KERN_CRIT "Inode #%u already in state %d in jffs2_garbage_collect_pass()!\n",
355 		       ic->ino, ic->state);
356 		mutex_unlock(&c->alloc_sem);
357 		spin_unlock(&c->inocache_lock);
358 		BUG();
359 
360 	case INO_STATE_READING:
361 		/* Someone's currently trying to read it. We must wait for
362 		   them to finish and then go through the full iget() route
363 		   to do the GC. However, sometimes read_inode() needs to get
364 		   the alloc_sem() (for marking nodes invalid) so we must
365 		   drop the alloc_sem before sleeping. */
366 
367 		mutex_unlock(&c->alloc_sem);
368 		D1(printk(KERN_DEBUG "jffs2_garbage_collect_pass() waiting for ino #%u in state %d\n",
369 			  ic->ino, ic->state));
370 		sleep_on_spinunlock(&c->inocache_wq, &c->inocache_lock);
371 		/* And because we dropped the alloc_sem we must start again from the
372 		   beginning. Ponder chance of livelock here -- we're returning success
373 		   without actually making any progress.
374 
375 		   Q: What are the chances that the inode is back in INO_STATE_READING
376 		   again by the time we next enter this function? And that this happens
377 		   enough times to cause a real delay?
378 
379 		   A: Small enough that I don't care :)
380 		*/
381 		return 0;
382 	}
383 
384 	/* OK. Now if the inode is in state INO_STATE_GC, we are going to copy the
385 	   node intact, and we don't have to muck about with the fragtree etc.
386 	   because we know it's not in-core. If it _was_ in-core, we go through
387 	   all the iget() crap anyway */
388 
389 	if (ic->state == INO_STATE_GC) {
390 		spin_unlock(&c->inocache_lock);
391 
392 		ret = jffs2_garbage_collect_pristine(c, ic, raw);
393 
394 		spin_lock(&c->inocache_lock);
395 		ic->state = INO_STATE_CHECKEDABSENT;
396 		wake_up(&c->inocache_wq);
397 
398 		if (ret != -EBADFD) {
399 			spin_unlock(&c->inocache_lock);
400 			goto test_gcnode;
401 		}
402 
403 		/* Fall through if it wanted us to, with inocache_lock held */
404 	}
405 
406 	/* Prevent the fairly unlikely race where the gcblock is
407 	   entirely obsoleted by the final close of a file which had
408 	   the only valid nodes in the block, followed by erasure,
409 	   followed by freeing of the ic because the erased block(s)
410 	   held _all_ the nodes of that inode.... never been seen but
411 	   it's vaguely possible. */
412 
413 	inum = ic->ino;
414 	nlink = ic->pino_nlink;
415 	spin_unlock(&c->inocache_lock);
416 
417 	f = jffs2_gc_fetch_inode(c, inum, !nlink);
418 	if (IS_ERR(f)) {
419 		ret = PTR_ERR(f);
420 		goto release_sem;
421 	}
422 	if (!f) {
423 		ret = 0;
424 		goto release_sem;
425 	}
426 
427 	ret = jffs2_garbage_collect_live(c, jeb, raw, f);
428 
429 	jffs2_gc_release_inode(c, f);
430 
431  test_gcnode:
432 	if (jeb->dirty_size == gcblock_dirty && !ref_obsolete(jeb->gc_node)) {
433 		/* Eep. This really should never happen. GC is broken */
434 		printk(KERN_ERR "Error garbage collecting node at %08x!\n", ref_offset(jeb->gc_node));
435 		ret = -ENOSPC;
436 	}
437  release_sem:
438 	mutex_unlock(&c->alloc_sem);
439 
440  eraseit_lock:
441 	/* If we've finished this block, start it erasing */
442 	spin_lock(&c->erase_completion_lock);
443 
444  eraseit:
445 	if (c->gcblock && !c->gcblock->used_size) {
446 		D1(printk(KERN_DEBUG "Block at 0x%08x completely obsoleted by GC. Moving to erase_pending_list\n", c->gcblock->offset));
447 		/* We're GC'ing an empty block? */
448 		list_add_tail(&c->gcblock->list, &c->erase_pending_list);
449 		c->gcblock = NULL;
450 		c->nr_erasing_blocks++;
451 		jffs2_garbage_collect_trigger(c);
452 	}
453 	spin_unlock(&c->erase_completion_lock);
454 
455 	return ret;
456 }
457 
458 static int jffs2_garbage_collect_live(struct jffs2_sb_info *c,  struct jffs2_eraseblock *jeb,
459 				      struct jffs2_raw_node_ref *raw, struct jffs2_inode_info *f)
460 {
461 	struct jffs2_node_frag *frag;
462 	struct jffs2_full_dnode *fn = NULL;
463 	struct jffs2_full_dirent *fd;
464 	uint32_t start = 0, end = 0, nrfrags = 0;
465 	int ret = 0;
466 
467 	mutex_lock(&f->sem);
468 
469 	/* Now we have the lock for this inode. Check that it's still the one at the head
470 	   of the list. */
471 
472 	spin_lock(&c->erase_completion_lock);
473 
474 	if (c->gcblock != jeb) {
475 		spin_unlock(&c->erase_completion_lock);
476 		D1(printk(KERN_DEBUG "GC block is no longer gcblock. Restart\n"));
477 		goto upnout;
478 	}
479 	if (ref_obsolete(raw)) {
480 		spin_unlock(&c->erase_completion_lock);
481 		D1(printk(KERN_DEBUG "node to be GC'd was obsoleted in the meantime.\n"));
482 		/* They'll call again */
483 		goto upnout;
484 	}
485 	spin_unlock(&c->erase_completion_lock);
486 
487 	/* OK. Looks safe. And nobody can get us now because we have the semaphore. Move the block */
488 	if (f->metadata && f->metadata->raw == raw) {
489 		fn = f->metadata;
490 		ret = jffs2_garbage_collect_metadata(c, jeb, f, fn);
491 		goto upnout;
492 	}
493 
494 	/* FIXME. Read node and do lookup? */
495 	for (frag = frag_first(&f->fragtree); frag; frag = frag_next(frag)) {
496 		if (frag->node && frag->node->raw == raw) {
497 			fn = frag->node;
498 			end = frag->ofs + frag->size;
499 			if (!nrfrags++)
500 				start = frag->ofs;
501 			if (nrfrags == frag->node->frags)
502 				break; /* We've found them all */
503 		}
504 	}
505 	if (fn) {
506 		if (ref_flags(raw) == REF_PRISTINE) {
507 			ret = jffs2_garbage_collect_pristine(c, f->inocache, raw);
508 			if (!ret) {
509 				/* Urgh. Return it sensibly. */
510 				frag->node->raw = f->inocache->nodes;
511 			}
512 			if (ret != -EBADFD)
513 				goto upnout;
514 		}
515 		/* We found a datanode. Do the GC */
516 		if((start >> PAGE_CACHE_SHIFT) < ((end-1) >> PAGE_CACHE_SHIFT)) {
517 			/* It crosses a page boundary. Therefore, it must be a hole. */
518 			ret = jffs2_garbage_collect_hole(c, jeb, f, fn, start, end);
519 		} else {
520 			/* It could still be a hole. But we GC the page this way anyway */
521 			ret = jffs2_garbage_collect_dnode(c, jeb, f, fn, start, end);
522 		}
523 		goto upnout;
524 	}
525 
526 	/* Wasn't a dnode. Try dirent */
527 	for (fd = f->dents; fd; fd=fd->next) {
528 		if (fd->raw == raw)
529 			break;
530 	}
531 
532 	if (fd && fd->ino) {
533 		ret = jffs2_garbage_collect_dirent(c, jeb, f, fd);
534 	} else if (fd) {
535 		ret = jffs2_garbage_collect_deletion_dirent(c, jeb, f, fd);
536 	} else {
537 		printk(KERN_WARNING "Raw node at 0x%08x wasn't in node lists for ino #%u\n",
538 		       ref_offset(raw), f->inocache->ino);
539 		if (ref_obsolete(raw)) {
540 			printk(KERN_WARNING "But it's obsolete so we don't mind too much\n");
541 		} else {
542 			jffs2_dbg_dump_node(c, ref_offset(raw));
543 			BUG();
544 		}
545 	}
546  upnout:
547 	mutex_unlock(&f->sem);
548 
549 	return ret;
550 }
551 
552 static int jffs2_garbage_collect_pristine(struct jffs2_sb_info *c,
553 					  struct jffs2_inode_cache *ic,
554 					  struct jffs2_raw_node_ref *raw)
555 {
556 	union jffs2_node_union *node;
557 	size_t retlen;
558 	int ret;
559 	uint32_t phys_ofs, alloclen;
560 	uint32_t crc, rawlen;
561 	int retried = 0;
562 
563 	D1(printk(KERN_DEBUG "Going to GC REF_PRISTINE node at 0x%08x\n", ref_offset(raw)));
564 
565 	alloclen = rawlen = ref_totlen(c, c->gcblock, raw);
566 
567 	/* Ask for a small amount of space (or the totlen if smaller) because we
568 	   don't want to force wastage of the end of a block if splitting would
569 	   work. */
570 	if (ic && alloclen > sizeof(struct jffs2_raw_inode) + JFFS2_MIN_DATA_LEN)
571 		alloclen = sizeof(struct jffs2_raw_inode) + JFFS2_MIN_DATA_LEN;
572 
573 	ret = jffs2_reserve_space_gc(c, alloclen, &alloclen, rawlen);
574 	/* 'rawlen' is not the exact summary size; it is only an upper estimation */
575 
576 	if (ret)
577 		return ret;
578 
579 	if (alloclen < rawlen) {
580 		/* Doesn't fit untouched. We'll go the old route and split it */
581 		return -EBADFD;
582 	}
583 
584 	node = kmalloc(rawlen, GFP_KERNEL);
585 	if (!node)
586 		return -ENOMEM;
587 
588 	ret = jffs2_flash_read(c, ref_offset(raw), rawlen, &retlen, (char *)node);
589 	if (!ret && retlen != rawlen)
590 		ret = -EIO;
591 	if (ret)
592 		goto out_node;
593 
594 	crc = crc32(0, node, sizeof(struct jffs2_unknown_node)-4);
595 	if (je32_to_cpu(node->u.hdr_crc) != crc) {
596 		printk(KERN_WARNING "Header CRC failed on REF_PRISTINE node at 0x%08x: Read 0x%08x, calculated 0x%08x\n",
597 		       ref_offset(raw), je32_to_cpu(node->u.hdr_crc), crc);
598 		goto bail;
599 	}
600 
601 	switch(je16_to_cpu(node->u.nodetype)) {
602 	case JFFS2_NODETYPE_INODE:
603 		crc = crc32(0, node, sizeof(node->i)-8);
604 		if (je32_to_cpu(node->i.node_crc) != crc) {
605 			printk(KERN_WARNING "Node CRC failed on REF_PRISTINE data node at 0x%08x: Read 0x%08x, calculated 0x%08x\n",
606 			       ref_offset(raw), je32_to_cpu(node->i.node_crc), crc);
607 			goto bail;
608 		}
609 
610 		if (je32_to_cpu(node->i.dsize)) {
611 			crc = crc32(0, node->i.data, je32_to_cpu(node->i.csize));
612 			if (je32_to_cpu(node->i.data_crc) != crc) {
613 				printk(KERN_WARNING "Data CRC failed on REF_PRISTINE data node at 0x%08x: Read 0x%08x, calculated 0x%08x\n",
614 				       ref_offset(raw), je32_to_cpu(node->i.data_crc), crc);
615 				goto bail;
616 			}
617 		}
618 		break;
619 
620 	case JFFS2_NODETYPE_DIRENT:
621 		crc = crc32(0, node, sizeof(node->d)-8);
622 		if (je32_to_cpu(node->d.node_crc) != crc) {
623 			printk(KERN_WARNING "Node CRC failed on REF_PRISTINE dirent node at 0x%08x: Read 0x%08x, calculated 0x%08x\n",
624 			       ref_offset(raw), je32_to_cpu(node->d.node_crc), crc);
625 			goto bail;
626 		}
627 
628 		if (strnlen(node->d.name, node->d.nsize) != node->d.nsize) {
629 			printk(KERN_WARNING "Name in dirent node at 0x%08x contains zeroes\n", ref_offset(raw));
630 			goto bail;
631 		}
632 
633 		if (node->d.nsize) {
634 			crc = crc32(0, node->d.name, node->d.nsize);
635 			if (je32_to_cpu(node->d.name_crc) != crc) {
636 				printk(KERN_WARNING "Name CRC failed on REF_PRISTINE dirent node at 0x%08x: Read 0x%08x, calculated 0x%08x\n",
637 				       ref_offset(raw), je32_to_cpu(node->d.name_crc), crc);
638 				goto bail;
639 			}
640 		}
641 		break;
642 	default:
643 		/* If it's inode-less, we don't _know_ what it is. Just copy it intact */
644 		if (ic) {
645 			printk(KERN_WARNING "Unknown node type for REF_PRISTINE node at 0x%08x: 0x%04x\n",
646 			       ref_offset(raw), je16_to_cpu(node->u.nodetype));
647 			goto bail;
648 		}
649 	}
650 
651 	/* OK, all the CRCs are good; this node can just be copied as-is. */
652  retry:
653 	phys_ofs = write_ofs(c);
654 
655 	ret = jffs2_flash_write(c, phys_ofs, rawlen, &retlen, (char *)node);
656 
657 	if (ret || (retlen != rawlen)) {
658 		printk(KERN_NOTICE "Write of %d bytes at 0x%08x failed. returned %d, retlen %zd\n",
659 		       rawlen, phys_ofs, ret, retlen);
660 		if (retlen) {
661 			jffs2_add_physical_node_ref(c, phys_ofs | REF_OBSOLETE, rawlen, NULL);
662 		} else {
663 			printk(KERN_NOTICE "Not marking the space at 0x%08x as dirty because the flash driver returned retlen zero\n", phys_ofs);
664 		}
665 		if (!retried) {
666 			/* Try to reallocate space and retry */
667 			uint32_t dummy;
668 			struct jffs2_eraseblock *jeb = &c->blocks[phys_ofs / c->sector_size];
669 
670 			retried = 1;
671 
672 			D1(printk(KERN_DEBUG "Retrying failed write of REF_PRISTINE node.\n"));
673 
674 			jffs2_dbg_acct_sanity_check(c,jeb);
675 			jffs2_dbg_acct_paranoia_check(c, jeb);
676 
677 			ret = jffs2_reserve_space_gc(c, rawlen, &dummy, rawlen);
678 						/* this is not the exact summary size of it,
679 							it is only an upper estimation */
680 
681 			if (!ret) {
682 				D1(printk(KERN_DEBUG "Allocated space at 0x%08x to retry failed write.\n", phys_ofs));
683 
684 				jffs2_dbg_acct_sanity_check(c,jeb);
685 				jffs2_dbg_acct_paranoia_check(c, jeb);
686 
687 				goto retry;
688 			}
689 			D1(printk(KERN_DEBUG "Failed to allocate space to retry failed write: %d!\n", ret));
690 		}
691 
692 		if (!ret)
693 			ret = -EIO;
694 		goto out_node;
695 	}
696 	jffs2_add_physical_node_ref(c, phys_ofs | REF_PRISTINE, rawlen, ic);
697 
698 	jffs2_mark_node_obsolete(c, raw);
699 	D1(printk(KERN_DEBUG "WHEEE! GC REF_PRISTINE node at 0x%08x succeeded\n", ref_offset(raw)));
700 
701  out_node:
702 	kfree(node);
703 	return ret;
704  bail:
705 	ret = -EBADFD;
706 	goto out_node;
707 }
708 
709 static int jffs2_garbage_collect_metadata(struct jffs2_sb_info *c, struct jffs2_eraseblock *jeb,
710 					struct jffs2_inode_info *f, struct jffs2_full_dnode *fn)
711 {
712 	struct jffs2_full_dnode *new_fn;
713 	struct jffs2_raw_inode ri;
714 	struct jffs2_node_frag *last_frag;
715 	union jffs2_device_node dev;
716 	char *mdata = NULL;
717 	int mdatalen = 0;
718 	uint32_t alloclen, ilen;
719 	int ret;
720 
721 	if (S_ISBLK(JFFS2_F_I_MODE(f)) ||
722 	    S_ISCHR(JFFS2_F_I_MODE(f)) ) {
723 		/* For these, we don't actually need to read the old node */
724 		mdatalen = jffs2_encode_dev(&dev, JFFS2_F_I_RDEV(f));
725 		mdata = (char *)&dev;
726 		D1(printk(KERN_DEBUG "jffs2_garbage_collect_metadata(): Writing %d bytes of kdev_t\n", mdatalen));
727 	} else if (S_ISLNK(JFFS2_F_I_MODE(f))) {
728 		mdatalen = fn->size;
729 		mdata = kmalloc(fn->size, GFP_KERNEL);
730 		if (!mdata) {
731 			printk(KERN_WARNING "kmalloc of mdata failed in jffs2_garbage_collect_metadata()\n");
732 			return -ENOMEM;
733 		}
734 		ret = jffs2_read_dnode(c, f, fn, mdata, 0, mdatalen);
735 		if (ret) {
736 			printk(KERN_WARNING "read of old metadata failed in jffs2_garbage_collect_metadata(): %d\n", ret);
737 			kfree(mdata);
738 			return ret;
739 		}
740 		D1(printk(KERN_DEBUG "jffs2_garbage_collect_metadata(): Writing %d bites of symlink target\n", mdatalen));
741 
742 	}
743 
744 	ret = jffs2_reserve_space_gc(c, sizeof(ri) + mdatalen, &alloclen,
745 				JFFS2_SUMMARY_INODE_SIZE);
746 	if (ret) {
747 		printk(KERN_WARNING "jffs2_reserve_space_gc of %zd bytes for garbage_collect_metadata failed: %d\n",
748 		       sizeof(ri)+ mdatalen, ret);
749 		goto out;
750 	}
751 
752 	last_frag = frag_last(&f->fragtree);
753 	if (last_frag)
754 		/* Fetch the inode length from the fragtree rather then
755 		 * from i_size since i_size may have not been updated yet */
756 		ilen = last_frag->ofs + last_frag->size;
757 	else
758 		ilen = JFFS2_F_I_SIZE(f);
759 
760 	memset(&ri, 0, sizeof(ri));
761 	ri.magic = cpu_to_je16(JFFS2_MAGIC_BITMASK);
762 	ri.nodetype = cpu_to_je16(JFFS2_NODETYPE_INODE);
763 	ri.totlen = cpu_to_je32(sizeof(ri) + mdatalen);
764 	ri.hdr_crc = cpu_to_je32(crc32(0, &ri, sizeof(struct jffs2_unknown_node)-4));
765 
766 	ri.ino = cpu_to_je32(f->inocache->ino);
767 	ri.version = cpu_to_je32(++f->highest_version);
768 	ri.mode = cpu_to_jemode(JFFS2_F_I_MODE(f));
769 	ri.uid = cpu_to_je16(JFFS2_F_I_UID(f));
770 	ri.gid = cpu_to_je16(JFFS2_F_I_GID(f));
771 	ri.isize = cpu_to_je32(ilen);
772 	ri.atime = cpu_to_je32(JFFS2_F_I_ATIME(f));
773 	ri.ctime = cpu_to_je32(JFFS2_F_I_CTIME(f));
774 	ri.mtime = cpu_to_je32(JFFS2_F_I_MTIME(f));
775 	ri.offset = cpu_to_je32(0);
776 	ri.csize = cpu_to_je32(mdatalen);
777 	ri.dsize = cpu_to_je32(mdatalen);
778 	ri.compr = JFFS2_COMPR_NONE;
779 	ri.node_crc = cpu_to_je32(crc32(0, &ri, sizeof(ri)-8));
780 	ri.data_crc = cpu_to_je32(crc32(0, mdata, mdatalen));
781 
782 	new_fn = jffs2_write_dnode(c, f, &ri, mdata, mdatalen, ALLOC_GC);
783 
784 	if (IS_ERR(new_fn)) {
785 		printk(KERN_WARNING "Error writing new dnode: %ld\n", PTR_ERR(new_fn));
786 		ret = PTR_ERR(new_fn);
787 		goto out;
788 	}
789 	jffs2_mark_node_obsolete(c, fn->raw);
790 	jffs2_free_full_dnode(fn);
791 	f->metadata = new_fn;
792  out:
793 	if (S_ISLNK(JFFS2_F_I_MODE(f)))
794 		kfree(mdata);
795 	return ret;
796 }
797 
798 static int jffs2_garbage_collect_dirent(struct jffs2_sb_info *c, struct jffs2_eraseblock *jeb,
799 					struct jffs2_inode_info *f, struct jffs2_full_dirent *fd)
800 {
801 	struct jffs2_full_dirent *new_fd;
802 	struct jffs2_raw_dirent rd;
803 	uint32_t alloclen;
804 	int ret;
805 
806 	rd.magic = cpu_to_je16(JFFS2_MAGIC_BITMASK);
807 	rd.nodetype = cpu_to_je16(JFFS2_NODETYPE_DIRENT);
808 	rd.nsize = strlen(fd->name);
809 	rd.totlen = cpu_to_je32(sizeof(rd) + rd.nsize);
810 	rd.hdr_crc = cpu_to_je32(crc32(0, &rd, sizeof(struct jffs2_unknown_node)-4));
811 
812 	rd.pino = cpu_to_je32(f->inocache->ino);
813 	rd.version = cpu_to_je32(++f->highest_version);
814 	rd.ino = cpu_to_je32(fd->ino);
815 	/* If the times on this inode were set by explicit utime() they can be different,
816 	   so refrain from splatting them. */
817 	if (JFFS2_F_I_MTIME(f) == JFFS2_F_I_CTIME(f))
818 		rd.mctime = cpu_to_je32(JFFS2_F_I_MTIME(f));
819 	else
820 		rd.mctime = cpu_to_je32(0);
821 	rd.type = fd->type;
822 	rd.node_crc = cpu_to_je32(crc32(0, &rd, sizeof(rd)-8));
823 	rd.name_crc = cpu_to_je32(crc32(0, fd->name, rd.nsize));
824 
825 	ret = jffs2_reserve_space_gc(c, sizeof(rd)+rd.nsize, &alloclen,
826 				JFFS2_SUMMARY_DIRENT_SIZE(rd.nsize));
827 	if (ret) {
828 		printk(KERN_WARNING "jffs2_reserve_space_gc of %zd bytes for garbage_collect_dirent failed: %d\n",
829 		       sizeof(rd)+rd.nsize, ret);
830 		return ret;
831 	}
832 	new_fd = jffs2_write_dirent(c, f, &rd, fd->name, rd.nsize, ALLOC_GC);
833 
834 	if (IS_ERR(new_fd)) {
835 		printk(KERN_WARNING "jffs2_write_dirent in garbage_collect_dirent failed: %ld\n", PTR_ERR(new_fd));
836 		return PTR_ERR(new_fd);
837 	}
838 	jffs2_add_fd_to_list(c, new_fd, &f->dents);
839 	return 0;
840 }
841 
842 static int jffs2_garbage_collect_deletion_dirent(struct jffs2_sb_info *c, struct jffs2_eraseblock *jeb,
843 					struct jffs2_inode_info *f, struct jffs2_full_dirent *fd)
844 {
845 	struct jffs2_full_dirent **fdp = &f->dents;
846 	int found = 0;
847 
848 	/* On a medium where we can't actually mark nodes obsolete
849 	   pernamently, such as NAND flash, we need to work out
850 	   whether this deletion dirent is still needed to actively
851 	   delete a 'real' dirent with the same name that's still
852 	   somewhere else on the flash. */
853 	if (!jffs2_can_mark_obsolete(c)) {
854 		struct jffs2_raw_dirent *rd;
855 		struct jffs2_raw_node_ref *raw;
856 		int ret;
857 		size_t retlen;
858 		int name_len = strlen(fd->name);
859 		uint32_t name_crc = crc32(0, fd->name, name_len);
860 		uint32_t rawlen = ref_totlen(c, jeb, fd->raw);
861 
862 		rd = kmalloc(rawlen, GFP_KERNEL);
863 		if (!rd)
864 			return -ENOMEM;
865 
866 		/* Prevent the erase code from nicking the obsolete node refs while
867 		   we're looking at them. I really don't like this extra lock but
868 		   can't see any alternative. Suggestions on a postcard to... */
869 		mutex_lock(&c->erase_free_sem);
870 
871 		for (raw = f->inocache->nodes; raw != (void *)f->inocache; raw = raw->next_in_ino) {
872 
873 			cond_resched();
874 
875 			/* We only care about obsolete ones */
876 			if (!(ref_obsolete(raw)))
877 				continue;
878 
879 			/* Any dirent with the same name is going to have the same length... */
880 			if (ref_totlen(c, NULL, raw) != rawlen)
881 				continue;
882 
883 			/* Doesn't matter if there's one in the same erase block. We're going to
884 			   delete it too at the same time. */
885 			if (SECTOR_ADDR(raw->flash_offset) == SECTOR_ADDR(fd->raw->flash_offset))
886 				continue;
887 
888 			D1(printk(KERN_DEBUG "Check potential deletion dirent at %08x\n", ref_offset(raw)));
889 
890 			/* This is an obsolete node belonging to the same directory, and it's of the right
891 			   length. We need to take a closer look...*/
892 			ret = jffs2_flash_read(c, ref_offset(raw), rawlen, &retlen, (char *)rd);
893 			if (ret) {
894 				printk(KERN_WARNING "jffs2_g_c_deletion_dirent(): Read error (%d) reading obsolete node at %08x\n", ret, ref_offset(raw));
895 				/* If we can't read it, we don't need to continue to obsolete it. Continue */
896 				continue;
897 			}
898 			if (retlen != rawlen) {
899 				printk(KERN_WARNING "jffs2_g_c_deletion_dirent(): Short read (%zd not %u) reading header from obsolete node at %08x\n",
900 				       retlen, rawlen, ref_offset(raw));
901 				continue;
902 			}
903 
904 			if (je16_to_cpu(rd->nodetype) != JFFS2_NODETYPE_DIRENT)
905 				continue;
906 
907 			/* If the name CRC doesn't match, skip */
908 			if (je32_to_cpu(rd->name_crc) != name_crc)
909 				continue;
910 
911 			/* If the name length doesn't match, or it's another deletion dirent, skip */
912 			if (rd->nsize != name_len || !je32_to_cpu(rd->ino))
913 				continue;
914 
915 			/* OK, check the actual name now */
916 			if (memcmp(rd->name, fd->name, name_len))
917 				continue;
918 
919 			/* OK. The name really does match. There really is still an older node on
920 			   the flash which our deletion dirent obsoletes. So we have to write out
921 			   a new deletion dirent to replace it */
922 			mutex_unlock(&c->erase_free_sem);
923 
924 			D1(printk(KERN_DEBUG "Deletion dirent at %08x still obsoletes real dirent \"%s\" at %08x for ino #%u\n",
925 				  ref_offset(fd->raw), fd->name, ref_offset(raw), je32_to_cpu(rd->ino)));
926 			kfree(rd);
927 
928 			return jffs2_garbage_collect_dirent(c, jeb, f, fd);
929 		}
930 
931 		mutex_unlock(&c->erase_free_sem);
932 		kfree(rd);
933 	}
934 
935 	/* FIXME: If we're deleting a dirent which contains the current mtime and ctime,
936 	   we should update the metadata node with those times accordingly */
937 
938 	/* No need for it any more. Just mark it obsolete and remove it from the list */
939 	while (*fdp) {
940 		if ((*fdp) == fd) {
941 			found = 1;
942 			*fdp = fd->next;
943 			break;
944 		}
945 		fdp = &(*fdp)->next;
946 	}
947 	if (!found) {
948 		printk(KERN_WARNING "Deletion dirent \"%s\" not found in list for ino #%u\n", fd->name, f->inocache->ino);
949 	}
950 	jffs2_mark_node_obsolete(c, fd->raw);
951 	jffs2_free_full_dirent(fd);
952 	return 0;
953 }
954 
955 static int jffs2_garbage_collect_hole(struct jffs2_sb_info *c, struct jffs2_eraseblock *jeb,
956 				      struct jffs2_inode_info *f, struct jffs2_full_dnode *fn,
957 				      uint32_t start, uint32_t end)
958 {
959 	struct jffs2_raw_inode ri;
960 	struct jffs2_node_frag *frag;
961 	struct jffs2_full_dnode *new_fn;
962 	uint32_t alloclen, ilen;
963 	int ret;
964 
965 	D1(printk(KERN_DEBUG "Writing replacement hole node for ino #%u from offset 0x%x to 0x%x\n",
966 		  f->inocache->ino, start, end));
967 
968 	memset(&ri, 0, sizeof(ri));
969 
970 	if(fn->frags > 1) {
971 		size_t readlen;
972 		uint32_t crc;
973 		/* It's partially obsoleted by a later write. So we have to
974 		   write it out again with the _same_ version as before */
975 		ret = jffs2_flash_read(c, ref_offset(fn->raw), sizeof(ri), &readlen, (char *)&ri);
976 		if (readlen != sizeof(ri) || ret) {
977 			printk(KERN_WARNING "Node read failed in jffs2_garbage_collect_hole. Ret %d, retlen %zd. Data will be lost by writing new hole node\n", ret, readlen);
978 			goto fill;
979 		}
980 		if (je16_to_cpu(ri.nodetype) != JFFS2_NODETYPE_INODE) {
981 			printk(KERN_WARNING "jffs2_garbage_collect_hole: Node at 0x%08x had node type 0x%04x instead of JFFS2_NODETYPE_INODE(0x%04x)\n",
982 			       ref_offset(fn->raw),
983 			       je16_to_cpu(ri.nodetype), JFFS2_NODETYPE_INODE);
984 			return -EIO;
985 		}
986 		if (je32_to_cpu(ri.totlen) != sizeof(ri)) {
987 			printk(KERN_WARNING "jffs2_garbage_collect_hole: Node at 0x%08x had totlen 0x%x instead of expected 0x%zx\n",
988 			       ref_offset(fn->raw),
989 			       je32_to_cpu(ri.totlen), sizeof(ri));
990 			return -EIO;
991 		}
992 		crc = crc32(0, &ri, sizeof(ri)-8);
993 		if (crc != je32_to_cpu(ri.node_crc)) {
994 			printk(KERN_WARNING "jffs2_garbage_collect_hole: Node at 0x%08x had CRC 0x%08x which doesn't match calculated CRC 0x%08x\n",
995 			       ref_offset(fn->raw),
996 			       je32_to_cpu(ri.node_crc), crc);
997 			/* FIXME: We could possibly deal with this by writing new holes for each frag */
998 			printk(KERN_WARNING "Data in the range 0x%08x to 0x%08x of inode #%u will be lost\n",
999 			       start, end, f->inocache->ino);
1000 			goto fill;
1001 		}
1002 		if (ri.compr != JFFS2_COMPR_ZERO) {
1003 			printk(KERN_WARNING "jffs2_garbage_collect_hole: Node 0x%08x wasn't a hole node!\n", ref_offset(fn->raw));
1004 			printk(KERN_WARNING "Data in the range 0x%08x to 0x%08x of inode #%u will be lost\n",
1005 			       start, end, f->inocache->ino);
1006 			goto fill;
1007 		}
1008 	} else {
1009 	fill:
1010 		ri.magic = cpu_to_je16(JFFS2_MAGIC_BITMASK);
1011 		ri.nodetype = cpu_to_je16(JFFS2_NODETYPE_INODE);
1012 		ri.totlen = cpu_to_je32(sizeof(ri));
1013 		ri.hdr_crc = cpu_to_je32(crc32(0, &ri, sizeof(struct jffs2_unknown_node)-4));
1014 
1015 		ri.ino = cpu_to_je32(f->inocache->ino);
1016 		ri.version = cpu_to_je32(++f->highest_version);
1017 		ri.offset = cpu_to_je32(start);
1018 		ri.dsize = cpu_to_je32(end - start);
1019 		ri.csize = cpu_to_je32(0);
1020 		ri.compr = JFFS2_COMPR_ZERO;
1021 	}
1022 
1023 	frag = frag_last(&f->fragtree);
1024 	if (frag)
1025 		/* Fetch the inode length from the fragtree rather then
1026 		 * from i_size since i_size may have not been updated yet */
1027 		ilen = frag->ofs + frag->size;
1028 	else
1029 		ilen = JFFS2_F_I_SIZE(f);
1030 
1031 	ri.mode = cpu_to_jemode(JFFS2_F_I_MODE(f));
1032 	ri.uid = cpu_to_je16(JFFS2_F_I_UID(f));
1033 	ri.gid = cpu_to_je16(JFFS2_F_I_GID(f));
1034 	ri.isize = cpu_to_je32(ilen);
1035 	ri.atime = cpu_to_je32(JFFS2_F_I_ATIME(f));
1036 	ri.ctime = cpu_to_je32(JFFS2_F_I_CTIME(f));
1037 	ri.mtime = cpu_to_je32(JFFS2_F_I_MTIME(f));
1038 	ri.data_crc = cpu_to_je32(0);
1039 	ri.node_crc = cpu_to_je32(crc32(0, &ri, sizeof(ri)-8));
1040 
1041 	ret = jffs2_reserve_space_gc(c, sizeof(ri), &alloclen,
1042 				     JFFS2_SUMMARY_INODE_SIZE);
1043 	if (ret) {
1044 		printk(KERN_WARNING "jffs2_reserve_space_gc of %zd bytes for garbage_collect_hole failed: %d\n",
1045 		       sizeof(ri), ret);
1046 		return ret;
1047 	}
1048 	new_fn = jffs2_write_dnode(c, f, &ri, NULL, 0, ALLOC_GC);
1049 
1050 	if (IS_ERR(new_fn)) {
1051 		printk(KERN_WARNING "Error writing new hole node: %ld\n", PTR_ERR(new_fn));
1052 		return PTR_ERR(new_fn);
1053 	}
1054 	if (je32_to_cpu(ri.version) == f->highest_version) {
1055 		jffs2_add_full_dnode_to_inode(c, f, new_fn);
1056 		if (f->metadata) {
1057 			jffs2_mark_node_obsolete(c, f->metadata->raw);
1058 			jffs2_free_full_dnode(f->metadata);
1059 			f->metadata = NULL;
1060 		}
1061 		return 0;
1062 	}
1063 
1064 	/*
1065 	 * We should only get here in the case where the node we are
1066 	 * replacing had more than one frag, so we kept the same version
1067 	 * number as before. (Except in case of error -- see 'goto fill;'
1068 	 * above.)
1069 	 */
1070 	D1(if(unlikely(fn->frags <= 1)) {
1071 		printk(KERN_WARNING "jffs2_garbage_collect_hole: Replacing fn with %d frag(s) but new ver %d != highest_version %d of ino #%d\n",
1072 		       fn->frags, je32_to_cpu(ri.version), f->highest_version,
1073 		       je32_to_cpu(ri.ino));
1074 	});
1075 
1076 	/* This is a partially-overlapped hole node. Mark it REF_NORMAL not REF_PRISTINE */
1077 	mark_ref_normal(new_fn->raw);
1078 
1079 	for (frag = jffs2_lookup_node_frag(&f->fragtree, fn->ofs);
1080 	     frag; frag = frag_next(frag)) {
1081 		if (frag->ofs > fn->size + fn->ofs)
1082 			break;
1083 		if (frag->node == fn) {
1084 			frag->node = new_fn;
1085 			new_fn->frags++;
1086 			fn->frags--;
1087 		}
1088 	}
1089 	if (fn->frags) {
1090 		printk(KERN_WARNING "jffs2_garbage_collect_hole: Old node still has frags!\n");
1091 		BUG();
1092 	}
1093 	if (!new_fn->frags) {
1094 		printk(KERN_WARNING "jffs2_garbage_collect_hole: New node has no frags!\n");
1095 		BUG();
1096 	}
1097 
1098 	jffs2_mark_node_obsolete(c, fn->raw);
1099 	jffs2_free_full_dnode(fn);
1100 
1101 	return 0;
1102 }
1103 
1104 static int jffs2_garbage_collect_dnode(struct jffs2_sb_info *c, struct jffs2_eraseblock *orig_jeb,
1105 				       struct jffs2_inode_info *f, struct jffs2_full_dnode *fn,
1106 				       uint32_t start, uint32_t end)
1107 {
1108 	struct jffs2_full_dnode *new_fn;
1109 	struct jffs2_raw_inode ri;
1110 	uint32_t alloclen, offset, orig_end, orig_start;
1111 	int ret = 0;
1112 	unsigned char *comprbuf = NULL, *writebuf;
1113 	unsigned long pg;
1114 	unsigned char *pg_ptr;
1115 
1116 	memset(&ri, 0, sizeof(ri));
1117 
1118 	D1(printk(KERN_DEBUG "Writing replacement dnode for ino #%u from offset 0x%x to 0x%x\n",
1119 		  f->inocache->ino, start, end));
1120 
1121 	orig_end = end;
1122 	orig_start = start;
1123 
1124 	if (c->nr_free_blocks + c->nr_erasing_blocks > c->resv_blocks_gcmerge) {
1125 		/* Attempt to do some merging. But only expand to cover logically
1126 		   adjacent frags if the block containing them is already considered
1127 		   to be dirty. Otherwise we end up with GC just going round in
1128 		   circles dirtying the nodes it already wrote out, especially
1129 		   on NAND where we have small eraseblocks and hence a much higher
1130 		   chance of nodes having to be split to cross boundaries. */
1131 
1132 		struct jffs2_node_frag *frag;
1133 		uint32_t min, max;
1134 
1135 		min = start & ~(PAGE_CACHE_SIZE-1);
1136 		max = min + PAGE_CACHE_SIZE;
1137 
1138 		frag = jffs2_lookup_node_frag(&f->fragtree, start);
1139 
1140 		/* BUG_ON(!frag) but that'll happen anyway... */
1141 
1142 		BUG_ON(frag->ofs != start);
1143 
1144 		/* First grow down... */
1145 		while((frag = frag_prev(frag)) && frag->ofs >= min) {
1146 
1147 			/* If the previous frag doesn't even reach the beginning, there's
1148 			   excessive fragmentation. Just merge. */
1149 			if (frag->ofs > min) {
1150 				D1(printk(KERN_DEBUG "Expanding down to cover partial frag (0x%x-0x%x)\n",
1151 					  frag->ofs, frag->ofs+frag->size));
1152 				start = frag->ofs;
1153 				continue;
1154 			}
1155 			/* OK. This frag holds the first byte of the page. */
1156 			if (!frag->node || !frag->node->raw) {
1157 				D1(printk(KERN_DEBUG "First frag in page is hole (0x%x-0x%x). Not expanding down.\n",
1158 					  frag->ofs, frag->ofs+frag->size));
1159 				break;
1160 			} else {
1161 
1162 				/* OK, it's a frag which extends to the beginning of the page. Does it live
1163 				   in a block which is still considered clean? If so, don't obsolete it.
1164 				   If not, cover it anyway. */
1165 
1166 				struct jffs2_raw_node_ref *raw = frag->node->raw;
1167 				struct jffs2_eraseblock *jeb;
1168 
1169 				jeb = &c->blocks[raw->flash_offset / c->sector_size];
1170 
1171 				if (jeb == c->gcblock) {
1172 					D1(printk(KERN_DEBUG "Expanding down to cover frag (0x%x-0x%x) in gcblock at %08x\n",
1173 						  frag->ofs, frag->ofs+frag->size, ref_offset(raw)));
1174 					start = frag->ofs;
1175 					break;
1176 				}
1177 				if (!ISDIRTY(jeb->dirty_size + jeb->wasted_size)) {
1178 					D1(printk(KERN_DEBUG "Not expanding down to cover frag (0x%x-0x%x) in clean block %08x\n",
1179 						  frag->ofs, frag->ofs+frag->size, jeb->offset));
1180 					break;
1181 				}
1182 
1183 				D1(printk(KERN_DEBUG "Expanding down to cover frag (0x%x-0x%x) in dirty block %08x\n",
1184 						  frag->ofs, frag->ofs+frag->size, jeb->offset));
1185 				start = frag->ofs;
1186 				break;
1187 			}
1188 		}
1189 
1190 		/* ... then up */
1191 
1192 		/* Find last frag which is actually part of the node we're to GC. */
1193 		frag = jffs2_lookup_node_frag(&f->fragtree, end-1);
1194 
1195 		while((frag = frag_next(frag)) && frag->ofs+frag->size <= max) {
1196 
1197 			/* If the previous frag doesn't even reach the beginning, there's lots
1198 			   of fragmentation. Just merge. */
1199 			if (frag->ofs+frag->size < max) {
1200 				D1(printk(KERN_DEBUG "Expanding up to cover partial frag (0x%x-0x%x)\n",
1201 					  frag->ofs, frag->ofs+frag->size));
1202 				end = frag->ofs + frag->size;
1203 				continue;
1204 			}
1205 
1206 			if (!frag->node || !frag->node->raw) {
1207 				D1(printk(KERN_DEBUG "Last frag in page is hole (0x%x-0x%x). Not expanding up.\n",
1208 					  frag->ofs, frag->ofs+frag->size));
1209 				break;
1210 			} else {
1211 
1212 				/* OK, it's a frag which extends to the beginning of the page. Does it live
1213 				   in a block which is still considered clean? If so, don't obsolete it.
1214 				   If not, cover it anyway. */
1215 
1216 				struct jffs2_raw_node_ref *raw = frag->node->raw;
1217 				struct jffs2_eraseblock *jeb;
1218 
1219 				jeb = &c->blocks[raw->flash_offset / c->sector_size];
1220 
1221 				if (jeb == c->gcblock) {
1222 					D1(printk(KERN_DEBUG "Expanding up to cover frag (0x%x-0x%x) in gcblock at %08x\n",
1223 						  frag->ofs, frag->ofs+frag->size, ref_offset(raw)));
1224 					end = frag->ofs + frag->size;
1225 					break;
1226 				}
1227 				if (!ISDIRTY(jeb->dirty_size + jeb->wasted_size)) {
1228 					D1(printk(KERN_DEBUG "Not expanding up to cover frag (0x%x-0x%x) in clean block %08x\n",
1229 						  frag->ofs, frag->ofs+frag->size, jeb->offset));
1230 					break;
1231 				}
1232 
1233 				D1(printk(KERN_DEBUG "Expanding up to cover frag (0x%x-0x%x) in dirty block %08x\n",
1234 						  frag->ofs, frag->ofs+frag->size, jeb->offset));
1235 				end = frag->ofs + frag->size;
1236 				break;
1237 			}
1238 		}
1239 		D1(printk(KERN_DEBUG "Expanded dnode to write from (0x%x-0x%x) to (0x%x-0x%x)\n",
1240 			  orig_start, orig_end, start, end));
1241 
1242 		D1(BUG_ON(end > frag_last(&f->fragtree)->ofs + frag_last(&f->fragtree)->size));
1243 		BUG_ON(end < orig_end);
1244 		BUG_ON(start > orig_start);
1245 	}
1246 
1247 	/* First, use readpage() to read the appropriate page into the page cache */
1248 	/* Q: What happens if we actually try to GC the _same_ page for which commit_write()
1249 	 *    triggered garbage collection in the first place?
1250 	 * A: I _think_ it's OK. read_cache_page shouldn't deadlock, we'll write out the
1251 	 *    page OK. We'll actually write it out again in commit_write, which is a little
1252 	 *    suboptimal, but at least we're correct.
1253 	 */
1254 	pg_ptr = jffs2_gc_fetch_page(c, f, start, &pg);
1255 
1256 	if (IS_ERR(pg_ptr)) {
1257 		printk(KERN_WARNING "read_cache_page() returned error: %ld\n", PTR_ERR(pg_ptr));
1258 		return PTR_ERR(pg_ptr);
1259 	}
1260 
1261 	offset = start;
1262 	while(offset < orig_end) {
1263 		uint32_t datalen;
1264 		uint32_t cdatalen;
1265 		uint16_t comprtype = JFFS2_COMPR_NONE;
1266 
1267 		ret = jffs2_reserve_space_gc(c, sizeof(ri) + JFFS2_MIN_DATA_LEN,
1268 					&alloclen, JFFS2_SUMMARY_INODE_SIZE);
1269 
1270 		if (ret) {
1271 			printk(KERN_WARNING "jffs2_reserve_space_gc of %zd bytes for garbage_collect_dnode failed: %d\n",
1272 			       sizeof(ri)+ JFFS2_MIN_DATA_LEN, ret);
1273 			break;
1274 		}
1275 		cdatalen = min_t(uint32_t, alloclen - sizeof(ri), end - offset);
1276 		datalen = end - offset;
1277 
1278 		writebuf = pg_ptr + (offset & (PAGE_CACHE_SIZE -1));
1279 
1280 		comprtype = jffs2_compress(c, f, writebuf, &comprbuf, &datalen, &cdatalen);
1281 
1282 		ri.magic = cpu_to_je16(JFFS2_MAGIC_BITMASK);
1283 		ri.nodetype = cpu_to_je16(JFFS2_NODETYPE_INODE);
1284 		ri.totlen = cpu_to_je32(sizeof(ri) + cdatalen);
1285 		ri.hdr_crc = cpu_to_je32(crc32(0, &ri, sizeof(struct jffs2_unknown_node)-4));
1286 
1287 		ri.ino = cpu_to_je32(f->inocache->ino);
1288 		ri.version = cpu_to_je32(++f->highest_version);
1289 		ri.mode = cpu_to_jemode(JFFS2_F_I_MODE(f));
1290 		ri.uid = cpu_to_je16(JFFS2_F_I_UID(f));
1291 		ri.gid = cpu_to_je16(JFFS2_F_I_GID(f));
1292 		ri.isize = cpu_to_je32(JFFS2_F_I_SIZE(f));
1293 		ri.atime = cpu_to_je32(JFFS2_F_I_ATIME(f));
1294 		ri.ctime = cpu_to_je32(JFFS2_F_I_CTIME(f));
1295 		ri.mtime = cpu_to_je32(JFFS2_F_I_MTIME(f));
1296 		ri.offset = cpu_to_je32(offset);
1297 		ri.csize = cpu_to_je32(cdatalen);
1298 		ri.dsize = cpu_to_je32(datalen);
1299 		ri.compr = comprtype & 0xff;
1300 		ri.usercompr = (comprtype >> 8) & 0xff;
1301 		ri.node_crc = cpu_to_je32(crc32(0, &ri, sizeof(ri)-8));
1302 		ri.data_crc = cpu_to_je32(crc32(0, comprbuf, cdatalen));
1303 
1304 		new_fn = jffs2_write_dnode(c, f, &ri, comprbuf, cdatalen, ALLOC_GC);
1305 
1306 		jffs2_free_comprbuf(comprbuf, writebuf);
1307 
1308 		if (IS_ERR(new_fn)) {
1309 			printk(KERN_WARNING "Error writing new dnode: %ld\n", PTR_ERR(new_fn));
1310 			ret = PTR_ERR(new_fn);
1311 			break;
1312 		}
1313 		ret = jffs2_add_full_dnode_to_inode(c, f, new_fn);
1314 		offset += datalen;
1315 		if (f->metadata) {
1316 			jffs2_mark_node_obsolete(c, f->metadata->raw);
1317 			jffs2_free_full_dnode(f->metadata);
1318 			f->metadata = NULL;
1319 		}
1320 	}
1321 
1322 	jffs2_gc_release_page(c, pg_ptr, &pg);
1323 	return ret;
1324 }
1325