xref: /openbmc/linux/fs/jffs2/gc.c (revision b534e70c)
1 /*
2  * JFFS2 -- Journalling Flash File System, Version 2.
3  *
4  * Copyright © 2001-2007 Red Hat, Inc.
5  *
6  * Created by David Woodhouse <dwmw2@infradead.org>
7  *
8  * For licensing information, see the file 'LICENCE' in this directory.
9  *
10  */
11 
12 #include <linux/kernel.h>
13 #include <linux/mtd/mtd.h>
14 #include <linux/slab.h>
15 #include <linux/pagemap.h>
16 #include <linux/crc32.h>
17 #include <linux/compiler.h>
18 #include <linux/stat.h>
19 #include "nodelist.h"
20 #include "compr.h"
21 
/*
 * Prototypes for the file-local GC helpers. They are called by the
 * functions below before their own definitions appear.
 */
static int jffs2_garbage_collect_pristine(struct jffs2_sb_info *c,
					  struct jffs2_inode_cache *ic,
					  struct jffs2_raw_node_ref *raw);

static int jffs2_garbage_collect_metadata(struct jffs2_sb_info *c,
					  struct jffs2_eraseblock *jeb,
					  struct jffs2_inode_info *f,
					  struct jffs2_full_dnode *fd);

static int jffs2_garbage_collect_dirent(struct jffs2_sb_info *c,
					struct jffs2_eraseblock *jeb,
					struct jffs2_inode_info *f,
					struct jffs2_full_dirent *fd);

static int jffs2_garbage_collect_deletion_dirent(struct jffs2_sb_info *c,
						 struct jffs2_eraseblock *jeb,
						 struct jffs2_inode_info *f,
						 struct jffs2_full_dirent *fd);

static int jffs2_garbage_collect_hole(struct jffs2_sb_info *c,
				      struct jffs2_eraseblock *jeb,
				      struct jffs2_inode_info *f,
				      struct jffs2_full_dnode *fn,
				      uint32_t start, uint32_t end);

static int jffs2_garbage_collect_dnode(struct jffs2_sb_info *c,
				       struct jffs2_eraseblock *jeb,
				       struct jffs2_inode_info *f,
				       struct jffs2_full_dnode *fn,
				       uint32_t start, uint32_t end);

static int jffs2_garbage_collect_live(struct jffs2_sb_info *c,
				      struct jffs2_eraseblock *jeb,
				      struct jffs2_raw_node_ref *raw,
				      struct jffs2_inode_info *f);
39 
40 /* Called with erase_completion_lock held */
41 static struct jffs2_eraseblock *jffs2_find_gc_block(struct jffs2_sb_info *c)
42 {
43 	struct jffs2_eraseblock *ret;
44 	struct list_head *nextlist = NULL;
45 	int n = jiffies % 128;
46 
47 	/* Pick an eraseblock to garbage collect next. This is where we'll
48 	   put the clever wear-levelling algorithms. Eventually.  */
49 	/* We possibly want to favour the dirtier blocks more when the
50 	   number of free blocks is low. */
51 again:
52 	if (!list_empty(&c->bad_used_list) && c->nr_free_blocks > c->resv_blocks_gcbad) {
53 		D1(printk(KERN_DEBUG "Picking block from bad_used_list to GC next\n"));
54 		nextlist = &c->bad_used_list;
55 	} else if (n < 50 && !list_empty(&c->erasable_list)) {
56 		/* Note that most of them will have gone directly to be erased.
57 		   So don't favour the erasable_list _too_ much. */
58 		D1(printk(KERN_DEBUG "Picking block from erasable_list to GC next\n"));
59 		nextlist = &c->erasable_list;
60 	} else if (n < 110 && !list_empty(&c->very_dirty_list)) {
61 		/* Most of the time, pick one off the very_dirty list */
62 		D1(printk(KERN_DEBUG "Picking block from very_dirty_list to GC next\n"));
63 		nextlist = &c->very_dirty_list;
64 	} else if (n < 126 && !list_empty(&c->dirty_list)) {
65 		D1(printk(KERN_DEBUG "Picking block from dirty_list to GC next\n"));
66 		nextlist = &c->dirty_list;
67 	} else if (!list_empty(&c->clean_list)) {
68 		D1(printk(KERN_DEBUG "Picking block from clean_list to GC next\n"));
69 		nextlist = &c->clean_list;
70 	} else if (!list_empty(&c->dirty_list)) {
71 		D1(printk(KERN_DEBUG "Picking block from dirty_list to GC next (clean_list was empty)\n"));
72 
73 		nextlist = &c->dirty_list;
74 	} else if (!list_empty(&c->very_dirty_list)) {
75 		D1(printk(KERN_DEBUG "Picking block from very_dirty_list to GC next (clean_list and dirty_list were empty)\n"));
76 		nextlist = &c->very_dirty_list;
77 	} else if (!list_empty(&c->erasable_list)) {
78 		D1(printk(KERN_DEBUG "Picking block from erasable_list to GC next (clean_list and {very_,}dirty_list were empty)\n"));
79 
80 		nextlist = &c->erasable_list;
81 	} else if (!list_empty(&c->erasable_pending_wbuf_list)) {
82 		/* There are blocks are wating for the wbuf sync */
83 		D1(printk(KERN_DEBUG "Synching wbuf in order to reuse erasable_pending_wbuf_list blocks\n"));
84 		spin_unlock(&c->erase_completion_lock);
85 		jffs2_flush_wbuf_pad(c);
86 		spin_lock(&c->erase_completion_lock);
87 		goto again;
88 	} else {
89 		/* Eep. All were empty */
90 		D1(printk(KERN_NOTICE "jffs2: No clean, dirty _or_ erasable blocks to GC from! Where are they all?\n"));
91 		return NULL;
92 	}
93 
94 	ret = list_entry(nextlist->next, struct jffs2_eraseblock, list);
95 	list_del(&ret->list);
96 	c->gcblock = ret;
97 	ret->gc_node = ret->first_node;
98 	if (!ret->gc_node) {
99 		printk(KERN_WARNING "Eep. ret->gc_node for block at 0x%08x is NULL\n", ret->offset);
100 		BUG();
101 	}
102 
103 	/* Have we accidentally picked a clean block with wasted space ? */
104 	if (ret->wasted_size) {
105 		D1(printk(KERN_DEBUG "Converting wasted_size %08x to dirty_size\n", ret->wasted_size));
106 		ret->dirty_size += ret->wasted_size;
107 		c->wasted_size -= ret->wasted_size;
108 		c->dirty_size += ret->wasted_size;
109 		ret->wasted_size = 0;
110 	}
111 
112 	return ret;
113 }
114 
115 /* jffs2_garbage_collect_pass
116  * Make a single attempt to progress GC. Move one node, and possibly
117  * start erasing one eraseblock.
118  */
119 int jffs2_garbage_collect_pass(struct jffs2_sb_info *c)
120 {
121 	struct jffs2_inode_info *f;
122 	struct jffs2_inode_cache *ic;
123 	struct jffs2_eraseblock *jeb;
124 	struct jffs2_raw_node_ref *raw;
125 	uint32_t gcblock_dirty;
126 	int ret = 0, inum, nlink;
127 	int xattr = 0;
128 
129 	if (down_interruptible(&c->alloc_sem))
130 		return -EINTR;
131 
132 	for (;;) {
133 		spin_lock(&c->erase_completion_lock);
134 		if (!c->unchecked_size)
135 			break;
136 
137 		/* We can't start doing GC yet. We haven't finished checking
138 		   the node CRCs etc. Do it now. */
139 
140 		/* checked_ino is protected by the alloc_sem */
141 		if (c->checked_ino > c->highest_ino && xattr) {
142 			printk(KERN_CRIT "Checked all inodes but still 0x%x bytes of unchecked space?\n",
143 			       c->unchecked_size);
144 			jffs2_dbg_dump_block_lists_nolock(c);
145 			spin_unlock(&c->erase_completion_lock);
146 			up(&c->alloc_sem);
147 			return -ENOSPC;
148 		}
149 
150 		spin_unlock(&c->erase_completion_lock);
151 
152 		if (!xattr)
153 			xattr = jffs2_verify_xattr(c);
154 
155 		spin_lock(&c->inocache_lock);
156 
157 		ic = jffs2_get_ino_cache(c, c->checked_ino++);
158 
159 		if (!ic) {
160 			spin_unlock(&c->inocache_lock);
161 			continue;
162 		}
163 
164 		if (!ic->nlink) {
165 			D1(printk(KERN_DEBUG "Skipping check of ino #%d with nlink zero\n",
166 				  ic->ino));
167 			spin_unlock(&c->inocache_lock);
168 			jffs2_xattr_delete_inode(c, ic);
169 			continue;
170 		}
171 		switch(ic->state) {
172 		case INO_STATE_CHECKEDABSENT:
173 		case INO_STATE_PRESENT:
174 			D1(printk(KERN_DEBUG "Skipping ino #%u already checked\n", ic->ino));
175 			spin_unlock(&c->inocache_lock);
176 			continue;
177 
178 		case INO_STATE_GC:
179 		case INO_STATE_CHECKING:
180 			printk(KERN_WARNING "Inode #%u is in state %d during CRC check phase!\n", ic->ino, ic->state);
181 			spin_unlock(&c->inocache_lock);
182 			BUG();
183 
184 		case INO_STATE_READING:
185 			/* We need to wait for it to finish, lest we move on
186 			   and trigger the BUG() above while we haven't yet
187 			   finished checking all its nodes */
188 			D1(printk(KERN_DEBUG "Waiting for ino #%u to finish reading\n", ic->ino));
189 			/* We need to come back again for the _same_ inode. We've
190 			 made no progress in this case, but that should be OK */
191 			c->checked_ino--;
192 
193 			up(&c->alloc_sem);
194 			sleep_on_spinunlock(&c->inocache_wq, &c->inocache_lock);
195 			return 0;
196 
197 		default:
198 			BUG();
199 
200 		case INO_STATE_UNCHECKED:
201 			;
202 		}
203 		ic->state = INO_STATE_CHECKING;
204 		spin_unlock(&c->inocache_lock);
205 
206 		D1(printk(KERN_DEBUG "jffs2_garbage_collect_pass() triggering inode scan of ino#%u\n", ic->ino));
207 
208 		ret = jffs2_do_crccheck_inode(c, ic);
209 		if (ret)
210 			printk(KERN_WARNING "Returned error for crccheck of ino #%u. Expect badness...\n", ic->ino);
211 
212 		jffs2_set_inocache_state(c, ic, INO_STATE_CHECKEDABSENT);
213 		up(&c->alloc_sem);
214 		return ret;
215 	}
216 
217 	/* First, work out which block we're garbage-collecting */
218 	jeb = c->gcblock;
219 
220 	if (!jeb)
221 		jeb = jffs2_find_gc_block(c);
222 
223 	if (!jeb) {
224 		D1 (printk(KERN_NOTICE "jffs2: Couldn't find erase block to garbage collect!\n"));
225 		spin_unlock(&c->erase_completion_lock);
226 		up(&c->alloc_sem);
227 		return -EIO;
228 	}
229 
230 	D1(printk(KERN_DEBUG "GC from block %08x, used_size %08x, dirty_size %08x, free_size %08x\n", jeb->offset, jeb->used_size, jeb->dirty_size, jeb->free_size));
231 	D1(if (c->nextblock)
232 	   printk(KERN_DEBUG "Nextblock at  %08x, used_size %08x, dirty_size %08x, wasted_size %08x, free_size %08x\n", c->nextblock->offset, c->nextblock->used_size, c->nextblock->dirty_size, c->nextblock->wasted_size, c->nextblock->free_size));
233 
234 	if (!jeb->used_size) {
235 		up(&c->alloc_sem);
236 		goto eraseit;
237 	}
238 
239 	raw = jeb->gc_node;
240 	gcblock_dirty = jeb->dirty_size;
241 
242 	while(ref_obsolete(raw)) {
243 		D1(printk(KERN_DEBUG "Node at 0x%08x is obsolete... skipping\n", ref_offset(raw)));
244 		raw = ref_next(raw);
245 		if (unlikely(!raw)) {
246 			printk(KERN_WARNING "eep. End of raw list while still supposedly nodes to GC\n");
247 			printk(KERN_WARNING "erase block at 0x%08x. free_size 0x%08x, dirty_size 0x%08x, used_size 0x%08x\n",
248 			       jeb->offset, jeb->free_size, jeb->dirty_size, jeb->used_size);
249 			jeb->gc_node = raw;
250 			spin_unlock(&c->erase_completion_lock);
251 			up(&c->alloc_sem);
252 			BUG();
253 		}
254 	}
255 	jeb->gc_node = raw;
256 
257 	D1(printk(KERN_DEBUG "Going to garbage collect node at 0x%08x\n", ref_offset(raw)));
258 
259 	if (!raw->next_in_ino) {
260 		/* Inode-less node. Clean marker, snapshot or something like that */
261 		spin_unlock(&c->erase_completion_lock);
262 		if (ref_flags(raw) == REF_PRISTINE) {
263 			/* It's an unknown node with JFFS2_FEATURE_RWCOMPAT_COPY */
264 			jffs2_garbage_collect_pristine(c, NULL, raw);
265 		} else {
266 			/* Just mark it obsolete */
267 			jffs2_mark_node_obsolete(c, raw);
268 		}
269 		up(&c->alloc_sem);
270 		goto eraseit_lock;
271 	}
272 
273 	ic = jffs2_raw_ref_to_ic(raw);
274 
275 #ifdef CONFIG_JFFS2_FS_XATTR
276 	/* When 'ic' refers xattr_datum/xattr_ref, this node is GCed as xattr.
277 	 * We can decide whether this node is inode or xattr by ic->class.     */
278 	if (ic->class == RAWNODE_CLASS_XATTR_DATUM
279 	    || ic->class == RAWNODE_CLASS_XATTR_REF) {
280 		spin_unlock(&c->erase_completion_lock);
281 
282 		if (ic->class == RAWNODE_CLASS_XATTR_DATUM) {
283 			ret = jffs2_garbage_collect_xattr_datum(c, (struct jffs2_xattr_datum *)ic, raw);
284 		} else {
285 			ret = jffs2_garbage_collect_xattr_ref(c, (struct jffs2_xattr_ref *)ic, raw);
286 		}
287 		goto test_gcnode;
288 	}
289 #endif
290 
291 	/* We need to hold the inocache. Either the erase_completion_lock or
292 	   the inocache_lock are sufficient; we trade down since the inocache_lock
293 	   causes less contention. */
294 	spin_lock(&c->inocache_lock);
295 
296 	spin_unlock(&c->erase_completion_lock);
297 
298 	D1(printk(KERN_DEBUG "jffs2_garbage_collect_pass collecting from block @0x%08x. Node @0x%08x(%d), ino #%u\n", jeb->offset, ref_offset(raw), ref_flags(raw), ic->ino));
299 
300 	/* Three possibilities:
301 	   1. Inode is already in-core. We must iget it and do proper
302 	      updating to its fragtree, etc.
303 	   2. Inode is not in-core, node is REF_PRISTINE. We lock the
304 	      inocache to prevent a read_inode(), copy the node intact.
305 	   3. Inode is not in-core, node is not pristine. We must iget()
306 	      and take the slow path.
307 	*/
308 
309 	switch(ic->state) {
310 	case INO_STATE_CHECKEDABSENT:
311 		/* It's been checked, but it's not currently in-core.
312 		   We can just copy any pristine nodes, but have
313 		   to prevent anyone else from doing read_inode() while
314 		   we're at it, so we set the state accordingly */
315 		if (ref_flags(raw) == REF_PRISTINE)
316 			ic->state = INO_STATE_GC;
317 		else {
318 			D1(printk(KERN_DEBUG "Ino #%u is absent but node not REF_PRISTINE. Reading.\n",
319 				  ic->ino));
320 		}
321 		break;
322 
323 	case INO_STATE_PRESENT:
324 		/* It's in-core. GC must iget() it. */
325 		break;
326 
327 	case INO_STATE_UNCHECKED:
328 	case INO_STATE_CHECKING:
329 	case INO_STATE_GC:
330 		/* Should never happen. We should have finished checking
331 		   by the time we actually start doing any GC, and since
332 		   we're holding the alloc_sem, no other garbage collection
333 		   can happen.
334 		*/
335 		printk(KERN_CRIT "Inode #%u already in state %d in jffs2_garbage_collect_pass()!\n",
336 		       ic->ino, ic->state);
337 		up(&c->alloc_sem);
338 		spin_unlock(&c->inocache_lock);
339 		BUG();
340 
341 	case INO_STATE_READING:
342 		/* Someone's currently trying to read it. We must wait for
343 		   them to finish and then go through the full iget() route
344 		   to do the GC. However, sometimes read_inode() needs to get
345 		   the alloc_sem() (for marking nodes invalid) so we must
346 		   drop the alloc_sem before sleeping. */
347 
348 		up(&c->alloc_sem);
349 		D1(printk(KERN_DEBUG "jffs2_garbage_collect_pass() waiting for ino #%u in state %d\n",
350 			  ic->ino, ic->state));
351 		sleep_on_spinunlock(&c->inocache_wq, &c->inocache_lock);
352 		/* And because we dropped the alloc_sem we must start again from the
353 		   beginning. Ponder chance of livelock here -- we're returning success
354 		   without actually making any progress.
355 
356 		   Q: What are the chances that the inode is back in INO_STATE_READING
357 		   again by the time we next enter this function? And that this happens
358 		   enough times to cause a real delay?
359 
360 		   A: Small enough that I don't care :)
361 		*/
362 		return 0;
363 	}
364 
365 	/* OK. Now if the inode is in state INO_STATE_GC, we are going to copy the
366 	   node intact, and we don't have to muck about with the fragtree etc.
367 	   because we know it's not in-core. If it _was_ in-core, we go through
368 	   all the iget() crap anyway */
369 
370 	if (ic->state == INO_STATE_GC) {
371 		spin_unlock(&c->inocache_lock);
372 
373 		ret = jffs2_garbage_collect_pristine(c, ic, raw);
374 
375 		spin_lock(&c->inocache_lock);
376 		ic->state = INO_STATE_CHECKEDABSENT;
377 		wake_up(&c->inocache_wq);
378 
379 		if (ret != -EBADFD) {
380 			spin_unlock(&c->inocache_lock);
381 			goto test_gcnode;
382 		}
383 
384 		/* Fall through if it wanted us to, with inocache_lock held */
385 	}
386 
387 	/* Prevent the fairly unlikely race where the gcblock is
388 	   entirely obsoleted by the final close of a file which had
389 	   the only valid nodes in the block, followed by erasure,
390 	   followed by freeing of the ic because the erased block(s)
391 	   held _all_ the nodes of that inode.... never been seen but
392 	   it's vaguely possible. */
393 
394 	inum = ic->ino;
395 	nlink = ic->nlink;
396 	spin_unlock(&c->inocache_lock);
397 
398 	f = jffs2_gc_fetch_inode(c, inum, nlink);
399 	if (IS_ERR(f)) {
400 		ret = PTR_ERR(f);
401 		goto release_sem;
402 	}
403 	if (!f) {
404 		ret = 0;
405 		goto release_sem;
406 	}
407 
408 	ret = jffs2_garbage_collect_live(c, jeb, raw, f);
409 
410 	jffs2_gc_release_inode(c, f);
411 
412  test_gcnode:
413 	if (jeb->dirty_size == gcblock_dirty && !ref_obsolete(jeb->gc_node)) {
414 		/* Eep. This really should never happen. GC is broken */
415 		printk(KERN_ERR "Error garbage collecting node at %08x!\n", ref_offset(jeb->gc_node));
416 		ret = -ENOSPC;
417 	} else if (ref_offset(jeb->gc_node) == 0x1c616bdc)
418 		printk(KERN_ERR "Wheee. Correctly GC'd node at %08x\n", ref_offset(jeb->gc_node));
419 
420  release_sem:
421 	up(&c->alloc_sem);
422 
423  eraseit_lock:
424 	/* If we've finished this block, start it erasing */
425 	spin_lock(&c->erase_completion_lock);
426 
427  eraseit:
428 	if (c->gcblock && !c->gcblock->used_size) {
429 		D1(printk(KERN_DEBUG "Block at 0x%08x completely obsoleted by GC. Moving to erase_pending_list\n", c->gcblock->offset));
430 		/* We're GC'ing an empty block? */
431 		list_add_tail(&c->gcblock->list, &c->erase_pending_list);
432 		c->gcblock = NULL;
433 		c->nr_erasing_blocks++;
434 		jffs2_erase_pending_trigger(c);
435 	}
436 	spin_unlock(&c->erase_completion_lock);
437 
438 	return ret;
439 }
440 
441 static int jffs2_garbage_collect_live(struct jffs2_sb_info *c,  struct jffs2_eraseblock *jeb,
442 				      struct jffs2_raw_node_ref *raw, struct jffs2_inode_info *f)
443 {
444 	struct jffs2_node_frag *frag;
445 	struct jffs2_full_dnode *fn = NULL;
446 	struct jffs2_full_dirent *fd;
447 	uint32_t start = 0, end = 0, nrfrags = 0;
448 	int ret = 0;
449 
450 	down(&f->sem);
451 
452 	/* Now we have the lock for this inode. Check that it's still the one at the head
453 	   of the list. */
454 
455 	spin_lock(&c->erase_completion_lock);
456 
457 	if (c->gcblock != jeb) {
458 		spin_unlock(&c->erase_completion_lock);
459 		D1(printk(KERN_DEBUG "GC block is no longer gcblock. Restart\n"));
460 		goto upnout;
461 	}
462 	if (ref_obsolete(raw)) {
463 		spin_unlock(&c->erase_completion_lock);
464 		D1(printk(KERN_DEBUG "node to be GC'd was obsoleted in the meantime.\n"));
465 		/* They'll call again */
466 		goto upnout;
467 	}
468 	spin_unlock(&c->erase_completion_lock);
469 
470 	/* OK. Looks safe. And nobody can get us now because we have the semaphore. Move the block */
471 	if (f->metadata && f->metadata->raw == raw) {
472 		fn = f->metadata;
473 		ret = jffs2_garbage_collect_metadata(c, jeb, f, fn);
474 		goto upnout;
475 	}
476 
477 	/* FIXME. Read node and do lookup? */
478 	for (frag = frag_first(&f->fragtree); frag; frag = frag_next(frag)) {
479 		if (frag->node && frag->node->raw == raw) {
480 			fn = frag->node;
481 			end = frag->ofs + frag->size;
482 			if (!nrfrags++)
483 				start = frag->ofs;
484 			if (nrfrags == frag->node->frags)
485 				break; /* We've found them all */
486 		}
487 	}
488 	if (fn) {
489 		if (ref_flags(raw) == REF_PRISTINE) {
490 			ret = jffs2_garbage_collect_pristine(c, f->inocache, raw);
491 			if (!ret) {
492 				/* Urgh. Return it sensibly. */
493 				frag->node->raw = f->inocache->nodes;
494 			}
495 			if (ret != -EBADFD)
496 				goto upnout;
497 		}
498 		/* We found a datanode. Do the GC */
499 		if((start >> PAGE_CACHE_SHIFT) < ((end-1) >> PAGE_CACHE_SHIFT)) {
500 			/* It crosses a page boundary. Therefore, it must be a hole. */
501 			ret = jffs2_garbage_collect_hole(c, jeb, f, fn, start, end);
502 		} else {
503 			/* It could still be a hole. But we GC the page this way anyway */
504 			ret = jffs2_garbage_collect_dnode(c, jeb, f, fn, start, end);
505 		}
506 		goto upnout;
507 	}
508 
509 	/* Wasn't a dnode. Try dirent */
510 	for (fd = f->dents; fd; fd=fd->next) {
511 		if (fd->raw == raw)
512 			break;
513 	}
514 
515 	if (fd && fd->ino) {
516 		ret = jffs2_garbage_collect_dirent(c, jeb, f, fd);
517 	} else if (fd) {
518 		ret = jffs2_garbage_collect_deletion_dirent(c, jeb, f, fd);
519 	} else {
520 		printk(KERN_WARNING "Raw node at 0x%08x wasn't in node lists for ino #%u\n",
521 		       ref_offset(raw), f->inocache->ino);
522 		if (ref_obsolete(raw)) {
523 			printk(KERN_WARNING "But it's obsolete so we don't mind too much\n");
524 		} else {
525 			jffs2_dbg_dump_node(c, ref_offset(raw));
526 			BUG();
527 		}
528 	}
529  upnout:
530 	up(&f->sem);
531 
532 	return ret;
533 }
534 
535 static int jffs2_garbage_collect_pristine(struct jffs2_sb_info *c,
536 					  struct jffs2_inode_cache *ic,
537 					  struct jffs2_raw_node_ref *raw)
538 {
539 	union jffs2_node_union *node;
540 	size_t retlen;
541 	int ret;
542 	uint32_t phys_ofs, alloclen;
543 	uint32_t crc, rawlen;
544 	int retried = 0;
545 
546 	D1(printk(KERN_DEBUG "Going to GC REF_PRISTINE node at 0x%08x\n", ref_offset(raw)));
547 
548 	alloclen = rawlen = ref_totlen(c, c->gcblock, raw);
549 
550 	/* Ask for a small amount of space (or the totlen if smaller) because we
551 	   don't want to force wastage of the end of a block if splitting would
552 	   work. */
553 	if (ic && alloclen > sizeof(struct jffs2_raw_inode) + JFFS2_MIN_DATA_LEN)
554 		alloclen = sizeof(struct jffs2_raw_inode) + JFFS2_MIN_DATA_LEN;
555 
556 	ret = jffs2_reserve_space_gc(c, alloclen, &alloclen, rawlen);
557 	/* 'rawlen' is not the exact summary size; it is only an upper estimation */
558 
559 	if (ret)
560 		return ret;
561 
562 	if (alloclen < rawlen) {
563 		/* Doesn't fit untouched. We'll go the old route and split it */
564 		return -EBADFD;
565 	}
566 
567 	node = kmalloc(rawlen, GFP_KERNEL);
568 	if (!node)
569 		return -ENOMEM;
570 
571 	ret = jffs2_flash_read(c, ref_offset(raw), rawlen, &retlen, (char *)node);
572 	if (!ret && retlen != rawlen)
573 		ret = -EIO;
574 	if (ret)
575 		goto out_node;
576 
577 	crc = crc32(0, node, sizeof(struct jffs2_unknown_node)-4);
578 	if (je32_to_cpu(node->u.hdr_crc) != crc) {
579 		printk(KERN_WARNING "Header CRC failed on REF_PRISTINE node at 0x%08x: Read 0x%08x, calculated 0x%08x\n",
580 		       ref_offset(raw), je32_to_cpu(node->u.hdr_crc), crc);
581 		goto bail;
582 	}
583 
584 	switch(je16_to_cpu(node->u.nodetype)) {
585 	case JFFS2_NODETYPE_INODE:
586 		crc = crc32(0, node, sizeof(node->i)-8);
587 		if (je32_to_cpu(node->i.node_crc) != crc) {
588 			printk(KERN_WARNING "Node CRC failed on REF_PRISTINE data node at 0x%08x: Read 0x%08x, calculated 0x%08x\n",
589 			       ref_offset(raw), je32_to_cpu(node->i.node_crc), crc);
590 			goto bail;
591 		}
592 
593 		if (je32_to_cpu(node->i.dsize)) {
594 			crc = crc32(0, node->i.data, je32_to_cpu(node->i.csize));
595 			if (je32_to_cpu(node->i.data_crc) != crc) {
596 				printk(KERN_WARNING "Data CRC failed on REF_PRISTINE data node at 0x%08x: Read 0x%08x, calculated 0x%08x\n",
597 				       ref_offset(raw), je32_to_cpu(node->i.data_crc), crc);
598 				goto bail;
599 			}
600 		}
601 		break;
602 
603 	case JFFS2_NODETYPE_DIRENT:
604 		crc = crc32(0, node, sizeof(node->d)-8);
605 		if (je32_to_cpu(node->d.node_crc) != crc) {
606 			printk(KERN_WARNING "Node CRC failed on REF_PRISTINE dirent node at 0x%08x: Read 0x%08x, calculated 0x%08x\n",
607 			       ref_offset(raw), je32_to_cpu(node->d.node_crc), crc);
608 			goto bail;
609 		}
610 
611 		if (strnlen(node->d.name, node->d.nsize) != node->d.nsize) {
612 			printk(KERN_WARNING "Name in dirent node at 0x%08x contains zeroes\n", ref_offset(raw));
613 			goto bail;
614 		}
615 
616 		if (node->d.nsize) {
617 			crc = crc32(0, node->d.name, node->d.nsize);
618 			if (je32_to_cpu(node->d.name_crc) != crc) {
619 				printk(KERN_WARNING "Name CRC failed on REF_PRISTINE dirent node at 0x%08x: Read 0x%08x, calculated 0x%08x\n",
620 				       ref_offset(raw), je32_to_cpu(node->d.name_crc), crc);
621 				goto bail;
622 			}
623 		}
624 		break;
625 	default:
626 		/* If it's inode-less, we don't _know_ what it is. Just copy it intact */
627 		if (ic) {
628 			printk(KERN_WARNING "Unknown node type for REF_PRISTINE node at 0x%08x: 0x%04x\n",
629 			       ref_offset(raw), je16_to_cpu(node->u.nodetype));
630 			goto bail;
631 		}
632 	}
633 
634 	/* OK, all the CRCs are good; this node can just be copied as-is. */
635  retry:
636 	phys_ofs = write_ofs(c);
637 
638 	ret = jffs2_flash_write(c, phys_ofs, rawlen, &retlen, (char *)node);
639 
640 	if (ret || (retlen != rawlen)) {
641 		printk(KERN_NOTICE "Write of %d bytes at 0x%08x failed. returned %d, retlen %zd\n",
642 		       rawlen, phys_ofs, ret, retlen);
643 		if (retlen) {
644 			jffs2_add_physical_node_ref(c, phys_ofs | REF_OBSOLETE, rawlen, NULL);
645 		} else {
646 			printk(KERN_NOTICE "Not marking the space at 0x%08x as dirty because the flash driver returned retlen zero\n", phys_ofs);
647 		}
648 		if (!retried) {
649 			/* Try to reallocate space and retry */
650 			uint32_t dummy;
651 			struct jffs2_eraseblock *jeb = &c->blocks[phys_ofs / c->sector_size];
652 
653 			retried = 1;
654 
655 			D1(printk(KERN_DEBUG "Retrying failed write of REF_PRISTINE node.\n"));
656 
657 			jffs2_dbg_acct_sanity_check(c,jeb);
658 			jffs2_dbg_acct_paranoia_check(c, jeb);
659 
660 			ret = jffs2_reserve_space_gc(c, rawlen, &dummy, rawlen);
661 						/* this is not the exact summary size of it,
662 							it is only an upper estimation */
663 
664 			if (!ret) {
665 				D1(printk(KERN_DEBUG "Allocated space at 0x%08x to retry failed write.\n", phys_ofs));
666 
667 				jffs2_dbg_acct_sanity_check(c,jeb);
668 				jffs2_dbg_acct_paranoia_check(c, jeb);
669 
670 				goto retry;
671 			}
672 			D1(printk(KERN_DEBUG "Failed to allocate space to retry failed write: %d!\n", ret));
673 		}
674 
675 		if (!ret)
676 			ret = -EIO;
677 		goto out_node;
678 	}
679 	jffs2_add_physical_node_ref(c, phys_ofs | REF_PRISTINE, rawlen, ic);
680 
681 	jffs2_mark_node_obsolete(c, raw);
682 	D1(printk(KERN_DEBUG "WHEEE! GC REF_PRISTINE node at 0x%08x succeeded\n", ref_offset(raw)));
683 
684  out_node:
685 	kfree(node);
686 	return ret;
687  bail:
688 	ret = -EBADFD;
689 	goto out_node;
690 }
691 
692 static int jffs2_garbage_collect_metadata(struct jffs2_sb_info *c, struct jffs2_eraseblock *jeb,
693 					struct jffs2_inode_info *f, struct jffs2_full_dnode *fn)
694 {
695 	struct jffs2_full_dnode *new_fn;
696 	struct jffs2_raw_inode ri;
697 	struct jffs2_node_frag *last_frag;
698 	union jffs2_device_node dev;
699 	char *mdata = NULL, mdatalen = 0;
700 	uint32_t alloclen, ilen;
701 	int ret;
702 
703 	if (S_ISBLK(JFFS2_F_I_MODE(f)) ||
704 	    S_ISCHR(JFFS2_F_I_MODE(f)) ) {
705 		/* For these, we don't actually need to read the old node */
706 		mdatalen = jffs2_encode_dev(&dev, JFFS2_F_I_RDEV(f));
707 		mdata = (char *)&dev;
708 		D1(printk(KERN_DEBUG "jffs2_garbage_collect_metadata(): Writing %d bytes of kdev_t\n", mdatalen));
709 	} else if (S_ISLNK(JFFS2_F_I_MODE(f))) {
710 		mdatalen = fn->size;
711 		mdata = kmalloc(fn->size, GFP_KERNEL);
712 		if (!mdata) {
713 			printk(KERN_WARNING "kmalloc of mdata failed in jffs2_garbage_collect_metadata()\n");
714 			return -ENOMEM;
715 		}
716 		ret = jffs2_read_dnode(c, f, fn, mdata, 0, mdatalen);
717 		if (ret) {
718 			printk(KERN_WARNING "read of old metadata failed in jffs2_garbage_collect_metadata(): %d\n", ret);
719 			kfree(mdata);
720 			return ret;
721 		}
722 		D1(printk(KERN_DEBUG "jffs2_garbage_collect_metadata(): Writing %d bites of symlink target\n", mdatalen));
723 
724 	}
725 
726 	ret = jffs2_reserve_space_gc(c, sizeof(ri) + mdatalen, &alloclen,
727 				JFFS2_SUMMARY_INODE_SIZE);
728 	if (ret) {
729 		printk(KERN_WARNING "jffs2_reserve_space_gc of %zd bytes for garbage_collect_metadata failed: %d\n",
730 		       sizeof(ri)+ mdatalen, ret);
731 		goto out;
732 	}
733 
734 	last_frag = frag_last(&f->fragtree);
735 	if (last_frag)
736 		/* Fetch the inode length from the fragtree rather then
737 		 * from i_size since i_size may have not been updated yet */
738 		ilen = last_frag->ofs + last_frag->size;
739 	else
740 		ilen = JFFS2_F_I_SIZE(f);
741 
742 	memset(&ri, 0, sizeof(ri));
743 	ri.magic = cpu_to_je16(JFFS2_MAGIC_BITMASK);
744 	ri.nodetype = cpu_to_je16(JFFS2_NODETYPE_INODE);
745 	ri.totlen = cpu_to_je32(sizeof(ri) + mdatalen);
746 	ri.hdr_crc = cpu_to_je32(crc32(0, &ri, sizeof(struct jffs2_unknown_node)-4));
747 
748 	ri.ino = cpu_to_je32(f->inocache->ino);
749 	ri.version = cpu_to_je32(++f->highest_version);
750 	ri.mode = cpu_to_jemode(JFFS2_F_I_MODE(f));
751 	ri.uid = cpu_to_je16(JFFS2_F_I_UID(f));
752 	ri.gid = cpu_to_je16(JFFS2_F_I_GID(f));
753 	ri.isize = cpu_to_je32(ilen);
754 	ri.atime = cpu_to_je32(JFFS2_F_I_ATIME(f));
755 	ri.ctime = cpu_to_je32(JFFS2_F_I_CTIME(f));
756 	ri.mtime = cpu_to_je32(JFFS2_F_I_MTIME(f));
757 	ri.offset = cpu_to_je32(0);
758 	ri.csize = cpu_to_je32(mdatalen);
759 	ri.dsize = cpu_to_je32(mdatalen);
760 	ri.compr = JFFS2_COMPR_NONE;
761 	ri.node_crc = cpu_to_je32(crc32(0, &ri, sizeof(ri)-8));
762 	ri.data_crc = cpu_to_je32(crc32(0, mdata, mdatalen));
763 
764 	new_fn = jffs2_write_dnode(c, f, &ri, mdata, mdatalen, ALLOC_GC);
765 
766 	if (IS_ERR(new_fn)) {
767 		printk(KERN_WARNING "Error writing new dnode: %ld\n", PTR_ERR(new_fn));
768 		ret = PTR_ERR(new_fn);
769 		goto out;
770 	}
771 	jffs2_mark_node_obsolete(c, fn->raw);
772 	jffs2_free_full_dnode(fn);
773 	f->metadata = new_fn;
774  out:
775 	if (S_ISLNK(JFFS2_F_I_MODE(f)))
776 		kfree(mdata);
777 	return ret;
778 }
779 
780 static int jffs2_garbage_collect_dirent(struct jffs2_sb_info *c, struct jffs2_eraseblock *jeb,
781 					struct jffs2_inode_info *f, struct jffs2_full_dirent *fd)
782 {
783 	struct jffs2_full_dirent *new_fd;
784 	struct jffs2_raw_dirent rd;
785 	uint32_t alloclen;
786 	int ret;
787 
788 	rd.magic = cpu_to_je16(JFFS2_MAGIC_BITMASK);
789 	rd.nodetype = cpu_to_je16(JFFS2_NODETYPE_DIRENT);
790 	rd.nsize = strlen(fd->name);
791 	rd.totlen = cpu_to_je32(sizeof(rd) + rd.nsize);
792 	rd.hdr_crc = cpu_to_je32(crc32(0, &rd, sizeof(struct jffs2_unknown_node)-4));
793 
794 	rd.pino = cpu_to_je32(f->inocache->ino);
795 	rd.version = cpu_to_je32(++f->highest_version);
796 	rd.ino = cpu_to_je32(fd->ino);
797 	/* If the times on this inode were set by explicit utime() they can be different,
798 	   so refrain from splatting them. */
799 	if (JFFS2_F_I_MTIME(f) == JFFS2_F_I_CTIME(f))
800 		rd.mctime = cpu_to_je32(JFFS2_F_I_MTIME(f));
801 	else
802 		rd.mctime = cpu_to_je32(0);
803 	rd.type = fd->type;
804 	rd.node_crc = cpu_to_je32(crc32(0, &rd, sizeof(rd)-8));
805 	rd.name_crc = cpu_to_je32(crc32(0, fd->name, rd.nsize));
806 
807 	ret = jffs2_reserve_space_gc(c, sizeof(rd)+rd.nsize, &alloclen,
808 				JFFS2_SUMMARY_DIRENT_SIZE(rd.nsize));
809 	if (ret) {
810 		printk(KERN_WARNING "jffs2_reserve_space_gc of %zd bytes for garbage_collect_dirent failed: %d\n",
811 		       sizeof(rd)+rd.nsize, ret);
812 		return ret;
813 	}
814 	new_fd = jffs2_write_dirent(c, f, &rd, fd->name, rd.nsize, ALLOC_GC);
815 
816 	if (IS_ERR(new_fd)) {
817 		printk(KERN_WARNING "jffs2_write_dirent in garbage_collect_dirent failed: %ld\n", PTR_ERR(new_fd));
818 		return PTR_ERR(new_fd);
819 	}
820 	jffs2_add_fd_to_list(c, new_fd, &f->dents);
821 	return 0;
822 }
823 
824 static int jffs2_garbage_collect_deletion_dirent(struct jffs2_sb_info *c, struct jffs2_eraseblock *jeb,
825 					struct jffs2_inode_info *f, struct jffs2_full_dirent *fd)
826 {
827 	struct jffs2_full_dirent **fdp = &f->dents;
828 	int found = 0;
829 
830 	/* On a medium where we can't actually mark nodes obsolete
831 	   pernamently, such as NAND flash, we need to work out
832 	   whether this deletion dirent is still needed to actively
833 	   delete a 'real' dirent with the same name that's still
834 	   somewhere else on the flash. */
835 	if (!jffs2_can_mark_obsolete(c)) {
836 		struct jffs2_raw_dirent *rd;
837 		struct jffs2_raw_node_ref *raw;
838 		int ret;
839 		size_t retlen;
840 		int name_len = strlen(fd->name);
841 		uint32_t name_crc = crc32(0, fd->name, name_len);
842 		uint32_t rawlen = ref_totlen(c, jeb, fd->raw);
843 
844 		rd = kmalloc(rawlen, GFP_KERNEL);
845 		if (!rd)
846 			return -ENOMEM;
847 
848 		/* Prevent the erase code from nicking the obsolete node refs while
849 		   we're looking at them. I really don't like this extra lock but
850 		   can't see any alternative. Suggestions on a postcard to... */
851 		down(&c->erase_free_sem);
852 
853 		for (raw = f->inocache->nodes; raw != (void *)f->inocache; raw = raw->next_in_ino) {
854 
855 			cond_resched();
856 
857 			/* We only care about obsolete ones */
858 			if (!(ref_obsolete(raw)))
859 				continue;
860 
861 			/* Any dirent with the same name is going to have the same length... */
862 			if (ref_totlen(c, NULL, raw) != rawlen)
863 				continue;
864 
865 			/* Doesn't matter if there's one in the same erase block. We're going to
866 			   delete it too at the same time. */
867 			if (SECTOR_ADDR(raw->flash_offset) == SECTOR_ADDR(fd->raw->flash_offset))
868 				continue;
869 
870 			D1(printk(KERN_DEBUG "Check potential deletion dirent at %08x\n", ref_offset(raw)));
871 
872 			/* This is an obsolete node belonging to the same directory, and it's of the right
873 			   length. We need to take a closer look...*/
874 			ret = jffs2_flash_read(c, ref_offset(raw), rawlen, &retlen, (char *)rd);
875 			if (ret) {
876 				printk(KERN_WARNING "jffs2_g_c_deletion_dirent(): Read error (%d) reading obsolete node at %08x\n", ret, ref_offset(raw));
877 				/* If we can't read it, we don't need to continue to obsolete it. Continue */
878 				continue;
879 			}
880 			if (retlen != rawlen) {
881 				printk(KERN_WARNING "jffs2_g_c_deletion_dirent(): Short read (%zd not %u) reading header from obsolete node at %08x\n",
882 				       retlen, rawlen, ref_offset(raw));
883 				continue;
884 			}
885 
886 			if (je16_to_cpu(rd->nodetype) != JFFS2_NODETYPE_DIRENT)
887 				continue;
888 
889 			/* If the name CRC doesn't match, skip */
890 			if (je32_to_cpu(rd->name_crc) != name_crc)
891 				continue;
892 
893 			/* If the name length doesn't match, or it's another deletion dirent, skip */
894 			if (rd->nsize != name_len || !je32_to_cpu(rd->ino))
895 				continue;
896 
897 			/* OK, check the actual name now */
898 			if (memcmp(rd->name, fd->name, name_len))
899 				continue;
900 
901 			/* OK. The name really does match. There really is still an older node on
902 			   the flash which our deletion dirent obsoletes. So we have to write out
903 			   a new deletion dirent to replace it */
904 			up(&c->erase_free_sem);
905 
906 			D1(printk(KERN_DEBUG "Deletion dirent at %08x still obsoletes real dirent \"%s\" at %08x for ino #%u\n",
907 				  ref_offset(fd->raw), fd->name, ref_offset(raw), je32_to_cpu(rd->ino)));
908 			kfree(rd);
909 
910 			return jffs2_garbage_collect_dirent(c, jeb, f, fd);
911 		}
912 
913 		up(&c->erase_free_sem);
914 		kfree(rd);
915 	}
916 
917 	/* FIXME: If we're deleting a dirent which contains the current mtime and ctime,
918 	   we should update the metadata node with those times accordingly */
919 
920 	/* No need for it any more. Just mark it obsolete and remove it from the list */
921 	while (*fdp) {
922 		if ((*fdp) == fd) {
923 			found = 1;
924 			*fdp = fd->next;
925 			break;
926 		}
927 		fdp = &(*fdp)->next;
928 	}
929 	if (!found) {
930 		printk(KERN_WARNING "Deletion dirent \"%s\" not found in list for ino #%u\n", fd->name, f->inocache->ino);
931 	}
932 	jffs2_mark_node_obsolete(c, fd->raw);
933 	jffs2_free_full_dirent(fd);
934 	return 0;
935 }
936 
937 static int jffs2_garbage_collect_hole(struct jffs2_sb_info *c, struct jffs2_eraseblock *jeb,
938 				      struct jffs2_inode_info *f, struct jffs2_full_dnode *fn,
939 				      uint32_t start, uint32_t end)
940 {
941 	struct jffs2_raw_inode ri;
942 	struct jffs2_node_frag *frag;
943 	struct jffs2_full_dnode *new_fn;
944 	uint32_t alloclen, ilen;
945 	int ret;
946 
947 	D1(printk(KERN_DEBUG "Writing replacement hole node for ino #%u from offset 0x%x to 0x%x\n",
948 		  f->inocache->ino, start, end));
949 
950 	memset(&ri, 0, sizeof(ri));
951 
952 	if(fn->frags > 1) {
953 		size_t readlen;
954 		uint32_t crc;
955 		/* It's partially obsoleted by a later write. So we have to
956 		   write it out again with the _same_ version as before */
957 		ret = jffs2_flash_read(c, ref_offset(fn->raw), sizeof(ri), &readlen, (char *)&ri);
958 		if (readlen != sizeof(ri) || ret) {
959 			printk(KERN_WARNING "Node read failed in jffs2_garbage_collect_hole. Ret %d, retlen %zd. Data will be lost by writing new hole node\n", ret, readlen);
960 			goto fill;
961 		}
962 		if (je16_to_cpu(ri.nodetype) != JFFS2_NODETYPE_INODE) {
963 			printk(KERN_WARNING "jffs2_garbage_collect_hole: Node at 0x%08x had node type 0x%04x instead of JFFS2_NODETYPE_INODE(0x%04x)\n",
964 			       ref_offset(fn->raw),
965 			       je16_to_cpu(ri.nodetype), JFFS2_NODETYPE_INODE);
966 			return -EIO;
967 		}
968 		if (je32_to_cpu(ri.totlen) != sizeof(ri)) {
969 			printk(KERN_WARNING "jffs2_garbage_collect_hole: Node at 0x%08x had totlen 0x%x instead of expected 0x%zx\n",
970 			       ref_offset(fn->raw),
971 			       je32_to_cpu(ri.totlen), sizeof(ri));
972 			return -EIO;
973 		}
974 		crc = crc32(0, &ri, sizeof(ri)-8);
975 		if (crc != je32_to_cpu(ri.node_crc)) {
976 			printk(KERN_WARNING "jffs2_garbage_collect_hole: Node at 0x%08x had CRC 0x%08x which doesn't match calculated CRC 0x%08x\n",
977 			       ref_offset(fn->raw),
978 			       je32_to_cpu(ri.node_crc), crc);
979 			/* FIXME: We could possibly deal with this by writing new holes for each frag */
980 			printk(KERN_WARNING "Data in the range 0x%08x to 0x%08x of inode #%u will be lost\n",
981 			       start, end, f->inocache->ino);
982 			goto fill;
983 		}
984 		if (ri.compr != JFFS2_COMPR_ZERO) {
985 			printk(KERN_WARNING "jffs2_garbage_collect_hole: Node 0x%08x wasn't a hole node!\n", ref_offset(fn->raw));
986 			printk(KERN_WARNING "Data in the range 0x%08x to 0x%08x of inode #%u will be lost\n",
987 			       start, end, f->inocache->ino);
988 			goto fill;
989 		}
990 	} else {
991 	fill:
992 		ri.magic = cpu_to_je16(JFFS2_MAGIC_BITMASK);
993 		ri.nodetype = cpu_to_je16(JFFS2_NODETYPE_INODE);
994 		ri.totlen = cpu_to_je32(sizeof(ri));
995 		ri.hdr_crc = cpu_to_je32(crc32(0, &ri, sizeof(struct jffs2_unknown_node)-4));
996 
997 		ri.ino = cpu_to_je32(f->inocache->ino);
998 		ri.version = cpu_to_je32(++f->highest_version);
999 		ri.offset = cpu_to_je32(start);
1000 		ri.dsize = cpu_to_je32(end - start);
1001 		ri.csize = cpu_to_je32(0);
1002 		ri.compr = JFFS2_COMPR_ZERO;
1003 	}
1004 
1005 	frag = frag_last(&f->fragtree);
1006 	if (frag)
1007 		/* Fetch the inode length from the fragtree rather then
1008 		 * from i_size since i_size may have not been updated yet */
1009 		ilen = frag->ofs + frag->size;
1010 	else
1011 		ilen = JFFS2_F_I_SIZE(f);
1012 
1013 	ri.mode = cpu_to_jemode(JFFS2_F_I_MODE(f));
1014 	ri.uid = cpu_to_je16(JFFS2_F_I_UID(f));
1015 	ri.gid = cpu_to_je16(JFFS2_F_I_GID(f));
1016 	ri.isize = cpu_to_je32(ilen);
1017 	ri.atime = cpu_to_je32(JFFS2_F_I_ATIME(f));
1018 	ri.ctime = cpu_to_je32(JFFS2_F_I_CTIME(f));
1019 	ri.mtime = cpu_to_je32(JFFS2_F_I_MTIME(f));
1020 	ri.data_crc = cpu_to_je32(0);
1021 	ri.node_crc = cpu_to_je32(crc32(0, &ri, sizeof(ri)-8));
1022 
1023 	ret = jffs2_reserve_space_gc(c, sizeof(ri), &alloclen,
1024 				     JFFS2_SUMMARY_INODE_SIZE);
1025 	if (ret) {
1026 		printk(KERN_WARNING "jffs2_reserve_space_gc of %zd bytes for garbage_collect_hole failed: %d\n",
1027 		       sizeof(ri), ret);
1028 		return ret;
1029 	}
1030 	new_fn = jffs2_write_dnode(c, f, &ri, NULL, 0, ALLOC_GC);
1031 
1032 	if (IS_ERR(new_fn)) {
1033 		printk(KERN_WARNING "Error writing new hole node: %ld\n", PTR_ERR(new_fn));
1034 		return PTR_ERR(new_fn);
1035 	}
1036 	if (je32_to_cpu(ri.version) == f->highest_version) {
1037 		jffs2_add_full_dnode_to_inode(c, f, new_fn);
1038 		if (f->metadata) {
1039 			jffs2_mark_node_obsolete(c, f->metadata->raw);
1040 			jffs2_free_full_dnode(f->metadata);
1041 			f->metadata = NULL;
1042 		}
1043 		return 0;
1044 	}
1045 
1046 	/*
1047 	 * We should only get here in the case where the node we are
1048 	 * replacing had more than one frag, so we kept the same version
1049 	 * number as before. (Except in case of error -- see 'goto fill;'
1050 	 * above.)
1051 	 */
1052 	D1(if(unlikely(fn->frags <= 1)) {
1053 		printk(KERN_WARNING "jffs2_garbage_collect_hole: Replacing fn with %d frag(s) but new ver %d != highest_version %d of ino #%d\n",
1054 		       fn->frags, je32_to_cpu(ri.version), f->highest_version,
1055 		       je32_to_cpu(ri.ino));
1056 	});
1057 
1058 	/* This is a partially-overlapped hole node. Mark it REF_NORMAL not REF_PRISTINE */
1059 	mark_ref_normal(new_fn->raw);
1060 
1061 	for (frag = jffs2_lookup_node_frag(&f->fragtree, fn->ofs);
1062 	     frag; frag = frag_next(frag)) {
1063 		if (frag->ofs > fn->size + fn->ofs)
1064 			break;
1065 		if (frag->node == fn) {
1066 			frag->node = new_fn;
1067 			new_fn->frags++;
1068 			fn->frags--;
1069 		}
1070 	}
1071 	if (fn->frags) {
1072 		printk(KERN_WARNING "jffs2_garbage_collect_hole: Old node still has frags!\n");
1073 		BUG();
1074 	}
1075 	if (!new_fn->frags) {
1076 		printk(KERN_WARNING "jffs2_garbage_collect_hole: New node has no frags!\n");
1077 		BUG();
1078 	}
1079 
1080 	jffs2_mark_node_obsolete(c, fn->raw);
1081 	jffs2_free_full_dnode(fn);
1082 
1083 	return 0;
1084 }
1085 
1086 static int jffs2_garbage_collect_dnode(struct jffs2_sb_info *c, struct jffs2_eraseblock *jeb,
1087 				       struct jffs2_inode_info *f, struct jffs2_full_dnode *fn,
1088 				       uint32_t start, uint32_t end)
1089 {
1090 	struct jffs2_full_dnode *new_fn;
1091 	struct jffs2_raw_inode ri;
1092 	uint32_t alloclen, offset, orig_end, orig_start;
1093 	int ret = 0;
1094 	unsigned char *comprbuf = NULL, *writebuf;
1095 	unsigned long pg;
1096 	unsigned char *pg_ptr;
1097 
1098 	memset(&ri, 0, sizeof(ri));
1099 
1100 	D1(printk(KERN_DEBUG "Writing replacement dnode for ino #%u from offset 0x%x to 0x%x\n",
1101 		  f->inocache->ino, start, end));
1102 
1103 	orig_end = end;
1104 	orig_start = start;
1105 
1106 	if (c->nr_free_blocks + c->nr_erasing_blocks > c->resv_blocks_gcmerge) {
1107 		/* Attempt to do some merging. But only expand to cover logically
1108 		   adjacent frags if the block containing them is already considered
1109 		   to be dirty. Otherwise we end up with GC just going round in
1110 		   circles dirtying the nodes it already wrote out, especially
1111 		   on NAND where we have small eraseblocks and hence a much higher
1112 		   chance of nodes having to be split to cross boundaries. */
1113 
1114 		struct jffs2_node_frag *frag;
1115 		uint32_t min, max;
1116 
1117 		min = start & ~(PAGE_CACHE_SIZE-1);
1118 		max = min + PAGE_CACHE_SIZE;
1119 
1120 		frag = jffs2_lookup_node_frag(&f->fragtree, start);
1121 
1122 		/* BUG_ON(!frag) but that'll happen anyway... */
1123 
1124 		BUG_ON(frag->ofs != start);
1125 
1126 		/* First grow down... */
1127 		while((frag = frag_prev(frag)) && frag->ofs >= min) {
1128 
1129 			/* If the previous frag doesn't even reach the beginning, there's
1130 			   excessive fragmentation. Just merge. */
1131 			if (frag->ofs > min) {
1132 				D1(printk(KERN_DEBUG "Expanding down to cover partial frag (0x%x-0x%x)\n",
1133 					  frag->ofs, frag->ofs+frag->size));
1134 				start = frag->ofs;
1135 				continue;
1136 			}
1137 			/* OK. This frag holds the first byte of the page. */
1138 			if (!frag->node || !frag->node->raw) {
1139 				D1(printk(KERN_DEBUG "First frag in page is hole (0x%x-0x%x). Not expanding down.\n",
1140 					  frag->ofs, frag->ofs+frag->size));
1141 				break;
1142 			} else {
1143 
1144 				/* OK, it's a frag which extends to the beginning of the page. Does it live
1145 				   in a block which is still considered clean? If so, don't obsolete it.
1146 				   If not, cover it anyway. */
1147 
1148 				struct jffs2_raw_node_ref *raw = frag->node->raw;
1149 				struct jffs2_eraseblock *jeb;
1150 
1151 				jeb = &c->blocks[raw->flash_offset / c->sector_size];
1152 
1153 				if (jeb == c->gcblock) {
1154 					D1(printk(KERN_DEBUG "Expanding down to cover frag (0x%x-0x%x) in gcblock at %08x\n",
1155 						  frag->ofs, frag->ofs+frag->size, ref_offset(raw)));
1156 					start = frag->ofs;
1157 					break;
1158 				}
1159 				if (!ISDIRTY(jeb->dirty_size + jeb->wasted_size)) {
1160 					D1(printk(KERN_DEBUG "Not expanding down to cover frag (0x%x-0x%x) in clean block %08x\n",
1161 						  frag->ofs, frag->ofs+frag->size, jeb->offset));
1162 					break;
1163 				}
1164 
1165 				D1(printk(KERN_DEBUG "Expanding down to cover frag (0x%x-0x%x) in dirty block %08x\n",
1166 						  frag->ofs, frag->ofs+frag->size, jeb->offset));
1167 				start = frag->ofs;
1168 				break;
1169 			}
1170 		}
1171 
1172 		/* ... then up */
1173 
1174 		/* Find last frag which is actually part of the node we're to GC. */
1175 		frag = jffs2_lookup_node_frag(&f->fragtree, end-1);
1176 
1177 		while((frag = frag_next(frag)) && frag->ofs+frag->size <= max) {
1178 
1179 			/* If the previous frag doesn't even reach the beginning, there's lots
1180 			   of fragmentation. Just merge. */
1181 			if (frag->ofs+frag->size < max) {
1182 				D1(printk(KERN_DEBUG "Expanding up to cover partial frag (0x%x-0x%x)\n",
1183 					  frag->ofs, frag->ofs+frag->size));
1184 				end = frag->ofs + frag->size;
1185 				continue;
1186 			}
1187 
1188 			if (!frag->node || !frag->node->raw) {
1189 				D1(printk(KERN_DEBUG "Last frag in page is hole (0x%x-0x%x). Not expanding up.\n",
1190 					  frag->ofs, frag->ofs+frag->size));
1191 				break;
1192 			} else {
1193 
1194 				/* OK, it's a frag which extends to the beginning of the page. Does it live
1195 				   in a block which is still considered clean? If so, don't obsolete it.
1196 				   If not, cover it anyway. */
1197 
1198 				struct jffs2_raw_node_ref *raw = frag->node->raw;
1199 				struct jffs2_eraseblock *jeb;
1200 
1201 				jeb = &c->blocks[raw->flash_offset / c->sector_size];
1202 
1203 				if (jeb == c->gcblock) {
1204 					D1(printk(KERN_DEBUG "Expanding up to cover frag (0x%x-0x%x) in gcblock at %08x\n",
1205 						  frag->ofs, frag->ofs+frag->size, ref_offset(raw)));
1206 					end = frag->ofs + frag->size;
1207 					break;
1208 				}
1209 				if (!ISDIRTY(jeb->dirty_size + jeb->wasted_size)) {
1210 					D1(printk(KERN_DEBUG "Not expanding up to cover frag (0x%x-0x%x) in clean block %08x\n",
1211 						  frag->ofs, frag->ofs+frag->size, jeb->offset));
1212 					break;
1213 				}
1214 
1215 				D1(printk(KERN_DEBUG "Expanding up to cover frag (0x%x-0x%x) in dirty block %08x\n",
1216 						  frag->ofs, frag->ofs+frag->size, jeb->offset));
1217 				end = frag->ofs + frag->size;
1218 				break;
1219 			}
1220 		}
1221 		D1(printk(KERN_DEBUG "Expanded dnode to write from (0x%x-0x%x) to (0x%x-0x%x)\n",
1222 			  orig_start, orig_end, start, end));
1223 
1224 		D1(BUG_ON(end > frag_last(&f->fragtree)->ofs + frag_last(&f->fragtree)->size));
1225 		BUG_ON(end < orig_end);
1226 		BUG_ON(start > orig_start);
1227 	}
1228 
1229 	/* First, use readpage() to read the appropriate page into the page cache */
1230 	/* Q: What happens if we actually try to GC the _same_ page for which commit_write()
1231 	 *    triggered garbage collection in the first place?
1232 	 * A: I _think_ it's OK. read_cache_page shouldn't deadlock, we'll write out the
1233 	 *    page OK. We'll actually write it out again in commit_write, which is a little
1234 	 *    suboptimal, but at least we're correct.
1235 	 */
1236 	pg_ptr = jffs2_gc_fetch_page(c, f, start, &pg);
1237 
1238 	if (IS_ERR(pg_ptr)) {
1239 		printk(KERN_WARNING "read_cache_page() returned error: %ld\n", PTR_ERR(pg_ptr));
1240 		return PTR_ERR(pg_ptr);
1241 	}
1242 
1243 	offset = start;
1244 	while(offset < orig_end) {
1245 		uint32_t datalen;
1246 		uint32_t cdatalen;
1247 		uint16_t comprtype = JFFS2_COMPR_NONE;
1248 
1249 		ret = jffs2_reserve_space_gc(c, sizeof(ri) + JFFS2_MIN_DATA_LEN,
1250 					&alloclen, JFFS2_SUMMARY_INODE_SIZE);
1251 
1252 		if (ret) {
1253 			printk(KERN_WARNING "jffs2_reserve_space_gc of %zd bytes for garbage_collect_dnode failed: %d\n",
1254 			       sizeof(ri)+ JFFS2_MIN_DATA_LEN, ret);
1255 			break;
1256 		}
1257 		cdatalen = min_t(uint32_t, alloclen - sizeof(ri), end - offset);
1258 		datalen = end - offset;
1259 
1260 		writebuf = pg_ptr + (offset & (PAGE_CACHE_SIZE -1));
1261 
1262 		comprtype = jffs2_compress(c, f, writebuf, &comprbuf, &datalen, &cdatalen);
1263 
1264 		ri.magic = cpu_to_je16(JFFS2_MAGIC_BITMASK);
1265 		ri.nodetype = cpu_to_je16(JFFS2_NODETYPE_INODE);
1266 		ri.totlen = cpu_to_je32(sizeof(ri) + cdatalen);
1267 		ri.hdr_crc = cpu_to_je32(crc32(0, &ri, sizeof(struct jffs2_unknown_node)-4));
1268 
1269 		ri.ino = cpu_to_je32(f->inocache->ino);
1270 		ri.version = cpu_to_je32(++f->highest_version);
1271 		ri.mode = cpu_to_jemode(JFFS2_F_I_MODE(f));
1272 		ri.uid = cpu_to_je16(JFFS2_F_I_UID(f));
1273 		ri.gid = cpu_to_je16(JFFS2_F_I_GID(f));
1274 		ri.isize = cpu_to_je32(JFFS2_F_I_SIZE(f));
1275 		ri.atime = cpu_to_je32(JFFS2_F_I_ATIME(f));
1276 		ri.ctime = cpu_to_je32(JFFS2_F_I_CTIME(f));
1277 		ri.mtime = cpu_to_je32(JFFS2_F_I_MTIME(f));
1278 		ri.offset = cpu_to_je32(offset);
1279 		ri.csize = cpu_to_je32(cdatalen);
1280 		ri.dsize = cpu_to_je32(datalen);
1281 		ri.compr = comprtype & 0xff;
1282 		ri.usercompr = (comprtype >> 8) & 0xff;
1283 		ri.node_crc = cpu_to_je32(crc32(0, &ri, sizeof(ri)-8));
1284 		ri.data_crc = cpu_to_je32(crc32(0, comprbuf, cdatalen));
1285 
1286 		new_fn = jffs2_write_dnode(c, f, &ri, comprbuf, cdatalen, ALLOC_GC);
1287 
1288 		jffs2_free_comprbuf(comprbuf, writebuf);
1289 
1290 		if (IS_ERR(new_fn)) {
1291 			printk(KERN_WARNING "Error writing new dnode: %ld\n", PTR_ERR(new_fn));
1292 			ret = PTR_ERR(new_fn);
1293 			break;
1294 		}
1295 		ret = jffs2_add_full_dnode_to_inode(c, f, new_fn);
1296 		offset += datalen;
1297 		if (f->metadata) {
1298 			jffs2_mark_node_obsolete(c, f->metadata->raw);
1299 			jffs2_free_full_dnode(f->metadata);
1300 			f->metadata = NULL;
1301 		}
1302 	}
1303 
1304 	jffs2_gc_release_page(c, pg_ptr, &pg);
1305 	return ret;
1306 }
1307