xref: /openbmc/linux/fs/jffs2/gc.c (revision 6088c058)
1 /*
2  * JFFS2 -- Journalling Flash File System, Version 2.
3  *
4  * Copyright © 2001-2007 Red Hat, Inc.
5  * Copyright © 2004-2010 David Woodhouse <dwmw2@infradead.org>
6  *
7  * Created by David Woodhouse <dwmw2@infradead.org>
8  *
9  * For licensing information, see the file 'LICENCE' in this directory.
10  *
11  */
12 
13 #include <linux/kernel.h>
14 #include <linux/mtd/mtd.h>
15 #include <linux/slab.h>
16 #include <linux/pagemap.h>
17 #include <linux/crc32.h>
18 #include <linux/compiler.h>
19 #include <linux/stat.h>
20 #include "nodelist.h"
21 #include "compr.h"
22 
23 static int jffs2_garbage_collect_pristine(struct jffs2_sb_info *c,
24 					  struct jffs2_inode_cache *ic,
25 					  struct jffs2_raw_node_ref *raw);
26 static int jffs2_garbage_collect_metadata(struct jffs2_sb_info *c, struct jffs2_eraseblock *jeb,
27 					struct jffs2_inode_info *f, struct jffs2_full_dnode *fd);
28 static int jffs2_garbage_collect_dirent(struct jffs2_sb_info *c, struct jffs2_eraseblock *jeb,
29 					struct jffs2_inode_info *f, struct jffs2_full_dirent *fd);
30 static int jffs2_garbage_collect_deletion_dirent(struct jffs2_sb_info *c, struct jffs2_eraseblock *jeb,
31 					struct jffs2_inode_info *f, struct jffs2_full_dirent *fd);
32 static int jffs2_garbage_collect_hole(struct jffs2_sb_info *c, struct jffs2_eraseblock *jeb,
33 				      struct jffs2_inode_info *f, struct jffs2_full_dnode *fn,
34 				      uint32_t start, uint32_t end);
35 static int jffs2_garbage_collect_dnode(struct jffs2_sb_info *c, struct jffs2_eraseblock *jeb,
36 				       struct jffs2_inode_info *f, struct jffs2_full_dnode *fn,
37 				       uint32_t start, uint32_t end);
38 static int jffs2_garbage_collect_live(struct jffs2_sb_info *c,  struct jffs2_eraseblock *jeb,
39 			       struct jffs2_raw_node_ref *raw, struct jffs2_inode_info *f);
40 
41 /* Called with erase_completion_lock held */
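/* Selection is pseudo-randomly weighted using 'jiffies % 128': blocks on the
   bad_used_list are taken first when we can spare the space, otherwise the
   erasable (50/128), very_dirty (60/128), dirty (16/128) and clean (2/128)
   lists, falling back further down the chain when the preferred lists are
   empty. */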
42 static struct jffs2_eraseblock *jffs2_find_gc_block(struct jffs2_sb_info *c)
43 {
44 	struct jffs2_eraseblock *ret;
45 	struct list_head *nextlist = NULL;
46 	int n = jiffies % 128;
47 
48 	/* Pick an eraseblock to garbage collect next. This is where we'll
49 	   put the clever wear-levelling algorithms. Eventually.  */
50 	/* We possibly want to favour the dirtier blocks more when the
51 	   number of free blocks is low. */
52 again:
53 	if (!list_empty(&c->bad_used_list) && c->nr_free_blocks > c->resv_blocks_gcbad) {
54 		D1(printk(KERN_DEBUG "Picking block from bad_used_list to GC next\n"));
55 		nextlist = &c->bad_used_list;
56 	} else if (n < 50 && !list_empty(&c->erasable_list)) {
57 		/* Note that most of them will have gone directly to be erased.
58 		   So don't favour the erasable_list _too_ much. */
59 		D1(printk(KERN_DEBUG "Picking block from erasable_list to GC next\n"));
60 		nextlist = &c->erasable_list;
61 	} else if (n < 110 && !list_empty(&c->very_dirty_list)) {
62 		/* Most of the time, pick one off the very_dirty list */
63 		D1(printk(KERN_DEBUG "Picking block from very_dirty_list to GC next\n"));
64 		nextlist = &c->very_dirty_list;
65 	} else if (n < 126 && !list_empty(&c->dirty_list)) {
66 		D1(printk(KERN_DEBUG "Picking block from dirty_list to GC next\n"));
67 		nextlist = &c->dirty_list;
68 	} else if (!list_empty(&c->clean_list)) {
69 		D1(printk(KERN_DEBUG "Picking block from clean_list to GC next\n"));
70 		nextlist = &c->clean_list;
71 	} else if (!list_empty(&c->dirty_list)) {
72 		D1(printk(KERN_DEBUG "Picking block from dirty_list to GC next (clean_list was empty)\n"));
73 
74 		nextlist = &c->dirty_list;
75 	} else if (!list_empty(&c->very_dirty_list)) {
76 		D1(printk(KERN_DEBUG "Picking block from very_dirty_list to GC next (clean_list and dirty_list were empty)\n"));
77 		nextlist = &c->very_dirty_list;
78 	} else if (!list_empty(&c->erasable_list)) {
79 		D1(printk(KERN_DEBUG "Picking block from erasable_list to GC next (clean_list and {very_,}dirty_list were empty)\n"));
80 
81 		nextlist = &c->erasable_list;
82 	} else if (!list_empty(&c->erasable_pending_wbuf_list)) {
83 		/* There are blocks waiting for the wbuf sync */
84 		D1(printk(KERN_DEBUG "Synching wbuf in order to reuse erasable_pending_wbuf_list blocks\n"));
85 		spin_unlock(&c->erase_completion_lock);
86 		jffs2_flush_wbuf_pad(c);
87 		spin_lock(&c->erase_completion_lock);
88 		goto again;
89 	} else {
90 		/* Eep. All were empty */
91 		D1(printk(KERN_NOTICE "jffs2: No clean, dirty _or_ erasable blocks to GC from! Where are they all?\n"));
92 		return NULL;
93 	}
94 
95 	ret = list_entry(nextlist->next, struct jffs2_eraseblock, list);
96 	list_del(&ret->list);
97 	c->gcblock = ret;
98 	ret->gc_node = ret->first_node;
99 	if (!ret->gc_node) {
100 		printk(KERN_WARNING "Eep. ret->gc_node for block at 0x%08x is NULL\n", ret->offset);
101 		BUG();
102 	}
103 
104 	/* Have we accidentally picked a clean block with wasted space ? */
105 	if (ret->wasted_size) {
106 		D1(printk(KERN_DEBUG "Converting wasted_size %08x to dirty_size\n", ret->wasted_size));
107 		ret->dirty_size += ret->wasted_size;
108 		c->wasted_size -= ret->wasted_size;
109 		c->dirty_size += ret->wasted_size;
110 		ret->wasted_size = 0;
111 	}
112 
113 	return ret;
114 }
115 
116 /* jffs2_garbage_collect_pass
117  * Make a single attempt to progress GC. Move one node, and possibly
118  * start erasing one eraseblock.
119  */
120 int jffs2_garbage_collect_pass(struct jffs2_sb_info *c)
121 {
122 	struct jffs2_inode_info *f;
123 	struct jffs2_inode_cache *ic;
124 	struct jffs2_eraseblock *jeb;
125 	struct jffs2_raw_node_ref *raw;
126 	uint32_t gcblock_dirty;
127 	int ret = 0, inum, nlink;
128 	int xattr = 0;
129 
130 	if (mutex_lock_interruptible(&c->alloc_sem))
131 		return -EINTR;
132 
133 	for (;;) {
134 		spin_lock(&c->erase_completion_lock);
135 		if (!c->unchecked_size)
136 			break;
137 
138 		/* We can't start doing GC yet. We haven't finished checking
139 		   the node CRCs etc. Do it now. */
140 
141 		/* checked_ino is protected by the alloc_sem */
142 		if (c->checked_ino > c->highest_ino && xattr) {
143 			printk(KERN_CRIT "Checked all inodes but still 0x%x bytes of unchecked space?\n",
144 			       c->unchecked_size);
145 			jffs2_dbg_dump_block_lists_nolock(c);
146 			spin_unlock(&c->erase_completion_lock);
147 			mutex_unlock(&c->alloc_sem);
148 			return -ENOSPC;
149 		}
150 
151 		spin_unlock(&c->erase_completion_lock);
152 
153 		if (!xattr)
154 			xattr = jffs2_verify_xattr(c);
155 
156 		spin_lock(&c->inocache_lock);
157 
158 		ic = jffs2_get_ino_cache(c, c->checked_ino++);
159 
160 		if (!ic) {
161 			spin_unlock(&c->inocache_lock);
162 			continue;
163 		}
164 
165 		if (!ic->pino_nlink) {
166 			D1(printk(KERN_DEBUG "Skipping check of ino #%d with nlink/pino zero\n",
167 				  ic->ino));
168 			spin_unlock(&c->inocache_lock);
169 			jffs2_xattr_delete_inode(c, ic);
170 			continue;
171 		}
172 		switch(ic->state) {
173 		case INO_STATE_CHECKEDABSENT:
174 		case INO_STATE_PRESENT:
175 			D1(printk(KERN_DEBUG "Skipping ino #%u already checked\n", ic->ino));
176 			spin_unlock(&c->inocache_lock);
177 			continue;
178 
179 		case INO_STATE_GC:
180 		case INO_STATE_CHECKING:
181 			printk(KERN_WARNING "Inode #%u is in state %d during CRC check phase!\n", ic->ino, ic->state);
182 			spin_unlock(&c->inocache_lock);
183 			BUG();
184 
185 		case INO_STATE_READING:
186 			/* We need to wait for it to finish, lest we move on
187 			   and trigger the BUG() above while we haven't yet
188 			   finished checking all its nodes */
189 			D1(printk(KERN_DEBUG "Waiting for ino #%u to finish reading\n", ic->ino));
190 			/* We need to come back again for the _same_ inode. We've
191 			 made no progress in this case, but that should be OK */
192 			c->checked_ino--;
193 
194 			mutex_unlock(&c->alloc_sem);
195 			sleep_on_spinunlock(&c->inocache_wq, &c->inocache_lock);
196 			return 0;
197 
198 		default:
199 			BUG();
200 
201 		case INO_STATE_UNCHECKED:
202 			;
203 		}
204 		ic->state = INO_STATE_CHECKING;
205 		spin_unlock(&c->inocache_lock);
206 
207 		D1(printk(KERN_DEBUG "jffs2_garbage_collect_pass() triggering inode scan of ino#%u\n", ic->ino));
208 
209 		ret = jffs2_do_crccheck_inode(c, ic);
210 		if (ret)
211 			printk(KERN_WARNING "Returned error for crccheck of ino #%u. Expect badness...\n", ic->ino);
212 
213 		jffs2_set_inocache_state(c, ic, INO_STATE_CHECKEDABSENT);
214 		mutex_unlock(&c->alloc_sem);
215 		return ret;
216 	}
217 
218 	/* If there are any blocks which need erasing, erase them now */
219 	if (!list_empty(&c->erase_complete_list) ||
220 	    !list_empty(&c->erase_pending_list)) {
221 		spin_unlock(&c->erase_completion_lock);
222 		D1(printk(KERN_DEBUG "jffs2_garbage_collect_pass() erasing pending blocks\n"));
223 		if (jffs2_erase_pending_blocks(c, 1)) {
224 			mutex_unlock(&c->alloc_sem);
225 			return 0;
226 		}
227 		D1(printk(KERN_DEBUG "No progress from erasing blocks; doing GC anyway\n"));
228 		spin_lock(&c->erase_completion_lock);
229 	}
230 
231 	/* First, work out which block we're garbage-collecting */
232 	jeb = c->gcblock;
233 
234 	if (!jeb)
235 		jeb = jffs2_find_gc_block(c);
236 
237 	if (!jeb) {
238 		/* Couldn't find a free block. But maybe we can just erase one and make 'progress'? */
239 		if (c->nr_erasing_blocks) {
240 			spin_unlock(&c->erase_completion_lock);
241 			mutex_unlock(&c->alloc_sem);
242 			return -EAGAIN;
243 		}
244 		D1(printk(KERN_NOTICE "jffs2: Couldn't find erase block to garbage collect!\n"));
245 		spin_unlock(&c->erase_completion_lock);
246 		mutex_unlock(&c->alloc_sem);
247 		return -EIO;
248 	}
249 
250 	D1(printk(KERN_DEBUG "GC from block %08x, used_size %08x, dirty_size %08x, free_size %08x\n", jeb->offset, jeb->used_size, jeb->dirty_size, jeb->free_size));
251 	D1(if (c->nextblock)
252 	   printk(KERN_DEBUG "Nextblock at  %08x, used_size %08x, dirty_size %08x, wasted_size %08x, free_size %08x\n", c->nextblock->offset, c->nextblock->used_size, c->nextblock->dirty_size, c->nextblock->wasted_size, c->nextblock->free_size));
253 
254 	if (!jeb->used_size) {
255 		mutex_unlock(&c->alloc_sem);
256 		goto eraseit;
257 	}
258 
259 	raw = jeb->gc_node;
260 	gcblock_dirty = jeb->dirty_size;
261 
262 	while(ref_obsolete(raw)) {
263 		D1(printk(KERN_DEBUG "Node at 0x%08x is obsolete... skipping\n", ref_offset(raw)));
264 		raw = ref_next(raw);
265 		if (unlikely(!raw)) {
266 			printk(KERN_WARNING "eep. End of raw list while still supposedly nodes to GC\n");
267 			printk(KERN_WARNING "erase block at 0x%08x. free_size 0x%08x, dirty_size 0x%08x, used_size 0x%08x\n",
268 			       jeb->offset, jeb->free_size, jeb->dirty_size, jeb->used_size);
269 			jeb->gc_node = raw;
270 			spin_unlock(&c->erase_completion_lock);
271 			mutex_unlock(&c->alloc_sem);
272 			BUG();
273 		}
274 	}
275 	jeb->gc_node = raw;
276 
277 	D1(printk(KERN_DEBUG "Going to garbage collect node at 0x%08x\n", ref_offset(raw)));
278 
279 	if (!raw->next_in_ino) {
280 		/* Inode-less node. Clean marker, snapshot or something like that */
281 		spin_unlock(&c->erase_completion_lock);
282 		if (ref_flags(raw) == REF_PRISTINE) {
283 			/* It's an unknown node with JFFS2_FEATURE_RWCOMPAT_COPY */
284 			jffs2_garbage_collect_pristine(c, NULL, raw);
285 		} else {
286 			/* Just mark it obsolete */
287 			jffs2_mark_node_obsolete(c, raw);
288 		}
289 		mutex_unlock(&c->alloc_sem);
290 		goto eraseit_lock;
291 	}
292 
293 	ic = jffs2_raw_ref_to_ic(raw);
294 
295 #ifdef CONFIG_JFFS2_FS_XATTR
296 	/* When 'ic' refers to an xattr_datum/xattr_ref, this node is GCed as an xattr.
297 	 * We can tell whether this node is an inode or an xattr from ic->class. */
298 	if (ic->class == RAWNODE_CLASS_XATTR_DATUM
299 	    || ic->class == RAWNODE_CLASS_XATTR_REF) {
300 		spin_unlock(&c->erase_completion_lock);
301 
302 		if (ic->class == RAWNODE_CLASS_XATTR_DATUM) {
303 			ret = jffs2_garbage_collect_xattr_datum(c, (struct jffs2_xattr_datum *)ic, raw);
304 		} else {
305 			ret = jffs2_garbage_collect_xattr_ref(c, (struct jffs2_xattr_ref *)ic, raw);
306 		}
307 		goto test_gcnode;
308 	}
309 #endif
310 
311 	/* We need to hold the inocache. Either the erase_completion_lock or
312 	   the inocache_lock is sufficient; we trade down since the inocache_lock
313 	   causes less contention. */
314 	spin_lock(&c->inocache_lock);
315 
316 	spin_unlock(&c->erase_completion_lock);
317 
318 	D1(printk(KERN_DEBUG "jffs2_garbage_collect_pass collecting from block @0x%08x. Node @0x%08x(%d), ino #%u\n", jeb->offset, ref_offset(raw), ref_flags(raw), ic->ino));
319 
320 	/* Three possibilities:
321 	   1. Inode is already in-core. We must iget it and do proper
322 	      updating to its fragtree, etc.
323 	   2. Inode is not in-core, node is REF_PRISTINE. We lock the
324 	      inocache to prevent a read_inode(), copy the node intact.
325 	   3. Inode is not in-core, node is not pristine. We must iget()
326 	      and take the slow path.
327 	*/
328 
329 	switch(ic->state) {
330 	case INO_STATE_CHECKEDABSENT:
331 		/* It's been checked, but it's not currently in-core.
332 		   We can just copy any pristine nodes, but have
333 		   to prevent anyone else from doing read_inode() while
334 		   we're at it, so we set the state accordingly */
335 		if (ref_flags(raw) == REF_PRISTINE)
336 			ic->state = INO_STATE_GC;
337 		else {
338 			D1(printk(KERN_DEBUG "Ino #%u is absent but node not REF_PRISTINE. Reading.\n",
339 				  ic->ino));
340 		}
341 		break;
342 
343 	case INO_STATE_PRESENT:
344 		/* It's in-core. GC must iget() it. */
345 		break;
346 
347 	case INO_STATE_UNCHECKED:
348 	case INO_STATE_CHECKING:
349 	case INO_STATE_GC:
350 		/* Should never happen. We should have finished checking
351 		   by the time we actually start doing any GC, and since
352 		   we're holding the alloc_sem, no other garbage collection
353 		   can happen.
354 		*/
355 		printk(KERN_CRIT "Inode #%u already in state %d in jffs2_garbage_collect_pass()!\n",
356 		       ic->ino, ic->state);
357 		mutex_unlock(&c->alloc_sem);
358 		spin_unlock(&c->inocache_lock);
359 		BUG();
360 
361 	case INO_STATE_READING:
362 		/* Someone's currently trying to read it. We must wait for
363 		   them to finish and then go through the full iget() route
364 		   to do the GC. However, sometimes read_inode() needs to get
365 		   the alloc_sem() (for marking nodes invalid) so we must
366 		   drop the alloc_sem before sleeping. */
367 
368 		mutex_unlock(&c->alloc_sem);
369 		D1(printk(KERN_DEBUG "jffs2_garbage_collect_pass() waiting for ino #%u in state %d\n",
370 			  ic->ino, ic->state));
371 		sleep_on_spinunlock(&c->inocache_wq, &c->inocache_lock);
372 		/* And because we dropped the alloc_sem we must start again from the
373 		   beginning. Ponder chance of livelock here -- we're returning success
374 		   without actually making any progress.
375 
376 		   Q: What are the chances that the inode is back in INO_STATE_READING
377 		   again by the time we next enter this function? And that this happens
378 		   enough times to cause a real delay?
379 
380 		   A: Small enough that I don't care :)
381 		*/
382 		return 0;
383 	}
384 
385 	/* OK. Now if the inode is in state INO_STATE_GC, we are going to copy the
386 	   node intact, and we don't have to muck about with the fragtree etc.
387 	   because we know it's not in-core. If it _was_ in-core, we go through
388 	   all the iget() crap anyway */
389 
390 	if (ic->state == INO_STATE_GC) {
391 		spin_unlock(&c->inocache_lock);
392 
393 		ret = jffs2_garbage_collect_pristine(c, ic, raw);
394 
395 		spin_lock(&c->inocache_lock);
396 		ic->state = INO_STATE_CHECKEDABSENT;
397 		wake_up(&c->inocache_wq);
398 
399 		if (ret != -EBADFD) {
400 			spin_unlock(&c->inocache_lock);
401 			goto test_gcnode;
402 		}
403 
404 		/* Fall through if it wanted us to, with inocache_lock held */
405 	}
406 
407 	/* Prevent the fairly unlikely race where the gcblock is
408 	   entirely obsoleted by the final close of a file which had
409 	   the only valid nodes in the block, followed by erasure,
410 	   followed by freeing of the ic because the erased block(s)
411 	   held _all_ the nodes of that inode.... never been seen but
412 	   it's vaguely possible. */
413 
414 	inum = ic->ino;
415 	nlink = ic->pino_nlink;
416 	spin_unlock(&c->inocache_lock);
417 
418 	f = jffs2_gc_fetch_inode(c, inum, !nlink);
419 	if (IS_ERR(f)) {
420 		ret = PTR_ERR(f);
421 		goto release_sem;
422 	}
423 	if (!f) {
424 		ret = 0;
425 		goto release_sem;
426 	}
427 
428 	ret = jffs2_garbage_collect_live(c, jeb, raw, f);
429 
430 	jffs2_gc_release_inode(c, f);
431 
432  test_gcnode:
433 	if (jeb->dirty_size == gcblock_dirty && !ref_obsolete(jeb->gc_node)) {
434 		/* Eep. This really should never happen. GC is broken */
435 		printk(KERN_ERR "Error garbage collecting node at %08x!\n", ref_offset(jeb->gc_node));
436 		ret = -ENOSPC;
437 	}
438  release_sem:
439 	mutex_unlock(&c->alloc_sem);
440 
441  eraseit_lock:
442 	/* If we've finished this block, start it erasing */
443 	spin_lock(&c->erase_completion_lock);
444 
445  eraseit:
446 	if (c->gcblock && !c->gcblock->used_size) {
447 		D1(printk(KERN_DEBUG "Block at 0x%08x completely obsoleted by GC. Moving to erase_pending_list\n", c->gcblock->offset));
448 		/* We're GC'ing an empty block? */
449 		list_add_tail(&c->gcblock->list, &c->erase_pending_list);
450 		c->gcblock = NULL;
451 		c->nr_erasing_blocks++;
452 		jffs2_garbage_collect_trigger(c);
453 	}
454 	spin_unlock(&c->erase_completion_lock);
455 
456 	return ret;
457 }
458 
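/* GC a single live node belonging to inode 'f'. Takes f->sem, re-checks that
   'jeb' is still the block being GCed and that 'raw' has not been obsoleted in
   the meantime, then works out whether the node is inode metadata, a data node
   or a dirent and hands it to the appropriate helper. */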
459 static int jffs2_garbage_collect_live(struct jffs2_sb_info *c,  struct jffs2_eraseblock *jeb,
460 				      struct jffs2_raw_node_ref *raw, struct jffs2_inode_info *f)
461 {
462 	struct jffs2_node_frag *frag;
463 	struct jffs2_full_dnode *fn = NULL;
464 	struct jffs2_full_dirent *fd;
465 	uint32_t start = 0, end = 0, nrfrags = 0;
466 	int ret = 0;
467 
468 	mutex_lock(&f->sem);
469 
470 	/* Now we have the lock for this inode. Check that it's still the one at the head
471 	   of the list. */
472 
473 	spin_lock(&c->erase_completion_lock);
474 
475 	if (c->gcblock != jeb) {
476 		spin_unlock(&c->erase_completion_lock);
477 		D1(printk(KERN_DEBUG "GC block is no longer gcblock. Restart\n"));
478 		goto upnout;
479 	}
480 	if (ref_obsolete(raw)) {
481 		spin_unlock(&c->erase_completion_lock);
482 		D1(printk(KERN_DEBUG "node to be GC'd was obsoleted in the meantime.\n"));
483 		/* They'll call again */
484 		goto upnout;
485 	}
486 	spin_unlock(&c->erase_completion_lock);
487 
488 	/* OK. Looks safe. And nobody can get us now because we have the semaphore. Move the block */
489 	if (f->metadata && f->metadata->raw == raw) {
490 		fn = f->metadata;
491 		ret = jffs2_garbage_collect_metadata(c, jeb, f, fn);
492 		goto upnout;
493 	}
494 
495 	/* FIXME. Read node and do lookup? */
496 	for (frag = frag_first(&f->fragtree); frag; frag = frag_next(frag)) {
497 		if (frag->node && frag->node->raw == raw) {
498 			fn = frag->node;
499 			end = frag->ofs + frag->size;
500 			if (!nrfrags++)
501 				start = frag->ofs;
502 			if (nrfrags == frag->node->frags)
503 				break; /* We've found them all */
504 		}
505 	}
506 	if (fn) {
507 		if (ref_flags(raw) == REF_PRISTINE) {
508 			ret = jffs2_garbage_collect_pristine(c, f->inocache, raw);
509 			if (!ret) {
510 				/* Urgh. Return it sensibly. */
511 				frag->node->raw = f->inocache->nodes;
512 			}
513 			if (ret != -EBADFD)
514 				goto upnout;
515 		}
516 		/* We found a datanode. Do the GC */
517 		if((start >> PAGE_CACHE_SHIFT) < ((end-1) >> PAGE_CACHE_SHIFT)) {
518 			/* It crosses a page boundary. Therefore, it must be a hole. */
519 			ret = jffs2_garbage_collect_hole(c, jeb, f, fn, start, end);
520 		} else {
521 			/* It could still be a hole. But we GC the page this way anyway */
522 			ret = jffs2_garbage_collect_dnode(c, jeb, f, fn, start, end);
523 		}
524 		goto upnout;
525 	}
526 
527 	/* Wasn't a dnode. Try dirent */
528 	for (fd = f->dents; fd; fd=fd->next) {
529 		if (fd->raw == raw)
530 			break;
531 	}
532 
533 	if (fd && fd->ino) {
534 		ret = jffs2_garbage_collect_dirent(c, jeb, f, fd);
535 	} else if (fd) {
536 		ret = jffs2_garbage_collect_deletion_dirent(c, jeb, f, fd);
537 	} else {
538 		printk(KERN_WARNING "Raw node at 0x%08x wasn't in node lists for ino #%u\n",
539 		       ref_offset(raw), f->inocache->ino);
540 		if (ref_obsolete(raw)) {
541 			printk(KERN_WARNING "But it's obsolete so we don't mind too much\n");
542 		} else {
543 			jffs2_dbg_dump_node(c, ref_offset(raw));
544 			BUG();
545 		}
546 	}
547  upnout:
548 	mutex_unlock(&f->sem);
549 
550 	return ret;
551 }
552 
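/* Copy a REF_PRISTINE node verbatim to a newly reserved location after
   re-verifying all of its CRCs. Returns -EBADFD if the node fails
   verification or cannot be fitted untouched into the space we can reserve,
   telling the caller to fall back to the normal split-and-rewrite path. */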
553 static int jffs2_garbage_collect_pristine(struct jffs2_sb_info *c,
554 					  struct jffs2_inode_cache *ic,
555 					  struct jffs2_raw_node_ref *raw)
556 {
557 	union jffs2_node_union *node;
558 	size_t retlen;
559 	int ret;
560 	uint32_t phys_ofs, alloclen;
561 	uint32_t crc, rawlen;
562 	int retried = 0;
563 
564 	D1(printk(KERN_DEBUG "Going to GC REF_PRISTINE node at 0x%08x\n", ref_offset(raw)));
565 
566 	alloclen = rawlen = ref_totlen(c, c->gcblock, raw);
567 
568 	/* Ask for a small amount of space (or the totlen if smaller) because we
569 	   don't want to force wastage of the end of a block if splitting would
570 	   work. */
571 	if (ic && alloclen > sizeof(struct jffs2_raw_inode) + JFFS2_MIN_DATA_LEN)
572 		alloclen = sizeof(struct jffs2_raw_inode) + JFFS2_MIN_DATA_LEN;
573 
574 	ret = jffs2_reserve_space_gc(c, alloclen, &alloclen, rawlen);
575 	/* 'rawlen' is not the exact summary size; it is only an upper estimate */
576 
577 	if (ret)
578 		return ret;
579 
580 	if (alloclen < rawlen) {
581 		/* Doesn't fit untouched. We'll go the old route and split it */
582 		return -EBADFD;
583 	}
584 
585 	node = kmalloc(rawlen, GFP_KERNEL);
586 	if (!node)
587 		return -ENOMEM;
588 
589 	ret = jffs2_flash_read(c, ref_offset(raw), rawlen, &retlen, (char *)node);
590 	if (!ret && retlen != rawlen)
591 		ret = -EIO;
592 	if (ret)
593 		goto out_node;
594 
595 	crc = crc32(0, node, sizeof(struct jffs2_unknown_node)-4);
596 	if (je32_to_cpu(node->u.hdr_crc) != crc) {
597 		printk(KERN_WARNING "Header CRC failed on REF_PRISTINE node at 0x%08x: Read 0x%08x, calculated 0x%08x\n",
598 		       ref_offset(raw), je32_to_cpu(node->u.hdr_crc), crc);
599 		goto bail;
600 	}
601 
602 	switch(je16_to_cpu(node->u.nodetype)) {
603 	case JFFS2_NODETYPE_INODE:
604 		crc = crc32(0, node, sizeof(node->i)-8);
605 		if (je32_to_cpu(node->i.node_crc) != crc) {
606 			printk(KERN_WARNING "Node CRC failed on REF_PRISTINE data node at 0x%08x: Read 0x%08x, calculated 0x%08x\n",
607 			       ref_offset(raw), je32_to_cpu(node->i.node_crc), crc);
608 			goto bail;
609 		}
610 
611 		if (je32_to_cpu(node->i.dsize)) {
612 			crc = crc32(0, node->i.data, je32_to_cpu(node->i.csize));
613 			if (je32_to_cpu(node->i.data_crc) != crc) {
614 				printk(KERN_WARNING "Data CRC failed on REF_PRISTINE data node at 0x%08x: Read 0x%08x, calculated 0x%08x\n",
615 				       ref_offset(raw), je32_to_cpu(node->i.data_crc), crc);
616 				goto bail;
617 			}
618 		}
619 		break;
620 
621 	case JFFS2_NODETYPE_DIRENT:
622 		crc = crc32(0, node, sizeof(node->d)-8);
623 		if (je32_to_cpu(node->d.node_crc) != crc) {
624 			printk(KERN_WARNING "Node CRC failed on REF_PRISTINE dirent node at 0x%08x: Read 0x%08x, calculated 0x%08x\n",
625 			       ref_offset(raw), je32_to_cpu(node->d.node_crc), crc);
626 			goto bail;
627 		}
628 
629 		if (strnlen(node->d.name, node->d.nsize) != node->d.nsize) {
630 			printk(KERN_WARNING "Name in dirent node at 0x%08x contains zeroes\n", ref_offset(raw));
631 			goto bail;
632 		}
633 
634 		if (node->d.nsize) {
635 			crc = crc32(0, node->d.name, node->d.nsize);
636 			if (je32_to_cpu(node->d.name_crc) != crc) {
637 				printk(KERN_WARNING "Name CRC failed on REF_PRISTINE dirent node at 0x%08x: Read 0x%08x, calculated 0x%08x\n",
638 				       ref_offset(raw), je32_to_cpu(node->d.name_crc), crc);
639 				goto bail;
640 			}
641 		}
642 		break;
643 	default:
644 		/* If it's inode-less, we don't _know_ what it is. Just copy it intact */
645 		if (ic) {
646 			printk(KERN_WARNING "Unknown node type for REF_PRISTINE node at 0x%08x: 0x%04x\n",
647 			       ref_offset(raw), je16_to_cpu(node->u.nodetype));
648 			goto bail;
649 		}
650 	}
651 
652 	/* OK, all the CRCs are good; this node can just be copied as-is. */
653  retry:
654 	phys_ofs = write_ofs(c);
655 
656 	ret = jffs2_flash_write(c, phys_ofs, rawlen, &retlen, (char *)node);
657 
658 	if (ret || (retlen != rawlen)) {
659 		printk(KERN_NOTICE "Write of %d bytes at 0x%08x failed. returned %d, retlen %zd\n",
660 		       rawlen, phys_ofs, ret, retlen);
661 		if (retlen) {
662 			jffs2_add_physical_node_ref(c, phys_ofs | REF_OBSOLETE, rawlen, NULL);
663 		} else {
664 			printk(KERN_NOTICE "Not marking the space at 0x%08x as dirty because the flash driver returned retlen zero\n", phys_ofs);
665 		}
666 		if (!retried) {
667 			/* Try to reallocate space and retry */
668 			uint32_t dummy;
669 			struct jffs2_eraseblock *jeb = &c->blocks[phys_ofs / c->sector_size];
670 
671 			retried = 1;
672 
673 			D1(printk(KERN_DEBUG "Retrying failed write of REF_PRISTINE node.\n"));
674 
675 			jffs2_dbg_acct_sanity_check(c,jeb);
676 			jffs2_dbg_acct_paranoia_check(c, jeb);
677 
678 			ret = jffs2_reserve_space_gc(c, rawlen, &dummy, rawlen);
679 						/* this is not the exact summary size,
680 							it is only an upper estimate */
681 
682 			if (!ret) {
683 				D1(printk(KERN_DEBUG "Allocated space at 0x%08x to retry failed write.\n", phys_ofs));
684 
685 				jffs2_dbg_acct_sanity_check(c,jeb);
686 				jffs2_dbg_acct_paranoia_check(c, jeb);
687 
688 				goto retry;
689 			}
690 			D1(printk(KERN_DEBUG "Failed to allocate space to retry failed write: %d!\n", ret));
691 		}
692 
693 		if (!ret)
694 			ret = -EIO;
695 		goto out_node;
696 	}
697 	jffs2_add_physical_node_ref(c, phys_ofs | REF_PRISTINE, rawlen, ic);
698 
699 	jffs2_mark_node_obsolete(c, raw);
700 	D1(printk(KERN_DEBUG "WHEEE! GC REF_PRISTINE node at 0x%08x succeeded\n", ref_offset(raw)));
701 
702  out_node:
703 	kfree(node);
704 	return ret;
705  bail:
706 	ret = -EBADFD;
707 	goto out_node;
708 }
709 
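/* Re-write the inode's metadata node with a fresh version number and
   obsolete the old one. For block and char devices the payload is the
   encoded device number; for symlinks it is the link target, read back
   from the old node. */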
710 static int jffs2_garbage_collect_metadata(struct jffs2_sb_info *c, struct jffs2_eraseblock *jeb,
711 					struct jffs2_inode_info *f, struct jffs2_full_dnode *fn)
712 {
713 	struct jffs2_full_dnode *new_fn;
714 	struct jffs2_raw_inode ri;
715 	struct jffs2_node_frag *last_frag;
716 	union jffs2_device_node dev;
717 	char *mdata = NULL;
718 	int mdatalen = 0;
719 	uint32_t alloclen, ilen;
720 	int ret;
721 
722 	if (S_ISBLK(JFFS2_F_I_MODE(f)) ||
723 	    S_ISCHR(JFFS2_F_I_MODE(f)) ) {
724 		/* For these, we don't actually need to read the old node */
725 		mdatalen = jffs2_encode_dev(&dev, JFFS2_F_I_RDEV(f));
726 		mdata = (char *)&dev;
727 		D1(printk(KERN_DEBUG "jffs2_garbage_collect_metadata(): Writing %d bytes of kdev_t\n", mdatalen));
728 	} else if (S_ISLNK(JFFS2_F_I_MODE(f))) {
729 		mdatalen = fn->size;
730 		mdata = kmalloc(fn->size, GFP_KERNEL);
731 		if (!mdata) {
732 			printk(KERN_WARNING "kmalloc of mdata failed in jffs2_garbage_collect_metadata()\n");
733 			return -ENOMEM;
734 		}
735 		ret = jffs2_read_dnode(c, f, fn, mdata, 0, mdatalen);
736 		if (ret) {
737 			printk(KERN_WARNING "read of old metadata failed in jffs2_garbage_collect_metadata(): %d\n", ret);
738 			kfree(mdata);
739 			return ret;
740 		}
741 		D1(printk(KERN_DEBUG "jffs2_garbage_collect_metadata(): Writing %d bites of symlink target\n", mdatalen));
742 
743 	}
744 
745 	ret = jffs2_reserve_space_gc(c, sizeof(ri) + mdatalen, &alloclen,
746 				JFFS2_SUMMARY_INODE_SIZE);
747 	if (ret) {
748 		printk(KERN_WARNING "jffs2_reserve_space_gc of %zd bytes for garbage_collect_metadata failed: %d\n",
749 		       sizeof(ri)+ mdatalen, ret);
750 		goto out;
751 	}
752 
753 	last_frag = frag_last(&f->fragtree);
754 	if (last_frag)
755 		/* Fetch the inode length from the fragtree rather than
756 		 * from i_size since i_size may not have been updated yet */
757 		ilen = last_frag->ofs + last_frag->size;
758 	else
759 		ilen = JFFS2_F_I_SIZE(f);
760 
761 	memset(&ri, 0, sizeof(ri));
762 	ri.magic = cpu_to_je16(JFFS2_MAGIC_BITMASK);
763 	ri.nodetype = cpu_to_je16(JFFS2_NODETYPE_INODE);
764 	ri.totlen = cpu_to_je32(sizeof(ri) + mdatalen);
765 	ri.hdr_crc = cpu_to_je32(crc32(0, &ri, sizeof(struct jffs2_unknown_node)-4));
766 
767 	ri.ino = cpu_to_je32(f->inocache->ino);
768 	ri.version = cpu_to_je32(++f->highest_version);
769 	ri.mode = cpu_to_jemode(JFFS2_F_I_MODE(f));
770 	ri.uid = cpu_to_je16(JFFS2_F_I_UID(f));
771 	ri.gid = cpu_to_je16(JFFS2_F_I_GID(f));
772 	ri.isize = cpu_to_je32(ilen);
773 	ri.atime = cpu_to_je32(JFFS2_F_I_ATIME(f));
774 	ri.ctime = cpu_to_je32(JFFS2_F_I_CTIME(f));
775 	ri.mtime = cpu_to_je32(JFFS2_F_I_MTIME(f));
776 	ri.offset = cpu_to_je32(0);
777 	ri.csize = cpu_to_je32(mdatalen);
778 	ri.dsize = cpu_to_je32(mdatalen);
779 	ri.compr = JFFS2_COMPR_NONE;
780 	ri.node_crc = cpu_to_je32(crc32(0, &ri, sizeof(ri)-8));
781 	ri.data_crc = cpu_to_je32(crc32(0, mdata, mdatalen));
782 
783 	new_fn = jffs2_write_dnode(c, f, &ri, mdata, mdatalen, ALLOC_GC);
784 
785 	if (IS_ERR(new_fn)) {
786 		printk(KERN_WARNING "Error writing new dnode: %ld\n", PTR_ERR(new_fn));
787 		ret = PTR_ERR(new_fn);
788 		goto out;
789 	}
790 	jffs2_mark_node_obsolete(c, fn->raw);
791 	jffs2_free_full_dnode(fn);
792 	f->metadata = new_fn;
793  out:
794 	if (S_ISLNK(JFFS2_F_I_MODE(f)))
795 		kfree(mdata);
796 	return ret;
797 }
798 
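/* Write out a fresh copy of a live directory entry with a new version
   number and link it into the inode's dirent list. */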
799 static int jffs2_garbage_collect_dirent(struct jffs2_sb_info *c, struct jffs2_eraseblock *jeb,
800 					struct jffs2_inode_info *f, struct jffs2_full_dirent *fd)
801 {
802 	struct jffs2_full_dirent *new_fd;
803 	struct jffs2_raw_dirent rd;
804 	uint32_t alloclen;
805 	int ret;
806 
807 	rd.magic = cpu_to_je16(JFFS2_MAGIC_BITMASK);
808 	rd.nodetype = cpu_to_je16(JFFS2_NODETYPE_DIRENT);
809 	rd.nsize = strlen(fd->name);
810 	rd.totlen = cpu_to_je32(sizeof(rd) + rd.nsize);
811 	rd.hdr_crc = cpu_to_je32(crc32(0, &rd, sizeof(struct jffs2_unknown_node)-4));
812 
813 	rd.pino = cpu_to_je32(f->inocache->ino);
814 	rd.version = cpu_to_je32(++f->highest_version);
815 	rd.ino = cpu_to_je32(fd->ino);
816 	/* If the times on this inode were set by explicit utime() they can be different,
817 	   so refrain from splatting them. */
818 	if (JFFS2_F_I_MTIME(f) == JFFS2_F_I_CTIME(f))
819 		rd.mctime = cpu_to_je32(JFFS2_F_I_MTIME(f));
820 	else
821 		rd.mctime = cpu_to_je32(0);
822 	rd.type = fd->type;
823 	rd.node_crc = cpu_to_je32(crc32(0, &rd, sizeof(rd)-8));
824 	rd.name_crc = cpu_to_je32(crc32(0, fd->name, rd.nsize));
825 
826 	ret = jffs2_reserve_space_gc(c, sizeof(rd)+rd.nsize, &alloclen,
827 				JFFS2_SUMMARY_DIRENT_SIZE(rd.nsize));
828 	if (ret) {
829 		printk(KERN_WARNING "jffs2_reserve_space_gc of %zd bytes for garbage_collect_dirent failed: %d\n",
830 		       sizeof(rd)+rd.nsize, ret);
831 		return ret;
832 	}
833 	new_fd = jffs2_write_dirent(c, f, &rd, fd->name, rd.nsize, ALLOC_GC);
834 
835 	if (IS_ERR(new_fd)) {
836 		printk(KERN_WARNING "jffs2_write_dirent in garbage_collect_dirent failed: %ld\n", PTR_ERR(new_fd));
837 		return PTR_ERR(new_fd);
838 	}
839 	jffs2_add_fd_to_list(c, new_fd, &f->dents);
840 	return 0;
841 }
842 
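/* GC a deletion dirent (one with ino == 0). On media where we cannot mark
   nodes obsolete in place (e.g. NAND), first scan the inode's obsolete node
   refs to see whether an older 'real' dirent with the same name is still
   present on flash; if so, the deletion record is still needed and is
   rewritten like a normal dirent. Otherwise it is simply unlinked from the
   dirent list and marked obsolete. */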
843 static int jffs2_garbage_collect_deletion_dirent(struct jffs2_sb_info *c, struct jffs2_eraseblock *jeb,
844 					struct jffs2_inode_info *f, struct jffs2_full_dirent *fd)
845 {
846 	struct jffs2_full_dirent **fdp = &f->dents;
847 	int found = 0;
848 
849 	/* On a medium where we can't actually mark nodes obsolete
850 	   permanently, such as NAND flash, we need to work out
851 	   whether this deletion dirent is still needed to actively
852 	   delete a 'real' dirent with the same name that's still
853 	   somewhere else on the flash. */
854 	if (!jffs2_can_mark_obsolete(c)) {
855 		struct jffs2_raw_dirent *rd;
856 		struct jffs2_raw_node_ref *raw;
857 		int ret;
858 		size_t retlen;
859 		int name_len = strlen(fd->name);
860 		uint32_t name_crc = crc32(0, fd->name, name_len);
861 		uint32_t rawlen = ref_totlen(c, jeb, fd->raw);
862 
863 		rd = kmalloc(rawlen, GFP_KERNEL);
864 		if (!rd)
865 			return -ENOMEM;
866 
867 		/* Prevent the erase code from nicking the obsolete node refs while
868 		   we're looking at them. I really don't like this extra lock but
869 		   can't see any alternative. Suggestions on a postcard to... */
870 		mutex_lock(&c->erase_free_sem);
871 
872 		for (raw = f->inocache->nodes; raw != (void *)f->inocache; raw = raw->next_in_ino) {
873 
874 			cond_resched();
875 
876 			/* We only care about obsolete ones */
877 			if (!(ref_obsolete(raw)))
878 				continue;
879 
880 			/* Any dirent with the same name is going to have the same length... */
881 			if (ref_totlen(c, NULL, raw) != rawlen)
882 				continue;
883 
884 			/* Doesn't matter if there's one in the same erase block. We're going to
885 			   delete it too at the same time. */
886 			if (SECTOR_ADDR(raw->flash_offset) == SECTOR_ADDR(fd->raw->flash_offset))
887 				continue;
888 
889 			D1(printk(KERN_DEBUG "Check potential deletion dirent at %08x\n", ref_offset(raw)));
890 
891 			/* This is an obsolete node belonging to the same directory, and it's of the right
892 			   length. We need to take a closer look...*/
893 			ret = jffs2_flash_read(c, ref_offset(raw), rawlen, &retlen, (char *)rd);
894 			if (ret) {
895 				printk(KERN_WARNING "jffs2_g_c_deletion_dirent(): Read error (%d) reading obsolete node at %08x\n", ret, ref_offset(raw));
896 				/* If we can't read it, we don't need to continue to obsolete it. Continue */
897 				continue;
898 			}
899 			if (retlen != rawlen) {
900 				printk(KERN_WARNING "jffs2_g_c_deletion_dirent(): Short read (%zd not %u) reading header from obsolete node at %08x\n",
901 				       retlen, rawlen, ref_offset(raw));
902 				continue;
903 			}
904 
905 			if (je16_to_cpu(rd->nodetype) != JFFS2_NODETYPE_DIRENT)
906 				continue;
907 
908 			/* If the name CRC doesn't match, skip */
909 			if (je32_to_cpu(rd->name_crc) != name_crc)
910 				continue;
911 
912 			/* If the name length doesn't match, or it's another deletion dirent, skip */
913 			if (rd->nsize != name_len || !je32_to_cpu(rd->ino))
914 				continue;
915 
916 			/* OK, check the actual name now */
917 			if (memcmp(rd->name, fd->name, name_len))
918 				continue;
919 
920 			/* OK. The name really does match. There really is still an older node on
921 			   the flash which our deletion dirent obsoletes. So we have to write out
922 			   a new deletion dirent to replace it */
923 			mutex_unlock(&c->erase_free_sem);
924 
925 			D1(printk(KERN_DEBUG "Deletion dirent at %08x still obsoletes real dirent \"%s\" at %08x for ino #%u\n",
926 				  ref_offset(fd->raw), fd->name, ref_offset(raw), je32_to_cpu(rd->ino)));
927 			kfree(rd);
928 
929 			return jffs2_garbage_collect_dirent(c, jeb, f, fd);
930 		}
931 
932 		mutex_unlock(&c->erase_free_sem);
933 		kfree(rd);
934 	}
935 
936 	/* FIXME: If we're deleting a dirent which contains the current mtime and ctime,
937 	   we should update the metadata node with those times accordingly */
938 
939 	/* No need for it any more. Just mark it obsolete and remove it from the list */
940 	while (*fdp) {
941 		if ((*fdp) == fd) {
942 			found = 1;
943 			*fdp = fd->next;
944 			break;
945 		}
946 		fdp = &(*fdp)->next;
947 	}
948 	if (!found) {
949 		printk(KERN_WARNING "Deletion dirent \"%s\" not found in list for ino #%u\n", fd->name, f->inocache->ino);
950 	}
951 	jffs2_mark_node_obsolete(c, fd->raw);
952 	jffs2_free_full_dirent(fd);
953 	return 0;
954 }
955 
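/* Write a replacement hole node covering [start, end) of inode 'f'. If the
   old node has been partially obsoleted by later writes (fn->frags > 1), its
   original version number is reused so the replacement cannot clobber those
   later writes; otherwise a new version number is allocated. */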
956 static int jffs2_garbage_collect_hole(struct jffs2_sb_info *c, struct jffs2_eraseblock *jeb,
957 				      struct jffs2_inode_info *f, struct jffs2_full_dnode *fn,
958 				      uint32_t start, uint32_t end)
959 {
960 	struct jffs2_raw_inode ri;
961 	struct jffs2_node_frag *frag;
962 	struct jffs2_full_dnode *new_fn;
963 	uint32_t alloclen, ilen;
964 	int ret;
965 
966 	D1(printk(KERN_DEBUG "Writing replacement hole node for ino #%u from offset 0x%x to 0x%x\n",
967 		  f->inocache->ino, start, end));
968 
969 	memset(&ri, 0, sizeof(ri));
970 
971 	if(fn->frags > 1) {
972 		size_t readlen;
973 		uint32_t crc;
974 		/* It's partially obsoleted by a later write. So we have to
975 		   write it out again with the _same_ version as before */
976 		ret = jffs2_flash_read(c, ref_offset(fn->raw), sizeof(ri), &readlen, (char *)&ri);
977 		if (readlen != sizeof(ri) || ret) {
978 			printk(KERN_WARNING "Node read failed in jffs2_garbage_collect_hole. Ret %d, retlen %zd. Data will be lost by writing new hole node\n", ret, readlen);
979 			goto fill;
980 		}
981 		if (je16_to_cpu(ri.nodetype) != JFFS2_NODETYPE_INODE) {
982 			printk(KERN_WARNING "jffs2_garbage_collect_hole: Node at 0x%08x had node type 0x%04x instead of JFFS2_NODETYPE_INODE(0x%04x)\n",
983 			       ref_offset(fn->raw),
984 			       je16_to_cpu(ri.nodetype), JFFS2_NODETYPE_INODE);
985 			return -EIO;
986 		}
987 		if (je32_to_cpu(ri.totlen) != sizeof(ri)) {
988 			printk(KERN_WARNING "jffs2_garbage_collect_hole: Node at 0x%08x had totlen 0x%x instead of expected 0x%zx\n",
989 			       ref_offset(fn->raw),
990 			       je32_to_cpu(ri.totlen), sizeof(ri));
991 			return -EIO;
992 		}
993 		crc = crc32(0, &ri, sizeof(ri)-8);
994 		if (crc != je32_to_cpu(ri.node_crc)) {
995 			printk(KERN_WARNING "jffs2_garbage_collect_hole: Node at 0x%08x had CRC 0x%08x which doesn't match calculated CRC 0x%08x\n",
996 			       ref_offset(fn->raw),
997 			       je32_to_cpu(ri.node_crc), crc);
998 			/* FIXME: We could possibly deal with this by writing new holes for each frag */
999 			printk(KERN_WARNING "Data in the range 0x%08x to 0x%08x of inode #%u will be lost\n",
1000 			       start, end, f->inocache->ino);
1001 			goto fill;
1002 		}
1003 		if (ri.compr != JFFS2_COMPR_ZERO) {
1004 			printk(KERN_WARNING "jffs2_garbage_collect_hole: Node 0x%08x wasn't a hole node!\n", ref_offset(fn->raw));
1005 			printk(KERN_WARNING "Data in the range 0x%08x to 0x%08x of inode #%u will be lost\n",
1006 			       start, end, f->inocache->ino);
1007 			goto fill;
1008 		}
1009 	} else {
1010 	fill:
1011 		ri.magic = cpu_to_je16(JFFS2_MAGIC_BITMASK);
1012 		ri.nodetype = cpu_to_je16(JFFS2_NODETYPE_INODE);
1013 		ri.totlen = cpu_to_je32(sizeof(ri));
1014 		ri.hdr_crc = cpu_to_je32(crc32(0, &ri, sizeof(struct jffs2_unknown_node)-4));
1015 
1016 		ri.ino = cpu_to_je32(f->inocache->ino);
1017 		ri.version = cpu_to_je32(++f->highest_version);
1018 		ri.offset = cpu_to_je32(start);
1019 		ri.dsize = cpu_to_je32(end - start);
1020 		ri.csize = cpu_to_je32(0);
1021 		ri.compr = JFFS2_COMPR_ZERO;
1022 	}
1023 
1024 	frag = frag_last(&f->fragtree);
1025 	if (frag)
1026 		/* Fetch the inode length from the fragtree rather than
1027 		 * from i_size since i_size may not have been updated yet */
1028 		ilen = frag->ofs + frag->size;
1029 	else
1030 		ilen = JFFS2_F_I_SIZE(f);
1031 
1032 	ri.mode = cpu_to_jemode(JFFS2_F_I_MODE(f));
1033 	ri.uid = cpu_to_je16(JFFS2_F_I_UID(f));
1034 	ri.gid = cpu_to_je16(JFFS2_F_I_GID(f));
1035 	ri.isize = cpu_to_je32(ilen);
1036 	ri.atime = cpu_to_je32(JFFS2_F_I_ATIME(f));
1037 	ri.ctime = cpu_to_je32(JFFS2_F_I_CTIME(f));
1038 	ri.mtime = cpu_to_je32(JFFS2_F_I_MTIME(f));
1039 	ri.data_crc = cpu_to_je32(0);
1040 	ri.node_crc = cpu_to_je32(crc32(0, &ri, sizeof(ri)-8));
1041 
1042 	ret = jffs2_reserve_space_gc(c, sizeof(ri), &alloclen,
1043 				     JFFS2_SUMMARY_INODE_SIZE);
1044 	if (ret) {
1045 		printk(KERN_WARNING "jffs2_reserve_space_gc of %zd bytes for garbage_collect_hole failed: %d\n",
1046 		       sizeof(ri), ret);
1047 		return ret;
1048 	}
1049 	new_fn = jffs2_write_dnode(c, f, &ri, NULL, 0, ALLOC_GC);
1050 
1051 	if (IS_ERR(new_fn)) {
1052 		printk(KERN_WARNING "Error writing new hole node: %ld\n", PTR_ERR(new_fn));
1053 		return PTR_ERR(new_fn);
1054 	}
1055 	if (je32_to_cpu(ri.version) == f->highest_version) {
1056 		jffs2_add_full_dnode_to_inode(c, f, new_fn);
1057 		if (f->metadata) {
1058 			jffs2_mark_node_obsolete(c, f->metadata->raw);
1059 			jffs2_free_full_dnode(f->metadata);
1060 			f->metadata = NULL;
1061 		}
1062 		return 0;
1063 	}
1064 
1065 	/*
1066 	 * We should only get here in the case where the node we are
1067 	 * replacing had more than one frag, so we kept the same version
1068 	 * number as before. (Except in case of error -- see 'goto fill;'
1069 	 * above.)
1070 	 */
1071 	D1(if(unlikely(fn->frags <= 1)) {
1072 		printk(KERN_WARNING "jffs2_garbage_collect_hole: Replacing fn with %d frag(s) but new ver %d != highest_version %d of ino #%d\n",
1073 		       fn->frags, je32_to_cpu(ri.version), f->highest_version,
1074 		       je32_to_cpu(ri.ino));
1075 	});
1076 
1077 	/* This is a partially-overlapped hole node. Mark it REF_NORMAL not REF_PRISTINE */
1078 	mark_ref_normal(new_fn->raw);
1079 
1080 	for (frag = jffs2_lookup_node_frag(&f->fragtree, fn->ofs);
1081 	     frag; frag = frag_next(frag)) {
1082 		if (frag->ofs > fn->size + fn->ofs)
1083 			break;
1084 		if (frag->node == fn) {
1085 			frag->node = new_fn;
1086 			new_fn->frags++;
1087 			fn->frags--;
1088 		}
1089 	}
1090 	if (fn->frags) {
1091 		printk(KERN_WARNING "jffs2_garbage_collect_hole: Old node still has frags!\n");
1092 		BUG();
1093 	}
1094 	if (!new_fn->frags) {
1095 		printk(KERN_WARNING "jffs2_garbage_collect_hole: New node has no frags!\n");
1096 		BUG();
1097 	}
1098 
1099 	jffs2_mark_node_obsolete(c, fn->raw);
1100 	jffs2_free_full_dnode(fn);
1101 
1102 	return 0;
1103 }
1104 
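/* GC an ordinary data node. When free space permits, the [start, end) range
   is first expanded to cover logically adjacent frags within the same page
   whose nodes already live in dirty blocks, so that small nodes get merged.
   The page is then read through the page cache and written back out as one
   or more freshly compressed data nodes. */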
1105 static int jffs2_garbage_collect_dnode(struct jffs2_sb_info *c, struct jffs2_eraseblock *orig_jeb,
1106 				       struct jffs2_inode_info *f, struct jffs2_full_dnode *fn,
1107 				       uint32_t start, uint32_t end)
1108 {
1109 	struct jffs2_full_dnode *new_fn;
1110 	struct jffs2_raw_inode ri;
1111 	uint32_t alloclen, offset, orig_end, orig_start;
1112 	int ret = 0;
1113 	unsigned char *comprbuf = NULL, *writebuf;
1114 	unsigned long pg;
1115 	unsigned char *pg_ptr;
1116 
1117 	memset(&ri, 0, sizeof(ri));
1118 
1119 	D1(printk(KERN_DEBUG "Writing replacement dnode for ino #%u from offset 0x%x to 0x%x\n",
1120 		  f->inocache->ino, start, end));
1121 
1122 	orig_end = end;
1123 	orig_start = start;
1124 
1125 	if (c->nr_free_blocks + c->nr_erasing_blocks > c->resv_blocks_gcmerge) {
1126 		/* Attempt to do some merging. But only expand to cover logically
1127 		   adjacent frags if the block containing them is already considered
1128 		   to be dirty. Otherwise we end up with GC just going round in
1129 		   circles dirtying the nodes it already wrote out, especially
1130 		   on NAND where we have small eraseblocks and hence a much higher
1131 		   chance of nodes having to be split to cross boundaries. */
1132 
1133 		struct jffs2_node_frag *frag;
1134 		uint32_t min, max;
1135 
1136 		min = start & ~(PAGE_CACHE_SIZE-1);
1137 		max = min + PAGE_CACHE_SIZE;
1138 
1139 		frag = jffs2_lookup_node_frag(&f->fragtree, start);
1140 
1141 		/* BUG_ON(!frag) but that'll happen anyway... */
1142 
1143 		BUG_ON(frag->ofs != start);
1144 
1145 		/* First grow down... */
1146 		while((frag = frag_prev(frag)) && frag->ofs >= min) {
1147 
1148 			/* If the previous frag doesn't even reach the beginning, there's
1149 			   excessive fragmentation. Just merge. */
1150 			if (frag->ofs > min) {
1151 				D1(printk(KERN_DEBUG "Expanding down to cover partial frag (0x%x-0x%x)\n",
1152 					  frag->ofs, frag->ofs+frag->size));
1153 				start = frag->ofs;
1154 				continue;
1155 			}
1156 			/* OK. This frag holds the first byte of the page. */
1157 			if (!frag->node || !frag->node->raw) {
1158 				D1(printk(KERN_DEBUG "First frag in page is hole (0x%x-0x%x). Not expanding down.\n",
1159 					  frag->ofs, frag->ofs+frag->size));
1160 				break;
1161 			} else {
1162 
1163 				/* OK, it's a frag which extends to the beginning of the page. Does it live
1164 				   in a block which is still considered clean? If so, don't obsolete it.
1165 				   If not, cover it anyway. */
1166 
1167 				struct jffs2_raw_node_ref *raw = frag->node->raw;
1168 				struct jffs2_eraseblock *jeb;
1169 
1170 				jeb = &c->blocks[raw->flash_offset / c->sector_size];
1171 
1172 				if (jeb == c->gcblock) {
1173 					D1(printk(KERN_DEBUG "Expanding down to cover frag (0x%x-0x%x) in gcblock at %08x\n",
1174 						  frag->ofs, frag->ofs+frag->size, ref_offset(raw)));
1175 					start = frag->ofs;
1176 					break;
1177 				}
1178 				if (!ISDIRTY(jeb->dirty_size + jeb->wasted_size)) {
1179 					D1(printk(KERN_DEBUG "Not expanding down to cover frag (0x%x-0x%x) in clean block %08x\n",
1180 						  frag->ofs, frag->ofs+frag->size, jeb->offset));
1181 					break;
1182 				}
1183 
1184 				D1(printk(KERN_DEBUG "Expanding down to cover frag (0x%x-0x%x) in dirty block %08x\n",
1185 						  frag->ofs, frag->ofs+frag->size, jeb->offset));
1186 				start = frag->ofs;
1187 				break;
1188 			}
1189 		}
1190 
1191 		/* ... then up */
1192 
1193 		/* Find last frag which is actually part of the node we're to GC. */
1194 		frag = jffs2_lookup_node_frag(&f->fragtree, end-1);
1195 
1196 		while((frag = frag_next(frag)) && frag->ofs+frag->size <= max) {
1197 
1198 			/* If this frag doesn't even reach the end of the page, there's lots
1199 			   of fragmentation. Just merge. */
1200 			if (frag->ofs+frag->size < max) {
1201 				D1(printk(KERN_DEBUG "Expanding up to cover partial frag (0x%x-0x%x)\n",
1202 					  frag->ofs, frag->ofs+frag->size));
1203 				end = frag->ofs + frag->size;
1204 				continue;
1205 			}
1206 
1207 			if (!frag->node || !frag->node->raw) {
1208 				D1(printk(KERN_DEBUG "Last frag in page is hole (0x%x-0x%x). Not expanding up.\n",
1209 					  frag->ofs, frag->ofs+frag->size));
1210 				break;
1211 			} else {
1212 
1213 				/* OK, it's a frag which extends to the end of the page. Does it live
1214 				   in a block which is still considered clean? If so, don't obsolete it.
1215 				   If not, cover it anyway. */
1216 
1217 				struct jffs2_raw_node_ref *raw = frag->node->raw;
1218 				struct jffs2_eraseblock *jeb;
1219 
1220 				jeb = &c->blocks[raw->flash_offset / c->sector_size];
1221 
1222 				if (jeb == c->gcblock) {
1223 					D1(printk(KERN_DEBUG "Expanding up to cover frag (0x%x-0x%x) in gcblock at %08x\n",
1224 						  frag->ofs, frag->ofs+frag->size, ref_offset(raw)));
1225 					end = frag->ofs + frag->size;
1226 					break;
1227 				}
1228 				if (!ISDIRTY(jeb->dirty_size + jeb->wasted_size)) {
1229 					D1(printk(KERN_DEBUG "Not expanding up to cover frag (0x%x-0x%x) in clean block %08x\n",
1230 						  frag->ofs, frag->ofs+frag->size, jeb->offset));
1231 					break;
1232 				}
1233 
1234 				D1(printk(KERN_DEBUG "Expanding up to cover frag (0x%x-0x%x) in dirty block %08x\n",
1235 						  frag->ofs, frag->ofs+frag->size, jeb->offset));
1236 				end = frag->ofs + frag->size;
1237 				break;
1238 			}
1239 		}
1240 		D1(printk(KERN_DEBUG "Expanded dnode to write from (0x%x-0x%x) to (0x%x-0x%x)\n",
1241 			  orig_start, orig_end, start, end));
1242 
1243 		D1(BUG_ON(end > frag_last(&f->fragtree)->ofs + frag_last(&f->fragtree)->size));
1244 		BUG_ON(end < orig_end);
1245 		BUG_ON(start > orig_start);
1246 	}
1247 
1248 	/* First, use readpage() to read the appropriate page into the page cache */
1249 	/* Q: What happens if we actually try to GC the _same_ page for which commit_write()
1250 	 *    triggered garbage collection in the first place?
1251 	 * A: I _think_ it's OK. read_cache_page shouldn't deadlock, we'll write out the
1252 	 *    page OK. We'll actually write it out again in commit_write, which is a little
1253 	 *    suboptimal, but at least we're correct.
1254 	 */
1255 	pg_ptr = jffs2_gc_fetch_page(c, f, start, &pg);
1256 
1257 	if (IS_ERR(pg_ptr)) {
1258 		printk(KERN_WARNING "read_cache_page() returned error: %ld\n", PTR_ERR(pg_ptr));
1259 		return PTR_ERR(pg_ptr);
1260 	}
1261 
1262 	offset = start;
1263 	while(offset < orig_end) {
1264 		uint32_t datalen;
1265 		uint32_t cdatalen;
1266 		uint16_t comprtype = JFFS2_COMPR_NONE;
1267 
1268 		ret = jffs2_reserve_space_gc(c, sizeof(ri) + JFFS2_MIN_DATA_LEN,
1269 					&alloclen, JFFS2_SUMMARY_INODE_SIZE);
1270 
1271 		if (ret) {
1272 			printk(KERN_WARNING "jffs2_reserve_space_gc of %zd bytes for garbage_collect_dnode failed: %d\n",
1273 			       sizeof(ri)+ JFFS2_MIN_DATA_LEN, ret);
1274 			break;
1275 		}
1276 		cdatalen = min_t(uint32_t, alloclen - sizeof(ri), end - offset);
1277 		datalen = end - offset;
1278 
1279 		writebuf = pg_ptr + (offset & (PAGE_CACHE_SIZE -1));
1280 
1281 		comprtype = jffs2_compress(c, f, writebuf, &comprbuf, &datalen, &cdatalen);
1282 
1283 		ri.magic = cpu_to_je16(JFFS2_MAGIC_BITMASK);
1284 		ri.nodetype = cpu_to_je16(JFFS2_NODETYPE_INODE);
1285 		ri.totlen = cpu_to_je32(sizeof(ri) + cdatalen);
1286 		ri.hdr_crc = cpu_to_je32(crc32(0, &ri, sizeof(struct jffs2_unknown_node)-4));
1287 
1288 		ri.ino = cpu_to_je32(f->inocache->ino);
1289 		ri.version = cpu_to_je32(++f->highest_version);
1290 		ri.mode = cpu_to_jemode(JFFS2_F_I_MODE(f));
1291 		ri.uid = cpu_to_je16(JFFS2_F_I_UID(f));
1292 		ri.gid = cpu_to_je16(JFFS2_F_I_GID(f));
1293 		ri.isize = cpu_to_je32(JFFS2_F_I_SIZE(f));
1294 		ri.atime = cpu_to_je32(JFFS2_F_I_ATIME(f));
1295 		ri.ctime = cpu_to_je32(JFFS2_F_I_CTIME(f));
1296 		ri.mtime = cpu_to_je32(JFFS2_F_I_MTIME(f));
1297 		ri.offset = cpu_to_je32(offset);
1298 		ri.csize = cpu_to_je32(cdatalen);
1299 		ri.dsize = cpu_to_je32(datalen);
1300 		ri.compr = comprtype & 0xff;
1301 		ri.usercompr = (comprtype >> 8) & 0xff;
1302 		ri.node_crc = cpu_to_je32(crc32(0, &ri, sizeof(ri)-8));
1303 		ri.data_crc = cpu_to_je32(crc32(0, comprbuf, cdatalen));
1304 
1305 		new_fn = jffs2_write_dnode(c, f, &ri, comprbuf, cdatalen, ALLOC_GC);
1306 
1307 		jffs2_free_comprbuf(comprbuf, writebuf);
1308 
1309 		if (IS_ERR(new_fn)) {
1310 			printk(KERN_WARNING "Error writing new dnode: %ld\n", PTR_ERR(new_fn));
1311 			ret = PTR_ERR(new_fn);
1312 			break;
1313 		}
1314 		ret = jffs2_add_full_dnode_to_inode(c, f, new_fn);
1315 		offset += datalen;
1316 		if (f->metadata) {
1317 			jffs2_mark_node_obsolete(c, f->metadata->raw);
1318 			jffs2_free_full_dnode(f->metadata);
1319 			f->metadata = NULL;
1320 		}
1321 	}
1322 
1323 	jffs2_gc_release_page(c, pg_ptr, &pg);
1324 	return ret;
1325 }
1326