xref: /openbmc/linux/fs/jffs2/gc.c (revision 81cfc9f1)
1 /*
2  * JFFS2 -- Journalling Flash File System, Version 2.
3  *
4  * Copyright © 2001-2007 Red Hat, Inc.
5  * Copyright © 2004-2010 David Woodhouse <dwmw2@infradead.org>
6  *
7  * Created by David Woodhouse <dwmw2@infradead.org>
8  *
9  * For licensing information, see the file 'LICENCE' in this directory.
10  *
11  */
12 
13 #include <linux/kernel.h>
14 #include <linux/mtd/mtd.h>
15 #include <linux/slab.h>
16 #include <linux/pagemap.h>
17 #include <linux/crc32.h>
18 #include <linux/compiler.h>
19 #include <linux/stat.h>
20 #include "nodelist.h"
21 #include "compr.h"
22 
23 static int jffs2_garbage_collect_pristine(struct jffs2_sb_info *c,
24 					  struct jffs2_inode_cache *ic,
25 					  struct jffs2_raw_node_ref *raw);
26 static int jffs2_garbage_collect_metadata(struct jffs2_sb_info *c, struct jffs2_eraseblock *jeb,
27 					struct jffs2_inode_info *f, struct jffs2_full_dnode *fd);
28 static int jffs2_garbage_collect_dirent(struct jffs2_sb_info *c, struct jffs2_eraseblock *jeb,
29 					struct jffs2_inode_info *f, struct jffs2_full_dirent *fd);
30 static int jffs2_garbage_collect_deletion_dirent(struct jffs2_sb_info *c, struct jffs2_eraseblock *jeb,
31 					struct jffs2_inode_info *f, struct jffs2_full_dirent *fd);
32 static int jffs2_garbage_collect_hole(struct jffs2_sb_info *c, struct jffs2_eraseblock *jeb,
33 				      struct jffs2_inode_info *f, struct jffs2_full_dnode *fn,
34 				      uint32_t start, uint32_t end);
35 static int jffs2_garbage_collect_dnode(struct jffs2_sb_info *c, struct jffs2_eraseblock *jeb,
36 				       struct jffs2_inode_info *f, struct jffs2_full_dnode *fn,
37 				       uint32_t start, uint32_t end);
38 static int jffs2_garbage_collect_live(struct jffs2_sb_info *c,  struct jffs2_eraseblock *jeb,
39 			       struct jffs2_raw_node_ref *raw, struct jffs2_inode_info *f);
40 
41 /* Called with erase_completion_lock held */
42 static struct jffs2_eraseblock *jffs2_find_gc_block(struct jffs2_sb_info *c)
43 {
44 	struct jffs2_eraseblock *ret;
45 	struct list_head *nextlist = NULL;
46 	int n = jiffies % 128;
47 
48 	/* Pick an eraseblock to garbage collect next. This is where we'll
49 	   put the clever wear-levelling algorithms. Eventually.  */
50 	/* We possibly want to favour the dirtier blocks more when the
51 	   number of free blocks is low. */
52 again:
53 	if (!list_empty(&c->bad_used_list) && c->nr_free_blocks > c->resv_blocks_gcbad) {
54 		D1(printk(KERN_DEBUG "Picking block from bad_used_list to GC next\n"));
55 		nextlist = &c->bad_used_list;
56 	} else if (n < 50 && !list_empty(&c->erasable_list)) {
57 		/* Note that most of them will have gone directly to be erased.
58 		   So don't favour the erasable_list _too_ much. */
59 		D1(printk(KERN_DEBUG "Picking block from erasable_list to GC next\n"));
60 		nextlist = &c->erasable_list;
61 	} else if (n < 110 && !list_empty(&c->very_dirty_list)) {
62 		/* Most of the time, pick one off the very_dirty list */
63 		D1(printk(KERN_DEBUG "Picking block from very_dirty_list to GC next\n"));
64 		nextlist = &c->very_dirty_list;
65 	} else if (n < 126 && !list_empty(&c->dirty_list)) {
66 		D1(printk(KERN_DEBUG "Picking block from dirty_list to GC next\n"));
67 		nextlist = &c->dirty_list;
68 	} else if (!list_empty(&c->clean_list)) {
69 		D1(printk(KERN_DEBUG "Picking block from clean_list to GC next\n"));
70 		nextlist = &c->clean_list;
71 	} else if (!list_empty(&c->dirty_list)) {
72 		D1(printk(KERN_DEBUG "Picking block from dirty_list to GC next (clean_list was empty)\n"));
73 
74 		nextlist = &c->dirty_list;
75 	} else if (!list_empty(&c->very_dirty_list)) {
76 		D1(printk(KERN_DEBUG "Picking block from very_dirty_list to GC next (clean_list and dirty_list were empty)\n"));
77 		nextlist = &c->very_dirty_list;
78 	} else if (!list_empty(&c->erasable_list)) {
79 		D1(printk(KERN_DEBUG "Picking block from erasable_list to GC next (clean_list and {very_,}dirty_list were empty)\n"));
80 
81 		nextlist = &c->erasable_list;
82 	} else if (!list_empty(&c->erasable_pending_wbuf_list)) {
83 		/* There are blocks are wating for the wbuf sync */
84 		D1(printk(KERN_DEBUG "Synching wbuf in order to reuse erasable_pending_wbuf_list blocks\n"));
85 		spin_unlock(&c->erase_completion_lock);
86 		jffs2_flush_wbuf_pad(c);
87 		spin_lock(&c->erase_completion_lock);
88 		goto again;
89 	} else {
90 		/* Eep. All were empty */
91 		D1(printk(KERN_NOTICE "jffs2: No clean, dirty _or_ erasable blocks to GC from! Where are they all?\n"));
92 		return NULL;
93 	}
94 
95 	ret = list_entry(nextlist->next, struct jffs2_eraseblock, list);
96 	list_del(&ret->list);
97 	c->gcblock = ret;
98 	ret->gc_node = ret->first_node;
99 	if (!ret->gc_node) {
100 		printk(KERN_WARNING "Eep. ret->gc_node for block at 0x%08x is NULL\n", ret->offset);
101 		BUG();
102 	}
103 
104 	/* Have we accidentally picked a clean block with wasted space ? */
105 	if (ret->wasted_size) {
106 		D1(printk(KERN_DEBUG "Converting wasted_size %08x to dirty_size\n", ret->wasted_size));
107 		ret->dirty_size += ret->wasted_size;
108 		c->wasted_size -= ret->wasted_size;
109 		c->dirty_size += ret->wasted_size;
110 		ret->wasted_size = 0;
111 	}
112 
113 	return ret;
114 }
115 
116 /* jffs2_garbage_collect_pass
117  * Make a single attempt to progress GC. Move one node, and possibly
118  * start erasing one eraseblock.
119  */
120 int jffs2_garbage_collect_pass(struct jffs2_sb_info *c)
121 {
122 	struct jffs2_inode_info *f;
123 	struct jffs2_inode_cache *ic;
124 	struct jffs2_eraseblock *jeb;
125 	struct jffs2_raw_node_ref *raw;
126 	uint32_t gcblock_dirty;
127 	int ret = 0, inum, nlink;
128 	int xattr = 0;
129 
130 	if (mutex_lock_interruptible(&c->alloc_sem))
131 		return -EINTR;
132 
133 	for (;;) {
134 		spin_lock(&c->erase_completion_lock);
135 		if (!c->unchecked_size)
136 			break;
137 
138 		/* We can't start doing GC yet. We haven't finished checking
139 		   the node CRCs etc. Do it now. */
140 
141 		/* checked_ino is protected by the alloc_sem */
142 		if (c->checked_ino > c->highest_ino && xattr) {
143 			printk(KERN_CRIT "Checked all inodes but still 0x%x bytes of unchecked space?\n",
144 			       c->unchecked_size);
145 			jffs2_dbg_dump_block_lists_nolock(c);
146 			spin_unlock(&c->erase_completion_lock);
147 			mutex_unlock(&c->alloc_sem);
148 			return -ENOSPC;
149 		}
150 
151 		spin_unlock(&c->erase_completion_lock);
152 
153 		if (!xattr)
154 			xattr = jffs2_verify_xattr(c);
155 
156 		spin_lock(&c->inocache_lock);
157 
158 		ic = jffs2_get_ino_cache(c, c->checked_ino++);
159 
160 		if (!ic) {
161 			spin_unlock(&c->inocache_lock);
162 			continue;
163 		}
164 
165 		if (!ic->pino_nlink) {
166 			D1(printk(KERN_DEBUG "Skipping check of ino #%d with nlink/pino zero\n",
167 				  ic->ino));
168 			spin_unlock(&c->inocache_lock);
169 			jffs2_xattr_delete_inode(c, ic);
170 			continue;
171 		}
172 		switch(ic->state) {
173 		case INO_STATE_CHECKEDABSENT:
174 		case INO_STATE_PRESENT:
175 			D1(printk(KERN_DEBUG "Skipping ino #%u already checked\n", ic->ino));
176 			spin_unlock(&c->inocache_lock);
177 			continue;
178 
179 		case INO_STATE_GC:
180 		case INO_STATE_CHECKING:
181 			printk(KERN_WARNING "Inode #%u is in state %d during CRC check phase!\n", ic->ino, ic->state);
182 			spin_unlock(&c->inocache_lock);
183 			BUG();
184 
185 		case INO_STATE_READING:
186 			/* We need to wait for it to finish, lest we move on
187 			   and trigger the BUG() above while we haven't yet
188 			   finished checking all its nodes */
189 			D1(printk(KERN_DEBUG "Waiting for ino #%u to finish reading\n", ic->ino));
190 			/* We need to come back again for the _same_ inode. We've
191 			 made no progress in this case, but that should be OK */
192 			c->checked_ino--;
193 
194 			mutex_unlock(&c->alloc_sem);
195 			sleep_on_spinunlock(&c->inocache_wq, &c->inocache_lock);
196 			return 0;
197 
198 		default:
199 			BUG();
200 
201 		case INO_STATE_UNCHECKED:
202 			;
203 		}
204 		ic->state = INO_STATE_CHECKING;
205 		spin_unlock(&c->inocache_lock);
206 
207 		D1(printk(KERN_DEBUG "jffs2_garbage_collect_pass() triggering inode scan of ino#%u\n", ic->ino));
208 
209 		ret = jffs2_do_crccheck_inode(c, ic);
210 		if (ret)
211 			printk(KERN_WARNING "Returned error for crccheck of ino #%u. Expect badness...\n", ic->ino);
212 
213 		jffs2_set_inocache_state(c, ic, INO_STATE_CHECKEDABSENT);
214 		mutex_unlock(&c->alloc_sem);
215 		return ret;
216 	}
217 
218 	/* If there are any blocks which need erasing, erase them now */
219 	if (!list_empty(&c->erase_complete_list) ||
220 	    !list_empty(&c->erase_pending_list)) {
221 		spin_unlock(&c->erase_completion_lock);
222 		mutex_unlock(&c->alloc_sem);
223 		D1(printk(KERN_DEBUG "jffs2_garbage_collect_pass() erasing pending blocks\n"));
224 		if (jffs2_erase_pending_blocks(c, 1))
225 			return 0;
226 
227 		D1(printk(KERN_DEBUG "No progress from erasing blocks; doing GC anyway\n"));
228 		spin_lock(&c->erase_completion_lock);
229 		mutex_lock(&c->alloc_sem);
230 	}
231 
232 	/* First, work out which block we're garbage-collecting */
233 	jeb = c->gcblock;
234 
235 	if (!jeb)
236 		jeb = jffs2_find_gc_block(c);
237 
238 	if (!jeb) {
239 		/* Couldn't find a free block. But maybe we can just erase one and make 'progress'? */
240 		if (c->nr_erasing_blocks) {
241 			spin_unlock(&c->erase_completion_lock);
242 			mutex_unlock(&c->alloc_sem);
243 			return -EAGAIN;
244 		}
245 		D1(printk(KERN_NOTICE "jffs2: Couldn't find erase block to garbage collect!\n"));
246 		spin_unlock(&c->erase_completion_lock);
247 		mutex_unlock(&c->alloc_sem);
248 		return -EIO;
249 	}
250 
251 	D1(printk(KERN_DEBUG "GC from block %08x, used_size %08x, dirty_size %08x, free_size %08x\n", jeb->offset, jeb->used_size, jeb->dirty_size, jeb->free_size));
252 	D1(if (c->nextblock)
253 	   printk(KERN_DEBUG "Nextblock at  %08x, used_size %08x, dirty_size %08x, wasted_size %08x, free_size %08x\n", c->nextblock->offset, c->nextblock->used_size, c->nextblock->dirty_size, c->nextblock->wasted_size, c->nextblock->free_size));
254 
255 	if (!jeb->used_size) {
256 		mutex_unlock(&c->alloc_sem);
257 		goto eraseit;
258 	}
259 
260 	raw = jeb->gc_node;
261 	gcblock_dirty = jeb->dirty_size;
262 
263 	while(ref_obsolete(raw)) {
264 		D1(printk(KERN_DEBUG "Node at 0x%08x is obsolete... skipping\n", ref_offset(raw)));
265 		raw = ref_next(raw);
266 		if (unlikely(!raw)) {
267 			printk(KERN_WARNING "eep. End of raw list while still supposedly nodes to GC\n");
268 			printk(KERN_WARNING "erase block at 0x%08x. free_size 0x%08x, dirty_size 0x%08x, used_size 0x%08x\n",
269 			       jeb->offset, jeb->free_size, jeb->dirty_size, jeb->used_size);
270 			jeb->gc_node = raw;
271 			spin_unlock(&c->erase_completion_lock);
272 			mutex_unlock(&c->alloc_sem);
273 			BUG();
274 		}
275 	}
276 	jeb->gc_node = raw;
277 
278 	D1(printk(KERN_DEBUG "Going to garbage collect node at 0x%08x\n", ref_offset(raw)));
279 
280 	if (!raw->next_in_ino) {
281 		/* Inode-less node. Clean marker, snapshot or something like that */
282 		spin_unlock(&c->erase_completion_lock);
283 		if (ref_flags(raw) == REF_PRISTINE) {
284 			/* It's an unknown node with JFFS2_FEATURE_RWCOMPAT_COPY */
285 			jffs2_garbage_collect_pristine(c, NULL, raw);
286 		} else {
287 			/* Just mark it obsolete */
288 			jffs2_mark_node_obsolete(c, raw);
289 		}
290 		mutex_unlock(&c->alloc_sem);
291 		goto eraseit_lock;
292 	}
293 
294 	ic = jffs2_raw_ref_to_ic(raw);
295 
296 #ifdef CONFIG_JFFS2_FS_XATTR
297 	/* When 'ic' refers xattr_datum/xattr_ref, this node is GCed as xattr.
298 	 * We can decide whether this node is inode or xattr by ic->class.     */
299 	if (ic->class == RAWNODE_CLASS_XATTR_DATUM
300 	    || ic->class == RAWNODE_CLASS_XATTR_REF) {
301 		spin_unlock(&c->erase_completion_lock);
302 
303 		if (ic->class == RAWNODE_CLASS_XATTR_DATUM) {
304 			ret = jffs2_garbage_collect_xattr_datum(c, (struct jffs2_xattr_datum *)ic, raw);
305 		} else {
306 			ret = jffs2_garbage_collect_xattr_ref(c, (struct jffs2_xattr_ref *)ic, raw);
307 		}
308 		goto test_gcnode;
309 	}
310 #endif
311 
312 	/* We need to hold the inocache. Either the erase_completion_lock or
313 	   the inocache_lock are sufficient; we trade down since the inocache_lock
314 	   causes less contention. */
315 	spin_lock(&c->inocache_lock);
316 
317 	spin_unlock(&c->erase_completion_lock);
318 
319 	D1(printk(KERN_DEBUG "jffs2_garbage_collect_pass collecting from block @0x%08x. Node @0x%08x(%d), ino #%u\n", jeb->offset, ref_offset(raw), ref_flags(raw), ic->ino));
320 
321 	/* Three possibilities:
322 	   1. Inode is already in-core. We must iget it and do proper
323 	      updating to its fragtree, etc.
324 	   2. Inode is not in-core, node is REF_PRISTINE. We lock the
325 	      inocache to prevent a read_inode(), copy the node intact.
326 	   3. Inode is not in-core, node is not pristine. We must iget()
327 	      and take the slow path.
328 	*/
329 
330 	switch(ic->state) {
331 	case INO_STATE_CHECKEDABSENT:
332 		/* It's been checked, but it's not currently in-core.
333 		   We can just copy any pristine nodes, but have
334 		   to prevent anyone else from doing read_inode() while
335 		   we're at it, so we set the state accordingly */
336 		if (ref_flags(raw) == REF_PRISTINE)
337 			ic->state = INO_STATE_GC;
338 		else {
339 			D1(printk(KERN_DEBUG "Ino #%u is absent but node not REF_PRISTINE. Reading.\n",
340 				  ic->ino));
341 		}
342 		break;
343 
344 	case INO_STATE_PRESENT:
345 		/* It's in-core. GC must iget() it. */
346 		break;
347 
348 	case INO_STATE_UNCHECKED:
349 	case INO_STATE_CHECKING:
350 	case INO_STATE_GC:
351 		/* Should never happen. We should have finished checking
352 		   by the time we actually start doing any GC, and since
353 		   we're holding the alloc_sem, no other garbage collection
354 		   can happen.
355 		*/
356 		printk(KERN_CRIT "Inode #%u already in state %d in jffs2_garbage_collect_pass()!\n",
357 		       ic->ino, ic->state);
358 		mutex_unlock(&c->alloc_sem);
359 		spin_unlock(&c->inocache_lock);
360 		BUG();
361 
362 	case INO_STATE_READING:
363 		/* Someone's currently trying to read it. We must wait for
364 		   them to finish and then go through the full iget() route
365 		   to do the GC. However, sometimes read_inode() needs to get
366 		   the alloc_sem() (for marking nodes invalid) so we must
367 		   drop the alloc_sem before sleeping. */
368 
369 		mutex_unlock(&c->alloc_sem);
370 		D1(printk(KERN_DEBUG "jffs2_garbage_collect_pass() waiting for ino #%u in state %d\n",
371 			  ic->ino, ic->state));
372 		sleep_on_spinunlock(&c->inocache_wq, &c->inocache_lock);
373 		/* And because we dropped the alloc_sem we must start again from the
374 		   beginning. Ponder chance of livelock here -- we're returning success
375 		   without actually making any progress.
376 
377 		   Q: What are the chances that the inode is back in INO_STATE_READING
378 		   again by the time we next enter this function? And that this happens
379 		   enough times to cause a real delay?
380 
381 		   A: Small enough that I don't care :)
382 		*/
383 		return 0;
384 	}
385 
386 	/* OK. Now if the inode is in state INO_STATE_GC, we are going to copy the
387 	   node intact, and we don't have to muck about with the fragtree etc.
388 	   because we know it's not in-core. If it _was_ in-core, we go through
389 	   all the iget() crap anyway */
390 
391 	if (ic->state == INO_STATE_GC) {
392 		spin_unlock(&c->inocache_lock);
393 
394 		ret = jffs2_garbage_collect_pristine(c, ic, raw);
395 
396 		spin_lock(&c->inocache_lock);
397 		ic->state = INO_STATE_CHECKEDABSENT;
398 		wake_up(&c->inocache_wq);
399 
400 		if (ret != -EBADFD) {
401 			spin_unlock(&c->inocache_lock);
402 			goto test_gcnode;
403 		}
404 
405 		/* Fall through if it wanted us to, with inocache_lock held */
406 	}
407 
408 	/* Prevent the fairly unlikely race where the gcblock is
409 	   entirely obsoleted by the final close of a file which had
410 	   the only valid nodes in the block, followed by erasure,
411 	   followed by freeing of the ic because the erased block(s)
412 	   held _all_ the nodes of that inode.... never been seen but
413 	   it's vaguely possible. */
414 
415 	inum = ic->ino;
416 	nlink = ic->pino_nlink;
417 	spin_unlock(&c->inocache_lock);
418 
419 	f = jffs2_gc_fetch_inode(c, inum, !nlink);
420 	if (IS_ERR(f)) {
421 		ret = PTR_ERR(f);
422 		goto release_sem;
423 	}
424 	if (!f) {
425 		ret = 0;
426 		goto release_sem;
427 	}
428 
429 	ret = jffs2_garbage_collect_live(c, jeb, raw, f);
430 
431 	jffs2_gc_release_inode(c, f);
432 
433  test_gcnode:
434 	if (jeb->dirty_size == gcblock_dirty && !ref_obsolete(jeb->gc_node)) {
435 		/* Eep. This really should never happen. GC is broken */
436 		printk(KERN_ERR "Error garbage collecting node at %08x!\n", ref_offset(jeb->gc_node));
437 		ret = -ENOSPC;
438 	}
439  release_sem:
440 	mutex_unlock(&c->alloc_sem);
441 
442  eraseit_lock:
443 	/* If we've finished this block, start it erasing */
444 	spin_lock(&c->erase_completion_lock);
445 
446  eraseit:
447 	if (c->gcblock && !c->gcblock->used_size) {
448 		D1(printk(KERN_DEBUG "Block at 0x%08x completely obsoleted by GC. Moving to erase_pending_list\n", c->gcblock->offset));
449 		/* We're GC'ing an empty block? */
450 		list_add_tail(&c->gcblock->list, &c->erase_pending_list);
451 		c->gcblock = NULL;
452 		c->nr_erasing_blocks++;
453 		jffs2_garbage_collect_trigger(c);
454 	}
455 	spin_unlock(&c->erase_completion_lock);
456 
457 	return ret;
458 }
459 
460 static int jffs2_garbage_collect_live(struct jffs2_sb_info *c,  struct jffs2_eraseblock *jeb,
461 				      struct jffs2_raw_node_ref *raw, struct jffs2_inode_info *f)
462 {
463 	struct jffs2_node_frag *frag;
464 	struct jffs2_full_dnode *fn = NULL;
465 	struct jffs2_full_dirent *fd;
466 	uint32_t start = 0, end = 0, nrfrags = 0;
467 	int ret = 0;
468 
469 	mutex_lock(&f->sem);
470 
471 	/* Now we have the lock for this inode. Check that it's still the one at the head
472 	   of the list. */
473 
474 	spin_lock(&c->erase_completion_lock);
475 
476 	if (c->gcblock != jeb) {
477 		spin_unlock(&c->erase_completion_lock);
478 		D1(printk(KERN_DEBUG "GC block is no longer gcblock. Restart\n"));
479 		goto upnout;
480 	}
481 	if (ref_obsolete(raw)) {
482 		spin_unlock(&c->erase_completion_lock);
483 		D1(printk(KERN_DEBUG "node to be GC'd was obsoleted in the meantime.\n"));
484 		/* They'll call again */
485 		goto upnout;
486 	}
487 	spin_unlock(&c->erase_completion_lock);
488 
489 	/* OK. Looks safe. And nobody can get us now because we have the semaphore. Move the block */
490 	if (f->metadata && f->metadata->raw == raw) {
491 		fn = f->metadata;
492 		ret = jffs2_garbage_collect_metadata(c, jeb, f, fn);
493 		goto upnout;
494 	}
495 
496 	/* FIXME. Read node and do lookup? */
497 	for (frag = frag_first(&f->fragtree); frag; frag = frag_next(frag)) {
498 		if (frag->node && frag->node->raw == raw) {
499 			fn = frag->node;
500 			end = frag->ofs + frag->size;
501 			if (!nrfrags++)
502 				start = frag->ofs;
503 			if (nrfrags == frag->node->frags)
504 				break; /* We've found them all */
505 		}
506 	}
507 	if (fn) {
508 		if (ref_flags(raw) == REF_PRISTINE) {
509 			ret = jffs2_garbage_collect_pristine(c, f->inocache, raw);
510 			if (!ret) {
511 				/* Urgh. Return it sensibly. */
512 				frag->node->raw = f->inocache->nodes;
513 			}
514 			if (ret != -EBADFD)
515 				goto upnout;
516 		}
517 		/* We found a datanode. Do the GC */
518 		if((start >> PAGE_CACHE_SHIFT) < ((end-1) >> PAGE_CACHE_SHIFT)) {
519 			/* It crosses a page boundary. Therefore, it must be a hole. */
520 			ret = jffs2_garbage_collect_hole(c, jeb, f, fn, start, end);
521 		} else {
522 			/* It could still be a hole. But we GC the page this way anyway */
523 			ret = jffs2_garbage_collect_dnode(c, jeb, f, fn, start, end);
524 		}
525 		goto upnout;
526 	}
527 
528 	/* Wasn't a dnode. Try dirent */
529 	for (fd = f->dents; fd; fd=fd->next) {
530 		if (fd->raw == raw)
531 			break;
532 	}
533 
534 	if (fd && fd->ino) {
535 		ret = jffs2_garbage_collect_dirent(c, jeb, f, fd);
536 	} else if (fd) {
537 		ret = jffs2_garbage_collect_deletion_dirent(c, jeb, f, fd);
538 	} else {
539 		printk(KERN_WARNING "Raw node at 0x%08x wasn't in node lists for ino #%u\n",
540 		       ref_offset(raw), f->inocache->ino);
541 		if (ref_obsolete(raw)) {
542 			printk(KERN_WARNING "But it's obsolete so we don't mind too much\n");
543 		} else {
544 			jffs2_dbg_dump_node(c, ref_offset(raw));
545 			BUG();
546 		}
547 	}
548  upnout:
549 	mutex_unlock(&f->sem);
550 
551 	return ret;
552 }
553 
554 static int jffs2_garbage_collect_pristine(struct jffs2_sb_info *c,
555 					  struct jffs2_inode_cache *ic,
556 					  struct jffs2_raw_node_ref *raw)
557 {
558 	union jffs2_node_union *node;
559 	size_t retlen;
560 	int ret;
561 	uint32_t phys_ofs, alloclen;
562 	uint32_t crc, rawlen;
563 	int retried = 0;
564 
565 	D1(printk(KERN_DEBUG "Going to GC REF_PRISTINE node at 0x%08x\n", ref_offset(raw)));
566 
567 	alloclen = rawlen = ref_totlen(c, c->gcblock, raw);
568 
569 	/* Ask for a small amount of space (or the totlen if smaller) because we
570 	   don't want to force wastage of the end of a block if splitting would
571 	   work. */
572 	if (ic && alloclen > sizeof(struct jffs2_raw_inode) + JFFS2_MIN_DATA_LEN)
573 		alloclen = sizeof(struct jffs2_raw_inode) + JFFS2_MIN_DATA_LEN;
574 
575 	ret = jffs2_reserve_space_gc(c, alloclen, &alloclen, rawlen);
576 	/* 'rawlen' is not the exact summary size; it is only an upper estimation */
577 
578 	if (ret)
579 		return ret;
580 
581 	if (alloclen < rawlen) {
582 		/* Doesn't fit untouched. We'll go the old route and split it */
583 		return -EBADFD;
584 	}
585 
586 	node = kmalloc(rawlen, GFP_KERNEL);
587 	if (!node)
588 		return -ENOMEM;
589 
590 	ret = jffs2_flash_read(c, ref_offset(raw), rawlen, &retlen, (char *)node);
591 	if (!ret && retlen != rawlen)
592 		ret = -EIO;
593 	if (ret)
594 		goto out_node;
595 
596 	crc = crc32(0, node, sizeof(struct jffs2_unknown_node)-4);
597 	if (je32_to_cpu(node->u.hdr_crc) != crc) {
598 		printk(KERN_WARNING "Header CRC failed on REF_PRISTINE node at 0x%08x: Read 0x%08x, calculated 0x%08x\n",
599 		       ref_offset(raw), je32_to_cpu(node->u.hdr_crc), crc);
600 		goto bail;
601 	}
602 
603 	switch(je16_to_cpu(node->u.nodetype)) {
604 	case JFFS2_NODETYPE_INODE:
605 		crc = crc32(0, node, sizeof(node->i)-8);
606 		if (je32_to_cpu(node->i.node_crc) != crc) {
607 			printk(KERN_WARNING "Node CRC failed on REF_PRISTINE data node at 0x%08x: Read 0x%08x, calculated 0x%08x\n",
608 			       ref_offset(raw), je32_to_cpu(node->i.node_crc), crc);
609 			goto bail;
610 		}
611 
612 		if (je32_to_cpu(node->i.dsize)) {
613 			crc = crc32(0, node->i.data, je32_to_cpu(node->i.csize));
614 			if (je32_to_cpu(node->i.data_crc) != crc) {
615 				printk(KERN_WARNING "Data CRC failed on REF_PRISTINE data node at 0x%08x: Read 0x%08x, calculated 0x%08x\n",
616 				       ref_offset(raw), je32_to_cpu(node->i.data_crc), crc);
617 				goto bail;
618 			}
619 		}
620 		break;
621 
622 	case JFFS2_NODETYPE_DIRENT:
623 		crc = crc32(0, node, sizeof(node->d)-8);
624 		if (je32_to_cpu(node->d.node_crc) != crc) {
625 			printk(KERN_WARNING "Node CRC failed on REF_PRISTINE dirent node at 0x%08x: Read 0x%08x, calculated 0x%08x\n",
626 			       ref_offset(raw), je32_to_cpu(node->d.node_crc), crc);
627 			goto bail;
628 		}
629 
630 		if (strnlen(node->d.name, node->d.nsize) != node->d.nsize) {
631 			printk(KERN_WARNING "Name in dirent node at 0x%08x contains zeroes\n", ref_offset(raw));
632 			goto bail;
633 		}
634 
635 		if (node->d.nsize) {
636 			crc = crc32(0, node->d.name, node->d.nsize);
637 			if (je32_to_cpu(node->d.name_crc) != crc) {
638 				printk(KERN_WARNING "Name CRC failed on REF_PRISTINE dirent node at 0x%08x: Read 0x%08x, calculated 0x%08x\n",
639 				       ref_offset(raw), je32_to_cpu(node->d.name_crc), crc);
640 				goto bail;
641 			}
642 		}
643 		break;
644 	default:
645 		/* If it's inode-less, we don't _know_ what it is. Just copy it intact */
646 		if (ic) {
647 			printk(KERN_WARNING "Unknown node type for REF_PRISTINE node at 0x%08x: 0x%04x\n",
648 			       ref_offset(raw), je16_to_cpu(node->u.nodetype));
649 			goto bail;
650 		}
651 	}
652 
653 	/* OK, all the CRCs are good; this node can just be copied as-is. */
654  retry:
655 	phys_ofs = write_ofs(c);
656 
657 	ret = jffs2_flash_write(c, phys_ofs, rawlen, &retlen, (char *)node);
658 
659 	if (ret || (retlen != rawlen)) {
660 		printk(KERN_NOTICE "Write of %d bytes at 0x%08x failed. returned %d, retlen %zd\n",
661 		       rawlen, phys_ofs, ret, retlen);
662 		if (retlen) {
663 			jffs2_add_physical_node_ref(c, phys_ofs | REF_OBSOLETE, rawlen, NULL);
664 		} else {
665 			printk(KERN_NOTICE "Not marking the space at 0x%08x as dirty because the flash driver returned retlen zero\n", phys_ofs);
666 		}
667 		if (!retried) {
668 			/* Try to reallocate space and retry */
669 			uint32_t dummy;
670 			struct jffs2_eraseblock *jeb = &c->blocks[phys_ofs / c->sector_size];
671 
672 			retried = 1;
673 
674 			D1(printk(KERN_DEBUG "Retrying failed write of REF_PRISTINE node.\n"));
675 
676 			jffs2_dbg_acct_sanity_check(c,jeb);
677 			jffs2_dbg_acct_paranoia_check(c, jeb);
678 
679 			ret = jffs2_reserve_space_gc(c, rawlen, &dummy, rawlen);
680 						/* this is not the exact summary size of it,
681 							it is only an upper estimation */
682 
683 			if (!ret) {
684 				D1(printk(KERN_DEBUG "Allocated space at 0x%08x to retry failed write.\n", phys_ofs));
685 
686 				jffs2_dbg_acct_sanity_check(c,jeb);
687 				jffs2_dbg_acct_paranoia_check(c, jeb);
688 
689 				goto retry;
690 			}
691 			D1(printk(KERN_DEBUG "Failed to allocate space to retry failed write: %d!\n", ret));
692 		}
693 
694 		if (!ret)
695 			ret = -EIO;
696 		goto out_node;
697 	}
698 	jffs2_add_physical_node_ref(c, phys_ofs | REF_PRISTINE, rawlen, ic);
699 
700 	jffs2_mark_node_obsolete(c, raw);
701 	D1(printk(KERN_DEBUG "WHEEE! GC REF_PRISTINE node at 0x%08x succeeded\n", ref_offset(raw)));
702 
703  out_node:
704 	kfree(node);
705 	return ret;
706  bail:
707 	ret = -EBADFD;
708 	goto out_node;
709 }
710 
711 static int jffs2_garbage_collect_metadata(struct jffs2_sb_info *c, struct jffs2_eraseblock *jeb,
712 					struct jffs2_inode_info *f, struct jffs2_full_dnode *fn)
713 {
714 	struct jffs2_full_dnode *new_fn;
715 	struct jffs2_raw_inode ri;
716 	struct jffs2_node_frag *last_frag;
717 	union jffs2_device_node dev;
718 	char *mdata = NULL;
719 	int mdatalen = 0;
720 	uint32_t alloclen, ilen;
721 	int ret;
722 
723 	if (S_ISBLK(JFFS2_F_I_MODE(f)) ||
724 	    S_ISCHR(JFFS2_F_I_MODE(f)) ) {
725 		/* For these, we don't actually need to read the old node */
726 		mdatalen = jffs2_encode_dev(&dev, JFFS2_F_I_RDEV(f));
727 		mdata = (char *)&dev;
728 		D1(printk(KERN_DEBUG "jffs2_garbage_collect_metadata(): Writing %d bytes of kdev_t\n", mdatalen));
729 	} else if (S_ISLNK(JFFS2_F_I_MODE(f))) {
730 		mdatalen = fn->size;
731 		mdata = kmalloc(fn->size, GFP_KERNEL);
732 		if (!mdata) {
733 			printk(KERN_WARNING "kmalloc of mdata failed in jffs2_garbage_collect_metadata()\n");
734 			return -ENOMEM;
735 		}
736 		ret = jffs2_read_dnode(c, f, fn, mdata, 0, mdatalen);
737 		if (ret) {
738 			printk(KERN_WARNING "read of old metadata failed in jffs2_garbage_collect_metadata(): %d\n", ret);
739 			kfree(mdata);
740 			return ret;
741 		}
742 		D1(printk(KERN_DEBUG "jffs2_garbage_collect_metadata(): Writing %d bites of symlink target\n", mdatalen));
743 
744 	}
745 
746 	ret = jffs2_reserve_space_gc(c, sizeof(ri) + mdatalen, &alloclen,
747 				JFFS2_SUMMARY_INODE_SIZE);
748 	if (ret) {
749 		printk(KERN_WARNING "jffs2_reserve_space_gc of %zd bytes for garbage_collect_metadata failed: %d\n",
750 		       sizeof(ri)+ mdatalen, ret);
751 		goto out;
752 	}
753 
754 	last_frag = frag_last(&f->fragtree);
755 	if (last_frag)
756 		/* Fetch the inode length from the fragtree rather then
757 		 * from i_size since i_size may have not been updated yet */
758 		ilen = last_frag->ofs + last_frag->size;
759 	else
760 		ilen = JFFS2_F_I_SIZE(f);
761 
762 	memset(&ri, 0, sizeof(ri));
763 	ri.magic = cpu_to_je16(JFFS2_MAGIC_BITMASK);
764 	ri.nodetype = cpu_to_je16(JFFS2_NODETYPE_INODE);
765 	ri.totlen = cpu_to_je32(sizeof(ri) + mdatalen);
766 	ri.hdr_crc = cpu_to_je32(crc32(0, &ri, sizeof(struct jffs2_unknown_node)-4));
767 
768 	ri.ino = cpu_to_je32(f->inocache->ino);
769 	ri.version = cpu_to_je32(++f->highest_version);
770 	ri.mode = cpu_to_jemode(JFFS2_F_I_MODE(f));
771 	ri.uid = cpu_to_je16(JFFS2_F_I_UID(f));
772 	ri.gid = cpu_to_je16(JFFS2_F_I_GID(f));
773 	ri.isize = cpu_to_je32(ilen);
774 	ri.atime = cpu_to_je32(JFFS2_F_I_ATIME(f));
775 	ri.ctime = cpu_to_je32(JFFS2_F_I_CTIME(f));
776 	ri.mtime = cpu_to_je32(JFFS2_F_I_MTIME(f));
777 	ri.offset = cpu_to_je32(0);
778 	ri.csize = cpu_to_je32(mdatalen);
779 	ri.dsize = cpu_to_je32(mdatalen);
780 	ri.compr = JFFS2_COMPR_NONE;
781 	ri.node_crc = cpu_to_je32(crc32(0, &ri, sizeof(ri)-8));
782 	ri.data_crc = cpu_to_je32(crc32(0, mdata, mdatalen));
783 
784 	new_fn = jffs2_write_dnode(c, f, &ri, mdata, mdatalen, ALLOC_GC);
785 
786 	if (IS_ERR(new_fn)) {
787 		printk(KERN_WARNING "Error writing new dnode: %ld\n", PTR_ERR(new_fn));
788 		ret = PTR_ERR(new_fn);
789 		goto out;
790 	}
791 	jffs2_mark_node_obsolete(c, fn->raw);
792 	jffs2_free_full_dnode(fn);
793 	f->metadata = new_fn;
794  out:
795 	if (S_ISLNK(JFFS2_F_I_MODE(f)))
796 		kfree(mdata);
797 	return ret;
798 }
799 
800 static int jffs2_garbage_collect_dirent(struct jffs2_sb_info *c, struct jffs2_eraseblock *jeb,
801 					struct jffs2_inode_info *f, struct jffs2_full_dirent *fd)
802 {
803 	struct jffs2_full_dirent *new_fd;
804 	struct jffs2_raw_dirent rd;
805 	uint32_t alloclen;
806 	int ret;
807 
808 	rd.magic = cpu_to_je16(JFFS2_MAGIC_BITMASK);
809 	rd.nodetype = cpu_to_je16(JFFS2_NODETYPE_DIRENT);
810 	rd.nsize = strlen(fd->name);
811 	rd.totlen = cpu_to_je32(sizeof(rd) + rd.nsize);
812 	rd.hdr_crc = cpu_to_je32(crc32(0, &rd, sizeof(struct jffs2_unknown_node)-4));
813 
814 	rd.pino = cpu_to_je32(f->inocache->ino);
815 	rd.version = cpu_to_je32(++f->highest_version);
816 	rd.ino = cpu_to_je32(fd->ino);
817 	/* If the times on this inode were set by explicit utime() they can be different,
818 	   so refrain from splatting them. */
819 	if (JFFS2_F_I_MTIME(f) == JFFS2_F_I_CTIME(f))
820 		rd.mctime = cpu_to_je32(JFFS2_F_I_MTIME(f));
821 	else
822 		rd.mctime = cpu_to_je32(0);
823 	rd.type = fd->type;
824 	rd.node_crc = cpu_to_je32(crc32(0, &rd, sizeof(rd)-8));
825 	rd.name_crc = cpu_to_je32(crc32(0, fd->name, rd.nsize));
826 
827 	ret = jffs2_reserve_space_gc(c, sizeof(rd)+rd.nsize, &alloclen,
828 				JFFS2_SUMMARY_DIRENT_SIZE(rd.nsize));
829 	if (ret) {
830 		printk(KERN_WARNING "jffs2_reserve_space_gc of %zd bytes for garbage_collect_dirent failed: %d\n",
831 		       sizeof(rd)+rd.nsize, ret);
832 		return ret;
833 	}
834 	new_fd = jffs2_write_dirent(c, f, &rd, fd->name, rd.nsize, ALLOC_GC);
835 
836 	if (IS_ERR(new_fd)) {
837 		printk(KERN_WARNING "jffs2_write_dirent in garbage_collect_dirent failed: %ld\n", PTR_ERR(new_fd));
838 		return PTR_ERR(new_fd);
839 	}
840 	jffs2_add_fd_to_list(c, new_fd, &f->dents);
841 	return 0;
842 }
843 
844 static int jffs2_garbage_collect_deletion_dirent(struct jffs2_sb_info *c, struct jffs2_eraseblock *jeb,
845 					struct jffs2_inode_info *f, struct jffs2_full_dirent *fd)
846 {
847 	struct jffs2_full_dirent **fdp = &f->dents;
848 	int found = 0;
849 
850 	/* On a medium where we can't actually mark nodes obsolete
851 	   pernamently, such as NAND flash, we need to work out
852 	   whether this deletion dirent is still needed to actively
853 	   delete a 'real' dirent with the same name that's still
854 	   somewhere else on the flash. */
855 	if (!jffs2_can_mark_obsolete(c)) {
856 		struct jffs2_raw_dirent *rd;
857 		struct jffs2_raw_node_ref *raw;
858 		int ret;
859 		size_t retlen;
860 		int name_len = strlen(fd->name);
861 		uint32_t name_crc = crc32(0, fd->name, name_len);
862 		uint32_t rawlen = ref_totlen(c, jeb, fd->raw);
863 
864 		rd = kmalloc(rawlen, GFP_KERNEL);
865 		if (!rd)
866 			return -ENOMEM;
867 
868 		/* Prevent the erase code from nicking the obsolete node refs while
869 		   we're looking at them. I really don't like this extra lock but
870 		   can't see any alternative. Suggestions on a postcard to... */
871 		mutex_lock(&c->erase_free_sem);
872 
873 		for (raw = f->inocache->nodes; raw != (void *)f->inocache; raw = raw->next_in_ino) {
874 
875 			cond_resched();
876 
877 			/* We only care about obsolete ones */
878 			if (!(ref_obsolete(raw)))
879 				continue;
880 
881 			/* Any dirent with the same name is going to have the same length... */
882 			if (ref_totlen(c, NULL, raw) != rawlen)
883 				continue;
884 
885 			/* Doesn't matter if there's one in the same erase block. We're going to
886 			   delete it too at the same time. */
887 			if (SECTOR_ADDR(raw->flash_offset) == SECTOR_ADDR(fd->raw->flash_offset))
888 				continue;
889 
890 			D1(printk(KERN_DEBUG "Check potential deletion dirent at %08x\n", ref_offset(raw)));
891 
892 			/* This is an obsolete node belonging to the same directory, and it's of the right
893 			   length. We need to take a closer look...*/
894 			ret = jffs2_flash_read(c, ref_offset(raw), rawlen, &retlen, (char *)rd);
895 			if (ret) {
896 				printk(KERN_WARNING "jffs2_g_c_deletion_dirent(): Read error (%d) reading obsolete node at %08x\n", ret, ref_offset(raw));
897 				/* If we can't read it, we don't need to continue to obsolete it. Continue */
898 				continue;
899 			}
900 			if (retlen != rawlen) {
901 				printk(KERN_WARNING "jffs2_g_c_deletion_dirent(): Short read (%zd not %u) reading header from obsolete node at %08x\n",
902 				       retlen, rawlen, ref_offset(raw));
903 				continue;
904 			}
905 
906 			if (je16_to_cpu(rd->nodetype) != JFFS2_NODETYPE_DIRENT)
907 				continue;
908 
909 			/* If the name CRC doesn't match, skip */
910 			if (je32_to_cpu(rd->name_crc) != name_crc)
911 				continue;
912 
913 			/* If the name length doesn't match, or it's another deletion dirent, skip */
914 			if (rd->nsize != name_len || !je32_to_cpu(rd->ino))
915 				continue;
916 
917 			/* OK, check the actual name now */
918 			if (memcmp(rd->name, fd->name, name_len))
919 				continue;
920 
921 			/* OK. The name really does match. There really is still an older node on
922 			   the flash which our deletion dirent obsoletes. So we have to write out
923 			   a new deletion dirent to replace it */
924 			mutex_unlock(&c->erase_free_sem);
925 
926 			D1(printk(KERN_DEBUG "Deletion dirent at %08x still obsoletes real dirent \"%s\" at %08x for ino #%u\n",
927 				  ref_offset(fd->raw), fd->name, ref_offset(raw), je32_to_cpu(rd->ino)));
928 			kfree(rd);
929 
930 			return jffs2_garbage_collect_dirent(c, jeb, f, fd);
931 		}
932 
933 		mutex_unlock(&c->erase_free_sem);
934 		kfree(rd);
935 	}
936 
937 	/* FIXME: If we're deleting a dirent which contains the current mtime and ctime,
938 	   we should update the metadata node with those times accordingly */
939 
940 	/* No need for it any more. Just mark it obsolete and remove it from the list */
941 	while (*fdp) {
942 		if ((*fdp) == fd) {
943 			found = 1;
944 			*fdp = fd->next;
945 			break;
946 		}
947 		fdp = &(*fdp)->next;
948 	}
949 	if (!found) {
950 		printk(KERN_WARNING "Deletion dirent \"%s\" not found in list for ino #%u\n", fd->name, f->inocache->ino);
951 	}
952 	jffs2_mark_node_obsolete(c, fd->raw);
953 	jffs2_free_full_dirent(fd);
954 	return 0;
955 }
956 
957 static int jffs2_garbage_collect_hole(struct jffs2_sb_info *c, struct jffs2_eraseblock *jeb,
958 				      struct jffs2_inode_info *f, struct jffs2_full_dnode *fn,
959 				      uint32_t start, uint32_t end)
960 {
961 	struct jffs2_raw_inode ri;
962 	struct jffs2_node_frag *frag;
963 	struct jffs2_full_dnode *new_fn;
964 	uint32_t alloclen, ilen;
965 	int ret;
966 
967 	D1(printk(KERN_DEBUG "Writing replacement hole node for ino #%u from offset 0x%x to 0x%x\n",
968 		  f->inocache->ino, start, end));
969 
970 	memset(&ri, 0, sizeof(ri));
971 
972 	if(fn->frags > 1) {
973 		size_t readlen;
974 		uint32_t crc;
975 		/* It's partially obsoleted by a later write. So we have to
976 		   write it out again with the _same_ version as before */
977 		ret = jffs2_flash_read(c, ref_offset(fn->raw), sizeof(ri), &readlen, (char *)&ri);
978 		if (readlen != sizeof(ri) || ret) {
979 			printk(KERN_WARNING "Node read failed in jffs2_garbage_collect_hole. Ret %d, retlen %zd. Data will be lost by writing new hole node\n", ret, readlen);
980 			goto fill;
981 		}
982 		if (je16_to_cpu(ri.nodetype) != JFFS2_NODETYPE_INODE) {
983 			printk(KERN_WARNING "jffs2_garbage_collect_hole: Node at 0x%08x had node type 0x%04x instead of JFFS2_NODETYPE_INODE(0x%04x)\n",
984 			       ref_offset(fn->raw),
985 			       je16_to_cpu(ri.nodetype), JFFS2_NODETYPE_INODE);
986 			return -EIO;
987 		}
988 		if (je32_to_cpu(ri.totlen) != sizeof(ri)) {
989 			printk(KERN_WARNING "jffs2_garbage_collect_hole: Node at 0x%08x had totlen 0x%x instead of expected 0x%zx\n",
990 			       ref_offset(fn->raw),
991 			       je32_to_cpu(ri.totlen), sizeof(ri));
992 			return -EIO;
993 		}
994 		crc = crc32(0, &ri, sizeof(ri)-8);
995 		if (crc != je32_to_cpu(ri.node_crc)) {
996 			printk(KERN_WARNING "jffs2_garbage_collect_hole: Node at 0x%08x had CRC 0x%08x which doesn't match calculated CRC 0x%08x\n",
997 			       ref_offset(fn->raw),
998 			       je32_to_cpu(ri.node_crc), crc);
999 			/* FIXME: We could possibly deal with this by writing new holes for each frag */
1000 			printk(KERN_WARNING "Data in the range 0x%08x to 0x%08x of inode #%u will be lost\n",
1001 			       start, end, f->inocache->ino);
1002 			goto fill;
1003 		}
1004 		if (ri.compr != JFFS2_COMPR_ZERO) {
1005 			printk(KERN_WARNING "jffs2_garbage_collect_hole: Node 0x%08x wasn't a hole node!\n", ref_offset(fn->raw));
1006 			printk(KERN_WARNING "Data in the range 0x%08x to 0x%08x of inode #%u will be lost\n",
1007 			       start, end, f->inocache->ino);
1008 			goto fill;
1009 		}
1010 	} else {
1011 	fill:
1012 		ri.magic = cpu_to_je16(JFFS2_MAGIC_BITMASK);
1013 		ri.nodetype = cpu_to_je16(JFFS2_NODETYPE_INODE);
1014 		ri.totlen = cpu_to_je32(sizeof(ri));
1015 		ri.hdr_crc = cpu_to_je32(crc32(0, &ri, sizeof(struct jffs2_unknown_node)-4));
1016 
1017 		ri.ino = cpu_to_je32(f->inocache->ino);
1018 		ri.version = cpu_to_je32(++f->highest_version);
1019 		ri.offset = cpu_to_je32(start);
1020 		ri.dsize = cpu_to_je32(end - start);
1021 		ri.csize = cpu_to_je32(0);
1022 		ri.compr = JFFS2_COMPR_ZERO;
1023 	}
1024 
1025 	frag = frag_last(&f->fragtree);
1026 	if (frag)
1027 		/* Fetch the inode length from the fragtree rather then
1028 		 * from i_size since i_size may have not been updated yet */
1029 		ilen = frag->ofs + frag->size;
1030 	else
1031 		ilen = JFFS2_F_I_SIZE(f);
1032 
1033 	ri.mode = cpu_to_jemode(JFFS2_F_I_MODE(f));
1034 	ri.uid = cpu_to_je16(JFFS2_F_I_UID(f));
1035 	ri.gid = cpu_to_je16(JFFS2_F_I_GID(f));
1036 	ri.isize = cpu_to_je32(ilen);
1037 	ri.atime = cpu_to_je32(JFFS2_F_I_ATIME(f));
1038 	ri.ctime = cpu_to_je32(JFFS2_F_I_CTIME(f));
1039 	ri.mtime = cpu_to_je32(JFFS2_F_I_MTIME(f));
1040 	ri.data_crc = cpu_to_je32(0);
1041 	ri.node_crc = cpu_to_je32(crc32(0, &ri, sizeof(ri)-8));
1042 
1043 	ret = jffs2_reserve_space_gc(c, sizeof(ri), &alloclen,
1044 				     JFFS2_SUMMARY_INODE_SIZE);
1045 	if (ret) {
1046 		printk(KERN_WARNING "jffs2_reserve_space_gc of %zd bytes for garbage_collect_hole failed: %d\n",
1047 		       sizeof(ri), ret);
1048 		return ret;
1049 	}
1050 	new_fn = jffs2_write_dnode(c, f, &ri, NULL, 0, ALLOC_GC);
1051 
1052 	if (IS_ERR(new_fn)) {
1053 		printk(KERN_WARNING "Error writing new hole node: %ld\n", PTR_ERR(new_fn));
1054 		return PTR_ERR(new_fn);
1055 	}
1056 	if (je32_to_cpu(ri.version) == f->highest_version) {
1057 		jffs2_add_full_dnode_to_inode(c, f, new_fn);
1058 		if (f->metadata) {
1059 			jffs2_mark_node_obsolete(c, f->metadata->raw);
1060 			jffs2_free_full_dnode(f->metadata);
1061 			f->metadata = NULL;
1062 		}
1063 		return 0;
1064 	}
1065 
1066 	/*
1067 	 * We should only get here in the case where the node we are
1068 	 * replacing had more than one frag, so we kept the same version
1069 	 * number as before. (Except in case of error -- see 'goto fill;'
1070 	 * above.)
1071 	 */
1072 	D1(if(unlikely(fn->frags <= 1)) {
1073 		printk(KERN_WARNING "jffs2_garbage_collect_hole: Replacing fn with %d frag(s) but new ver %d != highest_version %d of ino #%d\n",
1074 		       fn->frags, je32_to_cpu(ri.version), f->highest_version,
1075 		       je32_to_cpu(ri.ino));
1076 	});
1077 
1078 	/* This is a partially-overlapped hole node. Mark it REF_NORMAL not REF_PRISTINE */
1079 	mark_ref_normal(new_fn->raw);
1080 
1081 	for (frag = jffs2_lookup_node_frag(&f->fragtree, fn->ofs);
1082 	     frag; frag = frag_next(frag)) {
1083 		if (frag->ofs > fn->size + fn->ofs)
1084 			break;
1085 		if (frag->node == fn) {
1086 			frag->node = new_fn;
1087 			new_fn->frags++;
1088 			fn->frags--;
1089 		}
1090 	}
1091 	if (fn->frags) {
1092 		printk(KERN_WARNING "jffs2_garbage_collect_hole: Old node still has frags!\n");
1093 		BUG();
1094 	}
1095 	if (!new_fn->frags) {
1096 		printk(KERN_WARNING "jffs2_garbage_collect_hole: New node has no frags!\n");
1097 		BUG();
1098 	}
1099 
1100 	jffs2_mark_node_obsolete(c, fn->raw);
1101 	jffs2_free_full_dnode(fn);
1102 
1103 	return 0;
1104 }
1105 
1106 static int jffs2_garbage_collect_dnode(struct jffs2_sb_info *c, struct jffs2_eraseblock *orig_jeb,
1107 				       struct jffs2_inode_info *f, struct jffs2_full_dnode *fn,
1108 				       uint32_t start, uint32_t end)
1109 {
1110 	struct jffs2_full_dnode *new_fn;
1111 	struct jffs2_raw_inode ri;
1112 	uint32_t alloclen, offset, orig_end, orig_start;
1113 	int ret = 0;
1114 	unsigned char *comprbuf = NULL, *writebuf;
1115 	unsigned long pg;
1116 	unsigned char *pg_ptr;
1117 
1118 	memset(&ri, 0, sizeof(ri));
1119 
1120 	D1(printk(KERN_DEBUG "Writing replacement dnode for ino #%u from offset 0x%x to 0x%x\n",
1121 		  f->inocache->ino, start, end));
1122 
1123 	orig_end = end;
1124 	orig_start = start;
1125 
1126 	if (c->nr_free_blocks + c->nr_erasing_blocks > c->resv_blocks_gcmerge) {
1127 		/* Attempt to do some merging. But only expand to cover logically
1128 		   adjacent frags if the block containing them is already considered
1129 		   to be dirty. Otherwise we end up with GC just going round in
1130 		   circles dirtying the nodes it already wrote out, especially
1131 		   on NAND where we have small eraseblocks and hence a much higher
1132 		   chance of nodes having to be split to cross boundaries. */
1133 
1134 		struct jffs2_node_frag *frag;
1135 		uint32_t min, max;
1136 
1137 		min = start & ~(PAGE_CACHE_SIZE-1);
1138 		max = min + PAGE_CACHE_SIZE;
1139 
1140 		frag = jffs2_lookup_node_frag(&f->fragtree, start);
1141 
1142 		/* BUG_ON(!frag) but that'll happen anyway... */
1143 
1144 		BUG_ON(frag->ofs != start);
1145 
1146 		/* First grow down... */
1147 		while((frag = frag_prev(frag)) && frag->ofs >= min) {
1148 
1149 			/* If the previous frag doesn't even reach the beginning, there's
1150 			   excessive fragmentation. Just merge. */
1151 			if (frag->ofs > min) {
1152 				D1(printk(KERN_DEBUG "Expanding down to cover partial frag (0x%x-0x%x)\n",
1153 					  frag->ofs, frag->ofs+frag->size));
1154 				start = frag->ofs;
1155 				continue;
1156 			}
1157 			/* OK. This frag holds the first byte of the page. */
1158 			if (!frag->node || !frag->node->raw) {
1159 				D1(printk(KERN_DEBUG "First frag in page is hole (0x%x-0x%x). Not expanding down.\n",
1160 					  frag->ofs, frag->ofs+frag->size));
1161 				break;
1162 			} else {
1163 
1164 				/* OK, it's a frag which extends to the beginning of the page. Does it live
1165 				   in a block which is still considered clean? If so, don't obsolete it.
1166 				   If not, cover it anyway. */
1167 
1168 				struct jffs2_raw_node_ref *raw = frag->node->raw;
1169 				struct jffs2_eraseblock *jeb;
1170 
1171 				jeb = &c->blocks[raw->flash_offset / c->sector_size];
1172 
1173 				if (jeb == c->gcblock) {
1174 					D1(printk(KERN_DEBUG "Expanding down to cover frag (0x%x-0x%x) in gcblock at %08x\n",
1175 						  frag->ofs, frag->ofs+frag->size, ref_offset(raw)));
1176 					start = frag->ofs;
1177 					break;
1178 				}
1179 				if (!ISDIRTY(jeb->dirty_size + jeb->wasted_size)) {
1180 					D1(printk(KERN_DEBUG "Not expanding down to cover frag (0x%x-0x%x) in clean block %08x\n",
1181 						  frag->ofs, frag->ofs+frag->size, jeb->offset));
1182 					break;
1183 				}
1184 
1185 				D1(printk(KERN_DEBUG "Expanding down to cover frag (0x%x-0x%x) in dirty block %08x\n",
1186 						  frag->ofs, frag->ofs+frag->size, jeb->offset));
1187 				start = frag->ofs;
1188 				break;
1189 			}
1190 		}
1191 
1192 		/* ... then up */
1193 
1194 		/* Find last frag which is actually part of the node we're to GC. */
1195 		frag = jffs2_lookup_node_frag(&f->fragtree, end-1);
1196 
1197 		while((frag = frag_next(frag)) && frag->ofs+frag->size <= max) {
1198 
1199 			/* If the previous frag doesn't even reach the beginning, there's lots
1200 			   of fragmentation. Just merge. */
1201 			if (frag->ofs+frag->size < max) {
1202 				D1(printk(KERN_DEBUG "Expanding up to cover partial frag (0x%x-0x%x)\n",
1203 					  frag->ofs, frag->ofs+frag->size));
1204 				end = frag->ofs + frag->size;
1205 				continue;
1206 			}
1207 
1208 			if (!frag->node || !frag->node->raw) {
1209 				D1(printk(KERN_DEBUG "Last frag in page is hole (0x%x-0x%x). Not expanding up.\n",
1210 					  frag->ofs, frag->ofs+frag->size));
1211 				break;
1212 			} else {
1213 
1214 				/* OK, it's a frag which extends to the beginning of the page. Does it live
1215 				   in a block which is still considered clean? If so, don't obsolete it.
1216 				   If not, cover it anyway. */
1217 
1218 				struct jffs2_raw_node_ref *raw = frag->node->raw;
1219 				struct jffs2_eraseblock *jeb;
1220 
1221 				jeb = &c->blocks[raw->flash_offset / c->sector_size];
1222 
1223 				if (jeb == c->gcblock) {
1224 					D1(printk(KERN_DEBUG "Expanding up to cover frag (0x%x-0x%x) in gcblock at %08x\n",
1225 						  frag->ofs, frag->ofs+frag->size, ref_offset(raw)));
1226 					end = frag->ofs + frag->size;
1227 					break;
1228 				}
1229 				if (!ISDIRTY(jeb->dirty_size + jeb->wasted_size)) {
1230 					D1(printk(KERN_DEBUG "Not expanding up to cover frag (0x%x-0x%x) in clean block %08x\n",
1231 						  frag->ofs, frag->ofs+frag->size, jeb->offset));
1232 					break;
1233 				}
1234 
1235 				D1(printk(KERN_DEBUG "Expanding up to cover frag (0x%x-0x%x) in dirty block %08x\n",
1236 						  frag->ofs, frag->ofs+frag->size, jeb->offset));
1237 				end = frag->ofs + frag->size;
1238 				break;
1239 			}
1240 		}
1241 		D1(printk(KERN_DEBUG "Expanded dnode to write from (0x%x-0x%x) to (0x%x-0x%x)\n",
1242 			  orig_start, orig_end, start, end));
1243 
1244 		D1(BUG_ON(end > frag_last(&f->fragtree)->ofs + frag_last(&f->fragtree)->size));
1245 		BUG_ON(end < orig_end);
1246 		BUG_ON(start > orig_start);
1247 	}
1248 
1249 	/* First, use readpage() to read the appropriate page into the page cache */
1250 	/* Q: What happens if we actually try to GC the _same_ page for which commit_write()
1251 	 *    triggered garbage collection in the first place?
1252 	 * A: I _think_ it's OK. read_cache_page shouldn't deadlock, we'll write out the
1253 	 *    page OK. We'll actually write it out again in commit_write, which is a little
1254 	 *    suboptimal, but at least we're correct.
1255 	 */
1256 	pg_ptr = jffs2_gc_fetch_page(c, f, start, &pg);
1257 
1258 	if (IS_ERR(pg_ptr)) {
1259 		printk(KERN_WARNING "read_cache_page() returned error: %ld\n", PTR_ERR(pg_ptr));
1260 		return PTR_ERR(pg_ptr);
1261 	}
1262 
1263 	offset = start;
1264 	while(offset < orig_end) {
1265 		uint32_t datalen;
1266 		uint32_t cdatalen;
1267 		uint16_t comprtype = JFFS2_COMPR_NONE;
1268 
1269 		ret = jffs2_reserve_space_gc(c, sizeof(ri) + JFFS2_MIN_DATA_LEN,
1270 					&alloclen, JFFS2_SUMMARY_INODE_SIZE);
1271 
1272 		if (ret) {
1273 			printk(KERN_WARNING "jffs2_reserve_space_gc of %zd bytes for garbage_collect_dnode failed: %d\n",
1274 			       sizeof(ri)+ JFFS2_MIN_DATA_LEN, ret);
1275 			break;
1276 		}
1277 		cdatalen = min_t(uint32_t, alloclen - sizeof(ri), end - offset);
1278 		datalen = end - offset;
1279 
1280 		writebuf = pg_ptr + (offset & (PAGE_CACHE_SIZE -1));
1281 
1282 		comprtype = jffs2_compress(c, f, writebuf, &comprbuf, &datalen, &cdatalen);
1283 
1284 		ri.magic = cpu_to_je16(JFFS2_MAGIC_BITMASK);
1285 		ri.nodetype = cpu_to_je16(JFFS2_NODETYPE_INODE);
1286 		ri.totlen = cpu_to_je32(sizeof(ri) + cdatalen);
1287 		ri.hdr_crc = cpu_to_je32(crc32(0, &ri, sizeof(struct jffs2_unknown_node)-4));
1288 
1289 		ri.ino = cpu_to_je32(f->inocache->ino);
1290 		ri.version = cpu_to_je32(++f->highest_version);
1291 		ri.mode = cpu_to_jemode(JFFS2_F_I_MODE(f));
1292 		ri.uid = cpu_to_je16(JFFS2_F_I_UID(f));
1293 		ri.gid = cpu_to_je16(JFFS2_F_I_GID(f));
1294 		ri.isize = cpu_to_je32(JFFS2_F_I_SIZE(f));
1295 		ri.atime = cpu_to_je32(JFFS2_F_I_ATIME(f));
1296 		ri.ctime = cpu_to_je32(JFFS2_F_I_CTIME(f));
1297 		ri.mtime = cpu_to_je32(JFFS2_F_I_MTIME(f));
1298 		ri.offset = cpu_to_je32(offset);
1299 		ri.csize = cpu_to_je32(cdatalen);
1300 		ri.dsize = cpu_to_je32(datalen);
1301 		ri.compr = comprtype & 0xff;
1302 		ri.usercompr = (comprtype >> 8) & 0xff;
1303 		ri.node_crc = cpu_to_je32(crc32(0, &ri, sizeof(ri)-8));
1304 		ri.data_crc = cpu_to_je32(crc32(0, comprbuf, cdatalen));
1305 
1306 		new_fn = jffs2_write_dnode(c, f, &ri, comprbuf, cdatalen, ALLOC_GC);
1307 
1308 		jffs2_free_comprbuf(comprbuf, writebuf);
1309 
1310 		if (IS_ERR(new_fn)) {
1311 			printk(KERN_WARNING "Error writing new dnode: %ld\n", PTR_ERR(new_fn));
1312 			ret = PTR_ERR(new_fn);
1313 			break;
1314 		}
1315 		ret = jffs2_add_full_dnode_to_inode(c, f, new_fn);
1316 		offset += datalen;
1317 		if (f->metadata) {
1318 			jffs2_mark_node_obsolete(c, f->metadata->raw);
1319 			jffs2_free_full_dnode(f->metadata);
1320 			f->metadata = NULL;
1321 		}
1322 	}
1323 
1324 	jffs2_gc_release_page(c, pg_ptr, &pg);
1325 	return ret;
1326 }
1327