xref: /openbmc/linux/fs/jffs2/gc.c (revision 2665ea84)
1 /*
2  * JFFS2 -- Journalling Flash File System, Version 2.
3  *
4  * Copyright © 2001-2007 Red Hat, Inc.
5  *
6  * Created by David Woodhouse <dwmw2@infradead.org>
7  *
8  * For licensing information, see the file 'LICENCE' in this directory.
9  *
10  */
11 
12 #include <linux/kernel.h>
13 #include <linux/mtd/mtd.h>
14 #include <linux/slab.h>
15 #include <linux/pagemap.h>
16 #include <linux/crc32.h>
17 #include <linux/compiler.h>
18 #include <linux/stat.h>
19 #include "nodelist.h"
20 #include "compr.h"
21 
22 static int jffs2_garbage_collect_pristine(struct jffs2_sb_info *c,
23 					  struct jffs2_inode_cache *ic,
24 					  struct jffs2_raw_node_ref *raw);
25 static int jffs2_garbage_collect_metadata(struct jffs2_sb_info *c, struct jffs2_eraseblock *jeb,
26 					struct jffs2_inode_info *f, struct jffs2_full_dnode *fd);
27 static int jffs2_garbage_collect_dirent(struct jffs2_sb_info *c, struct jffs2_eraseblock *jeb,
28 					struct jffs2_inode_info *f, struct jffs2_full_dirent *fd);
29 static int jffs2_garbage_collect_deletion_dirent(struct jffs2_sb_info *c, struct jffs2_eraseblock *jeb,
30 					struct jffs2_inode_info *f, struct jffs2_full_dirent *fd);
31 static int jffs2_garbage_collect_hole(struct jffs2_sb_info *c, struct jffs2_eraseblock *jeb,
32 				      struct jffs2_inode_info *f, struct jffs2_full_dnode *fn,
33 				      uint32_t start, uint32_t end);
34 static int jffs2_garbage_collect_dnode(struct jffs2_sb_info *c, struct jffs2_eraseblock *jeb,
35 				       struct jffs2_inode_info *f, struct jffs2_full_dnode *fn,
36 				       uint32_t start, uint32_t end);
37 static int jffs2_garbage_collect_live(struct jffs2_sb_info *c,  struct jffs2_eraseblock *jeb,
38 			       struct jffs2_raw_node_ref *raw, struct jffs2_inode_info *f);
39 
40 /* Called with erase_completion_lock held */
41 static struct jffs2_eraseblock *jffs2_find_gc_block(struct jffs2_sb_info *c)
42 {
43 	struct jffs2_eraseblock *ret;
44 	struct list_head *nextlist = NULL;
45 	int n = jiffies % 128;
46 
47 	/* Pick an eraseblock to garbage collect next. This is where we'll
48 	   put the clever wear-levelling algorithms. Eventually.  */
49 	/* We possibly want to favour the dirtier blocks more when the
50 	   number of free blocks is low. */
51 again:
52 	if (!list_empty(&c->bad_used_list) && c->nr_free_blocks > c->resv_blocks_gcbad) {
53 		D1(printk(KERN_DEBUG "Picking block from bad_used_list to GC next\n"));
54 		nextlist = &c->bad_used_list;
55 	} else if (n < 50 && !list_empty(&c->erasable_list)) {
56 		/* Note that most of them will have gone directly to be erased.
57 		   So don't favour the erasable_list _too_ much. */
58 		D1(printk(KERN_DEBUG "Picking block from erasable_list to GC next\n"));
59 		nextlist = &c->erasable_list;
60 	} else if (n < 110 && !list_empty(&c->very_dirty_list)) {
61 		/* Most of the time, pick one off the very_dirty list */
62 		D1(printk(KERN_DEBUG "Picking block from very_dirty_list to GC next\n"));
63 		nextlist = &c->very_dirty_list;
64 	} else if (n < 126 && !list_empty(&c->dirty_list)) {
65 		D1(printk(KERN_DEBUG "Picking block from dirty_list to GC next\n"));
66 		nextlist = &c->dirty_list;
67 	} else if (!list_empty(&c->clean_list)) {
68 		D1(printk(KERN_DEBUG "Picking block from clean_list to GC next\n"));
69 		nextlist = &c->clean_list;
70 	} else if (!list_empty(&c->dirty_list)) {
71 		D1(printk(KERN_DEBUG "Picking block from dirty_list to GC next (clean_list was empty)\n"));
72 
73 		nextlist = &c->dirty_list;
74 	} else if (!list_empty(&c->very_dirty_list)) {
75 		D1(printk(KERN_DEBUG "Picking block from very_dirty_list to GC next (clean_list and dirty_list were empty)\n"));
76 		nextlist = &c->very_dirty_list;
77 	} else if (!list_empty(&c->erasable_list)) {
78 		D1(printk(KERN_DEBUG "Picking block from erasable_list to GC next (clean_list and {very_,}dirty_list were empty)\n"));
79 
80 		nextlist = &c->erasable_list;
81 	} else if (!list_empty(&c->erasable_pending_wbuf_list)) {
82 		/* There are blocks are wating for the wbuf sync */
83 		D1(printk(KERN_DEBUG "Synching wbuf in order to reuse erasable_pending_wbuf_list blocks\n"));
84 		spin_unlock(&c->erase_completion_lock);
85 		jffs2_flush_wbuf_pad(c);
86 		spin_lock(&c->erase_completion_lock);
87 		goto again;
88 	} else {
89 		/* Eep. All were empty */
90 		D1(printk(KERN_NOTICE "jffs2: No clean, dirty _or_ erasable blocks to GC from! Where are they all?\n"));
91 		return NULL;
92 	}
93 
94 	ret = list_entry(nextlist->next, struct jffs2_eraseblock, list);
95 	list_del(&ret->list);
96 	c->gcblock = ret;
97 	ret->gc_node = ret->first_node;
98 	if (!ret->gc_node) {
99 		printk(KERN_WARNING "Eep. ret->gc_node for block at 0x%08x is NULL\n", ret->offset);
100 		BUG();
101 	}
102 
103 	/* Have we accidentally picked a clean block with wasted space ? */
104 	if (ret->wasted_size) {
105 		D1(printk(KERN_DEBUG "Converting wasted_size %08x to dirty_size\n", ret->wasted_size));
106 		ret->dirty_size += ret->wasted_size;
107 		c->wasted_size -= ret->wasted_size;
108 		c->dirty_size += ret->wasted_size;
109 		ret->wasted_size = 0;
110 	}
111 
112 	return ret;
113 }
114 
115 /* jffs2_garbage_collect_pass
116  * Make a single attempt to progress GC. Move one node, and possibly
117  * start erasing one eraseblock.
118  */
119 int jffs2_garbage_collect_pass(struct jffs2_sb_info *c)
120 {
121 	struct jffs2_inode_info *f;
122 	struct jffs2_inode_cache *ic;
123 	struct jffs2_eraseblock *jeb;
124 	struct jffs2_raw_node_ref *raw;
125 	uint32_t gcblock_dirty;
126 	int ret = 0, inum, nlink;
127 	int xattr = 0;
128 
129 	if (down_interruptible(&c->alloc_sem))
130 		return -EINTR;
131 
132 	for (;;) {
133 		spin_lock(&c->erase_completion_lock);
134 		if (!c->unchecked_size)
135 			break;
136 
137 		/* We can't start doing GC yet. We haven't finished checking
138 		   the node CRCs etc. Do it now. */
139 
140 		/* checked_ino is protected by the alloc_sem */
141 		if (c->checked_ino > c->highest_ino && xattr) {
142 			printk(KERN_CRIT "Checked all inodes but still 0x%x bytes of unchecked space?\n",
143 			       c->unchecked_size);
144 			jffs2_dbg_dump_block_lists_nolock(c);
145 			spin_unlock(&c->erase_completion_lock);
146 			up(&c->alloc_sem);
147 			return -ENOSPC;
148 		}
149 
150 		spin_unlock(&c->erase_completion_lock);
151 
152 		if (!xattr)
153 			xattr = jffs2_verify_xattr(c);
154 
155 		spin_lock(&c->inocache_lock);
156 
157 		ic = jffs2_get_ino_cache(c, c->checked_ino++);
158 
159 		if (!ic) {
160 			spin_unlock(&c->inocache_lock);
161 			continue;
162 		}
163 
164 		if (!ic->nlink) {
165 			D1(printk(KERN_DEBUG "Skipping check of ino #%d with nlink zero\n",
166 				  ic->ino));
167 			spin_unlock(&c->inocache_lock);
168 			jffs2_xattr_delete_inode(c, ic);
169 			continue;
170 		}
171 		switch(ic->state) {
172 		case INO_STATE_CHECKEDABSENT:
173 		case INO_STATE_PRESENT:
174 			D1(printk(KERN_DEBUG "Skipping ino #%u already checked\n", ic->ino));
175 			spin_unlock(&c->inocache_lock);
176 			continue;
177 
178 		case INO_STATE_GC:
179 		case INO_STATE_CHECKING:
180 			printk(KERN_WARNING "Inode #%u is in state %d during CRC check phase!\n", ic->ino, ic->state);
181 			spin_unlock(&c->inocache_lock);
182 			BUG();
183 
184 		case INO_STATE_READING:
185 			/* We need to wait for it to finish, lest we move on
186 			   and trigger the BUG() above while we haven't yet
187 			   finished checking all its nodes */
188 			D1(printk(KERN_DEBUG "Waiting for ino #%u to finish reading\n", ic->ino));
189 			/* We need to come back again for the _same_ inode. We've
190 			 made no progress in this case, but that should be OK */
191 			c->checked_ino--;
192 
193 			up(&c->alloc_sem);
194 			sleep_on_spinunlock(&c->inocache_wq, &c->inocache_lock);
195 			return 0;
196 
197 		default:
198 			BUG();
199 
200 		case INO_STATE_UNCHECKED:
201 			;
202 		}
203 		ic->state = INO_STATE_CHECKING;
204 		spin_unlock(&c->inocache_lock);
205 
206 		D1(printk(KERN_DEBUG "jffs2_garbage_collect_pass() triggering inode scan of ino#%u\n", ic->ino));
207 
208 		ret = jffs2_do_crccheck_inode(c, ic);
209 		if (ret)
210 			printk(KERN_WARNING "Returned error for crccheck of ino #%u. Expect badness...\n", ic->ino);
211 
212 		jffs2_set_inocache_state(c, ic, INO_STATE_CHECKEDABSENT);
213 		up(&c->alloc_sem);
214 		return ret;
215 	}
216 
217 	/* First, work out which block we're garbage-collecting */
218 	jeb = c->gcblock;
219 
220 	if (!jeb)
221 		jeb = jffs2_find_gc_block(c);
222 
223 	if (!jeb) {
224 		D1 (printk(KERN_NOTICE "jffs2: Couldn't find erase block to garbage collect!\n"));
225 		spin_unlock(&c->erase_completion_lock);
226 		up(&c->alloc_sem);
227 		return -EIO;
228 	}
229 
230 	D1(printk(KERN_DEBUG "GC from block %08x, used_size %08x, dirty_size %08x, free_size %08x\n", jeb->offset, jeb->used_size, jeb->dirty_size, jeb->free_size));
231 	D1(if (c->nextblock)
232 	   printk(KERN_DEBUG "Nextblock at  %08x, used_size %08x, dirty_size %08x, wasted_size %08x, free_size %08x\n", c->nextblock->offset, c->nextblock->used_size, c->nextblock->dirty_size, c->nextblock->wasted_size, c->nextblock->free_size));
233 
234 	if (!jeb->used_size) {
235 		up(&c->alloc_sem);
236 		goto eraseit;
237 	}
238 
239 	raw = jeb->gc_node;
240 	gcblock_dirty = jeb->dirty_size;
241 
242 	while(ref_obsolete(raw)) {
243 		D1(printk(KERN_DEBUG "Node at 0x%08x is obsolete... skipping\n", ref_offset(raw)));
244 		raw = ref_next(raw);
245 		if (unlikely(!raw)) {
246 			printk(KERN_WARNING "eep. End of raw list while still supposedly nodes to GC\n");
247 			printk(KERN_WARNING "erase block at 0x%08x. free_size 0x%08x, dirty_size 0x%08x, used_size 0x%08x\n",
248 			       jeb->offset, jeb->free_size, jeb->dirty_size, jeb->used_size);
249 			jeb->gc_node = raw;
250 			spin_unlock(&c->erase_completion_lock);
251 			up(&c->alloc_sem);
252 			BUG();
253 		}
254 	}
255 	jeb->gc_node = raw;
256 
257 	D1(printk(KERN_DEBUG "Going to garbage collect node at 0x%08x\n", ref_offset(raw)));
258 
259 	if (!raw->next_in_ino) {
260 		/* Inode-less node. Clean marker, snapshot or something like that */
261 		spin_unlock(&c->erase_completion_lock);
262 		if (ref_flags(raw) == REF_PRISTINE) {
263 			/* It's an unknown node with JFFS2_FEATURE_RWCOMPAT_COPY */
264 			jffs2_garbage_collect_pristine(c, NULL, raw);
265 		} else {
266 			/* Just mark it obsolete */
267 			jffs2_mark_node_obsolete(c, raw);
268 		}
269 		up(&c->alloc_sem);
270 		goto eraseit_lock;
271 	}
272 
273 	ic = jffs2_raw_ref_to_ic(raw);
274 
275 #ifdef CONFIG_JFFS2_FS_XATTR
276 	/* When 'ic' refers xattr_datum/xattr_ref, this node is GCed as xattr.
277 	 * We can decide whether this node is inode or xattr by ic->class.     */
278 	if (ic->class == RAWNODE_CLASS_XATTR_DATUM
279 	    || ic->class == RAWNODE_CLASS_XATTR_REF) {
280 		spin_unlock(&c->erase_completion_lock);
281 
282 		if (ic->class == RAWNODE_CLASS_XATTR_DATUM) {
283 			ret = jffs2_garbage_collect_xattr_datum(c, (struct jffs2_xattr_datum *)ic, raw);
284 		} else {
285 			ret = jffs2_garbage_collect_xattr_ref(c, (struct jffs2_xattr_ref *)ic, raw);
286 		}
287 		goto test_gcnode;
288 	}
289 #endif
290 
291 	/* We need to hold the inocache. Either the erase_completion_lock or
292 	   the inocache_lock are sufficient; we trade down since the inocache_lock
293 	   causes less contention. */
294 	spin_lock(&c->inocache_lock);
295 
296 	spin_unlock(&c->erase_completion_lock);
297 
298 	D1(printk(KERN_DEBUG "jffs2_garbage_collect_pass collecting from block @0x%08x. Node @0x%08x(%d), ino #%u\n", jeb->offset, ref_offset(raw), ref_flags(raw), ic->ino));
299 
300 	/* Three possibilities:
301 	   1. Inode is already in-core. We must iget it and do proper
302 	      updating to its fragtree, etc.
303 	   2. Inode is not in-core, node is REF_PRISTINE. We lock the
304 	      inocache to prevent a read_inode(), copy the node intact.
305 	   3. Inode is not in-core, node is not pristine. We must iget()
306 	      and take the slow path.
307 	*/
308 
309 	switch(ic->state) {
310 	case INO_STATE_CHECKEDABSENT:
311 		/* It's been checked, but it's not currently in-core.
312 		   We can just copy any pristine nodes, but have
313 		   to prevent anyone else from doing read_inode() while
314 		   we're at it, so we set the state accordingly */
315 		if (ref_flags(raw) == REF_PRISTINE)
316 			ic->state = INO_STATE_GC;
317 		else {
318 			D1(printk(KERN_DEBUG "Ino #%u is absent but node not REF_PRISTINE. Reading.\n",
319 				  ic->ino));
320 		}
321 		break;
322 
323 	case INO_STATE_PRESENT:
324 		/* It's in-core. GC must iget() it. */
325 		break;
326 
327 	case INO_STATE_UNCHECKED:
328 	case INO_STATE_CHECKING:
329 	case INO_STATE_GC:
330 		/* Should never happen. We should have finished checking
331 		   by the time we actually start doing any GC, and since
332 		   we're holding the alloc_sem, no other garbage collection
333 		   can happen.
334 		*/
335 		printk(KERN_CRIT "Inode #%u already in state %d in jffs2_garbage_collect_pass()!\n",
336 		       ic->ino, ic->state);
337 		up(&c->alloc_sem);
338 		spin_unlock(&c->inocache_lock);
339 		BUG();
340 
341 	case INO_STATE_READING:
342 		/* Someone's currently trying to read it. We must wait for
343 		   them to finish and then go through the full iget() route
344 		   to do the GC. However, sometimes read_inode() needs to get
345 		   the alloc_sem() (for marking nodes invalid) so we must
346 		   drop the alloc_sem before sleeping. */
347 
348 		up(&c->alloc_sem);
349 		D1(printk(KERN_DEBUG "jffs2_garbage_collect_pass() waiting for ino #%u in state %d\n",
350 			  ic->ino, ic->state));
351 		sleep_on_spinunlock(&c->inocache_wq, &c->inocache_lock);
352 		/* And because we dropped the alloc_sem we must start again from the
353 		   beginning. Ponder chance of livelock here -- we're returning success
354 		   without actually making any progress.
355 
356 		   Q: What are the chances that the inode is back in INO_STATE_READING
357 		   again by the time we next enter this function? And that this happens
358 		   enough times to cause a real delay?
359 
360 		   A: Small enough that I don't care :)
361 		*/
362 		return 0;
363 	}
364 
365 	/* OK. Now if the inode is in state INO_STATE_GC, we are going to copy the
366 	   node intact, and we don't have to muck about with the fragtree etc.
367 	   because we know it's not in-core. If it _was_ in-core, we go through
368 	   all the iget() crap anyway */
369 
370 	if (ic->state == INO_STATE_GC) {
371 		spin_unlock(&c->inocache_lock);
372 
373 		ret = jffs2_garbage_collect_pristine(c, ic, raw);
374 
375 		spin_lock(&c->inocache_lock);
376 		ic->state = INO_STATE_CHECKEDABSENT;
377 		wake_up(&c->inocache_wq);
378 
379 		if (ret != -EBADFD) {
380 			spin_unlock(&c->inocache_lock);
381 			goto test_gcnode;
382 		}
383 
384 		/* Fall through if it wanted us to, with inocache_lock held */
385 	}
386 
387 	/* Prevent the fairly unlikely race where the gcblock is
388 	   entirely obsoleted by the final close of a file which had
389 	   the only valid nodes in the block, followed by erasure,
390 	   followed by freeing of the ic because the erased block(s)
391 	   held _all_ the nodes of that inode.... never been seen but
392 	   it's vaguely possible. */
393 
394 	inum = ic->ino;
395 	nlink = ic->nlink;
396 	spin_unlock(&c->inocache_lock);
397 
398 	f = jffs2_gc_fetch_inode(c, inum, nlink);
399 	if (IS_ERR(f)) {
400 		ret = PTR_ERR(f);
401 		goto release_sem;
402 	}
403 	if (!f) {
404 		ret = 0;
405 		goto release_sem;
406 	}
407 
408 	ret = jffs2_garbage_collect_live(c, jeb, raw, f);
409 
410 	jffs2_gc_release_inode(c, f);
411 
412  test_gcnode:
413 	if (jeb->dirty_size == gcblock_dirty && !ref_obsolete(jeb->gc_node)) {
414 		/* Eep. This really should never happen. GC is broken */
415 		printk(KERN_ERR "Error garbage collecting node at %08x!\n", ref_offset(jeb->gc_node));
416 		ret = -ENOSPC;
417 	} else if (ref_offset(jeb->gc_node) == 0x1c616bdc)
418 		printk(KERN_ERR "Wheee. Correctly GC'd node at %08x\n", ref_offset(jeb->gc_node));
419 
420  release_sem:
421 	up(&c->alloc_sem);
422 
423  eraseit_lock:
424 	/* If we've finished this block, start it erasing */
425 	spin_lock(&c->erase_completion_lock);
426 
427  eraseit:
428 	if (c->gcblock && !c->gcblock->used_size) {
429 		D1(printk(KERN_DEBUG "Block at 0x%08x completely obsoleted by GC. Moving to erase_pending_list\n", c->gcblock->offset));
430 		/* We're GC'ing an empty block? */
431 		list_add_tail(&c->gcblock->list, &c->erase_pending_list);
432 		c->gcblock = NULL;
433 		c->nr_erasing_blocks++;
434 		jffs2_erase_pending_trigger(c);
435 	}
436 	spin_unlock(&c->erase_completion_lock);
437 
438 	return ret;
439 }
440 
441 static int jffs2_garbage_collect_live(struct jffs2_sb_info *c,  struct jffs2_eraseblock *jeb,
442 				      struct jffs2_raw_node_ref *raw, struct jffs2_inode_info *f)
443 {
444 	struct jffs2_node_frag *frag;
445 	struct jffs2_full_dnode *fn = NULL;
446 	struct jffs2_full_dirent *fd;
447 	uint32_t start = 0, end = 0, nrfrags = 0;
448 	int ret = 0;
449 
450 	down(&f->sem);
451 
452 	/* Now we have the lock for this inode. Check that it's still the one at the head
453 	   of the list. */
454 
455 	spin_lock(&c->erase_completion_lock);
456 
457 	if (c->gcblock != jeb) {
458 		spin_unlock(&c->erase_completion_lock);
459 		D1(printk(KERN_DEBUG "GC block is no longer gcblock. Restart\n"));
460 		goto upnout;
461 	}
462 	if (ref_obsolete(raw)) {
463 		spin_unlock(&c->erase_completion_lock);
464 		D1(printk(KERN_DEBUG "node to be GC'd was obsoleted in the meantime.\n"));
465 		/* They'll call again */
466 		goto upnout;
467 	}
468 	spin_unlock(&c->erase_completion_lock);
469 
470 	/* OK. Looks safe. And nobody can get us now because we have the semaphore. Move the block */
471 	if (f->metadata && f->metadata->raw == raw) {
472 		fn = f->metadata;
473 		ret = jffs2_garbage_collect_metadata(c, jeb, f, fn);
474 		goto upnout;
475 	}
476 
477 	/* FIXME. Read node and do lookup? */
478 	for (frag = frag_first(&f->fragtree); frag; frag = frag_next(frag)) {
479 		if (frag->node && frag->node->raw == raw) {
480 			fn = frag->node;
481 			end = frag->ofs + frag->size;
482 			if (!nrfrags++)
483 				start = frag->ofs;
484 			if (nrfrags == frag->node->frags)
485 				break; /* We've found them all */
486 		}
487 	}
488 	if (fn) {
489 		if (ref_flags(raw) == REF_PRISTINE) {
490 			ret = jffs2_garbage_collect_pristine(c, f->inocache, raw);
491 			if (!ret) {
492 				/* Urgh. Return it sensibly. */
493 				frag->node->raw = f->inocache->nodes;
494 			}
495 			if (ret != -EBADFD)
496 				goto upnout;
497 		}
498 		/* We found a datanode. Do the GC */
499 		if((start >> PAGE_CACHE_SHIFT) < ((end-1) >> PAGE_CACHE_SHIFT)) {
500 			/* It crosses a page boundary. Therefore, it must be a hole. */
501 			ret = jffs2_garbage_collect_hole(c, jeb, f, fn, start, end);
502 		} else {
503 			/* It could still be a hole. But we GC the page this way anyway */
504 			ret = jffs2_garbage_collect_dnode(c, jeb, f, fn, start, end);
505 		}
506 		goto upnout;
507 	}
508 
509 	/* Wasn't a dnode. Try dirent */
510 	for (fd = f->dents; fd; fd=fd->next) {
511 		if (fd->raw == raw)
512 			break;
513 	}
514 
515 	if (fd && fd->ino) {
516 		ret = jffs2_garbage_collect_dirent(c, jeb, f, fd);
517 	} else if (fd) {
518 		ret = jffs2_garbage_collect_deletion_dirent(c, jeb, f, fd);
519 	} else {
520 		printk(KERN_WARNING "Raw node at 0x%08x wasn't in node lists for ino #%u\n",
521 		       ref_offset(raw), f->inocache->ino);
522 		if (ref_obsolete(raw)) {
523 			printk(KERN_WARNING "But it's obsolete so we don't mind too much\n");
524 		} else {
525 			jffs2_dbg_dump_node(c, ref_offset(raw));
526 			BUG();
527 		}
528 	}
529  upnout:
530 	up(&f->sem);
531 
532 	return ret;
533 }
534 
535 static int jffs2_garbage_collect_pristine(struct jffs2_sb_info *c,
536 					  struct jffs2_inode_cache *ic,
537 					  struct jffs2_raw_node_ref *raw)
538 {
539 	union jffs2_node_union *node;
540 	size_t retlen;
541 	int ret;
542 	uint32_t phys_ofs, alloclen;
543 	uint32_t crc, rawlen;
544 	int retried = 0;
545 
546 	D1(printk(KERN_DEBUG "Going to GC REF_PRISTINE node at 0x%08x\n", ref_offset(raw)));
547 
548 	alloclen = rawlen = ref_totlen(c, c->gcblock, raw);
549 
550 	/* Ask for a small amount of space (or the totlen if smaller) because we
551 	   don't want to force wastage of the end of a block if splitting would
552 	   work. */
553 	if (ic && alloclen > sizeof(struct jffs2_raw_inode) + JFFS2_MIN_DATA_LEN)
554 		alloclen = sizeof(struct jffs2_raw_inode) + JFFS2_MIN_DATA_LEN;
555 
556 	ret = jffs2_reserve_space_gc(c, alloclen, &alloclen, rawlen);
557 	/* 'rawlen' is not the exact summary size; it is only an upper estimation */
558 
559 	if (ret)
560 		return ret;
561 
562 	if (alloclen < rawlen) {
563 		/* Doesn't fit untouched. We'll go the old route and split it */
564 		return -EBADFD;
565 	}
566 
567 	node = kmalloc(rawlen, GFP_KERNEL);
568 	if (!node)
569 		return -ENOMEM;
570 
571 	ret = jffs2_flash_read(c, ref_offset(raw), rawlen, &retlen, (char *)node);
572 	if (!ret && retlen != rawlen)
573 		ret = -EIO;
574 	if (ret)
575 		goto out_node;
576 
577 	crc = crc32(0, node, sizeof(struct jffs2_unknown_node)-4);
578 	if (je32_to_cpu(node->u.hdr_crc) != crc) {
579 		printk(KERN_WARNING "Header CRC failed on REF_PRISTINE node at 0x%08x: Read 0x%08x, calculated 0x%08x\n",
580 		       ref_offset(raw), je32_to_cpu(node->u.hdr_crc), crc);
581 		goto bail;
582 	}
583 
584 	switch(je16_to_cpu(node->u.nodetype)) {
585 	case JFFS2_NODETYPE_INODE:
586 		crc = crc32(0, node, sizeof(node->i)-8);
587 		if (je32_to_cpu(node->i.node_crc) != crc) {
588 			printk(KERN_WARNING "Node CRC failed on REF_PRISTINE data node at 0x%08x: Read 0x%08x, calculated 0x%08x\n",
589 			       ref_offset(raw), je32_to_cpu(node->i.node_crc), crc);
590 			goto bail;
591 		}
592 
593 		if (je32_to_cpu(node->i.dsize)) {
594 			crc = crc32(0, node->i.data, je32_to_cpu(node->i.csize));
595 			if (je32_to_cpu(node->i.data_crc) != crc) {
596 				printk(KERN_WARNING "Data CRC failed on REF_PRISTINE data node at 0x%08x: Read 0x%08x, calculated 0x%08x\n",
597 				       ref_offset(raw), je32_to_cpu(node->i.data_crc), crc);
598 				goto bail;
599 			}
600 		}
601 		break;
602 
603 	case JFFS2_NODETYPE_DIRENT:
604 		crc = crc32(0, node, sizeof(node->d)-8);
605 		if (je32_to_cpu(node->d.node_crc) != crc) {
606 			printk(KERN_WARNING "Node CRC failed on REF_PRISTINE dirent node at 0x%08x: Read 0x%08x, calculated 0x%08x\n",
607 			       ref_offset(raw), je32_to_cpu(node->d.node_crc), crc);
608 			goto bail;
609 		}
610 
611 		if (node->d.nsize) {
612 			crc = crc32(0, node->d.name, node->d.nsize);
613 			if (je32_to_cpu(node->d.name_crc) != crc) {
614 				printk(KERN_WARNING "Name CRC failed on REF_PRISTINE dirent ode at 0x%08x: Read 0x%08x, calculated 0x%08x\n",
615 				       ref_offset(raw), je32_to_cpu(node->d.name_crc), crc);
616 				goto bail;
617 			}
618 		}
619 		break;
620 	default:
621 		/* If it's inode-less, we don't _know_ what it is. Just copy it intact */
622 		if (ic) {
623 			printk(KERN_WARNING "Unknown node type for REF_PRISTINE node at 0x%08x: 0x%04x\n",
624 			       ref_offset(raw), je16_to_cpu(node->u.nodetype));
625 			goto bail;
626 		}
627 	}
628 
629 	/* OK, all the CRCs are good; this node can just be copied as-is. */
630  retry:
631 	phys_ofs = write_ofs(c);
632 
633 	ret = jffs2_flash_write(c, phys_ofs, rawlen, &retlen, (char *)node);
634 
635 	if (ret || (retlen != rawlen)) {
636 		printk(KERN_NOTICE "Write of %d bytes at 0x%08x failed. returned %d, retlen %zd\n",
637 		       rawlen, phys_ofs, ret, retlen);
638 		if (retlen) {
639 			jffs2_add_physical_node_ref(c, phys_ofs | REF_OBSOLETE, rawlen, NULL);
640 		} else {
641 			printk(KERN_NOTICE "Not marking the space at 0x%08x as dirty because the flash driver returned retlen zero\n", phys_ofs);
642 		}
643 		if (!retried) {
644 			/* Try to reallocate space and retry */
645 			uint32_t dummy;
646 			struct jffs2_eraseblock *jeb = &c->blocks[phys_ofs / c->sector_size];
647 
648 			retried = 1;
649 
650 			D1(printk(KERN_DEBUG "Retrying failed write of REF_PRISTINE node.\n"));
651 
652 			jffs2_dbg_acct_sanity_check(c,jeb);
653 			jffs2_dbg_acct_paranoia_check(c, jeb);
654 
655 			ret = jffs2_reserve_space_gc(c, rawlen, &dummy, rawlen);
656 						/* this is not the exact summary size of it,
657 							it is only an upper estimation */
658 
659 			if (!ret) {
660 				D1(printk(KERN_DEBUG "Allocated space at 0x%08x to retry failed write.\n", phys_ofs));
661 
662 				jffs2_dbg_acct_sanity_check(c,jeb);
663 				jffs2_dbg_acct_paranoia_check(c, jeb);
664 
665 				goto retry;
666 			}
667 			D1(printk(KERN_DEBUG "Failed to allocate space to retry failed write: %d!\n", ret));
668 		}
669 
670 		if (!ret)
671 			ret = -EIO;
672 		goto out_node;
673 	}
674 	jffs2_add_physical_node_ref(c, phys_ofs | REF_PRISTINE, rawlen, ic);
675 
676 	jffs2_mark_node_obsolete(c, raw);
677 	D1(printk(KERN_DEBUG "WHEEE! GC REF_PRISTINE node at 0x%08x succeeded\n", ref_offset(raw)));
678 
679  out_node:
680 	kfree(node);
681 	return ret;
682  bail:
683 	ret = -EBADFD;
684 	goto out_node;
685 }
686 
687 static int jffs2_garbage_collect_metadata(struct jffs2_sb_info *c, struct jffs2_eraseblock *jeb,
688 					struct jffs2_inode_info *f, struct jffs2_full_dnode *fn)
689 {
690 	struct jffs2_full_dnode *new_fn;
691 	struct jffs2_raw_inode ri;
692 	struct jffs2_node_frag *last_frag;
693 	union jffs2_device_node dev;
694 	char *mdata = NULL, mdatalen = 0;
695 	uint32_t alloclen, ilen;
696 	int ret;
697 
698 	if (S_ISBLK(JFFS2_F_I_MODE(f)) ||
699 	    S_ISCHR(JFFS2_F_I_MODE(f)) ) {
700 		/* For these, we don't actually need to read the old node */
701 		mdatalen = jffs2_encode_dev(&dev, JFFS2_F_I_RDEV(f));
702 		mdata = (char *)&dev;
703 		D1(printk(KERN_DEBUG "jffs2_garbage_collect_metadata(): Writing %d bytes of kdev_t\n", mdatalen));
704 	} else if (S_ISLNK(JFFS2_F_I_MODE(f))) {
705 		mdatalen = fn->size;
706 		mdata = kmalloc(fn->size, GFP_KERNEL);
707 		if (!mdata) {
708 			printk(KERN_WARNING "kmalloc of mdata failed in jffs2_garbage_collect_metadata()\n");
709 			return -ENOMEM;
710 		}
711 		ret = jffs2_read_dnode(c, f, fn, mdata, 0, mdatalen);
712 		if (ret) {
713 			printk(KERN_WARNING "read of old metadata failed in jffs2_garbage_collect_metadata(): %d\n", ret);
714 			kfree(mdata);
715 			return ret;
716 		}
717 		D1(printk(KERN_DEBUG "jffs2_garbage_collect_metadata(): Writing %d bites of symlink target\n", mdatalen));
718 
719 	}
720 
721 	ret = jffs2_reserve_space_gc(c, sizeof(ri) + mdatalen, &alloclen,
722 				JFFS2_SUMMARY_INODE_SIZE);
723 	if (ret) {
724 		printk(KERN_WARNING "jffs2_reserve_space_gc of %zd bytes for garbage_collect_metadata failed: %d\n",
725 		       sizeof(ri)+ mdatalen, ret);
726 		goto out;
727 	}
728 
729 	last_frag = frag_last(&f->fragtree);
730 	if (last_frag)
731 		/* Fetch the inode length from the fragtree rather then
732 		 * from i_size since i_size may have not been updated yet */
733 		ilen = last_frag->ofs + last_frag->size;
734 	else
735 		ilen = JFFS2_F_I_SIZE(f);
736 
737 	memset(&ri, 0, sizeof(ri));
738 	ri.magic = cpu_to_je16(JFFS2_MAGIC_BITMASK);
739 	ri.nodetype = cpu_to_je16(JFFS2_NODETYPE_INODE);
740 	ri.totlen = cpu_to_je32(sizeof(ri) + mdatalen);
741 	ri.hdr_crc = cpu_to_je32(crc32(0, &ri, sizeof(struct jffs2_unknown_node)-4));
742 
743 	ri.ino = cpu_to_je32(f->inocache->ino);
744 	ri.version = cpu_to_je32(++f->highest_version);
745 	ri.mode = cpu_to_jemode(JFFS2_F_I_MODE(f));
746 	ri.uid = cpu_to_je16(JFFS2_F_I_UID(f));
747 	ri.gid = cpu_to_je16(JFFS2_F_I_GID(f));
748 	ri.isize = cpu_to_je32(ilen);
749 	ri.atime = cpu_to_je32(JFFS2_F_I_ATIME(f));
750 	ri.ctime = cpu_to_je32(JFFS2_F_I_CTIME(f));
751 	ri.mtime = cpu_to_je32(JFFS2_F_I_MTIME(f));
752 	ri.offset = cpu_to_je32(0);
753 	ri.csize = cpu_to_je32(mdatalen);
754 	ri.dsize = cpu_to_je32(mdatalen);
755 	ri.compr = JFFS2_COMPR_NONE;
756 	ri.node_crc = cpu_to_je32(crc32(0, &ri, sizeof(ri)-8));
757 	ri.data_crc = cpu_to_je32(crc32(0, mdata, mdatalen));
758 
759 	new_fn = jffs2_write_dnode(c, f, &ri, mdata, mdatalen, ALLOC_GC);
760 
761 	if (IS_ERR(new_fn)) {
762 		printk(KERN_WARNING "Error writing new dnode: %ld\n", PTR_ERR(new_fn));
763 		ret = PTR_ERR(new_fn);
764 		goto out;
765 	}
766 	jffs2_mark_node_obsolete(c, fn->raw);
767 	jffs2_free_full_dnode(fn);
768 	f->metadata = new_fn;
769  out:
770 	if (S_ISLNK(JFFS2_F_I_MODE(f)))
771 		kfree(mdata);
772 	return ret;
773 }
774 
775 static int jffs2_garbage_collect_dirent(struct jffs2_sb_info *c, struct jffs2_eraseblock *jeb,
776 					struct jffs2_inode_info *f, struct jffs2_full_dirent *fd)
777 {
778 	struct jffs2_full_dirent *new_fd;
779 	struct jffs2_raw_dirent rd;
780 	uint32_t alloclen;
781 	int ret;
782 
783 	rd.magic = cpu_to_je16(JFFS2_MAGIC_BITMASK);
784 	rd.nodetype = cpu_to_je16(JFFS2_NODETYPE_DIRENT);
785 	rd.nsize = strlen(fd->name);
786 	rd.totlen = cpu_to_je32(sizeof(rd) + rd.nsize);
787 	rd.hdr_crc = cpu_to_je32(crc32(0, &rd, sizeof(struct jffs2_unknown_node)-4));
788 
789 	rd.pino = cpu_to_je32(f->inocache->ino);
790 	rd.version = cpu_to_je32(++f->highest_version);
791 	rd.ino = cpu_to_je32(fd->ino);
792 	/* If the times on this inode were set by explicit utime() they can be different,
793 	   so refrain from splatting them. */
794 	if (JFFS2_F_I_MTIME(f) == JFFS2_F_I_CTIME(f))
795 		rd.mctime = cpu_to_je32(JFFS2_F_I_MTIME(f));
796 	else
797 		rd.mctime = cpu_to_je32(0);
798 	rd.type = fd->type;
799 	rd.node_crc = cpu_to_je32(crc32(0, &rd, sizeof(rd)-8));
800 	rd.name_crc = cpu_to_je32(crc32(0, fd->name, rd.nsize));
801 
802 	ret = jffs2_reserve_space_gc(c, sizeof(rd)+rd.nsize, &alloclen,
803 				JFFS2_SUMMARY_DIRENT_SIZE(rd.nsize));
804 	if (ret) {
805 		printk(KERN_WARNING "jffs2_reserve_space_gc of %zd bytes for garbage_collect_dirent failed: %d\n",
806 		       sizeof(rd)+rd.nsize, ret);
807 		return ret;
808 	}
809 	new_fd = jffs2_write_dirent(c, f, &rd, fd->name, rd.nsize, ALLOC_GC);
810 
811 	if (IS_ERR(new_fd)) {
812 		printk(KERN_WARNING "jffs2_write_dirent in garbage_collect_dirent failed: %ld\n", PTR_ERR(new_fd));
813 		return PTR_ERR(new_fd);
814 	}
815 	jffs2_add_fd_to_list(c, new_fd, &f->dents);
816 	return 0;
817 }
818 
819 static int jffs2_garbage_collect_deletion_dirent(struct jffs2_sb_info *c, struct jffs2_eraseblock *jeb,
820 					struct jffs2_inode_info *f, struct jffs2_full_dirent *fd)
821 {
822 	struct jffs2_full_dirent **fdp = &f->dents;
823 	int found = 0;
824 
825 	/* On a medium where we can't actually mark nodes obsolete
826 	   pernamently, such as NAND flash, we need to work out
827 	   whether this deletion dirent is still needed to actively
828 	   delete a 'real' dirent with the same name that's still
829 	   somewhere else on the flash. */
830 	if (!jffs2_can_mark_obsolete(c)) {
831 		struct jffs2_raw_dirent *rd;
832 		struct jffs2_raw_node_ref *raw;
833 		int ret;
834 		size_t retlen;
835 		int name_len = strlen(fd->name);
836 		uint32_t name_crc = crc32(0, fd->name, name_len);
837 		uint32_t rawlen = ref_totlen(c, jeb, fd->raw);
838 
839 		rd = kmalloc(rawlen, GFP_KERNEL);
840 		if (!rd)
841 			return -ENOMEM;
842 
843 		/* Prevent the erase code from nicking the obsolete node refs while
844 		   we're looking at them. I really don't like this extra lock but
845 		   can't see any alternative. Suggestions on a postcard to... */
846 		down(&c->erase_free_sem);
847 
848 		for (raw = f->inocache->nodes; raw != (void *)f->inocache; raw = raw->next_in_ino) {
849 
850 			cond_resched();
851 
852 			/* We only care about obsolete ones */
853 			if (!(ref_obsolete(raw)))
854 				continue;
855 
856 			/* Any dirent with the same name is going to have the same length... */
857 			if (ref_totlen(c, NULL, raw) != rawlen)
858 				continue;
859 
860 			/* Doesn't matter if there's one in the same erase block. We're going to
861 			   delete it too at the same time. */
862 			if (SECTOR_ADDR(raw->flash_offset) == SECTOR_ADDR(fd->raw->flash_offset))
863 				continue;
864 
865 			D1(printk(KERN_DEBUG "Check potential deletion dirent at %08x\n", ref_offset(raw)));
866 
867 			/* This is an obsolete node belonging to the same directory, and it's of the right
868 			   length. We need to take a closer look...*/
869 			ret = jffs2_flash_read(c, ref_offset(raw), rawlen, &retlen, (char *)rd);
870 			if (ret) {
871 				printk(KERN_WARNING "jffs2_g_c_deletion_dirent(): Read error (%d) reading obsolete node at %08x\n", ret, ref_offset(raw));
872 				/* If we can't read it, we don't need to continue to obsolete it. Continue */
873 				continue;
874 			}
875 			if (retlen != rawlen) {
876 				printk(KERN_WARNING "jffs2_g_c_deletion_dirent(): Short read (%zd not %u) reading header from obsolete node at %08x\n",
877 				       retlen, rawlen, ref_offset(raw));
878 				continue;
879 			}
880 
881 			if (je16_to_cpu(rd->nodetype) != JFFS2_NODETYPE_DIRENT)
882 				continue;
883 
884 			/* If the name CRC doesn't match, skip */
885 			if (je32_to_cpu(rd->name_crc) != name_crc)
886 				continue;
887 
888 			/* If the name length doesn't match, or it's another deletion dirent, skip */
889 			if (rd->nsize != name_len || !je32_to_cpu(rd->ino))
890 				continue;
891 
892 			/* OK, check the actual name now */
893 			if (memcmp(rd->name, fd->name, name_len))
894 				continue;
895 
896 			/* OK. The name really does match. There really is still an older node on
897 			   the flash which our deletion dirent obsoletes. So we have to write out
898 			   a new deletion dirent to replace it */
899 			up(&c->erase_free_sem);
900 
901 			D1(printk(KERN_DEBUG "Deletion dirent at %08x still obsoletes real dirent \"%s\" at %08x for ino #%u\n",
902 				  ref_offset(fd->raw), fd->name, ref_offset(raw), je32_to_cpu(rd->ino)));
903 			kfree(rd);
904 
905 			return jffs2_garbage_collect_dirent(c, jeb, f, fd);
906 		}
907 
908 		up(&c->erase_free_sem);
909 		kfree(rd);
910 	}
911 
912 	/* FIXME: If we're deleting a dirent which contains the current mtime and ctime,
913 	   we should update the metadata node with those times accordingly */
914 
915 	/* No need for it any more. Just mark it obsolete and remove it from the list */
916 	while (*fdp) {
917 		if ((*fdp) == fd) {
918 			found = 1;
919 			*fdp = fd->next;
920 			break;
921 		}
922 		fdp = &(*fdp)->next;
923 	}
924 	if (!found) {
925 		printk(KERN_WARNING "Deletion dirent \"%s\" not found in list for ino #%u\n", fd->name, f->inocache->ino);
926 	}
927 	jffs2_mark_node_obsolete(c, fd->raw);
928 	jffs2_free_full_dirent(fd);
929 	return 0;
930 }
931 
932 static int jffs2_garbage_collect_hole(struct jffs2_sb_info *c, struct jffs2_eraseblock *jeb,
933 				      struct jffs2_inode_info *f, struct jffs2_full_dnode *fn,
934 				      uint32_t start, uint32_t end)
935 {
936 	struct jffs2_raw_inode ri;
937 	struct jffs2_node_frag *frag;
938 	struct jffs2_full_dnode *new_fn;
939 	uint32_t alloclen, ilen;
940 	int ret;
941 
942 	D1(printk(KERN_DEBUG "Writing replacement hole node for ino #%u from offset 0x%x to 0x%x\n",
943 		  f->inocache->ino, start, end));
944 
945 	memset(&ri, 0, sizeof(ri));
946 
947 	if(fn->frags > 1) {
948 		size_t readlen;
949 		uint32_t crc;
950 		/* It's partially obsoleted by a later write. So we have to
951 		   write it out again with the _same_ version as before */
952 		ret = jffs2_flash_read(c, ref_offset(fn->raw), sizeof(ri), &readlen, (char *)&ri);
953 		if (readlen != sizeof(ri) || ret) {
954 			printk(KERN_WARNING "Node read failed in jffs2_garbage_collect_hole. Ret %d, retlen %zd. Data will be lost by writing new hole node\n", ret, readlen);
955 			goto fill;
956 		}
957 		if (je16_to_cpu(ri.nodetype) != JFFS2_NODETYPE_INODE) {
958 			printk(KERN_WARNING "jffs2_garbage_collect_hole: Node at 0x%08x had node type 0x%04x instead of JFFS2_NODETYPE_INODE(0x%04x)\n",
959 			       ref_offset(fn->raw),
960 			       je16_to_cpu(ri.nodetype), JFFS2_NODETYPE_INODE);
961 			return -EIO;
962 		}
963 		if (je32_to_cpu(ri.totlen) != sizeof(ri)) {
964 			printk(KERN_WARNING "jffs2_garbage_collect_hole: Node at 0x%08x had totlen 0x%x instead of expected 0x%zx\n",
965 			       ref_offset(fn->raw),
966 			       je32_to_cpu(ri.totlen), sizeof(ri));
967 			return -EIO;
968 		}
969 		crc = crc32(0, &ri, sizeof(ri)-8);
970 		if (crc != je32_to_cpu(ri.node_crc)) {
971 			printk(KERN_WARNING "jffs2_garbage_collect_hole: Node at 0x%08x had CRC 0x%08x which doesn't match calculated CRC 0x%08x\n",
972 			       ref_offset(fn->raw),
973 			       je32_to_cpu(ri.node_crc), crc);
974 			/* FIXME: We could possibly deal with this by writing new holes for each frag */
975 			printk(KERN_WARNING "Data in the range 0x%08x to 0x%08x of inode #%u will be lost\n",
976 			       start, end, f->inocache->ino);
977 			goto fill;
978 		}
979 		if (ri.compr != JFFS2_COMPR_ZERO) {
980 			printk(KERN_WARNING "jffs2_garbage_collect_hole: Node 0x%08x wasn't a hole node!\n", ref_offset(fn->raw));
981 			printk(KERN_WARNING "Data in the range 0x%08x to 0x%08x of inode #%u will be lost\n",
982 			       start, end, f->inocache->ino);
983 			goto fill;
984 		}
985 	} else {
986 	fill:
987 		ri.magic = cpu_to_je16(JFFS2_MAGIC_BITMASK);
988 		ri.nodetype = cpu_to_je16(JFFS2_NODETYPE_INODE);
989 		ri.totlen = cpu_to_je32(sizeof(ri));
990 		ri.hdr_crc = cpu_to_je32(crc32(0, &ri, sizeof(struct jffs2_unknown_node)-4));
991 
992 		ri.ino = cpu_to_je32(f->inocache->ino);
993 		ri.version = cpu_to_je32(++f->highest_version);
994 		ri.offset = cpu_to_je32(start);
995 		ri.dsize = cpu_to_je32(end - start);
996 		ri.csize = cpu_to_je32(0);
997 		ri.compr = JFFS2_COMPR_ZERO;
998 	}
999 
1000 	frag = frag_last(&f->fragtree);
1001 	if (frag)
1002 		/* Fetch the inode length from the fragtree rather then
1003 		 * from i_size since i_size may have not been updated yet */
1004 		ilen = frag->ofs + frag->size;
1005 	else
1006 		ilen = JFFS2_F_I_SIZE(f);
1007 
1008 	ri.mode = cpu_to_jemode(JFFS2_F_I_MODE(f));
1009 	ri.uid = cpu_to_je16(JFFS2_F_I_UID(f));
1010 	ri.gid = cpu_to_je16(JFFS2_F_I_GID(f));
1011 	ri.isize = cpu_to_je32(ilen);
1012 	ri.atime = cpu_to_je32(JFFS2_F_I_ATIME(f));
1013 	ri.ctime = cpu_to_je32(JFFS2_F_I_CTIME(f));
1014 	ri.mtime = cpu_to_je32(JFFS2_F_I_MTIME(f));
1015 	ri.data_crc = cpu_to_je32(0);
1016 	ri.node_crc = cpu_to_je32(crc32(0, &ri, sizeof(ri)-8));
1017 
1018 	ret = jffs2_reserve_space_gc(c, sizeof(ri), &alloclen,
1019 				     JFFS2_SUMMARY_INODE_SIZE);
1020 	if (ret) {
1021 		printk(KERN_WARNING "jffs2_reserve_space_gc of %zd bytes for garbage_collect_hole failed: %d\n",
1022 		       sizeof(ri), ret);
1023 		return ret;
1024 	}
1025 	new_fn = jffs2_write_dnode(c, f, &ri, NULL, 0, ALLOC_GC);
1026 
1027 	if (IS_ERR(new_fn)) {
1028 		printk(KERN_WARNING "Error writing new hole node: %ld\n", PTR_ERR(new_fn));
1029 		return PTR_ERR(new_fn);
1030 	}
1031 	if (je32_to_cpu(ri.version) == f->highest_version) {
1032 		jffs2_add_full_dnode_to_inode(c, f, new_fn);
1033 		if (f->metadata) {
1034 			jffs2_mark_node_obsolete(c, f->metadata->raw);
1035 			jffs2_free_full_dnode(f->metadata);
1036 			f->metadata = NULL;
1037 		}
1038 		return 0;
1039 	}
1040 
1041 	/*
1042 	 * We should only get here in the case where the node we are
1043 	 * replacing had more than one frag, so we kept the same version
1044 	 * number as before. (Except in case of error -- see 'goto fill;'
1045 	 * above.)
1046 	 */
1047 	D1(if(unlikely(fn->frags <= 1)) {
1048 		printk(KERN_WARNING "jffs2_garbage_collect_hole: Replacing fn with %d frag(s) but new ver %d != highest_version %d of ino #%d\n",
1049 		       fn->frags, je32_to_cpu(ri.version), f->highest_version,
1050 		       je32_to_cpu(ri.ino));
1051 	});
1052 
1053 	/* This is a partially-overlapped hole node. Mark it REF_NORMAL not REF_PRISTINE */
1054 	mark_ref_normal(new_fn->raw);
1055 
1056 	for (frag = jffs2_lookup_node_frag(&f->fragtree, fn->ofs);
1057 	     frag; frag = frag_next(frag)) {
1058 		if (frag->ofs > fn->size + fn->ofs)
1059 			break;
1060 		if (frag->node == fn) {
1061 			frag->node = new_fn;
1062 			new_fn->frags++;
1063 			fn->frags--;
1064 		}
1065 	}
1066 	if (fn->frags) {
1067 		printk(KERN_WARNING "jffs2_garbage_collect_hole: Old node still has frags!\n");
1068 		BUG();
1069 	}
1070 	if (!new_fn->frags) {
1071 		printk(KERN_WARNING "jffs2_garbage_collect_hole: New node has no frags!\n");
1072 		BUG();
1073 	}
1074 
1075 	jffs2_mark_node_obsolete(c, fn->raw);
1076 	jffs2_free_full_dnode(fn);
1077 
1078 	return 0;
1079 }
1080 
1081 static int jffs2_garbage_collect_dnode(struct jffs2_sb_info *c, struct jffs2_eraseblock *jeb,
1082 				       struct jffs2_inode_info *f, struct jffs2_full_dnode *fn,
1083 				       uint32_t start, uint32_t end)
1084 {
1085 	struct jffs2_full_dnode *new_fn;
1086 	struct jffs2_raw_inode ri;
1087 	uint32_t alloclen, offset, orig_end, orig_start;
1088 	int ret = 0;
1089 	unsigned char *comprbuf = NULL, *writebuf;
1090 	unsigned long pg;
1091 	unsigned char *pg_ptr;
1092 
1093 	memset(&ri, 0, sizeof(ri));
1094 
1095 	D1(printk(KERN_DEBUG "Writing replacement dnode for ino #%u from offset 0x%x to 0x%x\n",
1096 		  f->inocache->ino, start, end));
1097 
1098 	orig_end = end;
1099 	orig_start = start;
1100 
1101 	if (c->nr_free_blocks + c->nr_erasing_blocks > c->resv_blocks_gcmerge) {
1102 		/* Attempt to do some merging. But only expand to cover logically
1103 		   adjacent frags if the block containing them is already considered
1104 		   to be dirty. Otherwise we end up with GC just going round in
1105 		   circles dirtying the nodes it already wrote out, especially
1106 		   on NAND where we have small eraseblocks and hence a much higher
1107 		   chance of nodes having to be split to cross boundaries. */
1108 
1109 		struct jffs2_node_frag *frag;
1110 		uint32_t min, max;
1111 
1112 		min = start & ~(PAGE_CACHE_SIZE-1);
1113 		max = min + PAGE_CACHE_SIZE;
1114 
1115 		frag = jffs2_lookup_node_frag(&f->fragtree, start);
1116 
1117 		/* BUG_ON(!frag) but that'll happen anyway... */
1118 
1119 		BUG_ON(frag->ofs != start);
1120 
1121 		/* First grow down... */
1122 		while((frag = frag_prev(frag)) && frag->ofs >= min) {
1123 
1124 			/* If the previous frag doesn't even reach the beginning, there's
1125 			   excessive fragmentation. Just merge. */
1126 			if (frag->ofs > min) {
1127 				D1(printk(KERN_DEBUG "Expanding down to cover partial frag (0x%x-0x%x)\n",
1128 					  frag->ofs, frag->ofs+frag->size));
1129 				start = frag->ofs;
1130 				continue;
1131 			}
1132 			/* OK. This frag holds the first byte of the page. */
1133 			if (!frag->node || !frag->node->raw) {
1134 				D1(printk(KERN_DEBUG "First frag in page is hole (0x%x-0x%x). Not expanding down.\n",
1135 					  frag->ofs, frag->ofs+frag->size));
1136 				break;
1137 			} else {
1138 
1139 				/* OK, it's a frag which extends to the beginning of the page. Does it live
1140 				   in a block which is still considered clean? If so, don't obsolete it.
1141 				   If not, cover it anyway. */
1142 
1143 				struct jffs2_raw_node_ref *raw = frag->node->raw;
1144 				struct jffs2_eraseblock *jeb;
1145 
1146 				jeb = &c->blocks[raw->flash_offset / c->sector_size];
1147 
1148 				if (jeb == c->gcblock) {
1149 					D1(printk(KERN_DEBUG "Expanding down to cover frag (0x%x-0x%x) in gcblock at %08x\n",
1150 						  frag->ofs, frag->ofs+frag->size, ref_offset(raw)));
1151 					start = frag->ofs;
1152 					break;
1153 				}
1154 				if (!ISDIRTY(jeb->dirty_size + jeb->wasted_size)) {
1155 					D1(printk(KERN_DEBUG "Not expanding down to cover frag (0x%x-0x%x) in clean block %08x\n",
1156 						  frag->ofs, frag->ofs+frag->size, jeb->offset));
1157 					break;
1158 				}
1159 
1160 				D1(printk(KERN_DEBUG "Expanding down to cover frag (0x%x-0x%x) in dirty block %08x\n",
1161 						  frag->ofs, frag->ofs+frag->size, jeb->offset));
1162 				start = frag->ofs;
1163 				break;
1164 			}
1165 		}
1166 
1167 		/* ... then up */
1168 
1169 		/* Find last frag which is actually part of the node we're to GC. */
1170 		frag = jffs2_lookup_node_frag(&f->fragtree, end-1);
1171 
1172 		while((frag = frag_next(frag)) && frag->ofs+frag->size <= max) {
1173 
1174 			/* If the previous frag doesn't even reach the beginning, there's lots
1175 			   of fragmentation. Just merge. */
1176 			if (frag->ofs+frag->size < max) {
1177 				D1(printk(KERN_DEBUG "Expanding up to cover partial frag (0x%x-0x%x)\n",
1178 					  frag->ofs, frag->ofs+frag->size));
1179 				end = frag->ofs + frag->size;
1180 				continue;
1181 			}
1182 
1183 			if (!frag->node || !frag->node->raw) {
1184 				D1(printk(KERN_DEBUG "Last frag in page is hole (0x%x-0x%x). Not expanding up.\n",
1185 					  frag->ofs, frag->ofs+frag->size));
1186 				break;
1187 			} else {
1188 
1189 				/* OK, it's a frag which extends to the beginning of the page. Does it live
1190 				   in a block which is still considered clean? If so, don't obsolete it.
1191 				   If not, cover it anyway. */
1192 
1193 				struct jffs2_raw_node_ref *raw = frag->node->raw;
1194 				struct jffs2_eraseblock *jeb;
1195 
1196 				jeb = &c->blocks[raw->flash_offset / c->sector_size];
1197 
1198 				if (jeb == c->gcblock) {
1199 					D1(printk(KERN_DEBUG "Expanding up to cover frag (0x%x-0x%x) in gcblock at %08x\n",
1200 						  frag->ofs, frag->ofs+frag->size, ref_offset(raw)));
1201 					end = frag->ofs + frag->size;
1202 					break;
1203 				}
1204 				if (!ISDIRTY(jeb->dirty_size + jeb->wasted_size)) {
1205 					D1(printk(KERN_DEBUG "Not expanding up to cover frag (0x%x-0x%x) in clean block %08x\n",
1206 						  frag->ofs, frag->ofs+frag->size, jeb->offset));
1207 					break;
1208 				}
1209 
1210 				D1(printk(KERN_DEBUG "Expanding up to cover frag (0x%x-0x%x) in dirty block %08x\n",
1211 						  frag->ofs, frag->ofs+frag->size, jeb->offset));
1212 				end = frag->ofs + frag->size;
1213 				break;
1214 			}
1215 		}
1216 		D1(printk(KERN_DEBUG "Expanded dnode to write from (0x%x-0x%x) to (0x%x-0x%x)\n",
1217 			  orig_start, orig_end, start, end));
1218 
1219 		D1(BUG_ON(end > frag_last(&f->fragtree)->ofs + frag_last(&f->fragtree)->size));
1220 		BUG_ON(end < orig_end);
1221 		BUG_ON(start > orig_start);
1222 	}
1223 
1224 	/* First, use readpage() to read the appropriate page into the page cache */
1225 	/* Q: What happens if we actually try to GC the _same_ page for which commit_write()
1226 	 *    triggered garbage collection in the first place?
1227 	 * A: I _think_ it's OK. read_cache_page shouldn't deadlock, we'll write out the
1228 	 *    page OK. We'll actually write it out again in commit_write, which is a little
1229 	 *    suboptimal, but at least we're correct.
1230 	 */
1231 	pg_ptr = jffs2_gc_fetch_page(c, f, start, &pg);
1232 
1233 	if (IS_ERR(pg_ptr)) {
1234 		printk(KERN_WARNING "read_cache_page() returned error: %ld\n", PTR_ERR(pg_ptr));
1235 		return PTR_ERR(pg_ptr);
1236 	}
1237 
1238 	offset = start;
1239 	while(offset < orig_end) {
1240 		uint32_t datalen;
1241 		uint32_t cdatalen;
1242 		uint16_t comprtype = JFFS2_COMPR_NONE;
1243 
1244 		ret = jffs2_reserve_space_gc(c, sizeof(ri) + JFFS2_MIN_DATA_LEN,
1245 					&alloclen, JFFS2_SUMMARY_INODE_SIZE);
1246 
1247 		if (ret) {
1248 			printk(KERN_WARNING "jffs2_reserve_space_gc of %zd bytes for garbage_collect_dnode failed: %d\n",
1249 			       sizeof(ri)+ JFFS2_MIN_DATA_LEN, ret);
1250 			break;
1251 		}
1252 		cdatalen = min_t(uint32_t, alloclen - sizeof(ri), end - offset);
1253 		datalen = end - offset;
1254 
1255 		writebuf = pg_ptr + (offset & (PAGE_CACHE_SIZE -1));
1256 
1257 		comprtype = jffs2_compress(c, f, writebuf, &comprbuf, &datalen, &cdatalen);
1258 
1259 		ri.magic = cpu_to_je16(JFFS2_MAGIC_BITMASK);
1260 		ri.nodetype = cpu_to_je16(JFFS2_NODETYPE_INODE);
1261 		ri.totlen = cpu_to_je32(sizeof(ri) + cdatalen);
1262 		ri.hdr_crc = cpu_to_je32(crc32(0, &ri, sizeof(struct jffs2_unknown_node)-4));
1263 
1264 		ri.ino = cpu_to_je32(f->inocache->ino);
1265 		ri.version = cpu_to_je32(++f->highest_version);
1266 		ri.mode = cpu_to_jemode(JFFS2_F_I_MODE(f));
1267 		ri.uid = cpu_to_je16(JFFS2_F_I_UID(f));
1268 		ri.gid = cpu_to_je16(JFFS2_F_I_GID(f));
1269 		ri.isize = cpu_to_je32(JFFS2_F_I_SIZE(f));
1270 		ri.atime = cpu_to_je32(JFFS2_F_I_ATIME(f));
1271 		ri.ctime = cpu_to_je32(JFFS2_F_I_CTIME(f));
1272 		ri.mtime = cpu_to_je32(JFFS2_F_I_MTIME(f));
1273 		ri.offset = cpu_to_je32(offset);
1274 		ri.csize = cpu_to_je32(cdatalen);
1275 		ri.dsize = cpu_to_je32(datalen);
1276 		ri.compr = comprtype & 0xff;
1277 		ri.usercompr = (comprtype >> 8) & 0xff;
1278 		ri.node_crc = cpu_to_je32(crc32(0, &ri, sizeof(ri)-8));
1279 		ri.data_crc = cpu_to_je32(crc32(0, comprbuf, cdatalen));
1280 
1281 		new_fn = jffs2_write_dnode(c, f, &ri, comprbuf, cdatalen, ALLOC_GC);
1282 
1283 		jffs2_free_comprbuf(comprbuf, writebuf);
1284 
1285 		if (IS_ERR(new_fn)) {
1286 			printk(KERN_WARNING "Error writing new dnode: %ld\n", PTR_ERR(new_fn));
1287 			ret = PTR_ERR(new_fn);
1288 			break;
1289 		}
1290 		ret = jffs2_add_full_dnode_to_inode(c, f, new_fn);
1291 		offset += datalen;
1292 		if (f->metadata) {
1293 			jffs2_mark_node_obsolete(c, f->metadata->raw);
1294 			jffs2_free_full_dnode(f->metadata);
1295 			f->metadata = NULL;
1296 		}
1297 	}
1298 
1299 	jffs2_gc_release_page(c, pg_ptr, &pg);
1300 	return ret;
1301 }
1302