xref: /openbmc/linux/fs/jffs2/gc.c (revision ced22070)
1 /*
2  * JFFS2 -- Journalling Flash File System, Version 2.
3  *
4  * Copyright © 2001-2007 Red Hat, Inc.
5  *
6  * Created by David Woodhouse <dwmw2@infradead.org>
7  *
8  * For licensing information, see the file 'LICENCE' in this directory.
9  *
10  */
11 
12 #include <linux/kernel.h>
13 #include <linux/mtd/mtd.h>
14 #include <linux/slab.h>
15 #include <linux/pagemap.h>
16 #include <linux/crc32.h>
17 #include <linux/compiler.h>
18 #include <linux/stat.h>
19 #include "nodelist.h"
20 #include "compr.h"
21 
22 static int jffs2_garbage_collect_pristine(struct jffs2_sb_info *c,
23 					  struct jffs2_inode_cache *ic,
24 					  struct jffs2_raw_node_ref *raw);
25 static int jffs2_garbage_collect_metadata(struct jffs2_sb_info *c, struct jffs2_eraseblock *jeb,
26 					struct jffs2_inode_info *f, struct jffs2_full_dnode *fd);
27 static int jffs2_garbage_collect_dirent(struct jffs2_sb_info *c, struct jffs2_eraseblock *jeb,
28 					struct jffs2_inode_info *f, struct jffs2_full_dirent *fd);
29 static int jffs2_garbage_collect_deletion_dirent(struct jffs2_sb_info *c, struct jffs2_eraseblock *jeb,
30 					struct jffs2_inode_info *f, struct jffs2_full_dirent *fd);
31 static int jffs2_garbage_collect_hole(struct jffs2_sb_info *c, struct jffs2_eraseblock *jeb,
32 				      struct jffs2_inode_info *f, struct jffs2_full_dnode *fn,
33 				      uint32_t start, uint32_t end);
34 static int jffs2_garbage_collect_dnode(struct jffs2_sb_info *c, struct jffs2_eraseblock *jeb,
35 				       struct jffs2_inode_info *f, struct jffs2_full_dnode *fn,
36 				       uint32_t start, uint32_t end);
37 static int jffs2_garbage_collect_live(struct jffs2_sb_info *c,  struct jffs2_eraseblock *jeb,
38 			       struct jffs2_raw_node_ref *raw, struct jffs2_inode_info *f);
39 
40 /* Called with erase_completion_lock held */
41 static struct jffs2_eraseblock *jffs2_find_gc_block(struct jffs2_sb_info *c)
42 {
43 	struct jffs2_eraseblock *ret;
44 	struct list_head *nextlist = NULL;
45 	int n = jiffies % 128;
46 
47 	/* Pick an eraseblock to garbage collect next. This is where we'll
48 	   put the clever wear-levelling algorithms. Eventually.  */
49 	/* We possibly want to favour the dirtier blocks more when the
50 	   number of free blocks is low. */
51 again:
52 	if (!list_empty(&c->bad_used_list) && c->nr_free_blocks > c->resv_blocks_gcbad) {
53 		D1(printk(KERN_DEBUG "Picking block from bad_used_list to GC next\n"));
54 		nextlist = &c->bad_used_list;
55 	} else if (n < 50 && !list_empty(&c->erasable_list)) {
56 		/* Note that most of them will have gone directly to be erased.
57 		   So don't favour the erasable_list _too_ much. */
58 		D1(printk(KERN_DEBUG "Picking block from erasable_list to GC next\n"));
59 		nextlist = &c->erasable_list;
60 	} else if (n < 110 && !list_empty(&c->very_dirty_list)) {
61 		/* Most of the time, pick one off the very_dirty list */
62 		D1(printk(KERN_DEBUG "Picking block from very_dirty_list to GC next\n"));
63 		nextlist = &c->very_dirty_list;
64 	} else if (n < 126 && !list_empty(&c->dirty_list)) {
65 		D1(printk(KERN_DEBUG "Picking block from dirty_list to GC next\n"));
66 		nextlist = &c->dirty_list;
67 	} else if (!list_empty(&c->clean_list)) {
68 		D1(printk(KERN_DEBUG "Picking block from clean_list to GC next\n"));
69 		nextlist = &c->clean_list;
70 	} else if (!list_empty(&c->dirty_list)) {
71 		D1(printk(KERN_DEBUG "Picking block from dirty_list to GC next (clean_list was empty)\n"));
72 
73 		nextlist = &c->dirty_list;
74 	} else if (!list_empty(&c->very_dirty_list)) {
75 		D1(printk(KERN_DEBUG "Picking block from very_dirty_list to GC next (clean_list and dirty_list were empty)\n"));
76 		nextlist = &c->very_dirty_list;
77 	} else if (!list_empty(&c->erasable_list)) {
78 		D1(printk(KERN_DEBUG "Picking block from erasable_list to GC next (clean_list and {very_,}dirty_list were empty)\n"));
79 
80 		nextlist = &c->erasable_list;
81 	} else if (!list_empty(&c->erasable_pending_wbuf_list)) {
82 		/* There are blocks are wating for the wbuf sync */
83 		D1(printk(KERN_DEBUG "Synching wbuf in order to reuse erasable_pending_wbuf_list blocks\n"));
84 		spin_unlock(&c->erase_completion_lock);
85 		jffs2_flush_wbuf_pad(c);
86 		spin_lock(&c->erase_completion_lock);
87 		goto again;
88 	} else {
89 		/* Eep. All were empty */
90 		D1(printk(KERN_NOTICE "jffs2: No clean, dirty _or_ erasable blocks to GC from! Where are they all?\n"));
91 		return NULL;
92 	}
93 
94 	ret = list_entry(nextlist->next, struct jffs2_eraseblock, list);
95 	list_del(&ret->list);
96 	c->gcblock = ret;
97 	ret->gc_node = ret->first_node;
98 	if (!ret->gc_node) {
99 		printk(KERN_WARNING "Eep. ret->gc_node for block at 0x%08x is NULL\n", ret->offset);
100 		BUG();
101 	}
102 
103 	/* Have we accidentally picked a clean block with wasted space ? */
104 	if (ret->wasted_size) {
105 		D1(printk(KERN_DEBUG "Converting wasted_size %08x to dirty_size\n", ret->wasted_size));
106 		ret->dirty_size += ret->wasted_size;
107 		c->wasted_size -= ret->wasted_size;
108 		c->dirty_size += ret->wasted_size;
109 		ret->wasted_size = 0;
110 	}
111 
112 	return ret;
113 }
114 
115 /* jffs2_garbage_collect_pass
116  * Make a single attempt to progress GC. Move one node, and possibly
117  * start erasing one eraseblock.
118  */
119 int jffs2_garbage_collect_pass(struct jffs2_sb_info *c)
120 {
121 	struct jffs2_inode_info *f;
122 	struct jffs2_inode_cache *ic;
123 	struct jffs2_eraseblock *jeb;
124 	struct jffs2_raw_node_ref *raw;
125 	uint32_t gcblock_dirty;
126 	int ret = 0, inum, nlink;
127 	int xattr = 0;
128 
129 	if (mutex_lock_interruptible(&c->alloc_sem))
130 		return -EINTR;
131 
132 	for (;;) {
133 		spin_lock(&c->erase_completion_lock);
134 		if (!c->unchecked_size)
135 			break;
136 
137 		/* We can't start doing GC yet. We haven't finished checking
138 		   the node CRCs etc. Do it now. */
139 
140 		/* checked_ino is protected by the alloc_sem */
141 		if (c->checked_ino > c->highest_ino && xattr) {
142 			printk(KERN_CRIT "Checked all inodes but still 0x%x bytes of unchecked space?\n",
143 			       c->unchecked_size);
144 			jffs2_dbg_dump_block_lists_nolock(c);
145 			spin_unlock(&c->erase_completion_lock);
146 			mutex_unlock(&c->alloc_sem);
147 			return -ENOSPC;
148 		}
149 
150 		spin_unlock(&c->erase_completion_lock);
151 
152 		if (!xattr)
153 			xattr = jffs2_verify_xattr(c);
154 
155 		spin_lock(&c->inocache_lock);
156 
157 		ic = jffs2_get_ino_cache(c, c->checked_ino++);
158 
159 		if (!ic) {
160 			spin_unlock(&c->inocache_lock);
161 			continue;
162 		}
163 
164 		if (!ic->nlink) {
165 			D1(printk(KERN_DEBUG "Skipping check of ino #%d with nlink zero\n",
166 				  ic->ino));
167 			spin_unlock(&c->inocache_lock);
168 			jffs2_xattr_delete_inode(c, ic);
169 			continue;
170 		}
171 		switch(ic->state) {
172 		case INO_STATE_CHECKEDABSENT:
173 		case INO_STATE_PRESENT:
174 			D1(printk(KERN_DEBUG "Skipping ino #%u already checked\n", ic->ino));
175 			spin_unlock(&c->inocache_lock);
176 			continue;
177 
178 		case INO_STATE_GC:
179 		case INO_STATE_CHECKING:
180 			printk(KERN_WARNING "Inode #%u is in state %d during CRC check phase!\n", ic->ino, ic->state);
181 			spin_unlock(&c->inocache_lock);
182 			BUG();
183 
184 		case INO_STATE_READING:
185 			/* We need to wait for it to finish, lest we move on
186 			   and trigger the BUG() above while we haven't yet
187 			   finished checking all its nodes */
188 			D1(printk(KERN_DEBUG "Waiting for ino #%u to finish reading\n", ic->ino));
189 			/* We need to come back again for the _same_ inode. We've
190 			 made no progress in this case, but that should be OK */
191 			c->checked_ino--;
192 
193 			mutex_unlock(&c->alloc_sem);
194 			sleep_on_spinunlock(&c->inocache_wq, &c->inocache_lock);
195 			return 0;
196 
197 		default:
198 			BUG();
199 
200 		case INO_STATE_UNCHECKED:
201 			;
202 		}
203 		ic->state = INO_STATE_CHECKING;
204 		spin_unlock(&c->inocache_lock);
205 
206 		D1(printk(KERN_DEBUG "jffs2_garbage_collect_pass() triggering inode scan of ino#%u\n", ic->ino));
207 
208 		ret = jffs2_do_crccheck_inode(c, ic);
209 		if (ret)
210 			printk(KERN_WARNING "Returned error for crccheck of ino #%u. Expect badness...\n", ic->ino);
211 
212 		jffs2_set_inocache_state(c, ic, INO_STATE_CHECKEDABSENT);
213 		mutex_unlock(&c->alloc_sem);
214 		return ret;
215 	}
216 
217 	/* First, work out which block we're garbage-collecting */
218 	jeb = c->gcblock;
219 
220 	if (!jeb)
221 		jeb = jffs2_find_gc_block(c);
222 
223 	if (!jeb) {
224 		D1 (printk(KERN_NOTICE "jffs2: Couldn't find erase block to garbage collect!\n"));
225 		spin_unlock(&c->erase_completion_lock);
226 		mutex_unlock(&c->alloc_sem);
227 		return -EIO;
228 	}
229 
230 	D1(printk(KERN_DEBUG "GC from block %08x, used_size %08x, dirty_size %08x, free_size %08x\n", jeb->offset, jeb->used_size, jeb->dirty_size, jeb->free_size));
231 	D1(if (c->nextblock)
232 	   printk(KERN_DEBUG "Nextblock at  %08x, used_size %08x, dirty_size %08x, wasted_size %08x, free_size %08x\n", c->nextblock->offset, c->nextblock->used_size, c->nextblock->dirty_size, c->nextblock->wasted_size, c->nextblock->free_size));
233 
234 	if (!jeb->used_size) {
235 		mutex_unlock(&c->alloc_sem);
236 		goto eraseit;
237 	}
238 
239 	raw = jeb->gc_node;
240 	gcblock_dirty = jeb->dirty_size;
241 
242 	while(ref_obsolete(raw)) {
243 		D1(printk(KERN_DEBUG "Node at 0x%08x is obsolete... skipping\n", ref_offset(raw)));
244 		raw = ref_next(raw);
245 		if (unlikely(!raw)) {
246 			printk(KERN_WARNING "eep. End of raw list while still supposedly nodes to GC\n");
247 			printk(KERN_WARNING "erase block at 0x%08x. free_size 0x%08x, dirty_size 0x%08x, used_size 0x%08x\n",
248 			       jeb->offset, jeb->free_size, jeb->dirty_size, jeb->used_size);
249 			jeb->gc_node = raw;
250 			spin_unlock(&c->erase_completion_lock);
251 			mutex_unlock(&c->alloc_sem);
252 			BUG();
253 		}
254 	}
255 	jeb->gc_node = raw;
256 
257 	D1(printk(KERN_DEBUG "Going to garbage collect node at 0x%08x\n", ref_offset(raw)));
258 
259 	if (!raw->next_in_ino) {
260 		/* Inode-less node. Clean marker, snapshot or something like that */
261 		spin_unlock(&c->erase_completion_lock);
262 		if (ref_flags(raw) == REF_PRISTINE) {
263 			/* It's an unknown node with JFFS2_FEATURE_RWCOMPAT_COPY */
264 			jffs2_garbage_collect_pristine(c, NULL, raw);
265 		} else {
266 			/* Just mark it obsolete */
267 			jffs2_mark_node_obsolete(c, raw);
268 		}
269 		mutex_unlock(&c->alloc_sem);
270 		goto eraseit_lock;
271 	}
272 
273 	ic = jffs2_raw_ref_to_ic(raw);
274 
275 #ifdef CONFIG_JFFS2_FS_XATTR
276 	/* When 'ic' refers xattr_datum/xattr_ref, this node is GCed as xattr.
277 	 * We can decide whether this node is inode or xattr by ic->class.     */
278 	if (ic->class == RAWNODE_CLASS_XATTR_DATUM
279 	    || ic->class == RAWNODE_CLASS_XATTR_REF) {
280 		spin_unlock(&c->erase_completion_lock);
281 
282 		if (ic->class == RAWNODE_CLASS_XATTR_DATUM) {
283 			ret = jffs2_garbage_collect_xattr_datum(c, (struct jffs2_xattr_datum *)ic, raw);
284 		} else {
285 			ret = jffs2_garbage_collect_xattr_ref(c, (struct jffs2_xattr_ref *)ic, raw);
286 		}
287 		goto test_gcnode;
288 	}
289 #endif
290 
291 	/* We need to hold the inocache. Either the erase_completion_lock or
292 	   the inocache_lock are sufficient; we trade down since the inocache_lock
293 	   causes less contention. */
294 	spin_lock(&c->inocache_lock);
295 
296 	spin_unlock(&c->erase_completion_lock);
297 
298 	D1(printk(KERN_DEBUG "jffs2_garbage_collect_pass collecting from block @0x%08x. Node @0x%08x(%d), ino #%u\n", jeb->offset, ref_offset(raw), ref_flags(raw), ic->ino));
299 
300 	/* Three possibilities:
301 	   1. Inode is already in-core. We must iget it and do proper
302 	      updating to its fragtree, etc.
303 	   2. Inode is not in-core, node is REF_PRISTINE. We lock the
304 	      inocache to prevent a read_inode(), copy the node intact.
305 	   3. Inode is not in-core, node is not pristine. We must iget()
306 	      and take the slow path.
307 	*/
308 
309 	switch(ic->state) {
310 	case INO_STATE_CHECKEDABSENT:
311 		/* It's been checked, but it's not currently in-core.
312 		   We can just copy any pristine nodes, but have
313 		   to prevent anyone else from doing read_inode() while
314 		   we're at it, so we set the state accordingly */
315 		if (ref_flags(raw) == REF_PRISTINE)
316 			ic->state = INO_STATE_GC;
317 		else {
318 			D1(printk(KERN_DEBUG "Ino #%u is absent but node not REF_PRISTINE. Reading.\n",
319 				  ic->ino));
320 		}
321 		break;
322 
323 	case INO_STATE_PRESENT:
324 		/* It's in-core. GC must iget() it. */
325 		break;
326 
327 	case INO_STATE_UNCHECKED:
328 	case INO_STATE_CHECKING:
329 	case INO_STATE_GC:
330 		/* Should never happen. We should have finished checking
331 		   by the time we actually start doing any GC, and since
332 		   we're holding the alloc_sem, no other garbage collection
333 		   can happen.
334 		*/
335 		printk(KERN_CRIT "Inode #%u already in state %d in jffs2_garbage_collect_pass()!\n",
336 		       ic->ino, ic->state);
337 		mutex_unlock(&c->alloc_sem);
338 		spin_unlock(&c->inocache_lock);
339 		BUG();
340 
341 	case INO_STATE_READING:
342 		/* Someone's currently trying to read it. We must wait for
343 		   them to finish and then go through the full iget() route
344 		   to do the GC. However, sometimes read_inode() needs to get
345 		   the alloc_sem() (for marking nodes invalid) so we must
346 		   drop the alloc_sem before sleeping. */
347 
348 		mutex_unlock(&c->alloc_sem);
349 		D1(printk(KERN_DEBUG "jffs2_garbage_collect_pass() waiting for ino #%u in state %d\n",
350 			  ic->ino, ic->state));
351 		sleep_on_spinunlock(&c->inocache_wq, &c->inocache_lock);
352 		/* And because we dropped the alloc_sem we must start again from the
353 		   beginning. Ponder chance of livelock here -- we're returning success
354 		   without actually making any progress.
355 
356 		   Q: What are the chances that the inode is back in INO_STATE_READING
357 		   again by the time we next enter this function? And that this happens
358 		   enough times to cause a real delay?
359 
360 		   A: Small enough that I don't care :)
361 		*/
362 		return 0;
363 	}
364 
365 	/* OK. Now if the inode is in state INO_STATE_GC, we are going to copy the
366 	   node intact, and we don't have to muck about with the fragtree etc.
367 	   because we know it's not in-core. If it _was_ in-core, we go through
368 	   all the iget() crap anyway */
369 
370 	if (ic->state == INO_STATE_GC) {
371 		spin_unlock(&c->inocache_lock);
372 
373 		ret = jffs2_garbage_collect_pristine(c, ic, raw);
374 
375 		spin_lock(&c->inocache_lock);
376 		ic->state = INO_STATE_CHECKEDABSENT;
377 		wake_up(&c->inocache_wq);
378 
379 		if (ret != -EBADFD) {
380 			spin_unlock(&c->inocache_lock);
381 			goto test_gcnode;
382 		}
383 
384 		/* Fall through if it wanted us to, with inocache_lock held */
385 	}
386 
387 	/* Prevent the fairly unlikely race where the gcblock is
388 	   entirely obsoleted by the final close of a file which had
389 	   the only valid nodes in the block, followed by erasure,
390 	   followed by freeing of the ic because the erased block(s)
391 	   held _all_ the nodes of that inode.... never been seen but
392 	   it's vaguely possible. */
393 
394 	inum = ic->ino;
395 	nlink = ic->nlink;
396 	spin_unlock(&c->inocache_lock);
397 
398 	f = jffs2_gc_fetch_inode(c, inum, nlink);
399 	if (IS_ERR(f)) {
400 		ret = PTR_ERR(f);
401 		goto release_sem;
402 	}
403 	if (!f) {
404 		ret = 0;
405 		goto release_sem;
406 	}
407 
408 	ret = jffs2_garbage_collect_live(c, jeb, raw, f);
409 
410 	jffs2_gc_release_inode(c, f);
411 
412  test_gcnode:
413 	if (jeb->dirty_size == gcblock_dirty && !ref_obsolete(jeb->gc_node)) {
414 		/* Eep. This really should never happen. GC is broken */
415 		printk(KERN_ERR "Error garbage collecting node at %08x!\n", ref_offset(jeb->gc_node));
416 		ret = -ENOSPC;
417 	}
418  release_sem:
419 	mutex_unlock(&c->alloc_sem);
420 
421  eraseit_lock:
422 	/* If we've finished this block, start it erasing */
423 	spin_lock(&c->erase_completion_lock);
424 
425  eraseit:
426 	if (c->gcblock && !c->gcblock->used_size) {
427 		D1(printk(KERN_DEBUG "Block at 0x%08x completely obsoleted by GC. Moving to erase_pending_list\n", c->gcblock->offset));
428 		/* We're GC'ing an empty block? */
429 		list_add_tail(&c->gcblock->list, &c->erase_pending_list);
430 		c->gcblock = NULL;
431 		c->nr_erasing_blocks++;
432 		jffs2_erase_pending_trigger(c);
433 	}
434 	spin_unlock(&c->erase_completion_lock);
435 
436 	return ret;
437 }
438 
439 static int jffs2_garbage_collect_live(struct jffs2_sb_info *c,  struct jffs2_eraseblock *jeb,
440 				      struct jffs2_raw_node_ref *raw, struct jffs2_inode_info *f)
441 {
442 	struct jffs2_node_frag *frag;
443 	struct jffs2_full_dnode *fn = NULL;
444 	struct jffs2_full_dirent *fd;
445 	uint32_t start = 0, end = 0, nrfrags = 0;
446 	int ret = 0;
447 
448 	mutex_lock(&f->sem);
449 
450 	/* Now we have the lock for this inode. Check that it's still the one at the head
451 	   of the list. */
452 
453 	spin_lock(&c->erase_completion_lock);
454 
455 	if (c->gcblock != jeb) {
456 		spin_unlock(&c->erase_completion_lock);
457 		D1(printk(KERN_DEBUG "GC block is no longer gcblock. Restart\n"));
458 		goto upnout;
459 	}
460 	if (ref_obsolete(raw)) {
461 		spin_unlock(&c->erase_completion_lock);
462 		D1(printk(KERN_DEBUG "node to be GC'd was obsoleted in the meantime.\n"));
463 		/* They'll call again */
464 		goto upnout;
465 	}
466 	spin_unlock(&c->erase_completion_lock);
467 
468 	/* OK. Looks safe. And nobody can get us now because we have the semaphore. Move the block */
469 	if (f->metadata && f->metadata->raw == raw) {
470 		fn = f->metadata;
471 		ret = jffs2_garbage_collect_metadata(c, jeb, f, fn);
472 		goto upnout;
473 	}
474 
475 	/* FIXME. Read node and do lookup? */
476 	for (frag = frag_first(&f->fragtree); frag; frag = frag_next(frag)) {
477 		if (frag->node && frag->node->raw == raw) {
478 			fn = frag->node;
479 			end = frag->ofs + frag->size;
480 			if (!nrfrags++)
481 				start = frag->ofs;
482 			if (nrfrags == frag->node->frags)
483 				break; /* We've found them all */
484 		}
485 	}
486 	if (fn) {
487 		if (ref_flags(raw) == REF_PRISTINE) {
488 			ret = jffs2_garbage_collect_pristine(c, f->inocache, raw);
489 			if (!ret) {
490 				/* Urgh. Return it sensibly. */
491 				frag->node->raw = f->inocache->nodes;
492 			}
493 			if (ret != -EBADFD)
494 				goto upnout;
495 		}
496 		/* We found a datanode. Do the GC */
497 		if((start >> PAGE_CACHE_SHIFT) < ((end-1) >> PAGE_CACHE_SHIFT)) {
498 			/* It crosses a page boundary. Therefore, it must be a hole. */
499 			ret = jffs2_garbage_collect_hole(c, jeb, f, fn, start, end);
500 		} else {
501 			/* It could still be a hole. But we GC the page this way anyway */
502 			ret = jffs2_garbage_collect_dnode(c, jeb, f, fn, start, end);
503 		}
504 		goto upnout;
505 	}
506 
507 	/* Wasn't a dnode. Try dirent */
508 	for (fd = f->dents; fd; fd=fd->next) {
509 		if (fd->raw == raw)
510 			break;
511 	}
512 
513 	if (fd && fd->ino) {
514 		ret = jffs2_garbage_collect_dirent(c, jeb, f, fd);
515 	} else if (fd) {
516 		ret = jffs2_garbage_collect_deletion_dirent(c, jeb, f, fd);
517 	} else {
518 		printk(KERN_WARNING "Raw node at 0x%08x wasn't in node lists for ino #%u\n",
519 		       ref_offset(raw), f->inocache->ino);
520 		if (ref_obsolete(raw)) {
521 			printk(KERN_WARNING "But it's obsolete so we don't mind too much\n");
522 		} else {
523 			jffs2_dbg_dump_node(c, ref_offset(raw));
524 			BUG();
525 		}
526 	}
527  upnout:
528 	mutex_unlock(&f->sem);
529 
530 	return ret;
531 }
532 
533 static int jffs2_garbage_collect_pristine(struct jffs2_sb_info *c,
534 					  struct jffs2_inode_cache *ic,
535 					  struct jffs2_raw_node_ref *raw)
536 {
537 	union jffs2_node_union *node;
538 	size_t retlen;
539 	int ret;
540 	uint32_t phys_ofs, alloclen;
541 	uint32_t crc, rawlen;
542 	int retried = 0;
543 
544 	D1(printk(KERN_DEBUG "Going to GC REF_PRISTINE node at 0x%08x\n", ref_offset(raw)));
545 
546 	alloclen = rawlen = ref_totlen(c, c->gcblock, raw);
547 
548 	/* Ask for a small amount of space (or the totlen if smaller) because we
549 	   don't want to force wastage of the end of a block if splitting would
550 	   work. */
551 	if (ic && alloclen > sizeof(struct jffs2_raw_inode) + JFFS2_MIN_DATA_LEN)
552 		alloclen = sizeof(struct jffs2_raw_inode) + JFFS2_MIN_DATA_LEN;
553 
554 	ret = jffs2_reserve_space_gc(c, alloclen, &alloclen, rawlen);
555 	/* 'rawlen' is not the exact summary size; it is only an upper estimation */
556 
557 	if (ret)
558 		return ret;
559 
560 	if (alloclen < rawlen) {
561 		/* Doesn't fit untouched. We'll go the old route and split it */
562 		return -EBADFD;
563 	}
564 
565 	node = kmalloc(rawlen, GFP_KERNEL);
566 	if (!node)
567 		return -ENOMEM;
568 
569 	ret = jffs2_flash_read(c, ref_offset(raw), rawlen, &retlen, (char *)node);
570 	if (!ret && retlen != rawlen)
571 		ret = -EIO;
572 	if (ret)
573 		goto out_node;
574 
575 	crc = crc32(0, node, sizeof(struct jffs2_unknown_node)-4);
576 	if (je32_to_cpu(node->u.hdr_crc) != crc) {
577 		printk(KERN_WARNING "Header CRC failed on REF_PRISTINE node at 0x%08x: Read 0x%08x, calculated 0x%08x\n",
578 		       ref_offset(raw), je32_to_cpu(node->u.hdr_crc), crc);
579 		goto bail;
580 	}
581 
582 	switch(je16_to_cpu(node->u.nodetype)) {
583 	case JFFS2_NODETYPE_INODE:
584 		crc = crc32(0, node, sizeof(node->i)-8);
585 		if (je32_to_cpu(node->i.node_crc) != crc) {
586 			printk(KERN_WARNING "Node CRC failed on REF_PRISTINE data node at 0x%08x: Read 0x%08x, calculated 0x%08x\n",
587 			       ref_offset(raw), je32_to_cpu(node->i.node_crc), crc);
588 			goto bail;
589 		}
590 
591 		if (je32_to_cpu(node->i.dsize)) {
592 			crc = crc32(0, node->i.data, je32_to_cpu(node->i.csize));
593 			if (je32_to_cpu(node->i.data_crc) != crc) {
594 				printk(KERN_WARNING "Data CRC failed on REF_PRISTINE data node at 0x%08x: Read 0x%08x, calculated 0x%08x\n",
595 				       ref_offset(raw), je32_to_cpu(node->i.data_crc), crc);
596 				goto bail;
597 			}
598 		}
599 		break;
600 
601 	case JFFS2_NODETYPE_DIRENT:
602 		crc = crc32(0, node, sizeof(node->d)-8);
603 		if (je32_to_cpu(node->d.node_crc) != crc) {
604 			printk(KERN_WARNING "Node CRC failed on REF_PRISTINE dirent node at 0x%08x: Read 0x%08x, calculated 0x%08x\n",
605 			       ref_offset(raw), je32_to_cpu(node->d.node_crc), crc);
606 			goto bail;
607 		}
608 
609 		if (strnlen(node->d.name, node->d.nsize) != node->d.nsize) {
610 			printk(KERN_WARNING "Name in dirent node at 0x%08x contains zeroes\n", ref_offset(raw));
611 			goto bail;
612 		}
613 
614 		if (node->d.nsize) {
615 			crc = crc32(0, node->d.name, node->d.nsize);
616 			if (je32_to_cpu(node->d.name_crc) != crc) {
617 				printk(KERN_WARNING "Name CRC failed on REF_PRISTINE dirent node at 0x%08x: Read 0x%08x, calculated 0x%08x\n",
618 				       ref_offset(raw), je32_to_cpu(node->d.name_crc), crc);
619 				goto bail;
620 			}
621 		}
622 		break;
623 	default:
624 		/* If it's inode-less, we don't _know_ what it is. Just copy it intact */
625 		if (ic) {
626 			printk(KERN_WARNING "Unknown node type for REF_PRISTINE node at 0x%08x: 0x%04x\n",
627 			       ref_offset(raw), je16_to_cpu(node->u.nodetype));
628 			goto bail;
629 		}
630 	}
631 
632 	/* OK, all the CRCs are good; this node can just be copied as-is. */
633  retry:
634 	phys_ofs = write_ofs(c);
635 
636 	ret = jffs2_flash_write(c, phys_ofs, rawlen, &retlen, (char *)node);
637 
638 	if (ret || (retlen != rawlen)) {
639 		printk(KERN_NOTICE "Write of %d bytes at 0x%08x failed. returned %d, retlen %zd\n",
640 		       rawlen, phys_ofs, ret, retlen);
641 		if (retlen) {
642 			jffs2_add_physical_node_ref(c, phys_ofs | REF_OBSOLETE, rawlen, NULL);
643 		} else {
644 			printk(KERN_NOTICE "Not marking the space at 0x%08x as dirty because the flash driver returned retlen zero\n", phys_ofs);
645 		}
646 		if (!retried) {
647 			/* Try to reallocate space and retry */
648 			uint32_t dummy;
649 			struct jffs2_eraseblock *jeb = &c->blocks[phys_ofs / c->sector_size];
650 
651 			retried = 1;
652 
653 			D1(printk(KERN_DEBUG "Retrying failed write of REF_PRISTINE node.\n"));
654 
655 			jffs2_dbg_acct_sanity_check(c,jeb);
656 			jffs2_dbg_acct_paranoia_check(c, jeb);
657 
658 			ret = jffs2_reserve_space_gc(c, rawlen, &dummy, rawlen);
659 						/* this is not the exact summary size of it,
660 							it is only an upper estimation */
661 
662 			if (!ret) {
663 				D1(printk(KERN_DEBUG "Allocated space at 0x%08x to retry failed write.\n", phys_ofs));
664 
665 				jffs2_dbg_acct_sanity_check(c,jeb);
666 				jffs2_dbg_acct_paranoia_check(c, jeb);
667 
668 				goto retry;
669 			}
670 			D1(printk(KERN_DEBUG "Failed to allocate space to retry failed write: %d!\n", ret));
671 		}
672 
673 		if (!ret)
674 			ret = -EIO;
675 		goto out_node;
676 	}
677 	jffs2_add_physical_node_ref(c, phys_ofs | REF_PRISTINE, rawlen, ic);
678 
679 	jffs2_mark_node_obsolete(c, raw);
680 	D1(printk(KERN_DEBUG "WHEEE! GC REF_PRISTINE node at 0x%08x succeeded\n", ref_offset(raw)));
681 
682  out_node:
683 	kfree(node);
684 	return ret;
685  bail:
686 	ret = -EBADFD;
687 	goto out_node;
688 }
689 
690 static int jffs2_garbage_collect_metadata(struct jffs2_sb_info *c, struct jffs2_eraseblock *jeb,
691 					struct jffs2_inode_info *f, struct jffs2_full_dnode *fn)
692 {
693 	struct jffs2_full_dnode *new_fn;
694 	struct jffs2_raw_inode ri;
695 	struct jffs2_node_frag *last_frag;
696 	union jffs2_device_node dev;
697 	char *mdata = NULL, mdatalen = 0;
698 	uint32_t alloclen, ilen;
699 	int ret;
700 
701 	if (S_ISBLK(JFFS2_F_I_MODE(f)) ||
702 	    S_ISCHR(JFFS2_F_I_MODE(f)) ) {
703 		/* For these, we don't actually need to read the old node */
704 		mdatalen = jffs2_encode_dev(&dev, JFFS2_F_I_RDEV(f));
705 		mdata = (char *)&dev;
706 		D1(printk(KERN_DEBUG "jffs2_garbage_collect_metadata(): Writing %d bytes of kdev_t\n", mdatalen));
707 	} else if (S_ISLNK(JFFS2_F_I_MODE(f))) {
708 		mdatalen = fn->size;
709 		mdata = kmalloc(fn->size, GFP_KERNEL);
710 		if (!mdata) {
711 			printk(KERN_WARNING "kmalloc of mdata failed in jffs2_garbage_collect_metadata()\n");
712 			return -ENOMEM;
713 		}
714 		ret = jffs2_read_dnode(c, f, fn, mdata, 0, mdatalen);
715 		if (ret) {
716 			printk(KERN_WARNING "read of old metadata failed in jffs2_garbage_collect_metadata(): %d\n", ret);
717 			kfree(mdata);
718 			return ret;
719 		}
720 		D1(printk(KERN_DEBUG "jffs2_garbage_collect_metadata(): Writing %d bites of symlink target\n", mdatalen));
721 
722 	}
723 
724 	ret = jffs2_reserve_space_gc(c, sizeof(ri) + mdatalen, &alloclen,
725 				JFFS2_SUMMARY_INODE_SIZE);
726 	if (ret) {
727 		printk(KERN_WARNING "jffs2_reserve_space_gc of %zd bytes for garbage_collect_metadata failed: %d\n",
728 		       sizeof(ri)+ mdatalen, ret);
729 		goto out;
730 	}
731 
732 	last_frag = frag_last(&f->fragtree);
733 	if (last_frag)
734 		/* Fetch the inode length from the fragtree rather then
735 		 * from i_size since i_size may have not been updated yet */
736 		ilen = last_frag->ofs + last_frag->size;
737 	else
738 		ilen = JFFS2_F_I_SIZE(f);
739 
740 	memset(&ri, 0, sizeof(ri));
741 	ri.magic = cpu_to_je16(JFFS2_MAGIC_BITMASK);
742 	ri.nodetype = cpu_to_je16(JFFS2_NODETYPE_INODE);
743 	ri.totlen = cpu_to_je32(sizeof(ri) + mdatalen);
744 	ri.hdr_crc = cpu_to_je32(crc32(0, &ri, sizeof(struct jffs2_unknown_node)-4));
745 
746 	ri.ino = cpu_to_je32(f->inocache->ino);
747 	ri.version = cpu_to_je32(++f->highest_version);
748 	ri.mode = cpu_to_jemode(JFFS2_F_I_MODE(f));
749 	ri.uid = cpu_to_je16(JFFS2_F_I_UID(f));
750 	ri.gid = cpu_to_je16(JFFS2_F_I_GID(f));
751 	ri.isize = cpu_to_je32(ilen);
752 	ri.atime = cpu_to_je32(JFFS2_F_I_ATIME(f));
753 	ri.ctime = cpu_to_je32(JFFS2_F_I_CTIME(f));
754 	ri.mtime = cpu_to_je32(JFFS2_F_I_MTIME(f));
755 	ri.offset = cpu_to_je32(0);
756 	ri.csize = cpu_to_je32(mdatalen);
757 	ri.dsize = cpu_to_je32(mdatalen);
758 	ri.compr = JFFS2_COMPR_NONE;
759 	ri.node_crc = cpu_to_je32(crc32(0, &ri, sizeof(ri)-8));
760 	ri.data_crc = cpu_to_je32(crc32(0, mdata, mdatalen));
761 
762 	new_fn = jffs2_write_dnode(c, f, &ri, mdata, mdatalen, ALLOC_GC);
763 
764 	if (IS_ERR(new_fn)) {
765 		printk(KERN_WARNING "Error writing new dnode: %ld\n", PTR_ERR(new_fn));
766 		ret = PTR_ERR(new_fn);
767 		goto out;
768 	}
769 	jffs2_mark_node_obsolete(c, fn->raw);
770 	jffs2_free_full_dnode(fn);
771 	f->metadata = new_fn;
772  out:
773 	if (S_ISLNK(JFFS2_F_I_MODE(f)))
774 		kfree(mdata);
775 	return ret;
776 }
777 
778 static int jffs2_garbage_collect_dirent(struct jffs2_sb_info *c, struct jffs2_eraseblock *jeb,
779 					struct jffs2_inode_info *f, struct jffs2_full_dirent *fd)
780 {
781 	struct jffs2_full_dirent *new_fd;
782 	struct jffs2_raw_dirent rd;
783 	uint32_t alloclen;
784 	int ret;
785 
786 	rd.magic = cpu_to_je16(JFFS2_MAGIC_BITMASK);
787 	rd.nodetype = cpu_to_je16(JFFS2_NODETYPE_DIRENT);
788 	rd.nsize = strlen(fd->name);
789 	rd.totlen = cpu_to_je32(sizeof(rd) + rd.nsize);
790 	rd.hdr_crc = cpu_to_je32(crc32(0, &rd, sizeof(struct jffs2_unknown_node)-4));
791 
792 	rd.pino = cpu_to_je32(f->inocache->ino);
793 	rd.version = cpu_to_je32(++f->highest_version);
794 	rd.ino = cpu_to_je32(fd->ino);
795 	/* If the times on this inode were set by explicit utime() they can be different,
796 	   so refrain from splatting them. */
797 	if (JFFS2_F_I_MTIME(f) == JFFS2_F_I_CTIME(f))
798 		rd.mctime = cpu_to_je32(JFFS2_F_I_MTIME(f));
799 	else
800 		rd.mctime = cpu_to_je32(0);
801 	rd.type = fd->type;
802 	rd.node_crc = cpu_to_je32(crc32(0, &rd, sizeof(rd)-8));
803 	rd.name_crc = cpu_to_je32(crc32(0, fd->name, rd.nsize));
804 
805 	ret = jffs2_reserve_space_gc(c, sizeof(rd)+rd.nsize, &alloclen,
806 				JFFS2_SUMMARY_DIRENT_SIZE(rd.nsize));
807 	if (ret) {
808 		printk(KERN_WARNING "jffs2_reserve_space_gc of %zd bytes for garbage_collect_dirent failed: %d\n",
809 		       sizeof(rd)+rd.nsize, ret);
810 		return ret;
811 	}
812 	new_fd = jffs2_write_dirent(c, f, &rd, fd->name, rd.nsize, ALLOC_GC);
813 
814 	if (IS_ERR(new_fd)) {
815 		printk(KERN_WARNING "jffs2_write_dirent in garbage_collect_dirent failed: %ld\n", PTR_ERR(new_fd));
816 		return PTR_ERR(new_fd);
817 	}
818 	jffs2_add_fd_to_list(c, new_fd, &f->dents);
819 	return 0;
820 }
821 
822 static int jffs2_garbage_collect_deletion_dirent(struct jffs2_sb_info *c, struct jffs2_eraseblock *jeb,
823 					struct jffs2_inode_info *f, struct jffs2_full_dirent *fd)
824 {
825 	struct jffs2_full_dirent **fdp = &f->dents;
826 	int found = 0;
827 
828 	/* On a medium where we can't actually mark nodes obsolete
829 	   pernamently, such as NAND flash, we need to work out
830 	   whether this deletion dirent is still needed to actively
831 	   delete a 'real' dirent with the same name that's still
832 	   somewhere else on the flash. */
833 	if (!jffs2_can_mark_obsolete(c)) {
834 		struct jffs2_raw_dirent *rd;
835 		struct jffs2_raw_node_ref *raw;
836 		int ret;
837 		size_t retlen;
838 		int name_len = strlen(fd->name);
839 		uint32_t name_crc = crc32(0, fd->name, name_len);
840 		uint32_t rawlen = ref_totlen(c, jeb, fd->raw);
841 
842 		rd = kmalloc(rawlen, GFP_KERNEL);
843 		if (!rd)
844 			return -ENOMEM;
845 
846 		/* Prevent the erase code from nicking the obsolete node refs while
847 		   we're looking at them. I really don't like this extra lock but
848 		   can't see any alternative. Suggestions on a postcard to... */
849 		mutex_lock(&c->erase_free_sem);
850 
851 		for (raw = f->inocache->nodes; raw != (void *)f->inocache; raw = raw->next_in_ino) {
852 
853 			cond_resched();
854 
855 			/* We only care about obsolete ones */
856 			if (!(ref_obsolete(raw)))
857 				continue;
858 
859 			/* Any dirent with the same name is going to have the same length... */
860 			if (ref_totlen(c, NULL, raw) != rawlen)
861 				continue;
862 
863 			/* Doesn't matter if there's one in the same erase block. We're going to
864 			   delete it too at the same time. */
865 			if (SECTOR_ADDR(raw->flash_offset) == SECTOR_ADDR(fd->raw->flash_offset))
866 				continue;
867 
868 			D1(printk(KERN_DEBUG "Check potential deletion dirent at %08x\n", ref_offset(raw)));
869 
870 			/* This is an obsolete node belonging to the same directory, and it's of the right
871 			   length. We need to take a closer look...*/
872 			ret = jffs2_flash_read(c, ref_offset(raw), rawlen, &retlen, (char *)rd);
873 			if (ret) {
874 				printk(KERN_WARNING "jffs2_g_c_deletion_dirent(): Read error (%d) reading obsolete node at %08x\n", ret, ref_offset(raw));
875 				/* If we can't read it, we don't need to continue to obsolete it. Continue */
876 				continue;
877 			}
878 			if (retlen != rawlen) {
879 				printk(KERN_WARNING "jffs2_g_c_deletion_dirent(): Short read (%zd not %u) reading header from obsolete node at %08x\n",
880 				       retlen, rawlen, ref_offset(raw));
881 				continue;
882 			}
883 
884 			if (je16_to_cpu(rd->nodetype) != JFFS2_NODETYPE_DIRENT)
885 				continue;
886 
887 			/* If the name CRC doesn't match, skip */
888 			if (je32_to_cpu(rd->name_crc) != name_crc)
889 				continue;
890 
891 			/* If the name length doesn't match, or it's another deletion dirent, skip */
892 			if (rd->nsize != name_len || !je32_to_cpu(rd->ino))
893 				continue;
894 
895 			/* OK, check the actual name now */
896 			if (memcmp(rd->name, fd->name, name_len))
897 				continue;
898 
899 			/* OK. The name really does match. There really is still an older node on
900 			   the flash which our deletion dirent obsoletes. So we have to write out
901 			   a new deletion dirent to replace it */
902 			mutex_unlock(&c->erase_free_sem);
903 
904 			D1(printk(KERN_DEBUG "Deletion dirent at %08x still obsoletes real dirent \"%s\" at %08x for ino #%u\n",
905 				  ref_offset(fd->raw), fd->name, ref_offset(raw), je32_to_cpu(rd->ino)));
906 			kfree(rd);
907 
908 			return jffs2_garbage_collect_dirent(c, jeb, f, fd);
909 		}
910 
911 		mutex_unlock(&c->erase_free_sem);
912 		kfree(rd);
913 	}
914 
915 	/* FIXME: If we're deleting a dirent which contains the current mtime and ctime,
916 	   we should update the metadata node with those times accordingly */
917 
918 	/* No need for it any more. Just mark it obsolete and remove it from the list */
919 	while (*fdp) {
920 		if ((*fdp) == fd) {
921 			found = 1;
922 			*fdp = fd->next;
923 			break;
924 		}
925 		fdp = &(*fdp)->next;
926 	}
927 	if (!found) {
928 		printk(KERN_WARNING "Deletion dirent \"%s\" not found in list for ino #%u\n", fd->name, f->inocache->ino);
929 	}
930 	jffs2_mark_node_obsolete(c, fd->raw);
931 	jffs2_free_full_dirent(fd);
932 	return 0;
933 }
934 
935 static int jffs2_garbage_collect_hole(struct jffs2_sb_info *c, struct jffs2_eraseblock *jeb,
936 				      struct jffs2_inode_info *f, struct jffs2_full_dnode *fn,
937 				      uint32_t start, uint32_t end)
938 {
939 	struct jffs2_raw_inode ri;
940 	struct jffs2_node_frag *frag;
941 	struct jffs2_full_dnode *new_fn;
942 	uint32_t alloclen, ilen;
943 	int ret;
944 
945 	D1(printk(KERN_DEBUG "Writing replacement hole node for ino #%u from offset 0x%x to 0x%x\n",
946 		  f->inocache->ino, start, end));
947 
948 	memset(&ri, 0, sizeof(ri));
949 
950 	if(fn->frags > 1) {
951 		size_t readlen;
952 		uint32_t crc;
953 		/* It's partially obsoleted by a later write. So we have to
954 		   write it out again with the _same_ version as before */
955 		ret = jffs2_flash_read(c, ref_offset(fn->raw), sizeof(ri), &readlen, (char *)&ri);
956 		if (readlen != sizeof(ri) || ret) {
957 			printk(KERN_WARNING "Node read failed in jffs2_garbage_collect_hole. Ret %d, retlen %zd. Data will be lost by writing new hole node\n", ret, readlen);
958 			goto fill;
959 		}
960 		if (je16_to_cpu(ri.nodetype) != JFFS2_NODETYPE_INODE) {
961 			printk(KERN_WARNING "jffs2_garbage_collect_hole: Node at 0x%08x had node type 0x%04x instead of JFFS2_NODETYPE_INODE(0x%04x)\n",
962 			       ref_offset(fn->raw),
963 			       je16_to_cpu(ri.nodetype), JFFS2_NODETYPE_INODE);
964 			return -EIO;
965 		}
966 		if (je32_to_cpu(ri.totlen) != sizeof(ri)) {
967 			printk(KERN_WARNING "jffs2_garbage_collect_hole: Node at 0x%08x had totlen 0x%x instead of expected 0x%zx\n",
968 			       ref_offset(fn->raw),
969 			       je32_to_cpu(ri.totlen), sizeof(ri));
970 			return -EIO;
971 		}
972 		crc = crc32(0, &ri, sizeof(ri)-8);
973 		if (crc != je32_to_cpu(ri.node_crc)) {
974 			printk(KERN_WARNING "jffs2_garbage_collect_hole: Node at 0x%08x had CRC 0x%08x which doesn't match calculated CRC 0x%08x\n",
975 			       ref_offset(fn->raw),
976 			       je32_to_cpu(ri.node_crc), crc);
977 			/* FIXME: We could possibly deal with this by writing new holes for each frag */
978 			printk(KERN_WARNING "Data in the range 0x%08x to 0x%08x of inode #%u will be lost\n",
979 			       start, end, f->inocache->ino);
980 			goto fill;
981 		}
982 		if (ri.compr != JFFS2_COMPR_ZERO) {
983 			printk(KERN_WARNING "jffs2_garbage_collect_hole: Node 0x%08x wasn't a hole node!\n", ref_offset(fn->raw));
984 			printk(KERN_WARNING "Data in the range 0x%08x to 0x%08x of inode #%u will be lost\n",
985 			       start, end, f->inocache->ino);
986 			goto fill;
987 		}
988 	} else {
989 	fill:
990 		ri.magic = cpu_to_je16(JFFS2_MAGIC_BITMASK);
991 		ri.nodetype = cpu_to_je16(JFFS2_NODETYPE_INODE);
992 		ri.totlen = cpu_to_je32(sizeof(ri));
993 		ri.hdr_crc = cpu_to_je32(crc32(0, &ri, sizeof(struct jffs2_unknown_node)-4));
994 
995 		ri.ino = cpu_to_je32(f->inocache->ino);
996 		ri.version = cpu_to_je32(++f->highest_version);
997 		ri.offset = cpu_to_je32(start);
998 		ri.dsize = cpu_to_je32(end - start);
999 		ri.csize = cpu_to_je32(0);
1000 		ri.compr = JFFS2_COMPR_ZERO;
1001 	}
1002 
1003 	frag = frag_last(&f->fragtree);
1004 	if (frag)
1005 		/* Fetch the inode length from the fragtree rather then
1006 		 * from i_size since i_size may have not been updated yet */
1007 		ilen = frag->ofs + frag->size;
1008 	else
1009 		ilen = JFFS2_F_I_SIZE(f);
1010 
1011 	ri.mode = cpu_to_jemode(JFFS2_F_I_MODE(f));
1012 	ri.uid = cpu_to_je16(JFFS2_F_I_UID(f));
1013 	ri.gid = cpu_to_je16(JFFS2_F_I_GID(f));
1014 	ri.isize = cpu_to_je32(ilen);
1015 	ri.atime = cpu_to_je32(JFFS2_F_I_ATIME(f));
1016 	ri.ctime = cpu_to_je32(JFFS2_F_I_CTIME(f));
1017 	ri.mtime = cpu_to_je32(JFFS2_F_I_MTIME(f));
1018 	ri.data_crc = cpu_to_je32(0);
1019 	ri.node_crc = cpu_to_je32(crc32(0, &ri, sizeof(ri)-8));
1020 
1021 	ret = jffs2_reserve_space_gc(c, sizeof(ri), &alloclen,
1022 				     JFFS2_SUMMARY_INODE_SIZE);
1023 	if (ret) {
1024 		printk(KERN_WARNING "jffs2_reserve_space_gc of %zd bytes for garbage_collect_hole failed: %d\n",
1025 		       sizeof(ri), ret);
1026 		return ret;
1027 	}
1028 	new_fn = jffs2_write_dnode(c, f, &ri, NULL, 0, ALLOC_GC);
1029 
1030 	if (IS_ERR(new_fn)) {
1031 		printk(KERN_WARNING "Error writing new hole node: %ld\n", PTR_ERR(new_fn));
1032 		return PTR_ERR(new_fn);
1033 	}
1034 	if (je32_to_cpu(ri.version) == f->highest_version) {
1035 		jffs2_add_full_dnode_to_inode(c, f, new_fn);
1036 		if (f->metadata) {
1037 			jffs2_mark_node_obsolete(c, f->metadata->raw);
1038 			jffs2_free_full_dnode(f->metadata);
1039 			f->metadata = NULL;
1040 		}
1041 		return 0;
1042 	}
1043 
1044 	/*
1045 	 * We should only get here in the case where the node we are
1046 	 * replacing had more than one frag, so we kept the same version
1047 	 * number as before. (Except in case of error -- see 'goto fill;'
1048 	 * above.)
1049 	 */
1050 	D1(if(unlikely(fn->frags <= 1)) {
1051 		printk(KERN_WARNING "jffs2_garbage_collect_hole: Replacing fn with %d frag(s) but new ver %d != highest_version %d of ino #%d\n",
1052 		       fn->frags, je32_to_cpu(ri.version), f->highest_version,
1053 		       je32_to_cpu(ri.ino));
1054 	});
1055 
1056 	/* This is a partially-overlapped hole node. Mark it REF_NORMAL not REF_PRISTINE */
1057 	mark_ref_normal(new_fn->raw);
1058 
1059 	for (frag = jffs2_lookup_node_frag(&f->fragtree, fn->ofs);
1060 	     frag; frag = frag_next(frag)) {
1061 		if (frag->ofs > fn->size + fn->ofs)
1062 			break;
1063 		if (frag->node == fn) {
1064 			frag->node = new_fn;
1065 			new_fn->frags++;
1066 			fn->frags--;
1067 		}
1068 	}
1069 	if (fn->frags) {
1070 		printk(KERN_WARNING "jffs2_garbage_collect_hole: Old node still has frags!\n");
1071 		BUG();
1072 	}
1073 	if (!new_fn->frags) {
1074 		printk(KERN_WARNING "jffs2_garbage_collect_hole: New node has no frags!\n");
1075 		BUG();
1076 	}
1077 
1078 	jffs2_mark_node_obsolete(c, fn->raw);
1079 	jffs2_free_full_dnode(fn);
1080 
1081 	return 0;
1082 }
1083 
1084 static int jffs2_garbage_collect_dnode(struct jffs2_sb_info *c, struct jffs2_eraseblock *orig_jeb,
1085 				       struct jffs2_inode_info *f, struct jffs2_full_dnode *fn,
1086 				       uint32_t start, uint32_t end)
1087 {
1088 	struct jffs2_full_dnode *new_fn;
1089 	struct jffs2_raw_inode ri;
1090 	uint32_t alloclen, offset, orig_end, orig_start;
1091 	int ret = 0;
1092 	unsigned char *comprbuf = NULL, *writebuf;
1093 	unsigned long pg;
1094 	unsigned char *pg_ptr;
1095 
1096 	memset(&ri, 0, sizeof(ri));
1097 
1098 	D1(printk(KERN_DEBUG "Writing replacement dnode for ino #%u from offset 0x%x to 0x%x\n",
1099 		  f->inocache->ino, start, end));
1100 
1101 	orig_end = end;
1102 	orig_start = start;
1103 
1104 	if (c->nr_free_blocks + c->nr_erasing_blocks > c->resv_blocks_gcmerge) {
1105 		/* Attempt to do some merging. But only expand to cover logically
1106 		   adjacent frags if the block containing them is already considered
1107 		   to be dirty. Otherwise we end up with GC just going round in
1108 		   circles dirtying the nodes it already wrote out, especially
1109 		   on NAND where we have small eraseblocks and hence a much higher
1110 		   chance of nodes having to be split to cross boundaries. */
1111 
1112 		struct jffs2_node_frag *frag;
1113 		uint32_t min, max;
1114 
1115 		min = start & ~(PAGE_CACHE_SIZE-1);
1116 		max = min + PAGE_CACHE_SIZE;
1117 
1118 		frag = jffs2_lookup_node_frag(&f->fragtree, start);
1119 
1120 		/* BUG_ON(!frag) but that'll happen anyway... */
1121 
1122 		BUG_ON(frag->ofs != start);
1123 
1124 		/* First grow down... */
1125 		while((frag = frag_prev(frag)) && frag->ofs >= min) {
1126 
1127 			/* If the previous frag doesn't even reach the beginning, there's
1128 			   excessive fragmentation. Just merge. */
1129 			if (frag->ofs > min) {
1130 				D1(printk(KERN_DEBUG "Expanding down to cover partial frag (0x%x-0x%x)\n",
1131 					  frag->ofs, frag->ofs+frag->size));
1132 				start = frag->ofs;
1133 				continue;
1134 			}
1135 			/* OK. This frag holds the first byte of the page. */
1136 			if (!frag->node || !frag->node->raw) {
1137 				D1(printk(KERN_DEBUG "First frag in page is hole (0x%x-0x%x). Not expanding down.\n",
1138 					  frag->ofs, frag->ofs+frag->size));
1139 				break;
1140 			} else {
1141 
1142 				/* OK, it's a frag which extends to the beginning of the page. Does it live
1143 				   in a block which is still considered clean? If so, don't obsolete it.
1144 				   If not, cover it anyway. */
1145 
1146 				struct jffs2_raw_node_ref *raw = frag->node->raw;
1147 				struct jffs2_eraseblock *jeb;
1148 
1149 				jeb = &c->blocks[raw->flash_offset / c->sector_size];
1150 
1151 				if (jeb == c->gcblock) {
1152 					D1(printk(KERN_DEBUG "Expanding down to cover frag (0x%x-0x%x) in gcblock at %08x\n",
1153 						  frag->ofs, frag->ofs+frag->size, ref_offset(raw)));
1154 					start = frag->ofs;
1155 					break;
1156 				}
1157 				if (!ISDIRTY(jeb->dirty_size + jeb->wasted_size)) {
1158 					D1(printk(KERN_DEBUG "Not expanding down to cover frag (0x%x-0x%x) in clean block %08x\n",
1159 						  frag->ofs, frag->ofs+frag->size, jeb->offset));
1160 					break;
1161 				}
1162 
1163 				D1(printk(KERN_DEBUG "Expanding down to cover frag (0x%x-0x%x) in dirty block %08x\n",
1164 						  frag->ofs, frag->ofs+frag->size, jeb->offset));
1165 				start = frag->ofs;
1166 				break;
1167 			}
1168 		}
1169 
1170 		/* ... then up */
1171 
1172 		/* Find last frag which is actually part of the node we're to GC. */
1173 		frag = jffs2_lookup_node_frag(&f->fragtree, end-1);
1174 
1175 		while((frag = frag_next(frag)) && frag->ofs+frag->size <= max) {
1176 
1177 			/* If the previous frag doesn't even reach the beginning, there's lots
1178 			   of fragmentation. Just merge. */
1179 			if (frag->ofs+frag->size < max) {
1180 				D1(printk(KERN_DEBUG "Expanding up to cover partial frag (0x%x-0x%x)\n",
1181 					  frag->ofs, frag->ofs+frag->size));
1182 				end = frag->ofs + frag->size;
1183 				continue;
1184 			}
1185 
1186 			if (!frag->node || !frag->node->raw) {
1187 				D1(printk(KERN_DEBUG "Last frag in page is hole (0x%x-0x%x). Not expanding up.\n",
1188 					  frag->ofs, frag->ofs+frag->size));
1189 				break;
1190 			} else {
1191 
1192 				/* OK, it's a frag which extends to the beginning of the page. Does it live
1193 				   in a block which is still considered clean? If so, don't obsolete it.
1194 				   If not, cover it anyway. */
1195 
1196 				struct jffs2_raw_node_ref *raw = frag->node->raw;
1197 				struct jffs2_eraseblock *jeb;
1198 
1199 				jeb = &c->blocks[raw->flash_offset / c->sector_size];
1200 
1201 				if (jeb == c->gcblock) {
1202 					D1(printk(KERN_DEBUG "Expanding up to cover frag (0x%x-0x%x) in gcblock at %08x\n",
1203 						  frag->ofs, frag->ofs+frag->size, ref_offset(raw)));
1204 					end = frag->ofs + frag->size;
1205 					break;
1206 				}
1207 				if (!ISDIRTY(jeb->dirty_size + jeb->wasted_size)) {
1208 					D1(printk(KERN_DEBUG "Not expanding up to cover frag (0x%x-0x%x) in clean block %08x\n",
1209 						  frag->ofs, frag->ofs+frag->size, jeb->offset));
1210 					break;
1211 				}
1212 
1213 				D1(printk(KERN_DEBUG "Expanding up to cover frag (0x%x-0x%x) in dirty block %08x\n",
1214 						  frag->ofs, frag->ofs+frag->size, jeb->offset));
1215 				end = frag->ofs + frag->size;
1216 				break;
1217 			}
1218 		}
1219 		D1(printk(KERN_DEBUG "Expanded dnode to write from (0x%x-0x%x) to (0x%x-0x%x)\n",
1220 			  orig_start, orig_end, start, end));
1221 
1222 		D1(BUG_ON(end > frag_last(&f->fragtree)->ofs + frag_last(&f->fragtree)->size));
1223 		BUG_ON(end < orig_end);
1224 		BUG_ON(start > orig_start);
1225 	}
1226 
1227 	/* First, use readpage() to read the appropriate page into the page cache */
1228 	/* Q: What happens if we actually try to GC the _same_ page for which commit_write()
1229 	 *    triggered garbage collection in the first place?
1230 	 * A: I _think_ it's OK. read_cache_page shouldn't deadlock, we'll write out the
1231 	 *    page OK. We'll actually write it out again in commit_write, which is a little
1232 	 *    suboptimal, but at least we're correct.
1233 	 */
1234 	pg_ptr = jffs2_gc_fetch_page(c, f, start, &pg);
1235 
1236 	if (IS_ERR(pg_ptr)) {
1237 		printk(KERN_WARNING "read_cache_page() returned error: %ld\n", PTR_ERR(pg_ptr));
1238 		return PTR_ERR(pg_ptr);
1239 	}
1240 
1241 	offset = start;
1242 	while(offset < orig_end) {
1243 		uint32_t datalen;
1244 		uint32_t cdatalen;
1245 		uint16_t comprtype = JFFS2_COMPR_NONE;
1246 
1247 		ret = jffs2_reserve_space_gc(c, sizeof(ri) + JFFS2_MIN_DATA_LEN,
1248 					&alloclen, JFFS2_SUMMARY_INODE_SIZE);
1249 
1250 		if (ret) {
1251 			printk(KERN_WARNING "jffs2_reserve_space_gc of %zd bytes for garbage_collect_dnode failed: %d\n",
1252 			       sizeof(ri)+ JFFS2_MIN_DATA_LEN, ret);
1253 			break;
1254 		}
1255 		cdatalen = min_t(uint32_t, alloclen - sizeof(ri), end - offset);
1256 		datalen = end - offset;
1257 
1258 		writebuf = pg_ptr + (offset & (PAGE_CACHE_SIZE -1));
1259 
1260 		comprtype = jffs2_compress(c, f, writebuf, &comprbuf, &datalen, &cdatalen);
1261 
1262 		ri.magic = cpu_to_je16(JFFS2_MAGIC_BITMASK);
1263 		ri.nodetype = cpu_to_je16(JFFS2_NODETYPE_INODE);
1264 		ri.totlen = cpu_to_je32(sizeof(ri) + cdatalen);
1265 		ri.hdr_crc = cpu_to_je32(crc32(0, &ri, sizeof(struct jffs2_unknown_node)-4));
1266 
1267 		ri.ino = cpu_to_je32(f->inocache->ino);
1268 		ri.version = cpu_to_je32(++f->highest_version);
1269 		ri.mode = cpu_to_jemode(JFFS2_F_I_MODE(f));
1270 		ri.uid = cpu_to_je16(JFFS2_F_I_UID(f));
1271 		ri.gid = cpu_to_je16(JFFS2_F_I_GID(f));
1272 		ri.isize = cpu_to_je32(JFFS2_F_I_SIZE(f));
1273 		ri.atime = cpu_to_je32(JFFS2_F_I_ATIME(f));
1274 		ri.ctime = cpu_to_je32(JFFS2_F_I_CTIME(f));
1275 		ri.mtime = cpu_to_je32(JFFS2_F_I_MTIME(f));
1276 		ri.offset = cpu_to_je32(offset);
1277 		ri.csize = cpu_to_je32(cdatalen);
1278 		ri.dsize = cpu_to_je32(datalen);
1279 		ri.compr = comprtype & 0xff;
1280 		ri.usercompr = (comprtype >> 8) & 0xff;
1281 		ri.node_crc = cpu_to_je32(crc32(0, &ri, sizeof(ri)-8));
1282 		ri.data_crc = cpu_to_je32(crc32(0, comprbuf, cdatalen));
1283 
1284 		new_fn = jffs2_write_dnode(c, f, &ri, comprbuf, cdatalen, ALLOC_GC);
1285 
1286 		jffs2_free_comprbuf(comprbuf, writebuf);
1287 
1288 		if (IS_ERR(new_fn)) {
1289 			printk(KERN_WARNING "Error writing new dnode: %ld\n", PTR_ERR(new_fn));
1290 			ret = PTR_ERR(new_fn);
1291 			break;
1292 		}
1293 		ret = jffs2_add_full_dnode_to_inode(c, f, new_fn);
1294 		offset += datalen;
1295 		if (f->metadata) {
1296 			jffs2_mark_node_obsolete(c, f->metadata->raw);
1297 			jffs2_free_full_dnode(f->metadata);
1298 			f->metadata = NULL;
1299 		}
1300 	}
1301 
1302 	jffs2_gc_release_page(c, pg_ptr, &pg);
1303 	return ret;
1304 }
1305