xref: /openbmc/linux/fs/jffs2/build.c (revision 32981ea5)
1 /*
2  * JFFS2 -- Journalling Flash File System, Version 2.
3  *
4  * Copyright © 2001-2007 Red Hat, Inc.
5  * Copyright © 2004-2010 David Woodhouse <dwmw2@infradead.org>
6  *
7  * Created by David Woodhouse <dwmw2@infradead.org>
8  *
9  * For licensing information, see the file 'LICENCE' in this directory.
10  *
11  */
12 
13 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
14 
15 #include <linux/kernel.h>
16 #include <linux/sched.h>
17 #include <linux/slab.h>
18 #include <linux/vmalloc.h>
19 #include <linux/mtd/mtd.h>
20 #include <linux/mm.h> /* kvfree() */
21 #include "nodelist.h"
22 
23 static void jffs2_build_remove_unlinked_inode(struct jffs2_sb_info *,
24 		struct jffs2_inode_cache *, struct jffs2_full_dirent **);
25 
26 static inline struct jffs2_inode_cache *
27 first_inode_chain(int *i, struct jffs2_sb_info *c)
28 {
29 	for (; *i < c->inocache_hashsize; (*i)++) {
30 		if (c->inocache_list[*i])
31 			return c->inocache_list[*i];
32 	}
33 	return NULL;
34 }
35 
36 static inline struct jffs2_inode_cache *
37 next_inode(int *i, struct jffs2_inode_cache *ic, struct jffs2_sb_info *c)
38 {
39 	/* More in this chain? */
40 	if (ic->next)
41 		return ic->next;
42 	(*i)++;
43 	return first_inode_chain(i, c);
44 }
45 
/*
 * Iterate over every inode cache in the hash table of @c.
 *
 *  i  - int lvalue used as the current hash bucket index
 *  c  - struct jffs2_sb_info * being walked
 *  ic - struct jffs2_inode_cache * lvalue set to the current entry
 *
 * The next element is fetched via ic->next after the body has run, so the
 * body must not unlink or free the current entry.
 * All macro parameters are parenthesized so that any lvalue expression can
 * safely be passed for @i and @ic.
 */
#define for_each_inode(i, c, ic)				\
	for ((i) = 0, (ic) = first_inode_chain(&(i), (c));	\
	     (ic);						\
	     (ic) = next_inode(&(i), (ic), (c)))
50 
51 
52 static void jffs2_build_inode_pass1(struct jffs2_sb_info *c,
53 				    struct jffs2_inode_cache *ic,
54 				    int *dir_hardlinks)
55 {
56 	struct jffs2_full_dirent *fd;
57 
58 	dbg_fsbuild("building directory inode #%u\n", ic->ino);
59 
60 	/* For each child, increase nlink */
61 	for(fd = ic->scan_dents; fd; fd = fd->next) {
62 		struct jffs2_inode_cache *child_ic;
63 		if (!fd->ino)
64 			continue;
65 
66 		/* we can get high latency here with huge directories */
67 
68 		child_ic = jffs2_get_ino_cache(c, fd->ino);
69 		if (!child_ic) {
70 			dbg_fsbuild("child \"%s\" (ino #%u) of dir ino #%u doesn't exist!\n",
71 				  fd->name, fd->ino, ic->ino);
72 			jffs2_mark_node_obsolete(c, fd->raw);
73 			/* Clear the ic/raw union so it doesn't cause problems later. */
74 			fd->ic = NULL;
75 			continue;
76 		}
77 
78 		/* From this point, fd->raw is no longer used so we can set fd->ic */
79 		fd->ic = child_ic;
80 		child_ic->pino_nlink++;
81 		/* If we appear (at this stage) to have hard-linked directories,
82 		 * set a flag to trigger a scan later */
83 		if (fd->type == DT_DIR) {
84 			child_ic->flags |= INO_FLAGS_IS_DIR;
85 			if (child_ic->pino_nlink > 1)
86 				*dir_hardlinks = 1;
87 		}
88 
89 		dbg_fsbuild("increased nlink for child \"%s\" (ino #%u)\n", fd->name, fd->ino);
90 		/* Can't free scan_dents so far. We might need them in pass 2 */
91 	}
92 }
93 
94 /* Scan plan:
95  - Scan physical nodes. Build map of inodes/dirents. Allocate inocaches as we go
96  - Scan directory tree from top down, setting nlink in inocaches
97  - Scan inocaches for inodes with nlink==0
98 */
99 static int jffs2_build_filesystem(struct jffs2_sb_info *c)
100 {
101 	int ret, i, dir_hardlinks = 0;
102 	struct jffs2_inode_cache *ic;
103 	struct jffs2_full_dirent *fd;
104 	struct jffs2_full_dirent *dead_fds = NULL;
105 
106 	dbg_fsbuild("build FS data structures\n");
107 
108 	/* First, scan the medium and build all the inode caches with
109 	   lists of physical nodes */
110 
111 	c->flags |= JFFS2_SB_FLAG_SCANNING;
112 	ret = jffs2_scan_medium(c);
113 	c->flags &= ~JFFS2_SB_FLAG_SCANNING;
114 	if (ret)
115 		goto exit;
116 
117 	dbg_fsbuild("scanned flash completely\n");
118 	jffs2_dbg_dump_block_lists_nolock(c);
119 
120 	dbg_fsbuild("pass 1 starting\n");
121 	c->flags |= JFFS2_SB_FLAG_BUILDING;
122 	/* Now scan the directory tree, increasing nlink according to every dirent found. */
123 	for_each_inode(i, c, ic) {
124 		if (ic->scan_dents) {
125 			jffs2_build_inode_pass1(c, ic, &dir_hardlinks);
126 			cond_resched();
127 		}
128 	}
129 
130 	dbg_fsbuild("pass 1 complete\n");
131 
132 	/* Next, scan for inodes with nlink == 0 and remove them. If
133 	   they were directories, then decrement the nlink of their
134 	   children too, and repeat the scan. As that's going to be
135 	   a fairly uncommon occurrence, it's not so evil to do it this
136 	   way. Recursion bad. */
137 	dbg_fsbuild("pass 2 starting\n");
138 
139 	for_each_inode(i, c, ic) {
140 		if (ic->pino_nlink)
141 			continue;
142 
143 		jffs2_build_remove_unlinked_inode(c, ic, &dead_fds);
144 		cond_resched();
145 	}
146 
147 	dbg_fsbuild("pass 2a starting\n");
148 
149 	while (dead_fds) {
150 		fd = dead_fds;
151 		dead_fds = fd->next;
152 
153 		ic = jffs2_get_ino_cache(c, fd->ino);
154 
155 		if (ic)
156 			jffs2_build_remove_unlinked_inode(c, ic, &dead_fds);
157 		jffs2_free_full_dirent(fd);
158 	}
159 
160 	dbg_fsbuild("pass 2a complete\n");
161 
162 	if (dir_hardlinks) {
163 		/* If we detected directory hardlinks earlier, *hopefully*
164 		 * they are gone now because some of the links were from
165 		 * dead directories which still had some old dirents lying
166 		 * around and not yet garbage-collected, but which have
167 		 * been discarded above. So clear the pino_nlink field
168 		 * in each directory, so that the final scan below can
169 		 * print appropriate warnings. */
170 		for_each_inode(i, c, ic) {
171 			if (ic->flags & INO_FLAGS_IS_DIR)
172 				ic->pino_nlink = 0;
173 		}
174 	}
175 	dbg_fsbuild("freeing temporary data structures\n");
176 
177 	/* Finally, we can scan again and free the dirent structs */
178 	for_each_inode(i, c, ic) {
179 		while(ic->scan_dents) {
180 			fd = ic->scan_dents;
181 			ic->scan_dents = fd->next;
182 			/* We do use the pino_nlink field to count nlink of
183 			 * directories during fs build, so set it to the
184 			 * parent ino# now. Now that there's hopefully only
185 			 * one. */
186 			if (fd->type == DT_DIR) {
187 				if (!fd->ic) {
188 					/* We'll have complained about it and marked the coresponding
189 					   raw node obsolete already. Just skip it. */
190 					continue;
191 				}
192 
193 				/* We *have* to have set this in jffs2_build_inode_pass1() */
194 				BUG_ON(!(fd->ic->flags & INO_FLAGS_IS_DIR));
195 
196 				/* We clear ic->pino_nlink ∀ directories' ic *only* if dir_hardlinks
197 				 * is set. Otherwise, we know this should never trigger anyway, so
198 				 * we don't do the check. And ic->pino_nlink still contains the nlink
199 				 * value (which is 1). */
200 				if (dir_hardlinks && fd->ic->pino_nlink) {
201 					JFFS2_ERROR("child dir \"%s\" (ino #%u) of dir ino #%u is also hard linked from dir ino #%u\n",
202 						    fd->name, fd->ino, ic->ino, fd->ic->pino_nlink);
203 					/* Should we unlink it from its previous parent? */
204 				}
205 
206 				/* For directories, ic->pino_nlink holds that parent inode # */
207 				fd->ic->pino_nlink = ic->ino;
208 			}
209 			jffs2_free_full_dirent(fd);
210 		}
211 		ic->scan_dents = NULL;
212 		cond_resched();
213 	}
214 	jffs2_build_xattr_subsystem(c);
215 	c->flags &= ~JFFS2_SB_FLAG_BUILDING;
216 
217 	dbg_fsbuild("FS build complete\n");
218 
219 	/* Rotate the lists by some number to ensure wear levelling */
220 	jffs2_rotate_lists(c);
221 
222 	ret = 0;
223 
224 exit:
225 	if (ret) {
226 		for_each_inode(i, c, ic) {
227 			while(ic->scan_dents) {
228 				fd = ic->scan_dents;
229 				ic->scan_dents = fd->next;
230 				jffs2_free_full_dirent(fd);
231 			}
232 		}
233 		jffs2_clear_xattr_subsystem(c);
234 	}
235 
236 	return ret;
237 }
238 
239 static void jffs2_build_remove_unlinked_inode(struct jffs2_sb_info *c,
240 					struct jffs2_inode_cache *ic,
241 					struct jffs2_full_dirent **dead_fds)
242 {
243 	struct jffs2_raw_node_ref *raw;
244 	struct jffs2_full_dirent *fd;
245 
246 	dbg_fsbuild("removing ino #%u with nlink == zero.\n", ic->ino);
247 
248 	raw = ic->nodes;
249 	while (raw != (void *)ic) {
250 		struct jffs2_raw_node_ref *next = raw->next_in_ino;
251 		dbg_fsbuild("obsoleting node at 0x%08x\n", ref_offset(raw));
252 		jffs2_mark_node_obsolete(c, raw);
253 		raw = next;
254 	}
255 
256 	if (ic->scan_dents) {
257 		int whinged = 0;
258 		dbg_fsbuild("inode #%u was a directory which may have children...\n", ic->ino);
259 
260 		while(ic->scan_dents) {
261 			struct jffs2_inode_cache *child_ic;
262 
263 			fd = ic->scan_dents;
264 			ic->scan_dents = fd->next;
265 
266 			if (!fd->ino) {
267 				/* It's a deletion dirent. Ignore it */
268 				dbg_fsbuild("child \"%s\" is a deletion dirent, skipping...\n", fd->name);
269 				jffs2_free_full_dirent(fd);
270 				continue;
271 			}
272 			if (!whinged)
273 				whinged = 1;
274 
275 			dbg_fsbuild("removing child \"%s\", ino #%u\n", fd->name, fd->ino);
276 
277 			child_ic = jffs2_get_ino_cache(c, fd->ino);
278 			if (!child_ic) {
279 				dbg_fsbuild("cannot remove child \"%s\", ino #%u, because it doesn't exist\n",
280 						fd->name, fd->ino);
281 				jffs2_free_full_dirent(fd);
282 				continue;
283 			}
284 
285 			/* Reduce nlink of the child. If it's now zero, stick it on the
286 			   dead_fds list to be cleaned up later. Else just free the fd */
287 			child_ic->pino_nlink--;
288 
289 			if (!child_ic->pino_nlink) {
290 				dbg_fsbuild("inode #%u (\"%s\") now has no links; adding to dead_fds list.\n",
291 					  fd->ino, fd->name);
292 				fd->next = *dead_fds;
293 				*dead_fds = fd;
294 			} else {
295 				dbg_fsbuild("inode #%u (\"%s\") has now got nlink %d. Ignoring.\n",
296 					  fd->ino, fd->name, child_ic->pino_nlink);
297 				jffs2_free_full_dirent(fd);
298 			}
299 		}
300 	}
301 
302 	/*
303 	   We don't delete the inocache from the hash list and free it yet.
304 	   The erase code will do that, when all the nodes are completely gone.
305 	*/
306 }
307 
308 static void jffs2_calc_trigger_levels(struct jffs2_sb_info *c)
309 {
310 	uint32_t size;
311 
312 	/* Deletion should almost _always_ be allowed. We're fairly
313 	   buggered once we stop allowing people to delete stuff
314 	   because there's not enough free space... */
315 	c->resv_blocks_deletion = 2;
316 
317 	/* Be conservative about how much space we need before we allow writes.
318 	   On top of that which is required for deletia, require an extra 2%
319 	   of the medium to be available, for overhead caused by nodes being
320 	   split across blocks, etc. */
321 
322 	size = c->flash_size / 50; /* 2% of flash size */
323 	size += c->nr_blocks * 100; /* And 100 bytes per eraseblock */
324 	size += c->sector_size - 1; /* ... and round up */
325 
326 	c->resv_blocks_write = c->resv_blocks_deletion + (size / c->sector_size);
327 
328 	/* When do we let the GC thread run in the background */
329 
330 	c->resv_blocks_gctrigger = c->resv_blocks_write + 1;
331 
332 	/* When do we allow garbage collection to merge nodes to make
333 	   long-term progress at the expense of short-term space exhaustion? */
334 	c->resv_blocks_gcmerge = c->resv_blocks_deletion + 1;
335 
336 	/* When do we allow garbage collection to eat from bad blocks rather
337 	   than actually making progress? */
338 	c->resv_blocks_gcbad = 0;//c->resv_blocks_deletion + 2;
339 
340 	/* What number of 'very dirty' eraseblocks do we allow before we
341 	   trigger the GC thread even if we don't _need_ the space. When we
342 	   can't mark nodes obsolete on the medium, the old dirty nodes cause
343 	   performance problems because we have to inspect and discard them. */
344 	c->vdirty_blocks_gctrigger = c->resv_blocks_gctrigger;
345 	if (jffs2_can_mark_obsolete(c))
346 		c->vdirty_blocks_gctrigger *= 10;
347 
348 	/* If there's less than this amount of dirty space, don't bother
349 	   trying to GC to make more space. It'll be a fruitless task */
350 	c->nospc_dirty_size = c->sector_size + (c->flash_size / 100);
351 
352 	dbg_fsbuild("trigger levels (size %d KiB, block size %d KiB, %d blocks)\n",
353 		    c->flash_size / 1024, c->sector_size / 1024, c->nr_blocks);
354 	dbg_fsbuild("Blocks required to allow deletion:    %d (%d KiB)\n",
355 		  c->resv_blocks_deletion, c->resv_blocks_deletion*c->sector_size/1024);
356 	dbg_fsbuild("Blocks required to allow writes:      %d (%d KiB)\n",
357 		  c->resv_blocks_write, c->resv_blocks_write*c->sector_size/1024);
358 	dbg_fsbuild("Blocks required to quiesce GC thread: %d (%d KiB)\n",
359 		  c->resv_blocks_gctrigger, c->resv_blocks_gctrigger*c->sector_size/1024);
360 	dbg_fsbuild("Blocks required to allow GC merges:   %d (%d KiB)\n",
361 		  c->resv_blocks_gcmerge, c->resv_blocks_gcmerge*c->sector_size/1024);
362 	dbg_fsbuild("Blocks required to GC bad blocks:     %d (%d KiB)\n",
363 		  c->resv_blocks_gcbad, c->resv_blocks_gcbad*c->sector_size/1024);
364 	dbg_fsbuild("Amount of dirty space required to GC: %d bytes\n",
365 		  c->nospc_dirty_size);
366 	dbg_fsbuild("Very dirty blocks before GC triggered: %d\n",
367 		  c->vdirty_blocks_gctrigger);
368 }
369 
370 int jffs2_do_mount_fs(struct jffs2_sb_info *c)
371 {
372 	int ret;
373 	int i;
374 	int size;
375 
376 	c->free_size = c->flash_size;
377 	c->nr_blocks = c->flash_size / c->sector_size;
378 	size = sizeof(struct jffs2_eraseblock) * c->nr_blocks;
379 #ifndef __ECOS
380 	if (jffs2_blocks_use_vmalloc(c))
381 		c->blocks = vzalloc(size);
382 	else
383 #endif
384 		c->blocks = kzalloc(size, GFP_KERNEL);
385 	if (!c->blocks)
386 		return -ENOMEM;
387 
388 	for (i=0; i<c->nr_blocks; i++) {
389 		INIT_LIST_HEAD(&c->blocks[i].list);
390 		c->blocks[i].offset = i * c->sector_size;
391 		c->blocks[i].free_size = c->sector_size;
392 	}
393 
394 	INIT_LIST_HEAD(&c->clean_list);
395 	INIT_LIST_HEAD(&c->very_dirty_list);
396 	INIT_LIST_HEAD(&c->dirty_list);
397 	INIT_LIST_HEAD(&c->erasable_list);
398 	INIT_LIST_HEAD(&c->erasing_list);
399 	INIT_LIST_HEAD(&c->erase_checking_list);
400 	INIT_LIST_HEAD(&c->erase_pending_list);
401 	INIT_LIST_HEAD(&c->erasable_pending_wbuf_list);
402 	INIT_LIST_HEAD(&c->erase_complete_list);
403 	INIT_LIST_HEAD(&c->free_list);
404 	INIT_LIST_HEAD(&c->bad_list);
405 	INIT_LIST_HEAD(&c->bad_used_list);
406 	c->highest_ino = 1;
407 	c->summary = NULL;
408 
409 	ret = jffs2_sum_init(c);
410 	if (ret)
411 		goto out_free;
412 
413 	if (jffs2_build_filesystem(c)) {
414 		dbg_fsbuild("build_fs failed\n");
415 		jffs2_free_ino_caches(c);
416 		jffs2_free_raw_node_refs(c);
417 		ret = -EIO;
418 		goto out_free;
419 	}
420 
421 	jffs2_calc_trigger_levels(c);
422 
423 	return 0;
424 
425  out_free:
426 	kvfree(c->blocks);
427 
428 	return ret;
429 }
430