xref: /openbmc/linux/fs/overlayfs/readdir.c (revision 3e26a691)
1 /*
2  *
3  * Copyright (C) 2011 Novell Inc.
4  *
5  * This program is free software; you can redistribute it and/or modify it
6  * under the terms of the GNU General Public License version 2 as published by
7  * the Free Software Foundation.
8  */
9 
10 #include <linux/fs.h>
11 #include <linux/slab.h>
12 #include <linux/namei.h>
13 #include <linux/file.h>
14 #include <linux/xattr.h>
15 #include <linux/rbtree.h>
16 #include <linux/security.h>
17 #include <linux/cred.h>
18 #include "overlayfs.h"
19 
20 struct ovl_cache_entry {
21 	unsigned int len;
22 	unsigned int type;
23 	u64 ino;
24 	struct list_head l_node;
25 	struct rb_node node;
26 	struct ovl_cache_entry *next_maybe_whiteout;
27 	bool is_whiteout;
28 	char name[];
29 };
30 
31 struct ovl_dir_cache {
32 	long refcount;
33 	u64 version;
34 	struct list_head entries;
35 };
36 
37 struct ovl_readdir_data {
38 	struct dir_context ctx;
39 	bool is_lowest;
40 	struct rb_root root;
41 	struct list_head *list;
42 	struct list_head middle;
43 	struct ovl_cache_entry *first_maybe_whiteout;
44 	int count;
45 	int err;
46 	bool d_type_supported;
47 };
48 
49 struct ovl_dir_file {
50 	bool is_real;
51 	bool is_upper;
52 	struct ovl_dir_cache *cache;
53 	struct list_head *cursor;
54 	struct file *realfile;
55 	struct file *upperfile;
56 };
57 
58 static struct ovl_cache_entry *ovl_cache_entry_from_node(struct rb_node *n)
59 {
60 	return container_of(n, struct ovl_cache_entry, node);
61 }
62 
63 static struct ovl_cache_entry *ovl_cache_entry_find(struct rb_root *root,
64 						    const char *name, int len)
65 {
66 	struct rb_node *node = root->rb_node;
67 	int cmp;
68 
69 	while (node) {
70 		struct ovl_cache_entry *p = ovl_cache_entry_from_node(node);
71 
72 		cmp = strncmp(name, p->name, len);
73 		if (cmp > 0)
74 			node = p->node.rb_right;
75 		else if (cmp < 0 || len < p->len)
76 			node = p->node.rb_left;
77 		else
78 			return p;
79 	}
80 
81 	return NULL;
82 }
83 
84 static struct ovl_cache_entry *ovl_cache_entry_new(struct ovl_readdir_data *rdd,
85 						   const char *name, int len,
86 						   u64 ino, unsigned int d_type)
87 {
88 	struct ovl_cache_entry *p;
89 	size_t size = offsetof(struct ovl_cache_entry, name[len + 1]);
90 
91 	p = kmalloc(size, GFP_KERNEL);
92 	if (!p)
93 		return NULL;
94 
95 	memcpy(p->name, name, len);
96 	p->name[len] = '\0';
97 	p->len = len;
98 	p->type = d_type;
99 	p->ino = ino;
100 	p->is_whiteout = false;
101 
102 	if (d_type == DT_CHR) {
103 		p->next_maybe_whiteout = rdd->first_maybe_whiteout;
104 		rdd->first_maybe_whiteout = p;
105 	}
106 	return p;
107 }
108 
109 static int ovl_cache_entry_add_rb(struct ovl_readdir_data *rdd,
110 				  const char *name, int len, u64 ino,
111 				  unsigned int d_type)
112 {
113 	struct rb_node **newp = &rdd->root.rb_node;
114 	struct rb_node *parent = NULL;
115 	struct ovl_cache_entry *p;
116 
117 	while (*newp) {
118 		int cmp;
119 		struct ovl_cache_entry *tmp;
120 
121 		parent = *newp;
122 		tmp = ovl_cache_entry_from_node(*newp);
123 		cmp = strncmp(name, tmp->name, len);
124 		if (cmp > 0)
125 			newp = &tmp->node.rb_right;
126 		else if (cmp < 0 || len < tmp->len)
127 			newp = &tmp->node.rb_left;
128 		else
129 			return 0;
130 	}
131 
132 	p = ovl_cache_entry_new(rdd, name, len, ino, d_type);
133 	if (p == NULL)
134 		return -ENOMEM;
135 
136 	list_add_tail(&p->l_node, rdd->list);
137 	rb_link_node(&p->node, parent, newp);
138 	rb_insert_color(&p->node, &rdd->root);
139 
140 	return 0;
141 }
142 
143 static int ovl_fill_lowest(struct ovl_readdir_data *rdd,
144 			   const char *name, int namelen,
145 			   loff_t offset, u64 ino, unsigned int d_type)
146 {
147 	struct ovl_cache_entry *p;
148 
149 	p = ovl_cache_entry_find(&rdd->root, name, namelen);
150 	if (p) {
151 		list_move_tail(&p->l_node, &rdd->middle);
152 	} else {
153 		p = ovl_cache_entry_new(rdd, name, namelen, ino, d_type);
154 		if (p == NULL)
155 			rdd->err = -ENOMEM;
156 		else
157 			list_add_tail(&p->l_node, &rdd->middle);
158 	}
159 
160 	return rdd->err;
161 }
162 
163 void ovl_cache_free(struct list_head *list)
164 {
165 	struct ovl_cache_entry *p;
166 	struct ovl_cache_entry *n;
167 
168 	list_for_each_entry_safe(p, n, list, l_node)
169 		kfree(p);
170 
171 	INIT_LIST_HEAD(list);
172 }
173 
174 static void ovl_cache_put(struct ovl_dir_file *od, struct dentry *dentry)
175 {
176 	struct ovl_dir_cache *cache = od->cache;
177 
178 	WARN_ON(cache->refcount <= 0);
179 	cache->refcount--;
180 	if (!cache->refcount) {
181 		if (ovl_dir_cache(dentry) == cache)
182 			ovl_set_dir_cache(dentry, NULL);
183 
184 		ovl_cache_free(&cache->entries);
185 		kfree(cache);
186 	}
187 }
188 
189 static int ovl_fill_merge(struct dir_context *ctx, const char *name,
190 			  int namelen, loff_t offset, u64 ino,
191 			  unsigned int d_type)
192 {
193 	struct ovl_readdir_data *rdd =
194 		container_of(ctx, struct ovl_readdir_data, ctx);
195 
196 	rdd->count++;
197 	if (!rdd->is_lowest)
198 		return ovl_cache_entry_add_rb(rdd, name, namelen, ino, d_type);
199 	else
200 		return ovl_fill_lowest(rdd, name, namelen, offset, ino, d_type);
201 }
202 
203 static int ovl_check_whiteouts(struct dentry *dir, struct ovl_readdir_data *rdd)
204 {
205 	int err;
206 	struct ovl_cache_entry *p;
207 	struct dentry *dentry;
208 	const struct cred *old_cred;
209 	struct cred *override_cred;
210 
211 	override_cred = prepare_creds();
212 	if (!override_cred)
213 		return -ENOMEM;
214 
215 	/*
216 	 * CAP_DAC_OVERRIDE for lookup
217 	 */
218 	cap_raise(override_cred->cap_effective, CAP_DAC_OVERRIDE);
219 	old_cred = override_creds(override_cred);
220 
221 	err = mutex_lock_killable(&dir->d_inode->i_mutex);
222 	if (!err) {
223 		while (rdd->first_maybe_whiteout) {
224 			p = rdd->first_maybe_whiteout;
225 			rdd->first_maybe_whiteout = p->next_maybe_whiteout;
226 			dentry = lookup_one_len(p->name, dir, p->len);
227 			if (!IS_ERR(dentry)) {
228 				p->is_whiteout = ovl_is_whiteout(dentry);
229 				dput(dentry);
230 			}
231 		}
232 		inode_unlock(dir->d_inode);
233 	}
234 	revert_creds(old_cred);
235 	put_cred(override_cred);
236 
237 	return err;
238 }
239 
240 static inline int ovl_dir_read(struct path *realpath,
241 			       struct ovl_readdir_data *rdd)
242 {
243 	struct file *realfile;
244 	int err;
245 
246 	realfile = ovl_path_open(realpath, O_RDONLY | O_DIRECTORY);
247 	if (IS_ERR(realfile))
248 		return PTR_ERR(realfile);
249 
250 	rdd->first_maybe_whiteout = NULL;
251 	rdd->ctx.pos = 0;
252 	do {
253 		rdd->count = 0;
254 		rdd->err = 0;
255 		err = iterate_dir(realfile, &rdd->ctx);
256 		if (err >= 0)
257 			err = rdd->err;
258 	} while (!err && rdd->count);
259 
260 	if (!err && rdd->first_maybe_whiteout)
261 		err = ovl_check_whiteouts(realpath->dentry, rdd);
262 
263 	fput(realfile);
264 
265 	return err;
266 }
267 
268 static void ovl_dir_reset(struct file *file)
269 {
270 	struct ovl_dir_file *od = file->private_data;
271 	struct ovl_dir_cache *cache = od->cache;
272 	struct dentry *dentry = file->f_path.dentry;
273 	enum ovl_path_type type = ovl_path_type(dentry);
274 
275 	if (cache && ovl_dentry_version_get(dentry) != cache->version) {
276 		ovl_cache_put(od, dentry);
277 		od->cache = NULL;
278 		od->cursor = NULL;
279 	}
280 	WARN_ON(!od->is_real && !OVL_TYPE_MERGE(type));
281 	if (od->is_real && OVL_TYPE_MERGE(type))
282 		od->is_real = false;
283 }
284 
285 static int ovl_dir_read_merged(struct dentry *dentry, struct list_head *list)
286 {
287 	int err;
288 	struct path realpath;
289 	struct ovl_readdir_data rdd = {
290 		.ctx.actor = ovl_fill_merge,
291 		.list = list,
292 		.root = RB_ROOT,
293 		.is_lowest = false,
294 	};
295 	int idx, next;
296 
297 	for (idx = 0; idx != -1; idx = next) {
298 		next = ovl_path_next(idx, dentry, &realpath);
299 
300 		if (next != -1) {
301 			err = ovl_dir_read(&realpath, &rdd);
302 			if (err)
303 				break;
304 		} else {
305 			/*
306 			 * Insert lowest layer entries before upper ones, this
307 			 * allows offsets to be reasonably constant
308 			 */
309 			list_add(&rdd.middle, rdd.list);
310 			rdd.is_lowest = true;
311 			err = ovl_dir_read(&realpath, &rdd);
312 			list_del(&rdd.middle);
313 		}
314 	}
315 	return err;
316 }
317 
318 static void ovl_seek_cursor(struct ovl_dir_file *od, loff_t pos)
319 {
320 	struct list_head *p;
321 	loff_t off = 0;
322 
323 	list_for_each(p, &od->cache->entries) {
324 		if (off >= pos)
325 			break;
326 		off++;
327 	}
328 	/* Cursor is safe since the cache is stable */
329 	od->cursor = p;
330 }
331 
332 static struct ovl_dir_cache *ovl_cache_get(struct dentry *dentry)
333 {
334 	int res;
335 	struct ovl_dir_cache *cache;
336 
337 	cache = ovl_dir_cache(dentry);
338 	if (cache && ovl_dentry_version_get(dentry) == cache->version) {
339 		cache->refcount++;
340 		return cache;
341 	}
342 	ovl_set_dir_cache(dentry, NULL);
343 
344 	cache = kzalloc(sizeof(struct ovl_dir_cache), GFP_KERNEL);
345 	if (!cache)
346 		return ERR_PTR(-ENOMEM);
347 
348 	cache->refcount = 1;
349 	INIT_LIST_HEAD(&cache->entries);
350 
351 	res = ovl_dir_read_merged(dentry, &cache->entries);
352 	if (res) {
353 		ovl_cache_free(&cache->entries);
354 		kfree(cache);
355 		return ERR_PTR(res);
356 	}
357 
358 	cache->version = ovl_dentry_version_get(dentry);
359 	ovl_set_dir_cache(dentry, cache);
360 
361 	return cache;
362 }
363 
364 static int ovl_iterate(struct file *file, struct dir_context *ctx)
365 {
366 	struct ovl_dir_file *od = file->private_data;
367 	struct dentry *dentry = file->f_path.dentry;
368 	struct ovl_cache_entry *p;
369 
370 	if (!ctx->pos)
371 		ovl_dir_reset(file);
372 
373 	if (od->is_real)
374 		return iterate_dir(od->realfile, ctx);
375 
376 	if (!od->cache) {
377 		struct ovl_dir_cache *cache;
378 
379 		cache = ovl_cache_get(dentry);
380 		if (IS_ERR(cache))
381 			return PTR_ERR(cache);
382 
383 		od->cache = cache;
384 		ovl_seek_cursor(od, ctx->pos);
385 	}
386 
387 	while (od->cursor != &od->cache->entries) {
388 		p = list_entry(od->cursor, struct ovl_cache_entry, l_node);
389 		if (!p->is_whiteout)
390 			if (!dir_emit(ctx, p->name, p->len, p->ino, p->type))
391 				break;
392 		od->cursor = p->l_node.next;
393 		ctx->pos++;
394 	}
395 	return 0;
396 }
397 
398 static loff_t ovl_dir_llseek(struct file *file, loff_t offset, int origin)
399 {
400 	loff_t res;
401 	struct ovl_dir_file *od = file->private_data;
402 
403 	inode_lock(file_inode(file));
404 	if (!file->f_pos)
405 		ovl_dir_reset(file);
406 
407 	if (od->is_real) {
408 		res = vfs_llseek(od->realfile, offset, origin);
409 		file->f_pos = od->realfile->f_pos;
410 	} else {
411 		res = -EINVAL;
412 
413 		switch (origin) {
414 		case SEEK_CUR:
415 			offset += file->f_pos;
416 			break;
417 		case SEEK_SET:
418 			break;
419 		default:
420 			goto out_unlock;
421 		}
422 		if (offset < 0)
423 			goto out_unlock;
424 
425 		if (offset != file->f_pos) {
426 			file->f_pos = offset;
427 			if (od->cache)
428 				ovl_seek_cursor(od, offset);
429 		}
430 		res = offset;
431 	}
432 out_unlock:
433 	inode_unlock(file_inode(file));
434 
435 	return res;
436 }
437 
438 static int ovl_dir_fsync(struct file *file, loff_t start, loff_t end,
439 			 int datasync)
440 {
441 	struct ovl_dir_file *od = file->private_data;
442 	struct dentry *dentry = file->f_path.dentry;
443 	struct file *realfile = od->realfile;
444 
445 	/*
446 	 * Need to check if we started out being a lower dir, but got copied up
447 	 */
448 	if (!od->is_upper && OVL_TYPE_UPPER(ovl_path_type(dentry))) {
449 		struct inode *inode = file_inode(file);
450 
451 		realfile = lockless_dereference(od->upperfile);
452 		if (!realfile) {
453 			struct path upperpath;
454 
455 			ovl_path_upper(dentry, &upperpath);
456 			realfile = ovl_path_open(&upperpath, O_RDONLY);
457 			smp_mb__before_spinlock();
458 			inode_lock(inode);
459 			if (!od->upperfile) {
460 				if (IS_ERR(realfile)) {
461 					inode_unlock(inode);
462 					return PTR_ERR(realfile);
463 				}
464 				od->upperfile = realfile;
465 			} else {
466 				/* somebody has beaten us to it */
467 				if (!IS_ERR(realfile))
468 					fput(realfile);
469 				realfile = od->upperfile;
470 			}
471 			inode_unlock(inode);
472 		}
473 	}
474 
475 	return vfs_fsync_range(realfile, start, end, datasync);
476 }
477 
478 static int ovl_dir_release(struct inode *inode, struct file *file)
479 {
480 	struct ovl_dir_file *od = file->private_data;
481 
482 	if (od->cache) {
483 		inode_lock(inode);
484 		ovl_cache_put(od, file->f_path.dentry);
485 		inode_unlock(inode);
486 	}
487 	fput(od->realfile);
488 	if (od->upperfile)
489 		fput(od->upperfile);
490 	kfree(od);
491 
492 	return 0;
493 }
494 
495 static int ovl_dir_open(struct inode *inode, struct file *file)
496 {
497 	struct path realpath;
498 	struct file *realfile;
499 	struct ovl_dir_file *od;
500 	enum ovl_path_type type;
501 
502 	od = kzalloc(sizeof(struct ovl_dir_file), GFP_KERNEL);
503 	if (!od)
504 		return -ENOMEM;
505 
506 	type = ovl_path_real(file->f_path.dentry, &realpath);
507 	realfile = ovl_path_open(&realpath, file->f_flags);
508 	if (IS_ERR(realfile)) {
509 		kfree(od);
510 		return PTR_ERR(realfile);
511 	}
512 	od->realfile = realfile;
513 	od->is_real = !OVL_TYPE_MERGE(type);
514 	od->is_upper = OVL_TYPE_UPPER(type);
515 	file->private_data = od;
516 
517 	return 0;
518 }
519 
520 const struct file_operations ovl_dir_operations = {
521 	.read		= generic_read_dir,
522 	.open		= ovl_dir_open,
523 	.iterate	= ovl_iterate,
524 	.llseek		= ovl_dir_llseek,
525 	.fsync		= ovl_dir_fsync,
526 	.release	= ovl_dir_release,
527 };
528 
529 int ovl_check_empty_dir(struct dentry *dentry, struct list_head *list)
530 {
531 	int err;
532 	struct ovl_cache_entry *p;
533 
534 	err = ovl_dir_read_merged(dentry, list);
535 	if (err)
536 		return err;
537 
538 	err = 0;
539 
540 	list_for_each_entry(p, list, l_node) {
541 		if (p->is_whiteout)
542 			continue;
543 
544 		if (p->name[0] == '.') {
545 			if (p->len == 1)
546 				continue;
547 			if (p->len == 2 && p->name[1] == '.')
548 				continue;
549 		}
550 		err = -ENOTEMPTY;
551 		break;
552 	}
553 
554 	return err;
555 }
556 
557 void ovl_cleanup_whiteouts(struct dentry *upper, struct list_head *list)
558 {
559 	struct ovl_cache_entry *p;
560 
561 	inode_lock_nested(upper->d_inode, I_MUTEX_CHILD);
562 	list_for_each_entry(p, list, l_node) {
563 		struct dentry *dentry;
564 
565 		if (!p->is_whiteout)
566 			continue;
567 
568 		dentry = lookup_one_len(p->name, upper, p->len);
569 		if (IS_ERR(dentry)) {
570 			pr_err("overlayfs: lookup '%s/%.*s' failed (%i)\n",
571 			       upper->d_name.name, p->len, p->name,
572 			       (int) PTR_ERR(dentry));
573 			continue;
574 		}
575 		if (dentry->d_inode)
576 			ovl_cleanup(upper->d_inode, dentry);
577 		dput(dentry);
578 	}
579 	inode_unlock(upper->d_inode);
580 }
581 
582 static int ovl_check_d_type(struct dir_context *ctx, const char *name,
583 			  int namelen, loff_t offset, u64 ino,
584 			  unsigned int d_type)
585 {
586 	struct ovl_readdir_data *rdd =
587 		container_of(ctx, struct ovl_readdir_data, ctx);
588 
589 	/* Even if d_type is not supported, DT_DIR is returned for . and .. */
590 	if (!strncmp(name, ".", namelen) || !strncmp(name, "..", namelen))
591 		return 0;
592 
593 	if (d_type != DT_UNKNOWN)
594 		rdd->d_type_supported = true;
595 
596 	return 0;
597 }
598 
599 /*
600  * Returns 1 if d_type is supported, 0 not supported/unknown. Negative values
601  * if error is encountered.
602  */
603 int ovl_check_d_type_supported(struct path *realpath)
604 {
605 	int err;
606 	struct ovl_readdir_data rdd = {
607 		.ctx.actor = ovl_check_d_type,
608 		.d_type_supported = false,
609 	};
610 
611 	err = ovl_dir_read(realpath, &rdd);
612 	if (err)
613 		return err;
614 
615 	return rdd.d_type_supported;
616 }
617