xref: /openbmc/linux/fs/fuse/readdir.c (revision 86e281fc)
1 /*
2   FUSE: Filesystem in Userspace
3   Copyright (C) 2001-2018  Miklos Szeredi <miklos@szeredi.hu>
4 
5   This program can be distributed under the terms of the GNU GPL.
6   See the file COPYING.
7 */
8 
9 
10 #include "fuse_i.h"
11 #include <linux/iversion.h>
12 #include <linux/posix_acl.h>
13 #include <linux/pagemap.h>
14 #include <linux/highmem.h>
15 
16 static bool fuse_use_readdirplus(struct inode *dir, struct dir_context *ctx)
17 {
18 	struct fuse_conn *fc = get_fuse_conn(dir);
19 	struct fuse_inode *fi = get_fuse_inode(dir);
20 
21 	if (!fc->do_readdirplus)
22 		return false;
23 	if (!fc->readdirplus_auto)
24 		return true;
25 	if (test_and_clear_bit(FUSE_I_ADVISE_RDPLUS, &fi->state))
26 		return true;
27 	if (ctx->pos == 0)
28 		return true;
29 	return false;
30 }
31 
32 static void fuse_add_dirent_to_cache(struct file *file,
33 				     struct fuse_dirent *dirent, loff_t pos)
34 {
35 	struct fuse_inode *fi = get_fuse_inode(file_inode(file));
36 	size_t reclen = FUSE_DIRENT_SIZE(dirent);
37 	pgoff_t index;
38 	struct page *page;
39 	loff_t size;
40 	u64 version;
41 	unsigned int offset;
42 	void *addr;
43 
44 	spin_lock(&fi->rdc.lock);
45 	/*
46 	 * Is cache already completed?  Or this entry does not go at the end of
47 	 * cache?
48 	 */
49 	if (fi->rdc.cached || pos != fi->rdc.pos) {
50 		spin_unlock(&fi->rdc.lock);
51 		return;
52 	}
53 	version = fi->rdc.version;
54 	size = fi->rdc.size;
55 	offset = size & ~PAGE_MASK;
56 	index = size >> PAGE_SHIFT;
57 	/* Dirent doesn't fit in current page?  Jump to next page. */
58 	if (offset + reclen > PAGE_SIZE) {
59 		index++;
60 		offset = 0;
61 	}
62 	spin_unlock(&fi->rdc.lock);
63 
64 	if (offset) {
65 		page = find_lock_page(file->f_mapping, index);
66 	} else {
67 		page = find_or_create_page(file->f_mapping, index,
68 					   mapping_gfp_mask(file->f_mapping));
69 	}
70 	if (!page)
71 		return;
72 
73 	spin_lock(&fi->rdc.lock);
74 	/* Raced with another readdir */
75 	if (fi->rdc.version != version || fi->rdc.size != size ||
76 	    WARN_ON(fi->rdc.pos != pos))
77 		goto unlock;
78 
79 	addr = kmap_local_page(page);
80 	if (!offset) {
81 		clear_page(addr);
82 		SetPageUptodate(page);
83 	}
84 	memcpy(addr + offset, dirent, reclen);
85 	kunmap_local(addr);
86 	fi->rdc.size = (index << PAGE_SHIFT) + offset + reclen;
87 	fi->rdc.pos = dirent->off;
88 unlock:
89 	spin_unlock(&fi->rdc.lock);
90 	unlock_page(page);
91 	put_page(page);
92 }
93 
94 static void fuse_readdir_cache_end(struct file *file, loff_t pos)
95 {
96 	struct fuse_inode *fi = get_fuse_inode(file_inode(file));
97 	loff_t end;
98 
99 	spin_lock(&fi->rdc.lock);
100 	/* does cache end position match current position? */
101 	if (fi->rdc.pos != pos) {
102 		spin_unlock(&fi->rdc.lock);
103 		return;
104 	}
105 
106 	fi->rdc.cached = true;
107 	end = ALIGN(fi->rdc.size, PAGE_SIZE);
108 	spin_unlock(&fi->rdc.lock);
109 
110 	/* truncate unused tail of cache */
111 	truncate_inode_pages(file->f_mapping, end);
112 }
113 
114 static bool fuse_emit(struct file *file, struct dir_context *ctx,
115 		      struct fuse_dirent *dirent)
116 {
117 	struct fuse_file *ff = file->private_data;
118 
119 	if (ff->open_flags & FOPEN_CACHE_DIR)
120 		fuse_add_dirent_to_cache(file, dirent, ctx->pos);
121 
122 	return dir_emit(ctx, dirent->name, dirent->namelen, dirent->ino,
123 			dirent->type);
124 }
125 
126 static int parse_dirfile(char *buf, size_t nbytes, struct file *file,
127 			 struct dir_context *ctx)
128 {
129 	while (nbytes >= FUSE_NAME_OFFSET) {
130 		struct fuse_dirent *dirent = (struct fuse_dirent *) buf;
131 		size_t reclen = FUSE_DIRENT_SIZE(dirent);
132 		if (!dirent->namelen || dirent->namelen > FUSE_NAME_MAX)
133 			return -EIO;
134 		if (reclen > nbytes)
135 			break;
136 		if (memchr(dirent->name, '/', dirent->namelen) != NULL)
137 			return -EIO;
138 
139 		if (!fuse_emit(file, ctx, dirent))
140 			break;
141 
142 		buf += reclen;
143 		nbytes -= reclen;
144 		ctx->pos = dirent->off;
145 	}
146 
147 	return 0;
148 }
149 
150 static int fuse_direntplus_link(struct file *file,
151 				struct fuse_direntplus *direntplus,
152 				u64 attr_version)
153 {
154 	struct fuse_entry_out *o = &direntplus->entry_out;
155 	struct fuse_dirent *dirent = &direntplus->dirent;
156 	struct dentry *parent = file->f_path.dentry;
157 	struct qstr name = QSTR_INIT(dirent->name, dirent->namelen);
158 	struct dentry *dentry;
159 	struct dentry *alias;
160 	struct inode *dir = d_inode(parent);
161 	struct fuse_conn *fc;
162 	struct inode *inode;
163 	DECLARE_WAIT_QUEUE_HEAD_ONSTACK(wq);
164 
165 	if (!o->nodeid) {
166 		/*
167 		 * Unlike in the case of fuse_lookup, zero nodeid does not mean
168 		 * ENOENT. Instead, it only means the userspace filesystem did
169 		 * not want to return attributes/handle for this entry.
170 		 *
171 		 * So do nothing.
172 		 */
173 		return 0;
174 	}
175 
176 	if (name.name[0] == '.') {
177 		/*
178 		 * We could potentially refresh the attributes of the directory
179 		 * and its parent?
180 		 */
181 		if (name.len == 1)
182 			return 0;
183 		if (name.name[1] == '.' && name.len == 2)
184 			return 0;
185 	}
186 
187 	if (invalid_nodeid(o->nodeid))
188 		return -EIO;
189 	if (fuse_invalid_attr(&o->attr))
190 		return -EIO;
191 
192 	fc = get_fuse_conn(dir);
193 
194 	name.hash = full_name_hash(parent, name.name, name.len);
195 	dentry = d_lookup(parent, &name);
196 	if (!dentry) {
197 retry:
198 		dentry = d_alloc_parallel(parent, &name, &wq);
199 		if (IS_ERR(dentry))
200 			return PTR_ERR(dentry);
201 	}
202 	if (!d_in_lookup(dentry)) {
203 		struct fuse_inode *fi;
204 		inode = d_inode(dentry);
205 		if (inode && get_node_id(inode) != o->nodeid)
206 			inode = NULL;
207 		if (!inode ||
208 		    fuse_stale_inode(inode, o->generation, &o->attr)) {
209 			if (inode)
210 				fuse_make_bad(inode);
211 			d_invalidate(dentry);
212 			dput(dentry);
213 			goto retry;
214 		}
215 		if (fuse_is_bad(inode)) {
216 			dput(dentry);
217 			return -EIO;
218 		}
219 
220 		fi = get_fuse_inode(inode);
221 		spin_lock(&fi->lock);
222 		fi->nlookup++;
223 		spin_unlock(&fi->lock);
224 
225 		forget_all_cached_acls(inode);
226 		fuse_change_attributes(inode, &o->attr, NULL,
227 				       ATTR_TIMEOUT(o),
228 				       attr_version);
229 		/*
230 		 * The other branch comes via fuse_iget()
231 		 * which bumps nlookup inside
232 		 */
233 	} else {
234 		inode = fuse_iget(dir->i_sb, o->nodeid, o->generation,
235 				  &o->attr, ATTR_TIMEOUT(o),
236 				  attr_version);
237 		if (!inode)
238 			inode = ERR_PTR(-ENOMEM);
239 
240 		alias = d_splice_alias(inode, dentry);
241 		d_lookup_done(dentry);
242 		if (alias) {
243 			dput(dentry);
244 			dentry = alias;
245 		}
246 		if (IS_ERR(dentry)) {
247 			if (!IS_ERR(inode)) {
248 				struct fuse_inode *fi = get_fuse_inode(inode);
249 
250 				spin_lock(&fi->lock);
251 				fi->nlookup--;
252 				spin_unlock(&fi->lock);
253 			}
254 			return PTR_ERR(dentry);
255 		}
256 	}
257 	if (fc->readdirplus_auto)
258 		set_bit(FUSE_I_INIT_RDPLUS, &get_fuse_inode(inode)->state);
259 	fuse_change_entry_timeout(dentry, o);
260 
261 	dput(dentry);
262 	return 0;
263 }
264 
265 static void fuse_force_forget(struct file *file, u64 nodeid)
266 {
267 	struct inode *inode = file_inode(file);
268 	struct fuse_mount *fm = get_fuse_mount(inode);
269 	struct fuse_forget_in inarg;
270 	FUSE_ARGS(args);
271 
272 	memset(&inarg, 0, sizeof(inarg));
273 	inarg.nlookup = 1;
274 	args.opcode = FUSE_FORGET;
275 	args.nodeid = nodeid;
276 	args.in_numargs = 1;
277 	args.in_args[0].size = sizeof(inarg);
278 	args.in_args[0].value = &inarg;
279 	args.force = true;
280 	args.noreply = true;
281 
282 	fuse_simple_request(fm, &args);
283 	/* ignore errors */
284 }
285 
286 static int parse_dirplusfile(char *buf, size_t nbytes, struct file *file,
287 			     struct dir_context *ctx, u64 attr_version)
288 {
289 	struct fuse_direntplus *direntplus;
290 	struct fuse_dirent *dirent;
291 	size_t reclen;
292 	int over = 0;
293 	int ret;
294 
295 	while (nbytes >= FUSE_NAME_OFFSET_DIRENTPLUS) {
296 		direntplus = (struct fuse_direntplus *) buf;
297 		dirent = &direntplus->dirent;
298 		reclen = FUSE_DIRENTPLUS_SIZE(direntplus);
299 
300 		if (!dirent->namelen || dirent->namelen > FUSE_NAME_MAX)
301 			return -EIO;
302 		if (reclen > nbytes)
303 			break;
304 		if (memchr(dirent->name, '/', dirent->namelen) != NULL)
305 			return -EIO;
306 
307 		if (!over) {
308 			/* We fill entries into dstbuf only as much as
309 			   it can hold. But we still continue iterating
310 			   over remaining entries to link them. If not,
311 			   we need to send a FORGET for each of those
312 			   which we did not link.
313 			*/
314 			over = !fuse_emit(file, ctx, dirent);
315 			if (!over)
316 				ctx->pos = dirent->off;
317 		}
318 
319 		buf += reclen;
320 		nbytes -= reclen;
321 
322 		ret = fuse_direntplus_link(file, direntplus, attr_version);
323 		if (ret)
324 			fuse_force_forget(file, direntplus->entry_out.nodeid);
325 	}
326 
327 	return 0;
328 }
329 
330 static int fuse_readdir_uncached(struct file *file, struct dir_context *ctx)
331 {
332 	int plus;
333 	ssize_t res;
334 	struct page *page;
335 	struct inode *inode = file_inode(file);
336 	struct fuse_mount *fm = get_fuse_mount(inode);
337 	struct fuse_io_args ia = {};
338 	struct fuse_args_pages *ap = &ia.ap;
339 	struct fuse_page_desc desc = { .length = PAGE_SIZE };
340 	u64 attr_version = 0;
341 	bool locked;
342 
343 	page = alloc_page(GFP_KERNEL);
344 	if (!page)
345 		return -ENOMEM;
346 
347 	plus = fuse_use_readdirplus(inode, ctx);
348 	ap->args.out_pages = true;
349 	ap->num_pages = 1;
350 	ap->pages = &page;
351 	ap->descs = &desc;
352 	if (plus) {
353 		attr_version = fuse_get_attr_version(fm->fc);
354 		fuse_read_args_fill(&ia, file, ctx->pos, PAGE_SIZE,
355 				    FUSE_READDIRPLUS);
356 	} else {
357 		fuse_read_args_fill(&ia, file, ctx->pos, PAGE_SIZE,
358 				    FUSE_READDIR);
359 	}
360 	locked = fuse_lock_inode(inode);
361 	res = fuse_simple_request(fm, &ap->args);
362 	fuse_unlock_inode(inode, locked);
363 	if (res >= 0) {
364 		if (!res) {
365 			struct fuse_file *ff = file->private_data;
366 
367 			if (ff->open_flags & FOPEN_CACHE_DIR)
368 				fuse_readdir_cache_end(file, ctx->pos);
369 		} else if (plus) {
370 			res = parse_dirplusfile(page_address(page), res,
371 						file, ctx, attr_version);
372 		} else {
373 			res = parse_dirfile(page_address(page), res, file,
374 					    ctx);
375 		}
376 	}
377 
378 	__free_page(page);
379 	fuse_invalidate_atime(inode);
380 	return res;
381 }
382 
/* Outcome of scanning one cached page in fuse_parse_cache(). */
enum fuse_parse_result {
	/* A cached entry failed validation. */
	FOUND_ERR = -1,
	/* Nothing was emitted from this page. */
	FOUND_NONE = 0,
	/* The wanted position was reached and the page was consumed. */
	FOUND_SOME = 1,
	/* dir_emit() refused an entry: the caller's buffer is full. */
	FOUND_ALL = 2,
};
389 
390 static enum fuse_parse_result fuse_parse_cache(struct fuse_file *ff,
391 					       void *addr, unsigned int size,
392 					       struct dir_context *ctx)
393 {
394 	unsigned int offset = ff->readdir.cache_off & ~PAGE_MASK;
395 	enum fuse_parse_result res = FOUND_NONE;
396 
397 	WARN_ON(offset >= size);
398 
399 	for (;;) {
400 		struct fuse_dirent *dirent = addr + offset;
401 		unsigned int nbytes = size - offset;
402 		size_t reclen;
403 
404 		if (nbytes < FUSE_NAME_OFFSET || !dirent->namelen)
405 			break;
406 
407 		reclen = FUSE_DIRENT_SIZE(dirent); /* derefs ->namelen */
408 
409 		if (WARN_ON(dirent->namelen > FUSE_NAME_MAX))
410 			return FOUND_ERR;
411 		if (WARN_ON(reclen > nbytes))
412 			return FOUND_ERR;
413 		if (WARN_ON(memchr(dirent->name, '/', dirent->namelen) != NULL))
414 			return FOUND_ERR;
415 
416 		if (ff->readdir.pos == ctx->pos) {
417 			res = FOUND_SOME;
418 			if (!dir_emit(ctx, dirent->name, dirent->namelen,
419 				      dirent->ino, dirent->type))
420 				return FOUND_ALL;
421 			ctx->pos = dirent->off;
422 		}
423 		ff->readdir.pos = dirent->off;
424 		ff->readdir.cache_off += reclen;
425 
426 		offset += reclen;
427 	}
428 
429 	return res;
430 }
431 
432 static void fuse_rdc_reset(struct inode *inode)
433 {
434 	struct fuse_inode *fi = get_fuse_inode(inode);
435 
436 	fi->rdc.cached = false;
437 	fi->rdc.version++;
438 	fi->rdc.size = 0;
439 	fi->rdc.pos = 0;
440 }
441 
442 #define UNCACHED 1
443 
444 static int fuse_readdir_cached(struct file *file, struct dir_context *ctx)
445 {
446 	struct fuse_file *ff = file->private_data;
447 	struct inode *inode = file_inode(file);
448 	struct fuse_conn *fc = get_fuse_conn(inode);
449 	struct fuse_inode *fi = get_fuse_inode(inode);
450 	enum fuse_parse_result res;
451 	pgoff_t index;
452 	unsigned int size;
453 	struct page *page;
454 	void *addr;
455 
456 	/* Seeked?  If so, reset the cache stream */
457 	if (ff->readdir.pos != ctx->pos) {
458 		ff->readdir.pos = 0;
459 		ff->readdir.cache_off = 0;
460 	}
461 
462 	/*
463 	 * We're just about to start reading into the cache or reading the
464 	 * cache; both cases require an up-to-date mtime value.
465 	 */
466 	if (!ctx->pos && fc->auto_inval_data) {
467 		int err = fuse_update_attributes(inode, file, STATX_MTIME);
468 
469 		if (err)
470 			return err;
471 	}
472 
473 retry:
474 	spin_lock(&fi->rdc.lock);
475 retry_locked:
476 	if (!fi->rdc.cached) {
477 		/* Starting cache? Set cache mtime. */
478 		if (!ctx->pos && !fi->rdc.size) {
479 			fi->rdc.mtime = inode->i_mtime;
480 			fi->rdc.iversion = inode_query_iversion(inode);
481 		}
482 		spin_unlock(&fi->rdc.lock);
483 		return UNCACHED;
484 	}
485 	/*
486 	 * When at the beginning of the directory (i.e. just after opendir(3) or
487 	 * rewinddir(3)), then need to check whether directory contents have
488 	 * changed, and reset the cache if so.
489 	 */
490 	if (!ctx->pos) {
491 		if (inode_peek_iversion(inode) != fi->rdc.iversion ||
492 		    !timespec64_equal(&fi->rdc.mtime, &inode->i_mtime)) {
493 			fuse_rdc_reset(inode);
494 			goto retry_locked;
495 		}
496 	}
497 
498 	/*
499 	 * If cache version changed since the last getdents() call, then reset
500 	 * the cache stream.
501 	 */
502 	if (ff->readdir.version != fi->rdc.version) {
503 		ff->readdir.pos = 0;
504 		ff->readdir.cache_off = 0;
505 	}
506 	/*
507 	 * If at the beginning of the cache, than reset version to
508 	 * current.
509 	 */
510 	if (ff->readdir.pos == 0)
511 		ff->readdir.version = fi->rdc.version;
512 
513 	WARN_ON(fi->rdc.size < ff->readdir.cache_off);
514 
515 	index = ff->readdir.cache_off >> PAGE_SHIFT;
516 
517 	if (index == (fi->rdc.size >> PAGE_SHIFT))
518 		size = fi->rdc.size & ~PAGE_MASK;
519 	else
520 		size = PAGE_SIZE;
521 	spin_unlock(&fi->rdc.lock);
522 
523 	/* EOF? */
524 	if ((ff->readdir.cache_off & ~PAGE_MASK) == size)
525 		return 0;
526 
527 	page = find_get_page_flags(file->f_mapping, index,
528 				   FGP_ACCESSED | FGP_LOCK);
529 	/* Page gone missing, then re-added to cache, but not initialized? */
530 	if (page && !PageUptodate(page)) {
531 		unlock_page(page);
532 		put_page(page);
533 		page = NULL;
534 	}
535 	spin_lock(&fi->rdc.lock);
536 	if (!page) {
537 		/*
538 		 * Uh-oh: page gone missing, cache is useless
539 		 */
540 		if (fi->rdc.version == ff->readdir.version)
541 			fuse_rdc_reset(inode);
542 		goto retry_locked;
543 	}
544 
545 	/* Make sure it's still the same version after getting the page. */
546 	if (ff->readdir.version != fi->rdc.version) {
547 		spin_unlock(&fi->rdc.lock);
548 		unlock_page(page);
549 		put_page(page);
550 		goto retry;
551 	}
552 	spin_unlock(&fi->rdc.lock);
553 
554 	/*
555 	 * Contents of the page are now protected against changing by holding
556 	 * the page lock.
557 	 */
558 	addr = kmap_local_page(page);
559 	res = fuse_parse_cache(ff, addr, size, ctx);
560 	kunmap_local(addr);
561 	unlock_page(page);
562 	put_page(page);
563 
564 	if (res == FOUND_ERR)
565 		return -EIO;
566 
567 	if (res == FOUND_ALL)
568 		return 0;
569 
570 	if (size == PAGE_SIZE) {
571 		/* We hit end of page: skip to next page. */
572 		ff->readdir.cache_off = ALIGN(ff->readdir.cache_off, PAGE_SIZE);
573 		goto retry;
574 	}
575 
576 	/*
577 	 * End of cache reached.  If found position, then we are done, otherwise
578 	 * need to fall back to uncached, since the position we were looking for
579 	 * wasn't in the cache.
580 	 */
581 	return res == FOUND_SOME ? 0 : UNCACHED;
582 }
583 
584 int fuse_readdir(struct file *file, struct dir_context *ctx)
585 {
586 	struct fuse_file *ff = file->private_data;
587 	struct inode *inode = file_inode(file);
588 	int err;
589 
590 	if (fuse_is_bad(inode))
591 		return -EIO;
592 
593 	mutex_lock(&ff->readdir.lock);
594 
595 	err = UNCACHED;
596 	if (ff->open_flags & FOPEN_CACHE_DIR)
597 		err = fuse_readdir_cached(file, ctx);
598 	if (err == UNCACHED)
599 		err = fuse_readdir_uncached(file, ctx);
600 
601 	mutex_unlock(&ff->readdir.lock);
602 
603 	return err;
604 }
605