xref: /openbmc/linux/fs/fuse/readdir.c (revision 0352f880)
1 /*
2   FUSE: Filesystem in Userspace
3   Copyright (C) 2001-2018  Miklos Szeredi <miklos@szeredi.hu>
4 
5   This program can be distributed under the terms of the GNU GPL.
6   See the file COPYING.
7 */
8 
9 
10 #include "fuse_i.h"
11 #include <linux/iversion.h>
12 #include <linux/posix_acl.h>
13 #include <linux/pagemap.h>
14 #include <linux/highmem.h>
15 
16 static bool fuse_use_readdirplus(struct inode *dir, struct dir_context *ctx)
17 {
18 	struct fuse_conn *fc = get_fuse_conn(dir);
19 	struct fuse_inode *fi = get_fuse_inode(dir);
20 
21 	if (!fc->do_readdirplus)
22 		return false;
23 	if (!fc->readdirplus_auto)
24 		return true;
25 	if (test_and_clear_bit(FUSE_I_ADVISE_RDPLUS, &fi->state))
26 		return true;
27 	if (ctx->pos == 0)
28 		return true;
29 	return false;
30 }
31 
32 static void fuse_add_dirent_to_cache(struct file *file,
33 				     struct fuse_dirent *dirent, loff_t pos)
34 {
35 	struct fuse_inode *fi = get_fuse_inode(file_inode(file));
36 	size_t reclen = FUSE_DIRENT_SIZE(dirent);
37 	pgoff_t index;
38 	struct page *page;
39 	loff_t size;
40 	u64 version;
41 	unsigned int offset;
42 	void *addr;
43 
44 	spin_lock(&fi->rdc.lock);
45 	/*
46 	 * Is cache already completed?  Or this entry does not go at the end of
47 	 * cache?
48 	 */
49 	if (fi->rdc.cached || pos != fi->rdc.pos) {
50 		spin_unlock(&fi->rdc.lock);
51 		return;
52 	}
53 	version = fi->rdc.version;
54 	size = fi->rdc.size;
55 	offset = size & ~PAGE_MASK;
56 	index = size >> PAGE_SHIFT;
57 	/* Dirent doesn't fit in current page?  Jump to next page. */
58 	if (offset + reclen > PAGE_SIZE) {
59 		index++;
60 		offset = 0;
61 	}
62 	spin_unlock(&fi->rdc.lock);
63 
64 	if (offset) {
65 		page = find_lock_page(file->f_mapping, index);
66 	} else {
67 		page = find_or_create_page(file->f_mapping, index,
68 					   mapping_gfp_mask(file->f_mapping));
69 	}
70 	if (!page)
71 		return;
72 
73 	spin_lock(&fi->rdc.lock);
74 	/* Raced with another readdir */
75 	if (fi->rdc.version != version || fi->rdc.size != size ||
76 	    WARN_ON(fi->rdc.pos != pos))
77 		goto unlock;
78 
79 	addr = kmap_local_page(page);
80 	if (!offset) {
81 		clear_page(addr);
82 		SetPageUptodate(page);
83 	}
84 	memcpy(addr + offset, dirent, reclen);
85 	kunmap_local(addr);
86 	fi->rdc.size = (index << PAGE_SHIFT) + offset + reclen;
87 	fi->rdc.pos = dirent->off;
88 unlock:
89 	spin_unlock(&fi->rdc.lock);
90 	unlock_page(page);
91 	put_page(page);
92 }
93 
94 static void fuse_readdir_cache_end(struct file *file, loff_t pos)
95 {
96 	struct fuse_inode *fi = get_fuse_inode(file_inode(file));
97 	loff_t end;
98 
99 	spin_lock(&fi->rdc.lock);
100 	/* does cache end position match current position? */
101 	if (fi->rdc.pos != pos) {
102 		spin_unlock(&fi->rdc.lock);
103 		return;
104 	}
105 
106 	fi->rdc.cached = true;
107 	end = ALIGN(fi->rdc.size, PAGE_SIZE);
108 	spin_unlock(&fi->rdc.lock);
109 
110 	/* truncate unused tail of cache */
111 	truncate_inode_pages(file->f_mapping, end);
112 }
113 
114 static bool fuse_emit(struct file *file, struct dir_context *ctx,
115 		      struct fuse_dirent *dirent)
116 {
117 	struct fuse_file *ff = file->private_data;
118 
119 	if (ff->open_flags & FOPEN_CACHE_DIR)
120 		fuse_add_dirent_to_cache(file, dirent, ctx->pos);
121 
122 	return dir_emit(ctx, dirent->name, dirent->namelen, dirent->ino,
123 			dirent->type);
124 }
125 
126 static int parse_dirfile(char *buf, size_t nbytes, struct file *file,
127 			 struct dir_context *ctx)
128 {
129 	while (nbytes >= FUSE_NAME_OFFSET) {
130 		struct fuse_dirent *dirent = (struct fuse_dirent *) buf;
131 		size_t reclen = FUSE_DIRENT_SIZE(dirent);
132 		if (!dirent->namelen || dirent->namelen > FUSE_NAME_MAX)
133 			return -EIO;
134 		if (reclen > nbytes)
135 			break;
136 		if (memchr(dirent->name, '/', dirent->namelen) != NULL)
137 			return -EIO;
138 
139 		if (!fuse_emit(file, ctx, dirent))
140 			break;
141 
142 		buf += reclen;
143 		nbytes -= reclen;
144 		ctx->pos = dirent->off;
145 	}
146 
147 	return 0;
148 }
149 
150 static int fuse_direntplus_link(struct file *file,
151 				struct fuse_direntplus *direntplus,
152 				u64 attr_version)
153 {
154 	struct fuse_entry_out *o = &direntplus->entry_out;
155 	struct fuse_dirent *dirent = &direntplus->dirent;
156 	struct dentry *parent = file->f_path.dentry;
157 	struct qstr name = QSTR_INIT(dirent->name, dirent->namelen);
158 	struct dentry *dentry;
159 	struct dentry *alias;
160 	struct inode *dir = d_inode(parent);
161 	struct fuse_conn *fc;
162 	struct inode *inode;
163 	DECLARE_WAIT_QUEUE_HEAD_ONSTACK(wq);
164 
165 	if (!o->nodeid) {
166 		/*
167 		 * Unlike in the case of fuse_lookup, zero nodeid does not mean
168 		 * ENOENT. Instead, it only means the userspace filesystem did
169 		 * not want to return attributes/handle for this entry.
170 		 *
171 		 * So do nothing.
172 		 */
173 		return 0;
174 	}
175 
176 	if (name.name[0] == '.') {
177 		/*
178 		 * We could potentially refresh the attributes of the directory
179 		 * and its parent?
180 		 */
181 		if (name.len == 1)
182 			return 0;
183 		if (name.name[1] == '.' && name.len == 2)
184 			return 0;
185 	}
186 
187 	if (invalid_nodeid(o->nodeid))
188 		return -EIO;
189 	if (fuse_invalid_attr(&o->attr))
190 		return -EIO;
191 
192 	fc = get_fuse_conn(dir);
193 
194 	name.hash = full_name_hash(parent, name.name, name.len);
195 	dentry = d_lookup(parent, &name);
196 	if (!dentry) {
197 retry:
198 		dentry = d_alloc_parallel(parent, &name, &wq);
199 		if (IS_ERR(dentry))
200 			return PTR_ERR(dentry);
201 	}
202 	if (!d_in_lookup(dentry)) {
203 		struct fuse_inode *fi;
204 		inode = d_inode(dentry);
205 		if (inode && get_node_id(inode) != o->nodeid)
206 			inode = NULL;
207 		if (!inode ||
208 		    fuse_stale_inode(inode, o->generation, &o->attr)) {
209 			if (inode)
210 				fuse_make_bad(inode);
211 			d_invalidate(dentry);
212 			dput(dentry);
213 			goto retry;
214 		}
215 		if (fuse_is_bad(inode)) {
216 			dput(dentry);
217 			return -EIO;
218 		}
219 
220 		fi = get_fuse_inode(inode);
221 		spin_lock(&fi->lock);
222 		fi->nlookup++;
223 		spin_unlock(&fi->lock);
224 
225 		forget_all_cached_acls(inode);
226 		fuse_change_attributes(inode, &o->attr,
227 				       entry_attr_timeout(o),
228 				       attr_version);
229 		/*
230 		 * The other branch comes via fuse_iget()
231 		 * which bumps nlookup inside
232 		 */
233 	} else {
234 		inode = fuse_iget(dir->i_sb, o->nodeid, o->generation,
235 				  &o->attr, entry_attr_timeout(o),
236 				  attr_version);
237 		if (!inode)
238 			inode = ERR_PTR(-ENOMEM);
239 
240 		alias = d_splice_alias(inode, dentry);
241 		d_lookup_done(dentry);
242 		if (alias) {
243 			dput(dentry);
244 			dentry = alias;
245 		}
246 		if (IS_ERR(dentry))
247 			return PTR_ERR(dentry);
248 	}
249 	if (fc->readdirplus_auto)
250 		set_bit(FUSE_I_INIT_RDPLUS, &get_fuse_inode(inode)->state);
251 	fuse_change_entry_timeout(dentry, o);
252 
253 	dput(dentry);
254 	return 0;
255 }
256 
257 static void fuse_force_forget(struct file *file, u64 nodeid)
258 {
259 	struct inode *inode = file_inode(file);
260 	struct fuse_mount *fm = get_fuse_mount(inode);
261 	struct fuse_forget_in inarg;
262 	FUSE_ARGS(args);
263 
264 	memset(&inarg, 0, sizeof(inarg));
265 	inarg.nlookup = 1;
266 	args.opcode = FUSE_FORGET;
267 	args.nodeid = nodeid;
268 	args.in_numargs = 1;
269 	args.in_args[0].size = sizeof(inarg);
270 	args.in_args[0].value = &inarg;
271 	args.force = true;
272 	args.noreply = true;
273 
274 	fuse_simple_request(fm, &args);
275 	/* ignore errors */
276 }
277 
278 static int parse_dirplusfile(char *buf, size_t nbytes, struct file *file,
279 			     struct dir_context *ctx, u64 attr_version)
280 {
281 	struct fuse_direntplus *direntplus;
282 	struct fuse_dirent *dirent;
283 	size_t reclen;
284 	int over = 0;
285 	int ret;
286 
287 	while (nbytes >= FUSE_NAME_OFFSET_DIRENTPLUS) {
288 		direntplus = (struct fuse_direntplus *) buf;
289 		dirent = &direntplus->dirent;
290 		reclen = FUSE_DIRENTPLUS_SIZE(direntplus);
291 
292 		if (!dirent->namelen || dirent->namelen > FUSE_NAME_MAX)
293 			return -EIO;
294 		if (reclen > nbytes)
295 			break;
296 		if (memchr(dirent->name, '/', dirent->namelen) != NULL)
297 			return -EIO;
298 
299 		if (!over) {
300 			/* We fill entries into dstbuf only as much as
301 			   it can hold. But we still continue iterating
302 			   over remaining entries to link them. If not,
303 			   we need to send a FORGET for each of those
304 			   which we did not link.
305 			*/
306 			over = !fuse_emit(file, ctx, dirent);
307 			if (!over)
308 				ctx->pos = dirent->off;
309 		}
310 
311 		buf += reclen;
312 		nbytes -= reclen;
313 
314 		ret = fuse_direntplus_link(file, direntplus, attr_version);
315 		if (ret)
316 			fuse_force_forget(file, direntplus->entry_out.nodeid);
317 	}
318 
319 	return 0;
320 }
321 
322 static int fuse_readdir_uncached(struct file *file, struct dir_context *ctx)
323 {
324 	int plus;
325 	ssize_t res;
326 	struct page *page;
327 	struct inode *inode = file_inode(file);
328 	struct fuse_mount *fm = get_fuse_mount(inode);
329 	struct fuse_io_args ia = {};
330 	struct fuse_args_pages *ap = &ia.ap;
331 	struct fuse_page_desc desc = { .length = PAGE_SIZE };
332 	u64 attr_version = 0;
333 	bool locked;
334 
335 	page = alloc_page(GFP_KERNEL);
336 	if (!page)
337 		return -ENOMEM;
338 
339 	plus = fuse_use_readdirplus(inode, ctx);
340 	ap->args.out_pages = true;
341 	ap->num_pages = 1;
342 	ap->pages = &page;
343 	ap->descs = &desc;
344 	if (plus) {
345 		attr_version = fuse_get_attr_version(fm->fc);
346 		fuse_read_args_fill(&ia, file, ctx->pos, PAGE_SIZE,
347 				    FUSE_READDIRPLUS);
348 	} else {
349 		fuse_read_args_fill(&ia, file, ctx->pos, PAGE_SIZE,
350 				    FUSE_READDIR);
351 	}
352 	locked = fuse_lock_inode(inode);
353 	res = fuse_simple_request(fm, &ap->args);
354 	fuse_unlock_inode(inode, locked);
355 	if (res >= 0) {
356 		if (!res) {
357 			struct fuse_file *ff = file->private_data;
358 
359 			if (ff->open_flags & FOPEN_CACHE_DIR)
360 				fuse_readdir_cache_end(file, ctx->pos);
361 		} else if (plus) {
362 			res = parse_dirplusfile(page_address(page), res,
363 						file, ctx, attr_version);
364 		} else {
365 			res = parse_dirfile(page_address(page), res, file,
366 					    ctx);
367 		}
368 	}
369 
370 	__free_page(page);
371 	fuse_invalidate_atime(inode);
372 	return res;
373 }
374 
/* Outcome of scanning one page of the readdir cache in fuse_parse_cache(). */
enum fuse_parse_result {
	/* an entry in the page failed validation */
	FOUND_ERR  = -1,
	/* the position being looked for was not reached */
	FOUND_NONE = 0,
	/* the position was reached; end of page data hit before the buffer filled */
	FOUND_SOME = 1,
	/* dir_emit() refused an entry, nothing more can be emitted */
	FOUND_ALL  = 2,
};
381 
382 static enum fuse_parse_result fuse_parse_cache(struct fuse_file *ff,
383 					       void *addr, unsigned int size,
384 					       struct dir_context *ctx)
385 {
386 	unsigned int offset = ff->readdir.cache_off & ~PAGE_MASK;
387 	enum fuse_parse_result res = FOUND_NONE;
388 
389 	WARN_ON(offset >= size);
390 
391 	for (;;) {
392 		struct fuse_dirent *dirent = addr + offset;
393 		unsigned int nbytes = size - offset;
394 		size_t reclen;
395 
396 		if (nbytes < FUSE_NAME_OFFSET || !dirent->namelen)
397 			break;
398 
399 		reclen = FUSE_DIRENT_SIZE(dirent); /* derefs ->namelen */
400 
401 		if (WARN_ON(dirent->namelen > FUSE_NAME_MAX))
402 			return FOUND_ERR;
403 		if (WARN_ON(reclen > nbytes))
404 			return FOUND_ERR;
405 		if (WARN_ON(memchr(dirent->name, '/', dirent->namelen) != NULL))
406 			return FOUND_ERR;
407 
408 		if (ff->readdir.pos == ctx->pos) {
409 			res = FOUND_SOME;
410 			if (!dir_emit(ctx, dirent->name, dirent->namelen,
411 				      dirent->ino, dirent->type))
412 				return FOUND_ALL;
413 			ctx->pos = dirent->off;
414 		}
415 		ff->readdir.pos = dirent->off;
416 		ff->readdir.cache_off += reclen;
417 
418 		offset += reclen;
419 	}
420 
421 	return res;
422 }
423 
424 static void fuse_rdc_reset(struct inode *inode)
425 {
426 	struct fuse_inode *fi = get_fuse_inode(inode);
427 
428 	fi->rdc.cached = false;
429 	fi->rdc.version++;
430 	fi->rdc.size = 0;
431 	fi->rdc.pos = 0;
432 }
433 
434 #define UNCACHED 1
435 
436 static int fuse_readdir_cached(struct file *file, struct dir_context *ctx)
437 {
438 	struct fuse_file *ff = file->private_data;
439 	struct inode *inode = file_inode(file);
440 	struct fuse_conn *fc = get_fuse_conn(inode);
441 	struct fuse_inode *fi = get_fuse_inode(inode);
442 	enum fuse_parse_result res;
443 	pgoff_t index;
444 	unsigned int size;
445 	struct page *page;
446 	void *addr;
447 
448 	/* Seeked?  If so, reset the cache stream */
449 	if (ff->readdir.pos != ctx->pos) {
450 		ff->readdir.pos = 0;
451 		ff->readdir.cache_off = 0;
452 	}
453 
454 	/*
455 	 * We're just about to start reading into the cache or reading the
456 	 * cache; both cases require an up-to-date mtime value.
457 	 */
458 	if (!ctx->pos && fc->auto_inval_data) {
459 		int err = fuse_update_attributes(inode, file, STATX_MTIME);
460 
461 		if (err)
462 			return err;
463 	}
464 
465 retry:
466 	spin_lock(&fi->rdc.lock);
467 retry_locked:
468 	if (!fi->rdc.cached) {
469 		/* Starting cache? Set cache mtime. */
470 		if (!ctx->pos && !fi->rdc.size) {
471 			fi->rdc.mtime = inode->i_mtime;
472 			fi->rdc.iversion = inode_query_iversion(inode);
473 		}
474 		spin_unlock(&fi->rdc.lock);
475 		return UNCACHED;
476 	}
477 	/*
478 	 * When at the beginning of the directory (i.e. just after opendir(3) or
479 	 * rewinddir(3)), then need to check whether directory contents have
480 	 * changed, and reset the cache if so.
481 	 */
482 	if (!ctx->pos) {
483 		if (inode_peek_iversion(inode) != fi->rdc.iversion ||
484 		    !timespec64_equal(&fi->rdc.mtime, &inode->i_mtime)) {
485 			fuse_rdc_reset(inode);
486 			goto retry_locked;
487 		}
488 	}
489 
490 	/*
491 	 * If cache version changed since the last getdents() call, then reset
492 	 * the cache stream.
493 	 */
494 	if (ff->readdir.version != fi->rdc.version) {
495 		ff->readdir.pos = 0;
496 		ff->readdir.cache_off = 0;
497 	}
498 	/*
499 	 * If at the beginning of the cache, than reset version to
500 	 * current.
501 	 */
502 	if (ff->readdir.pos == 0)
503 		ff->readdir.version = fi->rdc.version;
504 
505 	WARN_ON(fi->rdc.size < ff->readdir.cache_off);
506 
507 	index = ff->readdir.cache_off >> PAGE_SHIFT;
508 
509 	if (index == (fi->rdc.size >> PAGE_SHIFT))
510 		size = fi->rdc.size & ~PAGE_MASK;
511 	else
512 		size = PAGE_SIZE;
513 	spin_unlock(&fi->rdc.lock);
514 
515 	/* EOF? */
516 	if ((ff->readdir.cache_off & ~PAGE_MASK) == size)
517 		return 0;
518 
519 	page = find_get_page_flags(file->f_mapping, index,
520 				   FGP_ACCESSED | FGP_LOCK);
521 	/* Page gone missing, then re-added to cache, but not initialized? */
522 	if (page && !PageUptodate(page)) {
523 		unlock_page(page);
524 		put_page(page);
525 		page = NULL;
526 	}
527 	spin_lock(&fi->rdc.lock);
528 	if (!page) {
529 		/*
530 		 * Uh-oh: page gone missing, cache is useless
531 		 */
532 		if (fi->rdc.version == ff->readdir.version)
533 			fuse_rdc_reset(inode);
534 		goto retry_locked;
535 	}
536 
537 	/* Make sure it's still the same version after getting the page. */
538 	if (ff->readdir.version != fi->rdc.version) {
539 		spin_unlock(&fi->rdc.lock);
540 		unlock_page(page);
541 		put_page(page);
542 		goto retry;
543 	}
544 	spin_unlock(&fi->rdc.lock);
545 
546 	/*
547 	 * Contents of the page are now protected against changing by holding
548 	 * the page lock.
549 	 */
550 	addr = kmap(page);
551 	res = fuse_parse_cache(ff, addr, size, ctx);
552 	kunmap(page);
553 	unlock_page(page);
554 	put_page(page);
555 
556 	if (res == FOUND_ERR)
557 		return -EIO;
558 
559 	if (res == FOUND_ALL)
560 		return 0;
561 
562 	if (size == PAGE_SIZE) {
563 		/* We hit end of page: skip to next page. */
564 		ff->readdir.cache_off = ALIGN(ff->readdir.cache_off, PAGE_SIZE);
565 		goto retry;
566 	}
567 
568 	/*
569 	 * End of cache reached.  If found position, then we are done, otherwise
570 	 * need to fall back to uncached, since the position we were looking for
571 	 * wasn't in the cache.
572 	 */
573 	return res == FOUND_SOME ? 0 : UNCACHED;
574 }
575 
576 int fuse_readdir(struct file *file, struct dir_context *ctx)
577 {
578 	struct fuse_file *ff = file->private_data;
579 	struct inode *inode = file_inode(file);
580 	int err;
581 
582 	if (fuse_is_bad(inode))
583 		return -EIO;
584 
585 	mutex_lock(&ff->readdir.lock);
586 
587 	err = UNCACHED;
588 	if (ff->open_flags & FOPEN_CACHE_DIR)
589 		err = fuse_readdir_cached(file, ctx);
590 	if (err == UNCACHED)
591 		err = fuse_readdir_uncached(file, ctx);
592 
593 	mutex_unlock(&ff->readdir.lock);
594 
595 	return err;
596 }
597