xref: /openbmc/linux/fs/fuse/readdir.c (revision 3557b3fd)
1 /*
2   FUSE: Filesystem in Userspace
3   Copyright (C) 2001-2018  Miklos Szeredi <miklos@szeredi.hu>
4 
5   This program can be distributed under the terms of the GNU GPL.
6   See the file COPYING.
7 */
8 
9 
10 #include "fuse_i.h"
11 #include <linux/iversion.h>
12 #include <linux/posix_acl.h>
13 #include <linux/pagemap.h>
14 #include <linux/highmem.h>
15 
16 static bool fuse_use_readdirplus(struct inode *dir, struct dir_context *ctx)
17 {
18 	struct fuse_conn *fc = get_fuse_conn(dir);
19 	struct fuse_inode *fi = get_fuse_inode(dir);
20 
21 	if (!fc->do_readdirplus)
22 		return false;
23 	if (!fc->readdirplus_auto)
24 		return true;
25 	if (test_and_clear_bit(FUSE_I_ADVISE_RDPLUS, &fi->state))
26 		return true;
27 	if (ctx->pos == 0)
28 		return true;
29 	return false;
30 }
31 
/*
 * Append @dirent to the readdir cache of the directory behind @file.
 *
 * The cache lives in the pages of file->f_mapping; its bookkeeping (size,
 * position, version, completed flag) is in fi->rdc and is protected by
 * fi->rdc.lock.  The entry is added only if the cache is not yet complete
 * and @pos is exactly the position at which the cache currently ends;
 * otherwise the call is a no-op.  Failure to obtain the page is silently
 * ignored as well.
 */
static void fuse_add_dirent_to_cache(struct file *file,
				     struct fuse_dirent *dirent, loff_t pos)
{
	struct fuse_inode *fi = get_fuse_inode(file_inode(file));
	size_t reclen = FUSE_DIRENT_SIZE(dirent);
	pgoff_t index;
	struct page *page;
	loff_t size;
	u64 version;
	unsigned int offset;
	void *addr;

	spin_lock(&fi->rdc.lock);
	/*
	 * Is cache already completed?  Or this entry does not go at the end of
	 * cache?
	 */
	if (fi->rdc.cached || pos != fi->rdc.pos) {
		spin_unlock(&fi->rdc.lock);
		return;
	}
	/* Snapshot the cache state so a concurrent change can be detected. */
	version = fi->rdc.version;
	size = fi->rdc.size;
	offset = size & ~PAGE_MASK;
	index = size >> PAGE_SHIFT;
	/* Dirent doesn't fit in current page?  Jump to next page. */
	if (offset + reclen > PAGE_SIZE) {
		index++;
		offset = 0;
	}
	spin_unlock(&fi->rdc.lock);

	/*
	 * The page is obtained with the spinlock dropped.  A partially filled
	 * page is only looked up; a page is created only when the entry
	 * starts at offset zero.  Either way the page comes back locked.
	 */
	if (offset) {
		page = find_lock_page(file->f_mapping, index);
	} else {
		page = find_or_create_page(file->f_mapping, index,
					   mapping_gfp_mask(file->f_mapping));
	}
	if (!page)
		return;

	spin_lock(&fi->rdc.lock);
	/* Raced with another readdir */
	if (fi->rdc.version != version || fi->rdc.size != size ||
	    WARN_ON(fi->rdc.pos != pos))
		goto unlock;

	addr = kmap_atomic(page);
	/* First entry in this page: zero it before writing. */
	if (!offset)
		clear_page(addr);
	memcpy(addr + offset, dirent, reclen);
	kunmap_atomic(addr);
	/* Advance the cache end past the entry just stored. */
	fi->rdc.size = (index << PAGE_SHIFT) + offset + reclen;
	fi->rdc.pos = dirent->off;
unlock:
	spin_unlock(&fi->rdc.lock);
	unlock_page(page);
	put_page(page);
}
91 
92 static void fuse_readdir_cache_end(struct file *file, loff_t pos)
93 {
94 	struct fuse_inode *fi = get_fuse_inode(file_inode(file));
95 	loff_t end;
96 
97 	spin_lock(&fi->rdc.lock);
98 	/* does cache end position match current position? */
99 	if (fi->rdc.pos != pos) {
100 		spin_unlock(&fi->rdc.lock);
101 		return;
102 	}
103 
104 	fi->rdc.cached = true;
105 	end = ALIGN(fi->rdc.size, PAGE_SIZE);
106 	spin_unlock(&fi->rdc.lock);
107 
108 	/* truncate unused tail of cache */
109 	truncate_inode_pages(file->f_mapping, end);
110 }
111 
112 static bool fuse_emit(struct file *file, struct dir_context *ctx,
113 		      struct fuse_dirent *dirent)
114 {
115 	struct fuse_file *ff = file->private_data;
116 
117 	if (ff->open_flags & FOPEN_CACHE_DIR)
118 		fuse_add_dirent_to_cache(file, dirent, ctx->pos);
119 
120 	return dir_emit(ctx, dirent->name, dirent->namelen, dirent->ino,
121 			dirent->type);
122 }
123 
124 static int parse_dirfile(char *buf, size_t nbytes, struct file *file,
125 			 struct dir_context *ctx)
126 {
127 	while (nbytes >= FUSE_NAME_OFFSET) {
128 		struct fuse_dirent *dirent = (struct fuse_dirent *) buf;
129 		size_t reclen = FUSE_DIRENT_SIZE(dirent);
130 		if (!dirent->namelen || dirent->namelen > FUSE_NAME_MAX)
131 			return -EIO;
132 		if (reclen > nbytes)
133 			break;
134 		if (memchr(dirent->name, '/', dirent->namelen) != NULL)
135 			return -EIO;
136 
137 		if (!fuse_emit(file, ctx, dirent))
138 			break;
139 
140 		buf += reclen;
141 		nbytes -= reclen;
142 		ctx->pos = dirent->off;
143 	}
144 
145 	return 0;
146 }
147 
148 static int fuse_direntplus_link(struct file *file,
149 				struct fuse_direntplus *direntplus,
150 				u64 attr_version)
151 {
152 	struct fuse_entry_out *o = &direntplus->entry_out;
153 	struct fuse_dirent *dirent = &direntplus->dirent;
154 	struct dentry *parent = file->f_path.dentry;
155 	struct qstr name = QSTR_INIT(dirent->name, dirent->namelen);
156 	struct dentry *dentry;
157 	struct dentry *alias;
158 	struct inode *dir = d_inode(parent);
159 	struct fuse_conn *fc;
160 	struct inode *inode;
161 	DECLARE_WAIT_QUEUE_HEAD_ONSTACK(wq);
162 
163 	if (!o->nodeid) {
164 		/*
165 		 * Unlike in the case of fuse_lookup, zero nodeid does not mean
166 		 * ENOENT. Instead, it only means the userspace filesystem did
167 		 * not want to return attributes/handle for this entry.
168 		 *
169 		 * So do nothing.
170 		 */
171 		return 0;
172 	}
173 
174 	if (name.name[0] == '.') {
175 		/*
176 		 * We could potentially refresh the attributes of the directory
177 		 * and its parent?
178 		 */
179 		if (name.len == 1)
180 			return 0;
181 		if (name.name[1] == '.' && name.len == 2)
182 			return 0;
183 	}
184 
185 	if (invalid_nodeid(o->nodeid))
186 		return -EIO;
187 	if (!fuse_valid_type(o->attr.mode))
188 		return -EIO;
189 
190 	fc = get_fuse_conn(dir);
191 
192 	name.hash = full_name_hash(parent, name.name, name.len);
193 	dentry = d_lookup(parent, &name);
194 	if (!dentry) {
195 retry:
196 		dentry = d_alloc_parallel(parent, &name, &wq);
197 		if (IS_ERR(dentry))
198 			return PTR_ERR(dentry);
199 	}
200 	if (!d_in_lookup(dentry)) {
201 		struct fuse_inode *fi;
202 		inode = d_inode(dentry);
203 		if (!inode ||
204 		    get_node_id(inode) != o->nodeid ||
205 		    ((o->attr.mode ^ inode->i_mode) & S_IFMT)) {
206 			d_invalidate(dentry);
207 			dput(dentry);
208 			goto retry;
209 		}
210 		if (is_bad_inode(inode)) {
211 			dput(dentry);
212 			return -EIO;
213 		}
214 
215 		fi = get_fuse_inode(inode);
216 		spin_lock(&fi->lock);
217 		fi->nlookup++;
218 		spin_unlock(&fi->lock);
219 
220 		forget_all_cached_acls(inode);
221 		fuse_change_attributes(inode, &o->attr,
222 				       entry_attr_timeout(o),
223 				       attr_version);
224 		/*
225 		 * The other branch comes via fuse_iget()
226 		 * which bumps nlookup inside
227 		 */
228 	} else {
229 		inode = fuse_iget(dir->i_sb, o->nodeid, o->generation,
230 				  &o->attr, entry_attr_timeout(o),
231 				  attr_version);
232 		if (!inode)
233 			inode = ERR_PTR(-ENOMEM);
234 
235 		alias = d_splice_alias(inode, dentry);
236 		d_lookup_done(dentry);
237 		if (alias) {
238 			dput(dentry);
239 			dentry = alias;
240 		}
241 		if (IS_ERR(dentry))
242 			return PTR_ERR(dentry);
243 	}
244 	if (fc->readdirplus_auto)
245 		set_bit(FUSE_I_INIT_RDPLUS, &get_fuse_inode(inode)->state);
246 	fuse_change_entry_timeout(dentry, o);
247 
248 	dput(dentry);
249 	return 0;
250 }
251 
252 static int parse_dirplusfile(char *buf, size_t nbytes, struct file *file,
253 			     struct dir_context *ctx, u64 attr_version)
254 {
255 	struct fuse_direntplus *direntplus;
256 	struct fuse_dirent *dirent;
257 	size_t reclen;
258 	int over = 0;
259 	int ret;
260 
261 	while (nbytes >= FUSE_NAME_OFFSET_DIRENTPLUS) {
262 		direntplus = (struct fuse_direntplus *) buf;
263 		dirent = &direntplus->dirent;
264 		reclen = FUSE_DIRENTPLUS_SIZE(direntplus);
265 
266 		if (!dirent->namelen || dirent->namelen > FUSE_NAME_MAX)
267 			return -EIO;
268 		if (reclen > nbytes)
269 			break;
270 		if (memchr(dirent->name, '/', dirent->namelen) != NULL)
271 			return -EIO;
272 
273 		if (!over) {
274 			/* We fill entries into dstbuf only as much as
275 			   it can hold. But we still continue iterating
276 			   over remaining entries to link them. If not,
277 			   we need to send a FORGET for each of those
278 			   which we did not link.
279 			*/
280 			over = !fuse_emit(file, ctx, dirent);
281 			if (!over)
282 				ctx->pos = dirent->off;
283 		}
284 
285 		buf += reclen;
286 		nbytes -= reclen;
287 
288 		ret = fuse_direntplus_link(file, direntplus, attr_version);
289 		if (ret)
290 			fuse_force_forget(file, direntplus->entry_out.nodeid);
291 	}
292 
293 	return 0;
294 }
295 
296 static int fuse_readdir_uncached(struct file *file, struct dir_context *ctx)
297 {
298 	int plus, err;
299 	size_t nbytes;
300 	struct page *page;
301 	struct inode *inode = file_inode(file);
302 	struct fuse_conn *fc = get_fuse_conn(inode);
303 	struct fuse_req *req;
304 	u64 attr_version = 0;
305 	bool locked;
306 
307 	req = fuse_get_req(fc, 1);
308 	if (IS_ERR(req))
309 		return PTR_ERR(req);
310 
311 	page = alloc_page(GFP_KERNEL);
312 	if (!page) {
313 		fuse_put_request(fc, req);
314 		return -ENOMEM;
315 	}
316 
317 	plus = fuse_use_readdirplus(inode, ctx);
318 	req->out.argpages = 1;
319 	req->num_pages = 1;
320 	req->pages[0] = page;
321 	req->page_descs[0].length = PAGE_SIZE;
322 	if (plus) {
323 		attr_version = fuse_get_attr_version(fc);
324 		fuse_read_fill(req, file, ctx->pos, PAGE_SIZE,
325 			       FUSE_READDIRPLUS);
326 	} else {
327 		fuse_read_fill(req, file, ctx->pos, PAGE_SIZE,
328 			       FUSE_READDIR);
329 	}
330 	locked = fuse_lock_inode(inode);
331 	fuse_request_send(fc, req);
332 	fuse_unlock_inode(inode, locked);
333 	nbytes = req->out.args[0].size;
334 	err = req->out.h.error;
335 	fuse_put_request(fc, req);
336 	if (!err) {
337 		if (!nbytes) {
338 			struct fuse_file *ff = file->private_data;
339 
340 			if (ff->open_flags & FOPEN_CACHE_DIR)
341 				fuse_readdir_cache_end(file, ctx->pos);
342 		} else if (plus) {
343 			err = parse_dirplusfile(page_address(page), nbytes,
344 						file, ctx, attr_version);
345 		} else {
346 			err = parse_dirfile(page_address(page), nbytes, file,
347 					    ctx);
348 		}
349 	}
350 
351 	__free_page(page);
352 	fuse_invalidate_atime(inode);
353 	return err;
354 }
355 
/* Outcome of scanning one page of the readdir cache. */
enum fuse_parse_result {
	FOUND_ERR  = -1,	/* a cached entry failed validation */
	FOUND_NONE = 0,		/* no entry was emitted from this page */
	FOUND_SOME = 1,		/* the position was reached in this page */
	FOUND_ALL  = 2,		/* the reader refused an entry; stop */
};
362 
363 static enum fuse_parse_result fuse_parse_cache(struct fuse_file *ff,
364 					       void *addr, unsigned int size,
365 					       struct dir_context *ctx)
366 {
367 	unsigned int offset = ff->readdir.cache_off & ~PAGE_MASK;
368 	enum fuse_parse_result res = FOUND_NONE;
369 
370 	WARN_ON(offset >= size);
371 
372 	for (;;) {
373 		struct fuse_dirent *dirent = addr + offset;
374 		unsigned int nbytes = size - offset;
375 		size_t reclen = FUSE_DIRENT_SIZE(dirent);
376 
377 		if (nbytes < FUSE_NAME_OFFSET || !dirent->namelen)
378 			break;
379 
380 		if (WARN_ON(dirent->namelen > FUSE_NAME_MAX))
381 			return FOUND_ERR;
382 		if (WARN_ON(reclen > nbytes))
383 			return FOUND_ERR;
384 		if (WARN_ON(memchr(dirent->name, '/', dirent->namelen) != NULL))
385 			return FOUND_ERR;
386 
387 		if (ff->readdir.pos == ctx->pos) {
388 			res = FOUND_SOME;
389 			if (!dir_emit(ctx, dirent->name, dirent->namelen,
390 				      dirent->ino, dirent->type))
391 				return FOUND_ALL;
392 			ctx->pos = dirent->off;
393 		}
394 		ff->readdir.pos = dirent->off;
395 		ff->readdir.cache_off += reclen;
396 
397 		offset += reclen;
398 	}
399 
400 	return res;
401 }
402 
403 static void fuse_rdc_reset(struct inode *inode)
404 {
405 	struct fuse_inode *fi = get_fuse_inode(inode);
406 
407 	fi->rdc.cached = false;
408 	fi->rdc.version++;
409 	fi->rdc.size = 0;
410 	fi->rdc.pos = 0;
411 }
412 
/*
 * Positive return value of fuse_readdir_cached() telling fuse_readdir() to
 * fall back to fuse_readdir_uncached().
 */
#define UNCACHED 1

/*
 * Try to satisfy a readdir from the directory's readdir cache.
 *
 * Returns 0 when entries were emitted from the cache or the end of the
 * cache was reached at the requested position, a negative errno on failure
 * (-EIO for a corrupt cache page, or the error from refreshing attributes),
 * and UNCACHED when the cache is incomplete or does not contain ctx->pos.
 *
 * ff->readdir.{pos,cache_off,version} track this open file's progress
 * through the cache; fi->rdc holds the shared cache state and is accessed
 * under fi->rdc.lock.
 */
static int fuse_readdir_cached(struct file *file, struct dir_context *ctx)
{
	struct fuse_file *ff = file->private_data;
	struct inode *inode = file_inode(file);
	struct fuse_conn *fc = get_fuse_conn(inode);
	struct fuse_inode *fi = get_fuse_inode(inode);
	enum fuse_parse_result res;
	pgoff_t index;
	unsigned int size;
	struct page *page;
	void *addr;

	/* Seeked?  If so, reset the cache stream */
	if (ff->readdir.pos != ctx->pos) {
		ff->readdir.pos = 0;
		ff->readdir.cache_off = 0;
	}

	/*
	 * We're just about to start reading into the cache or reading the
	 * cache; both cases require an up-to-date mtime value.
	 */
	if (!ctx->pos && fc->auto_inval_data) {
		int err = fuse_update_attributes(inode, file);

		if (err)
			return err;
	}

retry:
	spin_lock(&fi->rdc.lock);
retry_locked:
	if (!fi->rdc.cached) {
		/* Starting cache? Set cache mtime. */
		if (!ctx->pos && !fi->rdc.size) {
			fi->rdc.mtime = inode->i_mtime;
			fi->rdc.iversion = inode_query_iversion(inode);
		}
		spin_unlock(&fi->rdc.lock);
		return UNCACHED;
	}
	/*
	 * When at the beginning of the directory (i.e. just after opendir(3) or
	 * rewinddir(3)), then need to check whether directory contents have
	 * changed, and reset the cache if so.
	 */
	if (!ctx->pos) {
		if (inode_peek_iversion(inode) != fi->rdc.iversion ||
		    !timespec64_equal(&fi->rdc.mtime, &inode->i_mtime)) {
			fuse_rdc_reset(inode);
			goto retry_locked;
		}
	}

	/*
	 * If cache version changed since the last getdents() call, then reset
	 * the cache stream.
	 */
	if (ff->readdir.version != fi->rdc.version) {
		ff->readdir.pos = 0;
		ff->readdir.cache_off = 0;
	}
	/*
	 * If at the beginning of the cache, then reset version to
	 * current.
	 */
	if (ff->readdir.pos == 0)
		ff->readdir.version = fi->rdc.version;

	WARN_ON(fi->rdc.size < ff->readdir.cache_off);

	index = ff->readdir.cache_off >> PAGE_SHIFT;

	/* The last cache page is only partially valid; earlier ones are full. */
	if (index == (fi->rdc.size >> PAGE_SHIFT))
		size = fi->rdc.size & ~PAGE_MASK;
	else
		size = PAGE_SIZE;
	spin_unlock(&fi->rdc.lock);

	/* EOF? */
	if ((ff->readdir.cache_off & ~PAGE_MASK) == size)
		return 0;

	/* Looked up with the spinlock dropped; the page comes back locked. */
	page = find_get_page_flags(file->f_mapping, index,
				   FGP_ACCESSED | FGP_LOCK);
	spin_lock(&fi->rdc.lock);
	if (!page) {
		/*
		 * Uh-oh: page gone missing, cache is useless
		 */
		if (fi->rdc.version == ff->readdir.version)
			fuse_rdc_reset(inode);
		goto retry_locked;
	}

	/* Make sure it's still the same version after getting the page. */
	if (ff->readdir.version != fi->rdc.version) {
		spin_unlock(&fi->rdc.lock);
		unlock_page(page);
		put_page(page);
		goto retry;
	}
	spin_unlock(&fi->rdc.lock);

	/*
	 * Contents of the page are now protected against changing by holding
	 * the page lock.
	 */
	addr = kmap(page);
	res = fuse_parse_cache(ff, addr, size, ctx);
	kunmap(page);
	unlock_page(page);
	put_page(page);

	if (res == FOUND_ERR)
		return -EIO;

	if (res == FOUND_ALL)
		return 0;

	if (size == PAGE_SIZE) {
		/* We hit end of page: skip to next page. */
		ff->readdir.cache_off = ALIGN(ff->readdir.cache_off, PAGE_SIZE);
		goto retry;
	}

	/*
	 * End of cache reached.  If found position, then we are done, otherwise
	 * need to fall back to uncached, since the position we were looking for
	 * wasn't in the cache.
	 */
	return res == FOUND_SOME ? 0 : UNCACHED;
}
548 
549 int fuse_readdir(struct file *file, struct dir_context *ctx)
550 {
551 	struct fuse_file *ff = file->private_data;
552 	struct inode *inode = file_inode(file);
553 	int err;
554 
555 	if (is_bad_inode(inode))
556 		return -EIO;
557 
558 	mutex_lock(&ff->readdir.lock);
559 
560 	err = UNCACHED;
561 	if (ff->open_flags & FOPEN_CACHE_DIR)
562 		err = fuse_readdir_cached(file, ctx);
563 	if (err == UNCACHED)
564 		err = fuse_readdir_uncached(file, ctx);
565 
566 	mutex_unlock(&ff->readdir.lock);
567 
568 	return err;
569 }
570