// SPDX-License-Identifier: GPL-2.0-or-later
/*
 * Copyright (C) 2022, Alibaba Cloud
 * Copyright (C) 2022, Bytedance Inc. All rights reserved.
 */
#include <linux/fscache.h>
#include "internal.h"

static DEFINE_MUTEX(erofs_domain_list_lock);
static DEFINE_MUTEX(erofs_domain_cookies_lock);
static LIST_HEAD(erofs_domain_list);
static struct vfsmount *erofs_pseudo_mnt;

struct erofs_fscache_request {
	struct erofs_fscache_request *primary;
	struct netfs_cache_resources cache_resources;
	struct address_space	*mapping;	/* The mapping being accessed */
	loff_t			start;		/* Start position */
	size_t			len;		/* Length of the request */
	size_t			submitted;	/* Length submitted so far */
	short			error;		/* 0 or error that occurred */
	refcount_t		ref;
};

static struct erofs_fscache_request *erofs_fscache_req_alloc(struct address_space *mapping,
					     loff_t start, size_t len)
{
	struct erofs_fscache_request *req;

	req = kzalloc(sizeof(struct erofs_fscache_request), GFP_KERNEL);
	if (!req)
		return ERR_PTR(-ENOMEM);

	req->mapping = mapping;
	req->start   = start;
	req->len     = len;
	refcount_set(&req->ref, 1);

	return req;
}

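/*
 * Reuse the primary request for the first submission; afterwards chain a
 * fresh request for the remaining range, holding a reference on the primary.
 */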
static struct erofs_fscache_request *erofs_fscache_req_chain(struct erofs_fscache_request *primary,
					     size_t len)
{
	struct erofs_fscache_request *req;

	/* use primary request for the first submission */
	if (!primary->submitted) {
		refcount_inc(&primary->ref);
		return primary;
	}

	req = erofs_fscache_req_alloc(primary->mapping,
			primary->start + primary->submitted, len);
	if (!IS_ERR(req)) {
		req->primary = primary;
		refcount_inc(&primary->ref);
	}
	return req;
}

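/*
 * Walk all folios covered by the request, marking them uptodate unless the
 * request failed, and unlock them for waiting readers.
 */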
static void erofs_fscache_req_complete(struct erofs_fscache_request *req)
{
	struct folio *folio;
	bool failed = req->error;
	pgoff_t start_page = req->start / PAGE_SIZE;
	pgoff_t last_page = ((req->start + req->len) / PAGE_SIZE) - 1;

	XA_STATE(xas, &req->mapping->i_pages, start_page);

	rcu_read_lock();
	xas_for_each(&xas, folio, last_page) {
		if (xas_retry(&xas, folio))
			continue;
		if (!failed)
			folio_mark_uptodate(folio);
		folio_unlock(folio);
	}
	rcu_read_unlock();
}

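/*
 * Drop a reference on the request.  The final put ends the cache operation
 * and either completes the covered folios (primary request) or releases the
 * reference held on the primary request this one was chained to.
 */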
static void erofs_fscache_req_put(struct erofs_fscache_request *req)
{
	if (refcount_dec_and_test(&req->ref)) {
		if (req->cache_resources.ops)
			req->cache_resources.ops->end_operation(&req->cache_resources);
		if (!req->primary)
			erofs_fscache_req_complete(req);
		else
			erofs_fscache_req_put(req->primary);
		kfree(req);
	}
}

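/*
 * Completion callback of fscache_read(): record any error on the owning
 * (primary) request and drop the per-submission reference.
 */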
static void erofs_fscache_subreq_complete(void *priv,
		ssize_t transferred_or_error, bool was_async)
{
	struct erofs_fscache_request *req = priv;

	if (IS_ERR_VALUE(transferred_or_error)) {
		if (req->primary)
			req->primary->error = transferred_or_error;
		else
			req->error = transferred_or_error;
	}
	erofs_fscache_req_put(req);
}

/*
 * Read data from fscache (cookie, pstart, len), and fill the read data into
 * page cache described by (req->mapping, lstart, len). @pstart describes the
 * start physical address in the cache file.
 */
static int erofs_fscache_read_folios_async(struct fscache_cookie *cookie,
		struct erofs_fscache_request *req, loff_t pstart, size_t len)
{
	enum netfs_io_source source;
	struct super_block *sb = req->mapping->host->i_sb;
	struct netfs_cache_resources *cres = &req->cache_resources;
	struct iov_iter iter;
	loff_t lstart = req->start + req->submitted;
	size_t done = 0;
	int ret;

	DBG_BUGON(len > req->len - req->submitted);

	ret = fscache_begin_read_operation(cres, cookie);
	if (ret)
		return ret;

	while (done < len) {
		loff_t sstart = pstart + done;
		size_t slen = len - done;
		unsigned long flags = 1 << NETFS_SREQ_ONDEMAND;

		source = cres->ops->prepare_ondemand_read(cres,
				sstart, &slen, LLONG_MAX, &flags, 0);
		if (WARN_ON(slen == 0))
			source = NETFS_INVALID_READ;
		if (source != NETFS_READ_FROM_CACHE) {
			erofs_err(sb, "failed to fscache prepare_read (source %d)", source);
			return -EIO;
		}

		refcount_inc(&req->ref);
		iov_iter_xarray(&iter, ITER_DEST, &req->mapping->i_pages,
				lstart + done, slen);

		ret = fscache_read(cres, sstart, &iter, NETFS_READ_HOLE_FAIL,
				   erofs_fscache_subreq_complete, req);
		if (ret == -EIOCBQUEUED)
			ret = 0;
		if (ret) {
			erofs_err(sb, "failed to fscache_read (ret %d)", ret);
			return ret;
		}

		done += slen;
	}
	DBG_BUGON(done != len);
	return 0;
}

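/*
 * ->read_folio() of metadata address spaces: the folio position is used
 * directly as the physical address to be mapped through erofs_map_dev().
 */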
static int erofs_fscache_meta_read_folio(struct file *data, struct folio *folio)
{
	int ret;
	struct super_block *sb = folio_mapping(folio)->host->i_sb;
	struct erofs_fscache_request *req;
	struct erofs_map_dev mdev = {
		.m_deviceid = 0,
		.m_pa = folio_pos(folio),
	};

	ret = erofs_map_dev(sb, &mdev);
	if (ret) {
		folio_unlock(folio);
		return ret;
	}

	req = erofs_fscache_req_alloc(folio_mapping(folio),
				folio_pos(folio), folio_size(folio));
	if (IS_ERR(req)) {
		folio_unlock(folio);
		return PTR_ERR(req);
	}

	ret = erofs_fscache_read_folios_async(mdev.m_fscache->cookie,
				req, mdev.m_pa, folio_size(folio));
	if (ret)
		req->error = ret;

	erofs_fscache_req_put(req);
	return ret;
}

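/*
 * Read a single contiguous slice of the request: inline (tail-packed) data
 * is copied from the metadata buffer, unmapped extents are zero-filled, and
 * mapped extents are read asynchronously from the backing blob.
 */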
static int erofs_fscache_data_read_slice(struct erofs_fscache_request *primary)
{
	struct address_space *mapping = primary->mapping;
	struct inode *inode = mapping->host;
	struct super_block *sb = inode->i_sb;
	struct erofs_fscache_request *req;
	struct erofs_map_blocks map;
	struct erofs_map_dev mdev;
	struct iov_iter iter;
	loff_t pos = primary->start + primary->submitted;
	size_t count;
	int ret;

	map.m_la = pos;
	ret = erofs_map_blocks(inode, &map, EROFS_GET_BLOCKS_RAW);
	if (ret)
		return ret;

	if (map.m_flags & EROFS_MAP_META) {
		struct erofs_buf buf = __EROFS_BUF_INITIALIZER;
		erofs_blk_t blknr;
		size_t offset, size;
		void *src;

		/* For tail packing layout, the offset may be non-zero. */
		offset = erofs_blkoff(map.m_pa);
		blknr = erofs_blknr(map.m_pa);
		size = map.m_llen;

		src = erofs_read_metabuf(&buf, sb, blknr, EROFS_KMAP);
		if (IS_ERR(src))
			return PTR_ERR(src);

		iov_iter_xarray(&iter, ITER_DEST, &mapping->i_pages, pos, PAGE_SIZE);
		if (copy_to_iter(src + offset, size, &iter) != size) {
			erofs_put_metabuf(&buf);
			return -EFAULT;
		}
		iov_iter_zero(PAGE_SIZE - size, &iter);
		erofs_put_metabuf(&buf);
		primary->submitted += PAGE_SIZE;
		return 0;
	}

	count = primary->len - primary->submitted;
	if (!(map.m_flags & EROFS_MAP_MAPPED)) {
		iov_iter_xarray(&iter, ITER_DEST, &mapping->i_pages, pos, count);
		iov_iter_zero(count, &iter);
		primary->submitted += count;
		return 0;
	}

	count = min_t(size_t, map.m_llen - (pos - map.m_la), count);
	DBG_BUGON(!count || count % PAGE_SIZE);

	mdev = (struct erofs_map_dev) {
		.m_deviceid = map.m_deviceid,
		.m_pa = map.m_pa,
	};
	ret = erofs_map_dev(sb, &mdev);
	if (ret)
		return ret;

	req = erofs_fscache_req_chain(primary, count);
	if (IS_ERR(req))
		return PTR_ERR(req);

	ret = erofs_fscache_read_folios_async(mdev.m_fscache->cookie,
			req, mdev.m_pa + (pos - map.m_la), count);
	erofs_fscache_req_put(req);
	primary->submitted += count;
	return ret;
}

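/*
 * Submit slices one by one until the whole request is covered or a slice
 * fails; any error is also recorded on the request itself.
 */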
static int erofs_fscache_data_read(struct erofs_fscache_request *req)
{
	int ret;

	do {
		ret = erofs_fscache_data_read_slice(req);
		if (ret)
			req->error = ret;
	} while (!ret && req->submitted < req->len);

	return ret;
}

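/* ->read_folio() of data address spaces. */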
static int erofs_fscache_read_folio(struct file *file, struct folio *folio)
{
	struct erofs_fscache_request *req;
	int ret;

	req = erofs_fscache_req_alloc(folio_mapping(folio),
			folio_pos(folio), folio_size(folio));
	if (IS_ERR(req)) {
		folio_unlock(folio);
		return PTR_ERR(req);
	}

	ret = erofs_fscache_data_read(req);
	erofs_fscache_req_put(req);
	return ret;
}

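/*
 * ->readahead() of data address spaces: a single request covers the whole
 * readahead window, and its completion unlocks the folios.
 */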
static void erofs_fscache_readahead(struct readahead_control *rac)
{
	struct erofs_fscache_request *req;

	if (!readahead_count(rac))
		return;

	req = erofs_fscache_req_alloc(rac->mapping,
			readahead_pos(rac), readahead_length(rac));
	if (IS_ERR(req))
		return;

	/* The request completion will drop refs on the folios. */
	while (readahead_folio(rac))
		;

	erofs_fscache_data_read(req);
	erofs_fscache_req_put(req);
}

static const struct address_space_operations erofs_fscache_meta_aops = {
	.read_folio = erofs_fscache_meta_read_folio,
};

const struct address_space_operations erofs_fscache_access_aops = {
	.read_folio = erofs_fscache_read_folio,
	.readahead = erofs_fscache_readahead,
};

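/*
 * Drop a reference on the domain.  The last put removes the domain from the
 * global list, unmounts the pseudo mount once the list is empty, and
 * relinquishes the backing volume.
 */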
static void erofs_fscache_domain_put(struct erofs_domain *domain)
{
	if (!domain)
		return;
	mutex_lock(&erofs_domain_list_lock);
	if (refcount_dec_and_test(&domain->ref)) {
		list_del(&domain->list);
		if (list_empty(&erofs_domain_list)) {
			kern_unmount(erofs_pseudo_mnt);
			erofs_pseudo_mnt = NULL;
		}
		mutex_unlock(&erofs_domain_list_lock);
		fscache_relinquish_volume(domain->volume, NULL, false);
		kfree(domain->domain_id);
		kfree(domain);
		return;
	}
	mutex_unlock(&erofs_domain_list_lock);
}

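/* Acquire an fscache volume named "erofs,<domain_id>" (or "erofs,<fsid>"). */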
static int erofs_fscache_register_volume(struct super_block *sb)
{
	struct erofs_sb_info *sbi = EROFS_SB(sb);
	char *domain_id = sbi->domain_id;
	struct fscache_volume *volume;
	char *name;
	int ret = 0;

	name = kasprintf(GFP_KERNEL, "erofs,%s",
			 domain_id ? domain_id : sbi->fsid);
	if (!name)
		return -ENOMEM;

	volume = fscache_acquire_volume(name, NULL, NULL, 0);
	if (IS_ERR_OR_NULL(volume)) {
		erofs_err(sb, "failed to register volume for %s", name);
		ret = volume ? PTR_ERR(volume) : -EOPNOTSUPP;
		volume = NULL;
	}

	sbi->volume = volume;
	kfree(name);
	return ret;
}

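/*
 * Create a new domain: register its volume, set up the shared pseudo mount
 * on first use, and put the domain on the global list.  Called with
 * erofs_domain_list_lock held.
 */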
static int erofs_fscache_init_domain(struct super_block *sb)
{
	int err;
	struct erofs_domain *domain;
	struct erofs_sb_info *sbi = EROFS_SB(sb);

	domain = kzalloc(sizeof(struct erofs_domain), GFP_KERNEL);
	if (!domain)
		return -ENOMEM;

	domain->domain_id = kstrdup(sbi->domain_id, GFP_KERNEL);
	if (!domain->domain_id) {
		kfree(domain);
		return -ENOMEM;
	}

	err = erofs_fscache_register_volume(sb);
	if (err)
		goto out;

	if (!erofs_pseudo_mnt) {
		erofs_pseudo_mnt = kern_mount(&erofs_fs_type);
		if (IS_ERR(erofs_pseudo_mnt)) {
			err = PTR_ERR(erofs_pseudo_mnt);
			goto out;
		}
	}

	domain->volume = sbi->volume;
	refcount_set(&domain->ref, 1);
	list_add(&domain->list, &erofs_domain_list);
	sbi->domain = domain;
	return 0;
out:
	kfree(domain->domain_id);
	kfree(domain);
	return err;
}

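/* Join an existing domain with a matching domain_id, or create a new one. */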
static int erofs_fscache_register_domain(struct super_block *sb)
{
	int err;
	struct erofs_domain *domain;
	struct erofs_sb_info *sbi = EROFS_SB(sb);

	mutex_lock(&erofs_domain_list_lock);
	list_for_each_entry(domain, &erofs_domain_list, list) {
		if (!strcmp(domain->domain_id, sbi->domain_id)) {
			sbi->domain = domain;
			sbi->volume = domain->volume;
			refcount_inc(&domain->ref);
			mutex_unlock(&erofs_domain_list_lock);
			return 0;
		}
	}
	err = erofs_fscache_init_domain(sb);
	mutex_unlock(&erofs_domain_list_lock);
	return err;
}

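/*
 * Acquire a cookie for the data blob @name.  With NEED_INODE, also set up
 * an anonymous inode whose page cache (driven by erofs_fscache_meta_aops)
 * buffers reads from the blob.
 */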
static
struct erofs_fscache *erofs_fscache_acquire_cookie(struct super_block *sb,
						   char *name,
						   unsigned int flags)
{
	struct fscache_volume *volume = EROFS_SB(sb)->volume;
	struct erofs_fscache *ctx;
	struct fscache_cookie *cookie;
	int ret;

	ctx = kzalloc(sizeof(*ctx), GFP_KERNEL);
	if (!ctx)
		return ERR_PTR(-ENOMEM);

	cookie = fscache_acquire_cookie(volume, FSCACHE_ADV_WANT_CACHE_SIZE,
					name, strlen(name), NULL, 0, 0);
	if (!cookie) {
		erofs_err(sb, "failed to get cookie for %s", name);
		ret = -EINVAL;
		goto err;
	}

	fscache_use_cookie(cookie, false);
	ctx->cookie = cookie;

	if (flags & EROFS_REG_COOKIE_NEED_INODE) {
		struct inode *const inode = new_inode(sb);

		if (!inode) {
			erofs_err(sb, "failed to get anon inode for %s", name);
			ret = -ENOMEM;
			goto err_cookie;
		}

		set_nlink(inode, 1);
		inode->i_size = OFFSET_MAX;
		inode->i_mapping->a_ops = &erofs_fscache_meta_aops;
		mapping_set_gfp_mask(inode->i_mapping, GFP_NOFS);

		ctx->inode = inode;
	}

	return ctx;

err_cookie:
	fscache_unuse_cookie(ctx->cookie, NULL, NULL);
	fscache_relinquish_cookie(ctx->cookie, false);
err:
	kfree(ctx);
	return ERR_PTR(ret);
}

static void erofs_fscache_relinquish_cookie(struct erofs_fscache *ctx)
{
	fscache_unuse_cookie(ctx->cookie, NULL, NULL);
	fscache_relinquish_cookie(ctx->cookie, false);
	iput(ctx->inode);
	kfree(ctx->name);
	kfree(ctx);
}

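/*
 * Create a cookie context inside a shared domain, tracked by an anonymous
 * inode on the pseudo mount (inode->i_private points back to the context)
 * so that other instances can find and share it.
 */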
static
struct erofs_fscache *erofs_fscache_domain_init_cookie(struct super_block *sb,
						       char *name,
						       unsigned int flags)
{
	int err;
	struct inode *inode;
	struct erofs_fscache *ctx;
	struct erofs_domain *domain = EROFS_SB(sb)->domain;

	ctx = erofs_fscache_acquire_cookie(sb, name, flags);
	if (IS_ERR(ctx))
		return ctx;

	ctx->name = kstrdup(name, GFP_KERNEL);
	if (!ctx->name) {
		err = -ENOMEM;
		goto out;
	}

	inode = new_inode(erofs_pseudo_mnt->mnt_sb);
	if (!inode) {
		err = -ENOMEM;
		goto out;
	}

	ctx->domain = domain;
	ctx->anon_inode = inode;
	inode->i_private = ctx;
	refcount_inc(&domain->ref);
	return ctx;
out:
	erofs_fscache_relinquish_cookie(ctx);
	return ERR_PTR(err);
}

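/*
 * Look up @name among the domain's cookies: reuse an existing context via
 * igrab() unless NEED_NOEXIST demands uniqueness, otherwise create one.
 */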
static
struct erofs_fscache *erofs_domain_register_cookie(struct super_block *sb,
						   char *name,
						   unsigned int flags)
{
	struct inode *inode;
	struct erofs_fscache *ctx;
	struct erofs_domain *domain = EROFS_SB(sb)->domain;
	struct super_block *psb = erofs_pseudo_mnt->mnt_sb;

	mutex_lock(&erofs_domain_cookies_lock);
	spin_lock(&psb->s_inode_list_lock);
	list_for_each_entry(inode, &psb->s_inodes, i_sb_list) {
		ctx = inode->i_private;
		if (!ctx || ctx->domain != domain || strcmp(ctx->name, name))
			continue;
		if (!(flags & EROFS_REG_COOKIE_NEED_NOEXIST)) {
			igrab(inode);
		} else {
			erofs_err(sb, "%s already exists in domain %s", name,
				  domain->domain_id);
			ctx = ERR_PTR(-EEXIST);
		}
		spin_unlock(&psb->s_inode_list_lock);
		mutex_unlock(&erofs_domain_cookies_lock);
		return ctx;
	}
	spin_unlock(&psb->s_inode_list_lock);
	ctx = erofs_fscache_domain_init_cookie(sb, name, flags);
	mutex_unlock(&erofs_domain_cookies_lock);
	return ctx;
}

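/* Register a cookie for @name, going through the domain when one is set. */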
struct erofs_fscache *erofs_fscache_register_cookie(struct super_block *sb,
						    char *name,
						    unsigned int flags)
{
	if (EROFS_SB(sb)->domain_id)
		return erofs_domain_register_cookie(sb, name, flags);
	return erofs_fscache_acquire_cookie(sb, name, flags);
}

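/*
 * Unregister a cookie context.  For domain cookies, only the last user
 * (anon inode i_count == 1) relinquishes the cookie and drops the domain.
 */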
void erofs_fscache_unregister_cookie(struct erofs_fscache *ctx)
{
	bool drop;
	struct erofs_domain *domain;

	if (!ctx)
		return;
	domain = ctx->domain;
	if (domain) {
		mutex_lock(&erofs_domain_cookies_lock);
		drop = atomic_read(&ctx->anon_inode->i_count) == 1;
		iput(ctx->anon_inode);
		mutex_unlock(&erofs_domain_cookies_lock);
		if (!drop)
			return;
	}

	erofs_fscache_relinquish_cookie(ctx);
	erofs_fscache_domain_put(domain);
}

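/*
 * Register the domain (or standalone volume) and the primary data blob
 * (fsid) at mount time.
 */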
int erofs_fscache_register_fs(struct super_block *sb)
{
	int ret;
	struct erofs_sb_info *sbi = EROFS_SB(sb);
	struct erofs_fscache *fscache;
	unsigned int flags;

	if (sbi->domain_id)
		ret = erofs_fscache_register_domain(sb);
	else
		ret = erofs_fscache_register_volume(sb);
	if (ret)
		return ret;

	/*
	 * When the shared domain is enabled, use NEED_NOEXIST to ensure
	 * the primary data blob (aka fsid) is unique within the shared domain.
	 *
	 * For the non-shared-domain case, fscache_acquire_volume() invoked by
	 * erofs_fscache_register_volume() has already guaranteed the
	 * uniqueness of the primary data blob.
	 *
	 * An acquired domain/volume will be relinquished in kill_sb() on error.
	 */
	flags = EROFS_REG_COOKIE_NEED_INODE;
	if (sbi->domain_id)
		flags |= EROFS_REG_COOKIE_NEED_NOEXIST;
	fscache = erofs_fscache_register_cookie(sb, sbi->fsid, flags);
	if (IS_ERR(fscache))
		return PTR_ERR(fscache);

	sbi->s_fscache = fscache;
	return 0;
}

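/* Tear down everything erofs_fscache_register_fs() set up (kill_sb() path). */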
void erofs_fscache_unregister_fs(struct super_block *sb)
{
	struct erofs_sb_info *sbi = EROFS_SB(sb);

	erofs_fscache_unregister_cookie(sbi->s_fscache);

	if (sbi->domain)
		erofs_fscache_domain_put(sbi->domain);
	else
		fscache_relinquish_volume(sbi->volume, NULL, false);

	sbi->s_fscache = NULL;
	sbi->volume = NULL;
	sbi->domain = NULL;
}
644