xref: /openbmc/linux/fs/erofs/fscache.c (revision 7a836736b6537b0e2633381d743d9c1559ce243c)
1 // SPDX-License-Identifier: GPL-2.0-or-later
2 /*
3  * Copyright (C) 2022, Alibaba Cloud
4  * Copyright (C) 2022, Bytedance Inc. All rights reserved.
5  */
6 #include <linux/pseudo_fs.h>
7 #include <linux/fscache.h>
8 #include "internal.h"
9 
/*
 * Shared-domain bookkeeping.
 *
 * erofs_domain_list_lock is held while erofs_domain_list is walked or
 * modified and while erofs_pseudo_mnt is set up or torn down.
 * erofs_domain_cookies_lock is held while erofs_domain_cookies_list is
 * walked or modified.
 */
static LIST_HEAD(erofs_domain_list);
static DEFINE_MUTEX(erofs_domain_list_lock);
static LIST_HEAD(erofs_domain_cookies_list);
static DEFINE_MUTEX(erofs_domain_cookies_lock);
static struct vfsmount *erofs_pseudo_mnt;
15 
16 static int erofs_anon_init_fs_context(struct fs_context *fc)
17 {
18 	return init_pseudo(fc, EROFS_SUPER_MAGIC) ? 0 : -ENOMEM;
19 }
20 
21 static struct file_system_type erofs_anon_fs_type = {
22 	.owner		= THIS_MODULE,
23 	.name           = "pseudo_erofs",
24 	.init_fs_context = erofs_anon_init_fs_context,
25 	.kill_sb        = kill_anon_super,
26 };
27 
28 struct erofs_fscache_request {
29 	struct erofs_fscache_request *primary;
30 	struct netfs_cache_resources cache_resources;
31 	struct address_space	*mapping;	/* The mapping being accessed */
32 	loff_t			start;		/* Start position */
33 	size_t			len;		/* Length of the request */
34 	size_t			submitted;	/* Length of submitted */
35 	short			error;		/* 0 or error that occurred */
36 	refcount_t		ref;
37 };
38 
39 static struct erofs_fscache_request *erofs_fscache_req_alloc(struct address_space *mapping,
40 					     loff_t start, size_t len)
41 {
42 	struct erofs_fscache_request *req;
43 
44 	req = kzalloc(sizeof(struct erofs_fscache_request), GFP_KERNEL);
45 	if (!req)
46 		return ERR_PTR(-ENOMEM);
47 
48 	req->mapping = mapping;
49 	req->start   = start;
50 	req->len     = len;
51 	refcount_set(&req->ref, 1);
52 
53 	return req;
54 }
55 
56 static struct erofs_fscache_request *erofs_fscache_req_chain(struct erofs_fscache_request *primary,
57 					     size_t len)
58 {
59 	struct erofs_fscache_request *req;
60 
61 	/* use primary request for the first submission */
62 	if (!primary->submitted) {
63 		refcount_inc(&primary->ref);
64 		return primary;
65 	}
66 
67 	req = erofs_fscache_req_alloc(primary->mapping,
68 			primary->start + primary->submitted, len);
69 	if (!IS_ERR(req)) {
70 		req->primary = primary;
71 		refcount_inc(&primary->ref);
72 	}
73 	return req;
74 }
75 
76 static void erofs_fscache_req_complete(struct erofs_fscache_request *req)
77 {
78 	struct folio *folio;
79 	bool failed = req->error;
80 	pgoff_t start_page = req->start / PAGE_SIZE;
81 	pgoff_t last_page = ((req->start + req->len) / PAGE_SIZE) - 1;
82 
83 	XA_STATE(xas, &req->mapping->i_pages, start_page);
84 
85 	rcu_read_lock();
86 	xas_for_each(&xas, folio, last_page) {
87 		if (xas_retry(&xas, folio))
88 			continue;
89 		if (!failed)
90 			folio_mark_uptodate(folio);
91 		folio_unlock(folio);
92 	}
93 	rcu_read_unlock();
94 }
95 
96 static void erofs_fscache_req_put(struct erofs_fscache_request *req)
97 {
98 	if (refcount_dec_and_test(&req->ref)) {
99 		if (req->cache_resources.ops)
100 			req->cache_resources.ops->end_operation(&req->cache_resources);
101 		if (!req->primary)
102 			erofs_fscache_req_complete(req);
103 		else
104 			erofs_fscache_req_put(req->primary);
105 		kfree(req);
106 	}
107 }
108 
109 static void erofs_fscache_subreq_complete(void *priv,
110 		ssize_t transferred_or_error, bool was_async)
111 {
112 	struct erofs_fscache_request *req = priv;
113 
114 	if (IS_ERR_VALUE(transferred_or_error)) {
115 		if (req->primary)
116 			req->primary->error = transferred_or_error;
117 		else
118 			req->error = transferred_or_error;
119 	}
120 	erofs_fscache_req_put(req);
121 }
122 
/*
 * Read data from fscache (cookie, pstart, len), and fill the read data into
 * page cache described by (req->mapping, lstart, len). @pstart describes the
 * start physical address in the cache file.
 */
128 static int erofs_fscache_read_folios_async(struct fscache_cookie *cookie,
129 		struct erofs_fscache_request *req, loff_t pstart, size_t len)
130 {
131 	enum netfs_io_source source;
132 	struct super_block *sb = req->mapping->host->i_sb;
133 	struct netfs_cache_resources *cres = &req->cache_resources;
134 	struct iov_iter iter;
135 	loff_t lstart = req->start + req->submitted;
136 	size_t done = 0;
137 	int ret;
138 
139 	DBG_BUGON(len > req->len - req->submitted);
140 
141 	ret = fscache_begin_read_operation(cres, cookie);
142 	if (ret)
143 		return ret;
144 
145 	while (done < len) {
146 		loff_t sstart = pstart + done;
147 		size_t slen = len - done;
148 		unsigned long flags = 1 << NETFS_SREQ_ONDEMAND;
149 
150 		source = cres->ops->prepare_ondemand_read(cres,
151 				sstart, &slen, LLONG_MAX, &flags, 0);
152 		if (WARN_ON(slen == 0))
153 			source = NETFS_INVALID_READ;
154 		if (source != NETFS_READ_FROM_CACHE) {
155 			erofs_err(sb, "failed to fscache prepare_read (source %d)", source);
156 			return -EIO;
157 		}
158 
159 		refcount_inc(&req->ref);
160 		iov_iter_xarray(&iter, ITER_DEST, &req->mapping->i_pages,
161 				lstart + done, slen);
162 
163 		ret = fscache_read(cres, sstart, &iter, NETFS_READ_HOLE_FAIL,
164 				   erofs_fscache_subreq_complete, req);
165 		if (ret == -EIOCBQUEUED)
166 			ret = 0;
167 		if (ret) {
168 			erofs_err(sb, "failed to fscache_read (ret %d)", ret);
169 			return ret;
170 		}
171 
172 		done += slen;
173 	}
174 	DBG_BUGON(done != len);
175 	return 0;
176 }
177 
178 static int erofs_fscache_meta_read_folio(struct file *data, struct folio *folio)
179 {
180 	int ret;
181 	struct erofs_fscache *ctx = folio_mapping(folio)->host->i_private;
182 	struct erofs_fscache_request *req;
183 
184 	req = erofs_fscache_req_alloc(folio_mapping(folio),
185 				folio_pos(folio), folio_size(folio));
186 	if (IS_ERR(req)) {
187 		folio_unlock(folio);
188 		return PTR_ERR(req);
189 	}
190 
191 	ret = erofs_fscache_read_folios_async(ctx->cookie, req,
192 				folio_pos(folio), folio_size(folio));
193 	if (ret)
194 		req->error = ret;
195 
196 	erofs_fscache_req_put(req);
197 	return ret;
198 }
199 
200 static int erofs_fscache_data_read_slice(struct erofs_fscache_request *primary)
201 {
202 	struct address_space *mapping = primary->mapping;
203 	struct inode *inode = mapping->host;
204 	struct super_block *sb = inode->i_sb;
205 	struct erofs_fscache_request *req;
206 	struct erofs_map_blocks map;
207 	struct erofs_map_dev mdev;
208 	struct iov_iter iter;
209 	loff_t pos = primary->start + primary->submitted;
210 	size_t count;
211 	int ret;
212 
213 	map.m_la = pos;
214 	ret = erofs_map_blocks(inode, &map);
215 	if (ret)
216 		return ret;
217 
218 	if (map.m_flags & EROFS_MAP_META) {
219 		struct erofs_buf buf = __EROFS_BUF_INITIALIZER;
220 		erofs_blk_t blknr;
221 		size_t offset, size;
222 		void *src;
223 
224 		/* For tail packing layout, the offset may be non-zero. */
225 		offset = erofs_blkoff(sb, map.m_pa);
226 		blknr = erofs_blknr(sb, map.m_pa);
227 		size = map.m_llen;
228 
229 		src = erofs_read_metabuf(&buf, sb, blknr, EROFS_KMAP);
230 		if (IS_ERR(src))
231 			return PTR_ERR(src);
232 
233 		iov_iter_xarray(&iter, ITER_DEST, &mapping->i_pages, pos, PAGE_SIZE);
234 		if (copy_to_iter(src + offset, size, &iter) != size) {
235 			erofs_put_metabuf(&buf);
236 			return -EFAULT;
237 		}
238 		iov_iter_zero(PAGE_SIZE - size, &iter);
239 		erofs_put_metabuf(&buf);
240 		primary->submitted += PAGE_SIZE;
241 		return 0;
242 	}
243 
244 	count = primary->len - primary->submitted;
245 	if (!(map.m_flags & EROFS_MAP_MAPPED)) {
246 		iov_iter_xarray(&iter, ITER_DEST, &mapping->i_pages, pos, count);
247 		iov_iter_zero(count, &iter);
248 		primary->submitted += count;
249 		return 0;
250 	}
251 
252 	count = min_t(size_t, map.m_llen - (pos - map.m_la), count);
253 	DBG_BUGON(!count || count % PAGE_SIZE);
254 
255 	mdev = (struct erofs_map_dev) {
256 		.m_deviceid = map.m_deviceid,
257 		.m_pa = map.m_pa,
258 	};
259 	ret = erofs_map_dev(sb, &mdev);
260 	if (ret)
261 		return ret;
262 
263 	req = erofs_fscache_req_chain(primary, count);
264 	if (IS_ERR(req))
265 		return PTR_ERR(req);
266 
267 	ret = erofs_fscache_read_folios_async(mdev.m_fscache->cookie,
268 			req, mdev.m_pa + (pos - map.m_la), count);
269 	erofs_fscache_req_put(req);
270 	primary->submitted += count;
271 	return ret;
272 }
273 
274 static int erofs_fscache_data_read(struct erofs_fscache_request *req)
275 {
276 	int ret;
277 
278 	do {
279 		ret = erofs_fscache_data_read_slice(req);
280 		if (ret)
281 			req->error = ret;
282 	} while (!ret && req->submitted < req->len);
283 
284 	return ret;
285 }
286 
/*
 * ->read_folio() of erofs_fscache_access_aops.
 *
 * Builds a request covering exactly @folio and submits it.  If the request
 * cannot be allocated the folio is unlocked here; otherwise dropping the
 * request reference takes care of completion.
 */
static int erofs_fscache_read_folio(struct file *file, struct folio *folio)
{
	struct address_space *mapping = folio_mapping(folio);
	struct erofs_fscache_request *req;
	int ret;

	req = erofs_fscache_req_alloc(mapping, folio_pos(folio),
				      folio_size(folio));
	if (IS_ERR(req)) {
		folio_unlock(folio);
		return PTR_ERR(req);
	}

	ret = erofs_fscache_data_read(req);
	erofs_fscache_req_put(req);
	return ret;
}
303 
304 static void erofs_fscache_readahead(struct readahead_control *rac)
305 {
306 	struct erofs_fscache_request *req;
307 
308 	if (!readahead_count(rac))
309 		return;
310 
311 	req = erofs_fscache_req_alloc(rac->mapping,
312 			readahead_pos(rac), readahead_length(rac));
313 	if (IS_ERR(req))
314 		return;
315 
316 	/* The request completion will drop refs on the folios. */
317 	while (readahead_folio(rac))
318 		;
319 
320 	erofs_fscache_data_read(req);
321 	erofs_fscache_req_put(req);
322 }
323 
324 static const struct address_space_operations erofs_fscache_meta_aops = {
325 	.read_folio = erofs_fscache_meta_read_folio,
326 };
327 
328 const struct address_space_operations erofs_fscache_access_aops = {
329 	.read_folio = erofs_fscache_read_folio,
330 	.readahead = erofs_fscache_readahead,
331 };
332 
333 static void erofs_fscache_domain_put(struct erofs_domain *domain)
334 {
335 	mutex_lock(&erofs_domain_list_lock);
336 	if (refcount_dec_and_test(&domain->ref)) {
337 		list_del(&domain->list);
338 		if (list_empty(&erofs_domain_list)) {
339 			kern_unmount(erofs_pseudo_mnt);
340 			erofs_pseudo_mnt = NULL;
341 		}
342 		fscache_relinquish_volume(domain->volume, NULL, false);
343 		mutex_unlock(&erofs_domain_list_lock);
344 		kfree(domain->domain_id);
345 		kfree(domain);
346 		return;
347 	}
348 	mutex_unlock(&erofs_domain_list_lock);
349 }
350 
351 static int erofs_fscache_register_volume(struct super_block *sb)
352 {
353 	struct erofs_sb_info *sbi = EROFS_SB(sb);
354 	char *domain_id = sbi->domain_id;
355 	struct fscache_volume *volume;
356 	char *name;
357 	int ret = 0;
358 
359 	name = kasprintf(GFP_KERNEL, "erofs,%s",
360 			 domain_id ? domain_id : sbi->fsid);
361 	if (!name)
362 		return -ENOMEM;
363 
364 	volume = fscache_acquire_volume(name, NULL, NULL, 0);
365 	if (IS_ERR_OR_NULL(volume)) {
366 		erofs_err(sb, "failed to register volume for %s", name);
367 		ret = volume ? PTR_ERR(volume) : -EOPNOTSUPP;
368 		volume = NULL;
369 	}
370 
371 	sbi->volume = volume;
372 	kfree(name);
373 	return ret;
374 }
375 
376 static int erofs_fscache_init_domain(struct super_block *sb)
377 {
378 	int err;
379 	struct erofs_domain *domain;
380 	struct erofs_sb_info *sbi = EROFS_SB(sb);
381 
382 	domain = kzalloc(sizeof(struct erofs_domain), GFP_KERNEL);
383 	if (!domain)
384 		return -ENOMEM;
385 
386 	domain->domain_id = kstrdup(sbi->domain_id, GFP_KERNEL);
387 	if (!domain->domain_id) {
388 		kfree(domain);
389 		return -ENOMEM;
390 	}
391 
392 	err = erofs_fscache_register_volume(sb);
393 	if (err)
394 		goto out;
395 
396 	if (!erofs_pseudo_mnt) {
397 		struct vfsmount *mnt = kern_mount(&erofs_anon_fs_type);
398 		if (IS_ERR(mnt)) {
399 			err = PTR_ERR(mnt);
400 			goto out;
401 		}
402 		erofs_pseudo_mnt = mnt;
403 	}
404 
405 	domain->volume = sbi->volume;
406 	refcount_set(&domain->ref, 1);
407 	list_add(&domain->list, &erofs_domain_list);
408 	sbi->domain = domain;
409 	return 0;
410 out:
411 	kfree(domain->domain_id);
412 	kfree(domain);
413 	return err;
414 }
415 
416 static int erofs_fscache_register_domain(struct super_block *sb)
417 {
418 	int err;
419 	struct erofs_domain *domain;
420 	struct erofs_sb_info *sbi = EROFS_SB(sb);
421 
422 	mutex_lock(&erofs_domain_list_lock);
423 	list_for_each_entry(domain, &erofs_domain_list, list) {
424 		if (!strcmp(domain->domain_id, sbi->domain_id)) {
425 			sbi->domain = domain;
426 			sbi->volume = domain->volume;
427 			refcount_inc(&domain->ref);
428 			mutex_unlock(&erofs_domain_list_lock);
429 			return 0;
430 		}
431 	}
432 	err = erofs_fscache_init_domain(sb);
433 	mutex_unlock(&erofs_domain_list_lock);
434 	return err;
435 }
436 
437 static struct erofs_fscache *erofs_fscache_acquire_cookie(struct super_block *sb,
438 						char *name, unsigned int flags)
439 {
440 	struct fscache_volume *volume = EROFS_SB(sb)->volume;
441 	struct erofs_fscache *ctx;
442 	struct fscache_cookie *cookie;
443 	struct super_block *isb;
444 	struct inode *inode;
445 	int ret;
446 
447 	ctx = kzalloc(sizeof(*ctx), GFP_KERNEL);
448 	if (!ctx)
449 		return ERR_PTR(-ENOMEM);
450 	INIT_LIST_HEAD(&ctx->node);
451 	refcount_set(&ctx->ref, 1);
452 
453 	cookie = fscache_acquire_cookie(volume, FSCACHE_ADV_WANT_CACHE_SIZE,
454 					name, strlen(name), NULL, 0, 0);
455 	if (!cookie) {
456 		erofs_err(sb, "failed to get cookie for %s", name);
457 		ret = -EINVAL;
458 		goto err;
459 	}
460 	fscache_use_cookie(cookie, false);
461 
462 	/*
463 	 * Allocate anonymous inode in global pseudo mount for shareable blobs,
464 	 * so that they are accessible among erofs fs instances.
465 	 */
466 	isb = flags & EROFS_REG_COOKIE_SHARE ? erofs_pseudo_mnt->mnt_sb : sb;
467 	inode = new_inode(isb);
468 	if (!inode) {
469 		erofs_err(sb, "failed to get anon inode for %s", name);
470 		ret = -ENOMEM;
471 		goto err_cookie;
472 	}
473 
474 	inode->i_size = OFFSET_MAX;
475 	inode->i_mapping->a_ops = &erofs_fscache_meta_aops;
476 	mapping_set_gfp_mask(inode->i_mapping, GFP_NOFS);
477 	inode->i_blkbits = EROFS_SB(sb)->blkszbits;
478 	inode->i_private = ctx;
479 
480 	ctx->cookie = cookie;
481 	ctx->inode = inode;
482 	return ctx;
483 
484 err_cookie:
485 	fscache_unuse_cookie(cookie, NULL, NULL);
486 	fscache_relinquish_cookie(cookie, false);
487 err:
488 	kfree(ctx);
489 	return ERR_PTR(ret);
490 }
491 
492 static void erofs_fscache_relinquish_cookie(struct erofs_fscache *ctx)
493 {
494 	fscache_unuse_cookie(ctx->cookie, NULL, NULL);
495 	fscache_relinquish_cookie(ctx->cookie, false);
496 	iput(ctx->inode);
497 	kfree(ctx->name);
498 	kfree(ctx);
499 }
500 
501 static struct erofs_fscache *erofs_domain_init_cookie(struct super_block *sb,
502 						char *name, unsigned int flags)
503 {
504 	struct erofs_fscache *ctx;
505 	struct erofs_domain *domain = EROFS_SB(sb)->domain;
506 
507 	ctx = erofs_fscache_acquire_cookie(sb, name, flags);
508 	if (IS_ERR(ctx))
509 		return ctx;
510 
511 	ctx->name = kstrdup(name, GFP_KERNEL);
512 	if (!ctx->name) {
513 		erofs_fscache_relinquish_cookie(ctx);
514 		return ERR_PTR(-ENOMEM);
515 	}
516 
517 	refcount_inc(&domain->ref);
518 	ctx->domain = domain;
519 	list_add(&ctx->node, &erofs_domain_cookies_list);
520 	return ctx;
521 }
522 
523 static struct erofs_fscache *erofs_domain_register_cookie(struct super_block *sb,
524 						char *name, unsigned int flags)
525 {
526 	struct erofs_fscache *ctx;
527 	struct erofs_domain *domain = EROFS_SB(sb)->domain;
528 
529 	flags |= EROFS_REG_COOKIE_SHARE;
530 	mutex_lock(&erofs_domain_cookies_lock);
531 	list_for_each_entry(ctx, &erofs_domain_cookies_list, node) {
532 		if (ctx->domain != domain || strcmp(ctx->name, name))
533 			continue;
534 		if (!(flags & EROFS_REG_COOKIE_NEED_NOEXIST)) {
535 			refcount_inc(&ctx->ref);
536 		} else {
537 			erofs_err(sb, "%s already exists in domain %s", name,
538 				  domain->domain_id);
539 			ctx = ERR_PTR(-EEXIST);
540 		}
541 		mutex_unlock(&erofs_domain_cookies_lock);
542 		return ctx;
543 	}
544 	ctx = erofs_domain_init_cookie(sb, name, flags);
545 	mutex_unlock(&erofs_domain_cookies_lock);
546 	return ctx;
547 }
548 
549 struct erofs_fscache *erofs_fscache_register_cookie(struct super_block *sb,
550 						    char *name,
551 						    unsigned int flags)
552 {
553 	if (EROFS_SB(sb)->domain_id)
554 		return erofs_domain_register_cookie(sb, name, flags);
555 	return erofs_fscache_acquire_cookie(sb, name, flags);
556 }
557 
558 void erofs_fscache_unregister_cookie(struct erofs_fscache *ctx)
559 {
560 	struct erofs_domain *domain = NULL;
561 
562 	if (!ctx)
563 		return;
564 	if (!ctx->domain)
565 		return erofs_fscache_relinquish_cookie(ctx);
566 
567 	mutex_lock(&erofs_domain_cookies_lock);
568 	if (refcount_dec_and_test(&ctx->ref)) {
569 		domain = ctx->domain;
570 		list_del(&ctx->node);
571 		erofs_fscache_relinquish_cookie(ctx);
572 	}
573 	mutex_unlock(&erofs_domain_cookies_lock);
574 	if (domain)
575 		erofs_fscache_domain_put(domain);
576 }
577 
578 int erofs_fscache_register_fs(struct super_block *sb)
579 {
580 	int ret;
581 	struct erofs_sb_info *sbi = EROFS_SB(sb);
582 	struct erofs_fscache *fscache;
583 	unsigned int flags = 0;
584 
585 	if (sbi->domain_id)
586 		ret = erofs_fscache_register_domain(sb);
587 	else
588 		ret = erofs_fscache_register_volume(sb);
589 	if (ret)
590 		return ret;
591 
592 	/*
593 	 * When shared domain is enabled, using NEED_NOEXIST to guarantee
594 	 * the primary data blob (aka fsid) is unique in the shared domain.
595 	 *
596 	 * For non-shared-domain case, fscache_acquire_volume() invoked by
597 	 * erofs_fscache_register_volume() has already guaranteed
598 	 * the uniqueness of primary data blob.
599 	 *
600 	 * Acquired domain/volume will be relinquished in kill_sb() on error.
601 	 */
602 	if (sbi->domain_id)
603 		flags |= EROFS_REG_COOKIE_NEED_NOEXIST;
604 	fscache = erofs_fscache_register_cookie(sb, sbi->fsid, flags);
605 	if (IS_ERR(fscache))
606 		return PTR_ERR(fscache);
607 
608 	sbi->s_fscache = fscache;
609 	return 0;
610 }
611 
612 void erofs_fscache_unregister_fs(struct super_block *sb)
613 {
614 	struct erofs_sb_info *sbi = EROFS_SB(sb);
615 
616 	erofs_fscache_unregister_cookie(sbi->s_fscache);
617 
618 	if (sbi->domain)
619 		erofs_fscache_domain_put(sbi->domain);
620 	else
621 		fscache_relinquish_volume(sbi->volume, NULL, false);
622 
623 	sbi->s_fscache = NULL;
624 	sbi->volume = NULL;
625 	sbi->domain = NULL;
626 }
627