xref: /openbmc/linux/fs/erofs/fscache.c (revision 51b27119)
1 // SPDX-License-Identifier: GPL-2.0-or-later
2 /*
3  * Copyright (C) 2022, Alibaba Cloud
4  * Copyright (C) 2022, Bytedance Inc. All rights reserved.
5  */
6 #include <linux/fscache.h>
7 #include "internal.h"
8 
/* Shared domains known to this module; guarded by erofs_domain_list_lock. */
static LIST_HEAD(erofs_domain_list);
static DEFINE_MUTEX(erofs_domain_list_lock);

/* Cookies registered inside shared domains; guarded by erofs_domain_cookies_lock. */
static LIST_HEAD(erofs_domain_cookies_list);
static DEFINE_MUTEX(erofs_domain_cookies_lock);

/*
 * Pseudo mount whose superblock hosts the anonymous inodes of shareable
 * blobs.  It is set up by erofs_fscache_init_domain() and torn down by
 * erofs_fscache_domain_put() once the domain list becomes empty.
 */
static struct vfsmount *erofs_pseudo_mnt;
14 
15 struct erofs_fscache_request {
16 	struct erofs_fscache_request *primary;
17 	struct netfs_cache_resources cache_resources;
18 	struct address_space	*mapping;	/* The mapping being accessed */
19 	loff_t			start;		/* Start position */
20 	size_t			len;		/* Length of the request */
21 	size_t			submitted;	/* Length of submitted */
22 	short			error;		/* 0 or error that occurred */
23 	refcount_t		ref;
24 };
25 
26 static struct erofs_fscache_request *erofs_fscache_req_alloc(struct address_space *mapping,
27 					     loff_t start, size_t len)
28 {
29 	struct erofs_fscache_request *req;
30 
31 	req = kzalloc(sizeof(struct erofs_fscache_request), GFP_KERNEL);
32 	if (!req)
33 		return ERR_PTR(-ENOMEM);
34 
35 	req->mapping = mapping;
36 	req->start   = start;
37 	req->len     = len;
38 	refcount_set(&req->ref, 1);
39 
40 	return req;
41 }
42 
43 static struct erofs_fscache_request *erofs_fscache_req_chain(struct erofs_fscache_request *primary,
44 					     size_t len)
45 {
46 	struct erofs_fscache_request *req;
47 
48 	/* use primary request for the first submission */
49 	if (!primary->submitted) {
50 		refcount_inc(&primary->ref);
51 		return primary;
52 	}
53 
54 	req = erofs_fscache_req_alloc(primary->mapping,
55 			primary->start + primary->submitted, len);
56 	if (!IS_ERR(req)) {
57 		req->primary = primary;
58 		refcount_inc(&primary->ref);
59 	}
60 	return req;
61 }
62 
63 static void erofs_fscache_req_complete(struct erofs_fscache_request *req)
64 {
65 	struct folio *folio;
66 	bool failed = req->error;
67 	pgoff_t start_page = req->start / PAGE_SIZE;
68 	pgoff_t last_page = ((req->start + req->len) / PAGE_SIZE) - 1;
69 
70 	XA_STATE(xas, &req->mapping->i_pages, start_page);
71 
72 	rcu_read_lock();
73 	xas_for_each(&xas, folio, last_page) {
74 		if (xas_retry(&xas, folio))
75 			continue;
76 		if (!failed)
77 			folio_mark_uptodate(folio);
78 		folio_unlock(folio);
79 	}
80 	rcu_read_unlock();
81 }
82 
83 static void erofs_fscache_req_put(struct erofs_fscache_request *req)
84 {
85 	if (refcount_dec_and_test(&req->ref)) {
86 		if (req->cache_resources.ops)
87 			req->cache_resources.ops->end_operation(&req->cache_resources);
88 		if (!req->primary)
89 			erofs_fscache_req_complete(req);
90 		else
91 			erofs_fscache_req_put(req->primary);
92 		kfree(req);
93 	}
94 }
95 
96 static void erofs_fscache_subreq_complete(void *priv,
97 		ssize_t transferred_or_error, bool was_async)
98 {
99 	struct erofs_fscache_request *req = priv;
100 
101 	if (IS_ERR_VALUE(transferred_or_error)) {
102 		if (req->primary)
103 			req->primary->error = transferred_or_error;
104 		else
105 			req->error = transferred_or_error;
106 	}
107 	erofs_fscache_req_put(req);
108 }
109 
110 /*
111  * Read data from fscache (cookie, pstart, len), and fill the read data into
112  * page cache described by (req->mapping, lstart, len). @pstart describeis the
113  * start physical address in the cache file.
114  */
115 static int erofs_fscache_read_folios_async(struct fscache_cookie *cookie,
116 		struct erofs_fscache_request *req, loff_t pstart, size_t len)
117 {
118 	enum netfs_io_source source;
119 	struct super_block *sb = req->mapping->host->i_sb;
120 	struct netfs_cache_resources *cres = &req->cache_resources;
121 	struct iov_iter iter;
122 	loff_t lstart = req->start + req->submitted;
123 	size_t done = 0;
124 	int ret;
125 
126 	DBG_BUGON(len > req->len - req->submitted);
127 
128 	ret = fscache_begin_read_operation(cres, cookie);
129 	if (ret)
130 		return ret;
131 
132 	while (done < len) {
133 		loff_t sstart = pstart + done;
134 		size_t slen = len - done;
135 		unsigned long flags = 1 << NETFS_SREQ_ONDEMAND;
136 
137 		source = cres->ops->prepare_ondemand_read(cres,
138 				sstart, &slen, LLONG_MAX, &flags, 0);
139 		if (WARN_ON(slen == 0))
140 			source = NETFS_INVALID_READ;
141 		if (source != NETFS_READ_FROM_CACHE) {
142 			erofs_err(sb, "failed to fscache prepare_read (source %d)", source);
143 			return -EIO;
144 		}
145 
146 		refcount_inc(&req->ref);
147 		iov_iter_xarray(&iter, ITER_DEST, &req->mapping->i_pages,
148 				lstart + done, slen);
149 
150 		ret = fscache_read(cres, sstart, &iter, NETFS_READ_HOLE_FAIL,
151 				   erofs_fscache_subreq_complete, req);
152 		if (ret == -EIOCBQUEUED)
153 			ret = 0;
154 		if (ret) {
155 			erofs_err(sb, "failed to fscache_read (ret %d)", ret);
156 			return ret;
157 		}
158 
159 		done += slen;
160 	}
161 	DBG_BUGON(done != len);
162 	return 0;
163 }
164 
165 static int erofs_fscache_meta_read_folio(struct file *data, struct folio *folio)
166 {
167 	int ret;
168 	struct erofs_fscache *ctx = folio_mapping(folio)->host->i_private;
169 	struct erofs_fscache_request *req;
170 
171 	req = erofs_fscache_req_alloc(folio_mapping(folio),
172 				folio_pos(folio), folio_size(folio));
173 	if (IS_ERR(req)) {
174 		folio_unlock(folio);
175 		return PTR_ERR(req);
176 	}
177 
178 	ret = erofs_fscache_read_folios_async(ctx->cookie, req,
179 				folio_pos(folio), folio_size(folio));
180 	if (ret)
181 		req->error = ret;
182 
183 	erofs_fscache_req_put(req);
184 	return ret;
185 }
186 
187 static int erofs_fscache_data_read_slice(struct erofs_fscache_request *primary)
188 {
189 	struct address_space *mapping = primary->mapping;
190 	struct inode *inode = mapping->host;
191 	struct super_block *sb = inode->i_sb;
192 	struct erofs_fscache_request *req;
193 	struct erofs_map_blocks map;
194 	struct erofs_map_dev mdev;
195 	struct iov_iter iter;
196 	loff_t pos = primary->start + primary->submitted;
197 	size_t count;
198 	int ret;
199 
200 	map.m_la = pos;
201 	ret = erofs_map_blocks(inode, &map);
202 	if (ret)
203 		return ret;
204 
205 	if (map.m_flags & EROFS_MAP_META) {
206 		struct erofs_buf buf = __EROFS_BUF_INITIALIZER;
207 		erofs_blk_t blknr;
208 		size_t offset, size;
209 		void *src;
210 
211 		/* For tail packing layout, the offset may be non-zero. */
212 		offset = erofs_blkoff(sb, map.m_pa);
213 		blknr = erofs_blknr(sb, map.m_pa);
214 		size = map.m_llen;
215 
216 		src = erofs_read_metabuf(&buf, sb, blknr, EROFS_KMAP);
217 		if (IS_ERR(src))
218 			return PTR_ERR(src);
219 
220 		iov_iter_xarray(&iter, ITER_DEST, &mapping->i_pages, pos, PAGE_SIZE);
221 		if (copy_to_iter(src + offset, size, &iter) != size) {
222 			erofs_put_metabuf(&buf);
223 			return -EFAULT;
224 		}
225 		iov_iter_zero(PAGE_SIZE - size, &iter);
226 		erofs_put_metabuf(&buf);
227 		primary->submitted += PAGE_SIZE;
228 		return 0;
229 	}
230 
231 	count = primary->len - primary->submitted;
232 	if (!(map.m_flags & EROFS_MAP_MAPPED)) {
233 		iov_iter_xarray(&iter, ITER_DEST, &mapping->i_pages, pos, count);
234 		iov_iter_zero(count, &iter);
235 		primary->submitted += count;
236 		return 0;
237 	}
238 
239 	count = min_t(size_t, map.m_llen - (pos - map.m_la), count);
240 	DBG_BUGON(!count || count % PAGE_SIZE);
241 
242 	mdev = (struct erofs_map_dev) {
243 		.m_deviceid = map.m_deviceid,
244 		.m_pa = map.m_pa,
245 	};
246 	ret = erofs_map_dev(sb, &mdev);
247 	if (ret)
248 		return ret;
249 
250 	req = erofs_fscache_req_chain(primary, count);
251 	if (IS_ERR(req))
252 		return PTR_ERR(req);
253 
254 	ret = erofs_fscache_read_folios_async(mdev.m_fscache->cookie,
255 			req, mdev.m_pa + (pos - map.m_la), count);
256 	erofs_fscache_req_put(req);
257 	primary->submitted += count;
258 	return ret;
259 }
260 
261 static int erofs_fscache_data_read(struct erofs_fscache_request *req)
262 {
263 	int ret;
264 
265 	do {
266 		ret = erofs_fscache_data_read_slice(req);
267 		if (ret)
268 			req->error = ret;
269 	} while (!ret && req->submitted < req->len);
270 
271 	return ret;
272 }
273 
/*
 * ->read_folio() for erofs_fscache_access_aops: build a request spanning
 * exactly @folio and run it through erofs_fscache_data_read().
 *
 * If the request cannot be allocated the folio is unlocked here; otherwise
 * that happens when the request completes.  Returns 0 or a negative errno.
 */
static int erofs_fscache_read_folio(struct file *file, struct folio *folio)
{
	struct erofs_fscache_request *req;
	int err;

	req = erofs_fscache_req_alloc(folio_mapping(folio),
			folio_pos(folio), folio_size(folio));
	if (!IS_ERR(req)) {
		err = erofs_fscache_data_read(req);
		/* drop the allocation reference */
		erofs_fscache_req_put(req);
		return err;
	}

	folio_unlock(folio);
	return PTR_ERR(req);
}
290 
291 static void erofs_fscache_readahead(struct readahead_control *rac)
292 {
293 	struct erofs_fscache_request *req;
294 
295 	if (!readahead_count(rac))
296 		return;
297 
298 	req = erofs_fscache_req_alloc(rac->mapping,
299 			readahead_pos(rac), readahead_length(rac));
300 	if (IS_ERR(req))
301 		return;
302 
303 	/* The request completion will drop refs on the folios. */
304 	while (readahead_folio(rac))
305 		;
306 
307 	erofs_fscache_data_read(req);
308 	erofs_fscache_req_put(req);
309 }
310 
311 static const struct address_space_operations erofs_fscache_meta_aops = {
312 	.read_folio = erofs_fscache_meta_read_folio,
313 };
314 
315 const struct address_space_operations erofs_fscache_access_aops = {
316 	.read_folio = erofs_fscache_read_folio,
317 	.readahead = erofs_fscache_readahead,
318 };
319 
320 static void erofs_fscache_domain_put(struct erofs_domain *domain)
321 {
322 	mutex_lock(&erofs_domain_list_lock);
323 	if (refcount_dec_and_test(&domain->ref)) {
324 		list_del(&domain->list);
325 		if (list_empty(&erofs_domain_list)) {
326 			kern_unmount(erofs_pseudo_mnt);
327 			erofs_pseudo_mnt = NULL;
328 		}
329 		fscache_relinquish_volume(domain->volume, NULL, false);
330 		mutex_unlock(&erofs_domain_list_lock);
331 		kfree(domain->domain_id);
332 		kfree(domain);
333 		return;
334 	}
335 	mutex_unlock(&erofs_domain_list_lock);
336 }
337 
338 static int erofs_fscache_register_volume(struct super_block *sb)
339 {
340 	struct erofs_sb_info *sbi = EROFS_SB(sb);
341 	char *domain_id = sbi->domain_id;
342 	struct fscache_volume *volume;
343 	char *name;
344 	int ret = 0;
345 
346 	name = kasprintf(GFP_KERNEL, "erofs,%s",
347 			 domain_id ? domain_id : sbi->fsid);
348 	if (!name)
349 		return -ENOMEM;
350 
351 	volume = fscache_acquire_volume(name, NULL, NULL, 0);
352 	if (IS_ERR_OR_NULL(volume)) {
353 		erofs_err(sb, "failed to register volume for %s", name);
354 		ret = volume ? PTR_ERR(volume) : -EOPNOTSUPP;
355 		volume = NULL;
356 	}
357 
358 	sbi->volume = volume;
359 	kfree(name);
360 	return ret;
361 }
362 
363 static int erofs_fscache_init_domain(struct super_block *sb)
364 {
365 	int err;
366 	struct erofs_domain *domain;
367 	struct erofs_sb_info *sbi = EROFS_SB(sb);
368 
369 	domain = kzalloc(sizeof(struct erofs_domain), GFP_KERNEL);
370 	if (!domain)
371 		return -ENOMEM;
372 
373 	domain->domain_id = kstrdup(sbi->domain_id, GFP_KERNEL);
374 	if (!domain->domain_id) {
375 		kfree(domain);
376 		return -ENOMEM;
377 	}
378 
379 	err = erofs_fscache_register_volume(sb);
380 	if (err)
381 		goto out;
382 
383 	if (!erofs_pseudo_mnt) {
384 		struct vfsmount *mnt = kern_mount(&erofs_fs_type);
385 		if (IS_ERR(mnt)) {
386 			err = PTR_ERR(mnt);
387 			goto out;
388 		}
389 		erofs_pseudo_mnt = mnt;
390 	}
391 
392 	domain->volume = sbi->volume;
393 	refcount_set(&domain->ref, 1);
394 	list_add(&domain->list, &erofs_domain_list);
395 	sbi->domain = domain;
396 	return 0;
397 out:
398 	kfree(domain->domain_id);
399 	kfree(domain);
400 	return err;
401 }
402 
403 static int erofs_fscache_register_domain(struct super_block *sb)
404 {
405 	int err;
406 	struct erofs_domain *domain;
407 	struct erofs_sb_info *sbi = EROFS_SB(sb);
408 
409 	mutex_lock(&erofs_domain_list_lock);
410 	list_for_each_entry(domain, &erofs_domain_list, list) {
411 		if (!strcmp(domain->domain_id, sbi->domain_id)) {
412 			sbi->domain = domain;
413 			sbi->volume = domain->volume;
414 			refcount_inc(&domain->ref);
415 			mutex_unlock(&erofs_domain_list_lock);
416 			return 0;
417 		}
418 	}
419 	err = erofs_fscache_init_domain(sb);
420 	mutex_unlock(&erofs_domain_list_lock);
421 	return err;
422 }
423 
424 static struct erofs_fscache *erofs_fscache_acquire_cookie(struct super_block *sb,
425 						char *name, unsigned int flags)
426 {
427 	struct fscache_volume *volume = EROFS_SB(sb)->volume;
428 	struct erofs_fscache *ctx;
429 	struct fscache_cookie *cookie;
430 	struct super_block *isb;
431 	struct inode *inode;
432 	int ret;
433 
434 	ctx = kzalloc(sizeof(*ctx), GFP_KERNEL);
435 	if (!ctx)
436 		return ERR_PTR(-ENOMEM);
437 	INIT_LIST_HEAD(&ctx->node);
438 	refcount_set(&ctx->ref, 1);
439 
440 	cookie = fscache_acquire_cookie(volume, FSCACHE_ADV_WANT_CACHE_SIZE,
441 					name, strlen(name), NULL, 0, 0);
442 	if (!cookie) {
443 		erofs_err(sb, "failed to get cookie for %s", name);
444 		ret = -EINVAL;
445 		goto err;
446 	}
447 	fscache_use_cookie(cookie, false);
448 
449 	/*
450 	 * Allocate anonymous inode in global pseudo mount for shareable blobs,
451 	 * so that they are accessible among erofs fs instances.
452 	 */
453 	isb = flags & EROFS_REG_COOKIE_SHARE ? erofs_pseudo_mnt->mnt_sb : sb;
454 	inode = new_inode(isb);
455 	if (!inode) {
456 		erofs_err(sb, "failed to get anon inode for %s", name);
457 		ret = -ENOMEM;
458 		goto err_cookie;
459 	}
460 
461 	inode->i_size = OFFSET_MAX;
462 	inode->i_mapping->a_ops = &erofs_fscache_meta_aops;
463 	mapping_set_gfp_mask(inode->i_mapping, GFP_NOFS);
464 	inode->i_blkbits = EROFS_SB(sb)->blkszbits;
465 	inode->i_private = ctx;
466 
467 	ctx->cookie = cookie;
468 	ctx->inode = inode;
469 	return ctx;
470 
471 err_cookie:
472 	fscache_unuse_cookie(cookie, NULL, NULL);
473 	fscache_relinquish_cookie(cookie, false);
474 err:
475 	kfree(ctx);
476 	return ERR_PTR(ret);
477 }
478 
479 static void erofs_fscache_relinquish_cookie(struct erofs_fscache *ctx)
480 {
481 	fscache_unuse_cookie(ctx->cookie, NULL, NULL);
482 	fscache_relinquish_cookie(ctx->cookie, false);
483 	iput(ctx->inode);
484 	kfree(ctx->name);
485 	kfree(ctx);
486 }
487 
488 static struct erofs_fscache *erofs_domain_init_cookie(struct super_block *sb,
489 						char *name, unsigned int flags)
490 {
491 	struct erofs_fscache *ctx;
492 	struct erofs_domain *domain = EROFS_SB(sb)->domain;
493 
494 	ctx = erofs_fscache_acquire_cookie(sb, name, flags);
495 	if (IS_ERR(ctx))
496 		return ctx;
497 
498 	ctx->name = kstrdup(name, GFP_KERNEL);
499 	if (!ctx->name) {
500 		erofs_fscache_relinquish_cookie(ctx);
501 		return ERR_PTR(-ENOMEM);
502 	}
503 
504 	refcount_inc(&domain->ref);
505 	ctx->domain = domain;
506 	list_add(&ctx->node, &erofs_domain_cookies_list);
507 	return ctx;
508 }
509 
510 static struct erofs_fscache *erofs_domain_register_cookie(struct super_block *sb,
511 						char *name, unsigned int flags)
512 {
513 	struct erofs_fscache *ctx;
514 	struct erofs_domain *domain = EROFS_SB(sb)->domain;
515 
516 	flags |= EROFS_REG_COOKIE_SHARE;
517 	mutex_lock(&erofs_domain_cookies_lock);
518 	list_for_each_entry(ctx, &erofs_domain_cookies_list, node) {
519 		if (ctx->domain != domain || strcmp(ctx->name, name))
520 			continue;
521 		if (!(flags & EROFS_REG_COOKIE_NEED_NOEXIST)) {
522 			refcount_inc(&ctx->ref);
523 		} else {
524 			erofs_err(sb, "%s already exists in domain %s", name,
525 				  domain->domain_id);
526 			ctx = ERR_PTR(-EEXIST);
527 		}
528 		mutex_unlock(&erofs_domain_cookies_lock);
529 		return ctx;
530 	}
531 	ctx = erofs_domain_init_cookie(sb, name, flags);
532 	mutex_unlock(&erofs_domain_cookies_lock);
533 	return ctx;
534 }
535 
536 struct erofs_fscache *erofs_fscache_register_cookie(struct super_block *sb,
537 						    char *name,
538 						    unsigned int flags)
539 {
540 	if (EROFS_SB(sb)->domain_id)
541 		return erofs_domain_register_cookie(sb, name, flags);
542 	return erofs_fscache_acquire_cookie(sb, name, flags);
543 }
544 
545 void erofs_fscache_unregister_cookie(struct erofs_fscache *ctx)
546 {
547 	struct erofs_domain *domain = NULL;
548 
549 	if (!ctx)
550 		return;
551 	if (!ctx->domain)
552 		return erofs_fscache_relinquish_cookie(ctx);
553 
554 	mutex_lock(&erofs_domain_cookies_lock);
555 	if (refcount_dec_and_test(&ctx->ref)) {
556 		domain = ctx->domain;
557 		list_del(&ctx->node);
558 		erofs_fscache_relinquish_cookie(ctx);
559 	}
560 	mutex_unlock(&erofs_domain_cookies_lock);
561 	if (domain)
562 		erofs_fscache_domain_put(domain);
563 }
564 
565 int erofs_fscache_register_fs(struct super_block *sb)
566 {
567 	int ret;
568 	struct erofs_sb_info *sbi = EROFS_SB(sb);
569 	struct erofs_fscache *fscache;
570 	unsigned int flags = 0;
571 
572 	if (sbi->domain_id)
573 		ret = erofs_fscache_register_domain(sb);
574 	else
575 		ret = erofs_fscache_register_volume(sb);
576 	if (ret)
577 		return ret;
578 
579 	/*
580 	 * When shared domain is enabled, using NEED_NOEXIST to guarantee
581 	 * the primary data blob (aka fsid) is unique in the shared domain.
582 	 *
583 	 * For non-shared-domain case, fscache_acquire_volume() invoked by
584 	 * erofs_fscache_register_volume() has already guaranteed
585 	 * the uniqueness of primary data blob.
586 	 *
587 	 * Acquired domain/volume will be relinquished in kill_sb() on error.
588 	 */
589 	if (sbi->domain_id)
590 		flags |= EROFS_REG_COOKIE_NEED_NOEXIST;
591 	fscache = erofs_fscache_register_cookie(sb, sbi->fsid, flags);
592 	if (IS_ERR(fscache))
593 		return PTR_ERR(fscache);
594 
595 	sbi->s_fscache = fscache;
596 	return 0;
597 }
598 
599 void erofs_fscache_unregister_fs(struct super_block *sb)
600 {
601 	struct erofs_sb_info *sbi = EROFS_SB(sb);
602 
603 	erofs_fscache_unregister_cookie(sbi->s_fscache);
604 
605 	if (sbi->domain)
606 		erofs_fscache_domain_put(sbi->domain);
607 	else
608 		fscache_relinquish_volume(sbi->volume, NULL, false);
609 
610 	sbi->s_fscache = NULL;
611 	sbi->volume = NULL;
612 	sbi->domain = NULL;
613 }
614