xref: /openbmc/linux/fs/erofs/fscache.c (revision 709fe09e)
// SPDX-License-Identifier: GPL-2.0-or-later
/*
 * Copyright (C) 2022, Alibaba Cloud
 * Copyright (C) 2022, Bytedance Inc. All rights reserved.
 */
#include <linux/fscache.h>
#include "internal.h"

static DEFINE_MUTEX(erofs_domain_list_lock);
static DEFINE_MUTEX(erofs_domain_cookies_lock);
static LIST_HEAD(erofs_domain_list);
static struct vfsmount *erofs_pseudo_mnt;

struct erofs_fscache_request {
	struct netfs_cache_resources cache_resources;
	struct address_space	*mapping;	/* The mapping being accessed */
	loff_t			start;		/* Start position */
	size_t			len;		/* Length of the request */
	size_t			submitted;	/* Length submitted so far */
	short			error;		/* 0 or error that occurred */
	refcount_t		ref;
};

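/*
 * Allocate a read request covering the page cache range
 * [start, start + len) of @mapping.
 */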
static struct erofs_fscache_request *erofs_fscache_req_alloc(struct address_space *mapping,
					     loff_t start, size_t len)
{
	struct erofs_fscache_request *req;

	req = kzalloc(sizeof(struct erofs_fscache_request), GFP_KERNEL);
	if (!req)
		return ERR_PTR(-ENOMEM);

	req->mapping = mapping;
	req->start   = start;
	req->len     = len;
	refcount_set(&req->ref, 1);

	return req;
}

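/*
 * Called when the last reference to the request is dropped: mark the folios
 * covered by the request uptodate (unless an error was recorded), unlock
 * them, and end the cache operation.
 */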
static void erofs_fscache_req_complete(struct erofs_fscache_request *req)
{
	struct folio *folio;
	bool failed = req->error;
	pgoff_t start_page = req->start / PAGE_SIZE;
	pgoff_t last_page = ((req->start + req->len) / PAGE_SIZE) - 1;

	XA_STATE(xas, &req->mapping->i_pages, start_page);

	rcu_read_lock();
	xas_for_each(&xas, folio, last_page) {
		if (xas_retry(&xas, folio))
			continue;
		if (!failed)
			folio_mark_uptodate(folio);
		folio_unlock(folio);
	}
	rcu_read_unlock();

	if (req->cache_resources.ops)
		req->cache_resources.ops->end_operation(&req->cache_resources);

	kfree(req);
}

static void erofs_fscache_req_put(struct erofs_fscache_request *req)
{
	if (refcount_dec_and_test(&req->ref))
		erofs_fscache_req_complete(req);
}

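/*
 * Completion callback of a single fscache_read() sub-request: record the
 * error (if any) and drop the sub-request's reference on the parent request.
 */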
static void erofs_fscache_subreq_complete(void *priv,
		ssize_t transferred_or_error, bool was_async)
{
	struct erofs_fscache_request *req = priv;

	if (IS_ERR_VALUE(transferred_or_error))
		req->error = transferred_or_error;
	erofs_fscache_req_put(req);
}

/*
 * Read data from fscache (cookie, pstart, len), and fill the read data into
 * the page cache described by (req->mapping, lstart, len). @pstart describes
 * the physical start address in the cache file.
 */
static int erofs_fscache_read_folios_async(struct fscache_cookie *cookie,
		struct erofs_fscache_request *req, loff_t pstart, size_t len)
{
	enum netfs_io_source source;
	struct super_block *sb = req->mapping->host->i_sb;
	struct netfs_cache_resources *cres = &req->cache_resources;
	struct iov_iter iter;
	loff_t lstart = req->start + req->submitted;
	size_t done = 0;
	int ret;

	DBG_BUGON(len > req->len - req->submitted);

	ret = fscache_begin_read_operation(cres, cookie);
	if (ret)
		return ret;

	while (done < len) {
		loff_t sstart = pstart + done;
		size_t slen = len - done;
		unsigned long flags = 1 << NETFS_SREQ_ONDEMAND;

		source = cres->ops->prepare_ondemand_read(cres,
				sstart, &slen, LLONG_MAX, &flags, 0);
		if (WARN_ON(slen == 0))
			source = NETFS_INVALID_READ;
		if (source != NETFS_READ_FROM_CACHE) {
			erofs_err(sb, "failed to fscache prepare_read (source %d)", source);
			return -EIO;
		}

		refcount_inc(&req->ref);
		iov_iter_xarray(&iter, READ, &req->mapping->i_pages,
				lstart + done, slen);

		ret = fscache_read(cres, sstart, &iter, NETFS_READ_HOLE_FAIL,
				   erofs_fscache_subreq_complete, req);
		if (ret == -EIOCBQUEUED)
			ret = 0;
		if (ret) {
			erofs_err(sb, "failed to fscache_read (ret %d)", ret);
			return ret;
		}

		done += slen;
	}
	DBG_BUGON(done != len);
	req->submitted += len;
	return 0;
}

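/*
 * .read_folio() for metadata pseudo inodes: the folio offset is the physical
 * address in the primary device blob; read it in through fscache.
 */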
static int erofs_fscache_meta_read_folio(struct file *data, struct folio *folio)
{
	int ret;
	struct super_block *sb = folio_mapping(folio)->host->i_sb;
	struct erofs_fscache_request *req;
	struct erofs_map_dev mdev = {
		.m_deviceid = 0,
		.m_pa = folio_pos(folio),
	};

	ret = erofs_map_dev(sb, &mdev);
	if (ret) {
		folio_unlock(folio);
		return ret;
	}

	req = erofs_fscache_req_alloc(folio_mapping(folio),
				folio_pos(folio), folio_size(folio));
	if (IS_ERR(req)) {
		folio_unlock(folio);
		return PTR_ERR(req);
	}

	ret = erofs_fscache_read_folios_async(mdev.m_fscache->cookie,
				req, mdev.m_pa, folio_size(folio));
	if (ret)
		req->error = ret;

	erofs_fscache_req_put(req);
	return ret;
}

/*
 * Read into the page cache in the range described by (@pos, @len).
 *
 * On return, if the output @unlock is true, the caller is responsible for
 * unlocking the pages; otherwise the callee takes that responsibility through
 * request completion.
 *
 * The return value is the number of bytes successfully handled, or a negative
 * error code on failure. The only exception is that, once a request has been
 * allocated, the length of the range is returned instead of an error code on
 * failure, so that .readahead() can advance @rac accordingly.
 */
static int erofs_fscache_data_read(struct address_space *mapping,
				   loff_t pos, size_t len, bool *unlock)
{
	struct inode *inode = mapping->host;
	struct super_block *sb = inode->i_sb;
	struct erofs_fscache_request *req;
	struct erofs_map_blocks map;
	struct erofs_map_dev mdev;
	struct iov_iter iter;
	size_t count;
	int ret;

	*unlock = true;

	map.m_la = pos;
	ret = erofs_map_blocks(inode, &map, EROFS_GET_BLOCKS_RAW);
	if (ret)
		return ret;

	if (map.m_flags & EROFS_MAP_META) {
		struct erofs_buf buf = __EROFS_BUF_INITIALIZER;
		erofs_blk_t blknr;
		size_t offset, size;
		void *src;

		/* For tail packing layout, the offset may be non-zero. */
		offset = erofs_blkoff(map.m_pa);
		blknr = erofs_blknr(map.m_pa);
		size = map.m_llen;

		src = erofs_read_metabuf(&buf, sb, blknr, EROFS_KMAP);
		if (IS_ERR(src))
			return PTR_ERR(src);

		iov_iter_xarray(&iter, READ, &mapping->i_pages, pos, PAGE_SIZE);
		if (copy_to_iter(src + offset, size, &iter) != size) {
			erofs_put_metabuf(&buf);
			return -EFAULT;
		}
		iov_iter_zero(PAGE_SIZE - size, &iter);
		erofs_put_metabuf(&buf);
		return PAGE_SIZE;
	}

	if (!(map.m_flags & EROFS_MAP_MAPPED)) {
		count = len;
		iov_iter_xarray(&iter, READ, &mapping->i_pages, pos, count);
		iov_iter_zero(count, &iter);
		return count;
	}

	count = min_t(size_t, map.m_llen - (pos - map.m_la), len);
	DBG_BUGON(!count || count % PAGE_SIZE);

	mdev = (struct erofs_map_dev) {
		.m_deviceid = map.m_deviceid,
		.m_pa = map.m_pa,
	};
	ret = erofs_map_dev(sb, &mdev);
	if (ret)
		return ret;

	req = erofs_fscache_req_alloc(mapping, pos, count);
	if (IS_ERR(req))
		return PTR_ERR(req);

	*unlock = false;
	ret = erofs_fscache_read_folios_async(mdev.m_fscache->cookie,
			req, mdev.m_pa + (pos - map.m_la), count);
	if (ret)
		req->error = ret;

	erofs_fscache_req_put(req);
	return count;
}

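/* .read_folio() for regular file data: read one block-sized folio. */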
static int erofs_fscache_read_folio(struct file *file, struct folio *folio)
{
	bool unlock;
	int ret;

	DBG_BUGON(folio_size(folio) != EROFS_BLKSIZ);

	ret = erofs_fscache_data_read(folio_mapping(folio), folio_pos(folio),
				      folio_size(folio), &unlock);
	if (unlock) {
		if (ret > 0)
			folio_mark_uptodate(folio);
		folio_unlock(folio);
	}
	return ret < 0 ? ret : 0;
}

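/*
 * .readahead(): read the range chunk by chunk; erofs_fscache_data_read()
 * returns the number of bytes handled, so the loop can advance @rac and
 * unlock the folios itself whenever @unlock asks it to.
 */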
static void erofs_fscache_readahead(struct readahead_control *rac)
{
	struct folio *folio;
	size_t len, done = 0;
	loff_t start, pos;
	bool unlock;
	int ret, size;

	if (!readahead_count(rac))
		return;

	start = readahead_pos(rac);
	len = readahead_length(rac);

	do {
		pos = start + done;
		ret = erofs_fscache_data_read(rac->mapping, pos,
					      len - done, &unlock);
		if (ret <= 0)
			return;

		size = ret;
		while (size) {
			folio = readahead_folio(rac);
			size -= folio_size(folio);
			if (unlock) {
				folio_mark_uptodate(folio);
				folio_unlock(folio);
			}
		}
	} while ((done += ret) < len);
}

static const struct address_space_operations erofs_fscache_meta_aops = {
	.read_folio = erofs_fscache_meta_read_folio,
};

const struct address_space_operations erofs_fscache_access_aops = {
	.read_folio = erofs_fscache_read_folio,
	.readahead = erofs_fscache_readahead,
};

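/*
 * Drop a reference on a shared domain.  The last put removes the domain from
 * the global list, unmounts the pseudo mount once the list becomes empty, and
 * relinquishes the backing fscache volume.
 */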
static void erofs_fscache_domain_put(struct erofs_domain *domain)
{
	if (!domain)
		return;
	mutex_lock(&erofs_domain_list_lock);
	if (refcount_dec_and_test(&domain->ref)) {
		list_del(&domain->list);
		if (list_empty(&erofs_domain_list)) {
			kern_unmount(erofs_pseudo_mnt);
			erofs_pseudo_mnt = NULL;
		}
		mutex_unlock(&erofs_domain_list_lock);
		fscache_relinquish_volume(domain->volume, NULL, false);
		kfree(domain->domain_id);
		kfree(domain);
		return;
	}
	mutex_unlock(&erofs_domain_list_lock);
}

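/*
 * Acquire the fscache volume "erofs,<domain_id>" (or "erofs,<fsid>" when no
 * shared domain is used) for this super block.
 */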
static int erofs_fscache_register_volume(struct super_block *sb)
{
	struct erofs_sb_info *sbi = EROFS_SB(sb);
	char *domain_id = sbi->domain_id;
	struct fscache_volume *volume;
	char *name;
	int ret = 0;

	name = kasprintf(GFP_KERNEL, "erofs,%s",
			 domain_id ? domain_id : sbi->fsid);
	if (!name)
		return -ENOMEM;

	volume = fscache_acquire_volume(name, NULL, NULL, 0);
	if (IS_ERR_OR_NULL(volume)) {
		erofs_err(sb, "failed to register volume for %s", name);
		ret = volume ? PTR_ERR(volume) : -EOPNOTSUPP;
		volume = NULL;
	}

	sbi->volume = volume;
	kfree(name);
	return ret;
}

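/*
 * Create a new shared domain: register its volume, set up the global pseudo
 * mount on first use and add the domain to the global domain list.  Called
 * with erofs_domain_list_lock held.
 */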
static int erofs_fscache_init_domain(struct super_block *sb)
{
	int err;
	struct erofs_domain *domain;
	struct erofs_sb_info *sbi = EROFS_SB(sb);

	domain = kzalloc(sizeof(struct erofs_domain), GFP_KERNEL);
	if (!domain)
		return -ENOMEM;

	domain->domain_id = kstrdup(sbi->domain_id, GFP_KERNEL);
	if (!domain->domain_id) {
		kfree(domain);
		return -ENOMEM;
	}

	err = erofs_fscache_register_volume(sb);
	if (err)
		goto out;

	if (!erofs_pseudo_mnt) {
		erofs_pseudo_mnt = kern_mount(&erofs_fs_type);
		if (IS_ERR(erofs_pseudo_mnt)) {
			err = PTR_ERR(erofs_pseudo_mnt);
			goto out;
		}
	}

	domain->volume = sbi->volume;
	refcount_set(&domain->ref, 1);
	list_add(&domain->list, &erofs_domain_list);
	sbi->domain = domain;
	return 0;
out:
	kfree(domain->domain_id);
	kfree(domain);
	return err;
}

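/*
 * Attach the super block to the shared domain named by sbi->domain_id,
 * reusing an already registered domain if one exists.
 */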
static int erofs_fscache_register_domain(struct super_block *sb)
{
	int err;
	struct erofs_domain *domain;
	struct erofs_sb_info *sbi = EROFS_SB(sb);

	mutex_lock(&erofs_domain_list_lock);
	list_for_each_entry(domain, &erofs_domain_list, list) {
		if (!strcmp(domain->domain_id, sbi->domain_id)) {
			sbi->domain = domain;
			sbi->volume = domain->volume;
			refcount_inc(&domain->ref);
			mutex_unlock(&erofs_domain_list_lock);
			return 0;
		}
	}
	err = erofs_fscache_init_domain(sb);
	mutex_unlock(&erofs_domain_list_lock);
	return err;
}

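/*
 * Acquire an fscache cookie for the data blob @name.  With
 * EROFS_REG_COOKIE_NEED_INODE an anonymous inode is also allocated, whose
 * page cache backs metadata reads from the blob.
 */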
static
struct erofs_fscache *erofs_fscache_acquire_cookie(struct super_block *sb,
						   char *name,
						   unsigned int flags)
{
	struct fscache_volume *volume = EROFS_SB(sb)->volume;
	struct erofs_fscache *ctx;
	struct fscache_cookie *cookie;
	int ret;

	ctx = kzalloc(sizeof(*ctx), GFP_KERNEL);
	if (!ctx)
		return ERR_PTR(-ENOMEM);

	cookie = fscache_acquire_cookie(volume, FSCACHE_ADV_WANT_CACHE_SIZE,
					name, strlen(name), NULL, 0, 0);
	if (!cookie) {
		erofs_err(sb, "failed to get cookie for %s", name);
		ret = -EINVAL;
		goto err;
	}

	fscache_use_cookie(cookie, false);
	ctx->cookie = cookie;

	if (flags & EROFS_REG_COOKIE_NEED_INODE) {
		struct inode *const inode = new_inode(sb);

		if (!inode) {
			erofs_err(sb, "failed to get anon inode for %s", name);
			ret = -ENOMEM;
			goto err_cookie;
		}

		set_nlink(inode, 1);
		inode->i_size = OFFSET_MAX;
		inode->i_mapping->a_ops = &erofs_fscache_meta_aops;
		mapping_set_gfp_mask(inode->i_mapping, GFP_NOFS);

		ctx->inode = inode;
	}

	return ctx;

err_cookie:
	fscache_unuse_cookie(ctx->cookie, NULL, NULL);
	fscache_relinquish_cookie(ctx->cookie, false);
err:
	kfree(ctx);
	return ERR_PTR(ret);
}

static void erofs_fscache_relinquish_cookie(struct erofs_fscache *ctx)
{
	fscache_unuse_cookie(ctx->cookie, NULL, NULL);
	fscache_relinquish_cookie(ctx->cookie, false);
	iput(ctx->inode);
	kfree(ctx->name);
	kfree(ctx);
}

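/*
 * Shared domain case: acquire the cookie and hang the context off an
 * anonymous inode of the pseudo mount, so that it can be found and shared by
 * other super blocks in the same domain.
 */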
static
struct erofs_fscache *erofs_fscache_domain_init_cookie(struct super_block *sb,
						       char *name,
						       unsigned int flags)
{
	int err;
	struct inode *inode;
	struct erofs_fscache *ctx;
	struct erofs_domain *domain = EROFS_SB(sb)->domain;

	ctx = erofs_fscache_acquire_cookie(sb, name, flags);
	if (IS_ERR(ctx))
		return ctx;

	ctx->name = kstrdup(name, GFP_KERNEL);
	if (!ctx->name) {
		err = -ENOMEM;
		goto out;
	}

	inode = new_inode(erofs_pseudo_mnt->mnt_sb);
	if (!inode) {
		err = -ENOMEM;
		goto out;
	}

	ctx->domain = domain;
	ctx->anon_inode = inode;
	inode->i_private = ctx;
	refcount_inc(&domain->ref);
	return ctx;
out:
	erofs_fscache_relinquish_cookie(ctx);
	return ERR_PTR(err);
}

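/*
 * Look up @name in the shared domain by walking the pseudo mount's inode
 * list and reuse an existing context if one is found; with
 * EROFS_REG_COOKIE_NEED_NOEXIST a duplicate is treated as an error instead.
 */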
static
struct erofs_fscache *erofs_domain_register_cookie(struct super_block *sb,
						   char *name,
						   unsigned int flags)
{
	struct inode *inode;
	struct erofs_fscache *ctx;
	struct erofs_domain *domain = EROFS_SB(sb)->domain;
	struct super_block *psb = erofs_pseudo_mnt->mnt_sb;

	mutex_lock(&erofs_domain_cookies_lock);
	spin_lock(&psb->s_inode_list_lock);
	list_for_each_entry(inode, &psb->s_inodes, i_sb_list) {
		ctx = inode->i_private;
		if (!ctx || ctx->domain != domain || strcmp(ctx->name, name))
			continue;
		if (!(flags & EROFS_REG_COOKIE_NEED_NOEXIST)) {
			igrab(inode);
		} else {
			erofs_err(sb, "%s already exists in domain %s", name,
				  domain->domain_id);
			ctx = ERR_PTR(-EEXIST);
		}
		spin_unlock(&psb->s_inode_list_lock);
		mutex_unlock(&erofs_domain_cookies_lock);
		return ctx;
	}
	spin_unlock(&psb->s_inode_list_lock);
	ctx = erofs_fscache_domain_init_cookie(sb, name, flags);
	mutex_unlock(&erofs_domain_cookies_lock);
	return ctx;
}

struct erofs_fscache *erofs_fscache_register_cookie(struct super_block *sb,
						    char *name,
						    unsigned int flags)
{
	if (EROFS_SB(sb)->domain_id)
		return erofs_domain_register_cookie(sb, name, flags);
	return erofs_fscache_acquire_cookie(sb, name, flags);
}

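/*
 * Release a cookie context.  Domain cookies are shared, so the context is
 * only torn down (and the domain reference dropped) once the last user puts
 * the anonymous inode.
 */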
void erofs_fscache_unregister_cookie(struct erofs_fscache *ctx)
{
	bool drop;
	struct erofs_domain *domain;

	if (!ctx)
		return;
	domain = ctx->domain;
	if (domain) {
		mutex_lock(&erofs_domain_cookies_lock);
		drop = atomic_read(&ctx->anon_inode->i_count) == 1;
		iput(ctx->anon_inode);
		mutex_unlock(&erofs_domain_cookies_lock);
		if (!drop)
			return;
	}

	erofs_fscache_relinquish_cookie(ctx);
	erofs_fscache_domain_put(domain);
}

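/*
 * Register the volume (or shared domain) for this super block and acquire
 * the cookie of the primary data blob identified by sbi->fsid.
 */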
int erofs_fscache_register_fs(struct super_block *sb)
{
	int ret;
	struct erofs_sb_info *sbi = EROFS_SB(sb);
	struct erofs_fscache *fscache;
	unsigned int flags;

	if (sbi->domain_id)
		ret = erofs_fscache_register_domain(sb);
	else
		ret = erofs_fscache_register_volume(sb);
	if (ret)
		return ret;

	/*
	 * When a shared domain is enabled, use NEED_NOEXIST to guarantee that
	 * the primary data blob (aka fsid) is unique in the shared domain.
	 *
	 * For the non-shared-domain case, fscache_acquire_volume() invoked by
	 * erofs_fscache_register_volume() has already guaranteed the
	 * uniqueness of the primary data blob.
	 *
	 * The acquired domain/volume will be relinquished in kill_sb() on error.
	 */
	flags = EROFS_REG_COOKIE_NEED_INODE;
	if (sbi->domain_id)
		flags |= EROFS_REG_COOKIE_NEED_NOEXIST;
	fscache = erofs_fscache_register_cookie(sb, sbi->fsid, flags);
	if (IS_ERR(fscache))
		return PTR_ERR(fscache);

	sbi->s_fscache = fscache;
	return 0;
}

void erofs_fscache_unregister_fs(struct super_block *sb)
{
	struct erofs_sb_info *sbi = EROFS_SB(sb);

	erofs_fscache_unregister_cookie(sbi->s_fscache);

	if (sbi->domain)
		erofs_fscache_domain_put(sbi->domain);
	else
		fscache_relinquish_volume(sbi->volume, NULL, false);

	sbi->s_fscache = NULL;
	sbi->volume = NULL;
	sbi->domain = NULL;
}