xref: /openbmc/linux/fs/erofs/fscache.c (revision ae6f2db4d59e9f8c90cb3c2d2a954832898d0f2b)
1 // SPDX-License-Identifier: GPL-2.0-or-later
2 /*
3  * Copyright (C) 2022, Alibaba Cloud
4  * Copyright (C) 2022, Bytedance Inc. All rights reserved.
5  */
6 #include <linux/fscache.h>
7 #include "internal.h"
8 
/*
 * Held while erofs_domain_list is searched or modified and while
 * erofs_pseudo_mnt is created or torn down.
 */
9 static DEFINE_MUTEX(erofs_domain_list_lock);
/*
 * Held while the pseudo mount's inode list is searched for an existing
 * shared cookie, while a new shared cookie is set up, and while the anon
 * inode reference of a shared cookie is dropped.
 */
10 static DEFINE_MUTEX(erofs_domain_cookies_lock);
/* All registered domains (struct erofs_domain), linked through ->list. */
11 static LIST_HEAD(erofs_domain_list);
/*
 * Internal mount of erofs_fs_type created when a domain is initialised and no
 * mount exists yet; its superblock hosts the anon inodes of shared cookies.
 * Unmounted and reset to NULL once the domain list becomes empty.
 */
12 static struct vfsmount *erofs_pseudo_mnt;
13 
14 static struct netfs_io_request *erofs_fscache_alloc_request(struct address_space *mapping,
15 					     loff_t start, size_t len)
16 {
17 	struct netfs_io_request *rreq;
18 
19 	rreq = kzalloc(sizeof(struct netfs_io_request), GFP_KERNEL);
20 	if (!rreq)
21 		return ERR_PTR(-ENOMEM);
22 
23 	rreq->start	= start;
24 	rreq->len	= len;
25 	rreq->mapping	= mapping;
26 	rreq->inode	= mapping->host;
27 	INIT_LIST_HEAD(&rreq->subrequests);
28 	refcount_set(&rreq->ref, 1);
29 	return rreq;
30 }
31 
32 static void erofs_fscache_put_request(struct netfs_io_request *rreq)
33 {
34 	if (!refcount_dec_and_test(&rreq->ref))
35 		return;
36 	if (rreq->cache_resources.ops)
37 		rreq->cache_resources.ops->end_operation(&rreq->cache_resources);
38 	kfree(rreq);
39 }
40 
41 static void erofs_fscache_put_subrequest(struct netfs_io_subrequest *subreq)
42 {
43 	if (!refcount_dec_and_test(&subreq->ref))
44 		return;
45 	erofs_fscache_put_request(subreq->rreq);
46 	kfree(subreq);
47 }
48 
49 static void erofs_fscache_clear_subrequests(struct netfs_io_request *rreq)
50 {
51 	struct netfs_io_subrequest *subreq;
52 
53 	while (!list_empty(&rreq->subrequests)) {
54 		subreq = list_first_entry(&rreq->subrequests,
55 				struct netfs_io_subrequest, rreq_link);
56 		list_del(&subreq->rreq_link);
57 		erofs_fscache_put_subrequest(subreq);
58 	}
59 }
60 
61 static void erofs_fscache_rreq_unlock_folios(struct netfs_io_request *rreq)
62 {
63 	struct netfs_io_subrequest *subreq;
64 	struct folio *folio;
65 	unsigned int iopos = 0;
66 	pgoff_t start_page = rreq->start / PAGE_SIZE;
67 	pgoff_t last_page = ((rreq->start + rreq->len) / PAGE_SIZE) - 1;
68 	bool subreq_failed = false;
69 
70 	XA_STATE(xas, &rreq->mapping->i_pages, start_page);
71 
72 	subreq = list_first_entry(&rreq->subrequests,
73 				  struct netfs_io_subrequest, rreq_link);
74 	subreq_failed = (subreq->error < 0);
75 
76 	rcu_read_lock();
77 	xas_for_each(&xas, folio, last_page) {
78 		unsigned int pgpos, pgend;
79 		bool pg_failed = false;
80 
81 		if (xas_retry(&xas, folio))
82 			continue;
83 
84 		pgpos = (folio_index(folio) - start_page) * PAGE_SIZE;
85 		pgend = pgpos + folio_size(folio);
86 
87 		for (;;) {
88 			if (!subreq) {
89 				pg_failed = true;
90 				break;
91 			}
92 
93 			pg_failed |= subreq_failed;
94 			if (pgend < iopos + subreq->len)
95 				break;
96 
97 			iopos += subreq->len;
98 			if (!list_is_last(&subreq->rreq_link,
99 					  &rreq->subrequests)) {
100 				subreq = list_next_entry(subreq, rreq_link);
101 				subreq_failed = (subreq->error < 0);
102 			} else {
103 				subreq = NULL;
104 				subreq_failed = false;
105 			}
106 			if (pgend == iopos)
107 				break;
108 		}
109 
110 		if (!pg_failed)
111 			folio_mark_uptodate(folio);
112 
113 		folio_unlock(folio);
114 	}
115 	rcu_read_unlock();
116 }
117 
/*
 * Finish a read request: unlock the folios it covers, release its
 * subrequests, then drop one reference on the request itself.
 */
118 static void erofs_fscache_rreq_complete(struct netfs_io_request *rreq)
119 {
	/* Must run first: it walks rreq->subrequests to find failed ranges. */
120 	erofs_fscache_rreq_unlock_folios(rreq);
	/* Unlink the subrequests and drop one reference on each. */
121 	erofs_fscache_clear_subrequests(rreq);
122 	erofs_fscache_put_request(rreq);
123 }
124 
125 static void erofc_fscache_subreq_complete(void *priv,
126 		ssize_t transferred_or_error, bool was_async)
127 {
128 	struct netfs_io_subrequest *subreq = priv;
129 	struct netfs_io_request *rreq = subreq->rreq;
130 
131 	if (IS_ERR_VALUE(transferred_or_error))
132 		subreq->error = transferred_or_error;
133 
134 	if (atomic_dec_and_test(&rreq->nr_outstanding))
135 		erofs_fscache_rreq_complete(rreq);
136 
137 	erofs_fscache_put_subrequest(subreq);
138 }
139 
140 /*
141  * Read data from fscache and fill the read data into page cache described by
142  * @rreq, which shall be both aligned with PAGE_SIZE. @pstart describes
143  * the start physical address in the cache file.
144  */
145 static int erofs_fscache_read_folios_async(struct fscache_cookie *cookie,
146 				struct netfs_io_request *rreq, loff_t pstart)
147 {
148 	enum netfs_io_source source;
149 	struct super_block *sb = rreq->mapping->host->i_sb;
150 	struct netfs_io_subrequest *subreq;
151 	struct netfs_cache_resources *cres = &rreq->cache_resources;
152 	struct iov_iter iter;
153 	loff_t start = rreq->start;
154 	size_t len = rreq->len;
155 	size_t done = 0;
156 	int ret;
157 
158 	atomic_set(&rreq->nr_outstanding, 1);
159 
160 	ret = fscache_begin_read_operation(cres, cookie);
161 	if (ret)
162 		goto out;
163 
164 	while (done < len) {
165 		subreq = kzalloc(sizeof(struct netfs_io_subrequest),
166 				 GFP_KERNEL);
167 		if (subreq) {
168 			INIT_LIST_HEAD(&subreq->rreq_link);
169 			refcount_set(&subreq->ref, 2);
170 			subreq->rreq = rreq;
171 			refcount_inc(&rreq->ref);
172 		} else {
173 			ret = -ENOMEM;
174 			goto out;
175 		}
176 
177 		subreq->start = pstart + done;
178 		subreq->len	=  len - done;
179 		subreq->flags = 1 << NETFS_SREQ_ONDEMAND;
180 
181 		list_add_tail(&subreq->rreq_link, &rreq->subrequests);
182 
183 		source = cres->ops->prepare_read(subreq, LLONG_MAX);
184 		if (WARN_ON(subreq->len == 0))
185 			source = NETFS_INVALID_READ;
186 		if (source != NETFS_READ_FROM_CACHE) {
187 			erofs_err(sb, "failed to fscache prepare_read (source %d)",
188 				  source);
189 			ret = -EIO;
190 			subreq->error = ret;
191 			erofs_fscache_put_subrequest(subreq);
192 			goto out;
193 		}
194 
195 		atomic_inc(&rreq->nr_outstanding);
196 
197 		iov_iter_xarray(&iter, READ, &rreq->mapping->i_pages,
198 				start + done, subreq->len);
199 
200 		ret = fscache_read(cres, subreq->start, &iter,
201 				   NETFS_READ_HOLE_FAIL,
202 				   erofc_fscache_subreq_complete, subreq);
203 		if (ret == -EIOCBQUEUED)
204 			ret = 0;
205 		if (ret) {
206 			erofs_err(sb, "failed to fscache_read (ret %d)", ret);
207 			goto out;
208 		}
209 
210 		done += subreq->len;
211 	}
212 out:
213 	if (atomic_dec_and_test(&rreq->nr_outstanding))
214 		erofs_fscache_rreq_complete(rreq);
215 
216 	return ret;
217 }
218 
219 static int erofs_fscache_meta_read_folio(struct file *data, struct folio *folio)
220 {
221 	int ret;
222 	struct super_block *sb = folio_mapping(folio)->host->i_sb;
223 	struct netfs_io_request *rreq;
224 	struct erofs_map_dev mdev = {
225 		.m_deviceid = 0,
226 		.m_pa = folio_pos(folio),
227 	};
228 
229 	ret = erofs_map_dev(sb, &mdev);
230 	if (ret)
231 		goto out;
232 
233 	rreq = erofs_fscache_alloc_request(folio_mapping(folio),
234 				folio_pos(folio), folio_size(folio));
235 	if (IS_ERR(rreq)) {
236 		ret = PTR_ERR(rreq);
237 		goto out;
238 	}
239 
240 	return erofs_fscache_read_folios_async(mdev.m_fscache->cookie,
241 				rreq, mdev.m_pa);
242 out:
243 	folio_unlock(folio);
244 	return ret;
245 }
246 
247 /*
248  * Read into page cache in the range described by (@pos, @len).
249  *
250  * On return, the caller is responsible for page unlocking if the output @unlock
251  * is true, or the callee will take this responsibility through netfs_io_request
252  * interface.
253  *
254  * The return value is the number of bytes successfully handled, or negative
255  * error code on failure. The only exception is that, the length of the range
256  * instead of the error code is returned on failure after netfs_io_request is
257  * allocated, so that .readahead() could advance rac accordingly.
258  */
259 static int erofs_fscache_data_read(struct address_space *mapping,
260 				   loff_t pos, size_t len, bool *unlock)
261 {
262 	struct inode *inode = mapping->host;
263 	struct super_block *sb = inode->i_sb;
264 	struct netfs_io_request *rreq;
265 	struct erofs_map_blocks map;
266 	struct erofs_map_dev mdev;
267 	struct iov_iter iter;
268 	size_t count;
269 	int ret;
270 
271 	*unlock = true;
272 
273 	map.m_la = pos;
274 	ret = erofs_map_blocks(inode, &map, EROFS_GET_BLOCKS_RAW);
275 	if (ret)
276 		return ret;
277 
278 	if (map.m_flags & EROFS_MAP_META) {
279 		struct erofs_buf buf = __EROFS_BUF_INITIALIZER;
280 		erofs_blk_t blknr;
281 		size_t offset, size;
282 		void *src;
283 
284 		/* For tail packing layout, the offset may be non-zero. */
285 		offset = erofs_blkoff(map.m_pa);
286 		blknr = erofs_blknr(map.m_pa);
287 		size = map.m_llen;
288 
289 		src = erofs_read_metabuf(&buf, sb, blknr, EROFS_KMAP);
290 		if (IS_ERR(src))
291 			return PTR_ERR(src);
292 
293 		iov_iter_xarray(&iter, READ, &mapping->i_pages, pos, PAGE_SIZE);
294 		if (copy_to_iter(src + offset, size, &iter) != size) {
295 			erofs_put_metabuf(&buf);
296 			return -EFAULT;
297 		}
298 		iov_iter_zero(PAGE_SIZE - size, &iter);
299 		erofs_put_metabuf(&buf);
300 		return PAGE_SIZE;
301 	}
302 
303 	if (!(map.m_flags & EROFS_MAP_MAPPED)) {
304 		count = len;
305 		iov_iter_xarray(&iter, READ, &mapping->i_pages, pos, count);
306 		iov_iter_zero(count, &iter);
307 		return count;
308 	}
309 
310 	count = min_t(size_t, map.m_llen - (pos - map.m_la), len);
311 	DBG_BUGON(!count || count % PAGE_SIZE);
312 
313 	mdev = (struct erofs_map_dev) {
314 		.m_deviceid = map.m_deviceid,
315 		.m_pa = map.m_pa,
316 	};
317 	ret = erofs_map_dev(sb, &mdev);
318 	if (ret)
319 		return ret;
320 
321 	rreq = erofs_fscache_alloc_request(mapping, pos, count);
322 	if (IS_ERR(rreq))
323 		return PTR_ERR(rreq);
324 
325 	*unlock = false;
326 	erofs_fscache_read_folios_async(mdev.m_fscache->cookie,
327 			rreq, mdev.m_pa + (pos - map.m_la));
328 	return count;
329 }
330 
331 static int erofs_fscache_read_folio(struct file *file, struct folio *folio)
332 {
333 	bool unlock;
334 	int ret;
335 
336 	DBG_BUGON(folio_size(folio) != EROFS_BLKSIZ);
337 
338 	ret = erofs_fscache_data_read(folio_mapping(folio), folio_pos(folio),
339 				      folio_size(folio), &unlock);
340 	if (unlock) {
341 		if (ret > 0)
342 			folio_mark_uptodate(folio);
343 		folio_unlock(folio);
344 	}
345 	return ret < 0 ? ret : 0;
346 }
347 
348 static void erofs_fscache_readahead(struct readahead_control *rac)
349 {
350 	struct folio *folio;
351 	size_t len, done = 0;
352 	loff_t start, pos;
353 	bool unlock;
354 	int ret, size;
355 
356 	if (!readahead_count(rac))
357 		return;
358 
359 	start = readahead_pos(rac);
360 	len = readahead_length(rac);
361 
362 	do {
363 		pos = start + done;
364 		ret = erofs_fscache_data_read(rac->mapping, pos,
365 					      len - done, &unlock);
366 		if (ret <= 0)
367 			return;
368 
369 		size = ret;
370 		while (size) {
371 			folio = readahead_folio(rac);
372 			size -= folio_size(folio);
373 			if (unlock) {
374 				folio_mark_uptodate(folio);
375 				folio_unlock(folio);
376 			}
377 		}
378 	} while ((done += ret) < len);
379 }
380 
/*
 * Address space operations installed on the inode that
 * erofs_fscache_acquire_cookie() creates when an inode is requested; only
 * single-folio reads are provided.
 */
381 static const struct address_space_operations erofs_fscache_meta_aops = {
382 	.read_folio = erofs_fscache_meta_read_folio,
383 };
384 
/*
 * Address space operations for reading file data through fscache.  Not
 * static: it is not referenced within this file.
 */
385 const struct address_space_operations erofs_fscache_access_aops = {
386 	.read_folio = erofs_fscache_read_folio,
387 	.readahead = erofs_fscache_readahead,
388 };
389 
390 static void erofs_fscache_domain_put(struct erofs_domain *domain)
391 {
392 	if (!domain)
393 		return;
394 	mutex_lock(&erofs_domain_list_lock);
395 	if (refcount_dec_and_test(&domain->ref)) {
396 		list_del(&domain->list);
397 		if (list_empty(&erofs_domain_list)) {
398 			kern_unmount(erofs_pseudo_mnt);
399 			erofs_pseudo_mnt = NULL;
400 		}
401 		mutex_unlock(&erofs_domain_list_lock);
402 		fscache_relinquish_volume(domain->volume, NULL, false);
403 		kfree(domain->domain_id);
404 		kfree(domain);
405 		return;
406 	}
407 	mutex_unlock(&erofs_domain_list_lock);
408 }
409 
410 static int erofs_fscache_register_volume(struct super_block *sb)
411 {
412 	struct erofs_sb_info *sbi = EROFS_SB(sb);
413 	char *domain_id = sbi->domain_id;
414 	struct fscache_volume *volume;
415 	char *name;
416 	int ret = 0;
417 
418 	name = kasprintf(GFP_KERNEL, "erofs,%s",
419 			 domain_id ? domain_id : sbi->fsid);
420 	if (!name)
421 		return -ENOMEM;
422 
423 	volume = fscache_acquire_volume(name, NULL, NULL, 0);
424 	if (IS_ERR_OR_NULL(volume)) {
425 		erofs_err(sb, "failed to register volume for %s", name);
426 		ret = volume ? PTR_ERR(volume) : -EOPNOTSUPP;
427 		volume = NULL;
428 	}
429 
430 	sbi->volume = volume;
431 	kfree(name);
432 	return ret;
433 }
434 
435 static int erofs_fscache_init_domain(struct super_block *sb)
436 {
437 	int err;
438 	struct erofs_domain *domain;
439 	struct erofs_sb_info *sbi = EROFS_SB(sb);
440 
441 	domain = kzalloc(sizeof(struct erofs_domain), GFP_KERNEL);
442 	if (!domain)
443 		return -ENOMEM;
444 
445 	domain->domain_id = kstrdup(sbi->domain_id, GFP_KERNEL);
446 	if (!domain->domain_id) {
447 		kfree(domain);
448 		return -ENOMEM;
449 	}
450 
451 	err = erofs_fscache_register_volume(sb);
452 	if (err)
453 		goto out;
454 
455 	if (!erofs_pseudo_mnt) {
456 		erofs_pseudo_mnt = kern_mount(&erofs_fs_type);
457 		if (IS_ERR(erofs_pseudo_mnt)) {
458 			err = PTR_ERR(erofs_pseudo_mnt);
459 			goto out;
460 		}
461 	}
462 
463 	domain->volume = sbi->volume;
464 	refcount_set(&domain->ref, 1);
465 	list_add(&domain->list, &erofs_domain_list);
466 	sbi->domain = domain;
467 	return 0;
468 out:
469 	kfree(domain->domain_id);
470 	kfree(domain);
471 	return err;
472 }
473 
474 static int erofs_fscache_register_domain(struct super_block *sb)
475 {
476 	int err;
477 	struct erofs_domain *domain;
478 	struct erofs_sb_info *sbi = EROFS_SB(sb);
479 
480 	mutex_lock(&erofs_domain_list_lock);
481 	list_for_each_entry(domain, &erofs_domain_list, list) {
482 		if (!strcmp(domain->domain_id, sbi->domain_id)) {
483 			sbi->domain = domain;
484 			sbi->volume = domain->volume;
485 			refcount_inc(&domain->ref);
486 			mutex_unlock(&erofs_domain_list_lock);
487 			return 0;
488 		}
489 	}
490 	err = erofs_fscache_init_domain(sb);
491 	mutex_unlock(&erofs_domain_list_lock);
492 	return err;
493 }
494 
495 static
496 struct erofs_fscache *erofs_fscache_acquire_cookie(struct super_block *sb,
497 						    char *name, bool need_inode)
498 {
499 	struct fscache_volume *volume = EROFS_SB(sb)->volume;
500 	struct erofs_fscache *ctx;
501 	struct fscache_cookie *cookie;
502 	int ret;
503 
504 	ctx = kzalloc(sizeof(*ctx), GFP_KERNEL);
505 	if (!ctx)
506 		return ERR_PTR(-ENOMEM);
507 
508 	cookie = fscache_acquire_cookie(volume, FSCACHE_ADV_WANT_CACHE_SIZE,
509 					name, strlen(name), NULL, 0, 0);
510 	if (!cookie) {
511 		erofs_err(sb, "failed to get cookie for %s", name);
512 		ret = -EINVAL;
513 		goto err;
514 	}
515 
516 	fscache_use_cookie(cookie, false);
517 	ctx->cookie = cookie;
518 
519 	if (need_inode) {
520 		struct inode *const inode = new_inode(sb);
521 
522 		if (!inode) {
523 			erofs_err(sb, "failed to get anon inode for %s", name);
524 			ret = -ENOMEM;
525 			goto err_cookie;
526 		}
527 
528 		set_nlink(inode, 1);
529 		inode->i_size = OFFSET_MAX;
530 		inode->i_mapping->a_ops = &erofs_fscache_meta_aops;
531 		mapping_set_gfp_mask(inode->i_mapping, GFP_NOFS);
532 
533 		ctx->inode = inode;
534 	}
535 
536 	return ctx;
537 
538 err_cookie:
539 	fscache_unuse_cookie(ctx->cookie, NULL, NULL);
540 	fscache_relinquish_cookie(ctx->cookie, false);
541 err:
542 	kfree(ctx);
543 	return ERR_PTR(ret);
544 }
545 
546 static void erofs_fscache_relinquish_cookie(struct erofs_fscache *ctx)
547 {
548 	fscache_unuse_cookie(ctx->cookie, NULL, NULL);
549 	fscache_relinquish_cookie(ctx->cookie, false);
550 	iput(ctx->inode);
551 	kfree(ctx->name);
552 	kfree(ctx);
553 }
554 
555 static
556 struct erofs_fscache *erofs_fscache_domain_init_cookie(struct super_block *sb,
557 		char *name, bool need_inode)
558 {
559 	int err;
560 	struct inode *inode;
561 	struct erofs_fscache *ctx;
562 	struct erofs_domain *domain = EROFS_SB(sb)->domain;
563 
564 	ctx = erofs_fscache_acquire_cookie(sb, name, need_inode);
565 	if (IS_ERR(ctx))
566 		return ctx;
567 
568 	ctx->name = kstrdup(name, GFP_KERNEL);
569 	if (!ctx->name) {
570 		err = -ENOMEM;
571 		goto out;
572 	}
573 
574 	inode = new_inode(erofs_pseudo_mnt->mnt_sb);
575 	if (!inode) {
576 		err = -ENOMEM;
577 		goto out;
578 	}
579 
580 	ctx->domain = domain;
581 	ctx->anon_inode = inode;
582 	inode->i_private = ctx;
583 	refcount_inc(&domain->ref);
584 	return ctx;
585 out:
586 	erofs_fscache_relinquish_cookie(ctx);
587 	return ERR_PTR(err);
588 }
589 
590 static
591 struct erofs_fscache *erofs_domain_register_cookie(struct super_block *sb,
592 						   char *name, bool need_inode)
593 {
594 	struct inode *inode;
595 	struct erofs_fscache *ctx;
596 	struct erofs_domain *domain = EROFS_SB(sb)->domain;
597 	struct super_block *psb = erofs_pseudo_mnt->mnt_sb;
598 
599 	mutex_lock(&erofs_domain_cookies_lock);
600 	spin_lock(&psb->s_inode_list_lock);
601 	list_for_each_entry(inode, &psb->s_inodes, i_sb_list) {
602 		ctx = inode->i_private;
603 		if (!ctx || ctx->domain != domain || strcmp(ctx->name, name))
604 			continue;
605 		igrab(inode);
606 		spin_unlock(&psb->s_inode_list_lock);
607 		mutex_unlock(&erofs_domain_cookies_lock);
608 		return ctx;
609 	}
610 	spin_unlock(&psb->s_inode_list_lock);
611 	ctx = erofs_fscache_domain_init_cookie(sb, name, need_inode);
612 	mutex_unlock(&erofs_domain_cookies_lock);
613 	return ctx;
614 }
615 
616 struct erofs_fscache *erofs_fscache_register_cookie(struct super_block *sb,
617 						    char *name, bool need_inode)
618 {
619 	if (EROFS_SB(sb)->domain_id)
620 		return erofs_domain_register_cookie(sb, name, need_inode);
621 	return erofs_fscache_acquire_cookie(sb, name, need_inode);
622 }
623 
624 void erofs_fscache_unregister_cookie(struct erofs_fscache *ctx)
625 {
626 	bool drop;
627 	struct erofs_domain *domain;
628 
629 	if (!ctx)
630 		return;
631 	domain = ctx->domain;
632 	if (domain) {
633 		mutex_lock(&erofs_domain_cookies_lock);
634 		drop = atomic_read(&ctx->anon_inode->i_count) == 1;
635 		iput(ctx->anon_inode);
636 		mutex_unlock(&erofs_domain_cookies_lock);
637 		if (!drop)
638 			return;
639 	}
640 
641 	erofs_fscache_relinquish_cookie(ctx);
642 	erofs_fscache_domain_put(domain);
643 }
644 
645 int erofs_fscache_register_fs(struct super_block *sb)
646 {
647 	int ret;
648 	struct erofs_sb_info *sbi = EROFS_SB(sb);
649 	struct erofs_fscache *fscache;
650 
651 	if (sbi->domain_id)
652 		ret = erofs_fscache_register_domain(sb);
653 	else
654 		ret = erofs_fscache_register_volume(sb);
655 	if (ret)
656 		return ret;
657 
658 	/* acquired domain/volume will be relinquished in kill_sb() on error */
659 	fscache = erofs_fscache_register_cookie(sb, sbi->fsid, true);
660 	if (IS_ERR(fscache))
661 		return PTR_ERR(fscache);
662 
663 	sbi->s_fscache = fscache;
664 	return 0;
665 }
666 
667 void erofs_fscache_unregister_fs(struct super_block *sb)
668 {
669 	struct erofs_sb_info *sbi = EROFS_SB(sb);
670 
671 	erofs_fscache_unregister_cookie(sbi->s_fscache);
672 
673 	if (sbi->domain)
674 		erofs_fscache_domain_put(sbi->domain);
675 	else
676 		fscache_relinquish_volume(sbi->volume, NULL, false);
677 
678 	sbi->s_fscache = NULL;
679 	sbi->volume = NULL;
680 	sbi->domain = NULL;
681 }
682