xref: /openbmc/linux/fs/erofs/fscache.c (revision 39bfcb81)
1 // SPDX-License-Identifier: GPL-2.0-or-later
2 /*
3  * Copyright (C) 2022, Alibaba Cloud
4  * Copyright (C) 2022, Bytedance Inc. All rights reserved.
5  */
6 #include <linux/fscache.h>
7 #include "internal.h"
8 
/* Held while walking or modifying erofs_domain_list and erofs_pseudo_mnt. */
static DEFINE_MUTEX(erofs_domain_list_lock);
/* Held while looking up, creating or releasing cookies shared in a domain. */
static DEFINE_MUTEX(erofs_domain_cookies_lock);
/* Every registered struct erofs_domain, linked through its ->list member. */
static LIST_HEAD(erofs_domain_list);
/* Pseudo mount whose superblock carries the anon inodes of shared cookies. */
static struct vfsmount *erofs_pseudo_mnt;
13 
14 static struct netfs_io_request *erofs_fscache_alloc_request(struct address_space *mapping,
15 					     loff_t start, size_t len)
16 {
17 	struct netfs_io_request *rreq;
18 
19 	rreq = kzalloc(sizeof(struct netfs_io_request), GFP_KERNEL);
20 	if (!rreq)
21 		return ERR_PTR(-ENOMEM);
22 
23 	rreq->start	= start;
24 	rreq->len	= len;
25 	rreq->mapping	= mapping;
26 	rreq->inode	= mapping->host;
27 	INIT_LIST_HEAD(&rreq->subrequests);
28 	refcount_set(&rreq->ref, 1);
29 	return rreq;
30 }
31 
32 static void erofs_fscache_put_request(struct netfs_io_request *rreq)
33 {
34 	if (!refcount_dec_and_test(&rreq->ref))
35 		return;
36 	if (rreq->cache_resources.ops)
37 		rreq->cache_resources.ops->end_operation(&rreq->cache_resources);
38 	kfree(rreq);
39 }
40 
41 static void erofs_fscache_put_subrequest(struct netfs_io_subrequest *subreq)
42 {
43 	if (!refcount_dec_and_test(&subreq->ref))
44 		return;
45 	erofs_fscache_put_request(subreq->rreq);
46 	kfree(subreq);
47 }
48 
49 static void erofs_fscache_clear_subrequests(struct netfs_io_request *rreq)
50 {
51 	struct netfs_io_subrequest *subreq;
52 
53 	while (!list_empty(&rreq->subrequests)) {
54 		subreq = list_first_entry(&rreq->subrequests,
55 				struct netfs_io_subrequest, rreq_link);
56 		list_del(&subreq->rreq_link);
57 		erofs_fscache_put_subrequest(subreq);
58 	}
59 }
60 
61 static void erofs_fscache_rreq_unlock_folios(struct netfs_io_request *rreq)
62 {
63 	struct netfs_io_subrequest *subreq;
64 	struct folio *folio;
65 	unsigned int iopos = 0;
66 	pgoff_t start_page = rreq->start / PAGE_SIZE;
67 	pgoff_t last_page = ((rreq->start + rreq->len) / PAGE_SIZE) - 1;
68 	bool subreq_failed = false;
69 
70 	XA_STATE(xas, &rreq->mapping->i_pages, start_page);
71 
72 	subreq = list_first_entry(&rreq->subrequests,
73 				  struct netfs_io_subrequest, rreq_link);
74 	subreq_failed = (subreq->error < 0);
75 
76 	rcu_read_lock();
77 	xas_for_each(&xas, folio, last_page) {
78 		unsigned int pgpos =
79 			(folio_index(folio) - start_page) * PAGE_SIZE;
80 		unsigned int pgend = pgpos + folio_size(folio);
81 		bool pg_failed = false;
82 
83 		for (;;) {
84 			if (!subreq) {
85 				pg_failed = true;
86 				break;
87 			}
88 
89 			pg_failed |= subreq_failed;
90 			if (pgend < iopos + subreq->len)
91 				break;
92 
93 			iopos += subreq->len;
94 			if (!list_is_last(&subreq->rreq_link,
95 					  &rreq->subrequests)) {
96 				subreq = list_next_entry(subreq, rreq_link);
97 				subreq_failed = (subreq->error < 0);
98 			} else {
99 				subreq = NULL;
100 				subreq_failed = false;
101 			}
102 			if (pgend == iopos)
103 				break;
104 		}
105 
106 		if (!pg_failed)
107 			folio_mark_uptodate(folio);
108 
109 		folio_unlock(folio);
110 	}
111 	rcu_read_unlock();
112 }
113 
/*
 * Finish a read request: unlock (and, where the read succeeded, mark
 * uptodate) the folios it covers, drop the references held through the
 * subrequest list, then drop one reference on @rreq itself.
 *
 * The folios are handled first because that step walks rreq->subrequests,
 * which the following call empties.
 */
static void erofs_fscache_rreq_complete(struct netfs_io_request *rreq)
{
	erofs_fscache_rreq_unlock_folios(rreq);
	erofs_fscache_clear_subrequests(rreq);
	erofs_fscache_put_request(rreq);
}
120 
/*
 * Completion callback handed to fscache_read() for one subrequest.
 *
 * @priv:                 the struct netfs_io_subrequest that was submitted
 * @transferred_or_error: byte count on success or a negative errno; only
 *                        the error case is recorded, in subreq->error
 * @was_async:            unused here
 *
 * Whoever drops rreq->nr_outstanding to zero completes the whole request.
 * The reference on @subreq taken for this callback is dropped last, after
 * the parent request has been used.
 */
static void erofc_fscache_subreq_complete(void *priv,
		ssize_t transferred_or_error, bool was_async)
{
	struct netfs_io_subrequest *subreq = priv;
	struct netfs_io_request *rreq = subreq->rreq;

	if (IS_ERR_VALUE(transferred_or_error))
		subreq->error = transferred_or_error;

	if (atomic_dec_and_test(&rreq->nr_outstanding))
		erofs_fscache_rreq_complete(rreq);

	erofs_fscache_put_subrequest(subreq);
}
135 
136 /*
137  * Read data from fscache and fill the read data into page cache described by
138  * @rreq, which shall be both aligned with PAGE_SIZE. @pstart describes
139  * the start physical address in the cache file.
140  */
141 static int erofs_fscache_read_folios_async(struct fscache_cookie *cookie,
142 				struct netfs_io_request *rreq, loff_t pstart)
143 {
144 	enum netfs_io_source source;
145 	struct super_block *sb = rreq->mapping->host->i_sb;
146 	struct netfs_io_subrequest *subreq;
147 	struct netfs_cache_resources *cres = &rreq->cache_resources;
148 	struct iov_iter iter;
149 	loff_t start = rreq->start;
150 	size_t len = rreq->len;
151 	size_t done = 0;
152 	int ret;
153 
154 	atomic_set(&rreq->nr_outstanding, 1);
155 
156 	ret = fscache_begin_read_operation(cres, cookie);
157 	if (ret)
158 		goto out;
159 
160 	while (done < len) {
161 		subreq = kzalloc(sizeof(struct netfs_io_subrequest),
162 				 GFP_KERNEL);
163 		if (subreq) {
164 			INIT_LIST_HEAD(&subreq->rreq_link);
165 			refcount_set(&subreq->ref, 2);
166 			subreq->rreq = rreq;
167 			refcount_inc(&rreq->ref);
168 		} else {
169 			ret = -ENOMEM;
170 			goto out;
171 		}
172 
173 		subreq->start = pstart + done;
174 		subreq->len	=  len - done;
175 		subreq->flags = 1 << NETFS_SREQ_ONDEMAND;
176 
177 		list_add_tail(&subreq->rreq_link, &rreq->subrequests);
178 
179 		source = cres->ops->prepare_read(subreq, LLONG_MAX);
180 		if (WARN_ON(subreq->len == 0))
181 			source = NETFS_INVALID_READ;
182 		if (source != NETFS_READ_FROM_CACHE) {
183 			erofs_err(sb, "failed to fscache prepare_read (source %d)",
184 				  source);
185 			ret = -EIO;
186 			subreq->error = ret;
187 			erofs_fscache_put_subrequest(subreq);
188 			goto out;
189 		}
190 
191 		atomic_inc(&rreq->nr_outstanding);
192 
193 		iov_iter_xarray(&iter, READ, &rreq->mapping->i_pages,
194 				start + done, subreq->len);
195 
196 		ret = fscache_read(cres, subreq->start, &iter,
197 				   NETFS_READ_HOLE_FAIL,
198 				   erofc_fscache_subreq_complete, subreq);
199 		if (ret == -EIOCBQUEUED)
200 			ret = 0;
201 		if (ret) {
202 			erofs_err(sb, "failed to fscache_read (ret %d)", ret);
203 			goto out;
204 		}
205 
206 		done += subreq->len;
207 	}
208 out:
209 	if (atomic_dec_and_test(&rreq->nr_outstanding))
210 		erofs_fscache_rreq_complete(rreq);
211 
212 	return ret;
213 }
214 
215 static int erofs_fscache_meta_read_folio(struct file *data, struct folio *folio)
216 {
217 	int ret;
218 	struct super_block *sb = folio_mapping(folio)->host->i_sb;
219 	struct netfs_io_request *rreq;
220 	struct erofs_map_dev mdev = {
221 		.m_deviceid = 0,
222 		.m_pa = folio_pos(folio),
223 	};
224 
225 	ret = erofs_map_dev(sb, &mdev);
226 	if (ret)
227 		goto out;
228 
229 	rreq = erofs_fscache_alloc_request(folio_mapping(folio),
230 				folio_pos(folio), folio_size(folio));
231 	if (IS_ERR(rreq)) {
232 		ret = PTR_ERR(rreq);
233 		goto out;
234 	}
235 
236 	return erofs_fscache_read_folios_async(mdev.m_fscache->cookie,
237 				rreq, mdev.m_pa);
238 out:
239 	folio_unlock(folio);
240 	return ret;
241 }
242 
/*
 * Read into page cache in the range described by (@pos, @len).
 *
 * On return, the caller is responsible for page unlocking if the output @unlock
 * is true, or the callee will take this responsibility through netfs_io_request
 * interface.
 *
 * The return value is the number of bytes successfully handled, or negative
 * error code on failure. The only exception is that, the length of the range
 * instead of the error code is returned on failure after netfs_io_request is
 * allocated, so that .readahead() could advance rac accordingly.
 *
 * Three cases are handled, decided by the flags erofs_map_blocks() reports
 * for @pos:
 *  - EROFS_MAP_META: the data is copied synchronously out of a metadata
 *    buffer into one page, and PAGE_SIZE is returned;
 *  - not EROFS_MAP_MAPPED: the whole (@pos, @len) range is zero-filled;
 *  - otherwise: an asynchronous cache read is submitted.
 */
static int erofs_fscache_data_read(struct address_space *mapping,
				   loff_t pos, size_t len, bool *unlock)
{
	struct inode *inode = mapping->host;
	struct super_block *sb = inode->i_sb;
	struct netfs_io_request *rreq;
	struct erofs_map_blocks map;
	struct erofs_map_dev mdev;
	struct iov_iter iter;
	size_t count;
	int ret;

	/* Until a request is submitted, unlocking stays with the caller. */
	*unlock = true;

	map.m_la = pos;
	ret = erofs_map_blocks(inode, &map, EROFS_GET_BLOCKS_RAW);
	if (ret)
		return ret;

	if (map.m_flags & EROFS_MAP_META) {
		struct erofs_buf buf = __EROFS_BUF_INITIALIZER;
		erofs_blk_t blknr;
		size_t offset, size;
		void *src;

		/* For tail packing layout, the offset may be non-zero. */
		offset = erofs_blkoff(map.m_pa);
		blknr = erofs_blknr(map.m_pa);
		size = map.m_llen;

		src = erofs_read_metabuf(&buf, sb, blknr, EROFS_KMAP);
		if (IS_ERR(src))
			return PTR_ERR(src);

		/* Copy the inline data, then zero the rest of the page. */
		iov_iter_xarray(&iter, READ, &mapping->i_pages, pos, PAGE_SIZE);
		if (copy_to_iter(src + offset, size, &iter) != size) {
			erofs_put_metabuf(&buf);
			return -EFAULT;
		}
		iov_iter_zero(PAGE_SIZE - size, &iter);
		erofs_put_metabuf(&buf);
		return PAGE_SIZE;
	}

	/* Nothing is mapped at @pos: the requested range reads as zeroes. */
	if (!(map.m_flags & EROFS_MAP_MAPPED)) {
		count = len;
		iov_iter_xarray(&iter, READ, &mapping->i_pages, pos, count);
		iov_iter_zero(count, &iter);
		return count;
	}

	/*
	 * Clamp to what is left of the mapped extent after @pos.
	 * NOTE(review): presumably erofs_map_blocks() rewrites map.m_la to the
	 * start of the extent -- confirm against its implementation.
	 */
	count = min_t(size_t, map.m_llen - (pos - map.m_la), len);
	DBG_BUGON(!count || count % PAGE_SIZE);

	mdev = (struct erofs_map_dev) {
		.m_deviceid = map.m_deviceid,
		.m_pa = map.m_pa,
	};
	ret = erofs_map_dev(sb, &mdev);
	if (ret)
		return ret;

	rreq = erofs_fscache_alloc_request(mapping, pos, count);
	if (IS_ERR(rreq))
		return PTR_ERR(rreq);

	/*
	 * From here on the request unlocks the folios. The submission result
	 * is deliberately ignored and @count is returned even on failure, as
	 * described in the function comment.
	 */
	*unlock = false;
	erofs_fscache_read_folios_async(mdev.m_fscache->cookie,
			rreq, mdev.m_pa + (pos - map.m_la));
	return count;
}
326 
327 static int erofs_fscache_read_folio(struct file *file, struct folio *folio)
328 {
329 	bool unlock;
330 	int ret;
331 
332 	DBG_BUGON(folio_size(folio) != EROFS_BLKSIZ);
333 
334 	ret = erofs_fscache_data_read(folio_mapping(folio), folio_pos(folio),
335 				      folio_size(folio), &unlock);
336 	if (unlock) {
337 		if (ret > 0)
338 			folio_mark_uptodate(folio);
339 		folio_unlock(folio);
340 	}
341 	return ret < 0 ? ret : 0;
342 }
343 
344 static void erofs_fscache_readahead(struct readahead_control *rac)
345 {
346 	struct folio *folio;
347 	size_t len, done = 0;
348 	loff_t start, pos;
349 	bool unlock;
350 	int ret, size;
351 
352 	if (!readahead_count(rac))
353 		return;
354 
355 	start = readahead_pos(rac);
356 	len = readahead_length(rac);
357 
358 	do {
359 		pos = start + done;
360 		ret = erofs_fscache_data_read(rac->mapping, pos,
361 					      len - done, &unlock);
362 		if (ret <= 0)
363 			return;
364 
365 		size = ret;
366 		while (size) {
367 			folio = readahead_folio(rac);
368 			size -= folio_size(folio);
369 			if (unlock) {
370 				folio_mark_uptodate(folio);
371 				folio_unlock(folio);
372 			}
373 		}
374 	} while ((done += ret) < len);
375 }
376 
/*
 * Address space operations installed on the inode that
 * erofs_fscache_acquire_cookie() creates when need_inode is set.
 */
static const struct address_space_operations erofs_fscache_meta_aops = {
	.read_folio = erofs_fscache_meta_read_folio,
};
380 
/*
 * Address space operations backed by erofs_fscache_data_read(), offering
 * both single-folio reads and readahead. Not referenced elsewhere in this
 * file; it has external linkage, so presumably it is installed on data
 * inodes by code outside this file -- TODO confirm.
 */
const struct address_space_operations erofs_fscache_access_aops = {
	.read_folio = erofs_fscache_read_folio,
	.readahead = erofs_fscache_readahead,
};
385 
386 static void erofs_fscache_domain_put(struct erofs_domain *domain)
387 {
388 	if (!domain)
389 		return;
390 	mutex_lock(&erofs_domain_list_lock);
391 	if (refcount_dec_and_test(&domain->ref)) {
392 		list_del(&domain->list);
393 		if (list_empty(&erofs_domain_list)) {
394 			kern_unmount(erofs_pseudo_mnt);
395 			erofs_pseudo_mnt = NULL;
396 		}
397 		mutex_unlock(&erofs_domain_list_lock);
398 		fscache_relinquish_volume(domain->volume, NULL, false);
399 		kfree(domain->domain_id);
400 		kfree(domain);
401 		return;
402 	}
403 	mutex_unlock(&erofs_domain_list_lock);
404 }
405 
406 static int erofs_fscache_register_volume(struct super_block *sb)
407 {
408 	struct erofs_sb_info *sbi = EROFS_SB(sb);
409 	char *domain_id = sbi->domain_id;
410 	struct fscache_volume *volume;
411 	char *name;
412 	int ret = 0;
413 
414 	name = kasprintf(GFP_KERNEL, "erofs,%s",
415 			 domain_id ? domain_id : sbi->fsid);
416 	if (!name)
417 		return -ENOMEM;
418 
419 	volume = fscache_acquire_volume(name, NULL, NULL, 0);
420 	if (IS_ERR_OR_NULL(volume)) {
421 		erofs_err(sb, "failed to register volume for %s", name);
422 		ret = volume ? PTR_ERR(volume) : -EOPNOTSUPP;
423 		volume = NULL;
424 	}
425 
426 	sbi->volume = volume;
427 	kfree(name);
428 	return ret;
429 }
430 
431 static int erofs_fscache_init_domain(struct super_block *sb)
432 {
433 	int err;
434 	struct erofs_domain *domain;
435 	struct erofs_sb_info *sbi = EROFS_SB(sb);
436 
437 	domain = kzalloc(sizeof(struct erofs_domain), GFP_KERNEL);
438 	if (!domain)
439 		return -ENOMEM;
440 
441 	domain->domain_id = kstrdup(sbi->domain_id, GFP_KERNEL);
442 	if (!domain->domain_id) {
443 		kfree(domain);
444 		return -ENOMEM;
445 	}
446 
447 	err = erofs_fscache_register_volume(sb);
448 	if (err)
449 		goto out;
450 
451 	if (!erofs_pseudo_mnt) {
452 		erofs_pseudo_mnt = kern_mount(&erofs_fs_type);
453 		if (IS_ERR(erofs_pseudo_mnt)) {
454 			err = PTR_ERR(erofs_pseudo_mnt);
455 			goto out;
456 		}
457 	}
458 
459 	domain->volume = sbi->volume;
460 	refcount_set(&domain->ref, 1);
461 	list_add(&domain->list, &erofs_domain_list);
462 	sbi->domain = domain;
463 	return 0;
464 out:
465 	kfree(domain->domain_id);
466 	kfree(domain);
467 	return err;
468 }
469 
470 static int erofs_fscache_register_domain(struct super_block *sb)
471 {
472 	int err;
473 	struct erofs_domain *domain;
474 	struct erofs_sb_info *sbi = EROFS_SB(sb);
475 
476 	mutex_lock(&erofs_domain_list_lock);
477 	list_for_each_entry(domain, &erofs_domain_list, list) {
478 		if (!strcmp(domain->domain_id, sbi->domain_id)) {
479 			sbi->domain = domain;
480 			sbi->volume = domain->volume;
481 			refcount_inc(&domain->ref);
482 			mutex_unlock(&erofs_domain_list_lock);
483 			return 0;
484 		}
485 	}
486 	err = erofs_fscache_init_domain(sb);
487 	mutex_unlock(&erofs_domain_list_lock);
488 	return err;
489 }
490 
491 static
492 struct erofs_fscache *erofs_fscache_acquire_cookie(struct super_block *sb,
493 						    char *name, bool need_inode)
494 {
495 	struct fscache_volume *volume = EROFS_SB(sb)->volume;
496 	struct erofs_fscache *ctx;
497 	struct fscache_cookie *cookie;
498 	int ret;
499 
500 	ctx = kzalloc(sizeof(*ctx), GFP_KERNEL);
501 	if (!ctx)
502 		return ERR_PTR(-ENOMEM);
503 
504 	cookie = fscache_acquire_cookie(volume, FSCACHE_ADV_WANT_CACHE_SIZE,
505 					name, strlen(name), NULL, 0, 0);
506 	if (!cookie) {
507 		erofs_err(sb, "failed to get cookie for %s", name);
508 		ret = -EINVAL;
509 		goto err;
510 	}
511 
512 	fscache_use_cookie(cookie, false);
513 	ctx->cookie = cookie;
514 
515 	if (need_inode) {
516 		struct inode *const inode = new_inode(sb);
517 
518 		if (!inode) {
519 			erofs_err(sb, "failed to get anon inode for %s", name);
520 			ret = -ENOMEM;
521 			goto err_cookie;
522 		}
523 
524 		set_nlink(inode, 1);
525 		inode->i_size = OFFSET_MAX;
526 		inode->i_mapping->a_ops = &erofs_fscache_meta_aops;
527 		mapping_set_gfp_mask(inode->i_mapping, GFP_NOFS);
528 
529 		ctx->inode = inode;
530 	}
531 
532 	return ctx;
533 
534 err_cookie:
535 	fscache_unuse_cookie(ctx->cookie, NULL, NULL);
536 	fscache_relinquish_cookie(ctx->cookie, false);
537 err:
538 	kfree(ctx);
539 	return ERR_PTR(ret);
540 }
541 
/*
 * Tear down @ctx: stop using and relinquish its cookie, drop the inode
 * reference, then free the name and the context itself.
 *
 * ctx->inode and ctx->name may still be unset: this is also the error path
 * of erofs_fscache_domain_init_cookie(), which can fail before ctx->name
 * is allocated and on a context created without an inode.
 */
static void erofs_fscache_relinquish_cookie(struct erofs_fscache *ctx)
{
	fscache_unuse_cookie(ctx->cookie, NULL, NULL);
	fscache_relinquish_cookie(ctx->cookie, false);
	iput(ctx->inode);
	kfree(ctx->name);
	kfree(ctx);
}
550 
551 static
552 struct erofs_fscache *erofs_fscache_domain_init_cookie(struct super_block *sb,
553 		char *name, bool need_inode)
554 {
555 	int err;
556 	struct inode *inode;
557 	struct erofs_fscache *ctx;
558 	struct erofs_domain *domain = EROFS_SB(sb)->domain;
559 
560 	ctx = erofs_fscache_acquire_cookie(sb, name, need_inode);
561 	if (IS_ERR(ctx))
562 		return ctx;
563 
564 	ctx->name = kstrdup(name, GFP_KERNEL);
565 	if (!ctx->name) {
566 		err = -ENOMEM;
567 		goto out;
568 	}
569 
570 	inode = new_inode(erofs_pseudo_mnt->mnt_sb);
571 	if (!inode) {
572 		err = -ENOMEM;
573 		goto out;
574 	}
575 
576 	ctx->domain = domain;
577 	ctx->anon_inode = inode;
578 	inode->i_private = ctx;
579 	refcount_inc(&domain->ref);
580 	return ctx;
581 out:
582 	erofs_fscache_relinquish_cookie(ctx);
583 	return ERR_PTR(err);
584 }
585 
586 static
587 struct erofs_fscache *erofs_domain_register_cookie(struct super_block *sb,
588 						   char *name, bool need_inode)
589 {
590 	struct inode *inode;
591 	struct erofs_fscache *ctx;
592 	struct erofs_domain *domain = EROFS_SB(sb)->domain;
593 	struct super_block *psb = erofs_pseudo_mnt->mnt_sb;
594 
595 	mutex_lock(&erofs_domain_cookies_lock);
596 	spin_lock(&psb->s_inode_list_lock);
597 	list_for_each_entry(inode, &psb->s_inodes, i_sb_list) {
598 		ctx = inode->i_private;
599 		if (!ctx || ctx->domain != domain || strcmp(ctx->name, name))
600 			continue;
601 		igrab(inode);
602 		spin_unlock(&psb->s_inode_list_lock);
603 		mutex_unlock(&erofs_domain_cookies_lock);
604 		return ctx;
605 	}
606 	spin_unlock(&psb->s_inode_list_lock);
607 	ctx = erofs_fscache_domain_init_cookie(sb, name, need_inode);
608 	mutex_unlock(&erofs_domain_cookies_lock);
609 	return ctx;
610 }
611 
612 struct erofs_fscache *erofs_fscache_register_cookie(struct super_block *sb,
613 						    char *name, bool need_inode)
614 {
615 	if (EROFS_SB(sb)->domain_id)
616 		return erofs_domain_register_cookie(sb, name, need_inode);
617 	return erofs_fscache_acquire_cookie(sb, name, need_inode);
618 }
619 
620 void erofs_fscache_unregister_cookie(struct erofs_fscache *ctx)
621 {
622 	bool drop;
623 	struct erofs_domain *domain;
624 
625 	if (!ctx)
626 		return;
627 	domain = ctx->domain;
628 	if (domain) {
629 		mutex_lock(&erofs_domain_cookies_lock);
630 		drop = atomic_read(&ctx->anon_inode->i_count) == 1;
631 		iput(ctx->anon_inode);
632 		mutex_unlock(&erofs_domain_cookies_lock);
633 		if (!drop)
634 			return;
635 	}
636 
637 	erofs_fscache_relinquish_cookie(ctx);
638 	erofs_fscache_domain_put(domain);
639 }
640 
641 int erofs_fscache_register_fs(struct super_block *sb)
642 {
643 	int ret;
644 	struct erofs_sb_info *sbi = EROFS_SB(sb);
645 	struct erofs_fscache *fscache;
646 
647 	if (sbi->domain_id)
648 		ret = erofs_fscache_register_domain(sb);
649 	else
650 		ret = erofs_fscache_register_volume(sb);
651 	if (ret)
652 		return ret;
653 
654 	/* acquired domain/volume will be relinquished in kill_sb() on error */
655 	fscache = erofs_fscache_register_cookie(sb, sbi->fsid, true);
656 	if (IS_ERR(fscache))
657 		return PTR_ERR(fscache);
658 
659 	sbi->s_fscache = fscache;
660 	return 0;
661 }
662 
663 void erofs_fscache_unregister_fs(struct super_block *sb)
664 {
665 	struct erofs_sb_info *sbi = EROFS_SB(sb);
666 
667 	erofs_fscache_unregister_cookie(sbi->s_fscache);
668 
669 	if (sbi->domain)
670 		erofs_fscache_domain_put(sbi->domain);
671 	else
672 		fscache_relinquish_volume(sbi->volume, NULL, false);
673 
674 	sbi->s_fscache = NULL;
675 	sbi->volume = NULL;
676 	sbi->domain = NULL;
677 }
678