1 // SPDX-License-Identifier: GPL-2.0-or-later
2 /*
3 * Copyright (C) 2022, Alibaba Cloud
4 * Copyright (C) 2022, Bytedance Inc. All rights reserved.
5 */
6 #include <linux/pseudo_fs.h>
7 #include <linux/fscache.h>
8 #include "internal.h"
9
/*
 * Held while walking or modifying erofs_domain_list; the pseudo mount
 * (erofs_pseudo_mnt) is also set up and torn down under this lock.
 */
static DEFINE_MUTEX(erofs_domain_list_lock);
/* Held while walking or modifying erofs_domain_cookies_list. */
static DEFINE_MUTEX(erofs_domain_cookies_lock);
/* Every live shared domain (struct erofs_domain, linked through ->list). */
static LIST_HEAD(erofs_domain_list);
/* Every cookie registered in a shared domain (struct erofs_fscache ->node). */
static LIST_HEAD(erofs_domain_cookies_list);
/*
 * Internal mount of erofs_anon_fs_type: created when a domain is initialized
 * while it is still NULL, and unmounted once the domain list becomes empty.
 */
static struct vfsmount *erofs_pseudo_mnt;
15
erofs_anon_init_fs_context(struct fs_context * fc)16 static int erofs_anon_init_fs_context(struct fs_context *fc)
17 {
18 return init_pseudo(fc, EROFS_SUPER_MAGIC) ? 0 : -ENOMEM;
19 }
20
/*
 * Anonymous pseudo filesystem type. It is mounted internally with
 * kern_mount() by erofs_fscache_init_domain(), and its superblock hosts the
 * anonymous inodes of shareable blobs (see erofs_fscache_acquire_cookie()).
 */
static struct file_system_type erofs_anon_fs_type = {
	.owner = THIS_MODULE,
	.name = "pseudo_erofs",
	.init_fs_context = erofs_anon_init_fs_context,
	.kill_sb = kill_anon_super,
};
27
/*
 * One read request against the cache.
 *
 * A request is either a "primary" request (->primary == NULL), which covers
 * the whole [start, start + len) range being read, or a chained request
 * created by erofs_fscache_req_chain() that points back at its primary and
 * holds a reference on it. The structure is freed when ->ref drops to zero
 * (see erofs_fscache_req_put()).
 */
struct erofs_fscache_request {
	struct erofs_fscache_request *primary;	/* NULL for a primary request */
	struct netfs_cache_resources cache_resources;	/* fscache read operation state */
	struct address_space *mapping; /* The mapping being accessed */
	loff_t start; /* Start position */
	size_t len; /* Length of the request */
	size_t submitted; /* Length of submitted */
	short error; /* 0 or error that occurred */
	refcount_t ref;	/* set to 1 at allocation */
};
38
/*
 * Allocate a zero-initialized request for [@start, @start + @len) of
 * @mapping. The caller owns the single initial reference.
 *
 * Returns the new request, or ERR_PTR(-ENOMEM) if the allocation fails.
 */
static struct erofs_fscache_request *erofs_fscache_req_alloc(struct address_space *mapping,
					     loff_t start, size_t len)
{
	struct erofs_fscache_request *req = kzalloc(sizeof(*req), GFP_KERNEL);

	if (!req)
		return ERR_PTR(-ENOMEM);

	refcount_set(&req->ref, 1);
	req->mapping = mapping;
	req->start = start;
	req->len = len;
	return req;
}
55
/*
 * Get the request object to use for the next @len bytes of @primary.
 *
 * While nothing has been submitted yet the primary request itself is reused;
 * afterwards a new request starting at the current submission offset is
 * allocated and linked to @primary. On success an extra reference on
 * @primary is taken in both cases.
 *
 * Returns the request to submit with, or an ERR_PTR() if allocation fails.
 */
static struct erofs_fscache_request *erofs_fscache_req_chain(struct erofs_fscache_request *primary,
					     size_t len)
{
	struct erofs_fscache_request *req = primary;

	if (primary->submitted) {
		req = erofs_fscache_req_alloc(primary->mapping,
				primary->start + primary->submitted, len);
		if (IS_ERR(req))
			return req;
		req->primary = primary;
	}

	refcount_inc(&primary->ref);
	return req;
}
75
erofs_fscache_req_complete(struct erofs_fscache_request * req)76 static void erofs_fscache_req_complete(struct erofs_fscache_request *req)
77 {
78 struct folio *folio;
79 bool failed = req->error;
80 pgoff_t start_page = req->start / PAGE_SIZE;
81 pgoff_t last_page = ((req->start + req->len) / PAGE_SIZE) - 1;
82
83 XA_STATE(xas, &req->mapping->i_pages, start_page);
84
85 rcu_read_lock();
86 xas_for_each(&xas, folio, last_page) {
87 if (xas_retry(&xas, folio))
88 continue;
89 if (!failed)
90 folio_mark_uptodate(folio);
91 folio_unlock(folio);
92 }
93 rcu_read_unlock();
94 }
95
erofs_fscache_req_put(struct erofs_fscache_request * req)96 static void erofs_fscache_req_put(struct erofs_fscache_request *req)
97 {
98 if (refcount_dec_and_test(&req->ref)) {
99 if (req->cache_resources.ops)
100 req->cache_resources.ops->end_operation(&req->cache_resources);
101 if (!req->primary)
102 erofs_fscache_req_complete(req);
103 else
104 erofs_fscache_req_put(req->primary);
105 kfree(req);
106 }
107 }
108
/*
 * Completion callback handed to fscache_read().
 *
 * @priv is the erofs_fscache_request the read was issued for. An error is
 * always recorded on the primary request (on @priv itself when it has no
 * primary), then the reference taken for this read is dropped.
 * @was_async is unused.
 */
static void erofs_fscache_subreq_complete(void *priv,
		ssize_t transferred_or_error, bool was_async)
{
	struct erofs_fscache_request *req = priv;

	if (IS_ERR_VALUE(transferred_or_error)) {
		struct erofs_fscache_request *owner =
			req->primary ? req->primary : req;

		owner->error = transferred_or_error;
	}

	erofs_fscache_req_put(req);
}
122
/*
 * Read data from fscache (cookie, pstart, len), and fill the read data into
 * the page cache described by (req->mapping, lstart, len), where lstart is
 * req->start + req->submitted. @pstart describes the start physical address
 * in the cache file.
 */
static int erofs_fscache_read_folios_async(struct fscache_cookie *cookie,
				struct erofs_fscache_request *req, loff_t pstart, size_t len)
{
	struct netfs_cache_resources *cres = &req->cache_resources;
	struct super_block *sb = req->mapping->host->i_sb;
	const loff_t lstart = req->start + req->submitted;
	struct iov_iter iter;
	size_t done;
	int ret;

	DBG_BUGON(len > req->len - req->submitted);

	ret = fscache_begin_read_operation(cres, cookie);
	if (ret)
		return ret;

	/* Issue one cache read per chunk the cache backend is willing to serve. */
	for (done = 0; done < len; ) {
		unsigned long flags = 1 << NETFS_SREQ_ONDEMAND;
		loff_t sstart = pstart + done;
		size_t slen = len - done;
		enum netfs_io_source source;

		/* The backend may shrink slen to what it can serve in one go. */
		source = cres->ops->prepare_ondemand_read(cres,
				sstart, &slen, LLONG_MAX, &flags, 0);
		if (WARN_ON(slen == 0))
			source = NETFS_INVALID_READ;
		if (source != NETFS_READ_FROM_CACHE) {
			erofs_err(sb, "failed to fscache prepare_read (source %d)", source);
			return -EIO;
		}

		/* This reference is dropped by erofs_fscache_subreq_complete(). */
		refcount_inc(&req->ref);
		iov_iter_xarray(&iter, ITER_DEST, &req->mapping->i_pages,
				lstart + done, slen);

		ret = fscache_read(cres, sstart, &iter, NETFS_READ_HOLE_FAIL,
				   erofs_fscache_subreq_complete, req);
		/* -EIOCBQUEUED is not treated as a failure. */
		if (ret && ret != -EIOCBQUEUED) {
			erofs_err(sb, "failed to fscache_read (ret %d)", ret);
			return ret;
		}

		done += slen;
	}
	DBG_BUGON(done != len);
	return 0;
}
177
erofs_fscache_meta_read_folio(struct file * data,struct folio * folio)178 static int erofs_fscache_meta_read_folio(struct file *data, struct folio *folio)
179 {
180 int ret;
181 struct erofs_fscache *ctx = folio_mapping(folio)->host->i_private;
182 struct erofs_fscache_request *req;
183
184 req = erofs_fscache_req_alloc(folio_mapping(folio),
185 folio_pos(folio), folio_size(folio));
186 if (IS_ERR(req)) {
187 folio_unlock(folio);
188 return PTR_ERR(req);
189 }
190
191 ret = erofs_fscache_read_folios_async(ctx->cookie, req,
192 folio_pos(folio), folio_size(folio));
193 if (ret)
194 req->error = ret;
195
196 erofs_fscache_req_put(req);
197 return ret;
198 }
199
erofs_fscache_data_read_slice(struct erofs_fscache_request * primary)200 static int erofs_fscache_data_read_slice(struct erofs_fscache_request *primary)
201 {
202 struct address_space *mapping = primary->mapping;
203 struct inode *inode = mapping->host;
204 struct super_block *sb = inode->i_sb;
205 struct erofs_fscache_request *req;
206 struct erofs_map_blocks map;
207 struct erofs_map_dev mdev;
208 struct iov_iter iter;
209 loff_t pos = primary->start + primary->submitted;
210 size_t count;
211 int ret;
212
213 map.m_la = pos;
214 ret = erofs_map_blocks(inode, &map);
215 if (ret)
216 return ret;
217
218 if (map.m_flags & EROFS_MAP_META) {
219 struct erofs_buf buf = __EROFS_BUF_INITIALIZER;
220 erofs_blk_t blknr;
221 size_t offset, size;
222 void *src;
223
224 /* For tail packing layout, the offset may be non-zero. */
225 offset = erofs_blkoff(sb, map.m_pa);
226 blknr = erofs_blknr(sb, map.m_pa);
227 size = map.m_llen;
228
229 src = erofs_read_metabuf(&buf, sb, blknr, EROFS_KMAP);
230 if (IS_ERR(src))
231 return PTR_ERR(src);
232
233 iov_iter_xarray(&iter, ITER_DEST, &mapping->i_pages, pos, PAGE_SIZE);
234 if (copy_to_iter(src + offset, size, &iter) != size) {
235 erofs_put_metabuf(&buf);
236 return -EFAULT;
237 }
238 iov_iter_zero(PAGE_SIZE - size, &iter);
239 erofs_put_metabuf(&buf);
240 primary->submitted += PAGE_SIZE;
241 return 0;
242 }
243
244 count = primary->len - primary->submitted;
245 if (!(map.m_flags & EROFS_MAP_MAPPED)) {
246 iov_iter_xarray(&iter, ITER_DEST, &mapping->i_pages, pos, count);
247 iov_iter_zero(count, &iter);
248 primary->submitted += count;
249 return 0;
250 }
251
252 count = min_t(size_t, map.m_llen - (pos - map.m_la), count);
253 DBG_BUGON(!count || count % PAGE_SIZE);
254
255 mdev = (struct erofs_map_dev) {
256 .m_deviceid = map.m_deviceid,
257 .m_pa = map.m_pa,
258 };
259 ret = erofs_map_dev(sb, &mdev);
260 if (ret)
261 return ret;
262
263 req = erofs_fscache_req_chain(primary, count);
264 if (IS_ERR(req))
265 return PTR_ERR(req);
266
267 ret = erofs_fscache_read_folios_async(mdev.m_fscache->cookie,
268 req, mdev.m_pa + (pos - map.m_la), count);
269 erofs_fscache_req_put(req);
270 primary->submitted += count;
271 return ret;
272 }
273
erofs_fscache_data_read(struct erofs_fscache_request * req)274 static int erofs_fscache_data_read(struct erofs_fscache_request *req)
275 {
276 int ret;
277
278 do {
279 ret = erofs_fscache_data_read_slice(req);
280 if (ret)
281 req->error = ret;
282 } while (!ret && req->submitted < req->len);
283
284 return ret;
285 }
286
/*
 * ->read_folio() for file data: build a request covering exactly @folio and
 * submit it. The folio is unlocked directly if the request cannot be
 * allocated; otherwise it is unlocked when the request completes.
 *
 * Returns 0 on successful submission or a negative errno.
 */
static int erofs_fscache_read_folio(struct file *file, struct folio *folio)
{
	struct erofs_fscache_request *req =
		erofs_fscache_req_alloc(folio_mapping(folio),
					folio_pos(folio), folio_size(folio));
	int ret;

	if (IS_ERR(req)) {
		folio_unlock(folio);
		return PTR_ERR(req);
	}

	ret = erofs_fscache_data_read(req);
	erofs_fscache_req_put(req);
	return ret;
}
303
erofs_fscache_readahead(struct readahead_control * rac)304 static void erofs_fscache_readahead(struct readahead_control *rac)
305 {
306 struct erofs_fscache_request *req;
307
308 if (!readahead_count(rac))
309 return;
310
311 req = erofs_fscache_req_alloc(rac->mapping,
312 readahead_pos(rac), readahead_length(rac));
313 if (IS_ERR(req))
314 return;
315
316 /* The request completion will drop refs on the folios. */
317 while (readahead_folio(rac))
318 ;
319
320 erofs_fscache_data_read(req);
321 erofs_fscache_req_put(req);
322 }
323
/*
 * Address space operations installed on the anonymous inode of each cache
 * blob by erofs_fscache_acquire_cookie().
 */
static const struct address_space_operations erofs_fscache_meta_aops = {
	.read_folio = erofs_fscache_meta_read_folio,
};
327
/*
 * Address space operations for reading file data through fscache. Not
 * static: exported for use outside this file.
 */
const struct address_space_operations erofs_fscache_access_aops = {
	.read_folio = erofs_fscache_read_folio,
	.readahead = erofs_fscache_readahead,
};
332
erofs_fscache_domain_put(struct erofs_domain * domain)333 static void erofs_fscache_domain_put(struct erofs_domain *domain)
334 {
335 mutex_lock(&erofs_domain_list_lock);
336 if (refcount_dec_and_test(&domain->ref)) {
337 list_del(&domain->list);
338 if (list_empty(&erofs_domain_list)) {
339 kern_unmount(erofs_pseudo_mnt);
340 erofs_pseudo_mnt = NULL;
341 }
342 fscache_relinquish_volume(domain->volume, NULL, false);
343 mutex_unlock(&erofs_domain_list_lock);
344 kfree(domain->domain_id);
345 kfree(domain);
346 return;
347 }
348 mutex_unlock(&erofs_domain_list_lock);
349 }
350
erofs_fscache_register_volume(struct super_block * sb)351 static int erofs_fscache_register_volume(struct super_block *sb)
352 {
353 struct erofs_sb_info *sbi = EROFS_SB(sb);
354 char *domain_id = sbi->domain_id;
355 struct fscache_volume *volume;
356 char *name;
357 int ret = 0;
358
359 name = kasprintf(GFP_KERNEL, "erofs,%s",
360 domain_id ? domain_id : sbi->fsid);
361 if (!name)
362 return -ENOMEM;
363
364 volume = fscache_acquire_volume(name, NULL, NULL, 0);
365 if (IS_ERR_OR_NULL(volume)) {
366 erofs_err(sb, "failed to register volume for %s", name);
367 ret = volume ? PTR_ERR(volume) : -EOPNOTSUPP;
368 volume = NULL;
369 }
370
371 sbi->volume = volume;
372 kfree(name);
373 return ret;
374 }
375
erofs_fscache_init_domain(struct super_block * sb)376 static int erofs_fscache_init_domain(struct super_block *sb)
377 {
378 int err;
379 struct erofs_domain *domain;
380 struct erofs_sb_info *sbi = EROFS_SB(sb);
381
382 domain = kzalloc(sizeof(struct erofs_domain), GFP_KERNEL);
383 if (!domain)
384 return -ENOMEM;
385
386 domain->domain_id = kstrdup(sbi->domain_id, GFP_KERNEL);
387 if (!domain->domain_id) {
388 kfree(domain);
389 return -ENOMEM;
390 }
391
392 err = erofs_fscache_register_volume(sb);
393 if (err)
394 goto out;
395
396 if (!erofs_pseudo_mnt) {
397 struct vfsmount *mnt = kern_mount(&erofs_anon_fs_type);
398 if (IS_ERR(mnt)) {
399 err = PTR_ERR(mnt);
400 goto out;
401 }
402 erofs_pseudo_mnt = mnt;
403 }
404
405 domain->volume = sbi->volume;
406 refcount_set(&domain->ref, 1);
407 list_add(&domain->list, &erofs_domain_list);
408 sbi->domain = domain;
409 return 0;
410 out:
411 kfree(domain->domain_id);
412 kfree(domain);
413 return err;
414 }
415
erofs_fscache_register_domain(struct super_block * sb)416 static int erofs_fscache_register_domain(struct super_block *sb)
417 {
418 int err;
419 struct erofs_domain *domain;
420 struct erofs_sb_info *sbi = EROFS_SB(sb);
421
422 mutex_lock(&erofs_domain_list_lock);
423 list_for_each_entry(domain, &erofs_domain_list, list) {
424 if (!strcmp(domain->domain_id, sbi->domain_id)) {
425 sbi->domain = domain;
426 sbi->volume = domain->volume;
427 refcount_inc(&domain->ref);
428 mutex_unlock(&erofs_domain_list_lock);
429 return 0;
430 }
431 }
432 err = erofs_fscache_init_domain(sb);
433 mutex_unlock(&erofs_domain_list_lock);
434 return err;
435 }
436
erofs_fscache_acquire_cookie(struct super_block * sb,char * name,unsigned int flags)437 static struct erofs_fscache *erofs_fscache_acquire_cookie(struct super_block *sb,
438 char *name, unsigned int flags)
439 {
440 struct fscache_volume *volume = EROFS_SB(sb)->volume;
441 struct erofs_fscache *ctx;
442 struct fscache_cookie *cookie;
443 struct super_block *isb;
444 struct inode *inode;
445 int ret;
446
447 ctx = kzalloc(sizeof(*ctx), GFP_KERNEL);
448 if (!ctx)
449 return ERR_PTR(-ENOMEM);
450 INIT_LIST_HEAD(&ctx->node);
451 refcount_set(&ctx->ref, 1);
452
453 cookie = fscache_acquire_cookie(volume, FSCACHE_ADV_WANT_CACHE_SIZE,
454 name, strlen(name), NULL, 0, 0);
455 if (!cookie) {
456 erofs_err(sb, "failed to get cookie for %s", name);
457 ret = -EINVAL;
458 goto err;
459 }
460 fscache_use_cookie(cookie, false);
461
462 /*
463 * Allocate anonymous inode in global pseudo mount for shareable blobs,
464 * so that they are accessible among erofs fs instances.
465 */
466 isb = flags & EROFS_REG_COOKIE_SHARE ? erofs_pseudo_mnt->mnt_sb : sb;
467 inode = new_inode(isb);
468 if (!inode) {
469 erofs_err(sb, "failed to get anon inode for %s", name);
470 ret = -ENOMEM;
471 goto err_cookie;
472 }
473
474 inode->i_size = OFFSET_MAX;
475 inode->i_mapping->a_ops = &erofs_fscache_meta_aops;
476 mapping_set_gfp_mask(inode->i_mapping, GFP_NOFS);
477 inode->i_blkbits = EROFS_SB(sb)->blkszbits;
478 inode->i_private = ctx;
479
480 ctx->cookie = cookie;
481 ctx->inode = inode;
482 return ctx;
483
484 err_cookie:
485 fscache_unuse_cookie(cookie, NULL, NULL);
486 fscache_relinquish_cookie(cookie, false);
487 err:
488 kfree(ctx);
489 return ERR_PTR(ret);
490 }
491
erofs_fscache_relinquish_cookie(struct erofs_fscache * ctx)492 static void erofs_fscache_relinquish_cookie(struct erofs_fscache *ctx)
493 {
494 fscache_unuse_cookie(ctx->cookie, NULL, NULL);
495 fscache_relinquish_cookie(ctx->cookie, false);
496 iput(ctx->inode);
497 kfree(ctx->name);
498 kfree(ctx);
499 }
500
erofs_domain_init_cookie(struct super_block * sb,char * name,unsigned int flags)501 static struct erofs_fscache *erofs_domain_init_cookie(struct super_block *sb,
502 char *name, unsigned int flags)
503 {
504 struct erofs_fscache *ctx;
505 struct erofs_domain *domain = EROFS_SB(sb)->domain;
506
507 ctx = erofs_fscache_acquire_cookie(sb, name, flags);
508 if (IS_ERR(ctx))
509 return ctx;
510
511 ctx->name = kstrdup(name, GFP_KERNEL);
512 if (!ctx->name) {
513 erofs_fscache_relinquish_cookie(ctx);
514 return ERR_PTR(-ENOMEM);
515 }
516
517 refcount_inc(&domain->ref);
518 ctx->domain = domain;
519 list_add(&ctx->node, &erofs_domain_cookies_list);
520 return ctx;
521 }
522
erofs_domain_register_cookie(struct super_block * sb,char * name,unsigned int flags)523 static struct erofs_fscache *erofs_domain_register_cookie(struct super_block *sb,
524 char *name, unsigned int flags)
525 {
526 struct erofs_fscache *ctx;
527 struct erofs_domain *domain = EROFS_SB(sb)->domain;
528
529 flags |= EROFS_REG_COOKIE_SHARE;
530 mutex_lock(&erofs_domain_cookies_lock);
531 list_for_each_entry(ctx, &erofs_domain_cookies_list, node) {
532 if (ctx->domain != domain || strcmp(ctx->name, name))
533 continue;
534 if (!(flags & EROFS_REG_COOKIE_NEED_NOEXIST)) {
535 refcount_inc(&ctx->ref);
536 } else {
537 erofs_err(sb, "%s already exists in domain %s", name,
538 domain->domain_id);
539 ctx = ERR_PTR(-EEXIST);
540 }
541 mutex_unlock(&erofs_domain_cookies_lock);
542 return ctx;
543 }
544 ctx = erofs_domain_init_cookie(sb, name, flags);
545 mutex_unlock(&erofs_domain_cookies_lock);
546 return ctx;
547 }
548
erofs_fscache_register_cookie(struct super_block * sb,char * name,unsigned int flags)549 struct erofs_fscache *erofs_fscache_register_cookie(struct super_block *sb,
550 char *name,
551 unsigned int flags)
552 {
553 if (EROFS_SB(sb)->domain_id)
554 return erofs_domain_register_cookie(sb, name, flags);
555 return erofs_fscache_acquire_cookie(sb, name, flags);
556 }
557
erofs_fscache_unregister_cookie(struct erofs_fscache * ctx)558 void erofs_fscache_unregister_cookie(struct erofs_fscache *ctx)
559 {
560 struct erofs_domain *domain = NULL;
561
562 if (!ctx)
563 return;
564 if (!ctx->domain)
565 return erofs_fscache_relinquish_cookie(ctx);
566
567 mutex_lock(&erofs_domain_cookies_lock);
568 if (refcount_dec_and_test(&ctx->ref)) {
569 domain = ctx->domain;
570 list_del(&ctx->node);
571 erofs_fscache_relinquish_cookie(ctx);
572 }
573 mutex_unlock(&erofs_domain_cookies_lock);
574 if (domain)
575 erofs_fscache_domain_put(domain);
576 }
577
erofs_fscache_register_fs(struct super_block * sb)578 int erofs_fscache_register_fs(struct super_block *sb)
579 {
580 int ret;
581 struct erofs_sb_info *sbi = EROFS_SB(sb);
582 struct erofs_fscache *fscache;
583 unsigned int flags = 0;
584
585 if (sbi->domain_id)
586 ret = erofs_fscache_register_domain(sb);
587 else
588 ret = erofs_fscache_register_volume(sb);
589 if (ret)
590 return ret;
591
592 /*
593 * When shared domain is enabled, using NEED_NOEXIST to guarantee
594 * the primary data blob (aka fsid) is unique in the shared domain.
595 *
596 * For non-shared-domain case, fscache_acquire_volume() invoked by
597 * erofs_fscache_register_volume() has already guaranteed
598 * the uniqueness of primary data blob.
599 *
600 * Acquired domain/volume will be relinquished in kill_sb() on error.
601 */
602 if (sbi->domain_id)
603 flags |= EROFS_REG_COOKIE_NEED_NOEXIST;
604 fscache = erofs_fscache_register_cookie(sb, sbi->fsid, flags);
605 if (IS_ERR(fscache))
606 return PTR_ERR(fscache);
607
608 sbi->s_fscache = fscache;
609 return 0;
610 }
611
erofs_fscache_unregister_fs(struct super_block * sb)612 void erofs_fscache_unregister_fs(struct super_block *sb)
613 {
614 struct erofs_sb_info *sbi = EROFS_SB(sb);
615
616 erofs_fscache_unregister_cookie(sbi->s_fscache);
617
618 if (sbi->domain)
619 erofs_fscache_domain_put(sbi->domain);
620 else
621 fscache_relinquish_volume(sbi->volume, NULL, false);
622
623 sbi->s_fscache = NULL;
624 sbi->volume = NULL;
625 sbi->domain = NULL;
626 }
627