// SPDX-License-Identifier: GPL-2.0-or-later
/*
 * Copyright (C) 2022, Alibaba Cloud
 * Copyright (C) 2022, Bytedance Inc. All rights reserved.
 */
#include <linux/pseudo_fs.h>
#include <linux/fscache.h>
#include "internal.h"

static DEFINE_MUTEX(erofs_domain_list_lock);
static DEFINE_MUTEX(erofs_domain_cookies_lock);
static LIST_HEAD(erofs_domain_list);
static LIST_HEAD(erofs_domain_cookies_list);
static struct vfsmount *erofs_pseudo_mnt;

static int erofs_anon_init_fs_context(struct fs_context *fc)
{
	return init_pseudo(fc, EROFS_SUPER_MAGIC) ? 0 : -ENOMEM;
}

static struct file_system_type erofs_anon_fs_type = {
	.owner		= THIS_MODULE,
	.name		= "pseudo_erofs",
	.init_fs_context = erofs_anon_init_fs_context,
	.kill_sb	= kill_anon_super,
};

struct erofs_fscache_request {
	struct erofs_fscache_request *primary;
	struct netfs_cache_resources cache_resources;
	struct address_space	*mapping;	/* The mapping being accessed */
	loff_t			start;		/* Start position */
	size_t			len;		/* Length of the request */
	size_t			submitted;	/* Length submitted so far */
	short			error;		/* 0 or error that occurred */
	refcount_t		ref;
};

static struct erofs_fscache_request *erofs_fscache_req_alloc(struct address_space *mapping,
					     loff_t start, size_t len)
{
	struct erofs_fscache_request *req;

	req = kzalloc(sizeof(struct erofs_fscache_request), GFP_KERNEL);
	if (!req)
		return ERR_PTR(-ENOMEM);

	req->mapping = mapping;
	req->start = start;
	req->len = len;
	refcount_set(&req->ref, 1);

	return req;
}

static struct erofs_fscache_request *erofs_fscache_req_chain(struct erofs_fscache_request *primary,
					     size_t len)
{
	struct erofs_fscache_request *req;

	/* use primary request for the first submission */
	if (!primary->submitted) {
		refcount_inc(&primary->ref);
		return primary;
	}

	req = erofs_fscache_req_alloc(primary->mapping,
			primary->start + primary->submitted, len);
	if (!IS_ERR(req)) {
		req->primary = primary;
		refcount_inc(&primary->ref);
	}
	return req;
}

static void erofs_fscache_req_complete(struct erofs_fscache_request *req)
{
	struct folio *folio;
	bool failed = req->error;
	pgoff_t start_page = req->start / PAGE_SIZE;
	pgoff_t last_page = ((req->start + req->len) / PAGE_SIZE) - 1;

	XA_STATE(xas, &req->mapping->i_pages, start_page);

	rcu_read_lock();
	xas_for_each(&xas, folio, last_page) {
		if (xas_retry(&xas, folio))
			continue;
		if (!failed)
			folio_mark_uptodate(folio);
		folio_unlock(folio);
	}
	rcu_read_unlock();
}

static void erofs_fscache_req_put(struct erofs_fscache_request *req)
{
	if (refcount_dec_and_test(&req->ref)) {
		if (req->cache_resources.ops)
			req->cache_resources.ops->end_operation(&req->cache_resources);
		if (!req->primary)
			erofs_fscache_req_complete(req);
		else
			erofs_fscache_req_put(req->primary);
		kfree(req);
	}
}

static void erofs_fscache_subreq_complete(void *priv,
		ssize_t transferred_or_error, bool was_async)
{
	struct erofs_fscache_request *req = priv;

	if (IS_ERR_VALUE(transferred_or_error)) {
		if (req->primary)
			req->primary->error = transferred_or_error;
		else
			req->error = transferred_or_error;
	}
	erofs_fscache_req_put(req);
}
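
/*
 * Request lifecycle: the primary request covers the whole byte range handed
 * in by ->read_folio() or ->readahead(). The first extent is submitted on
 * the primary itself; each further extent rides on a chained subrequest that
 * holds a reference on the primary and propagates its error, if any, back to
 * it. Once the last reference is dropped, erofs_fscache_req_complete()
 * unlocks (and, on success, marks uptodate) all folios in the range.
 */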

/*
 * Read data from fscache (cookie, pstart, len), and fill the read data into
 * page cache described by (req->mapping, lstart, len). @pstart describes the
 * start physical address in the cache file.
 */
static int erofs_fscache_read_folios_async(struct fscache_cookie *cookie,
		struct erofs_fscache_request *req, loff_t pstart, size_t len)
{
	enum netfs_io_source source;
	struct super_block *sb = req->mapping->host->i_sb;
	struct netfs_cache_resources *cres = &req->cache_resources;
	struct iov_iter iter;
	loff_t lstart = req->start + req->submitted;
	size_t done = 0;
	int ret;

	DBG_BUGON(len > req->len - req->submitted);

	ret = fscache_begin_read_operation(cres, cookie);
	if (ret)
		return ret;

	while (done < len) {
		loff_t sstart = pstart + done;
		size_t slen = len - done;
		unsigned long flags = 1 << NETFS_SREQ_ONDEMAND;

		source = cres->ops->prepare_ondemand_read(cres,
				sstart, &slen, LLONG_MAX, &flags, 0);
		if (WARN_ON(slen == 0))
			source = NETFS_INVALID_READ;
		if (source != NETFS_READ_FROM_CACHE) {
			erofs_err(sb, "failed to fscache prepare_read (source %d)", source);
			return -EIO;
		}

		refcount_inc(&req->ref);
		iov_iter_xarray(&iter, ITER_DEST, &req->mapping->i_pages,
				lstart + done, slen);

		ret = fscache_read(cres, sstart, &iter, NETFS_READ_HOLE_FAIL,
				   erofs_fscache_subreq_complete, req);
		if (ret == -EIOCBQUEUED)
			ret = 0;
		if (ret) {
			erofs_err(sb, "failed to fscache_read (ret %d)", ret);
			return ret;
		}

		done += slen;
	}
	DBG_BUGON(done != len);
	return 0;
}

static int erofs_fscache_meta_read_folio(struct file *data, struct folio *folio)
{
	int ret;
	struct erofs_fscache *ctx = folio_mapping(folio)->host->i_private;
	struct erofs_fscache_request *req;

	req = erofs_fscache_req_alloc(folio_mapping(folio),
				folio_pos(folio), folio_size(folio));
	if (IS_ERR(req)) {
		folio_unlock(folio);
		return PTR_ERR(req);
	}

	ret = erofs_fscache_read_folios_async(ctx->cookie, req,
				folio_pos(folio), folio_size(folio));
	if (ret)
		req->error = ret;

	erofs_fscache_req_put(req);
	return ret;
}
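
/*
 * Read a single slice of the request: map the logical extent at the current
 * position, then either copy inline (tail-packed) metadata straight into the
 * page cache, zero-fill an unmapped hole, or submit an asynchronous read
 * from the backing cache file for a mapped extent.
 */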
static int erofs_fscache_data_read_slice(struct erofs_fscache_request *primary)
{
	struct address_space *mapping = primary->mapping;
	struct inode *inode = mapping->host;
	struct super_block *sb = inode->i_sb;
	struct erofs_fscache_request *req;
	struct erofs_map_blocks map;
	struct erofs_map_dev mdev;
	struct iov_iter iter;
	loff_t pos = primary->start + primary->submitted;
	size_t count;
	int ret;

	map.m_la = pos;
	ret = erofs_map_blocks(inode, &map);
	if (ret)
		return ret;

	if (map.m_flags & EROFS_MAP_META) {
		struct erofs_buf buf = __EROFS_BUF_INITIALIZER;
		erofs_blk_t blknr;
		size_t offset, size;
		void *src;

		/* For tail packing layout, the offset may be non-zero. */
		offset = erofs_blkoff(sb, map.m_pa);
		blknr = erofs_blknr(sb, map.m_pa);
		size = map.m_llen;

		src = erofs_read_metabuf(&buf, sb, blknr, EROFS_KMAP);
		if (IS_ERR(src))
			return PTR_ERR(src);

		iov_iter_xarray(&iter, ITER_DEST, &mapping->i_pages, pos, PAGE_SIZE);
		if (copy_to_iter(src + offset, size, &iter) != size) {
			erofs_put_metabuf(&buf);
			return -EFAULT;
		}
		iov_iter_zero(PAGE_SIZE - size, &iter);
		erofs_put_metabuf(&buf);
		primary->submitted += PAGE_SIZE;
		return 0;
	}

	count = primary->len - primary->submitted;
	if (!(map.m_flags & EROFS_MAP_MAPPED)) {
		iov_iter_xarray(&iter, ITER_DEST, &mapping->i_pages, pos, count);
		iov_iter_zero(count, &iter);
		primary->submitted += count;
		return 0;
	}

	count = min_t(size_t, map.m_llen - (pos - map.m_la), count);
	DBG_BUGON(!count || count % PAGE_SIZE);

	mdev = (struct erofs_map_dev) {
		.m_deviceid = map.m_deviceid,
		.m_pa = map.m_pa,
	};
	ret = erofs_map_dev(sb, &mdev);
	if (ret)
		return ret;

	req = erofs_fscache_req_chain(primary, count);
	if (IS_ERR(req))
		return PTR_ERR(req);

	ret = erofs_fscache_read_folios_async(mdev.m_fscache->cookie,
			req, mdev.m_pa + (pos - map.m_la), count);
	erofs_fscache_req_put(req);
	primary->submitted += count;
	return ret;
}

static int erofs_fscache_data_read(struct erofs_fscache_request *req)
{
	int ret;

	do {
		ret = erofs_fscache_data_read_slice(req);
		if (ret)
			req->error = ret;
	} while (!ret && req->submitted < req->len);

	return ret;
}

static int erofs_fscache_read_folio(struct file *file, struct folio *folio)
{
	struct erofs_fscache_request *req;
	int ret;

	req = erofs_fscache_req_alloc(folio_mapping(folio),
			folio_pos(folio), folio_size(folio));
	if (IS_ERR(req)) {
		folio_unlock(folio);
		return PTR_ERR(req);
	}

	ret = erofs_fscache_data_read(req);
	erofs_fscache_req_put(req);
	return ret;
}
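
/*
 * All folios are consumed from the readahead control up front, so the
 * readahead core will not unlock them on return; they stay locked until the
 * request (and any chained subrequests) complete and
 * erofs_fscache_req_complete() unlocks the whole range.
 */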
static void erofs_fscache_readahead(struct readahead_control *rac)
{
	struct erofs_fscache_request *req;

	if (!readahead_count(rac))
		return;

	req = erofs_fscache_req_alloc(rac->mapping,
			readahead_pos(rac), readahead_length(rac));
	if (IS_ERR(req))
		return;

	/* The request completion will drop refs on the folios. */
	while (readahead_folio(rac))
		;

	erofs_fscache_data_read(req);
	erofs_fscache_req_put(req);
}

static const struct address_space_operations erofs_fscache_meta_aops = {
	.read_folio = erofs_fscache_meta_read_folio,
};

const struct address_space_operations erofs_fscache_access_aops = {
	.read_folio = erofs_fscache_read_folio,
	.readahead = erofs_fscache_readahead,
};

static void erofs_fscache_domain_put(struct erofs_domain *domain)
{
	mutex_lock(&erofs_domain_list_lock);
	if (refcount_dec_and_test(&domain->ref)) {
		list_del(&domain->list);
		if (list_empty(&erofs_domain_list)) {
			kern_unmount(erofs_pseudo_mnt);
			erofs_pseudo_mnt = NULL;
		}
		fscache_relinquish_volume(domain->volume, NULL, false);
		mutex_unlock(&erofs_domain_list_lock);
		kfree(domain->domain_id);
		kfree(domain);
		return;
	}
	mutex_unlock(&erofs_domain_list_lock);
}

static int erofs_fscache_register_volume(struct super_block *sb)
{
	struct erofs_sb_info *sbi = EROFS_SB(sb);
	char *domain_id = sbi->domain_id;
	struct fscache_volume *volume;
	char *name;
	int ret = 0;

	name = kasprintf(GFP_KERNEL, "erofs,%s",
			 domain_id ? domain_id : sbi->fsid);
	if (!name)
		return -ENOMEM;

	volume = fscache_acquire_volume(name, NULL, NULL, 0);
	if (IS_ERR_OR_NULL(volume)) {
		erofs_err(sb, "failed to register volume for %s", name);
		ret = volume ? PTR_ERR(volume) : -EOPNOTSUPP;
		volume = NULL;
	}

	sbi->volume = volume;
	kfree(name);
	return ret;
}

static int erofs_fscache_init_domain(struct super_block *sb)
{
	int err;
	struct erofs_domain *domain;
	struct erofs_sb_info *sbi = EROFS_SB(sb);

	domain = kzalloc(sizeof(struct erofs_domain), GFP_KERNEL);
	if (!domain)
		return -ENOMEM;

	domain->domain_id = kstrdup(sbi->domain_id, GFP_KERNEL);
	if (!domain->domain_id) {
		kfree(domain);
		return -ENOMEM;
	}

	err = erofs_fscache_register_volume(sb);
	if (err)
		goto out;

	if (!erofs_pseudo_mnt) {
		struct vfsmount *mnt = kern_mount(&erofs_anon_fs_type);

		if (IS_ERR(mnt)) {
			err = PTR_ERR(mnt);
			goto out;
		}
		erofs_pseudo_mnt = mnt;
	}

	domain->volume = sbi->volume;
	refcount_set(&domain->ref, 1);
	list_add(&domain->list, &erofs_domain_list);
	sbi->domain = domain;
	return 0;
out:
	kfree(domain->domain_id);
	kfree(domain);
	return err;
}

static int erofs_fscache_register_domain(struct super_block *sb)
{
	int err;
	struct erofs_domain *domain;
	struct erofs_sb_info *sbi = EROFS_SB(sb);

	mutex_lock(&erofs_domain_list_lock);
	list_for_each_entry(domain, &erofs_domain_list, list) {
		if (!strcmp(domain->domain_id, sbi->domain_id)) {
			sbi->domain = domain;
			sbi->volume = domain->volume;
			refcount_inc(&domain->ref);
			mutex_unlock(&erofs_domain_list_lock);
			return 0;
		}
	}
	err = erofs_fscache_init_domain(sb);
	mutex_unlock(&erofs_domain_list_lock);
	return err;
}
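
/*
 * Acquire an fscache cookie for the named blob and back it with an anonymous
 * inode whose page cache buffers data read from the cache file. Shareable
 * blobs (EROFS_REG_COOKIE_SHARE) take their inode from the global pseudo
 * mount so that they stay accessible across erofs instances.
 */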
static struct erofs_fscache *erofs_fscache_acquire_cookie(struct super_block *sb,
						char *name, unsigned int flags)
{
	struct fscache_volume *volume = EROFS_SB(sb)->volume;
	struct erofs_fscache *ctx;
	struct fscache_cookie *cookie;
	struct super_block *isb;
	struct inode *inode;
	int ret;

	ctx = kzalloc(sizeof(*ctx), GFP_KERNEL);
	if (!ctx)
		return ERR_PTR(-ENOMEM);
	INIT_LIST_HEAD(&ctx->node);
	refcount_set(&ctx->ref, 1);

	cookie = fscache_acquire_cookie(volume, FSCACHE_ADV_WANT_CACHE_SIZE,
					name, strlen(name), NULL, 0, 0);
	if (!cookie) {
		erofs_err(sb, "failed to get cookie for %s", name);
		ret = -EINVAL;
		goto err;
	}
	fscache_use_cookie(cookie, false);

	/*
	 * Allocate anonymous inode in global pseudo mount for shareable blobs,
	 * so that they are accessible among erofs fs instances.
	 */
	isb = flags & EROFS_REG_COOKIE_SHARE ? erofs_pseudo_mnt->mnt_sb : sb;
	inode = new_inode(isb);
	if (!inode) {
		erofs_err(sb, "failed to get anon inode for %s", name);
		ret = -ENOMEM;
		goto err_cookie;
	}

	inode->i_size = OFFSET_MAX;
	inode->i_mapping->a_ops = &erofs_fscache_meta_aops;
	mapping_set_gfp_mask(inode->i_mapping, GFP_NOFS);
	inode->i_blkbits = EROFS_SB(sb)->blkszbits;
	inode->i_private = ctx;

	ctx->cookie = cookie;
	ctx->inode = inode;
	return ctx;

err_cookie:
	fscache_unuse_cookie(cookie, NULL, NULL);
	fscache_relinquish_cookie(cookie, false);
err:
	kfree(ctx);
	return ERR_PTR(ret);
}

static void erofs_fscache_relinquish_cookie(struct erofs_fscache *ctx)
{
	fscache_unuse_cookie(ctx->cookie, NULL, NULL);
	fscache_relinquish_cookie(ctx->cookie, false);
	iput(ctx->inode);
	kfree(ctx->name);
	kfree(ctx);
}

static struct erofs_fscache *erofs_domain_init_cookie(struct super_block *sb,
		char *name, unsigned int flags)
{
	struct erofs_fscache *ctx;
	struct erofs_domain *domain = EROFS_SB(sb)->domain;

	ctx = erofs_fscache_acquire_cookie(sb, name, flags);
	if (IS_ERR(ctx))
		return ctx;

	ctx->name = kstrdup(name, GFP_KERNEL);
	if (!ctx->name) {
		erofs_fscache_relinquish_cookie(ctx);
		return ERR_PTR(-ENOMEM);
	}

	refcount_inc(&domain->ref);
	ctx->domain = domain;
	list_add(&ctx->node, &erofs_domain_cookies_list);
	return ctx;
}

static struct erofs_fscache *erofs_domain_register_cookie(struct super_block *sb,
		char *name, unsigned int flags)
{
	struct erofs_fscache *ctx;
	struct erofs_domain *domain = EROFS_SB(sb)->domain;

	flags |= EROFS_REG_COOKIE_SHARE;
	mutex_lock(&erofs_domain_cookies_lock);
	list_for_each_entry(ctx, &erofs_domain_cookies_list, node) {
		if (ctx->domain != domain || strcmp(ctx->name, name))
			continue;
		if (!(flags & EROFS_REG_COOKIE_NEED_NOEXIST)) {
			refcount_inc(&ctx->ref);
		} else {
			erofs_err(sb, "%s already exists in domain %s", name,
				  domain->domain_id);
			ctx = ERR_PTR(-EEXIST);
		}
		mutex_unlock(&erofs_domain_cookies_lock);
		return ctx;
	}
	ctx = erofs_domain_init_cookie(sb, name, flags);
	mutex_unlock(&erofs_domain_cookies_lock);
	return ctx;
}

struct erofs_fscache *erofs_fscache_register_cookie(struct super_block *sb,
						    char *name,
						    unsigned int flags)
{
	if (EROFS_SB(sb)->domain_id)
		return erofs_domain_register_cookie(sb, name, flags);
	return erofs_fscache_acquire_cookie(sb, name, flags);
}
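
/*
 * Drop a reference on the cookie. For domain cookies, the final put unhooks
 * the cookie from the shared list under erofs_domain_cookies_lock, and the
 * domain reference is dropped only after that lock is released.
 */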
void erofs_fscache_unregister_cookie(struct erofs_fscache *ctx)
{
	struct erofs_domain *domain = NULL;

	if (!ctx)
		return;
	if (!ctx->domain)
		return erofs_fscache_relinquish_cookie(ctx);

	mutex_lock(&erofs_domain_cookies_lock);
	if (refcount_dec_and_test(&ctx->ref)) {
		domain = ctx->domain;
		list_del(&ctx->node);
		erofs_fscache_relinquish_cookie(ctx);
	}
	mutex_unlock(&erofs_domain_cookies_lock);
	if (domain)
		erofs_fscache_domain_put(domain);
}

int erofs_fscache_register_fs(struct super_block *sb)
{
	int ret;
	struct erofs_sb_info *sbi = EROFS_SB(sb);
	struct erofs_fscache *fscache;
	unsigned int flags = 0;

	if (sbi->domain_id)
		ret = erofs_fscache_register_domain(sb);
	else
		ret = erofs_fscache_register_volume(sb);
	if (ret)
		return ret;

	/*
	 * When the shared domain is enabled, use NEED_NOEXIST to guarantee
	 * that the primary data blob (aka fsid) is unique in the shared
	 * domain.
	 *
	 * In the non-shared-domain case, fscache_acquire_volume() invoked by
	 * erofs_fscache_register_volume() has already guaranteed the
	 * uniqueness of the primary data blob.
	 *
	 * Acquired domain/volume will be relinquished in kill_sb() on error.
	 */
	if (sbi->domain_id)
		flags |= EROFS_REG_COOKIE_NEED_NOEXIST;
	fscache = erofs_fscache_register_cookie(sb, sbi->fsid, flags);
	if (IS_ERR(fscache))
		return PTR_ERR(fscache);

	sbi->s_fscache = fscache;
	return 0;
}

void erofs_fscache_unregister_fs(struct super_block *sb)
{
	struct erofs_sb_info *sbi = EROFS_SB(sb);

	erofs_fscache_unregister_cookie(sbi->s_fscache);

	if (sbi->domain)
		erofs_fscache_domain_put(sbi->domain);
	else
		fscache_relinquish_volume(sbi->volume, NULL, false);

	sbi->s_fscache = NULL;
	sbi->volume = NULL;
	sbi->domain = NULL;
}