// SPDX-License-Identifier: GPL-2.0-or-later
/*
 * Copyright (C) 2022, Alibaba Cloud
 * Copyright (C) 2022, Bytedance Inc. All rights reserved.
 */
#include <linux/fscache.h>
#include "internal.h"

static DEFINE_MUTEX(erofs_domain_list_lock);
static DEFINE_MUTEX(erofs_domain_cookies_lock);
static LIST_HEAD(erofs_domain_list);
static struct vfsmount *erofs_pseudo_mnt;

static struct netfs_io_request *erofs_fscache_alloc_request(struct address_space *mapping,
					     loff_t start, size_t len)
{
	struct netfs_io_request *rreq;

	rreq = kzalloc(sizeof(struct netfs_io_request), GFP_KERNEL);
	if (!rreq)
		return ERR_PTR(-ENOMEM);

	rreq->start = start;
	rreq->len = len;
	rreq->mapping = mapping;
	rreq->inode = mapping->host;
	INIT_LIST_HEAD(&rreq->subrequests);
	refcount_set(&rreq->ref, 1);
	return rreq;
}

static void erofs_fscache_put_request(struct netfs_io_request *rreq)
{
	if (!refcount_dec_and_test(&rreq->ref))
		return;
	if (rreq->cache_resources.ops)
		rreq->cache_resources.ops->end_operation(&rreq->cache_resources);
	kfree(rreq);
}

static void erofs_fscache_put_subrequest(struct netfs_io_subrequest *subreq)
{
	if (!refcount_dec_and_test(&subreq->ref))
		return;
	erofs_fscache_put_request(subreq->rreq);
	kfree(subreq);
}

static void erofs_fscache_clear_subrequests(struct netfs_io_request *rreq)
{
	struct netfs_io_subrequest *subreq;

	while (!list_empty(&rreq->subrequests)) {
		subreq = list_first_entry(&rreq->subrequests,
				struct netfs_io_subrequest, rreq_link);
		list_del(&subreq->rreq_link);
		erofs_fscache_put_subrequest(subreq);
	}
}

static void erofs_fscache_rreq_unlock_folios(struct netfs_io_request *rreq)
{
	struct netfs_io_subrequest *subreq;
	struct folio *folio;
	unsigned int iopos = 0;
	pgoff_t start_page = rreq->start / PAGE_SIZE;
	pgoff_t last_page = ((rreq->start + rreq->len) / PAGE_SIZE) - 1;
	bool subreq_failed = false;

	XA_STATE(xas, &rreq->mapping->i_pages, start_page);

	subreq = list_first_entry(&rreq->subrequests,
				  struct netfs_io_subrequest, rreq_link);
	subreq_failed = (subreq->error < 0);

	rcu_read_lock();
	xas_for_each(&xas, folio, last_page) {
		unsigned int pgpos, pgend;
		bool pg_failed = false;

		if (xas_retry(&xas, folio))
			continue;

		pgpos = (folio_index(folio) - start_page) * PAGE_SIZE;
		pgend = pgpos + folio_size(folio);

		for (;;) {
			if (!subreq) {
				pg_failed = true;
				break;
			}

			pg_failed |= subreq_failed;
			if (pgend < iopos + subreq->len)
				break;

			iopos += subreq->len;
			if (!list_is_last(&subreq->rreq_link,
					  &rreq->subrequests)) {
				subreq = list_next_entry(subreq, rreq_link);
				subreq_failed = (subreq->error < 0);
			} else {
				subreq = NULL;
				subreq_failed = false;
			}
			if (pgend == iopos)
				break;
		}

		if (!pg_failed)
			folio_mark_uptodate(folio);

		folio_unlock(folio);
	}
	rcu_read_unlock();
}

static void erofs_fscache_rreq_complete(struct netfs_io_request *rreq)
{
	erofs_fscache_rreq_unlock_folios(rreq);
	erofs_fscache_clear_subrequests(rreq);
	erofs_fscache_put_request(rreq);
}
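/*
 * Completion callback passed to fscache_read(): record any error on the
 * subrequest, and complete the whole request once the last outstanding
 * subrequest has finished.
 */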
static void erofs_fscache_subreq_complete(void *priv,
		ssize_t transferred_or_error, bool was_async)
{
	struct netfs_io_subrequest *subreq = priv;
	struct netfs_io_request *rreq = subreq->rreq;

	if (IS_ERR_VALUE(transferred_or_error))
		subreq->error = transferred_or_error;

	if (atomic_dec_and_test(&rreq->nr_outstanding))
		erofs_fscache_rreq_complete(rreq);

	erofs_fscache_put_subrequest(subreq);
}

/*
 * Read data from fscache into the page cache region described by @rreq.  Both
 * the start and the length of @rreq shall be aligned to PAGE_SIZE.  @pstart is
 * the physical offset in the cache file to start reading from.
 */
static int erofs_fscache_read_folios_async(struct fscache_cookie *cookie,
					   struct netfs_io_request *rreq, loff_t pstart)
{
	enum netfs_io_source source;
	struct super_block *sb = rreq->mapping->host->i_sb;
	struct netfs_io_subrequest *subreq;
	struct netfs_cache_resources *cres = &rreq->cache_resources;
	struct iov_iter iter;
	loff_t start = rreq->start;
	size_t len = rreq->len;
	size_t done = 0;
	int ret;

	atomic_set(&rreq->nr_outstanding, 1);

	ret = fscache_begin_read_operation(cres, cookie);
	if (ret)
		goto out;

	while (done < len) {
		subreq = kzalloc(sizeof(struct netfs_io_subrequest),
				 GFP_KERNEL);
		if (subreq) {
			INIT_LIST_HEAD(&subreq->rreq_link);
			refcount_set(&subreq->ref, 2);
			subreq->rreq = rreq;
			refcount_inc(&rreq->ref);
		} else {
			ret = -ENOMEM;
			goto out;
		}

		subreq->start = pstart + done;
		subreq->len = len - done;
		subreq->flags = 1 << NETFS_SREQ_ONDEMAND;

		list_add_tail(&subreq->rreq_link, &rreq->subrequests);

		source = cres->ops->prepare_read(subreq, LLONG_MAX);
		if (WARN_ON(subreq->len == 0))
			source = NETFS_INVALID_READ;
		if (source != NETFS_READ_FROM_CACHE) {
			erofs_err(sb, "failed to fscache prepare_read (source %d)",
				  source);
			ret = -EIO;
			subreq->error = ret;
			erofs_fscache_put_subrequest(subreq);
			goto out;
		}

		atomic_inc(&rreq->nr_outstanding);

		iov_iter_xarray(&iter, READ, &rreq->mapping->i_pages,
				start + done, subreq->len);

		ret = fscache_read(cres, subreq->start, &iter,
				   NETFS_READ_HOLE_FAIL,
				   erofs_fscache_subreq_complete, subreq);
		if (ret == -EIOCBQUEUED)
			ret = 0;
		if (ret) {
			erofs_err(sb, "failed to fscache_read (ret %d)", ret);
			goto out;
		}

		done += subreq->len;
	}
out:
	if (atomic_dec_and_test(&rreq->nr_outstanding))
		erofs_fscache_rreq_complete(rreq);

	return ret;
}
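/*
 * .read_folio() for the anonymous inodes backing fscache cookies (metadata
 * access): map the folio position through erofs_map_dev() and read it
 * asynchronously from the corresponding cache file.
 */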
static int erofs_fscache_meta_read_folio(struct file *data, struct folio *folio)
{
	int ret;
	struct super_block *sb = folio_mapping(folio)->host->i_sb;
	struct netfs_io_request *rreq;
	struct erofs_map_dev mdev = {
		.m_deviceid = 0,
		.m_pa = folio_pos(folio),
	};

	ret = erofs_map_dev(sb, &mdev);
	if (ret)
		goto out;

	rreq = erofs_fscache_alloc_request(folio_mapping(folio),
				folio_pos(folio), folio_size(folio));
	if (IS_ERR(rreq)) {
		ret = PTR_ERR(rreq);
		goto out;
	}

	return erofs_fscache_read_folios_async(mdev.m_fscache->cookie,
				rreq, mdev.m_pa);
out:
	folio_unlock(folio);
	return ret;
}

/*
 * Read into the page cache in the range described by (@pos, @len).
 *
 * On return, if the output @unlock is true, the caller is responsible for page
 * unlocking; otherwise the callee takes over that responsibility through the
 * netfs_io_request interface.
 *
 * The return value is the number of bytes successfully handled, or a negative
 * error code on failure.  The one exception: once a netfs_io_request has been
 * allocated, the length of the range (rather than an error code) is returned
 * on failure, so that .readahead() can still advance @rac accordingly.
 */
static int erofs_fscache_data_read(struct address_space *mapping,
				   loff_t pos, size_t len, bool *unlock)
{
	struct inode *inode = mapping->host;
	struct super_block *sb = inode->i_sb;
	struct netfs_io_request *rreq;
	struct erofs_map_blocks map;
	struct erofs_map_dev mdev;
	struct iov_iter iter;
	size_t count;
	int ret;

	*unlock = true;

	map.m_la = pos;
	ret = erofs_map_blocks(inode, &map, EROFS_GET_BLOCKS_RAW);
	if (ret)
		return ret;

	if (map.m_flags & EROFS_MAP_META) {
		struct erofs_buf buf = __EROFS_BUF_INITIALIZER;
		erofs_blk_t blknr;
		size_t offset, size;
		void *src;

		/* For tail packing layout, the offset may be non-zero. */
		offset = erofs_blkoff(map.m_pa);
		blknr = erofs_blknr(map.m_pa);
		size = map.m_llen;

		src = erofs_read_metabuf(&buf, sb, blknr, EROFS_KMAP);
		if (IS_ERR(src))
			return PTR_ERR(src);

		iov_iter_xarray(&iter, READ, &mapping->i_pages, pos, PAGE_SIZE);
		if (copy_to_iter(src + offset, size, &iter) != size) {
			erofs_put_metabuf(&buf);
			return -EFAULT;
		}
		iov_iter_zero(PAGE_SIZE - size, &iter);
		erofs_put_metabuf(&buf);
		return PAGE_SIZE;
	}

	if (!(map.m_flags & EROFS_MAP_MAPPED)) {
		count = len;
		iov_iter_xarray(&iter, READ, &mapping->i_pages, pos, count);
		iov_iter_zero(count, &iter);
		return count;
	}

	count = min_t(size_t, map.m_llen - (pos - map.m_la), len);
	DBG_BUGON(!count || count % PAGE_SIZE);

	mdev = (struct erofs_map_dev) {
		.m_deviceid = map.m_deviceid,
		.m_pa = map.m_pa,
	};
	ret = erofs_map_dev(sb, &mdev);
	if (ret)
		return ret;

	rreq = erofs_fscache_alloc_request(mapping, pos, count);
	if (IS_ERR(rreq))
		return PTR_ERR(rreq);

	*unlock = false;
	erofs_fscache_read_folios_async(mdev.m_fscache->cookie,
			rreq, mdev.m_pa + (pos - map.m_la));
	return count;
}
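/*
 * .read_folio() and .readahead() for regular file data in fscache mode.  Both
 * delegate to erofs_fscache_data_read() and follow its unlocking contract
 * described above.
 */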
static int erofs_fscache_read_folio(struct file *file, struct folio *folio)
{
	bool unlock;
	int ret;

	DBG_BUGON(folio_size(folio) != EROFS_BLKSIZ);

	ret = erofs_fscache_data_read(folio_mapping(folio), folio_pos(folio),
				      folio_size(folio), &unlock);
	if (unlock) {
		if (ret > 0)
			folio_mark_uptodate(folio);
		folio_unlock(folio);
	}
	return ret < 0 ? ret : 0;
}

static void erofs_fscache_readahead(struct readahead_control *rac)
{
	struct folio *folio;
	size_t len, done = 0;
	loff_t start, pos;
	bool unlock;
	int ret, size;

	if (!readahead_count(rac))
		return;

	start = readahead_pos(rac);
	len = readahead_length(rac);

	do {
		pos = start + done;
		ret = erofs_fscache_data_read(rac->mapping, pos,
					      len - done, &unlock);
		if (ret <= 0)
			return;

		size = ret;
		while (size) {
			folio = readahead_folio(rac);
			size -= folio_size(folio);
			if (unlock) {
				folio_mark_uptodate(folio);
				folio_unlock(folio);
			}
		}
	} while ((done += ret) < len);
}

static const struct address_space_operations erofs_fscache_meta_aops = {
	.read_folio = erofs_fscache_meta_read_folio,
};

const struct address_space_operations erofs_fscache_access_aops = {
	.read_folio = erofs_fscache_read_folio,
	.readahead = erofs_fscache_readahead,
};

static void erofs_fscache_domain_put(struct erofs_domain *domain)
{
	if (!domain)
		return;
	mutex_lock(&erofs_domain_list_lock);
	if (refcount_dec_and_test(&domain->ref)) {
		list_del(&domain->list);
		if (list_empty(&erofs_domain_list)) {
			kern_unmount(erofs_pseudo_mnt);
			erofs_pseudo_mnt = NULL;
		}
		mutex_unlock(&erofs_domain_list_lock);
		fscache_relinquish_volume(domain->volume, NULL, false);
		kfree(domain->domain_id);
		kfree(domain);
		return;
	}
	mutex_unlock(&erofs_domain_list_lock);
}
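/*
 * An fscache volume is named "erofs,<domain_id>" when a domain is used, or
 * "erofs,<fsid>" otherwise.  Filesystems sharing a domain_id also share one
 * volume and one set of domain-wide cookies.
 */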
static int erofs_fscache_register_volume(struct super_block *sb)
{
	struct erofs_sb_info *sbi = EROFS_SB(sb);
	char *domain_id = sbi->domain_id;
	struct fscache_volume *volume;
	char *name;
	int ret = 0;

	name = kasprintf(GFP_KERNEL, "erofs,%s",
			 domain_id ? domain_id : sbi->fsid);
	if (!name)
		return -ENOMEM;

	volume = fscache_acquire_volume(name, NULL, NULL, 0);
	if (IS_ERR_OR_NULL(volume)) {
		erofs_err(sb, "failed to register volume for %s", name);
		ret = volume ? PTR_ERR(volume) : -EOPNOTSUPP;
		volume = NULL;
	}

	sbi->volume = volume;
	kfree(name);
	return ret;
}

static int erofs_fscache_init_domain(struct super_block *sb)
{
	int err;
	struct erofs_domain *domain;
	struct erofs_sb_info *sbi = EROFS_SB(sb);

	domain = kzalloc(sizeof(struct erofs_domain), GFP_KERNEL);
	if (!domain)
		return -ENOMEM;

	domain->domain_id = kstrdup(sbi->domain_id, GFP_KERNEL);
	if (!domain->domain_id) {
		kfree(domain);
		return -ENOMEM;
	}

	err = erofs_fscache_register_volume(sb);
	if (err)
		goto out;

	if (!erofs_pseudo_mnt) {
		erofs_pseudo_mnt = kern_mount(&erofs_fs_type);
		if (IS_ERR(erofs_pseudo_mnt)) {
			err = PTR_ERR(erofs_pseudo_mnt);
			goto out;
		}
	}

	domain->volume = sbi->volume;
	refcount_set(&domain->ref, 1);
	list_add(&domain->list, &erofs_domain_list);
	sbi->domain = domain;
	return 0;
out:
	kfree(domain->domain_id);
	kfree(domain);
	return err;
}

static int erofs_fscache_register_domain(struct super_block *sb)
{
	int err;
	struct erofs_domain *domain;
	struct erofs_sb_info *sbi = EROFS_SB(sb);

	mutex_lock(&erofs_domain_list_lock);
	list_for_each_entry(domain, &erofs_domain_list, list) {
		if (!strcmp(domain->domain_id, sbi->domain_id)) {
			sbi->domain = domain;
			sbi->volume = domain->volume;
			refcount_inc(&domain->ref);
			mutex_unlock(&erofs_domain_list_lock);
			return 0;
		}
	}
	err = erofs_fscache_init_domain(sb);
	mutex_unlock(&erofs_domain_list_lock);
	return err;
}

static
struct erofs_fscache *erofs_fscache_acquire_cookie(struct super_block *sb,
						   char *name, bool need_inode)
{
	struct fscache_volume *volume = EROFS_SB(sb)->volume;
	struct erofs_fscache *ctx;
	struct fscache_cookie *cookie;
	int ret;

	ctx = kzalloc(sizeof(*ctx), GFP_KERNEL);
	if (!ctx)
		return ERR_PTR(-ENOMEM);

	cookie = fscache_acquire_cookie(volume, FSCACHE_ADV_WANT_CACHE_SIZE,
					name, strlen(name), NULL, 0, 0);
	if (!cookie) {
		erofs_err(sb, "failed to get cookie for %s", name);
		ret = -EINVAL;
		goto err;
	}

	fscache_use_cookie(cookie, false);
	ctx->cookie = cookie;

	if (need_inode) {
		struct inode *const inode = new_inode(sb);

		if (!inode) {
			erofs_err(sb, "failed to get anon inode for %s", name);
			ret = -ENOMEM;
			goto err_cookie;
		}

		set_nlink(inode, 1);
		inode->i_size = OFFSET_MAX;
		inode->i_mapping->a_ops = &erofs_fscache_meta_aops;
		mapping_set_gfp_mask(inode->i_mapping, GFP_NOFS);

		ctx->inode = inode;
	}

	return ctx;

err_cookie:
	fscache_unuse_cookie(ctx->cookie, NULL, NULL);
	fscache_relinquish_cookie(ctx->cookie, false);
err:
	kfree(ctx);
	return ERR_PTR(ret);
}

static void erofs_fscache_relinquish_cookie(struct erofs_fscache *ctx)
{
	fscache_unuse_cookie(ctx->cookie, NULL, NULL);
	fscache_relinquish_cookie(ctx->cookie, false);
	iput(ctx->inode);
	kfree(ctx->name);
	kfree(ctx);
}
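/*
 * Domain-wide cookies are shared through anonymous inodes hung off the erofs
 * pseudo mount: each inode's i_private points at the erofs_fscache context,
 * and lookups match on (domain, name) under erofs_domain_cookies_lock.
 */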
static
struct erofs_fscache *erofs_fscache_domain_init_cookie(struct super_block *sb,
						       char *name, bool need_inode)
{
	int err;
	struct inode *inode;
	struct erofs_fscache *ctx;
	struct erofs_domain *domain = EROFS_SB(sb)->domain;

	ctx = erofs_fscache_acquire_cookie(sb, name, need_inode);
	if (IS_ERR(ctx))
		return ctx;

	ctx->name = kstrdup(name, GFP_KERNEL);
	if (!ctx->name) {
		err = -ENOMEM;
		goto out;
	}

	inode = new_inode(erofs_pseudo_mnt->mnt_sb);
	if (!inode) {
		err = -ENOMEM;
		goto out;
	}

	ctx->domain = domain;
	ctx->anon_inode = inode;
	inode->i_private = ctx;
	refcount_inc(&domain->ref);
	return ctx;
out:
	erofs_fscache_relinquish_cookie(ctx);
	return ERR_PTR(err);
}

static
struct erofs_fscache *erofs_domain_register_cookie(struct super_block *sb,
						   char *name, bool need_inode)
{
	struct inode *inode;
	struct erofs_fscache *ctx;
	struct erofs_domain *domain = EROFS_SB(sb)->domain;
	struct super_block *psb = erofs_pseudo_mnt->mnt_sb;

	mutex_lock(&erofs_domain_cookies_lock);
	spin_lock(&psb->s_inode_list_lock);
	list_for_each_entry(inode, &psb->s_inodes, i_sb_list) {
		ctx = inode->i_private;
		if (!ctx || ctx->domain != domain || strcmp(ctx->name, name))
			continue;
		igrab(inode);
		spin_unlock(&psb->s_inode_list_lock);
		mutex_unlock(&erofs_domain_cookies_lock);
		return ctx;
	}
	spin_unlock(&psb->s_inode_list_lock);
	ctx = erofs_fscache_domain_init_cookie(sb, name, need_inode);
	mutex_unlock(&erofs_domain_cookies_lock);
	return ctx;
}

struct erofs_fscache *erofs_fscache_register_cookie(struct super_block *sb,
						    char *name, bool need_inode)
{
	if (EROFS_SB(sb)->domain_id)
		return erofs_domain_register_cookie(sb, name, need_inode);
	return erofs_fscache_acquire_cookie(sb, name, need_inode);
}

void erofs_fscache_unregister_cookie(struct erofs_fscache *ctx)
{
	bool drop;
	struct erofs_domain *domain;

	if (!ctx)
		return;
	domain = ctx->domain;
	if (domain) {
		mutex_lock(&erofs_domain_cookies_lock);
		drop = atomic_read(&ctx->anon_inode->i_count) == 1;
		iput(ctx->anon_inode);
		mutex_unlock(&erofs_domain_cookies_lock);
		if (!drop)
			return;
	}

	erofs_fscache_relinquish_cookie(ctx);
	erofs_fscache_domain_put(domain);
}

int erofs_fscache_register_fs(struct super_block *sb)
{
	int ret;
	struct erofs_sb_info *sbi = EROFS_SB(sb);
	struct erofs_fscache *fscache;

	if (sbi->domain_id)
		ret = erofs_fscache_register_domain(sb);
	else
		ret = erofs_fscache_register_volume(sb);
	if (ret)
		return ret;

	/* acquired domain/volume will be relinquished in kill_sb() on error */
	fscache = erofs_fscache_register_cookie(sb, sbi->fsid, true);
	if (IS_ERR(fscache))
		return PTR_ERR(fscache);

	sbi->s_fscache = fscache;
	return 0;
}

void erofs_fscache_unregister_fs(struct super_block *sb)
{
	struct erofs_sb_info *sbi = EROFS_SB(sb);

	erofs_fscache_unregister_cookie(sbi->s_fscache);

	if (sbi->domain)
		erofs_fscache_domain_put(sbi->domain);
	else
		fscache_relinquish_volume(sbi->volume, NULL, false);

	sbi->s_fscache = NULL;
	sbi->volume = NULL;
	sbi->domain = NULL;
}