// SPDX-License-Identifier: GPL-2.0-or-later
/*
 * Copyright (C) 2022, Alibaba Cloud
 * Copyright (C) 2022, Bytedance Inc. All rights reserved.
 */
#include <linux/fscache.h>
#include "internal.h"

static DEFINE_MUTEX(erofs_domain_list_lock);
static DEFINE_MUTEX(erofs_domain_cookies_lock);
static LIST_HEAD(erofs_domain_list);
static struct vfsmount *erofs_pseudo_mnt;

static struct netfs_io_request *erofs_fscache_alloc_request(struct address_space *mapping,
                                                            loff_t start, size_t len)
{
        struct netfs_io_request *rreq;

        rreq = kzalloc(sizeof(struct netfs_io_request), GFP_KERNEL);
        if (!rreq)
                return ERR_PTR(-ENOMEM);

        rreq->start   = start;
        rreq->len     = len;
        rreq->mapping = mapping;
        rreq->inode   = mapping->host;
        INIT_LIST_HEAD(&rreq->subrequests);
        refcount_set(&rreq->ref, 1);
        return rreq;
}

static void erofs_fscache_put_request(struct netfs_io_request *rreq)
{
        if (!refcount_dec_and_test(&rreq->ref))
                return;
        if (rreq->cache_resources.ops)
                rreq->cache_resources.ops->end_operation(&rreq->cache_resources);
        kfree(rreq);
}

static void erofs_fscache_put_subrequest(struct netfs_io_subrequest *subreq)
{
        if (!refcount_dec_and_test(&subreq->ref))
                return;
        erofs_fscache_put_request(subreq->rreq);
        kfree(subreq);
}

static void erofs_fscache_clear_subrequests(struct netfs_io_request *rreq)
{
        struct netfs_io_subrequest *subreq;

        while (!list_empty(&rreq->subrequests)) {
                subreq = list_first_entry(&rreq->subrequests,
                                struct netfs_io_subrequest, rreq_link);
                list_del(&subreq->rreq_link);
                erofs_fscache_put_subrequest(subreq);
        }
}

/*
 * Unlock the folios covered by @rreq, marking each one uptodate unless a
 * subrequest overlapping it has failed.
 */
static void erofs_fscache_rreq_unlock_folios(struct netfs_io_request *rreq)
{
        struct netfs_io_subrequest *subreq;
        struct folio *folio;
        unsigned int iopos = 0;
        pgoff_t start_page = rreq->start / PAGE_SIZE;
        pgoff_t last_page = ((rreq->start + rreq->len) / PAGE_SIZE) - 1;
        bool subreq_failed = false;

        XA_STATE(xas, &rreq->mapping->i_pages, start_page);

        subreq = list_first_entry(&rreq->subrequests,
                                  struct netfs_io_subrequest, rreq_link);
        subreq_failed = (subreq->error < 0);

        rcu_read_lock();
        xas_for_each(&xas, folio, last_page) {
                unsigned int pgpos =
                        (folio_index(folio) - start_page) * PAGE_SIZE;
                unsigned int pgend = pgpos + folio_size(folio);
                bool pg_failed = false;

                /* walk the subrequests until they cover this folio */
                for (;;) {
                        if (!subreq) {
                                pg_failed = true;
                                break;
                        }

                        pg_failed |= subreq_failed;
                        if (pgend < iopos + subreq->len)
                                break;

                        iopos += subreq->len;
                        if (!list_is_last(&subreq->rreq_link,
                                          &rreq->subrequests)) {
                                subreq = list_next_entry(subreq, rreq_link);
                                subreq_failed = (subreq->error < 0);
                        } else {
                                subreq = NULL;
                                subreq_failed = false;
                        }
                        if (pgend == iopos)
                                break;
                }

                if (!pg_failed)
                        folio_mark_uptodate(folio);

                folio_unlock(folio);
        }
        rcu_read_unlock();
}

static void erofs_fscache_rreq_complete(struct netfs_io_request *rreq)
{
        erofs_fscache_rreq_unlock_folios(rreq);
        erofs_fscache_clear_subrequests(rreq);
        erofs_fscache_put_request(rreq);
}

static void erofs_fscache_subreq_complete(void *priv,
                ssize_t transferred_or_error, bool was_async)
{
        struct netfs_io_subrequest *subreq = priv;
        struct netfs_io_request *rreq = subreq->rreq;

        if (IS_ERR_VALUE(transferred_or_error))
                subreq->error = transferred_or_error;

        /* the last completed subrequest finishes the whole request */
        if (atomic_dec_and_test(&rreq->nr_outstanding))
                erofs_fscache_rreq_complete(rreq);

        erofs_fscache_put_subrequest(subreq);
}
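/*
 * Reference counting used above, as a reading aid derived from the code:
 *
 *  - an rreq is created with ref == 1, held by the submitter;
 *  - each subreq is created with ref == 2: one ref is dropped by the
 *    submitter once fscache_read() has been issued, the other by
 *    erofs_fscache_subreq_complete(); every subreq also pins its rreq
 *    via refcount_inc(&rreq->ref);
 *  - rreq->nr_outstanding is primed to 1 so that the request cannot complete
 *    while subrequests are still being queued; whoever drops the counter to
 *    zero runs erofs_fscache_rreq_complete().
 */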
/*
 * Read data from fscache and fill it into the page cache described by @rreq,
 * whose start offset and length shall both be aligned with PAGE_SIZE.
 * @pstart describes the start physical address in the cache file.
 */
static int erofs_fscache_read_folios_async(struct fscache_cookie *cookie,
                                struct netfs_io_request *rreq, loff_t pstart)
{
        enum netfs_io_source source;
        struct super_block *sb = rreq->mapping->host->i_sb;
        struct netfs_io_subrequest *subreq;
        struct netfs_cache_resources *cres = &rreq->cache_resources;
        struct iov_iter iter;
        loff_t start = rreq->start;
        size_t len = rreq->len;
        size_t done = 0;
        int ret;

        atomic_set(&rreq->nr_outstanding, 1);

        ret = fscache_begin_read_operation(cres, cookie);
        if (ret)
                goto out;

        while (done < len) {
                subreq = kzalloc(sizeof(struct netfs_io_subrequest),
                                 GFP_KERNEL);
                if (!subreq) {
                        ret = -ENOMEM;
                        goto out;
                }

                /* one ref for the submitter, one for the completion callback */
                INIT_LIST_HEAD(&subreq->rreq_link);
                refcount_set(&subreq->ref, 2);
                subreq->rreq = rreq;
                refcount_inc(&rreq->ref);

                subreq->start = pstart + done;
                subreq->len = len - done;
                subreq->flags = 1 << NETFS_SREQ_ONDEMAND;

                list_add_tail(&subreq->rreq_link, &rreq->subrequests);

                /* prepare_read() may shrink subreq->len to what the cache
                 * can serve contiguously */
                source = cres->ops->prepare_read(subreq, LLONG_MAX);
                if (WARN_ON(subreq->len == 0))
                        source = NETFS_INVALID_READ;
                if (source != NETFS_READ_FROM_CACHE) {
                        erofs_err(sb, "failed to fscache prepare_read (source %d)",
                                  source);
                        ret = -EIO;
                        subreq->error = ret;
                        erofs_fscache_put_subrequest(subreq);
                        goto out;
                }

                atomic_inc(&rreq->nr_outstanding);

                iov_iter_xarray(&iter, READ, &rreq->mapping->i_pages,
                                start + done, subreq->len);

                ret = fscache_read(cres, subreq->start, &iter,
                                   NETFS_READ_HOLE_FAIL,
                                   erofs_fscache_subreq_complete, subreq);
                if (ret == -EIOCBQUEUED)
                        ret = 0;
                if (ret) {
                        erofs_err(sb, "failed to fscache_read (ret %d)", ret);
                        goto out;
                }

                done += subreq->len;
        }
out:
        /* drop the priming ref; completes the request if all subreqs are done */
        if (atomic_dec_and_test(&rreq->nr_outstanding))
                erofs_fscache_rreq_complete(rreq);

        return ret;
}

static int erofs_fscache_meta_read_folio(struct file *data, struct folio *folio)
{
        int ret;
        struct super_block *sb = folio_mapping(folio)->host->i_sb;
        struct netfs_io_request *rreq;
        struct erofs_map_dev mdev = {
                .m_deviceid = 0,
                .m_pa = folio_pos(folio),
        };

        ret = erofs_map_dev(sb, &mdev);
        if (ret)
                goto out;

        rreq = erofs_fscache_alloc_request(folio_mapping(folio),
                                folio_pos(folio), folio_size(folio));
        if (IS_ERR(rreq)) {
                ret = PTR_ERR(rreq);
                goto out;
        }

        return erofs_fscache_read_folios_async(mdev.m_fscache->cookie,
                                rreq, mdev.m_pa);
out:
        folio_unlock(folio);
        return ret;
}
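/*
 * Reading aid (illustrative, not upstream documentation): in the meta address
 * space above, folio_pos() is already the physical offset inside the primary
 * blob, so a 4KiB metadata folio at pos 0x8000 normally becomes a read of
 * [0x8000, 0x9000) from the cache file behind the primary device cookie,
 * assuming erofs_map_dev() leaves m_pa untouched for device 0.
 */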
/*
 * Read into page cache in the range described by (@pos, @len).
 *
 * On return, if the output @unlock is true, the caller is responsible for
 * page unlocking; otherwise the callee takes this responsibility through the
 * netfs_io_request interface.
 *
 * The return value is the number of bytes successfully handled, or a negative
 * error code on failure. The only exception is that, once a netfs_io_request
 * has been allocated, the length of the range rather than an error code is
 * returned on failure, so that .readahead() can advance rac accordingly.
 */
static int erofs_fscache_data_read(struct address_space *mapping,
                                   loff_t pos, size_t len, bool *unlock)
{
        struct inode *inode = mapping->host;
        struct super_block *sb = inode->i_sb;
        struct netfs_io_request *rreq;
        struct erofs_map_blocks map;
        struct erofs_map_dev mdev;
        struct iov_iter iter;
        size_t count;
        int ret;

        *unlock = true;

        map.m_la = pos;
        ret = erofs_map_blocks(inode, &map, EROFS_GET_BLOCKS_RAW);
        if (ret)
                return ret;

        if (map.m_flags & EROFS_MAP_META) {
                struct erofs_buf buf = __EROFS_BUF_INITIALIZER;
                erofs_blk_t blknr;
                size_t offset, size;
                void *src;

                /* For tail packing layout, the offset may be non-zero. */
                offset = erofs_blkoff(map.m_pa);
                blknr = erofs_blknr(map.m_pa);
                size = map.m_llen;

                src = erofs_read_metabuf(&buf, sb, blknr, EROFS_KMAP);
                if (IS_ERR(src))
                        return PTR_ERR(src);

                iov_iter_xarray(&iter, READ, &mapping->i_pages, pos, PAGE_SIZE);
                if (copy_to_iter(src + offset, size, &iter) != size) {
                        erofs_put_metabuf(&buf);
                        return -EFAULT;
                }
                iov_iter_zero(PAGE_SIZE - size, &iter);
                erofs_put_metabuf(&buf);
                return PAGE_SIZE;
        }

        if (!(map.m_flags & EROFS_MAP_MAPPED)) {
                count = len;
                iov_iter_xarray(&iter, READ, &mapping->i_pages, pos, count);
                iov_iter_zero(count, &iter);
                return count;
        }

        count = min_t(size_t, map.m_llen - (pos - map.m_la), len);
        DBG_BUGON(!count || count % PAGE_SIZE);

        mdev = (struct erofs_map_dev) {
                .m_deviceid = map.m_deviceid,
                .m_pa = map.m_pa,
        };
        ret = erofs_map_dev(sb, &mdev);
        if (ret)
                return ret;

        rreq = erofs_fscache_alloc_request(mapping, pos, count);
        if (IS_ERR(rreq))
                return PTR_ERR(rreq);

        *unlock = false;
        erofs_fscache_read_folios_async(mdev.m_fscache->cookie,
                        rreq, mdev.m_pa + (pos - map.m_la));
        return count;
}

static int erofs_fscache_read_folio(struct file *file, struct folio *folio)
{
        bool unlock;
        int ret;

        DBG_BUGON(folio_size(folio) != EROFS_BLKSIZ);

        ret = erofs_fscache_data_read(folio_mapping(folio), folio_pos(folio),
                                      folio_size(folio), &unlock);
        if (unlock) {
                if (ret > 0)
                        folio_mark_uptodate(folio);
                folio_unlock(folio);
        }
        return ret < 0 ? ret : 0;
}
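/*
 * Summary of the three cases erofs_fscache_data_read() handles, as a reading
 * aid:
 *
 *  1. EROFS_MAP_META: inline (e.g. tail-packed) data is copied out of the
 *     metadata buffer and the rest of the page is zeroed; exactly PAGE_SIZE
 *     bytes are consumed.
 *  2. !EROFS_MAP_MAPPED: a hole, so the whole range is simply zeroed in the
 *     page cache.
 *  3. mapped data: the extent is translated to a (device, physical offset)
 *     pair and read asynchronously from the cache file; page unlocking is
 *     handed over to the request completion path (*unlock = false).
 */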
static void erofs_fscache_readahead(struct readahead_control *rac)
{
        struct folio *folio;
        size_t len, done = 0;
        loff_t start, pos;
        bool unlock;
        int ret, size;

        if (!readahead_count(rac))
                return;

        start = readahead_pos(rac);
        len = readahead_length(rac);

        do {
                pos = start + done;
                ret = erofs_fscache_data_read(rac->mapping, pos,
                                              len - done, &unlock);
                if (ret <= 0)
                        return;

                size = ret;
                while (size) {
                        folio = readahead_folio(rac);
                        size -= folio_size(folio);
                        if (unlock) {
                                folio_mark_uptodate(folio);
                                folio_unlock(folio);
                        }
                }
        } while ((done += ret) < len);
}

static const struct address_space_operations erofs_fscache_meta_aops = {
        .read_folio = erofs_fscache_meta_read_folio,
};

const struct address_space_operations erofs_fscache_access_aops = {
        .read_folio = erofs_fscache_read_folio,
        .readahead = erofs_fscache_readahead,
};

static void erofs_fscache_domain_put(struct erofs_domain *domain)
{
        if (!domain)
                return;
        mutex_lock(&erofs_domain_list_lock);
        if (refcount_dec_and_test(&domain->ref)) {
                list_del(&domain->list);
                if (list_empty(&erofs_domain_list)) {
                        kern_unmount(erofs_pseudo_mnt);
                        erofs_pseudo_mnt = NULL;
                }
                mutex_unlock(&erofs_domain_list_lock);
                fscache_relinquish_volume(domain->volume, NULL, false);
                kfree(domain->domain_id);
                kfree(domain);
                return;
        }
        mutex_unlock(&erofs_domain_list_lock);
}

static int erofs_fscache_register_volume(struct super_block *sb)
{
        struct erofs_sb_info *sbi = EROFS_SB(sb);
        char *domain_id = sbi->domain_id;
        struct fscache_volume *volume;
        char *name;
        int ret = 0;

        name = kasprintf(GFP_KERNEL, "erofs,%s",
                         domain_id ? domain_id : sbi->fsid);
        if (!name)
                return -ENOMEM;

        volume = fscache_acquire_volume(name, NULL, NULL, 0);
        if (IS_ERR_OR_NULL(volume)) {
                erofs_err(sb, "failed to register volume for %s", name);
                ret = volume ? PTR_ERR(volume) : -EOPNOTSUPP;
                volume = NULL;
        }

        sbi->volume = volume;
        kfree(name);
        return ret;
}
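/*
 * Note on the volume key above: it is "erofs,<domain_id>" in domain mode and
 * "erofs,<fsid>" otherwise, so mounts sharing one domain_id share a single
 * fscache volume, while each fsid-mode mount gets its own. A NULL return from
 * fscache_acquire_volume() is mapped to -EOPNOTSUPP; an ERR_PTR (e.g. a
 * conflicting volume key) is passed through as-is.
 */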
static int erofs_fscache_init_domain(struct super_block *sb)
{
        int err;
        struct erofs_domain *domain;
        struct erofs_sb_info *sbi = EROFS_SB(sb);

        domain = kzalloc(sizeof(struct erofs_domain), GFP_KERNEL);
        if (!domain)
                return -ENOMEM;

        domain->domain_id = kstrdup(sbi->domain_id, GFP_KERNEL);
        if (!domain->domain_id) {
                kfree(domain);
                return -ENOMEM;
        }

        err = erofs_fscache_register_volume(sb);
        if (err)
                goto out;

        if (!erofs_pseudo_mnt) {
                /* don't cache an ERR_PTR in erofs_pseudo_mnt on failure */
                struct vfsmount *mnt = kern_mount(&erofs_fs_type);

                if (IS_ERR(mnt)) {
                        err = PTR_ERR(mnt);
                        goto out;
                }
                erofs_pseudo_mnt = mnt;
        }

        domain->volume = sbi->volume;
        refcount_set(&domain->ref, 1);
        list_add(&domain->list, &erofs_domain_list);
        sbi->domain = domain;
        return 0;
out:
        kfree(domain->domain_id);
        kfree(domain);
        return err;
}

static int erofs_fscache_register_domain(struct super_block *sb)
{
        int err;
        struct erofs_domain *domain;
        struct erofs_sb_info *sbi = EROFS_SB(sb);

        mutex_lock(&erofs_domain_list_lock);
        list_for_each_entry(domain, &erofs_domain_list, list) {
                if (!strcmp(domain->domain_id, sbi->domain_id)) {
                        sbi->domain = domain;
                        sbi->volume = domain->volume;
                        refcount_inc(&domain->ref);
                        mutex_unlock(&erofs_domain_list_lock);
                        return 0;
                }
        }
        err = erofs_fscache_init_domain(sb);
        mutex_unlock(&erofs_domain_list_lock);
        return err;
}

static
struct erofs_fscache *erofs_fscache_acquire_cookie(struct super_block *sb,
                                                   char *name, bool need_inode)
{
        struct fscache_volume *volume = EROFS_SB(sb)->volume;
        struct erofs_fscache *ctx;
        struct fscache_cookie *cookie;
        int ret;

        ctx = kzalloc(sizeof(*ctx), GFP_KERNEL);
        if (!ctx)
                return ERR_PTR(-ENOMEM);

        cookie = fscache_acquire_cookie(volume, FSCACHE_ADV_WANT_CACHE_SIZE,
                                        name, strlen(name), NULL, 0, 0);
        if (!cookie) {
                erofs_err(sb, "failed to get cookie for %s", name);
                ret = -EINVAL;
                goto err;
        }

        fscache_use_cookie(cookie, false);
        ctx->cookie = cookie;

        if (need_inode) {
                struct inode *const inode = new_inode(sb);

                if (!inode) {
                        erofs_err(sb, "failed to get anon inode for %s", name);
                        ret = -ENOMEM;
                        goto err_cookie;
                }

                set_nlink(inode, 1);
                inode->i_size = OFFSET_MAX;
                inode->i_mapping->a_ops = &erofs_fscache_meta_aops;
                mapping_set_gfp_mask(inode->i_mapping, GFP_NOFS);

                ctx->inode = inode;
        }

        return ctx;

err_cookie:
        fscache_unuse_cookie(ctx->cookie, NULL, NULL);
        fscache_relinquish_cookie(ctx->cookie, false);
err:
        kfree(ctx);
        return ERR_PTR(ret);
}

static void erofs_fscache_relinquish_cookie(struct erofs_fscache *ctx)
{
        fscache_unuse_cookie(ctx->cookie, NULL, NULL);
        fscache_relinquish_cookie(ctx->cookie, false);
        iput(ctx->inode);
        kfree(ctx->name);
        kfree(ctx);
}

static
struct erofs_fscache *erofs_fscache_domain_init_cookie(struct super_block *sb,
                                                       char *name, bool need_inode)
{
        int err;
        struct inode *inode;
        struct erofs_fscache *ctx;
        struct erofs_domain *domain = EROFS_SB(sb)->domain;

        ctx = erofs_fscache_acquire_cookie(sb, name, need_inode);
        if (IS_ERR(ctx))
                return ctx;

        ctx->name = kstrdup(name, GFP_KERNEL);
        if (!ctx->name) {
                err = -ENOMEM;
                goto out;
        }

        inode = new_inode(erofs_pseudo_mnt->mnt_sb);
        if (!inode) {
                err = -ENOMEM;
                goto out;
        }

        ctx->domain = domain;
        ctx->anon_inode = inode;
        inode->i_private = ctx;
        refcount_inc(&domain->ref);
        return ctx;
out:
        erofs_fscache_relinquish_cookie(ctx);
        return ERR_PTR(err);
}
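/*
 * How domain cookies are shared (a reading aid derived from the code): each
 * domain-mode cookie is anchored to an anonymous inode on erofs_pseudo_mnt,
 * with inode->i_private pointing back at the erofs_fscache context. Lookup
 * walks the pseudo superblock's inode list under s_inode_list_lock and takes
 * an extra inode reference (igrab()) on a match; the last user is detected in
 * erofs_fscache_unregister_cookie() by observing i_count == 1 under
 * erofs_domain_cookies_lock.
 */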
static
struct erofs_fscache *erofs_domain_register_cookie(struct super_block *sb,
                                                   char *name, bool need_inode)
{
        struct inode *inode;
        struct erofs_fscache *ctx;
        struct erofs_domain *domain = EROFS_SB(sb)->domain;
        struct super_block *psb = erofs_pseudo_mnt->mnt_sb;

        mutex_lock(&erofs_domain_cookies_lock);
        spin_lock(&psb->s_inode_list_lock);
        list_for_each_entry(inode, &psb->s_inodes, i_sb_list) {
                ctx = inode->i_private;
                if (!ctx || ctx->domain != domain || strcmp(ctx->name, name))
                        continue;
                igrab(inode);
                spin_unlock(&psb->s_inode_list_lock);
                mutex_unlock(&erofs_domain_cookies_lock);
                return ctx;
        }
        spin_unlock(&psb->s_inode_list_lock);
        ctx = erofs_fscache_domain_init_cookie(sb, name, need_inode);
        mutex_unlock(&erofs_domain_cookies_lock);
        return ctx;
}

struct erofs_fscache *erofs_fscache_register_cookie(struct super_block *sb,
                                                    char *name, bool need_inode)
{
        if (EROFS_SB(sb)->domain_id)
                return erofs_domain_register_cookie(sb, name, need_inode);
        return erofs_fscache_acquire_cookie(sb, name, need_inode);
}

void erofs_fscache_unregister_cookie(struct erofs_fscache *ctx)
{
        bool drop;
        struct erofs_domain *domain;

        if (!ctx)
                return;
        domain = ctx->domain;
        if (domain) {
                mutex_lock(&erofs_domain_cookies_lock);
                drop = atomic_read(&ctx->anon_inode->i_count) == 1;
                iput(ctx->anon_inode);
                mutex_unlock(&erofs_domain_cookies_lock);
                if (!drop)
                        return;
        }

        erofs_fscache_relinquish_cookie(ctx);
        erofs_fscache_domain_put(domain);
}

int erofs_fscache_register_fs(struct super_block *sb)
{
        int ret;
        struct erofs_sb_info *sbi = EROFS_SB(sb);
        struct erofs_fscache *fscache;

        if (sbi->domain_id)
                ret = erofs_fscache_register_domain(sb);
        else
                ret = erofs_fscache_register_volume(sb);
        if (ret)
                return ret;

        /* acquired domain/volume will be relinquished in kill_sb() on error */
        fscache = erofs_fscache_register_cookie(sb, sbi->fsid, true);
        if (IS_ERR(fscache))
                return PTR_ERR(fscache);

        sbi->s_fscache = fscache;
        return 0;
}

void erofs_fscache_unregister_fs(struct super_block *sb)
{
        struct erofs_sb_info *sbi = EROFS_SB(sb);

        erofs_fscache_unregister_cookie(sbi->s_fscache);

        if (sbi->domain)
                erofs_fscache_domain_put(sbi->domain);
        else
                fscache_relinquish_volume(sbi->volume, NULL, false);

        sbi->s_fscache = NULL;
        sbi->volume = NULL;
        sbi->domain = NULL;
}
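/*
 * Rough lifecycle sketch (illustrative; the actual callers live elsewhere in
 * fs/erofs, e.g. the superblock and device handling code):
 *
 *      mount:
 *              erofs_fscache_register_fs(sb)
 *                      -> erofs_fscache_register_domain()   (domain_id set)
 *                         or erofs_fscache_register_volume()
 *                      -> erofs_fscache_register_cookie(sb, sbi->fsid, true)
 *              erofs_fscache_register_cookie(sb, <blob name>, false)
 *                                              for each extra device
 *
 *      kill_sb:
 *              erofs_fscache_unregister_cookie() for each device cookie
 *              erofs_fscache_unregister_fs(sb)
 */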