// SPDX-License-Identifier: GPL-2.0-or-later
/*
 * Copyright (C) 2022, Alibaba Cloud
 * Copyright (C) 2022, Bytedance Inc. All rights reserved.
 */
#include <linux/fscache.h>
#include "internal.h"

static DEFINE_MUTEX(erofs_domain_list_lock);
static DEFINE_MUTEX(erofs_domain_cookies_lock);
static LIST_HEAD(erofs_domain_list);
static struct vfsmount *erofs_pseudo_mnt;

static struct netfs_io_request *erofs_fscache_alloc_request(struct address_space *mapping,
					     loff_t start, size_t len)
{
	struct netfs_io_request *rreq;

	rreq = kzalloc(sizeof(struct netfs_io_request), GFP_KERNEL);
	if (!rreq)
		return ERR_PTR(-ENOMEM);

	rreq->start = start;
	rreq->len = len;
	rreq->mapping = mapping;
	rreq->inode = mapping->host;
	INIT_LIST_HEAD(&rreq->subrequests);
	refcount_set(&rreq->ref, 1);
	return rreq;
}

static void erofs_fscache_put_request(struct netfs_io_request *rreq)
{
	if (!refcount_dec_and_test(&rreq->ref))
		return;
	if (rreq->cache_resources.ops)
		rreq->cache_resources.ops->end_operation(&rreq->cache_resources);
	kfree(rreq);
}

static void erofs_fscache_put_subrequest(struct netfs_io_subrequest *subreq)
{
	if (!refcount_dec_and_test(&subreq->ref))
		return;
	erofs_fscache_put_request(subreq->rreq);
	kfree(subreq);
}

static void erofs_fscache_clear_subrequests(struct netfs_io_request *rreq)
{
	struct netfs_io_subrequest *subreq;

	while (!list_empty(&rreq->subrequests)) {
		subreq = list_first_entry(&rreq->subrequests,
				struct netfs_io_subrequest, rreq_link);
		list_del(&subreq->rreq_link);
		erofs_fscache_put_subrequest(subreq);
	}
}

static void erofs_fscache_rreq_unlock_folios(struct netfs_io_request *rreq)
{
	struct netfs_io_subrequest *subreq;
	struct folio *folio;
	unsigned int iopos = 0;
	pgoff_t start_page = rreq->start / PAGE_SIZE;
	pgoff_t last_page = ((rreq->start + rreq->len) / PAGE_SIZE) - 1;
	bool subreq_failed = false;

	XA_STATE(xas, &rreq->mapping->i_pages, start_page);

	subreq = list_first_entry(&rreq->subrequests,
				  struct netfs_io_subrequest, rreq_link);
	subreq_failed = (subreq->error < 0);

	rcu_read_lock();
	xas_for_each(&xas, folio, last_page) {
		unsigned int pgpos =
			(folio_index(folio) - start_page) * PAGE_SIZE;
		unsigned int pgend = pgpos + folio_size(folio);
		bool pg_failed = false;

		for (;;) {
			if (!subreq) {
				pg_failed = true;
				break;
			}

			pg_failed |= subreq_failed;
			if (pgend < iopos + subreq->len)
				break;

			iopos += subreq->len;
			if (!list_is_last(&subreq->rreq_link,
					  &rreq->subrequests)) {
				subreq = list_next_entry(subreq, rreq_link);
				subreq_failed = (subreq->error < 0);
			} else {
				subreq = NULL;
				subreq_failed = false;
			}
			if (pgend == iopos)
				break;
		}

		if (!pg_failed)
			folio_mark_uptodate(folio);

		folio_unlock(folio);
	}
	rcu_read_unlock();
}

static void erofs_fscache_rreq_complete(struct netfs_io_request *rreq)
{
	erofs_fscache_rreq_unlock_folios(rreq);
	erofs_fscache_clear_subrequests(rreq);
	erofs_fscache_put_request(rreq);
}

static void erofc_fscache_subreq_complete(void *priv,
		ssize_t transferred_or_error, bool was_async)
{
	struct netfs_io_subrequest *subreq = priv;
	struct netfs_io_request *rreq = subreq->rreq;

	if (IS_ERR_VALUE(transferred_or_error))
		subreq->error = transferred_or_error;

	if (atomic_dec_and_test(&rreq->nr_outstanding))
		erofs_fscache_rreq_complete(rreq);

	erofs_fscache_put_subrequest(subreq);
}
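/*
 * Note on the request lifecycle implemented above: a request starts with a
 * single reference and its nr_outstanding counter is pre-biased to 1 by the
 * submitter, and every subrequest additionally pins the request.  Whichever
 * path drops nr_outstanding to zero (the last fscache_read() completion, or
 * the submitter dropping its bias) unlocks the folios, tears down the
 * subrequest list and releases the request.
 */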
/*
 * Read data from fscache and fill it into the page cache described by @rreq;
 * both the start and length of @rreq shall be aligned with PAGE_SIZE.
 * @pstart describes the start physical address in the cache file.
 */
static int erofs_fscache_read_folios_async(struct fscache_cookie *cookie,
				struct netfs_io_request *rreq, loff_t pstart)
{
	enum netfs_io_source source;
	struct super_block *sb = rreq->mapping->host->i_sb;
	struct netfs_io_subrequest *subreq;
	struct netfs_cache_resources *cres = &rreq->cache_resources;
	struct iov_iter iter;
	loff_t start = rreq->start;
	size_t len = rreq->len;
	size_t done = 0;
	int ret;

	atomic_set(&rreq->nr_outstanding, 1);

	ret = fscache_begin_read_operation(cres, cookie);
	if (ret)
		goto out;

	while (done < len) {
		subreq = kzalloc(sizeof(struct netfs_io_subrequest),
				 GFP_KERNEL);
		if (subreq) {
			INIT_LIST_HEAD(&subreq->rreq_link);
			refcount_set(&subreq->ref, 2);
			subreq->rreq = rreq;
			refcount_inc(&rreq->ref);
		} else {
			ret = -ENOMEM;
			goto out;
		}

		subreq->start = pstart + done;
		subreq->len = len - done;
		subreq->flags = 1 << NETFS_SREQ_ONDEMAND;

		list_add_tail(&subreq->rreq_link, &rreq->subrequests);

		source = cres->ops->prepare_read(subreq, LLONG_MAX);
		if (WARN_ON(subreq->len == 0))
			source = NETFS_INVALID_READ;
		if (source != NETFS_READ_FROM_CACHE) {
			erofs_err(sb, "failed to fscache prepare_read (source %d)",
				  source);
			ret = -EIO;
			subreq->error = ret;
			erofs_fscache_put_subrequest(subreq);
			goto out;
		}

		atomic_inc(&rreq->nr_outstanding);

		iov_iter_xarray(&iter, READ, &rreq->mapping->i_pages,
				start + done, subreq->len);

		ret = fscache_read(cres, subreq->start, &iter,
				   NETFS_READ_HOLE_FAIL,
				   erofc_fscache_subreq_complete, subreq);
		if (ret == -EIOCBQUEUED)
			ret = 0;
		if (ret) {
			erofs_err(sb, "failed to fscache_read (ret %d)", ret);
			goto out;
		}

		done += subreq->len;
	}
out:
	if (atomic_dec_and_test(&rreq->nr_outstanding))
		erofs_fscache_rreq_complete(rreq);

	return ret;
}
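/*
 * ->read_folio() of the special metadata address_space set up in
 * erofs_fscache_acquire_cookie() below: the folio offset is taken as the
 * physical address within the image, translated through erofs_map_dev(), and
 * the folio is then read asynchronously from the corresponding cookie.  Folio
 * unlocking is left to the netfs_io_request completion path unless the
 * request could not be set up, in which case the folio is unlocked right here.
 */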
static int erofs_fscache_meta_read_folio(struct file *data, struct folio *folio)
{
	int ret;
	struct super_block *sb = folio_mapping(folio)->host->i_sb;
	struct netfs_io_request *rreq;
	struct erofs_map_dev mdev = {
		.m_deviceid = 0,
		.m_pa = folio_pos(folio),
	};

	ret = erofs_map_dev(sb, &mdev);
	if (ret)
		goto out;

	rreq = erofs_fscache_alloc_request(folio_mapping(folio),
				folio_pos(folio), folio_size(folio));
	if (IS_ERR(rreq)) {
		ret = PTR_ERR(rreq);
		goto out;
	}

	return erofs_fscache_read_folios_async(mdev.m_fscache->cookie,
				rreq, mdev.m_pa);
out:
	folio_unlock(folio);
	return ret;
}

/*
 * Read into page cache in the range described by (@pos, @len).
 *
 * On return, if the output @unlock is true, the caller is responsible for
 * page unlocking; otherwise the callee takes this responsibility through the
 * netfs_io_request interface.
 *
 * The return value is the number of bytes successfully handled, or a negative
 * error code on failure.  The only exception is that the length of the range
 * (instead of the error code) is returned when the failure happens after the
 * netfs_io_request has been allocated, so that .readahead() can advance the
 * rac accordingly.
 */
static int erofs_fscache_data_read(struct address_space *mapping,
				   loff_t pos, size_t len, bool *unlock)
{
	struct inode *inode = mapping->host;
	struct super_block *sb = inode->i_sb;
	struct netfs_io_request *rreq;
	struct erofs_map_blocks map;
	struct erofs_map_dev mdev;
	struct iov_iter iter;
	size_t count;
	int ret;

	*unlock = true;

	map.m_la = pos;
	ret = erofs_map_blocks(inode, &map, EROFS_GET_BLOCKS_RAW);
	if (ret)
		return ret;

	if (map.m_flags & EROFS_MAP_META) {
		struct erofs_buf buf = __EROFS_BUF_INITIALIZER;
		erofs_blk_t blknr;
		size_t offset, size;
		void *src;

		/* For tail packing layout, the offset may be non-zero. */
		offset = erofs_blkoff(map.m_pa);
		blknr = erofs_blknr(map.m_pa);
		size = map.m_llen;

		src = erofs_read_metabuf(&buf, sb, blknr, EROFS_KMAP);
		if (IS_ERR(src))
			return PTR_ERR(src);

		iov_iter_xarray(&iter, READ, &mapping->i_pages, pos, PAGE_SIZE);
		if (copy_to_iter(src + offset, size, &iter) != size) {
			erofs_put_metabuf(&buf);
			return -EFAULT;
		}
		iov_iter_zero(PAGE_SIZE - size, &iter);
		erofs_put_metabuf(&buf);
		return PAGE_SIZE;
	}

	count = min_t(size_t, map.m_llen - (pos - map.m_la), len);
	DBG_BUGON(!count || count % PAGE_SIZE);

	if (!(map.m_flags & EROFS_MAP_MAPPED)) {
		iov_iter_xarray(&iter, READ, &mapping->i_pages, pos, count);
		iov_iter_zero(count, &iter);
		return count;
	}

	mdev = (struct erofs_map_dev) {
		.m_deviceid = map.m_deviceid,
		.m_pa = map.m_pa,
	};
	ret = erofs_map_dev(sb, &mdev);
	if (ret)
		return ret;

	rreq = erofs_fscache_alloc_request(mapping, pos, count);
	if (IS_ERR(rreq))
		return PTR_ERR(rreq);

	*unlock = false;
	erofs_fscache_read_folios_async(mdev.m_fscache->cookie,
			rreq, mdev.m_pa + (pos - map.m_la));
	return count;
}
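/*
 * The ->read_folio() and ->readahead() implementations below are thin
 * wrappers around erofs_fscache_data_read().  When @unlock comes back true,
 * no asynchronous request has been issued (inline metadata, a hole, or an
 * early error), so the folios are marked uptodate on success and unlocked
 * here; otherwise folio unlocking is deferred to the asynchronous
 * netfs_io_request completion path.
 */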
static int erofs_fscache_read_folio(struct file *file, struct folio *folio)
{
	bool unlock;
	int ret;

	DBG_BUGON(folio_size(folio) != EROFS_BLKSIZ);

	ret = erofs_fscache_data_read(folio_mapping(folio), folio_pos(folio),
				      folio_size(folio), &unlock);
	if (unlock) {
		if (ret > 0)
			folio_mark_uptodate(folio);
		folio_unlock(folio);
	}
	return ret < 0 ? ret : 0;
}

static void erofs_fscache_readahead(struct readahead_control *rac)
{
	struct folio *folio;
	size_t len, done = 0;
	loff_t start, pos;
	bool unlock;
	int ret, size;

	if (!readahead_count(rac))
		return;

	start = readahead_pos(rac);
	len = readahead_length(rac);

	do {
		pos = start + done;
		ret = erofs_fscache_data_read(rac->mapping, pos,
					      len - done, &unlock);
		if (ret <= 0)
			return;

		size = ret;
		while (size) {
			folio = readahead_folio(rac);
			size -= folio_size(folio);
			if (unlock) {
				folio_mark_uptodate(folio);
				folio_unlock(folio);
			}
		}
	} while ((done += ret) < len);
}

static const struct address_space_operations erofs_fscache_meta_aops = {
	.read_folio = erofs_fscache_meta_read_folio,
};

const struct address_space_operations erofs_fscache_access_aops = {
	.read_folio = erofs_fscache_read_folio,
	.readahead = erofs_fscache_readahead,
};

static void erofs_fscache_domain_put(struct erofs_domain *domain)
{
	if (!domain)
		return;
	mutex_lock(&erofs_domain_list_lock);
	if (refcount_dec_and_test(&domain->ref)) {
		list_del(&domain->list);
		if (list_empty(&erofs_domain_list)) {
			kern_unmount(erofs_pseudo_mnt);
			erofs_pseudo_mnt = NULL;
		}
		mutex_unlock(&erofs_domain_list_lock);
		fscache_relinquish_volume(domain->volume, NULL, false);
		kfree(domain->domain_id);
		kfree(domain);
		return;
	}
	mutex_unlock(&erofs_domain_list_lock);
}

static int erofs_fscache_register_volume(struct super_block *sb)
{
	struct erofs_sb_info *sbi = EROFS_SB(sb);
	char *domain_id = sbi->opt.domain_id;
	struct fscache_volume *volume;
	char *name;
	int ret = 0;

	name = kasprintf(GFP_KERNEL, "erofs,%s",
			 domain_id ? domain_id : sbi->opt.fsid);
	if (!name)
		return -ENOMEM;

	volume = fscache_acquire_volume(name, NULL, NULL, 0);
	if (IS_ERR_OR_NULL(volume)) {
		erofs_err(sb, "failed to register volume for %s", name);
		ret = volume ? PTR_ERR(volume) : -EOPNOTSUPP;
		volume = NULL;
	}

	sbi->volume = volume;
	kfree(name);
	return ret;
}
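/*
 * Domain handling.  A domain (identified by the domain_id mount option) is
 * presumably meant to let multiple erofs instances share one fscache volume
 * and the cookies registered under it.  Domains live on the global
 * erofs_domain_list, and their shared anonymous inodes are hosted by an
 * internal pseudo mount (erofs_pseudo_mnt) that is created on demand and
 * unmounted again once the last domain is put.
 */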
static int erofs_fscache_init_domain(struct super_block *sb)
{
	int err;
	struct erofs_domain *domain;
	struct erofs_sb_info *sbi = EROFS_SB(sb);

	domain = kzalloc(sizeof(struct erofs_domain), GFP_KERNEL);
	if (!domain)
		return -ENOMEM;

	domain->domain_id = kstrdup(sbi->opt.domain_id, GFP_KERNEL);
	if (!domain->domain_id) {
		kfree(domain);
		return -ENOMEM;
	}

	err = erofs_fscache_register_volume(sb);
	if (err)
		goto out;

	if (!erofs_pseudo_mnt) {
		erofs_pseudo_mnt = kern_mount(&erofs_fs_type);
		if (IS_ERR(erofs_pseudo_mnt)) {
			err = PTR_ERR(erofs_pseudo_mnt);
			goto out;
		}
	}

	domain->volume = sbi->volume;
	refcount_set(&domain->ref, 1);
	list_add(&domain->list, &erofs_domain_list);
	sbi->domain = domain;
	return 0;
out:
	kfree(domain->domain_id);
	kfree(domain);
	return err;
}

static int erofs_fscache_register_domain(struct super_block *sb)
{
	int err;
	struct erofs_domain *domain;
	struct erofs_sb_info *sbi = EROFS_SB(sb);

	mutex_lock(&erofs_domain_list_lock);
	list_for_each_entry(domain, &erofs_domain_list, list) {
		if (!strcmp(domain->domain_id, sbi->opt.domain_id)) {
			sbi->domain = domain;
			sbi->volume = domain->volume;
			refcount_inc(&domain->ref);
			mutex_unlock(&erofs_domain_list_lock);
			return 0;
		}
	}
	err = erofs_fscache_init_domain(sb);
	mutex_unlock(&erofs_domain_list_lock);
	return err;
}

static
struct erofs_fscache *erofs_fscache_acquire_cookie(struct super_block *sb,
						   char *name, bool need_inode)
{
	struct fscache_volume *volume = EROFS_SB(sb)->volume;
	struct erofs_fscache *ctx;
	struct fscache_cookie *cookie;
	int ret;

	ctx = kzalloc(sizeof(*ctx), GFP_KERNEL);
	if (!ctx)
		return ERR_PTR(-ENOMEM);

	cookie = fscache_acquire_cookie(volume, FSCACHE_ADV_WANT_CACHE_SIZE,
					name, strlen(name), NULL, 0, 0);
	if (!cookie) {
		erofs_err(sb, "failed to get cookie for %s", name);
		ret = -EINVAL;
		goto err;
	}

	fscache_use_cookie(cookie, false);
	ctx->cookie = cookie;

	if (need_inode) {
		struct inode *const inode = new_inode(sb);

		if (!inode) {
			erofs_err(sb, "failed to get anon inode for %s", name);
			ret = -ENOMEM;
			goto err_cookie;
		}

		set_nlink(inode, 1);
		inode->i_size = OFFSET_MAX;
		inode->i_mapping->a_ops = &erofs_fscache_meta_aops;
		mapping_set_gfp_mask(inode->i_mapping, GFP_NOFS);

		ctx->inode = inode;
	}

	return ctx;

err_cookie:
	fscache_unuse_cookie(ctx->cookie, NULL, NULL);
	fscache_relinquish_cookie(ctx->cookie, false);
err:
	kfree(ctx);
	return ERR_PTR(ret);
}

static void erofs_fscache_relinquish_cookie(struct erofs_fscache *ctx)
{
	fscache_unuse_cookie(ctx->cookie, NULL, NULL);
	fscache_relinquish_cookie(ctx->cookie, false);
	iput(ctx->inode);
	kfree(ctx->name);
	kfree(ctx);
}
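/*
 * Domain-wide cookie sharing: every cookie created within a domain is
 * published as an anonymous inode on the pseudo mount, with its erofs_fscache
 * context stored in ->i_private.  erofs_domain_register_cookie() scans the
 * pseudo superblock's inode list under erofs_domain_cookies_lock and reuses a
 * matching (domain, name) entry via igrab(), so the inode refcount effectively
 * acts as the share count of the cookie.
 */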
static
struct erofs_fscache *erofs_fscache_domain_init_cookie(struct super_block *sb,
						       char *name, bool need_inode)
{
	int err;
	struct inode *inode;
	struct erofs_fscache *ctx;
	struct erofs_domain *domain = EROFS_SB(sb)->domain;

	ctx = erofs_fscache_acquire_cookie(sb, name, need_inode);
	if (IS_ERR(ctx))
		return ctx;

	ctx->name = kstrdup(name, GFP_KERNEL);
	if (!ctx->name) {
		err = -ENOMEM;
		goto out;
	}

	inode = new_inode(erofs_pseudo_mnt->mnt_sb);
	if (!inode) {
		err = -ENOMEM;
		goto out;
	}

	ctx->domain = domain;
	ctx->anon_inode = inode;
	inode->i_private = ctx;
	refcount_inc(&domain->ref);
	return ctx;
out:
	erofs_fscache_relinquish_cookie(ctx);
	return ERR_PTR(err);
}

static
struct erofs_fscache *erofs_domain_register_cookie(struct super_block *sb,
						   char *name, bool need_inode)
{
	struct inode *inode;
	struct erofs_fscache *ctx;
	struct erofs_domain *domain = EROFS_SB(sb)->domain;
	struct super_block *psb = erofs_pseudo_mnt->mnt_sb;

	mutex_lock(&erofs_domain_cookies_lock);
	list_for_each_entry(inode, &psb->s_inodes, i_sb_list) {
		ctx = inode->i_private;
		if (!ctx || ctx->domain != domain || strcmp(ctx->name, name))
			continue;
		igrab(inode);
		mutex_unlock(&erofs_domain_cookies_lock);
		return ctx;
	}
	ctx = erofs_fscache_domain_init_cookie(sb, name, need_inode);
	mutex_unlock(&erofs_domain_cookies_lock);
	return ctx;
}

struct erofs_fscache *erofs_fscache_register_cookie(struct super_block *sb,
						    char *name, bool need_inode)
{
	if (EROFS_SB(sb)->opt.domain_id)
		return erofs_domain_register_cookie(sb, name, need_inode);
	return erofs_fscache_acquire_cookie(sb, name, need_inode);
}

void erofs_fscache_unregister_cookie(struct erofs_fscache *ctx)
{
	bool drop;
	struct erofs_domain *domain;

	if (!ctx)
		return;
	domain = ctx->domain;
	if (domain) {
		mutex_lock(&erofs_domain_cookies_lock);
		drop = atomic_read(&ctx->anon_inode->i_count) == 1;
		iput(ctx->anon_inode);
		mutex_unlock(&erofs_domain_cookies_lock);
		if (!drop)
			return;
	}

	erofs_fscache_relinquish_cookie(ctx);
	erofs_fscache_domain_put(domain);
}

int erofs_fscache_register_fs(struct super_block *sb)
{
	int ret;
	struct erofs_sb_info *sbi = EROFS_SB(sb);
	struct erofs_fscache *fscache;

	if (sbi->opt.domain_id)
		ret = erofs_fscache_register_domain(sb);
	else
		ret = erofs_fscache_register_volume(sb);
	if (ret)
		return ret;

	/* acquired domain/volume will be relinquished in kill_sb() on error */
	fscache = erofs_fscache_register_cookie(sb, sbi->opt.fsid, true);
	if (IS_ERR(fscache))
		return PTR_ERR(fscache);

	sbi->s_fscache = fscache;
	return 0;
}

void erofs_fscache_unregister_fs(struct super_block *sb)
{
	struct erofs_sb_info *sbi = EROFS_SB(sb);

	erofs_fscache_unregister_cookie(sbi->s_fscache);

	if (sbi->domain)
		erofs_fscache_domain_put(sbi->domain);
	else
		fscache_relinquish_volume(sbi->volume, NULL, false);

	sbi->s_fscache = NULL;
	sbi->volume = NULL;
	sbi->domain = NULL;
}
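/*
 * Expected usage, as a sketch only (the actual call sites live outside this
 * file): erofs_fscache_register_fs() is called while mounting an fscache-mode
 * image and erofs_fscache_unregister_fs() on unmount; additional data blobs,
 * if any, would register their own cookies through
 * erofs_fscache_register_cookie().  On a mount-time failure the acquired
 * domain/volume is relinquished from kill_sb(), as noted above.
 */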