// SPDX-License-Identifier: GPL-2.0-or-later
/*
 * Copyright (C) 2022, Alibaba Cloud
 * Copyright (C) 2022, Bytedance Inc. All rights reserved.
 */
#include <linux/fscache.h>
#include "internal.h"

/* Protects erofs_domain_list and the lifetime of erofs_pseudo_mnt. */
static DEFINE_MUTEX(erofs_domain_list_lock);
/* Serializes lookup/creation/teardown of per-domain cookies. */
static DEFINE_MUTEX(erofs_domain_cookies_lock);
static LIST_HEAD(erofs_domain_list);
/* Pseudo mount hosting the anonymous inodes that back shared-domain cookies. */
static struct vfsmount *erofs_pseudo_mnt;

/*
 * A read request against the fscache backend, covering the byte range
 * [start, start + len) of @mapping.
 *
 * When the range maps to several non-contiguous cache extents, extra
 * subrequests are allocated and chained to the first ("primary") request
 * via @primary.  Only the primary request completes (unlocks) the page
 * cache folios, and it does so only once its last reference is dropped,
 * i.e. after every chained subrequest has finished.
 */
struct erofs_fscache_request {
	struct erofs_fscache_request *primary;
	struct netfs_cache_resources cache_resources;
	struct address_space *mapping;	/* The mapping being accessed */
	loff_t start;			/* Start position */
	size_t len;			/* Length of the request */
	size_t submitted;		/* Length of submitted */
	short error;			/* 0 or error that occurred */
	refcount_t ref;
};

/*
 * Allocate a request covering [start, start + len) of @mapping.
 * Returns the request with one reference held, or ERR_PTR(-ENOMEM).
 */
static struct erofs_fscache_request *erofs_fscache_req_alloc(struct address_space *mapping,
					     loff_t start, size_t len)
{
	struct erofs_fscache_request *req;

	req = kzalloc(sizeof(struct erofs_fscache_request), GFP_KERNEL);
	if (!req)
		return ERR_PTR(-ENOMEM);

	req->mapping = mapping;
	req->start   = start;
	req->len     = len;
	refcount_set(&req->ref, 1);

	return req;
}

/*
 * Get a request for submitting the next @len bytes of @primary.
 *
 * The primary request itself is reused for the first submission (with an
 * extra reference); afterwards a fresh subrequest is allocated, starting
 * where @primary left off, which pins @primary until the subrequest is
 * put.  Returns ERR_PTR() on allocation failure.
 */
static struct erofs_fscache_request *erofs_fscache_req_chain(struct erofs_fscache_request *primary,
					     size_t len)
{
	struct erofs_fscache_request *req;

	/* use primary request for the first submission */
	if (!primary->submitted) {
		refcount_inc(&primary->ref);
		return primary;
	}

	req = erofs_fscache_req_alloc(primary->mapping,
			primary->start + primary->submitted, len);
	if (!IS_ERR(req)) {
		req->primary = primary;
		refcount_inc(&primary->ref);
	}
	return req;
}

/*
 * Finish a primary request: mark every folio in [start, start + len)
 * uptodate (unless any part of the request failed) and unlock it.
 * Runs once, from the final erofs_fscache_req_put().
 */
static void erofs_fscache_req_complete(struct erofs_fscache_request *req)
{
	struct folio *folio;
	bool failed = req->error;
	pgoff_t start_page = req->start / PAGE_SIZE;
	pgoff_t last_page = ((req->start + req->len) / PAGE_SIZE) - 1;

	XA_STATE(xas, &req->mapping->i_pages, start_page);

	rcu_read_lock();
	xas_for_each(&xas, folio, last_page) {
		if (xas_retry(&xas, folio))
			continue;
		if (!failed)
			folio_mark_uptodate(folio);
		folio_unlock(folio);
	}
	rcu_read_unlock();
}

/*
 * Drop one reference on @req.  On the final put, release the cache
 * resources; a primary request then completes its folios, while a
 * chained subrequest instead drops the reference it holds on its
 * primary (possibly triggering the primary's completion in turn).
 */
static void erofs_fscache_req_put(struct erofs_fscache_request *req)
{
	if (refcount_dec_and_test(&req->ref)) {
		if (req->cache_resources.ops)
			req->cache_resources.ops->end_operation(&req->cache_resources);
		if (!req->primary)
			erofs_fscache_req_complete(req);
		else
			erofs_fscache_req_put(req->primary);
		kfree(req);
	}
}

/*
 * fscache_read() termination callback.  Propagates an error to the
 * primary request (so folio completion sees it) and drops the reference
 * taken for this in-flight read.
 */
static void erofs_fscache_subreq_complete(void *priv,
		ssize_t transferred_or_error, bool was_async)
{
	struct erofs_fscache_request *req = priv;

	if (IS_ERR_VALUE(transferred_or_error)) {
		if (req->primary)
			req->primary->error = transferred_or_error;
		else
			req->error = transferred_or_error;
	}
	erofs_fscache_req_put(req);
}

/*
 * Read data from fscache (cookie, pstart, len), and fill the read data into
 * page cache described by (req->mapping, lstart, len). @pstart describes the
 * start physical address in the cache file.
 */
static int erofs_fscache_read_folios_async(struct fscache_cookie *cookie,
		struct erofs_fscache_request *req, loff_t pstart, size_t len)
{
	enum netfs_io_source source;
	struct super_block *sb = req->mapping->host->i_sb;
	struct netfs_cache_resources *cres = &req->cache_resources;
	struct iov_iter iter;
	loff_t lstart = req->start + req->submitted;
	size_t done = 0;
	int ret;

	DBG_BUGON(len > req->len - req->submitted);

	ret = fscache_begin_read_operation(cres, cookie);
	if (ret)
		return ret;

	/*
	 * The cache may hand back the range in several extents; issue one
	 * fscache_read() per extent until @len bytes have been submitted.
	 */
	while (done < len) {
		loff_t sstart = pstart + done;
		size_t slen = len - done;
		unsigned long flags = 1 << NETFS_SREQ_ONDEMAND;

		/* prepare_ondemand_read() may shrink slen to one extent. */
		source = cres->ops->prepare_ondemand_read(cres,
				sstart, &slen, LLONG_MAX, &flags, 0);
		if (WARN_ON(slen == 0))
			source = NETFS_INVALID_READ;
		if (source != NETFS_READ_FROM_CACHE) {
			erofs_err(sb, "failed to fscache prepare_read (source %d)", source);
			return -EIO;
		}

		/*
		 * One reference per in-flight read; dropped by
		 * erofs_fscache_subreq_complete() on termination.
		 */
		refcount_inc(&req->ref);
		iov_iter_xarray(&iter, ITER_DEST, &req->mapping->i_pages,
				lstart + done, slen);

		ret = fscache_read(cres, sstart, &iter, NETFS_READ_HOLE_FAIL,
				   erofs_fscache_subreq_complete, req);
		if (ret == -EIOCBQUEUED)
			ret = 0;	/* queued asynchronously: not an error */
		if (ret) {
			erofs_err(sb, "failed to fscache_read (ret %d)", ret);
			return ret;
		}

		done += slen;
	}
	DBG_BUGON(done != len);
	return 0;
}

/*
 * ->read_folio() for the (internal) metadata mapping: the folio offset is
 * the physical address within the primary device blob, resolved through
 * erofs_map_dev().  The folio is unlocked by request completion (or
 * directly here on early failure).
 */
static int erofs_fscache_meta_read_folio(struct file *data, struct folio *folio)
{
	int ret;
	struct super_block *sb = folio_mapping(folio)->host->i_sb;
	struct erofs_fscache_request *req;
	struct erofs_map_dev mdev = {
		.m_deviceid = 0,
		.m_pa = folio_pos(folio),
	};

	ret = erofs_map_dev(sb, &mdev);
	if (ret) {
		folio_unlock(folio);
		return ret;
	}

	req = erofs_fscache_req_alloc(folio_mapping(folio),
				folio_pos(folio), folio_size(folio));
	if (IS_ERR(req)) {
		folio_unlock(folio);
		return PTR_ERR(req);
	}

	ret = erofs_fscache_read_folios_async(mdev.m_fscache->cookie,
				req, mdev.m_pa, folio_size(folio));
	if (ret)
		req->error = ret;

	erofs_fscache_req_put(req);
	return ret;
}

/*
 * Submit one slice of @primary, starting at start + submitted: either
 * copy inline (tail-packed) metadata, zero an unmapped hole, or chain a
 * subrequest reading one mapped extent from the cache.  Advances
 * primary->submitted by the amount handled; returns 0 or -errno.
 */
static int erofs_fscache_data_read_slice(struct erofs_fscache_request *primary)
{
	struct address_space *mapping = primary->mapping;
	struct inode *inode = mapping->host;
	struct super_block *sb = inode->i_sb;
	struct erofs_fscache_request *req;
	struct erofs_map_blocks map;
	struct erofs_map_dev mdev;
	struct iov_iter iter;
	loff_t pos = primary->start + primary->submitted;
	size_t count;
	int ret;

	map.m_la = pos;
	ret = erofs_map_blocks(inode, &map, EROFS_GET_BLOCKS_RAW);
	if (ret)
		return ret;

	if (map.m_flags & EROFS_MAP_META) {
		struct erofs_buf buf = __EROFS_BUF_INITIALIZER;
		erofs_blk_t blknr;
		size_t offset, size;
		void *src;

		/* For tail packing layout, the offset may be non-zero.
		 */
		offset = erofs_blkoff(map.m_pa);
		blknr = erofs_blknr(map.m_pa);
		size = map.m_llen;

		src = erofs_read_metabuf(&buf, sb, blknr, EROFS_KMAP);
		if (IS_ERR(src))
			return PTR_ERR(src);

		/* Copy the inline data and zero-fill the rest of the page. */
		iov_iter_xarray(&iter, ITER_DEST, &mapping->i_pages, pos, PAGE_SIZE);
		if (copy_to_iter(src + offset, size, &iter) != size) {
			erofs_put_metabuf(&buf);
			return -EFAULT;
		}
		iov_iter_zero(PAGE_SIZE - size, &iter);
		erofs_put_metabuf(&buf);
		primary->submitted += PAGE_SIZE;
		return 0;
	}

	count = primary->len - primary->submitted;
	if (!(map.m_flags & EROFS_MAP_MAPPED)) {
		/* Hole: zero-fill the remainder of the request directly. */
		iov_iter_xarray(&iter, ITER_DEST, &mapping->i_pages, pos, count);
		iov_iter_zero(count, &iter);
		primary->submitted += count;
		return 0;
	}

	/* Clamp to the end of this mapped extent. */
	count = min_t(size_t, map.m_llen - (pos - map.m_la), count);
	DBG_BUGON(!count || count % PAGE_SIZE);

	mdev = (struct erofs_map_dev) {
		.m_deviceid = map.m_deviceid,
		.m_pa = map.m_pa,
	};
	ret = erofs_map_dev(sb, &mdev);
	if (ret)
		return ret;

	req = erofs_fscache_req_chain(primary, count);
	if (IS_ERR(req))
		return PTR_ERR(req);

	ret = erofs_fscache_read_folios_async(mdev.m_fscache->cookie,
			req, mdev.m_pa + (pos - map.m_la), count);
	erofs_fscache_req_put(req);
	primary->submitted += count;
	return ret;
}

/*
 * Drive slice submission until the whole request range has been
 * submitted or a slice fails; a failure is also recorded in req->error
 * so folio completion reflects it.
 */
static int erofs_fscache_data_read(struct erofs_fscache_request *req)
{
	int ret;

	do {
		ret = erofs_fscache_data_read_slice(req);
		if (ret)
			req->error = ret;
	} while (!ret && req->submitted < req->len);

	return ret;
}

/* ->read_folio() for regular data mappings backed by fscache. */
static int erofs_fscache_read_folio(struct file *file, struct folio *folio)
{
	struct erofs_fscache_request *req;
	int ret;

	req = erofs_fscache_req_alloc(folio_mapping(folio),
			folio_pos(folio), folio_size(folio));
	if (IS_ERR(req)) {
		folio_unlock(folio);
		return PTR_ERR(req);
	}

	ret = erofs_fscache_data_read(req);
	erofs_fscache_req_put(req);
	return ret;
}

/*
 * ->readahead(): consume all folios from the readahead window (leaving
 * them locked) and read the whole window with one request.
 */
static void erofs_fscache_readahead(struct readahead_control *rac)
{
	struct erofs_fscache_request *req;

	if (!readahead_count(rac))
		return;

	req = erofs_fscache_req_alloc(rac->mapping,
			readahead_pos(rac), readahead_length(rac));
	if (IS_ERR(req))
		return;

	/* The request completion will drop refs on the folios. */
	while (readahead_folio(rac))
		;

	erofs_fscache_data_read(req);
	erofs_fscache_req_put(req);
}

static const struct address_space_operations erofs_fscache_meta_aops = {
	.read_folio = erofs_fscache_meta_read_folio,
};

const struct address_space_operations erofs_fscache_access_aops = {
	.read_folio = erofs_fscache_read_folio,
	.readahead = erofs_fscache_readahead,
};

/*
 * Drop a reference on @domain.  On the last reference the domain is
 * unhashed and torn down; the pseudo mount is released as well once no
 * domain remains.  Note the list lock is dropped before relinquishing
 * the volume, which may block.
 */
static void erofs_fscache_domain_put(struct erofs_domain *domain)
{
	if (!domain)
		return;
	mutex_lock(&erofs_domain_list_lock);
	if (refcount_dec_and_test(&domain->ref)) {
		list_del(&domain->list);
		if (list_empty(&erofs_domain_list)) {
			kern_unmount(erofs_pseudo_mnt);
			erofs_pseudo_mnt = NULL;
		}
		mutex_unlock(&erofs_domain_list_lock);
		fscache_relinquish_volume(domain->volume, NULL, false);
		kfree(domain->domain_id);
		kfree(domain);
		return;
	}
	mutex_unlock(&erofs_domain_list_lock);
}

/*
 * Acquire the fscache volume "erofs,<domain_id|fsid>" for this super
 * block.  On failure sbi->volume is left NULL; a NULL return from
 * fscache_acquire_volume() (cache unavailable) maps to -EOPNOTSUPP.
 */
static int erofs_fscache_register_volume(struct super_block *sb)
{
	struct erofs_sb_info *sbi = EROFS_SB(sb);
	char *domain_id = sbi->domain_id;
	struct fscache_volume *volume;
	char *name;
	int ret = 0;

	name = kasprintf(GFP_KERNEL, "erofs,%s",
			 domain_id ? domain_id : sbi->fsid);
	if (!name)
		return -ENOMEM;

	volume = fscache_acquire_volume(name, NULL, NULL, 0);
	if (IS_ERR_OR_NULL(volume)) {
		erofs_err(sb, "failed to register volume for %s", name);
		ret = volume ? PTR_ERR(volume) : -EOPNOTSUPP;
		volume = NULL;
	}

	sbi->volume = volume;
	kfree(name);
	return ret;
}

/*
 * Create a new shared domain for sbi->domain_id and add it to
 * erofs_domain_list.  Caller holds erofs_domain_list_lock.  On error,
 * any acquired volume is relinquished later in kill_sb().
 */
static int erofs_fscache_init_domain(struct super_block *sb)
{
	int err;
	struct erofs_domain *domain;
	struct erofs_sb_info *sbi = EROFS_SB(sb);

	domain = kzalloc(sizeof(struct erofs_domain), GFP_KERNEL);
	if (!domain)
		return -ENOMEM;

	domain->domain_id = kstrdup(sbi->domain_id, GFP_KERNEL);
	if (!domain->domain_id) {
		kfree(domain);
		return -ENOMEM;
	}

	err = erofs_fscache_register_volume(sb);
	if (err)
		goto out;

	/* First domain ever: bring up the shared pseudo mount. */
	if (!erofs_pseudo_mnt) {
		erofs_pseudo_mnt = kern_mount(&erofs_fs_type);
		if (IS_ERR(erofs_pseudo_mnt)) {
			err = PTR_ERR(erofs_pseudo_mnt);
			goto out;
		}
	}

	domain->volume = sbi->volume;
	refcount_set(&domain->ref, 1);
	list_add(&domain->list, &erofs_domain_list);
	sbi->domain = domain;
	return 0;
out:
	kfree(domain->domain_id);
	kfree(domain);
	return err;
}

/*
 * Attach this super block to an existing domain with a matching
 * domain_id (taking a reference), or create a new one.
 */
static int erofs_fscache_register_domain(struct super_block *sb)
{
	int err;
	struct erofs_domain *domain;
	struct erofs_sb_info *sbi = EROFS_SB(sb);

	mutex_lock(&erofs_domain_list_lock);
	list_for_each_entry(domain, &erofs_domain_list, list) {
		if (!strcmp(domain->domain_id, sbi->domain_id)) {
			sbi->domain = domain;
			sbi->volume = domain->volume;
			refcount_inc(&domain->ref);
			mutex_unlock(&erofs_domain_list_lock);
			return 0;
		}
	}
	err = erofs_fscache_init_domain(sb);
	mutex_unlock(&erofs_domain_list_lock);
	return err;
}

/*
 * Acquire and start using an fscache cookie named @name on this super
 * block's volume.  With EROFS_REG_COOKIE_NEED_INODE, also set up an
 * anonymous inode whose mapping (erofs_fscache_meta_aops) is used to
 * cache blob metadata.  Returns the new context or ERR_PTR().
 */
static
struct erofs_fscache *erofs_fscache_acquire_cookie(struct super_block *sb,
						   char *name,
						   unsigned int flags)
{
	struct fscache_volume *volume = EROFS_SB(sb)->volume;
	struct erofs_fscache *ctx;
	struct fscache_cookie *cookie;
	int ret;

	ctx = kzalloc(sizeof(*ctx), GFP_KERNEL);
	if (!ctx)
		return ERR_PTR(-ENOMEM);

	cookie = fscache_acquire_cookie(volume, FSCACHE_ADV_WANT_CACHE_SIZE,
					name, strlen(name), NULL, 0, 0);
	if (!cookie) {
		erofs_err(sb, "failed to get cookie for %s", name);
		ret = -EINVAL;
		goto err;
	}

	fscache_use_cookie(cookie, false);
	ctx->cookie = cookie;

	if (flags & EROFS_REG_COOKIE_NEED_INODE) {
		struct inode *const inode = new_inode(sb);

		if (!inode) {
			erofs_err(sb, "failed to get anon inode for %s", name);
			ret = -ENOMEM;
			goto err_cookie;
		}

		set_nlink(inode, 1);
		inode->i_size = OFFSET_MAX;
		inode->i_mapping->a_ops = &erofs_fscache_meta_aops;
		mapping_set_gfp_mask(inode->i_mapping, GFP_NOFS);

		ctx->inode = inode;
	}

	return ctx;

err_cookie:
	fscache_unuse_cookie(ctx->cookie, NULL, NULL);
	fscache_relinquish_cookie(ctx->cookie, false);
err:
	kfree(ctx);
	return ERR_PTR(ret);
}

/*
 * Tear down a cookie context: stop using and relinquish the cookie,
 * drop the optional metadata inode, and free the context.  free(NULL)
 * semantics of iput()/kfree() cover partially-initialized contexts.
 */
static void erofs_fscache_relinquish_cookie(struct erofs_fscache *ctx)
{
	fscache_unuse_cookie(ctx->cookie, NULL, NULL);
	fscache_relinquish_cookie(ctx->cookie, false);
	iput(ctx->inode);
	kfree(ctx->name);
	kfree(ctx);
}

/*
 * Create a domain-shared cookie: acquire the cookie, then anchor it in
 * an anonymous inode on the pseudo mount so other mounts of the same
 * domain can find and share it (see erofs_domain_register_cookie()).
 * Caller holds erofs_domain_cookies_lock.
 */
static
struct erofs_fscache *erofs_fscache_domain_init_cookie(struct super_block *sb,
						       char *name,
						       unsigned int flags)
{
	int err;
	struct inode *inode;
	struct erofs_fscache *ctx;
	struct erofs_domain *domain = EROFS_SB(sb)->domain;

	ctx = erofs_fscache_acquire_cookie(sb, name, flags);
	if (IS_ERR(ctx))
		return ctx;

	ctx->name = kstrdup(name, GFP_KERNEL);
	if (!ctx->name) {
		err = -ENOMEM;
		goto out;
	}

	inode = new_inode(erofs_pseudo_mnt->mnt_sb);
	if (!inode) {
		err = -ENOMEM;
		goto out;
	}

	ctx->domain = domain;
	ctx->anon_inode = inode;
	inode->i_private = ctx;
	/* The anon inode (and thus the shared cookie) pins the domain. */
	refcount_inc(&domain->ref);
	return ctx;
out:
	erofs_fscache_relinquish_cookie(ctx);
	return ERR_PTR(err);
}
static
struct erofs_fscache *erofs_domain_register_cookie(struct super_block *sb,
						   char *name,
						   unsigned int flags)
{
	struct inode *inode;
	struct erofs_fscache *ctx;
	struct erofs_domain *domain = EROFS_SB(sb)->domain;
	struct super_block *psb = erofs_pseudo_mnt->mnt_sb;

	/*
	 * Look up an existing cookie of this domain by walking the pseudo
	 * super block's inode list; each shared cookie is anchored in one
	 * anon inode there (see erofs_fscache_domain_init_cookie()).
	 */
	mutex_lock(&erofs_domain_cookies_lock);
	spin_lock(&psb->s_inode_list_lock);
	list_for_each_entry(inode, &psb->s_inodes, i_sb_list) {
		ctx = inode->i_private;
		if (!ctx || ctx->domain != domain || strcmp(ctx->name, name))
			continue;
		if (!(flags & EROFS_REG_COOKIE_NEED_NOEXIST)) {
			/* share: the inode refcount tracks cookie users */
			igrab(inode);
		} else {
			/* caller required uniqueness (primary data blob) */
			erofs_err(sb, "%s already exists in domain %s", name,
				  domain->domain_id);
			ctx = ERR_PTR(-EEXIST);
		}
		spin_unlock(&psb->s_inode_list_lock);
		mutex_unlock(&erofs_domain_cookies_lock);
		return ctx;
	}
	spin_unlock(&psb->s_inode_list_lock);
	ctx = erofs_fscache_domain_init_cookie(sb, name, flags);
	mutex_unlock(&erofs_domain_cookies_lock);
	return ctx;
}

/*
 * Register a cookie named @name: domain-shared when the mount has a
 * domain_id, otherwise private to this super block's volume.
 */
struct erofs_fscache *erofs_fscache_register_cookie(struct super_block *sb,
						    char *name,
						    unsigned int flags)
{
	if (EROFS_SB(sb)->domain_id)
		return erofs_domain_register_cookie(sb, name, flags);
	return erofs_fscache_acquire_cookie(sb, name, flags);
}

/*
 * Drop a cookie registration.  For a domain-shared cookie, only the
 * last user (anon inode i_count == 1, checked and dropped under
 * erofs_domain_cookies_lock) tears the cookie down and releases the
 * domain reference it held.
 */
void erofs_fscache_unregister_cookie(struct erofs_fscache *ctx)
{
	bool drop;
	struct erofs_domain *domain;

	if (!ctx)
		return;
	domain = ctx->domain;
	if (domain) {
		mutex_lock(&erofs_domain_cookies_lock);
		drop = atomic_read(&ctx->anon_inode->i_count) == 1;
		iput(ctx->anon_inode);
		mutex_unlock(&erofs_domain_cookies_lock);
		if (!drop)
			return;
	}

	erofs_fscache_relinquish_cookie(ctx);
	erofs_fscache_domain_put(domain);
}

/*
 * Set up fscache support at mount time: register the domain or volume,
 * then register the primary data blob (fsid) cookie with its metadata
 * inode.  Returns 0 or -errno.
 */
int erofs_fscache_register_fs(struct super_block *sb)
{
	int ret;
	struct erofs_sb_info *sbi = EROFS_SB(sb);
	struct erofs_fscache *fscache;
	unsigned int flags;

	if (sbi->domain_id)
		ret = erofs_fscache_register_domain(sb);
	else
		ret = erofs_fscache_register_volume(sb);
	if (ret)
		return ret;

	/*
	 * When shared domain is enabled, using NEED_NOEXIST to guarantee
	 * the primary data blob (aka fsid) is unique in the shared domain.
	 *
	 * For non-shared-domain case, fscache_acquire_volume() invoked by
	 * erofs_fscache_register_volume() has already guaranteed
	 * the uniqueness of primary data blob.
	 *
	 * Acquired domain/volume will be relinquished in kill_sb() on error.
	 */
	flags = EROFS_REG_COOKIE_NEED_INODE;
	if (sbi->domain_id)
		flags |= EROFS_REG_COOKIE_NEED_NOEXIST;
	fscache = erofs_fscache_register_cookie(sb, sbi->fsid, flags);
	if (IS_ERR(fscache))
		return PTR_ERR(fscache);

	sbi->s_fscache = fscache;
	return 0;
}

/*
 * Undo erofs_fscache_register_fs() at umount/kill_sb time; safe to call
 * after a partially-failed registration (NULL fields are tolerated).
 */
void erofs_fscache_unregister_fs(struct super_block *sb)
{
	struct erofs_sb_info *sbi = EROFS_SB(sb);

	erofs_fscache_unregister_cookie(sbi->s_fscache);

	if (sbi->domain)
		erofs_fscache_domain_put(sbi->domain);
	else
		fscache_relinquish_volume(sbi->volume, NULL, false);

	sbi->s_fscache = NULL;
	sbi->volume = NULL;
	sbi->domain = NULL;
}