// SPDX-License-Identifier: GPL-2.0-or-later
/*
 * Copyright (C) 2022, Alibaba Cloud
 * Copyright (C) 2022, Bytedance Inc. All rights reserved.
 */
#include <linux/fscache.h>
#include "internal.h"

/* Protects erofs_domain_list and the lifetime of erofs_pseudo_mnt. */
static DEFINE_MUTEX(erofs_domain_list_lock);
/* Serializes lookup/creation of per-domain cookies (see erofs_domain_register_cookie()). */
static DEFINE_MUTEX(erofs_domain_cookies_lock);
static LIST_HEAD(erofs_domain_list);
/* Pseudo mount whose sb hosts the anonymous inodes used for cookie sharing. */
static struct vfsmount *erofs_pseudo_mnt;

/*
 * One in-flight read request against the backing cache, covering the page
 * cache range [start, start + len) of @mapping.  The request is refcounted:
 * one ref for the submitter plus one per outstanding fscache subrequest;
 * completion (folio unlock + uptodate marking) happens when the last ref
 * is dropped in erofs_fscache_req_complete().
 */
struct erofs_fscache_request {
	struct netfs_cache_resources cache_resources;
	struct address_space	*mapping;	/* The mapping being accessed */
	loff_t			start;		/* Start position */
	size_t			len;		/* Length of the request */
	size_t			submitted;	/* Length of submitted */
	short			error;		/* 0 or error that occurred */
	refcount_t		ref;
};

/*
 * Allocate a request for reading [start, start + len) of @mapping.
 * Returns the request with a single reference held, or ERR_PTR(-ENOMEM).
 */
static struct erofs_fscache_request *erofs_fscache_req_alloc(struct address_space *mapping,
					     loff_t start, size_t len)
{
	struct erofs_fscache_request *req;

	req = kzalloc(sizeof(struct erofs_fscache_request), GFP_KERNEL);
	if (!req)
		return ERR_PTR(-ENOMEM);

	req->mapping = mapping;
	req->start   = start;
	req->len     = len;
	refcount_set(&req->ref, 1);

	return req;
}

/*
 * Finish a request once its last reference is gone: walk every folio in the
 * requested range, mark it uptodate (unless any subrequest failed) and unlock
 * it, then release the cache resources and the request itself.
 *
 * NOTE(review): the whole range is unlocked even on partial failure; folios
 * simply stay !uptodate in that case.
 */
static void erofs_fscache_req_complete(struct erofs_fscache_request *req)
{
	struct folio *folio;
	bool failed = req->error;
	pgoff_t start_page = req->start / PAGE_SIZE;
	pgoff_t last_page = ((req->start + req->len) / PAGE_SIZE) - 1;

	XA_STATE(xas, &req->mapping->i_pages, start_page);

	rcu_read_lock();
	xas_for_each(&xas, folio, last_page) {
		if (xas_retry(&xas, folio))
			continue;
		if (!failed)
			folio_mark_uptodate(folio);
		folio_unlock(folio);
	}
	rcu_read_unlock();

	if (req->cache_resources.ops)
		req->cache_resources.ops->end_operation(&req->cache_resources);

	kfree(req);
}

/* Drop one reference; the last dropper completes the request. */
static void erofs_fscache_req_put(struct erofs_fscache_request *req)
{
	if (refcount_dec_and_test(&req->ref))
		erofs_fscache_req_complete(req);
}

/*
 * Per-subrequest completion callback passed to fscache_read().  Records the
 * first error (if any) and drops the subrequest's reference on the parent
 * request.
 */
static void erofs_fscache_subreq_complete(void *priv,
		ssize_t transferred_or_error, bool was_async)
{
	struct erofs_fscache_request *req = priv;

	if (IS_ERR_VALUE(transferred_or_error))
		req->error = transferred_or_error;
	erofs_fscache_req_put(req);
}

/*
 * Read data from fscache (cookie, pstart, len), and fill the read data into
 * page cache described by (req->mapping, lstart, len). @pstart describes the
 * start physical address in the cache file.
 *
 * Splits [pstart, pstart + len) into one or more cache subrequests as sized
 * by prepare_ondemand_read(); each subrequest takes its own reference on
 * @req so the request only completes after every subrequest has finished.
 * Returns 0 once all of @len has been submitted, negative errno otherwise
 * (the caller is expected to store the error into req->error).
 */
static int erofs_fscache_read_folios_async(struct fscache_cookie *cookie,
		struct erofs_fscache_request *req, loff_t pstart, size_t len)
{
	enum netfs_io_source source;
	struct super_block *sb = req->mapping->host->i_sb;
	struct netfs_cache_resources *cres = &req->cache_resources;
	struct iov_iter iter;
	loff_t lstart = req->start + req->submitted;
	size_t done = 0;
	int ret;

	DBG_BUGON(len > req->len - req->submitted);

	ret = fscache_begin_read_operation(cres, cookie);
	if (ret)
		return ret;

	while (done < len) {
		loff_t sstart = pstart + done;
		size_t slen = len - done;
		unsigned long flags = 1 << NETFS_SREQ_ONDEMAND;

		/* Let the cache backend bound the subrequest length (slen). */
		source = cres->ops->prepare_ondemand_read(cres,
				sstart, &slen, LLONG_MAX, &flags, 0);
		if (WARN_ON(slen == 0))
			source = NETFS_INVALID_READ;
		if (source != NETFS_READ_FROM_CACHE) {
			erofs_err(sb, "failed to fscache prepare_read (source %d)", source);
			return -EIO;
		}

		/* One reference per in-flight subrequest; dropped in the completion. */
		refcount_inc(&req->ref);
		iov_iter_xarray(&iter, READ, &req->mapping->i_pages,
				lstart + done, slen);

		ret = fscache_read(cres, sstart, &iter, NETFS_READ_HOLE_FAIL,
				   erofs_fscache_subreq_complete, req);
		if (ret == -EIOCBQUEUED)
			ret = 0;	/* async submission is the success case */
		if (ret) {
			erofs_err(sb, "failed to fscache_read (ret %d)", ret);
			return ret;
		}

		done += slen;
	}
	DBG_BUGON(done != len);
	req->submitted += len;
	return 0;
}

/*
 * .read_folio for the metadata address space: map the folio's position to a
 * device address and read it back from the cache.  The folio is unlocked on
 * every path — directly on early failure, otherwise via request completion.
 */
static int erofs_fscache_meta_read_folio(struct file *data, struct folio *folio)
{
	int ret;
	struct super_block *sb = folio_mapping(folio)->host->i_sb;
	struct erofs_fscache_request *req;
	struct erofs_map_dev mdev = {
		.m_deviceid = 0,
		.m_pa = folio_pos(folio),
	};

	ret = erofs_map_dev(sb, &mdev);
	if (ret) {
		folio_unlock(folio);
		return ret;
	}

	req = erofs_fscache_req_alloc(folio_mapping(folio),
				folio_pos(folio), folio_size(folio));
	if (IS_ERR(req)) {
		folio_unlock(folio);
		return PTR_ERR(req);
	}

	ret = erofs_fscache_read_folios_async(mdev.m_fscache->cookie,
				req, mdev.m_pa, folio_size(folio));
	if (ret)
		req->error = ret;

	erofs_fscache_req_put(req);
	return ret;
}

/*
 * Read into page cache in the range described by (@pos, @len).
 *
 * On return, if the output @unlock is true, the caller is responsible for page
 * unlocking; otherwise the callee will take this responsibility through request
 * completion.
 *
 * The return value is the number of bytes successfully handled, or negative
 * error code on failure. The only exception is that, the length of the range
 * instead of the error code is returned on failure after request is allocated,
 * so that .readahead() could advance rac accordingly.
 */
static int erofs_fscache_data_read(struct address_space *mapping,
				   loff_t pos, size_t len, bool *unlock)
{
	struct inode *inode = mapping->host;
	struct super_block *sb = inode->i_sb;
	struct erofs_fscache_request *req;
	struct erofs_map_blocks map;
	struct erofs_map_dev mdev;
	struct iov_iter iter;
	size_t count;
	int ret;

	*unlock = true;

	map.m_la = pos;
	ret = erofs_map_blocks(inode, &map, EROFS_GET_BLOCKS_RAW);
	if (ret)
		return ret;

	if (map.m_flags & EROFS_MAP_META) {
		/* Inline (tail-packed) data: copy out of the metadata buffer. */
		struct erofs_buf buf = __EROFS_BUF_INITIALIZER;
		erofs_blk_t blknr;
		size_t offset, size;
		void *src;

		/* For tail packing layout, the offset may be non-zero. */
		offset = erofs_blkoff(map.m_pa);
		blknr = erofs_blknr(map.m_pa);
		size = map.m_llen;

		src = erofs_read_metabuf(&buf, sb, blknr, EROFS_KMAP);
		if (IS_ERR(src))
			return PTR_ERR(src);

		iov_iter_xarray(&iter, READ, &mapping->i_pages, pos, PAGE_SIZE);
		if (copy_to_iter(src + offset, size, &iter) != size) {
			erofs_put_metabuf(&buf);
			return -EFAULT;
		}
		/* Zero-fill the remainder of the page past the inline data. */
		iov_iter_zero(PAGE_SIZE - size, &iter);
		erofs_put_metabuf(&buf);
		return PAGE_SIZE;
	}

	if (!(map.m_flags & EROFS_MAP_MAPPED)) {
		/* Hole: satisfy the whole remaining range with zeroes. */
		count = len;
		iov_iter_xarray(&iter, READ, &mapping->i_pages, pos, count);
		iov_iter_zero(count, &iter);
		return count;
	}

	/* Clamp to the extent that contains @pos; must be page-aligned. */
	count = min_t(size_t, map.m_llen - (pos - map.m_la), len);
	DBG_BUGON(!count || count % PAGE_SIZE);

	mdev = (struct erofs_map_dev) {
		.m_deviceid = map.m_deviceid,
		.m_pa = map.m_pa,
	};
	ret = erofs_map_dev(sb, &mdev);
	if (ret)
		return ret;

	req = erofs_fscache_req_alloc(mapping, pos, count);
	if (IS_ERR(req))
		return PTR_ERR(req);

	/* From here on, request completion owns folio unlocking. */
	*unlock = false;
	ret = erofs_fscache_read_folios_async(mdev.m_fscache->cookie,
			req, mdev.m_pa + (pos - map.m_la), count);
	if (ret)
		req->error = ret;

	erofs_fscache_req_put(req);
	return count;
}

/* .read_folio for regular data: one block-sized folio per call. */
static int erofs_fscache_read_folio(struct file *file, struct folio *folio)
{
	bool unlock;
	int ret;

	DBG_BUGON(folio_size(folio) != EROFS_BLKSIZ);

	ret = erofs_fscache_data_read(folio_mapping(folio), folio_pos(folio),
				      folio_size(folio), &unlock);
	if (unlock) {
		if (ret > 0)
			folio_mark_uptodate(folio);
		folio_unlock(folio);
	}
	return ret < 0 ? ret : 0;
}

/*
 * .readahead: repeatedly feed chunks of the readahead window to
 * erofs_fscache_data_read() and consume the matching folios from @rac.
 * Per the data_read() contract, a positive return after request allocation
 * may cover a failed range — consuming folios here keeps rac in sync.
 */
static void erofs_fscache_readahead(struct readahead_control *rac)
{
	struct folio *folio;
	size_t len, done = 0;
	loff_t start, pos;
	bool unlock;
	int ret, size;

	if (!readahead_count(rac))
		return;

	start = readahead_pos(rac);
	len = readahead_length(rac);

	do {
		pos = start + done;
		ret = erofs_fscache_data_read(rac->mapping, pos,
					      len - done, &unlock);
		if (ret <= 0)
			return;

		/* Advance rac past the handled bytes. */
		size = ret;
		while (size) {
			folio = readahead_folio(rac);
			size -= folio_size(folio);
			if (unlock) {
				folio_mark_uptodate(folio);
				folio_unlock(folio);
			}
		}
	} while ((done += ret) < len);
}

static const struct address_space_operations erofs_fscache_meta_aops = {
	.read_folio = erofs_fscache_meta_read_folio,
};

const struct address_space_operations erofs_fscache_access_aops = {
	.read_folio = erofs_fscache_read_folio,
	.readahead = erofs_fscache_readahead,
};

/*
 * Drop a reference on @domain.  The last reference unlinks it from
 * erofs_domain_list, tears down the pseudo mount when the list empties,
 * and relinquishes the backing volume.  The list lock is dropped before
 * fscache_relinquish_volume() — only list/pseudo-mount state needs it.
 */
static void erofs_fscache_domain_put(struct erofs_domain *domain)
{
	if (!domain)
		return;
	mutex_lock(&erofs_domain_list_lock);
	if (refcount_dec_and_test(&domain->ref)) {
		list_del(&domain->list);
		if (list_empty(&erofs_domain_list)) {
			kern_unmount(erofs_pseudo_mnt);
			erofs_pseudo_mnt = NULL;
		}
		mutex_unlock(&erofs_domain_list_lock);
		fscache_relinquish_volume(domain->volume, NULL, false);
		kfree(domain->domain_id);
		kfree(domain);
		return;
	}
	mutex_unlock(&erofs_domain_list_lock);
}

/*
 * Acquire the fscache volume "erofs,<domain_id|fsid>" for this super block
 * and stash it in sbi->volume.  Returns 0 or negative errno (-EOPNOTSUPP
 * when the cache backend yields no volume at all).
 */
static int erofs_fscache_register_volume(struct super_block *sb)
{
	struct erofs_sb_info *sbi = EROFS_SB(sb);
	char *domain_id = sbi->domain_id;
	struct fscache_volume *volume;
	char *name;
	int ret = 0;

	name = kasprintf(GFP_KERNEL, "erofs,%s",
			 domain_id ? domain_id : sbi->fsid);
	if (!name)
		return -ENOMEM;

	volume = fscache_acquire_volume(name, NULL, NULL, 0);
	if (IS_ERR_OR_NULL(volume)) {
		erofs_err(sb, "failed to register volume for %s", name);
		ret = volume ? PTR_ERR(volume) : -EOPNOTSUPP;
		volume = NULL;
	}

	sbi->volume = volume;
	kfree(name);
	return ret;
}

/*
 * Create a new shared domain for sbi->domain_id: register its volume,
 * lazily create the pseudo mount, and publish the domain on
 * erofs_domain_list.  Caller holds erofs_domain_list_lock.
 */
static int erofs_fscache_init_domain(struct super_block *sb)
{
	int err;
	struct erofs_domain *domain;
	struct erofs_sb_info *sbi = EROFS_SB(sb);

	domain = kzalloc(sizeof(struct erofs_domain), GFP_KERNEL);
	if (!domain)
		return -ENOMEM;

	domain->domain_id = kstrdup(sbi->domain_id, GFP_KERNEL);
	if (!domain->domain_id) {
		kfree(domain);
		return -ENOMEM;
	}

	err = erofs_fscache_register_volume(sb);
	if (err)
		goto out;

	if (!erofs_pseudo_mnt) {
		erofs_pseudo_mnt = kern_mount(&erofs_fs_type);
		if (IS_ERR(erofs_pseudo_mnt)) {
			err = PTR_ERR(erofs_pseudo_mnt);
			goto out;
		}
	}

	domain->volume = sbi->volume;
	refcount_set(&domain->ref, 1);
	list_add(&domain->list, &erofs_domain_list);
	sbi->domain = domain;
	return 0;
out:
	kfree(domain->domain_id);
	kfree(domain);
	return err;
}

/*
 * Attach this super block to the shared domain named sbi->domain_id,
 * reusing an existing one (and its volume) when present, otherwise
 * creating it via erofs_fscache_init_domain().
 */
static int erofs_fscache_register_domain(struct super_block *sb)
{
	int err;
	struct erofs_domain *domain;
	struct erofs_sb_info *sbi = EROFS_SB(sb);

	mutex_lock(&erofs_domain_list_lock);
	list_for_each_entry(domain, &erofs_domain_list, list) {
		if (!strcmp(domain->domain_id, sbi->domain_id)) {
			sbi->domain = domain;
			sbi->volume = domain->volume;
			refcount_inc(&domain->ref);
			mutex_unlock(&erofs_domain_list_lock);
			return 0;
		}
	}
	err = erofs_fscache_init_domain(sb);
	mutex_unlock(&erofs_domain_list_lock);
	return err;
}

/*
 * Acquire an fscache cookie named @name on this sb's volume and wrap it in
 * an erofs_fscache context.  With EROFS_REG_COOKIE_NEED_INODE, also create
 * an anonymous inode whose mapping fronts the cached blob (meta aops,
 * i_size set to OFFSET_MAX so any offset is readable).
 */
static
struct erofs_fscache *erofs_fscache_acquire_cookie(struct super_block *sb,
						   char *name,
						   unsigned int flags)
{
	struct fscache_volume *volume = EROFS_SB(sb)->volume;
	struct erofs_fscache *ctx;
	struct fscache_cookie *cookie;
	int ret;

	ctx = kzalloc(sizeof(*ctx), GFP_KERNEL);
	if (!ctx)
		return ERR_PTR(-ENOMEM);

	cookie = fscache_acquire_cookie(volume, FSCACHE_ADV_WANT_CACHE_SIZE,
					name, strlen(name), NULL, 0, 0);
	if (!cookie) {
		erofs_err(sb, "failed to get cookie for %s", name);
		ret = -EINVAL;
		goto err;
	}

	fscache_use_cookie(cookie, false);
	ctx->cookie = cookie;

	if (flags & EROFS_REG_COOKIE_NEED_INODE) {
		struct inode *const inode = new_inode(sb);

		if (!inode) {
			erofs_err(sb, "failed to get anon inode for %s", name);
			ret = -ENOMEM;
			goto err_cookie;
		}

		set_nlink(inode, 1);
		inode->i_size = OFFSET_MAX;
		inode->i_mapping->a_ops = &erofs_fscache_meta_aops;
		mapping_set_gfp_mask(inode->i_mapping, GFP_NOFS);

		ctx->inode = inode;
	}

	return ctx;

err_cookie:
	fscache_unuse_cookie(ctx->cookie, NULL, NULL);
	fscache_relinquish_cookie(ctx->cookie, false);
err:
	kfree(ctx);
	return ERR_PTR(ret);
}

/*
 * Undo erofs_fscache_acquire_cookie() (plus any domain bookkeeping the
 * caller layered on top): release the cookie, the optional anon inode,
 * the name copy, and the context itself.
 */
static void erofs_fscache_relinquish_cookie(struct erofs_fscache *ctx)
{
	fscache_unuse_cookie(ctx->cookie, NULL, NULL);
	fscache_relinquish_cookie(ctx->cookie, false);
	iput(ctx->inode);
	kfree(ctx->name);
	kfree(ctx);
}

/*
 * Domain flavor of cookie creation: acquire the cookie, then anchor the
 * context on an anonymous inode of the pseudo mount so other mounts in the
 * same domain can find and share it (see erofs_domain_register_cookie()).
 * Takes an extra domain reference that is dropped at unregister time.
 */
static
struct erofs_fscache *erofs_fscache_domain_init_cookie(struct super_block *sb,
						       char *name,
						       unsigned int flags)
{
	int err;
	struct inode *inode;
	struct erofs_fscache *ctx;
	struct erofs_domain *domain = EROFS_SB(sb)->domain;

	ctx = erofs_fscache_acquire_cookie(sb, name, flags);
	if (IS_ERR(ctx))
		return ctx;

	ctx->name = kstrdup(name, GFP_KERNEL);
	if (!ctx->name) {
		err = -ENOMEM;
		goto out;
	}

	inode = new_inode(erofs_pseudo_mnt->mnt_sb);
	if (!inode) {
		err = -ENOMEM;
		goto out;
	}

	ctx->domain = domain;
	ctx->anon_inode = inode;
	inode->i_private = ctx;
	refcount_inc(&domain->ref);
	return ctx;
out:
	erofs_fscache_relinquish_cookie(ctx);
	return ERR_PTR(err);
}

/*
 * Look up (or create) the cookie named @name within this sb's domain.
 * Sharing is keyed on the pseudo-sb's inode list: a hit grabs an inode
 * reference instead of acquiring a second cookie.  With
 * EROFS_REG_COOKIE_NEED_NOEXIST, an existing cookie is an -EEXIST error
 * (used to enforce fsid uniqueness).
 */
static
struct erofs_fscache *erofs_domain_register_cookie(struct super_block *sb,
						   char *name,
						   unsigned int flags)
{
	struct inode *inode;
	struct erofs_fscache *ctx;
	struct erofs_domain *domain = EROFS_SB(sb)->domain;
	struct super_block *psb = erofs_pseudo_mnt->mnt_sb;

	mutex_lock(&erofs_domain_cookies_lock);
	spin_lock(&psb->s_inode_list_lock);
	list_for_each_entry(inode, &psb->s_inodes, i_sb_list) {
		ctx = inode->i_private;
		if (!ctx || ctx->domain != domain || strcmp(ctx->name, name))
			continue;
		if (!(flags & EROFS_REG_COOKIE_NEED_NOEXIST)) {
			igrab(inode);
		} else {
			erofs_err(sb, "%s already exists in domain %s", name,
				  domain->domain_id);
			ctx = ERR_PTR(-EEXIST);
		}
		spin_unlock(&psb->s_inode_list_lock);
		mutex_unlock(&erofs_domain_cookies_lock);
		return ctx;
	}
	spin_unlock(&psb->s_inode_list_lock);
	ctx = erofs_fscache_domain_init_cookie(sb, name, flags);
	mutex_unlock(&erofs_domain_cookies_lock);
	return ctx;
}

/* Front door for cookie registration: domain-shared or plain per-volume. */
struct erofs_fscache *erofs_fscache_register_cookie(struct super_block *sb,
						    char *name,
						    unsigned int flags)
{
	if (EROFS_SB(sb)->domain_id)
		return erofs_domain_register_cookie(sb, name, flags);
	return erofs_fscache_acquire_cookie(sb, name, flags);
}

/*
 * Release a cookie context.  For domain cookies, drop our anon-inode
 * reference under erofs_domain_cookies_lock; only the user holding the
 * last reference (i_count == 1 before iput) tears the cookie down and
 * drops the domain reference.
 */
void erofs_fscache_unregister_cookie(struct erofs_fscache *ctx)
{
	bool drop;
	struct erofs_domain *domain;

	if (!ctx)
		return;
	domain = ctx->domain;
	if (domain) {
		mutex_lock(&erofs_domain_cookies_lock);
		/* We are the last user iff ours is the only inode reference left. */
		drop = atomic_read(&ctx->anon_inode->i_count) == 1;
		iput(ctx->anon_inode);
		mutex_unlock(&erofs_domain_cookies_lock);
		if (!drop)
			return;
	}

	erofs_fscache_relinquish_cookie(ctx);
	erofs_fscache_domain_put(domain);
}

/*
 * Mount-time setup: register the domain or volume, then the cookie for the
 * primary data blob (sbi->fsid).
 */
int erofs_fscache_register_fs(struct super_block *sb)
{
	int ret;
	struct erofs_sb_info *sbi = EROFS_SB(sb);
	struct erofs_fscache *fscache;
	unsigned int flags;

	if (sbi->domain_id)
		ret = erofs_fscache_register_domain(sb);
	else
		ret = erofs_fscache_register_volume(sb);
	if (ret)
		return ret;

	/*
	 * When shared domain is enabled, using NEED_NOEXIST to guarantee
	 * the primary data blob (aka fsid) is unique in the shared domain.
	 *
	 * For non-shared-domain case, fscache_acquire_volume() invoked by
	 * erofs_fscache_register_volume() has already guaranteed
	 * the uniqueness of primary data blob.
	 *
	 * Acquired domain/volume will be relinquished in kill_sb() on error.
	 */
	flags = EROFS_REG_COOKIE_NEED_INODE;
	if (sbi->domain_id)
		flags |= EROFS_REG_COOKIE_NEED_NOEXIST;
	fscache = erofs_fscache_register_cookie(sb, sbi->fsid, flags);
	if (IS_ERR(fscache))
		return PTR_ERR(fscache);

	sbi->s_fscache = fscache;
	return 0;
}

/* Umount-time teardown: mirror of erofs_fscache_register_fs(). */
void erofs_fscache_unregister_fs(struct super_block *sb)
{
	struct erofs_sb_info *sbi = EROFS_SB(sb);

	erofs_fscache_unregister_cookie(sbi->s_fscache);

	if (sbi->domain)
		erofs_fscache_domain_put(sbi->domain);
	else
		fscache_relinquish_volume(sbi->volume, NULL, false);

	sbi->s_fscache = NULL;
	sbi->volume = NULL;
	sbi->domain = NULL;
}