// SPDX-License-Identifier: GPL-2.0-only
/*
 * Copyright (C) 2017-2018 HUAWEI, Inc.
 *             https://www.huawei.com/
 * Copyright (C) 2021, Alibaba Cloud
 */
#include <linux/module.h>
#include <linux/statfs.h>
#include <linux/parser.h>
#include <linux/seq_file.h>
#include <linux/crc32c.h>
#include <linux/fs_context.h>
#include <linux/fs_parser.h>
#include <linux/dax.h>
#include <linux/exportfs.h>
#include "xattr.h"

#define CREATE_TRACE_POINTS
#include <trace/events/erofs.h>

static struct kmem_cache *erofs_inode_cachep __read_mostly;

void _erofs_err(struct super_block *sb, const char *function,
		const char *fmt, ...)
{
	struct va_format vaf;
	va_list args;

	va_start(args, fmt);

	vaf.fmt = fmt;
	vaf.va = &args;

	pr_err("(device %s): %s: %pV", sb->s_id, function, &vaf);
	va_end(args);
}

void _erofs_info(struct super_block *sb, const char *function,
		 const char *fmt, ...)
{
	struct va_format vaf;
	va_list args;

	va_start(args, fmt);

	vaf.fmt = fmt;
	vaf.va = &args;

	pr_info("(device %s): %pV", sb->s_id, &vaf);
	va_end(args);
}

static int erofs_superblock_csum_verify(struct super_block *sb, void *sbdata)
{
	struct erofs_super_block *dsb;
	u32 expected_crc, crc;

	dsb = kmemdup(sbdata + EROFS_SUPER_OFFSET,
		      EROFS_BLKSIZ - EROFS_SUPER_OFFSET, GFP_KERNEL);
	if (!dsb)
		return -ENOMEM;

	expected_crc = le32_to_cpu(dsb->checksum);
	dsb->checksum = 0;
	/* to allow for x86 boot sectors and other oddities. */
	crc = crc32c(~0, dsb, EROFS_BLKSIZ - EROFS_SUPER_OFFSET);
	kfree(dsb);

	if (crc != expected_crc) {
		erofs_err(sb, "invalid checksum 0x%08x, 0x%08x expected",
			  crc, expected_crc);
		return -EBADMSG;
	}
	return 0;
}

static void erofs_inode_init_once(void *ptr)
{
	struct erofs_inode *vi = ptr;

	inode_init_once(&vi->vfs_inode);
}

static struct inode *erofs_alloc_inode(struct super_block *sb)
{
	struct erofs_inode *vi =
		alloc_inode_sb(sb, erofs_inode_cachep, GFP_KERNEL);

	if (!vi)
		return NULL;

	/* zero out everything except vfs_inode */
	memset(vi, 0, offsetof(struct erofs_inode, vfs_inode));
	return &vi->vfs_inode;
}

static void erofs_free_inode(struct inode *inode)
{
	struct erofs_inode *vi = EROFS_I(inode);

	/* be careful of RCU symlink path */
	if (inode->i_op == &erofs_fast_symlink_iops)
		kfree(inode->i_link);
	kfree(vi->xattr_shared_xattrs);

	kmem_cache_free(erofs_inode_cachep, vi);
}

static bool check_layout_compatibility(struct super_block *sb,
				       struct erofs_super_block *dsb)
{
	const unsigned int feature = le32_to_cpu(dsb->feature_incompat);

	EROFS_SB(sb)->feature_incompat = feature;

	/* check if current kernel meets all mandatory requirements */
	if (feature & (~EROFS_ALL_FEATURE_INCOMPAT)) {
		erofs_err(sb,
			  "unidentified incompatible feature %x, please upgrade kernel version",
			  feature & ~EROFS_ALL_FEATURE_INCOMPAT);
		return false;
	}
	return true;
}

#ifdef CONFIG_EROFS_FS_ZIP
/* read variable-sized metadata, offset will be aligned by 4-byte */
static void *erofs_read_metadata(struct super_block *sb, struct erofs_buf *buf,
				 erofs_off_t *offset, int *lengthp)
{
	u8 *buffer, *ptr;
	int len, i, cnt;

	*offset = round_up(*offset, 4);
	ptr = erofs_read_metabuf(buf, sb, erofs_blknr(*offset), EROFS_KMAP);
	if (IS_ERR(ptr))
		return ptr;

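	/* each record starts with a __le16 length; 0 encodes U16_MAX + 1 bytes */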
	len = le16_to_cpu(*(__le16 *)&ptr[erofs_blkoff(*offset)]);
	if (!len)
		len = U16_MAX + 1;
	buffer = kmalloc(len, GFP_KERNEL);
	if (!buffer)
		return ERR_PTR(-ENOMEM);
	*offset += sizeof(__le16);
	*lengthp = len;

	for (i = 0; i < len; i += cnt) {
		cnt = min(EROFS_BLKSIZ - (int)erofs_blkoff(*offset), len - i);
		ptr = erofs_read_metabuf(buf, sb, erofs_blknr(*offset),
					 EROFS_KMAP);
		if (IS_ERR(ptr)) {
			kfree(buffer);
			return ptr;
		}
		memcpy(buffer + i, ptr + erofs_blkoff(*offset), cnt);
		*offset += cnt;
	}
	return buffer;
}

static int erofs_load_compr_cfgs(struct super_block *sb,
				 struct erofs_super_block *dsb)
{
	struct erofs_sb_info *sbi = EROFS_SB(sb);
	struct erofs_buf buf = __EROFS_BUF_INITIALIZER;
	unsigned int algs, alg;
	erofs_off_t offset;
	int size, ret = 0;

	sbi->available_compr_algs = le16_to_cpu(dsb->u1.available_compr_algs);
	if (sbi->available_compr_algs & ~Z_EROFS_ALL_COMPR_ALGS) {
		erofs_err(sb, "try to load compressed fs with unsupported algorithms %x",
			  sbi->available_compr_algs & ~Z_EROFS_ALL_COMPR_ALGS);
		return -EINVAL;
	}

	offset = EROFS_SUPER_OFFSET + sbi->sb_size;
	alg = 0;
	for (algs = sbi->available_compr_algs; algs; algs >>= 1, ++alg) {
		void *data;

		if (!(algs & 1))
			continue;

		data = erofs_read_metadata(sb, &buf, &offset, &size);
		if (IS_ERR(data)) {
			ret = PTR_ERR(data);
			break;
		}

		switch (alg) {
		case Z_EROFS_COMPRESSION_LZ4:
			ret = z_erofs_load_lz4_config(sb, dsb, data, size);
			break;
		case Z_EROFS_COMPRESSION_LZMA:
			ret = z_erofs_load_lzma_config(sb, dsb, data, size);
			break;
		default:
			DBG_BUGON(1);
			ret = -EFAULT;
		}
		kfree(data);
		if (ret)
			break;
	}
	erofs_put_metabuf(&buf);
	return ret;
}
#else
static int erofs_load_compr_cfgs(struct super_block *sb,
				 struct erofs_super_block *dsb)
{
	if (dsb->u1.available_compr_algs) {
		erofs_err(sb, "try to load compressed fs when compression is disabled");
		return -EINVAL;
	}
	return 0;
}
#endif

static int erofs_init_device(struct erofs_buf *buf, struct super_block *sb,
			     struct erofs_device_info *dif, erofs_off_t *pos)
{
	struct erofs_sb_info *sbi = EROFS_SB(sb);
	struct erofs_fscache *fscache;
	struct erofs_deviceslot *dis;
	struct block_device *bdev;
	void *ptr;

	ptr = erofs_read_metabuf(buf, sb, erofs_blknr(*pos), EROFS_KMAP);
	if (IS_ERR(ptr))
		return PTR_ERR(ptr);
	dis = ptr + erofs_blkoff(*pos);

	if (!dif->path) {
		if (!dis->tag[0]) {
			erofs_err(sb, "empty device tag @ pos %llu", *pos);
			return -EINVAL;
		}
		dif->path = kmemdup_nul(dis->tag, sizeof(dis->tag), GFP_KERNEL);
		if (!dif->path)
			return -ENOMEM;
	}

	if (erofs_is_fscache_mode(sb)) {
		fscache = erofs_fscache_register_cookie(sb, dif->path, 0);
		if (IS_ERR(fscache))
			return PTR_ERR(fscache);
		dif->fscache = fscache;
	} else {
		bdev = blkdev_get_by_path(dif->path, FMODE_READ | FMODE_EXCL,
					  sb->s_type);
		if (IS_ERR(bdev))
			return PTR_ERR(bdev);
		dif->bdev = bdev;
		dif->dax_dev = fs_dax_get_by_bdev(bdev, &dif->dax_part_off,
						  NULL, NULL);
	}

	dif->blocks = le32_to_cpu(dis->blocks);
	dif->mapped_blkaddr = le32_to_cpu(dis->mapped_blkaddr);
	sbi->total_blocks += dif->blocks;
	*pos += EROFS_DEVT_SLOT_SIZE;
	return 0;
}

static int erofs_scan_devices(struct super_block *sb,
			      struct erofs_super_block *dsb)
{
	struct erofs_sb_info *sbi = EROFS_SB(sb);
	unsigned int ondisk_extradevs;
	erofs_off_t pos;
	struct erofs_buf buf = __EROFS_BUF_INITIALIZER;
	struct erofs_device_info *dif;
	int id, err = 0;

	sbi->total_blocks = sbi->primarydevice_blocks;
	if (!erofs_sb_has_device_table(sbi))
		ondisk_extradevs = 0;
	else
		ondisk_extradevs = le16_to_cpu(dsb->extra_devices);

	if (sbi->devs->extra_devices &&
	    ondisk_extradevs != sbi->devs->extra_devices) {
		erofs_err(sb, "extra devices don't match (ondisk %u, given %u)",
			  ondisk_extradevs, sbi->devs->extra_devices);
		return -EINVAL;
	}
	if (!ondisk_extradevs)
		return 0;

	sbi->device_id_mask = roundup_pow_of_two(ondisk_extradevs + 1) - 1;
	pos = le16_to_cpu(dsb->devt_slotoff) * EROFS_DEVT_SLOT_SIZE;
	down_read(&sbi->devs->rwsem);
	if (sbi->devs->extra_devices) {
		idr_for_each_entry(&sbi->devs->tree, dif, id) {
			err = erofs_init_device(&buf, sb, dif, &pos);
			if (err)
				break;
		}
	} else {
		for (id = 0; id < ondisk_extradevs; id++) {
			dif = kzalloc(sizeof(*dif), GFP_KERNEL);
			if (!dif) {
				err = -ENOMEM;
				break;
			}

			err = idr_alloc(&sbi->devs->tree, dif, 0, 0, GFP_KERNEL);
			if (err < 0) {
				kfree(dif);
				break;
			}
			++sbi->devs->extra_devices;

			err = erofs_init_device(&buf, sb, dif, &pos);
			if (err)
				break;
		}
	}
	up_read(&sbi->devs->rwsem);
	erofs_put_metabuf(&buf);
	return err;
}

static int erofs_read_superblock(struct super_block *sb)
{
	struct erofs_sb_info *sbi;
	struct erofs_buf buf = __EROFS_BUF_INITIALIZER;
	struct erofs_super_block *dsb;
	unsigned int blkszbits;
	void *data;
	int ret;

	data = erofs_read_metabuf(&buf, sb, 0, EROFS_KMAP);
	if (IS_ERR(data)) {
		erofs_err(sb, "cannot read erofs superblock");
		return PTR_ERR(data);
	}

	sbi = EROFS_SB(sb);
	dsb = (struct erofs_super_block *)(data + EROFS_SUPER_OFFSET);

	ret = -EINVAL;
	if (le32_to_cpu(dsb->magic) != EROFS_SUPER_MAGIC_V1) {
		erofs_err(sb, "cannot find valid erofs superblock");
		goto out;
	}

	sbi->feature_compat = le32_to_cpu(dsb->feature_compat);
	if (erofs_sb_has_sb_chksum(sbi)) {
		ret = erofs_superblock_csum_verify(sb, data);
		if (ret)
			goto out;
	}

	ret = -EINVAL;
	blkszbits = dsb->blkszbits;
	/* 9(512 bytes) + LOG_SECTORS_PER_BLOCK == LOG_BLOCK_SIZE */
	if (blkszbits != LOG_BLOCK_SIZE) {
		erofs_err(sb, "blkszbits %u isn't supported on this platform",
			  blkszbits);
		goto out;
	}

	if (!check_layout_compatibility(sb, dsb))
		goto out;

	sbi->sb_size = 128 + dsb->sb_extslots * EROFS_SB_EXTSLOT_SIZE;
	if (sbi->sb_size > EROFS_BLKSIZ) {
		erofs_err(sb, "invalid sb_extslots %u (more than a fs block)",
			  sbi->sb_size);
		goto out;
	}
	sbi->primarydevice_blocks = le32_to_cpu(dsb->blocks);
	sbi->meta_blkaddr = le32_to_cpu(dsb->meta_blkaddr);
#ifdef CONFIG_EROFS_FS_XATTR
	sbi->xattr_blkaddr = le32_to_cpu(dsb->xattr_blkaddr);
#endif
	sbi->islotbits = ilog2(sizeof(struct erofs_inode_compact));
	sbi->root_nid = le16_to_cpu(dsb->root_nid);
#ifdef CONFIG_EROFS_FS_ZIP
	sbi->packed_inode = NULL;
	if (erofs_sb_has_fragments(sbi) && dsb->packed_nid) {
		sbi->packed_inode =
			erofs_iget(sb, le64_to_cpu(dsb->packed_nid));
		if (IS_ERR(sbi->packed_inode)) {
			ret = PTR_ERR(sbi->packed_inode);
			goto out;
		}
	}
#endif
	sbi->inos = le64_to_cpu(dsb->inos);

	sbi->build_time = le64_to_cpu(dsb->build_time);
	sbi->build_time_nsec = le32_to_cpu(dsb->build_time_nsec);

	memcpy(&sb->s_uuid, dsb->uuid, sizeof(dsb->uuid));

	ret = strscpy(sbi->volume_name, dsb->volume_name,
		      sizeof(dsb->volume_name));
	if (ret < 0) {	/* -E2BIG */
		erofs_err(sb, "bad volume name without NIL terminator");
		ret = -EFSCORRUPTED;
		goto out;
	}

	/* parse on-disk compression configurations */
	if (erofs_sb_has_compr_cfgs(sbi))
		ret = erofs_load_compr_cfgs(sb, dsb);
	else
		ret = z_erofs_load_lz4_config(sb, dsb, NULL, 0);
	if (ret < 0)
		goto out;

	/* handle multiple devices */
	ret = erofs_scan_devices(sb, dsb);

	if (erofs_sb_has_ztailpacking(sbi))
		erofs_info(sb, "EXPERIMENTAL compressed inline data feature in use. Use at your own risk!");
	if (erofs_is_fscache_mode(sb))
		erofs_info(sb, "EXPERIMENTAL fscache-based on-demand read feature in use. Use at your own risk!");
	if (erofs_sb_has_fragments(sbi))
		erofs_info(sb, "EXPERIMENTAL compressed fragments feature in use. Use at your own risk!");
	if (erofs_sb_has_dedupe(sbi))
		erofs_info(sb, "EXPERIMENTAL global deduplication feature in use. Use at your own risk!");
out:
	erofs_put_metabuf(&buf);
	return ret;
}

/* set up default EROFS parameters */
static void erofs_default_options(struct erofs_fs_context *ctx)
{
#ifdef CONFIG_EROFS_FS_ZIP
	ctx->opt.cache_strategy = EROFS_ZIP_CACHE_READAROUND;
	ctx->opt.max_sync_decompress_pages = 3;
	ctx->opt.sync_decompress = EROFS_SYNC_DECOMPRESS_AUTO;
#endif
#ifdef CONFIG_EROFS_FS_XATTR
	set_opt(&ctx->opt, XATTR_USER);
#endif
#ifdef CONFIG_EROFS_FS_POSIX_ACL
	set_opt(&ctx->opt, POSIX_ACL);
#endif
}

enum {
	Opt_user_xattr,
	Opt_acl,
	Opt_cache_strategy,
	Opt_dax,
	Opt_dax_enum,
	Opt_device,
	Opt_fsid,
	Opt_domain_id,
	Opt_err
};

static const struct constant_table erofs_param_cache_strategy[] = {
	{"disabled", EROFS_ZIP_CACHE_DISABLED},
	{"readahead", EROFS_ZIP_CACHE_READAHEAD},
	{"readaround", EROFS_ZIP_CACHE_READAROUND},
	{}
};

static const struct constant_table erofs_dax_param_enums[] = {
	{"always", EROFS_MOUNT_DAX_ALWAYS},
	{"never", EROFS_MOUNT_DAX_NEVER},
	{}
};

static const struct fs_parameter_spec erofs_fs_parameters[] = {
	fsparam_flag_no("user_xattr", Opt_user_xattr),
	fsparam_flag_no("acl", Opt_acl),
	fsparam_enum("cache_strategy", Opt_cache_strategy,
		     erofs_param_cache_strategy),
	fsparam_flag("dax", Opt_dax),
	fsparam_enum("dax", Opt_dax_enum, erofs_dax_param_enums),
	fsparam_string("device", Opt_device),
	fsparam_string("fsid", Opt_fsid),
	fsparam_string("domain_id", Opt_domain_id),
	{}
};

static bool erofs_fc_set_dax_mode(struct fs_context *fc, unsigned int mode)
{
#ifdef CONFIG_FS_DAX
	struct erofs_fs_context *ctx = fc->fs_private;

	switch (mode) {
	case EROFS_MOUNT_DAX_ALWAYS:
		warnfc(fc, "DAX enabled. Warning: EXPERIMENTAL, use at your own risk");
		set_opt(&ctx->opt, DAX_ALWAYS);
		clear_opt(&ctx->opt, DAX_NEVER);
		return true;
	case EROFS_MOUNT_DAX_NEVER:
		set_opt(&ctx->opt, DAX_NEVER);
		clear_opt(&ctx->opt, DAX_ALWAYS);
		return true;
	default:
		DBG_BUGON(1);
		return false;
	}
#else
	errorfc(fc, "dax options not supported");
	return false;
#endif
}

static int erofs_fc_parse_param(struct fs_context *fc,
				struct fs_parameter *param)
{
	struct erofs_fs_context *ctx = fc->fs_private;
	struct fs_parse_result result;
	struct erofs_device_info *dif;
	int opt, ret;

	opt = fs_parse(fc, erofs_fs_parameters, param, &result);
	if (opt < 0)
		return opt;

	switch (opt) {
	case Opt_user_xattr:
#ifdef CONFIG_EROFS_FS_XATTR
		if (result.boolean)
			set_opt(&ctx->opt, XATTR_USER);
		else
			clear_opt(&ctx->opt, XATTR_USER);
#else
		errorfc(fc, "{,no}user_xattr options not supported");
#endif
		break;
	case Opt_acl:
#ifdef CONFIG_EROFS_FS_POSIX_ACL
		if (result.boolean)
			set_opt(&ctx->opt, POSIX_ACL);
		else
			clear_opt(&ctx->opt, POSIX_ACL);
#else
		errorfc(fc, "{,no}acl options not supported");
#endif
		break;
	case Opt_cache_strategy:
#ifdef CONFIG_EROFS_FS_ZIP
		ctx->opt.cache_strategy = result.uint_32;
#else
		errorfc(fc, "compression not supported, cache_strategy ignored");
#endif
		break;
	case Opt_dax:
		if (!erofs_fc_set_dax_mode(fc, EROFS_MOUNT_DAX_ALWAYS))
			return -EINVAL;
		break;
	case Opt_dax_enum:
		if (!erofs_fc_set_dax_mode(fc, result.uint_32))
			return -EINVAL;
		break;
	case Opt_device:
		dif = kzalloc(sizeof(*dif), GFP_KERNEL);
		if (!dif)
			return -ENOMEM;
		dif->path = kstrdup(param->string, GFP_KERNEL);
		if (!dif->path) {
			kfree(dif);
			return -ENOMEM;
		}
		down_write(&ctx->devs->rwsem);
		ret = idr_alloc(&ctx->devs->tree, dif, 0, 0, GFP_KERNEL);
		up_write(&ctx->devs->rwsem);
		if (ret < 0) {
			kfree(dif->path);
			kfree(dif);
			return ret;
		}
		++ctx->devs->extra_devices;
		break;
#ifdef CONFIG_EROFS_FS_ONDEMAND
	case Opt_fsid:
		kfree(ctx->fsid);
		ctx->fsid = kstrdup(param->string, GFP_KERNEL);
		if (!ctx->fsid)
			return -ENOMEM;
		break;
	case Opt_domain_id:
		kfree(ctx->domain_id);
		ctx->domain_id = kstrdup(param->string, GFP_KERNEL);
		if (!ctx->domain_id)
			return -ENOMEM;
		break;
#else
	case Opt_fsid:
	case Opt_domain_id:
		errorfc(fc, "%s option not supported", erofs_fs_parameters[opt].name);
		break;
#endif
	default:
		return -ENOPARAM;
	}
	return 0;
}

#ifdef CONFIG_EROFS_FS_ZIP
static const struct address_space_operations managed_cache_aops;

static bool erofs_managed_cache_release_folio(struct folio *folio, gfp_t gfp)
{
	bool ret = true;
	struct address_space *const mapping = folio->mapping;

	DBG_BUGON(!folio_test_locked(folio));
	DBG_BUGON(mapping->a_ops != &managed_cache_aops);

	if (folio_test_private(folio))
		ret = erofs_try_to_free_cached_page(&folio->page);

	return ret;
}

/*
 * It will be called only on inode eviction. In case that there are still some
 * decompression requests in progress, wait with rescheduling for a bit here.
 * We could introduce an extra locking instead but it seems unnecessary.
 */
static void erofs_managed_cache_invalidate_folio(struct folio *folio,
						 size_t offset, size_t length)
{
	const size_t stop = length + offset;

	DBG_BUGON(!folio_test_locked(folio));

	/* Check for potential overflow in debug mode */
	DBG_BUGON(stop > folio_size(folio) || stop < length);

	if (offset == 0 && stop == folio_size(folio))
		while (!erofs_managed_cache_release_folio(folio, GFP_NOFS))
			cond_resched();
}

static const struct address_space_operations managed_cache_aops = {
	.release_folio = erofs_managed_cache_release_folio,
	.invalidate_folio = erofs_managed_cache_invalidate_folio,
};

static int erofs_init_managed_cache(struct super_block *sb)
{
	struct erofs_sb_info *const sbi = EROFS_SB(sb);
	struct inode *const inode = new_inode(sb);

	if (!inode)
		return -ENOMEM;

	set_nlink(inode, 1);
	inode->i_size = OFFSET_MAX;

	inode->i_mapping->a_ops = &managed_cache_aops;
	mapping_set_gfp_mask(inode->i_mapping, GFP_NOFS);
	sbi->managed_cache = inode;
	return 0;
}
#else
static int erofs_init_managed_cache(struct super_block *sb) { return 0; }
#endif

static struct inode *erofs_nfs_get_inode(struct super_block *sb,
					 u64 ino, u32 generation)
{
	return erofs_iget(sb, ino);
}

static struct dentry *erofs_fh_to_dentry(struct super_block *sb,
		struct fid *fid, int fh_len, int fh_type)
{
	return generic_fh_to_dentry(sb, fid, fh_len, fh_type,
				    erofs_nfs_get_inode);
}

static struct dentry *erofs_fh_to_parent(struct super_block *sb,
		struct fid *fid, int fh_len, int fh_type)
{
	return generic_fh_to_parent(sb, fid, fh_len, fh_type,
				    erofs_nfs_get_inode);
}

static struct dentry *erofs_get_parent(struct dentry *child)
{
	erofs_nid_t nid;
	unsigned int d_type;
	int err;

	err = erofs_namei(d_inode(child), &dotdot_name, &nid, &d_type);
	if (err)
		return ERR_PTR(err);
	return d_obtain_alias(erofs_iget(child->d_sb, nid));
}

static const struct export_operations erofs_export_ops = {
	.fh_to_dentry = erofs_fh_to_dentry,
	.fh_to_parent = erofs_fh_to_parent,
	.get_parent = erofs_get_parent,
};

static int erofs_fc_fill_pseudo_super(struct super_block *sb, struct fs_context *fc)
{
	static const struct tree_descr empty_descr = {""};

	return simple_fill_super(sb, EROFS_SUPER_MAGIC, &empty_descr);
}

static int erofs_fc_fill_super(struct super_block *sb, struct fs_context *fc)
{
	struct inode *inode;
	struct erofs_sb_info *sbi;
	struct erofs_fs_context *ctx = fc->fs_private;
	int err;

	sb->s_magic = EROFS_SUPER_MAGIC;
	sb->s_flags |= SB_RDONLY | SB_NOATIME;
	sb->s_maxbytes = MAX_LFS_FILESIZE;
	sb->s_op = &erofs_sops;

	sbi = kzalloc(sizeof(*sbi), GFP_KERNEL);
	if (!sbi)
		return -ENOMEM;

	sb->s_fs_info = sbi;
	sbi->opt = ctx->opt;
	sbi->devs = ctx->devs;
	ctx->devs = NULL;
	sbi->fsid = ctx->fsid;
	ctx->fsid = NULL;
	sbi->domain_id = ctx->domain_id;
	ctx->domain_id = NULL;

	if (erofs_is_fscache_mode(sb)) {
		sb->s_blocksize = EROFS_BLKSIZ;
		sb->s_blocksize_bits = LOG_BLOCK_SIZE;

		err = erofs_fscache_register_fs(sb);
		if (err)
			return err;

		err = super_setup_bdi(sb);
		if (err)
			return err;
	} else {
		if (!sb_set_blocksize(sb, EROFS_BLKSIZ)) {
			erofs_err(sb, "failed to set erofs blksize");
			return -EINVAL;
		}

		sbi->dax_dev = fs_dax_get_by_bdev(sb->s_bdev,
						  &sbi->dax_part_off,
						  NULL, NULL);
	}

	err = erofs_read_superblock(sb);
	if (err)
		return err;

	if (test_opt(&sbi->opt, DAX_ALWAYS)) {
		BUILD_BUG_ON(EROFS_BLKSIZ != PAGE_SIZE);

		if (!sbi->dax_dev) {
			errorfc(fc, "DAX unsupported by block device. Turning off DAX.");
			clear_opt(&sbi->opt, DAX_ALWAYS);
		}
	}

	sb->s_time_gran = 1;
	sb->s_xattr = erofs_xattr_handlers;
	sb->s_export_op = &erofs_export_ops;

	if (test_opt(&sbi->opt, POSIX_ACL))
		sb->s_flags |= SB_POSIXACL;
	else
		sb->s_flags &= ~SB_POSIXACL;

#ifdef CONFIG_EROFS_FS_ZIP
	xa_init(&sbi->managed_pslots);
#endif

	/* get the root inode */
	inode = erofs_iget(sb, ROOT_NID(sbi));
	if (IS_ERR(inode))
		return PTR_ERR(inode);

	if (!S_ISDIR(inode->i_mode)) {
		erofs_err(sb, "rootino(nid %llu) is not a directory(i_mode %o)",
			  ROOT_NID(sbi), inode->i_mode);
		iput(inode);
		return -EINVAL;
	}

	sb->s_root = d_make_root(inode);
	if (!sb->s_root)
		return -ENOMEM;

	erofs_shrinker_register(sb);
	/* sb->s_umount is already locked, SB_ACTIVE and SB_BORN are not set */
	err = erofs_init_managed_cache(sb);
	if (err)
		return err;

	err = erofs_register_sysfs(sb);
	if (err)
		return err;

	erofs_info(sb, "mounted with root inode @ nid %llu.", ROOT_NID(sbi));
	return 0;
}

static int erofs_fc_anon_get_tree(struct fs_context *fc)
{
	return get_tree_nodev(fc, erofs_fc_fill_pseudo_super);
}

static int erofs_fc_get_tree(struct fs_context *fc)
{
	struct erofs_fs_context *ctx = fc->fs_private;

	if (IS_ENABLED(CONFIG_EROFS_FS_ONDEMAND) && ctx->fsid)
		return get_tree_nodev(fc, erofs_fc_fill_super);

	return get_tree_bdev(fc, erofs_fc_fill_super);
}

static int erofs_fc_reconfigure(struct fs_context *fc)
{
	struct super_block *sb = fc->root->d_sb;
	struct erofs_sb_info *sbi = EROFS_SB(sb);
	struct erofs_fs_context *ctx = fc->fs_private;

	DBG_BUGON(!sb_rdonly(sb));

	if (ctx->fsid || ctx->domain_id)
		erofs_info(sb, "ignoring reconfiguration for fsid|domain_id.");

	if (test_opt(&ctx->opt, POSIX_ACL))
		fc->sb_flags |= SB_POSIXACL;
	else
		fc->sb_flags &= ~SB_POSIXACL;

	sbi->opt = ctx->opt;

	fc->sb_flags |= SB_RDONLY;
	return 0;
}

static int erofs_release_device_info(int id, void *ptr, void *data)
{
	struct erofs_device_info *dif = ptr;

	fs_put_dax(dif->dax_dev, NULL);
	if (dif->bdev)
		blkdev_put(dif->bdev, FMODE_READ | FMODE_EXCL);
	erofs_fscache_unregister_cookie(dif->fscache);
	dif->fscache = NULL;
	kfree(dif->path);
	kfree(dif);
	return 0;
}

static void erofs_free_dev_context(struct erofs_dev_context *devs)
{
	if (!devs)
		return;
	idr_for_each(&devs->tree, &erofs_release_device_info, NULL);
	idr_destroy(&devs->tree);
	kfree(devs);
}

static void erofs_fc_free(struct fs_context *fc)
{
	struct erofs_fs_context *ctx = fc->fs_private;

	erofs_free_dev_context(ctx->devs);
	kfree(ctx->fsid);
	kfree(ctx->domain_id);
	kfree(ctx);
}

static const struct fs_context_operations erofs_context_ops = {
	.parse_param = erofs_fc_parse_param,
	.get_tree = erofs_fc_get_tree,
	.reconfigure = erofs_fc_reconfigure,
	.free = erofs_fc_free,
};

static const struct fs_context_operations erofs_anon_context_ops = {
	.get_tree = erofs_fc_anon_get_tree,
};

static int erofs_init_fs_context(struct fs_context *fc)
{
	struct erofs_fs_context *ctx;

	/* pseudo mount for anon inodes */
	if (fc->sb_flags & SB_KERNMOUNT) {
		fc->ops = &erofs_anon_context_ops;
		return 0;
	}

	ctx = kzalloc(sizeof(*ctx), GFP_KERNEL);
	if (!ctx)
		return -ENOMEM;
	ctx->devs = kzalloc(sizeof(struct erofs_dev_context), GFP_KERNEL);
	if (!ctx->devs) {
		kfree(ctx);
		return -ENOMEM;
	}
	fc->fs_private = ctx;

	idr_init(&ctx->devs->tree);
	init_rwsem(&ctx->devs->rwsem);
	erofs_default_options(ctx);
	fc->ops = &erofs_context_ops;
	return 0;
}

/*
 * could be triggered after deactivate_locked_super()
 * is called, thus including umount and failed to initialize.
 */
static void erofs_kill_sb(struct super_block *sb)
{
	struct erofs_sb_info *sbi;

	WARN_ON(sb->s_magic != EROFS_SUPER_MAGIC);

	/* pseudo mount for anon inodes */
	if (sb->s_flags & SB_KERNMOUNT) {
		kill_anon_super(sb);
		return;
	}

	if (erofs_is_fscache_mode(sb))
		kill_anon_super(sb);
	else
		kill_block_super(sb);

	sbi = EROFS_SB(sb);
	if (!sbi)
		return;

	erofs_free_dev_context(sbi->devs);
	fs_put_dax(sbi->dax_dev, NULL);
	erofs_fscache_unregister_fs(sb);
	kfree(sbi->fsid);
	kfree(sbi->domain_id);
	kfree(sbi);
	sb->s_fs_info = NULL;
}

/* called when ->s_root is non-NULL */
static void erofs_put_super(struct super_block *sb)
{
	struct erofs_sb_info *const sbi = EROFS_SB(sb);

	DBG_BUGON(!sbi);

	erofs_unregister_sysfs(sb);
	erofs_shrinker_unregister(sb);
#ifdef CONFIG_EROFS_FS_ZIP
	iput(sbi->managed_cache);
	sbi->managed_cache = NULL;
	iput(sbi->packed_inode);
	sbi->packed_inode = NULL;
#endif
	erofs_free_dev_context(sbi->devs);
	sbi->devs = NULL;
	erofs_fscache_unregister_fs(sb);
}

struct file_system_type erofs_fs_type = {
	.owner = THIS_MODULE,
	.name = "erofs",
	.init_fs_context = erofs_init_fs_context,
	.kill_sb = erofs_kill_sb,
	.fs_flags = FS_REQUIRES_DEV | FS_ALLOW_IDMAP,
};
MODULE_ALIAS_FS("erofs");

static int __init erofs_module_init(void)
{
	int err;

	erofs_check_ondisk_layout_definitions();

	erofs_inode_cachep = kmem_cache_create("erofs_inode",
					       sizeof(struct erofs_inode), 0,
					       SLAB_RECLAIM_ACCOUNT,
					       erofs_inode_init_once);
	if (!erofs_inode_cachep) {
		err = -ENOMEM;
		goto icache_err;
	}

	err = erofs_init_shrinker();
	if (err)
		goto shrinker_err;

	err = z_erofs_lzma_init();
	if (err)
		goto lzma_err;

	erofs_pcpubuf_init();
	err = z_erofs_init_zip_subsystem();
	if (err)
		goto zip_err;

	err = erofs_init_sysfs();
	if (err)
		goto sysfs_err;

	err = register_filesystem(&erofs_fs_type);
	if (err)
		goto fs_err;

	return 0;

fs_err:
	erofs_exit_sysfs();
sysfs_err:
	z_erofs_exit_zip_subsystem();
zip_err:
	z_erofs_lzma_exit();
lzma_err:
	erofs_exit_shrinker();
shrinker_err:
	kmem_cache_destroy(erofs_inode_cachep);
icache_err:
	return err;
}

static void __exit erofs_module_exit(void)
{
	unregister_filesystem(&erofs_fs_type);

	/* Ensure all RCU free inodes / pclusters are safe to be destroyed. */
	rcu_barrier();

	erofs_exit_sysfs();
	z_erofs_exit_zip_subsystem();
	z_erofs_lzma_exit();
	erofs_exit_shrinker();
	kmem_cache_destroy(erofs_inode_cachep);
	erofs_pcpubuf_exit();
}

/* get filesystem statistics */
static int erofs_statfs(struct dentry *dentry, struct kstatfs *buf)
{
	struct super_block *sb = dentry->d_sb;
	struct erofs_sb_info *sbi = EROFS_SB(sb);
	u64 id = 0;

	if (!erofs_is_fscache_mode(sb))
		id = huge_encode_dev(sb->s_bdev->bd_dev);

	buf->f_type = sb->s_magic;
	buf->f_bsize = EROFS_BLKSIZ;
	buf->f_blocks = sbi->total_blocks;
	buf->f_bfree = buf->f_bavail = 0;

	buf->f_files = ULLONG_MAX;
	buf->f_ffree = ULLONG_MAX - sbi->inos;

	buf->f_namelen = EROFS_NAME_LEN;

	buf->f_fsid = u64_to_fsid(id);
	return 0;
}

static int erofs_show_options(struct seq_file *seq, struct dentry *root)
{
	struct erofs_sb_info *sbi = EROFS_SB(root->d_sb);
	struct erofs_mount_opts *opt = &sbi->opt;

#ifdef CONFIG_EROFS_FS_XATTR
	if (test_opt(opt, XATTR_USER))
		seq_puts(seq, ",user_xattr");
	else
		seq_puts(seq, ",nouser_xattr");
#endif
#ifdef CONFIG_EROFS_FS_POSIX_ACL
	if (test_opt(opt, POSIX_ACL))
		seq_puts(seq, ",acl");
	else
		seq_puts(seq, ",noacl");
#endif
#ifdef CONFIG_EROFS_FS_ZIP
	if (opt->cache_strategy == EROFS_ZIP_CACHE_DISABLED)
		seq_puts(seq, ",cache_strategy=disabled");
	else if (opt->cache_strategy == EROFS_ZIP_CACHE_READAHEAD)
		seq_puts(seq, ",cache_strategy=readahead");
	else if (opt->cache_strategy == EROFS_ZIP_CACHE_READAROUND)
		seq_puts(seq, ",cache_strategy=readaround");
#endif
	if (test_opt(opt, DAX_ALWAYS))
		seq_puts(seq, ",dax=always");
	if (test_opt(opt, DAX_NEVER))
		seq_puts(seq, ",dax=never");
#ifdef CONFIG_EROFS_FS_ONDEMAND
	if (sbi->fsid)
		seq_printf(seq, ",fsid=%s", sbi->fsid);
	if (sbi->domain_id)
		seq_printf(seq, ",domain_id=%s", sbi->domain_id);
#endif
	return 0;
}

const struct super_operations erofs_sops = {
	.put_super = erofs_put_super,
	.alloc_inode = erofs_alloc_inode,
	.free_inode = erofs_free_inode,
	.statfs = erofs_statfs,
	.show_options = erofs_show_options,
};

module_init(erofs_module_init);
module_exit(erofs_module_exit);

MODULE_DESCRIPTION("Enhanced ROM File System");
MODULE_AUTHOR("Gao Xiang, Chao Yu, Miao Xie, CONSUMER BG, HUAWEI Inc.");
MODULE_LICENSE("GPL");