1 // SPDX-License-Identifier: GPL-2.0-only 2 /* 3 * Copyright (C) 2017-2018 HUAWEI, Inc. 4 * https://www.huawei.com/ 5 * Copyright (C) 2021, Alibaba Cloud 6 */ 7 #include <linux/module.h> 8 #include <linux/buffer_head.h> 9 #include <linux/statfs.h> 10 #include <linux/parser.h> 11 #include <linux/seq_file.h> 12 #include <linux/crc32c.h> 13 #include <linux/fs_context.h> 14 #include <linux/fs_parser.h> 15 #include <linux/dax.h> 16 #include <linux/exportfs.h> 17 #include "xattr.h" 18 19 #define CREATE_TRACE_POINTS 20 #include <trace/events/erofs.h> 21 22 static struct kmem_cache *erofs_inode_cachep __read_mostly; 23 24 void _erofs_err(struct super_block *sb, const char *function, 25 const char *fmt, ...) 26 { 27 struct va_format vaf; 28 va_list args; 29 30 va_start(args, fmt); 31 32 vaf.fmt = fmt; 33 vaf.va = &args; 34 35 pr_err("(device %s): %s: %pV", sb->s_id, function, &vaf); 36 va_end(args); 37 } 38 39 void _erofs_info(struct super_block *sb, const char *function, 40 const char *fmt, ...) 41 { 42 struct va_format vaf; 43 va_list args; 44 45 va_start(args, fmt); 46 47 vaf.fmt = fmt; 48 vaf.va = &args; 49 50 pr_info("(device %s): %pV", sb->s_id, &vaf); 51 va_end(args); 52 } 53 54 static int erofs_superblock_csum_verify(struct super_block *sb, void *sbdata) 55 { 56 struct erofs_super_block *dsb; 57 u32 expected_crc, crc; 58 59 dsb = kmemdup(sbdata + EROFS_SUPER_OFFSET, 60 EROFS_BLKSIZ - EROFS_SUPER_OFFSET, GFP_KERNEL); 61 if (!dsb) 62 return -ENOMEM; 63 64 expected_crc = le32_to_cpu(dsb->checksum); 65 dsb->checksum = 0; 66 /* to allow for x86 boot sectors and other oddities. 
*/ 67 crc = crc32c(~0, dsb, EROFS_BLKSIZ - EROFS_SUPER_OFFSET); 68 kfree(dsb); 69 70 if (crc != expected_crc) { 71 erofs_err(sb, "invalid checksum 0x%08x, 0x%08x expected", 72 crc, expected_crc); 73 return -EBADMSG; 74 } 75 return 0; 76 } 77 78 static void erofs_inode_init_once(void *ptr) 79 { 80 struct erofs_inode *vi = ptr; 81 82 inode_init_once(&vi->vfs_inode); 83 } 84 85 static struct inode *erofs_alloc_inode(struct super_block *sb) 86 { 87 struct erofs_inode *vi = 88 alloc_inode_sb(sb, erofs_inode_cachep, GFP_KERNEL); 89 90 if (!vi) 91 return NULL; 92 93 /* zero out everything except vfs_inode */ 94 memset(vi, 0, offsetof(struct erofs_inode, vfs_inode)); 95 return &vi->vfs_inode; 96 } 97 98 static void erofs_free_inode(struct inode *inode) 99 { 100 struct erofs_inode *vi = EROFS_I(inode); 101 102 /* be careful of RCU symlink path */ 103 if (inode->i_op == &erofs_fast_symlink_iops) 104 kfree(inode->i_link); 105 kfree(vi->xattr_shared_xattrs); 106 107 kmem_cache_free(erofs_inode_cachep, vi); 108 } 109 110 static bool check_layout_compatibility(struct super_block *sb, 111 struct erofs_super_block *dsb) 112 { 113 const unsigned int feature = le32_to_cpu(dsb->feature_incompat); 114 115 EROFS_SB(sb)->feature_incompat = feature; 116 117 /* check if current kernel meets all mandatory requirements */ 118 if (feature & (~EROFS_ALL_FEATURE_INCOMPAT)) { 119 erofs_err(sb, 120 "unidentified incompatible feature %x, please upgrade kernel version", 121 feature & ~EROFS_ALL_FEATURE_INCOMPAT); 122 return false; 123 } 124 return true; 125 } 126 127 #ifdef CONFIG_EROFS_FS_ZIP 128 /* read variable-sized metadata, offset will be aligned by 4-byte */ 129 static void *erofs_read_metadata(struct super_block *sb, struct erofs_buf *buf, 130 erofs_off_t *offset, int *lengthp) 131 { 132 u8 *buffer, *ptr; 133 int len, i, cnt; 134 135 *offset = round_up(*offset, 4); 136 ptr = erofs_read_metabuf(buf, sb, erofs_blknr(*offset), EROFS_KMAP); 137 if (IS_ERR(ptr)) 138 return ptr; 139 140 len = 
le16_to_cpu(*(__le16 *)&ptr[erofs_blkoff(*offset)]); 141 if (!len) 142 len = U16_MAX + 1; 143 buffer = kmalloc(len, GFP_KERNEL); 144 if (!buffer) 145 return ERR_PTR(-ENOMEM); 146 *offset += sizeof(__le16); 147 *lengthp = len; 148 149 for (i = 0; i < len; i += cnt) { 150 cnt = min(EROFS_BLKSIZ - (int)erofs_blkoff(*offset), len - i); 151 ptr = erofs_read_metabuf(buf, sb, erofs_blknr(*offset), 152 EROFS_KMAP); 153 if (IS_ERR(ptr)) { 154 kfree(buffer); 155 return ptr; 156 } 157 memcpy(buffer + i, ptr + erofs_blkoff(*offset), cnt); 158 *offset += cnt; 159 } 160 return buffer; 161 } 162 163 static int erofs_load_compr_cfgs(struct super_block *sb, 164 struct erofs_super_block *dsb) 165 { 166 struct erofs_sb_info *sbi = EROFS_SB(sb); 167 struct erofs_buf buf = __EROFS_BUF_INITIALIZER; 168 unsigned int algs, alg; 169 erofs_off_t offset; 170 int size, ret = 0; 171 172 sbi->available_compr_algs = le16_to_cpu(dsb->u1.available_compr_algs); 173 if (sbi->available_compr_algs & ~Z_EROFS_ALL_COMPR_ALGS) { 174 erofs_err(sb, "try to load compressed fs with unsupported algorithms %x", 175 sbi->available_compr_algs & ~Z_EROFS_ALL_COMPR_ALGS); 176 return -EINVAL; 177 } 178 179 offset = EROFS_SUPER_OFFSET + sbi->sb_size; 180 alg = 0; 181 for (algs = sbi->available_compr_algs; algs; algs >>= 1, ++alg) { 182 void *data; 183 184 if (!(algs & 1)) 185 continue; 186 187 data = erofs_read_metadata(sb, &buf, &offset, &size); 188 if (IS_ERR(data)) { 189 ret = PTR_ERR(data); 190 break; 191 } 192 193 switch (alg) { 194 case Z_EROFS_COMPRESSION_LZ4: 195 ret = z_erofs_load_lz4_config(sb, dsb, data, size); 196 break; 197 case Z_EROFS_COMPRESSION_LZMA: 198 ret = z_erofs_load_lzma_config(sb, dsb, data, size); 199 break; 200 default: 201 DBG_BUGON(1); 202 ret = -EFAULT; 203 } 204 kfree(data); 205 if (ret) 206 break; 207 } 208 erofs_put_metabuf(&buf); 209 return ret; 210 } 211 #else 212 static int erofs_load_compr_cfgs(struct super_block *sb, 213 struct erofs_super_block *dsb) 214 { 215 if 
(dsb->u1.available_compr_algs) { 216 erofs_err(sb, "try to load compressed fs when compression is disabled"); 217 return -EINVAL; 218 } 219 return 0; 220 } 221 #endif 222 223 static int erofs_init_device(struct erofs_buf *buf, struct super_block *sb, 224 struct erofs_device_info *dif, erofs_off_t *pos) 225 { 226 struct erofs_sb_info *sbi = EROFS_SB(sb); 227 struct erofs_fscache *fscache; 228 struct erofs_deviceslot *dis; 229 struct block_device *bdev; 230 void *ptr; 231 232 ptr = erofs_read_metabuf(buf, sb, erofs_blknr(*pos), EROFS_KMAP); 233 if (IS_ERR(ptr)) 234 return PTR_ERR(ptr); 235 dis = ptr + erofs_blkoff(*pos); 236 237 if (!dif->path) { 238 if (!dis->tag[0]) { 239 erofs_err(sb, "empty device tag @ pos %llu", *pos); 240 return -EINVAL; 241 } 242 dif->path = kmemdup_nul(dis->tag, sizeof(dis->tag), GFP_KERNEL); 243 if (!dif->path) 244 return -ENOMEM; 245 } 246 247 if (erofs_is_fscache_mode(sb)) { 248 fscache = erofs_fscache_register_cookie(sb, dif->path, 0); 249 if (IS_ERR(fscache)) 250 return PTR_ERR(fscache); 251 dif->fscache = fscache; 252 } else { 253 bdev = blkdev_get_by_path(dif->path, FMODE_READ | FMODE_EXCL, 254 sb->s_type); 255 if (IS_ERR(bdev)) 256 return PTR_ERR(bdev); 257 dif->bdev = bdev; 258 dif->dax_dev = fs_dax_get_by_bdev(bdev, &dif->dax_part_off, 259 NULL, NULL); 260 } 261 262 dif->blocks = le32_to_cpu(dis->blocks); 263 dif->mapped_blkaddr = le32_to_cpu(dis->mapped_blkaddr); 264 sbi->total_blocks += dif->blocks; 265 *pos += EROFS_DEVT_SLOT_SIZE; 266 return 0; 267 } 268 269 static int erofs_scan_devices(struct super_block *sb, 270 struct erofs_super_block *dsb) 271 { 272 struct erofs_sb_info *sbi = EROFS_SB(sb); 273 unsigned int ondisk_extradevs; 274 erofs_off_t pos; 275 struct erofs_buf buf = __EROFS_BUF_INITIALIZER; 276 struct erofs_device_info *dif; 277 int id, err = 0; 278 279 sbi->total_blocks = sbi->primarydevice_blocks; 280 if (!erofs_sb_has_device_table(sbi)) 281 ondisk_extradevs = 0; 282 else 283 ondisk_extradevs = 
le16_to_cpu(dsb->extra_devices); 284 285 if (sbi->devs->extra_devices && 286 ondisk_extradevs != sbi->devs->extra_devices) { 287 erofs_err(sb, "extra devices don't match (ondisk %u, given %u)", 288 ondisk_extradevs, sbi->devs->extra_devices); 289 return -EINVAL; 290 } 291 if (!ondisk_extradevs) 292 return 0; 293 294 sbi->device_id_mask = roundup_pow_of_two(ondisk_extradevs + 1) - 1; 295 pos = le16_to_cpu(dsb->devt_slotoff) * EROFS_DEVT_SLOT_SIZE; 296 down_read(&sbi->devs->rwsem); 297 if (sbi->devs->extra_devices) { 298 idr_for_each_entry(&sbi->devs->tree, dif, id) { 299 err = erofs_init_device(&buf, sb, dif, &pos); 300 if (err) 301 break; 302 } 303 } else { 304 for (id = 0; id < ondisk_extradevs; id++) { 305 dif = kzalloc(sizeof(*dif), GFP_KERNEL); 306 if (!dif) { 307 err = -ENOMEM; 308 break; 309 } 310 311 err = idr_alloc(&sbi->devs->tree, dif, 0, 0, GFP_KERNEL); 312 if (err < 0) { 313 kfree(dif); 314 break; 315 } 316 ++sbi->devs->extra_devices; 317 318 err = erofs_init_device(&buf, sb, dif, &pos); 319 if (err) 320 break; 321 } 322 } 323 up_read(&sbi->devs->rwsem); 324 erofs_put_metabuf(&buf); 325 return err; 326 } 327 328 static int erofs_read_superblock(struct super_block *sb) 329 { 330 struct erofs_sb_info *sbi; 331 struct erofs_buf buf = __EROFS_BUF_INITIALIZER; 332 struct erofs_super_block *dsb; 333 unsigned int blkszbits; 334 void *data; 335 int ret; 336 337 data = erofs_read_metabuf(&buf, sb, 0, EROFS_KMAP); 338 if (IS_ERR(data)) { 339 erofs_err(sb, "cannot read erofs superblock"); 340 return PTR_ERR(data); 341 } 342 343 sbi = EROFS_SB(sb); 344 dsb = (struct erofs_super_block *)(data + EROFS_SUPER_OFFSET); 345 346 ret = -EINVAL; 347 if (le32_to_cpu(dsb->magic) != EROFS_SUPER_MAGIC_V1) { 348 erofs_err(sb, "cannot find valid erofs superblock"); 349 goto out; 350 } 351 352 sbi->feature_compat = le32_to_cpu(dsb->feature_compat); 353 if (erofs_sb_has_sb_chksum(sbi)) { 354 ret = erofs_superblock_csum_verify(sb, data); 355 if (ret) 356 goto out; 357 } 358 359 ret 
= -EINVAL; 360 blkszbits = dsb->blkszbits; 361 /* 9(512 bytes) + LOG_SECTORS_PER_BLOCK == LOG_BLOCK_SIZE */ 362 if (blkszbits != LOG_BLOCK_SIZE) { 363 erofs_err(sb, "blkszbits %u isn't supported on this platform", 364 blkszbits); 365 goto out; 366 } 367 368 if (!check_layout_compatibility(sb, dsb)) 369 goto out; 370 371 sbi->sb_size = 128 + dsb->sb_extslots * EROFS_SB_EXTSLOT_SIZE; 372 if (sbi->sb_size > EROFS_BLKSIZ) { 373 erofs_err(sb, "invalid sb_extslots %u (more than a fs block)", 374 sbi->sb_size); 375 goto out; 376 } 377 sbi->primarydevice_blocks = le32_to_cpu(dsb->blocks); 378 sbi->meta_blkaddr = le32_to_cpu(dsb->meta_blkaddr); 379 #ifdef CONFIG_EROFS_FS_XATTR 380 sbi->xattr_blkaddr = le32_to_cpu(dsb->xattr_blkaddr); 381 #endif 382 sbi->islotbits = ilog2(sizeof(struct erofs_inode_compact)); 383 sbi->root_nid = le16_to_cpu(dsb->root_nid); 384 #ifdef CONFIG_EROFS_FS_ZIP 385 sbi->packed_inode = NULL; 386 if (erofs_sb_has_fragments(sbi) && dsb->packed_nid) { 387 sbi->packed_inode = 388 erofs_iget(sb, le64_to_cpu(dsb->packed_nid)); 389 if (IS_ERR(sbi->packed_inode)) { 390 ret = PTR_ERR(sbi->packed_inode); 391 goto out; 392 } 393 } 394 #endif 395 sbi->inos = le64_to_cpu(dsb->inos); 396 397 sbi->build_time = le64_to_cpu(dsb->build_time); 398 sbi->build_time_nsec = le32_to_cpu(dsb->build_time_nsec); 399 400 memcpy(&sb->s_uuid, dsb->uuid, sizeof(dsb->uuid)); 401 402 ret = strscpy(sbi->volume_name, dsb->volume_name, 403 sizeof(dsb->volume_name)); 404 if (ret < 0) { /* -E2BIG */ 405 erofs_err(sb, "bad volume name without NIL terminator"); 406 ret = -EFSCORRUPTED; 407 goto out; 408 } 409 410 /* parse on-disk compression configurations */ 411 if (erofs_sb_has_compr_cfgs(sbi)) 412 ret = erofs_load_compr_cfgs(sb, dsb); 413 else 414 ret = z_erofs_load_lz4_config(sb, dsb, NULL, 0); 415 if (ret < 0) 416 goto out; 417 418 /* handle multiple devices */ 419 ret = erofs_scan_devices(sb, dsb); 420 421 if (erofs_sb_has_ztailpacking(sbi)) 422 erofs_info(sb, "EXPERIMENTAL compressed 
inline data feature in use. Use at your own risk!"); 423 if (erofs_is_fscache_mode(sb)) 424 erofs_info(sb, "EXPERIMENTAL fscache-based on-demand read feature in use. Use at your own risk!"); 425 if (erofs_sb_has_fragments(sbi)) 426 erofs_info(sb, "EXPERIMENTAL compressed fragments feature in use. Use at your own risk!"); 427 if (erofs_sb_has_dedupe(sbi)) 428 erofs_info(sb, "EXPERIMENTAL global deduplication feature in use. Use at your own risk!"); 429 out: 430 erofs_put_metabuf(&buf); 431 return ret; 432 } 433 434 /* set up default EROFS parameters */ 435 static void erofs_default_options(struct erofs_fs_context *ctx) 436 { 437 #ifdef CONFIG_EROFS_FS_ZIP 438 ctx->opt.cache_strategy = EROFS_ZIP_CACHE_READAROUND; 439 ctx->opt.max_sync_decompress_pages = 3; 440 ctx->opt.sync_decompress = EROFS_SYNC_DECOMPRESS_AUTO; 441 #endif 442 #ifdef CONFIG_EROFS_FS_XATTR 443 set_opt(&ctx->opt, XATTR_USER); 444 #endif 445 #ifdef CONFIG_EROFS_FS_POSIX_ACL 446 set_opt(&ctx->opt, POSIX_ACL); 447 #endif 448 } 449 450 enum { 451 Opt_user_xattr, 452 Opt_acl, 453 Opt_cache_strategy, 454 Opt_dax, 455 Opt_dax_enum, 456 Opt_device, 457 Opt_fsid, 458 Opt_domain_id, 459 Opt_err 460 }; 461 462 static const struct constant_table erofs_param_cache_strategy[] = { 463 {"disabled", EROFS_ZIP_CACHE_DISABLED}, 464 {"readahead", EROFS_ZIP_CACHE_READAHEAD}, 465 {"readaround", EROFS_ZIP_CACHE_READAROUND}, 466 {} 467 }; 468 469 static const struct constant_table erofs_dax_param_enums[] = { 470 {"always", EROFS_MOUNT_DAX_ALWAYS}, 471 {"never", EROFS_MOUNT_DAX_NEVER}, 472 {} 473 }; 474 475 static const struct fs_parameter_spec erofs_fs_parameters[] = { 476 fsparam_flag_no("user_xattr", Opt_user_xattr), 477 fsparam_flag_no("acl", Opt_acl), 478 fsparam_enum("cache_strategy", Opt_cache_strategy, 479 erofs_param_cache_strategy), 480 fsparam_flag("dax", Opt_dax), 481 fsparam_enum("dax", Opt_dax_enum, erofs_dax_param_enums), 482 fsparam_string("device", Opt_device), 483 fsparam_string("fsid", Opt_fsid), 484 
fsparam_string("domain_id", Opt_domain_id), 485 {} 486 }; 487 488 static bool erofs_fc_set_dax_mode(struct fs_context *fc, unsigned int mode) 489 { 490 #ifdef CONFIG_FS_DAX 491 struct erofs_fs_context *ctx = fc->fs_private; 492 493 switch (mode) { 494 case EROFS_MOUNT_DAX_ALWAYS: 495 warnfc(fc, "DAX enabled. Warning: EXPERIMENTAL, use at your own risk"); 496 set_opt(&ctx->opt, DAX_ALWAYS); 497 clear_opt(&ctx->opt, DAX_NEVER); 498 return true; 499 case EROFS_MOUNT_DAX_NEVER: 500 set_opt(&ctx->opt, DAX_NEVER); 501 clear_opt(&ctx->opt, DAX_ALWAYS); 502 return true; 503 default: 504 DBG_BUGON(1); 505 return false; 506 } 507 #else 508 errorfc(fc, "dax options not supported"); 509 return false; 510 #endif 511 } 512 513 static int erofs_fc_parse_param(struct fs_context *fc, 514 struct fs_parameter *param) 515 { 516 struct erofs_fs_context *ctx = fc->fs_private; 517 struct fs_parse_result result; 518 struct erofs_device_info *dif; 519 int opt, ret; 520 521 opt = fs_parse(fc, erofs_fs_parameters, param, &result); 522 if (opt < 0) 523 return opt; 524 525 switch (opt) { 526 case Opt_user_xattr: 527 #ifdef CONFIG_EROFS_FS_XATTR 528 if (result.boolean) 529 set_opt(&ctx->opt, XATTR_USER); 530 else 531 clear_opt(&ctx->opt, XATTR_USER); 532 #else 533 errorfc(fc, "{,no}user_xattr options not supported"); 534 #endif 535 break; 536 case Opt_acl: 537 #ifdef CONFIG_EROFS_FS_POSIX_ACL 538 if (result.boolean) 539 set_opt(&ctx->opt, POSIX_ACL); 540 else 541 clear_opt(&ctx->opt, POSIX_ACL); 542 #else 543 errorfc(fc, "{,no}acl options not supported"); 544 #endif 545 break; 546 case Opt_cache_strategy: 547 #ifdef CONFIG_EROFS_FS_ZIP 548 ctx->opt.cache_strategy = result.uint_32; 549 #else 550 errorfc(fc, "compression not supported, cache_strategy ignored"); 551 #endif 552 break; 553 case Opt_dax: 554 if (!erofs_fc_set_dax_mode(fc, EROFS_MOUNT_DAX_ALWAYS)) 555 return -EINVAL; 556 break; 557 case Opt_dax_enum: 558 if (!erofs_fc_set_dax_mode(fc, result.uint_32)) 559 return -EINVAL; 560 break; 
561 case Opt_device: 562 dif = kzalloc(sizeof(*dif), GFP_KERNEL); 563 if (!dif) 564 return -ENOMEM; 565 dif->path = kstrdup(param->string, GFP_KERNEL); 566 if (!dif->path) { 567 kfree(dif); 568 return -ENOMEM; 569 } 570 down_write(&ctx->devs->rwsem); 571 ret = idr_alloc(&ctx->devs->tree, dif, 0, 0, GFP_KERNEL); 572 up_write(&ctx->devs->rwsem); 573 if (ret < 0) { 574 kfree(dif->path); 575 kfree(dif); 576 return ret; 577 } 578 ++ctx->devs->extra_devices; 579 break; 580 #ifdef CONFIG_EROFS_FS_ONDEMAND 581 case Opt_fsid: 582 kfree(ctx->fsid); 583 ctx->fsid = kstrdup(param->string, GFP_KERNEL); 584 if (!ctx->fsid) 585 return -ENOMEM; 586 break; 587 case Opt_domain_id: 588 kfree(ctx->domain_id); 589 ctx->domain_id = kstrdup(param->string, GFP_KERNEL); 590 if (!ctx->domain_id) 591 return -ENOMEM; 592 break; 593 #else 594 case Opt_fsid: 595 case Opt_domain_id: 596 errorfc(fc, "%s option not supported", erofs_fs_parameters[opt].name); 597 break; 598 #endif 599 default: 600 return -ENOPARAM; 601 } 602 return 0; 603 } 604 605 #ifdef CONFIG_EROFS_FS_ZIP 606 static const struct address_space_operations managed_cache_aops; 607 608 static bool erofs_managed_cache_release_folio(struct folio *folio, gfp_t gfp) 609 { 610 bool ret = true; 611 struct address_space *const mapping = folio->mapping; 612 613 DBG_BUGON(!folio_test_locked(folio)); 614 DBG_BUGON(mapping->a_ops != &managed_cache_aops); 615 616 if (folio_test_private(folio)) 617 ret = erofs_try_to_free_cached_page(&folio->page); 618 619 return ret; 620 } 621 622 /* 623 * It will be called only on inode eviction. In case that there are still some 624 * decompression requests in progress, wait with rescheduling for a bit here. 625 * We could introduce an extra locking instead but it seems unnecessary. 
626 */ 627 static void erofs_managed_cache_invalidate_folio(struct folio *folio, 628 size_t offset, size_t length) 629 { 630 const size_t stop = length + offset; 631 632 DBG_BUGON(!folio_test_locked(folio)); 633 634 /* Check for potential overflow in debug mode */ 635 DBG_BUGON(stop > folio_size(folio) || stop < length); 636 637 if (offset == 0 && stop == folio_size(folio)) 638 while (!erofs_managed_cache_release_folio(folio, GFP_NOFS)) 639 cond_resched(); 640 } 641 642 static const struct address_space_operations managed_cache_aops = { 643 .release_folio = erofs_managed_cache_release_folio, 644 .invalidate_folio = erofs_managed_cache_invalidate_folio, 645 }; 646 647 static int erofs_init_managed_cache(struct super_block *sb) 648 { 649 struct erofs_sb_info *const sbi = EROFS_SB(sb); 650 struct inode *const inode = new_inode(sb); 651 652 if (!inode) 653 return -ENOMEM; 654 655 set_nlink(inode, 1); 656 inode->i_size = OFFSET_MAX; 657 658 inode->i_mapping->a_ops = &managed_cache_aops; 659 mapping_set_gfp_mask(inode->i_mapping, GFP_NOFS); 660 sbi->managed_cache = inode; 661 return 0; 662 } 663 #else 664 static int erofs_init_managed_cache(struct super_block *sb) { return 0; } 665 #endif 666 667 static struct inode *erofs_nfs_get_inode(struct super_block *sb, 668 u64 ino, u32 generation) 669 { 670 return erofs_iget(sb, ino); 671 } 672 673 static struct dentry *erofs_fh_to_dentry(struct super_block *sb, 674 struct fid *fid, int fh_len, int fh_type) 675 { 676 return generic_fh_to_dentry(sb, fid, fh_len, fh_type, 677 erofs_nfs_get_inode); 678 } 679 680 static struct dentry *erofs_fh_to_parent(struct super_block *sb, 681 struct fid *fid, int fh_len, int fh_type) 682 { 683 return generic_fh_to_parent(sb, fid, fh_len, fh_type, 684 erofs_nfs_get_inode); 685 } 686 687 static struct dentry *erofs_get_parent(struct dentry *child) 688 { 689 erofs_nid_t nid; 690 unsigned int d_type; 691 int err; 692 693 err = erofs_namei(d_inode(child), &dotdot_name, &nid, &d_type); 694 if (err) 
695 return ERR_PTR(err); 696 return d_obtain_alias(erofs_iget(child->d_sb, nid)); 697 } 698 699 static const struct export_operations erofs_export_ops = { 700 .fh_to_dentry = erofs_fh_to_dentry, 701 .fh_to_parent = erofs_fh_to_parent, 702 .get_parent = erofs_get_parent, 703 }; 704 705 static int erofs_fc_fill_pseudo_super(struct super_block *sb, struct fs_context *fc) 706 { 707 static const struct tree_descr empty_descr = {""}; 708 709 return simple_fill_super(sb, EROFS_SUPER_MAGIC, &empty_descr); 710 } 711 712 static int erofs_fc_fill_super(struct super_block *sb, struct fs_context *fc) 713 { 714 struct inode *inode; 715 struct erofs_sb_info *sbi; 716 struct erofs_fs_context *ctx = fc->fs_private; 717 int err; 718 719 sb->s_magic = EROFS_SUPER_MAGIC; 720 sb->s_flags |= SB_RDONLY | SB_NOATIME; 721 sb->s_maxbytes = MAX_LFS_FILESIZE; 722 sb->s_op = &erofs_sops; 723 724 sbi = kzalloc(sizeof(*sbi), GFP_KERNEL); 725 if (!sbi) 726 return -ENOMEM; 727 728 sb->s_fs_info = sbi; 729 sbi->opt = ctx->opt; 730 sbi->devs = ctx->devs; 731 ctx->devs = NULL; 732 sbi->fsid = ctx->fsid; 733 ctx->fsid = NULL; 734 sbi->domain_id = ctx->domain_id; 735 ctx->domain_id = NULL; 736 737 if (erofs_is_fscache_mode(sb)) { 738 sb->s_blocksize = EROFS_BLKSIZ; 739 sb->s_blocksize_bits = LOG_BLOCK_SIZE; 740 741 err = erofs_fscache_register_fs(sb); 742 if (err) 743 return err; 744 745 err = super_setup_bdi(sb); 746 if (err) 747 return err; 748 } else { 749 if (!sb_set_blocksize(sb, EROFS_BLKSIZ)) { 750 erofs_err(sb, "failed to set erofs blksize"); 751 return -EINVAL; 752 } 753 754 sbi->dax_dev = fs_dax_get_by_bdev(sb->s_bdev, 755 &sbi->dax_part_off, 756 NULL, NULL); 757 } 758 759 err = erofs_read_superblock(sb); 760 if (err) 761 return err; 762 763 if (test_opt(&sbi->opt, DAX_ALWAYS)) { 764 BUILD_BUG_ON(EROFS_BLKSIZ != PAGE_SIZE); 765 766 if (!sbi->dax_dev) { 767 errorfc(fc, "DAX unsupported by block device. 
Turning off DAX."); 768 clear_opt(&sbi->opt, DAX_ALWAYS); 769 } 770 } 771 772 sb->s_time_gran = 1; 773 sb->s_xattr = erofs_xattr_handlers; 774 sb->s_export_op = &erofs_export_ops; 775 776 if (test_opt(&sbi->opt, POSIX_ACL)) 777 sb->s_flags |= SB_POSIXACL; 778 else 779 sb->s_flags &= ~SB_POSIXACL; 780 781 #ifdef CONFIG_EROFS_FS_ZIP 782 xa_init(&sbi->managed_pslots); 783 #endif 784 785 /* get the root inode */ 786 inode = erofs_iget(sb, ROOT_NID(sbi)); 787 if (IS_ERR(inode)) 788 return PTR_ERR(inode); 789 790 if (!S_ISDIR(inode->i_mode)) { 791 erofs_err(sb, "rootino(nid %llu) is not a directory(i_mode %o)", 792 ROOT_NID(sbi), inode->i_mode); 793 iput(inode); 794 return -EINVAL; 795 } 796 797 sb->s_root = d_make_root(inode); 798 if (!sb->s_root) 799 return -ENOMEM; 800 801 erofs_shrinker_register(sb); 802 /* sb->s_umount is already locked, SB_ACTIVE and SB_BORN are not set */ 803 err = erofs_init_managed_cache(sb); 804 if (err) 805 return err; 806 807 err = erofs_register_sysfs(sb); 808 if (err) 809 return err; 810 811 erofs_info(sb, "mounted with root inode @ nid %llu.", ROOT_NID(sbi)); 812 return 0; 813 } 814 815 static int erofs_fc_anon_get_tree(struct fs_context *fc) 816 { 817 return get_tree_nodev(fc, erofs_fc_fill_pseudo_super); 818 } 819 820 static int erofs_fc_get_tree(struct fs_context *fc) 821 { 822 struct erofs_fs_context *ctx = fc->fs_private; 823 824 if (IS_ENABLED(CONFIG_EROFS_FS_ONDEMAND) && ctx->fsid) 825 return get_tree_nodev(fc, erofs_fc_fill_super); 826 827 return get_tree_bdev(fc, erofs_fc_fill_super); 828 } 829 830 static int erofs_fc_reconfigure(struct fs_context *fc) 831 { 832 struct super_block *sb = fc->root->d_sb; 833 struct erofs_sb_info *sbi = EROFS_SB(sb); 834 struct erofs_fs_context *ctx = fc->fs_private; 835 836 DBG_BUGON(!sb_rdonly(sb)); 837 838 if (ctx->fsid || ctx->domain_id) 839 erofs_info(sb, "ignoring reconfiguration for fsid|domain_id."); 840 841 if (test_opt(&ctx->opt, POSIX_ACL)) 842 fc->sb_flags |= SB_POSIXACL; 843 else 844 
fc->sb_flags &= ~SB_POSIXACL; 845 846 sbi->opt = ctx->opt; 847 848 fc->sb_flags |= SB_RDONLY; 849 return 0; 850 } 851 852 static int erofs_release_device_info(int id, void *ptr, void *data) 853 { 854 struct erofs_device_info *dif = ptr; 855 856 fs_put_dax(dif->dax_dev, NULL); 857 if (dif->bdev) 858 blkdev_put(dif->bdev, FMODE_READ | FMODE_EXCL); 859 erofs_fscache_unregister_cookie(dif->fscache); 860 dif->fscache = NULL; 861 kfree(dif->path); 862 kfree(dif); 863 return 0; 864 } 865 866 static void erofs_free_dev_context(struct erofs_dev_context *devs) 867 { 868 if (!devs) 869 return; 870 idr_for_each(&devs->tree, &erofs_release_device_info, NULL); 871 idr_destroy(&devs->tree); 872 kfree(devs); 873 } 874 875 static void erofs_fc_free(struct fs_context *fc) 876 { 877 struct erofs_fs_context *ctx = fc->fs_private; 878 879 erofs_free_dev_context(ctx->devs); 880 kfree(ctx->fsid); 881 kfree(ctx->domain_id); 882 kfree(ctx); 883 } 884 885 static const struct fs_context_operations erofs_context_ops = { 886 .parse_param = erofs_fc_parse_param, 887 .get_tree = erofs_fc_get_tree, 888 .reconfigure = erofs_fc_reconfigure, 889 .free = erofs_fc_free, 890 }; 891 892 static const struct fs_context_operations erofs_anon_context_ops = { 893 .get_tree = erofs_fc_anon_get_tree, 894 }; 895 896 static int erofs_init_fs_context(struct fs_context *fc) 897 { 898 struct erofs_fs_context *ctx; 899 900 /* pseudo mount for anon inodes */ 901 if (fc->sb_flags & SB_KERNMOUNT) { 902 fc->ops = &erofs_anon_context_ops; 903 return 0; 904 } 905 906 ctx = kzalloc(sizeof(*ctx), GFP_KERNEL); 907 if (!ctx) 908 return -ENOMEM; 909 ctx->devs = kzalloc(sizeof(struct erofs_dev_context), GFP_KERNEL); 910 if (!ctx->devs) { 911 kfree(ctx); 912 return -ENOMEM; 913 } 914 fc->fs_private = ctx; 915 916 idr_init(&ctx->devs->tree); 917 init_rwsem(&ctx->devs->rwsem); 918 erofs_default_options(ctx); 919 fc->ops = &erofs_context_ops; 920 return 0; 921 } 922 923 /* 924 * could be triggered after deactivate_locked_super() 
 * is called, thus including umount and failed to initialize.
 *
 * Shuts the superblock down through the generic helper matching how it was
 * obtained, then frees everything hanging off the erofs superblock info:
 * extra devices, DAX device, fscache registration, fsid/domain_id strings
 * and the info structure itself.
 */
static void erofs_kill_sb(struct super_block *sb)
{
	struct erofs_sb_info *sbi;

	WARN_ON(sb->s_magic != EROFS_SUPER_MAGIC);

	/* pseudo mount for anon inodes */
	if (sb->s_flags & SB_KERNMOUNT) {
		kill_anon_super(sb);
		return;
	}

	if (erofs_is_fscache_mode(sb))
		kill_anon_super(sb);
	else
		kill_block_super(sb);

	/* no info attached, e.g. its allocation failed in fill_super */
	sbi = EROFS_SB(sb);
	if (!sbi)
		return;

	erofs_free_dev_context(sbi->devs);
	fs_put_dax(sbi->dax_dev, NULL);
	/* unregister before the fsid/domain_id strings are freed below */
	erofs_fscache_unregister_fs(sb);
	kfree(sbi->fsid);
	kfree(sbi->domain_id);
	kfree(sbi);
	sb->s_fs_info = NULL;
}

/*
 * called when ->s_root is non-NULL
 *
 * Undoes the late steps of erofs_fc_fill_super(): sysfs entry, shrinker,
 * managed cache inode and packed inode, then the fscache registration.
 */
static void erofs_put_super(struct super_block *sb)
{
	struct erofs_sb_info *const sbi = EROFS_SB(sb);

	DBG_BUGON(!sbi);

	erofs_unregister_sysfs(sb);
	erofs_shrinker_unregister(sb);
#ifdef CONFIG_EROFS_FS_ZIP
	/* either inode may be NULL here */
	iput(sbi->managed_cache);
	sbi->managed_cache = NULL;
	iput(sbi->packed_inode);
	sbi->packed_inode = NULL;
#endif
	erofs_fscache_unregister_fs(sb);
}

/* filesystem type registered by erofs_module_init() */
struct file_system_type erofs_fs_type = {
	.owner          = THIS_MODULE,
	.name           = "erofs",
	.init_fs_context = erofs_init_fs_context,
	.kill_sb        = erofs_kill_sb,
	.fs_flags       = FS_REQUIRES_DEV | FS_ALLOW_IDMAP,
};
MODULE_ALIAS_FS("erofs");

/*
 * Module entry point: create the inode slab cache, bring up the shrinker,
 * LZMA, per-CPU buffer, compression and sysfs subsystems, and register the
 * filesystem type last.  On failure the steps already completed are undone
 * in reverse order through the label chain below.
 */
static int __init erofs_module_init(void)
{
	int err;

	erofs_check_ondisk_layout_definitions();

	erofs_inode_cachep = kmem_cache_create("erofs_inode",
			sizeof(struct erofs_inode), 0,
			SLAB_RECLAIM_ACCOUNT,
			erofs_inode_init_once);
	if (!erofs_inode_cachep) {
		err = -ENOMEM;
		goto icache_err;
	}

	err = erofs_init_shrinker();
	if (err)
		goto shrinker_err;

	err = z_erofs_lzma_init();
	if (err)
		goto lzma_err;

	/*
	 * NOTE(review): the error paths below have no counterpart for this
	 * call, while erofs_module_exit() calls erofs_pcpubuf_exit() --
	 * presumably nothing is allocated here; confirm.
	 */
	erofs_pcpubuf_init();
	err = z_erofs_init_zip_subsystem();
	if (err)
		goto zip_err;

	err = erofs_init_sysfs();
	if (err)
		goto sysfs_err;

	err = register_filesystem(&erofs_fs_type);
	if (err)
		goto fs_err;

	return 0;

fs_err:
	erofs_exit_sysfs();
sysfs_err:
	z_erofs_exit_zip_subsystem();
zip_err:
	z_erofs_lzma_exit();
lzma_err:
	erofs_exit_shrinker();
shrinker_err:
	kmem_cache_destroy(erofs_inode_cachep);
icache_err:
	return err;
}

/*
 * Module exit point: unregister the filesystem type first, wait for pending
 * RCU callbacks, then tear the subsystems down.
 */
static void __exit erofs_module_exit(void)
{
	unregister_filesystem(&erofs_fs_type);

	/* Ensure all RCU free inodes / pclusters are safe to be destroyed. */
	rcu_barrier();

	erofs_exit_sysfs();
	z_erofs_exit_zip_subsystem();
	z_erofs_lzma_exit();
	erofs_exit_shrinker();
	kmem_cache_destroy(erofs_inode_cachep);
	erofs_pcpubuf_exit();
}

/*
 * get filesystem statistics
 *
 * Reports the total block count across all devices with no free blocks.
 * The fsid is derived from the backing block device number and is left as
 * zero in fscache mode.
 */
static int erofs_statfs(struct dentry *dentry, struct kstatfs *buf)
{
	struct super_block *sb = dentry->d_sb;
	struct erofs_sb_info *sbi = EROFS_SB(sb);
	u64 id = 0;

	if (!erofs_is_fscache_mode(sb))
		id = huge_encode_dev(sb->s_bdev->bd_dev);

	buf->f_type = sb->s_magic;
	buf->f_bsize = EROFS_BLKSIZ;
	buf->f_blocks = sbi->total_blocks;
	buf->f_bfree = buf->f_bavail = 0;

	/* the inode count is reported relative to ULLONG_MAX */
	buf->f_files = ULLONG_MAX;
	buf->f_ffree = ULLONG_MAX - sbi->inos;

	buf->f_namelen = EROFS_NAME_LEN;

	buf->f_fsid = u64_to_fsid(id);
	return 0;
}

/*
 * ->show_options: print the active mount options, each prefixed with a
 * comma.  Options whose feature is compiled out are omitted.
 */
static int erofs_show_options(struct seq_file *seq, struct dentry *root)
{
	struct erofs_sb_info *sbi = EROFS_SB(root->d_sb);
	struct erofs_mount_opts *opt = &sbi->opt;

#ifdef CONFIG_EROFS_FS_XATTR
	if (test_opt(opt, XATTR_USER))
		seq_puts(seq, ",user_xattr");
	else
		seq_puts(seq, ",nouser_xattr");
#endif
#ifdef CONFIG_EROFS_FS_POSIX_ACL
	if (test_opt(opt, POSIX_ACL))
		seq_puts(seq, ",acl");
	else
		seq_puts(seq, ",noacl");
#endif
#ifdef CONFIG_EROFS_FS_ZIP
	if (opt->cache_strategy == EROFS_ZIP_CACHE_DISABLED)
		seq_puts(seq, ",cache_strategy=disabled");
	else if (opt->cache_strategy == EROFS_ZIP_CACHE_READAHEAD)
		seq_puts(seq, ",cache_strategy=readahead");
	else if (opt->cache_strategy == EROFS_ZIP_CACHE_READAROUND)
		seq_puts(seq, ",cache_strategy=readaround");
#endif
	/* neither DAX flag set: no dax option is printed */
	if (test_opt(opt, DAX_ALWAYS))
		seq_puts(seq, ",dax=always");
	if (test_opt(opt, DAX_NEVER))
		seq_puts(seq, ",dax=never");
#ifdef CONFIG_EROFS_FS_ONDEMAND
	if (sbi->fsid)
		seq_printf(seq, ",fsid=%s", sbi->fsid);
	if (sbi->domain_id)
		seq_printf(seq, ",domain_id=%s", sbi->domain_id);
#endif
	return 0;
}

/* superblock operations installed by erofs_fc_fill_super() */
const struct super_operations erofs_sops = {
	.put_super = erofs_put_super,
	.alloc_inode = erofs_alloc_inode,
	.free_inode = erofs_free_inode,
	.statfs = erofs_statfs,
	.show_options = erofs_show_options,
};

module_init(erofs_module_init);
module_exit(erofs_module_exit);

MODULE_DESCRIPTION("Enhanced ROM File System");
MODULE_AUTHOR("Gao Xiang, Chao Yu, Miao Xie, CONSUMER BG, HUAWEI Inc.");
MODULE_LICENSE("GPL");