1 // SPDX-License-Identifier: GPL-2.0 2 /* 3 * Copyright (C) 2007 Oracle. All rights reserved. 4 */ 5 6 #include <linux/blkdev.h> 7 #include <linux/module.h> 8 #include <linux/fs.h> 9 #include <linux/pagemap.h> 10 #include <linux/highmem.h> 11 #include <linux/time.h> 12 #include <linux/init.h> 13 #include <linux/seq_file.h> 14 #include <linux/string.h> 15 #include <linux/backing-dev.h> 16 #include <linux/mount.h> 17 #include <linux/writeback.h> 18 #include <linux/statfs.h> 19 #include <linux/compat.h> 20 #include <linux/parser.h> 21 #include <linux/ctype.h> 22 #include <linux/namei.h> 23 #include <linux/miscdevice.h> 24 #include <linux/magic.h> 25 #include <linux/slab.h> 26 #include <linux/ratelimit.h> 27 #include <linux/crc32c.h> 28 #include <linux/btrfs.h> 29 #include "messages.h" 30 #include "delayed-inode.h" 31 #include "ctree.h" 32 #include "disk-io.h" 33 #include "transaction.h" 34 #include "btrfs_inode.h" 35 #include "print-tree.h" 36 #include "props.h" 37 #include "xattr.h" 38 #include "bio.h" 39 #include "export.h" 40 #include "compression.h" 41 #include "rcu-string.h" 42 #include "dev-replace.h" 43 #include "free-space-cache.h" 44 #include "backref.h" 45 #include "space-info.h" 46 #include "sysfs.h" 47 #include "zoned.h" 48 #include "tests/btrfs-tests.h" 49 #include "block-group.h" 50 #include "discard.h" 51 #include "qgroup.h" 52 #include "raid56.h" 53 #include "fs.h" 54 #include "accessors.h" 55 #include "defrag.h" 56 #include "dir-item.h" 57 #include "ioctl.h" 58 #include "scrub.h" 59 #include "verity.h" 60 #include "super.h" 61 #define CREATE_TRACE_POINTS 62 #include <trace/events/btrfs.h> 63 64 static const struct super_operations btrfs_super_ops; 65 66 /* 67 * Types for mounting the default subvolume and a subvolume explicitly 68 * requested by subvol=/path. That way the callchain is straightforward and we 69 * don't have to play tricks with the mount options and recursive calls to 70 * btrfs_mount. 71 * 72 * The new btrfs_root_fs_type also servers as a tag for the bdev_holder. 73 */ 74 static struct file_system_type btrfs_fs_type; 75 static struct file_system_type btrfs_root_fs_type; 76 77 static int btrfs_remount(struct super_block *sb, int *flags, char *data); 78 79 static void btrfs_put_super(struct super_block *sb) 80 { 81 close_ctree(btrfs_sb(sb)); 82 } 83 84 enum { 85 Opt_acl, Opt_noacl, 86 Opt_clear_cache, 87 Opt_commit_interval, 88 Opt_compress, 89 Opt_compress_force, 90 Opt_compress_force_type, 91 Opt_compress_type, 92 Opt_degraded, 93 Opt_device, 94 Opt_fatal_errors, 95 Opt_flushoncommit, Opt_noflushoncommit, 96 Opt_max_inline, 97 Opt_barrier, Opt_nobarrier, 98 Opt_datacow, Opt_nodatacow, 99 Opt_datasum, Opt_nodatasum, 100 Opt_defrag, Opt_nodefrag, 101 Opt_discard, Opt_nodiscard, 102 Opt_discard_mode, 103 Opt_norecovery, 104 Opt_ratio, 105 Opt_rescan_uuid_tree, 106 Opt_skip_balance, 107 Opt_space_cache, Opt_no_space_cache, 108 Opt_space_cache_version, 109 Opt_ssd, Opt_nossd, 110 Opt_ssd_spread, Opt_nossd_spread, 111 Opt_subvol, 112 Opt_subvol_empty, 113 Opt_subvolid, 114 Opt_thread_pool, 115 Opt_treelog, Opt_notreelog, 116 Opt_user_subvol_rm_allowed, 117 118 /* Rescue options */ 119 Opt_rescue, 120 Opt_usebackuproot, 121 Opt_nologreplay, 122 Opt_ignorebadroots, 123 Opt_ignoredatacsums, 124 Opt_rescue_all, 125 126 /* Deprecated options */ 127 Opt_recovery, 128 Opt_inode_cache, Opt_noinode_cache, 129 130 /* Debugging options */ 131 Opt_check_integrity, 132 Opt_check_integrity_including_extent_data, 133 Opt_check_integrity_print_mask, 134 Opt_enospc_debug, Opt_noenospc_debug, 135 #ifdef CONFIG_BTRFS_DEBUG 136 Opt_fragment_data, Opt_fragment_metadata, Opt_fragment_all, 137 #endif 138 #ifdef CONFIG_BTRFS_FS_REF_VERIFY 139 Opt_ref_verify, 140 #endif 141 Opt_err, 142 }; 143 144 static const match_table_t tokens = { 145 {Opt_acl, "acl"}, 146 {Opt_noacl, "noacl"}, 147 {Opt_clear_cache, "clear_cache"}, 148 {Opt_commit_interval, "commit=%u"}, 149 {Opt_compress, "compress"}, 150 {Opt_compress_type, "compress=%s"}, 151 {Opt_compress_force, "compress-force"}, 152 {Opt_compress_force_type, "compress-force=%s"}, 153 {Opt_degraded, "degraded"}, 154 {Opt_device, "device=%s"}, 155 {Opt_fatal_errors, "fatal_errors=%s"}, 156 {Opt_flushoncommit, "flushoncommit"}, 157 {Opt_noflushoncommit, "noflushoncommit"}, 158 {Opt_inode_cache, "inode_cache"}, 159 {Opt_noinode_cache, "noinode_cache"}, 160 {Opt_max_inline, "max_inline=%s"}, 161 {Opt_barrier, "barrier"}, 162 {Opt_nobarrier, "nobarrier"}, 163 {Opt_datacow, "datacow"}, 164 {Opt_nodatacow, "nodatacow"}, 165 {Opt_datasum, "datasum"}, 166 {Opt_nodatasum, "nodatasum"}, 167 {Opt_defrag, "autodefrag"}, 168 {Opt_nodefrag, "noautodefrag"}, 169 {Opt_discard, "discard"}, 170 {Opt_discard_mode, "discard=%s"}, 171 {Opt_nodiscard, "nodiscard"}, 172 {Opt_norecovery, "norecovery"}, 173 {Opt_ratio, "metadata_ratio=%u"}, 174 {Opt_rescan_uuid_tree, "rescan_uuid_tree"}, 175 {Opt_skip_balance, "skip_balance"}, 176 {Opt_space_cache, "space_cache"}, 177 {Opt_no_space_cache, "nospace_cache"}, 178 {Opt_space_cache_version, "space_cache=%s"}, 179 {Opt_ssd, "ssd"}, 180 {Opt_nossd, "nossd"}, 181 {Opt_ssd_spread, "ssd_spread"}, 182 {Opt_nossd_spread, "nossd_spread"}, 183 {Opt_subvol, "subvol=%s"}, 184 {Opt_subvol_empty, "subvol="}, 185 {Opt_subvolid, "subvolid=%s"}, 186 {Opt_thread_pool, "thread_pool=%u"}, 187 {Opt_treelog, "treelog"}, 188 {Opt_notreelog, "notreelog"}, 189 {Opt_user_subvol_rm_allowed, "user_subvol_rm_allowed"}, 190 191 /* Rescue options */ 192 {Opt_rescue, "rescue=%s"}, 193 /* Deprecated, with alias rescue=nologreplay */ 194 {Opt_nologreplay, "nologreplay"}, 195 /* Deprecated, with alias rescue=usebackuproot */ 196 {Opt_usebackuproot, "usebackuproot"}, 197 198 /* Deprecated options */ 199 {Opt_recovery, "recovery"}, 200 201 /* Debugging options */ 202 {Opt_check_integrity, "check_int"}, 203 {Opt_check_integrity_including_extent_data, "check_int_data"}, 204 {Opt_check_integrity_print_mask, "check_int_print_mask=%u"}, 205 {Opt_enospc_debug, "enospc_debug"}, 206 {Opt_noenospc_debug, "noenospc_debug"}, 207 #ifdef CONFIG_BTRFS_DEBUG 208 {Opt_fragment_data, "fragment=data"}, 209 {Opt_fragment_metadata, "fragment=metadata"}, 210 {Opt_fragment_all, "fragment=all"}, 211 #endif 212 #ifdef CONFIG_BTRFS_FS_REF_VERIFY 213 {Opt_ref_verify, "ref_verify"}, 214 #endif 215 {Opt_err, NULL}, 216 }; 217 218 static const match_table_t rescue_tokens = { 219 {Opt_usebackuproot, "usebackuproot"}, 220 {Opt_nologreplay, "nologreplay"}, 221 {Opt_ignorebadroots, "ignorebadroots"}, 222 {Opt_ignorebadroots, "ibadroots"}, 223 {Opt_ignoredatacsums, "ignoredatacsums"}, 224 {Opt_ignoredatacsums, "idatacsums"}, 225 {Opt_rescue_all, "all"}, 226 {Opt_err, NULL}, 227 }; 228 229 static bool check_ro_option(struct btrfs_fs_info *fs_info, unsigned long opt, 230 const char *opt_name) 231 { 232 if (fs_info->mount_opt & opt) { 233 btrfs_err(fs_info, "%s must be used with ro mount option", 234 opt_name); 235 return true; 236 } 237 return false; 238 } 239 240 static int parse_rescue_options(struct btrfs_fs_info *info, const char *options) 241 { 242 char *opts; 243 char *orig; 244 char *p; 245 substring_t args[MAX_OPT_ARGS]; 246 int ret = 0; 247 248 opts = kstrdup(options, GFP_KERNEL); 249 if (!opts) 250 return -ENOMEM; 251 orig = opts; 252 253 while ((p = strsep(&opts, ":")) != NULL) { 254 int token; 255 256 if (!*p) 257 continue; 258 token = match_token(p, rescue_tokens, args); 259 switch (token){ 260 case Opt_usebackuproot: 261 btrfs_info(info, 262 "trying to use backup root at mount time"); 263 btrfs_set_opt(info->mount_opt, USEBACKUPROOT); 264 break; 265 case Opt_nologreplay: 266 btrfs_set_and_info(info, NOLOGREPLAY, 267 "disabling log replay at mount time"); 268 break; 269 case Opt_ignorebadroots: 270 btrfs_set_and_info(info, IGNOREBADROOTS, 271 "ignoring bad roots"); 272 break; 273 case Opt_ignoredatacsums: 274 btrfs_set_and_info(info, IGNOREDATACSUMS, 275 "ignoring data csums"); 276 break; 277 case Opt_rescue_all: 278 btrfs_info(info, "enabling all of the rescue options"); 279 btrfs_set_and_info(info, IGNOREDATACSUMS, 280 "ignoring data csums"); 281 btrfs_set_and_info(info, IGNOREBADROOTS, 282 "ignoring bad roots"); 283 btrfs_set_and_info(info, NOLOGREPLAY, 284 "disabling log replay at mount time"); 285 break; 286 case Opt_err: 287 btrfs_info(info, "unrecognized rescue option '%s'", p); 288 ret = -EINVAL; 289 goto out; 290 default: 291 break; 292 } 293 294 } 295 out: 296 kfree(orig); 297 return ret; 298 } 299 300 /* 301 * Regular mount options parser. Everything that is needed only when 302 * reading in a new superblock is parsed here. 303 * XXX JDM: This needs to be cleaned up for remount. 304 */ 305 int btrfs_parse_options(struct btrfs_fs_info *info, char *options, 306 unsigned long new_flags) 307 { 308 substring_t args[MAX_OPT_ARGS]; 309 char *p, *num; 310 int intarg; 311 int ret = 0; 312 char *compress_type; 313 bool compress_force = false; 314 enum btrfs_compression_type saved_compress_type; 315 int saved_compress_level; 316 bool saved_compress_force; 317 int no_compress = 0; 318 const bool remounting = test_bit(BTRFS_FS_STATE_REMOUNTING, &info->fs_state); 319 320 if (btrfs_fs_compat_ro(info, FREE_SPACE_TREE)) 321 btrfs_set_opt(info->mount_opt, FREE_SPACE_TREE); 322 else if (btrfs_free_space_cache_v1_active(info)) { 323 if (btrfs_is_zoned(info)) { 324 btrfs_info(info, 325 "zoned: clearing existing space cache"); 326 btrfs_set_super_cache_generation(info->super_copy, 0); 327 } else { 328 btrfs_set_opt(info->mount_opt, SPACE_CACHE); 329 } 330 } 331 332 /* 333 * Even the options are empty, we still need to do extra check 334 * against new flags 335 */ 336 if (!options) 337 goto check; 338 339 while ((p = strsep(&options, ",")) != NULL) { 340 int token; 341 if (!*p) 342 continue; 343 344 token = match_token(p, tokens, args); 345 switch (token) { 346 case Opt_degraded: 347 btrfs_info(info, "allowing degraded mounts"); 348 btrfs_set_opt(info->mount_opt, DEGRADED); 349 break; 350 case Opt_subvol: 351 case Opt_subvol_empty: 352 case Opt_subvolid: 353 case Opt_device: 354 /* 355 * These are parsed by btrfs_parse_subvol_options or 356 * btrfs_parse_device_options and can be ignored here. 357 */ 358 break; 359 case Opt_nodatasum: 360 btrfs_set_and_info(info, NODATASUM, 361 "setting nodatasum"); 362 break; 363 case Opt_datasum: 364 if (btrfs_test_opt(info, NODATASUM)) { 365 if (btrfs_test_opt(info, NODATACOW)) 366 btrfs_info(info, 367 "setting datasum, datacow enabled"); 368 else 369 btrfs_info(info, "setting datasum"); 370 } 371 btrfs_clear_opt(info->mount_opt, NODATACOW); 372 btrfs_clear_opt(info->mount_opt, NODATASUM); 373 break; 374 case Opt_nodatacow: 375 if (!btrfs_test_opt(info, NODATACOW)) { 376 if (!btrfs_test_opt(info, COMPRESS) || 377 !btrfs_test_opt(info, FORCE_COMPRESS)) { 378 btrfs_info(info, 379 "setting nodatacow, compression disabled"); 380 } else { 381 btrfs_info(info, "setting nodatacow"); 382 } 383 } 384 btrfs_clear_opt(info->mount_opt, COMPRESS); 385 btrfs_clear_opt(info->mount_opt, FORCE_COMPRESS); 386 btrfs_set_opt(info->mount_opt, NODATACOW); 387 btrfs_set_opt(info->mount_opt, NODATASUM); 388 break; 389 case Opt_datacow: 390 btrfs_clear_and_info(info, NODATACOW, 391 "setting datacow"); 392 break; 393 case Opt_compress_force: 394 case Opt_compress_force_type: 395 compress_force = true; 396 fallthrough; 397 case Opt_compress: 398 case Opt_compress_type: 399 saved_compress_type = btrfs_test_opt(info, 400 COMPRESS) ? 401 info->compress_type : BTRFS_COMPRESS_NONE; 402 saved_compress_force = 403 btrfs_test_opt(info, FORCE_COMPRESS); 404 saved_compress_level = info->compress_level; 405 if (token == Opt_compress || 406 token == Opt_compress_force || 407 strncmp(args[0].from, "zlib", 4) == 0) { 408 compress_type = "zlib"; 409 410 info->compress_type = BTRFS_COMPRESS_ZLIB; 411 info->compress_level = BTRFS_ZLIB_DEFAULT_LEVEL; 412 /* 413 * args[0] contains uninitialized data since 414 * for these tokens we don't expect any 415 * parameter. 416 */ 417 if (token != Opt_compress && 418 token != Opt_compress_force) 419 info->compress_level = 420 btrfs_compress_str2level( 421 BTRFS_COMPRESS_ZLIB, 422 args[0].from + 4); 423 btrfs_set_opt(info->mount_opt, COMPRESS); 424 btrfs_clear_opt(info->mount_opt, NODATACOW); 425 btrfs_clear_opt(info->mount_opt, NODATASUM); 426 no_compress = 0; 427 } else if (strncmp(args[0].from, "lzo", 3) == 0) { 428 compress_type = "lzo"; 429 info->compress_type = BTRFS_COMPRESS_LZO; 430 info->compress_level = 0; 431 btrfs_set_opt(info->mount_opt, COMPRESS); 432 btrfs_clear_opt(info->mount_opt, NODATACOW); 433 btrfs_clear_opt(info->mount_opt, NODATASUM); 434 btrfs_set_fs_incompat(info, COMPRESS_LZO); 435 no_compress = 0; 436 } else if (strncmp(args[0].from, "zstd", 4) == 0) { 437 compress_type = "zstd"; 438 info->compress_type = BTRFS_COMPRESS_ZSTD; 439 info->compress_level = 440 btrfs_compress_str2level( 441 BTRFS_COMPRESS_ZSTD, 442 args[0].from + 4); 443 btrfs_set_opt(info->mount_opt, COMPRESS); 444 btrfs_clear_opt(info->mount_opt, NODATACOW); 445 btrfs_clear_opt(info->mount_opt, NODATASUM); 446 btrfs_set_fs_incompat(info, COMPRESS_ZSTD); 447 no_compress = 0; 448 } else if (strncmp(args[0].from, "no", 2) == 0) { 449 compress_type = "no"; 450 info->compress_level = 0; 451 info->compress_type = 0; 452 btrfs_clear_opt(info->mount_opt, COMPRESS); 453 btrfs_clear_opt(info->mount_opt, FORCE_COMPRESS); 454 compress_force = false; 455 no_compress++; 456 } else { 457 btrfs_err(info, "unrecognized compression value %s", 458 args[0].from); 459 ret = -EINVAL; 460 goto out; 461 } 462 463 if (compress_force) { 464 btrfs_set_opt(info->mount_opt, FORCE_COMPRESS); 465 } else { 466 /* 467 * If we remount from compress-force=xxx to 468 * compress=xxx, we need clear FORCE_COMPRESS 469 * flag, otherwise, there is no way for users 470 * to disable forcible compression separately. 471 */ 472 btrfs_clear_opt(info->mount_opt, FORCE_COMPRESS); 473 } 474 if (no_compress == 1) { 475 btrfs_info(info, "use no compression"); 476 } else if ((info->compress_type != saved_compress_type) || 477 (compress_force != saved_compress_force) || 478 (info->compress_level != saved_compress_level)) { 479 btrfs_info(info, "%s %s compression, level %d", 480 (compress_force) ? "force" : "use", 481 compress_type, info->compress_level); 482 } 483 compress_force = false; 484 break; 485 case Opt_ssd: 486 btrfs_set_and_info(info, SSD, 487 "enabling ssd optimizations"); 488 btrfs_clear_opt(info->mount_opt, NOSSD); 489 break; 490 case Opt_ssd_spread: 491 btrfs_set_and_info(info, SSD, 492 "enabling ssd optimizations"); 493 btrfs_set_and_info(info, SSD_SPREAD, 494 "using spread ssd allocation scheme"); 495 btrfs_clear_opt(info->mount_opt, NOSSD); 496 break; 497 case Opt_nossd: 498 btrfs_set_opt(info->mount_opt, NOSSD); 499 btrfs_clear_and_info(info, SSD, 500 "not using ssd optimizations"); 501 fallthrough; 502 case Opt_nossd_spread: 503 btrfs_clear_and_info(info, SSD_SPREAD, 504 "not using spread ssd allocation scheme"); 505 break; 506 case Opt_barrier: 507 btrfs_clear_and_info(info, NOBARRIER, 508 "turning on barriers"); 509 break; 510 case Opt_nobarrier: 511 btrfs_set_and_info(info, NOBARRIER, 512 "turning off barriers"); 513 break; 514 case Opt_thread_pool: 515 ret = match_int(&args[0], &intarg); 516 if (ret) { 517 btrfs_err(info, "unrecognized thread_pool value %s", 518 args[0].from); 519 goto out; 520 } else if (intarg == 0) { 521 btrfs_err(info, "invalid value 0 for thread_pool"); 522 ret = -EINVAL; 523 goto out; 524 } 525 info->thread_pool_size = intarg; 526 break; 527 case Opt_max_inline: 528 num = match_strdup(&args[0]); 529 if (num) { 530 info->max_inline = memparse(num, NULL); 531 kfree(num); 532 533 if (info->max_inline) { 534 info->max_inline = min_t(u64, 535 info->max_inline, 536 info->sectorsize); 537 } 538 btrfs_info(info, "max_inline at %llu", 539 info->max_inline); 540 } else { 541 ret = -ENOMEM; 542 goto out; 543 } 544 break; 545 case Opt_acl: 546 #ifdef CONFIG_BTRFS_FS_POSIX_ACL 547 info->sb->s_flags |= SB_POSIXACL; 548 break; 549 #else 550 btrfs_err(info, "support for ACL not compiled in!"); 551 ret = -EINVAL; 552 goto out; 553 #endif 554 case Opt_noacl: 555 info->sb->s_flags &= ~SB_POSIXACL; 556 break; 557 case Opt_notreelog: 558 btrfs_set_and_info(info, NOTREELOG, 559 "disabling tree log"); 560 break; 561 case Opt_treelog: 562 btrfs_clear_and_info(info, NOTREELOG, 563 "enabling tree log"); 564 break; 565 case Opt_norecovery: 566 case Opt_nologreplay: 567 btrfs_warn(info, 568 "'nologreplay' is deprecated, use 'rescue=nologreplay' instead"); 569 btrfs_set_and_info(info, NOLOGREPLAY, 570 "disabling log replay at mount time"); 571 break; 572 case Opt_flushoncommit: 573 btrfs_set_and_info(info, FLUSHONCOMMIT, 574 "turning on flush-on-commit"); 575 break; 576 case Opt_noflushoncommit: 577 btrfs_clear_and_info(info, FLUSHONCOMMIT, 578 "turning off flush-on-commit"); 579 break; 580 case Opt_ratio: 581 ret = match_int(&args[0], &intarg); 582 if (ret) { 583 btrfs_err(info, "unrecognized metadata_ratio value %s", 584 args[0].from); 585 goto out; 586 } 587 info->metadata_ratio = intarg; 588 btrfs_info(info, "metadata ratio %u", 589 info->metadata_ratio); 590 break; 591 case Opt_discard: 592 case Opt_discard_mode: 593 if (token == Opt_discard || 594 strcmp(args[0].from, "sync") == 0) { 595 btrfs_clear_opt(info->mount_opt, DISCARD_ASYNC); 596 btrfs_set_and_info(info, DISCARD_SYNC, 597 "turning on sync discard"); 598 } else if (strcmp(args[0].from, "async") == 0) { 599 btrfs_clear_opt(info->mount_opt, DISCARD_SYNC); 600 btrfs_set_and_info(info, DISCARD_ASYNC, 601 "turning on async discard"); 602 } else { 603 btrfs_err(info, "unrecognized discard mode value %s", 604 args[0].from); 605 ret = -EINVAL; 606 goto out; 607 } 608 btrfs_clear_opt(info->mount_opt, NODISCARD); 609 break; 610 case Opt_nodiscard: 611 btrfs_clear_and_info(info, DISCARD_SYNC, 612 "turning off discard"); 613 btrfs_clear_and_info(info, DISCARD_ASYNC, 614 "turning off async discard"); 615 btrfs_set_opt(info->mount_opt, NODISCARD); 616 break; 617 case Opt_space_cache: 618 case Opt_space_cache_version: 619 /* 620 * We already set FREE_SPACE_TREE above because we have 621 * compat_ro(FREE_SPACE_TREE) set, and we aren't going 622 * to allow v1 to be set for extent tree v2, simply 623 * ignore this setting if we're extent tree v2. 624 */ 625 if (btrfs_fs_incompat(info, EXTENT_TREE_V2)) 626 break; 627 if (token == Opt_space_cache || 628 strcmp(args[0].from, "v1") == 0) { 629 btrfs_clear_opt(info->mount_opt, 630 FREE_SPACE_TREE); 631 btrfs_set_and_info(info, SPACE_CACHE, 632 "enabling disk space caching"); 633 } else if (strcmp(args[0].from, "v2") == 0) { 634 btrfs_clear_opt(info->mount_opt, 635 SPACE_CACHE); 636 btrfs_set_and_info(info, FREE_SPACE_TREE, 637 "enabling free space tree"); 638 } else { 639 btrfs_err(info, "unrecognized space_cache value %s", 640 args[0].from); 641 ret = -EINVAL; 642 goto out; 643 } 644 break; 645 case Opt_rescan_uuid_tree: 646 btrfs_set_opt(info->mount_opt, RESCAN_UUID_TREE); 647 break; 648 case Opt_no_space_cache: 649 /* 650 * We cannot operate without the free space tree with 651 * extent tree v2, ignore this option. 652 */ 653 if (btrfs_fs_incompat(info, EXTENT_TREE_V2)) 654 break; 655 if (btrfs_test_opt(info, SPACE_CACHE)) { 656 btrfs_clear_and_info(info, SPACE_CACHE, 657 "disabling disk space caching"); 658 } 659 if (btrfs_test_opt(info, FREE_SPACE_TREE)) { 660 btrfs_clear_and_info(info, FREE_SPACE_TREE, 661 "disabling free space tree"); 662 } 663 break; 664 case Opt_inode_cache: 665 case Opt_noinode_cache: 666 btrfs_warn(info, 667 "the 'inode_cache' option is deprecated and has no effect since 5.11"); 668 break; 669 case Opt_clear_cache: 670 /* 671 * We cannot clear the free space tree with extent tree 672 * v2, ignore this option. 673 */ 674 if (btrfs_fs_incompat(info, EXTENT_TREE_V2)) 675 break; 676 btrfs_set_and_info(info, CLEAR_CACHE, 677 "force clearing of disk cache"); 678 break; 679 case Opt_user_subvol_rm_allowed: 680 btrfs_set_opt(info->mount_opt, USER_SUBVOL_RM_ALLOWED); 681 break; 682 case Opt_enospc_debug: 683 btrfs_set_opt(info->mount_opt, ENOSPC_DEBUG); 684 break; 685 case Opt_noenospc_debug: 686 btrfs_clear_opt(info->mount_opt, ENOSPC_DEBUG); 687 break; 688 case Opt_defrag: 689 btrfs_set_and_info(info, AUTO_DEFRAG, 690 "enabling auto defrag"); 691 break; 692 case Opt_nodefrag: 693 btrfs_clear_and_info(info, AUTO_DEFRAG, 694 "disabling auto defrag"); 695 break; 696 case Opt_recovery: 697 case Opt_usebackuproot: 698 btrfs_warn(info, 699 "'%s' is deprecated, use 'rescue=usebackuproot' instead", 700 token == Opt_recovery ? "recovery" : 701 "usebackuproot"); 702 btrfs_info(info, 703 "trying to use backup root at mount time"); 704 btrfs_set_opt(info->mount_opt, USEBACKUPROOT); 705 break; 706 case Opt_skip_balance: 707 btrfs_set_opt(info->mount_opt, SKIP_BALANCE); 708 break; 709 #ifdef CONFIG_BTRFS_FS_CHECK_INTEGRITY 710 case Opt_check_integrity_including_extent_data: 711 btrfs_info(info, 712 "enabling check integrity including extent data"); 713 btrfs_set_opt(info->mount_opt, CHECK_INTEGRITY_DATA); 714 btrfs_set_opt(info->mount_opt, CHECK_INTEGRITY); 715 break; 716 case Opt_check_integrity: 717 btrfs_info(info, "enabling check integrity"); 718 btrfs_set_opt(info->mount_opt, CHECK_INTEGRITY); 719 break; 720 case Opt_check_integrity_print_mask: 721 ret = match_int(&args[0], &intarg); 722 if (ret) { 723 btrfs_err(info, 724 "unrecognized check_integrity_print_mask value %s", 725 args[0].from); 726 goto out; 727 } 728 info->check_integrity_print_mask = intarg; 729 btrfs_info(info, "check_integrity_print_mask 0x%x", 730 info->check_integrity_print_mask); 731 break; 732 #else 733 case Opt_check_integrity_including_extent_data: 734 case Opt_check_integrity: 735 case Opt_check_integrity_print_mask: 736 btrfs_err(info, 737 "support for check_integrity* not compiled in!"); 738 ret = -EINVAL; 739 goto out; 740 #endif 741 case Opt_fatal_errors: 742 if (strcmp(args[0].from, "panic") == 0) { 743 btrfs_set_opt(info->mount_opt, 744 PANIC_ON_FATAL_ERROR); 745 } else if (strcmp(args[0].from, "bug") == 0) { 746 btrfs_clear_opt(info->mount_opt, 747 PANIC_ON_FATAL_ERROR); 748 } else { 749 btrfs_err(info, "unrecognized fatal_errors value %s", 750 args[0].from); 751 ret = -EINVAL; 752 goto out; 753 } 754 break; 755 case Opt_commit_interval: 756 intarg = 0; 757 ret = match_int(&args[0], &intarg); 758 if (ret) { 759 btrfs_err(info, "unrecognized commit_interval value %s", 760 args[0].from); 761 ret = -EINVAL; 762 goto out; 763 } 764 if (intarg == 0) { 765 btrfs_info(info, 766 "using default commit interval %us", 767 BTRFS_DEFAULT_COMMIT_INTERVAL); 768 intarg = BTRFS_DEFAULT_COMMIT_INTERVAL; 769 } else if (intarg > 300) { 770 btrfs_warn(info, "excessive commit interval %d", 771 intarg); 772 } 773 info->commit_interval = intarg; 774 break; 775 case Opt_rescue: 776 ret = parse_rescue_options(info, args[0].from); 777 if (ret < 0) { 778 btrfs_err(info, "unrecognized rescue value %s", 779 args[0].from); 780 goto out; 781 } 782 break; 783 #ifdef CONFIG_BTRFS_DEBUG 784 case Opt_fragment_all: 785 btrfs_info(info, "fragmenting all space"); 786 btrfs_set_opt(info->mount_opt, FRAGMENT_DATA); 787 btrfs_set_opt(info->mount_opt, FRAGMENT_METADATA); 788 break; 789 case Opt_fragment_metadata: 790 btrfs_info(info, "fragmenting metadata"); 791 btrfs_set_opt(info->mount_opt, 792 FRAGMENT_METADATA); 793 break; 794 case Opt_fragment_data: 795 btrfs_info(info, "fragmenting data"); 796 btrfs_set_opt(info->mount_opt, FRAGMENT_DATA); 797 break; 798 #endif 799 #ifdef CONFIG_BTRFS_FS_REF_VERIFY 800 case Opt_ref_verify: 801 btrfs_info(info, "doing ref verification"); 802 btrfs_set_opt(info->mount_opt, REF_VERIFY); 803 break; 804 #endif 805 case Opt_err: 806 btrfs_err(info, "unrecognized mount option '%s'", p); 807 ret = -EINVAL; 808 goto out; 809 default: 810 break; 811 } 812 } 813 check: 814 /* We're read-only, don't have to check. */ 815 if (new_flags & SB_RDONLY) 816 goto out; 817 818 if (check_ro_option(info, BTRFS_MOUNT_NOLOGREPLAY, "nologreplay") || 819 check_ro_option(info, BTRFS_MOUNT_IGNOREBADROOTS, "ignorebadroots") || 820 check_ro_option(info, BTRFS_MOUNT_IGNOREDATACSUMS, "ignoredatacsums")) 821 ret = -EINVAL; 822 out: 823 if (btrfs_fs_compat_ro(info, FREE_SPACE_TREE) && 824 !btrfs_test_opt(info, FREE_SPACE_TREE) && 825 !btrfs_test_opt(info, CLEAR_CACHE)) { 826 btrfs_err(info, "cannot disable free space tree"); 827 ret = -EINVAL; 828 829 } 830 if (!ret) 831 ret = btrfs_check_mountopts_zoned(info); 832 if (!ret && !remounting) { 833 if (btrfs_test_opt(info, SPACE_CACHE)) 834 btrfs_info(info, "disk space caching is enabled"); 835 if (btrfs_test_opt(info, FREE_SPACE_TREE)) 836 btrfs_info(info, "using free space tree"); 837 } 838 return ret; 839 } 840 841 /* 842 * Parse mount options that are required early in the mount process. 843 * 844 * All other options will be parsed on much later in the mount process and 845 * only when we need to allocate a new super block. 846 */ 847 static int btrfs_parse_device_options(const char *options, fmode_t flags, 848 void *holder) 849 { 850 substring_t args[MAX_OPT_ARGS]; 851 char *device_name, *opts, *orig, *p; 852 struct btrfs_device *device = NULL; 853 int error = 0; 854 855 lockdep_assert_held(&uuid_mutex); 856 857 if (!options) 858 return 0; 859 860 /* 861 * strsep changes the string, duplicate it because btrfs_parse_options 862 * gets called later 863 */ 864 opts = kstrdup(options, GFP_KERNEL); 865 if (!opts) 866 return -ENOMEM; 867 orig = opts; 868 869 while ((p = strsep(&opts, ",")) != NULL) { 870 int token; 871 872 if (!*p) 873 continue; 874 875 token = match_token(p, tokens, args); 876 if (token == Opt_device) { 877 device_name = match_strdup(&args[0]); 878 if (!device_name) { 879 error = -ENOMEM; 880 goto out; 881 } 882 device = btrfs_scan_one_device(device_name, flags, 883 holder); 884 kfree(device_name); 885 if (IS_ERR(device)) { 886 error = PTR_ERR(device); 887 goto out; 888 } 889 } 890 } 891 892 out: 893 kfree(orig); 894 return error; 895 } 896 897 /* 898 * Parse mount options that are related to subvolume id 899 * 900 * The value is later passed to mount_subvol() 901 */ 902 static int btrfs_parse_subvol_options(const char *options, char **subvol_name, 903 u64 *subvol_objectid) 904 { 905 substring_t args[MAX_OPT_ARGS]; 906 char *opts, *orig, *p; 907 int error = 0; 908 u64 subvolid; 909 910 if (!options) 911 return 0; 912 913 /* 914 * strsep changes the string, duplicate it because 915 * btrfs_parse_device_options gets called later 916 */ 917 opts = kstrdup(options, GFP_KERNEL); 918 if (!opts) 919 return -ENOMEM; 920 orig = opts; 921 922 while ((p = strsep(&opts, ",")) != NULL) { 923 int token; 924 if (!*p) 925 continue; 926 927 token = match_token(p, tokens, args); 928 switch (token) { 929 case Opt_subvol: 930 kfree(*subvol_name); 931 *subvol_name = match_strdup(&args[0]); 932 if (!*subvol_name) { 933 error = -ENOMEM; 934 goto out; 935 } 936 break; 937 case Opt_subvolid: 938 error = match_u64(&args[0], &subvolid); 939 if (error) 940 goto out; 941 942 /* we want the original fs_tree */ 943 if (subvolid == 0) 944 subvolid = BTRFS_FS_TREE_OBJECTID; 945 946 *subvol_objectid = subvolid; 947 break; 948 default: 949 break; 950 } 951 } 952 953 out: 954 kfree(orig); 955 return error; 956 } 957 958 char *btrfs_get_subvol_name_from_objectid(struct btrfs_fs_info *fs_info, 959 u64 subvol_objectid) 960 { 961 struct btrfs_root *root = fs_info->tree_root; 962 struct btrfs_root *fs_root = NULL; 963 struct btrfs_root_ref *root_ref; 964 struct btrfs_inode_ref *inode_ref; 965 struct btrfs_key key; 966 struct btrfs_path *path = NULL; 967 char *name = NULL, *ptr; 968 u64 dirid; 969 int len; 970 int ret; 971 972 path = btrfs_alloc_path(); 973 if (!path) { 974 ret = -ENOMEM; 975 goto err; 976 } 977 978 name = kmalloc(PATH_MAX, GFP_KERNEL); 979 if (!name) { 980 ret = -ENOMEM; 981 goto err; 982 } 983 ptr = name + PATH_MAX - 1; 984 ptr[0] = '\0'; 985 986 /* 987 * Walk up the subvolume trees in the tree of tree roots by root 988 * backrefs until we hit the top-level subvolume. 989 */ 990 while (subvol_objectid != BTRFS_FS_TREE_OBJECTID) { 991 key.objectid = subvol_objectid; 992 key.type = BTRFS_ROOT_BACKREF_KEY; 993 key.offset = (u64)-1; 994 995 ret = btrfs_search_backwards(root, &key, path); 996 if (ret < 0) { 997 goto err; 998 } else if (ret > 0) { 999 ret = -ENOENT; 1000 goto err; 1001 } 1002 1003 subvol_objectid = key.offset; 1004 1005 root_ref = btrfs_item_ptr(path->nodes[0], path->slots[0], 1006 struct btrfs_root_ref); 1007 len = btrfs_root_ref_name_len(path->nodes[0], root_ref); 1008 ptr -= len + 1; 1009 if (ptr < name) { 1010 ret = -ENAMETOOLONG; 1011 goto err; 1012 } 1013 read_extent_buffer(path->nodes[0], ptr + 1, 1014 (unsigned long)(root_ref + 1), len); 1015 ptr[0] = '/'; 1016 dirid = btrfs_root_ref_dirid(path->nodes[0], root_ref); 1017 btrfs_release_path(path); 1018 1019 fs_root = btrfs_get_fs_root(fs_info, subvol_objectid, true); 1020 if (IS_ERR(fs_root)) { 1021 ret = PTR_ERR(fs_root); 1022 fs_root = NULL; 1023 goto err; 1024 } 1025 1026 /* 1027 * Walk up the filesystem tree by inode refs until we hit the 1028 * root directory. 1029 */ 1030 while (dirid != BTRFS_FIRST_FREE_OBJECTID) { 1031 key.objectid = dirid; 1032 key.type = BTRFS_INODE_REF_KEY; 1033 key.offset = (u64)-1; 1034 1035 ret = btrfs_search_backwards(fs_root, &key, path); 1036 if (ret < 0) { 1037 goto err; 1038 } else if (ret > 0) { 1039 ret = -ENOENT; 1040 goto err; 1041 } 1042 1043 dirid = key.offset; 1044 1045 inode_ref = btrfs_item_ptr(path->nodes[0], 1046 path->slots[0], 1047 struct btrfs_inode_ref); 1048 len = btrfs_inode_ref_name_len(path->nodes[0], 1049 inode_ref); 1050 ptr -= len + 1; 1051 if (ptr < name) { 1052 ret = -ENAMETOOLONG; 1053 goto err; 1054 } 1055 read_extent_buffer(path->nodes[0], ptr + 1, 1056 (unsigned long)(inode_ref + 1), len); 1057 ptr[0] = '/'; 1058 btrfs_release_path(path); 1059 } 1060 btrfs_put_root(fs_root); 1061 fs_root = NULL; 1062 } 1063 1064 btrfs_free_path(path); 1065 if (ptr == name + PATH_MAX - 1) { 1066 name[0] = '/'; 1067 name[1] = '\0'; 1068 } else { 1069 memmove(name, ptr, name + PATH_MAX - ptr); 1070 } 1071 return name; 1072 1073 err: 1074 btrfs_put_root(fs_root); 1075 btrfs_free_path(path); 1076 kfree(name); 1077 return ERR_PTR(ret); 1078 } 1079 1080 static int get_default_subvol_objectid(struct btrfs_fs_info *fs_info, u64 *objectid) 1081 { 1082 struct btrfs_root *root = fs_info->tree_root; 1083 struct btrfs_dir_item *di; 1084 struct btrfs_path *path; 1085 struct btrfs_key location; 1086 struct fscrypt_str name = FSTR_INIT("default", 7); 1087 u64 dir_id; 1088 1089 path = btrfs_alloc_path(); 1090 if (!path) 1091 return -ENOMEM; 1092 1093 /* 1094 * Find the "default" dir item which points to the root item that we 1095 * will mount by default if we haven't been given a specific subvolume 1096 * to mount. 1097 */ 1098 dir_id = btrfs_super_root_dir(fs_info->super_copy); 1099 di = btrfs_lookup_dir_item(NULL, root, path, dir_id, &name, 0); 1100 if (IS_ERR(di)) { 1101 btrfs_free_path(path); 1102 return PTR_ERR(di); 1103 } 1104 if (!di) { 1105 /* 1106 * Ok the default dir item isn't there. This is weird since 1107 * it's always been there, but don't freak out, just try and 1108 * mount the top-level subvolume. 1109 */ 1110 btrfs_free_path(path); 1111 *objectid = BTRFS_FS_TREE_OBJECTID; 1112 return 0; 1113 } 1114 1115 btrfs_dir_item_key_to_cpu(path->nodes[0], di, &location); 1116 btrfs_free_path(path); 1117 *objectid = location.objectid; 1118 return 0; 1119 } 1120 1121 static int btrfs_fill_super(struct super_block *sb, 1122 struct btrfs_fs_devices *fs_devices, 1123 void *data) 1124 { 1125 struct inode *inode; 1126 struct btrfs_fs_info *fs_info = btrfs_sb(sb); 1127 int err; 1128 1129 sb->s_maxbytes = MAX_LFS_FILESIZE; 1130 sb->s_magic = BTRFS_SUPER_MAGIC; 1131 sb->s_op = &btrfs_super_ops; 1132 sb->s_d_op = &btrfs_dentry_operations; 1133 sb->s_export_op = &btrfs_export_ops; 1134 #ifdef CONFIG_FS_VERITY 1135 sb->s_vop = &btrfs_verityops; 1136 #endif 1137 sb->s_xattr = btrfs_xattr_handlers; 1138 sb->s_time_gran = 1; 1139 #ifdef CONFIG_BTRFS_FS_POSIX_ACL 1140 sb->s_flags |= SB_POSIXACL; 1141 #endif 1142 sb->s_flags |= SB_I_VERSION; 1143 sb->s_iflags |= SB_I_CGROUPWB; 1144 1145 err = super_setup_bdi(sb); 1146 if (err) { 1147 btrfs_err(fs_info, "super_setup_bdi failed"); 1148 return err; 1149 } 1150 1151 err = open_ctree(sb, fs_devices, (char *)data); 1152 if (err) { 1153 btrfs_err(fs_info, "open_ctree failed"); 1154 return err; 1155 } 1156 1157 inode = btrfs_iget(sb, BTRFS_FIRST_FREE_OBJECTID, fs_info->fs_root); 1158 if (IS_ERR(inode)) { 1159 err = PTR_ERR(inode); 1160 goto fail_close; 1161 } 1162 1163 sb->s_root = d_make_root(inode); 1164 if (!sb->s_root) { 1165 err = -ENOMEM; 1166 goto fail_close; 1167 } 1168 1169 sb->s_flags |= SB_ACTIVE; 1170 return 0; 1171 1172 fail_close: 1173 close_ctree(fs_info); 1174 return err; 1175 } 1176 1177 int btrfs_sync_fs(struct super_block *sb, int wait) 1178 { 1179 struct btrfs_trans_handle *trans; 1180 struct btrfs_fs_info *fs_info = btrfs_sb(sb); 1181 struct btrfs_root *root = fs_info->tree_root; 1182 1183 trace_btrfs_sync_fs(fs_info, wait); 1184 1185 if (!wait) { 1186 filemap_flush(fs_info->btree_inode->i_mapping); 1187 return 0; 1188 } 1189 1190 btrfs_wait_ordered_roots(fs_info, U64_MAX, 0, (u64)-1); 1191 1192 trans = btrfs_attach_transaction_barrier(root); 1193 if (IS_ERR(trans)) { 1194 /* no transaction, don't bother */ 1195 if (PTR_ERR(trans) == -ENOENT) { 1196 /* 1197 * Exit unless we have some pending changes 1198 * that need to go through commit 1199 */ 1200 if (!test_bit(BTRFS_FS_NEED_TRANS_COMMIT, 1201 &fs_info->flags)) 1202 return 0; 1203 /* 1204 * A non-blocking test if the fs is frozen. We must not 1205 * start a new transaction here otherwise a deadlock 1206 * happens. The pending operations are delayed to the 1207 * next commit after thawing. 1208 */ 1209 if (sb_start_write_trylock(sb)) 1210 sb_end_write(sb); 1211 else 1212 return 0; 1213 trans = btrfs_start_transaction(root, 0); 1214 } 1215 if (IS_ERR(trans)) 1216 return PTR_ERR(trans); 1217 } 1218 return btrfs_commit_transaction(trans); 1219 } 1220 1221 static void print_rescue_option(struct seq_file *seq, const char *s, bool *printed) 1222 { 1223 seq_printf(seq, "%s%s", (*printed) ? ":" : ",rescue=", s); 1224 *printed = true; 1225 } 1226 1227 static int btrfs_show_options(struct seq_file *seq, struct dentry *dentry) 1228 { 1229 struct btrfs_fs_info *info = btrfs_sb(dentry->d_sb); 1230 const char *compress_type; 1231 const char *subvol_name; 1232 bool printed = false; 1233 1234 if (btrfs_test_opt(info, DEGRADED)) 1235 seq_puts(seq, ",degraded"); 1236 if (btrfs_test_opt(info, NODATASUM)) 1237 seq_puts(seq, ",nodatasum"); 1238 if (btrfs_test_opt(info, NODATACOW)) 1239 seq_puts(seq, ",nodatacow"); 1240 if (btrfs_test_opt(info, NOBARRIER)) 1241 seq_puts(seq, ",nobarrier"); 1242 if (info->max_inline != BTRFS_DEFAULT_MAX_INLINE) 1243 seq_printf(seq, ",max_inline=%llu", info->max_inline); 1244 if (info->thread_pool_size != min_t(unsigned long, 1245 num_online_cpus() + 2, 8)) 1246 seq_printf(seq, ",thread_pool=%u", info->thread_pool_size); 1247 if (btrfs_test_opt(info, COMPRESS)) { 1248 compress_type = btrfs_compress_type2str(info->compress_type); 1249 if (btrfs_test_opt(info, FORCE_COMPRESS)) 1250 seq_printf(seq, ",compress-force=%s", compress_type); 1251 else 1252 seq_printf(seq, ",compress=%s", compress_type); 1253 if (info->compress_level) 1254 seq_printf(seq, ":%d", info->compress_level); 1255 } 1256 if (btrfs_test_opt(info, NOSSD)) 1257 seq_puts(seq, ",nossd"); 1258 if (btrfs_test_opt(info, SSD_SPREAD)) 1259 seq_puts(seq, ",ssd_spread"); 1260 else if (btrfs_test_opt(info, SSD)) 1261 seq_puts(seq, ",ssd"); 1262 if (btrfs_test_opt(info, NOTREELOG)) 1263 seq_puts(seq, ",notreelog"); 1264 if (btrfs_test_opt(info, NOLOGREPLAY)) 1265 print_rescue_option(seq, "nologreplay", &printed); 1266 if (btrfs_test_opt(info, USEBACKUPROOT)) 1267 print_rescue_option(seq, "usebackuproot", &printed); 1268 if (btrfs_test_opt(info, IGNOREBADROOTS)) 1269 print_rescue_option(seq, "ignorebadroots", &printed); 1270 if (btrfs_test_opt(info, IGNOREDATACSUMS)) 1271 print_rescue_option(seq, "ignoredatacsums", &printed); 1272 if (btrfs_test_opt(info, FLUSHONCOMMIT)) 1273 seq_puts(seq, ",flushoncommit"); 1274 if (btrfs_test_opt(info, DISCARD_SYNC)) 1275 seq_puts(seq, ",discard"); 1276 if (btrfs_test_opt(info, DISCARD_ASYNC)) 1277 seq_puts(seq, ",discard=async"); 1278 if (!(info->sb->s_flags & SB_POSIXACL)) 1279 seq_puts(seq, ",noacl"); 1280 if (btrfs_free_space_cache_v1_active(info)) 1281 seq_puts(seq, ",space_cache"); 1282 else if (btrfs_fs_compat_ro(info, FREE_SPACE_TREE)) 1283 seq_puts(seq, ",space_cache=v2"); 1284 else 1285 seq_puts(seq, ",nospace_cache"); 1286 if (btrfs_test_opt(info, RESCAN_UUID_TREE)) 1287 seq_puts(seq, ",rescan_uuid_tree"); 1288 if (btrfs_test_opt(info, CLEAR_CACHE)) 1289 seq_puts(seq, ",clear_cache"); 1290 if (btrfs_test_opt(info, USER_SUBVOL_RM_ALLOWED)) 1291 seq_puts(seq, ",user_subvol_rm_allowed"); 1292 if (btrfs_test_opt(info, ENOSPC_DEBUG)) 1293 seq_puts(seq, ",enospc_debug"); 1294 if (btrfs_test_opt(info, AUTO_DEFRAG)) 1295 seq_puts(seq, ",autodefrag"); 1296 if (btrfs_test_opt(info, SKIP_BALANCE)) 1297 seq_puts(seq, ",skip_balance"); 1298 #ifdef CONFIG_BTRFS_FS_CHECK_INTEGRITY 1299 if (btrfs_test_opt(info, CHECK_INTEGRITY_DATA)) 1300 seq_puts(seq, ",check_int_data"); 1301 else if (btrfs_test_opt(info, CHECK_INTEGRITY)) 1302 seq_puts(seq, ",check_int"); 1303 if (info->check_integrity_print_mask) 1304 seq_printf(seq, ",check_int_print_mask=%d", 1305 info->check_integrity_print_mask); 1306 #endif 1307 if (info->metadata_ratio) 1308 seq_printf(seq, ",metadata_ratio=%u", info->metadata_ratio); 1309 if (btrfs_test_opt(info, PANIC_ON_FATAL_ERROR)) 1310 seq_puts(seq, ",fatal_errors=panic"); 1311 if (info->commit_interval != BTRFS_DEFAULT_COMMIT_INTERVAL) 1312 seq_printf(seq, ",commit=%u", info->commit_interval); 1313 #ifdef CONFIG_BTRFS_DEBUG 1314 if (btrfs_test_opt(info, FRAGMENT_DATA)) 1315 seq_puts(seq, ",fragment=data"); 1316 if (btrfs_test_opt(info, FRAGMENT_METADATA)) 1317 seq_puts(seq, ",fragment=metadata"); 1318 #endif 1319 if (btrfs_test_opt(info, REF_VERIFY)) 1320 seq_puts(seq, ",ref_verify"); 1321 seq_printf(seq, ",subvolid=%llu", 1322 BTRFS_I(d_inode(dentry))->root->root_key.objectid); 1323 subvol_name = btrfs_get_subvol_name_from_objectid(info, 1324 BTRFS_I(d_inode(dentry))->root->root_key.objectid); 1325 if (!IS_ERR(subvol_name)) { 1326 seq_puts(seq, ",subvol="); 1327 seq_escape(seq, subvol_name, " \t\n\\"); 1328 kfree(subvol_name); 1329 } 1330 return 0; 1331 } 1332 1333 static int btrfs_test_super(struct super_block *s, void *data) 1334 { 1335 struct btrfs_fs_info *p = data; 1336 struct btrfs_fs_info *fs_info = btrfs_sb(s); 1337 1338 return fs_info->fs_devices == p->fs_devices; 1339 } 1340 1341 static int btrfs_set_super(struct super_block *s, void *data) 1342 { 1343 int err = set_anon_super(s, data); 1344 if (!err) 1345 s->s_fs_info = data; 1346 return err; 1347 } 1348 1349 /* 1350 * subvolumes are identified by ino 256 1351 */ 1352 static inline int is_subvolume_inode(struct inode *inode) 1353 { 1354 if (inode && inode->i_ino == BTRFS_FIRST_FREE_OBJECTID) 1355 return 1; 1356 return 0; 1357 } 1358 1359 static struct dentry *mount_subvol(const char *subvol_name, u64 subvol_objectid, 1360 struct vfsmount *mnt) 1361 { 1362 struct dentry *root; 1363 int ret; 1364 1365 if (!subvol_name) { 1366 if (!subvol_objectid) { 1367 ret = get_default_subvol_objectid(btrfs_sb(mnt->mnt_sb), 1368 &subvol_objectid); 1369 if (ret) { 1370 root = ERR_PTR(ret); 1371 goto out; 1372 } 1373 } 1374 subvol_name = btrfs_get_subvol_name_from_objectid( 1375 btrfs_sb(mnt->mnt_sb), subvol_objectid); 1376 if (IS_ERR(subvol_name)) { 1377 root = ERR_CAST(subvol_name); 1378 subvol_name = NULL; 1379 goto out; 1380 } 1381 1382 } 1383 1384 root = mount_subtree(mnt, subvol_name); 1385 /* mount_subtree() drops our reference on the vfsmount. */ 1386 mnt = NULL; 1387 1388 if (!IS_ERR(root)) { 1389 struct super_block *s = root->d_sb; 1390 struct btrfs_fs_info *fs_info = btrfs_sb(s); 1391 struct inode *root_inode = d_inode(root); 1392 u64 root_objectid = BTRFS_I(root_inode)->root->root_key.objectid; 1393 1394 ret = 0; 1395 if (!is_subvolume_inode(root_inode)) { 1396 btrfs_err(fs_info, "'%s' is not a valid subvolume", 1397 subvol_name); 1398 ret = -EINVAL; 1399 } 1400 if (subvol_objectid && root_objectid != subvol_objectid) { 1401 /* 1402 * This will also catch a race condition where a 1403 * subvolume which was passed by ID is renamed and 1404 * another subvolume is renamed over the old location. 1405 */ 1406 btrfs_err(fs_info, 1407 "subvol '%s' does not match subvolid %llu", 1408 subvol_name, subvol_objectid); 1409 ret = -EINVAL; 1410 } 1411 if (ret) { 1412 dput(root); 1413 root = ERR_PTR(ret); 1414 deactivate_locked_super(s); 1415 } 1416 } 1417 1418 out: 1419 mntput(mnt); 1420 kfree(subvol_name); 1421 return root; 1422 } 1423 1424 /* 1425 * Find a superblock for the given device / mount point. 1426 * 1427 * Note: This is based on mount_bdev from fs/super.c with a few additions 1428 * for multiple device setup. Make sure to keep it in sync. 1429 */ 1430 static struct dentry *btrfs_mount_root(struct file_system_type *fs_type, 1431 int flags, const char *device_name, void *data) 1432 { 1433 struct block_device *bdev = NULL; 1434 struct super_block *s; 1435 struct btrfs_device *device = NULL; 1436 struct btrfs_fs_devices *fs_devices = NULL; 1437 struct btrfs_fs_info *fs_info = NULL; 1438 void *new_sec_opts = NULL; 1439 fmode_t mode = FMODE_READ; 1440 int error = 0; 1441 1442 if (!(flags & SB_RDONLY)) 1443 mode |= FMODE_WRITE; 1444 1445 if (data) { 1446 error = security_sb_eat_lsm_opts(data, &new_sec_opts); 1447 if (error) 1448 return ERR_PTR(error); 1449 } 1450 1451 /* 1452 * Setup a dummy root and fs_info for test/set super. This is because 1453 * we don't actually fill this stuff out until open_ctree, but we need 1454 * then open_ctree will properly initialize the file system specific 1455 * settings later. btrfs_init_fs_info initializes the static elements 1456 * of the fs_info (locks and such) to make cleanup easier if we find a 1457 * superblock with our given fs_devices later on at sget() time. 1458 */ 1459 fs_info = kvzalloc(sizeof(struct btrfs_fs_info), GFP_KERNEL); 1460 if (!fs_info) { 1461 error = -ENOMEM; 1462 goto error_sec_opts; 1463 } 1464 btrfs_init_fs_info(fs_info); 1465 1466 fs_info->super_copy = kzalloc(BTRFS_SUPER_INFO_SIZE, GFP_KERNEL); 1467 fs_info->super_for_commit = kzalloc(BTRFS_SUPER_INFO_SIZE, GFP_KERNEL); 1468 if (!fs_info->super_copy || !fs_info->super_for_commit) { 1469 error = -ENOMEM; 1470 goto error_fs_info; 1471 } 1472 1473 mutex_lock(&uuid_mutex); 1474 error = btrfs_parse_device_options(data, mode, fs_type); 1475 if (error) { 1476 mutex_unlock(&uuid_mutex); 1477 goto error_fs_info; 1478 } 1479 1480 device = btrfs_scan_one_device(device_name, mode, fs_type); 1481 if (IS_ERR(device)) { 1482 mutex_unlock(&uuid_mutex); 1483 error = PTR_ERR(device); 1484 goto error_fs_info; 1485 } 1486 1487 fs_devices = device->fs_devices; 1488 fs_info->fs_devices = fs_devices; 1489 1490 error = btrfs_open_devices(fs_devices, mode, fs_type); 1491 mutex_unlock(&uuid_mutex); 1492 if (error) 1493 goto error_fs_info; 1494 1495 if (!(flags & SB_RDONLY) && fs_devices->rw_devices == 0) { 1496 error = -EACCES; 1497 goto error_close_devices; 1498 } 1499 1500 bdev = fs_devices->latest_dev->bdev; 1501 s = sget(fs_type, btrfs_test_super, btrfs_set_super, flags | SB_NOSEC, 1502 fs_info); 1503 if (IS_ERR(s)) { 1504 error = PTR_ERR(s); 1505 goto error_close_devices; 1506 } 1507 1508 if (s->s_root) { 1509 btrfs_close_devices(fs_devices); 1510 btrfs_free_fs_info(fs_info); 1511 if ((flags ^ s->s_flags) & SB_RDONLY) 1512 error = -EBUSY; 1513 } else { 1514 snprintf(s->s_id, sizeof(s->s_id), "%pg", bdev); 1515 shrinker_debugfs_rename(&s->s_shrink, "sb-%s:%s", fs_type->name, 1516 s->s_id); 1517 btrfs_sb(s)->bdev_holder = fs_type; 1518 if (!strstr(crc32c_impl(), "generic")) 1519 set_bit(BTRFS_FS_CSUM_IMPL_FAST, &fs_info->flags); 1520 error = btrfs_fill_super(s, fs_devices, data); 1521 } 1522 if (!error) 1523 error = security_sb_set_mnt_opts(s, new_sec_opts, 0, NULL); 1524 security_free_mnt_opts(&new_sec_opts); 1525 if (error) { 1526 deactivate_locked_super(s); 1527 return ERR_PTR(error); 1528 } 1529 1530 return dget(s->s_root); 1531 1532 error_close_devices: 1533 btrfs_close_devices(fs_devices); 1534 error_fs_info: 1535 btrfs_free_fs_info(fs_info); 1536 error_sec_opts: 1537 security_free_mnt_opts(&new_sec_opts); 1538 return ERR_PTR(error); 1539 } 1540 1541 /* 1542 * Mount function which is called by VFS layer. 1543 * 1544 * In order to allow mounting a subvolume directly, btrfs uses mount_subtree() 1545 * which needs vfsmount* of device's root (/). This means device's root has to 1546 * be mounted internally in any case. 1547 * 1548 * Operation flow: 1549 * 1. Parse subvol id related options for later use in mount_subvol(). 1550 * 1551 * 2. Mount device's root (/) by calling vfs_kern_mount(). 1552 * 1553 * NOTE: vfs_kern_mount() is used by VFS to call btrfs_mount() in the 1554 * first place. In order to avoid calling btrfs_mount() again, we use 1555 * different file_system_type which is not registered to VFS by 1556 * register_filesystem() (btrfs_root_fs_type). As a result, 1557 * btrfs_mount_root() is called. The return value will be used by 1558 * mount_subtree() in mount_subvol(). 1559 * 1560 * 3. Call mount_subvol() to get the dentry of subvolume. Since there is 1561 * "btrfs subvolume set-default", mount_subvol() is called always. 1562 */ 1563 static struct dentry *btrfs_mount(struct file_system_type *fs_type, int flags, 1564 const char *device_name, void *data) 1565 { 1566 struct vfsmount *mnt_root; 1567 struct dentry *root; 1568 char *subvol_name = NULL; 1569 u64 subvol_objectid = 0; 1570 int error = 0; 1571 1572 error = btrfs_parse_subvol_options(data, &subvol_name, 1573 &subvol_objectid); 1574 if (error) { 1575 kfree(subvol_name); 1576 return ERR_PTR(error); 1577 } 1578 1579 /* mount device's root (/) */ 1580 mnt_root = vfs_kern_mount(&btrfs_root_fs_type, flags, device_name, data); 1581 if (PTR_ERR_OR_ZERO(mnt_root) == -EBUSY) { 1582 if (flags & SB_RDONLY) { 1583 mnt_root = vfs_kern_mount(&btrfs_root_fs_type, 1584 flags & ~SB_RDONLY, device_name, data); 1585 } else { 1586 mnt_root = vfs_kern_mount(&btrfs_root_fs_type, 1587 flags | SB_RDONLY, device_name, data); 1588 if (IS_ERR(mnt_root)) { 1589 root = ERR_CAST(mnt_root); 1590 kfree(subvol_name); 1591 goto out; 1592 } 1593 1594 down_write(&mnt_root->mnt_sb->s_umount); 1595 error = btrfs_remount(mnt_root->mnt_sb, &flags, NULL); 1596 up_write(&mnt_root->mnt_sb->s_umount); 1597 if (error < 0) { 1598 root = ERR_PTR(error); 1599 mntput(mnt_root); 1600 kfree(subvol_name); 1601 goto out; 1602 } 1603 } 1604 } 1605 if (IS_ERR(mnt_root)) { 1606 root = ERR_CAST(mnt_root); 1607 kfree(subvol_name); 1608 goto out; 1609 } 1610 1611 /* mount_subvol() will free subvol_name and mnt_root */ 1612 root = mount_subvol(subvol_name, subvol_objectid, mnt_root); 1613 1614 out: 1615 return root; 1616 } 1617 1618 static void btrfs_resize_thread_pool(struct btrfs_fs_info *fs_info, 1619 u32 new_pool_size, u32 old_pool_size) 1620 { 1621 if (new_pool_size == old_pool_size) 1622 return; 1623 1624 fs_info->thread_pool_size = new_pool_size; 1625 1626 btrfs_info(fs_info, "resize thread pool %d -> %d", 1627 old_pool_size, new_pool_size); 1628 1629 btrfs_workqueue_set_max(fs_info->workers, new_pool_size); 1630 btrfs_workqueue_set_max(fs_info->hipri_workers, new_pool_size); 1631 btrfs_workqueue_set_max(fs_info->delalloc_workers, new_pool_size); 1632 btrfs_workqueue_set_max(fs_info->caching_workers, new_pool_size); 1633 btrfs_workqueue_set_max(fs_info->endio_write_workers, new_pool_size); 1634 btrfs_workqueue_set_max(fs_info->endio_freespace_worker, new_pool_size); 1635 btrfs_workqueue_set_max(fs_info->delayed_workers, new_pool_size); 1636 } 1637 1638 static inline void btrfs_remount_begin(struct btrfs_fs_info *fs_info, 1639 unsigned long old_opts, int flags) 1640 { 1641 if (btrfs_raw_test_opt(old_opts, AUTO_DEFRAG) && 1642 (!btrfs_raw_test_opt(fs_info->mount_opt, AUTO_DEFRAG) || 1643 (flags & SB_RDONLY))) { 1644 /* wait for any defraggers to finish */ 1645 wait_event(fs_info->transaction_wait, 1646 (atomic_read(&fs_info->defrag_running) == 0)); 1647 if (flags & SB_RDONLY) 1648 sync_filesystem(fs_info->sb); 1649 } 1650 } 1651 1652 static inline void btrfs_remount_cleanup(struct btrfs_fs_info *fs_info, 1653 unsigned long old_opts) 1654 { 1655 const bool cache_opt = btrfs_test_opt(fs_info, SPACE_CACHE); 1656 1657 /* 1658 * We need to cleanup all defragable inodes if the autodefragment is 1659 * close or the filesystem is read only. 1660 */ 1661 if (btrfs_raw_test_opt(old_opts, AUTO_DEFRAG) && 1662 (!btrfs_raw_test_opt(fs_info->mount_opt, AUTO_DEFRAG) || sb_rdonly(fs_info->sb))) { 1663 btrfs_cleanup_defrag_inodes(fs_info); 1664 } 1665 1666 /* If we toggled discard async */ 1667 if (!btrfs_raw_test_opt(old_opts, DISCARD_ASYNC) && 1668 btrfs_test_opt(fs_info, DISCARD_ASYNC)) 1669 btrfs_discard_resume(fs_info); 1670 else if (btrfs_raw_test_opt(old_opts, DISCARD_ASYNC) && 1671 !btrfs_test_opt(fs_info, DISCARD_ASYNC)) 1672 btrfs_discard_cleanup(fs_info); 1673 1674 /* If we toggled space cache */ 1675 if (cache_opt != btrfs_free_space_cache_v1_active(fs_info)) 1676 btrfs_set_free_space_cache_v1_active(fs_info, cache_opt); 1677 } 1678 1679 static int btrfs_remount(struct super_block *sb, int *flags, char *data) 1680 { 1681 struct btrfs_fs_info *fs_info = btrfs_sb(sb); 1682 unsigned old_flags = sb->s_flags; 1683 unsigned long old_opts = fs_info->mount_opt; 1684 unsigned long old_compress_type = fs_info->compress_type; 1685 u64 old_max_inline = fs_info->max_inline; 1686 u32 old_thread_pool_size = fs_info->thread_pool_size; 1687 u32 old_metadata_ratio = fs_info->metadata_ratio; 1688 int ret; 1689 1690 sync_filesystem(sb); 1691 set_bit(BTRFS_FS_STATE_REMOUNTING, &fs_info->fs_state); 1692 1693 if (data) { 1694 void *new_sec_opts = NULL; 1695 1696 ret = security_sb_eat_lsm_opts(data, &new_sec_opts); 1697 if (!ret) 1698 ret = security_sb_remount(sb, new_sec_opts); 1699 security_free_mnt_opts(&new_sec_opts); 1700 if (ret) 1701 goto restore; 1702 } 1703 1704 ret = btrfs_parse_options(fs_info, data, *flags); 1705 if (ret) 1706 goto restore; 1707 1708 ret = btrfs_check_features(fs_info, sb); 1709 if (ret < 0) 1710 goto restore; 1711 1712 btrfs_remount_begin(fs_info, old_opts, *flags); 1713 btrfs_resize_thread_pool(fs_info, 1714 fs_info->thread_pool_size, old_thread_pool_size); 1715 1716 if ((bool)btrfs_test_opt(fs_info, FREE_SPACE_TREE) != 1717 (bool)btrfs_fs_compat_ro(fs_info, FREE_SPACE_TREE) && 1718 (!sb_rdonly(sb) || (*flags & SB_RDONLY))) { 1719 btrfs_warn(fs_info, 1720 "remount supports changing free space tree only from ro to rw"); 1721 /* Make sure free space cache options match the state on disk */ 1722 if (btrfs_fs_compat_ro(fs_info, FREE_SPACE_TREE)) { 1723 btrfs_set_opt(fs_info->mount_opt, FREE_SPACE_TREE); 1724 btrfs_clear_opt(fs_info->mount_opt, SPACE_CACHE); 1725 } 1726 if (btrfs_free_space_cache_v1_active(fs_info)) { 1727 btrfs_clear_opt(fs_info->mount_opt, FREE_SPACE_TREE); 1728 btrfs_set_opt(fs_info->mount_opt, SPACE_CACHE); 1729 } 1730 } 1731 1732 if ((bool)(*flags & SB_RDONLY) == sb_rdonly(sb)) 1733 goto out; 1734 1735 if (*flags & SB_RDONLY) { 1736 /* 1737 * this also happens on 'umount -rf' or on shutdown, when 1738 * the filesystem is busy. 1739 */ 1740 cancel_work_sync(&fs_info->async_reclaim_work); 1741 cancel_work_sync(&fs_info->async_data_reclaim_work); 1742 1743 btrfs_discard_cleanup(fs_info); 1744 1745 /* wait for the uuid_scan task to finish */ 1746 down(&fs_info->uuid_tree_rescan_sem); 1747 /* avoid complains from lockdep et al. */ 1748 up(&fs_info->uuid_tree_rescan_sem); 1749 1750 btrfs_set_sb_rdonly(sb); 1751 1752 /* 1753 * Setting SB_RDONLY will put the cleaner thread to 1754 * sleep at the next loop if it's already active. 1755 * If it's already asleep, we'll leave unused block 1756 * groups on disk until we're mounted read-write again 1757 * unless we clean them up here. 1758 */ 1759 btrfs_delete_unused_bgs(fs_info); 1760 1761 /* 1762 * The cleaner task could be already running before we set the 1763 * flag BTRFS_FS_STATE_RO (and SB_RDONLY in the superblock). 1764 * We must make sure that after we finish the remount, i.e. after 1765 * we call btrfs_commit_super(), the cleaner can no longer start 1766 * a transaction - either because it was dropping a dead root, 1767 * running delayed iputs or deleting an unused block group (the 1768 * cleaner picked a block group from the list of unused block 1769 * groups before we were able to in the previous call to 1770 * btrfs_delete_unused_bgs()). 1771 */ 1772 wait_on_bit(&fs_info->flags, BTRFS_FS_CLEANER_RUNNING, 1773 TASK_UNINTERRUPTIBLE); 1774 1775 /* 1776 * We've set the superblock to RO mode, so we might have made 1777 * the cleaner task sleep without running all pending delayed 1778 * iputs. Go through all the delayed iputs here, so that if an 1779 * unmount happens without remounting RW we don't end up at 1780 * finishing close_ctree() with a non-empty list of delayed 1781 * iputs. 1782 */ 1783 btrfs_run_delayed_iputs(fs_info); 1784 1785 btrfs_dev_replace_suspend_for_unmount(fs_info); 1786 btrfs_scrub_cancel(fs_info); 1787 btrfs_pause_balance(fs_info); 1788 1789 /* 1790 * Pause the qgroup rescan worker if it is running. We don't want 1791 * it to be still running after we are in RO mode, as after that, 1792 * by the time we unmount, it might have left a transaction open, 1793 * so we would leak the transaction and/or crash. 1794 */ 1795 btrfs_qgroup_wait_for_completion(fs_info, false); 1796 1797 ret = btrfs_commit_super(fs_info); 1798 if (ret) 1799 goto restore; 1800 } else { 1801 if (BTRFS_FS_ERROR(fs_info)) { 1802 btrfs_err(fs_info, 1803 "Remounting read-write after error is not allowed"); 1804 ret = -EINVAL; 1805 goto restore; 1806 } 1807 if (fs_info->fs_devices->rw_devices == 0) { 1808 ret = -EACCES; 1809 goto restore; 1810 } 1811 1812 if (!btrfs_check_rw_degradable(fs_info, NULL)) { 1813 btrfs_warn(fs_info, 1814 "too many missing devices, writable remount is not allowed"); 1815 ret = -EACCES; 1816 goto restore; 1817 } 1818 1819 if (btrfs_super_log_root(fs_info->super_copy) != 0) { 1820 btrfs_warn(fs_info, 1821 "mount required to replay tree-log, cannot remount read-write"); 1822 ret = -EINVAL; 1823 goto restore; 1824 } 1825 1826 /* 1827 * NOTE: when remounting with a change that does writes, don't 1828 * put it anywhere above this point, as we are not sure to be 1829 * safe to write until we pass the above checks. 1830 */ 1831 ret = btrfs_start_pre_rw_mount(fs_info); 1832 if (ret) 1833 goto restore; 1834 1835 btrfs_clear_sb_rdonly(sb); 1836 1837 set_bit(BTRFS_FS_OPEN, &fs_info->flags); 1838 } 1839 out: 1840 /* 1841 * We need to set SB_I_VERSION here otherwise it'll get cleared by VFS, 1842 * since the absence of the flag means it can be toggled off by remount. 1843 */ 1844 *flags |= SB_I_VERSION; 1845 1846 wake_up_process(fs_info->transaction_kthread); 1847 btrfs_remount_cleanup(fs_info, old_opts); 1848 btrfs_clear_oneshot_options(fs_info); 1849 clear_bit(BTRFS_FS_STATE_REMOUNTING, &fs_info->fs_state); 1850 1851 return 0; 1852 1853 restore: 1854 /* We've hit an error - don't reset SB_RDONLY */ 1855 if (sb_rdonly(sb)) 1856 old_flags |= SB_RDONLY; 1857 if (!(old_flags & SB_RDONLY)) 1858 clear_bit(BTRFS_FS_STATE_RO, &fs_info->fs_state); 1859 sb->s_flags = old_flags; 1860 fs_info->mount_opt = old_opts; 1861 fs_info->compress_type = old_compress_type; 1862 fs_info->max_inline = old_max_inline; 1863 btrfs_resize_thread_pool(fs_info, 1864 old_thread_pool_size, fs_info->thread_pool_size); 1865 fs_info->metadata_ratio = old_metadata_ratio; 1866 btrfs_remount_cleanup(fs_info, old_opts); 1867 clear_bit(BTRFS_FS_STATE_REMOUNTING, &fs_info->fs_state); 1868 1869 return ret; 1870 } 1871 1872 /* Used to sort the devices by max_avail(descending sort) */ 1873 static int btrfs_cmp_device_free_bytes(const void *a, const void *b) 1874 { 1875 const struct btrfs_device_info *dev_info1 = a; 1876 const struct btrfs_device_info *dev_info2 = b; 1877 1878 if (dev_info1->max_avail > dev_info2->max_avail) 1879 return -1; 1880 else if (dev_info1->max_avail < dev_info2->max_avail) 1881 return 1; 1882 return 0; 1883 } 1884 1885 /* 1886 * sort the devices by max_avail, in which max free extent size of each device 1887 * is stored.(Descending Sort) 1888 */ 1889 static inline void btrfs_descending_sort_devices( 1890 struct btrfs_device_info *devices, 1891 size_t nr_devices) 1892 { 1893 sort(devices, nr_devices, sizeof(struct btrfs_device_info), 1894 btrfs_cmp_device_free_bytes, NULL); 1895 } 1896 1897 /* 1898 * The helper to calc the free space on the devices that can be used to store 1899 * file data. 1900 */ 1901 static inline int btrfs_calc_avail_data_space(struct btrfs_fs_info *fs_info, 1902 u64 *free_bytes) 1903 { 1904 struct btrfs_device_info *devices_info; 1905 struct btrfs_fs_devices *fs_devices = fs_info->fs_devices; 1906 struct btrfs_device *device; 1907 u64 type; 1908 u64 avail_space; 1909 u64 min_stripe_size; 1910 int num_stripes = 1; 1911 int i = 0, nr_devices; 1912 const struct btrfs_raid_attr *rattr; 1913 1914 /* 1915 * We aren't under the device list lock, so this is racy-ish, but good 1916 * enough for our purposes. 1917 */ 1918 nr_devices = fs_info->fs_devices->open_devices; 1919 if (!nr_devices) { 1920 smp_mb(); 1921 nr_devices = fs_info->fs_devices->open_devices; 1922 ASSERT(nr_devices); 1923 if (!nr_devices) { 1924 *free_bytes = 0; 1925 return 0; 1926 } 1927 } 1928 1929 devices_info = kmalloc_array(nr_devices, sizeof(*devices_info), 1930 GFP_KERNEL); 1931 if (!devices_info) 1932 return -ENOMEM; 1933 1934 /* calc min stripe number for data space allocation */ 1935 type = btrfs_data_alloc_profile(fs_info); 1936 rattr = &btrfs_raid_array[btrfs_bg_flags_to_raid_index(type)]; 1937 1938 if (type & BTRFS_BLOCK_GROUP_RAID0) 1939 num_stripes = nr_devices; 1940 else if (type & BTRFS_BLOCK_GROUP_RAID1_MASK) 1941 num_stripes = rattr->ncopies; 1942 else if (type & BTRFS_BLOCK_GROUP_RAID10) 1943 num_stripes = 4; 1944 1945 /* Adjust for more than 1 stripe per device */ 1946 min_stripe_size = rattr->dev_stripes * BTRFS_STRIPE_LEN; 1947 1948 rcu_read_lock(); 1949 list_for_each_entry_rcu(device, &fs_devices->devices, dev_list) { 1950 if (!test_bit(BTRFS_DEV_STATE_IN_FS_METADATA, 1951 &device->dev_state) || 1952 !device->bdev || 1953 test_bit(BTRFS_DEV_STATE_REPLACE_TGT, &device->dev_state)) 1954 continue; 1955 1956 if (i >= nr_devices) 1957 break; 1958 1959 avail_space = device->total_bytes - device->bytes_used; 1960 1961 /* align with stripe_len */ 1962 avail_space = rounddown(avail_space, BTRFS_STRIPE_LEN); 1963 1964 /* 1965 * Ensure we have at least min_stripe_size on top of the 1966 * reserved space on the device. 1967 */ 1968 if (avail_space <= BTRFS_DEVICE_RANGE_RESERVED + min_stripe_size) 1969 continue; 1970 1971 avail_space -= BTRFS_DEVICE_RANGE_RESERVED; 1972 1973 devices_info[i].dev = device; 1974 devices_info[i].max_avail = avail_space; 1975 1976 i++; 1977 } 1978 rcu_read_unlock(); 1979 1980 nr_devices = i; 1981 1982 btrfs_descending_sort_devices(devices_info, nr_devices); 1983 1984 i = nr_devices - 1; 1985 avail_space = 0; 1986 while (nr_devices >= rattr->devs_min) { 1987 num_stripes = min(num_stripes, nr_devices); 1988 1989 if (devices_info[i].max_avail >= min_stripe_size) { 1990 int j; 1991 u64 alloc_size; 1992 1993 avail_space += devices_info[i].max_avail * num_stripes; 1994 alloc_size = devices_info[i].max_avail; 1995 for (j = i + 1 - num_stripes; j <= i; j++) 1996 devices_info[j].max_avail -= alloc_size; 1997 } 1998 i--; 1999 nr_devices--; 2000 } 2001 2002 kfree(devices_info); 2003 *free_bytes = avail_space; 2004 return 0; 2005 } 2006 2007 /* 2008 * Calculate numbers for 'df', pessimistic in case of mixed raid profiles. 2009 * 2010 * If there's a redundant raid level at DATA block groups, use the respective 2011 * multiplier to scale the sizes. 2012 * 2013 * Unused device space usage is based on simulating the chunk allocator 2014 * algorithm that respects the device sizes and order of allocations. This is 2015 * a close approximation of the actual use but there are other factors that may 2016 * change the result (like a new metadata chunk). 2017 * 2018 * If metadata is exhausted, f_bavail will be 0. 2019 */ 2020 static int btrfs_statfs(struct dentry *dentry, struct kstatfs *buf) 2021 { 2022 struct btrfs_fs_info *fs_info = btrfs_sb(dentry->d_sb); 2023 struct btrfs_super_block *disk_super = fs_info->super_copy; 2024 struct btrfs_space_info *found; 2025 u64 total_used = 0; 2026 u64 total_free_data = 0; 2027 u64 total_free_meta = 0; 2028 u32 bits = fs_info->sectorsize_bits; 2029 __be32 *fsid = (__be32 *)fs_info->fs_devices->fsid; 2030 unsigned factor = 1; 2031 struct btrfs_block_rsv *block_rsv = &fs_info->global_block_rsv; 2032 int ret; 2033 u64 thresh = 0; 2034 int mixed = 0; 2035 2036 list_for_each_entry(found, &fs_info->space_info, list) { 2037 if (found->flags & BTRFS_BLOCK_GROUP_DATA) { 2038 int i; 2039 2040 total_free_data += found->disk_total - found->disk_used; 2041 total_free_data -= 2042 btrfs_account_ro_block_groups_free_space(found); 2043 2044 for (i = 0; i < BTRFS_NR_RAID_TYPES; i++) { 2045 if (!list_empty(&found->block_groups[i])) 2046 factor = btrfs_bg_type_to_factor( 2047 btrfs_raid_array[i].bg_flag); 2048 } 2049 } 2050 2051 /* 2052 * Metadata in mixed block goup profiles are accounted in data 2053 */ 2054 if (!mixed && found->flags & BTRFS_BLOCK_GROUP_METADATA) { 2055 if (found->flags & BTRFS_BLOCK_GROUP_DATA) 2056 mixed = 1; 2057 else 2058 total_free_meta += found->disk_total - 2059 found->disk_used; 2060 } 2061 2062 total_used += found->disk_used; 2063 } 2064 2065 buf->f_blocks = div_u64(btrfs_super_total_bytes(disk_super), factor); 2066 buf->f_blocks >>= bits; 2067 buf->f_bfree = buf->f_blocks - (div_u64(total_used, factor) >> bits); 2068 2069 /* Account global block reserve as used, it's in logical size already */ 2070 spin_lock(&block_rsv->lock); 2071 /* Mixed block groups accounting is not byte-accurate, avoid overflow */ 2072 if (buf->f_bfree >= block_rsv->size >> bits) 2073 buf->f_bfree -= block_rsv->size >> bits; 2074 else 2075 buf->f_bfree = 0; 2076 spin_unlock(&block_rsv->lock); 2077 2078 buf->f_bavail = div_u64(total_free_data, factor); 2079 ret = btrfs_calc_avail_data_space(fs_info, &total_free_data); 2080 if (ret) 2081 return ret; 2082 buf->f_bavail += div_u64(total_free_data, factor); 2083 buf->f_bavail = buf->f_bavail >> bits; 2084 2085 /* 2086 * We calculate the remaining metadata space minus global reserve. If 2087 * this is (supposedly) smaller than zero, there's no space. But this 2088 * does not hold in practice, the exhausted state happens where's still 2089 * some positive delta. So we apply some guesswork and compare the 2090 * delta to a 4M threshold. (Practically observed delta was ~2M.) 2091 * 2092 * We probably cannot calculate the exact threshold value because this 2093 * depends on the internal reservations requested by various 2094 * operations, so some operations that consume a few metadata will 2095 * succeed even if the Avail is zero. But this is better than the other 2096 * way around. 2097 */ 2098 thresh = SZ_4M; 2099 2100 /* 2101 * We only want to claim there's no available space if we can no longer 2102 * allocate chunks for our metadata profile and our global reserve will 2103 * not fit in the free metadata space. If we aren't ->full then we 2104 * still can allocate chunks and thus are fine using the currently 2105 * calculated f_bavail. 2106 */ 2107 if (!mixed && block_rsv->space_info->full && 2108 total_free_meta - thresh < block_rsv->size) 2109 buf->f_bavail = 0; 2110 2111 buf->f_type = BTRFS_SUPER_MAGIC; 2112 buf->f_bsize = dentry->d_sb->s_blocksize; 2113 buf->f_namelen = BTRFS_NAME_LEN; 2114 2115 /* We treat it as constant endianness (it doesn't matter _which_) 2116 because we want the fsid to come out the same whether mounted 2117 on a big-endian or little-endian host */ 2118 buf->f_fsid.val[0] = be32_to_cpu(fsid[0]) ^ be32_to_cpu(fsid[2]); 2119 buf->f_fsid.val[1] = be32_to_cpu(fsid[1]) ^ be32_to_cpu(fsid[3]); 2120 /* Mask in the root object ID too, to disambiguate subvols */ 2121 buf->f_fsid.val[0] ^= 2122 BTRFS_I(d_inode(dentry))->root->root_key.objectid >> 32; 2123 buf->f_fsid.val[1] ^= 2124 BTRFS_I(d_inode(dentry))->root->root_key.objectid; 2125 2126 return 0; 2127 } 2128 2129 static void btrfs_kill_super(struct super_block *sb) 2130 { 2131 struct btrfs_fs_info *fs_info = btrfs_sb(sb); 2132 kill_anon_super(sb); 2133 btrfs_free_fs_info(fs_info); 2134 } 2135 2136 static struct file_system_type btrfs_fs_type = { 2137 .owner = THIS_MODULE, 2138 .name = "btrfs", 2139 .mount = btrfs_mount, 2140 .kill_sb = btrfs_kill_super, 2141 .fs_flags = FS_REQUIRES_DEV | FS_BINARY_MOUNTDATA, 2142 }; 2143 2144 static struct file_system_type btrfs_root_fs_type = { 2145 .owner = THIS_MODULE, 2146 .name = "btrfs", 2147 .mount = btrfs_mount_root, 2148 .kill_sb = btrfs_kill_super, 2149 .fs_flags = FS_REQUIRES_DEV | FS_BINARY_MOUNTDATA | FS_ALLOW_IDMAP, 2150 }; 2151 2152 MODULE_ALIAS_FS("btrfs"); 2153 2154 static int btrfs_control_open(struct inode *inode, struct file *file) 2155 { 2156 /* 2157 * The control file's private_data is used to hold the 2158 * transaction when it is started and is used to keep 2159 * track of whether a transaction is already in progress. 2160 */ 2161 file->private_data = NULL; 2162 return 0; 2163 } 2164 2165 /* 2166 * Used by /dev/btrfs-control for devices ioctls. 2167 */ 2168 static long btrfs_control_ioctl(struct file *file, unsigned int cmd, 2169 unsigned long arg) 2170 { 2171 struct btrfs_ioctl_vol_args *vol; 2172 struct btrfs_device *device = NULL; 2173 dev_t devt = 0; 2174 int ret = -ENOTTY; 2175 2176 if (!capable(CAP_SYS_ADMIN)) 2177 return -EPERM; 2178 2179 vol = memdup_user((void __user *)arg, sizeof(*vol)); 2180 if (IS_ERR(vol)) 2181 return PTR_ERR(vol); 2182 vol->name[BTRFS_PATH_NAME_MAX] = '\0'; 2183 2184 switch (cmd) { 2185 case BTRFS_IOC_SCAN_DEV: 2186 mutex_lock(&uuid_mutex); 2187 device = btrfs_scan_one_device(vol->name, FMODE_READ, 2188 &btrfs_root_fs_type); 2189 ret = PTR_ERR_OR_ZERO(device); 2190 mutex_unlock(&uuid_mutex); 2191 break; 2192 case BTRFS_IOC_FORGET_DEV: 2193 if (vol->name[0] != 0) { 2194 ret = lookup_bdev(vol->name, &devt); 2195 if (ret) 2196 break; 2197 } 2198 ret = btrfs_forget_devices(devt); 2199 break; 2200 case BTRFS_IOC_DEVICES_READY: 2201 mutex_lock(&uuid_mutex); 2202 device = btrfs_scan_one_device(vol->name, FMODE_READ, 2203 &btrfs_root_fs_type); 2204 if (IS_ERR(device)) { 2205 mutex_unlock(&uuid_mutex); 2206 ret = PTR_ERR(device); 2207 break; 2208 } 2209 ret = !(device->fs_devices->num_devices == 2210 device->fs_devices->total_devices); 2211 mutex_unlock(&uuid_mutex); 2212 break; 2213 case BTRFS_IOC_GET_SUPPORTED_FEATURES: 2214 ret = btrfs_ioctl_get_supported_features((void __user*)arg); 2215 break; 2216 } 2217 2218 kfree(vol); 2219 return ret; 2220 } 2221 2222 static int btrfs_freeze(struct super_block *sb) 2223 { 2224 struct btrfs_trans_handle *trans; 2225 struct btrfs_fs_info *fs_info = btrfs_sb(sb); 2226 struct btrfs_root *root = fs_info->tree_root; 2227 2228 set_bit(BTRFS_FS_FROZEN, &fs_info->flags); 2229 /* 2230 * We don't need a barrier here, we'll wait for any transaction that 2231 * could be in progress on other threads (and do delayed iputs that 2232 * we want to avoid on a frozen filesystem), or do the commit 2233 * ourselves. 2234 */ 2235 trans = btrfs_attach_transaction_barrier(root); 2236 if (IS_ERR(trans)) { 2237 /* no transaction, don't bother */ 2238 if (PTR_ERR(trans) == -ENOENT) 2239 return 0; 2240 return PTR_ERR(trans); 2241 } 2242 return btrfs_commit_transaction(trans); 2243 } 2244 2245 static int check_dev_super(struct btrfs_device *dev) 2246 { 2247 struct btrfs_fs_info *fs_info = dev->fs_info; 2248 struct btrfs_super_block *sb; 2249 u16 csum_type; 2250 int ret = 0; 2251 2252 /* This should be called with fs still frozen. */ 2253 ASSERT(test_bit(BTRFS_FS_FROZEN, &fs_info->flags)); 2254 2255 /* Missing dev, no need to check. */ 2256 if (!dev->bdev) 2257 return 0; 2258 2259 /* Only need to check the primary super block. */ 2260 sb = btrfs_read_dev_one_super(dev->bdev, 0, true); 2261 if (IS_ERR(sb)) 2262 return PTR_ERR(sb); 2263 2264 /* Verify the checksum. */ 2265 csum_type = btrfs_super_csum_type(sb); 2266 if (csum_type != btrfs_super_csum_type(fs_info->super_copy)) { 2267 btrfs_err(fs_info, "csum type changed, has %u expect %u", 2268 csum_type, btrfs_super_csum_type(fs_info->super_copy)); 2269 ret = -EUCLEAN; 2270 goto out; 2271 } 2272 2273 if (btrfs_check_super_csum(fs_info, sb)) { 2274 btrfs_err(fs_info, "csum for on-disk super block no longer matches"); 2275 ret = -EUCLEAN; 2276 goto out; 2277 } 2278 2279 /* Btrfs_validate_super() includes fsid check against super->fsid. */ 2280 ret = btrfs_validate_super(fs_info, sb, 0); 2281 if (ret < 0) 2282 goto out; 2283 2284 if (btrfs_super_generation(sb) != fs_info->last_trans_committed) { 2285 btrfs_err(fs_info, "transid mismatch, has %llu expect %llu", 2286 btrfs_super_generation(sb), 2287 fs_info->last_trans_committed); 2288 ret = -EUCLEAN; 2289 goto out; 2290 } 2291 out: 2292 btrfs_release_disk_super(sb); 2293 return ret; 2294 } 2295 2296 static int btrfs_unfreeze(struct super_block *sb) 2297 { 2298 struct btrfs_fs_info *fs_info = btrfs_sb(sb); 2299 struct btrfs_device *device; 2300 int ret = 0; 2301 2302 /* 2303 * Make sure the fs is not changed by accident (like hibernation then 2304 * modified by other OS). 2305 * If we found anything wrong, we mark the fs error immediately. 2306 * 2307 * And since the fs is frozen, no one can modify the fs yet, thus 2308 * we don't need to hold device_list_mutex. 2309 */ 2310 list_for_each_entry(device, &fs_info->fs_devices->devices, dev_list) { 2311 ret = check_dev_super(device); 2312 if (ret < 0) { 2313 btrfs_handle_fs_error(fs_info, ret, 2314 "super block on devid %llu got modified unexpectedly", 2315 device->devid); 2316 break; 2317 } 2318 } 2319 clear_bit(BTRFS_FS_FROZEN, &fs_info->flags); 2320 2321 /* 2322 * We still return 0, to allow VFS layer to unfreeze the fs even the 2323 * above checks failed. Since the fs is either fine or read-only, we're 2324 * safe to continue, without causing further damage. 2325 */ 2326 return 0; 2327 } 2328 2329 static int btrfs_show_devname(struct seq_file *m, struct dentry *root) 2330 { 2331 struct btrfs_fs_info *fs_info = btrfs_sb(root->d_sb); 2332 2333 /* 2334 * There should be always a valid pointer in latest_dev, it may be stale 2335 * for a short moment in case it's being deleted but still valid until 2336 * the end of RCU grace period. 2337 */ 2338 rcu_read_lock(); 2339 seq_escape(m, btrfs_dev_name(fs_info->fs_devices->latest_dev), " \t\n\\"); 2340 rcu_read_unlock(); 2341 2342 return 0; 2343 } 2344 2345 static const struct super_operations btrfs_super_ops = { 2346 .drop_inode = btrfs_drop_inode, 2347 .evict_inode = btrfs_evict_inode, 2348 .put_super = btrfs_put_super, 2349 .sync_fs = btrfs_sync_fs, 2350 .show_options = btrfs_show_options, 2351 .show_devname = btrfs_show_devname, 2352 .alloc_inode = btrfs_alloc_inode, 2353 .destroy_inode = btrfs_destroy_inode, 2354 .free_inode = btrfs_free_inode, 2355 .statfs = btrfs_statfs, 2356 .remount_fs = btrfs_remount, 2357 .freeze_fs = btrfs_freeze, 2358 .unfreeze_fs = btrfs_unfreeze, 2359 }; 2360 2361 static const struct file_operations btrfs_ctl_fops = { 2362 .open = btrfs_control_open, 2363 .unlocked_ioctl = btrfs_control_ioctl, 2364 .compat_ioctl = compat_ptr_ioctl, 2365 .owner = THIS_MODULE, 2366 .llseek = noop_llseek, 2367 }; 2368 2369 static struct miscdevice btrfs_misc = { 2370 .minor = BTRFS_MINOR, 2371 .name = "btrfs-control", 2372 .fops = &btrfs_ctl_fops 2373 }; 2374 2375 MODULE_ALIAS_MISCDEV(BTRFS_MINOR); 2376 MODULE_ALIAS("devname:btrfs-control"); 2377 2378 static int __init btrfs_interface_init(void) 2379 { 2380 return misc_register(&btrfs_misc); 2381 } 2382 2383 static __cold void btrfs_interface_exit(void) 2384 { 2385 misc_deregister(&btrfs_misc); 2386 } 2387 2388 static int __init btrfs_print_mod_info(void) 2389 { 2390 static const char options[] = "" 2391 #ifdef CONFIG_BTRFS_DEBUG 2392 ", debug=on" 2393 #endif 2394 #ifdef CONFIG_BTRFS_ASSERT 2395 ", assert=on" 2396 #endif 2397 #ifdef CONFIG_BTRFS_FS_CHECK_INTEGRITY 2398 ", integrity-checker=on" 2399 #endif 2400 #ifdef CONFIG_BTRFS_FS_REF_VERIFY 2401 ", ref-verify=on" 2402 #endif 2403 #ifdef CONFIG_BLK_DEV_ZONED 2404 ", zoned=yes" 2405 #else 2406 ", zoned=no" 2407 #endif 2408 #ifdef CONFIG_FS_VERITY 2409 ", fsverity=yes" 2410 #else 2411 ", fsverity=no" 2412 #endif 2413 ; 2414 pr_info("Btrfs loaded, crc32c=%s%s\n", crc32c_impl(), options); 2415 return 0; 2416 } 2417 2418 static int register_btrfs(void) 2419 { 2420 return register_filesystem(&btrfs_fs_type); 2421 } 2422 2423 static void unregister_btrfs(void) 2424 { 2425 unregister_filesystem(&btrfs_fs_type); 2426 } 2427 2428 /* Helper structure for long init/exit functions. */ 2429 struct init_sequence { 2430 int (*init_func)(void); 2431 /* Can be NULL if the init_func doesn't need cleanup. */ 2432 void (*exit_func)(void); 2433 }; 2434 2435 static const struct init_sequence mod_init_seq[] = { 2436 { 2437 .init_func = btrfs_props_init, 2438 .exit_func = NULL, 2439 }, { 2440 .init_func = btrfs_init_sysfs, 2441 .exit_func = btrfs_exit_sysfs, 2442 }, { 2443 .init_func = btrfs_init_compress, 2444 .exit_func = btrfs_exit_compress, 2445 }, { 2446 .init_func = btrfs_init_cachep, 2447 .exit_func = btrfs_destroy_cachep, 2448 }, { 2449 .init_func = btrfs_transaction_init, 2450 .exit_func = btrfs_transaction_exit, 2451 }, { 2452 .init_func = btrfs_ctree_init, 2453 .exit_func = btrfs_ctree_exit, 2454 }, { 2455 .init_func = btrfs_free_space_init, 2456 .exit_func = btrfs_free_space_exit, 2457 }, { 2458 .init_func = extent_state_init_cachep, 2459 .exit_func = extent_state_free_cachep, 2460 }, { 2461 .init_func = extent_buffer_init_cachep, 2462 .exit_func = extent_buffer_free_cachep, 2463 }, { 2464 .init_func = btrfs_bioset_init, 2465 .exit_func = btrfs_bioset_exit, 2466 }, { 2467 .init_func = extent_map_init, 2468 .exit_func = extent_map_exit, 2469 }, { 2470 .init_func = ordered_data_init, 2471 .exit_func = ordered_data_exit, 2472 }, { 2473 .init_func = btrfs_delayed_inode_init, 2474 .exit_func = btrfs_delayed_inode_exit, 2475 }, { 2476 .init_func = btrfs_auto_defrag_init, 2477 .exit_func = btrfs_auto_defrag_exit, 2478 }, { 2479 .init_func = btrfs_delayed_ref_init, 2480 .exit_func = btrfs_delayed_ref_exit, 2481 }, { 2482 .init_func = btrfs_prelim_ref_init, 2483 .exit_func = btrfs_prelim_ref_exit, 2484 }, { 2485 .init_func = btrfs_interface_init, 2486 .exit_func = btrfs_interface_exit, 2487 }, { 2488 .init_func = btrfs_print_mod_info, 2489 .exit_func = NULL, 2490 }, { 2491 .init_func = btrfs_run_sanity_tests, 2492 .exit_func = NULL, 2493 }, { 2494 .init_func = register_btrfs, 2495 .exit_func = unregister_btrfs, 2496 } 2497 }; 2498 2499 static bool mod_init_result[ARRAY_SIZE(mod_init_seq)]; 2500 2501 static __always_inline void btrfs_exit_btrfs_fs(void) 2502 { 2503 int i; 2504 2505 for (i = ARRAY_SIZE(mod_init_seq) - 1; i >= 0; i--) { 2506 if (!mod_init_result[i]) 2507 continue; 2508 if (mod_init_seq[i].exit_func) 2509 mod_init_seq[i].exit_func(); 2510 mod_init_result[i] = false; 2511 } 2512 } 2513 2514 static void __exit exit_btrfs_fs(void) 2515 { 2516 btrfs_exit_btrfs_fs(); 2517 btrfs_cleanup_fs_uuids(); 2518 } 2519 2520 static int __init init_btrfs_fs(void) 2521 { 2522 int ret; 2523 int i; 2524 2525 for (i = 0; i < ARRAY_SIZE(mod_init_seq); i++) { 2526 ASSERT(!mod_init_result[i]); 2527 ret = mod_init_seq[i].init_func(); 2528 if (ret < 0) { 2529 btrfs_exit_btrfs_fs(); 2530 return ret; 2531 } 2532 mod_init_result[i] = true; 2533 } 2534 return 0; 2535 } 2536 2537 late_initcall(init_btrfs_fs); 2538 module_exit(exit_btrfs_fs) 2539 2540 MODULE_LICENSE("GPL"); 2541 MODULE_SOFTDEP("pre: crc32c"); 2542 MODULE_SOFTDEP("pre: xxhash64"); 2543 MODULE_SOFTDEP("pre: sha256"); 2544 MODULE_SOFTDEP("pre: blake2b-256"); 2545