1 // SPDX-License-Identifier: GPL-2.0 2 /* 3 * Copyright (C) 2007 Oracle. All rights reserved. 4 */ 5 6 #include <linux/blkdev.h> 7 #include <linux/module.h> 8 #include <linux/fs.h> 9 #include <linux/pagemap.h> 10 #include <linux/highmem.h> 11 #include <linux/time.h> 12 #include <linux/init.h> 13 #include <linux/seq_file.h> 14 #include <linux/string.h> 15 #include <linux/backing-dev.h> 16 #include <linux/mount.h> 17 #include <linux/writeback.h> 18 #include <linux/statfs.h> 19 #include <linux/compat.h> 20 #include <linux/parser.h> 21 #include <linux/ctype.h> 22 #include <linux/namei.h> 23 #include <linux/miscdevice.h> 24 #include <linux/magic.h> 25 #include <linux/slab.h> 26 #include <linux/ratelimit.h> 27 #include <linux/crc32c.h> 28 #include <linux/btrfs.h> 29 #include "messages.h" 30 #include "delayed-inode.h" 31 #include "ctree.h" 32 #include "disk-io.h" 33 #include "transaction.h" 34 #include "btrfs_inode.h" 35 #include "print-tree.h" 36 #include "props.h" 37 #include "xattr.h" 38 #include "volumes.h" 39 #include "export.h" 40 #include "compression.h" 41 #include "rcu-string.h" 42 #include "dev-replace.h" 43 #include "free-space-cache.h" 44 #include "backref.h" 45 #include "space-info.h" 46 #include "sysfs.h" 47 #include "zoned.h" 48 #include "tests/btrfs-tests.h" 49 #include "block-group.h" 50 #include "discard.h" 51 #include "qgroup.h" 52 #include "raid56.h" 53 #include "fs.h" 54 #include "accessors.h" 55 #include "defrag.h" 56 #include "dir-item.h" 57 #include "ioctl.h" 58 #define CREATE_TRACE_POINTS 59 #include <trace/events/btrfs.h> 60 61 static const struct super_operations btrfs_super_ops; 62 63 /* 64 * Types for mounting the default subvolume and a subvolume explicitly 65 * requested by subvol=/path. That way the callchain is straightforward and we 66 * don't have to play tricks with the mount options and recursive calls to 67 * btrfs_mount. 68 * 69 * The new btrfs_root_fs_type also servers as a tag for the bdev_holder. 70 */ 71 static struct file_system_type btrfs_fs_type; 72 static struct file_system_type btrfs_root_fs_type; 73 74 static int btrfs_remount(struct super_block *sb, int *flags, char *data); 75 76 static void btrfs_put_super(struct super_block *sb) 77 { 78 close_ctree(btrfs_sb(sb)); 79 } 80 81 enum { 82 Opt_acl, Opt_noacl, 83 Opt_clear_cache, 84 Opt_commit_interval, 85 Opt_compress, 86 Opt_compress_force, 87 Opt_compress_force_type, 88 Opt_compress_type, 89 Opt_degraded, 90 Opt_device, 91 Opt_fatal_errors, 92 Opt_flushoncommit, Opt_noflushoncommit, 93 Opt_max_inline, 94 Opt_barrier, Opt_nobarrier, 95 Opt_datacow, Opt_nodatacow, 96 Opt_datasum, Opt_nodatasum, 97 Opt_defrag, Opt_nodefrag, 98 Opt_discard, Opt_nodiscard, 99 Opt_discard_mode, 100 Opt_norecovery, 101 Opt_ratio, 102 Opt_rescan_uuid_tree, 103 Opt_skip_balance, 104 Opt_space_cache, Opt_no_space_cache, 105 Opt_space_cache_version, 106 Opt_ssd, Opt_nossd, 107 Opt_ssd_spread, Opt_nossd_spread, 108 Opt_subvol, 109 Opt_subvol_empty, 110 Opt_subvolid, 111 Opt_thread_pool, 112 Opt_treelog, Opt_notreelog, 113 Opt_user_subvol_rm_allowed, 114 115 /* Rescue options */ 116 Opt_rescue, 117 Opt_usebackuproot, 118 Opt_nologreplay, 119 Opt_ignorebadroots, 120 Opt_ignoredatacsums, 121 Opt_rescue_all, 122 123 /* Deprecated options */ 124 Opt_recovery, 125 Opt_inode_cache, Opt_noinode_cache, 126 127 /* Debugging options */ 128 Opt_check_integrity, 129 Opt_check_integrity_including_extent_data, 130 Opt_check_integrity_print_mask, 131 Opt_enospc_debug, Opt_noenospc_debug, 132 #ifdef CONFIG_BTRFS_DEBUG 133 Opt_fragment_data, Opt_fragment_metadata, Opt_fragment_all, 134 #endif 135 #ifdef CONFIG_BTRFS_FS_REF_VERIFY 136 Opt_ref_verify, 137 #endif 138 Opt_err, 139 }; 140 141 static const match_table_t tokens = { 142 {Opt_acl, "acl"}, 143 {Opt_noacl, "noacl"}, 144 {Opt_clear_cache, "clear_cache"}, 145 {Opt_commit_interval, "commit=%u"}, 146 {Opt_compress, "compress"}, 147 {Opt_compress_type, "compress=%s"}, 148 {Opt_compress_force, "compress-force"}, 149 {Opt_compress_force_type, "compress-force=%s"}, 150 {Opt_degraded, "degraded"}, 151 {Opt_device, "device=%s"}, 152 {Opt_fatal_errors, "fatal_errors=%s"}, 153 {Opt_flushoncommit, "flushoncommit"}, 154 {Opt_noflushoncommit, "noflushoncommit"}, 155 {Opt_inode_cache, "inode_cache"}, 156 {Opt_noinode_cache, "noinode_cache"}, 157 {Opt_max_inline, "max_inline=%s"}, 158 {Opt_barrier, "barrier"}, 159 {Opt_nobarrier, "nobarrier"}, 160 {Opt_datacow, "datacow"}, 161 {Opt_nodatacow, "nodatacow"}, 162 {Opt_datasum, "datasum"}, 163 {Opt_nodatasum, "nodatasum"}, 164 {Opt_defrag, "autodefrag"}, 165 {Opt_nodefrag, "noautodefrag"}, 166 {Opt_discard, "discard"}, 167 {Opt_discard_mode, "discard=%s"}, 168 {Opt_nodiscard, "nodiscard"}, 169 {Opt_norecovery, "norecovery"}, 170 {Opt_ratio, "metadata_ratio=%u"}, 171 {Opt_rescan_uuid_tree, "rescan_uuid_tree"}, 172 {Opt_skip_balance, "skip_balance"}, 173 {Opt_space_cache, "space_cache"}, 174 {Opt_no_space_cache, "nospace_cache"}, 175 {Opt_space_cache_version, "space_cache=%s"}, 176 {Opt_ssd, "ssd"}, 177 {Opt_nossd, "nossd"}, 178 {Opt_ssd_spread, "ssd_spread"}, 179 {Opt_nossd_spread, "nossd_spread"}, 180 {Opt_subvol, "subvol=%s"}, 181 {Opt_subvol_empty, "subvol="}, 182 {Opt_subvolid, "subvolid=%s"}, 183 {Opt_thread_pool, "thread_pool=%u"}, 184 {Opt_treelog, "treelog"}, 185 {Opt_notreelog, "notreelog"}, 186 {Opt_user_subvol_rm_allowed, "user_subvol_rm_allowed"}, 187 188 /* Rescue options */ 189 {Opt_rescue, "rescue=%s"}, 190 /* Deprecated, with alias rescue=nologreplay */ 191 {Opt_nologreplay, "nologreplay"}, 192 /* Deprecated, with alias rescue=usebackuproot */ 193 {Opt_usebackuproot, "usebackuproot"}, 194 195 /* Deprecated options */ 196 {Opt_recovery, "recovery"}, 197 198 /* Debugging options */ 199 {Opt_check_integrity, "check_int"}, 200 {Opt_check_integrity_including_extent_data, "check_int_data"}, 201 {Opt_check_integrity_print_mask, "check_int_print_mask=%u"}, 202 {Opt_enospc_debug, "enospc_debug"}, 203 {Opt_noenospc_debug, "noenospc_debug"}, 204 #ifdef CONFIG_BTRFS_DEBUG 205 {Opt_fragment_data, "fragment=data"}, 206 {Opt_fragment_metadata, "fragment=metadata"}, 207 {Opt_fragment_all, "fragment=all"}, 208 #endif 209 #ifdef CONFIG_BTRFS_FS_REF_VERIFY 210 {Opt_ref_verify, "ref_verify"}, 211 #endif 212 {Opt_err, NULL}, 213 }; 214 215 static const match_table_t rescue_tokens = { 216 {Opt_usebackuproot, "usebackuproot"}, 217 {Opt_nologreplay, "nologreplay"}, 218 {Opt_ignorebadroots, "ignorebadroots"}, 219 {Opt_ignorebadroots, "ibadroots"}, 220 {Opt_ignoredatacsums, "ignoredatacsums"}, 221 {Opt_ignoredatacsums, "idatacsums"}, 222 {Opt_rescue_all, "all"}, 223 {Opt_err, NULL}, 224 }; 225 226 static bool check_ro_option(struct btrfs_fs_info *fs_info, unsigned long opt, 227 const char *opt_name) 228 { 229 if (fs_info->mount_opt & opt) { 230 btrfs_err(fs_info, "%s must be used with ro mount option", 231 opt_name); 232 return true; 233 } 234 return false; 235 } 236 237 static int parse_rescue_options(struct btrfs_fs_info *info, const char *options) 238 { 239 char *opts; 240 char *orig; 241 char *p; 242 substring_t args[MAX_OPT_ARGS]; 243 int ret = 0; 244 245 opts = kstrdup(options, GFP_KERNEL); 246 if (!opts) 247 return -ENOMEM; 248 orig = opts; 249 250 while ((p = strsep(&opts, ":")) != NULL) { 251 int token; 252 253 if (!*p) 254 continue; 255 token = match_token(p, rescue_tokens, args); 256 switch (token){ 257 case Opt_usebackuproot: 258 btrfs_info(info, 259 "trying to use backup root at mount time"); 260 btrfs_set_opt(info->mount_opt, USEBACKUPROOT); 261 break; 262 case Opt_nologreplay: 263 btrfs_set_and_info(info, NOLOGREPLAY, 264 "disabling log replay at mount time"); 265 break; 266 case Opt_ignorebadroots: 267 btrfs_set_and_info(info, IGNOREBADROOTS, 268 "ignoring bad roots"); 269 break; 270 case Opt_ignoredatacsums: 271 btrfs_set_and_info(info, IGNOREDATACSUMS, 272 "ignoring data csums"); 273 break; 274 case Opt_rescue_all: 275 btrfs_info(info, "enabling all of the rescue options"); 276 btrfs_set_and_info(info, IGNOREDATACSUMS, 277 "ignoring data csums"); 278 btrfs_set_and_info(info, IGNOREBADROOTS, 279 "ignoring bad roots"); 280 btrfs_set_and_info(info, NOLOGREPLAY, 281 "disabling log replay at mount time"); 282 break; 283 case Opt_err: 284 btrfs_info(info, "unrecognized rescue option '%s'", p); 285 ret = -EINVAL; 286 goto out; 287 default: 288 break; 289 } 290 291 } 292 out: 293 kfree(orig); 294 return ret; 295 } 296 297 /* 298 * Regular mount options parser. Everything that is needed only when 299 * reading in a new superblock is parsed here. 300 * XXX JDM: This needs to be cleaned up for remount. 301 */ 302 int btrfs_parse_options(struct btrfs_fs_info *info, char *options, 303 unsigned long new_flags) 304 { 305 substring_t args[MAX_OPT_ARGS]; 306 char *p, *num; 307 int intarg; 308 int ret = 0; 309 char *compress_type; 310 bool compress_force = false; 311 enum btrfs_compression_type saved_compress_type; 312 int saved_compress_level; 313 bool saved_compress_force; 314 int no_compress = 0; 315 const bool remounting = test_bit(BTRFS_FS_STATE_REMOUNTING, &info->fs_state); 316 317 if (btrfs_fs_compat_ro(info, FREE_SPACE_TREE)) 318 btrfs_set_opt(info->mount_opt, FREE_SPACE_TREE); 319 else if (btrfs_free_space_cache_v1_active(info)) { 320 if (btrfs_is_zoned(info)) { 321 btrfs_info(info, 322 "zoned: clearing existing space cache"); 323 btrfs_set_super_cache_generation(info->super_copy, 0); 324 } else { 325 btrfs_set_opt(info->mount_opt, SPACE_CACHE); 326 } 327 } 328 329 /* 330 * Even the options are empty, we still need to do extra check 331 * against new flags 332 */ 333 if (!options) 334 goto check; 335 336 while ((p = strsep(&options, ",")) != NULL) { 337 int token; 338 if (!*p) 339 continue; 340 341 token = match_token(p, tokens, args); 342 switch (token) { 343 case Opt_degraded: 344 btrfs_info(info, "allowing degraded mounts"); 345 btrfs_set_opt(info->mount_opt, DEGRADED); 346 break; 347 case Opt_subvol: 348 case Opt_subvol_empty: 349 case Opt_subvolid: 350 case Opt_device: 351 /* 352 * These are parsed by btrfs_parse_subvol_options or 353 * btrfs_parse_device_options and can be ignored here. 354 */ 355 break; 356 case Opt_nodatasum: 357 btrfs_set_and_info(info, NODATASUM, 358 "setting nodatasum"); 359 break; 360 case Opt_datasum: 361 if (btrfs_test_opt(info, NODATASUM)) { 362 if (btrfs_test_opt(info, NODATACOW)) 363 btrfs_info(info, 364 "setting datasum, datacow enabled"); 365 else 366 btrfs_info(info, "setting datasum"); 367 } 368 btrfs_clear_opt(info->mount_opt, NODATACOW); 369 btrfs_clear_opt(info->mount_opt, NODATASUM); 370 break; 371 case Opt_nodatacow: 372 if (!btrfs_test_opt(info, NODATACOW)) { 373 if (!btrfs_test_opt(info, COMPRESS) || 374 !btrfs_test_opt(info, FORCE_COMPRESS)) { 375 btrfs_info(info, 376 "setting nodatacow, compression disabled"); 377 } else { 378 btrfs_info(info, "setting nodatacow"); 379 } 380 } 381 btrfs_clear_opt(info->mount_opt, COMPRESS); 382 btrfs_clear_opt(info->mount_opt, FORCE_COMPRESS); 383 btrfs_set_opt(info->mount_opt, NODATACOW); 384 btrfs_set_opt(info->mount_opt, NODATASUM); 385 break; 386 case Opt_datacow: 387 btrfs_clear_and_info(info, NODATACOW, 388 "setting datacow"); 389 break; 390 case Opt_compress_force: 391 case Opt_compress_force_type: 392 compress_force = true; 393 fallthrough; 394 case Opt_compress: 395 case Opt_compress_type: 396 saved_compress_type = btrfs_test_opt(info, 397 COMPRESS) ? 398 info->compress_type : BTRFS_COMPRESS_NONE; 399 saved_compress_force = 400 btrfs_test_opt(info, FORCE_COMPRESS); 401 saved_compress_level = info->compress_level; 402 if (token == Opt_compress || 403 token == Opt_compress_force || 404 strncmp(args[0].from, "zlib", 4) == 0) { 405 compress_type = "zlib"; 406 407 info->compress_type = BTRFS_COMPRESS_ZLIB; 408 info->compress_level = BTRFS_ZLIB_DEFAULT_LEVEL; 409 /* 410 * args[0] contains uninitialized data since 411 * for these tokens we don't expect any 412 * parameter. 413 */ 414 if (token != Opt_compress && 415 token != Opt_compress_force) 416 info->compress_level = 417 btrfs_compress_str2level( 418 BTRFS_COMPRESS_ZLIB, 419 args[0].from + 4); 420 btrfs_set_opt(info->mount_opt, COMPRESS); 421 btrfs_clear_opt(info->mount_opt, NODATACOW); 422 btrfs_clear_opt(info->mount_opt, NODATASUM); 423 no_compress = 0; 424 } else if (strncmp(args[0].from, "lzo", 3) == 0) { 425 compress_type = "lzo"; 426 info->compress_type = BTRFS_COMPRESS_LZO; 427 info->compress_level = 0; 428 btrfs_set_opt(info->mount_opt, COMPRESS); 429 btrfs_clear_opt(info->mount_opt, NODATACOW); 430 btrfs_clear_opt(info->mount_opt, NODATASUM); 431 btrfs_set_fs_incompat(info, COMPRESS_LZO); 432 no_compress = 0; 433 } else if (strncmp(args[0].from, "zstd", 4) == 0) { 434 compress_type = "zstd"; 435 info->compress_type = BTRFS_COMPRESS_ZSTD; 436 info->compress_level = 437 btrfs_compress_str2level( 438 BTRFS_COMPRESS_ZSTD, 439 args[0].from + 4); 440 btrfs_set_opt(info->mount_opt, COMPRESS); 441 btrfs_clear_opt(info->mount_opt, NODATACOW); 442 btrfs_clear_opt(info->mount_opt, NODATASUM); 443 btrfs_set_fs_incompat(info, COMPRESS_ZSTD); 444 no_compress = 0; 445 } else if (strncmp(args[0].from, "no", 2) == 0) { 446 compress_type = "no"; 447 info->compress_level = 0; 448 info->compress_type = 0; 449 btrfs_clear_opt(info->mount_opt, COMPRESS); 450 btrfs_clear_opt(info->mount_opt, FORCE_COMPRESS); 451 compress_force = false; 452 no_compress++; 453 } else { 454 btrfs_err(info, "unrecognized compression value %s", 455 args[0].from); 456 ret = -EINVAL; 457 goto out; 458 } 459 460 if (compress_force) { 461 btrfs_set_opt(info->mount_opt, FORCE_COMPRESS); 462 } else { 463 /* 464 * If we remount from compress-force=xxx to 465 * compress=xxx, we need clear FORCE_COMPRESS 466 * flag, otherwise, there is no way for users 467 * to disable forcible compression separately. 468 */ 469 btrfs_clear_opt(info->mount_opt, FORCE_COMPRESS); 470 } 471 if (no_compress == 1) { 472 btrfs_info(info, "use no compression"); 473 } else if ((info->compress_type != saved_compress_type) || 474 (compress_force != saved_compress_force) || 475 (info->compress_level != saved_compress_level)) { 476 btrfs_info(info, "%s %s compression, level %d", 477 (compress_force) ? "force" : "use", 478 compress_type, info->compress_level); 479 } 480 compress_force = false; 481 break; 482 case Opt_ssd: 483 btrfs_set_and_info(info, SSD, 484 "enabling ssd optimizations"); 485 btrfs_clear_opt(info->mount_opt, NOSSD); 486 break; 487 case Opt_ssd_spread: 488 btrfs_set_and_info(info, SSD, 489 "enabling ssd optimizations"); 490 btrfs_set_and_info(info, SSD_SPREAD, 491 "using spread ssd allocation scheme"); 492 btrfs_clear_opt(info->mount_opt, NOSSD); 493 break; 494 case Opt_nossd: 495 btrfs_set_opt(info->mount_opt, NOSSD); 496 btrfs_clear_and_info(info, SSD, 497 "not using ssd optimizations"); 498 fallthrough; 499 case Opt_nossd_spread: 500 btrfs_clear_and_info(info, SSD_SPREAD, 501 "not using spread ssd allocation scheme"); 502 break; 503 case Opt_barrier: 504 btrfs_clear_and_info(info, NOBARRIER, 505 "turning on barriers"); 506 break; 507 case Opt_nobarrier: 508 btrfs_set_and_info(info, NOBARRIER, 509 "turning off barriers"); 510 break; 511 case Opt_thread_pool: 512 ret = match_int(&args[0], &intarg); 513 if (ret) { 514 btrfs_err(info, "unrecognized thread_pool value %s", 515 args[0].from); 516 goto out; 517 } else if (intarg == 0) { 518 btrfs_err(info, "invalid value 0 for thread_pool"); 519 ret = -EINVAL; 520 goto out; 521 } 522 info->thread_pool_size = intarg; 523 break; 524 case Opt_max_inline: 525 num = match_strdup(&args[0]); 526 if (num) { 527 info->max_inline = memparse(num, NULL); 528 kfree(num); 529 530 if (info->max_inline) { 531 info->max_inline = min_t(u64, 532 info->max_inline, 533 info->sectorsize); 534 } 535 btrfs_info(info, "max_inline at %llu", 536 info->max_inline); 537 } else { 538 ret = -ENOMEM; 539 goto out; 540 } 541 break; 542 case Opt_acl: 543 #ifdef CONFIG_BTRFS_FS_POSIX_ACL 544 info->sb->s_flags |= SB_POSIXACL; 545 break; 546 #else 547 btrfs_err(info, "support for ACL not compiled in!"); 548 ret = -EINVAL; 549 goto out; 550 #endif 551 case Opt_noacl: 552 info->sb->s_flags &= ~SB_POSIXACL; 553 break; 554 case Opt_notreelog: 555 btrfs_set_and_info(info, NOTREELOG, 556 "disabling tree log"); 557 break; 558 case Opt_treelog: 559 btrfs_clear_and_info(info, NOTREELOG, 560 "enabling tree log"); 561 break; 562 case Opt_norecovery: 563 case Opt_nologreplay: 564 btrfs_warn(info, 565 "'nologreplay' is deprecated, use 'rescue=nologreplay' instead"); 566 btrfs_set_and_info(info, NOLOGREPLAY, 567 "disabling log replay at mount time"); 568 break; 569 case Opt_flushoncommit: 570 btrfs_set_and_info(info, FLUSHONCOMMIT, 571 "turning on flush-on-commit"); 572 break; 573 case Opt_noflushoncommit: 574 btrfs_clear_and_info(info, FLUSHONCOMMIT, 575 "turning off flush-on-commit"); 576 break; 577 case Opt_ratio: 578 ret = match_int(&args[0], &intarg); 579 if (ret) { 580 btrfs_err(info, "unrecognized metadata_ratio value %s", 581 args[0].from); 582 goto out; 583 } 584 info->metadata_ratio = intarg; 585 btrfs_info(info, "metadata ratio %u", 586 info->metadata_ratio); 587 break; 588 case Opt_discard: 589 case Opt_discard_mode: 590 if (token == Opt_discard || 591 strcmp(args[0].from, "sync") == 0) { 592 btrfs_clear_opt(info->mount_opt, DISCARD_ASYNC); 593 btrfs_set_and_info(info, DISCARD_SYNC, 594 "turning on sync discard"); 595 } else if (strcmp(args[0].from, "async") == 0) { 596 btrfs_clear_opt(info->mount_opt, DISCARD_SYNC); 597 btrfs_set_and_info(info, DISCARD_ASYNC, 598 "turning on async discard"); 599 } else { 600 btrfs_err(info, "unrecognized discard mode value %s", 601 args[0].from); 602 ret = -EINVAL; 603 goto out; 604 } 605 btrfs_clear_opt(info->mount_opt, NODISCARD); 606 break; 607 case Opt_nodiscard: 608 btrfs_clear_and_info(info, DISCARD_SYNC, 609 "turning off discard"); 610 btrfs_clear_and_info(info, DISCARD_ASYNC, 611 "turning off async discard"); 612 btrfs_set_opt(info->mount_opt, NODISCARD); 613 break; 614 case Opt_space_cache: 615 case Opt_space_cache_version: 616 /* 617 * We already set FREE_SPACE_TREE above because we have 618 * compat_ro(FREE_SPACE_TREE) set, and we aren't going 619 * to allow v1 to be set for extent tree v2, simply 620 * ignore this setting if we're extent tree v2. 621 */ 622 if (btrfs_fs_incompat(info, EXTENT_TREE_V2)) 623 break; 624 if (token == Opt_space_cache || 625 strcmp(args[0].from, "v1") == 0) { 626 btrfs_clear_opt(info->mount_opt, 627 FREE_SPACE_TREE); 628 btrfs_set_and_info(info, SPACE_CACHE, 629 "enabling disk space caching"); 630 } else if (strcmp(args[0].from, "v2") == 0) { 631 btrfs_clear_opt(info->mount_opt, 632 SPACE_CACHE); 633 btrfs_set_and_info(info, FREE_SPACE_TREE, 634 "enabling free space tree"); 635 } else { 636 btrfs_err(info, "unrecognized space_cache value %s", 637 args[0].from); 638 ret = -EINVAL; 639 goto out; 640 } 641 break; 642 case Opt_rescan_uuid_tree: 643 btrfs_set_opt(info->mount_opt, RESCAN_UUID_TREE); 644 break; 645 case Opt_no_space_cache: 646 /* 647 * We cannot operate without the free space tree with 648 * extent tree v2, ignore this option. 649 */ 650 if (btrfs_fs_incompat(info, EXTENT_TREE_V2)) 651 break; 652 if (btrfs_test_opt(info, SPACE_CACHE)) { 653 btrfs_clear_and_info(info, SPACE_CACHE, 654 "disabling disk space caching"); 655 } 656 if (btrfs_test_opt(info, FREE_SPACE_TREE)) { 657 btrfs_clear_and_info(info, FREE_SPACE_TREE, 658 "disabling free space tree"); 659 } 660 break; 661 case Opt_inode_cache: 662 case Opt_noinode_cache: 663 btrfs_warn(info, 664 "the 'inode_cache' option is deprecated and has no effect since 5.11"); 665 break; 666 case Opt_clear_cache: 667 /* 668 * We cannot clear the free space tree with extent tree 669 * v2, ignore this option. 670 */ 671 if (btrfs_fs_incompat(info, EXTENT_TREE_V2)) 672 break; 673 btrfs_set_and_info(info, CLEAR_CACHE, 674 "force clearing of disk cache"); 675 break; 676 case Opt_user_subvol_rm_allowed: 677 btrfs_set_opt(info->mount_opt, USER_SUBVOL_RM_ALLOWED); 678 break; 679 case Opt_enospc_debug: 680 btrfs_set_opt(info->mount_opt, ENOSPC_DEBUG); 681 break; 682 case Opt_noenospc_debug: 683 btrfs_clear_opt(info->mount_opt, ENOSPC_DEBUG); 684 break; 685 case Opt_defrag: 686 btrfs_set_and_info(info, AUTO_DEFRAG, 687 "enabling auto defrag"); 688 break; 689 case Opt_nodefrag: 690 btrfs_clear_and_info(info, AUTO_DEFRAG, 691 "disabling auto defrag"); 692 break; 693 case Opt_recovery: 694 case Opt_usebackuproot: 695 btrfs_warn(info, 696 "'%s' is deprecated, use 'rescue=usebackuproot' instead", 697 token == Opt_recovery ? "recovery" : 698 "usebackuproot"); 699 btrfs_info(info, 700 "trying to use backup root at mount time"); 701 btrfs_set_opt(info->mount_opt, USEBACKUPROOT); 702 break; 703 case Opt_skip_balance: 704 btrfs_set_opt(info->mount_opt, SKIP_BALANCE); 705 break; 706 #ifdef CONFIG_BTRFS_FS_CHECK_INTEGRITY 707 case Opt_check_integrity_including_extent_data: 708 btrfs_info(info, 709 "enabling check integrity including extent data"); 710 btrfs_set_opt(info->mount_opt, CHECK_INTEGRITY_DATA); 711 btrfs_set_opt(info->mount_opt, CHECK_INTEGRITY); 712 break; 713 case Opt_check_integrity: 714 btrfs_info(info, "enabling check integrity"); 715 btrfs_set_opt(info->mount_opt, CHECK_INTEGRITY); 716 break; 717 case Opt_check_integrity_print_mask: 718 ret = match_int(&args[0], &intarg); 719 if (ret) { 720 btrfs_err(info, 721 "unrecognized check_integrity_print_mask value %s", 722 args[0].from); 723 goto out; 724 } 725 info->check_integrity_print_mask = intarg; 726 btrfs_info(info, "check_integrity_print_mask 0x%x", 727 info->check_integrity_print_mask); 728 break; 729 #else 730 case Opt_check_integrity_including_extent_data: 731 case Opt_check_integrity: 732 case Opt_check_integrity_print_mask: 733 btrfs_err(info, 734 "support for check_integrity* not compiled in!"); 735 ret = -EINVAL; 736 goto out; 737 #endif 738 case Opt_fatal_errors: 739 if (strcmp(args[0].from, "panic") == 0) { 740 btrfs_set_opt(info->mount_opt, 741 PANIC_ON_FATAL_ERROR); 742 } else if (strcmp(args[0].from, "bug") == 0) { 743 btrfs_clear_opt(info->mount_opt, 744 PANIC_ON_FATAL_ERROR); 745 } else { 746 btrfs_err(info, "unrecognized fatal_errors value %s", 747 args[0].from); 748 ret = -EINVAL; 749 goto out; 750 } 751 break; 752 case Opt_commit_interval: 753 intarg = 0; 754 ret = match_int(&args[0], &intarg); 755 if (ret) { 756 btrfs_err(info, "unrecognized commit_interval value %s", 757 args[0].from); 758 ret = -EINVAL; 759 goto out; 760 } 761 if (intarg == 0) { 762 btrfs_info(info, 763 "using default commit interval %us", 764 BTRFS_DEFAULT_COMMIT_INTERVAL); 765 intarg = BTRFS_DEFAULT_COMMIT_INTERVAL; 766 } else if (intarg > 300) { 767 btrfs_warn(info, "excessive commit interval %d", 768 intarg); 769 } 770 info->commit_interval = intarg; 771 break; 772 case Opt_rescue: 773 ret = parse_rescue_options(info, args[0].from); 774 if (ret < 0) { 775 btrfs_err(info, "unrecognized rescue value %s", 776 args[0].from); 777 goto out; 778 } 779 break; 780 #ifdef CONFIG_BTRFS_DEBUG 781 case Opt_fragment_all: 782 btrfs_info(info, "fragmenting all space"); 783 btrfs_set_opt(info->mount_opt, FRAGMENT_DATA); 784 btrfs_set_opt(info->mount_opt, FRAGMENT_METADATA); 785 break; 786 case Opt_fragment_metadata: 787 btrfs_info(info, "fragmenting metadata"); 788 btrfs_set_opt(info->mount_opt, 789 FRAGMENT_METADATA); 790 break; 791 case Opt_fragment_data: 792 btrfs_info(info, "fragmenting data"); 793 btrfs_set_opt(info->mount_opt, FRAGMENT_DATA); 794 break; 795 #endif 796 #ifdef CONFIG_BTRFS_FS_REF_VERIFY 797 case Opt_ref_verify: 798 btrfs_info(info, "doing ref verification"); 799 btrfs_set_opt(info->mount_opt, REF_VERIFY); 800 break; 801 #endif 802 case Opt_err: 803 btrfs_err(info, "unrecognized mount option '%s'", p); 804 ret = -EINVAL; 805 goto out; 806 default: 807 break; 808 } 809 } 810 check: 811 /* We're read-only, don't have to check. */ 812 if (new_flags & SB_RDONLY) 813 goto out; 814 815 if (check_ro_option(info, BTRFS_MOUNT_NOLOGREPLAY, "nologreplay") || 816 check_ro_option(info, BTRFS_MOUNT_IGNOREBADROOTS, "ignorebadroots") || 817 check_ro_option(info, BTRFS_MOUNT_IGNOREDATACSUMS, "ignoredatacsums")) 818 ret = -EINVAL; 819 out: 820 if (btrfs_fs_compat_ro(info, FREE_SPACE_TREE) && 821 !btrfs_test_opt(info, FREE_SPACE_TREE) && 822 !btrfs_test_opt(info, CLEAR_CACHE)) { 823 btrfs_err(info, "cannot disable free space tree"); 824 ret = -EINVAL; 825 826 } 827 if (!ret) 828 ret = btrfs_check_mountopts_zoned(info); 829 if (!ret && !remounting) { 830 if (btrfs_test_opt(info, SPACE_CACHE)) 831 btrfs_info(info, "disk space caching is enabled"); 832 if (btrfs_test_opt(info, FREE_SPACE_TREE)) 833 btrfs_info(info, "using free space tree"); 834 } 835 return ret; 836 } 837 838 /* 839 * Parse mount options that are required early in the mount process. 840 * 841 * All other options will be parsed on much later in the mount process and 842 * only when we need to allocate a new super block. 843 */ 844 static int btrfs_parse_device_options(const char *options, fmode_t flags, 845 void *holder) 846 { 847 substring_t args[MAX_OPT_ARGS]; 848 char *device_name, *opts, *orig, *p; 849 struct btrfs_device *device = NULL; 850 int error = 0; 851 852 lockdep_assert_held(&uuid_mutex); 853 854 if (!options) 855 return 0; 856 857 /* 858 * strsep changes the string, duplicate it because btrfs_parse_options 859 * gets called later 860 */ 861 opts = kstrdup(options, GFP_KERNEL); 862 if (!opts) 863 return -ENOMEM; 864 orig = opts; 865 866 while ((p = strsep(&opts, ",")) != NULL) { 867 int token; 868 869 if (!*p) 870 continue; 871 872 token = match_token(p, tokens, args); 873 if (token == Opt_device) { 874 device_name = match_strdup(&args[0]); 875 if (!device_name) { 876 error = -ENOMEM; 877 goto out; 878 } 879 device = btrfs_scan_one_device(device_name, flags, 880 holder); 881 kfree(device_name); 882 if (IS_ERR(device)) { 883 error = PTR_ERR(device); 884 goto out; 885 } 886 } 887 } 888 889 out: 890 kfree(orig); 891 return error; 892 } 893 894 /* 895 * Parse mount options that are related to subvolume id 896 * 897 * The value is later passed to mount_subvol() 898 */ 899 static int btrfs_parse_subvol_options(const char *options, char **subvol_name, 900 u64 *subvol_objectid) 901 { 902 substring_t args[MAX_OPT_ARGS]; 903 char *opts, *orig, *p; 904 int error = 0; 905 u64 subvolid; 906 907 if (!options) 908 return 0; 909 910 /* 911 * strsep changes the string, duplicate it because 912 * btrfs_parse_device_options gets called later 913 */ 914 opts = kstrdup(options, GFP_KERNEL); 915 if (!opts) 916 return -ENOMEM; 917 orig = opts; 918 919 while ((p = strsep(&opts, ",")) != NULL) { 920 int token; 921 if (!*p) 922 continue; 923 924 token = match_token(p, tokens, args); 925 switch (token) { 926 case Opt_subvol: 927 kfree(*subvol_name); 928 *subvol_name = match_strdup(&args[0]); 929 if (!*subvol_name) { 930 error = -ENOMEM; 931 goto out; 932 } 933 break; 934 case Opt_subvolid: 935 error = match_u64(&args[0], &subvolid); 936 if (error) 937 goto out; 938 939 /* we want the original fs_tree */ 940 if (subvolid == 0) 941 subvolid = BTRFS_FS_TREE_OBJECTID; 942 943 *subvol_objectid = subvolid; 944 break; 945 default: 946 break; 947 } 948 } 949 950 out: 951 kfree(orig); 952 return error; 953 } 954 955 char *btrfs_get_subvol_name_from_objectid(struct btrfs_fs_info *fs_info, 956 u64 subvol_objectid) 957 { 958 struct btrfs_root *root = fs_info->tree_root; 959 struct btrfs_root *fs_root = NULL; 960 struct btrfs_root_ref *root_ref; 961 struct btrfs_inode_ref *inode_ref; 962 struct btrfs_key key; 963 struct btrfs_path *path = NULL; 964 char *name = NULL, *ptr; 965 u64 dirid; 966 int len; 967 int ret; 968 969 path = btrfs_alloc_path(); 970 if (!path) { 971 ret = -ENOMEM; 972 goto err; 973 } 974 975 name = kmalloc(PATH_MAX, GFP_KERNEL); 976 if (!name) { 977 ret = -ENOMEM; 978 goto err; 979 } 980 ptr = name + PATH_MAX - 1; 981 ptr[0] = '\0'; 982 983 /* 984 * Walk up the subvolume trees in the tree of tree roots by root 985 * backrefs until we hit the top-level subvolume. 986 */ 987 while (subvol_objectid != BTRFS_FS_TREE_OBJECTID) { 988 key.objectid = subvol_objectid; 989 key.type = BTRFS_ROOT_BACKREF_KEY; 990 key.offset = (u64)-1; 991 992 ret = btrfs_search_backwards(root, &key, path); 993 if (ret < 0) { 994 goto err; 995 } else if (ret > 0) { 996 ret = -ENOENT; 997 goto err; 998 } 999 1000 subvol_objectid = key.offset; 1001 1002 root_ref = btrfs_item_ptr(path->nodes[0], path->slots[0], 1003 struct btrfs_root_ref); 1004 len = btrfs_root_ref_name_len(path->nodes[0], root_ref); 1005 ptr -= len + 1; 1006 if (ptr < name) { 1007 ret = -ENAMETOOLONG; 1008 goto err; 1009 } 1010 read_extent_buffer(path->nodes[0], ptr + 1, 1011 (unsigned long)(root_ref + 1), len); 1012 ptr[0] = '/'; 1013 dirid = btrfs_root_ref_dirid(path->nodes[0], root_ref); 1014 btrfs_release_path(path); 1015 1016 fs_root = btrfs_get_fs_root(fs_info, subvol_objectid, true); 1017 if (IS_ERR(fs_root)) { 1018 ret = PTR_ERR(fs_root); 1019 fs_root = NULL; 1020 goto err; 1021 } 1022 1023 /* 1024 * Walk up the filesystem tree by inode refs until we hit the 1025 * root directory. 1026 */ 1027 while (dirid != BTRFS_FIRST_FREE_OBJECTID) { 1028 key.objectid = dirid; 1029 key.type = BTRFS_INODE_REF_KEY; 1030 key.offset = (u64)-1; 1031 1032 ret = btrfs_search_backwards(fs_root, &key, path); 1033 if (ret < 0) { 1034 goto err; 1035 } else if (ret > 0) { 1036 ret = -ENOENT; 1037 goto err; 1038 } 1039 1040 dirid = key.offset; 1041 1042 inode_ref = btrfs_item_ptr(path->nodes[0], 1043 path->slots[0], 1044 struct btrfs_inode_ref); 1045 len = btrfs_inode_ref_name_len(path->nodes[0], 1046 inode_ref); 1047 ptr -= len + 1; 1048 if (ptr < name) { 1049 ret = -ENAMETOOLONG; 1050 goto err; 1051 } 1052 read_extent_buffer(path->nodes[0], ptr + 1, 1053 (unsigned long)(inode_ref + 1), len); 1054 ptr[0] = '/'; 1055 btrfs_release_path(path); 1056 } 1057 btrfs_put_root(fs_root); 1058 fs_root = NULL; 1059 } 1060 1061 btrfs_free_path(path); 1062 if (ptr == name + PATH_MAX - 1) { 1063 name[0] = '/'; 1064 name[1] = '\0'; 1065 } else { 1066 memmove(name, ptr, name + PATH_MAX - ptr); 1067 } 1068 return name; 1069 1070 err: 1071 btrfs_put_root(fs_root); 1072 btrfs_free_path(path); 1073 kfree(name); 1074 return ERR_PTR(ret); 1075 } 1076 1077 static int get_default_subvol_objectid(struct btrfs_fs_info *fs_info, u64 *objectid) 1078 { 1079 struct btrfs_root *root = fs_info->tree_root; 1080 struct btrfs_dir_item *di; 1081 struct btrfs_path *path; 1082 struct btrfs_key location; 1083 struct fscrypt_str name = FSTR_INIT("default", 7); 1084 u64 dir_id; 1085 1086 path = btrfs_alloc_path(); 1087 if (!path) 1088 return -ENOMEM; 1089 1090 /* 1091 * Find the "default" dir item which points to the root item that we 1092 * will mount by default if we haven't been given a specific subvolume 1093 * to mount. 1094 */ 1095 dir_id = btrfs_super_root_dir(fs_info->super_copy); 1096 di = btrfs_lookup_dir_item(NULL, root, path, dir_id, &name, 0); 1097 if (IS_ERR(di)) { 1098 btrfs_free_path(path); 1099 return PTR_ERR(di); 1100 } 1101 if (!di) { 1102 /* 1103 * Ok the default dir item isn't there. This is weird since 1104 * it's always been there, but don't freak out, just try and 1105 * mount the top-level subvolume. 1106 */ 1107 btrfs_free_path(path); 1108 *objectid = BTRFS_FS_TREE_OBJECTID; 1109 return 0; 1110 } 1111 1112 btrfs_dir_item_key_to_cpu(path->nodes[0], di, &location); 1113 btrfs_free_path(path); 1114 *objectid = location.objectid; 1115 return 0; 1116 } 1117 1118 static int btrfs_fill_super(struct super_block *sb, 1119 struct btrfs_fs_devices *fs_devices, 1120 void *data) 1121 { 1122 struct inode *inode; 1123 struct btrfs_fs_info *fs_info = btrfs_sb(sb); 1124 int err; 1125 1126 sb->s_maxbytes = MAX_LFS_FILESIZE; 1127 sb->s_magic = BTRFS_SUPER_MAGIC; 1128 sb->s_op = &btrfs_super_ops; 1129 sb->s_d_op = &btrfs_dentry_operations; 1130 sb->s_export_op = &btrfs_export_ops; 1131 #ifdef CONFIG_FS_VERITY 1132 sb->s_vop = &btrfs_verityops; 1133 #endif 1134 sb->s_xattr = btrfs_xattr_handlers; 1135 sb->s_time_gran = 1; 1136 #ifdef CONFIG_BTRFS_FS_POSIX_ACL 1137 sb->s_flags |= SB_POSIXACL; 1138 #endif 1139 sb->s_flags |= SB_I_VERSION; 1140 sb->s_iflags |= SB_I_CGROUPWB; 1141 1142 err = super_setup_bdi(sb); 1143 if (err) { 1144 btrfs_err(fs_info, "super_setup_bdi failed"); 1145 return err; 1146 } 1147 1148 err = open_ctree(sb, fs_devices, (char *)data); 1149 if (err) { 1150 btrfs_err(fs_info, "open_ctree failed"); 1151 return err; 1152 } 1153 1154 inode = btrfs_iget(sb, BTRFS_FIRST_FREE_OBJECTID, fs_info->fs_root); 1155 if (IS_ERR(inode)) { 1156 err = PTR_ERR(inode); 1157 goto fail_close; 1158 } 1159 1160 sb->s_root = d_make_root(inode); 1161 if (!sb->s_root) { 1162 err = -ENOMEM; 1163 goto fail_close; 1164 } 1165 1166 sb->s_flags |= SB_ACTIVE; 1167 return 0; 1168 1169 fail_close: 1170 close_ctree(fs_info); 1171 return err; 1172 } 1173 1174 int btrfs_sync_fs(struct super_block *sb, int wait) 1175 { 1176 struct btrfs_trans_handle *trans; 1177 struct btrfs_fs_info *fs_info = btrfs_sb(sb); 1178 struct btrfs_root *root = fs_info->tree_root; 1179 1180 trace_btrfs_sync_fs(fs_info, wait); 1181 1182 if (!wait) { 1183 filemap_flush(fs_info->btree_inode->i_mapping); 1184 return 0; 1185 } 1186 1187 btrfs_wait_ordered_roots(fs_info, U64_MAX, 0, (u64)-1); 1188 1189 trans = btrfs_attach_transaction_barrier(root); 1190 if (IS_ERR(trans)) { 1191 /* no transaction, don't bother */ 1192 if (PTR_ERR(trans) == -ENOENT) { 1193 /* 1194 * Exit unless we have some pending changes 1195 * that need to go through commit 1196 */ 1197 if (!test_bit(BTRFS_FS_NEED_TRANS_COMMIT, 1198 &fs_info->flags)) 1199 return 0; 1200 /* 1201 * A non-blocking test if the fs is frozen. We must not 1202 * start a new transaction here otherwise a deadlock 1203 * happens. The pending operations are delayed to the 1204 * next commit after thawing. 1205 */ 1206 if (sb_start_write_trylock(sb)) 1207 sb_end_write(sb); 1208 else 1209 return 0; 1210 trans = btrfs_start_transaction(root, 0); 1211 } 1212 if (IS_ERR(trans)) 1213 return PTR_ERR(trans); 1214 } 1215 return btrfs_commit_transaction(trans); 1216 } 1217 1218 static void print_rescue_option(struct seq_file *seq, const char *s, bool *printed) 1219 { 1220 seq_printf(seq, "%s%s", (*printed) ? ":" : ",rescue=", s); 1221 *printed = true; 1222 } 1223 1224 static int btrfs_show_options(struct seq_file *seq, struct dentry *dentry) 1225 { 1226 struct btrfs_fs_info *info = btrfs_sb(dentry->d_sb); 1227 const char *compress_type; 1228 const char *subvol_name; 1229 bool printed = false; 1230 1231 if (btrfs_test_opt(info, DEGRADED)) 1232 seq_puts(seq, ",degraded"); 1233 if (btrfs_test_opt(info, NODATASUM)) 1234 seq_puts(seq, ",nodatasum"); 1235 if (btrfs_test_opt(info, NODATACOW)) 1236 seq_puts(seq, ",nodatacow"); 1237 if (btrfs_test_opt(info, NOBARRIER)) 1238 seq_puts(seq, ",nobarrier"); 1239 if (info->max_inline != BTRFS_DEFAULT_MAX_INLINE) 1240 seq_printf(seq, ",max_inline=%llu", info->max_inline); 1241 if (info->thread_pool_size != min_t(unsigned long, 1242 num_online_cpus() + 2, 8)) 1243 seq_printf(seq, ",thread_pool=%u", info->thread_pool_size); 1244 if (btrfs_test_opt(info, COMPRESS)) { 1245 compress_type = btrfs_compress_type2str(info->compress_type); 1246 if (btrfs_test_opt(info, FORCE_COMPRESS)) 1247 seq_printf(seq, ",compress-force=%s", compress_type); 1248 else 1249 seq_printf(seq, ",compress=%s", compress_type); 1250 if (info->compress_level) 1251 seq_printf(seq, ":%d", info->compress_level); 1252 } 1253 if (btrfs_test_opt(info, NOSSD)) 1254 seq_puts(seq, ",nossd"); 1255 if (btrfs_test_opt(info, SSD_SPREAD)) 1256 seq_puts(seq, ",ssd_spread"); 1257 else if (btrfs_test_opt(info, SSD)) 1258 seq_puts(seq, ",ssd"); 1259 if (btrfs_test_opt(info, NOTREELOG)) 1260 seq_puts(seq, ",notreelog"); 1261 if (btrfs_test_opt(info, NOLOGREPLAY)) 1262 print_rescue_option(seq, "nologreplay", &printed); 1263 if (btrfs_test_opt(info, USEBACKUPROOT)) 1264 print_rescue_option(seq, "usebackuproot", &printed); 1265 if (btrfs_test_opt(info, IGNOREBADROOTS)) 1266 print_rescue_option(seq, "ignorebadroots", &printed); 1267 if (btrfs_test_opt(info, IGNOREDATACSUMS)) 1268 print_rescue_option(seq, "ignoredatacsums", &printed); 1269 if (btrfs_test_opt(info, FLUSHONCOMMIT)) 1270 seq_puts(seq, ",flushoncommit"); 1271 if (btrfs_test_opt(info, DISCARD_SYNC)) 1272 seq_puts(seq, ",discard"); 1273 if (btrfs_test_opt(info, DISCARD_ASYNC)) 1274 seq_puts(seq, ",discard=async"); 1275 if (!(info->sb->s_flags & SB_POSIXACL)) 1276 seq_puts(seq, ",noacl"); 1277 if (btrfs_free_space_cache_v1_active(info)) 1278 seq_puts(seq, ",space_cache"); 1279 else if (btrfs_fs_compat_ro(info, FREE_SPACE_TREE)) 1280 seq_puts(seq, ",space_cache=v2"); 1281 else 1282 seq_puts(seq, ",nospace_cache"); 1283 if (btrfs_test_opt(info, RESCAN_UUID_TREE)) 1284 seq_puts(seq, ",rescan_uuid_tree"); 1285 if (btrfs_test_opt(info, CLEAR_CACHE)) 1286 seq_puts(seq, ",clear_cache"); 1287 if (btrfs_test_opt(info, USER_SUBVOL_RM_ALLOWED)) 1288 seq_puts(seq, ",user_subvol_rm_allowed"); 1289 if (btrfs_test_opt(info, ENOSPC_DEBUG)) 1290 seq_puts(seq, ",enospc_debug"); 1291 if (btrfs_test_opt(info, AUTO_DEFRAG)) 1292 seq_puts(seq, ",autodefrag"); 1293 if (btrfs_test_opt(info, SKIP_BALANCE)) 1294 seq_puts(seq, ",skip_balance"); 1295 #ifdef CONFIG_BTRFS_FS_CHECK_INTEGRITY 1296 if (btrfs_test_opt(info, CHECK_INTEGRITY_DATA)) 1297 seq_puts(seq, ",check_int_data"); 1298 else if (btrfs_test_opt(info, CHECK_INTEGRITY)) 1299 seq_puts(seq, ",check_int"); 1300 if (info->check_integrity_print_mask) 1301 seq_printf(seq, ",check_int_print_mask=%d", 1302 info->check_integrity_print_mask); 1303 #endif 1304 if (info->metadata_ratio) 1305 seq_printf(seq, ",metadata_ratio=%u", info->metadata_ratio); 1306 if (btrfs_test_opt(info, PANIC_ON_FATAL_ERROR)) 1307 seq_puts(seq, ",fatal_errors=panic"); 1308 if (info->commit_interval != BTRFS_DEFAULT_COMMIT_INTERVAL) 1309 seq_printf(seq, ",commit=%u", info->commit_interval); 1310 #ifdef CONFIG_BTRFS_DEBUG 1311 if (btrfs_test_opt(info, FRAGMENT_DATA)) 1312 seq_puts(seq, ",fragment=data"); 1313 if (btrfs_test_opt(info, FRAGMENT_METADATA)) 1314 seq_puts(seq, ",fragment=metadata"); 1315 #endif 1316 if (btrfs_test_opt(info, REF_VERIFY)) 1317 seq_puts(seq, ",ref_verify"); 1318 seq_printf(seq, ",subvolid=%llu", 1319 BTRFS_I(d_inode(dentry))->root->root_key.objectid); 1320 subvol_name = btrfs_get_subvol_name_from_objectid(info, 1321 BTRFS_I(d_inode(dentry))->root->root_key.objectid); 1322 if (!IS_ERR(subvol_name)) { 1323 seq_puts(seq, ",subvol="); 1324 seq_escape(seq, subvol_name, " \t\n\\"); 1325 kfree(subvol_name); 1326 } 1327 return 0; 1328 } 1329 1330 static int btrfs_test_super(struct super_block *s, void *data) 1331 { 1332 struct btrfs_fs_info *p = data; 1333 struct btrfs_fs_info *fs_info = btrfs_sb(s); 1334 1335 return fs_info->fs_devices == p->fs_devices; 1336 } 1337 1338 static int btrfs_set_super(struct super_block *s, void *data) 1339 { 1340 int err = set_anon_super(s, data); 1341 if (!err) 1342 s->s_fs_info = data; 1343 return err; 1344 } 1345 1346 /* 1347 * subvolumes are identified by ino 256 1348 */ 1349 static inline int is_subvolume_inode(struct inode *inode) 1350 { 1351 if (inode && inode->i_ino == BTRFS_FIRST_FREE_OBJECTID) 1352 return 1; 1353 return 0; 1354 } 1355 1356 static struct dentry *mount_subvol(const char *subvol_name, u64 subvol_objectid, 1357 struct vfsmount *mnt) 1358 { 1359 struct dentry *root; 1360 int ret; 1361 1362 if (!subvol_name) { 1363 if (!subvol_objectid) { 1364 ret = get_default_subvol_objectid(btrfs_sb(mnt->mnt_sb), 1365 &subvol_objectid); 1366 if (ret) { 1367 root = ERR_PTR(ret); 1368 goto out; 1369 } 1370 } 1371 subvol_name = btrfs_get_subvol_name_from_objectid( 1372 btrfs_sb(mnt->mnt_sb), subvol_objectid); 1373 if (IS_ERR(subvol_name)) { 1374 root = ERR_CAST(subvol_name); 1375 subvol_name = NULL; 1376 goto out; 1377 } 1378 1379 } 1380 1381 root = mount_subtree(mnt, subvol_name); 1382 /* mount_subtree() drops our reference on the vfsmount. */ 1383 mnt = NULL; 1384 1385 if (!IS_ERR(root)) { 1386 struct super_block *s = root->d_sb; 1387 struct btrfs_fs_info *fs_info = btrfs_sb(s); 1388 struct inode *root_inode = d_inode(root); 1389 u64 root_objectid = BTRFS_I(root_inode)->root->root_key.objectid; 1390 1391 ret = 0; 1392 if (!is_subvolume_inode(root_inode)) { 1393 btrfs_err(fs_info, "'%s' is not a valid subvolume", 1394 subvol_name); 1395 ret = -EINVAL; 1396 } 1397 if (subvol_objectid && root_objectid != subvol_objectid) { 1398 /* 1399 * This will also catch a race condition where a 1400 * subvolume which was passed by ID is renamed and 1401 * another subvolume is renamed over the old location. 1402 */ 1403 btrfs_err(fs_info, 1404 "subvol '%s' does not match subvolid %llu", 1405 subvol_name, subvol_objectid); 1406 ret = -EINVAL; 1407 } 1408 if (ret) { 1409 dput(root); 1410 root = ERR_PTR(ret); 1411 deactivate_locked_super(s); 1412 } 1413 } 1414 1415 out: 1416 mntput(mnt); 1417 kfree(subvol_name); 1418 return root; 1419 } 1420 1421 /* 1422 * Find a superblock for the given device / mount point. 1423 * 1424 * Note: This is based on mount_bdev from fs/super.c with a few additions 1425 * for multiple device setup. Make sure to keep it in sync. 1426 */ 1427 static struct dentry *btrfs_mount_root(struct file_system_type *fs_type, 1428 int flags, const char *device_name, void *data) 1429 { 1430 struct block_device *bdev = NULL; 1431 struct super_block *s; 1432 struct btrfs_device *device = NULL; 1433 struct btrfs_fs_devices *fs_devices = NULL; 1434 struct btrfs_fs_info *fs_info = NULL; 1435 void *new_sec_opts = NULL; 1436 fmode_t mode = FMODE_READ; 1437 int error = 0; 1438 1439 if (!(flags & SB_RDONLY)) 1440 mode |= FMODE_WRITE; 1441 1442 if (data) { 1443 error = security_sb_eat_lsm_opts(data, &new_sec_opts); 1444 if (error) 1445 return ERR_PTR(error); 1446 } 1447 1448 /* 1449 * Setup a dummy root and fs_info for test/set super. This is because 1450 * we don't actually fill this stuff out until open_ctree, but we need 1451 * then open_ctree will properly initialize the file system specific 1452 * settings later. btrfs_init_fs_info initializes the static elements 1453 * of the fs_info (locks and such) to make cleanup easier if we find a 1454 * superblock with our given fs_devices later on at sget() time. 1455 */ 1456 fs_info = kvzalloc(sizeof(struct btrfs_fs_info), GFP_KERNEL); 1457 if (!fs_info) { 1458 error = -ENOMEM; 1459 goto error_sec_opts; 1460 } 1461 btrfs_init_fs_info(fs_info); 1462 1463 fs_info->super_copy = kzalloc(BTRFS_SUPER_INFO_SIZE, GFP_KERNEL); 1464 fs_info->super_for_commit = kzalloc(BTRFS_SUPER_INFO_SIZE, GFP_KERNEL); 1465 if (!fs_info->super_copy || !fs_info->super_for_commit) { 1466 error = -ENOMEM; 1467 goto error_fs_info; 1468 } 1469 1470 mutex_lock(&uuid_mutex); 1471 error = btrfs_parse_device_options(data, mode, fs_type); 1472 if (error) { 1473 mutex_unlock(&uuid_mutex); 1474 goto error_fs_info; 1475 } 1476 1477 device = btrfs_scan_one_device(device_name, mode, fs_type); 1478 if (IS_ERR(device)) { 1479 mutex_unlock(&uuid_mutex); 1480 error = PTR_ERR(device); 1481 goto error_fs_info; 1482 } 1483 1484 fs_devices = device->fs_devices; 1485 fs_info->fs_devices = fs_devices; 1486 1487 error = btrfs_open_devices(fs_devices, mode, fs_type); 1488 mutex_unlock(&uuid_mutex); 1489 if (error) 1490 goto error_fs_info; 1491 1492 if (!(flags & SB_RDONLY) && fs_devices->rw_devices == 0) { 1493 error = -EACCES; 1494 goto error_close_devices; 1495 } 1496 1497 bdev = fs_devices->latest_dev->bdev; 1498 s = sget(fs_type, btrfs_test_super, btrfs_set_super, flags | SB_NOSEC, 1499 fs_info); 1500 if (IS_ERR(s)) { 1501 error = PTR_ERR(s); 1502 goto error_close_devices; 1503 } 1504 1505 if (s->s_root) { 1506 btrfs_close_devices(fs_devices); 1507 btrfs_free_fs_info(fs_info); 1508 if ((flags ^ s->s_flags) & SB_RDONLY) 1509 error = -EBUSY; 1510 } else { 1511 snprintf(s->s_id, sizeof(s->s_id), "%pg", bdev); 1512 shrinker_debugfs_rename(&s->s_shrink, "sb-%s:%s", fs_type->name, 1513 s->s_id); 1514 btrfs_sb(s)->bdev_holder = fs_type; 1515 if (!strstr(crc32c_impl(), "generic")) 1516 set_bit(BTRFS_FS_CSUM_IMPL_FAST, &fs_info->flags); 1517 error = btrfs_fill_super(s, fs_devices, data); 1518 } 1519 if (!error) 1520 error = security_sb_set_mnt_opts(s, new_sec_opts, 0, NULL); 1521 security_free_mnt_opts(&new_sec_opts); 1522 if (error) { 1523 deactivate_locked_super(s); 1524 return ERR_PTR(error); 1525 } 1526 1527 return dget(s->s_root); 1528 1529 error_close_devices: 1530 btrfs_close_devices(fs_devices); 1531 error_fs_info: 1532 btrfs_free_fs_info(fs_info); 1533 error_sec_opts: 1534 security_free_mnt_opts(&new_sec_opts); 1535 return ERR_PTR(error); 1536 } 1537 1538 /* 1539 * Mount function which is called by VFS layer. 1540 * 1541 * In order to allow mounting a subvolume directly, btrfs uses mount_subtree() 1542 * which needs vfsmount* of device's root (/). This means device's root has to 1543 * be mounted internally in any case. 1544 * 1545 * Operation flow: 1546 * 1. Parse subvol id related options for later use in mount_subvol(). 1547 * 1548 * 2. Mount device's root (/) by calling vfs_kern_mount(). 1549 * 1550 * NOTE: vfs_kern_mount() is used by VFS to call btrfs_mount() in the 1551 * first place. In order to avoid calling btrfs_mount() again, we use 1552 * different file_system_type which is not registered to VFS by 1553 * register_filesystem() (btrfs_root_fs_type). As a result, 1554 * btrfs_mount_root() is called. The return value will be used by 1555 * mount_subtree() in mount_subvol(). 1556 * 1557 * 3. Call mount_subvol() to get the dentry of subvolume. Since there is 1558 * "btrfs subvolume set-default", mount_subvol() is called always. 1559 */ 1560 static struct dentry *btrfs_mount(struct file_system_type *fs_type, int flags, 1561 const char *device_name, void *data) 1562 { 1563 struct vfsmount *mnt_root; 1564 struct dentry *root; 1565 char *subvol_name = NULL; 1566 u64 subvol_objectid = 0; 1567 int error = 0; 1568 1569 error = btrfs_parse_subvol_options(data, &subvol_name, 1570 &subvol_objectid); 1571 if (error) { 1572 kfree(subvol_name); 1573 return ERR_PTR(error); 1574 } 1575 1576 /* mount device's root (/) */ 1577 mnt_root = vfs_kern_mount(&btrfs_root_fs_type, flags, device_name, data); 1578 if (PTR_ERR_OR_ZERO(mnt_root) == -EBUSY) { 1579 if (flags & SB_RDONLY) { 1580 mnt_root = vfs_kern_mount(&btrfs_root_fs_type, 1581 flags & ~SB_RDONLY, device_name, data); 1582 } else { 1583 mnt_root = vfs_kern_mount(&btrfs_root_fs_type, 1584 flags | SB_RDONLY, device_name, data); 1585 if (IS_ERR(mnt_root)) { 1586 root = ERR_CAST(mnt_root); 1587 kfree(subvol_name); 1588 goto out; 1589 } 1590 1591 down_write(&mnt_root->mnt_sb->s_umount); 1592 error = btrfs_remount(mnt_root->mnt_sb, &flags, NULL); 1593 up_write(&mnt_root->mnt_sb->s_umount); 1594 if (error < 0) { 1595 root = ERR_PTR(error); 1596 mntput(mnt_root); 1597 kfree(subvol_name); 1598 goto out; 1599 } 1600 } 1601 } 1602 if (IS_ERR(mnt_root)) { 1603 root = ERR_CAST(mnt_root); 1604 kfree(subvol_name); 1605 goto out; 1606 } 1607 1608 /* mount_subvol() will free subvol_name and mnt_root */ 1609 root = mount_subvol(subvol_name, subvol_objectid, mnt_root); 1610 1611 out: 1612 return root; 1613 } 1614 1615 static void btrfs_resize_thread_pool(struct btrfs_fs_info *fs_info, 1616 u32 new_pool_size, u32 old_pool_size) 1617 { 1618 if (new_pool_size == old_pool_size) 1619 return; 1620 1621 fs_info->thread_pool_size = new_pool_size; 1622 1623 btrfs_info(fs_info, "resize thread pool %d -> %d", 1624 old_pool_size, new_pool_size); 1625 1626 btrfs_workqueue_set_max(fs_info->workers, new_pool_size); 1627 btrfs_workqueue_set_max(fs_info->hipri_workers, new_pool_size); 1628 btrfs_workqueue_set_max(fs_info->delalloc_workers, new_pool_size); 1629 btrfs_workqueue_set_max(fs_info->caching_workers, new_pool_size); 1630 btrfs_workqueue_set_max(fs_info->endio_write_workers, new_pool_size); 1631 btrfs_workqueue_set_max(fs_info->endio_freespace_worker, new_pool_size); 1632 btrfs_workqueue_set_max(fs_info->delayed_workers, new_pool_size); 1633 } 1634 1635 static inline void btrfs_remount_begin(struct btrfs_fs_info *fs_info, 1636 unsigned long old_opts, int flags) 1637 { 1638 if (btrfs_raw_test_opt(old_opts, AUTO_DEFRAG) && 1639 (!btrfs_raw_test_opt(fs_info->mount_opt, AUTO_DEFRAG) || 1640 (flags & SB_RDONLY))) { 1641 /* wait for any defraggers to finish */ 1642 wait_event(fs_info->transaction_wait, 1643 (atomic_read(&fs_info->defrag_running) == 0)); 1644 if (flags & SB_RDONLY) 1645 sync_filesystem(fs_info->sb); 1646 } 1647 } 1648 1649 static inline void btrfs_remount_cleanup(struct btrfs_fs_info *fs_info, 1650 unsigned long old_opts) 1651 { 1652 const bool cache_opt = btrfs_test_opt(fs_info, SPACE_CACHE); 1653 1654 /* 1655 * We need to cleanup all defragable inodes if the autodefragment is 1656 * close or the filesystem is read only. 1657 */ 1658 if (btrfs_raw_test_opt(old_opts, AUTO_DEFRAG) && 1659 (!btrfs_raw_test_opt(fs_info->mount_opt, AUTO_DEFRAG) || sb_rdonly(fs_info->sb))) { 1660 btrfs_cleanup_defrag_inodes(fs_info); 1661 } 1662 1663 /* If we toggled discard async */ 1664 if (!btrfs_raw_test_opt(old_opts, DISCARD_ASYNC) && 1665 btrfs_test_opt(fs_info, DISCARD_ASYNC)) 1666 btrfs_discard_resume(fs_info); 1667 else if (btrfs_raw_test_opt(old_opts, DISCARD_ASYNC) && 1668 !btrfs_test_opt(fs_info, DISCARD_ASYNC)) 1669 btrfs_discard_cleanup(fs_info); 1670 1671 /* If we toggled space cache */ 1672 if (cache_opt != btrfs_free_space_cache_v1_active(fs_info)) 1673 btrfs_set_free_space_cache_v1_active(fs_info, cache_opt); 1674 } 1675 1676 static int btrfs_remount(struct super_block *sb, int *flags, char *data) 1677 { 1678 struct btrfs_fs_info *fs_info = btrfs_sb(sb); 1679 unsigned old_flags = sb->s_flags; 1680 unsigned long old_opts = fs_info->mount_opt; 1681 unsigned long old_compress_type = fs_info->compress_type; 1682 u64 old_max_inline = fs_info->max_inline; 1683 u32 old_thread_pool_size = fs_info->thread_pool_size; 1684 u32 old_metadata_ratio = fs_info->metadata_ratio; 1685 int ret; 1686 1687 sync_filesystem(sb); 1688 set_bit(BTRFS_FS_STATE_REMOUNTING, &fs_info->fs_state); 1689 1690 if (data) { 1691 void *new_sec_opts = NULL; 1692 1693 ret = security_sb_eat_lsm_opts(data, &new_sec_opts); 1694 if (!ret) 1695 ret = security_sb_remount(sb, new_sec_opts); 1696 security_free_mnt_opts(&new_sec_opts); 1697 if (ret) 1698 goto restore; 1699 } 1700 1701 ret = btrfs_parse_options(fs_info, data, *flags); 1702 if (ret) 1703 goto restore; 1704 1705 ret = btrfs_check_features(fs_info, sb); 1706 if (ret < 0) 1707 goto restore; 1708 1709 btrfs_remount_begin(fs_info, old_opts, *flags); 1710 btrfs_resize_thread_pool(fs_info, 1711 fs_info->thread_pool_size, old_thread_pool_size); 1712 1713 if ((bool)btrfs_test_opt(fs_info, FREE_SPACE_TREE) != 1714 (bool)btrfs_fs_compat_ro(fs_info, FREE_SPACE_TREE) && 1715 (!sb_rdonly(sb) || (*flags & SB_RDONLY))) { 1716 btrfs_warn(fs_info, 1717 "remount supports changing free space tree only from ro to rw"); 1718 /* Make sure free space cache options match the state on disk */ 1719 if (btrfs_fs_compat_ro(fs_info, FREE_SPACE_TREE)) { 1720 btrfs_set_opt(fs_info->mount_opt, FREE_SPACE_TREE); 1721 btrfs_clear_opt(fs_info->mount_opt, SPACE_CACHE); 1722 } 1723 if (btrfs_free_space_cache_v1_active(fs_info)) { 1724 btrfs_clear_opt(fs_info->mount_opt, FREE_SPACE_TREE); 1725 btrfs_set_opt(fs_info->mount_opt, SPACE_CACHE); 1726 } 1727 } 1728 1729 if ((bool)(*flags & SB_RDONLY) == sb_rdonly(sb)) 1730 goto out; 1731 1732 if (*flags & SB_RDONLY) { 1733 /* 1734 * this also happens on 'umount -rf' or on shutdown, when 1735 * the filesystem is busy. 1736 */ 1737 cancel_work_sync(&fs_info->async_reclaim_work); 1738 cancel_work_sync(&fs_info->async_data_reclaim_work); 1739 1740 btrfs_discard_cleanup(fs_info); 1741 1742 /* wait for the uuid_scan task to finish */ 1743 down(&fs_info->uuid_tree_rescan_sem); 1744 /* avoid complains from lockdep et al. */ 1745 up(&fs_info->uuid_tree_rescan_sem); 1746 1747 btrfs_set_sb_rdonly(sb); 1748 1749 /* 1750 * Setting SB_RDONLY will put the cleaner thread to 1751 * sleep at the next loop if it's already active. 1752 * If it's already asleep, we'll leave unused block 1753 * groups on disk until we're mounted read-write again 1754 * unless we clean them up here. 1755 */ 1756 btrfs_delete_unused_bgs(fs_info); 1757 1758 /* 1759 * The cleaner task could be already running before we set the 1760 * flag BTRFS_FS_STATE_RO (and SB_RDONLY in the superblock). 1761 * We must make sure that after we finish the remount, i.e. after 1762 * we call btrfs_commit_super(), the cleaner can no longer start 1763 * a transaction - either because it was dropping a dead root, 1764 * running delayed iputs or deleting an unused block group (the 1765 * cleaner picked a block group from the list of unused block 1766 * groups before we were able to in the previous call to 1767 * btrfs_delete_unused_bgs()). 1768 */ 1769 wait_on_bit(&fs_info->flags, BTRFS_FS_CLEANER_RUNNING, 1770 TASK_UNINTERRUPTIBLE); 1771 1772 /* 1773 * We've set the superblock to RO mode, so we might have made 1774 * the cleaner task sleep without running all pending delayed 1775 * iputs. Go through all the delayed iputs here, so that if an 1776 * unmount happens without remounting RW we don't end up at 1777 * finishing close_ctree() with a non-empty list of delayed 1778 * iputs. 1779 */ 1780 btrfs_run_delayed_iputs(fs_info); 1781 1782 btrfs_dev_replace_suspend_for_unmount(fs_info); 1783 btrfs_scrub_cancel(fs_info); 1784 btrfs_pause_balance(fs_info); 1785 1786 /* 1787 * Pause the qgroup rescan worker if it is running. We don't want 1788 * it to be still running after we are in RO mode, as after that, 1789 * by the time we unmount, it might have left a transaction open, 1790 * so we would leak the transaction and/or crash. 1791 */ 1792 btrfs_qgroup_wait_for_completion(fs_info, false); 1793 1794 ret = btrfs_commit_super(fs_info); 1795 if (ret) 1796 goto restore; 1797 } else { 1798 if (BTRFS_FS_ERROR(fs_info)) { 1799 btrfs_err(fs_info, 1800 "Remounting read-write after error is not allowed"); 1801 ret = -EINVAL; 1802 goto restore; 1803 } 1804 if (fs_info->fs_devices->rw_devices == 0) { 1805 ret = -EACCES; 1806 goto restore; 1807 } 1808 1809 if (!btrfs_check_rw_degradable(fs_info, NULL)) { 1810 btrfs_warn(fs_info, 1811 "too many missing devices, writable remount is not allowed"); 1812 ret = -EACCES; 1813 goto restore; 1814 } 1815 1816 if (btrfs_super_log_root(fs_info->super_copy) != 0) { 1817 btrfs_warn(fs_info, 1818 "mount required to replay tree-log, cannot remount read-write"); 1819 ret = -EINVAL; 1820 goto restore; 1821 } 1822 1823 /* 1824 * NOTE: when remounting with a change that does writes, don't 1825 * put it anywhere above this point, as we are not sure to be 1826 * safe to write until we pass the above checks. 1827 */ 1828 ret = btrfs_start_pre_rw_mount(fs_info); 1829 if (ret) 1830 goto restore; 1831 1832 btrfs_clear_sb_rdonly(sb); 1833 1834 set_bit(BTRFS_FS_OPEN, &fs_info->flags); 1835 } 1836 out: 1837 /* 1838 * We need to set SB_I_VERSION here otherwise it'll get cleared by VFS, 1839 * since the absence of the flag means it can be toggled off by remount. 1840 */ 1841 *flags |= SB_I_VERSION; 1842 1843 wake_up_process(fs_info->transaction_kthread); 1844 btrfs_remount_cleanup(fs_info, old_opts); 1845 btrfs_clear_oneshot_options(fs_info); 1846 clear_bit(BTRFS_FS_STATE_REMOUNTING, &fs_info->fs_state); 1847 1848 return 0; 1849 1850 restore: 1851 /* We've hit an error - don't reset SB_RDONLY */ 1852 if (sb_rdonly(sb)) 1853 old_flags |= SB_RDONLY; 1854 if (!(old_flags & SB_RDONLY)) 1855 clear_bit(BTRFS_FS_STATE_RO, &fs_info->fs_state); 1856 sb->s_flags = old_flags; 1857 fs_info->mount_opt = old_opts; 1858 fs_info->compress_type = old_compress_type; 1859 fs_info->max_inline = old_max_inline; 1860 btrfs_resize_thread_pool(fs_info, 1861 old_thread_pool_size, fs_info->thread_pool_size); 1862 fs_info->metadata_ratio = old_metadata_ratio; 1863 btrfs_remount_cleanup(fs_info, old_opts); 1864 clear_bit(BTRFS_FS_STATE_REMOUNTING, &fs_info->fs_state); 1865 1866 return ret; 1867 } 1868 1869 /* Used to sort the devices by max_avail(descending sort) */ 1870 static int btrfs_cmp_device_free_bytes(const void *a, const void *b) 1871 { 1872 const struct btrfs_device_info *dev_info1 = a; 1873 const struct btrfs_device_info *dev_info2 = b; 1874 1875 if (dev_info1->max_avail > dev_info2->max_avail) 1876 return -1; 1877 else if (dev_info1->max_avail < dev_info2->max_avail) 1878 return 1; 1879 return 0; 1880 } 1881 1882 /* 1883 * sort the devices by max_avail, in which max free extent size of each device 1884 * is stored.(Descending Sort) 1885 */ 1886 static inline void btrfs_descending_sort_devices( 1887 struct btrfs_device_info *devices, 1888 size_t nr_devices) 1889 { 1890 sort(devices, nr_devices, sizeof(struct btrfs_device_info), 1891 btrfs_cmp_device_free_bytes, NULL); 1892 } 1893 1894 /* 1895 * The helper to calc the free space on the devices that can be used to store 1896 * file data. 1897 */ 1898 static inline int btrfs_calc_avail_data_space(struct btrfs_fs_info *fs_info, 1899 u64 *free_bytes) 1900 { 1901 struct btrfs_device_info *devices_info; 1902 struct btrfs_fs_devices *fs_devices = fs_info->fs_devices; 1903 struct btrfs_device *device; 1904 u64 type; 1905 u64 avail_space; 1906 u64 min_stripe_size; 1907 int num_stripes = 1; 1908 int i = 0, nr_devices; 1909 const struct btrfs_raid_attr *rattr; 1910 1911 /* 1912 * We aren't under the device list lock, so this is racy-ish, but good 1913 * enough for our purposes. 1914 */ 1915 nr_devices = fs_info->fs_devices->open_devices; 1916 if (!nr_devices) { 1917 smp_mb(); 1918 nr_devices = fs_info->fs_devices->open_devices; 1919 ASSERT(nr_devices); 1920 if (!nr_devices) { 1921 *free_bytes = 0; 1922 return 0; 1923 } 1924 } 1925 1926 devices_info = kmalloc_array(nr_devices, sizeof(*devices_info), 1927 GFP_KERNEL); 1928 if (!devices_info) 1929 return -ENOMEM; 1930 1931 /* calc min stripe number for data space allocation */ 1932 type = btrfs_data_alloc_profile(fs_info); 1933 rattr = &btrfs_raid_array[btrfs_bg_flags_to_raid_index(type)]; 1934 1935 if (type & BTRFS_BLOCK_GROUP_RAID0) 1936 num_stripes = nr_devices; 1937 else if (type & BTRFS_BLOCK_GROUP_RAID1_MASK) 1938 num_stripes = rattr->ncopies; 1939 else if (type & BTRFS_BLOCK_GROUP_RAID10) 1940 num_stripes = 4; 1941 1942 /* Adjust for more than 1 stripe per device */ 1943 min_stripe_size = rattr->dev_stripes * BTRFS_STRIPE_LEN; 1944 1945 rcu_read_lock(); 1946 list_for_each_entry_rcu(device, &fs_devices->devices, dev_list) { 1947 if (!test_bit(BTRFS_DEV_STATE_IN_FS_METADATA, 1948 &device->dev_state) || 1949 !device->bdev || 1950 test_bit(BTRFS_DEV_STATE_REPLACE_TGT, &device->dev_state)) 1951 continue; 1952 1953 if (i >= nr_devices) 1954 break; 1955 1956 avail_space = device->total_bytes - device->bytes_used; 1957 1958 /* align with stripe_len */ 1959 avail_space = rounddown(avail_space, BTRFS_STRIPE_LEN); 1960 1961 /* 1962 * Ensure we have at least min_stripe_size on top of the 1963 * reserved space on the device. 1964 */ 1965 if (avail_space <= BTRFS_DEVICE_RANGE_RESERVED + min_stripe_size) 1966 continue; 1967 1968 avail_space -= BTRFS_DEVICE_RANGE_RESERVED; 1969 1970 devices_info[i].dev = device; 1971 devices_info[i].max_avail = avail_space; 1972 1973 i++; 1974 } 1975 rcu_read_unlock(); 1976 1977 nr_devices = i; 1978 1979 btrfs_descending_sort_devices(devices_info, nr_devices); 1980 1981 i = nr_devices - 1; 1982 avail_space = 0; 1983 while (nr_devices >= rattr->devs_min) { 1984 num_stripes = min(num_stripes, nr_devices); 1985 1986 if (devices_info[i].max_avail >= min_stripe_size) { 1987 int j; 1988 u64 alloc_size; 1989 1990 avail_space += devices_info[i].max_avail * num_stripes; 1991 alloc_size = devices_info[i].max_avail; 1992 for (j = i + 1 - num_stripes; j <= i; j++) 1993 devices_info[j].max_avail -= alloc_size; 1994 } 1995 i--; 1996 nr_devices--; 1997 } 1998 1999 kfree(devices_info); 2000 *free_bytes = avail_space; 2001 return 0; 2002 } 2003 2004 /* 2005 * Calculate numbers for 'df', pessimistic in case of mixed raid profiles. 2006 * 2007 * If there's a redundant raid level at DATA block groups, use the respective 2008 * multiplier to scale the sizes. 2009 * 2010 * Unused device space usage is based on simulating the chunk allocator 2011 * algorithm that respects the device sizes and order of allocations. This is 2012 * a close approximation of the actual use but there are other factors that may 2013 * change the result (like a new metadata chunk). 2014 * 2015 * If metadata is exhausted, f_bavail will be 0. 2016 */ 2017 static int btrfs_statfs(struct dentry *dentry, struct kstatfs *buf) 2018 { 2019 struct btrfs_fs_info *fs_info = btrfs_sb(dentry->d_sb); 2020 struct btrfs_super_block *disk_super = fs_info->super_copy; 2021 struct btrfs_space_info *found; 2022 u64 total_used = 0; 2023 u64 total_free_data = 0; 2024 u64 total_free_meta = 0; 2025 u32 bits = fs_info->sectorsize_bits; 2026 __be32 *fsid = (__be32 *)fs_info->fs_devices->fsid; 2027 unsigned factor = 1; 2028 struct btrfs_block_rsv *block_rsv = &fs_info->global_block_rsv; 2029 int ret; 2030 u64 thresh = 0; 2031 int mixed = 0; 2032 2033 list_for_each_entry(found, &fs_info->space_info, list) { 2034 if (found->flags & BTRFS_BLOCK_GROUP_DATA) { 2035 int i; 2036 2037 total_free_data += found->disk_total - found->disk_used; 2038 total_free_data -= 2039 btrfs_account_ro_block_groups_free_space(found); 2040 2041 for (i = 0; i < BTRFS_NR_RAID_TYPES; i++) { 2042 if (!list_empty(&found->block_groups[i])) 2043 factor = btrfs_bg_type_to_factor( 2044 btrfs_raid_array[i].bg_flag); 2045 } 2046 } 2047 2048 /* 2049 * Metadata in mixed block goup profiles are accounted in data 2050 */ 2051 if (!mixed && found->flags & BTRFS_BLOCK_GROUP_METADATA) { 2052 if (found->flags & BTRFS_BLOCK_GROUP_DATA) 2053 mixed = 1; 2054 else 2055 total_free_meta += found->disk_total - 2056 found->disk_used; 2057 } 2058 2059 total_used += found->disk_used; 2060 } 2061 2062 buf->f_blocks = div_u64(btrfs_super_total_bytes(disk_super), factor); 2063 buf->f_blocks >>= bits; 2064 buf->f_bfree = buf->f_blocks - (div_u64(total_used, factor) >> bits); 2065 2066 /* Account global block reserve as used, it's in logical size already */ 2067 spin_lock(&block_rsv->lock); 2068 /* Mixed block groups accounting is not byte-accurate, avoid overflow */ 2069 if (buf->f_bfree >= block_rsv->size >> bits) 2070 buf->f_bfree -= block_rsv->size >> bits; 2071 else 2072 buf->f_bfree = 0; 2073 spin_unlock(&block_rsv->lock); 2074 2075 buf->f_bavail = div_u64(total_free_data, factor); 2076 ret = btrfs_calc_avail_data_space(fs_info, &total_free_data); 2077 if (ret) 2078 return ret; 2079 buf->f_bavail += div_u64(total_free_data, factor); 2080 buf->f_bavail = buf->f_bavail >> bits; 2081 2082 /* 2083 * We calculate the remaining metadata space minus global reserve. If 2084 * this is (supposedly) smaller than zero, there's no space. But this 2085 * does not hold in practice, the exhausted state happens where's still 2086 * some positive delta. So we apply some guesswork and compare the 2087 * delta to a 4M threshold. (Practically observed delta was ~2M.) 2088 * 2089 * We probably cannot calculate the exact threshold value because this 2090 * depends on the internal reservations requested by various 2091 * operations, so some operations that consume a few metadata will 2092 * succeed even if the Avail is zero. But this is better than the other 2093 * way around. 2094 */ 2095 thresh = SZ_4M; 2096 2097 /* 2098 * We only want to claim there's no available space if we can no longer 2099 * allocate chunks for our metadata profile and our global reserve will 2100 * not fit in the free metadata space. If we aren't ->full then we 2101 * still can allocate chunks and thus are fine using the currently 2102 * calculated f_bavail. 2103 */ 2104 if (!mixed && block_rsv->space_info->full && 2105 total_free_meta - thresh < block_rsv->size) 2106 buf->f_bavail = 0; 2107 2108 buf->f_type = BTRFS_SUPER_MAGIC; 2109 buf->f_bsize = dentry->d_sb->s_blocksize; 2110 buf->f_namelen = BTRFS_NAME_LEN; 2111 2112 /* We treat it as constant endianness (it doesn't matter _which_) 2113 because we want the fsid to come out the same whether mounted 2114 on a big-endian or little-endian host */ 2115 buf->f_fsid.val[0] = be32_to_cpu(fsid[0]) ^ be32_to_cpu(fsid[2]); 2116 buf->f_fsid.val[1] = be32_to_cpu(fsid[1]) ^ be32_to_cpu(fsid[3]); 2117 /* Mask in the root object ID too, to disambiguate subvols */ 2118 buf->f_fsid.val[0] ^= 2119 BTRFS_I(d_inode(dentry))->root->root_key.objectid >> 32; 2120 buf->f_fsid.val[1] ^= 2121 BTRFS_I(d_inode(dentry))->root->root_key.objectid; 2122 2123 return 0; 2124 } 2125 2126 static void btrfs_kill_super(struct super_block *sb) 2127 { 2128 struct btrfs_fs_info *fs_info = btrfs_sb(sb); 2129 kill_anon_super(sb); 2130 btrfs_free_fs_info(fs_info); 2131 } 2132 2133 static struct file_system_type btrfs_fs_type = { 2134 .owner = THIS_MODULE, 2135 .name = "btrfs", 2136 .mount = btrfs_mount, 2137 .kill_sb = btrfs_kill_super, 2138 .fs_flags = FS_REQUIRES_DEV | FS_BINARY_MOUNTDATA, 2139 }; 2140 2141 static struct file_system_type btrfs_root_fs_type = { 2142 .owner = THIS_MODULE, 2143 .name = "btrfs", 2144 .mount = btrfs_mount_root, 2145 .kill_sb = btrfs_kill_super, 2146 .fs_flags = FS_REQUIRES_DEV | FS_BINARY_MOUNTDATA | FS_ALLOW_IDMAP, 2147 }; 2148 2149 MODULE_ALIAS_FS("btrfs"); 2150 2151 static int btrfs_control_open(struct inode *inode, struct file *file) 2152 { 2153 /* 2154 * The control file's private_data is used to hold the 2155 * transaction when it is started and is used to keep 2156 * track of whether a transaction is already in progress. 2157 */ 2158 file->private_data = NULL; 2159 return 0; 2160 } 2161 2162 /* 2163 * Used by /dev/btrfs-control for devices ioctls. 2164 */ 2165 static long btrfs_control_ioctl(struct file *file, unsigned int cmd, 2166 unsigned long arg) 2167 { 2168 struct btrfs_ioctl_vol_args *vol; 2169 struct btrfs_device *device = NULL; 2170 dev_t devt = 0; 2171 int ret = -ENOTTY; 2172 2173 if (!capable(CAP_SYS_ADMIN)) 2174 return -EPERM; 2175 2176 vol = memdup_user((void __user *)arg, sizeof(*vol)); 2177 if (IS_ERR(vol)) 2178 return PTR_ERR(vol); 2179 vol->name[BTRFS_PATH_NAME_MAX] = '\0'; 2180 2181 switch (cmd) { 2182 case BTRFS_IOC_SCAN_DEV: 2183 mutex_lock(&uuid_mutex); 2184 device = btrfs_scan_one_device(vol->name, FMODE_READ, 2185 &btrfs_root_fs_type); 2186 ret = PTR_ERR_OR_ZERO(device); 2187 mutex_unlock(&uuid_mutex); 2188 break; 2189 case BTRFS_IOC_FORGET_DEV: 2190 if (vol->name[0] != 0) { 2191 ret = lookup_bdev(vol->name, &devt); 2192 if (ret) 2193 break; 2194 } 2195 ret = btrfs_forget_devices(devt); 2196 break; 2197 case BTRFS_IOC_DEVICES_READY: 2198 mutex_lock(&uuid_mutex); 2199 device = btrfs_scan_one_device(vol->name, FMODE_READ, 2200 &btrfs_root_fs_type); 2201 if (IS_ERR(device)) { 2202 mutex_unlock(&uuid_mutex); 2203 ret = PTR_ERR(device); 2204 break; 2205 } 2206 ret = !(device->fs_devices->num_devices == 2207 device->fs_devices->total_devices); 2208 mutex_unlock(&uuid_mutex); 2209 break; 2210 case BTRFS_IOC_GET_SUPPORTED_FEATURES: 2211 ret = btrfs_ioctl_get_supported_features((void __user*)arg); 2212 break; 2213 } 2214 2215 kfree(vol); 2216 return ret; 2217 } 2218 2219 static int btrfs_freeze(struct super_block *sb) 2220 { 2221 struct btrfs_trans_handle *trans; 2222 struct btrfs_fs_info *fs_info = btrfs_sb(sb); 2223 struct btrfs_root *root = fs_info->tree_root; 2224 2225 set_bit(BTRFS_FS_FROZEN, &fs_info->flags); 2226 /* 2227 * We don't need a barrier here, we'll wait for any transaction that 2228 * could be in progress on other threads (and do delayed iputs that 2229 * we want to avoid on a frozen filesystem), or do the commit 2230 * ourselves. 2231 */ 2232 trans = btrfs_attach_transaction_barrier(root); 2233 if (IS_ERR(trans)) { 2234 /* no transaction, don't bother */ 2235 if (PTR_ERR(trans) == -ENOENT) 2236 return 0; 2237 return PTR_ERR(trans); 2238 } 2239 return btrfs_commit_transaction(trans); 2240 } 2241 2242 static int check_dev_super(struct btrfs_device *dev) 2243 { 2244 struct btrfs_fs_info *fs_info = dev->fs_info; 2245 struct btrfs_super_block *sb; 2246 u16 csum_type; 2247 int ret = 0; 2248 2249 /* This should be called with fs still frozen. */ 2250 ASSERT(test_bit(BTRFS_FS_FROZEN, &fs_info->flags)); 2251 2252 /* Missing dev, no need to check. */ 2253 if (!dev->bdev) 2254 return 0; 2255 2256 /* Only need to check the primary super block. */ 2257 sb = btrfs_read_dev_one_super(dev->bdev, 0, true); 2258 if (IS_ERR(sb)) 2259 return PTR_ERR(sb); 2260 2261 /* Verify the checksum. */ 2262 csum_type = btrfs_super_csum_type(sb); 2263 if (csum_type != btrfs_super_csum_type(fs_info->super_copy)) { 2264 btrfs_err(fs_info, "csum type changed, has %u expect %u", 2265 csum_type, btrfs_super_csum_type(fs_info->super_copy)); 2266 ret = -EUCLEAN; 2267 goto out; 2268 } 2269 2270 if (btrfs_check_super_csum(fs_info, sb)) { 2271 btrfs_err(fs_info, "csum for on-disk super block no longer matches"); 2272 ret = -EUCLEAN; 2273 goto out; 2274 } 2275 2276 /* Btrfs_validate_super() includes fsid check against super->fsid. */ 2277 ret = btrfs_validate_super(fs_info, sb, 0); 2278 if (ret < 0) 2279 goto out; 2280 2281 if (btrfs_super_generation(sb) != fs_info->last_trans_committed) { 2282 btrfs_err(fs_info, "transid mismatch, has %llu expect %llu", 2283 btrfs_super_generation(sb), 2284 fs_info->last_trans_committed); 2285 ret = -EUCLEAN; 2286 goto out; 2287 } 2288 out: 2289 btrfs_release_disk_super(sb); 2290 return ret; 2291 } 2292 2293 static int btrfs_unfreeze(struct super_block *sb) 2294 { 2295 struct btrfs_fs_info *fs_info = btrfs_sb(sb); 2296 struct btrfs_device *device; 2297 int ret = 0; 2298 2299 /* 2300 * Make sure the fs is not changed by accident (like hibernation then 2301 * modified by other OS). 2302 * If we found anything wrong, we mark the fs error immediately. 2303 * 2304 * And since the fs is frozen, no one can modify the fs yet, thus 2305 * we don't need to hold device_list_mutex. 2306 */ 2307 list_for_each_entry(device, &fs_info->fs_devices->devices, dev_list) { 2308 ret = check_dev_super(device); 2309 if (ret < 0) { 2310 btrfs_handle_fs_error(fs_info, ret, 2311 "super block on devid %llu got modified unexpectedly", 2312 device->devid); 2313 break; 2314 } 2315 } 2316 clear_bit(BTRFS_FS_FROZEN, &fs_info->flags); 2317 2318 /* 2319 * We still return 0, to allow VFS layer to unfreeze the fs even the 2320 * above checks failed. Since the fs is either fine or read-only, we're 2321 * safe to continue, without causing further damage. 2322 */ 2323 return 0; 2324 } 2325 2326 static int btrfs_show_devname(struct seq_file *m, struct dentry *root) 2327 { 2328 struct btrfs_fs_info *fs_info = btrfs_sb(root->d_sb); 2329 2330 /* 2331 * There should be always a valid pointer in latest_dev, it may be stale 2332 * for a short moment in case it's being deleted but still valid until 2333 * the end of RCU grace period. 2334 */ 2335 rcu_read_lock(); 2336 seq_escape(m, rcu_str_deref(fs_info->fs_devices->latest_dev->name), " \t\n\\"); 2337 rcu_read_unlock(); 2338 2339 return 0; 2340 } 2341 2342 static const struct super_operations btrfs_super_ops = { 2343 .drop_inode = btrfs_drop_inode, 2344 .evict_inode = btrfs_evict_inode, 2345 .put_super = btrfs_put_super, 2346 .sync_fs = btrfs_sync_fs, 2347 .show_options = btrfs_show_options, 2348 .show_devname = btrfs_show_devname, 2349 .alloc_inode = btrfs_alloc_inode, 2350 .destroy_inode = btrfs_destroy_inode, 2351 .free_inode = btrfs_free_inode, 2352 .statfs = btrfs_statfs, 2353 .remount_fs = btrfs_remount, 2354 .freeze_fs = btrfs_freeze, 2355 .unfreeze_fs = btrfs_unfreeze, 2356 }; 2357 2358 static const struct file_operations btrfs_ctl_fops = { 2359 .open = btrfs_control_open, 2360 .unlocked_ioctl = btrfs_control_ioctl, 2361 .compat_ioctl = compat_ptr_ioctl, 2362 .owner = THIS_MODULE, 2363 .llseek = noop_llseek, 2364 }; 2365 2366 static struct miscdevice btrfs_misc = { 2367 .minor = BTRFS_MINOR, 2368 .name = "btrfs-control", 2369 .fops = &btrfs_ctl_fops 2370 }; 2371 2372 MODULE_ALIAS_MISCDEV(BTRFS_MINOR); 2373 MODULE_ALIAS("devname:btrfs-control"); 2374 2375 static int __init btrfs_interface_init(void) 2376 { 2377 return misc_register(&btrfs_misc); 2378 } 2379 2380 static __cold void btrfs_interface_exit(void) 2381 { 2382 misc_deregister(&btrfs_misc); 2383 } 2384 2385 static int __init btrfs_print_mod_info(void) 2386 { 2387 static const char options[] = "" 2388 #ifdef CONFIG_BTRFS_DEBUG 2389 ", debug=on" 2390 #endif 2391 #ifdef CONFIG_BTRFS_ASSERT 2392 ", assert=on" 2393 #endif 2394 #ifdef CONFIG_BTRFS_FS_CHECK_INTEGRITY 2395 ", integrity-checker=on" 2396 #endif 2397 #ifdef CONFIG_BTRFS_FS_REF_VERIFY 2398 ", ref-verify=on" 2399 #endif 2400 #ifdef CONFIG_BLK_DEV_ZONED 2401 ", zoned=yes" 2402 #else 2403 ", zoned=no" 2404 #endif 2405 #ifdef CONFIG_FS_VERITY 2406 ", fsverity=yes" 2407 #else 2408 ", fsverity=no" 2409 #endif 2410 ; 2411 pr_info("Btrfs loaded, crc32c=%s%s\n", crc32c_impl(), options); 2412 return 0; 2413 } 2414 2415 static int register_btrfs(void) 2416 { 2417 return register_filesystem(&btrfs_fs_type); 2418 } 2419 2420 static void unregister_btrfs(void) 2421 { 2422 unregister_filesystem(&btrfs_fs_type); 2423 } 2424 2425 /* Helper structure for long init/exit functions. */ 2426 struct init_sequence { 2427 int (*init_func)(void); 2428 /* Can be NULL if the init_func doesn't need cleanup. */ 2429 void (*exit_func)(void); 2430 }; 2431 2432 static const struct init_sequence mod_init_seq[] = { 2433 { 2434 .init_func = btrfs_props_init, 2435 .exit_func = NULL, 2436 }, { 2437 .init_func = btrfs_init_sysfs, 2438 .exit_func = btrfs_exit_sysfs, 2439 }, { 2440 .init_func = btrfs_init_compress, 2441 .exit_func = btrfs_exit_compress, 2442 }, { 2443 .init_func = btrfs_init_cachep, 2444 .exit_func = btrfs_destroy_cachep, 2445 }, { 2446 .init_func = btrfs_transaction_init, 2447 .exit_func = btrfs_transaction_exit, 2448 }, { 2449 .init_func = btrfs_ctree_init, 2450 .exit_func = btrfs_ctree_exit, 2451 }, { 2452 .init_func = btrfs_free_space_init, 2453 .exit_func = btrfs_free_space_exit, 2454 }, { 2455 .init_func = extent_state_init_cachep, 2456 .exit_func = extent_state_free_cachep, 2457 }, { 2458 .init_func = extent_buffer_init_cachep, 2459 .exit_func = extent_buffer_free_cachep, 2460 }, { 2461 .init_func = btrfs_bioset_init, 2462 .exit_func = btrfs_bioset_exit, 2463 }, { 2464 .init_func = extent_map_init, 2465 .exit_func = extent_map_exit, 2466 }, { 2467 .init_func = ordered_data_init, 2468 .exit_func = ordered_data_exit, 2469 }, { 2470 .init_func = btrfs_delayed_inode_init, 2471 .exit_func = btrfs_delayed_inode_exit, 2472 }, { 2473 .init_func = btrfs_auto_defrag_init, 2474 .exit_func = btrfs_auto_defrag_exit, 2475 }, { 2476 .init_func = btrfs_delayed_ref_init, 2477 .exit_func = btrfs_delayed_ref_exit, 2478 }, { 2479 .init_func = btrfs_prelim_ref_init, 2480 .exit_func = btrfs_prelim_ref_exit, 2481 }, { 2482 .init_func = btrfs_interface_init, 2483 .exit_func = btrfs_interface_exit, 2484 }, { 2485 .init_func = btrfs_print_mod_info, 2486 .exit_func = NULL, 2487 }, { 2488 .init_func = btrfs_run_sanity_tests, 2489 .exit_func = NULL, 2490 }, { 2491 .init_func = register_btrfs, 2492 .exit_func = unregister_btrfs, 2493 } 2494 }; 2495 2496 static bool mod_init_result[ARRAY_SIZE(mod_init_seq)]; 2497 2498 static __always_inline void btrfs_exit_btrfs_fs(void) 2499 { 2500 int i; 2501 2502 for (i = ARRAY_SIZE(mod_init_seq) - 1; i >= 0; i--) { 2503 if (!mod_init_result[i]) 2504 continue; 2505 if (mod_init_seq[i].exit_func) 2506 mod_init_seq[i].exit_func(); 2507 mod_init_result[i] = false; 2508 } 2509 } 2510 2511 static void __exit exit_btrfs_fs(void) 2512 { 2513 btrfs_exit_btrfs_fs(); 2514 } 2515 2516 static int __init init_btrfs_fs(void) 2517 { 2518 int ret; 2519 int i; 2520 2521 for (i = 0; i < ARRAY_SIZE(mod_init_seq); i++) { 2522 ASSERT(!mod_init_result[i]); 2523 ret = mod_init_seq[i].init_func(); 2524 if (ret < 0) { 2525 btrfs_exit_btrfs_fs(); 2526 return ret; 2527 } 2528 mod_init_result[i] = true; 2529 } 2530 return 0; 2531 } 2532 2533 late_initcall(init_btrfs_fs); 2534 module_exit(exit_btrfs_fs) 2535 2536 MODULE_LICENSE("GPL"); 2537 MODULE_SOFTDEP("pre: crc32c"); 2538 MODULE_SOFTDEP("pre: xxhash64"); 2539 MODULE_SOFTDEP("pre: sha256"); 2540 MODULE_SOFTDEP("pre: blake2b-256"); 2541