// SPDX-License-Identifier: GPL-2.0
/*
 * Copyright (C) 2007 Oracle. All rights reserved.
 */

#include <linux/kernel.h>
#include <linux/bio.h>
#include <linux/file.h>
#include <linux/fs.h>
#include <linux/fsnotify.h>
#include <linux/pagemap.h>
#include <linux/highmem.h>
#include <linux/time.h>
#include <linux/string.h>
#include <linux/backing-dev.h>
#include <linux/mount.h>
#include <linux/namei.h>
#include <linux/writeback.h>
#include <linux/compat.h>
#include <linux/security.h>
#include <linux/xattr.h>
#include <linux/mm.h>
#include <linux/slab.h>
#include <linux/blkdev.h>
#include <linux/uuid.h>
#include <linux/btrfs.h>
#include <linux/uaccess.h>
#include <linux/iversion.h>
#include "ctree.h"
#include "disk-io.h"
#include "transaction.h"
#include "btrfs_inode.h"
#include "print-tree.h"
#include "volumes.h"
#include "locking.h"
#include "inode-map.h"
#include "backref.h"
#include "rcu-string.h"
#include "send.h"
#include "dev-replace.h"
#include "props.h"
#include "sysfs.h"
#include "qgroup.h"
#include "tree-log.h"
#include "compression.h"

#ifdef CONFIG_64BIT
/* If we have a 32-bit userspace and 64-bit kernel, then the UAPI
 * structures are incorrect, as the timespec structure from userspace
 * is 4 bytes too small. We define these alternatives here to teach
 * the kernel about the 32-bit struct packing.
 */
struct btrfs_ioctl_timespec_32 {
	__u64 sec;
	__u32 nsec;
} __attribute__ ((__packed__));

struct btrfs_ioctl_received_subvol_args_32 {
	char	uuid[BTRFS_UUID_SIZE];	/* in */
	__u64	stransid;		/* in */
	__u64	rtransid;		/* out */
	struct btrfs_ioctl_timespec_32 stime; /* in */
	struct btrfs_ioctl_timespec_32 rtime; /* out */
	__u64	flags;			/* in */
	__u64	reserved[16];		/* in */
} __attribute__ ((__packed__));

#define BTRFS_IOC_SET_RECEIVED_SUBVOL_32 _IOWR(BTRFS_IOCTL_MAGIC, 37, \
				struct btrfs_ioctl_received_subvol_args_32)
#endif

#if defined(CONFIG_64BIT) && defined(CONFIG_COMPAT)
struct btrfs_ioctl_send_args_32 {
	__s64 send_fd;			/* in */
	__u64 clone_sources_count;	/* in */
	compat_uptr_t clone_sources;	/* in */
	__u64 parent_root;		/* in */
	__u64 flags;			/* in */
	__u64 reserved[4];		/* in */
} __attribute__ ((__packed__));

#define BTRFS_IOC_SEND_32 _IOW(BTRFS_IOCTL_MAGIC, 38, \
			       struct btrfs_ioctl_send_args_32)
#endif

static int btrfs_clone(struct inode *src, struct inode *inode,
		       u64 off, u64 olen, u64 olen_aligned, u64 destoff,
		       int no_time_update);

/* Mask out flags that are inappropriate for the given type of inode. */
static unsigned int btrfs_mask_fsflags_for_type(struct inode *inode,
		unsigned int flags)
{
	if (S_ISDIR(inode->i_mode))
		return flags;
	else if (S_ISREG(inode->i_mode))
		return flags & ~FS_DIRSYNC_FL;
	else
		return flags & (FS_NODUMP_FL | FS_NOATIME_FL);
}
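/*
 * Illustrative example of the masking above: for an inode that is
 * neither a directory nor a regular file (e.g. a symlink), only
 * FS_NODUMP_FL and FS_NOATIME_FL survive, so a request to set
 * FS_IMMUTABLE_FL on a symlink is silently dropped here rather than
 * rejected.
 */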
/*
 * Export internal inode flags to the format expected by the FS_IOC_GETFLAGS
 * ioctl.
 */
static unsigned int btrfs_inode_flags_to_fsflags(unsigned int flags)
{
	unsigned int iflags = 0;

	if (flags & BTRFS_INODE_SYNC)
		iflags |= FS_SYNC_FL;
	if (flags & BTRFS_INODE_IMMUTABLE)
		iflags |= FS_IMMUTABLE_FL;
	if (flags & BTRFS_INODE_APPEND)
		iflags |= FS_APPEND_FL;
	if (flags & BTRFS_INODE_NODUMP)
		iflags |= FS_NODUMP_FL;
	if (flags & BTRFS_INODE_NOATIME)
		iflags |= FS_NOATIME_FL;
	if (flags & BTRFS_INODE_DIRSYNC)
		iflags |= FS_DIRSYNC_FL;
	if (flags & BTRFS_INODE_NODATACOW)
		iflags |= FS_NOCOW_FL;

	if (flags & BTRFS_INODE_NOCOMPRESS)
		iflags |= FS_NOCOMP_FL;
	else if (flags & BTRFS_INODE_COMPRESS)
		iflags |= FS_COMPR_FL;

	return iflags;
}

/*
 * Update inode->i_flags based on the btrfs internal flags.
 */
void btrfs_sync_inode_flags_to_i_flags(struct inode *inode)
{
	struct btrfs_inode *binode = BTRFS_I(inode);
	unsigned int new_fl = 0;

	if (binode->flags & BTRFS_INODE_SYNC)
		new_fl |= S_SYNC;
	if (binode->flags & BTRFS_INODE_IMMUTABLE)
		new_fl |= S_IMMUTABLE;
	if (binode->flags & BTRFS_INODE_APPEND)
		new_fl |= S_APPEND;
	if (binode->flags & BTRFS_INODE_NOATIME)
		new_fl |= S_NOATIME;
	if (binode->flags & BTRFS_INODE_DIRSYNC)
		new_fl |= S_DIRSYNC;

	set_mask_bits(&inode->i_flags,
		      S_SYNC | S_APPEND | S_IMMUTABLE | S_NOATIME | S_DIRSYNC,
		      new_fl);
}

static int btrfs_ioctl_getflags(struct file *file, void __user *arg)
{
	struct btrfs_inode *binode = BTRFS_I(file_inode(file));
	unsigned int flags = btrfs_inode_flags_to_fsflags(binode->flags);

	if (copy_to_user(arg, &flags, sizeof(flags)))
		return -EFAULT;
	return 0;
}
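/*
 * Hypothetical userspace sketch of the round trip serviced by
 * btrfs_ioctl_getflags() above and btrfs_ioctl_setflags() below
 * (error handling omitted; the handlers copy sizeof(unsigned int)):
 *
 *	unsigned int attr;
 *
 *	ioctl(fd, FS_IOC_GETFLAGS, &attr);
 *	attr |= FS_NOATIME_FL;
 *	ioctl(fd, FS_IOC_SETFLAGS, &attr);
 */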
/* Check if @flags are a supported and valid set of FS_*_FL flags */
static int check_fsflags(unsigned int flags)
{
	if (flags & ~(FS_IMMUTABLE_FL | FS_APPEND_FL | \
		      FS_NOATIME_FL | FS_NODUMP_FL | \
		      FS_SYNC_FL | FS_DIRSYNC_FL | \
		      FS_NOCOMP_FL | FS_COMPR_FL |
		      FS_NOCOW_FL))
		return -EOPNOTSUPP;

	if ((flags & FS_NOCOMP_FL) && (flags & FS_COMPR_FL))
		return -EINVAL;

	return 0;
}

static int btrfs_ioctl_setflags(struct file *file, void __user *arg)
{
	struct inode *inode = file_inode(file);
	struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
	struct btrfs_inode *binode = BTRFS_I(inode);
	struct btrfs_root *root = binode->root;
	struct btrfs_trans_handle *trans;
	unsigned int fsflags, old_fsflags;
	int ret;
	u64 old_flags;
	unsigned int old_i_flags;
	umode_t mode;

	if (!inode_owner_or_capable(inode))
		return -EPERM;

	if (btrfs_root_readonly(root))
		return -EROFS;

	if (copy_from_user(&fsflags, arg, sizeof(fsflags)))
		return -EFAULT;

	ret = check_fsflags(fsflags);
	if (ret)
		return ret;

	ret = mnt_want_write_file(file);
	if (ret)
		return ret;

	inode_lock(inode);

	old_flags = binode->flags;
	old_i_flags = inode->i_flags;
	mode = inode->i_mode;

	fsflags = btrfs_mask_fsflags_for_type(inode, fsflags);
	old_fsflags = btrfs_inode_flags_to_fsflags(binode->flags);
	if ((fsflags ^ old_fsflags) & (FS_APPEND_FL | FS_IMMUTABLE_FL)) {
		if (!capable(CAP_LINUX_IMMUTABLE)) {
			ret = -EPERM;
			goto out_unlock;
		}
	}

	if (fsflags & FS_SYNC_FL)
		binode->flags |= BTRFS_INODE_SYNC;
	else
		binode->flags &= ~BTRFS_INODE_SYNC;
	if (fsflags & FS_IMMUTABLE_FL)
		binode->flags |= BTRFS_INODE_IMMUTABLE;
	else
		binode->flags &= ~BTRFS_INODE_IMMUTABLE;
	if (fsflags & FS_APPEND_FL)
		binode->flags |= BTRFS_INODE_APPEND;
	else
		binode->flags &= ~BTRFS_INODE_APPEND;
	if (fsflags & FS_NODUMP_FL)
		binode->flags |= BTRFS_INODE_NODUMP;
	else
		binode->flags &= ~BTRFS_INODE_NODUMP;
	if (fsflags & FS_NOATIME_FL)
		binode->flags |= BTRFS_INODE_NOATIME;
	else
		binode->flags &= ~BTRFS_INODE_NOATIME;
	if (fsflags & FS_DIRSYNC_FL)
		binode->flags |= BTRFS_INODE_DIRSYNC;
	else
		binode->flags &= ~BTRFS_INODE_DIRSYNC;
	if (fsflags & FS_NOCOW_FL) {
		if (S_ISREG(mode)) {
			/*
			 * It's safe to turn csums off here, no extents exist.
			 * Otherwise we want the flag to reflect the real COW
			 * status of the file and will not set it.
			 */
			if (inode->i_size == 0)
				binode->flags |= BTRFS_INODE_NODATACOW
					      | BTRFS_INODE_NODATASUM;
		} else {
			binode->flags |= BTRFS_INODE_NODATACOW;
		}
	} else {
		/*
		 * Revert back under same assumptions as above
		 */
		if (S_ISREG(mode)) {
			if (inode->i_size == 0)
				binode->flags &= ~(BTRFS_INODE_NODATACOW
						 | BTRFS_INODE_NODATASUM);
		} else {
			binode->flags &= ~BTRFS_INODE_NODATACOW;
		}
	}

	/*
	 * The COMPRESS flag can only be changed by users, while the NOCOMPRESS
	 * flag may be changed automatically if compression code won't make
	 * things smaller.
	 */
	if (fsflags & FS_NOCOMP_FL) {
		binode->flags &= ~BTRFS_INODE_COMPRESS;
		binode->flags |= BTRFS_INODE_NOCOMPRESS;

		ret = btrfs_set_prop(inode, "btrfs.compression", NULL, 0, 0);
		if (ret && ret != -ENODATA)
			goto out_drop;
	} else if (fsflags & FS_COMPR_FL) {
		const char *comp;

		if (IS_SWAPFILE(inode)) {
			ret = -ETXTBSY;
			goto out_unlock;
		}

		binode->flags |= BTRFS_INODE_COMPRESS;
		binode->flags &= ~BTRFS_INODE_NOCOMPRESS;

		comp = btrfs_compress_type2str(fs_info->compress_type);
		if (!comp || comp[0] == 0)
			comp = btrfs_compress_type2str(BTRFS_COMPRESS_ZLIB);

		ret = btrfs_set_prop(inode, "btrfs.compression",
				     comp, strlen(comp), 0);
		if (ret)
			goto out_drop;

	} else {
		ret = btrfs_set_prop(inode, "btrfs.compression", NULL, 0, 0);
		if (ret && ret != -ENODATA)
			goto out_drop;
		binode->flags &= ~(BTRFS_INODE_COMPRESS | BTRFS_INODE_NOCOMPRESS);
	}

	trans = btrfs_start_transaction(root, 1);
	if (IS_ERR(trans)) {
		ret = PTR_ERR(trans);
		goto out_drop;
	}

	btrfs_sync_inode_flags_to_i_flags(inode);
	inode_inc_iversion(inode);
	inode->i_ctime = current_time(inode);
	ret = btrfs_update_inode(trans, root, inode);

	btrfs_end_transaction(trans);
out_drop:
	if (ret) {
		binode->flags = old_flags;
		inode->i_flags = old_i_flags;
	}

out_unlock:
	inode_unlock(inode);
	mnt_drop_write_file(file);
	return ret;
}
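/*
 * Worked example of the FS_NOCOW_FL handling in btrfs_ioctl_setflags()
 * above: "chattr +C" on an empty regular file sets both NODATACOW and
 * NODATASUM, which is safe because no checksummed extents exist yet;
 * on a non-empty regular file the request is silently ignored so the
 * flag keeps reflecting the real COW status; on a directory only
 * NODATACOW is set, from where newly created inodes inherit it.
 */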
/*
 * Translate btrfs internal inode flags to xflags as expected by the
 * FS_IOC_FSGETXATTR ioctl. Filter only the supported ones, unknown flags
 * are silently dropped.
 */
static unsigned int btrfs_inode_flags_to_xflags(unsigned int flags)
{
	unsigned int xflags = 0;

	if (flags & BTRFS_INODE_APPEND)
		xflags |= FS_XFLAG_APPEND;
	if (flags & BTRFS_INODE_IMMUTABLE)
		xflags |= FS_XFLAG_IMMUTABLE;
	if (flags & BTRFS_INODE_NOATIME)
		xflags |= FS_XFLAG_NOATIME;
	if (flags & BTRFS_INODE_NODUMP)
		xflags |= FS_XFLAG_NODUMP;
	if (flags & BTRFS_INODE_SYNC)
		xflags |= FS_XFLAG_SYNC;

	return xflags;
}

/* Check if @flags are a supported and valid set of FS_XFLAG_* flags */
static int check_xflags(unsigned int flags)
{
	if (flags & ~(FS_XFLAG_APPEND | FS_XFLAG_IMMUTABLE | FS_XFLAG_NOATIME |
		      FS_XFLAG_NODUMP | FS_XFLAG_SYNC))
		return -EOPNOTSUPP;
	return 0;
}

/*
 * Set the xflags from the internal inode flags. The remaining items of fsxattr
 * are zeroed.
 */
static int btrfs_ioctl_fsgetxattr(struct file *file, void __user *arg)
{
	struct btrfs_inode *binode = BTRFS_I(file_inode(file));
	struct fsxattr fa;

	memset(&fa, 0, sizeof(fa));
	fa.fsx_xflags = btrfs_inode_flags_to_xflags(binode->flags);

	if (copy_to_user(arg, &fa, sizeof(fa)))
		return -EFAULT;

	return 0;
}

static int btrfs_ioctl_fssetxattr(struct file *file, void __user *arg)
{
	struct inode *inode = file_inode(file);
	struct btrfs_inode *binode = BTRFS_I(inode);
	struct btrfs_root *root = binode->root;
	struct btrfs_trans_handle *trans;
	struct fsxattr fa;
	unsigned old_flags;
	unsigned old_i_flags;
	int ret = 0;

	if (!inode_owner_or_capable(inode))
		return -EPERM;

	if (btrfs_root_readonly(root))
		return -EROFS;

	memset(&fa, 0, sizeof(fa));
	if (copy_from_user(&fa, arg, sizeof(fa)))
		return -EFAULT;

	ret = check_xflags(fa.fsx_xflags);
	if (ret)
		return ret;

	if (fa.fsx_extsize != 0 || fa.fsx_projid != 0 || fa.fsx_cowextsize != 0)
		return -EOPNOTSUPP;

	ret = mnt_want_write_file(file);
	if (ret)
		return ret;

	inode_lock(inode);

	old_flags = binode->flags;
	old_i_flags = inode->i_flags;

	/* We need the capabilities to change append-only or immutable inode */
	if (((old_flags & (BTRFS_INODE_APPEND | BTRFS_INODE_IMMUTABLE)) ||
	     (fa.fsx_xflags & (FS_XFLAG_APPEND | FS_XFLAG_IMMUTABLE))) &&
	    !capable(CAP_LINUX_IMMUTABLE)) {
		ret = -EPERM;
		goto out_unlock;
	}

	if (fa.fsx_xflags & FS_XFLAG_SYNC)
		binode->flags |= BTRFS_INODE_SYNC;
	else
		binode->flags &= ~BTRFS_INODE_SYNC;
	if (fa.fsx_xflags & FS_XFLAG_IMMUTABLE)
		binode->flags |= BTRFS_INODE_IMMUTABLE;
	else
		binode->flags &= ~BTRFS_INODE_IMMUTABLE;
	if (fa.fsx_xflags & FS_XFLAG_APPEND)
		binode->flags |= BTRFS_INODE_APPEND;
	else
		binode->flags &= ~BTRFS_INODE_APPEND;
	if (fa.fsx_xflags & FS_XFLAG_NODUMP)
		binode->flags |= BTRFS_INODE_NODUMP;
	else
		binode->flags &= ~BTRFS_INODE_NODUMP;
	if (fa.fsx_xflags & FS_XFLAG_NOATIME)
		binode->flags |= BTRFS_INODE_NOATIME;
	else
		binode->flags &= ~BTRFS_INODE_NOATIME;

	/* 1 item for the inode */
	trans = btrfs_start_transaction(root, 1);
	if (IS_ERR(trans)) {
		ret = PTR_ERR(trans);
		goto out_unlock;
	}

	btrfs_sync_inode_flags_to_i_flags(inode);
	inode_inc_iversion(inode);
	inode->i_ctime = current_time(inode);
	ret = btrfs_update_inode(trans, root, inode);

	btrfs_end_transaction(trans);

out_unlock:
	if (ret) {
		binode->flags = old_flags;
		inode->i_flags = old_i_flags;
	}

	inode_unlock(inode);
	mnt_drop_write_file(file);

	return ret;
}
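/*
 * Hypothetical userspace sketch of the fsxattr interface implemented
 * by the two handlers above (error handling omitted):
 *
 *	struct fsxattr fa;
 *
 *	ioctl(fd, FS_IOC_FSGETXATTR, &fa);
 *	fa.fsx_xflags |= FS_XFLAG_APPEND;
 *	ioctl(fd, FS_IOC_FSSETXATTR, &fa);
 *
 * Note that fsx_extsize, fsx_projid and fsx_cowextsize must stay zero,
 * or btrfs_ioctl_fssetxattr() fails with -EOPNOTSUPP.
 */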
static int btrfs_ioctl_getversion(struct file *file, int __user *arg)
{
	struct inode *inode = file_inode(file);

	return put_user(inode->i_generation, arg);
}

static noinline int btrfs_ioctl_fitrim(struct file *file, void __user *arg)
{
	struct inode *inode = file_inode(file);
	struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
	struct btrfs_device *device;
	struct request_queue *q;
	struct fstrim_range range;
	u64 minlen = ULLONG_MAX;
	u64 num_devices = 0;
	int ret;

	if (!capable(CAP_SYS_ADMIN))
		return -EPERM;

	/*
	 * If the fs is mounted with nologreplay, which requires it to be
	 * mounted in RO mode as well, we can not allow discard on free space
	 * inside block groups, because log trees refer to extents that are not
	 * pinned in a block group's free space cache (pinning the extents is
	 * precisely the first phase of replaying a log tree).
	 */
	if (btrfs_test_opt(fs_info, NOLOGREPLAY))
		return -EROFS;

	rcu_read_lock();
	list_for_each_entry_rcu(device, &fs_info->fs_devices->devices,
				dev_list) {
		if (!device->bdev)
			continue;
		q = bdev_get_queue(device->bdev);
		if (blk_queue_discard(q)) {
			num_devices++;
			minlen = min_t(u64, q->limits.discard_granularity,
				       minlen);
		}
	}
	rcu_read_unlock();

	if (!num_devices)
		return -EOPNOTSUPP;
	if (copy_from_user(&range, arg, sizeof(range)))
		return -EFAULT;

	/*
	 * NOTE: Don't truncate the range using super->total_bytes. Bytenr of
	 * block group is in the logical address space, which can be any
	 * sectorsize aligned bytenr in the range [0, U64_MAX].
	 */
	if (range.len < fs_info->sb->s_blocksize)
		return -EINVAL;

	range.minlen = max(range.minlen, minlen);
	ret = btrfs_trim_fs(fs_info, &range);
	if (ret < 0)
		return ret;

	if (copy_to_user(arg, &range, sizeof(range)))
		return -EFAULT;

	return 0;
}
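/*
 * Hypothetical userspace sketch of the FITRIM path handled by
 * btrfs_ioctl_fitrim() above (error handling omitted): trim the whole
 * filesystem, honouring the per-device discard granularity.
 *
 *	struct fstrim_range range = { .start = 0, .len = ULLONG_MAX };
 *
 *	ioctl(fd, FITRIM, &range);
 *	// range.len now reports the number of bytes actually trimmed
 */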
int btrfs_is_empty_uuid(u8 *uuid)
{
	int i;

	for (i = 0; i < BTRFS_UUID_SIZE; i++) {
		if (uuid[i])
			return 0;
	}
	return 1;
}

static noinline int create_subvol(struct inode *dir,
				  struct dentry *dentry,
				  const char *name, int namelen,
				  u64 *async_transid,
				  struct btrfs_qgroup_inherit *inherit)
{
	struct btrfs_fs_info *fs_info = btrfs_sb(dir->i_sb);
	struct btrfs_trans_handle *trans;
	struct btrfs_key key;
	struct btrfs_root_item *root_item;
	struct btrfs_inode_item *inode_item;
	struct extent_buffer *leaf;
	struct btrfs_root *root = BTRFS_I(dir)->root;
	struct btrfs_root *new_root;
	struct btrfs_block_rsv block_rsv;
	struct timespec64 cur_time = current_time(dir);
	struct inode *inode;
	int ret;
	int err;
	u64 objectid;
	u64 new_dirid = BTRFS_FIRST_FREE_OBJECTID;
	u64 index = 0;
	uuid_le new_uuid;

	root_item = kzalloc(sizeof(*root_item), GFP_KERNEL);
	if (!root_item)
		return -ENOMEM;

	ret = btrfs_find_free_objectid(fs_info->tree_root, &objectid);
	if (ret)
		goto fail_free;

	/*
	 * Don't create subvolume whose level is not zero. Or qgroup will be
	 * screwed up since it assumes subvolume qgroup's level to be 0.
	 */
	if (btrfs_qgroup_level(objectid)) {
		ret = -ENOSPC;
		goto fail_free;
	}

	btrfs_init_block_rsv(&block_rsv, BTRFS_BLOCK_RSV_TEMP);
	/*
	 * The same as the snapshot creation, please see the comment
	 * of create_snapshot().
	 */
	ret = btrfs_subvolume_reserve_metadata(root, &block_rsv, 8, false);
	if (ret)
		goto fail_free;

	trans = btrfs_start_transaction(root, 0);
	if (IS_ERR(trans)) {
		ret = PTR_ERR(trans);
		btrfs_subvolume_release_metadata(fs_info, &block_rsv);
		goto fail_free;
	}
	trans->block_rsv = &block_rsv;
	trans->bytes_reserved = block_rsv.size;

	ret = btrfs_qgroup_inherit(trans, 0, objectid, inherit);
	if (ret)
		goto fail;

	leaf = btrfs_alloc_tree_block(trans, root, 0, objectid, NULL, 0, 0, 0);
	if (IS_ERR(leaf)) {
		ret = PTR_ERR(leaf);
		goto fail;
	}

	btrfs_mark_buffer_dirty(leaf);

	inode_item = &root_item->inode;
	btrfs_set_stack_inode_generation(inode_item, 1);
	btrfs_set_stack_inode_size(inode_item, 3);
	btrfs_set_stack_inode_nlink(inode_item, 1);
	btrfs_set_stack_inode_nbytes(inode_item,
				     fs_info->nodesize);
	btrfs_set_stack_inode_mode(inode_item, S_IFDIR | 0755);

	btrfs_set_root_flags(root_item, 0);
	btrfs_set_root_limit(root_item, 0);
	btrfs_set_stack_inode_flags(inode_item, BTRFS_INODE_ROOT_ITEM_INIT);

	btrfs_set_root_bytenr(root_item, leaf->start);
	btrfs_set_root_generation(root_item, trans->transid);
	btrfs_set_root_level(root_item, 0);
	btrfs_set_root_refs(root_item, 1);
	btrfs_set_root_used(root_item, leaf->len);
	btrfs_set_root_last_snapshot(root_item, 0);

	btrfs_set_root_generation_v2(root_item,
				     btrfs_root_generation(root_item));
	uuid_le_gen(&new_uuid);
	memcpy(root_item->uuid, new_uuid.b, BTRFS_UUID_SIZE);
	btrfs_set_stack_timespec_sec(&root_item->otime, cur_time.tv_sec);
	btrfs_set_stack_timespec_nsec(&root_item->otime, cur_time.tv_nsec);
	root_item->ctime = root_item->otime;
	btrfs_set_root_ctransid(root_item, trans->transid);
	btrfs_set_root_otransid(root_item, trans->transid);

	btrfs_tree_unlock(leaf);
	free_extent_buffer(leaf);
	leaf = NULL;

	btrfs_set_root_dirid(root_item, new_dirid);

	key.objectid = objectid;
	key.offset = 0;
	key.type = BTRFS_ROOT_ITEM_KEY;
	ret = btrfs_insert_root(trans, fs_info->tree_root, &key,
				root_item);
	if (ret)
		goto fail;

	key.offset = (u64)-1;
	new_root = btrfs_read_fs_root_no_name(fs_info, &key);
	if (IS_ERR(new_root)) {
		ret = PTR_ERR(new_root);
		btrfs_abort_transaction(trans, ret);
		goto fail;
	}

	btrfs_record_root_in_trans(trans, new_root);

	ret = btrfs_create_subvol_root(trans, new_root, root, new_dirid);
	if (ret) {
		/* We potentially lose an unused inode item here */
		btrfs_abort_transaction(trans, ret);
		goto fail;
	}

	mutex_lock(&new_root->objectid_mutex);
	new_root->highest_objectid = new_dirid;
	mutex_unlock(&new_root->objectid_mutex);

	/*
	 * insert the directory item
	 */
	ret = btrfs_set_inode_index(BTRFS_I(dir), &index);
	if (ret) {
		btrfs_abort_transaction(trans, ret);
		goto fail;
	}

	ret = btrfs_insert_dir_item(trans, name, namelen, BTRFS_I(dir), &key,
				    BTRFS_FT_DIR, index);
	if (ret) {
		btrfs_abort_transaction(trans, ret);
		goto fail;
	}

	btrfs_i_size_write(BTRFS_I(dir), dir->i_size + namelen * 2);
	ret = btrfs_update_inode(trans, root, dir);
	BUG_ON(ret);

	ret = btrfs_add_root_ref(trans, objectid, root->root_key.objectid,
				 btrfs_ino(BTRFS_I(dir)), index, name, namelen);
	BUG_ON(ret);

	ret = btrfs_uuid_tree_add(trans, root_item->uuid,
				  BTRFS_UUID_KEY_SUBVOL, objectid);
	if (ret)
		btrfs_abort_transaction(trans, ret);

fail:
	kfree(root_item);
	trans->block_rsv = NULL;
	trans->bytes_reserved = 0;
	btrfs_subvolume_release_metadata(fs_info, &block_rsv);

	if (async_transid) {
		*async_transid = trans->transid;
		err = btrfs_commit_transaction_async(trans, 1);
		if (err)
			err = btrfs_commit_transaction(trans);
	} else {
		err = btrfs_commit_transaction(trans);
	}
	if (err && !ret)
		ret = err;

	if (!ret) {
		inode = btrfs_lookup_dentry(dir, dentry);
		if (IS_ERR(inode))
			return PTR_ERR(inode);
		d_instantiate(dentry, inode);
	}
	return ret;

fail_free:
	kfree(root_item);
	return ret;
}
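/*
 * Note: the "8" passed to btrfs_subvolume_reserve_metadata() in
 * create_subvol() above matches the item count spelled out in the
 * reservation comment inside create_snapshot() below (parent dir
 * inode, two dir entries, root item, two root refs/backrefs, the
 * snapshot root and the UUID item).
 */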
static int create_snapshot(struct btrfs_root *root, struct inode *dir,
			   struct dentry *dentry,
			   u64 *async_transid, bool readonly,
			   struct btrfs_qgroup_inherit *inherit)
{
	struct btrfs_fs_info *fs_info = btrfs_sb(dir->i_sb);
	struct inode *inode;
	struct btrfs_pending_snapshot *pending_snapshot;
	struct btrfs_trans_handle *trans;
	int ret;
	bool snapshot_force_cow = false;

	if (!test_bit(BTRFS_ROOT_REF_COWS, &root->state))
		return -EINVAL;

	if (atomic_read(&root->nr_swapfiles)) {
		btrfs_warn(fs_info,
			   "cannot snapshot subvolume with active swapfile");
		return -ETXTBSY;
	}

	pending_snapshot = kzalloc(sizeof(*pending_snapshot), GFP_KERNEL);
	if (!pending_snapshot)
		return -ENOMEM;

	pending_snapshot->root_item = kzalloc(sizeof(struct btrfs_root_item),
			GFP_KERNEL);
	pending_snapshot->path = btrfs_alloc_path();
	if (!pending_snapshot->root_item || !pending_snapshot->path) {
		ret = -ENOMEM;
		goto free_pending;
	}

	/*
	 * Force new buffered writes to reserve space even when NOCOW is
	 * possible. This is to avoid later writeback (running delalloc)
	 * falling back to COW mode and unexpectedly failing with ENOSPC.
	 */
	atomic_inc(&root->will_be_snapshotted);
	smp_mb__after_atomic();
	/* wait for no snapshot writes */
	wait_event(root->subv_writers->wait,
		   percpu_counter_sum(&root->subv_writers->counter) == 0);

	ret = btrfs_start_delalloc_snapshot(root);
	if (ret)
		goto dec_and_free;

	/*
	 * All previous writes have started writeback in NOCOW mode, so now
	 * we force future writes to fall back to COW mode during snapshot
	 * creation.
	 */
	atomic_inc(&root->snapshot_force_cow);
	snapshot_force_cow = true;

	btrfs_wait_ordered_extents(root, U64_MAX, 0, (u64)-1);

	btrfs_init_block_rsv(&pending_snapshot->block_rsv,
			     BTRFS_BLOCK_RSV_TEMP);
	/*
	 * 1 - parent dir inode
	 * 2 - dir entries
	 * 1 - root item
	 * 2 - root ref/backref
	 * 1 - root of snapshot
	 * 1 - UUID item
	 */
	ret = btrfs_subvolume_reserve_metadata(BTRFS_I(dir)->root,
					&pending_snapshot->block_rsv, 8,
					false);
	if (ret)
		goto dec_and_free;

	pending_snapshot->dentry = dentry;
	pending_snapshot->root = root;
	pending_snapshot->readonly = readonly;
	pending_snapshot->dir = dir;
	pending_snapshot->inherit = inherit;

	trans = btrfs_start_transaction(root, 0);
	if (IS_ERR(trans)) {
		ret = PTR_ERR(trans);
		goto fail;
	}

	spin_lock(&fs_info->trans_lock);
	list_add(&pending_snapshot->list,
		 &trans->transaction->pending_snapshots);
	spin_unlock(&fs_info->trans_lock);
	if (async_transid) {
		*async_transid = trans->transid;
		ret = btrfs_commit_transaction_async(trans, 1);
		if (ret)
			ret = btrfs_commit_transaction(trans);
	} else {
		ret = btrfs_commit_transaction(trans);
	}
	if (ret)
		goto fail;

	ret = pending_snapshot->error;
	if (ret)
		goto fail;

	ret = btrfs_orphan_cleanup(pending_snapshot->snap);
	if (ret)
		goto fail;

	inode = btrfs_lookup_dentry(d_inode(dentry->d_parent), dentry);
	if (IS_ERR(inode)) {
		ret = PTR_ERR(inode);
		goto fail;
	}

	d_instantiate(dentry, inode);
	ret = 0;
fail:
	btrfs_subvolume_release_metadata(fs_info, &pending_snapshot->block_rsv);
dec_and_free:
	if (snapshot_force_cow)
		atomic_dec(&root->snapshot_force_cow);
	if (atomic_dec_and_test(&root->will_be_snapshotted))
		wake_up_var(&root->will_be_snapshotted);
free_pending:
	kfree(pending_snapshot->root_item);
	btrfs_free_path(pending_snapshot->path);
	kfree(pending_snapshot);

	return ret;
}
/* copy of may_delete in fs/namei.c
 * Check whether we can remove a link victim from directory dir, check
 * whether the type of victim is right.
 *  1. We can't do it if dir is read-only (done in permission())
 *  2. We should have write and exec permissions on dir
 *  3. We can't remove anything from append-only dir
 *  4. We can't do anything with immutable dir (done in permission())
 *  5. If the sticky bit on dir is set we should either
 *	a. be owner of dir, or
 *	b. be owner of victim, or
 *	c. have CAP_FOWNER capability
 *  6. If the victim is append-only or immutable we can't do anything with
 *     links pointing to it.
 *  7. If we were asked to remove a directory and victim isn't one - ENOTDIR.
 *  8. If we were asked to remove a non-directory and victim isn't one - EISDIR.
 *  9. We can't remove a root or mountpoint.
 * 10. We don't allow removal of NFS sillyrenamed files; it's handled by
 *     nfs_async_unlink().
 */

static int btrfs_may_delete(struct inode *dir, struct dentry *victim, int isdir)
{
	int error;

	if (d_really_is_negative(victim))
		return -ENOENT;

	BUG_ON(d_inode(victim->d_parent) != dir);
	audit_inode_child(dir, victim, AUDIT_TYPE_CHILD_DELETE);

	error = inode_permission(dir, MAY_WRITE | MAY_EXEC);
	if (error)
		return error;
	if (IS_APPEND(dir))
		return -EPERM;
	if (check_sticky(dir, d_inode(victim)) || IS_APPEND(d_inode(victim)) ||
	    IS_IMMUTABLE(d_inode(victim)) || IS_SWAPFILE(d_inode(victim)))
		return -EPERM;
	if (isdir) {
		if (!d_is_dir(victim))
			return -ENOTDIR;
		if (IS_ROOT(victim))
			return -EBUSY;
	} else if (d_is_dir(victim))
		return -EISDIR;
	if (IS_DEADDIR(dir))
		return -ENOENT;
	if (victim->d_flags & DCACHE_NFSFS_RENAMED)
		return -EBUSY;
	return 0;
}

/* copy of may_create in fs/namei.c */
static inline int btrfs_may_create(struct inode *dir, struct dentry *child)
{
	if (d_really_is_positive(child))
		return -EEXIST;
	if (IS_DEADDIR(dir))
		return -ENOENT;
	return inode_permission(dir, MAY_WRITE | MAY_EXEC);
}

/*
 * Create a new subvolume below @parent. This is largely modeled after
 * sys_mkdirat and vfs_mkdir, but we only do a single component lookup
 * inside this filesystem so it's quite a bit simpler.
 */
static noinline int btrfs_mksubvol(const struct path *parent,
				   const char *name, int namelen,
				   struct btrfs_root *snap_src,
				   u64 *async_transid, bool readonly,
				   struct btrfs_qgroup_inherit *inherit)
{
	struct inode *dir = d_inode(parent->dentry);
	struct btrfs_fs_info *fs_info = btrfs_sb(dir->i_sb);
	struct dentry *dentry;
	int error;

	error = down_write_killable_nested(&dir->i_rwsem, I_MUTEX_PARENT);
	if (error == -EINTR)
		return error;

	dentry = lookup_one_len(name, parent->dentry, namelen);
	error = PTR_ERR(dentry);
	if (IS_ERR(dentry))
		goto out_unlock;

	error = btrfs_may_create(dir, dentry);
	if (error)
		goto out_dput;

	/*
	 * even if this name doesn't exist, we may get hash collisions.
	 * check for them now when we can safely fail
	 */
	error = btrfs_check_dir_item_collision(BTRFS_I(dir)->root,
					       dir->i_ino, name,
					       namelen);
	if (error)
		goto out_dput;

	down_read(&fs_info->subvol_sem);

	if (btrfs_root_refs(&BTRFS_I(dir)->root->root_item) == 0)
		goto out_up_read;

	if (snap_src) {
		error = create_snapshot(snap_src, dir, dentry,
					async_transid, readonly, inherit);
	} else {
		error = create_subvol(dir, dentry, name, namelen,
				      async_transid, inherit);
	}
	if (!error)
		fsnotify_mkdir(dir, dentry);
out_up_read:
	up_read(&fs_info->subvol_sem);
out_dput:
	dput(dentry);
out_unlock:
	inode_unlock(dir);
	return error;
}
/*
 * When we're defragging a range, we don't want to kick it off again
 * if it is really just waiting for delalloc to send it down.
 * If we find a nice big extent or delalloc range for the bytes in the
 * file you want to defrag, we return 0 to let you know to skip this
 * part of the file
 */
static int check_defrag_in_cache(struct inode *inode, u64 offset, u32 thresh)
{
	struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree;
	struct extent_map *em = NULL;
	struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree;
	u64 end;

	read_lock(&em_tree->lock);
	em = lookup_extent_mapping(em_tree, offset, PAGE_SIZE);
	read_unlock(&em_tree->lock);

	if (em) {
		end = extent_map_end(em);
		free_extent_map(em);
		if (end - offset > thresh)
			return 0;
	}
	/* if we already have a nice delalloc here, just stop */
	thresh /= 2;
	end = count_range_bits(io_tree, &offset, offset + thresh,
			       thresh, EXTENT_DELALLOC, 1);
	if (end >= thresh)
		return 0;
	return 1;
}

/*
 * helper function to walk through a file and find extents
 * newer than a specific transid, and smaller than thresh.
 *
 * This is used by the defragging code to find new and small
 * extents
 */
static int find_new_extents(struct btrfs_root *root,
			    struct inode *inode, u64 newer_than,
			    u64 *off, u32 thresh)
{
	struct btrfs_path *path;
	struct btrfs_key min_key;
	struct extent_buffer *leaf;
	struct btrfs_file_extent_item *extent;
	int type;
	int ret;
	u64 ino = btrfs_ino(BTRFS_I(inode));

	path = btrfs_alloc_path();
	if (!path)
		return -ENOMEM;

	min_key.objectid = ino;
	min_key.type = BTRFS_EXTENT_DATA_KEY;
	min_key.offset = *off;

	while (1) {
		ret = btrfs_search_forward(root, &min_key, path, newer_than);
		if (ret != 0)
			goto none;
process_slot:
		if (min_key.objectid != ino)
			goto none;
		if (min_key.type != BTRFS_EXTENT_DATA_KEY)
			goto none;

		leaf = path->nodes[0];
		extent = btrfs_item_ptr(leaf, path->slots[0],
					struct btrfs_file_extent_item);

		type = btrfs_file_extent_type(leaf, extent);
		if (type == BTRFS_FILE_EXTENT_REG &&
		    btrfs_file_extent_num_bytes(leaf, extent) < thresh &&
		    check_defrag_in_cache(inode, min_key.offset, thresh)) {
			*off = min_key.offset;
			btrfs_free_path(path);
			return 0;
		}

		path->slots[0]++;
		if (path->slots[0] < btrfs_header_nritems(leaf)) {
			btrfs_item_key_to_cpu(leaf, &min_key, path->slots[0]);
			goto process_slot;
		}

		if (min_key.offset == (u64)-1)
			goto none;

		min_key.offset++;
		btrfs_release_path(path);
	}
none:
	btrfs_free_path(path);
	return -ENOENT;
}
static struct extent_map *defrag_lookup_extent(struct inode *inode, u64 start)
{
	struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree;
	struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree;
	struct extent_map *em;
	u64 len = PAGE_SIZE;

	/*
	 * hopefully we have this extent in the tree already, try without
	 * the full extent lock
	 */
	read_lock(&em_tree->lock);
	em = lookup_extent_mapping(em_tree, start, len);
	read_unlock(&em_tree->lock);

	if (!em) {
		struct extent_state *cached = NULL;
		u64 end = start + len - 1;

		/* get the big lock and read metadata off disk */
		lock_extent_bits(io_tree, start, end, &cached);
		em = btrfs_get_extent(BTRFS_I(inode), NULL, 0, start, len, 0);
		unlock_extent_cached(io_tree, start, end, &cached);

		if (IS_ERR(em))
			return NULL;
	}

	return em;
}

static bool defrag_check_next_extent(struct inode *inode, struct extent_map *em)
{
	struct extent_map *next;
	bool ret = true;

	/* this is the last extent */
	if (em->start + em->len >= i_size_read(inode))
		return false;

	next = defrag_lookup_extent(inode, em->start + em->len);
	if (!next || next->block_start >= EXTENT_MAP_LAST_BYTE)
		ret = false;
	else if ((em->block_start + em->block_len == next->block_start) &&
		 (em->block_len > SZ_128K && next->block_len > SZ_128K))
		ret = false;

	free_extent_map(next);
	return ret;
}

static int should_defrag_range(struct inode *inode, u64 start, u32 thresh,
			       u64 *last_len, u64 *skip, u64 *defrag_end,
			       int compress)
{
	struct extent_map *em;
	int ret = 1;
	bool next_mergeable = true;
	bool prev_mergeable = true;

	/*
	 * make sure that once we start defragging an extent, we keep on
	 * defragging it
	 */
	if (start < *defrag_end)
		return 1;

	*skip = 0;

	em = defrag_lookup_extent(inode, start);
	if (!em)
		return 0;

	/* this will cover holes, and inline extents */
	if (em->block_start >= EXTENT_MAP_LAST_BYTE) {
		ret = 0;
		goto out;
	}

	if (!*defrag_end)
		prev_mergeable = false;

	next_mergeable = defrag_check_next_extent(inode, em);
	/*
	 * we hit a real extent, if it is big or the next extent is not a
	 * real extent, don't bother defragging it
	 */
	if (!compress && (*last_len == 0 || *last_len >= thresh) &&
	    (em->len >= thresh || (!next_mergeable && !prev_mergeable)))
		ret = 0;
out:
	/*
	 * last_len ends up being a counter of how many bytes we've defragged.
	 * every time we choose not to defrag an extent, we reset *last_len
	 * so that the next tiny extent will force a defrag.
	 *
	 * The end result of this is that tiny extents before a single big
	 * extent will force at least part of that big extent to be defragged.
	 */
	if (ret) {
		*defrag_end = extent_map_end(em);
	} else {
		*last_len = 0;
		*skip = extent_map_end(em);
		*defrag_end = 0;
	}

	free_extent_map(em);
	return ret;
}
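/*
 * Worked example for should_defrag_range() above, assuming the default
 * 256K threshold: a 16K extent that can be merged with a real neighbour
 * is defragged; a 1M extent is skipped (em->len >= thresh); and once a
 * range has started being defragged, everything below *defrag_end keeps
 * being defragged regardless of the other checks.
 */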
/*
 * it doesn't do much good to defrag one or two pages
 * at a time. This pulls in a nice chunk of pages
 * to COW and defrag.
 *
 * It also makes sure the delalloc code has enough
 * dirty data to avoid making new small extents as part
 * of the defrag
 *
 * It's a good idea to start RA on this range
 * before calling this.
 */
static int cluster_pages_for_defrag(struct inode *inode,
				    struct page **pages,
				    unsigned long start_index,
				    unsigned long num_pages)
{
	unsigned long file_end;
	u64 isize = i_size_read(inode);
	u64 page_start;
	u64 page_end;
	u64 page_cnt;
	int ret;
	int i;
	int i_done;
	struct btrfs_ordered_extent *ordered;
	struct extent_state *cached_state = NULL;
	struct extent_io_tree *tree;
	struct extent_changeset *data_reserved = NULL;
	gfp_t mask = btrfs_alloc_write_mask(inode->i_mapping);

	file_end = (isize - 1) >> PAGE_SHIFT;
	if (!isize || start_index > file_end)
		return 0;

	page_cnt = min_t(u64, (u64)num_pages, (u64)file_end - start_index + 1);

	ret = btrfs_delalloc_reserve_space(inode, &data_reserved,
			start_index << PAGE_SHIFT,
			page_cnt << PAGE_SHIFT);
	if (ret)
		return ret;
	i_done = 0;
	tree = &BTRFS_I(inode)->io_tree;

	/* step one, lock all the pages */
	for (i = 0; i < page_cnt; i++) {
		struct page *page;
again:
		page = find_or_create_page(inode->i_mapping,
					   start_index + i, mask);
		if (!page)
			break;

		page_start = page_offset(page);
		page_end = page_start + PAGE_SIZE - 1;
		while (1) {
			lock_extent_bits(tree, page_start, page_end,
					 &cached_state);
			ordered = btrfs_lookup_ordered_extent(inode,
							      page_start);
			unlock_extent_cached(tree, page_start, page_end,
					     &cached_state);
			if (!ordered)
				break;

			unlock_page(page);
			btrfs_start_ordered_extent(inode, ordered, 1);
			btrfs_put_ordered_extent(ordered);
			lock_page(page);
			/*
			 * we unlocked the page above, so we need to check if
			 * it was released or not.
			 */
			if (page->mapping != inode->i_mapping) {
				unlock_page(page);
				put_page(page);
				goto again;
			}
		}

		if (!PageUptodate(page)) {
			btrfs_readpage(NULL, page);
			lock_page(page);
			if (!PageUptodate(page)) {
				unlock_page(page);
				put_page(page);
				ret = -EIO;
				break;
			}
		}

		if (page->mapping != inode->i_mapping) {
			unlock_page(page);
			put_page(page);
			goto again;
		}

		pages[i] = page;
		i_done++;
	}
	if (!i_done || ret)
		goto out;

	if (!(inode->i_sb->s_flags & SB_ACTIVE))
		goto out;

	/*
	 * so now we have a nice long stream of locked
	 * and up to date pages, lets wait on them
	 */
	for (i = 0; i < i_done; i++)
		wait_on_page_writeback(pages[i]);

	page_start = page_offset(pages[0]);
	page_end = page_offset(pages[i_done - 1]) + PAGE_SIZE;

	lock_extent_bits(&BTRFS_I(inode)->io_tree,
			 page_start, page_end - 1, &cached_state);
	clear_extent_bit(&BTRFS_I(inode)->io_tree, page_start,
			 page_end - 1, EXTENT_DIRTY | EXTENT_DELALLOC |
			 EXTENT_DO_ACCOUNTING | EXTENT_DEFRAG, 0, 0,
			 &cached_state);

	if (i_done != page_cnt) {
		spin_lock(&BTRFS_I(inode)->lock);
		btrfs_mod_outstanding_extents(BTRFS_I(inode), 1);
		spin_unlock(&BTRFS_I(inode)->lock);
		btrfs_delalloc_release_space(inode, data_reserved,
				start_index << PAGE_SHIFT,
				(page_cnt - i_done) << PAGE_SHIFT, true);
	}

	set_extent_defrag(&BTRFS_I(inode)->io_tree, page_start, page_end - 1,
			  &cached_state);

	unlock_extent_cached(&BTRFS_I(inode)->io_tree,
			     page_start, page_end - 1, &cached_state);

	for (i = 0; i < i_done; i++) {
		clear_page_dirty_for_io(pages[i]);
		ClearPageChecked(pages[i]);
		set_page_extent_mapped(pages[i]);
		set_page_dirty(pages[i]);
		unlock_page(pages[i]);
		put_page(pages[i]);
	}
	btrfs_delalloc_release_extents(BTRFS_I(inode), page_cnt << PAGE_SHIFT,
				       false);
	extent_changeset_free(data_reserved);
	return i_done;
out:
	for (i = 0; i < i_done; i++) {
		unlock_page(pages[i]);
		put_page(pages[i]);
	}
	btrfs_delalloc_release_space(inode, data_reserved,
			start_index << PAGE_SHIFT,
			page_cnt << PAGE_SHIFT, true);
	btrfs_delalloc_release_extents(BTRFS_I(inode), page_cnt << PAGE_SHIFT,
				       true);
	extent_changeset_free(data_reserved);
	return ret;

}
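/*
 * Hypothetical userspace sketch of the entry point implemented by
 * btrfs_defrag_file() below (error handling omitted): defragment the
 * whole file with the default 256K extent threshold.
 *
 *	struct btrfs_ioctl_defrag_range_args range = {
 *		.start = 0,
 *		.len = (__u64)-1,
 *		.extent_thresh = 0,	// 0 means the 256K default
 *	};
 *
 *	ioctl(fd, BTRFS_IOC_DEFRAG_RANGE, &range);
 */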
int btrfs_defrag_file(struct inode *inode, struct file *file,
		      struct btrfs_ioctl_defrag_range_args *range,
		      u64 newer_than, unsigned long max_to_defrag)
{
	struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
	struct btrfs_root *root = BTRFS_I(inode)->root;
	struct file_ra_state *ra = NULL;
	unsigned long last_index;
	u64 isize = i_size_read(inode);
	u64 last_len = 0;
	u64 skip = 0;
	u64 defrag_end = 0;
	u64 newer_off = range->start;
	unsigned long i;
	unsigned long ra_index = 0;
	int ret;
	int defrag_count = 0;
	int compress_type = BTRFS_COMPRESS_ZLIB;
	u32 extent_thresh = range->extent_thresh;
	unsigned long max_cluster = SZ_256K >> PAGE_SHIFT;
	unsigned long cluster = max_cluster;
	u64 new_align = ~((u64)SZ_128K - 1);
	struct page **pages = NULL;
	bool do_compress = range->flags & BTRFS_DEFRAG_RANGE_COMPRESS;

	if (isize == 0)
		return 0;

	if (range->start >= isize)
		return -EINVAL;

	if (do_compress) {
		if (range->compress_type > BTRFS_COMPRESS_TYPES)
			return -EINVAL;
		if (range->compress_type)
			compress_type = range->compress_type;
	}

	if (extent_thresh == 0)
		extent_thresh = SZ_256K;

	/*
	 * If we were not given a file, allocate a readahead context. As
	 * readahead is just an optimization, defrag will work without it so
	 * we don't error out.
	 */
	if (!file) {
		ra = kzalloc(sizeof(*ra), GFP_KERNEL);
		if (ra)
			file_ra_state_init(ra, inode->i_mapping);
	} else {
		ra = &file->f_ra;
	}

	pages = kmalloc_array(max_cluster, sizeof(struct page *), GFP_KERNEL);
	if (!pages) {
		ret = -ENOMEM;
		goto out_ra;
	}

	/* find the last page to defrag */
	if (range->start + range->len > range->start) {
		last_index = min_t(u64, isize - 1,
			 range->start + range->len - 1) >> PAGE_SHIFT;
	} else {
		last_index = (isize - 1) >> PAGE_SHIFT;
	}

	if (newer_than) {
		ret = find_new_extents(root, inode, newer_than,
				       &newer_off, SZ_64K);
		if (!ret) {
			range->start = newer_off;
			/*
			 * we always align our defrag to help keep
			 * the extents in the file evenly spaced
			 */
			i = (newer_off & new_align) >> PAGE_SHIFT;
		} else
			goto out_ra;
	} else {
		i = range->start >> PAGE_SHIFT;
	}
	if (!max_to_defrag)
		max_to_defrag = last_index - i + 1;

	/*
	 * make writeback start from i, so the defrag range can be
	 * written sequentially.
	 */
	if (i < inode->i_mapping->writeback_index)
		inode->i_mapping->writeback_index = i;

	while (i <= last_index && defrag_count < max_to_defrag &&
	       (i < DIV_ROUND_UP(i_size_read(inode), PAGE_SIZE))) {
		/*
		 * make sure we stop running if someone unmounts
		 * the FS
		 */
		if (!(inode->i_sb->s_flags & SB_ACTIVE))
			break;

		if (btrfs_defrag_cancelled(fs_info)) {
			btrfs_debug(fs_info, "defrag_file cancelled");
			ret = -EAGAIN;
			break;
		}

		if (!should_defrag_range(inode, (u64)i << PAGE_SHIFT,
					 extent_thresh, &last_len, &skip,
					 &defrag_end, do_compress)) {
			unsigned long next;
			/*
			 * the should_defrag function tells us how much to skip
			 * bump our counter by the suggested amount
			 */
			next = DIV_ROUND_UP(skip, PAGE_SIZE);
			i = max(i + 1, next);
			continue;
		}

		if (!newer_than) {
			cluster = (PAGE_ALIGN(defrag_end) >>
				   PAGE_SHIFT) - i;
			cluster = min(cluster, max_cluster);
		} else {
			cluster = max_cluster;
		}

		if (i + cluster > ra_index) {
			ra_index = max(i, ra_index);
			if (ra)
				page_cache_sync_readahead(inode->i_mapping, ra,
						file, ra_index, cluster);
			ra_index += cluster;
		}

		inode_lock(inode);
		if (IS_SWAPFILE(inode)) {
			ret = -ETXTBSY;
		} else {
			if (do_compress)
				BTRFS_I(inode)->defrag_compress = compress_type;
			ret = cluster_pages_for_defrag(inode, pages, i, cluster);
		}
		if (ret < 0) {
			inode_unlock(inode);
			goto out_ra;
		}

		defrag_count += ret;
		balance_dirty_pages_ratelimited(inode->i_mapping);
		inode_unlock(inode);

		if (newer_than) {
			if (newer_off == (u64)-1)
				break;

			if (ret > 0)
				i += ret;

			newer_off = max(newer_off + 1,
					(u64)i << PAGE_SHIFT);

			ret = find_new_extents(root, inode, newer_than,
					       &newer_off, SZ_64K);
			if (!ret) {
				range->start = newer_off;
				i = (newer_off & new_align) >> PAGE_SHIFT;
			} else {
				break;
			}
		} else {
			if (ret > 0) {
				i += ret;
				last_len += ret << PAGE_SHIFT;
			} else {
				i++;
				last_len = 0;
			}
		}
	}

	if ((range->flags & BTRFS_DEFRAG_RANGE_START_IO)) {
		filemap_flush(inode->i_mapping);
		if (test_bit(BTRFS_INODE_HAS_ASYNC_EXTENT,
			     &BTRFS_I(inode)->runtime_flags))
			filemap_flush(inode->i_mapping);
	}

	if (range->compress_type == BTRFS_COMPRESS_LZO) {
		btrfs_set_fs_incompat(fs_info, COMPRESS_LZO);
	} else if (range->compress_type == BTRFS_COMPRESS_ZSTD) {
		btrfs_set_fs_incompat(fs_info, COMPRESS_ZSTD);
	}

	ret = defrag_count;

out_ra:
	if (do_compress) {
		inode_lock(inode);
		BTRFS_I(inode)->defrag_compress = BTRFS_COMPRESS_NONE;
		inode_unlock(inode);
	}
	if (!file)
		kfree(ra);
	kfree(pages);
	return ret;
}
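/*
 * The argument parsed by btrfs_ioctl_resize() below is the string in
 * vol_args->name, in the form "[<devid>:]<size>", where <size> is
 * either "max" or a memparse()-style value with an optional leading
 * '+' or '-' for relative grow/shrink; illustrative examples: "max",
 * "10g", "1:-2g".
 */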
static noinline int btrfs_ioctl_resize(struct file *file,
					void __user *arg)
{
	struct inode *inode = file_inode(file);
	struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
	u64 new_size;
	u64 old_size;
	u64 devid = 1;
	struct btrfs_root *root = BTRFS_I(inode)->root;
	struct btrfs_ioctl_vol_args *vol_args;
	struct btrfs_trans_handle *trans;
	struct btrfs_device *device = NULL;
	char *sizestr;
	char *retptr;
	char *devstr = NULL;
	int ret = 0;
	int mod = 0;

	if (!capable(CAP_SYS_ADMIN))
		return -EPERM;

	ret = mnt_want_write_file(file);
	if (ret)
		return ret;

	if (test_and_set_bit(BTRFS_FS_EXCL_OP, &fs_info->flags)) {
		mnt_drop_write_file(file);
		return BTRFS_ERROR_DEV_EXCL_RUN_IN_PROGRESS;
	}

	vol_args = memdup_user(arg, sizeof(*vol_args));
	if (IS_ERR(vol_args)) {
		ret = PTR_ERR(vol_args);
		goto out;
	}

	vol_args->name[BTRFS_PATH_NAME_MAX] = '\0';

	sizestr = vol_args->name;
	devstr = strchr(sizestr, ':');
	if (devstr) {
		sizestr = devstr + 1;
		*devstr = '\0';
		devstr = vol_args->name;
		ret = kstrtoull(devstr, 10, &devid);
		if (ret)
			goto out_free;
		if (!devid) {
			ret = -EINVAL;
			goto out_free;
		}
		btrfs_info(fs_info, "resizing devid %llu", devid);
	}

	device = btrfs_find_device(fs_info->fs_devices, devid, NULL, NULL, true);
	if (!device) {
		btrfs_info(fs_info, "resizer unable to find device %llu",
			   devid);
		ret = -ENODEV;
		goto out_free;
	}

	if (!test_bit(BTRFS_DEV_STATE_WRITEABLE, &device->dev_state)) {
		btrfs_info(fs_info,
			   "resizer unable to apply on readonly device %llu",
			   devid);
		ret = -EPERM;
		goto out_free;
	}

	if (!strcmp(sizestr, "max"))
		new_size = device->bdev->bd_inode->i_size;
	else {
		if (sizestr[0] == '-') {
			mod = -1;
			sizestr++;
		} else if (sizestr[0] == '+') {
			mod = 1;
			sizestr++;
		}
		new_size = memparse(sizestr, &retptr);
		if (*retptr != '\0' || new_size == 0) {
			ret = -EINVAL;
			goto out_free;
		}
	}

	if (test_bit(BTRFS_DEV_STATE_REPLACE_TGT, &device->dev_state)) {
		ret = -EPERM;
		goto out_free;
	}

	old_size = btrfs_device_get_total_bytes(device);

	if (mod < 0) {
		if (new_size > old_size) {
			ret = -EINVAL;
			goto out_free;
		}
		new_size = old_size - new_size;
	} else if (mod > 0) {
		if (new_size > ULLONG_MAX - old_size) {
			ret = -ERANGE;
			goto out_free;
		}
		new_size = old_size + new_size;
	}

	if (new_size < SZ_256M) {
		ret = -EINVAL;
		goto out_free;
	}
	if (new_size > device->bdev->bd_inode->i_size) {
		ret = -EFBIG;
		goto out_free;
	}

	new_size = round_down(new_size, fs_info->sectorsize);

	btrfs_info_in_rcu(fs_info, "new size for %s is %llu",
			  rcu_str_deref(device->name), new_size);

	if (new_size > old_size) {
		trans = btrfs_start_transaction(root, 0);
		if (IS_ERR(trans)) {
			ret = PTR_ERR(trans);
			goto out_free;
		}
		ret = btrfs_grow_device(trans, device, new_size);
		btrfs_commit_transaction(trans);
	} else if (new_size < old_size) {
		ret = btrfs_shrink_device(device, new_size);
	} /* equal, nothing need to do */

out_free:
	kfree(vol_args);
out:
	clear_bit(BTRFS_FS_EXCL_OP, &fs_info->flags);
	mnt_drop_write_file(file);
	return ret;
}

static noinline int btrfs_ioctl_snap_create_transid(struct file *file,
				const char *name, unsigned long fd, int subvol,
				u64 *transid, bool readonly,
				struct btrfs_qgroup_inherit *inherit)
{
	int namelen;
	int ret = 0;

	if (!S_ISDIR(file_inode(file)->i_mode))
		return -ENOTDIR;

	ret = mnt_want_write_file(file);
	if (ret)
		goto out;

	namelen = strlen(name);
	if (strchr(name, '/')) {
		ret = -EINVAL;
		goto out_drop_write;
	}

	if (name[0] == '.' &&
	   (namelen == 1 || (name[1] == '.' && namelen == 2))) {
		ret = -EEXIST;
		goto out_drop_write;
	}

	if (subvol) {
		ret = btrfs_mksubvol(&file->f_path, name, namelen,
				     NULL, transid, readonly, inherit);
	} else {
		struct fd src = fdget(fd);
		struct inode *src_inode;
		if (!src.file) {
			ret = -EINVAL;
			goto out_drop_write;
		}

		src_inode = file_inode(src.file);
		if (src_inode->i_sb != file_inode(file)->i_sb) {
			btrfs_info(BTRFS_I(file_inode(file))->root->fs_info,
				   "Snapshot src from another FS");
			ret = -EXDEV;
		} else if (!inode_owner_or_capable(src_inode)) {
			/*
			 * Subvolume creation is not restricted, but snapshots
			 * are limited to own subvolumes only
			 */
			ret = -EPERM;
		} else {
			ret = btrfs_mksubvol(&file->f_path, name, namelen,
					     BTRFS_I(src_inode)->root,
					     transid, readonly, inherit);
		}
		fdput(src);
	}
out_drop_write:
	mnt_drop_write_file(file);
out:
	return ret;
}

static noinline int btrfs_ioctl_snap_create(struct file *file,
					    void __user *arg, int subvol)
{
	struct btrfs_ioctl_vol_args *vol_args;
	int ret;

	if (!S_ISDIR(file_inode(file)->i_mode))
		return -ENOTDIR;

	vol_args = memdup_user(arg, sizeof(*vol_args));
	if (IS_ERR(vol_args))
		return PTR_ERR(vol_args);
	vol_args->name[BTRFS_PATH_NAME_MAX] = '\0';

	ret = btrfs_ioctl_snap_create_transid(file, vol_args->name,
					      vol_args->fd, subvol,
					      NULL, false, NULL);

	kfree(vol_args);
	return ret;
}
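/*
 * Hypothetical userspace sketch of the v2 interface handled below
 * (error handling omitted): create a read-only snapshot named "snap"
 * of the subvolume open as src_fd, placed under dest_dir_fd.
 *
 *	struct btrfs_ioctl_vol_args_v2 args = { 0 };
 *
 *	args.fd = src_fd;
 *	args.flags = BTRFS_SUBVOL_RDONLY;
 *	strcpy(args.name, "snap");
 *	ioctl(dest_dir_fd, BTRFS_IOC_SNAP_CREATE_V2, &args);
 */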
static noinline int btrfs_ioctl_snap_create_v2(struct file *file,
					       void __user *arg, int subvol)
{
	struct btrfs_ioctl_vol_args_v2 *vol_args;
	int ret;
	u64 transid = 0;
	u64 *ptr = NULL;
	bool readonly = false;
	struct btrfs_qgroup_inherit *inherit = NULL;

	if (!S_ISDIR(file_inode(file)->i_mode))
		return -ENOTDIR;

	vol_args = memdup_user(arg, sizeof(*vol_args));
	if (IS_ERR(vol_args))
		return PTR_ERR(vol_args);
	vol_args->name[BTRFS_SUBVOL_NAME_MAX] = '\0';

	if (vol_args->flags &
	    ~(BTRFS_SUBVOL_CREATE_ASYNC | BTRFS_SUBVOL_RDONLY |
	      BTRFS_SUBVOL_QGROUP_INHERIT)) {
		ret = -EOPNOTSUPP;
		goto free_args;
	}

	if (vol_args->flags & BTRFS_SUBVOL_CREATE_ASYNC)
		ptr = &transid;
	if (vol_args->flags & BTRFS_SUBVOL_RDONLY)
		readonly = true;
	if (vol_args->flags & BTRFS_SUBVOL_QGROUP_INHERIT) {
		if (vol_args->size > PAGE_SIZE) {
			ret = -EINVAL;
			goto free_args;
		}
		inherit = memdup_user(vol_args->qgroup_inherit, vol_args->size);
		if (IS_ERR(inherit)) {
			ret = PTR_ERR(inherit);
			goto free_args;
		}
	}

	ret = btrfs_ioctl_snap_create_transid(file, vol_args->name,
					      vol_args->fd, subvol, ptr,
					      readonly, inherit);
	if (ret)
		goto free_inherit;

	if (ptr && copy_to_user(arg +
				offsetof(struct btrfs_ioctl_vol_args_v2,
					transid),
				ptr, sizeof(*ptr)))
		ret = -EFAULT;

free_inherit:
	kfree(inherit);
free_args:
	kfree(vol_args);
	return ret;
}

static noinline int btrfs_ioctl_subvol_getflags(struct file *file,
						void __user *arg)
{
	struct inode *inode = file_inode(file);
	struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
	struct btrfs_root *root = BTRFS_I(inode)->root;
	int ret = 0;
	u64 flags = 0;

	if (btrfs_ino(BTRFS_I(inode)) != BTRFS_FIRST_FREE_OBJECTID)
		return -EINVAL;

	down_read(&fs_info->subvol_sem);
	if (btrfs_root_readonly(root))
		flags |= BTRFS_SUBVOL_RDONLY;
	up_read(&fs_info->subvol_sem);

	if (copy_to_user(arg, &flags, sizeof(flags)))
		ret = -EFAULT;

	return ret;
}
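/*
 * Illustrative sketch of toggling the read-only flag via the handler
 * above and the one below (error handling omitted):
 *
 *	__u64 flags;
 *
 *	ioctl(subvol_fd, BTRFS_IOC_SUBVOL_GETFLAGS, &flags);
 *	flags |= BTRFS_SUBVOL_RDONLY;
 *	ioctl(subvol_fd, BTRFS_IOC_SUBVOL_SETFLAGS, &flags);
 */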
static noinline int btrfs_ioctl_subvol_setflags(struct file *file,
					      void __user *arg)
{
	struct inode *inode = file_inode(file);
	struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
	struct btrfs_root *root = BTRFS_I(inode)->root;
	struct btrfs_trans_handle *trans;
	u64 root_flags;
	u64 flags;
	int ret = 0;

	if (!inode_owner_or_capable(inode))
		return -EPERM;

	ret = mnt_want_write_file(file);
	if (ret)
		goto out;

	if (btrfs_ino(BTRFS_I(inode)) != BTRFS_FIRST_FREE_OBJECTID) {
		ret = -EINVAL;
		goto out_drop_write;
	}

	if (copy_from_user(&flags, arg, sizeof(flags))) {
		ret = -EFAULT;
		goto out_drop_write;
	}

	if (flags & BTRFS_SUBVOL_CREATE_ASYNC) {
		ret = -EINVAL;
		goto out_drop_write;
	}

	if (flags & ~BTRFS_SUBVOL_RDONLY) {
		ret = -EOPNOTSUPP;
		goto out_drop_write;
	}

	down_write(&fs_info->subvol_sem);

	/* nothing to do */
	if (!!(flags & BTRFS_SUBVOL_RDONLY) == btrfs_root_readonly(root))
		goto out_drop_sem;

	root_flags = btrfs_root_flags(&root->root_item);
	if (flags & BTRFS_SUBVOL_RDONLY) {
		btrfs_set_root_flags(&root->root_item,
				     root_flags | BTRFS_ROOT_SUBVOL_RDONLY);
	} else {
		/*
		 * Block RO -> RW transition if this subvolume is involved in
		 * send
		 */
		spin_lock(&root->root_item_lock);
		if (root->send_in_progress == 0) {
			btrfs_set_root_flags(&root->root_item,
				     root_flags & ~BTRFS_ROOT_SUBVOL_RDONLY);
			spin_unlock(&root->root_item_lock);
		} else {
			spin_unlock(&root->root_item_lock);
			btrfs_warn(fs_info,
				   "Attempt to set subvolume %llu read-write during send",
				   root->root_key.objectid);
			ret = -EPERM;
			goto out_drop_sem;
		}
	}

	trans = btrfs_start_transaction(root, 1);
	if (IS_ERR(trans)) {
		ret = PTR_ERR(trans);
		goto out_reset;
	}

	ret = btrfs_update_root(trans, fs_info->tree_root,
				&root->root_key, &root->root_item);
	if (ret < 0) {
		btrfs_end_transaction(trans);
		goto out_reset;
	}

	ret = btrfs_commit_transaction(trans);

out_reset:
	if (ret)
		btrfs_set_root_flags(&root->root_item, root_flags);
out_drop_sem:
	up_write(&fs_info->subvol_sem);
out_drop_write:
	mnt_drop_write_file(file);
out:
	return ret;
}

static noinline int key_in_sk(struct btrfs_key *key,
			      struct btrfs_ioctl_search_key *sk)
{
	struct btrfs_key test;
	int ret;

	test.objectid = sk->min_objectid;
	test.type = sk->min_type;
	test.offset = sk->min_offset;

	ret = btrfs_comp_cpu_keys(key, &test);
	if (ret < 0)
		return 0;

	test.objectid = sk->max_objectid;
	test.type = sk->max_type;
	test.offset = sk->max_offset;

	ret = btrfs_comp_cpu_keys(key, &test);
	if (ret > 0)
		return 0;
	return 1;
}
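/*
 * The result buffer filled by copy_to_sk() below is a packed sequence
 * of (struct btrfs_ioctl_search_header, item data) pairs; sh.len gives
 * the length of the item payload that follows each header and may be
 * zero for the -EOVERFLOW case described in the function.
 */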
test.objectid = sk->max_objectid; 2124 test.type = sk->max_type; 2125 test.offset = sk->max_offset; 2126 if (btrfs_comp_cpu_keys(key, &test) >= 0) 2127 ret = 1; 2128 else if (key->offset < (u64)-1) 2129 key->offset++; 2130 else if (key->type < (u8)-1) { 2131 key->offset = 0; 2132 key->type++; 2133 } else if (key->objectid < (u64)-1) { 2134 key->offset = 0; 2135 key->type = 0; 2136 key->objectid++; 2137 } else 2138 ret = 1; 2139 out: 2140 /* 2141 * 0: all items from this leaf copied, continue with next 2142 * 1: * more items can be copied, but unused buffer is too small 2143 * * all items were found 2144 * Either way, it will stops the loop which iterates to the next 2145 * leaf 2146 * -EOVERFLOW: item was to large for buffer 2147 * -EFAULT: could not copy extent buffer back to userspace 2148 */ 2149 return ret; 2150 } 2151 2152 static noinline int search_ioctl(struct inode *inode, 2153 struct btrfs_ioctl_search_key *sk, 2154 size_t *buf_size, 2155 char __user *ubuf) 2156 { 2157 struct btrfs_fs_info *info = btrfs_sb(inode->i_sb); 2158 struct btrfs_root *root; 2159 struct btrfs_key key; 2160 struct btrfs_path *path; 2161 int ret; 2162 int num_found = 0; 2163 unsigned long sk_offset = 0; 2164 2165 if (*buf_size < sizeof(struct btrfs_ioctl_search_header)) { 2166 *buf_size = sizeof(struct btrfs_ioctl_search_header); 2167 return -EOVERFLOW; 2168 } 2169 2170 path = btrfs_alloc_path(); 2171 if (!path) 2172 return -ENOMEM; 2173 2174 if (sk->tree_id == 0) { 2175 /* search the root of the inode that was passed */ 2176 root = BTRFS_I(inode)->root; 2177 } else { 2178 key.objectid = sk->tree_id; 2179 key.type = BTRFS_ROOT_ITEM_KEY; 2180 key.offset = (u64)-1; 2181 root = btrfs_read_fs_root_no_name(info, &key); 2182 if (IS_ERR(root)) { 2183 btrfs_free_path(path); 2184 return PTR_ERR(root); 2185 } 2186 } 2187 2188 key.objectid = sk->min_objectid; 2189 key.type = sk->min_type; 2190 key.offset = sk->min_offset; 2191 2192 while (1) { 2193 ret = btrfs_search_forward(root, &key, path, sk->min_transid); 2194 if (ret != 0) { 2195 if (ret > 0) 2196 ret = 0; 2197 goto err; 2198 } 2199 ret = copy_to_sk(path, &key, sk, buf_size, ubuf, 2200 &sk_offset, &num_found); 2201 btrfs_release_path(path); 2202 if (ret) 2203 break; 2204 2205 } 2206 if (ret > 0) 2207 ret = 0; 2208 err: 2209 sk->nr_items = num_found; 2210 btrfs_free_path(path); 2211 return ret; 2212 } 2213 2214 static noinline int btrfs_ioctl_tree_search(struct file *file, 2215 void __user *argp) 2216 { 2217 struct btrfs_ioctl_search_args __user *uargs; 2218 struct btrfs_ioctl_search_key sk; 2219 struct inode *inode; 2220 int ret; 2221 size_t buf_size; 2222 2223 if (!capable(CAP_SYS_ADMIN)) 2224 return -EPERM; 2225 2226 uargs = (struct btrfs_ioctl_search_args __user *)argp; 2227 2228 if (copy_from_user(&sk, &uargs->key, sizeof(sk))) 2229 return -EFAULT; 2230 2231 buf_size = sizeof(uargs->buf); 2232 2233 inode = file_inode(file); 2234 ret = search_ioctl(inode, &sk, &buf_size, uargs->buf); 2235 2236 /* 2237 * In the origin implementation an overflow is handled by returning a 2238 * search header with a len of zero, so reset ret. 
2239 */ 2240 if (ret == -EOVERFLOW) 2241 ret = 0; 2242 2243 if (ret == 0 && copy_to_user(&uargs->key, &sk, sizeof(sk))) 2244 ret = -EFAULT; 2245 return ret; 2246 } 2247 2248 static noinline int btrfs_ioctl_tree_search_v2(struct file *file, 2249 void __user *argp) 2250 { 2251 struct btrfs_ioctl_search_args_v2 __user *uarg; 2252 struct btrfs_ioctl_search_args_v2 args; 2253 struct inode *inode; 2254 int ret; 2255 size_t buf_size; 2256 const size_t buf_limit = SZ_16M; 2257 2258 if (!capable(CAP_SYS_ADMIN)) 2259 return -EPERM; 2260 2261 /* copy search header and buffer size */ 2262 uarg = (struct btrfs_ioctl_search_args_v2 __user *)argp; 2263 if (copy_from_user(&args, uarg, sizeof(args))) 2264 return -EFAULT; 2265 2266 buf_size = args.buf_size; 2267 2268 /* limit result size to 16MB */ 2269 if (buf_size > buf_limit) 2270 buf_size = buf_limit; 2271 2272 inode = file_inode(file); 2273 ret = search_ioctl(inode, &args.key, &buf_size, 2274 (char __user *)(&uarg->buf[0])); 2275 if (ret == 0 && copy_to_user(&uarg->key, &args.key, sizeof(args.key))) 2276 ret = -EFAULT; 2277 else if (ret == -EOVERFLOW && 2278 copy_to_user(&uarg->buf_size, &buf_size, sizeof(buf_size))) 2279 ret = -EFAULT; 2280 2281 return ret; 2282 } 2283 2284 /* 2285 * Search INODE_REFs to identify path name of 'dirid' directory 2286 * in a 'tree_id' tree. and sets path name to 'name'. 2287 */ 2288 static noinline int btrfs_search_path_in_tree(struct btrfs_fs_info *info, 2289 u64 tree_id, u64 dirid, char *name) 2290 { 2291 struct btrfs_root *root; 2292 struct btrfs_key key; 2293 char *ptr; 2294 int ret = -1; 2295 int slot; 2296 int len; 2297 int total_len = 0; 2298 struct btrfs_inode_ref *iref; 2299 struct extent_buffer *l; 2300 struct btrfs_path *path; 2301 2302 if (dirid == BTRFS_FIRST_FREE_OBJECTID) { 2303 name[0]='\0'; 2304 return 0; 2305 } 2306 2307 path = btrfs_alloc_path(); 2308 if (!path) 2309 return -ENOMEM; 2310 2311 ptr = &name[BTRFS_INO_LOOKUP_PATH_MAX - 1]; 2312 2313 key.objectid = tree_id; 2314 key.type = BTRFS_ROOT_ITEM_KEY; 2315 key.offset = (u64)-1; 2316 root = btrfs_read_fs_root_no_name(info, &key); 2317 if (IS_ERR(root)) { 2318 ret = PTR_ERR(root); 2319 goto out; 2320 } 2321 2322 key.objectid = dirid; 2323 key.type = BTRFS_INODE_REF_KEY; 2324 key.offset = (u64)-1; 2325 2326 while (1) { 2327 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); 2328 if (ret < 0) 2329 goto out; 2330 else if (ret > 0) { 2331 ret = btrfs_previous_item(root, path, dirid, 2332 BTRFS_INODE_REF_KEY); 2333 if (ret < 0) 2334 goto out; 2335 else if (ret > 0) { 2336 ret = -ENOENT; 2337 goto out; 2338 } 2339 } 2340 2341 l = path->nodes[0]; 2342 slot = path->slots[0]; 2343 btrfs_item_key_to_cpu(l, &key, slot); 2344 2345 iref = btrfs_item_ptr(l, slot, struct btrfs_inode_ref); 2346 len = btrfs_inode_ref_name_len(l, iref); 2347 ptr -= len + 1; 2348 total_len += len + 1; 2349 if (ptr < name) { 2350 ret = -ENAMETOOLONG; 2351 goto out; 2352 } 2353 2354 *(ptr + len) = '/'; 2355 read_extent_buffer(l, ptr, (unsigned long)(iref + 1), len); 2356 2357 if (key.offset == BTRFS_FIRST_FREE_OBJECTID) 2358 break; 2359 2360 btrfs_release_path(path); 2361 key.objectid = key.offset; 2362 key.offset = (u64)-1; 2363 dirid = key.objectid; 2364 } 2365 memmove(name, ptr, total_len); 2366 name[total_len] = '\0'; 2367 ret = 0; 2368 out: 2369 btrfs_free_path(path); 2370 return ret; 2371 } 2372 2373 static int btrfs_search_path_in_tree_user(struct inode *inode, 2374 struct btrfs_ioctl_ino_lookup_user_args *args) 2375 { 2376 struct btrfs_fs_info *fs_info = 
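
/*
 * Usage sketch (userspace, illustrative only, not compiled here): driving
 * BTRFS_IOC_TREE_SEARCH_V2 with a caller-sized buffer.  The structure and
 * constants come from the UAPI headers (linux/btrfs.h, linux/btrfs_tree.h);
 * the buffer size and the searched tree are arbitrary assumptions.  Both
 * search ioctls require CAP_SYS_ADMIN, and -EOVERFLOW reports the size
 * needed for the largest item back in buf_size.
 *
 *	int dump_root_items(int fd)
 *	{
 *		const size_t buf_size = 65536;
 *		struct btrfs_ioctl_search_args_v2 *args;
 *
 *		args = calloc(1, sizeof(*args) + buf_size);
 *		if (!args)
 *			return -1;
 *		args->key.tree_id = BTRFS_ROOT_TREE_OBJECTID;
 *		args->key.min_type = BTRFS_ROOT_ITEM_KEY;
 *		args->key.max_type = BTRFS_ROOT_ITEM_KEY;
 *		args->key.max_objectid = (__u64)-1;
 *		args->key.max_offset = (__u64)-1;
 *		args->key.max_transid = (__u64)-1;
 *		args->key.nr_items = 4096;
 *		args->buf_size = buf_size;
 *		if (ioctl(fd, BTRFS_IOC_TREE_SEARCH_V2, args) < 0) {
 *			free(args);
 *			return -1;
 *		}
 *		printf("copied %u items\n", args->key.nr_items);
 *		free(args);
 *		return 0;
 *	}
 */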

/*
 * Search INODE_REFs to identify the path name of the 'dirid' directory
 * in a 'tree_id' tree and store that path name in 'name'.
 */
static noinline int btrfs_search_path_in_tree(struct btrfs_fs_info *info,
				u64 tree_id, u64 dirid, char *name)
{
	struct btrfs_root *root;
	struct btrfs_key key;
	char *ptr;
	int ret = -1;
	int slot;
	int len;
	int total_len = 0;
	struct btrfs_inode_ref *iref;
	struct extent_buffer *l;
	struct btrfs_path *path;

	if (dirid == BTRFS_FIRST_FREE_OBJECTID) {
		name[0] = '\0';
		return 0;
	}

	path = btrfs_alloc_path();
	if (!path)
		return -ENOMEM;

	ptr = &name[BTRFS_INO_LOOKUP_PATH_MAX - 1];

	key.objectid = tree_id;
	key.type = BTRFS_ROOT_ITEM_KEY;
	key.offset = (u64)-1;
	root = btrfs_read_fs_root_no_name(info, &key);
	if (IS_ERR(root)) {
		ret = PTR_ERR(root);
		goto out;
	}

	key.objectid = dirid;
	key.type = BTRFS_INODE_REF_KEY;
	key.offset = (u64)-1;

	while (1) {
		ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
		if (ret < 0)
			goto out;
		else if (ret > 0) {
			ret = btrfs_previous_item(root, path, dirid,
						  BTRFS_INODE_REF_KEY);
			if (ret < 0)
				goto out;
			else if (ret > 0) {
				ret = -ENOENT;
				goto out;
			}
		}

		l = path->nodes[0];
		slot = path->slots[0];
		btrfs_item_key_to_cpu(l, &key, slot);

		iref = btrfs_item_ptr(l, slot, struct btrfs_inode_ref);
		len = btrfs_inode_ref_name_len(l, iref);
		ptr -= len + 1;
		total_len += len + 1;
		if (ptr < name) {
			ret = -ENAMETOOLONG;
			goto out;
		}

		*(ptr + len) = '/';
		read_extent_buffer(l, ptr, (unsigned long)(iref + 1), len);

		if (key.offset == BTRFS_FIRST_FREE_OBJECTID)
			break;

		btrfs_release_path(path);
		key.objectid = key.offset;
		key.offset = (u64)-1;
		dirid = key.objectid;
	}
	memmove(name, ptr, total_len);
	name[total_len] = '\0';
	ret = 0;
out:
	btrfs_free_path(path);
	return ret;
}

static int btrfs_search_path_in_tree_user(struct inode *inode,
				struct btrfs_ioctl_ino_lookup_user_args *args)
{
	struct btrfs_fs_info *fs_info = BTRFS_I(inode)->root->fs_info;
	struct super_block *sb = inode->i_sb;
	struct btrfs_key upper_limit = BTRFS_I(inode)->location;
	u64 treeid = BTRFS_I(inode)->root->root_key.objectid;
	u64 dirid = args->dirid;
	unsigned long item_off;
	unsigned long item_len;
	struct btrfs_inode_ref *iref;
	struct btrfs_root_ref *rref;
	struct btrfs_root *root;
	struct btrfs_path *path;
	struct btrfs_key key, key2;
	struct extent_buffer *leaf;
	struct inode *temp_inode;
	char *ptr;
	int slot;
	int len;
	int total_len = 0;
	int ret;

	path = btrfs_alloc_path();
	if (!path)
		return -ENOMEM;

	/*
	 * If the bottom subvolume does not exist directly under upper_limit,
	 * construct the path from the bottom up.
	 */
	if (dirid != upper_limit.objectid) {
		ptr = &args->path[BTRFS_INO_LOOKUP_USER_PATH_MAX - 1];

		key.objectid = treeid;
		key.type = BTRFS_ROOT_ITEM_KEY;
		key.offset = (u64)-1;
		root = btrfs_read_fs_root_no_name(fs_info, &key);
		if (IS_ERR(root)) {
			ret = PTR_ERR(root);
			goto out;
		}

		key.objectid = dirid;
		key.type = BTRFS_INODE_REF_KEY;
		key.offset = (u64)-1;
		while (1) {
			ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
			if (ret < 0) {
				goto out;
			} else if (ret > 0) {
				ret = btrfs_previous_item(root, path, dirid,
							  BTRFS_INODE_REF_KEY);
				if (ret < 0) {
					goto out;
				} else if (ret > 0) {
					ret = -ENOENT;
					goto out;
				}
			}

			leaf = path->nodes[0];
			slot = path->slots[0];
			btrfs_item_key_to_cpu(leaf, &key, slot);

			iref = btrfs_item_ptr(leaf, slot, struct btrfs_inode_ref);
			len = btrfs_inode_ref_name_len(leaf, iref);
			ptr -= len + 1;
			total_len += len + 1;
			if (ptr < args->path) {
				ret = -ENAMETOOLONG;
				goto out;
			}

			*(ptr + len) = '/';
			read_extent_buffer(leaf, ptr,
					(unsigned long)(iref + 1), len);

			/* Check the read+exec permission of this directory */
			ret = btrfs_previous_item(root, path, dirid,
						  BTRFS_INODE_ITEM_KEY);
			if (ret < 0) {
				goto out;
			} else if (ret > 0) {
				ret = -ENOENT;
				goto out;
			}

			leaf = path->nodes[0];
			slot = path->slots[0];
			btrfs_item_key_to_cpu(leaf, &key2, slot);
			if (key2.objectid != dirid) {
				ret = -ENOENT;
				goto out;
			}

			temp_inode = btrfs_iget(sb, &key2, root, NULL);
			if (IS_ERR(temp_inode)) {
				ret = PTR_ERR(temp_inode);
				goto out;
			}
			ret = inode_permission(temp_inode, MAY_READ | MAY_EXEC);
			iput(temp_inode);
			if (ret) {
				ret = -EACCES;
				goto out;
			}

			if (key.offset == upper_limit.objectid)
				break;
			if (key.objectid == BTRFS_FIRST_FREE_OBJECTID) {
				ret = -EACCES;
				goto out;
			}

			btrfs_release_path(path);
			key.objectid = key.offset;
			key.offset = (u64)-1;
			dirid = key.objectid;
		}

		memmove(args->path, ptr, total_len);
		args->path[total_len] = '\0';
		btrfs_release_path(path);
	}

	/* Get the bottom subvolume's name from ROOT_REF */
	root = fs_info->tree_root;
	key.objectid = treeid;
	key.type = BTRFS_ROOT_REF_KEY;
	key.offset = args->treeid;
	ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
	if (ret < 0) {
		goto out;
	} else if (ret > 0) {
		ret = -ENOENT;
		goto out;
	}

	leaf = path->nodes[0];
	slot = path->slots[0];
	btrfs_item_key_to_cpu(leaf, &key, slot);

	item_off = btrfs_item_ptr_offset(leaf, slot);
	item_len = btrfs_item_size_nr(leaf, slot);
	/* Check if dirid in ROOT_REF corresponds to passed dirid */
	rref = btrfs_item_ptr(leaf, slot, struct btrfs_root_ref);
	if (args->dirid != btrfs_root_ref_dirid(leaf, rref)) {
		ret = -EINVAL;
		goto out;
	}

	/* Copy subvolume's name */
	item_off += sizeof(struct btrfs_root_ref);
	item_len -= sizeof(struct btrfs_root_ref);
	read_extent_buffer(leaf, args->name, item_off, item_len);
	args->name[item_len] = 0;

out:
	btrfs_free_path(path);
	return ret;
}

static noinline int btrfs_ioctl_ino_lookup(struct file *file,
					   void __user *argp)
{
	struct btrfs_ioctl_ino_lookup_args *args;
	struct inode *inode;
	int ret = 0;

	args = memdup_user(argp, sizeof(*args));
	if (IS_ERR(args))
		return PTR_ERR(args);

	inode = file_inode(file);

	/*
	 * Unprivileged query to obtain the containing subvolume root id. The
	 * path is reset so it's consistent with btrfs_search_path_in_tree.
	 */
	if (args->treeid == 0)
		args->treeid = BTRFS_I(inode)->root->root_key.objectid;

	if (args->objectid == BTRFS_FIRST_FREE_OBJECTID) {
		args->name[0] = 0;
		goto out;
	}

	if (!capable(CAP_SYS_ADMIN)) {
		ret = -EPERM;
		goto out;
	}

	ret = btrfs_search_path_in_tree(BTRFS_I(inode)->root->fs_info,
					args->treeid, args->objectid,
					args->name);

out:
	if (ret == 0 && copy_to_user(argp, args, sizeof(*args)))
		ret = -EFAULT;

	kfree(args);
	return ret;
}
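
/*
 * Usage sketch (userspace, illustrative only): resolving the id of the
 * subvolume containing an open file with BTRFS_IOC_INO_LOOKUP.  As the
 * handler above shows, leaving treeid at 0 and passing
 * BTRFS_FIRST_FREE_OBJECTID as the objectid needs no CAP_SYS_ADMIN and
 * returns only the subvolume id in treeid, with an empty path.
 *
 *	static __u64 subvol_id_of(int fd)
 *	{
 *		struct btrfs_ioctl_ino_lookup_args args;
 *
 *		memset(&args, 0, sizeof(args));
 *		args.objectid = BTRFS_FIRST_FREE_OBJECTID;
 *		if (ioctl(fd, BTRFS_IOC_INO_LOOKUP, &args) < 0)
 *			return 0;
 *		return args.treeid;
 *	}
 */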

/*
 * Version of ino_lookup ioctl (unprivileged)
 *
 * The main differences from ino_lookup ioctl are:
 *
 *   1. Read + Exec permission will be checked using inode_permission() during
 *      path construction. -EACCES will be returned in case of failure.
 *   2. Path construction will be stopped at the inode number which corresponds
 *      to the fd with which this ioctl is called. If the constructed path does
 *      not exist under the fd's inode, -EACCES will be returned.
 *   3. The name of the bottom subvolume is also searched and filled.
 */
static int btrfs_ioctl_ino_lookup_user(struct file *file, void __user *argp)
{
	struct btrfs_ioctl_ino_lookup_user_args *args;
	struct inode *inode;
	int ret;

	args = memdup_user(argp, sizeof(*args));
	if (IS_ERR(args))
		return PTR_ERR(args);

	inode = file_inode(file);

	if (args->dirid == BTRFS_FIRST_FREE_OBJECTID &&
	    BTRFS_I(inode)->location.objectid != BTRFS_FIRST_FREE_OBJECTID) {
		/*
		 * The subvolume does not exist under the fd with which this
		 * is called
		 */
		kfree(args);
		return -EACCES;
	}

	ret = btrfs_search_path_in_tree_user(inode, args);

	if (ret == 0 && copy_to_user(argp, args, sizeof(*args)))
		ret = -EFAULT;

	kfree(args);
	return ret;
}

/* Get the subvolume information in BTRFS_ROOT_ITEM and BTRFS_ROOT_BACKREF */
static int btrfs_ioctl_get_subvol_info(struct file *file, void __user *argp)
{
	struct btrfs_ioctl_get_subvol_info_args *subvol_info;
	struct btrfs_fs_info *fs_info;
	struct btrfs_root *root;
	struct btrfs_path *path;
	struct btrfs_key key;
	struct btrfs_root_item *root_item;
	struct btrfs_root_ref *rref;
	struct extent_buffer *leaf;
	unsigned long item_off;
	unsigned long item_len;
	struct inode *inode;
	int slot;
	int ret = 0;

	path = btrfs_alloc_path();
	if (!path)
		return -ENOMEM;

	subvol_info = kzalloc(sizeof(*subvol_info), GFP_KERNEL);
	if (!subvol_info) {
		btrfs_free_path(path);
		return -ENOMEM;
	}

	inode = file_inode(file);
	fs_info = BTRFS_I(inode)->root->fs_info;

	/* Get root_item of inode's subvolume */
	key.objectid = BTRFS_I(inode)->root->root_key.objectid;
	key.type = BTRFS_ROOT_ITEM_KEY;
	key.offset = (u64)-1;
	root = btrfs_read_fs_root_no_name(fs_info, &key);
	if (IS_ERR(root)) {
		ret = PTR_ERR(root);
		goto out;
	}
	root_item = &root->root_item;

	subvol_info->treeid = key.objectid;

	subvol_info->generation = btrfs_root_generation(root_item);
	subvol_info->flags = btrfs_root_flags(root_item);

	memcpy(subvol_info->uuid, root_item->uuid, BTRFS_UUID_SIZE);
	memcpy(subvol_info->parent_uuid, root_item->parent_uuid,
	       BTRFS_UUID_SIZE);
	memcpy(subvol_info->received_uuid, root_item->received_uuid,
	       BTRFS_UUID_SIZE);

	subvol_info->ctransid = btrfs_root_ctransid(root_item);
	subvol_info->ctime.sec = btrfs_stack_timespec_sec(&root_item->ctime);
	subvol_info->ctime.nsec = btrfs_stack_timespec_nsec(&root_item->ctime);

	subvol_info->otransid = btrfs_root_otransid(root_item);
	subvol_info->otime.sec = btrfs_stack_timespec_sec(&root_item->otime);
	subvol_info->otime.nsec = btrfs_stack_timespec_nsec(&root_item->otime);

	subvol_info->stransid = btrfs_root_stransid(root_item);
	subvol_info->stime.sec = btrfs_stack_timespec_sec(&root_item->stime);
	subvol_info->stime.nsec = btrfs_stack_timespec_nsec(&root_item->stime);

	subvol_info->rtransid = btrfs_root_rtransid(root_item);
	subvol_info->rtime.sec = btrfs_stack_timespec_sec(&root_item->rtime);
	subvol_info->rtime.nsec = btrfs_stack_timespec_nsec(&root_item->rtime);

	if (key.objectid != BTRFS_FS_TREE_OBJECTID) {
		/* Search root tree for ROOT_BACKREF of this subvolume */
		root = fs_info->tree_root;

		key.type = BTRFS_ROOT_BACKREF_KEY;
		key.offset = 0;
		ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
		if (ret < 0) {
			goto out;
		} else if (path->slots[0] >=
			   btrfs_header_nritems(path->nodes[0])) {
			ret = btrfs_next_leaf(root, path);
			if (ret < 0) {
				goto out;
			} else if (ret > 0) {
				ret = -EUCLEAN;
				goto out;
			}
		}

		leaf = path->nodes[0];
		slot = path->slots[0];
		btrfs_item_key_to_cpu(leaf, &key, slot);
		if (key.objectid == subvol_info->treeid &&
		    key.type == BTRFS_ROOT_BACKREF_KEY) {
			subvol_info->parent_id = key.offset;

			rref = btrfs_item_ptr(leaf, slot, struct btrfs_root_ref);
			subvol_info->dirid = btrfs_root_ref_dirid(leaf, rref);

			item_off = btrfs_item_ptr_offset(leaf, slot)
					+ sizeof(struct btrfs_root_ref);
			item_len = btrfs_item_size_nr(leaf, slot)
					- sizeof(struct btrfs_root_ref);
			read_extent_buffer(leaf, subvol_info->name,
					   item_off, item_len);
		} else {
			ret = -ENOENT;
			goto out;
		}
	}

	if (copy_to_user(argp, subvol_info, sizeof(*subvol_info)))
		ret = -EFAULT;

out:
	btrfs_free_path(path);
	kzfree(subvol_info);
	return ret;
}
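
/*
 * Usage sketch (userspace, illustrative only): reading the root item data
 * of the containing subvolume via BTRFS_IOC_GET_SUBVOL_INFO.  The ioctl is
 * unprivileged because it only describes a subvolume the caller can
 * already reach through the passed file descriptor.
 *
 *	struct btrfs_ioctl_get_subvol_info_args info;
 *
 *	memset(&info, 0, sizeof(info));
 *	if (ioctl(fd, BTRFS_IOC_GET_SUBVOL_INFO, &info) == 0)
 *		printf("subvol %llu '%s' generation %llu\n",
 *		       info.treeid, info.name, info.generation);
 */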

/*
 * Return ROOT_REF information of the subvolume containing this inode
 * except the subvolume name.
 */
static int btrfs_ioctl_get_subvol_rootref(struct file *file, void __user *argp)
{
	struct btrfs_ioctl_get_subvol_rootref_args *rootrefs;
	struct btrfs_root_ref *rref;
	struct btrfs_root *root;
	struct btrfs_path *path;
	struct btrfs_key key;
	struct extent_buffer *leaf;
	struct inode *inode;
	u64 objectid;
	int slot;
	int ret;
	u8 found;

	path = btrfs_alloc_path();
	if (!path)
		return -ENOMEM;

	rootrefs = memdup_user(argp, sizeof(*rootrefs));
	if (IS_ERR(rootrefs)) {
		btrfs_free_path(path);
		return PTR_ERR(rootrefs);
	}

	inode = file_inode(file);
	root = BTRFS_I(inode)->root->fs_info->tree_root;
	objectid = BTRFS_I(inode)->root->root_key.objectid;

	key.objectid = objectid;
	key.type = BTRFS_ROOT_REF_KEY;
	key.offset = rootrefs->min_treeid;
	found = 0;

	ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
	if (ret < 0) {
		goto out;
	} else if (path->slots[0] >=
		   btrfs_header_nritems(path->nodes[0])) {
		ret = btrfs_next_leaf(root, path);
		if (ret < 0) {
			goto out;
		} else if (ret > 0) {
			ret = -EUCLEAN;
			goto out;
		}
	}
	while (1) {
		leaf = path->nodes[0];
		slot = path->slots[0];

		btrfs_item_key_to_cpu(leaf, &key, slot);
		if (key.objectid != objectid || key.type != BTRFS_ROOT_REF_KEY) {
			ret = 0;
			goto out;
		}

		if (found == BTRFS_MAX_ROOTREF_BUFFER_NUM) {
			ret = -EOVERFLOW;
			goto out;
		}

		rref = btrfs_item_ptr(leaf, slot, struct btrfs_root_ref);
		rootrefs->rootref[found].treeid = key.offset;
		rootrefs->rootref[found].dirid =
				btrfs_root_ref_dirid(leaf, rref);
		found++;

		ret = btrfs_next_item(root, path);
		if (ret < 0) {
			goto out;
		} else if (ret > 0) {
			ret = -EUCLEAN;
			goto out;
		}
	}

out:
	if (!ret || ret == -EOVERFLOW) {
		rootrefs->num_items = found;
		/* update min_treeid for next search */
		if (found)
			rootrefs->min_treeid =
				rootrefs->rootref[found - 1].treeid + 1;
		if (copy_to_user(argp, rootrefs, sizeof(*rootrefs)))
			ret = -EFAULT;
	}

	kfree(rootrefs);
	btrfs_free_path(path);

	return ret;
}
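
/*
 * Usage sketch (userspace, illustrative only): paging through the child
 * subvolumes with BTRFS_IOC_GET_SUBVOL_ROOTREF.  On -EOVERFLOW the fixed
 * rootref array was filled and min_treeid has already been advanced by
 * the handler above, so the caller simply repeats the call.
 *
 *	struct btrfs_ioctl_get_subvol_rootref_args refs;
 *	int i, err;
 *
 *	memset(&refs, 0, sizeof(refs));
 *	do {
 *		err = ioctl(fd, BTRFS_IOC_GET_SUBVOL_ROOTREF, &refs);
 *		if (err < 0 && errno != EOVERFLOW)
 *			break;
 *		for (i = 0; i < refs.num_items; i++)
 *			printf("tree %llu under dirid %llu\n",
 *			       refs.rootref[i].treeid,
 *			       refs.rootref[i].dirid);
 *	} while (err < 0);
 */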
2911 */ 2912 err = -EINVAL; 2913 if (root == dest) 2914 goto out_dput; 2915 2916 err = inode_permission(inode, MAY_WRITE | MAY_EXEC); 2917 if (err) 2918 goto out_dput; 2919 } 2920 2921 /* check if subvolume may be deleted by a user */ 2922 err = btrfs_may_delete(dir, dentry, 1); 2923 if (err) 2924 goto out_dput; 2925 2926 if (btrfs_ino(BTRFS_I(inode)) != BTRFS_FIRST_FREE_OBJECTID) { 2927 err = -EINVAL; 2928 goto out_dput; 2929 } 2930 2931 inode_lock(inode); 2932 err = btrfs_delete_subvolume(dir, dentry); 2933 inode_unlock(inode); 2934 if (!err) 2935 d_delete(dentry); 2936 2937 out_dput: 2938 dput(dentry); 2939 out_unlock_dir: 2940 inode_unlock(dir); 2941 out_drop_write: 2942 mnt_drop_write_file(file); 2943 out: 2944 kfree(vol_args); 2945 return err; 2946 } 2947 2948 static int btrfs_ioctl_defrag(struct file *file, void __user *argp) 2949 { 2950 struct inode *inode = file_inode(file); 2951 struct btrfs_root *root = BTRFS_I(inode)->root; 2952 struct btrfs_ioctl_defrag_range_args *range; 2953 int ret; 2954 2955 ret = mnt_want_write_file(file); 2956 if (ret) 2957 return ret; 2958 2959 if (btrfs_root_readonly(root)) { 2960 ret = -EROFS; 2961 goto out; 2962 } 2963 2964 switch (inode->i_mode & S_IFMT) { 2965 case S_IFDIR: 2966 if (!capable(CAP_SYS_ADMIN)) { 2967 ret = -EPERM; 2968 goto out; 2969 } 2970 ret = btrfs_defrag_root(root); 2971 break; 2972 case S_IFREG: 2973 /* 2974 * Note that this does not check the file descriptor for write 2975 * access. This prevents defragmenting executables that are 2976 * running and allows defrag on files open in read-only mode. 2977 */ 2978 if (!capable(CAP_SYS_ADMIN) && 2979 inode_permission(inode, MAY_WRITE)) { 2980 ret = -EPERM; 2981 goto out; 2982 } 2983 2984 range = kzalloc(sizeof(*range), GFP_KERNEL); 2985 if (!range) { 2986 ret = -ENOMEM; 2987 goto out; 2988 } 2989 2990 if (argp) { 2991 if (copy_from_user(range, argp, 2992 sizeof(*range))) { 2993 ret = -EFAULT; 2994 kfree(range); 2995 goto out; 2996 } 2997 /* compression requires us to start the IO */ 2998 if ((range->flags & BTRFS_DEFRAG_RANGE_COMPRESS)) { 2999 range->flags |= BTRFS_DEFRAG_RANGE_START_IO; 3000 range->extent_thresh = (u32)-1; 3001 } 3002 } else { 3003 /* the rest are all set to zero by kzalloc */ 3004 range->len = (u64)-1; 3005 } 3006 ret = btrfs_defrag_file(file_inode(file), file, 3007 range, BTRFS_OLDEST_GENERATION, 0); 3008 if (ret > 0) 3009 ret = 0; 3010 kfree(range); 3011 break; 3012 default: 3013 ret = -EINVAL; 3014 } 3015 out: 3016 mnt_drop_write_file(file); 3017 return ret; 3018 } 3019 3020 static long btrfs_ioctl_add_dev(struct btrfs_fs_info *fs_info, void __user *arg) 3021 { 3022 struct btrfs_ioctl_vol_args *vol_args; 3023 int ret; 3024 3025 if (!capable(CAP_SYS_ADMIN)) 3026 return -EPERM; 3027 3028 if (test_and_set_bit(BTRFS_FS_EXCL_OP, &fs_info->flags)) 3029 return BTRFS_ERROR_DEV_EXCL_RUN_IN_PROGRESS; 3030 3031 vol_args = memdup_user(arg, sizeof(*vol_args)); 3032 if (IS_ERR(vol_args)) { 3033 ret = PTR_ERR(vol_args); 3034 goto out; 3035 } 3036 3037 vol_args->name[BTRFS_PATH_NAME_MAX] = '\0'; 3038 ret = btrfs_init_new_device(fs_info, vol_args->name); 3039 3040 if (!ret) 3041 btrfs_info(fs_info, "disk added %s", vol_args->name); 3042 3043 kfree(vol_args); 3044 out: 3045 clear_bit(BTRFS_FS_EXCL_OP, &fs_info->flags); 3046 return ret; 3047 } 3048 3049 static long btrfs_ioctl_rm_dev_v2(struct file *file, void __user *arg) 3050 { 3051 struct inode *inode = file_inode(file); 3052 struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb); 3053 struct btrfs_ioctl_vol_args_v2 *vol_args; 

static long btrfs_ioctl_add_dev(struct btrfs_fs_info *fs_info, void __user *arg)
{
	struct btrfs_ioctl_vol_args *vol_args;
	int ret;

	if (!capable(CAP_SYS_ADMIN))
		return -EPERM;

	if (test_and_set_bit(BTRFS_FS_EXCL_OP, &fs_info->flags))
		return BTRFS_ERROR_DEV_EXCL_RUN_IN_PROGRESS;

	vol_args = memdup_user(arg, sizeof(*vol_args));
	if (IS_ERR(vol_args)) {
		ret = PTR_ERR(vol_args);
		goto out;
	}

	vol_args->name[BTRFS_PATH_NAME_MAX] = '\0';
	ret = btrfs_init_new_device(fs_info, vol_args->name);

	if (!ret)
		btrfs_info(fs_info, "disk added %s", vol_args->name);

	kfree(vol_args);
out:
	clear_bit(BTRFS_FS_EXCL_OP, &fs_info->flags);
	return ret;
}

static long btrfs_ioctl_rm_dev_v2(struct file *file, void __user *arg)
{
	struct inode *inode = file_inode(file);
	struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
	struct btrfs_ioctl_vol_args_v2 *vol_args;
	int ret;

	if (!capable(CAP_SYS_ADMIN))
		return -EPERM;

	ret = mnt_want_write_file(file);
	if (ret)
		return ret;

	vol_args = memdup_user(arg, sizeof(*vol_args));
	if (IS_ERR(vol_args)) {
		ret = PTR_ERR(vol_args);
		goto err_drop;
	}

	/* Check for compatibility, reject unknown flags */
	if (vol_args->flags & ~BTRFS_VOL_ARG_V2_FLAGS_SUPPORTED) {
		ret = -EOPNOTSUPP;
		goto out;
	}

	if (test_and_set_bit(BTRFS_FS_EXCL_OP, &fs_info->flags)) {
		ret = BTRFS_ERROR_DEV_EXCL_RUN_IN_PROGRESS;
		goto out;
	}

	if (vol_args->flags & BTRFS_DEVICE_SPEC_BY_ID) {
		ret = btrfs_rm_device(fs_info, NULL, vol_args->devid);
	} else {
		vol_args->name[BTRFS_SUBVOL_NAME_MAX] = '\0';
		ret = btrfs_rm_device(fs_info, vol_args->name, 0);
	}
	clear_bit(BTRFS_FS_EXCL_OP, &fs_info->flags);

	if (!ret) {
		if (vol_args->flags & BTRFS_DEVICE_SPEC_BY_ID)
			btrfs_info(fs_info, "device deleted: id %llu",
				   vol_args->devid);
		else
			btrfs_info(fs_info, "device deleted: %s",
				   vol_args->name);
	}
out:
	kfree(vol_args);
err_drop:
	mnt_drop_write_file(file);
	return ret;
}

static long btrfs_ioctl_rm_dev(struct file *file, void __user *arg)
{
	struct inode *inode = file_inode(file);
	struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
	struct btrfs_ioctl_vol_args *vol_args;
	int ret;

	if (!capable(CAP_SYS_ADMIN))
		return -EPERM;

	ret = mnt_want_write_file(file);
	if (ret)
		return ret;

	if (test_and_set_bit(BTRFS_FS_EXCL_OP, &fs_info->flags)) {
		ret = BTRFS_ERROR_DEV_EXCL_RUN_IN_PROGRESS;
		goto out_drop_write;
	}

	vol_args = memdup_user(arg, sizeof(*vol_args));
	if (IS_ERR(vol_args)) {
		ret = PTR_ERR(vol_args);
		goto out;
	}

	vol_args->name[BTRFS_PATH_NAME_MAX] = '\0';
	ret = btrfs_rm_device(fs_info, vol_args->name, 0);

	if (!ret)
		btrfs_info(fs_info, "disk deleted %s", vol_args->name);
	kfree(vol_args);
out:
	clear_bit(BTRFS_FS_EXCL_OP, &fs_info->flags);
out_drop_write:
	mnt_drop_write_file(file);

	return ret;
}

static long btrfs_ioctl_fs_info(struct btrfs_fs_info *fs_info,
				void __user *arg)
{
	struct btrfs_ioctl_fs_info_args *fi_args;
	struct btrfs_device *device;
	struct btrfs_fs_devices *fs_devices = fs_info->fs_devices;
	int ret = 0;

	fi_args = kzalloc(sizeof(*fi_args), GFP_KERNEL);
	if (!fi_args)
		return -ENOMEM;

	rcu_read_lock();
	fi_args->num_devices = fs_devices->num_devices;

	list_for_each_entry_rcu(device, &fs_devices->devices, dev_list) {
		if (device->devid > fi_args->max_id)
			fi_args->max_id = device->devid;
	}
	rcu_read_unlock();

	memcpy(&fi_args->fsid, fs_devices->fsid, sizeof(fi_args->fsid));
	fi_args->nodesize = fs_info->nodesize;
	fi_args->sectorsize = fs_info->sectorsize;
	fi_args->clone_alignment = fs_info->sectorsize;

	if (copy_to_user(arg, fi_args, sizeof(*fi_args)))
		ret = -EFAULT;

	kfree(fi_args);
	return ret;
}
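
/*
 * Usage sketch (userspace, illustrative only): reading filesystem-wide
 * geometry with BTRFS_IOC_FS_INFO.  max_id bounds the devid space, so a
 * scanner would follow up with BTRFS_IOC_DEV_INFO (handled below) for each
 * devid up to max_id, skipping the ENODEV holes left by removed devices.
 *
 *	struct btrfs_ioctl_fs_info_args fi;
 *
 *	memset(&fi, 0, sizeof(fi));
 *	if (ioctl(fd, BTRFS_IOC_FS_INFO, &fi) == 0)
 *		printf("%llu devices, nodesize %u, sectorsize %u\n",
 *		       fi.num_devices, fi.nodesize, fi.sectorsize);
 */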

static long btrfs_ioctl_dev_info(struct btrfs_fs_info *fs_info,
				 void __user *arg)
{
	struct btrfs_ioctl_dev_info_args *di_args;
	struct btrfs_device *dev;
	int ret = 0;
	char *s_uuid = NULL;

	di_args = memdup_user(arg, sizeof(*di_args));
	if (IS_ERR(di_args))
		return PTR_ERR(di_args);

	if (!btrfs_is_empty_uuid(di_args->uuid))
		s_uuid = di_args->uuid;

	rcu_read_lock();
	dev = btrfs_find_device(fs_info->fs_devices, di_args->devid, s_uuid,
				NULL, true);

	if (!dev) {
		ret = -ENODEV;
		goto out;
	}

	di_args->devid = dev->devid;
	di_args->bytes_used = btrfs_device_get_bytes_used(dev);
	di_args->total_bytes = btrfs_device_get_total_bytes(dev);
	memcpy(di_args->uuid, dev->uuid, sizeof(di_args->uuid));
	if (dev->name) {
		strncpy(di_args->path, rcu_str_deref(dev->name),
				sizeof(di_args->path) - 1);
		di_args->path[sizeof(di_args->path) - 1] = 0;
	} else {
		di_args->path[0] = '\0';
	}

out:
	rcu_read_unlock();
	if (ret == 0 && copy_to_user(arg, di_args, sizeof(*di_args)))
		ret = -EFAULT;

	kfree(di_args);
	return ret;
}

static void btrfs_double_extent_unlock(struct inode *inode1, u64 loff1,
				       struct inode *inode2, u64 loff2, u64 len)
{
	unlock_extent(&BTRFS_I(inode1)->io_tree, loff1, loff1 + len - 1);
	unlock_extent(&BTRFS_I(inode2)->io_tree, loff2, loff2 + len - 1);
}

static void btrfs_double_extent_lock(struct inode *inode1, u64 loff1,
				     struct inode *inode2, u64 loff2, u64 len)
{
	if (inode1 < inode2) {
		swap(inode1, inode2);
		swap(loff1, loff2);
	} else if (inode1 == inode2 && loff2 < loff1) {
		swap(loff1, loff2);
	}
	lock_extent(&BTRFS_I(inode1)->io_tree, loff1, loff1 + len - 1);
	lock_extent(&BTRFS_I(inode2)->io_tree, loff2, loff2 + len - 1);
}

static int btrfs_extent_same_range(struct inode *src, u64 loff, u64 len,
				   struct inode *dst, u64 dst_loff)
{
	int ret;

	/*
	 * Lock destination range to serialize with concurrent readpages() and
	 * source range to serialize with relocation.
	 */
	btrfs_double_extent_lock(src, loff, dst, dst_loff, len);
	ret = btrfs_clone(src, dst, loff, len, len, dst_loff, 1);
	btrfs_double_extent_unlock(src, loff, dst, dst_loff, len);

	return ret;
}

#define BTRFS_MAX_DEDUPE_LEN	SZ_16M

static int btrfs_extent_same(struct inode *src, u64 loff, u64 olen,
			     struct inode *dst, u64 dst_loff)
{
	int ret = 0;	/* stays 0 for a zero-length range */
	u64 i, tail_len, chunk_count;

	tail_len = olen % BTRFS_MAX_DEDUPE_LEN;
	chunk_count = div_u64(olen, BTRFS_MAX_DEDUPE_LEN);

	for (i = 0; i < chunk_count; i++) {
		ret = btrfs_extent_same_range(src, loff, BTRFS_MAX_DEDUPE_LEN,
					      dst, dst_loff);
		if (ret)
			return ret;

		loff += BTRFS_MAX_DEDUPE_LEN;
		dst_loff += BTRFS_MAX_DEDUPE_LEN;
	}

	if (tail_len > 0)
		ret = btrfs_extent_same_range(src, loff, tail_len, dst,
					      dst_loff);

	return ret;
}
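
/*
 * Usage sketch (userspace, illustrative only): deduplicating a range via
 * the generic FIDEDUPERANGE ioctl from linux/fs.h, which reaches
 * btrfs_extent_same() through btrfs_remap_file_range() further below.
 * Lengths larger than BTRFS_MAX_DEDUPE_LEN are chunked by the kernel as
 * shown above, so userspace may pass arbitrary sizes.
 *
 *	struct file_dedupe_range *range;
 *
 *	range = calloc(1, sizeof(*range) + sizeof(range->info[0]));
 *	range->src_offset = 0;
 *	range->src_length = 1024 * 1024;
 *	range->dest_count = 1;
 *	range->info[0].dest_fd = dst_fd;
 *	range->info[0].dest_offset = 0;
 *	if (ioctl(src_fd, FIDEDUPERANGE, range) == 0 &&
 *	    range->info[0].status == FILE_DEDUPE_RANGE_SAME)
 *		printf("deduped %llu bytes\n",
 *		       range->info[0].bytes_deduped);
 *	free(range);
 */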

static int clone_finish_inode_update(struct btrfs_trans_handle *trans,
				     struct inode *inode,
				     u64 endoff,
				     const u64 destoff,
				     const u64 olen,
				     int no_time_update)
{
	struct btrfs_root *root = BTRFS_I(inode)->root;
	int ret;

	inode_inc_iversion(inode);
	if (!no_time_update)
		inode->i_mtime = inode->i_ctime = current_time(inode);
	/*
	 * We round up to the block size at eof when determining which
	 * extents to clone above, but shouldn't round up the file size.
	 */
	if (endoff > destoff + olen)
		endoff = destoff + olen;
	if (endoff > inode->i_size)
		btrfs_i_size_write(BTRFS_I(inode), endoff);

	ret = btrfs_update_inode(trans, root, inode);
	if (ret) {
		btrfs_abort_transaction(trans, ret);
		btrfs_end_transaction(trans);
		goto out;
	}
	ret = btrfs_end_transaction(trans);
out:
	return ret;
}

static void clone_update_extent_map(struct btrfs_inode *inode,
				    const struct btrfs_trans_handle *trans,
				    const struct btrfs_path *path,
				    const u64 hole_offset,
				    const u64 hole_len)
{
	struct extent_map_tree *em_tree = &inode->extent_tree;
	struct extent_map *em;
	int ret;

	em = alloc_extent_map();
	if (!em) {
		set_bit(BTRFS_INODE_NEEDS_FULL_SYNC, &inode->runtime_flags);
		return;
	}

	if (path) {
		struct btrfs_file_extent_item *fi;

		fi = btrfs_item_ptr(path->nodes[0], path->slots[0],
				    struct btrfs_file_extent_item);
		btrfs_extent_item_to_extent_map(inode, path, fi, false, em);
		em->generation = -1;
		if (btrfs_file_extent_type(path->nodes[0], fi) ==
		    BTRFS_FILE_EXTENT_INLINE)
			set_bit(BTRFS_INODE_NEEDS_FULL_SYNC,
				&inode->runtime_flags);
	} else {
		em->start = hole_offset;
		em->len = hole_len;
		em->ram_bytes = em->len;
		em->orig_start = hole_offset;
		em->block_start = EXTENT_MAP_HOLE;
		em->block_len = 0;
		em->orig_block_len = 0;
		em->compress_type = BTRFS_COMPRESS_NONE;
		em->generation = trans->transid;
	}

	while (1) {
		write_lock(&em_tree->lock);
		ret = add_extent_mapping(em_tree, em, 1);
		write_unlock(&em_tree->lock);
		if (ret != -EEXIST) {
			free_extent_map(em);
			break;
		}
		btrfs_drop_extent_cache(inode, em->start,
					em->start + em->len - 1, 0);
	}

	if (ret)
		set_bit(BTRFS_INODE_NEEDS_FULL_SYNC, &inode->runtime_flags);
}

/*
 * Make sure we do not end up inserting an inline extent into a file that has
 * already other (non-inline) extents. If a file has an inline extent it can
 * not have any other extents and the (single) inline extent must start at the
 * file offset 0. Failing to respect these rules will lead to file corruption,
 * resulting in EIO errors on read/write operations, hitting BUG_ON's in mm,
 * etc.
 *
 * We can have extents that have been already written to disk or we can have
 * dirty ranges still in delalloc, in which case the extent maps and items are
 * created only when we run delalloc, and the delalloc ranges might fall outside
 * the range we are currently locking in the inode's io tree. So we check the
 * inode's i_size because of that (i_size updates are done while holding the
 * i_mutex, which we are holding here).
 * We also check to see if the inode has a size not greater than "datal" but
 * has extents beyond it, due to a fallocate with FALLOC_FL_KEEP_SIZE (and we
 * are protected against such concurrent fallocate calls by the i_mutex).
 *
 * If the file has no extents but a size greater than datal, do not allow the
 * copy because we would need to turn the inline extent into a non-inline one
 * (even with NO_HOLES enabled). If we find our destination inode only has one
 * inline extent, just overwrite it with the source inline extent if its size
 * is less than the source extent's size, or we could copy the source inline
 * extent's data into the destination inode's inline extent if the latter is
 * greater than the former.
 */
static int clone_copy_inline_extent(struct inode *dst,
				    struct btrfs_trans_handle *trans,
				    struct btrfs_path *path,
				    struct btrfs_key *new_key,
				    const u64 drop_start,
				    const u64 datal,
				    const u64 skip,
				    const u64 size,
				    char *inline_data)
{
	struct btrfs_fs_info *fs_info = btrfs_sb(dst->i_sb);
	struct btrfs_root *root = BTRFS_I(dst)->root;
	const u64 aligned_end = ALIGN(new_key->offset + datal,
				      fs_info->sectorsize);
	int ret;
	struct btrfs_key key;

	if (new_key->offset > 0)
		return -EOPNOTSUPP;

	key.objectid = btrfs_ino(BTRFS_I(dst));
	key.type = BTRFS_EXTENT_DATA_KEY;
	key.offset = 0;
	ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
	if (ret < 0) {
		return ret;
	} else if (ret > 0) {
		if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
			ret = btrfs_next_leaf(root, path);
			if (ret < 0)
				return ret;
			else if (ret > 0)
				goto copy_inline_extent;
		}
		btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
		if (key.objectid == btrfs_ino(BTRFS_I(dst)) &&
		    key.type == BTRFS_EXTENT_DATA_KEY) {
			ASSERT(key.offset > 0);
			return -EOPNOTSUPP;
		}
	} else if (i_size_read(dst) <= datal) {
		struct btrfs_file_extent_item *ei;
		u64 ext_len;

		/*
		 * If the file size is <= datal, make sure there are no other
		 * extents following (can happen due to a fallocate call with
		 * the flag FALLOC_FL_KEEP_SIZE).
		 */
		ei = btrfs_item_ptr(path->nodes[0], path->slots[0],
				    struct btrfs_file_extent_item);
		/*
		 * If it's an inline extent, it can not have other extents
		 * following it.
		 */
		if (btrfs_file_extent_type(path->nodes[0], ei) ==
		    BTRFS_FILE_EXTENT_INLINE)
			goto copy_inline_extent;

		ext_len = btrfs_file_extent_num_bytes(path->nodes[0], ei);
		if (ext_len > aligned_end)
			return -EOPNOTSUPP;

		ret = btrfs_next_item(root, path);
		if (ret < 0) {
			return ret;
		} else if (ret == 0) {
			btrfs_item_key_to_cpu(path->nodes[0], &key,
					      path->slots[0]);
			if (key.objectid == btrfs_ino(BTRFS_I(dst)) &&
			    key.type == BTRFS_EXTENT_DATA_KEY)
				return -EOPNOTSUPP;
		}
	}

copy_inline_extent:
	/*
	 * We have no extent items, or we have an extent at offset 0 which may
	 * or may not be inlined. All these cases are dealt the same way.
	 */
	if (i_size_read(dst) > datal) {
		/*
		 * If the destination inode has an inline extent...
		 * This would require copying the data from the source inline
		 * extent into the beginning of the destination's inline
		 * extent. But this is really complex, both extents can be
		 * compressed or just one of them, which would require
		 * decompressing and re-compressing data (which could increase
		 * the new compressed size, not allowing the compressed data
		 * to fit anymore in an inline extent).
		 * So just don't support this case for now (it should be rare,
		 * we are not really saving space when cloning inline extents).
		 */
		return -EOPNOTSUPP;
	}

	btrfs_release_path(path);
	ret = btrfs_drop_extents(trans, root, dst, drop_start, aligned_end, 1);
	if (ret)
		return ret;
	ret = btrfs_insert_empty_item(trans, root, path, new_key, size);
	if (ret)
		return ret;

	if (skip) {
		const u32 start = btrfs_file_extent_calc_inline_size(0);

		memmove(inline_data + start, inline_data + start + skip, datal);
	}

	write_extent_buffer(path->nodes[0], inline_data,
			    btrfs_item_ptr_offset(path->nodes[0],
						  path->slots[0]),
			    size);
	inode_add_bytes(dst, datal);

	return 0;
}

/**
 * btrfs_clone() - clone a range from one file to another
 *
 * @src: Inode to clone from
 * @inode: Inode to clone to
 * @off: Offset within source to start clone from
 * @olen: Original length, passed by user, of range to clone
 * @olen_aligned: Block-aligned value of olen
 * @destoff: Offset within @inode to start clone
 * @no_time_update: Whether to update mtime/ctime on the target inode
 */
static int btrfs_clone(struct inode *src, struct inode *inode,
		       const u64 off, const u64 olen, const u64 olen_aligned,
		       const u64 destoff, int no_time_update)
{
	struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
	struct btrfs_root *root = BTRFS_I(inode)->root;
	struct btrfs_path *path = NULL;
	struct extent_buffer *leaf;
	struct btrfs_trans_handle *trans;
	char *buf = NULL;
	struct btrfs_key key;
	u32 nritems;
	int slot;
	int ret;
	const u64 len = olen_aligned;
	u64 last_dest_end = destoff;

	ret = -ENOMEM;
	buf = kvmalloc(fs_info->nodesize, GFP_KERNEL);
	if (!buf)
		return ret;

	path = btrfs_alloc_path();
	if (!path) {
		kvfree(buf);
		return ret;
	}

	path->reada = READA_FORWARD;
	/* clone data */
	key.objectid = btrfs_ino(BTRFS_I(src));
	key.type = BTRFS_EXTENT_DATA_KEY;
	key.offset = off;

	while (1) {
		u64 next_key_min_offset = key.offset + 1;

		/*
		 * note the key will change type as we walk through the
		 * tree.
		 */
		path->leave_spinning = 1;
		ret = btrfs_search_slot(NULL, BTRFS_I(src)->root, &key, path,
				0, 0);
		if (ret < 0)
			goto out;
		/*
		 * First search, if no extent item that starts at offset off was
		 * found but the previous item is an extent item, it's possible
		 * it might overlap our target range, therefore process it.
		 */
		if (key.offset == off && ret > 0 && path->slots[0] > 0) {
			btrfs_item_key_to_cpu(path->nodes[0], &key,
					      path->slots[0] - 1);
			if (key.type == BTRFS_EXTENT_DATA_KEY)
				path->slots[0]--;
		}

		nritems = btrfs_header_nritems(path->nodes[0]);
process_slot:
		if (path->slots[0] >= nritems) {
			ret = btrfs_next_leaf(BTRFS_I(src)->root, path);
			if (ret < 0)
				goto out;
			if (ret > 0)
				break;
			nritems = btrfs_header_nritems(path->nodes[0]);
		}
		leaf = path->nodes[0];
		slot = path->slots[0];

		btrfs_item_key_to_cpu(leaf, &key, slot);
		if (key.type > BTRFS_EXTENT_DATA_KEY ||
		    key.objectid != btrfs_ino(BTRFS_I(src)))
			break;

		if (key.type == BTRFS_EXTENT_DATA_KEY) {
			struct btrfs_file_extent_item *extent;
			int type;
			u32 size;
			struct btrfs_key new_key;
			u64 disko = 0, diskl = 0;
			u64 datao = 0, datal = 0;
			u8 comp;
			u64 drop_start;

			extent = btrfs_item_ptr(leaf, slot,
						struct btrfs_file_extent_item);
			comp = btrfs_file_extent_compression(leaf, extent);
			type = btrfs_file_extent_type(leaf, extent);
			if (type == BTRFS_FILE_EXTENT_REG ||
			    type == BTRFS_FILE_EXTENT_PREALLOC) {
				disko = btrfs_file_extent_disk_bytenr(leaf,
								      extent);
				diskl = btrfs_file_extent_disk_num_bytes(leaf,
								 extent);
				datao = btrfs_file_extent_offset(leaf, extent);
				datal = btrfs_file_extent_num_bytes(leaf,
								    extent);
			} else if (type == BTRFS_FILE_EXTENT_INLINE) {
				/* take upper bound, may be compressed */
				datal = btrfs_file_extent_ram_bytes(leaf,
								    extent);
			}

			/*
			 * The first search might have left us at an extent
			 * item that ends before our target range's start, can
			 * happen if we have holes and NO_HOLES feature enabled.
			 */
			if (key.offset + datal <= off) {
				path->slots[0]++;
				goto process_slot;
			} else if (key.offset >= off + len) {
				break;
			}
			next_key_min_offset = key.offset + datal;
			size = btrfs_item_size_nr(leaf, slot);
			read_extent_buffer(leaf, buf,
					   btrfs_item_ptr_offset(leaf, slot),
					   size);

			btrfs_release_path(path);
			path->leave_spinning = 0;

			memcpy(&new_key, &key, sizeof(new_key));
			new_key.objectid = btrfs_ino(BTRFS_I(inode));
			if (off <= key.offset)
				new_key.offset = key.offset + destoff - off;
			else
				new_key.offset = destoff;

			/*
			 * Deal with a hole that doesn't have an extent item
			 * that represents it (NO_HOLES feature enabled).
			 * This hole is either in the middle of the cloning
			 * range or at the beginning (fully overlaps it or
			 * partially overlaps it).
			 */
			if (new_key.offset != last_dest_end)
				drop_start = last_dest_end;
			else
				drop_start = new_key.offset;

			/*
			 * 1 - adjusting old extent (we may have to split it)
			 * 1 - add new extent
			 * 1 - inode update
			 */
			trans = btrfs_start_transaction(root, 3);
			if (IS_ERR(trans)) {
				ret = PTR_ERR(trans);
				goto out;
			}

			if (type == BTRFS_FILE_EXTENT_REG ||
			    type == BTRFS_FILE_EXTENT_PREALLOC) {
				/*
				 *    a  | --- range to clone ---|  b
				 * | ------------- extent ------------- |
				 */

				/* subtract range b */
				if (key.offset + datal > off + len)
					datal = off + len - key.offset;

				/* subtract range a */
				if (off > key.offset) {
					datao += off - key.offset;
					datal -= off - key.offset;
				}

				ret = btrfs_drop_extents(trans, root, inode,
							 drop_start,
							 new_key.offset + datal,
							 1);
				if (ret) {
					if (ret != -EOPNOTSUPP)
						btrfs_abort_transaction(trans,
									ret);
					btrfs_end_transaction(trans);
					goto out;
				}

				ret = btrfs_insert_empty_item(trans, root, path,
							      &new_key, size);
				if (ret) {
					btrfs_abort_transaction(trans, ret);
					btrfs_end_transaction(trans);
					goto out;
				}

				leaf = path->nodes[0];
				slot = path->slots[0];
				write_extent_buffer(leaf, buf,
					    btrfs_item_ptr_offset(leaf, slot),
					    size);

				extent = btrfs_item_ptr(leaf, slot,
						struct btrfs_file_extent_item);

				/* disko == 0 means it's a hole */
				if (!disko)
					datao = 0;

				btrfs_set_file_extent_offset(leaf, extent,
							     datao);
				btrfs_set_file_extent_num_bytes(leaf, extent,
								datal);

				if (disko) {
					inode_add_bytes(inode, datal);
					ret = btrfs_inc_extent_ref(trans,
							root,
							disko, diskl, 0,
							root->root_key.objectid,
							btrfs_ino(BTRFS_I(inode)),
							new_key.offset - datao);
					if (ret) {
						btrfs_abort_transaction(trans,
									ret);
						btrfs_end_transaction(trans);
						goto out;

					}
				}
			} else if (type == BTRFS_FILE_EXTENT_INLINE) {
				u64 skip = 0;
				u64 trim = 0;

				if (off > key.offset) {
					skip = off - key.offset;
					new_key.offset += skip;
				}

				if (key.offset + datal > off + len)
					trim = key.offset + datal - (off + len);

				if (comp && (skip || trim)) {
					ret = -EINVAL;
					btrfs_end_transaction(trans);
					goto out;
				}
				size -= skip + trim;
				datal -= skip + trim;

				ret = clone_copy_inline_extent(inode,
							       trans, path,
							       &new_key,
							       drop_start,
							       datal,
							       skip, size, buf);
				if (ret) {
					if (ret != -EOPNOTSUPP)
						btrfs_abort_transaction(trans,
									ret);
					btrfs_end_transaction(trans);
					goto out;
				}
				leaf = path->nodes[0];
				slot = path->slots[0];
			}

			/* If we have an implicit hole (NO_HOLES feature). */
			if (drop_start < new_key.offset)
				clone_update_extent_map(BTRFS_I(inode), trans,
						NULL, drop_start,
						new_key.offset - drop_start);

			clone_update_extent_map(BTRFS_I(inode), trans,
					path, 0, 0);

			btrfs_mark_buffer_dirty(leaf);
			btrfs_release_path(path);

			last_dest_end = ALIGN(new_key.offset + datal,
					      fs_info->sectorsize);
			ret = clone_finish_inode_update(trans, inode,
							last_dest_end,
							destoff, olen,
							no_time_update);
			if (ret)
				goto out;
			if (new_key.offset + datal >= destoff + len)
				break;
		}
		btrfs_release_path(path);
		key.offset = next_key_min_offset;

		if (fatal_signal_pending(current)) {
			ret = -EINTR;
			goto out;
		}
	}
	ret = 0;

	if (last_dest_end < destoff + len) {
		/*
		 * We have an implicit hole (NO_HOLES feature is enabled) that
		 * fully or partially overlaps our cloning range at its end.
		 */
		btrfs_release_path(path);

		/*
		 * 1 - remove extent(s)
		 * 1 - inode update
		 */
		trans = btrfs_start_transaction(root, 2);
		if (IS_ERR(trans)) {
			ret = PTR_ERR(trans);
			goto out;
		}
		ret = btrfs_drop_extents(trans, root, inode,
					 last_dest_end, destoff + len, 1);
		if (ret) {
			if (ret != -EOPNOTSUPP)
				btrfs_abort_transaction(trans, ret);
			btrfs_end_transaction(trans);
			goto out;
		}
		clone_update_extent_map(BTRFS_I(inode), trans, NULL,
					last_dest_end,
					destoff + len - last_dest_end);
		ret = clone_finish_inode_update(trans, inode, destoff + len,
						destoff, olen, no_time_update);
	}

out:
	btrfs_free_path(path);
	kvfree(buf);
	return ret;
}
3904 */ 3905 ret = btrfs_wait_ordered_range(inode, wb_start, 3906 destoff - wb_start); 3907 if (ret) 3908 return ret; 3909 } 3910 3911 /* 3912 * Lock destination range to serialize with concurrent readpages() and 3913 * source range to serialize with relocation. 3914 */ 3915 btrfs_double_extent_lock(src, off, inode, destoff, len); 3916 ret = btrfs_clone(src, inode, off, olen, len, destoff, 0); 3917 btrfs_double_extent_unlock(src, off, inode, destoff, len); 3918 /* 3919 * Truncate page cache pages so that future reads will see the cloned 3920 * data immediately and not the previous data. 3921 */ 3922 truncate_inode_pages_range(&inode->i_data, 3923 round_down(destoff, PAGE_SIZE), 3924 round_up(destoff + len, PAGE_SIZE) - 1); 3925 3926 return ret; 3927 } 3928 3929 static int btrfs_remap_file_range_prep(struct file *file_in, loff_t pos_in, 3930 struct file *file_out, loff_t pos_out, 3931 loff_t *len, unsigned int remap_flags) 3932 { 3933 struct inode *inode_in = file_inode(file_in); 3934 struct inode *inode_out = file_inode(file_out); 3935 u64 bs = BTRFS_I(inode_out)->root->fs_info->sb->s_blocksize; 3936 bool same_inode = inode_out == inode_in; 3937 u64 wb_len; 3938 int ret; 3939 3940 if (!(remap_flags & REMAP_FILE_DEDUP)) { 3941 struct btrfs_root *root_out = BTRFS_I(inode_out)->root; 3942 3943 if (btrfs_root_readonly(root_out)) 3944 return -EROFS; 3945 3946 if (file_in->f_path.mnt != file_out->f_path.mnt || 3947 inode_in->i_sb != inode_out->i_sb) 3948 return -EXDEV; 3949 } 3950 3951 if (same_inode) 3952 inode_lock(inode_in); 3953 else 3954 lock_two_nondirectories(inode_in, inode_out); 3955 3956 /* don't make the dst file partly checksummed */ 3957 if ((BTRFS_I(inode_in)->flags & BTRFS_INODE_NODATASUM) != 3958 (BTRFS_I(inode_out)->flags & BTRFS_INODE_NODATASUM)) { 3959 ret = -EINVAL; 3960 goto out_unlock; 3961 } 3962 3963 /* 3964 * Now that the inodes are locked, we need to start writeback ourselves 3965 * and can not rely on the writeback from the VFS's generic helper 3966 * generic_remap_file_range_prep() because: 3967 * 3968 * 1) For compression we must call filemap_fdatawrite_range() range 3969 * twice (btrfs_fdatawrite_range() does it for us), and the generic 3970 * helper only calls it once; 3971 * 3972 * 2) filemap_fdatawrite_range(), called by the generic helper only 3973 * waits for the writeback to complete, i.e. for IO to be done, and 3974 * not for the ordered extents to complete. We need to wait for them 3975 * to complete so that new file extent items are in the fs tree. 3976 */ 3977 if (*len == 0 && !(remap_flags & REMAP_FILE_DEDUP)) 3978 wb_len = ALIGN(inode_in->i_size, bs) - ALIGN_DOWN(pos_in, bs); 3979 else 3980 wb_len = ALIGN(*len, bs); 3981 3982 /* 3983 * Since we don't lock ranges, wait for ongoing lockless dio writes (as 3984 * any in progress could create its ordered extents after we wait for 3985 * existing ordered extents below). 
3986 */ 3987 inode_dio_wait(inode_in); 3988 if (!same_inode) 3989 inode_dio_wait(inode_out); 3990 3991 ret = btrfs_wait_ordered_range(inode_in, ALIGN_DOWN(pos_in, bs), 3992 wb_len); 3993 if (ret < 0) 3994 goto out_unlock; 3995 ret = btrfs_wait_ordered_range(inode_out, ALIGN_DOWN(pos_out, bs), 3996 wb_len); 3997 if (ret < 0) 3998 goto out_unlock; 3999 4000 ret = generic_remap_file_range_prep(file_in, pos_in, file_out, pos_out, 4001 len, remap_flags); 4002 if (ret < 0 || *len == 0) 4003 goto out_unlock; 4004 4005 return 0; 4006 4007 out_unlock: 4008 if (same_inode) 4009 inode_unlock(inode_in); 4010 else 4011 unlock_two_nondirectories(inode_in, inode_out); 4012 4013 return ret; 4014 } 4015 4016 loff_t btrfs_remap_file_range(struct file *src_file, loff_t off, 4017 struct file *dst_file, loff_t destoff, loff_t len, 4018 unsigned int remap_flags) 4019 { 4020 struct inode *src_inode = file_inode(src_file); 4021 struct inode *dst_inode = file_inode(dst_file); 4022 bool same_inode = dst_inode == src_inode; 4023 int ret; 4024 4025 if (remap_flags & ~(REMAP_FILE_DEDUP | REMAP_FILE_ADVISORY)) 4026 return -EINVAL; 4027 4028 ret = btrfs_remap_file_range_prep(src_file, off, dst_file, destoff, 4029 &len, remap_flags); 4030 if (ret < 0 || len == 0) 4031 return ret; 4032 4033 if (remap_flags & REMAP_FILE_DEDUP) 4034 ret = btrfs_extent_same(src_inode, off, len, dst_inode, destoff); 4035 else 4036 ret = btrfs_clone_files(dst_file, src_file, off, len, destoff); 4037 4038 if (same_inode) 4039 inode_unlock(src_inode); 4040 else 4041 unlock_two_nondirectories(src_inode, dst_inode); 4042 4043 return ret < 0 ? ret : len; 4044 } 4045 4046 static long btrfs_ioctl_default_subvol(struct file *file, void __user *argp) 4047 { 4048 struct inode *inode = file_inode(file); 4049 struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb); 4050 struct btrfs_root *root = BTRFS_I(inode)->root; 4051 struct btrfs_root *new_root; 4052 struct btrfs_dir_item *di; 4053 struct btrfs_trans_handle *trans; 4054 struct btrfs_path *path; 4055 struct btrfs_key location; 4056 struct btrfs_disk_key disk_key; 4057 u64 objectid = 0; 4058 u64 dir_id; 4059 int ret; 4060 4061 if (!capable(CAP_SYS_ADMIN)) 4062 return -EPERM; 4063 4064 ret = mnt_want_write_file(file); 4065 if (ret) 4066 return ret; 4067 4068 if (copy_from_user(&objectid, argp, sizeof(objectid))) { 4069 ret = -EFAULT; 4070 goto out; 4071 } 4072 4073 if (!objectid) 4074 objectid = BTRFS_FS_TREE_OBJECTID; 4075 4076 location.objectid = objectid; 4077 location.type = BTRFS_ROOT_ITEM_KEY; 4078 location.offset = (u64)-1; 4079 4080 new_root = btrfs_read_fs_root_no_name(fs_info, &location); 4081 if (IS_ERR(new_root)) { 4082 ret = PTR_ERR(new_root); 4083 goto out; 4084 } 4085 if (!is_fstree(new_root->root_key.objectid)) { 4086 ret = -ENOENT; 4087 goto out; 4088 } 4089 4090 path = btrfs_alloc_path(); 4091 if (!path) { 4092 ret = -ENOMEM; 4093 goto out; 4094 } 4095 path->leave_spinning = 1; 4096 4097 trans = btrfs_start_transaction(root, 1); 4098 if (IS_ERR(trans)) { 4099 btrfs_free_path(path); 4100 ret = PTR_ERR(trans); 4101 goto out; 4102 } 4103 4104 dir_id = btrfs_super_root_dir(fs_info->super_copy); 4105 di = btrfs_lookup_dir_item(trans, fs_info->tree_root, path, 4106 dir_id, "default", 7, 1); 4107 if (IS_ERR_OR_NULL(di)) { 4108 btrfs_free_path(path); 4109 btrfs_end_transaction(trans); 4110 btrfs_err(fs_info, 4111 "Umm, you don't have the default diritem, this isn't going to work"); 4112 ret = -ENOENT; 4113 goto out; 4114 } 4115 4116 btrfs_cpu_key_to_disk(&disk_key, &new_root->root_key); 4117 

static long btrfs_ioctl_default_subvol(struct file *file, void __user *argp)
{
	struct inode *inode = file_inode(file);
	struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
	struct btrfs_root *root = BTRFS_I(inode)->root;
	struct btrfs_root *new_root;
	struct btrfs_dir_item *di;
	struct btrfs_trans_handle *trans;
	struct btrfs_path *path;
	struct btrfs_key location;
	struct btrfs_disk_key disk_key;
	u64 objectid = 0;
	u64 dir_id;
	int ret;

	if (!capable(CAP_SYS_ADMIN))
		return -EPERM;

	ret = mnt_want_write_file(file);
	if (ret)
		return ret;

	if (copy_from_user(&objectid, argp, sizeof(objectid))) {
		ret = -EFAULT;
		goto out;
	}

	if (!objectid)
		objectid = BTRFS_FS_TREE_OBJECTID;

	location.objectid = objectid;
	location.type = BTRFS_ROOT_ITEM_KEY;
	location.offset = (u64)-1;

	new_root = btrfs_read_fs_root_no_name(fs_info, &location);
	if (IS_ERR(new_root)) {
		ret = PTR_ERR(new_root);
		goto out;
	}
	if (!is_fstree(new_root->root_key.objectid)) {
		ret = -ENOENT;
		goto out;
	}

	path = btrfs_alloc_path();
	if (!path) {
		ret = -ENOMEM;
		goto out;
	}
	path->leave_spinning = 1;

	trans = btrfs_start_transaction(root, 1);
	if (IS_ERR(trans)) {
		btrfs_free_path(path);
		ret = PTR_ERR(trans);
		goto out;
	}

	dir_id = btrfs_super_root_dir(fs_info->super_copy);
	di = btrfs_lookup_dir_item(trans, fs_info->tree_root, path,
				   dir_id, "default", 7, 1);
	if (IS_ERR_OR_NULL(di)) {
		btrfs_free_path(path);
		btrfs_end_transaction(trans);
		btrfs_err(fs_info,
			  "default dir item not found, cannot set default subvolume");
		ret = -ENOENT;
		goto out;
	}

	btrfs_cpu_key_to_disk(&disk_key, &new_root->root_key);
	btrfs_set_dir_item_key(path->nodes[0], di, &disk_key);
	btrfs_mark_buffer_dirty(path->nodes[0]);
	btrfs_free_path(path);

	btrfs_set_fs_incompat(fs_info, DEFAULT_SUBVOL);
	btrfs_end_transaction(trans);
out:
	mnt_drop_write_file(file);
	return ret;
}
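
/*
 * Editor's illustration (added commentary, not part of the original file):
 * driving the ioctl above from userspace. The subvolume id would normally
 * come from BTRFS_IOC_INO_LOOKUP or a tree search; 256 is a made-up example
 * id, "/mnt" a hypothetical mount point, and CAP_SYS_ADMIN is required:
 *
 *	#include <fcntl.h>
 *	#include <stdio.h>
 *	#include <sys/ioctl.h>
 *	#include <linux/btrfs.h>
 *
 *	int fd = open("/mnt", O_RDONLY);
 *	__u64 objectid = 256;
 *
 *	if (ioctl(fd, BTRFS_IOC_DEFAULT_SUBVOL, &objectid) < 0)
 *		perror("BTRFS_IOC_DEFAULT_SUBVOL");
 */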

static void get_block_group_info(struct list_head *groups_list,
				 struct btrfs_ioctl_space_info *space)
{
	struct btrfs_block_group_cache *block_group;

	space->total_bytes = 0;
	space->used_bytes = 0;
	space->flags = 0;
	list_for_each_entry(block_group, groups_list, list) {
		space->flags = block_group->flags;
		space->total_bytes += block_group->key.offset;
		space->used_bytes +=
			btrfs_block_group_used(&block_group->item);
	}
}

static long btrfs_ioctl_space_info(struct btrfs_fs_info *fs_info,
				   void __user *arg)
{
	struct btrfs_ioctl_space_args space_args;
	struct btrfs_ioctl_space_info space;
	struct btrfs_ioctl_space_info *dest;
	struct btrfs_ioctl_space_info *dest_orig;
	struct btrfs_ioctl_space_info __user *user_dest;
	struct btrfs_space_info *info;
	static const u64 types[] = {
		BTRFS_BLOCK_GROUP_DATA,
		BTRFS_BLOCK_GROUP_SYSTEM,
		BTRFS_BLOCK_GROUP_METADATA,
		BTRFS_BLOCK_GROUP_DATA | BTRFS_BLOCK_GROUP_METADATA
	};
	int num_types = 4;
	int alloc_size;
	int ret = 0;
	u64 slot_count = 0;
	int i, c;

	if (copy_from_user(&space_args,
			   (struct btrfs_ioctl_space_args __user *)arg,
			   sizeof(space_args)))
		return -EFAULT;

	for (i = 0; i < num_types; i++) {
		struct btrfs_space_info *tmp;

		info = NULL;
		rcu_read_lock();
		list_for_each_entry_rcu(tmp, &fs_info->space_info,
					list) {
			if (tmp->flags == types[i]) {
				info = tmp;
				break;
			}
		}
		rcu_read_unlock();

		if (!info)
			continue;

		down_read(&info->groups_sem);
		for (c = 0; c < BTRFS_NR_RAID_TYPES; c++) {
			if (!list_empty(&info->block_groups[c]))
				slot_count++;
		}
		up_read(&info->groups_sem);
	}

	/*
	 * Global block reserve, exported as a space_info
	 */
	slot_count++;

	/* space_slots == 0 means they are asking for a count */
	if (space_args.space_slots == 0) {
		space_args.total_spaces = slot_count;
		goto out;
	}

	slot_count = min_t(u64, space_args.space_slots, slot_count);

	alloc_size = sizeof(*dest) * slot_count;

	/*
	 * We generally have at most 6 or so space infos, one for each raid
	 * level, so a whole page should be more than enough for everyone.
	 */
	if (alloc_size > PAGE_SIZE)
		return -ENOMEM;

	space_args.total_spaces = 0;
	dest = kmalloc(alloc_size, GFP_KERNEL);
	if (!dest)
		return -ENOMEM;
	dest_orig = dest;

	/* now we have a buffer to copy into */
	for (i = 0; i < num_types; i++) {
		struct btrfs_space_info *tmp;

		if (!slot_count)
			break;

		info = NULL;
		rcu_read_lock();
		list_for_each_entry_rcu(tmp, &fs_info->space_info,
					list) {
			if (tmp->flags == types[i]) {
				info = tmp;
				break;
			}
		}
		rcu_read_unlock();

		if (!info)
			continue;
		down_read(&info->groups_sem);
		for (c = 0; c < BTRFS_NR_RAID_TYPES; c++) {
			if (!list_empty(&info->block_groups[c])) {
				get_block_group_info(&info->block_groups[c],
						     &space);
				memcpy(dest, &space, sizeof(space));
				dest++;
				space_args.total_spaces++;
				slot_count--;
			}
			if (!slot_count)
				break;
		}
		up_read(&info->groups_sem);
	}

	/*
	 * Add global block reserve
	 */
	if (slot_count) {
		struct btrfs_block_rsv *block_rsv = &fs_info->global_block_rsv;

		spin_lock(&block_rsv->lock);
		space.total_bytes = block_rsv->size;
		space.used_bytes = block_rsv->size - block_rsv->reserved;
		spin_unlock(&block_rsv->lock);
		space.flags = BTRFS_SPACE_INFO_GLOBAL_RSV;
		memcpy(dest, &space, sizeof(space));
		space_args.total_spaces++;
	}

	user_dest = (struct btrfs_ioctl_space_info __user *)
		(arg + sizeof(struct btrfs_ioctl_space_args));

	if (copy_to_user(user_dest, dest_orig, alloc_size))
		ret = -EFAULT;

	kfree(dest_orig);
out:
	if (ret == 0 && copy_to_user(arg, &space_args, sizeof(space_args)))
		ret = -EFAULT;

	return ret;
}
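
/*
 * Editor's illustration (added commentary, not part of the original file):
 * BTRFS_IOC_SPACE_INFO is a two-call interface, as the "space_slots == 0"
 * branch above implies: ask for the slot count first, then allocate and
 * fetch. A sketch, assuming fd is an open descriptor on a btrfs mount:
 *
 *	#include <stdio.h>
 *	#include <stdlib.h>
 *	#include <sys/ioctl.h>
 *	#include <linux/btrfs.h>
 *
 *	struct btrfs_ioctl_space_args count = { .space_slots = 0 };
 *	struct btrfs_ioctl_space_args *args;
 *	__u64 i;
 *
 *	ioctl(fd, BTRFS_IOC_SPACE_INFO, &count);
 *	args = calloc(1, sizeof(*args) + count.total_spaces *
 *		      sizeof(struct btrfs_ioctl_space_info));
 *	args->space_slots = count.total_spaces;
 *	ioctl(fd, BTRFS_IOC_SPACE_INFO, args);
 *	for (i = 0; i < args->total_spaces; i++)
 *		printf("flags 0x%llx total %llu used %llu\n",
 *		       (unsigned long long)args->spaces[i].flags,
 *		       (unsigned long long)args->spaces[i].total_bytes,
 *		       (unsigned long long)args->spaces[i].used_bytes);
 */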

static noinline long btrfs_ioctl_start_sync(struct btrfs_root *root,
					    void __user *argp)
{
	struct btrfs_trans_handle *trans;
	u64 transid;
	int ret;

	trans = btrfs_attach_transaction_barrier(root);
	if (IS_ERR(trans)) {
		if (PTR_ERR(trans) != -ENOENT)
			return PTR_ERR(trans);

		/* No running transaction, don't bother */
		transid = root->fs_info->last_trans_committed;
		goto out;
	}
	transid = trans->transid;
	ret = btrfs_commit_transaction_async(trans, 0);
	if (ret) {
		btrfs_end_transaction(trans);
		return ret;
	}
out:
	if (argp)
		if (copy_to_user(argp, &transid, sizeof(transid)))
			return -EFAULT;
	return 0;
}

static noinline long btrfs_ioctl_wait_sync(struct btrfs_fs_info *fs_info,
					   void __user *argp)
{
	u64 transid;

	if (argp) {
		if (copy_from_user(&transid, argp, sizeof(transid)))
			return -EFAULT;
	} else {
		transid = 0;  /* current trans */
	}
	return btrfs_wait_for_commit(fs_info, transid);
}

static long btrfs_ioctl_scrub(struct file *file, void __user *arg)
{
	struct btrfs_fs_info *fs_info = btrfs_sb(file_inode(file)->i_sb);
	struct btrfs_ioctl_scrub_args *sa;
	int ret;

	if (!capable(CAP_SYS_ADMIN))
		return -EPERM;

	sa = memdup_user(arg, sizeof(*sa));
	if (IS_ERR(sa))
		return PTR_ERR(sa);

	if (!(sa->flags & BTRFS_SCRUB_READONLY)) {
		ret = mnt_want_write_file(file);
		if (ret)
			goto out;
	}

	ret = btrfs_scrub_dev(fs_info, sa->devid, sa->start, sa->end,
			      &sa->progress,
			      sa->flags & BTRFS_SCRUB_READONLY, 0);

	if (ret == 0 && copy_to_user(arg, sa, sizeof(*sa)))
		ret = -EFAULT;

	if (!(sa->flags & BTRFS_SCRUB_READONLY))
		mnt_drop_write_file(file);
out:
	kfree(sa);
	return ret;
}

static long btrfs_ioctl_scrub_cancel(struct btrfs_fs_info *fs_info)
{
	if (!capable(CAP_SYS_ADMIN))
		return -EPERM;

	return btrfs_scrub_cancel(fs_info);
}

static long btrfs_ioctl_scrub_progress(struct btrfs_fs_info *fs_info,
				       void __user *arg)
{
	struct btrfs_ioctl_scrub_args *sa;
	int ret;

	if (!capable(CAP_SYS_ADMIN))
		return -EPERM;

	sa = memdup_user(arg, sizeof(*sa));
	if (IS_ERR(sa))
		return PTR_ERR(sa);

	ret = btrfs_scrub_progress(fs_info, sa->devid, &sa->progress);

	if (ret == 0 && copy_to_user(arg, sa, sizeof(*sa)))
		ret = -EFAULT;

	kfree(sa);
	return ret;
}

static long btrfs_ioctl_get_dev_stats(struct btrfs_fs_info *fs_info,
				      void __user *arg)
{
	struct btrfs_ioctl_get_dev_stats *sa;
	int ret;

	sa = memdup_user(arg, sizeof(*sa));
	if (IS_ERR(sa))
		return PTR_ERR(sa);

	if ((sa->flags & BTRFS_DEV_STATS_RESET) && !capable(CAP_SYS_ADMIN)) {
		kfree(sa);
		return -EPERM;
	}

	ret = btrfs_get_dev_stats(fs_info, sa);

	if (ret == 0 && copy_to_user(arg, sa, sizeof(*sa)))
		ret = -EFAULT;

	kfree(sa);
	return ret;
}

static long btrfs_ioctl_dev_replace(struct btrfs_fs_info *fs_info,
				    void __user *arg)
{
	struct btrfs_ioctl_dev_replace_args *p;
	int ret;

	if (!capable(CAP_SYS_ADMIN))
		return -EPERM;

	p = memdup_user(arg, sizeof(*p));
	if (IS_ERR(p))
		return PTR_ERR(p);

	switch (p->cmd) {
	case BTRFS_IOCTL_DEV_REPLACE_CMD_START:
		if (sb_rdonly(fs_info->sb)) {
			ret = -EROFS;
			goto out;
		}
		if (test_and_set_bit(BTRFS_FS_EXCL_OP, &fs_info->flags)) {
			ret = BTRFS_ERROR_DEV_EXCL_RUN_IN_PROGRESS;
		} else {
			ret = btrfs_dev_replace_by_ioctl(fs_info, p);
			clear_bit(BTRFS_FS_EXCL_OP, &fs_info->flags);
		}
		break;
	case BTRFS_IOCTL_DEV_REPLACE_CMD_STATUS:
		btrfs_dev_replace_status(fs_info, p);
		ret = 0;
		break;
	case BTRFS_IOCTL_DEV_REPLACE_CMD_CANCEL:
		p->result = btrfs_dev_replace_cancel(fs_info);
		ret = 0;
		break;
	default:
		ret = -EINVAL;
		break;
	}

	if ((ret == 0 || ret == -ECANCELED) && copy_to_user(arg, p, sizeof(*p)))
		ret = -EFAULT;
out:
	kfree(p);
	return ret;
}
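
/*
 * Editor's illustration (added commentary, not part of the original file):
 * BTRFS_IOC_START_SYNC and BTRFS_IOC_WAIT_SYNC pair naturally: the first
 * kicks off an asynchronous commit and reports its transid, the second
 * blocks until that transaction is fully committed:
 *
 *	#include <sys/ioctl.h>
 *	#include <linux/btrfs.h>
 *
 *	__u64 transid;
 *
 *	if (ioctl(fd, BTRFS_IOC_START_SYNC, &transid) == 0)
 *		ioctl(fd, BTRFS_IOC_WAIT_SYNC, &transid);
 */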

static long btrfs_ioctl_ino_to_path(struct btrfs_root *root, void __user *arg)
{
	int ret = 0;
	int i;
	u64 rel_ptr;
	int size;
	struct btrfs_ioctl_ino_path_args *ipa = NULL;
	struct inode_fs_paths *ipath = NULL;
	struct btrfs_path *path;

	if (!capable(CAP_DAC_READ_SEARCH))
		return -EPERM;

	path = btrfs_alloc_path();
	if (!path) {
		ret = -ENOMEM;
		goto out;
	}

	ipa = memdup_user(arg, sizeof(*ipa));
	if (IS_ERR(ipa)) {
		ret = PTR_ERR(ipa);
		ipa = NULL;
		goto out;
	}

	size = min_t(u32, ipa->size, 4096);
	ipath = init_ipath(size, root, path);
	if (IS_ERR(ipath)) {
		ret = PTR_ERR(ipath);
		ipath = NULL;
		goto out;
	}

	ret = paths_from_inode(ipa->inum, ipath);
	if (ret < 0)
		goto out;

	for (i = 0; i < ipath->fspath->elem_cnt; ++i) {
		rel_ptr = ipath->fspath->val[i] -
			  (u64)(unsigned long)ipath->fspath->val;
		ipath->fspath->val[i] = rel_ptr;
	}

	ret = copy_to_user((void __user *)(unsigned long)ipa->fspath,
			   ipath->fspath, size);
	if (ret) {
		ret = -EFAULT;
		goto out;
	}

out:
	btrfs_free_path(path);
	free_ipath(ipath);
	kfree(ipa);

	return ret;
}

static int build_ino_list(u64 inum, u64 offset, u64 root, void *ctx)
{
	struct btrfs_data_container *inodes = ctx;
	const size_t c = 3 * sizeof(u64);

	if (inodes->bytes_left >= c) {
		inodes->bytes_left -= c;
		inodes->val[inodes->elem_cnt] = inum;
		inodes->val[inodes->elem_cnt + 1] = offset;
		inodes->val[inodes->elem_cnt + 2] = root;
		inodes->elem_cnt += 3;
	} else {
		inodes->bytes_missing += c - inodes->bytes_left;
		inodes->bytes_left = 0;
		inodes->elem_missed += 3;
	}

	return 0;
}

static long btrfs_ioctl_logical_to_ino(struct btrfs_fs_info *fs_info,
					void __user *arg, int version)
{
	int ret = 0;
	int size;
	struct btrfs_ioctl_logical_ino_args *loi;
	struct btrfs_data_container *inodes = NULL;
	struct btrfs_path *path = NULL;
	bool ignore_offset;

	if (!capable(CAP_SYS_ADMIN))
		return -EPERM;

	loi = memdup_user(arg, sizeof(*loi));
	if (IS_ERR(loi))
		return PTR_ERR(loi);

	if (version == 1) {
		ignore_offset = false;
		size = min_t(u32, loi->size, SZ_64K);
	} else {
		/* All reserved bits must be 0 for now */
		if (memchr_inv(loi->reserved, 0, sizeof(loi->reserved))) {
			ret = -EINVAL;
			goto out_loi;
		}
		/* Only accept flags we have defined so far */
		if (loi->flags & ~(BTRFS_LOGICAL_INO_ARGS_IGNORE_OFFSET)) {
			ret = -EINVAL;
			goto out_loi;
		}
		ignore_offset = loi->flags & BTRFS_LOGICAL_INO_ARGS_IGNORE_OFFSET;
		size = min_t(u32, loi->size, SZ_16M);
	}

	path = btrfs_alloc_path();
	if (!path) {
		ret = -ENOMEM;
		goto out;
	}

	inodes = init_data_container(size);
	if (IS_ERR(inodes)) {
		ret = PTR_ERR(inodes);
		inodes = NULL;
		goto out;
	}

	ret = iterate_inodes_from_logical(loi->logical, fs_info, path,
					  build_ino_list, inodes, ignore_offset);
	if (ret == -EINVAL)
		ret = -ENOENT;
	if (ret < 0)
		goto out;

	ret = copy_to_user((void __user *)(unsigned long)loi->inodes, inodes,
			   size);
	if (ret)
		ret = -EFAULT;

out:
	btrfs_free_path(path);
	kvfree(inodes);
out_loi:
	kfree(loi);

	return ret;
}
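
/*
 * Editor's illustration (added commentary, not part of the original file):
 * build_ino_list() above packs (inum, offset, root) triples into the
 * btrfs_data_container returned by BTRFS_IOC_LOGICAL_INO. A sketch of
 * walking the result, assuming "logical" holds a logical byte address
 * of interest (e.g. one reported by a scrub error):
 *
 *	#include <stdint.h>
 *	#include <stdio.h>
 *	#include <stdlib.h>
 *	#include <sys/ioctl.h>
 *	#include <linux/btrfs.h>
 *
 *	struct btrfs_data_container *inodes = calloc(1, 64 * 1024);
 *	struct btrfs_ioctl_logical_ino_args loi = {
 *		.logical = logical,
 *		.size = 64 * 1024,
 *		.inodes = (__u64)(uintptr_t)inodes,
 *	};
 *	__u32 i;
 *
 *	if (ioctl(fd, BTRFS_IOC_LOGICAL_INO, &loi) == 0)
 *		for (i = 0; i < inodes->elem_cnt; i += 3)
 *			printf("ino %llu offset %llu root %llu\n",
 *			       (unsigned long long)inodes->val[i],
 *			       (unsigned long long)inodes->val[i + 1],
 *			       (unsigned long long)inodes->val[i + 2]);
 */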

void btrfs_update_ioctl_balance_args(struct btrfs_fs_info *fs_info,
				     struct btrfs_ioctl_balance_args *bargs)
{
	struct btrfs_balance_control *bctl = fs_info->balance_ctl;

	bargs->flags = bctl->flags;

	if (test_bit(BTRFS_FS_BALANCE_RUNNING, &fs_info->flags))
		bargs->state |= BTRFS_BALANCE_STATE_RUNNING;
	if (atomic_read(&fs_info->balance_pause_req))
		bargs->state |= BTRFS_BALANCE_STATE_PAUSE_REQ;
	if (atomic_read(&fs_info->balance_cancel_req))
		bargs->state |= BTRFS_BALANCE_STATE_CANCEL_REQ;

	memcpy(&bargs->data, &bctl->data, sizeof(bargs->data));
	memcpy(&bargs->meta, &bctl->meta, sizeof(bargs->meta));
	memcpy(&bargs->sys, &bctl->sys, sizeof(bargs->sys));

	spin_lock(&fs_info->balance_lock);
	memcpy(&bargs->stat, &bctl->stat, sizeof(bargs->stat));
	spin_unlock(&fs_info->balance_lock);
}

static long btrfs_ioctl_balance(struct file *file, void __user *arg)
{
	struct btrfs_root *root = BTRFS_I(file_inode(file))->root;
	struct btrfs_fs_info *fs_info = root->fs_info;
	struct btrfs_ioctl_balance_args *bargs;
	struct btrfs_balance_control *bctl;
	bool need_unlock; /* for mut. excl. ops lock */
	int ret;

	if (!capable(CAP_SYS_ADMIN))
		return -EPERM;

	ret = mnt_want_write_file(file);
	if (ret)
		return ret;

again:
	if (!test_and_set_bit(BTRFS_FS_EXCL_OP, &fs_info->flags)) {
		mutex_lock(&fs_info->balance_mutex);
		need_unlock = true;
		goto locked;
	}

	/*
	 * mut. excl. ops lock is locked.  Three possibilities:
	 *   (1) some other op is running
	 *   (2) balance is running
	 *   (3) balance is paused -- special case (think resume)
	 */
	mutex_lock(&fs_info->balance_mutex);
	if (fs_info->balance_ctl) {
		/* this is either (2) or (3) */
		if (!test_bit(BTRFS_FS_BALANCE_RUNNING, &fs_info->flags)) {
			mutex_unlock(&fs_info->balance_mutex);
			/*
			 * Lock released to allow other waiters to continue,
			 * we'll reexamine the status again.
			 */
			mutex_lock(&fs_info->balance_mutex);

			if (fs_info->balance_ctl &&
			    !test_bit(BTRFS_FS_BALANCE_RUNNING, &fs_info->flags)) {
				/* this is (3) */
				need_unlock = false;
				goto locked;
			}

			mutex_unlock(&fs_info->balance_mutex);
			goto again;
		} else {
			/* this is (2) */
			mutex_unlock(&fs_info->balance_mutex);
			ret = -EINPROGRESS;
			goto out;
		}
	} else {
		/* this is (1) */
		mutex_unlock(&fs_info->balance_mutex);
		ret = BTRFS_ERROR_DEV_EXCL_RUN_IN_PROGRESS;
		goto out;
	}

locked:
	BUG_ON(!test_bit(BTRFS_FS_EXCL_OP, &fs_info->flags));

	if (arg) {
		bargs = memdup_user(arg, sizeof(*bargs));
		if (IS_ERR(bargs)) {
			ret = PTR_ERR(bargs);
			goto out_unlock;
		}

		if (bargs->flags & BTRFS_BALANCE_RESUME) {
			if (!fs_info->balance_ctl) {
				ret = -ENOTCONN;
				goto out_bargs;
			}

			bctl = fs_info->balance_ctl;
			spin_lock(&fs_info->balance_lock);
			bctl->flags |= BTRFS_BALANCE_RESUME;
			spin_unlock(&fs_info->balance_lock);

			goto do_balance;
		}
	} else {
		bargs = NULL;
	}

	if (fs_info->balance_ctl) {
		ret = -EINPROGRESS;
		goto out_bargs;
	}

	bctl = kzalloc(sizeof(*bctl), GFP_KERNEL);
	if (!bctl) {
		ret = -ENOMEM;
		goto out_bargs;
	}

	if (arg) {
		memcpy(&bctl->data, &bargs->data, sizeof(bctl->data));
		memcpy(&bctl->meta, &bargs->meta, sizeof(bctl->meta));
		memcpy(&bctl->sys, &bargs->sys, sizeof(bctl->sys));

		bctl->flags = bargs->flags;
	} else {
		/* balance everything - no filters */
		bctl->flags |= BTRFS_BALANCE_TYPE_MASK;
	}

	if (bctl->flags & ~(BTRFS_BALANCE_ARGS_MASK | BTRFS_BALANCE_TYPE_MASK)) {
		ret = -EINVAL;
		goto out_bctl;
	}

do_balance:
	/*
	 * Ownership of bctl and filesystem flag BTRFS_FS_EXCL_OP goes to
	 * btrfs_balance.  bctl is freed in reset_balance_state, or, if
	 * restriper was paused all the way until unmount, in free_fs_info.
	 * The flag should be cleared after reset_balance_state.
	 */
	need_unlock = false;

	ret = btrfs_balance(fs_info, bctl, bargs);
	bctl = NULL;

	if ((ret == 0 || ret == -ECANCELED) && arg) {
		if (copy_to_user(arg, bargs, sizeof(*bargs)))
			ret = -EFAULT;
	}

out_bctl:
	kfree(bctl);
out_bargs:
	kfree(bargs);
out_unlock:
	mutex_unlock(&fs_info->balance_mutex);
	if (need_unlock)
		clear_bit(BTRFS_FS_EXCL_OP, &fs_info->flags);
out:
	mnt_drop_write_file(file);
	return ret;
}
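
/*
 * Editor's illustration (added commentary, not part of the original file):
 * starting a full balance with no filters through BTRFS_IOC_BALANCE_V2,
 * which is the path that reaches btrfs_ioctl_balance() above. The
 * BTRFS_BALANCE_* type flags come from the UAPI linux/btrfs_tree.h header:
 *
 *	#include <stdio.h>
 *	#include <string.h>
 *	#include <sys/ioctl.h>
 *	#include <linux/btrfs.h>
 *	#include <linux/btrfs_tree.h>
 *
 *	struct btrfs_ioctl_balance_args bargs;
 *
 *	memset(&bargs, 0, sizeof(bargs));
 *	bargs.flags = BTRFS_BALANCE_DATA | BTRFS_BALANCE_METADATA |
 *		      BTRFS_BALANCE_SYSTEM;
 *	if (ioctl(fd, BTRFS_IOC_BALANCE_V2, &bargs) < 0)
 *		perror("BTRFS_IOC_BALANCE_V2");
 */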

static long btrfs_ioctl_balance_ctl(struct btrfs_fs_info *fs_info, int cmd)
{
	if (!capable(CAP_SYS_ADMIN))
		return -EPERM;

	switch (cmd) {
	case BTRFS_BALANCE_CTL_PAUSE:
		return btrfs_pause_balance(fs_info);
	case BTRFS_BALANCE_CTL_CANCEL:
		return btrfs_cancel_balance(fs_info);
	}

	return -EINVAL;
}

static long btrfs_ioctl_balance_progress(struct btrfs_fs_info *fs_info,
					 void __user *arg)
{
	struct btrfs_ioctl_balance_args *bargs;
	int ret = 0;

	if (!capable(CAP_SYS_ADMIN))
		return -EPERM;

	mutex_lock(&fs_info->balance_mutex);
	if (!fs_info->balance_ctl) {
		ret = -ENOTCONN;
		goto out;
	}

	bargs = kzalloc(sizeof(*bargs), GFP_KERNEL);
	if (!bargs) {
		ret = -ENOMEM;
		goto out;
	}

	btrfs_update_ioctl_balance_args(fs_info, bargs);

	if (copy_to_user(arg, bargs, sizeof(*bargs)))
		ret = -EFAULT;

	kfree(bargs);
out:
	mutex_unlock(&fs_info->balance_mutex);
	return ret;
}

static long btrfs_ioctl_quota_ctl(struct file *file, void __user *arg)
{
	struct inode *inode = file_inode(file);
	struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
	struct btrfs_ioctl_quota_ctl_args *sa;
	int ret;

	if (!capable(CAP_SYS_ADMIN))
		return -EPERM;

	ret = mnt_want_write_file(file);
	if (ret)
		return ret;

	sa = memdup_user(arg, sizeof(*sa));
	if (IS_ERR(sa)) {
		ret = PTR_ERR(sa);
		goto drop_write;
	}

	down_write(&fs_info->subvol_sem);

	switch (sa->cmd) {
	case BTRFS_QUOTA_CTL_ENABLE:
		ret = btrfs_quota_enable(fs_info);
		break;
	case BTRFS_QUOTA_CTL_DISABLE:
		ret = btrfs_quota_disable(fs_info);
		break;
	default:
		ret = -EINVAL;
		break;
	}

	kfree(sa);
	up_write(&fs_info->subvol_sem);
drop_write:
	mnt_drop_write_file(file);
	return ret;
}
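
/*
 * Editor's illustration (added commentary, not part of the original file):
 * enabling quotas through the ioctl above is a single-struct call:
 *
 *	#include <stdio.h>
 *	#include <sys/ioctl.h>
 *	#include <linux/btrfs.h>
 *
 *	struct btrfs_ioctl_quota_ctl_args qargs = {
 *		.cmd = BTRFS_QUOTA_CTL_ENABLE,
 *	};
 *
 *	if (ioctl(fd, BTRFS_IOC_QUOTA_CTL, &qargs) < 0)
 *		perror("BTRFS_IOC_QUOTA_CTL");
 */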

static long btrfs_ioctl_qgroup_assign(struct file *file, void __user *arg)
{
	struct inode *inode = file_inode(file);
	struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
	struct btrfs_root *root = BTRFS_I(inode)->root;
	struct btrfs_ioctl_qgroup_assign_args *sa;
	struct btrfs_trans_handle *trans;
	int ret;
	int err;

	if (!capable(CAP_SYS_ADMIN))
		return -EPERM;

	ret = mnt_want_write_file(file);
	if (ret)
		return ret;

	sa = memdup_user(arg, sizeof(*sa));
	if (IS_ERR(sa)) {
		ret = PTR_ERR(sa);
		goto drop_write;
	}

	trans = btrfs_join_transaction(root);
	if (IS_ERR(trans)) {
		ret = PTR_ERR(trans);
		goto out;
	}

	if (sa->assign) {
		ret = btrfs_add_qgroup_relation(trans, sa->src, sa->dst);
	} else {
		ret = btrfs_del_qgroup_relation(trans, sa->src, sa->dst);
	}

	/* update qgroup status and info */
	err = btrfs_run_qgroups(trans);
	if (err < 0)
		btrfs_handle_fs_error(fs_info, err,
				      "failed to update qgroup status and info");
	err = btrfs_end_transaction(trans);
	if (err && !ret)
		ret = err;

out:
	kfree(sa);
drop_write:
	mnt_drop_write_file(file);
	return ret;
}

static long btrfs_ioctl_qgroup_create(struct file *file, void __user *arg)
{
	struct inode *inode = file_inode(file);
	struct btrfs_root *root = BTRFS_I(inode)->root;
	struct btrfs_ioctl_qgroup_create_args *sa;
	struct btrfs_trans_handle *trans;
	int ret;
	int err;

	if (!capable(CAP_SYS_ADMIN))
		return -EPERM;

	ret = mnt_want_write_file(file);
	if (ret)
		return ret;

	sa = memdup_user(arg, sizeof(*sa));
	if (IS_ERR(sa)) {
		ret = PTR_ERR(sa);
		goto drop_write;
	}

	if (!sa->qgroupid) {
		ret = -EINVAL;
		goto out;
	}

	trans = btrfs_join_transaction(root);
	if (IS_ERR(trans)) {
		ret = PTR_ERR(trans);
		goto out;
	}

	if (sa->create) {
		ret = btrfs_create_qgroup(trans, sa->qgroupid);
	} else {
		ret = btrfs_remove_qgroup(trans, sa->qgroupid);
	}

	err = btrfs_end_transaction(trans);
	if (err && !ret)
		ret = err;

out:
	kfree(sa);
drop_write:
	mnt_drop_write_file(file);
	return ret;
}

static long btrfs_ioctl_qgroup_limit(struct file *file, void __user *arg)
{
	struct inode *inode = file_inode(file);
	struct btrfs_root *root = BTRFS_I(inode)->root;
	struct btrfs_ioctl_qgroup_limit_args *sa;
	struct btrfs_trans_handle *trans;
	int ret;
	int err;
	u64 qgroupid;

	if (!capable(CAP_SYS_ADMIN))
		return -EPERM;

	ret = mnt_want_write_file(file);
	if (ret)
		return ret;

	sa = memdup_user(arg, sizeof(*sa));
	if (IS_ERR(sa)) {
		ret = PTR_ERR(sa);
		goto drop_write;
	}

	trans = btrfs_join_transaction(root);
	if (IS_ERR(trans)) {
		ret = PTR_ERR(trans);
		goto out;
	}

	qgroupid = sa->qgroupid;
	if (!qgroupid) {
		/* take the current subvol as qgroup */
		qgroupid = root->root_key.objectid;
	}

	ret = btrfs_limit_qgroup(trans, qgroupid, &sa->lim);

	err = btrfs_end_transaction(trans);
	if (err && !ret)
		ret = err;

out:
	kfree(sa);
drop_write:
	mnt_drop_write_file(file);
	return ret;
}
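
/*
 * Editor's illustration (added commentary, not part of the original file):
 * putting a 1 GiB referenced-bytes limit on the current subvolume via the
 * ioctl above; qgroupid == 0 means "this subvolume", per the fallback in
 * btrfs_ioctl_qgroup_limit():
 *
 *	#include <stdio.h>
 *	#include <sys/ioctl.h>
 *	#include <linux/btrfs.h>
 *
 *	struct btrfs_ioctl_qgroup_limit_args la = {
 *		.qgroupid = 0,
 *		.lim = {
 *			.flags = BTRFS_QGROUP_LIMIT_MAX_RFER,
 *			.max_rfer = 1ULL << 30,
 *		},
 *	};
 *
 *	if (ioctl(fd, BTRFS_IOC_QGROUP_LIMIT, &la) < 0)
 *		perror("BTRFS_IOC_QGROUP_LIMIT");
 */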

static long btrfs_ioctl_quota_rescan(struct file *file, void __user *arg)
{
	struct inode *inode = file_inode(file);
	struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
	struct btrfs_ioctl_quota_rescan_args *qsa;
	int ret;

	if (!capable(CAP_SYS_ADMIN))
		return -EPERM;

	ret = mnt_want_write_file(file);
	if (ret)
		return ret;

	qsa = memdup_user(arg, sizeof(*qsa));
	if (IS_ERR(qsa)) {
		ret = PTR_ERR(qsa);
		goto drop_write;
	}

	if (qsa->flags) {
		ret = -EINVAL;
		goto out;
	}

	ret = btrfs_qgroup_rescan(fs_info);

out:
	kfree(qsa);
drop_write:
	mnt_drop_write_file(file);
	return ret;
}

static long btrfs_ioctl_quota_rescan_status(struct file *file, void __user *arg)
{
	struct inode *inode = file_inode(file);
	struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
	struct btrfs_ioctl_quota_rescan_args *qsa;
	int ret = 0;

	if (!capable(CAP_SYS_ADMIN))
		return -EPERM;

	qsa = kzalloc(sizeof(*qsa), GFP_KERNEL);
	if (!qsa)
		return -ENOMEM;

	if (fs_info->qgroup_flags & BTRFS_QGROUP_STATUS_FLAG_RESCAN) {
		qsa->flags = 1;
		qsa->progress = fs_info->qgroup_rescan_progress.objectid;
	}

	if (copy_to_user(arg, qsa, sizeof(*qsa)))
		ret = -EFAULT;

	kfree(qsa);
	return ret;
}

static long btrfs_ioctl_quota_rescan_wait(struct file *file, void __user *arg)
{
	struct inode *inode = file_inode(file);
	struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);

	if (!capable(CAP_SYS_ADMIN))
		return -EPERM;

	return btrfs_qgroup_wait_for_completion(fs_info, true);
}
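
/*
 * Editor's illustration (added commentary, not part of the original file):
 * the rescan ioctls above compose into the usual "kick off a rescan, then
 * block until it finishes" pattern; flags must be zero on the way in:
 *
 *	#include <string.h>
 *	#include <sys/ioctl.h>
 *	#include <linux/btrfs.h>
 *
 *	struct btrfs_ioctl_quota_rescan_args qsa;
 *
 *	memset(&qsa, 0, sizeof(qsa));
 *	if (ioctl(fd, BTRFS_IOC_QUOTA_RESCAN, &qsa) == 0)
 *		ioctl(fd, BTRFS_IOC_QUOTA_RESCAN_WAIT);
 */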

static long _btrfs_ioctl_set_received_subvol(struct file *file,
					struct btrfs_ioctl_received_subvol_args *sa)
{
	struct inode *inode = file_inode(file);
	struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
	struct btrfs_root *root = BTRFS_I(inode)->root;
	struct btrfs_root_item *root_item = &root->root_item;
	struct btrfs_trans_handle *trans;
	struct timespec64 ct = current_time(inode);
	int ret = 0;
	int received_uuid_changed;

	if (!inode_owner_or_capable(inode))
		return -EPERM;

	ret = mnt_want_write_file(file);
	if (ret < 0)
		return ret;

	down_write(&fs_info->subvol_sem);

	if (btrfs_ino(BTRFS_I(inode)) != BTRFS_FIRST_FREE_OBJECTID) {
		ret = -EINVAL;
		goto out;
	}

	if (btrfs_root_readonly(root)) {
		ret = -EROFS;
		goto out;
	}

	/*
	 * 1 - root item
	 * 2 - uuid items (received uuid + subvol uuid)
	 */
	trans = btrfs_start_transaction(root, 3);
	if (IS_ERR(trans)) {
		ret = PTR_ERR(trans);
		trans = NULL;
		goto out;
	}

	sa->rtransid = trans->transid;
	sa->rtime.sec = ct.tv_sec;
	sa->rtime.nsec = ct.tv_nsec;

	received_uuid_changed = memcmp(root_item->received_uuid, sa->uuid,
				       BTRFS_UUID_SIZE);
	if (received_uuid_changed &&
	    !btrfs_is_empty_uuid(root_item->received_uuid)) {
		ret = btrfs_uuid_tree_remove(trans, root_item->received_uuid,
					     BTRFS_UUID_KEY_RECEIVED_SUBVOL,
					     root->root_key.objectid);
		if (ret && ret != -ENOENT) {
			btrfs_abort_transaction(trans, ret);
			btrfs_end_transaction(trans);
			goto out;
		}
	}
	memcpy(root_item->received_uuid, sa->uuid, BTRFS_UUID_SIZE);
	btrfs_set_root_stransid(root_item, sa->stransid);
	btrfs_set_root_rtransid(root_item, sa->rtransid);
	btrfs_set_stack_timespec_sec(&root_item->stime, sa->stime.sec);
	btrfs_set_stack_timespec_nsec(&root_item->stime, sa->stime.nsec);
	btrfs_set_stack_timespec_sec(&root_item->rtime, sa->rtime.sec);
	btrfs_set_stack_timespec_nsec(&root_item->rtime, sa->rtime.nsec);

	ret = btrfs_update_root(trans, fs_info->tree_root,
				&root->root_key, &root->root_item);
	if (ret < 0) {
		btrfs_end_transaction(trans);
		goto out;
	}
	if (received_uuid_changed && !btrfs_is_empty_uuid(sa->uuid)) {
		ret = btrfs_uuid_tree_add(trans, sa->uuid,
					  BTRFS_UUID_KEY_RECEIVED_SUBVOL,
					  root->root_key.objectid);
		if (ret < 0 && ret != -EEXIST) {
			btrfs_abort_transaction(trans, ret);
			btrfs_end_transaction(trans);
			goto out;
		}
	}
	ret = btrfs_commit_transaction(trans);
out:
	up_write(&fs_info->subvol_sem);
	mnt_drop_write_file(file);
	return ret;
}

#ifdef CONFIG_64BIT
static long btrfs_ioctl_set_received_subvol_32(struct file *file,
						void __user *arg)
{
	struct btrfs_ioctl_received_subvol_args_32 *args32 = NULL;
	struct btrfs_ioctl_received_subvol_args *args64 = NULL;
	int ret = 0;

	args32 = memdup_user(arg, sizeof(*args32));
	if (IS_ERR(args32))
		return PTR_ERR(args32);

	args64 = kmalloc(sizeof(*args64), GFP_KERNEL);
	if (!args64) {
		ret = -ENOMEM;
		goto out;
	}

	memcpy(args64->uuid, args32->uuid, BTRFS_UUID_SIZE);
	args64->stransid = args32->stransid;
	args64->rtransid = args32->rtransid;
	args64->stime.sec = args32->stime.sec;
	args64->stime.nsec = args32->stime.nsec;
	args64->rtime.sec = args32->rtime.sec;
	args64->rtime.nsec = args32->rtime.nsec;
	args64->flags = args32->flags;

	ret = _btrfs_ioctl_set_received_subvol(file, args64);
	if (ret)
		goto out;

	memcpy(args32->uuid, args64->uuid, BTRFS_UUID_SIZE);
	args32->stransid = args64->stransid;
	args32->rtransid = args64->rtransid;
	args32->stime.sec = args64->stime.sec;
	args32->stime.nsec = args64->stime.nsec;
	args32->rtime.sec = args64->rtime.sec;
	args32->rtime.nsec = args64->rtime.nsec;
	args32->flags = args64->flags;

	ret = copy_to_user(arg, args32, sizeof(*args32));
	if (ret)
		ret = -EFAULT;

out:
	kfree(args32);
	kfree(args64);
	return ret;
}
#endif

static long btrfs_ioctl_set_received_subvol(struct file *file,
					    void __user *arg)
{
	struct btrfs_ioctl_received_subvol_args *sa = NULL;
	int ret = 0;

	sa = memdup_user(arg, sizeof(*sa));
	if (IS_ERR(sa))
		return PTR_ERR(sa);

	ret = _btrfs_ioctl_set_received_subvol(file, sa);

	if (ret)
		goto out;

	ret = copy_to_user(arg, sa, sizeof(*sa));
	if (ret)
		ret = -EFAULT;

out:
	kfree(sa);
	return ret;
}

static int btrfs_ioctl_get_fslabel(struct file *file, void __user *arg)
{
	struct inode *inode = file_inode(file);
	struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
	size_t len;
	int ret;
	char label[BTRFS_LABEL_SIZE];

	spin_lock(&fs_info->super_lock);
	memcpy(label, fs_info->super_copy->label, BTRFS_LABEL_SIZE);
	spin_unlock(&fs_info->super_lock);

	len = strnlen(label, BTRFS_LABEL_SIZE);

	if (len == BTRFS_LABEL_SIZE) {
		btrfs_warn(fs_info,
			   "label is too long, returning the first %zu bytes",
			   --len);
	}

	ret = copy_to_user(arg, label, len);

	return ret ? -EFAULT : 0;
}

static int btrfs_ioctl_set_fslabel(struct file *file, void __user *arg)
{
	struct inode *inode = file_inode(file);
	struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
	struct btrfs_root *root = BTRFS_I(inode)->root;
	struct btrfs_super_block *super_block = fs_info->super_copy;
	struct btrfs_trans_handle *trans;
	char label[BTRFS_LABEL_SIZE];
	int ret;

	if (!capable(CAP_SYS_ADMIN))
		return -EPERM;

	if (copy_from_user(label, arg, sizeof(label)))
		return -EFAULT;

	if (strnlen(label, BTRFS_LABEL_SIZE) == BTRFS_LABEL_SIZE) {
		btrfs_err(fs_info,
			  "unable to set label with more than %d bytes",
			  BTRFS_LABEL_SIZE - 1);
		return -EINVAL;
	}

	ret = mnt_want_write_file(file);
	if (ret)
		return ret;

	trans = btrfs_start_transaction(root, 0);
	if (IS_ERR(trans)) {
		ret = PTR_ERR(trans);
		goto out_unlock;
	}

	spin_lock(&fs_info->super_lock);
	strcpy(super_block->label, label);
	spin_unlock(&fs_info->super_lock);
	ret = btrfs_commit_transaction(trans);

out_unlock:
	mnt_drop_write_file(file);
	return ret;
}
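
/*
 * Editor's illustration (added commentary, not part of the original file):
 * the label ioctls above take a bare char[BTRFS_LABEL_SIZE] buffer rather
 * than a struct. Note that the kernel copies at most the label bytes and no
 * terminating NUL, so the buffer must be zeroed first:
 *
 *	#include <stdio.h>
 *	#include <sys/ioctl.h>
 *	#include <linux/btrfs.h>
 *
 *	char label[BTRFS_LABEL_SIZE] = { 0 };
 *
 *	if (ioctl(fd, BTRFS_IOC_GET_FSLABEL, label) == 0)
 *		printf("label: %s\n", label);
 */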
"s" : ""); 5390 kfree(names); 5391 } else 5392 btrfs_warn(fs_info, 5393 "can't set %s bits 0x%llx while mounted", 5394 type, disallowed); 5395 return -EPERM; 5396 } 5397 5398 disallowed = clear_mask & ~safe_clear; 5399 if (disallowed) { 5400 names = btrfs_printable_features(set, disallowed); 5401 if (names) { 5402 btrfs_warn(fs_info, 5403 "can't clear the %s feature bit%s while mounted", 5404 names, strchr(names, ',') ? "s" : ""); 5405 kfree(names); 5406 } else 5407 btrfs_warn(fs_info, 5408 "can't clear %s bits 0x%llx while mounted", 5409 type, disallowed); 5410 return -EPERM; 5411 } 5412 5413 return 0; 5414 } 5415 5416 #define check_feature(fs_info, change_mask, flags, mask_base) \ 5417 check_feature_bits(fs_info, FEAT_##mask_base, change_mask, flags, \ 5418 BTRFS_FEATURE_ ## mask_base ## _SUPP, \ 5419 BTRFS_FEATURE_ ## mask_base ## _SAFE_SET, \ 5420 BTRFS_FEATURE_ ## mask_base ## _SAFE_CLEAR) 5421 5422 static int btrfs_ioctl_set_features(struct file *file, void __user *arg) 5423 { 5424 struct inode *inode = file_inode(file); 5425 struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb); 5426 struct btrfs_root *root = BTRFS_I(inode)->root; 5427 struct btrfs_super_block *super_block = fs_info->super_copy; 5428 struct btrfs_ioctl_feature_flags flags[2]; 5429 struct btrfs_trans_handle *trans; 5430 u64 newflags; 5431 int ret; 5432 5433 if (!capable(CAP_SYS_ADMIN)) 5434 return -EPERM; 5435 5436 if (copy_from_user(flags, arg, sizeof(flags))) 5437 return -EFAULT; 5438 5439 /* Nothing to do */ 5440 if (!flags[0].compat_flags && !flags[0].compat_ro_flags && 5441 !flags[0].incompat_flags) 5442 return 0; 5443 5444 ret = check_feature(fs_info, flags[0].compat_flags, 5445 flags[1].compat_flags, COMPAT); 5446 if (ret) 5447 return ret; 5448 5449 ret = check_feature(fs_info, flags[0].compat_ro_flags, 5450 flags[1].compat_ro_flags, COMPAT_RO); 5451 if (ret) 5452 return ret; 5453 5454 ret = check_feature(fs_info, flags[0].incompat_flags, 5455 flags[1].incompat_flags, INCOMPAT); 5456 if (ret) 5457 return ret; 5458 5459 ret = mnt_want_write_file(file); 5460 if (ret) 5461 return ret; 5462 5463 trans = btrfs_start_transaction(root, 0); 5464 if (IS_ERR(trans)) { 5465 ret = PTR_ERR(trans); 5466 goto out_drop_write; 5467 } 5468 5469 spin_lock(&fs_info->super_lock); 5470 newflags = btrfs_super_compat_flags(super_block); 5471 newflags |= flags[0].compat_flags & flags[1].compat_flags; 5472 newflags &= ~(flags[0].compat_flags & ~flags[1].compat_flags); 5473 btrfs_set_super_compat_flags(super_block, newflags); 5474 5475 newflags = btrfs_super_compat_ro_flags(super_block); 5476 newflags |= flags[0].compat_ro_flags & flags[1].compat_ro_flags; 5477 newflags &= ~(flags[0].compat_ro_flags & ~flags[1].compat_ro_flags); 5478 btrfs_set_super_compat_ro_flags(super_block, newflags); 5479 5480 newflags = btrfs_super_incompat_flags(super_block); 5481 newflags |= flags[0].incompat_flags & flags[1].incompat_flags; 5482 newflags &= ~(flags[0].incompat_flags & ~flags[1].incompat_flags); 5483 btrfs_set_super_incompat_flags(super_block, newflags); 5484 spin_unlock(&fs_info->super_lock); 5485 5486 ret = btrfs_commit_transaction(trans); 5487 out_drop_write: 5488 mnt_drop_write_file(file); 5489 5490 return ret; 5491 } 5492 5493 static int _btrfs_ioctl_send(struct file *file, void __user *argp, bool compat) 5494 { 5495 struct btrfs_ioctl_send_args *arg; 5496 int ret; 5497 5498 if (compat) { 5499 #if defined(CONFIG_64BIT) && defined(CONFIG_COMPAT) 5500 struct btrfs_ioctl_send_args_32 args32; 5501 5502 ret = copy_from_user(&args32, argp, 

static int _btrfs_ioctl_send(struct file *file, void __user *argp, bool compat)
{
	struct btrfs_ioctl_send_args *arg;
	int ret;

	if (compat) {
#if defined(CONFIG_64BIT) && defined(CONFIG_COMPAT)
		struct btrfs_ioctl_send_args_32 args32;

		ret = copy_from_user(&args32, argp, sizeof(args32));
		if (ret)
			return -EFAULT;
		arg = kzalloc(sizeof(*arg), GFP_KERNEL);
		if (!arg)
			return -ENOMEM;
		arg->send_fd = args32.send_fd;
		arg->clone_sources_count = args32.clone_sources_count;
		arg->clone_sources = compat_ptr(args32.clone_sources);
		arg->parent_root = args32.parent_root;
		arg->flags = args32.flags;
		memcpy(arg->reserved, args32.reserved,
		       sizeof(args32.reserved));
#else
		return -ENOTTY;
#endif
	} else {
		arg = memdup_user(argp, sizeof(*arg));
		if (IS_ERR(arg))
			return PTR_ERR(arg);
	}
	ret = btrfs_ioctl_send(file, arg);
	kfree(arg);
	return ret;
}
that will start 5603 * processing uncleaned subvols. 5604 */ 5605 wake_up_process(fs_info->transaction_kthread); 5606 return ret; 5607 } 5608 case BTRFS_IOC_START_SYNC: 5609 return btrfs_ioctl_start_sync(root, argp); 5610 case BTRFS_IOC_WAIT_SYNC: 5611 return btrfs_ioctl_wait_sync(fs_info, argp); 5612 case BTRFS_IOC_SCRUB: 5613 return btrfs_ioctl_scrub(file, argp); 5614 case BTRFS_IOC_SCRUB_CANCEL: 5615 return btrfs_ioctl_scrub_cancel(fs_info); 5616 case BTRFS_IOC_SCRUB_PROGRESS: 5617 return btrfs_ioctl_scrub_progress(fs_info, argp); 5618 case BTRFS_IOC_BALANCE_V2: 5619 return btrfs_ioctl_balance(file, argp); 5620 case BTRFS_IOC_BALANCE_CTL: 5621 return btrfs_ioctl_balance_ctl(fs_info, arg); 5622 case BTRFS_IOC_BALANCE_PROGRESS: 5623 return btrfs_ioctl_balance_progress(fs_info, argp); 5624 case BTRFS_IOC_SET_RECEIVED_SUBVOL: 5625 return btrfs_ioctl_set_received_subvol(file, argp); 5626 #ifdef CONFIG_64BIT 5627 case BTRFS_IOC_SET_RECEIVED_SUBVOL_32: 5628 return btrfs_ioctl_set_received_subvol_32(file, argp); 5629 #endif 5630 case BTRFS_IOC_SEND: 5631 return _btrfs_ioctl_send(file, argp, false); 5632 #if defined(CONFIG_64BIT) && defined(CONFIG_COMPAT) 5633 case BTRFS_IOC_SEND_32: 5634 return _btrfs_ioctl_send(file, argp, true); 5635 #endif 5636 case BTRFS_IOC_GET_DEV_STATS: 5637 return btrfs_ioctl_get_dev_stats(fs_info, argp); 5638 case BTRFS_IOC_QUOTA_CTL: 5639 return btrfs_ioctl_quota_ctl(file, argp); 5640 case BTRFS_IOC_QGROUP_ASSIGN: 5641 return btrfs_ioctl_qgroup_assign(file, argp); 5642 case BTRFS_IOC_QGROUP_CREATE: 5643 return btrfs_ioctl_qgroup_create(file, argp); 5644 case BTRFS_IOC_QGROUP_LIMIT: 5645 return btrfs_ioctl_qgroup_limit(file, argp); 5646 case BTRFS_IOC_QUOTA_RESCAN: 5647 return btrfs_ioctl_quota_rescan(file, argp); 5648 case BTRFS_IOC_QUOTA_RESCAN_STATUS: 5649 return btrfs_ioctl_quota_rescan_status(file, argp); 5650 case BTRFS_IOC_QUOTA_RESCAN_WAIT: 5651 return btrfs_ioctl_quota_rescan_wait(file, argp); 5652 case BTRFS_IOC_DEV_REPLACE: 5653 return btrfs_ioctl_dev_replace(fs_info, argp); 5654 case BTRFS_IOC_GET_FSLABEL: 5655 return btrfs_ioctl_get_fslabel(file, argp); 5656 case BTRFS_IOC_SET_FSLABEL: 5657 return btrfs_ioctl_set_fslabel(file, argp); 5658 case BTRFS_IOC_GET_SUPPORTED_FEATURES: 5659 return btrfs_ioctl_get_supported_features(argp); 5660 case BTRFS_IOC_GET_FEATURES: 5661 return btrfs_ioctl_get_features(file, argp); 5662 case BTRFS_IOC_SET_FEATURES: 5663 return btrfs_ioctl_set_features(file, argp); 5664 case FS_IOC_FSGETXATTR: 5665 return btrfs_ioctl_fsgetxattr(file, argp); 5666 case FS_IOC_FSSETXATTR: 5667 return btrfs_ioctl_fssetxattr(file, argp); 5668 case BTRFS_IOC_GET_SUBVOL_INFO: 5669 return btrfs_ioctl_get_subvol_info(file, argp); 5670 case BTRFS_IOC_GET_SUBVOL_ROOTREF: 5671 return btrfs_ioctl_get_subvol_rootref(file, argp); 5672 case BTRFS_IOC_INO_LOOKUP_USER: 5673 return btrfs_ioctl_ino_lookup_user(file, argp); 5674 } 5675 5676 return -ENOTTY; 5677 } 5678 5679 #ifdef CONFIG_COMPAT 5680 long btrfs_compat_ioctl(struct file *file, unsigned int cmd, unsigned long arg) 5681 { 5682 /* 5683 * These all access 32-bit values anyway so no further 5684 * handling is necessary. 5685 */ 5686 switch (cmd) { 5687 case FS_IOC32_GETFLAGS: 5688 cmd = FS_IOC_GETFLAGS; 5689 break; 5690 case FS_IOC32_SETFLAGS: 5691 cmd = FS_IOC_SETFLAGS; 5692 break; 5693 case FS_IOC32_GETVERSION: 5694 cmd = FS_IOC_GETVERSION; 5695 break; 5696 } 5697 5698 return btrfs_ioctl(file, cmd, (unsigned long) compat_ptr(arg)); 5699 } 5700 #endif 5701