1 /* 2 * Copyright (C) 2007 Oracle. All rights reserved. 3 * 4 * This program is free software; you can redistribute it and/or 5 * modify it under the terms of the GNU General Public 6 * License v2 as published by the Free Software Foundation. 7 * 8 * This program is distributed in the hope that it will be useful, 9 * but WITHOUT ANY WARRANTY; without even the implied warranty of 10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 11 * General Public License for more details. 12 * 13 * You should have received a copy of the GNU General Public 14 * License along with this program; if not, write to the 15 * Free Software Foundation, Inc., 59 Temple Place - Suite 330, 16 * Boston, MA 021110-1307, USA. 17 */ 18 19 #include <linux/kernel.h> 20 #include <linux/bio.h> 21 #include <linux/buffer_head.h> 22 #include <linux/file.h> 23 #include <linux/fs.h> 24 #include <linux/fsnotify.h> 25 #include <linux/pagemap.h> 26 #include <linux/highmem.h> 27 #include <linux/time.h> 28 #include <linux/init.h> 29 #include <linux/string.h> 30 #include <linux/backing-dev.h> 31 #include <linux/mount.h> 32 #include <linux/mpage.h> 33 #include <linux/namei.h> 34 #include <linux/swap.h> 35 #include <linux/writeback.h> 36 #include <linux/compat.h> 37 #include <linux/bit_spinlock.h> 38 #include <linux/security.h> 39 #include <linux/xattr.h> 40 #include <linux/mm.h> 41 #include <linux/slab.h> 42 #include <linux/blkdev.h> 43 #include <linux/uuid.h> 44 #include <linux/btrfs.h> 45 #include <linux/uaccess.h> 46 #include <linux/iversion.h> 47 #include "ctree.h" 48 #include "disk-io.h" 49 #include "transaction.h" 50 #include "btrfs_inode.h" 51 #include "print-tree.h" 52 #include "volumes.h" 53 #include "locking.h" 54 #include "inode-map.h" 55 #include "backref.h" 56 #include "rcu-string.h" 57 #include "send.h" 58 #include "dev-replace.h" 59 #include "props.h" 60 #include "sysfs.h" 61 #include "qgroup.h" 62 #include "tree-log.h" 63 #include "compression.h" 64 65 #ifdef CONFIG_64BIT 66 /* If we have a 32-bit userspace and 64-bit kernel, then the UAPI 67 * structures are incorrect, as the timespec structure from userspace 68 * is 4 bytes too small. We define these alternatives here to teach 69 * the kernel about the 32-bit struct packing. 70 */ 71 struct btrfs_ioctl_timespec_32 { 72 __u64 sec; 73 __u32 nsec; 74 } __attribute__ ((__packed__)); 75 76 struct btrfs_ioctl_received_subvol_args_32 { 77 char uuid[BTRFS_UUID_SIZE]; /* in */ 78 __u64 stransid; /* in */ 79 __u64 rtransid; /* out */ 80 struct btrfs_ioctl_timespec_32 stime; /* in */ 81 struct btrfs_ioctl_timespec_32 rtime; /* out */ 82 __u64 flags; /* in */ 83 __u64 reserved[16]; /* in */ 84 } __attribute__ ((__packed__)); 85 86 #define BTRFS_IOC_SET_RECEIVED_SUBVOL_32 _IOWR(BTRFS_IOCTL_MAGIC, 37, \ 87 struct btrfs_ioctl_received_subvol_args_32) 88 #endif 89 90 #if defined(CONFIG_64BIT) && defined(CONFIG_COMPAT) 91 struct btrfs_ioctl_send_args_32 { 92 __s64 send_fd; /* in */ 93 __u64 clone_sources_count; /* in */ 94 compat_uptr_t clone_sources; /* in */ 95 __u64 parent_root; /* in */ 96 __u64 flags; /* in */ 97 __u64 reserved[4]; /* in */ 98 } __attribute__ ((__packed__)); 99 100 #define BTRFS_IOC_SEND_32 _IOW(BTRFS_IOCTL_MAGIC, 38, \ 101 struct btrfs_ioctl_send_args_32) 102 #endif 103 104 static int btrfs_clone(struct inode *src, struct inode *inode, 105 u64 off, u64 olen, u64 olen_aligned, u64 destoff, 106 int no_time_update); 107 108 /* Mask out flags that are inappropriate for the given type of inode. */ 109 static inline __u32 btrfs_mask_flags(umode_t mode, __u32 flags) 110 { 111 if (S_ISDIR(mode)) 112 return flags; 113 else if (S_ISREG(mode)) 114 return flags & ~FS_DIRSYNC_FL; 115 else 116 return flags & (FS_NODUMP_FL | FS_NOATIME_FL); 117 } 118 119 /* 120 * Export inode flags to the format expected by the FS_IOC_GETFLAGS ioctl. 121 */ 122 static unsigned int btrfs_flags_to_ioctl(unsigned int flags) 123 { 124 unsigned int iflags = 0; 125 126 if (flags & BTRFS_INODE_SYNC) 127 iflags |= FS_SYNC_FL; 128 if (flags & BTRFS_INODE_IMMUTABLE) 129 iflags |= FS_IMMUTABLE_FL; 130 if (flags & BTRFS_INODE_APPEND) 131 iflags |= FS_APPEND_FL; 132 if (flags & BTRFS_INODE_NODUMP) 133 iflags |= FS_NODUMP_FL; 134 if (flags & BTRFS_INODE_NOATIME) 135 iflags |= FS_NOATIME_FL; 136 if (flags & BTRFS_INODE_DIRSYNC) 137 iflags |= FS_DIRSYNC_FL; 138 if (flags & BTRFS_INODE_NODATACOW) 139 iflags |= FS_NOCOW_FL; 140 141 if (flags & BTRFS_INODE_NOCOMPRESS) 142 iflags |= FS_NOCOMP_FL; 143 else if (flags & BTRFS_INODE_COMPRESS) 144 iflags |= FS_COMPR_FL; 145 146 return iflags; 147 } 148 149 /* 150 * Update inode->i_flags based on the btrfs internal flags. 151 */ 152 void btrfs_update_iflags(struct inode *inode) 153 { 154 struct btrfs_inode *ip = BTRFS_I(inode); 155 unsigned int new_fl = 0; 156 157 if (ip->flags & BTRFS_INODE_SYNC) 158 new_fl |= S_SYNC; 159 if (ip->flags & BTRFS_INODE_IMMUTABLE) 160 new_fl |= S_IMMUTABLE; 161 if (ip->flags & BTRFS_INODE_APPEND) 162 new_fl |= S_APPEND; 163 if (ip->flags & BTRFS_INODE_NOATIME) 164 new_fl |= S_NOATIME; 165 if (ip->flags & BTRFS_INODE_DIRSYNC) 166 new_fl |= S_DIRSYNC; 167 168 set_mask_bits(&inode->i_flags, 169 S_SYNC | S_APPEND | S_IMMUTABLE | S_NOATIME | S_DIRSYNC, 170 new_fl); 171 } 172 173 static int btrfs_ioctl_getflags(struct file *file, void __user *arg) 174 { 175 struct btrfs_inode *ip = BTRFS_I(file_inode(file)); 176 unsigned int flags = btrfs_flags_to_ioctl(ip->flags); 177 178 if (copy_to_user(arg, &flags, sizeof(flags))) 179 return -EFAULT; 180 return 0; 181 } 182 183 static int check_flags(unsigned int flags) 184 { 185 if (flags & ~(FS_IMMUTABLE_FL | FS_APPEND_FL | \ 186 FS_NOATIME_FL | FS_NODUMP_FL | \ 187 FS_SYNC_FL | FS_DIRSYNC_FL | \ 188 FS_NOCOMP_FL | FS_COMPR_FL | 189 FS_NOCOW_FL)) 190 return -EOPNOTSUPP; 191 192 if ((flags & FS_NOCOMP_FL) && (flags & FS_COMPR_FL)) 193 return -EINVAL; 194 195 return 0; 196 } 197 198 static int btrfs_ioctl_setflags(struct file *file, void __user *arg) 199 { 200 struct inode *inode = file_inode(file); 201 struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb); 202 struct btrfs_inode *ip = BTRFS_I(inode); 203 struct btrfs_root *root = ip->root; 204 struct btrfs_trans_handle *trans; 205 unsigned int flags, oldflags; 206 int ret; 207 u64 ip_oldflags; 208 unsigned int i_oldflags; 209 umode_t mode; 210 211 if (!inode_owner_or_capable(inode)) 212 return -EPERM; 213 214 if (btrfs_root_readonly(root)) 215 return -EROFS; 216 217 if (copy_from_user(&flags, arg, sizeof(flags))) 218 return -EFAULT; 219 220 ret = check_flags(flags); 221 if (ret) 222 return ret; 223 224 ret = mnt_want_write_file(file); 225 if (ret) 226 return ret; 227 228 inode_lock(inode); 229 230 ip_oldflags = ip->flags; 231 i_oldflags = inode->i_flags; 232 mode = inode->i_mode; 233 234 flags = btrfs_mask_flags(inode->i_mode, flags); 235 oldflags = btrfs_flags_to_ioctl(ip->flags); 236 if ((flags ^ oldflags) & (FS_APPEND_FL | FS_IMMUTABLE_FL)) { 237 if (!capable(CAP_LINUX_IMMUTABLE)) { 238 ret = -EPERM; 239 goto out_unlock; 240 } 241 } 242 243 if (flags & FS_SYNC_FL) 244 ip->flags |= BTRFS_INODE_SYNC; 245 else 246 ip->flags &= ~BTRFS_INODE_SYNC; 247 if (flags & FS_IMMUTABLE_FL) 248 ip->flags |= BTRFS_INODE_IMMUTABLE; 249 else 250 ip->flags &= ~BTRFS_INODE_IMMUTABLE; 251 if (flags & FS_APPEND_FL) 252 ip->flags |= BTRFS_INODE_APPEND; 253 else 254 ip->flags &= ~BTRFS_INODE_APPEND; 255 if (flags & FS_NODUMP_FL) 256 ip->flags |= BTRFS_INODE_NODUMP; 257 else 258 ip->flags &= ~BTRFS_INODE_NODUMP; 259 if (flags & FS_NOATIME_FL) 260 ip->flags |= BTRFS_INODE_NOATIME; 261 else 262 ip->flags &= ~BTRFS_INODE_NOATIME; 263 if (flags & FS_DIRSYNC_FL) 264 ip->flags |= BTRFS_INODE_DIRSYNC; 265 else 266 ip->flags &= ~BTRFS_INODE_DIRSYNC; 267 if (flags & FS_NOCOW_FL) { 268 if (S_ISREG(mode)) { 269 /* 270 * It's safe to turn csums off here, no extents exist. 271 * Otherwise we want the flag to reflect the real COW 272 * status of the file and will not set it. 273 */ 274 if (inode->i_size == 0) 275 ip->flags |= BTRFS_INODE_NODATACOW 276 | BTRFS_INODE_NODATASUM; 277 } else { 278 ip->flags |= BTRFS_INODE_NODATACOW; 279 } 280 } else { 281 /* 282 * Revert back under same assumptions as above 283 */ 284 if (S_ISREG(mode)) { 285 if (inode->i_size == 0) 286 ip->flags &= ~(BTRFS_INODE_NODATACOW 287 | BTRFS_INODE_NODATASUM); 288 } else { 289 ip->flags &= ~BTRFS_INODE_NODATACOW; 290 } 291 } 292 293 /* 294 * The COMPRESS flag can only be changed by users, while the NOCOMPRESS 295 * flag may be changed automatically if compression code won't make 296 * things smaller. 297 */ 298 if (flags & FS_NOCOMP_FL) { 299 ip->flags &= ~BTRFS_INODE_COMPRESS; 300 ip->flags |= BTRFS_INODE_NOCOMPRESS; 301 302 ret = btrfs_set_prop(inode, "btrfs.compression", NULL, 0, 0); 303 if (ret && ret != -ENODATA) 304 goto out_drop; 305 } else if (flags & FS_COMPR_FL) { 306 const char *comp; 307 308 ip->flags |= BTRFS_INODE_COMPRESS; 309 ip->flags &= ~BTRFS_INODE_NOCOMPRESS; 310 311 comp = btrfs_compress_type2str(fs_info->compress_type); 312 if (!comp || comp[0] == 0) 313 comp = btrfs_compress_type2str(BTRFS_COMPRESS_ZLIB); 314 315 ret = btrfs_set_prop(inode, "btrfs.compression", 316 comp, strlen(comp), 0); 317 if (ret) 318 goto out_drop; 319 320 } else { 321 ret = btrfs_set_prop(inode, "btrfs.compression", NULL, 0, 0); 322 if (ret && ret != -ENODATA) 323 goto out_drop; 324 ip->flags &= ~(BTRFS_INODE_COMPRESS | BTRFS_INODE_NOCOMPRESS); 325 } 326 327 trans = btrfs_start_transaction(root, 1); 328 if (IS_ERR(trans)) { 329 ret = PTR_ERR(trans); 330 goto out_drop; 331 } 332 333 btrfs_update_iflags(inode); 334 inode_inc_iversion(inode); 335 inode->i_ctime = current_time(inode); 336 ret = btrfs_update_inode(trans, root, inode); 337 338 btrfs_end_transaction(trans); 339 out_drop: 340 if (ret) { 341 ip->flags = ip_oldflags; 342 inode->i_flags = i_oldflags; 343 } 344 345 out_unlock: 346 inode_unlock(inode); 347 mnt_drop_write_file(file); 348 return ret; 349 } 350 351 static int btrfs_ioctl_getversion(struct file *file, int __user *arg) 352 { 353 struct inode *inode = file_inode(file); 354 355 return put_user(inode->i_generation, arg); 356 } 357 358 static noinline int btrfs_ioctl_fitrim(struct file *file, void __user *arg) 359 { 360 struct inode *inode = file_inode(file); 361 struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb); 362 struct btrfs_device *device; 363 struct request_queue *q; 364 struct fstrim_range range; 365 u64 minlen = ULLONG_MAX; 366 u64 num_devices = 0; 367 u64 total_bytes = btrfs_super_total_bytes(fs_info->super_copy); 368 int ret; 369 370 if (!capable(CAP_SYS_ADMIN)) 371 return -EPERM; 372 373 rcu_read_lock(); 374 list_for_each_entry_rcu(device, &fs_info->fs_devices->devices, 375 dev_list) { 376 if (!device->bdev) 377 continue; 378 q = bdev_get_queue(device->bdev); 379 if (blk_queue_discard(q)) { 380 num_devices++; 381 minlen = min_t(u64, q->limits.discard_granularity, 382 minlen); 383 } 384 } 385 rcu_read_unlock(); 386 387 if (!num_devices) 388 return -EOPNOTSUPP; 389 if (copy_from_user(&range, arg, sizeof(range))) 390 return -EFAULT; 391 if (range.start > total_bytes || 392 range.len < fs_info->sb->s_blocksize) 393 return -EINVAL; 394 395 range.len = min(range.len, total_bytes - range.start); 396 range.minlen = max(range.minlen, minlen); 397 ret = btrfs_trim_fs(fs_info, &range); 398 if (ret < 0) 399 return ret; 400 401 if (copy_to_user(arg, &range, sizeof(range))) 402 return -EFAULT; 403 404 return 0; 405 } 406 407 int btrfs_is_empty_uuid(u8 *uuid) 408 { 409 int i; 410 411 for (i = 0; i < BTRFS_UUID_SIZE; i++) { 412 if (uuid[i]) 413 return 0; 414 } 415 return 1; 416 } 417 418 static noinline int create_subvol(struct inode *dir, 419 struct dentry *dentry, 420 const char *name, int namelen, 421 u64 *async_transid, 422 struct btrfs_qgroup_inherit *inherit) 423 { 424 struct btrfs_fs_info *fs_info = btrfs_sb(dir->i_sb); 425 struct btrfs_trans_handle *trans; 426 struct btrfs_key key; 427 struct btrfs_root_item *root_item; 428 struct btrfs_inode_item *inode_item; 429 struct extent_buffer *leaf; 430 struct btrfs_root *root = BTRFS_I(dir)->root; 431 struct btrfs_root *new_root; 432 struct btrfs_block_rsv block_rsv; 433 struct timespec cur_time = current_time(dir); 434 struct inode *inode; 435 int ret; 436 int err; 437 u64 objectid; 438 u64 new_dirid = BTRFS_FIRST_FREE_OBJECTID; 439 u64 index = 0; 440 u64 qgroup_reserved; 441 uuid_le new_uuid; 442 443 root_item = kzalloc(sizeof(*root_item), GFP_KERNEL); 444 if (!root_item) 445 return -ENOMEM; 446 447 ret = btrfs_find_free_objectid(fs_info->tree_root, &objectid); 448 if (ret) 449 goto fail_free; 450 451 /* 452 * Don't create subvolume whose level is not zero. Or qgroup will be 453 * screwed up since it assumes subvolume qgroup's level to be 0. 454 */ 455 if (btrfs_qgroup_level(objectid)) { 456 ret = -ENOSPC; 457 goto fail_free; 458 } 459 460 btrfs_init_block_rsv(&block_rsv, BTRFS_BLOCK_RSV_TEMP); 461 /* 462 * The same as the snapshot creation, please see the comment 463 * of create_snapshot(). 464 */ 465 ret = btrfs_subvolume_reserve_metadata(root, &block_rsv, 466 8, &qgroup_reserved, false); 467 if (ret) 468 goto fail_free; 469 470 trans = btrfs_start_transaction(root, 0); 471 if (IS_ERR(trans)) { 472 ret = PTR_ERR(trans); 473 btrfs_subvolume_release_metadata(fs_info, &block_rsv); 474 goto fail_free; 475 } 476 trans->block_rsv = &block_rsv; 477 trans->bytes_reserved = block_rsv.size; 478 479 ret = btrfs_qgroup_inherit(trans, fs_info, 0, objectid, inherit); 480 if (ret) 481 goto fail; 482 483 leaf = btrfs_alloc_tree_block(trans, root, 0, objectid, NULL, 0, 0, 0); 484 if (IS_ERR(leaf)) { 485 ret = PTR_ERR(leaf); 486 goto fail; 487 } 488 489 memzero_extent_buffer(leaf, 0, sizeof(struct btrfs_header)); 490 btrfs_set_header_bytenr(leaf, leaf->start); 491 btrfs_set_header_generation(leaf, trans->transid); 492 btrfs_set_header_backref_rev(leaf, BTRFS_MIXED_BACKREF_REV); 493 btrfs_set_header_owner(leaf, objectid); 494 495 write_extent_buffer_fsid(leaf, fs_info->fsid); 496 write_extent_buffer_chunk_tree_uuid(leaf, fs_info->chunk_tree_uuid); 497 btrfs_mark_buffer_dirty(leaf); 498 499 inode_item = &root_item->inode; 500 btrfs_set_stack_inode_generation(inode_item, 1); 501 btrfs_set_stack_inode_size(inode_item, 3); 502 btrfs_set_stack_inode_nlink(inode_item, 1); 503 btrfs_set_stack_inode_nbytes(inode_item, 504 fs_info->nodesize); 505 btrfs_set_stack_inode_mode(inode_item, S_IFDIR | 0755); 506 507 btrfs_set_root_flags(root_item, 0); 508 btrfs_set_root_limit(root_item, 0); 509 btrfs_set_stack_inode_flags(inode_item, BTRFS_INODE_ROOT_ITEM_INIT); 510 511 btrfs_set_root_bytenr(root_item, leaf->start); 512 btrfs_set_root_generation(root_item, trans->transid); 513 btrfs_set_root_level(root_item, 0); 514 btrfs_set_root_refs(root_item, 1); 515 btrfs_set_root_used(root_item, leaf->len); 516 btrfs_set_root_last_snapshot(root_item, 0); 517 518 btrfs_set_root_generation_v2(root_item, 519 btrfs_root_generation(root_item)); 520 uuid_le_gen(&new_uuid); 521 memcpy(root_item->uuid, new_uuid.b, BTRFS_UUID_SIZE); 522 btrfs_set_stack_timespec_sec(&root_item->otime, cur_time.tv_sec); 523 btrfs_set_stack_timespec_nsec(&root_item->otime, cur_time.tv_nsec); 524 root_item->ctime = root_item->otime; 525 btrfs_set_root_ctransid(root_item, trans->transid); 526 btrfs_set_root_otransid(root_item, trans->transid); 527 528 btrfs_tree_unlock(leaf); 529 free_extent_buffer(leaf); 530 leaf = NULL; 531 532 btrfs_set_root_dirid(root_item, new_dirid); 533 534 key.objectid = objectid; 535 key.offset = 0; 536 key.type = BTRFS_ROOT_ITEM_KEY; 537 ret = btrfs_insert_root(trans, fs_info->tree_root, &key, 538 root_item); 539 if (ret) 540 goto fail; 541 542 key.offset = (u64)-1; 543 new_root = btrfs_read_fs_root_no_name(fs_info, &key); 544 if (IS_ERR(new_root)) { 545 ret = PTR_ERR(new_root); 546 btrfs_abort_transaction(trans, ret); 547 goto fail; 548 } 549 550 btrfs_record_root_in_trans(trans, new_root); 551 552 ret = btrfs_create_subvol_root(trans, new_root, root, new_dirid); 553 if (ret) { 554 /* We potentially lose an unused inode item here */ 555 btrfs_abort_transaction(trans, ret); 556 goto fail; 557 } 558 559 mutex_lock(&new_root->objectid_mutex); 560 new_root->highest_objectid = new_dirid; 561 mutex_unlock(&new_root->objectid_mutex); 562 563 /* 564 * insert the directory item 565 */ 566 ret = btrfs_set_inode_index(BTRFS_I(dir), &index); 567 if (ret) { 568 btrfs_abort_transaction(trans, ret); 569 goto fail; 570 } 571 572 ret = btrfs_insert_dir_item(trans, root, 573 name, namelen, BTRFS_I(dir), &key, 574 BTRFS_FT_DIR, index); 575 if (ret) { 576 btrfs_abort_transaction(trans, ret); 577 goto fail; 578 } 579 580 btrfs_i_size_write(BTRFS_I(dir), dir->i_size + namelen * 2); 581 ret = btrfs_update_inode(trans, root, dir); 582 BUG_ON(ret); 583 584 ret = btrfs_add_root_ref(trans, fs_info, 585 objectid, root->root_key.objectid, 586 btrfs_ino(BTRFS_I(dir)), index, name, namelen); 587 BUG_ON(ret); 588 589 ret = btrfs_uuid_tree_add(trans, fs_info, root_item->uuid, 590 BTRFS_UUID_KEY_SUBVOL, objectid); 591 if (ret) 592 btrfs_abort_transaction(trans, ret); 593 594 fail: 595 kfree(root_item); 596 trans->block_rsv = NULL; 597 trans->bytes_reserved = 0; 598 btrfs_subvolume_release_metadata(fs_info, &block_rsv); 599 600 if (async_transid) { 601 *async_transid = trans->transid; 602 err = btrfs_commit_transaction_async(trans, 1); 603 if (err) 604 err = btrfs_commit_transaction(trans); 605 } else { 606 err = btrfs_commit_transaction(trans); 607 } 608 if (err && !ret) 609 ret = err; 610 611 if (!ret) { 612 inode = btrfs_lookup_dentry(dir, dentry); 613 if (IS_ERR(inode)) 614 return PTR_ERR(inode); 615 d_instantiate(dentry, inode); 616 } 617 return ret; 618 619 fail_free: 620 kfree(root_item); 621 return ret; 622 } 623 624 static int create_snapshot(struct btrfs_root *root, struct inode *dir, 625 struct dentry *dentry, 626 u64 *async_transid, bool readonly, 627 struct btrfs_qgroup_inherit *inherit) 628 { 629 struct btrfs_fs_info *fs_info = btrfs_sb(dir->i_sb); 630 struct inode *inode; 631 struct btrfs_pending_snapshot *pending_snapshot; 632 struct btrfs_trans_handle *trans; 633 int ret; 634 635 if (!test_bit(BTRFS_ROOT_REF_COWS, &root->state)) 636 return -EINVAL; 637 638 pending_snapshot = kzalloc(sizeof(*pending_snapshot), GFP_KERNEL); 639 if (!pending_snapshot) 640 return -ENOMEM; 641 642 pending_snapshot->root_item = kzalloc(sizeof(struct btrfs_root_item), 643 GFP_KERNEL); 644 pending_snapshot->path = btrfs_alloc_path(); 645 if (!pending_snapshot->root_item || !pending_snapshot->path) { 646 ret = -ENOMEM; 647 goto free_pending; 648 } 649 650 atomic_inc(&root->will_be_snapshotted); 651 smp_mb__after_atomic(); 652 /* wait for no snapshot writes */ 653 wait_event(root->subv_writers->wait, 654 percpu_counter_sum(&root->subv_writers->counter) == 0); 655 656 ret = btrfs_start_delalloc_inodes(root, 0); 657 if (ret) 658 goto dec_and_free; 659 660 btrfs_wait_ordered_extents(root, U64_MAX, 0, (u64)-1); 661 662 btrfs_init_block_rsv(&pending_snapshot->block_rsv, 663 BTRFS_BLOCK_RSV_TEMP); 664 /* 665 * 1 - parent dir inode 666 * 2 - dir entries 667 * 1 - root item 668 * 2 - root ref/backref 669 * 1 - root of snapshot 670 * 1 - UUID item 671 */ 672 ret = btrfs_subvolume_reserve_metadata(BTRFS_I(dir)->root, 673 &pending_snapshot->block_rsv, 8, 674 &pending_snapshot->qgroup_reserved, 675 false); 676 if (ret) 677 goto dec_and_free; 678 679 pending_snapshot->dentry = dentry; 680 pending_snapshot->root = root; 681 pending_snapshot->readonly = readonly; 682 pending_snapshot->dir = dir; 683 pending_snapshot->inherit = inherit; 684 685 trans = btrfs_start_transaction(root, 0); 686 if (IS_ERR(trans)) { 687 ret = PTR_ERR(trans); 688 goto fail; 689 } 690 691 spin_lock(&fs_info->trans_lock); 692 list_add(&pending_snapshot->list, 693 &trans->transaction->pending_snapshots); 694 spin_unlock(&fs_info->trans_lock); 695 if (async_transid) { 696 *async_transid = trans->transid; 697 ret = btrfs_commit_transaction_async(trans, 1); 698 if (ret) 699 ret = btrfs_commit_transaction(trans); 700 } else { 701 ret = btrfs_commit_transaction(trans); 702 } 703 if (ret) 704 goto fail; 705 706 ret = pending_snapshot->error; 707 if (ret) 708 goto fail; 709 710 ret = btrfs_orphan_cleanup(pending_snapshot->snap); 711 if (ret) 712 goto fail; 713 714 inode = btrfs_lookup_dentry(d_inode(dentry->d_parent), dentry); 715 if (IS_ERR(inode)) { 716 ret = PTR_ERR(inode); 717 goto fail; 718 } 719 720 d_instantiate(dentry, inode); 721 ret = 0; 722 fail: 723 btrfs_subvolume_release_metadata(fs_info, &pending_snapshot->block_rsv); 724 dec_and_free: 725 if (atomic_dec_and_test(&root->will_be_snapshotted)) 726 wake_up_atomic_t(&root->will_be_snapshotted); 727 free_pending: 728 kfree(pending_snapshot->root_item); 729 btrfs_free_path(pending_snapshot->path); 730 kfree(pending_snapshot); 731 732 return ret; 733 } 734 735 /* copy of may_delete in fs/namei.c() 736 * Check whether we can remove a link victim from directory dir, check 737 * whether the type of victim is right. 738 * 1. We can't do it if dir is read-only (done in permission()) 739 * 2. We should have write and exec permissions on dir 740 * 3. We can't remove anything from append-only dir 741 * 4. We can't do anything with immutable dir (done in permission()) 742 * 5. If the sticky bit on dir is set we should either 743 * a. be owner of dir, or 744 * b. be owner of victim, or 745 * c. have CAP_FOWNER capability 746 * 6. If the victim is append-only or immutable we can't do anything with 747 * links pointing to it. 748 * 7. If we were asked to remove a directory and victim isn't one - ENOTDIR. 749 * 8. If we were asked to remove a non-directory and victim isn't one - EISDIR. 750 * 9. We can't remove a root or mountpoint. 751 * 10. We don't allow removal of NFS sillyrenamed files; it's handled by 752 * nfs_async_unlink(). 753 */ 754 755 static int btrfs_may_delete(struct inode *dir, struct dentry *victim, int isdir) 756 { 757 int error; 758 759 if (d_really_is_negative(victim)) 760 return -ENOENT; 761 762 BUG_ON(d_inode(victim->d_parent) != dir); 763 audit_inode_child(dir, victim, AUDIT_TYPE_CHILD_DELETE); 764 765 error = inode_permission(dir, MAY_WRITE | MAY_EXEC); 766 if (error) 767 return error; 768 if (IS_APPEND(dir)) 769 return -EPERM; 770 if (check_sticky(dir, d_inode(victim)) || IS_APPEND(d_inode(victim)) || 771 IS_IMMUTABLE(d_inode(victim)) || IS_SWAPFILE(d_inode(victim))) 772 return -EPERM; 773 if (isdir) { 774 if (!d_is_dir(victim)) 775 return -ENOTDIR; 776 if (IS_ROOT(victim)) 777 return -EBUSY; 778 } else if (d_is_dir(victim)) 779 return -EISDIR; 780 if (IS_DEADDIR(dir)) 781 return -ENOENT; 782 if (victim->d_flags & DCACHE_NFSFS_RENAMED) 783 return -EBUSY; 784 return 0; 785 } 786 787 /* copy of may_create in fs/namei.c() */ 788 static inline int btrfs_may_create(struct inode *dir, struct dentry *child) 789 { 790 if (d_really_is_positive(child)) 791 return -EEXIST; 792 if (IS_DEADDIR(dir)) 793 return -ENOENT; 794 return inode_permission(dir, MAY_WRITE | MAY_EXEC); 795 } 796 797 /* 798 * Create a new subvolume below @parent. This is largely modeled after 799 * sys_mkdirat and vfs_mkdir, but we only do a single component lookup 800 * inside this filesystem so it's quite a bit simpler. 801 */ 802 static noinline int btrfs_mksubvol(const struct path *parent, 803 const char *name, int namelen, 804 struct btrfs_root *snap_src, 805 u64 *async_transid, bool readonly, 806 struct btrfs_qgroup_inherit *inherit) 807 { 808 struct inode *dir = d_inode(parent->dentry); 809 struct btrfs_fs_info *fs_info = btrfs_sb(dir->i_sb); 810 struct dentry *dentry; 811 int error; 812 813 error = down_write_killable_nested(&dir->i_rwsem, I_MUTEX_PARENT); 814 if (error == -EINTR) 815 return error; 816 817 dentry = lookup_one_len(name, parent->dentry, namelen); 818 error = PTR_ERR(dentry); 819 if (IS_ERR(dentry)) 820 goto out_unlock; 821 822 error = btrfs_may_create(dir, dentry); 823 if (error) 824 goto out_dput; 825 826 /* 827 * even if this name doesn't exist, we may get hash collisions. 828 * check for them now when we can safely fail 829 */ 830 error = btrfs_check_dir_item_collision(BTRFS_I(dir)->root, 831 dir->i_ino, name, 832 namelen); 833 if (error) 834 goto out_dput; 835 836 down_read(&fs_info->subvol_sem); 837 838 if (btrfs_root_refs(&BTRFS_I(dir)->root->root_item) == 0) 839 goto out_up_read; 840 841 if (snap_src) { 842 error = create_snapshot(snap_src, dir, dentry, 843 async_transid, readonly, inherit); 844 } else { 845 error = create_subvol(dir, dentry, name, namelen, 846 async_transid, inherit); 847 } 848 if (!error) 849 fsnotify_mkdir(dir, dentry); 850 out_up_read: 851 up_read(&fs_info->subvol_sem); 852 out_dput: 853 dput(dentry); 854 out_unlock: 855 inode_unlock(dir); 856 return error; 857 } 858 859 /* 860 * When we're defragging a range, we don't want to kick it off again 861 * if it is really just waiting for delalloc to send it down. 862 * If we find a nice big extent or delalloc range for the bytes in the 863 * file you want to defrag, we return 0 to let you know to skip this 864 * part of the file 865 */ 866 static int check_defrag_in_cache(struct inode *inode, u64 offset, u32 thresh) 867 { 868 struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree; 869 struct extent_map *em = NULL; 870 struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree; 871 u64 end; 872 873 read_lock(&em_tree->lock); 874 em = lookup_extent_mapping(em_tree, offset, PAGE_SIZE); 875 read_unlock(&em_tree->lock); 876 877 if (em) { 878 end = extent_map_end(em); 879 free_extent_map(em); 880 if (end - offset > thresh) 881 return 0; 882 } 883 /* if we already have a nice delalloc here, just stop */ 884 thresh /= 2; 885 end = count_range_bits(io_tree, &offset, offset + thresh, 886 thresh, EXTENT_DELALLOC, 1); 887 if (end >= thresh) 888 return 0; 889 return 1; 890 } 891 892 /* 893 * helper function to walk through a file and find extents 894 * newer than a specific transid, and smaller than thresh. 895 * 896 * This is used by the defragging code to find new and small 897 * extents 898 */ 899 static int find_new_extents(struct btrfs_root *root, 900 struct inode *inode, u64 newer_than, 901 u64 *off, u32 thresh) 902 { 903 struct btrfs_path *path; 904 struct btrfs_key min_key; 905 struct extent_buffer *leaf; 906 struct btrfs_file_extent_item *extent; 907 int type; 908 int ret; 909 u64 ino = btrfs_ino(BTRFS_I(inode)); 910 911 path = btrfs_alloc_path(); 912 if (!path) 913 return -ENOMEM; 914 915 min_key.objectid = ino; 916 min_key.type = BTRFS_EXTENT_DATA_KEY; 917 min_key.offset = *off; 918 919 while (1) { 920 ret = btrfs_search_forward(root, &min_key, path, newer_than); 921 if (ret != 0) 922 goto none; 923 process_slot: 924 if (min_key.objectid != ino) 925 goto none; 926 if (min_key.type != BTRFS_EXTENT_DATA_KEY) 927 goto none; 928 929 leaf = path->nodes[0]; 930 extent = btrfs_item_ptr(leaf, path->slots[0], 931 struct btrfs_file_extent_item); 932 933 type = btrfs_file_extent_type(leaf, extent); 934 if (type == BTRFS_FILE_EXTENT_REG && 935 btrfs_file_extent_num_bytes(leaf, extent) < thresh && 936 check_defrag_in_cache(inode, min_key.offset, thresh)) { 937 *off = min_key.offset; 938 btrfs_free_path(path); 939 return 0; 940 } 941 942 path->slots[0]++; 943 if (path->slots[0] < btrfs_header_nritems(leaf)) { 944 btrfs_item_key_to_cpu(leaf, &min_key, path->slots[0]); 945 goto process_slot; 946 } 947 948 if (min_key.offset == (u64)-1) 949 goto none; 950 951 min_key.offset++; 952 btrfs_release_path(path); 953 } 954 none: 955 btrfs_free_path(path); 956 return -ENOENT; 957 } 958 959 static struct extent_map *defrag_lookup_extent(struct inode *inode, u64 start) 960 { 961 struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree; 962 struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree; 963 struct extent_map *em; 964 u64 len = PAGE_SIZE; 965 966 /* 967 * hopefully we have this extent in the tree already, try without 968 * the full extent lock 969 */ 970 read_lock(&em_tree->lock); 971 em = lookup_extent_mapping(em_tree, start, len); 972 read_unlock(&em_tree->lock); 973 974 if (!em) { 975 struct extent_state *cached = NULL; 976 u64 end = start + len - 1; 977 978 /* get the big lock and read metadata off disk */ 979 lock_extent_bits(io_tree, start, end, &cached); 980 em = btrfs_get_extent(BTRFS_I(inode), NULL, 0, start, len, 0); 981 unlock_extent_cached(io_tree, start, end, &cached); 982 983 if (IS_ERR(em)) 984 return NULL; 985 } 986 987 return em; 988 } 989 990 static bool defrag_check_next_extent(struct inode *inode, struct extent_map *em) 991 { 992 struct extent_map *next; 993 bool ret = true; 994 995 /* this is the last extent */ 996 if (em->start + em->len >= i_size_read(inode)) 997 return false; 998 999 next = defrag_lookup_extent(inode, em->start + em->len); 1000 if (!next || next->block_start >= EXTENT_MAP_LAST_BYTE) 1001 ret = false; 1002 else if ((em->block_start + em->block_len == next->block_start) && 1003 (em->block_len > SZ_128K && next->block_len > SZ_128K)) 1004 ret = false; 1005 1006 free_extent_map(next); 1007 return ret; 1008 } 1009 1010 static int should_defrag_range(struct inode *inode, u64 start, u32 thresh, 1011 u64 *last_len, u64 *skip, u64 *defrag_end, 1012 int compress) 1013 { 1014 struct extent_map *em; 1015 int ret = 1; 1016 bool next_mergeable = true; 1017 bool prev_mergeable = true; 1018 1019 /* 1020 * make sure that once we start defragging an extent, we keep on 1021 * defragging it 1022 */ 1023 if (start < *defrag_end) 1024 return 1; 1025 1026 *skip = 0; 1027 1028 em = defrag_lookup_extent(inode, start); 1029 if (!em) 1030 return 0; 1031 1032 /* this will cover holes, and inline extents */ 1033 if (em->block_start >= EXTENT_MAP_LAST_BYTE) { 1034 ret = 0; 1035 goto out; 1036 } 1037 1038 if (!*defrag_end) 1039 prev_mergeable = false; 1040 1041 next_mergeable = defrag_check_next_extent(inode, em); 1042 /* 1043 * we hit a real extent, if it is big or the next extent is not a 1044 * real extent, don't bother defragging it 1045 */ 1046 if (!compress && (*last_len == 0 || *last_len >= thresh) && 1047 (em->len >= thresh || (!next_mergeable && !prev_mergeable))) 1048 ret = 0; 1049 out: 1050 /* 1051 * last_len ends up being a counter of how many bytes we've defragged. 1052 * every time we choose not to defrag an extent, we reset *last_len 1053 * so that the next tiny extent will force a defrag. 1054 * 1055 * The end result of this is that tiny extents before a single big 1056 * extent will force at least part of that big extent to be defragged. 1057 */ 1058 if (ret) { 1059 *defrag_end = extent_map_end(em); 1060 } else { 1061 *last_len = 0; 1062 *skip = extent_map_end(em); 1063 *defrag_end = 0; 1064 } 1065 1066 free_extent_map(em); 1067 return ret; 1068 } 1069 1070 /* 1071 * it doesn't do much good to defrag one or two pages 1072 * at a time. This pulls in a nice chunk of pages 1073 * to COW and defrag. 1074 * 1075 * It also makes sure the delalloc code has enough 1076 * dirty data to avoid making new small extents as part 1077 * of the defrag 1078 * 1079 * It's a good idea to start RA on this range 1080 * before calling this. 1081 */ 1082 static int cluster_pages_for_defrag(struct inode *inode, 1083 struct page **pages, 1084 unsigned long start_index, 1085 unsigned long num_pages) 1086 { 1087 unsigned long file_end; 1088 u64 isize = i_size_read(inode); 1089 u64 page_start; 1090 u64 page_end; 1091 u64 page_cnt; 1092 int ret; 1093 int i; 1094 int i_done; 1095 struct btrfs_ordered_extent *ordered; 1096 struct extent_state *cached_state = NULL; 1097 struct extent_io_tree *tree; 1098 struct extent_changeset *data_reserved = NULL; 1099 gfp_t mask = btrfs_alloc_write_mask(inode->i_mapping); 1100 1101 file_end = (isize - 1) >> PAGE_SHIFT; 1102 if (!isize || start_index > file_end) 1103 return 0; 1104 1105 page_cnt = min_t(u64, (u64)num_pages, (u64)file_end - start_index + 1); 1106 1107 ret = btrfs_delalloc_reserve_space(inode, &data_reserved, 1108 start_index << PAGE_SHIFT, 1109 page_cnt << PAGE_SHIFT); 1110 if (ret) 1111 return ret; 1112 i_done = 0; 1113 tree = &BTRFS_I(inode)->io_tree; 1114 1115 /* step one, lock all the pages */ 1116 for (i = 0; i < page_cnt; i++) { 1117 struct page *page; 1118 again: 1119 page = find_or_create_page(inode->i_mapping, 1120 start_index + i, mask); 1121 if (!page) 1122 break; 1123 1124 page_start = page_offset(page); 1125 page_end = page_start + PAGE_SIZE - 1; 1126 while (1) { 1127 lock_extent_bits(tree, page_start, page_end, 1128 &cached_state); 1129 ordered = btrfs_lookup_ordered_extent(inode, 1130 page_start); 1131 unlock_extent_cached(tree, page_start, page_end, 1132 &cached_state); 1133 if (!ordered) 1134 break; 1135 1136 unlock_page(page); 1137 btrfs_start_ordered_extent(inode, ordered, 1); 1138 btrfs_put_ordered_extent(ordered); 1139 lock_page(page); 1140 /* 1141 * we unlocked the page above, so we need check if 1142 * it was released or not. 1143 */ 1144 if (page->mapping != inode->i_mapping) { 1145 unlock_page(page); 1146 put_page(page); 1147 goto again; 1148 } 1149 } 1150 1151 if (!PageUptodate(page)) { 1152 btrfs_readpage(NULL, page); 1153 lock_page(page); 1154 if (!PageUptodate(page)) { 1155 unlock_page(page); 1156 put_page(page); 1157 ret = -EIO; 1158 break; 1159 } 1160 } 1161 1162 if (page->mapping != inode->i_mapping) { 1163 unlock_page(page); 1164 put_page(page); 1165 goto again; 1166 } 1167 1168 pages[i] = page; 1169 i_done++; 1170 } 1171 if (!i_done || ret) 1172 goto out; 1173 1174 if (!(inode->i_sb->s_flags & SB_ACTIVE)) 1175 goto out; 1176 1177 /* 1178 * so now we have a nice long stream of locked 1179 * and up to date pages, lets wait on them 1180 */ 1181 for (i = 0; i < i_done; i++) 1182 wait_on_page_writeback(pages[i]); 1183 1184 page_start = page_offset(pages[0]); 1185 page_end = page_offset(pages[i_done - 1]) + PAGE_SIZE; 1186 1187 lock_extent_bits(&BTRFS_I(inode)->io_tree, 1188 page_start, page_end - 1, &cached_state); 1189 clear_extent_bit(&BTRFS_I(inode)->io_tree, page_start, 1190 page_end - 1, EXTENT_DIRTY | EXTENT_DELALLOC | 1191 EXTENT_DO_ACCOUNTING | EXTENT_DEFRAG, 0, 0, 1192 &cached_state); 1193 1194 if (i_done != page_cnt) { 1195 spin_lock(&BTRFS_I(inode)->lock); 1196 BTRFS_I(inode)->outstanding_extents++; 1197 spin_unlock(&BTRFS_I(inode)->lock); 1198 btrfs_delalloc_release_space(inode, data_reserved, 1199 start_index << PAGE_SHIFT, 1200 (page_cnt - i_done) << PAGE_SHIFT); 1201 } 1202 1203 1204 set_extent_defrag(&BTRFS_I(inode)->io_tree, page_start, page_end - 1, 1205 &cached_state); 1206 1207 unlock_extent_cached(&BTRFS_I(inode)->io_tree, 1208 page_start, page_end - 1, &cached_state); 1209 1210 for (i = 0; i < i_done; i++) { 1211 clear_page_dirty_for_io(pages[i]); 1212 ClearPageChecked(pages[i]); 1213 set_page_extent_mapped(pages[i]); 1214 set_page_dirty(pages[i]); 1215 unlock_page(pages[i]); 1216 put_page(pages[i]); 1217 } 1218 btrfs_delalloc_release_extents(BTRFS_I(inode), page_cnt << PAGE_SHIFT); 1219 extent_changeset_free(data_reserved); 1220 return i_done; 1221 out: 1222 for (i = 0; i < i_done; i++) { 1223 unlock_page(pages[i]); 1224 put_page(pages[i]); 1225 } 1226 btrfs_delalloc_release_space(inode, data_reserved, 1227 start_index << PAGE_SHIFT, 1228 page_cnt << PAGE_SHIFT); 1229 btrfs_delalloc_release_extents(BTRFS_I(inode), page_cnt << PAGE_SHIFT); 1230 extent_changeset_free(data_reserved); 1231 return ret; 1232 1233 } 1234 1235 int btrfs_defrag_file(struct inode *inode, struct file *file, 1236 struct btrfs_ioctl_defrag_range_args *range, 1237 u64 newer_than, unsigned long max_to_defrag) 1238 { 1239 struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb); 1240 struct btrfs_root *root = BTRFS_I(inode)->root; 1241 struct file_ra_state *ra = NULL; 1242 unsigned long last_index; 1243 u64 isize = i_size_read(inode); 1244 u64 last_len = 0; 1245 u64 skip = 0; 1246 u64 defrag_end = 0; 1247 u64 newer_off = range->start; 1248 unsigned long i; 1249 unsigned long ra_index = 0; 1250 int ret; 1251 int defrag_count = 0; 1252 int compress_type = BTRFS_COMPRESS_ZLIB; 1253 u32 extent_thresh = range->extent_thresh; 1254 unsigned long max_cluster = SZ_256K >> PAGE_SHIFT; 1255 unsigned long cluster = max_cluster; 1256 u64 new_align = ~((u64)SZ_128K - 1); 1257 struct page **pages = NULL; 1258 bool do_compress = range->flags & BTRFS_DEFRAG_RANGE_COMPRESS; 1259 1260 if (isize == 0) 1261 return 0; 1262 1263 if (range->start >= isize) 1264 return -EINVAL; 1265 1266 if (do_compress) { 1267 if (range->compress_type > BTRFS_COMPRESS_TYPES) 1268 return -EINVAL; 1269 if (range->compress_type) 1270 compress_type = range->compress_type; 1271 } 1272 1273 if (extent_thresh == 0) 1274 extent_thresh = SZ_256K; 1275 1276 /* 1277 * If we were not given a file, allocate a readahead context. As 1278 * readahead is just an optimization, defrag will work without it so 1279 * we don't error out. 1280 */ 1281 if (!file) { 1282 ra = kzalloc(sizeof(*ra), GFP_KERNEL); 1283 if (ra) 1284 file_ra_state_init(ra, inode->i_mapping); 1285 } else { 1286 ra = &file->f_ra; 1287 } 1288 1289 pages = kmalloc_array(max_cluster, sizeof(struct page *), GFP_KERNEL); 1290 if (!pages) { 1291 ret = -ENOMEM; 1292 goto out_ra; 1293 } 1294 1295 /* find the last page to defrag */ 1296 if (range->start + range->len > range->start) { 1297 last_index = min_t(u64, isize - 1, 1298 range->start + range->len - 1) >> PAGE_SHIFT; 1299 } else { 1300 last_index = (isize - 1) >> PAGE_SHIFT; 1301 } 1302 1303 if (newer_than) { 1304 ret = find_new_extents(root, inode, newer_than, 1305 &newer_off, SZ_64K); 1306 if (!ret) { 1307 range->start = newer_off; 1308 /* 1309 * we always align our defrag to help keep 1310 * the extents in the file evenly spaced 1311 */ 1312 i = (newer_off & new_align) >> PAGE_SHIFT; 1313 } else 1314 goto out_ra; 1315 } else { 1316 i = range->start >> PAGE_SHIFT; 1317 } 1318 if (!max_to_defrag) 1319 max_to_defrag = last_index - i + 1; 1320 1321 /* 1322 * make writeback starts from i, so the defrag range can be 1323 * written sequentially. 1324 */ 1325 if (i < inode->i_mapping->writeback_index) 1326 inode->i_mapping->writeback_index = i; 1327 1328 while (i <= last_index && defrag_count < max_to_defrag && 1329 (i < DIV_ROUND_UP(i_size_read(inode), PAGE_SIZE))) { 1330 /* 1331 * make sure we stop running if someone unmounts 1332 * the FS 1333 */ 1334 if (!(inode->i_sb->s_flags & SB_ACTIVE)) 1335 break; 1336 1337 if (btrfs_defrag_cancelled(fs_info)) { 1338 btrfs_debug(fs_info, "defrag_file cancelled"); 1339 ret = -EAGAIN; 1340 break; 1341 } 1342 1343 if (!should_defrag_range(inode, (u64)i << PAGE_SHIFT, 1344 extent_thresh, &last_len, &skip, 1345 &defrag_end, do_compress)){ 1346 unsigned long next; 1347 /* 1348 * the should_defrag function tells us how much to skip 1349 * bump our counter by the suggested amount 1350 */ 1351 next = DIV_ROUND_UP(skip, PAGE_SIZE); 1352 i = max(i + 1, next); 1353 continue; 1354 } 1355 1356 if (!newer_than) { 1357 cluster = (PAGE_ALIGN(defrag_end) >> 1358 PAGE_SHIFT) - i; 1359 cluster = min(cluster, max_cluster); 1360 } else { 1361 cluster = max_cluster; 1362 } 1363 1364 if (i + cluster > ra_index) { 1365 ra_index = max(i, ra_index); 1366 if (ra) 1367 page_cache_sync_readahead(inode->i_mapping, ra, 1368 file, ra_index, cluster); 1369 ra_index += cluster; 1370 } 1371 1372 inode_lock(inode); 1373 if (do_compress) 1374 BTRFS_I(inode)->defrag_compress = compress_type; 1375 ret = cluster_pages_for_defrag(inode, pages, i, cluster); 1376 if (ret < 0) { 1377 inode_unlock(inode); 1378 goto out_ra; 1379 } 1380 1381 defrag_count += ret; 1382 balance_dirty_pages_ratelimited(inode->i_mapping); 1383 inode_unlock(inode); 1384 1385 if (newer_than) { 1386 if (newer_off == (u64)-1) 1387 break; 1388 1389 if (ret > 0) 1390 i += ret; 1391 1392 newer_off = max(newer_off + 1, 1393 (u64)i << PAGE_SHIFT); 1394 1395 ret = find_new_extents(root, inode, newer_than, 1396 &newer_off, SZ_64K); 1397 if (!ret) { 1398 range->start = newer_off; 1399 i = (newer_off & new_align) >> PAGE_SHIFT; 1400 } else { 1401 break; 1402 } 1403 } else { 1404 if (ret > 0) { 1405 i += ret; 1406 last_len += ret << PAGE_SHIFT; 1407 } else { 1408 i++; 1409 last_len = 0; 1410 } 1411 } 1412 } 1413 1414 if ((range->flags & BTRFS_DEFRAG_RANGE_START_IO)) { 1415 filemap_flush(inode->i_mapping); 1416 if (test_bit(BTRFS_INODE_HAS_ASYNC_EXTENT, 1417 &BTRFS_I(inode)->runtime_flags)) 1418 filemap_flush(inode->i_mapping); 1419 } 1420 1421 if (range->compress_type == BTRFS_COMPRESS_LZO) { 1422 btrfs_set_fs_incompat(fs_info, COMPRESS_LZO); 1423 } else if (range->compress_type == BTRFS_COMPRESS_ZSTD) { 1424 btrfs_set_fs_incompat(fs_info, COMPRESS_ZSTD); 1425 } 1426 1427 ret = defrag_count; 1428 1429 out_ra: 1430 if (do_compress) { 1431 inode_lock(inode); 1432 BTRFS_I(inode)->defrag_compress = BTRFS_COMPRESS_NONE; 1433 inode_unlock(inode); 1434 } 1435 if (!file) 1436 kfree(ra); 1437 kfree(pages); 1438 return ret; 1439 } 1440 1441 static noinline int btrfs_ioctl_resize(struct file *file, 1442 void __user *arg) 1443 { 1444 struct inode *inode = file_inode(file); 1445 struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb); 1446 u64 new_size; 1447 u64 old_size; 1448 u64 devid = 1; 1449 struct btrfs_root *root = BTRFS_I(inode)->root; 1450 struct btrfs_ioctl_vol_args *vol_args; 1451 struct btrfs_trans_handle *trans; 1452 struct btrfs_device *device = NULL; 1453 char *sizestr; 1454 char *retptr; 1455 char *devstr = NULL; 1456 int ret = 0; 1457 int mod = 0; 1458 1459 if (!capable(CAP_SYS_ADMIN)) 1460 return -EPERM; 1461 1462 ret = mnt_want_write_file(file); 1463 if (ret) 1464 return ret; 1465 1466 if (test_and_set_bit(BTRFS_FS_EXCL_OP, &fs_info->flags)) { 1467 mnt_drop_write_file(file); 1468 return BTRFS_ERROR_DEV_EXCL_RUN_IN_PROGRESS; 1469 } 1470 1471 mutex_lock(&fs_info->volume_mutex); 1472 vol_args = memdup_user(arg, sizeof(*vol_args)); 1473 if (IS_ERR(vol_args)) { 1474 ret = PTR_ERR(vol_args); 1475 goto out; 1476 } 1477 1478 vol_args->name[BTRFS_PATH_NAME_MAX] = '\0'; 1479 1480 sizestr = vol_args->name; 1481 devstr = strchr(sizestr, ':'); 1482 if (devstr) { 1483 sizestr = devstr + 1; 1484 *devstr = '\0'; 1485 devstr = vol_args->name; 1486 ret = kstrtoull(devstr, 10, &devid); 1487 if (ret) 1488 goto out_free; 1489 if (!devid) { 1490 ret = -EINVAL; 1491 goto out_free; 1492 } 1493 btrfs_info(fs_info, "resizing devid %llu", devid); 1494 } 1495 1496 device = btrfs_find_device(fs_info, devid, NULL, NULL); 1497 if (!device) { 1498 btrfs_info(fs_info, "resizer unable to find device %llu", 1499 devid); 1500 ret = -ENODEV; 1501 goto out_free; 1502 } 1503 1504 if (!test_bit(BTRFS_DEV_STATE_WRITEABLE, &device->dev_state)) { 1505 btrfs_info(fs_info, 1506 "resizer unable to apply on readonly device %llu", 1507 devid); 1508 ret = -EPERM; 1509 goto out_free; 1510 } 1511 1512 if (!strcmp(sizestr, "max")) 1513 new_size = device->bdev->bd_inode->i_size; 1514 else { 1515 if (sizestr[0] == '-') { 1516 mod = -1; 1517 sizestr++; 1518 } else if (sizestr[0] == '+') { 1519 mod = 1; 1520 sizestr++; 1521 } 1522 new_size = memparse(sizestr, &retptr); 1523 if (*retptr != '\0' || new_size == 0) { 1524 ret = -EINVAL; 1525 goto out_free; 1526 } 1527 } 1528 1529 if (test_bit(BTRFS_DEV_STATE_REPLACE_TGT, &device->dev_state)) { 1530 ret = -EPERM; 1531 goto out_free; 1532 } 1533 1534 old_size = btrfs_device_get_total_bytes(device); 1535 1536 if (mod < 0) { 1537 if (new_size > old_size) { 1538 ret = -EINVAL; 1539 goto out_free; 1540 } 1541 new_size = old_size - new_size; 1542 } else if (mod > 0) { 1543 if (new_size > ULLONG_MAX - old_size) { 1544 ret = -ERANGE; 1545 goto out_free; 1546 } 1547 new_size = old_size + new_size; 1548 } 1549 1550 if (new_size < SZ_256M) { 1551 ret = -EINVAL; 1552 goto out_free; 1553 } 1554 if (new_size > device->bdev->bd_inode->i_size) { 1555 ret = -EFBIG; 1556 goto out_free; 1557 } 1558 1559 new_size = round_down(new_size, fs_info->sectorsize); 1560 1561 btrfs_info_in_rcu(fs_info, "new size for %s is %llu", 1562 rcu_str_deref(device->name), new_size); 1563 1564 if (new_size > old_size) { 1565 trans = btrfs_start_transaction(root, 0); 1566 if (IS_ERR(trans)) { 1567 ret = PTR_ERR(trans); 1568 goto out_free; 1569 } 1570 ret = btrfs_grow_device(trans, device, new_size); 1571 btrfs_commit_transaction(trans); 1572 } else if (new_size < old_size) { 1573 ret = btrfs_shrink_device(device, new_size); 1574 } /* equal, nothing need to do */ 1575 1576 out_free: 1577 kfree(vol_args); 1578 out: 1579 mutex_unlock(&fs_info->volume_mutex); 1580 clear_bit(BTRFS_FS_EXCL_OP, &fs_info->flags); 1581 mnt_drop_write_file(file); 1582 return ret; 1583 } 1584 1585 static noinline int btrfs_ioctl_snap_create_transid(struct file *file, 1586 const char *name, unsigned long fd, int subvol, 1587 u64 *transid, bool readonly, 1588 struct btrfs_qgroup_inherit *inherit) 1589 { 1590 int namelen; 1591 int ret = 0; 1592 1593 if (!S_ISDIR(file_inode(file)->i_mode)) 1594 return -ENOTDIR; 1595 1596 ret = mnt_want_write_file(file); 1597 if (ret) 1598 goto out; 1599 1600 namelen = strlen(name); 1601 if (strchr(name, '/')) { 1602 ret = -EINVAL; 1603 goto out_drop_write; 1604 } 1605 1606 if (name[0] == '.' && 1607 (namelen == 1 || (name[1] == '.' && namelen == 2))) { 1608 ret = -EEXIST; 1609 goto out_drop_write; 1610 } 1611 1612 if (subvol) { 1613 ret = btrfs_mksubvol(&file->f_path, name, namelen, 1614 NULL, transid, readonly, inherit); 1615 } else { 1616 struct fd src = fdget(fd); 1617 struct inode *src_inode; 1618 if (!src.file) { 1619 ret = -EINVAL; 1620 goto out_drop_write; 1621 } 1622 1623 src_inode = file_inode(src.file); 1624 if (src_inode->i_sb != file_inode(file)->i_sb) { 1625 btrfs_info(BTRFS_I(file_inode(file))->root->fs_info, 1626 "Snapshot src from another FS"); 1627 ret = -EXDEV; 1628 } else if (!inode_owner_or_capable(src_inode)) { 1629 /* 1630 * Subvolume creation is not restricted, but snapshots 1631 * are limited to own subvolumes only 1632 */ 1633 ret = -EPERM; 1634 } else { 1635 ret = btrfs_mksubvol(&file->f_path, name, namelen, 1636 BTRFS_I(src_inode)->root, 1637 transid, readonly, inherit); 1638 } 1639 fdput(src); 1640 } 1641 out_drop_write: 1642 mnt_drop_write_file(file); 1643 out: 1644 return ret; 1645 } 1646 1647 static noinline int btrfs_ioctl_snap_create(struct file *file, 1648 void __user *arg, int subvol) 1649 { 1650 struct btrfs_ioctl_vol_args *vol_args; 1651 int ret; 1652 1653 if (!S_ISDIR(file_inode(file)->i_mode)) 1654 return -ENOTDIR; 1655 1656 vol_args = memdup_user(arg, sizeof(*vol_args)); 1657 if (IS_ERR(vol_args)) 1658 return PTR_ERR(vol_args); 1659 vol_args->name[BTRFS_PATH_NAME_MAX] = '\0'; 1660 1661 ret = btrfs_ioctl_snap_create_transid(file, vol_args->name, 1662 vol_args->fd, subvol, 1663 NULL, false, NULL); 1664 1665 kfree(vol_args); 1666 return ret; 1667 } 1668 1669 static noinline int btrfs_ioctl_snap_create_v2(struct file *file, 1670 void __user *arg, int subvol) 1671 { 1672 struct btrfs_ioctl_vol_args_v2 *vol_args; 1673 int ret; 1674 u64 transid = 0; 1675 u64 *ptr = NULL; 1676 bool readonly = false; 1677 struct btrfs_qgroup_inherit *inherit = NULL; 1678 1679 if (!S_ISDIR(file_inode(file)->i_mode)) 1680 return -ENOTDIR; 1681 1682 vol_args = memdup_user(arg, sizeof(*vol_args)); 1683 if (IS_ERR(vol_args)) 1684 return PTR_ERR(vol_args); 1685 vol_args->name[BTRFS_SUBVOL_NAME_MAX] = '\0'; 1686 1687 if (vol_args->flags & 1688 ~(BTRFS_SUBVOL_CREATE_ASYNC | BTRFS_SUBVOL_RDONLY | 1689 BTRFS_SUBVOL_QGROUP_INHERIT)) { 1690 ret = -EOPNOTSUPP; 1691 goto free_args; 1692 } 1693 1694 if (vol_args->flags & BTRFS_SUBVOL_CREATE_ASYNC) 1695 ptr = &transid; 1696 if (vol_args->flags & BTRFS_SUBVOL_RDONLY) 1697 readonly = true; 1698 if (vol_args->flags & BTRFS_SUBVOL_QGROUP_INHERIT) { 1699 if (vol_args->size > PAGE_SIZE) { 1700 ret = -EINVAL; 1701 goto free_args; 1702 } 1703 inherit = memdup_user(vol_args->qgroup_inherit, vol_args->size); 1704 if (IS_ERR(inherit)) { 1705 ret = PTR_ERR(inherit); 1706 goto free_args; 1707 } 1708 } 1709 1710 ret = btrfs_ioctl_snap_create_transid(file, vol_args->name, 1711 vol_args->fd, subvol, ptr, 1712 readonly, inherit); 1713 if (ret) 1714 goto free_inherit; 1715 1716 if (ptr && copy_to_user(arg + 1717 offsetof(struct btrfs_ioctl_vol_args_v2, 1718 transid), 1719 ptr, sizeof(*ptr))) 1720 ret = -EFAULT; 1721 1722 free_inherit: 1723 kfree(inherit); 1724 free_args: 1725 kfree(vol_args); 1726 return ret; 1727 } 1728 1729 static noinline int btrfs_ioctl_subvol_getflags(struct file *file, 1730 void __user *arg) 1731 { 1732 struct inode *inode = file_inode(file); 1733 struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb); 1734 struct btrfs_root *root = BTRFS_I(inode)->root; 1735 int ret = 0; 1736 u64 flags = 0; 1737 1738 if (btrfs_ino(BTRFS_I(inode)) != BTRFS_FIRST_FREE_OBJECTID) 1739 return -EINVAL; 1740 1741 down_read(&fs_info->subvol_sem); 1742 if (btrfs_root_readonly(root)) 1743 flags |= BTRFS_SUBVOL_RDONLY; 1744 up_read(&fs_info->subvol_sem); 1745 1746 if (copy_to_user(arg, &flags, sizeof(flags))) 1747 ret = -EFAULT; 1748 1749 return ret; 1750 } 1751 1752 static noinline int btrfs_ioctl_subvol_setflags(struct file *file, 1753 void __user *arg) 1754 { 1755 struct inode *inode = file_inode(file); 1756 struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb); 1757 struct btrfs_root *root = BTRFS_I(inode)->root; 1758 struct btrfs_trans_handle *trans; 1759 u64 root_flags; 1760 u64 flags; 1761 int ret = 0; 1762 1763 if (!inode_owner_or_capable(inode)) 1764 return -EPERM; 1765 1766 ret = mnt_want_write_file(file); 1767 if (ret) 1768 goto out; 1769 1770 if (btrfs_ino(BTRFS_I(inode)) != BTRFS_FIRST_FREE_OBJECTID) { 1771 ret = -EINVAL; 1772 goto out_drop_write; 1773 } 1774 1775 if (copy_from_user(&flags, arg, sizeof(flags))) { 1776 ret = -EFAULT; 1777 goto out_drop_write; 1778 } 1779 1780 if (flags & BTRFS_SUBVOL_CREATE_ASYNC) { 1781 ret = -EINVAL; 1782 goto out_drop_write; 1783 } 1784 1785 if (flags & ~BTRFS_SUBVOL_RDONLY) { 1786 ret = -EOPNOTSUPP; 1787 goto out_drop_write; 1788 } 1789 1790 down_write(&fs_info->subvol_sem); 1791 1792 /* nothing to do */ 1793 if (!!(flags & BTRFS_SUBVOL_RDONLY) == btrfs_root_readonly(root)) 1794 goto out_drop_sem; 1795 1796 root_flags = btrfs_root_flags(&root->root_item); 1797 if (flags & BTRFS_SUBVOL_RDONLY) { 1798 btrfs_set_root_flags(&root->root_item, 1799 root_flags | BTRFS_ROOT_SUBVOL_RDONLY); 1800 } else { 1801 /* 1802 * Block RO -> RW transition if this subvolume is involved in 1803 * send 1804 */ 1805 spin_lock(&root->root_item_lock); 1806 if (root->send_in_progress == 0) { 1807 btrfs_set_root_flags(&root->root_item, 1808 root_flags & ~BTRFS_ROOT_SUBVOL_RDONLY); 1809 spin_unlock(&root->root_item_lock); 1810 } else { 1811 spin_unlock(&root->root_item_lock); 1812 btrfs_warn(fs_info, 1813 "Attempt to set subvolume %llu read-write during send", 1814 root->root_key.objectid); 1815 ret = -EPERM; 1816 goto out_drop_sem; 1817 } 1818 } 1819 1820 trans = btrfs_start_transaction(root, 1); 1821 if (IS_ERR(trans)) { 1822 ret = PTR_ERR(trans); 1823 goto out_reset; 1824 } 1825 1826 ret = btrfs_update_root(trans, fs_info->tree_root, 1827 &root->root_key, &root->root_item); 1828 if (ret < 0) { 1829 btrfs_end_transaction(trans); 1830 goto out_reset; 1831 } 1832 1833 ret = btrfs_commit_transaction(trans); 1834 1835 out_reset: 1836 if (ret) 1837 btrfs_set_root_flags(&root->root_item, root_flags); 1838 out_drop_sem: 1839 up_write(&fs_info->subvol_sem); 1840 out_drop_write: 1841 mnt_drop_write_file(file); 1842 out: 1843 return ret; 1844 } 1845 1846 /* 1847 * helper to check if the subvolume references other subvolumes 1848 */ 1849 static noinline int may_destroy_subvol(struct btrfs_root *root) 1850 { 1851 struct btrfs_fs_info *fs_info = root->fs_info; 1852 struct btrfs_path *path; 1853 struct btrfs_dir_item *di; 1854 struct btrfs_key key; 1855 u64 dir_id; 1856 int ret; 1857 1858 path = btrfs_alloc_path(); 1859 if (!path) 1860 return -ENOMEM; 1861 1862 /* Make sure this root isn't set as the default subvol */ 1863 dir_id = btrfs_super_root_dir(fs_info->super_copy); 1864 di = btrfs_lookup_dir_item(NULL, fs_info->tree_root, path, 1865 dir_id, "default", 7, 0); 1866 if (di && !IS_ERR(di)) { 1867 btrfs_dir_item_key_to_cpu(path->nodes[0], di, &key); 1868 if (key.objectid == root->root_key.objectid) { 1869 ret = -EPERM; 1870 btrfs_err(fs_info, 1871 "deleting default subvolume %llu is not allowed", 1872 key.objectid); 1873 goto out; 1874 } 1875 btrfs_release_path(path); 1876 } 1877 1878 key.objectid = root->root_key.objectid; 1879 key.type = BTRFS_ROOT_REF_KEY; 1880 key.offset = (u64)-1; 1881 1882 ret = btrfs_search_slot(NULL, fs_info->tree_root, &key, path, 0, 0); 1883 if (ret < 0) 1884 goto out; 1885 BUG_ON(ret == 0); 1886 1887 ret = 0; 1888 if (path->slots[0] > 0) { 1889 path->slots[0]--; 1890 btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]); 1891 if (key.objectid == root->root_key.objectid && 1892 key.type == BTRFS_ROOT_REF_KEY) 1893 ret = -ENOTEMPTY; 1894 } 1895 out: 1896 btrfs_free_path(path); 1897 return ret; 1898 } 1899 1900 static noinline int key_in_sk(struct btrfs_key *key, 1901 struct btrfs_ioctl_search_key *sk) 1902 { 1903 struct btrfs_key test; 1904 int ret; 1905 1906 test.objectid = sk->min_objectid; 1907 test.type = sk->min_type; 1908 test.offset = sk->min_offset; 1909 1910 ret = btrfs_comp_cpu_keys(key, &test); 1911 if (ret < 0) 1912 return 0; 1913 1914 test.objectid = sk->max_objectid; 1915 test.type = sk->max_type; 1916 test.offset = sk->max_offset; 1917 1918 ret = btrfs_comp_cpu_keys(key, &test); 1919 if (ret > 0) 1920 return 0; 1921 return 1; 1922 } 1923 1924 static noinline int copy_to_sk(struct btrfs_path *path, 1925 struct btrfs_key *key, 1926 struct btrfs_ioctl_search_key *sk, 1927 size_t *buf_size, 1928 char __user *ubuf, 1929 unsigned long *sk_offset, 1930 int *num_found) 1931 { 1932 u64 found_transid; 1933 struct extent_buffer *leaf; 1934 struct btrfs_ioctl_search_header sh; 1935 struct btrfs_key test; 1936 unsigned long item_off; 1937 unsigned long item_len; 1938 int nritems; 1939 int i; 1940 int slot; 1941 int ret = 0; 1942 1943 leaf = path->nodes[0]; 1944 slot = path->slots[0]; 1945 nritems = btrfs_header_nritems(leaf); 1946 1947 if (btrfs_header_generation(leaf) > sk->max_transid) { 1948 i = nritems; 1949 goto advance_key; 1950 } 1951 found_transid = btrfs_header_generation(leaf); 1952 1953 for (i = slot; i < nritems; i++) { 1954 item_off = btrfs_item_ptr_offset(leaf, i); 1955 item_len = btrfs_item_size_nr(leaf, i); 1956 1957 btrfs_item_key_to_cpu(leaf, key, i); 1958 if (!key_in_sk(key, sk)) 1959 continue; 1960 1961 if (sizeof(sh) + item_len > *buf_size) { 1962 if (*num_found) { 1963 ret = 1; 1964 goto out; 1965 } 1966 1967 /* 1968 * return one empty item back for v1, which does not 1969 * handle -EOVERFLOW 1970 */ 1971 1972 *buf_size = sizeof(sh) + item_len; 1973 item_len = 0; 1974 ret = -EOVERFLOW; 1975 } 1976 1977 if (sizeof(sh) + item_len + *sk_offset > *buf_size) { 1978 ret = 1; 1979 goto out; 1980 } 1981 1982 sh.objectid = key->objectid; 1983 sh.offset = key->offset; 1984 sh.type = key->type; 1985 sh.len = item_len; 1986 sh.transid = found_transid; 1987 1988 /* copy search result header */ 1989 if (copy_to_user(ubuf + *sk_offset, &sh, sizeof(sh))) { 1990 ret = -EFAULT; 1991 goto out; 1992 } 1993 1994 *sk_offset += sizeof(sh); 1995 1996 if (item_len) { 1997 char __user *up = ubuf + *sk_offset; 1998 /* copy the item */ 1999 if (read_extent_buffer_to_user(leaf, up, 2000 item_off, item_len)) { 2001 ret = -EFAULT; 2002 goto out; 2003 } 2004 2005 *sk_offset += item_len; 2006 } 2007 (*num_found)++; 2008 2009 if (ret) /* -EOVERFLOW from above */ 2010 goto out; 2011 2012 if (*num_found >= sk->nr_items) { 2013 ret = 1; 2014 goto out; 2015 } 2016 } 2017 advance_key: 2018 ret = 0; 2019 test.objectid = sk->max_objectid; 2020 test.type = sk->max_type; 2021 test.offset = sk->max_offset; 2022 if (btrfs_comp_cpu_keys(key, &test) >= 0) 2023 ret = 1; 2024 else if (key->offset < (u64)-1) 2025 key->offset++; 2026 else if (key->type < (u8)-1) { 2027 key->offset = 0; 2028 key->type++; 2029 } else if (key->objectid < (u64)-1) { 2030 key->offset = 0; 2031 key->type = 0; 2032 key->objectid++; 2033 } else 2034 ret = 1; 2035 out: 2036 /* 2037 * 0: all items from this leaf copied, continue with next 2038 * 1: * more items can be copied, but unused buffer is too small 2039 * * all items were found 2040 * Either way, it will stops the loop which iterates to the next 2041 * leaf 2042 * -EOVERFLOW: item was to large for buffer 2043 * -EFAULT: could not copy extent buffer back to userspace 2044 */ 2045 return ret; 2046 } 2047 2048 static noinline int search_ioctl(struct inode *inode, 2049 struct btrfs_ioctl_search_key *sk, 2050 size_t *buf_size, 2051 char __user *ubuf) 2052 { 2053 struct btrfs_fs_info *info = btrfs_sb(inode->i_sb); 2054 struct btrfs_root *root; 2055 struct btrfs_key key; 2056 struct btrfs_path *path; 2057 int ret; 2058 int num_found = 0; 2059 unsigned long sk_offset = 0; 2060 2061 if (*buf_size < sizeof(struct btrfs_ioctl_search_header)) { 2062 *buf_size = sizeof(struct btrfs_ioctl_search_header); 2063 return -EOVERFLOW; 2064 } 2065 2066 path = btrfs_alloc_path(); 2067 if (!path) 2068 return -ENOMEM; 2069 2070 if (sk->tree_id == 0) { 2071 /* search the root of the inode that was passed */ 2072 root = BTRFS_I(inode)->root; 2073 } else { 2074 key.objectid = sk->tree_id; 2075 key.type = BTRFS_ROOT_ITEM_KEY; 2076 key.offset = (u64)-1; 2077 root = btrfs_read_fs_root_no_name(info, &key); 2078 if (IS_ERR(root)) { 2079 btrfs_free_path(path); 2080 return -ENOENT; 2081 } 2082 } 2083 2084 key.objectid = sk->min_objectid; 2085 key.type = sk->min_type; 2086 key.offset = sk->min_offset; 2087 2088 while (1) { 2089 ret = btrfs_search_forward(root, &key, path, sk->min_transid); 2090 if (ret != 0) { 2091 if (ret > 0) 2092 ret = 0; 2093 goto err; 2094 } 2095 ret = copy_to_sk(path, &key, sk, buf_size, ubuf, 2096 &sk_offset, &num_found); 2097 btrfs_release_path(path); 2098 if (ret) 2099 break; 2100 2101 } 2102 if (ret > 0) 2103 ret = 0; 2104 err: 2105 sk->nr_items = num_found; 2106 btrfs_free_path(path); 2107 return ret; 2108 } 2109 2110 static noinline int btrfs_ioctl_tree_search(struct file *file, 2111 void __user *argp) 2112 { 2113 struct btrfs_ioctl_search_args __user *uargs; 2114 struct btrfs_ioctl_search_key sk; 2115 struct inode *inode; 2116 int ret; 2117 size_t buf_size; 2118 2119 if (!capable(CAP_SYS_ADMIN)) 2120 return -EPERM; 2121 2122 uargs = (struct btrfs_ioctl_search_args __user *)argp; 2123 2124 if (copy_from_user(&sk, &uargs->key, sizeof(sk))) 2125 return -EFAULT; 2126 2127 buf_size = sizeof(uargs->buf); 2128 2129 inode = file_inode(file); 2130 ret = search_ioctl(inode, &sk, &buf_size, uargs->buf); 2131 2132 /* 2133 * In the origin implementation an overflow is handled by returning a 2134 * search header with a len of zero, so reset ret. 2135 */ 2136 if (ret == -EOVERFLOW) 2137 ret = 0; 2138 2139 if (ret == 0 && copy_to_user(&uargs->key, &sk, sizeof(sk))) 2140 ret = -EFAULT; 2141 return ret; 2142 } 2143 2144 static noinline int btrfs_ioctl_tree_search_v2(struct file *file, 2145 void __user *argp) 2146 { 2147 struct btrfs_ioctl_search_args_v2 __user *uarg; 2148 struct btrfs_ioctl_search_args_v2 args; 2149 struct inode *inode; 2150 int ret; 2151 size_t buf_size; 2152 const size_t buf_limit = SZ_16M; 2153 2154 if (!capable(CAP_SYS_ADMIN)) 2155 return -EPERM; 2156 2157 /* copy search header and buffer size */ 2158 uarg = (struct btrfs_ioctl_search_args_v2 __user *)argp; 2159 if (copy_from_user(&args, uarg, sizeof(args))) 2160 return -EFAULT; 2161 2162 buf_size = args.buf_size; 2163 2164 /* limit result size to 16MB */ 2165 if (buf_size > buf_limit) 2166 buf_size = buf_limit; 2167 2168 inode = file_inode(file); 2169 ret = search_ioctl(inode, &args.key, &buf_size, 2170 (char __user *)(&uarg->buf[0])); 2171 if (ret == 0 && copy_to_user(&uarg->key, &args.key, sizeof(args.key))) 2172 ret = -EFAULT; 2173 else if (ret == -EOVERFLOW && 2174 copy_to_user(&uarg->buf_size, &buf_size, sizeof(buf_size))) 2175 ret = -EFAULT; 2176 2177 return ret; 2178 } 2179 2180 /* 2181 * Search INODE_REFs to identify path name of 'dirid' directory 2182 * in a 'tree_id' tree. and sets path name to 'name'. 2183 */ 2184 static noinline int btrfs_search_path_in_tree(struct btrfs_fs_info *info, 2185 u64 tree_id, u64 dirid, char *name) 2186 { 2187 struct btrfs_root *root; 2188 struct btrfs_key key; 2189 char *ptr; 2190 int ret = -1; 2191 int slot; 2192 int len; 2193 int total_len = 0; 2194 struct btrfs_inode_ref *iref; 2195 struct extent_buffer *l; 2196 struct btrfs_path *path; 2197 2198 if (dirid == BTRFS_FIRST_FREE_OBJECTID) { 2199 name[0]='\0'; 2200 return 0; 2201 } 2202 2203 path = btrfs_alloc_path(); 2204 if (!path) 2205 return -ENOMEM; 2206 2207 ptr = &name[BTRFS_INO_LOOKUP_PATH_MAX - 1]; 2208 2209 key.objectid = tree_id; 2210 key.type = BTRFS_ROOT_ITEM_KEY; 2211 key.offset = (u64)-1; 2212 root = btrfs_read_fs_root_no_name(info, &key); 2213 if (IS_ERR(root)) { 2214 btrfs_err(info, "could not find root %llu", tree_id); 2215 ret = -ENOENT; 2216 goto out; 2217 } 2218 2219 key.objectid = dirid; 2220 key.type = BTRFS_INODE_REF_KEY; 2221 key.offset = (u64)-1; 2222 2223 while (1) { 2224 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); 2225 if (ret < 0) 2226 goto out; 2227 else if (ret > 0) { 2228 ret = btrfs_previous_item(root, path, dirid, 2229 BTRFS_INODE_REF_KEY); 2230 if (ret < 0) 2231 goto out; 2232 else if (ret > 0) { 2233 ret = -ENOENT; 2234 goto out; 2235 } 2236 } 2237 2238 l = path->nodes[0]; 2239 slot = path->slots[0]; 2240 btrfs_item_key_to_cpu(l, &key, slot); 2241 2242 iref = btrfs_item_ptr(l, slot, struct btrfs_inode_ref); 2243 len = btrfs_inode_ref_name_len(l, iref); 2244 ptr -= len + 1; 2245 total_len += len + 1; 2246 if (ptr < name) { 2247 ret = -ENAMETOOLONG; 2248 goto out; 2249 } 2250 2251 *(ptr + len) = '/'; 2252 read_extent_buffer(l, ptr, (unsigned long)(iref + 1), len); 2253 2254 if (key.offset == BTRFS_FIRST_FREE_OBJECTID) 2255 break; 2256 2257 btrfs_release_path(path); 2258 key.objectid = key.offset; 2259 key.offset = (u64)-1; 2260 dirid = key.objectid; 2261 } 2262 memmove(name, ptr, total_len); 2263 name[total_len] = '\0'; 2264 ret = 0; 2265 out: 2266 btrfs_free_path(path); 2267 return ret; 2268 } 2269 2270 static noinline int btrfs_ioctl_ino_lookup(struct file *file, 2271 void __user *argp) 2272 { 2273 struct btrfs_ioctl_ino_lookup_args *args; 2274 struct inode *inode; 2275 int ret = 0; 2276 2277 args = memdup_user(argp, sizeof(*args)); 2278 if (IS_ERR(args)) 2279 return PTR_ERR(args); 2280 2281 inode = file_inode(file); 2282 2283 /* 2284 * Unprivileged query to obtain the containing subvolume root id. The 2285 * path is reset so it's consistent with btrfs_search_path_in_tree. 2286 */ 2287 if (args->treeid == 0) 2288 args->treeid = BTRFS_I(inode)->root->root_key.objectid; 2289 2290 if (args->objectid == BTRFS_FIRST_FREE_OBJECTID) { 2291 args->name[0] = 0; 2292 goto out; 2293 } 2294 2295 if (!capable(CAP_SYS_ADMIN)) { 2296 ret = -EPERM; 2297 goto out; 2298 } 2299 2300 ret = btrfs_search_path_in_tree(BTRFS_I(inode)->root->fs_info, 2301 args->treeid, args->objectid, 2302 args->name); 2303 2304 out: 2305 if (ret == 0 && copy_to_user(argp, args, sizeof(*args))) 2306 ret = -EFAULT; 2307 2308 kfree(args); 2309 return ret; 2310 } 2311 2312 static noinline int btrfs_ioctl_snap_destroy(struct file *file, 2313 void __user *arg) 2314 { 2315 struct dentry *parent = file->f_path.dentry; 2316 struct btrfs_fs_info *fs_info = btrfs_sb(parent->d_sb); 2317 struct dentry *dentry; 2318 struct inode *dir = d_inode(parent); 2319 struct inode *inode; 2320 struct btrfs_root *root = BTRFS_I(dir)->root; 2321 struct btrfs_root *dest = NULL; 2322 struct btrfs_ioctl_vol_args *vol_args; 2323 struct btrfs_trans_handle *trans; 2324 struct btrfs_block_rsv block_rsv; 2325 u64 root_flags; 2326 u64 qgroup_reserved; 2327 int namelen; 2328 int ret; 2329 int err = 0; 2330 2331 if (!S_ISDIR(dir->i_mode)) 2332 return -ENOTDIR; 2333 2334 vol_args = memdup_user(arg, sizeof(*vol_args)); 2335 if (IS_ERR(vol_args)) 2336 return PTR_ERR(vol_args); 2337 2338 vol_args->name[BTRFS_PATH_NAME_MAX] = '\0'; 2339 namelen = strlen(vol_args->name); 2340 if (strchr(vol_args->name, '/') || 2341 strncmp(vol_args->name, "..", namelen) == 0) { 2342 err = -EINVAL; 2343 goto out; 2344 } 2345 2346 err = mnt_want_write_file(file); 2347 if (err) 2348 goto out; 2349 2350 2351 err = down_write_killable_nested(&dir->i_rwsem, I_MUTEX_PARENT); 2352 if (err == -EINTR) 2353 goto out_drop_write; 2354 dentry = lookup_one_len(vol_args->name, parent, namelen); 2355 if (IS_ERR(dentry)) { 2356 err = PTR_ERR(dentry); 2357 goto out_unlock_dir; 2358 } 2359 2360 if (d_really_is_negative(dentry)) { 2361 err = -ENOENT; 2362 goto out_dput; 2363 } 2364 2365 inode = d_inode(dentry); 2366 dest = BTRFS_I(inode)->root; 2367 if (!capable(CAP_SYS_ADMIN)) { 2368 /* 2369 * Regular user. Only allow this with a special mount 2370 * option, when the user has write+exec access to the 2371 * subvol root, and when rmdir(2) would have been 2372 * allowed. 2373 * 2374 * Note that this is _not_ check that the subvol is 2375 * empty or doesn't contain data that we wouldn't 2376 * otherwise be able to delete. 2377 * 2378 * Users who want to delete empty subvols should try 2379 * rmdir(2). 2380 */ 2381 err = -EPERM; 2382 if (!btrfs_test_opt(fs_info, USER_SUBVOL_RM_ALLOWED)) 2383 goto out_dput; 2384 2385 /* 2386 * Do not allow deletion if the parent dir is the same 2387 * as the dir to be deleted. That means the ioctl 2388 * must be called on the dentry referencing the root 2389 * of the subvol, not a random directory contained 2390 * within it. 2391 */ 2392 err = -EINVAL; 2393 if (root == dest) 2394 goto out_dput; 2395 2396 err = inode_permission(inode, MAY_WRITE | MAY_EXEC); 2397 if (err) 2398 goto out_dput; 2399 } 2400 2401 /* check if subvolume may be deleted by a user */ 2402 err = btrfs_may_delete(dir, dentry, 1); 2403 if (err) 2404 goto out_dput; 2405 2406 if (btrfs_ino(BTRFS_I(inode)) != BTRFS_FIRST_FREE_OBJECTID) { 2407 err = -EINVAL; 2408 goto out_dput; 2409 } 2410 2411 inode_lock(inode); 2412 2413 /* 2414 * Don't allow to delete a subvolume with send in progress. This is 2415 * inside the i_mutex so the error handling that has to drop the bit 2416 * again is not run concurrently. 2417 */ 2418 spin_lock(&dest->root_item_lock); 2419 root_flags = btrfs_root_flags(&dest->root_item); 2420 if (dest->send_in_progress == 0) { 2421 btrfs_set_root_flags(&dest->root_item, 2422 root_flags | BTRFS_ROOT_SUBVOL_DEAD); 2423 spin_unlock(&dest->root_item_lock); 2424 } else { 2425 spin_unlock(&dest->root_item_lock); 2426 btrfs_warn(fs_info, 2427 "Attempt to delete subvolume %llu during send", 2428 dest->root_key.objectid); 2429 err = -EPERM; 2430 goto out_unlock_inode; 2431 } 2432 2433 down_write(&fs_info->subvol_sem); 2434 2435 err = may_destroy_subvol(dest); 2436 if (err) 2437 goto out_up_write; 2438 2439 btrfs_init_block_rsv(&block_rsv, BTRFS_BLOCK_RSV_TEMP); 2440 /* 2441 * One for dir inode, two for dir entries, two for root 2442 * ref/backref. 2443 */ 2444 err = btrfs_subvolume_reserve_metadata(root, &block_rsv, 2445 5, &qgroup_reserved, true); 2446 if (err) 2447 goto out_up_write; 2448 2449 trans = btrfs_start_transaction(root, 0); 2450 if (IS_ERR(trans)) { 2451 err = PTR_ERR(trans); 2452 goto out_release; 2453 } 2454 trans->block_rsv = &block_rsv; 2455 trans->bytes_reserved = block_rsv.size; 2456 2457 btrfs_record_snapshot_destroy(trans, BTRFS_I(dir)); 2458 2459 ret = btrfs_unlink_subvol(trans, root, dir, 2460 dest->root_key.objectid, 2461 dentry->d_name.name, 2462 dentry->d_name.len); 2463 if (ret) { 2464 err = ret; 2465 btrfs_abort_transaction(trans, ret); 2466 goto out_end_trans; 2467 } 2468 2469 btrfs_record_root_in_trans(trans, dest); 2470 2471 memset(&dest->root_item.drop_progress, 0, 2472 sizeof(dest->root_item.drop_progress)); 2473 dest->root_item.drop_level = 0; 2474 btrfs_set_root_refs(&dest->root_item, 0); 2475 2476 if (!test_and_set_bit(BTRFS_ROOT_ORPHAN_ITEM_INSERTED, &dest->state)) { 2477 ret = btrfs_insert_orphan_item(trans, 2478 fs_info->tree_root, 2479 dest->root_key.objectid); 2480 if (ret) { 2481 btrfs_abort_transaction(trans, ret); 2482 err = ret; 2483 goto out_end_trans; 2484 } 2485 } 2486 2487 ret = btrfs_uuid_tree_rem(trans, fs_info, dest->root_item.uuid, 2488 BTRFS_UUID_KEY_SUBVOL, 2489 dest->root_key.objectid); 2490 if (ret && ret != -ENOENT) { 2491 btrfs_abort_transaction(trans, ret); 2492 err = ret; 2493 goto out_end_trans; 2494 } 2495 if (!btrfs_is_empty_uuid(dest->root_item.received_uuid)) { 2496 ret = btrfs_uuid_tree_rem(trans, fs_info, 2497 dest->root_item.received_uuid, 2498 BTRFS_UUID_KEY_RECEIVED_SUBVOL, 2499 dest->root_key.objectid); 2500 if (ret && ret != -ENOENT) { 2501 btrfs_abort_transaction(trans, ret); 2502 err = ret; 2503 goto out_end_trans; 2504 } 2505 } 2506 2507 out_end_trans: 2508 trans->block_rsv = NULL; 2509 trans->bytes_reserved = 0; 2510 ret = btrfs_end_transaction(trans); 2511 if (ret && !err) 2512 err = ret; 2513 inode->i_flags |= S_DEAD; 2514 out_release: 2515 btrfs_subvolume_release_metadata(fs_info, &block_rsv); 2516 out_up_write: 2517 up_write(&fs_info->subvol_sem); 2518 if (err) { 2519 spin_lock(&dest->root_item_lock); 2520 root_flags = btrfs_root_flags(&dest->root_item); 2521 btrfs_set_root_flags(&dest->root_item, 2522 root_flags & ~BTRFS_ROOT_SUBVOL_DEAD); 2523 spin_unlock(&dest->root_item_lock); 2524 } 2525 out_unlock_inode: 2526 inode_unlock(inode); 2527 if (!err) { 2528 d_invalidate(dentry); 2529 btrfs_invalidate_inodes(dest); 2530 d_delete(dentry); 2531 ASSERT(dest->send_in_progress == 0); 2532 2533 /* the last ref */ 2534 if (dest->ino_cache_inode) { 2535 iput(dest->ino_cache_inode); 2536 dest->ino_cache_inode = NULL; 2537 } 2538 } 2539 out_dput: 2540 dput(dentry); 2541 out_unlock_dir: 2542 inode_unlock(dir); 2543 out_drop_write: 2544 mnt_drop_write_file(file); 2545 out: 2546 kfree(vol_args); 2547 return err; 2548 } 2549 2550 static int btrfs_ioctl_defrag(struct file *file, void __user *argp) 2551 { 2552 struct inode *inode = file_inode(file); 2553 struct btrfs_root *root = BTRFS_I(inode)->root; 2554 struct btrfs_ioctl_defrag_range_args *range; 2555 int ret; 2556 2557 ret = mnt_want_write_file(file); 2558 if (ret) 2559 return ret; 2560 2561 if (btrfs_root_readonly(root)) { 2562 ret = -EROFS; 2563 goto out; 2564 } 2565 2566 switch (inode->i_mode & S_IFMT) { 2567 case S_IFDIR: 2568 if (!capable(CAP_SYS_ADMIN)) { 2569 ret = -EPERM; 2570 goto out; 2571 } 2572 ret = btrfs_defrag_root(root); 2573 break; 2574 case S_IFREG: 2575 if (!(file->f_mode & FMODE_WRITE)) { 2576 ret = -EINVAL; 2577 goto out; 2578 } 2579 2580 range = kzalloc(sizeof(*range), GFP_KERNEL); 2581 if (!range) { 2582 ret = -ENOMEM; 2583 goto out; 2584 } 2585 2586 if (argp) { 2587 if (copy_from_user(range, argp, 2588 sizeof(*range))) { 2589 ret = -EFAULT; 2590 kfree(range); 2591 goto out; 2592 } 2593 /* compression requires us to start the IO */ 2594 if ((range->flags & BTRFS_DEFRAG_RANGE_COMPRESS)) { 2595 range->flags |= BTRFS_DEFRAG_RANGE_START_IO; 2596 range->extent_thresh = (u32)-1; 2597 } 2598 } else { 2599 /* the rest are all set to zero by kzalloc */ 2600 range->len = (u64)-1; 2601 } 2602 ret = btrfs_defrag_file(file_inode(file), file, 2603 range, 0, 0); 2604 if (ret > 0) 2605 ret = 0; 2606 kfree(range); 2607 break; 2608 default: 2609 ret = -EINVAL; 2610 } 2611 out: 2612 mnt_drop_write_file(file); 2613 return ret; 2614 } 2615 2616 static long btrfs_ioctl_add_dev(struct btrfs_fs_info *fs_info, void __user *arg) 2617 { 2618 struct btrfs_ioctl_vol_args *vol_args; 2619 int ret; 2620 2621 if (!capable(CAP_SYS_ADMIN)) 2622 return -EPERM; 2623 2624 if (test_and_set_bit(BTRFS_FS_EXCL_OP, &fs_info->flags)) 2625 return BTRFS_ERROR_DEV_EXCL_RUN_IN_PROGRESS; 2626 2627 mutex_lock(&fs_info->volume_mutex); 2628 vol_args = memdup_user(arg, sizeof(*vol_args)); 2629 if (IS_ERR(vol_args)) { 2630 ret = PTR_ERR(vol_args); 2631 goto out; 2632 } 2633 2634 vol_args->name[BTRFS_PATH_NAME_MAX] = '\0'; 2635 ret = btrfs_init_new_device(fs_info, vol_args->name); 2636 2637 if (!ret) 2638 btrfs_info(fs_info, "disk added %s", vol_args->name); 2639 2640 kfree(vol_args); 2641 out: 2642 mutex_unlock(&fs_info->volume_mutex); 2643 clear_bit(BTRFS_FS_EXCL_OP, &fs_info->flags); 2644 return ret; 2645 } 2646 2647 static long btrfs_ioctl_rm_dev_v2(struct file *file, void __user *arg) 2648 { 2649 struct inode *inode = file_inode(file); 2650 struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb); 2651 struct btrfs_ioctl_vol_args_v2 *vol_args; 2652 int ret; 2653 2654 if (!capable(CAP_SYS_ADMIN)) 2655 return -EPERM; 2656 2657 ret = mnt_want_write_file(file); 2658 if (ret) 2659 return ret; 2660 2661 vol_args = memdup_user(arg, sizeof(*vol_args)); 2662 if (IS_ERR(vol_args)) { 2663 ret = PTR_ERR(vol_args); 2664 goto err_drop; 2665 } 2666 2667 /* Check for compatibility reject unknown flags */ 2668 if (vol_args->flags & ~BTRFS_VOL_ARG_V2_FLAGS_SUPPORTED) 2669 return -EOPNOTSUPP; 2670 2671 if (test_and_set_bit(BTRFS_FS_EXCL_OP, &fs_info->flags)) { 2672 ret = BTRFS_ERROR_DEV_EXCL_RUN_IN_PROGRESS; 2673 goto out; 2674 } 2675 2676 if (vol_args->flags & BTRFS_DEVICE_SPEC_BY_ID) { 2677 ret = btrfs_rm_device(fs_info, NULL, vol_args->devid); 2678 } else { 2679 vol_args->name[BTRFS_SUBVOL_NAME_MAX] = '\0'; 2680 ret = btrfs_rm_device(fs_info, vol_args->name, 0); 2681 } 2682 clear_bit(BTRFS_FS_EXCL_OP, &fs_info->flags); 2683 2684 if (!ret) { 2685 if (vol_args->flags & BTRFS_DEVICE_SPEC_BY_ID) 2686 btrfs_info(fs_info, "device deleted: id %llu", 2687 vol_args->devid); 2688 else 2689 btrfs_info(fs_info, "device deleted: %s", 2690 vol_args->name); 2691 } 2692 out: 2693 kfree(vol_args); 2694 err_drop: 2695 mnt_drop_write_file(file); 2696 return ret; 2697 } 2698 2699 static long btrfs_ioctl_rm_dev(struct file *file, void __user *arg) 2700 { 2701 struct inode *inode = file_inode(file); 2702 struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb); 2703 struct btrfs_ioctl_vol_args *vol_args; 2704 int ret; 2705 2706 if (!capable(CAP_SYS_ADMIN)) 2707 return -EPERM; 2708 2709 ret = mnt_want_write_file(file); 2710 if (ret) 2711 return ret; 2712 2713 if (test_and_set_bit(BTRFS_FS_EXCL_OP, &fs_info->flags)) { 2714 ret = BTRFS_ERROR_DEV_EXCL_RUN_IN_PROGRESS; 2715 goto out_drop_write; 2716 } 2717 2718 vol_args = memdup_user(arg, sizeof(*vol_args)); 2719 if (IS_ERR(vol_args)) { 2720 ret = PTR_ERR(vol_args); 2721 goto out; 2722 } 2723 2724 vol_args->name[BTRFS_PATH_NAME_MAX] = '\0'; 2725 ret = btrfs_rm_device(fs_info, vol_args->name, 0); 2726 2727 if (!ret) 2728 btrfs_info(fs_info, "disk deleted %s", vol_args->name); 2729 kfree(vol_args); 2730 out: 2731 clear_bit(BTRFS_FS_EXCL_OP, &fs_info->flags); 2732 out_drop_write: 2733 mnt_drop_write_file(file); 2734 2735 return ret; 2736 } 2737 2738 static long btrfs_ioctl_fs_info(struct btrfs_fs_info *fs_info, 2739 void __user *arg) 2740 { 2741 struct btrfs_ioctl_fs_info_args *fi_args; 2742 struct btrfs_device *device; 2743 struct btrfs_fs_devices *fs_devices = fs_info->fs_devices; 2744 int ret = 0; 2745 2746 fi_args = kzalloc(sizeof(*fi_args), GFP_KERNEL); 2747 if (!fi_args) 2748 return -ENOMEM; 2749 2750 rcu_read_lock(); 2751 fi_args->num_devices = fs_devices->num_devices; 2752 2753 list_for_each_entry_rcu(device, &fs_devices->devices, dev_list) { 2754 if (device->devid > fi_args->max_id) 2755 fi_args->max_id = device->devid; 2756 } 2757 rcu_read_unlock(); 2758 2759 memcpy(&fi_args->fsid, fs_info->fsid, sizeof(fi_args->fsid)); 2760 fi_args->nodesize = fs_info->nodesize; 2761 fi_args->sectorsize = fs_info->sectorsize; 2762 fi_args->clone_alignment = fs_info->sectorsize; 2763 2764 if (copy_to_user(arg, fi_args, sizeof(*fi_args))) 2765 ret = -EFAULT; 2766 2767 kfree(fi_args); 2768 return ret; 2769 } 2770 2771 static long btrfs_ioctl_dev_info(struct btrfs_fs_info *fs_info, 2772 void __user *arg) 2773 { 2774 struct btrfs_ioctl_dev_info_args *di_args; 2775 struct btrfs_device *dev; 2776 int ret = 0; 2777 char *s_uuid = NULL; 2778 2779 di_args = memdup_user(arg, sizeof(*di_args)); 2780 if (IS_ERR(di_args)) 2781 return PTR_ERR(di_args); 2782 2783 if (!btrfs_is_empty_uuid(di_args->uuid)) 2784 s_uuid = di_args->uuid; 2785 2786 rcu_read_lock(); 2787 dev = btrfs_find_device(fs_info, di_args->devid, s_uuid, NULL); 2788 2789 if (!dev) { 2790 ret = -ENODEV; 2791 goto out; 2792 } 2793 2794 di_args->devid = dev->devid; 2795 di_args->bytes_used = btrfs_device_get_bytes_used(dev); 2796 di_args->total_bytes = btrfs_device_get_total_bytes(dev); 2797 memcpy(di_args->uuid, dev->uuid, sizeof(di_args->uuid)); 2798 if (dev->name) { 2799 struct rcu_string *name; 2800 2801 name = rcu_dereference(dev->name); 2802 strncpy(di_args->path, name->str, sizeof(di_args->path) - 1); 2803 di_args->path[sizeof(di_args->path) - 1] = 0; 2804 } else { 2805 di_args->path[0] = '\0'; 2806 } 2807 2808 out: 2809 rcu_read_unlock(); 2810 if (ret == 0 && copy_to_user(arg, di_args, sizeof(*di_args))) 2811 ret = -EFAULT; 2812 2813 kfree(di_args); 2814 return ret; 2815 } 2816 2817 static struct page *extent_same_get_page(struct inode *inode, pgoff_t index) 2818 { 2819 struct page *page; 2820 2821 page = grab_cache_page(inode->i_mapping, index); 2822 if (!page) 2823 return ERR_PTR(-ENOMEM); 2824 2825 if (!PageUptodate(page)) { 2826 int ret; 2827 2828 ret = btrfs_readpage(NULL, page); 2829 if (ret) 2830 return ERR_PTR(ret); 2831 lock_page(page); 2832 if (!PageUptodate(page)) { 2833 unlock_page(page); 2834 put_page(page); 2835 return ERR_PTR(-EIO); 2836 } 2837 if (page->mapping != inode->i_mapping) { 2838 unlock_page(page); 2839 put_page(page); 2840 return ERR_PTR(-EAGAIN); 2841 } 2842 } 2843 2844 return page; 2845 } 2846 2847 static int gather_extent_pages(struct inode *inode, struct page **pages, 2848 int num_pages, u64 off) 2849 { 2850 int i; 2851 pgoff_t index = off >> PAGE_SHIFT; 2852 2853 for (i = 0; i < num_pages; i++) { 2854 again: 2855 pages[i] = extent_same_get_page(inode, index + i); 2856 if (IS_ERR(pages[i])) { 2857 int err = PTR_ERR(pages[i]); 2858 2859 if (err == -EAGAIN) 2860 goto again; 2861 pages[i] = NULL; 2862 return err; 2863 } 2864 } 2865 return 0; 2866 } 2867 2868 static int lock_extent_range(struct inode *inode, u64 off, u64 len, 2869 bool retry_range_locking) 2870 { 2871 /* 2872 * Do any pending delalloc/csum calculations on inode, one way or 2873 * another, and lock file content. 2874 * The locking order is: 2875 * 2876 * 1) pages 2877 * 2) range in the inode's io tree 2878 */ 2879 while (1) { 2880 struct btrfs_ordered_extent *ordered; 2881 lock_extent(&BTRFS_I(inode)->io_tree, off, off + len - 1); 2882 ordered = btrfs_lookup_first_ordered_extent(inode, 2883 off + len - 1); 2884 if ((!ordered || 2885 ordered->file_offset + ordered->len <= off || 2886 ordered->file_offset >= off + len) && 2887 !test_range_bit(&BTRFS_I(inode)->io_tree, off, 2888 off + len - 1, EXTENT_DELALLOC, 0, NULL)) { 2889 if (ordered) 2890 btrfs_put_ordered_extent(ordered); 2891 break; 2892 } 2893 unlock_extent(&BTRFS_I(inode)->io_tree, off, off + len - 1); 2894 if (ordered) 2895 btrfs_put_ordered_extent(ordered); 2896 if (!retry_range_locking) 2897 return -EAGAIN; 2898 btrfs_wait_ordered_range(inode, off, len); 2899 } 2900 return 0; 2901 } 2902 2903 static void btrfs_double_inode_unlock(struct inode *inode1, struct inode *inode2) 2904 { 2905 inode_unlock(inode1); 2906 inode_unlock(inode2); 2907 } 2908 2909 static void btrfs_double_inode_lock(struct inode *inode1, struct inode *inode2) 2910 { 2911 if (inode1 < inode2) 2912 swap(inode1, inode2); 2913 2914 inode_lock_nested(inode1, I_MUTEX_PARENT); 2915 inode_lock_nested(inode2, I_MUTEX_CHILD); 2916 } 2917 2918 static void btrfs_double_extent_unlock(struct inode *inode1, u64 loff1, 2919 struct inode *inode2, u64 loff2, u64 len) 2920 { 2921 unlock_extent(&BTRFS_I(inode1)->io_tree, loff1, loff1 + len - 1); 2922 unlock_extent(&BTRFS_I(inode2)->io_tree, loff2, loff2 + len - 1); 2923 } 2924 2925 static int btrfs_double_extent_lock(struct inode *inode1, u64 loff1, 2926 struct inode *inode2, u64 loff2, u64 len, 2927 bool retry_range_locking) 2928 { 2929 int ret; 2930 2931 if (inode1 < inode2) { 2932 swap(inode1, inode2); 2933 swap(loff1, loff2); 2934 } 2935 ret = lock_extent_range(inode1, loff1, len, retry_range_locking); 2936 if (ret) 2937 return ret; 2938 ret = lock_extent_range(inode2, loff2, len, retry_range_locking); 2939 if (ret) 2940 unlock_extent(&BTRFS_I(inode1)->io_tree, loff1, 2941 loff1 + len - 1); 2942 return ret; 2943 } 2944 2945 struct cmp_pages { 2946 int num_pages; 2947 struct page **src_pages; 2948 struct page **dst_pages; 2949 }; 2950 2951 static void btrfs_cmp_data_free(struct cmp_pages *cmp) 2952 { 2953 int i; 2954 struct page *pg; 2955 2956 for (i = 0; i < cmp->num_pages; i++) { 2957 pg = cmp->src_pages[i]; 2958 if (pg) { 2959 unlock_page(pg); 2960 put_page(pg); 2961 } 2962 pg = cmp->dst_pages[i]; 2963 if (pg) { 2964 unlock_page(pg); 2965 put_page(pg); 2966 } 2967 } 2968 kfree(cmp->src_pages); 2969 kfree(cmp->dst_pages); 2970 } 2971 2972 static int btrfs_cmp_data_prepare(struct inode *src, u64 loff, 2973 struct inode *dst, u64 dst_loff, 2974 u64 len, struct cmp_pages *cmp) 2975 { 2976 int ret; 2977 int num_pages = PAGE_ALIGN(len) >> PAGE_SHIFT; 2978 struct page **src_pgarr, **dst_pgarr; 2979 2980 /* 2981 * We must gather up all the pages before we initiate our 2982 * extent locking. We use an array for the page pointers. Size 2983 * of the array is bounded by len, which is in turn bounded by 2984 * BTRFS_MAX_DEDUPE_LEN. 2985 */ 2986 src_pgarr = kcalloc(num_pages, sizeof(struct page *), GFP_KERNEL); 2987 dst_pgarr = kcalloc(num_pages, sizeof(struct page *), GFP_KERNEL); 2988 if (!src_pgarr || !dst_pgarr) { 2989 kfree(src_pgarr); 2990 kfree(dst_pgarr); 2991 return -ENOMEM; 2992 } 2993 cmp->num_pages = num_pages; 2994 cmp->src_pages = src_pgarr; 2995 cmp->dst_pages = dst_pgarr; 2996 2997 /* 2998 * If deduping ranges in the same inode, locking rules make it mandatory 2999 * to always lock pages in ascending order to avoid deadlocks with 3000 * concurrent tasks (such as starting writeback/delalloc). 3001 */ 3002 if (src == dst && dst_loff < loff) { 3003 swap(src_pgarr, dst_pgarr); 3004 swap(loff, dst_loff); 3005 } 3006 3007 ret = gather_extent_pages(src, src_pgarr, cmp->num_pages, loff); 3008 if (ret) 3009 goto out; 3010 3011 ret = gather_extent_pages(dst, dst_pgarr, cmp->num_pages, dst_loff); 3012 3013 out: 3014 if (ret) 3015 btrfs_cmp_data_free(cmp); 3016 return ret; 3017 } 3018 3019 static int btrfs_cmp_data(u64 len, struct cmp_pages *cmp) 3020 { 3021 int ret = 0; 3022 int i; 3023 struct page *src_page, *dst_page; 3024 unsigned int cmp_len = PAGE_SIZE; 3025 void *addr, *dst_addr; 3026 3027 i = 0; 3028 while (len) { 3029 if (len < PAGE_SIZE) 3030 cmp_len = len; 3031 3032 BUG_ON(i >= cmp->num_pages); 3033 3034 src_page = cmp->src_pages[i]; 3035 dst_page = cmp->dst_pages[i]; 3036 ASSERT(PageLocked(src_page)); 3037 ASSERT(PageLocked(dst_page)); 3038 3039 addr = kmap_atomic(src_page); 3040 dst_addr = kmap_atomic(dst_page); 3041 3042 flush_dcache_page(src_page); 3043 flush_dcache_page(dst_page); 3044 3045 if (memcmp(addr, dst_addr, cmp_len)) 3046 ret = -EBADE; 3047 3048 kunmap_atomic(addr); 3049 kunmap_atomic(dst_addr); 3050 3051 if (ret) 3052 break; 3053 3054 len -= cmp_len; 3055 i++; 3056 } 3057 3058 return ret; 3059 } 3060 3061 static int extent_same_check_offsets(struct inode *inode, u64 off, u64 *plen, 3062 u64 olen) 3063 { 3064 u64 len = *plen; 3065 u64 bs = BTRFS_I(inode)->root->fs_info->sb->s_blocksize; 3066 3067 if (off + olen > inode->i_size || off + olen < off) 3068 return -EINVAL; 3069 3070 /* if we extend to eof, continue to block boundary */ 3071 if (off + len == inode->i_size) 3072 *plen = len = ALIGN(inode->i_size, bs) - off; 3073 3074 /* Check that we are block aligned - btrfs_clone() requires this */ 3075 if (!IS_ALIGNED(off, bs) || !IS_ALIGNED(off + len, bs)) 3076 return -EINVAL; 3077 3078 return 0; 3079 } 3080 3081 static int btrfs_extent_same(struct inode *src, u64 loff, u64 olen, 3082 struct inode *dst, u64 dst_loff) 3083 { 3084 int ret; 3085 u64 len = olen; 3086 struct cmp_pages cmp; 3087 bool same_inode = (src == dst); 3088 u64 same_lock_start = 0; 3089 u64 same_lock_len = 0; 3090 3091 if (len == 0) 3092 return 0; 3093 3094 if (same_inode) 3095 inode_lock(src); 3096 else 3097 btrfs_double_inode_lock(src, dst); 3098 3099 ret = extent_same_check_offsets(src, loff, &len, olen); 3100 if (ret) 3101 goto out_unlock; 3102 3103 ret = extent_same_check_offsets(dst, dst_loff, &len, olen); 3104 if (ret) 3105 goto out_unlock; 3106 3107 if (same_inode) { 3108 /* 3109 * Single inode case wants the same checks, except we 3110 * don't want our length pushed out past i_size as 3111 * comparing that data range makes no sense. 3112 * 3113 * extent_same_check_offsets() will do this for an 3114 * unaligned length at i_size, so catch it here and 3115 * reject the request. 3116 * 3117 * This effectively means we require aligned extents 3118 * for the single-inode case, whereas the other cases 3119 * allow an unaligned length so long as it ends at 3120 * i_size. 3121 */ 3122 if (len != olen) { 3123 ret = -EINVAL; 3124 goto out_unlock; 3125 } 3126 3127 /* Check for overlapping ranges */ 3128 if (dst_loff + len > loff && dst_loff < loff + len) { 3129 ret = -EINVAL; 3130 goto out_unlock; 3131 } 3132 3133 same_lock_start = min_t(u64, loff, dst_loff); 3134 same_lock_len = max_t(u64, loff, dst_loff) + len - same_lock_start; 3135 } 3136 3137 /* don't make the dst file partly checksummed */ 3138 if ((BTRFS_I(src)->flags & BTRFS_INODE_NODATASUM) != 3139 (BTRFS_I(dst)->flags & BTRFS_INODE_NODATASUM)) { 3140 ret = -EINVAL; 3141 goto out_unlock; 3142 } 3143 3144 again: 3145 ret = btrfs_cmp_data_prepare(src, loff, dst, dst_loff, olen, &cmp); 3146 if (ret) 3147 goto out_unlock; 3148 3149 if (same_inode) 3150 ret = lock_extent_range(src, same_lock_start, same_lock_len, 3151 false); 3152 else 3153 ret = btrfs_double_extent_lock(src, loff, dst, dst_loff, len, 3154 false); 3155 /* 3156 * If one of the inodes has dirty pages in the respective range or 3157 * ordered extents, we need to flush dellaloc and wait for all ordered 3158 * extents in the range. We must unlock the pages and the ranges in the 3159 * io trees to avoid deadlocks when flushing delalloc (requires locking 3160 * pages) and when waiting for ordered extents to complete (they require 3161 * range locking). 3162 */ 3163 if (ret == -EAGAIN) { 3164 /* 3165 * Ranges in the io trees already unlocked. Now unlock all 3166 * pages before waiting for all IO to complete. 3167 */ 3168 btrfs_cmp_data_free(&cmp); 3169 if (same_inode) { 3170 btrfs_wait_ordered_range(src, same_lock_start, 3171 same_lock_len); 3172 } else { 3173 btrfs_wait_ordered_range(src, loff, len); 3174 btrfs_wait_ordered_range(dst, dst_loff, len); 3175 } 3176 goto again; 3177 } 3178 ASSERT(ret == 0); 3179 if (WARN_ON(ret)) { 3180 /* ranges in the io trees already unlocked */ 3181 btrfs_cmp_data_free(&cmp); 3182 return ret; 3183 } 3184 3185 /* pass original length for comparison so we stay within i_size */ 3186 ret = btrfs_cmp_data(olen, &cmp); 3187 if (ret == 0) 3188 ret = btrfs_clone(src, dst, loff, olen, len, dst_loff, 1); 3189 3190 if (same_inode) 3191 unlock_extent(&BTRFS_I(src)->io_tree, same_lock_start, 3192 same_lock_start + same_lock_len - 1); 3193 else 3194 btrfs_double_extent_unlock(src, loff, dst, dst_loff, len); 3195 3196 btrfs_cmp_data_free(&cmp); 3197 out_unlock: 3198 if (same_inode) 3199 inode_unlock(src); 3200 else 3201 btrfs_double_inode_unlock(src, dst); 3202 3203 return ret; 3204 } 3205 3206 #define BTRFS_MAX_DEDUPE_LEN SZ_16M 3207 3208 ssize_t btrfs_dedupe_file_range(struct file *src_file, u64 loff, u64 olen, 3209 struct file *dst_file, u64 dst_loff) 3210 { 3211 struct inode *src = file_inode(src_file); 3212 struct inode *dst = file_inode(dst_file); 3213 u64 bs = BTRFS_I(src)->root->fs_info->sb->s_blocksize; 3214 ssize_t res; 3215 3216 if (olen > BTRFS_MAX_DEDUPE_LEN) 3217 olen = BTRFS_MAX_DEDUPE_LEN; 3218 3219 if (WARN_ON_ONCE(bs < PAGE_SIZE)) { 3220 /* 3221 * Btrfs does not support blocksize < page_size. As a 3222 * result, btrfs_cmp_data() won't correctly handle 3223 * this situation without an update. 3224 */ 3225 return -EINVAL; 3226 } 3227 3228 res = btrfs_extent_same(src, loff, olen, dst, dst_loff); 3229 if (res) 3230 return res; 3231 return olen; 3232 } 3233 3234 static int clone_finish_inode_update(struct btrfs_trans_handle *trans, 3235 struct inode *inode, 3236 u64 endoff, 3237 const u64 destoff, 3238 const u64 olen, 3239 int no_time_update) 3240 { 3241 struct btrfs_root *root = BTRFS_I(inode)->root; 3242 int ret; 3243 3244 inode_inc_iversion(inode); 3245 if (!no_time_update) 3246 inode->i_mtime = inode->i_ctime = current_time(inode); 3247 /* 3248 * We round up to the block size at eof when determining which 3249 * extents to clone above, but shouldn't round up the file size. 3250 */ 3251 if (endoff > destoff + olen) 3252 endoff = destoff + olen; 3253 if (endoff > inode->i_size) 3254 btrfs_i_size_write(BTRFS_I(inode), endoff); 3255 3256 ret = btrfs_update_inode(trans, root, inode); 3257 if (ret) { 3258 btrfs_abort_transaction(trans, ret); 3259 btrfs_end_transaction(trans); 3260 goto out; 3261 } 3262 ret = btrfs_end_transaction(trans); 3263 out: 3264 return ret; 3265 } 3266 3267 static void clone_update_extent_map(struct btrfs_inode *inode, 3268 const struct btrfs_trans_handle *trans, 3269 const struct btrfs_path *path, 3270 const u64 hole_offset, 3271 const u64 hole_len) 3272 { 3273 struct extent_map_tree *em_tree = &inode->extent_tree; 3274 struct extent_map *em; 3275 int ret; 3276 3277 em = alloc_extent_map(); 3278 if (!em) { 3279 set_bit(BTRFS_INODE_NEEDS_FULL_SYNC, &inode->runtime_flags); 3280 return; 3281 } 3282 3283 if (path) { 3284 struct btrfs_file_extent_item *fi; 3285 3286 fi = btrfs_item_ptr(path->nodes[0], path->slots[0], 3287 struct btrfs_file_extent_item); 3288 btrfs_extent_item_to_extent_map(inode, path, fi, false, em); 3289 em->generation = -1; 3290 if (btrfs_file_extent_type(path->nodes[0], fi) == 3291 BTRFS_FILE_EXTENT_INLINE) 3292 set_bit(BTRFS_INODE_NEEDS_FULL_SYNC, 3293 &inode->runtime_flags); 3294 } else { 3295 em->start = hole_offset; 3296 em->len = hole_len; 3297 em->ram_bytes = em->len; 3298 em->orig_start = hole_offset; 3299 em->block_start = EXTENT_MAP_HOLE; 3300 em->block_len = 0; 3301 em->orig_block_len = 0; 3302 em->compress_type = BTRFS_COMPRESS_NONE; 3303 em->generation = trans->transid; 3304 } 3305 3306 while (1) { 3307 write_lock(&em_tree->lock); 3308 ret = add_extent_mapping(em_tree, em, 1); 3309 write_unlock(&em_tree->lock); 3310 if (ret != -EEXIST) { 3311 free_extent_map(em); 3312 break; 3313 } 3314 btrfs_drop_extent_cache(inode, em->start, 3315 em->start + em->len - 1, 0); 3316 } 3317 3318 if (ret) 3319 set_bit(BTRFS_INODE_NEEDS_FULL_SYNC, &inode->runtime_flags); 3320 } 3321 3322 /* 3323 * Make sure we do not end up inserting an inline extent into a file that has 3324 * already other (non-inline) extents. If a file has an inline extent it can 3325 * not have any other extents and the (single) inline extent must start at the 3326 * file offset 0. Failing to respect these rules will lead to file corruption, 3327 * resulting in EIO errors on read/write operations, hitting BUG_ON's in mm, etc 3328 * 3329 * We can have extents that have been already written to disk or we can have 3330 * dirty ranges still in delalloc, in which case the extent maps and items are 3331 * created only when we run delalloc, and the delalloc ranges might fall outside 3332 * the range we are currently locking in the inode's io tree. So we check the 3333 * inode's i_size because of that (i_size updates are done while holding the 3334 * i_mutex, which we are holding here). 3335 * We also check to see if the inode has a size not greater than "datal" but has 3336 * extents beyond it, due to an fallocate with FALLOC_FL_KEEP_SIZE (and we are 3337 * protected against such concurrent fallocate calls by the i_mutex). 3338 * 3339 * If the file has no extents but a size greater than datal, do not allow the 3340 * copy because we would need turn the inline extent into a non-inline one (even 3341 * with NO_HOLES enabled). If we find our destination inode only has one inline 3342 * extent, just overwrite it with the source inline extent if its size is less 3343 * than the source extent's size, or we could copy the source inline extent's 3344 * data into the destination inode's inline extent if the later is greater then 3345 * the former. 3346 */ 3347 static int clone_copy_inline_extent(struct inode *dst, 3348 struct btrfs_trans_handle *trans, 3349 struct btrfs_path *path, 3350 struct btrfs_key *new_key, 3351 const u64 drop_start, 3352 const u64 datal, 3353 const u64 skip, 3354 const u64 size, 3355 char *inline_data) 3356 { 3357 struct btrfs_fs_info *fs_info = btrfs_sb(dst->i_sb); 3358 struct btrfs_root *root = BTRFS_I(dst)->root; 3359 const u64 aligned_end = ALIGN(new_key->offset + datal, 3360 fs_info->sectorsize); 3361 int ret; 3362 struct btrfs_key key; 3363 3364 if (new_key->offset > 0) 3365 return -EOPNOTSUPP; 3366 3367 key.objectid = btrfs_ino(BTRFS_I(dst)); 3368 key.type = BTRFS_EXTENT_DATA_KEY; 3369 key.offset = 0; 3370 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); 3371 if (ret < 0) { 3372 return ret; 3373 } else if (ret > 0) { 3374 if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) { 3375 ret = btrfs_next_leaf(root, path); 3376 if (ret < 0) 3377 return ret; 3378 else if (ret > 0) 3379 goto copy_inline_extent; 3380 } 3381 btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]); 3382 if (key.objectid == btrfs_ino(BTRFS_I(dst)) && 3383 key.type == BTRFS_EXTENT_DATA_KEY) { 3384 ASSERT(key.offset > 0); 3385 return -EOPNOTSUPP; 3386 } 3387 } else if (i_size_read(dst) <= datal) { 3388 struct btrfs_file_extent_item *ei; 3389 u64 ext_len; 3390 3391 /* 3392 * If the file size is <= datal, make sure there are no other 3393 * extents following (can happen do to an fallocate call with 3394 * the flag FALLOC_FL_KEEP_SIZE). 3395 */ 3396 ei = btrfs_item_ptr(path->nodes[0], path->slots[0], 3397 struct btrfs_file_extent_item); 3398 /* 3399 * If it's an inline extent, it can not have other extents 3400 * following it. 3401 */ 3402 if (btrfs_file_extent_type(path->nodes[0], ei) == 3403 BTRFS_FILE_EXTENT_INLINE) 3404 goto copy_inline_extent; 3405 3406 ext_len = btrfs_file_extent_num_bytes(path->nodes[0], ei); 3407 if (ext_len > aligned_end) 3408 return -EOPNOTSUPP; 3409 3410 ret = btrfs_next_item(root, path); 3411 if (ret < 0) { 3412 return ret; 3413 } else if (ret == 0) { 3414 btrfs_item_key_to_cpu(path->nodes[0], &key, 3415 path->slots[0]); 3416 if (key.objectid == btrfs_ino(BTRFS_I(dst)) && 3417 key.type == BTRFS_EXTENT_DATA_KEY) 3418 return -EOPNOTSUPP; 3419 } 3420 } 3421 3422 copy_inline_extent: 3423 /* 3424 * We have no extent items, or we have an extent at offset 0 which may 3425 * or may not be inlined. All these cases are dealt the same way. 3426 */ 3427 if (i_size_read(dst) > datal) { 3428 /* 3429 * If the destination inode has an inline extent... 3430 * This would require copying the data from the source inline 3431 * extent into the beginning of the destination's inline extent. 3432 * But this is really complex, both extents can be compressed 3433 * or just one of them, which would require decompressing and 3434 * re-compressing data (which could increase the new compressed 3435 * size, not allowing the compressed data to fit anymore in an 3436 * inline extent). 3437 * So just don't support this case for now (it should be rare, 3438 * we are not really saving space when cloning inline extents). 3439 */ 3440 return -EOPNOTSUPP; 3441 } 3442 3443 btrfs_release_path(path); 3444 ret = btrfs_drop_extents(trans, root, dst, drop_start, aligned_end, 1); 3445 if (ret) 3446 return ret; 3447 ret = btrfs_insert_empty_item(trans, root, path, new_key, size); 3448 if (ret) 3449 return ret; 3450 3451 if (skip) { 3452 const u32 start = btrfs_file_extent_calc_inline_size(0); 3453 3454 memmove(inline_data + start, inline_data + start + skip, datal); 3455 } 3456 3457 write_extent_buffer(path->nodes[0], inline_data, 3458 btrfs_item_ptr_offset(path->nodes[0], 3459 path->slots[0]), 3460 size); 3461 inode_add_bytes(dst, datal); 3462 3463 return 0; 3464 } 3465 3466 /** 3467 * btrfs_clone() - clone a range from inode file to another 3468 * 3469 * @src: Inode to clone from 3470 * @inode: Inode to clone to 3471 * @off: Offset within source to start clone from 3472 * @olen: Original length, passed by user, of range to clone 3473 * @olen_aligned: Block-aligned value of olen 3474 * @destoff: Offset within @inode to start clone 3475 * @no_time_update: Whether to update mtime/ctime on the target inode 3476 */ 3477 static int btrfs_clone(struct inode *src, struct inode *inode, 3478 const u64 off, const u64 olen, const u64 olen_aligned, 3479 const u64 destoff, int no_time_update) 3480 { 3481 struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb); 3482 struct btrfs_root *root = BTRFS_I(inode)->root; 3483 struct btrfs_path *path = NULL; 3484 struct extent_buffer *leaf; 3485 struct btrfs_trans_handle *trans; 3486 char *buf = NULL; 3487 struct btrfs_key key; 3488 u32 nritems; 3489 int slot; 3490 int ret; 3491 const u64 len = olen_aligned; 3492 u64 last_dest_end = destoff; 3493 3494 ret = -ENOMEM; 3495 buf = kvmalloc(fs_info->nodesize, GFP_KERNEL); 3496 if (!buf) 3497 return ret; 3498 3499 path = btrfs_alloc_path(); 3500 if (!path) { 3501 kvfree(buf); 3502 return ret; 3503 } 3504 3505 path->reada = READA_FORWARD; 3506 /* clone data */ 3507 key.objectid = btrfs_ino(BTRFS_I(src)); 3508 key.type = BTRFS_EXTENT_DATA_KEY; 3509 key.offset = off; 3510 3511 while (1) { 3512 u64 next_key_min_offset = key.offset + 1; 3513 3514 /* 3515 * note the key will change type as we walk through the 3516 * tree. 3517 */ 3518 path->leave_spinning = 1; 3519 ret = btrfs_search_slot(NULL, BTRFS_I(src)->root, &key, path, 3520 0, 0); 3521 if (ret < 0) 3522 goto out; 3523 /* 3524 * First search, if no extent item that starts at offset off was 3525 * found but the previous item is an extent item, it's possible 3526 * it might overlap our target range, therefore process it. 3527 */ 3528 if (key.offset == off && ret > 0 && path->slots[0] > 0) { 3529 btrfs_item_key_to_cpu(path->nodes[0], &key, 3530 path->slots[0] - 1); 3531 if (key.type == BTRFS_EXTENT_DATA_KEY) 3532 path->slots[0]--; 3533 } 3534 3535 nritems = btrfs_header_nritems(path->nodes[0]); 3536 process_slot: 3537 if (path->slots[0] >= nritems) { 3538 ret = btrfs_next_leaf(BTRFS_I(src)->root, path); 3539 if (ret < 0) 3540 goto out; 3541 if (ret > 0) 3542 break; 3543 nritems = btrfs_header_nritems(path->nodes[0]); 3544 } 3545 leaf = path->nodes[0]; 3546 slot = path->slots[0]; 3547 3548 btrfs_item_key_to_cpu(leaf, &key, slot); 3549 if (key.type > BTRFS_EXTENT_DATA_KEY || 3550 key.objectid != btrfs_ino(BTRFS_I(src))) 3551 break; 3552 3553 if (key.type == BTRFS_EXTENT_DATA_KEY) { 3554 struct btrfs_file_extent_item *extent; 3555 int type; 3556 u32 size; 3557 struct btrfs_key new_key; 3558 u64 disko = 0, diskl = 0; 3559 u64 datao = 0, datal = 0; 3560 u8 comp; 3561 u64 drop_start; 3562 3563 extent = btrfs_item_ptr(leaf, slot, 3564 struct btrfs_file_extent_item); 3565 comp = btrfs_file_extent_compression(leaf, extent); 3566 type = btrfs_file_extent_type(leaf, extent); 3567 if (type == BTRFS_FILE_EXTENT_REG || 3568 type == BTRFS_FILE_EXTENT_PREALLOC) { 3569 disko = btrfs_file_extent_disk_bytenr(leaf, 3570 extent); 3571 diskl = btrfs_file_extent_disk_num_bytes(leaf, 3572 extent); 3573 datao = btrfs_file_extent_offset(leaf, extent); 3574 datal = btrfs_file_extent_num_bytes(leaf, 3575 extent); 3576 } else if (type == BTRFS_FILE_EXTENT_INLINE) { 3577 /* take upper bound, may be compressed */ 3578 datal = btrfs_file_extent_ram_bytes(leaf, 3579 extent); 3580 } 3581 3582 /* 3583 * The first search might have left us at an extent 3584 * item that ends before our target range's start, can 3585 * happen if we have holes and NO_HOLES feature enabled. 3586 */ 3587 if (key.offset + datal <= off) { 3588 path->slots[0]++; 3589 goto process_slot; 3590 } else if (key.offset >= off + len) { 3591 break; 3592 } 3593 next_key_min_offset = key.offset + datal; 3594 size = btrfs_item_size_nr(leaf, slot); 3595 read_extent_buffer(leaf, buf, 3596 btrfs_item_ptr_offset(leaf, slot), 3597 size); 3598 3599 btrfs_release_path(path); 3600 path->leave_spinning = 0; 3601 3602 memcpy(&new_key, &key, sizeof(new_key)); 3603 new_key.objectid = btrfs_ino(BTRFS_I(inode)); 3604 if (off <= key.offset) 3605 new_key.offset = key.offset + destoff - off; 3606 else 3607 new_key.offset = destoff; 3608 3609 /* 3610 * Deal with a hole that doesn't have an extent item 3611 * that represents it (NO_HOLES feature enabled). 3612 * This hole is either in the middle of the cloning 3613 * range or at the beginning (fully overlaps it or 3614 * partially overlaps it). 3615 */ 3616 if (new_key.offset != last_dest_end) 3617 drop_start = last_dest_end; 3618 else 3619 drop_start = new_key.offset; 3620 3621 /* 3622 * 1 - adjusting old extent (we may have to split it) 3623 * 1 - add new extent 3624 * 1 - inode update 3625 */ 3626 trans = btrfs_start_transaction(root, 3); 3627 if (IS_ERR(trans)) { 3628 ret = PTR_ERR(trans); 3629 goto out; 3630 } 3631 3632 if (type == BTRFS_FILE_EXTENT_REG || 3633 type == BTRFS_FILE_EXTENT_PREALLOC) { 3634 /* 3635 * a | --- range to clone ---| b 3636 * | ------------- extent ------------- | 3637 */ 3638 3639 /* subtract range b */ 3640 if (key.offset + datal > off + len) 3641 datal = off + len - key.offset; 3642 3643 /* subtract range a */ 3644 if (off > key.offset) { 3645 datao += off - key.offset; 3646 datal -= off - key.offset; 3647 } 3648 3649 ret = btrfs_drop_extents(trans, root, inode, 3650 drop_start, 3651 new_key.offset + datal, 3652 1); 3653 if (ret) { 3654 if (ret != -EOPNOTSUPP) 3655 btrfs_abort_transaction(trans, 3656 ret); 3657 btrfs_end_transaction(trans); 3658 goto out; 3659 } 3660 3661 ret = btrfs_insert_empty_item(trans, root, path, 3662 &new_key, size); 3663 if (ret) { 3664 btrfs_abort_transaction(trans, ret); 3665 btrfs_end_transaction(trans); 3666 goto out; 3667 } 3668 3669 leaf = path->nodes[0]; 3670 slot = path->slots[0]; 3671 write_extent_buffer(leaf, buf, 3672 btrfs_item_ptr_offset(leaf, slot), 3673 size); 3674 3675 extent = btrfs_item_ptr(leaf, slot, 3676 struct btrfs_file_extent_item); 3677 3678 /* disko == 0 means it's a hole */ 3679 if (!disko) 3680 datao = 0; 3681 3682 btrfs_set_file_extent_offset(leaf, extent, 3683 datao); 3684 btrfs_set_file_extent_num_bytes(leaf, extent, 3685 datal); 3686 3687 if (disko) { 3688 inode_add_bytes(inode, datal); 3689 ret = btrfs_inc_extent_ref(trans, 3690 root, 3691 disko, diskl, 0, 3692 root->root_key.objectid, 3693 btrfs_ino(BTRFS_I(inode)), 3694 new_key.offset - datao); 3695 if (ret) { 3696 btrfs_abort_transaction(trans, 3697 ret); 3698 btrfs_end_transaction(trans); 3699 goto out; 3700 3701 } 3702 } 3703 } else if (type == BTRFS_FILE_EXTENT_INLINE) { 3704 u64 skip = 0; 3705 u64 trim = 0; 3706 3707 if (off > key.offset) { 3708 skip = off - key.offset; 3709 new_key.offset += skip; 3710 } 3711 3712 if (key.offset + datal > off + len) 3713 trim = key.offset + datal - (off + len); 3714 3715 if (comp && (skip || trim)) { 3716 ret = -EINVAL; 3717 btrfs_end_transaction(trans); 3718 goto out; 3719 } 3720 size -= skip + trim; 3721 datal -= skip + trim; 3722 3723 ret = clone_copy_inline_extent(inode, 3724 trans, path, 3725 &new_key, 3726 drop_start, 3727 datal, 3728 skip, size, buf); 3729 if (ret) { 3730 if (ret != -EOPNOTSUPP) 3731 btrfs_abort_transaction(trans, 3732 ret); 3733 btrfs_end_transaction(trans); 3734 goto out; 3735 } 3736 leaf = path->nodes[0]; 3737 slot = path->slots[0]; 3738 } 3739 3740 /* If we have an implicit hole (NO_HOLES feature). */ 3741 if (drop_start < new_key.offset) 3742 clone_update_extent_map(BTRFS_I(inode), trans, 3743 NULL, drop_start, 3744 new_key.offset - drop_start); 3745 3746 clone_update_extent_map(BTRFS_I(inode), trans, 3747 path, 0, 0); 3748 3749 btrfs_mark_buffer_dirty(leaf); 3750 btrfs_release_path(path); 3751 3752 last_dest_end = ALIGN(new_key.offset + datal, 3753 fs_info->sectorsize); 3754 ret = clone_finish_inode_update(trans, inode, 3755 last_dest_end, 3756 destoff, olen, 3757 no_time_update); 3758 if (ret) 3759 goto out; 3760 if (new_key.offset + datal >= destoff + len) 3761 break; 3762 } 3763 btrfs_release_path(path); 3764 key.offset = next_key_min_offset; 3765 3766 if (fatal_signal_pending(current)) { 3767 ret = -EINTR; 3768 goto out; 3769 } 3770 } 3771 ret = 0; 3772 3773 if (last_dest_end < destoff + len) { 3774 /* 3775 * We have an implicit hole (NO_HOLES feature is enabled) that 3776 * fully or partially overlaps our cloning range at its end. 3777 */ 3778 btrfs_release_path(path); 3779 3780 /* 3781 * 1 - remove extent(s) 3782 * 1 - inode update 3783 */ 3784 trans = btrfs_start_transaction(root, 2); 3785 if (IS_ERR(trans)) { 3786 ret = PTR_ERR(trans); 3787 goto out; 3788 } 3789 ret = btrfs_drop_extents(trans, root, inode, 3790 last_dest_end, destoff + len, 1); 3791 if (ret) { 3792 if (ret != -EOPNOTSUPP) 3793 btrfs_abort_transaction(trans, ret); 3794 btrfs_end_transaction(trans); 3795 goto out; 3796 } 3797 clone_update_extent_map(BTRFS_I(inode), trans, NULL, 3798 last_dest_end, 3799 destoff + len - last_dest_end); 3800 ret = clone_finish_inode_update(trans, inode, destoff + len, 3801 destoff, olen, no_time_update); 3802 } 3803 3804 out: 3805 btrfs_free_path(path); 3806 kvfree(buf); 3807 return ret; 3808 } 3809 3810 static noinline int btrfs_clone_files(struct file *file, struct file *file_src, 3811 u64 off, u64 olen, u64 destoff) 3812 { 3813 struct inode *inode = file_inode(file); 3814 struct inode *src = file_inode(file_src); 3815 struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb); 3816 struct btrfs_root *root = BTRFS_I(inode)->root; 3817 int ret; 3818 u64 len = olen; 3819 u64 bs = fs_info->sb->s_blocksize; 3820 int same_inode = src == inode; 3821 3822 /* 3823 * TODO: 3824 * - split compressed inline extents. annoying: we need to 3825 * decompress into destination's address_space (the file offset 3826 * may change, so source mapping won't do), then recompress (or 3827 * otherwise reinsert) a subrange. 3828 * 3829 * - split destination inode's inline extents. The inline extents can 3830 * be either compressed or non-compressed. 3831 */ 3832 3833 if (btrfs_root_readonly(root)) 3834 return -EROFS; 3835 3836 if (file_src->f_path.mnt != file->f_path.mnt || 3837 src->i_sb != inode->i_sb) 3838 return -EXDEV; 3839 3840 /* don't make the dst file partly checksummed */ 3841 if ((BTRFS_I(src)->flags & BTRFS_INODE_NODATASUM) != 3842 (BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM)) 3843 return -EINVAL; 3844 3845 if (S_ISDIR(src->i_mode) || S_ISDIR(inode->i_mode)) 3846 return -EISDIR; 3847 3848 if (!same_inode) { 3849 btrfs_double_inode_lock(src, inode); 3850 } else { 3851 inode_lock(src); 3852 } 3853 3854 /* determine range to clone */ 3855 ret = -EINVAL; 3856 if (off + len > src->i_size || off + len < off) 3857 goto out_unlock; 3858 if (len == 0) 3859 olen = len = src->i_size - off; 3860 /* if we extend to eof, continue to block boundary */ 3861 if (off + len == src->i_size) 3862 len = ALIGN(src->i_size, bs) - off; 3863 3864 if (len == 0) { 3865 ret = 0; 3866 goto out_unlock; 3867 } 3868 3869 /* verify the end result is block aligned */ 3870 if (!IS_ALIGNED(off, bs) || !IS_ALIGNED(off + len, bs) || 3871 !IS_ALIGNED(destoff, bs)) 3872 goto out_unlock; 3873 3874 /* verify if ranges are overlapped within the same file */ 3875 if (same_inode) { 3876 if (destoff + len > off && destoff < off + len) 3877 goto out_unlock; 3878 } 3879 3880 if (destoff > inode->i_size) { 3881 ret = btrfs_cont_expand(inode, inode->i_size, destoff); 3882 if (ret) 3883 goto out_unlock; 3884 } 3885 3886 /* 3887 * Lock the target range too. Right after we replace the file extent 3888 * items in the fs tree (which now point to the cloned data), we might 3889 * have a worker replace them with extent items relative to a write 3890 * operation that was issued before this clone operation (i.e. confront 3891 * with inode.c:btrfs_finish_ordered_io). 3892 */ 3893 if (same_inode) { 3894 u64 lock_start = min_t(u64, off, destoff); 3895 u64 lock_len = max_t(u64, off, destoff) + len - lock_start; 3896 3897 ret = lock_extent_range(src, lock_start, lock_len, true); 3898 } else { 3899 ret = btrfs_double_extent_lock(src, off, inode, destoff, len, 3900 true); 3901 } 3902 ASSERT(ret == 0); 3903 if (WARN_ON(ret)) { 3904 /* ranges in the io trees already unlocked */ 3905 goto out_unlock; 3906 } 3907 3908 ret = btrfs_clone(src, inode, off, olen, len, destoff, 0); 3909 3910 if (same_inode) { 3911 u64 lock_start = min_t(u64, off, destoff); 3912 u64 lock_end = max_t(u64, off, destoff) + len - 1; 3913 3914 unlock_extent(&BTRFS_I(src)->io_tree, lock_start, lock_end); 3915 } else { 3916 btrfs_double_extent_unlock(src, off, inode, destoff, len); 3917 } 3918 /* 3919 * Truncate page cache pages so that future reads will see the cloned 3920 * data immediately and not the previous data. 3921 */ 3922 truncate_inode_pages_range(&inode->i_data, 3923 round_down(destoff, PAGE_SIZE), 3924 round_up(destoff + len, PAGE_SIZE) - 1); 3925 out_unlock: 3926 if (!same_inode) 3927 btrfs_double_inode_unlock(src, inode); 3928 else 3929 inode_unlock(src); 3930 return ret; 3931 } 3932 3933 int btrfs_clone_file_range(struct file *src_file, loff_t off, 3934 struct file *dst_file, loff_t destoff, u64 len) 3935 { 3936 return btrfs_clone_files(dst_file, src_file, off, len, destoff); 3937 } 3938 3939 /* 3940 * there are many ways the trans_start and trans_end ioctls can lead 3941 * to deadlocks. They should only be used by applications that 3942 * basically own the machine, and have a very in depth understanding 3943 * of all the possible deadlocks and enospc problems. 3944 */ 3945 static long btrfs_ioctl_trans_start(struct file *file) 3946 { 3947 struct inode *inode = file_inode(file); 3948 struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb); 3949 struct btrfs_root *root = BTRFS_I(inode)->root; 3950 struct btrfs_trans_handle *trans; 3951 struct btrfs_file_private *private; 3952 int ret; 3953 static bool warned = false; 3954 3955 ret = -EPERM; 3956 if (!capable(CAP_SYS_ADMIN)) 3957 goto out; 3958 3959 if (!warned) { 3960 btrfs_warn(fs_info, 3961 "Userspace transaction mechanism is considered " 3962 "deprecated and slated to be removed in 4.17. " 3963 "If you have a valid use case please " 3964 "speak up on the mailing list"); 3965 WARN_ON(1); 3966 warned = true; 3967 } 3968 3969 ret = -EINPROGRESS; 3970 private = file->private_data; 3971 if (private && private->trans) 3972 goto out; 3973 if (!private) { 3974 private = kzalloc(sizeof(struct btrfs_file_private), 3975 GFP_KERNEL); 3976 if (!private) 3977 return -ENOMEM; 3978 file->private_data = private; 3979 } 3980 3981 ret = -EROFS; 3982 if (btrfs_root_readonly(root)) 3983 goto out; 3984 3985 ret = mnt_want_write_file(file); 3986 if (ret) 3987 goto out; 3988 3989 atomic_inc(&fs_info->open_ioctl_trans); 3990 3991 ret = -ENOMEM; 3992 trans = btrfs_start_ioctl_transaction(root); 3993 if (IS_ERR(trans)) 3994 goto out_drop; 3995 3996 private->trans = trans; 3997 return 0; 3998 3999 out_drop: 4000 atomic_dec(&fs_info->open_ioctl_trans); 4001 mnt_drop_write_file(file); 4002 out: 4003 return ret; 4004 } 4005 4006 static long btrfs_ioctl_default_subvol(struct file *file, void __user *argp) 4007 { 4008 struct inode *inode = file_inode(file); 4009 struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb); 4010 struct btrfs_root *root = BTRFS_I(inode)->root; 4011 struct btrfs_root *new_root; 4012 struct btrfs_dir_item *di; 4013 struct btrfs_trans_handle *trans; 4014 struct btrfs_path *path; 4015 struct btrfs_key location; 4016 struct btrfs_disk_key disk_key; 4017 u64 objectid = 0; 4018 u64 dir_id; 4019 int ret; 4020 4021 if (!capable(CAP_SYS_ADMIN)) 4022 return -EPERM; 4023 4024 ret = mnt_want_write_file(file); 4025 if (ret) 4026 return ret; 4027 4028 if (copy_from_user(&objectid, argp, sizeof(objectid))) { 4029 ret = -EFAULT; 4030 goto out; 4031 } 4032 4033 if (!objectid) 4034 objectid = BTRFS_FS_TREE_OBJECTID; 4035 4036 location.objectid = objectid; 4037 location.type = BTRFS_ROOT_ITEM_KEY; 4038 location.offset = (u64)-1; 4039 4040 new_root = btrfs_read_fs_root_no_name(fs_info, &location); 4041 if (IS_ERR(new_root)) { 4042 ret = PTR_ERR(new_root); 4043 goto out; 4044 } 4045 if (!is_fstree(new_root->objectid)) { 4046 ret = -ENOENT; 4047 goto out; 4048 } 4049 4050 path = btrfs_alloc_path(); 4051 if (!path) { 4052 ret = -ENOMEM; 4053 goto out; 4054 } 4055 path->leave_spinning = 1; 4056 4057 trans = btrfs_start_transaction(root, 1); 4058 if (IS_ERR(trans)) { 4059 btrfs_free_path(path); 4060 ret = PTR_ERR(trans); 4061 goto out; 4062 } 4063 4064 dir_id = btrfs_super_root_dir(fs_info->super_copy); 4065 di = btrfs_lookup_dir_item(trans, fs_info->tree_root, path, 4066 dir_id, "default", 7, 1); 4067 if (IS_ERR_OR_NULL(di)) { 4068 btrfs_free_path(path); 4069 btrfs_end_transaction(trans); 4070 btrfs_err(fs_info, 4071 "Umm, you don't have the default diritem, this isn't going to work"); 4072 ret = -ENOENT; 4073 goto out; 4074 } 4075 4076 btrfs_cpu_key_to_disk(&disk_key, &new_root->root_key); 4077 btrfs_set_dir_item_key(path->nodes[0], di, &disk_key); 4078 btrfs_mark_buffer_dirty(path->nodes[0]); 4079 btrfs_free_path(path); 4080 4081 btrfs_set_fs_incompat(fs_info, DEFAULT_SUBVOL); 4082 btrfs_end_transaction(trans); 4083 out: 4084 mnt_drop_write_file(file); 4085 return ret; 4086 } 4087 4088 void btrfs_get_block_group_info(struct list_head *groups_list, 4089 struct btrfs_ioctl_space_info *space) 4090 { 4091 struct btrfs_block_group_cache *block_group; 4092 4093 space->total_bytes = 0; 4094 space->used_bytes = 0; 4095 space->flags = 0; 4096 list_for_each_entry(block_group, groups_list, list) { 4097 space->flags = block_group->flags; 4098 space->total_bytes += block_group->key.offset; 4099 space->used_bytes += 4100 btrfs_block_group_used(&block_group->item); 4101 } 4102 } 4103 4104 static long btrfs_ioctl_space_info(struct btrfs_fs_info *fs_info, 4105 void __user *arg) 4106 { 4107 struct btrfs_ioctl_space_args space_args; 4108 struct btrfs_ioctl_space_info space; 4109 struct btrfs_ioctl_space_info *dest; 4110 struct btrfs_ioctl_space_info *dest_orig; 4111 struct btrfs_ioctl_space_info __user *user_dest; 4112 struct btrfs_space_info *info; 4113 static const u64 types[] = { 4114 BTRFS_BLOCK_GROUP_DATA, 4115 BTRFS_BLOCK_GROUP_SYSTEM, 4116 BTRFS_BLOCK_GROUP_METADATA, 4117 BTRFS_BLOCK_GROUP_DATA | BTRFS_BLOCK_GROUP_METADATA 4118 }; 4119 int num_types = 4; 4120 int alloc_size; 4121 int ret = 0; 4122 u64 slot_count = 0; 4123 int i, c; 4124 4125 if (copy_from_user(&space_args, 4126 (struct btrfs_ioctl_space_args __user *)arg, 4127 sizeof(space_args))) 4128 return -EFAULT; 4129 4130 for (i = 0; i < num_types; i++) { 4131 struct btrfs_space_info *tmp; 4132 4133 info = NULL; 4134 rcu_read_lock(); 4135 list_for_each_entry_rcu(tmp, &fs_info->space_info, 4136 list) { 4137 if (tmp->flags == types[i]) { 4138 info = tmp; 4139 break; 4140 } 4141 } 4142 rcu_read_unlock(); 4143 4144 if (!info) 4145 continue; 4146 4147 down_read(&info->groups_sem); 4148 for (c = 0; c < BTRFS_NR_RAID_TYPES; c++) { 4149 if (!list_empty(&info->block_groups[c])) 4150 slot_count++; 4151 } 4152 up_read(&info->groups_sem); 4153 } 4154 4155 /* 4156 * Global block reserve, exported as a space_info 4157 */ 4158 slot_count++; 4159 4160 /* space_slots == 0 means they are asking for a count */ 4161 if (space_args.space_slots == 0) { 4162 space_args.total_spaces = slot_count; 4163 goto out; 4164 } 4165 4166 slot_count = min_t(u64, space_args.space_slots, slot_count); 4167 4168 alloc_size = sizeof(*dest) * slot_count; 4169 4170 /* we generally have at most 6 or so space infos, one for each raid 4171 * level. So, a whole page should be more than enough for everyone 4172 */ 4173 if (alloc_size > PAGE_SIZE) 4174 return -ENOMEM; 4175 4176 space_args.total_spaces = 0; 4177 dest = kmalloc(alloc_size, GFP_KERNEL); 4178 if (!dest) 4179 return -ENOMEM; 4180 dest_orig = dest; 4181 4182 /* now we have a buffer to copy into */ 4183 for (i = 0; i < num_types; i++) { 4184 struct btrfs_space_info *tmp; 4185 4186 if (!slot_count) 4187 break; 4188 4189 info = NULL; 4190 rcu_read_lock(); 4191 list_for_each_entry_rcu(tmp, &fs_info->space_info, 4192 list) { 4193 if (tmp->flags == types[i]) { 4194 info = tmp; 4195 break; 4196 } 4197 } 4198 rcu_read_unlock(); 4199 4200 if (!info) 4201 continue; 4202 down_read(&info->groups_sem); 4203 for (c = 0; c < BTRFS_NR_RAID_TYPES; c++) { 4204 if (!list_empty(&info->block_groups[c])) { 4205 btrfs_get_block_group_info( 4206 &info->block_groups[c], &space); 4207 memcpy(dest, &space, sizeof(space)); 4208 dest++; 4209 space_args.total_spaces++; 4210 slot_count--; 4211 } 4212 if (!slot_count) 4213 break; 4214 } 4215 up_read(&info->groups_sem); 4216 } 4217 4218 /* 4219 * Add global block reserve 4220 */ 4221 if (slot_count) { 4222 struct btrfs_block_rsv *block_rsv = &fs_info->global_block_rsv; 4223 4224 spin_lock(&block_rsv->lock); 4225 space.total_bytes = block_rsv->size; 4226 space.used_bytes = block_rsv->size - block_rsv->reserved; 4227 spin_unlock(&block_rsv->lock); 4228 space.flags = BTRFS_SPACE_INFO_GLOBAL_RSV; 4229 memcpy(dest, &space, sizeof(space)); 4230 space_args.total_spaces++; 4231 } 4232 4233 user_dest = (struct btrfs_ioctl_space_info __user *) 4234 (arg + sizeof(struct btrfs_ioctl_space_args)); 4235 4236 if (copy_to_user(user_dest, dest_orig, alloc_size)) 4237 ret = -EFAULT; 4238 4239 kfree(dest_orig); 4240 out: 4241 if (ret == 0 && copy_to_user(arg, &space_args, sizeof(space_args))) 4242 ret = -EFAULT; 4243 4244 return ret; 4245 } 4246 4247 /* 4248 * there are many ways the trans_start and trans_end ioctls can lead 4249 * to deadlocks. They should only be used by applications that 4250 * basically own the machine, and have a very in depth understanding 4251 * of all the possible deadlocks and enospc problems. 4252 */ 4253 long btrfs_ioctl_trans_end(struct file *file) 4254 { 4255 struct inode *inode = file_inode(file); 4256 struct btrfs_root *root = BTRFS_I(inode)->root; 4257 struct btrfs_file_private *private = file->private_data; 4258 4259 if (!private || !private->trans) 4260 return -EINVAL; 4261 4262 btrfs_end_transaction(private->trans); 4263 private->trans = NULL; 4264 4265 atomic_dec(&root->fs_info->open_ioctl_trans); 4266 4267 mnt_drop_write_file(file); 4268 return 0; 4269 } 4270 4271 static noinline long btrfs_ioctl_start_sync(struct btrfs_root *root, 4272 void __user *argp) 4273 { 4274 struct btrfs_trans_handle *trans; 4275 u64 transid; 4276 int ret; 4277 4278 trans = btrfs_attach_transaction_barrier(root); 4279 if (IS_ERR(trans)) { 4280 if (PTR_ERR(trans) != -ENOENT) 4281 return PTR_ERR(trans); 4282 4283 /* No running transaction, don't bother */ 4284 transid = root->fs_info->last_trans_committed; 4285 goto out; 4286 } 4287 transid = trans->transid; 4288 ret = btrfs_commit_transaction_async(trans, 0); 4289 if (ret) { 4290 btrfs_end_transaction(trans); 4291 return ret; 4292 } 4293 out: 4294 if (argp) 4295 if (copy_to_user(argp, &transid, sizeof(transid))) 4296 return -EFAULT; 4297 return 0; 4298 } 4299 4300 static noinline long btrfs_ioctl_wait_sync(struct btrfs_fs_info *fs_info, 4301 void __user *argp) 4302 { 4303 u64 transid; 4304 4305 if (argp) { 4306 if (copy_from_user(&transid, argp, sizeof(transid))) 4307 return -EFAULT; 4308 } else { 4309 transid = 0; /* current trans */ 4310 } 4311 return btrfs_wait_for_commit(fs_info, transid); 4312 } 4313 4314 static long btrfs_ioctl_scrub(struct file *file, void __user *arg) 4315 { 4316 struct btrfs_fs_info *fs_info = btrfs_sb(file_inode(file)->i_sb); 4317 struct btrfs_ioctl_scrub_args *sa; 4318 int ret; 4319 4320 if (!capable(CAP_SYS_ADMIN)) 4321 return -EPERM; 4322 4323 sa = memdup_user(arg, sizeof(*sa)); 4324 if (IS_ERR(sa)) 4325 return PTR_ERR(sa); 4326 4327 if (!(sa->flags & BTRFS_SCRUB_READONLY)) { 4328 ret = mnt_want_write_file(file); 4329 if (ret) 4330 goto out; 4331 } 4332 4333 ret = btrfs_scrub_dev(fs_info, sa->devid, sa->start, sa->end, 4334 &sa->progress, sa->flags & BTRFS_SCRUB_READONLY, 4335 0); 4336 4337 if (copy_to_user(arg, sa, sizeof(*sa))) 4338 ret = -EFAULT; 4339 4340 if (!(sa->flags & BTRFS_SCRUB_READONLY)) 4341 mnt_drop_write_file(file); 4342 out: 4343 kfree(sa); 4344 return ret; 4345 } 4346 4347 static long btrfs_ioctl_scrub_cancel(struct btrfs_fs_info *fs_info) 4348 { 4349 if (!capable(CAP_SYS_ADMIN)) 4350 return -EPERM; 4351 4352 return btrfs_scrub_cancel(fs_info); 4353 } 4354 4355 static long btrfs_ioctl_scrub_progress(struct btrfs_fs_info *fs_info, 4356 void __user *arg) 4357 { 4358 struct btrfs_ioctl_scrub_args *sa; 4359 int ret; 4360 4361 if (!capable(CAP_SYS_ADMIN)) 4362 return -EPERM; 4363 4364 sa = memdup_user(arg, sizeof(*sa)); 4365 if (IS_ERR(sa)) 4366 return PTR_ERR(sa); 4367 4368 ret = btrfs_scrub_progress(fs_info, sa->devid, &sa->progress); 4369 4370 if (copy_to_user(arg, sa, sizeof(*sa))) 4371 ret = -EFAULT; 4372 4373 kfree(sa); 4374 return ret; 4375 } 4376 4377 static long btrfs_ioctl_get_dev_stats(struct btrfs_fs_info *fs_info, 4378 void __user *arg) 4379 { 4380 struct btrfs_ioctl_get_dev_stats *sa; 4381 int ret; 4382 4383 sa = memdup_user(arg, sizeof(*sa)); 4384 if (IS_ERR(sa)) 4385 return PTR_ERR(sa); 4386 4387 if ((sa->flags & BTRFS_DEV_STATS_RESET) && !capable(CAP_SYS_ADMIN)) { 4388 kfree(sa); 4389 return -EPERM; 4390 } 4391 4392 ret = btrfs_get_dev_stats(fs_info, sa); 4393 4394 if (copy_to_user(arg, sa, sizeof(*sa))) 4395 ret = -EFAULT; 4396 4397 kfree(sa); 4398 return ret; 4399 } 4400 4401 static long btrfs_ioctl_dev_replace(struct btrfs_fs_info *fs_info, 4402 void __user *arg) 4403 { 4404 struct btrfs_ioctl_dev_replace_args *p; 4405 int ret; 4406 4407 if (!capable(CAP_SYS_ADMIN)) 4408 return -EPERM; 4409 4410 p = memdup_user(arg, sizeof(*p)); 4411 if (IS_ERR(p)) 4412 return PTR_ERR(p); 4413 4414 switch (p->cmd) { 4415 case BTRFS_IOCTL_DEV_REPLACE_CMD_START: 4416 if (sb_rdonly(fs_info->sb)) { 4417 ret = -EROFS; 4418 goto out; 4419 } 4420 if (test_and_set_bit(BTRFS_FS_EXCL_OP, &fs_info->flags)) { 4421 ret = BTRFS_ERROR_DEV_EXCL_RUN_IN_PROGRESS; 4422 } else { 4423 ret = btrfs_dev_replace_by_ioctl(fs_info, p); 4424 clear_bit(BTRFS_FS_EXCL_OP, &fs_info->flags); 4425 } 4426 break; 4427 case BTRFS_IOCTL_DEV_REPLACE_CMD_STATUS: 4428 btrfs_dev_replace_status(fs_info, p); 4429 ret = 0; 4430 break; 4431 case BTRFS_IOCTL_DEV_REPLACE_CMD_CANCEL: 4432 ret = btrfs_dev_replace_cancel(fs_info, p); 4433 break; 4434 default: 4435 ret = -EINVAL; 4436 break; 4437 } 4438 4439 if (copy_to_user(arg, p, sizeof(*p))) 4440 ret = -EFAULT; 4441 out: 4442 kfree(p); 4443 return ret; 4444 } 4445 4446 static long btrfs_ioctl_ino_to_path(struct btrfs_root *root, void __user *arg) 4447 { 4448 int ret = 0; 4449 int i; 4450 u64 rel_ptr; 4451 int size; 4452 struct btrfs_ioctl_ino_path_args *ipa = NULL; 4453 struct inode_fs_paths *ipath = NULL; 4454 struct btrfs_path *path; 4455 4456 if (!capable(CAP_DAC_READ_SEARCH)) 4457 return -EPERM; 4458 4459 path = btrfs_alloc_path(); 4460 if (!path) { 4461 ret = -ENOMEM; 4462 goto out; 4463 } 4464 4465 ipa = memdup_user(arg, sizeof(*ipa)); 4466 if (IS_ERR(ipa)) { 4467 ret = PTR_ERR(ipa); 4468 ipa = NULL; 4469 goto out; 4470 } 4471 4472 size = min_t(u32, ipa->size, 4096); 4473 ipath = init_ipath(size, root, path); 4474 if (IS_ERR(ipath)) { 4475 ret = PTR_ERR(ipath); 4476 ipath = NULL; 4477 goto out; 4478 } 4479 4480 ret = paths_from_inode(ipa->inum, ipath); 4481 if (ret < 0) 4482 goto out; 4483 4484 for (i = 0; i < ipath->fspath->elem_cnt; ++i) { 4485 rel_ptr = ipath->fspath->val[i] - 4486 (u64)(unsigned long)ipath->fspath->val; 4487 ipath->fspath->val[i] = rel_ptr; 4488 } 4489 4490 ret = copy_to_user((void __user *)(unsigned long)ipa->fspath, 4491 ipath->fspath, size); 4492 if (ret) { 4493 ret = -EFAULT; 4494 goto out; 4495 } 4496 4497 out: 4498 btrfs_free_path(path); 4499 free_ipath(ipath); 4500 kfree(ipa); 4501 4502 return ret; 4503 } 4504 4505 static int build_ino_list(u64 inum, u64 offset, u64 root, void *ctx) 4506 { 4507 struct btrfs_data_container *inodes = ctx; 4508 const size_t c = 3 * sizeof(u64); 4509 4510 if (inodes->bytes_left >= c) { 4511 inodes->bytes_left -= c; 4512 inodes->val[inodes->elem_cnt] = inum; 4513 inodes->val[inodes->elem_cnt + 1] = offset; 4514 inodes->val[inodes->elem_cnt + 2] = root; 4515 inodes->elem_cnt += 3; 4516 } else { 4517 inodes->bytes_missing += c - inodes->bytes_left; 4518 inodes->bytes_left = 0; 4519 inodes->elem_missed += 3; 4520 } 4521 4522 return 0; 4523 } 4524 4525 static long btrfs_ioctl_logical_to_ino(struct btrfs_fs_info *fs_info, 4526 void __user *arg, int version) 4527 { 4528 int ret = 0; 4529 int size; 4530 struct btrfs_ioctl_logical_ino_args *loi; 4531 struct btrfs_data_container *inodes = NULL; 4532 struct btrfs_path *path = NULL; 4533 bool ignore_offset; 4534 4535 if (!capable(CAP_SYS_ADMIN)) 4536 return -EPERM; 4537 4538 loi = memdup_user(arg, sizeof(*loi)); 4539 if (IS_ERR(loi)) 4540 return PTR_ERR(loi); 4541 4542 if (version == 1) { 4543 ignore_offset = false; 4544 size = min_t(u32, loi->size, SZ_64K); 4545 } else { 4546 /* All reserved bits must be 0 for now */ 4547 if (memchr_inv(loi->reserved, 0, sizeof(loi->reserved))) { 4548 ret = -EINVAL; 4549 goto out_loi; 4550 } 4551 /* Only accept flags we have defined so far */ 4552 if (loi->flags & ~(BTRFS_LOGICAL_INO_ARGS_IGNORE_OFFSET)) { 4553 ret = -EINVAL; 4554 goto out_loi; 4555 } 4556 ignore_offset = loi->flags & BTRFS_LOGICAL_INO_ARGS_IGNORE_OFFSET; 4557 size = min_t(u32, loi->size, SZ_16M); 4558 } 4559 4560 path = btrfs_alloc_path(); 4561 if (!path) { 4562 ret = -ENOMEM; 4563 goto out; 4564 } 4565 4566 inodes = init_data_container(size); 4567 if (IS_ERR(inodes)) { 4568 ret = PTR_ERR(inodes); 4569 inodes = NULL; 4570 goto out; 4571 } 4572 4573 ret = iterate_inodes_from_logical(loi->logical, fs_info, path, 4574 build_ino_list, inodes, ignore_offset); 4575 if (ret == -EINVAL) 4576 ret = -ENOENT; 4577 if (ret < 0) 4578 goto out; 4579 4580 ret = copy_to_user((void __user *)(unsigned long)loi->inodes, inodes, 4581 size); 4582 if (ret) 4583 ret = -EFAULT; 4584 4585 out: 4586 btrfs_free_path(path); 4587 kvfree(inodes); 4588 out_loi: 4589 kfree(loi); 4590 4591 return ret; 4592 } 4593 4594 void update_ioctl_balance_args(struct btrfs_fs_info *fs_info, int lock, 4595 struct btrfs_ioctl_balance_args *bargs) 4596 { 4597 struct btrfs_balance_control *bctl = fs_info->balance_ctl; 4598 4599 bargs->flags = bctl->flags; 4600 4601 if (atomic_read(&fs_info->balance_running)) 4602 bargs->state |= BTRFS_BALANCE_STATE_RUNNING; 4603 if (atomic_read(&fs_info->balance_pause_req)) 4604 bargs->state |= BTRFS_BALANCE_STATE_PAUSE_REQ; 4605 if (atomic_read(&fs_info->balance_cancel_req)) 4606 bargs->state |= BTRFS_BALANCE_STATE_CANCEL_REQ; 4607 4608 memcpy(&bargs->data, &bctl->data, sizeof(bargs->data)); 4609 memcpy(&bargs->meta, &bctl->meta, sizeof(bargs->meta)); 4610 memcpy(&bargs->sys, &bctl->sys, sizeof(bargs->sys)); 4611 4612 if (lock) { 4613 spin_lock(&fs_info->balance_lock); 4614 memcpy(&bargs->stat, &bctl->stat, sizeof(bargs->stat)); 4615 spin_unlock(&fs_info->balance_lock); 4616 } else { 4617 memcpy(&bargs->stat, &bctl->stat, sizeof(bargs->stat)); 4618 } 4619 } 4620 4621 static long btrfs_ioctl_balance(struct file *file, void __user *arg) 4622 { 4623 struct btrfs_root *root = BTRFS_I(file_inode(file))->root; 4624 struct btrfs_fs_info *fs_info = root->fs_info; 4625 struct btrfs_ioctl_balance_args *bargs; 4626 struct btrfs_balance_control *bctl; 4627 bool need_unlock; /* for mut. excl. ops lock */ 4628 int ret; 4629 4630 if (!capable(CAP_SYS_ADMIN)) 4631 return -EPERM; 4632 4633 ret = mnt_want_write_file(file); 4634 if (ret) 4635 return ret; 4636 4637 again: 4638 if (!test_and_set_bit(BTRFS_FS_EXCL_OP, &fs_info->flags)) { 4639 mutex_lock(&fs_info->volume_mutex); 4640 mutex_lock(&fs_info->balance_mutex); 4641 need_unlock = true; 4642 goto locked; 4643 } 4644 4645 /* 4646 * mut. excl. ops lock is locked. Three possibilities: 4647 * (1) some other op is running 4648 * (2) balance is running 4649 * (3) balance is paused -- special case (think resume) 4650 */ 4651 mutex_lock(&fs_info->balance_mutex); 4652 if (fs_info->balance_ctl) { 4653 /* this is either (2) or (3) */ 4654 if (!atomic_read(&fs_info->balance_running)) { 4655 mutex_unlock(&fs_info->balance_mutex); 4656 if (!mutex_trylock(&fs_info->volume_mutex)) 4657 goto again; 4658 mutex_lock(&fs_info->balance_mutex); 4659 4660 if (fs_info->balance_ctl && 4661 !atomic_read(&fs_info->balance_running)) { 4662 /* this is (3) */ 4663 need_unlock = false; 4664 goto locked; 4665 } 4666 4667 mutex_unlock(&fs_info->balance_mutex); 4668 mutex_unlock(&fs_info->volume_mutex); 4669 goto again; 4670 } else { 4671 /* this is (2) */ 4672 mutex_unlock(&fs_info->balance_mutex); 4673 ret = -EINPROGRESS; 4674 goto out; 4675 } 4676 } else { 4677 /* this is (1) */ 4678 mutex_unlock(&fs_info->balance_mutex); 4679 ret = BTRFS_ERROR_DEV_EXCL_RUN_IN_PROGRESS; 4680 goto out; 4681 } 4682 4683 locked: 4684 BUG_ON(!test_bit(BTRFS_FS_EXCL_OP, &fs_info->flags)); 4685 4686 if (arg) { 4687 bargs = memdup_user(arg, sizeof(*bargs)); 4688 if (IS_ERR(bargs)) { 4689 ret = PTR_ERR(bargs); 4690 goto out_unlock; 4691 } 4692 4693 if (bargs->flags & BTRFS_BALANCE_RESUME) { 4694 if (!fs_info->balance_ctl) { 4695 ret = -ENOTCONN; 4696 goto out_bargs; 4697 } 4698 4699 bctl = fs_info->balance_ctl; 4700 spin_lock(&fs_info->balance_lock); 4701 bctl->flags |= BTRFS_BALANCE_RESUME; 4702 spin_unlock(&fs_info->balance_lock); 4703 4704 goto do_balance; 4705 } 4706 } else { 4707 bargs = NULL; 4708 } 4709 4710 if (fs_info->balance_ctl) { 4711 ret = -EINPROGRESS; 4712 goto out_bargs; 4713 } 4714 4715 bctl = kzalloc(sizeof(*bctl), GFP_KERNEL); 4716 if (!bctl) { 4717 ret = -ENOMEM; 4718 goto out_bargs; 4719 } 4720 4721 bctl->fs_info = fs_info; 4722 if (arg) { 4723 memcpy(&bctl->data, &bargs->data, sizeof(bctl->data)); 4724 memcpy(&bctl->meta, &bargs->meta, sizeof(bctl->meta)); 4725 memcpy(&bctl->sys, &bargs->sys, sizeof(bctl->sys)); 4726 4727 bctl->flags = bargs->flags; 4728 } else { 4729 /* balance everything - no filters */ 4730 bctl->flags |= BTRFS_BALANCE_TYPE_MASK; 4731 } 4732 4733 if (bctl->flags & ~(BTRFS_BALANCE_ARGS_MASK | BTRFS_BALANCE_TYPE_MASK)) { 4734 ret = -EINVAL; 4735 goto out_bctl; 4736 } 4737 4738 do_balance: 4739 /* 4740 * Ownership of bctl and filesystem flag BTRFS_FS_EXCL_OP 4741 * goes to to btrfs_balance. bctl is freed in __cancel_balance, 4742 * or, if restriper was paused all the way until unmount, in 4743 * free_fs_info. The flag is cleared in __cancel_balance. 4744 */ 4745 need_unlock = false; 4746 4747 ret = btrfs_balance(bctl, bargs); 4748 bctl = NULL; 4749 4750 if (arg) { 4751 if (copy_to_user(arg, bargs, sizeof(*bargs))) 4752 ret = -EFAULT; 4753 } 4754 4755 out_bctl: 4756 kfree(bctl); 4757 out_bargs: 4758 kfree(bargs); 4759 out_unlock: 4760 mutex_unlock(&fs_info->balance_mutex); 4761 mutex_unlock(&fs_info->volume_mutex); 4762 if (need_unlock) 4763 clear_bit(BTRFS_FS_EXCL_OP, &fs_info->flags); 4764 out: 4765 mnt_drop_write_file(file); 4766 return ret; 4767 } 4768 4769 static long btrfs_ioctl_balance_ctl(struct btrfs_fs_info *fs_info, int cmd) 4770 { 4771 if (!capable(CAP_SYS_ADMIN)) 4772 return -EPERM; 4773 4774 switch (cmd) { 4775 case BTRFS_BALANCE_CTL_PAUSE: 4776 return btrfs_pause_balance(fs_info); 4777 case BTRFS_BALANCE_CTL_CANCEL: 4778 return btrfs_cancel_balance(fs_info); 4779 } 4780 4781 return -EINVAL; 4782 } 4783 4784 static long btrfs_ioctl_balance_progress(struct btrfs_fs_info *fs_info, 4785 void __user *arg) 4786 { 4787 struct btrfs_ioctl_balance_args *bargs; 4788 int ret = 0; 4789 4790 if (!capable(CAP_SYS_ADMIN)) 4791 return -EPERM; 4792 4793 mutex_lock(&fs_info->balance_mutex); 4794 if (!fs_info->balance_ctl) { 4795 ret = -ENOTCONN; 4796 goto out; 4797 } 4798 4799 bargs = kzalloc(sizeof(*bargs), GFP_KERNEL); 4800 if (!bargs) { 4801 ret = -ENOMEM; 4802 goto out; 4803 } 4804 4805 update_ioctl_balance_args(fs_info, 1, bargs); 4806 4807 if (copy_to_user(arg, bargs, sizeof(*bargs))) 4808 ret = -EFAULT; 4809 4810 kfree(bargs); 4811 out: 4812 mutex_unlock(&fs_info->balance_mutex); 4813 return ret; 4814 } 4815 4816 static long btrfs_ioctl_quota_ctl(struct file *file, void __user *arg) 4817 { 4818 struct inode *inode = file_inode(file); 4819 struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb); 4820 struct btrfs_ioctl_quota_ctl_args *sa; 4821 struct btrfs_trans_handle *trans = NULL; 4822 int ret; 4823 int err; 4824 4825 if (!capable(CAP_SYS_ADMIN)) 4826 return -EPERM; 4827 4828 ret = mnt_want_write_file(file); 4829 if (ret) 4830 return ret; 4831 4832 sa = memdup_user(arg, sizeof(*sa)); 4833 if (IS_ERR(sa)) { 4834 ret = PTR_ERR(sa); 4835 goto drop_write; 4836 } 4837 4838 down_write(&fs_info->subvol_sem); 4839 trans = btrfs_start_transaction(fs_info->tree_root, 2); 4840 if (IS_ERR(trans)) { 4841 ret = PTR_ERR(trans); 4842 goto out; 4843 } 4844 4845 switch (sa->cmd) { 4846 case BTRFS_QUOTA_CTL_ENABLE: 4847 ret = btrfs_quota_enable(trans, fs_info); 4848 break; 4849 case BTRFS_QUOTA_CTL_DISABLE: 4850 ret = btrfs_quota_disable(trans, fs_info); 4851 break; 4852 default: 4853 ret = -EINVAL; 4854 break; 4855 } 4856 4857 err = btrfs_commit_transaction(trans); 4858 if (err && !ret) 4859 ret = err; 4860 out: 4861 kfree(sa); 4862 up_write(&fs_info->subvol_sem); 4863 drop_write: 4864 mnt_drop_write_file(file); 4865 return ret; 4866 } 4867 4868 static long btrfs_ioctl_qgroup_assign(struct file *file, void __user *arg) 4869 { 4870 struct inode *inode = file_inode(file); 4871 struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb); 4872 struct btrfs_root *root = BTRFS_I(inode)->root; 4873 struct btrfs_ioctl_qgroup_assign_args *sa; 4874 struct btrfs_trans_handle *trans; 4875 int ret; 4876 int err; 4877 4878 if (!capable(CAP_SYS_ADMIN)) 4879 return -EPERM; 4880 4881 ret = mnt_want_write_file(file); 4882 if (ret) 4883 return ret; 4884 4885 sa = memdup_user(arg, sizeof(*sa)); 4886 if (IS_ERR(sa)) { 4887 ret = PTR_ERR(sa); 4888 goto drop_write; 4889 } 4890 4891 trans = btrfs_join_transaction(root); 4892 if (IS_ERR(trans)) { 4893 ret = PTR_ERR(trans); 4894 goto out; 4895 } 4896 4897 if (sa->assign) { 4898 ret = btrfs_add_qgroup_relation(trans, fs_info, 4899 sa->src, sa->dst); 4900 } else { 4901 ret = btrfs_del_qgroup_relation(trans, fs_info, 4902 sa->src, sa->dst); 4903 } 4904 4905 /* update qgroup status and info */ 4906 err = btrfs_run_qgroups(trans, fs_info); 4907 if (err < 0) 4908 btrfs_handle_fs_error(fs_info, err, 4909 "failed to update qgroup status and info"); 4910 err = btrfs_end_transaction(trans); 4911 if (err && !ret) 4912 ret = err; 4913 4914 out: 4915 kfree(sa); 4916 drop_write: 4917 mnt_drop_write_file(file); 4918 return ret; 4919 } 4920 4921 static long btrfs_ioctl_qgroup_create(struct file *file, void __user *arg) 4922 { 4923 struct inode *inode = file_inode(file); 4924 struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb); 4925 struct btrfs_root *root = BTRFS_I(inode)->root; 4926 struct btrfs_ioctl_qgroup_create_args *sa; 4927 struct btrfs_trans_handle *trans; 4928 int ret; 4929 int err; 4930 4931 if (!capable(CAP_SYS_ADMIN)) 4932 return -EPERM; 4933 4934 ret = mnt_want_write_file(file); 4935 if (ret) 4936 return ret; 4937 4938 sa = memdup_user(arg, sizeof(*sa)); 4939 if (IS_ERR(sa)) { 4940 ret = PTR_ERR(sa); 4941 goto drop_write; 4942 } 4943 4944 if (!sa->qgroupid) { 4945 ret = -EINVAL; 4946 goto out; 4947 } 4948 4949 trans = btrfs_join_transaction(root); 4950 if (IS_ERR(trans)) { 4951 ret = PTR_ERR(trans); 4952 goto out; 4953 } 4954 4955 if (sa->create) { 4956 ret = btrfs_create_qgroup(trans, fs_info, sa->qgroupid); 4957 } else { 4958 ret = btrfs_remove_qgroup(trans, fs_info, sa->qgroupid); 4959 } 4960 4961 err = btrfs_end_transaction(trans); 4962 if (err && !ret) 4963 ret = err; 4964 4965 out: 4966 kfree(sa); 4967 drop_write: 4968 mnt_drop_write_file(file); 4969 return ret; 4970 } 4971 4972 static long btrfs_ioctl_qgroup_limit(struct file *file, void __user *arg) 4973 { 4974 struct inode *inode = file_inode(file); 4975 struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb); 4976 struct btrfs_root *root = BTRFS_I(inode)->root; 4977 struct btrfs_ioctl_qgroup_limit_args *sa; 4978 struct btrfs_trans_handle *trans; 4979 int ret; 4980 int err; 4981 u64 qgroupid; 4982 4983 if (!capable(CAP_SYS_ADMIN)) 4984 return -EPERM; 4985 4986 ret = mnt_want_write_file(file); 4987 if (ret) 4988 return ret; 4989 4990 sa = memdup_user(arg, sizeof(*sa)); 4991 if (IS_ERR(sa)) { 4992 ret = PTR_ERR(sa); 4993 goto drop_write; 4994 } 4995 4996 trans = btrfs_join_transaction(root); 4997 if (IS_ERR(trans)) { 4998 ret = PTR_ERR(trans); 4999 goto out; 5000 } 5001 5002 qgroupid = sa->qgroupid; 5003 if (!qgroupid) { 5004 /* take the current subvol as qgroup */ 5005 qgroupid = root->root_key.objectid; 5006 } 5007 5008 ret = btrfs_limit_qgroup(trans, fs_info, qgroupid, &sa->lim); 5009 5010 err = btrfs_end_transaction(trans); 5011 if (err && !ret) 5012 ret = err; 5013 5014 out: 5015 kfree(sa); 5016 drop_write: 5017 mnt_drop_write_file(file); 5018 return ret; 5019 } 5020 5021 static long btrfs_ioctl_quota_rescan(struct file *file, void __user *arg) 5022 { 5023 struct inode *inode = file_inode(file); 5024 struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb); 5025 struct btrfs_ioctl_quota_rescan_args *qsa; 5026 int ret; 5027 5028 if (!capable(CAP_SYS_ADMIN)) 5029 return -EPERM; 5030 5031 ret = mnt_want_write_file(file); 5032 if (ret) 5033 return ret; 5034 5035 qsa = memdup_user(arg, sizeof(*qsa)); 5036 if (IS_ERR(qsa)) { 5037 ret = PTR_ERR(qsa); 5038 goto drop_write; 5039 } 5040 5041 if (qsa->flags) { 5042 ret = -EINVAL; 5043 goto out; 5044 } 5045 5046 ret = btrfs_qgroup_rescan(fs_info); 5047 5048 out: 5049 kfree(qsa); 5050 drop_write: 5051 mnt_drop_write_file(file); 5052 return ret; 5053 } 5054 5055 static long btrfs_ioctl_quota_rescan_status(struct file *file, void __user *arg) 5056 { 5057 struct inode *inode = file_inode(file); 5058 struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb); 5059 struct btrfs_ioctl_quota_rescan_args *qsa; 5060 int ret = 0; 5061 5062 if (!capable(CAP_SYS_ADMIN)) 5063 return -EPERM; 5064 5065 qsa = kzalloc(sizeof(*qsa), GFP_KERNEL); 5066 if (!qsa) 5067 return -ENOMEM; 5068 5069 if (fs_info->qgroup_flags & BTRFS_QGROUP_STATUS_FLAG_RESCAN) { 5070 qsa->flags = 1; 5071 qsa->progress = fs_info->qgroup_rescan_progress.objectid; 5072 } 5073 5074 if (copy_to_user(arg, qsa, sizeof(*qsa))) 5075 ret = -EFAULT; 5076 5077 kfree(qsa); 5078 return ret; 5079 } 5080 5081 static long btrfs_ioctl_quota_rescan_wait(struct file *file, void __user *arg) 5082 { 5083 struct inode *inode = file_inode(file); 5084 struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb); 5085 5086 if (!capable(CAP_SYS_ADMIN)) 5087 return -EPERM; 5088 5089 return btrfs_qgroup_wait_for_completion(fs_info, true); 5090 } 5091 5092 static long _btrfs_ioctl_set_received_subvol(struct file *file, 5093 struct btrfs_ioctl_received_subvol_args *sa) 5094 { 5095 struct inode *inode = file_inode(file); 5096 struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb); 5097 struct btrfs_root *root = BTRFS_I(inode)->root; 5098 struct btrfs_root_item *root_item = &root->root_item; 5099 struct btrfs_trans_handle *trans; 5100 struct timespec ct = current_time(inode); 5101 int ret = 0; 5102 int received_uuid_changed; 5103 5104 if (!inode_owner_or_capable(inode)) 5105 return -EPERM; 5106 5107 ret = mnt_want_write_file(file); 5108 if (ret < 0) 5109 return ret; 5110 5111 down_write(&fs_info->subvol_sem); 5112 5113 if (btrfs_ino(BTRFS_I(inode)) != BTRFS_FIRST_FREE_OBJECTID) { 5114 ret = -EINVAL; 5115 goto out; 5116 } 5117 5118 if (btrfs_root_readonly(root)) { 5119 ret = -EROFS; 5120 goto out; 5121 } 5122 5123 /* 5124 * 1 - root item 5125 * 2 - uuid items (received uuid + subvol uuid) 5126 */ 5127 trans = btrfs_start_transaction(root, 3); 5128 if (IS_ERR(trans)) { 5129 ret = PTR_ERR(trans); 5130 trans = NULL; 5131 goto out; 5132 } 5133 5134 sa->rtransid = trans->transid; 5135 sa->rtime.sec = ct.tv_sec; 5136 sa->rtime.nsec = ct.tv_nsec; 5137 5138 received_uuid_changed = memcmp(root_item->received_uuid, sa->uuid, 5139 BTRFS_UUID_SIZE); 5140 if (received_uuid_changed && 5141 !btrfs_is_empty_uuid(root_item->received_uuid)) 5142 btrfs_uuid_tree_rem(trans, fs_info, root_item->received_uuid, 5143 BTRFS_UUID_KEY_RECEIVED_SUBVOL, 5144 root->root_key.objectid); 5145 memcpy(root_item->received_uuid, sa->uuid, BTRFS_UUID_SIZE); 5146 btrfs_set_root_stransid(root_item, sa->stransid); 5147 btrfs_set_root_rtransid(root_item, sa->rtransid); 5148 btrfs_set_stack_timespec_sec(&root_item->stime, sa->stime.sec); 5149 btrfs_set_stack_timespec_nsec(&root_item->stime, sa->stime.nsec); 5150 btrfs_set_stack_timespec_sec(&root_item->rtime, sa->rtime.sec); 5151 btrfs_set_stack_timespec_nsec(&root_item->rtime, sa->rtime.nsec); 5152 5153 ret = btrfs_update_root(trans, fs_info->tree_root, 5154 &root->root_key, &root->root_item); 5155 if (ret < 0) { 5156 btrfs_end_transaction(trans); 5157 goto out; 5158 } 5159 if (received_uuid_changed && !btrfs_is_empty_uuid(sa->uuid)) { 5160 ret = btrfs_uuid_tree_add(trans, fs_info, sa->uuid, 5161 BTRFS_UUID_KEY_RECEIVED_SUBVOL, 5162 root->root_key.objectid); 5163 if (ret < 0 && ret != -EEXIST) { 5164 btrfs_abort_transaction(trans, ret); 5165 btrfs_end_transaction(trans); 5166 goto out; 5167 } 5168 } 5169 ret = btrfs_commit_transaction(trans); 5170 out: 5171 up_write(&fs_info->subvol_sem); 5172 mnt_drop_write_file(file); 5173 return ret; 5174 } 5175 5176 #ifdef CONFIG_64BIT 5177 static long btrfs_ioctl_set_received_subvol_32(struct file *file, 5178 void __user *arg) 5179 { 5180 struct btrfs_ioctl_received_subvol_args_32 *args32 = NULL; 5181 struct btrfs_ioctl_received_subvol_args *args64 = NULL; 5182 int ret = 0; 5183 5184 args32 = memdup_user(arg, sizeof(*args32)); 5185 if (IS_ERR(args32)) 5186 return PTR_ERR(args32); 5187 5188 args64 = kmalloc(sizeof(*args64), GFP_KERNEL); 5189 if (!args64) { 5190 ret = -ENOMEM; 5191 goto out; 5192 } 5193 5194 memcpy(args64->uuid, args32->uuid, BTRFS_UUID_SIZE); 5195 args64->stransid = args32->stransid; 5196 args64->rtransid = args32->rtransid; 5197 args64->stime.sec = args32->stime.sec; 5198 args64->stime.nsec = args32->stime.nsec; 5199 args64->rtime.sec = args32->rtime.sec; 5200 args64->rtime.nsec = args32->rtime.nsec; 5201 args64->flags = args32->flags; 5202 5203 ret = _btrfs_ioctl_set_received_subvol(file, args64); 5204 if (ret) 5205 goto out; 5206 5207 memcpy(args32->uuid, args64->uuid, BTRFS_UUID_SIZE); 5208 args32->stransid = args64->stransid; 5209 args32->rtransid = args64->rtransid; 5210 args32->stime.sec = args64->stime.sec; 5211 args32->stime.nsec = args64->stime.nsec; 5212 args32->rtime.sec = args64->rtime.sec; 5213 args32->rtime.nsec = args64->rtime.nsec; 5214 args32->flags = args64->flags; 5215 5216 ret = copy_to_user(arg, args32, sizeof(*args32)); 5217 if (ret) 5218 ret = -EFAULT; 5219 5220 out: 5221 kfree(args32); 5222 kfree(args64); 5223 return ret; 5224 } 5225 #endif 5226 5227 static long btrfs_ioctl_set_received_subvol(struct file *file, 5228 void __user *arg) 5229 { 5230 struct btrfs_ioctl_received_subvol_args *sa = NULL; 5231 int ret = 0; 5232 5233 sa = memdup_user(arg, sizeof(*sa)); 5234 if (IS_ERR(sa)) 5235 return PTR_ERR(sa); 5236 5237 ret = _btrfs_ioctl_set_received_subvol(file, sa); 5238 5239 if (ret) 5240 goto out; 5241 5242 ret = copy_to_user(arg, sa, sizeof(*sa)); 5243 if (ret) 5244 ret = -EFAULT; 5245 5246 out: 5247 kfree(sa); 5248 return ret; 5249 } 5250 5251 static int btrfs_ioctl_get_fslabel(struct file *file, void __user *arg) 5252 { 5253 struct inode *inode = file_inode(file); 5254 struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb); 5255 size_t len; 5256 int ret; 5257 char label[BTRFS_LABEL_SIZE]; 5258 5259 spin_lock(&fs_info->super_lock); 5260 memcpy(label, fs_info->super_copy->label, BTRFS_LABEL_SIZE); 5261 spin_unlock(&fs_info->super_lock); 5262 5263 len = strnlen(label, BTRFS_LABEL_SIZE); 5264 5265 if (len == BTRFS_LABEL_SIZE) { 5266 btrfs_warn(fs_info, 5267 "label is too long, return the first %zu bytes", 5268 --len); 5269 } 5270 5271 ret = copy_to_user(arg, label, len); 5272 5273 return ret ? -EFAULT : 0; 5274 } 5275 5276 static int btrfs_ioctl_set_fslabel(struct file *file, void __user *arg) 5277 { 5278 struct inode *inode = file_inode(file); 5279 struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb); 5280 struct btrfs_root *root = BTRFS_I(inode)->root; 5281 struct btrfs_super_block *super_block = fs_info->super_copy; 5282 struct btrfs_trans_handle *trans; 5283 char label[BTRFS_LABEL_SIZE]; 5284 int ret; 5285 5286 if (!capable(CAP_SYS_ADMIN)) 5287 return -EPERM; 5288 5289 if (copy_from_user(label, arg, sizeof(label))) 5290 return -EFAULT; 5291 5292 if (strnlen(label, BTRFS_LABEL_SIZE) == BTRFS_LABEL_SIZE) { 5293 btrfs_err(fs_info, 5294 "unable to set label with more than %d bytes", 5295 BTRFS_LABEL_SIZE - 1); 5296 return -EINVAL; 5297 } 5298 5299 ret = mnt_want_write_file(file); 5300 if (ret) 5301 return ret; 5302 5303 trans = btrfs_start_transaction(root, 0); 5304 if (IS_ERR(trans)) { 5305 ret = PTR_ERR(trans); 5306 goto out_unlock; 5307 } 5308 5309 spin_lock(&fs_info->super_lock); 5310 strcpy(super_block->label, label); 5311 spin_unlock(&fs_info->super_lock); 5312 ret = btrfs_commit_transaction(trans); 5313 5314 out_unlock: 5315 mnt_drop_write_file(file); 5316 return ret; 5317 } 5318 5319 #define INIT_FEATURE_FLAGS(suffix) \ 5320 { .compat_flags = BTRFS_FEATURE_COMPAT_##suffix, \ 5321 .compat_ro_flags = BTRFS_FEATURE_COMPAT_RO_##suffix, \ 5322 .incompat_flags = BTRFS_FEATURE_INCOMPAT_##suffix } 5323 5324 int btrfs_ioctl_get_supported_features(void __user *arg) 5325 { 5326 static const struct btrfs_ioctl_feature_flags features[3] = { 5327 INIT_FEATURE_FLAGS(SUPP), 5328 INIT_FEATURE_FLAGS(SAFE_SET), 5329 INIT_FEATURE_FLAGS(SAFE_CLEAR) 5330 }; 5331 5332 if (copy_to_user(arg, &features, sizeof(features))) 5333 return -EFAULT; 5334 5335 return 0; 5336 } 5337 5338 static int btrfs_ioctl_get_features(struct file *file, void __user *arg) 5339 { 5340 struct inode *inode = file_inode(file); 5341 struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb); 5342 struct btrfs_super_block *super_block = fs_info->super_copy; 5343 struct btrfs_ioctl_feature_flags features; 5344 5345 features.compat_flags = btrfs_super_compat_flags(super_block); 5346 features.compat_ro_flags = btrfs_super_compat_ro_flags(super_block); 5347 features.incompat_flags = btrfs_super_incompat_flags(super_block); 5348 5349 if (copy_to_user(arg, &features, sizeof(features))) 5350 return -EFAULT; 5351 5352 return 0; 5353 } 5354 5355 static int check_feature_bits(struct btrfs_fs_info *fs_info, 5356 enum btrfs_feature_set set, 5357 u64 change_mask, u64 flags, u64 supported_flags, 5358 u64 safe_set, u64 safe_clear) 5359 { 5360 const char *type = btrfs_feature_set_names[set]; 5361 char *names; 5362 u64 disallowed, unsupported; 5363 u64 set_mask = flags & change_mask; 5364 u64 clear_mask = ~flags & change_mask; 5365 5366 unsupported = set_mask & ~supported_flags; 5367 if (unsupported) { 5368 names = btrfs_printable_features(set, unsupported); 5369 if (names) { 5370 btrfs_warn(fs_info, 5371 "this kernel does not support the %s feature bit%s", 5372 names, strchr(names, ',') ? "s" : ""); 5373 kfree(names); 5374 } else 5375 btrfs_warn(fs_info, 5376 "this kernel does not support %s bits 0x%llx", 5377 type, unsupported); 5378 return -EOPNOTSUPP; 5379 } 5380 5381 disallowed = set_mask & ~safe_set; 5382 if (disallowed) { 5383 names = btrfs_printable_features(set, disallowed); 5384 if (names) { 5385 btrfs_warn(fs_info, 5386 "can't set the %s feature bit%s while mounted", 5387 names, strchr(names, ',') ? "s" : ""); 5388 kfree(names); 5389 } else 5390 btrfs_warn(fs_info, 5391 "can't set %s bits 0x%llx while mounted", 5392 type, disallowed); 5393 return -EPERM; 5394 } 5395 5396 disallowed = clear_mask & ~safe_clear; 5397 if (disallowed) { 5398 names = btrfs_printable_features(set, disallowed); 5399 if (names) { 5400 btrfs_warn(fs_info, 5401 "can't clear the %s feature bit%s while mounted", 5402 names, strchr(names, ',') ? "s" : ""); 5403 kfree(names); 5404 } else 5405 btrfs_warn(fs_info, 5406 "can't clear %s bits 0x%llx while mounted", 5407 type, disallowed); 5408 return -EPERM; 5409 } 5410 5411 return 0; 5412 } 5413 5414 #define check_feature(fs_info, change_mask, flags, mask_base) \ 5415 check_feature_bits(fs_info, FEAT_##mask_base, change_mask, flags, \ 5416 BTRFS_FEATURE_ ## mask_base ## _SUPP, \ 5417 BTRFS_FEATURE_ ## mask_base ## _SAFE_SET, \ 5418 BTRFS_FEATURE_ ## mask_base ## _SAFE_CLEAR) 5419 5420 static int btrfs_ioctl_set_features(struct file *file, void __user *arg) 5421 { 5422 struct inode *inode = file_inode(file); 5423 struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb); 5424 struct btrfs_root *root = BTRFS_I(inode)->root; 5425 struct btrfs_super_block *super_block = fs_info->super_copy; 5426 struct btrfs_ioctl_feature_flags flags[2]; 5427 struct btrfs_trans_handle *trans; 5428 u64 newflags; 5429 int ret; 5430 5431 if (!capable(CAP_SYS_ADMIN)) 5432 return -EPERM; 5433 5434 if (copy_from_user(flags, arg, sizeof(flags))) 5435 return -EFAULT; 5436 5437 /* Nothing to do */ 5438 if (!flags[0].compat_flags && !flags[0].compat_ro_flags && 5439 !flags[0].incompat_flags) 5440 return 0; 5441 5442 ret = check_feature(fs_info, flags[0].compat_flags, 5443 flags[1].compat_flags, COMPAT); 5444 if (ret) 5445 return ret; 5446 5447 ret = check_feature(fs_info, flags[0].compat_ro_flags, 5448 flags[1].compat_ro_flags, COMPAT_RO); 5449 if (ret) 5450 return ret; 5451 5452 ret = check_feature(fs_info, flags[0].incompat_flags, 5453 flags[1].incompat_flags, INCOMPAT); 5454 if (ret) 5455 return ret; 5456 5457 ret = mnt_want_write_file(file); 5458 if (ret) 5459 return ret; 5460 5461 trans = btrfs_start_transaction(root, 0); 5462 if (IS_ERR(trans)) { 5463 ret = PTR_ERR(trans); 5464 goto out_drop_write; 5465 } 5466 5467 spin_lock(&fs_info->super_lock); 5468 newflags = btrfs_super_compat_flags(super_block); 5469 newflags |= flags[0].compat_flags & flags[1].compat_flags; 5470 newflags &= ~(flags[0].compat_flags & ~flags[1].compat_flags); 5471 btrfs_set_super_compat_flags(super_block, newflags); 5472 5473 newflags = btrfs_super_compat_ro_flags(super_block); 5474 newflags |= flags[0].compat_ro_flags & flags[1].compat_ro_flags; 5475 newflags &= ~(flags[0].compat_ro_flags & ~flags[1].compat_ro_flags); 5476 btrfs_set_super_compat_ro_flags(super_block, newflags); 5477 5478 newflags = btrfs_super_incompat_flags(super_block); 5479 newflags |= flags[0].incompat_flags & flags[1].incompat_flags; 5480 newflags &= ~(flags[0].incompat_flags & ~flags[1].incompat_flags); 5481 btrfs_set_super_incompat_flags(super_block, newflags); 5482 spin_unlock(&fs_info->super_lock); 5483 5484 ret = btrfs_commit_transaction(trans); 5485 out_drop_write: 5486 mnt_drop_write_file(file); 5487 5488 return ret; 5489 } 5490 5491 static int _btrfs_ioctl_send(struct file *file, void __user *argp, bool compat) 5492 { 5493 struct btrfs_ioctl_send_args *arg; 5494 int ret; 5495 5496 if (compat) { 5497 #if defined(CONFIG_64BIT) && defined(CONFIG_COMPAT) 5498 struct btrfs_ioctl_send_args_32 args32; 5499 5500 ret = copy_from_user(&args32, argp, sizeof(args32)); 5501 if (ret) 5502 return -EFAULT; 5503 arg = kzalloc(sizeof(*arg), GFP_KERNEL); 5504 if (!arg) 5505 return -ENOMEM; 5506 arg->send_fd = args32.send_fd; 5507 arg->clone_sources_count = args32.clone_sources_count; 5508 arg->clone_sources = compat_ptr(args32.clone_sources); 5509 arg->parent_root = args32.parent_root; 5510 arg->flags = args32.flags; 5511 memcpy(arg->reserved, args32.reserved, 5512 sizeof(args32.reserved)); 5513 #else 5514 return -ENOTTY; 5515 #endif 5516 } else { 5517 arg = memdup_user(argp, sizeof(*arg)); 5518 if (IS_ERR(arg)) 5519 return PTR_ERR(arg); 5520 } 5521 ret = btrfs_ioctl_send(file, arg); 5522 kfree(arg); 5523 return ret; 5524 } 5525 5526 long btrfs_ioctl(struct file *file, unsigned int 5527 cmd, unsigned long arg) 5528 { 5529 struct inode *inode = file_inode(file); 5530 struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb); 5531 struct btrfs_root *root = BTRFS_I(inode)->root; 5532 void __user *argp = (void __user *)arg; 5533 5534 switch (cmd) { 5535 case FS_IOC_GETFLAGS: 5536 return btrfs_ioctl_getflags(file, argp); 5537 case FS_IOC_SETFLAGS: 5538 return btrfs_ioctl_setflags(file, argp); 5539 case FS_IOC_GETVERSION: 5540 return btrfs_ioctl_getversion(file, argp); 5541 case FITRIM: 5542 return btrfs_ioctl_fitrim(file, argp); 5543 case BTRFS_IOC_SNAP_CREATE: 5544 return btrfs_ioctl_snap_create(file, argp, 0); 5545 case BTRFS_IOC_SNAP_CREATE_V2: 5546 return btrfs_ioctl_snap_create_v2(file, argp, 0); 5547 case BTRFS_IOC_SUBVOL_CREATE: 5548 return btrfs_ioctl_snap_create(file, argp, 1); 5549 case BTRFS_IOC_SUBVOL_CREATE_V2: 5550 return btrfs_ioctl_snap_create_v2(file, argp, 1); 5551 case BTRFS_IOC_SNAP_DESTROY: 5552 return btrfs_ioctl_snap_destroy(file, argp); 5553 case BTRFS_IOC_SUBVOL_GETFLAGS: 5554 return btrfs_ioctl_subvol_getflags(file, argp); 5555 case BTRFS_IOC_SUBVOL_SETFLAGS: 5556 return btrfs_ioctl_subvol_setflags(file, argp); 5557 case BTRFS_IOC_DEFAULT_SUBVOL: 5558 return btrfs_ioctl_default_subvol(file, argp); 5559 case BTRFS_IOC_DEFRAG: 5560 return btrfs_ioctl_defrag(file, NULL); 5561 case BTRFS_IOC_DEFRAG_RANGE: 5562 return btrfs_ioctl_defrag(file, argp); 5563 case BTRFS_IOC_RESIZE: 5564 return btrfs_ioctl_resize(file, argp); 5565 case BTRFS_IOC_ADD_DEV: 5566 return btrfs_ioctl_add_dev(fs_info, argp); 5567 case BTRFS_IOC_RM_DEV: 5568 return btrfs_ioctl_rm_dev(file, argp); 5569 case BTRFS_IOC_RM_DEV_V2: 5570 return btrfs_ioctl_rm_dev_v2(file, argp); 5571 case BTRFS_IOC_FS_INFO: 5572 return btrfs_ioctl_fs_info(fs_info, argp); 5573 case BTRFS_IOC_DEV_INFO: 5574 return btrfs_ioctl_dev_info(fs_info, argp); 5575 case BTRFS_IOC_BALANCE: 5576 return btrfs_ioctl_balance(file, NULL); 5577 case BTRFS_IOC_TRANS_START: 5578 return btrfs_ioctl_trans_start(file); 5579 case BTRFS_IOC_TRANS_END: 5580 return btrfs_ioctl_trans_end(file); 5581 case BTRFS_IOC_TREE_SEARCH: 5582 return btrfs_ioctl_tree_search(file, argp); 5583 case BTRFS_IOC_TREE_SEARCH_V2: 5584 return btrfs_ioctl_tree_search_v2(file, argp); 5585 case BTRFS_IOC_INO_LOOKUP: 5586 return btrfs_ioctl_ino_lookup(file, argp); 5587 case BTRFS_IOC_INO_PATHS: 5588 return btrfs_ioctl_ino_to_path(root, argp); 5589 case BTRFS_IOC_LOGICAL_INO: 5590 return btrfs_ioctl_logical_to_ino(fs_info, argp, 1); 5591 case BTRFS_IOC_LOGICAL_INO_V2: 5592 return btrfs_ioctl_logical_to_ino(fs_info, argp, 2); 5593 case BTRFS_IOC_SPACE_INFO: 5594 return btrfs_ioctl_space_info(fs_info, argp); 5595 case BTRFS_IOC_SYNC: { 5596 int ret; 5597 5598 ret = btrfs_start_delalloc_roots(fs_info, 0, -1); 5599 if (ret) 5600 return ret; 5601 ret = btrfs_sync_fs(inode->i_sb, 1); 5602 /* 5603 * The transaction thread may want to do more work, 5604 * namely it pokes the cleaner kthread that will start 5605 * processing uncleaned subvols. 5606 */ 5607 wake_up_process(fs_info->transaction_kthread); 5608 return ret; 5609 } 5610 case BTRFS_IOC_START_SYNC: 5611 return btrfs_ioctl_start_sync(root, argp); 5612 case BTRFS_IOC_WAIT_SYNC: 5613 return btrfs_ioctl_wait_sync(fs_info, argp); 5614 case BTRFS_IOC_SCRUB: 5615 return btrfs_ioctl_scrub(file, argp); 5616 case BTRFS_IOC_SCRUB_CANCEL: 5617 return btrfs_ioctl_scrub_cancel(fs_info); 5618 case BTRFS_IOC_SCRUB_PROGRESS: 5619 return btrfs_ioctl_scrub_progress(fs_info, argp); 5620 case BTRFS_IOC_BALANCE_V2: 5621 return btrfs_ioctl_balance(file, argp); 5622 case BTRFS_IOC_BALANCE_CTL: 5623 return btrfs_ioctl_balance_ctl(fs_info, arg); 5624 case BTRFS_IOC_BALANCE_PROGRESS: 5625 return btrfs_ioctl_balance_progress(fs_info, argp); 5626 case BTRFS_IOC_SET_RECEIVED_SUBVOL: 5627 return btrfs_ioctl_set_received_subvol(file, argp); 5628 #ifdef CONFIG_64BIT 5629 case BTRFS_IOC_SET_RECEIVED_SUBVOL_32: 5630 return btrfs_ioctl_set_received_subvol_32(file, argp); 5631 #endif 5632 case BTRFS_IOC_SEND: 5633 return _btrfs_ioctl_send(file, argp, false); 5634 #if defined(CONFIG_64BIT) && defined(CONFIG_COMPAT) 5635 case BTRFS_IOC_SEND_32: 5636 return _btrfs_ioctl_send(file, argp, true); 5637 #endif 5638 case BTRFS_IOC_GET_DEV_STATS: 5639 return btrfs_ioctl_get_dev_stats(fs_info, argp); 5640 case BTRFS_IOC_QUOTA_CTL: 5641 return btrfs_ioctl_quota_ctl(file, argp); 5642 case BTRFS_IOC_QGROUP_ASSIGN: 5643 return btrfs_ioctl_qgroup_assign(file, argp); 5644 case BTRFS_IOC_QGROUP_CREATE: 5645 return btrfs_ioctl_qgroup_create(file, argp); 5646 case BTRFS_IOC_QGROUP_LIMIT: 5647 return btrfs_ioctl_qgroup_limit(file, argp); 5648 case BTRFS_IOC_QUOTA_RESCAN: 5649 return btrfs_ioctl_quota_rescan(file, argp); 5650 case BTRFS_IOC_QUOTA_RESCAN_STATUS: 5651 return btrfs_ioctl_quota_rescan_status(file, argp); 5652 case BTRFS_IOC_QUOTA_RESCAN_WAIT: 5653 return btrfs_ioctl_quota_rescan_wait(file, argp); 5654 case BTRFS_IOC_DEV_REPLACE: 5655 return btrfs_ioctl_dev_replace(fs_info, argp); 5656 case BTRFS_IOC_GET_FSLABEL: 5657 return btrfs_ioctl_get_fslabel(file, argp); 5658 case BTRFS_IOC_SET_FSLABEL: 5659 return btrfs_ioctl_set_fslabel(file, argp); 5660 case BTRFS_IOC_GET_SUPPORTED_FEATURES: 5661 return btrfs_ioctl_get_supported_features(argp); 5662 case BTRFS_IOC_GET_FEATURES: 5663 return btrfs_ioctl_get_features(file, argp); 5664 case BTRFS_IOC_SET_FEATURES: 5665 return btrfs_ioctl_set_features(file, argp); 5666 } 5667 5668 return -ENOTTY; 5669 } 5670 5671 #ifdef CONFIG_COMPAT 5672 long btrfs_compat_ioctl(struct file *file, unsigned int cmd, unsigned long arg) 5673 { 5674 /* 5675 * These all access 32-bit values anyway so no further 5676 * handling is necessary. 5677 */ 5678 switch (cmd) { 5679 case FS_IOC32_GETFLAGS: 5680 cmd = FS_IOC_GETFLAGS; 5681 break; 5682 case FS_IOC32_SETFLAGS: 5683 cmd = FS_IOC_SETFLAGS; 5684 break; 5685 case FS_IOC32_GETVERSION: 5686 cmd = FS_IOC_GETVERSION; 5687 break; 5688 } 5689 5690 return btrfs_ioctl(file, cmd, (unsigned long) compat_ptr(arg)); 5691 } 5692 #endif 5693