1 /* 2 * Copyright (C) 2007 Oracle. All rights reserved. 3 * 4 * This program is free software; you can redistribute it and/or 5 * modify it under the terms of the GNU General Public 6 * License v2 as published by the Free Software Foundation. 7 * 8 * This program is distributed in the hope that it will be useful, 9 * but WITHOUT ANY WARRANTY; without even the implied warranty of 10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 11 * General Public License for more details. 12 * 13 * You should have received a copy of the GNU General Public 14 * License along with this program; if not, write to the 15 * Free Software Foundation, Inc., 59 Temple Place - Suite 330, 16 * Boston, MA 021110-1307, USA. 17 */ 18 19 #include <linux/kernel.h> 20 #include <linux/bio.h> 21 #include <linux/buffer_head.h> 22 #include <linux/file.h> 23 #include <linux/fs.h> 24 #include <linux/fsnotify.h> 25 #include <linux/pagemap.h> 26 #include <linux/highmem.h> 27 #include <linux/time.h> 28 #include <linux/init.h> 29 #include <linux/string.h> 30 #include <linux/backing-dev.h> 31 #include <linux/mount.h> 32 #include <linux/mpage.h> 33 #include <linux/namei.h> 34 #include <linux/swap.h> 35 #include <linux/writeback.h> 36 #include <linux/statfs.h> 37 #include <linux/compat.h> 38 #include <linux/bit_spinlock.h> 39 #include <linux/security.h> 40 #include <linux/xattr.h> 41 #include <linux/vmalloc.h> 42 #include <linux/slab.h> 43 #include <linux/blkdev.h> 44 #include <linux/uuid.h> 45 #include <linux/btrfs.h> 46 #include <linux/uaccess.h> 47 #include "ctree.h" 48 #include "disk-io.h" 49 #include "transaction.h" 50 #include "btrfs_inode.h" 51 #include "print-tree.h" 52 #include "volumes.h" 53 #include "locking.h" 54 #include "inode-map.h" 55 #include "backref.h" 56 #include "rcu-string.h" 57 #include "send.h" 58 #include "dev-replace.h" 59 #include "props.h" 60 #include "sysfs.h" 61 62 #ifdef CONFIG_64BIT 63 /* If we have a 32-bit userspace and 64-bit kernel, then the UAPI 64 * 
structures are incorrect, as the timespec structure from userspace 65 * is 4 bytes too small. We define these alternatives here to teach 66 * the kernel about the 32-bit struct packing. 67 */ 68 struct btrfs_ioctl_timespec_32 { 69 __u64 sec; 70 __u32 nsec; 71 } __attribute__ ((__packed__)); 72 73 struct btrfs_ioctl_received_subvol_args_32 { 74 char uuid[BTRFS_UUID_SIZE]; /* in */ 75 __u64 stransid; /* in */ 76 __u64 rtransid; /* out */ 77 struct btrfs_ioctl_timespec_32 stime; /* in */ 78 struct btrfs_ioctl_timespec_32 rtime; /* out */ 79 __u64 flags; /* in */ 80 __u64 reserved[16]; /* in */ 81 } __attribute__ ((__packed__)); 82 83 #define BTRFS_IOC_SET_RECEIVED_SUBVOL_32 _IOWR(BTRFS_IOCTL_MAGIC, 37, \ 84 struct btrfs_ioctl_received_subvol_args_32) 85 #endif 86 87 88 static int btrfs_clone(struct inode *src, struct inode *inode, 89 u64 off, u64 olen, u64 olen_aligned, u64 destoff); 90 91 /* Mask out flags that are inappropriate for the given type of inode. */ 92 static inline __u32 btrfs_mask_flags(umode_t mode, __u32 flags) 93 { 94 if (S_ISDIR(mode)) 95 return flags; 96 else if (S_ISREG(mode)) 97 return flags & ~FS_DIRSYNC_FL; 98 else 99 return flags & (FS_NODUMP_FL | FS_NOATIME_FL); 100 } 101 102 /* 103 * Export inode flags to the format expected by the FS_IOC_GETFLAGS ioctl. 
104 */ 105 static unsigned int btrfs_flags_to_ioctl(unsigned int flags) 106 { 107 unsigned int iflags = 0; 108 109 if (flags & BTRFS_INODE_SYNC) 110 iflags |= FS_SYNC_FL; 111 if (flags & BTRFS_INODE_IMMUTABLE) 112 iflags |= FS_IMMUTABLE_FL; 113 if (flags & BTRFS_INODE_APPEND) 114 iflags |= FS_APPEND_FL; 115 if (flags & BTRFS_INODE_NODUMP) 116 iflags |= FS_NODUMP_FL; 117 if (flags & BTRFS_INODE_NOATIME) 118 iflags |= FS_NOATIME_FL; 119 if (flags & BTRFS_INODE_DIRSYNC) 120 iflags |= FS_DIRSYNC_FL; 121 if (flags & BTRFS_INODE_NODATACOW) 122 iflags |= FS_NOCOW_FL; 123 124 if ((flags & BTRFS_INODE_COMPRESS) && !(flags & BTRFS_INODE_NOCOMPRESS)) 125 iflags |= FS_COMPR_FL; 126 else if (flags & BTRFS_INODE_NOCOMPRESS) 127 iflags |= FS_NOCOMP_FL; 128 129 return iflags; 130 } 131 132 /* 133 * Update inode->i_flags based on the btrfs internal flags. 134 */ 135 void btrfs_update_iflags(struct inode *inode) 136 { 137 struct btrfs_inode *ip = BTRFS_I(inode); 138 139 inode->i_flags &= ~(S_SYNC|S_APPEND|S_IMMUTABLE|S_NOATIME|S_DIRSYNC); 140 141 if (ip->flags & BTRFS_INODE_SYNC) 142 inode->i_flags |= S_SYNC; 143 if (ip->flags & BTRFS_INODE_IMMUTABLE) 144 inode->i_flags |= S_IMMUTABLE; 145 if (ip->flags & BTRFS_INODE_APPEND) 146 inode->i_flags |= S_APPEND; 147 if (ip->flags & BTRFS_INODE_NOATIME) 148 inode->i_flags |= S_NOATIME; 149 if (ip->flags & BTRFS_INODE_DIRSYNC) 150 inode->i_flags |= S_DIRSYNC; 151 } 152 153 /* 154 * Inherit flags from the parent inode. 155 * 156 * Currently only the compression flags and the cow flags are inherited. 
*/
void btrfs_inherit_iflags(struct inode *inode, struct inode *dir)
{
	unsigned int flags;

	/* No parent given: nothing to inherit, @inode is left untouched. */
	if (!dir)
		return;

	flags = BTRFS_I(dir)->flags;

	/* NOCOMPRESS on the parent takes precedence over COMPRESS. */
	if (flags & BTRFS_INODE_NOCOMPRESS) {
		BTRFS_I(inode)->flags &= ~BTRFS_INODE_COMPRESS;
		BTRFS_I(inode)->flags |= BTRFS_INODE_NOCOMPRESS;
	} else if (flags & BTRFS_INODE_COMPRESS) {
		BTRFS_I(inode)->flags &= ~BTRFS_INODE_NOCOMPRESS;
		BTRFS_I(inode)->flags |= BTRFS_INODE_COMPRESS;
	}

	/* NODATACOW is inherited; regular files additionally get NODATASUM. */
	if (flags & BTRFS_INODE_NODATACOW) {
		BTRFS_I(inode)->flags |= BTRFS_INODE_NODATACOW;
		if (S_ISREG(inode->i_mode))
			BTRFS_I(inode)->flags |= BTRFS_INODE_NODATASUM;
	}

	/* Propagate the result into inode->i_flags. */
	btrfs_update_iflags(inode);
}

/*
 * Flag-read ioctl handler: convert the btrfs inode flags of @file's inode
 * with btrfs_flags_to_ioctl() and copy the resulting unsigned int to @arg.
 *
 * Returns 0 on success, -EFAULT if the copy to user space fails.
 */
static int btrfs_ioctl_getflags(struct file *file, void __user *arg)
{
	struct btrfs_inode *ip = BTRFS_I(file_inode(file));
	unsigned int flags = btrfs_flags_to_ioctl(ip->flags);

	if (copy_to_user(arg, &flags, sizeof(flags)))
		return -EFAULT;
	return 0;
}

/*
 * Validate a user supplied FS_*_FL mask.
 *
 * Returns -EOPNOTSUPP if any bit outside the set listed below is present,
 * -EINVAL if both FS_NOCOMP_FL and FS_COMPR_FL are requested, 0 otherwise.
 */
static int check_flags(unsigned int flags)
{
	if (flags & ~(FS_IMMUTABLE_FL | FS_APPEND_FL | \
		      FS_NOATIME_FL | FS_NODUMP_FL | \
		      FS_SYNC_FL | FS_DIRSYNC_FL | \
		      FS_NOCOMP_FL | FS_COMPR_FL |
		      FS_NOCOW_FL))
		return -EOPNOTSUPP;

	if ((flags & FS_NOCOMP_FL) && (flags & FS_COMPR_FL))
		return -EINVAL;

	return 0;
}

/*
 * Flag-write ioctl handler: read an FS_*_FL mask from @arg, apply it to the
 * btrfs inode flags of @file's inode and write the inode back inside a
 * transaction.
 *
 * Returns 0 on success or a negative errno:
 *   -EPERM   caller fails inode_owner_or_capable(), or tries to toggle
 *            APPEND/IMMUTABLE without CAP_LINUX_IMMUTABLE
 *   -EROFS   the inode's root is read-only
 *   -EFAULT  the mask could not be copied from user space
 *   other    whatever check_flags(), mnt_want_write_file(),
 *            btrfs_set_prop(), btrfs_start_transaction() or
 *            btrfs_update_inode() returned
 *
 * On any failure after the flags were modified, both ip->flags and
 * inode->i_flags are restored to the values saved under i_mutex.
 */
static int btrfs_ioctl_setflags(struct file *file, void __user *arg)
{
	struct inode *inode = file_inode(file);
	struct btrfs_inode *ip = BTRFS_I(inode);
	struct btrfs_root *root = ip->root;
	struct btrfs_trans_handle *trans;
	unsigned int flags, oldflags;
	int ret;
	u64 ip_oldflags;
	unsigned int i_oldflags;
	umode_t mode;

	if (!inode_owner_or_capable(inode))
		return -EPERM;

	if (btrfs_root_readonly(root))
		return -EROFS;

	if (copy_from_user(&flags, arg, sizeof(flags)))
		return -EFAULT;

	ret = check_flags(flags);
	if (ret)
		return ret;

	ret = mnt_want_write_file(file);
	if (ret)
		return ret;

	mutex_lock(&inode->i_mutex);

	/* Snapshot the current state so the error paths can roll back. */
	ip_oldflags = ip->flags;
	i_oldflags = inode->i_flags;
	mode = inode->i_mode;

	flags = btrfs_mask_flags(inode->i_mode, flags);
	oldflags = btrfs_flags_to_ioctl(ip->flags);
	/* Changing APPEND or IMMUTABLE needs CAP_LINUX_IMMUTABLE. */
	if ((flags ^ oldflags) & (FS_APPEND_FL | FS_IMMUTABLE_FL)) {
		if (!capable(CAP_LINUX_IMMUTABLE)) {
			ret = -EPERM;
			goto out_unlock;
		}
	}

	if (flags & FS_SYNC_FL)
		ip->flags |= BTRFS_INODE_SYNC;
	else
		ip->flags &= ~BTRFS_INODE_SYNC;
	if (flags & FS_IMMUTABLE_FL)
		ip->flags |= BTRFS_INODE_IMMUTABLE;
	else
		ip->flags &= ~BTRFS_INODE_IMMUTABLE;
	if (flags & FS_APPEND_FL)
		ip->flags |= BTRFS_INODE_APPEND;
	else
		ip->flags &= ~BTRFS_INODE_APPEND;
	if (flags & FS_NODUMP_FL)
		ip->flags |= BTRFS_INODE_NODUMP;
	else
		ip->flags &= ~BTRFS_INODE_NODUMP;
	if (flags & FS_NOATIME_FL)
		ip->flags |= BTRFS_INODE_NOATIME;
	else
		ip->flags &= ~BTRFS_INODE_NOATIME;
	if (flags & FS_DIRSYNC_FL)
		ip->flags |= BTRFS_INODE_DIRSYNC;
	else
		ip->flags &= ~BTRFS_INODE_DIRSYNC;
	if (flags & FS_NOCOW_FL) {
		if (S_ISREG(mode)) {
			/*
			 * It's safe to turn csums off here, no extents exist.
			 * Otherwise we want the flag to reflect the real COW
			 * status of the file and will not set it.
			 */
			if (inode->i_size == 0)
				ip->flags |= BTRFS_INODE_NODATACOW
					   | BTRFS_INODE_NODATASUM;
		} else {
			ip->flags |= BTRFS_INODE_NODATACOW;
		}
	} else {
		/*
		 * Revert back under the same assumptions as above
		 */
		if (S_ISREG(mode)) {
			if (inode->i_size == 0)
				ip->flags &= ~(BTRFS_INODE_NODATACOW
					     | BTRFS_INODE_NODATASUM);
		} else {
			ip->flags &= ~BTRFS_INODE_NODATACOW;
		}
	}

	/*
	 * The COMPRESS flag can only be changed by users, while the NOCOMPRESS
	 * flag may be changed automatically if compression code won't make
	 * things smaller.
	 */
	if (flags & FS_NOCOMP_FL) {
		ip->flags &= ~BTRFS_INODE_COMPRESS;
		ip->flags |= BTRFS_INODE_NOCOMPRESS;

		/* -ENODATA from the property call is tolerated here. */
		ret = btrfs_set_prop(inode, "btrfs.compression", NULL, 0, 0);
		if (ret && ret != -ENODATA)
			goto out_drop;
	} else if (flags & FS_COMPR_FL) {
		const char *comp;

		ip->flags |= BTRFS_INODE_COMPRESS;
		ip->flags &= ~BTRFS_INODE_NOCOMPRESS;

		/* Property value follows fs_info->compress_type: lzo or zlib. */
		if (root->fs_info->compress_type == BTRFS_COMPRESS_LZO)
			comp = "lzo";
		else
			comp = "zlib";
		ret = btrfs_set_prop(inode, "btrfs.compression",
				     comp, strlen(comp), 0);
		if (ret)
			goto out_drop;

	} else {
		ip->flags &= ~(BTRFS_INODE_COMPRESS | BTRFS_INODE_NOCOMPRESS);
	}

	trans = btrfs_start_transaction(root, 1);
	if (IS_ERR(trans)) {
		ret = PTR_ERR(trans);
		goto out_drop;
	}

	btrfs_update_iflags(inode);
	inode_inc_iversion(inode);
	inode->i_ctime = CURRENT_TIME;
	ret = btrfs_update_inode(trans, root, inode);

	btrfs_end_transaction(trans, root);
 out_drop:
	/* Roll both flag words back to the values saved under i_mutex. */
	if (ret) {
		ip->flags = ip_oldflags;
		inode->i_flags = i_oldflags;
	}

 out_unlock:
	mutex_unlock(&inode->i_mutex);
	mnt_drop_write_file(file);
	return ret;
}

/*
 * Copy inode->i_generation of @file's inode to the user pointer @arg.
 * Returns the result of put_user().
 */
static int btrfs_ioctl_getversion(struct file *file, int __user *arg)
{
	struct inode *inode = file_inode(file);

	return put_user(inode->i_generation, arg);
}

/*
 * Trim ioctl handler: read a struct fstrim_range from @arg, clamp it and
 * hand it to btrfs_trim_fs(), then copy the (possibly updated) range back.
 *
 * Returns 0 on success or a negative errno:
 *   -EPERM       caller lacks CAP_SYS_ADMIN
 *   -EOPNOTSUPP  no device with a block device and discard support found
 *   -EFAULT      copying the range from/to user space failed
 *   -EINVAL      range.start is beyond the super block's total_bytes, or
 *                range.len is smaller than the super block's s_blocksize
 *   other        negative value returned by btrfs_trim_fs()
 */
static noinline int btrfs_ioctl_fitrim(struct file *file, void __user *arg)
{
	struct btrfs_fs_info *fs_info = btrfs_sb(file_inode(file)->i_sb);
	struct btrfs_device *device;
	struct request_queue *q;
	struct fstrim_range range;
	u64 minlen = ULLONG_MAX;
	u64 num_devices = 0;
	u64 total_bytes = btrfs_super_total_bytes(fs_info->super_copy);
	int ret;

	if (!capable(CAP_SYS_ADMIN))
		return -EPERM;

	/*
	 * Count the devices whose queue supports discard and remember the
	 * smallest discard granularity among them.
	 */
	rcu_read_lock();
	list_for_each_entry_rcu(device, &fs_info->fs_devices->devices,
				dev_list) {
		if (!device->bdev)
			continue;
		q = bdev_get_queue(device->bdev);
		if (blk_queue_discard(q)) {
			num_devices++;
			minlen = min((u64)q->limits.discard_granularity,
				     minlen);
		}
	}
	rcu_read_unlock();

	if (!num_devices)
		return -EOPNOTSUPP;
	if (copy_from_user(&range, arg, sizeof(range)))
		return -EFAULT;
	if (range.start > total_bytes ||
	    range.len < fs_info->sb->s_blocksize)
		return -EINVAL;

	/* Clamp the length to the filesystem and raise minlen if needed. */
	range.len = min(range.len, total_bytes - range.start);
	range.minlen = max(range.minlen, minlen);
	ret = btrfs_trim_fs(fs_info->tree_root, &range);
	if (ret < 0)
		return ret;

	if (copy_to_user(arg, &range, sizeof(range)))
		return -EFAULT;

	return 0;
}

/*
 * Return 1 if all BTRFS_UUID_SIZE bytes of @uuid are zero, 0 otherwise.
 */
int btrfs_is_empty_uuid(u8 *uuid)
{
	int i;

	for (i = 0; i < BTRFS_UUID_SIZE; i++) {
		if (uuid[i])
			return 0;
	}
	return 1;
}

/*
 * Create a new, empty subvolume named @name (@namelen bytes) in directory
 * @dir and instantiate @dentry with its root inode.
 *
 * @async_transid: if non-NULL it receives the transaction id and the commit
 *                 is first attempted with btrfs_commit_transaction_async();
 *                 a synchronous commit is used if that fails.
 * @inherit:       passed through to btrfs_qgroup_inherit().
 *
 * Once the transaction has been started every exit goes through the fail
 * label, which drops the temporary block reservation and commits the
 * transaction (some error paths abort it first).
 *
 * Returns 0 on success or a negative errno from the first failing step.
 */
static noinline int create_subvol(struct inode *dir,
				  struct dentry *dentry,
				  char *name, int namelen,
				  u64 *async_transid,
				  struct btrfs_qgroup_inherit *inherit)
{
	struct btrfs_trans_handle *trans;
	struct btrfs_key key;
	struct btrfs_root_item root_item;
	struct btrfs_inode_item *inode_item;
	struct extent_buffer *leaf;
	struct btrfs_root *root = BTRFS_I(dir)->root;
	struct btrfs_root *new_root;
	struct btrfs_block_rsv block_rsv;
	struct timespec cur_time = CURRENT_TIME;
	struct inode *inode;
	int ret;
	int err;
	u64 objectid;
	u64 new_dirid = BTRFS_FIRST_FREE_OBJECTID;
	u64 index = 0;
	u64 qgroup_reserved;
	uuid_le new_uuid;

	/* Object id for the new root, allocated from the tree root. */
	ret = btrfs_find_free_objectid(root->fs_info->tree_root, &objectid);
	if (ret)
		return ret;

	btrfs_init_block_rsv(&block_rsv, BTRFS_BLOCK_RSV_TEMP);
	/*
	 * The same as the snapshot creation, please see the comment
	 * of create_snapshot().
	 */
	ret = btrfs_subvolume_reserve_metadata(root, &block_rsv,
					       8, &qgroup_reserved, false);
	if (ret)
		return ret;

	trans = btrfs_start_transaction(root, 0);
	if (IS_ERR(trans)) {
		ret = PTR_ERR(trans);
		btrfs_subvolume_release_metadata(root, &block_rsv,
						 qgroup_reserved);
		return ret;
	}
	/* Make the transaction draw from the temporary reservation. */
	trans->block_rsv = &block_rsv;
	trans->bytes_reserved = block_rsv.size;

	ret = btrfs_qgroup_inherit(trans, root->fs_info, 0, objectid, inherit);
	if (ret)
		goto fail;

	/* Allocate the block that becomes the new tree's root leaf. */
	leaf = btrfs_alloc_free_block(trans, root, root->leafsize,
				      0, objectid, NULL, 0, 0, 0);
	if (IS_ERR(leaf)) {
		ret = PTR_ERR(leaf);
		goto fail;
	}

	/* Zero the header, then fill in the leaf's identity fields. */
	memset_extent_buffer(leaf, 0, 0, sizeof(struct btrfs_header));
	btrfs_set_header_bytenr(leaf, leaf->start);
	btrfs_set_header_generation(leaf, trans->transid);
	btrfs_set_header_backref_rev(leaf, BTRFS_MIXED_BACKREF_REV);
	btrfs_set_header_owner(leaf, objectid);

	write_extent_buffer(leaf, root->fs_info->fsid, btrfs_header_fsid(),
			    BTRFS_FSID_SIZE);
	write_extent_buffer(leaf, root->fs_info->chunk_tree_uuid,
			    btrfs_header_chunk_tree_uuid(leaf),
			    BTRFS_UUID_SIZE);
	btrfs_mark_buffer_dirty(leaf);

	/* Build the root item on the stack, starting from all zeroes. */
	memset(&root_item, 0, sizeof(root_item));

	inode_item = &root_item.inode;
	btrfs_set_stack_inode_generation(inode_item, 1);
	btrfs_set_stack_inode_size(inode_item, 3);
	btrfs_set_stack_inode_nlink(inode_item, 1);
	btrfs_set_stack_inode_nbytes(inode_item, root->leafsize);
	btrfs_set_stack_inode_mode(inode_item, S_IFDIR | 0755);

	btrfs_set_root_flags(&root_item, 0);
	btrfs_set_root_limit(&root_item, 0);
	btrfs_set_stack_inode_flags(inode_item, BTRFS_INODE_ROOT_ITEM_INIT);

	btrfs_set_root_bytenr(&root_item, leaf->start);
	btrfs_set_root_generation(&root_item, trans->transid);
	btrfs_set_root_level(&root_item, 0);
	btrfs_set_root_refs(&root_item, 1);
	btrfs_set_root_used(&root_item, leaf->len);
	btrfs_set_root_last_snapshot(&root_item, 0);

	/* Fresh UUID, creation time and transaction ids for the new root. */
	btrfs_set_root_generation_v2(&root_item,
			btrfs_root_generation(&root_item));
	uuid_le_gen(&new_uuid);
	memcpy(root_item.uuid, new_uuid.b, BTRFS_UUID_SIZE);
	btrfs_set_stack_timespec_sec(&root_item.otime, cur_time.tv_sec);
	btrfs_set_stack_timespec_nsec(&root_item.otime, cur_time.tv_nsec);
	root_item.ctime = root_item.otime;
	btrfs_set_root_ctransid(&root_item, trans->transid);
	btrfs_set_root_otransid(&root_item, trans->transid);

	/* Done with the leaf buffer itself. */
	btrfs_tree_unlock(leaf);
	free_extent_buffer(leaf);
	leaf = NULL;

	btrfs_set_root_dirid(&root_item, new_dirid);

	key.objectid = objectid;
	key.offset = 0;
	btrfs_set_key_type(&key, BTRFS_ROOT_ITEM_KEY);
	ret = btrfs_insert_root(trans, root->fs_info->tree_root, &key,
				&root_item);
	if (ret)
		goto fail;

	/* Read the just-inserted root back; offset -1 is the lookup key. */
	key.offset = (u64)-1;
	new_root = btrfs_read_fs_root_no_name(root->fs_info, &key);
	if (IS_ERR(new_root)) {
		btrfs_abort_transaction(trans, root, PTR_ERR(new_root));
		ret = PTR_ERR(new_root);
		goto fail;
	}

	btrfs_record_root_in_trans(trans, new_root);

	ret = btrfs_create_subvol_root(trans, new_root, root, new_dirid);
	if (ret) {
		/* We potentially lose an unused inode item here */
		btrfs_abort_transaction(trans, root, ret);
		goto fail;
	}

	/*
	 * insert the directory item
	 */
	ret = btrfs_set_inode_index(dir, &index);
	if (ret) {
		btrfs_abort_transaction(trans, root, ret);
		goto fail;
	}

	ret = btrfs_insert_dir_item(trans, root,
				    name, namelen, dir, &key,
				    BTRFS_FT_DIR, index);
	if (ret) {
		btrfs_abort_transaction(trans, root, ret);
		goto fail;
	}

	/* The parent directory's i_size grows by twice the name length. */
	btrfs_i_size_write(dir, dir->i_size + namelen * 2);
	ret = btrfs_update_inode(trans, root, dir);
	BUG_ON(ret);

	ret = btrfs_add_root_ref(trans, root->fs_info->tree_root,
				 objectid, root->root_key.objectid,
				 btrfs_ino(dir), index, name, namelen);
	BUG_ON(ret);

	ret = btrfs_uuid_tree_add(trans, root->fs_info->uuid_root,
				  root_item.uuid, BTRFS_UUID_KEY_SUBVOL,
				  objectid);
	if (ret)
		btrfs_abort_transaction(trans, root, ret);

fail:
	/* Detach and release the temporary reservation before committing. */
	trans->block_rsv = NULL;
	trans->bytes_reserved = 0;
	btrfs_subvolume_release_metadata(root, &block_rsv, qgroup_reserved);

	if (async_transid) {
		*async_transid = trans->transid;
		err = btrfs_commit_transaction_async(trans, root, 1);
		if (err)
			err = btrfs_commit_transaction(trans, root);
	} else {
		err = btrfs_commit_transaction(trans, root);
	}
	/* A commit error is only reported if nothing failed earlier. */
	if (err && !ret)
		ret = err;

	if (!ret) {
		inode = btrfs_lookup_dentry(dir, dentry);
		if (IS_ERR(inode))
			return PTR_ERR(inode);
		d_instantiate(dentry, inode);
	}
	return ret;
}

/*
 * Sleep (TASK_UNINTERRUPTIBLE) on root->subv_writers->wait until the summed
 * per-cpu counter root->subv_writers->counter reads zero.
 */
static void btrfs_wait_nocow_write(struct btrfs_root *root)
{
	s64 writers;
	DEFINE_WAIT(wait);

	do {
		prepare_to_wait(&root->subv_writers->wait, &wait,
				TASK_UNINTERRUPTIBLE);

		writers = percpu_counter_sum(&root->subv_writers->counter);
		if (writers)
			schedule();

		finish_wait(&root->subv_writers->wait, &wait);
	} while (writers);
}

/*
 * Create a snapshot of @root named by @dentry inside directory @dir.
 *
 * The snapshot itself is not built here: a btrfs_pending_snapshot is queued
 * on the running transaction's pending_snapshots list and the transaction
 * is committed; the outcome is then read from pending_snapshot->error and
 * pending_snapshot->snap.
 *
 * @name, @namelen: not referenced in this function body.
 * @async_transid:  if non-NULL it receives the transaction id and an async
 *                  commit is tried first, falling back to a synchronous one.
 * @readonly:       stored in the pending snapshot.
 * @inherit:        stored in the pending snapshot.
 *
 * root->will_be_snapshoted is raised for the duration of the call.
 *
 * Returns 0 on success, -EINVAL if @root has ref_cows unset, -ENOMEM if the
 * pending snapshot cannot be allocated, or the error of the failing helper.
 */
static int create_snapshot(struct btrfs_root *root, struct inode *dir,
			   struct dentry *dentry, char *name, int namelen,
			   u64 *async_transid, bool readonly,
			   struct btrfs_qgroup_inherit *inherit)
{
	struct inode *inode;
	struct btrfs_pending_snapshot *pending_snapshot;
	struct btrfs_trans_handle *trans;
	int ret;

	if (!root->ref_cows)
		return -EINVAL;

	/* Announce the snapshot, then wait for the writer count to drain. */
	atomic_inc(&root->will_be_snapshoted);
	smp_mb__after_atomic();
	btrfs_wait_nocow_write(root);

	ret = btrfs_start_delalloc_inodes(root, 0);
	if (ret)
		goto out;

	btrfs_wait_ordered_extents(root, -1);

	pending_snapshot = kzalloc(sizeof(*pending_snapshot), GFP_NOFS);
	if (!pending_snapshot) {
		ret = -ENOMEM;
		goto out;
	}

	btrfs_init_block_rsv(&pending_snapshot->block_rsv,
			     BTRFS_BLOCK_RSV_TEMP);
	/*
	 * 1 - parent dir inode
	 * 2 - dir entries
	 * 1 - root item
	 * 2 - root ref/backref
	 * 1 - root of snapshot
	 * 1 - UUID item
	 */
	ret = btrfs_subvolume_reserve_metadata(BTRFS_I(dir)->root,
					&pending_snapshot->block_rsv, 8,
					&pending_snapshot->qgroup_reserved,
					false);
	if (ret)
		goto free;

	pending_snapshot->dentry = dentry;
	pending_snapshot->root = root;
	pending_snapshot->readonly = readonly;
	pending_snapshot->dir = dir;
	pending_snapshot->inherit = inherit;

	trans = btrfs_start_transaction(root, 0);
	if (IS_ERR(trans)) {
		ret = PTR_ERR(trans);
		goto fail;
	}

	/* Queue the request on the running transaction. */
	spin_lock(&root->fs_info->trans_lock);
	list_add(&pending_snapshot->list,
		 &trans->transaction->pending_snapshots);
	spin_unlock(&root->fs_info->trans_lock);
	if (async_transid) {
		*async_transid = trans->transid;
		ret = btrfs_commit_transaction_async(trans,
				     root->fs_info->extent_root, 1);
		if (ret)
			ret = btrfs_commit_transaction(trans, root);
	} else {
		ret = btrfs_commit_transaction(trans,
					       root->fs_info->extent_root);
	}
	if (ret)
		goto fail;

	/* The commit went through; now check the snapshot's own status. */
	ret = pending_snapshot->error;
	if (ret)
		goto fail;

	ret = btrfs_orphan_cleanup(pending_snapshot->snap);
	if (ret)
		goto fail;

	inode = btrfs_lookup_dentry(dentry->d_parent->d_inode, dentry);
	if (IS_ERR(inode)) {
		ret = PTR_ERR(inode);
		goto fail;
	}

	d_instantiate(dentry, inode);
	ret = 0;
fail:
	/* Reached on success too: the reservation is always released. */
	btrfs_subvolume_release_metadata(BTRFS_I(dir)->root,
					 &pending_snapshot->block_rsv,
					 pending_snapshot->qgroup_reserved);
free:
	kfree(pending_snapshot);
out:
	atomic_dec(&root->will_be_snapshoted);
	return ret;
}

/* copy of check_sticky in fs/namei.c()
 * It's inline, so penalty for filesystems that don't use sticky bit is
 * minimal.
736 */ 737 static inline int btrfs_check_sticky(struct inode *dir, struct inode *inode) 738 { 739 kuid_t fsuid = current_fsuid(); 740 741 if (!(dir->i_mode & S_ISVTX)) 742 return 0; 743 if (uid_eq(inode->i_uid, fsuid)) 744 return 0; 745 if (uid_eq(dir->i_uid, fsuid)) 746 return 0; 747 return !capable(CAP_FOWNER); 748 } 749 750 /* copy of may_delete in fs/namei.c() 751 * Check whether we can remove a link victim from directory dir, check 752 * whether the type of victim is right. 753 * 1. We can't do it if dir is read-only (done in permission()) 754 * 2. We should have write and exec permissions on dir 755 * 3. We can't remove anything from append-only dir 756 * 4. We can't do anything with immutable dir (done in permission()) 757 * 5. If the sticky bit on dir is set we should either 758 * a. be owner of dir, or 759 * b. be owner of victim, or 760 * c. have CAP_FOWNER capability 761 * 6. If the victim is append-only or immutable we can't do antyhing with 762 * links pointing to it. 763 * 7. If we were asked to remove a directory and victim isn't one - ENOTDIR. 764 * 8. If we were asked to remove a non-directory and victim isn't one - EISDIR. 765 * 9. We can't remove a root or mountpoint. 766 * 10. We don't allow removal of NFS sillyrenamed files; it's handled by 767 * nfs_async_unlink(). 
768 */ 769 770 static int btrfs_may_delete(struct inode *dir, struct dentry *victim, int isdir) 771 { 772 int error; 773 774 if (!victim->d_inode) 775 return -ENOENT; 776 777 BUG_ON(victim->d_parent->d_inode != dir); 778 audit_inode_child(dir, victim, AUDIT_TYPE_CHILD_DELETE); 779 780 error = inode_permission(dir, MAY_WRITE | MAY_EXEC); 781 if (error) 782 return error; 783 if (IS_APPEND(dir)) 784 return -EPERM; 785 if (btrfs_check_sticky(dir, victim->d_inode)|| 786 IS_APPEND(victim->d_inode)|| 787 IS_IMMUTABLE(victim->d_inode) || IS_SWAPFILE(victim->d_inode)) 788 return -EPERM; 789 if (isdir) { 790 if (!S_ISDIR(victim->d_inode->i_mode)) 791 return -ENOTDIR; 792 if (IS_ROOT(victim)) 793 return -EBUSY; 794 } else if (S_ISDIR(victim->d_inode->i_mode)) 795 return -EISDIR; 796 if (IS_DEADDIR(dir)) 797 return -ENOENT; 798 if (victim->d_flags & DCACHE_NFSFS_RENAMED) 799 return -EBUSY; 800 return 0; 801 } 802 803 /* copy of may_create in fs/namei.c() */ 804 static inline int btrfs_may_create(struct inode *dir, struct dentry *child) 805 { 806 if (child->d_inode) 807 return -EEXIST; 808 if (IS_DEADDIR(dir)) 809 return -ENOENT; 810 return inode_permission(dir, MAY_WRITE | MAY_EXEC); 811 } 812 813 /* 814 * Create a new subvolume below @parent. This is largely modeled after 815 * sys_mkdirat and vfs_mkdir, but we only do a single component lookup 816 * inside this filesystem so it's quite a bit simpler. 
817 */ 818 static noinline int btrfs_mksubvol(struct path *parent, 819 char *name, int namelen, 820 struct btrfs_root *snap_src, 821 u64 *async_transid, bool readonly, 822 struct btrfs_qgroup_inherit *inherit) 823 { 824 struct inode *dir = parent->dentry->d_inode; 825 struct dentry *dentry; 826 int error; 827 828 error = mutex_lock_killable_nested(&dir->i_mutex, I_MUTEX_PARENT); 829 if (error == -EINTR) 830 return error; 831 832 dentry = lookup_one_len(name, parent->dentry, namelen); 833 error = PTR_ERR(dentry); 834 if (IS_ERR(dentry)) 835 goto out_unlock; 836 837 error = -EEXIST; 838 if (dentry->d_inode) 839 goto out_dput; 840 841 error = btrfs_may_create(dir, dentry); 842 if (error) 843 goto out_dput; 844 845 /* 846 * even if this name doesn't exist, we may get hash collisions. 847 * check for them now when we can safely fail 848 */ 849 error = btrfs_check_dir_item_collision(BTRFS_I(dir)->root, 850 dir->i_ino, name, 851 namelen); 852 if (error) 853 goto out_dput; 854 855 down_read(&BTRFS_I(dir)->root->fs_info->subvol_sem); 856 857 if (btrfs_root_refs(&BTRFS_I(dir)->root->root_item) == 0) 858 goto out_up_read; 859 860 if (snap_src) { 861 error = create_snapshot(snap_src, dir, dentry, name, namelen, 862 async_transid, readonly, inherit); 863 } else { 864 error = create_subvol(dir, dentry, name, namelen, 865 async_transid, inherit); 866 } 867 if (!error) 868 fsnotify_mkdir(dir, dentry); 869 out_up_read: 870 up_read(&BTRFS_I(dir)->root->fs_info->subvol_sem); 871 out_dput: 872 dput(dentry); 873 out_unlock: 874 mutex_unlock(&dir->i_mutex); 875 return error; 876 } 877 878 /* 879 * When we're defragging a range, we don't want to kick it off again 880 * if it is really just waiting for delalloc to send it down. 
881 * If we find a nice big extent or delalloc range for the bytes in the 882 * file you want to defrag, we return 0 to let you know to skip this 883 * part of the file 884 */ 885 static int check_defrag_in_cache(struct inode *inode, u64 offset, int thresh) 886 { 887 struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree; 888 struct extent_map *em = NULL; 889 struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree; 890 u64 end; 891 892 read_lock(&em_tree->lock); 893 em = lookup_extent_mapping(em_tree, offset, PAGE_CACHE_SIZE); 894 read_unlock(&em_tree->lock); 895 896 if (em) { 897 end = extent_map_end(em); 898 free_extent_map(em); 899 if (end - offset > thresh) 900 return 0; 901 } 902 /* if we already have a nice delalloc here, just stop */ 903 thresh /= 2; 904 end = count_range_bits(io_tree, &offset, offset + thresh, 905 thresh, EXTENT_DELALLOC, 1); 906 if (end >= thresh) 907 return 0; 908 return 1; 909 } 910 911 /* 912 * helper function to walk through a file and find extents 913 * newer than a specific transid, and smaller than thresh. 
*
 * This is used by the defragging code to find new and small
 * extents
 *
 * The search starts at key offset *@off of @inode in @root; @newer_than is
 * passed through to btrfs_search_forward().  When a regular file extent
 * shorter than @thresh is found for which check_defrag_in_cache() returns
 * non-zero, *@off is set to that item's key offset and 0 is returned.
 *
 * Returns -ENOMEM if no path could be allocated and -ENOENT when the search
 * ends without a match.
 */
static int find_new_extents(struct btrfs_root *root,
			    struct inode *inode, u64 newer_than,
			    u64 *off, int thresh)
{
	struct btrfs_path *path;
	struct btrfs_key min_key;
	struct extent_buffer *leaf;
	struct btrfs_file_extent_item *extent;
	int type;
	int ret;
	u64 ino = btrfs_ino(inode);

	path = btrfs_alloc_path();
	if (!path)
		return -ENOMEM;

	min_key.objectid = ino;
	min_key.type = BTRFS_EXTENT_DATA_KEY;
	min_key.offset = *off;

	while (1) {
		path->keep_locks = 1;
		ret = btrfs_search_forward(root, &min_key, path, newer_than);
		if (ret != 0)
			goto none;
		path->keep_locks = 0;
		btrfs_unlock_up_safe(path, 1);
process_slot:
		/* Stop once we leave this inode's EXTENT_DATA items. */
		if (min_key.objectid != ino)
			goto none;
		if (min_key.type != BTRFS_EXTENT_DATA_KEY)
			goto none;

		leaf = path->nodes[0];
		extent = btrfs_item_ptr(leaf, path->slots[0],
					struct btrfs_file_extent_item);

		type = btrfs_file_extent_type(leaf, extent);
		if (type == BTRFS_FILE_EXTENT_REG &&
		    btrfs_file_extent_num_bytes(leaf, extent) < thresh &&
		    check_defrag_in_cache(inode, min_key.offset, thresh)) {
			*off = min_key.offset;
			btrfs_free_path(path);
			return 0;
		}

		/* Try the next slot of the same leaf before searching again. */
		path->slots[0]++;
		if (path->slots[0] < btrfs_header_nritems(leaf)) {
			btrfs_item_key_to_cpu(leaf, &min_key, path->slots[0]);
			goto process_slot;
		}

		/* Leaf exhausted: give up if the offset cannot advance. */
		if (min_key.offset == (u64)-1)
			goto none;

		min_key.offset++;
		btrfs_release_path(path);
	}
none:
	btrfs_free_path(path);
	return -ENOENT;
}

/*
 * Return the extent map covering one page (PAGE_CACHE_SIZE bytes) at
 * @start, or NULL if btrfs_get_extent() reports an error.  A non-NULL
 * result must be dropped with free_extent_map(), as the callers in this
 * file do.
 */
static struct extent_map *defrag_lookup_extent(struct inode *inode, u64 start)
{
	struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree;
	struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree;
	struct extent_map *em;
	u64 len = PAGE_CACHE_SIZE;

	/*
	 * hopefully we have this extent in the tree already, try without
	 * the full extent lock
	 */
	read_lock(&em_tree->lock);
	em = lookup_extent_mapping(em_tree, start, len);
	read_unlock(&em_tree->lock);

	if (!em) {
		struct extent_state *cached = NULL;
		u64 end = start + len - 1;

		/* get the big lock and read metadata off disk */
		lock_extent_bits(io_tree, start, end, 0, &cached);
		em = btrfs_get_extent(inode, NULL, 0, start, len, 0);
		unlock_extent_cached(io_tree, start, end, &cached, GFP_NOFS);

		if (IS_ERR(em))
			return NULL;
	}

	return em;
}

/*
 * Inspect the extent that follows @em.
 *
 * Returns false when @em ends at or beyond i_size, when no following extent
 * map is found, when the following extent's block_start is at or above
 * EXTENT_MAP_LAST_BYTE, or when it starts on disk exactly where @em ends.
 * Returns true otherwise.
 */
static bool defrag_check_next_extent(struct inode *inode, struct extent_map *em)
{
	struct extent_map *next;
	bool ret = true;

	/* this is the last extent */
	if (em->start + em->len >= i_size_read(inode))
		return false;

	next = defrag_lookup_extent(inode, em->start + em->len);
	if (!next || next->block_start >= EXTENT_MAP_LAST_BYTE ||
	    (em->block_start + em->block_len == next->block_start))
		ret = false;

	/* Called unconditionally, so also when the lookup returned NULL. */
	free_extent_map(next);
	return ret;
}

/*
 * Decide whether the extent at file offset @start should be defragmented.
 *
 * @thresh:     size threshold compared against the extent length and
 *              *@last_len
 * @last_len:   in/out, reset to 0 whenever the answer is "no"
 * @skip:       out, set to the end of the examined extent on "no" and to 0
 *              otherwise (left untouched on the early "keep going" return)
 * @defrag_end: in/out, end of the extent currently being defragmented; set
 *              on "yes", reset to 0 on "no"
 * @compress:   when non-zero the size-based "don't bother" test is skipped
 *
 * Returns 1 to defragment, 0 to skip.
 */
static int should_defrag_range(struct inode *inode, u64 start, int thresh,
			       u64 *last_len, u64 *skip, u64 *defrag_end,
			       int compress)
{
	struct extent_map *em;
	int ret = 1;
	bool next_mergeable = true;

	/*
	 * make sure that once we start defragging an extent, we keep on
	 * defragging it
	 */
	if (start < *defrag_end)
		return 1;

	*skip = 0;

	em = defrag_lookup_extent(inode, start);
	if (!em)
		return 0;

	/* this will cover holes, and inline extents */
	if (em->block_start >= EXTENT_MAP_LAST_BYTE) {
		ret = 0;
		goto out;
	}

	next_mergeable = defrag_check_next_extent(inode, em);

	/*
	 * we hit a real extent, if it is big or the next extent is not a
	 * real extent, don't bother defragging it
	 */
	if (!compress && (*last_len == 0 || *last_len >= thresh) &&
	    (em->len >= thresh || !next_mergeable))
		ret = 0;
out:
	/*
	 * last_len ends up being a counter of how many bytes we've defragged.
	 * every time we choose not to defrag an extent, we reset *last_len
	 * so that the next tiny extent will force a defrag.
	 *
	 * The end result of this is that tiny extents before a single big
	 * extent will force at least part of that big extent to be defragged.
	 */
	if (ret) {
		*defrag_end = extent_map_end(em);
	} else {
		*last_len = 0;
		*skip = extent_map_end(em);
		*defrag_end = 0;
	}

	free_extent_map(em);
	return ret;
}

/*
 * it doesn't do much good to defrag one or two pages
 * at a time. This pulls in a nice chunk of pages
 * to COW and defrag.
 *
 * It also makes sure the delalloc code has enough
 * dirty data to avoid making new small extents as part
 * of the defrag
 *
 * It's a good idea to start RA on this range
 * before calling this.
*/
/*
 * Lock, read and re-dirty up to @num_pages pages of @inode starting at page
 * index @start_index, so that they are rewritten together.
 *
 * @pages must have room for at least @num_pages entries; it is used as
 * scratch storage for the pages that were locked.
 *
 * Returns the number of pages that were prepared, 0 when the file is empty,
 * @start_index lies beyond the last page, nothing could be locked or the
 * superblock is no longer MS_ACTIVE, and a negative errno when the delalloc
 * reservation fails or a page cannot be brought up to date (-EIO).
 */
static int cluster_pages_for_defrag(struct inode *inode,
				    struct page **pages,
				    unsigned long start_index,
				    unsigned long num_pages)
{
	unsigned long file_end;
	u64 isize = i_size_read(inode);
	u64 page_start;
	u64 page_end;
	u64 page_cnt;
	int ret;
	int i;
	int i_done;
	struct btrfs_ordered_extent *ordered;
	struct extent_state *cached_state = NULL;
	struct extent_io_tree *tree;
	gfp_t mask = btrfs_alloc_write_mask(inode->i_mapping);

	/* index of the last page that holds file data */
	file_end = (isize - 1) >> PAGE_CACHE_SHIFT;
	if (!isize || start_index > file_end)
		return 0;

	/* never work past the last page of the file */
	page_cnt = min_t(u64, (u64)num_pages, (u64)file_end - start_index + 1);

	/* reserve for the whole cluster; unused parts are released below */
	ret = btrfs_delalloc_reserve_space(inode,
					   page_cnt << PAGE_CACHE_SHIFT);
	if (ret)
		return ret;
	i_done = 0;
	tree = &BTRFS_I(inode)->io_tree;

	/* step one, lock all the pages */
	for (i = 0; i < page_cnt; i++) {
		struct page *page;
again:
		page = find_or_create_page(inode->i_mapping,
					   start_index + i, mask);
		if (!page)
			break;

		page_start = page_offset(page);
		page_end = page_start + PAGE_CACHE_SIZE - 1;
		/* loop until no ordered extent covers the start of this page */
		while (1) {
			lock_extent_bits(tree, page_start, page_end,
					 0, &cached_state);
			ordered = btrfs_lookup_ordered_extent(inode,
							      page_start);
			unlock_extent_cached(tree, page_start, page_end,
					     &cached_state, GFP_NOFS);
			if (!ordered)
				break;

			/* drop the page lock while the ordered extent runs */
			unlock_page(page);
			btrfs_start_ordered_extent(inode, ordered, 1);
			btrfs_put_ordered_extent(ordered);
			lock_page(page);
			/*
			 * we unlocked the page above, so we need check if
			 * it was released or not.
			 */
			if (page->mapping != inode->i_mapping) {
				unlock_page(page);
				page_cache_release(page);
				goto again;
			}
		}

		if (!PageUptodate(page)) {
			btrfs_readpage(NULL, page);
			/* the page is re-locked here after the read was issued */
			lock_page(page);
			if (!PageUptodate(page)) {
				unlock_page(page);
				page_cache_release(page);
				ret = -EIO;
				break;
			}
		}

		/* the page may have left this mapping meanwhile; retry the slot */
		if (page->mapping != inode->i_mapping) {
			unlock_page(page);
			page_cache_release(page);
			goto again;
		}

		pages[i] = page;
		i_done++;
	}
	if (!i_done || ret)
		goto out;

	if (!(inode->i_sb->s_flags & MS_ACTIVE))
		goto out;

	/*
	 * so now we have a nice long stream of locked
	 * and up to date pages, lets wait on them
	 */
	for (i = 0; i < i_done; i++)
		wait_on_page_writeback(pages[i]);

	/* byte range covered by the pages we hold; page_end is exclusive */
	page_start = page_offset(pages[0]);
	page_end = page_offset(pages[i_done - 1]) + PAGE_CACHE_SIZE;

	lock_extent_bits(&BTRFS_I(inode)->io_tree,
			 page_start, page_end - 1, 0, &cached_state);
	clear_extent_bit(&BTRFS_I(inode)->io_tree, page_start,
			  page_end - 1, EXTENT_DIRTY | EXTENT_DELALLOC |
			  EXTENT_DO_ACCOUNTING | EXTENT_DEFRAG, 0, 0,
			  &cached_state, GFP_NOFS);

	/*
	 * Fewer pages than reserved were obtained: bump outstanding_extents
	 * under the inode lock and give back the reservation for the pages
	 * that were not locked.
	 */
	if (i_done != page_cnt) {
		spin_lock(&BTRFS_I(inode)->lock);
		BTRFS_I(inode)->outstanding_extents++;
		spin_unlock(&BTRFS_I(inode)->lock);
		btrfs_delalloc_release_space(inode,
				     (page_cnt - i_done) << PAGE_CACHE_SHIFT);
	}


	set_extent_defrag(&BTRFS_I(inode)->io_tree, page_start, page_end - 1,
			  &cached_state, GFP_NOFS);

	unlock_extent_cached(&BTRFS_I(inode)->io_tree,
			     page_start, page_end - 1, &cached_state,
			     GFP_NOFS);

	/* re-dirty every page, then drop its lock and our reference */
	for (i = 0; i < i_done; i++) {
		clear_page_dirty_for_io(pages[i]);
		ClearPageChecked(pages[i]);
		set_page_extent_mapped(pages[i]);
		set_page_dirty(pages[i]);
		unlock_page(pages[i]);
		page_cache_release(pages[i]);
	}
	return i_done;
out:
	/* failure / nothing to do: undo the page locks and the reservation */
	for (i = 0; i < i_done; i++) {
		unlock_page(pages[i]);
		page_cache_release(pages[i]);
	}
	btrfs_delalloc_release_space(inode, page_cnt << PAGE_CACHE_SHIFT);
	return ret;

}

/*
 * Defragment (and optionally recompress) a byte range of @inode.
 *
 * @file:          open file used for readahead state; may be NULL, in which
 *                 case a temporary readahead context is allocated
 * @range:         start/len/flags/extent_thresh/compress_type of the request;
 *                 range->start is updated when @newer_than is used
 * @newer_than:    when non-zero, only ranges reported by find_new_extents()
 *                 for this transid are processed
 * @max_to_defrag: page budget; 0 means "up to the last page of the range"
 *
 * Returns the number of pages handed to cluster_pages_for_defrag() on normal
 * completion, 0 for an empty file, or a negative errno.
 */
int btrfs_defrag_file(struct inode *inode, struct file *file,
		      struct btrfs_ioctl_defrag_range_args *range,
		      u64 newer_than, unsigned long max_to_defrag)
{
	struct btrfs_root *root = BTRFS_I(inode)->root;
	struct file_ra_state *ra = NULL;
	unsigned long last_index;
	u64 isize = i_size_read(inode);
	u64 last_len = 0;
	u64 skip = 0;
	u64 defrag_end = 0;
	u64 newer_off = range->start;
	unsigned long i;
	unsigned long ra_index = 0;
	int ret;
	int defrag_count = 0;
	int compress_type = BTRFS_COMPRESS_ZLIB;
	int extent_thresh = range->extent_thresh;
	/* a cluster is at most 256KiB worth of pages */
	unsigned long max_cluster = (256 * 1024) >> PAGE_CACHE_SHIFT;
	unsigned long cluster = max_cluster;
	/* mask that rounds an offset down to a 128KiB boundary */
	u64 new_align = ~((u64)128 * 1024 - 1);
	struct page **pages = NULL;

	if (isize == 0)
		return 0;

	if (range->start >= isize)
		return -EINVAL;

	if (range->flags & BTRFS_DEFRAG_RANGE_COMPRESS) {
		if (range->compress_type > BTRFS_COMPRESS_TYPES)
			return -EINVAL;
		/* 0 keeps the zlib default chosen above */
		if (range->compress_type)
			compress_type = range->compress_type;
	}

	if (extent_thresh == 0)
		extent_thresh = 256 * 1024;

	/*
	 * if we were not given a file, allocate a readahead
	 * context
	 */
	if (!file) {
		ra = kzalloc(sizeof(*ra), GFP_NOFS);
		if (!ra)
			return -ENOMEM;
		file_ra_state_init(ra, inode->i_mapping);
	} else {
		ra = &file->f_ra;
	}

	pages = kmalloc_array(max_cluster, sizeof(struct page *),
			GFP_NOFS);
	if (!pages) {
		ret = -ENOMEM;
		goto out_ra;
	}

	/* find the last page to defrag */
	/* the comparison is false when len is 0 or start + len wraps */
	if (range->start + range->len > range->start) {
		last_index = min_t(u64, isize - 1,
			 range->start + range->len - 1) >> PAGE_CACHE_SHIFT;
	} else {
		last_index = (isize - 1) >> PAGE_CACHE_SHIFT;
	}

	if (newer_than) {
		ret = find_new_extents(root, inode, newer_than,
				       &newer_off, 64 * 1024);
		if (!ret) {
			range->start = newer_off;
			/*
			 * we always align our defrag to help keep
			 * the extents in the file evenly spaced
			 */
			i = (newer_off & new_align) >> PAGE_CACHE_SHIFT;
		} else
			goto out_ra;
	} else {
		i = range->start >> PAGE_CACHE_SHIFT;
	}
	if (!max_to_defrag)
		max_to_defrag = last_index + 1;

	/*
	 * make writeback starts from i, so the defrag range can be
	 * written sequentially.
	 */
	if (i < inode->i_mapping->writeback_index)
		inode->i_mapping->writeback_index = i;

	/* i_size is re-read each iteration, so a shrinking file ends the loop */
	while (i <= last_index && defrag_count < max_to_defrag &&
	       (i < (i_size_read(inode) + PAGE_CACHE_SIZE - 1) >>
		PAGE_CACHE_SHIFT)) {
		/*
		 * make sure we stop running if someone unmounts
		 * the FS
		 */
		if (!(inode->i_sb->s_flags & MS_ACTIVE))
			break;

		/*
		 * NOTE(review): the -EAGAIN stored here is overwritten by
		 * "ret = defrag_count" after the loop, so the caller sees the
		 * page count rather than -EAGAIN -- confirm this is intended.
		 */
		if (btrfs_defrag_cancelled(root->fs_info)) {
			printk(KERN_DEBUG "BTRFS: defrag_file cancelled\n");
			ret = -EAGAIN;
			break;
		}

		if (!should_defrag_range(inode, (u64)i << PAGE_CACHE_SHIFT,
					 extent_thresh, &last_len, &skip,
					 &defrag_end, range->flags &
					 BTRFS_DEFRAG_RANGE_COMPRESS)) {
			unsigned long next;
			/*
			 * the should_defrag function tells us how much to skip
			 * bump our counter by the suggested amount
			 */
			next = (skip + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;
			/* always advance by at least one page */
			i = max(i + 1, next);
			continue;
		}

		if (!newer_than) {
			/* pages from i up to defrag_end, capped at max_cluster */
			cluster = (PAGE_CACHE_ALIGN(defrag_end) >>
				   PAGE_CACHE_SHIFT) - i;
			cluster = min(cluster, max_cluster);
		} else {
			cluster = max_cluster;
		}

		/* issue readahead only when the cluster passes the RA window */
		if (i + cluster > ra_index) {
			ra_index = max(i, ra_index);
			btrfs_force_ra(inode->i_mapping, ra, file, ra_index,
				       cluster);
			ra_index += max_cluster;
		}

		mutex_lock(&inode->i_mutex);
		if (range->flags & BTRFS_DEFRAG_RANGE_COMPRESS)
			BTRFS_I(inode)->force_compress = compress_type;
		ret = cluster_pages_for_defrag(inode, pages, i, cluster);
		if (ret < 0) {
			mutex_unlock(&inode->i_mutex);
			goto out_ra;
		}

		defrag_count += ret;
		balance_dirty_pages_ratelimited(inode->i_mapping);
		mutex_unlock(&inode->i_mutex);

		if (newer_than) {
			if (newer_off == (u64)-1)
				break;

			if (ret > 0)
				i += ret;

			/* continue after both the last hit and the pages done */
			newer_off = max(newer_off + 1,
					(u64)i << PAGE_CACHE_SHIFT);

			ret = find_new_extents(root, inode,
					       newer_than, &newer_off,
					       64 * 1024);
			if (!ret) {
				range->start = newer_off;
				i = (newer_off & new_align) >> PAGE_CACHE_SHIFT;
			} else {
				break;
			}
		} else {
			if (ret > 0) {
				i += ret;
				last_len += ret << PAGE_CACHE_SHIFT;
			} else {
				i++;
				last_len = 0;
			}
		}
	}

	if ((range->flags & BTRFS_DEFRAG_RANGE_START_IO)) {
		filemap_flush(inode->i_mapping);
		/* flush a second time when the inode has async extents */
		if (test_bit(BTRFS_INODE_HAS_ASYNC_EXTENT,
			     &BTRFS_I(inode)->runtime_flags))
			filemap_flush(inode->i_mapping);
	}

	if ((range->flags & BTRFS_DEFRAG_RANGE_COMPRESS)) {
		/* the filemap_flush will queue IO into the worker threads, but
		 * we have to make sure the IO is actually started and that
		 * ordered extents get created before we return
		 */
		atomic_inc(&root->fs_info->async_submit_draining);
		while (atomic_read(&root->fs_info->nr_async_submits) ||
		      atomic_read(&root->fs_info->async_delalloc_pages)) {
			wait_event(root->fs_info->async_submit_wait,
			   (atomic_read(&root->fs_info->nr_async_submits) == 0 &&
			    atomic_read(&root->fs_info->async_delalloc_pages) == 0));
		}
		atomic_dec(&root->fs_info->async_submit_draining);
	}

	/* note: tested on compress_type alone, not on the COMPRESS flag */
	if (range->compress_type == BTRFS_COMPRESS_LZO) {
		btrfs_set_fs_incompat(root->fs_info, COMPRESS_LZO);
	}

	ret = defrag_count;

out_ra:
	if (range->flags & BTRFS_DEFRAG_RANGE_COMPRESS) {
		mutex_lock(&inode->i_mutex);
		BTRFS_I(inode)->force_compress = BTRFS_COMPRESS_NONE;
		mutex_unlock(&inode->i_mutex);
	}
	/* ra is only ours to free when it was allocated above */
	if (!file)
		kfree(ra);
	kfree(pages);
	return ret;
}

/*
 * Resize one device of the filesystem.
 *
 * @arg points to a user struct btrfs_ioctl_vol_args whose name field holds
 * "[devid:]size".  size is either "max" (the size of the backing block
 * device inode) or a memparse() value, optionally prefixed with '+' or '-'
 * to make it relative to the device's current total_bytes.  devid defaults
 * to 1.
 *
 * Returns 0 on success, BTRFS_ERROR_DEV_EXCL_RUN_IN_PROGRESS when another
 * exclusive operation holds the flag, or a negative errno.
 */
static noinline int btrfs_ioctl_resize(struct file *file,
					void __user *arg)
{
	u64 new_size;
	u64 old_size;
	u64 devid = 1;
	struct btrfs_root *root = BTRFS_I(file_inode(file))->root;
	struct btrfs_ioctl_vol_args *vol_args;
	struct btrfs_trans_handle *trans;
	struct btrfs_device *device = NULL;
	char *sizestr;
	char *retptr;
	char *devstr = NULL;
	int ret = 0;
	int mod = 0;

	if (!capable(CAP_SYS_ADMIN))
		return -EPERM;

	ret = mnt_want_write_file(file);
	if (ret)
		return ret;

	/* claim the exclusive-operation flag; a non-zero old value means busy */
	if (atomic_xchg(&root->fs_info->mutually_exclusive_operation_running,
			1)) {
		mnt_drop_write_file(file);
		return BTRFS_ERROR_DEV_EXCL_RUN_IN_PROGRESS;
	}

	mutex_lock(&root->fs_info->volume_mutex);
	vol_args = memdup_user(arg, sizeof(*vol_args));
	if (IS_ERR(vol_args)) {
		ret = PTR_ERR(vol_args);
		/* skip out_free: vol_args is an error pointer here */
		goto out;
	}

	/* force termination of the user supplied string */
	vol_args->name[BTRFS_PATH_NAME_MAX] = '\0';

	sizestr = vol_args->name;
	devstr = strchr(sizestr, ':');
	if (devstr) {
		/* NOTE(review): 'end' is written by the parser but never read */
		char *end;
		sizestr = devstr + 1;
		*devstr = '\0';
		devstr = vol_args->name;
		devid = simple_strtoull(devstr, &end, 10);
		if (!devid) {
			ret = -EINVAL;
			goto out_free;
		}
		btrfs_info(root->fs_info, "resizing devid %llu", devid);
	}

	device = btrfs_find_device(root->fs_info, devid, NULL, NULL);
	if (!device) {
		btrfs_info(root->fs_info, "resizer unable to find device %llu",
			   devid);
		ret = -ENODEV;
		goto out_free;
	}

	if (!device->writeable) {
		btrfs_info(root->fs_info,
			   "resizer unable to apply on readonly device %llu",
		       devid);
		ret = -EPERM;
		goto out_free;
	}

	if (!strcmp(sizestr, "max"))
		new_size = device->bdev->bd_inode->i_size;
	else {
		/* a leading sign makes the value relative to the current size */
		if (sizestr[0] == '-') {
			mod = -1;
			sizestr++;
		} else if (sizestr[0] == '+') {
			mod = 1;
			sizestr++;
		}
		new_size = memparse(sizestr, &retptr);
		/* reject trailing garbage and a size of zero */
		if (*retptr != '\0' || new_size == 0) {
			ret = -EINVAL;
			goto out_free;
		}
	}

	if (device->is_tgtdev_for_dev_replace) {
		ret = -EPERM;
		goto out_free;
	}

	old_size = device->total_bytes;

	if (mod < 0) {
		/* cannot shrink by more than the current size */
		if (new_size > old_size) {
			ret = -EINVAL;
			goto out_free;
		}
		new_size = old_size - new_size;
	} else if (mod > 0) {
		/* guard the addition against u64 overflow */
		if (new_size > ULLONG_MAX - old_size) {
			ret = -EINVAL;
			goto out_free;
		}
		new_size = old_size + new_size;
	}

	/* sizes below 256MiB are refused */
	if (new_size < 256 * 1024 * 1024) {
		ret = -EINVAL;
		goto out_free;
	}
	if (new_size > device->bdev->bd_inode->i_size) {
		ret = -EFBIG;
		goto out_free;
	}

	/* round down to a multiple of the sector size */
	do_div(new_size, root->sectorsize);
	new_size *= root->sectorsize;

	printk_in_rcu(KERN_INFO "BTRFS: new size for %s is %llu\n",
		      rcu_str_deref(device->name), new_size);

	if (new_size > old_size) {
		trans = btrfs_start_transaction(root, 0);
		if (IS_ERR(trans)) {
			ret = PTR_ERR(trans);
			goto out_free;
		}
		ret = btrfs_grow_device(trans, device, new_size);
		/* the commit's own return value is not propagated */
		btrfs_commit_transaction(trans, root);
	} else if (new_size < old_size) {
		ret = btrfs_shrink_device(device, new_size);
	} /* equal, nothing need to do */

out_free:
	kfree(vol_args);
out:
	mutex_unlock(&root->fs_info->volume_mutex);
	atomic_set(&root->fs_info->mutually_exclusive_operation_running, 0);
	mnt_drop_write_file(file);
	return ret;
}
1606 1607 static noinline int btrfs_ioctl_snap_create_transid(struct file *file, 1608 char *name, unsigned long fd, int subvol, 1609 u64 *transid, bool readonly, 1610 struct btrfs_qgroup_inherit *inherit) 1611 { 1612 int namelen; 1613 int ret = 0; 1614 1615 ret = mnt_want_write_file(file); 1616 if (ret) 1617 goto out; 1618 1619 namelen = strlen(name); 1620 if (strchr(name, '/')) { 1621 ret = -EINVAL; 1622 goto out_drop_write; 1623 } 1624 1625 if (name[0] == '.' && 1626 (namelen == 1 || (name[1] == '.' && namelen == 2))) { 1627 ret = -EEXIST; 1628 goto out_drop_write; 1629 } 1630 1631 if (subvol) { 1632 ret = btrfs_mksubvol(&file->f_path, name, namelen, 1633 NULL, transid, readonly, inherit); 1634 } else { 1635 struct fd src = fdget(fd); 1636 struct inode *src_inode; 1637 if (!src.file) { 1638 ret = -EINVAL; 1639 goto out_drop_write; 1640 } 1641 1642 src_inode = file_inode(src.file); 1643 if (src_inode->i_sb != file_inode(file)->i_sb) { 1644 btrfs_info(BTRFS_I(src_inode)->root->fs_info, 1645 "Snapshot src from another FS"); 1646 ret = -EXDEV; 1647 } else if (!inode_owner_or_capable(src_inode)) { 1648 /* 1649 * Subvolume creation is not restricted, but snapshots 1650 * are limited to own subvolumes only 1651 */ 1652 ret = -EPERM; 1653 } else { 1654 ret = btrfs_mksubvol(&file->f_path, name, namelen, 1655 BTRFS_I(src_inode)->root, 1656 transid, readonly, inherit); 1657 } 1658 fdput(src); 1659 } 1660 out_drop_write: 1661 mnt_drop_write_file(file); 1662 out: 1663 return ret; 1664 } 1665 1666 static noinline int btrfs_ioctl_snap_create(struct file *file, 1667 void __user *arg, int subvol) 1668 { 1669 struct btrfs_ioctl_vol_args *vol_args; 1670 int ret; 1671 1672 vol_args = memdup_user(arg, sizeof(*vol_args)); 1673 if (IS_ERR(vol_args)) 1674 return PTR_ERR(vol_args); 1675 vol_args->name[BTRFS_PATH_NAME_MAX] = '\0'; 1676 1677 ret = btrfs_ioctl_snap_create_transid(file, vol_args->name, 1678 vol_args->fd, subvol, 1679 NULL, false, NULL); 1680 1681 kfree(vol_args); 1682 return 
ret; 1683 } 1684 1685 static noinline int btrfs_ioctl_snap_create_v2(struct file *file, 1686 void __user *arg, int subvol) 1687 { 1688 struct btrfs_ioctl_vol_args_v2 *vol_args; 1689 int ret; 1690 u64 transid = 0; 1691 u64 *ptr = NULL; 1692 bool readonly = false; 1693 struct btrfs_qgroup_inherit *inherit = NULL; 1694 1695 vol_args = memdup_user(arg, sizeof(*vol_args)); 1696 if (IS_ERR(vol_args)) 1697 return PTR_ERR(vol_args); 1698 vol_args->name[BTRFS_SUBVOL_NAME_MAX] = '\0'; 1699 1700 if (vol_args->flags & 1701 ~(BTRFS_SUBVOL_CREATE_ASYNC | BTRFS_SUBVOL_RDONLY | 1702 BTRFS_SUBVOL_QGROUP_INHERIT)) { 1703 ret = -EOPNOTSUPP; 1704 goto out; 1705 } 1706 1707 if (vol_args->flags & BTRFS_SUBVOL_CREATE_ASYNC) 1708 ptr = &transid; 1709 if (vol_args->flags & BTRFS_SUBVOL_RDONLY) 1710 readonly = true; 1711 if (vol_args->flags & BTRFS_SUBVOL_QGROUP_INHERIT) { 1712 if (vol_args->size > PAGE_CACHE_SIZE) { 1713 ret = -EINVAL; 1714 goto out; 1715 } 1716 inherit = memdup_user(vol_args->qgroup_inherit, vol_args->size); 1717 if (IS_ERR(inherit)) { 1718 ret = PTR_ERR(inherit); 1719 goto out; 1720 } 1721 } 1722 1723 ret = btrfs_ioctl_snap_create_transid(file, vol_args->name, 1724 vol_args->fd, subvol, ptr, 1725 readonly, inherit); 1726 1727 if (ret == 0 && ptr && 1728 copy_to_user(arg + 1729 offsetof(struct btrfs_ioctl_vol_args_v2, 1730 transid), ptr, sizeof(*ptr))) 1731 ret = -EFAULT; 1732 out: 1733 kfree(vol_args); 1734 kfree(inherit); 1735 return ret; 1736 } 1737 1738 static noinline int btrfs_ioctl_subvol_getflags(struct file *file, 1739 void __user *arg) 1740 { 1741 struct inode *inode = file_inode(file); 1742 struct btrfs_root *root = BTRFS_I(inode)->root; 1743 int ret = 0; 1744 u64 flags = 0; 1745 1746 if (btrfs_ino(inode) != BTRFS_FIRST_FREE_OBJECTID) 1747 return -EINVAL; 1748 1749 down_read(&root->fs_info->subvol_sem); 1750 if (btrfs_root_readonly(root)) 1751 flags |= BTRFS_SUBVOL_RDONLY; 1752 up_read(&root->fs_info->subvol_sem); 1753 1754 if (copy_to_user(arg, &flags, 
sizeof(flags)))
		ret = -EFAULT;

	return ret;
}

/*
 * Set the subvolume flags of the subvolume whose root directory is @file.
 *
 * Only BTRFS_SUBVOL_RDONLY may be set or cleared: BTRFS_SUBVOL_CREATE_ASYNC
 * yields -EINVAL and any other bit -EOPNOTSUPP.  Clearing read-only is
 * refused with -EPERM while root->send_in_progress is non-zero.
 *
 * Returns 0 on success (including "nothing to change") or a negative errno.
 */
static noinline int btrfs_ioctl_subvol_setflags(struct file *file,
					      void __user *arg)
{
	struct inode *inode = file_inode(file);
	struct btrfs_root *root = BTRFS_I(inode)->root;
	struct btrfs_trans_handle *trans;
	u64 root_flags;
	u64 flags;
	int ret = 0;

	if (!inode_owner_or_capable(inode))
		return -EPERM;

	ret = mnt_want_write_file(file);
	if (ret)
		goto out;

	/* the ioctl must be issued on the root directory of the subvolume */
	if (btrfs_ino(inode) != BTRFS_FIRST_FREE_OBJECTID) {
		ret = -EINVAL;
		goto out_drop_write;
	}

	if (copy_from_user(&flags, arg, sizeof(flags))) {
		ret = -EFAULT;
		goto out_drop_write;
	}

	if (flags & BTRFS_SUBVOL_CREATE_ASYNC) {
		ret = -EINVAL;
		goto out_drop_write;
	}

	if (flags & ~BTRFS_SUBVOL_RDONLY) {
		ret = -EOPNOTSUPP;
		goto out_drop_write;
	}

	down_write(&root->fs_info->subvol_sem);

	/* nothing to do */
	if (!!(flags & BTRFS_SUBVOL_RDONLY) == btrfs_root_readonly(root))
		goto out_drop_sem;

	/* remember the old flags so a failed update can be rolled back */
	root_flags = btrfs_root_flags(&root->root_item);
	if (flags & BTRFS_SUBVOL_RDONLY) {
		btrfs_set_root_flags(&root->root_item,
				     root_flags | BTRFS_ROOT_SUBVOL_RDONLY);
	} else {
		/*
		 * Block RO -> RW transition if this subvolume is involved in
		 * send
		 */
		spin_lock(&root->root_item_lock);
		if (root->send_in_progress == 0) {
			btrfs_set_root_flags(&root->root_item,
				     root_flags & ~BTRFS_ROOT_SUBVOL_RDONLY);
			spin_unlock(&root->root_item_lock);
		} else {
			spin_unlock(&root->root_item_lock);
			btrfs_warn(root->fs_info,
			"Attempt to set subvolume %llu read-write during send",
					root->root_key.objectid);
			ret = -EPERM;
			goto out_drop_sem;
		}
	}

	trans = btrfs_start_transaction(root, 1);
	if (IS_ERR(trans)) {
		ret = PTR_ERR(trans);
		goto out_reset;
	}

	ret = btrfs_update_root(trans, root->fs_info->tree_root,
				&root->root_key, &root->root_item);

	/* the commit's own return value is not propagated */
	btrfs_commit_transaction(trans, root);
out_reset:
	/* on failure put the in-memory flags back to their previous value */
	if (ret)
		btrfs_set_root_flags(&root->root_item, root_flags);
out_drop_sem:
	up_write(&root->fs_info->subvol_sem);
out_drop_write:
	mnt_drop_write_file(file);
out:
	return ret;
}

/*
 * helper to check if the subvolume references other subvolumes
 *
 * Returns 0 when @root may be destroyed, -EPERM when it is the default
 * subvolume, -ENOTEMPTY when a ROOT_REF item keyed on @root exists in the
 * tree root, -ENOMEM or another negative errno from the tree search.
 */
static noinline int may_destroy_subvol(struct btrfs_root *root)
{
	struct btrfs_path *path;
	struct btrfs_dir_item *di;
	struct btrfs_key key;
	u64 dir_id;
	int ret;

	path = btrfs_alloc_path();
	if (!path)
		return -ENOMEM;

	/* Make sure this root isn't set as the default subvol */
	dir_id = btrfs_super_root_dir(root->fs_info->super_copy);
	di = btrfs_lookup_dir_item(NULL, root->fs_info->tree_root, path,
				   dir_id, "default", 7, 0);
	if (di && !IS_ERR(di)) {
		btrfs_dir_item_key_to_cpu(path->nodes[0], di, &key);
		if (key.objectid == root->root_key.objectid) {
			ret = -EPERM;
			btrfs_err(root->fs_info, "deleting default subvolume "
				  "%llu is not allowed", key.objectid);
			goto out;
		}
		btrfs_release_path(path);
	}

	/* search for the highest possible ROOT_REF key of this root */
	key.objectid = root->root_key.objectid;
	key.type = BTRFS_ROOT_REF_KEY;
	key.offset = (u64)-1;

	ret = btrfs_search_slot(NULL, root->fs_info->tree_root,
				&key, path, 0, 0);
	if (ret < 0)
		goto out;
	/* an exact match on offset (u64)-1 is treated as impossible */
	BUG_ON(ret == 0);

	ret = 0;
	/* the item just before the insertion slot is the candidate ref */
	if (path->slots[0] > 0) {
		path->slots[0]--;
		btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
		if (key.objectid == root->root_key.objectid &&
		    key.type == BTRFS_ROOT_REF_KEY)
			ret = -ENOTEMPTY;
	}
out:
	btrfs_free_path(path);
	return ret;
}

static noinline int key_in_sk(struct btrfs_key *key,
			      struct btrfs_ioctl_search_key *sk)
{
	struct
btrfs_key test;
	int ret;

	/* below the lower bound (min_objectid, min_type, min_offset)? */
	test.objectid = sk->min_objectid;
	test.type = sk->min_type;
	test.offset = sk->min_offset;

	ret = btrfs_comp_cpu_keys(key, &test);
	if (ret < 0)
		return 0;

	/* above the upper bound (max_objectid, max_type, max_offset)? */
	test.objectid = sk->max_objectid;
	test.type = sk->max_type;
	test.offset = sk->max_offset;

	ret = btrfs_comp_cpu_keys(key, &test);
	if (ret > 0)
		return 0;
	return 1;
}

/*
 * Copy the items of the leaf at @path that match @sk into @buf.
 *
 * Each hit is stored as a struct btrfs_ioctl_search_header followed by the
 * item data, starting at *@sk_offset, which is advanced accordingly;
 * *@num_found counts the hits.  @key is overwritten with the key of each
 * visited item and finally advanced to the next key to search for.
 *
 * Returns 0 when the caller should continue with the updated @key and 1
 * when the search is finished: either the buffer is full or @key cannot be
 * advanced any further within the bounds of @sk.
 */
static noinline int copy_to_sk(struct btrfs_root *root,
			       struct btrfs_path *path,
			       struct btrfs_key *key,
			       struct btrfs_ioctl_search_key *sk,
			       char *buf,
			       unsigned long *sk_offset,
			       int *num_found)
{
	u64 found_transid;
	struct extent_buffer *leaf;
	struct btrfs_ioctl_search_header sh;
	unsigned long item_off;
	unsigned long item_len;
	int nritems;
	int i;
	int slot;
	int ret = 0;

	leaf = path->nodes[0];
	slot = path->slots[0];
	nritems = btrfs_header_nritems(leaf);

	/* leaf is newer than requested: skip it entirely */
	if (btrfs_header_generation(leaf) > sk->max_transid) {
		i = nritems;
		goto advance_key;
	}
	found_transid = btrfs_header_generation(leaf);

	for (i = slot; i < nritems; i++) {
		item_off = btrfs_item_ptr_offset(leaf, i);
		item_len = btrfs_item_size_nr(leaf, i);

		btrfs_item_key_to_cpu(leaf, key, i);
		if (!key_in_sk(key, sk))
			continue;

		/* item can never fit the buffer: report the header only */
		if (sizeof(sh) + item_len > BTRFS_SEARCH_ARGS_BUFSIZE)
			item_len = 0;

		/* no room left for this hit: stop without advancing the key */
		if (sizeof(sh) + item_len + *sk_offset >
		    BTRFS_SEARCH_ARGS_BUFSIZE) {
			ret = 1;
			goto overflow;
		}

		sh.objectid = key->objectid;
		sh.offset = key->offset;
		sh.type = key->type;
		sh.len = item_len;
		sh.transid = found_transid;

		/* copy search result header */
		memcpy(buf + *sk_offset, &sh, sizeof(sh));
		*sk_offset += sizeof(sh);

		if (item_len) {
			char *p = buf + *sk_offset;
			/* copy the item */
			read_extent_buffer(leaf, p,
					   item_off, item_len);
			*sk_offset += item_len;
		}
		(*num_found)++;

		if (*num_found >= sk->nr_items)
			break;
	}
advance_key:
	ret = 0;
	/*
	 * Step @key to its successor: bump the offset first, then carry
	 * into type and objectid, each capped by its maximum in @sk.
	 */
	if (key->offset < (u64)-1 && key->offset < sk->max_offset)
		key->offset++;
	else if (key->type < (u8)-1 && key->type < sk->max_type) {
		key->offset = 0;
		key->type++;
	} else if (key->objectid < (u64)-1 && key->objectid < sk->max_objectid) {
		key->offset = 0;
		key->type = 0;
		key->objectid++;
	} else
		ret = 1;
overflow:
	return ret;
}

/*
 * Run the tree search described by args->key and fill args->buf.
 *
 * sk->tree_id == 0 selects the root @inode belongs to, any other value is
 * looked up with btrfs_read_fs_root_no_name().  On return sk->nr_items holds
 * the number of items found.
 *
 * Returns 0 on success, -ENOMEM, -ENOENT when the tree cannot be found, or
 * a negative errno from btrfs_search_forward().
 */
static noinline int search_ioctl(struct inode *inode,
				 struct btrfs_ioctl_search_args *args)
{
	struct btrfs_root *root;
	struct btrfs_key key;
	struct btrfs_path *path;
	struct btrfs_ioctl_search_key *sk = &args->key;
	struct btrfs_fs_info *info = BTRFS_I(inode)->root->fs_info;
	int ret;
	int num_found = 0;
	unsigned long sk_offset = 0;

	path = btrfs_alloc_path();
	if (!path)
		return -ENOMEM;

	if (sk->tree_id == 0) {
		/* search the root of the inode that was passed */
		root = BTRFS_I(inode)->root;
	} else {
		key.objectid = sk->tree_id;
		key.type = BTRFS_ROOT_ITEM_KEY;
		key.offset = (u64)-1;
		root = btrfs_read_fs_root_no_name(info, &key);
		if (IS_ERR(root)) {
			printk(KERN_ERR "BTRFS: could not find root %llu\n",
			       sk->tree_id);
			btrfs_free_path(path);
			return -ENOENT;
		}
	}

	key.objectid = sk->min_objectid;
	key.type = sk->min_type;
	key.offset = sk->min_offset;

	path->keep_locks = 1;

	while (1) {
		ret = btrfs_search_forward(root, &key, path, sk->min_transid);
		if (ret != 0) {
			/* a positive return is not an error for the caller */
			if (ret > 0)
				ret = 0;
			goto err;
		}
		ret = copy_to_sk(root, path, &key, sk, args->buf,
				 &sk_offset, &num_found);
		btrfs_release_path(path);
		if (ret || num_found >= sk->nr_items)
			break;

	}
	ret = 0;
err:
sk->nr_items = num_found; 2064 btrfs_free_path(path); 2065 return ret; 2066 } 2067 2068 static noinline int btrfs_ioctl_tree_search(struct file *file, 2069 void __user *argp) 2070 { 2071 struct btrfs_ioctl_search_args *args; 2072 struct inode *inode; 2073 int ret; 2074 2075 if (!capable(CAP_SYS_ADMIN)) 2076 return -EPERM; 2077 2078 args = memdup_user(argp, sizeof(*args)); 2079 if (IS_ERR(args)) 2080 return PTR_ERR(args); 2081 2082 inode = file_inode(file); 2083 ret = search_ioctl(inode, args); 2084 if (ret == 0 && copy_to_user(argp, args, sizeof(*args))) 2085 ret = -EFAULT; 2086 kfree(args); 2087 return ret; 2088 } 2089 2090 /* 2091 * Search INODE_REFs to identify path name of 'dirid' directory 2092 * in a 'tree_id' tree. and sets path name to 'name'. 2093 */ 2094 static noinline int btrfs_search_path_in_tree(struct btrfs_fs_info *info, 2095 u64 tree_id, u64 dirid, char *name) 2096 { 2097 struct btrfs_root *root; 2098 struct btrfs_key key; 2099 char *ptr; 2100 int ret = -1; 2101 int slot; 2102 int len; 2103 int total_len = 0; 2104 struct btrfs_inode_ref *iref; 2105 struct extent_buffer *l; 2106 struct btrfs_path *path; 2107 2108 if (dirid == BTRFS_FIRST_FREE_OBJECTID) { 2109 name[0]='\0'; 2110 return 0; 2111 } 2112 2113 path = btrfs_alloc_path(); 2114 if (!path) 2115 return -ENOMEM; 2116 2117 ptr = &name[BTRFS_INO_LOOKUP_PATH_MAX]; 2118 2119 key.objectid = tree_id; 2120 key.type = BTRFS_ROOT_ITEM_KEY; 2121 key.offset = (u64)-1; 2122 root = btrfs_read_fs_root_no_name(info, &key); 2123 if (IS_ERR(root)) { 2124 printk(KERN_ERR "BTRFS: could not find root %llu\n", tree_id); 2125 ret = -ENOENT; 2126 goto out; 2127 } 2128 2129 key.objectid = dirid; 2130 key.type = BTRFS_INODE_REF_KEY; 2131 key.offset = (u64)-1; 2132 2133 while (1) { 2134 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); 2135 if (ret < 0) 2136 goto out; 2137 else if (ret > 0) { 2138 ret = btrfs_previous_item(root, path, dirid, 2139 BTRFS_INODE_REF_KEY); 2140 if (ret < 0) 2141 goto out; 2142 else if 
(ret > 0) { 2143 ret = -ENOENT; 2144 goto out; 2145 } 2146 } 2147 2148 l = path->nodes[0]; 2149 slot = path->slots[0]; 2150 btrfs_item_key_to_cpu(l, &key, slot); 2151 2152 iref = btrfs_item_ptr(l, slot, struct btrfs_inode_ref); 2153 len = btrfs_inode_ref_name_len(l, iref); 2154 ptr -= len + 1; 2155 total_len += len + 1; 2156 if (ptr < name) { 2157 ret = -ENAMETOOLONG; 2158 goto out; 2159 } 2160 2161 *(ptr + len) = '/'; 2162 read_extent_buffer(l, ptr, (unsigned long)(iref + 1), len); 2163 2164 if (key.offset == BTRFS_FIRST_FREE_OBJECTID) 2165 break; 2166 2167 btrfs_release_path(path); 2168 key.objectid = key.offset; 2169 key.offset = (u64)-1; 2170 dirid = key.objectid; 2171 } 2172 memmove(name, ptr, total_len); 2173 name[total_len] = '\0'; 2174 ret = 0; 2175 out: 2176 btrfs_free_path(path); 2177 return ret; 2178 } 2179 2180 static noinline int btrfs_ioctl_ino_lookup(struct file *file, 2181 void __user *argp) 2182 { 2183 struct btrfs_ioctl_ino_lookup_args *args; 2184 struct inode *inode; 2185 int ret; 2186 2187 if (!capable(CAP_SYS_ADMIN)) 2188 return -EPERM; 2189 2190 args = memdup_user(argp, sizeof(*args)); 2191 if (IS_ERR(args)) 2192 return PTR_ERR(args); 2193 2194 inode = file_inode(file); 2195 2196 if (args->treeid == 0) 2197 args->treeid = BTRFS_I(inode)->root->root_key.objectid; 2198 2199 ret = btrfs_search_path_in_tree(BTRFS_I(inode)->root->fs_info, 2200 args->treeid, args->objectid, 2201 args->name); 2202 2203 if (ret == 0 && copy_to_user(argp, args, sizeof(*args))) 2204 ret = -EFAULT; 2205 2206 kfree(args); 2207 return ret; 2208 } 2209 2210 static noinline int btrfs_ioctl_snap_destroy(struct file *file, 2211 void __user *arg) 2212 { 2213 struct dentry *parent = file->f_path.dentry; 2214 struct dentry *dentry; 2215 struct inode *dir = parent->d_inode; 2216 struct inode *inode; 2217 struct btrfs_root *root = BTRFS_I(dir)->root; 2218 struct btrfs_root *dest = NULL; 2219 struct btrfs_ioctl_vol_args *vol_args; 2220 struct btrfs_trans_handle *trans; 2221 struct 
btrfs_block_rsv block_rsv;
	u64 qgroup_reserved;
	int namelen;
	int ret;
	int err = 0;

	vol_args = memdup_user(arg, sizeof(*vol_args));
	if (IS_ERR(vol_args))
		return PTR_ERR(vol_args);

	vol_args->name[BTRFS_PATH_NAME_MAX] = '\0';
	namelen = strlen(vol_args->name);
	/*
	 * strncmp() limited to namelen bytes matches every prefix of "..",
	 * so the empty name, "." and ".." are all rejected here.
	 */
	if (strchr(vol_args->name, '/') ||
	    strncmp(vol_args->name, "..", namelen) == 0) {
		err = -EINVAL;
		goto out;
	}

	err = mnt_want_write_file(file);
	if (err)
		goto out;

	/* killable lock on the parent directory */
	err = mutex_lock_killable_nested(&dir->i_mutex, I_MUTEX_PARENT);
	if (err == -EINTR)
		goto out_drop_write;
	dentry = lookup_one_len(vol_args->name, parent, namelen);
	if (IS_ERR(dentry)) {
		err = PTR_ERR(dentry);
		goto out_unlock_dir;
	}

	if (!dentry->d_inode) {
		err = -ENOENT;
		goto out_dput;
	}

	inode = dentry->d_inode;
	dest = BTRFS_I(inode)->root;
	if (!capable(CAP_SYS_ADMIN)) {
		/*
		 * Regular user.  Only allow this with a special mount
		 * option, when the user has write+exec access to the
		 * subvol root, and when rmdir(2) would have been
		 * allowed.
		 *
		 * Note that this is _not_ check that the subvol is
		 * empty or doesn't contain data that we wouldn't
		 * otherwise be able to delete.
		 *
		 * Users who want to delete empty subvols should try
		 * rmdir(2).
		 */
		err = -EPERM;
		if (!btrfs_test_opt(root, USER_SUBVOL_RM_ALLOWED))
			goto out_dput;

		/*
		 * Do not allow deletion if the parent dir is the same
		 * as the dir to be deleted.  That means the ioctl
		 * must be called on the dentry referencing the root
		 * of the subvol, not a random directory contained
		 * within it.
		 */
		err = -EINVAL;
		if (root == dest)
			goto out_dput;

		err = inode_permission(inode, MAY_WRITE | MAY_EXEC);
		if (err)
			goto out_dput;
	}

	/* check if subvolume may be deleted by a user */
	err = btrfs_may_delete(dir, dentry, 1);
	if (err)
		goto out_dput;

	/* the target must be the root directory of a subvolume */
	if (btrfs_ino(inode) != BTRFS_FIRST_FREE_OBJECTID) {
		err = -EINVAL;
		goto out_dput;
	}

	mutex_lock(&inode->i_mutex);
	err = d_invalidate(dentry);
	if (err)
		goto out_unlock;

	down_write(&root->fs_info->subvol_sem);

	err = may_destroy_subvol(dest);
	if (err)
		goto out_up_write;

	btrfs_init_block_rsv(&block_rsv, BTRFS_BLOCK_RSV_TEMP);
	/*
	 * One for dir inode, two for dir entries, two for root
	 * ref/backref.
	 */
	err = btrfs_subvolume_reserve_metadata(root, &block_rsv,
					       5, &qgroup_reserved, true);
	if (err)
		goto out_up_write;

	trans = btrfs_start_transaction(root, 0);
	if (IS_ERR(trans)) {
		err = PTR_ERR(trans);
		goto out_release;
	}
	/* run the transaction on the temporary reservation made above */
	trans->block_rsv = &block_rsv;
	trans->bytes_reserved = block_rsv.size;

	ret = btrfs_unlink_subvol(trans, root, dir,
				dest->root_key.objectid,
				dentry->d_name.name,
				dentry->d_name.len);
	if (ret) {
		err = ret;
		btrfs_abort_transaction(trans, root, ret);
		goto out_end_trans;
	}

	btrfs_record_root_in_trans(trans, dest);

	/* reset the drop state and the reference count of the root item */
	memset(&dest->root_item.drop_progress, 0,
		sizeof(dest->root_item.drop_progress));
	dest->root_item.drop_level = 0;
	btrfs_set_root_refs(&dest->root_item, 0);

	/* insert the orphan item only once per root */
	if (!xchg(&dest->orphan_item_inserted, 1)) {
		ret = btrfs_insert_orphan_item(trans,
					root->fs_info->tree_root,
					dest->root_key.objectid);
		if (ret) {
			btrfs_abort_transaction(trans, root, ret);
			err = ret;
			goto out_end_trans;
		}
	}

	/* a missing uuid tree entry (-ENOENT) is tolerated */
	ret = btrfs_uuid_tree_rem(trans, root->fs_info->uuid_root,
				  dest->root_item.uuid, BTRFS_UUID_KEY_SUBVOL,
				  dest->root_key.objectid);
	if (ret && ret != -ENOENT) {
		btrfs_abort_transaction(trans, root, ret);
		err = ret;
		goto out_end_trans;
	}
	if (!btrfs_is_empty_uuid(dest->root_item.received_uuid)) {
		ret = btrfs_uuid_tree_rem(trans, root->fs_info->uuid_root,
					  dest->root_item.received_uuid,
					  BTRFS_UUID_KEY_RECEIVED_SUBVOL,
					  dest->root_key.objectid);
		if (ret && ret != -ENOENT) {
			btrfs_abort_transaction(trans, root, ret);
			err = ret;
			goto out_end_trans;
		}
	}

out_end_trans:
	/* detach the on-stack reservation before the handle is released */
	trans->block_rsv = NULL;
	trans->bytes_reserved = 0;
	ret = btrfs_end_transaction(trans, root);
	if (ret && !err)
		err = ret;
	/* NOTE(review): S_DEAD is set even when err is non-zero -- confirm */
	inode->i_flags |= S_DEAD;
out_release:
	btrfs_subvolume_release_metadata(root, &block_rsv, qgroup_reserved);
out_up_write:
	up_write(&root->fs_info->subvol_sem);
out_unlock:
	mutex_unlock(&inode->i_mutex);
	if (!err) {
		shrink_dcache_sb(root->fs_info->sb);
		btrfs_invalidate_inodes(dest);
		d_delete(dentry);

		/* the last ref */
		if (dest->cache_inode) {
			iput(dest->cache_inode);
			dest->cache_inode = NULL;
		}
	}
out_dput:
	dput(dentry);
out_unlock_dir:
	mutex_unlock(&dir->i_mutex);
out_drop_write:
	mnt_drop_write_file(file);
out:
	kfree(vol_args);
	return err;
}

/*
 * BTRFS_IOC_DEFRAG / DEFRAG_RANGE entry point.
 *
 * Directories defragment the subvolume tree and the extent tree (requires
 * CAP_SYS_ADMIN); regular files must be open for writing and are handed to
 * btrfs_defrag_file().  A NULL @argp means "whole file, default settings".
 * Returns 0 on success or a negative errno; -EROFS for a read-only root.
 */
static int btrfs_ioctl_defrag(struct file *file, void __user *argp)
{
	struct inode *inode = file_inode(file);
	struct btrfs_root *root = BTRFS_I(inode)->root;
	struct btrfs_ioctl_defrag_range_args *range;
	int ret;

	ret = mnt_want_write_file(file);
	if (ret)
		return ret;

	if (btrfs_root_readonly(root)) {
		ret = -EROFS;
		goto out;
	}

	switch (inode->i_mode & S_IFMT) {
	case S_IFDIR:
		if (!capable(CAP_SYS_ADMIN)) {
			ret = -EPERM;
			goto
out; 2436 } 2437 ret = btrfs_defrag_root(root); 2438 if (ret) 2439 goto out; 2440 ret = btrfs_defrag_root(root->fs_info->extent_root); 2441 break; 2442 case S_IFREG: 2443 if (!(file->f_mode & FMODE_WRITE)) { 2444 ret = -EINVAL; 2445 goto out; 2446 } 2447 2448 range = kzalloc(sizeof(*range), GFP_KERNEL); 2449 if (!range) { 2450 ret = -ENOMEM; 2451 goto out; 2452 } 2453 2454 if (argp) { 2455 if (copy_from_user(range, argp, 2456 sizeof(*range))) { 2457 ret = -EFAULT; 2458 kfree(range); 2459 goto out; 2460 } 2461 /* compression requires us to start the IO */ 2462 if ((range->flags & BTRFS_DEFRAG_RANGE_COMPRESS)) { 2463 range->flags |= BTRFS_DEFRAG_RANGE_START_IO; 2464 range->extent_thresh = (u32)-1; 2465 } 2466 } else { 2467 /* the rest are all set to zero by kzalloc */ 2468 range->len = (u64)-1; 2469 } 2470 ret = btrfs_defrag_file(file_inode(file), file, 2471 range, 0, 0); 2472 if (ret > 0) 2473 ret = 0; 2474 kfree(range); 2475 break; 2476 default: 2477 ret = -EINVAL; 2478 } 2479 out: 2480 mnt_drop_write_file(file); 2481 return ret; 2482 } 2483 2484 static long btrfs_ioctl_add_dev(struct btrfs_root *root, void __user *arg) 2485 { 2486 struct btrfs_ioctl_vol_args *vol_args; 2487 int ret; 2488 2489 if (!capable(CAP_SYS_ADMIN)) 2490 return -EPERM; 2491 2492 if (atomic_xchg(&root->fs_info->mutually_exclusive_operation_running, 2493 1)) { 2494 return BTRFS_ERROR_DEV_EXCL_RUN_IN_PROGRESS; 2495 } 2496 2497 mutex_lock(&root->fs_info->volume_mutex); 2498 vol_args = memdup_user(arg, sizeof(*vol_args)); 2499 if (IS_ERR(vol_args)) { 2500 ret = PTR_ERR(vol_args); 2501 goto out; 2502 } 2503 2504 vol_args->name[BTRFS_PATH_NAME_MAX] = '\0'; 2505 ret = btrfs_init_new_device(root, vol_args->name); 2506 2507 kfree(vol_args); 2508 out: 2509 mutex_unlock(&root->fs_info->volume_mutex); 2510 atomic_set(&root->fs_info->mutually_exclusive_operation_running, 0); 2511 return ret; 2512 } 2513 2514 static long btrfs_ioctl_rm_dev(struct file *file, void __user *arg) 2515 { 2516 struct btrfs_root 
*root = BTRFS_I(file_inode(file))->root; 2517 struct btrfs_ioctl_vol_args *vol_args; 2518 int ret; 2519 2520 if (!capable(CAP_SYS_ADMIN)) 2521 return -EPERM; 2522 2523 ret = mnt_want_write_file(file); 2524 if (ret) 2525 return ret; 2526 2527 vol_args = memdup_user(arg, sizeof(*vol_args)); 2528 if (IS_ERR(vol_args)) { 2529 ret = PTR_ERR(vol_args); 2530 goto out; 2531 } 2532 2533 vol_args->name[BTRFS_PATH_NAME_MAX] = '\0'; 2534 2535 if (atomic_xchg(&root->fs_info->mutually_exclusive_operation_running, 2536 1)) { 2537 ret = BTRFS_ERROR_DEV_EXCL_RUN_IN_PROGRESS; 2538 goto out; 2539 } 2540 2541 mutex_lock(&root->fs_info->volume_mutex); 2542 ret = btrfs_rm_device(root, vol_args->name); 2543 mutex_unlock(&root->fs_info->volume_mutex); 2544 atomic_set(&root->fs_info->mutually_exclusive_operation_running, 0); 2545 2546 out: 2547 kfree(vol_args); 2548 mnt_drop_write_file(file); 2549 return ret; 2550 } 2551 2552 static long btrfs_ioctl_fs_info(struct btrfs_root *root, void __user *arg) 2553 { 2554 struct btrfs_ioctl_fs_info_args *fi_args; 2555 struct btrfs_device *device; 2556 struct btrfs_device *next; 2557 struct btrfs_fs_devices *fs_devices = root->fs_info->fs_devices; 2558 int ret = 0; 2559 2560 if (!capable(CAP_SYS_ADMIN)) 2561 return -EPERM; 2562 2563 fi_args = kzalloc(sizeof(*fi_args), GFP_KERNEL); 2564 if (!fi_args) 2565 return -ENOMEM; 2566 2567 mutex_lock(&fs_devices->device_list_mutex); 2568 fi_args->num_devices = fs_devices->num_devices; 2569 memcpy(&fi_args->fsid, root->fs_info->fsid, sizeof(fi_args->fsid)); 2570 2571 list_for_each_entry_safe(device, next, &fs_devices->devices, dev_list) { 2572 if (device->devid > fi_args->max_id) 2573 fi_args->max_id = device->devid; 2574 } 2575 mutex_unlock(&fs_devices->device_list_mutex); 2576 2577 if (copy_to_user(arg, fi_args, sizeof(*fi_args))) 2578 ret = -EFAULT; 2579 2580 kfree(fi_args); 2581 return ret; 2582 } 2583 2584 static long btrfs_ioctl_dev_info(struct btrfs_root *root, void __user *arg) 2585 { 2586 struct 
btrfs_ioctl_dev_info_args *di_args; 2587 struct btrfs_device *dev; 2588 struct btrfs_fs_devices *fs_devices = root->fs_info->fs_devices; 2589 int ret = 0; 2590 char *s_uuid = NULL; 2591 2592 if (!capable(CAP_SYS_ADMIN)) 2593 return -EPERM; 2594 2595 di_args = memdup_user(arg, sizeof(*di_args)); 2596 if (IS_ERR(di_args)) 2597 return PTR_ERR(di_args); 2598 2599 if (!btrfs_is_empty_uuid(di_args->uuid)) 2600 s_uuid = di_args->uuid; 2601 2602 mutex_lock(&fs_devices->device_list_mutex); 2603 dev = btrfs_find_device(root->fs_info, di_args->devid, s_uuid, NULL); 2604 2605 if (!dev) { 2606 ret = -ENODEV; 2607 goto out; 2608 } 2609 2610 di_args->devid = dev->devid; 2611 di_args->bytes_used = dev->bytes_used; 2612 di_args->total_bytes = dev->total_bytes; 2613 memcpy(di_args->uuid, dev->uuid, sizeof(di_args->uuid)); 2614 if (dev->name) { 2615 struct rcu_string *name; 2616 2617 rcu_read_lock(); 2618 name = rcu_dereference(dev->name); 2619 strncpy(di_args->path, name->str, sizeof(di_args->path)); 2620 rcu_read_unlock(); 2621 di_args->path[sizeof(di_args->path) - 1] = 0; 2622 } else { 2623 di_args->path[0] = '\0'; 2624 } 2625 2626 out: 2627 mutex_unlock(&fs_devices->device_list_mutex); 2628 if (ret == 0 && copy_to_user(arg, di_args, sizeof(*di_args))) 2629 ret = -EFAULT; 2630 2631 kfree(di_args); 2632 return ret; 2633 } 2634 2635 static struct page *extent_same_get_page(struct inode *inode, u64 off) 2636 { 2637 struct page *page; 2638 pgoff_t index; 2639 struct extent_io_tree *tree = &BTRFS_I(inode)->io_tree; 2640 2641 index = off >> PAGE_CACHE_SHIFT; 2642 2643 page = grab_cache_page(inode->i_mapping, index); 2644 if (!page) 2645 return NULL; 2646 2647 if (!PageUptodate(page)) { 2648 if (extent_read_full_page_nolock(tree, page, btrfs_get_extent, 2649 0)) 2650 return NULL; 2651 lock_page(page); 2652 if (!PageUptodate(page)) { 2653 unlock_page(page); 2654 page_cache_release(page); 2655 return NULL; 2656 } 2657 } 2658 unlock_page(page); 2659 2660 return page; 2661 } 2662 2663 static 
inline void lock_extent_range(struct inode *inode, u64 off, u64 len) 2664 { 2665 /* do any pending delalloc/csum calc on src, one way or 2666 another, and lock file content */ 2667 while (1) { 2668 struct btrfs_ordered_extent *ordered; 2669 lock_extent(&BTRFS_I(inode)->io_tree, off, off + len - 1); 2670 ordered = btrfs_lookup_first_ordered_extent(inode, 2671 off + len - 1); 2672 if (!ordered && 2673 !test_range_bit(&BTRFS_I(inode)->io_tree, off, 2674 off + len - 1, EXTENT_DELALLOC, 0, NULL)) 2675 break; 2676 unlock_extent(&BTRFS_I(inode)->io_tree, off, off + len - 1); 2677 if (ordered) 2678 btrfs_put_ordered_extent(ordered); 2679 btrfs_wait_ordered_range(inode, off, len); 2680 } 2681 } 2682 2683 static void btrfs_double_unlock(struct inode *inode1, u64 loff1, 2684 struct inode *inode2, u64 loff2, u64 len) 2685 { 2686 unlock_extent(&BTRFS_I(inode1)->io_tree, loff1, loff1 + len - 1); 2687 unlock_extent(&BTRFS_I(inode2)->io_tree, loff2, loff2 + len - 1); 2688 2689 mutex_unlock(&inode1->i_mutex); 2690 mutex_unlock(&inode2->i_mutex); 2691 } 2692 2693 static void btrfs_double_lock(struct inode *inode1, u64 loff1, 2694 struct inode *inode2, u64 loff2, u64 len) 2695 { 2696 if (inode1 < inode2) { 2697 swap(inode1, inode2); 2698 swap(loff1, loff2); 2699 } 2700 2701 mutex_lock_nested(&inode1->i_mutex, I_MUTEX_PARENT); 2702 lock_extent_range(inode1, loff1, len); 2703 if (inode1 != inode2) { 2704 mutex_lock_nested(&inode2->i_mutex, I_MUTEX_CHILD); 2705 lock_extent_range(inode2, loff2, len); 2706 } 2707 } 2708 2709 static int btrfs_cmp_data(struct inode *src, u64 loff, struct inode *dst, 2710 u64 dst_loff, u64 len) 2711 { 2712 int ret = 0; 2713 struct page *src_page, *dst_page; 2714 unsigned int cmp_len = PAGE_CACHE_SIZE; 2715 void *addr, *dst_addr; 2716 2717 while (len) { 2718 if (len < PAGE_CACHE_SIZE) 2719 cmp_len = len; 2720 2721 src_page = extent_same_get_page(src, loff); 2722 if (!src_page) 2723 return -EINVAL; 2724 dst_page = extent_same_get_page(dst, dst_loff); 2725 if 
(!dst_page) { 2726 page_cache_release(src_page); 2727 return -EINVAL; 2728 } 2729 addr = kmap_atomic(src_page); 2730 dst_addr = kmap_atomic(dst_page); 2731 2732 flush_dcache_page(src_page); 2733 flush_dcache_page(dst_page); 2734 2735 if (memcmp(addr, dst_addr, cmp_len)) 2736 ret = BTRFS_SAME_DATA_DIFFERS; 2737 2738 kunmap_atomic(addr); 2739 kunmap_atomic(dst_addr); 2740 page_cache_release(src_page); 2741 page_cache_release(dst_page); 2742 2743 if (ret) 2744 break; 2745 2746 loff += cmp_len; 2747 dst_loff += cmp_len; 2748 len -= cmp_len; 2749 } 2750 2751 return ret; 2752 } 2753 2754 static int extent_same_check_offsets(struct inode *inode, u64 off, u64 len) 2755 { 2756 u64 bs = BTRFS_I(inode)->root->fs_info->sb->s_blocksize; 2757 2758 if (off + len > inode->i_size || off + len < off) 2759 return -EINVAL; 2760 /* Check that we are block aligned - btrfs_clone() requires this */ 2761 if (!IS_ALIGNED(off, bs) || !IS_ALIGNED(off + len, bs)) 2762 return -EINVAL; 2763 2764 return 0; 2765 } 2766 2767 static int btrfs_extent_same(struct inode *src, u64 loff, u64 len, 2768 struct inode *dst, u64 dst_loff) 2769 { 2770 int ret; 2771 2772 /* 2773 * btrfs_clone() can't handle extents in the same file 2774 * yet. Once that works, we can drop this check and replace it 2775 * with a check for the same inode, but overlapping extents. 
2776 */ 2777 if (src == dst) 2778 return -EINVAL; 2779 2780 btrfs_double_lock(src, loff, dst, dst_loff, len); 2781 2782 ret = extent_same_check_offsets(src, loff, len); 2783 if (ret) 2784 goto out_unlock; 2785 2786 ret = extent_same_check_offsets(dst, dst_loff, len); 2787 if (ret) 2788 goto out_unlock; 2789 2790 /* don't make the dst file partly checksummed */ 2791 if ((BTRFS_I(src)->flags & BTRFS_INODE_NODATASUM) != 2792 (BTRFS_I(dst)->flags & BTRFS_INODE_NODATASUM)) { 2793 ret = -EINVAL; 2794 goto out_unlock; 2795 } 2796 2797 ret = btrfs_cmp_data(src, loff, dst, dst_loff, len); 2798 if (ret == 0) 2799 ret = btrfs_clone(src, dst, loff, len, len, dst_loff); 2800 2801 out_unlock: 2802 btrfs_double_unlock(src, loff, dst, dst_loff, len); 2803 2804 return ret; 2805 } 2806 2807 #define BTRFS_MAX_DEDUPE_LEN (16 * 1024 * 1024) 2808 2809 static long btrfs_ioctl_file_extent_same(struct file *file, 2810 struct btrfs_ioctl_same_args __user *argp) 2811 { 2812 struct btrfs_ioctl_same_args *same; 2813 struct btrfs_ioctl_same_extent_info *info; 2814 struct inode *src = file_inode(file); 2815 u64 off; 2816 u64 len; 2817 int i; 2818 int ret; 2819 unsigned long size; 2820 u64 bs = BTRFS_I(src)->root->fs_info->sb->s_blocksize; 2821 bool is_admin = capable(CAP_SYS_ADMIN); 2822 u16 count; 2823 2824 if (!(file->f_mode & FMODE_READ)) 2825 return -EINVAL; 2826 2827 ret = mnt_want_write_file(file); 2828 if (ret) 2829 return ret; 2830 2831 if (get_user(count, &argp->dest_count)) { 2832 ret = -EFAULT; 2833 goto out; 2834 } 2835 2836 size = offsetof(struct btrfs_ioctl_same_args __user, info[count]); 2837 2838 same = memdup_user(argp, size); 2839 2840 if (IS_ERR(same)) { 2841 ret = PTR_ERR(same); 2842 goto out; 2843 } 2844 2845 off = same->logical_offset; 2846 len = same->length; 2847 2848 /* 2849 * Limit the total length we will dedupe for each operation. 2850 * This is intended to bound the total time spent in this 2851 * ioctl to something sane. 
2852 */ 2853 if (len > BTRFS_MAX_DEDUPE_LEN) 2854 len = BTRFS_MAX_DEDUPE_LEN; 2855 2856 if (WARN_ON_ONCE(bs < PAGE_CACHE_SIZE)) { 2857 /* 2858 * Btrfs does not support blocksize < page_size. As a 2859 * result, btrfs_cmp_data() won't correctly handle 2860 * this situation without an update. 2861 */ 2862 ret = -EINVAL; 2863 goto out; 2864 } 2865 2866 ret = -EISDIR; 2867 if (S_ISDIR(src->i_mode)) 2868 goto out; 2869 2870 ret = -EACCES; 2871 if (!S_ISREG(src->i_mode)) 2872 goto out; 2873 2874 /* pre-format output fields to sane values */ 2875 for (i = 0; i < count; i++) { 2876 same->info[i].bytes_deduped = 0ULL; 2877 same->info[i].status = 0; 2878 } 2879 2880 for (i = 0, info = same->info; i < count; i++, info++) { 2881 struct inode *dst; 2882 struct fd dst_file = fdget(info->fd); 2883 if (!dst_file.file) { 2884 info->status = -EBADF; 2885 continue; 2886 } 2887 dst = file_inode(dst_file.file); 2888 2889 if (!(is_admin || (dst_file.file->f_mode & FMODE_WRITE))) { 2890 info->status = -EINVAL; 2891 } else if (file->f_path.mnt != dst_file.file->f_path.mnt) { 2892 info->status = -EXDEV; 2893 } else if (S_ISDIR(dst->i_mode)) { 2894 info->status = -EISDIR; 2895 } else if (!S_ISREG(dst->i_mode)) { 2896 info->status = -EACCES; 2897 } else { 2898 info->status = btrfs_extent_same(src, off, len, dst, 2899 info->logical_offset); 2900 if (info->status == 0) 2901 info->bytes_deduped += len; 2902 } 2903 fdput(dst_file); 2904 } 2905 2906 ret = copy_to_user(argp, same, size); 2907 if (ret) 2908 ret = -EFAULT; 2909 2910 out: 2911 mnt_drop_write_file(file); 2912 return ret; 2913 } 2914 2915 /** 2916 * btrfs_clone() - clone a range from inode file to another 2917 * 2918 * @src: Inode to clone from 2919 * @inode: Inode to clone to 2920 * @off: Offset within source to start clone from 2921 * @olen: Original length, passed by user, of range to clone 2922 * @olen_aligned: Block-aligned value of olen, extent_same uses 2923 * identical values here 2924 * @destoff: Offset within @inode to start 
clone 2925 */ 2926 static int btrfs_clone(struct inode *src, struct inode *inode, 2927 u64 off, u64 olen, u64 olen_aligned, u64 destoff) 2928 { 2929 struct btrfs_root *root = BTRFS_I(inode)->root; 2930 struct btrfs_path *path = NULL; 2931 struct extent_buffer *leaf; 2932 struct btrfs_trans_handle *trans; 2933 char *buf = NULL; 2934 struct btrfs_key key; 2935 u32 nritems; 2936 int slot; 2937 int ret; 2938 u64 len = olen_aligned; 2939 2940 ret = -ENOMEM; 2941 buf = vmalloc(btrfs_level_size(root, 0)); 2942 if (!buf) 2943 return ret; 2944 2945 path = btrfs_alloc_path(); 2946 if (!path) { 2947 vfree(buf); 2948 return ret; 2949 } 2950 2951 path->reada = 2; 2952 /* clone data */ 2953 key.objectid = btrfs_ino(src); 2954 key.type = BTRFS_EXTENT_DATA_KEY; 2955 key.offset = 0; 2956 2957 while (1) { 2958 /* 2959 * note the key will change type as we walk through the 2960 * tree. 2961 */ 2962 path->leave_spinning = 1; 2963 ret = btrfs_search_slot(NULL, BTRFS_I(src)->root, &key, path, 2964 0, 0); 2965 if (ret < 0) 2966 goto out; 2967 2968 nritems = btrfs_header_nritems(path->nodes[0]); 2969 process_slot: 2970 if (path->slots[0] >= nritems) { 2971 ret = btrfs_next_leaf(BTRFS_I(src)->root, path); 2972 if (ret < 0) 2973 goto out; 2974 if (ret > 0) 2975 break; 2976 nritems = btrfs_header_nritems(path->nodes[0]); 2977 } 2978 leaf = path->nodes[0]; 2979 slot = path->slots[0]; 2980 2981 btrfs_item_key_to_cpu(leaf, &key, slot); 2982 if (btrfs_key_type(&key) > BTRFS_EXTENT_DATA_KEY || 2983 key.objectid != btrfs_ino(src)) 2984 break; 2985 2986 if (btrfs_key_type(&key) == BTRFS_EXTENT_DATA_KEY) { 2987 struct btrfs_file_extent_item *extent; 2988 int type; 2989 u32 size; 2990 struct btrfs_key new_key; 2991 u64 disko = 0, diskl = 0; 2992 u64 datao = 0, datal = 0; 2993 u8 comp; 2994 u64 endoff; 2995 2996 extent = btrfs_item_ptr(leaf, slot, 2997 struct btrfs_file_extent_item); 2998 comp = btrfs_file_extent_compression(leaf, extent); 2999 type = btrfs_file_extent_type(leaf, extent); 3000 if 
(type == BTRFS_FILE_EXTENT_REG || 3001 type == BTRFS_FILE_EXTENT_PREALLOC) { 3002 disko = btrfs_file_extent_disk_bytenr(leaf, 3003 extent); 3004 diskl = btrfs_file_extent_disk_num_bytes(leaf, 3005 extent); 3006 datao = btrfs_file_extent_offset(leaf, extent); 3007 datal = btrfs_file_extent_num_bytes(leaf, 3008 extent); 3009 } else if (type == BTRFS_FILE_EXTENT_INLINE) { 3010 /* take upper bound, may be compressed */ 3011 datal = btrfs_file_extent_ram_bytes(leaf, 3012 extent); 3013 } 3014 3015 if (key.offset + datal <= off || 3016 key.offset >= off + len - 1) { 3017 path->slots[0]++; 3018 goto process_slot; 3019 } 3020 3021 size = btrfs_item_size_nr(leaf, slot); 3022 read_extent_buffer(leaf, buf, 3023 btrfs_item_ptr_offset(leaf, slot), 3024 size); 3025 3026 btrfs_release_path(path); 3027 path->leave_spinning = 0; 3028 3029 memcpy(&new_key, &key, sizeof(new_key)); 3030 new_key.objectid = btrfs_ino(inode); 3031 if (off <= key.offset) 3032 new_key.offset = key.offset + destoff - off; 3033 else 3034 new_key.offset = destoff; 3035 3036 /* 3037 * 1 - adjusting old extent (we may have to split it) 3038 * 1 - add new extent 3039 * 1 - inode update 3040 */ 3041 trans = btrfs_start_transaction(root, 3); 3042 if (IS_ERR(trans)) { 3043 ret = PTR_ERR(trans); 3044 goto out; 3045 } 3046 3047 if (type == BTRFS_FILE_EXTENT_REG || 3048 type == BTRFS_FILE_EXTENT_PREALLOC) { 3049 /* 3050 * a | --- range to clone ---| b 3051 * | ------------- extent ------------- | 3052 */ 3053 3054 /* substract range b */ 3055 if (key.offset + datal > off + len) 3056 datal = off + len - key.offset; 3057 3058 /* substract range a */ 3059 if (off > key.offset) { 3060 datao += off - key.offset; 3061 datal -= off - key.offset; 3062 } 3063 3064 ret = btrfs_drop_extents(trans, root, inode, 3065 new_key.offset, 3066 new_key.offset + datal, 3067 1); 3068 if (ret) { 3069 if (ret != -EOPNOTSUPP) 3070 btrfs_abort_transaction(trans, 3071 root, ret); 3072 btrfs_end_transaction(trans, root); 3073 goto out; 3074 } 
3075 3076 ret = btrfs_insert_empty_item(trans, root, path, 3077 &new_key, size); 3078 if (ret) { 3079 btrfs_abort_transaction(trans, root, 3080 ret); 3081 btrfs_end_transaction(trans, root); 3082 goto out; 3083 } 3084 3085 leaf = path->nodes[0]; 3086 slot = path->slots[0]; 3087 write_extent_buffer(leaf, buf, 3088 btrfs_item_ptr_offset(leaf, slot), 3089 size); 3090 3091 extent = btrfs_item_ptr(leaf, slot, 3092 struct btrfs_file_extent_item); 3093 3094 /* disko == 0 means it's a hole */ 3095 if (!disko) 3096 datao = 0; 3097 3098 btrfs_set_file_extent_offset(leaf, extent, 3099 datao); 3100 btrfs_set_file_extent_num_bytes(leaf, extent, 3101 datal); 3102 if (disko) { 3103 inode_add_bytes(inode, datal); 3104 ret = btrfs_inc_extent_ref(trans, root, 3105 disko, diskl, 0, 3106 root->root_key.objectid, 3107 btrfs_ino(inode), 3108 new_key.offset - datao, 3109 0); 3110 if (ret) { 3111 btrfs_abort_transaction(trans, 3112 root, 3113 ret); 3114 btrfs_end_transaction(trans, 3115 root); 3116 goto out; 3117 3118 } 3119 } 3120 } else if (type == BTRFS_FILE_EXTENT_INLINE) { 3121 u64 skip = 0; 3122 u64 trim = 0; 3123 u64 aligned_end = 0; 3124 3125 if (off > key.offset) { 3126 skip = off - key.offset; 3127 new_key.offset += skip; 3128 } 3129 3130 if (key.offset + datal > off + len) 3131 trim = key.offset + datal - (off + len); 3132 3133 if (comp && (skip || trim)) { 3134 ret = -EINVAL; 3135 btrfs_end_transaction(trans, root); 3136 goto out; 3137 } 3138 size -= skip + trim; 3139 datal -= skip + trim; 3140 3141 aligned_end = ALIGN(new_key.offset + datal, 3142 root->sectorsize); 3143 ret = btrfs_drop_extents(trans, root, inode, 3144 new_key.offset, 3145 aligned_end, 3146 1); 3147 if (ret) { 3148 if (ret != -EOPNOTSUPP) 3149 btrfs_abort_transaction(trans, 3150 root, ret); 3151 btrfs_end_transaction(trans, root); 3152 goto out; 3153 } 3154 3155 ret = btrfs_insert_empty_item(trans, root, path, 3156 &new_key, size); 3157 if (ret) { 3158 btrfs_abort_transaction(trans, root, 3159 ret); 3160 
btrfs_end_transaction(trans, root); 3161 goto out; 3162 } 3163 3164 if (skip) { 3165 u32 start = 3166 btrfs_file_extent_calc_inline_size(0); 3167 memmove(buf+start, buf+start+skip, 3168 datal); 3169 } 3170 3171 leaf = path->nodes[0]; 3172 slot = path->slots[0]; 3173 write_extent_buffer(leaf, buf, 3174 btrfs_item_ptr_offset(leaf, slot), 3175 size); 3176 inode_add_bytes(inode, datal); 3177 } 3178 3179 btrfs_mark_buffer_dirty(leaf); 3180 btrfs_release_path(path); 3181 3182 inode_inc_iversion(inode); 3183 inode->i_mtime = inode->i_ctime = CURRENT_TIME; 3184 3185 /* 3186 * we round up to the block size at eof when 3187 * determining which extents to clone above, 3188 * but shouldn't round up the file size 3189 */ 3190 endoff = new_key.offset + datal; 3191 if (endoff > destoff+olen) 3192 endoff = destoff+olen; 3193 if (endoff > inode->i_size) 3194 btrfs_i_size_write(inode, endoff); 3195 3196 ret = btrfs_update_inode(trans, root, inode); 3197 if (ret) { 3198 btrfs_abort_transaction(trans, root, ret); 3199 btrfs_end_transaction(trans, root); 3200 goto out; 3201 } 3202 ret = btrfs_end_transaction(trans, root); 3203 } 3204 btrfs_release_path(path); 3205 key.offset++; 3206 } 3207 ret = 0; 3208 3209 out: 3210 btrfs_release_path(path); 3211 btrfs_free_path(path); 3212 vfree(buf); 3213 return ret; 3214 } 3215 3216 static noinline long btrfs_ioctl_clone(struct file *file, unsigned long srcfd, 3217 u64 off, u64 olen, u64 destoff) 3218 { 3219 struct inode *inode = file_inode(file); 3220 struct btrfs_root *root = BTRFS_I(inode)->root; 3221 struct fd src_file; 3222 struct inode *src; 3223 int ret; 3224 u64 len = olen; 3225 u64 bs = root->fs_info->sb->s_blocksize; 3226 int same_inode = 0; 3227 3228 /* 3229 * TODO: 3230 * - split compressed inline extents. annoying: we need to 3231 * decompress into destination's address_space (the file offset 3232 * may change, so source mapping won't do), then recompress (or 3233 * otherwise reinsert) a subrange. 
3234 * 3235 * - split destination inode's inline extents. The inline extents can 3236 * be either compressed or non-compressed. 3237 */ 3238 3239 /* the destination must be opened for writing */ 3240 if (!(file->f_mode & FMODE_WRITE) || (file->f_flags & O_APPEND)) 3241 return -EINVAL; 3242 3243 if (btrfs_root_readonly(root)) 3244 return -EROFS; 3245 3246 ret = mnt_want_write_file(file); 3247 if (ret) 3248 return ret; 3249 3250 src_file = fdget(srcfd); 3251 if (!src_file.file) { 3252 ret = -EBADF; 3253 goto out_drop_write; 3254 } 3255 3256 ret = -EXDEV; 3257 if (src_file.file->f_path.mnt != file->f_path.mnt) 3258 goto out_fput; 3259 3260 src = file_inode(src_file.file); 3261 3262 ret = -EINVAL; 3263 if (src == inode) 3264 same_inode = 1; 3265 3266 /* the src must be open for reading */ 3267 if (!(src_file.file->f_mode & FMODE_READ)) 3268 goto out_fput; 3269 3270 /* don't make the dst file partly checksummed */ 3271 if ((BTRFS_I(src)->flags & BTRFS_INODE_NODATASUM) != 3272 (BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM)) 3273 goto out_fput; 3274 3275 ret = -EISDIR; 3276 if (S_ISDIR(src->i_mode) || S_ISDIR(inode->i_mode)) 3277 goto out_fput; 3278 3279 ret = -EXDEV; 3280 if (src->i_sb != inode->i_sb) 3281 goto out_fput; 3282 3283 if (!same_inode) { 3284 if (inode < src) { 3285 mutex_lock_nested(&inode->i_mutex, I_MUTEX_PARENT); 3286 mutex_lock_nested(&src->i_mutex, I_MUTEX_CHILD); 3287 } else { 3288 mutex_lock_nested(&src->i_mutex, I_MUTEX_PARENT); 3289 mutex_lock_nested(&inode->i_mutex, I_MUTEX_CHILD); 3290 } 3291 } else { 3292 mutex_lock(&src->i_mutex); 3293 } 3294 3295 /* determine range to clone */ 3296 ret = -EINVAL; 3297 if (off + len > src->i_size || off + len < off) 3298 goto out_unlock; 3299 if (len == 0) 3300 olen = len = src->i_size - off; 3301 /* if we extend to eof, continue to block boundary */ 3302 if (off + len == src->i_size) 3303 len = ALIGN(src->i_size, bs) - off; 3304 3305 /* verify the end result is block aligned */ 3306 if (!IS_ALIGNED(off, bs) || 
!IS_ALIGNED(off + len, bs) || 3307 !IS_ALIGNED(destoff, bs)) 3308 goto out_unlock; 3309 3310 /* verify if ranges are overlapped within the same file */ 3311 if (same_inode) { 3312 if (destoff + len > off && destoff < off + len) 3313 goto out_unlock; 3314 } 3315 3316 if (destoff > inode->i_size) { 3317 ret = btrfs_cont_expand(inode, inode->i_size, destoff); 3318 if (ret) 3319 goto out_unlock; 3320 } 3321 3322 /* truncate page cache pages from target inode range */ 3323 truncate_inode_pages_range(&inode->i_data, destoff, 3324 PAGE_CACHE_ALIGN(destoff + len) - 1); 3325 3326 lock_extent_range(src, off, len); 3327 3328 ret = btrfs_clone(src, inode, off, olen, len, destoff); 3329 3330 unlock_extent(&BTRFS_I(src)->io_tree, off, off + len - 1); 3331 out_unlock: 3332 if (!same_inode) { 3333 if (inode < src) { 3334 mutex_unlock(&src->i_mutex); 3335 mutex_unlock(&inode->i_mutex); 3336 } else { 3337 mutex_unlock(&inode->i_mutex); 3338 mutex_unlock(&src->i_mutex); 3339 } 3340 } else { 3341 mutex_unlock(&src->i_mutex); 3342 } 3343 out_fput: 3344 fdput(src_file); 3345 out_drop_write: 3346 mnt_drop_write_file(file); 3347 return ret; 3348 } 3349 3350 static long btrfs_ioctl_clone_range(struct file *file, void __user *argp) 3351 { 3352 struct btrfs_ioctl_clone_range_args args; 3353 3354 if (copy_from_user(&args, argp, sizeof(args))) 3355 return -EFAULT; 3356 return btrfs_ioctl_clone(file, args.src_fd, args.src_offset, 3357 args.src_length, args.dest_offset); 3358 } 3359 3360 /* 3361 * there are many ways the trans_start and trans_end ioctls can lead 3362 * to deadlocks. They should only be used by applications that 3363 * basically own the machine, and have a very in depth understanding 3364 * of all the possible deadlocks and enospc problems. 
3365 */ 3366 static long btrfs_ioctl_trans_start(struct file *file) 3367 { 3368 struct inode *inode = file_inode(file); 3369 struct btrfs_root *root = BTRFS_I(inode)->root; 3370 struct btrfs_trans_handle *trans; 3371 int ret; 3372 3373 ret = -EPERM; 3374 if (!capable(CAP_SYS_ADMIN)) 3375 goto out; 3376 3377 ret = -EINPROGRESS; 3378 if (file->private_data) 3379 goto out; 3380 3381 ret = -EROFS; 3382 if (btrfs_root_readonly(root)) 3383 goto out; 3384 3385 ret = mnt_want_write_file(file); 3386 if (ret) 3387 goto out; 3388 3389 atomic_inc(&root->fs_info->open_ioctl_trans); 3390 3391 ret = -ENOMEM; 3392 trans = btrfs_start_ioctl_transaction(root); 3393 if (IS_ERR(trans)) 3394 goto out_drop; 3395 3396 file->private_data = trans; 3397 return 0; 3398 3399 out_drop: 3400 atomic_dec(&root->fs_info->open_ioctl_trans); 3401 mnt_drop_write_file(file); 3402 out: 3403 return ret; 3404 } 3405 3406 static long btrfs_ioctl_default_subvol(struct file *file, void __user *argp) 3407 { 3408 struct inode *inode = file_inode(file); 3409 struct btrfs_root *root = BTRFS_I(inode)->root; 3410 struct btrfs_root *new_root; 3411 struct btrfs_dir_item *di; 3412 struct btrfs_trans_handle *trans; 3413 struct btrfs_path *path; 3414 struct btrfs_key location; 3415 struct btrfs_disk_key disk_key; 3416 u64 objectid = 0; 3417 u64 dir_id; 3418 int ret; 3419 3420 if (!capable(CAP_SYS_ADMIN)) 3421 return -EPERM; 3422 3423 ret = mnt_want_write_file(file); 3424 if (ret) 3425 return ret; 3426 3427 if (copy_from_user(&objectid, argp, sizeof(objectid))) { 3428 ret = -EFAULT; 3429 goto out; 3430 } 3431 3432 if (!objectid) 3433 objectid = BTRFS_FS_TREE_OBJECTID; 3434 3435 location.objectid = objectid; 3436 location.type = BTRFS_ROOT_ITEM_KEY; 3437 location.offset = (u64)-1; 3438 3439 new_root = btrfs_read_fs_root_no_name(root->fs_info, &location); 3440 if (IS_ERR(new_root)) { 3441 ret = PTR_ERR(new_root); 3442 goto out; 3443 } 3444 3445 path = btrfs_alloc_path(); 3446 if (!path) { 3447 ret = -ENOMEM; 3448 goto 
out; 3449 } 3450 path->leave_spinning = 1; 3451 3452 trans = btrfs_start_transaction(root, 1); 3453 if (IS_ERR(trans)) { 3454 btrfs_free_path(path); 3455 ret = PTR_ERR(trans); 3456 goto out; 3457 } 3458 3459 dir_id = btrfs_super_root_dir(root->fs_info->super_copy); 3460 di = btrfs_lookup_dir_item(trans, root->fs_info->tree_root, path, 3461 dir_id, "default", 7, 1); 3462 if (IS_ERR_OR_NULL(di)) { 3463 btrfs_free_path(path); 3464 btrfs_end_transaction(trans, root); 3465 btrfs_err(new_root->fs_info, "Umm, you don't have the default dir" 3466 "item, this isn't going to work"); 3467 ret = -ENOENT; 3468 goto out; 3469 } 3470 3471 btrfs_cpu_key_to_disk(&disk_key, &new_root->root_key); 3472 btrfs_set_dir_item_key(path->nodes[0], di, &disk_key); 3473 btrfs_mark_buffer_dirty(path->nodes[0]); 3474 btrfs_free_path(path); 3475 3476 btrfs_set_fs_incompat(root->fs_info, DEFAULT_SUBVOL); 3477 btrfs_end_transaction(trans, root); 3478 out: 3479 mnt_drop_write_file(file); 3480 return ret; 3481 } 3482 3483 void btrfs_get_block_group_info(struct list_head *groups_list, 3484 struct btrfs_ioctl_space_info *space) 3485 { 3486 struct btrfs_block_group_cache *block_group; 3487 3488 space->total_bytes = 0; 3489 space->used_bytes = 0; 3490 space->flags = 0; 3491 list_for_each_entry(block_group, groups_list, list) { 3492 space->flags = block_group->flags; 3493 space->total_bytes += block_group->key.offset; 3494 space->used_bytes += 3495 btrfs_block_group_used(&block_group->item); 3496 } 3497 } 3498 3499 static long btrfs_ioctl_space_info(struct btrfs_root *root, void __user *arg) 3500 { 3501 struct btrfs_ioctl_space_args space_args; 3502 struct btrfs_ioctl_space_info space; 3503 struct btrfs_ioctl_space_info *dest; 3504 struct btrfs_ioctl_space_info *dest_orig; 3505 struct btrfs_ioctl_space_info __user *user_dest; 3506 struct btrfs_space_info *info; 3507 u64 types[] = {BTRFS_BLOCK_GROUP_DATA, 3508 BTRFS_BLOCK_GROUP_SYSTEM, 3509 BTRFS_BLOCK_GROUP_METADATA, 3510 BTRFS_BLOCK_GROUP_DATA | 
BTRFS_BLOCK_GROUP_METADATA}; 3511 int num_types = 4; 3512 int alloc_size; 3513 int ret = 0; 3514 u64 slot_count = 0; 3515 int i, c; 3516 3517 if (copy_from_user(&space_args, 3518 (struct btrfs_ioctl_space_args __user *)arg, 3519 sizeof(space_args))) 3520 return -EFAULT; 3521 3522 for (i = 0; i < num_types; i++) { 3523 struct btrfs_space_info *tmp; 3524 3525 info = NULL; 3526 rcu_read_lock(); 3527 list_for_each_entry_rcu(tmp, &root->fs_info->space_info, 3528 list) { 3529 if (tmp->flags == types[i]) { 3530 info = tmp; 3531 break; 3532 } 3533 } 3534 rcu_read_unlock(); 3535 3536 if (!info) 3537 continue; 3538 3539 down_read(&info->groups_sem); 3540 for (c = 0; c < BTRFS_NR_RAID_TYPES; c++) { 3541 if (!list_empty(&info->block_groups[c])) 3542 slot_count++; 3543 } 3544 up_read(&info->groups_sem); 3545 } 3546 3547 /* 3548 * Global block reserve, exported as a space_info 3549 */ 3550 slot_count++; 3551 3552 /* space_slots == 0 means they are asking for a count */ 3553 if (space_args.space_slots == 0) { 3554 space_args.total_spaces = slot_count; 3555 goto out; 3556 } 3557 3558 slot_count = min_t(u64, space_args.space_slots, slot_count); 3559 3560 alloc_size = sizeof(*dest) * slot_count; 3561 3562 /* we generally have at most 6 or so space infos, one for each raid 3563 * level. 
So, a whole page should be more than enough for everyone 3564 */ 3565 if (alloc_size > PAGE_CACHE_SIZE) 3566 return -ENOMEM; 3567 3568 space_args.total_spaces = 0; 3569 dest = kmalloc(alloc_size, GFP_NOFS); 3570 if (!dest) 3571 return -ENOMEM; 3572 dest_orig = dest; 3573 3574 /* now we have a buffer to copy into */ 3575 for (i = 0; i < num_types; i++) { 3576 struct btrfs_space_info *tmp; 3577 3578 if (!slot_count) 3579 break; 3580 3581 info = NULL; 3582 rcu_read_lock(); 3583 list_for_each_entry_rcu(tmp, &root->fs_info->space_info, 3584 list) { 3585 if (tmp->flags == types[i]) { 3586 info = tmp; 3587 break; 3588 } 3589 } 3590 rcu_read_unlock(); 3591 3592 if (!info) 3593 continue; 3594 down_read(&info->groups_sem); 3595 for (c = 0; c < BTRFS_NR_RAID_TYPES; c++) { 3596 if (!list_empty(&info->block_groups[c])) { 3597 btrfs_get_block_group_info( 3598 &info->block_groups[c], &space); 3599 memcpy(dest, &space, sizeof(space)); 3600 dest++; 3601 space_args.total_spaces++; 3602 slot_count--; 3603 } 3604 if (!slot_count) 3605 break; 3606 } 3607 up_read(&info->groups_sem); 3608 } 3609 3610 /* 3611 * Add global block reserve 3612 */ 3613 if (slot_count) { 3614 struct btrfs_block_rsv *block_rsv = &root->fs_info->global_block_rsv; 3615 3616 spin_lock(&block_rsv->lock); 3617 space.total_bytes = block_rsv->size; 3618 space.used_bytes = block_rsv->size - block_rsv->reserved; 3619 spin_unlock(&block_rsv->lock); 3620 space.flags = BTRFS_SPACE_INFO_GLOBAL_RSV; 3621 memcpy(dest, &space, sizeof(space)); 3622 space_args.total_spaces++; 3623 } 3624 3625 user_dest = (struct btrfs_ioctl_space_info __user *) 3626 (arg + sizeof(struct btrfs_ioctl_space_args)); 3627 3628 if (copy_to_user(user_dest, dest_orig, alloc_size)) 3629 ret = -EFAULT; 3630 3631 kfree(dest_orig); 3632 out: 3633 if (ret == 0 && copy_to_user(arg, &space_args, sizeof(space_args))) 3634 ret = -EFAULT; 3635 3636 return ret; 3637 } 3638 3639 /* 3640 * there are many ways the trans_start and trans_end ioctls can lead 3641 * to 
deadlocks. They should only be used by applications that 3642 * basically own the machine, and have a very in depth understanding 3643 * of all the possible deadlocks and enospc problems. 3644 */ 3645 long btrfs_ioctl_trans_end(struct file *file) 3646 { 3647 struct inode *inode = file_inode(file); 3648 struct btrfs_root *root = BTRFS_I(inode)->root; 3649 struct btrfs_trans_handle *trans; 3650 3651 trans = file->private_data; 3652 if (!trans) 3653 return -EINVAL; 3654 file->private_data = NULL; 3655 3656 btrfs_end_transaction(trans, root); 3657 3658 atomic_dec(&root->fs_info->open_ioctl_trans); 3659 3660 mnt_drop_write_file(file); 3661 return 0; 3662 } 3663 3664 static noinline long btrfs_ioctl_start_sync(struct btrfs_root *root, 3665 void __user *argp) 3666 { 3667 struct btrfs_trans_handle *trans; 3668 u64 transid; 3669 int ret; 3670 3671 trans = btrfs_attach_transaction_barrier(root); 3672 if (IS_ERR(trans)) { 3673 if (PTR_ERR(trans) != -ENOENT) 3674 return PTR_ERR(trans); 3675 3676 /* No running transaction, don't bother */ 3677 transid = root->fs_info->last_trans_committed; 3678 goto out; 3679 } 3680 transid = trans->transid; 3681 ret = btrfs_commit_transaction_async(trans, root, 0); 3682 if (ret) { 3683 btrfs_end_transaction(trans, root); 3684 return ret; 3685 } 3686 out: 3687 if (argp) 3688 if (copy_to_user(argp, &transid, sizeof(transid))) 3689 return -EFAULT; 3690 return 0; 3691 } 3692 3693 static noinline long btrfs_ioctl_wait_sync(struct btrfs_root *root, 3694 void __user *argp) 3695 { 3696 u64 transid; 3697 3698 if (argp) { 3699 if (copy_from_user(&transid, argp, sizeof(transid))) 3700 return -EFAULT; 3701 } else { 3702 transid = 0; /* current trans */ 3703 } 3704 return btrfs_wait_for_commit(root, transid); 3705 } 3706 3707 static long btrfs_ioctl_scrub(struct file *file, void __user *arg) 3708 { 3709 struct btrfs_root *root = BTRFS_I(file_inode(file))->root; 3710 struct btrfs_ioctl_scrub_args *sa; 3711 int ret; 3712 3713 if (!capable(CAP_SYS_ADMIN)) 
3714 return -EPERM; 3715 3716 sa = memdup_user(arg, sizeof(*sa)); 3717 if (IS_ERR(sa)) 3718 return PTR_ERR(sa); 3719 3720 if (!(sa->flags & BTRFS_SCRUB_READONLY)) { 3721 ret = mnt_want_write_file(file); 3722 if (ret) 3723 goto out; 3724 } 3725 3726 ret = btrfs_scrub_dev(root->fs_info, sa->devid, sa->start, sa->end, 3727 &sa->progress, sa->flags & BTRFS_SCRUB_READONLY, 3728 0); 3729 3730 if (copy_to_user(arg, sa, sizeof(*sa))) 3731 ret = -EFAULT; 3732 3733 if (!(sa->flags & BTRFS_SCRUB_READONLY)) 3734 mnt_drop_write_file(file); 3735 out: 3736 kfree(sa); 3737 return ret; 3738 } 3739 3740 static long btrfs_ioctl_scrub_cancel(struct btrfs_root *root, void __user *arg) 3741 { 3742 if (!capable(CAP_SYS_ADMIN)) 3743 return -EPERM; 3744 3745 return btrfs_scrub_cancel(root->fs_info); 3746 } 3747 3748 static long btrfs_ioctl_scrub_progress(struct btrfs_root *root, 3749 void __user *arg) 3750 { 3751 struct btrfs_ioctl_scrub_args *sa; 3752 int ret; 3753 3754 if (!capable(CAP_SYS_ADMIN)) 3755 return -EPERM; 3756 3757 sa = memdup_user(arg, sizeof(*sa)); 3758 if (IS_ERR(sa)) 3759 return PTR_ERR(sa); 3760 3761 ret = btrfs_scrub_progress(root, sa->devid, &sa->progress); 3762 3763 if (copy_to_user(arg, sa, sizeof(*sa))) 3764 ret = -EFAULT; 3765 3766 kfree(sa); 3767 return ret; 3768 } 3769 3770 static long btrfs_ioctl_get_dev_stats(struct btrfs_root *root, 3771 void __user *arg) 3772 { 3773 struct btrfs_ioctl_get_dev_stats *sa; 3774 int ret; 3775 3776 sa = memdup_user(arg, sizeof(*sa)); 3777 if (IS_ERR(sa)) 3778 return PTR_ERR(sa); 3779 3780 if ((sa->flags & BTRFS_DEV_STATS_RESET) && !capable(CAP_SYS_ADMIN)) { 3781 kfree(sa); 3782 return -EPERM; 3783 } 3784 3785 ret = btrfs_get_dev_stats(root, sa); 3786 3787 if (copy_to_user(arg, sa, sizeof(*sa))) 3788 ret = -EFAULT; 3789 3790 kfree(sa); 3791 return ret; 3792 } 3793 3794 static long btrfs_ioctl_dev_replace(struct btrfs_root *root, void __user *arg) 3795 { 3796 struct btrfs_ioctl_dev_replace_args *p; 3797 int ret; 3798 3799 if 
(!capable(CAP_SYS_ADMIN)) 3800 return -EPERM; 3801 3802 p = memdup_user(arg, sizeof(*p)); 3803 if (IS_ERR(p)) 3804 return PTR_ERR(p); 3805 3806 switch (p->cmd) { 3807 case BTRFS_IOCTL_DEV_REPLACE_CMD_START: 3808 if (root->fs_info->sb->s_flags & MS_RDONLY) { 3809 ret = -EROFS; 3810 goto out; 3811 } 3812 if (atomic_xchg( 3813 &root->fs_info->mutually_exclusive_operation_running, 3814 1)) { 3815 ret = BTRFS_ERROR_DEV_EXCL_RUN_IN_PROGRESS; 3816 } else { 3817 ret = btrfs_dev_replace_start(root, p); 3818 atomic_set( 3819 &root->fs_info->mutually_exclusive_operation_running, 3820 0); 3821 } 3822 break; 3823 case BTRFS_IOCTL_DEV_REPLACE_CMD_STATUS: 3824 btrfs_dev_replace_status(root->fs_info, p); 3825 ret = 0; 3826 break; 3827 case BTRFS_IOCTL_DEV_REPLACE_CMD_CANCEL: 3828 ret = btrfs_dev_replace_cancel(root->fs_info, p); 3829 break; 3830 default: 3831 ret = -EINVAL; 3832 break; 3833 } 3834 3835 if (copy_to_user(arg, p, sizeof(*p))) 3836 ret = -EFAULT; 3837 out: 3838 kfree(p); 3839 return ret; 3840 } 3841 3842 static long btrfs_ioctl_ino_to_path(struct btrfs_root *root, void __user *arg) 3843 { 3844 int ret = 0; 3845 int i; 3846 u64 rel_ptr; 3847 int size; 3848 struct btrfs_ioctl_ino_path_args *ipa = NULL; 3849 struct inode_fs_paths *ipath = NULL; 3850 struct btrfs_path *path; 3851 3852 if (!capable(CAP_DAC_READ_SEARCH)) 3853 return -EPERM; 3854 3855 path = btrfs_alloc_path(); 3856 if (!path) { 3857 ret = -ENOMEM; 3858 goto out; 3859 } 3860 3861 ipa = memdup_user(arg, sizeof(*ipa)); 3862 if (IS_ERR(ipa)) { 3863 ret = PTR_ERR(ipa); 3864 ipa = NULL; 3865 goto out; 3866 } 3867 3868 size = min_t(u32, ipa->size, 4096); 3869 ipath = init_ipath(size, root, path); 3870 if (IS_ERR(ipath)) { 3871 ret = PTR_ERR(ipath); 3872 ipath = NULL; 3873 goto out; 3874 } 3875 3876 ret = paths_from_inode(ipa->inum, ipath); 3877 if (ret < 0) 3878 goto out; 3879 3880 for (i = 0; i < ipath->fspath->elem_cnt; ++i) { 3881 rel_ptr = ipath->fspath->val[i] - 3882 (u64)(unsigned long)ipath->fspath->val; 
3883 ipath->fspath->val[i] = rel_ptr; 3884 } 3885 3886 ret = copy_to_user((void *)(unsigned long)ipa->fspath, 3887 (void *)(unsigned long)ipath->fspath, size); 3888 if (ret) { 3889 ret = -EFAULT; 3890 goto out; 3891 } 3892 3893 out: 3894 btrfs_free_path(path); 3895 free_ipath(ipath); 3896 kfree(ipa); 3897 3898 return ret; 3899 } 3900 3901 static int build_ino_list(u64 inum, u64 offset, u64 root, void *ctx) 3902 { 3903 struct btrfs_data_container *inodes = ctx; 3904 const size_t c = 3 * sizeof(u64); 3905 3906 if (inodes->bytes_left >= c) { 3907 inodes->bytes_left -= c; 3908 inodes->val[inodes->elem_cnt] = inum; 3909 inodes->val[inodes->elem_cnt + 1] = offset; 3910 inodes->val[inodes->elem_cnt + 2] = root; 3911 inodes->elem_cnt += 3; 3912 } else { 3913 inodes->bytes_missing += c - inodes->bytes_left; 3914 inodes->bytes_left = 0; 3915 inodes->elem_missed += 3; 3916 } 3917 3918 return 0; 3919 } 3920 3921 static long btrfs_ioctl_logical_to_ino(struct btrfs_root *root, 3922 void __user *arg) 3923 { 3924 int ret = 0; 3925 int size; 3926 struct btrfs_ioctl_logical_ino_args *loi; 3927 struct btrfs_data_container *inodes = NULL; 3928 struct btrfs_path *path = NULL; 3929 3930 if (!capable(CAP_SYS_ADMIN)) 3931 return -EPERM; 3932 3933 loi = memdup_user(arg, sizeof(*loi)); 3934 if (IS_ERR(loi)) { 3935 ret = PTR_ERR(loi); 3936 loi = NULL; 3937 goto out; 3938 } 3939 3940 path = btrfs_alloc_path(); 3941 if (!path) { 3942 ret = -ENOMEM; 3943 goto out; 3944 } 3945 3946 size = min_t(u32, loi->size, 64 * 1024); 3947 inodes = init_data_container(size); 3948 if (IS_ERR(inodes)) { 3949 ret = PTR_ERR(inodes); 3950 inodes = NULL; 3951 goto out; 3952 } 3953 3954 ret = iterate_inodes_from_logical(loi->logical, root->fs_info, path, 3955 build_ino_list, inodes); 3956 if (ret == -EINVAL) 3957 ret = -ENOENT; 3958 if (ret < 0) 3959 goto out; 3960 3961 ret = copy_to_user((void *)(unsigned long)loi->inodes, 3962 (void *)(unsigned long)inodes, size); 3963 if (ret) 3964 ret = -EFAULT; 3965 3966 out: 
btrfs_free_path(path);
	vfree(inodes);
	kfree(loi);

	return ret;
}

/*
 * Fill @bargs from the filesystem's current balance control structure.
 *
 * Copies the flags, the data/meta/sys argument blocks and the stat block
 * from fs_info->balance_ctl, which is dereferenced unconditionally.  The
 * state bits are OR-ed into bargs->state rather than assigned, so
 * whatever the caller left there is kept.
 *
 * @lock: when non-zero, balance_lock is taken around the copy of the stat
 *        block; when zero the copy is done without taking it.
 */
void update_ioctl_balance_args(struct btrfs_fs_info *fs_info, int lock,
			       struct btrfs_ioctl_balance_args *bargs)
{
	struct btrfs_balance_control *bctl = fs_info->balance_ctl;

	bargs->flags = bctl->flags;

	if (atomic_read(&fs_info->balance_running))
		bargs->state |= BTRFS_BALANCE_STATE_RUNNING;
	if (atomic_read(&fs_info->balance_pause_req))
		bargs->state |= BTRFS_BALANCE_STATE_PAUSE_REQ;
	if (atomic_read(&fs_info->balance_cancel_req))
		bargs->state |= BTRFS_BALANCE_STATE_CANCEL_REQ;

	memcpy(&bargs->data, &bctl->data, sizeof(bargs->data));
	memcpy(&bargs->meta, &bctl->meta, sizeof(bargs->meta));
	memcpy(&bargs->sys, &bctl->sys, sizeof(bargs->sys));

	if (lock) {
		spin_lock(&fs_info->balance_lock);
		memcpy(&bargs->stat, &bctl->stat, sizeof(bargs->stat));
		spin_unlock(&fs_info->balance_lock);
	} else {
		memcpy(&bargs->stat, &bctl->stat, sizeof(bargs->stat));
	}
}

/*
 * Start a new balance or resume a paused one.
 *
 * @arg is either NULL (balance everything, no filters) or a user pointer
 * to a btrfs_ioctl_balance_args; when it is non-NULL the args are copied
 * back to user space after btrfs_balance() returns.
 *
 * Requires CAP_SYS_ADMIN and write access to the mount.  Returns
 * -EINPROGRESS if a balance is already running or already set up,
 * -ENOTCONN if a resume was requested but there is nothing to resume, and
 * BTRFS_ERROR_DEV_EXCL_RUN_IN_PROGRESS if a different exclusive operation
 * holds the flag.
 */
static long btrfs_ioctl_balance(struct file *file, void __user *arg)
{
	struct btrfs_root *root = BTRFS_I(file_inode(file))->root;
	struct btrfs_fs_info *fs_info = root->fs_info;
	struct btrfs_ioctl_balance_args *bargs;
	struct btrfs_balance_control *bctl;
	bool need_unlock; /* for mut. excl. ops lock */
	int ret;

	if (!capable(CAP_SYS_ADMIN))
		return -EPERM;

	ret = mnt_want_write_file(file);
	if (ret)
		return ret;

again:
	/* atomic_xchg() returned 0: the exclusive-op flag was free, we own it */
	if (!atomic_xchg(&fs_info->mutually_exclusive_operation_running, 1)) {
		mutex_lock(&fs_info->volume_mutex);
		mutex_lock(&fs_info->balance_mutex);
		need_unlock = true;
		goto locked;
	}

	/*
	 * mut. excl. ops lock is locked.  Three possibilities:
	 *   (1) some other op is running
	 *   (2) balance is running
	 *   (3) balance is paused -- special case (think resume)
	 */
	mutex_lock(&fs_info->balance_mutex);
	if (fs_info->balance_ctl) {
		/* this is either (2) or (3) */
		if (!atomic_read(&fs_info->balance_running)) {
			/*
			 * balance_mutex is dropped so that volume_mutex can
			 * be taken first, matching the order used above; the
			 * state is re-checked once both are held.
			 */
			mutex_unlock(&fs_info->balance_mutex);
			if (!mutex_trylock(&fs_info->volume_mutex))
				goto again;
			mutex_lock(&fs_info->balance_mutex);

			if (fs_info->balance_ctl &&
			    !atomic_read(&fs_info->balance_running)) {
				/* this is (3) */
				need_unlock = false;
				goto locked;
			}

			mutex_unlock(&fs_info->balance_mutex);
			mutex_unlock(&fs_info->volume_mutex);
			goto again;
		} else {
			/* this is (2) */
			mutex_unlock(&fs_info->balance_mutex);
			ret = -EINPROGRESS;
			goto out;
		}
	} else {
		/* this is (1) */
		mutex_unlock(&fs_info->balance_mutex);
		ret = BTRFS_ERROR_DEV_EXCL_RUN_IN_PROGRESS;
		goto out;
	}

locked:
	/* from here on volume_mutex and balance_mutex are both held */
	BUG_ON(!atomic_read(&fs_info->mutually_exclusive_operation_running));

	if (arg) {
		bargs = memdup_user(arg, sizeof(*bargs));
		if (IS_ERR(bargs)) {
			ret = PTR_ERR(bargs);
			goto out_unlock;
		}

		if (bargs->flags & BTRFS_BALANCE_RESUME) {
			if (!fs_info->balance_ctl) {
				ret = -ENOTCONN;
				goto out_bargs;
			}

			/* resume: reuse the existing control structure */
			bctl = fs_info->balance_ctl;
			spin_lock(&fs_info->balance_lock);
			bctl->flags |= BTRFS_BALANCE_RESUME;
			spin_unlock(&fs_info->balance_lock);

			goto do_balance;
		}
	} else {
		bargs = NULL;
	}

	/* a fresh balance was requested but one is already set up */
	if (fs_info->balance_ctl) {
		ret = -EINPROGRESS;
		goto out_bargs;
	}

	bctl = kzalloc(sizeof(*bctl), GFP_NOFS);
	if (!bctl) {
		ret = -ENOMEM;
		goto out_bargs;
	}

	bctl->fs_info = fs_info;
	if (arg) {
		memcpy(&bctl->data, &bargs->data, sizeof(bctl->data));
		memcpy(&bctl->meta, &bargs->meta, sizeof(bctl->meta));
		memcpy(&bctl->sys, &bargs->sys, sizeof(bctl->sys));

		bctl->flags = bargs->flags;
	} else {
		/* balance everything - no filters */
		bctl->flags |= BTRFS_BALANCE_TYPE_MASK;
	}

do_balance:
	/*
	 * Ownership of bctl and mutually_exclusive_operation_running
	 * goes to btrfs_balance.  bctl is freed in __cancel_balance,
	 * or, if restriper was paused all the way until unmount, in
	 * free_fs_info.  mutually_exclusive_operation_running is
	 * cleared in __cancel_balance.
	 */
	need_unlock = false;

	ret = btrfs_balance(bctl, bargs);

	if (arg) {
		if (copy_to_user(arg, bargs, sizeof(*bargs)))
			ret = -EFAULT;
	}

out_bargs:
	/* bargs is NULL here when arg was NULL; kfree() is given it as is */
	kfree(bargs);
out_unlock:
	mutex_unlock(&fs_info->balance_mutex);
	mutex_unlock(&fs_info->volume_mutex);
	if (need_unlock)
		atomic_set(&fs_info->mutually_exclusive_operation_running, 0);
out:
	mnt_drop_write_file(file);
	return ret;
}

/*
 * Pause or cancel a balance, selected by @cmd (BTRFS_BALANCE_CTL_PAUSE or
 * BTRFS_BALANCE_CTL_CANCEL).  Any other @cmd yields -EINVAL.  Requires
 * CAP_SYS_ADMIN.
 */
static long btrfs_ioctl_balance_ctl(struct btrfs_root *root, int cmd)
{
	if (!capable(CAP_SYS_ADMIN))
		return -EPERM;

	switch (cmd) {
	case BTRFS_BALANCE_CTL_PAUSE:
		return btrfs_pause_balance(root->fs_info);
	case BTRFS_BALANCE_CTL_CANCEL:
		return btrfs_cancel_balance(root->fs_info);
	}

	return -EINVAL;
}

/*
 * Report the state of the current balance to user space as a
 * btrfs_ioctl_balance_args.  Returns -ENOTCONN when fs_info->balance_ctl
 * is not set.  Requires CAP_SYS_ADMIN.
 */
static long btrfs_ioctl_balance_progress(struct btrfs_root *root,
					 void __user *arg)
{
	struct btrfs_fs_info *fs_info = root->fs_info;
	struct btrfs_ioctl_balance_args *bargs;
	int ret = 0;

	if (!capable(CAP_SYS_ADMIN))
		return -EPERM;

	mutex_lock(&fs_info->balance_mutex);
	if (!fs_info->balance_ctl) {
		ret = -ENOTCONN;
		goto out;
	}

	/* zeroed allocation: update_ioctl_balance_args() only ORs into state */
	bargs = kzalloc(sizeof(*bargs), GFP_NOFS);
	if (!bargs) {
		ret = -ENOMEM;
		goto out;
	}

	update_ioctl_balance_args(fs_info, 1, bargs);

	if
(copy_to_user(arg, bargs, sizeof(*bargs))) 4182 ret = -EFAULT; 4183 4184 kfree(bargs); 4185 out: 4186 mutex_unlock(&fs_info->balance_mutex); 4187 return ret; 4188 } 4189 4190 static long btrfs_ioctl_quota_ctl(struct file *file, void __user *arg) 4191 { 4192 struct btrfs_root *root = BTRFS_I(file_inode(file))->root; 4193 struct btrfs_ioctl_quota_ctl_args *sa; 4194 struct btrfs_trans_handle *trans = NULL; 4195 int ret; 4196 int err; 4197 4198 if (!capable(CAP_SYS_ADMIN)) 4199 return -EPERM; 4200 4201 ret = mnt_want_write_file(file); 4202 if (ret) 4203 return ret; 4204 4205 sa = memdup_user(arg, sizeof(*sa)); 4206 if (IS_ERR(sa)) { 4207 ret = PTR_ERR(sa); 4208 goto drop_write; 4209 } 4210 4211 down_write(&root->fs_info->subvol_sem); 4212 trans = btrfs_start_transaction(root->fs_info->tree_root, 2); 4213 if (IS_ERR(trans)) { 4214 ret = PTR_ERR(trans); 4215 goto out; 4216 } 4217 4218 switch (sa->cmd) { 4219 case BTRFS_QUOTA_CTL_ENABLE: 4220 ret = btrfs_quota_enable(trans, root->fs_info); 4221 break; 4222 case BTRFS_QUOTA_CTL_DISABLE: 4223 ret = btrfs_quota_disable(trans, root->fs_info); 4224 break; 4225 default: 4226 ret = -EINVAL; 4227 break; 4228 } 4229 4230 err = btrfs_commit_transaction(trans, root->fs_info->tree_root); 4231 if (err && !ret) 4232 ret = err; 4233 out: 4234 kfree(sa); 4235 up_write(&root->fs_info->subvol_sem); 4236 drop_write: 4237 mnt_drop_write_file(file); 4238 return ret; 4239 } 4240 4241 static long btrfs_ioctl_qgroup_assign(struct file *file, void __user *arg) 4242 { 4243 struct btrfs_root *root = BTRFS_I(file_inode(file))->root; 4244 struct btrfs_ioctl_qgroup_assign_args *sa; 4245 struct btrfs_trans_handle *trans; 4246 int ret; 4247 int err; 4248 4249 if (!capable(CAP_SYS_ADMIN)) 4250 return -EPERM; 4251 4252 ret = mnt_want_write_file(file); 4253 if (ret) 4254 return ret; 4255 4256 sa = memdup_user(arg, sizeof(*sa)); 4257 if (IS_ERR(sa)) { 4258 ret = PTR_ERR(sa); 4259 goto drop_write; 4260 } 4261 4262 trans = btrfs_join_transaction(root); 4263 
if (IS_ERR(trans)) { 4264 ret = PTR_ERR(trans); 4265 goto out; 4266 } 4267 4268 /* FIXME: check if the IDs really exist */ 4269 if (sa->assign) { 4270 ret = btrfs_add_qgroup_relation(trans, root->fs_info, 4271 sa->src, sa->dst); 4272 } else { 4273 ret = btrfs_del_qgroup_relation(trans, root->fs_info, 4274 sa->src, sa->dst); 4275 } 4276 4277 err = btrfs_end_transaction(trans, root); 4278 if (err && !ret) 4279 ret = err; 4280 4281 out: 4282 kfree(sa); 4283 drop_write: 4284 mnt_drop_write_file(file); 4285 return ret; 4286 } 4287 4288 static long btrfs_ioctl_qgroup_create(struct file *file, void __user *arg) 4289 { 4290 struct btrfs_root *root = BTRFS_I(file_inode(file))->root; 4291 struct btrfs_ioctl_qgroup_create_args *sa; 4292 struct btrfs_trans_handle *trans; 4293 int ret; 4294 int err; 4295 4296 if (!capable(CAP_SYS_ADMIN)) 4297 return -EPERM; 4298 4299 ret = mnt_want_write_file(file); 4300 if (ret) 4301 return ret; 4302 4303 sa = memdup_user(arg, sizeof(*sa)); 4304 if (IS_ERR(sa)) { 4305 ret = PTR_ERR(sa); 4306 goto drop_write; 4307 } 4308 4309 if (!sa->qgroupid) { 4310 ret = -EINVAL; 4311 goto out; 4312 } 4313 4314 trans = btrfs_join_transaction(root); 4315 if (IS_ERR(trans)) { 4316 ret = PTR_ERR(trans); 4317 goto out; 4318 } 4319 4320 /* FIXME: check if the IDs really exist */ 4321 if (sa->create) { 4322 ret = btrfs_create_qgroup(trans, root->fs_info, sa->qgroupid, 4323 NULL); 4324 } else { 4325 ret = btrfs_remove_qgroup(trans, root->fs_info, sa->qgroupid); 4326 } 4327 4328 err = btrfs_end_transaction(trans, root); 4329 if (err && !ret) 4330 ret = err; 4331 4332 out: 4333 kfree(sa); 4334 drop_write: 4335 mnt_drop_write_file(file); 4336 return ret; 4337 } 4338 4339 static long btrfs_ioctl_qgroup_limit(struct file *file, void __user *arg) 4340 { 4341 struct btrfs_root *root = BTRFS_I(file_inode(file))->root; 4342 struct btrfs_ioctl_qgroup_limit_args *sa; 4343 struct btrfs_trans_handle *trans; 4344 int ret; 4345 int err; 4346 u64 qgroupid; 4347 4348 if 
(!capable(CAP_SYS_ADMIN)) 4349 return -EPERM; 4350 4351 ret = mnt_want_write_file(file); 4352 if (ret) 4353 return ret; 4354 4355 sa = memdup_user(arg, sizeof(*sa)); 4356 if (IS_ERR(sa)) { 4357 ret = PTR_ERR(sa); 4358 goto drop_write; 4359 } 4360 4361 trans = btrfs_join_transaction(root); 4362 if (IS_ERR(trans)) { 4363 ret = PTR_ERR(trans); 4364 goto out; 4365 } 4366 4367 qgroupid = sa->qgroupid; 4368 if (!qgroupid) { 4369 /* take the current subvol as qgroup */ 4370 qgroupid = root->root_key.objectid; 4371 } 4372 4373 /* FIXME: check if the IDs really exist */ 4374 ret = btrfs_limit_qgroup(trans, root->fs_info, qgroupid, &sa->lim); 4375 4376 err = btrfs_end_transaction(trans, root); 4377 if (err && !ret) 4378 ret = err; 4379 4380 out: 4381 kfree(sa); 4382 drop_write: 4383 mnt_drop_write_file(file); 4384 return ret; 4385 } 4386 4387 static long btrfs_ioctl_quota_rescan(struct file *file, void __user *arg) 4388 { 4389 struct btrfs_root *root = BTRFS_I(file_inode(file))->root; 4390 struct btrfs_ioctl_quota_rescan_args *qsa; 4391 int ret; 4392 4393 if (!capable(CAP_SYS_ADMIN)) 4394 return -EPERM; 4395 4396 ret = mnt_want_write_file(file); 4397 if (ret) 4398 return ret; 4399 4400 qsa = memdup_user(arg, sizeof(*qsa)); 4401 if (IS_ERR(qsa)) { 4402 ret = PTR_ERR(qsa); 4403 goto drop_write; 4404 } 4405 4406 if (qsa->flags) { 4407 ret = -EINVAL; 4408 goto out; 4409 } 4410 4411 ret = btrfs_qgroup_rescan(root->fs_info); 4412 4413 out: 4414 kfree(qsa); 4415 drop_write: 4416 mnt_drop_write_file(file); 4417 return ret; 4418 } 4419 4420 static long btrfs_ioctl_quota_rescan_status(struct file *file, void __user *arg) 4421 { 4422 struct btrfs_root *root = BTRFS_I(file_inode(file))->root; 4423 struct btrfs_ioctl_quota_rescan_args *qsa; 4424 int ret = 0; 4425 4426 if (!capable(CAP_SYS_ADMIN)) 4427 return -EPERM; 4428 4429 qsa = kzalloc(sizeof(*qsa), GFP_NOFS); 4430 if (!qsa) 4431 return -ENOMEM; 4432 4433 if (root->fs_info->qgroup_flags & BTRFS_QGROUP_STATUS_FLAG_RESCAN) { 4434 
qsa->flags = 1; 4435 qsa->progress = root->fs_info->qgroup_rescan_progress.objectid; 4436 } 4437 4438 if (copy_to_user(arg, qsa, sizeof(*qsa))) 4439 ret = -EFAULT; 4440 4441 kfree(qsa); 4442 return ret; 4443 } 4444 4445 static long btrfs_ioctl_quota_rescan_wait(struct file *file, void __user *arg) 4446 { 4447 struct btrfs_root *root = BTRFS_I(file_inode(file))->root; 4448 4449 if (!capable(CAP_SYS_ADMIN)) 4450 return -EPERM; 4451 4452 return btrfs_qgroup_wait_for_completion(root->fs_info); 4453 } 4454 4455 static long _btrfs_ioctl_set_received_subvol(struct file *file, 4456 struct btrfs_ioctl_received_subvol_args *sa) 4457 { 4458 struct inode *inode = file_inode(file); 4459 struct btrfs_root *root = BTRFS_I(inode)->root; 4460 struct btrfs_root_item *root_item = &root->root_item; 4461 struct btrfs_trans_handle *trans; 4462 struct timespec ct = CURRENT_TIME; 4463 int ret = 0; 4464 int received_uuid_changed; 4465 4466 if (!inode_owner_or_capable(inode)) 4467 return -EPERM; 4468 4469 ret = mnt_want_write_file(file); 4470 if (ret < 0) 4471 return ret; 4472 4473 down_write(&root->fs_info->subvol_sem); 4474 4475 if (btrfs_ino(inode) != BTRFS_FIRST_FREE_OBJECTID) { 4476 ret = -EINVAL; 4477 goto out; 4478 } 4479 4480 if (btrfs_root_readonly(root)) { 4481 ret = -EROFS; 4482 goto out; 4483 } 4484 4485 /* 4486 * 1 - root item 4487 * 2 - uuid items (received uuid + subvol uuid) 4488 */ 4489 trans = btrfs_start_transaction(root, 3); 4490 if (IS_ERR(trans)) { 4491 ret = PTR_ERR(trans); 4492 trans = NULL; 4493 goto out; 4494 } 4495 4496 sa->rtransid = trans->transid; 4497 sa->rtime.sec = ct.tv_sec; 4498 sa->rtime.nsec = ct.tv_nsec; 4499 4500 received_uuid_changed = memcmp(root_item->received_uuid, sa->uuid, 4501 BTRFS_UUID_SIZE); 4502 if (received_uuid_changed && 4503 !btrfs_is_empty_uuid(root_item->received_uuid)) 4504 btrfs_uuid_tree_rem(trans, root->fs_info->uuid_root, 4505 root_item->received_uuid, 4506 BTRFS_UUID_KEY_RECEIVED_SUBVOL, 4507 root->root_key.objectid); 4508 
memcpy(root_item->received_uuid, sa->uuid, BTRFS_UUID_SIZE); 4509 btrfs_set_root_stransid(root_item, sa->stransid); 4510 btrfs_set_root_rtransid(root_item, sa->rtransid); 4511 btrfs_set_stack_timespec_sec(&root_item->stime, sa->stime.sec); 4512 btrfs_set_stack_timespec_nsec(&root_item->stime, sa->stime.nsec); 4513 btrfs_set_stack_timespec_sec(&root_item->rtime, sa->rtime.sec); 4514 btrfs_set_stack_timespec_nsec(&root_item->rtime, sa->rtime.nsec); 4515 4516 ret = btrfs_update_root(trans, root->fs_info->tree_root, 4517 &root->root_key, &root->root_item); 4518 if (ret < 0) { 4519 btrfs_end_transaction(trans, root); 4520 goto out; 4521 } 4522 if (received_uuid_changed && !btrfs_is_empty_uuid(sa->uuid)) { 4523 ret = btrfs_uuid_tree_add(trans, root->fs_info->uuid_root, 4524 sa->uuid, 4525 BTRFS_UUID_KEY_RECEIVED_SUBVOL, 4526 root->root_key.objectid); 4527 if (ret < 0 && ret != -EEXIST) { 4528 btrfs_abort_transaction(trans, root, ret); 4529 goto out; 4530 } 4531 } 4532 ret = btrfs_commit_transaction(trans, root); 4533 if (ret < 0) { 4534 btrfs_abort_transaction(trans, root, ret); 4535 goto out; 4536 } 4537 4538 out: 4539 up_write(&root->fs_info->subvol_sem); 4540 mnt_drop_write_file(file); 4541 return ret; 4542 } 4543 4544 #ifdef CONFIG_64BIT 4545 static long btrfs_ioctl_set_received_subvol_32(struct file *file, 4546 void __user *arg) 4547 { 4548 struct btrfs_ioctl_received_subvol_args_32 *args32 = NULL; 4549 struct btrfs_ioctl_received_subvol_args *args64 = NULL; 4550 int ret = 0; 4551 4552 args32 = memdup_user(arg, sizeof(*args32)); 4553 if (IS_ERR(args32)) { 4554 ret = PTR_ERR(args32); 4555 args32 = NULL; 4556 goto out; 4557 } 4558 4559 args64 = kmalloc(sizeof(*args64), GFP_NOFS); 4560 if (!args64) { 4561 ret = -ENOMEM; 4562 goto out; 4563 } 4564 4565 memcpy(args64->uuid, args32->uuid, BTRFS_UUID_SIZE); 4566 args64->stransid = args32->stransid; 4567 args64->rtransid = args32->rtransid; 4568 args64->stime.sec = args32->stime.sec; 4569 args64->stime.nsec = 
args32->stime.nsec; 4570 args64->rtime.sec = args32->rtime.sec; 4571 args64->rtime.nsec = args32->rtime.nsec; 4572 args64->flags = args32->flags; 4573 4574 ret = _btrfs_ioctl_set_received_subvol(file, args64); 4575 if (ret) 4576 goto out; 4577 4578 memcpy(args32->uuid, args64->uuid, BTRFS_UUID_SIZE); 4579 args32->stransid = args64->stransid; 4580 args32->rtransid = args64->rtransid; 4581 args32->stime.sec = args64->stime.sec; 4582 args32->stime.nsec = args64->stime.nsec; 4583 args32->rtime.sec = args64->rtime.sec; 4584 args32->rtime.nsec = args64->rtime.nsec; 4585 args32->flags = args64->flags; 4586 4587 ret = copy_to_user(arg, args32, sizeof(*args32)); 4588 if (ret) 4589 ret = -EFAULT; 4590 4591 out: 4592 kfree(args32); 4593 kfree(args64); 4594 return ret; 4595 } 4596 #endif 4597 4598 static long btrfs_ioctl_set_received_subvol(struct file *file, 4599 void __user *arg) 4600 { 4601 struct btrfs_ioctl_received_subvol_args *sa = NULL; 4602 int ret = 0; 4603 4604 sa = memdup_user(arg, sizeof(*sa)); 4605 if (IS_ERR(sa)) { 4606 ret = PTR_ERR(sa); 4607 sa = NULL; 4608 goto out; 4609 } 4610 4611 ret = _btrfs_ioctl_set_received_subvol(file, sa); 4612 4613 if (ret) 4614 goto out; 4615 4616 ret = copy_to_user(arg, sa, sizeof(*sa)); 4617 if (ret) 4618 ret = -EFAULT; 4619 4620 out: 4621 kfree(sa); 4622 return ret; 4623 } 4624 4625 static int btrfs_ioctl_get_fslabel(struct file *file, void __user *arg) 4626 { 4627 struct btrfs_root *root = BTRFS_I(file_inode(file))->root; 4628 size_t len; 4629 int ret; 4630 char label[BTRFS_LABEL_SIZE]; 4631 4632 spin_lock(&root->fs_info->super_lock); 4633 memcpy(label, root->fs_info->super_copy->label, BTRFS_LABEL_SIZE); 4634 spin_unlock(&root->fs_info->super_lock); 4635 4636 len = strnlen(label, BTRFS_LABEL_SIZE); 4637 4638 if (len == BTRFS_LABEL_SIZE) { 4639 btrfs_warn(root->fs_info, 4640 "label is too long, return the first %zu bytes", --len); 4641 } 4642 4643 ret = copy_to_user(arg, label, len); 4644 4645 return ret ? 
-EFAULT : 0; 4646 } 4647 4648 static int btrfs_ioctl_set_fslabel(struct file *file, void __user *arg) 4649 { 4650 struct btrfs_root *root = BTRFS_I(file_inode(file))->root; 4651 struct btrfs_super_block *super_block = root->fs_info->super_copy; 4652 struct btrfs_trans_handle *trans; 4653 char label[BTRFS_LABEL_SIZE]; 4654 int ret; 4655 4656 if (!capable(CAP_SYS_ADMIN)) 4657 return -EPERM; 4658 4659 if (copy_from_user(label, arg, sizeof(label))) 4660 return -EFAULT; 4661 4662 if (strnlen(label, BTRFS_LABEL_SIZE) == BTRFS_LABEL_SIZE) { 4663 btrfs_err(root->fs_info, "unable to set label with more than %d bytes", 4664 BTRFS_LABEL_SIZE - 1); 4665 return -EINVAL; 4666 } 4667 4668 ret = mnt_want_write_file(file); 4669 if (ret) 4670 return ret; 4671 4672 trans = btrfs_start_transaction(root, 0); 4673 if (IS_ERR(trans)) { 4674 ret = PTR_ERR(trans); 4675 goto out_unlock; 4676 } 4677 4678 spin_lock(&root->fs_info->super_lock); 4679 strcpy(super_block->label, label); 4680 spin_unlock(&root->fs_info->super_lock); 4681 ret = btrfs_commit_transaction(trans, root); 4682 4683 out_unlock: 4684 mnt_drop_write_file(file); 4685 return ret; 4686 } 4687 4688 #define INIT_FEATURE_FLAGS(suffix) \ 4689 { .compat_flags = BTRFS_FEATURE_COMPAT_##suffix, \ 4690 .compat_ro_flags = BTRFS_FEATURE_COMPAT_RO_##suffix, \ 4691 .incompat_flags = BTRFS_FEATURE_INCOMPAT_##suffix } 4692 4693 static int btrfs_ioctl_get_supported_features(struct file *file, 4694 void __user *arg) 4695 { 4696 static struct btrfs_ioctl_feature_flags features[3] = { 4697 INIT_FEATURE_FLAGS(SUPP), 4698 INIT_FEATURE_FLAGS(SAFE_SET), 4699 INIT_FEATURE_FLAGS(SAFE_CLEAR) 4700 }; 4701 4702 if (copy_to_user(arg, &features, sizeof(features))) 4703 return -EFAULT; 4704 4705 return 0; 4706 } 4707 4708 static int btrfs_ioctl_get_features(struct file *file, void __user *arg) 4709 { 4710 struct btrfs_root *root = BTRFS_I(file_inode(file))->root; 4711 struct btrfs_super_block *super_block = root->fs_info->super_copy; 4712 struct 
btrfs_ioctl_feature_flags features; 4713 4714 features.compat_flags = btrfs_super_compat_flags(super_block); 4715 features.compat_ro_flags = btrfs_super_compat_ro_flags(super_block); 4716 features.incompat_flags = btrfs_super_incompat_flags(super_block); 4717 4718 if (copy_to_user(arg, &features, sizeof(features))) 4719 return -EFAULT; 4720 4721 return 0; 4722 } 4723 4724 static int check_feature_bits(struct btrfs_root *root, 4725 enum btrfs_feature_set set, 4726 u64 change_mask, u64 flags, u64 supported_flags, 4727 u64 safe_set, u64 safe_clear) 4728 { 4729 const char *type = btrfs_feature_set_names[set]; 4730 char *names; 4731 u64 disallowed, unsupported; 4732 u64 set_mask = flags & change_mask; 4733 u64 clear_mask = ~flags & change_mask; 4734 4735 unsupported = set_mask & ~supported_flags; 4736 if (unsupported) { 4737 names = btrfs_printable_features(set, unsupported); 4738 if (names) { 4739 btrfs_warn(root->fs_info, 4740 "this kernel does not support the %s feature bit%s", 4741 names, strchr(names, ',') ? "s" : ""); 4742 kfree(names); 4743 } else 4744 btrfs_warn(root->fs_info, 4745 "this kernel does not support %s bits 0x%llx", 4746 type, unsupported); 4747 return -EOPNOTSUPP; 4748 } 4749 4750 disallowed = set_mask & ~safe_set; 4751 if (disallowed) { 4752 names = btrfs_printable_features(set, disallowed); 4753 if (names) { 4754 btrfs_warn(root->fs_info, 4755 "can't set the %s feature bit%s while mounted", 4756 names, strchr(names, ',') ? "s" : ""); 4757 kfree(names); 4758 } else 4759 btrfs_warn(root->fs_info, 4760 "can't set %s bits 0x%llx while mounted", 4761 type, disallowed); 4762 return -EPERM; 4763 } 4764 4765 disallowed = clear_mask & ~safe_clear; 4766 if (disallowed) { 4767 names = btrfs_printable_features(set, disallowed); 4768 if (names) { 4769 btrfs_warn(root->fs_info, 4770 "can't clear the %s feature bit%s while mounted", 4771 names, strchr(names, ',') ? 
"s" : ""); 4772 kfree(names); 4773 } else 4774 btrfs_warn(root->fs_info, 4775 "can't clear %s bits 0x%llx while mounted", 4776 type, disallowed); 4777 return -EPERM; 4778 } 4779 4780 return 0; 4781 } 4782 4783 #define check_feature(root, change_mask, flags, mask_base) \ 4784 check_feature_bits(root, FEAT_##mask_base, change_mask, flags, \ 4785 BTRFS_FEATURE_ ## mask_base ## _SUPP, \ 4786 BTRFS_FEATURE_ ## mask_base ## _SAFE_SET, \ 4787 BTRFS_FEATURE_ ## mask_base ## _SAFE_CLEAR) 4788 4789 static int btrfs_ioctl_set_features(struct file *file, void __user *arg) 4790 { 4791 struct btrfs_root *root = BTRFS_I(file_inode(file))->root; 4792 struct btrfs_super_block *super_block = root->fs_info->super_copy; 4793 struct btrfs_ioctl_feature_flags flags[2]; 4794 struct btrfs_trans_handle *trans; 4795 u64 newflags; 4796 int ret; 4797 4798 if (!capable(CAP_SYS_ADMIN)) 4799 return -EPERM; 4800 4801 if (copy_from_user(flags, arg, sizeof(flags))) 4802 return -EFAULT; 4803 4804 /* Nothing to do */ 4805 if (!flags[0].compat_flags && !flags[0].compat_ro_flags && 4806 !flags[0].incompat_flags) 4807 return 0; 4808 4809 ret = check_feature(root, flags[0].compat_flags, 4810 flags[1].compat_flags, COMPAT); 4811 if (ret) 4812 return ret; 4813 4814 ret = check_feature(root, flags[0].compat_ro_flags, 4815 flags[1].compat_ro_flags, COMPAT_RO); 4816 if (ret) 4817 return ret; 4818 4819 ret = check_feature(root, flags[0].incompat_flags, 4820 flags[1].incompat_flags, INCOMPAT); 4821 if (ret) 4822 return ret; 4823 4824 trans = btrfs_start_transaction(root, 0); 4825 if (IS_ERR(trans)) 4826 return PTR_ERR(trans); 4827 4828 spin_lock(&root->fs_info->super_lock); 4829 newflags = btrfs_super_compat_flags(super_block); 4830 newflags |= flags[0].compat_flags & flags[1].compat_flags; 4831 newflags &= ~(flags[0].compat_flags & ~flags[1].compat_flags); 4832 btrfs_set_super_compat_flags(super_block, newflags); 4833 4834 newflags = btrfs_super_compat_ro_flags(super_block); 4835 newflags |= 
flags[0].compat_ro_flags & flags[1].compat_ro_flags; 4836 newflags &= ~(flags[0].compat_ro_flags & ~flags[1].compat_ro_flags); 4837 btrfs_set_super_compat_ro_flags(super_block, newflags); 4838 4839 newflags = btrfs_super_incompat_flags(super_block); 4840 newflags |= flags[0].incompat_flags & flags[1].incompat_flags; 4841 newflags &= ~(flags[0].incompat_flags & ~flags[1].incompat_flags); 4842 btrfs_set_super_incompat_flags(super_block, newflags); 4843 spin_unlock(&root->fs_info->super_lock); 4844 4845 return btrfs_commit_transaction(trans, root); 4846 } 4847 4848 long btrfs_ioctl(struct file *file, unsigned int 4849 cmd, unsigned long arg) 4850 { 4851 struct btrfs_root *root = BTRFS_I(file_inode(file))->root; 4852 void __user *argp = (void __user *)arg; 4853 4854 switch (cmd) { 4855 case FS_IOC_GETFLAGS: 4856 return btrfs_ioctl_getflags(file, argp); 4857 case FS_IOC_SETFLAGS: 4858 return btrfs_ioctl_setflags(file, argp); 4859 case FS_IOC_GETVERSION: 4860 return btrfs_ioctl_getversion(file, argp); 4861 case FITRIM: 4862 return btrfs_ioctl_fitrim(file, argp); 4863 case BTRFS_IOC_SNAP_CREATE: 4864 return btrfs_ioctl_snap_create(file, argp, 0); 4865 case BTRFS_IOC_SNAP_CREATE_V2: 4866 return btrfs_ioctl_snap_create_v2(file, argp, 0); 4867 case BTRFS_IOC_SUBVOL_CREATE: 4868 return btrfs_ioctl_snap_create(file, argp, 1); 4869 case BTRFS_IOC_SUBVOL_CREATE_V2: 4870 return btrfs_ioctl_snap_create_v2(file, argp, 1); 4871 case BTRFS_IOC_SNAP_DESTROY: 4872 return btrfs_ioctl_snap_destroy(file, argp); 4873 case BTRFS_IOC_SUBVOL_GETFLAGS: 4874 return btrfs_ioctl_subvol_getflags(file, argp); 4875 case BTRFS_IOC_SUBVOL_SETFLAGS: 4876 return btrfs_ioctl_subvol_setflags(file, argp); 4877 case BTRFS_IOC_DEFAULT_SUBVOL: 4878 return btrfs_ioctl_default_subvol(file, argp); 4879 case BTRFS_IOC_DEFRAG: 4880 return btrfs_ioctl_defrag(file, NULL); 4881 case BTRFS_IOC_DEFRAG_RANGE: 4882 return btrfs_ioctl_defrag(file, argp); 4883 case BTRFS_IOC_RESIZE: 4884 return btrfs_ioctl_resize(file, 
argp); 4885 case BTRFS_IOC_ADD_DEV: 4886 return btrfs_ioctl_add_dev(root, argp); 4887 case BTRFS_IOC_RM_DEV: 4888 return btrfs_ioctl_rm_dev(file, argp); 4889 case BTRFS_IOC_FS_INFO: 4890 return btrfs_ioctl_fs_info(root, argp); 4891 case BTRFS_IOC_DEV_INFO: 4892 return btrfs_ioctl_dev_info(root, argp); 4893 case BTRFS_IOC_BALANCE: 4894 return btrfs_ioctl_balance(file, NULL); 4895 case BTRFS_IOC_CLONE: 4896 return btrfs_ioctl_clone(file, arg, 0, 0, 0); 4897 case BTRFS_IOC_CLONE_RANGE: 4898 return btrfs_ioctl_clone_range(file, argp); 4899 case BTRFS_IOC_TRANS_START: 4900 return btrfs_ioctl_trans_start(file); 4901 case BTRFS_IOC_TRANS_END: 4902 return btrfs_ioctl_trans_end(file); 4903 case BTRFS_IOC_TREE_SEARCH: 4904 return btrfs_ioctl_tree_search(file, argp); 4905 case BTRFS_IOC_INO_LOOKUP: 4906 return btrfs_ioctl_ino_lookup(file, argp); 4907 case BTRFS_IOC_INO_PATHS: 4908 return btrfs_ioctl_ino_to_path(root, argp); 4909 case BTRFS_IOC_LOGICAL_INO: 4910 return btrfs_ioctl_logical_to_ino(root, argp); 4911 case BTRFS_IOC_SPACE_INFO: 4912 return btrfs_ioctl_space_info(root, argp); 4913 case BTRFS_IOC_SYNC: { 4914 int ret; 4915 4916 ret = btrfs_start_delalloc_roots(root->fs_info, 0, -1); 4917 if (ret) 4918 return ret; 4919 ret = btrfs_sync_fs(file->f_dentry->d_sb, 1); 4920 return ret; 4921 } 4922 case BTRFS_IOC_START_SYNC: 4923 return btrfs_ioctl_start_sync(root, argp); 4924 case BTRFS_IOC_WAIT_SYNC: 4925 return btrfs_ioctl_wait_sync(root, argp); 4926 case BTRFS_IOC_SCRUB: 4927 return btrfs_ioctl_scrub(file, argp); 4928 case BTRFS_IOC_SCRUB_CANCEL: 4929 return btrfs_ioctl_scrub_cancel(root, argp); 4930 case BTRFS_IOC_SCRUB_PROGRESS: 4931 return btrfs_ioctl_scrub_progress(root, argp); 4932 case BTRFS_IOC_BALANCE_V2: 4933 return btrfs_ioctl_balance(file, argp); 4934 case BTRFS_IOC_BALANCE_CTL: 4935 return btrfs_ioctl_balance_ctl(root, arg); 4936 case BTRFS_IOC_BALANCE_PROGRESS: 4937 return btrfs_ioctl_balance_progress(root, argp); 4938 case BTRFS_IOC_SET_RECEIVED_SUBVOL: 
4939 return btrfs_ioctl_set_received_subvol(file, argp); 4940 #ifdef CONFIG_64BIT 4941 case BTRFS_IOC_SET_RECEIVED_SUBVOL_32: 4942 return btrfs_ioctl_set_received_subvol_32(file, argp); 4943 #endif 4944 case BTRFS_IOC_SEND: 4945 return btrfs_ioctl_send(file, argp); 4946 case BTRFS_IOC_GET_DEV_STATS: 4947 return btrfs_ioctl_get_dev_stats(root, argp); 4948 case BTRFS_IOC_QUOTA_CTL: 4949 return btrfs_ioctl_quota_ctl(file, argp); 4950 case BTRFS_IOC_QGROUP_ASSIGN: 4951 return btrfs_ioctl_qgroup_assign(file, argp); 4952 case BTRFS_IOC_QGROUP_CREATE: 4953 return btrfs_ioctl_qgroup_create(file, argp); 4954 case BTRFS_IOC_QGROUP_LIMIT: 4955 return btrfs_ioctl_qgroup_limit(file, argp); 4956 case BTRFS_IOC_QUOTA_RESCAN: 4957 return btrfs_ioctl_quota_rescan(file, argp); 4958 case BTRFS_IOC_QUOTA_RESCAN_STATUS: 4959 return btrfs_ioctl_quota_rescan_status(file, argp); 4960 case BTRFS_IOC_QUOTA_RESCAN_WAIT: 4961 return btrfs_ioctl_quota_rescan_wait(file, argp); 4962 case BTRFS_IOC_DEV_REPLACE: 4963 return btrfs_ioctl_dev_replace(root, argp); 4964 case BTRFS_IOC_GET_FSLABEL: 4965 return btrfs_ioctl_get_fslabel(file, argp); 4966 case BTRFS_IOC_SET_FSLABEL: 4967 return btrfs_ioctl_set_fslabel(file, argp); 4968 case BTRFS_IOC_FILE_EXTENT_SAME: 4969 return btrfs_ioctl_file_extent_same(file, argp); 4970 case BTRFS_IOC_GET_SUPPORTED_FEATURES: 4971 return btrfs_ioctl_get_supported_features(file, argp); 4972 case BTRFS_IOC_GET_FEATURES: 4973 return btrfs_ioctl_get_features(file, argp); 4974 case BTRFS_IOC_SET_FEATURES: 4975 return btrfs_ioctl_set_features(file, argp); 4976 } 4977 4978 return -ENOTTY; 4979 } 4980