1 /* 2 * Copyright (C) 2007 Oracle. All rights reserved. 3 * 4 * This program is free software; you can redistribute it and/or 5 * modify it under the terms of the GNU General Public 6 * License v2 as published by the Free Software Foundation. 7 * 8 * This program is distributed in the hope that it will be useful, 9 * but WITHOUT ANY WARRANTY; without even the implied warranty of 10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 11 * General Public License for more details. 12 * 13 * You should have received a copy of the GNU General Public 14 * License along with this program; if not, write to the 15 * Free Software Foundation, Inc., 59 Temple Place - Suite 330, 16 * Boston, MA 021110-1307, USA. 17 */ 18 19 #include <linux/kernel.h> 20 #include <linux/bio.h> 21 #include <linux/buffer_head.h> 22 #include <linux/file.h> 23 #include <linux/fs.h> 24 #include <linux/fsnotify.h> 25 #include <linux/pagemap.h> 26 #include <linux/highmem.h> 27 #include <linux/time.h> 28 #include <linux/init.h> 29 #include <linux/string.h> 30 #include <linux/backing-dev.h> 31 #include <linux/mount.h> 32 #include <linux/mpage.h> 33 #include <linux/namei.h> 34 #include <linux/swap.h> 35 #include <linux/writeback.h> 36 #include <linux/statfs.h> 37 #include <linux/compat.h> 38 #include <linux/bit_spinlock.h> 39 #include <linux/security.h> 40 #include <linux/xattr.h> 41 #include <linux/vmalloc.h> 42 #include <linux/slab.h> 43 #include <linux/blkdev.h> 44 #include <linux/uuid.h> 45 #include <linux/btrfs.h> 46 #include <linux/uaccess.h> 47 #include "ctree.h" 48 #include "disk-io.h" 49 #include "transaction.h" 50 #include "btrfs_inode.h" 51 #include "print-tree.h" 52 #include "volumes.h" 53 #include "locking.h" 54 #include "inode-map.h" 55 #include "backref.h" 56 #include "rcu-string.h" 57 #include "send.h" 58 #include "dev-replace.h" 59 #include "props.h" 60 #include "sysfs.h" 61 62 #ifdef CONFIG_64BIT 63 /* If we have a 32-bit userspace and 64-bit kernel, then the UAPI 64 * 
structures are incorrect, as the timespec structure from userspace 65 * is 4 bytes too small. We define these alternatives here to teach 66 * the kernel about the 32-bit struct packing. 67 */ 68 struct btrfs_ioctl_timespec_32 { 69 __u64 sec; 70 __u32 nsec; 71 } __attribute__ ((__packed__)); 72 73 struct btrfs_ioctl_received_subvol_args_32 { 74 char uuid[BTRFS_UUID_SIZE]; /* in */ 75 __u64 stransid; /* in */ 76 __u64 rtransid; /* out */ 77 struct btrfs_ioctl_timespec_32 stime; /* in */ 78 struct btrfs_ioctl_timespec_32 rtime; /* out */ 79 __u64 flags; /* in */ 80 __u64 reserved[16]; /* in */ 81 } __attribute__ ((__packed__)); 82 83 #define BTRFS_IOC_SET_RECEIVED_SUBVOL_32 _IOWR(BTRFS_IOCTL_MAGIC, 37, \ 84 struct btrfs_ioctl_received_subvol_args_32) 85 #endif 86 87 88 static int btrfs_clone(struct inode *src, struct inode *inode, 89 u64 off, u64 olen, u64 olen_aligned, u64 destoff); 90 91 /* Mask out flags that are inappropriate for the given type of inode. */ 92 static inline __u32 btrfs_mask_flags(umode_t mode, __u32 flags) 93 { 94 if (S_ISDIR(mode)) 95 return flags; 96 else if (S_ISREG(mode)) 97 return flags & ~FS_DIRSYNC_FL; 98 else 99 return flags & (FS_NODUMP_FL | FS_NOATIME_FL); 100 } 101 102 /* 103 * Export inode flags to the format expected by the FS_IOC_GETFLAGS ioctl. 
104 */ 105 static unsigned int btrfs_flags_to_ioctl(unsigned int flags) 106 { 107 unsigned int iflags = 0; 108 109 if (flags & BTRFS_INODE_SYNC) 110 iflags |= FS_SYNC_FL; 111 if (flags & BTRFS_INODE_IMMUTABLE) 112 iflags |= FS_IMMUTABLE_FL; 113 if (flags & BTRFS_INODE_APPEND) 114 iflags |= FS_APPEND_FL; 115 if (flags & BTRFS_INODE_NODUMP) 116 iflags |= FS_NODUMP_FL; 117 if (flags & BTRFS_INODE_NOATIME) 118 iflags |= FS_NOATIME_FL; 119 if (flags & BTRFS_INODE_DIRSYNC) 120 iflags |= FS_DIRSYNC_FL; 121 if (flags & BTRFS_INODE_NODATACOW) 122 iflags |= FS_NOCOW_FL; 123 124 if ((flags & BTRFS_INODE_COMPRESS) && !(flags & BTRFS_INODE_NOCOMPRESS)) 125 iflags |= FS_COMPR_FL; 126 else if (flags & BTRFS_INODE_NOCOMPRESS) 127 iflags |= FS_NOCOMP_FL; 128 129 return iflags; 130 } 131 132 /* 133 * Update inode->i_flags based on the btrfs internal flags. 134 */ 135 void btrfs_update_iflags(struct inode *inode) 136 { 137 struct btrfs_inode *ip = BTRFS_I(inode); 138 139 inode->i_flags &= ~(S_SYNC|S_APPEND|S_IMMUTABLE|S_NOATIME|S_DIRSYNC); 140 141 if (ip->flags & BTRFS_INODE_SYNC) 142 inode->i_flags |= S_SYNC; 143 if (ip->flags & BTRFS_INODE_IMMUTABLE) 144 inode->i_flags |= S_IMMUTABLE; 145 if (ip->flags & BTRFS_INODE_APPEND) 146 inode->i_flags |= S_APPEND; 147 if (ip->flags & BTRFS_INODE_NOATIME) 148 inode->i_flags |= S_NOATIME; 149 if (ip->flags & BTRFS_INODE_DIRSYNC) 150 inode->i_flags |= S_DIRSYNC; 151 } 152 153 /* 154 * Inherit flags from the parent inode. 155 * 156 * Currently only the compression flags and the cow flags are inherited. 
157 */ 158 void btrfs_inherit_iflags(struct inode *inode, struct inode *dir) 159 { 160 unsigned int flags; 161 162 if (!dir) 163 return; 164 165 flags = BTRFS_I(dir)->flags; 166 167 if (flags & BTRFS_INODE_NOCOMPRESS) { 168 BTRFS_I(inode)->flags &= ~BTRFS_INODE_COMPRESS; 169 BTRFS_I(inode)->flags |= BTRFS_INODE_NOCOMPRESS; 170 } else if (flags & BTRFS_INODE_COMPRESS) { 171 BTRFS_I(inode)->flags &= ~BTRFS_INODE_NOCOMPRESS; 172 BTRFS_I(inode)->flags |= BTRFS_INODE_COMPRESS; 173 } 174 175 if (flags & BTRFS_INODE_NODATACOW) { 176 BTRFS_I(inode)->flags |= BTRFS_INODE_NODATACOW; 177 if (S_ISREG(inode->i_mode)) 178 BTRFS_I(inode)->flags |= BTRFS_INODE_NODATASUM; 179 } 180 181 btrfs_update_iflags(inode); 182 } 183 184 static int btrfs_ioctl_getflags(struct file *file, void __user *arg) 185 { 186 struct btrfs_inode *ip = BTRFS_I(file_inode(file)); 187 unsigned int flags = btrfs_flags_to_ioctl(ip->flags); 188 189 if (copy_to_user(arg, &flags, sizeof(flags))) 190 return -EFAULT; 191 return 0; 192 } 193 194 static int check_flags(unsigned int flags) 195 { 196 if (flags & ~(FS_IMMUTABLE_FL | FS_APPEND_FL | \ 197 FS_NOATIME_FL | FS_NODUMP_FL | \ 198 FS_SYNC_FL | FS_DIRSYNC_FL | \ 199 FS_NOCOMP_FL | FS_COMPR_FL | 200 FS_NOCOW_FL)) 201 return -EOPNOTSUPP; 202 203 if ((flags & FS_NOCOMP_FL) && (flags & FS_COMPR_FL)) 204 return -EINVAL; 205 206 return 0; 207 } 208 209 static int btrfs_ioctl_setflags(struct file *file, void __user *arg) 210 { 211 struct inode *inode = file_inode(file); 212 struct btrfs_inode *ip = BTRFS_I(inode); 213 struct btrfs_root *root = ip->root; 214 struct btrfs_trans_handle *trans; 215 unsigned int flags, oldflags; 216 int ret; 217 u64 ip_oldflags; 218 unsigned int i_oldflags; 219 umode_t mode; 220 221 if (!inode_owner_or_capable(inode)) 222 return -EPERM; 223 224 if (btrfs_root_readonly(root)) 225 return -EROFS; 226 227 if (copy_from_user(&flags, arg, sizeof(flags))) 228 return -EFAULT; 229 230 ret = check_flags(flags); 231 if (ret) 232 return ret; 233 234 
ret = mnt_want_write_file(file); 235 if (ret) 236 return ret; 237 238 mutex_lock(&inode->i_mutex); 239 240 ip_oldflags = ip->flags; 241 i_oldflags = inode->i_flags; 242 mode = inode->i_mode; 243 244 flags = btrfs_mask_flags(inode->i_mode, flags); 245 oldflags = btrfs_flags_to_ioctl(ip->flags); 246 if ((flags ^ oldflags) & (FS_APPEND_FL | FS_IMMUTABLE_FL)) { 247 if (!capable(CAP_LINUX_IMMUTABLE)) { 248 ret = -EPERM; 249 goto out_unlock; 250 } 251 } 252 253 if (flags & FS_SYNC_FL) 254 ip->flags |= BTRFS_INODE_SYNC; 255 else 256 ip->flags &= ~BTRFS_INODE_SYNC; 257 if (flags & FS_IMMUTABLE_FL) 258 ip->flags |= BTRFS_INODE_IMMUTABLE; 259 else 260 ip->flags &= ~BTRFS_INODE_IMMUTABLE; 261 if (flags & FS_APPEND_FL) 262 ip->flags |= BTRFS_INODE_APPEND; 263 else 264 ip->flags &= ~BTRFS_INODE_APPEND; 265 if (flags & FS_NODUMP_FL) 266 ip->flags |= BTRFS_INODE_NODUMP; 267 else 268 ip->flags &= ~BTRFS_INODE_NODUMP; 269 if (flags & FS_NOATIME_FL) 270 ip->flags |= BTRFS_INODE_NOATIME; 271 else 272 ip->flags &= ~BTRFS_INODE_NOATIME; 273 if (flags & FS_DIRSYNC_FL) 274 ip->flags |= BTRFS_INODE_DIRSYNC; 275 else 276 ip->flags &= ~BTRFS_INODE_DIRSYNC; 277 if (flags & FS_NOCOW_FL) { 278 if (S_ISREG(mode)) { 279 /* 280 * It's safe to turn csums off here, no extents exist. 281 * Otherwise we want the flag to reflect the real COW 282 * status of the file and will not set it. 283 */ 284 if (inode->i_size == 0) 285 ip->flags |= BTRFS_INODE_NODATACOW 286 | BTRFS_INODE_NODATASUM; 287 } else { 288 ip->flags |= BTRFS_INODE_NODATACOW; 289 } 290 } else { 291 /* 292 * Revert back under same assuptions as above 293 */ 294 if (S_ISREG(mode)) { 295 if (inode->i_size == 0) 296 ip->flags &= ~(BTRFS_INODE_NODATACOW 297 | BTRFS_INODE_NODATASUM); 298 } else { 299 ip->flags &= ~BTRFS_INODE_NODATACOW; 300 } 301 } 302 303 /* 304 * The COMPRESS flag can only be changed by users, while the NOCOMPRESS 305 * flag may be changed automatically if compression code won't make 306 * things smaller. 
307 */ 308 if (flags & FS_NOCOMP_FL) { 309 ip->flags &= ~BTRFS_INODE_COMPRESS; 310 ip->flags |= BTRFS_INODE_NOCOMPRESS; 311 312 ret = btrfs_set_prop(inode, "btrfs.compression", NULL, 0, 0); 313 if (ret && ret != -ENODATA) 314 goto out_drop; 315 } else if (flags & FS_COMPR_FL) { 316 const char *comp; 317 318 ip->flags |= BTRFS_INODE_COMPRESS; 319 ip->flags &= ~BTRFS_INODE_NOCOMPRESS; 320 321 if (root->fs_info->compress_type == BTRFS_COMPRESS_LZO) 322 comp = "lzo"; 323 else 324 comp = "zlib"; 325 ret = btrfs_set_prop(inode, "btrfs.compression", 326 comp, strlen(comp), 0); 327 if (ret) 328 goto out_drop; 329 330 } else { 331 ip->flags &= ~(BTRFS_INODE_COMPRESS | BTRFS_INODE_NOCOMPRESS); 332 } 333 334 trans = btrfs_start_transaction(root, 1); 335 if (IS_ERR(trans)) { 336 ret = PTR_ERR(trans); 337 goto out_drop; 338 } 339 340 btrfs_update_iflags(inode); 341 inode_inc_iversion(inode); 342 inode->i_ctime = CURRENT_TIME; 343 ret = btrfs_update_inode(trans, root, inode); 344 345 btrfs_end_transaction(trans, root); 346 out_drop: 347 if (ret) { 348 ip->flags = ip_oldflags; 349 inode->i_flags = i_oldflags; 350 } 351 352 out_unlock: 353 mutex_unlock(&inode->i_mutex); 354 mnt_drop_write_file(file); 355 return ret; 356 } 357 358 static int btrfs_ioctl_getversion(struct file *file, int __user *arg) 359 { 360 struct inode *inode = file_inode(file); 361 362 return put_user(inode->i_generation, arg); 363 } 364 365 static noinline int btrfs_ioctl_fitrim(struct file *file, void __user *arg) 366 { 367 struct btrfs_fs_info *fs_info = btrfs_sb(file_inode(file)->i_sb); 368 struct btrfs_device *device; 369 struct request_queue *q; 370 struct fstrim_range range; 371 u64 minlen = ULLONG_MAX; 372 u64 num_devices = 0; 373 u64 total_bytes = btrfs_super_total_bytes(fs_info->super_copy); 374 int ret; 375 376 if (!capable(CAP_SYS_ADMIN)) 377 return -EPERM; 378 379 rcu_read_lock(); 380 list_for_each_entry_rcu(device, &fs_info->fs_devices->devices, 381 dev_list) { 382 if (!device->bdev) 383 continue; 
384 q = bdev_get_queue(device->bdev); 385 if (blk_queue_discard(q)) { 386 num_devices++; 387 minlen = min((u64)q->limits.discard_granularity, 388 minlen); 389 } 390 } 391 rcu_read_unlock(); 392 393 if (!num_devices) 394 return -EOPNOTSUPP; 395 if (copy_from_user(&range, arg, sizeof(range))) 396 return -EFAULT; 397 if (range.start > total_bytes || 398 range.len < fs_info->sb->s_blocksize) 399 return -EINVAL; 400 401 range.len = min(range.len, total_bytes - range.start); 402 range.minlen = max(range.minlen, minlen); 403 ret = btrfs_trim_fs(fs_info->tree_root, &range); 404 if (ret < 0) 405 return ret; 406 407 if (copy_to_user(arg, &range, sizeof(range))) 408 return -EFAULT; 409 410 return 0; 411 } 412 413 int btrfs_is_empty_uuid(u8 *uuid) 414 { 415 int i; 416 417 for (i = 0; i < BTRFS_UUID_SIZE; i++) { 418 if (uuid[i]) 419 return 0; 420 } 421 return 1; 422 } 423 424 static noinline int create_subvol(struct inode *dir, 425 struct dentry *dentry, 426 char *name, int namelen, 427 u64 *async_transid, 428 struct btrfs_qgroup_inherit *inherit) 429 { 430 struct btrfs_trans_handle *trans; 431 struct btrfs_key key; 432 struct btrfs_root_item root_item; 433 struct btrfs_inode_item *inode_item; 434 struct extent_buffer *leaf; 435 struct btrfs_root *root = BTRFS_I(dir)->root; 436 struct btrfs_root *new_root; 437 struct btrfs_block_rsv block_rsv; 438 struct timespec cur_time = CURRENT_TIME; 439 struct inode *inode; 440 int ret; 441 int err; 442 u64 objectid; 443 u64 new_dirid = BTRFS_FIRST_FREE_OBJECTID; 444 u64 index = 0; 445 u64 qgroup_reserved; 446 uuid_le new_uuid; 447 448 ret = btrfs_find_free_objectid(root->fs_info->tree_root, &objectid); 449 if (ret) 450 return ret; 451 452 btrfs_init_block_rsv(&block_rsv, BTRFS_BLOCK_RSV_TEMP); 453 /* 454 * The same as the snapshot creation, please see the comment 455 * of create_snapshot(). 
456 */ 457 ret = btrfs_subvolume_reserve_metadata(root, &block_rsv, 458 8, &qgroup_reserved, false); 459 if (ret) 460 return ret; 461 462 trans = btrfs_start_transaction(root, 0); 463 if (IS_ERR(trans)) { 464 ret = PTR_ERR(trans); 465 btrfs_subvolume_release_metadata(root, &block_rsv, 466 qgroup_reserved); 467 return ret; 468 } 469 trans->block_rsv = &block_rsv; 470 trans->bytes_reserved = block_rsv.size; 471 472 ret = btrfs_qgroup_inherit(trans, root->fs_info, 0, objectid, inherit); 473 if (ret) 474 goto fail; 475 476 leaf = btrfs_alloc_free_block(trans, root, root->leafsize, 477 0, objectid, NULL, 0, 0, 0); 478 if (IS_ERR(leaf)) { 479 ret = PTR_ERR(leaf); 480 goto fail; 481 } 482 483 memset_extent_buffer(leaf, 0, 0, sizeof(struct btrfs_header)); 484 btrfs_set_header_bytenr(leaf, leaf->start); 485 btrfs_set_header_generation(leaf, trans->transid); 486 btrfs_set_header_backref_rev(leaf, BTRFS_MIXED_BACKREF_REV); 487 btrfs_set_header_owner(leaf, objectid); 488 489 write_extent_buffer(leaf, root->fs_info->fsid, btrfs_header_fsid(), 490 BTRFS_FSID_SIZE); 491 write_extent_buffer(leaf, root->fs_info->chunk_tree_uuid, 492 btrfs_header_chunk_tree_uuid(leaf), 493 BTRFS_UUID_SIZE); 494 btrfs_mark_buffer_dirty(leaf); 495 496 memset(&root_item, 0, sizeof(root_item)); 497 498 inode_item = &root_item.inode; 499 btrfs_set_stack_inode_generation(inode_item, 1); 500 btrfs_set_stack_inode_size(inode_item, 3); 501 btrfs_set_stack_inode_nlink(inode_item, 1); 502 btrfs_set_stack_inode_nbytes(inode_item, root->leafsize); 503 btrfs_set_stack_inode_mode(inode_item, S_IFDIR | 0755); 504 505 btrfs_set_root_flags(&root_item, 0); 506 btrfs_set_root_limit(&root_item, 0); 507 btrfs_set_stack_inode_flags(inode_item, BTRFS_INODE_ROOT_ITEM_INIT); 508 509 btrfs_set_root_bytenr(&root_item, leaf->start); 510 btrfs_set_root_generation(&root_item, trans->transid); 511 btrfs_set_root_level(&root_item, 0); 512 btrfs_set_root_refs(&root_item, 1); 513 btrfs_set_root_used(&root_item, leaf->len); 514 
btrfs_set_root_last_snapshot(&root_item, 0); 515 516 btrfs_set_root_generation_v2(&root_item, 517 btrfs_root_generation(&root_item)); 518 uuid_le_gen(&new_uuid); 519 memcpy(root_item.uuid, new_uuid.b, BTRFS_UUID_SIZE); 520 btrfs_set_stack_timespec_sec(&root_item.otime, cur_time.tv_sec); 521 btrfs_set_stack_timespec_nsec(&root_item.otime, cur_time.tv_nsec); 522 root_item.ctime = root_item.otime; 523 btrfs_set_root_ctransid(&root_item, trans->transid); 524 btrfs_set_root_otransid(&root_item, trans->transid); 525 526 btrfs_tree_unlock(leaf); 527 free_extent_buffer(leaf); 528 leaf = NULL; 529 530 btrfs_set_root_dirid(&root_item, new_dirid); 531 532 key.objectid = objectid; 533 key.offset = 0; 534 btrfs_set_key_type(&key, BTRFS_ROOT_ITEM_KEY); 535 ret = btrfs_insert_root(trans, root->fs_info->tree_root, &key, 536 &root_item); 537 if (ret) 538 goto fail; 539 540 key.offset = (u64)-1; 541 new_root = btrfs_read_fs_root_no_name(root->fs_info, &key); 542 if (IS_ERR(new_root)) { 543 btrfs_abort_transaction(trans, root, PTR_ERR(new_root)); 544 ret = PTR_ERR(new_root); 545 goto fail; 546 } 547 548 btrfs_record_root_in_trans(trans, new_root); 549 550 ret = btrfs_create_subvol_root(trans, new_root, root, new_dirid); 551 if (ret) { 552 /* We potentially lose an unused inode item here */ 553 btrfs_abort_transaction(trans, root, ret); 554 goto fail; 555 } 556 557 /* 558 * insert the directory item 559 */ 560 ret = btrfs_set_inode_index(dir, &index); 561 if (ret) { 562 btrfs_abort_transaction(trans, root, ret); 563 goto fail; 564 } 565 566 ret = btrfs_insert_dir_item(trans, root, 567 name, namelen, dir, &key, 568 BTRFS_FT_DIR, index); 569 if (ret) { 570 btrfs_abort_transaction(trans, root, ret); 571 goto fail; 572 } 573 574 btrfs_i_size_write(dir, dir->i_size + namelen * 2); 575 ret = btrfs_update_inode(trans, root, dir); 576 BUG_ON(ret); 577 578 ret = btrfs_add_root_ref(trans, root->fs_info->tree_root, 579 objectid, root->root_key.objectid, 580 btrfs_ino(dir), index, name, namelen); 
581 BUG_ON(ret); 582 583 ret = btrfs_uuid_tree_add(trans, root->fs_info->uuid_root, 584 root_item.uuid, BTRFS_UUID_KEY_SUBVOL, 585 objectid); 586 if (ret) 587 btrfs_abort_transaction(trans, root, ret); 588 589 fail: 590 trans->block_rsv = NULL; 591 trans->bytes_reserved = 0; 592 btrfs_subvolume_release_metadata(root, &block_rsv, qgroup_reserved); 593 594 if (async_transid) { 595 *async_transid = trans->transid; 596 err = btrfs_commit_transaction_async(trans, root, 1); 597 if (err) 598 err = btrfs_commit_transaction(trans, root); 599 } else { 600 err = btrfs_commit_transaction(trans, root); 601 } 602 if (err && !ret) 603 ret = err; 604 605 if (!ret) { 606 inode = btrfs_lookup_dentry(dir, dentry); 607 if (IS_ERR(inode)) 608 return PTR_ERR(inode); 609 d_instantiate(dentry, inode); 610 } 611 return ret; 612 } 613 614 static void btrfs_wait_nocow_write(struct btrfs_root *root) 615 { 616 s64 writers; 617 DEFINE_WAIT(wait); 618 619 do { 620 prepare_to_wait(&root->subv_writers->wait, &wait, 621 TASK_UNINTERRUPTIBLE); 622 623 writers = percpu_counter_sum(&root->subv_writers->counter); 624 if (writers) 625 schedule(); 626 627 finish_wait(&root->subv_writers->wait, &wait); 628 } while (writers); 629 } 630 631 static int create_snapshot(struct btrfs_root *root, struct inode *dir, 632 struct dentry *dentry, char *name, int namelen, 633 u64 *async_transid, bool readonly, 634 struct btrfs_qgroup_inherit *inherit) 635 { 636 struct inode *inode; 637 struct btrfs_pending_snapshot *pending_snapshot; 638 struct btrfs_trans_handle *trans; 639 int ret; 640 641 if (!root->ref_cows) 642 return -EINVAL; 643 644 atomic_inc(&root->will_be_snapshoted); 645 smp_mb__after_atomic_inc(); 646 btrfs_wait_nocow_write(root); 647 648 ret = btrfs_start_delalloc_inodes(root, 0); 649 if (ret) 650 goto out; 651 652 btrfs_wait_ordered_extents(root, -1); 653 654 pending_snapshot = kzalloc(sizeof(*pending_snapshot), GFP_NOFS); 655 if (!pending_snapshot) { 656 ret = -ENOMEM; 657 goto out; 658 } 659 660 
btrfs_init_block_rsv(&pending_snapshot->block_rsv, 661 BTRFS_BLOCK_RSV_TEMP); 662 /* 663 * 1 - parent dir inode 664 * 2 - dir entries 665 * 1 - root item 666 * 2 - root ref/backref 667 * 1 - root of snapshot 668 * 1 - UUID item 669 */ 670 ret = btrfs_subvolume_reserve_metadata(BTRFS_I(dir)->root, 671 &pending_snapshot->block_rsv, 8, 672 &pending_snapshot->qgroup_reserved, 673 false); 674 if (ret) 675 goto free; 676 677 pending_snapshot->dentry = dentry; 678 pending_snapshot->root = root; 679 pending_snapshot->readonly = readonly; 680 pending_snapshot->dir = dir; 681 pending_snapshot->inherit = inherit; 682 683 trans = btrfs_start_transaction(root, 0); 684 if (IS_ERR(trans)) { 685 ret = PTR_ERR(trans); 686 goto fail; 687 } 688 689 spin_lock(&root->fs_info->trans_lock); 690 list_add(&pending_snapshot->list, 691 &trans->transaction->pending_snapshots); 692 spin_unlock(&root->fs_info->trans_lock); 693 if (async_transid) { 694 *async_transid = trans->transid; 695 ret = btrfs_commit_transaction_async(trans, 696 root->fs_info->extent_root, 1); 697 if (ret) 698 ret = btrfs_commit_transaction(trans, root); 699 } else { 700 ret = btrfs_commit_transaction(trans, 701 root->fs_info->extent_root); 702 } 703 if (ret) 704 goto fail; 705 706 ret = pending_snapshot->error; 707 if (ret) 708 goto fail; 709 710 ret = btrfs_orphan_cleanup(pending_snapshot->snap); 711 if (ret) 712 goto fail; 713 714 inode = btrfs_lookup_dentry(dentry->d_parent->d_inode, dentry); 715 if (IS_ERR(inode)) { 716 ret = PTR_ERR(inode); 717 goto fail; 718 } 719 720 d_instantiate(dentry, inode); 721 ret = 0; 722 fail: 723 btrfs_subvolume_release_metadata(BTRFS_I(dir)->root, 724 &pending_snapshot->block_rsv, 725 pending_snapshot->qgroup_reserved); 726 free: 727 kfree(pending_snapshot); 728 out: 729 atomic_dec(&root->will_be_snapshoted); 730 return ret; 731 } 732 733 /* copy of check_sticky in fs/namei.c() 734 * It's inline, so penalty for filesystems that don't use sticky bit is 735 * minimal. 
736 */ 737 static inline int btrfs_check_sticky(struct inode *dir, struct inode *inode) 738 { 739 kuid_t fsuid = current_fsuid(); 740 741 if (!(dir->i_mode & S_ISVTX)) 742 return 0; 743 if (uid_eq(inode->i_uid, fsuid)) 744 return 0; 745 if (uid_eq(dir->i_uid, fsuid)) 746 return 0; 747 return !capable(CAP_FOWNER); 748 } 749 750 /* copy of may_delete in fs/namei.c() 751 * Check whether we can remove a link victim from directory dir, check 752 * whether the type of victim is right. 753 * 1. We can't do it if dir is read-only (done in permission()) 754 * 2. We should have write and exec permissions on dir 755 * 3. We can't remove anything from append-only dir 756 * 4. We can't do anything with immutable dir (done in permission()) 757 * 5. If the sticky bit on dir is set we should either 758 * a. be owner of dir, or 759 * b. be owner of victim, or 760 * c. have CAP_FOWNER capability 761 * 6. If the victim is append-only or immutable we can't do antyhing with 762 * links pointing to it. 763 * 7. If we were asked to remove a directory and victim isn't one - ENOTDIR. 764 * 8. If we were asked to remove a non-directory and victim isn't one - EISDIR. 765 * 9. We can't remove a root or mountpoint. 766 * 10. We don't allow removal of NFS sillyrenamed files; it's handled by 767 * nfs_async_unlink(). 
768 */ 769 770 static int btrfs_may_delete(struct inode *dir, struct dentry *victim, int isdir) 771 { 772 int error; 773 774 if (!victim->d_inode) 775 return -ENOENT; 776 777 BUG_ON(victim->d_parent->d_inode != dir); 778 audit_inode_child(dir, victim, AUDIT_TYPE_CHILD_DELETE); 779 780 error = inode_permission(dir, MAY_WRITE | MAY_EXEC); 781 if (error) 782 return error; 783 if (IS_APPEND(dir)) 784 return -EPERM; 785 if (btrfs_check_sticky(dir, victim->d_inode)|| 786 IS_APPEND(victim->d_inode)|| 787 IS_IMMUTABLE(victim->d_inode) || IS_SWAPFILE(victim->d_inode)) 788 return -EPERM; 789 if (isdir) { 790 if (!S_ISDIR(victim->d_inode->i_mode)) 791 return -ENOTDIR; 792 if (IS_ROOT(victim)) 793 return -EBUSY; 794 } else if (S_ISDIR(victim->d_inode->i_mode)) 795 return -EISDIR; 796 if (IS_DEADDIR(dir)) 797 return -ENOENT; 798 if (victim->d_flags & DCACHE_NFSFS_RENAMED) 799 return -EBUSY; 800 return 0; 801 } 802 803 /* copy of may_create in fs/namei.c() */ 804 static inline int btrfs_may_create(struct inode *dir, struct dentry *child) 805 { 806 if (child->d_inode) 807 return -EEXIST; 808 if (IS_DEADDIR(dir)) 809 return -ENOENT; 810 return inode_permission(dir, MAY_WRITE | MAY_EXEC); 811 } 812 813 /* 814 * Create a new subvolume below @parent. This is largely modeled after 815 * sys_mkdirat and vfs_mkdir, but we only do a single component lookup 816 * inside this filesystem so it's quite a bit simpler. 
817 */ 818 static noinline int btrfs_mksubvol(struct path *parent, 819 char *name, int namelen, 820 struct btrfs_root *snap_src, 821 u64 *async_transid, bool readonly, 822 struct btrfs_qgroup_inherit *inherit) 823 { 824 struct inode *dir = parent->dentry->d_inode; 825 struct dentry *dentry; 826 int error; 827 828 error = mutex_lock_killable_nested(&dir->i_mutex, I_MUTEX_PARENT); 829 if (error == -EINTR) 830 return error; 831 832 dentry = lookup_one_len(name, parent->dentry, namelen); 833 error = PTR_ERR(dentry); 834 if (IS_ERR(dentry)) 835 goto out_unlock; 836 837 error = -EEXIST; 838 if (dentry->d_inode) 839 goto out_dput; 840 841 error = btrfs_may_create(dir, dentry); 842 if (error) 843 goto out_dput; 844 845 /* 846 * even if this name doesn't exist, we may get hash collisions. 847 * check for them now when we can safely fail 848 */ 849 error = btrfs_check_dir_item_collision(BTRFS_I(dir)->root, 850 dir->i_ino, name, 851 namelen); 852 if (error) 853 goto out_dput; 854 855 down_read(&BTRFS_I(dir)->root->fs_info->subvol_sem); 856 857 if (btrfs_root_refs(&BTRFS_I(dir)->root->root_item) == 0) 858 goto out_up_read; 859 860 if (snap_src) { 861 error = create_snapshot(snap_src, dir, dentry, name, namelen, 862 async_transid, readonly, inherit); 863 } else { 864 error = create_subvol(dir, dentry, name, namelen, 865 async_transid, inherit); 866 } 867 if (!error) 868 fsnotify_mkdir(dir, dentry); 869 out_up_read: 870 up_read(&BTRFS_I(dir)->root->fs_info->subvol_sem); 871 out_dput: 872 dput(dentry); 873 out_unlock: 874 mutex_unlock(&dir->i_mutex); 875 return error; 876 } 877 878 /* 879 * When we're defragging a range, we don't want to kick it off again 880 * if it is really just waiting for delalloc to send it down. 
881 * If we find a nice big extent or delalloc range for the bytes in the 882 * file you want to defrag, we return 0 to let you know to skip this 883 * part of the file 884 */ 885 static int check_defrag_in_cache(struct inode *inode, u64 offset, int thresh) 886 { 887 struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree; 888 struct extent_map *em = NULL; 889 struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree; 890 u64 end; 891 892 read_lock(&em_tree->lock); 893 em = lookup_extent_mapping(em_tree, offset, PAGE_CACHE_SIZE); 894 read_unlock(&em_tree->lock); 895 896 if (em) { 897 end = extent_map_end(em); 898 free_extent_map(em); 899 if (end - offset > thresh) 900 return 0; 901 } 902 /* if we already have a nice delalloc here, just stop */ 903 thresh /= 2; 904 end = count_range_bits(io_tree, &offset, offset + thresh, 905 thresh, EXTENT_DELALLOC, 1); 906 if (end >= thresh) 907 return 0; 908 return 1; 909 } 910 911 /* 912 * helper function to walk through a file and find extents 913 * newer than a specific transid, and smaller than thresh. 
914 * 915 * This is used by the defragging code to find new and small 916 * extents 917 */ 918 static int find_new_extents(struct btrfs_root *root, 919 struct inode *inode, u64 newer_than, 920 u64 *off, int thresh) 921 { 922 struct btrfs_path *path; 923 struct btrfs_key min_key; 924 struct extent_buffer *leaf; 925 struct btrfs_file_extent_item *extent; 926 int type; 927 int ret; 928 u64 ino = btrfs_ino(inode); 929 930 path = btrfs_alloc_path(); 931 if (!path) 932 return -ENOMEM; 933 934 min_key.objectid = ino; 935 min_key.type = BTRFS_EXTENT_DATA_KEY; 936 min_key.offset = *off; 937 938 while (1) { 939 path->keep_locks = 1; 940 ret = btrfs_search_forward(root, &min_key, path, newer_than); 941 if (ret != 0) 942 goto none; 943 path->keep_locks = 0; 944 btrfs_unlock_up_safe(path, 1); 945 process_slot: 946 if (min_key.objectid != ino) 947 goto none; 948 if (min_key.type != BTRFS_EXTENT_DATA_KEY) 949 goto none; 950 951 leaf = path->nodes[0]; 952 extent = btrfs_item_ptr(leaf, path->slots[0], 953 struct btrfs_file_extent_item); 954 955 type = btrfs_file_extent_type(leaf, extent); 956 if (type == BTRFS_FILE_EXTENT_REG && 957 btrfs_file_extent_num_bytes(leaf, extent) < thresh && 958 check_defrag_in_cache(inode, min_key.offset, thresh)) { 959 *off = min_key.offset; 960 btrfs_free_path(path); 961 return 0; 962 } 963 964 path->slots[0]++; 965 if (path->slots[0] < btrfs_header_nritems(leaf)) { 966 btrfs_item_key_to_cpu(leaf, &min_key, path->slots[0]); 967 goto process_slot; 968 } 969 970 if (min_key.offset == (u64)-1) 971 goto none; 972 973 min_key.offset++; 974 btrfs_release_path(path); 975 } 976 none: 977 btrfs_free_path(path); 978 return -ENOENT; 979 } 980 981 static struct extent_map *defrag_lookup_extent(struct inode *inode, u64 start) 982 { 983 struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree; 984 struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree; 985 struct extent_map *em; 986 u64 len = PAGE_CACHE_SIZE; 987 988 /* 989 * hopefully we have this extent 
in the tree already, try without 990 * the full extent lock 991 */ 992 read_lock(&em_tree->lock); 993 em = lookup_extent_mapping(em_tree, start, len); 994 read_unlock(&em_tree->lock); 995 996 if (!em) { 997 struct extent_state *cached = NULL; 998 u64 end = start + len - 1; 999 1000 /* get the big lock and read metadata off disk */ 1001 lock_extent_bits(io_tree, start, end, 0, &cached); 1002 em = btrfs_get_extent(inode, NULL, 0, start, len, 0); 1003 unlock_extent_cached(io_tree, start, end, &cached, GFP_NOFS); 1004 1005 if (IS_ERR(em)) 1006 return NULL; 1007 } 1008 1009 return em; 1010 } 1011 1012 static bool defrag_check_next_extent(struct inode *inode, struct extent_map *em) 1013 { 1014 struct extent_map *next; 1015 bool ret = true; 1016 1017 /* this is the last extent */ 1018 if (em->start + em->len >= i_size_read(inode)) 1019 return false; 1020 1021 next = defrag_lookup_extent(inode, em->start + em->len); 1022 if (!next || next->block_start >= EXTENT_MAP_LAST_BYTE || 1023 (em->block_start + em->block_len == next->block_start)) 1024 ret = false; 1025 1026 free_extent_map(next); 1027 return ret; 1028 } 1029 1030 static int should_defrag_range(struct inode *inode, u64 start, int thresh, 1031 u64 *last_len, u64 *skip, u64 *defrag_end, 1032 int compress) 1033 { 1034 struct extent_map *em; 1035 int ret = 1; 1036 bool next_mergeable = true; 1037 1038 /* 1039 * make sure that once we start defragging an extent, we keep on 1040 * defragging it 1041 */ 1042 if (start < *defrag_end) 1043 return 1; 1044 1045 *skip = 0; 1046 1047 em = defrag_lookup_extent(inode, start); 1048 if (!em) 1049 return 0; 1050 1051 /* this will cover holes, and inline extents */ 1052 if (em->block_start >= EXTENT_MAP_LAST_BYTE) { 1053 ret = 0; 1054 goto out; 1055 } 1056 1057 next_mergeable = defrag_check_next_extent(inode, em); 1058 1059 /* 1060 * we hit a real extent, if it is big or the next extent is not a 1061 * real extent, don't bother defragging it 1062 */ 1063 if (!compress && (*last_len == 
0 || *last_len >= thresh) && 1064 (em->len >= thresh || !next_mergeable)) 1065 ret = 0; 1066 out: 1067 /* 1068 * last_len ends up being a counter of how many bytes we've defragged. 1069 * every time we choose not to defrag an extent, we reset *last_len 1070 * so that the next tiny extent will force a defrag. 1071 * 1072 * The end result of this is that tiny extents before a single big 1073 * extent will force at least part of that big extent to be defragged. 1074 */ 1075 if (ret) { 1076 *defrag_end = extent_map_end(em); 1077 } else { 1078 *last_len = 0; 1079 *skip = extent_map_end(em); 1080 *defrag_end = 0; 1081 } 1082 1083 free_extent_map(em); 1084 return ret; 1085 } 1086 1087 /* 1088 * it doesn't do much good to defrag one or two pages 1089 * at a time. This pulls in a nice chunk of pages 1090 * to COW and defrag. 1091 * 1092 * It also makes sure the delalloc code has enough 1093 * dirty data to avoid making new small extents as part 1094 * of the defrag 1095 * 1096 * It's a good idea to start RA on this range 1097 * before calling this. 
1098 */ 1099 static int cluster_pages_for_defrag(struct inode *inode, 1100 struct page **pages, 1101 unsigned long start_index, 1102 unsigned long num_pages) 1103 { 1104 unsigned long file_end; 1105 u64 isize = i_size_read(inode); 1106 u64 page_start; 1107 u64 page_end; 1108 u64 page_cnt; 1109 int ret; 1110 int i; 1111 int i_done; 1112 struct btrfs_ordered_extent *ordered; 1113 struct extent_state *cached_state = NULL; 1114 struct extent_io_tree *tree; 1115 gfp_t mask = btrfs_alloc_write_mask(inode->i_mapping); 1116 1117 file_end = (isize - 1) >> PAGE_CACHE_SHIFT; 1118 if (!isize || start_index > file_end) 1119 return 0; 1120 1121 page_cnt = min_t(u64, (u64)num_pages, (u64)file_end - start_index + 1); 1122 1123 ret = btrfs_delalloc_reserve_space(inode, 1124 page_cnt << PAGE_CACHE_SHIFT); 1125 if (ret) 1126 return ret; 1127 i_done = 0; 1128 tree = &BTRFS_I(inode)->io_tree; 1129 1130 /* step one, lock all the pages */ 1131 for (i = 0; i < page_cnt; i++) { 1132 struct page *page; 1133 again: 1134 page = find_or_create_page(inode->i_mapping, 1135 start_index + i, mask); 1136 if (!page) 1137 break; 1138 1139 page_start = page_offset(page); 1140 page_end = page_start + PAGE_CACHE_SIZE - 1; 1141 while (1) { 1142 lock_extent_bits(tree, page_start, page_end, 1143 0, &cached_state); 1144 ordered = btrfs_lookup_ordered_extent(inode, 1145 page_start); 1146 unlock_extent_cached(tree, page_start, page_end, 1147 &cached_state, GFP_NOFS); 1148 if (!ordered) 1149 break; 1150 1151 unlock_page(page); 1152 btrfs_start_ordered_extent(inode, ordered, 1); 1153 btrfs_put_ordered_extent(ordered); 1154 lock_page(page); 1155 /* 1156 * we unlocked the page above, so we need check if 1157 * it was released or not. 
1158 */ 1159 if (page->mapping != inode->i_mapping) { 1160 unlock_page(page); 1161 page_cache_release(page); 1162 goto again; 1163 } 1164 } 1165 1166 if (!PageUptodate(page)) { 1167 btrfs_readpage(NULL, page); 1168 lock_page(page); 1169 if (!PageUptodate(page)) { 1170 unlock_page(page); 1171 page_cache_release(page); 1172 ret = -EIO; 1173 break; 1174 } 1175 } 1176 1177 if (page->mapping != inode->i_mapping) { 1178 unlock_page(page); 1179 page_cache_release(page); 1180 goto again; 1181 } 1182 1183 pages[i] = page; 1184 i_done++; 1185 } 1186 if (!i_done || ret) 1187 goto out; 1188 1189 if (!(inode->i_sb->s_flags & MS_ACTIVE)) 1190 goto out; 1191 1192 /* 1193 * so now we have a nice long stream of locked 1194 * and up to date pages, lets wait on them 1195 */ 1196 for (i = 0; i < i_done; i++) 1197 wait_on_page_writeback(pages[i]); 1198 1199 page_start = page_offset(pages[0]); 1200 page_end = page_offset(pages[i_done - 1]) + PAGE_CACHE_SIZE; 1201 1202 lock_extent_bits(&BTRFS_I(inode)->io_tree, 1203 page_start, page_end - 1, 0, &cached_state); 1204 clear_extent_bit(&BTRFS_I(inode)->io_tree, page_start, 1205 page_end - 1, EXTENT_DIRTY | EXTENT_DELALLOC | 1206 EXTENT_DO_ACCOUNTING | EXTENT_DEFRAG, 0, 0, 1207 &cached_state, GFP_NOFS); 1208 1209 if (i_done != page_cnt) { 1210 spin_lock(&BTRFS_I(inode)->lock); 1211 BTRFS_I(inode)->outstanding_extents++; 1212 spin_unlock(&BTRFS_I(inode)->lock); 1213 btrfs_delalloc_release_space(inode, 1214 (page_cnt - i_done) << PAGE_CACHE_SHIFT); 1215 } 1216 1217 1218 set_extent_defrag(&BTRFS_I(inode)->io_tree, page_start, page_end - 1, 1219 &cached_state, GFP_NOFS); 1220 1221 unlock_extent_cached(&BTRFS_I(inode)->io_tree, 1222 page_start, page_end - 1, &cached_state, 1223 GFP_NOFS); 1224 1225 for (i = 0; i < i_done; i++) { 1226 clear_page_dirty_for_io(pages[i]); 1227 ClearPageChecked(pages[i]); 1228 set_page_extent_mapped(pages[i]); 1229 set_page_dirty(pages[i]); 1230 unlock_page(pages[i]); 1231 page_cache_release(pages[i]); 1232 } 1233 
return i_done; 1234 out: 1235 for (i = 0; i < i_done; i++) { 1236 unlock_page(pages[i]); 1237 page_cache_release(pages[i]); 1238 } 1239 btrfs_delalloc_release_space(inode, page_cnt << PAGE_CACHE_SHIFT); 1240 return ret; 1241 1242 } 1243 1244 int btrfs_defrag_file(struct inode *inode, struct file *file, 1245 struct btrfs_ioctl_defrag_range_args *range, 1246 u64 newer_than, unsigned long max_to_defrag) 1247 { 1248 struct btrfs_root *root = BTRFS_I(inode)->root; 1249 struct file_ra_state *ra = NULL; 1250 unsigned long last_index; 1251 u64 isize = i_size_read(inode); 1252 u64 last_len = 0; 1253 u64 skip = 0; 1254 u64 defrag_end = 0; 1255 u64 newer_off = range->start; 1256 unsigned long i; 1257 unsigned long ra_index = 0; 1258 int ret; 1259 int defrag_count = 0; 1260 int compress_type = BTRFS_COMPRESS_ZLIB; 1261 int extent_thresh = range->extent_thresh; 1262 unsigned long max_cluster = (256 * 1024) >> PAGE_CACHE_SHIFT; 1263 unsigned long cluster = max_cluster; 1264 u64 new_align = ~((u64)128 * 1024 - 1); 1265 struct page **pages = NULL; 1266 1267 if (isize == 0) 1268 return 0; 1269 1270 if (range->start >= isize) 1271 return -EINVAL; 1272 1273 if (range->flags & BTRFS_DEFRAG_RANGE_COMPRESS) { 1274 if (range->compress_type > BTRFS_COMPRESS_TYPES) 1275 return -EINVAL; 1276 if (range->compress_type) 1277 compress_type = range->compress_type; 1278 } 1279 1280 if (extent_thresh == 0) 1281 extent_thresh = 256 * 1024; 1282 1283 /* 1284 * if we were not given a file, allocate a readahead 1285 * context 1286 */ 1287 if (!file) { 1288 ra = kzalloc(sizeof(*ra), GFP_NOFS); 1289 if (!ra) 1290 return -ENOMEM; 1291 file_ra_state_init(ra, inode->i_mapping); 1292 } else { 1293 ra = &file->f_ra; 1294 } 1295 1296 pages = kmalloc_array(max_cluster, sizeof(struct page *), 1297 GFP_NOFS); 1298 if (!pages) { 1299 ret = -ENOMEM; 1300 goto out_ra; 1301 } 1302 1303 /* find the last page to defrag */ 1304 if (range->start + range->len > range->start) { 1305 last_index = min_t(u64, isize - 1, 1306 
range->start + range->len - 1) >> PAGE_CACHE_SHIFT; 1307 } else { 1308 last_index = (isize - 1) >> PAGE_CACHE_SHIFT; 1309 } 1310 1311 if (newer_than) { 1312 ret = find_new_extents(root, inode, newer_than, 1313 &newer_off, 64 * 1024); 1314 if (!ret) { 1315 range->start = newer_off; 1316 /* 1317 * we always align our defrag to help keep 1318 * the extents in the file evenly spaced 1319 */ 1320 i = (newer_off & new_align) >> PAGE_CACHE_SHIFT; 1321 } else 1322 goto out_ra; 1323 } else { 1324 i = range->start >> PAGE_CACHE_SHIFT; 1325 } 1326 if (!max_to_defrag) 1327 max_to_defrag = last_index + 1; 1328 1329 /* 1330 * make writeback starts from i, so the defrag range can be 1331 * written sequentially. 1332 */ 1333 if (i < inode->i_mapping->writeback_index) 1334 inode->i_mapping->writeback_index = i; 1335 1336 while (i <= last_index && defrag_count < max_to_defrag && 1337 (i < (i_size_read(inode) + PAGE_CACHE_SIZE - 1) >> 1338 PAGE_CACHE_SHIFT)) { 1339 /* 1340 * make sure we stop running if someone unmounts 1341 * the FS 1342 */ 1343 if (!(inode->i_sb->s_flags & MS_ACTIVE)) 1344 break; 1345 1346 if (btrfs_defrag_cancelled(root->fs_info)) { 1347 printk(KERN_DEBUG "BTRFS: defrag_file cancelled\n"); 1348 ret = -EAGAIN; 1349 break; 1350 } 1351 1352 if (!should_defrag_range(inode, (u64)i << PAGE_CACHE_SHIFT, 1353 extent_thresh, &last_len, &skip, 1354 &defrag_end, range->flags & 1355 BTRFS_DEFRAG_RANGE_COMPRESS)) { 1356 unsigned long next; 1357 /* 1358 * the should_defrag function tells us how much to skip 1359 * bump our counter by the suggested amount 1360 */ 1361 next = (skip + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT; 1362 i = max(i + 1, next); 1363 continue; 1364 } 1365 1366 if (!newer_than) { 1367 cluster = (PAGE_CACHE_ALIGN(defrag_end) >> 1368 PAGE_CACHE_SHIFT) - i; 1369 cluster = min(cluster, max_cluster); 1370 } else { 1371 cluster = max_cluster; 1372 } 1373 1374 if (i + cluster > ra_index) { 1375 ra_index = max(i, ra_index); 1376 btrfs_force_ra(inode->i_mapping, ra, 
file, ra_index, 1377 cluster); 1378 ra_index += max_cluster; 1379 } 1380 1381 mutex_lock(&inode->i_mutex); 1382 if (range->flags & BTRFS_DEFRAG_RANGE_COMPRESS) 1383 BTRFS_I(inode)->force_compress = compress_type; 1384 ret = cluster_pages_for_defrag(inode, pages, i, cluster); 1385 if (ret < 0) { 1386 mutex_unlock(&inode->i_mutex); 1387 goto out_ra; 1388 } 1389 1390 defrag_count += ret; 1391 balance_dirty_pages_ratelimited(inode->i_mapping); 1392 mutex_unlock(&inode->i_mutex); 1393 1394 if (newer_than) { 1395 if (newer_off == (u64)-1) 1396 break; 1397 1398 if (ret > 0) 1399 i += ret; 1400 1401 newer_off = max(newer_off + 1, 1402 (u64)i << PAGE_CACHE_SHIFT); 1403 1404 ret = find_new_extents(root, inode, 1405 newer_than, &newer_off, 1406 64 * 1024); 1407 if (!ret) { 1408 range->start = newer_off; 1409 i = (newer_off & new_align) >> PAGE_CACHE_SHIFT; 1410 } else { 1411 break; 1412 } 1413 } else { 1414 if (ret > 0) { 1415 i += ret; 1416 last_len += ret << PAGE_CACHE_SHIFT; 1417 } else { 1418 i++; 1419 last_len = 0; 1420 } 1421 } 1422 } 1423 1424 if ((range->flags & BTRFS_DEFRAG_RANGE_START_IO)) { 1425 filemap_flush(inode->i_mapping); 1426 if (test_bit(BTRFS_INODE_HAS_ASYNC_EXTENT, 1427 &BTRFS_I(inode)->runtime_flags)) 1428 filemap_flush(inode->i_mapping); 1429 } 1430 1431 if ((range->flags & BTRFS_DEFRAG_RANGE_COMPRESS)) { 1432 /* the filemap_flush will queue IO into the worker threads, but 1433 * we have to make sure the IO is actually started and that 1434 * ordered extents get created before we return 1435 */ 1436 atomic_inc(&root->fs_info->async_submit_draining); 1437 while (atomic_read(&root->fs_info->nr_async_submits) || 1438 atomic_read(&root->fs_info->async_delalloc_pages)) { 1439 wait_event(root->fs_info->async_submit_wait, 1440 (atomic_read(&root->fs_info->nr_async_submits) == 0 && 1441 atomic_read(&root->fs_info->async_delalloc_pages) == 0)); 1442 } 1443 atomic_dec(&root->fs_info->async_submit_draining); 1444 } 1445 1446 if (range->compress_type == 
BTRFS_COMPRESS_LZO) { 1447 btrfs_set_fs_incompat(root->fs_info, COMPRESS_LZO); 1448 } 1449 1450 ret = defrag_count; 1451 1452 out_ra: 1453 if (range->flags & BTRFS_DEFRAG_RANGE_COMPRESS) { 1454 mutex_lock(&inode->i_mutex); 1455 BTRFS_I(inode)->force_compress = BTRFS_COMPRESS_NONE; 1456 mutex_unlock(&inode->i_mutex); 1457 } 1458 if (!file) 1459 kfree(ra); 1460 kfree(pages); 1461 return ret; 1462 } 1463 1464 static noinline int btrfs_ioctl_resize(struct file *file, 1465 void __user *arg) 1466 { 1467 u64 new_size; 1468 u64 old_size; 1469 u64 devid = 1; 1470 struct btrfs_root *root = BTRFS_I(file_inode(file))->root; 1471 struct btrfs_ioctl_vol_args *vol_args; 1472 struct btrfs_trans_handle *trans; 1473 struct btrfs_device *device = NULL; 1474 char *sizestr; 1475 char *retptr; 1476 char *devstr = NULL; 1477 int ret = 0; 1478 int mod = 0; 1479 1480 if (!capable(CAP_SYS_ADMIN)) 1481 return -EPERM; 1482 1483 ret = mnt_want_write_file(file); 1484 if (ret) 1485 return ret; 1486 1487 if (atomic_xchg(&root->fs_info->mutually_exclusive_operation_running, 1488 1)) { 1489 mnt_drop_write_file(file); 1490 return BTRFS_ERROR_DEV_EXCL_RUN_IN_PROGRESS; 1491 } 1492 1493 mutex_lock(&root->fs_info->volume_mutex); 1494 vol_args = memdup_user(arg, sizeof(*vol_args)); 1495 if (IS_ERR(vol_args)) { 1496 ret = PTR_ERR(vol_args); 1497 goto out; 1498 } 1499 1500 vol_args->name[BTRFS_PATH_NAME_MAX] = '\0'; 1501 1502 sizestr = vol_args->name; 1503 devstr = strchr(sizestr, ':'); 1504 if (devstr) { 1505 char *end; 1506 sizestr = devstr + 1; 1507 *devstr = '\0'; 1508 devstr = vol_args->name; 1509 devid = simple_strtoull(devstr, &end, 10); 1510 if (!devid) { 1511 ret = -EINVAL; 1512 goto out_free; 1513 } 1514 btrfs_info(root->fs_info, "resizing devid %llu", devid); 1515 } 1516 1517 device = btrfs_find_device(root->fs_info, devid, NULL, NULL); 1518 if (!device) { 1519 btrfs_info(root->fs_info, "resizer unable to find device %llu", 1520 devid); 1521 ret = -ENODEV; 1522 goto out_free; 1523 } 1524 1525 if 
(!device->writeable) { 1526 btrfs_info(root->fs_info, 1527 "resizer unable to apply on readonly device %llu", 1528 devid); 1529 ret = -EPERM; 1530 goto out_free; 1531 } 1532 1533 if (!strcmp(sizestr, "max")) 1534 new_size = device->bdev->bd_inode->i_size; 1535 else { 1536 if (sizestr[0] == '-') { 1537 mod = -1; 1538 sizestr++; 1539 } else if (sizestr[0] == '+') { 1540 mod = 1; 1541 sizestr++; 1542 } 1543 new_size = memparse(sizestr, &retptr); 1544 if (*retptr != '\0' || new_size == 0) { 1545 ret = -EINVAL; 1546 goto out_free; 1547 } 1548 } 1549 1550 if (device->is_tgtdev_for_dev_replace) { 1551 ret = -EPERM; 1552 goto out_free; 1553 } 1554 1555 old_size = device->total_bytes; 1556 1557 if (mod < 0) { 1558 if (new_size > old_size) { 1559 ret = -EINVAL; 1560 goto out_free; 1561 } 1562 new_size = old_size - new_size; 1563 } else if (mod > 0) { 1564 if (new_size > ULLONG_MAX - old_size) { 1565 ret = -EINVAL; 1566 goto out_free; 1567 } 1568 new_size = old_size + new_size; 1569 } 1570 1571 if (new_size < 256 * 1024 * 1024) { 1572 ret = -EINVAL; 1573 goto out_free; 1574 } 1575 if (new_size > device->bdev->bd_inode->i_size) { 1576 ret = -EFBIG; 1577 goto out_free; 1578 } 1579 1580 do_div(new_size, root->sectorsize); 1581 new_size *= root->sectorsize; 1582 1583 printk_in_rcu(KERN_INFO "BTRFS: new size for %s is %llu\n", 1584 rcu_str_deref(device->name), new_size); 1585 1586 if (new_size > old_size) { 1587 trans = btrfs_start_transaction(root, 0); 1588 if (IS_ERR(trans)) { 1589 ret = PTR_ERR(trans); 1590 goto out_free; 1591 } 1592 ret = btrfs_grow_device(trans, device, new_size); 1593 btrfs_commit_transaction(trans, root); 1594 } else if (new_size < old_size) { 1595 ret = btrfs_shrink_device(device, new_size); 1596 } /* equal, nothing need to do */ 1597 1598 out_free: 1599 kfree(vol_args); 1600 out: 1601 mutex_unlock(&root->fs_info->volume_mutex); 1602 atomic_set(&root->fs_info->mutually_exclusive_operation_running, 0); 1603 mnt_drop_write_file(file); 1604 return ret; 1605 } 
1606 1607 static noinline int btrfs_ioctl_snap_create_transid(struct file *file, 1608 char *name, unsigned long fd, int subvol, 1609 u64 *transid, bool readonly, 1610 struct btrfs_qgroup_inherit *inherit) 1611 { 1612 int namelen; 1613 int ret = 0; 1614 1615 ret = mnt_want_write_file(file); 1616 if (ret) 1617 goto out; 1618 1619 namelen = strlen(name); 1620 if (strchr(name, '/')) { 1621 ret = -EINVAL; 1622 goto out_drop_write; 1623 } 1624 1625 if (name[0] == '.' && 1626 (namelen == 1 || (name[1] == '.' && namelen == 2))) { 1627 ret = -EEXIST; 1628 goto out_drop_write; 1629 } 1630 1631 if (subvol) { 1632 ret = btrfs_mksubvol(&file->f_path, name, namelen, 1633 NULL, transid, readonly, inherit); 1634 } else { 1635 struct fd src = fdget(fd); 1636 struct inode *src_inode; 1637 if (!src.file) { 1638 ret = -EINVAL; 1639 goto out_drop_write; 1640 } 1641 1642 src_inode = file_inode(src.file); 1643 if (src_inode->i_sb != file_inode(file)->i_sb) { 1644 btrfs_info(BTRFS_I(src_inode)->root->fs_info, 1645 "Snapshot src from another FS"); 1646 ret = -EXDEV; 1647 } else if (!inode_owner_or_capable(src_inode)) { 1648 /* 1649 * Subvolume creation is not restricted, but snapshots 1650 * are limited to own subvolumes only 1651 */ 1652 ret = -EPERM; 1653 } else { 1654 ret = btrfs_mksubvol(&file->f_path, name, namelen, 1655 BTRFS_I(src_inode)->root, 1656 transid, readonly, inherit); 1657 } 1658 fdput(src); 1659 } 1660 out_drop_write: 1661 mnt_drop_write_file(file); 1662 out: 1663 return ret; 1664 } 1665 1666 static noinline int btrfs_ioctl_snap_create(struct file *file, 1667 void __user *arg, int subvol) 1668 { 1669 struct btrfs_ioctl_vol_args *vol_args; 1670 int ret; 1671 1672 vol_args = memdup_user(arg, sizeof(*vol_args)); 1673 if (IS_ERR(vol_args)) 1674 return PTR_ERR(vol_args); 1675 vol_args->name[BTRFS_PATH_NAME_MAX] = '\0'; 1676 1677 ret = btrfs_ioctl_snap_create_transid(file, vol_args->name, 1678 vol_args->fd, subvol, 1679 NULL, false, NULL); 1680 1681 kfree(vol_args); 1682 return 
ret; 1683 } 1684 1685 static noinline int btrfs_ioctl_snap_create_v2(struct file *file, 1686 void __user *arg, int subvol) 1687 { 1688 struct btrfs_ioctl_vol_args_v2 *vol_args; 1689 int ret; 1690 u64 transid = 0; 1691 u64 *ptr = NULL; 1692 bool readonly = false; 1693 struct btrfs_qgroup_inherit *inherit = NULL; 1694 1695 vol_args = memdup_user(arg, sizeof(*vol_args)); 1696 if (IS_ERR(vol_args)) 1697 return PTR_ERR(vol_args); 1698 vol_args->name[BTRFS_SUBVOL_NAME_MAX] = '\0'; 1699 1700 if (vol_args->flags & 1701 ~(BTRFS_SUBVOL_CREATE_ASYNC | BTRFS_SUBVOL_RDONLY | 1702 BTRFS_SUBVOL_QGROUP_INHERIT)) { 1703 ret = -EOPNOTSUPP; 1704 goto out; 1705 } 1706 1707 if (vol_args->flags & BTRFS_SUBVOL_CREATE_ASYNC) 1708 ptr = &transid; 1709 if (vol_args->flags & BTRFS_SUBVOL_RDONLY) 1710 readonly = true; 1711 if (vol_args->flags & BTRFS_SUBVOL_QGROUP_INHERIT) { 1712 if (vol_args->size > PAGE_CACHE_SIZE) { 1713 ret = -EINVAL; 1714 goto out; 1715 } 1716 inherit = memdup_user(vol_args->qgroup_inherit, vol_args->size); 1717 if (IS_ERR(inherit)) { 1718 ret = PTR_ERR(inherit); 1719 goto out; 1720 } 1721 } 1722 1723 ret = btrfs_ioctl_snap_create_transid(file, vol_args->name, 1724 vol_args->fd, subvol, ptr, 1725 readonly, inherit); 1726 1727 if (ret == 0 && ptr && 1728 copy_to_user(arg + 1729 offsetof(struct btrfs_ioctl_vol_args_v2, 1730 transid), ptr, sizeof(*ptr))) 1731 ret = -EFAULT; 1732 out: 1733 kfree(vol_args); 1734 kfree(inherit); 1735 return ret; 1736 } 1737 1738 static noinline int btrfs_ioctl_subvol_getflags(struct file *file, 1739 void __user *arg) 1740 { 1741 struct inode *inode = file_inode(file); 1742 struct btrfs_root *root = BTRFS_I(inode)->root; 1743 int ret = 0; 1744 u64 flags = 0; 1745 1746 if (btrfs_ino(inode) != BTRFS_FIRST_FREE_OBJECTID) 1747 return -EINVAL; 1748 1749 down_read(&root->fs_info->subvol_sem); 1750 if (btrfs_root_readonly(root)) 1751 flags |= BTRFS_SUBVOL_RDONLY; 1752 up_read(&root->fs_info->subvol_sem); 1753 1754 if (copy_to_user(arg, &flags, 
sizeof(flags))) 1755 ret = -EFAULT; 1756 1757 return ret; 1758 } 1759 1760 static noinline int btrfs_ioctl_subvol_setflags(struct file *file, 1761 void __user *arg) 1762 { 1763 struct inode *inode = file_inode(file); 1764 struct btrfs_root *root = BTRFS_I(inode)->root; 1765 struct btrfs_trans_handle *trans; 1766 u64 root_flags; 1767 u64 flags; 1768 int ret = 0; 1769 1770 if (!inode_owner_or_capable(inode)) 1771 return -EPERM; 1772 1773 ret = mnt_want_write_file(file); 1774 if (ret) 1775 goto out; 1776 1777 if (btrfs_ino(inode) != BTRFS_FIRST_FREE_OBJECTID) { 1778 ret = -EINVAL; 1779 goto out_drop_write; 1780 } 1781 1782 if (copy_from_user(&flags, arg, sizeof(flags))) { 1783 ret = -EFAULT; 1784 goto out_drop_write; 1785 } 1786 1787 if (flags & BTRFS_SUBVOL_CREATE_ASYNC) { 1788 ret = -EINVAL; 1789 goto out_drop_write; 1790 } 1791 1792 if (flags & ~BTRFS_SUBVOL_RDONLY) { 1793 ret = -EOPNOTSUPP; 1794 goto out_drop_write; 1795 } 1796 1797 down_write(&root->fs_info->subvol_sem); 1798 1799 /* nothing to do */ 1800 if (!!(flags & BTRFS_SUBVOL_RDONLY) == btrfs_root_readonly(root)) 1801 goto out_drop_sem; 1802 1803 root_flags = btrfs_root_flags(&root->root_item); 1804 if (flags & BTRFS_SUBVOL_RDONLY) { 1805 btrfs_set_root_flags(&root->root_item, 1806 root_flags | BTRFS_ROOT_SUBVOL_RDONLY); 1807 } else { 1808 /* 1809 * Block RO -> RW transition if this subvolume is involved in 1810 * send 1811 */ 1812 spin_lock(&root->root_item_lock); 1813 if (root->send_in_progress == 0) { 1814 btrfs_set_root_flags(&root->root_item, 1815 root_flags & ~BTRFS_ROOT_SUBVOL_RDONLY); 1816 spin_unlock(&root->root_item_lock); 1817 } else { 1818 spin_unlock(&root->root_item_lock); 1819 btrfs_warn(root->fs_info, 1820 "Attempt to set subvolume %llu read-write during send", 1821 root->root_key.objectid); 1822 ret = -EPERM; 1823 goto out_drop_sem; 1824 } 1825 } 1826 1827 trans = btrfs_start_transaction(root, 1); 1828 if (IS_ERR(trans)) { 1829 ret = PTR_ERR(trans); 1830 goto out_reset; 1831 } 1832 1833 
ret = btrfs_update_root(trans, root->fs_info->tree_root, 1834 &root->root_key, &root->root_item); 1835 1836 btrfs_commit_transaction(trans, root); 1837 out_reset: 1838 if (ret) 1839 btrfs_set_root_flags(&root->root_item, root_flags); 1840 out_drop_sem: 1841 up_write(&root->fs_info->subvol_sem); 1842 out_drop_write: 1843 mnt_drop_write_file(file); 1844 out: 1845 return ret; 1846 } 1847 1848 /* 1849 * helper to check if the subvolume references other subvolumes 1850 */ 1851 static noinline int may_destroy_subvol(struct btrfs_root *root) 1852 { 1853 struct btrfs_path *path; 1854 struct btrfs_dir_item *di; 1855 struct btrfs_key key; 1856 u64 dir_id; 1857 int ret; 1858 1859 path = btrfs_alloc_path(); 1860 if (!path) 1861 return -ENOMEM; 1862 1863 /* Make sure this root isn't set as the default subvol */ 1864 dir_id = btrfs_super_root_dir(root->fs_info->super_copy); 1865 di = btrfs_lookup_dir_item(NULL, root->fs_info->tree_root, path, 1866 dir_id, "default", 7, 0); 1867 if (di && !IS_ERR(di)) { 1868 btrfs_dir_item_key_to_cpu(path->nodes[0], di, &key); 1869 if (key.objectid == root->root_key.objectid) { 1870 ret = -EPERM; 1871 btrfs_err(root->fs_info, "deleting default subvolume " 1872 "%llu is not allowed", key.objectid); 1873 goto out; 1874 } 1875 btrfs_release_path(path); 1876 } 1877 1878 key.objectid = root->root_key.objectid; 1879 key.type = BTRFS_ROOT_REF_KEY; 1880 key.offset = (u64)-1; 1881 1882 ret = btrfs_search_slot(NULL, root->fs_info->tree_root, 1883 &key, path, 0, 0); 1884 if (ret < 0) 1885 goto out; 1886 BUG_ON(ret == 0); 1887 1888 ret = 0; 1889 if (path->slots[0] > 0) { 1890 path->slots[0]--; 1891 btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]); 1892 if (key.objectid == root->root_key.objectid && 1893 key.type == BTRFS_ROOT_REF_KEY) 1894 ret = -ENOTEMPTY; 1895 } 1896 out: 1897 btrfs_free_path(path); 1898 return ret; 1899 } 1900 1901 static noinline int key_in_sk(struct btrfs_key *key, 1902 struct btrfs_ioctl_search_key *sk) 1903 { 1904 struct 
btrfs_key test; 1905 int ret; 1906 1907 test.objectid = sk->min_objectid; 1908 test.type = sk->min_type; 1909 test.offset = sk->min_offset; 1910 1911 ret = btrfs_comp_cpu_keys(key, &test); 1912 if (ret < 0) 1913 return 0; 1914 1915 test.objectid = sk->max_objectid; 1916 test.type = sk->max_type; 1917 test.offset = sk->max_offset; 1918 1919 ret = btrfs_comp_cpu_keys(key, &test); 1920 if (ret > 0) 1921 return 0; 1922 return 1; 1923 } 1924 1925 static noinline int copy_to_sk(struct btrfs_root *root, 1926 struct btrfs_path *path, 1927 struct btrfs_key *key, 1928 struct btrfs_ioctl_search_key *sk, 1929 char *buf, 1930 unsigned long *sk_offset, 1931 int *num_found) 1932 { 1933 u64 found_transid; 1934 struct extent_buffer *leaf; 1935 struct btrfs_ioctl_search_header sh; 1936 unsigned long item_off; 1937 unsigned long item_len; 1938 int nritems; 1939 int i; 1940 int slot; 1941 int ret = 0; 1942 1943 leaf = path->nodes[0]; 1944 slot = path->slots[0]; 1945 nritems = btrfs_header_nritems(leaf); 1946 1947 if (btrfs_header_generation(leaf) > sk->max_transid) { 1948 i = nritems; 1949 goto advance_key; 1950 } 1951 found_transid = btrfs_header_generation(leaf); 1952 1953 for (i = slot; i < nritems; i++) { 1954 item_off = btrfs_item_ptr_offset(leaf, i); 1955 item_len = btrfs_item_size_nr(leaf, i); 1956 1957 btrfs_item_key_to_cpu(leaf, key, i); 1958 if (!key_in_sk(key, sk)) 1959 continue; 1960 1961 if (sizeof(sh) + item_len > BTRFS_SEARCH_ARGS_BUFSIZE) 1962 item_len = 0; 1963 1964 if (sizeof(sh) + item_len + *sk_offset > 1965 BTRFS_SEARCH_ARGS_BUFSIZE) { 1966 ret = 1; 1967 goto overflow; 1968 } 1969 1970 sh.objectid = key->objectid; 1971 sh.offset = key->offset; 1972 sh.type = key->type; 1973 sh.len = item_len; 1974 sh.transid = found_transid; 1975 1976 /* copy search result header */ 1977 memcpy(buf + *sk_offset, &sh, sizeof(sh)); 1978 *sk_offset += sizeof(sh); 1979 1980 if (item_len) { 1981 char *p = buf + *sk_offset; 1982 /* copy the item */ 1983 read_extent_buffer(leaf, p, 1984 
item_off, item_len); 1985 *sk_offset += item_len; 1986 } 1987 (*num_found)++; 1988 1989 if (*num_found >= sk->nr_items) 1990 break; 1991 } 1992 advance_key: 1993 ret = 0; 1994 if (key->offset < (u64)-1 && key->offset < sk->max_offset) 1995 key->offset++; 1996 else if (key->type < (u8)-1 && key->type < sk->max_type) { 1997 key->offset = 0; 1998 key->type++; 1999 } else if (key->objectid < (u64)-1 && key->objectid < sk->max_objectid) { 2000 key->offset = 0; 2001 key->type = 0; 2002 key->objectid++; 2003 } else 2004 ret = 1; 2005 overflow: 2006 return ret; 2007 } 2008 2009 static noinline int search_ioctl(struct inode *inode, 2010 struct btrfs_ioctl_search_args *args) 2011 { 2012 struct btrfs_root *root; 2013 struct btrfs_key key; 2014 struct btrfs_path *path; 2015 struct btrfs_ioctl_search_key *sk = &args->key; 2016 struct btrfs_fs_info *info = BTRFS_I(inode)->root->fs_info; 2017 int ret; 2018 int num_found = 0; 2019 unsigned long sk_offset = 0; 2020 2021 path = btrfs_alloc_path(); 2022 if (!path) 2023 return -ENOMEM; 2024 2025 if (sk->tree_id == 0) { 2026 /* search the root of the inode that was passed */ 2027 root = BTRFS_I(inode)->root; 2028 } else { 2029 key.objectid = sk->tree_id; 2030 key.type = BTRFS_ROOT_ITEM_KEY; 2031 key.offset = (u64)-1; 2032 root = btrfs_read_fs_root_no_name(info, &key); 2033 if (IS_ERR(root)) { 2034 printk(KERN_ERR "BTRFS: could not find root %llu\n", 2035 sk->tree_id); 2036 btrfs_free_path(path); 2037 return -ENOENT; 2038 } 2039 } 2040 2041 key.objectid = sk->min_objectid; 2042 key.type = sk->min_type; 2043 key.offset = sk->min_offset; 2044 2045 path->keep_locks = 1; 2046 2047 while (1) { 2048 ret = btrfs_search_forward(root, &key, path, sk->min_transid); 2049 if (ret != 0) { 2050 if (ret > 0) 2051 ret = 0; 2052 goto err; 2053 } 2054 ret = copy_to_sk(root, path, &key, sk, args->buf, 2055 &sk_offset, &num_found); 2056 btrfs_release_path(path); 2057 if (ret || num_found >= sk->nr_items) 2058 break; 2059 2060 } 2061 ret = 0; 2062 err: 2063 
sk->nr_items = num_found; 2064 btrfs_free_path(path); 2065 return ret; 2066 } 2067 2068 static noinline int btrfs_ioctl_tree_search(struct file *file, 2069 void __user *argp) 2070 { 2071 struct btrfs_ioctl_search_args *args; 2072 struct inode *inode; 2073 int ret; 2074 2075 if (!capable(CAP_SYS_ADMIN)) 2076 return -EPERM; 2077 2078 args = memdup_user(argp, sizeof(*args)); 2079 if (IS_ERR(args)) 2080 return PTR_ERR(args); 2081 2082 inode = file_inode(file); 2083 ret = search_ioctl(inode, args); 2084 if (ret == 0 && copy_to_user(argp, args, sizeof(*args))) 2085 ret = -EFAULT; 2086 kfree(args); 2087 return ret; 2088 } 2089 2090 /* 2091 * Search INODE_REFs to identify path name of 'dirid' directory 2092 * in a 'tree_id' tree. and sets path name to 'name'. 2093 */ 2094 static noinline int btrfs_search_path_in_tree(struct btrfs_fs_info *info, 2095 u64 tree_id, u64 dirid, char *name) 2096 { 2097 struct btrfs_root *root; 2098 struct btrfs_key key; 2099 char *ptr; 2100 int ret = -1; 2101 int slot; 2102 int len; 2103 int total_len = 0; 2104 struct btrfs_inode_ref *iref; 2105 struct extent_buffer *l; 2106 struct btrfs_path *path; 2107 2108 if (dirid == BTRFS_FIRST_FREE_OBJECTID) { 2109 name[0]='\0'; 2110 return 0; 2111 } 2112 2113 path = btrfs_alloc_path(); 2114 if (!path) 2115 return -ENOMEM; 2116 2117 ptr = &name[BTRFS_INO_LOOKUP_PATH_MAX]; 2118 2119 key.objectid = tree_id; 2120 key.type = BTRFS_ROOT_ITEM_KEY; 2121 key.offset = (u64)-1; 2122 root = btrfs_read_fs_root_no_name(info, &key); 2123 if (IS_ERR(root)) { 2124 printk(KERN_ERR "BTRFS: could not find root %llu\n", tree_id); 2125 ret = -ENOENT; 2126 goto out; 2127 } 2128 2129 key.objectid = dirid; 2130 key.type = BTRFS_INODE_REF_KEY; 2131 key.offset = (u64)-1; 2132 2133 while (1) { 2134 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); 2135 if (ret < 0) 2136 goto out; 2137 else if (ret > 0) { 2138 ret = btrfs_previous_item(root, path, dirid, 2139 BTRFS_INODE_REF_KEY); 2140 if (ret < 0) 2141 goto out; 2142 else if 
(ret > 0) { 2143 ret = -ENOENT; 2144 goto out; 2145 } 2146 } 2147 2148 l = path->nodes[0]; 2149 slot = path->slots[0]; 2150 btrfs_item_key_to_cpu(l, &key, slot); 2151 2152 iref = btrfs_item_ptr(l, slot, struct btrfs_inode_ref); 2153 len = btrfs_inode_ref_name_len(l, iref); 2154 ptr -= len + 1; 2155 total_len += len + 1; 2156 if (ptr < name) { 2157 ret = -ENAMETOOLONG; 2158 goto out; 2159 } 2160 2161 *(ptr + len) = '/'; 2162 read_extent_buffer(l, ptr, (unsigned long)(iref + 1), len); 2163 2164 if (key.offset == BTRFS_FIRST_FREE_OBJECTID) 2165 break; 2166 2167 btrfs_release_path(path); 2168 key.objectid = key.offset; 2169 key.offset = (u64)-1; 2170 dirid = key.objectid; 2171 } 2172 memmove(name, ptr, total_len); 2173 name[total_len] = '\0'; 2174 ret = 0; 2175 out: 2176 btrfs_free_path(path); 2177 return ret; 2178 } 2179 2180 static noinline int btrfs_ioctl_ino_lookup(struct file *file, 2181 void __user *argp) 2182 { 2183 struct btrfs_ioctl_ino_lookup_args *args; 2184 struct inode *inode; 2185 int ret; 2186 2187 if (!capable(CAP_SYS_ADMIN)) 2188 return -EPERM; 2189 2190 args = memdup_user(argp, sizeof(*args)); 2191 if (IS_ERR(args)) 2192 return PTR_ERR(args); 2193 2194 inode = file_inode(file); 2195 2196 if (args->treeid == 0) 2197 args->treeid = BTRFS_I(inode)->root->root_key.objectid; 2198 2199 ret = btrfs_search_path_in_tree(BTRFS_I(inode)->root->fs_info, 2200 args->treeid, args->objectid, 2201 args->name); 2202 2203 if (ret == 0 && copy_to_user(argp, args, sizeof(*args))) 2204 ret = -EFAULT; 2205 2206 kfree(args); 2207 return ret; 2208 } 2209 2210 static noinline int btrfs_ioctl_snap_destroy(struct file *file, 2211 void __user *arg) 2212 { 2213 struct dentry *parent = file->f_path.dentry; 2214 struct dentry *dentry; 2215 struct inode *dir = parent->d_inode; 2216 struct inode *inode; 2217 struct btrfs_root *root = BTRFS_I(dir)->root; 2218 struct btrfs_root *dest = NULL; 2219 struct btrfs_ioctl_vol_args *vol_args; 2220 struct btrfs_trans_handle *trans; 2221 struct 
btrfs_block_rsv block_rsv; 2222 u64 qgroup_reserved; 2223 int namelen; 2224 int ret; 2225 int err = 0; 2226 2227 vol_args = memdup_user(arg, sizeof(*vol_args)); 2228 if (IS_ERR(vol_args)) 2229 return PTR_ERR(vol_args); 2230 2231 vol_args->name[BTRFS_PATH_NAME_MAX] = '\0'; 2232 namelen = strlen(vol_args->name); 2233 if (strchr(vol_args->name, '/') || 2234 strncmp(vol_args->name, "..", namelen) == 0) { 2235 err = -EINVAL; 2236 goto out; 2237 } 2238 2239 err = mnt_want_write_file(file); 2240 if (err) 2241 goto out; 2242 2243 err = mutex_lock_killable_nested(&dir->i_mutex, I_MUTEX_PARENT); 2244 if (err == -EINTR) 2245 goto out_drop_write; 2246 dentry = lookup_one_len(vol_args->name, parent, namelen); 2247 if (IS_ERR(dentry)) { 2248 err = PTR_ERR(dentry); 2249 goto out_unlock_dir; 2250 } 2251 2252 if (!dentry->d_inode) { 2253 err = -ENOENT; 2254 goto out_dput; 2255 } 2256 2257 inode = dentry->d_inode; 2258 dest = BTRFS_I(inode)->root; 2259 if (!capable(CAP_SYS_ADMIN)) { 2260 /* 2261 * Regular user. Only allow this with a special mount 2262 * option, when the user has write+exec access to the 2263 * subvol root, and when rmdir(2) would have been 2264 * allowed. 2265 * 2266 * Note that this is _not_ check that the subvol is 2267 * empty or doesn't contain data that we wouldn't 2268 * otherwise be able to delete. 2269 * 2270 * Users who want to delete empty subvols should try 2271 * rmdir(2). 2272 */ 2273 err = -EPERM; 2274 if (!btrfs_test_opt(root, USER_SUBVOL_RM_ALLOWED)) 2275 goto out_dput; 2276 2277 /* 2278 * Do not allow deletion if the parent dir is the same 2279 * as the dir to be deleted. That means the ioctl 2280 * must be called on the dentry referencing the root 2281 * of the subvol, not a random directory contained 2282 * within it. 
2283 */ 2284 err = -EINVAL; 2285 if (root == dest) 2286 goto out_dput; 2287 2288 err = inode_permission(inode, MAY_WRITE | MAY_EXEC); 2289 if (err) 2290 goto out_dput; 2291 } 2292 2293 /* check if subvolume may be deleted by a user */ 2294 err = btrfs_may_delete(dir, dentry, 1); 2295 if (err) 2296 goto out_dput; 2297 2298 if (btrfs_ino(inode) != BTRFS_FIRST_FREE_OBJECTID) { 2299 err = -EINVAL; 2300 goto out_dput; 2301 } 2302 2303 mutex_lock(&inode->i_mutex); 2304 err = d_invalidate(dentry); 2305 if (err) 2306 goto out_unlock; 2307 2308 down_write(&root->fs_info->subvol_sem); 2309 2310 err = may_destroy_subvol(dest); 2311 if (err) 2312 goto out_up_write; 2313 2314 btrfs_init_block_rsv(&block_rsv, BTRFS_BLOCK_RSV_TEMP); 2315 /* 2316 * One for dir inode, two for dir entries, two for root 2317 * ref/backref. 2318 */ 2319 err = btrfs_subvolume_reserve_metadata(root, &block_rsv, 2320 5, &qgroup_reserved, true); 2321 if (err) 2322 goto out_up_write; 2323 2324 trans = btrfs_start_transaction(root, 0); 2325 if (IS_ERR(trans)) { 2326 err = PTR_ERR(trans); 2327 goto out_release; 2328 } 2329 trans->block_rsv = &block_rsv; 2330 trans->bytes_reserved = block_rsv.size; 2331 2332 ret = btrfs_unlink_subvol(trans, root, dir, 2333 dest->root_key.objectid, 2334 dentry->d_name.name, 2335 dentry->d_name.len); 2336 if (ret) { 2337 err = ret; 2338 btrfs_abort_transaction(trans, root, ret); 2339 goto out_end_trans; 2340 } 2341 2342 btrfs_record_root_in_trans(trans, dest); 2343 2344 memset(&dest->root_item.drop_progress, 0, 2345 sizeof(dest->root_item.drop_progress)); 2346 dest->root_item.drop_level = 0; 2347 btrfs_set_root_refs(&dest->root_item, 0); 2348 2349 if (!xchg(&dest->orphan_item_inserted, 1)) { 2350 ret = btrfs_insert_orphan_item(trans, 2351 root->fs_info->tree_root, 2352 dest->root_key.objectid); 2353 if (ret) { 2354 btrfs_abort_transaction(trans, root, ret); 2355 err = ret; 2356 goto out_end_trans; 2357 } 2358 } 2359 2360 ret = btrfs_uuid_tree_rem(trans, 
root->fs_info->uuid_root, 2361 dest->root_item.uuid, BTRFS_UUID_KEY_SUBVOL, 2362 dest->root_key.objectid); 2363 if (ret && ret != -ENOENT) { 2364 btrfs_abort_transaction(trans, root, ret); 2365 err = ret; 2366 goto out_end_trans; 2367 } 2368 if (!btrfs_is_empty_uuid(dest->root_item.received_uuid)) { 2369 ret = btrfs_uuid_tree_rem(trans, root->fs_info->uuid_root, 2370 dest->root_item.received_uuid, 2371 BTRFS_UUID_KEY_RECEIVED_SUBVOL, 2372 dest->root_key.objectid); 2373 if (ret && ret != -ENOENT) { 2374 btrfs_abort_transaction(trans, root, ret); 2375 err = ret; 2376 goto out_end_trans; 2377 } 2378 } 2379 2380 out_end_trans: 2381 trans->block_rsv = NULL; 2382 trans->bytes_reserved = 0; 2383 ret = btrfs_end_transaction(trans, root); 2384 if (ret && !err) 2385 err = ret; 2386 inode->i_flags |= S_DEAD; 2387 out_release: 2388 btrfs_subvolume_release_metadata(root, &block_rsv, qgroup_reserved); 2389 out_up_write: 2390 up_write(&root->fs_info->subvol_sem); 2391 out_unlock: 2392 mutex_unlock(&inode->i_mutex); 2393 if (!err) { 2394 shrink_dcache_sb(root->fs_info->sb); 2395 btrfs_invalidate_inodes(dest); 2396 d_delete(dentry); 2397 2398 /* the last ref */ 2399 if (dest->cache_inode) { 2400 iput(dest->cache_inode); 2401 dest->cache_inode = NULL; 2402 } 2403 } 2404 out_dput: 2405 dput(dentry); 2406 out_unlock_dir: 2407 mutex_unlock(&dir->i_mutex); 2408 out_drop_write: 2409 mnt_drop_write_file(file); 2410 out: 2411 kfree(vol_args); 2412 return err; 2413 } 2414 2415 static int btrfs_ioctl_defrag(struct file *file, void __user *argp) 2416 { 2417 struct inode *inode = file_inode(file); 2418 struct btrfs_root *root = BTRFS_I(inode)->root; 2419 struct btrfs_ioctl_defrag_range_args *range; 2420 int ret; 2421 2422 ret = mnt_want_write_file(file); 2423 if (ret) 2424 return ret; 2425 2426 if (btrfs_root_readonly(root)) { 2427 ret = -EROFS; 2428 goto out; 2429 } 2430 2431 switch (inode->i_mode & S_IFMT) { 2432 case S_IFDIR: 2433 if (!capable(CAP_SYS_ADMIN)) { 2434 ret = -EPERM; 2435 goto 
out; 2436 } 2437 ret = btrfs_defrag_root(root); 2438 if (ret) 2439 goto out; 2440 ret = btrfs_defrag_root(root->fs_info->extent_root); 2441 break; 2442 case S_IFREG: 2443 if (!(file->f_mode & FMODE_WRITE)) { 2444 ret = -EINVAL; 2445 goto out; 2446 } 2447 2448 range = kzalloc(sizeof(*range), GFP_KERNEL); 2449 if (!range) { 2450 ret = -ENOMEM; 2451 goto out; 2452 } 2453 2454 if (argp) { 2455 if (copy_from_user(range, argp, 2456 sizeof(*range))) { 2457 ret = -EFAULT; 2458 kfree(range); 2459 goto out; 2460 } 2461 /* compression requires us to start the IO */ 2462 if ((range->flags & BTRFS_DEFRAG_RANGE_COMPRESS)) { 2463 range->flags |= BTRFS_DEFRAG_RANGE_START_IO; 2464 range->extent_thresh = (u32)-1; 2465 } 2466 } else { 2467 /* the rest are all set to zero by kzalloc */ 2468 range->len = (u64)-1; 2469 } 2470 ret = btrfs_defrag_file(file_inode(file), file, 2471 range, 0, 0); 2472 if (ret > 0) 2473 ret = 0; 2474 kfree(range); 2475 break; 2476 default: 2477 ret = -EINVAL; 2478 } 2479 out: 2480 mnt_drop_write_file(file); 2481 return ret; 2482 } 2483 2484 static long btrfs_ioctl_add_dev(struct btrfs_root *root, void __user *arg) 2485 { 2486 struct btrfs_ioctl_vol_args *vol_args; 2487 int ret; 2488 2489 if (!capable(CAP_SYS_ADMIN)) 2490 return -EPERM; 2491 2492 if (atomic_xchg(&root->fs_info->mutually_exclusive_operation_running, 2493 1)) { 2494 return BTRFS_ERROR_DEV_EXCL_RUN_IN_PROGRESS; 2495 } 2496 2497 mutex_lock(&root->fs_info->volume_mutex); 2498 vol_args = memdup_user(arg, sizeof(*vol_args)); 2499 if (IS_ERR(vol_args)) { 2500 ret = PTR_ERR(vol_args); 2501 goto out; 2502 } 2503 2504 vol_args->name[BTRFS_PATH_NAME_MAX] = '\0'; 2505 ret = btrfs_init_new_device(root, vol_args->name); 2506 2507 kfree(vol_args); 2508 out: 2509 mutex_unlock(&root->fs_info->volume_mutex); 2510 atomic_set(&root->fs_info->mutually_exclusive_operation_running, 0); 2511 return ret; 2512 } 2513 2514 static long btrfs_ioctl_rm_dev(struct file *file, void __user *arg) 2515 { 2516 struct btrfs_root 
*root = BTRFS_I(file_inode(file))->root; 2517 struct btrfs_ioctl_vol_args *vol_args; 2518 int ret; 2519 2520 if (!capable(CAP_SYS_ADMIN)) 2521 return -EPERM; 2522 2523 ret = mnt_want_write_file(file); 2524 if (ret) 2525 return ret; 2526 2527 vol_args = memdup_user(arg, sizeof(*vol_args)); 2528 if (IS_ERR(vol_args)) { 2529 ret = PTR_ERR(vol_args); 2530 goto out; 2531 } 2532 2533 vol_args->name[BTRFS_PATH_NAME_MAX] = '\0'; 2534 2535 if (atomic_xchg(&root->fs_info->mutually_exclusive_operation_running, 2536 1)) { 2537 ret = BTRFS_ERROR_DEV_EXCL_RUN_IN_PROGRESS; 2538 goto out; 2539 } 2540 2541 mutex_lock(&root->fs_info->volume_mutex); 2542 ret = btrfs_rm_device(root, vol_args->name); 2543 mutex_unlock(&root->fs_info->volume_mutex); 2544 atomic_set(&root->fs_info->mutually_exclusive_operation_running, 0); 2545 2546 out: 2547 kfree(vol_args); 2548 mnt_drop_write_file(file); 2549 return ret; 2550 } 2551 2552 static long btrfs_ioctl_fs_info(struct btrfs_root *root, void __user *arg) 2553 { 2554 struct btrfs_ioctl_fs_info_args *fi_args; 2555 struct btrfs_device *device; 2556 struct btrfs_device *next; 2557 struct btrfs_fs_devices *fs_devices = root->fs_info->fs_devices; 2558 int ret = 0; 2559 2560 if (!capable(CAP_SYS_ADMIN)) 2561 return -EPERM; 2562 2563 fi_args = kzalloc(sizeof(*fi_args), GFP_KERNEL); 2564 if (!fi_args) 2565 return -ENOMEM; 2566 2567 mutex_lock(&fs_devices->device_list_mutex); 2568 fi_args->num_devices = fs_devices->num_devices; 2569 memcpy(&fi_args->fsid, root->fs_info->fsid, sizeof(fi_args->fsid)); 2570 2571 list_for_each_entry_safe(device, next, &fs_devices->devices, dev_list) { 2572 if (device->devid > fi_args->max_id) 2573 fi_args->max_id = device->devid; 2574 } 2575 mutex_unlock(&fs_devices->device_list_mutex); 2576 2577 if (copy_to_user(arg, fi_args, sizeof(*fi_args))) 2578 ret = -EFAULT; 2579 2580 kfree(fi_args); 2581 return ret; 2582 } 2583 2584 static long btrfs_ioctl_dev_info(struct btrfs_root *root, void __user *arg) 2585 { 2586 struct 
btrfs_ioctl_dev_info_args *di_args; 2587 struct btrfs_device *dev; 2588 struct btrfs_fs_devices *fs_devices = root->fs_info->fs_devices; 2589 int ret = 0; 2590 char *s_uuid = NULL; 2591 2592 if (!capable(CAP_SYS_ADMIN)) 2593 return -EPERM; 2594 2595 di_args = memdup_user(arg, sizeof(*di_args)); 2596 if (IS_ERR(di_args)) 2597 return PTR_ERR(di_args); 2598 2599 if (!btrfs_is_empty_uuid(di_args->uuid)) 2600 s_uuid = di_args->uuid; 2601 2602 mutex_lock(&fs_devices->device_list_mutex); 2603 dev = btrfs_find_device(root->fs_info, di_args->devid, s_uuid, NULL); 2604 2605 if (!dev) { 2606 ret = -ENODEV; 2607 goto out; 2608 } 2609 2610 di_args->devid = dev->devid; 2611 di_args->bytes_used = dev->bytes_used; 2612 di_args->total_bytes = dev->total_bytes; 2613 memcpy(di_args->uuid, dev->uuid, sizeof(di_args->uuid)); 2614 if (dev->name) { 2615 struct rcu_string *name; 2616 2617 rcu_read_lock(); 2618 name = rcu_dereference(dev->name); 2619 strncpy(di_args->path, name->str, sizeof(di_args->path)); 2620 rcu_read_unlock(); 2621 di_args->path[sizeof(di_args->path) - 1] = 0; 2622 } else { 2623 di_args->path[0] = '\0'; 2624 } 2625 2626 out: 2627 mutex_unlock(&fs_devices->device_list_mutex); 2628 if (ret == 0 && copy_to_user(arg, di_args, sizeof(*di_args))) 2629 ret = -EFAULT; 2630 2631 kfree(di_args); 2632 return ret; 2633 } 2634 2635 static struct page *extent_same_get_page(struct inode *inode, u64 off) 2636 { 2637 struct page *page; 2638 pgoff_t index; 2639 struct extent_io_tree *tree = &BTRFS_I(inode)->io_tree; 2640 2641 index = off >> PAGE_CACHE_SHIFT; 2642 2643 page = grab_cache_page(inode->i_mapping, index); 2644 if (!page) 2645 return NULL; 2646 2647 if (!PageUptodate(page)) { 2648 if (extent_read_full_page_nolock(tree, page, btrfs_get_extent, 2649 0)) 2650 return NULL; 2651 lock_page(page); 2652 if (!PageUptodate(page)) { 2653 unlock_page(page); 2654 page_cache_release(page); 2655 return NULL; 2656 } 2657 } 2658 unlock_page(page); 2659 2660 return page; 2661 } 2662 2663 static 
inline void lock_extent_range(struct inode *inode, u64 off, u64 len) 2664 { 2665 /* do any pending delalloc/csum calc on src, one way or 2666 another, and lock file content */ 2667 while (1) { 2668 struct btrfs_ordered_extent *ordered; 2669 lock_extent(&BTRFS_I(inode)->io_tree, off, off + len - 1); 2670 ordered = btrfs_lookup_first_ordered_extent(inode, 2671 off + len - 1); 2672 if (!ordered && 2673 !test_range_bit(&BTRFS_I(inode)->io_tree, off, 2674 off + len - 1, EXTENT_DELALLOC, 0, NULL)) 2675 break; 2676 unlock_extent(&BTRFS_I(inode)->io_tree, off, off + len - 1); 2677 if (ordered) 2678 btrfs_put_ordered_extent(ordered); 2679 btrfs_wait_ordered_range(inode, off, len); 2680 } 2681 } 2682 2683 static void btrfs_double_unlock(struct inode *inode1, u64 loff1, 2684 struct inode *inode2, u64 loff2, u64 len) 2685 { 2686 unlock_extent(&BTRFS_I(inode1)->io_tree, loff1, loff1 + len - 1); 2687 unlock_extent(&BTRFS_I(inode2)->io_tree, loff2, loff2 + len - 1); 2688 2689 mutex_unlock(&inode1->i_mutex); 2690 mutex_unlock(&inode2->i_mutex); 2691 } 2692 2693 static void btrfs_double_lock(struct inode *inode1, u64 loff1, 2694 struct inode *inode2, u64 loff2, u64 len) 2695 { 2696 if (inode1 < inode2) { 2697 swap(inode1, inode2); 2698 swap(loff1, loff2); 2699 } 2700 2701 mutex_lock_nested(&inode1->i_mutex, I_MUTEX_PARENT); 2702 lock_extent_range(inode1, loff1, len); 2703 if (inode1 != inode2) { 2704 mutex_lock_nested(&inode2->i_mutex, I_MUTEX_CHILD); 2705 lock_extent_range(inode2, loff2, len); 2706 } 2707 } 2708 2709 static int btrfs_cmp_data(struct inode *src, u64 loff, struct inode *dst, 2710 u64 dst_loff, u64 len) 2711 { 2712 int ret = 0; 2713 struct page *src_page, *dst_page; 2714 unsigned int cmp_len = PAGE_CACHE_SIZE; 2715 void *addr, *dst_addr; 2716 2717 while (len) { 2718 if (len < PAGE_CACHE_SIZE) 2719 cmp_len = len; 2720 2721 src_page = extent_same_get_page(src, loff); 2722 if (!src_page) 2723 return -EINVAL; 2724 dst_page = extent_same_get_page(dst, dst_loff); 2725 if 
(!dst_page) { 2726 page_cache_release(src_page); 2727 return -EINVAL; 2728 } 2729 addr = kmap_atomic(src_page); 2730 dst_addr = kmap_atomic(dst_page); 2731 2732 flush_dcache_page(src_page); 2733 flush_dcache_page(dst_page); 2734 2735 if (memcmp(addr, dst_addr, cmp_len)) 2736 ret = BTRFS_SAME_DATA_DIFFERS; 2737 2738 kunmap_atomic(addr); 2739 kunmap_atomic(dst_addr); 2740 page_cache_release(src_page); 2741 page_cache_release(dst_page); 2742 2743 if (ret) 2744 break; 2745 2746 loff += cmp_len; 2747 dst_loff += cmp_len; 2748 len -= cmp_len; 2749 } 2750 2751 return ret; 2752 } 2753 2754 static int extent_same_check_offsets(struct inode *inode, u64 off, u64 len) 2755 { 2756 u64 bs = BTRFS_I(inode)->root->fs_info->sb->s_blocksize; 2757 2758 if (off + len > inode->i_size || off + len < off) 2759 return -EINVAL; 2760 /* Check that we are block aligned - btrfs_clone() requires this */ 2761 if (!IS_ALIGNED(off, bs) || !IS_ALIGNED(off + len, bs)) 2762 return -EINVAL; 2763 2764 return 0; 2765 } 2766 2767 static int btrfs_extent_same(struct inode *src, u64 loff, u64 len, 2768 struct inode *dst, u64 dst_loff) 2769 { 2770 int ret; 2771 2772 /* 2773 * btrfs_clone() can't handle extents in the same file 2774 * yet. Once that works, we can drop this check and replace it 2775 * with a check for the same inode, but overlapping extents. 
2776 */ 2777 if (src == dst) 2778 return -EINVAL; 2779 2780 btrfs_double_lock(src, loff, dst, dst_loff, len); 2781 2782 ret = extent_same_check_offsets(src, loff, len); 2783 if (ret) 2784 goto out_unlock; 2785 2786 ret = extent_same_check_offsets(dst, dst_loff, len); 2787 if (ret) 2788 goto out_unlock; 2789 2790 /* don't make the dst file partly checksummed */ 2791 if ((BTRFS_I(src)->flags & BTRFS_INODE_NODATASUM) != 2792 (BTRFS_I(dst)->flags & BTRFS_INODE_NODATASUM)) { 2793 ret = -EINVAL; 2794 goto out_unlock; 2795 } 2796 2797 ret = btrfs_cmp_data(src, loff, dst, dst_loff, len); 2798 if (ret == 0) 2799 ret = btrfs_clone(src, dst, loff, len, len, dst_loff); 2800 2801 out_unlock: 2802 btrfs_double_unlock(src, loff, dst, dst_loff, len); 2803 2804 return ret; 2805 } 2806 2807 #define BTRFS_MAX_DEDUPE_LEN (16 * 1024 * 1024) 2808 2809 static long btrfs_ioctl_file_extent_same(struct file *file, 2810 struct btrfs_ioctl_same_args __user *argp) 2811 { 2812 struct btrfs_ioctl_same_args *same; 2813 struct btrfs_ioctl_same_extent_info *info; 2814 struct inode *src = file_inode(file); 2815 u64 off; 2816 u64 len; 2817 int i; 2818 int ret; 2819 unsigned long size; 2820 u64 bs = BTRFS_I(src)->root->fs_info->sb->s_blocksize; 2821 bool is_admin = capable(CAP_SYS_ADMIN); 2822 u16 count; 2823 2824 if (!(file->f_mode & FMODE_READ)) 2825 return -EINVAL; 2826 2827 ret = mnt_want_write_file(file); 2828 if (ret) 2829 return ret; 2830 2831 if (get_user(count, &argp->dest_count)) { 2832 ret = -EFAULT; 2833 goto out; 2834 } 2835 2836 size = offsetof(struct btrfs_ioctl_same_args __user, info[count]); 2837 2838 same = memdup_user(argp, size); 2839 2840 if (IS_ERR(same)) { 2841 ret = PTR_ERR(same); 2842 goto out; 2843 } 2844 2845 off = same->logical_offset; 2846 len = same->length; 2847 2848 /* 2849 * Limit the total length we will dedupe for each operation. 2850 * This is intended to bound the total time spent in this 2851 * ioctl to something sane. 
2852 */ 2853 if (len > BTRFS_MAX_DEDUPE_LEN) 2854 len = BTRFS_MAX_DEDUPE_LEN; 2855 2856 if (WARN_ON_ONCE(bs < PAGE_CACHE_SIZE)) { 2857 /* 2858 * Btrfs does not support blocksize < page_size. As a 2859 * result, btrfs_cmp_data() won't correctly handle 2860 * this situation without an update. 2861 */ 2862 ret = -EINVAL; 2863 goto out; 2864 } 2865 2866 ret = -EISDIR; 2867 if (S_ISDIR(src->i_mode)) 2868 goto out; 2869 2870 ret = -EACCES; 2871 if (!S_ISREG(src->i_mode)) 2872 goto out; 2873 2874 /* pre-format output fields to sane values */ 2875 for (i = 0; i < count; i++) { 2876 same->info[i].bytes_deduped = 0ULL; 2877 same->info[i].status = 0; 2878 } 2879 2880 for (i = 0, info = same->info; i < count; i++, info++) { 2881 struct inode *dst; 2882 struct fd dst_file = fdget(info->fd); 2883 if (!dst_file.file) { 2884 info->status = -EBADF; 2885 continue; 2886 } 2887 dst = file_inode(dst_file.file); 2888 2889 if (!(is_admin || (dst_file.file->f_mode & FMODE_WRITE))) { 2890 info->status = -EINVAL; 2891 } else if (file->f_path.mnt != dst_file.file->f_path.mnt) { 2892 info->status = -EXDEV; 2893 } else if (S_ISDIR(dst->i_mode)) { 2894 info->status = -EISDIR; 2895 } else if (!S_ISREG(dst->i_mode)) { 2896 info->status = -EACCES; 2897 } else { 2898 info->status = btrfs_extent_same(src, off, len, dst, 2899 info->logical_offset); 2900 if (info->status == 0) 2901 info->bytes_deduped += len; 2902 } 2903 fdput(dst_file); 2904 } 2905 2906 ret = copy_to_user(argp, same, size); 2907 if (ret) 2908 ret = -EFAULT; 2909 2910 out: 2911 mnt_drop_write_file(file); 2912 return ret; 2913 } 2914 2915 /** 2916 * btrfs_clone() - clone a range from inode file to another 2917 * 2918 * @src: Inode to clone from 2919 * @inode: Inode to clone to 2920 * @off: Offset within source to start clone from 2921 * @olen: Original length, passed by user, of range to clone 2922 * @olen_aligned: Block-aligned value of olen, extent_same uses 2923 * identical values here 2924 * @destoff: Offset within @inode to start 
clone 2925 */ 2926 static int btrfs_clone(struct inode *src, struct inode *inode, 2927 u64 off, u64 olen, u64 olen_aligned, u64 destoff) 2928 { 2929 struct btrfs_root *root = BTRFS_I(inode)->root; 2930 struct btrfs_path *path = NULL; 2931 struct extent_buffer *leaf; 2932 struct btrfs_trans_handle *trans; 2933 char *buf = NULL; 2934 struct btrfs_key key; 2935 u32 nritems; 2936 int slot; 2937 int ret; 2938 u64 len = olen_aligned; 2939 2940 ret = -ENOMEM; 2941 buf = vmalloc(btrfs_level_size(root, 0)); 2942 if (!buf) 2943 return ret; 2944 2945 path = btrfs_alloc_path(); 2946 if (!path) { 2947 vfree(buf); 2948 return ret; 2949 } 2950 2951 path->reada = 2; 2952 /* clone data */ 2953 key.objectid = btrfs_ino(src); 2954 key.type = BTRFS_EXTENT_DATA_KEY; 2955 key.offset = 0; 2956 2957 while (1) { 2958 /* 2959 * note the key will change type as we walk through the 2960 * tree. 2961 */ 2962 path->leave_spinning = 1; 2963 ret = btrfs_search_slot(NULL, BTRFS_I(src)->root, &key, path, 2964 0, 0); 2965 if (ret < 0) 2966 goto out; 2967 2968 nritems = btrfs_header_nritems(path->nodes[0]); 2969 process_slot: 2970 if (path->slots[0] >= nritems) { 2971 ret = btrfs_next_leaf(BTRFS_I(src)->root, path); 2972 if (ret < 0) 2973 goto out; 2974 if (ret > 0) 2975 break; 2976 nritems = btrfs_header_nritems(path->nodes[0]); 2977 } 2978 leaf = path->nodes[0]; 2979 slot = path->slots[0]; 2980 2981 btrfs_item_key_to_cpu(leaf, &key, slot); 2982 if (btrfs_key_type(&key) > BTRFS_EXTENT_DATA_KEY || 2983 key.objectid != btrfs_ino(src)) 2984 break; 2985 2986 if (btrfs_key_type(&key) == BTRFS_EXTENT_DATA_KEY) { 2987 struct btrfs_file_extent_item *extent; 2988 int type; 2989 u32 size; 2990 struct btrfs_key new_key; 2991 u64 disko = 0, diskl = 0; 2992 u64 datao = 0, datal = 0; 2993 u8 comp; 2994 u64 endoff; 2995 2996 extent = btrfs_item_ptr(leaf, slot, 2997 struct btrfs_file_extent_item); 2998 comp = btrfs_file_extent_compression(leaf, extent); 2999 type = btrfs_file_extent_type(leaf, extent); 3000 if 
(type == BTRFS_FILE_EXTENT_REG || 3001 type == BTRFS_FILE_EXTENT_PREALLOC) { 3002 disko = btrfs_file_extent_disk_bytenr(leaf, 3003 extent); 3004 diskl = btrfs_file_extent_disk_num_bytes(leaf, 3005 extent); 3006 datao = btrfs_file_extent_offset(leaf, extent); 3007 datal = btrfs_file_extent_num_bytes(leaf, 3008 extent); 3009 } else if (type == BTRFS_FILE_EXTENT_INLINE) { 3010 /* take upper bound, may be compressed */ 3011 datal = btrfs_file_extent_ram_bytes(leaf, 3012 extent); 3013 } 3014 3015 if (key.offset + datal <= off || 3016 key.offset >= off + len - 1) { 3017 path->slots[0]++; 3018 goto process_slot; 3019 } 3020 3021 size = btrfs_item_size_nr(leaf, slot); 3022 read_extent_buffer(leaf, buf, 3023 btrfs_item_ptr_offset(leaf, slot), 3024 size); 3025 3026 btrfs_release_path(path); 3027 path->leave_spinning = 0; 3028 3029 memcpy(&new_key, &key, sizeof(new_key)); 3030 new_key.objectid = btrfs_ino(inode); 3031 if (off <= key.offset) 3032 new_key.offset = key.offset + destoff - off; 3033 else 3034 new_key.offset = destoff; 3035 3036 /* 3037 * 1 - adjusting old extent (we may have to split it) 3038 * 1 - add new extent 3039 * 1 - inode update 3040 */ 3041 trans = btrfs_start_transaction(root, 3); 3042 if (IS_ERR(trans)) { 3043 ret = PTR_ERR(trans); 3044 goto out; 3045 } 3046 3047 if (type == BTRFS_FILE_EXTENT_REG || 3048 type == BTRFS_FILE_EXTENT_PREALLOC) { 3049 /* 3050 * a | --- range to clone ---| b 3051 * | ------------- extent ------------- | 3052 */ 3053 3054 /* substract range b */ 3055 if (key.offset + datal > off + len) 3056 datal = off + len - key.offset; 3057 3058 /* substract range a */ 3059 if (off > key.offset) { 3060 datao += off - key.offset; 3061 datal -= off - key.offset; 3062 } 3063 3064 ret = btrfs_drop_extents(trans, root, inode, 3065 new_key.offset, 3066 new_key.offset + datal, 3067 1); 3068 if (ret) { 3069 if (ret != -EINVAL) 3070 btrfs_abort_transaction(trans, 3071 root, ret); 3072 btrfs_end_transaction(trans, root); 3073 goto out; 3074 } 3075 
3076 ret = btrfs_insert_empty_item(trans, root, path, 3077 &new_key, size); 3078 if (ret) { 3079 btrfs_abort_transaction(trans, root, 3080 ret); 3081 btrfs_end_transaction(trans, root); 3082 goto out; 3083 } 3084 3085 leaf = path->nodes[0]; 3086 slot = path->slots[0]; 3087 write_extent_buffer(leaf, buf, 3088 btrfs_item_ptr_offset(leaf, slot), 3089 size); 3090 3091 extent = btrfs_item_ptr(leaf, slot, 3092 struct btrfs_file_extent_item); 3093 3094 /* disko == 0 means it's a hole */ 3095 if (!disko) 3096 datao = 0; 3097 3098 btrfs_set_file_extent_offset(leaf, extent, 3099 datao); 3100 btrfs_set_file_extent_num_bytes(leaf, extent, 3101 datal); 3102 if (disko) { 3103 inode_add_bytes(inode, datal); 3104 ret = btrfs_inc_extent_ref(trans, root, 3105 disko, diskl, 0, 3106 root->root_key.objectid, 3107 btrfs_ino(inode), 3108 new_key.offset - datao, 3109 0); 3110 if (ret) { 3111 btrfs_abort_transaction(trans, 3112 root, 3113 ret); 3114 btrfs_end_transaction(trans, 3115 root); 3116 goto out; 3117 3118 } 3119 } 3120 } else if (type == BTRFS_FILE_EXTENT_INLINE) { 3121 u64 skip = 0; 3122 u64 trim = 0; 3123 if (off > key.offset) { 3124 skip = off - key.offset; 3125 new_key.offset += skip; 3126 } 3127 3128 if (key.offset + datal > off + len) 3129 trim = key.offset + datal - (off + len); 3130 3131 if (comp && (skip || trim)) { 3132 ret = -EINVAL; 3133 btrfs_end_transaction(trans, root); 3134 goto out; 3135 } 3136 size -= skip + trim; 3137 datal -= skip + trim; 3138 3139 ret = btrfs_drop_extents(trans, root, inode, 3140 new_key.offset, 3141 new_key.offset + datal, 3142 1); 3143 if (ret) { 3144 if (ret != -EINVAL) 3145 btrfs_abort_transaction(trans, 3146 root, ret); 3147 btrfs_end_transaction(trans, root); 3148 goto out; 3149 } 3150 3151 ret = btrfs_insert_empty_item(trans, root, path, 3152 &new_key, size); 3153 if (ret) { 3154 btrfs_abort_transaction(trans, root, 3155 ret); 3156 btrfs_end_transaction(trans, root); 3157 goto out; 3158 } 3159 3160 if (skip) { 3161 u32 start = 3162 
btrfs_file_extent_calc_inline_size(0); 3163 memmove(buf+start, buf+start+skip, 3164 datal); 3165 } 3166 3167 leaf = path->nodes[0]; 3168 slot = path->slots[0]; 3169 write_extent_buffer(leaf, buf, 3170 btrfs_item_ptr_offset(leaf, slot), 3171 size); 3172 inode_add_bytes(inode, datal); 3173 } 3174 3175 btrfs_mark_buffer_dirty(leaf); 3176 btrfs_release_path(path); 3177 3178 inode_inc_iversion(inode); 3179 inode->i_mtime = inode->i_ctime = CURRENT_TIME; 3180 3181 /* 3182 * we round up to the block size at eof when 3183 * determining which extents to clone above, 3184 * but shouldn't round up the file size 3185 */ 3186 endoff = new_key.offset + datal; 3187 if (endoff > destoff+olen) 3188 endoff = destoff+olen; 3189 if (endoff > inode->i_size) 3190 btrfs_i_size_write(inode, endoff); 3191 3192 ret = btrfs_update_inode(trans, root, inode); 3193 if (ret) { 3194 btrfs_abort_transaction(trans, root, ret); 3195 btrfs_end_transaction(trans, root); 3196 goto out; 3197 } 3198 ret = btrfs_end_transaction(trans, root); 3199 } 3200 btrfs_release_path(path); 3201 key.offset++; 3202 } 3203 ret = 0; 3204 3205 out: 3206 btrfs_release_path(path); 3207 btrfs_free_path(path); 3208 vfree(buf); 3209 return ret; 3210 } 3211 3212 static noinline long btrfs_ioctl_clone(struct file *file, unsigned long srcfd, 3213 u64 off, u64 olen, u64 destoff) 3214 { 3215 struct inode *inode = file_inode(file); 3216 struct btrfs_root *root = BTRFS_I(inode)->root; 3217 struct fd src_file; 3218 struct inode *src; 3219 int ret; 3220 u64 len = olen; 3221 u64 bs = root->fs_info->sb->s_blocksize; 3222 int same_inode = 0; 3223 3224 /* 3225 * TODO: 3226 * - split compressed inline extents. annoying: we need to 3227 * decompress into destination's address_space (the file offset 3228 * may change, so source mapping won't do), then recompress (or 3229 * otherwise reinsert) a subrange. 3230 * 3231 * - split destination inode's inline extents. The inline extents can 3232 * be either compressed or non-compressed. 
3233 */ 3234 3235 /* the destination must be opened for writing */ 3236 if (!(file->f_mode & FMODE_WRITE) || (file->f_flags & O_APPEND)) 3237 return -EINVAL; 3238 3239 if (btrfs_root_readonly(root)) 3240 return -EROFS; 3241 3242 ret = mnt_want_write_file(file); 3243 if (ret) 3244 return ret; 3245 3246 src_file = fdget(srcfd); 3247 if (!src_file.file) { 3248 ret = -EBADF; 3249 goto out_drop_write; 3250 } 3251 3252 ret = -EXDEV; 3253 if (src_file.file->f_path.mnt != file->f_path.mnt) 3254 goto out_fput; 3255 3256 src = file_inode(src_file.file); 3257 3258 ret = -EINVAL; 3259 if (src == inode) 3260 same_inode = 1; 3261 3262 /* the src must be open for reading */ 3263 if (!(src_file.file->f_mode & FMODE_READ)) 3264 goto out_fput; 3265 3266 /* don't make the dst file partly checksummed */ 3267 if ((BTRFS_I(src)->flags & BTRFS_INODE_NODATASUM) != 3268 (BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM)) 3269 goto out_fput; 3270 3271 ret = -EISDIR; 3272 if (S_ISDIR(src->i_mode) || S_ISDIR(inode->i_mode)) 3273 goto out_fput; 3274 3275 ret = -EXDEV; 3276 if (src->i_sb != inode->i_sb) 3277 goto out_fput; 3278 3279 if (!same_inode) { 3280 if (inode < src) { 3281 mutex_lock_nested(&inode->i_mutex, I_MUTEX_PARENT); 3282 mutex_lock_nested(&src->i_mutex, I_MUTEX_CHILD); 3283 } else { 3284 mutex_lock_nested(&src->i_mutex, I_MUTEX_PARENT); 3285 mutex_lock_nested(&inode->i_mutex, I_MUTEX_CHILD); 3286 } 3287 } else { 3288 mutex_lock(&src->i_mutex); 3289 } 3290 3291 /* determine range to clone */ 3292 ret = -EINVAL; 3293 if (off + len > src->i_size || off + len < off) 3294 goto out_unlock; 3295 if (len == 0) 3296 olen = len = src->i_size - off; 3297 /* if we extend to eof, continue to block boundary */ 3298 if (off + len == src->i_size) 3299 len = ALIGN(src->i_size, bs) - off; 3300 3301 /* verify the end result is block aligned */ 3302 if (!IS_ALIGNED(off, bs) || !IS_ALIGNED(off + len, bs) || 3303 !IS_ALIGNED(destoff, bs)) 3304 goto out_unlock; 3305 3306 /* verify if ranges are overlapped 
within the same file */ 3307 if (same_inode) { 3308 if (destoff + len > off && destoff < off + len) 3309 goto out_unlock; 3310 } 3311 3312 if (destoff > inode->i_size) { 3313 ret = btrfs_cont_expand(inode, inode->i_size, destoff); 3314 if (ret) 3315 goto out_unlock; 3316 } 3317 3318 /* truncate page cache pages from target inode range */ 3319 truncate_inode_pages_range(&inode->i_data, destoff, 3320 PAGE_CACHE_ALIGN(destoff + len) - 1); 3321 3322 lock_extent_range(src, off, len); 3323 3324 ret = btrfs_clone(src, inode, off, olen, len, destoff); 3325 3326 unlock_extent(&BTRFS_I(src)->io_tree, off, off + len - 1); 3327 out_unlock: 3328 if (!same_inode) { 3329 if (inode < src) { 3330 mutex_unlock(&src->i_mutex); 3331 mutex_unlock(&inode->i_mutex); 3332 } else { 3333 mutex_unlock(&inode->i_mutex); 3334 mutex_unlock(&src->i_mutex); 3335 } 3336 } else { 3337 mutex_unlock(&src->i_mutex); 3338 } 3339 out_fput: 3340 fdput(src_file); 3341 out_drop_write: 3342 mnt_drop_write_file(file); 3343 return ret; 3344 } 3345 3346 static long btrfs_ioctl_clone_range(struct file *file, void __user *argp) 3347 { 3348 struct btrfs_ioctl_clone_range_args args; 3349 3350 if (copy_from_user(&args, argp, sizeof(args))) 3351 return -EFAULT; 3352 return btrfs_ioctl_clone(file, args.src_fd, args.src_offset, 3353 args.src_length, args.dest_offset); 3354 } 3355 3356 /* 3357 * there are many ways the trans_start and trans_end ioctls can lead 3358 * to deadlocks. They should only be used by applications that 3359 * basically own the machine, and have a very in depth understanding 3360 * of all the possible deadlocks and enospc problems. 
3361 */ 3362 static long btrfs_ioctl_trans_start(struct file *file) 3363 { 3364 struct inode *inode = file_inode(file); 3365 struct btrfs_root *root = BTRFS_I(inode)->root; 3366 struct btrfs_trans_handle *trans; 3367 int ret; 3368 3369 ret = -EPERM; 3370 if (!capable(CAP_SYS_ADMIN)) 3371 goto out; 3372 3373 ret = -EINPROGRESS; 3374 if (file->private_data) 3375 goto out; 3376 3377 ret = -EROFS; 3378 if (btrfs_root_readonly(root)) 3379 goto out; 3380 3381 ret = mnt_want_write_file(file); 3382 if (ret) 3383 goto out; 3384 3385 atomic_inc(&root->fs_info->open_ioctl_trans); 3386 3387 ret = -ENOMEM; 3388 trans = btrfs_start_ioctl_transaction(root); 3389 if (IS_ERR(trans)) 3390 goto out_drop; 3391 3392 file->private_data = trans; 3393 return 0; 3394 3395 out_drop: 3396 atomic_dec(&root->fs_info->open_ioctl_trans); 3397 mnt_drop_write_file(file); 3398 out: 3399 return ret; 3400 } 3401 3402 static long btrfs_ioctl_default_subvol(struct file *file, void __user *argp) 3403 { 3404 struct inode *inode = file_inode(file); 3405 struct btrfs_root *root = BTRFS_I(inode)->root; 3406 struct btrfs_root *new_root; 3407 struct btrfs_dir_item *di; 3408 struct btrfs_trans_handle *trans; 3409 struct btrfs_path *path; 3410 struct btrfs_key location; 3411 struct btrfs_disk_key disk_key; 3412 u64 objectid = 0; 3413 u64 dir_id; 3414 int ret; 3415 3416 if (!capable(CAP_SYS_ADMIN)) 3417 return -EPERM; 3418 3419 ret = mnt_want_write_file(file); 3420 if (ret) 3421 return ret; 3422 3423 if (copy_from_user(&objectid, argp, sizeof(objectid))) { 3424 ret = -EFAULT; 3425 goto out; 3426 } 3427 3428 if (!objectid) 3429 objectid = BTRFS_FS_TREE_OBJECTID; 3430 3431 location.objectid = objectid; 3432 location.type = BTRFS_ROOT_ITEM_KEY; 3433 location.offset = (u64)-1; 3434 3435 new_root = btrfs_read_fs_root_no_name(root->fs_info, &location); 3436 if (IS_ERR(new_root)) { 3437 ret = PTR_ERR(new_root); 3438 goto out; 3439 } 3440 3441 path = btrfs_alloc_path(); 3442 if (!path) { 3443 ret = -ENOMEM; 3444 goto 
out; 3445 } 3446 path->leave_spinning = 1; 3447 3448 trans = btrfs_start_transaction(root, 1); 3449 if (IS_ERR(trans)) { 3450 btrfs_free_path(path); 3451 ret = PTR_ERR(trans); 3452 goto out; 3453 } 3454 3455 dir_id = btrfs_super_root_dir(root->fs_info->super_copy); 3456 di = btrfs_lookup_dir_item(trans, root->fs_info->tree_root, path, 3457 dir_id, "default", 7, 1); 3458 if (IS_ERR_OR_NULL(di)) { 3459 btrfs_free_path(path); 3460 btrfs_end_transaction(trans, root); 3461 btrfs_err(new_root->fs_info, "Umm, you don't have the default dir" 3462 "item, this isn't going to work"); 3463 ret = -ENOENT; 3464 goto out; 3465 } 3466 3467 btrfs_cpu_key_to_disk(&disk_key, &new_root->root_key); 3468 btrfs_set_dir_item_key(path->nodes[0], di, &disk_key); 3469 btrfs_mark_buffer_dirty(path->nodes[0]); 3470 btrfs_free_path(path); 3471 3472 btrfs_set_fs_incompat(root->fs_info, DEFAULT_SUBVOL); 3473 btrfs_end_transaction(trans, root); 3474 out: 3475 mnt_drop_write_file(file); 3476 return ret; 3477 } 3478 3479 void btrfs_get_block_group_info(struct list_head *groups_list, 3480 struct btrfs_ioctl_space_info *space) 3481 { 3482 struct btrfs_block_group_cache *block_group; 3483 3484 space->total_bytes = 0; 3485 space->used_bytes = 0; 3486 space->flags = 0; 3487 list_for_each_entry(block_group, groups_list, list) { 3488 space->flags = block_group->flags; 3489 space->total_bytes += block_group->key.offset; 3490 space->used_bytes += 3491 btrfs_block_group_used(&block_group->item); 3492 } 3493 } 3494 3495 static long btrfs_ioctl_space_info(struct btrfs_root *root, void __user *arg) 3496 { 3497 struct btrfs_ioctl_space_args space_args; 3498 struct btrfs_ioctl_space_info space; 3499 struct btrfs_ioctl_space_info *dest; 3500 struct btrfs_ioctl_space_info *dest_orig; 3501 struct btrfs_ioctl_space_info __user *user_dest; 3502 struct btrfs_space_info *info; 3503 u64 types[] = {BTRFS_BLOCK_GROUP_DATA, 3504 BTRFS_BLOCK_GROUP_SYSTEM, 3505 BTRFS_BLOCK_GROUP_METADATA, 3506 BTRFS_BLOCK_GROUP_DATA | 
BTRFS_BLOCK_GROUP_METADATA}; 3507 int num_types = 4; 3508 int alloc_size; 3509 int ret = 0; 3510 u64 slot_count = 0; 3511 int i, c; 3512 3513 if (copy_from_user(&space_args, 3514 (struct btrfs_ioctl_space_args __user *)arg, 3515 sizeof(space_args))) 3516 return -EFAULT; 3517 3518 for (i = 0; i < num_types; i++) { 3519 struct btrfs_space_info *tmp; 3520 3521 info = NULL; 3522 rcu_read_lock(); 3523 list_for_each_entry_rcu(tmp, &root->fs_info->space_info, 3524 list) { 3525 if (tmp->flags == types[i]) { 3526 info = tmp; 3527 break; 3528 } 3529 } 3530 rcu_read_unlock(); 3531 3532 if (!info) 3533 continue; 3534 3535 down_read(&info->groups_sem); 3536 for (c = 0; c < BTRFS_NR_RAID_TYPES; c++) { 3537 if (!list_empty(&info->block_groups[c])) 3538 slot_count++; 3539 } 3540 up_read(&info->groups_sem); 3541 } 3542 3543 /* 3544 * Global block reserve, exported as a space_info 3545 */ 3546 slot_count++; 3547 3548 /* space_slots == 0 means they are asking for a count */ 3549 if (space_args.space_slots == 0) { 3550 space_args.total_spaces = slot_count; 3551 goto out; 3552 } 3553 3554 slot_count = min_t(u64, space_args.space_slots, slot_count); 3555 3556 alloc_size = sizeof(*dest) * slot_count; 3557 3558 /* we generally have at most 6 or so space infos, one for each raid 3559 * level. 
So, a whole page should be more than enough for everyone 3560 */ 3561 if (alloc_size > PAGE_CACHE_SIZE) 3562 return -ENOMEM; 3563 3564 space_args.total_spaces = 0; 3565 dest = kmalloc(alloc_size, GFP_NOFS); 3566 if (!dest) 3567 return -ENOMEM; 3568 dest_orig = dest; 3569 3570 /* now we have a buffer to copy into */ 3571 for (i = 0; i < num_types; i++) { 3572 struct btrfs_space_info *tmp; 3573 3574 if (!slot_count) 3575 break; 3576 3577 info = NULL; 3578 rcu_read_lock(); 3579 list_for_each_entry_rcu(tmp, &root->fs_info->space_info, 3580 list) { 3581 if (tmp->flags == types[i]) { 3582 info = tmp; 3583 break; 3584 } 3585 } 3586 rcu_read_unlock(); 3587 3588 if (!info) 3589 continue; 3590 down_read(&info->groups_sem); 3591 for (c = 0; c < BTRFS_NR_RAID_TYPES; c++) { 3592 if (!list_empty(&info->block_groups[c])) { 3593 btrfs_get_block_group_info( 3594 &info->block_groups[c], &space); 3595 memcpy(dest, &space, sizeof(space)); 3596 dest++; 3597 space_args.total_spaces++; 3598 slot_count--; 3599 } 3600 if (!slot_count) 3601 break; 3602 } 3603 up_read(&info->groups_sem); 3604 } 3605 3606 /* 3607 * Add global block reserve 3608 */ 3609 if (slot_count) { 3610 struct btrfs_block_rsv *block_rsv = &root->fs_info->global_block_rsv; 3611 3612 spin_lock(&block_rsv->lock); 3613 space.total_bytes = block_rsv->size; 3614 space.used_bytes = block_rsv->size - block_rsv->reserved; 3615 spin_unlock(&block_rsv->lock); 3616 space.flags = BTRFS_SPACE_INFO_GLOBAL_RSV; 3617 memcpy(dest, &space, sizeof(space)); 3618 space_args.total_spaces++; 3619 } 3620 3621 user_dest = (struct btrfs_ioctl_space_info __user *) 3622 (arg + sizeof(struct btrfs_ioctl_space_args)); 3623 3624 if (copy_to_user(user_dest, dest_orig, alloc_size)) 3625 ret = -EFAULT; 3626 3627 kfree(dest_orig); 3628 out: 3629 if (ret == 0 && copy_to_user(arg, &space_args, sizeof(space_args))) 3630 ret = -EFAULT; 3631 3632 return ret; 3633 } 3634 3635 /* 3636 * there are many ways the trans_start and trans_end ioctls can lead 3637 * to 
deadlocks.  They should only be used by applications that
 * basically own the machine, and have a very in depth understanding
 * of all the possible deadlocks and enospc problems.
 *
 * btrfs_ioctl_trans_end() ends the transaction handle parked in
 * file->private_data.  Returns -EINVAL when no handle is stored there
 * and 0 otherwise.
 */
long btrfs_ioctl_trans_end(struct file *file)
{
	struct inode *inode = file_inode(file);
	struct btrfs_root *root = BTRFS_I(inode)->root;
	struct btrfs_trans_handle *trans;

	trans = file->private_data;
	if (!trans)
		return -EINVAL;
	/* Detach the handle from the file before ending it. */
	file->private_data = NULL;

	btrfs_end_transaction(trans, root);

	/* One fewer ioctl-held transaction is open on this filesystem. */
	atomic_dec(&root->fs_info->open_ioctl_trans);

	/*
	 * NOTE(review): presumably pairs with a write reference taken by the
	 * trans_start ioctl, which is not visible in this part of the file.
	 */
	mnt_drop_write_file(file);
	return 0;
}

/*
 * BTRFS_IOC_START_SYNC: start an asynchronous commit of the running
 * transaction and report its transid.
 *
 * When no transaction can be attached (-ENOENT) nothing is committed and
 * fs_info->last_trans_committed is reported instead.  The transid is only
 * copied out when @argp is non-NULL.
 *
 * Returns 0 on success, -EFAULT if the copy to user space fails, or the
 * error from attaching to / asynchronously committing the transaction.
 */
static noinline long btrfs_ioctl_start_sync(struct btrfs_root *root,
					    void __user *argp)
{
	struct btrfs_trans_handle *trans;
	u64 transid;
	int ret;

	trans = btrfs_attach_transaction_barrier(root);
	if (IS_ERR(trans)) {
		if (PTR_ERR(trans) != -ENOENT)
			return PTR_ERR(trans);

		/* No running transaction, don't bother */
		transid = root->fs_info->last_trans_committed;
		goto out;
	}
	/* Read the id before the handle is handed to the async commit. */
	transid = trans->transid;
	ret = btrfs_commit_transaction_async(trans, root, 0);
	if (ret) {
		/* The async commit did not start; release the handle here. */
		btrfs_end_transaction(trans, root);
		return ret;
	}
out:
	if (argp)
		if (copy_to_user(argp, &transid, sizeof(transid)))
			return -EFAULT;
	return 0;
}

/*
 * BTRFS_IOC_WAIT_SYNC: wait for a transaction to be committed.
 *
 * The transid is read from @argp; a NULL @argp selects transid 0, which
 * the original comment describes as "current trans".  Returns -EFAULT if
 * the transid cannot be read, otherwise the result of
 * btrfs_wait_for_commit().
 */
static noinline long btrfs_ioctl_wait_sync(struct btrfs_root *root,
					   void __user *argp)
{
	u64 transid;

	if (argp) {
		if (copy_from_user(&transid, argp, sizeof(transid)))
			return -EFAULT;
	} else {
		transid = 0;  /* current trans */
	}
	return btrfs_wait_for_commit(root, transid);
}

/*
 * BTRFS_IOC_SCRUB: run a scrub described by a user-supplied
 * struct btrfs_ioctl_scrub_args.
 *
 * Requires CAP_SYS_ADMIN.  Write access to the mount is taken unless
 * BTRFS_SCRUB_READONLY is set in the flags.  The argument structure,
 * including the progress member passed to btrfs_scrub_dev(), is copied
 * back to user space whatever btrfs_scrub_dev() returned; a failed
 * copy-out turns the result into -EFAULT.
 */
static long btrfs_ioctl_scrub(struct file *file, void __user *arg)
{
	struct btrfs_root *root = BTRFS_I(file_inode(file))->root;
	struct btrfs_ioctl_scrub_args *sa;
	int ret;

	if (!capable(CAP_SYS_ADMIN))
return -EPERM;

	/* Kernel copy of the user arguments; freed at "out". */
	sa = memdup_user(arg, sizeof(*sa));
	if (IS_ERR(sa))
		return PTR_ERR(sa);

	/* A read-only scrub does not need write access to the mount. */
	if (!(sa->flags & BTRFS_SCRUB_READONLY)) {
		ret = mnt_want_write_file(file);
		if (ret)
			goto out;
	}

	ret = btrfs_scrub_dev(root->fs_info, sa->devid, sa->start, sa->end,
			      &sa->progress, sa->flags & BTRFS_SCRUB_READONLY,
			      0);

	/* Copied back even on error; a failed copy overrides ret. */
	if (copy_to_user(arg, sa, sizeof(*sa)))
		ret = -EFAULT;

	/* Same condition as the mnt_want_write_file() call above. */
	if (!(sa->flags & BTRFS_SCRUB_READONLY))
		mnt_drop_write_file(file);
out:
	kfree(sa);
	return ret;
}

/*
 * BTRFS_IOC_SCRUB_CANCEL: requires CAP_SYS_ADMIN and forwards to
 * btrfs_scrub_cancel().  @arg is not used.
 */
static long btrfs_ioctl_scrub_cancel(struct btrfs_root *root, void __user *arg)
{
	if (!capable(CAP_SYS_ADMIN))
		return -EPERM;

	return btrfs_scrub_cancel(root->fs_info);
}

/*
 * BTRFS_IOC_SCRUB_PROGRESS: query scrub progress for the device named by
 * sa->devid.
 *
 * Requires CAP_SYS_ADMIN.  The whole argument structure is copied back to
 * user space regardless of the result of btrfs_scrub_progress(); a failed
 * copy-out turns the result into -EFAULT.
 */
static long btrfs_ioctl_scrub_progress(struct btrfs_root *root,
				       void __user *arg)
{
	struct btrfs_ioctl_scrub_args *sa;
	int ret;

	if (!capable(CAP_SYS_ADMIN))
		return -EPERM;

	sa = memdup_user(arg, sizeof(*sa));
	if (IS_ERR(sa))
		return PTR_ERR(sa);

	ret = btrfs_scrub_progress(root, sa->devid, &sa->progress);

	if (copy_to_user(arg, sa, sizeof(*sa)))
		ret = -EFAULT;

	kfree(sa);
	return ret;
}

/*
 * BTRFS_IOC_GET_DEV_STATS: fetch device statistics through
 * btrfs_get_dev_stats().
 *
 * No capability is needed unless BTRFS_DEV_STATS_RESET is set in
 * sa->flags, in which case CAP_SYS_ADMIN is required (-EPERM otherwise).
 * The argument structure is always copied back; a failed copy-out turns
 * the result into -EFAULT.
 */
static long btrfs_ioctl_get_dev_stats(struct btrfs_root *root,
				      void __user *arg)
{
	struct btrfs_ioctl_get_dev_stats *sa;
	int ret;

	sa = memdup_user(arg, sizeof(*sa));
	if (IS_ERR(sa))
		return PTR_ERR(sa);

	if ((sa->flags & BTRFS_DEV_STATS_RESET) && !capable(CAP_SYS_ADMIN)) {
		kfree(sa);
		return -EPERM;
	}

	ret = btrfs_get_dev_stats(root, sa);

	if (copy_to_user(arg, sa, sizeof(*sa)))
		ret = -EFAULT;

	kfree(sa);
	return ret;
}

/*
 * BTRFS_IOC_DEV_REPLACE: start, query or cancel a device replace
 * operation, selected by p->cmd.
 *
 * Requires CAP_SYS_ADMIN.  Unknown commands yield -EINVAL.  Except for the
 * read-only-filesystem early exit of the START command, the argument
 * structure is copied back to user space and a failed copy-out turns the
 * result into -EFAULT.
 */
static long btrfs_ioctl_dev_replace(struct btrfs_root *root, void __user *arg)
{
	struct btrfs_ioctl_dev_replace_args *p;
	int ret;

	if
(!capable(CAP_SYS_ADMIN))
		return -EPERM;

	p = memdup_user(arg, sizeof(*p));
	if (IS_ERR(p))
		return PTR_ERR(p);

	switch (p->cmd) {
	case BTRFS_IOCTL_DEV_REPLACE_CMD_START:
		/* Refused on a read-only mount; nothing is copied back. */
		if (root->fs_info->sb->s_flags & MS_RDONLY) {
			ret = -EROFS;
			goto out;
		}
		/*
		 * atomic_xchg() returns the previous value: non-zero means
		 * another mutually exclusive operation already holds the flag.
		 */
		if (atomic_xchg(
			&root->fs_info->mutually_exclusive_operation_running,
			1)) {
			ret = BTRFS_ERROR_DEV_EXCL_RUN_IN_PROGRESS;
		} else {
			ret = btrfs_dev_replace_start(root, p);
			/* Release the flag taken by the atomic_xchg() above. */
			atomic_set(
			 &root->fs_info->mutually_exclusive_operation_running,
			 0);
		}
		break;
	case BTRFS_IOCTL_DEV_REPLACE_CMD_STATUS:
		btrfs_dev_replace_status(root->fs_info, p);
		ret = 0;
		break;
	case BTRFS_IOCTL_DEV_REPLACE_CMD_CANCEL:
		ret = btrfs_dev_replace_cancel(root->fs_info, p);
		break;
	default:
		ret = -EINVAL;
		break;
	}

	/* A failed copy-out overrides whatever the command returned. */
	if (copy_to_user(arg, p, sizeof(*p)))
		ret = -EFAULT;
out:
	kfree(p);
	return ret;
}

/*
 * BTRFS_IOC_INO_PATHS: resolve the paths of inode ipa->inum and copy the
 * resulting container to the user buffer whose address is ipa->fspath.
 *
 * Requires CAP_DAC_READ_SEARCH.  The result buffer size is ipa->size
 * capped at 4096 bytes.  Before the copy-out, every entry of
 * ipath->fspath->val[] is rewritten as an offset relative to the address
 * of val[] itself.
 *
 * Returns 0 on success, -EPERM, -ENOMEM, -EFAULT, or the error from
 * memdup_user() / init_ipath() / paths_from_inode().
 */
static long btrfs_ioctl_ino_to_path(struct btrfs_root *root, void __user *arg)
{
	int ret = 0;
	int i;
	u64 rel_ptr;
	int size;
	struct btrfs_ioctl_ino_path_args *ipa = NULL;
	struct inode_fs_paths *ipath = NULL;
	struct btrfs_path *path;

	if (!capable(CAP_DAC_READ_SEARCH))
		return -EPERM;

	path = btrfs_alloc_path();
	if (!path) {
		ret = -ENOMEM;
		goto out;
	}

	ipa = memdup_user(arg, sizeof(*ipa));
	if (IS_ERR(ipa)) {
		ret = PTR_ERR(ipa);
		/* Reset so the kfree() at "out" does not see an ERR_PTR. */
		ipa = NULL;
		goto out;
	}

	size = min_t(u32, ipa->size, 4096);
	ipath = init_ipath(size, root, path);
	if (IS_ERR(ipath)) {
		ret = PTR_ERR(ipath);
		/* Reset so free_ipath() at "out" does not see an ERR_PTR. */
		ipath = NULL;
		goto out;
	}

	ret = paths_from_inode(ipa->inum, ipath);
	if (ret < 0)
		goto out;

	/* Turn each stored value into an offset from the start of val[]. */
	for (i = 0; i < ipath->fspath->elem_cnt; ++i) {
		rel_ptr = ipath->fspath->val[i] -
			  (u64)(unsigned long)ipath->fspath->val;
ipath->fspath->val[i] = rel_ptr;
	}

	/* ipa->fspath carries the user-space destination address. */
	ret = copy_to_user((void *)(unsigned long)ipa->fspath,
			   (void *)(unsigned long)ipath->fspath, size);
	if (ret) {
		ret = -EFAULT;
		goto out;
	}

out:
	/* Common cleanup; path, ipath and ipa may each be NULL here. */
	btrfs_free_path(path);
	free_ipath(ipath);
	kfree(ipa);

	return ret;
}

/*
 * Callback handed to iterate_inodes_from_logical(): record one
 * (inum, offset, root) triple in the struct btrfs_data_container passed
 * as @ctx.
 *
 * A triple occupies 3 * sizeof(u64) bytes.  When that much space is left
 * it is appended to val[] and elem_cnt grows by 3; otherwise the shortfall
 * is added to bytes_missing, bytes_left is zeroed and elem_missed grows
 * by 3.  Always returns 0.
 */
static int build_ino_list(u64 inum, u64 offset, u64 root, void *ctx)
{
	struct btrfs_data_container *inodes = ctx;
	const size_t c = 3 * sizeof(u64);

	if (inodes->bytes_left >= c) {
		inodes->bytes_left -= c;
		inodes->val[inodes->elem_cnt] = inum;
		inodes->val[inodes->elem_cnt + 1] = offset;
		inodes->val[inodes->elem_cnt + 2] = root;
		inodes->elem_cnt += 3;
	} else {
		inodes->bytes_missing += c - inodes->bytes_left;
		inodes->bytes_left = 0;
		inodes->elem_missed += 3;
	}

	return 0;
}

/*
 * BTRFS_IOC_LOGICAL_INO: collect the (inum, offset, root) triples that
 * iterate_inodes_from_logical() reports for loi->logical and copy the
 * container to the user buffer whose address is loi->inodes.
 *
 * Requires CAP_SYS_ADMIN.  The result buffer size is loi->size capped at
 * 64 KiB.  -EINVAL from the iteration is reported as -ENOENT.
 */
static long btrfs_ioctl_logical_to_ino(struct btrfs_root *root,
					void __user *arg)
{
	int ret = 0;
	int size;
	struct btrfs_ioctl_logical_ino_args *loi;
	struct btrfs_data_container *inodes = NULL;
	struct btrfs_path *path = NULL;

	if (!capable(CAP_SYS_ADMIN))
		return -EPERM;

	loi = memdup_user(arg, sizeof(*loi));
	if (IS_ERR(loi)) {
		ret = PTR_ERR(loi);
		/* Reset so the kfree() at "out" does not see an ERR_PTR. */
		loi = NULL;
		goto out;
	}

	path = btrfs_alloc_path();
	if (!path) {
		ret = -ENOMEM;
		goto out;
	}

	size = min_t(u32, loi->size, 64 * 1024);
	inodes = init_data_container(size);
	if (IS_ERR(inodes)) {
		ret = PTR_ERR(inodes);
		/* Reset so the vfree() at "out" does not see an ERR_PTR. */
		inodes = NULL;
		goto out;
	}

	ret = iterate_inodes_from_logical(loi->logical, root->fs_info, path,
					  build_ino_list, inodes);
	/* Callers see -ENOENT where the iteration reported -EINVAL. */
	if (ret == -EINVAL)
		ret = -ENOENT;
	if (ret < 0)
		goto out;

	/* loi->inodes carries the user-space destination address. */
	ret = copy_to_user((void *)(unsigned long)loi->inodes,
			   (void *)(unsigned long)inodes, size);
	if (ret)
		ret = -EFAULT;

out:
btrfs_free_path(path);
	/* The container is released with vfree(), unlike loi (kfree). */
	vfree(inodes);
	kfree(loi);

	return ret;
}

/*
 * Fill @bargs from the current balance control of @fs_info.
 *
 * Copies the flags and the data/meta/sys argument blocks, ORs the
 * RUNNING / PAUSE_REQ / CANCEL_REQ bits into bargs->state according to the
 * corresponding atomics, and copies the statistics.  When @lock is
 * non-zero the statistics are copied under fs_info->balance_lock.
 *
 * fs_info->balance_ctl is dereferenced without a NULL check, so the
 * caller has to guarantee that it is set.
 */
void update_ioctl_balance_args(struct btrfs_fs_info *fs_info, int lock,
			       struct btrfs_ioctl_balance_args *bargs)
{
	struct btrfs_balance_control *bctl = fs_info->balance_ctl;

	bargs->flags = bctl->flags;

	/* State bits are only ever added; existing bits are kept. */
	if (atomic_read(&fs_info->balance_running))
		bargs->state |= BTRFS_BALANCE_STATE_RUNNING;
	if (atomic_read(&fs_info->balance_pause_req))
		bargs->state |= BTRFS_BALANCE_STATE_PAUSE_REQ;
	if (atomic_read(&fs_info->balance_cancel_req))
		bargs->state |= BTRFS_BALANCE_STATE_CANCEL_REQ;

	memcpy(&bargs->data, &bctl->data, sizeof(bargs->data));
	memcpy(&bargs->meta, &bctl->meta, sizeof(bargs->meta));
	memcpy(&bargs->sys, &bctl->sys, sizeof(bargs->sys));

	if (lock) {
		spin_lock(&fs_info->balance_lock);
		memcpy(&bargs->stat, &bctl->stat, sizeof(bargs->stat));
		spin_unlock(&fs_info->balance_lock);
	} else {
		memcpy(&bargs->stat, &bctl->stat, sizeof(bargs->stat));
	}
}

/*
 * BTRFS_IOC_BALANCE / BTRFS_IOC_BALANCE_V2: start a balance, or resume a
 * paused one when BTRFS_BALANCE_RESUME is set in the user flags.
 *
 * Requires CAP_SYS_ADMIN and write access to the mount.  A NULL @arg means
 * "balance everything" with no filters.  On the way to btrfs_balance() the
 * function holds volume_mutex and balance_mutex and owns (or inherits from
 * a paused balance) the mutually_exclusive_operation_running flag.
 *
 * Error returns visible here: -EPERM, -EINPROGRESS (a balance is running
 * or already configured), BTRFS_ERROR_DEV_EXCL_RUN_IN_PROGRESS (another
 * exclusive operation is running), -ENOTCONN (resume requested but nothing
 * to resume), -ENOMEM, -EFAULT, or whatever btrfs_balance() returns.
 */
static long btrfs_ioctl_balance(struct file *file, void __user *arg)
{
	struct btrfs_root *root = BTRFS_I(file_inode(file))->root;
	struct btrfs_fs_info *fs_info = root->fs_info;
	struct btrfs_ioctl_balance_args *bargs;
	struct btrfs_balance_control *bctl;
	bool need_unlock; /* for mut. excl. ops lock */
	int ret;

	if (!capable(CAP_SYS_ADMIN))
		return -EPERM;

	ret = mnt_want_write_file(file);
	if (ret)
		return ret;

again:
	/* Previous value 0 means this call just took the exclusive-op flag. */
	if (!atomic_xchg(&fs_info->mutually_exclusive_operation_running, 1)) {
		mutex_lock(&fs_info->volume_mutex);
		mutex_lock(&fs_info->balance_mutex);
		need_unlock = true;
		goto locked;
	}

	/*
	 * mut. excl. ops lock is locked.
Three possibilities:
	 *   (1) some other op is running
	 *   (2) balance is running
	 *   (3) balance is paused -- special case (think resume)
	 */
	mutex_lock(&fs_info->balance_mutex);
	if (fs_info->balance_ctl) {
		/* this is either (2) or (3) */
		if (!atomic_read(&fs_info->balance_running)) {
			/*
			 * balance_mutex is dropped and volume_mutex is taken
			 * with a trylock, so the order used on the "locked"
			 * path (volume_mutex, then balance_mutex) is kept
			 * without blocking while out of order.
			 */
			mutex_unlock(&fs_info->balance_mutex);
			if (!mutex_trylock(&fs_info->volume_mutex))
				goto again;
			mutex_lock(&fs_info->balance_mutex);

			/* Re-check: state may have changed while unlocked. */
			if (fs_info->balance_ctl &&
			    !atomic_read(&fs_info->balance_running)) {
				/* this is (3) */
				need_unlock = false;
				goto locked;
			}

			mutex_unlock(&fs_info->balance_mutex);
			mutex_unlock(&fs_info->volume_mutex);
			goto again;
		} else {
			/* this is (2) */
			mutex_unlock(&fs_info->balance_mutex);
			ret = -EINPROGRESS;
			goto out;
		}
	} else {
		/* this is (1) */
		mutex_unlock(&fs_info->balance_mutex);
		ret = BTRFS_ERROR_DEV_EXCL_RUN_IN_PROGRESS;
		goto out;
	}

locked:
	/* Both mutexes are held here and the exclusive-op flag must be set. */
	BUG_ON(!atomic_read(&fs_info->mutually_exclusive_operation_running));

	if (arg) {
		bargs = memdup_user(arg, sizeof(*bargs));
		if (IS_ERR(bargs)) {
			ret = PTR_ERR(bargs);
			/* out_unlock skips kfree(bargs): it is an ERR_PTR. */
			goto out_unlock;
		}

		if (bargs->flags & BTRFS_BALANCE_RESUME) {
			if (!fs_info->balance_ctl) {
				ret = -ENOTCONN;
				goto out_bargs;
			}

			/* Reuse the existing control and mark it as resumed. */
			bctl = fs_info->balance_ctl;
			spin_lock(&fs_info->balance_lock);
			bctl->flags |= BTRFS_BALANCE_RESUME;
			spin_unlock(&fs_info->balance_lock);

			goto do_balance;
		}
	} else {
		bargs = NULL;
	}

	/* Not a resume request, yet a balance control already exists. */
	if (fs_info->balance_ctl) {
		ret = -EINPROGRESS;
		goto out_bargs;
	}

	bctl = kzalloc(sizeof(*bctl), GFP_NOFS);
	if (!bctl) {
		ret = -ENOMEM;
		goto out_bargs;
	}

	bctl->fs_info = fs_info;
	if (arg) {
		/* Seed the new control from the user-supplied filters. */
		memcpy(&bctl->data, &bargs->data, sizeof(bctl->data));
		memcpy(&bctl->meta, &bargs->meta,
sizeof(bctl->meta));
		memcpy(&bctl->sys, &bargs->sys, sizeof(bctl->sys));

		bctl->flags = bargs->flags;
	} else {
		/* balance everything - no filters */
		bctl->flags |= BTRFS_BALANCE_TYPE_MASK;
	}

do_balance:
	/*
	 * Ownership of bctl and mutually_exclusive_operation_running
	 * goes to btrfs_balance.  bctl is freed in __cancel_balance,
	 * or, if restriper was paused all the way until unmount, in
	 * free_fs_info.  mutually_exclusive_operation_running is
	 * cleared in __cancel_balance.
	 */
	need_unlock = false;

	ret = btrfs_balance(bctl, bargs);

	/* A failed copy-out overrides the result of btrfs_balance(). */
	if (arg) {
		if (copy_to_user(arg, bargs, sizeof(*bargs)))
			ret = -EFAULT;
	}

out_bargs:
	/* bargs is either NULL or a valid memdup_user() buffer here. */
	kfree(bargs);
out_unlock:
	mutex_unlock(&fs_info->balance_mutex);
	mutex_unlock(&fs_info->volume_mutex);
	/* Only clear the flag if this call took it and still owns it. */
	if (need_unlock)
		atomic_set(&fs_info->mutually_exclusive_operation_running, 0);
out:
	mnt_drop_write_file(file);
	return ret;
}

/*
 * BTRFS_IOC_BALANCE_CTL: pause or cancel a balance.
 *
 * Requires CAP_SYS_ADMIN.  @cmd selects btrfs_pause_balance()
 * (BTRFS_BALANCE_CTL_PAUSE) or btrfs_cancel_balance()
 * (BTRFS_BALANCE_CTL_CANCEL); any other value yields -EINVAL.
 */
static long btrfs_ioctl_balance_ctl(struct btrfs_root *root, int cmd)
{
	if (!capable(CAP_SYS_ADMIN))
		return -EPERM;

	switch (cmd) {
	case BTRFS_BALANCE_CTL_PAUSE:
		return btrfs_pause_balance(root->fs_info);
	case BTRFS_BALANCE_CTL_CANCEL:
		return btrfs_cancel_balance(root->fs_info);
	}

	return -EINVAL;
}

/*
 * BTRFS_IOC_BALANCE_PROGRESS: report the state of the current balance.
 *
 * Requires CAP_SYS_ADMIN.  Runs under fs_info->balance_mutex.  Returns
 * -ENOTCONN when there is no balance control, -ENOMEM if the temporary
 * buffer cannot be allocated, -EFAULT if the copy-out fails and 0 on
 * success.
 */
static long btrfs_ioctl_balance_progress(struct btrfs_root *root,
					 void __user *arg)
{
	struct btrfs_fs_info *fs_info = root->fs_info;
	struct btrfs_ioctl_balance_args *bargs;
	int ret = 0;

	if (!capable(CAP_SYS_ADMIN))
		return -EPERM;

	mutex_lock(&fs_info->balance_mutex);
	if (!fs_info->balance_ctl) {
		ret = -ENOTCONN;
		goto out;
	}

	/* Zeroed so that fields not filled in below are copied out as 0. */
	bargs = kzalloc(sizeof(*bargs), GFP_NOFS);
	if (!bargs) {
		ret = -ENOMEM;
		goto out;
	}

	/* lock == 1: statistics are copied under balance_lock. */
	update_ioctl_balance_args(fs_info, 1, bargs);

	if
(copy_to_user(arg, bargs, sizeof(*bargs)))
		ret = -EFAULT;

	kfree(bargs);
out:
	mutex_unlock(&fs_info->balance_mutex);
	return ret;
}

/*
 * BTRFS_IOC_QUOTA_CTL: enable or disable quotas, selected by sa->cmd.
 *
 * Requires CAP_SYS_ADMIN and write access to the mount.  The operation
 * runs with fs_info->subvol_sem held for writing inside a transaction
 * started on the tree root, which is committed afterwards.  An unknown
 * command yields -EINVAL (the transaction is still committed).  The error
 * of the operation itself takes precedence over a commit error.
 */
static long btrfs_ioctl_quota_ctl(struct file *file, void __user *arg)
{
	struct btrfs_root *root = BTRFS_I(file_inode(file))->root;
	struct btrfs_ioctl_quota_ctl_args *sa;
	struct btrfs_trans_handle *trans = NULL;
	int ret;
	int err;

	if (!capable(CAP_SYS_ADMIN))
		return -EPERM;

	ret = mnt_want_write_file(file);
	if (ret)
		return ret;

	sa = memdup_user(arg, sizeof(*sa));
	if (IS_ERR(sa)) {
		ret = PTR_ERR(sa);
		goto drop_write;
	}

	down_write(&root->fs_info->subvol_sem);
	trans = btrfs_start_transaction(root->fs_info->tree_root, 2);
	if (IS_ERR(trans)) {
		ret = PTR_ERR(trans);
		goto out;
	}

	switch (sa->cmd) {
	case BTRFS_QUOTA_CTL_ENABLE:
		ret = btrfs_quota_enable(trans, root->fs_info);
		break;
	case BTRFS_QUOTA_CTL_DISABLE:
		ret = btrfs_quota_disable(trans, root->fs_info);
		break;
	default:
		ret = -EINVAL;
		break;
	}

	/* Report the commit error only if the operation itself succeeded. */
	err = btrfs_commit_transaction(trans, root->fs_info->tree_root);
	if (err && !ret)
		ret = err;
out:
	kfree(sa);
	up_write(&root->fs_info->subvol_sem);
drop_write:
	mnt_drop_write_file(file);
	return ret;
}

/*
 * BTRFS_IOC_QGROUP_ASSIGN: add (sa->assign != 0) or delete
 * (sa->assign == 0) the qgroup relation between sa->src and sa->dst.
 *
 * Requires CAP_SYS_ADMIN and write access to the mount.  The change is
 * made inside a joined transaction that is ended, not committed,
 * afterwards.  The error of the operation takes precedence over the error
 * of ending the transaction.
 */
static long btrfs_ioctl_qgroup_assign(struct file *file, void __user *arg)
{
	struct btrfs_root *root = BTRFS_I(file_inode(file))->root;
	struct btrfs_ioctl_qgroup_assign_args *sa;
	struct btrfs_trans_handle *trans;
	int ret;
	int err;

	if (!capable(CAP_SYS_ADMIN))
		return -EPERM;

	ret = mnt_want_write_file(file);
	if (ret)
		return ret;

	sa = memdup_user(arg, sizeof(*sa));
	if (IS_ERR(sa)) {
		ret = PTR_ERR(sa);
		goto drop_write;
	}

	trans = btrfs_join_transaction(root);
if (IS_ERR(trans)) {
		ret = PTR_ERR(trans);
		goto out;
	}

	/* FIXME: check if the IDs really exist */
	if (sa->assign) {
		ret = btrfs_add_qgroup_relation(trans, root->fs_info,
						sa->src, sa->dst);
	} else {
		ret = btrfs_del_qgroup_relation(trans, root->fs_info,
						sa->src, sa->dst);
	}

	/* Report the end-transaction error only if the operation succeeded. */
	err = btrfs_end_transaction(trans, root);
	if (err && !ret)
		ret = err;

out:
	kfree(sa);
drop_write:
	mnt_drop_write_file(file);
	return ret;
}

/*
 * BTRFS_IOC_QGROUP_CREATE: create (sa->create != 0) or remove
 * (sa->create == 0) the qgroup sa->qgroupid.
 *
 * Requires CAP_SYS_ADMIN and write access to the mount.  A qgroupid of 0
 * is rejected with -EINVAL.  The change is made inside a joined
 * transaction that is ended afterwards; the error of the operation takes
 * precedence over the error of ending the transaction.
 */
static long btrfs_ioctl_qgroup_create(struct file *file, void __user *arg)
{
	struct btrfs_root *root = BTRFS_I(file_inode(file))->root;
	struct btrfs_ioctl_qgroup_create_args *sa;
	struct btrfs_trans_handle *trans;
	int ret;
	int err;

	if (!capable(CAP_SYS_ADMIN))
		return -EPERM;

	ret = mnt_want_write_file(file);
	if (ret)
		return ret;

	sa = memdup_user(arg, sizeof(*sa));
	if (IS_ERR(sa)) {
		ret = PTR_ERR(sa);
		goto drop_write;
	}

	if (!sa->qgroupid) {
		ret = -EINVAL;
		goto out;
	}

	trans = btrfs_join_transaction(root);
	if (IS_ERR(trans)) {
		ret = PTR_ERR(trans);
		goto out;
	}

	/* FIXME: check if the IDs really exist */
	if (sa->create) {
		ret = btrfs_create_qgroup(trans, root->fs_info, sa->qgroupid,
					  NULL);
	} else {
		ret = btrfs_remove_qgroup(trans, root->fs_info, sa->qgroupid);
	}

	err = btrfs_end_transaction(trans, root);
	if (err && !ret)
		ret = err;

out:
	kfree(sa);
drop_write:
	mnt_drop_write_file(file);
	return ret;
}

/*
 * BTRFS_IOC_QGROUP_LIMIT: apply the limits in sa->lim to a qgroup.
 *
 * Requires CAP_SYS_ADMIN and write access to the mount.  A qgroupid of 0
 * selects the objectid of the root the file belongs to.  The change is
 * made inside a joined transaction that is ended afterwards; the error of
 * the operation takes precedence over the error of ending the
 * transaction.
 */
static long btrfs_ioctl_qgroup_limit(struct file *file, void __user *arg)
{
	struct btrfs_root *root = BTRFS_I(file_inode(file))->root;
	struct btrfs_ioctl_qgroup_limit_args *sa;
	struct btrfs_trans_handle *trans;
	int ret;
	int err;
	u64 qgroupid;

	if
(!capable(CAP_SYS_ADMIN))
		return -EPERM;

	ret = mnt_want_write_file(file);
	if (ret)
		return ret;

	sa = memdup_user(arg, sizeof(*sa));
	if (IS_ERR(sa)) {
		ret = PTR_ERR(sa);
		goto drop_write;
	}

	trans = btrfs_join_transaction(root);
	if (IS_ERR(trans)) {
		ret = PTR_ERR(trans);
		goto out;
	}

	qgroupid = sa->qgroupid;
	if (!qgroupid) {
		/* take the current subvol as qgroup */
		qgroupid = root->root_key.objectid;
	}

	/* FIXME: check if the IDs really exist */
	ret = btrfs_limit_qgroup(trans, root->fs_info, qgroupid, &sa->lim);

	/* Report the end-transaction error only if the operation succeeded. */
	err = btrfs_end_transaction(trans, root);
	if (err && !ret)
		ret = err;

out:
	kfree(sa);
drop_write:
	mnt_drop_write_file(file);
	return ret;
}

/*
 * BTRFS_IOC_QUOTA_RESCAN: start a qgroup rescan via btrfs_qgroup_rescan().
 *
 * Requires CAP_SYS_ADMIN and write access to the mount.  qsa->flags must
 * be 0, otherwise -EINVAL is returned.
 */
static long btrfs_ioctl_quota_rescan(struct file *file, void __user *arg)
{
	struct btrfs_root *root = BTRFS_I(file_inode(file))->root;
	struct btrfs_ioctl_quota_rescan_args *qsa;
	int ret;

	if (!capable(CAP_SYS_ADMIN))
		return -EPERM;

	ret = mnt_want_write_file(file);
	if (ret)
		return ret;

	qsa = memdup_user(arg, sizeof(*qsa));
	if (IS_ERR(qsa)) {
		ret = PTR_ERR(qsa);
		goto drop_write;
	}

	/* No flags are accepted by this ioctl. */
	if (qsa->flags) {
		ret = -EINVAL;
		goto out;
	}

	ret = btrfs_qgroup_rescan(root->fs_info);

out:
	kfree(qsa);
drop_write:
	mnt_drop_write_file(file);
	return ret;
}

/*
 * BTRFS_IOC_QUOTA_RESCAN_STATUS: report whether a qgroup rescan is
 * flagged as in progress.
 *
 * Requires CAP_SYS_ADMIN.  A zero-filled
 * struct btrfs_ioctl_quota_rescan_args is copied out; when
 * BTRFS_QGROUP_STATUS_FLAG_RESCAN is set in fs_info->qgroup_flags, flags
 * is 1 and progress holds the objectid of qgroup_rescan_progress.
 * Nothing is read from @arg.
 */
static long btrfs_ioctl_quota_rescan_status(struct file *file, void __user *arg)
{
	struct btrfs_root *root = BTRFS_I(file_inode(file))->root;
	struct btrfs_ioctl_quota_rescan_args *qsa;
	int ret = 0;

	if (!capable(CAP_SYS_ADMIN))
		return -EPERM;

	qsa = kzalloc(sizeof(*qsa), GFP_NOFS);
	if (!qsa)
		return -ENOMEM;

	if (root->fs_info->qgroup_flags & BTRFS_QGROUP_STATUS_FLAG_RESCAN) {
qsa->flags = 1;
		qsa->progress = root->fs_info->qgroup_rescan_progress.objectid;
	}

	if (copy_to_user(arg, qsa, sizeof(*qsa)))
		ret = -EFAULT;

	kfree(qsa);
	return ret;
}

/*
 * BTRFS_IOC_QUOTA_RESCAN_WAIT: requires CAP_SYS_ADMIN and forwards to
 * btrfs_qgroup_wait_for_completion().  @arg is not used.
 */
static long btrfs_ioctl_quota_rescan_wait(struct file *file, void __user *arg)
{
	struct btrfs_root *root = BTRFS_I(file_inode(file))->root;

	if (!capable(CAP_SYS_ADMIN))
		return -EPERM;

	return btrfs_qgroup_wait_for_completion(root->fs_info);
}

/*
 * Common worker for the SET_RECEIVED_SUBVOL ioctls (native and 32-bit
 * layout): record the received UUID, the send/receive transids and the
 * send/receive timestamps in the root item of the file's root.
 *
 * @sa is a kernel copy of the arguments.  rtransid and rtime are outputs
 * filled in here (current transid and current time); the caller copies
 * the structure back to user space.
 *
 * The caller must own the inode or be capable, the inode number must be
 * BTRFS_FIRST_FREE_OBJECTID (-EINVAL otherwise) and the root must not be
 * read-only (-EROFS).  Runs with fs_info->subvol_sem held for writing and
 * with write access to the mount.
 */
static long _btrfs_ioctl_set_received_subvol(struct file *file,
					    struct btrfs_ioctl_received_subvol_args *sa)
{
	struct inode *inode = file_inode(file);
	struct btrfs_root *root = BTRFS_I(inode)->root;
	struct btrfs_root_item *root_item = &root->root_item;
	struct btrfs_trans_handle *trans;
	struct timespec ct = CURRENT_TIME;
	int ret = 0;
	int received_uuid_changed;

	if (!inode_owner_or_capable(inode))
		return -EPERM;

	ret = mnt_want_write_file(file);
	if (ret < 0)
		return ret;

	down_write(&root->fs_info->subvol_sem);

	/* NOTE(review): presumably the subvolume's top directory -- confirm. */
	if (btrfs_ino(inode) != BTRFS_FIRST_FREE_OBJECTID) {
		ret = -EINVAL;
		goto out;
	}

	if (btrfs_root_readonly(root)) {
		ret = -EROFS;
		goto out;
	}

	/*
	 * 1 - root item
	 * 2 - uuid items (received uuid + subvol uuid)
	 */
	trans = btrfs_start_transaction(root, 3);
	if (IS_ERR(trans)) {
		ret = PTR_ERR(trans);
		trans = NULL;
		goto out;
	}

	/* Output fields: the receiving transaction and the current time. */
	sa->rtransid = trans->transid;
	sa->rtime.sec = ct.tv_sec;
	sa->rtime.nsec = ct.tv_nsec;

	/* Non-zero when the stored received UUID differs from the new one. */
	received_uuid_changed = memcmp(root_item->received_uuid, sa->uuid,
				       BTRFS_UUID_SIZE);
	/*
	 * Drop the uuid-tree entry of the old, non-empty received UUID.
	 * The return value of btrfs_uuid_tree_rem() is not checked.
	 */
	if (received_uuid_changed &&
	    !btrfs_is_empty_uuid(root_item->received_uuid))
		btrfs_uuid_tree_rem(trans, root->fs_info->uuid_root,
				    root_item->received_uuid,
				    BTRFS_UUID_KEY_RECEIVED_SUBVOL,
				    root->root_key.objectid);
memcpy(root_item->received_uuid, sa->uuid, BTRFS_UUID_SIZE); 4505 btrfs_set_root_stransid(root_item, sa->stransid); 4506 btrfs_set_root_rtransid(root_item, sa->rtransid); 4507 btrfs_set_stack_timespec_sec(&root_item->stime, sa->stime.sec); 4508 btrfs_set_stack_timespec_nsec(&root_item->stime, sa->stime.nsec); 4509 btrfs_set_stack_timespec_sec(&root_item->rtime, sa->rtime.sec); 4510 btrfs_set_stack_timespec_nsec(&root_item->rtime, sa->rtime.nsec); 4511 4512 ret = btrfs_update_root(trans, root->fs_info->tree_root, 4513 &root->root_key, &root->root_item); 4514 if (ret < 0) { 4515 btrfs_end_transaction(trans, root); 4516 goto out; 4517 } 4518 if (received_uuid_changed && !btrfs_is_empty_uuid(sa->uuid)) { 4519 ret = btrfs_uuid_tree_add(trans, root->fs_info->uuid_root, 4520 sa->uuid, 4521 BTRFS_UUID_KEY_RECEIVED_SUBVOL, 4522 root->root_key.objectid); 4523 if (ret < 0 && ret != -EEXIST) { 4524 btrfs_abort_transaction(trans, root, ret); 4525 goto out; 4526 } 4527 } 4528 ret = btrfs_commit_transaction(trans, root); 4529 if (ret < 0) { 4530 btrfs_abort_transaction(trans, root, ret); 4531 goto out; 4532 } 4533 4534 out: 4535 up_write(&root->fs_info->subvol_sem); 4536 mnt_drop_write_file(file); 4537 return ret; 4538 } 4539 4540 #ifdef CONFIG_64BIT 4541 static long btrfs_ioctl_set_received_subvol_32(struct file *file, 4542 void __user *arg) 4543 { 4544 struct btrfs_ioctl_received_subvol_args_32 *args32 = NULL; 4545 struct btrfs_ioctl_received_subvol_args *args64 = NULL; 4546 int ret = 0; 4547 4548 args32 = memdup_user(arg, sizeof(*args32)); 4549 if (IS_ERR(args32)) { 4550 ret = PTR_ERR(args32); 4551 args32 = NULL; 4552 goto out; 4553 } 4554 4555 args64 = kmalloc(sizeof(*args64), GFP_NOFS); 4556 if (!args64) { 4557 ret = -ENOMEM; 4558 goto out; 4559 } 4560 4561 memcpy(args64->uuid, args32->uuid, BTRFS_UUID_SIZE); 4562 args64->stransid = args32->stransid; 4563 args64->rtransid = args32->rtransid; 4564 args64->stime.sec = args32->stime.sec; 4565 args64->stime.nsec = 
args32->stime.nsec;
	args64->rtime.sec = args32->rtime.sec;
	args64->rtime.nsec = args32->rtime.nsec;
	args64->flags = args32->flags;

	ret = _btrfs_ioctl_set_received_subvol(file, args64);
	if (ret)
		goto out;

	/* Native layout -> 32-bit layout, so the outputs reach user space. */
	memcpy(args32->uuid, args64->uuid, BTRFS_UUID_SIZE);
	args32->stransid = args64->stransid;
	args32->rtransid = args64->rtransid;
	args32->stime.sec = args64->stime.sec;
	args32->stime.nsec = args64->stime.nsec;
	args32->rtime.sec = args64->rtime.sec;
	args32->rtime.nsec = args64->rtime.nsec;
	args32->flags = args64->flags;

	/* copy_to_user() returns the number of bytes not copied. */
	ret = copy_to_user(arg, args32, sizeof(*args32));
	if (ret)
		ret = -EFAULT;

out:
	kfree(args32);
	kfree(args64);
	return ret;
}
#endif

/*
 * BTRFS_IOC_SET_RECEIVED_SUBVOL: copy the arguments from user space, run
 * _btrfs_ioctl_set_received_subvol() and, on success only, copy the
 * updated structure back.
 *
 * Returns 0 on success, -EFAULT if the copy-out fails, or the error from
 * memdup_user() / the worker.
 */
static long btrfs_ioctl_set_received_subvol(struct file *file,
					    void __user *arg)
{
	struct btrfs_ioctl_received_subvol_args *sa = NULL;
	int ret = 0;

	sa = memdup_user(arg, sizeof(*sa));
	if (IS_ERR(sa)) {
		ret = PTR_ERR(sa);
		/* Reset so the kfree() at "out" does not see an ERR_PTR. */
		sa = NULL;
		goto out;
	}

	ret = _btrfs_ioctl_set_received_subvol(file, sa);

	if (ret)
		goto out;

	ret = copy_to_user(arg, sa, sizeof(*sa));
	if (ret)
		ret = -EFAULT;

out:
	kfree(sa);
	return ret;
}

/*
 * BTRFS_IOC_GET_FSLABEL: copy the filesystem label to user space.
 *
 * The label is snapshotted from the in-memory super block under
 * fs_info->super_lock.  Only the bytes before the first NUL are copied; if
 * the stored label has no NUL within BTRFS_LABEL_SIZE bytes a warning is
 * logged and BTRFS_LABEL_SIZE - 1 bytes are copied.
 *
 * NOTE(review): no terminating NUL is written to the user buffer, so user
 * space presumably has to pre-zero it -- confirm against callers.
 *
 * Returns 0 on success or -EFAULT.
 */
static int btrfs_ioctl_get_fslabel(struct file *file, void __user *arg)
{
	struct btrfs_root *root = BTRFS_I(file_inode(file))->root;
	size_t len;
	int ret;
	char label[BTRFS_LABEL_SIZE];

	spin_lock(&root->fs_info->super_lock);
	memcpy(label, root->fs_info->super_copy->label, BTRFS_LABEL_SIZE);
	spin_unlock(&root->fs_info->super_lock);

	len = strnlen(label, BTRFS_LABEL_SIZE);

	if (len == BTRFS_LABEL_SIZE) {
		/* --len both shortens the copy and is the value printed. */
		btrfs_warn(root->fs_info,
			"label is too long, return the first %zu bytes", --len);
	}

	ret = copy_to_user(arg, label, len);

	return ret ?
-EFAULT : 0;
}

/*
 * BTRFS_IOC_SET_FSLABEL: replace the filesystem label.
 *
 * Requires CAP_SYS_ADMIN.  Exactly BTRFS_LABEL_SIZE bytes are read from
 * user space; a label without a NUL inside that buffer is rejected with
 * -EINVAL.  The label is written to the in-memory super block under
 * fs_info->super_lock inside a transaction that is then committed, with
 * write access to the mount held for the duration.
 */
static int btrfs_ioctl_set_fslabel(struct file *file, void __user *arg)
{
	struct btrfs_root *root = BTRFS_I(file_inode(file))->root;
	struct btrfs_super_block *super_block = root->fs_info->super_copy;
	struct btrfs_trans_handle *trans;
	char label[BTRFS_LABEL_SIZE];
	int ret;

	if (!capable(CAP_SYS_ADMIN))
		return -EPERM;

	if (copy_from_user(label, arg, sizeof(label)))
		return -EFAULT;

	/* No NUL within the buffer: the label is too long to store. */
	if (strnlen(label, BTRFS_LABEL_SIZE) == BTRFS_LABEL_SIZE) {
		btrfs_err(root->fs_info, "unable to set label with more than %d bytes",
		       BTRFS_LABEL_SIZE - 1);
		return -EINVAL;
	}

	ret = mnt_want_write_file(file);
	if (ret)
		return ret;

	trans = btrfs_start_transaction(root, 0);
	if (IS_ERR(trans)) {
		ret = PTR_ERR(trans);
		goto out_unlock;
	}

	spin_lock(&root->fs_info->super_lock);
	/* Safe: the strnlen() check above guarantees NUL termination. */
	strcpy(super_block->label, label);
	spin_unlock(&root->fs_info->super_lock);
	ret = btrfs_commit_transaction(trans, root);

out_unlock:
	mnt_drop_write_file(file);
	return ret;
}

/*
 * Initializer for one struct btrfs_ioctl_feature_flags from the three
 * BTRFS_FEATURE_{COMPAT,COMPAT_RO,INCOMPAT}_<suffix> masks.
 */
#define INIT_FEATURE_FLAGS(suffix) \
	{ .compat_flags = BTRFS_FEATURE_COMPAT_##suffix, \
	  .compat_ro_flags = BTRFS_FEATURE_COMPAT_RO_##suffix, \
	  .incompat_flags = BTRFS_FEATURE_INCOMPAT_##suffix }

/*
 * BTRFS_IOC_GET_SUPPORTED_FEATURES: copy three feature-flag sets to user
 * space, in this order: supported (SUPP), settable (SAFE_SET) and
 * clearable (SAFE_CLEAR).  @file is not used.  Returns 0 or -EFAULT.
 */
static int btrfs_ioctl_get_supported_features(struct file *file,
					      void __user *arg)
{
	static struct btrfs_ioctl_feature_flags features[3] = {
		INIT_FEATURE_FLAGS(SUPP),
		INIT_FEATURE_FLAGS(SAFE_SET),
		INIT_FEATURE_FLAGS(SAFE_CLEAR)
	};

	if (copy_to_user(arg, &features, sizeof(features)))
		return -EFAULT;

	return 0;
}

/*
 * BTRFS_IOC_GET_FEATURES: copy the compat, compat_ro and incompat flag
 * words of the in-memory super block to user space.  Returns 0 or
 * -EFAULT.
 */
static int btrfs_ioctl_get_features(struct file *file, void __user *arg)
{
	struct btrfs_root *root = BTRFS_I(file_inode(file))->root;
	struct btrfs_super_block *super_block = root->fs_info->super_copy;
	struct
btrfs_ioctl_feature_flags features; 4709 4710 features.compat_flags = btrfs_super_compat_flags(super_block); 4711 features.compat_ro_flags = btrfs_super_compat_ro_flags(super_block); 4712 features.incompat_flags = btrfs_super_incompat_flags(super_block); 4713 4714 if (copy_to_user(arg, &features, sizeof(features))) 4715 return -EFAULT; 4716 4717 return 0; 4718 } 4719 4720 static int check_feature_bits(struct btrfs_root *root, 4721 enum btrfs_feature_set set, 4722 u64 change_mask, u64 flags, u64 supported_flags, 4723 u64 safe_set, u64 safe_clear) 4724 { 4725 const char *type = btrfs_feature_set_names[set]; 4726 char *names; 4727 u64 disallowed, unsupported; 4728 u64 set_mask = flags & change_mask; 4729 u64 clear_mask = ~flags & change_mask; 4730 4731 unsupported = set_mask & ~supported_flags; 4732 if (unsupported) { 4733 names = btrfs_printable_features(set, unsupported); 4734 if (names) { 4735 btrfs_warn(root->fs_info, 4736 "this kernel does not support the %s feature bit%s", 4737 names, strchr(names, ',') ? "s" : ""); 4738 kfree(names); 4739 } else 4740 btrfs_warn(root->fs_info, 4741 "this kernel does not support %s bits 0x%llx", 4742 type, unsupported); 4743 return -EOPNOTSUPP; 4744 } 4745 4746 disallowed = set_mask & ~safe_set; 4747 if (disallowed) { 4748 names = btrfs_printable_features(set, disallowed); 4749 if (names) { 4750 btrfs_warn(root->fs_info, 4751 "can't set the %s feature bit%s while mounted", 4752 names, strchr(names, ',') ? "s" : ""); 4753 kfree(names); 4754 } else 4755 btrfs_warn(root->fs_info, 4756 "can't set %s bits 0x%llx while mounted", 4757 type, disallowed); 4758 return -EPERM; 4759 } 4760 4761 disallowed = clear_mask & ~safe_clear; 4762 if (disallowed) { 4763 names = btrfs_printable_features(set, disallowed); 4764 if (names) { 4765 btrfs_warn(root->fs_info, 4766 "can't clear the %s feature bit%s while mounted", 4767 names, strchr(names, ',') ? 
"s" : ""); 4768 kfree(names); 4769 } else 4770 btrfs_warn(root->fs_info, 4771 "can't clear %s bits 0x%llx while mounted", 4772 type, disallowed); 4773 return -EPERM; 4774 } 4775 4776 return 0; 4777 } 4778 4779 #define check_feature(root, change_mask, flags, mask_base) \ 4780 check_feature_bits(root, FEAT_##mask_base, change_mask, flags, \ 4781 BTRFS_FEATURE_ ## mask_base ## _SUPP, \ 4782 BTRFS_FEATURE_ ## mask_base ## _SAFE_SET, \ 4783 BTRFS_FEATURE_ ## mask_base ## _SAFE_CLEAR) 4784 4785 static int btrfs_ioctl_set_features(struct file *file, void __user *arg) 4786 { 4787 struct btrfs_root *root = BTRFS_I(file_inode(file))->root; 4788 struct btrfs_super_block *super_block = root->fs_info->super_copy; 4789 struct btrfs_ioctl_feature_flags flags[2]; 4790 struct btrfs_trans_handle *trans; 4791 u64 newflags; 4792 int ret; 4793 4794 if (!capable(CAP_SYS_ADMIN)) 4795 return -EPERM; 4796 4797 if (copy_from_user(flags, arg, sizeof(flags))) 4798 return -EFAULT; 4799 4800 /* Nothing to do */ 4801 if (!flags[0].compat_flags && !flags[0].compat_ro_flags && 4802 !flags[0].incompat_flags) 4803 return 0; 4804 4805 ret = check_feature(root, flags[0].compat_flags, 4806 flags[1].compat_flags, COMPAT); 4807 if (ret) 4808 return ret; 4809 4810 ret = check_feature(root, flags[0].compat_ro_flags, 4811 flags[1].compat_ro_flags, COMPAT_RO); 4812 if (ret) 4813 return ret; 4814 4815 ret = check_feature(root, flags[0].incompat_flags, 4816 flags[1].incompat_flags, INCOMPAT); 4817 if (ret) 4818 return ret; 4819 4820 trans = btrfs_start_transaction(root, 0); 4821 if (IS_ERR(trans)) 4822 return PTR_ERR(trans); 4823 4824 spin_lock(&root->fs_info->super_lock); 4825 newflags = btrfs_super_compat_flags(super_block); 4826 newflags |= flags[0].compat_flags & flags[1].compat_flags; 4827 newflags &= ~(flags[0].compat_flags & ~flags[1].compat_flags); 4828 btrfs_set_super_compat_flags(super_block, newflags); 4829 4830 newflags = btrfs_super_compat_ro_flags(super_block); 4831 newflags |= 
flags[0].compat_ro_flags & flags[1].compat_ro_flags; 4832 newflags &= ~(flags[0].compat_ro_flags & ~flags[1].compat_ro_flags); 4833 btrfs_set_super_compat_ro_flags(super_block, newflags); 4834 4835 newflags = btrfs_super_incompat_flags(super_block); 4836 newflags |= flags[0].incompat_flags & flags[1].incompat_flags; 4837 newflags &= ~(flags[0].incompat_flags & ~flags[1].incompat_flags); 4838 btrfs_set_super_incompat_flags(super_block, newflags); 4839 spin_unlock(&root->fs_info->super_lock); 4840 4841 return btrfs_commit_transaction(trans, root); 4842 } 4843 4844 long btrfs_ioctl(struct file *file, unsigned int 4845 cmd, unsigned long arg) 4846 { 4847 struct btrfs_root *root = BTRFS_I(file_inode(file))->root; 4848 void __user *argp = (void __user *)arg; 4849 4850 switch (cmd) { 4851 case FS_IOC_GETFLAGS: 4852 return btrfs_ioctl_getflags(file, argp); 4853 case FS_IOC_SETFLAGS: 4854 return btrfs_ioctl_setflags(file, argp); 4855 case FS_IOC_GETVERSION: 4856 return btrfs_ioctl_getversion(file, argp); 4857 case FITRIM: 4858 return btrfs_ioctl_fitrim(file, argp); 4859 case BTRFS_IOC_SNAP_CREATE: 4860 return btrfs_ioctl_snap_create(file, argp, 0); 4861 case BTRFS_IOC_SNAP_CREATE_V2: 4862 return btrfs_ioctl_snap_create_v2(file, argp, 0); 4863 case BTRFS_IOC_SUBVOL_CREATE: 4864 return btrfs_ioctl_snap_create(file, argp, 1); 4865 case BTRFS_IOC_SUBVOL_CREATE_V2: 4866 return btrfs_ioctl_snap_create_v2(file, argp, 1); 4867 case BTRFS_IOC_SNAP_DESTROY: 4868 return btrfs_ioctl_snap_destroy(file, argp); 4869 case BTRFS_IOC_SUBVOL_GETFLAGS: 4870 return btrfs_ioctl_subvol_getflags(file, argp); 4871 case BTRFS_IOC_SUBVOL_SETFLAGS: 4872 return btrfs_ioctl_subvol_setflags(file, argp); 4873 case BTRFS_IOC_DEFAULT_SUBVOL: 4874 return btrfs_ioctl_default_subvol(file, argp); 4875 case BTRFS_IOC_DEFRAG: 4876 return btrfs_ioctl_defrag(file, NULL); 4877 case BTRFS_IOC_DEFRAG_RANGE: 4878 return btrfs_ioctl_defrag(file, argp); 4879 case BTRFS_IOC_RESIZE: 4880 return btrfs_ioctl_resize(file, 
argp); 4881 case BTRFS_IOC_ADD_DEV: 4882 return btrfs_ioctl_add_dev(root, argp); 4883 case BTRFS_IOC_RM_DEV: 4884 return btrfs_ioctl_rm_dev(file, argp); 4885 case BTRFS_IOC_FS_INFO: 4886 return btrfs_ioctl_fs_info(root, argp); 4887 case BTRFS_IOC_DEV_INFO: 4888 return btrfs_ioctl_dev_info(root, argp); 4889 case BTRFS_IOC_BALANCE: 4890 return btrfs_ioctl_balance(file, NULL); 4891 case BTRFS_IOC_CLONE: 4892 return btrfs_ioctl_clone(file, arg, 0, 0, 0); 4893 case BTRFS_IOC_CLONE_RANGE: 4894 return btrfs_ioctl_clone_range(file, argp); 4895 case BTRFS_IOC_TRANS_START: 4896 return btrfs_ioctl_trans_start(file); 4897 case BTRFS_IOC_TRANS_END: 4898 return btrfs_ioctl_trans_end(file); 4899 case BTRFS_IOC_TREE_SEARCH: 4900 return btrfs_ioctl_tree_search(file, argp); 4901 case BTRFS_IOC_INO_LOOKUP: 4902 return btrfs_ioctl_ino_lookup(file, argp); 4903 case BTRFS_IOC_INO_PATHS: 4904 return btrfs_ioctl_ino_to_path(root, argp); 4905 case BTRFS_IOC_LOGICAL_INO: 4906 return btrfs_ioctl_logical_to_ino(root, argp); 4907 case BTRFS_IOC_SPACE_INFO: 4908 return btrfs_ioctl_space_info(root, argp); 4909 case BTRFS_IOC_SYNC: { 4910 int ret; 4911 4912 ret = btrfs_start_delalloc_roots(root->fs_info, 0, -1); 4913 if (ret) 4914 return ret; 4915 ret = btrfs_sync_fs(file->f_dentry->d_sb, 1); 4916 return ret; 4917 } 4918 case BTRFS_IOC_START_SYNC: 4919 return btrfs_ioctl_start_sync(root, argp); 4920 case BTRFS_IOC_WAIT_SYNC: 4921 return btrfs_ioctl_wait_sync(root, argp); 4922 case BTRFS_IOC_SCRUB: 4923 return btrfs_ioctl_scrub(file, argp); 4924 case BTRFS_IOC_SCRUB_CANCEL: 4925 return btrfs_ioctl_scrub_cancel(root, argp); 4926 case BTRFS_IOC_SCRUB_PROGRESS: 4927 return btrfs_ioctl_scrub_progress(root, argp); 4928 case BTRFS_IOC_BALANCE_V2: 4929 return btrfs_ioctl_balance(file, argp); 4930 case BTRFS_IOC_BALANCE_CTL: 4931 return btrfs_ioctl_balance_ctl(root, arg); 4932 case BTRFS_IOC_BALANCE_PROGRESS: 4933 return btrfs_ioctl_balance_progress(root, argp); 4934 case BTRFS_IOC_SET_RECEIVED_SUBVOL: 
4935 return btrfs_ioctl_set_received_subvol(file, argp); 4936 #ifdef CONFIG_64BIT 4937 case BTRFS_IOC_SET_RECEIVED_SUBVOL_32: 4938 return btrfs_ioctl_set_received_subvol_32(file, argp); 4939 #endif 4940 case BTRFS_IOC_SEND: 4941 return btrfs_ioctl_send(file, argp); 4942 case BTRFS_IOC_GET_DEV_STATS: 4943 return btrfs_ioctl_get_dev_stats(root, argp); 4944 case BTRFS_IOC_QUOTA_CTL: 4945 return btrfs_ioctl_quota_ctl(file, argp); 4946 case BTRFS_IOC_QGROUP_ASSIGN: 4947 return btrfs_ioctl_qgroup_assign(file, argp); 4948 case BTRFS_IOC_QGROUP_CREATE: 4949 return btrfs_ioctl_qgroup_create(file, argp); 4950 case BTRFS_IOC_QGROUP_LIMIT: 4951 return btrfs_ioctl_qgroup_limit(file, argp); 4952 case BTRFS_IOC_QUOTA_RESCAN: 4953 return btrfs_ioctl_quota_rescan(file, argp); 4954 case BTRFS_IOC_QUOTA_RESCAN_STATUS: 4955 return btrfs_ioctl_quota_rescan_status(file, argp); 4956 case BTRFS_IOC_QUOTA_RESCAN_WAIT: 4957 return btrfs_ioctl_quota_rescan_wait(file, argp); 4958 case BTRFS_IOC_DEV_REPLACE: 4959 return btrfs_ioctl_dev_replace(root, argp); 4960 case BTRFS_IOC_GET_FSLABEL: 4961 return btrfs_ioctl_get_fslabel(file, argp); 4962 case BTRFS_IOC_SET_FSLABEL: 4963 return btrfs_ioctl_set_fslabel(file, argp); 4964 case BTRFS_IOC_FILE_EXTENT_SAME: 4965 return btrfs_ioctl_file_extent_same(file, argp); 4966 case BTRFS_IOC_GET_SUPPORTED_FEATURES: 4967 return btrfs_ioctl_get_supported_features(file, argp); 4968 case BTRFS_IOC_GET_FEATURES: 4969 return btrfs_ioctl_get_features(file, argp); 4970 case BTRFS_IOC_SET_FEATURES: 4971 return btrfs_ioctl_set_features(file, argp); 4972 } 4973 4974 return -ENOTTY; 4975 } 4976