1 /* 2 * Copyright (C) 2007 Oracle. All rights reserved. 3 * 4 * This program is free software; you can redistribute it and/or 5 * modify it under the terms of the GNU General Public 6 * License v2 as published by the Free Software Foundation. 7 * 8 * This program is distributed in the hope that it will be useful, 9 * but WITHOUT ANY WARRANTY; without even the implied warranty of 10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 11 * General Public License for more details. 12 * 13 * You should have received a copy of the GNU General Public 14 * License along with this program; if not, write to the 15 * Free Software Foundation, Inc., 59 Temple Place - Suite 330, 16 * Boston, MA 021110-1307, USA. 17 */ 18 19 #include <linux/kernel.h> 20 #include <linux/bio.h> 21 #include <linux/buffer_head.h> 22 #include <linux/file.h> 23 #include <linux/fs.h> 24 #include <linux/fsnotify.h> 25 #include <linux/pagemap.h> 26 #include <linux/highmem.h> 27 #include <linux/time.h> 28 #include <linux/init.h> 29 #include <linux/string.h> 30 #include <linux/backing-dev.h> 31 #include <linux/mount.h> 32 #include <linux/mpage.h> 33 #include <linux/namei.h> 34 #include <linux/swap.h> 35 #include <linux/writeback.h> 36 #include <linux/statfs.h> 37 #include <linux/compat.h> 38 #include <linux/bit_spinlock.h> 39 #include <linux/security.h> 40 #include <linux/xattr.h> 41 #include <linux/vmalloc.h> 42 #include <linux/slab.h> 43 #include <linux/blkdev.h> 44 #include <linux/uuid.h> 45 #include <linux/btrfs.h> 46 #include <linux/uaccess.h> 47 #include "compat.h" 48 #include "ctree.h" 49 #include "disk-io.h" 50 #include "transaction.h" 51 #include "btrfs_inode.h" 52 #include "print-tree.h" 53 #include "volumes.h" 54 #include "locking.h" 55 #include "inode-map.h" 56 #include "backref.h" 57 #include "rcu-string.h" 58 #include "send.h" 59 #include "dev-replace.h" 60 61 static int btrfs_clone(struct inode *src, struct inode *inode, 62 u64 off, u64 olen, u64 olen_aligned, u64 destoff); 
63 64 /* Mask out flags that are inappropriate for the given type of inode. */ 65 static inline __u32 btrfs_mask_flags(umode_t mode, __u32 flags) 66 { 67 if (S_ISDIR(mode)) 68 return flags; 69 else if (S_ISREG(mode)) 70 return flags & ~FS_DIRSYNC_FL; 71 else 72 return flags & (FS_NODUMP_FL | FS_NOATIME_FL); 73 } 74 75 /* 76 * Export inode flags to the format expected by the FS_IOC_GETFLAGS ioctl. 77 */ 78 static unsigned int btrfs_flags_to_ioctl(unsigned int flags) 79 { 80 unsigned int iflags = 0; 81 82 if (flags & BTRFS_INODE_SYNC) 83 iflags |= FS_SYNC_FL; 84 if (flags & BTRFS_INODE_IMMUTABLE) 85 iflags |= FS_IMMUTABLE_FL; 86 if (flags & BTRFS_INODE_APPEND) 87 iflags |= FS_APPEND_FL; 88 if (flags & BTRFS_INODE_NODUMP) 89 iflags |= FS_NODUMP_FL; 90 if (flags & BTRFS_INODE_NOATIME) 91 iflags |= FS_NOATIME_FL; 92 if (flags & BTRFS_INODE_DIRSYNC) 93 iflags |= FS_DIRSYNC_FL; 94 if (flags & BTRFS_INODE_NODATACOW) 95 iflags |= FS_NOCOW_FL; 96 97 if ((flags & BTRFS_INODE_COMPRESS) && !(flags & BTRFS_INODE_NOCOMPRESS)) 98 iflags |= FS_COMPR_FL; 99 else if (flags & BTRFS_INODE_NOCOMPRESS) 100 iflags |= FS_NOCOMP_FL; 101 102 return iflags; 103 } 104 105 /* 106 * Update inode->i_flags based on the btrfs internal flags. 107 */ 108 void btrfs_update_iflags(struct inode *inode) 109 { 110 struct btrfs_inode *ip = BTRFS_I(inode); 111 112 inode->i_flags &= ~(S_SYNC|S_APPEND|S_IMMUTABLE|S_NOATIME|S_DIRSYNC); 113 114 if (ip->flags & BTRFS_INODE_SYNC) 115 inode->i_flags |= S_SYNC; 116 if (ip->flags & BTRFS_INODE_IMMUTABLE) 117 inode->i_flags |= S_IMMUTABLE; 118 if (ip->flags & BTRFS_INODE_APPEND) 119 inode->i_flags |= S_APPEND; 120 if (ip->flags & BTRFS_INODE_NOATIME) 121 inode->i_flags |= S_NOATIME; 122 if (ip->flags & BTRFS_INODE_DIRSYNC) 123 inode->i_flags |= S_DIRSYNC; 124 } 125 126 /* 127 * Inherit flags from the parent inode. 128 * 129 * Currently only the compression flags and the cow flags are inherited. 
130 */ 131 void btrfs_inherit_iflags(struct inode *inode, struct inode *dir) 132 { 133 unsigned int flags; 134 135 if (!dir) 136 return; 137 138 flags = BTRFS_I(dir)->flags; 139 140 if (flags & BTRFS_INODE_NOCOMPRESS) { 141 BTRFS_I(inode)->flags &= ~BTRFS_INODE_COMPRESS; 142 BTRFS_I(inode)->flags |= BTRFS_INODE_NOCOMPRESS; 143 } else if (flags & BTRFS_INODE_COMPRESS) { 144 BTRFS_I(inode)->flags &= ~BTRFS_INODE_NOCOMPRESS; 145 BTRFS_I(inode)->flags |= BTRFS_INODE_COMPRESS; 146 } 147 148 if (flags & BTRFS_INODE_NODATACOW) { 149 BTRFS_I(inode)->flags |= BTRFS_INODE_NODATACOW; 150 if (S_ISREG(inode->i_mode)) 151 BTRFS_I(inode)->flags |= BTRFS_INODE_NODATASUM; 152 } 153 154 btrfs_update_iflags(inode); 155 } 156 157 static int btrfs_ioctl_getflags(struct file *file, void __user *arg) 158 { 159 struct btrfs_inode *ip = BTRFS_I(file_inode(file)); 160 unsigned int flags = btrfs_flags_to_ioctl(ip->flags); 161 162 if (copy_to_user(arg, &flags, sizeof(flags))) 163 return -EFAULT; 164 return 0; 165 } 166 167 static int check_flags(unsigned int flags) 168 { 169 if (flags & ~(FS_IMMUTABLE_FL | FS_APPEND_FL | \ 170 FS_NOATIME_FL | FS_NODUMP_FL | \ 171 FS_SYNC_FL | FS_DIRSYNC_FL | \ 172 FS_NOCOMP_FL | FS_COMPR_FL | 173 FS_NOCOW_FL)) 174 return -EOPNOTSUPP; 175 176 if ((flags & FS_NOCOMP_FL) && (flags & FS_COMPR_FL)) 177 return -EINVAL; 178 179 return 0; 180 } 181 182 static int btrfs_ioctl_setflags(struct file *file, void __user *arg) 183 { 184 struct inode *inode = file_inode(file); 185 struct btrfs_inode *ip = BTRFS_I(inode); 186 struct btrfs_root *root = ip->root; 187 struct btrfs_trans_handle *trans; 188 unsigned int flags, oldflags; 189 int ret; 190 u64 ip_oldflags; 191 unsigned int i_oldflags; 192 umode_t mode; 193 194 if (btrfs_root_readonly(root)) 195 return -EROFS; 196 197 if (copy_from_user(&flags, arg, sizeof(flags))) 198 return -EFAULT; 199 200 ret = check_flags(flags); 201 if (ret) 202 return ret; 203 204 if (!inode_owner_or_capable(inode)) 205 return -EACCES; 206 207 
ret = mnt_want_write_file(file); 208 if (ret) 209 return ret; 210 211 mutex_lock(&inode->i_mutex); 212 213 ip_oldflags = ip->flags; 214 i_oldflags = inode->i_flags; 215 mode = inode->i_mode; 216 217 flags = btrfs_mask_flags(inode->i_mode, flags); 218 oldflags = btrfs_flags_to_ioctl(ip->flags); 219 if ((flags ^ oldflags) & (FS_APPEND_FL | FS_IMMUTABLE_FL)) { 220 if (!capable(CAP_LINUX_IMMUTABLE)) { 221 ret = -EPERM; 222 goto out_unlock; 223 } 224 } 225 226 if (flags & FS_SYNC_FL) 227 ip->flags |= BTRFS_INODE_SYNC; 228 else 229 ip->flags &= ~BTRFS_INODE_SYNC; 230 if (flags & FS_IMMUTABLE_FL) 231 ip->flags |= BTRFS_INODE_IMMUTABLE; 232 else 233 ip->flags &= ~BTRFS_INODE_IMMUTABLE; 234 if (flags & FS_APPEND_FL) 235 ip->flags |= BTRFS_INODE_APPEND; 236 else 237 ip->flags &= ~BTRFS_INODE_APPEND; 238 if (flags & FS_NODUMP_FL) 239 ip->flags |= BTRFS_INODE_NODUMP; 240 else 241 ip->flags &= ~BTRFS_INODE_NODUMP; 242 if (flags & FS_NOATIME_FL) 243 ip->flags |= BTRFS_INODE_NOATIME; 244 else 245 ip->flags &= ~BTRFS_INODE_NOATIME; 246 if (flags & FS_DIRSYNC_FL) 247 ip->flags |= BTRFS_INODE_DIRSYNC; 248 else 249 ip->flags &= ~BTRFS_INODE_DIRSYNC; 250 if (flags & FS_NOCOW_FL) { 251 if (S_ISREG(mode)) { 252 /* 253 * It's safe to turn csums off here, no extents exist. 254 * Otherwise we want the flag to reflect the real COW 255 * status of the file and will not set it. 256 */ 257 if (inode->i_size == 0) 258 ip->flags |= BTRFS_INODE_NODATACOW 259 | BTRFS_INODE_NODATASUM; 260 } else { 261 ip->flags |= BTRFS_INODE_NODATACOW; 262 } 263 } else { 264 /* 265 * Revert back under same assuptions as above 266 */ 267 if (S_ISREG(mode)) { 268 if (inode->i_size == 0) 269 ip->flags &= ~(BTRFS_INODE_NODATACOW 270 | BTRFS_INODE_NODATASUM); 271 } else { 272 ip->flags &= ~BTRFS_INODE_NODATACOW; 273 } 274 } 275 276 /* 277 * The COMPRESS flag can only be changed by users, while the NOCOMPRESS 278 * flag may be changed automatically if compression code won't make 279 * things smaller. 
280 */ 281 if (flags & FS_NOCOMP_FL) { 282 ip->flags &= ~BTRFS_INODE_COMPRESS; 283 ip->flags |= BTRFS_INODE_NOCOMPRESS; 284 } else if (flags & FS_COMPR_FL) { 285 ip->flags |= BTRFS_INODE_COMPRESS; 286 ip->flags &= ~BTRFS_INODE_NOCOMPRESS; 287 } else { 288 ip->flags &= ~(BTRFS_INODE_COMPRESS | BTRFS_INODE_NOCOMPRESS); 289 } 290 291 trans = btrfs_start_transaction(root, 1); 292 if (IS_ERR(trans)) { 293 ret = PTR_ERR(trans); 294 goto out_drop; 295 } 296 297 btrfs_update_iflags(inode); 298 inode_inc_iversion(inode); 299 inode->i_ctime = CURRENT_TIME; 300 ret = btrfs_update_inode(trans, root, inode); 301 302 btrfs_end_transaction(trans, root); 303 out_drop: 304 if (ret) { 305 ip->flags = ip_oldflags; 306 inode->i_flags = i_oldflags; 307 } 308 309 out_unlock: 310 mutex_unlock(&inode->i_mutex); 311 mnt_drop_write_file(file); 312 return ret; 313 } 314 315 static int btrfs_ioctl_getversion(struct file *file, int __user *arg) 316 { 317 struct inode *inode = file_inode(file); 318 319 return put_user(inode->i_generation, arg); 320 } 321 322 static noinline int btrfs_ioctl_fitrim(struct file *file, void __user *arg) 323 { 324 struct btrfs_fs_info *fs_info = btrfs_sb(fdentry(file)->d_sb); 325 struct btrfs_device *device; 326 struct request_queue *q; 327 struct fstrim_range range; 328 u64 minlen = ULLONG_MAX; 329 u64 num_devices = 0; 330 u64 total_bytes = btrfs_super_total_bytes(fs_info->super_copy); 331 int ret; 332 333 if (!capable(CAP_SYS_ADMIN)) 334 return -EPERM; 335 336 rcu_read_lock(); 337 list_for_each_entry_rcu(device, &fs_info->fs_devices->devices, 338 dev_list) { 339 if (!device->bdev) 340 continue; 341 q = bdev_get_queue(device->bdev); 342 if (blk_queue_discard(q)) { 343 num_devices++; 344 minlen = min((u64)q->limits.discard_granularity, 345 minlen); 346 } 347 } 348 rcu_read_unlock(); 349 350 if (!num_devices) 351 return -EOPNOTSUPP; 352 if (copy_from_user(&range, arg, sizeof(range))) 353 return -EFAULT; 354 if (range.start > total_bytes || 355 range.len < 
fs_info->sb->s_blocksize) 356 return -EINVAL; 357 358 range.len = min(range.len, total_bytes - range.start); 359 range.minlen = max(range.minlen, minlen); 360 ret = btrfs_trim_fs(fs_info->tree_root, &range); 361 if (ret < 0) 362 return ret; 363 364 if (copy_to_user(arg, &range, sizeof(range))) 365 return -EFAULT; 366 367 return 0; 368 } 369 370 int btrfs_is_empty_uuid(u8 *uuid) 371 { 372 static char empty_uuid[BTRFS_UUID_SIZE] = {0}; 373 374 return !memcmp(uuid, empty_uuid, BTRFS_UUID_SIZE); 375 } 376 377 static noinline int create_subvol(struct inode *dir, 378 struct dentry *dentry, 379 char *name, int namelen, 380 u64 *async_transid, 381 struct btrfs_qgroup_inherit *inherit) 382 { 383 struct btrfs_trans_handle *trans; 384 struct btrfs_key key; 385 struct btrfs_root_item root_item; 386 struct btrfs_inode_item *inode_item; 387 struct extent_buffer *leaf; 388 struct btrfs_root *root = BTRFS_I(dir)->root; 389 struct btrfs_root *new_root; 390 struct btrfs_block_rsv block_rsv; 391 struct timespec cur_time = CURRENT_TIME; 392 int ret; 393 int err; 394 u64 objectid; 395 u64 new_dirid = BTRFS_FIRST_FREE_OBJECTID; 396 u64 index = 0; 397 u64 qgroup_reserved; 398 uuid_le new_uuid; 399 400 ret = btrfs_find_free_objectid(root->fs_info->tree_root, &objectid); 401 if (ret) 402 return ret; 403 404 btrfs_init_block_rsv(&block_rsv, BTRFS_BLOCK_RSV_TEMP); 405 /* 406 * The same as the snapshot creation, please see the comment 407 * of create_snapshot(). 
408 */ 409 ret = btrfs_subvolume_reserve_metadata(root, &block_rsv, 410 8, &qgroup_reserved, false); 411 if (ret) 412 return ret; 413 414 trans = btrfs_start_transaction(root, 0); 415 if (IS_ERR(trans)) { 416 ret = PTR_ERR(trans); 417 goto out; 418 } 419 trans->block_rsv = &block_rsv; 420 trans->bytes_reserved = block_rsv.size; 421 422 ret = btrfs_qgroup_inherit(trans, root->fs_info, 0, objectid, inherit); 423 if (ret) 424 goto fail; 425 426 leaf = btrfs_alloc_free_block(trans, root, root->leafsize, 427 0, objectid, NULL, 0, 0, 0); 428 if (IS_ERR(leaf)) { 429 ret = PTR_ERR(leaf); 430 goto fail; 431 } 432 433 memset_extent_buffer(leaf, 0, 0, sizeof(struct btrfs_header)); 434 btrfs_set_header_bytenr(leaf, leaf->start); 435 btrfs_set_header_generation(leaf, trans->transid); 436 btrfs_set_header_backref_rev(leaf, BTRFS_MIXED_BACKREF_REV); 437 btrfs_set_header_owner(leaf, objectid); 438 439 write_extent_buffer(leaf, root->fs_info->fsid, 440 (unsigned long)btrfs_header_fsid(leaf), 441 BTRFS_FSID_SIZE); 442 write_extent_buffer(leaf, root->fs_info->chunk_tree_uuid, 443 (unsigned long)btrfs_header_chunk_tree_uuid(leaf), 444 BTRFS_UUID_SIZE); 445 btrfs_mark_buffer_dirty(leaf); 446 447 memset(&root_item, 0, sizeof(root_item)); 448 449 inode_item = &root_item.inode; 450 btrfs_set_stack_inode_generation(inode_item, 1); 451 btrfs_set_stack_inode_size(inode_item, 3); 452 btrfs_set_stack_inode_nlink(inode_item, 1); 453 btrfs_set_stack_inode_nbytes(inode_item, root->leafsize); 454 btrfs_set_stack_inode_mode(inode_item, S_IFDIR | 0755); 455 456 btrfs_set_root_flags(&root_item, 0); 457 btrfs_set_root_limit(&root_item, 0); 458 btrfs_set_stack_inode_flags(inode_item, BTRFS_INODE_ROOT_ITEM_INIT); 459 460 btrfs_set_root_bytenr(&root_item, leaf->start); 461 btrfs_set_root_generation(&root_item, trans->transid); 462 btrfs_set_root_level(&root_item, 0); 463 btrfs_set_root_refs(&root_item, 1); 464 btrfs_set_root_used(&root_item, leaf->len); 465 btrfs_set_root_last_snapshot(&root_item, 0); 
466 467 btrfs_set_root_generation_v2(&root_item, 468 btrfs_root_generation(&root_item)); 469 uuid_le_gen(&new_uuid); 470 memcpy(root_item.uuid, new_uuid.b, BTRFS_UUID_SIZE); 471 btrfs_set_stack_timespec_sec(&root_item.otime, cur_time.tv_sec); 472 btrfs_set_stack_timespec_nsec(&root_item.otime, cur_time.tv_nsec); 473 root_item.ctime = root_item.otime; 474 btrfs_set_root_ctransid(&root_item, trans->transid); 475 btrfs_set_root_otransid(&root_item, trans->transid); 476 477 btrfs_tree_unlock(leaf); 478 free_extent_buffer(leaf); 479 leaf = NULL; 480 481 btrfs_set_root_dirid(&root_item, new_dirid); 482 483 key.objectid = objectid; 484 key.offset = 0; 485 btrfs_set_key_type(&key, BTRFS_ROOT_ITEM_KEY); 486 ret = btrfs_insert_root(trans, root->fs_info->tree_root, &key, 487 &root_item); 488 if (ret) 489 goto fail; 490 491 key.offset = (u64)-1; 492 new_root = btrfs_read_fs_root_no_name(root->fs_info, &key); 493 if (IS_ERR(new_root)) { 494 btrfs_abort_transaction(trans, root, PTR_ERR(new_root)); 495 ret = PTR_ERR(new_root); 496 goto fail; 497 } 498 499 btrfs_record_root_in_trans(trans, new_root); 500 501 ret = btrfs_create_subvol_root(trans, new_root, new_dirid); 502 if (ret) { 503 /* We potentially lose an unused inode item here */ 504 btrfs_abort_transaction(trans, root, ret); 505 goto fail; 506 } 507 508 /* 509 * insert the directory item 510 */ 511 ret = btrfs_set_inode_index(dir, &index); 512 if (ret) { 513 btrfs_abort_transaction(trans, root, ret); 514 goto fail; 515 } 516 517 ret = btrfs_insert_dir_item(trans, root, 518 name, namelen, dir, &key, 519 BTRFS_FT_DIR, index); 520 if (ret) { 521 btrfs_abort_transaction(trans, root, ret); 522 goto fail; 523 } 524 525 btrfs_i_size_write(dir, dir->i_size + namelen * 2); 526 ret = btrfs_update_inode(trans, root, dir); 527 BUG_ON(ret); 528 529 ret = btrfs_add_root_ref(trans, root->fs_info->tree_root, 530 objectid, root->root_key.objectid, 531 btrfs_ino(dir), index, name, namelen); 532 BUG_ON(ret); 533 534 ret = 
btrfs_uuid_tree_add(trans, root->fs_info->uuid_root, 535 root_item.uuid, BTRFS_UUID_KEY_SUBVOL, 536 objectid); 537 if (ret) 538 btrfs_abort_transaction(trans, root, ret); 539 540 fail: 541 trans->block_rsv = NULL; 542 trans->bytes_reserved = 0; 543 if (async_transid) { 544 *async_transid = trans->transid; 545 err = btrfs_commit_transaction_async(trans, root, 1); 546 if (err) 547 err = btrfs_commit_transaction(trans, root); 548 } else { 549 err = btrfs_commit_transaction(trans, root); 550 } 551 if (err && !ret) 552 ret = err; 553 554 if (!ret) 555 d_instantiate(dentry, btrfs_lookup_dentry(dir, dentry)); 556 out: 557 btrfs_subvolume_release_metadata(root, &block_rsv, qgroup_reserved); 558 return ret; 559 } 560 561 static int create_snapshot(struct btrfs_root *root, struct inode *dir, 562 struct dentry *dentry, char *name, int namelen, 563 u64 *async_transid, bool readonly, 564 struct btrfs_qgroup_inherit *inherit) 565 { 566 struct inode *inode; 567 struct btrfs_pending_snapshot *pending_snapshot; 568 struct btrfs_trans_handle *trans; 569 int ret; 570 571 if (!root->ref_cows) 572 return -EINVAL; 573 574 ret = btrfs_start_delalloc_inodes(root, 0); 575 if (ret) 576 return ret; 577 578 btrfs_wait_ordered_extents(root, 0); 579 580 pending_snapshot = kzalloc(sizeof(*pending_snapshot), GFP_NOFS); 581 if (!pending_snapshot) 582 return -ENOMEM; 583 584 btrfs_init_block_rsv(&pending_snapshot->block_rsv, 585 BTRFS_BLOCK_RSV_TEMP); 586 /* 587 * 1 - parent dir inode 588 * 2 - dir entries 589 * 1 - root item 590 * 2 - root ref/backref 591 * 1 - root of snapshot 592 * 1 - UUID item 593 */ 594 ret = btrfs_subvolume_reserve_metadata(BTRFS_I(dir)->root, 595 &pending_snapshot->block_rsv, 8, 596 &pending_snapshot->qgroup_reserved, 597 false); 598 if (ret) 599 goto out; 600 601 pending_snapshot->dentry = dentry; 602 pending_snapshot->root = root; 603 pending_snapshot->readonly = readonly; 604 pending_snapshot->dir = dir; 605 pending_snapshot->inherit = inherit; 606 607 trans = 
btrfs_start_transaction(root, 0); 608 if (IS_ERR(trans)) { 609 ret = PTR_ERR(trans); 610 goto fail; 611 } 612 613 spin_lock(&root->fs_info->trans_lock); 614 list_add(&pending_snapshot->list, 615 &trans->transaction->pending_snapshots); 616 spin_unlock(&root->fs_info->trans_lock); 617 if (async_transid) { 618 *async_transid = trans->transid; 619 ret = btrfs_commit_transaction_async(trans, 620 root->fs_info->extent_root, 1); 621 if (ret) 622 ret = btrfs_commit_transaction(trans, root); 623 } else { 624 ret = btrfs_commit_transaction(trans, 625 root->fs_info->extent_root); 626 } 627 if (ret) 628 goto fail; 629 630 ret = pending_snapshot->error; 631 if (ret) 632 goto fail; 633 634 ret = btrfs_orphan_cleanup(pending_snapshot->snap); 635 if (ret) 636 goto fail; 637 638 inode = btrfs_lookup_dentry(dentry->d_parent->d_inode, dentry); 639 if (IS_ERR(inode)) { 640 ret = PTR_ERR(inode); 641 goto fail; 642 } 643 BUG_ON(!inode); 644 d_instantiate(dentry, inode); 645 ret = 0; 646 fail: 647 btrfs_subvolume_release_metadata(BTRFS_I(dir)->root, 648 &pending_snapshot->block_rsv, 649 pending_snapshot->qgroup_reserved); 650 out: 651 kfree(pending_snapshot); 652 return ret; 653 } 654 655 /* copy of check_sticky in fs/namei.c() 656 * It's inline, so penalty for filesystems that don't use sticky bit is 657 * minimal. 658 */ 659 static inline int btrfs_check_sticky(struct inode *dir, struct inode *inode) 660 { 661 kuid_t fsuid = current_fsuid(); 662 663 if (!(dir->i_mode & S_ISVTX)) 664 return 0; 665 if (uid_eq(inode->i_uid, fsuid)) 666 return 0; 667 if (uid_eq(dir->i_uid, fsuid)) 668 return 0; 669 return !capable(CAP_FOWNER); 670 } 671 672 /* copy of may_delete in fs/namei.c() 673 * Check whether we can remove a link victim from directory dir, check 674 * whether the type of victim is right. 675 * 1. We can't do it if dir is read-only (done in permission()) 676 * 2. We should have write and exec permissions on dir 677 * 3. We can't remove anything from append-only dir 678 * 4. 
We can't do anything with immutable dir (done in permission()) 679 * 5. If the sticky bit on dir is set we should either 680 * a. be owner of dir, or 681 * b. be owner of victim, or 682 * c. have CAP_FOWNER capability 683 * 6. If the victim is append-only or immutable we can't do antyhing with 684 * links pointing to it. 685 * 7. If we were asked to remove a directory and victim isn't one - ENOTDIR. 686 * 8. If we were asked to remove a non-directory and victim isn't one - EISDIR. 687 * 9. We can't remove a root or mountpoint. 688 * 10. We don't allow removal of NFS sillyrenamed files; it's handled by 689 * nfs_async_unlink(). 690 */ 691 692 static int btrfs_may_delete(struct inode *dir,struct dentry *victim,int isdir) 693 { 694 int error; 695 696 if (!victim->d_inode) 697 return -ENOENT; 698 699 BUG_ON(victim->d_parent->d_inode != dir); 700 audit_inode_child(dir, victim, AUDIT_TYPE_CHILD_DELETE); 701 702 error = inode_permission(dir, MAY_WRITE | MAY_EXEC); 703 if (error) 704 return error; 705 if (IS_APPEND(dir)) 706 return -EPERM; 707 if (btrfs_check_sticky(dir, victim->d_inode)|| 708 IS_APPEND(victim->d_inode)|| 709 IS_IMMUTABLE(victim->d_inode) || IS_SWAPFILE(victim->d_inode)) 710 return -EPERM; 711 if (isdir) { 712 if (!S_ISDIR(victim->d_inode->i_mode)) 713 return -ENOTDIR; 714 if (IS_ROOT(victim)) 715 return -EBUSY; 716 } else if (S_ISDIR(victim->d_inode->i_mode)) 717 return -EISDIR; 718 if (IS_DEADDIR(dir)) 719 return -ENOENT; 720 if (victim->d_flags & DCACHE_NFSFS_RENAMED) 721 return -EBUSY; 722 return 0; 723 } 724 725 /* copy of may_create in fs/namei.c() */ 726 static inline int btrfs_may_create(struct inode *dir, struct dentry *child) 727 { 728 if (child->d_inode) 729 return -EEXIST; 730 if (IS_DEADDIR(dir)) 731 return -ENOENT; 732 return inode_permission(dir, MAY_WRITE | MAY_EXEC); 733 } 734 735 /* 736 * Create a new subvolume below @parent. 
This is largely modeled after 737 * sys_mkdirat and vfs_mkdir, but we only do a single component lookup 738 * inside this filesystem so it's quite a bit simpler. 739 */ 740 static noinline int btrfs_mksubvol(struct path *parent, 741 char *name, int namelen, 742 struct btrfs_root *snap_src, 743 u64 *async_transid, bool readonly, 744 struct btrfs_qgroup_inherit *inherit) 745 { 746 struct inode *dir = parent->dentry->d_inode; 747 struct dentry *dentry; 748 int error; 749 750 error = mutex_lock_killable_nested(&dir->i_mutex, I_MUTEX_PARENT); 751 if (error == -EINTR) 752 return error; 753 754 dentry = lookup_one_len(name, parent->dentry, namelen); 755 error = PTR_ERR(dentry); 756 if (IS_ERR(dentry)) 757 goto out_unlock; 758 759 error = -EEXIST; 760 if (dentry->d_inode) 761 goto out_dput; 762 763 error = btrfs_may_create(dir, dentry); 764 if (error) 765 goto out_dput; 766 767 /* 768 * even if this name doesn't exist, we may get hash collisions. 769 * check for them now when we can safely fail 770 */ 771 error = btrfs_check_dir_item_collision(BTRFS_I(dir)->root, 772 dir->i_ino, name, 773 namelen); 774 if (error) 775 goto out_dput; 776 777 down_read(&BTRFS_I(dir)->root->fs_info->subvol_sem); 778 779 if (btrfs_root_refs(&BTRFS_I(dir)->root->root_item) == 0) 780 goto out_up_read; 781 782 if (snap_src) { 783 error = create_snapshot(snap_src, dir, dentry, name, namelen, 784 async_transid, readonly, inherit); 785 } else { 786 error = create_subvol(dir, dentry, name, namelen, 787 async_transid, inherit); 788 } 789 if (!error) 790 fsnotify_mkdir(dir, dentry); 791 out_up_read: 792 up_read(&BTRFS_I(dir)->root->fs_info->subvol_sem); 793 out_dput: 794 dput(dentry); 795 out_unlock: 796 mutex_unlock(&dir->i_mutex); 797 return error; 798 } 799 800 /* 801 * When we're defragging a range, we don't want to kick it off again 802 * if it is really just waiting for delalloc to send it down. 
803 * If we find a nice big extent or delalloc range for the bytes in the 804 * file you want to defrag, we return 0 to let you know to skip this 805 * part of the file 806 */ 807 static int check_defrag_in_cache(struct inode *inode, u64 offset, int thresh) 808 { 809 struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree; 810 struct extent_map *em = NULL; 811 struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree; 812 u64 end; 813 814 read_lock(&em_tree->lock); 815 em = lookup_extent_mapping(em_tree, offset, PAGE_CACHE_SIZE); 816 read_unlock(&em_tree->lock); 817 818 if (em) { 819 end = extent_map_end(em); 820 free_extent_map(em); 821 if (end - offset > thresh) 822 return 0; 823 } 824 /* if we already have a nice delalloc here, just stop */ 825 thresh /= 2; 826 end = count_range_bits(io_tree, &offset, offset + thresh, 827 thresh, EXTENT_DELALLOC, 1); 828 if (end >= thresh) 829 return 0; 830 return 1; 831 } 832 833 /* 834 * helper function to walk through a file and find extents 835 * newer than a specific transid, and smaller than thresh. 
836 * 837 * This is used by the defragging code to find new and small 838 * extents 839 */ 840 static int find_new_extents(struct btrfs_root *root, 841 struct inode *inode, u64 newer_than, 842 u64 *off, int thresh) 843 { 844 struct btrfs_path *path; 845 struct btrfs_key min_key; 846 struct btrfs_key max_key; 847 struct extent_buffer *leaf; 848 struct btrfs_file_extent_item *extent; 849 int type; 850 int ret; 851 u64 ino = btrfs_ino(inode); 852 853 path = btrfs_alloc_path(); 854 if (!path) 855 return -ENOMEM; 856 857 min_key.objectid = ino; 858 min_key.type = BTRFS_EXTENT_DATA_KEY; 859 min_key.offset = *off; 860 861 max_key.objectid = ino; 862 max_key.type = (u8)-1; 863 max_key.offset = (u64)-1; 864 865 path->keep_locks = 1; 866 867 while(1) { 868 ret = btrfs_search_forward(root, &min_key, &max_key, 869 path, newer_than); 870 if (ret != 0) 871 goto none; 872 if (min_key.objectid != ino) 873 goto none; 874 if (min_key.type != BTRFS_EXTENT_DATA_KEY) 875 goto none; 876 877 leaf = path->nodes[0]; 878 extent = btrfs_item_ptr(leaf, path->slots[0], 879 struct btrfs_file_extent_item); 880 881 type = btrfs_file_extent_type(leaf, extent); 882 if (type == BTRFS_FILE_EXTENT_REG && 883 btrfs_file_extent_num_bytes(leaf, extent) < thresh && 884 check_defrag_in_cache(inode, min_key.offset, thresh)) { 885 *off = min_key.offset; 886 btrfs_free_path(path); 887 return 0; 888 } 889 890 if (min_key.offset == (u64)-1) 891 goto none; 892 893 min_key.offset++; 894 btrfs_release_path(path); 895 } 896 none: 897 btrfs_free_path(path); 898 return -ENOENT; 899 } 900 901 static struct extent_map *defrag_lookup_extent(struct inode *inode, u64 start) 902 { 903 struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree; 904 struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree; 905 struct extent_map *em; 906 u64 len = PAGE_CACHE_SIZE; 907 908 /* 909 * hopefully we have this extent in the tree already, try without 910 * the full extent lock 911 */ 912 read_lock(&em_tree->lock); 913 em = 
lookup_extent_mapping(em_tree, start, len); 914 read_unlock(&em_tree->lock); 915 916 if (!em) { 917 /* get the big lock and read metadata off disk */ 918 lock_extent(io_tree, start, start + len - 1); 919 em = btrfs_get_extent(inode, NULL, 0, start, len, 0); 920 unlock_extent(io_tree, start, start + len - 1); 921 922 if (IS_ERR(em)) 923 return NULL; 924 } 925 926 return em; 927 } 928 929 static bool defrag_check_next_extent(struct inode *inode, struct extent_map *em) 930 { 931 struct extent_map *next; 932 bool ret = true; 933 934 /* this is the last extent */ 935 if (em->start + em->len >= i_size_read(inode)) 936 return false; 937 938 next = defrag_lookup_extent(inode, em->start + em->len); 939 if (!next || next->block_start >= EXTENT_MAP_LAST_BYTE) 940 ret = false; 941 942 free_extent_map(next); 943 return ret; 944 } 945 946 static int should_defrag_range(struct inode *inode, u64 start, int thresh, 947 u64 *last_len, u64 *skip, u64 *defrag_end, 948 int compress) 949 { 950 struct extent_map *em; 951 int ret = 1; 952 bool next_mergeable = true; 953 954 /* 955 * make sure that once we start defragging an extent, we keep on 956 * defragging it 957 */ 958 if (start < *defrag_end) 959 return 1; 960 961 *skip = 0; 962 963 em = defrag_lookup_extent(inode, start); 964 if (!em) 965 return 0; 966 967 /* this will cover holes, and inline extents */ 968 if (em->block_start >= EXTENT_MAP_LAST_BYTE) { 969 ret = 0; 970 goto out; 971 } 972 973 next_mergeable = defrag_check_next_extent(inode, em); 974 975 /* 976 * we hit a real extent, if it is big or the next extent is not a 977 * real extent, don't bother defragging it 978 */ 979 if (!compress && (*last_len == 0 || *last_len >= thresh) && 980 (em->len >= thresh || !next_mergeable)) 981 ret = 0; 982 out: 983 /* 984 * last_len ends up being a counter of how many bytes we've defragged. 985 * every time we choose not to defrag an extent, we reset *last_len 986 * so that the next tiny extent will force a defrag. 
987 * 988 * The end result of this is that tiny extents before a single big 989 * extent will force at least part of that big extent to be defragged. 990 */ 991 if (ret) { 992 *defrag_end = extent_map_end(em); 993 } else { 994 *last_len = 0; 995 *skip = extent_map_end(em); 996 *defrag_end = 0; 997 } 998 999 free_extent_map(em); 1000 return ret; 1001 } 1002 1003 /* 1004 * it doesn't do much good to defrag one or two pages 1005 * at a time. This pulls in a nice chunk of pages 1006 * to COW and defrag. 1007 * 1008 * It also makes sure the delalloc code has enough 1009 * dirty data to avoid making new small extents as part 1010 * of the defrag 1011 * 1012 * It's a good idea to start RA on this range 1013 * before calling this. 1014 */ 1015 static int cluster_pages_for_defrag(struct inode *inode, 1016 struct page **pages, 1017 unsigned long start_index, 1018 int num_pages) 1019 { 1020 unsigned long file_end; 1021 u64 isize = i_size_read(inode); 1022 u64 page_start; 1023 u64 page_end; 1024 u64 page_cnt; 1025 int ret; 1026 int i; 1027 int i_done; 1028 struct btrfs_ordered_extent *ordered; 1029 struct extent_state *cached_state = NULL; 1030 struct extent_io_tree *tree; 1031 gfp_t mask = btrfs_alloc_write_mask(inode->i_mapping); 1032 1033 file_end = (isize - 1) >> PAGE_CACHE_SHIFT; 1034 if (!isize || start_index > file_end) 1035 return 0; 1036 1037 page_cnt = min_t(u64, (u64)num_pages, (u64)file_end - start_index + 1); 1038 1039 ret = btrfs_delalloc_reserve_space(inode, 1040 page_cnt << PAGE_CACHE_SHIFT); 1041 if (ret) 1042 return ret; 1043 i_done = 0; 1044 tree = &BTRFS_I(inode)->io_tree; 1045 1046 /* step one, lock all the pages */ 1047 for (i = 0; i < page_cnt; i++) { 1048 struct page *page; 1049 again: 1050 page = find_or_create_page(inode->i_mapping, 1051 start_index + i, mask); 1052 if (!page) 1053 break; 1054 1055 page_start = page_offset(page); 1056 page_end = page_start + PAGE_CACHE_SIZE - 1; 1057 while (1) { 1058 lock_extent(tree, page_start, page_end); 1059 
ordered = btrfs_lookup_ordered_extent(inode, 1060 page_start); 1061 unlock_extent(tree, page_start, page_end); 1062 if (!ordered) 1063 break; 1064 1065 unlock_page(page); 1066 btrfs_start_ordered_extent(inode, ordered, 1); 1067 btrfs_put_ordered_extent(ordered); 1068 lock_page(page); 1069 /* 1070 * we unlocked the page above, so we need check if 1071 * it was released or not. 1072 */ 1073 if (page->mapping != inode->i_mapping) { 1074 unlock_page(page); 1075 page_cache_release(page); 1076 goto again; 1077 } 1078 } 1079 1080 if (!PageUptodate(page)) { 1081 btrfs_readpage(NULL, page); 1082 lock_page(page); 1083 if (!PageUptodate(page)) { 1084 unlock_page(page); 1085 page_cache_release(page); 1086 ret = -EIO; 1087 break; 1088 } 1089 } 1090 1091 if (page->mapping != inode->i_mapping) { 1092 unlock_page(page); 1093 page_cache_release(page); 1094 goto again; 1095 } 1096 1097 pages[i] = page; 1098 i_done++; 1099 } 1100 if (!i_done || ret) 1101 goto out; 1102 1103 if (!(inode->i_sb->s_flags & MS_ACTIVE)) 1104 goto out; 1105 1106 /* 1107 * so now we have a nice long stream of locked 1108 * and up to date pages, lets wait on them 1109 */ 1110 for (i = 0; i < i_done; i++) 1111 wait_on_page_writeback(pages[i]); 1112 1113 page_start = page_offset(pages[0]); 1114 page_end = page_offset(pages[i_done - 1]) + PAGE_CACHE_SIZE; 1115 1116 lock_extent_bits(&BTRFS_I(inode)->io_tree, 1117 page_start, page_end - 1, 0, &cached_state); 1118 clear_extent_bit(&BTRFS_I(inode)->io_tree, page_start, 1119 page_end - 1, EXTENT_DIRTY | EXTENT_DELALLOC | 1120 EXTENT_DO_ACCOUNTING | EXTENT_DEFRAG, 0, 0, 1121 &cached_state, GFP_NOFS); 1122 1123 if (i_done != page_cnt) { 1124 spin_lock(&BTRFS_I(inode)->lock); 1125 BTRFS_I(inode)->outstanding_extents++; 1126 spin_unlock(&BTRFS_I(inode)->lock); 1127 btrfs_delalloc_release_space(inode, 1128 (page_cnt - i_done) << PAGE_CACHE_SHIFT); 1129 } 1130 1131 1132 set_extent_defrag(&BTRFS_I(inode)->io_tree, page_start, page_end - 1, 1133 &cached_state, GFP_NOFS); 
1134 1135 unlock_extent_cached(&BTRFS_I(inode)->io_tree, 1136 page_start, page_end - 1, &cached_state, 1137 GFP_NOFS); 1138 1139 for (i = 0; i < i_done; i++) { 1140 clear_page_dirty_for_io(pages[i]); 1141 ClearPageChecked(pages[i]); 1142 set_page_extent_mapped(pages[i]); 1143 set_page_dirty(pages[i]); 1144 unlock_page(pages[i]); 1145 page_cache_release(pages[i]); 1146 } 1147 return i_done; 1148 out: 1149 for (i = 0; i < i_done; i++) { 1150 unlock_page(pages[i]); 1151 page_cache_release(pages[i]); 1152 } 1153 btrfs_delalloc_release_space(inode, page_cnt << PAGE_CACHE_SHIFT); 1154 return ret; 1155 1156 } 1157 1158 int btrfs_defrag_file(struct inode *inode, struct file *file, 1159 struct btrfs_ioctl_defrag_range_args *range, 1160 u64 newer_than, unsigned long max_to_defrag) 1161 { 1162 struct btrfs_root *root = BTRFS_I(inode)->root; 1163 struct file_ra_state *ra = NULL; 1164 unsigned long last_index; 1165 u64 isize = i_size_read(inode); 1166 u64 last_len = 0; 1167 u64 skip = 0; 1168 u64 defrag_end = 0; 1169 u64 newer_off = range->start; 1170 unsigned long i; 1171 unsigned long ra_index = 0; 1172 int ret; 1173 int defrag_count = 0; 1174 int compress_type = BTRFS_COMPRESS_ZLIB; 1175 int extent_thresh = range->extent_thresh; 1176 int max_cluster = (256 * 1024) >> PAGE_CACHE_SHIFT; 1177 int cluster = max_cluster; 1178 u64 new_align = ~((u64)128 * 1024 - 1); 1179 struct page **pages = NULL; 1180 1181 if (isize == 0) 1182 return 0; 1183 1184 if (range->start >= isize) 1185 return -EINVAL; 1186 1187 if (range->flags & BTRFS_DEFRAG_RANGE_COMPRESS) { 1188 if (range->compress_type > BTRFS_COMPRESS_TYPES) 1189 return -EINVAL; 1190 if (range->compress_type) 1191 compress_type = range->compress_type; 1192 } 1193 1194 if (extent_thresh == 0) 1195 extent_thresh = 256 * 1024; 1196 1197 /* 1198 * if we were not given a file, allocate a readahead 1199 * context 1200 */ 1201 if (!file) { 1202 ra = kzalloc(sizeof(*ra), GFP_NOFS); 1203 if (!ra) 1204 return -ENOMEM; 1205 
file_ra_state_init(ra, inode->i_mapping); 1206 } else { 1207 ra = &file->f_ra; 1208 } 1209 1210 pages = kmalloc(sizeof(struct page *) * max_cluster, 1211 GFP_NOFS); 1212 if (!pages) { 1213 ret = -ENOMEM; 1214 goto out_ra; 1215 } 1216 1217 /* find the last page to defrag */ 1218 if (range->start + range->len > range->start) { 1219 last_index = min_t(u64, isize - 1, 1220 range->start + range->len - 1) >> PAGE_CACHE_SHIFT; 1221 } else { 1222 last_index = (isize - 1) >> PAGE_CACHE_SHIFT; 1223 } 1224 1225 if (newer_than) { 1226 ret = find_new_extents(root, inode, newer_than, 1227 &newer_off, 64 * 1024); 1228 if (!ret) { 1229 range->start = newer_off; 1230 /* 1231 * we always align our defrag to help keep 1232 * the extents in the file evenly spaced 1233 */ 1234 i = (newer_off & new_align) >> PAGE_CACHE_SHIFT; 1235 } else 1236 goto out_ra; 1237 } else { 1238 i = range->start >> PAGE_CACHE_SHIFT; 1239 } 1240 if (!max_to_defrag) 1241 max_to_defrag = last_index + 1; 1242 1243 /* 1244 * make writeback starts from i, so the defrag range can be 1245 * written sequentially. 
1246 */ 1247 if (i < inode->i_mapping->writeback_index) 1248 inode->i_mapping->writeback_index = i; 1249 1250 while (i <= last_index && defrag_count < max_to_defrag && 1251 (i < (i_size_read(inode) + PAGE_CACHE_SIZE - 1) >> 1252 PAGE_CACHE_SHIFT)) { 1253 /* 1254 * make sure we stop running if someone unmounts 1255 * the FS 1256 */ 1257 if (!(inode->i_sb->s_flags & MS_ACTIVE)) 1258 break; 1259 1260 if (btrfs_defrag_cancelled(root->fs_info)) { 1261 printk(KERN_DEBUG "btrfs: defrag_file cancelled\n"); 1262 ret = -EAGAIN; 1263 break; 1264 } 1265 1266 if (!should_defrag_range(inode, (u64)i << PAGE_CACHE_SHIFT, 1267 extent_thresh, &last_len, &skip, 1268 &defrag_end, range->flags & 1269 BTRFS_DEFRAG_RANGE_COMPRESS)) { 1270 unsigned long next; 1271 /* 1272 * the should_defrag function tells us how much to skip 1273 * bump our counter by the suggested amount 1274 */ 1275 next = (skip + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT; 1276 i = max(i + 1, next); 1277 continue; 1278 } 1279 1280 if (!newer_than) { 1281 cluster = (PAGE_CACHE_ALIGN(defrag_end) >> 1282 PAGE_CACHE_SHIFT) - i; 1283 cluster = min(cluster, max_cluster); 1284 } else { 1285 cluster = max_cluster; 1286 } 1287 1288 if (i + cluster > ra_index) { 1289 ra_index = max(i, ra_index); 1290 btrfs_force_ra(inode->i_mapping, ra, file, ra_index, 1291 cluster); 1292 ra_index += max_cluster; 1293 } 1294 1295 mutex_lock(&inode->i_mutex); 1296 if (range->flags & BTRFS_DEFRAG_RANGE_COMPRESS) 1297 BTRFS_I(inode)->force_compress = compress_type; 1298 ret = cluster_pages_for_defrag(inode, pages, i, cluster); 1299 if (ret < 0) { 1300 mutex_unlock(&inode->i_mutex); 1301 goto out_ra; 1302 } 1303 1304 defrag_count += ret; 1305 balance_dirty_pages_ratelimited(inode->i_mapping); 1306 mutex_unlock(&inode->i_mutex); 1307 1308 if (newer_than) { 1309 if (newer_off == (u64)-1) 1310 break; 1311 1312 if (ret > 0) 1313 i += ret; 1314 1315 newer_off = max(newer_off + 1, 1316 (u64)i << PAGE_CACHE_SHIFT); 1317 1318 ret = find_new_extents(root, 
inode, 1319 newer_than, &newer_off, 1320 64 * 1024); 1321 if (!ret) { 1322 range->start = newer_off; 1323 i = (newer_off & new_align) >> PAGE_CACHE_SHIFT; 1324 } else { 1325 break; 1326 } 1327 } else { 1328 if (ret > 0) { 1329 i += ret; 1330 last_len += ret << PAGE_CACHE_SHIFT; 1331 } else { 1332 i++; 1333 last_len = 0; 1334 } 1335 } 1336 } 1337 1338 if ((range->flags & BTRFS_DEFRAG_RANGE_START_IO)) 1339 filemap_flush(inode->i_mapping); 1340 1341 if ((range->flags & BTRFS_DEFRAG_RANGE_COMPRESS)) { 1342 /* the filemap_flush will queue IO into the worker threads, but 1343 * we have to make sure the IO is actually started and that 1344 * ordered extents get created before we return 1345 */ 1346 atomic_inc(&root->fs_info->async_submit_draining); 1347 while (atomic_read(&root->fs_info->nr_async_submits) || 1348 atomic_read(&root->fs_info->async_delalloc_pages)) { 1349 wait_event(root->fs_info->async_submit_wait, 1350 (atomic_read(&root->fs_info->nr_async_submits) == 0 && 1351 atomic_read(&root->fs_info->async_delalloc_pages) == 0)); 1352 } 1353 atomic_dec(&root->fs_info->async_submit_draining); 1354 } 1355 1356 if (range->compress_type == BTRFS_COMPRESS_LZO) { 1357 btrfs_set_fs_incompat(root->fs_info, COMPRESS_LZO); 1358 } 1359 1360 ret = defrag_count; 1361 1362 out_ra: 1363 if (range->flags & BTRFS_DEFRAG_RANGE_COMPRESS) { 1364 mutex_lock(&inode->i_mutex); 1365 BTRFS_I(inode)->force_compress = BTRFS_COMPRESS_NONE; 1366 mutex_unlock(&inode->i_mutex); 1367 } 1368 if (!file) 1369 kfree(ra); 1370 kfree(pages); 1371 return ret; 1372 } 1373 1374 static noinline int btrfs_ioctl_resize(struct file *file, 1375 void __user *arg) 1376 { 1377 u64 new_size; 1378 u64 old_size; 1379 u64 devid = 1; 1380 struct btrfs_root *root = BTRFS_I(file_inode(file))->root; 1381 struct btrfs_ioctl_vol_args *vol_args; 1382 struct btrfs_trans_handle *trans; 1383 struct btrfs_device *device = NULL; 1384 char *sizestr; 1385 char *devstr = NULL; 1386 int ret = 0; 1387 int mod = 0; 1388 1389 if 
(!capable(CAP_SYS_ADMIN)) 1390 return -EPERM; 1391 1392 ret = mnt_want_write_file(file); 1393 if (ret) 1394 return ret; 1395 1396 if (atomic_xchg(&root->fs_info->mutually_exclusive_operation_running, 1397 1)) { 1398 pr_info("btrfs: dev add/delete/balance/replace/resize operation in progress\n"); 1399 mnt_drop_write_file(file); 1400 return -EINVAL; 1401 } 1402 1403 mutex_lock(&root->fs_info->volume_mutex); 1404 vol_args = memdup_user(arg, sizeof(*vol_args)); 1405 if (IS_ERR(vol_args)) { 1406 ret = PTR_ERR(vol_args); 1407 goto out; 1408 } 1409 1410 vol_args->name[BTRFS_PATH_NAME_MAX] = '\0'; 1411 1412 sizestr = vol_args->name; 1413 devstr = strchr(sizestr, ':'); 1414 if (devstr) { 1415 char *end; 1416 sizestr = devstr + 1; 1417 *devstr = '\0'; 1418 devstr = vol_args->name; 1419 devid = simple_strtoull(devstr, &end, 10); 1420 if (!devid) { 1421 ret = -EINVAL; 1422 goto out_free; 1423 } 1424 printk(KERN_INFO "btrfs: resizing devid %llu\n", 1425 (unsigned long long)devid); 1426 } 1427 1428 device = btrfs_find_device(root->fs_info, devid, NULL, NULL); 1429 if (!device) { 1430 printk(KERN_INFO "btrfs: resizer unable to find device %llu\n", 1431 (unsigned long long)devid); 1432 ret = -ENODEV; 1433 goto out_free; 1434 } 1435 1436 if (!device->writeable) { 1437 printk(KERN_INFO "btrfs: resizer unable to apply on " 1438 "readonly device %llu\n", 1439 (unsigned long long)devid); 1440 ret = -EPERM; 1441 goto out_free; 1442 } 1443 1444 if (!strcmp(sizestr, "max")) 1445 new_size = device->bdev->bd_inode->i_size; 1446 else { 1447 if (sizestr[0] == '-') { 1448 mod = -1; 1449 sizestr++; 1450 } else if (sizestr[0] == '+') { 1451 mod = 1; 1452 sizestr++; 1453 } 1454 new_size = memparse(sizestr, NULL); 1455 if (new_size == 0) { 1456 ret = -EINVAL; 1457 goto out_free; 1458 } 1459 } 1460 1461 if (device->is_tgtdev_for_dev_replace) { 1462 ret = -EPERM; 1463 goto out_free; 1464 } 1465 1466 old_size = device->total_bytes; 1467 1468 if (mod < 0) { 1469 if (new_size > old_size) { 1470 ret = 
-EINVAL; 1471 goto out_free; 1472 } 1473 new_size = old_size - new_size; 1474 } else if (mod > 0) { 1475 new_size = old_size + new_size; 1476 } 1477 1478 if (new_size < 256 * 1024 * 1024) { 1479 ret = -EINVAL; 1480 goto out_free; 1481 } 1482 if (new_size > device->bdev->bd_inode->i_size) { 1483 ret = -EFBIG; 1484 goto out_free; 1485 } 1486 1487 do_div(new_size, root->sectorsize); 1488 new_size *= root->sectorsize; 1489 1490 printk_in_rcu(KERN_INFO "btrfs: new size for %s is %llu\n", 1491 rcu_str_deref(device->name), 1492 (unsigned long long)new_size); 1493 1494 if (new_size > old_size) { 1495 trans = btrfs_start_transaction(root, 0); 1496 if (IS_ERR(trans)) { 1497 ret = PTR_ERR(trans); 1498 goto out_free; 1499 } 1500 ret = btrfs_grow_device(trans, device, new_size); 1501 btrfs_commit_transaction(trans, root); 1502 } else if (new_size < old_size) { 1503 ret = btrfs_shrink_device(device, new_size); 1504 } /* equal, nothing need to do */ 1505 1506 out_free: 1507 kfree(vol_args); 1508 out: 1509 mutex_unlock(&root->fs_info->volume_mutex); 1510 atomic_set(&root->fs_info->mutually_exclusive_operation_running, 0); 1511 mnt_drop_write_file(file); 1512 return ret; 1513 } 1514 1515 static noinline int btrfs_ioctl_snap_create_transid(struct file *file, 1516 char *name, unsigned long fd, int subvol, 1517 u64 *transid, bool readonly, 1518 struct btrfs_qgroup_inherit *inherit) 1519 { 1520 int namelen; 1521 int ret = 0; 1522 1523 ret = mnt_want_write_file(file); 1524 if (ret) 1525 goto out; 1526 1527 namelen = strlen(name); 1528 if (strchr(name, '/')) { 1529 ret = -EINVAL; 1530 goto out_drop_write; 1531 } 1532 1533 if (name[0] == '.' && 1534 (namelen == 1 || (name[1] == '.' 
&& namelen == 2))) { 1535 ret = -EEXIST; 1536 goto out_drop_write; 1537 } 1538 1539 if (subvol) { 1540 ret = btrfs_mksubvol(&file->f_path, name, namelen, 1541 NULL, transid, readonly, inherit); 1542 } else { 1543 struct fd src = fdget(fd); 1544 struct inode *src_inode; 1545 if (!src.file) { 1546 ret = -EINVAL; 1547 goto out_drop_write; 1548 } 1549 1550 src_inode = file_inode(src.file); 1551 if (src_inode->i_sb != file_inode(file)->i_sb) { 1552 printk(KERN_INFO "btrfs: Snapshot src from " 1553 "another FS\n"); 1554 ret = -EINVAL; 1555 } else { 1556 ret = btrfs_mksubvol(&file->f_path, name, namelen, 1557 BTRFS_I(src_inode)->root, 1558 transid, readonly, inherit); 1559 } 1560 fdput(src); 1561 } 1562 out_drop_write: 1563 mnt_drop_write_file(file); 1564 out: 1565 return ret; 1566 } 1567 1568 static noinline int btrfs_ioctl_snap_create(struct file *file, 1569 void __user *arg, int subvol) 1570 { 1571 struct btrfs_ioctl_vol_args *vol_args; 1572 int ret; 1573 1574 vol_args = memdup_user(arg, sizeof(*vol_args)); 1575 if (IS_ERR(vol_args)) 1576 return PTR_ERR(vol_args); 1577 vol_args->name[BTRFS_PATH_NAME_MAX] = '\0'; 1578 1579 ret = btrfs_ioctl_snap_create_transid(file, vol_args->name, 1580 vol_args->fd, subvol, 1581 NULL, false, NULL); 1582 1583 kfree(vol_args); 1584 return ret; 1585 } 1586 1587 static noinline int btrfs_ioctl_snap_create_v2(struct file *file, 1588 void __user *arg, int subvol) 1589 { 1590 struct btrfs_ioctl_vol_args_v2 *vol_args; 1591 int ret; 1592 u64 transid = 0; 1593 u64 *ptr = NULL; 1594 bool readonly = false; 1595 struct btrfs_qgroup_inherit *inherit = NULL; 1596 1597 vol_args = memdup_user(arg, sizeof(*vol_args)); 1598 if (IS_ERR(vol_args)) 1599 return PTR_ERR(vol_args); 1600 vol_args->name[BTRFS_SUBVOL_NAME_MAX] = '\0'; 1601 1602 if (vol_args->flags & 1603 ~(BTRFS_SUBVOL_CREATE_ASYNC | BTRFS_SUBVOL_RDONLY | 1604 BTRFS_SUBVOL_QGROUP_INHERIT)) { 1605 ret = -EOPNOTSUPP; 1606 goto out; 1607 } 1608 1609 if (vol_args->flags & BTRFS_SUBVOL_CREATE_ASYNC) 
1610 ptr = &transid; 1611 if (vol_args->flags & BTRFS_SUBVOL_RDONLY) 1612 readonly = true; 1613 if (vol_args->flags & BTRFS_SUBVOL_QGROUP_INHERIT) { 1614 if (vol_args->size > PAGE_CACHE_SIZE) { 1615 ret = -EINVAL; 1616 goto out; 1617 } 1618 inherit = memdup_user(vol_args->qgroup_inherit, vol_args->size); 1619 if (IS_ERR(inherit)) { 1620 ret = PTR_ERR(inherit); 1621 goto out; 1622 } 1623 } 1624 1625 ret = btrfs_ioctl_snap_create_transid(file, vol_args->name, 1626 vol_args->fd, subvol, ptr, 1627 readonly, inherit); 1628 1629 if (ret == 0 && ptr && 1630 copy_to_user(arg + 1631 offsetof(struct btrfs_ioctl_vol_args_v2, 1632 transid), ptr, sizeof(*ptr))) 1633 ret = -EFAULT; 1634 out: 1635 kfree(vol_args); 1636 kfree(inherit); 1637 return ret; 1638 } 1639 1640 static noinline int btrfs_ioctl_subvol_getflags(struct file *file, 1641 void __user *arg) 1642 { 1643 struct inode *inode = file_inode(file); 1644 struct btrfs_root *root = BTRFS_I(inode)->root; 1645 int ret = 0; 1646 u64 flags = 0; 1647 1648 if (btrfs_ino(inode) != BTRFS_FIRST_FREE_OBJECTID) 1649 return -EINVAL; 1650 1651 down_read(&root->fs_info->subvol_sem); 1652 if (btrfs_root_readonly(root)) 1653 flags |= BTRFS_SUBVOL_RDONLY; 1654 up_read(&root->fs_info->subvol_sem); 1655 1656 if (copy_to_user(arg, &flags, sizeof(flags))) 1657 ret = -EFAULT; 1658 1659 return ret; 1660 } 1661 1662 static noinline int btrfs_ioctl_subvol_setflags(struct file *file, 1663 void __user *arg) 1664 { 1665 struct inode *inode = file_inode(file); 1666 struct btrfs_root *root = BTRFS_I(inode)->root; 1667 struct btrfs_trans_handle *trans; 1668 u64 root_flags; 1669 u64 flags; 1670 int ret = 0; 1671 1672 ret = mnt_want_write_file(file); 1673 if (ret) 1674 goto out; 1675 1676 if (btrfs_ino(inode) != BTRFS_FIRST_FREE_OBJECTID) { 1677 ret = -EINVAL; 1678 goto out_drop_write; 1679 } 1680 1681 if (copy_from_user(&flags, arg, sizeof(flags))) { 1682 ret = -EFAULT; 1683 goto out_drop_write; 1684 } 1685 1686 if (flags & BTRFS_SUBVOL_CREATE_ASYNC) { 
1687 ret = -EINVAL; 1688 goto out_drop_write; 1689 } 1690 1691 if (flags & ~BTRFS_SUBVOL_RDONLY) { 1692 ret = -EOPNOTSUPP; 1693 goto out_drop_write; 1694 } 1695 1696 if (!inode_owner_or_capable(inode)) { 1697 ret = -EACCES; 1698 goto out_drop_write; 1699 } 1700 1701 down_write(&root->fs_info->subvol_sem); 1702 1703 /* nothing to do */ 1704 if (!!(flags & BTRFS_SUBVOL_RDONLY) == btrfs_root_readonly(root)) 1705 goto out_drop_sem; 1706 1707 root_flags = btrfs_root_flags(&root->root_item); 1708 if (flags & BTRFS_SUBVOL_RDONLY) 1709 btrfs_set_root_flags(&root->root_item, 1710 root_flags | BTRFS_ROOT_SUBVOL_RDONLY); 1711 else 1712 btrfs_set_root_flags(&root->root_item, 1713 root_flags & ~BTRFS_ROOT_SUBVOL_RDONLY); 1714 1715 trans = btrfs_start_transaction(root, 1); 1716 if (IS_ERR(trans)) { 1717 ret = PTR_ERR(trans); 1718 goto out_reset; 1719 } 1720 1721 ret = btrfs_update_root(trans, root->fs_info->tree_root, 1722 &root->root_key, &root->root_item); 1723 1724 btrfs_commit_transaction(trans, root); 1725 out_reset: 1726 if (ret) 1727 btrfs_set_root_flags(&root->root_item, root_flags); 1728 out_drop_sem: 1729 up_write(&root->fs_info->subvol_sem); 1730 out_drop_write: 1731 mnt_drop_write_file(file); 1732 out: 1733 return ret; 1734 } 1735 1736 /* 1737 * helper to check if the subvolume references other subvolumes 1738 */ 1739 static noinline int may_destroy_subvol(struct btrfs_root *root) 1740 { 1741 struct btrfs_path *path; 1742 struct btrfs_dir_item *di; 1743 struct btrfs_key key; 1744 u64 dir_id; 1745 int ret; 1746 1747 path = btrfs_alloc_path(); 1748 if (!path) 1749 return -ENOMEM; 1750 1751 /* Make sure this root isn't set as the default subvol */ 1752 dir_id = btrfs_super_root_dir(root->fs_info->super_copy); 1753 di = btrfs_lookup_dir_item(NULL, root->fs_info->tree_root, path, 1754 dir_id, "default", 7, 0); 1755 if (di && !IS_ERR(di)) { 1756 btrfs_dir_item_key_to_cpu(path->nodes[0], di, &key); 1757 if (key.objectid == root->root_key.objectid) { 1758 ret = -ENOTEMPTY; 
1759 goto out; 1760 } 1761 btrfs_release_path(path); 1762 } 1763 1764 key.objectid = root->root_key.objectid; 1765 key.type = BTRFS_ROOT_REF_KEY; 1766 key.offset = (u64)-1; 1767 1768 ret = btrfs_search_slot(NULL, root->fs_info->tree_root, 1769 &key, path, 0, 0); 1770 if (ret < 0) 1771 goto out; 1772 BUG_ON(ret == 0); 1773 1774 ret = 0; 1775 if (path->slots[0] > 0) { 1776 path->slots[0]--; 1777 btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]); 1778 if (key.objectid == root->root_key.objectid && 1779 key.type == BTRFS_ROOT_REF_KEY) 1780 ret = -ENOTEMPTY; 1781 } 1782 out: 1783 btrfs_free_path(path); 1784 return ret; 1785 } 1786 1787 static noinline int key_in_sk(struct btrfs_key *key, 1788 struct btrfs_ioctl_search_key *sk) 1789 { 1790 struct btrfs_key test; 1791 int ret; 1792 1793 test.objectid = sk->min_objectid; 1794 test.type = sk->min_type; 1795 test.offset = sk->min_offset; 1796 1797 ret = btrfs_comp_cpu_keys(key, &test); 1798 if (ret < 0) 1799 return 0; 1800 1801 test.objectid = sk->max_objectid; 1802 test.type = sk->max_type; 1803 test.offset = sk->max_offset; 1804 1805 ret = btrfs_comp_cpu_keys(key, &test); 1806 if (ret > 0) 1807 return 0; 1808 return 1; 1809 } 1810 1811 static noinline int copy_to_sk(struct btrfs_root *root, 1812 struct btrfs_path *path, 1813 struct btrfs_key *key, 1814 struct btrfs_ioctl_search_key *sk, 1815 char *buf, 1816 unsigned long *sk_offset, 1817 int *num_found) 1818 { 1819 u64 found_transid; 1820 struct extent_buffer *leaf; 1821 struct btrfs_ioctl_search_header sh; 1822 unsigned long item_off; 1823 unsigned long item_len; 1824 int nritems; 1825 int i; 1826 int slot; 1827 int ret = 0; 1828 1829 leaf = path->nodes[0]; 1830 slot = path->slots[0]; 1831 nritems = btrfs_header_nritems(leaf); 1832 1833 if (btrfs_header_generation(leaf) > sk->max_transid) { 1834 i = nritems; 1835 goto advance_key; 1836 } 1837 found_transid = btrfs_header_generation(leaf); 1838 1839 for (i = slot; i < nritems; i++) { 1840 item_off = 
btrfs_item_ptr_offset(leaf, i); 1841 item_len = btrfs_item_size_nr(leaf, i); 1842 1843 btrfs_item_key_to_cpu(leaf, key, i); 1844 if (!key_in_sk(key, sk)) 1845 continue; 1846 1847 if (sizeof(sh) + item_len > BTRFS_SEARCH_ARGS_BUFSIZE) 1848 item_len = 0; 1849 1850 if (sizeof(sh) + item_len + *sk_offset > 1851 BTRFS_SEARCH_ARGS_BUFSIZE) { 1852 ret = 1; 1853 goto overflow; 1854 } 1855 1856 sh.objectid = key->objectid; 1857 sh.offset = key->offset; 1858 sh.type = key->type; 1859 sh.len = item_len; 1860 sh.transid = found_transid; 1861 1862 /* copy search result header */ 1863 memcpy(buf + *sk_offset, &sh, sizeof(sh)); 1864 *sk_offset += sizeof(sh); 1865 1866 if (item_len) { 1867 char *p = buf + *sk_offset; 1868 /* copy the item */ 1869 read_extent_buffer(leaf, p, 1870 item_off, item_len); 1871 *sk_offset += item_len; 1872 } 1873 (*num_found)++; 1874 1875 if (*num_found >= sk->nr_items) 1876 break; 1877 } 1878 advance_key: 1879 ret = 0; 1880 if (key->offset < (u64)-1 && key->offset < sk->max_offset) 1881 key->offset++; 1882 else if (key->type < (u8)-1 && key->type < sk->max_type) { 1883 key->offset = 0; 1884 key->type++; 1885 } else if (key->objectid < (u64)-1 && key->objectid < sk->max_objectid) { 1886 key->offset = 0; 1887 key->type = 0; 1888 key->objectid++; 1889 } else 1890 ret = 1; 1891 overflow: 1892 return ret; 1893 } 1894 1895 static noinline int search_ioctl(struct inode *inode, 1896 struct btrfs_ioctl_search_args *args) 1897 { 1898 struct btrfs_root *root; 1899 struct btrfs_key key; 1900 struct btrfs_key max_key; 1901 struct btrfs_path *path; 1902 struct btrfs_ioctl_search_key *sk = &args->key; 1903 struct btrfs_fs_info *info = BTRFS_I(inode)->root->fs_info; 1904 int ret; 1905 int num_found = 0; 1906 unsigned long sk_offset = 0; 1907 1908 path = btrfs_alloc_path(); 1909 if (!path) 1910 return -ENOMEM; 1911 1912 if (sk->tree_id == 0) { 1913 /* search the root of the inode that was passed */ 1914 root = BTRFS_I(inode)->root; 1915 } else { 1916 key.objectid = 
sk->tree_id; 1917 key.type = BTRFS_ROOT_ITEM_KEY; 1918 key.offset = (u64)-1; 1919 root = btrfs_read_fs_root_no_name(info, &key); 1920 if (IS_ERR(root)) { 1921 printk(KERN_ERR "could not find root %llu\n", 1922 sk->tree_id); 1923 btrfs_free_path(path); 1924 return -ENOENT; 1925 } 1926 } 1927 1928 key.objectid = sk->min_objectid; 1929 key.type = sk->min_type; 1930 key.offset = sk->min_offset; 1931 1932 max_key.objectid = sk->max_objectid; 1933 max_key.type = sk->max_type; 1934 max_key.offset = sk->max_offset; 1935 1936 path->keep_locks = 1; 1937 1938 while(1) { 1939 ret = btrfs_search_forward(root, &key, &max_key, path, 1940 sk->min_transid); 1941 if (ret != 0) { 1942 if (ret > 0) 1943 ret = 0; 1944 goto err; 1945 } 1946 ret = copy_to_sk(root, path, &key, sk, args->buf, 1947 &sk_offset, &num_found); 1948 btrfs_release_path(path); 1949 if (ret || num_found >= sk->nr_items) 1950 break; 1951 1952 } 1953 ret = 0; 1954 err: 1955 sk->nr_items = num_found; 1956 btrfs_free_path(path); 1957 return ret; 1958 } 1959 1960 static noinline int btrfs_ioctl_tree_search(struct file *file, 1961 void __user *argp) 1962 { 1963 struct btrfs_ioctl_search_args *args; 1964 struct inode *inode; 1965 int ret; 1966 1967 if (!capable(CAP_SYS_ADMIN)) 1968 return -EPERM; 1969 1970 args = memdup_user(argp, sizeof(*args)); 1971 if (IS_ERR(args)) 1972 return PTR_ERR(args); 1973 1974 inode = file_inode(file); 1975 ret = search_ioctl(inode, args); 1976 if (ret == 0 && copy_to_user(argp, args, sizeof(*args))) 1977 ret = -EFAULT; 1978 kfree(args); 1979 return ret; 1980 } 1981 1982 /* 1983 * Search INODE_REFs to identify path name of 'dirid' directory 1984 * in a 'tree_id' tree. and sets path name to 'name'. 
1985 */ 1986 static noinline int btrfs_search_path_in_tree(struct btrfs_fs_info *info, 1987 u64 tree_id, u64 dirid, char *name) 1988 { 1989 struct btrfs_root *root; 1990 struct btrfs_key key; 1991 char *ptr; 1992 int ret = -1; 1993 int slot; 1994 int len; 1995 int total_len = 0; 1996 struct btrfs_inode_ref *iref; 1997 struct extent_buffer *l; 1998 struct btrfs_path *path; 1999 2000 if (dirid == BTRFS_FIRST_FREE_OBJECTID) { 2001 name[0]='\0'; 2002 return 0; 2003 } 2004 2005 path = btrfs_alloc_path(); 2006 if (!path) 2007 return -ENOMEM; 2008 2009 ptr = &name[BTRFS_INO_LOOKUP_PATH_MAX]; 2010 2011 key.objectid = tree_id; 2012 key.type = BTRFS_ROOT_ITEM_KEY; 2013 key.offset = (u64)-1; 2014 root = btrfs_read_fs_root_no_name(info, &key); 2015 if (IS_ERR(root)) { 2016 printk(KERN_ERR "could not find root %llu\n", tree_id); 2017 ret = -ENOENT; 2018 goto out; 2019 } 2020 2021 key.objectid = dirid; 2022 key.type = BTRFS_INODE_REF_KEY; 2023 key.offset = (u64)-1; 2024 2025 while(1) { 2026 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); 2027 if (ret < 0) 2028 goto out; 2029 else if (ret > 0) { 2030 ret = btrfs_previous_item(root, path, dirid, 2031 BTRFS_INODE_REF_KEY); 2032 if (ret < 0) 2033 goto out; 2034 else if (ret > 0) { 2035 ret = -ENOENT; 2036 goto out; 2037 } 2038 } 2039 2040 l = path->nodes[0]; 2041 slot = path->slots[0]; 2042 btrfs_item_key_to_cpu(l, &key, slot); 2043 2044 iref = btrfs_item_ptr(l, slot, struct btrfs_inode_ref); 2045 len = btrfs_inode_ref_name_len(l, iref); 2046 ptr -= len + 1; 2047 total_len += len + 1; 2048 if (ptr < name) { 2049 ret = -ENAMETOOLONG; 2050 goto out; 2051 } 2052 2053 *(ptr + len) = '/'; 2054 read_extent_buffer(l, ptr,(unsigned long)(iref + 1), len); 2055 2056 if (key.offset == BTRFS_FIRST_FREE_OBJECTID) 2057 break; 2058 2059 btrfs_release_path(path); 2060 key.objectid = key.offset; 2061 key.offset = (u64)-1; 2062 dirid = key.objectid; 2063 } 2064 memmove(name, ptr, total_len); 2065 name[total_len]='\0'; 2066 ret = 0; 2067 out: 
2068 btrfs_free_path(path); 2069 return ret; 2070 } 2071 2072 static noinline int btrfs_ioctl_ino_lookup(struct file *file, 2073 void __user *argp) 2074 { 2075 struct btrfs_ioctl_ino_lookup_args *args; 2076 struct inode *inode; 2077 int ret; 2078 2079 if (!capable(CAP_SYS_ADMIN)) 2080 return -EPERM; 2081 2082 args = memdup_user(argp, sizeof(*args)); 2083 if (IS_ERR(args)) 2084 return PTR_ERR(args); 2085 2086 inode = file_inode(file); 2087 2088 if (args->treeid == 0) 2089 args->treeid = BTRFS_I(inode)->root->root_key.objectid; 2090 2091 ret = btrfs_search_path_in_tree(BTRFS_I(inode)->root->fs_info, 2092 args->treeid, args->objectid, 2093 args->name); 2094 2095 if (ret == 0 && copy_to_user(argp, args, sizeof(*args))) 2096 ret = -EFAULT; 2097 2098 kfree(args); 2099 return ret; 2100 } 2101 2102 static noinline int btrfs_ioctl_snap_destroy(struct file *file, 2103 void __user *arg) 2104 { 2105 struct dentry *parent = fdentry(file); 2106 struct dentry *dentry; 2107 struct inode *dir = parent->d_inode; 2108 struct inode *inode; 2109 struct btrfs_root *root = BTRFS_I(dir)->root; 2110 struct btrfs_root *dest = NULL; 2111 struct btrfs_ioctl_vol_args *vol_args; 2112 struct btrfs_trans_handle *trans; 2113 struct btrfs_block_rsv block_rsv; 2114 u64 qgroup_reserved; 2115 int namelen; 2116 int ret; 2117 int err = 0; 2118 2119 vol_args = memdup_user(arg, sizeof(*vol_args)); 2120 if (IS_ERR(vol_args)) 2121 return PTR_ERR(vol_args); 2122 2123 vol_args->name[BTRFS_PATH_NAME_MAX] = '\0'; 2124 namelen = strlen(vol_args->name); 2125 if (strchr(vol_args->name, '/') || 2126 strncmp(vol_args->name, "..", namelen) == 0) { 2127 err = -EINVAL; 2128 goto out; 2129 } 2130 2131 err = mnt_want_write_file(file); 2132 if (err) 2133 goto out; 2134 2135 err = mutex_lock_killable_nested(&dir->i_mutex, I_MUTEX_PARENT); 2136 if (err == -EINTR) 2137 goto out; 2138 dentry = lookup_one_len(vol_args->name, parent, namelen); 2139 if (IS_ERR(dentry)) { 2140 err = PTR_ERR(dentry); 2141 goto out_unlock_dir; 2142 
} 2143 2144 if (!dentry->d_inode) { 2145 err = -ENOENT; 2146 goto out_dput; 2147 } 2148 2149 inode = dentry->d_inode; 2150 dest = BTRFS_I(inode)->root; 2151 if (!capable(CAP_SYS_ADMIN)){ 2152 /* 2153 * Regular user. Only allow this with a special mount 2154 * option, when the user has write+exec access to the 2155 * subvol root, and when rmdir(2) would have been 2156 * allowed. 2157 * 2158 * Note that this is _not_ check that the subvol is 2159 * empty or doesn't contain data that we wouldn't 2160 * otherwise be able to delete. 2161 * 2162 * Users who want to delete empty subvols should try 2163 * rmdir(2). 2164 */ 2165 err = -EPERM; 2166 if (!btrfs_test_opt(root, USER_SUBVOL_RM_ALLOWED)) 2167 goto out_dput; 2168 2169 /* 2170 * Do not allow deletion if the parent dir is the same 2171 * as the dir to be deleted. That means the ioctl 2172 * must be called on the dentry referencing the root 2173 * of the subvol, not a random directory contained 2174 * within it. 2175 */ 2176 err = -EINVAL; 2177 if (root == dest) 2178 goto out_dput; 2179 2180 err = inode_permission(inode, MAY_WRITE | MAY_EXEC); 2181 if (err) 2182 goto out_dput; 2183 } 2184 2185 /* check if subvolume may be deleted by a user */ 2186 err = btrfs_may_delete(dir, dentry, 1); 2187 if (err) 2188 goto out_dput; 2189 2190 if (btrfs_ino(inode) != BTRFS_FIRST_FREE_OBJECTID) { 2191 err = -EINVAL; 2192 goto out_dput; 2193 } 2194 2195 mutex_lock(&inode->i_mutex); 2196 err = d_invalidate(dentry); 2197 if (err) 2198 goto out_unlock; 2199 2200 down_write(&root->fs_info->subvol_sem); 2201 2202 err = may_destroy_subvol(dest); 2203 if (err) 2204 goto out_up_write; 2205 2206 btrfs_init_block_rsv(&block_rsv, BTRFS_BLOCK_RSV_TEMP); 2207 /* 2208 * One for dir inode, two for dir entries, two for root 2209 * ref/backref. 
2210 */ 2211 err = btrfs_subvolume_reserve_metadata(root, &block_rsv, 2212 5, &qgroup_reserved, true); 2213 if (err) 2214 goto out_up_write; 2215 2216 trans = btrfs_start_transaction(root, 0); 2217 if (IS_ERR(trans)) { 2218 err = PTR_ERR(trans); 2219 goto out_release; 2220 } 2221 trans->block_rsv = &block_rsv; 2222 trans->bytes_reserved = block_rsv.size; 2223 2224 ret = btrfs_unlink_subvol(trans, root, dir, 2225 dest->root_key.objectid, 2226 dentry->d_name.name, 2227 dentry->d_name.len); 2228 if (ret) { 2229 err = ret; 2230 btrfs_abort_transaction(trans, root, ret); 2231 goto out_end_trans; 2232 } 2233 2234 btrfs_record_root_in_trans(trans, dest); 2235 2236 memset(&dest->root_item.drop_progress, 0, 2237 sizeof(dest->root_item.drop_progress)); 2238 dest->root_item.drop_level = 0; 2239 btrfs_set_root_refs(&dest->root_item, 0); 2240 2241 if (!xchg(&dest->orphan_item_inserted, 1)) { 2242 ret = btrfs_insert_orphan_item(trans, 2243 root->fs_info->tree_root, 2244 dest->root_key.objectid); 2245 if (ret) { 2246 btrfs_abort_transaction(trans, root, ret); 2247 err = ret; 2248 goto out_end_trans; 2249 } 2250 } 2251 2252 ret = btrfs_uuid_tree_rem(trans, root->fs_info->uuid_root, 2253 dest->root_item.uuid, BTRFS_UUID_KEY_SUBVOL, 2254 dest->root_key.objectid); 2255 if (ret && ret != -ENOENT) { 2256 btrfs_abort_transaction(trans, root, ret); 2257 err = ret; 2258 goto out_end_trans; 2259 } 2260 if (!btrfs_is_empty_uuid(dest->root_item.received_uuid)) { 2261 ret = btrfs_uuid_tree_rem(trans, root->fs_info->uuid_root, 2262 dest->root_item.received_uuid, 2263 BTRFS_UUID_KEY_RECEIVED_SUBVOL, 2264 dest->root_key.objectid); 2265 if (ret && ret != -ENOENT) { 2266 btrfs_abort_transaction(trans, root, ret); 2267 err = ret; 2268 goto out_end_trans; 2269 } 2270 } 2271 2272 out_end_trans: 2273 trans->block_rsv = NULL; 2274 trans->bytes_reserved = 0; 2275 ret = btrfs_end_transaction(trans, root); 2276 if (ret && !err) 2277 err = ret; 2278 inode->i_flags |= S_DEAD; 2279 out_release: 2280 
btrfs_subvolume_release_metadata(root, &block_rsv, qgroup_reserved); 2281 out_up_write: 2282 up_write(&root->fs_info->subvol_sem); 2283 out_unlock: 2284 mutex_unlock(&inode->i_mutex); 2285 if (!err) { 2286 shrink_dcache_sb(root->fs_info->sb); 2287 btrfs_invalidate_inodes(dest); 2288 d_delete(dentry); 2289 2290 /* the last ref */ 2291 if (dest->cache_inode) { 2292 iput(dest->cache_inode); 2293 dest->cache_inode = NULL; 2294 } 2295 } 2296 out_dput: 2297 dput(dentry); 2298 out_unlock_dir: 2299 mutex_unlock(&dir->i_mutex); 2300 mnt_drop_write_file(file); 2301 out: 2302 kfree(vol_args); 2303 return err; 2304 } 2305 2306 static int btrfs_ioctl_defrag(struct file *file, void __user *argp) 2307 { 2308 struct inode *inode = file_inode(file); 2309 struct btrfs_root *root = BTRFS_I(inode)->root; 2310 struct btrfs_ioctl_defrag_range_args *range; 2311 int ret; 2312 2313 ret = mnt_want_write_file(file); 2314 if (ret) 2315 return ret; 2316 2317 if (btrfs_root_readonly(root)) { 2318 ret = -EROFS; 2319 goto out; 2320 } 2321 2322 switch (inode->i_mode & S_IFMT) { 2323 case S_IFDIR: 2324 if (!capable(CAP_SYS_ADMIN)) { 2325 ret = -EPERM; 2326 goto out; 2327 } 2328 ret = btrfs_defrag_root(root); 2329 if (ret) 2330 goto out; 2331 ret = btrfs_defrag_root(root->fs_info->extent_root); 2332 break; 2333 case S_IFREG: 2334 if (!(file->f_mode & FMODE_WRITE)) { 2335 ret = -EINVAL; 2336 goto out; 2337 } 2338 2339 range = kzalloc(sizeof(*range), GFP_KERNEL); 2340 if (!range) { 2341 ret = -ENOMEM; 2342 goto out; 2343 } 2344 2345 if (argp) { 2346 if (copy_from_user(range, argp, 2347 sizeof(*range))) { 2348 ret = -EFAULT; 2349 kfree(range); 2350 goto out; 2351 } 2352 /* compression requires us to start the IO */ 2353 if ((range->flags & BTRFS_DEFRAG_RANGE_COMPRESS)) { 2354 range->flags |= BTRFS_DEFRAG_RANGE_START_IO; 2355 range->extent_thresh = (u32)-1; 2356 } 2357 } else { 2358 /* the rest are all set to zero by kzalloc */ 2359 range->len = (u64)-1; 2360 } 2361 ret = 
btrfs_defrag_file(file_inode(file), file, 2362 range, 0, 0); 2363 if (ret > 0) 2364 ret = 0; 2365 kfree(range); 2366 break; 2367 default: 2368 ret = -EINVAL; 2369 } 2370 out: 2371 mnt_drop_write_file(file); 2372 return ret; 2373 } 2374 2375 static long btrfs_ioctl_add_dev(struct btrfs_root *root, void __user *arg) 2376 { 2377 struct btrfs_ioctl_vol_args *vol_args; 2378 int ret; 2379 2380 if (!capable(CAP_SYS_ADMIN)) 2381 return -EPERM; 2382 2383 if (atomic_xchg(&root->fs_info->mutually_exclusive_operation_running, 2384 1)) { 2385 pr_info("btrfs: dev add/delete/balance/replace/resize operation in progress\n"); 2386 return -EINVAL; 2387 } 2388 2389 mutex_lock(&root->fs_info->volume_mutex); 2390 vol_args = memdup_user(arg, sizeof(*vol_args)); 2391 if (IS_ERR(vol_args)) { 2392 ret = PTR_ERR(vol_args); 2393 goto out; 2394 } 2395 2396 vol_args->name[BTRFS_PATH_NAME_MAX] = '\0'; 2397 ret = btrfs_init_new_device(root, vol_args->name); 2398 2399 kfree(vol_args); 2400 out: 2401 mutex_unlock(&root->fs_info->volume_mutex); 2402 atomic_set(&root->fs_info->mutually_exclusive_operation_running, 0); 2403 return ret; 2404 } 2405 2406 static long btrfs_ioctl_rm_dev(struct file *file, void __user *arg) 2407 { 2408 struct btrfs_root *root = BTRFS_I(file_inode(file))->root; 2409 struct btrfs_ioctl_vol_args *vol_args; 2410 int ret; 2411 2412 if (!capable(CAP_SYS_ADMIN)) 2413 return -EPERM; 2414 2415 ret = mnt_want_write_file(file); 2416 if (ret) 2417 return ret; 2418 2419 vol_args = memdup_user(arg, sizeof(*vol_args)); 2420 if (IS_ERR(vol_args)) { 2421 ret = PTR_ERR(vol_args); 2422 goto out; 2423 } 2424 2425 vol_args->name[BTRFS_PATH_NAME_MAX] = '\0'; 2426 2427 if (atomic_xchg(&root->fs_info->mutually_exclusive_operation_running, 2428 1)) { 2429 ret = BTRFS_ERROR_DEV_EXCL_RUN_IN_PROGRESS; 2430 goto out; 2431 } 2432 2433 mutex_lock(&root->fs_info->volume_mutex); 2434 ret = btrfs_rm_device(root, vol_args->name); 2435 mutex_unlock(&root->fs_info->volume_mutex); 2436 
atomic_set(&root->fs_info->mutually_exclusive_operation_running, 0); 2437 2438 out: 2439 kfree(vol_args); 2440 mnt_drop_write_file(file); 2441 return ret; 2442 } 2443 2444 static long btrfs_ioctl_fs_info(struct btrfs_root *root, void __user *arg) 2445 { 2446 struct btrfs_ioctl_fs_info_args *fi_args; 2447 struct btrfs_device *device; 2448 struct btrfs_device *next; 2449 struct btrfs_fs_devices *fs_devices = root->fs_info->fs_devices; 2450 int ret = 0; 2451 2452 if (!capable(CAP_SYS_ADMIN)) 2453 return -EPERM; 2454 2455 fi_args = kzalloc(sizeof(*fi_args), GFP_KERNEL); 2456 if (!fi_args) 2457 return -ENOMEM; 2458 2459 fi_args->num_devices = fs_devices->num_devices; 2460 memcpy(&fi_args->fsid, root->fs_info->fsid, sizeof(fi_args->fsid)); 2461 2462 mutex_lock(&fs_devices->device_list_mutex); 2463 list_for_each_entry_safe(device, next, &fs_devices->devices, dev_list) { 2464 if (device->devid > fi_args->max_id) 2465 fi_args->max_id = device->devid; 2466 } 2467 mutex_unlock(&fs_devices->device_list_mutex); 2468 2469 if (copy_to_user(arg, fi_args, sizeof(*fi_args))) 2470 ret = -EFAULT; 2471 2472 kfree(fi_args); 2473 return ret; 2474 } 2475 2476 static long btrfs_ioctl_dev_info(struct btrfs_root *root, void __user *arg) 2477 { 2478 struct btrfs_ioctl_dev_info_args *di_args; 2479 struct btrfs_device *dev; 2480 struct btrfs_fs_devices *fs_devices = root->fs_info->fs_devices; 2481 int ret = 0; 2482 char *s_uuid = NULL; 2483 2484 if (!capable(CAP_SYS_ADMIN)) 2485 return -EPERM; 2486 2487 di_args = memdup_user(arg, sizeof(*di_args)); 2488 if (IS_ERR(di_args)) 2489 return PTR_ERR(di_args); 2490 2491 if (!btrfs_is_empty_uuid(di_args->uuid)) 2492 s_uuid = di_args->uuid; 2493 2494 mutex_lock(&fs_devices->device_list_mutex); 2495 dev = btrfs_find_device(root->fs_info, di_args->devid, s_uuid, NULL); 2496 2497 if (!dev) { 2498 ret = -ENODEV; 2499 goto out; 2500 } 2501 2502 di_args->devid = dev->devid; 2503 di_args->bytes_used = dev->bytes_used; 2504 di_args->total_bytes = 
dev->total_bytes; 2505 memcpy(di_args->uuid, dev->uuid, sizeof(di_args->uuid)); 2506 if (dev->name) { 2507 struct rcu_string *name; 2508 2509 rcu_read_lock(); 2510 name = rcu_dereference(dev->name); 2511 strncpy(di_args->path, name->str, sizeof(di_args->path)); 2512 rcu_read_unlock(); 2513 di_args->path[sizeof(di_args->path) - 1] = 0; 2514 } else { 2515 di_args->path[0] = '\0'; 2516 } 2517 2518 out: 2519 mutex_unlock(&fs_devices->device_list_mutex); 2520 if (ret == 0 && copy_to_user(arg, di_args, sizeof(*di_args))) 2521 ret = -EFAULT; 2522 2523 kfree(di_args); 2524 return ret; 2525 } 2526 2527 static struct page *extent_same_get_page(struct inode *inode, u64 off) 2528 { 2529 struct page *page; 2530 pgoff_t index; 2531 struct extent_io_tree *tree = &BTRFS_I(inode)->io_tree; 2532 2533 index = off >> PAGE_CACHE_SHIFT; 2534 2535 page = grab_cache_page(inode->i_mapping, index); 2536 if (!page) 2537 return NULL; 2538 2539 if (!PageUptodate(page)) { 2540 if (extent_read_full_page_nolock(tree, page, btrfs_get_extent, 2541 0)) 2542 return NULL; 2543 lock_page(page); 2544 if (!PageUptodate(page)) { 2545 unlock_page(page); 2546 page_cache_release(page); 2547 return NULL; 2548 } 2549 } 2550 unlock_page(page); 2551 2552 return page; 2553 } 2554 2555 static inline void lock_extent_range(struct inode *inode, u64 off, u64 len) 2556 { 2557 /* do any pending delalloc/csum calc on src, one way or 2558 another, and lock file content */ 2559 while (1) { 2560 struct btrfs_ordered_extent *ordered; 2561 lock_extent(&BTRFS_I(inode)->io_tree, off, off + len - 1); 2562 ordered = btrfs_lookup_first_ordered_extent(inode, 2563 off + len - 1); 2564 if (!ordered && 2565 !test_range_bit(&BTRFS_I(inode)->io_tree, off, 2566 off + len - 1, EXTENT_DELALLOC, 0, NULL)) 2567 break; 2568 unlock_extent(&BTRFS_I(inode)->io_tree, off, off + len - 1); 2569 if (ordered) 2570 btrfs_put_ordered_extent(ordered); 2571 btrfs_wait_ordered_range(inode, off, len); 2572 } 2573 } 2574 2575 static void 
btrfs_double_unlock(struct inode *inode1, u64 loff1,
				struct inode *inode2, u64 loff2, u64 len)
{
	/* undo btrfs_double_lock(): extent ranges first, then the mutexes */
	unlock_extent(&BTRFS_I(inode1)->io_tree, loff1, loff1 + len - 1);
	unlock_extent(&BTRFS_I(inode2)->io_tree, loff2, loff2 + len - 1);

	mutex_unlock(&inode1->i_mutex);
	mutex_unlock(&inode2->i_mutex);
}

/*
 * Take i_mutex and lock the extent range [loffN, loffN + len - 1] on
 * both inodes.
 *
 * The inode at the lower address is always locked first.  This is the
 * same order btrfs_ioctl_clone() uses for its two i_mutex acquisitions;
 * locking in the opposite order here would allow an ABBA deadlock
 * between a concurrent clone and extent-same on the same pair of files.
 */
static void btrfs_double_lock(struct inode *inode1, u64 loff1,
			      struct inode *inode2, u64 loff2, u64 len)
{
	if (inode1 > inode2) {
		swap(inode1, inode2);
		swap(loff1, loff2);
	}

	mutex_lock_nested(&inode1->i_mutex, I_MUTEX_PARENT);
	lock_extent_range(inode1, loff1, len);
	if (inode1 != inode2) {
		mutex_lock_nested(&inode2->i_mutex, I_MUTEX_CHILD);
		lock_extent_range(inode2, loff2, len);
	}
}

/*
 * Compare @len bytes of @src starting at @loff with @dst starting at
 * @dst_loff, one page at a time.
 *
 * Returns 0 if the ranges are identical, BTRFS_SAME_DATA_DIFFERS if
 * they are not, or -EINVAL if a page could not be obtained.
 */
static int btrfs_cmp_data(struct inode *src, u64 loff, struct inode *dst,
			  u64 dst_loff, u64 len)
{
	int ret = 0;
	struct page *src_page, *dst_page;
	unsigned int cmp_len = PAGE_CACHE_SIZE;
	void *addr, *dst_addr;

	while (len) {
		/* final, partial chunk */
		if (len < PAGE_CACHE_SIZE)
			cmp_len = len;

		src_page = extent_same_get_page(src, loff);
		if (!src_page)
			return -EINVAL;
		dst_page = extent_same_get_page(dst, dst_loff);
		if (!dst_page) {
			page_cache_release(src_page);
			return -EINVAL;
		}
		addr = kmap_atomic(src_page);
		dst_addr = kmap_atomic(dst_page);

		flush_dcache_page(src_page);
		flush_dcache_page(dst_page);

		if (memcmp(addr, dst_addr, cmp_len))
			ret = BTRFS_SAME_DATA_DIFFERS;

		/* atomic kmaps nest: release in reverse order of mapping */
		kunmap_atomic(dst_addr);
		kunmap_atomic(addr);
		page_cache_release(src_page);
		page_cache_release(dst_page);

		if (ret)
			break;

		loff += cmp_len;
		dst_loff += cmp_len;
		len -= cmp_len;
	}

	return ret;
}

static int extent_same_check_offsets(struct inode *inode, u64 off, u64 len)
{
	u64 bs =
BTRFS_I(inode)->root->fs_info->sb->s_blocksize; 2649 2650 if (off + len > inode->i_size || off + len < off) 2651 return -EINVAL; 2652 /* Check that we are block aligned - btrfs_clone() requires this */ 2653 if (!IS_ALIGNED(off, bs) || !IS_ALIGNED(off + len, bs)) 2654 return -EINVAL; 2655 2656 return 0; 2657 } 2658 2659 static int btrfs_extent_same(struct inode *src, u64 loff, u64 len, 2660 struct inode *dst, u64 dst_loff) 2661 { 2662 int ret; 2663 2664 /* 2665 * btrfs_clone() can't handle extents in the same file 2666 * yet. Once that works, we can drop this check and replace it 2667 * with a check for the same inode, but overlapping extents. 2668 */ 2669 if (src == dst) 2670 return -EINVAL; 2671 2672 btrfs_double_lock(src, loff, dst, dst_loff, len); 2673 2674 ret = extent_same_check_offsets(src, loff, len); 2675 if (ret) 2676 goto out_unlock; 2677 2678 ret = extent_same_check_offsets(dst, dst_loff, len); 2679 if (ret) 2680 goto out_unlock; 2681 2682 /* don't make the dst file partly checksummed */ 2683 if ((BTRFS_I(src)->flags & BTRFS_INODE_NODATASUM) != 2684 (BTRFS_I(dst)->flags & BTRFS_INODE_NODATASUM)) { 2685 ret = -EINVAL; 2686 goto out_unlock; 2687 } 2688 2689 ret = btrfs_cmp_data(src, loff, dst, dst_loff, len); 2690 if (ret == 0) 2691 ret = btrfs_clone(src, dst, loff, len, len, dst_loff); 2692 2693 out_unlock: 2694 btrfs_double_unlock(src, loff, dst, dst_loff, len); 2695 2696 return ret; 2697 } 2698 2699 #define BTRFS_MAX_DEDUPE_LEN (16 * 1024 * 1024) 2700 2701 static long btrfs_ioctl_file_extent_same(struct file *file, 2702 void __user *argp) 2703 { 2704 struct btrfs_ioctl_same_args *args = argp; 2705 struct btrfs_ioctl_same_args same; 2706 struct btrfs_ioctl_same_extent_info info; 2707 struct inode *src = file->f_dentry->d_inode; 2708 struct file *dst_file = NULL; 2709 struct inode *dst; 2710 u64 off; 2711 u64 len; 2712 int i; 2713 int ret; 2714 u64 bs = BTRFS_I(src)->root->fs_info->sb->s_blocksize; 2715 bool is_admin = capable(CAP_SYS_ADMIN); 2716 2717 if 
(!(file->f_mode & FMODE_READ)) 2718 return -EINVAL; 2719 2720 ret = mnt_want_write_file(file); 2721 if (ret) 2722 return ret; 2723 2724 if (copy_from_user(&same, 2725 (struct btrfs_ioctl_same_args __user *)argp, 2726 sizeof(same))) { 2727 ret = -EFAULT; 2728 goto out; 2729 } 2730 2731 off = same.logical_offset; 2732 len = same.length; 2733 2734 /* 2735 * Limit the total length we will dedupe for each operation. 2736 * This is intended to bound the total time spent in this 2737 * ioctl to something sane. 2738 */ 2739 if (len > BTRFS_MAX_DEDUPE_LEN) 2740 len = BTRFS_MAX_DEDUPE_LEN; 2741 2742 if (WARN_ON_ONCE(bs < PAGE_CACHE_SIZE)) { 2743 /* 2744 * Btrfs does not support blocksize < page_size. As a 2745 * result, btrfs_cmp_data() won't correctly handle 2746 * this situation without an update. 2747 */ 2748 ret = -EINVAL; 2749 goto out; 2750 } 2751 2752 ret = -EISDIR; 2753 if (S_ISDIR(src->i_mode)) 2754 goto out; 2755 2756 ret = -EACCES; 2757 if (!S_ISREG(src->i_mode)) 2758 goto out; 2759 2760 ret = 0; 2761 for (i = 0; i < same.dest_count; i++) { 2762 if (copy_from_user(&info, &args->info[i], sizeof(info))) { 2763 ret = -EFAULT; 2764 goto out; 2765 } 2766 2767 info.bytes_deduped = 0; 2768 2769 dst_file = fget(info.fd); 2770 if (!dst_file) { 2771 info.status = -EBADF; 2772 goto next; 2773 } 2774 2775 if (!(is_admin || (dst_file->f_mode & FMODE_WRITE))) { 2776 info.status = -EINVAL; 2777 goto next; 2778 } 2779 2780 info.status = -EXDEV; 2781 if (file->f_path.mnt != dst_file->f_path.mnt) 2782 goto next; 2783 2784 dst = dst_file->f_dentry->d_inode; 2785 if (src->i_sb != dst->i_sb) 2786 goto next; 2787 2788 if (S_ISDIR(dst->i_mode)) { 2789 info.status = -EISDIR; 2790 goto next; 2791 } 2792 2793 if (!S_ISREG(dst->i_mode)) { 2794 info.status = -EACCES; 2795 goto next; 2796 } 2797 2798 info.status = btrfs_extent_same(src, off, len, dst, 2799 info.logical_offset); 2800 if (info.status == 0) 2801 info.bytes_deduped += len; 2802 2803 next: 2804 if (dst_file) 2805 fput(dst_file); 
2806 2807 if (__put_user_unaligned(info.status, &args->info[i].status) || 2808 __put_user_unaligned(info.bytes_deduped, 2809 &args->info[i].bytes_deduped)) { 2810 ret = -EFAULT; 2811 goto out; 2812 } 2813 } 2814 2815 out: 2816 mnt_drop_write_file(file); 2817 return ret; 2818 } 2819 2820 /** 2821 * btrfs_clone() - clone a range from inode file to another 2822 * 2823 * @src: Inode to clone from 2824 * @inode: Inode to clone to 2825 * @off: Offset within source to start clone from 2826 * @olen: Original length, passed by user, of range to clone 2827 * @olen_aligned: Block-aligned value of olen, extent_same uses 2828 * identical values here 2829 * @destoff: Offset within @inode to start clone 2830 */ 2831 static int btrfs_clone(struct inode *src, struct inode *inode, 2832 u64 off, u64 olen, u64 olen_aligned, u64 destoff) 2833 { 2834 struct btrfs_root *root = BTRFS_I(inode)->root; 2835 struct btrfs_path *path = NULL; 2836 struct extent_buffer *leaf; 2837 struct btrfs_trans_handle *trans; 2838 char *buf = NULL; 2839 struct btrfs_key key; 2840 u32 nritems; 2841 int slot; 2842 int ret; 2843 u64 len = olen_aligned; 2844 2845 ret = -ENOMEM; 2846 buf = vmalloc(btrfs_level_size(root, 0)); 2847 if (!buf) 2848 return ret; 2849 2850 path = btrfs_alloc_path(); 2851 if (!path) { 2852 vfree(buf); 2853 return ret; 2854 } 2855 2856 path->reada = 2; 2857 /* clone data */ 2858 key.objectid = btrfs_ino(src); 2859 key.type = BTRFS_EXTENT_DATA_KEY; 2860 key.offset = 0; 2861 2862 while (1) { 2863 /* 2864 * note the key will change type as we walk through the 2865 * tree. 
2866 */ 2867 ret = btrfs_search_slot(NULL, BTRFS_I(src)->root, &key, path, 2868 0, 0); 2869 if (ret < 0) 2870 goto out; 2871 2872 nritems = btrfs_header_nritems(path->nodes[0]); 2873 if (path->slots[0] >= nritems) { 2874 ret = btrfs_next_leaf(BTRFS_I(src)->root, path); 2875 if (ret < 0) 2876 goto out; 2877 if (ret > 0) 2878 break; 2879 nritems = btrfs_header_nritems(path->nodes[0]); 2880 } 2881 leaf = path->nodes[0]; 2882 slot = path->slots[0]; 2883 2884 btrfs_item_key_to_cpu(leaf, &key, slot); 2885 if (btrfs_key_type(&key) > BTRFS_EXTENT_DATA_KEY || 2886 key.objectid != btrfs_ino(src)) 2887 break; 2888 2889 if (btrfs_key_type(&key) == BTRFS_EXTENT_DATA_KEY) { 2890 struct btrfs_file_extent_item *extent; 2891 int type; 2892 u32 size; 2893 struct btrfs_key new_key; 2894 u64 disko = 0, diskl = 0; 2895 u64 datao = 0, datal = 0; 2896 u8 comp; 2897 u64 endoff; 2898 2899 size = btrfs_item_size_nr(leaf, slot); 2900 read_extent_buffer(leaf, buf, 2901 btrfs_item_ptr_offset(leaf, slot), 2902 size); 2903 2904 extent = btrfs_item_ptr(leaf, slot, 2905 struct btrfs_file_extent_item); 2906 comp = btrfs_file_extent_compression(leaf, extent); 2907 type = btrfs_file_extent_type(leaf, extent); 2908 if (type == BTRFS_FILE_EXTENT_REG || 2909 type == BTRFS_FILE_EXTENT_PREALLOC) { 2910 disko = btrfs_file_extent_disk_bytenr(leaf, 2911 extent); 2912 diskl = btrfs_file_extent_disk_num_bytes(leaf, 2913 extent); 2914 datao = btrfs_file_extent_offset(leaf, extent); 2915 datal = btrfs_file_extent_num_bytes(leaf, 2916 extent); 2917 } else if (type == BTRFS_FILE_EXTENT_INLINE) { 2918 /* take upper bound, may be compressed */ 2919 datal = btrfs_file_extent_ram_bytes(leaf, 2920 extent); 2921 } 2922 btrfs_release_path(path); 2923 2924 if (key.offset + datal <= off || 2925 key.offset >= off + len - 1) 2926 goto next; 2927 2928 memcpy(&new_key, &key, sizeof(new_key)); 2929 new_key.objectid = btrfs_ino(inode); 2930 if (off <= key.offset) 2931 new_key.offset = key.offset + destoff - off; 2932 else 2933 
new_key.offset = destoff; 2934 2935 /* 2936 * 1 - adjusting old extent (we may have to split it) 2937 * 1 - add new extent 2938 * 1 - inode update 2939 */ 2940 trans = btrfs_start_transaction(root, 3); 2941 if (IS_ERR(trans)) { 2942 ret = PTR_ERR(trans); 2943 goto out; 2944 } 2945 2946 if (type == BTRFS_FILE_EXTENT_REG || 2947 type == BTRFS_FILE_EXTENT_PREALLOC) { 2948 /* 2949 * a | --- range to clone ---| b 2950 * | ------------- extent ------------- | 2951 */ 2952 2953 /* substract range b */ 2954 if (key.offset + datal > off + len) 2955 datal = off + len - key.offset; 2956 2957 /* substract range a */ 2958 if (off > key.offset) { 2959 datao += off - key.offset; 2960 datal -= off - key.offset; 2961 } 2962 2963 ret = btrfs_drop_extents(trans, root, inode, 2964 new_key.offset, 2965 new_key.offset + datal, 2966 1); 2967 if (ret) { 2968 btrfs_abort_transaction(trans, root, 2969 ret); 2970 btrfs_end_transaction(trans, root); 2971 goto out; 2972 } 2973 2974 ret = btrfs_insert_empty_item(trans, root, path, 2975 &new_key, size); 2976 if (ret) { 2977 btrfs_abort_transaction(trans, root, 2978 ret); 2979 btrfs_end_transaction(trans, root); 2980 goto out; 2981 } 2982 2983 leaf = path->nodes[0]; 2984 slot = path->slots[0]; 2985 write_extent_buffer(leaf, buf, 2986 btrfs_item_ptr_offset(leaf, slot), 2987 size); 2988 2989 extent = btrfs_item_ptr(leaf, slot, 2990 struct btrfs_file_extent_item); 2991 2992 /* disko == 0 means it's a hole */ 2993 if (!disko) 2994 datao = 0; 2995 2996 btrfs_set_file_extent_offset(leaf, extent, 2997 datao); 2998 btrfs_set_file_extent_num_bytes(leaf, extent, 2999 datal); 3000 if (disko) { 3001 inode_add_bytes(inode, datal); 3002 ret = btrfs_inc_extent_ref(trans, root, 3003 disko, diskl, 0, 3004 root->root_key.objectid, 3005 btrfs_ino(inode), 3006 new_key.offset - datao, 3007 0); 3008 if (ret) { 3009 btrfs_abort_transaction(trans, 3010 root, 3011 ret); 3012 btrfs_end_transaction(trans, 3013 root); 3014 goto out; 3015 3016 } 3017 } 3018 } else if (type 
== BTRFS_FILE_EXTENT_INLINE) { 3019 u64 skip = 0; 3020 u64 trim = 0; 3021 if (off > key.offset) { 3022 skip = off - key.offset; 3023 new_key.offset += skip; 3024 } 3025 3026 if (key.offset + datal > off + len) 3027 trim = key.offset + datal - (off + len); 3028 3029 if (comp && (skip || trim)) { 3030 ret = -EINVAL; 3031 btrfs_end_transaction(trans, root); 3032 goto out; 3033 } 3034 size -= skip + trim; 3035 datal -= skip + trim; 3036 3037 ret = btrfs_drop_extents(trans, root, inode, 3038 new_key.offset, 3039 new_key.offset + datal, 3040 1); 3041 if (ret) { 3042 btrfs_abort_transaction(trans, root, 3043 ret); 3044 btrfs_end_transaction(trans, root); 3045 goto out; 3046 } 3047 3048 ret = btrfs_insert_empty_item(trans, root, path, 3049 &new_key, size); 3050 if (ret) { 3051 btrfs_abort_transaction(trans, root, 3052 ret); 3053 btrfs_end_transaction(trans, root); 3054 goto out; 3055 } 3056 3057 if (skip) { 3058 u32 start = 3059 btrfs_file_extent_calc_inline_size(0); 3060 memmove(buf+start, buf+start+skip, 3061 datal); 3062 } 3063 3064 leaf = path->nodes[0]; 3065 slot = path->slots[0]; 3066 write_extent_buffer(leaf, buf, 3067 btrfs_item_ptr_offset(leaf, slot), 3068 size); 3069 inode_add_bytes(inode, datal); 3070 } 3071 3072 btrfs_mark_buffer_dirty(leaf); 3073 btrfs_release_path(path); 3074 3075 inode_inc_iversion(inode); 3076 inode->i_mtime = inode->i_ctime = CURRENT_TIME; 3077 3078 /* 3079 * we round up to the block size at eof when 3080 * determining which extents to clone above, 3081 * but shouldn't round up the file size 3082 */ 3083 endoff = new_key.offset + datal; 3084 if (endoff > destoff+olen) 3085 endoff = destoff+olen; 3086 if (endoff > inode->i_size) 3087 btrfs_i_size_write(inode, endoff); 3088 3089 ret = btrfs_update_inode(trans, root, inode); 3090 if (ret) { 3091 btrfs_abort_transaction(trans, root, ret); 3092 btrfs_end_transaction(trans, root); 3093 goto out; 3094 } 3095 ret = btrfs_end_transaction(trans, root); 3096 } 3097 next: 3098 
btrfs_release_path(path); 3099 key.offset++; 3100 } 3101 ret = 0; 3102 3103 out: 3104 btrfs_release_path(path); 3105 btrfs_free_path(path); 3106 vfree(buf); 3107 return ret; 3108 } 3109 3110 static noinline long btrfs_ioctl_clone(struct file *file, unsigned long srcfd, 3111 u64 off, u64 olen, u64 destoff) 3112 { 3113 struct inode *inode = fdentry(file)->d_inode; 3114 struct btrfs_root *root = BTRFS_I(inode)->root; 3115 struct fd src_file; 3116 struct inode *src; 3117 int ret; 3118 u64 len = olen; 3119 u64 bs = root->fs_info->sb->s_blocksize; 3120 int same_inode = 0; 3121 3122 /* 3123 * TODO: 3124 * - split compressed inline extents. annoying: we need to 3125 * decompress into destination's address_space (the file offset 3126 * may change, so source mapping won't do), then recompress (or 3127 * otherwise reinsert) a subrange. 3128 * - allow ranges within the same file to be cloned (provided 3129 * they don't overlap)? 3130 */ 3131 3132 /* the destination must be opened for writing */ 3133 if (!(file->f_mode & FMODE_WRITE) || (file->f_flags & O_APPEND)) 3134 return -EINVAL; 3135 3136 if (btrfs_root_readonly(root)) 3137 return -EROFS; 3138 3139 ret = mnt_want_write_file(file); 3140 if (ret) 3141 return ret; 3142 3143 src_file = fdget(srcfd); 3144 if (!src_file.file) { 3145 ret = -EBADF; 3146 goto out_drop_write; 3147 } 3148 3149 ret = -EXDEV; 3150 if (src_file.file->f_path.mnt != file->f_path.mnt) 3151 goto out_fput; 3152 3153 src = file_inode(src_file.file); 3154 3155 ret = -EINVAL; 3156 if (src == inode) 3157 same_inode = 1; 3158 3159 /* the src must be open for reading */ 3160 if (!(src_file.file->f_mode & FMODE_READ)) 3161 goto out_fput; 3162 3163 /* don't make the dst file partly checksummed */ 3164 if ((BTRFS_I(src)->flags & BTRFS_INODE_NODATASUM) != 3165 (BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM)) 3166 goto out_fput; 3167 3168 ret = -EISDIR; 3169 if (S_ISDIR(src->i_mode) || S_ISDIR(inode->i_mode)) 3170 goto out_fput; 3171 3172 ret = -EXDEV; 3173 if 
(src->i_sb != inode->i_sb) 3174 goto out_fput; 3175 3176 if (!same_inode) { 3177 if (inode < src) { 3178 mutex_lock_nested(&inode->i_mutex, I_MUTEX_PARENT); 3179 mutex_lock_nested(&src->i_mutex, I_MUTEX_CHILD); 3180 } else { 3181 mutex_lock_nested(&src->i_mutex, I_MUTEX_PARENT); 3182 mutex_lock_nested(&inode->i_mutex, I_MUTEX_CHILD); 3183 } 3184 } else { 3185 mutex_lock(&src->i_mutex); 3186 } 3187 3188 /* determine range to clone */ 3189 ret = -EINVAL; 3190 if (off + len > src->i_size || off + len < off) 3191 goto out_unlock; 3192 if (len == 0) 3193 olen = len = src->i_size - off; 3194 /* if we extend to eof, continue to block boundary */ 3195 if (off + len == src->i_size) 3196 len = ALIGN(src->i_size, bs) - off; 3197 3198 /* verify the end result is block aligned */ 3199 if (!IS_ALIGNED(off, bs) || !IS_ALIGNED(off + len, bs) || 3200 !IS_ALIGNED(destoff, bs)) 3201 goto out_unlock; 3202 3203 /* verify if ranges are overlapped within the same file */ 3204 if (same_inode) { 3205 if (destoff + len > off && destoff < off + len) 3206 goto out_unlock; 3207 } 3208 3209 if (destoff > inode->i_size) { 3210 ret = btrfs_cont_expand(inode, inode->i_size, destoff); 3211 if (ret) 3212 goto out_unlock; 3213 } 3214 3215 /* truncate page cache pages from target inode range */ 3216 truncate_inode_pages_range(&inode->i_data, destoff, 3217 PAGE_CACHE_ALIGN(destoff + len) - 1); 3218 3219 lock_extent_range(src, off, len); 3220 3221 ret = btrfs_clone(src, inode, off, olen, len, destoff); 3222 3223 unlock_extent(&BTRFS_I(src)->io_tree, off, off + len - 1); 3224 out_unlock: 3225 mutex_unlock(&src->i_mutex); 3226 if (!same_inode) 3227 mutex_unlock(&inode->i_mutex); 3228 out_fput: 3229 fdput(src_file); 3230 out_drop_write: 3231 mnt_drop_write_file(file); 3232 return ret; 3233 } 3234 3235 static long btrfs_ioctl_clone_range(struct file *file, void __user *argp) 3236 { 3237 struct btrfs_ioctl_clone_range_args args; 3238 3239 if (copy_from_user(&args, argp, sizeof(args))) 3240 return -EFAULT; 
3241 return btrfs_ioctl_clone(file, args.src_fd, args.src_offset, 3242 args.src_length, args.dest_offset); 3243 } 3244 3245 /* 3246 * there are many ways the trans_start and trans_end ioctls can lead 3247 * to deadlocks. They should only be used by applications that 3248 * basically own the machine, and have a very in depth understanding 3249 * of all the possible deadlocks and enospc problems. 3250 */ 3251 static long btrfs_ioctl_trans_start(struct file *file) 3252 { 3253 struct inode *inode = file_inode(file); 3254 struct btrfs_root *root = BTRFS_I(inode)->root; 3255 struct btrfs_trans_handle *trans; 3256 int ret; 3257 3258 ret = -EPERM; 3259 if (!capable(CAP_SYS_ADMIN)) 3260 goto out; 3261 3262 ret = -EINPROGRESS; 3263 if (file->private_data) 3264 goto out; 3265 3266 ret = -EROFS; 3267 if (btrfs_root_readonly(root)) 3268 goto out; 3269 3270 ret = mnt_want_write_file(file); 3271 if (ret) 3272 goto out; 3273 3274 atomic_inc(&root->fs_info->open_ioctl_trans); 3275 3276 ret = -ENOMEM; 3277 trans = btrfs_start_ioctl_transaction(root); 3278 if (IS_ERR(trans)) 3279 goto out_drop; 3280 3281 file->private_data = trans; 3282 return 0; 3283 3284 out_drop: 3285 atomic_dec(&root->fs_info->open_ioctl_trans); 3286 mnt_drop_write_file(file); 3287 out: 3288 return ret; 3289 } 3290 3291 static long btrfs_ioctl_default_subvol(struct file *file, void __user *argp) 3292 { 3293 struct inode *inode = file_inode(file); 3294 struct btrfs_root *root = BTRFS_I(inode)->root; 3295 struct btrfs_root *new_root; 3296 struct btrfs_dir_item *di; 3297 struct btrfs_trans_handle *trans; 3298 struct btrfs_path *path; 3299 struct btrfs_key location; 3300 struct btrfs_disk_key disk_key; 3301 u64 objectid = 0; 3302 u64 dir_id; 3303 int ret; 3304 3305 if (!capable(CAP_SYS_ADMIN)) 3306 return -EPERM; 3307 3308 ret = mnt_want_write_file(file); 3309 if (ret) 3310 return ret; 3311 3312 if (copy_from_user(&objectid, argp, sizeof(objectid))) { 3313 ret = -EFAULT; 3314 goto out; 3315 } 3316 3317 if (!objectid) 
3318 objectid = root->root_key.objectid; 3319 3320 location.objectid = objectid; 3321 location.type = BTRFS_ROOT_ITEM_KEY; 3322 location.offset = (u64)-1; 3323 3324 new_root = btrfs_read_fs_root_no_name(root->fs_info, &location); 3325 if (IS_ERR(new_root)) { 3326 ret = PTR_ERR(new_root); 3327 goto out; 3328 } 3329 3330 path = btrfs_alloc_path(); 3331 if (!path) { 3332 ret = -ENOMEM; 3333 goto out; 3334 } 3335 path->leave_spinning = 1; 3336 3337 trans = btrfs_start_transaction(root, 1); 3338 if (IS_ERR(trans)) { 3339 btrfs_free_path(path); 3340 ret = PTR_ERR(trans); 3341 goto out; 3342 } 3343 3344 dir_id = btrfs_super_root_dir(root->fs_info->super_copy); 3345 di = btrfs_lookup_dir_item(trans, root->fs_info->tree_root, path, 3346 dir_id, "default", 7, 1); 3347 if (IS_ERR_OR_NULL(di)) { 3348 btrfs_free_path(path); 3349 btrfs_end_transaction(trans, root); 3350 printk(KERN_ERR "Umm, you don't have the default dir item, " 3351 "this isn't going to work\n"); 3352 ret = -ENOENT; 3353 goto out; 3354 } 3355 3356 btrfs_cpu_key_to_disk(&disk_key, &new_root->root_key); 3357 btrfs_set_dir_item_key(path->nodes[0], di, &disk_key); 3358 btrfs_mark_buffer_dirty(path->nodes[0]); 3359 btrfs_free_path(path); 3360 3361 btrfs_set_fs_incompat(root->fs_info, DEFAULT_SUBVOL); 3362 btrfs_end_transaction(trans, root); 3363 out: 3364 mnt_drop_write_file(file); 3365 return ret; 3366 } 3367 3368 void btrfs_get_block_group_info(struct list_head *groups_list, 3369 struct btrfs_ioctl_space_info *space) 3370 { 3371 struct btrfs_block_group_cache *block_group; 3372 3373 space->total_bytes = 0; 3374 space->used_bytes = 0; 3375 space->flags = 0; 3376 list_for_each_entry(block_group, groups_list, list) { 3377 space->flags = block_group->flags; 3378 space->total_bytes += block_group->key.offset; 3379 space->used_bytes += 3380 btrfs_block_group_used(&block_group->item); 3381 } 3382 } 3383 3384 static long btrfs_ioctl_space_info(struct btrfs_root *root, void __user *arg) 3385 { 3386 struct 
btrfs_ioctl_space_args space_args; 3387 struct btrfs_ioctl_space_info space; 3388 struct btrfs_ioctl_space_info *dest; 3389 struct btrfs_ioctl_space_info *dest_orig; 3390 struct btrfs_ioctl_space_info __user *user_dest; 3391 struct btrfs_space_info *info; 3392 u64 types[] = {BTRFS_BLOCK_GROUP_DATA, 3393 BTRFS_BLOCK_GROUP_SYSTEM, 3394 BTRFS_BLOCK_GROUP_METADATA, 3395 BTRFS_BLOCK_GROUP_DATA | BTRFS_BLOCK_GROUP_METADATA}; 3396 int num_types = 4; 3397 int alloc_size; 3398 int ret = 0; 3399 u64 slot_count = 0; 3400 int i, c; 3401 3402 if (copy_from_user(&space_args, 3403 (struct btrfs_ioctl_space_args __user *)arg, 3404 sizeof(space_args))) 3405 return -EFAULT; 3406 3407 for (i = 0; i < num_types; i++) { 3408 struct btrfs_space_info *tmp; 3409 3410 info = NULL; 3411 rcu_read_lock(); 3412 list_for_each_entry_rcu(tmp, &root->fs_info->space_info, 3413 list) { 3414 if (tmp->flags == types[i]) { 3415 info = tmp; 3416 break; 3417 } 3418 } 3419 rcu_read_unlock(); 3420 3421 if (!info) 3422 continue; 3423 3424 down_read(&info->groups_sem); 3425 for (c = 0; c < BTRFS_NR_RAID_TYPES; c++) { 3426 if (!list_empty(&info->block_groups[c])) 3427 slot_count++; 3428 } 3429 up_read(&info->groups_sem); 3430 } 3431 3432 /* space_slots == 0 means they are asking for a count */ 3433 if (space_args.space_slots == 0) { 3434 space_args.total_spaces = slot_count; 3435 goto out; 3436 } 3437 3438 slot_count = min_t(u64, space_args.space_slots, slot_count); 3439 3440 alloc_size = sizeof(*dest) * slot_count; 3441 3442 /* we generally have at most 6 or so space infos, one for each raid 3443 * level. 
So, a whole page should be more than enough for everyone 3444 */ 3445 if (alloc_size > PAGE_CACHE_SIZE) 3446 return -ENOMEM; 3447 3448 space_args.total_spaces = 0; 3449 dest = kmalloc(alloc_size, GFP_NOFS); 3450 if (!dest) 3451 return -ENOMEM; 3452 dest_orig = dest; 3453 3454 /* now we have a buffer to copy into */ 3455 for (i = 0; i < num_types; i++) { 3456 struct btrfs_space_info *tmp; 3457 3458 if (!slot_count) 3459 break; 3460 3461 info = NULL; 3462 rcu_read_lock(); 3463 list_for_each_entry_rcu(tmp, &root->fs_info->space_info, 3464 list) { 3465 if (tmp->flags == types[i]) { 3466 info = tmp; 3467 break; 3468 } 3469 } 3470 rcu_read_unlock(); 3471 3472 if (!info) 3473 continue; 3474 down_read(&info->groups_sem); 3475 for (c = 0; c < BTRFS_NR_RAID_TYPES; c++) { 3476 if (!list_empty(&info->block_groups[c])) { 3477 btrfs_get_block_group_info( 3478 &info->block_groups[c], &space); 3479 memcpy(dest, &space, sizeof(space)); 3480 dest++; 3481 space_args.total_spaces++; 3482 slot_count--; 3483 } 3484 if (!slot_count) 3485 break; 3486 } 3487 up_read(&info->groups_sem); 3488 } 3489 3490 user_dest = (struct btrfs_ioctl_space_info __user *) 3491 (arg + sizeof(struct btrfs_ioctl_space_args)); 3492 3493 if (copy_to_user(user_dest, dest_orig, alloc_size)) 3494 ret = -EFAULT; 3495 3496 kfree(dest_orig); 3497 out: 3498 if (ret == 0 && copy_to_user(arg, &space_args, sizeof(space_args))) 3499 ret = -EFAULT; 3500 3501 return ret; 3502 } 3503 3504 /* 3505 * there are many ways the trans_start and trans_end ioctls can lead 3506 * to deadlocks. They should only be used by applications that 3507 * basically own the machine, and have a very in depth understanding 3508 * of all the possible deadlocks and enospc problems. 
3509 */ 3510 long btrfs_ioctl_trans_end(struct file *file) 3511 { 3512 struct inode *inode = file_inode(file); 3513 struct btrfs_root *root = BTRFS_I(inode)->root; 3514 struct btrfs_trans_handle *trans; 3515 3516 trans = file->private_data; 3517 if (!trans) 3518 return -EINVAL; 3519 file->private_data = NULL; 3520 3521 btrfs_end_transaction(trans, root); 3522 3523 atomic_dec(&root->fs_info->open_ioctl_trans); 3524 3525 mnt_drop_write_file(file); 3526 return 0; 3527 } 3528 3529 static noinline long btrfs_ioctl_start_sync(struct btrfs_root *root, 3530 void __user *argp) 3531 { 3532 struct btrfs_trans_handle *trans; 3533 u64 transid; 3534 int ret; 3535 3536 trans = btrfs_attach_transaction_barrier(root); 3537 if (IS_ERR(trans)) { 3538 if (PTR_ERR(trans) != -ENOENT) 3539 return PTR_ERR(trans); 3540 3541 /* No running transaction, don't bother */ 3542 transid = root->fs_info->last_trans_committed; 3543 goto out; 3544 } 3545 transid = trans->transid; 3546 ret = btrfs_commit_transaction_async(trans, root, 0); 3547 if (ret) { 3548 btrfs_end_transaction(trans, root); 3549 return ret; 3550 } 3551 out: 3552 if (argp) 3553 if (copy_to_user(argp, &transid, sizeof(transid))) 3554 return -EFAULT; 3555 return 0; 3556 } 3557 3558 static noinline long btrfs_ioctl_wait_sync(struct btrfs_root *root, 3559 void __user *argp) 3560 { 3561 u64 transid; 3562 3563 if (argp) { 3564 if (copy_from_user(&transid, argp, sizeof(transid))) 3565 return -EFAULT; 3566 } else { 3567 transid = 0; /* current trans */ 3568 } 3569 return btrfs_wait_for_commit(root, transid); 3570 } 3571 3572 static long btrfs_ioctl_scrub(struct file *file, void __user *arg) 3573 { 3574 struct btrfs_root *root = BTRFS_I(file_inode(file))->root; 3575 struct btrfs_ioctl_scrub_args *sa; 3576 int ret; 3577 3578 if (!capable(CAP_SYS_ADMIN)) 3579 return -EPERM; 3580 3581 sa = memdup_user(arg, sizeof(*sa)); 3582 if (IS_ERR(sa)) 3583 return PTR_ERR(sa); 3584 3585 if (!(sa->flags & BTRFS_SCRUB_READONLY)) { 3586 ret = 
mnt_want_write_file(file); 3587 if (ret) 3588 goto out; 3589 } 3590 3591 ret = btrfs_scrub_dev(root->fs_info, sa->devid, sa->start, sa->end, 3592 &sa->progress, sa->flags & BTRFS_SCRUB_READONLY, 3593 0); 3594 3595 if (copy_to_user(arg, sa, sizeof(*sa))) 3596 ret = -EFAULT; 3597 3598 if (!(sa->flags & BTRFS_SCRUB_READONLY)) 3599 mnt_drop_write_file(file); 3600 out: 3601 kfree(sa); 3602 return ret; 3603 } 3604 3605 static long btrfs_ioctl_scrub_cancel(struct btrfs_root *root, void __user *arg) 3606 { 3607 if (!capable(CAP_SYS_ADMIN)) 3608 return -EPERM; 3609 3610 return btrfs_scrub_cancel(root->fs_info); 3611 } 3612 3613 static long btrfs_ioctl_scrub_progress(struct btrfs_root *root, 3614 void __user *arg) 3615 { 3616 struct btrfs_ioctl_scrub_args *sa; 3617 int ret; 3618 3619 if (!capable(CAP_SYS_ADMIN)) 3620 return -EPERM; 3621 3622 sa = memdup_user(arg, sizeof(*sa)); 3623 if (IS_ERR(sa)) 3624 return PTR_ERR(sa); 3625 3626 ret = btrfs_scrub_progress(root, sa->devid, &sa->progress); 3627 3628 if (copy_to_user(arg, sa, sizeof(*sa))) 3629 ret = -EFAULT; 3630 3631 kfree(sa); 3632 return ret; 3633 } 3634 3635 static long btrfs_ioctl_get_dev_stats(struct btrfs_root *root, 3636 void __user *arg) 3637 { 3638 struct btrfs_ioctl_get_dev_stats *sa; 3639 int ret; 3640 3641 sa = memdup_user(arg, sizeof(*sa)); 3642 if (IS_ERR(sa)) 3643 return PTR_ERR(sa); 3644 3645 if ((sa->flags & BTRFS_DEV_STATS_RESET) && !capable(CAP_SYS_ADMIN)) { 3646 kfree(sa); 3647 return -EPERM; 3648 } 3649 3650 ret = btrfs_get_dev_stats(root, sa); 3651 3652 if (copy_to_user(arg, sa, sizeof(*sa))) 3653 ret = -EFAULT; 3654 3655 kfree(sa); 3656 return ret; 3657 } 3658 3659 static long btrfs_ioctl_dev_replace(struct btrfs_root *root, void __user *arg) 3660 { 3661 struct btrfs_ioctl_dev_replace_args *p; 3662 int ret; 3663 3664 if (!capable(CAP_SYS_ADMIN)) 3665 return -EPERM; 3666 3667 p = memdup_user(arg, sizeof(*p)); 3668 if (IS_ERR(p)) 3669 return PTR_ERR(p); 3670 3671 switch (p->cmd) { 3672 case 
BTRFS_IOCTL_DEV_REPLACE_CMD_START: 3673 if (root->fs_info->sb->s_flags & MS_RDONLY) 3674 return -EROFS; 3675 3676 if (atomic_xchg( 3677 &root->fs_info->mutually_exclusive_operation_running, 3678 1)) { 3679 pr_info("btrfs: dev add/delete/balance/replace/resize operation in progress\n"); 3680 ret = -EINPROGRESS; 3681 } else { 3682 ret = btrfs_dev_replace_start(root, p); 3683 atomic_set( 3684 &root->fs_info->mutually_exclusive_operation_running, 3685 0); 3686 } 3687 break; 3688 case BTRFS_IOCTL_DEV_REPLACE_CMD_STATUS: 3689 btrfs_dev_replace_status(root->fs_info, p); 3690 ret = 0; 3691 break; 3692 case BTRFS_IOCTL_DEV_REPLACE_CMD_CANCEL: 3693 ret = btrfs_dev_replace_cancel(root->fs_info, p); 3694 break; 3695 default: 3696 ret = -EINVAL; 3697 break; 3698 } 3699 3700 if (copy_to_user(arg, p, sizeof(*p))) 3701 ret = -EFAULT; 3702 3703 kfree(p); 3704 return ret; 3705 } 3706 3707 static long btrfs_ioctl_ino_to_path(struct btrfs_root *root, void __user *arg) 3708 { 3709 int ret = 0; 3710 int i; 3711 u64 rel_ptr; 3712 int size; 3713 struct btrfs_ioctl_ino_path_args *ipa = NULL; 3714 struct inode_fs_paths *ipath = NULL; 3715 struct btrfs_path *path; 3716 3717 if (!capable(CAP_DAC_READ_SEARCH)) 3718 return -EPERM; 3719 3720 path = btrfs_alloc_path(); 3721 if (!path) { 3722 ret = -ENOMEM; 3723 goto out; 3724 } 3725 3726 ipa = memdup_user(arg, sizeof(*ipa)); 3727 if (IS_ERR(ipa)) { 3728 ret = PTR_ERR(ipa); 3729 ipa = NULL; 3730 goto out; 3731 } 3732 3733 size = min_t(u32, ipa->size, 4096); 3734 ipath = init_ipath(size, root, path); 3735 if (IS_ERR(ipath)) { 3736 ret = PTR_ERR(ipath); 3737 ipath = NULL; 3738 goto out; 3739 } 3740 3741 ret = paths_from_inode(ipa->inum, ipath); 3742 if (ret < 0) 3743 goto out; 3744 3745 for (i = 0; i < ipath->fspath->elem_cnt; ++i) { 3746 rel_ptr = ipath->fspath->val[i] - 3747 (u64)(unsigned long)ipath->fspath->val; 3748 ipath->fspath->val[i] = rel_ptr; 3749 } 3750 3751 ret = copy_to_user((void *)(unsigned long)ipa->fspath, 3752 (void *)(unsigned 
long)ipath->fspath, size); 3753 if (ret) { 3754 ret = -EFAULT; 3755 goto out; 3756 } 3757 3758 out: 3759 btrfs_free_path(path); 3760 free_ipath(ipath); 3761 kfree(ipa); 3762 3763 return ret; 3764 } 3765 3766 static int build_ino_list(u64 inum, u64 offset, u64 root, void *ctx) 3767 { 3768 struct btrfs_data_container *inodes = ctx; 3769 const size_t c = 3 * sizeof(u64); 3770 3771 if (inodes->bytes_left >= c) { 3772 inodes->bytes_left -= c; 3773 inodes->val[inodes->elem_cnt] = inum; 3774 inodes->val[inodes->elem_cnt + 1] = offset; 3775 inodes->val[inodes->elem_cnt + 2] = root; 3776 inodes->elem_cnt += 3; 3777 } else { 3778 inodes->bytes_missing += c - inodes->bytes_left; 3779 inodes->bytes_left = 0; 3780 inodes->elem_missed += 3; 3781 } 3782 3783 return 0; 3784 } 3785 3786 static long btrfs_ioctl_logical_to_ino(struct btrfs_root *root, 3787 void __user *arg) 3788 { 3789 int ret = 0; 3790 int size; 3791 struct btrfs_ioctl_logical_ino_args *loi; 3792 struct btrfs_data_container *inodes = NULL; 3793 struct btrfs_path *path = NULL; 3794 3795 if (!capable(CAP_SYS_ADMIN)) 3796 return -EPERM; 3797 3798 loi = memdup_user(arg, sizeof(*loi)); 3799 if (IS_ERR(loi)) { 3800 ret = PTR_ERR(loi); 3801 loi = NULL; 3802 goto out; 3803 } 3804 3805 path = btrfs_alloc_path(); 3806 if (!path) { 3807 ret = -ENOMEM; 3808 goto out; 3809 } 3810 3811 size = min_t(u32, loi->size, 64 * 1024); 3812 inodes = init_data_container(size); 3813 if (IS_ERR(inodes)) { 3814 ret = PTR_ERR(inodes); 3815 inodes = NULL; 3816 goto out; 3817 } 3818 3819 ret = iterate_inodes_from_logical(loi->logical, root->fs_info, path, 3820 build_ino_list, inodes); 3821 if (ret == -EINVAL) 3822 ret = -ENOENT; 3823 if (ret < 0) 3824 goto out; 3825 3826 ret = copy_to_user((void *)(unsigned long)loi->inodes, 3827 (void *)(unsigned long)inodes, size); 3828 if (ret) 3829 ret = -EFAULT; 3830 3831 out: 3832 btrfs_free_path(path); 3833 vfree(inodes); 3834 kfree(loi); 3835 3836 return ret; 3837 } 3838 3839 void 
update_ioctl_balance_args(struct btrfs_fs_info *fs_info, int lock, 3840 struct btrfs_ioctl_balance_args *bargs) 3841 { 3842 struct btrfs_balance_control *bctl = fs_info->balance_ctl; 3843 3844 bargs->flags = bctl->flags; 3845 3846 if (atomic_read(&fs_info->balance_running)) 3847 bargs->state |= BTRFS_BALANCE_STATE_RUNNING; 3848 if (atomic_read(&fs_info->balance_pause_req)) 3849 bargs->state |= BTRFS_BALANCE_STATE_PAUSE_REQ; 3850 if (atomic_read(&fs_info->balance_cancel_req)) 3851 bargs->state |= BTRFS_BALANCE_STATE_CANCEL_REQ; 3852 3853 memcpy(&bargs->data, &bctl->data, sizeof(bargs->data)); 3854 memcpy(&bargs->meta, &bctl->meta, sizeof(bargs->meta)); 3855 memcpy(&bargs->sys, &bctl->sys, sizeof(bargs->sys)); 3856 3857 if (lock) { 3858 spin_lock(&fs_info->balance_lock); 3859 memcpy(&bargs->stat, &bctl->stat, sizeof(bargs->stat)); 3860 spin_unlock(&fs_info->balance_lock); 3861 } else { 3862 memcpy(&bargs->stat, &bctl->stat, sizeof(bargs->stat)); 3863 } 3864 } 3865 3866 static long btrfs_ioctl_balance(struct file *file, void __user *arg) 3867 { 3868 struct btrfs_root *root = BTRFS_I(file_inode(file))->root; 3869 struct btrfs_fs_info *fs_info = root->fs_info; 3870 struct btrfs_ioctl_balance_args *bargs; 3871 struct btrfs_balance_control *bctl; 3872 bool need_unlock; /* for mut. excl. ops lock */ 3873 int ret; 3874 3875 if (!capable(CAP_SYS_ADMIN)) 3876 return -EPERM; 3877 3878 ret = mnt_want_write_file(file); 3879 if (ret) 3880 return ret; 3881 3882 again: 3883 if (!atomic_xchg(&fs_info->mutually_exclusive_operation_running, 1)) { 3884 mutex_lock(&fs_info->volume_mutex); 3885 mutex_lock(&fs_info->balance_mutex); 3886 need_unlock = true; 3887 goto locked; 3888 } 3889 3890 /* 3891 * mut. excl. ops lock is locked. 
Three possibilites: 3892 * (1) some other op is running 3893 * (2) balance is running 3894 * (3) balance is paused -- special case (think resume) 3895 */ 3896 mutex_lock(&fs_info->balance_mutex); 3897 if (fs_info->balance_ctl) { 3898 /* this is either (2) or (3) */ 3899 if (!atomic_read(&fs_info->balance_running)) { 3900 mutex_unlock(&fs_info->balance_mutex); 3901 if (!mutex_trylock(&fs_info->volume_mutex)) 3902 goto again; 3903 mutex_lock(&fs_info->balance_mutex); 3904 3905 if (fs_info->balance_ctl && 3906 !atomic_read(&fs_info->balance_running)) { 3907 /* this is (3) */ 3908 need_unlock = false; 3909 goto locked; 3910 } 3911 3912 mutex_unlock(&fs_info->balance_mutex); 3913 mutex_unlock(&fs_info->volume_mutex); 3914 goto again; 3915 } else { 3916 /* this is (2) */ 3917 mutex_unlock(&fs_info->balance_mutex); 3918 ret = -EINPROGRESS; 3919 goto out; 3920 } 3921 } else { 3922 /* this is (1) */ 3923 mutex_unlock(&fs_info->balance_mutex); 3924 pr_info("btrfs: dev add/delete/balance/replace/resize operation in progress\n"); 3925 ret = -EINVAL; 3926 goto out; 3927 } 3928 3929 locked: 3930 BUG_ON(!atomic_read(&fs_info->mutually_exclusive_operation_running)); 3931 3932 if (arg) { 3933 bargs = memdup_user(arg, sizeof(*bargs)); 3934 if (IS_ERR(bargs)) { 3935 ret = PTR_ERR(bargs); 3936 goto out_unlock; 3937 } 3938 3939 if (bargs->flags & BTRFS_BALANCE_RESUME) { 3940 if (!fs_info->balance_ctl) { 3941 ret = -ENOTCONN; 3942 goto out_bargs; 3943 } 3944 3945 bctl = fs_info->balance_ctl; 3946 spin_lock(&fs_info->balance_lock); 3947 bctl->flags |= BTRFS_BALANCE_RESUME; 3948 spin_unlock(&fs_info->balance_lock); 3949 3950 goto do_balance; 3951 } 3952 } else { 3953 bargs = NULL; 3954 } 3955 3956 if (fs_info->balance_ctl) { 3957 ret = -EINPROGRESS; 3958 goto out_bargs; 3959 } 3960 3961 bctl = kzalloc(sizeof(*bctl), GFP_NOFS); 3962 if (!bctl) { 3963 ret = -ENOMEM; 3964 goto out_bargs; 3965 } 3966 3967 bctl->fs_info = fs_info; 3968 if (arg) { 3969 memcpy(&bctl->data, &bargs->data, 
sizeof(bctl->data)); 3970 memcpy(&bctl->meta, &bargs->meta, sizeof(bctl->meta)); 3971 memcpy(&bctl->sys, &bargs->sys, sizeof(bctl->sys)); 3972 3973 bctl->flags = bargs->flags; 3974 } else { 3975 /* balance everything - no filters */ 3976 bctl->flags |= BTRFS_BALANCE_TYPE_MASK; 3977 } 3978 3979 do_balance: 3980 /* 3981 * Ownership of bctl and mutually_exclusive_operation_running 3982 * goes to to btrfs_balance. bctl is freed in __cancel_balance, 3983 * or, if restriper was paused all the way until unmount, in 3984 * free_fs_info. mutually_exclusive_operation_running is 3985 * cleared in __cancel_balance. 3986 */ 3987 need_unlock = false; 3988 3989 ret = btrfs_balance(bctl, bargs); 3990 3991 if (arg) { 3992 if (copy_to_user(arg, bargs, sizeof(*bargs))) 3993 ret = -EFAULT; 3994 } 3995 3996 out_bargs: 3997 kfree(bargs); 3998 out_unlock: 3999 mutex_unlock(&fs_info->balance_mutex); 4000 mutex_unlock(&fs_info->volume_mutex); 4001 if (need_unlock) 4002 atomic_set(&fs_info->mutually_exclusive_operation_running, 0); 4003 out: 4004 mnt_drop_write_file(file); 4005 return ret; 4006 } 4007 4008 static long btrfs_ioctl_balance_ctl(struct btrfs_root *root, int cmd) 4009 { 4010 if (!capable(CAP_SYS_ADMIN)) 4011 return -EPERM; 4012 4013 switch (cmd) { 4014 case BTRFS_BALANCE_CTL_PAUSE: 4015 return btrfs_pause_balance(root->fs_info); 4016 case BTRFS_BALANCE_CTL_CANCEL: 4017 return btrfs_cancel_balance(root->fs_info); 4018 } 4019 4020 return -EINVAL; 4021 } 4022 4023 static long btrfs_ioctl_balance_progress(struct btrfs_root *root, 4024 void __user *arg) 4025 { 4026 struct btrfs_fs_info *fs_info = root->fs_info; 4027 struct btrfs_ioctl_balance_args *bargs; 4028 int ret = 0; 4029 4030 if (!capable(CAP_SYS_ADMIN)) 4031 return -EPERM; 4032 4033 mutex_lock(&fs_info->balance_mutex); 4034 if (!fs_info->balance_ctl) { 4035 ret = -ENOTCONN; 4036 goto out; 4037 } 4038 4039 bargs = kzalloc(sizeof(*bargs), GFP_NOFS); 4040 if (!bargs) { 4041 ret = -ENOMEM; 4042 goto out; 4043 } 4044 4045 
update_ioctl_balance_args(fs_info, 1, bargs); 4046 4047 if (copy_to_user(arg, bargs, sizeof(*bargs))) 4048 ret = -EFAULT; 4049 4050 kfree(bargs); 4051 out: 4052 mutex_unlock(&fs_info->balance_mutex); 4053 return ret; 4054 } 4055 4056 static long btrfs_ioctl_quota_ctl(struct file *file, void __user *arg) 4057 { 4058 struct btrfs_root *root = BTRFS_I(file_inode(file))->root; 4059 struct btrfs_ioctl_quota_ctl_args *sa; 4060 struct btrfs_trans_handle *trans = NULL; 4061 int ret; 4062 int err; 4063 4064 if (!capable(CAP_SYS_ADMIN)) 4065 return -EPERM; 4066 4067 ret = mnt_want_write_file(file); 4068 if (ret) 4069 return ret; 4070 4071 sa = memdup_user(arg, sizeof(*sa)); 4072 if (IS_ERR(sa)) { 4073 ret = PTR_ERR(sa); 4074 goto drop_write; 4075 } 4076 4077 down_write(&root->fs_info->subvol_sem); 4078 trans = btrfs_start_transaction(root->fs_info->tree_root, 2); 4079 if (IS_ERR(trans)) { 4080 ret = PTR_ERR(trans); 4081 goto out; 4082 } 4083 4084 switch (sa->cmd) { 4085 case BTRFS_QUOTA_CTL_ENABLE: 4086 ret = btrfs_quota_enable(trans, root->fs_info); 4087 break; 4088 case BTRFS_QUOTA_CTL_DISABLE: 4089 ret = btrfs_quota_disable(trans, root->fs_info); 4090 break; 4091 default: 4092 ret = -EINVAL; 4093 break; 4094 } 4095 4096 err = btrfs_commit_transaction(trans, root->fs_info->tree_root); 4097 if (err && !ret) 4098 ret = err; 4099 out: 4100 kfree(sa); 4101 up_write(&root->fs_info->subvol_sem); 4102 drop_write: 4103 mnt_drop_write_file(file); 4104 return ret; 4105 } 4106 4107 static long btrfs_ioctl_qgroup_assign(struct file *file, void __user *arg) 4108 { 4109 struct btrfs_root *root = BTRFS_I(file_inode(file))->root; 4110 struct btrfs_ioctl_qgroup_assign_args *sa; 4111 struct btrfs_trans_handle *trans; 4112 int ret; 4113 int err; 4114 4115 if (!capable(CAP_SYS_ADMIN)) 4116 return -EPERM; 4117 4118 ret = mnt_want_write_file(file); 4119 if (ret) 4120 return ret; 4121 4122 sa = memdup_user(arg, sizeof(*sa)); 4123 if (IS_ERR(sa)) { 4124 ret = PTR_ERR(sa); 4125 goto drop_write; 
4126 } 4127 4128 trans = btrfs_join_transaction(root); 4129 if (IS_ERR(trans)) { 4130 ret = PTR_ERR(trans); 4131 goto out; 4132 } 4133 4134 /* FIXME: check if the IDs really exist */ 4135 if (sa->assign) { 4136 ret = btrfs_add_qgroup_relation(trans, root->fs_info, 4137 sa->src, sa->dst); 4138 } else { 4139 ret = btrfs_del_qgroup_relation(trans, root->fs_info, 4140 sa->src, sa->dst); 4141 } 4142 4143 err = btrfs_end_transaction(trans, root); 4144 if (err && !ret) 4145 ret = err; 4146 4147 out: 4148 kfree(sa); 4149 drop_write: 4150 mnt_drop_write_file(file); 4151 return ret; 4152 } 4153 4154 static long btrfs_ioctl_qgroup_create(struct file *file, void __user *arg) 4155 { 4156 struct btrfs_root *root = BTRFS_I(file_inode(file))->root; 4157 struct btrfs_ioctl_qgroup_create_args *sa; 4158 struct btrfs_trans_handle *trans; 4159 int ret; 4160 int err; 4161 4162 if (!capable(CAP_SYS_ADMIN)) 4163 return -EPERM; 4164 4165 ret = mnt_want_write_file(file); 4166 if (ret) 4167 return ret; 4168 4169 sa = memdup_user(arg, sizeof(*sa)); 4170 if (IS_ERR(sa)) { 4171 ret = PTR_ERR(sa); 4172 goto drop_write; 4173 } 4174 4175 if (!sa->qgroupid) { 4176 ret = -EINVAL; 4177 goto out; 4178 } 4179 4180 trans = btrfs_join_transaction(root); 4181 if (IS_ERR(trans)) { 4182 ret = PTR_ERR(trans); 4183 goto out; 4184 } 4185 4186 /* FIXME: check if the IDs really exist */ 4187 if (sa->create) { 4188 ret = btrfs_create_qgroup(trans, root->fs_info, sa->qgroupid, 4189 NULL); 4190 } else { 4191 ret = btrfs_remove_qgroup(trans, root->fs_info, sa->qgroupid); 4192 } 4193 4194 err = btrfs_end_transaction(trans, root); 4195 if (err && !ret) 4196 ret = err; 4197 4198 out: 4199 kfree(sa); 4200 drop_write: 4201 mnt_drop_write_file(file); 4202 return ret; 4203 } 4204 4205 static long btrfs_ioctl_qgroup_limit(struct file *file, void __user *arg) 4206 { 4207 struct btrfs_root *root = BTRFS_I(file_inode(file))->root; 4208 struct btrfs_ioctl_qgroup_limit_args *sa; 4209 struct btrfs_trans_handle *trans; 4210 int 
ret; 4211 int err; 4212 u64 qgroupid; 4213 4214 if (!capable(CAP_SYS_ADMIN)) 4215 return -EPERM; 4216 4217 ret = mnt_want_write_file(file); 4218 if (ret) 4219 return ret; 4220 4221 sa = memdup_user(arg, sizeof(*sa)); 4222 if (IS_ERR(sa)) { 4223 ret = PTR_ERR(sa); 4224 goto drop_write; 4225 } 4226 4227 trans = btrfs_join_transaction(root); 4228 if (IS_ERR(trans)) { 4229 ret = PTR_ERR(trans); 4230 goto out; 4231 } 4232 4233 qgroupid = sa->qgroupid; 4234 if (!qgroupid) { 4235 /* take the current subvol as qgroup */ 4236 qgroupid = root->root_key.objectid; 4237 } 4238 4239 /* FIXME: check if the IDs really exist */ 4240 ret = btrfs_limit_qgroup(trans, root->fs_info, qgroupid, &sa->lim); 4241 4242 err = btrfs_end_transaction(trans, root); 4243 if (err && !ret) 4244 ret = err; 4245 4246 out: 4247 kfree(sa); 4248 drop_write: 4249 mnt_drop_write_file(file); 4250 return ret; 4251 } 4252 4253 static long btrfs_ioctl_quota_rescan(struct file *file, void __user *arg) 4254 { 4255 struct btrfs_root *root = BTRFS_I(file_inode(file))->root; 4256 struct btrfs_ioctl_quota_rescan_args *qsa; 4257 int ret; 4258 4259 if (!capable(CAP_SYS_ADMIN)) 4260 return -EPERM; 4261 4262 ret = mnt_want_write_file(file); 4263 if (ret) 4264 return ret; 4265 4266 qsa = memdup_user(arg, sizeof(*qsa)); 4267 if (IS_ERR(qsa)) { 4268 ret = PTR_ERR(qsa); 4269 goto drop_write; 4270 } 4271 4272 if (qsa->flags) { 4273 ret = -EINVAL; 4274 goto out; 4275 } 4276 4277 ret = btrfs_qgroup_rescan(root->fs_info); 4278 4279 out: 4280 kfree(qsa); 4281 drop_write: 4282 mnt_drop_write_file(file); 4283 return ret; 4284 } 4285 4286 static long btrfs_ioctl_quota_rescan_status(struct file *file, void __user *arg) 4287 { 4288 struct btrfs_root *root = BTRFS_I(file_inode(file))->root; 4289 struct btrfs_ioctl_quota_rescan_args *qsa; 4290 int ret = 0; 4291 4292 if (!capable(CAP_SYS_ADMIN)) 4293 return -EPERM; 4294 4295 qsa = kzalloc(sizeof(*qsa), GFP_NOFS); 4296 if (!qsa) 4297 return -ENOMEM; 4298 4299 if 
(root->fs_info->qgroup_flags & BTRFS_QGROUP_STATUS_FLAG_RESCAN) { 4300 qsa->flags = 1; 4301 qsa->progress = root->fs_info->qgroup_rescan_progress.objectid; 4302 } 4303 4304 if (copy_to_user(arg, qsa, sizeof(*qsa))) 4305 ret = -EFAULT; 4306 4307 kfree(qsa); 4308 return ret; 4309 } 4310 4311 static long btrfs_ioctl_quota_rescan_wait(struct file *file, void __user *arg) 4312 { 4313 struct btrfs_root *root = BTRFS_I(fdentry(file)->d_inode)->root; 4314 4315 if (!capable(CAP_SYS_ADMIN)) 4316 return -EPERM; 4317 4318 return btrfs_qgroup_wait_for_completion(root->fs_info); 4319 } 4320 4321 static long btrfs_ioctl_set_received_subvol(struct file *file, 4322 void __user *arg) 4323 { 4324 struct btrfs_ioctl_received_subvol_args *sa = NULL; 4325 struct inode *inode = file_inode(file); 4326 struct btrfs_root *root = BTRFS_I(inode)->root; 4327 struct btrfs_root_item *root_item = &root->root_item; 4328 struct btrfs_trans_handle *trans; 4329 struct timespec ct = CURRENT_TIME; 4330 int ret = 0; 4331 int received_uuid_changed; 4332 4333 ret = mnt_want_write_file(file); 4334 if (ret < 0) 4335 return ret; 4336 4337 down_write(&root->fs_info->subvol_sem); 4338 4339 if (btrfs_ino(inode) != BTRFS_FIRST_FREE_OBJECTID) { 4340 ret = -EINVAL; 4341 goto out; 4342 } 4343 4344 if (btrfs_root_readonly(root)) { 4345 ret = -EROFS; 4346 goto out; 4347 } 4348 4349 if (!inode_owner_or_capable(inode)) { 4350 ret = -EACCES; 4351 goto out; 4352 } 4353 4354 sa = memdup_user(arg, sizeof(*sa)); 4355 if (IS_ERR(sa)) { 4356 ret = PTR_ERR(sa); 4357 sa = NULL; 4358 goto out; 4359 } 4360 4361 /* 4362 * 1 - root item 4363 * 2 - uuid items (received uuid + subvol uuid) 4364 */ 4365 trans = btrfs_start_transaction(root, 3); 4366 if (IS_ERR(trans)) { 4367 ret = PTR_ERR(trans); 4368 trans = NULL; 4369 goto out; 4370 } 4371 4372 sa->rtransid = trans->transid; 4373 sa->rtime.sec = ct.tv_sec; 4374 sa->rtime.nsec = ct.tv_nsec; 4375 4376 received_uuid_changed = memcmp(root_item->received_uuid, sa->uuid, 4377 
BTRFS_UUID_SIZE); 4378 if (received_uuid_changed && 4379 !btrfs_is_empty_uuid(root_item->received_uuid)) 4380 btrfs_uuid_tree_rem(trans, root->fs_info->uuid_root, 4381 root_item->received_uuid, 4382 BTRFS_UUID_KEY_RECEIVED_SUBVOL, 4383 root->root_key.objectid); 4384 memcpy(root_item->received_uuid, sa->uuid, BTRFS_UUID_SIZE); 4385 btrfs_set_root_stransid(root_item, sa->stransid); 4386 btrfs_set_root_rtransid(root_item, sa->rtransid); 4387 btrfs_set_stack_timespec_sec(&root_item->stime, sa->stime.sec); 4388 btrfs_set_stack_timespec_nsec(&root_item->stime, sa->stime.nsec); 4389 btrfs_set_stack_timespec_sec(&root_item->rtime, sa->rtime.sec); 4390 btrfs_set_stack_timespec_nsec(&root_item->rtime, sa->rtime.nsec); 4391 4392 ret = btrfs_update_root(trans, root->fs_info->tree_root, 4393 &root->root_key, &root->root_item); 4394 if (ret < 0) { 4395 btrfs_end_transaction(trans, root); 4396 goto out; 4397 } 4398 if (received_uuid_changed && !btrfs_is_empty_uuid(sa->uuid)) { 4399 ret = btrfs_uuid_tree_add(trans, root->fs_info->uuid_root, 4400 sa->uuid, 4401 BTRFS_UUID_KEY_RECEIVED_SUBVOL, 4402 root->root_key.objectid); 4403 if (ret < 0 && ret != -EEXIST) { 4404 btrfs_abort_transaction(trans, root, ret); 4405 goto out; 4406 } 4407 } 4408 ret = btrfs_commit_transaction(trans, root); 4409 if (ret < 0) { 4410 btrfs_abort_transaction(trans, root, ret); 4411 goto out; 4412 } 4413 4414 ret = copy_to_user(arg, sa, sizeof(*sa)); 4415 if (ret) 4416 ret = -EFAULT; 4417 4418 out: 4419 kfree(sa); 4420 up_write(&root->fs_info->subvol_sem); 4421 mnt_drop_write_file(file); 4422 return ret; 4423 } 4424 4425 static int btrfs_ioctl_get_fslabel(struct file *file, void __user *arg) 4426 { 4427 struct btrfs_root *root = BTRFS_I(file_inode(file))->root; 4428 size_t len; 4429 int ret; 4430 char label[BTRFS_LABEL_SIZE]; 4431 4432 spin_lock(&root->fs_info->super_lock); 4433 memcpy(label, root->fs_info->super_copy->label, BTRFS_LABEL_SIZE); 4434 spin_unlock(&root->fs_info->super_lock); 4435 4436 len = 
strnlen(label, BTRFS_LABEL_SIZE); 4437 4438 if (len == BTRFS_LABEL_SIZE) { 4439 pr_warn("btrfs: label is too long, return the first %zu bytes\n", 4440 --len); 4441 } 4442 4443 ret = copy_to_user(arg, label, len); 4444 4445 return ret ? -EFAULT : 0; 4446 } 4447 4448 static int btrfs_ioctl_set_fslabel(struct file *file, void __user *arg) 4449 { 4450 struct btrfs_root *root = BTRFS_I(file_inode(file))->root; 4451 struct btrfs_super_block *super_block = root->fs_info->super_copy; 4452 struct btrfs_trans_handle *trans; 4453 char label[BTRFS_LABEL_SIZE]; 4454 int ret; 4455 4456 if (!capable(CAP_SYS_ADMIN)) 4457 return -EPERM; 4458 4459 if (copy_from_user(label, arg, sizeof(label))) 4460 return -EFAULT; 4461 4462 if (strnlen(label, BTRFS_LABEL_SIZE) == BTRFS_LABEL_SIZE) { 4463 pr_err("btrfs: unable to set label with more than %d bytes\n", 4464 BTRFS_LABEL_SIZE - 1); 4465 return -EINVAL; 4466 } 4467 4468 ret = mnt_want_write_file(file); 4469 if (ret) 4470 return ret; 4471 4472 trans = btrfs_start_transaction(root, 0); 4473 if (IS_ERR(trans)) { 4474 ret = PTR_ERR(trans); 4475 goto out_unlock; 4476 } 4477 4478 spin_lock(&root->fs_info->super_lock); 4479 strcpy(super_block->label, label); 4480 spin_unlock(&root->fs_info->super_lock); 4481 ret = btrfs_end_transaction(trans, root); 4482 4483 out_unlock: 4484 mnt_drop_write_file(file); 4485 return ret; 4486 } 4487 4488 long btrfs_ioctl(struct file *file, unsigned int 4489 cmd, unsigned long arg) 4490 { 4491 struct btrfs_root *root = BTRFS_I(file_inode(file))->root; 4492 void __user *argp = (void __user *)arg; 4493 4494 switch (cmd) { 4495 case FS_IOC_GETFLAGS: 4496 return btrfs_ioctl_getflags(file, argp); 4497 case FS_IOC_SETFLAGS: 4498 return btrfs_ioctl_setflags(file, argp); 4499 case FS_IOC_GETVERSION: 4500 return btrfs_ioctl_getversion(file, argp); 4501 case FITRIM: 4502 return btrfs_ioctl_fitrim(file, argp); 4503 case BTRFS_IOC_SNAP_CREATE: 4504 return btrfs_ioctl_snap_create(file, argp, 0); 4505 case 
BTRFS_IOC_SNAP_CREATE_V2: 4506 return btrfs_ioctl_snap_create_v2(file, argp, 0); 4507 case BTRFS_IOC_SUBVOL_CREATE: 4508 return btrfs_ioctl_snap_create(file, argp, 1); 4509 case BTRFS_IOC_SUBVOL_CREATE_V2: 4510 return btrfs_ioctl_snap_create_v2(file, argp, 1); 4511 case BTRFS_IOC_SNAP_DESTROY: 4512 return btrfs_ioctl_snap_destroy(file, argp); 4513 case BTRFS_IOC_SUBVOL_GETFLAGS: 4514 return btrfs_ioctl_subvol_getflags(file, argp); 4515 case BTRFS_IOC_SUBVOL_SETFLAGS: 4516 return btrfs_ioctl_subvol_setflags(file, argp); 4517 case BTRFS_IOC_DEFAULT_SUBVOL: 4518 return btrfs_ioctl_default_subvol(file, argp); 4519 case BTRFS_IOC_DEFRAG: 4520 return btrfs_ioctl_defrag(file, NULL); 4521 case BTRFS_IOC_DEFRAG_RANGE: 4522 return btrfs_ioctl_defrag(file, argp); 4523 case BTRFS_IOC_RESIZE: 4524 return btrfs_ioctl_resize(file, argp); 4525 case BTRFS_IOC_ADD_DEV: 4526 return btrfs_ioctl_add_dev(root, argp); 4527 case BTRFS_IOC_RM_DEV: 4528 return btrfs_ioctl_rm_dev(file, argp); 4529 case BTRFS_IOC_FS_INFO: 4530 return btrfs_ioctl_fs_info(root, argp); 4531 case BTRFS_IOC_DEV_INFO: 4532 return btrfs_ioctl_dev_info(root, argp); 4533 case BTRFS_IOC_BALANCE: 4534 return btrfs_ioctl_balance(file, NULL); 4535 case BTRFS_IOC_CLONE: 4536 return btrfs_ioctl_clone(file, arg, 0, 0, 0); 4537 case BTRFS_IOC_CLONE_RANGE: 4538 return btrfs_ioctl_clone_range(file, argp); 4539 case BTRFS_IOC_TRANS_START: 4540 return btrfs_ioctl_trans_start(file); 4541 case BTRFS_IOC_TRANS_END: 4542 return btrfs_ioctl_trans_end(file); 4543 case BTRFS_IOC_TREE_SEARCH: 4544 return btrfs_ioctl_tree_search(file, argp); 4545 case BTRFS_IOC_INO_LOOKUP: 4546 return btrfs_ioctl_ino_lookup(file, argp); 4547 case BTRFS_IOC_INO_PATHS: 4548 return btrfs_ioctl_ino_to_path(root, argp); 4549 case BTRFS_IOC_LOGICAL_INO: 4550 return btrfs_ioctl_logical_to_ino(root, argp); 4551 case BTRFS_IOC_SPACE_INFO: 4552 return btrfs_ioctl_space_info(root, argp); 4553 case BTRFS_IOC_SYNC: 4554 btrfs_sync_fs(file->f_dentry->d_sb, 1); 4555 
return 0; 4556 case BTRFS_IOC_START_SYNC: 4557 return btrfs_ioctl_start_sync(root, argp); 4558 case BTRFS_IOC_WAIT_SYNC: 4559 return btrfs_ioctl_wait_sync(root, argp); 4560 case BTRFS_IOC_SCRUB: 4561 return btrfs_ioctl_scrub(file, argp); 4562 case BTRFS_IOC_SCRUB_CANCEL: 4563 return btrfs_ioctl_scrub_cancel(root, argp); 4564 case BTRFS_IOC_SCRUB_PROGRESS: 4565 return btrfs_ioctl_scrub_progress(root, argp); 4566 case BTRFS_IOC_BALANCE_V2: 4567 return btrfs_ioctl_balance(file, argp); 4568 case BTRFS_IOC_BALANCE_CTL: 4569 return btrfs_ioctl_balance_ctl(root, arg); 4570 case BTRFS_IOC_BALANCE_PROGRESS: 4571 return btrfs_ioctl_balance_progress(root, argp); 4572 case BTRFS_IOC_SET_RECEIVED_SUBVOL: 4573 return btrfs_ioctl_set_received_subvol(file, argp); 4574 case BTRFS_IOC_SEND: 4575 return btrfs_ioctl_send(file, argp); 4576 case BTRFS_IOC_GET_DEV_STATS: 4577 return btrfs_ioctl_get_dev_stats(root, argp); 4578 case BTRFS_IOC_QUOTA_CTL: 4579 return btrfs_ioctl_quota_ctl(file, argp); 4580 case BTRFS_IOC_QGROUP_ASSIGN: 4581 return btrfs_ioctl_qgroup_assign(file, argp); 4582 case BTRFS_IOC_QGROUP_CREATE: 4583 return btrfs_ioctl_qgroup_create(file, argp); 4584 case BTRFS_IOC_QGROUP_LIMIT: 4585 return btrfs_ioctl_qgroup_limit(file, argp); 4586 case BTRFS_IOC_QUOTA_RESCAN: 4587 return btrfs_ioctl_quota_rescan(file, argp); 4588 case BTRFS_IOC_QUOTA_RESCAN_STATUS: 4589 return btrfs_ioctl_quota_rescan_status(file, argp); 4590 case BTRFS_IOC_QUOTA_RESCAN_WAIT: 4591 return btrfs_ioctl_quota_rescan_wait(file, argp); 4592 case BTRFS_IOC_DEV_REPLACE: 4593 return btrfs_ioctl_dev_replace(root, argp); 4594 case BTRFS_IOC_GET_FSLABEL: 4595 return btrfs_ioctl_get_fslabel(file, argp); 4596 case BTRFS_IOC_SET_FSLABEL: 4597 return btrfs_ioctl_set_fslabel(file, argp); 4598 case BTRFS_IOC_FILE_EXTENT_SAME: 4599 return btrfs_ioctl_file_extent_same(file, argp); 4600 } 4601 4602 return -ENOTTY; 4603 } 4604