/*
 * Copyright (C) 2007 Oracle.  All rights reserved.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public
 * License v2 as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * General Public License for more details.
 *
 * You should have received a copy of the GNU General Public
 * License along with this program; if not, write to the
 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
 * Boston, MA 021110-1307, USA.
 */
#include <linux/sched.h>
#include <linux/bio.h>
#include <linux/slab.h>
#include <linux/buffer_head.h>
#include <linux/blkdev.h>
#include <linux/iocontext.h>
#include <linux/capability.h>
#include <linux/ratelimit.h>
#include <linux/kthread.h>
#include <linux/raid/pq.h>
#include <linux/semaphore.h>
#include <linux/uuid.h>
#include <asm/div64.h>
#include "ctree.h"
#include "extent_map.h"
#include "disk-io.h"
#include "transaction.h"
#include "print-tree.h"
#include "volumes.h"
#include "raid56.h"
#include "async-thread.h"
#include "check-integrity.h"
#include "rcu-string.h"
#include "math.h"
#include "dev-replace.h"
#include "sysfs.h"

const struct btrfs_raid_attr btrfs_raid_array[BTRFS_NR_RAID_TYPES] = {
	[BTRFS_RAID_RAID10] = {
		.sub_stripes	= 2,
		.dev_stripes	= 1,
		.devs_max	= 0,	/* 0 == as many as possible */
		.devs_min	= 4,
		.tolerated_failures = 1,
		.devs_increment	= 2,
		.ncopies	= 2,
	},
	[BTRFS_RAID_RAID1] = {
		.sub_stripes	= 1,
		.dev_stripes	= 1,
		.devs_max	= 2,
		.devs_min	= 2,
		.tolerated_failures = 1,
		.devs_increment	= 2,
		.ncopies	= 2,
	},
	[BTRFS_RAID_DUP] = {
		.sub_stripes	= 1,
		.dev_stripes	= 2,
		.devs_max	= 1,
		.devs_min	= 1,
		.tolerated_failures = 0,
		.devs_increment	= 1,
		.ncopies	= 2,
	},
	[BTRFS_RAID_RAID0] = {
		.sub_stripes	= 1,
		.dev_stripes	= 1,
		.devs_max	= 0,
		.devs_min	= 2,
		.tolerated_failures = 0,
		.devs_increment	= 1,
		.ncopies	= 1,
	},
	[BTRFS_RAID_SINGLE] = {
		.sub_stripes	= 1,
		.dev_stripes	= 1,
		.devs_max	= 1,
		.devs_min	= 1,
		.tolerated_failures = 0,
		.devs_increment	= 1,
		.ncopies	= 1,
	},
	[BTRFS_RAID_RAID5] = {
		.sub_stripes	= 1,
		.dev_stripes	= 1,
		.devs_max	= 0,
		.devs_min	= 2,
		.tolerated_failures = 1,
		.devs_increment	= 1,
		.ncopies	= 2,
	},
	[BTRFS_RAID_RAID6] = {
		.sub_stripes	= 1,
		.dev_stripes	= 1,
		.devs_max	= 0,
		.devs_min	= 3,
		.tolerated_failures = 2,
		.devs_increment	= 1,
		.ncopies	= 3,
	},
};

const u64 btrfs_raid_group[BTRFS_NR_RAID_TYPES] = {
	[BTRFS_RAID_RAID10] = BTRFS_BLOCK_GROUP_RAID10,
	[BTRFS_RAID_RAID1]  = BTRFS_BLOCK_GROUP_RAID1,
	[BTRFS_RAID_DUP]    = BTRFS_BLOCK_GROUP_DUP,
	[BTRFS_RAID_RAID0]  = BTRFS_BLOCK_GROUP_RAID0,
	[BTRFS_RAID_SINGLE] = 0,
	[BTRFS_RAID_RAID5]  = BTRFS_BLOCK_GROUP_RAID5,
	[BTRFS_RAID_RAID6]  = BTRFS_BLOCK_GROUP_RAID6,
};

/*
 * Table to convert BTRFS_RAID_* to the error code if the minimum number of
 * devices condition is not met. Zero means there's no corresponding
 * BTRFS_ERROR_DEV_*_NOT_MET value.
 */
const int btrfs_raid_mindev_error[BTRFS_NR_RAID_TYPES] = {
	[BTRFS_RAID_RAID10] = BTRFS_ERROR_DEV_RAID10_MIN_NOT_MET,
	[BTRFS_RAID_RAID1]  = BTRFS_ERROR_DEV_RAID1_MIN_NOT_MET,
	[BTRFS_RAID_DUP]    = 0,
	[BTRFS_RAID_RAID0]  = 0,
	[BTRFS_RAID_SINGLE] = 0,
	[BTRFS_RAID_RAID5]  = BTRFS_ERROR_DEV_RAID5_MIN_NOT_MET,
	[BTRFS_RAID_RAID6]  = BTRFS_ERROR_DEV_RAID6_MIN_NOT_MET,
};

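/*
 * Example (illustrative only, not used by the code): the three tables above
 * are all indexed by the BTRFS_RAID_* enum, so a profile's properties can be
 * looked up directly, e.g.
 *
 *	btrfs_raid_array[BTRFS_RAID_RAID10].ncopies            == 2
 *	btrfs_raid_array[BTRFS_RAID_RAID6].tolerated_failures  == 2
 *	btrfs_raid_group[BTRFS_RAID_RAID5]                     == BTRFS_BLOCK_GROUP_RAID5
 *	btrfs_raid_mindev_error[BTRFS_RAID_RAID1]              == BTRFS_ERROR_DEV_RAID1_MIN_NOT_MET
 */
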
static int init_first_rw_device(struct btrfs_trans_handle *trans,
				struct btrfs_fs_info *fs_info);
static int btrfs_relocate_sys_chunks(struct btrfs_fs_info *fs_info);
static void __btrfs_reset_dev_stats(struct btrfs_device *dev);
static void btrfs_dev_stat_print_on_error(struct btrfs_device *dev);
static void btrfs_dev_stat_print_on_load(struct btrfs_device *device);
static int __btrfs_map_block(struct btrfs_fs_info *fs_info,
			     enum btrfs_map_op op,
			     u64 logical, u64 *length,
			     struct btrfs_bio **bbio_ret,
			     int mirror_num, int need_raid_map);

/*
 * Device locking
 * ==============
 *
 * There are several mutexes that protect manipulation of devices and low-level
 * structures like chunks but not block groups, extents or files
 *
 * uuid_mutex (global lock)
 * ------------------------
 * protects the fs_uuids list that tracks all per-fs fs_devices, resulting from
 * the SCAN_DEV ioctl registration or from mount, either implicitly (the first
 * device) or as requested by the device= mount option
 *
 * the mutex can be very coarse and can cover long-running operations
 *
 * protects: updates to fs_devices counters like missing devices, rw devices,
 * seeding, structure cloning, opening/closing devices at mount/umount time
 *
 * global::fs_devs - add, remove, updates to the global list
 *
 * does not protect: manipulation of the fs_devices::devices list!
 *
 * btrfs_device::name - renames (write side), read is RCU
 *
 * fs_devices::device_list_mutex (per-fs, with RCU)
 * ------------------------------------------------
 * protects updates to fs_devices::devices, i.e. adding and deleting
 *
 * simple list traversal with read-only actions can be done with RCU protection
 *
 * may be used to exclude some operations from running concurrently without any
 * modifications to the list (see write_all_supers)
 *
 * volume_mutex
 * ------------
 * coarse lock owned by a mounted filesystem; used to exclude some operations
 * that cannot run in parallel and affect the higher-level properties of the
 * filesystem like: device add/delete/resize/replace, or balance
 *
 * balance_mutex
 * -------------
 * protects balance structures (status, state) and context accessed from
 * several places (internally, ioctl)
 *
 * chunk_mutex
 * -----------
 * protects chunks, adding or removing during allocation, trim or when a new
 * device is added/removed
 *
 * cleaner_mutex
 * -------------
 * a big lock that is held by the cleaner thread and prevents running subvolume
 * cleaning together with relocation or delayed iputs
 *
 *
 * Lock nesting
 * ============
 *
 * uuid_mutex
 *   volume_mutex
 *     device_list_mutex
 *       chunk_mutex
 *     balance_mutex
 */

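/*
 * Illustration only (not part of the code): given the nesting above, a
 * hypothetical helper that needs both the global device list and a per-fs
 * device list would take the locks in this order:
 *
 *	mutex_lock(&uuid_mutex);
 *	mutex_lock(&fs_devices->device_list_mutex);
 *	list_for_each_entry(device, &fs_devices->devices, dev_list) {
 *		...
 *	}
 *	mutex_unlock(&fs_devices->device_list_mutex);
 *	mutex_unlock(&uuid_mutex);
 *
 * Read-only traversal of fs_devices::devices can instead be done under
 * rcu_read_lock()/rcu_read_unlock(), as noted above.
 */
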
DEFINE_MUTEX(uuid_mutex);
static LIST_HEAD(fs_uuids);
struct list_head *btrfs_get_fs_uuids(void)
{
	return &fs_uuids;
}

/*
 * alloc_fs_devices - allocate struct btrfs_fs_devices
 * @fsid:	if not NULL, copy the uuid to fs_devices::fsid
 *
 * Return a pointer to a new struct btrfs_fs_devices on success, or ERR_PTR().
 * The returned struct is not linked onto any lists and can be destroyed with
 * kfree() right away.
 */
static struct btrfs_fs_devices *alloc_fs_devices(const u8 *fsid)
{
	struct btrfs_fs_devices *fs_devs;

	fs_devs = kzalloc(sizeof(*fs_devs), GFP_KERNEL);
	if (!fs_devs)
		return ERR_PTR(-ENOMEM);

	mutex_init(&fs_devs->device_list_mutex);

	INIT_LIST_HEAD(&fs_devs->devices);
	INIT_LIST_HEAD(&fs_devs->resized_devices);
	INIT_LIST_HEAD(&fs_devs->alloc_list);
	INIT_LIST_HEAD(&fs_devs->list);
	if (fsid)
		memcpy(fs_devs->fsid, fsid, BTRFS_FSID_SIZE);

	return fs_devs;
}

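/*
 * Usage sketch (illustrative only), following the contract documented above:
 * check the result with IS_ERR(), and an fs_devices that has not been linked
 * onto any list can simply be freed with kfree():
 *
 *	fs_devs = alloc_fs_devices(fsid);
 *	if (IS_ERR(fs_devs))
 *		return PTR_ERR(fs_devs);
 *	...
 *	kfree(fs_devs);
 */
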
static void free_device(struct btrfs_device *device)
{
	rcu_string_free(device->name);
	bio_put(device->flush_bio);
	kfree(device);
}

static void free_fs_devices(struct btrfs_fs_devices *fs_devices)
{
	struct btrfs_device *device;
	WARN_ON(fs_devices->opened);
	while (!list_empty(&fs_devices->devices)) {
		device = list_entry(fs_devices->devices.next,
				    struct btrfs_device, dev_list);
		list_del(&device->dev_list);
		free_device(device);
	}
	kfree(fs_devices);
}

static void btrfs_kobject_uevent(struct block_device *bdev,
				 enum kobject_action action)
{
	int ret;

	ret = kobject_uevent(&disk_to_dev(bdev->bd_disk)->kobj, action);
	if (ret)
		pr_warn("BTRFS: Sending event '%d' to kobject: '%s' (%p): failed\n",
			action,
			kobject_name(&disk_to_dev(bdev->bd_disk)->kobj),
			&disk_to_dev(bdev->bd_disk)->kobj);
}

void btrfs_cleanup_fs_uuids(void)
{
	struct btrfs_fs_devices *fs_devices;

	while (!list_empty(&fs_uuids)) {
		fs_devices = list_entry(fs_uuids.next,
					struct btrfs_fs_devices, list);
		list_del(&fs_devices->list);
		free_fs_devices(fs_devices);
	}
}

/*
 * Returns a pointer to a new btrfs_device on success; ERR_PTR() on error.
 * The returned struct is not linked onto any lists and must be destroyed using
 * free_device.
 */
static struct btrfs_device *__alloc_device(void)
{
	struct btrfs_device *dev;

	dev = kzalloc(sizeof(*dev), GFP_KERNEL);
	if (!dev)
		return ERR_PTR(-ENOMEM);

	/*
	 * Preallocate a bio that's always going to be used for flushing device
	 * barriers and matches the device lifespan
	 */
	dev->flush_bio = bio_alloc_bioset(GFP_KERNEL, 0, NULL);
	if (!dev->flush_bio) {
		kfree(dev);
		return ERR_PTR(-ENOMEM);
	}

	INIT_LIST_HEAD(&dev->dev_list);
	INIT_LIST_HEAD(&dev->dev_alloc_list);
	INIT_LIST_HEAD(&dev->resized_list);

	spin_lock_init(&dev->io_lock);

	spin_lock_init(&dev->reada_lock);
	atomic_set(&dev->reada_in_flight, 0);
	atomic_set(&dev->dev_stats_ccnt, 0);
	btrfs_device_data_ordered_init(dev);
	INIT_RADIX_TREE(&dev->reada_zones, GFP_NOFS & ~__GFP_DIRECT_RECLAIM);
	INIT_RADIX_TREE(&dev->reada_extents, GFP_NOFS & ~__GFP_DIRECT_RECLAIM);

	return dev;
}

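/*
 * Expected calling pattern (illustrative only), per the comment above: check
 * the ERR_PTR() result and release an unused device with free_device():
 *
 *	dev = __alloc_device();
 *	if (IS_ERR(dev))
 *		return PTR_ERR(dev);
 *	...
 *	free_device(dev);
 */
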
/*
 * Find a device specified by @devid or @uuid in the list of @fs_devices, or
 * return NULL.
 *
 * If devid and uuid are both specified, the match must be exact, otherwise
 * only devid is used.
 */
static struct btrfs_device *find_device(struct btrfs_fs_devices *fs_devices,
		u64 devid, const u8 *uuid)
{
	struct list_head *head = &fs_devices->devices;
	struct btrfs_device *dev;

	list_for_each_entry(dev, head, dev_list) {
		if (dev->devid == devid &&
		    (!uuid || !memcmp(dev->uuid, uuid, BTRFS_UUID_SIZE))) {
			return dev;
		}
	}
	return NULL;
}

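/*
 * Example lookup (illustrative only), matching how device_list_add() below
 * resolves a scanned superblock to an already known device:
 *
 *	device = find_device(fs_devices, devid, disk_super->dev_item.uuid);
 *	if (!device)
 *		... the devid/uuid pair has not been seen before ...
 */
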
static noinline struct btrfs_fs_devices *find_fsid(u8 *fsid)
{
	struct btrfs_fs_devices *fs_devices;

	list_for_each_entry(fs_devices, &fs_uuids, list) {
		if (memcmp(fsid, fs_devices->fsid, BTRFS_FSID_SIZE) == 0)
			return fs_devices;
	}
	return NULL;
}

static int
btrfs_get_bdev_and_sb(const char *device_path, fmode_t flags, void *holder,
		      int flush, struct block_device **bdev,
		      struct buffer_head **bh)
{
	int ret;

	*bdev = blkdev_get_by_path(device_path, flags, holder);

	if (IS_ERR(*bdev)) {
		ret = PTR_ERR(*bdev);
		goto error;
	}

	if (flush)
		filemap_write_and_wait((*bdev)->bd_inode->i_mapping);
	ret = set_blocksize(*bdev, BTRFS_BDEV_BLOCKSIZE);
	if (ret) {
		blkdev_put(*bdev, flags);
		goto error;
	}
	invalidate_bdev(*bdev);
	*bh = btrfs_read_dev_super(*bdev);
	if (IS_ERR(*bh)) {
		ret = PTR_ERR(*bh);
		blkdev_put(*bdev, flags);
		goto error;
	}

	return 0;

error:
	*bdev = NULL;
	*bh = NULL;
	return ret;
}

static void requeue_list(struct btrfs_pending_bios *pending_bios,
			struct bio *head, struct bio *tail)
{

	struct bio *old_head;

	old_head = pending_bios->head;
	pending_bios->head = head;
	if (pending_bios->tail)
		tail->bi_next = old_head;
	else
		pending_bios->tail = tail;
}

/*
 * we try to collect pending bios for a device so we don't get a large
 * number of procs sending bios down to the same device.  This greatly
 * improves the scheduler's ability to collect and merge the bios.
 *
 * But, it also turns into a long list of bios to process and that is sure
 * to eventually make the worker thread block.  The solution here is to
 * make some progress and then put this work struct back at the end of
 * the list if the block device is congested.  This way, multiple devices
 * can make progress from a single worker thread.
 */
static noinline void run_scheduled_bios(struct btrfs_device *device)
{
	struct btrfs_fs_info *fs_info = device->fs_info;
	struct bio *pending;
	struct backing_dev_info *bdi;
	struct btrfs_pending_bios *pending_bios;
	struct bio *tail;
	struct bio *cur;
	int again = 0;
	unsigned long num_run;
	unsigned long batch_run = 0;
	unsigned long last_waited = 0;
	int force_reg = 0;
	int sync_pending = 0;
	struct blk_plug plug;

	/*
	 * this function runs all the bios we've collected for
	 * a particular device.  We don't want to wander off to
	 * another device without first sending all of these down.
	 * So, set up a plug here and finish it off before we return
	 */
	blk_start_plug(&plug);

	bdi = device->bdev->bd_bdi;

loop:
	spin_lock(&device->io_lock);

loop_lock:
	num_run = 0;

	/* take all the bios off the list at once and process them
	 * later on (without the lock held).  But, remember the
	 * tail and other pointers so the bios can be properly reinserted
	 * into the list if we hit congestion
	 */
	if (!force_reg && device->pending_sync_bios.head) {
		pending_bios = &device->pending_sync_bios;
		force_reg = 1;
	} else {
		pending_bios = &device->pending_bios;
		force_reg = 0;
	}

	pending = pending_bios->head;
	tail = pending_bios->tail;
	WARN_ON(pending && !tail);

	/*
	 * if pending was null this time around, no bios need processing
	 * at all and we can stop.  Otherwise it'll loop back up again
	 * and do an additional check so no bios are missed.
	 *
	 * device->running_pending is used to synchronize with the
	 * schedule_bio code.
	 */
	if (device->pending_sync_bios.head == NULL &&
	    device->pending_bios.head == NULL) {
		again = 0;
		device->running_pending = 0;
	} else {
		again = 1;
		device->running_pending = 1;
	}

	pending_bios->head = NULL;
	pending_bios->tail = NULL;

	spin_unlock(&device->io_lock);

	while (pending) {

		rmb();
		/* we want to work on both lists, but do more bios on the
		 * sync list than the regular list
		 */
		if ((num_run > 32 &&
		    pending_bios != &device->pending_sync_bios &&
		    device->pending_sync_bios.head) ||
		   (num_run > 64 && pending_bios == &device->pending_sync_bios &&
		    device->pending_bios.head)) {
			spin_lock(&device->io_lock);
			requeue_list(pending_bios, pending, tail);
			goto loop_lock;
		}

		cur = pending;
		pending = pending->bi_next;
		cur->bi_next = NULL;

		BUG_ON(atomic_read(&cur->__bi_cnt) == 0);

		/*
		 * if we're doing the sync list, record that our
		 * plug has some sync requests on it
		 *
		 * If we're doing the regular list and there are
		 * sync requests sitting around, unplug before
		 * we add more
		 */
		if (pending_bios == &device->pending_sync_bios) {
			sync_pending = 1;
		} else if (sync_pending) {
			blk_finish_plug(&plug);
			blk_start_plug(&plug);
			sync_pending = 0;
		}

		btrfsic_submit_bio(cur);
		num_run++;
		batch_run++;

		cond_resched();

		/*
		 * we made progress, there is more work to do and the bdi
		 * is now congested.  Back off and let other work structs
		 * run instead
		 */
		if (pending && bdi_write_congested(bdi) && batch_run > 8 &&
		    fs_info->fs_devices->open_devices > 1) {
			struct io_context *ioc;

			ioc = current->io_context;

			/*
			 * the main goal here is that we don't want to
			 * block if we're going to be able to submit
			 * more requests without blocking.
			 *
			 * This code does two great things: it pokes into
			 * the elevator code from a filesystem _and_
			 * it makes assumptions about how batching works.
			 */
			if (ioc && ioc->nr_batch_requests > 0 &&
			    time_before(jiffies, ioc->last_waited + HZ/50UL) &&
			    (last_waited == 0 ||
			     ioc->last_waited == last_waited)) {
				/*
				 * we want to go through our batch of
				 * requests and stop.  So, we copy out
				 * the ioc->last_waited time and test
				 * against it before looping
				 */
				last_waited = ioc->last_waited;
				cond_resched();
				continue;
			}
			spin_lock(&device->io_lock);
			requeue_list(pending_bios, pending, tail);
			device->running_pending = 1;

			spin_unlock(&device->io_lock);
			btrfs_queue_work(fs_info->submit_workers,
					 &device->work);
			goto done;
		}
	}

	cond_resched();
	if (again)
		goto loop;

	spin_lock(&device->io_lock);
	if (device->pending_bios.head || device->pending_sync_bios.head)
		goto loop_lock;
	spin_unlock(&device->io_lock);

done:
	blk_finish_plug(&plug);
}

static void pending_bios_fn(struct btrfs_work *work)
{
	struct btrfs_device *device;

	device = container_of(work, struct btrfs_device, work);
	run_scheduled_bios(device);
}


static void btrfs_free_stale_device(struct btrfs_device *cur_dev)
{
	struct btrfs_fs_devices *fs_devs;
	struct btrfs_device *dev;

	if (!cur_dev->name)
		return;

	list_for_each_entry(fs_devs, &fs_uuids, list) {
		int del = 1;

		if (fs_devs->opened)
			continue;
		if (fs_devs->seeding)
			continue;

		list_for_each_entry(dev, &fs_devs->devices, dev_list) {

			if (dev == cur_dev)
				continue;
			if (!dev->name)
				continue;

			/*
			 * Todo: This won't be enough. What if the same device
			 * comes back (with a new uuid) under its mapper path?
			 * But for now, this does help, as mostly an admin will
			 * either use the mapper or the non-mapper path throughout.
			 */
			rcu_read_lock();
			del = strcmp(rcu_str_deref(dev->name),
						rcu_str_deref(cur_dev->name));
			rcu_read_unlock();
			if (!del)
				break;
		}

		if (!del) {
			/* delete the stale device */
			if (fs_devs->num_devices == 1) {
				btrfs_sysfs_remove_fsid(fs_devs);
				list_del(&fs_devs->list);
				free_fs_devices(fs_devs);
			} else {
				fs_devs->num_devices--;
				list_del(&dev->dev_list);
				free_device(dev);
			}
			break;
		}
	}
}

static int btrfs_open_one_device(struct btrfs_fs_devices *fs_devices,
			struct btrfs_device *device, fmode_t flags,
			void *holder)
{
	struct request_queue *q;
	struct block_device *bdev;
	struct buffer_head *bh;
	struct btrfs_super_block *disk_super;
	u64 devid;
	int ret;

	if (device->bdev)
		return -EINVAL;
	if (!device->name)
		return -EINVAL;

	ret = btrfs_get_bdev_and_sb(device->name->str, flags, holder, 1,
				    &bdev, &bh);
	if (ret)
		return ret;

	disk_super = (struct btrfs_super_block *)bh->b_data;
	devid = btrfs_stack_device_id(&disk_super->dev_item);
	if (devid != device->devid)
		goto error_brelse;

	if (memcmp(device->uuid, disk_super->dev_item.uuid, BTRFS_UUID_SIZE))
		goto error_brelse;

	device->generation = btrfs_super_generation(disk_super);

	if (btrfs_super_flags(disk_super) & BTRFS_SUPER_FLAG_SEEDING) {
		clear_bit(BTRFS_DEV_STATE_WRITEABLE, &device->dev_state);
		fs_devices->seeding = 1;
	} else {
		if (bdev_read_only(bdev))
			clear_bit(BTRFS_DEV_STATE_WRITEABLE, &device->dev_state);
		else
			set_bit(BTRFS_DEV_STATE_WRITEABLE, &device->dev_state);
	}

	q = bdev_get_queue(bdev);
	if (!blk_queue_nonrot(q))
		fs_devices->rotating = 1;

	device->bdev = bdev;
	clear_bit(BTRFS_DEV_STATE_IN_FS_METADATA, &device->dev_state);
	device->mode = flags;

	fs_devices->open_devices++;
	if (test_bit(BTRFS_DEV_STATE_WRITEABLE, &device->dev_state) &&
	    device->devid != BTRFS_DEV_REPLACE_DEVID) {
		fs_devices->rw_devices++;
		list_add(&device->dev_alloc_list, &fs_devices->alloc_list);
	}
	brelse(bh);

	return 0;

error_brelse:
	brelse(bh);
	blkdev_put(bdev, flags);

	return -EINVAL;
}

/*
 * Add new device to list of registered devices
 *
 * Returns:
 * 1   - first time device is seen
 * 0   - device already known
 * < 0 - error
 */
static noinline int device_list_add(const char *path,
			   struct btrfs_super_block *disk_super,
			   u64 devid, struct btrfs_fs_devices **fs_devices_ret)
{
	struct btrfs_device *device;
	struct btrfs_fs_devices *fs_devices;
	struct rcu_string *name;
	int ret = 0;
	u64 found_transid = btrfs_super_generation(disk_super);

	fs_devices = find_fsid(disk_super->fsid);
	if (!fs_devices) {
		fs_devices = alloc_fs_devices(disk_super->fsid);
		if (IS_ERR(fs_devices))
			return PTR_ERR(fs_devices);

		list_add(&fs_devices->list, &fs_uuids);

		device = NULL;
	} else {
		device = find_device(fs_devices, devid,
				disk_super->dev_item.uuid);
	}

	if (!device) {
		if (fs_devices->opened)
			return -EBUSY;

		device = btrfs_alloc_device(NULL, &devid,
					    disk_super->dev_item.uuid);
		if (IS_ERR(device)) {
			/* we can safely leave the fs_devices entry around */
			return PTR_ERR(device);
		}

		name = rcu_string_strdup(path, GFP_NOFS);
		if (!name) {
			free_device(device);
			return -ENOMEM;
		}
		rcu_assign_pointer(device->name, name);

		mutex_lock(&fs_devices->device_list_mutex);
		list_add_rcu(&device->dev_list, &fs_devices->devices);
		fs_devices->num_devices++;
		mutex_unlock(&fs_devices->device_list_mutex);

		ret = 1;
		device->fs_devices = fs_devices;
	} else if (!device->name || strcmp(device->name->str, path)) {
		/*
		 * When FS is already mounted.
		 * 1. If you are here and if the device->name is NULL that
		 *    means this device was missing at the time of FS mount.
		 * 2. If you are here and if the device->name is different
		 *    from 'path' that means either
		 *      a. The same device disappeared and reappeared with
		 *         a different name, or
		 *      b. The missing-disk-which-was-replaced has
		 *         reappeared now.
		 *
		 * We must allow 1 and 2a above. But 2b would be spurious
		 * and unintentional.
		 *
		 * Further, in case of 1 and 2a above, the disk at 'path'
		 * would have missed some transactions while it was away and
		 * in case of 2a the stale bdev has to be updated as well.
		 * 2b must not be allowed at any time.
		 */

		/*
		 * For now, we do allow update to btrfs_fs_device through the
		 * btrfs dev scan cli after FS has been mounted.  We're still
		 * tracking a problem where systems fail mount by subvolume id
		 * when we reject replacement on a mounted FS.
		 */
		if (!fs_devices->opened && found_transid < device->generation) {
			/*
			 * That is if the FS is _not_ mounted and if you
			 * are here, that means there is more than one
			 * disk with the same uuid and devid. We keep the one
			 * with the larger generation number or the last-in if
			 * generations are equal.
			 */
			return -EEXIST;
		}

		name = rcu_string_strdup(path, GFP_NOFS);
		if (!name)
			return -ENOMEM;
		rcu_string_free(device->name);
		rcu_assign_pointer(device->name, name);
		if (test_bit(BTRFS_DEV_STATE_MISSING, &device->dev_state)) {
			fs_devices->missing_devices--;
			clear_bit(BTRFS_DEV_STATE_MISSING, &device->dev_state);
		}
	}

	/*
	 * Unmount does not free the btrfs_device struct but would zero
	 * generation along with most of the other members. So just update
	 * it back. We need it to pick the disk with the largest generation
	 * (as above).
	 */
	if (!fs_devices->opened)
		device->generation = found_transid;

	/*
	 * if there is a new btrfs on an already registered device,
	 * then remove the stale device entry.
	 */
	if (ret > 0)
		btrfs_free_stale_device(device);

	*fs_devices_ret = fs_devices;

	return ret;
}

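/*
 * Sketch of how the return convention documented above is consumed by a
 * caller (illustrative only; the device scan path is the in-tree user):
 *
 *	ret = device_list_add(path, disk_super, devid, &fs_devices);
 *	if (ret > 0)
 *		... first time this device has been seen, report it ...
 *	else if (ret == 0)
 *		... device was already registered ...
 *	else
 *		... error, e.g. -EBUSY, -ENOMEM or -EEXIST as above ...
 */
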
static struct btrfs_fs_devices *clone_fs_devices(struct btrfs_fs_devices *orig)
{
	struct btrfs_fs_devices *fs_devices;
	struct btrfs_device *device;
	struct btrfs_device *orig_dev;

	fs_devices = alloc_fs_devices(orig->fsid);
	if (IS_ERR(fs_devices))
		return fs_devices;

	mutex_lock(&orig->device_list_mutex);
	fs_devices->total_devices = orig->total_devices;

	/* We hold the volume lock, so it is safe to get the devices. */
	list_for_each_entry(orig_dev, &orig->devices, dev_list) {
		struct rcu_string *name;

		device = btrfs_alloc_device(NULL, &orig_dev->devid,
					    orig_dev->uuid);
		if (IS_ERR(device))
			goto error;

		/*
		 * This is ok to do without the RCU read lock held because we
		 * hold the uuid mutex, so nothing we touch in here is going
		 * to disappear.
		 */
		if (orig_dev->name) {
			name = rcu_string_strdup(orig_dev->name->str,
					GFP_KERNEL);
			if (!name) {
				free_device(device);
				goto error;
			}
			rcu_assign_pointer(device->name, name);
		}

		list_add(&device->dev_list, &fs_devices->devices);
		device->fs_devices = fs_devices;
		fs_devices->num_devices++;
	}
	mutex_unlock(&orig->device_list_mutex);
	return fs_devices;
error:
	mutex_unlock(&orig->device_list_mutex);
	free_fs_devices(fs_devices);
	return ERR_PTR(-ENOMEM);
}

void btrfs_close_extra_devices(struct btrfs_fs_devices *fs_devices, int step)
{
	struct btrfs_device *device, *next;
	struct btrfs_device *latest_dev = NULL;

	mutex_lock(&uuid_mutex);
again:
	/* This is the initialized path, it is safe to release the devices. */
	list_for_each_entry_safe(device, next, &fs_devices->devices, dev_list) {
		if (test_bit(BTRFS_DEV_STATE_IN_FS_METADATA,
							&device->dev_state)) {
			if (!test_bit(BTRFS_DEV_STATE_REPLACE_TGT,
			     &device->dev_state) &&
			     (!latest_dev ||
			      device->generation > latest_dev->generation)) {
				latest_dev = device;
			}
			continue;
		}

		if (device->devid == BTRFS_DEV_REPLACE_DEVID) {
			/*
			 * In the first step, keep the device which has
			 * the correct fsid and the devid that is used
			 * for the dev_replace procedure.
			 * In the second step, the dev_replace state is
			 * read from the device tree and it is known
			 * whether the procedure is really active or
			 * not, which means whether this device is
			 * used or whether it should be removed.
			 */
			if (step == 0 || test_bit(BTRFS_DEV_STATE_REPLACE_TGT,
						  &device->dev_state)) {
				continue;
			}
		}
		if (device->bdev) {
			blkdev_put(device->bdev, device->mode);
			device->bdev = NULL;
			fs_devices->open_devices--;
		}
		if (test_bit(BTRFS_DEV_STATE_WRITEABLE, &device->dev_state)) {
			list_del_init(&device->dev_alloc_list);
			clear_bit(BTRFS_DEV_STATE_WRITEABLE, &device->dev_state);
			if (!test_bit(BTRFS_DEV_STATE_REPLACE_TGT,
				      &device->dev_state))
				fs_devices->rw_devices--;
		}
		list_del_init(&device->dev_list);
		fs_devices->num_devices--;
		free_device(device);
	}

	if (fs_devices->seed) {
		fs_devices = fs_devices->seed;
		goto again;
	}

	fs_devices->latest_bdev = latest_dev->bdev;

	mutex_unlock(&uuid_mutex);
}

static void free_device_rcu(struct rcu_head *head)
{
	struct btrfs_device *device;

	device = container_of(head, struct btrfs_device, rcu);
	free_device(device);
}

static void btrfs_close_bdev(struct btrfs_device *device)
{
	if (!device->bdev)
		return;

	if (test_bit(BTRFS_DEV_STATE_WRITEABLE, &device->dev_state)) {
		sync_blockdev(device->bdev);
		invalidate_bdev(device->bdev);
	}

	blkdev_put(device->bdev, device->mode);
}

static void btrfs_prepare_close_one_device(struct btrfs_device *device)
{
	struct btrfs_fs_devices *fs_devices = device->fs_devices;
	struct btrfs_device *new_device;
	struct rcu_string *name;

	if (device->bdev)
		fs_devices->open_devices--;

	if (test_bit(BTRFS_DEV_STATE_WRITEABLE, &device->dev_state) &&
	    device->devid != BTRFS_DEV_REPLACE_DEVID) {
		list_del_init(&device->dev_alloc_list);
		fs_devices->rw_devices--;
	}

	if (test_bit(BTRFS_DEV_STATE_MISSING, &device->dev_state))
		fs_devices->missing_devices--;

	new_device = btrfs_alloc_device(NULL, &device->devid,
					device->uuid);
	BUG_ON(IS_ERR(new_device)); /* -ENOMEM */

	/* Safe because we are under uuid_mutex */
	if (device->name) {
		name = rcu_string_strdup(device->name->str, GFP_NOFS);
		BUG_ON(!name); /* -ENOMEM */
		rcu_assign_pointer(new_device->name, name);
	}

	list_replace_rcu(&device->dev_list, &new_device->dev_list);
	new_device->fs_devices = device->fs_devices;
}

static int __btrfs_close_devices(struct btrfs_fs_devices *fs_devices)
{
	struct btrfs_device *device, *tmp;
	struct list_head pending_put;

	INIT_LIST_HEAD(&pending_put);

	if (--fs_devices->opened > 0)
		return 0;

	mutex_lock(&fs_devices->device_list_mutex);
	list_for_each_entry_safe(device, tmp, &fs_devices->devices, dev_list) {
		btrfs_prepare_close_one_device(device);
		list_add(&device->dev_list, &pending_put);
	}
	mutex_unlock(&fs_devices->device_list_mutex);

	/*
	 * btrfs_show_devname() uses the device_list_mutex, and a call to
	 * blkdev_put() can sometimes lead to the VFS calling back into this
	 * function. So, for now, do the put outside of the
	 * device_list_mutex.
	 */
	while (!list_empty(&pending_put)) {
		device = list_first_entry(&pending_put,
				struct btrfs_device, dev_list);
		list_del(&device->dev_list);
		btrfs_close_bdev(device);
		call_rcu(&device->rcu, free_device_rcu);
	}

	WARN_ON(fs_devices->open_devices);
	WARN_ON(fs_devices->rw_devices);
	fs_devices->opened = 0;
	fs_devices->seeding = 0;

	return 0;
}

int btrfs_close_devices(struct btrfs_fs_devices *fs_devices)
{
	struct btrfs_fs_devices *seed_devices = NULL;
	int ret;

	mutex_lock(&uuid_mutex);
	ret = __btrfs_close_devices(fs_devices);
	if (!fs_devices->opened) {
		seed_devices = fs_devices->seed;
		fs_devices->seed = NULL;
	}
	mutex_unlock(&uuid_mutex);

	while (seed_devices) {
		fs_devices = seed_devices;
		seed_devices = fs_devices->seed;
		__btrfs_close_devices(fs_devices);
		free_fs_devices(fs_devices);
	}
	return ret;
}

static int __btrfs_open_devices(struct btrfs_fs_devices *fs_devices,
				fmode_t flags, void *holder)
{
	struct list_head *head = &fs_devices->devices;
	struct btrfs_device *device;
	struct btrfs_device *latest_dev = NULL;
	int ret = 0;

	flags |= FMODE_EXCL;

	list_for_each_entry(device, head, dev_list) {
		/* Just open everything we can; ignore failures here */
		if (btrfs_open_one_device(fs_devices, device, flags, holder))
			continue;

		if (!latest_dev ||
		    device->generation > latest_dev->generation)
			latest_dev = device;
	}
	if (fs_devices->open_devices == 0) {
		ret = -EINVAL;
		goto out;
	}
	fs_devices->opened = 1;
	fs_devices->latest_bdev = latest_dev->bdev;
	fs_devices->total_rw_bytes = 0;
out:
	return ret;
}

int btrfs_open_devices(struct btrfs_fs_devices *fs_devices,
		       fmode_t flags, void *holder)
{
	int ret;

	mutex_lock(&uuid_mutex);
	if (fs_devices->opened) {
		fs_devices->opened++;
		ret = 0;
	} else {
		ret = __btrfs_open_devices(fs_devices, flags, holder);
	}
	mutex_unlock(&uuid_mutex);
	return ret;
}

static void btrfs_release_disk_super(struct page *page)
{
	kunmap(page);
	put_page(page);
}

static int btrfs_read_disk_super(struct block_device *bdev, u64 bytenr,
				 struct page **page,
				 struct btrfs_super_block **disk_super)
{
	void *p;
	pgoff_t index;

	/* make sure our super fits in the device */
	if (bytenr + PAGE_SIZE >= i_size_read(bdev->bd_inode))
		return 1;

	/* make sure our super fits in the page */
	if (sizeof(**disk_super) > PAGE_SIZE)
		return 1;

	/* make sure our super doesn't straddle pages on disk */
	index = bytenr >> PAGE_SHIFT;
	if ((bytenr + sizeof(**disk_super) - 1) >> PAGE_SHIFT != index)
		return 1;

	/* pull in the page with our super */
	*page = read_cache_page_gfp(bdev->bd_inode->i_mapping,
				   index, GFP_KERNEL);

	if (IS_ERR_OR_NULL(*page))
		return 1;

	p = kmap(*page);

	/* align our pointer to the offset of the super block */
	*disk_super = p + (bytenr & ~PAGE_MASK);

	if (btrfs_super_bytenr(*disk_super) != bytenr ||
	    btrfs_super_magic(*disk_super) != BTRFS_MAGIC) {
		btrfs_release_disk_super(*page);
		return 1;
	}

	if ((*disk_super)->label[0] &&
		(*disk_super)->label[BTRFS_LABEL_SIZE - 1])
		(*disk_super)->label[BTRFS_LABEL_SIZE - 1] = '\0';

	return 0;
}

11786f60cbd3SDavid Sterba /*
11796f60cbd3SDavid Sterba  * Look for a btrfs signature on a device. This may be called out of the mount path
11806f60cbd3SDavid Sterba  * and we are not allowed to call set_blocksize during the scan. The superblock
11816f60cbd3SDavid Sterba  * is read via the pagecache.
11826f60cbd3SDavid Sterba  */
118397288f2cSChristoph Hellwig int btrfs_scan_one_device(const char *path, fmode_t flags, void *holder,
11848a4b83ccSChris Mason 			  struct btrfs_fs_devices **fs_devices_ret)
11858a4b83ccSChris Mason {
11868a4b83ccSChris Mason 	struct btrfs_super_block *disk_super;
11878a4b83ccSChris Mason 	struct block_device *bdev;
11886f60cbd3SDavid Sterba 	struct page *page;
118905a5c55dSAnand Jain 	int ret;
11908a4b83ccSChris Mason 	u64 devid;
1191f2984462SChris Mason 	u64 transid;
119202db0844SJosef Bacik 	u64 total_devices;
11936f60cbd3SDavid Sterba 	u64 bytenr;
11948a4b83ccSChris Mason 
11956f60cbd3SDavid Sterba 	/*
11966f60cbd3SDavid Sterba 	 * we would like to check all the supers, but that would make
11976f60cbd3SDavid Sterba 	 * a btrfs mount succeed after a mkfs from a different FS.
11986f60cbd3SDavid Sterba 	 * So, we need to add a special mount option to scan for
11996f60cbd3SDavid Sterba 	 * later supers, using BTRFS_SUPER_MIRROR_MAX instead
12006f60cbd3SDavid Sterba 	 */
12016f60cbd3SDavid Sterba 	bytenr = btrfs_sb_offset(0);
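	/*
	 * For mirror 0, btrfs_sb_offset() resolves to BTRFS_SUPER_INFO_OFFSET
	 * (64KiB), so only the primary superblock copy is examined here.
	 */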
1202d4d77629STejun Heo 	flags |= FMODE_EXCL;
120310f6327bSAl Viro 	mutex_lock(&uuid_mutex);
12046f60cbd3SDavid Sterba 
12056f60cbd3SDavid Sterba 	bdev = blkdev_get_by_path(path, flags, holder);
12066f60cbd3SDavid Sterba 	if (IS_ERR(bdev)) {
12076f60cbd3SDavid Sterba 		ret = PTR_ERR(bdev);
1208beaf8ab3SStefan Behrens 		goto error;
12096f60cbd3SDavid Sterba 	}
12106f60cbd3SDavid Sterba 
121105a5c55dSAnand Jain 	if (btrfs_read_disk_super(bdev, bytenr, &page, &disk_super)) {
121205a5c55dSAnand Jain 		ret = -EINVAL;
12136f60cbd3SDavid Sterba 		goto error_bdev_put;
121405a5c55dSAnand Jain 	}
12156f60cbd3SDavid Sterba 
1216a343832fSXiao Guangrong 	devid = btrfs_stack_device_id(&disk_super->dev_item);
1217f2984462SChris Mason 	transid = btrfs_super_generation(disk_super);
121802db0844SJosef Bacik 	total_devices = btrfs_super_num_devices(disk_super);
12196f60cbd3SDavid Sterba 
122060999ca4SDavid Sterba 	ret = device_list_add(path, disk_super, devid, fs_devices_ret);
122160999ca4SDavid Sterba 	if (ret > 0) {
122205a5c55dSAnand Jain 		if (disk_super->label[0])
122362e85577SJeff Mahoney 			pr_info("BTRFS: device label %s ", disk_super->label);
122405a5c55dSAnand Jain 		else
122562e85577SJeff Mahoney 			pr_info("BTRFS: device fsid %pU ", disk_super->fsid);
12266f60cbd3SDavid Sterba 
122762e85577SJeff Mahoney 		pr_cont("devid %llu transid %llu %s\n", devid, transid, path);
122860999ca4SDavid Sterba 		ret = 0;
122960999ca4SDavid Sterba 	}
123002db0844SJosef Bacik 	if (!ret && fs_devices_ret)
123102db0844SJosef Bacik 		(*fs_devices_ret)->total_devices = total_devices;
12326f60cbd3SDavid Sterba 
12336cf86a00SAnand Jain 	btrfs_release_disk_super(page);
12346f60cbd3SDavid Sterba 
12356f60cbd3SDavid Sterba error_bdev_put:
1236d4d77629STejun Heo 	blkdev_put(bdev, flags);
12378a4b83ccSChris Mason error:
1238beaf8ab3SStefan Behrens 	mutex_unlock(&uuid_mutex);
12398a4b83ccSChris Mason 	return ret;
12408a4b83ccSChris Mason }
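/*
 * Usage note: btrfs_scan_one_device() is how devices become known to btrfs
 * ahead of mounting; callers outside this file include the BTRFS_IOC_SCAN_DEV
 * control ioctl (issued by e.g. "btrfs device scan") and the mount path.
 */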
12410b86a832SChris Mason 
12426d07bcecSMiao Xie /* helper to account for the used device space in the range */
12436d07bcecSMiao Xie int btrfs_account_dev_extents_size(struct btrfs_device *device, u64 start,
12446d07bcecSMiao Xie 				   u64 end, u64 *length)
12450b86a832SChris Mason {
12460b86a832SChris Mason 	struct btrfs_key key;
1247fb456252SJeff Mahoney 	struct btrfs_root *root = device->fs_info->dev_root;
12486d07bcecSMiao Xie 	struct btrfs_dev_extent *dev_extent;
12492b82032cSYan Zheng 	struct btrfs_path *path;
12506d07bcecSMiao Xie 	u64 extent_end;
12510b86a832SChris Mason 	int ret;
12526d07bcecSMiao Xie 	int slot;
12530b86a832SChris Mason 	struct extent_buffer *l;
12540b86a832SChris Mason 
12556d07bcecSMiao Xie 	*length = 0;
12566d07bcecSMiao Xie 
1257401e29c1SAnand Jain 	if (start >= device->total_bytes ||
1258401e29c1SAnand Jain 		test_bit(BTRFS_DEV_STATE_REPLACE_TGT, &device->dev_state))
12596d07bcecSMiao Xie 		return 0;
12606d07bcecSMiao Xie 
12612b82032cSYan Zheng 	path = btrfs_alloc_path();
12622b82032cSYan Zheng 	if (!path)
12632b82032cSYan Zheng 		return -ENOMEM;
1264e4058b54SDavid Sterba 	path->reada = READA_FORWARD;
12658f18cf13SChris Mason 
12660b86a832SChris Mason 	key.objectid = device->devid;
12676d07bcecSMiao Xie 	key.offset = start;
12680b86a832SChris Mason 	key.type = BTRFS_DEV_EXTENT_KEY;
12696d07bcecSMiao Xie 
12706d07bcecSMiao Xie 	ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
12710b86a832SChris Mason 	if (ret < 0)
12726d07bcecSMiao Xie 		goto out;
12731fcbac58SYan Zheng 	if (ret > 0) {
12741fcbac58SYan Zheng 		ret = btrfs_previous_item(root, path, key.objectid, key.type);
12750b86a832SChris Mason 		if (ret < 0)
12766d07bcecSMiao Xie 			goto out;
12771fcbac58SYan Zheng 	}
12786d07bcecSMiao Xie 
12790b86a832SChris Mason 	while (1) {
12800b86a832SChris Mason 		l = path->nodes[0];
12810b86a832SChris Mason 		slot = path->slots[0];
12820b86a832SChris Mason 		if (slot >= btrfs_header_nritems(l)) {
12830b86a832SChris Mason 			ret = btrfs_next_leaf(root, path);
12840b86a832SChris Mason 			if (ret == 0)
12850b86a832SChris Mason 				continue;
12860b86a832SChris Mason 			if (ret < 0)
12876d07bcecSMiao Xie 				goto out;
12886d07bcecSMiao Xie 
12896d07bcecSMiao Xie 			break;
12900b86a832SChris Mason 		}
12910b86a832SChris Mason 		btrfs_item_key_to_cpu(l, &key, slot);
12920b86a832SChris Mason 
12930b86a832SChris Mason 		if (key.objectid < device->devid)
12940b86a832SChris Mason 			goto next;
12950b86a832SChris Mason 
12960b86a832SChris Mason 		if (key.objectid > device->devid)
12976d07bcecSMiao Xie 			break;
12980b86a832SChris Mason 
1299962a298fSDavid Sterba 		if (key.type != BTRFS_DEV_EXTENT_KEY)
13000b86a832SChris Mason 			goto next;
13010b86a832SChris Mason 
13020b86a832SChris Mason 		dev_extent = btrfs_item_ptr(l, slot, struct btrfs_dev_extent);
13036d07bcecSMiao Xie 		extent_end = key.offset + btrfs_dev_extent_length(l,
13046d07bcecSMiao Xie 								  dev_extent);
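		/*
		 * The dev extent [key.offset, extent_end) can overlap the
		 * queried range [start, end] in four ways, handled below: it
		 * covers the whole range, only the left edge, lies entirely
		 * inside, or covers only the right edge.  An extent starting
		 * past 'end' terminates the walk.
		 */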
13056d07bcecSMiao Xie 		if (key.offset <= start && extent_end > end) {
13066d07bcecSMiao Xie 			*length = end - start + 1;
13076d07bcecSMiao Xie 			break;
13086d07bcecSMiao Xie 		} else if (key.offset <= start && extent_end > start)
13096d07bcecSMiao Xie 			*length += extent_end - start;
13106d07bcecSMiao Xie 		else if (key.offset > start && extent_end <= end)
13116d07bcecSMiao Xie 			*length += extent_end - key.offset;
13126d07bcecSMiao Xie 		else if (key.offset > start && key.offset <= end) {
13136d07bcecSMiao Xie 			*length += end - key.offset + 1;
13146d07bcecSMiao Xie 			break;
13156d07bcecSMiao Xie 		} else if (key.offset > end)
13166d07bcecSMiao Xie 			break;
13176d07bcecSMiao Xie 
13186d07bcecSMiao Xie next:
13196d07bcecSMiao Xie 		path->slots[0]++;
13206d07bcecSMiao Xie 	}
13216d07bcecSMiao Xie 	ret = 0;
13226d07bcecSMiao Xie out:
13236d07bcecSMiao Xie 	btrfs_free_path(path);
13246d07bcecSMiao Xie 	return ret;
13256d07bcecSMiao Xie }
13266d07bcecSMiao Xie 
1327499f377fSJeff Mahoney static int contains_pending_extent(struct btrfs_transaction *transaction,
13286df9a95eSJosef Bacik 				   struct btrfs_device *device,
13296df9a95eSJosef Bacik 				   u64 *start, u64 len)
13306df9a95eSJosef Bacik {
1331fb456252SJeff Mahoney 	struct btrfs_fs_info *fs_info = device->fs_info;
13326df9a95eSJosef Bacik 	struct extent_map *em;
1333499f377fSJeff Mahoney 	struct list_head *search_list = &fs_info->pinned_chunks;
13346df9a95eSJosef Bacik 	int ret = 0;
13351b984508SForrest Liu 	u64 physical_start = *start;
13366df9a95eSJosef Bacik 
1337499f377fSJeff Mahoney 	if (transaction)
1338499f377fSJeff Mahoney 		search_list = &transaction->pending_chunks;
133904216820SFilipe Manana again:
134004216820SFilipe Manana 	list_for_each_entry(em, search_list, list) {
13416df9a95eSJosef Bacik 		struct map_lookup *map;
13426df9a95eSJosef Bacik 		int i;
13436df9a95eSJosef Bacik 
134495617d69SJeff Mahoney 		map = em->map_lookup;
13456df9a95eSJosef Bacik 		for (i = 0; i < map->num_stripes; i++) {
1346c152b63eSFilipe Manana 			u64 end;
1347c152b63eSFilipe Manana 
13486df9a95eSJosef Bacik 			if (map->stripes[i].dev != device)
13496df9a95eSJosef Bacik 				continue;
13501b984508SForrest Liu 			if (map->stripes[i].physical >= physical_start + len ||
13516df9a95eSJosef Bacik 			    map->stripes[i].physical + em->orig_block_len <=
13521b984508SForrest Liu 			    physical_start)
13536df9a95eSJosef Bacik 				continue;
1354c152b63eSFilipe Manana 			/*
1355c152b63eSFilipe Manana 			 * Make sure that while processing the pinned list we do
1356c152b63eSFilipe Manana 			 * not override our *start with a lower value, because
1357c152b63eSFilipe Manana 			 * we can have pinned chunks that fall within this
1358c152b63eSFilipe Manana 			 * device hole and that have lower physical addresses
1359c152b63eSFilipe Manana 			 * than the pending chunks we processed before. If we
1360c152b63eSFilipe Manana 			 * do not take this special care we can end up getting
1361c152b63eSFilipe Manana 			 * 2 pending chunks that start at the same physical
1362c152b63eSFilipe Manana 			 * device offsets because the end offset of a pinned
1363c152b63eSFilipe Manana 			 * chunk can be equal to the start offset of some
1364c152b63eSFilipe Manana 			 * pending chunk.
1365c152b63eSFilipe Manana 			 */
1366c152b63eSFilipe Manana 			end = map->stripes[i].physical + em->orig_block_len;
1367c152b63eSFilipe Manana 			if (end > *start) {
1368c152b63eSFilipe Manana 				*start = end;
13696df9a95eSJosef Bacik 				ret = 1;
13706df9a95eSJosef Bacik 			}
13716df9a95eSJosef Bacik 		}
1372c152b63eSFilipe Manana 	}
1373499f377fSJeff Mahoney 	if (search_list != &fs_info->pinned_chunks) {
1374499f377fSJeff Mahoney 		search_list = &fs_info->pinned_chunks;
137504216820SFilipe Manana 		goto again;
137604216820SFilipe Manana 	}
13776df9a95eSJosef Bacik 
13786df9a95eSJosef Bacik 	return ret;
13796df9a95eSJosef Bacik }
13806df9a95eSJosef Bacik 
13816df9a95eSJosef Bacik 
13820b86a832SChris Mason /*
1383499f377fSJeff Mahoney  * find_free_dev_extent_start - find free space in the specified device
13847bfc837dSMiao Xie  * @device:	  the device which we search the free space in
13857bfc837dSMiao Xie  * @num_bytes:	  the size of the free space that we need
1386499f377fSJeff Mahoney  * @search_start: the position from which to begin the search
13877bfc837dSMiao Xie  * @start:	  store the start of the free space.
1388499f377fSJeff Mahoney  * @len:	  the size of the free space that we find, or the size
1389499f377fSJeff Mahoney  *		  of the max free space if we don't find suitable free space
13907bfc837dSMiao Xie  *
13910b86a832SChris Mason  * this uses a pretty simple search; the expectation is that it is
13920b86a832SChris Mason  * called very infrequently and that a given device has a small number
13930b86a832SChris Mason  * of extents
13947bfc837dSMiao Xie  *
13957bfc837dSMiao Xie  * @start is used to store the start of the free space if we find it. But if we
13967bfc837dSMiao Xie  * don't find suitable free space, it will be used to store the start position
13977bfc837dSMiao Xie  * of the max free space.
13987bfc837dSMiao Xie  *
13997bfc837dSMiao Xie  * @len is used to store the size of the free space that we find.
14007bfc837dSMiao Xie  * But if we don't find suitable free space, it is used to store the size of
14017bfc837dSMiao Xie  * the max free space.
14020b86a832SChris Mason  */
1403499f377fSJeff Mahoney int find_free_dev_extent_start(struct btrfs_transaction *transaction,
14046df9a95eSJosef Bacik 			       struct btrfs_device *device, u64 num_bytes,
1405499f377fSJeff Mahoney 			       u64 search_start, u64 *start, u64 *len)
14060b86a832SChris Mason {
14070b246afaSJeff Mahoney 	struct btrfs_fs_info *fs_info = device->fs_info;
14080b246afaSJeff Mahoney 	struct btrfs_root *root = fs_info->dev_root;
14090b86a832SChris Mason 	struct btrfs_key key;
14107bfc837dSMiao Xie 	struct btrfs_dev_extent *dev_extent;
14110b86a832SChris Mason 	struct btrfs_path *path;
14127bfc837dSMiao Xie 	u64 hole_size;
14137bfc837dSMiao Xie 	u64 max_hole_start;
14147bfc837dSMiao Xie 	u64 max_hole_size;
14157bfc837dSMiao Xie 	u64 extent_end;
14160b86a832SChris Mason 	u64 search_end = device->total_bytes;
14170b86a832SChris Mason 	int ret;
14187bfc837dSMiao Xie 	int slot;
14190b86a832SChris Mason 	struct extent_buffer *l;
14208cdc7c5bSFilipe Manana 
14218cdc7c5bSFilipe Manana 	/*
14228cdc7c5bSFilipe Manana 	 * We don't want to overwrite the superblock on the drive nor any area
14238cdc7c5bSFilipe Manana 	 * used by the boot loader (grub for example), so we make sure to start
14248cdc7c5bSFilipe Manana 	 * at an offset of at least 1MB.
14258cdc7c5bSFilipe Manana 	 */
14260d0c71b3SDavid Sterba 	search_start = max_t(u64, search_start, SZ_1M);
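	/*
	 * The primary superblock itself (at 64KiB) lies inside this reserved
	 * first megabyte, so newly allocated dev extents cannot overlap it.
	 */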
14270b86a832SChris Mason 
14286df9a95eSJosef Bacik 	path = btrfs_alloc_path();
14296df9a95eSJosef Bacik 	if (!path)
14306df9a95eSJosef Bacik 		return -ENOMEM;
1431f2ab7618SZhao Lei 
14327bfc837dSMiao Xie 	max_hole_start = search_start;
14337bfc837dSMiao Xie 	max_hole_size = 0;
14347bfc837dSMiao Xie 
1435f2ab7618SZhao Lei again:
1436401e29c1SAnand Jain 	if (search_start >= search_end ||
1437401e29c1SAnand Jain 		test_bit(BTRFS_DEV_STATE_REPLACE_TGT, &device->dev_state)) {
14387bfc837dSMiao Xie 		ret = -ENOSPC;
14396df9a95eSJosef Bacik 		goto out;
14407bfc837dSMiao Xie 	}
14417bfc837dSMiao Xie 
1442e4058b54SDavid Sterba 	path->reada = READA_FORWARD;
14436df9a95eSJosef Bacik 	path->search_commit_root = 1;
14446df9a95eSJosef Bacik 	path->skip_locking = 1;
14457bfc837dSMiao Xie 
14460b86a832SChris Mason 	key.objectid = device->devid;
14470b86a832SChris Mason 	key.offset = search_start;
14480b86a832SChris Mason 	key.type = BTRFS_DEV_EXTENT_KEY;
14497bfc837dSMiao Xie 
1450125ccb0aSLi Zefan 	ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
14510b86a832SChris Mason 	if (ret < 0)
14527bfc837dSMiao Xie 		goto out;
14530b86a832SChris Mason 	if (ret > 0) {
14540b86a832SChris Mason 		ret = btrfs_previous_item(root, path, key.objectid, key.type);
14550b86a832SChris Mason 		if (ret < 0)
14567bfc837dSMiao Xie 			goto out;
14570b86a832SChris Mason 	}
14587bfc837dSMiao Xie 
14590b86a832SChris Mason 	while (1) {
14600b86a832SChris Mason 		l = path->nodes[0];
14610b86a832SChris Mason 		slot = path->slots[0];
14620b86a832SChris Mason 		if (slot >= btrfs_header_nritems(l)) {
14630b86a832SChris Mason 			ret = btrfs_next_leaf(root, path);
14640b86a832SChris Mason 			if (ret == 0)
14650b86a832SChris Mason 				continue;
14660b86a832SChris Mason 			if (ret < 0)
14677bfc837dSMiao Xie 				goto out;
14687bfc837dSMiao Xie 
14697bfc837dSMiao Xie 			break;
14700b86a832SChris Mason 		}
14710b86a832SChris Mason 		btrfs_item_key_to_cpu(l, &key, slot);
14720b86a832SChris Mason 
14730b86a832SChris Mason 		if (key.objectid < device->devid)
14740b86a832SChris Mason 			goto next;
14750b86a832SChris Mason 
14760b86a832SChris Mason 		if (key.objectid > device->devid)
14777bfc837dSMiao Xie 			break;
14780b86a832SChris Mason 
1479962a298fSDavid Sterba 		if (key.type != BTRFS_DEV_EXTENT_KEY)
14800b86a832SChris Mason 			goto next;
14810b86a832SChris Mason 
14827bfc837dSMiao Xie 		if (key.offset > search_start) {
14837bfc837dSMiao Xie 			hole_size = key.offset - search_start;
14847bfc837dSMiao Xie 
14856df9a95eSJosef Bacik 			/*
14866df9a95eSJosef Bacik 			 * Have to check before we set max_hole_start, otherwise
14876df9a95eSJosef Bacik 			 * we could end up sending back this offset anyway.
14886df9a95eSJosef Bacik 			 */
1489499f377fSJeff Mahoney 			if (contains_pending_extent(transaction, device,
14906df9a95eSJosef Bacik 						    &search_start,
14911b984508SForrest Liu 						    hole_size)) {
14921b984508SForrest Liu 				if (key.offset >= search_start) {
14931b984508SForrest Liu 					hole_size = key.offset - search_start;
14941b984508SForrest Liu 				} else {
14951b984508SForrest Liu 					WARN_ON_ONCE(1);
14966df9a95eSJosef Bacik 					hole_size = 0;
14971b984508SForrest Liu 				}
14981b984508SForrest Liu 			}
14996df9a95eSJosef Bacik 
15007bfc837dSMiao Xie 			if (hole_size > max_hole_size) {
15017bfc837dSMiao Xie 				max_hole_start = search_start;
15027bfc837dSMiao Xie 				max_hole_size = hole_size;
15037bfc837dSMiao Xie 			}
15047bfc837dSMiao Xie 
15057bfc837dSMiao Xie 			/*
15067bfc837dSMiao Xie 			 * If this free space is greater than what we need,
15077bfc837dSMiao Xie 			 * it must be the max free space that we have found
15087bfc837dSMiao Xie 			 * until now, so max_hole_start must point to the start
15097bfc837dSMiao Xie 			 * of this free space and the length of this free space
15107bfc837dSMiao Xie 			 * is stored in max_hole_size. Thus, we return
15117bfc837dSMiao Xie 			 * max_hole_start and max_hole_size and go back to the
15127bfc837dSMiao Xie 			 * caller.
15137bfc837dSMiao Xie 			 */
15147bfc837dSMiao Xie 			if (hole_size >= num_bytes) {
15157bfc837dSMiao Xie 				ret = 0;
15167bfc837dSMiao Xie 				goto out;
15177bfc837dSMiao Xie 			}
15187bfc837dSMiao Xie 		}
15197bfc837dSMiao Xie 
15200b86a832SChris Mason 		dev_extent = btrfs_item_ptr(l, slot, struct btrfs_dev_extent);
15217bfc837dSMiao Xie 		extent_end = key.offset + btrfs_dev_extent_length(l,
15227bfc837dSMiao Xie 								  dev_extent);
15237bfc837dSMiao Xie 		if (extent_end > search_start)
15247bfc837dSMiao Xie 			search_start = extent_end;
15250b86a832SChris Mason next:
15260b86a832SChris Mason 		path->slots[0]++;
15270b86a832SChris Mason 		cond_resched();
15280b86a832SChris Mason 	}
15290b86a832SChris Mason 
153038c01b96Sliubo 	/*
153138c01b96Sliubo 	 * At this point, search_start should be the end of
153238c01b96Sliubo 	 * allocated dev extents, and when shrinking the device,
153338c01b96Sliubo 	 * search_end may be smaller than search_start.
153438c01b96Sliubo 	 */
1535f2ab7618SZhao Lei 	if (search_end > search_start) {
15367bfc837dSMiao Xie 		hole_size = search_end - search_start;
153738c01b96Sliubo 
1538499f377fSJeff Mahoney 		if (contains_pending_extent(transaction, device, &search_start,
1539f2ab7618SZhao Lei 					    hole_size)) {
1540f2ab7618SZhao Lei 			btrfs_release_path(path);
1541f2ab7618SZhao Lei 			goto again;
1542f2ab7618SZhao Lei 		}
1543f2ab7618SZhao Lei 
15447bfc837dSMiao Xie 		if (hole_size > max_hole_size) {
15457bfc837dSMiao Xie 			max_hole_start = search_start;
15467bfc837dSMiao Xie 			max_hole_size = hole_size;
15470b86a832SChris Mason 		}
15486df9a95eSJosef Bacik 	}
15496df9a95eSJosef Bacik 
15507bfc837dSMiao Xie 	/* See above. */
1551f2ab7618SZhao Lei 	if (max_hole_size < num_bytes)
15527bfc837dSMiao Xie 		ret = -ENOSPC;
15537bfc837dSMiao Xie 	else
15542b82032cSYan Zheng 		ret = 0;
15550b86a832SChris Mason 
15567bfc837dSMiao Xie out:
15572b82032cSYan Zheng 	btrfs_free_path(path);
15587bfc837dSMiao Xie 	*start = max_hole_start;
1559b2117a39SMiao Xie 	if (len)
15607bfc837dSMiao Xie 		*len = max_hole_size;
15610b86a832SChris Mason 	return ret;
15620b86a832SChris Mason }
15630b86a832SChris Mason 
1564499f377fSJeff Mahoney int find_free_dev_extent(struct btrfs_trans_handle *trans,
1565499f377fSJeff Mahoney 			 struct btrfs_device *device, u64 num_bytes,
1566499f377fSJeff Mahoney 			 u64 *start, u64 *len)
1567499f377fSJeff Mahoney {
1568499f377fSJeff Mahoney 	/* FIXME use last free of some kind */
1569499f377fSJeff Mahoney 	return find_free_dev_extent_start(trans->transaction, device,
15708cdc7c5bSFilipe Manana 					  num_bytes, 0, start, len);
1571499f377fSJeff Mahoney }
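/*
 * Illustrative use (hypothetical caller, not taken from this file): a chunk
 * allocator probing a device for room might do roughly
 *
 *	u64 dev_offset = 0, max_avail = 0;
 *	int err = find_free_dev_extent(trans, device, stripe_size,
 *				       &dev_offset, &max_avail);
 *
 * On success, dev_offset names a hole of at least stripe_size bytes; on
 * -ENOSPC, max_avail still reports the largest hole that was found.
 */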
1572499f377fSJeff Mahoney 
1573b2950863SChristoph Hellwig static int btrfs_free_dev_extent(struct btrfs_trans_handle *trans,
15748f18cf13SChris Mason 			  struct btrfs_device *device,
15752196d6e8SMiao Xie 			  u64 start, u64 *dev_extent_len)
15768f18cf13SChris Mason {
15770b246afaSJeff Mahoney 	struct btrfs_fs_info *fs_info = device->fs_info;
15780b246afaSJeff Mahoney 	struct btrfs_root *root = fs_info->dev_root;
15798f18cf13SChris Mason 	int ret;
15808f18cf13SChris Mason 	struct btrfs_path *path;
15818f18cf13SChris Mason 	struct btrfs_key key;
1582a061fc8dSChris Mason 	struct btrfs_key found_key;
1583a061fc8dSChris Mason 	struct extent_buffer *leaf = NULL;
1584a061fc8dSChris Mason 	struct btrfs_dev_extent *extent = NULL;
15858f18cf13SChris Mason 
15868f18cf13SChris Mason 	path = btrfs_alloc_path();
15878f18cf13SChris Mason 	if (!path)
15888f18cf13SChris Mason 		return -ENOMEM;
15898f18cf13SChris Mason 
15908f18cf13SChris Mason 	key.objectid = device->devid;
15918f18cf13SChris Mason 	key.offset = start;
15928f18cf13SChris Mason 	key.type = BTRFS_DEV_EXTENT_KEY;
1593924cd8fbSMiao Xie again:
15948f18cf13SChris Mason 	ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
1595a061fc8dSChris Mason 	if (ret > 0) {
1596a061fc8dSChris Mason 		ret = btrfs_previous_item(root, path, key.objectid,
1597a061fc8dSChris Mason 					  BTRFS_DEV_EXTENT_KEY);
1598b0b802d7STsutomu Itoh 		if (ret)
1599b0b802d7STsutomu Itoh 			goto out;
1600a061fc8dSChris Mason 		leaf = path->nodes[0];
1601a061fc8dSChris Mason 		btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
1602a061fc8dSChris Mason 		extent = btrfs_item_ptr(leaf, path->slots[0],
1603a061fc8dSChris Mason 					struct btrfs_dev_extent);
1604a061fc8dSChris Mason 		BUG_ON(found_key.offset > start || found_key.offset +
1605a061fc8dSChris Mason 		       btrfs_dev_extent_length(leaf, extent) < start);
1606924cd8fbSMiao Xie 		key = found_key;
1607924cd8fbSMiao Xie 		btrfs_release_path(path);
1608924cd8fbSMiao Xie 		goto again;
1609a061fc8dSChris Mason 	} else if (ret == 0) {
1610a061fc8dSChris Mason 		leaf = path->nodes[0];
1611a061fc8dSChris Mason 		extent = btrfs_item_ptr(leaf, path->slots[0],
1612a061fc8dSChris Mason 					struct btrfs_dev_extent);
161379787eaaSJeff Mahoney 	} else {
16140b246afaSJeff Mahoney 		btrfs_handle_fs_error(fs_info, ret, "Slot search failed");
161579787eaaSJeff Mahoney 		goto out;
1616a061fc8dSChris Mason 	}
16178f18cf13SChris Mason 
16182196d6e8SMiao Xie 	*dev_extent_len = btrfs_dev_extent_length(leaf, extent);
16192196d6e8SMiao Xie 
16208f18cf13SChris Mason 	ret = btrfs_del_item(trans, root, path);
162179787eaaSJeff Mahoney 	if (ret) {
16220b246afaSJeff Mahoney 		btrfs_handle_fs_error(fs_info, ret,
162379787eaaSJeff Mahoney 				      "Failed to remove dev extent item");
162413212b54SZhao Lei 	} else {
16253204d33cSJosef Bacik 		set_bit(BTRFS_TRANS_HAVE_FREE_BGS, &trans->transaction->flags);
162679787eaaSJeff Mahoney 	}
1627b0b802d7STsutomu Itoh out:
16288f18cf13SChris Mason 	btrfs_free_path(path);
16298f18cf13SChris Mason 	return ret;
16308f18cf13SChris Mason }
16318f18cf13SChris Mason 
163248a3b636SEric Sandeen static int btrfs_alloc_dev_extent(struct btrfs_trans_handle *trans,
16330b86a832SChris Mason 				  struct btrfs_device *device,
16342b82032cSYan Zheng 				  u64 chunk_offset, u64 start, u64 num_bytes)
16350b86a832SChris Mason {
16360b86a832SChris Mason 	int ret;
16370b86a832SChris Mason 	struct btrfs_path *path;
16380b246afaSJeff Mahoney 	struct btrfs_fs_info *fs_info = device->fs_info;
16390b246afaSJeff Mahoney 	struct btrfs_root *root = fs_info->dev_root;
16400b86a832SChris Mason 	struct btrfs_dev_extent *extent;
16410b86a832SChris Mason 	struct extent_buffer *leaf;
16420b86a832SChris Mason 	struct btrfs_key key;
16430b86a832SChris Mason 
1644e12c9621SAnand Jain 	WARN_ON(!test_bit(BTRFS_DEV_STATE_IN_FS_METADATA, &device->dev_state));
1645401e29c1SAnand Jain 	WARN_ON(test_bit(BTRFS_DEV_STATE_REPLACE_TGT, &device->dev_state));
16460b86a832SChris Mason 	path = btrfs_alloc_path();
16470b86a832SChris Mason 	if (!path)
16480b86a832SChris Mason 		return -ENOMEM;
16490b86a832SChris Mason 
16500b86a832SChris Mason 	key.objectid = device->devid;
16512b82032cSYan Zheng 	key.offset = start;
16520b86a832SChris Mason 	key.type = BTRFS_DEV_EXTENT_KEY;
16530b86a832SChris Mason 	ret = btrfs_insert_empty_item(trans, root, path, &key,
16540b86a832SChris Mason 				      sizeof(*extent));
16552cdcecbcSMark Fasheh 	if (ret)
16562cdcecbcSMark Fasheh 		goto out;
16570b86a832SChris Mason 
16580b86a832SChris Mason 	leaf = path->nodes[0];
16590b86a832SChris Mason 	extent = btrfs_item_ptr(leaf, path->slots[0],
16600b86a832SChris Mason 				struct btrfs_dev_extent);
1661b5d9071cSNikolay Borisov 	btrfs_set_dev_extent_chunk_tree(leaf, extent,
1662b5d9071cSNikolay Borisov 					BTRFS_CHUNK_TREE_OBJECTID);
16630ca00afbSNikolay Borisov 	btrfs_set_dev_extent_chunk_objectid(leaf, extent,
16640ca00afbSNikolay Borisov 					    BTRFS_FIRST_CHUNK_TREE_OBJECTID);
1665e17cade2SChris Mason 	btrfs_set_dev_extent_chunk_offset(leaf, extent, chunk_offset);
1666e17cade2SChris Mason 
16670b86a832SChris Mason 	btrfs_set_dev_extent_length(leaf, extent, num_bytes);
16680b86a832SChris Mason 	btrfs_mark_buffer_dirty(leaf);
16692cdcecbcSMark Fasheh out:
16700b86a832SChris Mason 	btrfs_free_path(path);
16710b86a832SChris Mason 	return ret;
16720b86a832SChris Mason }
16730b86a832SChris Mason 
16746df9a95eSJosef Bacik static u64 find_next_chunk(struct btrfs_fs_info *fs_info)
16750b86a832SChris Mason {
16766df9a95eSJosef Bacik 	struct extent_map_tree *em_tree;
16776df9a95eSJosef Bacik 	struct extent_map *em;
16786df9a95eSJosef Bacik 	struct rb_node *n;
16796df9a95eSJosef Bacik 	u64 ret = 0;
16800b86a832SChris Mason 
16816df9a95eSJosef Bacik 	em_tree = &fs_info->mapping_tree.map_tree;
16826df9a95eSJosef Bacik 	read_lock(&em_tree->lock);
16836df9a95eSJosef Bacik 	n = rb_last(&em_tree->map);
16846df9a95eSJosef Bacik 	if (n) {
16856df9a95eSJosef Bacik 		em = rb_entry(n, struct extent_map, rb_node);
16866df9a95eSJosef Bacik 		ret = em->start + em->len;
1687e17cade2SChris Mason 	}
16886df9a95eSJosef Bacik 	read_unlock(&em_tree->lock);
16896df9a95eSJosef Bacik 
16900b86a832SChris Mason 	return ret;
16910b86a832SChris Mason }
16920b86a832SChris Mason 
169353f10659SIlya Dryomov static noinline int find_next_devid(struct btrfs_fs_info *fs_info,
169453f10659SIlya Dryomov 				    u64 *devid_ret)
16950b86a832SChris Mason {
16960b86a832SChris Mason 	int ret;
16970b86a832SChris Mason 	struct btrfs_key key;
16980b86a832SChris Mason 	struct btrfs_key found_key;
16992b82032cSYan Zheng 	struct btrfs_path *path;
17002b82032cSYan Zheng 
17012b82032cSYan Zheng 	path = btrfs_alloc_path();
17022b82032cSYan Zheng 	if (!path)
17032b82032cSYan Zheng 		return -ENOMEM;
17040b86a832SChris Mason 
17050b86a832SChris Mason 	key.objectid = BTRFS_DEV_ITEMS_OBJECTID;
17060b86a832SChris Mason 	key.type = BTRFS_DEV_ITEM_KEY;
17070b86a832SChris Mason 	key.offset = (u64)-1;
17080b86a832SChris Mason 
170953f10659SIlya Dryomov 	ret = btrfs_search_slot(NULL, fs_info->chunk_root, &key, path, 0, 0);
17100b86a832SChris Mason 	if (ret < 0)
17110b86a832SChris Mason 		goto error;
17120b86a832SChris Mason 
171379787eaaSJeff Mahoney 	BUG_ON(ret == 0); /* Corruption */
17140b86a832SChris Mason 
171553f10659SIlya Dryomov 	ret = btrfs_previous_item(fs_info->chunk_root, path,
171653f10659SIlya Dryomov 				  BTRFS_DEV_ITEMS_OBJECTID,
17170b86a832SChris Mason 				  BTRFS_DEV_ITEM_KEY);
17180b86a832SChris Mason 	if (ret) {
171953f10659SIlya Dryomov 		*devid_ret = 1;
17200b86a832SChris Mason 	} else {
17210b86a832SChris Mason 		btrfs_item_key_to_cpu(path->nodes[0], &found_key,
17220b86a832SChris Mason 				      path->slots[0]);
172353f10659SIlya Dryomov 		*devid_ret = found_key.offset + 1;
17240b86a832SChris Mason 	}
17250b86a832SChris Mason 	ret = 0;
17260b86a832SChris Mason error:
17272b82032cSYan Zheng 	btrfs_free_path(path);
17280b86a832SChris Mason 	return ret;
17290b86a832SChris Mason }
17300b86a832SChris Mason 
17310b86a832SChris Mason /*
17320b86a832SChris Mason  * the device information is stored in the chunk root;
17330b86a832SChris Mason  * the btrfs_device struct should be fully filled in
17340b86a832SChris Mason  */
1735c74a0b02SAnand Jain static int btrfs_add_dev_item(struct btrfs_trans_handle *trans,
17365b4aacefSJeff Mahoney 			    struct btrfs_fs_info *fs_info,
17370b86a832SChris Mason 			    struct btrfs_device *device)
17380b86a832SChris Mason {
17395b4aacefSJeff Mahoney 	struct btrfs_root *root = fs_info->chunk_root;
17400b86a832SChris Mason 	int ret;
17410b86a832SChris Mason 	struct btrfs_path *path;
17420b86a832SChris Mason 	struct btrfs_dev_item *dev_item;
17430b86a832SChris Mason 	struct extent_buffer *leaf;
17440b86a832SChris Mason 	struct btrfs_key key;
17450b86a832SChris Mason 	unsigned long ptr;
17460b86a832SChris Mason 
17470b86a832SChris Mason 	path = btrfs_alloc_path();
17480b86a832SChris Mason 	if (!path)
17490b86a832SChris Mason 		return -ENOMEM;
17500b86a832SChris Mason 
17510b86a832SChris Mason 	key.objectid = BTRFS_DEV_ITEMS_OBJECTID;
17520b86a832SChris Mason 	key.type = BTRFS_DEV_ITEM_KEY;
17532b82032cSYan Zheng 	key.offset = device->devid;
17540b86a832SChris Mason 
17550b86a832SChris Mason 	ret = btrfs_insert_empty_item(trans, root, path, &key,
17560d81ba5dSChris Mason 				      sizeof(*dev_item));
17570b86a832SChris Mason 	if (ret)
17580b86a832SChris Mason 		goto out;
17590b86a832SChris Mason 
17600b86a832SChris Mason 	leaf = path->nodes[0];
17610b86a832SChris Mason 	dev_item = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_dev_item);
17620b86a832SChris Mason 
17630b86a832SChris Mason 	btrfs_set_device_id(leaf, dev_item, device->devid);
17642b82032cSYan Zheng 	btrfs_set_device_generation(leaf, dev_item, 0);
17650b86a832SChris Mason 	btrfs_set_device_type(leaf, dev_item, device->type);
17660b86a832SChris Mason 	btrfs_set_device_io_align(leaf, dev_item, device->io_align);
17670b86a832SChris Mason 	btrfs_set_device_io_width(leaf, dev_item, device->io_width);
17680b86a832SChris Mason 	btrfs_set_device_sector_size(leaf, dev_item, device->sector_size);
17697cc8e58dSMiao Xie 	btrfs_set_device_total_bytes(leaf, dev_item,
17707cc8e58dSMiao Xie 				     btrfs_device_get_disk_total_bytes(device));
17717cc8e58dSMiao Xie 	btrfs_set_device_bytes_used(leaf, dev_item,
17727cc8e58dSMiao Xie 				    btrfs_device_get_bytes_used(device));
1773e17cade2SChris Mason 	btrfs_set_device_group(leaf, dev_item, 0);
1774e17cade2SChris Mason 	btrfs_set_device_seek_speed(leaf, dev_item, 0);
1775e17cade2SChris Mason 	btrfs_set_device_bandwidth(leaf, dev_item, 0);
1776c3027eb5SChris Mason 	btrfs_set_device_start_offset(leaf, dev_item, 0);
17770b86a832SChris Mason 
1778410ba3a2SGeert Uytterhoeven 	ptr = btrfs_device_uuid(dev_item);
1779e17cade2SChris Mason 	write_extent_buffer(leaf, device->uuid, ptr, BTRFS_UUID_SIZE);
17801473b24eSGeert Uytterhoeven 	ptr = btrfs_device_fsid(dev_item);
178144880fdcSAnand Jain 	write_extent_buffer(leaf, fs_info->fsid, ptr, BTRFS_FSID_SIZE);
17820b86a832SChris Mason 	btrfs_mark_buffer_dirty(leaf);
17830b86a832SChris Mason 
17842b82032cSYan Zheng 	ret = 0;
17850b86a832SChris Mason out:
17860b86a832SChris Mason 	btrfs_free_path(path);
17870b86a832SChris Mason 	return ret;
17880b86a832SChris Mason }
17898f18cf13SChris Mason 
17905a1972bdSQu Wenruo /*
17915a1972bdSQu Wenruo  * Function to update ctime/mtime for a given device path.
17925a1972bdSQu Wenruo  * Mainly used for ctime/mtime based probes like libblkid.
17935a1972bdSQu Wenruo  */
1794da353f6bSDavid Sterba static void update_dev_time(const char *path_name)
17955a1972bdSQu Wenruo {
17965a1972bdSQu Wenruo 	struct file *filp;
17975a1972bdSQu Wenruo 
17985a1972bdSQu Wenruo 	filp = filp_open(path_name, O_RDWR, 0);
179998af592fSAl Viro 	if (IS_ERR(filp))
18005a1972bdSQu Wenruo 		return;
18015a1972bdSQu Wenruo 	file_update_time(filp);
18025a1972bdSQu Wenruo 	filp_close(filp, NULL);
18035a1972bdSQu Wenruo }
18045a1972bdSQu Wenruo 
18055b4aacefSJeff Mahoney static int btrfs_rm_dev_item(struct btrfs_fs_info *fs_info,
1806a061fc8dSChris Mason 			     struct btrfs_device *device)
1807a061fc8dSChris Mason {
18085b4aacefSJeff Mahoney 	struct btrfs_root *root = fs_info->chunk_root;
1809a061fc8dSChris Mason 	int ret;
1810a061fc8dSChris Mason 	struct btrfs_path *path;
1811a061fc8dSChris Mason 	struct btrfs_key key;
1812a061fc8dSChris Mason 	struct btrfs_trans_handle *trans;
1813a061fc8dSChris Mason 
1814a061fc8dSChris Mason 	path = btrfs_alloc_path();
1815a061fc8dSChris Mason 	if (!path)
1816a061fc8dSChris Mason 		return -ENOMEM;
1817a061fc8dSChris Mason 
1818a22285a6SYan, Zheng 	trans = btrfs_start_transaction(root, 0);
181998d5dc13STsutomu Itoh 	if (IS_ERR(trans)) {
182098d5dc13STsutomu Itoh 		btrfs_free_path(path);
182198d5dc13STsutomu Itoh 		return PTR_ERR(trans);
182298d5dc13STsutomu Itoh 	}
1823a061fc8dSChris Mason 	key.objectid = BTRFS_DEV_ITEMS_OBJECTID;
1824a061fc8dSChris Mason 	key.type = BTRFS_DEV_ITEM_KEY;
1825a061fc8dSChris Mason 	key.offset = device->devid;
1826a061fc8dSChris Mason 
1827a061fc8dSChris Mason 	ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
18285e9f2ad5SNikolay Borisov 	if (ret) {
18295e9f2ad5SNikolay Borisov 		if (ret > 0)
1830a061fc8dSChris Mason 			ret = -ENOENT;
18315e9f2ad5SNikolay Borisov 		btrfs_abort_transaction(trans, ret);
18325e9f2ad5SNikolay Borisov 		btrfs_end_transaction(trans);
1833a061fc8dSChris Mason 		goto out;
1834a061fc8dSChris Mason 	}
1835a061fc8dSChris Mason 
1836a061fc8dSChris Mason 	ret = btrfs_del_item(trans, root, path);
18375e9f2ad5SNikolay Borisov 	if (ret) {
18385e9f2ad5SNikolay Borisov 		btrfs_abort_transaction(trans, ret);
18395e9f2ad5SNikolay Borisov 		btrfs_end_transaction(trans);
18405e9f2ad5SNikolay Borisov 	}
18415e9f2ad5SNikolay Borisov 
1842a061fc8dSChris Mason out:
1843a061fc8dSChris Mason 	btrfs_free_path(path);
18445e9f2ad5SNikolay Borisov 	if (!ret)
18455e9f2ad5SNikolay Borisov 		ret = btrfs_commit_transaction(trans);
1846a061fc8dSChris Mason 	return ret;
1847a061fc8dSChris Mason }
1848a061fc8dSChris Mason 
18493cc31a0dSDavid Sterba /*
18503cc31a0dSDavid Sterba  * Verify that @num_devices satisfies the RAID profile constraints in the whole
18513cc31a0dSDavid Sterba  * filesystem. It's up to the caller to adjust that number with regard to e.g.
18523cc31a0dSDavid Sterba  * device replace.
18533cc31a0dSDavid Sterba  */
18543cc31a0dSDavid Sterba static int btrfs_check_raid_min_devices(struct btrfs_fs_info *fs_info,
18553cc31a0dSDavid Sterba 		u64 num_devices)
1856a061fc8dSChris Mason {
1857a061fc8dSChris Mason 	u64 all_avail;
1858de98ced9SMiao Xie 	unsigned seq;
1859418775a2SDavid Sterba 	int i;
1860a061fc8dSChris Mason 
1861de98ced9SMiao Xie 	do {
1862bd45ffbcSAnand Jain 		seq = read_seqbegin(&fs_info->profiles_lock);
1863de98ced9SMiao Xie 
1864bd45ffbcSAnand Jain 		all_avail = fs_info->avail_data_alloc_bits |
1865bd45ffbcSAnand Jain 			    fs_info->avail_system_alloc_bits |
1866bd45ffbcSAnand Jain 			    fs_info->avail_metadata_alloc_bits;
1867bd45ffbcSAnand Jain 	} while (read_seqretry(&fs_info->profiles_lock, seq));
1868f1fa7f26SAnand Jain 
1869418775a2SDavid Sterba 	for (i = 0; i < BTRFS_NR_RAID_TYPES; i++) {
1870418775a2SDavid Sterba 		if (!(all_avail & btrfs_raid_group[i]))
1871418775a2SDavid Sterba 			continue;
1872a061fc8dSChris Mason 
1873418775a2SDavid Sterba 		if (num_devices < btrfs_raid_array[i].devs_min) {
1874418775a2SDavid Sterba 			int ret = btrfs_raid_mindev_error[i];
1875a061fc8dSChris Mason 
1876418775a2SDavid Sterba 			if (ret)
1877418775a2SDavid Sterba 				return ret;
187853b381b3SDavid Woodhouse 		}
1879bd45ffbcSAnand Jain 	}
1880bd45ffbcSAnand Jain 
1881bd45ffbcSAnand Jain 	return 0;
1882f1fa7f26SAnand Jain }
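/*
 * Example (based on btrfs_raid_array): with a RAID1 profile in use, devs_min
 * is 2, so btrfs_rm_device() below, which passes num_devices - 1, refuses to
 * shrink a two-device RAID1 filesystem down to a single device.
 */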
1883f1fa7f26SAnand Jain 
1884c9162bdfSOmar Sandoval static struct btrfs_device * btrfs_find_next_active_device(
1885c9162bdfSOmar Sandoval 		struct btrfs_fs_devices *fs_devs, struct btrfs_device *device)
188688acff64SAnand Jain {
188788acff64SAnand Jain 	struct btrfs_device *next_device;
188888acff64SAnand Jain 
188988acff64SAnand Jain 	list_for_each_entry(next_device, &fs_devs->devices, dev_list) {
189088acff64SAnand Jain 		if (next_device != device &&
1891e6e674bdSAnand Jain 		    !test_bit(BTRFS_DEV_STATE_MISSING, &next_device->dev_state)
1892e6e674bdSAnand Jain 		    && next_device->bdev)
189388acff64SAnand Jain 			return next_device;
189488acff64SAnand Jain 	}
189588acff64SAnand Jain 
189688acff64SAnand Jain 	return NULL;
189788acff64SAnand Jain }
189888acff64SAnand Jain 
189988acff64SAnand Jain /*
190088acff64SAnand Jain  * Helper function to check if the given device is part of s_bdev / latest_bdev
190188acff64SAnand Jain  * and replace it with the provided or the next active device. In the context
190288acff64SAnand Jain  * where this function is called, there should always be another device (or
190388acff64SAnand Jain  * this_dev) which is active.
190488acff64SAnand Jain  */
190588acff64SAnand Jain void btrfs_assign_next_active_device(struct btrfs_fs_info *fs_info,
190688acff64SAnand Jain 		struct btrfs_device *device, struct btrfs_device *this_dev)
190788acff64SAnand Jain {
190888acff64SAnand Jain 	struct btrfs_device *next_device;
190988acff64SAnand Jain 
191088acff64SAnand Jain 	if (this_dev)
191188acff64SAnand Jain 		next_device = this_dev;
191288acff64SAnand Jain 	else
191388acff64SAnand Jain 		next_device = btrfs_find_next_active_device(fs_info->fs_devices,
191488acff64SAnand Jain 								device);
191588acff64SAnand Jain 	ASSERT(next_device);
191688acff64SAnand Jain 
191788acff64SAnand Jain 	if (fs_info->sb->s_bdev &&
191888acff64SAnand Jain 			(fs_info->sb->s_bdev == device->bdev))
191988acff64SAnand Jain 		fs_info->sb->s_bdev = next_device->bdev;
192088acff64SAnand Jain 
192188acff64SAnand Jain 	if (fs_info->fs_devices->latest_bdev == device->bdev)
192288acff64SAnand Jain 		fs_info->fs_devices->latest_bdev = next_device->bdev;
192388acff64SAnand Jain }
192488acff64SAnand Jain 
1925da353f6bSDavid Sterba int btrfs_rm_device(struct btrfs_fs_info *fs_info, const char *device_path,
1926da353f6bSDavid Sterba 		u64 devid)
1927f1fa7f26SAnand Jain {
1928f1fa7f26SAnand Jain 	struct btrfs_device *device;
1929f1fa7f26SAnand Jain 	struct btrfs_fs_devices *cur_devices;
1930f1fa7f26SAnand Jain 	u64 num_devices;
1931f1fa7f26SAnand Jain 	int ret = 0;
1932f1fa7f26SAnand Jain 
19332c997384SAnand Jain 	mutex_lock(&fs_info->volume_mutex);
1934f1fa7f26SAnand Jain 	mutex_lock(&uuid_mutex);
1935a061fc8dSChris Mason 
19360b246afaSJeff Mahoney 	num_devices = fs_info->fs_devices->num_devices;
19370b246afaSJeff Mahoney 	btrfs_dev_replace_lock(&fs_info->dev_replace, 0);
19380b246afaSJeff Mahoney 	if (btrfs_dev_replace_is_ongoing(&fs_info->dev_replace)) {
1939a061fc8dSChris Mason 		WARN_ON(num_devices < 1);
1940a061fc8dSChris Mason 		num_devices--;
1941a061fc8dSChris Mason 	}
19420b246afaSJeff Mahoney 	btrfs_dev_replace_unlock(&fs_info->dev_replace, 0);
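	/*
	 * The decrement above excludes the dev-replace target device: it sits
	 * on the device list while a replace runs, but it is not yet an
	 * independent member of the RAID profile, so it must not count
	 * towards the minimum device constraint.
	 */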
1943a061fc8dSChris Mason 
19440b246afaSJeff Mahoney 	ret = btrfs_check_raid_min_devices(fs_info, num_devices - 1);
1945beaf8ab3SStefan Behrens 	if (ret)
1946a061fc8dSChris Mason 		goto out;
1947f1fa7f26SAnand Jain 
19482ff7e61eSJeff Mahoney 	ret = btrfs_find_device_by_devspec(fs_info, devid, device_path,
194924fc572fSAnand Jain 					   &device);
1950a061fc8dSChris Mason 	if (ret)
1951a061fc8dSChris Mason 		goto out;
19522b82032cSYan Zheng 
1953401e29c1SAnand Jain 	if (test_bit(BTRFS_DEV_STATE_REPLACE_TGT, &device->dev_state)) {
1954183860f6SAnand Jain 		ret = BTRFS_ERROR_DEV_TGT_REPLACE;
195524fc572fSAnand Jain 		goto out;
195663a212abSStefan Behrens 	}
195763a212abSStefan Behrens 
1958ebbede42SAnand Jain 	if (test_bit(BTRFS_DEV_STATE_WRITEABLE, &device->dev_state) &&
1959ebbede42SAnand Jain 	    fs_info->fs_devices->rw_devices == 1) {
1960183860f6SAnand Jain 		ret = BTRFS_ERROR_DEV_ONLY_WRITABLE;
196124fc572fSAnand Jain 		goto out;
19622b82032cSYan Zheng 	}
19632b82032cSYan Zheng 
1964ebbede42SAnand Jain 	if (test_bit(BTRFS_DEV_STATE_WRITEABLE, &device->dev_state)) {
196534441361SDavid Sterba 		mutex_lock(&fs_info->chunk_mutex);
19662b82032cSYan Zheng 		list_del_init(&device->dev_alloc_list);
1967c3929c36SMiao Xie 		device->fs_devices->rw_devices--;
196834441361SDavid Sterba 		mutex_unlock(&fs_info->chunk_mutex);
19692b82032cSYan Zheng 	}
1970a061fc8dSChris Mason 
1971d7901554SCarey Underwood 	mutex_unlock(&uuid_mutex);
1972a061fc8dSChris Mason 	ret = btrfs_shrink_device(device, 0);
1973d7901554SCarey Underwood 	mutex_lock(&uuid_mutex);
1974a061fc8dSChris Mason 	if (ret)
19759b3517e9SIlya Dryomov 		goto error_undo;
1976a061fc8dSChris Mason 
197763a212abSStefan Behrens 	/*
197863a212abSStefan Behrens 	 * TODO: the superblock still includes this device in its num_devices
197963a212abSStefan Behrens 	 * counter although write_all_supers() is not locked out. This
198063a212abSStefan Behrens 	 * could give a filesystem state which requires a degraded mount.
198163a212abSStefan Behrens 	 */
19820b246afaSJeff Mahoney 	ret = btrfs_rm_dev_item(fs_info, device);
1983a061fc8dSChris Mason 	if (ret)
19849b3517e9SIlya Dryomov 		goto error_undo;
1985a061fc8dSChris Mason 
1986e12c9621SAnand Jain 	clear_bit(BTRFS_DEV_STATE_IN_FS_METADATA, &device->dev_state);
19870b246afaSJeff Mahoney 	btrfs_scrub_cancel_dev(fs_info, device);
1988e5e9a520SChris Mason 
1989e5e9a520SChris Mason 	/*
1990e5e9a520SChris Mason 	 * the device list mutex makes sure that we don't change
1991e5e9a520SChris Mason 	 * the device list while someone else is writing out all
1992d7306801SFilipe David Borba Manana 	 * the device supers. Whoever is writing all supers should
1993d7306801SFilipe David Borba Manana 	 * lock the device list mutex before getting the number of
1994d7306801SFilipe David Borba Manana 	 * devices in the super block (super_copy). Conversely,
1995d7306801SFilipe David Borba Manana 	 * whoever updates the number of devices in the super block
1996d7306801SFilipe David Borba Manana 	 * (super_copy) should hold the device list mutex.
1997e5e9a520SChris Mason 	 */
19981f78160cSXiao Guangrong 
19991f78160cSXiao Guangrong 	cur_devices = device->fs_devices;
20000b246afaSJeff Mahoney 	mutex_lock(&fs_info->fs_devices->device_list_mutex);
20011f78160cSXiao Guangrong 	list_del_rcu(&device->dev_list);
2002e5e9a520SChris Mason 
2003e4404d6eSYan Zheng 	device->fs_devices->num_devices--;
200402db0844SJosef Bacik 	device->fs_devices->total_devices--;
20052b82032cSYan Zheng 
2006e6e674bdSAnand Jain 	if (test_bit(BTRFS_DEV_STATE_MISSING, &device->dev_state))
20073a7d55c8SMiao Xie 		device->fs_devices->missing_devices--;
2008cd02dca5SChris Mason 
20090b246afaSJeff Mahoney 	btrfs_assign_next_active_device(fs_info, device, NULL);
20102b82032cSYan Zheng 
20110bfaa9c5SEric Sandeen 	if (device->bdev) {
20122b82032cSYan Zheng 		device->fs_devices->open_devices--;
201399994cdeSAnand Jain 		/* remove sysfs entry */
20140b246afaSJeff Mahoney 		btrfs_sysfs_rm_device_link(fs_info->fs_devices, device);
20150bfaa9c5SEric Sandeen 	}
201699994cdeSAnand Jain 
20170b246afaSJeff Mahoney 	num_devices = btrfs_super_num_devices(fs_info->super_copy) - 1;
20180b246afaSJeff Mahoney 	btrfs_set_super_num_devices(fs_info->super_copy, num_devices);
20190b246afaSJeff Mahoney 	mutex_unlock(&fs_info->fs_devices->device_list_mutex);
2020e4404d6eSYan Zheng 
2021cea67ab9SJeff Mahoney 	/*
2022cea67ab9SJeff Mahoney 	 * at this point, the device is zero sized and detached from
2023cea67ab9SJeff Mahoney 	 * the devices list.  All that's left is to zero out the old
2024cea67ab9SJeff Mahoney 	 * supers and free the device.
2025cea67ab9SJeff Mahoney 	 */
2026ebbede42SAnand Jain 	if (test_bit(BTRFS_DEV_STATE_WRITEABLE, &device->dev_state))
2027cea67ab9SJeff Mahoney 		btrfs_scratch_superblocks(device->bdev, device->name->str);
2028cea67ab9SJeff Mahoney 
2029cea67ab9SJeff Mahoney 	btrfs_close_bdev(device);
2030f06c5965SDavid Sterba 	call_rcu(&device->rcu, free_device_rcu);
2031cea67ab9SJeff Mahoney 
20321f78160cSXiao Guangrong 	if (cur_devices->open_devices == 0) {
20332b82032cSYan Zheng 		struct btrfs_fs_devices *fs_devices;
20340b246afaSJeff Mahoney 		fs_devices = fs_info->fs_devices;
20352b82032cSYan Zheng 		while (fs_devices) {
20368321cf25SRickard Strandqvist 			if (fs_devices->seed == cur_devices) {
20378321cf25SRickard Strandqvist 				fs_devices->seed = cur_devices->seed;
20382b82032cSYan Zheng 				break;
20398321cf25SRickard Strandqvist 			}
20402b82032cSYan Zheng 			fs_devices = fs_devices->seed;
20412b82032cSYan Zheng 		}
20421f78160cSXiao Guangrong 		cur_devices->seed = NULL;
20431f78160cSXiao Guangrong 		__btrfs_close_devices(cur_devices);
20441f78160cSXiao Guangrong 		free_fs_devices(cur_devices);
20452b82032cSYan Zheng 	}
20462b82032cSYan Zheng 
2047a061fc8dSChris Mason out:
2048a061fc8dSChris Mason 	mutex_unlock(&uuid_mutex);
20492c997384SAnand Jain 	mutex_unlock(&fs_info->volume_mutex);
2050a061fc8dSChris Mason 	return ret;
205124fc572fSAnand Jain 
20529b3517e9SIlya Dryomov error_undo:
2053ebbede42SAnand Jain 	if (test_bit(BTRFS_DEV_STATE_WRITEABLE, &device->dev_state)) {
205434441361SDavid Sterba 		mutex_lock(&fs_info->chunk_mutex);
20559b3517e9SIlya Dryomov 		list_add(&device->dev_alloc_list,
20560b246afaSJeff Mahoney 			 &fs_info->fs_devices->alloc_list);
2057c3929c36SMiao Xie 		device->fs_devices->rw_devices++;
205834441361SDavid Sterba 		mutex_unlock(&fs_info->chunk_mutex);
20599b3517e9SIlya Dryomov 	}
206024fc572fSAnand Jain 	goto out;
2061a061fc8dSChris Mason }
2062a061fc8dSChris Mason 
2063084b6e7cSQu Wenruo void btrfs_rm_dev_replace_remove_srcdev(struct btrfs_fs_info *fs_info,
2064e93c89c1SStefan Behrens 					struct btrfs_device *srcdev)
2065e93c89c1SStefan Behrens {
2066d51908ceSAnand Jain 	struct btrfs_fs_devices *fs_devices;
2067d51908ceSAnand Jain 
2068e93c89c1SStefan Behrens 	WARN_ON(!mutex_is_locked(&fs_info->fs_devices->device_list_mutex));
20691357272fSIlya Dryomov 
207025e8e911SAnand Jain 	/*
207125e8e911SAnand Jain 	 * In the case of an fs with no seed, srcdev->fs_devices will point
207225e8e911SAnand Jain 	 * to the fs_devices of fs_info. However, when the dev being replaced is
207325e8e911SAnand Jain 	 * a seed dev it will point to the seed's local fs_devices. In short,
207425e8e911SAnand Jain 	 * srcdev will have its correct fs_devices in both cases.
207525e8e911SAnand Jain 	 */
207625e8e911SAnand Jain 	fs_devices = srcdev->fs_devices;
2077d51908ceSAnand Jain 
2078e93c89c1SStefan Behrens 	list_del_rcu(&srcdev->dev_list);
2079619c47f3SDavid Sterba 	list_del(&srcdev->dev_alloc_list);
2080d51908ceSAnand Jain 	fs_devices->num_devices--;
2081e6e674bdSAnand Jain 	if (test_bit(BTRFS_DEV_STATE_MISSING, &srcdev->dev_state))
2082d51908ceSAnand Jain 		fs_devices->missing_devices--;
2083e93c89c1SStefan Behrens 
2084ebbede42SAnand Jain 	if (test_bit(BTRFS_DEV_STATE_WRITEABLE, &srcdev->dev_state))
208582372bc8SMiao Xie 		fs_devices->rw_devices--;
20861357272fSIlya Dryomov 
208782372bc8SMiao Xie 	if (srcdev->bdev)
208882372bc8SMiao Xie 		fs_devices->open_devices--;
2089084b6e7cSQu Wenruo }
2090084b6e7cSQu Wenruo 
2091084b6e7cSQu Wenruo void btrfs_rm_dev_replace_free_srcdev(struct btrfs_fs_info *fs_info,
2092084b6e7cSQu Wenruo 				      struct btrfs_device *srcdev)
2093084b6e7cSQu Wenruo {
2094084b6e7cSQu Wenruo 	struct btrfs_fs_devices *fs_devices = srcdev->fs_devices;
209582372bc8SMiao Xie 
2096ebbede42SAnand Jain 	if (test_bit(BTRFS_DEV_STATE_WRITEABLE, &srcdev->dev_state)) {
209748b3b9d4SAnand Jain 		/* zero out the old super if it is writable */
209848b3b9d4SAnand Jain 		btrfs_scratch_superblocks(srcdev->bdev, srcdev->name->str);
209948b3b9d4SAnand Jain 	}
210014238819SAnand Jain 
210114238819SAnand Jain 	btrfs_close_bdev(srcdev);
2102f06c5965SDavid Sterba 	call_rcu(&srcdev->rcu, free_device_rcu);
210394d5f0c2SAnand Jain 
210494d5f0c2SAnand Jain 	/* if there are no devs left we'd rather delete the fs_devices */
210594d5f0c2SAnand Jain 	if (!fs_devices->num_devices) {
210694d5f0c2SAnand Jain 		struct btrfs_fs_devices *tmp_fs_devices;
210794d5f0c2SAnand Jain 
21086dd38f81SAnand Jain 		/*
21096dd38f81SAnand Jain 		 * On a mounted FS, num_devices can't be zero unless it's a
21106dd38f81SAnand Jain 		 * seed. In case of a seed device being replaced, the replace
21116dd38f81SAnand Jain 		 * target is added to the sprout FS, so there will be no more
21126dd38f81SAnand Jain 		 * devices left under the seed FS.
21136dd38f81SAnand Jain 		 */
21146dd38f81SAnand Jain 		ASSERT(fs_devices->seeding);
21156dd38f81SAnand Jain 
211694d5f0c2SAnand Jain 		tmp_fs_devices = fs_info->fs_devices;
211794d5f0c2SAnand Jain 		while (tmp_fs_devices) {
211894d5f0c2SAnand Jain 			if (tmp_fs_devices->seed == fs_devices) {
211994d5f0c2SAnand Jain 				tmp_fs_devices->seed = fs_devices->seed;
212094d5f0c2SAnand Jain 				break;
212194d5f0c2SAnand Jain 			}
212294d5f0c2SAnand Jain 			tmp_fs_devices = tmp_fs_devices->seed;
212394d5f0c2SAnand Jain 		}
212494d5f0c2SAnand Jain 		fs_devices->seed = NULL;
21258bef8401SAnand Jain 		__btrfs_close_devices(fs_devices);
21268bef8401SAnand Jain 		free_fs_devices(fs_devices);
212794d5f0c2SAnand Jain 	}
2128e93c89c1SStefan Behrens }
2129e93c89c1SStefan Behrens 
2130e93c89c1SStefan Behrens void btrfs_destroy_dev_replace_tgtdev(struct btrfs_fs_info *fs_info,
2131e93c89c1SStefan Behrens 				      struct btrfs_device *tgtdev)
2132e93c89c1SStefan Behrens {
213367a2c45eSMiao Xie 	mutex_lock(&uuid_mutex);
2134e93c89c1SStefan Behrens 	WARN_ON(!tgtdev);
2135e93c89c1SStefan Behrens 	mutex_lock(&fs_info->fs_devices->device_list_mutex);
2136d2ff1b20SAnand Jain 
213732576040SAnand Jain 	btrfs_sysfs_rm_device_link(fs_info->fs_devices, tgtdev);
2138d2ff1b20SAnand Jain 
2139779bf3feSAnand Jain 	if (tgtdev->bdev)
2140e93c89c1SStefan Behrens 		fs_info->fs_devices->open_devices--;
2141779bf3feSAnand Jain 
2142e93c89c1SStefan Behrens 	fs_info->fs_devices->num_devices--;
2143e93c89c1SStefan Behrens 
214488acff64SAnand Jain 	btrfs_assign_next_active_device(fs_info, tgtdev, NULL);
2145e93c89c1SStefan Behrens 
2146e93c89c1SStefan Behrens 	list_del_rcu(&tgtdev->dev_list);
2147e93c89c1SStefan Behrens 
2148e93c89c1SStefan Behrens 	mutex_unlock(&fs_info->fs_devices->device_list_mutex);
214967a2c45eSMiao Xie 	mutex_unlock(&uuid_mutex);
2150779bf3feSAnand Jain 
2151779bf3feSAnand Jain 	/*
2152779bf3feSAnand Jain 	 * The update_dev_time() within btrfs_scratch_superblocks()
2153779bf3feSAnand Jain 	 * may lead to a call to btrfs_show_devname() which will try
2154779bf3feSAnand Jain 	 * to hold device_list_mutex. And here this device
2155779bf3feSAnand Jain 	 * is already out of the device list, so we don't have to hold
2156779bf3feSAnand Jain 	 * the device_list_mutex lock.
2157779bf3feSAnand Jain 	 */
2158779bf3feSAnand Jain 	btrfs_scratch_superblocks(tgtdev->bdev, tgtdev->name->str);
215914238819SAnand Jain 
216014238819SAnand Jain 	btrfs_close_bdev(tgtdev);
2161f06c5965SDavid Sterba 	call_rcu(&tgtdev->rcu, free_device_rcu);
2162e93c89c1SStefan Behrens }
2163e93c89c1SStefan Behrens 
21642ff7e61eSJeff Mahoney static int btrfs_find_device_by_path(struct btrfs_fs_info *fs_info,
2165da353f6bSDavid Sterba 				     const char *device_path,
21667ba15b7dSStefan Behrens 				     struct btrfs_device **device)
21677ba15b7dSStefan Behrens {
21687ba15b7dSStefan Behrens 	int ret = 0;
21697ba15b7dSStefan Behrens 	struct btrfs_super_block *disk_super;
21707ba15b7dSStefan Behrens 	u64 devid;
21717ba15b7dSStefan Behrens 	u8 *dev_uuid;
21727ba15b7dSStefan Behrens 	struct block_device *bdev;
21737ba15b7dSStefan Behrens 	struct buffer_head *bh;
21747ba15b7dSStefan Behrens 
21757ba15b7dSStefan Behrens 	*device = NULL;
21767ba15b7dSStefan Behrens 	ret = btrfs_get_bdev_and_sb(device_path, FMODE_READ,
21770b246afaSJeff Mahoney 				    fs_info->bdev_holder, 0, &bdev, &bh);
21787ba15b7dSStefan Behrens 	if (ret)
21797ba15b7dSStefan Behrens 		return ret;
21807ba15b7dSStefan Behrens 	disk_super = (struct btrfs_super_block *)bh->b_data;
21817ba15b7dSStefan Behrens 	devid = btrfs_stack_device_id(&disk_super->dev_item);
21827ba15b7dSStefan Behrens 	dev_uuid = disk_super->dev_item.uuid;
21830b246afaSJeff Mahoney 	*device = btrfs_find_device(fs_info, devid, dev_uuid, disk_super->fsid);
21847ba15b7dSStefan Behrens 	brelse(bh);
21857ba15b7dSStefan Behrens 	if (!*device)
21867ba15b7dSStefan Behrens 		ret = -ENOENT;
21877ba15b7dSStefan Behrens 	blkdev_put(bdev, FMODE_READ);
21887ba15b7dSStefan Behrens 	return ret;
21897ba15b7dSStefan Behrens }
21907ba15b7dSStefan Behrens 
21912ff7e61eSJeff Mahoney int btrfs_find_device_missing_or_by_path(struct btrfs_fs_info *fs_info,
2192da353f6bSDavid Sterba 					 const char *device_path,
21937ba15b7dSStefan Behrens 					 struct btrfs_device **device)
21947ba15b7dSStefan Behrens {
21957ba15b7dSStefan Behrens 	*device = NULL;
21967ba15b7dSStefan Behrens 	if (strcmp(device_path, "missing") == 0) {
21977ba15b7dSStefan Behrens 		struct list_head *devices;
21987ba15b7dSStefan Behrens 		struct btrfs_device *tmp;
21997ba15b7dSStefan Behrens 
22000b246afaSJeff Mahoney 		devices = &fs_info->fs_devices->devices;
22017ba15b7dSStefan Behrens 		/*
22027ba15b7dSStefan Behrens 		 * It is safe to read the devices since the volume_mutex
22037ba15b7dSStefan Behrens 		 * is held by the caller.
22047ba15b7dSStefan Behrens 		 */
22057ba15b7dSStefan Behrens 		list_for_each_entry(tmp, devices, dev_list) {
2206e12c9621SAnand Jain 			if (test_bit(BTRFS_DEV_STATE_IN_FS_METADATA,
2207e12c9621SAnand Jain 					&tmp->dev_state) && !tmp->bdev) {
22087ba15b7dSStefan Behrens 				*device = tmp;
22097ba15b7dSStefan Behrens 				break;
22107ba15b7dSStefan Behrens 			}
22117ba15b7dSStefan Behrens 		}
22127ba15b7dSStefan Behrens 
2213d74a6259SAnand Jain 		if (!*device)
2214d74a6259SAnand Jain 			return BTRFS_ERROR_DEV_MISSING_NOT_FOUND;
22157ba15b7dSStefan Behrens 
22167ba15b7dSStefan Behrens 		return 0;
22177ba15b7dSStefan Behrens 	} else {
22182ff7e61eSJeff Mahoney 		return btrfs_find_device_by_path(fs_info, device_path, device);
22197ba15b7dSStefan Behrens 	}
22207ba15b7dSStefan Behrens }
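/*
 * The literal path "missing" is how user space (e.g. "btrfs device remove
 * missing <mount>") asks for whichever device is still recorded in the
 * metadata but currently has no backing bdev.
 */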
22217ba15b7dSStefan Behrens 
22222b82032cSYan Zheng /*
22235c5c0df0SDavid Sterba  * Lookup a device given by device id, or the path if the id is 0.
22245c5c0df0SDavid Sterba  */
22252ff7e61eSJeff Mahoney int btrfs_find_device_by_devspec(struct btrfs_fs_info *fs_info, u64 devid,
2226da353f6bSDavid Sterba 				 const char *devpath,
2227da353f6bSDavid Sterba 				 struct btrfs_device **device)
222824e0474bSAnand Jain {
222924e0474bSAnand Jain 	int ret;
223024e0474bSAnand Jain 
22315c5c0df0SDavid Sterba 	if (devid) {
223224e0474bSAnand Jain 		ret = 0;
22330b246afaSJeff Mahoney 		*device = btrfs_find_device(fs_info, devid, NULL, NULL);
223424e0474bSAnand Jain 		if (!*device)
223524e0474bSAnand Jain 			ret = -ENOENT;
223624e0474bSAnand Jain 	} else {
22375c5c0df0SDavid Sterba 		if (!devpath || !devpath[0])
2238b3d1b153SAnand Jain 			return -EINVAL;
2239b3d1b153SAnand Jain 
22402ff7e61eSJeff Mahoney 		ret = btrfs_find_device_missing_or_by_path(fs_info, devpath,
224124e0474bSAnand Jain 							   device);
224224e0474bSAnand Jain 	}
224324e0474bSAnand Jain 	return ret;
224424e0474bSAnand Jain }
224524e0474bSAnand Jain 
22462b82032cSYan Zheng /*
22472b82032cSYan Zheng  * does all the dirty work required for changing the filesystem's UUID.
22482b82032cSYan Zheng  */
22492ff7e61eSJeff Mahoney static int btrfs_prepare_sprout(struct btrfs_fs_info *fs_info)
22502b82032cSYan Zheng {
22510b246afaSJeff Mahoney 	struct btrfs_fs_devices *fs_devices = fs_info->fs_devices;
22522b82032cSYan Zheng 	struct btrfs_fs_devices *old_devices;
2253e4404d6eSYan Zheng 	struct btrfs_fs_devices *seed_devices;
22540b246afaSJeff Mahoney 	struct btrfs_super_block *disk_super = fs_info->super_copy;
22552b82032cSYan Zheng 	struct btrfs_device *device;
22562b82032cSYan Zheng 	u64 super_flags;
22572b82032cSYan Zheng 
22582b82032cSYan Zheng 	BUG_ON(!mutex_is_locked(&uuid_mutex));
2259e4404d6eSYan Zheng 	if (!fs_devices->seeding)
22602b82032cSYan Zheng 		return -EINVAL;
22612b82032cSYan Zheng 
22622dfeca9bSDavid Sterba 	seed_devices = alloc_fs_devices(NULL);
22632208a378SIlya Dryomov 	if (IS_ERR(seed_devices))
22642208a378SIlya Dryomov 		return PTR_ERR(seed_devices);
22652b82032cSYan Zheng 
2266e4404d6eSYan Zheng 	old_devices = clone_fs_devices(fs_devices);
2267e4404d6eSYan Zheng 	if (IS_ERR(old_devices)) {
2268e4404d6eSYan Zheng 		kfree(seed_devices);
2269e4404d6eSYan Zheng 		return PTR_ERR(old_devices);
22702b82032cSYan Zheng 	}
2271e4404d6eSYan Zheng 
22722b82032cSYan Zheng 	list_add(&old_devices->list, &fs_uuids);
22732b82032cSYan Zheng 
2274e4404d6eSYan Zheng 	memcpy(seed_devices, fs_devices, sizeof(*seed_devices));
2275e4404d6eSYan Zheng 	seed_devices->opened = 1;
2276e4404d6eSYan Zheng 	INIT_LIST_HEAD(&seed_devices->devices);
2277e4404d6eSYan Zheng 	INIT_LIST_HEAD(&seed_devices->alloc_list);
2278e5e9a520SChris Mason 	mutex_init(&seed_devices->device_list_mutex);
2279c9513edbSXiao Guangrong 
22800b246afaSJeff Mahoney 	mutex_lock(&fs_info->fs_devices->device_list_mutex);
22811f78160cSXiao Guangrong 	list_splice_init_rcu(&fs_devices->devices, &seed_devices->devices,
22821f78160cSXiao Guangrong 			      synchronize_rcu);
22832196d6e8SMiao Xie 	list_for_each_entry(device, &seed_devices->devices, dev_list)
2284e4404d6eSYan Zheng 		device->fs_devices = seed_devices;
22852196d6e8SMiao Xie 
228634441361SDavid Sterba 	mutex_lock(&fs_info->chunk_mutex);
22872196d6e8SMiao Xie 	list_splice_init(&fs_devices->alloc_list, &seed_devices->alloc_list);
228834441361SDavid Sterba 	mutex_unlock(&fs_info->chunk_mutex);
2289e4404d6eSYan Zheng 
22902b82032cSYan Zheng 	fs_devices->seeding = 0;
22912b82032cSYan Zheng 	fs_devices->num_devices = 0;
22922b82032cSYan Zheng 	fs_devices->open_devices = 0;
229369611ac8SMiao Xie 	fs_devices->missing_devices = 0;
229469611ac8SMiao Xie 	fs_devices->rotating = 0;
2295e4404d6eSYan Zheng 	fs_devices->seed = seed_devices;
22962b82032cSYan Zheng 
22972b82032cSYan Zheng 	generate_random_uuid(fs_devices->fsid);
22980b246afaSJeff Mahoney 	memcpy(fs_info->fsid, fs_devices->fsid, BTRFS_FSID_SIZE);
22992b82032cSYan Zheng 	memcpy(disk_super->fsid, fs_devices->fsid, BTRFS_FSID_SIZE);
23000b246afaSJeff Mahoney 	mutex_unlock(&fs_info->fs_devices->device_list_mutex);
2301f7171750SFilipe David Borba Manana 
23022b82032cSYan Zheng 	super_flags = btrfs_super_flags(disk_super) &
23032b82032cSYan Zheng 		      ~BTRFS_SUPER_FLAG_SEEDING;
23042b82032cSYan Zheng 	btrfs_set_super_flags(disk_super, super_flags);
23052b82032cSYan Zheng 
23062b82032cSYan Zheng 	return 0;
23072b82032cSYan Zheng }
23082b82032cSYan Zheng 
23092b82032cSYan Zheng /*
231001327610SNicholas D Steeves  * Store the expected generation for seed devices in device items.
23112b82032cSYan Zheng  */
23122b82032cSYan Zheng static int btrfs_finish_sprout(struct btrfs_trans_handle *trans,
23135b4aacefSJeff Mahoney 			       struct btrfs_fs_info *fs_info)
23142b82032cSYan Zheng {
23155b4aacefSJeff Mahoney 	struct btrfs_root *root = fs_info->chunk_root;
23162b82032cSYan Zheng 	struct btrfs_path *path;
23172b82032cSYan Zheng 	struct extent_buffer *leaf;
23182b82032cSYan Zheng 	struct btrfs_dev_item *dev_item;
23192b82032cSYan Zheng 	struct btrfs_device *device;
23202b82032cSYan Zheng 	struct btrfs_key key;
232144880fdcSAnand Jain 	u8 fs_uuid[BTRFS_FSID_SIZE];
23222b82032cSYan Zheng 	u8 dev_uuid[BTRFS_UUID_SIZE];
23232b82032cSYan Zheng 	u64 devid;
23242b82032cSYan Zheng 	int ret;
23252b82032cSYan Zheng 
23262b82032cSYan Zheng 	path = btrfs_alloc_path();
23272b82032cSYan Zheng 	if (!path)
23282b82032cSYan Zheng 		return -ENOMEM;
23292b82032cSYan Zheng 
23302b82032cSYan Zheng 	key.objectid = BTRFS_DEV_ITEMS_OBJECTID;
23312b82032cSYan Zheng 	key.offset = 0;
23322b82032cSYan Zheng 	key.type = BTRFS_DEV_ITEM_KEY;
23332b82032cSYan Zheng 
23342b82032cSYan Zheng 	while (1) {
23352b82032cSYan Zheng 		ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
23362b82032cSYan Zheng 		if (ret < 0)
23372b82032cSYan Zheng 			goto error;
23382b82032cSYan Zheng 
23392b82032cSYan Zheng 		leaf = path->nodes[0];
23402b82032cSYan Zheng next_slot:
23412b82032cSYan Zheng 		if (path->slots[0] >= btrfs_header_nritems(leaf)) {
23422b82032cSYan Zheng 			ret = btrfs_next_leaf(root, path);
23432b82032cSYan Zheng 			if (ret > 0)
23442b82032cSYan Zheng 				break;
23452b82032cSYan Zheng 			if (ret < 0)
23462b82032cSYan Zheng 				goto error;
23472b82032cSYan Zheng 			leaf = path->nodes[0];
23482b82032cSYan Zheng 			btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
2349b3b4aa74SDavid Sterba 			btrfs_release_path(path);
23502b82032cSYan Zheng 			continue;
23512b82032cSYan Zheng 		}
23522b82032cSYan Zheng 
23532b82032cSYan Zheng 		btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
23542b82032cSYan Zheng 		if (key.objectid != BTRFS_DEV_ITEMS_OBJECTID ||
23552b82032cSYan Zheng 		    key.type != BTRFS_DEV_ITEM_KEY)
23562b82032cSYan Zheng 			break;
23572b82032cSYan Zheng 
23582b82032cSYan Zheng 		dev_item = btrfs_item_ptr(leaf, path->slots[0],
23592b82032cSYan Zheng 					  struct btrfs_dev_item);
23602b82032cSYan Zheng 		devid = btrfs_device_id(leaf, dev_item);
2361410ba3a2SGeert Uytterhoeven 		read_extent_buffer(leaf, dev_uuid, btrfs_device_uuid(dev_item),
23622b82032cSYan Zheng 				   BTRFS_UUID_SIZE);
23631473b24eSGeert Uytterhoeven 		read_extent_buffer(leaf, fs_uuid, btrfs_device_fsid(dev_item),
236444880fdcSAnand Jain 				   BTRFS_FSID_SIZE);
23650b246afaSJeff Mahoney 		device = btrfs_find_device(fs_info, devid, dev_uuid, fs_uuid);
236679787eaaSJeff Mahoney 		BUG_ON(!device); /* Logic error */
23672b82032cSYan Zheng 
23682b82032cSYan Zheng 		if (device->fs_devices->seeding) {
23692b82032cSYan Zheng 			btrfs_set_device_generation(leaf, dev_item,
23702b82032cSYan Zheng 						    device->generation);
23712b82032cSYan Zheng 			btrfs_mark_buffer_dirty(leaf);
23722b82032cSYan Zheng 		}
23732b82032cSYan Zheng 
23742b82032cSYan Zheng 		path->slots[0]++;
23752b82032cSYan Zheng 		goto next_slot;
23762b82032cSYan Zheng 	}
23772b82032cSYan Zheng 	ret = 0;
23782b82032cSYan Zheng error:
23792b82032cSYan Zheng 	btrfs_free_path(path);
23802b82032cSYan Zheng 	return ret;
23812b82032cSYan Zheng }
23822b82032cSYan Zheng 
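/*
 * Summary comment (not in the original source): add a new device to a
 * mounted filesystem.  Open the block device exclusively, allocate and
 * initialize the btrfs_device, link it into the fs_devices lists, update
 * the superblock counters and write the new device item.  If the
 * filesystem is a seed, sprout a new fsid first and relocate the system
 * chunks after the transaction commits.
 */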
2383da353f6bSDavid Sterba int btrfs_init_new_device(struct btrfs_fs_info *fs_info, const char *device_path)
2384788f20ebSChris Mason {
23855112febbSJeff Mahoney 	struct btrfs_root *root = fs_info->dev_root;
2386d5e2003cSJosef Bacik 	struct request_queue *q;
2387788f20ebSChris Mason 	struct btrfs_trans_handle *trans;
2388788f20ebSChris Mason 	struct btrfs_device *device;
2389788f20ebSChris Mason 	struct block_device *bdev;
2390788f20ebSChris Mason 	struct list_head *devices;
23910b246afaSJeff Mahoney 	struct super_block *sb = fs_info->sb;
2392606686eeSJosef Bacik 	struct rcu_string *name;
23933c1dbdf5SAnand Jain 	u64 tmp;
23942b82032cSYan Zheng 	int seeding_dev = 0;
2395788f20ebSChris Mason 	int ret = 0;
23967132a262SAnand Jain 	bool unlocked = false;
2397788f20ebSChris Mason 
2398bc98a42cSDavid Howells 	if (sb_rdonly(sb) && !fs_info->fs_devices->seeding)
2399f8c5d0b4SLiu Bo 		return -EROFS;
2400788f20ebSChris Mason 
2401a5d16333SLi Zefan 	bdev = blkdev_get_by_path(device_path, FMODE_WRITE | FMODE_EXCL,
24020b246afaSJeff Mahoney 				  fs_info->bdev_holder);
24037f59203aSJosef Bacik 	if (IS_ERR(bdev))
24047f59203aSJosef Bacik 		return PTR_ERR(bdev);
2405a2135011SChris Mason 
24060b246afaSJeff Mahoney 	if (fs_info->fs_devices->seeding) {
24072b82032cSYan Zheng 		seeding_dev = 1;
24082b82032cSYan Zheng 		down_write(&sb->s_umount);
24092b82032cSYan Zheng 		mutex_lock(&uuid_mutex);
24102b82032cSYan Zheng 	}
24112b82032cSYan Zheng 
24128c8bee1dSChris Mason 	filemap_write_and_wait(bdev->bd_inode->i_mapping);
2413a2135011SChris Mason 
24140b246afaSJeff Mahoney 	devices = &fs_info->fs_devices->devices;
2415d25628bdSLiu Bo 
24160b246afaSJeff Mahoney 	mutex_lock(&fs_info->fs_devices->device_list_mutex);
2417c6e30871SQinghuang Feng 	list_for_each_entry(device, devices, dev_list) {
2418788f20ebSChris Mason 		if (device->bdev == bdev) {
2419788f20ebSChris Mason 			ret = -EEXIST;
2420d25628bdSLiu Bo 			mutex_unlock(
24210b246afaSJeff Mahoney 				&fs_info->fs_devices->device_list_mutex);
24222b82032cSYan Zheng 			goto error;
2423788f20ebSChris Mason 		}
2424788f20ebSChris Mason 	}
24250b246afaSJeff Mahoney 	mutex_unlock(&fs_info->fs_devices->device_list_mutex);
2426788f20ebSChris Mason 
24270b246afaSJeff Mahoney 	device = btrfs_alloc_device(fs_info, NULL, NULL);
242812bd2fc0SIlya Dryomov 	if (IS_ERR(device)) {
2429788f20ebSChris Mason 		/* we can safely leave the fs_devices entry around */
243012bd2fc0SIlya Dryomov 		ret = PTR_ERR(device);
24312b82032cSYan Zheng 		goto error;
2432788f20ebSChris Mason 	}
2433788f20ebSChris Mason 
243478f2c9e6SDavid Sterba 	name = rcu_string_strdup(device_path, GFP_KERNEL);
2435606686eeSJosef Bacik 	if (!name) {
24362b82032cSYan Zheng 		ret = -ENOMEM;
24375c4cf6c9SDavid Sterba 		goto error_free_device;
2438788f20ebSChris Mason 	}
2439606686eeSJosef Bacik 	rcu_assign_pointer(device->name, name);
24402b82032cSYan Zheng 
2441a22285a6SYan, Zheng 	trans = btrfs_start_transaction(root, 0);
244298d5dc13STsutomu Itoh 	if (IS_ERR(trans)) {
244398d5dc13STsutomu Itoh 		ret = PTR_ERR(trans);
24445c4cf6c9SDavid Sterba 		goto error_free_device;
244598d5dc13STsutomu Itoh 	}
244698d5dc13STsutomu Itoh 
2447d5e2003cSJosef Bacik 	q = bdev_get_queue(bdev);
2448ebbede42SAnand Jain 	set_bit(BTRFS_DEV_STATE_WRITEABLE, &device->dev_state);
24492b82032cSYan Zheng 	device->generation = trans->transid;
24500b246afaSJeff Mahoney 	device->io_width = fs_info->sectorsize;
24510b246afaSJeff Mahoney 	device->io_align = fs_info->sectorsize;
24520b246afaSJeff Mahoney 	device->sector_size = fs_info->sectorsize;
24537dfb8be1SNikolay Borisov 	device->total_bytes = round_down(i_size_read(bdev->bd_inode),
24547dfb8be1SNikolay Borisov 					 fs_info->sectorsize);
24552cc3c559SYan Zheng 	device->disk_total_bytes = device->total_bytes;
2456935e5cc9SMiao Xie 	device->commit_total_bytes = device->total_bytes;
2457fb456252SJeff Mahoney 	device->fs_info = fs_info;
2458788f20ebSChris Mason 	device->bdev = bdev;
2459e12c9621SAnand Jain 	set_bit(BTRFS_DEV_STATE_IN_FS_METADATA, &device->dev_state);
2460401e29c1SAnand Jain 	clear_bit(BTRFS_DEV_STATE_REPLACE_TGT, &device->dev_state);
2461fb01aa85SIlya Dryomov 	device->mode = FMODE_EXCL;
246227087f37SStefan Behrens 	device->dev_stats_valid = 1;
24639f6d2510SDavid Sterba 	set_blocksize(device->bdev, BTRFS_BDEV_BLOCKSIZE);
2464325cd4baSZheng Yan 
24652b82032cSYan Zheng 	if (seeding_dev) {
24661751e8a6SLinus Torvalds 		sb->s_flags &= ~SB_RDONLY;
24672ff7e61eSJeff Mahoney 		ret = btrfs_prepare_sprout(fs_info);
2468d31c32f6SAnand Jain 		if (ret) {
2469d31c32f6SAnand Jain 			btrfs_abort_transaction(trans, ret);
2470d31c32f6SAnand Jain 			goto error_trans;
2471d31c32f6SAnand Jain 		}
24722b82032cSYan Zheng 	}
24732b82032cSYan Zheng 
24740b246afaSJeff Mahoney 	device->fs_devices = fs_info->fs_devices;
2475e5e9a520SChris Mason 
24760b246afaSJeff Mahoney 	mutex_lock(&fs_info->fs_devices->device_list_mutex);
247734441361SDavid Sterba 	mutex_lock(&fs_info->chunk_mutex);
24780b246afaSJeff Mahoney 	list_add_rcu(&device->dev_list, &fs_info->fs_devices->devices);
24792b82032cSYan Zheng 	list_add(&device->dev_alloc_list,
24800b246afaSJeff Mahoney 		 &fs_info->fs_devices->alloc_list);
24810b246afaSJeff Mahoney 	fs_info->fs_devices->num_devices++;
24820b246afaSJeff Mahoney 	fs_info->fs_devices->open_devices++;
24830b246afaSJeff Mahoney 	fs_info->fs_devices->rw_devices++;
24840b246afaSJeff Mahoney 	fs_info->fs_devices->total_devices++;
24850b246afaSJeff Mahoney 	fs_info->fs_devices->total_rw_bytes += device->total_bytes;
24862b82032cSYan Zheng 
2487a5ed45f8SNikolay Borisov 	atomic64_add(device->total_bytes, &fs_info->free_chunk_space);
24882bf64758SJosef Bacik 
2489e884f4f0SAnand Jain 	if (!blk_queue_nonrot(q))
24900b246afaSJeff Mahoney 		fs_info->fs_devices->rotating = 1;
2491c289811cSChris Mason 
24920b246afaSJeff Mahoney 	tmp = btrfs_super_total_bytes(fs_info->super_copy);
24930b246afaSJeff Mahoney 	btrfs_set_super_total_bytes(fs_info->super_copy,
24947dfb8be1SNikolay Borisov 		round_down(tmp + device->total_bytes, fs_info->sectorsize));
2495788f20ebSChris Mason 
24960b246afaSJeff Mahoney 	tmp = btrfs_super_num_devices(fs_info->super_copy);
24970b246afaSJeff Mahoney 	btrfs_set_super_num_devices(fs_info->super_copy, tmp + 1);
24980d39376aSAnand Jain 
24990d39376aSAnand Jain 	/* add sysfs device entry */
25000b246afaSJeff Mahoney 	btrfs_sysfs_add_device_link(fs_info->fs_devices, device);
25010d39376aSAnand Jain 
25022196d6e8SMiao Xie 	/*
25032196d6e8SMiao Xie 	 * we've got more storage, clear any full flags on the space
25042196d6e8SMiao Xie 	 * infos
25052196d6e8SMiao Xie 	 */
25060b246afaSJeff Mahoney 	btrfs_clear_space_info_full(fs_info);
25072196d6e8SMiao Xie 
250834441361SDavid Sterba 	mutex_unlock(&fs_info->chunk_mutex);
25090b246afaSJeff Mahoney 	mutex_unlock(&fs_info->fs_devices->device_list_mutex);
2510788f20ebSChris Mason 
25112b82032cSYan Zheng 	if (seeding_dev) {
251234441361SDavid Sterba 		mutex_lock(&fs_info->chunk_mutex);
2513e4a4dce7SDavid Sterba 		ret = init_first_rw_device(trans, fs_info);
251434441361SDavid Sterba 		mutex_unlock(&fs_info->chunk_mutex);
2515005d6427SDavid Sterba 		if (ret) {
251666642832SJeff Mahoney 			btrfs_abort_transaction(trans, ret);
2517d31c32f6SAnand Jain 			goto error_sysfs;
2518005d6427SDavid Sterba 		}
25192196d6e8SMiao Xie 	}
25202196d6e8SMiao Xie 
2521c74a0b02SAnand Jain 	ret = btrfs_add_dev_item(trans, fs_info, device);
25222196d6e8SMiao Xie 	if (ret) {
252366642832SJeff Mahoney 		btrfs_abort_transaction(trans, ret);
2524d31c32f6SAnand Jain 		goto error_sysfs;
25252196d6e8SMiao Xie 	}
25262196d6e8SMiao Xie 
25272196d6e8SMiao Xie 	if (seeding_dev) {
25282196d6e8SMiao Xie 		char fsid_buf[BTRFS_UUID_UNPARSED_SIZE];
25292196d6e8SMiao Xie 
25300b246afaSJeff Mahoney 		ret = btrfs_finish_sprout(trans, fs_info);
2531005d6427SDavid Sterba 		if (ret) {
253266642832SJeff Mahoney 			btrfs_abort_transaction(trans, ret);
2533d31c32f6SAnand Jain 			goto error_sysfs;
2534005d6427SDavid Sterba 		}
2535b2373f25SAnand Jain 
2536b2373f25SAnand Jain 		/* Sprouting would change the fsid of the mounted root,
2537b2373f25SAnand Jain 		 * so rename the fsid entry in sysfs accordingly.
2538b2373f25SAnand Jain 		 */
2539b2373f25SAnand Jain 		snprintf(fsid_buf, BTRFS_UUID_UNPARSED_SIZE, "%pU",
25400b246afaSJeff Mahoney 						fs_info->fsid);
25410b246afaSJeff Mahoney 		if (kobject_rename(&fs_info->fs_devices->fsid_kobj, fsid_buf))
25420b246afaSJeff Mahoney 			btrfs_warn(fs_info,
2543f14d104dSDavid Sterba 				   "sysfs: failed to create fsid for sprout");
2544005d6427SDavid Sterba 	}
25452b82032cSYan Zheng 
25463a45bb20SJeff Mahoney 	ret = btrfs_commit_transaction(trans);
25472b82032cSYan Zheng 
25482b82032cSYan Zheng 	if (seeding_dev) {
25492b82032cSYan Zheng 		mutex_unlock(&uuid_mutex);
25502b82032cSYan Zheng 		up_write(&sb->s_umount);
25517132a262SAnand Jain 		unlocked = true;
25522b82032cSYan Zheng 
255379787eaaSJeff Mahoney 		if (ret) /* transaction commit */
255479787eaaSJeff Mahoney 			return ret;
255579787eaaSJeff Mahoney 
25562ff7e61eSJeff Mahoney 		ret = btrfs_relocate_sys_chunks(fs_info);
255779787eaaSJeff Mahoney 		if (ret < 0)
25580b246afaSJeff Mahoney 			btrfs_handle_fs_error(fs_info, ret,
25595d163e0eSJeff Mahoney 				    "Failed to relocate sys chunks after device initialization. This can be fixed using the \"btrfs balance\" command.");
2560671415b7SMiao Xie 		trans = btrfs_attach_transaction(root);
2561671415b7SMiao Xie 		if (IS_ERR(trans)) {
2562671415b7SMiao Xie 			if (PTR_ERR(trans) == -ENOENT)
2563671415b7SMiao Xie 				return 0;
25647132a262SAnand Jain 			ret = PTR_ERR(trans);
25657132a262SAnand Jain 			trans = NULL;
25667132a262SAnand Jain 			goto error_sysfs;
2567671415b7SMiao Xie 		}
25683a45bb20SJeff Mahoney 		ret = btrfs_commit_transaction(trans);
25692b82032cSYan Zheng 	}
2570c9e9f97bSIlya Dryomov 
25715a1972bdSQu Wenruo 	/* Update ctime/mtime for libblkid */
25725a1972bdSQu Wenruo 	update_dev_time(device_path);
2573788f20ebSChris Mason 	return ret;
257479787eaaSJeff Mahoney 
2575d31c32f6SAnand Jain error_sysfs:
2576d31c32f6SAnand Jain 	btrfs_sysfs_rm_device_link(fs_info->fs_devices, device);
257779787eaaSJeff Mahoney error_trans:
25780af2c4bfSAnand Jain 	if (seeding_dev)
25791751e8a6SLinus Torvalds 		sb->s_flags |= SB_RDONLY;
25807132a262SAnand Jain 	if (trans)
25813a45bb20SJeff Mahoney 		btrfs_end_transaction(trans);
25825c4cf6c9SDavid Sterba error_free_device:
258355de4803SDavid Sterba 	free_device(device);
25842b82032cSYan Zheng error:
2585e525fd89STejun Heo 	blkdev_put(bdev, FMODE_EXCL);
25867132a262SAnand Jain 	if (seeding_dev && !unlocked) {
25872b82032cSYan Zheng 		mutex_unlock(&uuid_mutex);
25882b82032cSYan Zheng 		up_write(&sb->s_umount);
25892b82032cSYan Zheng 	}
2590c9e9f97bSIlya Dryomov 	return ret;
2591788f20ebSChris Mason }
2592788f20ebSChris Mason 
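/*
 * Summary comment (not in the original source): open and initialize the
 * replace target device for dev-replace.  The target must not already be
 * part of the filesystem and must be at least as large as the source
 * device; its size fields are copied from srcdev.
 */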
25932ff7e61eSJeff Mahoney int btrfs_init_dev_replace_tgtdev(struct btrfs_fs_info *fs_info,
2594da353f6bSDavid Sterba 				  const char *device_path,
25951c43366dSMiao Xie 				  struct btrfs_device *srcdev,
2596e93c89c1SStefan Behrens 				  struct btrfs_device **device_out)
2597e93c89c1SStefan Behrens {
2598e93c89c1SStefan Behrens 	struct btrfs_device *device;
2599e93c89c1SStefan Behrens 	struct block_device *bdev;
2600e93c89c1SStefan Behrens 	struct list_head *devices;
2601e93c89c1SStefan Behrens 	struct rcu_string *name;
260212bd2fc0SIlya Dryomov 	u64 devid = BTRFS_DEV_REPLACE_DEVID;
2603e93c89c1SStefan Behrens 	int ret = 0;
2604e93c89c1SStefan Behrens 
2605e93c89c1SStefan Behrens 	*device_out = NULL;
26061c43366dSMiao Xie 	if (fs_info->fs_devices->seeding) {
26071c43366dSMiao Xie 		btrfs_err(fs_info, "the filesystem is a seed filesystem!");
2608e93c89c1SStefan Behrens 		return -EINVAL;
26091c43366dSMiao Xie 	}
2610e93c89c1SStefan Behrens 
2611e93c89c1SStefan Behrens 	bdev = blkdev_get_by_path(device_path, FMODE_WRITE | FMODE_EXCL,
2612e93c89c1SStefan Behrens 				  fs_info->bdev_holder);
26131c43366dSMiao Xie 	if (IS_ERR(bdev)) {
26141c43366dSMiao Xie 		btrfs_err(fs_info, "target device %s is invalid!", device_path);
2615e93c89c1SStefan Behrens 		return PTR_ERR(bdev);
26161c43366dSMiao Xie 	}
2617e93c89c1SStefan Behrens 
2618e93c89c1SStefan Behrens 	filemap_write_and_wait(bdev->bd_inode->i_mapping);
2619e93c89c1SStefan Behrens 
2620e93c89c1SStefan Behrens 	devices = &fs_info->fs_devices->devices;
2621e93c89c1SStefan Behrens 	list_for_each_entry(device, devices, dev_list) {
2622e93c89c1SStefan Behrens 		if (device->bdev == bdev) {
26235d163e0eSJeff Mahoney 			btrfs_err(fs_info,
26245d163e0eSJeff Mahoney 				  "target device is in the filesystem!");
2625e93c89c1SStefan Behrens 			ret = -EEXIST;
2626e93c89c1SStefan Behrens 			goto error;
2627e93c89c1SStefan Behrens 		}
2628e93c89c1SStefan Behrens 	}
2629e93c89c1SStefan Behrens 
26301c43366dSMiao Xie 
26317cc8e58dSMiao Xie 	if (i_size_read(bdev->bd_inode) <
26327cc8e58dSMiao Xie 	    btrfs_device_get_total_bytes(srcdev)) {
26335d163e0eSJeff Mahoney 		btrfs_err(fs_info,
26345d163e0eSJeff Mahoney 			  "target device is smaller than source device!");
26351c43366dSMiao Xie 		ret = -EINVAL;
26361c43366dSMiao Xie 		goto error;
26371c43366dSMiao Xie 	}
26381c43366dSMiao Xie 
26391c43366dSMiao Xie 
264012bd2fc0SIlya Dryomov 	device = btrfs_alloc_device(NULL, &devid, NULL);
264112bd2fc0SIlya Dryomov 	if (IS_ERR(device)) {
264212bd2fc0SIlya Dryomov 		ret = PTR_ERR(device);
2643e93c89c1SStefan Behrens 		goto error;
2644e93c89c1SStefan Behrens 	}
2645e93c89c1SStefan Behrens 
26466165572cSDavid Sterba 	name = rcu_string_strdup(device_path, GFP_KERNEL);
2647e93c89c1SStefan Behrens 	if (!name) {
264855de4803SDavid Sterba 		free_device(device);
2649e93c89c1SStefan Behrens 		ret = -ENOMEM;
2650e93c89c1SStefan Behrens 		goto error;
2651e93c89c1SStefan Behrens 	}
2652e93c89c1SStefan Behrens 	rcu_assign_pointer(device->name, name);
2653e93c89c1SStefan Behrens 
26540b246afaSJeff Mahoney 	mutex_lock(&fs_info->fs_devices->device_list_mutex);
2655ebbede42SAnand Jain 	set_bit(BTRFS_DEV_STATE_WRITEABLE, &device->dev_state);
2656e93c89c1SStefan Behrens 	device->generation = 0;
26570b246afaSJeff Mahoney 	device->io_width = fs_info->sectorsize;
26580b246afaSJeff Mahoney 	device->io_align = fs_info->sectorsize;
26590b246afaSJeff Mahoney 	device->sector_size = fs_info->sectorsize;
26607cc8e58dSMiao Xie 	device->total_bytes = btrfs_device_get_total_bytes(srcdev);
26617cc8e58dSMiao Xie 	device->disk_total_bytes = btrfs_device_get_disk_total_bytes(srcdev);
26627cc8e58dSMiao Xie 	device->bytes_used = btrfs_device_get_bytes_used(srcdev);
2663935e5cc9SMiao Xie 	ASSERT(list_empty(&srcdev->resized_list));
2664935e5cc9SMiao Xie 	device->commit_total_bytes = srcdev->commit_total_bytes;
2665ce7213c7SMiao Xie 	device->commit_bytes_used = device->bytes_used;
2666fb456252SJeff Mahoney 	device->fs_info = fs_info;
2667e93c89c1SStefan Behrens 	device->bdev = bdev;
2668e12c9621SAnand Jain 	set_bit(BTRFS_DEV_STATE_IN_FS_METADATA, &device->dev_state);
2669401e29c1SAnand Jain 	set_bit(BTRFS_DEV_STATE_REPLACE_TGT, &device->dev_state);
2670e93c89c1SStefan Behrens 	device->mode = FMODE_EXCL;
267127087f37SStefan Behrens 	device->dev_stats_valid = 1;
26729f6d2510SDavid Sterba 	set_blocksize(device->bdev, BTRFS_BDEV_BLOCKSIZE);
2673e93c89c1SStefan Behrens 	device->fs_devices = fs_info->fs_devices;
2674e93c89c1SStefan Behrens 	list_add(&device->dev_list, &fs_info->fs_devices->devices);
2675e93c89c1SStefan Behrens 	fs_info->fs_devices->num_devices++;
2676e93c89c1SStefan Behrens 	fs_info->fs_devices->open_devices++;
26770b246afaSJeff Mahoney 	mutex_unlock(&fs_info->fs_devices->device_list_mutex);
2678e93c89c1SStefan Behrens 
2679e93c89c1SStefan Behrens 	*device_out = device;
2680e93c89c1SStefan Behrens 	return ret;
2681e93c89c1SStefan Behrens 
2682e93c89c1SStefan Behrens error:
2683e93c89c1SStefan Behrens 	blkdev_put(bdev, FMODE_EXCL);
2684e93c89c1SStefan Behrens 	return ret;
2685e93c89c1SStefan Behrens }
2686e93c89c1SStefan Behrens 
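/*
 * Summary comment (not in the original source): minimal re-initialization
 * of an already registered replace target device when a dev-replace
 * operation is resumed after mount.
 */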
2687e93c89c1SStefan Behrens void btrfs_init_dev_replace_tgtdev_for_resume(struct btrfs_fs_info *fs_info,
2688e93c89c1SStefan Behrens 					      struct btrfs_device *tgtdev)
2689e93c89c1SStefan Behrens {
2690da17066cSJeff Mahoney 	u32 sectorsize = fs_info->sectorsize;
2691da17066cSJeff Mahoney 
2692e93c89c1SStefan Behrens 	WARN_ON(fs_info->fs_devices->rw_devices == 0);
2693da17066cSJeff Mahoney 	tgtdev->io_width = sectorsize;
2694da17066cSJeff Mahoney 	tgtdev->io_align = sectorsize;
2695da17066cSJeff Mahoney 	tgtdev->sector_size = sectorsize;
2696fb456252SJeff Mahoney 	tgtdev->fs_info = fs_info;
2697e12c9621SAnand Jain 	set_bit(BTRFS_DEV_STATE_IN_FS_METADATA, &tgtdev->dev_state);
2698e93c89c1SStefan Behrens }
2699e93c89c1SStefan Behrens 
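/*
 * Summary comment (not in the original source): write the in-memory device
 * fields (id, type, io parameters, sizes) back into the corresponding
 * device item in the chunk tree.
 */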
2700d397712bSChris Mason static noinline int btrfs_update_device(struct btrfs_trans_handle *trans,
27010b86a832SChris Mason 					struct btrfs_device *device)
27020b86a832SChris Mason {
27030b86a832SChris Mason 	int ret;
27040b86a832SChris Mason 	struct btrfs_path *path;
27050b246afaSJeff Mahoney 	struct btrfs_root *root = device->fs_info->chunk_root;
27060b86a832SChris Mason 	struct btrfs_dev_item *dev_item;
27070b86a832SChris Mason 	struct extent_buffer *leaf;
27080b86a832SChris Mason 	struct btrfs_key key;
27090b86a832SChris Mason 
27100b86a832SChris Mason 	path = btrfs_alloc_path();
27110b86a832SChris Mason 	if (!path)
27120b86a832SChris Mason 		return -ENOMEM;
27130b86a832SChris Mason 
27140b86a832SChris Mason 	key.objectid = BTRFS_DEV_ITEMS_OBJECTID;
27150b86a832SChris Mason 	key.type = BTRFS_DEV_ITEM_KEY;
27160b86a832SChris Mason 	key.offset = device->devid;
27170b86a832SChris Mason 
27180b86a832SChris Mason 	ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
27190b86a832SChris Mason 	if (ret < 0)
27200b86a832SChris Mason 		goto out;
27210b86a832SChris Mason 
27220b86a832SChris Mason 	if (ret > 0) {
27230b86a832SChris Mason 		ret = -ENOENT;
27240b86a832SChris Mason 		goto out;
27250b86a832SChris Mason 	}
27260b86a832SChris Mason 
27270b86a832SChris Mason 	leaf = path->nodes[0];
27280b86a832SChris Mason 	dev_item = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_dev_item);
27290b86a832SChris Mason 
27300b86a832SChris Mason 	btrfs_set_device_id(leaf, dev_item, device->devid);
27310b86a832SChris Mason 	btrfs_set_device_type(leaf, dev_item, device->type);
27320b86a832SChris Mason 	btrfs_set_device_io_align(leaf, dev_item, device->io_align);
27330b86a832SChris Mason 	btrfs_set_device_io_width(leaf, dev_item, device->io_width);
27340b86a832SChris Mason 	btrfs_set_device_sector_size(leaf, dev_item, device->sector_size);
27357cc8e58dSMiao Xie 	btrfs_set_device_total_bytes(leaf, dev_item,
27367cc8e58dSMiao Xie 				     btrfs_device_get_disk_total_bytes(device));
27377cc8e58dSMiao Xie 	btrfs_set_device_bytes_used(leaf, dev_item,
27387cc8e58dSMiao Xie 				    btrfs_device_get_bytes_used(device));
27390b86a832SChris Mason 	btrfs_mark_buffer_dirty(leaf);
27400b86a832SChris Mason 
27410b86a832SChris Mason out:
27420b86a832SChris Mason 	btrfs_free_path(path);
27430b86a832SChris Mason 	return ret;
27440b86a832SChris Mason }
27450b86a832SChris Mason 
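/*
 * Summary comment (not in the original source): grow a device to new_size
 * (rounded down to the sector size).  Update the superblock total bytes
 * and the in-memory device sizes, queue the device on the resized list,
 * then persist the change via btrfs_update_device().
 */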
27462196d6e8SMiao Xie int btrfs_grow_device(struct btrfs_trans_handle *trans,
27478f18cf13SChris Mason 		      struct btrfs_device *device, u64 new_size)
27488f18cf13SChris Mason {
27490b246afaSJeff Mahoney 	struct btrfs_fs_info *fs_info = device->fs_info;
27500b246afaSJeff Mahoney 	struct btrfs_super_block *super_copy = fs_info->super_copy;
2751935e5cc9SMiao Xie 	struct btrfs_fs_devices *fs_devices;
27522196d6e8SMiao Xie 	u64 old_total;
27532196d6e8SMiao Xie 	u64 diff;
27548f18cf13SChris Mason 
2755ebbede42SAnand Jain 	if (!test_bit(BTRFS_DEV_STATE_WRITEABLE, &device->dev_state))
27562b82032cSYan Zheng 		return -EACCES;
27572196d6e8SMiao Xie 
27587dfb8be1SNikolay Borisov 	new_size = round_down(new_size, fs_info->sectorsize);
27597dfb8be1SNikolay Borisov 
276034441361SDavid Sterba 	mutex_lock(&fs_info->chunk_mutex);
27612196d6e8SMiao Xie 	old_total = btrfs_super_total_bytes(super_copy);
27620e4324a4SNikolay Borisov 	diff = round_down(new_size - device->total_bytes, fs_info->sectorsize);
27632196d6e8SMiao Xie 
276463a212abSStefan Behrens 	if (new_size <= device->total_bytes ||
2765401e29c1SAnand Jain 	    test_bit(BTRFS_DEV_STATE_REPLACE_TGT, &device->dev_state)) {
276634441361SDavid Sterba 		mutex_unlock(&fs_info->chunk_mutex);
27672b82032cSYan Zheng 		return -EINVAL;
27682196d6e8SMiao Xie 	}
27692b82032cSYan Zheng 
27700b246afaSJeff Mahoney 	fs_devices = fs_info->fs_devices;
27718f18cf13SChris Mason 
27727dfb8be1SNikolay Borisov 	btrfs_set_super_total_bytes(super_copy,
27737dfb8be1SNikolay Borisov 			round_down(old_total + diff, fs_info->sectorsize));
27742b82032cSYan Zheng 	device->fs_devices->total_rw_bytes += diff;
27752b82032cSYan Zheng 
27767cc8e58dSMiao Xie 	btrfs_device_set_total_bytes(device, new_size);
27777cc8e58dSMiao Xie 	btrfs_device_set_disk_total_bytes(device, new_size);
2778fb456252SJeff Mahoney 	btrfs_clear_space_info_full(device->fs_info);
2779935e5cc9SMiao Xie 	if (list_empty(&device->resized_list))
2780935e5cc9SMiao Xie 		list_add_tail(&device->resized_list,
2781935e5cc9SMiao Xie 			      &fs_devices->resized_devices);
278234441361SDavid Sterba 	mutex_unlock(&fs_info->chunk_mutex);
27834184ea7fSChris Mason 
27848f18cf13SChris Mason 	return btrfs_update_device(trans, device);
27858f18cf13SChris Mason }
27868f18cf13SChris Mason 
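/*
 * Summary comment (not in the original source): delete the chunk item for
 * chunk_offset from the chunk tree.
 */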
27878f18cf13SChris Mason static int btrfs_free_chunk(struct btrfs_trans_handle *trans,
2788408fbf19SNikolay Borisov 			    struct btrfs_fs_info *fs_info, u64 chunk_offset)
27898f18cf13SChris Mason {
27905b4aacefSJeff Mahoney 	struct btrfs_root *root = fs_info->chunk_root;
27918f18cf13SChris Mason 	int ret;
27928f18cf13SChris Mason 	struct btrfs_path *path;
27938f18cf13SChris Mason 	struct btrfs_key key;
27948f18cf13SChris Mason 
27958f18cf13SChris Mason 	path = btrfs_alloc_path();
27968f18cf13SChris Mason 	if (!path)
27978f18cf13SChris Mason 		return -ENOMEM;
27988f18cf13SChris Mason 
2799408fbf19SNikolay Borisov 	key.objectid = BTRFS_FIRST_CHUNK_TREE_OBJECTID;
28008f18cf13SChris Mason 	key.offset = chunk_offset;
28018f18cf13SChris Mason 	key.type = BTRFS_CHUNK_ITEM_KEY;
28028f18cf13SChris Mason 
28038f18cf13SChris Mason 	ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
280479787eaaSJeff Mahoney 	if (ret < 0)
280579787eaaSJeff Mahoney 		goto out;
280679787eaaSJeff Mahoney 	else if (ret > 0) { /* Logic error or corruption */
28070b246afaSJeff Mahoney 		btrfs_handle_fs_error(fs_info, -ENOENT,
280879787eaaSJeff Mahoney 				      "Failed lookup while freeing chunk.");
280979787eaaSJeff Mahoney 		ret = -ENOENT;
281079787eaaSJeff Mahoney 		goto out;
281179787eaaSJeff Mahoney 	}
28128f18cf13SChris Mason 
28138f18cf13SChris Mason 	ret = btrfs_del_item(trans, root, path);
281479787eaaSJeff Mahoney 	if (ret < 0)
28150b246afaSJeff Mahoney 		btrfs_handle_fs_error(fs_info, ret,
281679787eaaSJeff Mahoney 				      "Failed to delete chunk item.");
281779787eaaSJeff Mahoney out:
28188f18cf13SChris Mason 	btrfs_free_path(path);
281965a246c5STsutomu Itoh 	return ret;
28208f18cf13SChris Mason }
28218f18cf13SChris Mason 
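/*
 * Summary comment (not in the original source): remove the entry for
 * chunk_offset from the sys_chunk_array in the superblock copy and shrink
 * the array size accordingly.
 */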
2822408fbf19SNikolay Borisov static int btrfs_del_sys_chunk(struct btrfs_fs_info *fs_info, u64 chunk_offset)
28238f18cf13SChris Mason {
28240b246afaSJeff Mahoney 	struct btrfs_super_block *super_copy = fs_info->super_copy;
28258f18cf13SChris Mason 	struct btrfs_disk_key *disk_key;
28268f18cf13SChris Mason 	struct btrfs_chunk *chunk;
28278f18cf13SChris Mason 	u8 *ptr;
28288f18cf13SChris Mason 	int ret = 0;
28298f18cf13SChris Mason 	u32 num_stripes;
28308f18cf13SChris Mason 	u32 array_size;
28318f18cf13SChris Mason 	u32 len = 0;
28328f18cf13SChris Mason 	u32 cur;
28338f18cf13SChris Mason 	struct btrfs_key key;
28348f18cf13SChris Mason 
283534441361SDavid Sterba 	mutex_lock(&fs_info->chunk_mutex);
28368f18cf13SChris Mason 	array_size = btrfs_super_sys_array_size(super_copy);
28378f18cf13SChris Mason 
28388f18cf13SChris Mason 	ptr = super_copy->sys_chunk_array;
28398f18cf13SChris Mason 	cur = 0;
28408f18cf13SChris Mason 
28418f18cf13SChris Mason 	while (cur < array_size) {
28428f18cf13SChris Mason 		disk_key = (struct btrfs_disk_key *)ptr;
28438f18cf13SChris Mason 		btrfs_disk_key_to_cpu(&key, disk_key);
28448f18cf13SChris Mason 
28458f18cf13SChris Mason 		len = sizeof(*disk_key);
28468f18cf13SChris Mason 
28478f18cf13SChris Mason 		if (key.type == BTRFS_CHUNK_ITEM_KEY) {
28488f18cf13SChris Mason 			chunk = (struct btrfs_chunk *)(ptr + len);
28498f18cf13SChris Mason 			num_stripes = btrfs_stack_chunk_num_stripes(chunk);
28508f18cf13SChris Mason 			len += btrfs_chunk_item_size(num_stripes);
28518f18cf13SChris Mason 		} else {
28528f18cf13SChris Mason 			ret = -EIO;
28538f18cf13SChris Mason 			break;
28548f18cf13SChris Mason 		}
2855408fbf19SNikolay Borisov 		if (key.objectid == BTRFS_FIRST_CHUNK_TREE_OBJECTID &&
28568f18cf13SChris Mason 		    key.offset == chunk_offset) {
28578f18cf13SChris Mason 			memmove(ptr, ptr + len, array_size - (cur + len));
28588f18cf13SChris Mason 			array_size -= len;
28598f18cf13SChris Mason 			btrfs_set_super_sys_array_size(super_copy, array_size);
28608f18cf13SChris Mason 		} else {
28618f18cf13SChris Mason 			ptr += len;
28628f18cf13SChris Mason 			cur += len;
28638f18cf13SChris Mason 		}
28648f18cf13SChris Mason 	}
286534441361SDavid Sterba 	mutex_unlock(&fs_info->chunk_mutex);
28668f18cf13SChris Mason 	return ret;
28678f18cf13SChris Mason }
28688f18cf13SChris Mason 
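/*
 * Summary comment (not in the original source): look up the chunk mapping
 * (extent map) covering [logical, logical + length) and sanity check that
 * it really contains 'logical'.
 */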
2869592d92eeSLiu Bo static struct extent_map *get_chunk_map(struct btrfs_fs_info *fs_info,
2870592d92eeSLiu Bo 					u64 logical, u64 length)
2871592d92eeSLiu Bo {
2872592d92eeSLiu Bo 	struct extent_map_tree *em_tree;
2873592d92eeSLiu Bo 	struct extent_map *em;
2874592d92eeSLiu Bo 
2875592d92eeSLiu Bo 	em_tree = &fs_info->mapping_tree.map_tree;
2876592d92eeSLiu Bo 	read_lock(&em_tree->lock);
2877592d92eeSLiu Bo 	em = lookup_extent_mapping(em_tree, logical, length);
2878592d92eeSLiu Bo 	read_unlock(&em_tree->lock);
2879592d92eeSLiu Bo 
2880592d92eeSLiu Bo 	if (!em) {
2881592d92eeSLiu Bo 		btrfs_crit(fs_info, "unable to find logical %llu length %llu",
2882592d92eeSLiu Bo 			   logical, length);
2883592d92eeSLiu Bo 		return ERR_PTR(-EINVAL);
2884592d92eeSLiu Bo 	}
2885592d92eeSLiu Bo 
2886592d92eeSLiu Bo 	if (em->start > logical || em->start + em->len < logical) {
2887592d92eeSLiu Bo 		btrfs_crit(fs_info,
2888592d92eeSLiu Bo 			   "found a bad mapping, wanted %llu-%llu, found %llu-%llu",
2889592d92eeSLiu Bo 			   logical, length, em->start, em->start + em->len);
2890592d92eeSLiu Bo 		free_extent_map(em);
2891592d92eeSLiu Bo 		return ERR_PTR(-EINVAL);
2892592d92eeSLiu Bo 	}
2893592d92eeSLiu Bo 
2894592d92eeSLiu Bo 	/* callers are responsible for dropping em's ref. */
2895592d92eeSLiu Bo 	return em;
2896592d92eeSLiu Bo }
2897592d92eeSLiu Bo 
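/*
 * Summary comment (not in the original source): remove a chunk.  Free its
 * device extents, update the affected device items, delete the chunk item
 * (and the sys_chunk_array entry for system chunks) and finally remove the
 * block group.
 */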
289847ab2a6cSJosef Bacik int btrfs_remove_chunk(struct btrfs_trans_handle *trans,
28995b4aacefSJeff Mahoney 		       struct btrfs_fs_info *fs_info, u64 chunk_offset)
290047ab2a6cSJosef Bacik {
290147ab2a6cSJosef Bacik 	struct extent_map *em;
290247ab2a6cSJosef Bacik 	struct map_lookup *map;
290347ab2a6cSJosef Bacik 	u64 dev_extent_len = 0;
290447ab2a6cSJosef Bacik 	int i, ret = 0;
29050b246afaSJeff Mahoney 	struct btrfs_fs_devices *fs_devices = fs_info->fs_devices;
290647ab2a6cSJosef Bacik 
2907592d92eeSLiu Bo 	em = get_chunk_map(fs_info, chunk_offset, 1);
2908592d92eeSLiu Bo 	if (IS_ERR(em)) {
290947ab2a6cSJosef Bacik 		/*
291047ab2a6cSJosef Bacik 		 * This is a logic error, but we don't want to just rely on the
2911bb7ab3b9SAdam Buchbinder 		 * user having built with ASSERT enabled, so if ASSERT doesn't
291247ab2a6cSJosef Bacik 		 * do anything we still error out.
291347ab2a6cSJosef Bacik 		 */
291447ab2a6cSJosef Bacik 		ASSERT(0);
2915592d92eeSLiu Bo 		return PTR_ERR(em);
291647ab2a6cSJosef Bacik 	}
291795617d69SJeff Mahoney 	map = em->map_lookup;
291834441361SDavid Sterba 	mutex_lock(&fs_info->chunk_mutex);
29192ff7e61eSJeff Mahoney 	check_system_chunk(trans, fs_info, map->type);
292034441361SDavid Sterba 	mutex_unlock(&fs_info->chunk_mutex);
292147ab2a6cSJosef Bacik 
292257ba4cb8SFilipe Manana 	/*
292357ba4cb8SFilipe Manana 	 * Take the device list mutex to prevent races with the final phase of
292457ba4cb8SFilipe Manana 	 * a device replace operation that replaces the device object associated
292557ba4cb8SFilipe Manana 	 * with map stripes (dev-replace.c:btrfs_dev_replace_finishing()).
292657ba4cb8SFilipe Manana 	 */
292757ba4cb8SFilipe Manana 	mutex_lock(&fs_devices->device_list_mutex);
292847ab2a6cSJosef Bacik 	for (i = 0; i < map->num_stripes; i++) {
292947ab2a6cSJosef Bacik 		struct btrfs_device *device = map->stripes[i].dev;
293047ab2a6cSJosef Bacik 		ret = btrfs_free_dev_extent(trans, device,
293147ab2a6cSJosef Bacik 					    map->stripes[i].physical,
293247ab2a6cSJosef Bacik 					    &dev_extent_len);
293347ab2a6cSJosef Bacik 		if (ret) {
293457ba4cb8SFilipe Manana 			mutex_unlock(&fs_devices->device_list_mutex);
293566642832SJeff Mahoney 			btrfs_abort_transaction(trans, ret);
293647ab2a6cSJosef Bacik 			goto out;
293747ab2a6cSJosef Bacik 		}
293847ab2a6cSJosef Bacik 
293947ab2a6cSJosef Bacik 		if (device->bytes_used > 0) {
294034441361SDavid Sterba 			mutex_lock(&fs_info->chunk_mutex);
294147ab2a6cSJosef Bacik 			btrfs_device_set_bytes_used(device,
294247ab2a6cSJosef Bacik 					device->bytes_used - dev_extent_len);
2943a5ed45f8SNikolay Borisov 			atomic64_add(dev_extent_len, &fs_info->free_chunk_space);
29440b246afaSJeff Mahoney 			btrfs_clear_space_info_full(fs_info);
294534441361SDavid Sterba 			mutex_unlock(&fs_info->chunk_mutex);
294647ab2a6cSJosef Bacik 		}
294747ab2a6cSJosef Bacik 
294847ab2a6cSJosef Bacik 		if (map->stripes[i].dev) {
294947ab2a6cSJosef Bacik 			ret = btrfs_update_device(trans, map->stripes[i].dev);
295047ab2a6cSJosef Bacik 			if (ret) {
295157ba4cb8SFilipe Manana 				mutex_unlock(&fs_devices->device_list_mutex);
295266642832SJeff Mahoney 				btrfs_abort_transaction(trans, ret);
295347ab2a6cSJosef Bacik 				goto out;
295447ab2a6cSJosef Bacik 			}
295547ab2a6cSJosef Bacik 		}
295647ab2a6cSJosef Bacik 	}
295757ba4cb8SFilipe Manana 	mutex_unlock(&fs_devices->device_list_mutex);
295857ba4cb8SFilipe Manana 
2959408fbf19SNikolay Borisov 	ret = btrfs_free_chunk(trans, fs_info, chunk_offset);
296047ab2a6cSJosef Bacik 	if (ret) {
296166642832SJeff Mahoney 		btrfs_abort_transaction(trans, ret);
296247ab2a6cSJosef Bacik 		goto out;
296347ab2a6cSJosef Bacik 	}
296447ab2a6cSJosef Bacik 
29656bccf3abSJeff Mahoney 	trace_btrfs_chunk_free(fs_info, map, chunk_offset, em->len);
296647ab2a6cSJosef Bacik 
296747ab2a6cSJosef Bacik 	if (map->type & BTRFS_BLOCK_GROUP_SYSTEM) {
2968408fbf19SNikolay Borisov 		ret = btrfs_del_sys_chunk(fs_info, chunk_offset);
296947ab2a6cSJosef Bacik 		if (ret) {
297066642832SJeff Mahoney 			btrfs_abort_transaction(trans, ret);
297147ab2a6cSJosef Bacik 			goto out;
297247ab2a6cSJosef Bacik 		}
297347ab2a6cSJosef Bacik 	}
297447ab2a6cSJosef Bacik 
29756bccf3abSJeff Mahoney 	ret = btrfs_remove_block_group(trans, fs_info, chunk_offset, em);
297647ab2a6cSJosef Bacik 	if (ret) {
297766642832SJeff Mahoney 		btrfs_abort_transaction(trans, ret);
297847ab2a6cSJosef Bacik 		goto out;
297947ab2a6cSJosef Bacik 	}
298047ab2a6cSJosef Bacik 
298147ab2a6cSJosef Bacik out:
298247ab2a6cSJosef Bacik 	/* once for us */
298347ab2a6cSJosef Bacik 	free_extent_map(em);
29848f18cf13SChris Mason 	return ret;
29858f18cf13SChris Mason }
29868f18cf13SChris Mason 
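/*
 * Summary comment (not in the original source): relocate all extents out
 * of the chunk at chunk_offset and then remove the now-empty chunk in a
 * separate transaction.
 */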
29875b4aacefSJeff Mahoney static int btrfs_relocate_chunk(struct btrfs_fs_info *fs_info, u64 chunk_offset)
29888f18cf13SChris Mason {
29895b4aacefSJeff Mahoney 	struct btrfs_root *root = fs_info->chunk_root;
299019c4d2f9SChris Mason 	struct btrfs_trans_handle *trans;
29918f18cf13SChris Mason 	int ret;
29928f18cf13SChris Mason 
299367c5e7d4SFilipe Manana 	/*
299467c5e7d4SFilipe Manana 	 * Prevent races with automatic removal of unused block groups.
299567c5e7d4SFilipe Manana 	 * After we relocate and before we remove the chunk with offset
299667c5e7d4SFilipe Manana 	 * chunk_offset, automatic removal of the block group can kick in,
299767c5e7d4SFilipe Manana 	 * resulting in a failure when calling btrfs_remove_chunk() below.
299867c5e7d4SFilipe Manana 	 *
299967c5e7d4SFilipe Manana 	 * Make sure to acquire this mutex before doing a tree search (dev
300067c5e7d4SFilipe Manana 	 * or chunk trees) to find chunks. Otherwise the cleaner kthread might
300167c5e7d4SFilipe Manana 	 * call btrfs_remove_chunk() (through btrfs_delete_unused_bgs()) after
300267c5e7d4SFilipe Manana 	 * we release the path used to search the chunk/dev tree and before
300367c5e7d4SFilipe Manana 	 * the current task acquires this mutex and calls us.
300467c5e7d4SFilipe Manana 	 */
30050b246afaSJeff Mahoney 	ASSERT(mutex_is_locked(&fs_info->delete_unused_bgs_mutex));
300667c5e7d4SFilipe Manana 
30070b246afaSJeff Mahoney 	ret = btrfs_can_relocate(fs_info, chunk_offset);
3008ba1bf481SJosef Bacik 	if (ret)
3009ba1bf481SJosef Bacik 		return -ENOSPC;
3010ba1bf481SJosef Bacik 
30118f18cf13SChris Mason 	/* step one, relocate all the extents inside this chunk */
30122ff7e61eSJeff Mahoney 	btrfs_scrub_pause(fs_info);
30130b246afaSJeff Mahoney 	ret = btrfs_relocate_block_group(fs_info, chunk_offset);
30142ff7e61eSJeff Mahoney 	btrfs_scrub_continue(fs_info);
3015a22285a6SYan, Zheng 	if (ret)
3016a22285a6SYan, Zheng 		return ret;
30178f18cf13SChris Mason 
301819c4d2f9SChris Mason 	trans = btrfs_start_trans_remove_block_group(root->fs_info,
301919c4d2f9SChris Mason 						     chunk_offset);
302019c4d2f9SChris Mason 	if (IS_ERR(trans)) {
302119c4d2f9SChris Mason 		ret = PTR_ERR(trans);
302219c4d2f9SChris Mason 		btrfs_handle_fs_error(root->fs_info, ret, NULL);
302319c4d2f9SChris Mason 		return ret;
302419c4d2f9SChris Mason 	}
30255d8eb6feSNaohiro Aota 
302619c4d2f9SChris Mason 	/*
302719c4d2f9SChris Mason 	 * step two, delete the device extents and the
302819c4d2f9SChris Mason 	 * chunk tree entries
302919c4d2f9SChris Mason 	 */
30305b4aacefSJeff Mahoney 	ret = btrfs_remove_chunk(trans, fs_info, chunk_offset);
30313a45bb20SJeff Mahoney 	btrfs_end_transaction(trans);
303219c4d2f9SChris Mason 	return ret;
30338f18cf13SChris Mason }
30348f18cf13SChris Mason 
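/*
 * Summary comment (not in the original source): walk the chunk tree and
 * relocate every SYSTEM chunk, retrying once for chunks that failed with
 * -ENOSPC.
 */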
30352ff7e61eSJeff Mahoney static int btrfs_relocate_sys_chunks(struct btrfs_fs_info *fs_info)
30362b82032cSYan Zheng {
30370b246afaSJeff Mahoney 	struct btrfs_root *chunk_root = fs_info->chunk_root;
30382b82032cSYan Zheng 	struct btrfs_path *path;
30392b82032cSYan Zheng 	struct extent_buffer *leaf;
30402b82032cSYan Zheng 	struct btrfs_chunk *chunk;
30412b82032cSYan Zheng 	struct btrfs_key key;
30422b82032cSYan Zheng 	struct btrfs_key found_key;
30432b82032cSYan Zheng 	u64 chunk_type;
3044ba1bf481SJosef Bacik 	bool retried = false;
3045ba1bf481SJosef Bacik 	int failed = 0;
30462b82032cSYan Zheng 	int ret;
30472b82032cSYan Zheng 
30482b82032cSYan Zheng 	path = btrfs_alloc_path();
30492b82032cSYan Zheng 	if (!path)
30502b82032cSYan Zheng 		return -ENOMEM;
30512b82032cSYan Zheng 
3052ba1bf481SJosef Bacik again:
30532b82032cSYan Zheng 	key.objectid = BTRFS_FIRST_CHUNK_TREE_OBJECTID;
30542b82032cSYan Zheng 	key.offset = (u64)-1;
30552b82032cSYan Zheng 	key.type = BTRFS_CHUNK_ITEM_KEY;
30562b82032cSYan Zheng 
30572b82032cSYan Zheng 	while (1) {
30580b246afaSJeff Mahoney 		mutex_lock(&fs_info->delete_unused_bgs_mutex);
30592b82032cSYan Zheng 		ret = btrfs_search_slot(NULL, chunk_root, &key, path, 0, 0);
306067c5e7d4SFilipe Manana 		if (ret < 0) {
30610b246afaSJeff Mahoney 			mutex_unlock(&fs_info->delete_unused_bgs_mutex);
30622b82032cSYan Zheng 			goto error;
306367c5e7d4SFilipe Manana 		}
306479787eaaSJeff Mahoney 		BUG_ON(ret == 0); /* Corruption */
30652b82032cSYan Zheng 
30662b82032cSYan Zheng 		ret = btrfs_previous_item(chunk_root, path, key.objectid,
30672b82032cSYan Zheng 					  key.type);
306867c5e7d4SFilipe Manana 		if (ret)
30690b246afaSJeff Mahoney 			mutex_unlock(&fs_info->delete_unused_bgs_mutex);
30702b82032cSYan Zheng 		if (ret < 0)
30712b82032cSYan Zheng 			goto error;
30722b82032cSYan Zheng 		if (ret > 0)
30732b82032cSYan Zheng 			break;
30742b82032cSYan Zheng 
30752b82032cSYan Zheng 		leaf = path->nodes[0];
30762b82032cSYan Zheng 		btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
30772b82032cSYan Zheng 
30782b82032cSYan Zheng 		chunk = btrfs_item_ptr(leaf, path->slots[0],
30792b82032cSYan Zheng 				       struct btrfs_chunk);
30802b82032cSYan Zheng 		chunk_type = btrfs_chunk_type(leaf, chunk);
3081b3b4aa74SDavid Sterba 		btrfs_release_path(path);
30822b82032cSYan Zheng 
30832b82032cSYan Zheng 		if (chunk_type & BTRFS_BLOCK_GROUP_SYSTEM) {
30840b246afaSJeff Mahoney 			ret = btrfs_relocate_chunk(fs_info, found_key.offset);
3085ba1bf481SJosef Bacik 			if (ret == -ENOSPC)
3086ba1bf481SJosef Bacik 				failed++;
308714586651SHIMANGI SARAOGI 			else
308814586651SHIMANGI SARAOGI 				BUG_ON(ret);
30892b82032cSYan Zheng 		}
30900b246afaSJeff Mahoney 		mutex_unlock(&fs_info->delete_unused_bgs_mutex);
30912b82032cSYan Zheng 
30922b82032cSYan Zheng 		if (found_key.offset == 0)
30932b82032cSYan Zheng 			break;
30942b82032cSYan Zheng 		key.offset = found_key.offset - 1;
30952b82032cSYan Zheng 	}
30962b82032cSYan Zheng 	ret = 0;
3097ba1bf481SJosef Bacik 	if (failed && !retried) {
3098ba1bf481SJosef Bacik 		failed = 0;
3099ba1bf481SJosef Bacik 		retried = true;
3100ba1bf481SJosef Bacik 		goto again;
3101fae7f21cSDulshani Gunawardhana 	} else if (WARN_ON(failed && retried)) {
3102ba1bf481SJosef Bacik 		ret = -ENOSPC;
3103ba1bf481SJosef Bacik 	}
31042b82032cSYan Zheng error:
31052b82032cSYan Zheng 	btrfs_free_path(path);
31062b82032cSYan Zheng 	return ret;
31072b82032cSYan Zheng }
31082b82032cSYan Zheng 
3109*a6f93c71SLiu Bo /*
3110*a6f93c71SLiu Bo  * Return 1 if a data chunk was allocated successfully,
3111*a6f93c71SLiu Bo  * return <0 on errors while allocating a data chunk,
3112*a6f93c71SLiu Bo  * return 0 if there was no need to allocate a data chunk.
3113*a6f93c71SLiu Bo  */
3114*a6f93c71SLiu Bo static int btrfs_may_alloc_data_chunk(struct btrfs_fs_info *fs_info,
3115*a6f93c71SLiu Bo 				      u64 chunk_offset)
3116*a6f93c71SLiu Bo {
3117*a6f93c71SLiu Bo 	struct btrfs_block_group_cache *cache;
3118*a6f93c71SLiu Bo 	u64 bytes_used;
3119*a6f93c71SLiu Bo 	u64 chunk_type;
3120*a6f93c71SLiu Bo 
3121*a6f93c71SLiu Bo 	cache = btrfs_lookup_block_group(fs_info, chunk_offset);
3122*a6f93c71SLiu Bo 	ASSERT(cache);
3123*a6f93c71SLiu Bo 	chunk_type = cache->flags;
3124*a6f93c71SLiu Bo 	btrfs_put_block_group(cache);
3125*a6f93c71SLiu Bo 
3126*a6f93c71SLiu Bo 	if (chunk_type & BTRFS_BLOCK_GROUP_DATA) {
3127*a6f93c71SLiu Bo 		spin_lock(&fs_info->data_sinfo->lock);
3128*a6f93c71SLiu Bo 		bytes_used = fs_info->data_sinfo->bytes_used;
3129*a6f93c71SLiu Bo 		spin_unlock(&fs_info->data_sinfo->lock);
3130*a6f93c71SLiu Bo 
3131*a6f93c71SLiu Bo 		if (!bytes_used) {
3132*a6f93c71SLiu Bo 			struct btrfs_trans_handle *trans;
3133*a6f93c71SLiu Bo 			int ret;
3134*a6f93c71SLiu Bo 
3135*a6f93c71SLiu Bo 			trans =	btrfs_join_transaction(fs_info->tree_root);
3136*a6f93c71SLiu Bo 			if (IS_ERR(trans))
3137*a6f93c71SLiu Bo 				return PTR_ERR(trans);
3138*a6f93c71SLiu Bo 
3139*a6f93c71SLiu Bo 			ret = btrfs_force_chunk_alloc(trans, fs_info,
3140*a6f93c71SLiu Bo 						      BTRFS_BLOCK_GROUP_DATA);
3141*a6f93c71SLiu Bo 			btrfs_end_transaction(trans);
3142*a6f93c71SLiu Bo 			if (ret < 0)
3143*a6f93c71SLiu Bo 				return ret;
3144*a6f93c71SLiu Bo 
3145*a6f93c71SLiu Bo 			return 1;
3146*a6f93c71SLiu Bo 		}
3147*a6f93c71SLiu Bo 	}
3148*a6f93c71SLiu Bo 	return 0;
3149*a6f93c71SLiu Bo }
3150*a6f93c71SLiu Bo 
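/*
 * Summary comment (not in the original source): persist the balance
 * control (flags and per-type args) as the balance item in the tree root
 * so that an interrupted balance can be resumed.
 */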
31516bccf3abSJeff Mahoney static int insert_balance_item(struct btrfs_fs_info *fs_info,
31520940ebf6SIlya Dryomov 			       struct btrfs_balance_control *bctl)
31530940ebf6SIlya Dryomov {
31546bccf3abSJeff Mahoney 	struct btrfs_root *root = fs_info->tree_root;
31550940ebf6SIlya Dryomov 	struct btrfs_trans_handle *trans;
31560940ebf6SIlya Dryomov 	struct btrfs_balance_item *item;
31570940ebf6SIlya Dryomov 	struct btrfs_disk_balance_args disk_bargs;
31580940ebf6SIlya Dryomov 	struct btrfs_path *path;
31590940ebf6SIlya Dryomov 	struct extent_buffer *leaf;
31600940ebf6SIlya Dryomov 	struct btrfs_key key;
31610940ebf6SIlya Dryomov 	int ret, err;
31620940ebf6SIlya Dryomov 
31630940ebf6SIlya Dryomov 	path = btrfs_alloc_path();
31640940ebf6SIlya Dryomov 	if (!path)
31650940ebf6SIlya Dryomov 		return -ENOMEM;
31660940ebf6SIlya Dryomov 
31670940ebf6SIlya Dryomov 	trans = btrfs_start_transaction(root, 0);
31680940ebf6SIlya Dryomov 	if (IS_ERR(trans)) {
31690940ebf6SIlya Dryomov 		btrfs_free_path(path);
31700940ebf6SIlya Dryomov 		return PTR_ERR(trans);
31710940ebf6SIlya Dryomov 	}
31720940ebf6SIlya Dryomov 
31730940ebf6SIlya Dryomov 	key.objectid = BTRFS_BALANCE_OBJECTID;
3174c479cb4fSDavid Sterba 	key.type = BTRFS_TEMPORARY_ITEM_KEY;
31750940ebf6SIlya Dryomov 	key.offset = 0;
31760940ebf6SIlya Dryomov 
31770940ebf6SIlya Dryomov 	ret = btrfs_insert_empty_item(trans, root, path, &key,
31780940ebf6SIlya Dryomov 				      sizeof(*item));
31790940ebf6SIlya Dryomov 	if (ret)
31800940ebf6SIlya Dryomov 		goto out;
31810940ebf6SIlya Dryomov 
31820940ebf6SIlya Dryomov 	leaf = path->nodes[0];
31830940ebf6SIlya Dryomov 	item = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_balance_item);
31840940ebf6SIlya Dryomov 
3185b159fa28SDavid Sterba 	memzero_extent_buffer(leaf, (unsigned long)item, sizeof(*item));
31860940ebf6SIlya Dryomov 
31870940ebf6SIlya Dryomov 	btrfs_cpu_balance_args_to_disk(&disk_bargs, &bctl->data);
31880940ebf6SIlya Dryomov 	btrfs_set_balance_data(leaf, item, &disk_bargs);
31890940ebf6SIlya Dryomov 	btrfs_cpu_balance_args_to_disk(&disk_bargs, &bctl->meta);
31900940ebf6SIlya Dryomov 	btrfs_set_balance_meta(leaf, item, &disk_bargs);
31910940ebf6SIlya Dryomov 	btrfs_cpu_balance_args_to_disk(&disk_bargs, &bctl->sys);
31920940ebf6SIlya Dryomov 	btrfs_set_balance_sys(leaf, item, &disk_bargs);
31930940ebf6SIlya Dryomov 
31940940ebf6SIlya Dryomov 	btrfs_set_balance_flags(leaf, item, bctl->flags);
31950940ebf6SIlya Dryomov 
31960940ebf6SIlya Dryomov 	btrfs_mark_buffer_dirty(leaf);
31970940ebf6SIlya Dryomov out:
31980940ebf6SIlya Dryomov 	btrfs_free_path(path);
31993a45bb20SJeff Mahoney 	err = btrfs_commit_transaction(trans);
32000940ebf6SIlya Dryomov 	if (err && !ret)
32010940ebf6SIlya Dryomov 		ret = err;
32020940ebf6SIlya Dryomov 	return ret;
32030940ebf6SIlya Dryomov }
32040940ebf6SIlya Dryomov 
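/*
 * Summary comment (not in the original source): delete the on-disk balance
 * item written by insert_balance_item().
 */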
32056bccf3abSJeff Mahoney static int del_balance_item(struct btrfs_fs_info *fs_info)
32060940ebf6SIlya Dryomov {
32076bccf3abSJeff Mahoney 	struct btrfs_root *root = fs_info->tree_root;
32080940ebf6SIlya Dryomov 	struct btrfs_trans_handle *trans;
32090940ebf6SIlya Dryomov 	struct btrfs_path *path;
32100940ebf6SIlya Dryomov 	struct btrfs_key key;
32110940ebf6SIlya Dryomov 	int ret, err;
32120940ebf6SIlya Dryomov 
32130940ebf6SIlya Dryomov 	path = btrfs_alloc_path();
32140940ebf6SIlya Dryomov 	if (!path)
32150940ebf6SIlya Dryomov 		return -ENOMEM;
32160940ebf6SIlya Dryomov 
32170940ebf6SIlya Dryomov 	trans = btrfs_start_transaction(root, 0);
32180940ebf6SIlya Dryomov 	if (IS_ERR(trans)) {
32190940ebf6SIlya Dryomov 		btrfs_free_path(path);
32200940ebf6SIlya Dryomov 		return PTR_ERR(trans);
32210940ebf6SIlya Dryomov 	}
32220940ebf6SIlya Dryomov 
32230940ebf6SIlya Dryomov 	key.objectid = BTRFS_BALANCE_OBJECTID;
3224c479cb4fSDavid Sterba 	key.type = BTRFS_TEMPORARY_ITEM_KEY;
32250940ebf6SIlya Dryomov 	key.offset = 0;
32260940ebf6SIlya Dryomov 
32270940ebf6SIlya Dryomov 	ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
32280940ebf6SIlya Dryomov 	if (ret < 0)
32290940ebf6SIlya Dryomov 		goto out;
32300940ebf6SIlya Dryomov 	if (ret > 0) {
32310940ebf6SIlya Dryomov 		ret = -ENOENT;
32320940ebf6SIlya Dryomov 		goto out;
32330940ebf6SIlya Dryomov 	}
32340940ebf6SIlya Dryomov 
32350940ebf6SIlya Dryomov 	ret = btrfs_del_item(trans, root, path);
32360940ebf6SIlya Dryomov out:
32370940ebf6SIlya Dryomov 	btrfs_free_path(path);
32383a45bb20SJeff Mahoney 	err = btrfs_commit_transaction(trans);
32390940ebf6SIlya Dryomov 	if (err && !ret)
32400940ebf6SIlya Dryomov 		ret = err;
32410940ebf6SIlya Dryomov 	return ret;
32420940ebf6SIlya Dryomov }
32430940ebf6SIlya Dryomov 
3244c9e9f97bSIlya Dryomov /*
324559641015SIlya Dryomov  * This is a heuristic used to reduce the number of chunks balanced on
324659641015SIlya Dryomov  * resume after balance was interrupted.
324759641015SIlya Dryomov  */
324859641015SIlya Dryomov static void update_balance_args(struct btrfs_balance_control *bctl)
324959641015SIlya Dryomov {
325059641015SIlya Dryomov 	/*
325159641015SIlya Dryomov 	 * Turn on soft mode for chunk types that were being converted.
325259641015SIlya Dryomov 	 */
325359641015SIlya Dryomov 	if (bctl->data.flags & BTRFS_BALANCE_ARGS_CONVERT)
325459641015SIlya Dryomov 		bctl->data.flags |= BTRFS_BALANCE_ARGS_SOFT;
325559641015SIlya Dryomov 	if (bctl->sys.flags & BTRFS_BALANCE_ARGS_CONVERT)
325659641015SIlya Dryomov 		bctl->sys.flags |= BTRFS_BALANCE_ARGS_SOFT;
325759641015SIlya Dryomov 	if (bctl->meta.flags & BTRFS_BALANCE_ARGS_CONVERT)
325859641015SIlya Dryomov 		bctl->meta.flags |= BTRFS_BALANCE_ARGS_SOFT;
325959641015SIlya Dryomov 
326059641015SIlya Dryomov 	/*
326159641015SIlya Dryomov 	 * Turn on the usage filter if it is not already in use.  The idea is
326259641015SIlya Dryomov 	 * that chunks that we have already balanced should be
326359641015SIlya Dryomov 	 * reasonably full.  Don't do it for chunks that are being
326459641015SIlya Dryomov 	 * converted - that will keep us from relocating unconverted
326559641015SIlya Dryomov 	 * (albeit full) chunks.
326659641015SIlya Dryomov 	 */
326759641015SIlya Dryomov 	if (!(bctl->data.flags & BTRFS_BALANCE_ARGS_USAGE) &&
3268bc309467SDavid Sterba 	    !(bctl->data.flags & BTRFS_BALANCE_ARGS_USAGE_RANGE) &&
326959641015SIlya Dryomov 	    !(bctl->data.flags & BTRFS_BALANCE_ARGS_CONVERT)) {
327059641015SIlya Dryomov 		bctl->data.flags |= BTRFS_BALANCE_ARGS_USAGE;
327159641015SIlya Dryomov 		bctl->data.usage = 90;
327259641015SIlya Dryomov 	}
327359641015SIlya Dryomov 	if (!(bctl->sys.flags & BTRFS_BALANCE_ARGS_USAGE) &&
3274bc309467SDavid Sterba 	    !(bctl->sys.flags & BTRFS_BALANCE_ARGS_USAGE_RANGE) &&
327559641015SIlya Dryomov 	    !(bctl->sys.flags & BTRFS_BALANCE_ARGS_CONVERT)) {
327659641015SIlya Dryomov 		bctl->sys.flags |= BTRFS_BALANCE_ARGS_USAGE;
327759641015SIlya Dryomov 		bctl->sys.usage = 90;
327859641015SIlya Dryomov 	}
327959641015SIlya Dryomov 	if (!(bctl->meta.flags & BTRFS_BALANCE_ARGS_USAGE) &&
3280bc309467SDavid Sterba 	    !(bctl->meta.flags & BTRFS_BALANCE_ARGS_USAGE_RANGE) &&
328159641015SIlya Dryomov 	    !(bctl->meta.flags & BTRFS_BALANCE_ARGS_CONVERT)) {
328259641015SIlya Dryomov 		bctl->meta.flags |= BTRFS_BALANCE_ARGS_USAGE;
328359641015SIlya Dryomov 		bctl->meta.usage = 90;
328459641015SIlya Dryomov 	}
328559641015SIlya Dryomov }
328659641015SIlya Dryomov 
328759641015SIlya Dryomov /*
3288c9e9f97bSIlya Dryomov  * Should be called with both balance and volume mutexes held to
3289c9e9f97bSIlya Dryomov  * serialize other volume operations (add_dev/rm_dev/resize) with
3290c9e9f97bSIlya Dryomov  * restriper.  Same goes for unset_balance_control.
3291c9e9f97bSIlya Dryomov  */
3292c9e9f97bSIlya Dryomov static void set_balance_control(struct btrfs_balance_control *bctl)
3293c9e9f97bSIlya Dryomov {
3294c9e9f97bSIlya Dryomov 	struct btrfs_fs_info *fs_info = bctl->fs_info;
3295c9e9f97bSIlya Dryomov 
3296c9e9f97bSIlya Dryomov 	BUG_ON(fs_info->balance_ctl);
3297c9e9f97bSIlya Dryomov 
3298c9e9f97bSIlya Dryomov 	spin_lock(&fs_info->balance_lock);
3299c9e9f97bSIlya Dryomov 	fs_info->balance_ctl = bctl;
3300c9e9f97bSIlya Dryomov 	spin_unlock(&fs_info->balance_lock);
3301c9e9f97bSIlya Dryomov }
3302c9e9f97bSIlya Dryomov 
3303c9e9f97bSIlya Dryomov static void unset_balance_control(struct btrfs_fs_info *fs_info)
3304c9e9f97bSIlya Dryomov {
3305c9e9f97bSIlya Dryomov 	struct btrfs_balance_control *bctl = fs_info->balance_ctl;
3306c9e9f97bSIlya Dryomov 
3307c9e9f97bSIlya Dryomov 	BUG_ON(!fs_info->balance_ctl);
3308c9e9f97bSIlya Dryomov 
3309c9e9f97bSIlya Dryomov 	spin_lock(&fs_info->balance_lock);
3310c9e9f97bSIlya Dryomov 	fs_info->balance_ctl = NULL;
3311c9e9f97bSIlya Dryomov 	spin_unlock(&fs_info->balance_lock);
3312c9e9f97bSIlya Dryomov 
3313c9e9f97bSIlya Dryomov 	kfree(bctl);
3314c9e9f97bSIlya Dryomov }
3315c9e9f97bSIlya Dryomov 
3316ed25e9b2SIlya Dryomov /*
3317ed25e9b2SIlya Dryomov  * Balance filters.  Return 1 if chunk should be filtered out
3318ed25e9b2SIlya Dryomov  * (should not be balanced).
3319ed25e9b2SIlya Dryomov  */
3320899c81eaSIlya Dryomov static int chunk_profiles_filter(u64 chunk_type,
3321ed25e9b2SIlya Dryomov 				 struct btrfs_balance_args *bargs)
3322ed25e9b2SIlya Dryomov {
3323899c81eaSIlya Dryomov 	chunk_type = chunk_to_extended(chunk_type) &
3324899c81eaSIlya Dryomov 				BTRFS_EXTENDED_PROFILE_MASK;
3325ed25e9b2SIlya Dryomov 
3326899c81eaSIlya Dryomov 	if (bargs->profiles & chunk_type)
3327ed25e9b2SIlya Dryomov 		return 0;
3328ed25e9b2SIlya Dryomov 
3329ed25e9b2SIlya Dryomov 	return 1;
3330ed25e9b2SIlya Dryomov }
3331ed25e9b2SIlya Dryomov 
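/*
 * Summary comment (not in the original source): usage filter with an
 * explicit range.  Filter the chunk out (return 1) unless its used bytes
 * fall within [usage_min, usage_max) percent of the block group size.
 */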
3332dba72cb3SHolger Hoffstätte static int chunk_usage_range_filter(struct btrfs_fs_info *fs_info, u64 chunk_offset,
33335ce5b3c0SIlya Dryomov 			      struct btrfs_balance_args *bargs)
33345ce5b3c0SIlya Dryomov {
33355ce5b3c0SIlya Dryomov 	struct btrfs_block_group_cache *cache;
3336bc309467SDavid Sterba 	u64 chunk_used;
3337bc309467SDavid Sterba 	u64 user_thresh_min;
3338bc309467SDavid Sterba 	u64 user_thresh_max;
3339bc309467SDavid Sterba 	int ret = 1;
3340bc309467SDavid Sterba 
3341bc309467SDavid Sterba 	cache = btrfs_lookup_block_group(fs_info, chunk_offset);
3342bc309467SDavid Sterba 	chunk_used = btrfs_block_group_used(&cache->item);
3343bc309467SDavid Sterba 
3344bc309467SDavid Sterba 	if (bargs->usage_min == 0)
3345bc309467SDavid Sterba 		user_thresh_min = 0;
3346bc309467SDavid Sterba 	else
3347bc309467SDavid Sterba 		user_thresh_min = div_factor_fine(cache->key.offset,
3348bc309467SDavid Sterba 					bargs->usage_min);
3349bc309467SDavid Sterba 
3350bc309467SDavid Sterba 	if (bargs->usage_max == 0)
3351bc309467SDavid Sterba 		user_thresh_max = 1;
3352bc309467SDavid Sterba 	else if (bargs->usage_max > 100)
3353bc309467SDavid Sterba 		user_thresh_max = cache->key.offset;
3354bc309467SDavid Sterba 	else
3355bc309467SDavid Sterba 		user_thresh_max = div_factor_fine(cache->key.offset,
3356bc309467SDavid Sterba 					bargs->usage_max);
3357bc309467SDavid Sterba 
3358bc309467SDavid Sterba 	if (user_thresh_min <= chunk_used && chunk_used < user_thresh_max)
3359bc309467SDavid Sterba 		ret = 0;
3360bc309467SDavid Sterba 
3361bc309467SDavid Sterba 	btrfs_put_block_group(cache);
3362bc309467SDavid Sterba 	return ret;
3363bc309467SDavid Sterba }
3364bc309467SDavid Sterba 
3365dba72cb3SHolger Hoffstätte static int chunk_usage_filter(struct btrfs_fs_info *fs_info,
3366bc309467SDavid Sterba 		u64 chunk_offset, struct btrfs_balance_args *bargs)
3367bc309467SDavid Sterba {
3368bc309467SDavid Sterba 	struct btrfs_block_group_cache *cache;
33695ce5b3c0SIlya Dryomov 	u64 chunk_used, user_thresh;
33705ce5b3c0SIlya Dryomov 	int ret = 1;
33715ce5b3c0SIlya Dryomov 
33725ce5b3c0SIlya Dryomov 	cache = btrfs_lookup_block_group(fs_info, chunk_offset);
33735ce5b3c0SIlya Dryomov 	chunk_used = btrfs_block_group_used(&cache->item);
33745ce5b3c0SIlya Dryomov 
3375bc309467SDavid Sterba 	if (bargs->usage_min == 0)
33763e39cea6SIlya Dryomov 		user_thresh = 1;
3377a105bb88SIlya Dryomov 	else if (bargs->usage > 100)
3378a105bb88SIlya Dryomov 		user_thresh = cache->key.offset;
3379a105bb88SIlya Dryomov 	else
3380a105bb88SIlya Dryomov 		user_thresh = div_factor_fine(cache->key.offset,
3381a105bb88SIlya Dryomov 					      bargs->usage);
3382a105bb88SIlya Dryomov 
33835ce5b3c0SIlya Dryomov 	if (chunk_used < user_thresh)
33845ce5b3c0SIlya Dryomov 		ret = 0;
33855ce5b3c0SIlya Dryomov 
33865ce5b3c0SIlya Dryomov 	btrfs_put_block_group(cache);
33875ce5b3c0SIlya Dryomov 	return ret;
33885ce5b3c0SIlya Dryomov }
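
/*
 * Illustrative userspace sketch of the threshold arithmetic shared by the two
 * usage filters above, assuming div_factor_fine(num, factor) computes
 * num * factor / 100.  The usage == 0 case is a simplified reading (only
 * completely empty block groups qualify); the kernel stores usage and
 * usage_min/usage_max in a union, which is not modelled here.
 */
#include <stdint.h>
#include <stdio.h>

static uint64_t div_factor_fine(uint64_t num, uint32_t factor)
{
	return num * factor / 100;
}

/* Single threshold: balance the block group while used < usage% of its size. */
static int usage_filter(uint64_t bg_size, uint64_t bg_used, uint32_t usage)
{
	uint64_t thresh;

	if (usage == 0)
		thresh = 1;			/* only empty groups pass */
	else if (usage > 100)
		thresh = bg_size;		/* everything passes      */
	else
		thresh = div_factor_fine(bg_size, usage);

	return bg_used < thresh ? 0 : 1;	/* 1 == filtered out */
}

/* Range form: balance the block group while min% <= used < max% of its size. */
static int usage_range_filter(uint64_t bg_size, uint64_t bg_used,
			      uint32_t usage_min, uint32_t usage_max)
{
	uint64_t lo = usage_min ? div_factor_fine(bg_size, usage_min) : 0;
	uint64_t hi;

	if (usage_max == 0)
		hi = 1;
	else if (usage_max > 100)
		hi = bg_size;
	else
		hi = div_factor_fine(bg_size, usage_max);

	return (lo <= bg_used && bg_used < hi) ? 0 : 1;
}

int main(void)
{
	uint64_t size = 1024ULL << 20;		/* 1 GiB block group */
	uint64_t used = 300ULL << 20;		/* 300 MiB in use    */

	printf("usage=50 filtered: %d\n", usage_filter(size, used, 50));
	printf("usage=10..25 filtered: %d\n",
	       usage_range_filter(size, used, 10, 25));
	return 0;
}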
33895ce5b3c0SIlya Dryomov 
3390409d404bSIlya Dryomov static int chunk_devid_filter(struct extent_buffer *leaf,
3391409d404bSIlya Dryomov 			      struct btrfs_chunk *chunk,
3392409d404bSIlya Dryomov 			      struct btrfs_balance_args *bargs)
3393409d404bSIlya Dryomov {
3394409d404bSIlya Dryomov 	struct btrfs_stripe *stripe;
3395409d404bSIlya Dryomov 	int num_stripes = btrfs_chunk_num_stripes(leaf, chunk);
3396409d404bSIlya Dryomov 	int i;
3397409d404bSIlya Dryomov 
3398409d404bSIlya Dryomov 	for (i = 0; i < num_stripes; i++) {
3399409d404bSIlya Dryomov 		stripe = btrfs_stripe_nr(chunk, i);
3400409d404bSIlya Dryomov 		if (btrfs_stripe_devid(leaf, stripe) == bargs->devid)
3401409d404bSIlya Dryomov 			return 0;
3402409d404bSIlya Dryomov 	}
3403409d404bSIlya Dryomov 
3404409d404bSIlya Dryomov 	return 1;
3405409d404bSIlya Dryomov }
3406409d404bSIlya Dryomov 
340794e60d5aSIlya Dryomov /* [pstart, pend) */
340894e60d5aSIlya Dryomov static int chunk_drange_filter(struct extent_buffer *leaf,
340994e60d5aSIlya Dryomov 			       struct btrfs_chunk *chunk,
341094e60d5aSIlya Dryomov 			       struct btrfs_balance_args *bargs)
341194e60d5aSIlya Dryomov {
341294e60d5aSIlya Dryomov 	struct btrfs_stripe *stripe;
341394e60d5aSIlya Dryomov 	int num_stripes = btrfs_chunk_num_stripes(leaf, chunk);
341494e60d5aSIlya Dryomov 	u64 stripe_offset;
341594e60d5aSIlya Dryomov 	u64 stripe_length;
341694e60d5aSIlya Dryomov 	int factor;
341794e60d5aSIlya Dryomov 	int i;
341894e60d5aSIlya Dryomov 
341994e60d5aSIlya Dryomov 	if (!(bargs->flags & BTRFS_BALANCE_ARGS_DEVID))
342094e60d5aSIlya Dryomov 		return 0;
342194e60d5aSIlya Dryomov 
342294e60d5aSIlya Dryomov 	if (btrfs_chunk_type(leaf, chunk) & (BTRFS_BLOCK_GROUP_DUP |
342353b381b3SDavid Woodhouse 	     BTRFS_BLOCK_GROUP_RAID1 | BTRFS_BLOCK_GROUP_RAID10)) {
342453b381b3SDavid Woodhouse 		factor = num_stripes / 2;
342553b381b3SDavid Woodhouse 	} else if (btrfs_chunk_type(leaf, chunk) & BTRFS_BLOCK_GROUP_RAID5) {
342653b381b3SDavid Woodhouse 		factor = num_stripes - 1;
342753b381b3SDavid Woodhouse 	} else if (btrfs_chunk_type(leaf, chunk) & BTRFS_BLOCK_GROUP_RAID6) {
342853b381b3SDavid Woodhouse 		factor = num_stripes - 2;
342953b381b3SDavid Woodhouse 	} else {
343053b381b3SDavid Woodhouse 		factor = num_stripes;
343153b381b3SDavid Woodhouse 	}
343294e60d5aSIlya Dryomov 
343394e60d5aSIlya Dryomov 	for (i = 0; i < num_stripes; i++) {
343494e60d5aSIlya Dryomov 		stripe = btrfs_stripe_nr(chunk, i);
343594e60d5aSIlya Dryomov 		if (btrfs_stripe_devid(leaf, stripe) != bargs->devid)
343694e60d5aSIlya Dryomov 			continue;
343794e60d5aSIlya Dryomov 
343894e60d5aSIlya Dryomov 		stripe_offset = btrfs_stripe_offset(leaf, stripe);
343994e60d5aSIlya Dryomov 		stripe_length = btrfs_chunk_length(leaf, chunk);
3440b8b93addSDavid Sterba 		stripe_length = div_u64(stripe_length, factor);
344194e60d5aSIlya Dryomov 
344294e60d5aSIlya Dryomov 		if (stripe_offset < bargs->pend &&
344394e60d5aSIlya Dryomov 		    stripe_offset + stripe_length > bargs->pstart)
344494e60d5aSIlya Dryomov 			return 0;
344594e60d5aSIlya Dryomov 	}
344694e60d5aSIlya Dryomov 
344794e60d5aSIlya Dryomov 	return 1;
344894e60d5aSIlya Dryomov }
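
/*
 * Illustrative sketch of the drange filter arithmetic above: estimate the
 * on-device extent length of a chunk by dividing its logical length by a
 * profile-dependent factor (the same table as the switch above), then test
 * the half-open physical range [pstart, pend) for overlap.  Plain integer
 * division stands in for div_u64() here.
 */
#include <stdint.h>
#include <stdio.h>

static uint64_t stripe_len_on_dev(uint64_t chunk_len, int num_stripes,
				  int mirrored, int parity_stripes)
{
	int factor;

	if (mirrored)				/* DUP / RAID1 / RAID10 */
		factor = num_stripes / 2;
	else if (parity_stripes)		/* RAID5 -> 1, RAID6 -> 2 */
		factor = num_stripes - parity_stripes;
	else					/* single / RAID0 */
		factor = num_stripes;

	return chunk_len / factor;
}

/* Half-open overlap test, the same shape as the drange and vrange checks. */
static int ranges_overlap(uint64_t start, uint64_t len,
			  uint64_t pstart, uint64_t pend)
{
	return start < pend && start + len > pstart;
}

int main(void)
{
	/* A 2 GiB RAID5 chunk over 4 stripes: one parity stripe per row. */
	uint64_t per_dev = stripe_len_on_dev(2048ULL << 20, 4, 0, 1);

	printf("per-device extent: %llu MiB\n",
	       (unsigned long long)(per_dev >> 20));
	printf("stripe at 1.5 GiB overlaps [1 GiB, 2 GiB): %d\n",
	       ranges_overlap(1536ULL << 20, per_dev,
			      1024ULL << 20, 2048ULL << 20));
	return 0;
}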
344994e60d5aSIlya Dryomov 
3450ea67176aSIlya Dryomov /* [vstart, vend) */
3451ea67176aSIlya Dryomov static int chunk_vrange_filter(struct extent_buffer *leaf,
3452ea67176aSIlya Dryomov 			       struct btrfs_chunk *chunk,
3453ea67176aSIlya Dryomov 			       u64 chunk_offset,
3454ea67176aSIlya Dryomov 			       struct btrfs_balance_args *bargs)
3455ea67176aSIlya Dryomov {
3456ea67176aSIlya Dryomov 	if (chunk_offset < bargs->vend &&
3457ea67176aSIlya Dryomov 	    chunk_offset + btrfs_chunk_length(leaf, chunk) > bargs->vstart)
3458ea67176aSIlya Dryomov 		/* at least part of the chunk is inside this vrange */
3459ea67176aSIlya Dryomov 		return 0;
3460ea67176aSIlya Dryomov 
3461ea67176aSIlya Dryomov 	return 1;
3462ea67176aSIlya Dryomov }
3463ea67176aSIlya Dryomov 
3464dee32d0aSGabríel Arthúr Pétursson static int chunk_stripes_range_filter(struct extent_buffer *leaf,
3465dee32d0aSGabríel Arthúr Pétursson 			       struct btrfs_chunk *chunk,
3466dee32d0aSGabríel Arthúr Pétursson 			       struct btrfs_balance_args *bargs)
3467dee32d0aSGabríel Arthúr Pétursson {
3468dee32d0aSGabríel Arthúr Pétursson 	int num_stripes = btrfs_chunk_num_stripes(leaf, chunk);
3469dee32d0aSGabríel Arthúr Pétursson 
3470dee32d0aSGabríel Arthúr Pétursson 	if (bargs->stripes_min <= num_stripes
3471dee32d0aSGabríel Arthúr Pétursson 			&& num_stripes <= bargs->stripes_max)
3472dee32d0aSGabríel Arthúr Pétursson 		return 0;
3473dee32d0aSGabríel Arthúr Pétursson 
3474dee32d0aSGabríel Arthúr Pétursson 	return 1;
3475dee32d0aSGabríel Arthúr Pétursson }
3476dee32d0aSGabríel Arthúr Pétursson 
3477899c81eaSIlya Dryomov static int chunk_soft_convert_filter(u64 chunk_type,
3478cfa4c961SIlya Dryomov 				     struct btrfs_balance_args *bargs)
3479cfa4c961SIlya Dryomov {
3480cfa4c961SIlya Dryomov 	if (!(bargs->flags & BTRFS_BALANCE_ARGS_CONVERT))
3481cfa4c961SIlya Dryomov 		return 0;
3482cfa4c961SIlya Dryomov 
3483899c81eaSIlya Dryomov 	chunk_type = chunk_to_extended(chunk_type) &
3484899c81eaSIlya Dryomov 				BTRFS_EXTENDED_PROFILE_MASK;
3485cfa4c961SIlya Dryomov 
3486899c81eaSIlya Dryomov 	if (bargs->target == chunk_type)
3487cfa4c961SIlya Dryomov 		return 1;
3488cfa4c961SIlya Dryomov 
3489cfa4c961SIlya Dryomov 	return 0;
3490cfa4c961SIlya Dryomov }
3491cfa4c961SIlya Dryomov 
34922ff7e61eSJeff Mahoney static int should_balance_chunk(struct btrfs_fs_info *fs_info,
3493f43ffb60SIlya Dryomov 				struct extent_buffer *leaf,
3494f43ffb60SIlya Dryomov 				struct btrfs_chunk *chunk, u64 chunk_offset)
3495f43ffb60SIlya Dryomov {
34960b246afaSJeff Mahoney 	struct btrfs_balance_control *bctl = fs_info->balance_ctl;
3497f43ffb60SIlya Dryomov 	struct btrfs_balance_args *bargs = NULL;
3498f43ffb60SIlya Dryomov 	u64 chunk_type = btrfs_chunk_type(leaf, chunk);
3499f43ffb60SIlya Dryomov 
3500f43ffb60SIlya Dryomov 	/* type filter */
3501f43ffb60SIlya Dryomov 	if (!((chunk_type & BTRFS_BLOCK_GROUP_TYPE_MASK) &
3502f43ffb60SIlya Dryomov 	      (bctl->flags & BTRFS_BALANCE_TYPE_MASK))) {
3503f43ffb60SIlya Dryomov 		return 0;
3504f43ffb60SIlya Dryomov 	}
3505f43ffb60SIlya Dryomov 
3506f43ffb60SIlya Dryomov 	if (chunk_type & BTRFS_BLOCK_GROUP_DATA)
3507f43ffb60SIlya Dryomov 		bargs = &bctl->data;
3508f43ffb60SIlya Dryomov 	else if (chunk_type & BTRFS_BLOCK_GROUP_SYSTEM)
3509f43ffb60SIlya Dryomov 		bargs = &bctl->sys;
3510f43ffb60SIlya Dryomov 	else if (chunk_type & BTRFS_BLOCK_GROUP_METADATA)
3511f43ffb60SIlya Dryomov 		bargs = &bctl->meta;
3512f43ffb60SIlya Dryomov 
3513ed25e9b2SIlya Dryomov 	/* profiles filter */
3514ed25e9b2SIlya Dryomov 	if ((bargs->flags & BTRFS_BALANCE_ARGS_PROFILES) &&
3515ed25e9b2SIlya Dryomov 	    chunk_profiles_filter(chunk_type, bargs)) {
3516ed25e9b2SIlya Dryomov 		return 0;
3517ed25e9b2SIlya Dryomov 	}
3518ed25e9b2SIlya Dryomov 
35195ce5b3c0SIlya Dryomov 	/* usage filter */
35205ce5b3c0SIlya Dryomov 	if ((bargs->flags & BTRFS_BALANCE_ARGS_USAGE) &&
35210b246afaSJeff Mahoney 	    chunk_usage_filter(fs_info, chunk_offset, bargs)) {
35225ce5b3c0SIlya Dryomov 		return 0;
3523bc309467SDavid Sterba 	} else if ((bargs->flags & BTRFS_BALANCE_ARGS_USAGE_RANGE) &&
35240b246afaSJeff Mahoney 	    chunk_usage_range_filter(fs_info, chunk_offset, bargs)) {
3525bc309467SDavid Sterba 		return 0;
35265ce5b3c0SIlya Dryomov 	}
35275ce5b3c0SIlya Dryomov 
3528409d404bSIlya Dryomov 	/* devid filter */
3529409d404bSIlya Dryomov 	if ((bargs->flags & BTRFS_BALANCE_ARGS_DEVID) &&
3530409d404bSIlya Dryomov 	    chunk_devid_filter(leaf, chunk, bargs)) {
3531409d404bSIlya Dryomov 		return 0;
3532409d404bSIlya Dryomov 	}
3533409d404bSIlya Dryomov 
353494e60d5aSIlya Dryomov 	/* drange filter, makes sense only with devid filter */
353594e60d5aSIlya Dryomov 	if ((bargs->flags & BTRFS_BALANCE_ARGS_DRANGE) &&
3536e4ff5fb5SNikolay Borisov 	    chunk_drange_filter(leaf, chunk, bargs)) {
353794e60d5aSIlya Dryomov 		return 0;
353894e60d5aSIlya Dryomov 	}
353994e60d5aSIlya Dryomov 
3540ea67176aSIlya Dryomov 	/* vrange filter */
3541ea67176aSIlya Dryomov 	if ((bargs->flags & BTRFS_BALANCE_ARGS_VRANGE) &&
3542ea67176aSIlya Dryomov 	    chunk_vrange_filter(leaf, chunk, chunk_offset, bargs)) {
3543ea67176aSIlya Dryomov 		return 0;
3544ea67176aSIlya Dryomov 	}
3545ea67176aSIlya Dryomov 
3546dee32d0aSGabríel Arthúr Pétursson 	/* stripes filter */
3547dee32d0aSGabríel Arthúr Pétursson 	if ((bargs->flags & BTRFS_BALANCE_ARGS_STRIPES_RANGE) &&
3548dee32d0aSGabríel Arthúr Pétursson 	    chunk_stripes_range_filter(leaf, chunk, bargs)) {
3549dee32d0aSGabríel Arthúr Pétursson 		return 0;
3550dee32d0aSGabríel Arthúr Pétursson 	}
3551dee32d0aSGabríel Arthúr Pétursson 
3552cfa4c961SIlya Dryomov 	/* soft profile changing mode */
3553cfa4c961SIlya Dryomov 	if ((bargs->flags & BTRFS_BALANCE_ARGS_SOFT) &&
3554cfa4c961SIlya Dryomov 	    chunk_soft_convert_filter(chunk_type, bargs)) {
3555cfa4c961SIlya Dryomov 		return 0;
3556cfa4c961SIlya Dryomov 	}
3557cfa4c961SIlya Dryomov 
35587d824b6fSDavid Sterba 	/*
35597d824b6fSDavid Sterba 	 * limited by count, must be the last filter
35607d824b6fSDavid Sterba 	 */
35617d824b6fSDavid Sterba 	if ((bargs->flags & BTRFS_BALANCE_ARGS_LIMIT)) {
35627d824b6fSDavid Sterba 		if (bargs->limit == 0)
35637d824b6fSDavid Sterba 			return 0;
35647d824b6fSDavid Sterba 		else
35657d824b6fSDavid Sterba 			bargs->limit--;
356612907fc7SDavid Sterba 	} else if ((bargs->flags & BTRFS_BALANCE_ARGS_LIMIT_RANGE)) {
356712907fc7SDavid Sterba 		/*
356812907fc7SDavid Sterba 		 * Same logic as the 'limit' filter; the minimum cannot be
356901327610SNicholas D Steeves 		 * determined here because we do not have the global information
357012907fc7SDavid Sterba 		 * about the count of all chunks that satisfy the filters.
357112907fc7SDavid Sterba 		 */
357212907fc7SDavid Sterba 		if (bargs->limit_max == 0)
357312907fc7SDavid Sterba 			return 0;
357412907fc7SDavid Sterba 		else
357512907fc7SDavid Sterba 			bargs->limit_max--;
35767d824b6fSDavid Sterba 	}
35777d824b6fSDavid Sterba 
3578f43ffb60SIlya Dryomov 	return 1;
3579f43ffb60SIlya Dryomov }
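
/*
 * Illustrative sketch of the limit countdown at the end of
 * should_balance_chunk() above: the limit is a per-run budget, so every chunk
 * that survives the other filters consumes one unit and everything after the
 * budget runs out is filtered.  The helper name is made up for the example.
 */
#include <stdint.h>
#include <stdio.h>

static int limit_filter(uint32_t *remaining)
{
	if (*remaining == 0)
		return 1;	/* budget exhausted: filter the chunk out */
	(*remaining)--;
	return 0;
}

int main(void)
{
	uint32_t budget = 3;
	int i;

	for (i = 0; i < 5; i++)
		printf("chunk %d: %s\n", i,
		       limit_filter(&budget) ? "filtered" : "balanced");
	return 0;
}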
3580f43ffb60SIlya Dryomov 
3581c9e9f97bSIlya Dryomov static int __btrfs_balance(struct btrfs_fs_info *fs_info)
3582ec44a35cSChris Mason {
358319a39dceSIlya Dryomov 	struct btrfs_balance_control *bctl = fs_info->balance_ctl;
3584c9e9f97bSIlya Dryomov 	struct btrfs_root *chunk_root = fs_info->chunk_root;
3585c9e9f97bSIlya Dryomov 	struct btrfs_root *dev_root = fs_info->dev_root;
3586c9e9f97bSIlya Dryomov 	struct list_head *devices;
3587ec44a35cSChris Mason 	struct btrfs_device *device;
3588ec44a35cSChris Mason 	u64 old_size;
3589ec44a35cSChris Mason 	u64 size_to_free;
359012907fc7SDavid Sterba 	u64 chunk_type;
3591f43ffb60SIlya Dryomov 	struct btrfs_chunk *chunk;
35925a488b9dSLiu Bo 	struct btrfs_path *path = NULL;
3593ec44a35cSChris Mason 	struct btrfs_key key;
3594ec44a35cSChris Mason 	struct btrfs_key found_key;
3595c9e9f97bSIlya Dryomov 	struct btrfs_trans_handle *trans;
3596f43ffb60SIlya Dryomov 	struct extent_buffer *leaf;
3597f43ffb60SIlya Dryomov 	int slot;
3598c9e9f97bSIlya Dryomov 	int ret;
3599c9e9f97bSIlya Dryomov 	int enospc_errors = 0;
360019a39dceSIlya Dryomov 	bool counting = true;
360112907fc7SDavid Sterba 	/* The single value limit and min/max limits use the same bytes in the balance args union */
36027d824b6fSDavid Sterba 	u64 limit_data = bctl->data.limit;
36037d824b6fSDavid Sterba 	u64 limit_meta = bctl->meta.limit;
36047d824b6fSDavid Sterba 	u64 limit_sys = bctl->sys.limit;
360512907fc7SDavid Sterba 	u32 count_data = 0;
360612907fc7SDavid Sterba 	u32 count_meta = 0;
360712907fc7SDavid Sterba 	u32 count_sys = 0;
36082c9fe835SZhao Lei 	int chunk_reserved = 0;
3609ec44a35cSChris Mason 
3610ec44a35cSChris Mason 	/* step one, make some room on all the devices */
3611c9e9f97bSIlya Dryomov 	devices = &fs_info->fs_devices->devices;
3612c6e30871SQinghuang Feng 	list_for_each_entry(device, devices, dev_list) {
36137cc8e58dSMiao Xie 		old_size = btrfs_device_get_total_bytes(device);
3614ec44a35cSChris Mason 		size_to_free = div_factor(old_size, 1);
3615ee22184bSByongho Lee 		size_to_free = min_t(u64, size_to_free, SZ_1M);
3616ebbede42SAnand Jain 		if (!test_bit(BTRFS_DEV_STATE_WRITEABLE, &device->dev_state) ||
36177cc8e58dSMiao Xie 		    btrfs_device_get_total_bytes(device) -
36187cc8e58dSMiao Xie 		    btrfs_device_get_bytes_used(device) > size_to_free ||
3619401e29c1SAnand Jain 		    test_bit(BTRFS_DEV_STATE_REPLACE_TGT, &device->dev_state))
3620ec44a35cSChris Mason 			continue;
3621ec44a35cSChris Mason 
3622ec44a35cSChris Mason 		ret = btrfs_shrink_device(device, old_size - size_to_free);
3623ba1bf481SJosef Bacik 		if (ret == -ENOSPC)
3624ba1bf481SJosef Bacik 			break;
36255a488b9dSLiu Bo 		if (ret) {
36265a488b9dSLiu Bo 			/* btrfs_shrink_device never returns ret > 0 */
36275a488b9dSLiu Bo 			WARN_ON(ret > 0);
36285a488b9dSLiu Bo 			goto error;
36295a488b9dSLiu Bo 		}
3630ec44a35cSChris Mason 
3631a22285a6SYan, Zheng 		trans = btrfs_start_transaction(dev_root, 0);
36325a488b9dSLiu Bo 		if (IS_ERR(trans)) {
36335a488b9dSLiu Bo 			ret = PTR_ERR(trans);
36345a488b9dSLiu Bo 			btrfs_info_in_rcu(fs_info,
36355a488b9dSLiu Bo 		 "resize: unable to start transaction after shrinking device %s (error %d), old size %llu, new size %llu",
36365a488b9dSLiu Bo 					  rcu_str_deref(device->name), ret,
36375a488b9dSLiu Bo 					  old_size, old_size - size_to_free);
36385a488b9dSLiu Bo 			goto error;
36395a488b9dSLiu Bo 		}
3640ec44a35cSChris Mason 
3641ec44a35cSChris Mason 		ret = btrfs_grow_device(trans, device, old_size);
36425a488b9dSLiu Bo 		if (ret) {
36433a45bb20SJeff Mahoney 			btrfs_end_transaction(trans);
36445a488b9dSLiu Bo 			/* btrfs_grow_device never returns ret > 0 */
36455a488b9dSLiu Bo 			WARN_ON(ret > 0);
36465a488b9dSLiu Bo 			btrfs_info_in_rcu(fs_info,
36475a488b9dSLiu Bo 		 "resize: unable to grow device after shrinking device %s (error %d), old size %llu, new size %llu",
36485a488b9dSLiu Bo 					  rcu_str_deref(device->name), ret,
36495a488b9dSLiu Bo 					  old_size, old_size - size_to_free);
36505a488b9dSLiu Bo 			goto error;
36515a488b9dSLiu Bo 		}
3652ec44a35cSChris Mason 
36533a45bb20SJeff Mahoney 		btrfs_end_transaction(trans);
3654ec44a35cSChris Mason 	}
3655ec44a35cSChris Mason 
3656ec44a35cSChris Mason 	/* step two, relocate all the chunks */
3657ec44a35cSChris Mason 	path = btrfs_alloc_path();
365817e9f796SMark Fasheh 	if (!path) {
365917e9f796SMark Fasheh 		ret = -ENOMEM;
366017e9f796SMark Fasheh 		goto error;
366117e9f796SMark Fasheh 	}
366219a39dceSIlya Dryomov 
366319a39dceSIlya Dryomov 	/* zero out stat counters */
366419a39dceSIlya Dryomov 	spin_lock(&fs_info->balance_lock);
366519a39dceSIlya Dryomov 	memset(&bctl->stat, 0, sizeof(bctl->stat));
366619a39dceSIlya Dryomov 	spin_unlock(&fs_info->balance_lock);
366719a39dceSIlya Dryomov again:
36687d824b6fSDavid Sterba 	if (!counting) {
366912907fc7SDavid Sterba 		/*
367012907fc7SDavid Sterba 		 * The single value limit and min/max limits use the same bytes
367112907fc7SDavid Sterba 		 * in the balance args union; restore them for the relocation pass.
367212907fc7SDavid Sterba 		 */
36737d824b6fSDavid Sterba 		bctl->data.limit = limit_data;
36747d824b6fSDavid Sterba 		bctl->meta.limit = limit_meta;
36757d824b6fSDavid Sterba 		bctl->sys.limit = limit_sys;
36767d824b6fSDavid Sterba 	}
3677ec44a35cSChris Mason 	key.objectid = BTRFS_FIRST_CHUNK_TREE_OBJECTID;
3678ec44a35cSChris Mason 	key.offset = (u64)-1;
3679ec44a35cSChris Mason 	key.type = BTRFS_CHUNK_ITEM_KEY;
3680ec44a35cSChris Mason 
3681ec44a35cSChris Mason 	while (1) {
368219a39dceSIlya Dryomov 		if ((!counting && atomic_read(&fs_info->balance_pause_req)) ||
3683a7e99c69SIlya Dryomov 		    atomic_read(&fs_info->balance_cancel_req)) {
3684837d5b6eSIlya Dryomov 			ret = -ECANCELED;
3685837d5b6eSIlya Dryomov 			goto error;
3686837d5b6eSIlya Dryomov 		}
3687837d5b6eSIlya Dryomov 
368867c5e7d4SFilipe Manana 		mutex_lock(&fs_info->delete_unused_bgs_mutex);
3689ec44a35cSChris Mason 		ret = btrfs_search_slot(NULL, chunk_root, &key, path, 0, 0);
369067c5e7d4SFilipe Manana 		if (ret < 0) {
369167c5e7d4SFilipe Manana 			mutex_unlock(&fs_info->delete_unused_bgs_mutex);
3692ec44a35cSChris Mason 			goto error;
369367c5e7d4SFilipe Manana 		}
3694ec44a35cSChris Mason 
3695ec44a35cSChris Mason 		/*
3696ec44a35cSChris Mason 		 * this shouldn't happen; it means the last relocate
3697ec44a35cSChris Mason 		 * failed
3698ec44a35cSChris Mason 		 */
3699ec44a35cSChris Mason 		if (ret == 0)
3700c9e9f97bSIlya Dryomov 			BUG(); /* FIXME break ? */
3701ec44a35cSChris Mason 
3702ec44a35cSChris Mason 		ret = btrfs_previous_item(chunk_root, path, 0,
3703ec44a35cSChris Mason 					  BTRFS_CHUNK_ITEM_KEY);
3704c9e9f97bSIlya Dryomov 		if (ret) {
370567c5e7d4SFilipe Manana 			mutex_unlock(&fs_info->delete_unused_bgs_mutex);
3706c9e9f97bSIlya Dryomov 			ret = 0;
3707ec44a35cSChris Mason 			break;
3708c9e9f97bSIlya Dryomov 		}
37097d9eb12cSChris Mason 
3710f43ffb60SIlya Dryomov 		leaf = path->nodes[0];
3711f43ffb60SIlya Dryomov 		slot = path->slots[0];
3712f43ffb60SIlya Dryomov 		btrfs_item_key_to_cpu(leaf, &found_key, slot);
3713f43ffb60SIlya Dryomov 
371467c5e7d4SFilipe Manana 		if (found_key.objectid != key.objectid) {
371567c5e7d4SFilipe Manana 			mutex_unlock(&fs_info->delete_unused_bgs_mutex);
3716ec44a35cSChris Mason 			break;
371767c5e7d4SFilipe Manana 		}
37187d9eb12cSChris Mason 
3719f43ffb60SIlya Dryomov 		chunk = btrfs_item_ptr(leaf, slot, struct btrfs_chunk);
372012907fc7SDavid Sterba 		chunk_type = btrfs_chunk_type(leaf, chunk);
3721f43ffb60SIlya Dryomov 
372219a39dceSIlya Dryomov 		if (!counting) {
372319a39dceSIlya Dryomov 			spin_lock(&fs_info->balance_lock);
372419a39dceSIlya Dryomov 			bctl->stat.considered++;
372519a39dceSIlya Dryomov 			spin_unlock(&fs_info->balance_lock);
372619a39dceSIlya Dryomov 		}
372719a39dceSIlya Dryomov 
37282ff7e61eSJeff Mahoney 		ret = should_balance_chunk(fs_info, leaf, chunk,
3729f43ffb60SIlya Dryomov 					   found_key.offset);
37302c9fe835SZhao Lei 
3731b3b4aa74SDavid Sterba 		btrfs_release_path(path);
373267c5e7d4SFilipe Manana 		if (!ret) {
373367c5e7d4SFilipe Manana 			mutex_unlock(&fs_info->delete_unused_bgs_mutex);
3734f43ffb60SIlya Dryomov 			goto loop;
373567c5e7d4SFilipe Manana 		}
3736f43ffb60SIlya Dryomov 
373719a39dceSIlya Dryomov 		if (counting) {
373867c5e7d4SFilipe Manana 			mutex_unlock(&fs_info->delete_unused_bgs_mutex);
373919a39dceSIlya Dryomov 			spin_lock(&fs_info->balance_lock);
374019a39dceSIlya Dryomov 			bctl->stat.expected++;
374119a39dceSIlya Dryomov 			spin_unlock(&fs_info->balance_lock);
374212907fc7SDavid Sterba 
374312907fc7SDavid Sterba 			if (chunk_type & BTRFS_BLOCK_GROUP_DATA)
374412907fc7SDavid Sterba 				count_data++;
374512907fc7SDavid Sterba 			else if (chunk_type & BTRFS_BLOCK_GROUP_SYSTEM)
374612907fc7SDavid Sterba 				count_sys++;
374712907fc7SDavid Sterba 			else if (chunk_type & BTRFS_BLOCK_GROUP_METADATA)
374812907fc7SDavid Sterba 				count_meta++;
374912907fc7SDavid Sterba 
375012907fc7SDavid Sterba 			goto loop;
375112907fc7SDavid Sterba 		}
375212907fc7SDavid Sterba 
375312907fc7SDavid Sterba 		/*
375412907fc7SDavid Sterba 		 * Apply limit_min filter, no need to check if the LIMITS
375512907fc7SDavid Sterba 		 * filter is used, limit_min is 0 by default
375612907fc7SDavid Sterba 		 */
375712907fc7SDavid Sterba 		if (((chunk_type & BTRFS_BLOCK_GROUP_DATA) &&
375812907fc7SDavid Sterba 					count_data < bctl->data.limit_min)
375912907fc7SDavid Sterba 				|| ((chunk_type & BTRFS_BLOCK_GROUP_METADATA) &&
376012907fc7SDavid Sterba 					count_meta < bctl->meta.limit_min)
376112907fc7SDavid Sterba 				|| ((chunk_type & BTRFS_BLOCK_GROUP_SYSTEM) &&
376212907fc7SDavid Sterba 					count_sys < bctl->sys.limit_min)) {
376312907fc7SDavid Sterba 			mutex_unlock(&fs_info->delete_unused_bgs_mutex);
376419a39dceSIlya Dryomov 			goto loop;
376519a39dceSIlya Dryomov 		}
376619a39dceSIlya Dryomov 
3767*a6f93c71SLiu Bo 		if (!chunk_reserved) {
3768*a6f93c71SLiu Bo 			/*
3769*a6f93c71SLiu Bo 			 * We may be relocating the only data chunk we have,
3770*a6f93c71SLiu Bo 			 * which could potentially end up losing the data
3771*a6f93c71SLiu Bo 			 * raid profile, so let's allocate an empty one in
3772*a6f93c71SLiu Bo 			 * advance.
3773*a6f93c71SLiu Bo 			 */
3774*a6f93c71SLiu Bo 			ret = btrfs_may_alloc_data_chunk(fs_info,
3775*a6f93c71SLiu Bo 							 found_key.offset);
37762c9fe835SZhao Lei 			if (ret < 0) {
37772c9fe835SZhao Lei 				mutex_unlock(&fs_info->delete_unused_bgs_mutex);
37782c9fe835SZhao Lei 				goto error;
3779*a6f93c71SLiu Bo 			} else if (ret == 1) {
37802c9fe835SZhao Lei 				chunk_reserved = 1;
37812c9fe835SZhao Lei 			}
3782*a6f93c71SLiu Bo 		}
37832c9fe835SZhao Lei 
37845b4aacefSJeff Mahoney 		ret = btrfs_relocate_chunk(fs_info, found_key.offset);
378567c5e7d4SFilipe Manana 		mutex_unlock(&fs_info->delete_unused_bgs_mutex);
3786508794ebSJosef Bacik 		if (ret && ret != -ENOSPC)
3787508794ebSJosef Bacik 			goto error;
378819a39dceSIlya Dryomov 		if (ret == -ENOSPC) {
3789c9e9f97bSIlya Dryomov 			enospc_errors++;
379019a39dceSIlya Dryomov 		} else {
379119a39dceSIlya Dryomov 			spin_lock(&fs_info->balance_lock);
379219a39dceSIlya Dryomov 			bctl->stat.completed++;
379319a39dceSIlya Dryomov 			spin_unlock(&fs_info->balance_lock);
379419a39dceSIlya Dryomov 		}
3795f43ffb60SIlya Dryomov loop:
3796795a3321SIlya Dryomov 		if (found_key.offset == 0)
3797795a3321SIlya Dryomov 			break;
3798ba1bf481SJosef Bacik 		key.offset = found_key.offset - 1;
3799ec44a35cSChris Mason 	}
3800c9e9f97bSIlya Dryomov 
380119a39dceSIlya Dryomov 	if (counting) {
380219a39dceSIlya Dryomov 		btrfs_release_path(path);
380319a39dceSIlya Dryomov 		counting = false;
380419a39dceSIlya Dryomov 		goto again;
380519a39dceSIlya Dryomov 	}
3806ec44a35cSChris Mason error:
3807ec44a35cSChris Mason 	btrfs_free_path(path);
3808c9e9f97bSIlya Dryomov 	if (enospc_errors) {
3809efe120a0SFrank Holton 		btrfs_info(fs_info, "%d enospc errors during balance",
3810c9e9f97bSIlya Dryomov 			   enospc_errors);
3811c9e9f97bSIlya Dryomov 		if (!ret)
3812c9e9f97bSIlya Dryomov 			ret = -ENOSPC;
3813c9e9f97bSIlya Dryomov 	}
3814c9e9f97bSIlya Dryomov 
3815ec44a35cSChris Mason 	return ret;
3816ec44a35cSChris Mason }
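
/*
 * Illustrative sketch of the two-pass scheme in __btrfs_balance() above: the
 * first pass only counts the chunks that pass the filters, the second pass
 * relocates them, and a class of chunks is skipped entirely when fewer than
 * limit_min of them qualified.  The matching predicate and names below are
 * stand-ins, not the kernel's.
 */
#include <stdint.h>
#include <stdio.h>

/* Stand-in for should_balance_chunk(): chunks at even offsets "match". */
static int chunk_matches(uint64_t offset)
{
	return (offset & 1) == 0;
}

static void balance(const uint64_t *offsets, int n, uint32_t limit_min)
{
	uint32_t matched = 0;
	int i;

	for (i = 0; i < n; i++)			/* counting pass */
		if (chunk_matches(offsets[i]))
			matched++;

	for (i = 0; i < n; i++) {		/* relocation pass */
		if (!chunk_matches(offsets[i]))
			continue;
		if (matched < limit_min) {
			printf("skip %llu: only %u matched (< limit_min %u)\n",
			       (unsigned long long)offsets[i], matched,
			       limit_min);
			continue;
		}
		printf("relocate chunk at %llu\n",
		       (unsigned long long)offsets[i]);
	}
}

int main(void)
{
	uint64_t offsets[] = { 10, 11, 12, 13, 14 };

	balance(offsets, 5, 2);	/* 3 matches >= 2: relocate them  */
	balance(offsets, 5, 4);	/* 3 matches <  4: skip the class */
	return 0;
}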
3817ec44a35cSChris Mason 
38180c460c0dSIlya Dryomov /**
38190c460c0dSIlya Dryomov  * alloc_profile_is_valid - see if a given profile is valid and reduced
38200c460c0dSIlya Dryomov  * @flags: profile to validate
38210c460c0dSIlya Dryomov  * @extended: if true @flags is treated as an extended profile
38220c460c0dSIlya Dryomov  */
38230c460c0dSIlya Dryomov static int alloc_profile_is_valid(u64 flags, int extended)
38240c460c0dSIlya Dryomov {
38250c460c0dSIlya Dryomov 	u64 mask = (extended ? BTRFS_EXTENDED_PROFILE_MASK :
38260c460c0dSIlya Dryomov 			       BTRFS_BLOCK_GROUP_PROFILE_MASK);
38270c460c0dSIlya Dryomov 
38280c460c0dSIlya Dryomov 	flags &= ~BTRFS_BLOCK_GROUP_TYPE_MASK;
38290c460c0dSIlya Dryomov 
38300c460c0dSIlya Dryomov 	/* 1) check that all other bits are zeroed */
38310c460c0dSIlya Dryomov 	if (flags & ~mask)
38320c460c0dSIlya Dryomov 		return 0;
38330c460c0dSIlya Dryomov 
38340c460c0dSIlya Dryomov 	/* 2) see if profile is reduced */
38350c460c0dSIlya Dryomov 	if (flags == 0)
38360c460c0dSIlya Dryomov 		return !extended; /* "0" is valid for usual profiles */
38370c460c0dSIlya Dryomov 
38380c460c0dSIlya Dryomov 	/* true if exactly one bit set */
38390c460c0dSIlya Dryomov 	return (flags & (flags - 1)) == 0;
38400c460c0dSIlya Dryomov }
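
/*
 * The "reduced" check above is the classic at-most-one-bit-set test; a
 * minimal standalone illustration:
 */
#include <stdint.h>
#include <stdio.h>

/*
 * x & (x - 1) clears the lowest set bit, so the result is zero iff x had at
 * most one bit set.
 */
static int at_most_one_bit(uint64_t x)
{
	return (x & (x - 1)) == 0;
}

int main(void)
{
	printf("%d %d %d\n",
	       at_most_one_bit(0),		/* 1: no bits set      */
	       at_most_one_bit(1ULL << 5),	/* 1: exactly one bit  */
	       at_most_one_bit(0x30));		/* 0: two bits set     */
	return 0;
}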
38410c460c0dSIlya Dryomov 
3842837d5b6eSIlya Dryomov static inline int balance_need_close(struct btrfs_fs_info *fs_info)
3843837d5b6eSIlya Dryomov {
3844a7e99c69SIlya Dryomov 	/* cancel requested || normal exit path */
3845a7e99c69SIlya Dryomov 	return atomic_read(&fs_info->balance_cancel_req) ||
3846a7e99c69SIlya Dryomov 		(atomic_read(&fs_info->balance_pause_req) == 0 &&
3847a7e99c69SIlya Dryomov 		 atomic_read(&fs_info->balance_cancel_req) == 0);
3848837d5b6eSIlya Dryomov }
3849837d5b6eSIlya Dryomov 
3850c9e9f97bSIlya Dryomov static void __cancel_balance(struct btrfs_fs_info *fs_info)
3851c9e9f97bSIlya Dryomov {
38520940ebf6SIlya Dryomov 	int ret;
38530940ebf6SIlya Dryomov 
3854c9e9f97bSIlya Dryomov 	unset_balance_control(fs_info);
38556bccf3abSJeff Mahoney 	ret = del_balance_item(fs_info);
38560f788c58SLiu Bo 	if (ret)
385734d97007SAnand Jain 		btrfs_handle_fs_error(fs_info, ret, NULL);
3858ed0fb78fSIlya Dryomov 
3859171938e5SDavid Sterba 	clear_bit(BTRFS_FS_EXCL_OP, &fs_info->flags);
3860c9e9f97bSIlya Dryomov }
3861c9e9f97bSIlya Dryomov 
3862bdcd3c97SAlexandru Moise /* Non-zero return value signifies invalidity */
3863bdcd3c97SAlexandru Moise static inline int validate_convert_profile(struct btrfs_balance_args *bctl_arg,
3864bdcd3c97SAlexandru Moise 		u64 allowed)
3865bdcd3c97SAlexandru Moise {
3866bdcd3c97SAlexandru Moise 	return ((bctl_arg->flags & BTRFS_BALANCE_ARGS_CONVERT) &&
3867bdcd3c97SAlexandru Moise 		(!alloc_profile_is_valid(bctl_arg->target, 1) ||
3868bdcd3c97SAlexandru Moise 		 (bctl_arg->target & ~allowed)));
3869bdcd3c97SAlexandru Moise }
3870bdcd3c97SAlexandru Moise 
3871c9e9f97bSIlya Dryomov /*
3872c9e9f97bSIlya Dryomov  * Should be called with both balance and volume mutexes held
3873c9e9f97bSIlya Dryomov  */
3874c9e9f97bSIlya Dryomov int btrfs_balance(struct btrfs_balance_control *bctl,
3875c9e9f97bSIlya Dryomov 		  struct btrfs_ioctl_balance_args *bargs)
3876c9e9f97bSIlya Dryomov {
3877c9e9f97bSIlya Dryomov 	struct btrfs_fs_info *fs_info = bctl->fs_info;
387814506127SAdam Borowski 	u64 meta_target, data_target;
3879f43ffb60SIlya Dryomov 	u64 allowed;
3880e4837f8fSIlya Dryomov 	int mixed = 0;
3881c9e9f97bSIlya Dryomov 	int ret;
38828dabb742SStefan Behrens 	u64 num_devices;
3883de98ced9SMiao Xie 	unsigned seq;
3884c9e9f97bSIlya Dryomov 
3885837d5b6eSIlya Dryomov 	if (btrfs_fs_closing(fs_info) ||
3886a7e99c69SIlya Dryomov 	    atomic_read(&fs_info->balance_pause_req) ||
3887a7e99c69SIlya Dryomov 	    atomic_read(&fs_info->balance_cancel_req)) {
3888c9e9f97bSIlya Dryomov 		ret = -EINVAL;
3889c9e9f97bSIlya Dryomov 		goto out;
3890c9e9f97bSIlya Dryomov 	}
3891c9e9f97bSIlya Dryomov 
3892e4837f8fSIlya Dryomov 	allowed = btrfs_super_incompat_flags(fs_info->super_copy);
3893e4837f8fSIlya Dryomov 	if (allowed & BTRFS_FEATURE_INCOMPAT_MIXED_GROUPS)
3894e4837f8fSIlya Dryomov 		mixed = 1;
3895e4837f8fSIlya Dryomov 
3896f43ffb60SIlya Dryomov 	/*
3897f43ffb60SIlya Dryomov 	 * In case of mixed groups, both data and meta should be picked,
3898f43ffb60SIlya Dryomov 	 * and identical options should be given for both of them.
3899f43ffb60SIlya Dryomov 	 */
3900e4837f8fSIlya Dryomov 	allowed = BTRFS_BALANCE_DATA | BTRFS_BALANCE_METADATA;
3901e4837f8fSIlya Dryomov 	if (mixed && (bctl->flags & allowed)) {
3902f43ffb60SIlya Dryomov 		if (!(bctl->flags & BTRFS_BALANCE_DATA) ||
3903f43ffb60SIlya Dryomov 		    !(bctl->flags & BTRFS_BALANCE_METADATA) ||
3904f43ffb60SIlya Dryomov 		    memcmp(&bctl->data, &bctl->meta, sizeof(bctl->data))) {
39055d163e0eSJeff Mahoney 			btrfs_err(fs_info,
39065d163e0eSJeff Mahoney 				  "with mixed groups data and metadata balance options must be the same");
3907f43ffb60SIlya Dryomov 			ret = -EINVAL;
3908f43ffb60SIlya Dryomov 			goto out;
3909f43ffb60SIlya Dryomov 		}
3910f43ffb60SIlya Dryomov 	}
3911f43ffb60SIlya Dryomov 
39128dabb742SStefan Behrens 	num_devices = fs_info->fs_devices->num_devices;
391373beece9SLiu Bo 	btrfs_dev_replace_lock(&fs_info->dev_replace, 0);
39148dabb742SStefan Behrens 	if (btrfs_dev_replace_is_ongoing(&fs_info->dev_replace)) {
39158dabb742SStefan Behrens 		BUG_ON(num_devices < 1);
39168dabb742SStefan Behrens 		num_devices--;
39178dabb742SStefan Behrens 	}
391873beece9SLiu Bo 	btrfs_dev_replace_unlock(&fs_info->dev_replace, 0);
391988be159cSAustin S. Hemmelgarn 	allowed = BTRFS_AVAIL_ALLOC_BIT_SINGLE | BTRFS_BLOCK_GROUP_DUP;
392088be159cSAustin S. Hemmelgarn 	if (num_devices > 1)
3921e4d8ec0fSIlya Dryomov 		allowed |= (BTRFS_BLOCK_GROUP_RAID0 | BTRFS_BLOCK_GROUP_RAID1);
39228250dabeSAndreas Philipp 	if (num_devices > 2)
39238250dabeSAndreas Philipp 		allowed |= BTRFS_BLOCK_GROUP_RAID5;
39248250dabeSAndreas Philipp 	if (num_devices > 3)
39258250dabeSAndreas Philipp 		allowed |= (BTRFS_BLOCK_GROUP_RAID10 |
392653b381b3SDavid Woodhouse 			    BTRFS_BLOCK_GROUP_RAID6);
3927bdcd3c97SAlexandru Moise 	if (validate_convert_profile(&bctl->data, allowed)) {
39285d163e0eSJeff Mahoney 		btrfs_err(fs_info,
39295d163e0eSJeff Mahoney 			  "unable to start balance with target data profile %llu",
3930c1c9ff7cSGeert Uytterhoeven 			  bctl->data.target);
3931e4d8ec0fSIlya Dryomov 		ret = -EINVAL;
3932e4d8ec0fSIlya Dryomov 		goto out;
3933e4d8ec0fSIlya Dryomov 	}
3934bdcd3c97SAlexandru Moise 	if (validate_convert_profile(&bctl->meta, allowed)) {
3935efe120a0SFrank Holton 		btrfs_err(fs_info,
3936efe120a0SFrank Holton 			  "unable to start balance with target metadata profile %llu",
3937c1c9ff7cSGeert Uytterhoeven 			  bctl->meta.target);
3938e4d8ec0fSIlya Dryomov 		ret = -EINVAL;
3939e4d8ec0fSIlya Dryomov 		goto out;
3940e4d8ec0fSIlya Dryomov 	}
3941bdcd3c97SAlexandru Moise 	if (validate_convert_profile(&bctl->sys, allowed)) {
3942efe120a0SFrank Holton 		btrfs_err(fs_info,
3943efe120a0SFrank Holton 			  "unable to start balance with target system profile %llu",
3944c1c9ff7cSGeert Uytterhoeven 			  bctl->sys.target);
3945e4d8ec0fSIlya Dryomov 		ret = -EINVAL;
3946e4d8ec0fSIlya Dryomov 		goto out;
3947e4d8ec0fSIlya Dryomov 	}
3948e4d8ec0fSIlya Dryomov 
3949e4d8ec0fSIlya Dryomov 	/* allow reducing meta or sys integrity only if force is set */
3950e4d8ec0fSIlya Dryomov 	allowed = BTRFS_BLOCK_GROUP_DUP | BTRFS_BLOCK_GROUP_RAID1 |
395153b381b3SDavid Woodhouse 			BTRFS_BLOCK_GROUP_RAID10 |
395253b381b3SDavid Woodhouse 			BTRFS_BLOCK_GROUP_RAID5 |
395353b381b3SDavid Woodhouse 			BTRFS_BLOCK_GROUP_RAID6;
3954de98ced9SMiao Xie 	do {
3955de98ced9SMiao Xie 		seq = read_seqbegin(&fs_info->profiles_lock);
3956de98ced9SMiao Xie 
3957e4d8ec0fSIlya Dryomov 		if (((bctl->sys.flags & BTRFS_BALANCE_ARGS_CONVERT) &&
3958e4d8ec0fSIlya Dryomov 		     (fs_info->avail_system_alloc_bits & allowed) &&
3959e4d8ec0fSIlya Dryomov 		     !(bctl->sys.target & allowed)) ||
3960e4d8ec0fSIlya Dryomov 		    ((bctl->meta.flags & BTRFS_BALANCE_ARGS_CONVERT) &&
3961e4d8ec0fSIlya Dryomov 		     (fs_info->avail_metadata_alloc_bits & allowed) &&
3962e4d8ec0fSIlya Dryomov 		     !(bctl->meta.target & allowed))) {
3963e4d8ec0fSIlya Dryomov 			if (bctl->flags & BTRFS_BALANCE_FORCE) {
39645d163e0eSJeff Mahoney 				btrfs_info(fs_info,
39655d163e0eSJeff Mahoney 					   "force reducing metadata integrity");
3966e4d8ec0fSIlya Dryomov 			} else {
39675d163e0eSJeff Mahoney 				btrfs_err(fs_info,
39685d163e0eSJeff Mahoney 					  "balance will reduce metadata integrity, use force if you want this");
3969e4d8ec0fSIlya Dryomov 				ret = -EINVAL;
3970e4d8ec0fSIlya Dryomov 				goto out;
3971e4d8ec0fSIlya Dryomov 			}
3972e4d8ec0fSIlya Dryomov 		}
3973de98ced9SMiao Xie 	} while (read_seqretry(&fs_info->profiles_lock, seq));
3974e4d8ec0fSIlya Dryomov 
397514506127SAdam Borowski 	/* if we're not converting, the target field is uninitialized */
397614506127SAdam Borowski 	meta_target = (bctl->meta.flags & BTRFS_BALANCE_ARGS_CONVERT) ?
397714506127SAdam Borowski 		bctl->meta.target : fs_info->avail_metadata_alloc_bits;
397814506127SAdam Borowski 	data_target = (bctl->data.flags & BTRFS_BALANCE_ARGS_CONVERT) ?
397914506127SAdam Borowski 		bctl->data.target : fs_info->avail_data_alloc_bits;
398014506127SAdam Borowski 	if (btrfs_get_num_tolerated_disk_barrier_failures(meta_target) <
398114506127SAdam Borowski 		btrfs_get_num_tolerated_disk_barrier_failures(data_target)) {
3982ee592d07SSam Tygier 		btrfs_warn(fs_info,
3983fedc0045SFilipe Manana 			   "metadata profile 0x%llx has lower redundancy than data profile 0x%llx",
398414506127SAdam Borowski 			   meta_target, data_target);
3985ee592d07SSam Tygier 	}
3986ee592d07SSam Tygier 
39876bccf3abSJeff Mahoney 	ret = insert_balance_item(fs_info, bctl);
398859641015SIlya Dryomov 	if (ret && ret != -EEXIST)
39890940ebf6SIlya Dryomov 		goto out;
39900940ebf6SIlya Dryomov 
399159641015SIlya Dryomov 	if (!(bctl->flags & BTRFS_BALANCE_RESUME)) {
399259641015SIlya Dryomov 		BUG_ON(ret == -EEXIST);
3993c9e9f97bSIlya Dryomov 		set_balance_control(bctl);
399459641015SIlya Dryomov 	} else {
399559641015SIlya Dryomov 		BUG_ON(ret != -EEXIST);
399659641015SIlya Dryomov 		spin_lock(&fs_info->balance_lock);
399759641015SIlya Dryomov 		update_balance_args(bctl);
399859641015SIlya Dryomov 		spin_unlock(&fs_info->balance_lock);
399959641015SIlya Dryomov 	}
4000c9e9f97bSIlya Dryomov 
4001837d5b6eSIlya Dryomov 	atomic_inc(&fs_info->balance_running);
4002c9e9f97bSIlya Dryomov 	mutex_unlock(&fs_info->balance_mutex);
4003c9e9f97bSIlya Dryomov 
4004c9e9f97bSIlya Dryomov 	ret = __btrfs_balance(fs_info);
4005c9e9f97bSIlya Dryomov 
4006c9e9f97bSIlya Dryomov 	mutex_lock(&fs_info->balance_mutex);
4007837d5b6eSIlya Dryomov 	atomic_dec(&fs_info->balance_running);
4008c9e9f97bSIlya Dryomov 
4009c9e9f97bSIlya Dryomov 	if (bargs) {
4010c9e9f97bSIlya Dryomov 		memset(bargs, 0, sizeof(*bargs));
401119a39dceSIlya Dryomov 		update_ioctl_balance_args(fs_info, 0, bargs);
4012c9e9f97bSIlya Dryomov 	}
4013c9e9f97bSIlya Dryomov 
40143a01aa7aSIlya Dryomov 	if ((ret && ret != -ECANCELED && ret != -ENOSPC) ||
40153a01aa7aSIlya Dryomov 	    balance_need_close(fs_info)) {
40163a01aa7aSIlya Dryomov 		__cancel_balance(fs_info);
40173a01aa7aSIlya Dryomov 	}
40183a01aa7aSIlya Dryomov 
4019837d5b6eSIlya Dryomov 	wake_up(&fs_info->balance_wait_q);
4020c9e9f97bSIlya Dryomov 
4021c9e9f97bSIlya Dryomov 	return ret;
4022c9e9f97bSIlya Dryomov out:
402359641015SIlya Dryomov 	if (bctl->flags & BTRFS_BALANCE_RESUME)
402459641015SIlya Dryomov 		__cancel_balance(fs_info);
4025ed0fb78fSIlya Dryomov 	else {
4026c9e9f97bSIlya Dryomov 		kfree(bctl);
4027171938e5SDavid Sterba 		clear_bit(BTRFS_FS_EXCL_OP, &fs_info->flags);
4028ed0fb78fSIlya Dryomov 	}
40298f18cf13SChris Mason 	return ret;
40308f18cf13SChris Mason }
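
/*
 * Illustrative sketch of how btrfs_balance() above gates conversion targets
 * on the device count: single and DUP are always allowed, RAID0/RAID1 need at
 * least two devices, RAID5 three, RAID10 and RAID6 four.  The bit values are
 * stand-ins for the real block group flags, and the full kernel check also
 * runs alloc_profile_is_valid() on the target.
 */
#include <stdint.h>
#include <stdio.h>

#define P_SINGLE	(1ULL << 0)
#define P_DUP		(1ULL << 1)
#define P_RAID0		(1ULL << 2)
#define P_RAID1		(1ULL << 3)
#define P_RAID5		(1ULL << 4)
#define P_RAID10	(1ULL << 5)
#define P_RAID6		(1ULL << 6)

static uint64_t allowed_profiles(uint64_t num_devices)
{
	uint64_t allowed = P_SINGLE | P_DUP;

	if (num_devices > 1)
		allowed |= P_RAID0 | P_RAID1;
	if (num_devices > 2)
		allowed |= P_RAID5;
	if (num_devices > 3)
		allowed |= P_RAID10 | P_RAID6;
	return allowed;
}

/* Non-zero means the requested conversion target cannot be used. */
static int convert_target_invalid(uint64_t target, uint64_t num_devices)
{
	return (target & ~allowed_profiles(num_devices)) != 0;
}

int main(void)
{
	printf("raid6 on 3 devices invalid: %d\n",
	       convert_target_invalid(P_RAID6, 3));
	printf("raid1 on 2 devices invalid: %d\n",
	       convert_target_invalid(P_RAID1, 2));
	return 0;
}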
40318f18cf13SChris Mason 
403259641015SIlya Dryomov static int balance_kthread(void *data)
403359641015SIlya Dryomov {
40342b6ba629SIlya Dryomov 	struct btrfs_fs_info *fs_info = data;
40359555c6c1SIlya Dryomov 	int ret = 0;
403659641015SIlya Dryomov 
403759641015SIlya Dryomov 	mutex_lock(&fs_info->volume_mutex);
403859641015SIlya Dryomov 	mutex_lock(&fs_info->balance_mutex);
403959641015SIlya Dryomov 
40402b6ba629SIlya Dryomov 	if (fs_info->balance_ctl) {
4041efe120a0SFrank Holton 		btrfs_info(fs_info, "continuing balance");
40422b6ba629SIlya Dryomov 		ret = btrfs_balance(fs_info->balance_ctl, NULL);
40439555c6c1SIlya Dryomov 	}
404459641015SIlya Dryomov 
404559641015SIlya Dryomov 	mutex_unlock(&fs_info->balance_mutex);
404659641015SIlya Dryomov 	mutex_unlock(&fs_info->volume_mutex);
40472b6ba629SIlya Dryomov 
404859641015SIlya Dryomov 	return ret;
404959641015SIlya Dryomov }
405059641015SIlya Dryomov 
40512b6ba629SIlya Dryomov int btrfs_resume_balance_async(struct btrfs_fs_info *fs_info)
40522b6ba629SIlya Dryomov {
40532b6ba629SIlya Dryomov 	struct task_struct *tsk;
40542b6ba629SIlya Dryomov 
40552b6ba629SIlya Dryomov 	spin_lock(&fs_info->balance_lock);
40562b6ba629SIlya Dryomov 	if (!fs_info->balance_ctl) {
40572b6ba629SIlya Dryomov 		spin_unlock(&fs_info->balance_lock);
40582b6ba629SIlya Dryomov 		return 0;
40592b6ba629SIlya Dryomov 	}
40602b6ba629SIlya Dryomov 	spin_unlock(&fs_info->balance_lock);
40612b6ba629SIlya Dryomov 
40623cdde224SJeff Mahoney 	if (btrfs_test_opt(fs_info, SKIP_BALANCE)) {
4063efe120a0SFrank Holton 		btrfs_info(fs_info, "force skipping balance");
40642b6ba629SIlya Dryomov 		return 0;
40652b6ba629SIlya Dryomov 	}
40662b6ba629SIlya Dryomov 
40672b6ba629SIlya Dryomov 	tsk = kthread_run(balance_kthread, fs_info, "btrfs-balance");
4068cd633972SSachin Kamat 	return PTR_ERR_OR_ZERO(tsk);
40692b6ba629SIlya Dryomov }
40702b6ba629SIlya Dryomov 
407168310a5eSIlya Dryomov int btrfs_recover_balance(struct btrfs_fs_info *fs_info)
407259641015SIlya Dryomov {
407359641015SIlya Dryomov 	struct btrfs_balance_control *bctl;
407459641015SIlya Dryomov 	struct btrfs_balance_item *item;
407559641015SIlya Dryomov 	struct btrfs_disk_balance_args disk_bargs;
407659641015SIlya Dryomov 	struct btrfs_path *path;
407759641015SIlya Dryomov 	struct extent_buffer *leaf;
407859641015SIlya Dryomov 	struct btrfs_key key;
407959641015SIlya Dryomov 	int ret;
408059641015SIlya Dryomov 
408159641015SIlya Dryomov 	path = btrfs_alloc_path();
408259641015SIlya Dryomov 	if (!path)
408359641015SIlya Dryomov 		return -ENOMEM;
408459641015SIlya Dryomov 
408568310a5eSIlya Dryomov 	key.objectid = BTRFS_BALANCE_OBJECTID;
4086c479cb4fSDavid Sterba 	key.type = BTRFS_TEMPORARY_ITEM_KEY;
408768310a5eSIlya Dryomov 	key.offset = 0;
408868310a5eSIlya Dryomov 
408968310a5eSIlya Dryomov 	ret = btrfs_search_slot(NULL, fs_info->tree_root, &key, path, 0, 0);
409068310a5eSIlya Dryomov 	if (ret < 0)
409168310a5eSIlya Dryomov 		goto out;
409268310a5eSIlya Dryomov 	if (ret > 0) { /* ret = -ENOENT; */
409368310a5eSIlya Dryomov 		ret = 0;
409468310a5eSIlya Dryomov 		goto out;
409568310a5eSIlya Dryomov 	}
409668310a5eSIlya Dryomov 
409759641015SIlya Dryomov 	bctl = kzalloc(sizeof(*bctl), GFP_NOFS);
409859641015SIlya Dryomov 	if (!bctl) {
409959641015SIlya Dryomov 		ret = -ENOMEM;
410059641015SIlya Dryomov 		goto out;
410159641015SIlya Dryomov 	}
410259641015SIlya Dryomov 
410359641015SIlya Dryomov 	leaf = path->nodes[0];
410459641015SIlya Dryomov 	item = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_balance_item);
410559641015SIlya Dryomov 
410668310a5eSIlya Dryomov 	bctl->fs_info = fs_info;
410768310a5eSIlya Dryomov 	bctl->flags = btrfs_balance_flags(leaf, item);
410868310a5eSIlya Dryomov 	bctl->flags |= BTRFS_BALANCE_RESUME;
410959641015SIlya Dryomov 
411059641015SIlya Dryomov 	btrfs_balance_data(leaf, item, &disk_bargs);
411159641015SIlya Dryomov 	btrfs_disk_balance_args_to_cpu(&bctl->data, &disk_bargs);
411259641015SIlya Dryomov 	btrfs_balance_meta(leaf, item, &disk_bargs);
411359641015SIlya Dryomov 	btrfs_disk_balance_args_to_cpu(&bctl->meta, &disk_bargs);
411459641015SIlya Dryomov 	btrfs_balance_sys(leaf, item, &disk_bargs);
411559641015SIlya Dryomov 	btrfs_disk_balance_args_to_cpu(&bctl->sys, &disk_bargs);
411659641015SIlya Dryomov 
4117171938e5SDavid Sterba 	WARN_ON(test_and_set_bit(BTRFS_FS_EXCL_OP, &fs_info->flags));
4118ed0fb78fSIlya Dryomov 
411968310a5eSIlya Dryomov 	mutex_lock(&fs_info->volume_mutex);
412068310a5eSIlya Dryomov 	mutex_lock(&fs_info->balance_mutex);
412159641015SIlya Dryomov 
412268310a5eSIlya Dryomov 	set_balance_control(bctl);
412368310a5eSIlya Dryomov 
412468310a5eSIlya Dryomov 	mutex_unlock(&fs_info->balance_mutex);
412568310a5eSIlya Dryomov 	mutex_unlock(&fs_info->volume_mutex);
412659641015SIlya Dryomov out:
412759641015SIlya Dryomov 	btrfs_free_path(path);
412859641015SIlya Dryomov 	return ret;
412959641015SIlya Dryomov }
413059641015SIlya Dryomov 
4131837d5b6eSIlya Dryomov int btrfs_pause_balance(struct btrfs_fs_info *fs_info)
4132837d5b6eSIlya Dryomov {
4133837d5b6eSIlya Dryomov 	int ret = 0;
4134837d5b6eSIlya Dryomov 
4135837d5b6eSIlya Dryomov 	mutex_lock(&fs_info->balance_mutex);
4136837d5b6eSIlya Dryomov 	if (!fs_info->balance_ctl) {
4137837d5b6eSIlya Dryomov 		mutex_unlock(&fs_info->balance_mutex);
4138837d5b6eSIlya Dryomov 		return -ENOTCONN;
4139837d5b6eSIlya Dryomov 	}
4140837d5b6eSIlya Dryomov 
4141837d5b6eSIlya Dryomov 	if (atomic_read(&fs_info->balance_running)) {
4142837d5b6eSIlya Dryomov 		atomic_inc(&fs_info->balance_pause_req);
4143837d5b6eSIlya Dryomov 		mutex_unlock(&fs_info->balance_mutex);
4144837d5b6eSIlya Dryomov 
4145837d5b6eSIlya Dryomov 		wait_event(fs_info->balance_wait_q,
4146837d5b6eSIlya Dryomov 			   atomic_read(&fs_info->balance_running) == 0);
4147837d5b6eSIlya Dryomov 
4148837d5b6eSIlya Dryomov 		mutex_lock(&fs_info->balance_mutex);
4149837d5b6eSIlya Dryomov 		/* we are good with balance_ctl ripped off from under us */
4150837d5b6eSIlya Dryomov 		BUG_ON(atomic_read(&fs_info->balance_running));
4151837d5b6eSIlya Dryomov 		atomic_dec(&fs_info->balance_pause_req);
4152837d5b6eSIlya Dryomov 	} else {
4153837d5b6eSIlya Dryomov 		ret = -ENOTCONN;
4154837d5b6eSIlya Dryomov 	}
4155837d5b6eSIlya Dryomov 
4156837d5b6eSIlya Dryomov 	mutex_unlock(&fs_info->balance_mutex);
4157837d5b6eSIlya Dryomov 	return ret;
4158837d5b6eSIlya Dryomov }
4159837d5b6eSIlya Dryomov 
4160a7e99c69SIlya Dryomov int btrfs_cancel_balance(struct btrfs_fs_info *fs_info)
4161a7e99c69SIlya Dryomov {
4162bc98a42cSDavid Howells 	if (sb_rdonly(fs_info->sb))
4163e649e587SIlya Dryomov 		return -EROFS;
4164e649e587SIlya Dryomov 
4165a7e99c69SIlya Dryomov 	mutex_lock(&fs_info->balance_mutex);
4166a7e99c69SIlya Dryomov 	if (!fs_info->balance_ctl) {
4167a7e99c69SIlya Dryomov 		mutex_unlock(&fs_info->balance_mutex);
4168a7e99c69SIlya Dryomov 		return -ENOTCONN;
4169a7e99c69SIlya Dryomov 	}
4170a7e99c69SIlya Dryomov 
4171a7e99c69SIlya Dryomov 	atomic_inc(&fs_info->balance_cancel_req);
4172a7e99c69SIlya Dryomov 	/*
4173a7e99c69SIlya Dryomov 	 * if we are running, just wait and return; the balance item is
4174a7e99c69SIlya Dryomov 	 * deleted in btrfs_balance in this case
4175a7e99c69SIlya Dryomov 	 */
4176a7e99c69SIlya Dryomov 	if (atomic_read(&fs_info->balance_running)) {
4177a7e99c69SIlya Dryomov 		mutex_unlock(&fs_info->balance_mutex);
4178a7e99c69SIlya Dryomov 		wait_event(fs_info->balance_wait_q,
4179a7e99c69SIlya Dryomov 			   atomic_read(&fs_info->balance_running) == 0);
4180a7e99c69SIlya Dryomov 		mutex_lock(&fs_info->balance_mutex);
4181a7e99c69SIlya Dryomov 	} else {
4182a7e99c69SIlya Dryomov 		/* __cancel_balance needs volume_mutex */
4183a7e99c69SIlya Dryomov 		mutex_unlock(&fs_info->balance_mutex);
4184a7e99c69SIlya Dryomov 		mutex_lock(&fs_info->volume_mutex);
4185a7e99c69SIlya Dryomov 		mutex_lock(&fs_info->balance_mutex);
4186a7e99c69SIlya Dryomov 
4187a7e99c69SIlya Dryomov 		if (fs_info->balance_ctl)
4188a7e99c69SIlya Dryomov 			__cancel_balance(fs_info);
4189a7e99c69SIlya Dryomov 
4190a7e99c69SIlya Dryomov 		mutex_unlock(&fs_info->volume_mutex);
4191a7e99c69SIlya Dryomov 	}
4192a7e99c69SIlya Dryomov 
4193a7e99c69SIlya Dryomov 	BUG_ON(fs_info->balance_ctl || atomic_read(&fs_info->balance_running));
4194a7e99c69SIlya Dryomov 	atomic_dec(&fs_info->balance_cancel_req);
4195a7e99c69SIlya Dryomov 	mutex_unlock(&fs_info->balance_mutex);
4196a7e99c69SIlya Dryomov 	return 0;
4197a7e99c69SIlya Dryomov }
4198a7e99c69SIlya Dryomov 
4199803b2f54SStefan Behrens static int btrfs_uuid_scan_kthread(void *data)
4200803b2f54SStefan Behrens {
4201803b2f54SStefan Behrens 	struct btrfs_fs_info *fs_info = data;
4202803b2f54SStefan Behrens 	struct btrfs_root *root = fs_info->tree_root;
4203803b2f54SStefan Behrens 	struct btrfs_key key;
4204803b2f54SStefan Behrens 	struct btrfs_path *path = NULL;
4205803b2f54SStefan Behrens 	int ret = 0;
4206803b2f54SStefan Behrens 	struct extent_buffer *eb;
4207803b2f54SStefan Behrens 	int slot;
4208803b2f54SStefan Behrens 	struct btrfs_root_item root_item;
4209803b2f54SStefan Behrens 	u32 item_size;
4210f45388f3SFilipe David Borba Manana 	struct btrfs_trans_handle *trans = NULL;
4211803b2f54SStefan Behrens 
4212803b2f54SStefan Behrens 	path = btrfs_alloc_path();
4213803b2f54SStefan Behrens 	if (!path) {
4214803b2f54SStefan Behrens 		ret = -ENOMEM;
4215803b2f54SStefan Behrens 		goto out;
4216803b2f54SStefan Behrens 	}
4217803b2f54SStefan Behrens 
4218803b2f54SStefan Behrens 	key.objectid = 0;
4219803b2f54SStefan Behrens 	key.type = BTRFS_ROOT_ITEM_KEY;
4220803b2f54SStefan Behrens 	key.offset = 0;
4221803b2f54SStefan Behrens 
4222803b2f54SStefan Behrens 	while (1) {
42236174d3cbSFilipe David Borba Manana 		ret = btrfs_search_forward(root, &key, path, 0);
4224803b2f54SStefan Behrens 		if (ret) {
4225803b2f54SStefan Behrens 			if (ret > 0)
4226803b2f54SStefan Behrens 				ret = 0;
4227803b2f54SStefan Behrens 			break;
4228803b2f54SStefan Behrens 		}
4229803b2f54SStefan Behrens 
4230803b2f54SStefan Behrens 		if (key.type != BTRFS_ROOT_ITEM_KEY ||
4231803b2f54SStefan Behrens 		    (key.objectid < BTRFS_FIRST_FREE_OBJECTID &&
4232803b2f54SStefan Behrens 		     key.objectid != BTRFS_FS_TREE_OBJECTID) ||
4233803b2f54SStefan Behrens 		    key.objectid > BTRFS_LAST_FREE_OBJECTID)
4234803b2f54SStefan Behrens 			goto skip;
4235803b2f54SStefan Behrens 
4236803b2f54SStefan Behrens 		eb = path->nodes[0];
4237803b2f54SStefan Behrens 		slot = path->slots[0];
4238803b2f54SStefan Behrens 		item_size = btrfs_item_size_nr(eb, slot);
4239803b2f54SStefan Behrens 		if (item_size < sizeof(root_item))
4240803b2f54SStefan Behrens 			goto skip;
4241803b2f54SStefan Behrens 
4242803b2f54SStefan Behrens 		read_extent_buffer(eb, &root_item,
4243803b2f54SStefan Behrens 				   btrfs_item_ptr_offset(eb, slot),
4244803b2f54SStefan Behrens 				   (int)sizeof(root_item));
4245803b2f54SStefan Behrens 		if (btrfs_root_refs(&root_item) == 0)
4246803b2f54SStefan Behrens 			goto skip;
4247f45388f3SFilipe David Borba Manana 
4248f45388f3SFilipe David Borba Manana 		if (!btrfs_is_empty_uuid(root_item.uuid) ||
4249f45388f3SFilipe David Borba Manana 		    !btrfs_is_empty_uuid(root_item.received_uuid)) {
4250f45388f3SFilipe David Borba Manana 			if (trans)
4251f45388f3SFilipe David Borba Manana 				goto update_tree;
4252f45388f3SFilipe David Borba Manana 
4253f45388f3SFilipe David Borba Manana 			btrfs_release_path(path);
4254803b2f54SStefan Behrens 			/*
4255803b2f54SStefan Behrens 			 * 1 - subvol uuid item
4256803b2f54SStefan Behrens 			 * 1 - received_subvol uuid item
4257803b2f54SStefan Behrens 			 */
4258803b2f54SStefan Behrens 			trans = btrfs_start_transaction(fs_info->uuid_root, 2);
4259803b2f54SStefan Behrens 			if (IS_ERR(trans)) {
4260803b2f54SStefan Behrens 				ret = PTR_ERR(trans);
4261803b2f54SStefan Behrens 				break;
4262803b2f54SStefan Behrens 			}
4263f45388f3SFilipe David Borba Manana 			continue;
4264f45388f3SFilipe David Borba Manana 		} else {
4265f45388f3SFilipe David Borba Manana 			goto skip;
4266f45388f3SFilipe David Borba Manana 		}
4267f45388f3SFilipe David Borba Manana update_tree:
4268f45388f3SFilipe David Borba Manana 		if (!btrfs_is_empty_uuid(root_item.uuid)) {
42696bccf3abSJeff Mahoney 			ret = btrfs_uuid_tree_add(trans, fs_info,
4270803b2f54SStefan Behrens 						  root_item.uuid,
4271803b2f54SStefan Behrens 						  BTRFS_UUID_KEY_SUBVOL,
4272803b2f54SStefan Behrens 						  key.objectid);
4273803b2f54SStefan Behrens 			if (ret < 0) {
4274efe120a0SFrank Holton 				btrfs_warn(fs_info, "uuid_tree_add failed %d",
4275803b2f54SStefan Behrens 					ret);
4276803b2f54SStefan Behrens 				break;
4277803b2f54SStefan Behrens 			}
4278803b2f54SStefan Behrens 		}
4279803b2f54SStefan Behrens 
4280803b2f54SStefan Behrens 		if (!btrfs_is_empty_uuid(root_item.received_uuid)) {
42816bccf3abSJeff Mahoney 			ret = btrfs_uuid_tree_add(trans, fs_info,
4282803b2f54SStefan Behrens 						  root_item.received_uuid,
4283803b2f54SStefan Behrens 						 BTRFS_UUID_KEY_RECEIVED_SUBVOL,
4284803b2f54SStefan Behrens 						  key.objectid);
4285803b2f54SStefan Behrens 			if (ret < 0) {
4286efe120a0SFrank Holton 				btrfs_warn(fs_info, "uuid_tree_add failed %d",
4287803b2f54SStefan Behrens 					ret);
4288803b2f54SStefan Behrens 				break;
4289803b2f54SStefan Behrens 			}
4290803b2f54SStefan Behrens 		}
4291803b2f54SStefan Behrens 
4292f45388f3SFilipe David Borba Manana skip:
4293803b2f54SStefan Behrens 		if (trans) {
42943a45bb20SJeff Mahoney 			ret = btrfs_end_transaction(trans);
4295f45388f3SFilipe David Borba Manana 			trans = NULL;
4296803b2f54SStefan Behrens 			if (ret)
4297803b2f54SStefan Behrens 				break;
4298803b2f54SStefan Behrens 		}
4299803b2f54SStefan Behrens 
4300803b2f54SStefan Behrens 		btrfs_release_path(path);
4301803b2f54SStefan Behrens 		if (key.offset < (u64)-1) {
4302803b2f54SStefan Behrens 			key.offset++;
4303803b2f54SStefan Behrens 		} else if (key.type < BTRFS_ROOT_ITEM_KEY) {
4304803b2f54SStefan Behrens 			key.offset = 0;
4305803b2f54SStefan Behrens 			key.type = BTRFS_ROOT_ITEM_KEY;
4306803b2f54SStefan Behrens 		} else if (key.objectid < (u64)-1) {
4307803b2f54SStefan Behrens 			key.offset = 0;
4308803b2f54SStefan Behrens 			key.type = BTRFS_ROOT_ITEM_KEY;
4309803b2f54SStefan Behrens 			key.objectid++;
4310803b2f54SStefan Behrens 		} else {
4311803b2f54SStefan Behrens 			break;
4312803b2f54SStefan Behrens 		}
4313803b2f54SStefan Behrens 		cond_resched();
4314803b2f54SStefan Behrens 	}
4315803b2f54SStefan Behrens 
4316803b2f54SStefan Behrens out:
4317803b2f54SStefan Behrens 	btrfs_free_path(path);
4318f45388f3SFilipe David Borba Manana 	if (trans && !IS_ERR(trans))
43193a45bb20SJeff Mahoney 		btrfs_end_transaction(trans);
4320803b2f54SStefan Behrens 	if (ret)
4321efe120a0SFrank Holton 		btrfs_warn(fs_info, "btrfs_uuid_scan_kthread failed %d", ret);
432270f80175SStefan Behrens 	else
4323afcdd129SJosef Bacik 		set_bit(BTRFS_FS_UPDATE_UUID_TREE_GEN, &fs_info->flags);
4324803b2f54SStefan Behrens 	up(&fs_info->uuid_tree_rescan_sem);
4325803b2f54SStefan Behrens 	return 0;
4326803b2f54SStefan Behrens }
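
/*
 * Illustrative sketch of the key advance at the bottom of the scan loop
 * above: the (objectid, type, offset) triple is bumped in lexicographic order
 * so that each btrfs_search_forward() call starts strictly after the item
 * just processed.  The key type value below is a stand-in for
 * BTRFS_ROOT_ITEM_KEY.
 */
#include <stdint.h>
#include <stdio.h>

#define ROOT_ITEM_KEY 132

struct demo_key {
	uint64_t objectid;
	uint8_t type;
	uint64_t offset;
};

/* Advance to the next possible key; return 0 when the keyspace is done. */
static int advance_key(struct demo_key *key)
{
	if (key->offset < UINT64_MAX) {
		key->offset++;
	} else if (key->type < ROOT_ITEM_KEY) {
		key->offset = 0;
		key->type = ROOT_ITEM_KEY;
	} else if (key->objectid < UINT64_MAX) {
		key->offset = 0;
		key->type = ROOT_ITEM_KEY;
		key->objectid++;
	} else {
		return 0;
	}
	return 1;
}

int main(void)
{
	struct demo_key key = { .objectid = 5, .type = ROOT_ITEM_KEY,
				.offset = UINT64_MAX };
	int i;

	for (i = 0; i < 3 && advance_key(&key); i++)
		printf("next search key: %llu %u %llu\n",
		       (unsigned long long)key.objectid, (unsigned)key.type,
		       (unsigned long long)key.offset);
	return 0;
}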
4327803b2f54SStefan Behrens 
432870f80175SStefan Behrens /*
432970f80175SStefan Behrens  * Callback for btrfs_uuid_tree_iterate().
433070f80175SStefan Behrens  * returns:
433170f80175SStefan Behrens  * 0	check succeeded, the entry is not outdated.
4332bb7ab3b9SAdam Buchbinder  * < 0	if an error occurred.
433370f80175SStefan Behrens  * > 0	if the check failed, which means the caller shall remove the entry.
433470f80175SStefan Behrens  */
433570f80175SStefan Behrens static int btrfs_check_uuid_tree_entry(struct btrfs_fs_info *fs_info,
433670f80175SStefan Behrens 				       u8 *uuid, u8 type, u64 subid)
433770f80175SStefan Behrens {
433870f80175SStefan Behrens 	struct btrfs_key key;
433970f80175SStefan Behrens 	int ret = 0;
434070f80175SStefan Behrens 	struct btrfs_root *subvol_root;
434170f80175SStefan Behrens 
434270f80175SStefan Behrens 	if (type != BTRFS_UUID_KEY_SUBVOL &&
434370f80175SStefan Behrens 	    type != BTRFS_UUID_KEY_RECEIVED_SUBVOL)
434470f80175SStefan Behrens 		goto out;
434570f80175SStefan Behrens 
434670f80175SStefan Behrens 	key.objectid = subid;
434770f80175SStefan Behrens 	key.type = BTRFS_ROOT_ITEM_KEY;
434870f80175SStefan Behrens 	key.offset = (u64)-1;
434970f80175SStefan Behrens 	subvol_root = btrfs_read_fs_root_no_name(fs_info, &key);
435070f80175SStefan Behrens 	if (IS_ERR(subvol_root)) {
435170f80175SStefan Behrens 		ret = PTR_ERR(subvol_root);
435270f80175SStefan Behrens 		if (ret == -ENOENT)
435370f80175SStefan Behrens 			ret = 1;
435470f80175SStefan Behrens 		goto out;
435570f80175SStefan Behrens 	}
435670f80175SStefan Behrens 
435770f80175SStefan Behrens 	switch (type) {
435870f80175SStefan Behrens 	case BTRFS_UUID_KEY_SUBVOL:
435970f80175SStefan Behrens 		if (memcmp(uuid, subvol_root->root_item.uuid, BTRFS_UUID_SIZE))
436070f80175SStefan Behrens 			ret = 1;
436170f80175SStefan Behrens 		break;
436270f80175SStefan Behrens 	case BTRFS_UUID_KEY_RECEIVED_SUBVOL:
436370f80175SStefan Behrens 		if (memcmp(uuid, subvol_root->root_item.received_uuid,
436470f80175SStefan Behrens 			   BTRFS_UUID_SIZE))
436570f80175SStefan Behrens 			ret = 1;
436670f80175SStefan Behrens 		break;
436770f80175SStefan Behrens 	}
436870f80175SStefan Behrens 
436970f80175SStefan Behrens out:
437070f80175SStefan Behrens 	return ret;
437170f80175SStefan Behrens }
437270f80175SStefan Behrens 
437370f80175SStefan Behrens static int btrfs_uuid_rescan_kthread(void *data)
437470f80175SStefan Behrens {
437570f80175SStefan Behrens 	struct btrfs_fs_info *fs_info = (struct btrfs_fs_info *)data;
437670f80175SStefan Behrens 	int ret;
437770f80175SStefan Behrens 
437870f80175SStefan Behrens 	/*
437970f80175SStefan Behrens 	 * 1st step is to iterate through the existing UUID tree and
438070f80175SStefan Behrens 	 * to delete all entries that contain outdated data.
438170f80175SStefan Behrens 	 * 2nd step is to add all missing entries to the UUID tree.
438270f80175SStefan Behrens 	 */
438370f80175SStefan Behrens 	ret = btrfs_uuid_tree_iterate(fs_info, btrfs_check_uuid_tree_entry);
438470f80175SStefan Behrens 	if (ret < 0) {
4385efe120a0SFrank Holton 		btrfs_warn(fs_info, "iterating uuid_tree failed %d", ret);
438670f80175SStefan Behrens 		up(&fs_info->uuid_tree_rescan_sem);
438770f80175SStefan Behrens 		return ret;
438870f80175SStefan Behrens 	}
438970f80175SStefan Behrens 	return btrfs_uuid_scan_kthread(data);
439070f80175SStefan Behrens }
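
/*
 * Generic sketch of the two-step reconciliation described in the comment
 * above: first walk the existing entries and drop the stale ones, then rescan
 * the source of truth and add whatever is missing.  Plain arrays stand in for
 * the UUID tree and the subvolume roots.
 */
#include <stdio.h>

#define MAXE 8

static int tree[MAXE]   = { 1, 2, 9, 0, 0, 0, 0, 0 };	/* entry 9 is stale */
static int source[MAXE] = { 1, 2, 3, 4, 0, 0, 0, 0 };	/* ground truth     */

static int contains(const int *set, int id)
{
	int i;

	for (i = 0; i < MAXE; i++)
		if (set[i] == id)
			return 1;
	return 0;
}

int main(void)
{
	int i, j;

	/* Step 1: delete entries whose backing object no longer exists. */
	for (i = 0; i < MAXE; i++)
		if (tree[i] && !contains(source, tree[i]))
			tree[i] = 0;

	/* Step 2: add every source entry that the tree is missing. */
	for (i = 0; i < MAXE; i++) {
		if (!source[i] || contains(tree, source[i]))
			continue;
		for (j = 0; j < MAXE; j++) {
			if (!tree[j]) {
				tree[j] = source[i];
				break;
			}
		}
	}

	for (i = 0; i < MAXE; i++)
		if (tree[i])
			printf("tree entry %d\n", tree[i]);
	return 0;
}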
439170f80175SStefan Behrens 
4392f7a81ea4SStefan Behrens int btrfs_create_uuid_tree(struct btrfs_fs_info *fs_info)
4393f7a81ea4SStefan Behrens {
4394f7a81ea4SStefan Behrens 	struct btrfs_trans_handle *trans;
4395f7a81ea4SStefan Behrens 	struct btrfs_root *tree_root = fs_info->tree_root;
4396f7a81ea4SStefan Behrens 	struct btrfs_root *uuid_root;
4397803b2f54SStefan Behrens 	struct task_struct *task;
4398803b2f54SStefan Behrens 	int ret;
4399f7a81ea4SStefan Behrens 
4400f7a81ea4SStefan Behrens 	/*
4401f7a81ea4SStefan Behrens 	 * 1 - root node
4402f7a81ea4SStefan Behrens 	 * 1 - root item
4403f7a81ea4SStefan Behrens 	 */
4404f7a81ea4SStefan Behrens 	trans = btrfs_start_transaction(tree_root, 2);
4405f7a81ea4SStefan Behrens 	if (IS_ERR(trans))
4406f7a81ea4SStefan Behrens 		return PTR_ERR(trans);
4407f7a81ea4SStefan Behrens 
4408f7a81ea4SStefan Behrens 	uuid_root = btrfs_create_tree(trans, fs_info,
4409f7a81ea4SStefan Behrens 				      BTRFS_UUID_TREE_OBJECTID);
4410f7a81ea4SStefan Behrens 	if (IS_ERR(uuid_root)) {
44116d13f549SDavid Sterba 		ret = PTR_ERR(uuid_root);
441266642832SJeff Mahoney 		btrfs_abort_transaction(trans, ret);
44133a45bb20SJeff Mahoney 		btrfs_end_transaction(trans);
44146d13f549SDavid Sterba 		return ret;
4415f7a81ea4SStefan Behrens 	}
4416f7a81ea4SStefan Behrens 
4417f7a81ea4SStefan Behrens 	fs_info->uuid_root = uuid_root;
4418f7a81ea4SStefan Behrens 
44193a45bb20SJeff Mahoney 	ret = btrfs_commit_transaction(trans);
4420803b2f54SStefan Behrens 	if (ret)
4421803b2f54SStefan Behrens 		return ret;
4422803b2f54SStefan Behrens 
4423803b2f54SStefan Behrens 	down(&fs_info->uuid_tree_rescan_sem);
4424803b2f54SStefan Behrens 	task = kthread_run(btrfs_uuid_scan_kthread, fs_info, "btrfs-uuid");
4425803b2f54SStefan Behrens 	if (IS_ERR(task)) {
442670f80175SStefan Behrens 		/* fs_info->update_uuid_tree_gen remains 0 in all error cases */
4427efe120a0SFrank Holton 		btrfs_warn(fs_info, "failed to start uuid_scan task");
4428803b2f54SStefan Behrens 		up(&fs_info->uuid_tree_rescan_sem);
4429803b2f54SStefan Behrens 		return PTR_ERR(task);
4430f7a81ea4SStefan Behrens 	}
4431803b2f54SStefan Behrens 
4432803b2f54SStefan Behrens 	return 0;
4433803b2f54SStefan Behrens }
4434803b2f54SStefan Behrens 
443570f80175SStefan Behrens int btrfs_check_uuid_tree(struct btrfs_fs_info *fs_info)
443670f80175SStefan Behrens {
443770f80175SStefan Behrens 	struct task_struct *task;
443870f80175SStefan Behrens 
443970f80175SStefan Behrens 	down(&fs_info->uuid_tree_rescan_sem);
444070f80175SStefan Behrens 	task = kthread_run(btrfs_uuid_rescan_kthread, fs_info, "btrfs-uuid");
444170f80175SStefan Behrens 	if (IS_ERR(task)) {
444270f80175SStefan Behrens 		/* fs_info->update_uuid_tree_gen remains 0 in all error cases */
4443efe120a0SFrank Holton 		btrfs_warn(fs_info, "failed to start uuid_rescan task");
444470f80175SStefan Behrens 		up(&fs_info->uuid_tree_rescan_sem);
444570f80175SStefan Behrens 		return PTR_ERR(task);
444670f80175SStefan Behrens 	}
444770f80175SStefan Behrens 
444870f80175SStefan Behrens 	return 0;
444970f80175SStefan Behrens }
445070f80175SStefan Behrens 
44518f18cf13SChris Mason /*
44528f18cf13SChris Mason  * Shrinking a device means finding all of the device extents past
44538f18cf13SChris Mason  * the new size, and then following the back refs to the chunks.
44548f18cf13SChris Mason  * The chunk relocation code actually frees the device extents.
44558f18cf13SChris Mason  */
44568f18cf13SChris Mason int btrfs_shrink_device(struct btrfs_device *device, u64 new_size)
44578f18cf13SChris Mason {
44580b246afaSJeff Mahoney 	struct btrfs_fs_info *fs_info = device->fs_info;
44590b246afaSJeff Mahoney 	struct btrfs_root *root = fs_info->dev_root;
44608f18cf13SChris Mason 	struct btrfs_trans_handle *trans;
44618f18cf13SChris Mason 	struct btrfs_dev_extent *dev_extent = NULL;
44628f18cf13SChris Mason 	struct btrfs_path *path;
44638f18cf13SChris Mason 	u64 length;
44648f18cf13SChris Mason 	u64 chunk_offset;
44658f18cf13SChris Mason 	int ret;
44668f18cf13SChris Mason 	int slot;
4467ba1bf481SJosef Bacik 	int failed = 0;
4468ba1bf481SJosef Bacik 	bool retried = false;
446953e489bcSFilipe Manana 	bool checked_pending_chunks = false;
44708f18cf13SChris Mason 	struct extent_buffer *l;
44718f18cf13SChris Mason 	struct btrfs_key key;
44720b246afaSJeff Mahoney 	struct btrfs_super_block *super_copy = fs_info->super_copy;
44738f18cf13SChris Mason 	u64 old_total = btrfs_super_total_bytes(super_copy);
44747cc8e58dSMiao Xie 	u64 old_size = btrfs_device_get_total_bytes(device);
44757dfb8be1SNikolay Borisov 	u64 diff;
44767dfb8be1SNikolay Borisov 
44777dfb8be1SNikolay Borisov 	new_size = round_down(new_size, fs_info->sectorsize);
44780e4324a4SNikolay Borisov 	diff = round_down(old_size - new_size, fs_info->sectorsize);
44798f18cf13SChris Mason 
4480401e29c1SAnand Jain 	if (test_bit(BTRFS_DEV_STATE_REPLACE_TGT, &device->dev_state))
448163a212abSStefan Behrens 		return -EINVAL;
448263a212abSStefan Behrens 
44838f18cf13SChris Mason 	path = btrfs_alloc_path();
44848f18cf13SChris Mason 	if (!path)
44858f18cf13SChris Mason 		return -ENOMEM;
44868f18cf13SChris Mason 
4487e4058b54SDavid Sterba 	path->reada = READA_FORWARD;
44888f18cf13SChris Mason 
448934441361SDavid Sterba 	mutex_lock(&fs_info->chunk_mutex);
44907d9eb12cSChris Mason 
44917cc8e58dSMiao Xie 	btrfs_device_set_total_bytes(device, new_size);
4492ebbede42SAnand Jain 	if (test_bit(BTRFS_DEV_STATE_WRITEABLE, &device->dev_state)) {
44932b82032cSYan Zheng 		device->fs_devices->total_rw_bytes -= diff;
4494a5ed45f8SNikolay Borisov 		atomic64_sub(diff, &fs_info->free_chunk_space);
44952bf64758SJosef Bacik 	}
449634441361SDavid Sterba 	mutex_unlock(&fs_info->chunk_mutex);
44978f18cf13SChris Mason 
4498ba1bf481SJosef Bacik again:
44998f18cf13SChris Mason 	key.objectid = device->devid;
45008f18cf13SChris Mason 	key.offset = (u64)-1;
45018f18cf13SChris Mason 	key.type = BTRFS_DEV_EXTENT_KEY;
45028f18cf13SChris Mason 
4503213e64daSIlya Dryomov 	do {
45040b246afaSJeff Mahoney 		mutex_lock(&fs_info->delete_unused_bgs_mutex);
45058f18cf13SChris Mason 		ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
450667c5e7d4SFilipe Manana 		if (ret < 0) {
45070b246afaSJeff Mahoney 			mutex_unlock(&fs_info->delete_unused_bgs_mutex);
45088f18cf13SChris Mason 			goto done;
450967c5e7d4SFilipe Manana 		}
45108f18cf13SChris Mason 
45118f18cf13SChris Mason 		ret = btrfs_previous_item(root, path, 0, key.type);
451267c5e7d4SFilipe Manana 		if (ret)
45130b246afaSJeff Mahoney 			mutex_unlock(&fs_info->delete_unused_bgs_mutex);
45148f18cf13SChris Mason 		if (ret < 0)
45158f18cf13SChris Mason 			goto done;
45168f18cf13SChris Mason 		if (ret) {
45178f18cf13SChris Mason 			ret = 0;
4518b3b4aa74SDavid Sterba 			btrfs_release_path(path);
4519bf1fb512SYan Zheng 			break;
45208f18cf13SChris Mason 		}
45218f18cf13SChris Mason 
45228f18cf13SChris Mason 		l = path->nodes[0];
45238f18cf13SChris Mason 		slot = path->slots[0];
45248f18cf13SChris Mason 		btrfs_item_key_to_cpu(l, &key, path->slots[0]);
45258f18cf13SChris Mason 
4526ba1bf481SJosef Bacik 		if (key.objectid != device->devid) {
45270b246afaSJeff Mahoney 			mutex_unlock(&fs_info->delete_unused_bgs_mutex);
4528b3b4aa74SDavid Sterba 			btrfs_release_path(path);
4529bf1fb512SYan Zheng 			break;
4530ba1bf481SJosef Bacik 		}
45318f18cf13SChris Mason 
45328f18cf13SChris Mason 		dev_extent = btrfs_item_ptr(l, slot, struct btrfs_dev_extent);
45338f18cf13SChris Mason 		length = btrfs_dev_extent_length(l, dev_extent);
45348f18cf13SChris Mason 
4535ba1bf481SJosef Bacik 		if (key.offset + length <= new_size) {
45360b246afaSJeff Mahoney 			mutex_unlock(&fs_info->delete_unused_bgs_mutex);
4537b3b4aa74SDavid Sterba 			btrfs_release_path(path);
4538d6397baeSChris Ball 			break;
4539ba1bf481SJosef Bacik 		}
45408f18cf13SChris Mason 
45418f18cf13SChris Mason 		chunk_offset = btrfs_dev_extent_chunk_offset(l, dev_extent);
4542b3b4aa74SDavid Sterba 		btrfs_release_path(path);
45438f18cf13SChris Mason 
4544*a6f93c71SLiu Bo 		/*
4545*a6f93c71SLiu Bo 		 * We may be relocating the only data chunk we have,
4546*a6f93c71SLiu Bo 		 * which could potentially end up losing the data's
4547*a6f93c71SLiu Bo 		 * raid profile, so let's allocate an empty one in
4548*a6f93c71SLiu Bo 		 * advance.
4549*a6f93c71SLiu Bo 		 */
4550*a6f93c71SLiu Bo 		ret = btrfs_may_alloc_data_chunk(fs_info, chunk_offset);
4551*a6f93c71SLiu Bo 		if (ret < 0) {
4552*a6f93c71SLiu Bo 			mutex_unlock(&fs_info->delete_unused_bgs_mutex);
4553*a6f93c71SLiu Bo 			goto done;
4554*a6f93c71SLiu Bo 		}
4555*a6f93c71SLiu Bo 
45560b246afaSJeff Mahoney 		ret = btrfs_relocate_chunk(fs_info, chunk_offset);
45570b246afaSJeff Mahoney 		mutex_unlock(&fs_info->delete_unused_bgs_mutex);
4558ba1bf481SJosef Bacik 		if (ret && ret != -ENOSPC)
4559ba1bf481SJosef Bacik 			goto done;
4560ba1bf481SJosef Bacik 		if (ret == -ENOSPC)
4561ba1bf481SJosef Bacik 			failed++;
4562213e64daSIlya Dryomov 	} while (key.offset-- > 0);
4563ba1bf481SJosef Bacik 
4564ba1bf481SJosef Bacik 	if (failed && !retried) {
4565ba1bf481SJosef Bacik 		failed = 0;
4566ba1bf481SJosef Bacik 		retried = true;
4567ba1bf481SJosef Bacik 		goto again;
4568ba1bf481SJosef Bacik 	} else if (failed && retried) {
4569ba1bf481SJosef Bacik 		ret = -ENOSPC;
45708f18cf13SChris Mason 		goto done;
45718f18cf13SChris Mason 	}
45728f18cf13SChris Mason 
4573d6397baeSChris Ball 	/* Shrinking succeeded, else we would be at "done". */
4574a22285a6SYan, Zheng 	trans = btrfs_start_transaction(root, 0);
457598d5dc13STsutomu Itoh 	if (IS_ERR(trans)) {
457698d5dc13STsutomu Itoh 		ret = PTR_ERR(trans);
457798d5dc13STsutomu Itoh 		goto done;
457898d5dc13STsutomu Itoh 	}
457998d5dc13STsutomu Itoh 
458034441361SDavid Sterba 	mutex_lock(&fs_info->chunk_mutex);
458153e489bcSFilipe Manana 
458253e489bcSFilipe Manana 	/*
458353e489bcSFilipe Manana 	 * We checked in the above loop all device extents that were already in
458453e489bcSFilipe Manana 	 * the device tree. However before we have updated the device's
458553e489bcSFilipe Manana 	 * total_bytes to the new size, we might have had chunk allocations that
458653e489bcSFilipe Manana 	 * have not completed yet (new block groups attached to transaction
458753e489bcSFilipe Manana 	 * handles), and therefore their device extents were not yet in the
458853e489bcSFilipe Manana 	 * device tree and we missed them in the loop above. So if we have any
458953e489bcSFilipe Manana 	 * pending chunk using a device extent that overlaps the device range
459053e489bcSFilipe Manana 	 * that we cannot use anymore, commit the current transaction and
459153e489bcSFilipe Manana 	 * repeat the search on the device tree - this way we guarantee we will
459253e489bcSFilipe Manana 	 * not have chunks using device extents that end beyond 'new_size'.
459353e489bcSFilipe Manana 	 */
459453e489bcSFilipe Manana 	if (!checked_pending_chunks) {
459553e489bcSFilipe Manana 		u64 start = new_size;
459653e489bcSFilipe Manana 		u64 len = old_size - new_size;
459753e489bcSFilipe Manana 
4598499f377fSJeff Mahoney 		if (contains_pending_extent(trans->transaction, device,
4599499f377fSJeff Mahoney 					    &start, len)) {
460034441361SDavid Sterba 			mutex_unlock(&fs_info->chunk_mutex);
460153e489bcSFilipe Manana 			checked_pending_chunks = true;
460253e489bcSFilipe Manana 			failed = 0;
460353e489bcSFilipe Manana 			retried = false;
46043a45bb20SJeff Mahoney 			ret = btrfs_commit_transaction(trans);
460553e489bcSFilipe Manana 			if (ret)
460653e489bcSFilipe Manana 				goto done;
460753e489bcSFilipe Manana 			goto again;
460853e489bcSFilipe Manana 		}
460953e489bcSFilipe Manana 	}
461053e489bcSFilipe Manana 
46117cc8e58dSMiao Xie 	btrfs_device_set_disk_total_bytes(device, new_size);
4612935e5cc9SMiao Xie 	if (list_empty(&device->resized_list))
4613935e5cc9SMiao Xie 		list_add_tail(&device->resized_list,
46140b246afaSJeff Mahoney 			      &fs_info->fs_devices->resized_devices);
4615d6397baeSChris Ball 
4616d6397baeSChris Ball 	WARN_ON(diff > old_total);
46177dfb8be1SNikolay Borisov 	btrfs_set_super_total_bytes(super_copy,
46187dfb8be1SNikolay Borisov 			round_down(old_total - diff, fs_info->sectorsize));
461934441361SDavid Sterba 	mutex_unlock(&fs_info->chunk_mutex);
46202196d6e8SMiao Xie 
46212196d6e8SMiao Xie 	/* Now btrfs_update_device() will change the on-disk size. */
46222196d6e8SMiao Xie 	ret = btrfs_update_device(trans, device);
46233a45bb20SJeff Mahoney 	btrfs_end_transaction(trans);
46248f18cf13SChris Mason done:
46258f18cf13SChris Mason 	btrfs_free_path(path);
462653e489bcSFilipe Manana 	if (ret) {
462734441361SDavid Sterba 		mutex_lock(&fs_info->chunk_mutex);
462853e489bcSFilipe Manana 		btrfs_device_set_total_bytes(device, old_size);
4629ebbede42SAnand Jain 		if (test_bit(BTRFS_DEV_STATE_WRITEABLE, &device->dev_state))
463053e489bcSFilipe Manana 			device->fs_devices->total_rw_bytes += diff;
4631a5ed45f8SNikolay Borisov 		atomic64_add(diff, &fs_info->free_chunk_space);
463234441361SDavid Sterba 		mutex_unlock(&fs_info->chunk_mutex);
463353e489bcSFilipe Manana 	}
46348f18cf13SChris Mason 	return ret;
46358f18cf13SChris Mason }
46368f18cf13SChris Mason 
46372ff7e61eSJeff Mahoney static int btrfs_add_system_chunk(struct btrfs_fs_info *fs_info,
46380b86a832SChris Mason 			   struct btrfs_key *key,
46390b86a832SChris Mason 			   struct btrfs_chunk *chunk, int item_size)
46400b86a832SChris Mason {
46410b246afaSJeff Mahoney 	struct btrfs_super_block *super_copy = fs_info->super_copy;
46420b86a832SChris Mason 	struct btrfs_disk_key disk_key;
46430b86a832SChris Mason 	u32 array_size;
46440b86a832SChris Mason 	u8 *ptr;
46450b86a832SChris Mason 
464634441361SDavid Sterba 	mutex_lock(&fs_info->chunk_mutex);
46470b86a832SChris Mason 	array_size = btrfs_super_sys_array_size(super_copy);
46485f43f86eSGui Hecheng 	if (array_size + item_size + sizeof(disk_key)
4649fe48a5c0SMiao Xie 			> BTRFS_SYSTEM_CHUNK_ARRAY_SIZE) {
465034441361SDavid Sterba 		mutex_unlock(&fs_info->chunk_mutex);
46510b86a832SChris Mason 		return -EFBIG;
4652fe48a5c0SMiao Xie 	}
46530b86a832SChris Mason 
46540b86a832SChris Mason 	ptr = super_copy->sys_chunk_array + array_size;
46550b86a832SChris Mason 	btrfs_cpu_key_to_disk(&disk_key, key);
46560b86a832SChris Mason 	memcpy(ptr, &disk_key, sizeof(disk_key));
46570b86a832SChris Mason 	ptr += sizeof(disk_key);
46580b86a832SChris Mason 	memcpy(ptr, chunk, item_size);
46590b86a832SChris Mason 	item_size += sizeof(disk_key);
46600b86a832SChris Mason 	btrfs_set_super_sys_array_size(super_copy, array_size + item_size);
466134441361SDavid Sterba 	mutex_unlock(&fs_info->chunk_mutex);
4662fe48a5c0SMiao Xie 
46630b86a832SChris Mason 	return 0;
46640b86a832SChris Mason }
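
/*
 * A rough sketch of the resulting sys_chunk_array layout (derived from
 * btrfs_add_system_chunk() above, not an additional guarantee): each call
 * appends one (disk key, chunk item) pair back to back, with the chunk
 * item carrying its stripe array, and btrfs_super_sys_array_size()
 * tracking the used bytes:
 *
 *   [disk_key 0][chunk 0 + stripes][disk_key 1][chunk 1 + stripes]...
 */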
46650b86a832SChris Mason 
46669f680ce0SChris Mason /*
466773c5de00SArne Jansen  * sort the devices in descending order by max_avail, then by total_avail
46689f680ce0SChris Mason  */
466973c5de00SArne Jansen static int btrfs_cmp_device_info(const void *a, const void *b)
46702b82032cSYan Zheng {
467173c5de00SArne Jansen 	const struct btrfs_device_info *di_a = a;
467273c5de00SArne Jansen 	const struct btrfs_device_info *di_b = b;
46732b82032cSYan Zheng 
467473c5de00SArne Jansen 	if (di_a->max_avail > di_b->max_avail)
4675a40a90a0SChris Mason 		return -1;
467673c5de00SArne Jansen 	if (di_a->max_avail < di_b->max_avail)
46779b3f68b9SChris Mason 		return 1;
467873c5de00SArne Jansen 	if (di_a->total_avail > di_b->total_avail)
467973c5de00SArne Jansen 		return -1;
468073c5de00SArne Jansen 	if (di_a->total_avail < di_b->total_avail)
468173c5de00SArne Jansen 		return 1;
4682b2117a39SMiao Xie 	return 0;
4683b2117a39SMiao Xie }
4684b2117a39SMiao Xie 
468553b381b3SDavid Woodhouse static void check_raid56_incompat_flag(struct btrfs_fs_info *info, u64 type)
468653b381b3SDavid Woodhouse {
4687ffe2d203SZhao Lei 	if (!(type & BTRFS_BLOCK_GROUP_RAID56_MASK))
468853b381b3SDavid Woodhouse 		return;
468953b381b3SDavid Woodhouse 
4690ceda0864SMiao Xie 	btrfs_set_fs_incompat(info, RAID56);
469153b381b3SDavid Woodhouse }
469253b381b3SDavid Woodhouse 
4693da17066cSJeff Mahoney #define BTRFS_MAX_DEVS(r) ((BTRFS_MAX_ITEM_SIZE(r->fs_info)		\
469423f8f9b7SGui Hecheng 			- sizeof(struct btrfs_chunk))		\
469523f8f9b7SGui Hecheng 			/ sizeof(struct btrfs_stripe) + 1)
469623f8f9b7SGui Hecheng 
469723f8f9b7SGui Hecheng #define BTRFS_MAX_DEVS_SYS_CHUNK ((BTRFS_SYSTEM_CHUNK_ARRAY_SIZE	\
469823f8f9b7SGui Hecheng 				- 2 * sizeof(struct btrfs_disk_key)	\
469923f8f9b7SGui Hecheng 				- 2 * sizeof(struct btrfs_chunk))	\
470023f8f9b7SGui Hecheng 				/ sizeof(struct btrfs_stripe) + 1)
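
/*
 * Illustrative arithmetic for the limit above, a sketch assuming
 * BTRFS_SYSTEM_CHUNK_ARRAY_SIZE is 2048 bytes and the usual on-disk sizes
 * (btrfs_disk_key 17 bytes, btrfs_stripe 32 bytes, btrfs_chunk 80 bytes
 * including its one embedded stripe):
 *
 *   BTRFS_MAX_DEVS_SYS_CHUNK = (2048 - 2*17 - 2*80) / 32 + 1 = 58
 *
 * BTRFS_MAX_DEVS() depends on the nodesize via BTRFS_MAX_ITEM_SIZE() and
 * is typically much larger.
 */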
470123f8f9b7SGui Hecheng 
4702b2117a39SMiao Xie static int __btrfs_alloc_chunk(struct btrfs_trans_handle *trans,
470372b468c8SDavid Sterba 			       u64 start, u64 type)
4704b2117a39SMiao Xie {
47052ff7e61eSJeff Mahoney 	struct btrfs_fs_info *info = trans->fs_info;
4706b2117a39SMiao Xie 	struct btrfs_fs_devices *fs_devices = info->fs_devices;
4707ebcc9301SNikolay Borisov 	struct btrfs_device *device;
470873c5de00SArne Jansen 	struct map_lookup *map = NULL;
4709b2117a39SMiao Xie 	struct extent_map_tree *em_tree;
4710b2117a39SMiao Xie 	struct extent_map *em;
471173c5de00SArne Jansen 	struct btrfs_device_info *devices_info = NULL;
471273c5de00SArne Jansen 	u64 total_avail;
471373c5de00SArne Jansen 	int num_stripes;	/* total number of stripes to allocate */
471453b381b3SDavid Woodhouse 	int data_stripes;	/* number of stripes that count for
471553b381b3SDavid Woodhouse 				   block group size */
471673c5de00SArne Jansen 	int sub_stripes;	/* sub_stripes info for map */
471773c5de00SArne Jansen 	int dev_stripes;	/* stripes per dev */
471873c5de00SArne Jansen 	int devs_max;		/* max devs to use */
471973c5de00SArne Jansen 	int devs_min;		/* min devs needed */
472073c5de00SArne Jansen 	int devs_increment;	/* ndevs has to be a multiple of this */
472173c5de00SArne Jansen 	int ncopies;		/* how many copies the data has */
4722b2117a39SMiao Xie 	int ret;
472373c5de00SArne Jansen 	u64 max_stripe_size;
472473c5de00SArne Jansen 	u64 max_chunk_size;
472573c5de00SArne Jansen 	u64 stripe_size;
472673c5de00SArne Jansen 	u64 num_bytes;
472773c5de00SArne Jansen 	int ndevs;
472873c5de00SArne Jansen 	int i;
472973c5de00SArne Jansen 	int j;
473031e50229SLiu Bo 	int index;
4731b2117a39SMiao Xie 
47320c460c0dSIlya Dryomov 	BUG_ON(!alloc_profile_is_valid(type, 0));
473373c5de00SArne Jansen 
4734b2117a39SMiao Xie 	if (list_empty(&fs_devices->alloc_list))
4735b2117a39SMiao Xie 		return -ENOSPC;
4736b2117a39SMiao Xie 
473731e50229SLiu Bo 	index = __get_raid_index(type);
473873c5de00SArne Jansen 
473931e50229SLiu Bo 	sub_stripes = btrfs_raid_array[index].sub_stripes;
474031e50229SLiu Bo 	dev_stripes = btrfs_raid_array[index].dev_stripes;
474131e50229SLiu Bo 	devs_max = btrfs_raid_array[index].devs_max;
474231e50229SLiu Bo 	devs_min = btrfs_raid_array[index].devs_min;
474331e50229SLiu Bo 	devs_increment = btrfs_raid_array[index].devs_increment;
474431e50229SLiu Bo 	ncopies = btrfs_raid_array[index].ncopies;
474573c5de00SArne Jansen 
474673c5de00SArne Jansen 	if (type & BTRFS_BLOCK_GROUP_DATA) {
4747ee22184bSByongho Lee 		max_stripe_size = SZ_1G;
474873c5de00SArne Jansen 		max_chunk_size = 10 * max_stripe_size;
474923f8f9b7SGui Hecheng 		if (!devs_max)
475023f8f9b7SGui Hecheng 			devs_max = BTRFS_MAX_DEVS(info->chunk_root);
475173c5de00SArne Jansen 	} else if (type & BTRFS_BLOCK_GROUP_METADATA) {
47521100373fSChris Mason 		/* for larger filesystems, use larger metadata chunks */
4753ee22184bSByongho Lee 		if (fs_devices->total_rw_bytes > 50ULL * SZ_1G)
4754ee22184bSByongho Lee 			max_stripe_size = SZ_1G;
47551100373fSChris Mason 		else
4756ee22184bSByongho Lee 			max_stripe_size = SZ_256M;
475773c5de00SArne Jansen 		max_chunk_size = max_stripe_size;
475823f8f9b7SGui Hecheng 		if (!devs_max)
475923f8f9b7SGui Hecheng 			devs_max = BTRFS_MAX_DEVS(info->chunk_root);
476073c5de00SArne Jansen 	} else if (type & BTRFS_BLOCK_GROUP_SYSTEM) {
4761ee22184bSByongho Lee 		max_stripe_size = SZ_32M;
476273c5de00SArne Jansen 		max_chunk_size = 2 * max_stripe_size;
476323f8f9b7SGui Hecheng 		if (!devs_max)
476423f8f9b7SGui Hecheng 			devs_max = BTRFS_MAX_DEVS_SYS_CHUNK;
476573c5de00SArne Jansen 	} else {
4766351fd353SDavid Sterba 		btrfs_err(info, "invalid chunk type 0x%llx requested",
476773c5de00SArne Jansen 		       type);
476873c5de00SArne Jansen 		BUG_ON(1);
476973c5de00SArne Jansen 	}
477073c5de00SArne Jansen 
477173c5de00SArne Jansen 	/* we don't want a chunk larger than 10% of writeable space */
477273c5de00SArne Jansen 	max_chunk_size = min(div_factor(fs_devices->total_rw_bytes, 1),
477373c5de00SArne Jansen 			     max_chunk_size);
4774b2117a39SMiao Xie 
477531e818feSDavid Sterba 	devices_info = kcalloc(fs_devices->rw_devices, sizeof(*devices_info),
4776b2117a39SMiao Xie 			       GFP_NOFS);
4777b2117a39SMiao Xie 	if (!devices_info)
4778b2117a39SMiao Xie 		return -ENOMEM;
4779b2117a39SMiao Xie 
478073c5de00SArne Jansen 	/*
478173c5de00SArne Jansen 	 * in the first pass through the devices list, we gather information
478273c5de00SArne Jansen 	 * about the available holes on each device.
478373c5de00SArne Jansen 	 */
478473c5de00SArne Jansen 	ndevs = 0;
4785ebcc9301SNikolay Borisov 	list_for_each_entry(device, &fs_devices->alloc_list, dev_alloc_list) {
478673c5de00SArne Jansen 		u64 max_avail;
478773c5de00SArne Jansen 		u64 dev_offset;
478873c5de00SArne Jansen 
4789ebbede42SAnand Jain 		if (!test_bit(BTRFS_DEV_STATE_WRITEABLE, &device->dev_state)) {
479031b1a2bdSJulia Lawall 			WARN(1, KERN_ERR
4791efe120a0SFrank Holton 			       "BTRFS: read-only device in alloc_list\n");
479273c5de00SArne Jansen 			continue;
479373c5de00SArne Jansen 		}
479473c5de00SArne Jansen 
4795e12c9621SAnand Jain 		if (!test_bit(BTRFS_DEV_STATE_IN_FS_METADATA,
4796e12c9621SAnand Jain 					&device->dev_state) ||
4797401e29c1SAnand Jain 		    test_bit(BTRFS_DEV_STATE_REPLACE_TGT, &device->dev_state))
479873c5de00SArne Jansen 			continue;
479973c5de00SArne Jansen 
480073c5de00SArne Jansen 		if (device->total_bytes > device->bytes_used)
480173c5de00SArne Jansen 			total_avail = device->total_bytes - device->bytes_used;
480273c5de00SArne Jansen 		else
480373c5de00SArne Jansen 			total_avail = 0;
480438c01b96Sliubo 
480538c01b96Sliubo 		/* If there is no space on this device, skip it. */
480638c01b96Sliubo 		if (total_avail == 0)
480738c01b96Sliubo 			continue;
480873c5de00SArne Jansen 
48096df9a95eSJosef Bacik 		ret = find_free_dev_extent(trans, device,
481073c5de00SArne Jansen 					   max_stripe_size * dev_stripes,
481173c5de00SArne Jansen 					   &dev_offset, &max_avail);
481273c5de00SArne Jansen 		if (ret && ret != -ENOSPC)
481373c5de00SArne Jansen 			goto error;
481473c5de00SArne Jansen 
481573c5de00SArne Jansen 		if (ret == 0)
481673c5de00SArne Jansen 			max_avail = max_stripe_size * dev_stripes;
481773c5de00SArne Jansen 
481873c5de00SArne Jansen 		if (max_avail < BTRFS_STRIPE_LEN * dev_stripes)
481973c5de00SArne Jansen 			continue;
482073c5de00SArne Jansen 
4821063d006fSEric Sandeen 		if (ndevs == fs_devices->rw_devices) {
4822063d006fSEric Sandeen 			WARN(1, "%s: found more than %llu devices\n",
4823063d006fSEric Sandeen 			     __func__, fs_devices->rw_devices);
4824063d006fSEric Sandeen 			break;
4825063d006fSEric Sandeen 		}
482673c5de00SArne Jansen 		devices_info[ndevs].dev_offset = dev_offset;
482773c5de00SArne Jansen 		devices_info[ndevs].max_avail = max_avail;
482873c5de00SArne Jansen 		devices_info[ndevs].total_avail = total_avail;
482973c5de00SArne Jansen 		devices_info[ndevs].dev = device;
483073c5de00SArne Jansen 		++ndevs;
483173c5de00SArne Jansen 	}
483273c5de00SArne Jansen 
483373c5de00SArne Jansen 	/*
483473c5de00SArne Jansen 	 * now sort the devices by hole size / available space
483573c5de00SArne Jansen 	 */
483673c5de00SArne Jansen 	sort(devices_info, ndevs, sizeof(struct btrfs_device_info),
483773c5de00SArne Jansen 	     btrfs_cmp_device_info, NULL);
483873c5de00SArne Jansen 
483973c5de00SArne Jansen 	/* round down to number of usable stripes */
4840e5600fd6SNikolay Borisov 	ndevs = round_down(ndevs, devs_increment);
484173c5de00SArne Jansen 
484273c5de00SArne Jansen 	if (ndevs < devs_increment * sub_stripes || ndevs < devs_min) {
484373c5de00SArne Jansen 		ret = -ENOSPC;
484473c5de00SArne Jansen 		goto error;
484573c5de00SArne Jansen 	}
484673c5de00SArne Jansen 
4847f148ef4dSNikolay Borisov 	ndevs = min(ndevs, devs_max);
4848f148ef4dSNikolay Borisov 
484973c5de00SArne Jansen 	/*
485073c5de00SArne Jansen 	 * the primary goal is to maximize the number of stripes, so use as many
485173c5de00SArne Jansen 	 * devices as possible, even if the stripes are not maximum sized.
485273c5de00SArne Jansen 	 */
485373c5de00SArne Jansen 	stripe_size = devices_info[ndevs-1].max_avail;
485473c5de00SArne Jansen 	num_stripes = ndevs * dev_stripes;
485573c5de00SArne Jansen 
485653b381b3SDavid Woodhouse 	/*
485753b381b3SDavid Woodhouse 	 * this will have to be fixed for RAID1 and RAID10 over
485853b381b3SDavid Woodhouse 	 * more drives
485953b381b3SDavid Woodhouse 	 */
486053b381b3SDavid Woodhouse 	data_stripes = num_stripes / ncopies;
486153b381b3SDavid Woodhouse 
4862500ceed8SNikolay Borisov 	if (type & BTRFS_BLOCK_GROUP_RAID5)
486353b381b3SDavid Woodhouse 		data_stripes = num_stripes - 1;
4864500ceed8SNikolay Borisov 
4865500ceed8SNikolay Borisov 	if (type & BTRFS_BLOCK_GROUP_RAID6)
486653b381b3SDavid Woodhouse 		data_stripes = num_stripes - 2;
486786db2578SChris Mason 
486886db2578SChris Mason 	/*
486986db2578SChris Mason 	 * Use the number of data stripes to figure out how big this chunk
487086db2578SChris Mason 	 * is really going to be in terms of logical address space,
487186db2578SChris Mason 	 * and compare that answer with the max chunk size
487286db2578SChris Mason 	 */
487386db2578SChris Mason 	if (stripe_size * data_stripes > max_chunk_size) {
487486db2578SChris Mason 		u64 mask = (1ULL << 24) - 1;
4875b8b93addSDavid Sterba 
4876b8b93addSDavid Sterba 		stripe_size = div_u64(max_chunk_size, data_stripes);
487786db2578SChris Mason 
487886db2578SChris Mason 		/* bump the answer up to a 16MB boundary */
487986db2578SChris Mason 		stripe_size = (stripe_size + mask) & ~mask;
488086db2578SChris Mason 
488186db2578SChris Mason 		/* but don't go higher than the limits we found
488286db2578SChris Mason 		 * while searching for free extents
488386db2578SChris Mason 		 */
488486db2578SChris Mason 		if (stripe_size > devices_info[ndevs-1].max_avail)
488586db2578SChris Mason 			stripe_size = devices_info[ndevs-1].max_avail;
488686db2578SChris Mason 	}
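
	/*
	 * Worked example for the rounding above (hypothetical numbers):
	 * with max_chunk_size = 256M and data_stripes = 3, stripe_size
	 * becomes 256M / 3 ~= 85.3M, which the 16M mask bumps up to 96M,
	 * and which is then clamped back down if it exceeds the smallest
	 * max_avail found earlier.
	 */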
488786db2578SChris Mason 
4888b8b93addSDavid Sterba 	stripe_size = div_u64(stripe_size, dev_stripes);
488937db63a4SIlya Dryomov 
489037db63a4SIlya Dryomov 	/* align to BTRFS_STRIPE_LEN */
4891500ceed8SNikolay Borisov 	stripe_size = round_down(stripe_size, BTRFS_STRIPE_LEN);
489273c5de00SArne Jansen 
4893b2117a39SMiao Xie 	map = kmalloc(map_lookup_size(num_stripes), GFP_NOFS);
4894b2117a39SMiao Xie 	if (!map) {
4895b2117a39SMiao Xie 		ret = -ENOMEM;
4896b2117a39SMiao Xie 		goto error;
4897b2117a39SMiao Xie 	}
4898b2117a39SMiao Xie 	map->num_stripes = num_stripes;
48999b3f68b9SChris Mason 
490073c5de00SArne Jansen 	for (i = 0; i < ndevs; ++i) {
490173c5de00SArne Jansen 		for (j = 0; j < dev_stripes; ++j) {
490273c5de00SArne Jansen 			int s = i * dev_stripes + j;
490373c5de00SArne Jansen 			map->stripes[s].dev = devices_info[i].dev;
490473c5de00SArne Jansen 			map->stripes[s].physical = devices_info[i].dev_offset +
490573c5de00SArne Jansen 						   j * stripe_size;
4906a40a90a0SChris Mason 		}
49076324fbf3SChris Mason 	}
4908500ceed8SNikolay Borisov 	map->stripe_len = BTRFS_STRIPE_LEN;
4909500ceed8SNikolay Borisov 	map->io_align = BTRFS_STRIPE_LEN;
4910500ceed8SNikolay Borisov 	map->io_width = BTRFS_STRIPE_LEN;
4911593060d7SChris Mason 	map->type = type;
4912321aecc6SChris Mason 	map->sub_stripes = sub_stripes;
49130b86a832SChris Mason 
491453b381b3SDavid Woodhouse 	num_bytes = stripe_size * data_stripes;
49150b86a832SChris Mason 
49166bccf3abSJeff Mahoney 	trace_btrfs_chunk_alloc(info, map, start, num_bytes);
49171abe9b8aSliubo 
4918172ddd60SDavid Sterba 	em = alloc_extent_map();
49192b82032cSYan Zheng 	if (!em) {
4920298a8f9cSWang Shilong 		kfree(map);
4921b2117a39SMiao Xie 		ret = -ENOMEM;
4922b2117a39SMiao Xie 		goto error;
49232b82032cSYan Zheng 	}
4924298a8f9cSWang Shilong 	set_bit(EXTENT_FLAG_FS_MAPPING, &em->flags);
492595617d69SJeff Mahoney 	em->map_lookup = map;
49262b82032cSYan Zheng 	em->start = start;
492773c5de00SArne Jansen 	em->len = num_bytes;
49280b86a832SChris Mason 	em->block_start = 0;
4929c8b97818SChris Mason 	em->block_len = em->len;
49306df9a95eSJosef Bacik 	em->orig_block_len = stripe_size;
49310b86a832SChris Mason 
49320b246afaSJeff Mahoney 	em_tree = &info->mapping_tree.map_tree;
4933890871beSChris Mason 	write_lock(&em_tree->lock);
493409a2a8f9SJosef Bacik 	ret = add_extent_mapping(em_tree, em, 0);
49350f5d42b2SJosef Bacik 	if (ret) {
49361efb72a3SNikolay Borisov 		write_unlock(&em_tree->lock);
49370b86a832SChris Mason 		free_extent_map(em);
49381dd4602fSMark Fasheh 		goto error;
49390f5d42b2SJosef Bacik 	}
49402b82032cSYan Zheng 
49411efb72a3SNikolay Borisov 	list_add_tail(&em->list, &trans->transaction->pending_chunks);
49421efb72a3SNikolay Borisov 	refcount_inc(&em->refs);
49431efb72a3SNikolay Borisov 	write_unlock(&em_tree->lock);
49441efb72a3SNikolay Borisov 
49450174484dSNikolay Borisov 	ret = btrfs_make_block_group(trans, info, 0, type, start, num_bytes);
49466df9a95eSJosef Bacik 	if (ret)
49476df9a95eSJosef Bacik 		goto error_del_extent;
49482b82032cSYan Zheng 
49497cc8e58dSMiao Xie 	for (i = 0; i < map->num_stripes; i++) {
49507cc8e58dSMiao Xie 		num_bytes = map->stripes[i].dev->bytes_used + stripe_size;
49517cc8e58dSMiao Xie 		btrfs_device_set_bytes_used(map->stripes[i].dev, num_bytes);
49527cc8e58dSMiao Xie 	}
495343530c46SMiao Xie 
4954a5ed45f8SNikolay Borisov 	atomic64_sub(stripe_size * map->num_stripes, &info->free_chunk_space);
49551c116187SMiao Xie 
49560f5d42b2SJosef Bacik 	free_extent_map(em);
49570b246afaSJeff Mahoney 	check_raid56_incompat_flag(info, type);
495853b381b3SDavid Woodhouse 
4959b2117a39SMiao Xie 	kfree(devices_info);
49602b82032cSYan Zheng 	return 0;
4961b2117a39SMiao Xie 
49626df9a95eSJosef Bacik error_del_extent:
49630f5d42b2SJosef Bacik 	write_lock(&em_tree->lock);
49640f5d42b2SJosef Bacik 	remove_extent_mapping(em_tree, em);
49650f5d42b2SJosef Bacik 	write_unlock(&em_tree->lock);
49660f5d42b2SJosef Bacik 
49670f5d42b2SJosef Bacik 	/* One for our allocation */
49680f5d42b2SJosef Bacik 	free_extent_map(em);
49690f5d42b2SJosef Bacik 	/* One for the tree reference */
49700f5d42b2SJosef Bacik 	free_extent_map(em);
4971495e64f4SFilipe Manana 	/* One for the pending_chunks list reference */
4972495e64f4SFilipe Manana 	free_extent_map(em);
4973b2117a39SMiao Xie error:
4974b2117a39SMiao Xie 	kfree(devices_info);
4975b2117a39SMiao Xie 	return ret;
49762b82032cSYan Zheng }
49772b82032cSYan Zheng 
49786df9a95eSJosef Bacik int btrfs_finish_chunk_alloc(struct btrfs_trans_handle *trans,
49796bccf3abSJeff Mahoney 				struct btrfs_fs_info *fs_info,
49806df9a95eSJosef Bacik 				u64 chunk_offset, u64 chunk_size)
49812b82032cSYan Zheng {
49826bccf3abSJeff Mahoney 	struct btrfs_root *extent_root = fs_info->extent_root;
49836bccf3abSJeff Mahoney 	struct btrfs_root *chunk_root = fs_info->chunk_root;
49842b82032cSYan Zheng 	struct btrfs_key key;
49852b82032cSYan Zheng 	struct btrfs_device *device;
49862b82032cSYan Zheng 	struct btrfs_chunk *chunk;
49872b82032cSYan Zheng 	struct btrfs_stripe *stripe;
49886df9a95eSJosef Bacik 	struct extent_map *em;
49896df9a95eSJosef Bacik 	struct map_lookup *map;
49906df9a95eSJosef Bacik 	size_t item_size;
49916df9a95eSJosef Bacik 	u64 dev_offset;
49926df9a95eSJosef Bacik 	u64 stripe_size;
49936df9a95eSJosef Bacik 	int i = 0;
4994140e639fSChris Mason 	int ret = 0;
49952b82032cSYan Zheng 
4996592d92eeSLiu Bo 	em = get_chunk_map(fs_info, chunk_offset, chunk_size);
4997592d92eeSLiu Bo 	if (IS_ERR(em))
4998592d92eeSLiu Bo 		return PTR_ERR(em);
49996df9a95eSJosef Bacik 
500095617d69SJeff Mahoney 	map = em->map_lookup;
50016df9a95eSJosef Bacik 	item_size = btrfs_chunk_item_size(map->num_stripes);
50026df9a95eSJosef Bacik 	stripe_size = em->orig_block_len;
50036df9a95eSJosef Bacik 
50046df9a95eSJosef Bacik 	chunk = kzalloc(item_size, GFP_NOFS);
50056df9a95eSJosef Bacik 	if (!chunk) {
50066df9a95eSJosef Bacik 		ret = -ENOMEM;
50076df9a95eSJosef Bacik 		goto out;
50086df9a95eSJosef Bacik 	}
50096df9a95eSJosef Bacik 
501050460e37SFilipe Manana 	/*
501150460e37SFilipe Manana 	 * Take the device list mutex to prevent races with the final phase of
501250460e37SFilipe Manana 	 * a device replace operation that replaces the device object associated
501350460e37SFilipe Manana 	 * with the map's stripes, because the device object's id can change
501450460e37SFilipe Manana 	 * at any time during that final phase of the device replace operation
501550460e37SFilipe Manana 	 * (dev-replace.c:btrfs_dev_replace_finishing()).
501650460e37SFilipe Manana 	 */
50170b246afaSJeff Mahoney 	mutex_lock(&fs_info->fs_devices->device_list_mutex);
50186df9a95eSJosef Bacik 	for (i = 0; i < map->num_stripes; i++) {
50196df9a95eSJosef Bacik 		device = map->stripes[i].dev;
50206df9a95eSJosef Bacik 		dev_offset = map->stripes[i].physical;
50216df9a95eSJosef Bacik 
50222b82032cSYan Zheng 		ret = btrfs_update_device(trans, device);
50233acd3953SMark Fasheh 		if (ret)
502450460e37SFilipe Manana 			break;
5025b5d9071cSNikolay Borisov 		ret = btrfs_alloc_dev_extent(trans, device, chunk_offset,
5026b5d9071cSNikolay Borisov 					     dev_offset, stripe_size);
50276df9a95eSJosef Bacik 		if (ret)
502850460e37SFilipe Manana 			break;
502950460e37SFilipe Manana 	}
503050460e37SFilipe Manana 	if (ret) {
50310b246afaSJeff Mahoney 		mutex_unlock(&fs_info->fs_devices->device_list_mutex);
50326df9a95eSJosef Bacik 		goto out;
50332b82032cSYan Zheng 	}
50342b82032cSYan Zheng 
50352b82032cSYan Zheng 	stripe = &chunk->stripe;
50366df9a95eSJosef Bacik 	for (i = 0; i < map->num_stripes; i++) {
50376df9a95eSJosef Bacik 		device = map->stripes[i].dev;
50386df9a95eSJosef Bacik 		dev_offset = map->stripes[i].physical;
50392b82032cSYan Zheng 
50402b82032cSYan Zheng 		btrfs_set_stack_stripe_devid(stripe, device->devid);
50412b82032cSYan Zheng 		btrfs_set_stack_stripe_offset(stripe, dev_offset);
50422b82032cSYan Zheng 		memcpy(stripe->dev_uuid, device->uuid, BTRFS_UUID_SIZE);
50432b82032cSYan Zheng 		stripe++;
50442b82032cSYan Zheng 	}
50450b246afaSJeff Mahoney 	mutex_unlock(&fs_info->fs_devices->device_list_mutex);
50462b82032cSYan Zheng 
50472b82032cSYan Zheng 	btrfs_set_stack_chunk_length(chunk, chunk_size);
50482b82032cSYan Zheng 	btrfs_set_stack_chunk_owner(chunk, extent_root->root_key.objectid);
50492b82032cSYan Zheng 	btrfs_set_stack_chunk_stripe_len(chunk, map->stripe_len);
50502b82032cSYan Zheng 	btrfs_set_stack_chunk_type(chunk, map->type);
50512b82032cSYan Zheng 	btrfs_set_stack_chunk_num_stripes(chunk, map->num_stripes);
50522b82032cSYan Zheng 	btrfs_set_stack_chunk_io_align(chunk, map->stripe_len);
50532b82032cSYan Zheng 	btrfs_set_stack_chunk_io_width(chunk, map->stripe_len);
50540b246afaSJeff Mahoney 	btrfs_set_stack_chunk_sector_size(chunk, fs_info->sectorsize);
50552b82032cSYan Zheng 	btrfs_set_stack_chunk_sub_stripes(chunk, map->sub_stripes);
50562b82032cSYan Zheng 
50572b82032cSYan Zheng 	key.objectid = BTRFS_FIRST_CHUNK_TREE_OBJECTID;
50582b82032cSYan Zheng 	key.type = BTRFS_CHUNK_ITEM_KEY;
50592b82032cSYan Zheng 	key.offset = chunk_offset;
50602b82032cSYan Zheng 
50612b82032cSYan Zheng 	ret = btrfs_insert_item(trans, chunk_root, &key, chunk, item_size);
50624ed1d16eSMark Fasheh 	if (ret == 0 && map->type & BTRFS_BLOCK_GROUP_SYSTEM) {
50634ed1d16eSMark Fasheh 		/*
50644ed1d16eSMark Fasheh 		 * TODO: Cleanup of inserted chunk root in case of
50654ed1d16eSMark Fasheh 		 * failure.
50664ed1d16eSMark Fasheh 		 */
50672ff7e61eSJeff Mahoney 		ret = btrfs_add_system_chunk(fs_info, &key, chunk, item_size);
50682b82032cSYan Zheng 	}
50691abe9b8aSliubo 
50706df9a95eSJosef Bacik out:
50712b82032cSYan Zheng 	kfree(chunk);
50726df9a95eSJosef Bacik 	free_extent_map(em);
50734ed1d16eSMark Fasheh 	return ret;
50742b82032cSYan Zheng }
50752b82032cSYan Zheng 
50762b82032cSYan Zheng /*
50772b82032cSYan Zheng  * Chunk allocation falls into two parts. The first part does the work
50782b82032cSYan Zheng  * that makes the newly allocated chunk usable, but does not do any
50792b82032cSYan Zheng  * operation that modifies the chunk tree. The second part does the work
50802b82032cSYan Zheng  * that requires modifying the chunk tree. This division is important for
50812b82032cSYan Zheng  * the bootstrap process of adding storage to a seed btrfs.
50822b82032cSYan Zheng  */
50832b82032cSYan Zheng int btrfs_alloc_chunk(struct btrfs_trans_handle *trans,
50842ff7e61eSJeff Mahoney 		      struct btrfs_fs_info *fs_info, u64 type)
50852b82032cSYan Zheng {
50862b82032cSYan Zheng 	u64 chunk_offset;
50872b82032cSYan Zheng 
50880b246afaSJeff Mahoney 	ASSERT(mutex_is_locked(&fs_info->chunk_mutex));
50890b246afaSJeff Mahoney 	chunk_offset = find_next_chunk(fs_info);
509072b468c8SDavid Sterba 	return __btrfs_alloc_chunk(trans, chunk_offset, type);
50912b82032cSYan Zheng }
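
/*
 * A sketch of the typical two-phase flow described above (the exact call
 * sites are an assumption, not verified against this tree): phase one runs
 * at allocation time via btrfs_alloc_chunk()/__btrfs_alloc_chunk() and
 * mostly touches in-memory state (extent map, pending_chunks, the new
 * block group); phase two, btrfs_finish_chunk_alloc(), runs later, when
 * the pending block groups are persisted around transaction commit, and
 * is what inserts the chunk item into the chunk tree (and, for SYSTEM
 * chunks, into the superblock's sys_chunk_array).
 */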
50922b82032cSYan Zheng 
5093d397712bSChris Mason static noinline int init_first_rw_device(struct btrfs_trans_handle *trans,
5094e4a4dce7SDavid Sterba 					 struct btrfs_fs_info *fs_info)
50952b82032cSYan Zheng {
50962b82032cSYan Zheng 	u64 chunk_offset;
50972b82032cSYan Zheng 	u64 sys_chunk_offset;
50982b82032cSYan Zheng 	u64 alloc_profile;
50992b82032cSYan Zheng 	int ret;
51002b82032cSYan Zheng 
51016df9a95eSJosef Bacik 	chunk_offset = find_next_chunk(fs_info);
51021b86826dSJeff Mahoney 	alloc_profile = btrfs_metadata_alloc_profile(fs_info);
510372b468c8SDavid Sterba 	ret = __btrfs_alloc_chunk(trans, chunk_offset, alloc_profile);
510479787eaaSJeff Mahoney 	if (ret)
510579787eaaSJeff Mahoney 		return ret;
51062b82032cSYan Zheng 
51070b246afaSJeff Mahoney 	sys_chunk_offset = find_next_chunk(fs_info);
51081b86826dSJeff Mahoney 	alloc_profile = btrfs_system_alloc_profile(fs_info);
510972b468c8SDavid Sterba 	ret = __btrfs_alloc_chunk(trans, sys_chunk_offset, alloc_profile);
511079787eaaSJeff Mahoney 	return ret;
51112b82032cSYan Zheng }
51122b82032cSYan Zheng 
5113d20983b4SMiao Xie static inline int btrfs_chunk_max_errors(struct map_lookup *map)
5114d20983b4SMiao Xie {
5115d20983b4SMiao Xie 	int max_errors;
5116d20983b4SMiao Xie 
5117d20983b4SMiao Xie 	if (map->type & (BTRFS_BLOCK_GROUP_RAID1 |
5118d20983b4SMiao Xie 			 BTRFS_BLOCK_GROUP_RAID10 |
5119d20983b4SMiao Xie 			 BTRFS_BLOCK_GROUP_RAID5 |
5120d20983b4SMiao Xie 			 BTRFS_BLOCK_GROUP_DUP)) {
5121d20983b4SMiao Xie 		max_errors = 1;
5122d20983b4SMiao Xie 	} else if (map->type & BTRFS_BLOCK_GROUP_RAID6) {
5123d20983b4SMiao Xie 		max_errors = 2;
5124d20983b4SMiao Xie 	} else {
5125d20983b4SMiao Xie 		max_errors = 0;
5126d20983b4SMiao Xie 	}
5127d20983b4SMiao Xie 
5128d20983b4SMiao Xie 	return max_errors;
51292b82032cSYan Zheng }
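
/*
 * For reference, what the helper above returns (a summary of the code, not
 * an additional guarantee): RAID1, RAID10, RAID5 and DUP report one
 * tolerated write error, RAID6 reports two, and SINGLE/RAID0 report none.
 */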
51302b82032cSYan Zheng 
51312ff7e61eSJeff Mahoney int btrfs_chunk_readonly(struct btrfs_fs_info *fs_info, u64 chunk_offset)
51322b82032cSYan Zheng {
51332b82032cSYan Zheng 	struct extent_map *em;
51342b82032cSYan Zheng 	struct map_lookup *map;
51352b82032cSYan Zheng 	int readonly = 0;
5136d20983b4SMiao Xie 	int miss_ndevs = 0;
51372b82032cSYan Zheng 	int i;
51382b82032cSYan Zheng 
5139592d92eeSLiu Bo 	em = get_chunk_map(fs_info, chunk_offset, 1);
5140592d92eeSLiu Bo 	if (IS_ERR(em))
51412b82032cSYan Zheng 		return 1;
51422b82032cSYan Zheng 
514395617d69SJeff Mahoney 	map = em->map_lookup;
51442b82032cSYan Zheng 	for (i = 0; i < map->num_stripes; i++) {
5145e6e674bdSAnand Jain 		if (test_bit(BTRFS_DEV_STATE_MISSING,
5146e6e674bdSAnand Jain 					&map->stripes[i].dev->dev_state)) {
5147d20983b4SMiao Xie 			miss_ndevs++;
5148d20983b4SMiao Xie 			continue;
5149d20983b4SMiao Xie 		}
5150ebbede42SAnand Jain 		if (!test_bit(BTRFS_DEV_STATE_WRITEABLE,
5151ebbede42SAnand Jain 					&map->stripes[i].dev->dev_state)) {
51522b82032cSYan Zheng 			readonly = 1;
5153d20983b4SMiao Xie 			goto end;
51542b82032cSYan Zheng 		}
51552b82032cSYan Zheng 	}
5156d20983b4SMiao Xie 
5157d20983b4SMiao Xie 	/*
5158d20983b4SMiao Xie 	 * If the number of missing devices is larger than max errors,
5159d20983b4SMiao Xie 	 * we cannot write the data into that chunk successfully, so
5160d20983b4SMiao Xie 	 * set it readonly.
5161d20983b4SMiao Xie 	 */
5162d20983b4SMiao Xie 	if (miss_ndevs > btrfs_chunk_max_errors(map))
5163d20983b4SMiao Xie 		readonly = 1;
5164d20983b4SMiao Xie end:
51652b82032cSYan Zheng 	free_extent_map(em);
51662b82032cSYan Zheng 	return readonly;
51670b86a832SChris Mason }
51680b86a832SChris Mason 
51690b86a832SChris Mason void btrfs_mapping_init(struct btrfs_mapping_tree *tree)
51700b86a832SChris Mason {
5171a8067e02SDavid Sterba 	extent_map_tree_init(&tree->map_tree);
51720b86a832SChris Mason }
51730b86a832SChris Mason 
51740b86a832SChris Mason void btrfs_mapping_tree_free(struct btrfs_mapping_tree *tree)
51750b86a832SChris Mason {
51760b86a832SChris Mason 	struct extent_map *em;
51770b86a832SChris Mason 
51780b86a832SChris Mason 	while (1) {
5179890871beSChris Mason 		write_lock(&tree->map_tree.lock);
51800b86a832SChris Mason 		em = lookup_extent_mapping(&tree->map_tree, 0, (u64)-1);
51810b86a832SChris Mason 		if (em)
51820b86a832SChris Mason 			remove_extent_mapping(&tree->map_tree, em);
5183890871beSChris Mason 		write_unlock(&tree->map_tree.lock);
51840b86a832SChris Mason 		if (!em)
51850b86a832SChris Mason 			break;
51860b86a832SChris Mason 		/* once for us */
51870b86a832SChris Mason 		free_extent_map(em);
51880b86a832SChris Mason 		/* once for the tree */
51890b86a832SChris Mason 		free_extent_map(em);
51900b86a832SChris Mason 	}
51910b86a832SChris Mason }
51920b86a832SChris Mason 
51935d964051SStefan Behrens int btrfs_num_copies(struct btrfs_fs_info *fs_info, u64 logical, u64 len)
5194f188591eSChris Mason {
5195f188591eSChris Mason 	struct extent_map *em;
5196f188591eSChris Mason 	struct map_lookup *map;
5197f188591eSChris Mason 	int ret;
5198f188591eSChris Mason 
5199592d92eeSLiu Bo 	em = get_chunk_map(fs_info, logical, len);
5200592d92eeSLiu Bo 	if (IS_ERR(em))
5201fb7669b5SJosef Bacik 		/*
5202592d92eeSLiu Bo 		 * We could return errors for these cases, but that could get
5203592d92eeSLiu Bo 		 * ugly and we'd probably end up doing the same thing anyway, which
5204592d92eeSLiu Bo 		 * is to do nothing else and exit, so return 1 so the callers don't try
5205592d92eeSLiu Bo 		 * to use other copies.
5206fb7669b5SJosef Bacik 		 */
5207fb7669b5SJosef Bacik 		return 1;
5208fb7669b5SJosef Bacik 
520995617d69SJeff Mahoney 	map = em->map_lookup;
5210f188591eSChris Mason 	if (map->type & (BTRFS_BLOCK_GROUP_DUP | BTRFS_BLOCK_GROUP_RAID1))
5211f188591eSChris Mason 		ret = map->num_stripes;
5212321aecc6SChris Mason 	else if (map->type & BTRFS_BLOCK_GROUP_RAID10)
5213321aecc6SChris Mason 		ret = map->sub_stripes;
521453b381b3SDavid Woodhouse 	else if (map->type & BTRFS_BLOCK_GROUP_RAID5)
521553b381b3SDavid Woodhouse 		ret = 2;
521653b381b3SDavid Woodhouse 	else if (map->type & BTRFS_BLOCK_GROUP_RAID6)
52178810f751SLiu Bo 		/*
52188810f751SLiu Bo 		 * There could be two corrupted data stripes, so we need
52198810f751SLiu Bo 		 * to retry in a loop in order to rebuild the correct data.
52208810f751SLiu Bo 		 *
52218810f751SLiu Bo 		 * Fail a stripe at a time on every retry except the
52228810f751SLiu Bo 		 * stripe under reconstruction.
52238810f751SLiu Bo 		 */
52248810f751SLiu Bo 		ret = map->num_stripes;
5225f188591eSChris Mason 	else
5226f188591eSChris Mason 		ret = 1;
5227f188591eSChris Mason 	free_extent_map(em);
5228ad6d620eSStefan Behrens 
522973beece9SLiu Bo 	btrfs_dev_replace_lock(&fs_info->dev_replace, 0);
52306fad823fSLiu Bo 	if (btrfs_dev_replace_is_ongoing(&fs_info->dev_replace) &&
52316fad823fSLiu Bo 	    fs_info->dev_replace.tgtdev)
5232ad6d620eSStefan Behrens 		ret++;
523373beece9SLiu Bo 	btrfs_dev_replace_unlock(&fs_info->dev_replace, 0);
5234ad6d620eSStefan Behrens 
5235f188591eSChris Mason 	return ret;
5236f188591eSChris Mason }
5237f188591eSChris Mason 
52382ff7e61eSJeff Mahoney unsigned long btrfs_full_stripe_len(struct btrfs_fs_info *fs_info,
523953b381b3SDavid Woodhouse 				    u64 logical)
524053b381b3SDavid Woodhouse {
524153b381b3SDavid Woodhouse 	struct extent_map *em;
524253b381b3SDavid Woodhouse 	struct map_lookup *map;
52430b246afaSJeff Mahoney 	unsigned long len = fs_info->sectorsize;
524453b381b3SDavid Woodhouse 
5245592d92eeSLiu Bo 	em = get_chunk_map(fs_info, logical, len);
524653b381b3SDavid Woodhouse 
524769f03f13SNikolay Borisov 	if (!WARN_ON(IS_ERR(em))) {
524895617d69SJeff Mahoney 		map = em->map_lookup;
5249ffe2d203SZhao Lei 		if (map->type & BTRFS_BLOCK_GROUP_RAID56_MASK)
525053b381b3SDavid Woodhouse 			len = map->stripe_len * nr_data_stripes(map);
525153b381b3SDavid Woodhouse 		free_extent_map(em);
525269f03f13SNikolay Borisov 	}
525353b381b3SDavid Woodhouse 	return len;
525453b381b3SDavid Woodhouse }
525553b381b3SDavid Woodhouse 
5256e4ff5fb5SNikolay Borisov int btrfs_is_parity_mirror(struct btrfs_fs_info *fs_info, u64 logical, u64 len)
525753b381b3SDavid Woodhouse {
525853b381b3SDavid Woodhouse 	struct extent_map *em;
525953b381b3SDavid Woodhouse 	struct map_lookup *map;
526053b381b3SDavid Woodhouse 	int ret = 0;
526153b381b3SDavid Woodhouse 
5262592d92eeSLiu Bo 	em = get_chunk_map(fs_info, logical, len);
526353b381b3SDavid Woodhouse 
526469f03f13SNikolay Borisov 	if(!WARN_ON(IS_ERR(em))) {
526595617d69SJeff Mahoney 		map = em->map_lookup;
5266ffe2d203SZhao Lei 		if (map->type & BTRFS_BLOCK_GROUP_RAID56_MASK)
526753b381b3SDavid Woodhouse 			ret = 1;
526853b381b3SDavid Woodhouse 		free_extent_map(em);
526969f03f13SNikolay Borisov 	}
527053b381b3SDavid Woodhouse 	return ret;
527153b381b3SDavid Woodhouse }
527253b381b3SDavid Woodhouse 
527330d9861fSStefan Behrens static int find_live_mirror(struct btrfs_fs_info *fs_info,
527430d9861fSStefan Behrens 			    struct map_lookup *map, int first, int num,
527530d9861fSStefan Behrens 			    int optimal, int dev_replace_is_ongoing)
5276dfe25020SChris Mason {
5277dfe25020SChris Mason 	int i;
527830d9861fSStefan Behrens 	int tolerance;
527930d9861fSStefan Behrens 	struct btrfs_device *srcdev;
528030d9861fSStefan Behrens 
528130d9861fSStefan Behrens 	if (dev_replace_is_ongoing &&
528230d9861fSStefan Behrens 	    fs_info->dev_replace.cont_reading_from_srcdev_mode ==
528330d9861fSStefan Behrens 	     BTRFS_DEV_REPLACE_ITEM_CONT_READING_FROM_SRCDEV_MODE_AVOID)
528430d9861fSStefan Behrens 		srcdev = fs_info->dev_replace.srcdev;
528530d9861fSStefan Behrens 	else
528630d9861fSStefan Behrens 		srcdev = NULL;
528730d9861fSStefan Behrens 
528830d9861fSStefan Behrens 	/*
528930d9861fSStefan Behrens 	 * Try to avoid the drive that is the source drive for a
529030d9861fSStefan Behrens 	 * dev-replace procedure; only choose it if no other non-missing
529130d9861fSStefan Behrens 	 * mirror is available.
529230d9861fSStefan Behrens 	 */
529330d9861fSStefan Behrens 	for (tolerance = 0; tolerance < 2; tolerance++) {
529430d9861fSStefan Behrens 		if (map->stripes[optimal].dev->bdev &&
529530d9861fSStefan Behrens 		    (tolerance || map->stripes[optimal].dev != srcdev))
5296dfe25020SChris Mason 			return optimal;
5297dfe25020SChris Mason 		for (i = first; i < first + num; i++) {
529830d9861fSStefan Behrens 			if (map->stripes[i].dev->bdev &&
529930d9861fSStefan Behrens 			    (tolerance || map->stripes[i].dev != srcdev))
5300dfe25020SChris Mason 				return i;
5301dfe25020SChris Mason 		}
530230d9861fSStefan Behrens 	}
530330d9861fSStefan Behrens 
5304dfe25020SChris Mason 	/* we couldn't find one that doesn't fail.  Just return something
5305dfe25020SChris Mason 	 * and the io error handling code will clean up eventually
5306dfe25020SChris Mason 	 */
5307dfe25020SChris Mason 	return optimal;
5308dfe25020SChris Mason }
5309dfe25020SChris Mason 
531053b381b3SDavid Woodhouse static inline int parity_smaller(u64 a, u64 b)
531153b381b3SDavid Woodhouse {
531253b381b3SDavid Woodhouse 	return a > b;
531353b381b3SDavid Woodhouse }
531453b381b3SDavid Woodhouse 
531553b381b3SDavid Woodhouse /* Bubble-sort the stripe set to put the parity/syndrome stripes last */
53168e5cfb55SZhao Lei static void sort_parity_stripes(struct btrfs_bio *bbio, int num_stripes)
531753b381b3SDavid Woodhouse {
531853b381b3SDavid Woodhouse 	struct btrfs_bio_stripe s;
531953b381b3SDavid Woodhouse 	int i;
532053b381b3SDavid Woodhouse 	u64 l;
532153b381b3SDavid Woodhouse 	int again = 1;
532253b381b3SDavid Woodhouse 
532353b381b3SDavid Woodhouse 	while (again) {
532453b381b3SDavid Woodhouse 		again = 0;
5325cc7539edSZhao Lei 		for (i = 0; i < num_stripes - 1; i++) {
53268e5cfb55SZhao Lei 			if (parity_smaller(bbio->raid_map[i],
53278e5cfb55SZhao Lei 					   bbio->raid_map[i+1])) {
532853b381b3SDavid Woodhouse 				s = bbio->stripes[i];
53298e5cfb55SZhao Lei 				l = bbio->raid_map[i];
533053b381b3SDavid Woodhouse 				bbio->stripes[i] = bbio->stripes[i+1];
53318e5cfb55SZhao Lei 				bbio->raid_map[i] = bbio->raid_map[i+1];
533253b381b3SDavid Woodhouse 				bbio->stripes[i+1] = s;
53338e5cfb55SZhao Lei 				bbio->raid_map[i+1] = l;
53342c8cdd6eSMiao Xie 
533553b381b3SDavid Woodhouse 				again = 1;
533653b381b3SDavid Woodhouse 			}
533753b381b3SDavid Woodhouse 		}
533853b381b3SDavid Woodhouse 	}
533953b381b3SDavid Woodhouse }
534053b381b3SDavid Woodhouse 
53416e9606d2SZhao Lei static struct btrfs_bio *alloc_btrfs_bio(int total_stripes, int real_stripes)
53426e9606d2SZhao Lei {
53436e9606d2SZhao Lei 	struct btrfs_bio *bbio = kzalloc(
5344e57cf21eSChris Mason 		 /* the size of the btrfs_bio */
53456e9606d2SZhao Lei 		sizeof(struct btrfs_bio) +
5346e57cf21eSChris Mason 		/* plus the variable array for the stripes */
53476e9606d2SZhao Lei 		sizeof(struct btrfs_bio_stripe) * (total_stripes) +
5348e57cf21eSChris Mason 		/* plus the variable array for the tgt dev */
53496e9606d2SZhao Lei 		sizeof(int) * (real_stripes) +
5350e57cf21eSChris Mason 		/*
5351e57cf21eSChris Mason 		 * plus the raid_map, which includes both the tgt dev
5352e57cf21eSChris Mason 		 * and the stripes
5353e57cf21eSChris Mason 		 */
5354e57cf21eSChris Mason 		sizeof(u64) * (total_stripes),
5355277fb5fcSMichal Hocko 		GFP_NOFS|__GFP_NOFAIL);
53566e9606d2SZhao Lei 
53576e9606d2SZhao Lei 	atomic_set(&bbio->error, 0);
5358140475aeSElena Reshetova 	refcount_set(&bbio->refs, 1);
53596e9606d2SZhao Lei 
53606e9606d2SZhao Lei 	return bbio;
53616e9606d2SZhao Lei }
53626e9606d2SZhao Lei 
53636e9606d2SZhao Lei void btrfs_get_bbio(struct btrfs_bio *bbio)
53646e9606d2SZhao Lei {
5365140475aeSElena Reshetova 	WARN_ON(!refcount_read(&bbio->refs));
5366140475aeSElena Reshetova 	refcount_inc(&bbio->refs);
53676e9606d2SZhao Lei }
53686e9606d2SZhao Lei 
53696e9606d2SZhao Lei void btrfs_put_bbio(struct btrfs_bio *bbio)
53706e9606d2SZhao Lei {
53716e9606d2SZhao Lei 	if (!bbio)
53726e9606d2SZhao Lei 		return;
5373140475aeSElena Reshetova 	if (refcount_dec_and_test(&bbio->refs))
53746e9606d2SZhao Lei 		kfree(bbio);
53756e9606d2SZhao Lei }
53766e9606d2SZhao Lei 
53770b3d4cd3SLiu Bo /* can REQ_OP_DISCARD be sent with other REQ like REQ_OP_WRITE? */
53780b3d4cd3SLiu Bo /*
53790b3d4cd3SLiu Bo  * Please note that discard won't be sent to the target device of a
53800b3d4cd3SLiu Bo  * device replace operation.
53810b3d4cd3SLiu Bo  */
53820b3d4cd3SLiu Bo static int __btrfs_map_block_for_discard(struct btrfs_fs_info *fs_info,
53830b3d4cd3SLiu Bo 					 u64 logical, u64 length,
53840b3d4cd3SLiu Bo 					 struct btrfs_bio **bbio_ret)
53850b3d4cd3SLiu Bo {
53860b3d4cd3SLiu Bo 	struct extent_map *em;
53870b3d4cd3SLiu Bo 	struct map_lookup *map;
53880b3d4cd3SLiu Bo 	struct btrfs_bio *bbio;
53890b3d4cd3SLiu Bo 	u64 offset;
53900b3d4cd3SLiu Bo 	u64 stripe_nr;
53910b3d4cd3SLiu Bo 	u64 stripe_nr_end;
53920b3d4cd3SLiu Bo 	u64 stripe_end_offset;
53930b3d4cd3SLiu Bo 	u64 stripe_cnt;
53940b3d4cd3SLiu Bo 	u64 stripe_len;
53950b3d4cd3SLiu Bo 	u64 stripe_offset;
53960b3d4cd3SLiu Bo 	u64 num_stripes;
53970b3d4cd3SLiu Bo 	u32 stripe_index;
53980b3d4cd3SLiu Bo 	u32 factor = 0;
53990b3d4cd3SLiu Bo 	u32 sub_stripes = 0;
54000b3d4cd3SLiu Bo 	u64 stripes_per_dev = 0;
54010b3d4cd3SLiu Bo 	u32 remaining_stripes = 0;
54020b3d4cd3SLiu Bo 	u32 last_stripe = 0;
54030b3d4cd3SLiu Bo 	int ret = 0;
54040b3d4cd3SLiu Bo 	int i;
54050b3d4cd3SLiu Bo 
54060b3d4cd3SLiu Bo 	/* discard always return a bbio */
54070b3d4cd3SLiu Bo 	ASSERT(bbio_ret);
54080b3d4cd3SLiu Bo 
54090b3d4cd3SLiu Bo 	em = get_chunk_map(fs_info, logical, length);
54100b3d4cd3SLiu Bo 	if (IS_ERR(em))
54110b3d4cd3SLiu Bo 		return PTR_ERR(em);
54120b3d4cd3SLiu Bo 
54130b3d4cd3SLiu Bo 	map = em->map_lookup;
54140b3d4cd3SLiu Bo 	/* we don't discard raid56 yet */
54150b3d4cd3SLiu Bo 	if (map->type & BTRFS_BLOCK_GROUP_RAID56_MASK) {
54160b3d4cd3SLiu Bo 		ret = -EOPNOTSUPP;
54170b3d4cd3SLiu Bo 		goto out;
54180b3d4cd3SLiu Bo 	}
54190b3d4cd3SLiu Bo 
54200b3d4cd3SLiu Bo 	offset = logical - em->start;
54210b3d4cd3SLiu Bo 	length = min_t(u64, em->len - offset, length);
54220b3d4cd3SLiu Bo 
54230b3d4cd3SLiu Bo 	stripe_len = map->stripe_len;
54240b3d4cd3SLiu Bo 	/*
54250b3d4cd3SLiu Bo 	 * stripe_nr counts the total number of stripes we have to stride
54260b3d4cd3SLiu Bo 	 * to get to this block
54270b3d4cd3SLiu Bo 	 */
54280b3d4cd3SLiu Bo 	stripe_nr = div64_u64(offset, stripe_len);
54290b3d4cd3SLiu Bo 
54300b3d4cd3SLiu Bo 	/* stripe_offset is the offset of this block in its stripe */
54310b3d4cd3SLiu Bo 	stripe_offset = offset - stripe_nr * stripe_len;
54320b3d4cd3SLiu Bo 
54330b3d4cd3SLiu Bo 	stripe_nr_end = round_up(offset + length, map->stripe_len);
543442c61ab6SLiu Bo 	stripe_nr_end = div64_u64(stripe_nr_end, map->stripe_len);
54350b3d4cd3SLiu Bo 	stripe_cnt = stripe_nr_end - stripe_nr;
54360b3d4cd3SLiu Bo 	stripe_end_offset = stripe_nr_end * map->stripe_len -
54370b3d4cd3SLiu Bo 			    (offset + length);
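	/*
	 * Worked example with hypothetical values: stripe_len = 64K,
	 * offset = 3 * 64K + 4K and length = 128K give stripe_nr = 3,
	 * stripe_offset = 4K, stripe_nr_end = round_up(324K, 64K) / 64K = 6,
	 * stripe_cnt = 3 and stripe_end_offset = 6 * 64K - 324K = 60K.
	 */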
54380b3d4cd3SLiu Bo 	/*
54390b3d4cd3SLiu Bo 	 * after this, stripe_nr is the number of stripes on this
54400b3d4cd3SLiu Bo 	 * device we have to walk to find the data, and stripe_index is
54410b3d4cd3SLiu Bo 	 * the number of our device in the stripe array
54420b3d4cd3SLiu Bo 	 */
54430b3d4cd3SLiu Bo 	num_stripes = 1;
54440b3d4cd3SLiu Bo 	stripe_index = 0;
54450b3d4cd3SLiu Bo 	if (map->type & (BTRFS_BLOCK_GROUP_RAID0 |
54460b3d4cd3SLiu Bo 			 BTRFS_BLOCK_GROUP_RAID10)) {
54470b3d4cd3SLiu Bo 		if (map->type & BTRFS_BLOCK_GROUP_RAID0)
54480b3d4cd3SLiu Bo 			sub_stripes = 1;
54490b3d4cd3SLiu Bo 		else
54500b3d4cd3SLiu Bo 			sub_stripes = map->sub_stripes;
54510b3d4cd3SLiu Bo 
54520b3d4cd3SLiu Bo 		factor = map->num_stripes / sub_stripes;
54530b3d4cd3SLiu Bo 		num_stripes = min_t(u64, map->num_stripes,
54540b3d4cd3SLiu Bo 				    sub_stripes * stripe_cnt);
54550b3d4cd3SLiu Bo 		stripe_nr = div_u64_rem(stripe_nr, factor, &stripe_index);
54560b3d4cd3SLiu Bo 		stripe_index *= sub_stripes;
54570b3d4cd3SLiu Bo 		stripes_per_dev = div_u64_rem(stripe_cnt, factor,
54580b3d4cd3SLiu Bo 					      &remaining_stripes);
54590b3d4cd3SLiu Bo 		div_u64_rem(stripe_nr_end - 1, factor, &last_stripe);
54600b3d4cd3SLiu Bo 		last_stripe *= sub_stripes;
54610b3d4cd3SLiu Bo 	} else if (map->type & (BTRFS_BLOCK_GROUP_RAID1 |
54620b3d4cd3SLiu Bo 				BTRFS_BLOCK_GROUP_DUP)) {
54630b3d4cd3SLiu Bo 		num_stripes = map->num_stripes;
54640b3d4cd3SLiu Bo 	} else {
54650b3d4cd3SLiu Bo 		stripe_nr = div_u64_rem(stripe_nr, map->num_stripes,
54660b3d4cd3SLiu Bo 					&stripe_index);
54670b3d4cd3SLiu Bo 	}
54680b3d4cd3SLiu Bo 
54690b3d4cd3SLiu Bo 	bbio = alloc_btrfs_bio(num_stripes, 0);
54700b3d4cd3SLiu Bo 	if (!bbio) {
54710b3d4cd3SLiu Bo 		ret = -ENOMEM;
54720b3d4cd3SLiu Bo 		goto out;
54730b3d4cd3SLiu Bo 	}
54740b3d4cd3SLiu Bo 
54750b3d4cd3SLiu Bo 	for (i = 0; i < num_stripes; i++) {
54760b3d4cd3SLiu Bo 		bbio->stripes[i].physical =
54770b3d4cd3SLiu Bo 			map->stripes[stripe_index].physical +
54780b3d4cd3SLiu Bo 			stripe_offset + stripe_nr * map->stripe_len;
54790b3d4cd3SLiu Bo 		bbio->stripes[i].dev = map->stripes[stripe_index].dev;
54800b3d4cd3SLiu Bo 
54810b3d4cd3SLiu Bo 		if (map->type & (BTRFS_BLOCK_GROUP_RAID0 |
54820b3d4cd3SLiu Bo 				 BTRFS_BLOCK_GROUP_RAID10)) {
54830b3d4cd3SLiu Bo 			bbio->stripes[i].length = stripes_per_dev *
54840b3d4cd3SLiu Bo 				map->stripe_len;
54850b3d4cd3SLiu Bo 
54860b3d4cd3SLiu Bo 			if (i / sub_stripes < remaining_stripes)
54870b3d4cd3SLiu Bo 				bbio->stripes[i].length +=
54880b3d4cd3SLiu Bo 					map->stripe_len;
54890b3d4cd3SLiu Bo 
54900b3d4cd3SLiu Bo 			/*
54910b3d4cd3SLiu Bo 			 * Special for the first stripe and
54920b3d4cd3SLiu Bo 			 * the last stripe:
54930b3d4cd3SLiu Bo 			 *
54940b3d4cd3SLiu Bo 			 * |-------|...|-------|
54950b3d4cd3SLiu Bo 			 *     |----------|
54960b3d4cd3SLiu Bo 			 *    off     end_off
54970b3d4cd3SLiu Bo 			 */
54980b3d4cd3SLiu Bo 			if (i < sub_stripes)
54990b3d4cd3SLiu Bo 				bbio->stripes[i].length -=
55000b3d4cd3SLiu Bo 					stripe_offset;
55010b3d4cd3SLiu Bo 
55020b3d4cd3SLiu Bo 			if (stripe_index >= last_stripe &&
55030b3d4cd3SLiu Bo 			    stripe_index <= (last_stripe +
55040b3d4cd3SLiu Bo 					     sub_stripes - 1))
55050b3d4cd3SLiu Bo 				bbio->stripes[i].length -=
55060b3d4cd3SLiu Bo 					stripe_end_offset;
55070b3d4cd3SLiu Bo 
55080b3d4cd3SLiu Bo 			if (i == sub_stripes - 1)
55090b3d4cd3SLiu Bo 				stripe_offset = 0;
55100b3d4cd3SLiu Bo 		} else {
55110b3d4cd3SLiu Bo 			bbio->stripes[i].length = length;
55120b3d4cd3SLiu Bo 		}
55130b3d4cd3SLiu Bo 
55140b3d4cd3SLiu Bo 		stripe_index++;
55150b3d4cd3SLiu Bo 		if (stripe_index == map->num_stripes) {
55160b3d4cd3SLiu Bo 			stripe_index = 0;
55170b3d4cd3SLiu Bo 			stripe_nr++;
55180b3d4cd3SLiu Bo 		}
55190b3d4cd3SLiu Bo 	}
55200b3d4cd3SLiu Bo 
55210b3d4cd3SLiu Bo 	*bbio_ret = bbio;
55220b3d4cd3SLiu Bo 	bbio->map_type = map->type;
55230b3d4cd3SLiu Bo 	bbio->num_stripes = num_stripes;
55240b3d4cd3SLiu Bo out:
55250b3d4cd3SLiu Bo 	free_extent_map(em);
55260b3d4cd3SLiu Bo 	return ret;
55270b3d4cd3SLiu Bo }
55280b3d4cd3SLiu Bo 
55295ab56090SLiu Bo /*
55305ab56090SLiu Bo  * In the dev-replace case, for the repair case (that's the only case where
55315ab56090SLiu Bo  * the mirror is selected explicitly when calling btrfs_map_block), blocks
55325ab56090SLiu Bo  * left of the left cursor can also be read from the target drive.
55335ab56090SLiu Bo  *
55345ab56090SLiu Bo  * For REQ_GET_READ_MIRRORS, the target drive is added as the last one to the
55355ab56090SLiu Bo  * array of stripes.
55365ab56090SLiu Bo  * For READ, it also needs to be supported using the same mirror number.
55375ab56090SLiu Bo  *
55385ab56090SLiu Bo  * If the requested block is not left of the left cursor, EIO is returned. This
55395ab56090SLiu Bo  * can happen because btrfs_num_copies() returns one more in the dev-replace
55405ab56090SLiu Bo  * case.
55415ab56090SLiu Bo  */
55425ab56090SLiu Bo static int get_extra_mirror_from_replace(struct btrfs_fs_info *fs_info,
55435ab56090SLiu Bo 					 u64 logical, u64 length,
55445ab56090SLiu Bo 					 u64 srcdev_devid, int *mirror_num,
55455ab56090SLiu Bo 					 u64 *physical)
55465ab56090SLiu Bo {
55475ab56090SLiu Bo 	struct btrfs_bio *bbio = NULL;
55485ab56090SLiu Bo 	int num_stripes;
55495ab56090SLiu Bo 	int index_srcdev = 0;
55505ab56090SLiu Bo 	int found = 0;
55515ab56090SLiu Bo 	u64 physical_of_found = 0;
55525ab56090SLiu Bo 	int i;
55535ab56090SLiu Bo 	int ret = 0;
55545ab56090SLiu Bo 
55555ab56090SLiu Bo 	ret = __btrfs_map_block(fs_info, BTRFS_MAP_GET_READ_MIRRORS,
55565ab56090SLiu Bo 				logical, &length, &bbio, 0, 0);
55575ab56090SLiu Bo 	if (ret) {
55585ab56090SLiu Bo 		ASSERT(bbio == NULL);
55595ab56090SLiu Bo 		return ret;
55605ab56090SLiu Bo 	}
55615ab56090SLiu Bo 
55625ab56090SLiu Bo 	num_stripes = bbio->num_stripes;
55635ab56090SLiu Bo 	if (*mirror_num > num_stripes) {
55645ab56090SLiu Bo 		/*
55655ab56090SLiu Bo 		 * BTRFS_MAP_GET_READ_MIRRORS does not contain this mirror,
55665ab56090SLiu Bo 		 * that means that the requested area is not left of the left
55675ab56090SLiu Bo 		 * cursor
55685ab56090SLiu Bo 		 */
55695ab56090SLiu Bo 		btrfs_put_bbio(bbio);
55705ab56090SLiu Bo 		return -EIO;
55715ab56090SLiu Bo 	}
55725ab56090SLiu Bo 
55735ab56090SLiu Bo 	/*
55745ab56090SLiu Bo 	 * Process the rest of the function using the mirror_num of the source
55755ab56090SLiu Bo 	 * drive. Therefore look it up first.  At the end, patch the device
55765ab56090SLiu Bo 	 * pointer to that of the target drive.
55775ab56090SLiu Bo 	 */
55785ab56090SLiu Bo 	for (i = 0; i < num_stripes; i++) {
55795ab56090SLiu Bo 		if (bbio->stripes[i].dev->devid != srcdev_devid)
55805ab56090SLiu Bo 			continue;
55815ab56090SLiu Bo 
55825ab56090SLiu Bo 		/*
55835ab56090SLiu Bo 		 * In case of DUP, in order to keep it simple, only add the
55845ab56090SLiu Bo 		 * mirror with the lowest physical address
55855ab56090SLiu Bo 		 */
55865ab56090SLiu Bo 		if (found &&
55875ab56090SLiu Bo 		    physical_of_found <= bbio->stripes[i].physical)
55885ab56090SLiu Bo 			continue;
55895ab56090SLiu Bo 
55905ab56090SLiu Bo 		index_srcdev = i;
55915ab56090SLiu Bo 		found = 1;
55925ab56090SLiu Bo 		physical_of_found = bbio->stripes[i].physical;
55935ab56090SLiu Bo 	}
55945ab56090SLiu Bo 
55955ab56090SLiu Bo 	btrfs_put_bbio(bbio);
55965ab56090SLiu Bo 
55975ab56090SLiu Bo 	ASSERT(found);
55985ab56090SLiu Bo 	if (!found)
55995ab56090SLiu Bo 		return -EIO;
56005ab56090SLiu Bo 
56015ab56090SLiu Bo 	*mirror_num = index_srcdev + 1;
56025ab56090SLiu Bo 	*physical = physical_of_found;
56035ab56090SLiu Bo 	return ret;
56045ab56090SLiu Bo }
56055ab56090SLiu Bo 
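/*
 * Adjust an already built stripe array for an ongoing dev-replace (a short
 * summary of the logic below):
 *
 * - For BTRFS_MAP_WRITE, every stripe that targets the source device gets a
 *   duplicate appended that points at the replace target device, so the data
 *   is written to both copies while the replace runs.
 *
 * - For BTRFS_MAP_GET_READ_MIRRORS, the target device is appended once as an
 *   extra mirror, mirroring the source-device stripe with the lowest
 *   physical address (relevant for DUP).
 *
 * num_stripes, max_errors and bbio->num_tgtdevs are updated accordingly.
 */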
560673c0f228SLiu Bo static void handle_ops_on_dev_replace(enum btrfs_map_op op,
560773c0f228SLiu Bo 				      struct btrfs_bio **bbio_ret,
560873c0f228SLiu Bo 				      struct btrfs_dev_replace *dev_replace,
560973c0f228SLiu Bo 				      int *num_stripes_ret, int *max_errors_ret)
561073c0f228SLiu Bo {
561173c0f228SLiu Bo 	struct btrfs_bio *bbio = *bbio_ret;
561273c0f228SLiu Bo 	u64 srcdev_devid = dev_replace->srcdev->devid;
561373c0f228SLiu Bo 	int tgtdev_indexes = 0;
561473c0f228SLiu Bo 	int num_stripes = *num_stripes_ret;
561573c0f228SLiu Bo 	int max_errors = *max_errors_ret;
561673c0f228SLiu Bo 	int i;
561773c0f228SLiu Bo 
561873c0f228SLiu Bo 	if (op == BTRFS_MAP_WRITE) {
561973c0f228SLiu Bo 		int index_where_to_add;
562073c0f228SLiu Bo 
562173c0f228SLiu Bo 		/*
562273c0f228SLiu Bo 		 * duplicate the write operations while the dev replace
562373c0f228SLiu Bo 		 * procedure is running. Since the copying of the old disk to
562473c0f228SLiu Bo 		 * the new disk takes place at run time while the filesystem is
562573c0f228SLiu Bo 		 * mounted writable, the regular write operations to the old
562673c0f228SLiu Bo 		 * disk have to be duplicated to go to the new disk as well.
562773c0f228SLiu Bo 		 *
562873c0f228SLiu Bo 		 * Note that device->missing is handled by the caller, and that
562973c0f228SLiu Bo 		 * the write to the old disk is already set up in the stripes
563073c0f228SLiu Bo 		 * array.
563173c0f228SLiu Bo 		 */
563273c0f228SLiu Bo 		index_where_to_add = num_stripes;
563373c0f228SLiu Bo 		for (i = 0; i < num_stripes; i++) {
563473c0f228SLiu Bo 			if (bbio->stripes[i].dev->devid == srcdev_devid) {
563573c0f228SLiu Bo 				/* write to new disk, too */
563673c0f228SLiu Bo 				struct btrfs_bio_stripe *new =
563773c0f228SLiu Bo 					bbio->stripes + index_where_to_add;
563873c0f228SLiu Bo 				struct btrfs_bio_stripe *old =
563973c0f228SLiu Bo 					bbio->stripes + i;
564073c0f228SLiu Bo 
564173c0f228SLiu Bo 				new->physical = old->physical;
564273c0f228SLiu Bo 				new->length = old->length;
564373c0f228SLiu Bo 				new->dev = dev_replace->tgtdev;
564473c0f228SLiu Bo 				bbio->tgtdev_map[i] = index_where_to_add;
564573c0f228SLiu Bo 				index_where_to_add++;
564673c0f228SLiu Bo 				max_errors++;
564773c0f228SLiu Bo 				tgtdev_indexes++;
564873c0f228SLiu Bo 			}
564973c0f228SLiu Bo 		}
565073c0f228SLiu Bo 		num_stripes = index_where_to_add;
565173c0f228SLiu Bo 	} else if (op == BTRFS_MAP_GET_READ_MIRRORS) {
565273c0f228SLiu Bo 		int index_srcdev = 0;
565373c0f228SLiu Bo 		int found = 0;
565473c0f228SLiu Bo 		u64 physical_of_found = 0;
565573c0f228SLiu Bo 
565673c0f228SLiu Bo 		/*
565773c0f228SLiu Bo 		 * During the dev-replace procedure, the target drive can also
565873c0f228SLiu Bo 		 * be used to read data in case it is needed to repair a corrupt
565973c0f228SLiu Bo 		 * block elsewhere. This is possible if the requested area is
566073c0f228SLiu Bo 		 * left of the left cursor. In this area, the target drive is a
566173c0f228SLiu Bo 		 * full copy of the source drive.
566273c0f228SLiu Bo 		 */
566373c0f228SLiu Bo 		for (i = 0; i < num_stripes; i++) {
566473c0f228SLiu Bo 			if (bbio->stripes[i].dev->devid == srcdev_devid) {
566573c0f228SLiu Bo 				/*
566673c0f228SLiu Bo 				 * In case of DUP, in order to keep it simple,
566773c0f228SLiu Bo 				 * only add the mirror with the lowest physical
566873c0f228SLiu Bo 				 * address
566973c0f228SLiu Bo 				 */
567073c0f228SLiu Bo 				if (found &&
567173c0f228SLiu Bo 				    physical_of_found <=
567273c0f228SLiu Bo 				     bbio->stripes[i].physical)
567373c0f228SLiu Bo 					continue;
567473c0f228SLiu Bo 				index_srcdev = i;
567573c0f228SLiu Bo 				found = 1;
567673c0f228SLiu Bo 				physical_of_found = bbio->stripes[i].physical;
567773c0f228SLiu Bo 			}
567873c0f228SLiu Bo 		}
567973c0f228SLiu Bo 		if (found) {
568073c0f228SLiu Bo 			struct btrfs_bio_stripe *tgtdev_stripe =
568173c0f228SLiu Bo 				bbio->stripes + num_stripes;
568273c0f228SLiu Bo 
568373c0f228SLiu Bo 			tgtdev_stripe->physical = physical_of_found;
568473c0f228SLiu Bo 			tgtdev_stripe->length =
568573c0f228SLiu Bo 				bbio->stripes[index_srcdev].length;
568673c0f228SLiu Bo 			tgtdev_stripe->dev = dev_replace->tgtdev;
568773c0f228SLiu Bo 			bbio->tgtdev_map[index_srcdev] = num_stripes;
568873c0f228SLiu Bo 
568973c0f228SLiu Bo 			tgtdev_indexes++;
569073c0f228SLiu Bo 			num_stripes++;
569173c0f228SLiu Bo 		}
569273c0f228SLiu Bo 	}
569373c0f228SLiu Bo 
569473c0f228SLiu Bo 	*num_stripes_ret = num_stripes;
569573c0f228SLiu Bo 	*max_errors_ret = max_errors;
569673c0f228SLiu Bo 	bbio->num_tgtdevs = tgtdev_indexes;
569773c0f228SLiu Bo 	*bbio_ret = bbio;
569873c0f228SLiu Bo }
569973c0f228SLiu Bo 
57002b19a1feSLiu Bo static bool need_full_stripe(enum btrfs_map_op op)
57012b19a1feSLiu Bo {
57022b19a1feSLiu Bo 	return (op == BTRFS_MAP_WRITE || op == BTRFS_MAP_GET_READ_MIRRORS);
57032b19a1feSLiu Bo }
57042b19a1feSLiu Bo 
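/*
 * A brief summary of __btrfs_map_block(): translate a logical range of a
 * chunk into the physical stripes (device + physical offset) that back it,
 * according to the chunk's RAID profile, and return them in *bbio_ret.
 * Discards are delegated to __btrfs_map_block_for_discard().  For profiles
 * other than single, *length is trimmed so the mapping never crosses a
 * stripe (or, for RAID5/6 writes, a full stripe), and mirror_num can be
 * used to request one specific copy.  An ongoing dev-replace is handled by
 * patching in or duplicating stripes for the replace target device.
 */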
5705cf8cddd3SChristoph Hellwig static int __btrfs_map_block(struct btrfs_fs_info *fs_info,
5706cf8cddd3SChristoph Hellwig 			     enum btrfs_map_op op,
5707cea9e445SChris Mason 			     u64 logical, u64 *length,
5708a1d3c478SJan Schmidt 			     struct btrfs_bio **bbio_ret,
57098e5cfb55SZhao Lei 			     int mirror_num, int need_raid_map)
57100b86a832SChris Mason {
57110b86a832SChris Mason 	struct extent_map *em;
57120b86a832SChris Mason 	struct map_lookup *map;
57130b86a832SChris Mason 	u64 offset;
5714593060d7SChris Mason 	u64 stripe_offset;
5715593060d7SChris Mason 	u64 stripe_nr;
571653b381b3SDavid Woodhouse 	u64 stripe_len;
57179d644a62SDavid Sterba 	u32 stripe_index;
5718cea9e445SChris Mason 	int i;
5719de11cc12SLi Zefan 	int ret = 0;
5720f2d8d74dSChris Mason 	int num_stripes;
5721a236aed1SChris Mason 	int max_errors = 0;
57222c8cdd6eSMiao Xie 	int tgtdev_indexes = 0;
5723a1d3c478SJan Schmidt 	struct btrfs_bio *bbio = NULL;
5724472262f3SStefan Behrens 	struct btrfs_dev_replace *dev_replace = &fs_info->dev_replace;
5725472262f3SStefan Behrens 	int dev_replace_is_ongoing = 0;
5726472262f3SStefan Behrens 	int num_alloc_stripes;
5727ad6d620eSStefan Behrens 	int patch_the_first_stripe_for_dev_replace = 0;
5728ad6d620eSStefan Behrens 	u64 physical_to_patch_in_first_stripe = 0;
572953b381b3SDavid Woodhouse 	u64 raid56_full_stripe_start = (u64)-1;
57300b86a832SChris Mason 
57310b3d4cd3SLiu Bo 	if (op == BTRFS_MAP_DISCARD)
57320b3d4cd3SLiu Bo 		return __btrfs_map_block_for_discard(fs_info, logical,
57330b3d4cd3SLiu Bo 						     *length, bbio_ret);
57340b3d4cd3SLiu Bo 
5735592d92eeSLiu Bo 	em = get_chunk_map(fs_info, logical, *length);
5736592d92eeSLiu Bo 	if (IS_ERR(em))
5737592d92eeSLiu Bo 		return PTR_ERR(em);
57389bb91873SJosef Bacik 
573995617d69SJeff Mahoney 	map = em->map_lookup;
57400b86a832SChris Mason 	offset = logical - em->start;
5741593060d7SChris Mason 
574253b381b3SDavid Woodhouse 	stripe_len = map->stripe_len;
5743593060d7SChris Mason 	stripe_nr = offset;
5744593060d7SChris Mason 	/*
5745593060d7SChris Mason 	 * stripe_nr counts the total number of stripes we have to stride
5746593060d7SChris Mason 	 * to get to this block
5747593060d7SChris Mason 	 */
574847c5713fSDavid Sterba 	stripe_nr = div64_u64(stripe_nr, stripe_len);
5749593060d7SChris Mason 
575053b381b3SDavid Woodhouse 	stripe_offset = stripe_nr * stripe_len;
5751e042d1ecSJosef Bacik 	if (offset < stripe_offset) {
57525d163e0eSJeff Mahoney 		btrfs_crit(fs_info,
57535d163e0eSJeff Mahoney 			   "stripe math has gone wrong, stripe_offset=%llu, offset=%llu, start=%llu, logical=%llu, stripe_len=%llu",
5754e042d1ecSJosef Bacik 			   stripe_offset, offset, em->start, logical,
5755e042d1ecSJosef Bacik 			   stripe_len);
5756e042d1ecSJosef Bacik 		free_extent_map(em);
5757e042d1ecSJosef Bacik 		return -EINVAL;
5758e042d1ecSJosef Bacik 	}
5759593060d7SChris Mason 
5760593060d7SChris Mason 	/* stripe_offset is the offset of this block in its stripe */
5761593060d7SChris Mason 	stripe_offset = offset - stripe_offset;
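	/*
	 * Illustrative example (hypothetical values): with stripe_len = 64K
	 * and offset = 200K into the chunk, stripe_nr = 3 and
	 * stripe_offset = 200K - 3 * 64K = 8K.
	 */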
5762593060d7SChris Mason 
576353b381b3SDavid Woodhouse 	/* if we're here for raid56, we need to know the stripe aligned start */
5764ffe2d203SZhao Lei 	if (map->type & BTRFS_BLOCK_GROUP_RAID56_MASK) {
576553b381b3SDavid Woodhouse 		unsigned long full_stripe_len = stripe_len * nr_data_stripes(map);
576653b381b3SDavid Woodhouse 		raid56_full_stripe_start = offset;
576753b381b3SDavid Woodhouse 
576853b381b3SDavid Woodhouse 		/* allow a write of a full stripe, but make sure we don't
576953b381b3SDavid Woodhouse 		 * allow straddling of stripes
577053b381b3SDavid Woodhouse 		 */
577147c5713fSDavid Sterba 		raid56_full_stripe_start = div64_u64(raid56_full_stripe_start,
577247c5713fSDavid Sterba 				full_stripe_len);
577353b381b3SDavid Woodhouse 		raid56_full_stripe_start *= full_stripe_len;
577453b381b3SDavid Woodhouse 	}
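	/*
	 * Illustrative example (hypothetical values): for RAID5 over 4
	 * devices (3 data stripes) with stripe_len = 64K, full_stripe_len is
	 * 192K, so an offset of 300K rounds down to
	 * raid56_full_stripe_start = 192K.
	 */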
577553b381b3SDavid Woodhouse 
57760b3d4cd3SLiu Bo 	if (map->type & BTRFS_BLOCK_GROUP_PROFILE_MASK) {
577753b381b3SDavid Woodhouse 		u64 max_len;
577853b381b3SDavid Woodhouse 		/*
577853b381b3SDavid Woodhouse 		 * For writes to RAID[56], allow a full stripeset across all
577953b381b3SDavid Woodhouse 		 * disks.  For other RAID types and for RAID[56] reads, just
578053b381b3SDavid Woodhouse 		 * allow a single stripe (on a single disk).
578053b381b3SDavid Woodhouse 		 */
5781ffe2d203SZhao Lei 		if ((map->type & BTRFS_BLOCK_GROUP_RAID56_MASK) &&
5782cf8cddd3SChristoph Hellwig 		    (op == BTRFS_MAP_WRITE)) {
578353b381b3SDavid Woodhouse 			max_len = stripe_len * nr_data_stripes(map) -
578453b381b3SDavid Woodhouse 				(offset - raid56_full_stripe_start);
578553b381b3SDavid Woodhouse 		} else {
5786cea9e445SChris Mason 			/* we limit the length of each bio to what fits in a stripe */
578753b381b3SDavid Woodhouse 			max_len = stripe_len - stripe_offset;
578853b381b3SDavid Woodhouse 		}
578953b381b3SDavid Woodhouse 		*length = min_t(u64, em->len - offset, max_len);
5790cea9e445SChris Mason 	} else {
5791cea9e445SChris Mason 		*length = em->len - offset;
5792cea9e445SChris Mason 	}
5793f2d8d74dSChris Mason 
579453b381b3SDavid Woodhouse 	/*
579453b381b3SDavid Woodhouse 	 * This is for when we're called from btrfs_merge_bio_hook() and all
579553b381b3SDavid Woodhouse 	 * it cares about is the length.
579553b381b3SDavid Woodhouse 	 */
5796a1d3c478SJan Schmidt 	if (!bbio_ret)
5797cea9e445SChris Mason 		goto out;
5798cea9e445SChris Mason 
579973beece9SLiu Bo 	btrfs_dev_replace_lock(dev_replace, 0);
5800472262f3SStefan Behrens 	dev_replace_is_ongoing = btrfs_dev_replace_is_ongoing(dev_replace);
5801472262f3SStefan Behrens 	if (!dev_replace_is_ongoing)
580273beece9SLiu Bo 		btrfs_dev_replace_unlock(dev_replace, 0);
580373beece9SLiu Bo 	else
580473beece9SLiu Bo 		btrfs_dev_replace_set_lock_blocking(dev_replace);
5805472262f3SStefan Behrens 
5806ad6d620eSStefan Behrens 	if (dev_replace_is_ongoing && mirror_num == map->num_stripes + 1 &&
58072b19a1feSLiu Bo 	    !need_full_stripe(op) && dev_replace->tgtdev != NULL) {
58085ab56090SLiu Bo 		ret = get_extra_mirror_from_replace(fs_info, logical, *length,
58095ab56090SLiu Bo 						    dev_replace->srcdev->devid,
58105ab56090SLiu Bo 						    &mirror_num,
58115ab56090SLiu Bo 					    &physical_to_patch_in_first_stripe);
58125ab56090SLiu Bo 		if (ret)
5813ad6d620eSStefan Behrens 			goto out;
58145ab56090SLiu Bo 		else
581594a97dfeSZhao Lei 			patch_the_first_stripe_for_dev_replace = 1;
5816ad6d620eSStefan Behrens 	} else if (mirror_num > map->num_stripes) {
5817ad6d620eSStefan Behrens 		mirror_num = 0;
5818ad6d620eSStefan Behrens 	}
5819ad6d620eSStefan Behrens 
5820f2d8d74dSChris Mason 	num_stripes = 1;
5821cea9e445SChris Mason 	stripe_index = 0;
5822fce3bb9aSLi Dongyang 	if (map->type & BTRFS_BLOCK_GROUP_RAID0) {
582347c5713fSDavid Sterba 		stripe_nr = div_u64_rem(stripe_nr, map->num_stripes,
582447c5713fSDavid Sterba 				&stripe_index);
5825de483734SAnand Jain 		if (!need_full_stripe(op))
582628e1cc7dSMiao Xie 			mirror_num = 1;
5827fce3bb9aSLi Dongyang 	} else if (map->type & BTRFS_BLOCK_GROUP_RAID1) {
5828de483734SAnand Jain 		if (need_full_stripe(op))
5829f2d8d74dSChris Mason 			num_stripes = map->num_stripes;
58302fff734fSChris Mason 		else if (mirror_num)
5831f188591eSChris Mason 			stripe_index = mirror_num - 1;
5832dfe25020SChris Mason 		else {
583330d9861fSStefan Behrens 			stripe_index = find_live_mirror(fs_info, map, 0,
5834dfe25020SChris Mason 					    map->num_stripes,
583530d9861fSStefan Behrens 					    current->pid % map->num_stripes,
583630d9861fSStefan Behrens 					    dev_replace_is_ongoing);
5837a1d3c478SJan Schmidt 			mirror_num = stripe_index + 1;
5838dfe25020SChris Mason 		}
58392fff734fSChris Mason 
5840611f0e00SChris Mason 	} else if (map->type & BTRFS_BLOCK_GROUP_DUP) {
5841de483734SAnand Jain 		if (need_full_stripe(op)) {
5842f2d8d74dSChris Mason 			num_stripes = map->num_stripes;
5843a1d3c478SJan Schmidt 		} else if (mirror_num) {
5844f188591eSChris Mason 			stripe_index = mirror_num - 1;
5845a1d3c478SJan Schmidt 		} else {
5846a1d3c478SJan Schmidt 			mirror_num = 1;
5847a1d3c478SJan Schmidt 		}
58482fff734fSChris Mason 
5849321aecc6SChris Mason 	} else if (map->type & BTRFS_BLOCK_GROUP_RAID10) {
58509d644a62SDavid Sterba 		u32 factor = map->num_stripes / map->sub_stripes;
5851321aecc6SChris Mason 
585247c5713fSDavid Sterba 		stripe_nr = div_u64_rem(stripe_nr, factor, &stripe_index);
5853321aecc6SChris Mason 		stripe_index *= map->sub_stripes;
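		/*
		 * Illustrative example (hypothetical values): with 4 stripes
		 * and sub_stripes = 2, factor = 2; a logical stripe_nr of 5
		 * becomes stripe_nr = 2 on disk with
		 * stripe_index = 1 * 2 = 2, i.e. the data lives on the
		 * second pair of devices.
		 */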
5854321aecc6SChris Mason 
5855de483734SAnand Jain 		if (need_full_stripe(op))
5856f2d8d74dSChris Mason 			num_stripes = map->sub_stripes;
5857321aecc6SChris Mason 		else if (mirror_num)
5858321aecc6SChris Mason 			stripe_index += mirror_num - 1;
5859dfe25020SChris Mason 		else {
58603e74317aSJan Schmidt 			int old_stripe_index = stripe_index;
586130d9861fSStefan Behrens 			stripe_index = find_live_mirror(fs_info, map,
586230d9861fSStefan Behrens 					      stripe_index,
5863dfe25020SChris Mason 					      map->sub_stripes, stripe_index +
586430d9861fSStefan Behrens 					      current->pid % map->sub_stripes,
586530d9861fSStefan Behrens 					      dev_replace_is_ongoing);
58663e74317aSJan Schmidt 			mirror_num = stripe_index - old_stripe_index + 1;
5867dfe25020SChris Mason 		}
586853b381b3SDavid Woodhouse 
5869ffe2d203SZhao Lei 	} else if (map->type & BTRFS_BLOCK_GROUP_RAID56_MASK) {
5870de483734SAnand Jain 		if (need_raid_map && (need_full_stripe(op) || mirror_num > 1)) {
587153b381b3SDavid Woodhouse 			/* push stripe_nr back to the start of the full stripe */
587242c61ab6SLiu Bo 			stripe_nr = div64_u64(raid56_full_stripe_start,
5873b8b93addSDavid Sterba 					stripe_len * nr_data_stripes(map));
587453b381b3SDavid Woodhouse 
587553b381b3SDavid Woodhouse 			/* RAID[56] write or recovery. Return all stripes */
587653b381b3SDavid Woodhouse 			num_stripes = map->num_stripes;
587753b381b3SDavid Woodhouse 			max_errors = nr_parity_stripes(map);
587853b381b3SDavid Woodhouse 
587953b381b3SDavid Woodhouse 			*length = map->stripe_len;
588053b381b3SDavid Woodhouse 			stripe_index = 0;
588153b381b3SDavid Woodhouse 			stripe_offset = 0;
588253b381b3SDavid Woodhouse 		} else {
588353b381b3SDavid Woodhouse 			/*
588453b381b3SDavid Woodhouse 			 * Mirror #0 or #1 means the original data block.
588553b381b3SDavid Woodhouse 			 * Mirror #2 is RAID5 parity block.
588653b381b3SDavid Woodhouse 			 * Mirror #3 is RAID6 Q block.
588753b381b3SDavid Woodhouse 			 */
588847c5713fSDavid Sterba 			stripe_nr = div_u64_rem(stripe_nr,
588947c5713fSDavid Sterba 					nr_data_stripes(map), &stripe_index);
589053b381b3SDavid Woodhouse 			if (mirror_num > 1)
589153b381b3SDavid Woodhouse 				stripe_index = nr_data_stripes(map) +
589253b381b3SDavid Woodhouse 						mirror_num - 2;
589353b381b3SDavid Woodhouse 
589453b381b3SDavid Woodhouse 			/* We distribute the parity blocks across stripes */
589547c5713fSDavid Sterba 			div_u64_rem(stripe_nr + stripe_index, map->num_stripes,
589647c5713fSDavid Sterba 					&stripe_index);
5897de483734SAnand Jain 			if (!need_full_stripe(op) && mirror_num <= 1)
589828e1cc7dSMiao Xie 				mirror_num = 1;
589953b381b3SDavid Woodhouse 		}
59008790d502SChris Mason 	} else {
5901593060d7SChris Mason 		/*
590247c5713fSDavid Sterba 		 * after this, stripe_nr is the number of stripes on this
590347c5713fSDavid Sterba 		 * device we have to walk to find the data, and stripe_index is
590447c5713fSDavid Sterba 		 * the number of our device in the stripe array
5905593060d7SChris Mason 		 */
590647c5713fSDavid Sterba 		stripe_nr = div_u64_rem(stripe_nr, map->num_stripes,
590747c5713fSDavid Sterba 				&stripe_index);
5908a1d3c478SJan Schmidt 		mirror_num = stripe_index + 1;
59098790d502SChris Mason 	}
5910e042d1ecSJosef Bacik 	if (stripe_index >= map->num_stripes) {
59115d163e0eSJeff Mahoney 		btrfs_crit(fs_info,
59125d163e0eSJeff Mahoney 			   "stripe index math went horribly wrong, got stripe_index=%u, num_stripes=%u",
5913e042d1ecSJosef Bacik 			   stripe_index, map->num_stripes);
5914e042d1ecSJosef Bacik 		ret = -EINVAL;
5915e042d1ecSJosef Bacik 		goto out;
5916e042d1ecSJosef Bacik 	}
5917593060d7SChris Mason 
5918472262f3SStefan Behrens 	num_alloc_stripes = num_stripes;
59196fad823fSLiu Bo 	if (dev_replace_is_ongoing && dev_replace->tgtdev != NULL) {
59200b3d4cd3SLiu Bo 		if (op == BTRFS_MAP_WRITE)
5921472262f3SStefan Behrens 			num_alloc_stripes <<= 1;
5922cf8cddd3SChristoph Hellwig 		if (op == BTRFS_MAP_GET_READ_MIRRORS)
5923ad6d620eSStefan Behrens 			num_alloc_stripes++;
59242c8cdd6eSMiao Xie 		tgtdev_indexes = num_stripes;
5925ad6d620eSStefan Behrens 	}
59262c8cdd6eSMiao Xie 
59276e9606d2SZhao Lei 	bbio = alloc_btrfs_bio(num_alloc_stripes, tgtdev_indexes);
5928de11cc12SLi Zefan 	if (!bbio) {
5929de11cc12SLi Zefan 		ret = -ENOMEM;
5930de11cc12SLi Zefan 		goto out;
5931de11cc12SLi Zefan 	}
59326fad823fSLiu Bo 	if (dev_replace_is_ongoing && dev_replace->tgtdev != NULL)
59332c8cdd6eSMiao Xie 		bbio->tgtdev_map = (int *)(bbio->stripes + num_alloc_stripes);
5934de11cc12SLi Zefan 
59358e5cfb55SZhao Lei 	/* build raid_map */
59362b19a1feSLiu Bo 	if (map->type & BTRFS_BLOCK_GROUP_RAID56_MASK && need_raid_map &&
59372b19a1feSLiu Bo 	    (need_full_stripe(op) || mirror_num > 1)) {
59388e5cfb55SZhao Lei 		u64 tmp;
59399d644a62SDavid Sterba 		unsigned rot;
59408e5cfb55SZhao Lei 
59418e5cfb55SZhao Lei 		bbio->raid_map = (u64 *)((void *)bbio->stripes +
59428e5cfb55SZhao Lei 				 sizeof(struct btrfs_bio_stripe) *
59438e5cfb55SZhao Lei 				 num_alloc_stripes +
59448e5cfb55SZhao Lei 				 sizeof(int) * tgtdev_indexes);
59458e5cfb55SZhao Lei 
59468e5cfb55SZhao Lei 		/* Work out the disk rotation on this stripe-set */
594747c5713fSDavid Sterba 		div_u64_rem(stripe_nr, num_stripes, &rot);
59488e5cfb55SZhao Lei 
59498e5cfb55SZhao Lei 		/* Fill in the logical address of each stripe */
59508e5cfb55SZhao Lei 		tmp = stripe_nr * nr_data_stripes(map);
59518e5cfb55SZhao Lei 		for (i = 0; i < nr_data_stripes(map); i++)
59528e5cfb55SZhao Lei 			bbio->raid_map[(i+rot) % num_stripes] =
59538e5cfb55SZhao Lei 				em->start + (tmp + i) * map->stripe_len;
59548e5cfb55SZhao Lei 
59558e5cfb55SZhao Lei 		bbio->raid_map[(i+rot) % map->num_stripes] = RAID5_P_STRIPE;
59568e5cfb55SZhao Lei 		if (map->type & BTRFS_BLOCK_GROUP_RAID6)
59578e5cfb55SZhao Lei 			bbio->raid_map[(i+rot+1) % num_stripes] =
59588e5cfb55SZhao Lei 				RAID6_Q_STRIPE;
59598e5cfb55SZhao Lei 	}
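	/*
	 * Illustrative example (hypothetical values): for RAID5 with
	 * num_stripes = 3 (2 data + P) and stripe_nr = 4, rot = 4 % 3 = 1,
	 * so raid_map[1] and raid_map[2] hold the logical addresses of the
	 * two data stripes and raid_map[0] is marked RAID5_P_STRIPE.
	 */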
59608e5cfb55SZhao Lei 
5962f2d8d74dSChris Mason 	for (i = 0; i < num_stripes; i++) {
5963a1d3c478SJan Schmidt 		bbio->stripes[i].physical =
5964f2d8d74dSChris Mason 			map->stripes[stripe_index].physical +
5965fce3bb9aSLi Dongyang 			stripe_offset +
5966fce3bb9aSLi Dongyang 			stripe_nr * map->stripe_len;
5967a1d3c478SJan Schmidt 		bbio->stripes[i].dev =
5968fce3bb9aSLi Dongyang 			map->stripes[stripe_index].dev;
5969cea9e445SChris Mason 		stripe_index++;
5970593060d7SChris Mason 	}
5971de11cc12SLi Zefan 
59722b19a1feSLiu Bo 	if (need_full_stripe(op))
5973d20983b4SMiao Xie 		max_errors = btrfs_chunk_max_errors(map);
5974de11cc12SLi Zefan 
59758e5cfb55SZhao Lei 	if (bbio->raid_map)
59768e5cfb55SZhao Lei 		sort_parity_stripes(bbio, num_stripes);
5977cc7539edSZhao Lei 
597873c0f228SLiu Bo 	if (dev_replace_is_ongoing && dev_replace->tgtdev != NULL &&
59792b19a1feSLiu Bo 	    need_full_stripe(op)) {
598073c0f228SLiu Bo 		handle_ops_on_dev_replace(op, &bbio, dev_replace, &num_stripes,
598173c0f228SLiu Bo 					  &max_errors);
5982ad6d620eSStefan Behrens 	}
5983472262f3SStefan Behrens 
5984a1d3c478SJan Schmidt 	*bbio_ret = bbio;
598510f11900SZhao Lei 	bbio->map_type = map->type;
5986a1d3c478SJan Schmidt 	bbio->num_stripes = num_stripes;
5987a1d3c478SJan Schmidt 	bbio->max_errors = max_errors;
5988a1d3c478SJan Schmidt 	bbio->mirror_num = mirror_num;
5989ad6d620eSStefan Behrens 
5990ad6d620eSStefan Behrens 	/*
5991ad6d620eSStefan Behrens 	 * This is the case that the operation is a read,
5992ad6d620eSStefan Behrens 	 * dev_replace_is_ongoing, mirror_num == num_stripes + 1 and the
5993ad6d620eSStefan Behrens 	 * dev-replace target drive is available as a mirror.
5994ad6d620eSStefan Behrens 	 */
5995ad6d620eSStefan Behrens 	if (patch_the_first_stripe_for_dev_replace && num_stripes > 0) {
5996ad6d620eSStefan Behrens 		WARN_ON(num_stripes > 1);
5997ad6d620eSStefan Behrens 		bbio->stripes[0].dev = dev_replace->tgtdev;
5998ad6d620eSStefan Behrens 		bbio->stripes[0].physical = physical_to_patch_in_first_stripe;
5999ad6d620eSStefan Behrens 		bbio->mirror_num = map->num_stripes + 1;
6000ad6d620eSStefan Behrens 	}
6001cea9e445SChris Mason out:
600273beece9SLiu Bo 	if (dev_replace_is_ongoing) {
600373beece9SLiu Bo 		btrfs_dev_replace_clear_lock_blocking(dev_replace);
600473beece9SLiu Bo 		btrfs_dev_replace_unlock(dev_replace, 0);
600573beece9SLiu Bo 	}
60060b86a832SChris Mason 	free_extent_map(em);
6007de11cc12SLi Zefan 	return ret;
60080b86a832SChris Mason }
60090b86a832SChris Mason 
6010cf8cddd3SChristoph Hellwig int btrfs_map_block(struct btrfs_fs_info *fs_info, enum btrfs_map_op op,
6011f2d8d74dSChris Mason 		      u64 logical, u64 *length,
6012a1d3c478SJan Schmidt 		      struct btrfs_bio **bbio_ret, int mirror_num)
6013f2d8d74dSChris Mason {
6014b3d3fa51SMike Christie 	return __btrfs_map_block(fs_info, op, logical, length, bbio_ret,
60158e5cfb55SZhao Lei 				 mirror_num, 0);
6016f2d8d74dSChris Mason }
6017f2d8d74dSChris Mason 
6018af8e2d1dSMiao Xie /* For Scrub/replace */
6019cf8cddd3SChristoph Hellwig int btrfs_map_sblock(struct btrfs_fs_info *fs_info, enum btrfs_map_op op,
6020af8e2d1dSMiao Xie 		     u64 logical, u64 *length,
6021825ad4c9SDavid Sterba 		     struct btrfs_bio **bbio_ret)
6022af8e2d1dSMiao Xie {
6023825ad4c9SDavid Sterba 	return __btrfs_map_block(fs_info, op, logical, length, bbio_ret, 0, 1);
6024af8e2d1dSMiao Xie }
6025af8e2d1dSMiao Xie 
6026ab8d0fc4SJeff Mahoney int btrfs_rmap_block(struct btrfs_fs_info *fs_info,
6027a512bbf8SYan Zheng 		     u64 chunk_start, u64 physical, u64 devid,
6028a512bbf8SYan Zheng 		     u64 **logical, int *naddrs, int *stripe_len)
6029a512bbf8SYan Zheng {
6030a512bbf8SYan Zheng 	struct extent_map *em;
6031a512bbf8SYan Zheng 	struct map_lookup *map;
6032a512bbf8SYan Zheng 	u64 *buf;
6033a512bbf8SYan Zheng 	u64 bytenr;
6034a512bbf8SYan Zheng 	u64 length;
6035a512bbf8SYan Zheng 	u64 stripe_nr;
603653b381b3SDavid Woodhouse 	u64 rmap_len;
6037a512bbf8SYan Zheng 	int i, j, nr = 0;
6038a512bbf8SYan Zheng 
6039592d92eeSLiu Bo 	em = get_chunk_map(fs_info, chunk_start, 1);
6040592d92eeSLiu Bo 	if (IS_ERR(em))
6041835d974fSJosef Bacik 		return -EIO;
6042835d974fSJosef Bacik 
604395617d69SJeff Mahoney 	map = em->map_lookup;
6044a512bbf8SYan Zheng 	length = em->len;
604553b381b3SDavid Woodhouse 	rmap_len = map->stripe_len;
604653b381b3SDavid Woodhouse 
6047a512bbf8SYan Zheng 	if (map->type & BTRFS_BLOCK_GROUP_RAID10)
6048b8b93addSDavid Sterba 		length = div_u64(length, map->num_stripes / map->sub_stripes);
6049a512bbf8SYan Zheng 	else if (map->type & BTRFS_BLOCK_GROUP_RAID0)
6050b8b93addSDavid Sterba 		length = div_u64(length, map->num_stripes);
6051ffe2d203SZhao Lei 	else if (map->type & BTRFS_BLOCK_GROUP_RAID56_MASK) {
6052b8b93addSDavid Sterba 		length = div_u64(length, nr_data_stripes(map));
605353b381b3SDavid Woodhouse 		rmap_len = map->stripe_len * nr_data_stripes(map);
605453b381b3SDavid Woodhouse 	}
6055a512bbf8SYan Zheng 
605631e818feSDavid Sterba 	buf = kcalloc(map->num_stripes, sizeof(u64), GFP_NOFS);
605779787eaaSJeff Mahoney 	BUG_ON(!buf); /* -ENOMEM */
6058a512bbf8SYan Zheng 
6059a512bbf8SYan Zheng 	for (i = 0; i < map->num_stripes; i++) {
6060a512bbf8SYan Zheng 		if (devid && map->stripes[i].dev->devid != devid)
6061a512bbf8SYan Zheng 			continue;
6062a512bbf8SYan Zheng 		if (map->stripes[i].physical > physical ||
6063a512bbf8SYan Zheng 		    map->stripes[i].physical + length <= physical)
6064a512bbf8SYan Zheng 			continue;
6065a512bbf8SYan Zheng 
6066a512bbf8SYan Zheng 		stripe_nr = physical - map->stripes[i].physical;
606742c61ab6SLiu Bo 		stripe_nr = div64_u64(stripe_nr, map->stripe_len);
6068a512bbf8SYan Zheng 
6069a512bbf8SYan Zheng 		if (map->type & BTRFS_BLOCK_GROUP_RAID10) {
6070a512bbf8SYan Zheng 			stripe_nr = stripe_nr * map->num_stripes + i;
6071b8b93addSDavid Sterba 			stripe_nr = div_u64(stripe_nr, map->sub_stripes);
6072a512bbf8SYan Zheng 		} else if (map->type & BTRFS_BLOCK_GROUP_RAID0) {
6073a512bbf8SYan Zheng 			stripe_nr = stripe_nr * map->num_stripes + i;
607453b381b3SDavid Woodhouse 		}
607453b381b3SDavid Woodhouse 		/*
607553b381b3SDavid Woodhouse 		 * else if RAID[56], multiply by nr_data_stripes().
607553b381b3SDavid Woodhouse 		 * Alternatively, just use rmap_len below instead of
607653b381b3SDavid Woodhouse 		 * map->stripe_len.
607653b381b3SDavid Woodhouse 		 */
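		/*
		 * Illustrative example (hypothetical values): for RAID10 with
		 * 4 stripes and sub_stripes = 2, physical stripe 1 on the
		 * device at index 3 gives stripe_nr = (1 * 4 + 3) / 2 = 3,
		 * i.e. logical address chunk_start + 3 * rmap_len.
		 */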
607753b381b3SDavid Woodhouse 
607853b381b3SDavid Woodhouse 		bytenr = chunk_start + stripe_nr * rmap_len;
6079934d375bSChris Mason 		WARN_ON(nr >= map->num_stripes);
6080a512bbf8SYan Zheng 		for (j = 0; j < nr; j++) {
6081a512bbf8SYan Zheng 			if (buf[j] == bytenr)
6082a512bbf8SYan Zheng 				break;
6083a512bbf8SYan Zheng 		}
6084934d375bSChris Mason 		if (j == nr) {
6085934d375bSChris Mason 			WARN_ON(nr >= map->num_stripes);
6086a512bbf8SYan Zheng 			buf[nr++] = bytenr;
6087a512bbf8SYan Zheng 		}
6088934d375bSChris Mason 	}
6089a512bbf8SYan Zheng 
6090a512bbf8SYan Zheng 	*logical = buf;
6091a512bbf8SYan Zheng 	*naddrs = nr;
609253b381b3SDavid Woodhouse 	*stripe_len = rmap_len;
6093a512bbf8SYan Zheng 
6094a512bbf8SYan Zheng 	free_extent_map(em);
6095a512bbf8SYan Zheng 	return 0;
6096a512bbf8SYan Zheng }
6097a512bbf8SYan Zheng 
60984246a0b6SChristoph Hellwig static inline void btrfs_end_bbio(struct btrfs_bio *bbio, struct bio *bio)
60998408c716SMiao Xie {
6100326e1dbbSMike Snitzer 	bio->bi_private = bbio->private;
6101326e1dbbSMike Snitzer 	bio->bi_end_io = bbio->end_io;
61024246a0b6SChristoph Hellwig 	bio_endio(bio);
6103326e1dbbSMike Snitzer 
61046e9606d2SZhao Lei 	btrfs_put_bbio(bbio);
61058408c716SMiao Xie }
61068408c716SMiao Xie 
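/*
 * Per-stripe bio completion (a short summary of the logic below): record
 * read/write/flush error statistics for the device that failed, and once the
 * last outstanding stripe bio finishes, complete the original bio.  The
 * original bio reports success as long as no more than bbio->max_errors
 * stripes failed.
 */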
61074246a0b6SChristoph Hellwig static void btrfs_end_bio(struct bio *bio)
61088790d502SChris Mason {
61099be3395bSChris Mason 	struct btrfs_bio *bbio = bio->bi_private;
61107d2b4daaSChris Mason 	int is_orig_bio = 0;
61118790d502SChris Mason 
61124e4cbee9SChristoph Hellwig 	if (bio->bi_status) {
6113a1d3c478SJan Schmidt 		atomic_inc(&bbio->error);
61144e4cbee9SChristoph Hellwig 		if (bio->bi_status == BLK_STS_IOERR ||
61154e4cbee9SChristoph Hellwig 		    bio->bi_status == BLK_STS_TARGET) {
6116442a4f63SStefan Behrens 			unsigned int stripe_index =
61179be3395bSChris Mason 				btrfs_io_bio(bio)->stripe_index;
611865f53338SZhao Lei 			struct btrfs_device *dev;
6119442a4f63SStefan Behrens 
6120442a4f63SStefan Behrens 			BUG_ON(stripe_index >= bbio->num_stripes);
6121442a4f63SStefan Behrens 			dev = bbio->stripes[stripe_index].dev;
6122597a60faSStefan Behrens 			if (dev->bdev) {
612337226b21SMike Christie 				if (bio_op(bio) == REQ_OP_WRITE)
61241cb34c8eSAnand Jain 					btrfs_dev_stat_inc_and_print(dev,
6125442a4f63SStefan Behrens 						BTRFS_DEV_STAT_WRITE_ERRS);
6126442a4f63SStefan Behrens 				else
61271cb34c8eSAnand Jain 					btrfs_dev_stat_inc_and_print(dev,
6128442a4f63SStefan Behrens 						BTRFS_DEV_STAT_READ_ERRS);
612970fd7614SChristoph Hellwig 				if (bio->bi_opf & REQ_PREFLUSH)
61301cb34c8eSAnand Jain 					btrfs_dev_stat_inc_and_print(dev,
6131442a4f63SStefan Behrens 						BTRFS_DEV_STAT_FLUSH_ERRS);
6132442a4f63SStefan Behrens 			}
6133442a4f63SStefan Behrens 		}
6134597a60faSStefan Behrens 	}
61358790d502SChris Mason 
6136a1d3c478SJan Schmidt 	if (bio == bbio->orig_bio)
61377d2b4daaSChris Mason 		is_orig_bio = 1;
61387d2b4daaSChris Mason 
6139c404e0dcSMiao Xie 	btrfs_bio_counter_dec(bbio->fs_info);
6140c404e0dcSMiao Xie 
6141a1d3c478SJan Schmidt 	if (atomic_dec_and_test(&bbio->stripes_pending)) {
61427d2b4daaSChris Mason 		if (!is_orig_bio) {
61437d2b4daaSChris Mason 			bio_put(bio);
6144a1d3c478SJan Schmidt 			bio = bbio->orig_bio;
61457d2b4daaSChris Mason 		}
6146c7b22bb1SMuthu Kumar 
61479be3395bSChris Mason 		btrfs_io_bio(bio)->mirror_num = bbio->mirror_num;
6148a236aed1SChris Mason 		/* only send an error to the higher layers if it is
614953b381b3SDavid Woodhouse 		 * beyond the tolerance of the btrfs bio
6150a236aed1SChris Mason 		 */
6151a1d3c478SJan Schmidt 		if (atomic_read(&bbio->error) > bbio->max_errors) {
61524e4cbee9SChristoph Hellwig 			bio->bi_status = BLK_STS_IOERR;
61535dbc8fcaSChris Mason 		} else {
61541259ab75SChris Mason 			/*
61551259ab75SChris Mason 			 * this bio is actually up to date, we didn't
61561259ab75SChris Mason 			 * go over the max number of errors
61571259ab75SChris Mason 			 */
61582dbe0c77SAnand Jain 			bio->bi_status = BLK_STS_OK;
61591259ab75SChris Mason 		}
6160c55f1396SMiao Xie 
61614246a0b6SChristoph Hellwig 		btrfs_end_bbio(bbio, bio);
61627d2b4daaSChris Mason 	} else if (!is_orig_bio) {
61638790d502SChris Mason 		bio_put(bio);
61648790d502SChris Mason 	}
61658790d502SChris Mason }
61668790d502SChris Mason 
61678b712842SChris Mason /*
61688b712842SChris Mason  * see run_scheduled_bios for a description of why bios are collected for
61698b712842SChris Mason  * async submit.
61708b712842SChris Mason  *
61718b712842SChris Mason  * This will add one bio to the pending list for a device and make sure
61728b712842SChris Mason  * the work struct is scheduled.
61738b712842SChris Mason  */
61742ff7e61eSJeff Mahoney static noinline void btrfs_schedule_bio(struct btrfs_device *device,
61754e49ea4aSMike Christie 					struct bio *bio)
61768b712842SChris Mason {
61770b246afaSJeff Mahoney 	struct btrfs_fs_info *fs_info = device->fs_info;
61788b712842SChris Mason 	int should_queue = 1;
6179ffbd517dSChris Mason 	struct btrfs_pending_bios *pending_bios;
61808b712842SChris Mason 
6181e6e674bdSAnand Jain 	if (test_bit(BTRFS_DEV_STATE_MISSING, &device->dev_state) ||
6182e6e674bdSAnand Jain 	    !device->bdev) {
61834246a0b6SChristoph Hellwig 		bio_io_error(bio);
618453b381b3SDavid Woodhouse 		return;
618553b381b3SDavid Woodhouse 	}
618653b381b3SDavid Woodhouse 
61878b712842SChris Mason 	/* don't bother with additional async steps for reads, right now */
618837226b21SMike Christie 	if (bio_op(bio) == REQ_OP_READ) {
61894e49ea4aSMike Christie 		btrfsic_submit_bio(bio);
6190143bede5SJeff Mahoney 		return;
61918b712842SChris Mason 	}
61928b712842SChris Mason 
6193492bb6deSChris Mason 	WARN_ON(bio->bi_next);
61948b712842SChris Mason 	bio->bi_next = NULL;
61958b712842SChris Mason 
61968b712842SChris Mason 	spin_lock(&device->io_lock);
619767f055c7SChristoph Hellwig 	if (op_is_sync(bio->bi_opf))
6198ffbd517dSChris Mason 		pending_bios = &device->pending_sync_bios;
6199ffbd517dSChris Mason 	else
6200ffbd517dSChris Mason 		pending_bios = &device->pending_bios;
62018b712842SChris Mason 
6202ffbd517dSChris Mason 	if (pending_bios->tail)
6203ffbd517dSChris Mason 		pending_bios->tail->bi_next = bio;
62048b712842SChris Mason 
6205ffbd517dSChris Mason 	pending_bios->tail = bio;
6206ffbd517dSChris Mason 	if (!pending_bios->head)
6207ffbd517dSChris Mason 		pending_bios->head = bio;
62088b712842SChris Mason 	if (device->running_pending)
62098b712842SChris Mason 		should_queue = 0;
62108b712842SChris Mason 
62118b712842SChris Mason 	spin_unlock(&device->io_lock);
62128b712842SChris Mason 
62138b712842SChris Mason 	if (should_queue)
62140b246afaSJeff Mahoney 		btrfs_queue_work(fs_info->submit_workers, &device->work);
62158b712842SChris Mason }
62168b712842SChris Mason 
62172ff7e61eSJeff Mahoney static void submit_stripe_bio(struct btrfs_bio *bbio, struct bio *bio,
62182ff7e61eSJeff Mahoney 			      u64 physical, int dev_nr, int async)
6219de1ee92aSJosef Bacik {
6220de1ee92aSJosef Bacik 	struct btrfs_device *dev = bbio->stripes[dev_nr].dev;
62212ff7e61eSJeff Mahoney 	struct btrfs_fs_info *fs_info = bbio->fs_info;
6222de1ee92aSJosef Bacik 
6223de1ee92aSJosef Bacik 	bio->bi_private = bbio;
62249be3395bSChris Mason 	btrfs_io_bio(bio)->stripe_index = dev_nr;
6225de1ee92aSJosef Bacik 	bio->bi_end_io = btrfs_end_bio;
62264f024f37SKent Overstreet 	bio->bi_iter.bi_sector = physical >> 9;
6227de1ee92aSJosef Bacik #ifdef DEBUG
6228de1ee92aSJosef Bacik 	{
6229de1ee92aSJosef Bacik 		struct rcu_string *name;
6230de1ee92aSJosef Bacik 
6231de1ee92aSJosef Bacik 		rcu_read_lock();
6232de1ee92aSJosef Bacik 		name = rcu_dereference(dev->name);
6233ab8d0fc4SJeff Mahoney 		btrfs_debug(fs_info,
6234ab8d0fc4SJeff Mahoney 			"btrfs_map_bio: rw %d 0x%x, sector=%llu, dev=%lu (%s id %llu), size=%u",
6235ab8d0fc4SJeff Mahoney 			bio_op(bio), bio->bi_opf,
6236ab8d0fc4SJeff Mahoney 			(u64)bio->bi_iter.bi_sector,
62375d163e0eSJeff Mahoney 			(u_long)dev->bdev->bd_dev, name->str, dev->devid,
62385d163e0eSJeff Mahoney 			bio->bi_iter.bi_size);
6239de1ee92aSJosef Bacik 		rcu_read_unlock();
6240de1ee92aSJosef Bacik 	}
6241de1ee92aSJosef Bacik #endif
624274d46992SChristoph Hellwig 	bio_set_dev(bio, dev->bdev);
6243c404e0dcSMiao Xie 
62442ff7e61eSJeff Mahoney 	btrfs_bio_counter_inc_noblocked(fs_info);
6245c404e0dcSMiao Xie 
6246de1ee92aSJosef Bacik 	if (async)
62472ff7e61eSJeff Mahoney 		btrfs_schedule_bio(dev, bio);
6248de1ee92aSJosef Bacik 	else
62494e49ea4aSMike Christie 		btrfsic_submit_bio(bio);
6250de1ee92aSJosef Bacik }
6251de1ee92aSJosef Bacik 
6252de1ee92aSJosef Bacik static void bbio_error(struct btrfs_bio *bbio, struct bio *bio, u64 logical)
6253de1ee92aSJosef Bacik {
6254de1ee92aSJosef Bacik 	atomic_inc(&bbio->error);
6255de1ee92aSJosef Bacik 	if (atomic_dec_and_test(&bbio->stripes_pending)) {
625601327610SNicholas D Steeves 		/* Should be the original bio. */
62578408c716SMiao Xie 		WARN_ON(bio != bbio->orig_bio);
62588408c716SMiao Xie 
62599be3395bSChris Mason 		btrfs_io_bio(bio)->mirror_num = bbio->mirror_num;
62604f024f37SKent Overstreet 		bio->bi_iter.bi_sector = logical >> 9;
6261102ed2c5SAnand Jain 		if (atomic_read(&bbio->error) > bbio->max_errors)
62624e4cbee9SChristoph Hellwig 			bio->bi_status = BLK_STS_IOERR;
6263102ed2c5SAnand Jain 		else
6264102ed2c5SAnand Jain 			bio->bi_status = BLK_STS_OK;
62654246a0b6SChristoph Hellwig 		btrfs_end_bbio(bbio, bio);
6266de1ee92aSJosef Bacik 	}
6267de1ee92aSJosef Bacik }
6268de1ee92aSJosef Bacik 
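/*
 * A short summary of btrfs_map_bio(): map @bio's logical range to physical
 * stripes with __btrfs_map_block() and submit one bio per stripe (cloning
 * @bio for all but the last one).  RAID5/6 full-stripe writes and repair
 * reads are handed off to the raid56 code instead of being submitted here
 * directly.
 */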
626958efbc9fSOmar Sandoval blk_status_t btrfs_map_bio(struct btrfs_fs_info *fs_info, struct bio *bio,
62708b712842SChris Mason 			   int mirror_num, int async_submit)
62710b86a832SChris Mason {
62720b86a832SChris Mason 	struct btrfs_device *dev;
62738790d502SChris Mason 	struct bio *first_bio = bio;
62744f024f37SKent Overstreet 	u64 logical = (u64)bio->bi_iter.bi_sector << 9;
62750b86a832SChris Mason 	u64 length = 0;
62760b86a832SChris Mason 	u64 map_length;
62770b86a832SChris Mason 	int ret;
627808da757dSZhao Lei 	int dev_nr;
627908da757dSZhao Lei 	int total_devs;
6280a1d3c478SJan Schmidt 	struct btrfs_bio *bbio = NULL;
62810b86a832SChris Mason 
62824f024f37SKent Overstreet 	length = bio->bi_iter.bi_size;
62830b86a832SChris Mason 	map_length = length;
6284cea9e445SChris Mason 
62850b246afaSJeff Mahoney 	btrfs_bio_counter_inc_blocked(fs_info);
6286bd7d63c2SLiu Bo 	ret = __btrfs_map_block(fs_info, btrfs_op(bio), logical,
628737226b21SMike Christie 				&map_length, &bbio, mirror_num, 1);
6288c404e0dcSMiao Xie 	if (ret) {
62890b246afaSJeff Mahoney 		btrfs_bio_counter_dec(fs_info);
629058efbc9fSOmar Sandoval 		return errno_to_blk_status(ret);
6291c404e0dcSMiao Xie 	}
6292cea9e445SChris Mason 
6293a1d3c478SJan Schmidt 	total_devs = bbio->num_stripes;
629453b381b3SDavid Woodhouse 	bbio->orig_bio = first_bio;
629553b381b3SDavid Woodhouse 	bbio->private = first_bio->bi_private;
629653b381b3SDavid Woodhouse 	bbio->end_io = first_bio->bi_end_io;
62970b246afaSJeff Mahoney 	bbio->fs_info = fs_info;
629853b381b3SDavid Woodhouse 	atomic_set(&bbio->stripes_pending, bbio->num_stripes);
629953b381b3SDavid Woodhouse 
6300ad1ba2a0SZhao Lei 	if ((bbio->map_type & BTRFS_BLOCK_GROUP_RAID56_MASK) &&
630137226b21SMike Christie 	    ((bio_op(bio) == REQ_OP_WRITE) || (mirror_num > 1))) {
630253b381b3SDavid Woodhouse 		/*
630253b381b3SDavid Woodhouse 		 * In this case, map_length has been set to the length of
630353b381b3SDavid Woodhouse 		 * a single stripe, not the whole write.
630353b381b3SDavid Woodhouse 		 */
630437226b21SMike Christie 		if (bio_op(bio) == REQ_OP_WRITE) {
63052ff7e61eSJeff Mahoney 			ret = raid56_parity_write(fs_info, bio, bbio,
63062ff7e61eSJeff Mahoney 						  map_length);
630753b381b3SDavid Woodhouse 		} else {
63082ff7e61eSJeff Mahoney 			ret = raid56_parity_recover(fs_info, bio, bbio,
63092ff7e61eSJeff Mahoney 						    map_length, mirror_num, 1);
631053b381b3SDavid Woodhouse 		}
63114245215dSMiao Xie 
63120b246afaSJeff Mahoney 		btrfs_bio_counter_dec(fs_info);
631358efbc9fSOmar Sandoval 		return errno_to_blk_status(ret);
631453b381b3SDavid Woodhouse 	}
631553b381b3SDavid Woodhouse 
6316239b14b3SChris Mason 	if (map_length < length) {
63170b246afaSJeff Mahoney 		btrfs_crit(fs_info,
63185d163e0eSJeff Mahoney 			   "mapping failed logical %llu bio len %llu len %llu",
6319c1c9ff7cSGeert Uytterhoeven 			   logical, length, map_length);
6320239b14b3SChris Mason 		BUG();
6321239b14b3SChris Mason 	}
6322a1d3c478SJan Schmidt 
632308da757dSZhao Lei 	for (dev_nr = 0; dev_nr < total_devs; dev_nr++) {
6324de1ee92aSJosef Bacik 		dev = bbio->stripes[dev_nr].dev;
632537226b21SMike Christie 		if (!dev || !dev->bdev ||
6326ebbede42SAnand Jain 		    (bio_op(first_bio) == REQ_OP_WRITE &&
6327ebbede42SAnand Jain 		    !test_bit(BTRFS_DEV_STATE_WRITEABLE, &dev->dev_state))) {
6328de1ee92aSJosef Bacik 			bbio_error(bbio, first_bio, logical);
6329de1ee92aSJosef Bacik 			continue;
6330de1ee92aSJosef Bacik 		}
6331de1ee92aSJosef Bacik 
63323aa8e074SDavid Sterba 		if (dev_nr < total_devs - 1)
63338b6c1d56SDavid Sterba 			bio = btrfs_bio_clone(first_bio);
63343aa8e074SDavid Sterba 		else
63358790d502SChris Mason 			bio = first_bio;
6336606686eeSJosef Bacik 
63372ff7e61eSJeff Mahoney 		submit_stripe_bio(bbio, bio, bbio->stripes[dev_nr].physical,
63382ff7e61eSJeff Mahoney 				  dev_nr, async_submit);
63398790d502SChris Mason 	}
63400b246afaSJeff Mahoney 	btrfs_bio_counter_dec(fs_info);
634158efbc9fSOmar Sandoval 	return BLK_STS_OK;
63420b86a832SChris Mason }
63430b86a832SChris Mason 
6344aa1b8cd4SStefan Behrens struct btrfs_device *btrfs_find_device(struct btrfs_fs_info *fs_info, u64 devid,
63452b82032cSYan Zheng 				       u8 *uuid, u8 *fsid)
63460b86a832SChris Mason {
63472b82032cSYan Zheng 	struct btrfs_device *device;
63482b82032cSYan Zheng 	struct btrfs_fs_devices *cur_devices;
63490b86a832SChris Mason 
6350aa1b8cd4SStefan Behrens 	cur_devices = fs_info->fs_devices;
63512b82032cSYan Zheng 	while (cur_devices) {
63522b82032cSYan Zheng 		if (!fsid ||
635344880fdcSAnand Jain 		    !memcmp(cur_devices->fsid, fsid, BTRFS_FSID_SIZE)) {
635435c70103SDavid Sterba 			device = find_device(cur_devices, devid, uuid);
63552b82032cSYan Zheng 			if (device)
63562b82032cSYan Zheng 				return device;
63572b82032cSYan Zheng 		}
63582b82032cSYan Zheng 		cur_devices = cur_devices->seed;
63592b82032cSYan Zheng 	}
63602b82032cSYan Zheng 	return NULL;
63610b86a832SChris Mason }
63620b86a832SChris Mason 
63632ff7e61eSJeff Mahoney static struct btrfs_device *add_missing_dev(struct btrfs_fs_devices *fs_devices,
6364dfe25020SChris Mason 					    u64 devid, u8 *dev_uuid)
6365dfe25020SChris Mason {
6366dfe25020SChris Mason 	struct btrfs_device *device;
6367dfe25020SChris Mason 
636812bd2fc0SIlya Dryomov 	device = btrfs_alloc_device(NULL, &devid, dev_uuid);
636912bd2fc0SIlya Dryomov 	if (IS_ERR(device))
6370adfb69afSAnand Jain 		return device;
637112bd2fc0SIlya Dryomov 
637212bd2fc0SIlya Dryomov 	list_add(&device->dev_list, &fs_devices->devices);
6373e4404d6eSYan Zheng 	device->fs_devices = fs_devices;
6374dfe25020SChris Mason 	fs_devices->num_devices++;
637512bd2fc0SIlya Dryomov 
6376e6e674bdSAnand Jain 	set_bit(BTRFS_DEV_STATE_MISSING, &device->dev_state);
6377cd02dca5SChris Mason 	fs_devices->missing_devices++;
637812bd2fc0SIlya Dryomov 
6379dfe25020SChris Mason 	return device;
6380dfe25020SChris Mason }
6381dfe25020SChris Mason 
638212bd2fc0SIlya Dryomov /**
638312bd2fc0SIlya Dryomov  * btrfs_alloc_device - allocate struct btrfs_device
638412bd2fc0SIlya Dryomov  * @fs_info:	used only for generating a new devid, can be NULL if
638512bd2fc0SIlya Dryomov  *		devid is provided (i.e. @devid != NULL).
638612bd2fc0SIlya Dryomov  * @devid:	a pointer to devid for this device.  If NULL a new devid
638712bd2fc0SIlya Dryomov  *		is generated.
638812bd2fc0SIlya Dryomov  * @uuid:	a pointer to UUID for this device.  If NULL a new UUID
638912bd2fc0SIlya Dryomov  *		is generated.
639012bd2fc0SIlya Dryomov  *
639112bd2fc0SIlya Dryomov  * Return: a pointer to a new &struct btrfs_device on success; ERR_PTR()
639248dae9cfSDavid Sterba  * on error.  Returned struct is not linked onto any lists and must be
639348dae9cfSDavid Sterba  * destroyed with free_device.
639412bd2fc0SIlya Dryomov  */
639512bd2fc0SIlya Dryomov struct btrfs_device *btrfs_alloc_device(struct btrfs_fs_info *fs_info,
639612bd2fc0SIlya Dryomov 					const u64 *devid,
639712bd2fc0SIlya Dryomov 					const u8 *uuid)
639812bd2fc0SIlya Dryomov {
639912bd2fc0SIlya Dryomov 	struct btrfs_device *dev;
640012bd2fc0SIlya Dryomov 	u64 tmp;
640112bd2fc0SIlya Dryomov 
6402fae7f21cSDulshani Gunawardhana 	if (WARN_ON(!devid && !fs_info))
640312bd2fc0SIlya Dryomov 		return ERR_PTR(-EINVAL);
640412bd2fc0SIlya Dryomov 
640512bd2fc0SIlya Dryomov 	dev = __alloc_device();
640612bd2fc0SIlya Dryomov 	if (IS_ERR(dev))
640712bd2fc0SIlya Dryomov 		return dev;
640812bd2fc0SIlya Dryomov 
640912bd2fc0SIlya Dryomov 	if (devid)
641012bd2fc0SIlya Dryomov 		tmp = *devid;
641112bd2fc0SIlya Dryomov 	else {
641212bd2fc0SIlya Dryomov 		int ret;
641312bd2fc0SIlya Dryomov 
641412bd2fc0SIlya Dryomov 		ret = find_next_devid(fs_info, &tmp);
641512bd2fc0SIlya Dryomov 		if (ret) {
641655de4803SDavid Sterba 			free_device(dev);
641712bd2fc0SIlya Dryomov 			return ERR_PTR(ret);
641812bd2fc0SIlya Dryomov 		}
641912bd2fc0SIlya Dryomov 	}
642012bd2fc0SIlya Dryomov 	dev->devid = tmp;
642112bd2fc0SIlya Dryomov 
642212bd2fc0SIlya Dryomov 	if (uuid)
642312bd2fc0SIlya Dryomov 		memcpy(dev->uuid, uuid, BTRFS_UUID_SIZE);
642412bd2fc0SIlya Dryomov 	else
642512bd2fc0SIlya Dryomov 		generate_random_uuid(dev->uuid);
642612bd2fc0SIlya Dryomov 
64279e0af237SLiu Bo 	btrfs_init_work(&dev->work, btrfs_submit_helper,
64289e0af237SLiu Bo 			pending_bios_fn, NULL, NULL);
642912bd2fc0SIlya Dryomov 
643012bd2fc0SIlya Dryomov 	return dev;
643112bd2fc0SIlya Dryomov }
643212bd2fc0SIlya Dryomov 
6433e06cd3ddSLiu Bo /* Return -EIO if any error, otherwise return 0. */
64342ff7e61eSJeff Mahoney static int btrfs_check_chunk_valid(struct btrfs_fs_info *fs_info,
6435e06cd3ddSLiu Bo 				   struct extent_buffer *leaf,
6436e06cd3ddSLiu Bo 				   struct btrfs_chunk *chunk, u64 logical)
6437e06cd3ddSLiu Bo {
6438e06cd3ddSLiu Bo 	u64 length;
6439e06cd3ddSLiu Bo 	u64 stripe_len;
6440e06cd3ddSLiu Bo 	u16 num_stripes;
6441e06cd3ddSLiu Bo 	u16 sub_stripes;
6442e06cd3ddSLiu Bo 	u64 type;
6443e06cd3ddSLiu Bo 
6444e06cd3ddSLiu Bo 	length = btrfs_chunk_length(leaf, chunk);
6445e06cd3ddSLiu Bo 	stripe_len = btrfs_chunk_stripe_len(leaf, chunk);
6446e06cd3ddSLiu Bo 	num_stripes = btrfs_chunk_num_stripes(leaf, chunk);
6447e06cd3ddSLiu Bo 	sub_stripes = btrfs_chunk_sub_stripes(leaf, chunk);
6448e06cd3ddSLiu Bo 	type = btrfs_chunk_type(leaf, chunk);
6449e06cd3ddSLiu Bo 
6450e06cd3ddSLiu Bo 	if (!num_stripes) {
64510b246afaSJeff Mahoney 		btrfs_err(fs_info, "invalid chunk num_stripes: %u",
6452e06cd3ddSLiu Bo 			  num_stripes);
6453e06cd3ddSLiu Bo 		return -EIO;
6454e06cd3ddSLiu Bo 	}
64550b246afaSJeff Mahoney 	if (!IS_ALIGNED(logical, fs_info->sectorsize)) {
64560b246afaSJeff Mahoney 		btrfs_err(fs_info, "invalid chunk logical %llu", logical);
6457e06cd3ddSLiu Bo 		return -EIO;
6458e06cd3ddSLiu Bo 	}
64590b246afaSJeff Mahoney 	if (btrfs_chunk_sector_size(leaf, chunk) != fs_info->sectorsize) {
64600b246afaSJeff Mahoney 		btrfs_err(fs_info, "invalid chunk sectorsize %u",
6461e06cd3ddSLiu Bo 			  btrfs_chunk_sector_size(leaf, chunk));
6462e06cd3ddSLiu Bo 		return -EIO;
6463e06cd3ddSLiu Bo 	}
64640b246afaSJeff Mahoney 	if (!length || !IS_ALIGNED(length, fs_info->sectorsize)) {
64650b246afaSJeff Mahoney 		btrfs_err(fs_info, "invalid chunk length %llu", length);
6466e06cd3ddSLiu Bo 		return -EIO;
6467e06cd3ddSLiu Bo 	}
6468e06cd3ddSLiu Bo 	if (!is_power_of_2(stripe_len) || stripe_len != BTRFS_STRIPE_LEN) {
64690b246afaSJeff Mahoney 		btrfs_err(fs_info, "invalid chunk stripe length: %llu",
6470e06cd3ddSLiu Bo 			  stripe_len);
6471e06cd3ddSLiu Bo 		return -EIO;
6472e06cd3ddSLiu Bo 	}
6473e06cd3ddSLiu Bo 	if (~(BTRFS_BLOCK_GROUP_TYPE_MASK | BTRFS_BLOCK_GROUP_PROFILE_MASK) &
6474e06cd3ddSLiu Bo 	    type) {
64750b246afaSJeff Mahoney 		btrfs_err(fs_info, "unrecognized chunk type: %llu",
6476e06cd3ddSLiu Bo 			  ~(BTRFS_BLOCK_GROUP_TYPE_MASK |
6477e06cd3ddSLiu Bo 			    BTRFS_BLOCK_GROUP_PROFILE_MASK) &
6478e06cd3ddSLiu Bo 			  btrfs_chunk_type(leaf, chunk));
6479e06cd3ddSLiu Bo 		return -EIO;
6480e06cd3ddSLiu Bo 	}
6481e06cd3ddSLiu Bo 	if ((type & BTRFS_BLOCK_GROUP_RAID10 && sub_stripes != 2) ||
6482e06cd3ddSLiu Bo 	    (type & BTRFS_BLOCK_GROUP_RAID1 && num_stripes < 1) ||
6483e06cd3ddSLiu Bo 	    (type & BTRFS_BLOCK_GROUP_RAID5 && num_stripes < 2) ||
6484e06cd3ddSLiu Bo 	    (type & BTRFS_BLOCK_GROUP_RAID6 && num_stripes < 3) ||
6485e06cd3ddSLiu Bo 	    (type & BTRFS_BLOCK_GROUP_DUP && num_stripes > 2) ||
6486e06cd3ddSLiu Bo 	    ((type & BTRFS_BLOCK_GROUP_PROFILE_MASK) == 0 &&
6487e06cd3ddSLiu Bo 	     num_stripes != 1)) {
64880b246afaSJeff Mahoney 		btrfs_err(fs_info,
6489e06cd3ddSLiu Bo 			"invalid num_stripes:sub_stripes %u:%u for profile %llu",
6490e06cd3ddSLiu Bo 			num_stripes, sub_stripes,
6491e06cd3ddSLiu Bo 			type & BTRFS_BLOCK_GROUP_PROFILE_MASK);
6492e06cd3ddSLiu Bo 		return -EIO;
6493e06cd3ddSLiu Bo 	}
6494e06cd3ddSLiu Bo 
6495e06cd3ddSLiu Bo 	return 0;
6496e06cd3ddSLiu Bo }
6497e06cd3ddSLiu Bo 
64985a2b8e60SAnand Jain static void btrfs_report_missing_device(struct btrfs_fs_info *fs_info,
64992b902dfcSAnand Jain 					u64 devid, u8 *uuid, bool error)
65005a2b8e60SAnand Jain {
65012b902dfcSAnand Jain 	if (error)
65022b902dfcSAnand Jain 		btrfs_err_rl(fs_info, "devid %llu uuid %pU is missing",
65032b902dfcSAnand Jain 			      devid, uuid);
65042b902dfcSAnand Jain 	else
65052b902dfcSAnand Jain 		btrfs_warn_rl(fs_info, "devid %llu uuid %pU is missing",
65062b902dfcSAnand Jain 			      devid, uuid);
65075a2b8e60SAnand Jain }
65085a2b8e60SAnand Jain 
65092ff7e61eSJeff Mahoney static int read_one_chunk(struct btrfs_fs_info *fs_info, struct btrfs_key *key,
65100b86a832SChris Mason 			  struct extent_buffer *leaf,
65110b86a832SChris Mason 			  struct btrfs_chunk *chunk)
65120b86a832SChris Mason {
65130b246afaSJeff Mahoney 	struct btrfs_mapping_tree *map_tree = &fs_info->mapping_tree;
65140b86a832SChris Mason 	struct map_lookup *map;
65150b86a832SChris Mason 	struct extent_map *em;
65160b86a832SChris Mason 	u64 logical;
65170b86a832SChris Mason 	u64 length;
65180b86a832SChris Mason 	u64 devid;
6519a443755fSChris Mason 	u8 uuid[BTRFS_UUID_SIZE];
6520593060d7SChris Mason 	int num_stripes;
65210b86a832SChris Mason 	int ret;
6522593060d7SChris Mason 	int i;
65230b86a832SChris Mason 
6524e17cade2SChris Mason 	logical = key->offset;
6525e17cade2SChris Mason 	length = btrfs_chunk_length(leaf, chunk);
6526f04b772bSQu Wenruo 	num_stripes = btrfs_chunk_num_stripes(leaf, chunk);
6527e06cd3ddSLiu Bo 
65282ff7e61eSJeff Mahoney 	ret = btrfs_check_chunk_valid(fs_info, leaf, chunk, logical);
6529e06cd3ddSLiu Bo 	if (ret)
6530e06cd3ddSLiu Bo 		return ret;
6531a061fc8dSChris Mason 
6532890871beSChris Mason 	read_lock(&map_tree->map_tree.lock);
65330b86a832SChris Mason 	em = lookup_extent_mapping(&map_tree->map_tree, logical, 1);
6534890871beSChris Mason 	read_unlock(&map_tree->map_tree.lock);
65350b86a832SChris Mason 
65360b86a832SChris Mason 	/* already mapped? */
65370b86a832SChris Mason 	if (em && em->start <= logical && em->start + em->len > logical) {
65380b86a832SChris Mason 		free_extent_map(em);
65390b86a832SChris Mason 		return 0;
65400b86a832SChris Mason 	} else if (em) {
65410b86a832SChris Mason 		free_extent_map(em);
65420b86a832SChris Mason 	}
65430b86a832SChris Mason 
6544172ddd60SDavid Sterba 	em = alloc_extent_map();
65450b86a832SChris Mason 	if (!em)
65460b86a832SChris Mason 		return -ENOMEM;
6547593060d7SChris Mason 	map = kmalloc(map_lookup_size(num_stripes), GFP_NOFS);
65480b86a832SChris Mason 	if (!map) {
65490b86a832SChris Mason 		free_extent_map(em);
65500b86a832SChris Mason 		return -ENOMEM;
65510b86a832SChris Mason 	}
65520b86a832SChris Mason 
6553298a8f9cSWang Shilong 	set_bit(EXTENT_FLAG_FS_MAPPING, &em->flags);
655495617d69SJeff Mahoney 	em->map_lookup = map;
65550b86a832SChris Mason 	em->start = logical;
65560b86a832SChris Mason 	em->len = length;
655770c8a91cSJosef Bacik 	em->orig_start = 0;
65580b86a832SChris Mason 	em->block_start = 0;
6559c8b97818SChris Mason 	em->block_len = em->len;
65600b86a832SChris Mason 
6561593060d7SChris Mason 	map->num_stripes = num_stripes;
6562593060d7SChris Mason 	map->io_width = btrfs_chunk_io_width(leaf, chunk);
6563593060d7SChris Mason 	map->io_align = btrfs_chunk_io_align(leaf, chunk);
6564593060d7SChris Mason 	map->stripe_len = btrfs_chunk_stripe_len(leaf, chunk);
6565593060d7SChris Mason 	map->type = btrfs_chunk_type(leaf, chunk);
6566321aecc6SChris Mason 	map->sub_stripes = btrfs_chunk_sub_stripes(leaf, chunk);
6567593060d7SChris Mason 	for (i = 0; i < num_stripes; i++) {
6568593060d7SChris Mason 		map->stripes[i].physical =
6569593060d7SChris Mason 			btrfs_stripe_offset_nr(leaf, chunk, i);
6570593060d7SChris Mason 		devid = btrfs_stripe_devid_nr(leaf, chunk, i);
6571a443755fSChris Mason 		read_extent_buffer(leaf, uuid, (unsigned long)
6572a443755fSChris Mason 				   btrfs_stripe_dev_uuid_nr(chunk, i),
6573a443755fSChris Mason 				   BTRFS_UUID_SIZE);
65740b246afaSJeff Mahoney 		map->stripes[i].dev = btrfs_find_device(fs_info, devid,
6575aa1b8cd4SStefan Behrens 							uuid, NULL);
65763cdde224SJeff Mahoney 		if (!map->stripes[i].dev &&
65770b246afaSJeff Mahoney 		    !btrfs_test_opt(fs_info, DEGRADED)) {
6578dfe25020SChris Mason 			free_extent_map(em);
65792b902dfcSAnand Jain 			btrfs_report_missing_device(fs_info, devid, uuid, true);
658045dbdbc9SAnand Jain 			return -ENOENT;
6581dfe25020SChris Mason 		}
6582dfe25020SChris Mason 		if (!map->stripes[i].dev) {
6583dfe25020SChris Mason 			map->stripes[i].dev =
65842ff7e61eSJeff Mahoney 				add_missing_dev(fs_info->fs_devices, devid,
65852ff7e61eSJeff Mahoney 						uuid);
6586adfb69afSAnand Jain 			if (IS_ERR(map->stripes[i].dev)) {
65870b86a832SChris Mason 				free_extent_map(em);
6588adfb69afSAnand Jain 				btrfs_err(fs_info,
6589adfb69afSAnand Jain 					"failed to init missing dev %llu: %ld",
6590adfb69afSAnand Jain 					devid, PTR_ERR(map->stripes[i].dev));
6591adfb69afSAnand Jain 				return PTR_ERR(map->stripes[i].dev);
65920b86a832SChris Mason 			}
65932b902dfcSAnand Jain 			btrfs_report_missing_device(fs_info, devid, uuid, false);
6594593060d7SChris Mason 		}
6595e12c9621SAnand Jain 		set_bit(BTRFS_DEV_STATE_IN_FS_METADATA,
6596e12c9621SAnand Jain 				&(map->stripes[i].dev->dev_state));
6597e12c9621SAnand Jain 
6598dfe25020SChris Mason 	}
65990b86a832SChris Mason 
6600890871beSChris Mason 	write_lock(&map_tree->map_tree.lock);
660109a2a8f9SJosef Bacik 	ret = add_extent_mapping(&map_tree->map_tree, em, 0);
6602890871beSChris Mason 	write_unlock(&map_tree->map_tree.lock);
660379787eaaSJeff Mahoney 	BUG_ON(ret); /* Tree corruption */
66040b86a832SChris Mason 	free_extent_map(em);
66050b86a832SChris Mason 
66060b86a832SChris Mason 	return 0;
66070b86a832SChris Mason }
66080b86a832SChris Mason 
6609143bede5SJeff Mahoney static void fill_device_from_item(struct extent_buffer *leaf,
66100b86a832SChris Mason 				 struct btrfs_dev_item *dev_item,
66110b86a832SChris Mason 				 struct btrfs_device *device)
66120b86a832SChris Mason {
66130b86a832SChris Mason 	unsigned long ptr;
66140b86a832SChris Mason 
66150b86a832SChris Mason 	device->devid = btrfs_device_id(leaf, dev_item);
6616d6397baeSChris Ball 	device->disk_total_bytes = btrfs_device_total_bytes(leaf, dev_item);
6617d6397baeSChris Ball 	device->total_bytes = device->disk_total_bytes;
6618935e5cc9SMiao Xie 	device->commit_total_bytes = device->disk_total_bytes;
66190b86a832SChris Mason 	device->bytes_used = btrfs_device_bytes_used(leaf, dev_item);
6620ce7213c7SMiao Xie 	device->commit_bytes_used = device->bytes_used;
66210b86a832SChris Mason 	device->type = btrfs_device_type(leaf, dev_item);
66220b86a832SChris Mason 	device->io_align = btrfs_device_io_align(leaf, dev_item);
66230b86a832SChris Mason 	device->io_width = btrfs_device_io_width(leaf, dev_item);
66240b86a832SChris Mason 	device->sector_size = btrfs_device_sector_size(leaf, dev_item);
66258dabb742SStefan Behrens 	WARN_ON(device->devid == BTRFS_DEV_REPLACE_DEVID);
6626401e29c1SAnand Jain 	clear_bit(BTRFS_DEV_STATE_REPLACE_TGT, &device->dev_state);
66270b86a832SChris Mason 
6628410ba3a2SGeert Uytterhoeven 	ptr = btrfs_device_uuid(dev_item);
6629e17cade2SChris Mason 	read_extent_buffer(leaf, device->uuid, ptr, BTRFS_UUID_SIZE);
66300b86a832SChris Mason }
66310b86a832SChris Mason 
66322ff7e61eSJeff Mahoney static struct btrfs_fs_devices *open_seed_devices(struct btrfs_fs_info *fs_info,
66335f375835SMiao Xie 						  u8 *fsid)
66342b82032cSYan Zheng {
66352b82032cSYan Zheng 	struct btrfs_fs_devices *fs_devices;
66362b82032cSYan Zheng 	int ret;
66372b82032cSYan Zheng 
6638b367e47fSLi Zefan 	BUG_ON(!mutex_is_locked(&uuid_mutex));
66392dfeca9bSDavid Sterba 	ASSERT(fsid);
66402b82032cSYan Zheng 
66410b246afaSJeff Mahoney 	fs_devices = fs_info->fs_devices->seed;
66422b82032cSYan Zheng 	while (fs_devices) {
664344880fdcSAnand Jain 		if (!memcmp(fs_devices->fsid, fsid, BTRFS_FSID_SIZE))
66445f375835SMiao Xie 			return fs_devices;
66455f375835SMiao Xie 
66462b82032cSYan Zheng 		fs_devices = fs_devices->seed;
66472b82032cSYan Zheng 	}
66482b82032cSYan Zheng 
66492b82032cSYan Zheng 	fs_devices = find_fsid(fsid);
66502b82032cSYan Zheng 	if (!fs_devices) {
66510b246afaSJeff Mahoney 		if (!btrfs_test_opt(fs_info, DEGRADED))
66525f375835SMiao Xie 			return ERR_PTR(-ENOENT);
66535f375835SMiao Xie 
66545f375835SMiao Xie 		fs_devices = alloc_fs_devices(fsid);
66555f375835SMiao Xie 		if (IS_ERR(fs_devices))
66565f375835SMiao Xie 			return fs_devices;
66575f375835SMiao Xie 
66585f375835SMiao Xie 		fs_devices->seeding = 1;
66595f375835SMiao Xie 		fs_devices->opened = 1;
66605f375835SMiao Xie 		return fs_devices;
66612b82032cSYan Zheng 	}
6662e4404d6eSYan Zheng 
6663e4404d6eSYan Zheng 	fs_devices = clone_fs_devices(fs_devices);
66645f375835SMiao Xie 	if (IS_ERR(fs_devices))
66655f375835SMiao Xie 		return fs_devices;
66662b82032cSYan Zheng 
666797288f2cSChristoph Hellwig 	ret = __btrfs_open_devices(fs_devices, FMODE_READ,
66680b246afaSJeff Mahoney 				   fs_info->bdev_holder);
666948d28232SJulia Lawall 	if (ret) {
667048d28232SJulia Lawall 		free_fs_devices(fs_devices);
66715f375835SMiao Xie 		fs_devices = ERR_PTR(ret);
66722b82032cSYan Zheng 		goto out;
667348d28232SJulia Lawall 	}
66742b82032cSYan Zheng 
66752b82032cSYan Zheng 	if (!fs_devices->seeding) {
66762b82032cSYan Zheng 		__btrfs_close_devices(fs_devices);
6677e4404d6eSYan Zheng 		free_fs_devices(fs_devices);
66785f375835SMiao Xie 		fs_devices = ERR_PTR(-EINVAL);
66792b82032cSYan Zheng 		goto out;
66802b82032cSYan Zheng 	}
66812b82032cSYan Zheng 
66820b246afaSJeff Mahoney 	fs_devices->seed = fs_info->fs_devices->seed;
66830b246afaSJeff Mahoney 	fs_info->fs_devices->seed = fs_devices;
66842b82032cSYan Zheng out:
66855f375835SMiao Xie 	return fs_devices;
66862b82032cSYan Zheng }
66872b82032cSYan Zheng 
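/*
 * Process one DEV_ITEM found while reading the chunk tree: look up the
 * matching btrfs_device (creating a missing-device stub on a DEGRADED
 * mount), move it to the correct fs_devices for seed filesystems, copy the
 * on-disk fields into it and account its free space.
 */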
66882ff7e61eSJeff Mahoney static int read_one_dev(struct btrfs_fs_info *fs_info,
66890b86a832SChris Mason 			struct extent_buffer *leaf,
66900b86a832SChris Mason 			struct btrfs_dev_item *dev_item)
66910b86a832SChris Mason {
66920b246afaSJeff Mahoney 	struct btrfs_fs_devices *fs_devices = fs_info->fs_devices;
66930b86a832SChris Mason 	struct btrfs_device *device;
66940b86a832SChris Mason 	u64 devid;
66950b86a832SChris Mason 	int ret;
669644880fdcSAnand Jain 	u8 fs_uuid[BTRFS_FSID_SIZE];
6697a443755fSChris Mason 	u8 dev_uuid[BTRFS_UUID_SIZE];
6698a443755fSChris Mason 
66990b86a832SChris Mason 	devid = btrfs_device_id(leaf, dev_item);
6700410ba3a2SGeert Uytterhoeven 	read_extent_buffer(leaf, dev_uuid, btrfs_device_uuid(dev_item),
6701a443755fSChris Mason 			   BTRFS_UUID_SIZE);
67021473b24eSGeert Uytterhoeven 	read_extent_buffer(leaf, fs_uuid, btrfs_device_fsid(dev_item),
670344880fdcSAnand Jain 			   BTRFS_FSID_SIZE);
67042b82032cSYan Zheng 
670544880fdcSAnand Jain 	if (memcmp(fs_uuid, fs_info->fsid, BTRFS_FSID_SIZE)) {
67062ff7e61eSJeff Mahoney 		fs_devices = open_seed_devices(fs_info, fs_uuid);
67075f375835SMiao Xie 		if (IS_ERR(fs_devices))
67085f375835SMiao Xie 			return PTR_ERR(fs_devices);
67092b82032cSYan Zheng 	}
67102b82032cSYan Zheng 
67110b246afaSJeff Mahoney 	device = btrfs_find_device(fs_info, devid, dev_uuid, fs_uuid);
67125f375835SMiao Xie 	if (!device) {
6713c5502451SQu Wenruo 		if (!btrfs_test_opt(fs_info, DEGRADED)) {
67142b902dfcSAnand Jain 			btrfs_report_missing_device(fs_info, devid,
67152b902dfcSAnand Jain 							dev_uuid, true);
671645dbdbc9SAnand Jain 			return -ENOENT;
6717c5502451SQu Wenruo 		}
67182b82032cSYan Zheng 
67192ff7e61eSJeff Mahoney 		device = add_missing_dev(fs_devices, devid, dev_uuid);
6720adfb69afSAnand Jain 		if (IS_ERR(device)) {
6721adfb69afSAnand Jain 			btrfs_err(fs_info,
6722adfb69afSAnand Jain 				"failed to add missing dev %llu: %ld",
6723adfb69afSAnand Jain 				devid, PTR_ERR(device));
6724adfb69afSAnand Jain 			return PTR_ERR(device);
6725adfb69afSAnand Jain 		}
67262b902dfcSAnand Jain 		btrfs_report_missing_device(fs_info, devid, dev_uuid, false);
67275f375835SMiao Xie 	} else {
6728c5502451SQu Wenruo 		if (!device->bdev) {
67292b902dfcSAnand Jain 			if (!btrfs_test_opt(fs_info, DEGRADED)) {
67302b902dfcSAnand Jain 				btrfs_report_missing_device(fs_info,
67312b902dfcSAnand Jain 						devid, dev_uuid, true);
673245dbdbc9SAnand Jain 				return -ENOENT;
6733c5502451SQu Wenruo 			}
67342b902dfcSAnand Jain 			btrfs_report_missing_device(fs_info, devid,
67352b902dfcSAnand Jain 							dev_uuid, false);
67362b902dfcSAnand Jain 		}
67375f375835SMiao Xie 
6738e6e674bdSAnand Jain 		if (!device->bdev &&
6739e6e674bdSAnand Jain 		    !test_bit(BTRFS_DEV_STATE_MISSING, &device->dev_state)) {
6740cd02dca5SChris Mason 			/*
6741cd02dca5SChris Mason 			 * This happens when a device that was properly set up
6742cd02dca5SChris Mason 			 * in the device info lists suddenly goes bad.
6743cd02dca5SChris Mason 			 * device->bdev is NULL, so we have to mark the device
6744cd02dca5SChris Mason 			 * missing (BTRFS_DEV_STATE_MISSING) here.
6745cd02dca5SChris Mason 			 */
67465f375835SMiao Xie 			device->fs_devices->missing_devices++;
6747e6e674bdSAnand Jain 			set_bit(BTRFS_DEV_STATE_MISSING, &device->dev_state);
67486324fbf3SChris Mason 		}
67495f375835SMiao Xie 
67505f375835SMiao Xie 		/* Move the device to its own fs_devices */
67515f375835SMiao Xie 		if (device->fs_devices != fs_devices) {
6752e6e674bdSAnand Jain 			ASSERT(test_bit(BTRFS_DEV_STATE_MISSING,
6753e6e674bdSAnand Jain 							&device->dev_state));
67545f375835SMiao Xie 
67555f375835SMiao Xie 			list_move(&device->dev_list, &fs_devices->devices);
67565f375835SMiao Xie 			device->fs_devices->num_devices--;
67575f375835SMiao Xie 			fs_devices->num_devices++;
67585f375835SMiao Xie 
67595f375835SMiao Xie 			device->fs_devices->missing_devices--;
67605f375835SMiao Xie 			fs_devices->missing_devices++;
67615f375835SMiao Xie 
67625f375835SMiao Xie 			device->fs_devices = fs_devices;
67635f375835SMiao Xie 		}
67642b82032cSYan Zheng 	}
67652b82032cSYan Zheng 
67660b246afaSJeff Mahoney 	if (device->fs_devices != fs_info->fs_devices) {
6767ebbede42SAnand Jain 		BUG_ON(test_bit(BTRFS_DEV_STATE_WRITEABLE, &device->dev_state));
67682b82032cSYan Zheng 		if (device->generation !=
67692b82032cSYan Zheng 		    btrfs_device_generation(leaf, dev_item))
67702b82032cSYan Zheng 			return -EINVAL;
67712b82032cSYan Zheng 	}
67720b86a832SChris Mason 
67730b86a832SChris Mason 	fill_device_from_item(leaf, dev_item, device);
6774e12c9621SAnand Jain 	set_bit(BTRFS_DEV_STATE_IN_FS_METADATA, &device->dev_state);
6775ebbede42SAnand Jain 	if (test_bit(BTRFS_DEV_STATE_WRITEABLE, &device->dev_state) &&
6776401e29c1SAnand Jain 	   !test_bit(BTRFS_DEV_STATE_REPLACE_TGT, &device->dev_state)) {
67772b82032cSYan Zheng 		device->fs_devices->total_rw_bytes += device->total_bytes;
6778a5ed45f8SNikolay Borisov 		atomic64_add(device->total_bytes - device->bytes_used,
6779a5ed45f8SNikolay Borisov 				&fs_info->free_chunk_space);
67802bf64758SJosef Bacik 	}
67810b86a832SChris Mason 	ret = 0;
67820b86a832SChris Mason 	return ret;
67830b86a832SChris Mason }
67840b86a832SChris Mason 
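/*
 * Read the sys_chunk_array embedded in the superblock and add the system
 * chunks it describes to the chunk mapping tree.  This must happen before
 * the chunk tree can be read, because the chunk tree itself lives in
 * SYSTEM block groups.
 */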
67856bccf3abSJeff Mahoney int btrfs_read_sys_array(struct btrfs_fs_info *fs_info)
67860b86a832SChris Mason {
67876bccf3abSJeff Mahoney 	struct btrfs_root *root = fs_info->tree_root;
6788ab8d0fc4SJeff Mahoney 	struct btrfs_super_block *super_copy = fs_info->super_copy;
6789a061fc8dSChris Mason 	struct extent_buffer *sb;
67900b86a832SChris Mason 	struct btrfs_disk_key *disk_key;
67910b86a832SChris Mason 	struct btrfs_chunk *chunk;
67921ffb22cfSDavid Sterba 	u8 *array_ptr;
67931ffb22cfSDavid Sterba 	unsigned long sb_array_offset;
679484eed90fSChris Mason 	int ret = 0;
67950b86a832SChris Mason 	u32 num_stripes;
67960b86a832SChris Mason 	u32 array_size;
67970b86a832SChris Mason 	u32 len = 0;
67981ffb22cfSDavid Sterba 	u32 cur_offset;
6799e06cd3ddSLiu Bo 	u64 type;
680084eed90fSChris Mason 	struct btrfs_key key;
68010b86a832SChris Mason 
68020b246afaSJeff Mahoney 	ASSERT(BTRFS_SUPER_INFO_SIZE <= fs_info->nodesize);
6803a83fffb7SDavid Sterba 	/*
6804a83fffb7SDavid Sterba 	 * This will create an extent buffer of nodesize; the superblock size
6805a83fffb7SDavid Sterba 	 * is fixed to BTRFS_SUPER_INFO_SIZE. If nodesize > sb size this will
6806a83fffb7SDavid Sterba 	 * overallocate, but we can keep it as-is since only the first page is used.
6807a83fffb7SDavid Sterba 	 */
68082ff7e61eSJeff Mahoney 	sb = btrfs_find_create_tree_block(fs_info, BTRFS_SUPER_INFO_OFFSET);
6809c871b0f2SLiu Bo 	if (IS_ERR(sb))
6810c871b0f2SLiu Bo 		return PTR_ERR(sb);
68114db8c528SDavid Sterba 	set_extent_buffer_uptodate(sb);
681285d4e461SChris Mason 	btrfs_set_buffer_lockdep_class(root->root_key.objectid, sb, 0);
68138a334426SDavid Sterba 	/*
681401327610SNicholas D Steeves 	 * The sb extent buffer is artificial and just used to read the system array.
68154db8c528SDavid Sterba 	 * The set_extent_buffer_uptodate() call does not properly mark all its
68168a334426SDavid Sterba 	 * pages up-to-date when the page is larger: extent does not cover the
68178a334426SDavid Sterba 	 * whole page and consequently check_page_uptodate does not find all
68188a334426SDavid Sterba 	 * the page's extents up-to-date (the hole beyond sb),
68198a334426SDavid Sterba 	 * write_extent_buffer then triggers a WARN_ON.
68208a334426SDavid Sterba 	 *
68218a334426SDavid Sterba 	 * Regular short extents go through the mark_extent_buffer_dirty/writeback
68228a334426SDavid Sterba 	 * cycle, but sb spans only this function. Add an explicit SetPageUptodate
68238a334426SDavid Sterba 	 * call to silence the warning, e.g. on PowerPC 64.
68248a334426SDavid Sterba 	 */
682509cbfeafSKirill A. Shutemov 	if (PAGE_SIZE > BTRFS_SUPER_INFO_SIZE)
6826727011e0SChris Mason 		SetPageUptodate(sb->pages[0]);
68274008c04aSChris Mason 
6828a061fc8dSChris Mason 	write_extent_buffer(sb, super_copy, 0, BTRFS_SUPER_INFO_SIZE);
68290b86a832SChris Mason 	array_size = btrfs_super_sys_array_size(super_copy);
68300b86a832SChris Mason 
68311ffb22cfSDavid Sterba 	array_ptr = super_copy->sys_chunk_array;
68321ffb22cfSDavid Sterba 	sb_array_offset = offsetof(struct btrfs_super_block, sys_chunk_array);
68331ffb22cfSDavid Sterba 	cur_offset = 0;
68340b86a832SChris Mason 
68351ffb22cfSDavid Sterba 	while (cur_offset < array_size) {
68361ffb22cfSDavid Sterba 		disk_key = (struct btrfs_disk_key *)array_ptr;
6837e3540eabSDavid Sterba 		len = sizeof(*disk_key);
6838e3540eabSDavid Sterba 		if (cur_offset + len > array_size)
6839e3540eabSDavid Sterba 			goto out_short_read;
6840e3540eabSDavid Sterba 
68410b86a832SChris Mason 		btrfs_disk_key_to_cpu(&key, disk_key);
68420b86a832SChris Mason 
68431ffb22cfSDavid Sterba 		array_ptr += len;
68441ffb22cfSDavid Sterba 		sb_array_offset += len;
68451ffb22cfSDavid Sterba 		cur_offset += len;
68460b86a832SChris Mason 
68470d81ba5dSChris Mason 		if (key.type == BTRFS_CHUNK_ITEM_KEY) {
68481ffb22cfSDavid Sterba 			chunk = (struct btrfs_chunk *)sb_array_offset;
6849e3540eabSDavid Sterba 			/*
6850e3540eabSDavid Sterba 			 * At least one btrfs_chunk with one stripe must be
6851e3540eabSDavid Sterba 			 * present; the exact stripe count check comes afterwards.
6852e3540eabSDavid Sterba 			 */
6853e3540eabSDavid Sterba 			len = btrfs_chunk_item_size(1);
6854e3540eabSDavid Sterba 			if (cur_offset + len > array_size)
6855e3540eabSDavid Sterba 				goto out_short_read;
6856e3540eabSDavid Sterba 
6857e3540eabSDavid Sterba 			num_stripes = btrfs_chunk_num_stripes(sb, chunk);
6858f5cdedd7SDavid Sterba 			if (!num_stripes) {
6859ab8d0fc4SJeff Mahoney 				btrfs_err(fs_info,
6860ab8d0fc4SJeff Mahoney 					"invalid number of stripes %u in sys_array at offset %u",
6861f5cdedd7SDavid Sterba 					num_stripes, cur_offset);
6862f5cdedd7SDavid Sterba 				ret = -EIO;
6863f5cdedd7SDavid Sterba 				break;
6864f5cdedd7SDavid Sterba 			}
6865f5cdedd7SDavid Sterba 
6866e06cd3ddSLiu Bo 			type = btrfs_chunk_type(sb, chunk);
6867e06cd3ddSLiu Bo 			if ((type & BTRFS_BLOCK_GROUP_SYSTEM) == 0) {
6868ab8d0fc4SJeff Mahoney 				btrfs_err(fs_info,
6869e06cd3ddSLiu Bo 			    "invalid chunk type %llu in sys_array at offset %u",
6870e06cd3ddSLiu Bo 					type, cur_offset);
6871e06cd3ddSLiu Bo 				ret = -EIO;
6872e06cd3ddSLiu Bo 				break;
6873e06cd3ddSLiu Bo 			}
6874e06cd3ddSLiu Bo 
6875e3540eabSDavid Sterba 			len = btrfs_chunk_item_size(num_stripes);
6876e3540eabSDavid Sterba 			if (cur_offset + len > array_size)
6877e3540eabSDavid Sterba 				goto out_short_read;
6878e3540eabSDavid Sterba 
68792ff7e61eSJeff Mahoney 			ret = read_one_chunk(fs_info, &key, sb, chunk);
688084eed90fSChris Mason 			if (ret)
688184eed90fSChris Mason 				break;
68820b86a832SChris Mason 		} else {
6883ab8d0fc4SJeff Mahoney 			btrfs_err(fs_info,
6884ab8d0fc4SJeff Mahoney 			    "unexpected item type %u in sys_array at offset %u",
688593a3d467SDavid Sterba 				  (u32)key.type, cur_offset);
688684eed90fSChris Mason 			ret = -EIO;
688784eed90fSChris Mason 			break;
68880b86a832SChris Mason 		}
68891ffb22cfSDavid Sterba 		array_ptr += len;
68901ffb22cfSDavid Sterba 		sb_array_offset += len;
68911ffb22cfSDavid Sterba 		cur_offset += len;
68920b86a832SChris Mason 	}
6893d865177aSLiu Bo 	clear_extent_buffer_uptodate(sb);
68941c8b5b6eSLiu Bo 	free_extent_buffer_stale(sb);
689584eed90fSChris Mason 	return ret;
6896e3540eabSDavid Sterba 
6897e3540eabSDavid Sterba out_short_read:
6898ab8d0fc4SJeff Mahoney 	btrfs_err(fs_info, "sys_array too short to read %u bytes at offset %u",
6899e3540eabSDavid Sterba 			len, cur_offset);
6900d865177aSLiu Bo 	clear_extent_buffer_uptodate(sb);
69011c8b5b6eSLiu Bo 	free_extent_buffer_stale(sb);
6902e3540eabSDavid Sterba 	return -EIO;
69030b86a832SChris Mason }
69040b86a832SChris Mason 
690521634a19SQu Wenruo /*
690621634a19SQu Wenruo  * Check if all chunks in the fs are OK for read-write degraded mount
690721634a19SQu Wenruo  *
69086528b99dSAnand Jain  * If @failing_dev is specified, it is accounted as missing.
69096528b99dSAnand Jain  *
691021634a19SQu Wenruo  * Return true if all chunks meet the minimal RW mount requirements.
691121634a19SQu Wenruo  * Return false if any chunk doesn't meet the minimal RW mount requirements.
691221634a19SQu Wenruo  */
69136528b99dSAnand Jain bool btrfs_check_rw_degradable(struct btrfs_fs_info *fs_info,
69146528b99dSAnand Jain 					struct btrfs_device *failing_dev)
691521634a19SQu Wenruo {
691621634a19SQu Wenruo 	struct btrfs_mapping_tree *map_tree = &fs_info->mapping_tree;
691721634a19SQu Wenruo 	struct extent_map *em;
691821634a19SQu Wenruo 	u64 next_start = 0;
691921634a19SQu Wenruo 	bool ret = true;
692021634a19SQu Wenruo 
692121634a19SQu Wenruo 	read_lock(&map_tree->map_tree.lock);
692221634a19SQu Wenruo 	em = lookup_extent_mapping(&map_tree->map_tree, 0, (u64)-1);
692321634a19SQu Wenruo 	read_unlock(&map_tree->map_tree.lock);
692421634a19SQu Wenruo 	/* No chunk at all? Return false anyway */
692521634a19SQu Wenruo 	if (!em) {
692621634a19SQu Wenruo 		ret = false;
692721634a19SQu Wenruo 		goto out;
692821634a19SQu Wenruo 	}
692921634a19SQu Wenruo 	while (em) {
693021634a19SQu Wenruo 		struct map_lookup *map;
693121634a19SQu Wenruo 		int missing = 0;
693221634a19SQu Wenruo 		int max_tolerated;
693321634a19SQu Wenruo 		int i;
693421634a19SQu Wenruo 
693521634a19SQu Wenruo 		map = em->map_lookup;
693621634a19SQu Wenruo 		max_tolerated =
693721634a19SQu Wenruo 			btrfs_get_num_tolerated_disk_barrier_failures(
693821634a19SQu Wenruo 					map->type);
693921634a19SQu Wenruo 		for (i = 0; i < map->num_stripes; i++) {
694021634a19SQu Wenruo 			struct btrfs_device *dev = map->stripes[i].dev;
694121634a19SQu Wenruo 
6942e6e674bdSAnand Jain 			if (!dev || !dev->bdev ||
6943e6e674bdSAnand Jain 			    test_bit(BTRFS_DEV_STATE_MISSING, &dev->dev_state) ||
694421634a19SQu Wenruo 			    dev->last_flush_error)
694521634a19SQu Wenruo 				missing++;
69466528b99dSAnand Jain 			else if (failing_dev && failing_dev == dev)
69476528b99dSAnand Jain 				missing++;
694821634a19SQu Wenruo 		}
694921634a19SQu Wenruo 		if (missing > max_tolerated) {
69506528b99dSAnand Jain 			if (!failing_dev)
695121634a19SQu Wenruo 				btrfs_warn(fs_info,
695221634a19SQu Wenruo 	"chunk %llu missing %d devices, max tolerance is %d for writeable mount",
695321634a19SQu Wenruo 				   em->start, missing, max_tolerated);
695421634a19SQu Wenruo 			free_extent_map(em);
695521634a19SQu Wenruo 			ret = false;
695621634a19SQu Wenruo 			goto out;
695721634a19SQu Wenruo 		}
695821634a19SQu Wenruo 		next_start = extent_map_end(em);
695921634a19SQu Wenruo 		free_extent_map(em);
696021634a19SQu Wenruo 
696121634a19SQu Wenruo 		read_lock(&map_tree->map_tree.lock);
696221634a19SQu Wenruo 		em = lookup_extent_mapping(&map_tree->map_tree, next_start,
696321634a19SQu Wenruo 					   (u64)(-1) - next_start);
696421634a19SQu Wenruo 		read_unlock(&map_tree->map_tree.lock);
696521634a19SQu Wenruo 	}
696621634a19SQu Wenruo out:
696721634a19SQu Wenruo 	return ret;
696821634a19SQu Wenruo }
696921634a19SQu Wenruo 
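/*
 * Walk the chunk tree, loading every DEV_ITEM and CHUNK_ITEM, and then
 * validate the device count and total size recorded in the superblock
 * against what was actually found.  Expects the system chunks to already
 * be mapped (see btrfs_read_sys_array()).
 */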
69705b4aacefSJeff Mahoney int btrfs_read_chunk_tree(struct btrfs_fs_info *fs_info)
69710b86a832SChris Mason {
69725b4aacefSJeff Mahoney 	struct btrfs_root *root = fs_info->chunk_root;
69730b86a832SChris Mason 	struct btrfs_path *path;
69740b86a832SChris Mason 	struct extent_buffer *leaf;
69750b86a832SChris Mason 	struct btrfs_key key;
69760b86a832SChris Mason 	struct btrfs_key found_key;
69770b86a832SChris Mason 	int ret;
69780b86a832SChris Mason 	int slot;
697999e3ecfcSLiu Bo 	u64 total_dev = 0;
69800b86a832SChris Mason 
69810b86a832SChris Mason 	path = btrfs_alloc_path();
69820b86a832SChris Mason 	if (!path)
69830b86a832SChris Mason 		return -ENOMEM;
69840b86a832SChris Mason 
6985b367e47fSLi Zefan 	mutex_lock(&uuid_mutex);
698634441361SDavid Sterba 	mutex_lock(&fs_info->chunk_mutex);
6987b367e47fSLi Zefan 
6988395927a9SFilipe David Borba Manana 	/*
6989395927a9SFilipe David Borba Manana 	 * Read all device items, and then all the chunk items. All
6990395927a9SFilipe David Borba Manana 	 * device items are found before any chunk item (their object id
6991395927a9SFilipe David Borba Manana 	 * is smaller than the lowest possible object id for a chunk
6992395927a9SFilipe David Borba Manana 	 * item - BTRFS_FIRST_CHUNK_TREE_OBJECTID).
69930b86a832SChris Mason 	 */
69940b86a832SChris Mason 	key.objectid = BTRFS_DEV_ITEMS_OBJECTID;
69950b86a832SChris Mason 	key.offset = 0;
69960b86a832SChris Mason 	key.type = 0;
69970b86a832SChris Mason 	ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
6998ab59381eSZhao Lei 	if (ret < 0)
6999ab59381eSZhao Lei 		goto error;
70000b86a832SChris Mason 	while (1) {
70010b86a832SChris Mason 		leaf = path->nodes[0];
70020b86a832SChris Mason 		slot = path->slots[0];
70030b86a832SChris Mason 		if (slot >= btrfs_header_nritems(leaf)) {
70040b86a832SChris Mason 			ret = btrfs_next_leaf(root, path);
70050b86a832SChris Mason 			if (ret == 0)
70060b86a832SChris Mason 				continue;
70070b86a832SChris Mason 			if (ret < 0)
70080b86a832SChris Mason 				goto error;
70090b86a832SChris Mason 			break;
70100b86a832SChris Mason 		}
70110b86a832SChris Mason 		btrfs_item_key_to_cpu(leaf, &found_key, slot);
70120b86a832SChris Mason 		if (found_key.type == BTRFS_DEV_ITEM_KEY) {
70130b86a832SChris Mason 			struct btrfs_dev_item *dev_item;
70140b86a832SChris Mason 			dev_item = btrfs_item_ptr(leaf, slot,
70150b86a832SChris Mason 						  struct btrfs_dev_item);
70162ff7e61eSJeff Mahoney 			ret = read_one_dev(fs_info, leaf, dev_item);
70172b82032cSYan Zheng 			if (ret)
70182b82032cSYan Zheng 				goto error;
701999e3ecfcSLiu Bo 			total_dev++;
70200b86a832SChris Mason 		} else if (found_key.type == BTRFS_CHUNK_ITEM_KEY) {
70210b86a832SChris Mason 			struct btrfs_chunk *chunk;
70220b86a832SChris Mason 			chunk = btrfs_item_ptr(leaf, slot, struct btrfs_chunk);
70232ff7e61eSJeff Mahoney 			ret = read_one_chunk(fs_info, &found_key, leaf, chunk);
70242b82032cSYan Zheng 			if (ret)
70252b82032cSYan Zheng 				goto error;
70260b86a832SChris Mason 		}
70270b86a832SChris Mason 		path->slots[0]++;
70280b86a832SChris Mason 	}
702999e3ecfcSLiu Bo 
703099e3ecfcSLiu Bo 	/*
703199e3ecfcSLiu Bo 	 * After loading chunk tree, we've got all device information,
703299e3ecfcSLiu Bo 	 * do another round of validation checks.
703399e3ecfcSLiu Bo 	 */
70340b246afaSJeff Mahoney 	if (total_dev != fs_info->fs_devices->total_devices) {
70350b246afaSJeff Mahoney 		btrfs_err(fs_info,
703699e3ecfcSLiu Bo 	   "super_num_devices %llu mismatch with num_devices %llu found here",
70370b246afaSJeff Mahoney 			  btrfs_super_num_devices(fs_info->super_copy),
703899e3ecfcSLiu Bo 			  total_dev);
703999e3ecfcSLiu Bo 		ret = -EINVAL;
704099e3ecfcSLiu Bo 		goto error;
704199e3ecfcSLiu Bo 	}
70420b246afaSJeff Mahoney 	if (btrfs_super_total_bytes(fs_info->super_copy) <
70430b246afaSJeff Mahoney 	    fs_info->fs_devices->total_rw_bytes) {
70440b246afaSJeff Mahoney 		btrfs_err(fs_info,
704599e3ecfcSLiu Bo 	"super_total_bytes %llu mismatch with fs_devices total_rw_bytes %llu",
70460b246afaSJeff Mahoney 			  btrfs_super_total_bytes(fs_info->super_copy),
70470b246afaSJeff Mahoney 			  fs_info->fs_devices->total_rw_bytes);
704899e3ecfcSLiu Bo 		ret = -EINVAL;
704999e3ecfcSLiu Bo 		goto error;
705099e3ecfcSLiu Bo 	}
70510b86a832SChris Mason 	ret = 0;
70520b86a832SChris Mason error:
705334441361SDavid Sterba 	mutex_unlock(&fs_info->chunk_mutex);
7054b367e47fSLi Zefan 	mutex_unlock(&uuid_mutex);
7055b367e47fSLi Zefan 
70562b82032cSYan Zheng 	btrfs_free_path(path);
70570b86a832SChris Mason 	return ret;
70580b86a832SChris Mason }
7059442a4f63SStefan Behrens 
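/* Point every known device, including seed devices, back at this fs_info. */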
7060cb517eabSMiao Xie void btrfs_init_devices_late(struct btrfs_fs_info *fs_info)
7061cb517eabSMiao Xie {
7062cb517eabSMiao Xie 	struct btrfs_fs_devices *fs_devices = fs_info->fs_devices;
7063cb517eabSMiao Xie 	struct btrfs_device *device;
7064cb517eabSMiao Xie 
706529cc83f6SLiu Bo 	while (fs_devices) {
7066cb517eabSMiao Xie 		mutex_lock(&fs_devices->device_list_mutex);
7067cb517eabSMiao Xie 		list_for_each_entry(device, &fs_devices->devices, dev_list)
7068fb456252SJeff Mahoney 			device->fs_info = fs_info;
7069cb517eabSMiao Xie 		mutex_unlock(&fs_devices->device_list_mutex);
707029cc83f6SLiu Bo 
707129cc83f6SLiu Bo 		fs_devices = fs_devices->seed;
707229cc83f6SLiu Bo 	}
7073cb517eabSMiao Xie }
7074cb517eabSMiao Xie 
7075733f4fbbSStefan Behrens static void __btrfs_reset_dev_stats(struct btrfs_device *dev)
7076733f4fbbSStefan Behrens {
7077733f4fbbSStefan Behrens 	int i;
7078733f4fbbSStefan Behrens 
7079733f4fbbSStefan Behrens 	for (i = 0; i < BTRFS_DEV_STAT_VALUES_MAX; i++)
7080733f4fbbSStefan Behrens 		btrfs_dev_stat_reset(dev, i);
7081733f4fbbSStefan Behrens }
7082733f4fbbSStefan Behrens 
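/*
 * Load the persistent device statistics items from the device tree.
 * Devices without a stats item get zeroed counters; in either case the
 * in-memory stats are marked valid afterwards.
 */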
7083733f4fbbSStefan Behrens int btrfs_init_dev_stats(struct btrfs_fs_info *fs_info)
7084733f4fbbSStefan Behrens {
7085733f4fbbSStefan Behrens 	struct btrfs_key key;
7086733f4fbbSStefan Behrens 	struct btrfs_key found_key;
7087733f4fbbSStefan Behrens 	struct btrfs_root *dev_root = fs_info->dev_root;
7088733f4fbbSStefan Behrens 	struct btrfs_fs_devices *fs_devices = fs_info->fs_devices;
7089733f4fbbSStefan Behrens 	struct extent_buffer *eb;
7090733f4fbbSStefan Behrens 	int slot;
7091733f4fbbSStefan Behrens 	int ret = 0;
7092733f4fbbSStefan Behrens 	struct btrfs_device *device;
7093733f4fbbSStefan Behrens 	struct btrfs_path *path = NULL;
7094733f4fbbSStefan Behrens 	int i;
7095733f4fbbSStefan Behrens 
7096733f4fbbSStefan Behrens 	path = btrfs_alloc_path();
7097733f4fbbSStefan Behrens 	if (!path) {
7098733f4fbbSStefan Behrens 		ret = -ENOMEM;
7099733f4fbbSStefan Behrens 		goto out;
7100733f4fbbSStefan Behrens 	}
7101733f4fbbSStefan Behrens 
7102733f4fbbSStefan Behrens 	mutex_lock(&fs_devices->device_list_mutex);
7103733f4fbbSStefan Behrens 	list_for_each_entry(device, &fs_devices->devices, dev_list) {
7104733f4fbbSStefan Behrens 		int item_size;
7105733f4fbbSStefan Behrens 		struct btrfs_dev_stats_item *ptr;
7106733f4fbbSStefan Behrens 
7107242e2956SDavid Sterba 		key.objectid = BTRFS_DEV_STATS_OBJECTID;
7108242e2956SDavid Sterba 		key.type = BTRFS_PERSISTENT_ITEM_KEY;
7109733f4fbbSStefan Behrens 		key.offset = device->devid;
7110733f4fbbSStefan Behrens 		ret = btrfs_search_slot(NULL, dev_root, &key, path, 0, 0);
7111733f4fbbSStefan Behrens 		if (ret) {
7112733f4fbbSStefan Behrens 			__btrfs_reset_dev_stats(device);
7113733f4fbbSStefan Behrens 			device->dev_stats_valid = 1;
7114733f4fbbSStefan Behrens 			btrfs_release_path(path);
7115733f4fbbSStefan Behrens 			continue;
7116733f4fbbSStefan Behrens 		}
7117733f4fbbSStefan Behrens 		slot = path->slots[0];
7118733f4fbbSStefan Behrens 		eb = path->nodes[0];
7119733f4fbbSStefan Behrens 		btrfs_item_key_to_cpu(eb, &found_key, slot);
7120733f4fbbSStefan Behrens 		item_size = btrfs_item_size_nr(eb, slot);
7121733f4fbbSStefan Behrens 
7122733f4fbbSStefan Behrens 		ptr = btrfs_item_ptr(eb, slot,
7123733f4fbbSStefan Behrens 				     struct btrfs_dev_stats_item);
7124733f4fbbSStefan Behrens 
7125733f4fbbSStefan Behrens 		for (i = 0; i < BTRFS_DEV_STAT_VALUES_MAX; i++) {
7126733f4fbbSStefan Behrens 			if (item_size >= (1 + i) * sizeof(__le64))
7127733f4fbbSStefan Behrens 				btrfs_dev_stat_set(device, i,
7128733f4fbbSStefan Behrens 					btrfs_dev_stats_value(eb, ptr, i));
7129733f4fbbSStefan Behrens 			else
7130733f4fbbSStefan Behrens 				btrfs_dev_stat_reset(device, i);
7131733f4fbbSStefan Behrens 		}
7132733f4fbbSStefan Behrens 
7133733f4fbbSStefan Behrens 		device->dev_stats_valid = 1;
7134733f4fbbSStefan Behrens 		btrfs_dev_stat_print_on_load(device);
7135733f4fbbSStefan Behrens 		btrfs_release_path(path);
7136733f4fbbSStefan Behrens 	}
7137733f4fbbSStefan Behrens 	mutex_unlock(&fs_devices->device_list_mutex);
7138733f4fbbSStefan Behrens 
7139733f4fbbSStefan Behrens out:
7140733f4fbbSStefan Behrens 	btrfs_free_path(path);
7141733f4fbbSStefan Behrens 	return ret < 0 ? ret : 0;
7142733f4fbbSStefan Behrens }
7143733f4fbbSStefan Behrens 
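/*
 * Write the in-memory statistics of one device back to its dev_stats item,
 * deleting an existing item that is too small and inserting a new one if
 * none exists yet.
 */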
7144733f4fbbSStefan Behrens static int update_dev_stat_item(struct btrfs_trans_handle *trans,
71456bccf3abSJeff Mahoney 				struct btrfs_fs_info *fs_info,
7146733f4fbbSStefan Behrens 				struct btrfs_device *device)
7147733f4fbbSStefan Behrens {
71486bccf3abSJeff Mahoney 	struct btrfs_root *dev_root = fs_info->dev_root;
7149733f4fbbSStefan Behrens 	struct btrfs_path *path;
7150733f4fbbSStefan Behrens 	struct btrfs_key key;
7151733f4fbbSStefan Behrens 	struct extent_buffer *eb;
7152733f4fbbSStefan Behrens 	struct btrfs_dev_stats_item *ptr;
7153733f4fbbSStefan Behrens 	int ret;
7154733f4fbbSStefan Behrens 	int i;
7155733f4fbbSStefan Behrens 
7156242e2956SDavid Sterba 	key.objectid = BTRFS_DEV_STATS_OBJECTID;
7157242e2956SDavid Sterba 	key.type = BTRFS_PERSISTENT_ITEM_KEY;
7158733f4fbbSStefan Behrens 	key.offset = device->devid;
7159733f4fbbSStefan Behrens 
7160733f4fbbSStefan Behrens 	path = btrfs_alloc_path();
7161fa252992SDavid Sterba 	if (!path)
7162fa252992SDavid Sterba 		return -ENOMEM;
7163733f4fbbSStefan Behrens 	ret = btrfs_search_slot(trans, dev_root, &key, path, -1, 1);
7164733f4fbbSStefan Behrens 	if (ret < 0) {
71650b246afaSJeff Mahoney 		btrfs_warn_in_rcu(fs_info,
7166ecaeb14bSDavid Sterba 			"error %d while searching for dev_stats item for device %s",
7167606686eeSJosef Bacik 			      ret, rcu_str_deref(device->name));
7168733f4fbbSStefan Behrens 		goto out;
7169733f4fbbSStefan Behrens 	}
7170733f4fbbSStefan Behrens 
7171733f4fbbSStefan Behrens 	if (ret == 0 &&
7172733f4fbbSStefan Behrens 	    btrfs_item_size_nr(path->nodes[0], path->slots[0]) < sizeof(*ptr)) {
7173733f4fbbSStefan Behrens 		/* need to delete old one and insert a new one */
7174733f4fbbSStefan Behrens 		ret = btrfs_del_item(trans, dev_root, path);
7175733f4fbbSStefan Behrens 		if (ret != 0) {
71760b246afaSJeff Mahoney 			btrfs_warn_in_rcu(fs_info,
7177ecaeb14bSDavid Sterba 				"delete too small dev_stats item for device %s failed %d",
7178606686eeSJosef Bacik 				      rcu_str_deref(device->name), ret);
7179733f4fbbSStefan Behrens 			goto out;
7180733f4fbbSStefan Behrens 		}
7181733f4fbbSStefan Behrens 		ret = 1;
7182733f4fbbSStefan Behrens 	}
7183733f4fbbSStefan Behrens 
7184733f4fbbSStefan Behrens 	if (ret == 1) {
7185733f4fbbSStefan Behrens 		/* need to insert a new item */
7186733f4fbbSStefan Behrens 		btrfs_release_path(path);
7187733f4fbbSStefan Behrens 		ret = btrfs_insert_empty_item(trans, dev_root, path,
7188733f4fbbSStefan Behrens 					      &key, sizeof(*ptr));
7189733f4fbbSStefan Behrens 		if (ret < 0) {
71900b246afaSJeff Mahoney 			btrfs_warn_in_rcu(fs_info,
7191ecaeb14bSDavid Sterba 				"insert dev_stats item for device %s failed %d",
7192606686eeSJosef Bacik 				rcu_str_deref(device->name), ret);
7193733f4fbbSStefan Behrens 			goto out;
7194733f4fbbSStefan Behrens 		}
7195733f4fbbSStefan Behrens 	}
7196733f4fbbSStefan Behrens 
7197733f4fbbSStefan Behrens 	eb = path->nodes[0];
7198733f4fbbSStefan Behrens 	ptr = btrfs_item_ptr(eb, path->slots[0], struct btrfs_dev_stats_item);
7199733f4fbbSStefan Behrens 	for (i = 0; i < BTRFS_DEV_STAT_VALUES_MAX; i++)
7200733f4fbbSStefan Behrens 		btrfs_set_dev_stats_value(eb, ptr, i,
7201733f4fbbSStefan Behrens 					  btrfs_dev_stat_read(device, i));
7202733f4fbbSStefan Behrens 	btrfs_mark_buffer_dirty(eb);
7203733f4fbbSStefan Behrens 
7204733f4fbbSStefan Behrens out:
7205733f4fbbSStefan Behrens 	btrfs_free_path(path);
7206733f4fbbSStefan Behrens 	return ret;
7207733f4fbbSStefan Behrens }
7208733f4fbbSStefan Behrens 
7209733f4fbbSStefan Behrens /*
7210733f4fbbSStefan Behrens  * Called from commit_transaction. Writes all changed device stats to disk.
7211733f4fbbSStefan Behrens  */
7212733f4fbbSStefan Behrens int btrfs_run_dev_stats(struct btrfs_trans_handle *trans,
7213733f4fbbSStefan Behrens 			struct btrfs_fs_info *fs_info)
7214733f4fbbSStefan Behrens {
7215733f4fbbSStefan Behrens 	struct btrfs_fs_devices *fs_devices = fs_info->fs_devices;
7216733f4fbbSStefan Behrens 	struct btrfs_device *device;
7217addc3fa7SMiao Xie 	int stats_cnt;
7218733f4fbbSStefan Behrens 	int ret = 0;
7219733f4fbbSStefan Behrens 
7220733f4fbbSStefan Behrens 	mutex_lock(&fs_devices->device_list_mutex);
7221733f4fbbSStefan Behrens 	list_for_each_entry(device, &fs_devices->devices, dev_list) {
72229deae968SNikolay Borisov 		stats_cnt = atomic_read(&device->dev_stats_ccnt);
72239deae968SNikolay Borisov 		if (!device->dev_stats_valid || stats_cnt == 0)
7224733f4fbbSStefan Behrens 			continue;
7225733f4fbbSStefan Behrens 
72269deae968SNikolay Borisov 
72279deae968SNikolay Borisov 		/*
72289deae968SNikolay Borisov 		 * There is a LOAD-LOAD control dependency between the value of
72299deae968SNikolay Borisov 		 * dev_stats_ccnt and updating the on-disk values which requires
72309deae968SNikolay Borisov 		 * reading the in-memory counters. Such control dependencies
72319deae968SNikolay Borisov 		 * require explicit read memory barriers.
72329deae968SNikolay Borisov 		 *
72339deae968SNikolay Borisov 		 * This memory barrier pairs with smp_mb__before_atomic in
72349deae968SNikolay Borisov 		 * btrfs_dev_stat_inc/btrfs_dev_stat_set and with the full
72359deae968SNikolay Borisov 		 * barrier implied by atomic_xchg in
72369deae968SNikolay Borisov 		 * btrfs_dev_stats_read_and_reset
72379deae968SNikolay Borisov 		 */
72389deae968SNikolay Borisov 		smp_rmb();
72399deae968SNikolay Borisov 
72406bccf3abSJeff Mahoney 		ret = update_dev_stat_item(trans, fs_info, device);
7241733f4fbbSStefan Behrens 		if (!ret)
7242addc3fa7SMiao Xie 			atomic_sub(stats_cnt, &device->dev_stats_ccnt);
7243733f4fbbSStefan Behrens 	}
7244733f4fbbSStefan Behrens 	mutex_unlock(&fs_devices->device_list_mutex);
7245733f4fbbSStefan Behrens 
7246733f4fbbSStefan Behrens 	return ret;
7247733f4fbbSStefan Behrens }
7248733f4fbbSStefan Behrens 
7249442a4f63SStefan Behrens void btrfs_dev_stat_inc_and_print(struct btrfs_device *dev, int index)
7250442a4f63SStefan Behrens {
7251442a4f63SStefan Behrens 	btrfs_dev_stat_inc(dev, index);
7252442a4f63SStefan Behrens 	btrfs_dev_stat_print_on_error(dev);
7253442a4f63SStefan Behrens }
7254442a4f63SStefan Behrens 
725548a3b636SEric Sandeen static void btrfs_dev_stat_print_on_error(struct btrfs_device *dev)
7256442a4f63SStefan Behrens {
7257733f4fbbSStefan Behrens 	if (!dev->dev_stats_valid)
7258733f4fbbSStefan Behrens 		return;
7259fb456252SJeff Mahoney 	btrfs_err_rl_in_rcu(dev->fs_info,
7260b14af3b4SDavid Sterba 		"bdev %s errs: wr %u, rd %u, flush %u, corrupt %u, gen %u",
7261606686eeSJosef Bacik 			   rcu_str_deref(dev->name),
7262442a4f63SStefan Behrens 			   btrfs_dev_stat_read(dev, BTRFS_DEV_STAT_WRITE_ERRS),
7263442a4f63SStefan Behrens 			   btrfs_dev_stat_read(dev, BTRFS_DEV_STAT_READ_ERRS),
7264442a4f63SStefan Behrens 			   btrfs_dev_stat_read(dev, BTRFS_DEV_STAT_FLUSH_ERRS),
7265efe120a0SFrank Holton 			   btrfs_dev_stat_read(dev, BTRFS_DEV_STAT_CORRUPTION_ERRS),
7266efe120a0SFrank Holton 			   btrfs_dev_stat_read(dev, BTRFS_DEV_STAT_GENERATION_ERRS));
7267442a4f63SStefan Behrens }
7268c11d2c23SStefan Behrens 
7269733f4fbbSStefan Behrens static void btrfs_dev_stat_print_on_load(struct btrfs_device *dev)
7270733f4fbbSStefan Behrens {
7271a98cdb85SStefan Behrens 	int i;
7272a98cdb85SStefan Behrens 
7273a98cdb85SStefan Behrens 	for (i = 0; i < BTRFS_DEV_STAT_VALUES_MAX; i++)
7274a98cdb85SStefan Behrens 		if (btrfs_dev_stat_read(dev, i) != 0)
7275a98cdb85SStefan Behrens 			break;
7276a98cdb85SStefan Behrens 	if (i == BTRFS_DEV_STAT_VALUES_MAX)
7277a98cdb85SStefan Behrens 		return; /* all values == 0, suppress message */
7278a98cdb85SStefan Behrens 
7279fb456252SJeff Mahoney 	btrfs_info_in_rcu(dev->fs_info,
7280ecaeb14bSDavid Sterba 		"bdev %s errs: wr %u, rd %u, flush %u, corrupt %u, gen %u",
7281606686eeSJosef Bacik 	       rcu_str_deref(dev->name),
7282733f4fbbSStefan Behrens 	       btrfs_dev_stat_read(dev, BTRFS_DEV_STAT_WRITE_ERRS),
7283733f4fbbSStefan Behrens 	       btrfs_dev_stat_read(dev, BTRFS_DEV_STAT_READ_ERRS),
7284733f4fbbSStefan Behrens 	       btrfs_dev_stat_read(dev, BTRFS_DEV_STAT_FLUSH_ERRS),
7285733f4fbbSStefan Behrens 	       btrfs_dev_stat_read(dev, BTRFS_DEV_STAT_CORRUPTION_ERRS),
7286733f4fbbSStefan Behrens 	       btrfs_dev_stat_read(dev, BTRFS_DEV_STAT_GENERATION_ERRS));
7287733f4fbbSStefan Behrens }
7288733f4fbbSStefan Behrens 
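/*
 * Fill @stats with the counters of the requested device for the device
 * stats ioctl, optionally resetting them when BTRFS_DEV_STATS_RESET is set.
 */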
72892ff7e61eSJeff Mahoney int btrfs_get_dev_stats(struct btrfs_fs_info *fs_info,
7290b27f7c0cSDavid Sterba 			struct btrfs_ioctl_get_dev_stats *stats)
7291c11d2c23SStefan Behrens {
7292c11d2c23SStefan Behrens 	struct btrfs_device *dev;
72930b246afaSJeff Mahoney 	struct btrfs_fs_devices *fs_devices = fs_info->fs_devices;
7294c11d2c23SStefan Behrens 	int i;
7295c11d2c23SStefan Behrens 
7296c11d2c23SStefan Behrens 	mutex_lock(&fs_devices->device_list_mutex);
72970b246afaSJeff Mahoney 	dev = btrfs_find_device(fs_info, stats->devid, NULL, NULL);
7298c11d2c23SStefan Behrens 	mutex_unlock(&fs_devices->device_list_mutex);
7299c11d2c23SStefan Behrens 
7300c11d2c23SStefan Behrens 	if (!dev) {
73010b246afaSJeff Mahoney 		btrfs_warn(fs_info, "get dev_stats failed, device not found");
7302c11d2c23SStefan Behrens 		return -ENODEV;
7303733f4fbbSStefan Behrens 	} else if (!dev->dev_stats_valid) {
73040b246afaSJeff Mahoney 		btrfs_warn(fs_info, "get dev_stats failed, not yet valid");
7305733f4fbbSStefan Behrens 		return -ENODEV;
7306b27f7c0cSDavid Sterba 	} else if (stats->flags & BTRFS_DEV_STATS_RESET) {
7307c11d2c23SStefan Behrens 		for (i = 0; i < BTRFS_DEV_STAT_VALUES_MAX; i++) {
7308c11d2c23SStefan Behrens 			if (stats->nr_items > i)
7309c11d2c23SStefan Behrens 				stats->values[i] =
7310c11d2c23SStefan Behrens 					btrfs_dev_stat_read_and_reset(dev, i);
7311c11d2c23SStefan Behrens 			else
7312c11d2c23SStefan Behrens 				btrfs_dev_stat_reset(dev, i);
7313c11d2c23SStefan Behrens 		}
7314c11d2c23SStefan Behrens 	} else {
7315c11d2c23SStefan Behrens 		for (i = 0; i < BTRFS_DEV_STAT_VALUES_MAX; i++)
7316c11d2c23SStefan Behrens 			if (stats->nr_items > i)
7317c11d2c23SStefan Behrens 				stats->values[i] = btrfs_dev_stat_read(dev, i);
7318c11d2c23SStefan Behrens 	}
7319c11d2c23SStefan Behrens 	if (stats->nr_items > BTRFS_DEV_STAT_VALUES_MAX)
7320c11d2c23SStefan Behrens 		stats->nr_items = BTRFS_DEV_STAT_VALUES_MAX;
7321c11d2c23SStefan Behrens 	return 0;
7322c11d2c23SStefan Behrens }
7323a8a6dab7SStefan Behrens 
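/*
 * Wipe the btrfs magic from every readable superblock copy on @bdev so the
 * device is no longer recognized as part of a filesystem, then notify udev
 * and update the device node times for libblkid.
 */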
7324da353f6bSDavid Sterba void btrfs_scratch_superblocks(struct block_device *bdev, const char *device_path)
7325a8a6dab7SStefan Behrens {
7326a8a6dab7SStefan Behrens 	struct buffer_head *bh;
7327a8a6dab7SStefan Behrens 	struct btrfs_super_block *disk_super;
732812b1c263SAnand Jain 	int copy_num;
7329a8a6dab7SStefan Behrens 
733012b1c263SAnand Jain 	if (!bdev)
733112b1c263SAnand Jain 		return;
733212b1c263SAnand Jain 
733312b1c263SAnand Jain 	for (copy_num = 0; copy_num < BTRFS_SUPER_MIRROR_MAX;
733412b1c263SAnand Jain 		copy_num++) {
733512b1c263SAnand Jain 
733612b1c263SAnand Jain 		if (btrfs_read_dev_one_super(bdev, copy_num, &bh))
733712b1c263SAnand Jain 			continue;
733812b1c263SAnand Jain 
7339a8a6dab7SStefan Behrens 		disk_super = (struct btrfs_super_block *)bh->b_data;
7340a8a6dab7SStefan Behrens 
7341a8a6dab7SStefan Behrens 		memset(&disk_super->magic, 0, sizeof(disk_super->magic));
7342a8a6dab7SStefan Behrens 		set_buffer_dirty(bh);
7343a8a6dab7SStefan Behrens 		sync_dirty_buffer(bh);
7344a8a6dab7SStefan Behrens 		brelse(bh);
734512b1c263SAnand Jain 	}
7346a8a6dab7SStefan Behrens 
734712b1c263SAnand Jain 	/* Notify udev that device has changed */
734812b1c263SAnand Jain 	btrfs_kobject_uevent(bdev, KOBJ_CHANGE);
734912b1c263SAnand Jain 
735012b1c263SAnand Jain 	/* Update ctime/mtime for device path for libblkid */
735112b1c263SAnand Jain 	update_dev_time(device_path);
7352a8a6dab7SStefan Behrens }
7353935e5cc9SMiao Xie 
7354935e5cc9SMiao Xie /*
7355935e5cc9SMiao Xie  * Update the size of all devices, which is used for writing out the
7356935e5cc9SMiao Xie  * super blocks.
7357935e5cc9SMiao Xie  */
7358935e5cc9SMiao Xie void btrfs_update_commit_device_size(struct btrfs_fs_info *fs_info)
7359935e5cc9SMiao Xie {
7360935e5cc9SMiao Xie 	struct btrfs_fs_devices *fs_devices = fs_info->fs_devices;
7361935e5cc9SMiao Xie 	struct btrfs_device *curr, *next;
7362935e5cc9SMiao Xie 
7363935e5cc9SMiao Xie 	if (list_empty(&fs_devices->resized_devices))
7364935e5cc9SMiao Xie 		return;
7365935e5cc9SMiao Xie 
7366935e5cc9SMiao Xie 	mutex_lock(&fs_devices->device_list_mutex);
736734441361SDavid Sterba 	mutex_lock(&fs_info->chunk_mutex);
7368935e5cc9SMiao Xie 	list_for_each_entry_safe(curr, next, &fs_devices->resized_devices,
7369935e5cc9SMiao Xie 				 resized_list) {
7370935e5cc9SMiao Xie 		list_del_init(&curr->resized_list);
7371935e5cc9SMiao Xie 		curr->commit_total_bytes = curr->disk_total_bytes;
7372935e5cc9SMiao Xie 	}
737334441361SDavid Sterba 	mutex_unlock(&fs_info->chunk_mutex);
7374935e5cc9SMiao Xie 	mutex_unlock(&fs_devices->device_list_mutex);
7375935e5cc9SMiao Xie }
7376ce7213c7SMiao Xie 
7377ce7213c7SMiao Xie /* Must be invoked during the transaction commit */
73782ff7e61eSJeff Mahoney void btrfs_update_commit_device_bytes_used(struct btrfs_fs_info *fs_info,
7379ce7213c7SMiao Xie 					struct btrfs_transaction *transaction)
7380ce7213c7SMiao Xie {
7381ce7213c7SMiao Xie 	struct extent_map *em;
7382ce7213c7SMiao Xie 	struct map_lookup *map;
7383ce7213c7SMiao Xie 	struct btrfs_device *dev;
7384ce7213c7SMiao Xie 	int i;
7385ce7213c7SMiao Xie 
7386ce7213c7SMiao Xie 	if (list_empty(&transaction->pending_chunks))
7387ce7213c7SMiao Xie 		return;
7388ce7213c7SMiao Xie 
7389ce7213c7SMiao Xie 	/* In order to kick the device replace finish process */
739034441361SDavid Sterba 	mutex_lock(&fs_info->chunk_mutex);
7391ce7213c7SMiao Xie 	list_for_each_entry(em, &transaction->pending_chunks, list) {
739295617d69SJeff Mahoney 		map = em->map_lookup;
7393ce7213c7SMiao Xie 
7394ce7213c7SMiao Xie 		for (i = 0; i < map->num_stripes; i++) {
7395ce7213c7SMiao Xie 			dev = map->stripes[i].dev;
7396ce7213c7SMiao Xie 			dev->commit_bytes_used = dev->bytes_used;
7397ce7213c7SMiao Xie 		}
7398ce7213c7SMiao Xie 	}
739934441361SDavid Sterba 	mutex_unlock(&fs_info->chunk_mutex);
7400ce7213c7SMiao Xie }
74015a13f430SAnand Jain 
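/*
 * The two helpers below set and clear the fs_info back-pointer on the whole
 * fs_devices chain, including seed devices.
 */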
74025a13f430SAnand Jain void btrfs_set_fs_info_ptr(struct btrfs_fs_info *fs_info)
74035a13f430SAnand Jain {
74045a13f430SAnand Jain 	struct btrfs_fs_devices *fs_devices = fs_info->fs_devices;
74055a13f430SAnand Jain 	while (fs_devices) {
74065a13f430SAnand Jain 		fs_devices->fs_info = fs_info;
74075a13f430SAnand Jain 		fs_devices = fs_devices->seed;
74085a13f430SAnand Jain 	}
74095a13f430SAnand Jain }
74105a13f430SAnand Jain 
74115a13f430SAnand Jain void btrfs_reset_fs_info_ptr(struct btrfs_fs_info *fs_info)
74125a13f430SAnand Jain {
74135a13f430SAnand Jain 	struct btrfs_fs_devices *fs_devices = fs_info->fs_devices;
74145a13f430SAnand Jain 	while (fs_devices) {
74155a13f430SAnand Jain 		fs_devices->fs_info = NULL;
74165a13f430SAnand Jain 		fs_devices = fs_devices->seed;
74175a13f430SAnand Jain 	}
74185a13f430SAnand Jain }
7419