xref: /openbmc/linux/fs/btrfs/zoned.c (revision d734492a14a2da6e7bcce8cf66436a9cf4e51ddf)
15b316468SNaohiro Aota // SPDX-License-Identifier: GPL-2.0
25b316468SNaohiro Aota 
31cd6121fSNaohiro Aota #include <linux/bitops.h>
45b316468SNaohiro Aota #include <linux/slab.h>
55b316468SNaohiro Aota #include <linux/blkdev.h>
608e11a3dSNaohiro Aota #include <linux/sched/mm.h>
75b316468SNaohiro Aota #include "ctree.h"
85b316468SNaohiro Aota #include "volumes.h"
95b316468SNaohiro Aota #include "zoned.h"
105b316468SNaohiro Aota #include "rcu-string.h"
111cd6121fSNaohiro Aota #include "disk-io.h"
1208e11a3dSNaohiro Aota #include "block-group.h"
13d3575156SNaohiro Aota #include "transaction.h"
146143c23cSNaohiro Aota #include "dev-replace.h"
157db1c5d1SNaohiro Aota #include "space-info.h"
165b316468SNaohiro Aota 
/* Upper bound on the zones requested by a single blkdev_report_zones() call */
#define BTRFS_REPORT_NR_ZONES	4096

/* Sentinel allocation pointer value used for missing devices */
#define WP_MISSING_DEV		((u64)-1)
/* Sentinel (pseudo) write pointer value used for conventional zones */
#define WP_CONVENTIONAL		((u64)-2)

/* Number of zones that make up one superblock log */
#define BTRFS_NR_SB_LOG_ZONES	2
2612659251SNaohiro Aota 
275b316468SNaohiro Aota static int copy_zone_info_cb(struct blk_zone *zone, unsigned int idx, void *data)
285b316468SNaohiro Aota {
295b316468SNaohiro Aota 	struct blk_zone *zones = data;
305b316468SNaohiro Aota 
315b316468SNaohiro Aota 	memcpy(&zones[idx], zone, sizeof(*zone));
325b316468SNaohiro Aota 
335b316468SNaohiro Aota 	return 0;
345b316468SNaohiro Aota }
355b316468SNaohiro Aota 
3612659251SNaohiro Aota static int sb_write_pointer(struct block_device *bdev, struct blk_zone *zones,
3712659251SNaohiro Aota 			    u64 *wp_ret)
3812659251SNaohiro Aota {
3912659251SNaohiro Aota 	bool empty[BTRFS_NR_SB_LOG_ZONES];
4012659251SNaohiro Aota 	bool full[BTRFS_NR_SB_LOG_ZONES];
4112659251SNaohiro Aota 	sector_t sector;
4212659251SNaohiro Aota 
4312659251SNaohiro Aota 	ASSERT(zones[0].type != BLK_ZONE_TYPE_CONVENTIONAL &&
4412659251SNaohiro Aota 	       zones[1].type != BLK_ZONE_TYPE_CONVENTIONAL);
4512659251SNaohiro Aota 
4612659251SNaohiro Aota 	empty[0] = (zones[0].cond == BLK_ZONE_COND_EMPTY);
4712659251SNaohiro Aota 	empty[1] = (zones[1].cond == BLK_ZONE_COND_EMPTY);
4812659251SNaohiro Aota 	full[0] = (zones[0].cond == BLK_ZONE_COND_FULL);
4912659251SNaohiro Aota 	full[1] = (zones[1].cond == BLK_ZONE_COND_FULL);
5012659251SNaohiro Aota 
5112659251SNaohiro Aota 	/*
5212659251SNaohiro Aota 	 * Possible states of log buffer zones
5312659251SNaohiro Aota 	 *
5412659251SNaohiro Aota 	 *           Empty[0]  In use[0]  Full[0]
5512659251SNaohiro Aota 	 * Empty[1]         *          x        0
5612659251SNaohiro Aota 	 * In use[1]        0          x        0
5712659251SNaohiro Aota 	 * Full[1]          1          1        C
5812659251SNaohiro Aota 	 *
5912659251SNaohiro Aota 	 * Log position:
6012659251SNaohiro Aota 	 *   *: Special case, no superblock is written
6112659251SNaohiro Aota 	 *   0: Use write pointer of zones[0]
6212659251SNaohiro Aota 	 *   1: Use write pointer of zones[1]
6312659251SNaohiro Aota 	 *   C: Compare super blcoks from zones[0] and zones[1], use the latest
6412659251SNaohiro Aota 	 *      one determined by generation
6512659251SNaohiro Aota 	 *   x: Invalid state
6612659251SNaohiro Aota 	 */
6712659251SNaohiro Aota 
6812659251SNaohiro Aota 	if (empty[0] && empty[1]) {
6912659251SNaohiro Aota 		/* Special case to distinguish no superblock to read */
7012659251SNaohiro Aota 		*wp_ret = zones[0].start << SECTOR_SHIFT;
7112659251SNaohiro Aota 		return -ENOENT;
7212659251SNaohiro Aota 	} else if (full[0] && full[1]) {
7312659251SNaohiro Aota 		/* Compare two super blocks */
7412659251SNaohiro Aota 		struct address_space *mapping = bdev->bd_inode->i_mapping;
7512659251SNaohiro Aota 		struct page *page[BTRFS_NR_SB_LOG_ZONES];
7612659251SNaohiro Aota 		struct btrfs_super_block *super[BTRFS_NR_SB_LOG_ZONES];
7712659251SNaohiro Aota 		int i;
7812659251SNaohiro Aota 
7912659251SNaohiro Aota 		for (i = 0; i < BTRFS_NR_SB_LOG_ZONES; i++) {
8012659251SNaohiro Aota 			u64 bytenr;
8112659251SNaohiro Aota 
8212659251SNaohiro Aota 			bytenr = ((zones[i].start + zones[i].len)
8312659251SNaohiro Aota 				   << SECTOR_SHIFT) - BTRFS_SUPER_INFO_SIZE;
8412659251SNaohiro Aota 
8512659251SNaohiro Aota 			page[i] = read_cache_page_gfp(mapping,
8612659251SNaohiro Aota 					bytenr >> PAGE_SHIFT, GFP_NOFS);
8712659251SNaohiro Aota 			if (IS_ERR(page[i])) {
8812659251SNaohiro Aota 				if (i == 1)
8912659251SNaohiro Aota 					btrfs_release_disk_super(super[0]);
9012659251SNaohiro Aota 				return PTR_ERR(page[i]);
9112659251SNaohiro Aota 			}
9212659251SNaohiro Aota 			super[i] = page_address(page[i]);
9312659251SNaohiro Aota 		}
9412659251SNaohiro Aota 
9512659251SNaohiro Aota 		if (super[0]->generation > super[1]->generation)
9612659251SNaohiro Aota 			sector = zones[1].start;
9712659251SNaohiro Aota 		else
9812659251SNaohiro Aota 			sector = zones[0].start;
9912659251SNaohiro Aota 
10012659251SNaohiro Aota 		for (i = 0; i < BTRFS_NR_SB_LOG_ZONES; i++)
10112659251SNaohiro Aota 			btrfs_release_disk_super(super[i]);
10212659251SNaohiro Aota 	} else if (!full[0] && (empty[1] || full[1])) {
10312659251SNaohiro Aota 		sector = zones[0].wp;
10412659251SNaohiro Aota 	} else if (full[0]) {
10512659251SNaohiro Aota 		sector = zones[1].wp;
10612659251SNaohiro Aota 	} else {
10712659251SNaohiro Aota 		return -EUCLEAN;
10812659251SNaohiro Aota 	}
10912659251SNaohiro Aota 	*wp_ret = sector << SECTOR_SHIFT;
11012659251SNaohiro Aota 	return 0;
11112659251SNaohiro Aota }
11212659251SNaohiro Aota 
11312659251SNaohiro Aota /*
11412659251SNaohiro Aota  * The following zones are reserved as the circular buffer on ZONED btrfs.
11512659251SNaohiro Aota  *  - The primary superblock: zones 0 and 1
11612659251SNaohiro Aota  *  - The first copy: zones 16 and 17
11712659251SNaohiro Aota  *  - The second copy: zones 1024 or zone at 256GB which is minimum, and
11812659251SNaohiro Aota  *                     the following one
11912659251SNaohiro Aota  */
12012659251SNaohiro Aota static inline u32 sb_zone_number(int shift, int mirror)
12112659251SNaohiro Aota {
12212659251SNaohiro Aota 	ASSERT(mirror < BTRFS_SUPER_MIRROR_MAX);
12312659251SNaohiro Aota 
12412659251SNaohiro Aota 	switch (mirror) {
12512659251SNaohiro Aota 	case 0: return 0;
12612659251SNaohiro Aota 	case 1: return 16;
12712659251SNaohiro Aota 	case 2: return min_t(u64, btrfs_sb_offset(mirror) >> shift, 1024);
12812659251SNaohiro Aota 	}
12912659251SNaohiro Aota 
13012659251SNaohiro Aota 	return 0;
13112659251SNaohiro Aota }
13212659251SNaohiro Aota 
1333c9daa09SJohannes Thumshirn /*
1343c9daa09SJohannes Thumshirn  * Emulate blkdev_report_zones() for a non-zoned device. It slices up the block
1353c9daa09SJohannes Thumshirn  * device into static sized chunks and fake a conventional zone on each of
1363c9daa09SJohannes Thumshirn  * them.
1373c9daa09SJohannes Thumshirn  */
1383c9daa09SJohannes Thumshirn static int emulate_report_zones(struct btrfs_device *device, u64 pos,
1393c9daa09SJohannes Thumshirn 				struct blk_zone *zones, unsigned int nr_zones)
1403c9daa09SJohannes Thumshirn {
1413c9daa09SJohannes Thumshirn 	const sector_t zone_sectors = device->fs_info->zone_size >> SECTOR_SHIFT;
1423c9daa09SJohannes Thumshirn 	sector_t bdev_size = bdev_nr_sectors(device->bdev);
1433c9daa09SJohannes Thumshirn 	unsigned int i;
1443c9daa09SJohannes Thumshirn 
1453c9daa09SJohannes Thumshirn 	pos >>= SECTOR_SHIFT;
1463c9daa09SJohannes Thumshirn 	for (i = 0; i < nr_zones; i++) {
1473c9daa09SJohannes Thumshirn 		zones[i].start = i * zone_sectors + pos;
1483c9daa09SJohannes Thumshirn 		zones[i].len = zone_sectors;
1493c9daa09SJohannes Thumshirn 		zones[i].capacity = zone_sectors;
1503c9daa09SJohannes Thumshirn 		zones[i].wp = zones[i].start + zone_sectors;
1513c9daa09SJohannes Thumshirn 		zones[i].type = BLK_ZONE_TYPE_CONVENTIONAL;
1523c9daa09SJohannes Thumshirn 		zones[i].cond = BLK_ZONE_COND_NOT_WP;
1533c9daa09SJohannes Thumshirn 
1543c9daa09SJohannes Thumshirn 		if (zones[i].wp >= bdev_size) {
1553c9daa09SJohannes Thumshirn 			i++;
1563c9daa09SJohannes Thumshirn 			break;
1573c9daa09SJohannes Thumshirn 		}
1583c9daa09SJohannes Thumshirn 	}
1593c9daa09SJohannes Thumshirn 
1603c9daa09SJohannes Thumshirn 	return i;
1613c9daa09SJohannes Thumshirn }
1623c9daa09SJohannes Thumshirn 
1635b316468SNaohiro Aota static int btrfs_get_dev_zones(struct btrfs_device *device, u64 pos,
1645b316468SNaohiro Aota 			       struct blk_zone *zones, unsigned int *nr_zones)
1655b316468SNaohiro Aota {
1665b316468SNaohiro Aota 	int ret;
1675b316468SNaohiro Aota 
1685b316468SNaohiro Aota 	if (!*nr_zones)
1695b316468SNaohiro Aota 		return 0;
1705b316468SNaohiro Aota 
1713c9daa09SJohannes Thumshirn 	if (!bdev_is_zoned(device->bdev)) {
1723c9daa09SJohannes Thumshirn 		ret = emulate_report_zones(device, pos, zones, *nr_zones);
1733c9daa09SJohannes Thumshirn 		*nr_zones = ret;
1743c9daa09SJohannes Thumshirn 		return 0;
1753c9daa09SJohannes Thumshirn 	}
1763c9daa09SJohannes Thumshirn 
1775b316468SNaohiro Aota 	ret = blkdev_report_zones(device->bdev, pos >> SECTOR_SHIFT, *nr_zones,
1785b316468SNaohiro Aota 				  copy_zone_info_cb, zones);
1795b316468SNaohiro Aota 	if (ret < 0) {
1805b316468SNaohiro Aota 		btrfs_err_in_rcu(device->fs_info,
1815b316468SNaohiro Aota 				 "zoned: failed to read zone %llu on %s (devid %llu)",
1825b316468SNaohiro Aota 				 pos, rcu_str_deref(device->name),
1835b316468SNaohiro Aota 				 device->devid);
1845b316468SNaohiro Aota 		return ret;
1855b316468SNaohiro Aota 	}
1865b316468SNaohiro Aota 	*nr_zones = ret;
1875b316468SNaohiro Aota 	if (!ret)
1885b316468SNaohiro Aota 		return -EIO;
1895b316468SNaohiro Aota 
1905b316468SNaohiro Aota 	return 0;
1915b316468SNaohiro Aota }
1925b316468SNaohiro Aota 
1933c9daa09SJohannes Thumshirn /* The emulated zone size is determined from the size of device extent */
1943c9daa09SJohannes Thumshirn static int calculate_emulated_zone_size(struct btrfs_fs_info *fs_info)
1953c9daa09SJohannes Thumshirn {
1963c9daa09SJohannes Thumshirn 	struct btrfs_path *path;
1973c9daa09SJohannes Thumshirn 	struct btrfs_root *root = fs_info->dev_root;
1983c9daa09SJohannes Thumshirn 	struct btrfs_key key;
1993c9daa09SJohannes Thumshirn 	struct extent_buffer *leaf;
2003c9daa09SJohannes Thumshirn 	struct btrfs_dev_extent *dext;
2013c9daa09SJohannes Thumshirn 	int ret = 0;
2023c9daa09SJohannes Thumshirn 
2033c9daa09SJohannes Thumshirn 	key.objectid = 1;
2043c9daa09SJohannes Thumshirn 	key.type = BTRFS_DEV_EXTENT_KEY;
2053c9daa09SJohannes Thumshirn 	key.offset = 0;
2063c9daa09SJohannes Thumshirn 
2073c9daa09SJohannes Thumshirn 	path = btrfs_alloc_path();
2083c9daa09SJohannes Thumshirn 	if (!path)
2093c9daa09SJohannes Thumshirn 		return -ENOMEM;
2103c9daa09SJohannes Thumshirn 
2113c9daa09SJohannes Thumshirn 	ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
2123c9daa09SJohannes Thumshirn 	if (ret < 0)
2133c9daa09SJohannes Thumshirn 		goto out;
2143c9daa09SJohannes Thumshirn 
2153c9daa09SJohannes Thumshirn 	if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) {
2163c9daa09SJohannes Thumshirn 		ret = btrfs_next_item(root, path);
2173c9daa09SJohannes Thumshirn 		if (ret < 0)
2183c9daa09SJohannes Thumshirn 			goto out;
2193c9daa09SJohannes Thumshirn 		/* No dev extents at all? Not good */
2203c9daa09SJohannes Thumshirn 		if (ret > 0) {
2213c9daa09SJohannes Thumshirn 			ret = -EUCLEAN;
2223c9daa09SJohannes Thumshirn 			goto out;
2233c9daa09SJohannes Thumshirn 		}
2243c9daa09SJohannes Thumshirn 	}
2253c9daa09SJohannes Thumshirn 
2263c9daa09SJohannes Thumshirn 	leaf = path->nodes[0];
2273c9daa09SJohannes Thumshirn 	dext = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_dev_extent);
2283c9daa09SJohannes Thumshirn 	fs_info->zone_size = btrfs_dev_extent_length(leaf, dext);
2293c9daa09SJohannes Thumshirn 	ret = 0;
2303c9daa09SJohannes Thumshirn 
2313c9daa09SJohannes Thumshirn out:
2323c9daa09SJohannes Thumshirn 	btrfs_free_path(path);
2333c9daa09SJohannes Thumshirn 
2343c9daa09SJohannes Thumshirn 	return ret;
2353c9daa09SJohannes Thumshirn }
2363c9daa09SJohannes Thumshirn 
23773651042SNaohiro Aota int btrfs_get_dev_zone_info_all_devices(struct btrfs_fs_info *fs_info)
23873651042SNaohiro Aota {
23973651042SNaohiro Aota 	struct btrfs_fs_devices *fs_devices = fs_info->fs_devices;
24073651042SNaohiro Aota 	struct btrfs_device *device;
24173651042SNaohiro Aota 	int ret = 0;
24273651042SNaohiro Aota 
24373651042SNaohiro Aota 	/* fs_info->zone_size might not set yet. Use the incomapt flag here. */
24473651042SNaohiro Aota 	if (!btrfs_fs_incompat(fs_info, ZONED))
24573651042SNaohiro Aota 		return 0;
24673651042SNaohiro Aota 
24773651042SNaohiro Aota 	mutex_lock(&fs_devices->device_list_mutex);
24873651042SNaohiro Aota 	list_for_each_entry(device, &fs_devices->devices, dev_list) {
24973651042SNaohiro Aota 		/* We can skip reading of zone info for missing devices */
25073651042SNaohiro Aota 		if (!device->bdev)
25173651042SNaohiro Aota 			continue;
25273651042SNaohiro Aota 
25373651042SNaohiro Aota 		ret = btrfs_get_dev_zone_info(device);
25473651042SNaohiro Aota 		if (ret)
25573651042SNaohiro Aota 			break;
25673651042SNaohiro Aota 	}
25773651042SNaohiro Aota 	mutex_unlock(&fs_devices->device_list_mutex);
25873651042SNaohiro Aota 
25973651042SNaohiro Aota 	return ret;
26073651042SNaohiro Aota }
26173651042SNaohiro Aota 
2625b316468SNaohiro Aota int btrfs_get_dev_zone_info(struct btrfs_device *device)
2635b316468SNaohiro Aota {
2643c9daa09SJohannes Thumshirn 	struct btrfs_fs_info *fs_info = device->fs_info;
2655b316468SNaohiro Aota 	struct btrfs_zoned_device_info *zone_info = NULL;
2665b316468SNaohiro Aota 	struct block_device *bdev = device->bdev;
267862931c7SNaohiro Aota 	struct request_queue *queue = bdev_get_queue(bdev);
2685b316468SNaohiro Aota 	sector_t nr_sectors;
2695b316468SNaohiro Aota 	sector_t sector = 0;
2705b316468SNaohiro Aota 	struct blk_zone *zones = NULL;
2715b316468SNaohiro Aota 	unsigned int i, nreported = 0, nr_zones;
272*d734492aSNaohiro Aota 	sector_t zone_sectors;
2733c9daa09SJohannes Thumshirn 	char *model, *emulated;
2745b316468SNaohiro Aota 	int ret;
2755b316468SNaohiro Aota 
2763c9daa09SJohannes Thumshirn 	/*
2773c9daa09SJohannes Thumshirn 	 * Cannot use btrfs_is_zoned here, since fs_info::zone_size might not
2783c9daa09SJohannes Thumshirn 	 * yet be set.
2793c9daa09SJohannes Thumshirn 	 */
2803c9daa09SJohannes Thumshirn 	if (!btrfs_fs_incompat(fs_info, ZONED))
2815b316468SNaohiro Aota 		return 0;
2825b316468SNaohiro Aota 
2835b316468SNaohiro Aota 	if (device->zone_info)
2845b316468SNaohiro Aota 		return 0;
2855b316468SNaohiro Aota 
2865b316468SNaohiro Aota 	zone_info = kzalloc(sizeof(*zone_info), GFP_KERNEL);
2875b316468SNaohiro Aota 	if (!zone_info)
2885b316468SNaohiro Aota 		return -ENOMEM;
2895b316468SNaohiro Aota 
2903c9daa09SJohannes Thumshirn 	if (!bdev_is_zoned(bdev)) {
2913c9daa09SJohannes Thumshirn 		if (!fs_info->zone_size) {
2923c9daa09SJohannes Thumshirn 			ret = calculate_emulated_zone_size(fs_info);
2933c9daa09SJohannes Thumshirn 			if (ret)
2943c9daa09SJohannes Thumshirn 				goto out;
2953c9daa09SJohannes Thumshirn 		}
2963c9daa09SJohannes Thumshirn 
2973c9daa09SJohannes Thumshirn 		ASSERT(fs_info->zone_size);
2983c9daa09SJohannes Thumshirn 		zone_sectors = fs_info->zone_size >> SECTOR_SHIFT;
2993c9daa09SJohannes Thumshirn 	} else {
3005b316468SNaohiro Aota 		zone_sectors = bdev_zone_sectors(bdev);
3013c9daa09SJohannes Thumshirn 	}
3023c9daa09SJohannes Thumshirn 
3033c9daa09SJohannes Thumshirn 	nr_sectors = bdev_nr_sectors(bdev);
3045b316468SNaohiro Aota 	/* Check if it's power of 2 (see is_power_of_2) */
3055b316468SNaohiro Aota 	ASSERT(zone_sectors != 0 && (zone_sectors & (zone_sectors - 1)) == 0);
3065b316468SNaohiro Aota 	zone_info->zone_size = zone_sectors << SECTOR_SHIFT;
3075b316468SNaohiro Aota 	zone_info->zone_size_shift = ilog2(zone_info->zone_size);
308862931c7SNaohiro Aota 	zone_info->max_zone_append_size =
309862931c7SNaohiro Aota 		(u64)queue_max_zone_append_sectors(queue) << SECTOR_SHIFT;
3105b316468SNaohiro Aota 	zone_info->nr_zones = nr_sectors >> ilog2(zone_sectors);
3115b316468SNaohiro Aota 	if (!IS_ALIGNED(nr_sectors, zone_sectors))
3125b316468SNaohiro Aota 		zone_info->nr_zones++;
3135b316468SNaohiro Aota 
3145b316468SNaohiro Aota 	zone_info->seq_zones = bitmap_zalloc(zone_info->nr_zones, GFP_KERNEL);
3155b316468SNaohiro Aota 	if (!zone_info->seq_zones) {
3165b316468SNaohiro Aota 		ret = -ENOMEM;
3175b316468SNaohiro Aota 		goto out;
3185b316468SNaohiro Aota 	}
3195b316468SNaohiro Aota 
3205b316468SNaohiro Aota 	zone_info->empty_zones = bitmap_zalloc(zone_info->nr_zones, GFP_KERNEL);
3215b316468SNaohiro Aota 	if (!zone_info->empty_zones) {
3225b316468SNaohiro Aota 		ret = -ENOMEM;
3235b316468SNaohiro Aota 		goto out;
3245b316468SNaohiro Aota 	}
3255b316468SNaohiro Aota 
3265b316468SNaohiro Aota 	zones = kcalloc(BTRFS_REPORT_NR_ZONES, sizeof(struct blk_zone), GFP_KERNEL);
3275b316468SNaohiro Aota 	if (!zones) {
3285b316468SNaohiro Aota 		ret = -ENOMEM;
3295b316468SNaohiro Aota 		goto out;
3305b316468SNaohiro Aota 	}
3315b316468SNaohiro Aota 
3325b316468SNaohiro Aota 	/* Get zones type */
3335b316468SNaohiro Aota 	while (sector < nr_sectors) {
3345b316468SNaohiro Aota 		nr_zones = BTRFS_REPORT_NR_ZONES;
3355b316468SNaohiro Aota 		ret = btrfs_get_dev_zones(device, sector << SECTOR_SHIFT, zones,
3365b316468SNaohiro Aota 					  &nr_zones);
3375b316468SNaohiro Aota 		if (ret)
3385b316468SNaohiro Aota 			goto out;
3395b316468SNaohiro Aota 
3405b316468SNaohiro Aota 		for (i = 0; i < nr_zones; i++) {
3415b316468SNaohiro Aota 			if (zones[i].type == BLK_ZONE_TYPE_SEQWRITE_REQ)
3425b316468SNaohiro Aota 				__set_bit(nreported, zone_info->seq_zones);
3435b316468SNaohiro Aota 			if (zones[i].cond == BLK_ZONE_COND_EMPTY)
3445b316468SNaohiro Aota 				__set_bit(nreported, zone_info->empty_zones);
3455b316468SNaohiro Aota 			nreported++;
3465b316468SNaohiro Aota 		}
3475b316468SNaohiro Aota 		sector = zones[nr_zones - 1].start + zones[nr_zones - 1].len;
3485b316468SNaohiro Aota 	}
3495b316468SNaohiro Aota 
3505b316468SNaohiro Aota 	if (nreported != zone_info->nr_zones) {
3515b316468SNaohiro Aota 		btrfs_err_in_rcu(device->fs_info,
3525b316468SNaohiro Aota 				 "inconsistent number of zones on %s (%u/%u)",
3535b316468SNaohiro Aota 				 rcu_str_deref(device->name), nreported,
3545b316468SNaohiro Aota 				 zone_info->nr_zones);
3555b316468SNaohiro Aota 		ret = -EIO;
3565b316468SNaohiro Aota 		goto out;
3575b316468SNaohiro Aota 	}
3585b316468SNaohiro Aota 
35912659251SNaohiro Aota 	/* Validate superblock log */
36012659251SNaohiro Aota 	nr_zones = BTRFS_NR_SB_LOG_ZONES;
36112659251SNaohiro Aota 	for (i = 0; i < BTRFS_SUPER_MIRROR_MAX; i++) {
36212659251SNaohiro Aota 		u32 sb_zone;
36312659251SNaohiro Aota 		u64 sb_wp;
36412659251SNaohiro Aota 		int sb_pos = BTRFS_NR_SB_LOG_ZONES * i;
36512659251SNaohiro Aota 
36612659251SNaohiro Aota 		sb_zone = sb_zone_number(zone_info->zone_size_shift, i);
36712659251SNaohiro Aota 		if (sb_zone + 1 >= zone_info->nr_zones)
36812659251SNaohiro Aota 			continue;
36912659251SNaohiro Aota 
37012659251SNaohiro Aota 		sector = sb_zone << (zone_info->zone_size_shift - SECTOR_SHIFT);
37112659251SNaohiro Aota 		ret = btrfs_get_dev_zones(device, sector << SECTOR_SHIFT,
37212659251SNaohiro Aota 					  &zone_info->sb_zones[sb_pos],
37312659251SNaohiro Aota 					  &nr_zones);
37412659251SNaohiro Aota 		if (ret)
37512659251SNaohiro Aota 			goto out;
37612659251SNaohiro Aota 
37712659251SNaohiro Aota 		if (nr_zones != BTRFS_NR_SB_LOG_ZONES) {
37812659251SNaohiro Aota 			btrfs_err_in_rcu(device->fs_info,
37912659251SNaohiro Aota 	"zoned: failed to read super block log zone info at devid %llu zone %u",
38012659251SNaohiro Aota 					 device->devid, sb_zone);
38112659251SNaohiro Aota 			ret = -EUCLEAN;
38212659251SNaohiro Aota 			goto out;
38312659251SNaohiro Aota 		}
38412659251SNaohiro Aota 
38512659251SNaohiro Aota 		/*
38612659251SNaohiro Aota 		 * If zones[0] is conventional, always use the beggining of the
38712659251SNaohiro Aota 		 * zone to record superblock. No need to validate in that case.
38812659251SNaohiro Aota 		 */
38912659251SNaohiro Aota 		if (zone_info->sb_zones[BTRFS_NR_SB_LOG_ZONES * i].type ==
39012659251SNaohiro Aota 		    BLK_ZONE_TYPE_CONVENTIONAL)
39112659251SNaohiro Aota 			continue;
39212659251SNaohiro Aota 
39312659251SNaohiro Aota 		ret = sb_write_pointer(device->bdev,
39412659251SNaohiro Aota 				       &zone_info->sb_zones[sb_pos], &sb_wp);
39512659251SNaohiro Aota 		if (ret != -ENOENT && ret) {
39612659251SNaohiro Aota 			btrfs_err_in_rcu(device->fs_info,
39712659251SNaohiro Aota 			"zoned: super block log zone corrupted devid %llu zone %u",
39812659251SNaohiro Aota 					 device->devid, sb_zone);
39912659251SNaohiro Aota 			ret = -EUCLEAN;
40012659251SNaohiro Aota 			goto out;
40112659251SNaohiro Aota 		}
40212659251SNaohiro Aota 	}
40312659251SNaohiro Aota 
40412659251SNaohiro Aota 
4055b316468SNaohiro Aota 	kfree(zones);
4065b316468SNaohiro Aota 
4075b316468SNaohiro Aota 	device->zone_info = zone_info;
4085b316468SNaohiro Aota 
4093c9daa09SJohannes Thumshirn 	switch (bdev_zoned_model(bdev)) {
4103c9daa09SJohannes Thumshirn 	case BLK_ZONED_HM:
4113c9daa09SJohannes Thumshirn 		model = "host-managed zoned";
4123c9daa09SJohannes Thumshirn 		emulated = "";
4133c9daa09SJohannes Thumshirn 		break;
4143c9daa09SJohannes Thumshirn 	case BLK_ZONED_HA:
4153c9daa09SJohannes Thumshirn 		model = "host-aware zoned";
4163c9daa09SJohannes Thumshirn 		emulated = "";
4173c9daa09SJohannes Thumshirn 		break;
4183c9daa09SJohannes Thumshirn 	case BLK_ZONED_NONE:
4193c9daa09SJohannes Thumshirn 		model = "regular";
4203c9daa09SJohannes Thumshirn 		emulated = "emulated ";
4213c9daa09SJohannes Thumshirn 		break;
4223c9daa09SJohannes Thumshirn 	default:
4233c9daa09SJohannes Thumshirn 		/* Just in case */
4243c9daa09SJohannes Thumshirn 		btrfs_err_in_rcu(fs_info, "zoned: unsupported model %d on %s",
4253c9daa09SJohannes Thumshirn 				 bdev_zoned_model(bdev),
4263c9daa09SJohannes Thumshirn 				 rcu_str_deref(device->name));
4273c9daa09SJohannes Thumshirn 		ret = -EOPNOTSUPP;
4283c9daa09SJohannes Thumshirn 		goto out_free_zone_info;
4293c9daa09SJohannes Thumshirn 	}
4303c9daa09SJohannes Thumshirn 
4313c9daa09SJohannes Thumshirn 	btrfs_info_in_rcu(fs_info,
4323c9daa09SJohannes Thumshirn 		"%s block device %s, %u %szones of %llu bytes",
4333c9daa09SJohannes Thumshirn 		model, rcu_str_deref(device->name), zone_info->nr_zones,
4343c9daa09SJohannes Thumshirn 		emulated, zone_info->zone_size);
4355b316468SNaohiro Aota 
4365b316468SNaohiro Aota 	return 0;
4375b316468SNaohiro Aota 
4385b316468SNaohiro Aota out:
4395b316468SNaohiro Aota 	kfree(zones);
4403c9daa09SJohannes Thumshirn out_free_zone_info:
4415b316468SNaohiro Aota 	bitmap_free(zone_info->empty_zones);
4425b316468SNaohiro Aota 	bitmap_free(zone_info->seq_zones);
4435b316468SNaohiro Aota 	kfree(zone_info);
4443c9daa09SJohannes Thumshirn 	device->zone_info = NULL;
4455b316468SNaohiro Aota 
4465b316468SNaohiro Aota 	return ret;
4475b316468SNaohiro Aota }
4485b316468SNaohiro Aota 
4495b316468SNaohiro Aota void btrfs_destroy_dev_zone_info(struct btrfs_device *device)
4505b316468SNaohiro Aota {
4515b316468SNaohiro Aota 	struct btrfs_zoned_device_info *zone_info = device->zone_info;
4525b316468SNaohiro Aota 
4535b316468SNaohiro Aota 	if (!zone_info)
4545b316468SNaohiro Aota 		return;
4555b316468SNaohiro Aota 
4565b316468SNaohiro Aota 	bitmap_free(zone_info->seq_zones);
4575b316468SNaohiro Aota 	bitmap_free(zone_info->empty_zones);
4585b316468SNaohiro Aota 	kfree(zone_info);
4595b316468SNaohiro Aota 	device->zone_info = NULL;
4605b316468SNaohiro Aota }
4615b316468SNaohiro Aota 
4625b316468SNaohiro Aota int btrfs_get_dev_zone(struct btrfs_device *device, u64 pos,
4635b316468SNaohiro Aota 		       struct blk_zone *zone)
4645b316468SNaohiro Aota {
4655b316468SNaohiro Aota 	unsigned int nr_zones = 1;
4665b316468SNaohiro Aota 	int ret;
4675b316468SNaohiro Aota 
4685b316468SNaohiro Aota 	ret = btrfs_get_dev_zones(device, pos, zone, &nr_zones);
4695b316468SNaohiro Aota 	if (ret != 0 || !nr_zones)
4705b316468SNaohiro Aota 		return ret ? ret : -EIO;
4715b316468SNaohiro Aota 
4725b316468SNaohiro Aota 	return 0;
4735b316468SNaohiro Aota }
474b70f5097SNaohiro Aota 
475b70f5097SNaohiro Aota int btrfs_check_zoned_mode(struct btrfs_fs_info *fs_info)
476b70f5097SNaohiro Aota {
477b70f5097SNaohiro Aota 	struct btrfs_fs_devices *fs_devices = fs_info->fs_devices;
478b70f5097SNaohiro Aota 	struct btrfs_device *device;
479b70f5097SNaohiro Aota 	u64 zoned_devices = 0;
480b70f5097SNaohiro Aota 	u64 nr_devices = 0;
481b70f5097SNaohiro Aota 	u64 zone_size = 0;
482862931c7SNaohiro Aota 	u64 max_zone_append_size = 0;
4833c9daa09SJohannes Thumshirn 	const bool incompat_zoned = btrfs_fs_incompat(fs_info, ZONED);
484b70f5097SNaohiro Aota 	int ret = 0;
485b70f5097SNaohiro Aota 
486b70f5097SNaohiro Aota 	/* Count zoned devices */
487b70f5097SNaohiro Aota 	list_for_each_entry(device, &fs_devices->devices, dev_list) {
488b70f5097SNaohiro Aota 		enum blk_zoned_model model;
489b70f5097SNaohiro Aota 
490b70f5097SNaohiro Aota 		if (!device->bdev)
491b70f5097SNaohiro Aota 			continue;
492b70f5097SNaohiro Aota 
493b70f5097SNaohiro Aota 		model = bdev_zoned_model(device->bdev);
4943c9daa09SJohannes Thumshirn 		/*
4953c9daa09SJohannes Thumshirn 		 * A Host-Managed zoned device must be used as a zoned device.
4963c9daa09SJohannes Thumshirn 		 * A Host-Aware zoned device and a non-zoned devices can be
4973c9daa09SJohannes Thumshirn 		 * treated as a zoned device, if ZONED flag is enabled in the
4983c9daa09SJohannes Thumshirn 		 * superblock.
4993c9daa09SJohannes Thumshirn 		 */
500b70f5097SNaohiro Aota 		if (model == BLK_ZONED_HM ||
5013c9daa09SJohannes Thumshirn 		    (model == BLK_ZONED_HA && incompat_zoned) ||
5023c9daa09SJohannes Thumshirn 		    (model == BLK_ZONED_NONE && incompat_zoned)) {
5033c9daa09SJohannes Thumshirn 			struct btrfs_zoned_device_info *zone_info =
5043c9daa09SJohannes Thumshirn 				device->zone_info;
505862931c7SNaohiro Aota 
506862931c7SNaohiro Aota 			zone_info = device->zone_info;
507b70f5097SNaohiro Aota 			zoned_devices++;
508b70f5097SNaohiro Aota 			if (!zone_size) {
509862931c7SNaohiro Aota 				zone_size = zone_info->zone_size;
510862931c7SNaohiro Aota 			} else if (zone_info->zone_size != zone_size) {
511b70f5097SNaohiro Aota 				btrfs_err(fs_info,
512b70f5097SNaohiro Aota 		"zoned: unequal block device zone sizes: have %llu found %llu",
513b70f5097SNaohiro Aota 					  device->zone_info->zone_size,
514b70f5097SNaohiro Aota 					  zone_size);
515b70f5097SNaohiro Aota 				ret = -EINVAL;
516b70f5097SNaohiro Aota 				goto out;
517b70f5097SNaohiro Aota 			}
518862931c7SNaohiro Aota 			if (!max_zone_append_size ||
519862931c7SNaohiro Aota 			    (zone_info->max_zone_append_size &&
520862931c7SNaohiro Aota 			     zone_info->max_zone_append_size < max_zone_append_size))
521862931c7SNaohiro Aota 				max_zone_append_size =
522862931c7SNaohiro Aota 					zone_info->max_zone_append_size;
523b70f5097SNaohiro Aota 		}
524b70f5097SNaohiro Aota 		nr_devices++;
525b70f5097SNaohiro Aota 	}
526b70f5097SNaohiro Aota 
527b70f5097SNaohiro Aota 	if (!zoned_devices && !incompat_zoned)
528b70f5097SNaohiro Aota 		goto out;
529b70f5097SNaohiro Aota 
530b70f5097SNaohiro Aota 	if (!zoned_devices && incompat_zoned) {
531b70f5097SNaohiro Aota 		/* No zoned block device found on ZONED filesystem */
532b70f5097SNaohiro Aota 		btrfs_err(fs_info,
533b70f5097SNaohiro Aota 			  "zoned: no zoned devices found on a zoned filesystem");
534b70f5097SNaohiro Aota 		ret = -EINVAL;
535b70f5097SNaohiro Aota 		goto out;
536b70f5097SNaohiro Aota 	}
537b70f5097SNaohiro Aota 
538b70f5097SNaohiro Aota 	if (zoned_devices && !incompat_zoned) {
539b70f5097SNaohiro Aota 		btrfs_err(fs_info,
540b70f5097SNaohiro Aota 			  "zoned: mode not enabled but zoned device found");
541b70f5097SNaohiro Aota 		ret = -EINVAL;
542b70f5097SNaohiro Aota 		goto out;
543b70f5097SNaohiro Aota 	}
544b70f5097SNaohiro Aota 
545b70f5097SNaohiro Aota 	if (zoned_devices != nr_devices) {
546b70f5097SNaohiro Aota 		btrfs_err(fs_info,
547b70f5097SNaohiro Aota 			  "zoned: cannot mix zoned and regular devices");
548b70f5097SNaohiro Aota 		ret = -EINVAL;
549b70f5097SNaohiro Aota 		goto out;
550b70f5097SNaohiro Aota 	}
551b70f5097SNaohiro Aota 
552b70f5097SNaohiro Aota 	/*
553b70f5097SNaohiro Aota 	 * stripe_size is always aligned to BTRFS_STRIPE_LEN in
554b70f5097SNaohiro Aota 	 * __btrfs_alloc_chunk(). Since we want stripe_len == zone_size,
555b70f5097SNaohiro Aota 	 * check the alignment here.
556b70f5097SNaohiro Aota 	 */
557b70f5097SNaohiro Aota 	if (!IS_ALIGNED(zone_size, BTRFS_STRIPE_LEN)) {
558b70f5097SNaohiro Aota 		btrfs_err(fs_info,
559b70f5097SNaohiro Aota 			  "zoned: zone size %llu not aligned to stripe %u",
560b70f5097SNaohiro Aota 			  zone_size, BTRFS_STRIPE_LEN);
561b70f5097SNaohiro Aota 		ret = -EINVAL;
562b70f5097SNaohiro Aota 		goto out;
563b70f5097SNaohiro Aota 	}
564b70f5097SNaohiro Aota 
565a589dde0SNaohiro Aota 	if (btrfs_fs_incompat(fs_info, MIXED_GROUPS)) {
566a589dde0SNaohiro Aota 		btrfs_err(fs_info, "zoned: mixed block groups not supported");
567a589dde0SNaohiro Aota 		ret = -EINVAL;
568a589dde0SNaohiro Aota 		goto out;
569a589dde0SNaohiro Aota 	}
570a589dde0SNaohiro Aota 
571b70f5097SNaohiro Aota 	fs_info->zone_size = zone_size;
572862931c7SNaohiro Aota 	fs_info->max_zone_append_size = max_zone_append_size;
5731cd6121fSNaohiro Aota 	fs_info->fs_devices->chunk_alloc_policy = BTRFS_CHUNK_ALLOC_ZONED;
574b70f5097SNaohiro Aota 
575b53429baSJohannes Thumshirn 	/*
576b53429baSJohannes Thumshirn 	 * Check mount options here, because we might change fs_info->zoned
577b53429baSJohannes Thumshirn 	 * from fs_info->zone_size.
578b53429baSJohannes Thumshirn 	 */
579b53429baSJohannes Thumshirn 	ret = btrfs_check_mountopts_zoned(fs_info);
580b53429baSJohannes Thumshirn 	if (ret)
581b53429baSJohannes Thumshirn 		goto out;
582b53429baSJohannes Thumshirn 
583b70f5097SNaohiro Aota 	btrfs_info(fs_info, "zoned mode enabled with zone size %llu", zone_size);
584b70f5097SNaohiro Aota out:
585b70f5097SNaohiro Aota 	return ret;
586b70f5097SNaohiro Aota }
5875d1ab66cSNaohiro Aota 
5885d1ab66cSNaohiro Aota int btrfs_check_mountopts_zoned(struct btrfs_fs_info *info)
5895d1ab66cSNaohiro Aota {
5905d1ab66cSNaohiro Aota 	if (!btrfs_is_zoned(info))
5915d1ab66cSNaohiro Aota 		return 0;
5925d1ab66cSNaohiro Aota 
5935d1ab66cSNaohiro Aota 	/*
5945d1ab66cSNaohiro Aota 	 * Space cache writing is not COWed. Disable that to avoid write errors
5955d1ab66cSNaohiro Aota 	 * in sequential zones.
5965d1ab66cSNaohiro Aota 	 */
5975d1ab66cSNaohiro Aota 	if (btrfs_test_opt(info, SPACE_CACHE)) {
5985d1ab66cSNaohiro Aota 		btrfs_err(info, "zoned: space cache v1 is not supported");
5995d1ab66cSNaohiro Aota 		return -EINVAL;
6005d1ab66cSNaohiro Aota 	}
6015d1ab66cSNaohiro Aota 
602d206e9c9SNaohiro Aota 	if (btrfs_test_opt(info, NODATACOW)) {
603d206e9c9SNaohiro Aota 		btrfs_err(info, "zoned: NODATACOW not supported");
604d206e9c9SNaohiro Aota 		return -EINVAL;
605d206e9c9SNaohiro Aota 	}
606d206e9c9SNaohiro Aota 
6075d1ab66cSNaohiro Aota 	return 0;
6085d1ab66cSNaohiro Aota }
60912659251SNaohiro Aota 
61012659251SNaohiro Aota static int sb_log_location(struct block_device *bdev, struct blk_zone *zones,
61112659251SNaohiro Aota 			   int rw, u64 *bytenr_ret)
61212659251SNaohiro Aota {
61312659251SNaohiro Aota 	u64 wp;
61412659251SNaohiro Aota 	int ret;
61512659251SNaohiro Aota 
61612659251SNaohiro Aota 	if (zones[0].type == BLK_ZONE_TYPE_CONVENTIONAL) {
61712659251SNaohiro Aota 		*bytenr_ret = zones[0].start << SECTOR_SHIFT;
61812659251SNaohiro Aota 		return 0;
61912659251SNaohiro Aota 	}
62012659251SNaohiro Aota 
62112659251SNaohiro Aota 	ret = sb_write_pointer(bdev, zones, &wp);
62212659251SNaohiro Aota 	if (ret != -ENOENT && ret < 0)
62312659251SNaohiro Aota 		return ret;
62412659251SNaohiro Aota 
62512659251SNaohiro Aota 	if (rw == WRITE) {
62612659251SNaohiro Aota 		struct blk_zone *reset = NULL;
62712659251SNaohiro Aota 
62812659251SNaohiro Aota 		if (wp == zones[0].start << SECTOR_SHIFT)
62912659251SNaohiro Aota 			reset = &zones[0];
63012659251SNaohiro Aota 		else if (wp == zones[1].start << SECTOR_SHIFT)
63112659251SNaohiro Aota 			reset = &zones[1];
63212659251SNaohiro Aota 
63312659251SNaohiro Aota 		if (reset && reset->cond != BLK_ZONE_COND_EMPTY) {
63412659251SNaohiro Aota 			ASSERT(reset->cond == BLK_ZONE_COND_FULL);
63512659251SNaohiro Aota 
63612659251SNaohiro Aota 			ret = blkdev_zone_mgmt(bdev, REQ_OP_ZONE_RESET,
63712659251SNaohiro Aota 					       reset->start, reset->len,
63812659251SNaohiro Aota 					       GFP_NOFS);
63912659251SNaohiro Aota 			if (ret)
64012659251SNaohiro Aota 				return ret;
64112659251SNaohiro Aota 
64212659251SNaohiro Aota 			reset->cond = BLK_ZONE_COND_EMPTY;
64312659251SNaohiro Aota 			reset->wp = reset->start;
64412659251SNaohiro Aota 		}
64512659251SNaohiro Aota 	} else if (ret != -ENOENT) {
64612659251SNaohiro Aota 		/* For READ, we want the precious one */
64712659251SNaohiro Aota 		if (wp == zones[0].start << SECTOR_SHIFT)
64812659251SNaohiro Aota 			wp = (zones[1].start + zones[1].len) << SECTOR_SHIFT;
64912659251SNaohiro Aota 		wp -= BTRFS_SUPER_INFO_SIZE;
65012659251SNaohiro Aota 	}
65112659251SNaohiro Aota 
65212659251SNaohiro Aota 	*bytenr_ret = wp;
65312659251SNaohiro Aota 	return 0;
65412659251SNaohiro Aota 
65512659251SNaohiro Aota }
65612659251SNaohiro Aota 
65712659251SNaohiro Aota int btrfs_sb_log_location_bdev(struct block_device *bdev, int mirror, int rw,
65812659251SNaohiro Aota 			       u64 *bytenr_ret)
65912659251SNaohiro Aota {
66012659251SNaohiro Aota 	struct blk_zone zones[BTRFS_NR_SB_LOG_ZONES];
661*d734492aSNaohiro Aota 	sector_t zone_sectors;
66212659251SNaohiro Aota 	u32 sb_zone;
66312659251SNaohiro Aota 	int ret;
66412659251SNaohiro Aota 	u8 zone_sectors_shift;
66512659251SNaohiro Aota 	sector_t nr_sectors;
66612659251SNaohiro Aota 	u32 nr_zones;
66712659251SNaohiro Aota 
66812659251SNaohiro Aota 	if (!bdev_is_zoned(bdev)) {
66912659251SNaohiro Aota 		*bytenr_ret = btrfs_sb_offset(mirror);
67012659251SNaohiro Aota 		return 0;
67112659251SNaohiro Aota 	}
67212659251SNaohiro Aota 
67312659251SNaohiro Aota 	ASSERT(rw == READ || rw == WRITE);
67412659251SNaohiro Aota 
67512659251SNaohiro Aota 	zone_sectors = bdev_zone_sectors(bdev);
67612659251SNaohiro Aota 	if (!is_power_of_2(zone_sectors))
67712659251SNaohiro Aota 		return -EINVAL;
67812659251SNaohiro Aota 	zone_sectors_shift = ilog2(zone_sectors);
679ac7ac461SLinus Torvalds 	nr_sectors = bdev_nr_sectors(bdev);
68012659251SNaohiro Aota 	nr_zones = nr_sectors >> zone_sectors_shift;
68112659251SNaohiro Aota 
68212659251SNaohiro Aota 	sb_zone = sb_zone_number(zone_sectors_shift + SECTOR_SHIFT, mirror);
68312659251SNaohiro Aota 	if (sb_zone + 1 >= nr_zones)
68412659251SNaohiro Aota 		return -ENOENT;
68512659251SNaohiro Aota 
68612659251SNaohiro Aota 	ret = blkdev_report_zones(bdev, sb_zone << zone_sectors_shift,
68712659251SNaohiro Aota 				  BTRFS_NR_SB_LOG_ZONES, copy_zone_info_cb,
68812659251SNaohiro Aota 				  zones);
68912659251SNaohiro Aota 	if (ret < 0)
69012659251SNaohiro Aota 		return ret;
69112659251SNaohiro Aota 	if (ret != BTRFS_NR_SB_LOG_ZONES)
69212659251SNaohiro Aota 		return -EIO;
69312659251SNaohiro Aota 
69412659251SNaohiro Aota 	return sb_log_location(bdev, zones, rw, bytenr_ret);
69512659251SNaohiro Aota }
69612659251SNaohiro Aota 
69712659251SNaohiro Aota int btrfs_sb_log_location(struct btrfs_device *device, int mirror, int rw,
69812659251SNaohiro Aota 			  u64 *bytenr_ret)
69912659251SNaohiro Aota {
70012659251SNaohiro Aota 	struct btrfs_zoned_device_info *zinfo = device->zone_info;
70112659251SNaohiro Aota 	u32 zone_num;
70212659251SNaohiro Aota 
703d6639b35SNaohiro Aota 	/*
704d6639b35SNaohiro Aota 	 * For a zoned filesystem on a non-zoned block device, use the same
705d6639b35SNaohiro Aota 	 * super block locations as regular filesystem. Doing so, the super
706d6639b35SNaohiro Aota 	 * block can always be retrieved and the zoned flag of the volume
707d6639b35SNaohiro Aota 	 * detected from the super block information.
708d6639b35SNaohiro Aota 	 */
709d6639b35SNaohiro Aota 	if (!bdev_is_zoned(device->bdev)) {
71012659251SNaohiro Aota 		*bytenr_ret = btrfs_sb_offset(mirror);
71112659251SNaohiro Aota 		return 0;
71212659251SNaohiro Aota 	}
71312659251SNaohiro Aota 
71412659251SNaohiro Aota 	zone_num = sb_zone_number(zinfo->zone_size_shift, mirror);
71512659251SNaohiro Aota 	if (zone_num + 1 >= zinfo->nr_zones)
71612659251SNaohiro Aota 		return -ENOENT;
71712659251SNaohiro Aota 
71812659251SNaohiro Aota 	return sb_log_location(device->bdev,
71912659251SNaohiro Aota 			       &zinfo->sb_zones[BTRFS_NR_SB_LOG_ZONES * mirror],
72012659251SNaohiro Aota 			       rw, bytenr_ret);
72112659251SNaohiro Aota }
72212659251SNaohiro Aota 
72312659251SNaohiro Aota static inline bool is_sb_log_zone(struct btrfs_zoned_device_info *zinfo,
72412659251SNaohiro Aota 				  int mirror)
72512659251SNaohiro Aota {
72612659251SNaohiro Aota 	u32 zone_num;
72712659251SNaohiro Aota 
72812659251SNaohiro Aota 	if (!zinfo)
72912659251SNaohiro Aota 		return false;
73012659251SNaohiro Aota 
73112659251SNaohiro Aota 	zone_num = sb_zone_number(zinfo->zone_size_shift, mirror);
73212659251SNaohiro Aota 	if (zone_num + 1 >= zinfo->nr_zones)
73312659251SNaohiro Aota 		return false;
73412659251SNaohiro Aota 
73512659251SNaohiro Aota 	if (!test_bit(zone_num, zinfo->seq_zones))
73612659251SNaohiro Aota 		return false;
73712659251SNaohiro Aota 
73812659251SNaohiro Aota 	return true;
73912659251SNaohiro Aota }
74012659251SNaohiro Aota 
74112659251SNaohiro Aota void btrfs_advance_sb_log(struct btrfs_device *device, int mirror)
74212659251SNaohiro Aota {
74312659251SNaohiro Aota 	struct btrfs_zoned_device_info *zinfo = device->zone_info;
74412659251SNaohiro Aota 	struct blk_zone *zone;
74512659251SNaohiro Aota 
74612659251SNaohiro Aota 	if (!is_sb_log_zone(zinfo, mirror))
74712659251SNaohiro Aota 		return;
74812659251SNaohiro Aota 
74912659251SNaohiro Aota 	zone = &zinfo->sb_zones[BTRFS_NR_SB_LOG_ZONES * mirror];
75012659251SNaohiro Aota 	if (zone->cond != BLK_ZONE_COND_FULL) {
75112659251SNaohiro Aota 		if (zone->cond == BLK_ZONE_COND_EMPTY)
75212659251SNaohiro Aota 			zone->cond = BLK_ZONE_COND_IMP_OPEN;
75312659251SNaohiro Aota 
75412659251SNaohiro Aota 		zone->wp += (BTRFS_SUPER_INFO_SIZE >> SECTOR_SHIFT);
75512659251SNaohiro Aota 
75612659251SNaohiro Aota 		if (zone->wp == zone->start + zone->len)
75712659251SNaohiro Aota 			zone->cond = BLK_ZONE_COND_FULL;
75812659251SNaohiro Aota 
75912659251SNaohiro Aota 		return;
76012659251SNaohiro Aota 	}
76112659251SNaohiro Aota 
76212659251SNaohiro Aota 	zone++;
76312659251SNaohiro Aota 	ASSERT(zone->cond != BLK_ZONE_COND_FULL);
76412659251SNaohiro Aota 	if (zone->cond == BLK_ZONE_COND_EMPTY)
76512659251SNaohiro Aota 		zone->cond = BLK_ZONE_COND_IMP_OPEN;
76612659251SNaohiro Aota 
76712659251SNaohiro Aota 	zone->wp += (BTRFS_SUPER_INFO_SIZE >> SECTOR_SHIFT);
76812659251SNaohiro Aota 
76912659251SNaohiro Aota 	if (zone->wp == zone->start + zone->len)
77012659251SNaohiro Aota 		zone->cond = BLK_ZONE_COND_FULL;
77112659251SNaohiro Aota }
77212659251SNaohiro Aota 
77312659251SNaohiro Aota int btrfs_reset_sb_log_zones(struct block_device *bdev, int mirror)
77412659251SNaohiro Aota {
77512659251SNaohiro Aota 	sector_t zone_sectors;
77612659251SNaohiro Aota 	sector_t nr_sectors;
77712659251SNaohiro Aota 	u8 zone_sectors_shift;
77812659251SNaohiro Aota 	u32 sb_zone;
77912659251SNaohiro Aota 	u32 nr_zones;
78012659251SNaohiro Aota 
78112659251SNaohiro Aota 	zone_sectors = bdev_zone_sectors(bdev);
78212659251SNaohiro Aota 	zone_sectors_shift = ilog2(zone_sectors);
783ac7ac461SLinus Torvalds 	nr_sectors = bdev_nr_sectors(bdev);
78412659251SNaohiro Aota 	nr_zones = nr_sectors >> zone_sectors_shift;
78512659251SNaohiro Aota 
78612659251SNaohiro Aota 	sb_zone = sb_zone_number(zone_sectors_shift + SECTOR_SHIFT, mirror);
78712659251SNaohiro Aota 	if (sb_zone + 1 >= nr_zones)
78812659251SNaohiro Aota 		return -ENOENT;
78912659251SNaohiro Aota 
79012659251SNaohiro Aota 	return blkdev_zone_mgmt(bdev, REQ_OP_ZONE_RESET,
79112659251SNaohiro Aota 				sb_zone << zone_sectors_shift,
79212659251SNaohiro Aota 				zone_sectors * BTRFS_NR_SB_LOG_ZONES, GFP_NOFS);
79312659251SNaohiro Aota }
7941cd6121fSNaohiro Aota 
7951cd6121fSNaohiro Aota /**
7961cd6121fSNaohiro Aota  * btrfs_find_allocatable_zones - find allocatable zones within a given region
7971cd6121fSNaohiro Aota  *
7981cd6121fSNaohiro Aota  * @device:	the device to allocate a region on
7991cd6121fSNaohiro Aota  * @hole_start: the position of the hole to allocate the region
8001cd6121fSNaohiro Aota  * @num_bytes:	size of wanted region
8011cd6121fSNaohiro Aota  * @hole_end:	the end of the hole
8021cd6121fSNaohiro Aota  * @return:	position of allocatable zones
8031cd6121fSNaohiro Aota  *
8041cd6121fSNaohiro Aota  * Allocatable region should not contain any superblock locations.
8051cd6121fSNaohiro Aota  */
8061cd6121fSNaohiro Aota u64 btrfs_find_allocatable_zones(struct btrfs_device *device, u64 hole_start,
8071cd6121fSNaohiro Aota 				 u64 hole_end, u64 num_bytes)
8081cd6121fSNaohiro Aota {
8091cd6121fSNaohiro Aota 	struct btrfs_zoned_device_info *zinfo = device->zone_info;
8101cd6121fSNaohiro Aota 	const u8 shift = zinfo->zone_size_shift;
8111cd6121fSNaohiro Aota 	u64 nzones = num_bytes >> shift;
8121cd6121fSNaohiro Aota 	u64 pos = hole_start;
8131cd6121fSNaohiro Aota 	u64 begin, end;
8141cd6121fSNaohiro Aota 	bool have_sb;
8151cd6121fSNaohiro Aota 	int i;
8161cd6121fSNaohiro Aota 
8171cd6121fSNaohiro Aota 	ASSERT(IS_ALIGNED(hole_start, zinfo->zone_size));
8181cd6121fSNaohiro Aota 	ASSERT(IS_ALIGNED(num_bytes, zinfo->zone_size));
8191cd6121fSNaohiro Aota 
8201cd6121fSNaohiro Aota 	while (pos < hole_end) {
8211cd6121fSNaohiro Aota 		begin = pos >> shift;
8221cd6121fSNaohiro Aota 		end = begin + nzones;
8231cd6121fSNaohiro Aota 
8241cd6121fSNaohiro Aota 		if (end > zinfo->nr_zones)
8251cd6121fSNaohiro Aota 			return hole_end;
8261cd6121fSNaohiro Aota 
8271cd6121fSNaohiro Aota 		/* Check if zones in the region are all empty */
8281cd6121fSNaohiro Aota 		if (btrfs_dev_is_sequential(device, pos) &&
8291cd6121fSNaohiro Aota 		    find_next_zero_bit(zinfo->empty_zones, end, begin) != end) {
8301cd6121fSNaohiro Aota 			pos += zinfo->zone_size;
8311cd6121fSNaohiro Aota 			continue;
8321cd6121fSNaohiro Aota 		}
8331cd6121fSNaohiro Aota 
8341cd6121fSNaohiro Aota 		have_sb = false;
8351cd6121fSNaohiro Aota 		for (i = 0; i < BTRFS_SUPER_MIRROR_MAX; i++) {
8361cd6121fSNaohiro Aota 			u32 sb_zone;
8371cd6121fSNaohiro Aota 			u64 sb_pos;
8381cd6121fSNaohiro Aota 
8391cd6121fSNaohiro Aota 			sb_zone = sb_zone_number(shift, i);
8401cd6121fSNaohiro Aota 			if (!(end <= sb_zone ||
8411cd6121fSNaohiro Aota 			      sb_zone + BTRFS_NR_SB_LOG_ZONES <= begin)) {
8421cd6121fSNaohiro Aota 				have_sb = true;
8431cd6121fSNaohiro Aota 				pos = ((u64)sb_zone + BTRFS_NR_SB_LOG_ZONES) << shift;
8441cd6121fSNaohiro Aota 				break;
8451cd6121fSNaohiro Aota 			}
8461cd6121fSNaohiro Aota 
8471cd6121fSNaohiro Aota 			/* We also need to exclude regular superblock positions */
8481cd6121fSNaohiro Aota 			sb_pos = btrfs_sb_offset(i);
8491cd6121fSNaohiro Aota 			if (!(pos + num_bytes <= sb_pos ||
8501cd6121fSNaohiro Aota 			      sb_pos + BTRFS_SUPER_INFO_SIZE <= pos)) {
8511cd6121fSNaohiro Aota 				have_sb = true;
8521cd6121fSNaohiro Aota 				pos = ALIGN(sb_pos + BTRFS_SUPER_INFO_SIZE,
8531cd6121fSNaohiro Aota 					    zinfo->zone_size);
8541cd6121fSNaohiro Aota 				break;
8551cd6121fSNaohiro Aota 			}
8561cd6121fSNaohiro Aota 		}
8571cd6121fSNaohiro Aota 		if (!have_sb)
8581cd6121fSNaohiro Aota 			break;
8591cd6121fSNaohiro Aota 	}
8601cd6121fSNaohiro Aota 
8611cd6121fSNaohiro Aota 	return pos;
8621cd6121fSNaohiro Aota }
8631cd6121fSNaohiro Aota 
8641cd6121fSNaohiro Aota int btrfs_reset_device_zone(struct btrfs_device *device, u64 physical,
8651cd6121fSNaohiro Aota 			    u64 length, u64 *bytes)
8661cd6121fSNaohiro Aota {
8671cd6121fSNaohiro Aota 	int ret;
8681cd6121fSNaohiro Aota 
8691cd6121fSNaohiro Aota 	*bytes = 0;
8701cd6121fSNaohiro Aota 	ret = blkdev_zone_mgmt(device->bdev, REQ_OP_ZONE_RESET,
8711cd6121fSNaohiro Aota 			       physical >> SECTOR_SHIFT, length >> SECTOR_SHIFT,
8721cd6121fSNaohiro Aota 			       GFP_NOFS);
8731cd6121fSNaohiro Aota 	if (ret)
8741cd6121fSNaohiro Aota 		return ret;
8751cd6121fSNaohiro Aota 
8761cd6121fSNaohiro Aota 	*bytes = length;
8771cd6121fSNaohiro Aota 	while (length) {
8781cd6121fSNaohiro Aota 		btrfs_dev_set_zone_empty(device, physical);
8791cd6121fSNaohiro Aota 		physical += device->zone_info->zone_size;
8801cd6121fSNaohiro Aota 		length -= device->zone_info->zone_size;
8811cd6121fSNaohiro Aota 	}
8821cd6121fSNaohiro Aota 
8831cd6121fSNaohiro Aota 	return 0;
8841cd6121fSNaohiro Aota }
8851cd6121fSNaohiro Aota 
8861cd6121fSNaohiro Aota int btrfs_ensure_empty_zones(struct btrfs_device *device, u64 start, u64 size)
8871cd6121fSNaohiro Aota {
8881cd6121fSNaohiro Aota 	struct btrfs_zoned_device_info *zinfo = device->zone_info;
8891cd6121fSNaohiro Aota 	const u8 shift = zinfo->zone_size_shift;
8901cd6121fSNaohiro Aota 	unsigned long begin = start >> shift;
8911cd6121fSNaohiro Aota 	unsigned long end = (start + size) >> shift;
8921cd6121fSNaohiro Aota 	u64 pos;
8931cd6121fSNaohiro Aota 	int ret;
8941cd6121fSNaohiro Aota 
8951cd6121fSNaohiro Aota 	ASSERT(IS_ALIGNED(start, zinfo->zone_size));
8961cd6121fSNaohiro Aota 	ASSERT(IS_ALIGNED(size, zinfo->zone_size));
8971cd6121fSNaohiro Aota 
8981cd6121fSNaohiro Aota 	if (end > zinfo->nr_zones)
8991cd6121fSNaohiro Aota 		return -ERANGE;
9001cd6121fSNaohiro Aota 
9011cd6121fSNaohiro Aota 	/* All the zones are conventional */
9021cd6121fSNaohiro Aota 	if (find_next_bit(zinfo->seq_zones, begin, end) == end)
9031cd6121fSNaohiro Aota 		return 0;
9041cd6121fSNaohiro Aota 
9051cd6121fSNaohiro Aota 	/* All the zones are sequential and empty */
9061cd6121fSNaohiro Aota 	if (find_next_zero_bit(zinfo->seq_zones, begin, end) == end &&
9071cd6121fSNaohiro Aota 	    find_next_zero_bit(zinfo->empty_zones, begin, end) == end)
9081cd6121fSNaohiro Aota 		return 0;
9091cd6121fSNaohiro Aota 
9101cd6121fSNaohiro Aota 	for (pos = start; pos < start + size; pos += zinfo->zone_size) {
9111cd6121fSNaohiro Aota 		u64 reset_bytes;
9121cd6121fSNaohiro Aota 
9131cd6121fSNaohiro Aota 		if (!btrfs_dev_is_sequential(device, pos) ||
9141cd6121fSNaohiro Aota 		    btrfs_dev_is_empty_zone(device, pos))
9151cd6121fSNaohiro Aota 			continue;
9161cd6121fSNaohiro Aota 
9171cd6121fSNaohiro Aota 		/* Free regions should be empty */
9181cd6121fSNaohiro Aota 		btrfs_warn_in_rcu(
9191cd6121fSNaohiro Aota 			device->fs_info,
9201cd6121fSNaohiro Aota 		"zoned: resetting device %s (devid %llu) zone %llu for allocation",
9211cd6121fSNaohiro Aota 			rcu_str_deref(device->name), device->devid, pos >> shift);
9221cd6121fSNaohiro Aota 		WARN_ON_ONCE(1);
9231cd6121fSNaohiro Aota 
9241cd6121fSNaohiro Aota 		ret = btrfs_reset_device_zone(device, pos, zinfo->zone_size,
9251cd6121fSNaohiro Aota 					      &reset_bytes);
9261cd6121fSNaohiro Aota 		if (ret)
9271cd6121fSNaohiro Aota 			return ret;
9281cd6121fSNaohiro Aota 	}
9291cd6121fSNaohiro Aota 
9301cd6121fSNaohiro Aota 	return 0;
9311cd6121fSNaohiro Aota }
93208e11a3dSNaohiro Aota 
933a94794d5SNaohiro Aota /*
934a94794d5SNaohiro Aota  * Calculate an allocation pointer from the extent allocation information
935a94794d5SNaohiro Aota  * for a block group consist of conventional zones. It is pointed to the
936a94794d5SNaohiro Aota  * end of the highest addressed extent in the block group as an allocation
937a94794d5SNaohiro Aota  * offset.
938a94794d5SNaohiro Aota  */
939a94794d5SNaohiro Aota static int calculate_alloc_pointer(struct btrfs_block_group *cache,
940a94794d5SNaohiro Aota 				   u64 *offset_ret)
941a94794d5SNaohiro Aota {
942a94794d5SNaohiro Aota 	struct btrfs_fs_info *fs_info = cache->fs_info;
943a94794d5SNaohiro Aota 	struct btrfs_root *root = fs_info->extent_root;
944a94794d5SNaohiro Aota 	struct btrfs_path *path;
945a94794d5SNaohiro Aota 	struct btrfs_key key;
946a94794d5SNaohiro Aota 	struct btrfs_key found_key;
947a94794d5SNaohiro Aota 	int ret;
948a94794d5SNaohiro Aota 	u64 length;
949a94794d5SNaohiro Aota 
950a94794d5SNaohiro Aota 	path = btrfs_alloc_path();
951a94794d5SNaohiro Aota 	if (!path)
952a94794d5SNaohiro Aota 		return -ENOMEM;
953a94794d5SNaohiro Aota 
954a94794d5SNaohiro Aota 	key.objectid = cache->start + cache->length;
955a94794d5SNaohiro Aota 	key.type = 0;
956a94794d5SNaohiro Aota 	key.offset = 0;
957a94794d5SNaohiro Aota 
958a94794d5SNaohiro Aota 	ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
959a94794d5SNaohiro Aota 	/* We should not find the exact match */
960a94794d5SNaohiro Aota 	if (!ret)
961a94794d5SNaohiro Aota 		ret = -EUCLEAN;
962a94794d5SNaohiro Aota 	if (ret < 0)
963a94794d5SNaohiro Aota 		goto out;
964a94794d5SNaohiro Aota 
965a94794d5SNaohiro Aota 	ret = btrfs_previous_extent_item(root, path, cache->start);
966a94794d5SNaohiro Aota 	if (ret) {
967a94794d5SNaohiro Aota 		if (ret == 1) {
968a94794d5SNaohiro Aota 			ret = 0;
969a94794d5SNaohiro Aota 			*offset_ret = 0;
970a94794d5SNaohiro Aota 		}
971a94794d5SNaohiro Aota 		goto out;
972a94794d5SNaohiro Aota 	}
973a94794d5SNaohiro Aota 
974a94794d5SNaohiro Aota 	btrfs_item_key_to_cpu(path->nodes[0], &found_key, path->slots[0]);
975a94794d5SNaohiro Aota 
976a94794d5SNaohiro Aota 	if (found_key.type == BTRFS_EXTENT_ITEM_KEY)
977a94794d5SNaohiro Aota 		length = found_key.offset;
978a94794d5SNaohiro Aota 	else
979a94794d5SNaohiro Aota 		length = fs_info->nodesize;
980a94794d5SNaohiro Aota 
981a94794d5SNaohiro Aota 	if (!(found_key.objectid >= cache->start &&
982a94794d5SNaohiro Aota 	       found_key.objectid + length <= cache->start + cache->length)) {
983a94794d5SNaohiro Aota 		ret = -EUCLEAN;
984a94794d5SNaohiro Aota 		goto out;
985a94794d5SNaohiro Aota 	}
986a94794d5SNaohiro Aota 	*offset_ret = found_key.objectid + length - cache->start;
987a94794d5SNaohiro Aota 	ret = 0;
988a94794d5SNaohiro Aota 
989a94794d5SNaohiro Aota out:
990a94794d5SNaohiro Aota 	btrfs_free_path(path);
991a94794d5SNaohiro Aota 	return ret;
992a94794d5SNaohiro Aota }
993a94794d5SNaohiro Aota 
994a94794d5SNaohiro Aota int btrfs_load_block_group_zone_info(struct btrfs_block_group *cache, bool new)
99508e11a3dSNaohiro Aota {
99608e11a3dSNaohiro Aota 	struct btrfs_fs_info *fs_info = cache->fs_info;
99708e11a3dSNaohiro Aota 	struct extent_map_tree *em_tree = &fs_info->mapping_tree;
99808e11a3dSNaohiro Aota 	struct extent_map *em;
99908e11a3dSNaohiro Aota 	struct map_lookup *map;
100008e11a3dSNaohiro Aota 	struct btrfs_device *device;
100108e11a3dSNaohiro Aota 	u64 logical = cache->start;
100208e11a3dSNaohiro Aota 	u64 length = cache->length;
100308e11a3dSNaohiro Aota 	u64 physical = 0;
100408e11a3dSNaohiro Aota 	int ret;
100508e11a3dSNaohiro Aota 	int i;
100608e11a3dSNaohiro Aota 	unsigned int nofs_flag;
100708e11a3dSNaohiro Aota 	u64 *alloc_offsets = NULL;
1008a94794d5SNaohiro Aota 	u64 last_alloc = 0;
100908e11a3dSNaohiro Aota 	u32 num_sequential = 0, num_conventional = 0;
101008e11a3dSNaohiro Aota 
101108e11a3dSNaohiro Aota 	if (!btrfs_is_zoned(fs_info))
101208e11a3dSNaohiro Aota 		return 0;
101308e11a3dSNaohiro Aota 
101408e11a3dSNaohiro Aota 	/* Sanity check */
101508e11a3dSNaohiro Aota 	if (!IS_ALIGNED(length, fs_info->zone_size)) {
101608e11a3dSNaohiro Aota 		btrfs_err(fs_info,
101708e11a3dSNaohiro Aota 		"zoned: block group %llu len %llu unaligned to zone size %llu",
101808e11a3dSNaohiro Aota 			  logical, length, fs_info->zone_size);
101908e11a3dSNaohiro Aota 		return -EIO;
102008e11a3dSNaohiro Aota 	}
102108e11a3dSNaohiro Aota 
102208e11a3dSNaohiro Aota 	/* Get the chunk mapping */
102308e11a3dSNaohiro Aota 	read_lock(&em_tree->lock);
102408e11a3dSNaohiro Aota 	em = lookup_extent_mapping(em_tree, logical, length);
102508e11a3dSNaohiro Aota 	read_unlock(&em_tree->lock);
102608e11a3dSNaohiro Aota 
102708e11a3dSNaohiro Aota 	if (!em)
102808e11a3dSNaohiro Aota 		return -EINVAL;
102908e11a3dSNaohiro Aota 
103008e11a3dSNaohiro Aota 	map = em->map_lookup;
103108e11a3dSNaohiro Aota 
103208e11a3dSNaohiro Aota 	alloc_offsets = kcalloc(map->num_stripes, sizeof(*alloc_offsets), GFP_NOFS);
103308e11a3dSNaohiro Aota 	if (!alloc_offsets) {
103408e11a3dSNaohiro Aota 		free_extent_map(em);
103508e11a3dSNaohiro Aota 		return -ENOMEM;
103608e11a3dSNaohiro Aota 	}
103708e11a3dSNaohiro Aota 
103808e11a3dSNaohiro Aota 	for (i = 0; i < map->num_stripes; i++) {
103908e11a3dSNaohiro Aota 		bool is_sequential;
104008e11a3dSNaohiro Aota 		struct blk_zone zone;
10416143c23cSNaohiro Aota 		struct btrfs_dev_replace *dev_replace = &fs_info->dev_replace;
10426143c23cSNaohiro Aota 		int dev_replace_is_ongoing = 0;
104308e11a3dSNaohiro Aota 
104408e11a3dSNaohiro Aota 		device = map->stripes[i].dev;
104508e11a3dSNaohiro Aota 		physical = map->stripes[i].physical;
104608e11a3dSNaohiro Aota 
104708e11a3dSNaohiro Aota 		if (device->bdev == NULL) {
104808e11a3dSNaohiro Aota 			alloc_offsets[i] = WP_MISSING_DEV;
104908e11a3dSNaohiro Aota 			continue;
105008e11a3dSNaohiro Aota 		}
105108e11a3dSNaohiro Aota 
105208e11a3dSNaohiro Aota 		is_sequential = btrfs_dev_is_sequential(device, physical);
105308e11a3dSNaohiro Aota 		if (is_sequential)
105408e11a3dSNaohiro Aota 			num_sequential++;
105508e11a3dSNaohiro Aota 		else
105608e11a3dSNaohiro Aota 			num_conventional++;
105708e11a3dSNaohiro Aota 
105808e11a3dSNaohiro Aota 		if (!is_sequential) {
105908e11a3dSNaohiro Aota 			alloc_offsets[i] = WP_CONVENTIONAL;
106008e11a3dSNaohiro Aota 			continue;
106108e11a3dSNaohiro Aota 		}
106208e11a3dSNaohiro Aota 
106308e11a3dSNaohiro Aota 		/*
106408e11a3dSNaohiro Aota 		 * This zone will be used for allocation, so mark this zone
106508e11a3dSNaohiro Aota 		 * non-empty.
106608e11a3dSNaohiro Aota 		 */
106708e11a3dSNaohiro Aota 		btrfs_dev_clear_zone_empty(device, physical);
106808e11a3dSNaohiro Aota 
10696143c23cSNaohiro Aota 		down_read(&dev_replace->rwsem);
10706143c23cSNaohiro Aota 		dev_replace_is_ongoing = btrfs_dev_replace_is_ongoing(dev_replace);
10716143c23cSNaohiro Aota 		if (dev_replace_is_ongoing && dev_replace->tgtdev != NULL)
10726143c23cSNaohiro Aota 			btrfs_dev_clear_zone_empty(dev_replace->tgtdev, physical);
10736143c23cSNaohiro Aota 		up_read(&dev_replace->rwsem);
10746143c23cSNaohiro Aota 
107508e11a3dSNaohiro Aota 		/*
107608e11a3dSNaohiro Aota 		 * The group is mapped to a sequential zone. Get the zone write
107708e11a3dSNaohiro Aota 		 * pointer to determine the allocation offset within the zone.
107808e11a3dSNaohiro Aota 		 */
107908e11a3dSNaohiro Aota 		WARN_ON(!IS_ALIGNED(physical, fs_info->zone_size));
108008e11a3dSNaohiro Aota 		nofs_flag = memalloc_nofs_save();
108108e11a3dSNaohiro Aota 		ret = btrfs_get_dev_zone(device, physical, &zone);
108208e11a3dSNaohiro Aota 		memalloc_nofs_restore(nofs_flag);
108308e11a3dSNaohiro Aota 		if (ret == -EIO || ret == -EOPNOTSUPP) {
108408e11a3dSNaohiro Aota 			ret = 0;
108508e11a3dSNaohiro Aota 			alloc_offsets[i] = WP_MISSING_DEV;
108608e11a3dSNaohiro Aota 			continue;
108708e11a3dSNaohiro Aota 		} else if (ret) {
108808e11a3dSNaohiro Aota 			goto out;
108908e11a3dSNaohiro Aota 		}
109008e11a3dSNaohiro Aota 
109108e11a3dSNaohiro Aota 		switch (zone.cond) {
109208e11a3dSNaohiro Aota 		case BLK_ZONE_COND_OFFLINE:
109308e11a3dSNaohiro Aota 		case BLK_ZONE_COND_READONLY:
109408e11a3dSNaohiro Aota 			btrfs_err(fs_info,
109508e11a3dSNaohiro Aota 		"zoned: offline/readonly zone %llu on device %s (devid %llu)",
109608e11a3dSNaohiro Aota 				  physical >> device->zone_info->zone_size_shift,
109708e11a3dSNaohiro Aota 				  rcu_str_deref(device->name), device->devid);
109808e11a3dSNaohiro Aota 			alloc_offsets[i] = WP_MISSING_DEV;
109908e11a3dSNaohiro Aota 			break;
110008e11a3dSNaohiro Aota 		case BLK_ZONE_COND_EMPTY:
110108e11a3dSNaohiro Aota 			alloc_offsets[i] = 0;
110208e11a3dSNaohiro Aota 			break;
110308e11a3dSNaohiro Aota 		case BLK_ZONE_COND_FULL:
110408e11a3dSNaohiro Aota 			alloc_offsets[i] = fs_info->zone_size;
110508e11a3dSNaohiro Aota 			break;
110608e11a3dSNaohiro Aota 		default:
110708e11a3dSNaohiro Aota 			/* Partially used zone */
110808e11a3dSNaohiro Aota 			alloc_offsets[i] =
110908e11a3dSNaohiro Aota 					((zone.wp - zone.start) << SECTOR_SHIFT);
111008e11a3dSNaohiro Aota 			break;
111108e11a3dSNaohiro Aota 		}
111208e11a3dSNaohiro Aota 	}
111308e11a3dSNaohiro Aota 
111408f45559SJohannes Thumshirn 	if (num_sequential > 0)
111508f45559SJohannes Thumshirn 		cache->seq_zone = true;
111608f45559SJohannes Thumshirn 
111708e11a3dSNaohiro Aota 	if (num_conventional > 0) {
111808e11a3dSNaohiro Aota 		/*
1119a94794d5SNaohiro Aota 		 * Avoid calling calculate_alloc_pointer() for new BG. It
1120a94794d5SNaohiro Aota 		 * is no use for new BG. It must be always 0.
1121a94794d5SNaohiro Aota 		 *
1122a94794d5SNaohiro Aota 		 * Also, we have a lock chain of extent buffer lock ->
1123a94794d5SNaohiro Aota 		 * chunk mutex.  For new BG, this function is called from
1124a94794d5SNaohiro Aota 		 * btrfs_make_block_group() which is already taking the
1125a94794d5SNaohiro Aota 		 * chunk mutex. Thus, we cannot call
1126a94794d5SNaohiro Aota 		 * calculate_alloc_pointer() which takes extent buffer
1127a94794d5SNaohiro Aota 		 * locks to avoid deadlock.
112808e11a3dSNaohiro Aota 		 */
1129a94794d5SNaohiro Aota 		if (new) {
1130a94794d5SNaohiro Aota 			cache->alloc_offset = 0;
113108e11a3dSNaohiro Aota 			goto out;
113208e11a3dSNaohiro Aota 		}
1133a94794d5SNaohiro Aota 		ret = calculate_alloc_pointer(cache, &last_alloc);
1134a94794d5SNaohiro Aota 		if (ret || map->num_stripes == num_conventional) {
1135a94794d5SNaohiro Aota 			if (!ret)
1136a94794d5SNaohiro Aota 				cache->alloc_offset = last_alloc;
1137a94794d5SNaohiro Aota 			else
1138a94794d5SNaohiro Aota 				btrfs_err(fs_info,
1139a94794d5SNaohiro Aota 			"zoned: failed to determine allocation offset of bg %llu",
1140a94794d5SNaohiro Aota 					  cache->start);
1141a94794d5SNaohiro Aota 			goto out;
1142a94794d5SNaohiro Aota 		}
1143a94794d5SNaohiro Aota 	}
114408e11a3dSNaohiro Aota 
114508e11a3dSNaohiro Aota 	switch (map->type & BTRFS_BLOCK_GROUP_PROFILE_MASK) {
114608e11a3dSNaohiro Aota 	case 0: /* single */
114708e11a3dSNaohiro Aota 		cache->alloc_offset = alloc_offsets[0];
114808e11a3dSNaohiro Aota 		break;
114908e11a3dSNaohiro Aota 	case BTRFS_BLOCK_GROUP_DUP:
115008e11a3dSNaohiro Aota 	case BTRFS_BLOCK_GROUP_RAID1:
115108e11a3dSNaohiro Aota 	case BTRFS_BLOCK_GROUP_RAID0:
115208e11a3dSNaohiro Aota 	case BTRFS_BLOCK_GROUP_RAID10:
115308e11a3dSNaohiro Aota 	case BTRFS_BLOCK_GROUP_RAID5:
115408e11a3dSNaohiro Aota 	case BTRFS_BLOCK_GROUP_RAID6:
115508e11a3dSNaohiro Aota 		/* non-single profiles are not supported yet */
115608e11a3dSNaohiro Aota 	default:
115708e11a3dSNaohiro Aota 		btrfs_err(fs_info, "zoned: profile %s not yet supported",
115808e11a3dSNaohiro Aota 			  btrfs_bg_type_to_raid_name(map->type));
115908e11a3dSNaohiro Aota 		ret = -EINVAL;
116008e11a3dSNaohiro Aota 		goto out;
116108e11a3dSNaohiro Aota 	}
116208e11a3dSNaohiro Aota 
116308e11a3dSNaohiro Aota out:
1164a94794d5SNaohiro Aota 	/* An extent is allocated after the write pointer */
1165a94794d5SNaohiro Aota 	if (!ret && num_conventional && last_alloc > cache->alloc_offset) {
1166a94794d5SNaohiro Aota 		btrfs_err(fs_info,
1167a94794d5SNaohiro Aota 			  "zoned: got wrong write pointer in BG %llu: %llu > %llu",
1168a94794d5SNaohiro Aota 			  logical, last_alloc, cache->alloc_offset);
1169a94794d5SNaohiro Aota 		ret = -EIO;
1170a94794d5SNaohiro Aota 	}
1171a94794d5SNaohiro Aota 
11720bc09ca1SNaohiro Aota 	if (!ret)
11730bc09ca1SNaohiro Aota 		cache->meta_write_pointer = cache->alloc_offset + cache->start;
11740bc09ca1SNaohiro Aota 
117508e11a3dSNaohiro Aota 	kfree(alloc_offsets);
117608e11a3dSNaohiro Aota 	free_extent_map(em);
117708e11a3dSNaohiro Aota 
117808e11a3dSNaohiro Aota 	return ret;
117908e11a3dSNaohiro Aota }
1180169e0da9SNaohiro Aota 
1181169e0da9SNaohiro Aota void btrfs_calc_zone_unusable(struct btrfs_block_group *cache)
1182169e0da9SNaohiro Aota {
1183169e0da9SNaohiro Aota 	u64 unusable, free;
1184169e0da9SNaohiro Aota 
1185169e0da9SNaohiro Aota 	if (!btrfs_is_zoned(cache->fs_info))
1186169e0da9SNaohiro Aota 		return;
1187169e0da9SNaohiro Aota 
1188169e0da9SNaohiro Aota 	WARN_ON(cache->bytes_super != 0);
1189169e0da9SNaohiro Aota 	unusable = cache->alloc_offset - cache->used;
1190169e0da9SNaohiro Aota 	free = cache->length - cache->alloc_offset;
1191169e0da9SNaohiro Aota 
1192169e0da9SNaohiro Aota 	/* We only need ->free_space in ALLOC_SEQ block groups */
1193169e0da9SNaohiro Aota 	cache->last_byte_to_unpin = (u64)-1;
1194169e0da9SNaohiro Aota 	cache->cached = BTRFS_CACHE_FINISHED;
1195169e0da9SNaohiro Aota 	cache->free_space_ctl->free_space = free;
1196169e0da9SNaohiro Aota 	cache->zone_unusable = unusable;
1197169e0da9SNaohiro Aota 
1198169e0da9SNaohiro Aota 	/* Should not have any excluded extents. Just in case, though */
1199169e0da9SNaohiro Aota 	btrfs_free_excluded_extents(cache);
1200169e0da9SNaohiro Aota }
1201d3575156SNaohiro Aota 
1202d3575156SNaohiro Aota void btrfs_redirty_list_add(struct btrfs_transaction *trans,
1203d3575156SNaohiro Aota 			    struct extent_buffer *eb)
1204d3575156SNaohiro Aota {
1205d3575156SNaohiro Aota 	struct btrfs_fs_info *fs_info = eb->fs_info;
1206d3575156SNaohiro Aota 
1207d3575156SNaohiro Aota 	if (!btrfs_is_zoned(fs_info) ||
1208d3575156SNaohiro Aota 	    btrfs_header_flag(eb, BTRFS_HEADER_FLAG_WRITTEN) ||
1209d3575156SNaohiro Aota 	    !list_empty(&eb->release_list))
1210d3575156SNaohiro Aota 		return;
1211d3575156SNaohiro Aota 
1212d3575156SNaohiro Aota 	set_extent_buffer_dirty(eb);
1213d3575156SNaohiro Aota 	set_extent_bits_nowait(&trans->dirty_pages, eb->start,
1214d3575156SNaohiro Aota 			       eb->start + eb->len - 1, EXTENT_DIRTY);
1215d3575156SNaohiro Aota 	memzero_extent_buffer(eb, 0, eb->len);
1216d3575156SNaohiro Aota 	set_bit(EXTENT_BUFFER_NO_CHECK, &eb->bflags);
1217d3575156SNaohiro Aota 
1218d3575156SNaohiro Aota 	spin_lock(&trans->releasing_ebs_lock);
1219d3575156SNaohiro Aota 	list_add_tail(&eb->release_list, &trans->releasing_ebs);
1220d3575156SNaohiro Aota 	spin_unlock(&trans->releasing_ebs_lock);
1221d3575156SNaohiro Aota 	atomic_inc(&eb->refs);
1222d3575156SNaohiro Aota }
1223d3575156SNaohiro Aota 
1224d3575156SNaohiro Aota void btrfs_free_redirty_list(struct btrfs_transaction *trans)
1225d3575156SNaohiro Aota {
1226d3575156SNaohiro Aota 	spin_lock(&trans->releasing_ebs_lock);
1227d3575156SNaohiro Aota 	while (!list_empty(&trans->releasing_ebs)) {
1228d3575156SNaohiro Aota 		struct extent_buffer *eb;
1229d3575156SNaohiro Aota 
1230d3575156SNaohiro Aota 		eb = list_first_entry(&trans->releasing_ebs,
1231d3575156SNaohiro Aota 				      struct extent_buffer, release_list);
1232d3575156SNaohiro Aota 		list_del_init(&eb->release_list);
1233d3575156SNaohiro Aota 		free_extent_buffer(eb);
1234d3575156SNaohiro Aota 	}
1235d3575156SNaohiro Aota 	spin_unlock(&trans->releasing_ebs_lock);
1236d3575156SNaohiro Aota }
123708f45559SJohannes Thumshirn 
123808f45559SJohannes Thumshirn bool btrfs_use_zone_append(struct btrfs_inode *inode, struct extent_map *em)
123908f45559SJohannes Thumshirn {
124008f45559SJohannes Thumshirn 	struct btrfs_fs_info *fs_info = inode->root->fs_info;
124108f45559SJohannes Thumshirn 	struct btrfs_block_group *cache;
124208f45559SJohannes Thumshirn 	bool ret = false;
124308f45559SJohannes Thumshirn 
124408f45559SJohannes Thumshirn 	if (!btrfs_is_zoned(fs_info))
124508f45559SJohannes Thumshirn 		return false;
124608f45559SJohannes Thumshirn 
124708f45559SJohannes Thumshirn 	if (!fs_info->max_zone_append_size)
124808f45559SJohannes Thumshirn 		return false;
124908f45559SJohannes Thumshirn 
125008f45559SJohannes Thumshirn 	if (!is_data_inode(&inode->vfs_inode))
125108f45559SJohannes Thumshirn 		return false;
125208f45559SJohannes Thumshirn 
125308f45559SJohannes Thumshirn 	cache = btrfs_lookup_block_group(fs_info, em->block_start);
125408f45559SJohannes Thumshirn 	ASSERT(cache);
125508f45559SJohannes Thumshirn 	if (!cache)
125608f45559SJohannes Thumshirn 		return false;
125708f45559SJohannes Thumshirn 
125808f45559SJohannes Thumshirn 	ret = cache->seq_zone;
125908f45559SJohannes Thumshirn 	btrfs_put_block_group(cache);
126008f45559SJohannes Thumshirn 
126108f45559SJohannes Thumshirn 	return ret;
126208f45559SJohannes Thumshirn }
1263d8e3fb10SNaohiro Aota 
1264d8e3fb10SNaohiro Aota void btrfs_record_physical_zoned(struct inode *inode, u64 file_offset,
1265d8e3fb10SNaohiro Aota 				 struct bio *bio)
1266d8e3fb10SNaohiro Aota {
1267d8e3fb10SNaohiro Aota 	struct btrfs_ordered_extent *ordered;
1268d8e3fb10SNaohiro Aota 	const u64 physical = bio->bi_iter.bi_sector << SECTOR_SHIFT;
1269d8e3fb10SNaohiro Aota 
1270d8e3fb10SNaohiro Aota 	if (bio_op(bio) != REQ_OP_ZONE_APPEND)
1271d8e3fb10SNaohiro Aota 		return;
1272d8e3fb10SNaohiro Aota 
1273d8e3fb10SNaohiro Aota 	ordered = btrfs_lookup_ordered_extent(BTRFS_I(inode), file_offset);
1274d8e3fb10SNaohiro Aota 	if (WARN_ON(!ordered))
1275d8e3fb10SNaohiro Aota 		return;
1276d8e3fb10SNaohiro Aota 
1277d8e3fb10SNaohiro Aota 	ordered->physical = physical;
1278d8e3fb10SNaohiro Aota 	ordered->disk = bio->bi_disk;
1279d8e3fb10SNaohiro Aota 	ordered->partno = bio->bi_partno;
1280d8e3fb10SNaohiro Aota 
1281d8e3fb10SNaohiro Aota 	btrfs_put_ordered_extent(ordered);
1282d8e3fb10SNaohiro Aota }
1283d8e3fb10SNaohiro Aota 
1284d8e3fb10SNaohiro Aota void btrfs_rewrite_logical_zoned(struct btrfs_ordered_extent *ordered)
1285d8e3fb10SNaohiro Aota {
1286d8e3fb10SNaohiro Aota 	struct btrfs_inode *inode = BTRFS_I(ordered->inode);
1287d8e3fb10SNaohiro Aota 	struct btrfs_fs_info *fs_info = inode->root->fs_info;
1288d8e3fb10SNaohiro Aota 	struct extent_map_tree *em_tree;
1289d8e3fb10SNaohiro Aota 	struct extent_map *em;
1290d8e3fb10SNaohiro Aota 	struct btrfs_ordered_sum *sum;
1291d8e3fb10SNaohiro Aota 	struct block_device *bdev;
1292d8e3fb10SNaohiro Aota 	u64 orig_logical = ordered->disk_bytenr;
1293d8e3fb10SNaohiro Aota 	u64 *logical = NULL;
1294d8e3fb10SNaohiro Aota 	int nr, stripe_len;
1295d8e3fb10SNaohiro Aota 
1296d8e3fb10SNaohiro Aota 	/* Zoned devices should not have partitions. So, we can assume it is 0 */
1297d8e3fb10SNaohiro Aota 	ASSERT(ordered->partno == 0);
1298d8e3fb10SNaohiro Aota 	bdev = bdgrab(ordered->disk->part0);
1299d8e3fb10SNaohiro Aota 	if (WARN_ON(!bdev))
1300d8e3fb10SNaohiro Aota 		return;
1301d8e3fb10SNaohiro Aota 
1302d8e3fb10SNaohiro Aota 	if (WARN_ON(btrfs_rmap_block(fs_info, orig_logical, bdev,
1303d8e3fb10SNaohiro Aota 				     ordered->physical, &logical, &nr,
1304d8e3fb10SNaohiro Aota 				     &stripe_len)))
1305d8e3fb10SNaohiro Aota 		goto out;
1306d8e3fb10SNaohiro Aota 
1307d8e3fb10SNaohiro Aota 	WARN_ON(nr != 1);
1308d8e3fb10SNaohiro Aota 
1309d8e3fb10SNaohiro Aota 	if (orig_logical == *logical)
1310d8e3fb10SNaohiro Aota 		goto out;
1311d8e3fb10SNaohiro Aota 
1312d8e3fb10SNaohiro Aota 	ordered->disk_bytenr = *logical;
1313d8e3fb10SNaohiro Aota 
1314d8e3fb10SNaohiro Aota 	em_tree = &inode->extent_tree;
1315d8e3fb10SNaohiro Aota 	write_lock(&em_tree->lock);
1316d8e3fb10SNaohiro Aota 	em = search_extent_mapping(em_tree, ordered->file_offset,
1317d8e3fb10SNaohiro Aota 				   ordered->num_bytes);
1318d8e3fb10SNaohiro Aota 	em->block_start = *logical;
1319d8e3fb10SNaohiro Aota 	free_extent_map(em);
1320d8e3fb10SNaohiro Aota 	write_unlock(&em_tree->lock);
1321d8e3fb10SNaohiro Aota 
1322d8e3fb10SNaohiro Aota 	list_for_each_entry(sum, &ordered->list, list) {
1323d8e3fb10SNaohiro Aota 		if (*logical < orig_logical)
1324d8e3fb10SNaohiro Aota 			sum->bytenr -= orig_logical - *logical;
1325d8e3fb10SNaohiro Aota 		else
1326d8e3fb10SNaohiro Aota 			sum->bytenr += *logical - orig_logical;
1327d8e3fb10SNaohiro Aota 	}
1328d8e3fb10SNaohiro Aota 
1329d8e3fb10SNaohiro Aota out:
1330d8e3fb10SNaohiro Aota 	kfree(logical);
1331d8e3fb10SNaohiro Aota 	bdput(bdev);
1332d8e3fb10SNaohiro Aota }
13330bc09ca1SNaohiro Aota 
13340bc09ca1SNaohiro Aota bool btrfs_check_meta_write_pointer(struct btrfs_fs_info *fs_info,
13350bc09ca1SNaohiro Aota 				    struct extent_buffer *eb,
13360bc09ca1SNaohiro Aota 				    struct btrfs_block_group **cache_ret)
13370bc09ca1SNaohiro Aota {
13380bc09ca1SNaohiro Aota 	struct btrfs_block_group *cache;
13390bc09ca1SNaohiro Aota 	bool ret = true;
13400bc09ca1SNaohiro Aota 
13410bc09ca1SNaohiro Aota 	if (!btrfs_is_zoned(fs_info))
13420bc09ca1SNaohiro Aota 		return true;
13430bc09ca1SNaohiro Aota 
13440bc09ca1SNaohiro Aota 	cache = *cache_ret;
13450bc09ca1SNaohiro Aota 
13460bc09ca1SNaohiro Aota 	if (cache && (eb->start < cache->start ||
13470bc09ca1SNaohiro Aota 		      cache->start + cache->length <= eb->start)) {
13480bc09ca1SNaohiro Aota 		btrfs_put_block_group(cache);
13490bc09ca1SNaohiro Aota 		cache = NULL;
13500bc09ca1SNaohiro Aota 		*cache_ret = NULL;
13510bc09ca1SNaohiro Aota 	}
13520bc09ca1SNaohiro Aota 
13530bc09ca1SNaohiro Aota 	if (!cache)
13540bc09ca1SNaohiro Aota 		cache = btrfs_lookup_block_group(fs_info, eb->start);
13550bc09ca1SNaohiro Aota 
13560bc09ca1SNaohiro Aota 	if (cache) {
13570bc09ca1SNaohiro Aota 		if (cache->meta_write_pointer != eb->start) {
13580bc09ca1SNaohiro Aota 			btrfs_put_block_group(cache);
13590bc09ca1SNaohiro Aota 			cache = NULL;
13600bc09ca1SNaohiro Aota 			ret = false;
13610bc09ca1SNaohiro Aota 		} else {
13620bc09ca1SNaohiro Aota 			cache->meta_write_pointer = eb->start + eb->len;
13630bc09ca1SNaohiro Aota 		}
13640bc09ca1SNaohiro Aota 
13650bc09ca1SNaohiro Aota 		*cache_ret = cache;
13660bc09ca1SNaohiro Aota 	}
13670bc09ca1SNaohiro Aota 
13680bc09ca1SNaohiro Aota 	return ret;
13690bc09ca1SNaohiro Aota }
13700bc09ca1SNaohiro Aota 
13710bc09ca1SNaohiro Aota void btrfs_revert_meta_write_pointer(struct btrfs_block_group *cache,
13720bc09ca1SNaohiro Aota 				     struct extent_buffer *eb)
13730bc09ca1SNaohiro Aota {
13740bc09ca1SNaohiro Aota 	if (!btrfs_is_zoned(eb->fs_info) || !cache)
13750bc09ca1SNaohiro Aota 		return;
13760bc09ca1SNaohiro Aota 
13770bc09ca1SNaohiro Aota 	ASSERT(cache->meta_write_pointer == eb->start + eb->len);
13780bc09ca1SNaohiro Aota 	cache->meta_write_pointer = eb->start;
13790bc09ca1SNaohiro Aota }
1380de17addcSNaohiro Aota 
1381de17addcSNaohiro Aota int btrfs_zoned_issue_zeroout(struct btrfs_device *device, u64 physical, u64 length)
1382de17addcSNaohiro Aota {
1383de17addcSNaohiro Aota 	if (!btrfs_dev_is_sequential(device, physical))
1384de17addcSNaohiro Aota 		return -EOPNOTSUPP;
1385de17addcSNaohiro Aota 
1386de17addcSNaohiro Aota 	return blkdev_issue_zeroout(device->bdev, physical >> SECTOR_SHIFT,
1387de17addcSNaohiro Aota 				    length >> SECTOR_SHIFT, GFP_NOFS, 0);
1388de17addcSNaohiro Aota }
13897db1c5d1SNaohiro Aota 
13907db1c5d1SNaohiro Aota static int read_zone_info(struct btrfs_fs_info *fs_info, u64 logical,
13917db1c5d1SNaohiro Aota 			  struct blk_zone *zone)
13927db1c5d1SNaohiro Aota {
13937db1c5d1SNaohiro Aota 	struct btrfs_bio *bbio = NULL;
13947db1c5d1SNaohiro Aota 	u64 mapped_length = PAGE_SIZE;
13957db1c5d1SNaohiro Aota 	unsigned int nofs_flag;
13967db1c5d1SNaohiro Aota 	int nmirrors;
13977db1c5d1SNaohiro Aota 	int i, ret;
13987db1c5d1SNaohiro Aota 
13997db1c5d1SNaohiro Aota 	ret = btrfs_map_sblock(fs_info, BTRFS_MAP_GET_READ_MIRRORS, logical,
14007db1c5d1SNaohiro Aota 			       &mapped_length, &bbio);
14017db1c5d1SNaohiro Aota 	if (ret || !bbio || mapped_length < PAGE_SIZE) {
14027db1c5d1SNaohiro Aota 		btrfs_put_bbio(bbio);
14037db1c5d1SNaohiro Aota 		return -EIO;
14047db1c5d1SNaohiro Aota 	}
14057db1c5d1SNaohiro Aota 
14067db1c5d1SNaohiro Aota 	if (bbio->map_type & BTRFS_BLOCK_GROUP_RAID56_MASK)
14077db1c5d1SNaohiro Aota 		return -EINVAL;
14087db1c5d1SNaohiro Aota 
14097db1c5d1SNaohiro Aota 	nofs_flag = memalloc_nofs_save();
14107db1c5d1SNaohiro Aota 	nmirrors = (int)bbio->num_stripes;
14117db1c5d1SNaohiro Aota 	for (i = 0; i < nmirrors; i++) {
14127db1c5d1SNaohiro Aota 		u64 physical = bbio->stripes[i].physical;
14137db1c5d1SNaohiro Aota 		struct btrfs_device *dev = bbio->stripes[i].dev;
14147db1c5d1SNaohiro Aota 
14157db1c5d1SNaohiro Aota 		/* Missing device */
14167db1c5d1SNaohiro Aota 		if (!dev->bdev)
14177db1c5d1SNaohiro Aota 			continue;
14187db1c5d1SNaohiro Aota 
14197db1c5d1SNaohiro Aota 		ret = btrfs_get_dev_zone(dev, physical, zone);
14207db1c5d1SNaohiro Aota 		/* Failing device */
14217db1c5d1SNaohiro Aota 		if (ret == -EIO || ret == -EOPNOTSUPP)
14227db1c5d1SNaohiro Aota 			continue;
14237db1c5d1SNaohiro Aota 		break;
14247db1c5d1SNaohiro Aota 	}
14257db1c5d1SNaohiro Aota 	memalloc_nofs_restore(nofs_flag);
14267db1c5d1SNaohiro Aota 
14277db1c5d1SNaohiro Aota 	return ret;
14287db1c5d1SNaohiro Aota }
14297db1c5d1SNaohiro Aota 
14307db1c5d1SNaohiro Aota /*
14317db1c5d1SNaohiro Aota  * Synchronize write pointer in a zone at @physical_start on @tgt_dev, by
14327db1c5d1SNaohiro Aota  * filling zeros between @physical_pos to a write pointer of dev-replace
14337db1c5d1SNaohiro Aota  * source device.
14347db1c5d1SNaohiro Aota  */
14357db1c5d1SNaohiro Aota int btrfs_sync_zone_write_pointer(struct btrfs_device *tgt_dev, u64 logical,
14367db1c5d1SNaohiro Aota 				    u64 physical_start, u64 physical_pos)
14377db1c5d1SNaohiro Aota {
14387db1c5d1SNaohiro Aota 	struct btrfs_fs_info *fs_info = tgt_dev->fs_info;
14397db1c5d1SNaohiro Aota 	struct blk_zone zone;
14407db1c5d1SNaohiro Aota 	u64 length;
14417db1c5d1SNaohiro Aota 	u64 wp;
14427db1c5d1SNaohiro Aota 	int ret;
14437db1c5d1SNaohiro Aota 
14447db1c5d1SNaohiro Aota 	if (!btrfs_dev_is_sequential(tgt_dev, physical_pos))
14457db1c5d1SNaohiro Aota 		return 0;
14467db1c5d1SNaohiro Aota 
14477db1c5d1SNaohiro Aota 	ret = read_zone_info(fs_info, logical, &zone);
14487db1c5d1SNaohiro Aota 	if (ret)
14497db1c5d1SNaohiro Aota 		return ret;
14507db1c5d1SNaohiro Aota 
14517db1c5d1SNaohiro Aota 	wp = physical_start + ((zone.wp - zone.start) << SECTOR_SHIFT);
14527db1c5d1SNaohiro Aota 
14537db1c5d1SNaohiro Aota 	if (physical_pos == wp)
14547db1c5d1SNaohiro Aota 		return 0;
14557db1c5d1SNaohiro Aota 
14567db1c5d1SNaohiro Aota 	if (physical_pos > wp)
14577db1c5d1SNaohiro Aota 		return -EUCLEAN;
14587db1c5d1SNaohiro Aota 
14597db1c5d1SNaohiro Aota 	length = wp - physical_pos;
14607db1c5d1SNaohiro Aota 	return btrfs_zoned_issue_zeroout(tgt_dev, physical_pos, length);
14617db1c5d1SNaohiro Aota }
1462