15b316468SNaohiro Aota // SPDX-License-Identifier: GPL-2.0 25b316468SNaohiro Aota 31cd6121fSNaohiro Aota #include <linux/bitops.h> 45b316468SNaohiro Aota #include <linux/slab.h> 55b316468SNaohiro Aota #include <linux/blkdev.h> 608e11a3dSNaohiro Aota #include <linux/sched/mm.h> 75b316468SNaohiro Aota #include "ctree.h" 85b316468SNaohiro Aota #include "volumes.h" 95b316468SNaohiro Aota #include "zoned.h" 105b316468SNaohiro Aota #include "rcu-string.h" 111cd6121fSNaohiro Aota #include "disk-io.h" 1208e11a3dSNaohiro Aota #include "block-group.h" 13d3575156SNaohiro Aota #include "transaction.h" 146143c23cSNaohiro Aota #include "dev-replace.h" 157db1c5d1SNaohiro Aota #include "space-info.h" 165b316468SNaohiro Aota 175b316468SNaohiro Aota /* Maximum number of zones to report per blkdev_report_zones() call */ 185b316468SNaohiro Aota #define BTRFS_REPORT_NR_ZONES 4096 1908e11a3dSNaohiro Aota /* Invalid allocation pointer value for missing devices */ 2008e11a3dSNaohiro Aota #define WP_MISSING_DEV ((u64)-1) 2108e11a3dSNaohiro Aota /* Pseudo write pointer value for conventional zone */ 2208e11a3dSNaohiro Aota #define WP_CONVENTIONAL ((u64)-2) 235b316468SNaohiro Aota 2412659251SNaohiro Aota /* Number of superblock log zones */ 2512659251SNaohiro Aota #define BTRFS_NR_SB_LOG_ZONES 2 2612659251SNaohiro Aota 275b316468SNaohiro Aota static int copy_zone_info_cb(struct blk_zone *zone, unsigned int idx, void *data) 285b316468SNaohiro Aota { 295b316468SNaohiro Aota struct blk_zone *zones = data; 305b316468SNaohiro Aota 315b316468SNaohiro Aota memcpy(&zones[idx], zone, sizeof(*zone)); 325b316468SNaohiro Aota 335b316468SNaohiro Aota return 0; 345b316468SNaohiro Aota } 355b316468SNaohiro Aota 3612659251SNaohiro Aota static int sb_write_pointer(struct block_device *bdev, struct blk_zone *zones, 3712659251SNaohiro Aota u64 *wp_ret) 3812659251SNaohiro Aota { 3912659251SNaohiro Aota bool empty[BTRFS_NR_SB_LOG_ZONES]; 4012659251SNaohiro Aota bool full[BTRFS_NR_SB_LOG_ZONES]; 4112659251SNaohiro Aota sector_t sector; 4212659251SNaohiro Aota 4312659251SNaohiro Aota ASSERT(zones[0].type != BLK_ZONE_TYPE_CONVENTIONAL && 4412659251SNaohiro Aota zones[1].type != BLK_ZONE_TYPE_CONVENTIONAL); 4512659251SNaohiro Aota 4612659251SNaohiro Aota empty[0] = (zones[0].cond == BLK_ZONE_COND_EMPTY); 4712659251SNaohiro Aota empty[1] = (zones[1].cond == BLK_ZONE_COND_EMPTY); 4812659251SNaohiro Aota full[0] = (zones[0].cond == BLK_ZONE_COND_FULL); 4912659251SNaohiro Aota full[1] = (zones[1].cond == BLK_ZONE_COND_FULL); 5012659251SNaohiro Aota 5112659251SNaohiro Aota /* 5212659251SNaohiro Aota * Possible states of log buffer zones 5312659251SNaohiro Aota * 5412659251SNaohiro Aota * Empty[0] In use[0] Full[0] 5512659251SNaohiro Aota * Empty[1] * x 0 5612659251SNaohiro Aota * In use[1] 0 x 0 5712659251SNaohiro Aota * Full[1] 1 1 C 5812659251SNaohiro Aota * 5912659251SNaohiro Aota * Log position: 6012659251SNaohiro Aota * *: Special case, no superblock is written 6112659251SNaohiro Aota * 0: Use write pointer of zones[0] 6212659251SNaohiro Aota * 1: Use write pointer of zones[1] 6312659251SNaohiro Aota * C: Compare super blcoks from zones[0] and zones[1], use the latest 6412659251SNaohiro Aota * one determined by generation 6512659251SNaohiro Aota * x: Invalid state 6612659251SNaohiro Aota */ 6712659251SNaohiro Aota 6812659251SNaohiro Aota if (empty[0] && empty[1]) { 6912659251SNaohiro Aota /* Special case to distinguish no superblock to read */ 7012659251SNaohiro Aota *wp_ret = zones[0].start << SECTOR_SHIFT; 7112659251SNaohiro Aota return -ENOENT; 7212659251SNaohiro Aota } else if (full[0] && full[1]) { 7312659251SNaohiro Aota /* Compare two super blocks */ 7412659251SNaohiro Aota struct address_space *mapping = bdev->bd_inode->i_mapping; 7512659251SNaohiro Aota struct page *page[BTRFS_NR_SB_LOG_ZONES]; 7612659251SNaohiro Aota struct btrfs_super_block *super[BTRFS_NR_SB_LOG_ZONES]; 7712659251SNaohiro Aota int i; 7812659251SNaohiro Aota 7912659251SNaohiro Aota for (i = 0; i < BTRFS_NR_SB_LOG_ZONES; i++) { 8012659251SNaohiro Aota u64 bytenr; 8112659251SNaohiro Aota 8212659251SNaohiro Aota bytenr = ((zones[i].start + zones[i].len) 8312659251SNaohiro Aota << SECTOR_SHIFT) - BTRFS_SUPER_INFO_SIZE; 8412659251SNaohiro Aota 8512659251SNaohiro Aota page[i] = read_cache_page_gfp(mapping, 8612659251SNaohiro Aota bytenr >> PAGE_SHIFT, GFP_NOFS); 8712659251SNaohiro Aota if (IS_ERR(page[i])) { 8812659251SNaohiro Aota if (i == 1) 8912659251SNaohiro Aota btrfs_release_disk_super(super[0]); 9012659251SNaohiro Aota return PTR_ERR(page[i]); 9112659251SNaohiro Aota } 9212659251SNaohiro Aota super[i] = page_address(page[i]); 9312659251SNaohiro Aota } 9412659251SNaohiro Aota 9512659251SNaohiro Aota if (super[0]->generation > super[1]->generation) 9612659251SNaohiro Aota sector = zones[1].start; 9712659251SNaohiro Aota else 9812659251SNaohiro Aota sector = zones[0].start; 9912659251SNaohiro Aota 10012659251SNaohiro Aota for (i = 0; i < BTRFS_NR_SB_LOG_ZONES; i++) 10112659251SNaohiro Aota btrfs_release_disk_super(super[i]); 10212659251SNaohiro Aota } else if (!full[0] && (empty[1] || full[1])) { 10312659251SNaohiro Aota sector = zones[0].wp; 10412659251SNaohiro Aota } else if (full[0]) { 10512659251SNaohiro Aota sector = zones[1].wp; 10612659251SNaohiro Aota } else { 10712659251SNaohiro Aota return -EUCLEAN; 10812659251SNaohiro Aota } 10912659251SNaohiro Aota *wp_ret = sector << SECTOR_SHIFT; 11012659251SNaohiro Aota return 0; 11112659251SNaohiro Aota } 11212659251SNaohiro Aota 11312659251SNaohiro Aota /* 11412659251SNaohiro Aota * The following zones are reserved as the circular buffer on ZONED btrfs. 11512659251SNaohiro Aota * - The primary superblock: zones 0 and 1 11612659251SNaohiro Aota * - The first copy: zones 16 and 17 11712659251SNaohiro Aota * - The second copy: zones 1024 or zone at 256GB which is minimum, and 11812659251SNaohiro Aota * the following one 11912659251SNaohiro Aota */ 12012659251SNaohiro Aota static inline u32 sb_zone_number(int shift, int mirror) 12112659251SNaohiro Aota { 12212659251SNaohiro Aota ASSERT(mirror < BTRFS_SUPER_MIRROR_MAX); 12312659251SNaohiro Aota 12412659251SNaohiro Aota switch (mirror) { 12512659251SNaohiro Aota case 0: return 0; 12612659251SNaohiro Aota case 1: return 16; 12712659251SNaohiro Aota case 2: return min_t(u64, btrfs_sb_offset(mirror) >> shift, 1024); 12812659251SNaohiro Aota } 12912659251SNaohiro Aota 13012659251SNaohiro Aota return 0; 13112659251SNaohiro Aota } 13212659251SNaohiro Aota 1333c9daa09SJohannes Thumshirn /* 1343c9daa09SJohannes Thumshirn * Emulate blkdev_report_zones() for a non-zoned device. It slices up the block 1353c9daa09SJohannes Thumshirn * device into static sized chunks and fake a conventional zone on each of 1363c9daa09SJohannes Thumshirn * them. 1373c9daa09SJohannes Thumshirn */ 1383c9daa09SJohannes Thumshirn static int emulate_report_zones(struct btrfs_device *device, u64 pos, 1393c9daa09SJohannes Thumshirn struct blk_zone *zones, unsigned int nr_zones) 1403c9daa09SJohannes Thumshirn { 1413c9daa09SJohannes Thumshirn const sector_t zone_sectors = device->fs_info->zone_size >> SECTOR_SHIFT; 1423c9daa09SJohannes Thumshirn sector_t bdev_size = bdev_nr_sectors(device->bdev); 1433c9daa09SJohannes Thumshirn unsigned int i; 1443c9daa09SJohannes Thumshirn 1453c9daa09SJohannes Thumshirn pos >>= SECTOR_SHIFT; 1463c9daa09SJohannes Thumshirn for (i = 0; i < nr_zones; i++) { 1473c9daa09SJohannes Thumshirn zones[i].start = i * zone_sectors + pos; 1483c9daa09SJohannes Thumshirn zones[i].len = zone_sectors; 1493c9daa09SJohannes Thumshirn zones[i].capacity = zone_sectors; 1503c9daa09SJohannes Thumshirn zones[i].wp = zones[i].start + zone_sectors; 1513c9daa09SJohannes Thumshirn zones[i].type = BLK_ZONE_TYPE_CONVENTIONAL; 1523c9daa09SJohannes Thumshirn zones[i].cond = BLK_ZONE_COND_NOT_WP; 1533c9daa09SJohannes Thumshirn 1543c9daa09SJohannes Thumshirn if (zones[i].wp >= bdev_size) { 1553c9daa09SJohannes Thumshirn i++; 1563c9daa09SJohannes Thumshirn break; 1573c9daa09SJohannes Thumshirn } 1583c9daa09SJohannes Thumshirn } 1593c9daa09SJohannes Thumshirn 1603c9daa09SJohannes Thumshirn return i; 1613c9daa09SJohannes Thumshirn } 1623c9daa09SJohannes Thumshirn 1635b316468SNaohiro Aota static int btrfs_get_dev_zones(struct btrfs_device *device, u64 pos, 1645b316468SNaohiro Aota struct blk_zone *zones, unsigned int *nr_zones) 1655b316468SNaohiro Aota { 1665b316468SNaohiro Aota int ret; 1675b316468SNaohiro Aota 1685b316468SNaohiro Aota if (!*nr_zones) 1695b316468SNaohiro Aota return 0; 1705b316468SNaohiro Aota 1713c9daa09SJohannes Thumshirn if (!bdev_is_zoned(device->bdev)) { 1723c9daa09SJohannes Thumshirn ret = emulate_report_zones(device, pos, zones, *nr_zones); 1733c9daa09SJohannes Thumshirn *nr_zones = ret; 1743c9daa09SJohannes Thumshirn return 0; 1753c9daa09SJohannes Thumshirn } 1763c9daa09SJohannes Thumshirn 1775b316468SNaohiro Aota ret = blkdev_report_zones(device->bdev, pos >> SECTOR_SHIFT, *nr_zones, 1785b316468SNaohiro Aota copy_zone_info_cb, zones); 1795b316468SNaohiro Aota if (ret < 0) { 1805b316468SNaohiro Aota btrfs_err_in_rcu(device->fs_info, 1815b316468SNaohiro Aota "zoned: failed to read zone %llu on %s (devid %llu)", 1825b316468SNaohiro Aota pos, rcu_str_deref(device->name), 1835b316468SNaohiro Aota device->devid); 1845b316468SNaohiro Aota return ret; 1855b316468SNaohiro Aota } 1865b316468SNaohiro Aota *nr_zones = ret; 1875b316468SNaohiro Aota if (!ret) 1885b316468SNaohiro Aota return -EIO; 1895b316468SNaohiro Aota 1905b316468SNaohiro Aota return 0; 1915b316468SNaohiro Aota } 1925b316468SNaohiro Aota 1933c9daa09SJohannes Thumshirn /* The emulated zone size is determined from the size of device extent */ 1943c9daa09SJohannes Thumshirn static int calculate_emulated_zone_size(struct btrfs_fs_info *fs_info) 1953c9daa09SJohannes Thumshirn { 1963c9daa09SJohannes Thumshirn struct btrfs_path *path; 1973c9daa09SJohannes Thumshirn struct btrfs_root *root = fs_info->dev_root; 1983c9daa09SJohannes Thumshirn struct btrfs_key key; 1993c9daa09SJohannes Thumshirn struct extent_buffer *leaf; 2003c9daa09SJohannes Thumshirn struct btrfs_dev_extent *dext; 2013c9daa09SJohannes Thumshirn int ret = 0; 2023c9daa09SJohannes Thumshirn 2033c9daa09SJohannes Thumshirn key.objectid = 1; 2043c9daa09SJohannes Thumshirn key.type = BTRFS_DEV_EXTENT_KEY; 2053c9daa09SJohannes Thumshirn key.offset = 0; 2063c9daa09SJohannes Thumshirn 2073c9daa09SJohannes Thumshirn path = btrfs_alloc_path(); 2083c9daa09SJohannes Thumshirn if (!path) 2093c9daa09SJohannes Thumshirn return -ENOMEM; 2103c9daa09SJohannes Thumshirn 2113c9daa09SJohannes Thumshirn ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); 2123c9daa09SJohannes Thumshirn if (ret < 0) 2133c9daa09SJohannes Thumshirn goto out; 2143c9daa09SJohannes Thumshirn 2153c9daa09SJohannes Thumshirn if (path->slots[0] >= btrfs_header_nritems(path->nodes[0])) { 2163c9daa09SJohannes Thumshirn ret = btrfs_next_item(root, path); 2173c9daa09SJohannes Thumshirn if (ret < 0) 2183c9daa09SJohannes Thumshirn goto out; 2193c9daa09SJohannes Thumshirn /* No dev extents at all? Not good */ 2203c9daa09SJohannes Thumshirn if (ret > 0) { 2213c9daa09SJohannes Thumshirn ret = -EUCLEAN; 2223c9daa09SJohannes Thumshirn goto out; 2233c9daa09SJohannes Thumshirn } 2243c9daa09SJohannes Thumshirn } 2253c9daa09SJohannes Thumshirn 2263c9daa09SJohannes Thumshirn leaf = path->nodes[0]; 2273c9daa09SJohannes Thumshirn dext = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_dev_extent); 2283c9daa09SJohannes Thumshirn fs_info->zone_size = btrfs_dev_extent_length(leaf, dext); 2293c9daa09SJohannes Thumshirn ret = 0; 2303c9daa09SJohannes Thumshirn 2313c9daa09SJohannes Thumshirn out: 2323c9daa09SJohannes Thumshirn btrfs_free_path(path); 2333c9daa09SJohannes Thumshirn 2343c9daa09SJohannes Thumshirn return ret; 2353c9daa09SJohannes Thumshirn } 2363c9daa09SJohannes Thumshirn 23773651042SNaohiro Aota int btrfs_get_dev_zone_info_all_devices(struct btrfs_fs_info *fs_info) 23873651042SNaohiro Aota { 23973651042SNaohiro Aota struct btrfs_fs_devices *fs_devices = fs_info->fs_devices; 24073651042SNaohiro Aota struct btrfs_device *device; 24173651042SNaohiro Aota int ret = 0; 24273651042SNaohiro Aota 24373651042SNaohiro Aota /* fs_info->zone_size might not set yet. Use the incomapt flag here. */ 24473651042SNaohiro Aota if (!btrfs_fs_incompat(fs_info, ZONED)) 24573651042SNaohiro Aota return 0; 24673651042SNaohiro Aota 24773651042SNaohiro Aota mutex_lock(&fs_devices->device_list_mutex); 24873651042SNaohiro Aota list_for_each_entry(device, &fs_devices->devices, dev_list) { 24973651042SNaohiro Aota /* We can skip reading of zone info for missing devices */ 25073651042SNaohiro Aota if (!device->bdev) 25173651042SNaohiro Aota continue; 25273651042SNaohiro Aota 25373651042SNaohiro Aota ret = btrfs_get_dev_zone_info(device); 25473651042SNaohiro Aota if (ret) 25573651042SNaohiro Aota break; 25673651042SNaohiro Aota } 25773651042SNaohiro Aota mutex_unlock(&fs_devices->device_list_mutex); 25873651042SNaohiro Aota 25973651042SNaohiro Aota return ret; 26073651042SNaohiro Aota } 26173651042SNaohiro Aota 2625b316468SNaohiro Aota int btrfs_get_dev_zone_info(struct btrfs_device *device) 2635b316468SNaohiro Aota { 2643c9daa09SJohannes Thumshirn struct btrfs_fs_info *fs_info = device->fs_info; 2655b316468SNaohiro Aota struct btrfs_zoned_device_info *zone_info = NULL; 2665b316468SNaohiro Aota struct block_device *bdev = device->bdev; 267862931c7SNaohiro Aota struct request_queue *queue = bdev_get_queue(bdev); 2685b316468SNaohiro Aota sector_t nr_sectors; 2695b316468SNaohiro Aota sector_t sector = 0; 2705b316468SNaohiro Aota struct blk_zone *zones = NULL; 2715b316468SNaohiro Aota unsigned int i, nreported = 0, nr_zones; 272*d734492aSNaohiro Aota sector_t zone_sectors; 2733c9daa09SJohannes Thumshirn char *model, *emulated; 2745b316468SNaohiro Aota int ret; 2755b316468SNaohiro Aota 2763c9daa09SJohannes Thumshirn /* 2773c9daa09SJohannes Thumshirn * Cannot use btrfs_is_zoned here, since fs_info::zone_size might not 2783c9daa09SJohannes Thumshirn * yet be set. 2793c9daa09SJohannes Thumshirn */ 2803c9daa09SJohannes Thumshirn if (!btrfs_fs_incompat(fs_info, ZONED)) 2815b316468SNaohiro Aota return 0; 2825b316468SNaohiro Aota 2835b316468SNaohiro Aota if (device->zone_info) 2845b316468SNaohiro Aota return 0; 2855b316468SNaohiro Aota 2865b316468SNaohiro Aota zone_info = kzalloc(sizeof(*zone_info), GFP_KERNEL); 2875b316468SNaohiro Aota if (!zone_info) 2885b316468SNaohiro Aota return -ENOMEM; 2895b316468SNaohiro Aota 2903c9daa09SJohannes Thumshirn if (!bdev_is_zoned(bdev)) { 2913c9daa09SJohannes Thumshirn if (!fs_info->zone_size) { 2923c9daa09SJohannes Thumshirn ret = calculate_emulated_zone_size(fs_info); 2933c9daa09SJohannes Thumshirn if (ret) 2943c9daa09SJohannes Thumshirn goto out; 2953c9daa09SJohannes Thumshirn } 2963c9daa09SJohannes Thumshirn 2973c9daa09SJohannes Thumshirn ASSERT(fs_info->zone_size); 2983c9daa09SJohannes Thumshirn zone_sectors = fs_info->zone_size >> SECTOR_SHIFT; 2993c9daa09SJohannes Thumshirn } else { 3005b316468SNaohiro Aota zone_sectors = bdev_zone_sectors(bdev); 3013c9daa09SJohannes Thumshirn } 3023c9daa09SJohannes Thumshirn 3033c9daa09SJohannes Thumshirn nr_sectors = bdev_nr_sectors(bdev); 3045b316468SNaohiro Aota /* Check if it's power of 2 (see is_power_of_2) */ 3055b316468SNaohiro Aota ASSERT(zone_sectors != 0 && (zone_sectors & (zone_sectors - 1)) == 0); 3065b316468SNaohiro Aota zone_info->zone_size = zone_sectors << SECTOR_SHIFT; 3075b316468SNaohiro Aota zone_info->zone_size_shift = ilog2(zone_info->zone_size); 308862931c7SNaohiro Aota zone_info->max_zone_append_size = 309862931c7SNaohiro Aota (u64)queue_max_zone_append_sectors(queue) << SECTOR_SHIFT; 3105b316468SNaohiro Aota zone_info->nr_zones = nr_sectors >> ilog2(zone_sectors); 3115b316468SNaohiro Aota if (!IS_ALIGNED(nr_sectors, zone_sectors)) 3125b316468SNaohiro Aota zone_info->nr_zones++; 3135b316468SNaohiro Aota 3145b316468SNaohiro Aota zone_info->seq_zones = bitmap_zalloc(zone_info->nr_zones, GFP_KERNEL); 3155b316468SNaohiro Aota if (!zone_info->seq_zones) { 3165b316468SNaohiro Aota ret = -ENOMEM; 3175b316468SNaohiro Aota goto out; 3185b316468SNaohiro Aota } 3195b316468SNaohiro Aota 3205b316468SNaohiro Aota zone_info->empty_zones = bitmap_zalloc(zone_info->nr_zones, GFP_KERNEL); 3215b316468SNaohiro Aota if (!zone_info->empty_zones) { 3225b316468SNaohiro Aota ret = -ENOMEM; 3235b316468SNaohiro Aota goto out; 3245b316468SNaohiro Aota } 3255b316468SNaohiro Aota 3265b316468SNaohiro Aota zones = kcalloc(BTRFS_REPORT_NR_ZONES, sizeof(struct blk_zone), GFP_KERNEL); 3275b316468SNaohiro Aota if (!zones) { 3285b316468SNaohiro Aota ret = -ENOMEM; 3295b316468SNaohiro Aota goto out; 3305b316468SNaohiro Aota } 3315b316468SNaohiro Aota 3325b316468SNaohiro Aota /* Get zones type */ 3335b316468SNaohiro Aota while (sector < nr_sectors) { 3345b316468SNaohiro Aota nr_zones = BTRFS_REPORT_NR_ZONES; 3355b316468SNaohiro Aota ret = btrfs_get_dev_zones(device, sector << SECTOR_SHIFT, zones, 3365b316468SNaohiro Aota &nr_zones); 3375b316468SNaohiro Aota if (ret) 3385b316468SNaohiro Aota goto out; 3395b316468SNaohiro Aota 3405b316468SNaohiro Aota for (i = 0; i < nr_zones; i++) { 3415b316468SNaohiro Aota if (zones[i].type == BLK_ZONE_TYPE_SEQWRITE_REQ) 3425b316468SNaohiro Aota __set_bit(nreported, zone_info->seq_zones); 3435b316468SNaohiro Aota if (zones[i].cond == BLK_ZONE_COND_EMPTY) 3445b316468SNaohiro Aota __set_bit(nreported, zone_info->empty_zones); 3455b316468SNaohiro Aota nreported++; 3465b316468SNaohiro Aota } 3475b316468SNaohiro Aota sector = zones[nr_zones - 1].start + zones[nr_zones - 1].len; 3485b316468SNaohiro Aota } 3495b316468SNaohiro Aota 3505b316468SNaohiro Aota if (nreported != zone_info->nr_zones) { 3515b316468SNaohiro Aota btrfs_err_in_rcu(device->fs_info, 3525b316468SNaohiro Aota "inconsistent number of zones on %s (%u/%u)", 3535b316468SNaohiro Aota rcu_str_deref(device->name), nreported, 3545b316468SNaohiro Aota zone_info->nr_zones); 3555b316468SNaohiro Aota ret = -EIO; 3565b316468SNaohiro Aota goto out; 3575b316468SNaohiro Aota } 3585b316468SNaohiro Aota 35912659251SNaohiro Aota /* Validate superblock log */ 36012659251SNaohiro Aota nr_zones = BTRFS_NR_SB_LOG_ZONES; 36112659251SNaohiro Aota for (i = 0; i < BTRFS_SUPER_MIRROR_MAX; i++) { 36212659251SNaohiro Aota u32 sb_zone; 36312659251SNaohiro Aota u64 sb_wp; 36412659251SNaohiro Aota int sb_pos = BTRFS_NR_SB_LOG_ZONES * i; 36512659251SNaohiro Aota 36612659251SNaohiro Aota sb_zone = sb_zone_number(zone_info->zone_size_shift, i); 36712659251SNaohiro Aota if (sb_zone + 1 >= zone_info->nr_zones) 36812659251SNaohiro Aota continue; 36912659251SNaohiro Aota 37012659251SNaohiro Aota sector = sb_zone << (zone_info->zone_size_shift - SECTOR_SHIFT); 37112659251SNaohiro Aota ret = btrfs_get_dev_zones(device, sector << SECTOR_SHIFT, 37212659251SNaohiro Aota &zone_info->sb_zones[sb_pos], 37312659251SNaohiro Aota &nr_zones); 37412659251SNaohiro Aota if (ret) 37512659251SNaohiro Aota goto out; 37612659251SNaohiro Aota 37712659251SNaohiro Aota if (nr_zones != BTRFS_NR_SB_LOG_ZONES) { 37812659251SNaohiro Aota btrfs_err_in_rcu(device->fs_info, 37912659251SNaohiro Aota "zoned: failed to read super block log zone info at devid %llu zone %u", 38012659251SNaohiro Aota device->devid, sb_zone); 38112659251SNaohiro Aota ret = -EUCLEAN; 38212659251SNaohiro Aota goto out; 38312659251SNaohiro Aota } 38412659251SNaohiro Aota 38512659251SNaohiro Aota /* 38612659251SNaohiro Aota * If zones[0] is conventional, always use the beggining of the 38712659251SNaohiro Aota * zone to record superblock. No need to validate in that case. 38812659251SNaohiro Aota */ 38912659251SNaohiro Aota if (zone_info->sb_zones[BTRFS_NR_SB_LOG_ZONES * i].type == 39012659251SNaohiro Aota BLK_ZONE_TYPE_CONVENTIONAL) 39112659251SNaohiro Aota continue; 39212659251SNaohiro Aota 39312659251SNaohiro Aota ret = sb_write_pointer(device->bdev, 39412659251SNaohiro Aota &zone_info->sb_zones[sb_pos], &sb_wp); 39512659251SNaohiro Aota if (ret != -ENOENT && ret) { 39612659251SNaohiro Aota btrfs_err_in_rcu(device->fs_info, 39712659251SNaohiro Aota "zoned: super block log zone corrupted devid %llu zone %u", 39812659251SNaohiro Aota device->devid, sb_zone); 39912659251SNaohiro Aota ret = -EUCLEAN; 40012659251SNaohiro Aota goto out; 40112659251SNaohiro Aota } 40212659251SNaohiro Aota } 40312659251SNaohiro Aota 40412659251SNaohiro Aota 4055b316468SNaohiro Aota kfree(zones); 4065b316468SNaohiro Aota 4075b316468SNaohiro Aota device->zone_info = zone_info; 4085b316468SNaohiro Aota 4093c9daa09SJohannes Thumshirn switch (bdev_zoned_model(bdev)) { 4103c9daa09SJohannes Thumshirn case BLK_ZONED_HM: 4113c9daa09SJohannes Thumshirn model = "host-managed zoned"; 4123c9daa09SJohannes Thumshirn emulated = ""; 4133c9daa09SJohannes Thumshirn break; 4143c9daa09SJohannes Thumshirn case BLK_ZONED_HA: 4153c9daa09SJohannes Thumshirn model = "host-aware zoned"; 4163c9daa09SJohannes Thumshirn emulated = ""; 4173c9daa09SJohannes Thumshirn break; 4183c9daa09SJohannes Thumshirn case BLK_ZONED_NONE: 4193c9daa09SJohannes Thumshirn model = "regular"; 4203c9daa09SJohannes Thumshirn emulated = "emulated "; 4213c9daa09SJohannes Thumshirn break; 4223c9daa09SJohannes Thumshirn default: 4233c9daa09SJohannes Thumshirn /* Just in case */ 4243c9daa09SJohannes Thumshirn btrfs_err_in_rcu(fs_info, "zoned: unsupported model %d on %s", 4253c9daa09SJohannes Thumshirn bdev_zoned_model(bdev), 4263c9daa09SJohannes Thumshirn rcu_str_deref(device->name)); 4273c9daa09SJohannes Thumshirn ret = -EOPNOTSUPP; 4283c9daa09SJohannes Thumshirn goto out_free_zone_info; 4293c9daa09SJohannes Thumshirn } 4303c9daa09SJohannes Thumshirn 4313c9daa09SJohannes Thumshirn btrfs_info_in_rcu(fs_info, 4323c9daa09SJohannes Thumshirn "%s block device %s, %u %szones of %llu bytes", 4333c9daa09SJohannes Thumshirn model, rcu_str_deref(device->name), zone_info->nr_zones, 4343c9daa09SJohannes Thumshirn emulated, zone_info->zone_size); 4355b316468SNaohiro Aota 4365b316468SNaohiro Aota return 0; 4375b316468SNaohiro Aota 4385b316468SNaohiro Aota out: 4395b316468SNaohiro Aota kfree(zones); 4403c9daa09SJohannes Thumshirn out_free_zone_info: 4415b316468SNaohiro Aota bitmap_free(zone_info->empty_zones); 4425b316468SNaohiro Aota bitmap_free(zone_info->seq_zones); 4435b316468SNaohiro Aota kfree(zone_info); 4443c9daa09SJohannes Thumshirn device->zone_info = NULL; 4455b316468SNaohiro Aota 4465b316468SNaohiro Aota return ret; 4475b316468SNaohiro Aota } 4485b316468SNaohiro Aota 4495b316468SNaohiro Aota void btrfs_destroy_dev_zone_info(struct btrfs_device *device) 4505b316468SNaohiro Aota { 4515b316468SNaohiro Aota struct btrfs_zoned_device_info *zone_info = device->zone_info; 4525b316468SNaohiro Aota 4535b316468SNaohiro Aota if (!zone_info) 4545b316468SNaohiro Aota return; 4555b316468SNaohiro Aota 4565b316468SNaohiro Aota bitmap_free(zone_info->seq_zones); 4575b316468SNaohiro Aota bitmap_free(zone_info->empty_zones); 4585b316468SNaohiro Aota kfree(zone_info); 4595b316468SNaohiro Aota device->zone_info = NULL; 4605b316468SNaohiro Aota } 4615b316468SNaohiro Aota 4625b316468SNaohiro Aota int btrfs_get_dev_zone(struct btrfs_device *device, u64 pos, 4635b316468SNaohiro Aota struct blk_zone *zone) 4645b316468SNaohiro Aota { 4655b316468SNaohiro Aota unsigned int nr_zones = 1; 4665b316468SNaohiro Aota int ret; 4675b316468SNaohiro Aota 4685b316468SNaohiro Aota ret = btrfs_get_dev_zones(device, pos, zone, &nr_zones); 4695b316468SNaohiro Aota if (ret != 0 || !nr_zones) 4705b316468SNaohiro Aota return ret ? ret : -EIO; 4715b316468SNaohiro Aota 4725b316468SNaohiro Aota return 0; 4735b316468SNaohiro Aota } 474b70f5097SNaohiro Aota 475b70f5097SNaohiro Aota int btrfs_check_zoned_mode(struct btrfs_fs_info *fs_info) 476b70f5097SNaohiro Aota { 477b70f5097SNaohiro Aota struct btrfs_fs_devices *fs_devices = fs_info->fs_devices; 478b70f5097SNaohiro Aota struct btrfs_device *device; 479b70f5097SNaohiro Aota u64 zoned_devices = 0; 480b70f5097SNaohiro Aota u64 nr_devices = 0; 481b70f5097SNaohiro Aota u64 zone_size = 0; 482862931c7SNaohiro Aota u64 max_zone_append_size = 0; 4833c9daa09SJohannes Thumshirn const bool incompat_zoned = btrfs_fs_incompat(fs_info, ZONED); 484b70f5097SNaohiro Aota int ret = 0; 485b70f5097SNaohiro Aota 486b70f5097SNaohiro Aota /* Count zoned devices */ 487b70f5097SNaohiro Aota list_for_each_entry(device, &fs_devices->devices, dev_list) { 488b70f5097SNaohiro Aota enum blk_zoned_model model; 489b70f5097SNaohiro Aota 490b70f5097SNaohiro Aota if (!device->bdev) 491b70f5097SNaohiro Aota continue; 492b70f5097SNaohiro Aota 493b70f5097SNaohiro Aota model = bdev_zoned_model(device->bdev); 4943c9daa09SJohannes Thumshirn /* 4953c9daa09SJohannes Thumshirn * A Host-Managed zoned device must be used as a zoned device. 4963c9daa09SJohannes Thumshirn * A Host-Aware zoned device and a non-zoned devices can be 4973c9daa09SJohannes Thumshirn * treated as a zoned device, if ZONED flag is enabled in the 4983c9daa09SJohannes Thumshirn * superblock. 4993c9daa09SJohannes Thumshirn */ 500b70f5097SNaohiro Aota if (model == BLK_ZONED_HM || 5013c9daa09SJohannes Thumshirn (model == BLK_ZONED_HA && incompat_zoned) || 5023c9daa09SJohannes Thumshirn (model == BLK_ZONED_NONE && incompat_zoned)) { 5033c9daa09SJohannes Thumshirn struct btrfs_zoned_device_info *zone_info = 5043c9daa09SJohannes Thumshirn device->zone_info; 505862931c7SNaohiro Aota 506862931c7SNaohiro Aota zone_info = device->zone_info; 507b70f5097SNaohiro Aota zoned_devices++; 508b70f5097SNaohiro Aota if (!zone_size) { 509862931c7SNaohiro Aota zone_size = zone_info->zone_size; 510862931c7SNaohiro Aota } else if (zone_info->zone_size != zone_size) { 511b70f5097SNaohiro Aota btrfs_err(fs_info, 512b70f5097SNaohiro Aota "zoned: unequal block device zone sizes: have %llu found %llu", 513b70f5097SNaohiro Aota device->zone_info->zone_size, 514b70f5097SNaohiro Aota zone_size); 515b70f5097SNaohiro Aota ret = -EINVAL; 516b70f5097SNaohiro Aota goto out; 517b70f5097SNaohiro Aota } 518862931c7SNaohiro Aota if (!max_zone_append_size || 519862931c7SNaohiro Aota (zone_info->max_zone_append_size && 520862931c7SNaohiro Aota zone_info->max_zone_append_size < max_zone_append_size)) 521862931c7SNaohiro Aota max_zone_append_size = 522862931c7SNaohiro Aota zone_info->max_zone_append_size; 523b70f5097SNaohiro Aota } 524b70f5097SNaohiro Aota nr_devices++; 525b70f5097SNaohiro Aota } 526b70f5097SNaohiro Aota 527b70f5097SNaohiro Aota if (!zoned_devices && !incompat_zoned) 528b70f5097SNaohiro Aota goto out; 529b70f5097SNaohiro Aota 530b70f5097SNaohiro Aota if (!zoned_devices && incompat_zoned) { 531b70f5097SNaohiro Aota /* No zoned block device found on ZONED filesystem */ 532b70f5097SNaohiro Aota btrfs_err(fs_info, 533b70f5097SNaohiro Aota "zoned: no zoned devices found on a zoned filesystem"); 534b70f5097SNaohiro Aota ret = -EINVAL; 535b70f5097SNaohiro Aota goto out; 536b70f5097SNaohiro Aota } 537b70f5097SNaohiro Aota 538b70f5097SNaohiro Aota if (zoned_devices && !incompat_zoned) { 539b70f5097SNaohiro Aota btrfs_err(fs_info, 540b70f5097SNaohiro Aota "zoned: mode not enabled but zoned device found"); 541b70f5097SNaohiro Aota ret = -EINVAL; 542b70f5097SNaohiro Aota goto out; 543b70f5097SNaohiro Aota } 544b70f5097SNaohiro Aota 545b70f5097SNaohiro Aota if (zoned_devices != nr_devices) { 546b70f5097SNaohiro Aota btrfs_err(fs_info, 547b70f5097SNaohiro Aota "zoned: cannot mix zoned and regular devices"); 548b70f5097SNaohiro Aota ret = -EINVAL; 549b70f5097SNaohiro Aota goto out; 550b70f5097SNaohiro Aota } 551b70f5097SNaohiro Aota 552b70f5097SNaohiro Aota /* 553b70f5097SNaohiro Aota * stripe_size is always aligned to BTRFS_STRIPE_LEN in 554b70f5097SNaohiro Aota * __btrfs_alloc_chunk(). Since we want stripe_len == zone_size, 555b70f5097SNaohiro Aota * check the alignment here. 556b70f5097SNaohiro Aota */ 557b70f5097SNaohiro Aota if (!IS_ALIGNED(zone_size, BTRFS_STRIPE_LEN)) { 558b70f5097SNaohiro Aota btrfs_err(fs_info, 559b70f5097SNaohiro Aota "zoned: zone size %llu not aligned to stripe %u", 560b70f5097SNaohiro Aota zone_size, BTRFS_STRIPE_LEN); 561b70f5097SNaohiro Aota ret = -EINVAL; 562b70f5097SNaohiro Aota goto out; 563b70f5097SNaohiro Aota } 564b70f5097SNaohiro Aota 565a589dde0SNaohiro Aota if (btrfs_fs_incompat(fs_info, MIXED_GROUPS)) { 566a589dde0SNaohiro Aota btrfs_err(fs_info, "zoned: mixed block groups not supported"); 567a589dde0SNaohiro Aota ret = -EINVAL; 568a589dde0SNaohiro Aota goto out; 569a589dde0SNaohiro Aota } 570a589dde0SNaohiro Aota 571b70f5097SNaohiro Aota fs_info->zone_size = zone_size; 572862931c7SNaohiro Aota fs_info->max_zone_append_size = max_zone_append_size; 5731cd6121fSNaohiro Aota fs_info->fs_devices->chunk_alloc_policy = BTRFS_CHUNK_ALLOC_ZONED; 574b70f5097SNaohiro Aota 575b53429baSJohannes Thumshirn /* 576b53429baSJohannes Thumshirn * Check mount options here, because we might change fs_info->zoned 577b53429baSJohannes Thumshirn * from fs_info->zone_size. 578b53429baSJohannes Thumshirn */ 579b53429baSJohannes Thumshirn ret = btrfs_check_mountopts_zoned(fs_info); 580b53429baSJohannes Thumshirn if (ret) 581b53429baSJohannes Thumshirn goto out; 582b53429baSJohannes Thumshirn 583b70f5097SNaohiro Aota btrfs_info(fs_info, "zoned mode enabled with zone size %llu", zone_size); 584b70f5097SNaohiro Aota out: 585b70f5097SNaohiro Aota return ret; 586b70f5097SNaohiro Aota } 5875d1ab66cSNaohiro Aota 5885d1ab66cSNaohiro Aota int btrfs_check_mountopts_zoned(struct btrfs_fs_info *info) 5895d1ab66cSNaohiro Aota { 5905d1ab66cSNaohiro Aota if (!btrfs_is_zoned(info)) 5915d1ab66cSNaohiro Aota return 0; 5925d1ab66cSNaohiro Aota 5935d1ab66cSNaohiro Aota /* 5945d1ab66cSNaohiro Aota * Space cache writing is not COWed. Disable that to avoid write errors 5955d1ab66cSNaohiro Aota * in sequential zones. 5965d1ab66cSNaohiro Aota */ 5975d1ab66cSNaohiro Aota if (btrfs_test_opt(info, SPACE_CACHE)) { 5985d1ab66cSNaohiro Aota btrfs_err(info, "zoned: space cache v1 is not supported"); 5995d1ab66cSNaohiro Aota return -EINVAL; 6005d1ab66cSNaohiro Aota } 6015d1ab66cSNaohiro Aota 602d206e9c9SNaohiro Aota if (btrfs_test_opt(info, NODATACOW)) { 603d206e9c9SNaohiro Aota btrfs_err(info, "zoned: NODATACOW not supported"); 604d206e9c9SNaohiro Aota return -EINVAL; 605d206e9c9SNaohiro Aota } 606d206e9c9SNaohiro Aota 6075d1ab66cSNaohiro Aota return 0; 6085d1ab66cSNaohiro Aota } 60912659251SNaohiro Aota 61012659251SNaohiro Aota static int sb_log_location(struct block_device *bdev, struct blk_zone *zones, 61112659251SNaohiro Aota int rw, u64 *bytenr_ret) 61212659251SNaohiro Aota { 61312659251SNaohiro Aota u64 wp; 61412659251SNaohiro Aota int ret; 61512659251SNaohiro Aota 61612659251SNaohiro Aota if (zones[0].type == BLK_ZONE_TYPE_CONVENTIONAL) { 61712659251SNaohiro Aota *bytenr_ret = zones[0].start << SECTOR_SHIFT; 61812659251SNaohiro Aota return 0; 61912659251SNaohiro Aota } 62012659251SNaohiro Aota 62112659251SNaohiro Aota ret = sb_write_pointer(bdev, zones, &wp); 62212659251SNaohiro Aota if (ret != -ENOENT && ret < 0) 62312659251SNaohiro Aota return ret; 62412659251SNaohiro Aota 62512659251SNaohiro Aota if (rw == WRITE) { 62612659251SNaohiro Aota struct blk_zone *reset = NULL; 62712659251SNaohiro Aota 62812659251SNaohiro Aota if (wp == zones[0].start << SECTOR_SHIFT) 62912659251SNaohiro Aota reset = &zones[0]; 63012659251SNaohiro Aota else if (wp == zones[1].start << SECTOR_SHIFT) 63112659251SNaohiro Aota reset = &zones[1]; 63212659251SNaohiro Aota 63312659251SNaohiro Aota if (reset && reset->cond != BLK_ZONE_COND_EMPTY) { 63412659251SNaohiro Aota ASSERT(reset->cond == BLK_ZONE_COND_FULL); 63512659251SNaohiro Aota 63612659251SNaohiro Aota ret = blkdev_zone_mgmt(bdev, REQ_OP_ZONE_RESET, 63712659251SNaohiro Aota reset->start, reset->len, 63812659251SNaohiro Aota GFP_NOFS); 63912659251SNaohiro Aota if (ret) 64012659251SNaohiro Aota return ret; 64112659251SNaohiro Aota 64212659251SNaohiro Aota reset->cond = BLK_ZONE_COND_EMPTY; 64312659251SNaohiro Aota reset->wp = reset->start; 64412659251SNaohiro Aota } 64512659251SNaohiro Aota } else if (ret != -ENOENT) { 64612659251SNaohiro Aota /* For READ, we want the precious one */ 64712659251SNaohiro Aota if (wp == zones[0].start << SECTOR_SHIFT) 64812659251SNaohiro Aota wp = (zones[1].start + zones[1].len) << SECTOR_SHIFT; 64912659251SNaohiro Aota wp -= BTRFS_SUPER_INFO_SIZE; 65012659251SNaohiro Aota } 65112659251SNaohiro Aota 65212659251SNaohiro Aota *bytenr_ret = wp; 65312659251SNaohiro Aota return 0; 65412659251SNaohiro Aota 65512659251SNaohiro Aota } 65612659251SNaohiro Aota 65712659251SNaohiro Aota int btrfs_sb_log_location_bdev(struct block_device *bdev, int mirror, int rw, 65812659251SNaohiro Aota u64 *bytenr_ret) 65912659251SNaohiro Aota { 66012659251SNaohiro Aota struct blk_zone zones[BTRFS_NR_SB_LOG_ZONES]; 661*d734492aSNaohiro Aota sector_t zone_sectors; 66212659251SNaohiro Aota u32 sb_zone; 66312659251SNaohiro Aota int ret; 66412659251SNaohiro Aota u8 zone_sectors_shift; 66512659251SNaohiro Aota sector_t nr_sectors; 66612659251SNaohiro Aota u32 nr_zones; 66712659251SNaohiro Aota 66812659251SNaohiro Aota if (!bdev_is_zoned(bdev)) { 66912659251SNaohiro Aota *bytenr_ret = btrfs_sb_offset(mirror); 67012659251SNaohiro Aota return 0; 67112659251SNaohiro Aota } 67212659251SNaohiro Aota 67312659251SNaohiro Aota ASSERT(rw == READ || rw == WRITE); 67412659251SNaohiro Aota 67512659251SNaohiro Aota zone_sectors = bdev_zone_sectors(bdev); 67612659251SNaohiro Aota if (!is_power_of_2(zone_sectors)) 67712659251SNaohiro Aota return -EINVAL; 67812659251SNaohiro Aota zone_sectors_shift = ilog2(zone_sectors); 679ac7ac461SLinus Torvalds nr_sectors = bdev_nr_sectors(bdev); 68012659251SNaohiro Aota nr_zones = nr_sectors >> zone_sectors_shift; 68112659251SNaohiro Aota 68212659251SNaohiro Aota sb_zone = sb_zone_number(zone_sectors_shift + SECTOR_SHIFT, mirror); 68312659251SNaohiro Aota if (sb_zone + 1 >= nr_zones) 68412659251SNaohiro Aota return -ENOENT; 68512659251SNaohiro Aota 68612659251SNaohiro Aota ret = blkdev_report_zones(bdev, sb_zone << zone_sectors_shift, 68712659251SNaohiro Aota BTRFS_NR_SB_LOG_ZONES, copy_zone_info_cb, 68812659251SNaohiro Aota zones); 68912659251SNaohiro Aota if (ret < 0) 69012659251SNaohiro Aota return ret; 69112659251SNaohiro Aota if (ret != BTRFS_NR_SB_LOG_ZONES) 69212659251SNaohiro Aota return -EIO; 69312659251SNaohiro Aota 69412659251SNaohiro Aota return sb_log_location(bdev, zones, rw, bytenr_ret); 69512659251SNaohiro Aota } 69612659251SNaohiro Aota 69712659251SNaohiro Aota int btrfs_sb_log_location(struct btrfs_device *device, int mirror, int rw, 69812659251SNaohiro Aota u64 *bytenr_ret) 69912659251SNaohiro Aota { 70012659251SNaohiro Aota struct btrfs_zoned_device_info *zinfo = device->zone_info; 70112659251SNaohiro Aota u32 zone_num; 70212659251SNaohiro Aota 703d6639b35SNaohiro Aota /* 704d6639b35SNaohiro Aota * For a zoned filesystem on a non-zoned block device, use the same 705d6639b35SNaohiro Aota * super block locations as regular filesystem. Doing so, the super 706d6639b35SNaohiro Aota * block can always be retrieved and the zoned flag of the volume 707d6639b35SNaohiro Aota * detected from the super block information. 708d6639b35SNaohiro Aota */ 709d6639b35SNaohiro Aota if (!bdev_is_zoned(device->bdev)) { 71012659251SNaohiro Aota *bytenr_ret = btrfs_sb_offset(mirror); 71112659251SNaohiro Aota return 0; 71212659251SNaohiro Aota } 71312659251SNaohiro Aota 71412659251SNaohiro Aota zone_num = sb_zone_number(zinfo->zone_size_shift, mirror); 71512659251SNaohiro Aota if (zone_num + 1 >= zinfo->nr_zones) 71612659251SNaohiro Aota return -ENOENT; 71712659251SNaohiro Aota 71812659251SNaohiro Aota return sb_log_location(device->bdev, 71912659251SNaohiro Aota &zinfo->sb_zones[BTRFS_NR_SB_LOG_ZONES * mirror], 72012659251SNaohiro Aota rw, bytenr_ret); 72112659251SNaohiro Aota } 72212659251SNaohiro Aota 72312659251SNaohiro Aota static inline bool is_sb_log_zone(struct btrfs_zoned_device_info *zinfo, 72412659251SNaohiro Aota int mirror) 72512659251SNaohiro Aota { 72612659251SNaohiro Aota u32 zone_num; 72712659251SNaohiro Aota 72812659251SNaohiro Aota if (!zinfo) 72912659251SNaohiro Aota return false; 73012659251SNaohiro Aota 73112659251SNaohiro Aota zone_num = sb_zone_number(zinfo->zone_size_shift, mirror); 73212659251SNaohiro Aota if (zone_num + 1 >= zinfo->nr_zones) 73312659251SNaohiro Aota return false; 73412659251SNaohiro Aota 73512659251SNaohiro Aota if (!test_bit(zone_num, zinfo->seq_zones)) 73612659251SNaohiro Aota return false; 73712659251SNaohiro Aota 73812659251SNaohiro Aota return true; 73912659251SNaohiro Aota } 74012659251SNaohiro Aota 74112659251SNaohiro Aota void btrfs_advance_sb_log(struct btrfs_device *device, int mirror) 74212659251SNaohiro Aota { 74312659251SNaohiro Aota struct btrfs_zoned_device_info *zinfo = device->zone_info; 74412659251SNaohiro Aota struct blk_zone *zone; 74512659251SNaohiro Aota 74612659251SNaohiro Aota if (!is_sb_log_zone(zinfo, mirror)) 74712659251SNaohiro Aota return; 74812659251SNaohiro Aota 74912659251SNaohiro Aota zone = &zinfo->sb_zones[BTRFS_NR_SB_LOG_ZONES * mirror]; 75012659251SNaohiro Aota if (zone->cond != BLK_ZONE_COND_FULL) { 75112659251SNaohiro Aota if (zone->cond == BLK_ZONE_COND_EMPTY) 75212659251SNaohiro Aota zone->cond = BLK_ZONE_COND_IMP_OPEN; 75312659251SNaohiro Aota 75412659251SNaohiro Aota zone->wp += (BTRFS_SUPER_INFO_SIZE >> SECTOR_SHIFT); 75512659251SNaohiro Aota 75612659251SNaohiro Aota if (zone->wp == zone->start + zone->len) 75712659251SNaohiro Aota zone->cond = BLK_ZONE_COND_FULL; 75812659251SNaohiro Aota 75912659251SNaohiro Aota return; 76012659251SNaohiro Aota } 76112659251SNaohiro Aota 76212659251SNaohiro Aota zone++; 76312659251SNaohiro Aota ASSERT(zone->cond != BLK_ZONE_COND_FULL); 76412659251SNaohiro Aota if (zone->cond == BLK_ZONE_COND_EMPTY) 76512659251SNaohiro Aota zone->cond = BLK_ZONE_COND_IMP_OPEN; 76612659251SNaohiro Aota 76712659251SNaohiro Aota zone->wp += (BTRFS_SUPER_INFO_SIZE >> SECTOR_SHIFT); 76812659251SNaohiro Aota 76912659251SNaohiro Aota if (zone->wp == zone->start + zone->len) 77012659251SNaohiro Aota zone->cond = BLK_ZONE_COND_FULL; 77112659251SNaohiro Aota } 77212659251SNaohiro Aota 77312659251SNaohiro Aota int btrfs_reset_sb_log_zones(struct block_device *bdev, int mirror) 77412659251SNaohiro Aota { 77512659251SNaohiro Aota sector_t zone_sectors; 77612659251SNaohiro Aota sector_t nr_sectors; 77712659251SNaohiro Aota u8 zone_sectors_shift; 77812659251SNaohiro Aota u32 sb_zone; 77912659251SNaohiro Aota u32 nr_zones; 78012659251SNaohiro Aota 78112659251SNaohiro Aota zone_sectors = bdev_zone_sectors(bdev); 78212659251SNaohiro Aota zone_sectors_shift = ilog2(zone_sectors); 783ac7ac461SLinus Torvalds nr_sectors = bdev_nr_sectors(bdev); 78412659251SNaohiro Aota nr_zones = nr_sectors >> zone_sectors_shift; 78512659251SNaohiro Aota 78612659251SNaohiro Aota sb_zone = sb_zone_number(zone_sectors_shift + SECTOR_SHIFT, mirror); 78712659251SNaohiro Aota if (sb_zone + 1 >= nr_zones) 78812659251SNaohiro Aota return -ENOENT; 78912659251SNaohiro Aota 79012659251SNaohiro Aota return blkdev_zone_mgmt(bdev, REQ_OP_ZONE_RESET, 79112659251SNaohiro Aota sb_zone << zone_sectors_shift, 79212659251SNaohiro Aota zone_sectors * BTRFS_NR_SB_LOG_ZONES, GFP_NOFS); 79312659251SNaohiro Aota } 7941cd6121fSNaohiro Aota 7951cd6121fSNaohiro Aota /** 7961cd6121fSNaohiro Aota * btrfs_find_allocatable_zones - find allocatable zones within a given region 7971cd6121fSNaohiro Aota * 7981cd6121fSNaohiro Aota * @device: the device to allocate a region on 7991cd6121fSNaohiro Aota * @hole_start: the position of the hole to allocate the region 8001cd6121fSNaohiro Aota * @num_bytes: size of wanted region 8011cd6121fSNaohiro Aota * @hole_end: the end of the hole 8021cd6121fSNaohiro Aota * @return: position of allocatable zones 8031cd6121fSNaohiro Aota * 8041cd6121fSNaohiro Aota * Allocatable region should not contain any superblock locations. 8051cd6121fSNaohiro Aota */ 8061cd6121fSNaohiro Aota u64 btrfs_find_allocatable_zones(struct btrfs_device *device, u64 hole_start, 8071cd6121fSNaohiro Aota u64 hole_end, u64 num_bytes) 8081cd6121fSNaohiro Aota { 8091cd6121fSNaohiro Aota struct btrfs_zoned_device_info *zinfo = device->zone_info; 8101cd6121fSNaohiro Aota const u8 shift = zinfo->zone_size_shift; 8111cd6121fSNaohiro Aota u64 nzones = num_bytes >> shift; 8121cd6121fSNaohiro Aota u64 pos = hole_start; 8131cd6121fSNaohiro Aota u64 begin, end; 8141cd6121fSNaohiro Aota bool have_sb; 8151cd6121fSNaohiro Aota int i; 8161cd6121fSNaohiro Aota 8171cd6121fSNaohiro Aota ASSERT(IS_ALIGNED(hole_start, zinfo->zone_size)); 8181cd6121fSNaohiro Aota ASSERT(IS_ALIGNED(num_bytes, zinfo->zone_size)); 8191cd6121fSNaohiro Aota 8201cd6121fSNaohiro Aota while (pos < hole_end) { 8211cd6121fSNaohiro Aota begin = pos >> shift; 8221cd6121fSNaohiro Aota end = begin + nzones; 8231cd6121fSNaohiro Aota 8241cd6121fSNaohiro Aota if (end > zinfo->nr_zones) 8251cd6121fSNaohiro Aota return hole_end; 8261cd6121fSNaohiro Aota 8271cd6121fSNaohiro Aota /* Check if zones in the region are all empty */ 8281cd6121fSNaohiro Aota if (btrfs_dev_is_sequential(device, pos) && 8291cd6121fSNaohiro Aota find_next_zero_bit(zinfo->empty_zones, end, begin) != end) { 8301cd6121fSNaohiro Aota pos += zinfo->zone_size; 8311cd6121fSNaohiro Aota continue; 8321cd6121fSNaohiro Aota } 8331cd6121fSNaohiro Aota 8341cd6121fSNaohiro Aota have_sb = false; 8351cd6121fSNaohiro Aota for (i = 0; i < BTRFS_SUPER_MIRROR_MAX; i++) { 8361cd6121fSNaohiro Aota u32 sb_zone; 8371cd6121fSNaohiro Aota u64 sb_pos; 8381cd6121fSNaohiro Aota 8391cd6121fSNaohiro Aota sb_zone = sb_zone_number(shift, i); 8401cd6121fSNaohiro Aota if (!(end <= sb_zone || 8411cd6121fSNaohiro Aota sb_zone + BTRFS_NR_SB_LOG_ZONES <= begin)) { 8421cd6121fSNaohiro Aota have_sb = true; 8431cd6121fSNaohiro Aota pos = ((u64)sb_zone + BTRFS_NR_SB_LOG_ZONES) << shift; 8441cd6121fSNaohiro Aota break; 8451cd6121fSNaohiro Aota } 8461cd6121fSNaohiro Aota 8471cd6121fSNaohiro Aota /* We also need to exclude regular superblock positions */ 8481cd6121fSNaohiro Aota sb_pos = btrfs_sb_offset(i); 8491cd6121fSNaohiro Aota if (!(pos + num_bytes <= sb_pos || 8501cd6121fSNaohiro Aota sb_pos + BTRFS_SUPER_INFO_SIZE <= pos)) { 8511cd6121fSNaohiro Aota have_sb = true; 8521cd6121fSNaohiro Aota pos = ALIGN(sb_pos + BTRFS_SUPER_INFO_SIZE, 8531cd6121fSNaohiro Aota zinfo->zone_size); 8541cd6121fSNaohiro Aota break; 8551cd6121fSNaohiro Aota } 8561cd6121fSNaohiro Aota } 8571cd6121fSNaohiro Aota if (!have_sb) 8581cd6121fSNaohiro Aota break; 8591cd6121fSNaohiro Aota } 8601cd6121fSNaohiro Aota 8611cd6121fSNaohiro Aota return pos; 8621cd6121fSNaohiro Aota } 8631cd6121fSNaohiro Aota 8641cd6121fSNaohiro Aota int btrfs_reset_device_zone(struct btrfs_device *device, u64 physical, 8651cd6121fSNaohiro Aota u64 length, u64 *bytes) 8661cd6121fSNaohiro Aota { 8671cd6121fSNaohiro Aota int ret; 8681cd6121fSNaohiro Aota 8691cd6121fSNaohiro Aota *bytes = 0; 8701cd6121fSNaohiro Aota ret = blkdev_zone_mgmt(device->bdev, REQ_OP_ZONE_RESET, 8711cd6121fSNaohiro Aota physical >> SECTOR_SHIFT, length >> SECTOR_SHIFT, 8721cd6121fSNaohiro Aota GFP_NOFS); 8731cd6121fSNaohiro Aota if (ret) 8741cd6121fSNaohiro Aota return ret; 8751cd6121fSNaohiro Aota 8761cd6121fSNaohiro Aota *bytes = length; 8771cd6121fSNaohiro Aota while (length) { 8781cd6121fSNaohiro Aota btrfs_dev_set_zone_empty(device, physical); 8791cd6121fSNaohiro Aota physical += device->zone_info->zone_size; 8801cd6121fSNaohiro Aota length -= device->zone_info->zone_size; 8811cd6121fSNaohiro Aota } 8821cd6121fSNaohiro Aota 8831cd6121fSNaohiro Aota return 0; 8841cd6121fSNaohiro Aota } 8851cd6121fSNaohiro Aota 8861cd6121fSNaohiro Aota int btrfs_ensure_empty_zones(struct btrfs_device *device, u64 start, u64 size) 8871cd6121fSNaohiro Aota { 8881cd6121fSNaohiro Aota struct btrfs_zoned_device_info *zinfo = device->zone_info; 8891cd6121fSNaohiro Aota const u8 shift = zinfo->zone_size_shift; 8901cd6121fSNaohiro Aota unsigned long begin = start >> shift; 8911cd6121fSNaohiro Aota unsigned long end = (start + size) >> shift; 8921cd6121fSNaohiro Aota u64 pos; 8931cd6121fSNaohiro Aota int ret; 8941cd6121fSNaohiro Aota 8951cd6121fSNaohiro Aota ASSERT(IS_ALIGNED(start, zinfo->zone_size)); 8961cd6121fSNaohiro Aota ASSERT(IS_ALIGNED(size, zinfo->zone_size)); 8971cd6121fSNaohiro Aota 8981cd6121fSNaohiro Aota if (end > zinfo->nr_zones) 8991cd6121fSNaohiro Aota return -ERANGE; 9001cd6121fSNaohiro Aota 9011cd6121fSNaohiro Aota /* All the zones are conventional */ 9021cd6121fSNaohiro Aota if (find_next_bit(zinfo->seq_zones, begin, end) == end) 9031cd6121fSNaohiro Aota return 0; 9041cd6121fSNaohiro Aota 9051cd6121fSNaohiro Aota /* All the zones are sequential and empty */ 9061cd6121fSNaohiro Aota if (find_next_zero_bit(zinfo->seq_zones, begin, end) == end && 9071cd6121fSNaohiro Aota find_next_zero_bit(zinfo->empty_zones, begin, end) == end) 9081cd6121fSNaohiro Aota return 0; 9091cd6121fSNaohiro Aota 9101cd6121fSNaohiro Aota for (pos = start; pos < start + size; pos += zinfo->zone_size) { 9111cd6121fSNaohiro Aota u64 reset_bytes; 9121cd6121fSNaohiro Aota 9131cd6121fSNaohiro Aota if (!btrfs_dev_is_sequential(device, pos) || 9141cd6121fSNaohiro Aota btrfs_dev_is_empty_zone(device, pos)) 9151cd6121fSNaohiro Aota continue; 9161cd6121fSNaohiro Aota 9171cd6121fSNaohiro Aota /* Free regions should be empty */ 9181cd6121fSNaohiro Aota btrfs_warn_in_rcu( 9191cd6121fSNaohiro Aota device->fs_info, 9201cd6121fSNaohiro Aota "zoned: resetting device %s (devid %llu) zone %llu for allocation", 9211cd6121fSNaohiro Aota rcu_str_deref(device->name), device->devid, pos >> shift); 9221cd6121fSNaohiro Aota WARN_ON_ONCE(1); 9231cd6121fSNaohiro Aota 9241cd6121fSNaohiro Aota ret = btrfs_reset_device_zone(device, pos, zinfo->zone_size, 9251cd6121fSNaohiro Aota &reset_bytes); 9261cd6121fSNaohiro Aota if (ret) 9271cd6121fSNaohiro Aota return ret; 9281cd6121fSNaohiro Aota } 9291cd6121fSNaohiro Aota 9301cd6121fSNaohiro Aota return 0; 9311cd6121fSNaohiro Aota } 93208e11a3dSNaohiro Aota 933a94794d5SNaohiro Aota /* 934a94794d5SNaohiro Aota * Calculate an allocation pointer from the extent allocation information 935a94794d5SNaohiro Aota * for a block group consist of conventional zones. It is pointed to the 936a94794d5SNaohiro Aota * end of the highest addressed extent in the block group as an allocation 937a94794d5SNaohiro Aota * offset. 938a94794d5SNaohiro Aota */ 939a94794d5SNaohiro Aota static int calculate_alloc_pointer(struct btrfs_block_group *cache, 940a94794d5SNaohiro Aota u64 *offset_ret) 941a94794d5SNaohiro Aota { 942a94794d5SNaohiro Aota struct btrfs_fs_info *fs_info = cache->fs_info; 943a94794d5SNaohiro Aota struct btrfs_root *root = fs_info->extent_root; 944a94794d5SNaohiro Aota struct btrfs_path *path; 945a94794d5SNaohiro Aota struct btrfs_key key; 946a94794d5SNaohiro Aota struct btrfs_key found_key; 947a94794d5SNaohiro Aota int ret; 948a94794d5SNaohiro Aota u64 length; 949a94794d5SNaohiro Aota 950a94794d5SNaohiro Aota path = btrfs_alloc_path(); 951a94794d5SNaohiro Aota if (!path) 952a94794d5SNaohiro Aota return -ENOMEM; 953a94794d5SNaohiro Aota 954a94794d5SNaohiro Aota key.objectid = cache->start + cache->length; 955a94794d5SNaohiro Aota key.type = 0; 956a94794d5SNaohiro Aota key.offset = 0; 957a94794d5SNaohiro Aota 958a94794d5SNaohiro Aota ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); 959a94794d5SNaohiro Aota /* We should not find the exact match */ 960a94794d5SNaohiro Aota if (!ret) 961a94794d5SNaohiro Aota ret = -EUCLEAN; 962a94794d5SNaohiro Aota if (ret < 0) 963a94794d5SNaohiro Aota goto out; 964a94794d5SNaohiro Aota 965a94794d5SNaohiro Aota ret = btrfs_previous_extent_item(root, path, cache->start); 966a94794d5SNaohiro Aota if (ret) { 967a94794d5SNaohiro Aota if (ret == 1) { 968a94794d5SNaohiro Aota ret = 0; 969a94794d5SNaohiro Aota *offset_ret = 0; 970a94794d5SNaohiro Aota } 971a94794d5SNaohiro Aota goto out; 972a94794d5SNaohiro Aota } 973a94794d5SNaohiro Aota 974a94794d5SNaohiro Aota btrfs_item_key_to_cpu(path->nodes[0], &found_key, path->slots[0]); 975a94794d5SNaohiro Aota 976a94794d5SNaohiro Aota if (found_key.type == BTRFS_EXTENT_ITEM_KEY) 977a94794d5SNaohiro Aota length = found_key.offset; 978a94794d5SNaohiro Aota else 979a94794d5SNaohiro Aota length = fs_info->nodesize; 980a94794d5SNaohiro Aota 981a94794d5SNaohiro Aota if (!(found_key.objectid >= cache->start && 982a94794d5SNaohiro Aota found_key.objectid + length <= cache->start + cache->length)) { 983a94794d5SNaohiro Aota ret = -EUCLEAN; 984a94794d5SNaohiro Aota goto out; 985a94794d5SNaohiro Aota } 986a94794d5SNaohiro Aota *offset_ret = found_key.objectid + length - cache->start; 987a94794d5SNaohiro Aota ret = 0; 988a94794d5SNaohiro Aota 989a94794d5SNaohiro Aota out: 990a94794d5SNaohiro Aota btrfs_free_path(path); 991a94794d5SNaohiro Aota return ret; 992a94794d5SNaohiro Aota } 993a94794d5SNaohiro Aota 994a94794d5SNaohiro Aota int btrfs_load_block_group_zone_info(struct btrfs_block_group *cache, bool new) 99508e11a3dSNaohiro Aota { 99608e11a3dSNaohiro Aota struct btrfs_fs_info *fs_info = cache->fs_info; 99708e11a3dSNaohiro Aota struct extent_map_tree *em_tree = &fs_info->mapping_tree; 99808e11a3dSNaohiro Aota struct extent_map *em; 99908e11a3dSNaohiro Aota struct map_lookup *map; 100008e11a3dSNaohiro Aota struct btrfs_device *device; 100108e11a3dSNaohiro Aota u64 logical = cache->start; 100208e11a3dSNaohiro Aota u64 length = cache->length; 100308e11a3dSNaohiro Aota u64 physical = 0; 100408e11a3dSNaohiro Aota int ret; 100508e11a3dSNaohiro Aota int i; 100608e11a3dSNaohiro Aota unsigned int nofs_flag; 100708e11a3dSNaohiro Aota u64 *alloc_offsets = NULL; 1008a94794d5SNaohiro Aota u64 last_alloc = 0; 100908e11a3dSNaohiro Aota u32 num_sequential = 0, num_conventional = 0; 101008e11a3dSNaohiro Aota 101108e11a3dSNaohiro Aota if (!btrfs_is_zoned(fs_info)) 101208e11a3dSNaohiro Aota return 0; 101308e11a3dSNaohiro Aota 101408e11a3dSNaohiro Aota /* Sanity check */ 101508e11a3dSNaohiro Aota if (!IS_ALIGNED(length, fs_info->zone_size)) { 101608e11a3dSNaohiro Aota btrfs_err(fs_info, 101708e11a3dSNaohiro Aota "zoned: block group %llu len %llu unaligned to zone size %llu", 101808e11a3dSNaohiro Aota logical, length, fs_info->zone_size); 101908e11a3dSNaohiro Aota return -EIO; 102008e11a3dSNaohiro Aota } 102108e11a3dSNaohiro Aota 102208e11a3dSNaohiro Aota /* Get the chunk mapping */ 102308e11a3dSNaohiro Aota read_lock(&em_tree->lock); 102408e11a3dSNaohiro Aota em = lookup_extent_mapping(em_tree, logical, length); 102508e11a3dSNaohiro Aota read_unlock(&em_tree->lock); 102608e11a3dSNaohiro Aota 102708e11a3dSNaohiro Aota if (!em) 102808e11a3dSNaohiro Aota return -EINVAL; 102908e11a3dSNaohiro Aota 103008e11a3dSNaohiro Aota map = em->map_lookup; 103108e11a3dSNaohiro Aota 103208e11a3dSNaohiro Aota alloc_offsets = kcalloc(map->num_stripes, sizeof(*alloc_offsets), GFP_NOFS); 103308e11a3dSNaohiro Aota if (!alloc_offsets) { 103408e11a3dSNaohiro Aota free_extent_map(em); 103508e11a3dSNaohiro Aota return -ENOMEM; 103608e11a3dSNaohiro Aota } 103708e11a3dSNaohiro Aota 103808e11a3dSNaohiro Aota for (i = 0; i < map->num_stripes; i++) { 103908e11a3dSNaohiro Aota bool is_sequential; 104008e11a3dSNaohiro Aota struct blk_zone zone; 10416143c23cSNaohiro Aota struct btrfs_dev_replace *dev_replace = &fs_info->dev_replace; 10426143c23cSNaohiro Aota int dev_replace_is_ongoing = 0; 104308e11a3dSNaohiro Aota 104408e11a3dSNaohiro Aota device = map->stripes[i].dev; 104508e11a3dSNaohiro Aota physical = map->stripes[i].physical; 104608e11a3dSNaohiro Aota 104708e11a3dSNaohiro Aota if (device->bdev == NULL) { 104808e11a3dSNaohiro Aota alloc_offsets[i] = WP_MISSING_DEV; 104908e11a3dSNaohiro Aota continue; 105008e11a3dSNaohiro Aota } 105108e11a3dSNaohiro Aota 105208e11a3dSNaohiro Aota is_sequential = btrfs_dev_is_sequential(device, physical); 105308e11a3dSNaohiro Aota if (is_sequential) 105408e11a3dSNaohiro Aota num_sequential++; 105508e11a3dSNaohiro Aota else 105608e11a3dSNaohiro Aota num_conventional++; 105708e11a3dSNaohiro Aota 105808e11a3dSNaohiro Aota if (!is_sequential) { 105908e11a3dSNaohiro Aota alloc_offsets[i] = WP_CONVENTIONAL; 106008e11a3dSNaohiro Aota continue; 106108e11a3dSNaohiro Aota } 106208e11a3dSNaohiro Aota 106308e11a3dSNaohiro Aota /* 106408e11a3dSNaohiro Aota * This zone will be used for allocation, so mark this zone 106508e11a3dSNaohiro Aota * non-empty. 106608e11a3dSNaohiro Aota */ 106708e11a3dSNaohiro Aota btrfs_dev_clear_zone_empty(device, physical); 106808e11a3dSNaohiro Aota 10696143c23cSNaohiro Aota down_read(&dev_replace->rwsem); 10706143c23cSNaohiro Aota dev_replace_is_ongoing = btrfs_dev_replace_is_ongoing(dev_replace); 10716143c23cSNaohiro Aota if (dev_replace_is_ongoing && dev_replace->tgtdev != NULL) 10726143c23cSNaohiro Aota btrfs_dev_clear_zone_empty(dev_replace->tgtdev, physical); 10736143c23cSNaohiro Aota up_read(&dev_replace->rwsem); 10746143c23cSNaohiro Aota 107508e11a3dSNaohiro Aota /* 107608e11a3dSNaohiro Aota * The group is mapped to a sequential zone. Get the zone write 107708e11a3dSNaohiro Aota * pointer to determine the allocation offset within the zone. 107808e11a3dSNaohiro Aota */ 107908e11a3dSNaohiro Aota WARN_ON(!IS_ALIGNED(physical, fs_info->zone_size)); 108008e11a3dSNaohiro Aota nofs_flag = memalloc_nofs_save(); 108108e11a3dSNaohiro Aota ret = btrfs_get_dev_zone(device, physical, &zone); 108208e11a3dSNaohiro Aota memalloc_nofs_restore(nofs_flag); 108308e11a3dSNaohiro Aota if (ret == -EIO || ret == -EOPNOTSUPP) { 108408e11a3dSNaohiro Aota ret = 0; 108508e11a3dSNaohiro Aota alloc_offsets[i] = WP_MISSING_DEV; 108608e11a3dSNaohiro Aota continue; 108708e11a3dSNaohiro Aota } else if (ret) { 108808e11a3dSNaohiro Aota goto out; 108908e11a3dSNaohiro Aota } 109008e11a3dSNaohiro Aota 109108e11a3dSNaohiro Aota switch (zone.cond) { 109208e11a3dSNaohiro Aota case BLK_ZONE_COND_OFFLINE: 109308e11a3dSNaohiro Aota case BLK_ZONE_COND_READONLY: 109408e11a3dSNaohiro Aota btrfs_err(fs_info, 109508e11a3dSNaohiro Aota "zoned: offline/readonly zone %llu on device %s (devid %llu)", 109608e11a3dSNaohiro Aota physical >> device->zone_info->zone_size_shift, 109708e11a3dSNaohiro Aota rcu_str_deref(device->name), device->devid); 109808e11a3dSNaohiro Aota alloc_offsets[i] = WP_MISSING_DEV; 109908e11a3dSNaohiro Aota break; 110008e11a3dSNaohiro Aota case BLK_ZONE_COND_EMPTY: 110108e11a3dSNaohiro Aota alloc_offsets[i] = 0; 110208e11a3dSNaohiro Aota break; 110308e11a3dSNaohiro Aota case BLK_ZONE_COND_FULL: 110408e11a3dSNaohiro Aota alloc_offsets[i] = fs_info->zone_size; 110508e11a3dSNaohiro Aota break; 110608e11a3dSNaohiro Aota default: 110708e11a3dSNaohiro Aota /* Partially used zone */ 110808e11a3dSNaohiro Aota alloc_offsets[i] = 110908e11a3dSNaohiro Aota ((zone.wp - zone.start) << SECTOR_SHIFT); 111008e11a3dSNaohiro Aota break; 111108e11a3dSNaohiro Aota } 111208e11a3dSNaohiro Aota } 111308e11a3dSNaohiro Aota 111408f45559SJohannes Thumshirn if (num_sequential > 0) 111508f45559SJohannes Thumshirn cache->seq_zone = true; 111608f45559SJohannes Thumshirn 111708e11a3dSNaohiro Aota if (num_conventional > 0) { 111808e11a3dSNaohiro Aota /* 1119a94794d5SNaohiro Aota * Avoid calling calculate_alloc_pointer() for new BG. It 1120a94794d5SNaohiro Aota * is no use for new BG. It must be always 0. 1121a94794d5SNaohiro Aota * 1122a94794d5SNaohiro Aota * Also, we have a lock chain of extent buffer lock -> 1123a94794d5SNaohiro Aota * chunk mutex. For new BG, this function is called from 1124a94794d5SNaohiro Aota * btrfs_make_block_group() which is already taking the 1125a94794d5SNaohiro Aota * chunk mutex. Thus, we cannot call 1126a94794d5SNaohiro Aota * calculate_alloc_pointer() which takes extent buffer 1127a94794d5SNaohiro Aota * locks to avoid deadlock. 112808e11a3dSNaohiro Aota */ 1129a94794d5SNaohiro Aota if (new) { 1130a94794d5SNaohiro Aota cache->alloc_offset = 0; 113108e11a3dSNaohiro Aota goto out; 113208e11a3dSNaohiro Aota } 1133a94794d5SNaohiro Aota ret = calculate_alloc_pointer(cache, &last_alloc); 1134a94794d5SNaohiro Aota if (ret || map->num_stripes == num_conventional) { 1135a94794d5SNaohiro Aota if (!ret) 1136a94794d5SNaohiro Aota cache->alloc_offset = last_alloc; 1137a94794d5SNaohiro Aota else 1138a94794d5SNaohiro Aota btrfs_err(fs_info, 1139a94794d5SNaohiro Aota "zoned: failed to determine allocation offset of bg %llu", 1140a94794d5SNaohiro Aota cache->start); 1141a94794d5SNaohiro Aota goto out; 1142a94794d5SNaohiro Aota } 1143a94794d5SNaohiro Aota } 114408e11a3dSNaohiro Aota 114508e11a3dSNaohiro Aota switch (map->type & BTRFS_BLOCK_GROUP_PROFILE_MASK) { 114608e11a3dSNaohiro Aota case 0: /* single */ 114708e11a3dSNaohiro Aota cache->alloc_offset = alloc_offsets[0]; 114808e11a3dSNaohiro Aota break; 114908e11a3dSNaohiro Aota case BTRFS_BLOCK_GROUP_DUP: 115008e11a3dSNaohiro Aota case BTRFS_BLOCK_GROUP_RAID1: 115108e11a3dSNaohiro Aota case BTRFS_BLOCK_GROUP_RAID0: 115208e11a3dSNaohiro Aota case BTRFS_BLOCK_GROUP_RAID10: 115308e11a3dSNaohiro Aota case BTRFS_BLOCK_GROUP_RAID5: 115408e11a3dSNaohiro Aota case BTRFS_BLOCK_GROUP_RAID6: 115508e11a3dSNaohiro Aota /* non-single profiles are not supported yet */ 115608e11a3dSNaohiro Aota default: 115708e11a3dSNaohiro Aota btrfs_err(fs_info, "zoned: profile %s not yet supported", 115808e11a3dSNaohiro Aota btrfs_bg_type_to_raid_name(map->type)); 115908e11a3dSNaohiro Aota ret = -EINVAL; 116008e11a3dSNaohiro Aota goto out; 116108e11a3dSNaohiro Aota } 116208e11a3dSNaohiro Aota 116308e11a3dSNaohiro Aota out: 1164a94794d5SNaohiro Aota /* An extent is allocated after the write pointer */ 1165a94794d5SNaohiro Aota if (!ret && num_conventional && last_alloc > cache->alloc_offset) { 1166a94794d5SNaohiro Aota btrfs_err(fs_info, 1167a94794d5SNaohiro Aota "zoned: got wrong write pointer in BG %llu: %llu > %llu", 1168a94794d5SNaohiro Aota logical, last_alloc, cache->alloc_offset); 1169a94794d5SNaohiro Aota ret = -EIO; 1170a94794d5SNaohiro Aota } 1171a94794d5SNaohiro Aota 11720bc09ca1SNaohiro Aota if (!ret) 11730bc09ca1SNaohiro Aota cache->meta_write_pointer = cache->alloc_offset + cache->start; 11740bc09ca1SNaohiro Aota 117508e11a3dSNaohiro Aota kfree(alloc_offsets); 117608e11a3dSNaohiro Aota free_extent_map(em); 117708e11a3dSNaohiro Aota 117808e11a3dSNaohiro Aota return ret; 117908e11a3dSNaohiro Aota } 1180169e0da9SNaohiro Aota 1181169e0da9SNaohiro Aota void btrfs_calc_zone_unusable(struct btrfs_block_group *cache) 1182169e0da9SNaohiro Aota { 1183169e0da9SNaohiro Aota u64 unusable, free; 1184169e0da9SNaohiro Aota 1185169e0da9SNaohiro Aota if (!btrfs_is_zoned(cache->fs_info)) 1186169e0da9SNaohiro Aota return; 1187169e0da9SNaohiro Aota 1188169e0da9SNaohiro Aota WARN_ON(cache->bytes_super != 0); 1189169e0da9SNaohiro Aota unusable = cache->alloc_offset - cache->used; 1190169e0da9SNaohiro Aota free = cache->length - cache->alloc_offset; 1191169e0da9SNaohiro Aota 1192169e0da9SNaohiro Aota /* We only need ->free_space in ALLOC_SEQ block groups */ 1193169e0da9SNaohiro Aota cache->last_byte_to_unpin = (u64)-1; 1194169e0da9SNaohiro Aota cache->cached = BTRFS_CACHE_FINISHED; 1195169e0da9SNaohiro Aota cache->free_space_ctl->free_space = free; 1196169e0da9SNaohiro Aota cache->zone_unusable = unusable; 1197169e0da9SNaohiro Aota 1198169e0da9SNaohiro Aota /* Should not have any excluded extents. Just in case, though */ 1199169e0da9SNaohiro Aota btrfs_free_excluded_extents(cache); 1200169e0da9SNaohiro Aota } 1201d3575156SNaohiro Aota 1202d3575156SNaohiro Aota void btrfs_redirty_list_add(struct btrfs_transaction *trans, 1203d3575156SNaohiro Aota struct extent_buffer *eb) 1204d3575156SNaohiro Aota { 1205d3575156SNaohiro Aota struct btrfs_fs_info *fs_info = eb->fs_info; 1206d3575156SNaohiro Aota 1207d3575156SNaohiro Aota if (!btrfs_is_zoned(fs_info) || 1208d3575156SNaohiro Aota btrfs_header_flag(eb, BTRFS_HEADER_FLAG_WRITTEN) || 1209d3575156SNaohiro Aota !list_empty(&eb->release_list)) 1210d3575156SNaohiro Aota return; 1211d3575156SNaohiro Aota 1212d3575156SNaohiro Aota set_extent_buffer_dirty(eb); 1213d3575156SNaohiro Aota set_extent_bits_nowait(&trans->dirty_pages, eb->start, 1214d3575156SNaohiro Aota eb->start + eb->len - 1, EXTENT_DIRTY); 1215d3575156SNaohiro Aota memzero_extent_buffer(eb, 0, eb->len); 1216d3575156SNaohiro Aota set_bit(EXTENT_BUFFER_NO_CHECK, &eb->bflags); 1217d3575156SNaohiro Aota 1218d3575156SNaohiro Aota spin_lock(&trans->releasing_ebs_lock); 1219d3575156SNaohiro Aota list_add_tail(&eb->release_list, &trans->releasing_ebs); 1220d3575156SNaohiro Aota spin_unlock(&trans->releasing_ebs_lock); 1221d3575156SNaohiro Aota atomic_inc(&eb->refs); 1222d3575156SNaohiro Aota } 1223d3575156SNaohiro Aota 1224d3575156SNaohiro Aota void btrfs_free_redirty_list(struct btrfs_transaction *trans) 1225d3575156SNaohiro Aota { 1226d3575156SNaohiro Aota spin_lock(&trans->releasing_ebs_lock); 1227d3575156SNaohiro Aota while (!list_empty(&trans->releasing_ebs)) { 1228d3575156SNaohiro Aota struct extent_buffer *eb; 1229d3575156SNaohiro Aota 1230d3575156SNaohiro Aota eb = list_first_entry(&trans->releasing_ebs, 1231d3575156SNaohiro Aota struct extent_buffer, release_list); 1232d3575156SNaohiro Aota list_del_init(&eb->release_list); 1233d3575156SNaohiro Aota free_extent_buffer(eb); 1234d3575156SNaohiro Aota } 1235d3575156SNaohiro Aota spin_unlock(&trans->releasing_ebs_lock); 1236d3575156SNaohiro Aota } 123708f45559SJohannes Thumshirn 123808f45559SJohannes Thumshirn bool btrfs_use_zone_append(struct btrfs_inode *inode, struct extent_map *em) 123908f45559SJohannes Thumshirn { 124008f45559SJohannes Thumshirn struct btrfs_fs_info *fs_info = inode->root->fs_info; 124108f45559SJohannes Thumshirn struct btrfs_block_group *cache; 124208f45559SJohannes Thumshirn bool ret = false; 124308f45559SJohannes Thumshirn 124408f45559SJohannes Thumshirn if (!btrfs_is_zoned(fs_info)) 124508f45559SJohannes Thumshirn return false; 124608f45559SJohannes Thumshirn 124708f45559SJohannes Thumshirn if (!fs_info->max_zone_append_size) 124808f45559SJohannes Thumshirn return false; 124908f45559SJohannes Thumshirn 125008f45559SJohannes Thumshirn if (!is_data_inode(&inode->vfs_inode)) 125108f45559SJohannes Thumshirn return false; 125208f45559SJohannes Thumshirn 125308f45559SJohannes Thumshirn cache = btrfs_lookup_block_group(fs_info, em->block_start); 125408f45559SJohannes Thumshirn ASSERT(cache); 125508f45559SJohannes Thumshirn if (!cache) 125608f45559SJohannes Thumshirn return false; 125708f45559SJohannes Thumshirn 125808f45559SJohannes Thumshirn ret = cache->seq_zone; 125908f45559SJohannes Thumshirn btrfs_put_block_group(cache); 126008f45559SJohannes Thumshirn 126108f45559SJohannes Thumshirn return ret; 126208f45559SJohannes Thumshirn } 1263d8e3fb10SNaohiro Aota 1264d8e3fb10SNaohiro Aota void btrfs_record_physical_zoned(struct inode *inode, u64 file_offset, 1265d8e3fb10SNaohiro Aota struct bio *bio) 1266d8e3fb10SNaohiro Aota { 1267d8e3fb10SNaohiro Aota struct btrfs_ordered_extent *ordered; 1268d8e3fb10SNaohiro Aota const u64 physical = bio->bi_iter.bi_sector << SECTOR_SHIFT; 1269d8e3fb10SNaohiro Aota 1270d8e3fb10SNaohiro Aota if (bio_op(bio) != REQ_OP_ZONE_APPEND) 1271d8e3fb10SNaohiro Aota return; 1272d8e3fb10SNaohiro Aota 1273d8e3fb10SNaohiro Aota ordered = btrfs_lookup_ordered_extent(BTRFS_I(inode), file_offset); 1274d8e3fb10SNaohiro Aota if (WARN_ON(!ordered)) 1275d8e3fb10SNaohiro Aota return; 1276d8e3fb10SNaohiro Aota 1277d8e3fb10SNaohiro Aota ordered->physical = physical; 1278d8e3fb10SNaohiro Aota ordered->disk = bio->bi_disk; 1279d8e3fb10SNaohiro Aota ordered->partno = bio->bi_partno; 1280d8e3fb10SNaohiro Aota 1281d8e3fb10SNaohiro Aota btrfs_put_ordered_extent(ordered); 1282d8e3fb10SNaohiro Aota } 1283d8e3fb10SNaohiro Aota 1284d8e3fb10SNaohiro Aota void btrfs_rewrite_logical_zoned(struct btrfs_ordered_extent *ordered) 1285d8e3fb10SNaohiro Aota { 1286d8e3fb10SNaohiro Aota struct btrfs_inode *inode = BTRFS_I(ordered->inode); 1287d8e3fb10SNaohiro Aota struct btrfs_fs_info *fs_info = inode->root->fs_info; 1288d8e3fb10SNaohiro Aota struct extent_map_tree *em_tree; 1289d8e3fb10SNaohiro Aota struct extent_map *em; 1290d8e3fb10SNaohiro Aota struct btrfs_ordered_sum *sum; 1291d8e3fb10SNaohiro Aota struct block_device *bdev; 1292d8e3fb10SNaohiro Aota u64 orig_logical = ordered->disk_bytenr; 1293d8e3fb10SNaohiro Aota u64 *logical = NULL; 1294d8e3fb10SNaohiro Aota int nr, stripe_len; 1295d8e3fb10SNaohiro Aota 1296d8e3fb10SNaohiro Aota /* Zoned devices should not have partitions. So, we can assume it is 0 */ 1297d8e3fb10SNaohiro Aota ASSERT(ordered->partno == 0); 1298d8e3fb10SNaohiro Aota bdev = bdgrab(ordered->disk->part0); 1299d8e3fb10SNaohiro Aota if (WARN_ON(!bdev)) 1300d8e3fb10SNaohiro Aota return; 1301d8e3fb10SNaohiro Aota 1302d8e3fb10SNaohiro Aota if (WARN_ON(btrfs_rmap_block(fs_info, orig_logical, bdev, 1303d8e3fb10SNaohiro Aota ordered->physical, &logical, &nr, 1304d8e3fb10SNaohiro Aota &stripe_len))) 1305d8e3fb10SNaohiro Aota goto out; 1306d8e3fb10SNaohiro Aota 1307d8e3fb10SNaohiro Aota WARN_ON(nr != 1); 1308d8e3fb10SNaohiro Aota 1309d8e3fb10SNaohiro Aota if (orig_logical == *logical) 1310d8e3fb10SNaohiro Aota goto out; 1311d8e3fb10SNaohiro Aota 1312d8e3fb10SNaohiro Aota ordered->disk_bytenr = *logical; 1313d8e3fb10SNaohiro Aota 1314d8e3fb10SNaohiro Aota em_tree = &inode->extent_tree; 1315d8e3fb10SNaohiro Aota write_lock(&em_tree->lock); 1316d8e3fb10SNaohiro Aota em = search_extent_mapping(em_tree, ordered->file_offset, 1317d8e3fb10SNaohiro Aota ordered->num_bytes); 1318d8e3fb10SNaohiro Aota em->block_start = *logical; 1319d8e3fb10SNaohiro Aota free_extent_map(em); 1320d8e3fb10SNaohiro Aota write_unlock(&em_tree->lock); 1321d8e3fb10SNaohiro Aota 1322d8e3fb10SNaohiro Aota list_for_each_entry(sum, &ordered->list, list) { 1323d8e3fb10SNaohiro Aota if (*logical < orig_logical) 1324d8e3fb10SNaohiro Aota sum->bytenr -= orig_logical - *logical; 1325d8e3fb10SNaohiro Aota else 1326d8e3fb10SNaohiro Aota sum->bytenr += *logical - orig_logical; 1327d8e3fb10SNaohiro Aota } 1328d8e3fb10SNaohiro Aota 1329d8e3fb10SNaohiro Aota out: 1330d8e3fb10SNaohiro Aota kfree(logical); 1331d8e3fb10SNaohiro Aota bdput(bdev); 1332d8e3fb10SNaohiro Aota } 13330bc09ca1SNaohiro Aota 13340bc09ca1SNaohiro Aota bool btrfs_check_meta_write_pointer(struct btrfs_fs_info *fs_info, 13350bc09ca1SNaohiro Aota struct extent_buffer *eb, 13360bc09ca1SNaohiro Aota struct btrfs_block_group **cache_ret) 13370bc09ca1SNaohiro Aota { 13380bc09ca1SNaohiro Aota struct btrfs_block_group *cache; 13390bc09ca1SNaohiro Aota bool ret = true; 13400bc09ca1SNaohiro Aota 13410bc09ca1SNaohiro Aota if (!btrfs_is_zoned(fs_info)) 13420bc09ca1SNaohiro Aota return true; 13430bc09ca1SNaohiro Aota 13440bc09ca1SNaohiro Aota cache = *cache_ret; 13450bc09ca1SNaohiro Aota 13460bc09ca1SNaohiro Aota if (cache && (eb->start < cache->start || 13470bc09ca1SNaohiro Aota cache->start + cache->length <= eb->start)) { 13480bc09ca1SNaohiro Aota btrfs_put_block_group(cache); 13490bc09ca1SNaohiro Aota cache = NULL; 13500bc09ca1SNaohiro Aota *cache_ret = NULL; 13510bc09ca1SNaohiro Aota } 13520bc09ca1SNaohiro Aota 13530bc09ca1SNaohiro Aota if (!cache) 13540bc09ca1SNaohiro Aota cache = btrfs_lookup_block_group(fs_info, eb->start); 13550bc09ca1SNaohiro Aota 13560bc09ca1SNaohiro Aota if (cache) { 13570bc09ca1SNaohiro Aota if (cache->meta_write_pointer != eb->start) { 13580bc09ca1SNaohiro Aota btrfs_put_block_group(cache); 13590bc09ca1SNaohiro Aota cache = NULL; 13600bc09ca1SNaohiro Aota ret = false; 13610bc09ca1SNaohiro Aota } else { 13620bc09ca1SNaohiro Aota cache->meta_write_pointer = eb->start + eb->len; 13630bc09ca1SNaohiro Aota } 13640bc09ca1SNaohiro Aota 13650bc09ca1SNaohiro Aota *cache_ret = cache; 13660bc09ca1SNaohiro Aota } 13670bc09ca1SNaohiro Aota 13680bc09ca1SNaohiro Aota return ret; 13690bc09ca1SNaohiro Aota } 13700bc09ca1SNaohiro Aota 13710bc09ca1SNaohiro Aota void btrfs_revert_meta_write_pointer(struct btrfs_block_group *cache, 13720bc09ca1SNaohiro Aota struct extent_buffer *eb) 13730bc09ca1SNaohiro Aota { 13740bc09ca1SNaohiro Aota if (!btrfs_is_zoned(eb->fs_info) || !cache) 13750bc09ca1SNaohiro Aota return; 13760bc09ca1SNaohiro Aota 13770bc09ca1SNaohiro Aota ASSERT(cache->meta_write_pointer == eb->start + eb->len); 13780bc09ca1SNaohiro Aota cache->meta_write_pointer = eb->start; 13790bc09ca1SNaohiro Aota } 1380de17addcSNaohiro Aota 1381de17addcSNaohiro Aota int btrfs_zoned_issue_zeroout(struct btrfs_device *device, u64 physical, u64 length) 1382de17addcSNaohiro Aota { 1383de17addcSNaohiro Aota if (!btrfs_dev_is_sequential(device, physical)) 1384de17addcSNaohiro Aota return -EOPNOTSUPP; 1385de17addcSNaohiro Aota 1386de17addcSNaohiro Aota return blkdev_issue_zeroout(device->bdev, physical >> SECTOR_SHIFT, 1387de17addcSNaohiro Aota length >> SECTOR_SHIFT, GFP_NOFS, 0); 1388de17addcSNaohiro Aota } 13897db1c5d1SNaohiro Aota 13907db1c5d1SNaohiro Aota static int read_zone_info(struct btrfs_fs_info *fs_info, u64 logical, 13917db1c5d1SNaohiro Aota struct blk_zone *zone) 13927db1c5d1SNaohiro Aota { 13937db1c5d1SNaohiro Aota struct btrfs_bio *bbio = NULL; 13947db1c5d1SNaohiro Aota u64 mapped_length = PAGE_SIZE; 13957db1c5d1SNaohiro Aota unsigned int nofs_flag; 13967db1c5d1SNaohiro Aota int nmirrors; 13977db1c5d1SNaohiro Aota int i, ret; 13987db1c5d1SNaohiro Aota 13997db1c5d1SNaohiro Aota ret = btrfs_map_sblock(fs_info, BTRFS_MAP_GET_READ_MIRRORS, logical, 14007db1c5d1SNaohiro Aota &mapped_length, &bbio); 14017db1c5d1SNaohiro Aota if (ret || !bbio || mapped_length < PAGE_SIZE) { 14027db1c5d1SNaohiro Aota btrfs_put_bbio(bbio); 14037db1c5d1SNaohiro Aota return -EIO; 14047db1c5d1SNaohiro Aota } 14057db1c5d1SNaohiro Aota 14067db1c5d1SNaohiro Aota if (bbio->map_type & BTRFS_BLOCK_GROUP_RAID56_MASK) 14077db1c5d1SNaohiro Aota return -EINVAL; 14087db1c5d1SNaohiro Aota 14097db1c5d1SNaohiro Aota nofs_flag = memalloc_nofs_save(); 14107db1c5d1SNaohiro Aota nmirrors = (int)bbio->num_stripes; 14117db1c5d1SNaohiro Aota for (i = 0; i < nmirrors; i++) { 14127db1c5d1SNaohiro Aota u64 physical = bbio->stripes[i].physical; 14137db1c5d1SNaohiro Aota struct btrfs_device *dev = bbio->stripes[i].dev; 14147db1c5d1SNaohiro Aota 14157db1c5d1SNaohiro Aota /* Missing device */ 14167db1c5d1SNaohiro Aota if (!dev->bdev) 14177db1c5d1SNaohiro Aota continue; 14187db1c5d1SNaohiro Aota 14197db1c5d1SNaohiro Aota ret = btrfs_get_dev_zone(dev, physical, zone); 14207db1c5d1SNaohiro Aota /* Failing device */ 14217db1c5d1SNaohiro Aota if (ret == -EIO || ret == -EOPNOTSUPP) 14227db1c5d1SNaohiro Aota continue; 14237db1c5d1SNaohiro Aota break; 14247db1c5d1SNaohiro Aota } 14257db1c5d1SNaohiro Aota memalloc_nofs_restore(nofs_flag); 14267db1c5d1SNaohiro Aota 14277db1c5d1SNaohiro Aota return ret; 14287db1c5d1SNaohiro Aota } 14297db1c5d1SNaohiro Aota 14307db1c5d1SNaohiro Aota /* 14317db1c5d1SNaohiro Aota * Synchronize write pointer in a zone at @physical_start on @tgt_dev, by 14327db1c5d1SNaohiro Aota * filling zeros between @physical_pos to a write pointer of dev-replace 14337db1c5d1SNaohiro Aota * source device. 14347db1c5d1SNaohiro Aota */ 14357db1c5d1SNaohiro Aota int btrfs_sync_zone_write_pointer(struct btrfs_device *tgt_dev, u64 logical, 14367db1c5d1SNaohiro Aota u64 physical_start, u64 physical_pos) 14377db1c5d1SNaohiro Aota { 14387db1c5d1SNaohiro Aota struct btrfs_fs_info *fs_info = tgt_dev->fs_info; 14397db1c5d1SNaohiro Aota struct blk_zone zone; 14407db1c5d1SNaohiro Aota u64 length; 14417db1c5d1SNaohiro Aota u64 wp; 14427db1c5d1SNaohiro Aota int ret; 14437db1c5d1SNaohiro Aota 14447db1c5d1SNaohiro Aota if (!btrfs_dev_is_sequential(tgt_dev, physical_pos)) 14457db1c5d1SNaohiro Aota return 0; 14467db1c5d1SNaohiro Aota 14477db1c5d1SNaohiro Aota ret = read_zone_info(fs_info, logical, &zone); 14487db1c5d1SNaohiro Aota if (ret) 14497db1c5d1SNaohiro Aota return ret; 14507db1c5d1SNaohiro Aota 14517db1c5d1SNaohiro Aota wp = physical_start + ((zone.wp - zone.start) << SECTOR_SHIFT); 14527db1c5d1SNaohiro Aota 14537db1c5d1SNaohiro Aota if (physical_pos == wp) 14547db1c5d1SNaohiro Aota return 0; 14557db1c5d1SNaohiro Aota 14567db1c5d1SNaohiro Aota if (physical_pos > wp) 14577db1c5d1SNaohiro Aota return -EUCLEAN; 14587db1c5d1SNaohiro Aota 14597db1c5d1SNaohiro Aota length = wp - physical_pos; 14607db1c5d1SNaohiro Aota return btrfs_zoned_issue_zeroout(tgt_dev, physical_pos, length); 14617db1c5d1SNaohiro Aota } 1462