15b316468SNaohiro Aota // SPDX-License-Identifier: GPL-2.0 25b316468SNaohiro Aota 35b316468SNaohiro Aota #include <linux/slab.h> 45b316468SNaohiro Aota #include <linux/blkdev.h> 55b316468SNaohiro Aota #include "ctree.h" 65b316468SNaohiro Aota #include "volumes.h" 75b316468SNaohiro Aota #include "zoned.h" 85b316468SNaohiro Aota #include "rcu-string.h" 95b316468SNaohiro Aota 105b316468SNaohiro Aota /* Maximum number of zones to report per blkdev_report_zones() call */ 115b316468SNaohiro Aota #define BTRFS_REPORT_NR_ZONES 4096 125b316468SNaohiro Aota 135b316468SNaohiro Aota static int copy_zone_info_cb(struct blk_zone *zone, unsigned int idx, void *data) 145b316468SNaohiro Aota { 155b316468SNaohiro Aota struct blk_zone *zones = data; 165b316468SNaohiro Aota 175b316468SNaohiro Aota memcpy(&zones[idx], zone, sizeof(*zone)); 185b316468SNaohiro Aota 195b316468SNaohiro Aota return 0; 205b316468SNaohiro Aota } 215b316468SNaohiro Aota 225b316468SNaohiro Aota static int btrfs_get_dev_zones(struct btrfs_device *device, u64 pos, 235b316468SNaohiro Aota struct blk_zone *zones, unsigned int *nr_zones) 245b316468SNaohiro Aota { 255b316468SNaohiro Aota int ret; 265b316468SNaohiro Aota 275b316468SNaohiro Aota if (!*nr_zones) 285b316468SNaohiro Aota return 0; 295b316468SNaohiro Aota 305b316468SNaohiro Aota ret = blkdev_report_zones(device->bdev, pos >> SECTOR_SHIFT, *nr_zones, 315b316468SNaohiro Aota copy_zone_info_cb, zones); 325b316468SNaohiro Aota if (ret < 0) { 335b316468SNaohiro Aota btrfs_err_in_rcu(device->fs_info, 345b316468SNaohiro Aota "zoned: failed to read zone %llu on %s (devid %llu)", 355b316468SNaohiro Aota pos, rcu_str_deref(device->name), 365b316468SNaohiro Aota device->devid); 375b316468SNaohiro Aota return ret; 385b316468SNaohiro Aota } 395b316468SNaohiro Aota *nr_zones = ret; 405b316468SNaohiro Aota if (!ret) 415b316468SNaohiro Aota return -EIO; 425b316468SNaohiro Aota 435b316468SNaohiro Aota return 0; 445b316468SNaohiro Aota } 455b316468SNaohiro Aota 465b316468SNaohiro Aota int btrfs_get_dev_zone_info(struct btrfs_device *device) 475b316468SNaohiro Aota { 485b316468SNaohiro Aota struct btrfs_zoned_device_info *zone_info = NULL; 495b316468SNaohiro Aota struct block_device *bdev = device->bdev; 50862931c7SNaohiro Aota struct request_queue *queue = bdev_get_queue(bdev); 515b316468SNaohiro Aota sector_t nr_sectors; 525b316468SNaohiro Aota sector_t sector = 0; 535b316468SNaohiro Aota struct blk_zone *zones = NULL; 545b316468SNaohiro Aota unsigned int i, nreported = 0, nr_zones; 555b316468SNaohiro Aota unsigned int zone_sectors; 565b316468SNaohiro Aota int ret; 575b316468SNaohiro Aota 585b316468SNaohiro Aota if (!bdev_is_zoned(bdev)) 595b316468SNaohiro Aota return 0; 605b316468SNaohiro Aota 615b316468SNaohiro Aota if (device->zone_info) 625b316468SNaohiro Aota return 0; 635b316468SNaohiro Aota 645b316468SNaohiro Aota zone_info = kzalloc(sizeof(*zone_info), GFP_KERNEL); 655b316468SNaohiro Aota if (!zone_info) 665b316468SNaohiro Aota return -ENOMEM; 675b316468SNaohiro Aota 685b316468SNaohiro Aota nr_sectors = bdev->bd_part->nr_sects; 695b316468SNaohiro Aota zone_sectors = bdev_zone_sectors(bdev); 705b316468SNaohiro Aota /* Check if it's power of 2 (see is_power_of_2) */ 715b316468SNaohiro Aota ASSERT(zone_sectors != 0 && (zone_sectors & (zone_sectors - 1)) == 0); 725b316468SNaohiro Aota zone_info->zone_size = zone_sectors << SECTOR_SHIFT; 735b316468SNaohiro Aota zone_info->zone_size_shift = ilog2(zone_info->zone_size); 74862931c7SNaohiro Aota zone_info->max_zone_append_size = 75862931c7SNaohiro Aota (u64)queue_max_zone_append_sectors(queue) << SECTOR_SHIFT; 765b316468SNaohiro Aota zone_info->nr_zones = nr_sectors >> ilog2(zone_sectors); 775b316468SNaohiro Aota if (!IS_ALIGNED(nr_sectors, zone_sectors)) 785b316468SNaohiro Aota zone_info->nr_zones++; 795b316468SNaohiro Aota 805b316468SNaohiro Aota zone_info->seq_zones = bitmap_zalloc(zone_info->nr_zones, GFP_KERNEL); 815b316468SNaohiro Aota if (!zone_info->seq_zones) { 825b316468SNaohiro Aota ret = -ENOMEM; 835b316468SNaohiro Aota goto out; 845b316468SNaohiro Aota } 855b316468SNaohiro Aota 865b316468SNaohiro Aota zone_info->empty_zones = bitmap_zalloc(zone_info->nr_zones, GFP_KERNEL); 875b316468SNaohiro Aota if (!zone_info->empty_zones) { 885b316468SNaohiro Aota ret = -ENOMEM; 895b316468SNaohiro Aota goto out; 905b316468SNaohiro Aota } 915b316468SNaohiro Aota 925b316468SNaohiro Aota zones = kcalloc(BTRFS_REPORT_NR_ZONES, sizeof(struct blk_zone), GFP_KERNEL); 935b316468SNaohiro Aota if (!zones) { 945b316468SNaohiro Aota ret = -ENOMEM; 955b316468SNaohiro Aota goto out; 965b316468SNaohiro Aota } 975b316468SNaohiro Aota 985b316468SNaohiro Aota /* Get zones type */ 995b316468SNaohiro Aota while (sector < nr_sectors) { 1005b316468SNaohiro Aota nr_zones = BTRFS_REPORT_NR_ZONES; 1015b316468SNaohiro Aota ret = btrfs_get_dev_zones(device, sector << SECTOR_SHIFT, zones, 1025b316468SNaohiro Aota &nr_zones); 1035b316468SNaohiro Aota if (ret) 1045b316468SNaohiro Aota goto out; 1055b316468SNaohiro Aota 1065b316468SNaohiro Aota for (i = 0; i < nr_zones; i++) { 1075b316468SNaohiro Aota if (zones[i].type == BLK_ZONE_TYPE_SEQWRITE_REQ) 1085b316468SNaohiro Aota __set_bit(nreported, zone_info->seq_zones); 1095b316468SNaohiro Aota if (zones[i].cond == BLK_ZONE_COND_EMPTY) 1105b316468SNaohiro Aota __set_bit(nreported, zone_info->empty_zones); 1115b316468SNaohiro Aota nreported++; 1125b316468SNaohiro Aota } 1135b316468SNaohiro Aota sector = zones[nr_zones - 1].start + zones[nr_zones - 1].len; 1145b316468SNaohiro Aota } 1155b316468SNaohiro Aota 1165b316468SNaohiro Aota if (nreported != zone_info->nr_zones) { 1175b316468SNaohiro Aota btrfs_err_in_rcu(device->fs_info, 1185b316468SNaohiro Aota "inconsistent number of zones on %s (%u/%u)", 1195b316468SNaohiro Aota rcu_str_deref(device->name), nreported, 1205b316468SNaohiro Aota zone_info->nr_zones); 1215b316468SNaohiro Aota ret = -EIO; 1225b316468SNaohiro Aota goto out; 1235b316468SNaohiro Aota } 1245b316468SNaohiro Aota 1255b316468SNaohiro Aota kfree(zones); 1265b316468SNaohiro Aota 1275b316468SNaohiro Aota device->zone_info = zone_info; 1285b316468SNaohiro Aota 1295b316468SNaohiro Aota /* device->fs_info is not safe to use for printing messages */ 1305b316468SNaohiro Aota btrfs_info_in_rcu(NULL, 1315b316468SNaohiro Aota "host-%s zoned block device %s, %u zones of %llu bytes", 1325b316468SNaohiro Aota bdev_zoned_model(bdev) == BLK_ZONED_HM ? "managed" : "aware", 1335b316468SNaohiro Aota rcu_str_deref(device->name), zone_info->nr_zones, 1345b316468SNaohiro Aota zone_info->zone_size); 1355b316468SNaohiro Aota 1365b316468SNaohiro Aota return 0; 1375b316468SNaohiro Aota 1385b316468SNaohiro Aota out: 1395b316468SNaohiro Aota kfree(zones); 1405b316468SNaohiro Aota bitmap_free(zone_info->empty_zones); 1415b316468SNaohiro Aota bitmap_free(zone_info->seq_zones); 1425b316468SNaohiro Aota kfree(zone_info); 1435b316468SNaohiro Aota 1445b316468SNaohiro Aota return ret; 1455b316468SNaohiro Aota } 1465b316468SNaohiro Aota 1475b316468SNaohiro Aota void btrfs_destroy_dev_zone_info(struct btrfs_device *device) 1485b316468SNaohiro Aota { 1495b316468SNaohiro Aota struct btrfs_zoned_device_info *zone_info = device->zone_info; 1505b316468SNaohiro Aota 1515b316468SNaohiro Aota if (!zone_info) 1525b316468SNaohiro Aota return; 1535b316468SNaohiro Aota 1545b316468SNaohiro Aota bitmap_free(zone_info->seq_zones); 1555b316468SNaohiro Aota bitmap_free(zone_info->empty_zones); 1565b316468SNaohiro Aota kfree(zone_info); 1575b316468SNaohiro Aota device->zone_info = NULL; 1585b316468SNaohiro Aota } 1595b316468SNaohiro Aota 1605b316468SNaohiro Aota int btrfs_get_dev_zone(struct btrfs_device *device, u64 pos, 1615b316468SNaohiro Aota struct blk_zone *zone) 1625b316468SNaohiro Aota { 1635b316468SNaohiro Aota unsigned int nr_zones = 1; 1645b316468SNaohiro Aota int ret; 1655b316468SNaohiro Aota 1665b316468SNaohiro Aota ret = btrfs_get_dev_zones(device, pos, zone, &nr_zones); 1675b316468SNaohiro Aota if (ret != 0 || !nr_zones) 1685b316468SNaohiro Aota return ret ? ret : -EIO; 1695b316468SNaohiro Aota 1705b316468SNaohiro Aota return 0; 1715b316468SNaohiro Aota } 172b70f5097SNaohiro Aota 173b70f5097SNaohiro Aota int btrfs_check_zoned_mode(struct btrfs_fs_info *fs_info) 174b70f5097SNaohiro Aota { 175b70f5097SNaohiro Aota struct btrfs_fs_devices *fs_devices = fs_info->fs_devices; 176b70f5097SNaohiro Aota struct btrfs_device *device; 177b70f5097SNaohiro Aota u64 zoned_devices = 0; 178b70f5097SNaohiro Aota u64 nr_devices = 0; 179b70f5097SNaohiro Aota u64 zone_size = 0; 180862931c7SNaohiro Aota u64 max_zone_append_size = 0; 181b70f5097SNaohiro Aota const bool incompat_zoned = btrfs_is_zoned(fs_info); 182b70f5097SNaohiro Aota int ret = 0; 183b70f5097SNaohiro Aota 184b70f5097SNaohiro Aota /* Count zoned devices */ 185b70f5097SNaohiro Aota list_for_each_entry(device, &fs_devices->devices, dev_list) { 186b70f5097SNaohiro Aota enum blk_zoned_model model; 187b70f5097SNaohiro Aota 188b70f5097SNaohiro Aota if (!device->bdev) 189b70f5097SNaohiro Aota continue; 190b70f5097SNaohiro Aota 191b70f5097SNaohiro Aota model = bdev_zoned_model(device->bdev); 192b70f5097SNaohiro Aota if (model == BLK_ZONED_HM || 193b70f5097SNaohiro Aota (model == BLK_ZONED_HA && incompat_zoned)) { 194862931c7SNaohiro Aota struct btrfs_zoned_device_info *zone_info; 195862931c7SNaohiro Aota 196862931c7SNaohiro Aota zone_info = device->zone_info; 197b70f5097SNaohiro Aota zoned_devices++; 198b70f5097SNaohiro Aota if (!zone_size) { 199862931c7SNaohiro Aota zone_size = zone_info->zone_size; 200862931c7SNaohiro Aota } else if (zone_info->zone_size != zone_size) { 201b70f5097SNaohiro Aota btrfs_err(fs_info, 202b70f5097SNaohiro Aota "zoned: unequal block device zone sizes: have %llu found %llu", 203b70f5097SNaohiro Aota device->zone_info->zone_size, 204b70f5097SNaohiro Aota zone_size); 205b70f5097SNaohiro Aota ret = -EINVAL; 206b70f5097SNaohiro Aota goto out; 207b70f5097SNaohiro Aota } 208862931c7SNaohiro Aota if (!max_zone_append_size || 209862931c7SNaohiro Aota (zone_info->max_zone_append_size && 210862931c7SNaohiro Aota zone_info->max_zone_append_size < max_zone_append_size)) 211862931c7SNaohiro Aota max_zone_append_size = 212862931c7SNaohiro Aota zone_info->max_zone_append_size; 213b70f5097SNaohiro Aota } 214b70f5097SNaohiro Aota nr_devices++; 215b70f5097SNaohiro Aota } 216b70f5097SNaohiro Aota 217b70f5097SNaohiro Aota if (!zoned_devices && !incompat_zoned) 218b70f5097SNaohiro Aota goto out; 219b70f5097SNaohiro Aota 220b70f5097SNaohiro Aota if (!zoned_devices && incompat_zoned) { 221b70f5097SNaohiro Aota /* No zoned block device found on ZONED filesystem */ 222b70f5097SNaohiro Aota btrfs_err(fs_info, 223b70f5097SNaohiro Aota "zoned: no zoned devices found on a zoned filesystem"); 224b70f5097SNaohiro Aota ret = -EINVAL; 225b70f5097SNaohiro Aota goto out; 226b70f5097SNaohiro Aota } 227b70f5097SNaohiro Aota 228b70f5097SNaohiro Aota if (zoned_devices && !incompat_zoned) { 229b70f5097SNaohiro Aota btrfs_err(fs_info, 230b70f5097SNaohiro Aota "zoned: mode not enabled but zoned device found"); 231b70f5097SNaohiro Aota ret = -EINVAL; 232b70f5097SNaohiro Aota goto out; 233b70f5097SNaohiro Aota } 234b70f5097SNaohiro Aota 235b70f5097SNaohiro Aota if (zoned_devices != nr_devices) { 236b70f5097SNaohiro Aota btrfs_err(fs_info, 237b70f5097SNaohiro Aota "zoned: cannot mix zoned and regular devices"); 238b70f5097SNaohiro Aota ret = -EINVAL; 239b70f5097SNaohiro Aota goto out; 240b70f5097SNaohiro Aota } 241b70f5097SNaohiro Aota 242b70f5097SNaohiro Aota /* 243b70f5097SNaohiro Aota * stripe_size is always aligned to BTRFS_STRIPE_LEN in 244b70f5097SNaohiro Aota * __btrfs_alloc_chunk(). Since we want stripe_len == zone_size, 245b70f5097SNaohiro Aota * check the alignment here. 246b70f5097SNaohiro Aota */ 247b70f5097SNaohiro Aota if (!IS_ALIGNED(zone_size, BTRFS_STRIPE_LEN)) { 248b70f5097SNaohiro Aota btrfs_err(fs_info, 249b70f5097SNaohiro Aota "zoned: zone size %llu not aligned to stripe %u", 250b70f5097SNaohiro Aota zone_size, BTRFS_STRIPE_LEN); 251b70f5097SNaohiro Aota ret = -EINVAL; 252b70f5097SNaohiro Aota goto out; 253b70f5097SNaohiro Aota } 254b70f5097SNaohiro Aota 255*a589dde0SNaohiro Aota if (btrfs_fs_incompat(fs_info, MIXED_GROUPS)) { 256*a589dde0SNaohiro Aota btrfs_err(fs_info, "zoned: mixed block groups not supported"); 257*a589dde0SNaohiro Aota ret = -EINVAL; 258*a589dde0SNaohiro Aota goto out; 259*a589dde0SNaohiro Aota } 260*a589dde0SNaohiro Aota 261b70f5097SNaohiro Aota fs_info->zone_size = zone_size; 262862931c7SNaohiro Aota fs_info->max_zone_append_size = max_zone_append_size; 263b70f5097SNaohiro Aota 264b70f5097SNaohiro Aota btrfs_info(fs_info, "zoned mode enabled with zone size %llu", zone_size); 265b70f5097SNaohiro Aota out: 266b70f5097SNaohiro Aota return ret; 267b70f5097SNaohiro Aota } 2685d1ab66cSNaohiro Aota 2695d1ab66cSNaohiro Aota int btrfs_check_mountopts_zoned(struct btrfs_fs_info *info) 2705d1ab66cSNaohiro Aota { 2715d1ab66cSNaohiro Aota if (!btrfs_is_zoned(info)) 2725d1ab66cSNaohiro Aota return 0; 2735d1ab66cSNaohiro Aota 2745d1ab66cSNaohiro Aota /* 2755d1ab66cSNaohiro Aota * Space cache writing is not COWed. Disable that to avoid write errors 2765d1ab66cSNaohiro Aota * in sequential zones. 2775d1ab66cSNaohiro Aota */ 2785d1ab66cSNaohiro Aota if (btrfs_test_opt(info, SPACE_CACHE)) { 2795d1ab66cSNaohiro Aota btrfs_err(info, "zoned: space cache v1 is not supported"); 2805d1ab66cSNaohiro Aota return -EINVAL; 2815d1ab66cSNaohiro Aota } 2825d1ab66cSNaohiro Aota 283d206e9c9SNaohiro Aota if (btrfs_test_opt(info, NODATACOW)) { 284d206e9c9SNaohiro Aota btrfs_err(info, "zoned: NODATACOW not supported"); 285d206e9c9SNaohiro Aota return -EINVAL; 286d206e9c9SNaohiro Aota } 287d206e9c9SNaohiro Aota 2885d1ab66cSNaohiro Aota return 0; 2895d1ab66cSNaohiro Aota } 290