xref: /openbmc/linux/fs/btrfs/zoned.c (revision a589dde0bc0bf5616e92131d803b6046573449e6)
15b316468SNaohiro Aota // SPDX-License-Identifier: GPL-2.0
25b316468SNaohiro Aota 
35b316468SNaohiro Aota #include <linux/slab.h>
45b316468SNaohiro Aota #include <linux/blkdev.h>
55b316468SNaohiro Aota #include "ctree.h"
65b316468SNaohiro Aota #include "volumes.h"
75b316468SNaohiro Aota #include "zoned.h"
85b316468SNaohiro Aota #include "rcu-string.h"
95b316468SNaohiro Aota 
/*
 * Upper bound on the number of zone descriptors requested from
 * blkdev_report_zones() in a single call.
 */
#define BTRFS_REPORT_NR_ZONES 4096
125b316468SNaohiro Aota 
135b316468SNaohiro Aota static int copy_zone_info_cb(struct blk_zone *zone, unsigned int idx, void *data)
145b316468SNaohiro Aota {
155b316468SNaohiro Aota 	struct blk_zone *zones = data;
165b316468SNaohiro Aota 
175b316468SNaohiro Aota 	memcpy(&zones[idx], zone, sizeof(*zone));
185b316468SNaohiro Aota 
195b316468SNaohiro Aota 	return 0;
205b316468SNaohiro Aota }
215b316468SNaohiro Aota 
225b316468SNaohiro Aota static int btrfs_get_dev_zones(struct btrfs_device *device, u64 pos,
235b316468SNaohiro Aota 			       struct blk_zone *zones, unsigned int *nr_zones)
245b316468SNaohiro Aota {
255b316468SNaohiro Aota 	int ret;
265b316468SNaohiro Aota 
275b316468SNaohiro Aota 	if (!*nr_zones)
285b316468SNaohiro Aota 		return 0;
295b316468SNaohiro Aota 
305b316468SNaohiro Aota 	ret = blkdev_report_zones(device->bdev, pos >> SECTOR_SHIFT, *nr_zones,
315b316468SNaohiro Aota 				  copy_zone_info_cb, zones);
325b316468SNaohiro Aota 	if (ret < 0) {
335b316468SNaohiro Aota 		btrfs_err_in_rcu(device->fs_info,
345b316468SNaohiro Aota 				 "zoned: failed to read zone %llu on %s (devid %llu)",
355b316468SNaohiro Aota 				 pos, rcu_str_deref(device->name),
365b316468SNaohiro Aota 				 device->devid);
375b316468SNaohiro Aota 		return ret;
385b316468SNaohiro Aota 	}
395b316468SNaohiro Aota 	*nr_zones = ret;
405b316468SNaohiro Aota 	if (!ret)
415b316468SNaohiro Aota 		return -EIO;
425b316468SNaohiro Aota 
435b316468SNaohiro Aota 	return 0;
445b316468SNaohiro Aota }
455b316468SNaohiro Aota 
465b316468SNaohiro Aota int btrfs_get_dev_zone_info(struct btrfs_device *device)
475b316468SNaohiro Aota {
485b316468SNaohiro Aota 	struct btrfs_zoned_device_info *zone_info = NULL;
495b316468SNaohiro Aota 	struct block_device *bdev = device->bdev;
50862931c7SNaohiro Aota 	struct request_queue *queue = bdev_get_queue(bdev);
515b316468SNaohiro Aota 	sector_t nr_sectors;
525b316468SNaohiro Aota 	sector_t sector = 0;
535b316468SNaohiro Aota 	struct blk_zone *zones = NULL;
545b316468SNaohiro Aota 	unsigned int i, nreported = 0, nr_zones;
555b316468SNaohiro Aota 	unsigned int zone_sectors;
565b316468SNaohiro Aota 	int ret;
575b316468SNaohiro Aota 
585b316468SNaohiro Aota 	if (!bdev_is_zoned(bdev))
595b316468SNaohiro Aota 		return 0;
605b316468SNaohiro Aota 
615b316468SNaohiro Aota 	if (device->zone_info)
625b316468SNaohiro Aota 		return 0;
635b316468SNaohiro Aota 
645b316468SNaohiro Aota 	zone_info = kzalloc(sizeof(*zone_info), GFP_KERNEL);
655b316468SNaohiro Aota 	if (!zone_info)
665b316468SNaohiro Aota 		return -ENOMEM;
675b316468SNaohiro Aota 
685b316468SNaohiro Aota 	nr_sectors = bdev->bd_part->nr_sects;
695b316468SNaohiro Aota 	zone_sectors = bdev_zone_sectors(bdev);
705b316468SNaohiro Aota 	/* Check if it's power of 2 (see is_power_of_2) */
715b316468SNaohiro Aota 	ASSERT(zone_sectors != 0 && (zone_sectors & (zone_sectors - 1)) == 0);
725b316468SNaohiro Aota 	zone_info->zone_size = zone_sectors << SECTOR_SHIFT;
735b316468SNaohiro Aota 	zone_info->zone_size_shift = ilog2(zone_info->zone_size);
74862931c7SNaohiro Aota 	zone_info->max_zone_append_size =
75862931c7SNaohiro Aota 		(u64)queue_max_zone_append_sectors(queue) << SECTOR_SHIFT;
765b316468SNaohiro Aota 	zone_info->nr_zones = nr_sectors >> ilog2(zone_sectors);
775b316468SNaohiro Aota 	if (!IS_ALIGNED(nr_sectors, zone_sectors))
785b316468SNaohiro Aota 		zone_info->nr_zones++;
795b316468SNaohiro Aota 
805b316468SNaohiro Aota 	zone_info->seq_zones = bitmap_zalloc(zone_info->nr_zones, GFP_KERNEL);
815b316468SNaohiro Aota 	if (!zone_info->seq_zones) {
825b316468SNaohiro Aota 		ret = -ENOMEM;
835b316468SNaohiro Aota 		goto out;
845b316468SNaohiro Aota 	}
855b316468SNaohiro Aota 
865b316468SNaohiro Aota 	zone_info->empty_zones = bitmap_zalloc(zone_info->nr_zones, GFP_KERNEL);
875b316468SNaohiro Aota 	if (!zone_info->empty_zones) {
885b316468SNaohiro Aota 		ret = -ENOMEM;
895b316468SNaohiro Aota 		goto out;
905b316468SNaohiro Aota 	}
915b316468SNaohiro Aota 
925b316468SNaohiro Aota 	zones = kcalloc(BTRFS_REPORT_NR_ZONES, sizeof(struct blk_zone), GFP_KERNEL);
935b316468SNaohiro Aota 	if (!zones) {
945b316468SNaohiro Aota 		ret = -ENOMEM;
955b316468SNaohiro Aota 		goto out;
965b316468SNaohiro Aota 	}
975b316468SNaohiro Aota 
985b316468SNaohiro Aota 	/* Get zones type */
995b316468SNaohiro Aota 	while (sector < nr_sectors) {
1005b316468SNaohiro Aota 		nr_zones = BTRFS_REPORT_NR_ZONES;
1015b316468SNaohiro Aota 		ret = btrfs_get_dev_zones(device, sector << SECTOR_SHIFT, zones,
1025b316468SNaohiro Aota 					  &nr_zones);
1035b316468SNaohiro Aota 		if (ret)
1045b316468SNaohiro Aota 			goto out;
1055b316468SNaohiro Aota 
1065b316468SNaohiro Aota 		for (i = 0; i < nr_zones; i++) {
1075b316468SNaohiro Aota 			if (zones[i].type == BLK_ZONE_TYPE_SEQWRITE_REQ)
1085b316468SNaohiro Aota 				__set_bit(nreported, zone_info->seq_zones);
1095b316468SNaohiro Aota 			if (zones[i].cond == BLK_ZONE_COND_EMPTY)
1105b316468SNaohiro Aota 				__set_bit(nreported, zone_info->empty_zones);
1115b316468SNaohiro Aota 			nreported++;
1125b316468SNaohiro Aota 		}
1135b316468SNaohiro Aota 		sector = zones[nr_zones - 1].start + zones[nr_zones - 1].len;
1145b316468SNaohiro Aota 	}
1155b316468SNaohiro Aota 
1165b316468SNaohiro Aota 	if (nreported != zone_info->nr_zones) {
1175b316468SNaohiro Aota 		btrfs_err_in_rcu(device->fs_info,
1185b316468SNaohiro Aota 				 "inconsistent number of zones on %s (%u/%u)",
1195b316468SNaohiro Aota 				 rcu_str_deref(device->name), nreported,
1205b316468SNaohiro Aota 				 zone_info->nr_zones);
1215b316468SNaohiro Aota 		ret = -EIO;
1225b316468SNaohiro Aota 		goto out;
1235b316468SNaohiro Aota 	}
1245b316468SNaohiro Aota 
1255b316468SNaohiro Aota 	kfree(zones);
1265b316468SNaohiro Aota 
1275b316468SNaohiro Aota 	device->zone_info = zone_info;
1285b316468SNaohiro Aota 
1295b316468SNaohiro Aota 	/* device->fs_info is not safe to use for printing messages */
1305b316468SNaohiro Aota 	btrfs_info_in_rcu(NULL,
1315b316468SNaohiro Aota 			"host-%s zoned block device %s, %u zones of %llu bytes",
1325b316468SNaohiro Aota 			bdev_zoned_model(bdev) == BLK_ZONED_HM ? "managed" : "aware",
1335b316468SNaohiro Aota 			rcu_str_deref(device->name), zone_info->nr_zones,
1345b316468SNaohiro Aota 			zone_info->zone_size);
1355b316468SNaohiro Aota 
1365b316468SNaohiro Aota 	return 0;
1375b316468SNaohiro Aota 
1385b316468SNaohiro Aota out:
1395b316468SNaohiro Aota 	kfree(zones);
1405b316468SNaohiro Aota 	bitmap_free(zone_info->empty_zones);
1415b316468SNaohiro Aota 	bitmap_free(zone_info->seq_zones);
1425b316468SNaohiro Aota 	kfree(zone_info);
1435b316468SNaohiro Aota 
1445b316468SNaohiro Aota 	return ret;
1455b316468SNaohiro Aota }
1465b316468SNaohiro Aota 
1475b316468SNaohiro Aota void btrfs_destroy_dev_zone_info(struct btrfs_device *device)
1485b316468SNaohiro Aota {
1495b316468SNaohiro Aota 	struct btrfs_zoned_device_info *zone_info = device->zone_info;
1505b316468SNaohiro Aota 
1515b316468SNaohiro Aota 	if (!zone_info)
1525b316468SNaohiro Aota 		return;
1535b316468SNaohiro Aota 
1545b316468SNaohiro Aota 	bitmap_free(zone_info->seq_zones);
1555b316468SNaohiro Aota 	bitmap_free(zone_info->empty_zones);
1565b316468SNaohiro Aota 	kfree(zone_info);
1575b316468SNaohiro Aota 	device->zone_info = NULL;
1585b316468SNaohiro Aota }
1595b316468SNaohiro Aota 
1605b316468SNaohiro Aota int btrfs_get_dev_zone(struct btrfs_device *device, u64 pos,
1615b316468SNaohiro Aota 		       struct blk_zone *zone)
1625b316468SNaohiro Aota {
1635b316468SNaohiro Aota 	unsigned int nr_zones = 1;
1645b316468SNaohiro Aota 	int ret;
1655b316468SNaohiro Aota 
1665b316468SNaohiro Aota 	ret = btrfs_get_dev_zones(device, pos, zone, &nr_zones);
1675b316468SNaohiro Aota 	if (ret != 0 || !nr_zones)
1685b316468SNaohiro Aota 		return ret ? ret : -EIO;
1695b316468SNaohiro Aota 
1705b316468SNaohiro Aota 	return 0;
1715b316468SNaohiro Aota }
172b70f5097SNaohiro Aota 
173b70f5097SNaohiro Aota int btrfs_check_zoned_mode(struct btrfs_fs_info *fs_info)
174b70f5097SNaohiro Aota {
175b70f5097SNaohiro Aota 	struct btrfs_fs_devices *fs_devices = fs_info->fs_devices;
176b70f5097SNaohiro Aota 	struct btrfs_device *device;
177b70f5097SNaohiro Aota 	u64 zoned_devices = 0;
178b70f5097SNaohiro Aota 	u64 nr_devices = 0;
179b70f5097SNaohiro Aota 	u64 zone_size = 0;
180862931c7SNaohiro Aota 	u64 max_zone_append_size = 0;
181b70f5097SNaohiro Aota 	const bool incompat_zoned = btrfs_is_zoned(fs_info);
182b70f5097SNaohiro Aota 	int ret = 0;
183b70f5097SNaohiro Aota 
184b70f5097SNaohiro Aota 	/* Count zoned devices */
185b70f5097SNaohiro Aota 	list_for_each_entry(device, &fs_devices->devices, dev_list) {
186b70f5097SNaohiro Aota 		enum blk_zoned_model model;
187b70f5097SNaohiro Aota 
188b70f5097SNaohiro Aota 		if (!device->bdev)
189b70f5097SNaohiro Aota 			continue;
190b70f5097SNaohiro Aota 
191b70f5097SNaohiro Aota 		model = bdev_zoned_model(device->bdev);
192b70f5097SNaohiro Aota 		if (model == BLK_ZONED_HM ||
193b70f5097SNaohiro Aota 		    (model == BLK_ZONED_HA && incompat_zoned)) {
194862931c7SNaohiro Aota 			struct btrfs_zoned_device_info *zone_info;
195862931c7SNaohiro Aota 
196862931c7SNaohiro Aota 			zone_info = device->zone_info;
197b70f5097SNaohiro Aota 			zoned_devices++;
198b70f5097SNaohiro Aota 			if (!zone_size) {
199862931c7SNaohiro Aota 				zone_size = zone_info->zone_size;
200862931c7SNaohiro Aota 			} else if (zone_info->zone_size != zone_size) {
201b70f5097SNaohiro Aota 				btrfs_err(fs_info,
202b70f5097SNaohiro Aota 		"zoned: unequal block device zone sizes: have %llu found %llu",
203b70f5097SNaohiro Aota 					  device->zone_info->zone_size,
204b70f5097SNaohiro Aota 					  zone_size);
205b70f5097SNaohiro Aota 				ret = -EINVAL;
206b70f5097SNaohiro Aota 				goto out;
207b70f5097SNaohiro Aota 			}
208862931c7SNaohiro Aota 			if (!max_zone_append_size ||
209862931c7SNaohiro Aota 			    (zone_info->max_zone_append_size &&
210862931c7SNaohiro Aota 			     zone_info->max_zone_append_size < max_zone_append_size))
211862931c7SNaohiro Aota 				max_zone_append_size =
212862931c7SNaohiro Aota 					zone_info->max_zone_append_size;
213b70f5097SNaohiro Aota 		}
214b70f5097SNaohiro Aota 		nr_devices++;
215b70f5097SNaohiro Aota 	}
216b70f5097SNaohiro Aota 
217b70f5097SNaohiro Aota 	if (!zoned_devices && !incompat_zoned)
218b70f5097SNaohiro Aota 		goto out;
219b70f5097SNaohiro Aota 
220b70f5097SNaohiro Aota 	if (!zoned_devices && incompat_zoned) {
221b70f5097SNaohiro Aota 		/* No zoned block device found on ZONED filesystem */
222b70f5097SNaohiro Aota 		btrfs_err(fs_info,
223b70f5097SNaohiro Aota 			  "zoned: no zoned devices found on a zoned filesystem");
224b70f5097SNaohiro Aota 		ret = -EINVAL;
225b70f5097SNaohiro Aota 		goto out;
226b70f5097SNaohiro Aota 	}
227b70f5097SNaohiro Aota 
228b70f5097SNaohiro Aota 	if (zoned_devices && !incompat_zoned) {
229b70f5097SNaohiro Aota 		btrfs_err(fs_info,
230b70f5097SNaohiro Aota 			  "zoned: mode not enabled but zoned device found");
231b70f5097SNaohiro Aota 		ret = -EINVAL;
232b70f5097SNaohiro Aota 		goto out;
233b70f5097SNaohiro Aota 	}
234b70f5097SNaohiro Aota 
235b70f5097SNaohiro Aota 	if (zoned_devices != nr_devices) {
236b70f5097SNaohiro Aota 		btrfs_err(fs_info,
237b70f5097SNaohiro Aota 			  "zoned: cannot mix zoned and regular devices");
238b70f5097SNaohiro Aota 		ret = -EINVAL;
239b70f5097SNaohiro Aota 		goto out;
240b70f5097SNaohiro Aota 	}
241b70f5097SNaohiro Aota 
242b70f5097SNaohiro Aota 	/*
243b70f5097SNaohiro Aota 	 * stripe_size is always aligned to BTRFS_STRIPE_LEN in
244b70f5097SNaohiro Aota 	 * __btrfs_alloc_chunk(). Since we want stripe_len == zone_size,
245b70f5097SNaohiro Aota 	 * check the alignment here.
246b70f5097SNaohiro Aota 	 */
247b70f5097SNaohiro Aota 	if (!IS_ALIGNED(zone_size, BTRFS_STRIPE_LEN)) {
248b70f5097SNaohiro Aota 		btrfs_err(fs_info,
249b70f5097SNaohiro Aota 			  "zoned: zone size %llu not aligned to stripe %u",
250b70f5097SNaohiro Aota 			  zone_size, BTRFS_STRIPE_LEN);
251b70f5097SNaohiro Aota 		ret = -EINVAL;
252b70f5097SNaohiro Aota 		goto out;
253b70f5097SNaohiro Aota 	}
254b70f5097SNaohiro Aota 
255*a589dde0SNaohiro Aota 	if (btrfs_fs_incompat(fs_info, MIXED_GROUPS)) {
256*a589dde0SNaohiro Aota 		btrfs_err(fs_info, "zoned: mixed block groups not supported");
257*a589dde0SNaohiro Aota 		ret = -EINVAL;
258*a589dde0SNaohiro Aota 		goto out;
259*a589dde0SNaohiro Aota 	}
260*a589dde0SNaohiro Aota 
261b70f5097SNaohiro Aota 	fs_info->zone_size = zone_size;
262862931c7SNaohiro Aota 	fs_info->max_zone_append_size = max_zone_append_size;
263b70f5097SNaohiro Aota 
264b70f5097SNaohiro Aota 	btrfs_info(fs_info, "zoned mode enabled with zone size %llu", zone_size);
265b70f5097SNaohiro Aota out:
266b70f5097SNaohiro Aota 	return ret;
267b70f5097SNaohiro Aota }
2685d1ab66cSNaohiro Aota 
2695d1ab66cSNaohiro Aota int btrfs_check_mountopts_zoned(struct btrfs_fs_info *info)
2705d1ab66cSNaohiro Aota {
2715d1ab66cSNaohiro Aota 	if (!btrfs_is_zoned(info))
2725d1ab66cSNaohiro Aota 		return 0;
2735d1ab66cSNaohiro Aota 
2745d1ab66cSNaohiro Aota 	/*
2755d1ab66cSNaohiro Aota 	 * Space cache writing is not COWed. Disable that to avoid write errors
2765d1ab66cSNaohiro Aota 	 * in sequential zones.
2775d1ab66cSNaohiro Aota 	 */
2785d1ab66cSNaohiro Aota 	if (btrfs_test_opt(info, SPACE_CACHE)) {
2795d1ab66cSNaohiro Aota 		btrfs_err(info, "zoned: space cache v1 is not supported");
2805d1ab66cSNaohiro Aota 		return -EINVAL;
2815d1ab66cSNaohiro Aota 	}
2825d1ab66cSNaohiro Aota 
283d206e9c9SNaohiro Aota 	if (btrfs_test_opt(info, NODATACOW)) {
284d206e9c9SNaohiro Aota 		btrfs_err(info, "zoned: NODATACOW not supported");
285d206e9c9SNaohiro Aota 		return -EINVAL;
286d206e9c9SNaohiro Aota 	}
287d206e9c9SNaohiro Aota 
2885d1ab66cSNaohiro Aota 	return 0;
2895d1ab66cSNaohiro Aota }
290