// SPDX-License-Identifier: GPL-2.0
/*
 * Zoned block device handling
 *
 * Copyright (c) 2015, Hannes Reinecke
 * Copyright (c) 2015, SUSE Linux GmbH
 *
 * Copyright (c) 2016, Damien Le Moal
 * Copyright (c) 2016, Western Digital
 */

#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/rbtree.h>
#include <linux/blkdev.h>
#include <linux/blk-mq.h>
#include <linux/mm.h>
#include <linux/vmalloc.h>
#include <linux/sched/mm.h>

#include "blk.h"

#define ZONE_COND_NAME(name) [BLK_ZONE_COND_##name] = #name
static const char *const zone_cond_name[] = {
	ZONE_COND_NAME(NOT_WP),
	ZONE_COND_NAME(EMPTY),
	ZONE_COND_NAME(IMP_OPEN),
	ZONE_COND_NAME(EXP_OPEN),
	ZONE_COND_NAME(CLOSED),
	ZONE_COND_NAME(READONLY),
	ZONE_COND_NAME(FULL),
	ZONE_COND_NAME(OFFLINE),
};
#undef ZONE_COND_NAME

/**
 * blk_zone_cond_str - Return string XXX in BLK_ZONE_COND_XXX.
 * @zone_cond: BLK_ZONE_COND_XXX.
 *
 * Description: Centralized block layer function to convert BLK_ZONE_COND_XXX
 * into string format. Useful for debugging and tracing zone conditions. For
 * an invalid BLK_ZONE_COND_XXX it returns the string "UNKNOWN".
 */
const char *blk_zone_cond_str(enum blk_zone_cond zone_cond)
{
	static const char *zone_cond_str = "UNKNOWN";

	if (zone_cond < ARRAY_SIZE(zone_cond_name) && zone_cond_name[zone_cond])
		zone_cond_str = zone_cond_name[zone_cond];

	return zone_cond_str;
}
EXPORT_SYMBOL_GPL(blk_zone_cond_str);
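
/*
 * Example (illustrative sketch, not part of this file): a report_zones_cb
 * callback that logs each zone's condition using blk_zone_cond_str().
 * The callback name and the pr_debug() format are hypothetical.
 *
 *	static int show_zone_cond_cb(struct blk_zone *zone,
 *				     unsigned int idx, void *data)
 *	{
 *		pr_debug("zone %u at sector %llu: %s\n",
 *			 idx, zone->start, blk_zone_cond_str(zone->cond));
 *		return 0;
 *	}
 */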

/*
 * Return true if a request is a write request that needs zone write locking.
 */
bool blk_req_needs_zone_write_lock(struct request *rq)
{
	if (!rq->q->disk->seq_zones_wlock)
		return false;

	return blk_rq_is_seq_zoned_write(rq);
}
EXPORT_SYMBOL_GPL(blk_req_needs_zone_write_lock);

bool blk_req_zone_write_trylock(struct request *rq)
{
	unsigned int zno = blk_rq_zone_no(rq);

	if (test_and_set_bit(zno, rq->q->disk->seq_zones_wlock))
		return false;

	WARN_ON_ONCE(rq->rq_flags & RQF_ZONE_WRITE_LOCKED);
	rq->rq_flags |= RQF_ZONE_WRITE_LOCKED;

	return true;
}
EXPORT_SYMBOL_GPL(blk_req_zone_write_trylock);

void __blk_req_zone_write_lock(struct request *rq)
{
	if (WARN_ON_ONCE(test_and_set_bit(blk_rq_zone_no(rq),
					  rq->q->disk->seq_zones_wlock)))
		return;

	WARN_ON_ONCE(rq->rq_flags & RQF_ZONE_WRITE_LOCKED);
	rq->rq_flags |= RQF_ZONE_WRITE_LOCKED;
}
EXPORT_SYMBOL_GPL(__blk_req_zone_write_lock);

void __blk_req_zone_write_unlock(struct request *rq)
{
	rq->rq_flags &= ~RQF_ZONE_WRITE_LOCKED;
	if (rq->q->disk->seq_zones_wlock)
		WARN_ON_ONCE(!test_and_clear_bit(blk_rq_zone_no(rq),
						 rq->q->disk->seq_zones_wlock));
}
EXPORT_SYMBOL_GPL(__blk_req_zone_write_unlock);
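
/*
 * Example (illustrative sketch): the dispatch-side pattern a caller such as
 * an I/O scheduler can use with these helpers, modeled loosely on what
 * mq-deadline does. The function name is hypothetical.
 *
 *	static bool try_dispatch_zoned_write(struct request *rq)
 *	{
 *		if (!blk_req_needs_zone_write_lock(rq))
 *			return true;	// no per-zone serialization required
 *		// At most one write may be in flight per sequential zone.
 *		return blk_req_zone_write_trylock(rq);
 *	}
 *
 * The lock is dropped from the request completion path with
 * blk_req_zone_write_unlock().
 */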

/**
 * bdev_nr_zones - Get number of zones
 * @bdev:	Target device
 *
 * Return the total number of zones of a zoned block device. For a block
 * device without zone capabilities, the number of zones is always 0.
 */
unsigned int bdev_nr_zones(struct block_device *bdev)
{
	sector_t zone_sectors = bdev_zone_sectors(bdev);

	if (!bdev_is_zoned(bdev))
		return 0;
	return (bdev_nr_sectors(bdev) + zone_sectors - 1) >>
		ilog2(zone_sectors);
}
EXPORT_SYMBOL_GPL(bdev_nr_zones);
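
/*
 * Worked example (illustrative numbers): with a 256 MiB zone size
 * (zone_sectors = 524288 = 2^19) and bdev_nr_sectors() = 20971521
 * (10 GiB plus one extra sector), the computation above yields
 * (20971521 + 524287) >> 19 = 41 zones, i.e. the smaller last zone
 * still counts as a zone. The shift is valid because zone sizes are
 * constrained to powers of two.
 */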

/**
 * blkdev_report_zones - Get zones information
 * @bdev:	Target block device
 * @sector:	Sector from which to report zones
 * @nr_zones:	Maximum number of zones to report
 * @cb:		Callback function called for each reported zone
 * @data:	Private data for the callback
 *
 * Description:
 *    Get zone information starting from the zone containing @sector for at
 *    most @nr_zones, and call @cb for each zone reported by the device.
 *    To report all zones in a device starting from @sector, the BLK_ALL_ZONES
 *    constant can be passed to @nr_zones.
 *    Returns the number of zones reported by the device, or a negative errno
 *    value in case of failure.
 *
 *    Note: The caller must use memalloc_noXX_save/restore() calls to control
 *    memory allocations done within this function.
 */
int blkdev_report_zones(struct block_device *bdev, sector_t sector,
			unsigned int nr_zones, report_zones_cb cb, void *data)
{
	struct gendisk *disk = bdev->bd_disk;
	sector_t capacity = get_capacity(disk);

	if (!bdev_is_zoned(bdev) || WARN_ON_ONCE(!disk->fops->report_zones))
		return -EOPNOTSUPP;

	if (!nr_zones || sector >= capacity)
		return 0;

	return disk->fops->report_zones(disk, sector, nr_zones, cb, data);
}
EXPORT_SYMBOL_GPL(blkdev_report_zones);
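
/*
 * Example (illustrative sketch): counting the empty zones of a zoned block
 * device with blkdev_report_zones(). Both function names are hypothetical;
 * the helper returns the number of zones reported or a negative errno.
 *
 *	static int count_empty_cb(struct blk_zone *zone, unsigned int idx,
 *				  void *data)
 *	{
 *		unsigned int *count = data;
 *
 *		if (zone->cond == BLK_ZONE_COND_EMPTY)
 *			(*count)++;
 *		return 0;
 *	}
 *
 *	static int count_empty_zones(struct block_device *bdev,
 *				     unsigned int *count)
 *	{
 *		*count = 0;
 *		return blkdev_report_zones(bdev, 0, BLK_ALL_ZONES,
 *					   count_empty_cb, count);
 *	}
 */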

static inline unsigned long *blk_alloc_zone_bitmap(int node,
						   unsigned int nr_zones)
{
	return kcalloc_node(BITS_TO_LONGS(nr_zones), sizeof(unsigned long),
			    GFP_NOIO, node);
}

static int blk_zone_need_reset_cb(struct blk_zone *zone, unsigned int idx,
				  void *data)
{
	/*
	 * For an all-zones reset, ignore conventional, empty, read-only
	 * and offline zones.
	 */
	switch (zone->cond) {
	case BLK_ZONE_COND_NOT_WP:
	case BLK_ZONE_COND_EMPTY:
	case BLK_ZONE_COND_READONLY:
	case BLK_ZONE_COND_OFFLINE:
		return 0;
	default:
		set_bit(idx, (unsigned long *)data);
		return 0;
	}
}

static int blkdev_zone_reset_all_emulated(struct block_device *bdev,
					  gfp_t gfp_mask)
{
	struct gendisk *disk = bdev->bd_disk;
	sector_t capacity = bdev_nr_sectors(bdev);
	sector_t zone_sectors = bdev_zone_sectors(bdev);
	unsigned long *need_reset;
	struct bio *bio = NULL;
	sector_t sector = 0;
	int ret;

	need_reset = blk_alloc_zone_bitmap(disk->queue->node, disk->nr_zones);
	if (!need_reset)
		return -ENOMEM;

	ret = disk->fops->report_zones(disk, 0, disk->nr_zones,
				       blk_zone_need_reset_cb, need_reset);
	if (ret < 0)
		goto out_free_need_reset;

	ret = 0;
	while (sector < capacity) {
		if (!test_bit(disk_zone_no(disk, sector), need_reset)) {
			sector += zone_sectors;
			continue;
		}

		bio = blk_next_bio(bio, bdev, 0, REQ_OP_ZONE_RESET | REQ_SYNC,
				   gfp_mask);
		bio->bi_iter.bi_sector = sector;
		sector += zone_sectors;

		/* This may take a while, so be nice to others */
		cond_resched();
	}

	if (bio) {
		ret = submit_bio_wait(bio);
		bio_put(bio);
	}

out_free_need_reset:
	kfree(need_reset);
	return ret;
}

static int blkdev_zone_reset_all(struct block_device *bdev, gfp_t gfp_mask)
{
	struct bio bio;

	bio_init(&bio, bdev, NULL, 0, REQ_OP_ZONE_RESET_ALL | REQ_SYNC);
	return submit_bio_wait(&bio);
}

/**
 * blkdev_zone_mgmt - Execute a zone management operation on a range of zones
 * @bdev:	Target block device
 * @op:		Operation to be performed on the zones
 * @sector:	Start sector of the first zone to operate on
 * @nr_sectors:	Number of sectors, should be at least the length of one zone and
 *		must be zone size aligned.
 * @gfp_mask:	Memory allocation flags (for bio_alloc)
 *
 * Description:
 *    Perform the specified operation on the range of zones specified by
 *    @sector..@sector+@nr_sectors. Specifying the entire disk sector range
 *    is valid, but the specified range should not contain conventional zones.
 *    The operation to execute on each zone can be a zone reset, open, close
 *    or finish request.
 */
int blkdev_zone_mgmt(struct block_device *bdev, enum req_op op,
		     sector_t sector, sector_t nr_sectors, gfp_t gfp_mask)
{
	struct request_queue *q = bdev_get_queue(bdev);
	sector_t zone_sectors = bdev_zone_sectors(bdev);
	sector_t capacity = bdev_nr_sectors(bdev);
	sector_t end_sector = sector + nr_sectors;
	struct bio *bio = NULL;
	int ret = 0;

	if (!bdev_is_zoned(bdev))
		return -EOPNOTSUPP;

	if (bdev_read_only(bdev))
		return -EPERM;

	if (!op_is_zone_mgmt(op))
		return -EOPNOTSUPP;

	if (end_sector <= sector || end_sector > capacity)
		/* Out of range */
		return -EINVAL;

	/* Check alignment (handle eventual smaller last zone) */
	if (!bdev_is_zone_start(bdev, sector))
		return -EINVAL;

	if (!bdev_is_zone_start(bdev, nr_sectors) && end_sector != capacity)
		return -EINVAL;

	/*
	 * In the case of a zone reset operation over all zones,
	 * REQ_OP_ZONE_RESET_ALL can be used with devices supporting this
	 * command. For other devices, we emulate this command behavior by
	 * identifying the zones needing a reset.
	 */
	if (op == REQ_OP_ZONE_RESET && sector == 0 && nr_sectors == capacity) {
		if (!blk_queue_zone_resetall(q))
			return blkdev_zone_reset_all_emulated(bdev, gfp_mask);
		return blkdev_zone_reset_all(bdev, gfp_mask);
	}

	while (sector < end_sector) {
		bio = blk_next_bio(bio, bdev, 0, op | REQ_SYNC, gfp_mask);
		bio->bi_iter.bi_sector = sector;
		sector += zone_sectors;

		/* This may take a while, so be nice to others */
		cond_resched();
	}

	ret = submit_bio_wait(bio);
	bio_put(bio);

	return ret;
}
EXPORT_SYMBOL_GPL(blkdev_zone_mgmt);
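
/*
 * Example (illustrative sketch): resetting the single zone containing
 * @sector, as an in-kernel caller such as a filesystem might do. The
 * wrapper name is hypothetical.
 *
 *	static int reset_one_zone(struct block_device *bdev, sector_t sector)
 *	{
 *		sector_t zone_sectors = bdev_zone_sectors(bdev);
 *
 *		return blkdev_zone_mgmt(bdev, REQ_OP_ZONE_RESET,
 *					rounddown(sector, zone_sectors),
 *					zone_sectors, GFP_KERNEL);
 *	}
 */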

struct zone_report_args {
	struct blk_zone __user *zones;
};

static int blkdev_copy_zone_to_user(struct blk_zone *zone, unsigned int idx,
				    void *data)
{
	struct zone_report_args *args = data;

	if (copy_to_user(&args->zones[idx], zone, sizeof(struct blk_zone)))
		return -EFAULT;
	return 0;
}

/*
 * BLKREPORTZONE ioctl processing.
 * Called from blkdev_ioctl.
 */
int blkdev_report_zones_ioctl(struct block_device *bdev, unsigned int cmd,
			      unsigned long arg)
{
	void __user *argp = (void __user *)arg;
	struct zone_report_args args;
	struct blk_zone_report rep;
	int ret;

	if (!argp)
		return -EINVAL;

	if (!bdev_is_zoned(bdev))
		return -ENOTTY;

	if (copy_from_user(&rep, argp, sizeof(struct blk_zone_report)))
		return -EFAULT;

	if (!rep.nr_zones)
		return -EINVAL;

	args.zones = argp + sizeof(struct blk_zone_report);
	ret = blkdev_report_zones(bdev, rep.sector, rep.nr_zones,
				  blkdev_copy_zone_to_user, &args);
	if (ret < 0)
		return ret;

	rep.nr_zones = ret;
	rep.flags = BLK_ZONE_REP_CAPACITY;
	if (copy_to_user(argp, &rep, sizeof(struct blk_zone_report)))
		return -EFAULT;
	return 0;
}
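
/*
 * Example (illustrative sketch): the matching userspace side of the
 * BLKREPORTZONE ioctl. The caller passes a struct blk_zone_report
 * immediately followed by the blk_zone array in one buffer, which is
 * what the args.zones computation above expects. The helper name is
 * hypothetical and error handling is elided.
 *
 *	#include <linux/blkzoned.h>
 *	#include <sys/ioctl.h>
 *	#include <stdlib.h>
 *
 *	static int report_zones(int fd, __u64 sector, unsigned int nr)
 *	{
 *		struct blk_zone_report *rep;
 *		int ret;
 *
 *		rep = calloc(1, sizeof(*rep) + nr * sizeof(struct blk_zone));
 *		rep->sector = sector;
 *		rep->nr_zones = nr;
 *		ret = ioctl(fd, BLKREPORTZONE, rep);
 *		// On success, rep->nr_zones holds the number of zones filled.
 *		free(rep);
 *		return ret;
 *	}
 */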

static int blkdev_truncate_zone_range(struct block_device *bdev,
		blk_mode_t mode, const struct blk_zone_range *zrange)
{
	loff_t start, end;

	if (zrange->sector + zrange->nr_sectors <= zrange->sector ||
	    zrange->sector + zrange->nr_sectors > get_capacity(bdev->bd_disk))
		/* Out of range */
		return -EINVAL;

	start = zrange->sector << SECTOR_SHIFT;
	end = ((zrange->sector + zrange->nr_sectors) << SECTOR_SHIFT) - 1;

	return truncate_bdev_range(bdev, mode, start, end);
}

/*
 * BLKRESETZONE, BLKOPENZONE, BLKCLOSEZONE and BLKFINISHZONE ioctl processing.
 * Called from blkdev_ioctl.
 */
int blkdev_zone_mgmt_ioctl(struct block_device *bdev, blk_mode_t mode,
			   unsigned int cmd, unsigned long arg)
{
	void __user *argp = (void __user *)arg;
	struct blk_zone_range zrange;
	enum req_op op;
	int ret;

	if (!argp)
		return -EINVAL;

	if (!bdev_is_zoned(bdev))
		return -ENOTTY;

	if (!(mode & BLK_OPEN_WRITE))
		return -EBADF;

	if (copy_from_user(&zrange, argp, sizeof(struct blk_zone_range)))
		return -EFAULT;

	switch (cmd) {
	case BLKRESETZONE:
		op = REQ_OP_ZONE_RESET;

		/* Invalidate the page cache, including dirty pages. */
		filemap_invalidate_lock(bdev->bd_inode->i_mapping);
		ret = blkdev_truncate_zone_range(bdev, mode, &zrange);
		if (ret)
			goto fail;
		break;
	case BLKOPENZONE:
		op = REQ_OP_ZONE_OPEN;
		break;
	case BLKCLOSEZONE:
		op = REQ_OP_ZONE_CLOSE;
		break;
	case BLKFINISHZONE:
		op = REQ_OP_ZONE_FINISH;
		break;
	default:
		return -ENOTTY;
	}

	ret = blkdev_zone_mgmt(bdev, op, zrange.sector, zrange.nr_sectors,
			       GFP_KERNEL);

fail:
	if (cmd == BLKRESETZONE)
		filemap_invalidate_unlock(bdev->bd_inode->i_mapping);

	return ret;
}
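
/*
 * Example (illustrative sketch): resetting a zone from userspace with the
 * BLKRESETZONE ioctl. The device must be opened for writing, since the
 * handler above rejects read-only opens with -EBADF. The helper name is
 * hypothetical.
 *
 *	#include <linux/blkzoned.h>
 *	#include <sys/ioctl.h>
 *
 *	static int reset_zone(int fd, __u64 zone_start, __u64 zone_len)
 *	{
 *		struct blk_zone_range zrange = {
 *			.sector = zone_start,
 *			.nr_sectors = zone_len,
 *		};
 *
 *		return ioctl(fd, BLKRESETZONE, &zrange);
 *	}
 */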

void disk_free_zone_bitmaps(struct gendisk *disk)
{
	kfree(disk->conv_zones_bitmap);
	disk->conv_zones_bitmap = NULL;
	kfree(disk->seq_zones_wlock);
	disk->seq_zones_wlock = NULL;
}

struct blk_revalidate_zone_args {
	struct gendisk	*disk;
	unsigned long	*conv_zones_bitmap;
	unsigned long	*seq_zones_wlock;
	unsigned int	nr_zones;
	sector_t	sector;
};

/*
 * Helper function to check the validity of zones of a zoned block device.
 */
static int blk_revalidate_zone_cb(struct blk_zone *zone, unsigned int idx,
				  void *data)
{
	struct blk_revalidate_zone_args *args = data;
	struct gendisk *disk = args->disk;
	struct request_queue *q = disk->queue;
	sector_t capacity = get_capacity(disk);
	sector_t zone_sectors = q->limits.chunk_sectors;

	/* Check for bad zones and holes in the zone report */
	if (zone->start != args->sector) {
		pr_warn("%s: Zone gap at sectors %llu..%llu\n",
			disk->disk_name, args->sector, zone->start);
		return -ENODEV;
	}

	if (zone->start >= capacity || !zone->len) {
		pr_warn("%s: Invalid zone start %llu, length %llu\n",
			disk->disk_name, zone->start, zone->len);
		return -ENODEV;
	}

	/*
	 * All zones must have the same size, with the exception of a possibly
	 * smaller last zone.
	 */
	if (zone->start + zone->len < capacity) {
		if (zone->len != zone_sectors) {
			pr_warn("%s: Invalid zoned device with non constant zone size\n",
				disk->disk_name);
			return -ENODEV;
		}
	} else if (zone->len > zone_sectors) {
		pr_warn("%s: Invalid zoned device with larger last zone size\n",
			disk->disk_name);
		return -ENODEV;
	}

	/* Check zone type */
	switch (zone->type) {
	case BLK_ZONE_TYPE_CONVENTIONAL:
		if (!args->conv_zones_bitmap) {
			args->conv_zones_bitmap =
				blk_alloc_zone_bitmap(q->node, args->nr_zones);
			if (!args->conv_zones_bitmap)
				return -ENOMEM;
		}
		set_bit(idx, args->conv_zones_bitmap);
		break;
	case BLK_ZONE_TYPE_SEQWRITE_REQ:
	case BLK_ZONE_TYPE_SEQWRITE_PREF:
		if (!args->seq_zones_wlock) {
			args->seq_zones_wlock =
				blk_alloc_zone_bitmap(q->node, args->nr_zones);
			if (!args->seq_zones_wlock)
				return -ENOMEM;
		}
		break;
	default:
		pr_warn("%s: Invalid zone type 0x%x at sectors %llu\n",
			disk->disk_name, (int)zone->type, zone->start);
		return -ENODEV;
	}

	args->sector += zone->len;
	return 0;
}

/**
 * blk_revalidate_disk_zones - (re)allocate and initialize zone bitmaps
 * @disk:	Target disk
 * @update_driver_data:	Callback to update driver data on the frozen disk
 *
 * Helper function for low-level device drivers to check and (re)allocate and
 * initialize a disk's request queue zone bitmaps. This function should
 * normally be called within the disk ->revalidate method for blk-mq based
 * drivers.
 * Before calling this function, the device driver must already have set the
 * device zone size (chunk_sectors limit) and the max zone append limit.
 * BIO based drivers cannot use this function; they only need to set
 * disk->nr_zones so that the sysfs exposed value is correct.
 * If the @update_driver_data callback function is not NULL, the callback is
 * executed with the device request queue frozen after all zones have been
 * checked.
 */
int blk_revalidate_disk_zones(struct gendisk *disk,
			      void (*update_driver_data)(struct gendisk *disk))
{
	struct request_queue *q = disk->queue;
	sector_t zone_sectors = q->limits.chunk_sectors;
	sector_t capacity = get_capacity(disk);
	struct blk_revalidate_zone_args args = { };
	unsigned int noio_flag;
	int ret;

	if (WARN_ON_ONCE(!blk_queue_is_zoned(q)))
		return -EIO;
	if (WARN_ON_ONCE(!queue_is_mq(q)))
		return -EIO;

	if (!capacity)
		return -ENODEV;

	/*
	 * Check that the device driver indicated a valid zone size and that
	 * the max zone append limit is set.
	 */
	if (!zone_sectors || !is_power_of_2(zone_sectors)) {
		pr_warn("%s: Invalid non power of two zone size (%llu)\n",
			disk->disk_name, zone_sectors);
		return -ENODEV;
	}

	if (!q->limits.max_zone_append_sectors) {
		pr_warn("%s: Invalid 0 maximum zone append limit\n",
			disk->disk_name);
		return -ENODEV;
	}

	/*
	 * Ensure that all memory allocations in this context are done as if
	 * GFP_NOIO was specified.
	 */
	args.disk = disk;
	args.nr_zones = (capacity + zone_sectors - 1) >> ilog2(zone_sectors);
	noio_flag = memalloc_noio_save();
	ret = disk->fops->report_zones(disk, 0, UINT_MAX,
				       blk_revalidate_zone_cb, &args);
	if (!ret) {
		pr_warn("%s: No zones reported\n", disk->disk_name);
		ret = -ENODEV;
	}
	memalloc_noio_restore(noio_flag);

	/*
	 * If zones were reported, make sure that the entire disk capacity
	 * has been checked.
	 */
	if (ret > 0 && args.sector != capacity) {
		pr_warn("%s: Missing zones from sector %llu\n",
			disk->disk_name, args.sector);
		ret = -ENODEV;
	}

	/*
	 * Install the new bitmaps and update nr_zones only once the queue is
	 * stopped and all I/Os are completed (i.e. a scheduler is not
	 * referencing the bitmaps).
	 */
	blk_mq_freeze_queue(q);
	if (ret > 0) {
		disk->nr_zones = args.nr_zones;
		swap(disk->seq_zones_wlock, args.seq_zones_wlock);
		swap(disk->conv_zones_bitmap, args.conv_zones_bitmap);
		if (update_driver_data)
			update_driver_data(disk);
		ret = 0;
	} else {
		pr_warn("%s: failed to revalidate zones\n", disk->disk_name);
		disk_free_zone_bitmaps(disk);
	}
	blk_mq_unfreeze_queue(q);

	kfree(args.seq_zones_wlock);
	kfree(args.conv_zones_bitmap);
	return ret;
}
EXPORT_SYMBOL_GPL(blk_revalidate_disk_zones);
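
/*
 * Example (illustrative sketch): how a blk-mq based zoned driver might call
 * blk_revalidate_disk_zones() after (re)configuring its queue limits. The
 * mydrv_* function names are hypothetical.
 *
 *	static int mydrv_revalidate_zones(struct gendisk *disk)
 *	{
 *		// Both limits below must be set before revalidating.
 *		blk_queue_chunk_sectors(disk->queue,
 *					mydrv_zone_sectors(disk));
 *		blk_queue_max_zone_append_sectors(disk->queue,
 *						  mydrv_zone_append_max(disk));
 *
 *		return blk_revalidate_disk_zones(disk, NULL);
 *	}
 */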

void disk_clear_zone_settings(struct gendisk *disk)
{
	struct request_queue *q = disk->queue;

	blk_mq_freeze_queue(q);

	disk_free_zone_bitmaps(disk);
	blk_queue_flag_clear(QUEUE_FLAG_ZONE_RESETALL, q);
	q->required_elevator_features &= ~ELEVATOR_F_ZBD_SEQ_WRITE;
	disk->nr_zones = 0;
	disk->max_open_zones = 0;
	disk->max_active_zones = 0;
	q->limits.chunk_sectors = 0;
	q->limits.zone_write_granularity = 0;
	q->limits.max_zone_append_sectors = 0;

	blk_mq_unfreeze_queue(q);
}