xref: /openbmc/linux/block/blk-zoned.c (revision 65417d9f)
1 /*
2  * Zoned block device handling
3  *
4  * Copyright (c) 2015, Hannes Reinecke
5  * Copyright (c) 2015, SUSE Linux GmbH
6  *
7  * Copyright (c) 2016, Damien Le Moal
8  * Copyright (c) 2016, Western Digital
9  */
10 
11 #include <linux/kernel.h>
12 #include <linux/module.h>
13 #include <linux/rbtree.h>
14 #include <linux/blkdev.h>
15 
16 static inline sector_t blk_zone_start(struct request_queue *q,
17 				      sector_t sector)
18 {
19 	sector_t zone_mask = blk_queue_zone_sectors(q) - 1;
20 
21 	return sector & ~zone_mask;
22 }
23 
24 /*
25  * Check that a zone report belongs to the partition.
26  * If yes, fix its start sector and write pointer, copy it in the
27  * zone information array and return true. Return false otherwise.
28  */
29 static bool blkdev_report_zone(struct block_device *bdev,
30 			       struct blk_zone *rep,
31 			       struct blk_zone *zone)
32 {
33 	sector_t offset = get_start_sect(bdev);
34 
35 	if (rep->start < offset)
36 		return false;
37 
38 	rep->start -= offset;
39 	if (rep->start + rep->len > bdev->bd_part->nr_sects)
40 		return false;
41 
42 	if (rep->type == BLK_ZONE_TYPE_CONVENTIONAL)
43 		rep->wp = rep->start + rep->len;
44 	else
45 		rep->wp -= offset;
46 	memcpy(zone, rep, sizeof(struct blk_zone));
47 
48 	return true;
49 }
50 
51 /**
52  * blkdev_report_zones - Get zones information
53  * @bdev:	Target block device
54  * @sector:	Sector from which to report zones
55  * @zones:	Array of zone structures where to return the zones information
56  * @nr_zones:	Number of zone structures in the zone array
57  * @gfp_mask:	Memory allocation flags (for bio_alloc)
58  *
59  * Description:
60  *    Get zone information starting from the zone containing @sector.
61  *    The number of zone information reported may be less than the number
62  *    requested by @nr_zones. The number of zones actually reported is
63  *    returned in @nr_zones.
64  */
65 int blkdev_report_zones(struct block_device *bdev,
66 			sector_t sector,
67 			struct blk_zone *zones,
68 			unsigned int *nr_zones,
69 			gfp_t gfp_mask)
70 {
71 	struct request_queue *q = bdev_get_queue(bdev);
72 	struct blk_zone_report_hdr *hdr;
73 	unsigned int nrz = *nr_zones;
74 	struct page *page;
75 	unsigned int nr_rep;
76 	size_t rep_bytes;
77 	unsigned int nr_pages;
78 	struct bio *bio;
79 	struct bio_vec *bv;
80 	unsigned int i, n, nz;
81 	unsigned int ofst;
82 	void *addr;
83 	int ret;
84 
85 	if (!q)
86 		return -ENXIO;
87 
88 	if (!blk_queue_is_zoned(q))
89 		return -EOPNOTSUPP;
90 
91 	if (!nrz)
92 		return 0;
93 
94 	if (sector > bdev->bd_part->nr_sects) {
95 		*nr_zones = 0;
96 		return 0;
97 	}
98 
99 	/*
100 	 * The zone report has a header. So make room for it in the
101 	 * payload. Also make sure that the report fits in a single BIO
102 	 * that will not be split down the stack.
103 	 */
104 	rep_bytes = sizeof(struct blk_zone_report_hdr) +
105 		sizeof(struct blk_zone) * nrz;
106 	rep_bytes = (rep_bytes + PAGE_SIZE - 1) & PAGE_MASK;
107 	if (rep_bytes > (queue_max_sectors(q) << 9))
108 		rep_bytes = queue_max_sectors(q) << 9;
109 
110 	nr_pages = min_t(unsigned int, BIO_MAX_PAGES,
111 			 rep_bytes >> PAGE_SHIFT);
112 	nr_pages = min_t(unsigned int, nr_pages,
113 			 queue_max_segments(q));
114 
115 	bio = bio_alloc(gfp_mask, nr_pages);
116 	if (!bio)
117 		return -ENOMEM;
118 
119 	bio_set_dev(bio, bdev);
120 	bio->bi_iter.bi_sector = blk_zone_start(q, sector);
121 	bio_set_op_attrs(bio, REQ_OP_ZONE_REPORT, 0);
122 
123 	for (i = 0; i < nr_pages; i++) {
124 		page = alloc_page(gfp_mask);
125 		if (!page) {
126 			ret = -ENOMEM;
127 			goto out;
128 		}
129 		if (!bio_add_page(bio, page, PAGE_SIZE, 0)) {
130 			__free_page(page);
131 			break;
132 		}
133 	}
134 
135 	if (i == 0)
136 		ret = -ENOMEM;
137 	else
138 		ret = submit_bio_wait(bio);
139 	if (ret)
140 		goto out;
141 
142 	/*
143 	 * Process the report result: skip the header and go through the
144 	 * reported zones to fixup and fixup the zone information for
145 	 * partitions. At the same time, return the zone information into
146 	 * the zone array.
147 	 */
148 	n = 0;
149 	nz = 0;
150 	nr_rep = 0;
151 	bio_for_each_segment_all(bv, bio, i) {
152 
153 		if (!bv->bv_page)
154 			break;
155 
156 		addr = kmap_atomic(bv->bv_page);
157 
158 		/* Get header in the first page */
159 		ofst = 0;
160 		if (!nr_rep) {
161 			hdr = (struct blk_zone_report_hdr *) addr;
162 			nr_rep = hdr->nr_zones;
163 			ofst = sizeof(struct blk_zone_report_hdr);
164 		}
165 
166 		/* Fixup and report zones */
167 		while (ofst < bv->bv_len &&
168 		       n < nr_rep && nz < nrz) {
169 			if (blkdev_report_zone(bdev, addr + ofst, &zones[nz]))
170 				nz++;
171 			ofst += sizeof(struct blk_zone);
172 			n++;
173 		}
174 
175 		kunmap_atomic(addr);
176 
177 		if (n >= nr_rep || nz >= nrz)
178 			break;
179 
180 	}
181 
182 	*nr_zones = nz;
183 out:
184 	bio_for_each_segment_all(bv, bio, i)
185 		__free_page(bv->bv_page);
186 	bio_put(bio);
187 
188 	return ret;
189 }
190 EXPORT_SYMBOL_GPL(blkdev_report_zones);
191 
192 /**
193  * blkdev_reset_zones - Reset zones write pointer
194  * @bdev:	Target block device
195  * @sector:	Start sector of the first zone to reset
196  * @nr_sectors:	Number of sectors, at least the length of one zone
197  * @gfp_mask:	Memory allocation flags (for bio_alloc)
198  *
199  * Description:
200  *    Reset the write pointer of the zones contained in the range
201  *    @sector..@sector+@nr_sectors. Specifying the entire disk sector range
202  *    is valid, but the specified range should not contain conventional zones.
203  */
204 int blkdev_reset_zones(struct block_device *bdev,
205 		       sector_t sector, sector_t nr_sectors,
206 		       gfp_t gfp_mask)
207 {
208 	struct request_queue *q = bdev_get_queue(bdev);
209 	sector_t zone_sectors;
210 	sector_t end_sector = sector + nr_sectors;
211 	struct bio *bio;
212 	int ret;
213 
214 	if (!q)
215 		return -ENXIO;
216 
217 	if (!blk_queue_is_zoned(q))
218 		return -EOPNOTSUPP;
219 
220 	if (end_sector > bdev->bd_part->nr_sects)
221 		/* Out of range */
222 		return -EINVAL;
223 
224 	/* Check alignment (handle eventual smaller last zone) */
225 	zone_sectors = blk_queue_zone_sectors(q);
226 	if (sector & (zone_sectors - 1))
227 		return -EINVAL;
228 
229 	if ((nr_sectors & (zone_sectors - 1)) &&
230 	    end_sector != bdev->bd_part->nr_sects)
231 		return -EINVAL;
232 
233 	while (sector < end_sector) {
234 
235 		bio = bio_alloc(gfp_mask, 0);
236 		bio->bi_iter.bi_sector = sector;
237 		bio_set_dev(bio, bdev);
238 		bio_set_op_attrs(bio, REQ_OP_ZONE_RESET, 0);
239 
240 		ret = submit_bio_wait(bio);
241 		bio_put(bio);
242 
243 		if (ret)
244 			return ret;
245 
246 		sector += zone_sectors;
247 
248 		/* This may take a while, so be nice to others */
249 		cond_resched();
250 
251 	}
252 
253 	return 0;
254 }
255 EXPORT_SYMBOL_GPL(blkdev_reset_zones);
256 
257 /**
258  * BLKREPORTZONE ioctl processing.
259  * Called from blkdev_ioctl.
260  */
261 int blkdev_report_zones_ioctl(struct block_device *bdev, fmode_t mode,
262 			      unsigned int cmd, unsigned long arg)
263 {
264 	void __user *argp = (void __user *)arg;
265 	struct request_queue *q;
266 	struct blk_zone_report rep;
267 	struct blk_zone *zones;
268 	int ret;
269 
270 	if (!argp)
271 		return -EINVAL;
272 
273 	q = bdev_get_queue(bdev);
274 	if (!q)
275 		return -ENXIO;
276 
277 	if (!blk_queue_is_zoned(q))
278 		return -ENOTTY;
279 
280 	if (!capable(CAP_SYS_ADMIN))
281 		return -EACCES;
282 
283 	if (copy_from_user(&rep, argp, sizeof(struct blk_zone_report)))
284 		return -EFAULT;
285 
286 	if (!rep.nr_zones)
287 		return -EINVAL;
288 
289 	zones = kcalloc(rep.nr_zones, sizeof(struct blk_zone), GFP_KERNEL);
290 	if (!zones)
291 		return -ENOMEM;
292 
293 	ret = blkdev_report_zones(bdev, rep.sector,
294 				  zones, &rep.nr_zones,
295 				  GFP_KERNEL);
296 	if (ret)
297 		goto out;
298 
299 	if (copy_to_user(argp, &rep, sizeof(struct blk_zone_report))) {
300 		ret = -EFAULT;
301 		goto out;
302 	}
303 
304 	if (rep.nr_zones) {
305 		if (copy_to_user(argp + sizeof(struct blk_zone_report), zones,
306 				 sizeof(struct blk_zone) * rep.nr_zones))
307 			ret = -EFAULT;
308 	}
309 
310  out:
311 	kfree(zones);
312 
313 	return ret;
314 }
315 
316 /**
317  * BLKRESETZONE ioctl processing.
318  * Called from blkdev_ioctl.
319  */
320 int blkdev_reset_zones_ioctl(struct block_device *bdev, fmode_t mode,
321 			     unsigned int cmd, unsigned long arg)
322 {
323 	void __user *argp = (void __user *)arg;
324 	struct request_queue *q;
325 	struct blk_zone_range zrange;
326 
327 	if (!argp)
328 		return -EINVAL;
329 
330 	q = bdev_get_queue(bdev);
331 	if (!q)
332 		return -ENXIO;
333 
334 	if (!blk_queue_is_zoned(q))
335 		return -ENOTTY;
336 
337 	if (!capable(CAP_SYS_ADMIN))
338 		return -EACCES;
339 
340 	if (!(mode & FMODE_WRITE))
341 		return -EBADF;
342 
343 	if (copy_from_user(&zrange, argp, sizeof(struct blk_zone_range)))
344 		return -EFAULT;
345 
346 	return blkdev_reset_zones(bdev, zrange.sector, zrange.nr_sectors,
347 				  GFP_KERNEL);
348 }
349