xref: /openbmc/linux/drivers/scsi/sd_zbc.c (revision 2dfb62d6ce80b3536d1a915177ae82496bd7ac4a)
1 // SPDX-License-Identifier: GPL-2.0-only
2 /*
3  * SCSI Zoned Block commands
4  *
5  * Copyright (C) 2014-2015 SUSE Linux GmbH
6  * Written by: Hannes Reinecke <hare@suse.de>
7  * Modified by: Damien Le Moal <damien.lemoal@hgst.com>
8  * Modified by: Shaun Tancheff <shaun.tancheff@seagate.com>
9  */
10 
11 #include <linux/blkdev.h>
12 #include <linux/vmalloc.h>
13 #include <linux/sched/mm.h>
14 #include <linux/mutex.h>
15 
16 #include <asm/unaligned.h>
17 
18 #include <scsi/scsi.h>
19 #include <scsi/scsi_cmnd.h>
20 
21 #include "sd.h"
22 
23 /**
24  * sd_zbc_get_zone_wp_offset - Get zone write pointer offset.
25  * @zone: Zone for which to return the write pointer offset.
26  *
27  * Return: offset of the write pointer from the start of the zone.
28  */
29 static unsigned int sd_zbc_get_zone_wp_offset(struct blk_zone *zone)
30 {
31 	if (zone->type == ZBC_ZONE_TYPE_CONV)
32 		return 0;
33 
34 	switch (zone->cond) {
35 	case BLK_ZONE_COND_IMP_OPEN:
36 	case BLK_ZONE_COND_EXP_OPEN:
37 	case BLK_ZONE_COND_CLOSED:
38 		return zone->wp - zone->start;
39 	case BLK_ZONE_COND_FULL:
40 		return zone->len;
41 	case BLK_ZONE_COND_EMPTY:
42 	case BLK_ZONE_COND_OFFLINE:
43 	case BLK_ZONE_COND_READONLY:
44 	default:
45 		/*
46 		 * Offline and read-only zones do not have a valid
47 		 * write pointer. Use 0 as for an empty zone.
48 		 */
49 		return 0;
50 	}
51 }
52 
53 /* Whether or not a SCSI zone descriptor describes a gap zone. */
54 static bool sd_zbc_is_gap_zone(const u8 buf[64])
55 {
56 	return (buf[0] & 0xf) == ZBC_ZONE_TYPE_GAP;
57 }
58 
59 /**
60  * sd_zbc_parse_report - Parse a SCSI zone descriptor
61  * @sdkp: SCSI disk pointer.
62  * @buf: SCSI zone descriptor.
63  * @idx: Index of the zone relative to the first zone reported by the current
64  *	sd_zbc_report_zones() call.
65  * @cb: Callback function pointer.
66  * @data: Second argument passed to @cb.
67  *
68  * Return: Value returned by @cb.
69  *
70  * Convert a SCSI zone descriptor into struct blk_zone format. Additionally,
71  * call @cb(blk_zone, @data).
72  */
73 static int sd_zbc_parse_report(struct scsi_disk *sdkp, const u8 buf[64],
74 			       unsigned int idx, report_zones_cb cb, void *data)
75 {
76 	struct scsi_device *sdp = sdkp->device;
77 	struct blk_zone zone = { 0 };
78 	sector_t start_lba, gran;
79 	int ret;
80 
81 	if (WARN_ON_ONCE(sd_zbc_is_gap_zone(buf)))
82 		return -EINVAL;
83 
84 	zone.type = buf[0] & 0x0f;
85 	zone.cond = (buf[1] >> 4) & 0xf;
86 	if (buf[1] & 0x01)
87 		zone.reset = 1;
88 	if (buf[1] & 0x02)
89 		zone.non_seq = 1;
90 
91 	start_lba = get_unaligned_be64(&buf[16]);
92 	zone.start = logical_to_sectors(sdp, start_lba);
93 	zone.capacity = logical_to_sectors(sdp, get_unaligned_be64(&buf[8]));
94 	zone.len = zone.capacity;
95 	if (sdkp->zone_starting_lba_gran) {
96 		gran = logical_to_sectors(sdp, sdkp->zone_starting_lba_gran);
97 		if (zone.len > gran) {
98 			sd_printk(KERN_ERR, sdkp,
99 				  "Invalid zone at LBA %llu with capacity %llu and length %llu; granularity = %llu\n",
100 				  start_lba,
101 				  sectors_to_logical(sdp, zone.capacity),
102 				  sectors_to_logical(sdp, zone.len),
103 				  sectors_to_logical(sdp, gran));
104 			return -EINVAL;
105 		}
106 		/*
107 		 * Use the starting LBA granularity instead of the zone length
108 		 * obtained from the REPORT ZONES command.
109 		 */
110 		zone.len = gran;
111 	}
112 	if (zone.cond == ZBC_ZONE_COND_FULL)
113 		zone.wp = zone.start + zone.len;
114 	else
115 		zone.wp = logical_to_sectors(sdp, get_unaligned_be64(&buf[24]));
116 
117 	ret = cb(&zone, idx, data);
118 	if (ret)
119 		return ret;
120 
121 	if (sdkp->rev_wp_offset)
122 		sdkp->rev_wp_offset[idx] = sd_zbc_get_zone_wp_offset(&zone);
123 
124 	return 0;
125 }
126 
127 /**
128  * sd_zbc_do_report_zones - Issue a REPORT ZONES scsi command.
129  * @sdkp: The target disk
130  * @buf: vmalloc-ed buffer to use for the reply
131  * @buflen: the buffer size
132  * @lba: Start LBA of the report
133  * @partial: Do partial report
134  *
135  * For internal use during device validation.
136  * Using partial=true can significantly speed up execution of a report zones
137  * command because the disk does not have to count all possible report matching
138  * zones and will only report the count of zones fitting in the command reply
139  * buffer.
140  */
141 static int sd_zbc_do_report_zones(struct scsi_disk *sdkp, unsigned char *buf,
142 				  unsigned int buflen, sector_t lba,
143 				  bool partial)
144 {
145 	struct scsi_device *sdp = sdkp->device;
146 	const int timeout = sdp->request_queue->rq_timeout;
147 	struct scsi_sense_hdr sshdr;
148 	unsigned char cmd[16];
149 	unsigned int rep_len;
150 	int result;
151 
152 	memset(cmd, 0, 16);
153 	cmd[0] = ZBC_IN;
154 	cmd[1] = ZI_REPORT_ZONES;
155 	put_unaligned_be64(lba, &cmd[2]);
156 	put_unaligned_be32(buflen, &cmd[10]);
157 	if (partial)
158 		cmd[14] = ZBC_REPORT_ZONE_PARTIAL;
159 
160 	result = scsi_execute_req(sdp, cmd, DMA_FROM_DEVICE,
161 				  buf, buflen, &sshdr,
162 				  timeout, SD_MAX_RETRIES, NULL);
163 	if (result) {
164 		sd_printk(KERN_ERR, sdkp,
165 			  "REPORT ZONES start lba %llu failed\n", lba);
166 		sd_print_result(sdkp, "REPORT ZONES", result);
167 		if (result > 0 && scsi_sense_valid(&sshdr))
168 			sd_print_sense_hdr(sdkp, &sshdr);
169 		return -EIO;
170 	}
171 
172 	rep_len = get_unaligned_be32(&buf[0]);
173 	if (rep_len < 64) {
174 		sd_printk(KERN_ERR, sdkp,
175 			  "REPORT ZONES report invalid length %u\n",
176 			  rep_len);
177 		return -EIO;
178 	}
179 
180 	return 0;
181 }
182 
183 /**
184  * sd_zbc_alloc_report_buffer() - Allocate a buffer for report zones reply.
185  * @sdkp: The target disk
186  * @nr_zones: Maximum number of zones to report
187  * @buflen: Size of the buffer allocated
188  *
189  * Try to allocate a reply buffer for the number of requested zones.
190  * The size of the buffer allocated may be smaller than requested to
191  * satify the device constraint (max_hw_sectors, max_segments, etc).
192  *
193  * Return the address of the allocated buffer and update @buflen with
194  * the size of the allocated buffer.
195  */
196 static void *sd_zbc_alloc_report_buffer(struct scsi_disk *sdkp,
197 					unsigned int nr_zones, size_t *buflen)
198 {
199 	struct request_queue *q = sdkp->disk->queue;
200 	size_t bufsize;
201 	void *buf;
202 
203 	/*
204 	 * Report zone buffer size should be at most 64B times the number of
205 	 * zones requested plus the 64B reply header, but should be aligned
206 	 * to SECTOR_SIZE for ATA devices.
207 	 * Make sure that this size does not exceed the hardware capabilities.
208 	 * Furthermore, since the report zone command cannot be split, make
209 	 * sure that the allocated buffer can always be mapped by limiting the
210 	 * number of pages allocated to the HBA max segments limit.
211 	 */
212 	nr_zones = min(nr_zones, sdkp->zone_info.nr_zones);
213 	bufsize = roundup((nr_zones + 1) * 64, SECTOR_SIZE);
214 	bufsize = min_t(size_t, bufsize,
215 			queue_max_hw_sectors(q) << SECTOR_SHIFT);
216 	bufsize = min_t(size_t, bufsize, queue_max_segments(q) << PAGE_SHIFT);
217 
218 	while (bufsize >= SECTOR_SIZE) {
219 		buf = __vmalloc(bufsize,
220 				GFP_KERNEL | __GFP_ZERO | __GFP_NORETRY);
221 		if (buf) {
222 			*buflen = bufsize;
223 			return buf;
224 		}
225 		bufsize = rounddown(bufsize >> 1, SECTOR_SIZE);
226 	}
227 
228 	return NULL;
229 }
230 
231 /**
232  * sd_zbc_zone_sectors - Get the device zone size in number of 512B sectors.
233  * @sdkp: The target disk
234  */
235 static inline sector_t sd_zbc_zone_sectors(struct scsi_disk *sdkp)
236 {
237 	return logical_to_sectors(sdkp->device, sdkp->zone_info.zone_blocks);
238 }
239 
240 /**
241  * sd_zbc_report_zones - SCSI .report_zones() callback.
242  * @disk: Disk to report zones for.
243  * @sector: Start sector.
244  * @nr_zones: Maximum number of zones to report.
245  * @cb: Callback function called to report zone information.
246  * @data: Second argument passed to @cb.
247  *
248  * Called by the block layer to iterate over zone information. See also the
249  * disk->fops->report_zones() calls in block/blk-zoned.c.
250  */
251 int sd_zbc_report_zones(struct gendisk *disk, sector_t sector,
252 			unsigned int nr_zones, report_zones_cb cb, void *data)
253 {
254 	struct scsi_disk *sdkp = scsi_disk(disk);
255 	sector_t lba = sectors_to_logical(sdkp->device, sector);
256 	unsigned int nr, i;
257 	unsigned char *buf;
258 	u64 zone_length, start_lba;
259 	size_t offset, buflen = 0;
260 	int zone_idx = 0;
261 	int ret;
262 
263 	if (!sd_is_zoned(sdkp))
264 		/* Not a zoned device */
265 		return -EOPNOTSUPP;
266 
267 	if (!sdkp->capacity)
268 		/* Device gone or invalid */
269 		return -ENODEV;
270 
271 	buf = sd_zbc_alloc_report_buffer(sdkp, nr_zones, &buflen);
272 	if (!buf)
273 		return -ENOMEM;
274 
275 	while (zone_idx < nr_zones && lba < sdkp->capacity) {
276 		ret = sd_zbc_do_report_zones(sdkp, buf, buflen, lba, true);
277 		if (ret)
278 			goto out;
279 
280 		offset = 0;
281 		nr = min(nr_zones, get_unaligned_be32(&buf[0]) / 64);
282 		if (!nr)
283 			break;
284 
285 		for (i = 0; i < nr && zone_idx < nr_zones; i++) {
286 			offset += 64;
287 			start_lba = get_unaligned_be64(&buf[offset + 16]);
288 			zone_length = get_unaligned_be64(&buf[offset + 8]);
289 			if ((zone_idx == 0 &&
290 			    (lba < start_lba ||
291 			     lba >= start_lba + zone_length)) ||
292 			    (zone_idx > 0 && start_lba != lba) ||
293 			    start_lba + zone_length < start_lba) {
294 				sd_printk(KERN_ERR, sdkp,
295 					  "Zone %d at LBA %llu is invalid: %llu + %llu\n",
296 					  zone_idx, lba, start_lba, zone_length);
297 				ret = -EINVAL;
298 				goto out;
299 			}
300 			lba = start_lba + zone_length;
301 			if (sd_zbc_is_gap_zone(&buf[offset])) {
302 				if (sdkp->zone_starting_lba_gran)
303 					continue;
304 				sd_printk(KERN_ERR, sdkp,
305 					  "Gap zone without constant LBA offsets\n");
306 				ret = -EINVAL;
307 				goto out;
308 			}
309 
310 			ret = sd_zbc_parse_report(sdkp, buf + offset, zone_idx,
311 						  cb, data);
312 			if (ret)
313 				goto out;
314 
315 			zone_idx++;
316 		}
317 	}
318 
319 	ret = zone_idx;
320 out:
321 	kvfree(buf);
322 	return ret;
323 }
324 
325 static blk_status_t sd_zbc_cmnd_checks(struct scsi_cmnd *cmd)
326 {
327 	struct request *rq = scsi_cmd_to_rq(cmd);
328 	struct scsi_disk *sdkp = scsi_disk(rq->q->disk);
329 	sector_t sector = blk_rq_pos(rq);
330 
331 	if (!sd_is_zoned(sdkp))
332 		/* Not a zoned device */
333 		return BLK_STS_IOERR;
334 
335 	if (sdkp->device->changed)
336 		return BLK_STS_IOERR;
337 
338 	if (sector & (sd_zbc_zone_sectors(sdkp) - 1))
339 		/* Unaligned request */
340 		return BLK_STS_IOERR;
341 
342 	return BLK_STS_OK;
343 }
344 
/*
 * Special values stored in sdkp->zones_wp_offset[] in place of a real offset:
 * - SD_ZBC_INVALID_WP_OFST: the cached offset cannot be used; the next zone
 *   append to that zone schedules the work that rereads it from the device.
 * - SD_ZBC_UPDATING_WP_OFST: that work has been scheduled for the zone and
 *   has not stored a new offset yet.
 */
#define SD_ZBC_INVALID_WP_OFST	(~0u)
#define SD_ZBC_UPDATING_WP_OFST	(SD_ZBC_INVALID_WP_OFST - 1)
347 
348 static int sd_zbc_update_wp_offset_cb(struct blk_zone *zone, unsigned int idx,
349 				    void *data)
350 {
351 	struct scsi_disk *sdkp = data;
352 
353 	lockdep_assert_held(&sdkp->zones_wp_offset_lock);
354 
355 	sdkp->zones_wp_offset[idx] = sd_zbc_get_zone_wp_offset(zone);
356 
357 	return 0;
358 }
359 
360 /*
361  * An attempt to append a zone triggered an invalid write pointer error.
362  * Reread the write pointer of the zone(s) in which the append failed.
363  */
364 static void sd_zbc_update_wp_offset_workfn(struct work_struct *work)
365 {
366 	struct scsi_disk *sdkp;
367 	unsigned long flags;
368 	sector_t zno;
369 	int ret;
370 
371 	sdkp = container_of(work, struct scsi_disk, zone_wp_offset_work);
372 
373 	spin_lock_irqsave(&sdkp->zones_wp_offset_lock, flags);
374 	for (zno = 0; zno < sdkp->zone_info.nr_zones; zno++) {
375 		if (sdkp->zones_wp_offset[zno] != SD_ZBC_UPDATING_WP_OFST)
376 			continue;
377 
378 		spin_unlock_irqrestore(&sdkp->zones_wp_offset_lock, flags);
379 		ret = sd_zbc_do_report_zones(sdkp, sdkp->zone_wp_update_buf,
380 					     SD_BUF_SIZE,
381 					     zno * sdkp->zone_info.zone_blocks, true);
382 		spin_lock_irqsave(&sdkp->zones_wp_offset_lock, flags);
383 		if (!ret)
384 			sd_zbc_parse_report(sdkp, sdkp->zone_wp_update_buf + 64,
385 					    zno, sd_zbc_update_wp_offset_cb,
386 					    sdkp);
387 	}
388 	spin_unlock_irqrestore(&sdkp->zones_wp_offset_lock, flags);
389 
390 	scsi_device_put(sdkp->device);
391 }
392 
393 /**
394  * sd_zbc_prepare_zone_append() - Prepare an emulated ZONE_APPEND command.
395  * @cmd: the command to setup
396  * @lba: the LBA to patch
397  * @nr_blocks: the number of LBAs to be written
398  *
399  * Called from sd_setup_read_write_cmnd() for REQ_OP_ZONE_APPEND.
400  * @sd_zbc_prepare_zone_append() handles the necessary zone wrote locking and
401  * patching of the lba for an emulated ZONE_APPEND command.
402  *
403  * In case the cached write pointer offset is %SD_ZBC_INVALID_WP_OFST it will
404  * schedule a REPORT ZONES command and return BLK_STS_IOERR.
405  */
406 blk_status_t sd_zbc_prepare_zone_append(struct scsi_cmnd *cmd, sector_t *lba,
407 					unsigned int nr_blocks)
408 {
409 	struct request *rq = scsi_cmd_to_rq(cmd);
410 	struct scsi_disk *sdkp = scsi_disk(rq->q->disk);
411 	unsigned int wp_offset, zno = blk_rq_zone_no(rq);
412 	unsigned long flags;
413 	blk_status_t ret;
414 
415 	ret = sd_zbc_cmnd_checks(cmd);
416 	if (ret != BLK_STS_OK)
417 		return ret;
418 
419 	if (!blk_rq_zone_is_seq(rq))
420 		return BLK_STS_IOERR;
421 
422 	/* Unlock of the write lock will happen in sd_zbc_complete() */
423 	if (!blk_req_zone_write_trylock(rq))
424 		return BLK_STS_ZONE_RESOURCE;
425 
426 	spin_lock_irqsave(&sdkp->zones_wp_offset_lock, flags);
427 	wp_offset = sdkp->zones_wp_offset[zno];
428 	switch (wp_offset) {
429 	case SD_ZBC_INVALID_WP_OFST:
430 		/*
431 		 * We are about to schedule work to update a zone write pointer
432 		 * offset, which will cause the zone append command to be
433 		 * requeued. So make sure that the scsi device does not go away
434 		 * while the work is being processed.
435 		 */
436 		if (scsi_device_get(sdkp->device)) {
437 			ret = BLK_STS_IOERR;
438 			break;
439 		}
440 		sdkp->zones_wp_offset[zno] = SD_ZBC_UPDATING_WP_OFST;
441 		schedule_work(&sdkp->zone_wp_offset_work);
442 		fallthrough;
443 	case SD_ZBC_UPDATING_WP_OFST:
444 		ret = BLK_STS_DEV_RESOURCE;
445 		break;
446 	default:
447 		wp_offset = sectors_to_logical(sdkp->device, wp_offset);
448 		if (wp_offset + nr_blocks > sdkp->zone_info.zone_blocks) {
449 			ret = BLK_STS_IOERR;
450 			break;
451 		}
452 
453 		*lba += wp_offset;
454 	}
455 	spin_unlock_irqrestore(&sdkp->zones_wp_offset_lock, flags);
456 	if (ret)
457 		blk_req_zone_write_unlock(rq);
458 	return ret;
459 }
460 
461 /**
462  * sd_zbc_setup_zone_mgmt_cmnd - Prepare a zone ZBC_OUT command. The operations
463  *			can be RESET WRITE POINTER, OPEN, CLOSE or FINISH.
464  * @cmd: the command to setup
465  * @op: Operation to be performed
466  * @all: All zones control
467  *
468  * Called from sd_init_command() for REQ_OP_ZONE_RESET, REQ_OP_ZONE_RESET_ALL,
469  * REQ_OP_ZONE_OPEN, REQ_OP_ZONE_CLOSE or REQ_OP_ZONE_FINISH requests.
470  */
471 blk_status_t sd_zbc_setup_zone_mgmt_cmnd(struct scsi_cmnd *cmd,
472 					 unsigned char op, bool all)
473 {
474 	struct request *rq = scsi_cmd_to_rq(cmd);
475 	sector_t sector = blk_rq_pos(rq);
476 	struct scsi_disk *sdkp = scsi_disk(rq->q->disk);
477 	sector_t block = sectors_to_logical(sdkp->device, sector);
478 	blk_status_t ret;
479 
480 	ret = sd_zbc_cmnd_checks(cmd);
481 	if (ret != BLK_STS_OK)
482 		return ret;
483 
484 	cmd->cmd_len = 16;
485 	memset(cmd->cmnd, 0, cmd->cmd_len);
486 	cmd->cmnd[0] = ZBC_OUT;
487 	cmd->cmnd[1] = op;
488 	if (all)
489 		cmd->cmnd[14] = 0x1;
490 	else
491 		put_unaligned_be64(block, &cmd->cmnd[2]);
492 
493 	rq->timeout = SD_TIMEOUT;
494 	cmd->sc_data_direction = DMA_NONE;
495 	cmd->transfersize = 0;
496 	cmd->allowed = 0;
497 
498 	return BLK_STS_OK;
499 }
500 
501 static bool sd_zbc_need_zone_wp_update(struct request *rq)
502 {
503 	switch (req_op(rq)) {
504 	case REQ_OP_ZONE_APPEND:
505 	case REQ_OP_ZONE_FINISH:
506 	case REQ_OP_ZONE_RESET:
507 	case REQ_OP_ZONE_RESET_ALL:
508 		return true;
509 	case REQ_OP_WRITE:
510 	case REQ_OP_WRITE_ZEROES:
511 		return blk_rq_zone_is_seq(rq);
512 	default:
513 		return false;
514 	}
515 }
516 
517 /**
518  * sd_zbc_zone_wp_update - Update cached zone write pointer upon cmd completion
519  * @cmd: Completed command
520  * @good_bytes: Command reply bytes
521  *
522  * Called from sd_zbc_complete() to handle the update of the cached zone write
523  * pointer value in case an update is needed.
524  */
525 static unsigned int sd_zbc_zone_wp_update(struct scsi_cmnd *cmd,
526 					  unsigned int good_bytes)
527 {
528 	int result = cmd->result;
529 	struct request *rq = scsi_cmd_to_rq(cmd);
530 	struct scsi_disk *sdkp = scsi_disk(rq->q->disk);
531 	unsigned int zno = blk_rq_zone_no(rq);
532 	enum req_op op = req_op(rq);
533 	unsigned long flags;
534 
535 	/*
536 	 * If we got an error for a command that needs updating the write
537 	 * pointer offset cache, we must mark the zone wp offset entry as
538 	 * invalid to force an update from disk the next time a zone append
539 	 * command is issued.
540 	 */
541 	spin_lock_irqsave(&sdkp->zones_wp_offset_lock, flags);
542 
543 	if (result && op != REQ_OP_ZONE_RESET_ALL) {
544 		if (op == REQ_OP_ZONE_APPEND) {
545 			/* Force complete completion (no retry) */
546 			good_bytes = 0;
547 			scsi_set_resid(cmd, blk_rq_bytes(rq));
548 		}
549 
550 		/*
551 		 * Force an update of the zone write pointer offset on
552 		 * the next zone append access.
553 		 */
554 		if (sdkp->zones_wp_offset[zno] != SD_ZBC_UPDATING_WP_OFST)
555 			sdkp->zones_wp_offset[zno] = SD_ZBC_INVALID_WP_OFST;
556 		goto unlock_wp_offset;
557 	}
558 
559 	switch (op) {
560 	case REQ_OP_ZONE_APPEND:
561 		rq->__sector += sdkp->zones_wp_offset[zno];
562 		fallthrough;
563 	case REQ_OP_WRITE_ZEROES:
564 	case REQ_OP_WRITE:
565 		if (sdkp->zones_wp_offset[zno] < sd_zbc_zone_sectors(sdkp))
566 			sdkp->zones_wp_offset[zno] +=
567 						good_bytes >> SECTOR_SHIFT;
568 		break;
569 	case REQ_OP_ZONE_RESET:
570 		sdkp->zones_wp_offset[zno] = 0;
571 		break;
572 	case REQ_OP_ZONE_FINISH:
573 		sdkp->zones_wp_offset[zno] = sd_zbc_zone_sectors(sdkp);
574 		break;
575 	case REQ_OP_ZONE_RESET_ALL:
576 		memset(sdkp->zones_wp_offset, 0,
577 		       sdkp->zone_info.nr_zones * sizeof(unsigned int));
578 		break;
579 	default:
580 		break;
581 	}
582 
583 unlock_wp_offset:
584 	spin_unlock_irqrestore(&sdkp->zones_wp_offset_lock, flags);
585 
586 	return good_bytes;
587 }
588 
589 /**
590  * sd_zbc_complete - ZBC command post processing.
591  * @cmd: Completed command
592  * @good_bytes: Command reply bytes
593  * @sshdr: command sense header
594  *
595  * Called from sd_done() to handle zone commands errors and updates to the
596  * device queue zone write pointer offset cahce.
597  */
598 unsigned int sd_zbc_complete(struct scsi_cmnd *cmd, unsigned int good_bytes,
599 		     struct scsi_sense_hdr *sshdr)
600 {
601 	int result = cmd->result;
602 	struct request *rq = scsi_cmd_to_rq(cmd);
603 
604 	if (op_is_zone_mgmt(req_op(rq)) &&
605 	    result &&
606 	    sshdr->sense_key == ILLEGAL_REQUEST &&
607 	    sshdr->asc == 0x24) {
608 		/*
609 		 * INVALID FIELD IN CDB error: a zone management command was
610 		 * attempted on a conventional zone. Nothing to worry about,
611 		 * so be quiet about the error.
612 		 */
613 		rq->rq_flags |= RQF_QUIET;
614 	} else if (sd_zbc_need_zone_wp_update(rq))
615 		good_bytes = sd_zbc_zone_wp_update(cmd, good_bytes);
616 
617 	if (req_op(rq) == REQ_OP_ZONE_APPEND)
618 		blk_req_zone_write_unlock(rq);
619 
620 	return good_bytes;
621 }
622 
623 /**
624  * sd_zbc_check_zoned_characteristics - Check zoned block device characteristics
625  * @sdkp: Target disk
626  * @buf: Buffer where to store the VPD page data
627  *
628  * Read VPD page B6, get information and check that reads are unconstrained.
629  */
630 static int sd_zbc_check_zoned_characteristics(struct scsi_disk *sdkp,
631 					      unsigned char *buf)
632 {
633 	u64 zone_starting_lba_gran;
634 
635 	if (scsi_get_vpd_page(sdkp->device, 0xb6, buf, 64)) {
636 		sd_printk(KERN_NOTICE, sdkp,
637 			  "Read zoned characteristics VPD page failed\n");
638 		return -ENODEV;
639 	}
640 
641 	if (sdkp->device->type != TYPE_ZBC) {
642 		/* Host-aware */
643 		sdkp->urswrz = 1;
644 		sdkp->zones_optimal_open = get_unaligned_be32(&buf[8]);
645 		sdkp->zones_optimal_nonseq = get_unaligned_be32(&buf[12]);
646 		sdkp->zones_max_open = 0;
647 		return 0;
648 	}
649 
650 	/* Host-managed */
651 	sdkp->urswrz = buf[4] & 1;
652 	sdkp->zones_optimal_open = 0;
653 	sdkp->zones_optimal_nonseq = 0;
654 	sdkp->zones_max_open = get_unaligned_be32(&buf[16]);
655 	/* Check zone alignment method */
656 	switch (buf[23] & 0xf) {
657 	case 0:
658 	case ZBC_CONSTANT_ZONE_LENGTH:
659 		/* Use zone length */
660 		break;
661 	case ZBC_CONSTANT_ZONE_START_OFFSET:
662 		zone_starting_lba_gran = get_unaligned_be64(&buf[24]);
663 		if (zone_starting_lba_gran == 0 ||
664 		    !is_power_of_2(zone_starting_lba_gran) ||
665 		    logical_to_sectors(sdkp->device, zone_starting_lba_gran) >
666 		    UINT_MAX) {
667 			sd_printk(KERN_ERR, sdkp,
668 				  "Invalid zone starting LBA granularity %llu\n",
669 				  zone_starting_lba_gran);
670 			return -ENODEV;
671 		}
672 		sdkp->zone_starting_lba_gran = zone_starting_lba_gran;
673 		break;
674 	default:
675 		sd_printk(KERN_ERR, sdkp, "Invalid zone alignment method\n");
676 		return -ENODEV;
677 	}
678 
679 	/*
680 	 * Check for unconstrained reads: host-managed devices with
681 	 * constrained reads (drives failing read after write pointer)
682 	 * are not supported.
683 	 */
684 	if (!sdkp->urswrz) {
685 		if (sdkp->first_scan)
686 			sd_printk(KERN_NOTICE, sdkp,
687 			  "constrained reads devices are not supported\n");
688 		return -ENODEV;
689 	}
690 
691 	return 0;
692 }
693 
694 /**
695  * sd_zbc_check_capacity - Check the device capacity
696  * @sdkp: Target disk
697  * @buf: command buffer
698  * @zblocks: zone size in logical blocks
699  *
700  * Get the device zone size and check that the device capacity as reported
701  * by READ CAPACITY matches the max_lba value (plus one) of the report zones
702  * command reply for devices with RC_BASIS == 0.
703  *
704  * Returns 0 upon success or an error code upon failure.
705  */
706 static int sd_zbc_check_capacity(struct scsi_disk *sdkp, unsigned char *buf,
707 				 u32 *zblocks)
708 {
709 	u64 zone_blocks;
710 	sector_t max_lba;
711 	unsigned char *rec;
712 	int ret;
713 
714 	/* Do a report zone to get max_lba and the size of the first zone */
715 	ret = sd_zbc_do_report_zones(sdkp, buf, SD_BUF_SIZE, 0, false);
716 	if (ret)
717 		return ret;
718 
719 	if (sdkp->rc_basis == 0) {
720 		/* The max_lba field is the capacity of this device */
721 		max_lba = get_unaligned_be64(&buf[8]);
722 		if (sdkp->capacity != max_lba + 1) {
723 			if (sdkp->first_scan)
724 				sd_printk(KERN_WARNING, sdkp,
725 					"Changing capacity from %llu to max LBA+1 %llu\n",
726 					(unsigned long long)sdkp->capacity,
727 					(unsigned long long)max_lba + 1);
728 			sdkp->capacity = max_lba + 1;
729 		}
730 	}
731 
732 	if (sdkp->zone_starting_lba_gran == 0) {
733 		/* Get the size of the first reported zone */
734 		rec = buf + 64;
735 		zone_blocks = get_unaligned_be64(&rec[8]);
736 		if (logical_to_sectors(sdkp->device, zone_blocks) > UINT_MAX) {
737 			if (sdkp->first_scan)
738 				sd_printk(KERN_NOTICE, sdkp,
739 					  "Zone size too large\n");
740 			return -EFBIG;
741 		}
742 	} else {
743 		zone_blocks = sdkp->zone_starting_lba_gran;
744 	}
745 
746 	if (!is_power_of_2(zone_blocks)) {
747 		sd_printk(KERN_ERR, sdkp,
748 			  "Zone size %llu is not a power of two.\n",
749 			  zone_blocks);
750 		return -EINVAL;
751 	}
752 
753 	*zblocks = zone_blocks;
754 
755 	return 0;
756 }
757 
758 static void sd_zbc_print_zones(struct scsi_disk *sdkp)
759 {
760 	if (!sd_is_zoned(sdkp) || !sdkp->capacity)
761 		return;
762 
763 	if (sdkp->capacity & (sdkp->zone_info.zone_blocks - 1))
764 		sd_printk(KERN_NOTICE, sdkp,
765 			  "%u zones of %u logical blocks + 1 runt zone\n",
766 			  sdkp->zone_info.nr_zones - 1,
767 			  sdkp->zone_info.zone_blocks);
768 	else
769 		sd_printk(KERN_NOTICE, sdkp,
770 			  "%u zones of %u logical blocks\n",
771 			  sdkp->zone_info.nr_zones,
772 			  sdkp->zone_info.zone_blocks);
773 }
774 
775 static int sd_zbc_init_disk(struct scsi_disk *sdkp)
776 {
777 	sdkp->zones_wp_offset = NULL;
778 	spin_lock_init(&sdkp->zones_wp_offset_lock);
779 	sdkp->rev_wp_offset = NULL;
780 	mutex_init(&sdkp->rev_mutex);
781 	INIT_WORK(&sdkp->zone_wp_offset_work, sd_zbc_update_wp_offset_workfn);
782 	sdkp->zone_wp_update_buf = kzalloc(SD_BUF_SIZE, GFP_KERNEL);
783 	if (!sdkp->zone_wp_update_buf)
784 		return -ENOMEM;
785 
786 	return 0;
787 }
788 
789 void sd_zbc_free_zone_info(struct scsi_disk *sdkp)
790 {
791 	if (!sdkp->zone_wp_update_buf)
792 		return;
793 
794 	/* Serialize against revalidate zones */
795 	mutex_lock(&sdkp->rev_mutex);
796 
797 	kvfree(sdkp->zones_wp_offset);
798 	sdkp->zones_wp_offset = NULL;
799 	kfree(sdkp->zone_wp_update_buf);
800 	sdkp->zone_wp_update_buf = NULL;
801 
802 	sdkp->early_zone_info = (struct zoned_disk_info){ };
803 	sdkp->zone_info = (struct zoned_disk_info){ };
804 
805 	mutex_unlock(&sdkp->rev_mutex);
806 }
807 
808 static void sd_zbc_revalidate_zones_cb(struct gendisk *disk)
809 {
810 	struct scsi_disk *sdkp = scsi_disk(disk);
811 
812 	swap(sdkp->zones_wp_offset, sdkp->rev_wp_offset);
813 }
814 
815 /*
816  * Call blk_revalidate_disk_zones() if any of the zoned disk properties have
817  * changed that make it necessary to call that function. Called by
818  * sd_revalidate_disk() after the gendisk capacity has been set.
819  */
820 int sd_zbc_revalidate_zones(struct scsi_disk *sdkp)
821 {
822 	struct gendisk *disk = sdkp->disk;
823 	struct request_queue *q = disk->queue;
824 	u32 zone_blocks = sdkp->early_zone_info.zone_blocks;
825 	unsigned int nr_zones = sdkp->early_zone_info.nr_zones;
826 	u32 max_append;
827 	int ret = 0;
828 	unsigned int flags;
829 
830 	/*
831 	 * For all zoned disks, initialize zone append emulation data if not
832 	 * already done. This is necessary also for host-aware disks used as
833 	 * regular disks due to the presence of partitions as these partitions
834 	 * may be deleted and the disk zoned model changed back from
835 	 * BLK_ZONED_NONE to BLK_ZONED_HA.
836 	 */
837 	if (sd_is_zoned(sdkp) && !sdkp->zone_wp_update_buf) {
838 		ret = sd_zbc_init_disk(sdkp);
839 		if (ret)
840 			return ret;
841 	}
842 
843 	/*
844 	 * There is nothing to do for regular disks, including host-aware disks
845 	 * that have partitions.
846 	 */
847 	if (!blk_queue_is_zoned(q))
848 		return 0;
849 
850 	/*
851 	 * Make sure revalidate zones are serialized to ensure exclusive
852 	 * updates of the scsi disk data.
853 	 */
854 	mutex_lock(&sdkp->rev_mutex);
855 
856 	if (sdkp->zone_info.zone_blocks == zone_blocks &&
857 	    sdkp->zone_info.nr_zones == nr_zones &&
858 	    disk->nr_zones == nr_zones)
859 		goto unlock;
860 
861 	flags = memalloc_noio_save();
862 	sdkp->zone_info.zone_blocks = zone_blocks;
863 	sdkp->zone_info.nr_zones = nr_zones;
864 	sdkp->rev_wp_offset = kvcalloc(nr_zones, sizeof(u32), GFP_KERNEL);
865 	if (!sdkp->rev_wp_offset) {
866 		ret = -ENOMEM;
867 		memalloc_noio_restore(flags);
868 		goto unlock;
869 	}
870 
871 	ret = blk_revalidate_disk_zones(disk, sd_zbc_revalidate_zones_cb);
872 
873 	memalloc_noio_restore(flags);
874 	kvfree(sdkp->rev_wp_offset);
875 	sdkp->rev_wp_offset = NULL;
876 
877 	if (ret) {
878 		sdkp->zone_info = (struct zoned_disk_info){ };
879 		sdkp->capacity = 0;
880 		goto unlock;
881 	}
882 
883 	max_append = min_t(u32, logical_to_sectors(sdkp->device, zone_blocks),
884 			   q->limits.max_segments << (PAGE_SHIFT - 9));
885 	max_append = min_t(u32, max_append, queue_max_hw_sectors(q));
886 
887 	blk_queue_max_zone_append_sectors(q, max_append);
888 
889 	sd_zbc_print_zones(sdkp);
890 
891 unlock:
892 	mutex_unlock(&sdkp->rev_mutex);
893 
894 	return ret;
895 }
896 
897 /**
898  * sd_zbc_read_zones - Read zone information and update the request queue
899  * @sdkp: SCSI disk pointer.
900  * @buf: 512 byte buffer used for storing SCSI command output.
901  *
902  * Read zone information and update the request queue zone characteristics and
903  * also the zoned device information in *sdkp. Called by sd_revalidate_disk()
904  * before the gendisk capacity has been set.
905  */
906 int sd_zbc_read_zones(struct scsi_disk *sdkp, u8 buf[SD_BUF_SIZE])
907 {
908 	struct gendisk *disk = sdkp->disk;
909 	struct request_queue *q = disk->queue;
910 	unsigned int nr_zones;
911 	u32 zone_blocks = 0;
912 	int ret;
913 
914 	if (!sd_is_zoned(sdkp)) {
915 		/*
916 		 * Device managed or normal SCSI disk, no special handling
917 		 * required. Nevertheless, free the disk zone information in
918 		 * case the device type changed.
919 		 */
920 		sd_zbc_free_zone_info(sdkp);
921 		return 0;
922 	}
923 
924 	/* READ16/WRITE16 is mandatory for ZBC disks */
925 	sdkp->device->use_16_for_rw = 1;
926 	sdkp->device->use_10_for_rw = 0;
927 
928 	if (!blk_queue_is_zoned(q)) {
929 		/*
930 		 * This can happen for a host aware disk with partitions.
931 		 * The block device zone model was already cleared by
932 		 * disk_set_zoned(). Only free the scsi disk zone
933 		 * information and exit early.
934 		 */
935 		sd_zbc_free_zone_info(sdkp);
936 		return 0;
937 	}
938 
939 	/* Check zoned block device characteristics (unconstrained reads) */
940 	ret = sd_zbc_check_zoned_characteristics(sdkp, buf);
941 	if (ret)
942 		goto err;
943 
944 	/* Check the device capacity reported by report zones */
945 	ret = sd_zbc_check_capacity(sdkp, buf, &zone_blocks);
946 	if (ret != 0)
947 		goto err;
948 
949 	/* The drive satisfies the kernel restrictions: set it up */
950 	blk_queue_flag_set(QUEUE_FLAG_ZONE_RESETALL, q);
951 	blk_queue_required_elevator_features(q, ELEVATOR_F_ZBD_SEQ_WRITE);
952 	if (sdkp->zones_max_open == U32_MAX)
953 		disk_set_max_open_zones(disk, 0);
954 	else
955 		disk_set_max_open_zones(disk, sdkp->zones_max_open);
956 	disk_set_max_active_zones(disk, 0);
957 	nr_zones = round_up(sdkp->capacity, zone_blocks) >> ilog2(zone_blocks);
958 
959 	/*
960 	 * Per ZBC and ZAC specifications, writes in sequential write required
961 	 * zones of host-managed devices must be aligned to the device physical
962 	 * block size.
963 	 */
964 	if (blk_queue_zoned_model(q) == BLK_ZONED_HM)
965 		blk_queue_zone_write_granularity(q, sdkp->physical_block_size);
966 
967 	sdkp->early_zone_info.nr_zones = nr_zones;
968 	sdkp->early_zone_info.zone_blocks = zone_blocks;
969 
970 	return 0;
971 
972 err:
973 	sdkp->capacity = 0;
974 
975 	return ret;
976 }
977