xref: /openbmc/linux/drivers/nvme/host/zns.c (revision 3a83e4e6)
1 // SPDX-License-Identifier: GPL-2.0
2 /*
3  * Copyright (C) 2020 Western Digital Corporation or its affiliates.
4  */
5 
6 #include <linux/blkdev.h>
7 #include <linux/vmalloc.h>
8 #include "nvme.h"
9 
10 static int nvme_set_max_append(struct nvme_ctrl *ctrl)
11 {
12 	struct nvme_command c = { };
13 	struct nvme_id_ctrl_zns *id;
14 	int status;
15 
16 	id = kzalloc(sizeof(*id), GFP_KERNEL);
17 	if (!id)
18 		return -ENOMEM;
19 
20 	c.identify.opcode = nvme_admin_identify;
21 	c.identify.cns = NVME_ID_CNS_CS_CTRL;
22 	c.identify.csi = NVME_CSI_ZNS;
23 
24 	status = nvme_submit_sync_cmd(ctrl->admin_q, &c, id, sizeof(*id));
25 	if (status) {
26 		kfree(id);
27 		return status;
28 	}
29 
30 	if (id->zasl)
31 		ctrl->max_zone_append = 1 << (id->zasl + 3);
32 	else
33 		ctrl->max_zone_append = ctrl->max_hw_sectors;
34 	kfree(id);
35 	return 0;
36 }
37 
38 int nvme_update_zone_info(struct gendisk *disk, struct nvme_ns *ns,
39 			  unsigned lbaf)
40 {
41 	struct nvme_effects_log *log = ns->head->effects;
42 	struct request_queue *q = disk->queue;
43 	struct nvme_command c = { };
44 	struct nvme_id_ns_zns *id;
45 	int status;
46 
47 	/* Driver requires zone append support */
48 	if (!(le32_to_cpu(log->iocs[nvme_cmd_zone_append]) &
49 			NVME_CMD_EFFECTS_CSUPP)) {
50 		dev_warn(ns->ctrl->device,
51 			"append not supported for zoned namespace:%d\n",
52 			ns->head->ns_id);
53 		return -EINVAL;
54 	}
55 
56 	/* Lazily query controller append limit for the first zoned namespace */
57 	if (!ns->ctrl->max_zone_append) {
58 		status = nvme_set_max_append(ns->ctrl);
59 		if (status)
60 			return status;
61 	}
62 
63 	id = kzalloc(sizeof(*id), GFP_KERNEL);
64 	if (!id)
65 		return -ENOMEM;
66 
67 	c.identify.opcode = nvme_admin_identify;
68 	c.identify.nsid = cpu_to_le32(ns->head->ns_id);
69 	c.identify.cns = NVME_ID_CNS_CS_NS;
70 	c.identify.csi = NVME_CSI_ZNS;
71 
72 	status = nvme_submit_sync_cmd(ns->ctrl->admin_q, &c, id, sizeof(*id));
73 	if (status)
74 		goto free_data;
75 
76 	/*
77 	 * We currently do not handle devices requiring any of the zoned
78 	 * operation characteristics.
79 	 */
80 	if (id->zoc) {
81 		dev_warn(ns->ctrl->device,
82 			"zone operations:%x not supported for namespace:%u\n",
83 			le16_to_cpu(id->zoc), ns->head->ns_id);
84 		status = -EINVAL;
85 		goto free_data;
86 	}
87 
88 	ns->zsze = nvme_lba_to_sect(ns, le64_to_cpu(id->lbafe[lbaf].zsze));
89 	if (!is_power_of_2(ns->zsze)) {
90 		dev_warn(ns->ctrl->device,
91 			"invalid zone size:%llu for namespace:%u\n",
92 			ns->zsze, ns->head->ns_id);
93 		status = -EINVAL;
94 		goto free_data;
95 	}
96 
97 	q->limits.zoned = BLK_ZONED_HM;
98 	blk_queue_flag_set(QUEUE_FLAG_ZONE_RESETALL, q);
99 	blk_queue_max_open_zones(q, le32_to_cpu(id->mor) + 1);
100 	blk_queue_max_active_zones(q, le32_to_cpu(id->mar) + 1);
101 free_data:
102 	kfree(id);
103 	return status;
104 }
105 
106 static void *nvme_zns_alloc_report_buffer(struct nvme_ns *ns,
107 					  unsigned int nr_zones, size_t *buflen)
108 {
109 	struct request_queue *q = ns->disk->queue;
110 	size_t bufsize;
111 	void *buf;
112 
113 	const size_t min_bufsize = sizeof(struct nvme_zone_report) +
114 				   sizeof(struct nvme_zone_descriptor);
115 
116 	nr_zones = min_t(unsigned int, nr_zones,
117 			 get_capacity(ns->disk) >> ilog2(ns->zsze));
118 
119 	bufsize = sizeof(struct nvme_zone_report) +
120 		nr_zones * sizeof(struct nvme_zone_descriptor);
121 	bufsize = min_t(size_t, bufsize,
122 			queue_max_hw_sectors(q) << SECTOR_SHIFT);
123 	bufsize = min_t(size_t, bufsize, queue_max_segments(q) << PAGE_SHIFT);
124 
125 	while (bufsize >= min_bufsize) {
126 		buf = __vmalloc(bufsize, GFP_KERNEL | __GFP_NORETRY);
127 		if (buf) {
128 			*buflen = bufsize;
129 			return buf;
130 		}
131 		bufsize >>= 1;
132 	}
133 	return NULL;
134 }
135 
136 static int __nvme_ns_report_zones(struct nvme_ns *ns, sector_t sector,
137 				  struct nvme_zone_report *report,
138 				  size_t buflen)
139 {
140 	struct nvme_command c = { };
141 	int ret;
142 
143 	c.zmr.opcode = nvme_cmd_zone_mgmt_recv;
144 	c.zmr.nsid = cpu_to_le32(ns->head->ns_id);
145 	c.zmr.slba = cpu_to_le64(nvme_sect_to_lba(ns, sector));
146 	c.zmr.numd = cpu_to_le32(nvme_bytes_to_numd(buflen));
147 	c.zmr.zra = NVME_ZRA_ZONE_REPORT;
148 	c.zmr.zrasf = NVME_ZRASF_ZONE_REPORT_ALL;
149 	c.zmr.pr = NVME_REPORT_ZONE_PARTIAL;
150 
151 	ret = nvme_submit_sync_cmd(ns->queue, &c, report, buflen);
152 	if (ret)
153 		return ret;
154 
155 	return le64_to_cpu(report->nr_zones);
156 }
157 
158 static int nvme_zone_parse_entry(struct nvme_ns *ns,
159 				 struct nvme_zone_descriptor *entry,
160 				 unsigned int idx, report_zones_cb cb,
161 				 void *data)
162 {
163 	struct blk_zone zone = { };
164 
165 	if ((entry->zt & 0xf) != NVME_ZONE_TYPE_SEQWRITE_REQ) {
166 		dev_err(ns->ctrl->device, "invalid zone type %#x\n",
167 				entry->zt);
168 		return -EINVAL;
169 	}
170 
171 	zone.type = BLK_ZONE_TYPE_SEQWRITE_REQ;
172 	zone.cond = entry->zs >> 4;
173 	zone.len = ns->zsze;
174 	zone.capacity = nvme_lba_to_sect(ns, le64_to_cpu(entry->zcap));
175 	zone.start = nvme_lba_to_sect(ns, le64_to_cpu(entry->zslba));
176 	zone.wp = nvme_lba_to_sect(ns, le64_to_cpu(entry->wp));
177 
178 	return cb(&zone, idx, data);
179 }
180 
181 static int nvme_ns_report_zones(struct nvme_ns *ns, sector_t sector,
182 			unsigned int nr_zones, report_zones_cb cb, void *data)
183 {
184 	struct nvme_zone_report *report;
185 	int ret, zone_idx = 0;
186 	unsigned int nz, i;
187 	size_t buflen;
188 
189 	report = nvme_zns_alloc_report_buffer(ns, nr_zones, &buflen);
190 	if (!report)
191 		return -ENOMEM;
192 
193 	sector &= ~(ns->zsze - 1);
194 	while (zone_idx < nr_zones && sector < get_capacity(ns->disk)) {
195 		memset(report, 0, buflen);
196 		ret = __nvme_ns_report_zones(ns, sector, report, buflen);
197 		if (ret < 0)
198 			goto out_free;
199 
200 		nz = min_t(unsigned int, ret, nr_zones);
201 		if (!nz)
202 			break;
203 
204 		for (i = 0; i < nz && zone_idx < nr_zones; i++) {
205 			ret = nvme_zone_parse_entry(ns, &report->entries[i],
206 						    zone_idx, cb, data);
207 			if (ret)
208 				goto out_free;
209 			zone_idx++;
210 		}
211 
212 		sector += ns->zsze * nz;
213 	}
214 
215 	if (zone_idx > 0)
216 		ret = zone_idx;
217 	else
218 		ret = -EINVAL;
219 out_free:
220 	kvfree(report);
221 	return ret;
222 }
223 
224 int nvme_report_zones(struct gendisk *disk, sector_t sector,
225 		      unsigned int nr_zones, report_zones_cb cb, void *data)
226 {
227 	struct nvme_ns_head *head = NULL;
228 	struct nvme_ns *ns;
229 	int srcu_idx, ret;
230 
231 	ns = nvme_get_ns_from_disk(disk, &head, &srcu_idx);
232 	if (unlikely(!ns))
233 		return -EWOULDBLOCK;
234 
235 	if (ns->head->ids.csi == NVME_CSI_ZNS)
236 		ret = nvme_ns_report_zones(ns, sector, nr_zones, cb, data);
237 	else
238 		ret = -EINVAL;
239 	nvme_put_ns_from_disk(head, srcu_idx);
240 
241 	return ret;
242 }
243 
244 blk_status_t nvme_setup_zone_mgmt_send(struct nvme_ns *ns, struct request *req,
245 		struct nvme_command *c, enum nvme_zone_mgmt_action action)
246 {
247 	c->zms.opcode = nvme_cmd_zone_mgmt_send;
248 	c->zms.nsid = cpu_to_le32(ns->head->ns_id);
249 	c->zms.slba = cpu_to_le64(nvme_sect_to_lba(ns, blk_rq_pos(req)));
250 	c->zms.zsa = action;
251 
252 	if (req_op(req) == REQ_OP_ZONE_RESET_ALL)
253 		c->zms.select_all = 1;
254 
255 	return BLK_STS_OK;
256 }
257