xref: /openbmc/linux/drivers/nvme/host/zns.c (revision d226b0a2)
1240e6ee2SKeith Busch // SPDX-License-Identifier: GPL-2.0
2240e6ee2SKeith Busch /*
3240e6ee2SKeith Busch  * Copyright (C) 2020 Western Digital Corporation or its affiliates.
4240e6ee2SKeith Busch  */
5240e6ee2SKeith Busch 
6240e6ee2SKeith Busch #include <linux/blkdev.h>
7240e6ee2SKeith Busch #include <linux/vmalloc.h>
8240e6ee2SKeith Busch #include "nvme.h"
9240e6ee2SKeith Busch 
nvme_revalidate_zones(struct nvme_ns * ns)107fad20ddSChristoph Hellwig int nvme_revalidate_zones(struct nvme_ns *ns)
117fad20ddSChristoph Hellwig {
12e6ad5598SChaitanya Kulkarni 	struct request_queue *q = ns->queue;
13e6ad5598SChaitanya Kulkarni 
14*d226b0a2SDamien Le Moal 	blk_queue_chunk_sectors(q, ns->zsze);
15e6ad5598SChaitanya Kulkarni 	blk_queue_max_zone_append_sectors(q, ns->ctrl->max_zone_append);
16*d226b0a2SDamien Le Moal 
17*d226b0a2SDamien Le Moal 	return blk_revalidate_disk_zones(ns->disk, NULL);
187fad20ddSChristoph Hellwig }
197fad20ddSChristoph Hellwig 
nvme_set_max_append(struct nvme_ctrl * ctrl)20240e6ee2SKeith Busch static int nvme_set_max_append(struct nvme_ctrl *ctrl)
21240e6ee2SKeith Busch {
22240e6ee2SKeith Busch 	struct nvme_command c = { };
23240e6ee2SKeith Busch 	struct nvme_id_ctrl_zns *id;
24240e6ee2SKeith Busch 	int status;
25240e6ee2SKeith Busch 
26240e6ee2SKeith Busch 	id = kzalloc(sizeof(*id), GFP_KERNEL);
27240e6ee2SKeith Busch 	if (!id)
28240e6ee2SKeith Busch 		return -ENOMEM;
29240e6ee2SKeith Busch 
30240e6ee2SKeith Busch 	c.identify.opcode = nvme_admin_identify;
31240e6ee2SKeith Busch 	c.identify.cns = NVME_ID_CNS_CS_CTRL;
32240e6ee2SKeith Busch 	c.identify.csi = NVME_CSI_ZNS;
33240e6ee2SKeith Busch 
34240e6ee2SKeith Busch 	status = nvme_submit_sync_cmd(ctrl->admin_q, &c, id, sizeof(*id));
35240e6ee2SKeith Busch 	if (status) {
36240e6ee2SKeith Busch 		kfree(id);
37240e6ee2SKeith Busch 		return status;
38240e6ee2SKeith Busch 	}
39240e6ee2SKeith Busch 
40240e6ee2SKeith Busch 	if (id->zasl)
41240e6ee2SKeith Busch 		ctrl->max_zone_append = 1 << (id->zasl + 3);
42240e6ee2SKeith Busch 	else
43240e6ee2SKeith Busch 		ctrl->max_zone_append = ctrl->max_hw_sectors;
44240e6ee2SKeith Busch 	kfree(id);
45240e6ee2SKeith Busch 	return 0;
46240e6ee2SKeith Busch }
47240e6ee2SKeith Busch 
/*
 * Probe the ZNS properties of a namespace for the given LBA format and
 * program the zoned model, zone size and open/active zone limits on the
 * namespace's queue/disk.
 *
 * Returns 0 on success, -ENOMEM on allocation failure, -ENODEV when the
 * namespace reports features the driver cannot handle (non-zero ZOC or a
 * non-power-of-two zone size), or the status of a failed identify command.
 */
int nvme_update_zone_info(struct nvme_ns *ns, unsigned lbaf)
{
	struct nvme_effects_log *log = ns->head->effects;
	struct request_queue *q = ns->queue;
	struct nvme_command c = { };
	struct nvme_id_ns_zns *id;
	int status;

	/* Driver requires zone append support */
	if ((le32_to_cpu(log->iocs[nvme_cmd_zone_append]) &
			NVME_CMD_EFFECTS_CSUPP)) {
		/* Append is supported: drop a previously forced RO mode. */
		if (test_and_clear_bit(NVME_NS_FORCE_RO, &ns->flags))
			dev_warn(ns->ctrl->device,
				 "Zone Append supported for zoned namespace:%d. Remove read-only mode\n",
				 ns->head->ns_id);
	} else {
		/* No append support: keep the namespace usable, but read-only. */
		set_bit(NVME_NS_FORCE_RO, &ns->flags);
		dev_warn(ns->ctrl->device,
			 "Zone Append not supported for zoned namespace:%d. Forcing to read-only mode\n",
			 ns->head->ns_id);
	}

	/* Lazily query controller append limit for the first zoned namespace */
	if (!ns->ctrl->max_zone_append) {
		status = nvme_set_max_append(ns->ctrl);
		if (status)
			return status;
	}

	id = kzalloc(sizeof(*id), GFP_KERNEL);
	if (!id)
		return -ENOMEM;

	/* Identify Namespace, ZNS command-set specific data structure. */
	c.identify.opcode = nvme_admin_identify;
	c.identify.nsid = cpu_to_le32(ns->head->ns_id);
	c.identify.cns = NVME_ID_CNS_CS_NS;
	c.identify.csi = NVME_CSI_ZNS;

	status = nvme_submit_sync_cmd(ns->ctrl->admin_q, &c, id, sizeof(*id));
	if (status)
		goto free_data;

	/*
	 * We currently do not handle devices requiring any of the zoned
	 * operation characteristics.
	 */
	if (id->zoc) {
		dev_warn(ns->ctrl->device,
			"zone operations:%x not supported for namespace:%u\n",
			le16_to_cpu(id->zoc), ns->head->ns_id);
		status = -ENODEV;
		goto free_data;
	}

	/* Zone size for this LBA format, converted from LBAs to 512B sectors. */
	ns->zsze = nvme_lba_to_sect(ns, le64_to_cpu(id->lbafe[lbaf].zsze));
	/* The block layer requires power-of-two zone sizes. */
	if (!is_power_of_2(ns->zsze)) {
		dev_warn(ns->ctrl->device,
			"invalid zone size:%llu for namespace:%u\n",
			ns->zsze, ns->head->ns_id);
		status = -ENODEV;
		goto free_data;
	}

	/* Host-managed zoned model; the device supports Reset Zone "all". */
	disk_set_zoned(ns->disk, BLK_ZONED_HM);
	blk_queue_flag_set(QUEUE_FLAG_ZONE_RESETALL, q);
	/* MOR/MAR are 0's based values, hence the +1. */
	disk_set_max_open_zones(ns->disk, le32_to_cpu(id->mor) + 1);
	disk_set_max_active_zones(ns->disk, le32_to_cpu(id->mar) + 1);
free_data:
	kfree(id);
	return status;
}
119240e6ee2SKeith Busch 
/*
 * Allocate a buffer for a Zone Management Receive report, sized for up
 * to @nr_zones descriptors but clamped to what one command can transfer.
 * Halves the request on allocation failure until it no longer fits even
 * a single descriptor.  On success *buflen is set to the usable size and
 * the (vmalloc'ed) buffer is returned; the caller frees it with kvfree().
 */
static void *nvme_zns_alloc_report_buffer(struct nvme_ns *ns,
					  unsigned int nr_zones, size_t *buflen)
{
	struct request_queue *q = ns->disk->queue;
	const size_t min_bufsize = sizeof(struct nvme_zone_report) +
				   sizeof(struct nvme_zone_descriptor);
	size_t bufsize;

	/* No point sizing for more zones than the namespace has. */
	nr_zones = min_t(unsigned int, nr_zones,
			 get_capacity(ns->disk) >> ilog2(ns->zsze));

	bufsize = sizeof(struct nvme_zone_report) +
		  nr_zones * sizeof(struct nvme_zone_descriptor);
	/* Clamp to the queue's per-command transfer and segment limits. */
	bufsize = min_t(size_t, bufsize,
			queue_max_hw_sectors(q) << SECTOR_SHIFT);
	bufsize = min_t(size_t, bufsize, queue_max_segments(q) << PAGE_SHIFT);

	/* Back off opportunistically rather than reclaiming aggressively. */
	for (; bufsize >= min_bufsize; bufsize >>= 1) {
		void *buf = __vmalloc(bufsize, GFP_KERNEL | __GFP_NORETRY);

		if (buf) {
			*buflen = bufsize;
			return buf;
		}
	}
	return NULL;
}
149240e6ee2SKeith Busch 
/*
 * Translate one NVMe zone descriptor into a struct blk_zone and hand it
 * to the block layer report callback.  Returns the callback's result, or
 * -EINVAL for a zone type the driver does not support.
 */
static int nvme_zone_parse_entry(struct nvme_ns *ns,
				 struct nvme_zone_descriptor *entry,
				 unsigned int idx, report_zones_cb cb,
				 void *data)
{
	struct blk_zone zone = { };

	/* Only sequential-write-required zones are supported. */
	if ((entry->zt & 0xf) != NVME_ZONE_TYPE_SEQWRITE_REQ) {
		dev_err(ns->ctrl->device, "invalid zone type %#x\n",
				entry->zt);
		return -EINVAL;
	}

	zone.type = BLK_ZONE_TYPE_SEQWRITE_REQ;
	zone.cond = entry->zs >> 4;
	zone.len = ns->zsze;
	zone.start = nvme_lba_to_sect(ns, le64_to_cpu(entry->zslba));
	zone.capacity = nvme_lba_to_sect(ns, le64_to_cpu(entry->zcap));
	/* A full zone's write pointer is meaningless; report zone end. */
	zone.wp = zone.cond == BLK_ZONE_COND_FULL ?
		  zone.start + zone.len :
		  nvme_lba_to_sect(ns, le64_to_cpu(entry->wp));

	return cb(&zone, idx, data);
}
175240e6ee2SKeith Busch 
/*
 * Report up to @nr_zones zones starting at @sector by issuing Zone
 * Management Receive commands in a loop, invoking @cb for each zone.
 *
 * Returns the number of zones reported (> 0) on success, -EINVAL for a
 * non-ZNS namespace or when no zone could be reported, -ENOMEM if no
 * report buffer could be allocated, or a negative error from the command
 * or the callback.
 */
int nvme_ns_report_zones(struct nvme_ns *ns, sector_t sector,
		unsigned int nr_zones, report_zones_cb cb, void *data)
{
	struct nvme_zone_report *report;
	struct nvme_command c = { };
	int ret, zone_idx = 0;
	unsigned int nz, i;
	size_t buflen;

	if (ns->head->ids.csi != NVME_CSI_ZNS)
		return -EINVAL;

	/* May be smaller than requested if memory is tight; freed below. */
	report = nvme_zns_alloc_report_buffer(ns, nr_zones, &buflen);
	if (!report)
		return -ENOMEM;

	/* Partial report: device may return fewer descriptors than asked. */
	c.zmr.opcode = nvme_cmd_zone_mgmt_recv;
	c.zmr.nsid = cpu_to_le32(ns->head->ns_id);
	c.zmr.numd = cpu_to_le32(nvme_bytes_to_numd(buflen));
	c.zmr.zra = NVME_ZRA_ZONE_REPORT;
	c.zmr.zrasf = NVME_ZRASF_ZONE_REPORT_ALL;
	c.zmr.pr = NVME_REPORT_ZONE_PARTIAL;

	/* Align the start sector down to a zone boundary (zsze is a power of 2). */
	sector &= ~(ns->zsze - 1);
	while (zone_idx < nr_zones && sector < get_capacity(ns->disk)) {
		memset(report, 0, buflen);

		c.zmr.slba = cpu_to_le64(nvme_sect_to_lba(ns, sector));
		ret = nvme_submit_sync_cmd(ns->queue, &c, report, buflen);
		if (ret) {
			/* Map positive NVMe status codes to a generic errno. */
			if (ret > 0)
				ret = -EIO;
			goto out_free;
		}

		nz = min((unsigned int)le64_to_cpu(report->nr_zones), nr_zones);
		if (!nz)
			break;

		for (i = 0; i < nz && zone_idx < nr_zones; i++) {
			ret = nvme_zone_parse_entry(ns, &report->entries[i],
						    zone_idx, cb, data);
			if (ret)
				goto out_free;
			zone_idx++;
		}

		/* Advance past the zones just reported. */
		sector += ns->zsze * nz;
	}

	if (zone_idx > 0)
		ret = zone_idx;
	else
		ret = -EINVAL;
out_free:
	kvfree(report);
	return ret;
}
234240e6ee2SKeith Busch 
nvme_setup_zone_mgmt_send(struct nvme_ns * ns,struct request * req,struct nvme_command * c,enum nvme_zone_mgmt_action action)235240e6ee2SKeith Busch blk_status_t nvme_setup_zone_mgmt_send(struct nvme_ns *ns, struct request *req,
236240e6ee2SKeith Busch 		struct nvme_command *c, enum nvme_zone_mgmt_action action)
237240e6ee2SKeith Busch {
2389c3d2929SJens Axboe 	memset(c, 0, sizeof(*c));
2399c3d2929SJens Axboe 
240240e6ee2SKeith Busch 	c->zms.opcode = nvme_cmd_zone_mgmt_send;
241240e6ee2SKeith Busch 	c->zms.nsid = cpu_to_le32(ns->head->ns_id);
242240e6ee2SKeith Busch 	c->zms.slba = cpu_to_le64(nvme_sect_to_lba(ns, blk_rq_pos(req)));
243240e6ee2SKeith Busch 	c->zms.zsa = action;
244240e6ee2SKeith Busch 
245240e6ee2SKeith Busch 	if (req_op(req) == REQ_OP_ZONE_RESET_ALL)
246240e6ee2SKeith Busch 		c->zms.select_all = 1;
247240e6ee2SKeith Busch 
248240e6ee2SKeith Busch 	return BLK_STS_OK;
249240e6ee2SKeith Busch }
250