// SPDX-License-Identifier: GPL-2.0
/*
 * Copyright (C) 2020 Western Digital Corporation or its affiliates.
 */

#include <linux/blkdev.h>
#include <linux/vmalloc.h>
#include "nvme.h"

int nvme_revalidate_zones(struct nvme_ns *ns)
{
	struct request_queue *q = ns->queue;
	int ret;

	ret = blk_revalidate_disk_zones(ns->disk, NULL);
	if (!ret)
		blk_queue_max_zone_append_sectors(q, ns->ctrl->max_zone_append);
	return ret;
}

static int nvme_set_max_append(struct nvme_ctrl *ctrl)
{
	struct nvme_command c = { };
	struct nvme_id_ctrl_zns *id;
	int status;

	id = kzalloc(sizeof(*id), GFP_KERNEL);
	if (!id)
		return -ENOMEM;

	c.identify.opcode = nvme_admin_identify;
	c.identify.cns = NVME_ID_CNS_CS_CTRL;
	c.identify.csi = NVME_CSI_ZNS;

	status = nvme_submit_sync_cmd(ctrl->admin_q, &c, id, sizeof(*id));
	if (status) {
		kfree(id);
		return status;
	}

	if (id->zasl)
		ctrl->max_zone_append = 1 << (id->zasl + 3);
	else
		ctrl->max_zone_append = ctrl->max_hw_sectors;
	kfree(id);
	return 0;
}

int nvme_update_zone_info(struct nvme_ns *ns, unsigned lbaf)
{
	struct nvme_effects_log *log = ns->head->effects;
	struct request_queue *q = ns->queue;
	struct nvme_command c = { };
	struct nvme_id_ns_zns *id;
	int status;

	/* Driver requires zone append support */
	if ((le32_to_cpu(log->iocs[nvme_cmd_zone_append]) &
			NVME_CMD_EFFECTS_CSUPP)) {
		if (test_and_clear_bit(NVME_NS_FORCE_RO, &ns->flags))
			dev_warn(ns->ctrl->device,
				 "Zone Append supported for zoned namespace:%d. Remove read-only mode\n",
				 ns->head->ns_id);
	} else {
		set_bit(NVME_NS_FORCE_RO, &ns->flags);
		dev_warn(ns->ctrl->device,
			 "Zone Append not supported for zoned namespace:%d. Forcing to read-only mode\n",
			 ns->head->ns_id);
	}

	/* Lazily query controller append limit for the first zoned namespace */
	if (!ns->ctrl->max_zone_append) {
		status = nvme_set_max_append(ns->ctrl);
		if (status)
			return status;
	}

	id = kzalloc(sizeof(*id), GFP_KERNEL);
	if (!id)
		return -ENOMEM;

	c.identify.opcode = nvme_admin_identify;
	c.identify.nsid = cpu_to_le32(ns->head->ns_id);
	c.identify.cns = NVME_ID_CNS_CS_NS;
	c.identify.csi = NVME_CSI_ZNS;

	status = nvme_submit_sync_cmd(ns->ctrl->admin_q, &c, id, sizeof(*id));
	if (status)
		goto free_data;

	/*
	 * We currently do not handle devices requiring any of the zoned
	 * operation characteristics.
	 */
	if (id->zoc) {
		dev_warn(ns->ctrl->device,
			"zone operations:%x not supported for namespace:%u\n",
			le16_to_cpu(id->zoc), ns->head->ns_id);
		status = -ENODEV;
		goto free_data;
	}

	ns->zsze = nvme_lba_to_sect(ns, le64_to_cpu(id->lbafe[lbaf].zsze));
	if (!is_power_of_2(ns->zsze)) {
		dev_warn(ns->ctrl->device,
			"invalid zone size:%llu for namespace:%u\n",
			ns->zsze, ns->head->ns_id);
		status = -ENODEV;
		goto free_data;
	}

	disk_set_zoned(ns->disk, BLK_ZONED_HM);
	blk_queue_flag_set(QUEUE_FLAG_ZONE_RESETALL, q);
	disk_set_max_open_zones(ns->disk, le32_to_cpu(id->mor) + 1);
	disk_set_max_active_zones(ns->disk, le32_to_cpu(id->mar) + 1);
free_data:
	kfree(id);
	return status;
}

static void *nvme_zns_alloc_report_buffer(struct nvme_ns *ns,
					  unsigned int nr_zones, size_t *buflen)
{
	struct request_queue *q = ns->disk->queue;
	size_t bufsize;
	void *buf;

	const size_t min_bufsize = sizeof(struct nvme_zone_report) +
				   sizeof(struct nvme_zone_descriptor);

	nr_zones = min_t(unsigned int, nr_zones,
			 get_capacity(ns->disk) >> ilog2(ns->zsze));

	bufsize = sizeof(struct nvme_zone_report) +
		nr_zones * sizeof(struct nvme_zone_descriptor);
	bufsize = min_t(size_t, bufsize,
			queue_max_hw_sectors(q) << SECTOR_SHIFT);
	bufsize = min_t(size_t, bufsize, queue_max_segments(q) << PAGE_SHIFT);

	while (bufsize >= min_bufsize) {
		buf = __vmalloc(bufsize, GFP_KERNEL | __GFP_NORETRY);
		if (buf) {
			*buflen = bufsize;
			return buf;
		}
		bufsize >>= 1;
	}
	return NULL;
}

static int nvme_zone_parse_entry(struct nvme_ns *ns,
				 struct nvme_zone_descriptor *entry,
				 unsigned int idx, report_zones_cb cb,
				 void *data)
{
	struct blk_zone zone = { };

	if ((entry->zt & 0xf) != NVME_ZONE_TYPE_SEQWRITE_REQ) {
		dev_err(ns->ctrl->device, "invalid zone type %#x\n",
			entry->zt);
		return -EINVAL;
	}

	zone.type = BLK_ZONE_TYPE_SEQWRITE_REQ;
	zone.cond = entry->zs >> 4;
	zone.len = ns->zsze;
	zone.capacity = nvme_lba_to_sect(ns, le64_to_cpu(entry->zcap));
	zone.start = nvme_lba_to_sect(ns, le64_to_cpu(entry->zslba));
	if (zone.cond == BLK_ZONE_COND_FULL)
		zone.wp = zone.start + zone.len;
	else
		zone.wp = nvme_lba_to_sect(ns, le64_to_cpu(entry->wp));

	return cb(&zone, idx, data);
}

int nvme_ns_report_zones(struct nvme_ns *ns, sector_t sector,
			 unsigned int nr_zones, report_zones_cb cb, void *data)
{
	struct nvme_zone_report *report;
	struct nvme_command c = { };
	int ret, zone_idx = 0;
	unsigned int nz, i;
	size_t buflen;

	if (ns->head->ids.csi != NVME_CSI_ZNS)
		return -EINVAL;

	report = nvme_zns_alloc_report_buffer(ns, nr_zones, &buflen);
	if (!report)
		return -ENOMEM;

	c.zmr.opcode = nvme_cmd_zone_mgmt_recv;
	c.zmr.nsid = cpu_to_le32(ns->head->ns_id);
	c.zmr.numd = cpu_to_le32(nvme_bytes_to_numd(buflen));
	c.zmr.zra = NVME_ZRA_ZONE_REPORT;
	c.zmr.zrasf = NVME_ZRASF_ZONE_REPORT_ALL;
	c.zmr.pr = NVME_REPORT_ZONE_PARTIAL;

	sector &= ~(ns->zsze - 1);
	while (zone_idx < nr_zones && sector < get_capacity(ns->disk)) {
		memset(report, 0, buflen);

		c.zmr.slba = cpu_to_le64(nvme_sect_to_lba(ns, sector));
		ret = nvme_submit_sync_cmd(ns->queue, &c, report, buflen);
		if (ret) {
			if (ret > 0)
				ret = -EIO;
			goto out_free;
		}

		nz = min((unsigned int)le64_to_cpu(report->nr_zones), nr_zones);
		if (!nz)
			break;

		for (i = 0; i < nz && zone_idx < nr_zones; i++) {
			ret = nvme_zone_parse_entry(ns, &report->entries[i],
						    zone_idx, cb, data);
			if (ret)
				goto out_free;
			zone_idx++;
		}

		sector += ns->zsze * nz;
	}

	if (zone_idx > 0)
		ret = zone_idx;
	else
		ret = -EINVAL;
out_free:
	kvfree(report);
	return ret;
}

blk_status_t nvme_setup_zone_mgmt_send(struct nvme_ns *ns, struct request *req,
		struct nvme_command *c, enum nvme_zone_mgmt_action action)
{
	memset(c, 0, sizeof(*c));

	c->zms.opcode = nvme_cmd_zone_mgmt_send;
	c->zms.nsid = cpu_to_le32(ns->head->ns_id);
	c->zms.slba = cpu_to_le64(nvme_sect_to_lba(ns, blk_rq_pos(req)));
	c->zms.zsa = action;

	if (req_op(req) == REQ_OP_ZONE_RESET_ALL)
		c->zms.select_all = 1;

	return BLK_STS_OK;
}
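
/*
 * Illustrative sketch (not part of the driver): nvme_ns_report_zones() is
 * normally reached indirectly, through the gendisk's report_zones method,
 * when some other kernel code asks the block layer to walk the zones of a
 * zoned block device.  A minimal, hypothetical in-kernel caller that counts
 * zones via blkdev_report_zones() might look like the following; "bdev" is
 * assumed to be a struct block_device the caller already holds a reference
 * to, and count_zone_cb is a made-up callback name:
 *
 *	static int count_zone_cb(struct blk_zone *zone, unsigned int idx,
 *				 void *data)
 *	{
 *		unsigned int *count = data;
 *
 *		(*count)++;
 *		return 0;
 *	}
 *
 *	unsigned int count = 0;
 *	int ret = blkdev_report_zones(bdev, 0, BLK_ALL_ZONES,
 *				      count_zone_cb, &count);
 *
 * On success ret is the number of zones reported (the callback ran once per
 * zone); a negative value is an errno from the report path above.
 */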