// SPDX-License-Identifier: GPL-2.0
/*
 * Copyright (c) 2017-2018 Christoph Hellwig.
 */

#include <linux/backing-dev.h>
#include <linux/moduleparam.h>
#include <linux/vmalloc.h>
#include <trace/events/block.h>
#include "nvme.h"

bool multipath = true;
module_param(multipath, bool, 0444);
MODULE_PARM_DESC(multipath,
	"turn on native support for multiple controllers per subsystem");

static const char *nvme_iopolicy_names[] = {
	[NVME_IOPOLICY_NUMA]	= "numa",
	[NVME_IOPOLICY_RR]	= "round-robin",
	[NVME_IOPOLICY_QD]	= "queue-depth",
};

static int iopolicy = NVME_IOPOLICY_NUMA;

static int nvme_set_iopolicy(const char *val, const struct kernel_param *kp)
{
	if (!val)
		return -EINVAL;
	if (!strncmp(val, "numa", 4))
		iopolicy = NVME_IOPOLICY_NUMA;
	else if (!strncmp(val, "round-robin", 11))
		iopolicy = NVME_IOPOLICY_RR;
	else if (!strncmp(val, "queue-depth", 11))
		iopolicy = NVME_IOPOLICY_QD;
	else
		return -EINVAL;

	return 0;
}

static int nvme_get_iopolicy(char *buf, const struct kernel_param *kp)
{
	return sprintf(buf, "%s\n", nvme_iopolicy_names[iopolicy]);
}

module_param_call(iopolicy, nvme_set_iopolicy, nvme_get_iopolicy,
	&iopolicy, 0644);
MODULE_PARM_DESC(iopolicy,
	"Default multipath I/O policy; 'numa' (default), 'round-robin' or 'queue-depth'");

void nvme_mpath_default_iopolicy(struct nvme_subsystem *subsys)
{
	subsys->iopolicy = iopolicy;
}

void nvme_mpath_unfreeze(struct nvme_subsystem *subsys)
{
	struct nvme_ns_head *h;

	lockdep_assert_held(&subsys->lock);
	list_for_each_entry(h, &subsys->nsheads, entry)
		if (h->disk)
			blk_mq_unfreeze_queue(h->disk->queue);
}

void nvme_mpath_wait_freeze(struct nvme_subsystem *subsys)
{
	struct nvme_ns_head *h;

	lockdep_assert_held(&subsys->lock);
	list_for_each_entry(h, &subsys->nsheads, entry)
		if (h->disk)
			blk_mq_freeze_queue_wait(h->disk->queue);
}

void nvme_mpath_start_freeze(struct nvme_subsystem *subsys)
{
	struct nvme_ns_head *h;

	lockdep_assert_held(&subsys->lock);
	list_for_each_entry(h, &subsys->nsheads, entry)
		if (h->disk)
			blk_freeze_queue_start(h->disk->queue);
}

void nvme_failover_req(struct request *req)
{
	struct nvme_ns *ns = req->q->queuedata;
	u16 status = nvme_req(req)->status & 0x7ff;
	unsigned long flags;
	struct bio *bio;

	nvme_mpath_clear_current_path(ns);

	/*
	 * If we got back an ANA error, we know the controller is alive but not
	 * ready to serve this namespace.  Kick off a re-read of the ANA
	 * information page, and just try any other available path for now.
	 */
	if (nvme_is_ana_error(status) && ns->ctrl->ana_log_buf) {
		set_bit(NVME_NS_ANA_PENDING, &ns->flags);
		queue_work(nvme_wq, &ns->ctrl->ana_work);
	}

	spin_lock_irqsave(&ns->head->requeue_lock, flags);
	for (bio = req->bio; bio; bio = bio->bi_next) {
		bio_set_dev(bio, ns->head->disk->part0);
		if (bio->bi_opf & REQ_POLLED) {
			bio->bi_opf &= ~REQ_POLLED;
			bio->bi_cookie = BLK_QC_T_NONE;
		}
		/*
		 * The alternate request queue that we may end up submitting
		 * the bio to may be frozen temporarily.  In that case
		 * REQ_NOWAIT will fail the I/O immediately with EAGAIN to the
		 * issuer.  We are not in the issuer context and can block, so
		 * clear the flag to avoid spurious EAGAIN I/O failures.
		 */
		bio->bi_opf &= ~REQ_NOWAIT;
	}
	blk_steal_bios(&ns->head->requeue_list, req);
	spin_unlock_irqrestore(&ns->head->requeue_lock, flags);

	nvme_req(req)->status = 0;
	nvme_end_req(req);
	kblockd_schedule_work(&ns->head->requeue_work);
}

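/*
 * Per-I/O accounting for the multipath device: when the queue-depth iopolicy
 * is active, track the number of requests in flight per controller in
 * ctrl->nr_active, and optionally start block layer I/O statistics against
 * the ns_head gendisk.
 */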
void nvme_mpath_start_request(struct request *rq)
{
	struct nvme_ns *ns = rq->q->queuedata;
	struct gendisk *disk = ns->head->disk;

	if (READ_ONCE(ns->head->subsys->iopolicy) == NVME_IOPOLICY_QD) {
		atomic_inc(&ns->ctrl->nr_active);
		nvme_req(rq)->flags |= NVME_MPATH_CNT_ACTIVE;
	}

	if (!blk_queue_io_stat(disk->queue) || blk_rq_is_passthrough(rq))
		return;

	nvme_req(rq)->flags |= NVME_MPATH_IO_STATS;
	nvme_req(rq)->start_time = bdev_start_io_acct(disk->part0, req_op(rq),
						      jiffies);
}
EXPORT_SYMBOL_GPL(nvme_mpath_start_request);

void nvme_mpath_end_request(struct request *rq)
{
	struct nvme_ns *ns = rq->q->queuedata;

	if (nvme_req(rq)->flags & NVME_MPATH_CNT_ACTIVE)
		atomic_dec_if_positive(&ns->ctrl->nr_active);

	if (!(nvme_req(rq)->flags & NVME_MPATH_IO_STATS))
		return;
	bdev_end_io_acct(ns->head->disk->part0, req_op(rq),
			 blk_rq_bytes(rq) >> SECTOR_SHIFT,
			 nvme_req(rq)->start_time);
}

void nvme_kick_requeue_lists(struct nvme_ctrl *ctrl)
{
	struct nvme_ns *ns;
	int srcu_idx;

	srcu_idx = srcu_read_lock(&ctrl->srcu);
	list_for_each_entry_srcu(ns, &ctrl->namespaces, list,
				 srcu_read_lock_held(&ctrl->srcu)) {
		if (!ns->head->disk)
			continue;
		kblockd_schedule_work(&ns->head->requeue_work);
		if (ctrl->state == NVME_CTRL_LIVE)
			disk_uevent(ns->head->disk, KOBJ_CHANGE);
	}
	srcu_read_unlock(&ctrl->srcu, srcu_idx);
}

static const char *nvme_ana_state_names[] = {
	[0]				= "invalid state",
	[NVME_ANA_OPTIMIZED]		= "optimized",
	[NVME_ANA_NONOPTIMIZED]		= "non-optimized",
	[NVME_ANA_INACCESSIBLE]		= "inaccessible",
	[NVME_ANA_PERSISTENT_LOSS]	= "persistent-loss",
	[NVME_ANA_CHANGE]		= "change",
};

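/*
 * Clear ns from the cached current path of every NUMA node on its ns_head.
 * Returns true if at least one cache entry pointed to ns and was cleared.
 */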
bool nvme_mpath_clear_current_path(struct nvme_ns *ns)
{
	struct nvme_ns_head *head = ns->head;
	bool changed = false;
	int node;

	if (!head)
		goto out;

	for_each_node(node) {
		if (ns == rcu_access_pointer(head->current_path[node])) {
			rcu_assign_pointer(head->current_path[node], NULL);
			changed = true;
		}
	}
out:
	return changed;
}

void nvme_mpath_clear_ctrl_paths(struct nvme_ctrl *ctrl)
{
	struct nvme_ns *ns;
	int srcu_idx;

	srcu_idx = srcu_read_lock(&ctrl->srcu);
	list_for_each_entry_srcu(ns, &ctrl->namespaces, list,
				 srcu_read_lock_held(&ctrl->srcu)) {
		nvme_mpath_clear_current_path(ns);
		kblockd_schedule_work(&ns->head->requeue_work);
	}
	srcu_read_unlock(&ctrl->srcu, srcu_idx);
}

void nvme_mpath_revalidate_paths(struct nvme_ns *ns)
{
	struct nvme_ns_head *head = ns->head;
	sector_t capacity = get_capacity(head->disk);
	int node;
	int srcu_idx;

	srcu_idx = srcu_read_lock(&head->srcu);
	list_for_each_entry_srcu(ns, &head->list, siblings,
				 srcu_read_lock_held(&head->srcu)) {
		if (capacity != get_capacity(ns->disk))
			clear_bit(NVME_NS_READY, &ns->flags);
	}
	srcu_read_unlock(&head->srcu, srcu_idx);

	for_each_node(node)
		rcu_assign_pointer(head->current_path[node], NULL);
	kblockd_schedule_work(&head->requeue_work);
}

static bool nvme_path_is_disabled(struct nvme_ns *ns)
{
	/*
	 * We don't treat NVME_CTRL_DELETING as a disabled path as I/O should
	 * still be able to complete assuming that the controller is connected.
	 * Otherwise it will fail immediately and return to the requeue list.
	 */
	if (ns->ctrl->state != NVME_CTRL_LIVE &&
	    ns->ctrl->state != NVME_CTRL_DELETING)
		return true;
	if (test_bit(NVME_NS_ANA_PENDING, &ns->flags) ||
	    !test_bit(NVME_NS_READY, &ns->flags))
		return true;
	return false;
}

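/*
 * Pick the ANA-optimized path with the smallest NUMA distance to @node,
 * falling back to the closest non-optimized path if no optimized path is
 * usable.  The chosen path is cached in head->current_path[node].
 */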
static struct nvme_ns *__nvme_find_path(struct nvme_ns_head *head, int node)
{
	int found_distance = INT_MAX, fallback_distance = INT_MAX, distance;
	struct nvme_ns *found = NULL, *fallback = NULL, *ns;

	list_for_each_entry_srcu(ns, &head->list, siblings,
				 srcu_read_lock_held(&head->srcu)) {
		if (nvme_path_is_disabled(ns))
			continue;

		if (ns->ctrl->numa_node != NUMA_NO_NODE &&
		    READ_ONCE(head->subsys->iopolicy) == NVME_IOPOLICY_NUMA)
			distance = node_distance(node, ns->ctrl->numa_node);
		else
			distance = LOCAL_DISTANCE;

		switch (ns->ana_state) {
		case NVME_ANA_OPTIMIZED:
			if (distance < found_distance) {
				found_distance = distance;
				found = ns;
			}
			break;
		case NVME_ANA_NONOPTIMIZED:
			if (distance < fallback_distance) {
				fallback_distance = distance;
				fallback = ns;
			}
			break;
		default:
			break;
		}
	}

	if (!found)
		found = fallback;
	if (found)
		rcu_assign_pointer(head->current_path[node], found);
	return found;
}

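/* Advance to the next sibling path, wrapping around to the head of the list. */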
static struct nvme_ns *nvme_next_ns(struct nvme_ns_head *head,
		struct nvme_ns *ns)
{
	ns = list_next_or_null_rcu(&head->list, &ns->siblings, struct nvme_ns,
			siblings);
	if (ns)
		return ns;
	return list_first_or_null_rcu(&head->list, struct nvme_ns, siblings);
}

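/*
 * Round-robin path selection: start from the node's cached current path and
 * advance to the next usable path, preferring ANA-optimized paths over
 * non-optimized ones.  The cache is updated so subsequent I/O continues the
 * rotation from the path chosen here.
 */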
static struct nvme_ns *nvme_round_robin_path(struct nvme_ns_head *head)
{
	struct nvme_ns *ns, *found = NULL;
	int node = numa_node_id();
	struct nvme_ns *old = srcu_dereference(head->current_path[node],
					       &head->srcu);

	if (unlikely(!old))
		return __nvme_find_path(head, node);

	if (list_is_singular(&head->list)) {
		if (nvme_path_is_disabled(old))
			return NULL;
		return old;
	}

	for (ns = nvme_next_ns(head, old);
	     ns && ns != old;
	     ns = nvme_next_ns(head, ns)) {
		if (nvme_path_is_disabled(ns))
			continue;

		if (ns->ana_state == NVME_ANA_OPTIMIZED) {
			found = ns;
			goto out;
		}
		if (ns->ana_state == NVME_ANA_NONOPTIMIZED)
			found = ns;
	}

	/*
	 * The loop above skips the current path for round-robin semantics.
	 * Fall back to the current path if either:
	 *  - no other optimized path found and current is optimized,
	 *  - no other usable path found and current is usable.
	 */
	if (!nvme_path_is_disabled(old) &&
	    (old->ana_state == NVME_ANA_OPTIMIZED ||
	     (!found && old->ana_state == NVME_ANA_NONOPTIMIZED)))
		return old;

	if (!found)
		return NULL;
out:
	rcu_assign_pointer(head->current_path[node], found);
	return found;
}

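/*
 * Queue-depth path selection: choose the path whose controller currently has
 * the fewest multipath requests in flight (ctrl->nr_active, maintained by
 * nvme_mpath_start_request()/nvme_mpath_end_request()).  Optimized paths are
 * preferred, and an idle optimized path short-circuits the scan.
 */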
static struct nvme_ns *nvme_queue_depth_path(struct nvme_ns_head *head)
{
	struct nvme_ns *best_opt = NULL, *best_nonopt = NULL, *ns;
	unsigned int min_depth_opt = UINT_MAX, min_depth_nonopt = UINT_MAX;
	unsigned int depth;

	list_for_each_entry_srcu(ns, &head->list, siblings,
				 srcu_read_lock_held(&head->srcu)) {
		if (nvme_path_is_disabled(ns))
			continue;

		depth = atomic_read(&ns->ctrl->nr_active);

		switch (ns->ana_state) {
		case NVME_ANA_OPTIMIZED:
			if (depth < min_depth_opt) {
				min_depth_opt = depth;
				best_opt = ns;
			}
			break;
		case NVME_ANA_NONOPTIMIZED:
			if (depth < min_depth_nonopt) {
				min_depth_nonopt = depth;
				best_nonopt = ns;
			}
			break;
		default:
			break;
		}

		if (min_depth_opt == 0)
			return best_opt;
	}

	return best_opt ? best_opt : best_nonopt;
}

static inline bool nvme_path_is_optimized(struct nvme_ns *ns)
{
	return ns->ctrl->state == NVME_CTRL_LIVE &&
		ns->ana_state == NVME_ANA_OPTIMIZED;
}

static struct nvme_ns *nvme_numa_path(struct nvme_ns_head *head)
{
	int node = numa_node_id();
	struct nvme_ns *ns;

	ns = srcu_dereference(head->current_path[node], &head->srcu);
	if (unlikely(!ns))
		return __nvme_find_path(head, node);
	if (unlikely(!nvme_path_is_optimized(ns)))
		return __nvme_find_path(head, node);
	return ns;
}

inline struct nvme_ns *nvme_find_path(struct nvme_ns_head *head)
{
	switch (READ_ONCE(head->subsys->iopolicy)) {
	case NVME_IOPOLICY_QD:
		return nvme_queue_depth_path(head);
	case NVME_IOPOLICY_RR:
		return nvme_round_robin_path(head);
	default:
		return nvme_numa_path(head);
	}
}

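/*
 * Check whether any path could still serve I/O: the head must be live and at
 * least one controller must be live, resetting or connecting without having
 * exceeded its failfast timeout.  Used to decide between requeueing and
 * failing bios when no path is currently usable.
 */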
static bool nvme_available_path(struct nvme_ns_head *head)
{
	struct nvme_ns *ns;

	if (!test_bit(NVME_NSHEAD_DISK_LIVE, &head->flags))
		return false;

	list_for_each_entry_srcu(ns, &head->list, siblings,
				 srcu_read_lock_held(&head->srcu)) {
		if (test_bit(NVME_CTRL_FAILFAST_EXPIRED, &ns->ctrl->flags))
			continue;
		switch (ns->ctrl->state) {
		case NVME_CTRL_LIVE:
		case NVME_CTRL_RESETTING:
		case NVME_CTRL_CONNECTING:
			return true;
		default:
			break;
		}
	}
	return false;
}

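/*
 * I/O entry point for the multipath gendisk: pick a path under the head's
 * SRCU read lock and resubmit the bio to it.  If no path is usable the bio
 * is either parked on the requeue list (a path may still come back) or
 * failed outright (no path can recover).
 */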
static void nvme_ns_head_submit_bio(struct bio *bio)
{
	struct nvme_ns_head *head = bio->bi_bdev->bd_disk->private_data;
	struct device *dev = disk_to_dev(head->disk);
	struct nvme_ns *ns;
	int srcu_idx;

	/*
	 * The namespace might be going away and the bio might be moved to a
	 * different queue via blk_steal_bios(), so we need to use the bio_split
	 * pool from the original queue to allocate the bvecs from.
	 */
	bio = bio_split_to_limits(bio);
	if (!bio)
		return;

	srcu_idx = srcu_read_lock(&head->srcu);
	ns = nvme_find_path(head);
	if (likely(ns)) {
		bio_set_dev(bio, ns->disk->part0);
		bio->bi_opf |= REQ_NVME_MPATH;
		trace_block_bio_remap(bio, disk_devt(ns->head->disk),
				      bio->bi_iter.bi_sector);
		submit_bio_noacct(bio);
	} else if (nvme_available_path(head)) {
		dev_warn_ratelimited(dev, "no usable path - requeuing I/O\n");

		spin_lock_irq(&head->requeue_lock);
		bio_list_add(&head->requeue_list, bio);
		spin_unlock_irq(&head->requeue_lock);
	} else {
		dev_warn_ratelimited(dev, "no available path - failing I/O\n");

		bio_io_error(bio);
	}

	srcu_read_unlock(&head->srcu, srcu_idx);
}

static int nvme_ns_head_open(struct gendisk *disk, blk_mode_t mode)
{
	if (!nvme_tryget_ns_head(disk->private_data))
		return -ENXIO;
	return 0;
}

static void nvme_ns_head_release(struct gendisk *disk)
{
	nvme_put_ns_head(disk->private_data);
}

#ifdef CONFIG_BLK_DEV_ZONED
static int nvme_ns_head_report_zones(struct gendisk *disk, sector_t sector,
		unsigned int nr_zones, report_zones_cb cb, void *data)
{
	struct nvme_ns_head *head = disk->private_data;
	struct nvme_ns *ns;
	int srcu_idx, ret = -EWOULDBLOCK;

	srcu_idx = srcu_read_lock(&head->srcu);
	ns = nvme_find_path(head);
	if (ns)
		ret = nvme_ns_report_zones(ns, sector, nr_zones, cb, data);
	srcu_read_unlock(&head->srcu, srcu_idx);
	return ret;
}
#else
#define nvme_ns_head_report_zones	NULL
#endif /* CONFIG_BLK_DEV_ZONED */

const struct block_device_operations nvme_ns_head_ops = {
	.owner		= THIS_MODULE,
	.submit_bio	= nvme_ns_head_submit_bio,
	.open		= nvme_ns_head_open,
	.release	= nvme_ns_head_release,
	.ioctl		= nvme_ns_head_ioctl,
	.compat_ioctl	= blkdev_compat_ptr_ioctl,
	.getgeo		= nvme_getgeo,
	.report_zones	= nvme_ns_head_report_zones,
	.pr_ops		= &nvme_pr_ops,
};

static inline struct nvme_ns_head *cdev_to_ns_head(struct cdev *cdev)
{
	return container_of(cdev, struct nvme_ns_head, cdev);
}

static int nvme_ns_head_chr_open(struct inode *inode, struct file *file)
{
	if (!nvme_tryget_ns_head(cdev_to_ns_head(inode->i_cdev)))
		return -ENXIO;
	return 0;
}

static int nvme_ns_head_chr_release(struct inode *inode, struct file *file)
{
	nvme_put_ns_head(cdev_to_ns_head(inode->i_cdev));
	return 0;
}

static const struct file_operations nvme_ns_head_chr_fops = {
	.owner		= THIS_MODULE,
	.open		= nvme_ns_head_chr_open,
	.release	= nvme_ns_head_chr_release,
	.unlocked_ioctl	= nvme_ns_head_chr_ioctl,
	.compat_ioctl	= compat_ptr_ioctl,
	.uring_cmd	= nvme_ns_head_chr_uring_cmd,
	.uring_cmd_iopoll = nvme_ns_chr_uring_cmd_iopoll,
};

static int nvme_add_ns_head_cdev(struct nvme_ns_head *head)
{
	int ret;

	head->cdev_device.parent = &head->subsys->dev;
	ret = dev_set_name(&head->cdev_device, "ng%dn%d",
			   head->subsys->instance, head->instance);
	if (ret)
		return ret;
	ret = nvme_cdev_add(&head->cdev, &head->cdev_device,
			    &nvme_ns_head_chr_fops, THIS_MODULE);
	return ret;
}

static void nvme_partition_scan_work(struct work_struct *work)
{
	struct nvme_ns_head *head =
		container_of(work, struct nvme_ns_head, partition_scan_work);

	if (WARN_ON_ONCE(!test_and_clear_bit(GD_SUPPRESS_PART_SCAN,
					     &head->disk->state)))
		return;

	mutex_lock(&head->disk->open_mutex);
	bdev_disk_changed(head->disk, false);
	mutex_unlock(&head->disk->open_mutex);
}

static void nvme_requeue_work(struct work_struct *work)
{
	struct nvme_ns_head *head =
		container_of(work, struct nvme_ns_head, requeue_work);
	struct bio *bio, *next;

	spin_lock_irq(&head->requeue_lock);
	next = bio_list_get(&head->requeue_list);
	spin_unlock_irq(&head->requeue_lock);

	while ((bio = next) != NULL) {
		next = bio->bi_next;
		bio->bi_next = NULL;

		submit_bio_noacct(bio);
	}
}

int nvme_mpath_alloc_disk(struct nvme_ctrl *ctrl, struct nvme_ns_head *head)
{
	bool vwc = false;

	mutex_init(&head->lock);
	bio_list_init(&head->requeue_list);
	spin_lock_init(&head->requeue_lock);
	INIT_WORK(&head->requeue_work, nvme_requeue_work);
	INIT_WORK(&head->partition_scan_work, nvme_partition_scan_work);

	/*
	 * Add a multipath node if the subsystem supports multiple controllers.
	 * We also do this for private namespaces as the namespace sharing flag
	 * could change after a rescan.
	 */
	if (!(ctrl->subsys->cmic & NVME_CTRL_CMIC_MULTI_CTRL) ||
	    !nvme_is_unique_nsid(ctrl, head) || !multipath)
		return 0;

	head->disk = blk_alloc_disk(ctrl->numa_node);
	if (!head->disk)
		return -ENOMEM;
	head->disk->fops = &nvme_ns_head_ops;
	head->disk->private_data = head;

	/*
	 * We need to suppress the partition scan from occurring within the
	 * controller's scan_work context.  If a path error occurs here, the IO
	 * will wait until a path becomes available or all paths are torn down,
	 * but that action also occurs within scan_work, so it would deadlock.
	 * Defer the partition scan to a different context that does not block
	 * scan_work.
	 */
	set_bit(GD_SUPPRESS_PART_SCAN, &head->disk->state);
	sprintf(head->disk->disk_name, "nvme%dn%d",
			ctrl->subsys->instance, head->instance);

	blk_queue_flag_set(QUEUE_FLAG_NONROT, head->disk->queue);
	blk_queue_flag_set(QUEUE_FLAG_NOWAIT, head->disk->queue);
	blk_queue_flag_set(QUEUE_FLAG_IO_STAT, head->disk->queue);
	/*
	 * This assumes all controllers that refer to a namespace either
	 * support poll queues or not.  That is not a strict guarantee,
	 * but if the assumption is wrong the effect is only suboptimal
	 * performance, not a correctness problem.
	 */
	if (ctrl->tagset->nr_maps > HCTX_TYPE_POLL &&
	    ctrl->tagset->map[HCTX_TYPE_POLL].nr_queues)
		blk_queue_flag_set(QUEUE_FLAG_POLL, head->disk->queue);

	/* set to a default value of 512 until the disk is validated */
	blk_queue_logical_block_size(head->disk->queue, 512);
	blk_set_stacking_limits(&head->disk->queue->limits);
	blk_queue_dma_alignment(head->disk->queue, 3);

	/* we need to propagate up the VWC settings */
	if (ctrl->vwc & NVME_CTRL_VWC_PRESENT)
		vwc = true;
	blk_queue_write_cache(head->disk->queue, vwc, vwc);
	return 0;
}

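/*
 * Called when a path transitions to a usable ANA state.  Register the
 * ns_head gendisk and char device on the first live path, prime the per-node
 * path cache for optimized paths, and kick the requeue work so bios queued
 * while no path was available get resubmitted.
 */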
static void nvme_mpath_set_live(struct nvme_ns *ns)
{
	struct nvme_ns_head *head = ns->head;
	int rc;

	if (!head->disk)
		return;

	/*
	 * test_and_set_bit() is used because it is protecting against two nvme
	 * paths simultaneously calling device_add_disk() on the same namespace
	 * head.
	 */
	if (!test_and_set_bit(NVME_NSHEAD_DISK_LIVE, &head->flags)) {
		rc = device_add_disk(&head->subsys->dev, head->disk,
				     nvme_ns_id_attr_groups);
		if (rc) {
			clear_bit(NVME_NSHEAD_DISK_LIVE, &head->flags);
			return;
		}
		nvme_add_ns_head_cdev(head);
		kblockd_schedule_work(&head->partition_scan_work);
	}

	mutex_lock(&head->lock);
	if (nvme_path_is_optimized(ns)) {
		int node, srcu_idx;

		srcu_idx = srcu_read_lock(&head->srcu);
		for_each_online_node(node)
			__nvme_find_path(head, node);
		srcu_read_unlock(&head->srcu, srcu_idx);
	}
	mutex_unlock(&head->lock);

	synchronize_srcu(&head->srcu);
	kblockd_schedule_work(&head->requeue_work);
}

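/*
 * Walk the ANA log page group descriptor by group descriptor, validating
 * each entry against the advertised log size and group limits before handing
 * it to @cb.  A non-zero return from @cb terminates the walk.
 */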
static int nvme_parse_ana_log(struct nvme_ctrl *ctrl, void *data,
		int (*cb)(struct nvme_ctrl *ctrl, struct nvme_ana_group_desc *,
			  void *))
{
	void *base = ctrl->ana_log_buf;
	size_t offset = sizeof(struct nvme_ana_rsp_hdr);
	int error, i;

	lockdep_assert_held(&ctrl->ana_lock);

	for (i = 0; i < le16_to_cpu(ctrl->ana_log_buf->ngrps); i++) {
		struct nvme_ana_group_desc *desc = base + offset;
		u32 nr_nsids;
		size_t nsid_buf_size;

		if (WARN_ON_ONCE(offset > ctrl->ana_log_size - sizeof(*desc)))
			return -EINVAL;

		nr_nsids = le32_to_cpu(desc->nnsids);
		nsid_buf_size = flex_array_size(desc, nsids, nr_nsids);

		if (WARN_ON_ONCE(desc->grpid == 0))
			return -EINVAL;
		if (WARN_ON_ONCE(le32_to_cpu(desc->grpid) > ctrl->anagrpmax))
			return -EINVAL;
		if (WARN_ON_ONCE(desc->state == 0))
			return -EINVAL;
		if (WARN_ON_ONCE(desc->state > NVME_ANA_CHANGE))
			return -EINVAL;

		offset += sizeof(*desc);
		if (WARN_ON_ONCE(offset > ctrl->ana_log_size - nsid_buf_size))
			return -EINVAL;

		error = cb(ctrl, desc, data);
		if (error)
			return error;

		offset += nsid_buf_size;
	}

	return 0;
}

static inline bool nvme_state_is_live(enum nvme_ana_state state)
{
	return state == NVME_ANA_OPTIMIZED || state == NVME_ANA_NONOPTIMIZED;
}

static void nvme_update_ns_ana_state(struct nvme_ana_group_desc *desc,
		struct nvme_ns *ns)
{
	ns->ana_grpid = le32_to_cpu(desc->grpid);
	ns->ana_state = desc->state;
	clear_bit(NVME_NS_ANA_PENDING, &ns->flags);
	/*
	 * nvme_mpath_set_live() will trigger I/O to the multipath path device
	 * and in turn to this path device.  However we cannot accept this I/O
	 * if the controller is not live.  This may deadlock if called from
	 * nvme_mpath_init_identify() and the ctrl will never complete
	 * initialization, preventing I/O from completing.  For this case we
	 * will reprocess the ANA log page in nvme_mpath_update() once the
	 * controller is ready.
	 */
	if (nvme_state_is_live(ns->ana_state) &&
	    ns->ctrl->state == NVME_CTRL_LIVE)
		nvme_mpath_set_live(ns);
}

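/*
 * Apply one ANA group descriptor to all matching namespaces.  Both the
 * descriptor's NSID list and ctrl->namespaces are kept sorted by NSID, so
 * the two lists are walked in lockstep.
 */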
static int nvme_update_ana_state(struct nvme_ctrl *ctrl,
		struct nvme_ana_group_desc *desc, void *data)
{
	u32 nr_nsids = le32_to_cpu(desc->nnsids), n = 0;
	unsigned *nr_change_groups = data;
	struct nvme_ns *ns;
	int srcu_idx;

	dev_dbg(ctrl->device, "ANA group %d: %s.\n",
			le32_to_cpu(desc->grpid),
			nvme_ana_state_names[desc->state]);

	if (desc->state == NVME_ANA_CHANGE)
		(*nr_change_groups)++;

	if (!nr_nsids)
		return 0;

	srcu_idx = srcu_read_lock(&ctrl->srcu);
	list_for_each_entry_srcu(ns, &ctrl->namespaces, list,
				 srcu_read_lock_held(&ctrl->srcu)) {
		unsigned nsid;
again:
		nsid = le32_to_cpu(desc->nsids[n]);
		if (ns->head->ns_id < nsid)
			continue;
		if (ns->head->ns_id == nsid)
			nvme_update_ns_ana_state(desc, ns);
		if (++n == nr_nsids)
			break;
		if (ns->head->ns_id > nsid)
			goto again;
	}
	srcu_read_unlock(&ctrl->srcu, srcu_idx);
	return 0;
}

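/*
 * Fetch the current ANA log page from the controller and apply it via
 * nvme_update_ana_state().  The ANATT timer is (re)armed while any group is
 * still reporting the CHANGE state and stopped once none are.
 */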
static int nvme_read_ana_log(struct nvme_ctrl *ctrl)
{
	u32 nr_change_groups = 0;
	int error;

	mutex_lock(&ctrl->ana_lock);
	error = nvme_get_log(ctrl, NVME_NSID_ALL, NVME_LOG_ANA, 0, NVME_CSI_NVM,
			ctrl->ana_log_buf, ctrl->ana_log_size, 0);
	if (error) {
		dev_warn(ctrl->device, "Failed to get ANA log: %d\n", error);
		goto out_unlock;
	}

	error = nvme_parse_ana_log(ctrl, &nr_change_groups,
			nvme_update_ana_state);
	if (error)
		goto out_unlock;

	/*
	 * In theory we should have an ANATT timer per group as they might enter
	 * the change state at different times.  But that is a lot of overhead
	 * just to protect against a target that keeps entering new change
	 * states while never finishing previous ones.  But we'll still
	 * eventually time out once all groups are in change state, so this
	 * isn't a big deal.
	 *
	 * We also double the ANATT value to provide some slack for transports
	 * or AEN processing overhead.
	 */
	if (nr_change_groups)
		mod_timer(&ctrl->anatt_timer, ctrl->anatt * HZ * 2 + jiffies);
	else
		del_timer_sync(&ctrl->anatt_timer);
out_unlock:
	mutex_unlock(&ctrl->ana_lock);
	return error;
}

static void nvme_ana_work(struct work_struct *work)
{
	struct nvme_ctrl *ctrl = container_of(work, struct nvme_ctrl, ana_work);

	if (ctrl->state != NVME_CTRL_LIVE)
		return;

	nvme_read_ana_log(ctrl);
}

void nvme_mpath_update(struct nvme_ctrl *ctrl)
{
	u32 nr_change_groups = 0;

	if (!ctrl->ana_log_buf)
		return;

	mutex_lock(&ctrl->ana_lock);
	nvme_parse_ana_log(ctrl, &nr_change_groups, nvme_update_ana_state);
	mutex_unlock(&ctrl->ana_lock);
}

static void nvme_anatt_timeout(struct timer_list *t)
{
	struct nvme_ctrl *ctrl = from_timer(ctrl, t, anatt_timer);

	dev_info(ctrl->device, "ANATT timeout, resetting controller.\n");
	nvme_reset_ctrl(ctrl);
}

void nvme_mpath_stop(struct nvme_ctrl *ctrl)
{
	if (!nvme_ctrl_use_ana(ctrl))
		return;
	del_timer_sync(&ctrl->anatt_timer);
	cancel_work_sync(&ctrl->ana_work);
}

#define SUBSYS_ATTR_RW(_name, _mode, _show, _store)	\
	struct device_attribute subsys_attr_##_name =	\
		__ATTR(_name, _mode, _show, _store)

static ssize_t nvme_subsys_iopolicy_show(struct device *dev,
		struct device_attribute *attr, char *buf)
{
	struct nvme_subsystem *subsys =
		container_of(dev, struct nvme_subsystem, dev);

	return sysfs_emit(buf, "%s\n",
			  nvme_iopolicy_names[READ_ONCE(subsys->iopolicy)]);
}

static void nvme_subsys_iopolicy_update(struct nvme_subsystem *subsys,
		int iopolicy)
{
	struct nvme_ctrl *ctrl;
	int old_iopolicy = READ_ONCE(subsys->iopolicy);

	if (old_iopolicy == iopolicy)
		return;

	WRITE_ONCE(subsys->iopolicy, iopolicy);

	/* iopolicy changes clear the mpath by design */
	mutex_lock(&nvme_subsystems_lock);
	list_for_each_entry(ctrl, &subsys->ctrls, subsys_entry)
		nvme_mpath_clear_ctrl_paths(ctrl);
	mutex_unlock(&nvme_subsystems_lock);

	pr_notice("subsysnqn %s iopolicy changed from %s to %s\n",
		  subsys->subnqn,
		  nvme_iopolicy_names[old_iopolicy],
		  nvme_iopolicy_names[iopolicy]);
}

static ssize_t nvme_subsys_iopolicy_store(struct device *dev,
		struct device_attribute *attr, const char *buf, size_t count)
{
	struct nvme_subsystem *subsys =
		container_of(dev, struct nvme_subsystem, dev);
	int i;

	for (i = 0; i < ARRAY_SIZE(nvme_iopolicy_names); i++) {
		if (sysfs_streq(buf, nvme_iopolicy_names[i])) {
			nvme_subsys_iopolicy_update(subsys, i);
			return count;
		}
	}

	return -EINVAL;
}
SUBSYS_ATTR_RW(iopolicy, S_IRUGO | S_IWUSR,
		      nvme_subsys_iopolicy_show, nvme_subsys_iopolicy_store);

static ssize_t ana_grpid_show(struct device *dev, struct device_attribute *attr,
		char *buf)
{
	return sysfs_emit(buf, "%d\n", nvme_get_ns_from_dev(dev)->ana_grpid);
}
DEVICE_ATTR_RO(ana_grpid);

static ssize_t ana_state_show(struct device *dev, struct device_attribute *attr,
		char *buf)
{
	struct nvme_ns *ns = nvme_get_ns_from_dev(dev);

	return sysfs_emit(buf, "%s\n", nvme_ana_state_names[ns->ana_state]);
}
DEVICE_ATTR_RO(ana_state);

static int nvme_lookup_ana_group_desc(struct nvme_ctrl *ctrl,
		struct nvme_ana_group_desc *desc, void *data)
{
	struct nvme_ana_group_desc *dst = data;

	if (desc->grpid != dst->grpid)
		return 0;

	*dst = *desc;
	return -ENXIO; /* just break out of the loop */
}

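/*
 * Hook a new namespace path up to its ns_head.  For ANA controllers the
 * group descriptor is looked up in the most recently read log page (or a
 * re-read is scheduled if it is not there yet); without ANA the path is
 * simply treated as optimized and set live.
 */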
void nvme_mpath_add_disk(struct nvme_ns *ns, __le32 anagrpid)
{
	if (nvme_ctrl_use_ana(ns->ctrl)) {
		struct nvme_ana_group_desc desc = {
			.grpid = anagrpid,
			.state = 0,
		};

		mutex_lock(&ns->ctrl->ana_lock);
		ns->ana_grpid = le32_to_cpu(anagrpid);
		nvme_parse_ana_log(ns->ctrl, &desc, nvme_lookup_ana_group_desc);
		mutex_unlock(&ns->ctrl->ana_lock);
		if (desc.state) {
			/* found the group desc: update */
			nvme_update_ns_ana_state(&desc, ns);
		} else {
			/* group desc not found: trigger a re-read */
			set_bit(NVME_NS_ANA_PENDING, &ns->flags);
			queue_work(nvme_wq, &ns->ctrl->ana_work);
		}
	} else {
		ns->ana_state = NVME_ANA_OPTIMIZED;
		nvme_mpath_set_live(ns);
	}

	if (blk_queue_stable_writes(ns->queue) && ns->head->disk)
		blk_queue_flag_set(QUEUE_FLAG_STABLE_WRITES,
				   ns->head->disk->queue);
#ifdef CONFIG_BLK_DEV_ZONED
	if (blk_queue_is_zoned(ns->queue) && ns->head->disk)
		ns->head->disk->nr_zones = ns->disk->nr_zones;
#endif
}

void nvme_mpath_shutdown_disk(struct nvme_ns_head *head)
{
	if (!head->disk)
		return;
	if (test_and_clear_bit(NVME_NSHEAD_DISK_LIVE, &head->flags)) {
		nvme_cdev_del(&head->cdev, &head->cdev_device);
		/*
		 * requeue I/O after NVME_NSHEAD_DISK_LIVE has been cleared
		 * to allow multipath to fail all I/O.
		 */
		synchronize_srcu(&head->srcu);
		kblockd_schedule_work(&head->requeue_work);
		del_gendisk(head->disk);
	}
	/*
	 * requeue I/O after NVME_NSHEAD_DISK_LIVE has been cleared
	 * to allow multipath to fail all I/O.
	 */
	synchronize_srcu(&head->srcu);
	kblockd_schedule_work(&head->requeue_work);
}

void nvme_mpath_remove_disk(struct nvme_ns_head *head)
{
	if (!head->disk)
		return;
	/* make sure all pending bios are cleaned up */
	kblockd_schedule_work(&head->requeue_work);
	flush_work(&head->requeue_work);
	flush_work(&head->partition_scan_work);
	put_disk(head->disk);
}

void nvme_mpath_init_ctrl(struct nvme_ctrl *ctrl)
{
	mutex_init(&ctrl->ana_lock);
	timer_setup(&ctrl->anatt_timer, nvme_anatt_timeout, 0);
	INIT_WORK(&ctrl->ana_work, nvme_ana_work);
}

int nvme_mpath_init_identify(struct nvme_ctrl *ctrl, struct nvme_id_ctrl *id)
{
	size_t max_transfer_size = ctrl->max_hw_sectors << SECTOR_SHIFT;
	size_t ana_log_size;
	int error = 0;

	/* check if multipath is enabled and we have the capability */
	if (!multipath || !ctrl->subsys ||
	    !(ctrl->subsys->cmic & NVME_CTRL_CMIC_ANA))
		return 0;

	/* initialize this in the identify path to cover controller resets */
	atomic_set(&ctrl->nr_active, 0);

	if (!ctrl->max_namespaces ||
	    ctrl->max_namespaces > le32_to_cpu(id->nn)) {
		dev_err(ctrl->device,
			"Invalid MNAN value %u\n", ctrl->max_namespaces);
		return -EINVAL;
	}

	ctrl->anacap = id->anacap;
	ctrl->anatt = id->anatt;
	ctrl->nanagrpid = le32_to_cpu(id->nanagrpid);
	ctrl->anagrpmax = le32_to_cpu(id->anagrpmax);

	ana_log_size = sizeof(struct nvme_ana_rsp_hdr) +
		ctrl->nanagrpid * sizeof(struct nvme_ana_group_desc) +
		ctrl->max_namespaces * sizeof(__le32);
	if (ana_log_size > max_transfer_size) {
		dev_err(ctrl->device,
			"ANA log page size (%zd) larger than MDTS (%zd).\n",
			ana_log_size, max_transfer_size);
		dev_err(ctrl->device, "disabling ANA support.\n");
		goto out_uninit;
	}
	if (ana_log_size > ctrl->ana_log_size) {
		nvme_mpath_stop(ctrl);
		nvme_mpath_uninit(ctrl);
		ctrl->ana_log_buf = kvmalloc(ana_log_size, GFP_KERNEL);
		if (!ctrl->ana_log_buf)
			return -ENOMEM;
	}
	ctrl->ana_log_size = ana_log_size;
	error = nvme_read_ana_log(ctrl);
	if (error)
		goto out_uninit;
	return 0;

out_uninit:
	nvme_mpath_uninit(ctrl);
	return error;
}

void nvme_mpath_uninit(struct nvme_ctrl *ctrl)
{
	kvfree(ctrl->ana_log_buf);
	ctrl->ana_log_buf = NULL;
	ctrl->ana_log_size = 0;
}