1 /* 2 * Copyright (c) 2017 Christoph Hellwig. 3 * 4 * This program is free software; you can redistribute it and/or modify it 5 * under the terms and conditions of the GNU General Public License, 6 * version 2, as published by the Free Software Foundation. 7 * 8 * This program is distributed in the hope it will be useful, but WITHOUT 9 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 10 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for 11 * more details. 12 */ 13 14 #include <linux/moduleparam.h> 15 #include "nvme.h" 16 17 static bool multipath = true; 18 module_param(multipath, bool, 0644); 19 MODULE_PARM_DESC(multipath, 20 "turn on native support for multiple controllers per subsystem"); 21 22 void nvme_failover_req(struct request *req) 23 { 24 struct nvme_ns *ns = req->q->queuedata; 25 unsigned long flags; 26 27 spin_lock_irqsave(&ns->head->requeue_lock, flags); 28 blk_steal_bios(&ns->head->requeue_list, req); 29 spin_unlock_irqrestore(&ns->head->requeue_lock, flags); 30 blk_mq_end_request(req, 0); 31 32 nvme_reset_ctrl(ns->ctrl); 33 kblockd_schedule_work(&ns->head->requeue_work); 34 } 35 36 bool nvme_req_needs_failover(struct request *req, blk_status_t error) 37 { 38 if (!(req->cmd_flags & REQ_NVME_MPATH)) 39 return false; 40 return blk_path_error(error); 41 } 42 43 void nvme_kick_requeue_lists(struct nvme_ctrl *ctrl) 44 { 45 struct nvme_ns *ns; 46 47 mutex_lock(&ctrl->namespaces_mutex); 48 list_for_each_entry(ns, &ctrl->namespaces, list) { 49 if (ns->head->disk) 50 kblockd_schedule_work(&ns->head->requeue_work); 51 } 52 mutex_unlock(&ctrl->namespaces_mutex); 53 } 54 55 static struct nvme_ns *__nvme_find_path(struct nvme_ns_head *head) 56 { 57 struct nvme_ns *ns; 58 59 list_for_each_entry_rcu(ns, &head->list, siblings) { 60 if (ns->ctrl->state == NVME_CTRL_LIVE) { 61 rcu_assign_pointer(head->current_path, ns); 62 return ns; 63 } 64 } 65 66 return NULL; 67 } 68 69 inline struct nvme_ns *nvme_find_path(struct nvme_ns_head *head) 70 { 71 struct nvme_ns *ns = srcu_dereference(head->current_path, &head->srcu); 72 73 if (unlikely(!ns || ns->ctrl->state != NVME_CTRL_LIVE)) 74 ns = __nvme_find_path(head); 75 return ns; 76 } 77 78 static blk_qc_t nvme_ns_head_make_request(struct request_queue *q, 79 struct bio *bio) 80 { 81 struct nvme_ns_head *head = q->queuedata; 82 struct device *dev = disk_to_dev(head->disk); 83 struct nvme_ns *ns; 84 blk_qc_t ret = BLK_QC_T_NONE; 85 int srcu_idx; 86 87 srcu_idx = srcu_read_lock(&head->srcu); 88 ns = nvme_find_path(head); 89 if (likely(ns)) { 90 bio->bi_disk = ns->disk; 91 bio->bi_opf |= REQ_NVME_MPATH; 92 ret = direct_make_request(bio); 93 } else if (!list_empty_careful(&head->list)) { 94 dev_warn_ratelimited(dev, "no path available - requeuing I/O\n"); 95 96 spin_lock_irq(&head->requeue_lock); 97 bio_list_add(&head->requeue_list, bio); 98 spin_unlock_irq(&head->requeue_lock); 99 } else { 100 dev_warn_ratelimited(dev, "no path - failing I/O\n"); 101 102 bio->bi_status = BLK_STS_IOERR; 103 bio_endio(bio); 104 } 105 106 srcu_read_unlock(&head->srcu, srcu_idx); 107 return ret; 108 } 109 110 static bool nvme_ns_head_poll(struct request_queue *q, blk_qc_t qc) 111 { 112 struct nvme_ns_head *head = q->queuedata; 113 struct nvme_ns *ns; 114 bool found = false; 115 int srcu_idx; 116 117 srcu_idx = srcu_read_lock(&head->srcu); 118 ns = srcu_dereference(head->current_path, &head->srcu); 119 if (likely(ns && ns->ctrl->state == NVME_CTRL_LIVE)) 120 found = ns->queue->poll_fn(q, qc); 121 srcu_read_unlock(&head->srcu, srcu_idx); 122 return found; 123 } 124 125 static void nvme_requeue_work(struct work_struct *work) 126 { 127 struct nvme_ns_head *head = 128 container_of(work, struct nvme_ns_head, requeue_work); 129 struct bio *bio, *next; 130 131 spin_lock_irq(&head->requeue_lock); 132 next = bio_list_get(&head->requeue_list); 133 spin_unlock_irq(&head->requeue_lock); 134 135 while ((bio = next) != NULL) { 136 next = bio->bi_next; 137 bio->bi_next = NULL; 138 139 /* 140 * Reset disk to the mpath node and resubmit to select a new 141 * path. 142 */ 143 bio->bi_disk = head->disk; 144 generic_make_request(bio); 145 } 146 } 147 148 int nvme_mpath_alloc_disk(struct nvme_ctrl *ctrl, struct nvme_ns_head *head) 149 { 150 struct request_queue *q; 151 bool vwc = false; 152 153 bio_list_init(&head->requeue_list); 154 spin_lock_init(&head->requeue_lock); 155 INIT_WORK(&head->requeue_work, nvme_requeue_work); 156 157 /* 158 * Add a multipath node if the subsystems supports multiple controllers. 159 * We also do this for private namespaces as the namespace sharing data could 160 * change after a rescan. 161 */ 162 if (!(ctrl->subsys->cmic & (1 << 1)) || !multipath) 163 return 0; 164 165 q = blk_alloc_queue_node(GFP_KERNEL, NUMA_NO_NODE); 166 if (!q) 167 goto out; 168 q->queuedata = head; 169 blk_queue_make_request(q, nvme_ns_head_make_request); 170 q->poll_fn = nvme_ns_head_poll; 171 queue_flag_set_unlocked(QUEUE_FLAG_NONROT, q); 172 /* set to a default value for 512 until disk is validated */ 173 blk_queue_logical_block_size(q, 512); 174 175 /* we need to propagate up the VMC settings */ 176 if (ctrl->vwc & NVME_CTRL_VWC_PRESENT) 177 vwc = true; 178 blk_queue_write_cache(q, vwc, vwc); 179 180 head->disk = alloc_disk(0); 181 if (!head->disk) 182 goto out_cleanup_queue; 183 head->disk->fops = &nvme_ns_head_ops; 184 head->disk->private_data = head; 185 head->disk->queue = q; 186 head->disk->flags = GENHD_FL_EXT_DEVT; 187 sprintf(head->disk->disk_name, "nvme%dn%d", 188 ctrl->subsys->instance, head->instance); 189 return 0; 190 191 out_cleanup_queue: 192 blk_cleanup_queue(q); 193 out: 194 return -ENOMEM; 195 } 196 197 void nvme_mpath_add_disk(struct nvme_ns_head *head) 198 { 199 if (!head->disk) 200 return; 201 202 mutex_lock(&head->subsys->lock); 203 if (!(head->disk->flags & GENHD_FL_UP)) { 204 device_add_disk(&head->subsys->dev, head->disk); 205 if (sysfs_create_group(&disk_to_dev(head->disk)->kobj, 206 &nvme_ns_id_attr_group)) 207 pr_warn("%s: failed to create sysfs group for identification\n", 208 head->disk->disk_name); 209 } 210 mutex_unlock(&head->subsys->lock); 211 } 212 213 void nvme_mpath_add_disk_links(struct nvme_ns *ns) 214 { 215 struct kobject *slave_disk_kobj, *holder_disk_kobj; 216 217 if (!ns->head->disk) 218 return; 219 220 slave_disk_kobj = &disk_to_dev(ns->disk)->kobj; 221 if (sysfs_create_link(ns->head->disk->slave_dir, slave_disk_kobj, 222 kobject_name(slave_disk_kobj))) 223 return; 224 225 holder_disk_kobj = &disk_to_dev(ns->head->disk)->kobj; 226 if (sysfs_create_link(ns->disk->part0.holder_dir, holder_disk_kobj, 227 kobject_name(holder_disk_kobj))) 228 sysfs_remove_link(ns->head->disk->slave_dir, 229 kobject_name(slave_disk_kobj)); 230 } 231 232 void nvme_mpath_remove_disk(struct nvme_ns_head *head) 233 { 234 if (!head->disk) 235 return; 236 sysfs_remove_group(&disk_to_dev(head->disk)->kobj, 237 &nvme_ns_id_attr_group); 238 del_gendisk(head->disk); 239 blk_set_queue_dying(head->disk->queue); 240 /* make sure all pending bios are cleaned up */ 241 kblockd_schedule_work(&head->requeue_work); 242 flush_work(&head->requeue_work); 243 blk_cleanup_queue(head->disk->queue); 244 put_disk(head->disk); 245 } 246 247 void nvme_mpath_remove_disk_links(struct nvme_ns *ns) 248 { 249 if (!ns->head->disk) 250 return; 251 252 sysfs_remove_link(ns->disk->part0.holder_dir, 253 kobject_name(&disk_to_dev(ns->head->disk)->kobj)); 254 sysfs_remove_link(ns->head->disk->slave_dir, 255 kobject_name(&disk_to_dev(ns->disk)->kobj)); 256 } 257