/*
 * Copyright (c) 2017 Christoph Hellwig.
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms and conditions of the GNU General Public License,
 * version 2, as published by the Free Software Foundation.
 *
 * This program is distributed in the hope it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
 * more details.
 */

#include <linux/moduleparam.h>
#include "nvme.h"

static bool multipath = true;
module_param(multipath, bool, 0444);
MODULE_PARM_DESC(multipath,
	"turn on native support for multiple controllers per subsystem");

/*
 * If multipathing is enabled we need to always use the subsystem instance
 * number for numbering our devices to avoid conflicts between subsystems that
 * have multiple controllers and thus use the multipath-aware subsystem node
 * and those that have a single controller and use the controller node
 * directly.
 */
void nvme_set_disk_name(char *disk_name, struct nvme_ns *ns,
			struct nvme_ctrl *ctrl, int *flags)
{
	if (!multipath) {
		sprintf(disk_name, "nvme%dn%d", ctrl->instance, ns->head->instance);
	} else if (ns->head->disk) {
		sprintf(disk_name, "nvme%dc%dn%d", ctrl->subsys->instance,
				ctrl->cntlid, ns->head->instance);
		*flags = GENHD_FL_HIDDEN;
	} else {
		sprintf(disk_name, "nvme%dn%d", ctrl->subsys->instance,
				ns->head->instance);
	}
}

/*
 * Fail over a request: steal its bios onto the shared requeue list, complete
 * the original request, reset the controller it was issued on and kick the
 * requeue work so the bios are resubmitted through another path.
 */
void nvme_failover_req(struct request *req)
{
	struct nvme_ns *ns = req->q->queuedata;
	unsigned long flags;

	spin_lock_irqsave(&ns->head->requeue_lock, flags);
	blk_steal_bios(&ns->head->requeue_list, req);
	spin_unlock_irqrestore(&ns->head->requeue_lock, flags);
	blk_mq_end_request(req, 0);

	nvme_reset_ctrl(ns->ctrl);
	kblockd_schedule_work(&ns->head->requeue_work);
}

bool nvme_req_needs_failover(struct request *req, blk_status_t error)
{
	if (!(req->cmd_flags & REQ_NVME_MPATH))
		return false;
	return blk_path_error(error);
}

void nvme_kick_requeue_lists(struct nvme_ctrl *ctrl)
{
	struct nvme_ns *ns;

	down_read(&ctrl->namespaces_rwsem);
	list_for_each_entry(ns, &ctrl->namespaces, list) {
		if (ns->head->disk)
			kblockd_schedule_work(&ns->head->requeue_work);
	}
	up_read(&ctrl->namespaces_rwsem);
}

static struct nvme_ns *__nvme_find_path(struct nvme_ns_head *head)
{
	struct nvme_ns *ns;

	list_for_each_entry_rcu(ns, &head->list, siblings) {
		if (ns->ctrl->state == NVME_CTRL_LIVE) {
			rcu_assign_pointer(head->current_path, ns);
			return ns;
		}
	}

	return NULL;
}

/*
 * Return the cached current path if its controller is live; otherwise scan
 * the sibling namespaces for one with a live controller and cache that.
 * Callers must hold the SRCU read lock on head->srcu.
 */
inline struct nvme_ns *nvme_find_path(struct nvme_ns_head *head)
{
	struct nvme_ns *ns = srcu_dereference(head->current_path, &head->srcu);

	if (unlikely(!ns || ns->ctrl->state != NVME_CTRL_LIVE))
		ns = __nvme_find_path(head);
	return ns;
}

static blk_qc_t nvme_ns_head_make_request(struct request_queue *q,
		struct bio *bio)
{
	struct nvme_ns_head *head = q->queuedata;
	struct device *dev = disk_to_dev(head->disk);
	struct nvme_ns *ns;
	blk_qc_t ret = BLK_QC_T_NONE;
	int srcu_idx;

	srcu_idx = srcu_read_lock(&head->srcu);
	ns = nvme_find_path(head);
	if (likely(ns)) {
		bio->bi_disk = ns->disk;
		bio->bi_opf |= REQ_NVME_MPATH;
		ret = direct_make_request(bio);
	} else if (!list_empty_careful(&head->list)) {
		dev_warn_ratelimited(dev, "no path available - requeuing I/O\n");

		spin_lock_irq(&head->requeue_lock);
		bio_list_add(&head->requeue_list, bio);
		spin_unlock_irq(&head->requeue_lock);
	} else {
		dev_warn_ratelimited(dev, "no path - failing I/O\n");

		bio->bi_status = BLK_STS_IOERR;
		bio_endio(bio);
	}

	srcu_read_unlock(&head->srcu, srcu_idx);
	return ret;
}

static bool nvme_ns_head_poll(struct request_queue *q, blk_qc_t qc)
{
	struct nvme_ns_head *head = q->queuedata;
	struct nvme_ns *ns;
	bool found = false;
	int srcu_idx;

	srcu_idx = srcu_read_lock(&head->srcu);
	ns = srcu_dereference(head->current_path, &head->srcu);
	if (likely(ns && ns->ctrl->state == NVME_CTRL_LIVE))
		found = ns->queue->poll_fn(q, qc);
	srcu_read_unlock(&head->srcu, srcu_idx);
	return found;
}

/* Resubmit bios that were held back while no path was available. */
static void nvme_requeue_work(struct work_struct *work)
{
	struct nvme_ns_head *head =
		container_of(work, struct nvme_ns_head, requeue_work);
	struct bio *bio, *next;

	spin_lock_irq(&head->requeue_lock);
	next = bio_list_get(&head->requeue_list);
	spin_unlock_irq(&head->requeue_lock);

	while ((bio = next) != NULL) {
		next = bio->bi_next;
		bio->bi_next = NULL;

		/*
		 * Reset disk to the mpath node and resubmit to select a new
		 * path.
		 */
		bio->bi_disk = head->disk;
		generic_make_request(bio);
	}
}

int nvme_mpath_alloc_disk(struct nvme_ctrl *ctrl, struct nvme_ns_head *head)
{
	struct request_queue *q;
	bool vwc = false;

	bio_list_init(&head->requeue_list);
	spin_lock_init(&head->requeue_lock);
	INIT_WORK(&head->requeue_work, nvme_requeue_work);

	/*
	 * Add a multipath node if the subsystem supports multiple controllers.
	 * We also do this for private namespaces as the namespace sharing data
	 * could change after a rescan.
	 */
	if (!(ctrl->subsys->cmic & (1 << 1)) || !multipath)
		return 0;

	q = blk_alloc_queue_node(GFP_KERNEL, NUMA_NO_NODE, NULL);
	if (!q)
		goto out;
	q->queuedata = head;
	blk_queue_make_request(q, nvme_ns_head_make_request);
	q->poll_fn = nvme_ns_head_poll;
	blk_queue_flag_set(QUEUE_FLAG_NONROT, q);
	/* set to a default value of 512 until the disk is validated */
	blk_queue_logical_block_size(q, 512);

	/* we need to propagate up the VWC settings */
	if (ctrl->vwc & NVME_CTRL_VWC_PRESENT)
		vwc = true;
	blk_queue_write_cache(q, vwc, vwc);

	head->disk = alloc_disk(0);
	if (!head->disk)
		goto out_cleanup_queue;
	head->disk->fops = &nvme_ns_head_ops;
	head->disk->private_data = head;
	head->disk->queue = q;
	head->disk->flags = GENHD_FL_EXT_DEVT;
	sprintf(head->disk->disk_name, "nvme%dn%d",
			ctrl->subsys->instance, head->instance);
	return 0;

out_cleanup_queue:
	blk_cleanup_queue(q);
out:
	return -ENOMEM;
}

void nvme_mpath_add_disk(struct nvme_ns_head *head)
{
	if (!head->disk)
		return;

	mutex_lock(&head->subsys->lock);
	if (!(head->disk->flags & GENHD_FL_UP)) {
		device_add_disk(&head->subsys->dev, head->disk);
		if (sysfs_create_group(&disk_to_dev(head->disk)->kobj,
				&nvme_ns_id_attr_group))
			pr_warn("%s: failed to create sysfs group for identification\n",
				head->disk->disk_name);
	}
	mutex_unlock(&head->subsys->lock);
}

void nvme_mpath_remove_disk(struct nvme_ns_head *head)
{
	if (!head->disk)
		return;
	sysfs_remove_group(&disk_to_dev(head->disk)->kobj,
			   &nvme_ns_id_attr_group);
	del_gendisk(head->disk);
	blk_set_queue_dying(head->disk->queue);
	/* make sure all pending bios are cleaned up */
	kblockd_schedule_work(&head->requeue_work);
	flush_work(&head->requeue_work);
	blk_cleanup_queue(head->disk->queue);
	put_disk(head->disk);
}