/*
 * Copyright (c) 2017 Christoph Hellwig.
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms and conditions of the GNU General Public License,
 * version 2, as published by the Free Software Foundation.
 *
 * This program is distributed in the hope it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
 * more details.
 */

#include <linux/moduleparam.h>
#include <trace/events/block.h>
#include "nvme.h"

static bool multipath = true;
module_param(multipath, bool, 0444);
MODULE_PARM_DESC(multipath,
	"turn on native support for multiple controllers per subsystem");

/*
 * If multipathing is enabled, we always use the subsystem instance number
 * for numbering our devices, to avoid conflicts between subsystems that
 * have multiple controllers (and thus use the multipath-aware subsystem
 * node) and those that have a single controller and use the controller
 * node directly.
 */
void nvme_set_disk_name(char *disk_name, struct nvme_ns *ns,
			struct nvme_ctrl *ctrl, int *flags)
{
	if (!multipath) {
		sprintf(disk_name, "nvme%dn%d", ctrl->instance, ns->head->instance);
	} else if (ns->head->disk) {
		sprintf(disk_name, "nvme%dc%dn%d", ctrl->subsys->instance,
				ctrl->cntlid, ns->head->instance);
		*flags = GENHD_FL_HIDDEN;
	} else {
		sprintf(disk_name, "nvme%dn%d", ctrl->subsys->instance,
				ns->head->instance);
	}
}

void nvme_failover_req(struct request *req)
{
	struct nvme_ns *ns = req->q->queuedata;
	unsigned long flags;

	spin_lock_irqsave(&ns->head->requeue_lock, flags);
	blk_steal_bios(&ns->head->requeue_list, req);
	spin_unlock_irqrestore(&ns->head->requeue_lock, flags);
	blk_mq_end_request(req, 0);

	nvme_reset_ctrl(ns->ctrl);
	kblockd_schedule_work(&ns->head->requeue_work);
}

bool nvme_req_needs_failover(struct request *req, blk_status_t error)
{
	if (!(req->cmd_flags & REQ_NVME_MPATH))
		return false;
	return blk_path_error(error);
}

void nvme_kick_requeue_lists(struct nvme_ctrl *ctrl)
{
	struct nvme_ns *ns;

	down_read(&ctrl->namespaces_rwsem);
	list_for_each_entry(ns, &ctrl->namespaces, list) {
		if (ns->head->disk)
			kblockd_schedule_work(&ns->head->requeue_work);
	}
	up_read(&ctrl->namespaces_rwsem);
}
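
/*
 * Path selection: __nvme_find_path() walks the sibling namespaces of the
 * ns_head and caches the first one whose controller is LIVE as the current
 * path.  nvme_find_path() is the fast path; it reuses the cached namespace
 * under SRCU and only rescans when the cached path is missing or its
 * controller is no longer LIVE.  Callers must hold the head's SRCU read
 * lock.
 */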
static struct nvme_ns *__nvme_find_path(struct nvme_ns_head *head)
{
	struct nvme_ns *ns;

	list_for_each_entry_rcu(ns, &head->list, siblings) {
		if (ns->ctrl->state == NVME_CTRL_LIVE) {
			rcu_assign_pointer(head->current_path, ns);
			return ns;
		}
	}

	return NULL;
}

inline struct nvme_ns *nvme_find_path(struct nvme_ns_head *head)
{
	struct nvme_ns *ns = srcu_dereference(head->current_path, &head->srcu);

	if (unlikely(!ns || ns->ctrl->state != NVME_CTRL_LIVE))
		ns = __nvme_find_path(head);
	return ns;
}
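
/*
 * make_request entry point for the multipath (ns_head) node.  A usable path
 * is looked up under SRCU and the bio is redirected to that path's disk,
 * tagged with REQ_NVME_MPATH so a failed request can later be failed over
 * to another path.  If no path is live but paths still exist, the bio is
 * parked on the requeue list for nvme_requeue_work() to resubmit once a
 * path becomes available; if no paths are left, the bio is failed.
 */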
static blk_qc_t nvme_ns_head_make_request(struct request_queue *q,
		struct bio *bio)
{
	struct nvme_ns_head *head = q->queuedata;
	struct device *dev = disk_to_dev(head->disk);
	struct nvme_ns *ns;
	blk_qc_t ret = BLK_QC_T_NONE;
	int srcu_idx;

	srcu_idx = srcu_read_lock(&head->srcu);
	ns = nvme_find_path(head);
	if (likely(ns)) {
		bio->bi_disk = ns->disk;
		bio->bi_opf |= REQ_NVME_MPATH;
		trace_block_bio_remap(bio->bi_disk->queue, bio,
				      disk_devt(ns->head->disk),
				      bio->bi_iter.bi_sector);
		ret = direct_make_request(bio);
	} else if (!list_empty_careful(&head->list)) {
		dev_warn_ratelimited(dev, "no path available - requeuing I/O\n");

		spin_lock_irq(&head->requeue_lock);
		bio_list_add(&head->requeue_list, bio);
		spin_unlock_irq(&head->requeue_lock);
	} else {
		dev_warn_ratelimited(dev, "no path - failing I/O\n");

		bio->bi_status = BLK_STS_IOERR;
		bio_endio(bio);
	}

	srcu_read_unlock(&head->srcu, srcu_idx);
	return ret;
}

static bool nvme_ns_head_poll(struct request_queue *q, blk_qc_t qc)
{
	struct nvme_ns_head *head = q->queuedata;
	struct nvme_ns *ns;
	bool found = false;
	int srcu_idx;

	srcu_idx = srcu_read_lock(&head->srcu);
	ns = srcu_dereference(head->current_path, &head->srcu);
	if (likely(ns && ns->ctrl->state == NVME_CTRL_LIVE))
		found = ns->queue->poll_fn(q, qc);
	srcu_read_unlock(&head->srcu, srcu_idx);
	return found;
}

static void nvme_requeue_work(struct work_struct *work)
{
	struct nvme_ns_head *head =
		container_of(work, struct nvme_ns_head, requeue_work);
	struct bio *bio, *next;

	spin_lock_irq(&head->requeue_lock);
	next = bio_list_get(&head->requeue_list);
	spin_unlock_irq(&head->requeue_lock);

	while ((bio = next) != NULL) {
		next = bio->bi_next;
		bio->bi_next = NULL;

		/*
		 * Reset disk to the mpath node and resubmit to select a new
		 * path.
		 */
		bio->bi_disk = head->disk;
		generic_make_request(bio);
	}
}

int nvme_mpath_alloc_disk(struct nvme_ctrl *ctrl, struct nvme_ns_head *head)
{
	struct request_queue *q;
	bool vwc = false;

	bio_list_init(&head->requeue_list);
	spin_lock_init(&head->requeue_lock);
	INIT_WORK(&head->requeue_work, nvme_requeue_work);

	/*
	 * Add a multipath node if the subsystem supports multiple controllers.
	 * We also do this for private namespaces as the namespace sharing data
	 * could change after a rescan.
	 */
	if (!(ctrl->subsys->cmic & (1 << 1)) || !multipath)
		return 0;

	q = blk_alloc_queue_node(GFP_KERNEL, NUMA_NO_NODE, NULL);
	if (!q)
		goto out;
	q->queuedata = head;
	blk_queue_make_request(q, nvme_ns_head_make_request);
	q->poll_fn = nvme_ns_head_poll;
	blk_queue_flag_set(QUEUE_FLAG_NONROT, q);
	/* set to a default value of 512 until the disk is validated */
	blk_queue_logical_block_size(q, 512);

	/* we need to propagate up the VWC settings */
	if (ctrl->vwc & NVME_CTRL_VWC_PRESENT)
		vwc = true;
	blk_queue_write_cache(q, vwc, vwc);

	head->disk = alloc_disk(0);
	if (!head->disk)
		goto out_cleanup_queue;
	head->disk->fops = &nvme_ns_head_ops;
	head->disk->private_data = head;
	head->disk->queue = q;
	head->disk->flags = GENHD_FL_EXT_DEVT;
	sprintf(head->disk->disk_name, "nvme%dn%d",
			ctrl->subsys->instance, head->instance);
	return 0;

out_cleanup_queue:
	blk_cleanup_queue(q);
out:
	return -ENOMEM;
}

void nvme_mpath_add_disk(struct nvme_ns_head *head)
{
	if (!head->disk)
		return;

	mutex_lock(&head->subsys->lock);
	if (!(head->disk->flags & GENHD_FL_UP)) {
		device_add_disk(&head->subsys->dev, head->disk);
		if (sysfs_create_group(&disk_to_dev(head->disk)->kobj,
				&nvme_ns_id_attr_group))
			pr_warn("%s: failed to create sysfs group for identification\n",
				head->disk->disk_name);
	}
	mutex_unlock(&head->subsys->lock);
}

void nvme_mpath_remove_disk(struct nvme_ns_head *head)
{
	if (!head->disk)
		return;
	sysfs_remove_group(&disk_to_dev(head->disk)->kobj,
			   &nvme_ns_id_attr_group);
	del_gendisk(head->disk);
	blk_set_queue_dying(head->disk->queue);
	/* make sure all pending bios are cleaned up */
	kblockd_schedule_work(&head->requeue_work);
	flush_work(&head->requeue_work);
	blk_cleanup_queue(head->disk->queue);
	put_disk(head->disk);
}