1 /* 2 * Copyright (c) 2016 Avago Technologies. All rights reserved. 3 * 4 * This program is free software; you can redistribute it and/or modify 5 * it under the terms of version 2 of the GNU General Public License as 6 * published by the Free Software Foundation. 7 * 8 * This program is distributed in the hope that it will be useful. 9 * ALL EXPRESS OR IMPLIED CONDITIONS, REPRESENTATIONS AND WARRANTIES, 10 * INCLUDING ANY IMPLIED WARRANTY OF MERCHANTABILITY, FITNESS FOR A 11 * PARTICULAR PURPOSE, OR NON-INFRINGEMENT, ARE DISCLAIMED, EXCEPT TO 12 * THE EXTENT THAT SUCH DISCLAIMERS ARE HELD TO BE LEGALLY INVALID. 13 * See the GNU General Public License for more details, a copy of which 14 * can be found in the file COPYING included with this package 15 * 16 */ 17 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt 18 #include <linux/module.h> 19 #include <linux/parser.h> 20 #include <uapi/scsi/fc/fc_fs.h> 21 #include <uapi/scsi/fc/fc_els.h> 22 #include <linux/delay.h> 23 24 #include "nvme.h" 25 #include "fabrics.h" 26 #include <linux/nvme-fc-driver.h> 27 #include <linux/nvme-fc.h> 28 29 30 /* *************************** Data Structures/Defines ****************** */ 31 32 33 /* 34 * We handle AEN commands ourselves and don't even let the 35 * block layer know about them. 
 */
#define NVME_FC_NR_AEN_COMMANDS	1
/* admin queue depth left for blk-mq once the AEN slots are held back */
#define NVME_FC_AQ_BLKMQ_DEPTH	\
	(NVMF_AQ_DEPTH - NVME_FC_NR_AEN_COMMANDS)
#define AEN_CMDID_BASE		(NVME_FC_AQ_BLKMQ_DEPTH + 1)

/*
 * Flags kept in nvme_fc_queue.flags.
 * NOTE(review): this file hands NVME_FC_Q_CONNECTED to set_bit(), which
 * takes a bit number, so the value (1 << 0) ends up addressing bit 1.
 */
enum nvme_fc_queue_flags {
	NVME_FC_Q_CONNECTED = (1 << 0),
};

#define NVMEFC_QUEUE_DELAY	3		/* ms units */

/* per-queue transport state; nvme_fc_ctrl.queues points at these */
struct nvme_fc_queue {
	struct nvme_fc_ctrl	*ctrl;
	struct device		*dev;
	struct blk_mq_hw_ctx	*hctx;
	void			*lldd_handle;
	int			queue_size;
	size_t			cmnd_capsule_len;
	u32			qnum;
	u32			rqcnt;
	u32			seqno;

	u64			connection_id;
	atomic_t		csn;

	unsigned long		flags;
} __aligned(sizeof(u64));	/* alignment for other things alloc'd with */

/* bit flags stored in the 'flags' member of the LS and FCP op structures */
enum nvme_fcop_flags {
	FCOP_FLAGS_TERMIO	= (1 << 0),
	FCOP_FLAGS_RELEASED	= (1 << 1),
	FCOP_FLAGS_COMPLETE	= (1 << 2),
	FCOP_FLAGS_AEN		= (1 << 3),
};

/*
 * Transport wrapper around the nvmefc_ls_req that is handed to the LLDD;
 * ls_req_to_lsop() maps the embedded ls_req back to this structure.
 */
struct nvmefc_ls_req_op {
	struct nvmefc_ls_req	ls_req;

	struct nvme_fc_rport	*rport;
	struct nvme_fc_queue	*queue;
	struct request		*rq;
	u32			flags;

	int			ls_error;
	struct completion	ls_done;
	struct list_head	lsreq_list;	/* rport->ls_req_list */
	bool			req_queued;
};

/* values held in nvme_fc_fcp_op.state (an atomic_t) */
enum nvme_fcpop_state {
	FCPOP_STATE_UNINIT	= 0,
	FCPOP_STATE_IDLE	= 1,
	FCPOP_STATE_ACTIVE	= 2,
	FCPOP_STATE_ABORTED	= 3,
	FCPOP_STATE_COMPLETE	= 4,
};

/*
 * Per-command transport context; fcp_req_to_fcp_op() maps the embedded
 * fcp_req back to this structure.
 */
struct nvme_fc_fcp_op {
	struct nvme_request	nreq;		/*
						 * nvme/host/core.c
						 * requires this to be
						 * the 1st element in the
						 * private structure
						 * associated with the
						 * request.
						 */
	struct nvmefc_fcp_req	fcp_req;

	struct nvme_fc_ctrl	*ctrl;
	struct nvme_fc_queue	*queue;
	struct request		*rq;

	atomic_t		state;
	u32			flags;
	u32			rqno;
	u32			nents;

	struct nvme_fc_cmd_iu	cmd_iu;
	struct nvme_fc_ersp_iu	rsp_iu;
};

/* transport-side record of a registered local (host) FC port */
struct nvme_fc_lport {
	struct nvme_fc_local_port	localport;

	struct ida			endp_cnt;
	struct list_head		port_list;	/* nvme_fc_port_list */
	struct list_head		endp_list;
	struct device			*dev;	/* physical device for dma */
	struct nvme_fc_port_template	*ops;
	struct kref			ref;
} __aligned(sizeof(u64));	/* alignment for other things alloc'd with */

/* transport-side record of a registered remote (subsystem) FC port */
struct nvme_fc_rport {
	struct nvme_fc_remote_port	remoteport;

	struct list_head		endp_list; /* for lport->endp_list */
	struct list_head		ctrl_list;
	struct list_head		ls_req_list;
	struct device			*dev;	/* physical device for dma */
	struct nvme_fc_lport		*lport;
	spinlock_t			lock;
	struct kref			ref;
} __aligned(sizeof(u64));	/* alignment for other things alloc'd with */

/* bit flags stored in nvme_fc_ctrl.flags */
enum nvme_fcctrl_flags {
	FCCTRL_TERMIO		= (1 << 0),
};

/* FC transport controller; embeds the generic nvme_ctrl as its last member */
struct nvme_fc_ctrl {
	spinlock_t		lock;
	struct nvme_fc_queue	*queues;
	struct device		*dev;
	struct nvme_fc_lport	*lport;
	struct nvme_fc_rport	*rport;
	u32			queue_count;
	u32			cnum;

	u64			association_id;

	u64			cap;

	struct list_head	ctrl_list;	/* rport->ctrl_list */

	struct blk_mq_tag_set	admin_tag_set;
	struct blk_mq_tag_set	tag_set;

	struct work_struct	delete_work;
	struct work_struct	reset_work;
	struct delayed_work	connect_work;

	struct kref		ref;
	u32			flags;
	u32			iocnt;

	struct nvme_fc_fcp_op	aen_ops[NVME_FC_NR_AEN_COMMANDS];

	struct nvme_ctrl	ctrl;
};

/* map an embedded nvme_ctrl back to its containing nvme_fc_ctrl */
static inline struct nvme_fc_ctrl *
to_fc_ctrl(struct nvme_ctrl *ctrl)
{
	return container_of(ctrl, struct nvme_fc_ctrl, ctrl);
}

/* map an embedded nvme_fc_local_port back to its containing nvme_fc_lport */
static inline struct nvme_fc_lport *
localport_to_lport(struct nvme_fc_local_port *portptr)
{
	return container_of(portptr, struct nvme_fc_lport, localport);
}

/* map an embedded nvme_fc_remote_port back to its containing nvme_fc_rport */
static inline struct nvme_fc_rport *
remoteport_to_rport(struct nvme_fc_remote_port *portptr)
{
	return container_of(portptr, struct nvme_fc_rport, remoteport);
}

/* map an embedded nvmefc_ls_req back to its containing nvmefc_ls_req_op */
static inline struct nvmefc_ls_req_op *
ls_req_to_lsop(struct nvmefc_ls_req *lsreq)
{
	return container_of(lsreq, struct nvmefc_ls_req_op, ls_req);
}

/* map an embedded nvmefc_fcp_req back to its containing nvme_fc_fcp_op */
static inline struct nvme_fc_fcp_op *
fcp_req_to_fcp_op(struct nvmefc_fcp_req *fcpreq)
{
	return container_of(fcpreq, struct nvme_fc_fcp_op, fcp_req);
}



/* *************************** Globals **************************** */


/* guards nvme_fc_lport_list and the per-lport endp_list in this file */
static DEFINE_SPINLOCK(nvme_fc_lock);

static LIST_HEAD(nvme_fc_lport_list);
static DEFINE_IDA(nvme_fc_local_port_cnt);
static DEFINE_IDA(nvme_fc_ctrl_cnt);

static struct workqueue_struct *nvme_fc_wq;



/* *********************** FC-NVME Port Management ************************ */

static int __nvme_fc_del_ctrl(struct nvme_fc_ctrl *);
static void __nvme_fc_delete_hw_queue(struct nvme_fc_ctrl *,
			struct nvme_fc_queue *, unsigned int);


/**
 * nvme_fc_register_localport - transport entry point called by an
 *                              LLDD to register the existence of a NVME
 *                              host FC port.
 * @pinfo:     pointer to information about the port to be registered
 * @template:  LLDD entrypoints and operational parameters for the port
 * @dev:       physical hardware device node port corresponds to. Will be
 *             used for DMA mappings
 * @portptr:   pointer to a local port pointer. Upon success, the routine
 *             will allocate a nvme_fc_local_port structure and place its
 *             address in the local port pointer. Upon failure, local port
 *             pointer will be set to 0.
 *
 * Returns:
 * a completion status.
Must be 0 upon success; a negative errno 243 * (ex: -ENXIO) upon failure. 244 */ 245 int 246 nvme_fc_register_localport(struct nvme_fc_port_info *pinfo, 247 struct nvme_fc_port_template *template, 248 struct device *dev, 249 struct nvme_fc_local_port **portptr) 250 { 251 struct nvme_fc_lport *newrec; 252 unsigned long flags; 253 int ret, idx; 254 255 if (!template->localport_delete || !template->remoteport_delete || 256 !template->ls_req || !template->fcp_io || 257 !template->ls_abort || !template->fcp_abort || 258 !template->max_hw_queues || !template->max_sgl_segments || 259 !template->max_dif_sgl_segments || !template->dma_boundary) { 260 ret = -EINVAL; 261 goto out_reghost_failed; 262 } 263 264 newrec = kmalloc((sizeof(*newrec) + template->local_priv_sz), 265 GFP_KERNEL); 266 if (!newrec) { 267 ret = -ENOMEM; 268 goto out_reghost_failed; 269 } 270 271 idx = ida_simple_get(&nvme_fc_local_port_cnt, 0, 0, GFP_KERNEL); 272 if (idx < 0) { 273 ret = -ENOSPC; 274 goto out_fail_kfree; 275 } 276 277 if (!get_device(dev) && dev) { 278 ret = -ENODEV; 279 goto out_ida_put; 280 } 281 282 INIT_LIST_HEAD(&newrec->port_list); 283 INIT_LIST_HEAD(&newrec->endp_list); 284 kref_init(&newrec->ref); 285 newrec->ops = template; 286 newrec->dev = dev; 287 ida_init(&newrec->endp_cnt); 288 newrec->localport.private = &newrec[1]; 289 newrec->localport.node_name = pinfo->node_name; 290 newrec->localport.port_name = pinfo->port_name; 291 newrec->localport.port_role = pinfo->port_role; 292 newrec->localport.port_id = pinfo->port_id; 293 newrec->localport.port_state = FC_OBJSTATE_ONLINE; 294 newrec->localport.port_num = idx; 295 296 spin_lock_irqsave(&nvme_fc_lock, flags); 297 list_add_tail(&newrec->port_list, &nvme_fc_lport_list); 298 spin_unlock_irqrestore(&nvme_fc_lock, flags); 299 300 if (dev) 301 dma_set_seg_boundary(dev, template->dma_boundary); 302 303 *portptr = &newrec->localport; 304 return 0; 305 306 out_ida_put: 307 ida_simple_remove(&nvme_fc_local_port_cnt, idx); 308 
out_fail_kfree: 309 kfree(newrec); 310 out_reghost_failed: 311 *portptr = NULL; 312 313 return ret; 314 } 315 EXPORT_SYMBOL_GPL(nvme_fc_register_localport); 316 317 static void 318 nvme_fc_free_lport(struct kref *ref) 319 { 320 struct nvme_fc_lport *lport = 321 container_of(ref, struct nvme_fc_lport, ref); 322 unsigned long flags; 323 324 WARN_ON(lport->localport.port_state != FC_OBJSTATE_DELETED); 325 WARN_ON(!list_empty(&lport->endp_list)); 326 327 /* remove from transport list */ 328 spin_lock_irqsave(&nvme_fc_lock, flags); 329 list_del(&lport->port_list); 330 spin_unlock_irqrestore(&nvme_fc_lock, flags); 331 332 /* let the LLDD know we've finished tearing it down */ 333 lport->ops->localport_delete(&lport->localport); 334 335 ida_simple_remove(&nvme_fc_local_port_cnt, lport->localport.port_num); 336 ida_destroy(&lport->endp_cnt); 337 338 put_device(lport->dev); 339 340 kfree(lport); 341 } 342 343 static void 344 nvme_fc_lport_put(struct nvme_fc_lport *lport) 345 { 346 kref_put(&lport->ref, nvme_fc_free_lport); 347 } 348 349 static int 350 nvme_fc_lport_get(struct nvme_fc_lport *lport) 351 { 352 return kref_get_unless_zero(&lport->ref); 353 } 354 355 /** 356 * nvme_fc_unregister_localport - transport entry point called by an 357 * LLDD to deregister/remove a previously 358 * registered a NVME host FC port. 359 * @localport: pointer to the (registered) local port that is to be 360 * deregistered. 361 * 362 * Returns: 363 * a completion status. Must be 0 upon success; a negative errno 364 * (ex: -ENXIO) upon failure. 
365 */ 366 int 367 nvme_fc_unregister_localport(struct nvme_fc_local_port *portptr) 368 { 369 struct nvme_fc_lport *lport = localport_to_lport(portptr); 370 unsigned long flags; 371 372 if (!portptr) 373 return -EINVAL; 374 375 spin_lock_irqsave(&nvme_fc_lock, flags); 376 377 if (portptr->port_state != FC_OBJSTATE_ONLINE) { 378 spin_unlock_irqrestore(&nvme_fc_lock, flags); 379 return -EINVAL; 380 } 381 portptr->port_state = FC_OBJSTATE_DELETED; 382 383 spin_unlock_irqrestore(&nvme_fc_lock, flags); 384 385 nvme_fc_lport_put(lport); 386 387 return 0; 388 } 389 EXPORT_SYMBOL_GPL(nvme_fc_unregister_localport); 390 391 /** 392 * nvme_fc_register_remoteport - transport entry point called by an 393 * LLDD to register the existence of a NVME 394 * subsystem FC port on its fabric. 395 * @localport: pointer to the (registered) local port that the remote 396 * subsystem port is connected to. 397 * @pinfo: pointer to information about the port to be registered 398 * @rport_p: pointer to a remote port pointer. Upon success, the routine 399 * will allocate a nvme_fc_remote_port structure and place its 400 * address in the remote port pointer. Upon failure, remote port 401 * pointer will be set to 0. 402 * 403 * Returns: 404 * a completion status. Must be 0 upon success; a negative errno 405 * (ex: -ENXIO) upon failure. 
406 */ 407 int 408 nvme_fc_register_remoteport(struct nvme_fc_local_port *localport, 409 struct nvme_fc_port_info *pinfo, 410 struct nvme_fc_remote_port **portptr) 411 { 412 struct nvme_fc_lport *lport = localport_to_lport(localport); 413 struct nvme_fc_rport *newrec; 414 unsigned long flags; 415 int ret, idx; 416 417 newrec = kmalloc((sizeof(*newrec) + lport->ops->remote_priv_sz), 418 GFP_KERNEL); 419 if (!newrec) { 420 ret = -ENOMEM; 421 goto out_reghost_failed; 422 } 423 424 if (!nvme_fc_lport_get(lport)) { 425 ret = -ESHUTDOWN; 426 goto out_kfree_rport; 427 } 428 429 idx = ida_simple_get(&lport->endp_cnt, 0, 0, GFP_KERNEL); 430 if (idx < 0) { 431 ret = -ENOSPC; 432 goto out_lport_put; 433 } 434 435 INIT_LIST_HEAD(&newrec->endp_list); 436 INIT_LIST_HEAD(&newrec->ctrl_list); 437 INIT_LIST_HEAD(&newrec->ls_req_list); 438 kref_init(&newrec->ref); 439 spin_lock_init(&newrec->lock); 440 newrec->remoteport.localport = &lport->localport; 441 newrec->dev = lport->dev; 442 newrec->lport = lport; 443 newrec->remoteport.private = &newrec[1]; 444 newrec->remoteport.port_role = pinfo->port_role; 445 newrec->remoteport.node_name = pinfo->node_name; 446 newrec->remoteport.port_name = pinfo->port_name; 447 newrec->remoteport.port_id = pinfo->port_id; 448 newrec->remoteport.port_state = FC_OBJSTATE_ONLINE; 449 newrec->remoteport.port_num = idx; 450 451 spin_lock_irqsave(&nvme_fc_lock, flags); 452 list_add_tail(&newrec->endp_list, &lport->endp_list); 453 spin_unlock_irqrestore(&nvme_fc_lock, flags); 454 455 *portptr = &newrec->remoteport; 456 return 0; 457 458 out_lport_put: 459 nvme_fc_lport_put(lport); 460 out_kfree_rport: 461 kfree(newrec); 462 out_reghost_failed: 463 *portptr = NULL; 464 return ret; 465 } 466 EXPORT_SYMBOL_GPL(nvme_fc_register_remoteport); 467 468 static void 469 nvme_fc_free_rport(struct kref *ref) 470 { 471 struct nvme_fc_rport *rport = 472 container_of(ref, struct nvme_fc_rport, ref); 473 struct nvme_fc_lport *lport = 474 
localport_to_lport(rport->remoteport.localport); 475 unsigned long flags; 476 477 WARN_ON(rport->remoteport.port_state != FC_OBJSTATE_DELETED); 478 WARN_ON(!list_empty(&rport->ctrl_list)); 479 480 /* remove from lport list */ 481 spin_lock_irqsave(&nvme_fc_lock, flags); 482 list_del(&rport->endp_list); 483 spin_unlock_irqrestore(&nvme_fc_lock, flags); 484 485 /* let the LLDD know we've finished tearing it down */ 486 lport->ops->remoteport_delete(&rport->remoteport); 487 488 ida_simple_remove(&lport->endp_cnt, rport->remoteport.port_num); 489 490 kfree(rport); 491 492 nvme_fc_lport_put(lport); 493 } 494 495 static void 496 nvme_fc_rport_put(struct nvme_fc_rport *rport) 497 { 498 kref_put(&rport->ref, nvme_fc_free_rport); 499 } 500 501 static int 502 nvme_fc_rport_get(struct nvme_fc_rport *rport) 503 { 504 return kref_get_unless_zero(&rport->ref); 505 } 506 507 static int 508 nvme_fc_abort_lsops(struct nvme_fc_rport *rport) 509 { 510 struct nvmefc_ls_req_op *lsop; 511 unsigned long flags; 512 513 restart: 514 spin_lock_irqsave(&rport->lock, flags); 515 516 list_for_each_entry(lsop, &rport->ls_req_list, lsreq_list) { 517 if (!(lsop->flags & FCOP_FLAGS_TERMIO)) { 518 lsop->flags |= FCOP_FLAGS_TERMIO; 519 spin_unlock_irqrestore(&rport->lock, flags); 520 rport->lport->ops->ls_abort(&rport->lport->localport, 521 &rport->remoteport, 522 &lsop->ls_req); 523 goto restart; 524 } 525 } 526 spin_unlock_irqrestore(&rport->lock, flags); 527 528 return 0; 529 } 530 531 /** 532 * nvme_fc_unregister_remoteport - transport entry point called by an 533 * LLDD to deregister/remove a previously 534 * registered a NVME subsystem FC port. 535 * @remoteport: pointer to the (registered) remote port that is to be 536 * deregistered. 537 * 538 * Returns: 539 * a completion status. Must be 0 upon success; a negative errno 540 * (ex: -ENXIO) upon failure. 
541 */ 542 int 543 nvme_fc_unregister_remoteport(struct nvme_fc_remote_port *portptr) 544 { 545 struct nvme_fc_rport *rport = remoteport_to_rport(portptr); 546 struct nvme_fc_ctrl *ctrl; 547 unsigned long flags; 548 549 if (!portptr) 550 return -EINVAL; 551 552 spin_lock_irqsave(&rport->lock, flags); 553 554 if (portptr->port_state != FC_OBJSTATE_ONLINE) { 555 spin_unlock_irqrestore(&rport->lock, flags); 556 return -EINVAL; 557 } 558 portptr->port_state = FC_OBJSTATE_DELETED; 559 560 /* tear down all associations to the remote port */ 561 list_for_each_entry(ctrl, &rport->ctrl_list, ctrl_list) 562 __nvme_fc_del_ctrl(ctrl); 563 564 spin_unlock_irqrestore(&rport->lock, flags); 565 566 nvme_fc_abort_lsops(rport); 567 568 nvme_fc_rport_put(rport); 569 return 0; 570 } 571 EXPORT_SYMBOL_GPL(nvme_fc_unregister_remoteport); 572 573 574 /* *********************** FC-NVME DMA Handling **************************** */ 575 576 /* 577 * The fcloop device passes in a NULL device pointer. Real LLD's will 578 * pass in a valid device pointer. If NULL is passed to the dma mapping 579 * routines, depending on the platform, it may or may not succeed, and 580 * may crash. 581 * 582 * As such: 583 * Wrapper all the dma routines and check the dev pointer. 584 * 585 * If simple mappings (return just a dma address, we'll noop them, 586 * returning a dma address of 0. 587 * 588 * On more complex mappings (dma_map_sg), a pseudo routine fills 589 * in the scatter list, setting all dma addresses to 0. 590 */ 591 592 static inline dma_addr_t 593 fc_dma_map_single(struct device *dev, void *ptr, size_t size, 594 enum dma_data_direction dir) 595 { 596 return dev ? dma_map_single(dev, ptr, size, dir) : (dma_addr_t)0L; 597 } 598 599 static inline int 600 fc_dma_mapping_error(struct device *dev, dma_addr_t dma_addr) 601 { 602 return dev ? 
dma_mapping_error(dev, dma_addr) : 0; 603 } 604 605 static inline void 606 fc_dma_unmap_single(struct device *dev, dma_addr_t addr, size_t size, 607 enum dma_data_direction dir) 608 { 609 if (dev) 610 dma_unmap_single(dev, addr, size, dir); 611 } 612 613 static inline void 614 fc_dma_sync_single_for_cpu(struct device *dev, dma_addr_t addr, size_t size, 615 enum dma_data_direction dir) 616 { 617 if (dev) 618 dma_sync_single_for_cpu(dev, addr, size, dir); 619 } 620 621 static inline void 622 fc_dma_sync_single_for_device(struct device *dev, dma_addr_t addr, size_t size, 623 enum dma_data_direction dir) 624 { 625 if (dev) 626 dma_sync_single_for_device(dev, addr, size, dir); 627 } 628 629 /* pseudo dma_map_sg call */ 630 static int 631 fc_map_sg(struct scatterlist *sg, int nents) 632 { 633 struct scatterlist *s; 634 int i; 635 636 WARN_ON(nents == 0 || sg[0].length == 0); 637 638 for_each_sg(sg, s, nents, i) { 639 s->dma_address = 0L; 640 #ifdef CONFIG_NEED_SG_DMA_LENGTH 641 s->dma_length = s->length; 642 #endif 643 } 644 return nents; 645 } 646 647 static inline int 648 fc_dma_map_sg(struct device *dev, struct scatterlist *sg, int nents, 649 enum dma_data_direction dir) 650 { 651 return dev ? 
dma_map_sg(dev, sg, nents, dir) : fc_map_sg(sg, nents); 652 } 653 654 static inline void 655 fc_dma_unmap_sg(struct device *dev, struct scatterlist *sg, int nents, 656 enum dma_data_direction dir) 657 { 658 if (dev) 659 dma_unmap_sg(dev, sg, nents, dir); 660 } 661 662 663 /* *********************** FC-NVME LS Handling **************************** */ 664 665 static void nvme_fc_ctrl_put(struct nvme_fc_ctrl *); 666 static int nvme_fc_ctrl_get(struct nvme_fc_ctrl *); 667 668 669 static void 670 __nvme_fc_finish_ls_req(struct nvmefc_ls_req_op *lsop) 671 { 672 struct nvme_fc_rport *rport = lsop->rport; 673 struct nvmefc_ls_req *lsreq = &lsop->ls_req; 674 unsigned long flags; 675 676 spin_lock_irqsave(&rport->lock, flags); 677 678 if (!lsop->req_queued) { 679 spin_unlock_irqrestore(&rport->lock, flags); 680 return; 681 } 682 683 list_del(&lsop->lsreq_list); 684 685 lsop->req_queued = false; 686 687 spin_unlock_irqrestore(&rport->lock, flags); 688 689 fc_dma_unmap_single(rport->dev, lsreq->rqstdma, 690 (lsreq->rqstlen + lsreq->rsplen), 691 DMA_BIDIRECTIONAL); 692 693 nvme_fc_rport_put(rport); 694 } 695 696 static int 697 __nvme_fc_send_ls_req(struct nvme_fc_rport *rport, 698 struct nvmefc_ls_req_op *lsop, 699 void (*done)(struct nvmefc_ls_req *req, int status)) 700 { 701 struct nvmefc_ls_req *lsreq = &lsop->ls_req; 702 unsigned long flags; 703 int ret = 0; 704 705 if (rport->remoteport.port_state != FC_OBJSTATE_ONLINE) 706 return -ECONNREFUSED; 707 708 if (!nvme_fc_rport_get(rport)) 709 return -ESHUTDOWN; 710 711 lsreq->done = done; 712 lsop->rport = rport; 713 lsop->req_queued = false; 714 INIT_LIST_HEAD(&lsop->lsreq_list); 715 init_completion(&lsop->ls_done); 716 717 lsreq->rqstdma = fc_dma_map_single(rport->dev, lsreq->rqstaddr, 718 lsreq->rqstlen + lsreq->rsplen, 719 DMA_BIDIRECTIONAL); 720 if (fc_dma_mapping_error(rport->dev, lsreq->rqstdma)) { 721 ret = -EFAULT; 722 goto out_putrport; 723 } 724 lsreq->rspdma = lsreq->rqstdma + lsreq->rqstlen; 725 726 
spin_lock_irqsave(&rport->lock, flags); 727 728 list_add_tail(&lsop->lsreq_list, &rport->ls_req_list); 729 730 lsop->req_queued = true; 731 732 spin_unlock_irqrestore(&rport->lock, flags); 733 734 ret = rport->lport->ops->ls_req(&rport->lport->localport, 735 &rport->remoteport, lsreq); 736 if (ret) 737 goto out_unlink; 738 739 return 0; 740 741 out_unlink: 742 lsop->ls_error = ret; 743 spin_lock_irqsave(&rport->lock, flags); 744 lsop->req_queued = false; 745 list_del(&lsop->lsreq_list); 746 spin_unlock_irqrestore(&rport->lock, flags); 747 fc_dma_unmap_single(rport->dev, lsreq->rqstdma, 748 (lsreq->rqstlen + lsreq->rsplen), 749 DMA_BIDIRECTIONAL); 750 out_putrport: 751 nvme_fc_rport_put(rport); 752 753 return ret; 754 } 755 756 static void 757 nvme_fc_send_ls_req_done(struct nvmefc_ls_req *lsreq, int status) 758 { 759 struct nvmefc_ls_req_op *lsop = ls_req_to_lsop(lsreq); 760 761 lsop->ls_error = status; 762 complete(&lsop->ls_done); 763 } 764 765 static int 766 nvme_fc_send_ls_req(struct nvme_fc_rport *rport, struct nvmefc_ls_req_op *lsop) 767 { 768 struct nvmefc_ls_req *lsreq = &lsop->ls_req; 769 struct fcnvme_ls_rjt *rjt = lsreq->rspaddr; 770 int ret; 771 772 ret = __nvme_fc_send_ls_req(rport, lsop, nvme_fc_send_ls_req_done); 773 774 if (!ret) { 775 /* 776 * No timeout/not interruptible as we need the struct 777 * to exist until the lldd calls us back. Thus mandate 778 * wait until driver calls back. lldd responsible for 779 * the timeout action 780 */ 781 wait_for_completion(&lsop->ls_done); 782 783 __nvme_fc_finish_ls_req(lsop); 784 785 ret = lsop->ls_error; 786 } 787 788 if (ret) 789 return ret; 790 791 /* ACC or RJT payload ? 
*/ 792 if (rjt->w0.ls_cmd == FCNVME_LS_RJT) 793 return -ENXIO; 794 795 return 0; 796 } 797 798 static int 799 nvme_fc_send_ls_req_async(struct nvme_fc_rport *rport, 800 struct nvmefc_ls_req_op *lsop, 801 void (*done)(struct nvmefc_ls_req *req, int status)) 802 { 803 /* don't wait for completion */ 804 805 return __nvme_fc_send_ls_req(rport, lsop, done); 806 } 807 808 /* Validation Error indexes into the string table below */ 809 enum { 810 VERR_NO_ERROR = 0, 811 VERR_LSACC = 1, 812 VERR_LSDESC_RQST = 2, 813 VERR_LSDESC_RQST_LEN = 3, 814 VERR_ASSOC_ID = 4, 815 VERR_ASSOC_ID_LEN = 5, 816 VERR_CONN_ID = 6, 817 VERR_CONN_ID_LEN = 7, 818 VERR_CR_ASSOC = 8, 819 VERR_CR_ASSOC_ACC_LEN = 9, 820 VERR_CR_CONN = 10, 821 VERR_CR_CONN_ACC_LEN = 11, 822 VERR_DISCONN = 12, 823 VERR_DISCONN_ACC_LEN = 13, 824 }; 825 826 static char *validation_errors[] = { 827 "OK", 828 "Not LS_ACC", 829 "Not LSDESC_RQST", 830 "Bad LSDESC_RQST Length", 831 "Not Association ID", 832 "Bad Association ID Length", 833 "Not Connection ID", 834 "Bad Connection ID Length", 835 "Not CR_ASSOC Rqst", 836 "Bad CR_ASSOC ACC Length", 837 "Not CR_CONN Rqst", 838 "Bad CR_CONN ACC Length", 839 "Not Disconnect Rqst", 840 "Bad Disconnect ACC Length", 841 }; 842 843 static int 844 nvme_fc_connect_admin_queue(struct nvme_fc_ctrl *ctrl, 845 struct nvme_fc_queue *queue, u16 qsize, u16 ersp_ratio) 846 { 847 struct nvmefc_ls_req_op *lsop; 848 struct nvmefc_ls_req *lsreq; 849 struct fcnvme_ls_cr_assoc_rqst *assoc_rqst; 850 struct fcnvme_ls_cr_assoc_acc *assoc_acc; 851 int ret, fcret = 0; 852 853 lsop = kzalloc((sizeof(*lsop) + 854 ctrl->lport->ops->lsrqst_priv_sz + 855 sizeof(*assoc_rqst) + sizeof(*assoc_acc)), GFP_KERNEL); 856 if (!lsop) { 857 ret = -ENOMEM; 858 goto out_no_memory; 859 } 860 lsreq = &lsop->ls_req; 861 862 lsreq->private = (void *)&lsop[1]; 863 assoc_rqst = (struct fcnvme_ls_cr_assoc_rqst *) 864 (lsreq->private + ctrl->lport->ops->lsrqst_priv_sz); 865 assoc_acc = (struct fcnvme_ls_cr_assoc_acc 
*)&assoc_rqst[1]; 866 867 assoc_rqst->w0.ls_cmd = FCNVME_LS_CREATE_ASSOCIATION; 868 assoc_rqst->desc_list_len = 869 cpu_to_be32(sizeof(struct fcnvme_lsdesc_cr_assoc_cmd)); 870 871 assoc_rqst->assoc_cmd.desc_tag = 872 cpu_to_be32(FCNVME_LSDESC_CREATE_ASSOC_CMD); 873 assoc_rqst->assoc_cmd.desc_len = 874 fcnvme_lsdesc_len( 875 sizeof(struct fcnvme_lsdesc_cr_assoc_cmd)); 876 877 assoc_rqst->assoc_cmd.ersp_ratio = cpu_to_be16(ersp_ratio); 878 assoc_rqst->assoc_cmd.sqsize = cpu_to_be16(qsize); 879 /* Linux supports only Dynamic controllers */ 880 assoc_rqst->assoc_cmd.cntlid = cpu_to_be16(0xffff); 881 memcpy(&assoc_rqst->assoc_cmd.hostid, &ctrl->ctrl.opts->host->id, 882 min_t(size_t, FCNVME_ASSOC_HOSTID_LEN, sizeof(uuid_be))); 883 strncpy(assoc_rqst->assoc_cmd.hostnqn, ctrl->ctrl.opts->host->nqn, 884 min(FCNVME_ASSOC_HOSTNQN_LEN, NVMF_NQN_SIZE)); 885 strncpy(assoc_rqst->assoc_cmd.subnqn, ctrl->ctrl.opts->subsysnqn, 886 min(FCNVME_ASSOC_SUBNQN_LEN, NVMF_NQN_SIZE)); 887 888 lsop->queue = queue; 889 lsreq->rqstaddr = assoc_rqst; 890 lsreq->rqstlen = sizeof(*assoc_rqst); 891 lsreq->rspaddr = assoc_acc; 892 lsreq->rsplen = sizeof(*assoc_acc); 893 lsreq->timeout = NVME_FC_CONNECT_TIMEOUT_SEC; 894 895 ret = nvme_fc_send_ls_req(ctrl->rport, lsop); 896 if (ret) 897 goto out_free_buffer; 898 899 /* process connect LS completion */ 900 901 /* validate the ACC response */ 902 if (assoc_acc->hdr.w0.ls_cmd != FCNVME_LS_ACC) 903 fcret = VERR_LSACC; 904 else if (assoc_acc->hdr.desc_list_len != 905 fcnvme_lsdesc_len( 906 sizeof(struct fcnvme_ls_cr_assoc_acc))) 907 fcret = VERR_CR_ASSOC_ACC_LEN; 908 else if (assoc_acc->hdr.rqst.desc_tag != 909 cpu_to_be32(FCNVME_LSDESC_RQST)) 910 fcret = VERR_LSDESC_RQST; 911 else if (assoc_acc->hdr.rqst.desc_len != 912 fcnvme_lsdesc_len(sizeof(struct fcnvme_lsdesc_rqst))) 913 fcret = VERR_LSDESC_RQST_LEN; 914 else if (assoc_acc->hdr.rqst.w0.ls_cmd != FCNVME_LS_CREATE_ASSOCIATION) 915 fcret = VERR_CR_ASSOC; 916 else if (assoc_acc->associd.desc_tag != 917 
cpu_to_be32(FCNVME_LSDESC_ASSOC_ID)) 918 fcret = VERR_ASSOC_ID; 919 else if (assoc_acc->associd.desc_len != 920 fcnvme_lsdesc_len( 921 sizeof(struct fcnvme_lsdesc_assoc_id))) 922 fcret = VERR_ASSOC_ID_LEN; 923 else if (assoc_acc->connectid.desc_tag != 924 cpu_to_be32(FCNVME_LSDESC_CONN_ID)) 925 fcret = VERR_CONN_ID; 926 else if (assoc_acc->connectid.desc_len != 927 fcnvme_lsdesc_len(sizeof(struct fcnvme_lsdesc_conn_id))) 928 fcret = VERR_CONN_ID_LEN; 929 930 if (fcret) { 931 ret = -EBADF; 932 dev_err(ctrl->dev, 933 "q %d connect failed: %s\n", 934 queue->qnum, validation_errors[fcret]); 935 } else { 936 ctrl->association_id = 937 be64_to_cpu(assoc_acc->associd.association_id); 938 queue->connection_id = 939 be64_to_cpu(assoc_acc->connectid.connection_id); 940 set_bit(NVME_FC_Q_CONNECTED, &queue->flags); 941 } 942 943 out_free_buffer: 944 kfree(lsop); 945 out_no_memory: 946 if (ret) 947 dev_err(ctrl->dev, 948 "queue %d connect admin queue failed (%d).\n", 949 queue->qnum, ret); 950 return ret; 951 } 952 953 static int 954 nvme_fc_connect_queue(struct nvme_fc_ctrl *ctrl, struct nvme_fc_queue *queue, 955 u16 qsize, u16 ersp_ratio) 956 { 957 struct nvmefc_ls_req_op *lsop; 958 struct nvmefc_ls_req *lsreq; 959 struct fcnvme_ls_cr_conn_rqst *conn_rqst; 960 struct fcnvme_ls_cr_conn_acc *conn_acc; 961 int ret, fcret = 0; 962 963 lsop = kzalloc((sizeof(*lsop) + 964 ctrl->lport->ops->lsrqst_priv_sz + 965 sizeof(*conn_rqst) + sizeof(*conn_acc)), GFP_KERNEL); 966 if (!lsop) { 967 ret = -ENOMEM; 968 goto out_no_memory; 969 } 970 lsreq = &lsop->ls_req; 971 972 lsreq->private = (void *)&lsop[1]; 973 conn_rqst = (struct fcnvme_ls_cr_conn_rqst *) 974 (lsreq->private + ctrl->lport->ops->lsrqst_priv_sz); 975 conn_acc = (struct fcnvme_ls_cr_conn_acc *)&conn_rqst[1]; 976 977 conn_rqst->w0.ls_cmd = FCNVME_LS_CREATE_CONNECTION; 978 conn_rqst->desc_list_len = cpu_to_be32( 979 sizeof(struct fcnvme_lsdesc_assoc_id) + 980 sizeof(struct fcnvme_lsdesc_cr_conn_cmd)); 981 982 
conn_rqst->associd.desc_tag = cpu_to_be32(FCNVME_LSDESC_ASSOC_ID); 983 conn_rqst->associd.desc_len = 984 fcnvme_lsdesc_len( 985 sizeof(struct fcnvme_lsdesc_assoc_id)); 986 conn_rqst->associd.association_id = cpu_to_be64(ctrl->association_id); 987 conn_rqst->connect_cmd.desc_tag = 988 cpu_to_be32(FCNVME_LSDESC_CREATE_CONN_CMD); 989 conn_rqst->connect_cmd.desc_len = 990 fcnvme_lsdesc_len( 991 sizeof(struct fcnvme_lsdesc_cr_conn_cmd)); 992 conn_rqst->connect_cmd.ersp_ratio = cpu_to_be16(ersp_ratio); 993 conn_rqst->connect_cmd.qid = cpu_to_be16(queue->qnum); 994 conn_rqst->connect_cmd.sqsize = cpu_to_be16(qsize); 995 996 lsop->queue = queue; 997 lsreq->rqstaddr = conn_rqst; 998 lsreq->rqstlen = sizeof(*conn_rqst); 999 lsreq->rspaddr = conn_acc; 1000 lsreq->rsplen = sizeof(*conn_acc); 1001 lsreq->timeout = NVME_FC_CONNECT_TIMEOUT_SEC; 1002 1003 ret = nvme_fc_send_ls_req(ctrl->rport, lsop); 1004 if (ret) 1005 goto out_free_buffer; 1006 1007 /* process connect LS completion */ 1008 1009 /* validate the ACC response */ 1010 if (conn_acc->hdr.w0.ls_cmd != FCNVME_LS_ACC) 1011 fcret = VERR_LSACC; 1012 else if (conn_acc->hdr.desc_list_len != 1013 fcnvme_lsdesc_len(sizeof(struct fcnvme_ls_cr_conn_acc))) 1014 fcret = VERR_CR_CONN_ACC_LEN; 1015 else if (conn_acc->hdr.rqst.desc_tag != cpu_to_be32(FCNVME_LSDESC_RQST)) 1016 fcret = VERR_LSDESC_RQST; 1017 else if (conn_acc->hdr.rqst.desc_len != 1018 fcnvme_lsdesc_len(sizeof(struct fcnvme_lsdesc_rqst))) 1019 fcret = VERR_LSDESC_RQST_LEN; 1020 else if (conn_acc->hdr.rqst.w0.ls_cmd != FCNVME_LS_CREATE_CONNECTION) 1021 fcret = VERR_CR_CONN; 1022 else if (conn_acc->connectid.desc_tag != 1023 cpu_to_be32(FCNVME_LSDESC_CONN_ID)) 1024 fcret = VERR_CONN_ID; 1025 else if (conn_acc->connectid.desc_len != 1026 fcnvme_lsdesc_len(sizeof(struct fcnvme_lsdesc_conn_id))) 1027 fcret = VERR_CONN_ID_LEN; 1028 1029 if (fcret) { 1030 ret = -EBADF; 1031 dev_err(ctrl->dev, 1032 "q %d connect failed: %s\n", 1033 queue->qnum, validation_errors[fcret]); 1034 } 
else { 1035 queue->connection_id = 1036 be64_to_cpu(conn_acc->connectid.connection_id); 1037 set_bit(NVME_FC_Q_CONNECTED, &queue->flags); 1038 } 1039 1040 out_free_buffer: 1041 kfree(lsop); 1042 out_no_memory: 1043 if (ret) 1044 dev_err(ctrl->dev, 1045 "queue %d connect command failed (%d).\n", 1046 queue->qnum, ret); 1047 return ret; 1048 } 1049 1050 static void 1051 nvme_fc_disconnect_assoc_done(struct nvmefc_ls_req *lsreq, int status) 1052 { 1053 struct nvmefc_ls_req_op *lsop = ls_req_to_lsop(lsreq); 1054 1055 __nvme_fc_finish_ls_req(lsop); 1056 1057 /* fc-nvme iniator doesn't care about success or failure of cmd */ 1058 1059 kfree(lsop); 1060 } 1061 1062 /* 1063 * This routine sends a FC-NVME LS to disconnect (aka terminate) 1064 * the FC-NVME Association. Terminating the association also 1065 * terminates the FC-NVME connections (per queue, both admin and io 1066 * queues) that are part of the association. E.g. things are torn 1067 * down, and the related FC-NVME Association ID and Connection IDs 1068 * become invalid. 1069 * 1070 * The behavior of the fc-nvme initiator is such that it's 1071 * understanding of the association and connections will implicitly 1072 * be torn down. The action is implicit as it may be due to a loss of 1073 * connectivity with the fc-nvme target, so you may never get a 1074 * response even if you tried. As such, the action of this routine 1075 * is to asynchronously send the LS, ignore any results of the LS, and 1076 * continue on with terminating the association. If the fc-nvme target 1077 * is present and receives the LS, it too can tear down. 
1078 */ 1079 static void 1080 nvme_fc_xmt_disconnect_assoc(struct nvme_fc_ctrl *ctrl) 1081 { 1082 struct fcnvme_ls_disconnect_rqst *discon_rqst; 1083 struct fcnvme_ls_disconnect_acc *discon_acc; 1084 struct nvmefc_ls_req_op *lsop; 1085 struct nvmefc_ls_req *lsreq; 1086 int ret; 1087 1088 lsop = kzalloc((sizeof(*lsop) + 1089 ctrl->lport->ops->lsrqst_priv_sz + 1090 sizeof(*discon_rqst) + sizeof(*discon_acc)), 1091 GFP_KERNEL); 1092 if (!lsop) 1093 /* couldn't sent it... too bad */ 1094 return; 1095 1096 lsreq = &lsop->ls_req; 1097 1098 lsreq->private = (void *)&lsop[1]; 1099 discon_rqst = (struct fcnvme_ls_disconnect_rqst *) 1100 (lsreq->private + ctrl->lport->ops->lsrqst_priv_sz); 1101 discon_acc = (struct fcnvme_ls_disconnect_acc *)&discon_rqst[1]; 1102 1103 discon_rqst->w0.ls_cmd = FCNVME_LS_DISCONNECT; 1104 discon_rqst->desc_list_len = cpu_to_be32( 1105 sizeof(struct fcnvme_lsdesc_assoc_id) + 1106 sizeof(struct fcnvme_lsdesc_disconn_cmd)); 1107 1108 discon_rqst->associd.desc_tag = cpu_to_be32(FCNVME_LSDESC_ASSOC_ID); 1109 discon_rqst->associd.desc_len = 1110 fcnvme_lsdesc_len( 1111 sizeof(struct fcnvme_lsdesc_assoc_id)); 1112 1113 discon_rqst->associd.association_id = cpu_to_be64(ctrl->association_id); 1114 1115 discon_rqst->discon_cmd.desc_tag = cpu_to_be32( 1116 FCNVME_LSDESC_DISCONN_CMD); 1117 discon_rqst->discon_cmd.desc_len = 1118 fcnvme_lsdesc_len( 1119 sizeof(struct fcnvme_lsdesc_disconn_cmd)); 1120 discon_rqst->discon_cmd.scope = FCNVME_DISCONN_ASSOCIATION; 1121 discon_rqst->discon_cmd.id = cpu_to_be64(ctrl->association_id); 1122 1123 lsreq->rqstaddr = discon_rqst; 1124 lsreq->rqstlen = sizeof(*discon_rqst); 1125 lsreq->rspaddr = discon_acc; 1126 lsreq->rsplen = sizeof(*discon_acc); 1127 lsreq->timeout = NVME_FC_CONNECT_TIMEOUT_SEC; 1128 1129 ret = nvme_fc_send_ls_req_async(ctrl->rport, lsop, 1130 nvme_fc_disconnect_assoc_done); 1131 if (ret) 1132 kfree(lsop); 1133 1134 /* only meaningful part to terminating the association */ 1135 ctrl->association_id = 
0; 1136 } 1137 1138 1139 /* *********************** NVME Ctrl Routines **************************** */ 1140 1141 static void __nvme_fc_final_op_cleanup(struct request *rq); 1142 1143 static int 1144 nvme_fc_reinit_request(void *data, struct request *rq) 1145 { 1146 struct nvme_fc_fcp_op *op = blk_mq_rq_to_pdu(rq); 1147 struct nvme_fc_cmd_iu *cmdiu = &op->cmd_iu; 1148 1149 memset(cmdiu, 0, sizeof(*cmdiu)); 1150 cmdiu->scsi_id = NVME_CMD_SCSI_ID; 1151 cmdiu->fc_id = NVME_CMD_FC_ID; 1152 cmdiu->iu_len = cpu_to_be16(sizeof(*cmdiu) / sizeof(u32)); 1153 memset(&op->rsp_iu, 0, sizeof(op->rsp_iu)); 1154 1155 return 0; 1156 } 1157 1158 static void 1159 __nvme_fc_exit_request(struct nvme_fc_ctrl *ctrl, 1160 struct nvme_fc_fcp_op *op) 1161 { 1162 fc_dma_unmap_single(ctrl->lport->dev, op->fcp_req.rspdma, 1163 sizeof(op->rsp_iu), DMA_FROM_DEVICE); 1164 fc_dma_unmap_single(ctrl->lport->dev, op->fcp_req.cmddma, 1165 sizeof(op->cmd_iu), DMA_TO_DEVICE); 1166 1167 atomic_set(&op->state, FCPOP_STATE_UNINIT); 1168 } 1169 1170 static void 1171 nvme_fc_exit_request(struct blk_mq_tag_set *set, struct request *rq, 1172 unsigned int hctx_idx) 1173 { 1174 struct nvme_fc_fcp_op *op = blk_mq_rq_to_pdu(rq); 1175 1176 return __nvme_fc_exit_request(set->driver_data, op); 1177 } 1178 1179 static int 1180 __nvme_fc_abort_op(struct nvme_fc_ctrl *ctrl, struct nvme_fc_fcp_op *op) 1181 { 1182 int state; 1183 1184 state = atomic_xchg(&op->state, FCPOP_STATE_ABORTED); 1185 if (state != FCPOP_STATE_ACTIVE) { 1186 atomic_set(&op->state, state); 1187 return -ECANCELED; 1188 } 1189 1190 ctrl->lport->ops->fcp_abort(&ctrl->lport->localport, 1191 &ctrl->rport->remoteport, 1192 op->queue->lldd_handle, 1193 &op->fcp_req); 1194 1195 return 0; 1196 } 1197 1198 static void 1199 nvme_fc_abort_aen_ops(struct nvme_fc_ctrl *ctrl) 1200 { 1201 struct nvme_fc_fcp_op *aen_op = ctrl->aen_ops; 1202 unsigned long flags; 1203 int i, ret; 1204 1205 for (i = 0; i < NVME_FC_NR_AEN_COMMANDS; i++, aen_op++) { 1206 if 
(atomic_read(&aen_op->state) != FCPOP_STATE_ACTIVE) 1207 continue; 1208 1209 spin_lock_irqsave(&ctrl->lock, flags); 1210 if (ctrl->flags & FCCTRL_TERMIO) { 1211 ctrl->iocnt++; 1212 aen_op->flags |= FCOP_FLAGS_TERMIO; 1213 } 1214 spin_unlock_irqrestore(&ctrl->lock, flags); 1215 1216 ret = __nvme_fc_abort_op(ctrl, aen_op); 1217 if (ret) { 1218 /* 1219 * if __nvme_fc_abort_op failed the io wasn't 1220 * active. Thus this call path is running in 1221 * parallel to the io complete. Treat as non-error. 1222 */ 1223 1224 /* back out the flags/counters */ 1225 spin_lock_irqsave(&ctrl->lock, flags); 1226 if (ctrl->flags & FCCTRL_TERMIO) 1227 ctrl->iocnt--; 1228 aen_op->flags &= ~FCOP_FLAGS_TERMIO; 1229 spin_unlock_irqrestore(&ctrl->lock, flags); 1230 return; 1231 } 1232 } 1233 } 1234 1235 static inline int 1236 __nvme_fc_fcpop_chk_teardowns(struct nvme_fc_ctrl *ctrl, 1237 struct nvme_fc_fcp_op *op) 1238 { 1239 unsigned long flags; 1240 bool complete_rq = false; 1241 1242 spin_lock_irqsave(&ctrl->lock, flags); 1243 if (unlikely(op->flags & FCOP_FLAGS_TERMIO)) { 1244 if (ctrl->flags & FCCTRL_TERMIO) 1245 ctrl->iocnt--; 1246 } 1247 if (op->flags & FCOP_FLAGS_RELEASED) 1248 complete_rq = true; 1249 else 1250 op->flags |= FCOP_FLAGS_COMPLETE; 1251 spin_unlock_irqrestore(&ctrl->lock, flags); 1252 1253 return complete_rq; 1254 } 1255 1256 static void 1257 nvme_fc_fcpio_done(struct nvmefc_fcp_req *req) 1258 { 1259 struct nvme_fc_fcp_op *op = fcp_req_to_fcp_op(req); 1260 struct request *rq = op->rq; 1261 struct nvmefc_fcp_req *freq = &op->fcp_req; 1262 struct nvme_fc_ctrl *ctrl = op->ctrl; 1263 struct nvme_fc_queue *queue = op->queue; 1264 struct nvme_completion *cqe = &op->rsp_iu.cqe; 1265 struct nvme_command *sqe = &op->cmd_iu.sqe; 1266 __le16 status = cpu_to_le16(NVME_SC_SUCCESS << 1); 1267 union nvme_result result; 1268 bool complete_rq; 1269 1270 /* 1271 * WARNING: 1272 * The current linux implementation of a nvme controller 1273 * allocates a single tag set for all io queues 
and sizes 1274 * the io queues to fully hold all possible tags. Thus, the 1275 * implementation does not reference or care about the sqhd 1276 * value as it never needs to use the sqhd/sqtail pointers 1277 * for submission pacing. 1278 * 1279 * This affects the FC-NVME implementation in two ways: 1280 * 1) As the value doesn't matter, we don't need to waste 1281 * cycles extracting it from ERSPs and stamping it in the 1282 * cases where the transport fabricates CQEs on successful 1283 * completions. 1284 * 2) The FC-NVME implementation requires that delivery of 1285 * ERSP completions are to go back to the nvme layer in order 1286 * relative to the rsn, such that the sqhd value will always 1287 * be "in order" for the nvme layer. As the nvme layer in 1288 * linux doesn't care about sqhd, there's no need to return 1289 * them in order. 1290 * 1291 * Additionally: 1292 * As the core nvme layer in linux currently does not look at 1293 * every field in the cqe - in cases where the FC transport must 1294 * fabricate a CQE, the following fields will not be set as they 1295 * are not referenced: 1296 * cqe.sqid, cqe.sqhd, cqe.command_id 1297 */ 1298 1299 fc_dma_sync_single_for_cpu(ctrl->lport->dev, op->fcp_req.rspdma, 1300 sizeof(op->rsp_iu), DMA_FROM_DEVICE); 1301 1302 if (atomic_read(&op->state) == FCPOP_STATE_ABORTED) 1303 status = cpu_to_le16((NVME_SC_ABORT_REQ | NVME_SC_DNR) << 1); 1304 else if (freq->status) 1305 status = cpu_to_le16(NVME_SC_FC_TRANSPORT_ERROR << 1); 1306 1307 /* 1308 * For the linux implementation, if we have an unsuccesful 1309 * status, they blk-mq layer can typically be called with the 1310 * non-zero status and the content of the cqe isn't important. 1311 */ 1312 if (status) 1313 goto done; 1314 1315 /* 1316 * command completed successfully relative to the wire 1317 * protocol. However, validate anything received and 1318 * extract the status and result from the cqe (create it 1319 * where necessary). 
1320 */ 1321 1322 switch (freq->rcv_rsplen) { 1323 1324 case 0: 1325 case NVME_FC_SIZEOF_ZEROS_RSP: 1326 /* 1327 * No response payload or 12 bytes of payload (which 1328 * should all be zeros) are considered successful and 1329 * no payload in the CQE by the transport. 1330 */ 1331 if (freq->transferred_length != 1332 be32_to_cpu(op->cmd_iu.data_len)) { 1333 status = cpu_to_le16(NVME_SC_FC_TRANSPORT_ERROR << 1); 1334 goto done; 1335 } 1336 result.u64 = 0; 1337 break; 1338 1339 case sizeof(struct nvme_fc_ersp_iu): 1340 /* 1341 * The ERSP IU contains a full completion with CQE. 1342 * Validate ERSP IU and look at cqe. 1343 */ 1344 if (unlikely(be16_to_cpu(op->rsp_iu.iu_len) != 1345 (freq->rcv_rsplen / 4) || 1346 be32_to_cpu(op->rsp_iu.xfrd_len) != 1347 freq->transferred_length || 1348 op->rsp_iu.status_code || 1349 sqe->common.command_id != cqe->command_id)) { 1350 status = cpu_to_le16(NVME_SC_FC_TRANSPORT_ERROR << 1); 1351 goto done; 1352 } 1353 result = cqe->result; 1354 status = cqe->status; 1355 break; 1356 1357 default: 1358 status = cpu_to_le16(NVME_SC_FC_TRANSPORT_ERROR << 1); 1359 goto done; 1360 } 1361 1362 done: 1363 if (op->flags & FCOP_FLAGS_AEN) { 1364 nvme_complete_async_event(&queue->ctrl->ctrl, status, &result); 1365 complete_rq = __nvme_fc_fcpop_chk_teardowns(ctrl, op); 1366 atomic_set(&op->state, FCPOP_STATE_IDLE); 1367 op->flags = FCOP_FLAGS_AEN; /* clear other flags */ 1368 nvme_fc_ctrl_put(ctrl); 1369 return; 1370 } 1371 1372 complete_rq = __nvme_fc_fcpop_chk_teardowns(ctrl, op); 1373 if (!complete_rq) { 1374 if (unlikely(op->flags & FCOP_FLAGS_TERMIO)) { 1375 status = cpu_to_le16(NVME_SC_ABORT_REQ << 1); 1376 if (blk_queue_dying(rq->q)) 1377 status |= cpu_to_le16(NVME_SC_DNR << 1); 1378 } 1379 nvme_end_request(rq, status, result); 1380 } else 1381 __nvme_fc_final_op_cleanup(rq); 1382 } 1383 1384 static int 1385 __nvme_fc_init_request(struct nvme_fc_ctrl *ctrl, 1386 struct nvme_fc_queue *queue, struct nvme_fc_fcp_op *op, 1387 struct request *rq, 
u32 rqno) 1388 { 1389 struct nvme_fc_cmd_iu *cmdiu = &op->cmd_iu; 1390 int ret = 0; 1391 1392 memset(op, 0, sizeof(*op)); 1393 op->fcp_req.cmdaddr = &op->cmd_iu; 1394 op->fcp_req.cmdlen = sizeof(op->cmd_iu); 1395 op->fcp_req.rspaddr = &op->rsp_iu; 1396 op->fcp_req.rsplen = sizeof(op->rsp_iu); 1397 op->fcp_req.done = nvme_fc_fcpio_done; 1398 op->fcp_req.first_sgl = (struct scatterlist *)&op[1]; 1399 op->fcp_req.private = &op->fcp_req.first_sgl[SG_CHUNK_SIZE]; 1400 op->ctrl = ctrl; 1401 op->queue = queue; 1402 op->rq = rq; 1403 op->rqno = rqno; 1404 1405 cmdiu->scsi_id = NVME_CMD_SCSI_ID; 1406 cmdiu->fc_id = NVME_CMD_FC_ID; 1407 cmdiu->iu_len = cpu_to_be16(sizeof(*cmdiu) / sizeof(u32)); 1408 1409 op->fcp_req.cmddma = fc_dma_map_single(ctrl->lport->dev, 1410 &op->cmd_iu, sizeof(op->cmd_iu), DMA_TO_DEVICE); 1411 if (fc_dma_mapping_error(ctrl->lport->dev, op->fcp_req.cmddma)) { 1412 dev_err(ctrl->dev, 1413 "FCP Op failed - cmdiu dma mapping failed.\n"); 1414 ret = EFAULT; 1415 goto out_on_error; 1416 } 1417 1418 op->fcp_req.rspdma = fc_dma_map_single(ctrl->lport->dev, 1419 &op->rsp_iu, sizeof(op->rsp_iu), 1420 DMA_FROM_DEVICE); 1421 if (fc_dma_mapping_error(ctrl->lport->dev, op->fcp_req.rspdma)) { 1422 dev_err(ctrl->dev, 1423 "FCP Op failed - rspiu dma mapping failed.\n"); 1424 ret = EFAULT; 1425 } 1426 1427 atomic_set(&op->state, FCPOP_STATE_IDLE); 1428 out_on_error: 1429 return ret; 1430 } 1431 1432 static int 1433 nvme_fc_init_request(struct blk_mq_tag_set *set, struct request *rq, 1434 unsigned int hctx_idx, unsigned int numa_node) 1435 { 1436 struct nvme_fc_ctrl *ctrl = set->driver_data; 1437 struct nvme_fc_fcp_op *op = blk_mq_rq_to_pdu(rq); 1438 struct nvme_fc_queue *queue = &ctrl->queues[hctx_idx+1]; 1439 1440 return __nvme_fc_init_request(ctrl, queue, op, rq, queue->rqcnt++); 1441 } 1442 1443 static int 1444 nvme_fc_init_admin_request(struct blk_mq_tag_set *set, struct request *rq, 1445 unsigned int hctx_idx, unsigned int numa_node) 1446 { 1447 struct 
nvme_fc_ctrl *ctrl = set->driver_data; 1448 struct nvme_fc_fcp_op *op = blk_mq_rq_to_pdu(rq); 1449 struct nvme_fc_queue *queue = &ctrl->queues[0]; 1450 1451 return __nvme_fc_init_request(ctrl, queue, op, rq, queue->rqcnt++); 1452 } 1453 1454 static int 1455 nvme_fc_init_aen_ops(struct nvme_fc_ctrl *ctrl) 1456 { 1457 struct nvme_fc_fcp_op *aen_op; 1458 struct nvme_fc_cmd_iu *cmdiu; 1459 struct nvme_command *sqe; 1460 void *private; 1461 int i, ret; 1462 1463 aen_op = ctrl->aen_ops; 1464 for (i = 0; i < NVME_FC_NR_AEN_COMMANDS; i++, aen_op++) { 1465 private = kzalloc(ctrl->lport->ops->fcprqst_priv_sz, 1466 GFP_KERNEL); 1467 if (!private) 1468 return -ENOMEM; 1469 1470 cmdiu = &aen_op->cmd_iu; 1471 sqe = &cmdiu->sqe; 1472 ret = __nvme_fc_init_request(ctrl, &ctrl->queues[0], 1473 aen_op, (struct request *)NULL, 1474 (AEN_CMDID_BASE + i)); 1475 if (ret) { 1476 kfree(private); 1477 return ret; 1478 } 1479 1480 aen_op->flags = FCOP_FLAGS_AEN; 1481 aen_op->fcp_req.first_sgl = NULL; /* no sg list */ 1482 aen_op->fcp_req.private = private; 1483 1484 memset(sqe, 0, sizeof(*sqe)); 1485 sqe->common.opcode = nvme_admin_async_event; 1486 /* Note: core layer may overwrite the sqe.command_id value */ 1487 sqe->common.command_id = AEN_CMDID_BASE + i; 1488 } 1489 return 0; 1490 } 1491 1492 static void 1493 nvme_fc_term_aen_ops(struct nvme_fc_ctrl *ctrl) 1494 { 1495 struct nvme_fc_fcp_op *aen_op; 1496 int i; 1497 1498 aen_op = ctrl->aen_ops; 1499 for (i = 0; i < NVME_FC_NR_AEN_COMMANDS; i++, aen_op++) { 1500 if (!aen_op->fcp_req.private) 1501 continue; 1502 1503 __nvme_fc_exit_request(ctrl, aen_op); 1504 1505 kfree(aen_op->fcp_req.private); 1506 aen_op->fcp_req.private = NULL; 1507 } 1508 } 1509 1510 static inline void 1511 __nvme_fc_init_hctx(struct blk_mq_hw_ctx *hctx, struct nvme_fc_ctrl *ctrl, 1512 unsigned int qidx) 1513 { 1514 struct nvme_fc_queue *queue = &ctrl->queues[qidx]; 1515 1516 hctx->driver_data = queue; 1517 queue->hctx = hctx; 1518 } 1519 1520 static int 1521 
nvme_fc_init_hctx(struct blk_mq_hw_ctx *hctx, void *data, 1522 unsigned int hctx_idx) 1523 { 1524 struct nvme_fc_ctrl *ctrl = data; 1525 1526 __nvme_fc_init_hctx(hctx, ctrl, hctx_idx + 1); 1527 1528 return 0; 1529 } 1530 1531 static int 1532 nvme_fc_init_admin_hctx(struct blk_mq_hw_ctx *hctx, void *data, 1533 unsigned int hctx_idx) 1534 { 1535 struct nvme_fc_ctrl *ctrl = data; 1536 1537 __nvme_fc_init_hctx(hctx, ctrl, hctx_idx); 1538 1539 return 0; 1540 } 1541 1542 static void 1543 nvme_fc_init_queue(struct nvme_fc_ctrl *ctrl, int idx, size_t queue_size) 1544 { 1545 struct nvme_fc_queue *queue; 1546 1547 queue = &ctrl->queues[idx]; 1548 memset(queue, 0, sizeof(*queue)); 1549 queue->ctrl = ctrl; 1550 queue->qnum = idx; 1551 atomic_set(&queue->csn, 1); 1552 queue->dev = ctrl->dev; 1553 1554 if (idx > 0) 1555 queue->cmnd_capsule_len = ctrl->ctrl.ioccsz * 16; 1556 else 1557 queue->cmnd_capsule_len = sizeof(struct nvme_command); 1558 1559 queue->queue_size = queue_size; 1560 1561 /* 1562 * Considered whether we should allocate buffers for all SQEs 1563 * and CQEs and dma map them - mapping their respective entries 1564 * into the request structures (kernel vm addr and dma address) 1565 * thus the driver could use the buffers/mappings directly. 1566 * It only makes sense if the LLDD would use them for its 1567 * messaging api. It's very unlikely most adapter api's would use 1568 * a native NVME sqe/cqe. More reasonable if FC-NVME IU payload 1569 * structures were used instead. 1570 */ 1571 } 1572 1573 /* 1574 * This routine terminates a queue at the transport level. 1575 * The transport has already ensured that all outstanding ios on 1576 * the queue have been terminated. 1577 * The transport will send a Disconnect LS request to terminate 1578 * the queue's connection. Termination of the admin queue will also 1579 * terminate the association at the target. 
1580 */ 1581 static void 1582 nvme_fc_free_queue(struct nvme_fc_queue *queue) 1583 { 1584 if (!test_and_clear_bit(NVME_FC_Q_CONNECTED, &queue->flags)) 1585 return; 1586 1587 /* 1588 * Current implementation never disconnects a single queue. 1589 * It always terminates a whole association. So there is never 1590 * a disconnect(queue) LS sent to the target. 1591 */ 1592 1593 queue->connection_id = 0; 1594 clear_bit(NVME_FC_Q_CONNECTED, &queue->flags); 1595 } 1596 1597 static void 1598 __nvme_fc_delete_hw_queue(struct nvme_fc_ctrl *ctrl, 1599 struct nvme_fc_queue *queue, unsigned int qidx) 1600 { 1601 if (ctrl->lport->ops->delete_queue) 1602 ctrl->lport->ops->delete_queue(&ctrl->lport->localport, qidx, 1603 queue->lldd_handle); 1604 queue->lldd_handle = NULL; 1605 } 1606 1607 static void 1608 nvme_fc_free_io_queues(struct nvme_fc_ctrl *ctrl) 1609 { 1610 int i; 1611 1612 for (i = 1; i < ctrl->queue_count; i++) 1613 nvme_fc_free_queue(&ctrl->queues[i]); 1614 } 1615 1616 static int 1617 __nvme_fc_create_hw_queue(struct nvme_fc_ctrl *ctrl, 1618 struct nvme_fc_queue *queue, unsigned int qidx, u16 qsize) 1619 { 1620 int ret = 0; 1621 1622 queue->lldd_handle = NULL; 1623 if (ctrl->lport->ops->create_queue) 1624 ret = ctrl->lport->ops->create_queue(&ctrl->lport->localport, 1625 qidx, qsize, &queue->lldd_handle); 1626 1627 return ret; 1628 } 1629 1630 static void 1631 nvme_fc_delete_hw_io_queues(struct nvme_fc_ctrl *ctrl) 1632 { 1633 struct nvme_fc_queue *queue = &ctrl->queues[ctrl->queue_count - 1]; 1634 int i; 1635 1636 for (i = ctrl->queue_count - 1; i >= 1; i--, queue--) 1637 __nvme_fc_delete_hw_queue(ctrl, queue, i); 1638 } 1639 1640 static int 1641 nvme_fc_create_hw_io_queues(struct nvme_fc_ctrl *ctrl, u16 qsize) 1642 { 1643 struct nvme_fc_queue *queue = &ctrl->queues[1]; 1644 int i, ret; 1645 1646 for (i = 1; i < ctrl->queue_count; i++, queue++) { 1647 ret = __nvme_fc_create_hw_queue(ctrl, queue, i, qsize); 1648 if (ret) 1649 goto delete_queues; 1650 } 1651 1652 return 
0; 1653 1654 delete_queues: 1655 for (; i >= 0; i--) 1656 __nvme_fc_delete_hw_queue(ctrl, &ctrl->queues[i], i); 1657 return ret; 1658 } 1659 1660 static int 1661 nvme_fc_connect_io_queues(struct nvme_fc_ctrl *ctrl, u16 qsize) 1662 { 1663 int i, ret = 0; 1664 1665 for (i = 1; i < ctrl->queue_count; i++) { 1666 ret = nvme_fc_connect_queue(ctrl, &ctrl->queues[i], qsize, 1667 (qsize / 5)); 1668 if (ret) 1669 break; 1670 ret = nvmf_connect_io_queue(&ctrl->ctrl, i); 1671 if (ret) 1672 break; 1673 } 1674 1675 return ret; 1676 } 1677 1678 static void 1679 nvme_fc_init_io_queues(struct nvme_fc_ctrl *ctrl) 1680 { 1681 int i; 1682 1683 for (i = 1; i < ctrl->queue_count; i++) 1684 nvme_fc_init_queue(ctrl, i, ctrl->ctrl.sqsize); 1685 } 1686 1687 static void 1688 nvme_fc_ctrl_free(struct kref *ref) 1689 { 1690 struct nvme_fc_ctrl *ctrl = 1691 container_of(ref, struct nvme_fc_ctrl, ref); 1692 unsigned long flags; 1693 1694 if (ctrl->ctrl.tagset) { 1695 blk_cleanup_queue(ctrl->ctrl.connect_q); 1696 blk_mq_free_tag_set(&ctrl->tag_set); 1697 } 1698 1699 /* remove from rport list */ 1700 spin_lock_irqsave(&ctrl->rport->lock, flags); 1701 list_del(&ctrl->ctrl_list); 1702 spin_unlock_irqrestore(&ctrl->rport->lock, flags); 1703 1704 blk_cleanup_queue(ctrl->ctrl.admin_q); 1705 blk_mq_free_tag_set(&ctrl->admin_tag_set); 1706 1707 kfree(ctrl->queues); 1708 1709 put_device(ctrl->dev); 1710 nvme_fc_rport_put(ctrl->rport); 1711 1712 ida_simple_remove(&nvme_fc_ctrl_cnt, ctrl->cnum); 1713 if (ctrl->ctrl.opts) 1714 nvmf_free_options(ctrl->ctrl.opts); 1715 kfree(ctrl); 1716 } 1717 1718 static void 1719 nvme_fc_ctrl_put(struct nvme_fc_ctrl *ctrl) 1720 { 1721 kref_put(&ctrl->ref, nvme_fc_ctrl_free); 1722 } 1723 1724 static int 1725 nvme_fc_ctrl_get(struct nvme_fc_ctrl *ctrl) 1726 { 1727 return kref_get_unless_zero(&ctrl->ref); 1728 } 1729 1730 /* 1731 * All accesses from nvme core layer done - can now free the 1732 * controller. 
Called after last nvme_put_ctrl() call 1733 */ 1734 static void 1735 nvme_fc_nvme_ctrl_freed(struct nvme_ctrl *nctrl) 1736 { 1737 struct nvme_fc_ctrl *ctrl = to_fc_ctrl(nctrl); 1738 1739 WARN_ON(nctrl != &ctrl->ctrl); 1740 1741 nvme_fc_ctrl_put(ctrl); 1742 } 1743 1744 static void 1745 nvme_fc_error_recovery(struct nvme_fc_ctrl *ctrl, char *errmsg) 1746 { 1747 dev_warn(ctrl->ctrl.device, 1748 "NVME-FC{%d}: transport association error detected: %s\n", 1749 ctrl->cnum, errmsg); 1750 dev_warn(ctrl->ctrl.device, 1751 "NVME-FC{%d}: resetting controller\n", ctrl->cnum); 1752 1753 /* stop the queues on error, cleanup is in reset thread */ 1754 if (ctrl->queue_count > 1) 1755 nvme_stop_queues(&ctrl->ctrl); 1756 1757 if (!nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_RECONNECTING)) { 1758 dev_err(ctrl->ctrl.device, 1759 "NVME-FC{%d}: error_recovery: Couldn't change state " 1760 "to RECONNECTING\n", ctrl->cnum); 1761 return; 1762 } 1763 1764 if (!queue_work(nvme_fc_wq, &ctrl->reset_work)) 1765 dev_err(ctrl->ctrl.device, 1766 "NVME-FC{%d}: error_recovery: Failed to schedule " 1767 "reset work\n", ctrl->cnum); 1768 } 1769 1770 static enum blk_eh_timer_return 1771 nvme_fc_timeout(struct request *rq, bool reserved) 1772 { 1773 struct nvme_fc_fcp_op *op = blk_mq_rq_to_pdu(rq); 1774 struct nvme_fc_ctrl *ctrl = op->ctrl; 1775 int ret; 1776 1777 if (reserved) 1778 return BLK_EH_RESET_TIMER; 1779 1780 ret = __nvme_fc_abort_op(ctrl, op); 1781 if (ret) 1782 /* io wasn't active to abort consider it done */ 1783 return BLK_EH_HANDLED; 1784 1785 /* 1786 * we can't individually ABTS an io without affecting the queue, 1787 * thus killing the queue, adn thus the association. 1788 * So resolve by performing a controller reset, which will stop 1789 * the host/io stack, terminate the association on the link, 1790 * and recreate an association on the link. 
1791 */ 1792 nvme_fc_error_recovery(ctrl, "io timeout error"); 1793 1794 return BLK_EH_HANDLED; 1795 } 1796 1797 static int 1798 nvme_fc_map_data(struct nvme_fc_ctrl *ctrl, struct request *rq, 1799 struct nvme_fc_fcp_op *op) 1800 { 1801 struct nvmefc_fcp_req *freq = &op->fcp_req; 1802 enum dma_data_direction dir; 1803 int ret; 1804 1805 freq->sg_cnt = 0; 1806 1807 if (!blk_rq_payload_bytes(rq)) 1808 return 0; 1809 1810 freq->sg_table.sgl = freq->first_sgl; 1811 ret = sg_alloc_table_chained(&freq->sg_table, 1812 blk_rq_nr_phys_segments(rq), freq->sg_table.sgl); 1813 if (ret) 1814 return -ENOMEM; 1815 1816 op->nents = blk_rq_map_sg(rq->q, rq, freq->sg_table.sgl); 1817 WARN_ON(op->nents > blk_rq_nr_phys_segments(rq)); 1818 dir = (rq_data_dir(rq) == WRITE) ? DMA_TO_DEVICE : DMA_FROM_DEVICE; 1819 freq->sg_cnt = fc_dma_map_sg(ctrl->lport->dev, freq->sg_table.sgl, 1820 op->nents, dir); 1821 if (unlikely(freq->sg_cnt <= 0)) { 1822 sg_free_table_chained(&freq->sg_table, true); 1823 freq->sg_cnt = 0; 1824 return -EFAULT; 1825 } 1826 1827 /* 1828 * TODO: blk_integrity_rq(rq) for DIF 1829 */ 1830 return 0; 1831 } 1832 1833 static void 1834 nvme_fc_unmap_data(struct nvme_fc_ctrl *ctrl, struct request *rq, 1835 struct nvme_fc_fcp_op *op) 1836 { 1837 struct nvmefc_fcp_req *freq = &op->fcp_req; 1838 1839 if (!freq->sg_cnt) 1840 return; 1841 1842 fc_dma_unmap_sg(ctrl->lport->dev, freq->sg_table.sgl, op->nents, 1843 ((rq_data_dir(rq) == WRITE) ? 1844 DMA_TO_DEVICE : DMA_FROM_DEVICE)); 1845 1846 nvme_cleanup_cmd(rq); 1847 1848 sg_free_table_chained(&freq->sg_table, true); 1849 1850 freq->sg_cnt = 0; 1851 } 1852 1853 /* 1854 * In FC, the queue is a logical thing. At transport connect, the target 1855 * creates its "queue" and returns a handle that is to be given to the 1856 * target whenever it posts something to the corresponding SQ. 
 * When an SQE is sent on a SQ, FC effectively considers the SQE, or
 * rather the command contained within the SQE, an io, and assigns a FC
 * exchange to it. The SQE and the associated SQ handle are sent in the
 * initial CMD IU sent on the exchange. All transfers relative to the io
 * occur as part of the exchange. The CQE is the last thing for the io,
 * which is transferred (explicitly or implicitly) with the RSP IU
 * sent on the exchange. After the CQE is received, the FC exchange is
 * terminated and the Exchange may be used on a different io.
 *
 * The transport to LLDD api has the transport making a request for a
 * new fcp io request to the LLDD. The LLDD then allocates a FC exchange
 * resource and transfers the command. The LLDD will then process all
 * steps to complete the io. Upon completion, the transport done routine
 * is called.
 *
 * So - while the operation is outstanding to the LLDD, there is a link
 * level FC exchange resource that is also outstanding. This must be
 * considered in all cleanup operations.
1875 */ 1876 static int 1877 nvme_fc_start_fcp_op(struct nvme_fc_ctrl *ctrl, struct nvme_fc_queue *queue, 1878 struct nvme_fc_fcp_op *op, u32 data_len, 1879 enum nvmefc_fcp_datadir io_dir) 1880 { 1881 struct nvme_fc_cmd_iu *cmdiu = &op->cmd_iu; 1882 struct nvme_command *sqe = &cmdiu->sqe; 1883 u32 csn; 1884 int ret; 1885 1886 /* 1887 * before attempting to send the io, check to see if we believe 1888 * the target device is present 1889 */ 1890 if (ctrl->rport->remoteport.port_state != FC_OBJSTATE_ONLINE) 1891 return BLK_MQ_RQ_QUEUE_ERROR; 1892 1893 if (!nvme_fc_ctrl_get(ctrl)) 1894 return BLK_MQ_RQ_QUEUE_ERROR; 1895 1896 /* format the FC-NVME CMD IU and fcp_req */ 1897 cmdiu->connection_id = cpu_to_be64(queue->connection_id); 1898 csn = atomic_inc_return(&queue->csn); 1899 cmdiu->csn = cpu_to_be32(csn); 1900 cmdiu->data_len = cpu_to_be32(data_len); 1901 switch (io_dir) { 1902 case NVMEFC_FCP_WRITE: 1903 cmdiu->flags = FCNVME_CMD_FLAGS_WRITE; 1904 break; 1905 case NVMEFC_FCP_READ: 1906 cmdiu->flags = FCNVME_CMD_FLAGS_READ; 1907 break; 1908 case NVMEFC_FCP_NODATA: 1909 cmdiu->flags = 0; 1910 break; 1911 } 1912 op->fcp_req.payload_length = data_len; 1913 op->fcp_req.io_dir = io_dir; 1914 op->fcp_req.transferred_length = 0; 1915 op->fcp_req.rcv_rsplen = 0; 1916 op->fcp_req.status = NVME_SC_SUCCESS; 1917 op->fcp_req.sqid = cpu_to_le16(queue->qnum); 1918 1919 /* 1920 * validate per fabric rules, set fields mandated by fabric spec 1921 * as well as those by FC-NVME spec. 1922 */ 1923 WARN_ON_ONCE(sqe->common.metadata); 1924 WARN_ON_ONCE(sqe->common.dptr.prp1); 1925 WARN_ON_ONCE(sqe->common.dptr.prp2); 1926 sqe->common.flags |= NVME_CMD_SGL_METABUF; 1927 1928 /* 1929 * format SQE DPTR field per FC-NVME rules 1930 * type=data block descr; subtype=offset; 1931 * offset is currently 0. 
1932 */ 1933 sqe->rw.dptr.sgl.type = NVME_SGL_FMT_OFFSET; 1934 sqe->rw.dptr.sgl.length = cpu_to_le32(data_len); 1935 sqe->rw.dptr.sgl.addr = 0; 1936 1937 if (!(op->flags & FCOP_FLAGS_AEN)) { 1938 ret = nvme_fc_map_data(ctrl, op->rq, op); 1939 if (ret < 0) { 1940 nvme_cleanup_cmd(op->rq); 1941 nvme_fc_ctrl_put(ctrl); 1942 return (ret == -ENOMEM || ret == -EAGAIN) ? 1943 BLK_MQ_RQ_QUEUE_BUSY : BLK_MQ_RQ_QUEUE_ERROR; 1944 } 1945 } 1946 1947 fc_dma_sync_single_for_device(ctrl->lport->dev, op->fcp_req.cmddma, 1948 sizeof(op->cmd_iu), DMA_TO_DEVICE); 1949 1950 atomic_set(&op->state, FCPOP_STATE_ACTIVE); 1951 1952 if (!(op->flags & FCOP_FLAGS_AEN)) 1953 blk_mq_start_request(op->rq); 1954 1955 ret = ctrl->lport->ops->fcp_io(&ctrl->lport->localport, 1956 &ctrl->rport->remoteport, 1957 queue->lldd_handle, &op->fcp_req); 1958 1959 if (ret) { 1960 if (op->rq) { /* normal request */ 1961 nvme_fc_unmap_data(ctrl, op->rq, op); 1962 nvme_cleanup_cmd(op->rq); 1963 } 1964 /* else - aen. no cleanup needed */ 1965 1966 nvme_fc_ctrl_put(ctrl); 1967 1968 if (ret != -EBUSY) 1969 return BLK_MQ_RQ_QUEUE_ERROR; 1970 1971 if (op->rq) { 1972 blk_mq_stop_hw_queues(op->rq->q); 1973 blk_mq_delay_queue(queue->hctx, NVMEFC_QUEUE_DELAY); 1974 } 1975 return BLK_MQ_RQ_QUEUE_BUSY; 1976 } 1977 1978 return BLK_MQ_RQ_QUEUE_OK; 1979 } 1980 1981 static int 1982 nvme_fc_queue_rq(struct blk_mq_hw_ctx *hctx, 1983 const struct blk_mq_queue_data *bd) 1984 { 1985 struct nvme_ns *ns = hctx->queue->queuedata; 1986 struct nvme_fc_queue *queue = hctx->driver_data; 1987 struct nvme_fc_ctrl *ctrl = queue->ctrl; 1988 struct request *rq = bd->rq; 1989 struct nvme_fc_fcp_op *op = blk_mq_rq_to_pdu(rq); 1990 struct nvme_fc_cmd_iu *cmdiu = &op->cmd_iu; 1991 struct nvme_command *sqe = &cmdiu->sqe; 1992 enum nvmefc_fcp_datadir io_dir; 1993 u32 data_len; 1994 int ret; 1995 1996 ret = nvme_setup_cmd(ns, rq, sqe); 1997 if (ret) 1998 return ret; 1999 2000 data_len = blk_rq_payload_bytes(rq); 2001 if (data_len) 2002 io_dir = 
((rq_data_dir(rq) == WRITE) ? 2003 NVMEFC_FCP_WRITE : NVMEFC_FCP_READ); 2004 else 2005 io_dir = NVMEFC_FCP_NODATA; 2006 2007 return nvme_fc_start_fcp_op(ctrl, queue, op, data_len, io_dir); 2008 } 2009 2010 static struct blk_mq_tags * 2011 nvme_fc_tagset(struct nvme_fc_queue *queue) 2012 { 2013 if (queue->qnum == 0) 2014 return queue->ctrl->admin_tag_set.tags[queue->qnum]; 2015 2016 return queue->ctrl->tag_set.tags[queue->qnum - 1]; 2017 } 2018 2019 static int 2020 nvme_fc_poll(struct blk_mq_hw_ctx *hctx, unsigned int tag) 2021 2022 { 2023 struct nvme_fc_queue *queue = hctx->driver_data; 2024 struct nvme_fc_ctrl *ctrl = queue->ctrl; 2025 struct request *req; 2026 struct nvme_fc_fcp_op *op; 2027 2028 req = blk_mq_tag_to_rq(nvme_fc_tagset(queue), tag); 2029 if (!req) 2030 return 0; 2031 2032 op = blk_mq_rq_to_pdu(req); 2033 2034 if ((atomic_read(&op->state) == FCPOP_STATE_ACTIVE) && 2035 (ctrl->lport->ops->poll_queue)) 2036 ctrl->lport->ops->poll_queue(&ctrl->lport->localport, 2037 queue->lldd_handle); 2038 2039 return ((atomic_read(&op->state) != FCPOP_STATE_ACTIVE)); 2040 } 2041 2042 static void 2043 nvme_fc_submit_async_event(struct nvme_ctrl *arg, int aer_idx) 2044 { 2045 struct nvme_fc_ctrl *ctrl = to_fc_ctrl(arg); 2046 struct nvme_fc_fcp_op *aen_op; 2047 unsigned long flags; 2048 bool terminating = false; 2049 int ret; 2050 2051 if (aer_idx > NVME_FC_NR_AEN_COMMANDS) 2052 return; 2053 2054 spin_lock_irqsave(&ctrl->lock, flags); 2055 if (ctrl->flags & FCCTRL_TERMIO) 2056 terminating = true; 2057 spin_unlock_irqrestore(&ctrl->lock, flags); 2058 2059 if (terminating) 2060 return; 2061 2062 aen_op = &ctrl->aen_ops[aer_idx]; 2063 2064 ret = nvme_fc_start_fcp_op(ctrl, aen_op->queue, aen_op, 0, 2065 NVMEFC_FCP_NODATA); 2066 if (ret) 2067 dev_err(ctrl->ctrl.device, 2068 "failed async event work [%d]\n", aer_idx); 2069 } 2070 2071 static void 2072 __nvme_fc_final_op_cleanup(struct request *rq) 2073 { 2074 struct nvme_fc_fcp_op *op = blk_mq_rq_to_pdu(rq); 2075 struct 
nvme_fc_ctrl *ctrl = op->ctrl; 2076 2077 atomic_set(&op->state, FCPOP_STATE_IDLE); 2078 op->flags &= ~(FCOP_FLAGS_TERMIO | FCOP_FLAGS_RELEASED | 2079 FCOP_FLAGS_COMPLETE); 2080 2081 nvme_cleanup_cmd(rq); 2082 nvme_fc_unmap_data(ctrl, rq, op); 2083 nvme_complete_rq(rq); 2084 nvme_fc_ctrl_put(ctrl); 2085 2086 } 2087 2088 static void 2089 nvme_fc_complete_rq(struct request *rq) 2090 { 2091 struct nvme_fc_fcp_op *op = blk_mq_rq_to_pdu(rq); 2092 struct nvme_fc_ctrl *ctrl = op->ctrl; 2093 unsigned long flags; 2094 bool completed = false; 2095 2096 /* 2097 * the core layer, on controller resets after calling 2098 * nvme_shutdown_ctrl(), calls complete_rq without our 2099 * calling blk_mq_complete_request(), thus there may still 2100 * be live i/o outstanding with the LLDD. Means transport has 2101 * to track complete calls vs fcpio_done calls to know what 2102 * path to take on completes and dones. 2103 */ 2104 spin_lock_irqsave(&ctrl->lock, flags); 2105 if (op->flags & FCOP_FLAGS_COMPLETE) 2106 completed = true; 2107 else 2108 op->flags |= FCOP_FLAGS_RELEASED; 2109 spin_unlock_irqrestore(&ctrl->lock, flags); 2110 2111 if (completed) 2112 __nvme_fc_final_op_cleanup(rq); 2113 } 2114 2115 /* 2116 * This routine is used by the transport when it needs to find active 2117 * io on a queue that is to be terminated. The transport uses 2118 * blk_mq_tagset_busy_itr() to find the busy requests, which then invoke 2119 * this routine to kill them on a 1 by 1 basis. 2120 * 2121 * As FC allocates FC exchange for each io, the transport must contact 2122 * the LLDD to terminate the exchange, thus releasing the FC exchange. 2123 * After terminating the exchange the LLDD will call the transport's 2124 * normal io done path for the request, but it will have an aborted 2125 * status. The done path will return the io request back to the block 2126 * layer with an error status. 
2127 */ 2128 static void 2129 nvme_fc_terminate_exchange(struct request *req, void *data, bool reserved) 2130 { 2131 struct nvme_ctrl *nctrl = data; 2132 struct nvme_fc_ctrl *ctrl = to_fc_ctrl(nctrl); 2133 struct nvme_fc_fcp_op *op = blk_mq_rq_to_pdu(req); 2134 unsigned long flags; 2135 int status; 2136 2137 if (!blk_mq_request_started(req)) 2138 return; 2139 2140 spin_lock_irqsave(&ctrl->lock, flags); 2141 if (ctrl->flags & FCCTRL_TERMIO) { 2142 ctrl->iocnt++; 2143 op->flags |= FCOP_FLAGS_TERMIO; 2144 } 2145 spin_unlock_irqrestore(&ctrl->lock, flags); 2146 2147 status = __nvme_fc_abort_op(ctrl, op); 2148 if (status) { 2149 /* 2150 * if __nvme_fc_abort_op failed the io wasn't 2151 * active. Thus this call path is running in 2152 * parallel to the io complete. Treat as non-error. 2153 */ 2154 2155 /* back out the flags/counters */ 2156 spin_lock_irqsave(&ctrl->lock, flags); 2157 if (ctrl->flags & FCCTRL_TERMIO) 2158 ctrl->iocnt--; 2159 op->flags &= ~FCOP_FLAGS_TERMIO; 2160 spin_unlock_irqrestore(&ctrl->lock, flags); 2161 return; 2162 } 2163 } 2164 2165 2166 static const struct blk_mq_ops nvme_fc_mq_ops = { 2167 .queue_rq = nvme_fc_queue_rq, 2168 .complete = nvme_fc_complete_rq, 2169 .init_request = nvme_fc_init_request, 2170 .exit_request = nvme_fc_exit_request, 2171 .reinit_request = nvme_fc_reinit_request, 2172 .init_hctx = nvme_fc_init_hctx, 2173 .poll = nvme_fc_poll, 2174 .timeout = nvme_fc_timeout, 2175 }; 2176 2177 static int 2178 nvme_fc_create_io_queues(struct nvme_fc_ctrl *ctrl) 2179 { 2180 struct nvmf_ctrl_options *opts = ctrl->ctrl.opts; 2181 int ret; 2182 2183 ret = nvme_set_queue_count(&ctrl->ctrl, &opts->nr_io_queues); 2184 if (ret) { 2185 dev_info(ctrl->ctrl.device, 2186 "set_queue_count failed: %d\n", ret); 2187 return ret; 2188 } 2189 2190 ctrl->queue_count = opts->nr_io_queues + 1; 2191 if (!opts->nr_io_queues) 2192 return 0; 2193 2194 nvme_fc_init_io_queues(ctrl); 2195 2196 memset(&ctrl->tag_set, 0, sizeof(ctrl->tag_set)); 2197 ctrl->tag_set.ops = 
&nvme_fc_mq_ops; 2198 ctrl->tag_set.queue_depth = ctrl->ctrl.opts->queue_size; 2199 ctrl->tag_set.reserved_tags = 1; /* fabric connect */ 2200 ctrl->tag_set.numa_node = NUMA_NO_NODE; 2201 ctrl->tag_set.flags = BLK_MQ_F_SHOULD_MERGE; 2202 ctrl->tag_set.cmd_size = sizeof(struct nvme_fc_fcp_op) + 2203 (SG_CHUNK_SIZE * 2204 sizeof(struct scatterlist)) + 2205 ctrl->lport->ops->fcprqst_priv_sz; 2206 ctrl->tag_set.driver_data = ctrl; 2207 ctrl->tag_set.nr_hw_queues = ctrl->queue_count - 1; 2208 ctrl->tag_set.timeout = NVME_IO_TIMEOUT; 2209 2210 ret = blk_mq_alloc_tag_set(&ctrl->tag_set); 2211 if (ret) 2212 return ret; 2213 2214 ctrl->ctrl.tagset = &ctrl->tag_set; 2215 2216 ctrl->ctrl.connect_q = blk_mq_init_queue(&ctrl->tag_set); 2217 if (IS_ERR(ctrl->ctrl.connect_q)) { 2218 ret = PTR_ERR(ctrl->ctrl.connect_q); 2219 goto out_free_tag_set; 2220 } 2221 2222 ret = nvme_fc_create_hw_io_queues(ctrl, ctrl->ctrl.opts->queue_size); 2223 if (ret) 2224 goto out_cleanup_blk_queue; 2225 2226 ret = nvme_fc_connect_io_queues(ctrl, ctrl->ctrl.opts->queue_size); 2227 if (ret) 2228 goto out_delete_hw_queues; 2229 2230 return 0; 2231 2232 out_delete_hw_queues: 2233 nvme_fc_delete_hw_io_queues(ctrl); 2234 out_cleanup_blk_queue: 2235 nvme_stop_keep_alive(&ctrl->ctrl); 2236 blk_cleanup_queue(ctrl->ctrl.connect_q); 2237 out_free_tag_set: 2238 blk_mq_free_tag_set(&ctrl->tag_set); 2239 nvme_fc_free_io_queues(ctrl); 2240 2241 /* force put free routine to ignore io queues */ 2242 ctrl->ctrl.tagset = NULL; 2243 2244 return ret; 2245 } 2246 2247 static int 2248 nvme_fc_reinit_io_queues(struct nvme_fc_ctrl *ctrl) 2249 { 2250 struct nvmf_ctrl_options *opts = ctrl->ctrl.opts; 2251 int ret; 2252 2253 ret = nvme_set_queue_count(&ctrl->ctrl, &opts->nr_io_queues); 2254 if (ret) { 2255 dev_info(ctrl->ctrl.device, 2256 "set_queue_count failed: %d\n", ret); 2257 return ret; 2258 } 2259 2260 /* check for io queues existing */ 2261 if (ctrl->queue_count == 1) 2262 return 0; 2263 2264 
nvme_fc_init_io_queues(ctrl); 2265 2266 ret = blk_mq_reinit_tagset(&ctrl->tag_set); 2267 if (ret) 2268 goto out_free_io_queues; 2269 2270 ret = nvme_fc_create_hw_io_queues(ctrl, ctrl->ctrl.opts->queue_size); 2271 if (ret) 2272 goto out_free_io_queues; 2273 2274 ret = nvme_fc_connect_io_queues(ctrl, ctrl->ctrl.opts->queue_size); 2275 if (ret) 2276 goto out_delete_hw_queues; 2277 2278 return 0; 2279 2280 out_delete_hw_queues: 2281 nvme_fc_delete_hw_io_queues(ctrl); 2282 out_free_io_queues: 2283 nvme_fc_free_io_queues(ctrl); 2284 return ret; 2285 } 2286 2287 /* 2288 * This routine restarts the controller on the host side, and 2289 * on the link side, recreates the controller association. 2290 */ 2291 static int 2292 nvme_fc_create_association(struct nvme_fc_ctrl *ctrl) 2293 { 2294 struct nvmf_ctrl_options *opts = ctrl->ctrl.opts; 2295 u32 segs; 2296 int ret; 2297 bool changed; 2298 2299 ++ctrl->ctrl.opts->nr_reconnects; 2300 2301 /* 2302 * Create the admin queue 2303 */ 2304 2305 nvme_fc_init_queue(ctrl, 0, NVME_FC_AQ_BLKMQ_DEPTH); 2306 2307 ret = __nvme_fc_create_hw_queue(ctrl, &ctrl->queues[0], 0, 2308 NVME_FC_AQ_BLKMQ_DEPTH); 2309 if (ret) 2310 goto out_free_queue; 2311 2312 ret = nvme_fc_connect_admin_queue(ctrl, &ctrl->queues[0], 2313 NVME_FC_AQ_BLKMQ_DEPTH, 2314 (NVME_FC_AQ_BLKMQ_DEPTH / 4)); 2315 if (ret) 2316 goto out_delete_hw_queue; 2317 2318 if (ctrl->ctrl.state != NVME_CTRL_NEW) 2319 blk_mq_start_stopped_hw_queues(ctrl->ctrl.admin_q, true); 2320 2321 ret = nvmf_connect_admin_queue(&ctrl->ctrl); 2322 if (ret) 2323 goto out_disconnect_admin_queue; 2324 2325 /* 2326 * Check controller capabilities 2327 * 2328 * todo:- add code to check if ctrl attributes changed from 2329 * prior connection values 2330 */ 2331 2332 ret = nvmf_reg_read64(&ctrl->ctrl, NVME_REG_CAP, &ctrl->cap); 2333 if (ret) { 2334 dev_err(ctrl->ctrl.device, 2335 "prop_get NVME_REG_CAP failed\n"); 2336 goto out_disconnect_admin_queue; 2337 } 2338 2339 ctrl->ctrl.sqsize = 2340 min_t(int, 
NVME_CAP_MQES(ctrl->cap) + 1, ctrl->ctrl.sqsize); 2341 2342 ret = nvme_enable_ctrl(&ctrl->ctrl, ctrl->cap); 2343 if (ret) 2344 goto out_disconnect_admin_queue; 2345 2346 segs = min_t(u32, NVME_FC_MAX_SEGMENTS, 2347 ctrl->lport->ops->max_sgl_segments); 2348 ctrl->ctrl.max_hw_sectors = (segs - 1) << (PAGE_SHIFT - 9); 2349 2350 ret = nvme_init_identify(&ctrl->ctrl); 2351 if (ret) 2352 goto out_disconnect_admin_queue; 2353 2354 /* sanity checks */ 2355 2356 /* FC-NVME does not have other data in the capsule */ 2357 if (ctrl->ctrl.icdoff) { 2358 dev_err(ctrl->ctrl.device, "icdoff %d is not supported!\n", 2359 ctrl->ctrl.icdoff); 2360 goto out_disconnect_admin_queue; 2361 } 2362 2363 nvme_start_keep_alive(&ctrl->ctrl); 2364 2365 /* FC-NVME supports normal SGL Data Block Descriptors */ 2366 2367 if (opts->queue_size > ctrl->ctrl.maxcmd) { 2368 /* warn if maxcmd is lower than queue_size */ 2369 dev_warn(ctrl->ctrl.device, 2370 "queue_size %zu > ctrl maxcmd %u, reducing " 2371 "to queue_size\n", 2372 opts->queue_size, ctrl->ctrl.maxcmd); 2373 opts->queue_size = ctrl->ctrl.maxcmd; 2374 } 2375 2376 ret = nvme_fc_init_aen_ops(ctrl); 2377 if (ret) 2378 goto out_term_aen_ops; 2379 2380 /* 2381 * Create the io queues 2382 */ 2383 2384 if (ctrl->queue_count > 1) { 2385 if (ctrl->ctrl.state == NVME_CTRL_NEW) 2386 ret = nvme_fc_create_io_queues(ctrl); 2387 else 2388 ret = nvme_fc_reinit_io_queues(ctrl); 2389 if (ret) 2390 goto out_term_aen_ops; 2391 } 2392 2393 changed = nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_LIVE); 2394 WARN_ON_ONCE(!changed); 2395 2396 ctrl->ctrl.opts->nr_reconnects = 0; 2397 2398 if (ctrl->queue_count > 1) { 2399 nvme_start_queues(&ctrl->ctrl); 2400 nvme_queue_scan(&ctrl->ctrl); 2401 nvme_queue_async_events(&ctrl->ctrl); 2402 } 2403 2404 return 0; /* Success */ 2405 2406 out_term_aen_ops: 2407 nvme_fc_term_aen_ops(ctrl); 2408 nvme_stop_keep_alive(&ctrl->ctrl); 2409 out_disconnect_admin_queue: 2410 /* send a Disconnect(association) LS to fc-nvme target */ 
2411 nvme_fc_xmt_disconnect_assoc(ctrl); 2412 out_delete_hw_queue: 2413 __nvme_fc_delete_hw_queue(ctrl, &ctrl->queues[0], 0); 2414 out_free_queue: 2415 nvme_fc_free_queue(&ctrl->queues[0]); 2416 2417 return ret; 2418 } 2419 2420 /* 2421 * This routine stops operation of the controller on the host side. 2422 * On the host os stack side: Admin and IO queues are stopped, 2423 * outstanding ios on them terminated via FC ABTS. 2424 * On the link side: the association is terminated. 2425 */ 2426 static void 2427 nvme_fc_delete_association(struct nvme_fc_ctrl *ctrl) 2428 { 2429 unsigned long flags; 2430 2431 nvme_stop_keep_alive(&ctrl->ctrl); 2432 2433 spin_lock_irqsave(&ctrl->lock, flags); 2434 ctrl->flags |= FCCTRL_TERMIO; 2435 ctrl->iocnt = 0; 2436 spin_unlock_irqrestore(&ctrl->lock, flags); 2437 2438 /* 2439 * If io queues are present, stop them and terminate all outstanding 2440 * ios on them. As FC allocates FC exchange for each io, the 2441 * transport must contact the LLDD to terminate the exchange, 2442 * thus releasing the FC exchange. We use blk_mq_tagset_busy_itr() 2443 * to tell us what io's are busy and invoke a transport routine 2444 * to kill them with the LLDD. After terminating the exchange 2445 * the LLDD will call the transport's normal io done path, but it 2446 * will have an aborted status. The done path will return the 2447 * io requests back to the block layer as part of normal completions 2448 * (but with error status). 2449 */ 2450 if (ctrl->queue_count > 1) { 2451 nvme_stop_queues(&ctrl->ctrl); 2452 blk_mq_tagset_busy_iter(&ctrl->tag_set, 2453 nvme_fc_terminate_exchange, &ctrl->ctrl); 2454 } 2455 2456 /* 2457 * Other transports, which don't have link-level contexts bound 2458 * to sqe's, would try to gracefully shutdown the controller by 2459 * writing the registers for shutdown and polling (call 2460 * nvme_shutdown_ctrl()). 
Given a bunch of i/o was potentially 2461 * just aborted and we will wait on those contexts, and given 2462 * there was no indication of how live the controlelr is on the 2463 * link, don't send more io to create more contexts for the 2464 * shutdown. Let the controller fail via keepalive failure if 2465 * its still present. 2466 */ 2467 2468 /* 2469 * clean up the admin queue. Same thing as above. 2470 * use blk_mq_tagset_busy_itr() and the transport routine to 2471 * terminate the exchanges. 2472 */ 2473 blk_mq_stop_hw_queues(ctrl->ctrl.admin_q); 2474 blk_mq_tagset_busy_iter(&ctrl->admin_tag_set, 2475 nvme_fc_terminate_exchange, &ctrl->ctrl); 2476 2477 /* kill the aens as they are a separate path */ 2478 nvme_fc_abort_aen_ops(ctrl); 2479 2480 /* wait for all io that had to be aborted */ 2481 spin_lock_irqsave(&ctrl->lock, flags); 2482 while (ctrl->iocnt) { 2483 spin_unlock_irqrestore(&ctrl->lock, flags); 2484 msleep(1000); 2485 spin_lock_irqsave(&ctrl->lock, flags); 2486 } 2487 ctrl->flags &= ~FCCTRL_TERMIO; 2488 spin_unlock_irqrestore(&ctrl->lock, flags); 2489 2490 nvme_fc_term_aen_ops(ctrl); 2491 2492 /* 2493 * send a Disconnect(association) LS to fc-nvme target 2494 * Note: could have been sent at top of process, but 2495 * cleaner on link traffic if after the aborts complete. 
2496 * Note: if association doesn't exist, association_id will be 0 2497 */ 2498 if (ctrl->association_id) 2499 nvme_fc_xmt_disconnect_assoc(ctrl); 2500 2501 if (ctrl->ctrl.tagset) { 2502 nvme_fc_delete_hw_io_queues(ctrl); 2503 nvme_fc_free_io_queues(ctrl); 2504 } 2505 2506 __nvme_fc_delete_hw_queue(ctrl, &ctrl->queues[0], 0); 2507 nvme_fc_free_queue(&ctrl->queues[0]); 2508 } 2509 2510 static void 2511 nvme_fc_delete_ctrl_work(struct work_struct *work) 2512 { 2513 struct nvme_fc_ctrl *ctrl = 2514 container_of(work, struct nvme_fc_ctrl, delete_work); 2515 2516 cancel_work_sync(&ctrl->reset_work); 2517 cancel_delayed_work_sync(&ctrl->connect_work); 2518 2519 /* 2520 * kill the association on the link side. this will block 2521 * waiting for io to terminate 2522 */ 2523 nvme_fc_delete_association(ctrl); 2524 2525 /* 2526 * tear down the controller 2527 * After the last reference on the nvme ctrl is removed, 2528 * the transport nvme_fc_nvme_ctrl_freed() callback will be 2529 * invoked. From there, the transport will tear down it's 2530 * logical queues and association. 2531 */ 2532 nvme_uninit_ctrl(&ctrl->ctrl); 2533 2534 nvme_put_ctrl(&ctrl->ctrl); 2535 } 2536 2537 static bool 2538 __nvme_fc_schedule_delete_work(struct nvme_fc_ctrl *ctrl) 2539 { 2540 if (!nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_DELETING)) 2541 return true; 2542 2543 if (!queue_work(nvme_fc_wq, &ctrl->delete_work)) 2544 return true; 2545 2546 return false; 2547 } 2548 2549 static int 2550 __nvme_fc_del_ctrl(struct nvme_fc_ctrl *ctrl) 2551 { 2552 return __nvme_fc_schedule_delete_work(ctrl) ? 
-EBUSY : 0; 2553 } 2554 2555 /* 2556 * Request from nvme core layer to delete the controller 2557 */ 2558 static int 2559 nvme_fc_del_nvme_ctrl(struct nvme_ctrl *nctrl) 2560 { 2561 struct nvme_fc_ctrl *ctrl = to_fc_ctrl(nctrl); 2562 int ret; 2563 2564 if (!kref_get_unless_zero(&ctrl->ctrl.kref)) 2565 return -EBUSY; 2566 2567 ret = __nvme_fc_del_ctrl(ctrl); 2568 2569 if (!ret) 2570 flush_workqueue(nvme_fc_wq); 2571 2572 nvme_put_ctrl(&ctrl->ctrl); 2573 2574 return ret; 2575 } 2576 2577 static void 2578 nvme_fc_reconnect_or_delete(struct nvme_fc_ctrl *ctrl, int status) 2579 { 2580 /* If we are resetting/deleting then do nothing */ 2581 if (ctrl->ctrl.state != NVME_CTRL_RECONNECTING) { 2582 WARN_ON_ONCE(ctrl->ctrl.state == NVME_CTRL_NEW || 2583 ctrl->ctrl.state == NVME_CTRL_LIVE); 2584 return; 2585 } 2586 2587 dev_info(ctrl->ctrl.device, 2588 "NVME-FC{%d}: reset: Reconnect attempt failed (%d)\n", 2589 ctrl->cnum, status); 2590 2591 if (nvmf_should_reconnect(&ctrl->ctrl)) { 2592 dev_info(ctrl->ctrl.device, 2593 "NVME-FC{%d}: Reconnect attempt in %d seconds.\n", 2594 ctrl->cnum, ctrl->ctrl.opts->reconnect_delay); 2595 queue_delayed_work(nvme_fc_wq, &ctrl->connect_work, 2596 ctrl->ctrl.opts->reconnect_delay * HZ); 2597 } else { 2598 dev_warn(ctrl->ctrl.device, 2599 "NVME-FC{%d}: Max reconnect attempts (%d) " 2600 "reached. 
Removing controller\n", 2601 ctrl->cnum, ctrl->ctrl.opts->nr_reconnects); 2602 WARN_ON(__nvme_fc_schedule_delete_work(ctrl)); 2603 } 2604 } 2605 2606 static void 2607 nvme_fc_reset_ctrl_work(struct work_struct *work) 2608 { 2609 struct nvme_fc_ctrl *ctrl = 2610 container_of(work, struct nvme_fc_ctrl, reset_work); 2611 int ret; 2612 2613 /* will block will waiting for io to terminate */ 2614 nvme_fc_delete_association(ctrl); 2615 2616 ret = nvme_fc_create_association(ctrl); 2617 if (ret) 2618 nvme_fc_reconnect_or_delete(ctrl, ret); 2619 else 2620 dev_info(ctrl->ctrl.device, 2621 "NVME-FC{%d}: controller reset complete\n", ctrl->cnum); 2622 } 2623 2624 /* 2625 * called by the nvme core layer, for sysfs interface that requests 2626 * a reset of the nvme controller 2627 */ 2628 static int 2629 nvme_fc_reset_nvme_ctrl(struct nvme_ctrl *nctrl) 2630 { 2631 struct nvme_fc_ctrl *ctrl = to_fc_ctrl(nctrl); 2632 2633 dev_info(ctrl->ctrl.device, 2634 "NVME-FC{%d}: admin requested controller reset\n", ctrl->cnum); 2635 2636 if (!nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_RESETTING)) 2637 return -EBUSY; 2638 2639 if (!queue_work(nvme_fc_wq, &ctrl->reset_work)) 2640 return -EBUSY; 2641 2642 flush_work(&ctrl->reset_work); 2643 2644 return 0; 2645 } 2646 2647 static const struct nvme_ctrl_ops nvme_fc_ctrl_ops = { 2648 .name = "fc", 2649 .module = THIS_MODULE, 2650 .flags = NVME_F_FABRICS, 2651 .reg_read32 = nvmf_reg_read32, 2652 .reg_read64 = nvmf_reg_read64, 2653 .reg_write32 = nvmf_reg_write32, 2654 .reset_ctrl = nvme_fc_reset_nvme_ctrl, 2655 .free_ctrl = nvme_fc_nvme_ctrl_freed, 2656 .submit_async_event = nvme_fc_submit_async_event, 2657 .delete_ctrl = nvme_fc_del_nvme_ctrl, 2658 .get_subsysnqn = nvmf_get_subsysnqn, 2659 .get_address = nvmf_get_address, 2660 }; 2661 2662 static void 2663 nvme_fc_connect_ctrl_work(struct work_struct *work) 2664 { 2665 int ret; 2666 2667 struct nvme_fc_ctrl *ctrl = 2668 container_of(to_delayed_work(work), 2669 struct nvme_fc_ctrl, connect_work); 
2670 2671 ret = nvme_fc_create_association(ctrl); 2672 if (ret) 2673 nvme_fc_reconnect_or_delete(ctrl, ret); 2674 else 2675 dev_info(ctrl->ctrl.device, 2676 "NVME-FC{%d}: controller reconnect complete\n", 2677 ctrl->cnum); 2678 } 2679 2680 2681 static const struct blk_mq_ops nvme_fc_admin_mq_ops = { 2682 .queue_rq = nvme_fc_queue_rq, 2683 .complete = nvme_fc_complete_rq, 2684 .init_request = nvme_fc_init_admin_request, 2685 .exit_request = nvme_fc_exit_request, 2686 .reinit_request = nvme_fc_reinit_request, 2687 .init_hctx = nvme_fc_init_admin_hctx, 2688 .timeout = nvme_fc_timeout, 2689 }; 2690 2691 2692 static struct nvme_ctrl * 2693 nvme_fc_init_ctrl(struct device *dev, struct nvmf_ctrl_options *opts, 2694 struct nvme_fc_lport *lport, struct nvme_fc_rport *rport) 2695 { 2696 struct nvme_fc_ctrl *ctrl; 2697 unsigned long flags; 2698 int ret, idx; 2699 2700 if (!(rport->remoteport.port_role & 2701 (FC_PORT_ROLE_NVME_DISCOVERY | FC_PORT_ROLE_NVME_TARGET))) { 2702 ret = -EBADR; 2703 goto out_fail; 2704 } 2705 2706 ctrl = kzalloc(sizeof(*ctrl), GFP_KERNEL); 2707 if (!ctrl) { 2708 ret = -ENOMEM; 2709 goto out_fail; 2710 } 2711 2712 idx = ida_simple_get(&nvme_fc_ctrl_cnt, 0, 0, GFP_KERNEL); 2713 if (idx < 0) { 2714 ret = -ENOSPC; 2715 goto out_free_ctrl; 2716 } 2717 2718 ctrl->ctrl.opts = opts; 2719 INIT_LIST_HEAD(&ctrl->ctrl_list); 2720 ctrl->lport = lport; 2721 ctrl->rport = rport; 2722 ctrl->dev = lport->dev; 2723 ctrl->cnum = idx; 2724 2725 get_device(ctrl->dev); 2726 kref_init(&ctrl->ref); 2727 2728 INIT_WORK(&ctrl->delete_work, nvme_fc_delete_ctrl_work); 2729 INIT_WORK(&ctrl->reset_work, nvme_fc_reset_ctrl_work); 2730 INIT_DELAYED_WORK(&ctrl->connect_work, nvme_fc_connect_ctrl_work); 2731 spin_lock_init(&ctrl->lock); 2732 2733 /* io queue count */ 2734 ctrl->queue_count = min_t(unsigned int, 2735 opts->nr_io_queues, 2736 lport->ops->max_hw_queues); 2737 opts->nr_io_queues = ctrl->queue_count; /* so opts has valid value */ 2738 ctrl->queue_count++; /* +1 for admin 
queue */ 2739 2740 ctrl->ctrl.sqsize = opts->queue_size - 1; 2741 ctrl->ctrl.kato = opts->kato; 2742 2743 ret = -ENOMEM; 2744 ctrl->queues = kcalloc(ctrl->queue_count, sizeof(struct nvme_fc_queue), 2745 GFP_KERNEL); 2746 if (!ctrl->queues) 2747 goto out_free_ida; 2748 2749 memset(&ctrl->admin_tag_set, 0, sizeof(ctrl->admin_tag_set)); 2750 ctrl->admin_tag_set.ops = &nvme_fc_admin_mq_ops; 2751 ctrl->admin_tag_set.queue_depth = NVME_FC_AQ_BLKMQ_DEPTH; 2752 ctrl->admin_tag_set.reserved_tags = 2; /* fabric connect + Keep-Alive */ 2753 ctrl->admin_tag_set.numa_node = NUMA_NO_NODE; 2754 ctrl->admin_tag_set.cmd_size = sizeof(struct nvme_fc_fcp_op) + 2755 (SG_CHUNK_SIZE * 2756 sizeof(struct scatterlist)) + 2757 ctrl->lport->ops->fcprqst_priv_sz; 2758 ctrl->admin_tag_set.driver_data = ctrl; 2759 ctrl->admin_tag_set.nr_hw_queues = 1; 2760 ctrl->admin_tag_set.timeout = ADMIN_TIMEOUT; 2761 2762 ret = blk_mq_alloc_tag_set(&ctrl->admin_tag_set); 2763 if (ret) 2764 goto out_free_queues; 2765 2766 ctrl->ctrl.admin_q = blk_mq_init_queue(&ctrl->admin_tag_set); 2767 if (IS_ERR(ctrl->ctrl.admin_q)) { 2768 ret = PTR_ERR(ctrl->ctrl.admin_q); 2769 goto out_free_admin_tag_set; 2770 } 2771 2772 /* 2773 * Would have been nice to init io queues tag set as well. 2774 * However, we require interaction from the controller 2775 * for max io queue count before we can do so. 2776 * Defer this to the connect path. 
2777 */ 2778 2779 ret = nvme_init_ctrl(&ctrl->ctrl, dev, &nvme_fc_ctrl_ops, 0); 2780 if (ret) 2781 goto out_cleanup_admin_q; 2782 2783 /* at this point, teardown path changes to ref counting on nvme ctrl */ 2784 2785 spin_lock_irqsave(&rport->lock, flags); 2786 list_add_tail(&ctrl->ctrl_list, &rport->ctrl_list); 2787 spin_unlock_irqrestore(&rport->lock, flags); 2788 2789 ret = nvme_fc_create_association(ctrl); 2790 if (ret) { 2791 ctrl->ctrl.opts = NULL; 2792 /* initiate nvme ctrl ref counting teardown */ 2793 nvme_uninit_ctrl(&ctrl->ctrl); 2794 2795 /* as we're past the point where we transition to the ref 2796 * counting teardown path, if we return a bad pointer here, 2797 * the calling routine, thinking it's prior to the 2798 * transition, will do an rport put. Since the teardown 2799 * path also does a rport put, we do an extra get here to 2800 * so proper order/teardown happens. 2801 */ 2802 nvme_fc_rport_get(rport); 2803 2804 if (ret > 0) 2805 ret = -EIO; 2806 return ERR_PTR(ret); 2807 } 2808 2809 kref_get(&ctrl->ctrl.kref); 2810 2811 dev_info(ctrl->ctrl.device, 2812 "NVME-FC{%d}: new ctrl: NQN \"%s\"\n", 2813 ctrl->cnum, ctrl->ctrl.opts->subsysnqn); 2814 2815 return &ctrl->ctrl; 2816 2817 out_cleanup_admin_q: 2818 blk_cleanup_queue(ctrl->ctrl.admin_q); 2819 out_free_admin_tag_set: 2820 blk_mq_free_tag_set(&ctrl->admin_tag_set); 2821 out_free_queues: 2822 kfree(ctrl->queues); 2823 out_free_ida: 2824 put_device(ctrl->dev); 2825 ida_simple_remove(&nvme_fc_ctrl_cnt, ctrl->cnum); 2826 out_free_ctrl: 2827 kfree(ctrl); 2828 out_fail: 2829 /* exit via here doesn't follow ctlr ref points */ 2830 return ERR_PTR(ret); 2831 } 2832 2833 enum { 2834 FCT_TRADDR_ERR = 0, 2835 FCT_TRADDR_WWNN = 1 << 0, 2836 FCT_TRADDR_WWPN = 1 << 1, 2837 }; 2838 2839 struct nvmet_fc_traddr { 2840 u64 nn; 2841 u64 pn; 2842 }; 2843 2844 static const match_table_t traddr_opt_tokens = { 2845 { FCT_TRADDR_WWNN, "nn-%s" }, 2846 { FCT_TRADDR_WWPN, "pn-%s" }, 2847 { FCT_TRADDR_ERR, NULL } 2848 }; 
2849 2850 static int 2851 nvme_fc_parse_address(struct nvmet_fc_traddr *traddr, char *buf) 2852 { 2853 substring_t args[MAX_OPT_ARGS]; 2854 char *options, *o, *p; 2855 int token, ret = 0; 2856 u64 token64; 2857 2858 options = o = kstrdup(buf, GFP_KERNEL); 2859 if (!options) 2860 return -ENOMEM; 2861 2862 while ((p = strsep(&o, ":\n")) != NULL) { 2863 if (!*p) 2864 continue; 2865 2866 token = match_token(p, traddr_opt_tokens, args); 2867 switch (token) { 2868 case FCT_TRADDR_WWNN: 2869 if (match_u64(args, &token64)) { 2870 ret = -EINVAL; 2871 goto out; 2872 } 2873 traddr->nn = token64; 2874 break; 2875 case FCT_TRADDR_WWPN: 2876 if (match_u64(args, &token64)) { 2877 ret = -EINVAL; 2878 goto out; 2879 } 2880 traddr->pn = token64; 2881 break; 2882 default: 2883 pr_warn("unknown traddr token or missing value '%s'\n", 2884 p); 2885 ret = -EINVAL; 2886 goto out; 2887 } 2888 } 2889 2890 out: 2891 kfree(options); 2892 return ret; 2893 } 2894 2895 static struct nvme_ctrl * 2896 nvme_fc_create_ctrl(struct device *dev, struct nvmf_ctrl_options *opts) 2897 { 2898 struct nvme_fc_lport *lport; 2899 struct nvme_fc_rport *rport; 2900 struct nvme_ctrl *ctrl; 2901 struct nvmet_fc_traddr laddr = { 0L, 0L }; 2902 struct nvmet_fc_traddr raddr = { 0L, 0L }; 2903 unsigned long flags; 2904 int ret; 2905 2906 ret = nvme_fc_parse_address(&raddr, opts->traddr); 2907 if (ret || !raddr.nn || !raddr.pn) 2908 return ERR_PTR(-EINVAL); 2909 2910 ret = nvme_fc_parse_address(&laddr, opts->host_traddr); 2911 if (ret || !laddr.nn || !laddr.pn) 2912 return ERR_PTR(-EINVAL); 2913 2914 /* find the host and remote ports to connect together */ 2915 spin_lock_irqsave(&nvme_fc_lock, flags); 2916 list_for_each_entry(lport, &nvme_fc_lport_list, port_list) { 2917 if (lport->localport.node_name != laddr.nn || 2918 lport->localport.port_name != laddr.pn) 2919 continue; 2920 2921 list_for_each_entry(rport, &lport->endp_list, endp_list) { 2922 if (rport->remoteport.node_name != raddr.nn || 2923 
rport->remoteport.port_name != raddr.pn) 2924 continue; 2925 2926 /* if fail to get reference fall through. Will error */ 2927 if (!nvme_fc_rport_get(rport)) 2928 break; 2929 2930 spin_unlock_irqrestore(&nvme_fc_lock, flags); 2931 2932 ctrl = nvme_fc_init_ctrl(dev, opts, lport, rport); 2933 if (IS_ERR(ctrl)) 2934 nvme_fc_rport_put(rport); 2935 return ctrl; 2936 } 2937 } 2938 spin_unlock_irqrestore(&nvme_fc_lock, flags); 2939 2940 return ERR_PTR(-ENOENT); 2941 } 2942 2943 2944 static struct nvmf_transport_ops nvme_fc_transport = { 2945 .name = "fc", 2946 .required_opts = NVMF_OPT_TRADDR | NVMF_OPT_HOST_TRADDR, 2947 .allowed_opts = NVMF_OPT_RECONNECT_DELAY | NVMF_OPT_CTRL_LOSS_TMO, 2948 .create_ctrl = nvme_fc_create_ctrl, 2949 }; 2950 2951 static int __init nvme_fc_init_module(void) 2952 { 2953 int ret; 2954 2955 nvme_fc_wq = create_workqueue("nvme_fc_wq"); 2956 if (!nvme_fc_wq) 2957 return -ENOMEM; 2958 2959 ret = nvmf_register_transport(&nvme_fc_transport); 2960 if (ret) 2961 goto err; 2962 2963 return 0; 2964 err: 2965 destroy_workqueue(nvme_fc_wq); 2966 return ret; 2967 } 2968 2969 static void __exit nvme_fc_exit_module(void) 2970 { 2971 /* sanity check - all lports should be removed */ 2972 if (!list_empty(&nvme_fc_lport_list)) 2973 pr_warn("%s: localport list not empty\n", __func__); 2974 2975 nvmf_unregister_transport(&nvme_fc_transport); 2976 2977 destroy_workqueue(nvme_fc_wq); 2978 2979 ida_destroy(&nvme_fc_local_port_cnt); 2980 ida_destroy(&nvme_fc_ctrl_cnt); 2981 } 2982 2983 module_init(nvme_fc_init_module); 2984 module_exit(nvme_fc_exit_module); 2985 2986 MODULE_LICENSE("GPL v2"); 2987