1 /* 2 * Copyright (c) 2016 Avago Technologies. All rights reserved. 3 * 4 * This program is free software; you can redistribute it and/or modify 5 * it under the terms of version 2 of the GNU General Public License as 6 * published by the Free Software Foundation. 7 * 8 * This program is distributed in the hope that it will be useful. 9 * ALL EXPRESS OR IMPLIED CONDITIONS, REPRESENTATIONS AND WARRANTIES, 10 * INCLUDING ANY IMPLIED WARRANTY OF MERCHANTABILITY, FITNESS FOR A 11 * PARTICULAR PURPOSE, OR NON-INFRINGEMENT, ARE DISCLAIMED, EXCEPT TO 12 * THE EXTENT THAT SUCH DISCLAIMERS ARE HELD TO BE LEGALLY INVALID. 13 * See the GNU General Public License for more details, a copy of which 14 * can be found in the file COPYING included with this package 15 * 16 */ 17 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt 18 #include <linux/module.h> 19 #include <linux/parser.h> 20 #include <uapi/scsi/fc/fc_fs.h> 21 #include <uapi/scsi/fc/fc_els.h> 22 #include <linux/delay.h> 23 24 #include "nvme.h" 25 #include "fabrics.h" 26 #include <linux/nvme-fc-driver.h> 27 #include <linux/nvme-fc.h> 28 29 30 /* *************************** Data Structures/Defines ****************** */ 31 32 33 /* 34 * We handle AEN commands ourselves and don't even let the 35 * block layer know about them. 36 */ 37 #define NVME_FC_NR_AEN_COMMANDS 1 38 #define NVME_FC_AQ_BLKMQ_DEPTH \ 39 (NVME_AQ_DEPTH - NVME_FC_NR_AEN_COMMANDS) 40 #define AEN_CMDID_BASE (NVME_FC_AQ_BLKMQ_DEPTH + 1) 41 42 enum nvme_fc_queue_flags { 43 NVME_FC_Q_CONNECTED = (1 << 0), 44 }; 45 46 #define NVMEFC_QUEUE_DELAY 3 /* ms units */ 47 48 struct nvme_fc_queue { 49 struct nvme_fc_ctrl *ctrl; 50 struct device *dev; 51 struct blk_mq_hw_ctx *hctx; 52 void *lldd_handle; 53 int queue_size; 54 size_t cmnd_capsule_len; 55 u32 qnum; 56 u32 rqcnt; 57 u32 seqno; 58 59 u64 connection_id; 60 atomic_t csn; 61 62 unsigned long flags; 63 } __aligned(sizeof(u64)); /* alignment for other things alloc'd with */ 64 65 enum nvme_fcop_flags { 66 FCOP_FLAGS_TERMIO = (1 << 0), 67 FCOP_FLAGS_RELEASED = (1 << 1), 68 FCOP_FLAGS_COMPLETE = (1 << 2), 69 FCOP_FLAGS_AEN = (1 << 3), 70 }; 71 72 struct nvmefc_ls_req_op { 73 struct nvmefc_ls_req ls_req; 74 75 struct nvme_fc_rport *rport; 76 struct nvme_fc_queue *queue; 77 struct request *rq; 78 u32 flags; 79 80 int ls_error; 81 struct completion ls_done; 82 struct list_head lsreq_list; /* rport->ls_req_list */ 83 bool req_queued; 84 }; 85 86 enum nvme_fcpop_state { 87 FCPOP_STATE_UNINIT = 0, 88 FCPOP_STATE_IDLE = 1, 89 FCPOP_STATE_ACTIVE = 2, 90 FCPOP_STATE_ABORTED = 3, 91 FCPOP_STATE_COMPLETE = 4, 92 }; 93 94 struct nvme_fc_fcp_op { 95 struct nvme_request nreq; /* 96 * nvme/host/core.c 97 * requires this to be 98 * the 1st element in the 99 * private structure 100 * associated with the 101 * request. 102 */ 103 struct nvmefc_fcp_req fcp_req; 104 105 struct nvme_fc_ctrl *ctrl; 106 struct nvme_fc_queue *queue; 107 struct request *rq; 108 109 atomic_t state; 110 u32 flags; 111 u32 rqno; 112 u32 nents; 113 114 struct nvme_fc_cmd_iu cmd_iu; 115 struct nvme_fc_ersp_iu rsp_iu; 116 }; 117 118 struct nvme_fc_lport { 119 struct nvme_fc_local_port localport; 120 121 struct ida endp_cnt; 122 struct list_head port_list; /* nvme_fc_port_list */ 123 struct list_head endp_list; 124 struct device *dev; /* physical device for dma */ 125 struct nvme_fc_port_template *ops; 126 struct kref ref; 127 } __aligned(sizeof(u64)); /* alignment for other things alloc'd with */ 128 129 struct nvme_fc_rport { 130 struct nvme_fc_remote_port remoteport; 131 132 struct list_head endp_list; /* for lport->endp_list */ 133 struct list_head ctrl_list; 134 struct list_head ls_req_list; 135 struct device *dev; /* physical device for dma */ 136 struct nvme_fc_lport *lport; 137 spinlock_t lock; 138 struct kref ref; 139 } __aligned(sizeof(u64)); /* alignment for other things alloc'd with */ 140 141 enum nvme_fcctrl_flags { 142 FCCTRL_TERMIO = (1 << 0), 143 }; 144 145 struct nvme_fc_ctrl { 146 spinlock_t lock; 147 struct nvme_fc_queue *queues; 148 struct device *dev; 149 struct nvme_fc_lport *lport; 150 struct nvme_fc_rport *rport; 151 u32 cnum; 152 153 u64 association_id; 154 155 struct list_head ctrl_list; /* rport->ctrl_list */ 156 157 struct blk_mq_tag_set admin_tag_set; 158 struct blk_mq_tag_set tag_set; 159 160 struct work_struct delete_work; 161 struct delayed_work connect_work; 162 163 struct kref ref; 164 u32 flags; 165 u32 iocnt; 166 wait_queue_head_t ioabort_wait; 167 168 struct nvme_fc_fcp_op aen_ops[NVME_FC_NR_AEN_COMMANDS]; 169 170 struct nvme_ctrl ctrl; 171 }; 172 173 static inline struct nvme_fc_ctrl * 174 to_fc_ctrl(struct nvme_ctrl *ctrl) 175 { 176 return container_of(ctrl, struct nvme_fc_ctrl, ctrl); 177 } 178 179 static inline struct nvme_fc_lport * 180 localport_to_lport(struct nvme_fc_local_port *portptr) 181 { 182 return container_of(portptr, struct nvme_fc_lport, localport); 183 } 184 185 static inline struct nvme_fc_rport * 186 remoteport_to_rport(struct nvme_fc_remote_port *portptr) 187 { 188 return container_of(portptr, struct nvme_fc_rport, remoteport); 189 } 190 191 static inline struct nvmefc_ls_req_op * 192 ls_req_to_lsop(struct nvmefc_ls_req *lsreq) 193 { 194 return container_of(lsreq, struct nvmefc_ls_req_op, ls_req); 195 } 196 197 static inline struct nvme_fc_fcp_op * 198 fcp_req_to_fcp_op(struct nvmefc_fcp_req *fcpreq) 199 { 200 return container_of(fcpreq, struct nvme_fc_fcp_op, fcp_req); 201 } 202 203 204 205 /* *************************** Globals **************************** */ 206 207 208 static DEFINE_SPINLOCK(nvme_fc_lock); 209 210 static LIST_HEAD(nvme_fc_lport_list); 211 static DEFINE_IDA(nvme_fc_local_port_cnt); 212 static DEFINE_IDA(nvme_fc_ctrl_cnt); 213 214 215 216 217 /* *********************** FC-NVME Port Management ************************ */ 218 219 static int __nvme_fc_del_ctrl(struct nvme_fc_ctrl *); 220 static void __nvme_fc_delete_hw_queue(struct nvme_fc_ctrl *, 221 struct nvme_fc_queue *, unsigned int); 222 223 224 /** 225 * nvme_fc_register_localport - transport entry point called by an 226 * LLDD to register the existence of a NVME 227 * host FC port. 228 * @pinfo: pointer to information about the port to be registered 229 * @template: LLDD entrypoints and operational parameters for the port 230 * @dev: physical hardware device node port corresponds to. Will be 231 * used for DMA mappings 232 * @lport_p: pointer to a local port pointer. Upon success, the routine 233 * will allocate a nvme_fc_local_port structure and place its 234 * address in the local port pointer. Upon failure, local port 235 * pointer will be set to 0. 236 * 237 * Returns: 238 * a completion status. Must be 0 upon success; a negative errno 239 * (ex: -ENXIO) upon failure. 240 */ 241 int 242 nvme_fc_register_localport(struct nvme_fc_port_info *pinfo, 243 struct nvme_fc_port_template *template, 244 struct device *dev, 245 struct nvme_fc_local_port **portptr) 246 { 247 struct nvme_fc_lport *newrec; 248 unsigned long flags; 249 int ret, idx; 250 251 if (!template->localport_delete || !template->remoteport_delete || 252 !template->ls_req || !template->fcp_io || 253 !template->ls_abort || !template->fcp_abort || 254 !template->max_hw_queues || !template->max_sgl_segments || 255 !template->max_dif_sgl_segments || !template->dma_boundary) { 256 ret = -EINVAL; 257 goto out_reghost_failed; 258 } 259 260 newrec = kmalloc((sizeof(*newrec) + template->local_priv_sz), 261 GFP_KERNEL); 262 if (!newrec) { 263 ret = -ENOMEM; 264 goto out_reghost_failed; 265 } 266 267 idx = ida_simple_get(&nvme_fc_local_port_cnt, 0, 0, GFP_KERNEL); 268 if (idx < 0) { 269 ret = -ENOSPC; 270 goto out_fail_kfree; 271 } 272 273 if (!get_device(dev) && dev) { 274 ret = -ENODEV; 275 goto out_ida_put; 276 } 277 278 INIT_LIST_HEAD(&newrec->port_list); 279 INIT_LIST_HEAD(&newrec->endp_list); 280 kref_init(&newrec->ref); 281 newrec->ops = template; 282 newrec->dev = dev; 283 ida_init(&newrec->endp_cnt); 284 newrec->localport.private = &newrec[1]; 285 newrec->localport.node_name = pinfo->node_name; 286 newrec->localport.port_name = pinfo->port_name; 287 newrec->localport.port_role = pinfo->port_role; 288 newrec->localport.port_id = pinfo->port_id; 289 newrec->localport.port_state = FC_OBJSTATE_ONLINE; 290 newrec->localport.port_num = idx; 291 292 spin_lock_irqsave(&nvme_fc_lock, flags); 293 list_add_tail(&newrec->port_list, &nvme_fc_lport_list); 294 spin_unlock_irqrestore(&nvme_fc_lock, flags); 295 296 if (dev) 297 dma_set_seg_boundary(dev, template->dma_boundary); 298 299 *portptr = &newrec->localport; 300 return 0; 301 302 out_ida_put: 303 ida_simple_remove(&nvme_fc_local_port_cnt, idx); 304 out_fail_kfree: 305 kfree(newrec); 306 out_reghost_failed: 307 *portptr = NULL; 308 309 return ret; 310 } 311 EXPORT_SYMBOL_GPL(nvme_fc_register_localport); 312 313 static void 314 nvme_fc_free_lport(struct kref *ref) 315 { 316 struct nvme_fc_lport *lport = 317 container_of(ref, struct nvme_fc_lport, ref); 318 unsigned long flags; 319 320 WARN_ON(lport->localport.port_state != FC_OBJSTATE_DELETED); 321 WARN_ON(!list_empty(&lport->endp_list)); 322 323 /* remove from transport list */ 324 spin_lock_irqsave(&nvme_fc_lock, flags); 325 list_del(&lport->port_list); 326 spin_unlock_irqrestore(&nvme_fc_lock, flags); 327 328 /* let the LLDD know we've finished tearing it down */ 329 lport->ops->localport_delete(&lport->localport); 330 331 ida_simple_remove(&nvme_fc_local_port_cnt, lport->localport.port_num); 332 ida_destroy(&lport->endp_cnt); 333 334 put_device(lport->dev); 335 336 kfree(lport); 337 } 338 339 static void 340 nvme_fc_lport_put(struct nvme_fc_lport *lport) 341 { 342 kref_put(&lport->ref, nvme_fc_free_lport); 343 } 344 345 static int 346 nvme_fc_lport_get(struct nvme_fc_lport *lport) 347 { 348 return kref_get_unless_zero(&lport->ref); 349 } 350 351 /** 352 * nvme_fc_unregister_localport - transport entry point called by an 353 * LLDD to deregister/remove a previously 354 * registered a NVME host FC port. 355 * @localport: pointer to the (registered) local port that is to be 356 * deregistered. 357 * 358 * Returns: 359 * a completion status. Must be 0 upon success; a negative errno 360 * (ex: -ENXIO) upon failure. 361 */ 362 int 363 nvme_fc_unregister_localport(struct nvme_fc_local_port *portptr) 364 { 365 struct nvme_fc_lport *lport = localport_to_lport(portptr); 366 unsigned long flags; 367 368 if (!portptr) 369 return -EINVAL; 370 371 spin_lock_irqsave(&nvme_fc_lock, flags); 372 373 if (portptr->port_state != FC_OBJSTATE_ONLINE) { 374 spin_unlock_irqrestore(&nvme_fc_lock, flags); 375 return -EINVAL; 376 } 377 portptr->port_state = FC_OBJSTATE_DELETED; 378 379 spin_unlock_irqrestore(&nvme_fc_lock, flags); 380 381 nvme_fc_lport_put(lport); 382 383 return 0; 384 } 385 EXPORT_SYMBOL_GPL(nvme_fc_unregister_localport); 386 387 /** 388 * nvme_fc_register_remoteport - transport entry point called by an 389 * LLDD to register the existence of a NVME 390 * subsystem FC port on its fabric. 391 * @localport: pointer to the (registered) local port that the remote 392 * subsystem port is connected to. 393 * @pinfo: pointer to information about the port to be registered 394 * @rport_p: pointer to a remote port pointer. Upon success, the routine 395 * will allocate a nvme_fc_remote_port structure and place its 396 * address in the remote port pointer. Upon failure, remote port 397 * pointer will be set to 0. 398 * 399 * Returns: 400 * a completion status. Must be 0 upon success; a negative errno 401 * (ex: -ENXIO) upon failure. 402 */ 403 int 404 nvme_fc_register_remoteport(struct nvme_fc_local_port *localport, 405 struct nvme_fc_port_info *pinfo, 406 struct nvme_fc_remote_port **portptr) 407 { 408 struct nvme_fc_lport *lport = localport_to_lport(localport); 409 struct nvme_fc_rport *newrec; 410 unsigned long flags; 411 int ret, idx; 412 413 newrec = kmalloc((sizeof(*newrec) + lport->ops->remote_priv_sz), 414 GFP_KERNEL); 415 if (!newrec) { 416 ret = -ENOMEM; 417 goto out_reghost_failed; 418 } 419 420 if (!nvme_fc_lport_get(lport)) { 421 ret = -ESHUTDOWN; 422 goto out_kfree_rport; 423 } 424 425 idx = ida_simple_get(&lport->endp_cnt, 0, 0, GFP_KERNEL); 426 if (idx < 0) { 427 ret = -ENOSPC; 428 goto out_lport_put; 429 } 430 431 INIT_LIST_HEAD(&newrec->endp_list); 432 INIT_LIST_HEAD(&newrec->ctrl_list); 433 INIT_LIST_HEAD(&newrec->ls_req_list); 434 kref_init(&newrec->ref); 435 spin_lock_init(&newrec->lock); 436 newrec->remoteport.localport = &lport->localport; 437 newrec->dev = lport->dev; 438 newrec->lport = lport; 439 newrec->remoteport.private = &newrec[1]; 440 newrec->remoteport.port_role = pinfo->port_role; 441 newrec->remoteport.node_name = pinfo->node_name; 442 newrec->remoteport.port_name = pinfo->port_name; 443 newrec->remoteport.port_id = pinfo->port_id; 444 newrec->remoteport.port_state = FC_OBJSTATE_ONLINE; 445 newrec->remoteport.port_num = idx; 446 447 spin_lock_irqsave(&nvme_fc_lock, flags); 448 list_add_tail(&newrec->endp_list, &lport->endp_list); 449 spin_unlock_irqrestore(&nvme_fc_lock, flags); 450 451 *portptr = &newrec->remoteport; 452 return 0; 453 454 out_lport_put: 455 nvme_fc_lport_put(lport); 456 out_kfree_rport: 457 kfree(newrec); 458 out_reghost_failed: 459 *portptr = NULL; 460 return ret; 461 } 462 EXPORT_SYMBOL_GPL(nvme_fc_register_remoteport); 463 464 static void 465 nvme_fc_free_rport(struct kref *ref) 466 { 467 struct nvme_fc_rport *rport = 468 container_of(ref, struct nvme_fc_rport, ref); 469 struct nvme_fc_lport *lport = 470 localport_to_lport(rport->remoteport.localport); 471 unsigned long flags; 472 473 WARN_ON(rport->remoteport.port_state != FC_OBJSTATE_DELETED); 474 WARN_ON(!list_empty(&rport->ctrl_list)); 475 476 /* remove from lport list */ 477 spin_lock_irqsave(&nvme_fc_lock, flags); 478 list_del(&rport->endp_list); 479 spin_unlock_irqrestore(&nvme_fc_lock, flags); 480 481 /* let the LLDD know we've finished tearing it down */ 482 lport->ops->remoteport_delete(&rport->remoteport); 483 484 ida_simple_remove(&lport->endp_cnt, rport->remoteport.port_num); 485 486 kfree(rport); 487 488 nvme_fc_lport_put(lport); 489 } 490 491 static void 492 nvme_fc_rport_put(struct nvme_fc_rport *rport) 493 { 494 kref_put(&rport->ref, nvme_fc_free_rport); 495 } 496 497 static int 498 nvme_fc_rport_get(struct nvme_fc_rport *rport) 499 { 500 return kref_get_unless_zero(&rport->ref); 501 } 502 503 static int 504 nvme_fc_abort_lsops(struct nvme_fc_rport *rport) 505 { 506 struct nvmefc_ls_req_op *lsop; 507 unsigned long flags; 508 509 restart: 510 spin_lock_irqsave(&rport->lock, flags); 511 512 list_for_each_entry(lsop, &rport->ls_req_list, lsreq_list) { 513 if (!(lsop->flags & FCOP_FLAGS_TERMIO)) { 514 lsop->flags |= FCOP_FLAGS_TERMIO; 515 spin_unlock_irqrestore(&rport->lock, flags); 516 rport->lport->ops->ls_abort(&rport->lport->localport, 517 &rport->remoteport, 518 &lsop->ls_req); 519 goto restart; 520 } 521 } 522 spin_unlock_irqrestore(&rport->lock, flags); 523 524 return 0; 525 } 526 527 /** 528 * nvme_fc_unregister_remoteport - transport entry point called by an 529 * LLDD to deregister/remove a previously 530 * registered a NVME subsystem FC port. 531 * @remoteport: pointer to the (registered) remote port that is to be 532 * deregistered. 533 * 534 * Returns: 535 * a completion status. Must be 0 upon success; a negative errno 536 * (ex: -ENXIO) upon failure. 537 */ 538 int 539 nvme_fc_unregister_remoteport(struct nvme_fc_remote_port *portptr) 540 { 541 struct nvme_fc_rport *rport = remoteport_to_rport(portptr); 542 struct nvme_fc_ctrl *ctrl; 543 unsigned long flags; 544 545 if (!portptr) 546 return -EINVAL; 547 548 spin_lock_irqsave(&rport->lock, flags); 549 550 if (portptr->port_state != FC_OBJSTATE_ONLINE) { 551 spin_unlock_irqrestore(&rport->lock, flags); 552 return -EINVAL; 553 } 554 portptr->port_state = FC_OBJSTATE_DELETED; 555 556 /* tear down all associations to the remote port */ 557 list_for_each_entry(ctrl, &rport->ctrl_list, ctrl_list) 558 __nvme_fc_del_ctrl(ctrl); 559 560 spin_unlock_irqrestore(&rport->lock, flags); 561 562 nvme_fc_abort_lsops(rport); 563 564 nvme_fc_rport_put(rport); 565 return 0; 566 } 567 EXPORT_SYMBOL_GPL(nvme_fc_unregister_remoteport); 568 569 570 /* *********************** FC-NVME DMA Handling **************************** */ 571 572 /* 573 * The fcloop device passes in a NULL device pointer. Real LLD's will 574 * pass in a valid device pointer. If NULL is passed to the dma mapping 575 * routines, depending on the platform, it may or may not succeed, and 576 * may crash. 577 * 578 * As such: 579 * Wrapper all the dma routines and check the dev pointer. 580 * 581 * If simple mappings (return just a dma address, we'll noop them, 582 * returning a dma address of 0. 583 * 584 * On more complex mappings (dma_map_sg), a pseudo routine fills 585 * in the scatter list, setting all dma addresses to 0. 586 */ 587 588 static inline dma_addr_t 589 fc_dma_map_single(struct device *dev, void *ptr, size_t size, 590 enum dma_data_direction dir) 591 { 592 return dev ? dma_map_single(dev, ptr, size, dir) : (dma_addr_t)0L; 593 } 594 595 static inline int 596 fc_dma_mapping_error(struct device *dev, dma_addr_t dma_addr) 597 { 598 return dev ? dma_mapping_error(dev, dma_addr) : 0; 599 } 600 601 static inline void 602 fc_dma_unmap_single(struct device *dev, dma_addr_t addr, size_t size, 603 enum dma_data_direction dir) 604 { 605 if (dev) 606 dma_unmap_single(dev, addr, size, dir); 607 } 608 609 static inline void 610 fc_dma_sync_single_for_cpu(struct device *dev, dma_addr_t addr, size_t size, 611 enum dma_data_direction dir) 612 { 613 if (dev) 614 dma_sync_single_for_cpu(dev, addr, size, dir); 615 } 616 617 static inline void 618 fc_dma_sync_single_for_device(struct device *dev, dma_addr_t addr, size_t size, 619 enum dma_data_direction dir) 620 { 621 if (dev) 622 dma_sync_single_for_device(dev, addr, size, dir); 623 } 624 625 /* pseudo dma_map_sg call */ 626 static int 627 fc_map_sg(struct scatterlist *sg, int nents) 628 { 629 struct scatterlist *s; 630 int i; 631 632 WARN_ON(nents == 0 || sg[0].length == 0); 633 634 for_each_sg(sg, s, nents, i) { 635 s->dma_address = 0L; 636 #ifdef CONFIG_NEED_SG_DMA_LENGTH 637 s->dma_length = s->length; 638 #endif 639 } 640 return nents; 641 } 642 643 static inline int 644 fc_dma_map_sg(struct device *dev, struct scatterlist *sg, int nents, 645 enum dma_data_direction dir) 646 { 647 return dev ? dma_map_sg(dev, sg, nents, dir) : fc_map_sg(sg, nents); 648 } 649 650 static inline void 651 fc_dma_unmap_sg(struct device *dev, struct scatterlist *sg, int nents, 652 enum dma_data_direction dir) 653 { 654 if (dev) 655 dma_unmap_sg(dev, sg, nents, dir); 656 } 657 658 659 /* *********************** FC-NVME LS Handling **************************** */ 660 661 static void nvme_fc_ctrl_put(struct nvme_fc_ctrl *); 662 static int nvme_fc_ctrl_get(struct nvme_fc_ctrl *); 663 664 665 static void 666 __nvme_fc_finish_ls_req(struct nvmefc_ls_req_op *lsop) 667 { 668 struct nvme_fc_rport *rport = lsop->rport; 669 struct nvmefc_ls_req *lsreq = &lsop->ls_req; 670 unsigned long flags; 671 672 spin_lock_irqsave(&rport->lock, flags); 673 674 if (!lsop->req_queued) { 675 spin_unlock_irqrestore(&rport->lock, flags); 676 return; 677 } 678 679 list_del(&lsop->lsreq_list); 680 681 lsop->req_queued = false; 682 683 spin_unlock_irqrestore(&rport->lock, flags); 684 685 fc_dma_unmap_single(rport->dev, lsreq->rqstdma, 686 (lsreq->rqstlen + lsreq->rsplen), 687 DMA_BIDIRECTIONAL); 688 689 nvme_fc_rport_put(rport); 690 } 691 692 static int 693 __nvme_fc_send_ls_req(struct nvme_fc_rport *rport, 694 struct nvmefc_ls_req_op *lsop, 695 void (*done)(struct nvmefc_ls_req *req, int status)) 696 { 697 struct nvmefc_ls_req *lsreq = &lsop->ls_req; 698 unsigned long flags; 699 int ret = 0; 700 701 if (rport->remoteport.port_state != FC_OBJSTATE_ONLINE) 702 return -ECONNREFUSED; 703 704 if (!nvme_fc_rport_get(rport)) 705 return -ESHUTDOWN; 706 707 lsreq->done = done; 708 lsop->rport = rport; 709 lsop->req_queued = false; 710 INIT_LIST_HEAD(&lsop->lsreq_list); 711 init_completion(&lsop->ls_done); 712 713 lsreq->rqstdma = fc_dma_map_single(rport->dev, lsreq->rqstaddr, 714 lsreq->rqstlen + lsreq->rsplen, 715 DMA_BIDIRECTIONAL); 716 if (fc_dma_mapping_error(rport->dev, lsreq->rqstdma)) { 717 ret = -EFAULT; 718 goto out_putrport; 719 } 720 lsreq->rspdma = lsreq->rqstdma + lsreq->rqstlen; 721 722 spin_lock_irqsave(&rport->lock, flags); 723 724 list_add_tail(&lsop->lsreq_list, &rport->ls_req_list); 725 726 lsop->req_queued = true; 727 728 spin_unlock_irqrestore(&rport->lock, flags); 729 730 ret = rport->lport->ops->ls_req(&rport->lport->localport, 731 &rport->remoteport, lsreq); 732 if (ret) 733 goto out_unlink; 734 735 return 0; 736 737 out_unlink: 738 lsop->ls_error = ret; 739 spin_lock_irqsave(&rport->lock, flags); 740 lsop->req_queued = false; 741 list_del(&lsop->lsreq_list); 742 spin_unlock_irqrestore(&rport->lock, flags); 743 fc_dma_unmap_single(rport->dev, lsreq->rqstdma, 744 (lsreq->rqstlen + lsreq->rsplen), 745 DMA_BIDIRECTIONAL); 746 out_putrport: 747 nvme_fc_rport_put(rport); 748 749 return ret; 750 } 751 752 static void 753 nvme_fc_send_ls_req_done(struct nvmefc_ls_req *lsreq, int status) 754 { 755 struct nvmefc_ls_req_op *lsop = ls_req_to_lsop(lsreq); 756 757 lsop->ls_error = status; 758 complete(&lsop->ls_done); 759 } 760 761 static int 762 nvme_fc_send_ls_req(struct nvme_fc_rport *rport, struct nvmefc_ls_req_op *lsop) 763 { 764 struct nvmefc_ls_req *lsreq = &lsop->ls_req; 765 struct fcnvme_ls_rjt *rjt = lsreq->rspaddr; 766 int ret; 767 768 ret = __nvme_fc_send_ls_req(rport, lsop, nvme_fc_send_ls_req_done); 769 770 if (!ret) { 771 /* 772 * No timeout/not interruptible as we need the struct 773 * to exist until the lldd calls us back. Thus mandate 774 * wait until driver calls back. lldd responsible for 775 * the timeout action 776 */ 777 wait_for_completion(&lsop->ls_done); 778 779 __nvme_fc_finish_ls_req(lsop); 780 781 ret = lsop->ls_error; 782 } 783 784 if (ret) 785 return ret; 786 787 /* ACC or RJT payload ? */ 788 if (rjt->w0.ls_cmd == FCNVME_LS_RJT) 789 return -ENXIO; 790 791 return 0; 792 } 793 794 static int 795 nvme_fc_send_ls_req_async(struct nvme_fc_rport *rport, 796 struct nvmefc_ls_req_op *lsop, 797 void (*done)(struct nvmefc_ls_req *req, int status)) 798 { 799 /* don't wait for completion */ 800 801 return __nvme_fc_send_ls_req(rport, lsop, done); 802 } 803 804 /* Validation Error indexes into the string table below */ 805 enum { 806 VERR_NO_ERROR = 0, 807 VERR_LSACC = 1, 808 VERR_LSDESC_RQST = 2, 809 VERR_LSDESC_RQST_LEN = 3, 810 VERR_ASSOC_ID = 4, 811 VERR_ASSOC_ID_LEN = 5, 812 VERR_CONN_ID = 6, 813 VERR_CONN_ID_LEN = 7, 814 VERR_CR_ASSOC = 8, 815 VERR_CR_ASSOC_ACC_LEN = 9, 816 VERR_CR_CONN = 10, 817 VERR_CR_CONN_ACC_LEN = 11, 818 VERR_DISCONN = 12, 819 VERR_DISCONN_ACC_LEN = 13, 820 }; 821 822 static char *validation_errors[] = { 823 "OK", 824 "Not LS_ACC", 825 "Not LSDESC_RQST", 826 "Bad LSDESC_RQST Length", 827 "Not Association ID", 828 "Bad Association ID Length", 829 "Not Connection ID", 830 "Bad Connection ID Length", 831 "Not CR_ASSOC Rqst", 832 "Bad CR_ASSOC ACC Length", 833 "Not CR_CONN Rqst", 834 "Bad CR_CONN ACC Length", 835 "Not Disconnect Rqst", 836 "Bad Disconnect ACC Length", 837 }; 838 839 static int 840 nvme_fc_connect_admin_queue(struct nvme_fc_ctrl *ctrl, 841 struct nvme_fc_queue *queue, u16 qsize, u16 ersp_ratio) 842 { 843 struct nvmefc_ls_req_op *lsop; 844 struct nvmefc_ls_req *lsreq; 845 struct fcnvme_ls_cr_assoc_rqst *assoc_rqst; 846 struct fcnvme_ls_cr_assoc_acc *assoc_acc; 847 int ret, fcret = 0; 848 849 lsop = kzalloc((sizeof(*lsop) + 850 ctrl->lport->ops->lsrqst_priv_sz + 851 sizeof(*assoc_rqst) + sizeof(*assoc_acc)), GFP_KERNEL); 852 if (!lsop) { 853 ret = -ENOMEM; 854 goto out_no_memory; 855 } 856 lsreq = &lsop->ls_req; 857 858 lsreq->private = (void *)&lsop[1]; 859 assoc_rqst = (struct fcnvme_ls_cr_assoc_rqst *) 860 (lsreq->private + ctrl->lport->ops->lsrqst_priv_sz); 861 assoc_acc = (struct fcnvme_ls_cr_assoc_acc *)&assoc_rqst[1]; 862 863 assoc_rqst->w0.ls_cmd = FCNVME_LS_CREATE_ASSOCIATION; 864 assoc_rqst->desc_list_len = 865 cpu_to_be32(sizeof(struct fcnvme_lsdesc_cr_assoc_cmd)); 866 867 assoc_rqst->assoc_cmd.desc_tag = 868 cpu_to_be32(FCNVME_LSDESC_CREATE_ASSOC_CMD); 869 assoc_rqst->assoc_cmd.desc_len = 870 fcnvme_lsdesc_len( 871 sizeof(struct fcnvme_lsdesc_cr_assoc_cmd)); 872 873 assoc_rqst->assoc_cmd.ersp_ratio = cpu_to_be16(ersp_ratio); 874 assoc_rqst->assoc_cmd.sqsize = cpu_to_be16(qsize); 875 /* Linux supports only Dynamic controllers */ 876 assoc_rqst->assoc_cmd.cntlid = cpu_to_be16(0xffff); 877 uuid_copy(&assoc_rqst->assoc_cmd.hostid, &ctrl->ctrl.opts->host->id); 878 strncpy(assoc_rqst->assoc_cmd.hostnqn, ctrl->ctrl.opts->host->nqn, 879 min(FCNVME_ASSOC_HOSTNQN_LEN, NVMF_NQN_SIZE)); 880 strncpy(assoc_rqst->assoc_cmd.subnqn, ctrl->ctrl.opts->subsysnqn, 881 min(FCNVME_ASSOC_SUBNQN_LEN, NVMF_NQN_SIZE)); 882 883 lsop->queue = queue; 884 lsreq->rqstaddr = assoc_rqst; 885 lsreq->rqstlen = sizeof(*assoc_rqst); 886 lsreq->rspaddr = assoc_acc; 887 lsreq->rsplen = sizeof(*assoc_acc); 888 lsreq->timeout = NVME_FC_CONNECT_TIMEOUT_SEC; 889 890 ret = nvme_fc_send_ls_req(ctrl->rport, lsop); 891 if (ret) 892 goto out_free_buffer; 893 894 /* process connect LS completion */ 895 896 /* validate the ACC response */ 897 if (assoc_acc->hdr.w0.ls_cmd != FCNVME_LS_ACC) 898 fcret = VERR_LSACC; 899 else if (assoc_acc->hdr.desc_list_len != 900 fcnvme_lsdesc_len( 901 sizeof(struct fcnvme_ls_cr_assoc_acc))) 902 fcret = VERR_CR_ASSOC_ACC_LEN; 903 else if (assoc_acc->hdr.rqst.desc_tag != 904 cpu_to_be32(FCNVME_LSDESC_RQST)) 905 fcret = VERR_LSDESC_RQST; 906 else if (assoc_acc->hdr.rqst.desc_len != 907 fcnvme_lsdesc_len(sizeof(struct fcnvme_lsdesc_rqst))) 908 fcret = VERR_LSDESC_RQST_LEN; 909 else if (assoc_acc->hdr.rqst.w0.ls_cmd != FCNVME_LS_CREATE_ASSOCIATION) 910 fcret = VERR_CR_ASSOC; 911 else if (assoc_acc->associd.desc_tag != 912 cpu_to_be32(FCNVME_LSDESC_ASSOC_ID)) 913 fcret = VERR_ASSOC_ID; 914 else if (assoc_acc->associd.desc_len != 915 fcnvme_lsdesc_len( 916 sizeof(struct fcnvme_lsdesc_assoc_id))) 917 fcret = VERR_ASSOC_ID_LEN; 918 else if (assoc_acc->connectid.desc_tag != 919 cpu_to_be32(FCNVME_LSDESC_CONN_ID)) 920 fcret = VERR_CONN_ID; 921 else if (assoc_acc->connectid.desc_len != 922 fcnvme_lsdesc_len(sizeof(struct fcnvme_lsdesc_conn_id))) 923 fcret = VERR_CONN_ID_LEN; 924 925 if (fcret) { 926 ret = -EBADF; 927 dev_err(ctrl->dev, 928 "q %d connect failed: %s\n", 929 queue->qnum, validation_errors[fcret]); 930 } else { 931 ctrl->association_id = 932 be64_to_cpu(assoc_acc->associd.association_id); 933 queue->connection_id = 934 be64_to_cpu(assoc_acc->connectid.connection_id); 935 set_bit(NVME_FC_Q_CONNECTED, &queue->flags); 936 } 937 938 out_free_buffer: 939 kfree(lsop); 940 out_no_memory: 941 if (ret) 942 dev_err(ctrl->dev, 943 "queue %d connect admin queue failed (%d).\n", 944 queue->qnum, ret); 945 return ret; 946 } 947 948 static int 949 nvme_fc_connect_queue(struct nvme_fc_ctrl *ctrl, struct nvme_fc_queue *queue, 950 u16 qsize, u16 ersp_ratio) 951 { 952 struct nvmefc_ls_req_op *lsop; 953 struct nvmefc_ls_req *lsreq; 954 struct fcnvme_ls_cr_conn_rqst *conn_rqst; 955 struct fcnvme_ls_cr_conn_acc *conn_acc; 956 int ret, fcret = 0; 957 958 lsop = kzalloc((sizeof(*lsop) + 959 ctrl->lport->ops->lsrqst_priv_sz + 960 sizeof(*conn_rqst) + sizeof(*conn_acc)), GFP_KERNEL); 961 if (!lsop) { 962 ret = -ENOMEM; 963 goto out_no_memory; 964 } 965 lsreq = &lsop->ls_req; 966 967 lsreq->private = (void *)&lsop[1]; 968 conn_rqst = (struct fcnvme_ls_cr_conn_rqst *) 969 (lsreq->private + ctrl->lport->ops->lsrqst_priv_sz); 970 conn_acc = (struct fcnvme_ls_cr_conn_acc *)&conn_rqst[1]; 971 972 conn_rqst->w0.ls_cmd = FCNVME_LS_CREATE_CONNECTION; 973 conn_rqst->desc_list_len = cpu_to_be32( 974 sizeof(struct fcnvme_lsdesc_assoc_id) + 975 sizeof(struct fcnvme_lsdesc_cr_conn_cmd)); 976 977 conn_rqst->associd.desc_tag = cpu_to_be32(FCNVME_LSDESC_ASSOC_ID); 978 conn_rqst->associd.desc_len = 979 fcnvme_lsdesc_len( 980 sizeof(struct fcnvme_lsdesc_assoc_id)); 981 conn_rqst->associd.association_id = cpu_to_be64(ctrl->association_id); 982 conn_rqst->connect_cmd.desc_tag = 983 cpu_to_be32(FCNVME_LSDESC_CREATE_CONN_CMD); 984 conn_rqst->connect_cmd.desc_len = 985 fcnvme_lsdesc_len( 986 sizeof(struct fcnvme_lsdesc_cr_conn_cmd)); 987 conn_rqst->connect_cmd.ersp_ratio = cpu_to_be16(ersp_ratio); 988 conn_rqst->connect_cmd.qid = cpu_to_be16(queue->qnum); 989 conn_rqst->connect_cmd.sqsize = cpu_to_be16(qsize); 990 991 lsop->queue = queue; 992 lsreq->rqstaddr = conn_rqst; 993 lsreq->rqstlen = sizeof(*conn_rqst); 994 lsreq->rspaddr = conn_acc; 995 lsreq->rsplen = sizeof(*conn_acc); 996 lsreq->timeout = NVME_FC_CONNECT_TIMEOUT_SEC; 997 998 ret = nvme_fc_send_ls_req(ctrl->rport, lsop); 999 if (ret) 1000 goto out_free_buffer; 1001 1002 /* process connect LS completion */ 1003 1004 /* validate the ACC response */ 1005 if (conn_acc->hdr.w0.ls_cmd != FCNVME_LS_ACC) 1006 fcret = VERR_LSACC; 1007 else if (conn_acc->hdr.desc_list_len != 1008 fcnvme_lsdesc_len(sizeof(struct fcnvme_ls_cr_conn_acc))) 1009 fcret = VERR_CR_CONN_ACC_LEN; 1010 else if (conn_acc->hdr.rqst.desc_tag != cpu_to_be32(FCNVME_LSDESC_RQST)) 1011 fcret = VERR_LSDESC_RQST; 1012 else if (conn_acc->hdr.rqst.desc_len != 1013 fcnvme_lsdesc_len(sizeof(struct fcnvme_lsdesc_rqst))) 1014 fcret = VERR_LSDESC_RQST_LEN; 1015 else if (conn_acc->hdr.rqst.w0.ls_cmd != FCNVME_LS_CREATE_CONNECTION) 1016 fcret = VERR_CR_CONN; 1017 else if (conn_acc->connectid.desc_tag != 1018 cpu_to_be32(FCNVME_LSDESC_CONN_ID)) 1019 fcret = VERR_CONN_ID; 1020 else if (conn_acc->connectid.desc_len != 1021 fcnvme_lsdesc_len(sizeof(struct fcnvme_lsdesc_conn_id))) 1022 fcret = VERR_CONN_ID_LEN; 1023 1024 if (fcret) { 1025 ret = -EBADF; 1026 dev_err(ctrl->dev, 1027 "q %d connect failed: %s\n", 1028 queue->qnum, validation_errors[fcret]); 1029 } else { 1030 queue->connection_id = 1031 be64_to_cpu(conn_acc->connectid.connection_id); 1032 set_bit(NVME_FC_Q_CONNECTED, &queue->flags); 1033 } 1034 1035 out_free_buffer: 1036 kfree(lsop); 1037 out_no_memory: 1038 if (ret) 1039 dev_err(ctrl->dev, 1040 "queue %d connect command failed (%d).\n", 1041 queue->qnum, ret); 1042 return ret; 1043 } 1044 1045 static void 1046 nvme_fc_disconnect_assoc_done(struct nvmefc_ls_req *lsreq, int status) 1047 { 1048 struct nvmefc_ls_req_op *lsop = ls_req_to_lsop(lsreq); 1049 1050 __nvme_fc_finish_ls_req(lsop); 1051 1052 /* fc-nvme iniator doesn't care about success or failure of cmd */ 1053 1054 kfree(lsop); 1055 } 1056 1057 /* 1058 * This routine sends a FC-NVME LS to disconnect (aka terminate) 1059 * the FC-NVME Association. Terminating the association also 1060 * terminates the FC-NVME connections (per queue, both admin and io 1061 * queues) that are part of the association. E.g. things are torn 1062 * down, and the related FC-NVME Association ID and Connection IDs 1063 * become invalid. 1064 * 1065 * The behavior of the fc-nvme initiator is such that it's 1066 * understanding of the association and connections will implicitly 1067 * be torn down. The action is implicit as it may be due to a loss of 1068 * connectivity with the fc-nvme target, so you may never get a 1069 * response even if you tried. As such, the action of this routine 1070 * is to asynchronously send the LS, ignore any results of the LS, and 1071 * continue on with terminating the association. If the fc-nvme target 1072 * is present and receives the LS, it too can tear down. 1073 */ 1074 static void 1075 nvme_fc_xmt_disconnect_assoc(struct nvme_fc_ctrl *ctrl) 1076 { 1077 struct fcnvme_ls_disconnect_rqst *discon_rqst; 1078 struct fcnvme_ls_disconnect_acc *discon_acc; 1079 struct nvmefc_ls_req_op *lsop; 1080 struct nvmefc_ls_req *lsreq; 1081 int ret; 1082 1083 lsop = kzalloc((sizeof(*lsop) + 1084 ctrl->lport->ops->lsrqst_priv_sz + 1085 sizeof(*discon_rqst) + sizeof(*discon_acc)), 1086 GFP_KERNEL); 1087 if (!lsop) 1088 /* couldn't sent it... too bad */ 1089 return; 1090 1091 lsreq = &lsop->ls_req; 1092 1093 lsreq->private = (void *)&lsop[1]; 1094 discon_rqst = (struct fcnvme_ls_disconnect_rqst *) 1095 (lsreq->private + ctrl->lport->ops->lsrqst_priv_sz); 1096 discon_acc = (struct fcnvme_ls_disconnect_acc *)&discon_rqst[1]; 1097 1098 discon_rqst->w0.ls_cmd = FCNVME_LS_DISCONNECT; 1099 discon_rqst->desc_list_len = cpu_to_be32( 1100 sizeof(struct fcnvme_lsdesc_assoc_id) + 1101 sizeof(struct fcnvme_lsdesc_disconn_cmd)); 1102 1103 discon_rqst->associd.desc_tag = cpu_to_be32(FCNVME_LSDESC_ASSOC_ID); 1104 discon_rqst->associd.desc_len = 1105 fcnvme_lsdesc_len( 1106 sizeof(struct fcnvme_lsdesc_assoc_id)); 1107 1108 discon_rqst->associd.association_id = cpu_to_be64(ctrl->association_id); 1109 1110 discon_rqst->discon_cmd.desc_tag = cpu_to_be32( 1111 FCNVME_LSDESC_DISCONN_CMD); 1112 discon_rqst->discon_cmd.desc_len = 1113 fcnvme_lsdesc_len( 1114 sizeof(struct fcnvme_lsdesc_disconn_cmd)); 1115 discon_rqst->discon_cmd.scope = FCNVME_DISCONN_ASSOCIATION; 1116 discon_rqst->discon_cmd.id = cpu_to_be64(ctrl->association_id); 1117 1118 lsreq->rqstaddr = discon_rqst; 1119 lsreq->rqstlen = sizeof(*discon_rqst); 1120 lsreq->rspaddr = discon_acc; 1121 lsreq->rsplen = sizeof(*discon_acc); 1122 lsreq->timeout = NVME_FC_CONNECT_TIMEOUT_SEC; 1123 1124 ret = nvme_fc_send_ls_req_async(ctrl->rport, lsop, 1125 nvme_fc_disconnect_assoc_done); 1126 if (ret) 1127 kfree(lsop); 1128 1129 /* only meaningful part to terminating the association */ 1130 ctrl->association_id = 0; 1131 } 1132 1133 1134 /* *********************** NVME Ctrl Routines **************************** */ 1135 1136 static void __nvme_fc_final_op_cleanup(struct request *rq); 1137 static void nvme_fc_error_recovery(struct nvme_fc_ctrl *ctrl, char *errmsg); 1138 1139 static int 1140 nvme_fc_reinit_request(void *data, struct request *rq) 1141 { 1142 struct nvme_fc_fcp_op *op = blk_mq_rq_to_pdu(rq); 1143 struct nvme_fc_cmd_iu *cmdiu = &op->cmd_iu; 1144 1145 memset(cmdiu, 0, sizeof(*cmdiu)); 1146 cmdiu->scsi_id = NVME_CMD_SCSI_ID; 1147 cmdiu->fc_id = NVME_CMD_FC_ID; 1148 cmdiu->iu_len = cpu_to_be16(sizeof(*cmdiu) / sizeof(u32)); 1149 memset(&op->rsp_iu, 0, sizeof(op->rsp_iu)); 1150 1151 return 0; 1152 } 1153 1154 static void 1155 __nvme_fc_exit_request(struct nvme_fc_ctrl *ctrl, 1156 struct nvme_fc_fcp_op *op) 1157 { 1158 fc_dma_unmap_single(ctrl->lport->dev, op->fcp_req.rspdma, 1159 sizeof(op->rsp_iu), DMA_FROM_DEVICE); 1160 fc_dma_unmap_single(ctrl->lport->dev, op->fcp_req.cmddma, 1161 sizeof(op->cmd_iu), DMA_TO_DEVICE); 1162 1163 atomic_set(&op->state, FCPOP_STATE_UNINIT); 1164 } 1165 1166 static void 1167 nvme_fc_exit_request(struct blk_mq_tag_set *set, struct request *rq, 1168 unsigned int hctx_idx) 1169 { 1170 struct nvme_fc_fcp_op *op = blk_mq_rq_to_pdu(rq); 1171 1172 return __nvme_fc_exit_request(set->driver_data, op); 1173 } 1174 1175 static int 1176 __nvme_fc_abort_op(struct nvme_fc_ctrl *ctrl, struct nvme_fc_fcp_op *op) 1177 { 1178 int state; 1179 1180 state = atomic_xchg(&op->state, FCPOP_STATE_ABORTED); 1181 if (state != FCPOP_STATE_ACTIVE) { 1182 atomic_set(&op->state, state); 1183 return -ECANCELED; 1184 } 1185 1186 ctrl->lport->ops->fcp_abort(&ctrl->lport->localport, 1187 &ctrl->rport->remoteport, 1188 op->queue->lldd_handle, 1189 &op->fcp_req); 1190 1191 return 0; 1192 } 1193 1194 static void 1195 nvme_fc_abort_aen_ops(struct nvme_fc_ctrl *ctrl) 1196 { 1197 struct nvme_fc_fcp_op *aen_op = ctrl->aen_ops; 1198 unsigned long flags; 1199 int i, ret; 1200 1201 for (i = 0; i < NVME_FC_NR_AEN_COMMANDS; i++, aen_op++) { 1202 if (atomic_read(&aen_op->state) != FCPOP_STATE_ACTIVE) 1203 continue; 1204 1205 spin_lock_irqsave(&ctrl->lock, flags); 1206 if (ctrl->flags & FCCTRL_TERMIO) { 1207 ctrl->iocnt++; 1208 aen_op->flags |= FCOP_FLAGS_TERMIO; 1209 } 1210 spin_unlock_irqrestore(&ctrl->lock, flags); 1211 1212 ret = __nvme_fc_abort_op(ctrl, aen_op); 1213 if (ret) { 1214 /* 1215 * if __nvme_fc_abort_op failed the io wasn't 1216 * active. Thus this call path is running in 1217 * parallel to the io complete. Treat as non-error. 1218 */ 1219 1220 /* back out the flags/counters */ 1221 spin_lock_irqsave(&ctrl->lock, flags); 1222 if (ctrl->flags & FCCTRL_TERMIO) 1223 ctrl->iocnt--; 1224 aen_op->flags &= ~FCOP_FLAGS_TERMIO; 1225 spin_unlock_irqrestore(&ctrl->lock, flags); 1226 return; 1227 } 1228 } 1229 } 1230 1231 static inline int 1232 __nvme_fc_fcpop_chk_teardowns(struct nvme_fc_ctrl *ctrl, 1233 struct nvme_fc_fcp_op *op) 1234 { 1235 unsigned long flags; 1236 bool complete_rq = false; 1237 1238 spin_lock_irqsave(&ctrl->lock, flags); 1239 if (unlikely(op->flags & FCOP_FLAGS_TERMIO)) { 1240 if (ctrl->flags & FCCTRL_TERMIO) { 1241 if (!--ctrl->iocnt) 1242 wake_up(&ctrl->ioabort_wait); 1243 } 1244 } 1245 if (op->flags & FCOP_FLAGS_RELEASED) 1246 complete_rq = true; 1247 else 1248 op->flags |= FCOP_FLAGS_COMPLETE; 1249 spin_unlock_irqrestore(&ctrl->lock, flags); 1250 1251 return complete_rq; 1252 } 1253 1254 static void 1255 nvme_fc_fcpio_done(struct nvmefc_fcp_req *req) 1256 { 1257 struct nvme_fc_fcp_op *op = fcp_req_to_fcp_op(req); 1258 struct request *rq = op->rq; 1259 struct nvmefc_fcp_req *freq = &op->fcp_req; 1260 struct nvme_fc_ctrl *ctrl = op->ctrl; 1261 struct nvme_fc_queue *queue = op->queue; 1262 struct nvme_completion *cqe = &op->rsp_iu.cqe; 1263 struct nvme_command *sqe = &op->cmd_iu.sqe; 1264 __le16 status = cpu_to_le16(NVME_SC_SUCCESS << 1); 1265 union nvme_result result; 1266 bool complete_rq, terminate_assoc = true; 1267 1268 /* 1269 * WARNING: 1270 * The current linux implementation of a nvme controller 1271 * allocates a single tag set for all io queues and sizes 1272 * the io queues to fully hold all possible tags. Thus, the 1273 * implementation does not reference or care about the sqhd 1274 * value as it never needs to use the sqhd/sqtail pointers 1275 * for submission pacing. 1276 * 1277 * This affects the FC-NVME implementation in two ways: 1278 * 1) As the value doesn't matter, we don't need to waste 1279 * cycles extracting it from ERSPs and stamping it in the 1280 * cases where the transport fabricates CQEs on successful 1281 * completions. 1282 * 2) The FC-NVME implementation requires that delivery of 1283 * ERSP completions are to go back to the nvme layer in order 1284 * relative to the rsn, such that the sqhd value will always 1285 * be "in order" for the nvme layer. As the nvme layer in 1286 * linux doesn't care about sqhd, there's no need to return 1287 * them in order. 1288 * 1289 * Additionally: 1290 * As the core nvme layer in linux currently does not look at 1291 * every field in the cqe - in cases where the FC transport must 1292 * fabricate a CQE, the following fields will not be set as they 1293 * are not referenced: 1294 * cqe.sqid, cqe.sqhd, cqe.command_id 1295 * 1296 * Failure or error of an individual i/o, in a transport 1297 * detected fashion unrelated to the nvme completion status, 1298 * potentially cause the initiator and target sides to get out 1299 * of sync on SQ head/tail (aka outstanding io count allowed). 1300 * Per FC-NVME spec, failure of an individual command requires 1301 * the connection to be terminated, which in turn requires the 1302 * association to be terminated. 1303 */ 1304 1305 fc_dma_sync_single_for_cpu(ctrl->lport->dev, op->fcp_req.rspdma, 1306 sizeof(op->rsp_iu), DMA_FROM_DEVICE); 1307 1308 if (atomic_read(&op->state) == FCPOP_STATE_ABORTED) 1309 status = cpu_to_le16((NVME_SC_ABORT_REQ | NVME_SC_DNR) << 1); 1310 else if (freq->status) 1311 status = cpu_to_le16(NVME_SC_FC_TRANSPORT_ERROR << 1); 1312 1313 /* 1314 * For the linux implementation, if we have an unsuccesful 1315 * status, they blk-mq layer can typically be called with the 1316 * non-zero status and the content of the cqe isn't important. 1317 */ 1318 if (status) 1319 goto done; 1320 1321 /* 1322 * command completed successfully relative to the wire 1323 * protocol. However, validate anything received and 1324 * extract the status and result from the cqe (create it 1325 * where necessary). 1326 */ 1327 1328 switch (freq->rcv_rsplen) { 1329 1330 case 0: 1331 case NVME_FC_SIZEOF_ZEROS_RSP: 1332 /* 1333 * No response payload or 12 bytes of payload (which 1334 * should all be zeros) are considered successful and 1335 * no payload in the CQE by the transport. 1336 */ 1337 if (freq->transferred_length != 1338 be32_to_cpu(op->cmd_iu.data_len)) { 1339 status = cpu_to_le16(NVME_SC_FC_TRANSPORT_ERROR << 1); 1340 goto done; 1341 } 1342 result.u64 = 0; 1343 break; 1344 1345 case sizeof(struct nvme_fc_ersp_iu): 1346 /* 1347 * The ERSP IU contains a full completion with CQE. 1348 * Validate ERSP IU and look at cqe. 1349 */ 1350 if (unlikely(be16_to_cpu(op->rsp_iu.iu_len) != 1351 (freq->rcv_rsplen / 4) || 1352 be32_to_cpu(op->rsp_iu.xfrd_len) != 1353 freq->transferred_length || 1354 op->rsp_iu.status_code || 1355 sqe->common.command_id != cqe->command_id)) { 1356 status = cpu_to_le16(NVME_SC_FC_TRANSPORT_ERROR << 1); 1357 goto done; 1358 } 1359 result = cqe->result; 1360 status = cqe->status; 1361 break; 1362 1363 default: 1364 status = cpu_to_le16(NVME_SC_FC_TRANSPORT_ERROR << 1); 1365 goto done; 1366 } 1367 1368 terminate_assoc = false; 1369 1370 done: 1371 if (op->flags & FCOP_FLAGS_AEN) { 1372 nvme_complete_async_event(&queue->ctrl->ctrl, status, &result); 1373 complete_rq = __nvme_fc_fcpop_chk_teardowns(ctrl, op); 1374 atomic_set(&op->state, FCPOP_STATE_IDLE); 1375 op->flags = FCOP_FLAGS_AEN; /* clear other flags */ 1376 nvme_fc_ctrl_put(ctrl); 1377 goto check_error; 1378 } 1379 1380 complete_rq = __nvme_fc_fcpop_chk_teardowns(ctrl, op); 1381 if (!complete_rq) { 1382 if (unlikely(op->flags & FCOP_FLAGS_TERMIO)) { 1383 status = cpu_to_le16(NVME_SC_ABORT_REQ << 1); 1384 if (blk_queue_dying(rq->q)) 1385 status |= cpu_to_le16(NVME_SC_DNR << 1); 1386 } 1387 nvme_end_request(rq, status, result); 1388 } else 1389 __nvme_fc_final_op_cleanup(rq); 1390 1391 check_error: 1392 if (terminate_assoc) 1393 nvme_fc_error_recovery(ctrl, "transport detected io error"); 1394 } 1395 1396 static int 1397 __nvme_fc_init_request(struct nvme_fc_ctrl *ctrl, 1398 struct nvme_fc_queue *queue, struct nvme_fc_fcp_op *op, 1399 struct request *rq, u32 rqno) 1400 { 1401 struct nvme_fc_cmd_iu *cmdiu = &op->cmd_iu; 1402 int ret = 0; 1403 1404 memset(op, 0, sizeof(*op)); 1405 op->fcp_req.cmdaddr = &op->cmd_iu; 1406 op->fcp_req.cmdlen = sizeof(op->cmd_iu); 1407 op->fcp_req.rspaddr = &op->rsp_iu; 1408 op->fcp_req.rsplen = sizeof(op->rsp_iu); 1409 op->fcp_req.done = nvme_fc_fcpio_done; 1410 op->fcp_req.first_sgl = (struct scatterlist *)&op[1]; 1411 op->fcp_req.private = &op->fcp_req.first_sgl[SG_CHUNK_SIZE]; 1412 op->ctrl = ctrl; 1413 op->queue = queue; 1414 op->rq = rq; 1415 op->rqno = rqno; 1416 1417 cmdiu->scsi_id = NVME_CMD_SCSI_ID; 1418 cmdiu->fc_id = NVME_CMD_FC_ID; 1419 cmdiu->iu_len = cpu_to_be16(sizeof(*cmdiu) / sizeof(u32)); 1420 1421 op->fcp_req.cmddma = fc_dma_map_single(ctrl->lport->dev, 1422 &op->cmd_iu, sizeof(op->cmd_iu), DMA_TO_DEVICE); 1423 if (fc_dma_mapping_error(ctrl->lport->dev, op->fcp_req.cmddma)) { 1424 dev_err(ctrl->dev, 1425 "FCP Op failed - cmdiu dma mapping failed.\n"); 1426 ret = EFAULT; 1427 goto out_on_error; 1428 } 1429 1430 op->fcp_req.rspdma = fc_dma_map_single(ctrl->lport->dev, 1431 &op->rsp_iu, sizeof(op->rsp_iu), 1432 DMA_FROM_DEVICE); 1433 if (fc_dma_mapping_error(ctrl->lport->dev, op->fcp_req.rspdma)) { 1434 dev_err(ctrl->dev, 1435 "FCP Op failed - rspiu dma mapping failed.\n"); 1436 ret = EFAULT; 1437 } 1438 1439 atomic_set(&op->state, FCPOP_STATE_IDLE); 1440 out_on_error: 1441 return ret; 1442 } 1443 1444 static int 1445 nvme_fc_init_request(struct blk_mq_tag_set *set, struct request *rq, 1446 unsigned int hctx_idx, unsigned int numa_node) 1447 { 1448 struct nvme_fc_ctrl *ctrl = set->driver_data; 1449 struct nvme_fc_fcp_op *op = blk_mq_rq_to_pdu(rq); 1450 int queue_idx = (set == &ctrl->tag_set) ? hctx_idx + 1 : 0; 1451 struct nvme_fc_queue *queue = &ctrl->queues[queue_idx]; 1452 1453 return __nvme_fc_init_request(ctrl, queue, op, rq, queue->rqcnt++); 1454 } 1455 1456 static int 1457 nvme_fc_init_aen_ops(struct nvme_fc_ctrl *ctrl) 1458 { 1459 struct nvme_fc_fcp_op *aen_op; 1460 struct nvme_fc_cmd_iu *cmdiu; 1461 struct nvme_command *sqe; 1462 void *private; 1463 int i, ret; 1464 1465 aen_op = ctrl->aen_ops; 1466 for (i = 0; i < NVME_FC_NR_AEN_COMMANDS; i++, aen_op++) { 1467 private = kzalloc(ctrl->lport->ops->fcprqst_priv_sz, 1468 GFP_KERNEL); 1469 if (!private) 1470 return -ENOMEM; 1471 1472 cmdiu = &aen_op->cmd_iu; 1473 sqe = &cmdiu->sqe; 1474 ret = __nvme_fc_init_request(ctrl, &ctrl->queues[0], 1475 aen_op, (struct request *)NULL, 1476 (AEN_CMDID_BASE + i)); 1477 if (ret) { 1478 kfree(private); 1479 return ret; 1480 } 1481 1482 aen_op->flags = FCOP_FLAGS_AEN; 1483 aen_op->fcp_req.first_sgl = NULL; /* no sg list */ 1484 aen_op->fcp_req.private = private; 1485 1486 memset(sqe, 0, sizeof(*sqe)); 1487 sqe->common.opcode = nvme_admin_async_event; 1488 /* Note: core layer may overwrite the sqe.command_id value */ 1489 sqe->common.command_id = AEN_CMDID_BASE + i; 1490 } 1491 return 0; 1492 } 1493 1494 static void 1495 nvme_fc_term_aen_ops(struct nvme_fc_ctrl *ctrl) 1496 { 1497 struct nvme_fc_fcp_op *aen_op; 1498 int i; 1499 1500 aen_op = ctrl->aen_ops; 1501 for (i = 0; i < NVME_FC_NR_AEN_COMMANDS; i++, aen_op++) { 1502 if (!aen_op->fcp_req.private) 1503 continue; 1504 1505 __nvme_fc_exit_request(ctrl, aen_op); 1506 1507 kfree(aen_op->fcp_req.private); 1508 aen_op->fcp_req.private = NULL; 1509 } 1510 } 1511 1512 static inline void 1513 __nvme_fc_init_hctx(struct blk_mq_hw_ctx *hctx, struct nvme_fc_ctrl *ctrl, 1514 unsigned int qidx) 1515 { 1516 struct nvme_fc_queue *queue = &ctrl->queues[qidx]; 1517 1518 hctx->driver_data = queue; 1519 queue->hctx = hctx; 1520 } 1521 1522 static int 1523 nvme_fc_init_hctx(struct blk_mq_hw_ctx *hctx, void *data, 1524 unsigned int hctx_idx) 1525 { 1526 struct nvme_fc_ctrl *ctrl = data; 1527 1528 __nvme_fc_init_hctx(hctx, ctrl, hctx_idx + 1); 1529 1530 return 0; 1531 } 1532 1533 static int 1534 nvme_fc_init_admin_hctx(struct blk_mq_hw_ctx *hctx, void *data, 1535 unsigned int hctx_idx) 1536 { 1537 struct nvme_fc_ctrl *ctrl = data; 1538 1539 __nvme_fc_init_hctx(hctx, ctrl, hctx_idx); 1540 1541 return 0; 1542 } 1543 1544 static void 1545 nvme_fc_init_queue(struct nvme_fc_ctrl *ctrl, int idx, size_t queue_size) 1546 { 1547 struct nvme_fc_queue *queue; 1548 1549 queue = &ctrl->queues[idx]; 1550 memset(queue, 0, sizeof(*queue)); 1551 queue->ctrl = ctrl; 1552 queue->qnum = idx; 1553 atomic_set(&queue->csn, 1); 1554 queue->dev = ctrl->dev; 1555 1556 if (idx > 0) 1557 queue->cmnd_capsule_len = ctrl->ctrl.ioccsz * 16; 1558 else 1559 queue->cmnd_capsule_len = sizeof(struct nvme_command); 1560 1561 queue->queue_size = queue_size; 1562 1563 /* 1564 * Considered whether we should allocate buffers for all SQEs 1565 * and CQEs and dma map them - mapping their respective entries 1566 * into the request structures (kernel vm addr and dma address) 1567 * thus the driver could use the buffers/mappings directly. 1568 * It only makes sense if the LLDD would use them for its 1569 * messaging api. It's very unlikely most adapter api's would use 1570 * a native NVME sqe/cqe. More reasonable if FC-NVME IU payload 1571 * structures were used instead. 1572 */ 1573 } 1574 1575 /* 1576 * This routine terminates a queue at the transport level. 1577 * The transport has already ensured that all outstanding ios on 1578 * the queue have been terminated. 1579 * The transport will send a Disconnect LS request to terminate 1580 * the queue's connection. Termination of the admin queue will also 1581 * terminate the association at the target. 1582 */ 1583 static void 1584 nvme_fc_free_queue(struct nvme_fc_queue *queue) 1585 { 1586 if (!test_and_clear_bit(NVME_FC_Q_CONNECTED, &queue->flags)) 1587 return; 1588 1589 /* 1590 * Current implementation never disconnects a single queue. 1591 * It always terminates a whole association. So there is never 1592 * a disconnect(queue) LS sent to the target. 1593 */ 1594 1595 queue->connection_id = 0; 1596 clear_bit(NVME_FC_Q_CONNECTED, &queue->flags); 1597 } 1598 1599 static void 1600 __nvme_fc_delete_hw_queue(struct nvme_fc_ctrl *ctrl, 1601 struct nvme_fc_queue *queue, unsigned int qidx) 1602 { 1603 if (ctrl->lport->ops->delete_queue) 1604 ctrl->lport->ops->delete_queue(&ctrl->lport->localport, qidx, 1605 queue->lldd_handle); 1606 queue->lldd_handle = NULL; 1607 } 1608 1609 static void 1610 nvme_fc_free_io_queues(struct nvme_fc_ctrl *ctrl) 1611 { 1612 int i; 1613 1614 for (i = 1; i < ctrl->ctrl.queue_count; i++) 1615 nvme_fc_free_queue(&ctrl->queues[i]); 1616 } 1617 1618 static int 1619 __nvme_fc_create_hw_queue(struct nvme_fc_ctrl *ctrl, 1620 struct nvme_fc_queue *queue, unsigned int qidx, u16 qsize) 1621 { 1622 int ret = 0; 1623 1624 queue->lldd_handle = NULL; 1625 if (ctrl->lport->ops->create_queue) 1626 ret = ctrl->lport->ops->create_queue(&ctrl->lport->localport, 1627 qidx, qsize, &queue->lldd_handle); 1628 1629 return ret; 1630 } 1631 1632 static void 1633 nvme_fc_delete_hw_io_queues(struct nvme_fc_ctrl *ctrl) 1634 { 1635 struct nvme_fc_queue *queue = &ctrl->queues[ctrl->ctrl.queue_count - 1]; 1636 int i; 1637 1638 for (i = ctrl->ctrl.queue_count - 1; i >= 1; i--, queue--) 1639 __nvme_fc_delete_hw_queue(ctrl, queue, i); 1640 } 1641 1642 static int 1643 nvme_fc_create_hw_io_queues(struct nvme_fc_ctrl *ctrl, u16 qsize) 1644 { 1645 struct nvme_fc_queue *queue = &ctrl->queues[1]; 1646 int i, ret; 1647 1648 for (i = 1; i < ctrl->ctrl.queue_count; i++, queue++) { 1649 ret = __nvme_fc_create_hw_queue(ctrl, queue, i, qsize); 1650 if (ret) 1651 goto delete_queues; 1652 } 1653 1654 return 0; 1655 1656 delete_queues: 1657 for (; i >= 0; i--) 1658 __nvme_fc_delete_hw_queue(ctrl, &ctrl->queues[i], i); 1659 return ret; 1660 } 1661 1662 static int 1663 nvme_fc_connect_io_queues(struct nvme_fc_ctrl *ctrl, u16 qsize) 1664 { 1665 int i, ret = 0; 1666 1667 for (i = 1; i < ctrl->ctrl.queue_count; i++) { 1668 ret = nvme_fc_connect_queue(ctrl, &ctrl->queues[i], qsize, 1669 (qsize / 5)); 1670 if (ret) 1671 break; 1672 ret = nvmf_connect_io_queue(&ctrl->ctrl, i); 1673 if (ret) 1674 break; 1675 } 1676 1677 return ret; 1678 } 1679 1680 static void 1681 nvme_fc_init_io_queues(struct nvme_fc_ctrl *ctrl) 1682 { 1683 int i; 1684 1685 for (i = 1; i < ctrl->ctrl.queue_count; i++) 1686 nvme_fc_init_queue(ctrl, i, ctrl->ctrl.sqsize); 1687 } 1688 1689 static void 1690 nvme_fc_ctrl_free(struct kref *ref) 1691 { 1692 struct nvme_fc_ctrl *ctrl = 1693 container_of(ref, struct nvme_fc_ctrl, ref); 1694 unsigned long flags; 1695 1696 if (ctrl->ctrl.tagset) { 1697 blk_cleanup_queue(ctrl->ctrl.connect_q); 1698 blk_mq_free_tag_set(&ctrl->tag_set); 1699 } 1700 1701 /* remove from rport list */ 1702 spin_lock_irqsave(&ctrl->rport->lock, flags); 1703 list_del(&ctrl->ctrl_list); 1704 spin_unlock_irqrestore(&ctrl->rport->lock, flags); 1705 1706 blk_mq_unquiesce_queue(ctrl->ctrl.admin_q); 1707 blk_cleanup_queue(ctrl->ctrl.admin_q); 1708 blk_mq_free_tag_set(&ctrl->admin_tag_set); 1709 1710 kfree(ctrl->queues); 1711 1712 put_device(ctrl->dev); 1713 nvme_fc_rport_put(ctrl->rport); 1714 1715 ida_simple_remove(&nvme_fc_ctrl_cnt, ctrl->cnum); 1716 if (ctrl->ctrl.opts) 1717 nvmf_free_options(ctrl->ctrl.opts); 1718 kfree(ctrl); 1719 } 1720 1721 static void 1722 nvme_fc_ctrl_put(struct nvme_fc_ctrl *ctrl) 1723 { 1724 kref_put(&ctrl->ref, nvme_fc_ctrl_free); 1725 } 1726 1727 static int 1728 nvme_fc_ctrl_get(struct nvme_fc_ctrl *ctrl) 1729 { 1730 return kref_get_unless_zero(&ctrl->ref); 1731 } 1732 1733 /* 1734 * All accesses from nvme core layer done - can now free the 1735 * controller. Called after last nvme_put_ctrl() call 1736 */ 1737 static void 1738 nvme_fc_nvme_ctrl_freed(struct nvme_ctrl *nctrl) 1739 { 1740 struct nvme_fc_ctrl *ctrl = to_fc_ctrl(nctrl); 1741 1742 WARN_ON(nctrl != &ctrl->ctrl); 1743 1744 nvme_fc_ctrl_put(ctrl); 1745 } 1746 1747 static void 1748 nvme_fc_error_recovery(struct nvme_fc_ctrl *ctrl, char *errmsg) 1749 { 1750 /* only proceed if in LIVE state - e.g. on first error */ 1751 if (ctrl->ctrl.state != NVME_CTRL_LIVE) 1752 return; 1753 1754 dev_warn(ctrl->ctrl.device, 1755 "NVME-FC{%d}: transport association error detected: %s\n", 1756 ctrl->cnum, errmsg); 1757 dev_warn(ctrl->ctrl.device, 1758 "NVME-FC{%d}: resetting controller\n", ctrl->cnum); 1759 1760 if (!nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_RECONNECTING)) { 1761 dev_err(ctrl->ctrl.device, 1762 "NVME-FC{%d}: error_recovery: Couldn't change state " 1763 "to RECONNECTING\n", ctrl->cnum); 1764 return; 1765 } 1766 1767 nvme_reset_ctrl(&ctrl->ctrl); 1768 } 1769 1770 static enum blk_eh_timer_return 1771 nvme_fc_timeout(struct request *rq, bool reserved) 1772 { 1773 struct nvme_fc_fcp_op *op = blk_mq_rq_to_pdu(rq); 1774 struct nvme_fc_ctrl *ctrl = op->ctrl; 1775 int ret; 1776 1777 if (reserved) 1778 return BLK_EH_RESET_TIMER; 1779 1780 ret = __nvme_fc_abort_op(ctrl, op); 1781 if (ret) 1782 /* io wasn't active to abort consider it done */ 1783 return BLK_EH_HANDLED; 1784 1785 /* 1786 * we can't individually ABTS an io without affecting the queue, 1787 * thus killing the queue, adn thus the association. 1788 * So resolve by performing a controller reset, which will stop 1789 * the host/io stack, terminate the association on the link, 1790 * and recreate an association on the link. 1791 */ 1792 nvme_fc_error_recovery(ctrl, "io timeout error"); 1793 1794 return BLK_EH_HANDLED; 1795 } 1796 1797 static int 1798 nvme_fc_map_data(struct nvme_fc_ctrl *ctrl, struct request *rq, 1799 struct nvme_fc_fcp_op *op) 1800 { 1801 struct nvmefc_fcp_req *freq = &op->fcp_req; 1802 enum dma_data_direction dir; 1803 int ret; 1804 1805 freq->sg_cnt = 0; 1806 1807 if (!blk_rq_payload_bytes(rq)) 1808 return 0; 1809 1810 freq->sg_table.sgl = freq->first_sgl; 1811 ret = sg_alloc_table_chained(&freq->sg_table, 1812 blk_rq_nr_phys_segments(rq), freq->sg_table.sgl); 1813 if (ret) 1814 return -ENOMEM; 1815 1816 op->nents = blk_rq_map_sg(rq->q, rq, freq->sg_table.sgl); 1817 WARN_ON(op->nents > blk_rq_nr_phys_segments(rq)); 1818 dir = (rq_data_dir(rq) == WRITE) ? DMA_TO_DEVICE : DMA_FROM_DEVICE; 1819 freq->sg_cnt = fc_dma_map_sg(ctrl->lport->dev, freq->sg_table.sgl, 1820 op->nents, dir); 1821 if (unlikely(freq->sg_cnt <= 0)) { 1822 sg_free_table_chained(&freq->sg_table, true); 1823 freq->sg_cnt = 0; 1824 return -EFAULT; 1825 } 1826 1827 /* 1828 * TODO: blk_integrity_rq(rq) for DIF 1829 */ 1830 return 0; 1831 } 1832 1833 static void 1834 nvme_fc_unmap_data(struct nvme_fc_ctrl *ctrl, struct request *rq, 1835 struct nvme_fc_fcp_op *op) 1836 { 1837 struct nvmefc_fcp_req *freq = &op->fcp_req; 1838 1839 if (!freq->sg_cnt) 1840 return; 1841 1842 fc_dma_unmap_sg(ctrl->lport->dev, freq->sg_table.sgl, op->nents, 1843 ((rq_data_dir(rq) == WRITE) ? 1844 DMA_TO_DEVICE : DMA_FROM_DEVICE)); 1845 1846 nvme_cleanup_cmd(rq); 1847 1848 sg_free_table_chained(&freq->sg_table, true); 1849 1850 freq->sg_cnt = 0; 1851 } 1852 1853 /* 1854 * In FC, the queue is a logical thing. At transport connect, the target 1855 * creates its "queue" and returns a handle that is to be given to the 1856 * target whenever it posts something to the corresponding SQ. When an 1857 * SQE is sent on a SQ, FC effectively considers the SQE, or rather the 1858 * command contained within the SQE, an io, and assigns a FC exchange 1859 * to it. The SQE and the associated SQ handle are sent in the initial 1860 * CMD IU sents on the exchange. All transfers relative to the io occur 1861 * as part of the exchange. The CQE is the last thing for the io, 1862 * which is transferred (explicitly or implicitly) with the RSP IU 1863 * sent on the exchange. After the CQE is received, the FC exchange is 1864 * terminaed and the Exchange may be used on a different io. 1865 * 1866 * The transport to LLDD api has the transport making a request for a 1867 * new fcp io request to the LLDD. The LLDD then allocates a FC exchange 1868 * resource and transfers the command. The LLDD will then process all 1869 * steps to complete the io. Upon completion, the transport done routine 1870 * is called. 1871 * 1872 * So - while the operation is outstanding to the LLDD, there is a link 1873 * level FC exchange resource that is also outstanding. This must be 1874 * considered in all cleanup operations. 1875 */ 1876 static blk_status_t 1877 nvme_fc_start_fcp_op(struct nvme_fc_ctrl *ctrl, struct nvme_fc_queue *queue, 1878 struct nvme_fc_fcp_op *op, u32 data_len, 1879 enum nvmefc_fcp_datadir io_dir) 1880 { 1881 struct nvme_fc_cmd_iu *cmdiu = &op->cmd_iu; 1882 struct nvme_command *sqe = &cmdiu->sqe; 1883 u32 csn; 1884 int ret; 1885 1886 /* 1887 * before attempting to send the io, check to see if we believe 1888 * the target device is present 1889 */ 1890 if (ctrl->rport->remoteport.port_state != FC_OBJSTATE_ONLINE) 1891 return BLK_STS_IOERR; 1892 1893 if (!nvme_fc_ctrl_get(ctrl)) 1894 return BLK_STS_IOERR; 1895 1896 /* format the FC-NVME CMD IU and fcp_req */ 1897 cmdiu->connection_id = cpu_to_be64(queue->connection_id); 1898 csn = atomic_inc_return(&queue->csn); 1899 cmdiu->csn = cpu_to_be32(csn); 1900 cmdiu->data_len = cpu_to_be32(data_len); 1901 switch (io_dir) { 1902 case NVMEFC_FCP_WRITE: 1903 cmdiu->flags = FCNVME_CMD_FLAGS_WRITE; 1904 break; 1905 case NVMEFC_FCP_READ: 1906 cmdiu->flags = FCNVME_CMD_FLAGS_READ; 1907 break; 1908 case NVMEFC_FCP_NODATA: 1909 cmdiu->flags = 0; 1910 break; 1911 } 1912 op->fcp_req.payload_length = data_len; 1913 op->fcp_req.io_dir = io_dir; 1914 op->fcp_req.transferred_length = 0; 1915 op->fcp_req.rcv_rsplen = 0; 1916 op->fcp_req.status = NVME_SC_SUCCESS; 1917 op->fcp_req.sqid = cpu_to_le16(queue->qnum); 1918 1919 /* 1920 * validate per fabric rules, set fields mandated by fabric spec 1921 * as well as those by FC-NVME spec. 1922 */ 1923 WARN_ON_ONCE(sqe->common.metadata); 1924 WARN_ON_ONCE(sqe->common.dptr.prp1); 1925 WARN_ON_ONCE(sqe->common.dptr.prp2); 1926 sqe->common.flags |= NVME_CMD_SGL_METABUF; 1927 1928 /* 1929 * format SQE DPTR field per FC-NVME rules 1930 * type=data block descr; subtype=offset; 1931 * offset is currently 0. 1932 */ 1933 sqe->rw.dptr.sgl.type = NVME_SGL_FMT_OFFSET; 1934 sqe->rw.dptr.sgl.length = cpu_to_le32(data_len); 1935 sqe->rw.dptr.sgl.addr = 0; 1936 1937 if (!(op->flags & FCOP_FLAGS_AEN)) { 1938 ret = nvme_fc_map_data(ctrl, op->rq, op); 1939 if (ret < 0) { 1940 nvme_cleanup_cmd(op->rq); 1941 nvme_fc_ctrl_put(ctrl); 1942 if (ret == -ENOMEM || ret == -EAGAIN) 1943 return BLK_STS_RESOURCE; 1944 return BLK_STS_IOERR; 1945 } 1946 } 1947 1948 fc_dma_sync_single_for_device(ctrl->lport->dev, op->fcp_req.cmddma, 1949 sizeof(op->cmd_iu), DMA_TO_DEVICE); 1950 1951 atomic_set(&op->state, FCPOP_STATE_ACTIVE); 1952 1953 if (!(op->flags & FCOP_FLAGS_AEN)) 1954 blk_mq_start_request(op->rq); 1955 1956 ret = ctrl->lport->ops->fcp_io(&ctrl->lport->localport, 1957 &ctrl->rport->remoteport, 1958 queue->lldd_handle, &op->fcp_req); 1959 1960 if (ret) { 1961 if (op->rq) /* normal request */ 1962 nvme_fc_unmap_data(ctrl, op->rq, op); 1963 /* else - aen. no cleanup needed */ 1964 1965 nvme_fc_ctrl_put(ctrl); 1966 1967 if (ret != -EBUSY) 1968 return BLK_STS_IOERR; 1969 1970 if (op->rq) 1971 blk_mq_delay_run_hw_queue(queue->hctx, NVMEFC_QUEUE_DELAY); 1972 1973 return BLK_STS_RESOURCE; 1974 } 1975 1976 return BLK_STS_OK; 1977 } 1978 1979 static blk_status_t 1980 nvme_fc_queue_rq(struct blk_mq_hw_ctx *hctx, 1981 const struct blk_mq_queue_data *bd) 1982 { 1983 struct nvme_ns *ns = hctx->queue->queuedata; 1984 struct nvme_fc_queue *queue = hctx->driver_data; 1985 struct nvme_fc_ctrl *ctrl = queue->ctrl; 1986 struct request *rq = bd->rq; 1987 struct nvme_fc_fcp_op *op = blk_mq_rq_to_pdu(rq); 1988 struct nvme_fc_cmd_iu *cmdiu = &op->cmd_iu; 1989 struct nvme_command *sqe = &cmdiu->sqe; 1990 enum nvmefc_fcp_datadir io_dir; 1991 u32 data_len; 1992 blk_status_t ret; 1993 1994 ret = nvme_setup_cmd(ns, rq, sqe); 1995 if (ret) 1996 return ret; 1997 1998 data_len = blk_rq_payload_bytes(rq); 1999 if (data_len) 2000 io_dir = ((rq_data_dir(rq) == WRITE) ? 2001 NVMEFC_FCP_WRITE : NVMEFC_FCP_READ); 2002 else 2003 io_dir = NVMEFC_FCP_NODATA; 2004 2005 return nvme_fc_start_fcp_op(ctrl, queue, op, data_len, io_dir); 2006 } 2007 2008 static struct blk_mq_tags * 2009 nvme_fc_tagset(struct nvme_fc_queue *queue) 2010 { 2011 if (queue->qnum == 0) 2012 return queue->ctrl->admin_tag_set.tags[queue->qnum]; 2013 2014 return queue->ctrl->tag_set.tags[queue->qnum - 1]; 2015 } 2016 2017 static int 2018 nvme_fc_poll(struct blk_mq_hw_ctx *hctx, unsigned int tag) 2019 2020 { 2021 struct nvme_fc_queue *queue = hctx->driver_data; 2022 struct nvme_fc_ctrl *ctrl = queue->ctrl; 2023 struct request *req; 2024 struct nvme_fc_fcp_op *op; 2025 2026 req = blk_mq_tag_to_rq(nvme_fc_tagset(queue), tag); 2027 if (!req) 2028 return 0; 2029 2030 op = blk_mq_rq_to_pdu(req); 2031 2032 if ((atomic_read(&op->state) == FCPOP_STATE_ACTIVE) && 2033 (ctrl->lport->ops->poll_queue)) 2034 ctrl->lport->ops->poll_queue(&ctrl->lport->localport, 2035 queue->lldd_handle); 2036 2037 return ((atomic_read(&op->state) != FCPOP_STATE_ACTIVE)); 2038 } 2039 2040 static void 2041 nvme_fc_submit_async_event(struct nvme_ctrl *arg, int aer_idx) 2042 { 2043 struct nvme_fc_ctrl *ctrl = to_fc_ctrl(arg); 2044 struct nvme_fc_fcp_op *aen_op; 2045 unsigned long flags; 2046 bool terminating = false; 2047 blk_status_t ret; 2048 2049 if (aer_idx > NVME_FC_NR_AEN_COMMANDS) 2050 return; 2051 2052 spin_lock_irqsave(&ctrl->lock, flags); 2053 if (ctrl->flags & FCCTRL_TERMIO) 2054 terminating = true; 2055 spin_unlock_irqrestore(&ctrl->lock, flags); 2056 2057 if (terminating) 2058 return; 2059 2060 aen_op = &ctrl->aen_ops[aer_idx]; 2061 2062 ret = nvme_fc_start_fcp_op(ctrl, aen_op->queue, aen_op, 0, 2063 NVMEFC_FCP_NODATA); 2064 if (ret) 2065 dev_err(ctrl->ctrl.device, 2066 "failed async event work [%d]\n", aer_idx); 2067 } 2068 2069 static void 2070 __nvme_fc_final_op_cleanup(struct request *rq) 2071 { 2072 struct nvme_fc_fcp_op *op = blk_mq_rq_to_pdu(rq); 2073 struct nvme_fc_ctrl *ctrl = op->ctrl; 2074 2075 atomic_set(&op->state, FCPOP_STATE_IDLE); 2076 op->flags &= ~(FCOP_FLAGS_TERMIO | FCOP_FLAGS_RELEASED | 2077 FCOP_FLAGS_COMPLETE); 2078 2079 nvme_fc_unmap_data(ctrl, rq, op); 2080 nvme_complete_rq(rq); 2081 nvme_fc_ctrl_put(ctrl); 2082 2083 } 2084 2085 static void 2086 nvme_fc_complete_rq(struct request *rq) 2087 { 2088 struct nvme_fc_fcp_op *op = blk_mq_rq_to_pdu(rq); 2089 struct nvme_fc_ctrl *ctrl = op->ctrl; 2090 unsigned long flags; 2091 bool completed = false; 2092 2093 /* 2094 * the core layer, on controller resets after calling 2095 * nvme_shutdown_ctrl(), calls complete_rq without our 2096 * calling blk_mq_complete_request(), thus there may still 2097 * be live i/o outstanding with the LLDD. Means transport has 2098 * to track complete calls vs fcpio_done calls to know what 2099 * path to take on completes and dones. 2100 */ 2101 spin_lock_irqsave(&ctrl->lock, flags); 2102 if (op->flags & FCOP_FLAGS_COMPLETE) 2103 completed = true; 2104 else 2105 op->flags |= FCOP_FLAGS_RELEASED; 2106 spin_unlock_irqrestore(&ctrl->lock, flags); 2107 2108 if (completed) 2109 __nvme_fc_final_op_cleanup(rq); 2110 } 2111 2112 /* 2113 * This routine is used by the transport when it needs to find active 2114 * io on a queue that is to be terminated. The transport uses 2115 * blk_mq_tagset_busy_itr() to find the busy requests, which then invoke 2116 * this routine to kill them on a 1 by 1 basis. 2117 * 2118 * As FC allocates FC exchange for each io, the transport must contact 2119 * the LLDD to terminate the exchange, thus releasing the FC exchange. 2120 * After terminating the exchange the LLDD will call the transport's 2121 * normal io done path for the request, but it will have an aborted 2122 * status. The done path will return the io request back to the block 2123 * layer with an error status. 2124 */ 2125 static void 2126 nvme_fc_terminate_exchange(struct request *req, void *data, bool reserved) 2127 { 2128 struct nvme_ctrl *nctrl = data; 2129 struct nvme_fc_ctrl *ctrl = to_fc_ctrl(nctrl); 2130 struct nvme_fc_fcp_op *op = blk_mq_rq_to_pdu(req); 2131 unsigned long flags; 2132 int status; 2133 2134 if (!blk_mq_request_started(req)) 2135 return; 2136 2137 spin_lock_irqsave(&ctrl->lock, flags); 2138 if (ctrl->flags & FCCTRL_TERMIO) { 2139 ctrl->iocnt++; 2140 op->flags |= FCOP_FLAGS_TERMIO; 2141 } 2142 spin_unlock_irqrestore(&ctrl->lock, flags); 2143 2144 status = __nvme_fc_abort_op(ctrl, op); 2145 if (status) { 2146 /* 2147 * if __nvme_fc_abort_op failed the io wasn't 2148 * active. Thus this call path is running in 2149 * parallel to the io complete. Treat as non-error. 2150 */ 2151 2152 /* back out the flags/counters */ 2153 spin_lock_irqsave(&ctrl->lock, flags); 2154 if (ctrl->flags & FCCTRL_TERMIO) 2155 ctrl->iocnt--; 2156 op->flags &= ~FCOP_FLAGS_TERMIO; 2157 spin_unlock_irqrestore(&ctrl->lock, flags); 2158 return; 2159 } 2160 } 2161 2162 2163 static const struct blk_mq_ops nvme_fc_mq_ops = { 2164 .queue_rq = nvme_fc_queue_rq, 2165 .complete = nvme_fc_complete_rq, 2166 .init_request = nvme_fc_init_request, 2167 .exit_request = nvme_fc_exit_request, 2168 .reinit_request = nvme_fc_reinit_request, 2169 .init_hctx = nvme_fc_init_hctx, 2170 .poll = nvme_fc_poll, 2171 .timeout = nvme_fc_timeout, 2172 }; 2173 2174 static int 2175 nvme_fc_create_io_queues(struct nvme_fc_ctrl *ctrl) 2176 { 2177 struct nvmf_ctrl_options *opts = ctrl->ctrl.opts; 2178 unsigned int nr_io_queues; 2179 int ret; 2180 2181 nr_io_queues = min(min(opts->nr_io_queues, num_online_cpus()), 2182 ctrl->lport->ops->max_hw_queues); 2183 ret = nvme_set_queue_count(&ctrl->ctrl, &nr_io_queues); 2184 if (ret) { 2185 dev_info(ctrl->ctrl.device, 2186 "set_queue_count failed: %d\n", ret); 2187 return ret; 2188 } 2189 2190 ctrl->ctrl.queue_count = nr_io_queues + 1; 2191 if (!nr_io_queues) 2192 return 0; 2193 2194 nvme_fc_init_io_queues(ctrl); 2195 2196 memset(&ctrl->tag_set, 0, sizeof(ctrl->tag_set)); 2197 ctrl->tag_set.ops = &nvme_fc_mq_ops; 2198 ctrl->tag_set.queue_depth = ctrl->ctrl.opts->queue_size; 2199 ctrl->tag_set.reserved_tags = 1; /* fabric connect */ 2200 ctrl->tag_set.numa_node = NUMA_NO_NODE; 2201 ctrl->tag_set.flags = BLK_MQ_F_SHOULD_MERGE; 2202 ctrl->tag_set.cmd_size = sizeof(struct nvme_fc_fcp_op) + 2203 (SG_CHUNK_SIZE * 2204 sizeof(struct scatterlist)) + 2205 ctrl->lport->ops->fcprqst_priv_sz; 2206 ctrl->tag_set.driver_data = ctrl; 2207 ctrl->tag_set.nr_hw_queues = ctrl->ctrl.queue_count - 1; 2208 ctrl->tag_set.timeout = NVME_IO_TIMEOUT; 2209 2210 ret = blk_mq_alloc_tag_set(&ctrl->tag_set); 2211 if (ret) 2212 return ret; 2213 2214 ctrl->ctrl.tagset = &ctrl->tag_set; 2215 2216 ctrl->ctrl.connect_q = blk_mq_init_queue(&ctrl->tag_set); 2217 if (IS_ERR(ctrl->ctrl.connect_q)) { 2218 ret = PTR_ERR(ctrl->ctrl.connect_q); 2219 goto out_free_tag_set; 2220 } 2221 2222 ret = nvme_fc_create_hw_io_queues(ctrl, ctrl->ctrl.opts->queue_size); 2223 if (ret) 2224 goto out_cleanup_blk_queue; 2225 2226 ret = nvme_fc_connect_io_queues(ctrl, ctrl->ctrl.opts->queue_size); 2227 if (ret) 2228 goto out_delete_hw_queues; 2229 2230 return 0; 2231 2232 out_delete_hw_queues: 2233 nvme_fc_delete_hw_io_queues(ctrl); 2234 out_cleanup_blk_queue: 2235 blk_cleanup_queue(ctrl->ctrl.connect_q); 2236 out_free_tag_set: 2237 blk_mq_free_tag_set(&ctrl->tag_set); 2238 nvme_fc_free_io_queues(ctrl); 2239 2240 /* force put free routine to ignore io queues */ 2241 ctrl->ctrl.tagset = NULL; 2242 2243 return ret; 2244 } 2245 2246 static int 2247 nvme_fc_reinit_io_queues(struct nvme_fc_ctrl *ctrl) 2248 { 2249 struct nvmf_ctrl_options *opts = ctrl->ctrl.opts; 2250 unsigned int nr_io_queues; 2251 int ret; 2252 2253 nr_io_queues = min(min(opts->nr_io_queues, num_online_cpus()), 2254 ctrl->lport->ops->max_hw_queues); 2255 ret = nvme_set_queue_count(&ctrl->ctrl, &nr_io_queues); 2256 if (ret) { 2257 dev_info(ctrl->ctrl.device, 2258 "set_queue_count failed: %d\n", ret); 2259 return ret; 2260 } 2261 2262 ctrl->ctrl.queue_count = nr_io_queues + 1; 2263 /* check for io queues existing */ 2264 if (ctrl->ctrl.queue_count == 1) 2265 return 0; 2266 2267 nvme_fc_init_io_queues(ctrl); 2268 2269 ret = blk_mq_reinit_tagset(&ctrl->tag_set); 2270 if (ret) 2271 goto out_free_io_queues; 2272 2273 ret = nvme_fc_create_hw_io_queues(ctrl, ctrl->ctrl.opts->queue_size); 2274 if (ret) 2275 goto out_free_io_queues; 2276 2277 ret = nvme_fc_connect_io_queues(ctrl, ctrl->ctrl.opts->queue_size); 2278 if (ret) 2279 goto out_delete_hw_queues; 2280 2281 blk_mq_update_nr_hw_queues(&ctrl->tag_set, nr_io_queues); 2282 2283 return 0; 2284 2285 out_delete_hw_queues: 2286 nvme_fc_delete_hw_io_queues(ctrl); 2287 out_free_io_queues: 2288 nvme_fc_free_io_queues(ctrl); 2289 return ret; 2290 } 2291 2292 /* 2293 * This routine restarts the controller on the host side, and 2294 * on the link side, recreates the controller association. 2295 */ 2296 static int 2297 nvme_fc_create_association(struct nvme_fc_ctrl *ctrl) 2298 { 2299 struct nvmf_ctrl_options *opts = ctrl->ctrl.opts; 2300 u32 segs; 2301 int ret; 2302 bool changed; 2303 2304 ++ctrl->ctrl.nr_reconnects; 2305 2306 /* 2307 * Create the admin queue 2308 */ 2309 2310 nvme_fc_init_queue(ctrl, 0, NVME_FC_AQ_BLKMQ_DEPTH); 2311 2312 ret = __nvme_fc_create_hw_queue(ctrl, &ctrl->queues[0], 0, 2313 NVME_FC_AQ_BLKMQ_DEPTH); 2314 if (ret) 2315 goto out_free_queue; 2316 2317 ret = nvme_fc_connect_admin_queue(ctrl, &ctrl->queues[0], 2318 NVME_FC_AQ_BLKMQ_DEPTH, 2319 (NVME_FC_AQ_BLKMQ_DEPTH / 4)); 2320 if (ret) 2321 goto out_delete_hw_queue; 2322 2323 if (ctrl->ctrl.state != NVME_CTRL_NEW) 2324 blk_mq_unquiesce_queue(ctrl->ctrl.admin_q); 2325 2326 ret = nvmf_connect_admin_queue(&ctrl->ctrl); 2327 if (ret) 2328 goto out_disconnect_admin_queue; 2329 2330 /* 2331 * Check controller capabilities 2332 * 2333 * todo:- add code to check if ctrl attributes changed from 2334 * prior connection values 2335 */ 2336 2337 ret = nvmf_reg_read64(&ctrl->ctrl, NVME_REG_CAP, &ctrl->ctrl.cap); 2338 if (ret) { 2339 dev_err(ctrl->ctrl.device, 2340 "prop_get NVME_REG_CAP failed\n"); 2341 goto out_disconnect_admin_queue; 2342 } 2343 2344 ctrl->ctrl.sqsize = 2345 min_t(int, NVME_CAP_MQES(ctrl->ctrl.cap) + 1, ctrl->ctrl.sqsize); 2346 2347 ret = nvme_enable_ctrl(&ctrl->ctrl, ctrl->ctrl.cap); 2348 if (ret) 2349 goto out_disconnect_admin_queue; 2350 2351 segs = min_t(u32, NVME_FC_MAX_SEGMENTS, 2352 ctrl->lport->ops->max_sgl_segments); 2353 ctrl->ctrl.max_hw_sectors = (segs - 1) << (PAGE_SHIFT - 9); 2354 2355 ret = nvme_init_identify(&ctrl->ctrl); 2356 if (ret) 2357 goto out_disconnect_admin_queue; 2358 2359 /* sanity checks */ 2360 2361 /* FC-NVME does not have other data in the capsule */ 2362 if (ctrl->ctrl.icdoff) { 2363 dev_err(ctrl->ctrl.device, "icdoff %d is not supported!\n", 2364 ctrl->ctrl.icdoff); 2365 goto out_disconnect_admin_queue; 2366 } 2367 2368 /* FC-NVME supports normal SGL Data Block Descriptors */ 2369 2370 if (opts->queue_size > ctrl->ctrl.maxcmd) { 2371 /* warn if maxcmd is lower than queue_size */ 2372 dev_warn(ctrl->ctrl.device, 2373 "queue_size %zu > ctrl maxcmd %u, reducing " 2374 "to queue_size\n", 2375 opts->queue_size, ctrl->ctrl.maxcmd); 2376 opts->queue_size = ctrl->ctrl.maxcmd; 2377 } 2378 2379 ret = nvme_fc_init_aen_ops(ctrl); 2380 if (ret) 2381 goto out_term_aen_ops; 2382 2383 /* 2384 * Create the io queues 2385 */ 2386 2387 if (ctrl->ctrl.queue_count > 1) { 2388 if (ctrl->ctrl.state == NVME_CTRL_NEW) 2389 ret = nvme_fc_create_io_queues(ctrl); 2390 else 2391 ret = nvme_fc_reinit_io_queues(ctrl); 2392 if (ret) 2393 goto out_term_aen_ops; 2394 } 2395 2396 changed = nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_LIVE); 2397 WARN_ON_ONCE(!changed); 2398 2399 ctrl->ctrl.nr_reconnects = 0; 2400 2401 nvme_start_ctrl(&ctrl->ctrl); 2402 2403 return 0; /* Success */ 2404 2405 out_term_aen_ops: 2406 nvme_fc_term_aen_ops(ctrl); 2407 out_disconnect_admin_queue: 2408 /* send a Disconnect(association) LS to fc-nvme target */ 2409 nvme_fc_xmt_disconnect_assoc(ctrl); 2410 out_delete_hw_queue: 2411 __nvme_fc_delete_hw_queue(ctrl, &ctrl->queues[0], 0); 2412 out_free_queue: 2413 nvme_fc_free_queue(&ctrl->queues[0]); 2414 2415 return ret; 2416 } 2417 2418 /* 2419 * This routine stops operation of the controller on the host side. 2420 * On the host os stack side: Admin and IO queues are stopped, 2421 * outstanding ios on them terminated via FC ABTS. 2422 * On the link side: the association is terminated. 2423 */ 2424 static void 2425 nvme_fc_delete_association(struct nvme_fc_ctrl *ctrl) 2426 { 2427 unsigned long flags; 2428 2429 spin_lock_irqsave(&ctrl->lock, flags); 2430 ctrl->flags |= FCCTRL_TERMIO; 2431 ctrl->iocnt = 0; 2432 spin_unlock_irqrestore(&ctrl->lock, flags); 2433 2434 /* 2435 * If io queues are present, stop them and terminate all outstanding 2436 * ios on them. As FC allocates FC exchange for each io, the 2437 * transport must contact the LLDD to terminate the exchange, 2438 * thus releasing the FC exchange. We use blk_mq_tagset_busy_itr() 2439 * to tell us what io's are busy and invoke a transport routine 2440 * to kill them with the LLDD. After terminating the exchange 2441 * the LLDD will call the transport's normal io done path, but it 2442 * will have an aborted status. The done path will return the 2443 * io requests back to the block layer as part of normal completions 2444 * (but with error status). 2445 */ 2446 if (ctrl->ctrl.queue_count > 1) { 2447 nvme_stop_queues(&ctrl->ctrl); 2448 blk_mq_tagset_busy_iter(&ctrl->tag_set, 2449 nvme_fc_terminate_exchange, &ctrl->ctrl); 2450 } 2451 2452 /* 2453 * Other transports, which don't have link-level contexts bound 2454 * to sqe's, would try to gracefully shutdown the controller by 2455 * writing the registers for shutdown and polling (call 2456 * nvme_shutdown_ctrl()). Given a bunch of i/o was potentially 2457 * just aborted and we will wait on those contexts, and given 2458 * there was no indication of how live the controlelr is on the 2459 * link, don't send more io to create more contexts for the 2460 * shutdown. Let the controller fail via keepalive failure if 2461 * its still present. 2462 */ 2463 2464 /* 2465 * clean up the admin queue. Same thing as above. 2466 * use blk_mq_tagset_busy_itr() and the transport routine to 2467 * terminate the exchanges. 2468 */ 2469 blk_mq_quiesce_queue(ctrl->ctrl.admin_q); 2470 blk_mq_tagset_busy_iter(&ctrl->admin_tag_set, 2471 nvme_fc_terminate_exchange, &ctrl->ctrl); 2472 2473 /* kill the aens as they are a separate path */ 2474 nvme_fc_abort_aen_ops(ctrl); 2475 2476 /* wait for all io that had to be aborted */ 2477 spin_lock_irqsave(&ctrl->lock, flags); 2478 wait_event_lock_irq(ctrl->ioabort_wait, ctrl->iocnt == 0, ctrl->lock); 2479 ctrl->flags &= ~FCCTRL_TERMIO; 2480 spin_unlock_irqrestore(&ctrl->lock, flags); 2481 2482 nvme_fc_term_aen_ops(ctrl); 2483 2484 /* 2485 * send a Disconnect(association) LS to fc-nvme target 2486 * Note: could have been sent at top of process, but 2487 * cleaner on link traffic if after the aborts complete. 2488 * Note: if association doesn't exist, association_id will be 0 2489 */ 2490 if (ctrl->association_id) 2491 nvme_fc_xmt_disconnect_assoc(ctrl); 2492 2493 if (ctrl->ctrl.tagset) { 2494 nvme_fc_delete_hw_io_queues(ctrl); 2495 nvme_fc_free_io_queues(ctrl); 2496 } 2497 2498 __nvme_fc_delete_hw_queue(ctrl, &ctrl->queues[0], 0); 2499 nvme_fc_free_queue(&ctrl->queues[0]); 2500 } 2501 2502 static void 2503 nvme_fc_delete_ctrl_work(struct work_struct *work) 2504 { 2505 struct nvme_fc_ctrl *ctrl = 2506 container_of(work, struct nvme_fc_ctrl, delete_work); 2507 2508 cancel_work_sync(&ctrl->ctrl.reset_work); 2509 cancel_delayed_work_sync(&ctrl->connect_work); 2510 nvme_stop_ctrl(&ctrl->ctrl); 2511 nvme_remove_namespaces(&ctrl->ctrl); 2512 /* 2513 * kill the association on the link side. this will block 2514 * waiting for io to terminate 2515 */ 2516 nvme_fc_delete_association(ctrl); 2517 2518 /* 2519 * tear down the controller 2520 * After the last reference on the nvme ctrl is removed, 2521 * the transport nvme_fc_nvme_ctrl_freed() callback will be 2522 * invoked. From there, the transport will tear down it's 2523 * logical queues and association. 2524 */ 2525 nvme_uninit_ctrl(&ctrl->ctrl); 2526 2527 nvme_put_ctrl(&ctrl->ctrl); 2528 } 2529 2530 static bool 2531 __nvme_fc_schedule_delete_work(struct nvme_fc_ctrl *ctrl) 2532 { 2533 if (!nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_DELETING)) 2534 return true; 2535 2536 if (!queue_work(nvme_wq, &ctrl->delete_work)) 2537 return true; 2538 2539 return false; 2540 } 2541 2542 static int 2543 __nvme_fc_del_ctrl(struct nvme_fc_ctrl *ctrl) 2544 { 2545 return __nvme_fc_schedule_delete_work(ctrl) ? -EBUSY : 0; 2546 } 2547 2548 /* 2549 * Request from nvme core layer to delete the controller 2550 */ 2551 static int 2552 nvme_fc_del_nvme_ctrl(struct nvme_ctrl *nctrl) 2553 { 2554 struct nvme_fc_ctrl *ctrl = to_fc_ctrl(nctrl); 2555 int ret; 2556 2557 if (!kref_get_unless_zero(&ctrl->ctrl.kref)) 2558 return -EBUSY; 2559 2560 ret = __nvme_fc_del_ctrl(ctrl); 2561 2562 if (!ret) 2563 flush_workqueue(nvme_wq); 2564 2565 nvme_put_ctrl(&ctrl->ctrl); 2566 2567 return ret; 2568 } 2569 2570 static void 2571 nvme_fc_reconnect_or_delete(struct nvme_fc_ctrl *ctrl, int status) 2572 { 2573 /* If we are resetting/deleting then do nothing */ 2574 if (ctrl->ctrl.state != NVME_CTRL_RECONNECTING) { 2575 WARN_ON_ONCE(ctrl->ctrl.state == NVME_CTRL_NEW || 2576 ctrl->ctrl.state == NVME_CTRL_LIVE); 2577 return; 2578 } 2579 2580 dev_info(ctrl->ctrl.device, 2581 "NVME-FC{%d}: reset: Reconnect attempt failed (%d)\n", 2582 ctrl->cnum, status); 2583 2584 if (nvmf_should_reconnect(&ctrl->ctrl)) { 2585 dev_info(ctrl->ctrl.device, 2586 "NVME-FC{%d}: Reconnect attempt in %d seconds.\n", 2587 ctrl->cnum, ctrl->ctrl.opts->reconnect_delay); 2588 queue_delayed_work(nvme_wq, &ctrl->connect_work, 2589 ctrl->ctrl.opts->reconnect_delay * HZ); 2590 } else { 2591 dev_warn(ctrl->ctrl.device, 2592 "NVME-FC{%d}: Max reconnect attempts (%d) " 2593 "reached. Removing controller\n", 2594 ctrl->cnum, ctrl->ctrl.nr_reconnects); 2595 WARN_ON(__nvme_fc_schedule_delete_work(ctrl)); 2596 } 2597 } 2598 2599 static void 2600 nvme_fc_reset_ctrl_work(struct work_struct *work) 2601 { 2602 struct nvme_fc_ctrl *ctrl = 2603 container_of(work, struct nvme_fc_ctrl, ctrl.reset_work); 2604 int ret; 2605 2606 nvme_stop_ctrl(&ctrl->ctrl); 2607 /* will block will waiting for io to terminate */ 2608 nvme_fc_delete_association(ctrl); 2609 2610 ret = nvme_fc_create_association(ctrl); 2611 if (ret) 2612 nvme_fc_reconnect_or_delete(ctrl, ret); 2613 else 2614 dev_info(ctrl->ctrl.device, 2615 "NVME-FC{%d}: controller reset complete\n", ctrl->cnum); 2616 } 2617 2618 static const struct nvme_ctrl_ops nvme_fc_ctrl_ops = { 2619 .name = "fc", 2620 .module = THIS_MODULE, 2621 .flags = NVME_F_FABRICS, 2622 .reg_read32 = nvmf_reg_read32, 2623 .reg_read64 = nvmf_reg_read64, 2624 .reg_write32 = nvmf_reg_write32, 2625 .free_ctrl = nvme_fc_nvme_ctrl_freed, 2626 .submit_async_event = nvme_fc_submit_async_event, 2627 .delete_ctrl = nvme_fc_del_nvme_ctrl, 2628 .get_address = nvmf_get_address, 2629 }; 2630 2631 static void 2632 nvme_fc_connect_ctrl_work(struct work_struct *work) 2633 { 2634 int ret; 2635 2636 struct nvme_fc_ctrl *ctrl = 2637 container_of(to_delayed_work(work), 2638 struct nvme_fc_ctrl, connect_work); 2639 2640 ret = nvme_fc_create_association(ctrl); 2641 if (ret) 2642 nvme_fc_reconnect_or_delete(ctrl, ret); 2643 else 2644 dev_info(ctrl->ctrl.device, 2645 "NVME-FC{%d}: controller reconnect complete\n", 2646 ctrl->cnum); 2647 } 2648 2649 2650 static const struct blk_mq_ops nvme_fc_admin_mq_ops = { 2651 .queue_rq = nvme_fc_queue_rq, 2652 .complete = nvme_fc_complete_rq, 2653 .init_request = nvme_fc_init_request, 2654 .exit_request = nvme_fc_exit_request, 2655 .reinit_request = nvme_fc_reinit_request, 2656 .init_hctx = nvme_fc_init_admin_hctx, 2657 .timeout = nvme_fc_timeout, 2658 }; 2659 2660 2661 static struct nvme_ctrl * 2662 nvme_fc_init_ctrl(struct device *dev, struct nvmf_ctrl_options *opts, 2663 struct nvme_fc_lport *lport, struct nvme_fc_rport *rport) 2664 { 2665 struct nvme_fc_ctrl *ctrl; 2666 unsigned long flags; 2667 int ret, idx; 2668 2669 if (!(rport->remoteport.port_role & 2670 (FC_PORT_ROLE_NVME_DISCOVERY | FC_PORT_ROLE_NVME_TARGET))) { 2671 ret = -EBADR; 2672 goto out_fail; 2673 } 2674 2675 ctrl = kzalloc(sizeof(*ctrl), GFP_KERNEL); 2676 if (!ctrl) { 2677 ret = -ENOMEM; 2678 goto out_fail; 2679 } 2680 2681 idx = ida_simple_get(&nvme_fc_ctrl_cnt, 0, 0, GFP_KERNEL); 2682 if (idx < 0) { 2683 ret = -ENOSPC; 2684 goto out_free_ctrl; 2685 } 2686 2687 ctrl->ctrl.opts = opts; 2688 INIT_LIST_HEAD(&ctrl->ctrl_list); 2689 ctrl->lport = lport; 2690 ctrl->rport = rport; 2691 ctrl->dev = lport->dev; 2692 ctrl->cnum = idx; 2693 2694 get_device(ctrl->dev); 2695 kref_init(&ctrl->ref); 2696 2697 INIT_WORK(&ctrl->delete_work, nvme_fc_delete_ctrl_work); 2698 INIT_WORK(&ctrl->ctrl.reset_work, nvme_fc_reset_ctrl_work); 2699 INIT_DELAYED_WORK(&ctrl->connect_work, nvme_fc_connect_ctrl_work); 2700 spin_lock_init(&ctrl->lock); 2701 2702 /* io queue count */ 2703 ctrl->ctrl.queue_count = min_t(unsigned int, 2704 opts->nr_io_queues, 2705 lport->ops->max_hw_queues); 2706 ctrl->ctrl.queue_count++; /* +1 for admin queue */ 2707 2708 ctrl->ctrl.sqsize = opts->queue_size - 1; 2709 ctrl->ctrl.kato = opts->kato; 2710 2711 ret = -ENOMEM; 2712 ctrl->queues = kcalloc(ctrl->ctrl.queue_count, 2713 sizeof(struct nvme_fc_queue), GFP_KERNEL); 2714 if (!ctrl->queues) 2715 goto out_free_ida; 2716 2717 memset(&ctrl->admin_tag_set, 0, sizeof(ctrl->admin_tag_set)); 2718 ctrl->admin_tag_set.ops = &nvme_fc_admin_mq_ops; 2719 ctrl->admin_tag_set.queue_depth = NVME_FC_AQ_BLKMQ_DEPTH; 2720 ctrl->admin_tag_set.reserved_tags = 2; /* fabric connect + Keep-Alive */ 2721 ctrl->admin_tag_set.numa_node = NUMA_NO_NODE; 2722 ctrl->admin_tag_set.cmd_size = sizeof(struct nvme_fc_fcp_op) + 2723 (SG_CHUNK_SIZE * 2724 sizeof(struct scatterlist)) + 2725 ctrl->lport->ops->fcprqst_priv_sz; 2726 ctrl->admin_tag_set.driver_data = ctrl; 2727 ctrl->admin_tag_set.nr_hw_queues = 1; 2728 ctrl->admin_tag_set.timeout = ADMIN_TIMEOUT; 2729 2730 ret = blk_mq_alloc_tag_set(&ctrl->admin_tag_set); 2731 if (ret) 2732 goto out_free_queues; 2733 2734 ctrl->ctrl.admin_q = blk_mq_init_queue(&ctrl->admin_tag_set); 2735 if (IS_ERR(ctrl->ctrl.admin_q)) { 2736 ret = PTR_ERR(ctrl->ctrl.admin_q); 2737 goto out_free_admin_tag_set; 2738 } 2739 2740 /* 2741 * Would have been nice to init io queues tag set as well. 2742 * However, we require interaction from the controller 2743 * for max io queue count before we can do so. 2744 * Defer this to the connect path. 2745 */ 2746 2747 ret = nvme_init_ctrl(&ctrl->ctrl, dev, &nvme_fc_ctrl_ops, 0); 2748 if (ret) 2749 goto out_cleanup_admin_q; 2750 2751 /* at this point, teardown path changes to ref counting on nvme ctrl */ 2752 2753 spin_lock_irqsave(&rport->lock, flags); 2754 list_add_tail(&ctrl->ctrl_list, &rport->ctrl_list); 2755 spin_unlock_irqrestore(&rport->lock, flags); 2756 2757 ret = nvme_fc_create_association(ctrl); 2758 if (ret) { 2759 ctrl->ctrl.opts = NULL; 2760 /* initiate nvme ctrl ref counting teardown */ 2761 nvme_uninit_ctrl(&ctrl->ctrl); 2762 nvme_put_ctrl(&ctrl->ctrl); 2763 2764 /* Remove core ctrl ref. */ 2765 nvme_put_ctrl(&ctrl->ctrl); 2766 2767 /* as we're past the point where we transition to the ref 2768 * counting teardown path, if we return a bad pointer here, 2769 * the calling routine, thinking it's prior to the 2770 * transition, will do an rport put. Since the teardown 2771 * path also does a rport put, we do an extra get here to 2772 * so proper order/teardown happens. 2773 */ 2774 nvme_fc_rport_get(rport); 2775 2776 if (ret > 0) 2777 ret = -EIO; 2778 return ERR_PTR(ret); 2779 } 2780 2781 kref_get(&ctrl->ctrl.kref); 2782 2783 dev_info(ctrl->ctrl.device, 2784 "NVME-FC{%d}: new ctrl: NQN \"%s\"\n", 2785 ctrl->cnum, ctrl->ctrl.opts->subsysnqn); 2786 2787 return &ctrl->ctrl; 2788 2789 out_cleanup_admin_q: 2790 blk_cleanup_queue(ctrl->ctrl.admin_q); 2791 out_free_admin_tag_set: 2792 blk_mq_free_tag_set(&ctrl->admin_tag_set); 2793 out_free_queues: 2794 kfree(ctrl->queues); 2795 out_free_ida: 2796 put_device(ctrl->dev); 2797 ida_simple_remove(&nvme_fc_ctrl_cnt, ctrl->cnum); 2798 out_free_ctrl: 2799 kfree(ctrl); 2800 out_fail: 2801 /* exit via here doesn't follow ctlr ref points */ 2802 return ERR_PTR(ret); 2803 } 2804 2805 enum { 2806 FCT_TRADDR_ERR = 0, 2807 FCT_TRADDR_WWNN = 1 << 0, 2808 FCT_TRADDR_WWPN = 1 << 1, 2809 }; 2810 2811 struct nvmet_fc_traddr { 2812 u64 nn; 2813 u64 pn; 2814 }; 2815 2816 static const match_table_t traddr_opt_tokens = { 2817 { FCT_TRADDR_WWNN, "nn-%s" }, 2818 { FCT_TRADDR_WWPN, "pn-%s" }, 2819 { FCT_TRADDR_ERR, NULL } 2820 }; 2821 2822 static int 2823 nvme_fc_parse_address(struct nvmet_fc_traddr *traddr, char *buf) 2824 { 2825 substring_t args[MAX_OPT_ARGS]; 2826 char *options, *o, *p; 2827 int token, ret = 0; 2828 u64 token64; 2829 2830 options = o = kstrdup(buf, GFP_KERNEL); 2831 if (!options) 2832 return -ENOMEM; 2833 2834 while ((p = strsep(&o, ":\n")) != NULL) { 2835 if (!*p) 2836 continue; 2837 2838 token = match_token(p, traddr_opt_tokens, args); 2839 switch (token) { 2840 case FCT_TRADDR_WWNN: 2841 if (match_u64(args, &token64)) { 2842 ret = -EINVAL; 2843 goto out; 2844 } 2845 traddr->nn = token64; 2846 break; 2847 case FCT_TRADDR_WWPN: 2848 if (match_u64(args, &token64)) { 2849 ret = -EINVAL; 2850 goto out; 2851 } 2852 traddr->pn = token64; 2853 break; 2854 default: 2855 pr_warn("unknown traddr token or missing value '%s'\n", 2856 p); 2857 ret = -EINVAL; 2858 goto out; 2859 } 2860 } 2861 2862 out: 2863 kfree(options); 2864 return ret; 2865 } 2866 2867 static struct nvme_ctrl * 2868 nvme_fc_create_ctrl(struct device *dev, struct nvmf_ctrl_options *opts) 2869 { 2870 struct nvme_fc_lport *lport; 2871 struct nvme_fc_rport *rport; 2872 struct nvme_ctrl *ctrl; 2873 struct nvmet_fc_traddr laddr = { 0L, 0L }; 2874 struct nvmet_fc_traddr raddr = { 0L, 0L }; 2875 unsigned long flags; 2876 int ret; 2877 2878 ret = nvme_fc_parse_address(&raddr, opts->traddr); 2879 if (ret || !raddr.nn || !raddr.pn) 2880 return ERR_PTR(-EINVAL); 2881 2882 ret = nvme_fc_parse_address(&laddr, opts->host_traddr); 2883 if (ret || !laddr.nn || !laddr.pn) 2884 return ERR_PTR(-EINVAL); 2885 2886 /* find the host and remote ports to connect together */ 2887 spin_lock_irqsave(&nvme_fc_lock, flags); 2888 list_for_each_entry(lport, &nvme_fc_lport_list, port_list) { 2889 if (lport->localport.node_name != laddr.nn || 2890 lport->localport.port_name != laddr.pn) 2891 continue; 2892 2893 list_for_each_entry(rport, &lport->endp_list, endp_list) { 2894 if (rport->remoteport.node_name != raddr.nn || 2895 rport->remoteport.port_name != raddr.pn) 2896 continue; 2897 2898 /* if fail to get reference fall through. Will error */ 2899 if (!nvme_fc_rport_get(rport)) 2900 break; 2901 2902 spin_unlock_irqrestore(&nvme_fc_lock, flags); 2903 2904 ctrl = nvme_fc_init_ctrl(dev, opts, lport, rport); 2905 if (IS_ERR(ctrl)) 2906 nvme_fc_rport_put(rport); 2907 return ctrl; 2908 } 2909 } 2910 spin_unlock_irqrestore(&nvme_fc_lock, flags); 2911 2912 return ERR_PTR(-ENOENT); 2913 } 2914 2915 2916 static struct nvmf_transport_ops nvme_fc_transport = { 2917 .name = "fc", 2918 .required_opts = NVMF_OPT_TRADDR | NVMF_OPT_HOST_TRADDR, 2919 .allowed_opts = NVMF_OPT_RECONNECT_DELAY | NVMF_OPT_CTRL_LOSS_TMO, 2920 .create_ctrl = nvme_fc_create_ctrl, 2921 }; 2922 2923 static int __init nvme_fc_init_module(void) 2924 { 2925 return nvmf_register_transport(&nvme_fc_transport); 2926 } 2927 2928 static void __exit nvme_fc_exit_module(void) 2929 { 2930 /* sanity check - all lports should be removed */ 2931 if (!list_empty(&nvme_fc_lport_list)) 2932 pr_warn("%s: localport list not empty\n", __func__); 2933 2934 nvmf_unregister_transport(&nvme_fc_transport); 2935 2936 ida_destroy(&nvme_fc_local_port_cnt); 2937 ida_destroy(&nvme_fc_ctrl_cnt); 2938 } 2939 2940 module_init(nvme_fc_init_module); 2941 module_exit(nvme_fc_exit_module); 2942 2943 MODULE_LICENSE("GPL v2"); 2944