1 /* 2 * Copyright (c) 2016 Avago Technologies. All rights reserved. 3 * 4 * This program is free software; you can redistribute it and/or modify 5 * it under the terms of version 2 of the GNU General Public License as 6 * published by the Free Software Foundation. 7 * 8 * This program is distributed in the hope that it will be useful. 9 * ALL EXPRESS OR IMPLIED CONDITIONS, REPRESENTATIONS AND WARRANTIES, 10 * INCLUDING ANY IMPLIED WARRANTY OF MERCHANTABILITY, FITNESS FOR A 11 * PARTICULAR PURPOSE, OR NON-INFRINGEMENT, ARE DISCLAIMED, EXCEPT TO 12 * THE EXTENT THAT SUCH DISCLAIMERS ARE HELD TO BE LEGALLY INVALID. 13 * See the GNU General Public License for more details, a copy of which 14 * can be found in the file COPYING included with this package 15 * 16 */ 17 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt 18 #include <linux/module.h> 19 #include <linux/parser.h> 20 #include <uapi/scsi/fc/fc_fs.h> 21 #include <uapi/scsi/fc/fc_els.h> 22 #include <linux/delay.h> 23 24 #include "nvme.h" 25 #include "fabrics.h" 26 #include <linux/nvme-fc-driver.h> 27 #include <linux/nvme-fc.h> 28 29 30 /* *************************** Data Structures/Defines ****************** */ 31 32 33 /* 34 * We handle AEN commands ourselves and don't even let the 35 * block layer know about them. 
*/
#define NVME_FC_NR_AEN_COMMANDS	1
#define NVME_FC_AQ_BLKMQ_DEPTH	\
		(NVME_AQ_DEPTH - NVME_FC_NR_AEN_COMMANDS)
#define AEN_CMDID_BASE		(NVME_FC_AQ_BLKMQ_DEPTH + 1)

/*
 * Per-queue state, kept in nvme_fc_queue.flags.
 *
 * NOTE(review): this value is passed to set_bit() as a bit *number* by the
 * connect routines in this file, so (1 << 0) == 1 selects bit 1 of the
 * flags word rather than bit 0. Harmless as long as every user goes
 * through the bitops with this same constant - confirm before adding
 * more flags or testing it with a plain mask.
 */
enum nvme_fc_queue_flags {
	NVME_FC_Q_CONNECTED = (1 << 0),
};

#define NVMEFC_QUEUE_DELAY	3		/* ms units */

/* Transport view of one NVMe queue (admin or io) of an FC controller */
struct nvme_fc_queue {
	struct nvme_fc_ctrl	*ctrl;		/* owning controller */
	struct device		*dev;
	struct blk_mq_hw_ctx	*hctx;
	void			*lldd_handle;	/* passed back to the LLDD, e.g. on fcp_abort */
	int			queue_size;
	size_t			cmnd_capsule_len;
	u32			qnum;		/* sent as the qid in Create Connection */
	u32			rqcnt;
	u32			seqno;

	u64			connection_id;	/* from the LS accept's connection id descriptor */
	atomic_t		csn;

	unsigned long		flags;		/* see enum nvme_fc_queue_flags */
} __aligned(sizeof(u64));	/* alignment for other things alloc'd with */

/* Bits used in the "flags" word of both LS ops and FCP ops */
enum nvme_fcop_flags {
	FCOP_FLAGS_TERMIO	= (1 << 0),
	FCOP_FLAGS_RELEASED	= (1 << 1),
	FCOP_FLAGS_COMPLETE	= (1 << 2),
	FCOP_FLAGS_AEN		= (1 << 3),
};

/*
 * Transport wrapper around a link service (LS) request handed to the LLDD.
 * The wrapper is recovered from the embedded ls_req with ls_req_to_lsop().
 */
struct nvmefc_ls_req_op {
	struct nvmefc_ls_req	ls_req;

	struct nvme_fc_rport	*rport;
	struct nvme_fc_queue	*queue;
	struct request		*rq;
	u32			flags;		/* FCOP_FLAGS_xxx */

	int			ls_error;	/* status reported for the LS */
	struct completion	ls_done;	/* signalled by the synchronous done handler */
	struct list_head	lsreq_list;	/* rport->ls_req_list */
	bool			req_queued;	/* true while linked on lsreq_list */
};

/* Life-cycle states stored in nvme_fc_fcp_op.state */
enum nvme_fcpop_state {
	FCPOP_STATE_UNINIT	= 0,
	FCPOP_STATE_IDLE	= 1,
	FCPOP_STATE_ACTIVE	= 2,
	FCPOP_STATE_ABORTED	= 3,
	FCPOP_STATE_COMPLETE	= 4,
};

/* Per-command context; recovered from fcp_req with fcp_req_to_fcp_op() */
struct nvme_fc_fcp_op {
	struct nvme_request	nreq;		/*
						 * nvme/host/core.c
						 * requires this to be
						 * the 1st element in the
						 * private structure
						 * associated with the
						 * request.
						 */
	struct nvmefc_fcp_req	fcp_req;

	struct nvme_fc_ctrl	*ctrl;
	struct nvme_fc_queue	*queue;
	struct request		*rq;

	atomic_t		state;		/* enum nvme_fcpop_state */
	u32			flags;		/* FCOP_FLAGS_xxx */
	u32			rqno;
	u32			nents;

	struct nvme_fc_cmd_iu	cmd_iu;
	struct nvme_fc_ersp_iu	rsp_iu;
};

/*
 * Transport state for a registered local (host) FC port. The LLDD private
 * area (template->local_priv_sz bytes) is allocated directly behind this
 * structure and is what localport.private points at.
 */
struct nvme_fc_lport {
	struct nvme_fc_local_port	localport;

	struct ida			endp_cnt;	/* source of remote port numbers */
	struct list_head		port_list;	/* nvme_fc_port_list */
	struct list_head		endp_list;	/* remote ports on this local port */
	struct device			*dev;	/* physical device for dma */
	struct nvme_fc_port_template	*ops;
	struct kref			ref;
} __aligned(sizeof(u64));	/* alignment for other things alloc'd with */

/*
 * Transport state for a registered remote (subsystem) FC port. The LLDD
 * private area (remote_priv_sz bytes) follows this structure and is what
 * remoteport.private points at.
 */
struct nvme_fc_rport {
	struct nvme_fc_remote_port	remoteport;

	struct list_head		endp_list; /* for lport->endp_list */
	struct list_head		ctrl_list;
	struct list_head		ls_req_list;
	struct device			*dev;	/* physical device for dma */
	struct nvme_fc_lport		*lport;
	spinlock_t			lock;	/* taken around ls_req_list and ctrl_list walks */
	struct kref			ref;
} __aligned(sizeof(u64));	/* alignment for other things alloc'd with */

/* Bits used in nvme_fc_ctrl.flags */
enum nvme_fcctrl_flags {
	FCCTRL_TERMIO		= (1 << 0),
};

/* One FC-NVME association: the fabrics controller plus its FC context */
struct nvme_fc_ctrl {
	spinlock_t		lock;		/* taken around flags/iocnt and op->flags updates */
	struct nvme_fc_queue	*queues;
	struct device		*dev;
	struct nvme_fc_lport	*lport;
	struct nvme_fc_rport	*rport;
	u32			cnum;

	u64			association_id;	/* from the Create Association accept; zeroed on disconnect */

	struct list_head	ctrl_list;	/* rport->ctrl_list */

	struct blk_mq_tag_set	admin_tag_set;
	struct blk_mq_tag_set	tag_set;

	struct work_struct	delete_work;
	struct delayed_work	connect_work;

	struct kref		ref;
	u32			flags;		/* enum nvme_fcctrl_flags */
	u32			iocnt;		/* ops flagged TERMIO that have not completed yet */
	wait_queue_head_t	ioabort_wait;	/* woken when iocnt drops to zero */

	struct nvme_fc_fcp_op	aen_ops[NVME_FC_NR_AEN_COMMANDS];

	struct nvme_ctrl	ctrl;
};

/* Map the generic nvme_ctrl back to its enclosing FC controller */
static inline struct nvme_fc_ctrl *
to_fc_ctrl(struct nvme_ctrl *ctrl)
{
	return container_of(ctrl, struct nvme_fc_ctrl, ctrl);
}

/* Map the LLDD-visible local port back to the transport's lport */
static inline struct nvme_fc_lport *
localport_to_lport(struct nvme_fc_local_port *portptr)
{
	return container_of(portptr, struct nvme_fc_lport, localport);
}

/* Map the LLDD-visible remote port back to the transport's rport */
static inline struct nvme_fc_rport *
remoteport_to_rport(struct nvme_fc_remote_port *portptr)
{
	return container_of(portptr, struct nvme_fc_rport, remoteport);
}

/* Map an LS request back to the op wrapper it is embedded in */
static inline struct nvmefc_ls_req_op *
ls_req_to_lsop(struct nvmefc_ls_req *lsreq)
{
	return container_of(lsreq, struct nvmefc_ls_req_op, ls_req);
}

/* Map an FCP request back to the op wrapper it is embedded in */
static inline struct nvme_fc_fcp_op *
fcp_req_to_fcp_op(struct nvmefc_fcp_req *fcpreq)
{
	return container_of(fcpreq, struct nvme_fc_fcp_op, fcp_req);
}



/* *************************** Globals **************************** */


/* held while adding to / removing from the local and remote port lists */
static DEFINE_SPINLOCK(nvme_fc_lock);

static LIST_HEAD(nvme_fc_lport_list);
static DEFINE_IDA(nvme_fc_local_port_cnt);	/* source of local port numbers */
static DEFINE_IDA(nvme_fc_ctrl_cnt);




/* *********************** FC-NVME Port Management ************************ */

static int __nvme_fc_del_ctrl(struct nvme_fc_ctrl *);
static void __nvme_fc_delete_hw_queue(struct nvme_fc_ctrl *,
			struct nvme_fc_queue *, unsigned int);


/**
 * nvme_fc_register_localport - transport entry point called by an
 *                              LLDD to register the existence of a NVME
 *                              host FC port.
 * @pinfo:     pointer to information about the port to be registered
 * @template:  LLDD entrypoints and operational parameters for the port
 * @dev:       physical hardware device node port corresponds to. Will be
 *             used for DMA mappings
 * @portptr:   pointer to a local port pointer. Upon success, the routine
 *             will allocate a nvme_fc_local_port structure and place its
 *             address in the local port pointer. Upon failure, local port
 *             pointer will be set to NULL.
 *
 * Returns:
 * a completion status. Must be 0 upon success; a negative errno
 * (ex: -ENXIO) upon failure.
240 */ 241 int 242 nvme_fc_register_localport(struct nvme_fc_port_info *pinfo, 243 struct nvme_fc_port_template *template, 244 struct device *dev, 245 struct nvme_fc_local_port **portptr) 246 { 247 struct nvme_fc_lport *newrec; 248 unsigned long flags; 249 int ret, idx; 250 251 if (!template->localport_delete || !template->remoteport_delete || 252 !template->ls_req || !template->fcp_io || 253 !template->ls_abort || !template->fcp_abort || 254 !template->max_hw_queues || !template->max_sgl_segments || 255 !template->max_dif_sgl_segments || !template->dma_boundary) { 256 ret = -EINVAL; 257 goto out_reghost_failed; 258 } 259 260 newrec = kmalloc((sizeof(*newrec) + template->local_priv_sz), 261 GFP_KERNEL); 262 if (!newrec) { 263 ret = -ENOMEM; 264 goto out_reghost_failed; 265 } 266 267 idx = ida_simple_get(&nvme_fc_local_port_cnt, 0, 0, GFP_KERNEL); 268 if (idx < 0) { 269 ret = -ENOSPC; 270 goto out_fail_kfree; 271 } 272 273 if (!get_device(dev) && dev) { 274 ret = -ENODEV; 275 goto out_ida_put; 276 } 277 278 INIT_LIST_HEAD(&newrec->port_list); 279 INIT_LIST_HEAD(&newrec->endp_list); 280 kref_init(&newrec->ref); 281 newrec->ops = template; 282 newrec->dev = dev; 283 ida_init(&newrec->endp_cnt); 284 newrec->localport.private = &newrec[1]; 285 newrec->localport.node_name = pinfo->node_name; 286 newrec->localport.port_name = pinfo->port_name; 287 newrec->localport.port_role = pinfo->port_role; 288 newrec->localport.port_id = pinfo->port_id; 289 newrec->localport.port_state = FC_OBJSTATE_ONLINE; 290 newrec->localport.port_num = idx; 291 292 spin_lock_irqsave(&nvme_fc_lock, flags); 293 list_add_tail(&newrec->port_list, &nvme_fc_lport_list); 294 spin_unlock_irqrestore(&nvme_fc_lock, flags); 295 296 if (dev) 297 dma_set_seg_boundary(dev, template->dma_boundary); 298 299 *portptr = &newrec->localport; 300 return 0; 301 302 out_ida_put: 303 ida_simple_remove(&nvme_fc_local_port_cnt, idx); 304 out_fail_kfree: 305 kfree(newrec); 306 out_reghost_failed: 307 *portptr = NULL; 308 
309 return ret; 310 } 311 EXPORT_SYMBOL_GPL(nvme_fc_register_localport); 312 313 static void 314 nvme_fc_free_lport(struct kref *ref) 315 { 316 struct nvme_fc_lport *lport = 317 container_of(ref, struct nvme_fc_lport, ref); 318 unsigned long flags; 319 320 WARN_ON(lport->localport.port_state != FC_OBJSTATE_DELETED); 321 WARN_ON(!list_empty(&lport->endp_list)); 322 323 /* remove from transport list */ 324 spin_lock_irqsave(&nvme_fc_lock, flags); 325 list_del(&lport->port_list); 326 spin_unlock_irqrestore(&nvme_fc_lock, flags); 327 328 /* let the LLDD know we've finished tearing it down */ 329 lport->ops->localport_delete(&lport->localport); 330 331 ida_simple_remove(&nvme_fc_local_port_cnt, lport->localport.port_num); 332 ida_destroy(&lport->endp_cnt); 333 334 put_device(lport->dev); 335 336 kfree(lport); 337 } 338 339 static void 340 nvme_fc_lport_put(struct nvme_fc_lport *lport) 341 { 342 kref_put(&lport->ref, nvme_fc_free_lport); 343 } 344 345 static int 346 nvme_fc_lport_get(struct nvme_fc_lport *lport) 347 { 348 return kref_get_unless_zero(&lport->ref); 349 } 350 351 /** 352 * nvme_fc_unregister_localport - transport entry point called by an 353 * LLDD to deregister/remove a previously 354 * registered a NVME host FC port. 355 * @localport: pointer to the (registered) local port that is to be 356 * deregistered. 357 * 358 * Returns: 359 * a completion status. Must be 0 upon success; a negative errno 360 * (ex: -ENXIO) upon failure. 
361 */ 362 int 363 nvme_fc_unregister_localport(struct nvme_fc_local_port *portptr) 364 { 365 struct nvme_fc_lport *lport = localport_to_lport(portptr); 366 unsigned long flags; 367 368 if (!portptr) 369 return -EINVAL; 370 371 spin_lock_irqsave(&nvme_fc_lock, flags); 372 373 if (portptr->port_state != FC_OBJSTATE_ONLINE) { 374 spin_unlock_irqrestore(&nvme_fc_lock, flags); 375 return -EINVAL; 376 } 377 portptr->port_state = FC_OBJSTATE_DELETED; 378 379 spin_unlock_irqrestore(&nvme_fc_lock, flags); 380 381 nvme_fc_lport_put(lport); 382 383 return 0; 384 } 385 EXPORT_SYMBOL_GPL(nvme_fc_unregister_localport); 386 387 /** 388 * nvme_fc_register_remoteport - transport entry point called by an 389 * LLDD to register the existence of a NVME 390 * subsystem FC port on its fabric. 391 * @localport: pointer to the (registered) local port that the remote 392 * subsystem port is connected to. 393 * @pinfo: pointer to information about the port to be registered 394 * @rport_p: pointer to a remote port pointer. Upon success, the routine 395 * will allocate a nvme_fc_remote_port structure and place its 396 * address in the remote port pointer. Upon failure, remote port 397 * pointer will be set to 0. 398 * 399 * Returns: 400 * a completion status. Must be 0 upon success; a negative errno 401 * (ex: -ENXIO) upon failure. 
402 */ 403 int 404 nvme_fc_register_remoteport(struct nvme_fc_local_port *localport, 405 struct nvme_fc_port_info *pinfo, 406 struct nvme_fc_remote_port **portptr) 407 { 408 struct nvme_fc_lport *lport = localport_to_lport(localport); 409 struct nvme_fc_rport *newrec; 410 unsigned long flags; 411 int ret, idx; 412 413 newrec = kmalloc((sizeof(*newrec) + lport->ops->remote_priv_sz), 414 GFP_KERNEL); 415 if (!newrec) { 416 ret = -ENOMEM; 417 goto out_reghost_failed; 418 } 419 420 if (!nvme_fc_lport_get(lport)) { 421 ret = -ESHUTDOWN; 422 goto out_kfree_rport; 423 } 424 425 idx = ida_simple_get(&lport->endp_cnt, 0, 0, GFP_KERNEL); 426 if (idx < 0) { 427 ret = -ENOSPC; 428 goto out_lport_put; 429 } 430 431 INIT_LIST_HEAD(&newrec->endp_list); 432 INIT_LIST_HEAD(&newrec->ctrl_list); 433 INIT_LIST_HEAD(&newrec->ls_req_list); 434 kref_init(&newrec->ref); 435 spin_lock_init(&newrec->lock); 436 newrec->remoteport.localport = &lport->localport; 437 newrec->dev = lport->dev; 438 newrec->lport = lport; 439 newrec->remoteport.private = &newrec[1]; 440 newrec->remoteport.port_role = pinfo->port_role; 441 newrec->remoteport.node_name = pinfo->node_name; 442 newrec->remoteport.port_name = pinfo->port_name; 443 newrec->remoteport.port_id = pinfo->port_id; 444 newrec->remoteport.port_state = FC_OBJSTATE_ONLINE; 445 newrec->remoteport.port_num = idx; 446 447 spin_lock_irqsave(&nvme_fc_lock, flags); 448 list_add_tail(&newrec->endp_list, &lport->endp_list); 449 spin_unlock_irqrestore(&nvme_fc_lock, flags); 450 451 *portptr = &newrec->remoteport; 452 return 0; 453 454 out_lport_put: 455 nvme_fc_lport_put(lport); 456 out_kfree_rport: 457 kfree(newrec); 458 out_reghost_failed: 459 *portptr = NULL; 460 return ret; 461 } 462 EXPORT_SYMBOL_GPL(nvme_fc_register_remoteport); 463 464 static void 465 nvme_fc_free_rport(struct kref *ref) 466 { 467 struct nvme_fc_rport *rport = 468 container_of(ref, struct nvme_fc_rport, ref); 469 struct nvme_fc_lport *lport = 470 
localport_to_lport(rport->remoteport.localport); 471 unsigned long flags; 472 473 WARN_ON(rport->remoteport.port_state != FC_OBJSTATE_DELETED); 474 WARN_ON(!list_empty(&rport->ctrl_list)); 475 476 /* remove from lport list */ 477 spin_lock_irqsave(&nvme_fc_lock, flags); 478 list_del(&rport->endp_list); 479 spin_unlock_irqrestore(&nvme_fc_lock, flags); 480 481 /* let the LLDD know we've finished tearing it down */ 482 lport->ops->remoteport_delete(&rport->remoteport); 483 484 ida_simple_remove(&lport->endp_cnt, rport->remoteport.port_num); 485 486 kfree(rport); 487 488 nvme_fc_lport_put(lport); 489 } 490 491 static void 492 nvme_fc_rport_put(struct nvme_fc_rport *rport) 493 { 494 kref_put(&rport->ref, nvme_fc_free_rport); 495 } 496 497 static int 498 nvme_fc_rport_get(struct nvme_fc_rport *rport) 499 { 500 return kref_get_unless_zero(&rport->ref); 501 } 502 503 static int 504 nvme_fc_abort_lsops(struct nvme_fc_rport *rport) 505 { 506 struct nvmefc_ls_req_op *lsop; 507 unsigned long flags; 508 509 restart: 510 spin_lock_irqsave(&rport->lock, flags); 511 512 list_for_each_entry(lsop, &rport->ls_req_list, lsreq_list) { 513 if (!(lsop->flags & FCOP_FLAGS_TERMIO)) { 514 lsop->flags |= FCOP_FLAGS_TERMIO; 515 spin_unlock_irqrestore(&rport->lock, flags); 516 rport->lport->ops->ls_abort(&rport->lport->localport, 517 &rport->remoteport, 518 &lsop->ls_req); 519 goto restart; 520 } 521 } 522 spin_unlock_irqrestore(&rport->lock, flags); 523 524 return 0; 525 } 526 527 /** 528 * nvme_fc_unregister_remoteport - transport entry point called by an 529 * LLDD to deregister/remove a previously 530 * registered a NVME subsystem FC port. 531 * @remoteport: pointer to the (registered) remote port that is to be 532 * deregistered. 533 * 534 * Returns: 535 * a completion status. Must be 0 upon success; a negative errno 536 * (ex: -ENXIO) upon failure. 
537 */ 538 int 539 nvme_fc_unregister_remoteport(struct nvme_fc_remote_port *portptr) 540 { 541 struct nvme_fc_rport *rport = remoteport_to_rport(portptr); 542 struct nvme_fc_ctrl *ctrl; 543 unsigned long flags; 544 545 if (!portptr) 546 return -EINVAL; 547 548 spin_lock_irqsave(&rport->lock, flags); 549 550 if (portptr->port_state != FC_OBJSTATE_ONLINE) { 551 spin_unlock_irqrestore(&rport->lock, flags); 552 return -EINVAL; 553 } 554 portptr->port_state = FC_OBJSTATE_DELETED; 555 556 /* tear down all associations to the remote port */ 557 list_for_each_entry(ctrl, &rport->ctrl_list, ctrl_list) 558 __nvme_fc_del_ctrl(ctrl); 559 560 spin_unlock_irqrestore(&rport->lock, flags); 561 562 nvme_fc_abort_lsops(rport); 563 564 nvme_fc_rport_put(rport); 565 return 0; 566 } 567 EXPORT_SYMBOL_GPL(nvme_fc_unregister_remoteport); 568 569 570 /* *********************** FC-NVME DMA Handling **************************** */ 571 572 /* 573 * The fcloop device passes in a NULL device pointer. Real LLD's will 574 * pass in a valid device pointer. If NULL is passed to the dma mapping 575 * routines, depending on the platform, it may or may not succeed, and 576 * may crash. 577 * 578 * As such: 579 * Wrapper all the dma routines and check the dev pointer. 580 * 581 * If simple mappings (return just a dma address, we'll noop them, 582 * returning a dma address of 0. 583 * 584 * On more complex mappings (dma_map_sg), a pseudo routine fills 585 * in the scatter list, setting all dma addresses to 0. 586 */ 587 588 static inline dma_addr_t 589 fc_dma_map_single(struct device *dev, void *ptr, size_t size, 590 enum dma_data_direction dir) 591 { 592 return dev ? dma_map_single(dev, ptr, size, dir) : (dma_addr_t)0L; 593 } 594 595 static inline int 596 fc_dma_mapping_error(struct device *dev, dma_addr_t dma_addr) 597 { 598 return dev ? 
dma_mapping_error(dev, dma_addr) : 0; 599 } 600 601 static inline void 602 fc_dma_unmap_single(struct device *dev, dma_addr_t addr, size_t size, 603 enum dma_data_direction dir) 604 { 605 if (dev) 606 dma_unmap_single(dev, addr, size, dir); 607 } 608 609 static inline void 610 fc_dma_sync_single_for_cpu(struct device *dev, dma_addr_t addr, size_t size, 611 enum dma_data_direction dir) 612 { 613 if (dev) 614 dma_sync_single_for_cpu(dev, addr, size, dir); 615 } 616 617 static inline void 618 fc_dma_sync_single_for_device(struct device *dev, dma_addr_t addr, size_t size, 619 enum dma_data_direction dir) 620 { 621 if (dev) 622 dma_sync_single_for_device(dev, addr, size, dir); 623 } 624 625 /* pseudo dma_map_sg call */ 626 static int 627 fc_map_sg(struct scatterlist *sg, int nents) 628 { 629 struct scatterlist *s; 630 int i; 631 632 WARN_ON(nents == 0 || sg[0].length == 0); 633 634 for_each_sg(sg, s, nents, i) { 635 s->dma_address = 0L; 636 #ifdef CONFIG_NEED_SG_DMA_LENGTH 637 s->dma_length = s->length; 638 #endif 639 } 640 return nents; 641 } 642 643 static inline int 644 fc_dma_map_sg(struct device *dev, struct scatterlist *sg, int nents, 645 enum dma_data_direction dir) 646 { 647 return dev ? 
dma_map_sg(dev, sg, nents, dir) : fc_map_sg(sg, nents); 648 } 649 650 static inline void 651 fc_dma_unmap_sg(struct device *dev, struct scatterlist *sg, int nents, 652 enum dma_data_direction dir) 653 { 654 if (dev) 655 dma_unmap_sg(dev, sg, nents, dir); 656 } 657 658 659 /* *********************** FC-NVME LS Handling **************************** */ 660 661 static void nvme_fc_ctrl_put(struct nvme_fc_ctrl *); 662 static int nvme_fc_ctrl_get(struct nvme_fc_ctrl *); 663 664 665 static void 666 __nvme_fc_finish_ls_req(struct nvmefc_ls_req_op *lsop) 667 { 668 struct nvme_fc_rport *rport = lsop->rport; 669 struct nvmefc_ls_req *lsreq = &lsop->ls_req; 670 unsigned long flags; 671 672 spin_lock_irqsave(&rport->lock, flags); 673 674 if (!lsop->req_queued) { 675 spin_unlock_irqrestore(&rport->lock, flags); 676 return; 677 } 678 679 list_del(&lsop->lsreq_list); 680 681 lsop->req_queued = false; 682 683 spin_unlock_irqrestore(&rport->lock, flags); 684 685 fc_dma_unmap_single(rport->dev, lsreq->rqstdma, 686 (lsreq->rqstlen + lsreq->rsplen), 687 DMA_BIDIRECTIONAL); 688 689 nvme_fc_rport_put(rport); 690 } 691 692 static int 693 __nvme_fc_send_ls_req(struct nvme_fc_rport *rport, 694 struct nvmefc_ls_req_op *lsop, 695 void (*done)(struct nvmefc_ls_req *req, int status)) 696 { 697 struct nvmefc_ls_req *lsreq = &lsop->ls_req; 698 unsigned long flags; 699 int ret = 0; 700 701 if (rport->remoteport.port_state != FC_OBJSTATE_ONLINE) 702 return -ECONNREFUSED; 703 704 if (!nvme_fc_rport_get(rport)) 705 return -ESHUTDOWN; 706 707 lsreq->done = done; 708 lsop->rport = rport; 709 lsop->req_queued = false; 710 INIT_LIST_HEAD(&lsop->lsreq_list); 711 init_completion(&lsop->ls_done); 712 713 lsreq->rqstdma = fc_dma_map_single(rport->dev, lsreq->rqstaddr, 714 lsreq->rqstlen + lsreq->rsplen, 715 DMA_BIDIRECTIONAL); 716 if (fc_dma_mapping_error(rport->dev, lsreq->rqstdma)) { 717 ret = -EFAULT; 718 goto out_putrport; 719 } 720 lsreq->rspdma = lsreq->rqstdma + lsreq->rqstlen; 721 722 
spin_lock_irqsave(&rport->lock, flags); 723 724 list_add_tail(&lsop->lsreq_list, &rport->ls_req_list); 725 726 lsop->req_queued = true; 727 728 spin_unlock_irqrestore(&rport->lock, flags); 729 730 ret = rport->lport->ops->ls_req(&rport->lport->localport, 731 &rport->remoteport, lsreq); 732 if (ret) 733 goto out_unlink; 734 735 return 0; 736 737 out_unlink: 738 lsop->ls_error = ret; 739 spin_lock_irqsave(&rport->lock, flags); 740 lsop->req_queued = false; 741 list_del(&lsop->lsreq_list); 742 spin_unlock_irqrestore(&rport->lock, flags); 743 fc_dma_unmap_single(rport->dev, lsreq->rqstdma, 744 (lsreq->rqstlen + lsreq->rsplen), 745 DMA_BIDIRECTIONAL); 746 out_putrport: 747 nvme_fc_rport_put(rport); 748 749 return ret; 750 } 751 752 static void 753 nvme_fc_send_ls_req_done(struct nvmefc_ls_req *lsreq, int status) 754 { 755 struct nvmefc_ls_req_op *lsop = ls_req_to_lsop(lsreq); 756 757 lsop->ls_error = status; 758 complete(&lsop->ls_done); 759 } 760 761 static int 762 nvme_fc_send_ls_req(struct nvme_fc_rport *rport, struct nvmefc_ls_req_op *lsop) 763 { 764 struct nvmefc_ls_req *lsreq = &lsop->ls_req; 765 struct fcnvme_ls_rjt *rjt = lsreq->rspaddr; 766 int ret; 767 768 ret = __nvme_fc_send_ls_req(rport, lsop, nvme_fc_send_ls_req_done); 769 770 if (!ret) { 771 /* 772 * No timeout/not interruptible as we need the struct 773 * to exist until the lldd calls us back. Thus mandate 774 * wait until driver calls back. lldd responsible for 775 * the timeout action 776 */ 777 wait_for_completion(&lsop->ls_done); 778 779 __nvme_fc_finish_ls_req(lsop); 780 781 ret = lsop->ls_error; 782 } 783 784 if (ret) 785 return ret; 786 787 /* ACC or RJT payload ? 
*/ 788 if (rjt->w0.ls_cmd == FCNVME_LS_RJT) 789 return -ENXIO; 790 791 return 0; 792 } 793 794 static int 795 nvme_fc_send_ls_req_async(struct nvme_fc_rport *rport, 796 struct nvmefc_ls_req_op *lsop, 797 void (*done)(struct nvmefc_ls_req *req, int status)) 798 { 799 /* don't wait for completion */ 800 801 return __nvme_fc_send_ls_req(rport, lsop, done); 802 } 803 804 /* Validation Error indexes into the string table below */ 805 enum { 806 VERR_NO_ERROR = 0, 807 VERR_LSACC = 1, 808 VERR_LSDESC_RQST = 2, 809 VERR_LSDESC_RQST_LEN = 3, 810 VERR_ASSOC_ID = 4, 811 VERR_ASSOC_ID_LEN = 5, 812 VERR_CONN_ID = 6, 813 VERR_CONN_ID_LEN = 7, 814 VERR_CR_ASSOC = 8, 815 VERR_CR_ASSOC_ACC_LEN = 9, 816 VERR_CR_CONN = 10, 817 VERR_CR_CONN_ACC_LEN = 11, 818 VERR_DISCONN = 12, 819 VERR_DISCONN_ACC_LEN = 13, 820 }; 821 822 static char *validation_errors[] = { 823 "OK", 824 "Not LS_ACC", 825 "Not LSDESC_RQST", 826 "Bad LSDESC_RQST Length", 827 "Not Association ID", 828 "Bad Association ID Length", 829 "Not Connection ID", 830 "Bad Connection ID Length", 831 "Not CR_ASSOC Rqst", 832 "Bad CR_ASSOC ACC Length", 833 "Not CR_CONN Rqst", 834 "Bad CR_CONN ACC Length", 835 "Not Disconnect Rqst", 836 "Bad Disconnect ACC Length", 837 }; 838 839 static int 840 nvme_fc_connect_admin_queue(struct nvme_fc_ctrl *ctrl, 841 struct nvme_fc_queue *queue, u16 qsize, u16 ersp_ratio) 842 { 843 struct nvmefc_ls_req_op *lsop; 844 struct nvmefc_ls_req *lsreq; 845 struct fcnvme_ls_cr_assoc_rqst *assoc_rqst; 846 struct fcnvme_ls_cr_assoc_acc *assoc_acc; 847 int ret, fcret = 0; 848 849 lsop = kzalloc((sizeof(*lsop) + 850 ctrl->lport->ops->lsrqst_priv_sz + 851 sizeof(*assoc_rqst) + sizeof(*assoc_acc)), GFP_KERNEL); 852 if (!lsop) { 853 ret = -ENOMEM; 854 goto out_no_memory; 855 } 856 lsreq = &lsop->ls_req; 857 858 lsreq->private = (void *)&lsop[1]; 859 assoc_rqst = (struct fcnvme_ls_cr_assoc_rqst *) 860 (lsreq->private + ctrl->lport->ops->lsrqst_priv_sz); 861 assoc_acc = (struct fcnvme_ls_cr_assoc_acc 
*)&assoc_rqst[1]; 862 863 assoc_rqst->w0.ls_cmd = FCNVME_LS_CREATE_ASSOCIATION; 864 assoc_rqst->desc_list_len = 865 cpu_to_be32(sizeof(struct fcnvme_lsdesc_cr_assoc_cmd)); 866 867 assoc_rqst->assoc_cmd.desc_tag = 868 cpu_to_be32(FCNVME_LSDESC_CREATE_ASSOC_CMD); 869 assoc_rqst->assoc_cmd.desc_len = 870 fcnvme_lsdesc_len( 871 sizeof(struct fcnvme_lsdesc_cr_assoc_cmd)); 872 873 assoc_rqst->assoc_cmd.ersp_ratio = cpu_to_be16(ersp_ratio); 874 assoc_rqst->assoc_cmd.sqsize = cpu_to_be16(qsize); 875 /* Linux supports only Dynamic controllers */ 876 assoc_rqst->assoc_cmd.cntlid = cpu_to_be16(0xffff); 877 uuid_copy(&assoc_rqst->assoc_cmd.hostid, &ctrl->ctrl.opts->host->id); 878 strncpy(assoc_rqst->assoc_cmd.hostnqn, ctrl->ctrl.opts->host->nqn, 879 min(FCNVME_ASSOC_HOSTNQN_LEN, NVMF_NQN_SIZE)); 880 strncpy(assoc_rqst->assoc_cmd.subnqn, ctrl->ctrl.opts->subsysnqn, 881 min(FCNVME_ASSOC_SUBNQN_LEN, NVMF_NQN_SIZE)); 882 883 lsop->queue = queue; 884 lsreq->rqstaddr = assoc_rqst; 885 lsreq->rqstlen = sizeof(*assoc_rqst); 886 lsreq->rspaddr = assoc_acc; 887 lsreq->rsplen = sizeof(*assoc_acc); 888 lsreq->timeout = NVME_FC_CONNECT_TIMEOUT_SEC; 889 890 ret = nvme_fc_send_ls_req(ctrl->rport, lsop); 891 if (ret) 892 goto out_free_buffer; 893 894 /* process connect LS completion */ 895 896 /* validate the ACC response */ 897 if (assoc_acc->hdr.w0.ls_cmd != FCNVME_LS_ACC) 898 fcret = VERR_LSACC; 899 else if (assoc_acc->hdr.desc_list_len != 900 fcnvme_lsdesc_len( 901 sizeof(struct fcnvme_ls_cr_assoc_acc))) 902 fcret = VERR_CR_ASSOC_ACC_LEN; 903 else if (assoc_acc->hdr.rqst.desc_tag != 904 cpu_to_be32(FCNVME_LSDESC_RQST)) 905 fcret = VERR_LSDESC_RQST; 906 else if (assoc_acc->hdr.rqst.desc_len != 907 fcnvme_lsdesc_len(sizeof(struct fcnvme_lsdesc_rqst))) 908 fcret = VERR_LSDESC_RQST_LEN; 909 else if (assoc_acc->hdr.rqst.w0.ls_cmd != FCNVME_LS_CREATE_ASSOCIATION) 910 fcret = VERR_CR_ASSOC; 911 else if (assoc_acc->associd.desc_tag != 912 cpu_to_be32(FCNVME_LSDESC_ASSOC_ID)) 913 fcret = 
VERR_ASSOC_ID; 914 else if (assoc_acc->associd.desc_len != 915 fcnvme_lsdesc_len( 916 sizeof(struct fcnvme_lsdesc_assoc_id))) 917 fcret = VERR_ASSOC_ID_LEN; 918 else if (assoc_acc->connectid.desc_tag != 919 cpu_to_be32(FCNVME_LSDESC_CONN_ID)) 920 fcret = VERR_CONN_ID; 921 else if (assoc_acc->connectid.desc_len != 922 fcnvme_lsdesc_len(sizeof(struct fcnvme_lsdesc_conn_id))) 923 fcret = VERR_CONN_ID_LEN; 924 925 if (fcret) { 926 ret = -EBADF; 927 dev_err(ctrl->dev, 928 "q %d connect failed: %s\n", 929 queue->qnum, validation_errors[fcret]); 930 } else { 931 ctrl->association_id = 932 be64_to_cpu(assoc_acc->associd.association_id); 933 queue->connection_id = 934 be64_to_cpu(assoc_acc->connectid.connection_id); 935 set_bit(NVME_FC_Q_CONNECTED, &queue->flags); 936 } 937 938 out_free_buffer: 939 kfree(lsop); 940 out_no_memory: 941 if (ret) 942 dev_err(ctrl->dev, 943 "queue %d connect admin queue failed (%d).\n", 944 queue->qnum, ret); 945 return ret; 946 } 947 948 static int 949 nvme_fc_connect_queue(struct nvme_fc_ctrl *ctrl, struct nvme_fc_queue *queue, 950 u16 qsize, u16 ersp_ratio) 951 { 952 struct nvmefc_ls_req_op *lsop; 953 struct nvmefc_ls_req *lsreq; 954 struct fcnvme_ls_cr_conn_rqst *conn_rqst; 955 struct fcnvme_ls_cr_conn_acc *conn_acc; 956 int ret, fcret = 0; 957 958 lsop = kzalloc((sizeof(*lsop) + 959 ctrl->lport->ops->lsrqst_priv_sz + 960 sizeof(*conn_rqst) + sizeof(*conn_acc)), GFP_KERNEL); 961 if (!lsop) { 962 ret = -ENOMEM; 963 goto out_no_memory; 964 } 965 lsreq = &lsop->ls_req; 966 967 lsreq->private = (void *)&lsop[1]; 968 conn_rqst = (struct fcnvme_ls_cr_conn_rqst *) 969 (lsreq->private + ctrl->lport->ops->lsrqst_priv_sz); 970 conn_acc = (struct fcnvme_ls_cr_conn_acc *)&conn_rqst[1]; 971 972 conn_rqst->w0.ls_cmd = FCNVME_LS_CREATE_CONNECTION; 973 conn_rqst->desc_list_len = cpu_to_be32( 974 sizeof(struct fcnvme_lsdesc_assoc_id) + 975 sizeof(struct fcnvme_lsdesc_cr_conn_cmd)); 976 977 conn_rqst->associd.desc_tag = cpu_to_be32(FCNVME_LSDESC_ASSOC_ID); 
978 conn_rqst->associd.desc_len = 979 fcnvme_lsdesc_len( 980 sizeof(struct fcnvme_lsdesc_assoc_id)); 981 conn_rqst->associd.association_id = cpu_to_be64(ctrl->association_id); 982 conn_rqst->connect_cmd.desc_tag = 983 cpu_to_be32(FCNVME_LSDESC_CREATE_CONN_CMD); 984 conn_rqst->connect_cmd.desc_len = 985 fcnvme_lsdesc_len( 986 sizeof(struct fcnvme_lsdesc_cr_conn_cmd)); 987 conn_rqst->connect_cmd.ersp_ratio = cpu_to_be16(ersp_ratio); 988 conn_rqst->connect_cmd.qid = cpu_to_be16(queue->qnum); 989 conn_rqst->connect_cmd.sqsize = cpu_to_be16(qsize); 990 991 lsop->queue = queue; 992 lsreq->rqstaddr = conn_rqst; 993 lsreq->rqstlen = sizeof(*conn_rqst); 994 lsreq->rspaddr = conn_acc; 995 lsreq->rsplen = sizeof(*conn_acc); 996 lsreq->timeout = NVME_FC_CONNECT_TIMEOUT_SEC; 997 998 ret = nvme_fc_send_ls_req(ctrl->rport, lsop); 999 if (ret) 1000 goto out_free_buffer; 1001 1002 /* process connect LS completion */ 1003 1004 /* validate the ACC response */ 1005 if (conn_acc->hdr.w0.ls_cmd != FCNVME_LS_ACC) 1006 fcret = VERR_LSACC; 1007 else if (conn_acc->hdr.desc_list_len != 1008 fcnvme_lsdesc_len(sizeof(struct fcnvme_ls_cr_conn_acc))) 1009 fcret = VERR_CR_CONN_ACC_LEN; 1010 else if (conn_acc->hdr.rqst.desc_tag != cpu_to_be32(FCNVME_LSDESC_RQST)) 1011 fcret = VERR_LSDESC_RQST; 1012 else if (conn_acc->hdr.rqst.desc_len != 1013 fcnvme_lsdesc_len(sizeof(struct fcnvme_lsdesc_rqst))) 1014 fcret = VERR_LSDESC_RQST_LEN; 1015 else if (conn_acc->hdr.rqst.w0.ls_cmd != FCNVME_LS_CREATE_CONNECTION) 1016 fcret = VERR_CR_CONN; 1017 else if (conn_acc->connectid.desc_tag != 1018 cpu_to_be32(FCNVME_LSDESC_CONN_ID)) 1019 fcret = VERR_CONN_ID; 1020 else if (conn_acc->connectid.desc_len != 1021 fcnvme_lsdesc_len(sizeof(struct fcnvme_lsdesc_conn_id))) 1022 fcret = VERR_CONN_ID_LEN; 1023 1024 if (fcret) { 1025 ret = -EBADF; 1026 dev_err(ctrl->dev, 1027 "q %d connect failed: %s\n", 1028 queue->qnum, validation_errors[fcret]); 1029 } else { 1030 queue->connection_id = 1031 
be64_to_cpu(conn_acc->connectid.connection_id); 1032 set_bit(NVME_FC_Q_CONNECTED, &queue->flags); 1033 } 1034 1035 out_free_buffer: 1036 kfree(lsop); 1037 out_no_memory: 1038 if (ret) 1039 dev_err(ctrl->dev, 1040 "queue %d connect command failed (%d).\n", 1041 queue->qnum, ret); 1042 return ret; 1043 } 1044 1045 static void 1046 nvme_fc_disconnect_assoc_done(struct nvmefc_ls_req *lsreq, int status) 1047 { 1048 struct nvmefc_ls_req_op *lsop = ls_req_to_lsop(lsreq); 1049 1050 __nvme_fc_finish_ls_req(lsop); 1051 1052 /* fc-nvme iniator doesn't care about success or failure of cmd */ 1053 1054 kfree(lsop); 1055 } 1056 1057 /* 1058 * This routine sends a FC-NVME LS to disconnect (aka terminate) 1059 * the FC-NVME Association. Terminating the association also 1060 * terminates the FC-NVME connections (per queue, both admin and io 1061 * queues) that are part of the association. E.g. things are torn 1062 * down, and the related FC-NVME Association ID and Connection IDs 1063 * become invalid. 1064 * 1065 * The behavior of the fc-nvme initiator is such that it's 1066 * understanding of the association and connections will implicitly 1067 * be torn down. The action is implicit as it may be due to a loss of 1068 * connectivity with the fc-nvme target, so you may never get a 1069 * response even if you tried. As such, the action of this routine 1070 * is to asynchronously send the LS, ignore any results of the LS, and 1071 * continue on with terminating the association. If the fc-nvme target 1072 * is present and receives the LS, it too can tear down. 
1073 */ 1074 static void 1075 nvme_fc_xmt_disconnect_assoc(struct nvme_fc_ctrl *ctrl) 1076 { 1077 struct fcnvme_ls_disconnect_rqst *discon_rqst; 1078 struct fcnvme_ls_disconnect_acc *discon_acc; 1079 struct nvmefc_ls_req_op *lsop; 1080 struct nvmefc_ls_req *lsreq; 1081 int ret; 1082 1083 lsop = kzalloc((sizeof(*lsop) + 1084 ctrl->lport->ops->lsrqst_priv_sz + 1085 sizeof(*discon_rqst) + sizeof(*discon_acc)), 1086 GFP_KERNEL); 1087 if (!lsop) 1088 /* couldn't sent it... too bad */ 1089 return; 1090 1091 lsreq = &lsop->ls_req; 1092 1093 lsreq->private = (void *)&lsop[1]; 1094 discon_rqst = (struct fcnvme_ls_disconnect_rqst *) 1095 (lsreq->private + ctrl->lport->ops->lsrqst_priv_sz); 1096 discon_acc = (struct fcnvme_ls_disconnect_acc *)&discon_rqst[1]; 1097 1098 discon_rqst->w0.ls_cmd = FCNVME_LS_DISCONNECT; 1099 discon_rqst->desc_list_len = cpu_to_be32( 1100 sizeof(struct fcnvme_lsdesc_assoc_id) + 1101 sizeof(struct fcnvme_lsdesc_disconn_cmd)); 1102 1103 discon_rqst->associd.desc_tag = cpu_to_be32(FCNVME_LSDESC_ASSOC_ID); 1104 discon_rqst->associd.desc_len = 1105 fcnvme_lsdesc_len( 1106 sizeof(struct fcnvme_lsdesc_assoc_id)); 1107 1108 discon_rqst->associd.association_id = cpu_to_be64(ctrl->association_id); 1109 1110 discon_rqst->discon_cmd.desc_tag = cpu_to_be32( 1111 FCNVME_LSDESC_DISCONN_CMD); 1112 discon_rqst->discon_cmd.desc_len = 1113 fcnvme_lsdesc_len( 1114 sizeof(struct fcnvme_lsdesc_disconn_cmd)); 1115 discon_rqst->discon_cmd.scope = FCNVME_DISCONN_ASSOCIATION; 1116 discon_rqst->discon_cmd.id = cpu_to_be64(ctrl->association_id); 1117 1118 lsreq->rqstaddr = discon_rqst; 1119 lsreq->rqstlen = sizeof(*discon_rqst); 1120 lsreq->rspaddr = discon_acc; 1121 lsreq->rsplen = sizeof(*discon_acc); 1122 lsreq->timeout = NVME_FC_CONNECT_TIMEOUT_SEC; 1123 1124 ret = nvme_fc_send_ls_req_async(ctrl->rport, lsop, 1125 nvme_fc_disconnect_assoc_done); 1126 if (ret) 1127 kfree(lsop); 1128 1129 /* only meaningful part to terminating the association */ 1130 ctrl->association_id = 
0; 1131 } 1132 1133 1134 /* *********************** NVME Ctrl Routines **************************** */ 1135 1136 static void __nvme_fc_final_op_cleanup(struct request *rq); 1137 static void nvme_fc_error_recovery(struct nvme_fc_ctrl *ctrl, char *errmsg); 1138 1139 static int 1140 nvme_fc_reinit_request(void *data, struct request *rq) 1141 { 1142 struct nvme_fc_fcp_op *op = blk_mq_rq_to_pdu(rq); 1143 struct nvme_fc_cmd_iu *cmdiu = &op->cmd_iu; 1144 1145 memset(cmdiu, 0, sizeof(*cmdiu)); 1146 cmdiu->scsi_id = NVME_CMD_SCSI_ID; 1147 cmdiu->fc_id = NVME_CMD_FC_ID; 1148 cmdiu->iu_len = cpu_to_be16(sizeof(*cmdiu) / sizeof(u32)); 1149 memset(&op->rsp_iu, 0, sizeof(op->rsp_iu)); 1150 1151 return 0; 1152 } 1153 1154 static void 1155 __nvme_fc_exit_request(struct nvme_fc_ctrl *ctrl, 1156 struct nvme_fc_fcp_op *op) 1157 { 1158 fc_dma_unmap_single(ctrl->lport->dev, op->fcp_req.rspdma, 1159 sizeof(op->rsp_iu), DMA_FROM_DEVICE); 1160 fc_dma_unmap_single(ctrl->lport->dev, op->fcp_req.cmddma, 1161 sizeof(op->cmd_iu), DMA_TO_DEVICE); 1162 1163 atomic_set(&op->state, FCPOP_STATE_UNINIT); 1164 } 1165 1166 static void 1167 nvme_fc_exit_request(struct blk_mq_tag_set *set, struct request *rq, 1168 unsigned int hctx_idx) 1169 { 1170 struct nvme_fc_fcp_op *op = blk_mq_rq_to_pdu(rq); 1171 1172 return __nvme_fc_exit_request(set->driver_data, op); 1173 } 1174 1175 static int 1176 __nvme_fc_abort_op(struct nvme_fc_ctrl *ctrl, struct nvme_fc_fcp_op *op) 1177 { 1178 int state; 1179 1180 state = atomic_xchg(&op->state, FCPOP_STATE_ABORTED); 1181 if (state != FCPOP_STATE_ACTIVE) { 1182 atomic_set(&op->state, state); 1183 return -ECANCELED; 1184 } 1185 1186 ctrl->lport->ops->fcp_abort(&ctrl->lport->localport, 1187 &ctrl->rport->remoteport, 1188 op->queue->lldd_handle, 1189 &op->fcp_req); 1190 1191 return 0; 1192 } 1193 1194 static void 1195 nvme_fc_abort_aen_ops(struct nvme_fc_ctrl *ctrl) 1196 { 1197 struct nvme_fc_fcp_op *aen_op = ctrl->aen_ops; 1198 unsigned long flags; 1199 int i, ret; 1200 
1201 for (i = 0; i < NVME_FC_NR_AEN_COMMANDS; i++, aen_op++) { 1202 if (atomic_read(&aen_op->state) != FCPOP_STATE_ACTIVE) 1203 continue; 1204 1205 spin_lock_irqsave(&ctrl->lock, flags); 1206 if (ctrl->flags & FCCTRL_TERMIO) { 1207 ctrl->iocnt++; 1208 aen_op->flags |= FCOP_FLAGS_TERMIO; 1209 } 1210 spin_unlock_irqrestore(&ctrl->lock, flags); 1211 1212 ret = __nvme_fc_abort_op(ctrl, aen_op); 1213 if (ret) { 1214 /* 1215 * if __nvme_fc_abort_op failed the io wasn't 1216 * active. Thus this call path is running in 1217 * parallel to the io complete. Treat as non-error. 1218 */ 1219 1220 /* back out the flags/counters */ 1221 spin_lock_irqsave(&ctrl->lock, flags); 1222 if (ctrl->flags & FCCTRL_TERMIO) 1223 ctrl->iocnt--; 1224 aen_op->flags &= ~FCOP_FLAGS_TERMIO; 1225 spin_unlock_irqrestore(&ctrl->lock, flags); 1226 return; 1227 } 1228 } 1229 } 1230 1231 static inline int 1232 __nvme_fc_fcpop_chk_teardowns(struct nvme_fc_ctrl *ctrl, 1233 struct nvme_fc_fcp_op *op) 1234 { 1235 unsigned long flags; 1236 bool complete_rq = false; 1237 1238 spin_lock_irqsave(&ctrl->lock, flags); 1239 if (unlikely(op->flags & FCOP_FLAGS_TERMIO)) { 1240 if (ctrl->flags & FCCTRL_TERMIO) { 1241 if (!--ctrl->iocnt) 1242 wake_up(&ctrl->ioabort_wait); 1243 } 1244 } 1245 if (op->flags & FCOP_FLAGS_RELEASED) 1246 complete_rq = true; 1247 else 1248 op->flags |= FCOP_FLAGS_COMPLETE; 1249 spin_unlock_irqrestore(&ctrl->lock, flags); 1250 1251 return complete_rq; 1252 } 1253 1254 static void 1255 nvme_fc_fcpio_done(struct nvmefc_fcp_req *req) 1256 { 1257 struct nvme_fc_fcp_op *op = fcp_req_to_fcp_op(req); 1258 struct request *rq = op->rq; 1259 struct nvmefc_fcp_req *freq = &op->fcp_req; 1260 struct nvme_fc_ctrl *ctrl = op->ctrl; 1261 struct nvme_fc_queue *queue = op->queue; 1262 struct nvme_completion *cqe = &op->rsp_iu.cqe; 1263 struct nvme_command *sqe = &op->cmd_iu.sqe; 1264 __le16 status = cpu_to_le16(NVME_SC_SUCCESS << 1); 1265 union nvme_result result; 1266 bool complete_rq, terminate_assoc = true; 
1267 1268 /* 1269 * WARNING: 1270 * The current linux implementation of a nvme controller 1271 * allocates a single tag set for all io queues and sizes 1272 * the io queues to fully hold all possible tags. Thus, the 1273 * implementation does not reference or care about the sqhd 1274 * value as it never needs to use the sqhd/sqtail pointers 1275 * for submission pacing. 1276 * 1277 * This affects the FC-NVME implementation in two ways: 1278 * 1) As the value doesn't matter, we don't need to waste 1279 * cycles extracting it from ERSPs and stamping it in the 1280 * cases where the transport fabricates CQEs on successful 1281 * completions. 1282 * 2) The FC-NVME implementation requires that delivery of 1283 * ERSP completions are to go back to the nvme layer in order 1284 * relative to the rsn, such that the sqhd value will always 1285 * be "in order" for the nvme layer. As the nvme layer in 1286 * linux doesn't care about sqhd, there's no need to return 1287 * them in order. 1288 * 1289 * Additionally: 1290 * As the core nvme layer in linux currently does not look at 1291 * every field in the cqe - in cases where the FC transport must 1292 * fabricate a CQE, the following fields will not be set as they 1293 * are not referenced: 1294 * cqe.sqid, cqe.sqhd, cqe.command_id 1295 * 1296 * Failure or error of an individual i/o, in a transport 1297 * detected fashion unrelated to the nvme completion status, 1298 * potentially cause the initiator and target sides to get out 1299 * of sync on SQ head/tail (aka outstanding io count allowed). 1300 * Per FC-NVME spec, failure of an individual command requires 1301 * the connection to be terminated, which in turn requires the 1302 * association to be terminated. 
1303 */ 1304 1305 fc_dma_sync_single_for_cpu(ctrl->lport->dev, op->fcp_req.rspdma, 1306 sizeof(op->rsp_iu), DMA_FROM_DEVICE); 1307 1308 if (atomic_read(&op->state) == FCPOP_STATE_ABORTED) 1309 status = cpu_to_le16((NVME_SC_ABORT_REQ | NVME_SC_DNR) << 1); 1310 else if (freq->status) 1311 status = cpu_to_le16(NVME_SC_FC_TRANSPORT_ERROR << 1); 1312 1313 /* 1314 * For the linux implementation, if we have an unsuccesful 1315 * status, they blk-mq layer can typically be called with the 1316 * non-zero status and the content of the cqe isn't important. 1317 */ 1318 if (status) 1319 goto done; 1320 1321 /* 1322 * command completed successfully relative to the wire 1323 * protocol. However, validate anything received and 1324 * extract the status and result from the cqe (create it 1325 * where necessary). 1326 */ 1327 1328 switch (freq->rcv_rsplen) { 1329 1330 case 0: 1331 case NVME_FC_SIZEOF_ZEROS_RSP: 1332 /* 1333 * No response payload or 12 bytes of payload (which 1334 * should all be zeros) are considered successful and 1335 * no payload in the CQE by the transport. 1336 */ 1337 if (freq->transferred_length != 1338 be32_to_cpu(op->cmd_iu.data_len)) { 1339 status = cpu_to_le16(NVME_SC_FC_TRANSPORT_ERROR << 1); 1340 goto done; 1341 } 1342 result.u64 = 0; 1343 break; 1344 1345 case sizeof(struct nvme_fc_ersp_iu): 1346 /* 1347 * The ERSP IU contains a full completion with CQE. 1348 * Validate ERSP IU and look at cqe. 
1349 */ 1350 if (unlikely(be16_to_cpu(op->rsp_iu.iu_len) != 1351 (freq->rcv_rsplen / 4) || 1352 be32_to_cpu(op->rsp_iu.xfrd_len) != 1353 freq->transferred_length || 1354 op->rsp_iu.status_code || 1355 sqe->common.command_id != cqe->command_id)) { 1356 status = cpu_to_le16(NVME_SC_FC_TRANSPORT_ERROR << 1); 1357 goto done; 1358 } 1359 result = cqe->result; 1360 status = cqe->status; 1361 break; 1362 1363 default: 1364 status = cpu_to_le16(NVME_SC_FC_TRANSPORT_ERROR << 1); 1365 goto done; 1366 } 1367 1368 terminate_assoc = false; 1369 1370 done: 1371 if (op->flags & FCOP_FLAGS_AEN) { 1372 nvme_complete_async_event(&queue->ctrl->ctrl, status, &result); 1373 complete_rq = __nvme_fc_fcpop_chk_teardowns(ctrl, op); 1374 atomic_set(&op->state, FCPOP_STATE_IDLE); 1375 op->flags = FCOP_FLAGS_AEN; /* clear other flags */ 1376 nvme_fc_ctrl_put(ctrl); 1377 goto check_error; 1378 } 1379 1380 complete_rq = __nvme_fc_fcpop_chk_teardowns(ctrl, op); 1381 if (!complete_rq) { 1382 if (unlikely(op->flags & FCOP_FLAGS_TERMIO)) { 1383 status = cpu_to_le16(NVME_SC_ABORT_REQ << 1); 1384 if (blk_queue_dying(rq->q)) 1385 status |= cpu_to_le16(NVME_SC_DNR << 1); 1386 } 1387 nvme_end_request(rq, status, result); 1388 } else 1389 __nvme_fc_final_op_cleanup(rq); 1390 1391 check_error: 1392 if (terminate_assoc) 1393 nvme_fc_error_recovery(ctrl, "transport detected io error"); 1394 } 1395 1396 static int 1397 __nvme_fc_init_request(struct nvme_fc_ctrl *ctrl, 1398 struct nvme_fc_queue *queue, struct nvme_fc_fcp_op *op, 1399 struct request *rq, u32 rqno) 1400 { 1401 struct nvme_fc_cmd_iu *cmdiu = &op->cmd_iu; 1402 int ret = 0; 1403 1404 memset(op, 0, sizeof(*op)); 1405 op->fcp_req.cmdaddr = &op->cmd_iu; 1406 op->fcp_req.cmdlen = sizeof(op->cmd_iu); 1407 op->fcp_req.rspaddr = &op->rsp_iu; 1408 op->fcp_req.rsplen = sizeof(op->rsp_iu); 1409 op->fcp_req.done = nvme_fc_fcpio_done; 1410 op->fcp_req.first_sgl = (struct scatterlist *)&op[1]; 1411 op->fcp_req.private = &op->fcp_req.first_sgl[SG_CHUNK_SIZE]; 
1412 op->ctrl = ctrl; 1413 op->queue = queue; 1414 op->rq = rq; 1415 op->rqno = rqno; 1416 1417 cmdiu->scsi_id = NVME_CMD_SCSI_ID; 1418 cmdiu->fc_id = NVME_CMD_FC_ID; 1419 cmdiu->iu_len = cpu_to_be16(sizeof(*cmdiu) / sizeof(u32)); 1420 1421 op->fcp_req.cmddma = fc_dma_map_single(ctrl->lport->dev, 1422 &op->cmd_iu, sizeof(op->cmd_iu), DMA_TO_DEVICE); 1423 if (fc_dma_mapping_error(ctrl->lport->dev, op->fcp_req.cmddma)) { 1424 dev_err(ctrl->dev, 1425 "FCP Op failed - cmdiu dma mapping failed.\n"); 1426 ret = EFAULT; 1427 goto out_on_error; 1428 } 1429 1430 op->fcp_req.rspdma = fc_dma_map_single(ctrl->lport->dev, 1431 &op->rsp_iu, sizeof(op->rsp_iu), 1432 DMA_FROM_DEVICE); 1433 if (fc_dma_mapping_error(ctrl->lport->dev, op->fcp_req.rspdma)) { 1434 dev_err(ctrl->dev, 1435 "FCP Op failed - rspiu dma mapping failed.\n"); 1436 ret = EFAULT; 1437 } 1438 1439 atomic_set(&op->state, FCPOP_STATE_IDLE); 1440 out_on_error: 1441 return ret; 1442 } 1443 1444 static int 1445 nvme_fc_init_request(struct blk_mq_tag_set *set, struct request *rq, 1446 unsigned int hctx_idx, unsigned int numa_node) 1447 { 1448 struct nvme_fc_ctrl *ctrl = set->driver_data; 1449 struct nvme_fc_fcp_op *op = blk_mq_rq_to_pdu(rq); 1450 int queue_idx = (set == &ctrl->tag_set) ? 
hctx_idx + 1 : 0; 1451 struct nvme_fc_queue *queue = &ctrl->queues[queue_idx]; 1452 1453 return __nvme_fc_init_request(ctrl, queue, op, rq, queue->rqcnt++); 1454 } 1455 1456 static int 1457 nvme_fc_init_aen_ops(struct nvme_fc_ctrl *ctrl) 1458 { 1459 struct nvme_fc_fcp_op *aen_op; 1460 struct nvme_fc_cmd_iu *cmdiu; 1461 struct nvme_command *sqe; 1462 void *private; 1463 int i, ret; 1464 1465 aen_op = ctrl->aen_ops; 1466 for (i = 0; i < NVME_FC_NR_AEN_COMMANDS; i++, aen_op++) { 1467 private = kzalloc(ctrl->lport->ops->fcprqst_priv_sz, 1468 GFP_KERNEL); 1469 if (!private) 1470 return -ENOMEM; 1471 1472 cmdiu = &aen_op->cmd_iu; 1473 sqe = &cmdiu->sqe; 1474 ret = __nvme_fc_init_request(ctrl, &ctrl->queues[0], 1475 aen_op, (struct request *)NULL, 1476 (AEN_CMDID_BASE + i)); 1477 if (ret) { 1478 kfree(private); 1479 return ret; 1480 } 1481 1482 aen_op->flags = FCOP_FLAGS_AEN; 1483 aen_op->fcp_req.first_sgl = NULL; /* no sg list */ 1484 aen_op->fcp_req.private = private; 1485 1486 memset(sqe, 0, sizeof(*sqe)); 1487 sqe->common.opcode = nvme_admin_async_event; 1488 /* Note: core layer may overwrite the sqe.command_id value */ 1489 sqe->common.command_id = AEN_CMDID_BASE + i; 1490 } 1491 return 0; 1492 } 1493 1494 static void 1495 nvme_fc_term_aen_ops(struct nvme_fc_ctrl *ctrl) 1496 { 1497 struct nvme_fc_fcp_op *aen_op; 1498 int i; 1499 1500 aen_op = ctrl->aen_ops; 1501 for (i = 0; i < NVME_FC_NR_AEN_COMMANDS; i++, aen_op++) { 1502 if (!aen_op->fcp_req.private) 1503 continue; 1504 1505 __nvme_fc_exit_request(ctrl, aen_op); 1506 1507 kfree(aen_op->fcp_req.private); 1508 aen_op->fcp_req.private = NULL; 1509 } 1510 } 1511 1512 static inline void 1513 __nvme_fc_init_hctx(struct blk_mq_hw_ctx *hctx, struct nvme_fc_ctrl *ctrl, 1514 unsigned int qidx) 1515 { 1516 struct nvme_fc_queue *queue = &ctrl->queues[qidx]; 1517 1518 hctx->driver_data = queue; 1519 queue->hctx = hctx; 1520 } 1521 1522 static int 1523 nvme_fc_init_hctx(struct blk_mq_hw_ctx *hctx, void *data, 1524 unsigned int 
hctx_idx) 1525 { 1526 struct nvme_fc_ctrl *ctrl = data; 1527 1528 __nvme_fc_init_hctx(hctx, ctrl, hctx_idx + 1); 1529 1530 return 0; 1531 } 1532 1533 static int 1534 nvme_fc_init_admin_hctx(struct blk_mq_hw_ctx *hctx, void *data, 1535 unsigned int hctx_idx) 1536 { 1537 struct nvme_fc_ctrl *ctrl = data; 1538 1539 __nvme_fc_init_hctx(hctx, ctrl, hctx_idx); 1540 1541 return 0; 1542 } 1543 1544 static void 1545 nvme_fc_init_queue(struct nvme_fc_ctrl *ctrl, int idx, size_t queue_size) 1546 { 1547 struct nvme_fc_queue *queue; 1548 1549 queue = &ctrl->queues[idx]; 1550 memset(queue, 0, sizeof(*queue)); 1551 queue->ctrl = ctrl; 1552 queue->qnum = idx; 1553 atomic_set(&queue->csn, 1); 1554 queue->dev = ctrl->dev; 1555 1556 if (idx > 0) 1557 queue->cmnd_capsule_len = ctrl->ctrl.ioccsz * 16; 1558 else 1559 queue->cmnd_capsule_len = sizeof(struct nvme_command); 1560 1561 queue->queue_size = queue_size; 1562 1563 /* 1564 * Considered whether we should allocate buffers for all SQEs 1565 * and CQEs and dma map them - mapping their respective entries 1566 * into the request structures (kernel vm addr and dma address) 1567 * thus the driver could use the buffers/mappings directly. 1568 * It only makes sense if the LLDD would use them for its 1569 * messaging api. It's very unlikely most adapter api's would use 1570 * a native NVME sqe/cqe. More reasonable if FC-NVME IU payload 1571 * structures were used instead. 1572 */ 1573 } 1574 1575 /* 1576 * This routine terminates a queue at the transport level. 1577 * The transport has already ensured that all outstanding ios on 1578 * the queue have been terminated. 1579 * The transport will send a Disconnect LS request to terminate 1580 * the queue's connection. Termination of the admin queue will also 1581 * terminate the association at the target. 
1582 */ 1583 static void 1584 nvme_fc_free_queue(struct nvme_fc_queue *queue) 1585 { 1586 if (!test_and_clear_bit(NVME_FC_Q_CONNECTED, &queue->flags)) 1587 return; 1588 1589 /* 1590 * Current implementation never disconnects a single queue. 1591 * It always terminates a whole association. So there is never 1592 * a disconnect(queue) LS sent to the target. 1593 */ 1594 1595 queue->connection_id = 0; 1596 clear_bit(NVME_FC_Q_CONNECTED, &queue->flags); 1597 } 1598 1599 static void 1600 __nvme_fc_delete_hw_queue(struct nvme_fc_ctrl *ctrl, 1601 struct nvme_fc_queue *queue, unsigned int qidx) 1602 { 1603 if (ctrl->lport->ops->delete_queue) 1604 ctrl->lport->ops->delete_queue(&ctrl->lport->localport, qidx, 1605 queue->lldd_handle); 1606 queue->lldd_handle = NULL; 1607 } 1608 1609 static void 1610 nvme_fc_free_io_queues(struct nvme_fc_ctrl *ctrl) 1611 { 1612 int i; 1613 1614 for (i = 1; i < ctrl->ctrl.queue_count; i++) 1615 nvme_fc_free_queue(&ctrl->queues[i]); 1616 } 1617 1618 static int 1619 __nvme_fc_create_hw_queue(struct nvme_fc_ctrl *ctrl, 1620 struct nvme_fc_queue *queue, unsigned int qidx, u16 qsize) 1621 { 1622 int ret = 0; 1623 1624 queue->lldd_handle = NULL; 1625 if (ctrl->lport->ops->create_queue) 1626 ret = ctrl->lport->ops->create_queue(&ctrl->lport->localport, 1627 qidx, qsize, &queue->lldd_handle); 1628 1629 return ret; 1630 } 1631 1632 static void 1633 nvme_fc_delete_hw_io_queues(struct nvme_fc_ctrl *ctrl) 1634 { 1635 struct nvme_fc_queue *queue = &ctrl->queues[ctrl->ctrl.queue_count - 1]; 1636 int i; 1637 1638 for (i = ctrl->ctrl.queue_count - 1; i >= 1; i--, queue--) 1639 __nvme_fc_delete_hw_queue(ctrl, queue, i); 1640 } 1641 1642 static int 1643 nvme_fc_create_hw_io_queues(struct nvme_fc_ctrl *ctrl, u16 qsize) 1644 { 1645 struct nvme_fc_queue *queue = &ctrl->queues[1]; 1646 int i, ret; 1647 1648 for (i = 1; i < ctrl->ctrl.queue_count; i++, queue++) { 1649 ret = __nvme_fc_create_hw_queue(ctrl, queue, i, qsize); 1650 if (ret) 1651 goto delete_queues; 1652 
} 1653 1654 return 0; 1655 1656 delete_queues: 1657 for (; i >= 0; i--) 1658 __nvme_fc_delete_hw_queue(ctrl, &ctrl->queues[i], i); 1659 return ret; 1660 } 1661 1662 static int 1663 nvme_fc_connect_io_queues(struct nvme_fc_ctrl *ctrl, u16 qsize) 1664 { 1665 int i, ret = 0; 1666 1667 for (i = 1; i < ctrl->ctrl.queue_count; i++) { 1668 ret = nvme_fc_connect_queue(ctrl, &ctrl->queues[i], qsize, 1669 (qsize / 5)); 1670 if (ret) 1671 break; 1672 ret = nvmf_connect_io_queue(&ctrl->ctrl, i); 1673 if (ret) 1674 break; 1675 } 1676 1677 return ret; 1678 } 1679 1680 static void 1681 nvme_fc_init_io_queues(struct nvme_fc_ctrl *ctrl) 1682 { 1683 int i; 1684 1685 for (i = 1; i < ctrl->ctrl.queue_count; i++) 1686 nvme_fc_init_queue(ctrl, i, ctrl->ctrl.sqsize); 1687 } 1688 1689 static void 1690 nvme_fc_ctrl_free(struct kref *ref) 1691 { 1692 struct nvme_fc_ctrl *ctrl = 1693 container_of(ref, struct nvme_fc_ctrl, ref); 1694 unsigned long flags; 1695 1696 if (ctrl->ctrl.tagset) { 1697 blk_cleanup_queue(ctrl->ctrl.connect_q); 1698 blk_mq_free_tag_set(&ctrl->tag_set); 1699 } 1700 1701 /* remove from rport list */ 1702 spin_lock_irqsave(&ctrl->rport->lock, flags); 1703 list_del(&ctrl->ctrl_list); 1704 spin_unlock_irqrestore(&ctrl->rport->lock, flags); 1705 1706 blk_mq_unquiesce_queue(ctrl->ctrl.admin_q); 1707 blk_cleanup_queue(ctrl->ctrl.admin_q); 1708 blk_mq_free_tag_set(&ctrl->admin_tag_set); 1709 1710 kfree(ctrl->queues); 1711 1712 put_device(ctrl->dev); 1713 nvme_fc_rport_put(ctrl->rport); 1714 1715 ida_simple_remove(&nvme_fc_ctrl_cnt, ctrl->cnum); 1716 if (ctrl->ctrl.opts) 1717 nvmf_free_options(ctrl->ctrl.opts); 1718 kfree(ctrl); 1719 } 1720 1721 static void 1722 nvme_fc_ctrl_put(struct nvme_fc_ctrl *ctrl) 1723 { 1724 kref_put(&ctrl->ref, nvme_fc_ctrl_free); 1725 } 1726 1727 static int 1728 nvme_fc_ctrl_get(struct nvme_fc_ctrl *ctrl) 1729 { 1730 return kref_get_unless_zero(&ctrl->ref); 1731 } 1732 1733 /* 1734 * All accesses from nvme core layer done - can now free the 1735 * 
controller. Called after last nvme_put_ctrl() call 1736 */ 1737 static void 1738 nvme_fc_nvme_ctrl_freed(struct nvme_ctrl *nctrl) 1739 { 1740 struct nvme_fc_ctrl *ctrl = to_fc_ctrl(nctrl); 1741 1742 WARN_ON(nctrl != &ctrl->ctrl); 1743 1744 nvme_fc_ctrl_put(ctrl); 1745 } 1746 1747 static void 1748 nvme_fc_error_recovery(struct nvme_fc_ctrl *ctrl, char *errmsg) 1749 { 1750 /* only proceed if in LIVE state - e.g. on first error */ 1751 if (ctrl->ctrl.state != NVME_CTRL_LIVE) 1752 return; 1753 1754 dev_warn(ctrl->ctrl.device, 1755 "NVME-FC{%d}: transport association error detected: %s\n", 1756 ctrl->cnum, errmsg); 1757 dev_warn(ctrl->ctrl.device, 1758 "NVME-FC{%d}: resetting controller\n", ctrl->cnum); 1759 1760 if (!nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_RECONNECTING)) { 1761 dev_err(ctrl->ctrl.device, 1762 "NVME-FC{%d}: error_recovery: Couldn't change state " 1763 "to RECONNECTING\n", ctrl->cnum); 1764 return; 1765 } 1766 1767 nvme_reset_ctrl(&ctrl->ctrl); 1768 } 1769 1770 static enum blk_eh_timer_return 1771 nvme_fc_timeout(struct request *rq, bool reserved) 1772 { 1773 struct nvme_fc_fcp_op *op = blk_mq_rq_to_pdu(rq); 1774 struct nvme_fc_ctrl *ctrl = op->ctrl; 1775 int ret; 1776 1777 if (reserved) 1778 return BLK_EH_RESET_TIMER; 1779 1780 ret = __nvme_fc_abort_op(ctrl, op); 1781 if (ret) 1782 /* io wasn't active to abort consider it done */ 1783 return BLK_EH_HANDLED; 1784 1785 /* 1786 * we can't individually ABTS an io without affecting the queue, 1787 * thus killing the queue, adn thus the association. 1788 * So resolve by performing a controller reset, which will stop 1789 * the host/io stack, terminate the association on the link, 1790 * and recreate an association on the link. 
1791 */ 1792 nvme_fc_error_recovery(ctrl, "io timeout error"); 1793 1794 return BLK_EH_HANDLED; 1795 } 1796 1797 static int 1798 nvme_fc_map_data(struct nvme_fc_ctrl *ctrl, struct request *rq, 1799 struct nvme_fc_fcp_op *op) 1800 { 1801 struct nvmefc_fcp_req *freq = &op->fcp_req; 1802 enum dma_data_direction dir; 1803 int ret; 1804 1805 freq->sg_cnt = 0; 1806 1807 if (!blk_rq_payload_bytes(rq)) 1808 return 0; 1809 1810 freq->sg_table.sgl = freq->first_sgl; 1811 ret = sg_alloc_table_chained(&freq->sg_table, 1812 blk_rq_nr_phys_segments(rq), freq->sg_table.sgl); 1813 if (ret) 1814 return -ENOMEM; 1815 1816 op->nents = blk_rq_map_sg(rq->q, rq, freq->sg_table.sgl); 1817 WARN_ON(op->nents > blk_rq_nr_phys_segments(rq)); 1818 dir = (rq_data_dir(rq) == WRITE) ? DMA_TO_DEVICE : DMA_FROM_DEVICE; 1819 freq->sg_cnt = fc_dma_map_sg(ctrl->lport->dev, freq->sg_table.sgl, 1820 op->nents, dir); 1821 if (unlikely(freq->sg_cnt <= 0)) { 1822 sg_free_table_chained(&freq->sg_table, true); 1823 freq->sg_cnt = 0; 1824 return -EFAULT; 1825 } 1826 1827 /* 1828 * TODO: blk_integrity_rq(rq) for DIF 1829 */ 1830 return 0; 1831 } 1832 1833 static void 1834 nvme_fc_unmap_data(struct nvme_fc_ctrl *ctrl, struct request *rq, 1835 struct nvme_fc_fcp_op *op) 1836 { 1837 struct nvmefc_fcp_req *freq = &op->fcp_req; 1838 1839 if (!freq->sg_cnt) 1840 return; 1841 1842 fc_dma_unmap_sg(ctrl->lport->dev, freq->sg_table.sgl, op->nents, 1843 ((rq_data_dir(rq) == WRITE) ? 1844 DMA_TO_DEVICE : DMA_FROM_DEVICE)); 1845 1846 nvme_cleanup_cmd(rq); 1847 1848 sg_free_table_chained(&freq->sg_table, true); 1849 1850 freq->sg_cnt = 0; 1851 } 1852 1853 /* 1854 * In FC, the queue is a logical thing. At transport connect, the target 1855 * creates its "queue" and returns a handle that is to be given to the 1856 * target whenever it posts something to the corresponding SQ. 
 * When an SQE is sent on a SQ, FC effectively considers the SQE, or
 * rather the command contained within the SQE, an io, and assigns a FC
 * exchange to it. The SQE and the associated SQ handle are sent in the
 * initial CMD IU sent on the exchange. All transfers relative to the io
 * occur as part of the exchange. The CQE is the last thing for the io,
 * which is transferred (explicitly or implicitly) with the RSP IU
 * sent on the exchange. After the CQE is received, the FC exchange is
 * terminated and the Exchange may be used on a different io.
 *
 * The transport to LLDD api has the transport making a request for a
 * new fcp io request to the LLDD. The LLDD then allocates a FC exchange
 * resource and transfers the command. The LLDD will then process all
 * steps to complete the io. Upon completion, the transport done routine
 * is called.
 *
 * So - while the operation is outstanding to the LLDD, there is a link
 * level FC exchange resource that is also outstanding. This must be
 * considered in all cleanup operations.
1875 */ 1876 static blk_status_t 1877 nvme_fc_start_fcp_op(struct nvme_fc_ctrl *ctrl, struct nvme_fc_queue *queue, 1878 struct nvme_fc_fcp_op *op, u32 data_len, 1879 enum nvmefc_fcp_datadir io_dir) 1880 { 1881 struct nvme_fc_cmd_iu *cmdiu = &op->cmd_iu; 1882 struct nvme_command *sqe = &cmdiu->sqe; 1883 u32 csn; 1884 int ret; 1885 1886 /* 1887 * before attempting to send the io, check to see if we believe 1888 * the target device is present 1889 */ 1890 if (ctrl->rport->remoteport.port_state != FC_OBJSTATE_ONLINE) 1891 goto busy; 1892 1893 if (!nvme_fc_ctrl_get(ctrl)) 1894 return BLK_STS_IOERR; 1895 1896 /* format the FC-NVME CMD IU and fcp_req */ 1897 cmdiu->connection_id = cpu_to_be64(queue->connection_id); 1898 csn = atomic_inc_return(&queue->csn); 1899 cmdiu->csn = cpu_to_be32(csn); 1900 cmdiu->data_len = cpu_to_be32(data_len); 1901 switch (io_dir) { 1902 case NVMEFC_FCP_WRITE: 1903 cmdiu->flags = FCNVME_CMD_FLAGS_WRITE; 1904 break; 1905 case NVMEFC_FCP_READ: 1906 cmdiu->flags = FCNVME_CMD_FLAGS_READ; 1907 break; 1908 case NVMEFC_FCP_NODATA: 1909 cmdiu->flags = 0; 1910 break; 1911 } 1912 op->fcp_req.payload_length = data_len; 1913 op->fcp_req.io_dir = io_dir; 1914 op->fcp_req.transferred_length = 0; 1915 op->fcp_req.rcv_rsplen = 0; 1916 op->fcp_req.status = NVME_SC_SUCCESS; 1917 op->fcp_req.sqid = cpu_to_le16(queue->qnum); 1918 1919 /* 1920 * validate per fabric rules, set fields mandated by fabric spec 1921 * as well as those by FC-NVME spec. 1922 */ 1923 WARN_ON_ONCE(sqe->common.metadata); 1924 WARN_ON_ONCE(sqe->common.dptr.prp1); 1925 WARN_ON_ONCE(sqe->common.dptr.prp2); 1926 sqe->common.flags |= NVME_CMD_SGL_METABUF; 1927 1928 /* 1929 * format SQE DPTR field per FC-NVME rules 1930 * type=data block descr; subtype=offset; 1931 * offset is currently 0. 
1932 */ 1933 sqe->rw.dptr.sgl.type = NVME_SGL_FMT_OFFSET; 1934 sqe->rw.dptr.sgl.length = cpu_to_le32(data_len); 1935 sqe->rw.dptr.sgl.addr = 0; 1936 1937 if (!(op->flags & FCOP_FLAGS_AEN)) { 1938 ret = nvme_fc_map_data(ctrl, op->rq, op); 1939 if (ret < 0) { 1940 nvme_cleanup_cmd(op->rq); 1941 nvme_fc_ctrl_put(ctrl); 1942 if (ret == -ENOMEM || ret == -EAGAIN) 1943 return BLK_STS_RESOURCE; 1944 return BLK_STS_IOERR; 1945 } 1946 } 1947 1948 fc_dma_sync_single_for_device(ctrl->lport->dev, op->fcp_req.cmddma, 1949 sizeof(op->cmd_iu), DMA_TO_DEVICE); 1950 1951 atomic_set(&op->state, FCPOP_STATE_ACTIVE); 1952 1953 if (!(op->flags & FCOP_FLAGS_AEN)) 1954 blk_mq_start_request(op->rq); 1955 1956 ret = ctrl->lport->ops->fcp_io(&ctrl->lport->localport, 1957 &ctrl->rport->remoteport, 1958 queue->lldd_handle, &op->fcp_req); 1959 1960 if (ret) { 1961 if (!(op->flags & FCOP_FLAGS_AEN)) 1962 nvme_fc_unmap_data(ctrl, op->rq, op); 1963 1964 nvme_fc_ctrl_put(ctrl); 1965 1966 if (ctrl->rport->remoteport.port_state == FC_OBJSTATE_ONLINE && 1967 ret != -EBUSY) 1968 return BLK_STS_IOERR; 1969 1970 goto busy; 1971 } 1972 1973 return BLK_STS_OK; 1974 1975 busy: 1976 if (!(op->flags & FCOP_FLAGS_AEN) && queue->hctx) 1977 blk_mq_delay_run_hw_queue(queue->hctx, NVMEFC_QUEUE_DELAY); 1978 1979 return BLK_STS_RESOURCE; 1980 } 1981 1982 static blk_status_t 1983 nvme_fc_queue_rq(struct blk_mq_hw_ctx *hctx, 1984 const struct blk_mq_queue_data *bd) 1985 { 1986 struct nvme_ns *ns = hctx->queue->queuedata; 1987 struct nvme_fc_queue *queue = hctx->driver_data; 1988 struct nvme_fc_ctrl *ctrl = queue->ctrl; 1989 struct request *rq = bd->rq; 1990 struct nvme_fc_fcp_op *op = blk_mq_rq_to_pdu(rq); 1991 struct nvme_fc_cmd_iu *cmdiu = &op->cmd_iu; 1992 struct nvme_command *sqe = &cmdiu->sqe; 1993 enum nvmefc_fcp_datadir io_dir; 1994 u32 data_len; 1995 blk_status_t ret; 1996 1997 ret = nvme_setup_cmd(ns, rq, sqe); 1998 if (ret) 1999 return ret; 2000 2001 data_len = blk_rq_payload_bytes(rq); 2002 if (data_len) 
2003 io_dir = ((rq_data_dir(rq) == WRITE) ? 2004 NVMEFC_FCP_WRITE : NVMEFC_FCP_READ); 2005 else 2006 io_dir = NVMEFC_FCP_NODATA; 2007 2008 return nvme_fc_start_fcp_op(ctrl, queue, op, data_len, io_dir); 2009 } 2010 2011 static struct blk_mq_tags * 2012 nvme_fc_tagset(struct nvme_fc_queue *queue) 2013 { 2014 if (queue->qnum == 0) 2015 return queue->ctrl->admin_tag_set.tags[queue->qnum]; 2016 2017 return queue->ctrl->tag_set.tags[queue->qnum - 1]; 2018 } 2019 2020 static int 2021 nvme_fc_poll(struct blk_mq_hw_ctx *hctx, unsigned int tag) 2022 2023 { 2024 struct nvme_fc_queue *queue = hctx->driver_data; 2025 struct nvme_fc_ctrl *ctrl = queue->ctrl; 2026 struct request *req; 2027 struct nvme_fc_fcp_op *op; 2028 2029 req = blk_mq_tag_to_rq(nvme_fc_tagset(queue), tag); 2030 if (!req) 2031 return 0; 2032 2033 op = blk_mq_rq_to_pdu(req); 2034 2035 if ((atomic_read(&op->state) == FCPOP_STATE_ACTIVE) && 2036 (ctrl->lport->ops->poll_queue)) 2037 ctrl->lport->ops->poll_queue(&ctrl->lport->localport, 2038 queue->lldd_handle); 2039 2040 return ((atomic_read(&op->state) != FCPOP_STATE_ACTIVE)); 2041 } 2042 2043 static void 2044 nvme_fc_submit_async_event(struct nvme_ctrl *arg, int aer_idx) 2045 { 2046 struct nvme_fc_ctrl *ctrl = to_fc_ctrl(arg); 2047 struct nvme_fc_fcp_op *aen_op; 2048 unsigned long flags; 2049 bool terminating = false; 2050 blk_status_t ret; 2051 2052 if (aer_idx > NVME_FC_NR_AEN_COMMANDS) 2053 return; 2054 2055 spin_lock_irqsave(&ctrl->lock, flags); 2056 if (ctrl->flags & FCCTRL_TERMIO) 2057 terminating = true; 2058 spin_unlock_irqrestore(&ctrl->lock, flags); 2059 2060 if (terminating) 2061 return; 2062 2063 aen_op = &ctrl->aen_ops[aer_idx]; 2064 2065 ret = nvme_fc_start_fcp_op(ctrl, aen_op->queue, aen_op, 0, 2066 NVMEFC_FCP_NODATA); 2067 if (ret) 2068 dev_err(ctrl->ctrl.device, 2069 "failed async event work [%d]\n", aer_idx); 2070 } 2071 2072 static void 2073 __nvme_fc_final_op_cleanup(struct request *rq) 2074 { 2075 struct nvme_fc_fcp_op *op = 
blk_mq_rq_to_pdu(rq); 2076 struct nvme_fc_ctrl *ctrl = op->ctrl; 2077 2078 atomic_set(&op->state, FCPOP_STATE_IDLE); 2079 op->flags &= ~(FCOP_FLAGS_TERMIO | FCOP_FLAGS_RELEASED | 2080 FCOP_FLAGS_COMPLETE); 2081 2082 nvme_fc_unmap_data(ctrl, rq, op); 2083 nvme_complete_rq(rq); 2084 nvme_fc_ctrl_put(ctrl); 2085 2086 } 2087 2088 static void 2089 nvme_fc_complete_rq(struct request *rq) 2090 { 2091 struct nvme_fc_fcp_op *op = blk_mq_rq_to_pdu(rq); 2092 struct nvme_fc_ctrl *ctrl = op->ctrl; 2093 unsigned long flags; 2094 bool completed = false; 2095 2096 /* 2097 * the core layer, on controller resets after calling 2098 * nvme_shutdown_ctrl(), calls complete_rq without our 2099 * calling blk_mq_complete_request(), thus there may still 2100 * be live i/o outstanding with the LLDD. Means transport has 2101 * to track complete calls vs fcpio_done calls to know what 2102 * path to take on completes and dones. 2103 */ 2104 spin_lock_irqsave(&ctrl->lock, flags); 2105 if (op->flags & FCOP_FLAGS_COMPLETE) 2106 completed = true; 2107 else 2108 op->flags |= FCOP_FLAGS_RELEASED; 2109 spin_unlock_irqrestore(&ctrl->lock, flags); 2110 2111 if (completed) 2112 __nvme_fc_final_op_cleanup(rq); 2113 } 2114 2115 /* 2116 * This routine is used by the transport when it needs to find active 2117 * io on a queue that is to be terminated. The transport uses 2118 * blk_mq_tagset_busy_itr() to find the busy requests, which then invoke 2119 * this routine to kill them on a 1 by 1 basis. 2120 * 2121 * As FC allocates FC exchange for each io, the transport must contact 2122 * the LLDD to terminate the exchange, thus releasing the FC exchange. 2123 * After terminating the exchange the LLDD will call the transport's 2124 * normal io done path for the request, but it will have an aborted 2125 * status. The done path will return the io request back to the block 2126 * layer with an error status. 
2127 */ 2128 static void 2129 nvme_fc_terminate_exchange(struct request *req, void *data, bool reserved) 2130 { 2131 struct nvme_ctrl *nctrl = data; 2132 struct nvme_fc_ctrl *ctrl = to_fc_ctrl(nctrl); 2133 struct nvme_fc_fcp_op *op = blk_mq_rq_to_pdu(req); 2134 unsigned long flags; 2135 int status; 2136 2137 if (!blk_mq_request_started(req)) 2138 return; 2139 2140 spin_lock_irqsave(&ctrl->lock, flags); 2141 if (ctrl->flags & FCCTRL_TERMIO) { 2142 ctrl->iocnt++; 2143 op->flags |= FCOP_FLAGS_TERMIO; 2144 } 2145 spin_unlock_irqrestore(&ctrl->lock, flags); 2146 2147 status = __nvme_fc_abort_op(ctrl, op); 2148 if (status) { 2149 /* 2150 * if __nvme_fc_abort_op failed the io wasn't 2151 * active. Thus this call path is running in 2152 * parallel to the io complete. Treat as non-error. 2153 */ 2154 2155 /* back out the flags/counters */ 2156 spin_lock_irqsave(&ctrl->lock, flags); 2157 if (ctrl->flags & FCCTRL_TERMIO) 2158 ctrl->iocnt--; 2159 op->flags &= ~FCOP_FLAGS_TERMIO; 2160 spin_unlock_irqrestore(&ctrl->lock, flags); 2161 return; 2162 } 2163 } 2164 2165 2166 static const struct blk_mq_ops nvme_fc_mq_ops = { 2167 .queue_rq = nvme_fc_queue_rq, 2168 .complete = nvme_fc_complete_rq, 2169 .init_request = nvme_fc_init_request, 2170 .exit_request = nvme_fc_exit_request, 2171 .reinit_request = nvme_fc_reinit_request, 2172 .init_hctx = nvme_fc_init_hctx, 2173 .poll = nvme_fc_poll, 2174 .timeout = nvme_fc_timeout, 2175 }; 2176 2177 static int 2178 nvme_fc_create_io_queues(struct nvme_fc_ctrl *ctrl) 2179 { 2180 struct nvmf_ctrl_options *opts = ctrl->ctrl.opts; 2181 unsigned int nr_io_queues; 2182 int ret; 2183 2184 nr_io_queues = min(min(opts->nr_io_queues, num_online_cpus()), 2185 ctrl->lport->ops->max_hw_queues); 2186 ret = nvme_set_queue_count(&ctrl->ctrl, &nr_io_queues); 2187 if (ret) { 2188 dev_info(ctrl->ctrl.device, 2189 "set_queue_count failed: %d\n", ret); 2190 return ret; 2191 } 2192 2193 ctrl->ctrl.queue_count = nr_io_queues + 1; 2194 if (!nr_io_queues) 2195 return 
0; 2196 2197 nvme_fc_init_io_queues(ctrl); 2198 2199 memset(&ctrl->tag_set, 0, sizeof(ctrl->tag_set)); 2200 ctrl->tag_set.ops = &nvme_fc_mq_ops; 2201 ctrl->tag_set.queue_depth = ctrl->ctrl.opts->queue_size; 2202 ctrl->tag_set.reserved_tags = 1; /* fabric connect */ 2203 ctrl->tag_set.numa_node = NUMA_NO_NODE; 2204 ctrl->tag_set.flags = BLK_MQ_F_SHOULD_MERGE; 2205 ctrl->tag_set.cmd_size = sizeof(struct nvme_fc_fcp_op) + 2206 (SG_CHUNK_SIZE * 2207 sizeof(struct scatterlist)) + 2208 ctrl->lport->ops->fcprqst_priv_sz; 2209 ctrl->tag_set.driver_data = ctrl; 2210 ctrl->tag_set.nr_hw_queues = ctrl->ctrl.queue_count - 1; 2211 ctrl->tag_set.timeout = NVME_IO_TIMEOUT; 2212 2213 ret = blk_mq_alloc_tag_set(&ctrl->tag_set); 2214 if (ret) 2215 return ret; 2216 2217 ctrl->ctrl.tagset = &ctrl->tag_set; 2218 2219 ctrl->ctrl.connect_q = blk_mq_init_queue(&ctrl->tag_set); 2220 if (IS_ERR(ctrl->ctrl.connect_q)) { 2221 ret = PTR_ERR(ctrl->ctrl.connect_q); 2222 goto out_free_tag_set; 2223 } 2224 2225 ret = nvme_fc_create_hw_io_queues(ctrl, ctrl->ctrl.opts->queue_size); 2226 if (ret) 2227 goto out_cleanup_blk_queue; 2228 2229 ret = nvme_fc_connect_io_queues(ctrl, ctrl->ctrl.opts->queue_size); 2230 if (ret) 2231 goto out_delete_hw_queues; 2232 2233 return 0; 2234 2235 out_delete_hw_queues: 2236 nvme_fc_delete_hw_io_queues(ctrl); 2237 out_cleanup_blk_queue: 2238 blk_cleanup_queue(ctrl->ctrl.connect_q); 2239 out_free_tag_set: 2240 blk_mq_free_tag_set(&ctrl->tag_set); 2241 nvme_fc_free_io_queues(ctrl); 2242 2243 /* force put free routine to ignore io queues */ 2244 ctrl->ctrl.tagset = NULL; 2245 2246 return ret; 2247 } 2248 2249 static int 2250 nvme_fc_reinit_io_queues(struct nvme_fc_ctrl *ctrl) 2251 { 2252 struct nvmf_ctrl_options *opts = ctrl->ctrl.opts; 2253 unsigned int nr_io_queues; 2254 int ret; 2255 2256 nr_io_queues = min(min(opts->nr_io_queues, num_online_cpus()), 2257 ctrl->lport->ops->max_hw_queues); 2258 ret = nvme_set_queue_count(&ctrl->ctrl, &nr_io_queues); 2259 if (ret) { 2260 
dev_info(ctrl->ctrl.device, 2261 "set_queue_count failed: %d\n", ret); 2262 return ret; 2263 } 2264 2265 ctrl->ctrl.queue_count = nr_io_queues + 1; 2266 /* check for io queues existing */ 2267 if (ctrl->ctrl.queue_count == 1) 2268 return 0; 2269 2270 nvme_fc_init_io_queues(ctrl); 2271 2272 ret = blk_mq_reinit_tagset(&ctrl->tag_set); 2273 if (ret) 2274 goto out_free_io_queues; 2275 2276 ret = nvme_fc_create_hw_io_queues(ctrl, ctrl->ctrl.opts->queue_size); 2277 if (ret) 2278 goto out_free_io_queues; 2279 2280 ret = nvme_fc_connect_io_queues(ctrl, ctrl->ctrl.opts->queue_size); 2281 if (ret) 2282 goto out_delete_hw_queues; 2283 2284 blk_mq_update_nr_hw_queues(&ctrl->tag_set, nr_io_queues); 2285 2286 return 0; 2287 2288 out_delete_hw_queues: 2289 nvme_fc_delete_hw_io_queues(ctrl); 2290 out_free_io_queues: 2291 nvme_fc_free_io_queues(ctrl); 2292 return ret; 2293 } 2294 2295 /* 2296 * This routine restarts the controller on the host side, and 2297 * on the link side, recreates the controller association. 
2298 */ 2299 static int 2300 nvme_fc_create_association(struct nvme_fc_ctrl *ctrl) 2301 { 2302 struct nvmf_ctrl_options *opts = ctrl->ctrl.opts; 2303 u32 segs; 2304 int ret; 2305 bool changed; 2306 2307 ++ctrl->ctrl.nr_reconnects; 2308 2309 /* 2310 * Create the admin queue 2311 */ 2312 2313 nvme_fc_init_queue(ctrl, 0, NVME_FC_AQ_BLKMQ_DEPTH); 2314 2315 ret = __nvme_fc_create_hw_queue(ctrl, &ctrl->queues[0], 0, 2316 NVME_FC_AQ_BLKMQ_DEPTH); 2317 if (ret) 2318 goto out_free_queue; 2319 2320 ret = nvme_fc_connect_admin_queue(ctrl, &ctrl->queues[0], 2321 NVME_FC_AQ_BLKMQ_DEPTH, 2322 (NVME_FC_AQ_BLKMQ_DEPTH / 4)); 2323 if (ret) 2324 goto out_delete_hw_queue; 2325 2326 if (ctrl->ctrl.state != NVME_CTRL_NEW) 2327 blk_mq_unquiesce_queue(ctrl->ctrl.admin_q); 2328 2329 ret = nvmf_connect_admin_queue(&ctrl->ctrl); 2330 if (ret) 2331 goto out_disconnect_admin_queue; 2332 2333 /* 2334 * Check controller capabilities 2335 * 2336 * todo:- add code to check if ctrl attributes changed from 2337 * prior connection values 2338 */ 2339 2340 ret = nvmf_reg_read64(&ctrl->ctrl, NVME_REG_CAP, &ctrl->ctrl.cap); 2341 if (ret) { 2342 dev_err(ctrl->ctrl.device, 2343 "prop_get NVME_REG_CAP failed\n"); 2344 goto out_disconnect_admin_queue; 2345 } 2346 2347 ctrl->ctrl.sqsize = 2348 min_t(int, NVME_CAP_MQES(ctrl->ctrl.cap) + 1, ctrl->ctrl.sqsize); 2349 2350 ret = nvme_enable_ctrl(&ctrl->ctrl, ctrl->ctrl.cap); 2351 if (ret) 2352 goto out_disconnect_admin_queue; 2353 2354 segs = min_t(u32, NVME_FC_MAX_SEGMENTS, 2355 ctrl->lport->ops->max_sgl_segments); 2356 ctrl->ctrl.max_hw_sectors = (segs - 1) << (PAGE_SHIFT - 9); 2357 2358 ret = nvme_init_identify(&ctrl->ctrl); 2359 if (ret) 2360 goto out_disconnect_admin_queue; 2361 2362 /* sanity checks */ 2363 2364 /* FC-NVME does not have other data in the capsule */ 2365 if (ctrl->ctrl.icdoff) { 2366 dev_err(ctrl->ctrl.device, "icdoff %d is not supported!\n", 2367 ctrl->ctrl.icdoff); 2368 goto out_disconnect_admin_queue; 2369 } 2370 2371 /* FC-NVME supports 
normal SGL Data Block Descriptors */ 2372 2373 if (opts->queue_size > ctrl->ctrl.maxcmd) { 2374 /* warn if maxcmd is lower than queue_size */ 2375 dev_warn(ctrl->ctrl.device, 2376 "queue_size %zu > ctrl maxcmd %u, reducing " 2377 "to queue_size\n", 2378 opts->queue_size, ctrl->ctrl.maxcmd); 2379 opts->queue_size = ctrl->ctrl.maxcmd; 2380 } 2381 2382 ret = nvme_fc_init_aen_ops(ctrl); 2383 if (ret) 2384 goto out_term_aen_ops; 2385 2386 /* 2387 * Create the io queues 2388 */ 2389 2390 if (ctrl->ctrl.queue_count > 1) { 2391 if (ctrl->ctrl.state == NVME_CTRL_NEW) 2392 ret = nvme_fc_create_io_queues(ctrl); 2393 else 2394 ret = nvme_fc_reinit_io_queues(ctrl); 2395 if (ret) 2396 goto out_term_aen_ops; 2397 } 2398 2399 changed = nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_LIVE); 2400 WARN_ON_ONCE(!changed); 2401 2402 ctrl->ctrl.nr_reconnects = 0; 2403 2404 nvme_start_ctrl(&ctrl->ctrl); 2405 2406 return 0; /* Success */ 2407 2408 out_term_aen_ops: 2409 nvme_fc_term_aen_ops(ctrl); 2410 out_disconnect_admin_queue: 2411 /* send a Disconnect(association) LS to fc-nvme target */ 2412 nvme_fc_xmt_disconnect_assoc(ctrl); 2413 out_delete_hw_queue: 2414 __nvme_fc_delete_hw_queue(ctrl, &ctrl->queues[0], 0); 2415 out_free_queue: 2416 nvme_fc_free_queue(&ctrl->queues[0]); 2417 2418 return ret; 2419 } 2420 2421 /* 2422 * This routine stops operation of the controller on the host side. 2423 * On the host os stack side: Admin and IO queues are stopped, 2424 * outstanding ios on them terminated via FC ABTS. 2425 * On the link side: the association is terminated. 2426 */ 2427 static void 2428 nvme_fc_delete_association(struct nvme_fc_ctrl *ctrl) 2429 { 2430 unsigned long flags; 2431 2432 spin_lock_irqsave(&ctrl->lock, flags); 2433 ctrl->flags |= FCCTRL_TERMIO; 2434 ctrl->iocnt = 0; 2435 spin_unlock_irqrestore(&ctrl->lock, flags); 2436 2437 /* 2438 * If io queues are present, stop them and terminate all outstanding 2439 * ios on them. 
As FC allocates FC exchange for each io, the 2440 * transport must contact the LLDD to terminate the exchange, 2441 * thus releasing the FC exchange. We use blk_mq_tagset_busy_itr() 2442 * to tell us what io's are busy and invoke a transport routine 2443 * to kill them with the LLDD. After terminating the exchange 2444 * the LLDD will call the transport's normal io done path, but it 2445 * will have an aborted status. The done path will return the 2446 * io requests back to the block layer as part of normal completions 2447 * (but with error status). 2448 */ 2449 if (ctrl->ctrl.queue_count > 1) { 2450 nvme_stop_queues(&ctrl->ctrl); 2451 blk_mq_tagset_busy_iter(&ctrl->tag_set, 2452 nvme_fc_terminate_exchange, &ctrl->ctrl); 2453 } 2454 2455 /* 2456 * Other transports, which don't have link-level contexts bound 2457 * to sqe's, would try to gracefully shutdown the controller by 2458 * writing the registers for shutdown and polling (call 2459 * nvme_shutdown_ctrl()). Given a bunch of i/o was potentially 2460 * just aborted and we will wait on those contexts, and given 2461 * there was no indication of how live the controlelr is on the 2462 * link, don't send more io to create more contexts for the 2463 * shutdown. Let the controller fail via keepalive failure if 2464 * its still present. 2465 */ 2466 2467 /* 2468 * clean up the admin queue. Same thing as above. 2469 * use blk_mq_tagset_busy_itr() and the transport routine to 2470 * terminate the exchanges. 
2471 */ 2472 blk_mq_quiesce_queue(ctrl->ctrl.admin_q); 2473 blk_mq_tagset_busy_iter(&ctrl->admin_tag_set, 2474 nvme_fc_terminate_exchange, &ctrl->ctrl); 2475 2476 /* kill the aens as they are a separate path */ 2477 nvme_fc_abort_aen_ops(ctrl); 2478 2479 /* wait for all io that had to be aborted */ 2480 spin_lock_irqsave(&ctrl->lock, flags); 2481 wait_event_lock_irq(ctrl->ioabort_wait, ctrl->iocnt == 0, ctrl->lock); 2482 ctrl->flags &= ~FCCTRL_TERMIO; 2483 spin_unlock_irqrestore(&ctrl->lock, flags); 2484 2485 nvme_fc_term_aen_ops(ctrl); 2486 2487 /* 2488 * send a Disconnect(association) LS to fc-nvme target 2489 * Note: could have been sent at top of process, but 2490 * cleaner on link traffic if after the aborts complete. 2491 * Note: if association doesn't exist, association_id will be 0 2492 */ 2493 if (ctrl->association_id) 2494 nvme_fc_xmt_disconnect_assoc(ctrl); 2495 2496 if (ctrl->ctrl.tagset) { 2497 nvme_fc_delete_hw_io_queues(ctrl); 2498 nvme_fc_free_io_queues(ctrl); 2499 } 2500 2501 __nvme_fc_delete_hw_queue(ctrl, &ctrl->queues[0], 0); 2502 nvme_fc_free_queue(&ctrl->queues[0]); 2503 } 2504 2505 static void 2506 nvme_fc_delete_ctrl_work(struct work_struct *work) 2507 { 2508 struct nvme_fc_ctrl *ctrl = 2509 container_of(work, struct nvme_fc_ctrl, delete_work); 2510 2511 cancel_work_sync(&ctrl->ctrl.reset_work); 2512 cancel_delayed_work_sync(&ctrl->connect_work); 2513 nvme_stop_ctrl(&ctrl->ctrl); 2514 nvme_remove_namespaces(&ctrl->ctrl); 2515 /* 2516 * kill the association on the link side. this will block 2517 * waiting for io to terminate 2518 */ 2519 nvme_fc_delete_association(ctrl); 2520 2521 /* 2522 * tear down the controller 2523 * After the last reference on the nvme ctrl is removed, 2524 * the transport nvme_fc_nvme_ctrl_freed() callback will be 2525 * invoked. From there, the transport will tear down it's 2526 * logical queues and association. 
2527 */ 2528 nvme_uninit_ctrl(&ctrl->ctrl); 2529 2530 nvme_put_ctrl(&ctrl->ctrl); 2531 } 2532 2533 static bool 2534 __nvme_fc_schedule_delete_work(struct nvme_fc_ctrl *ctrl) 2535 { 2536 if (!nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_DELETING)) 2537 return true; 2538 2539 if (!queue_work(nvme_wq, &ctrl->delete_work)) 2540 return true; 2541 2542 return false; 2543 } 2544 2545 static int 2546 __nvme_fc_del_ctrl(struct nvme_fc_ctrl *ctrl) 2547 { 2548 return __nvme_fc_schedule_delete_work(ctrl) ? -EBUSY : 0; 2549 } 2550 2551 /* 2552 * Request from nvme core layer to delete the controller 2553 */ 2554 static int 2555 nvme_fc_del_nvme_ctrl(struct nvme_ctrl *nctrl) 2556 { 2557 struct nvme_fc_ctrl *ctrl = to_fc_ctrl(nctrl); 2558 int ret; 2559 2560 if (!kref_get_unless_zero(&ctrl->ctrl.kref)) 2561 return -EBUSY; 2562 2563 ret = __nvme_fc_del_ctrl(ctrl); 2564 2565 if (!ret) 2566 flush_workqueue(nvme_wq); 2567 2568 nvme_put_ctrl(&ctrl->ctrl); 2569 2570 return ret; 2571 } 2572 2573 static void 2574 nvme_fc_reconnect_or_delete(struct nvme_fc_ctrl *ctrl, int status) 2575 { 2576 /* If we are resetting/deleting then do nothing */ 2577 if (ctrl->ctrl.state != NVME_CTRL_RECONNECTING) { 2578 WARN_ON_ONCE(ctrl->ctrl.state == NVME_CTRL_NEW || 2579 ctrl->ctrl.state == NVME_CTRL_LIVE); 2580 return; 2581 } 2582 2583 dev_info(ctrl->ctrl.device, 2584 "NVME-FC{%d}: reset: Reconnect attempt failed (%d)\n", 2585 ctrl->cnum, status); 2586 2587 if (nvmf_should_reconnect(&ctrl->ctrl)) { 2588 dev_info(ctrl->ctrl.device, 2589 "NVME-FC{%d}: Reconnect attempt in %d seconds.\n", 2590 ctrl->cnum, ctrl->ctrl.opts->reconnect_delay); 2591 queue_delayed_work(nvme_wq, &ctrl->connect_work, 2592 ctrl->ctrl.opts->reconnect_delay * HZ); 2593 } else { 2594 dev_warn(ctrl->ctrl.device, 2595 "NVME-FC{%d}: Max reconnect attempts (%d) " 2596 "reached. 
Removing controller\n", 2597 ctrl->cnum, ctrl->ctrl.nr_reconnects); 2598 WARN_ON(__nvme_fc_schedule_delete_work(ctrl)); 2599 } 2600 } 2601 2602 static void 2603 nvme_fc_reset_ctrl_work(struct work_struct *work) 2604 { 2605 struct nvme_fc_ctrl *ctrl = 2606 container_of(work, struct nvme_fc_ctrl, ctrl.reset_work); 2607 int ret; 2608 2609 nvme_stop_ctrl(&ctrl->ctrl); 2610 /* will block will waiting for io to terminate */ 2611 nvme_fc_delete_association(ctrl); 2612 2613 ret = nvme_fc_create_association(ctrl); 2614 if (ret) 2615 nvme_fc_reconnect_or_delete(ctrl, ret); 2616 else 2617 dev_info(ctrl->ctrl.device, 2618 "NVME-FC{%d}: controller reset complete\n", ctrl->cnum); 2619 } 2620 2621 static const struct nvme_ctrl_ops nvme_fc_ctrl_ops = { 2622 .name = "fc", 2623 .module = THIS_MODULE, 2624 .flags = NVME_F_FABRICS, 2625 .reg_read32 = nvmf_reg_read32, 2626 .reg_read64 = nvmf_reg_read64, 2627 .reg_write32 = nvmf_reg_write32, 2628 .free_ctrl = nvme_fc_nvme_ctrl_freed, 2629 .submit_async_event = nvme_fc_submit_async_event, 2630 .delete_ctrl = nvme_fc_del_nvme_ctrl, 2631 .get_address = nvmf_get_address, 2632 }; 2633 2634 static void 2635 nvme_fc_connect_ctrl_work(struct work_struct *work) 2636 { 2637 int ret; 2638 2639 struct nvme_fc_ctrl *ctrl = 2640 container_of(to_delayed_work(work), 2641 struct nvme_fc_ctrl, connect_work); 2642 2643 ret = nvme_fc_create_association(ctrl); 2644 if (ret) 2645 nvme_fc_reconnect_or_delete(ctrl, ret); 2646 else 2647 dev_info(ctrl->ctrl.device, 2648 "NVME-FC{%d}: controller reconnect complete\n", 2649 ctrl->cnum); 2650 } 2651 2652 2653 static const struct blk_mq_ops nvme_fc_admin_mq_ops = { 2654 .queue_rq = nvme_fc_queue_rq, 2655 .complete = nvme_fc_complete_rq, 2656 .init_request = nvme_fc_init_request, 2657 .exit_request = nvme_fc_exit_request, 2658 .reinit_request = nvme_fc_reinit_request, 2659 .init_hctx = nvme_fc_init_admin_hctx, 2660 .timeout = nvme_fc_timeout, 2661 }; 2662 2663 2664 static struct nvme_ctrl * 2665 
nvme_fc_init_ctrl(struct device *dev, struct nvmf_ctrl_options *opts, 2666 struct nvme_fc_lport *lport, struct nvme_fc_rport *rport) 2667 { 2668 struct nvme_fc_ctrl *ctrl; 2669 unsigned long flags; 2670 int ret, idx; 2671 2672 if (!(rport->remoteport.port_role & 2673 (FC_PORT_ROLE_NVME_DISCOVERY | FC_PORT_ROLE_NVME_TARGET))) { 2674 ret = -EBADR; 2675 goto out_fail; 2676 } 2677 2678 ctrl = kzalloc(sizeof(*ctrl), GFP_KERNEL); 2679 if (!ctrl) { 2680 ret = -ENOMEM; 2681 goto out_fail; 2682 } 2683 2684 idx = ida_simple_get(&nvme_fc_ctrl_cnt, 0, 0, GFP_KERNEL); 2685 if (idx < 0) { 2686 ret = -ENOSPC; 2687 goto out_free_ctrl; 2688 } 2689 2690 ctrl->ctrl.opts = opts; 2691 INIT_LIST_HEAD(&ctrl->ctrl_list); 2692 ctrl->lport = lport; 2693 ctrl->rport = rport; 2694 ctrl->dev = lport->dev; 2695 ctrl->cnum = idx; 2696 2697 get_device(ctrl->dev); 2698 kref_init(&ctrl->ref); 2699 2700 INIT_WORK(&ctrl->delete_work, nvme_fc_delete_ctrl_work); 2701 INIT_WORK(&ctrl->ctrl.reset_work, nvme_fc_reset_ctrl_work); 2702 INIT_DELAYED_WORK(&ctrl->connect_work, nvme_fc_connect_ctrl_work); 2703 spin_lock_init(&ctrl->lock); 2704 2705 /* io queue count */ 2706 ctrl->ctrl.queue_count = min_t(unsigned int, 2707 opts->nr_io_queues, 2708 lport->ops->max_hw_queues); 2709 ctrl->ctrl.queue_count++; /* +1 for admin queue */ 2710 2711 ctrl->ctrl.sqsize = opts->queue_size - 1; 2712 ctrl->ctrl.kato = opts->kato; 2713 2714 ret = -ENOMEM; 2715 ctrl->queues = kcalloc(ctrl->ctrl.queue_count, 2716 sizeof(struct nvme_fc_queue), GFP_KERNEL); 2717 if (!ctrl->queues) 2718 goto out_free_ida; 2719 2720 memset(&ctrl->admin_tag_set, 0, sizeof(ctrl->admin_tag_set)); 2721 ctrl->admin_tag_set.ops = &nvme_fc_admin_mq_ops; 2722 ctrl->admin_tag_set.queue_depth = NVME_FC_AQ_BLKMQ_DEPTH; 2723 ctrl->admin_tag_set.reserved_tags = 2; /* fabric connect + Keep-Alive */ 2724 ctrl->admin_tag_set.numa_node = NUMA_NO_NODE; 2725 ctrl->admin_tag_set.cmd_size = sizeof(struct nvme_fc_fcp_op) + 2726 (SG_CHUNK_SIZE * 2727 sizeof(struct 
scatterlist)) + 2728 ctrl->lport->ops->fcprqst_priv_sz; 2729 ctrl->admin_tag_set.driver_data = ctrl; 2730 ctrl->admin_tag_set.nr_hw_queues = 1; 2731 ctrl->admin_tag_set.timeout = ADMIN_TIMEOUT; 2732 2733 ret = blk_mq_alloc_tag_set(&ctrl->admin_tag_set); 2734 if (ret) 2735 goto out_free_queues; 2736 2737 ctrl->ctrl.admin_q = blk_mq_init_queue(&ctrl->admin_tag_set); 2738 if (IS_ERR(ctrl->ctrl.admin_q)) { 2739 ret = PTR_ERR(ctrl->ctrl.admin_q); 2740 goto out_free_admin_tag_set; 2741 } 2742 2743 /* 2744 * Would have been nice to init io queues tag set as well. 2745 * However, we require interaction from the controller 2746 * for max io queue count before we can do so. 2747 * Defer this to the connect path. 2748 */ 2749 2750 ret = nvme_init_ctrl(&ctrl->ctrl, dev, &nvme_fc_ctrl_ops, 0); 2751 if (ret) 2752 goto out_cleanup_admin_q; 2753 2754 /* at this point, teardown path changes to ref counting on nvme ctrl */ 2755 2756 spin_lock_irqsave(&rport->lock, flags); 2757 list_add_tail(&ctrl->ctrl_list, &rport->ctrl_list); 2758 spin_unlock_irqrestore(&rport->lock, flags); 2759 2760 ret = nvme_fc_create_association(ctrl); 2761 if (ret) { 2762 ctrl->ctrl.opts = NULL; 2763 /* initiate nvme ctrl ref counting teardown */ 2764 nvme_uninit_ctrl(&ctrl->ctrl); 2765 nvme_put_ctrl(&ctrl->ctrl); 2766 2767 /* Remove core ctrl ref. */ 2768 nvme_put_ctrl(&ctrl->ctrl); 2769 2770 /* as we're past the point where we transition to the ref 2771 * counting teardown path, if we return a bad pointer here, 2772 * the calling routine, thinking it's prior to the 2773 * transition, will do an rport put. Since the teardown 2774 * path also does a rport put, we do an extra get here to 2775 * so proper order/teardown happens. 
2776 */ 2777 nvme_fc_rport_get(rport); 2778 2779 if (ret > 0) 2780 ret = -EIO; 2781 return ERR_PTR(ret); 2782 } 2783 2784 kref_get(&ctrl->ctrl.kref); 2785 2786 dev_info(ctrl->ctrl.device, 2787 "NVME-FC{%d}: new ctrl: NQN \"%s\"\n", 2788 ctrl->cnum, ctrl->ctrl.opts->subsysnqn); 2789 2790 return &ctrl->ctrl; 2791 2792 out_cleanup_admin_q: 2793 blk_cleanup_queue(ctrl->ctrl.admin_q); 2794 out_free_admin_tag_set: 2795 blk_mq_free_tag_set(&ctrl->admin_tag_set); 2796 out_free_queues: 2797 kfree(ctrl->queues); 2798 out_free_ida: 2799 put_device(ctrl->dev); 2800 ida_simple_remove(&nvme_fc_ctrl_cnt, ctrl->cnum); 2801 out_free_ctrl: 2802 kfree(ctrl); 2803 out_fail: 2804 /* exit via here doesn't follow ctlr ref points */ 2805 return ERR_PTR(ret); 2806 } 2807 2808 2809 struct nvmet_fc_traddr { 2810 u64 nn; 2811 u64 pn; 2812 }; 2813 2814 static int 2815 __nvme_fc_parse_u64(substring_t *sstr, u64 *val) 2816 { 2817 u64 token64; 2818 2819 if (match_u64(sstr, &token64)) 2820 return -EINVAL; 2821 *val = token64; 2822 2823 return 0; 2824 } 2825 2826 /* 2827 * This routine validates and extracts the WWN's from the TRADDR string. 2828 * As kernel parsers need the 0x to determine number base, universally 2829 * build string to parse with 0x prefix before parsing name strings. 
2830 */ 2831 static int 2832 nvme_fc_parse_traddr(struct nvmet_fc_traddr *traddr, char *buf, size_t blen) 2833 { 2834 char name[2 + NVME_FC_TRADDR_HEXNAMELEN + 1]; 2835 substring_t wwn = { name, &name[sizeof(name)-1] }; 2836 int nnoffset, pnoffset; 2837 2838 /* validate it string one of the 2 allowed formats */ 2839 if (strnlen(buf, blen) == NVME_FC_TRADDR_MAXLENGTH && 2840 !strncmp(buf, "nn-0x", NVME_FC_TRADDR_OXNNLEN) && 2841 !strncmp(&buf[NVME_FC_TRADDR_MAX_PN_OFFSET], 2842 "pn-0x", NVME_FC_TRADDR_OXNNLEN)) { 2843 nnoffset = NVME_FC_TRADDR_OXNNLEN; 2844 pnoffset = NVME_FC_TRADDR_MAX_PN_OFFSET + 2845 NVME_FC_TRADDR_OXNNLEN; 2846 } else if ((strnlen(buf, blen) == NVME_FC_TRADDR_MINLENGTH && 2847 !strncmp(buf, "nn-", NVME_FC_TRADDR_NNLEN) && 2848 !strncmp(&buf[NVME_FC_TRADDR_MIN_PN_OFFSET], 2849 "pn-", NVME_FC_TRADDR_NNLEN))) { 2850 nnoffset = NVME_FC_TRADDR_NNLEN; 2851 pnoffset = NVME_FC_TRADDR_MIN_PN_OFFSET + NVME_FC_TRADDR_NNLEN; 2852 } else 2853 goto out_einval; 2854 2855 name[0] = '0'; 2856 name[1] = 'x'; 2857 name[2 + NVME_FC_TRADDR_HEXNAMELEN] = 0; 2858 2859 memcpy(&name[2], &buf[nnoffset], NVME_FC_TRADDR_HEXNAMELEN); 2860 if (__nvme_fc_parse_u64(&wwn, &traddr->nn)) 2861 goto out_einval; 2862 2863 memcpy(&name[2], &buf[pnoffset], NVME_FC_TRADDR_HEXNAMELEN); 2864 if (__nvme_fc_parse_u64(&wwn, &traddr->pn)) 2865 goto out_einval; 2866 2867 return 0; 2868 2869 out_einval: 2870 pr_warn("%s: bad traddr string\n", __func__); 2871 return -EINVAL; 2872 } 2873 2874 static struct nvme_ctrl * 2875 nvme_fc_create_ctrl(struct device *dev, struct nvmf_ctrl_options *opts) 2876 { 2877 struct nvme_fc_lport *lport; 2878 struct nvme_fc_rport *rport; 2879 struct nvme_ctrl *ctrl; 2880 struct nvmet_fc_traddr laddr = { 0L, 0L }; 2881 struct nvmet_fc_traddr raddr = { 0L, 0L }; 2882 unsigned long flags; 2883 int ret; 2884 2885 ret = nvme_fc_parse_traddr(&raddr, opts->traddr, NVMF_TRADDR_SIZE); 2886 if (ret || !raddr.nn || !raddr.pn) 2887 return ERR_PTR(-EINVAL); 2888 2889 ret = 
nvme_fc_parse_traddr(&laddr, opts->host_traddr, NVMF_TRADDR_SIZE); 2890 if (ret || !laddr.nn || !laddr.pn) 2891 return ERR_PTR(-EINVAL); 2892 2893 /* find the host and remote ports to connect together */ 2894 spin_lock_irqsave(&nvme_fc_lock, flags); 2895 list_for_each_entry(lport, &nvme_fc_lport_list, port_list) { 2896 if (lport->localport.node_name != laddr.nn || 2897 lport->localport.port_name != laddr.pn) 2898 continue; 2899 2900 list_for_each_entry(rport, &lport->endp_list, endp_list) { 2901 if (rport->remoteport.node_name != raddr.nn || 2902 rport->remoteport.port_name != raddr.pn) 2903 continue; 2904 2905 /* if fail to get reference fall through. Will error */ 2906 if (!nvme_fc_rport_get(rport)) 2907 break; 2908 2909 spin_unlock_irqrestore(&nvme_fc_lock, flags); 2910 2911 ctrl = nvme_fc_init_ctrl(dev, opts, lport, rport); 2912 if (IS_ERR(ctrl)) 2913 nvme_fc_rport_put(rport); 2914 return ctrl; 2915 } 2916 } 2917 spin_unlock_irqrestore(&nvme_fc_lock, flags); 2918 2919 return ERR_PTR(-ENOENT); 2920 } 2921 2922 2923 static struct nvmf_transport_ops nvme_fc_transport = { 2924 .name = "fc", 2925 .required_opts = NVMF_OPT_TRADDR | NVMF_OPT_HOST_TRADDR, 2926 .allowed_opts = NVMF_OPT_RECONNECT_DELAY | NVMF_OPT_CTRL_LOSS_TMO, 2927 .create_ctrl = nvme_fc_create_ctrl, 2928 }; 2929 2930 static int __init nvme_fc_init_module(void) 2931 { 2932 return nvmf_register_transport(&nvme_fc_transport); 2933 } 2934 2935 static void __exit nvme_fc_exit_module(void) 2936 { 2937 /* sanity check - all lports should be removed */ 2938 if (!list_empty(&nvme_fc_lport_list)) 2939 pr_warn("%s: localport list not empty\n", __func__); 2940 2941 nvmf_unregister_transport(&nvme_fc_transport); 2942 2943 ida_destroy(&nvme_fc_local_port_cnt); 2944 ida_destroy(&nvme_fc_ctrl_cnt); 2945 } 2946 2947 module_init(nvme_fc_init_module); 2948 module_exit(nvme_fc_exit_module); 2949 2950 MODULE_LICENSE("GPL v2"); 2951