/*
 * Copyright (c) 2016 Avago Technologies.  All rights reserved.
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of version 2 of the GNU General Public License as
 * published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful.
 * ALL EXPRESS OR IMPLIED CONDITIONS, REPRESENTATIONS AND WARRANTIES,
 * INCLUDING ANY IMPLIED WARRANTY OF MERCHANTABILITY, FITNESS FOR A
 * PARTICULAR PURPOSE, OR NON-INFRINGEMENT, ARE DISCLAIMED, EXCEPT TO
 * THE EXTENT THAT SUCH DISCLAIMERS ARE HELD TO BE LEGALLY INVALID.
 * See the GNU General Public License for more details, a copy of which
 * can be found in the file COPYING included with this package
 *
 */
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
#include <linux/module.h>
#include <linux/parser.h>
#include <uapi/scsi/fc/fc_fs.h>
#include <uapi/scsi/fc/fc_els.h>
#include <linux/delay.h>
#include <linux/overflow.h>

#include "nvme.h"
#include "fabrics.h"
#include <linux/nvme-fc-driver.h>
#include <linux/nvme-fc.h>


/* *************************** Data Structures/Defines ****************** */


enum nvme_fc_queue_flags {
	NVME_FC_Q_CONNECTED = 0,
	NVME_FC_Q_LIVE,
};

#define NVME_FC_DEFAULT_DEV_LOSS_TMO	60	/* seconds */

struct nvme_fc_queue {
	struct nvme_fc_ctrl	*ctrl;
	struct device		*dev;
	struct blk_mq_hw_ctx	*hctx;
	void			*lldd_handle;
	size_t			cmnd_capsule_len;
	u32			qnum;
	u32			rqcnt;
	u32			seqno;

	u64			connection_id;
	atomic_t		csn;

	unsigned long		flags;
} __aligned(sizeof(u64));	/* alignment for other things alloc'd with */

enum nvme_fcop_flags {
	FCOP_FLAGS_TERMIO	= (1 << 0),
	FCOP_FLAGS_AEN		= (1 << 1),
};

struct nvmefc_ls_req_op {
	struct nvmefc_ls_req	ls_req;

	struct nvme_fc_rport	*rport;
	struct nvme_fc_queue	*queue;
	struct request		*rq;
	u32			flags;

	int			ls_error;
	struct completion	ls_done;
	struct list_head	lsreq_list;	/* rport->ls_req_list */
	bool			req_queued;
};

enum nvme_fcpop_state {
	FCPOP_STATE_UNINIT	= 0,
	FCPOP_STATE_IDLE	= 1,
	FCPOP_STATE_ACTIVE	= 2,
	FCPOP_STATE_ABORTED	= 3,
	FCPOP_STATE_COMPLETE	= 4,
};

struct nvme_fc_fcp_op {
	struct nvme_request	nreq;		/*
						 * nvme/host/core.c
						 * requires this to be
						 * the 1st element in the
						 * private structure
						 * associated with the
						 * request.
						 */
	struct nvmefc_fcp_req	fcp_req;

	struct nvme_fc_ctrl	*ctrl;
	struct nvme_fc_queue	*queue;
	struct request		*rq;

	atomic_t		state;
	u32			flags;
	u32			rqno;
	u32			nents;

	struct nvme_fc_cmd_iu	cmd_iu;
	struct nvme_fc_ersp_iu	rsp_iu;
};

struct nvme_fcp_op_w_sgl {
	struct nvme_fc_fcp_op	op;
	struct scatterlist	sgl[SG_CHUNK_SIZE];
	uint8_t			priv[0];
};

struct nvme_fc_lport {
	struct nvme_fc_local_port	localport;

	struct ida			endp_cnt;
	struct list_head		port_list;	/* nvme_fc_lport_list */
	struct list_head		endp_list;
	struct device			*dev;	/* physical device for dma */
	struct nvme_fc_port_template	*ops;
	struct kref			ref;
	atomic_t			act_rport_cnt;
} __aligned(sizeof(u64));	/* alignment for other things alloc'd with */

struct nvme_fc_rport {
	struct nvme_fc_remote_port	remoteport;

	struct list_head		endp_list; /* for lport->endp_list */
	struct list_head		ctrl_list;
	struct list_head		ls_req_list;
	struct list_head		disc_list;
	struct device			*dev;	/* physical device for dma */
	struct nvme_fc_lport		*lport;
	spinlock_t			lock;
	struct kref			ref;
	atomic_t			act_ctrl_cnt;
	unsigned long			dev_loss_end;
} __aligned(sizeof(u64));	/* alignment for other things alloc'd with */

enum nvme_fcctrl_flags {
	FCCTRL_TERMIO		= (1 << 0),
};

struct nvme_fc_ctrl {
	spinlock_t		lock;
	struct nvme_fc_queue	*queues;
	struct device		*dev;
	struct nvme_fc_lport	*lport;
	struct nvme_fc_rport	*rport;
	u32			cnum;

	bool			ioq_live;
	bool			assoc_active;
	u64			association_id;

	struct list_head	ctrl_list;	/* rport->ctrl_list */

	struct blk_mq_tag_set	admin_tag_set;
	struct blk_mq_tag_set	tag_set;

	struct delayed_work	connect_work;

	struct kref		ref;
	u32			flags;
	u32			iocnt;
	wait_queue_head_t	ioabort_wait;

	struct nvme_fc_fcp_op	aen_ops[NVME_NR_AEN_COMMANDS];

	struct nvme_ctrl	ctrl;
};

static inline struct nvme_fc_ctrl *
to_fc_ctrl(struct nvme_ctrl *ctrl)
{
	return container_of(ctrl, struct nvme_fc_ctrl, ctrl);
}

static inline struct nvme_fc_lport *
localport_to_lport(struct nvme_fc_local_port *portptr)
{
	return container_of(portptr, struct nvme_fc_lport, localport);
}

static inline struct nvme_fc_rport *
remoteport_to_rport(struct nvme_fc_remote_port *portptr)
{
	return container_of(portptr, struct nvme_fc_rport, remoteport);
}

static inline struct nvmefc_ls_req_op *
ls_req_to_lsop(struct nvmefc_ls_req *lsreq)
{
	return container_of(lsreq, struct nvmefc_ls_req_op, ls_req);
}

static inline struct nvme_fc_fcp_op *
fcp_req_to_fcp_op(struct nvmefc_fcp_req *fcpreq)
{
	return container_of(fcpreq, struct nvme_fc_fcp_op, fcp_req);
}



/* *************************** Globals **************************** */


static DEFINE_SPINLOCK(nvme_fc_lock);

static LIST_HEAD(nvme_fc_lport_list);
static DEFINE_IDA(nvme_fc_local_port_cnt);
static DEFINE_IDA(nvme_fc_ctrl_cnt);



/*
 * These items are short-term. They will eventually be moved into
 * a generic FC class. See comments in module init.
 */
static struct device *fc_udev_device;


/* *********************** FC-NVME Port Management ************************ */

static void __nvme_fc_delete_hw_queue(struct nvme_fc_ctrl *,
			struct nvme_fc_queue *, unsigned int);

static void
nvme_fc_free_lport(struct kref *ref)
{
	struct nvme_fc_lport *lport =
		container_of(ref, struct nvme_fc_lport, ref);
	unsigned long flags;

	WARN_ON(lport->localport.port_state != FC_OBJSTATE_DELETED);
	WARN_ON(!list_empty(&lport->endp_list));

	/* remove from transport list */
	spin_lock_irqsave(&nvme_fc_lock, flags);
	list_del(&lport->port_list);
	spin_unlock_irqrestore(&nvme_fc_lock, flags);

	ida_simple_remove(&nvme_fc_local_port_cnt, lport->localport.port_num);
	ida_destroy(&lport->endp_cnt);

	put_device(lport->dev);

	kfree(lport);
}

static void
nvme_fc_lport_put(struct nvme_fc_lport *lport)
{
	kref_put(&lport->ref, nvme_fc_free_lport);
}

static int
nvme_fc_lport_get(struct nvme_fc_lport *lport)
{
	return kref_get_unless_zero(&lport->ref);
}


static struct nvme_fc_lport *
nvme_fc_attach_to_unreg_lport(struct nvme_fc_port_info *pinfo,
			struct nvme_fc_port_template *ops,
			struct device *dev)
{
	struct nvme_fc_lport *lport;
	unsigned long flags;

	spin_lock_irqsave(&nvme_fc_lock, flags);

	list_for_each_entry(lport, &nvme_fc_lport_list, port_list) {
		if (lport->localport.node_name != pinfo->node_name ||
		    lport->localport.port_name != pinfo->port_name)
			continue;

		if (lport->dev != dev) {
			lport = ERR_PTR(-EXDEV);
			goto out_done;
		}

		if (lport->localport.port_state != FC_OBJSTATE_DELETED) {
			lport = ERR_PTR(-EEXIST);
			goto out_done;
		}

		if (!nvme_fc_lport_get(lport)) {
			/*
			 * fails if ref cnt already 0. If so,
			 * act as if lport already deleted
			 */
			lport = NULL;
			goto out_done;
		}

		/* resume the lport */

		lport->ops = ops;
		lport->localport.port_role = pinfo->port_role;
		lport->localport.port_id = pinfo->port_id;
		lport->localport.port_state = FC_OBJSTATE_ONLINE;

		spin_unlock_irqrestore(&nvme_fc_lock, flags);

		return lport;
	}

	lport = NULL;

out_done:
	spin_unlock_irqrestore(&nvme_fc_lock, flags);

	return lport;
}

/**
 * nvme_fc_register_localport - transport entry point called by an
 *                              LLDD to register the existence of a NVME
 *                              host FC port.
 * @pinfo:     pointer to information about the port to be registered
 * @template:  LLDD entrypoints and operational parameters for the port
 * @dev:       physical hardware device node port corresponds to. Will be
 *             used for DMA mappings
 * @portptr:   pointer to a local port pointer. Upon success, the routine
 *             will allocate a nvme_fc_local_port structure and place its
 *             address in the local port pointer. Upon failure, local port
 *             pointer will be set to 0.
 *
 * Returns:
 * a completion status. Must be 0 upon success; a negative errno
 * (ex: -ENXIO) upon failure.
 */
int
nvme_fc_register_localport(struct nvme_fc_port_info *pinfo,
			struct nvme_fc_port_template *template,
			struct device *dev,
			struct nvme_fc_local_port **portptr)
{
	struct nvme_fc_lport *newrec;
	unsigned long flags;
	int ret, idx;

	if (!template->localport_delete || !template->remoteport_delete ||
	    !template->ls_req || !template->fcp_io ||
	    !template->ls_abort || !template->fcp_abort ||
	    !template->max_hw_queues || !template->max_sgl_segments ||
	    !template->max_dif_sgl_segments || !template->dma_boundary) {
		ret = -EINVAL;
		goto out_reghost_failed;
	}

	/*
	 * look to see if there is already a localport that had been
	 * deregistered and in the process of waiting for all the
	 * references to fully be removed.  If the references haven't
	 * expired, we can simply re-enable the localport. Remoteports
	 * and controller reconnections should resume naturally.
	 */
	newrec = nvme_fc_attach_to_unreg_lport(pinfo, template, dev);

	/* found an lport, but something about its state is bad */
	if (IS_ERR(newrec)) {
		ret = PTR_ERR(newrec);
		goto out_reghost_failed;

	/* found existing lport, which was resumed */
	} else if (newrec) {
		*portptr = &newrec->localport;
		return 0;
	}

	/* nothing found - allocate a new localport struct */

	newrec = kmalloc((sizeof(*newrec) + template->local_priv_sz),
			 GFP_KERNEL);
	if (!newrec) {
		ret = -ENOMEM;
		goto out_reghost_failed;
	}

	idx = ida_simple_get(&nvme_fc_local_port_cnt, 0, 0, GFP_KERNEL);
	if (idx < 0) {
		ret = -ENOSPC;
		goto out_fail_kfree;
	}

	if (!get_device(dev) && dev) {
		ret = -ENODEV;
		goto out_ida_put;
	}

	INIT_LIST_HEAD(&newrec->port_list);
	INIT_LIST_HEAD(&newrec->endp_list);
	kref_init(&newrec->ref);
	atomic_set(&newrec->act_rport_cnt, 0);
	newrec->ops = template;
	newrec->dev = dev;
	ida_init(&newrec->endp_cnt);
	newrec->localport.private = &newrec[1];
	newrec->localport.node_name = pinfo->node_name;
	newrec->localport.port_name = pinfo->port_name;
	newrec->localport.port_role = pinfo->port_role;
	newrec->localport.port_id = pinfo->port_id;
	newrec->localport.port_state = FC_OBJSTATE_ONLINE;
	newrec->localport.port_num = idx;

	spin_lock_irqsave(&nvme_fc_lock, flags);
	list_add_tail(&newrec->port_list, &nvme_fc_lport_list);
	spin_unlock_irqrestore(&nvme_fc_lock, flags);

	if (dev)
		dma_set_seg_boundary(dev, template->dma_boundary);

	*portptr = &newrec->localport;
	return 0;

out_ida_put:
	ida_simple_remove(&nvme_fc_local_port_cnt, idx);
out_fail_kfree:
	kfree(newrec);
out_reghost_failed:
	*portptr = NULL;

	return ret;
}
EXPORT_SYMBOL_GPL(nvme_fc_register_localport);
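
/*
 * Illustrative sketch (not part of the transport): how an LLDD might call
 * nvme_fc_register_localport() for one of its host ports. All
 * "example_*" names and values below are hypothetical; the template
 * entrypoints must be the LLDD's own implementations of the mandatory
 * ops validated above.
 *
 *	static struct nvme_fc_port_template example_lldd_fc_nvme_template = {
 *		.localport_delete	= example_lldd_localport_delete,
 *		.remoteport_delete	= example_lldd_remoteport_delete,
 *		.create_queue		= example_lldd_create_queue,
 *		.delete_queue		= example_lldd_delete_queue,
 *		.ls_req			= example_lldd_ls_req,
 *		.fcp_io			= example_lldd_fcp_io,
 *		.ls_abort		= example_lldd_ls_abort,
 *		.fcp_abort		= example_lldd_fcp_abort,
 *		.max_hw_queues		= 8,
 *		.max_sgl_segments	= 128,
 *		.max_dif_sgl_segments	= 64,
 *		.dma_boundary		= 0xFFFFFFFF,
 *		.local_priv_sz		= sizeof(struct example_lldd_lport_priv),
 *		.remote_priv_sz		= sizeof(struct example_lldd_rport_priv),
 *		.lsrqst_priv_sz		= sizeof(struct example_lldd_ls_priv),
 *		.fcprqst_priv_sz	= sizeof(struct example_lldd_fcp_priv),
 *	};
 *
 *	struct nvme_fc_port_info pinfo = {
 *		.node_name	= example_hba->node_name_u64,
 *		.port_name	= example_hba->port_name_u64,
 *		.port_role	= FC_PORT_ROLE_NVME_INITIATOR,
 *		.port_id	= example_hba->d_id,
 *	};
 *	struct nvme_fc_local_port *localport;
 *	int err;
 *
 *	err = nvme_fc_register_localport(&pinfo, &example_lldd_fc_nvme_template,
 *					 &example_hba->pcidev->dev, &localport);
 *	if (err)
 *		return err;
 *	// The LLDD's private area (local_priv_sz bytes) is at localport->private.
 */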
/**
 * nvme_fc_unregister_localport - transport entry point called by an
 *                              LLDD to deregister/remove a previously
 *                              registered NVME host FC port.
 * @portptr: pointer to the (registered) local port that is to be deregistered.
 *
 * Returns:
 * a completion status. Must be 0 upon success; a negative errno
 * (ex: -ENXIO) upon failure.
 */
int
nvme_fc_unregister_localport(struct nvme_fc_local_port *portptr)
{
	struct nvme_fc_lport *lport = localport_to_lport(portptr);
	unsigned long flags;

	if (!portptr)
		return -EINVAL;

	spin_lock_irqsave(&nvme_fc_lock, flags);

	if (portptr->port_state != FC_OBJSTATE_ONLINE) {
		spin_unlock_irqrestore(&nvme_fc_lock, flags);
		return -EINVAL;
	}
	portptr->port_state = FC_OBJSTATE_DELETED;

	spin_unlock_irqrestore(&nvme_fc_lock, flags);

	if (atomic_read(&lport->act_rport_cnt) == 0)
		lport->ops->localport_delete(&lport->localport);

	nvme_fc_lport_put(lport);

	return 0;
}
EXPORT_SYMBOL_GPL(nvme_fc_unregister_localport);

/*
 * TRADDR strings, per FC-NVME are fixed format:
 *     "nn-0x<16hexdigits>:pn-0x<16hexdigits>" - 43 characters
 * udev event will only differ by prefix of what field is
 * being specified:
 *    "NVMEFC_HOST_TRADDR=" or "NVMEFC_TRADDR=" - 19 max characters
 * 19 + 43 + null_fudge = 64 characters
 */
#define FCNVME_TRADDR_LENGTH		64

static void
nvme_fc_signal_discovery_scan(struct nvme_fc_lport *lport,
		struct nvme_fc_rport *rport)
{
	char hostaddr[FCNVME_TRADDR_LENGTH];	/* NVMEFC_HOST_TRADDR=...*/
	char tgtaddr[FCNVME_TRADDR_LENGTH];	/* NVMEFC_TRADDR=...*/
	char *envp[4] = { "FC_EVENT=nvmediscovery", hostaddr, tgtaddr, NULL };

	if (!(rport->remoteport.port_role & FC_PORT_ROLE_NVME_DISCOVERY))
		return;

	snprintf(hostaddr, sizeof(hostaddr),
		"NVMEFC_HOST_TRADDR=nn-0x%016llx:pn-0x%016llx",
		lport->localport.node_name, lport->localport.port_name);
	snprintf(tgtaddr, sizeof(tgtaddr),
		"NVMEFC_TRADDR=nn-0x%016llx:pn-0x%016llx",
		rport->remoteport.node_name, rport->remoteport.port_name);
	kobject_uevent_env(&fc_udev_device->kobj, KOBJ_CHANGE, envp);
}
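
/*
 * Illustrative note (not part of the transport): for a discovery-capable
 * remote port, the routine above raises a udev CHANGE event on the
 * fc_udev_device with an environment of the form (WWNs hypothetical):
 *
 *	FC_EVENT=nvmediscovery
 *	NVMEFC_HOST_TRADDR=nn-0x20000090fae0b5f5:pn-0x10000090fae0b5f5
 *	NVMEFC_TRADDR=nn-0x201700a09890f5bf:pn-0x201900a09890f5bf
 *
 * A userspace udev rule can match on FC_EVENT=nvmediscovery and use the
 * two traddr strings to connect to the discovery controller (e.g. via
 * "nvme connect-all" or an equivalent tool).
 */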
static void
nvme_fc_free_rport(struct kref *ref)
{
	struct nvme_fc_rport *rport =
		container_of(ref, struct nvme_fc_rport, ref);
	struct nvme_fc_lport *lport =
		localport_to_lport(rport->remoteport.localport);
	unsigned long flags;

	WARN_ON(rport->remoteport.port_state != FC_OBJSTATE_DELETED);
	WARN_ON(!list_empty(&rport->ctrl_list));

	/* remove from lport list */
	spin_lock_irqsave(&nvme_fc_lock, flags);
	list_del(&rport->endp_list);
	spin_unlock_irqrestore(&nvme_fc_lock, flags);

	WARN_ON(!list_empty(&rport->disc_list));
	ida_simple_remove(&lport->endp_cnt, rport->remoteport.port_num);

	kfree(rport);

	nvme_fc_lport_put(lport);
}

static void
nvme_fc_rport_put(struct nvme_fc_rport *rport)
{
	kref_put(&rport->ref, nvme_fc_free_rport);
}

static int
nvme_fc_rport_get(struct nvme_fc_rport *rport)
{
	return kref_get_unless_zero(&rport->ref);
}

static void
nvme_fc_resume_controller(struct nvme_fc_ctrl *ctrl)
{
	switch (ctrl->ctrl.state) {
	case NVME_CTRL_NEW:
	case NVME_CTRL_CONNECTING:
		/*
		 * As all reconnects were suppressed, schedule a
		 * connect.
		 */
		dev_info(ctrl->ctrl.device,
			"NVME-FC{%d}: connectivity re-established. "
			"Attempting reconnect\n", ctrl->cnum);

		queue_delayed_work(nvme_wq, &ctrl->connect_work, 0);
		break;

	case NVME_CTRL_RESETTING:
		/*
		 * Controller is already in the process of terminating the
		 * association. No need to do anything further. The reconnect
		 * step will naturally occur after the reset completes.
		 */
		break;

	default:
		/* no action to take - let it delete */
		break;
	}
}

static struct nvme_fc_rport *
nvme_fc_attach_to_suspended_rport(struct nvme_fc_lport *lport,
				struct nvme_fc_port_info *pinfo)
{
	struct nvme_fc_rport *rport;
	struct nvme_fc_ctrl *ctrl;
	unsigned long flags;

	spin_lock_irqsave(&nvme_fc_lock, flags);

	list_for_each_entry(rport, &lport->endp_list, endp_list) {
		if (rport->remoteport.node_name != pinfo->node_name ||
		    rport->remoteport.port_name != pinfo->port_name)
			continue;

		if (!nvme_fc_rport_get(rport)) {
			rport = ERR_PTR(-ENOLCK);
			goto out_done;
		}

		spin_unlock_irqrestore(&nvme_fc_lock, flags);

		spin_lock_irqsave(&rport->lock, flags);

		/* has it been unregistered */
		if (rport->remoteport.port_state != FC_OBJSTATE_DELETED) {
			/* means lldd called us twice */
			spin_unlock_irqrestore(&rport->lock, flags);
			nvme_fc_rport_put(rport);
			return ERR_PTR(-ESTALE);
		}

		rport->remoteport.port_role = pinfo->port_role;
		rport->remoteport.port_id = pinfo->port_id;
		rport->remoteport.port_state = FC_OBJSTATE_ONLINE;
		rport->dev_loss_end = 0;

		/*
		 * kick off a reconnect attempt on all associations to the
		 * remote port. A successful reconnect will resume i/o.
		 */
		list_for_each_entry(ctrl, &rport->ctrl_list, ctrl_list)
			nvme_fc_resume_controller(ctrl);

		spin_unlock_irqrestore(&rport->lock, flags);

		return rport;
	}

	rport = NULL;

out_done:
	spin_unlock_irqrestore(&nvme_fc_lock, flags);

	return rport;
}

static inline void
__nvme_fc_set_dev_loss_tmo(struct nvme_fc_rport *rport,
			struct nvme_fc_port_info *pinfo)
{
	if (pinfo->dev_loss_tmo)
		rport->remoteport.dev_loss_tmo = pinfo->dev_loss_tmo;
	else
		rport->remoteport.dev_loss_tmo = NVME_FC_DEFAULT_DEV_LOSS_TMO;
}

/**
 * nvme_fc_register_remoteport - transport entry point called by an
 *                              LLDD to register the existence of a NVME
 *                              subsystem FC port on its fabric.
 * @localport: pointer to the (registered) local port that the remote
 *             subsystem port is connected to.
 * @pinfo:     pointer to information about the port to be registered
 * @portptr:   pointer to a remote port pointer. Upon success, the routine
 *             will allocate a nvme_fc_remote_port structure and place its
 *             address in the remote port pointer. Upon failure, remote port
 *             pointer will be set to 0.
 *
 * Returns:
 * a completion status. Must be 0 upon success; a negative errno
 * (ex: -ENXIO) upon failure.
 */
int
nvme_fc_register_remoteport(struct nvme_fc_local_port *localport,
				struct nvme_fc_port_info *pinfo,
				struct nvme_fc_remote_port **portptr)
{
	struct nvme_fc_lport *lport = localport_to_lport(localport);
	struct nvme_fc_rport *newrec;
	unsigned long flags;
	int ret, idx;

	if (!nvme_fc_lport_get(lport)) {
		ret = -ESHUTDOWN;
		goto out_reghost_failed;
	}

	/*
	 * look to see if there is already a remoteport that is waiting
	 * for a reconnect (within dev_loss_tmo) with the same WWN's.
	 * If so, transition to it and reconnect.
	 */
	newrec = nvme_fc_attach_to_suspended_rport(lport, pinfo);

	/* found an rport, but something about its state is bad */
	if (IS_ERR(newrec)) {
		ret = PTR_ERR(newrec);
		goto out_lport_put;

	/* found existing rport, which was resumed */
	} else if (newrec) {
		nvme_fc_lport_put(lport);
		__nvme_fc_set_dev_loss_tmo(newrec, pinfo);
		nvme_fc_signal_discovery_scan(lport, newrec);
		*portptr = &newrec->remoteport;
		return 0;
	}

	/* nothing found - allocate a new remoteport struct */

	newrec = kmalloc((sizeof(*newrec) + lport->ops->remote_priv_sz),
			 GFP_KERNEL);
	if (!newrec) {
		ret = -ENOMEM;
		goto out_lport_put;
	}

	idx = ida_simple_get(&lport->endp_cnt, 0, 0, GFP_KERNEL);
	if (idx < 0) {
		ret = -ENOSPC;
		goto out_kfree_rport;
	}

	INIT_LIST_HEAD(&newrec->endp_list);
	INIT_LIST_HEAD(&newrec->ctrl_list);
	INIT_LIST_HEAD(&newrec->ls_req_list);
	INIT_LIST_HEAD(&newrec->disc_list);
	kref_init(&newrec->ref);
	atomic_set(&newrec->act_ctrl_cnt, 0);
	spin_lock_init(&newrec->lock);
	newrec->remoteport.localport = &lport->localport;
	newrec->dev = lport->dev;
	newrec->lport = lport;
	newrec->remoteport.private = &newrec[1];
	newrec->remoteport.port_role = pinfo->port_role;
	newrec->remoteport.node_name = pinfo->node_name;
	newrec->remoteport.port_name = pinfo->port_name;
	newrec->remoteport.port_id = pinfo->port_id;
	newrec->remoteport.port_state = FC_OBJSTATE_ONLINE;
	newrec->remoteport.port_num = idx;
	__nvme_fc_set_dev_loss_tmo(newrec, pinfo);

	spin_lock_irqsave(&nvme_fc_lock, flags);
	list_add_tail(&newrec->endp_list, &lport->endp_list);
	spin_unlock_irqrestore(&nvme_fc_lock, flags);

	nvme_fc_signal_discovery_scan(lport, newrec);

	*portptr = &newrec->remoteport;
	return 0;

out_kfree_rport:
	kfree(newrec);
out_lport_put:
	nvme_fc_lport_put(lport);
out_reghost_failed:
	*portptr = NULL;
	return ret;
}
EXPORT_SYMBOL_GPL(nvme_fc_register_remoteport);
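
/*
 * Illustrative sketch (not part of the transport): an LLDD that has
 * discovered an NVMe-capable target port typically registers it against
 * the localport it was seen on. Names and values below are hypothetical:
 *
 *	struct nvme_fc_port_info rinfo = {
 *		.node_name	= example_target_node_wwn,
 *		.port_name	= example_target_port_wwn,
 *		.port_role	= FC_PORT_ROLE_NVME_TARGET |
 *				  FC_PORT_ROLE_NVME_DISCOVERY,
 *		.port_id	= example_target_d_id,
 *		.dev_loss_tmo	= 0,	// 0 selects the transport default (60s)
 *	};
 *	struct nvme_fc_remote_port *remoteport;
 *	int err;
 *
 *	err = nvme_fc_register_remoteport(example_hba->localport, &rinfo,
 *					  &remoteport);
 *
 * If the same WWNs were registered before and are still within
 * dev_loss_tmo, the suspended rport is resumed and its controllers
 * schedule reconnects; otherwise a new rport is allocated as above.
 */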
static int
nvme_fc_abort_lsops(struct nvme_fc_rport *rport)
{
	struct nvmefc_ls_req_op *lsop;
	unsigned long flags;

restart:
	spin_lock_irqsave(&rport->lock, flags);

	list_for_each_entry(lsop, &rport->ls_req_list, lsreq_list) {
		if (!(lsop->flags & FCOP_FLAGS_TERMIO)) {
			lsop->flags |= FCOP_FLAGS_TERMIO;
			spin_unlock_irqrestore(&rport->lock, flags);
			rport->lport->ops->ls_abort(&rport->lport->localport,
						&rport->remoteport,
						&lsop->ls_req);
			goto restart;
		}
	}
	spin_unlock_irqrestore(&rport->lock, flags);

	return 0;
}

static void
nvme_fc_ctrl_connectivity_loss(struct nvme_fc_ctrl *ctrl)
{
	dev_info(ctrl->ctrl.device,
		"NVME-FC{%d}: controller connectivity lost. Awaiting "
		"Reconnect", ctrl->cnum);

	switch (ctrl->ctrl.state) {
	case NVME_CTRL_NEW:
	case NVME_CTRL_LIVE:
		/*
		 * Schedule a controller reset. The reset will terminate the
		 * association and schedule the reconnect timer. Reconnects
		 * will be attempted until either the ctlr_loss_tmo
		 * (max_retries * connect_delay) expires or the remoteport's
		 * dev_loss_tmo expires.
		 */
		if (nvme_reset_ctrl(&ctrl->ctrl)) {
			dev_warn(ctrl->ctrl.device,
				"NVME-FC{%d}: Couldn't schedule reset.\n",
				ctrl->cnum);
			nvme_delete_ctrl(&ctrl->ctrl);
		}
		break;

	case NVME_CTRL_CONNECTING:
		/*
		 * The association has already been terminated and the
		 * controller is attempting reconnects. No need to do anything
		 * further. Reconnects will be attempted until either the
		 * ctlr_loss_tmo (max_retries * connect_delay) expires or the
		 * remoteport's dev_loss_tmo expires.
		 */
		break;

	case NVME_CTRL_RESETTING:
		/*
		 * Controller is already in the process of terminating the
		 * association. No need to do anything further. The reconnect
		 * step will kick in naturally after the association is
		 * terminated.
		 */
		break;

	case NVME_CTRL_DELETING:
	default:
		/* no action to take - let it delete */
		break;
	}
}

/**
 * nvme_fc_unregister_remoteport - transport entry point called by an
 *                              LLDD to deregister/remove a previously
 *                              registered NVME subsystem FC port.
 * @portptr: pointer to the (registered) remote port that is to be
 *           deregistered.
 *
 * Returns:
 * a completion status. Must be 0 upon success; a negative errno
 * (ex: -ENXIO) upon failure.
 */
int
nvme_fc_unregister_remoteport(struct nvme_fc_remote_port *portptr)
{
	struct nvme_fc_rport *rport = remoteport_to_rport(portptr);
	struct nvme_fc_ctrl *ctrl;
	unsigned long flags;

	if (!portptr)
		return -EINVAL;

	spin_lock_irqsave(&rport->lock, flags);

	if (portptr->port_state != FC_OBJSTATE_ONLINE) {
		spin_unlock_irqrestore(&rport->lock, flags);
		return -EINVAL;
	}
	portptr->port_state = FC_OBJSTATE_DELETED;

	rport->dev_loss_end = jiffies + (portptr->dev_loss_tmo * HZ);

	list_for_each_entry(ctrl, &rport->ctrl_list, ctrl_list) {
		/* if dev_loss_tmo==0, dev loss is immediate */
		if (!portptr->dev_loss_tmo) {
			dev_warn(ctrl->ctrl.device,
				"NVME-FC{%d}: controller connectivity lost.\n",
				ctrl->cnum);
			nvme_delete_ctrl(&ctrl->ctrl);
		} else
			nvme_fc_ctrl_connectivity_loss(ctrl);
	}

	spin_unlock_irqrestore(&rport->lock, flags);

	nvme_fc_abort_lsops(rport);

	if (atomic_read(&rport->act_ctrl_cnt) == 0)
		rport->lport->ops->remoteport_delete(portptr);

	/*
	 * release the reference, which will allow, if all controllers
	 * go away, which should only occur after dev_loss_tmo occurs,
	 * for the rport to be torn down.
	 */
	nvme_fc_rport_put(rport);

	return 0;
}
EXPORT_SYMBOL_GPL(nvme_fc_unregister_remoteport);
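
/*
 * Illustrative note (not part of the transport): the dev_loss window set
 * above is absolute. With the default dev_loss_tmo of 60, the rport gets
 * dev_loss_end = jiffies + 60 * HZ, and each controller keeps retrying
 * its reconnect until either that deadline or its own ctlr_loss_tmo
 * (max_retries * connect_delay) expires, whichever comes first. A
 * dev_loss_tmo of 0 skips the window entirely and deletes the
 * controllers immediately, as coded above.
 */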
/**
 * nvme_fc_rescan_remoteport - transport entry point called by an
 *                              LLDD to request a nvme device rescan.
 * @remoteport: pointer to the (registered) remote port that is to be
 *              rescanned.
 *
 * Returns: N/A
 */
void
nvme_fc_rescan_remoteport(struct nvme_fc_remote_port *remoteport)
{
	struct nvme_fc_rport *rport = remoteport_to_rport(remoteport);

	nvme_fc_signal_discovery_scan(rport->lport, rport);
}
EXPORT_SYMBOL_GPL(nvme_fc_rescan_remoteport);

int
nvme_fc_set_remoteport_devloss(struct nvme_fc_remote_port *portptr,
			u32 dev_loss_tmo)
{
	struct nvme_fc_rport *rport = remoteport_to_rport(portptr);
	unsigned long flags;

	spin_lock_irqsave(&rport->lock, flags);

	if (portptr->port_state != FC_OBJSTATE_ONLINE) {
		spin_unlock_irqrestore(&rport->lock, flags);
		return -EINVAL;
	}

	/* a dev_loss_tmo of 0 (immediate) is allowed to be set */
	rport->remoteport.dev_loss_tmo = dev_loss_tmo;

	spin_unlock_irqrestore(&rport->lock, flags);

	return 0;
}
EXPORT_SYMBOL_GPL(nvme_fc_set_remoteport_devloss);


/* *********************** FC-NVME DMA Handling **************************** */

/*
 * The fcloop device passes in a NULL device pointer. Real LLD's will
 * pass in a valid device pointer. If NULL is passed to the dma mapping
 * routines, depending on the platform, it may or may not succeed, and
 * may crash.
 *
 * As such:
 * Wrapper all the dma routines and check the dev pointer.
 *
 * If simple mappings (return just a dma address), we'll noop them,
 * returning a dma address of 0.
 *
 * On more complex mappings (dma_map_sg), a pseudo routine fills
 * in the scatter list, setting all dma addresses to 0.
 */

static inline dma_addr_t
fc_dma_map_single(struct device *dev, void *ptr, size_t size,
		enum dma_data_direction dir)
{
	return dev ? dma_map_single(dev, ptr, size, dir) : (dma_addr_t)0L;
}

static inline int
fc_dma_mapping_error(struct device *dev, dma_addr_t dma_addr)
{
	return dev ? dma_mapping_error(dev, dma_addr) : 0;
}

static inline void
fc_dma_unmap_single(struct device *dev, dma_addr_t addr, size_t size,
	enum dma_data_direction dir)
{
	if (dev)
		dma_unmap_single(dev, addr, size, dir);
}

static inline void
fc_dma_sync_single_for_cpu(struct device *dev, dma_addr_t addr, size_t size,
		enum dma_data_direction dir)
{
	if (dev)
		dma_sync_single_for_cpu(dev, addr, size, dir);
}

static inline void
fc_dma_sync_single_for_device(struct device *dev, dma_addr_t addr, size_t size,
		enum dma_data_direction dir)
{
	if (dev)
		dma_sync_single_for_device(dev, addr, size, dir);
}

/* pseudo dma_map_sg call */
static int
fc_map_sg(struct scatterlist *sg, int nents)
{
	struct scatterlist *s;
	int i;

	WARN_ON(nents == 0 || sg[0].length == 0);

	for_each_sg(sg, s, nents, i) {
		s->dma_address = 0L;
#ifdef CONFIG_NEED_SG_DMA_LENGTH
		s->dma_length = s->length;
#endif
	}
	return nents;
}

static inline int
fc_dma_map_sg(struct device *dev, struct scatterlist *sg, int nents,
		enum dma_data_direction dir)
{
	return dev ?
			dma_map_sg(dev, sg, nents, dir) : fc_map_sg(sg, nents);
}

static inline void
fc_dma_unmap_sg(struct device *dev, struct scatterlist *sg, int nents,
		enum dma_data_direction dir)
{
	if (dev)
		dma_unmap_sg(dev, sg, nents, dir);
}

/* *********************** FC-NVME LS Handling **************************** */

static void nvme_fc_ctrl_put(struct nvme_fc_ctrl *);
static int nvme_fc_ctrl_get(struct nvme_fc_ctrl *);


static void
__nvme_fc_finish_ls_req(struct nvmefc_ls_req_op *lsop)
{
	struct nvme_fc_rport *rport = lsop->rport;
	struct nvmefc_ls_req *lsreq = &lsop->ls_req;
	unsigned long flags;

	spin_lock_irqsave(&rport->lock, flags);

	if (!lsop->req_queued) {
		spin_unlock_irqrestore(&rport->lock, flags);
		return;
	}

	list_del(&lsop->lsreq_list);

	lsop->req_queued = false;

	spin_unlock_irqrestore(&rport->lock, flags);

	fc_dma_unmap_single(rport->dev, lsreq->rqstdma,
				  (lsreq->rqstlen + lsreq->rsplen),
				  DMA_BIDIRECTIONAL);

	nvme_fc_rport_put(rport);
}

static int
__nvme_fc_send_ls_req(struct nvme_fc_rport *rport,
		struct nvmefc_ls_req_op *lsop,
		void (*done)(struct nvmefc_ls_req *req, int status))
{
	struct nvmefc_ls_req *lsreq = &lsop->ls_req;
	unsigned long flags;
	int ret = 0;

	if (rport->remoteport.port_state != FC_OBJSTATE_ONLINE)
		return -ECONNREFUSED;

	if (!nvme_fc_rport_get(rport))
		return -ESHUTDOWN;

	lsreq->done = done;
	lsop->rport = rport;
	lsop->req_queued = false;
	INIT_LIST_HEAD(&lsop->lsreq_list);
	init_completion(&lsop->ls_done);

	lsreq->rqstdma = fc_dma_map_single(rport->dev, lsreq->rqstaddr,
				  lsreq->rqstlen + lsreq->rsplen,
				  DMA_BIDIRECTIONAL);
	if (fc_dma_mapping_error(rport->dev, lsreq->rqstdma)) {
		ret = -EFAULT;
		goto out_putrport;
	}
	lsreq->rspdma = lsreq->rqstdma + lsreq->rqstlen;

	spin_lock_irqsave(&rport->lock, flags);

	list_add_tail(&lsop->lsreq_list, &rport->ls_req_list);

	lsop->req_queued = true;

	spin_unlock_irqrestore(&rport->lock, flags);

	ret = rport->lport->ops->ls_req(&rport->lport->localport,
					&rport->remoteport, lsreq);
	if (ret)
		goto out_unlink;

	return 0;

out_unlink:
	lsop->ls_error = ret;
	spin_lock_irqsave(&rport->lock, flags);
	lsop->req_queued = false;
	list_del(&lsop->lsreq_list);
	spin_unlock_irqrestore(&rport->lock, flags);
	fc_dma_unmap_single(rport->dev, lsreq->rqstdma,
				  (lsreq->rqstlen + lsreq->rsplen),
				  DMA_BIDIRECTIONAL);
out_putrport:
	nvme_fc_rport_put(rport);

	return ret;
}

static void
nvme_fc_send_ls_req_done(struct nvmefc_ls_req *lsreq, int status)
{
	struct nvmefc_ls_req_op *lsop = ls_req_to_lsop(lsreq);

	lsop->ls_error = status;
	complete(&lsop->ls_done);
}

static int
nvme_fc_send_ls_req(struct nvme_fc_rport *rport, struct nvmefc_ls_req_op *lsop)
{
	struct nvmefc_ls_req *lsreq = &lsop->ls_req;
	struct fcnvme_ls_rjt *rjt = lsreq->rspaddr;
	int ret;

	ret = __nvme_fc_send_ls_req(rport, lsop, nvme_fc_send_ls_req_done);

	if (!ret) {
		/*
		 * No timeout/not interruptible as we need the struct
		 * to exist until the lldd calls us back. Thus mandate
		 * wait until driver calls back.
		 * lldd responsible for the timeout action
		 */
		wait_for_completion(&lsop->ls_done);

		__nvme_fc_finish_ls_req(lsop);

		ret = lsop->ls_error;
	}

	if (ret)
		return ret;

	/* ACC or RJT payload ? */
	if (rjt->w0.ls_cmd == FCNVME_LS_RJT)
		return -ENXIO;

	return 0;
}

static int
nvme_fc_send_ls_req_async(struct nvme_fc_rport *rport,
		struct nvmefc_ls_req_op *lsop,
		void (*done)(struct nvmefc_ls_req *req, int status))
{
	/* don't wait for completion */

	return __nvme_fc_send_ls_req(rport, lsop, done);
}

/* Validation Error indexes into the string table below */
enum {
	VERR_NO_ERROR		= 0,
	VERR_LSACC		= 1,
	VERR_LSDESC_RQST	= 2,
	VERR_LSDESC_RQST_LEN	= 3,
	VERR_ASSOC_ID		= 4,
	VERR_ASSOC_ID_LEN	= 5,
	VERR_CONN_ID		= 6,
	VERR_CONN_ID_LEN	= 7,
	VERR_CR_ASSOC		= 8,
	VERR_CR_ASSOC_ACC_LEN	= 9,
	VERR_CR_CONN		= 10,
	VERR_CR_CONN_ACC_LEN	= 11,
	VERR_DISCONN		= 12,
	VERR_DISCONN_ACC_LEN	= 13,
};

static char *validation_errors[] = {
	"OK",
	"Not LS_ACC",
	"Not LSDESC_RQST",
	"Bad LSDESC_RQST Length",
	"Not Association ID",
	"Bad Association ID Length",
	"Not Connection ID",
	"Bad Connection ID Length",
	"Not CR_ASSOC Rqst",
	"Bad CR_ASSOC ACC Length",
	"Not CR_CONN Rqst",
	"Bad CR_CONN ACC Length",
	"Not Disconnect Rqst",
	"Bad Disconnect ACC Length",
};

static int
nvme_fc_connect_admin_queue(struct nvme_fc_ctrl *ctrl,
	struct nvme_fc_queue *queue, u16 qsize, u16 ersp_ratio)
{
	struct nvmefc_ls_req_op *lsop;
	struct nvmefc_ls_req *lsreq;
	struct fcnvme_ls_cr_assoc_rqst *assoc_rqst;
	struct fcnvme_ls_cr_assoc_acc *assoc_acc;
	int ret, fcret = 0;

	lsop = kzalloc((sizeof(*lsop) +
			 ctrl->lport->ops->lsrqst_priv_sz +
			 sizeof(*assoc_rqst) + sizeof(*assoc_acc)), GFP_KERNEL);
	if (!lsop) {
		ret = -ENOMEM;
		goto out_no_memory;
	}
	lsreq = &lsop->ls_req;

	lsreq->private = (void *)&lsop[1];
	assoc_rqst = (struct fcnvme_ls_cr_assoc_rqst *)
			(lsreq->private + ctrl->lport->ops->lsrqst_priv_sz);
	assoc_acc = (struct fcnvme_ls_cr_assoc_acc *)&assoc_rqst[1];

	assoc_rqst->w0.ls_cmd = FCNVME_LS_CREATE_ASSOCIATION;
	assoc_rqst->desc_list_len =
			cpu_to_be32(sizeof(struct fcnvme_lsdesc_cr_assoc_cmd));

	assoc_rqst->assoc_cmd.desc_tag =
			cpu_to_be32(FCNVME_LSDESC_CREATE_ASSOC_CMD);
	assoc_rqst->assoc_cmd.desc_len =
			fcnvme_lsdesc_len(
				sizeof(struct fcnvme_lsdesc_cr_assoc_cmd));

	assoc_rqst->assoc_cmd.ersp_ratio = cpu_to_be16(ersp_ratio);
	assoc_rqst->assoc_cmd.sqsize = cpu_to_be16(qsize - 1);
	/* Linux supports only Dynamic controllers */
	assoc_rqst->assoc_cmd.cntlid = cpu_to_be16(0xffff);
	uuid_copy(&assoc_rqst->assoc_cmd.hostid, &ctrl->ctrl.opts->host->id);
	strncpy(assoc_rqst->assoc_cmd.hostnqn, ctrl->ctrl.opts->host->nqn,
		min(FCNVME_ASSOC_HOSTNQN_LEN, NVMF_NQN_SIZE));
	strncpy(assoc_rqst->assoc_cmd.subnqn, ctrl->ctrl.opts->subsysnqn,
		min(FCNVME_ASSOC_SUBNQN_LEN, NVMF_NQN_SIZE));

	lsop->queue = queue;
	lsreq->rqstaddr = assoc_rqst;
	lsreq->rqstlen = sizeof(*assoc_rqst);
	lsreq->rspaddr = assoc_acc;
	lsreq->rsplen = sizeof(*assoc_acc);
	lsreq->timeout = NVME_FC_CONNECT_TIMEOUT_SEC;

	ret = nvme_fc_send_ls_req(ctrl->rport, lsop);
	if (ret)
		goto out_free_buffer;

	/* process connect LS completion */

	/* validate the ACC response */
	if (assoc_acc->hdr.w0.ls_cmd != FCNVME_LS_ACC)
		fcret = VERR_LSACC;
	else if (assoc_acc->hdr.desc_list_len !=
			fcnvme_lsdesc_len(
				sizeof(struct fcnvme_ls_cr_assoc_acc)))
		fcret = VERR_CR_ASSOC_ACC_LEN;
	else if (assoc_acc->hdr.rqst.desc_tag !=
			cpu_to_be32(FCNVME_LSDESC_RQST))
		fcret = VERR_LSDESC_RQST;
	else if (assoc_acc->hdr.rqst.desc_len !=
			fcnvme_lsdesc_len(sizeof(struct fcnvme_lsdesc_rqst)))
		fcret = VERR_LSDESC_RQST_LEN;
	else if (assoc_acc->hdr.rqst.w0.ls_cmd != FCNVME_LS_CREATE_ASSOCIATION)
		fcret = VERR_CR_ASSOC;
	else if (assoc_acc->associd.desc_tag !=
			cpu_to_be32(FCNVME_LSDESC_ASSOC_ID))
		fcret = VERR_ASSOC_ID;
	else if (assoc_acc->associd.desc_len !=
			fcnvme_lsdesc_len(
				sizeof(struct fcnvme_lsdesc_assoc_id)))
		fcret = VERR_ASSOC_ID_LEN;
	else if (assoc_acc->connectid.desc_tag !=
			cpu_to_be32(FCNVME_LSDESC_CONN_ID))
		fcret = VERR_CONN_ID;
	else if (assoc_acc->connectid.desc_len !=
			fcnvme_lsdesc_len(sizeof(struct fcnvme_lsdesc_conn_id)))
		fcret = VERR_CONN_ID_LEN;

	if (fcret) {
		ret = -EBADF;
		dev_err(ctrl->dev,
			"q %d connect failed: %s\n",
			queue->qnum, validation_errors[fcret]);
	} else {
		ctrl->association_id =
			be64_to_cpu(assoc_acc->associd.association_id);
		queue->connection_id =
			be64_to_cpu(assoc_acc->connectid.connection_id);
		set_bit(NVME_FC_Q_CONNECTED, &queue->flags);
	}

out_free_buffer:
	kfree(lsop);
out_no_memory:
	if (ret)
		dev_err(ctrl->dev,
			"queue %d connect admin queue failed (%d).\n",
			queue->qnum, ret);
	return ret;
}

static int
nvme_fc_connect_queue(struct nvme_fc_ctrl *ctrl, struct nvme_fc_queue *queue,
			u16 qsize, u16 ersp_ratio)
{
	struct nvmefc_ls_req_op *lsop;
	struct nvmefc_ls_req *lsreq;
	struct fcnvme_ls_cr_conn_rqst *conn_rqst;
	struct fcnvme_ls_cr_conn_acc *conn_acc;
	int ret, fcret = 0;

	lsop = kzalloc((sizeof(*lsop) +
			 ctrl->lport->ops->lsrqst_priv_sz +
			 sizeof(*conn_rqst) + sizeof(*conn_acc)), GFP_KERNEL);
	if (!lsop) {
		ret = -ENOMEM;
		goto out_no_memory;
	}
	lsreq = &lsop->ls_req;

	lsreq->private = (void *)&lsop[1];
	conn_rqst = (struct fcnvme_ls_cr_conn_rqst *)
			(lsreq->private + ctrl->lport->ops->lsrqst_priv_sz);
	conn_acc = (struct fcnvme_ls_cr_conn_acc *)&conn_rqst[1];

	conn_rqst->w0.ls_cmd = FCNVME_LS_CREATE_CONNECTION;
	conn_rqst->desc_list_len = cpu_to_be32(
				sizeof(struct fcnvme_lsdesc_assoc_id) +
				sizeof(struct fcnvme_lsdesc_cr_conn_cmd));

	conn_rqst->associd.desc_tag = cpu_to_be32(FCNVME_LSDESC_ASSOC_ID);
	conn_rqst->associd.desc_len =
			fcnvme_lsdesc_len(
				sizeof(struct fcnvme_lsdesc_assoc_id));
	conn_rqst->associd.association_id = cpu_to_be64(ctrl->association_id);
	conn_rqst->connect_cmd.desc_tag =
			cpu_to_be32(FCNVME_LSDESC_CREATE_CONN_CMD);
	conn_rqst->connect_cmd.desc_len =
			fcnvme_lsdesc_len(
				sizeof(struct fcnvme_lsdesc_cr_conn_cmd));
	conn_rqst->connect_cmd.ersp_ratio = cpu_to_be16(ersp_ratio);
	conn_rqst->connect_cmd.qid = cpu_to_be16(queue->qnum);
	conn_rqst->connect_cmd.sqsize = cpu_to_be16(qsize - 1);

	lsop->queue = queue;
	lsreq->rqstaddr = conn_rqst;
	lsreq->rqstlen = sizeof(*conn_rqst);
	lsreq->rspaddr = conn_acc;
	lsreq->rsplen = sizeof(*conn_acc);
	lsreq->timeout = NVME_FC_CONNECT_TIMEOUT_SEC;

	ret = nvme_fc_send_ls_req(ctrl->rport, lsop);
	if (ret)
		goto out_free_buffer;

	/* process connect LS completion */

	/* validate the ACC response */
	if (conn_acc->hdr.w0.ls_cmd != FCNVME_LS_ACC)
		fcret = VERR_LSACC;
	else if (conn_acc->hdr.desc_list_len !=
			fcnvme_lsdesc_len(sizeof(struct fcnvme_ls_cr_conn_acc)))
		fcret = VERR_CR_CONN_ACC_LEN;
	else if (conn_acc->hdr.rqst.desc_tag != cpu_to_be32(FCNVME_LSDESC_RQST))
		fcret = VERR_LSDESC_RQST;
	else if (conn_acc->hdr.rqst.desc_len !=
			fcnvme_lsdesc_len(sizeof(struct fcnvme_lsdesc_rqst)))
		fcret = VERR_LSDESC_RQST_LEN;
	else if (conn_acc->hdr.rqst.w0.ls_cmd != FCNVME_LS_CREATE_CONNECTION)
		fcret = VERR_CR_CONN;
	else if (conn_acc->connectid.desc_tag !=
			cpu_to_be32(FCNVME_LSDESC_CONN_ID))
		fcret = VERR_CONN_ID;
	else if (conn_acc->connectid.desc_len !=
			fcnvme_lsdesc_len(sizeof(struct fcnvme_lsdesc_conn_id)))
		fcret = VERR_CONN_ID_LEN;

	if (fcret) {
		ret = -EBADF;
		dev_err(ctrl->dev,
			"q %d connect failed: %s\n",
			queue->qnum, validation_errors[fcret]);
	} else {
		queue->connection_id =
			be64_to_cpu(conn_acc->connectid.connection_id);
		set_bit(NVME_FC_Q_CONNECTED, &queue->flags);
	}

out_free_buffer:
	kfree(lsop);
out_no_memory:
	if (ret)
		dev_err(ctrl->dev,
			"queue %d connect command failed (%d).\n",
			queue->qnum, ret);
	return ret;
}

static void
nvme_fc_disconnect_assoc_done(struct nvmefc_ls_req *lsreq, int status)
{
	struct nvmefc_ls_req_op *lsop = ls_req_to_lsop(lsreq);

	__nvme_fc_finish_ls_req(lsop);

	/* fc-nvme initiator doesn't care about success or failure of cmd */

	kfree(lsop);
}

/*
 * This routine sends a FC-NVME LS to disconnect (aka terminate)
 * the FC-NVME Association.  Terminating the association also
 * terminates the FC-NVME connections (per queue, both admin and io
 * queues) that are part of the association. E.g. things are torn
 * down, and the related FC-NVME Association ID and Connection IDs
 * become invalid.
 *
 * The behavior of the fc-nvme initiator is such that its
 * understanding of the association and connections will implicitly
 * be torn down. The action is implicit as it may be due to a loss of
 * connectivity with the fc-nvme target, so you may never get a
 * response even if you tried.  As such, the action of this routine
 * is to asynchronously send the LS, ignore any results of the LS, and
 * continue on with terminating the association. If the fc-nvme target
 * is present and receives the LS, it too can tear down.
 */
static void
nvme_fc_xmt_disconnect_assoc(struct nvme_fc_ctrl *ctrl)
{
	struct fcnvme_ls_disconnect_rqst *discon_rqst;
	struct fcnvme_ls_disconnect_acc *discon_acc;
	struct nvmefc_ls_req_op *lsop;
	struct nvmefc_ls_req *lsreq;
	int ret;

	lsop = kzalloc((sizeof(*lsop) +
			 ctrl->lport->ops->lsrqst_priv_sz +
			 sizeof(*discon_rqst) + sizeof(*discon_acc)),
			GFP_KERNEL);
	if (!lsop)
		/* couldn't send it... too bad */
		return;

	lsreq = &lsop->ls_req;

	lsreq->private = (void *)&lsop[1];
	discon_rqst = (struct fcnvme_ls_disconnect_rqst *)
			(lsreq->private + ctrl->lport->ops->lsrqst_priv_sz);
	discon_acc = (struct fcnvme_ls_disconnect_acc *)&discon_rqst[1];

	discon_rqst->w0.ls_cmd = FCNVME_LS_DISCONNECT;
	discon_rqst->desc_list_len = cpu_to_be32(
				sizeof(struct fcnvme_lsdesc_assoc_id) +
				sizeof(struct fcnvme_lsdesc_disconn_cmd));

	discon_rqst->associd.desc_tag = cpu_to_be32(FCNVME_LSDESC_ASSOC_ID);
	discon_rqst->associd.desc_len =
			fcnvme_lsdesc_len(
				sizeof(struct fcnvme_lsdesc_assoc_id));

	discon_rqst->associd.association_id = cpu_to_be64(ctrl->association_id);

	discon_rqst->discon_cmd.desc_tag = cpu_to_be32(
				FCNVME_LSDESC_DISCONN_CMD);
	discon_rqst->discon_cmd.desc_len =
			fcnvme_lsdesc_len(
				sizeof(struct fcnvme_lsdesc_disconn_cmd));
	discon_rqst->discon_cmd.scope = FCNVME_DISCONN_ASSOCIATION;
	discon_rqst->discon_cmd.id = cpu_to_be64(ctrl->association_id);

	lsreq->rqstaddr = discon_rqst;
	lsreq->rqstlen = sizeof(*discon_rqst);
	lsreq->rspaddr = discon_acc;
	lsreq->rsplen = sizeof(*discon_acc);
	lsreq->timeout = NVME_FC_CONNECT_TIMEOUT_SEC;

	ret = nvme_fc_send_ls_req_async(ctrl->rport, lsop,
				nvme_fc_disconnect_assoc_done);
	if (ret)
		kfree(lsop);

	/* only meaningful part to terminating the association */
	ctrl->association_id = 0;
}


/* *********************** NVME Ctrl Routines **************************** */

static void nvme_fc_error_recovery(struct nvme_fc_ctrl *ctrl, char *errmsg);

static void
__nvme_fc_exit_request(struct nvme_fc_ctrl *ctrl,
		struct nvme_fc_fcp_op *op)
{
	fc_dma_unmap_single(ctrl->lport->dev, op->fcp_req.rspdma,
				sizeof(op->rsp_iu), DMA_FROM_DEVICE);
	fc_dma_unmap_single(ctrl->lport->dev, op->fcp_req.cmddma,
				sizeof(op->cmd_iu), DMA_TO_DEVICE);

	atomic_set(&op->state, FCPOP_STATE_UNINIT);
}

static void
nvme_fc_exit_request(struct blk_mq_tag_set *set, struct request *rq,
		unsigned int hctx_idx)
{
	struct nvme_fc_fcp_op *op = blk_mq_rq_to_pdu(rq);

	return __nvme_fc_exit_request(set->driver_data, op);
}

static int
__nvme_fc_abort_op(struct nvme_fc_ctrl *ctrl, struct nvme_fc_fcp_op *op)
{
	unsigned long flags;
	int opstate;

	spin_lock_irqsave(&ctrl->lock, flags);
	opstate = atomic_xchg(&op->state, FCPOP_STATE_ABORTED);
	if (opstate != FCPOP_STATE_ACTIVE)
		atomic_set(&op->state, opstate);
	else if (ctrl->flags & FCCTRL_TERMIO)
		ctrl->iocnt++;
	spin_unlock_irqrestore(&ctrl->lock, flags);

	if (opstate != FCPOP_STATE_ACTIVE)
		return -ECANCELED;

	ctrl->lport->ops->fcp_abort(&ctrl->lport->localport,
					&ctrl->rport->remoteport,
					op->queue->lldd_handle,
					&op->fcp_req);

	return 0;
}

static void
nvme_fc_abort_aen_ops(struct nvme_fc_ctrl *ctrl)
{
	struct nvme_fc_fcp_op *aen_op = ctrl->aen_ops;
	int i;

	for (i = 0; i < NVME_NR_AEN_COMMANDS; i++, aen_op++)
		__nvme_fc_abort_op(ctrl, aen_op);
}

static inline void
__nvme_fc_fcpop_chk_teardowns(struct nvme_fc_ctrl *ctrl,
		struct nvme_fc_fcp_op *op, int opstate)
{
	unsigned long flags;

	if (opstate == FCPOP_STATE_ABORTED) {
		spin_lock_irqsave(&ctrl->lock, flags);
		if (ctrl->flags & FCCTRL_TERMIO) {
			if (!--ctrl->iocnt)
				wake_up(&ctrl->ioabort_wait);
		}
		spin_unlock_irqrestore(&ctrl->lock, flags);
	}
}

static void
nvme_fc_fcpio_done(struct nvmefc_fcp_req *req)
{
	struct nvme_fc_fcp_op *op = fcp_req_to_fcp_op(req);
	struct request *rq = op->rq;
	struct nvmefc_fcp_req *freq = &op->fcp_req;
	struct nvme_fc_ctrl *ctrl = op->ctrl;
	struct nvme_fc_queue *queue = op->queue;
	struct nvme_completion *cqe = &op->rsp_iu.cqe;
	struct nvme_command *sqe = &op->cmd_iu.sqe;
	__le16 status = cpu_to_le16(NVME_SC_SUCCESS << 1);
	union nvme_result result;
	bool terminate_assoc = true;
	int opstate;

	/*
	 * WARNING:
	 * The current linux implementation of a nvme controller
	 * allocates a single tag set for all io queues and sizes
	 * the io queues to fully hold all possible tags. Thus, the
	 * implementation does not reference or care about the sqhd
	 * value as it never needs to use the sqhd/sqtail pointers
	 * for submission pacing.
	 *
	 * This affects the FC-NVME implementation in two ways:
	 * 1) As the value doesn't matter, we don't need to waste
	 *    cycles extracting it from ERSPs and stamping it in the
	 *    cases where the transport fabricates CQEs on successful
	 *    completions.
	 * 2) The FC-NVME implementation requires that delivery of
	 *    ERSP completions are to go back to the nvme layer in order
	 *    relative to the rsn, such that the sqhd value will always
	 *    be "in order" for the nvme layer. As the nvme layer in
	 *    linux doesn't care about sqhd, there's no need to return
	 *    them in order.
	 *
	 * Additionally:
	 * As the core nvme layer in linux currently does not look at
	 * every field in the cqe - in cases where the FC transport must
	 * fabricate a CQE, the following fields will not be set as they
	 * are not referenced:
	 *      cqe.sqid,  cqe.sqhd,  cqe.command_id
	 *
	 * Failure or error of an individual i/o, in a transport
	 * detected fashion unrelated to the nvme completion status,
	 * can potentially cause the initiator and target sides to get out
	 * of sync on SQ head/tail (aka outstanding io count allowed).
	 * Per FC-NVME spec, failure of an individual command requires
	 * the connection to be terminated, which in turn requires the
	 * association to be terminated.
	 */

	opstate = atomic_xchg(&op->state, FCPOP_STATE_COMPLETE);

	fc_dma_sync_single_for_cpu(ctrl->lport->dev, op->fcp_req.rspdma,
				sizeof(op->rsp_iu), DMA_FROM_DEVICE);

	if (opstate == FCPOP_STATE_ABORTED)
		status = cpu_to_le16(NVME_SC_ABORT_REQ << 1);
	else if (freq->status)
		status = cpu_to_le16(NVME_SC_INTERNAL << 1);

	/*
	 * For the linux implementation, if we have an unsuccessful
	 * status, the blk-mq layer can typically be called with the
	 * non-zero status and the content of the cqe isn't important.
	 */
	if (status)
		goto done;

	/*
	 * command completed successfully relative to the wire
	 * protocol. However, validate anything received and
	 * extract the status and result from the cqe (create it
	 * where necessary).
	 */

	switch (freq->rcv_rsplen) {

	case 0:
	case NVME_FC_SIZEOF_ZEROS_RSP:
		/*
		 * No response payload or 12 bytes of payload (which
		 * should all be zeros) are considered successful and
		 * the transport supplies no payload in the CQE.
		 */
		if (freq->transferred_length !=
			be32_to_cpu(op->cmd_iu.data_len)) {
			status = cpu_to_le16(NVME_SC_INTERNAL << 1);
			goto done;
		}
		result.u64 = 0;
		break;

	case sizeof(struct nvme_fc_ersp_iu):
		/*
		 * The ERSP IU contains a full completion with CQE.
		 * Validate ERSP IU and look at cqe.
		 */
		if (unlikely(be16_to_cpu(op->rsp_iu.iu_len) !=
					(freq->rcv_rsplen / 4) ||
			     be32_to_cpu(op->rsp_iu.xfrd_len) !=
					freq->transferred_length ||
			     op->rsp_iu.status_code ||
			     sqe->common.command_id != cqe->command_id)) {
			status = cpu_to_le16(NVME_SC_INTERNAL << 1);
			goto done;
		}
		result = cqe->result;
		status = cqe->status;
		break;

	default:
		status = cpu_to_le16(NVME_SC_INTERNAL << 1);
		goto done;
	}

	terminate_assoc = false;

done:
	if (op->flags & FCOP_FLAGS_AEN) {
		nvme_complete_async_event(&queue->ctrl->ctrl, status, &result);
		__nvme_fc_fcpop_chk_teardowns(ctrl, op, opstate);
		atomic_set(&op->state, FCPOP_STATE_IDLE);
		op->flags = FCOP_FLAGS_AEN;	/* clear other flags */
		nvme_fc_ctrl_put(ctrl);
		goto check_error;
	}

	__nvme_fc_fcpop_chk_teardowns(ctrl, op, opstate);
	nvme_end_request(rq, status, result);

check_error:
	if (terminate_assoc)
		nvme_fc_error_recovery(ctrl, "transport detected io error");
}

static int
__nvme_fc_init_request(struct nvme_fc_ctrl *ctrl,
		struct nvme_fc_queue *queue, struct nvme_fc_fcp_op *op,
		struct request *rq, u32 rqno)
{
	struct nvme_fcp_op_w_sgl *op_w_sgl =
		container_of(op, typeof(*op_w_sgl), op);
	struct nvme_fc_cmd_iu *cmdiu = &op->cmd_iu;
	int ret = 0;

	memset(op, 0, sizeof(*op));
	op->fcp_req.cmdaddr = &op->cmd_iu;
	op->fcp_req.cmdlen = sizeof(op->cmd_iu);
	op->fcp_req.rspaddr = &op->rsp_iu;
	op->fcp_req.rsplen = sizeof(op->rsp_iu);
	op->fcp_req.done = nvme_fc_fcpio_done;
	op->ctrl = ctrl;
	op->queue = queue;
	op->rq = rq;
	op->rqno = rqno;

	cmdiu->scsi_id = NVME_CMD_SCSI_ID;
	cmdiu->fc_id = NVME_CMD_FC_ID;
	cmdiu->iu_len = cpu_to_be16(sizeof(*cmdiu) / sizeof(u32));

	op->fcp_req.cmddma = fc_dma_map_single(ctrl->lport->dev,
				&op->cmd_iu, sizeof(op->cmd_iu), DMA_TO_DEVICE);
	if (fc_dma_mapping_error(ctrl->lport->dev, op->fcp_req.cmddma)) {
		dev_err(ctrl->dev,
			"FCP Op failed - cmdiu dma mapping failed.\n");
		ret = -EFAULT;
		goto out_on_error;
	}

	op->fcp_req.rspdma = fc_dma_map_single(ctrl->lport->dev,
				&op->rsp_iu, sizeof(op->rsp_iu),
				DMA_FROM_DEVICE);
	if (fc_dma_mapping_error(ctrl->lport->dev, op->fcp_req.rspdma)) {
		dev_err(ctrl->dev,
			"FCP Op failed - rspiu dma mapping failed.\n");
		ret = -EFAULT;
	}

	atomic_set(&op->state, FCPOP_STATE_IDLE);
out_on_error:
	return ret;
}

static int
nvme_fc_init_request(struct blk_mq_tag_set *set, struct request *rq,
		unsigned int hctx_idx, unsigned int numa_node)
{
	struct nvme_fc_ctrl *ctrl = set->driver_data;
	struct nvme_fcp_op_w_sgl *op = blk_mq_rq_to_pdu(rq);
	int queue_idx = (set == &ctrl->tag_set) ?
			hctx_idx + 1 : 0;
	struct nvme_fc_queue *queue = &ctrl->queues[queue_idx];
	int res;

	nvme_req(rq)->ctrl = &ctrl->ctrl;
	res = __nvme_fc_init_request(ctrl, queue, &op->op, rq, queue->rqcnt++);
	if (res)
		return res;
	op->op.fcp_req.first_sgl = &op->sgl[0];
	op->op.fcp_req.private = &op->priv[0];
	return res;
}

static int
nvme_fc_init_aen_ops(struct nvme_fc_ctrl *ctrl)
{
	struct nvme_fc_fcp_op *aen_op;
	struct nvme_fc_cmd_iu *cmdiu;
	struct nvme_command *sqe;
	void *private;
	int i, ret;

	aen_op = ctrl->aen_ops;
	for (i = 0; i < NVME_NR_AEN_COMMANDS; i++, aen_op++) {
		private = kzalloc(ctrl->lport->ops->fcprqst_priv_sz,
						GFP_KERNEL);
		if (!private)
			return -ENOMEM;

		cmdiu = &aen_op->cmd_iu;
		sqe = &cmdiu->sqe;
		ret = __nvme_fc_init_request(ctrl, &ctrl->queues[0],
				aen_op, (struct request *)NULL,
				(NVME_AQ_BLK_MQ_DEPTH + i));
		if (ret) {
			kfree(private);
			return ret;
		}

		aen_op->flags = FCOP_FLAGS_AEN;
		aen_op->fcp_req.private = private;

		memset(sqe, 0, sizeof(*sqe));
		sqe->common.opcode = nvme_admin_async_event;
		/* Note: core layer may overwrite the sqe.command_id value */
		sqe->common.command_id = NVME_AQ_BLK_MQ_DEPTH + i;
	}
	return 0;
}

static void
nvme_fc_term_aen_ops(struct nvme_fc_ctrl *ctrl)
{
	struct nvme_fc_fcp_op *aen_op;
	int i;

	aen_op = ctrl->aen_ops;
	for (i = 0; i < NVME_NR_AEN_COMMANDS; i++, aen_op++) {
		if (!aen_op->fcp_req.private)
			continue;

		__nvme_fc_exit_request(ctrl, aen_op);

		kfree(aen_op->fcp_req.private);
		aen_op->fcp_req.private = NULL;
	}
}

static inline void
__nvme_fc_init_hctx(struct blk_mq_hw_ctx *hctx, struct nvme_fc_ctrl *ctrl,
		unsigned int qidx)
{
	struct nvme_fc_queue *queue = &ctrl->queues[qidx];

	hctx->driver_data = queue;
	queue->hctx = hctx;
}

static int
nvme_fc_init_hctx(struct blk_mq_hw_ctx *hctx, void *data,
		unsigned int hctx_idx)
{
	struct nvme_fc_ctrl *ctrl = data;

	__nvme_fc_init_hctx(hctx, ctrl, hctx_idx + 1);

	return 0;
}

static int
nvme_fc_init_admin_hctx(struct blk_mq_hw_ctx *hctx, void *data,
		unsigned int hctx_idx)
{
	struct nvme_fc_ctrl *ctrl = data;

	__nvme_fc_init_hctx(hctx, ctrl, hctx_idx);

	return 0;
}

static void
nvme_fc_init_queue(struct nvme_fc_ctrl *ctrl, int idx)
{
	struct nvme_fc_queue *queue;

	queue = &ctrl->queues[idx];
	memset(queue, 0, sizeof(*queue));
	queue->ctrl = ctrl;
	queue->qnum = idx;
	atomic_set(&queue->csn, 1);
	queue->dev = ctrl->dev;

	if (idx > 0)
		queue->cmnd_capsule_len = ctrl->ctrl.ioccsz * 16;
	else
		queue->cmnd_capsule_len = sizeof(struct nvme_command);

	/*
	 * Considered whether we should allocate buffers for all SQEs
	 * and CQEs and dma map them - mapping their respective entries
	 * into the request structures (kernel vm addr and dma address)
	 * thus the driver could use the buffers/mappings directly.
	 * It only makes sense if the LLDD would use them for its
	 * messaging api. It's very unlikely most adapter api's would use
	 * a native NVME sqe/cqe. More reasonable if FC-NVME IU payload
	 * structures were used instead.
	 */
}

/*
 * This routine terminates a queue at the transport level.
 * The transport has already ensured that all outstanding ios on
 * the queue have been terminated.
 * The transport will send a Disconnect LS request to terminate
 * the queue's connection. Termination of the admin queue will also
 * terminate the association at the target.
 */
static void
nvme_fc_free_queue(struct nvme_fc_queue *queue)
{
	if (!test_and_clear_bit(NVME_FC_Q_CONNECTED, &queue->flags))
		return;

	clear_bit(NVME_FC_Q_LIVE, &queue->flags);
	/*
	 * Current implementation never disconnects a single queue.
	 * It always terminates a whole association. So there is never
	 * a disconnect(queue) LS sent to the target.
	 */

	queue->connection_id = 0;
	atomic_set(&queue->csn, 1);
}

static void
__nvme_fc_delete_hw_queue(struct nvme_fc_ctrl *ctrl,
	struct nvme_fc_queue *queue, unsigned int qidx)
{
	if (ctrl->lport->ops->delete_queue)
		ctrl->lport->ops->delete_queue(&ctrl->lport->localport, qidx,
				queue->lldd_handle);
	queue->lldd_handle = NULL;
}

static void
nvme_fc_free_io_queues(struct nvme_fc_ctrl *ctrl)
{
	int i;

	for (i = 1; i < ctrl->ctrl.queue_count; i++)
		nvme_fc_free_queue(&ctrl->queues[i]);
}

static int
__nvme_fc_create_hw_queue(struct nvme_fc_ctrl *ctrl,
	struct nvme_fc_queue *queue, unsigned int qidx, u16 qsize)
{
	int ret = 0;

	queue->lldd_handle = NULL;
	if (ctrl->lport->ops->create_queue)
		ret = ctrl->lport->ops->create_queue(&ctrl->lport->localport,
				qidx, qsize, &queue->lldd_handle);

	return ret;
}

static void
nvme_fc_delete_hw_io_queues(struct nvme_fc_ctrl *ctrl)
{
	struct nvme_fc_queue *queue = &ctrl->queues[ctrl->ctrl.queue_count - 1];
	int i;

	for (i = ctrl->ctrl.queue_count - 1; i >= 1; i--, queue--)
		__nvme_fc_delete_hw_queue(ctrl, queue, i);
}

static int
nvme_fc_create_hw_io_queues(struct nvme_fc_ctrl *ctrl, u16 qsize)
{
	struct nvme_fc_queue *queue = &ctrl->queues[1];
	int i, ret;

	for (i = 1; i < ctrl->ctrl.queue_count; i++, queue++) {
		ret = __nvme_fc_create_hw_queue(ctrl, queue, i, qsize);
		if (ret)
			goto delete_queues;
	}

	return 0;

delete_queues:
	for (; i >= 0; i--)
		__nvme_fc_delete_hw_queue(ctrl, &ctrl->queues[i], i);
	return ret;
}

static int
nvme_fc_connect_io_queues(struct nvme_fc_ctrl *ctrl, u16 qsize)
{
	int i, ret = 0;

	for (i = 1; i < ctrl->ctrl.queue_count; i++) {
		ret = nvme_fc_connect_queue(ctrl, &ctrl->queues[i], qsize,
					(qsize / 5));
		if (ret)
			break;
		ret = nvmf_connect_io_queue(&ctrl->ctrl, i);
		if (ret)
			break;

		set_bit(NVME_FC_Q_LIVE, &ctrl->queues[i].flags);
	}

	return ret;
}

static void
nvme_fc_init_io_queues(struct nvme_fc_ctrl *ctrl)
{
	int i;

	for (i = 1; i < ctrl->ctrl.queue_count; i++)
		nvme_fc_init_queue(ctrl, i);
}

static void
nvme_fc_ctrl_free(struct kref *ref)
{
	struct nvme_fc_ctrl *ctrl =
		container_of(ref, struct nvme_fc_ctrl, ref);
	unsigned long flags;

	if (ctrl->ctrl.tagset) {
		blk_cleanup_queue(ctrl->ctrl.connect_q);
		blk_mq_free_tag_set(&ctrl->tag_set);
	}

rport list */ 2004 spin_lock_irqsave(&ctrl->rport->lock, flags); 2005 list_del(&ctrl->ctrl_list); 2006 spin_unlock_irqrestore(&ctrl->rport->lock, flags); 2007 2008 blk_mq_unquiesce_queue(ctrl->ctrl.admin_q); 2009 blk_cleanup_queue(ctrl->ctrl.admin_q); 2010 blk_mq_free_tag_set(&ctrl->admin_tag_set); 2011 2012 kfree(ctrl->queues); 2013 2014 put_device(ctrl->dev); 2015 nvme_fc_rport_put(ctrl->rport); 2016 2017 ida_simple_remove(&nvme_fc_ctrl_cnt, ctrl->cnum); 2018 if (ctrl->ctrl.opts) 2019 nvmf_free_options(ctrl->ctrl.opts); 2020 kfree(ctrl); 2021 } 2022 2023 static void 2024 nvme_fc_ctrl_put(struct nvme_fc_ctrl *ctrl) 2025 { 2026 kref_put(&ctrl->ref, nvme_fc_ctrl_free); 2027 } 2028 2029 static int 2030 nvme_fc_ctrl_get(struct nvme_fc_ctrl *ctrl) 2031 { 2032 return kref_get_unless_zero(&ctrl->ref); 2033 } 2034 2035 /* 2036 * All accesses from nvme core layer done - can now free the 2037 * controller. Called after last nvme_put_ctrl() call 2038 */ 2039 static void 2040 nvme_fc_nvme_ctrl_freed(struct nvme_ctrl *nctrl) 2041 { 2042 struct nvme_fc_ctrl *ctrl = to_fc_ctrl(nctrl); 2043 2044 WARN_ON(nctrl != &ctrl->ctrl); 2045 2046 nvme_fc_ctrl_put(ctrl); 2047 } 2048 2049 static void 2050 nvme_fc_error_recovery(struct nvme_fc_ctrl *ctrl, char *errmsg) 2051 { 2052 /* only proceed if in LIVE state - e.g. on first error */ 2053 if (ctrl->ctrl.state != NVME_CTRL_LIVE) 2054 return; 2055 2056 dev_warn(ctrl->ctrl.device, 2057 "NVME-FC{%d}: transport association error detected: %s\n", 2058 ctrl->cnum, errmsg); 2059 dev_warn(ctrl->ctrl.device, 2060 "NVME-FC{%d}: resetting controller\n", ctrl->cnum); 2061 2062 nvme_reset_ctrl(&ctrl->ctrl); 2063 } 2064 2065 static enum blk_eh_timer_return 2066 nvme_fc_timeout(struct request *rq, bool reserved) 2067 { 2068 struct nvme_fc_fcp_op *op = blk_mq_rq_to_pdu(rq); 2069 struct nvme_fc_ctrl *ctrl = op->ctrl; 2070 2071 /* 2072 * we can't individually ABTS an io without affecting the queue, 2073 * thus killing the queue, and thus the association. 2074 * So resolve by performing a controller reset, which will stop 2075 * the host/io stack, terminate the association on the link, 2076 * and recreate an association on the link. 2077 */ 2078 nvme_fc_error_recovery(ctrl, "io timeout error"); 2079 2080 /* 2081 * the io abort has been initiated. Have the reset timer 2082 * restarted and the abort completion will complete the io 2083 * shortly. Avoids a synchronous wait while the abort finishes. 2084 */ 2085 return BLK_EH_RESET_TIMER; 2086 } 2087 2088 static int 2089 nvme_fc_map_data(struct nvme_fc_ctrl *ctrl, struct request *rq, 2090 struct nvme_fc_fcp_op *op) 2091 { 2092 struct nvmefc_fcp_req *freq = &op->fcp_req; 2093 enum dma_data_direction dir; 2094 int ret; 2095 2096 freq->sg_cnt = 0; 2097 2098 if (!blk_rq_payload_bytes(rq)) 2099 return 0; 2100 2101 freq->sg_table.sgl = freq->first_sgl; 2102 ret = sg_alloc_table_chained(&freq->sg_table, 2103 blk_rq_nr_phys_segments(rq), freq->sg_table.sgl); 2104 if (ret) 2105 return -ENOMEM; 2106 2107 op->nents = blk_rq_map_sg(rq->q, rq, freq->sg_table.sgl); 2108 WARN_ON(op->nents > blk_rq_nr_phys_segments(rq)); 2109 dir = (rq_data_dir(rq) == WRITE) ? 
DMA_TO_DEVICE : DMA_FROM_DEVICE; 2110 freq->sg_cnt = fc_dma_map_sg(ctrl->lport->dev, freq->sg_table.sgl, 2111 op->nents, dir); 2112 if (unlikely(freq->sg_cnt <= 0)) { 2113 sg_free_table_chained(&freq->sg_table, true); 2114 freq->sg_cnt = 0; 2115 return -EFAULT; 2116 } 2117 2118 /* 2119 * TODO: blk_integrity_rq(rq) for DIF 2120 */ 2121 return 0; 2122 } 2123 2124 static void 2125 nvme_fc_unmap_data(struct nvme_fc_ctrl *ctrl, struct request *rq, 2126 struct nvme_fc_fcp_op *op) 2127 { 2128 struct nvmefc_fcp_req *freq = &op->fcp_req; 2129 2130 if (!freq->sg_cnt) 2131 return; 2132 2133 fc_dma_unmap_sg(ctrl->lport->dev, freq->sg_table.sgl, op->nents, 2134 ((rq_data_dir(rq) == WRITE) ? 2135 DMA_TO_DEVICE : DMA_FROM_DEVICE)); 2136 2137 nvme_cleanup_cmd(rq); 2138 2139 sg_free_table_chained(&freq->sg_table, true); 2140 2141 freq->sg_cnt = 0; 2142 } 2143 2144 /* 2145 * In FC, the queue is a logical thing. At transport connect, the target 2146 * creates its "queue" and returns a handle that is to be given back to the 2147 * target whenever the host posts something to the corresponding SQ. When an 2148 * SQE is sent on a SQ, FC effectively considers the SQE, or rather the 2149 * command contained within the SQE, an io, and assigns a FC exchange 2150 * to it. The SQE and the associated SQ handle are sent in the initial 2151 * CMD IU sent on the exchange. All transfers relative to the io occur 2152 * as part of the exchange. The CQE is the last thing for the io, 2153 * which is transferred (explicitly or implicitly) with the RSP IU 2154 * sent on the exchange. After the CQE is received, the FC exchange is 2155 * terminated and the exchange may be used on a different io. 2156 * 2157 * The transport to LLDD api has the transport making a request for a 2158 * new fcp io to the LLDD. The LLDD then allocates a FC exchange 2159 * resource and transfers the command. The LLDD will then process all 2160 * steps to complete the io. Upon completion, the transport done routine 2161 * is called. 2162 * 2163 * So - while the operation is outstanding to the LLDD, there is a link 2164 * level FC exchange resource that is also outstanding. This must be 2165 * considered in all cleanup operations.
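 *
 * Summarizing the per-io sequence described above (illustrative only,
 * not an exhaustive protocol description):
 *   1. the host sends the CMD IU (SQE + SQ handle) on a newly
 *      allocated exchange
 *   2. any data for the io moves as part of that same exchange
 *   3. the target returns the RSP/ERSP IU that carries (or implies)
 *      the CQE
 *   4. the exchange is then released and may be reused for another io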
2166 */ 2167 static blk_status_t 2168 nvme_fc_start_fcp_op(struct nvme_fc_ctrl *ctrl, struct nvme_fc_queue *queue, 2169 struct nvme_fc_fcp_op *op, u32 data_len, 2170 enum nvmefc_fcp_datadir io_dir) 2171 { 2172 struct nvme_fc_cmd_iu *cmdiu = &op->cmd_iu; 2173 struct nvme_command *sqe = &cmdiu->sqe; 2174 u32 csn; 2175 int ret, opstate; 2176 2177 /* 2178 * before attempting to send the io, check to see if we believe 2179 * the target device is present 2180 */ 2181 if (ctrl->rport->remoteport.port_state != FC_OBJSTATE_ONLINE) 2182 return BLK_STS_RESOURCE; 2183 2184 if (!nvme_fc_ctrl_get(ctrl)) 2185 return BLK_STS_IOERR; 2186 2187 /* format the FC-NVME CMD IU and fcp_req */ 2188 cmdiu->connection_id = cpu_to_be64(queue->connection_id); 2189 csn = atomic_inc_return(&queue->csn); 2190 cmdiu->csn = cpu_to_be32(csn); 2191 cmdiu->data_len = cpu_to_be32(data_len); 2192 switch (io_dir) { 2193 case NVMEFC_FCP_WRITE: 2194 cmdiu->flags = FCNVME_CMD_FLAGS_WRITE; 2195 break; 2196 case NVMEFC_FCP_READ: 2197 cmdiu->flags = FCNVME_CMD_FLAGS_READ; 2198 break; 2199 case NVMEFC_FCP_NODATA: 2200 cmdiu->flags = 0; 2201 break; 2202 } 2203 op->fcp_req.payload_length = data_len; 2204 op->fcp_req.io_dir = io_dir; 2205 op->fcp_req.transferred_length = 0; 2206 op->fcp_req.rcv_rsplen = 0; 2207 op->fcp_req.status = NVME_SC_SUCCESS; 2208 op->fcp_req.sqid = cpu_to_le16(queue->qnum); 2209 2210 /* 2211 * validate per fabric rules, set fields mandated by fabric spec 2212 * as well as those by FC-NVME spec. 2213 */ 2214 WARN_ON_ONCE(sqe->common.metadata); 2215 sqe->common.flags |= NVME_CMD_SGL_METABUF; 2216 2217 /* 2218 * format SQE DPTR field per FC-NVME rules: 2219 * type=0x5 Transport SGL Data Block Descriptor 2220 * subtype=0xA Transport-specific value 2221 * address=0 2222 * length=length of the data series 2223 */ 2224 sqe->rw.dptr.sgl.type = (NVME_TRANSPORT_SGL_DATA_DESC << 4) | 2225 NVME_SGL_FMT_TRANSPORT_A; 2226 sqe->rw.dptr.sgl.length = cpu_to_le32(data_len); 2227 sqe->rw.dptr.sgl.addr = 0; 2228 2229 if (!(op->flags & FCOP_FLAGS_AEN)) { 2230 ret = nvme_fc_map_data(ctrl, op->rq, op); 2231 if (ret < 0) { 2232 nvme_cleanup_cmd(op->rq); 2233 nvme_fc_ctrl_put(ctrl); 2234 if (ret == -ENOMEM || ret == -EAGAIN) 2235 return BLK_STS_RESOURCE; 2236 return BLK_STS_IOERR; 2237 } 2238 } 2239 2240 fc_dma_sync_single_for_device(ctrl->lport->dev, op->fcp_req.cmddma, 2241 sizeof(op->cmd_iu), DMA_TO_DEVICE); 2242 2243 atomic_set(&op->state, FCPOP_STATE_ACTIVE); 2244 2245 if (!(op->flags & FCOP_FLAGS_AEN)) 2246 blk_mq_start_request(op->rq); 2247 2248 ret = ctrl->lport->ops->fcp_io(&ctrl->lport->localport, 2249 &ctrl->rport->remoteport, 2250 queue->lldd_handle, &op->fcp_req); 2251 2252 if (ret) { 2253 opstate = atomic_xchg(&op->state, FCPOP_STATE_COMPLETE); 2254 __nvme_fc_fcpop_chk_teardowns(ctrl, op, opstate); 2255 2256 if (!(op->flags & FCOP_FLAGS_AEN)) 2257 nvme_fc_unmap_data(ctrl, op->rq, op); 2258 2259 nvme_fc_ctrl_put(ctrl); 2260 2261 if (ctrl->rport->remoteport.port_state == FC_OBJSTATE_ONLINE && 2262 ret != -EBUSY) 2263 return BLK_STS_IOERR; 2264 2265 return BLK_STS_RESOURCE; 2266 } 2267 2268 return BLK_STS_OK; 2269 } 2270 2271 static blk_status_t 2272 nvme_fc_queue_rq(struct blk_mq_hw_ctx *hctx, 2273 const struct blk_mq_queue_data *bd) 2274 { 2275 struct nvme_ns *ns = hctx->queue->queuedata; 2276 struct nvme_fc_queue *queue = hctx->driver_data; 2277 struct nvme_fc_ctrl *ctrl = queue->ctrl; 2278 struct request *rq = bd->rq; 2279 struct nvme_fc_fcp_op *op = blk_mq_rq_to_pdu(rq); 2280 struct nvme_fc_cmd_iu *cmdiu = &op->cmd_iu; 2281 
struct nvme_command *sqe = &cmdiu->sqe; 2282 enum nvmefc_fcp_datadir io_dir; 2283 bool queue_ready = test_bit(NVME_FC_Q_LIVE, &queue->flags); 2284 u32 data_len; 2285 blk_status_t ret; 2286 2287 if (ctrl->rport->remoteport.port_state != FC_OBJSTATE_ONLINE || 2288 !nvmf_check_ready(&queue->ctrl->ctrl, rq, queue_ready)) 2289 return nvmf_fail_nonready_command(&queue->ctrl->ctrl, rq); 2290 2291 ret = nvme_setup_cmd(ns, rq, sqe); 2292 if (ret) 2293 return ret; 2294 2295 data_len = blk_rq_payload_bytes(rq); 2296 if (data_len) 2297 io_dir = ((rq_data_dir(rq) == WRITE) ? 2298 NVMEFC_FCP_WRITE : NVMEFC_FCP_READ); 2299 else 2300 io_dir = NVMEFC_FCP_NODATA; 2301 2302 return nvme_fc_start_fcp_op(ctrl, queue, op, data_len, io_dir); 2303 } 2304 2305 static struct blk_mq_tags * 2306 nvme_fc_tagset(struct nvme_fc_queue *queue) 2307 { 2308 if (queue->qnum == 0) 2309 return queue->ctrl->admin_tag_set.tags[queue->qnum]; 2310 2311 return queue->ctrl->tag_set.tags[queue->qnum - 1]; 2312 } 2313 2314 static int 2315 nvme_fc_poll(struct blk_mq_hw_ctx *hctx, unsigned int tag) 2316 2317 { 2318 struct nvme_fc_queue *queue = hctx->driver_data; 2319 struct nvme_fc_ctrl *ctrl = queue->ctrl; 2320 struct request *req; 2321 struct nvme_fc_fcp_op *op; 2322 2323 req = blk_mq_tag_to_rq(nvme_fc_tagset(queue), tag); 2324 if (!req) 2325 return 0; 2326 2327 op = blk_mq_rq_to_pdu(req); 2328 2329 if ((atomic_read(&op->state) == FCPOP_STATE_ACTIVE) && 2330 (ctrl->lport->ops->poll_queue)) 2331 ctrl->lport->ops->poll_queue(&ctrl->lport->localport, 2332 queue->lldd_handle); 2333 2334 return ((atomic_read(&op->state) != FCPOP_STATE_ACTIVE)); 2335 } 2336 2337 static void 2338 nvme_fc_submit_async_event(struct nvme_ctrl *arg) 2339 { 2340 struct nvme_fc_ctrl *ctrl = to_fc_ctrl(arg); 2341 struct nvme_fc_fcp_op *aen_op; 2342 unsigned long flags; 2343 bool terminating = false; 2344 blk_status_t ret; 2345 2346 spin_lock_irqsave(&ctrl->lock, flags); 2347 if (ctrl->flags & FCCTRL_TERMIO) 2348 terminating = true; 2349 spin_unlock_irqrestore(&ctrl->lock, flags); 2350 2351 if (terminating) 2352 return; 2353 2354 aen_op = &ctrl->aen_ops[0]; 2355 2356 ret = nvme_fc_start_fcp_op(ctrl, aen_op->queue, aen_op, 0, 2357 NVMEFC_FCP_NODATA); 2358 if (ret) 2359 dev_err(ctrl->ctrl.device, 2360 "failed async event work\n"); 2361 } 2362 2363 static void 2364 nvme_fc_complete_rq(struct request *rq) 2365 { 2366 struct nvme_fc_fcp_op *op = blk_mq_rq_to_pdu(rq); 2367 struct nvme_fc_ctrl *ctrl = op->ctrl; 2368 2369 atomic_set(&op->state, FCPOP_STATE_IDLE); 2370 2371 nvme_fc_unmap_data(ctrl, rq, op); 2372 nvme_complete_rq(rq); 2373 nvme_fc_ctrl_put(ctrl); 2374 } 2375 2376 /* 2377 * This routine is used by the transport when it needs to find active 2378 * io on a queue that is to be terminated. The transport uses 2379 * blk_mq_tagset_busy_iter() to find the busy requests, which then invoke 2380 * this routine to kill them one by one. 2381 * 2382 * As FC allocates an FC exchange for each io, the transport must contact 2383 * the LLDD to terminate the exchange, thus releasing the FC exchange. 2384 * After terminating the exchange the LLDD will call the transport's 2385 * normal io done path for the request, but it will have an aborted 2386 * status. The done path will return the io request back to the block 2387 * layer with an error status.
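 *
 * Typical usage (see nvme_fc_delete_association() below), sketched here
 * for illustration:
 *
 *	blk_mq_tagset_busy_iter(&ctrl->tag_set,
 *			nvme_fc_terminate_exchange, &ctrl->ctrl);
 *
 * i.e. the iterator supplies each busy request, and the controller's
 * nvme_ctrl is passed as the callback data consumed by this routine.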
2388 */ 2389 static void 2390 nvme_fc_terminate_exchange(struct request *req, void *data, bool reserved) 2391 { 2392 struct nvme_ctrl *nctrl = data; 2393 struct nvme_fc_ctrl *ctrl = to_fc_ctrl(nctrl); 2394 struct nvme_fc_fcp_op *op = blk_mq_rq_to_pdu(req); 2395 2396 __nvme_fc_abort_op(ctrl, op); 2397 } 2398 2399 2400 static const struct blk_mq_ops nvme_fc_mq_ops = { 2401 .queue_rq = nvme_fc_queue_rq, 2402 .complete = nvme_fc_complete_rq, 2403 .init_request = nvme_fc_init_request, 2404 .exit_request = nvme_fc_exit_request, 2405 .init_hctx = nvme_fc_init_hctx, 2406 .poll = nvme_fc_poll, 2407 .timeout = nvme_fc_timeout, 2408 }; 2409 2410 static int 2411 nvme_fc_create_io_queues(struct nvme_fc_ctrl *ctrl) 2412 { 2413 struct nvmf_ctrl_options *opts = ctrl->ctrl.opts; 2414 unsigned int nr_io_queues; 2415 int ret; 2416 2417 nr_io_queues = min(min(opts->nr_io_queues, num_online_cpus()), 2418 ctrl->lport->ops->max_hw_queues); 2419 ret = nvme_set_queue_count(&ctrl->ctrl, &nr_io_queues); 2420 if (ret) { 2421 dev_info(ctrl->ctrl.device, 2422 "set_queue_count failed: %d\n", ret); 2423 return ret; 2424 } 2425 2426 ctrl->ctrl.queue_count = nr_io_queues + 1; 2427 if (!nr_io_queues) 2428 return 0; 2429 2430 nvme_fc_init_io_queues(ctrl); 2431 2432 memset(&ctrl->tag_set, 0, sizeof(ctrl->tag_set)); 2433 ctrl->tag_set.ops = &nvme_fc_mq_ops; 2434 ctrl->tag_set.queue_depth = ctrl->ctrl.opts->queue_size; 2435 ctrl->tag_set.reserved_tags = 1; /* fabric connect */ 2436 ctrl->tag_set.numa_node = NUMA_NO_NODE; 2437 ctrl->tag_set.flags = BLK_MQ_F_SHOULD_MERGE; 2438 ctrl->tag_set.cmd_size = 2439 struct_size((struct nvme_fcp_op_w_sgl *)NULL, priv, 2440 ctrl->lport->ops->fcprqst_priv_sz); 2441 ctrl->tag_set.driver_data = ctrl; 2442 ctrl->tag_set.nr_hw_queues = ctrl->ctrl.queue_count - 1; 2443 ctrl->tag_set.timeout = NVME_IO_TIMEOUT; 2444 2445 ret = blk_mq_alloc_tag_set(&ctrl->tag_set); 2446 if (ret) 2447 return ret; 2448 2449 ctrl->ctrl.tagset = &ctrl->tag_set; 2450 2451 ctrl->ctrl.connect_q = blk_mq_init_queue(&ctrl->tag_set); 2452 if (IS_ERR(ctrl->ctrl.connect_q)) { 2453 ret = PTR_ERR(ctrl->ctrl.connect_q); 2454 goto out_free_tag_set; 2455 } 2456 2457 ret = nvme_fc_create_hw_io_queues(ctrl, ctrl->ctrl.sqsize + 1); 2458 if (ret) 2459 goto out_cleanup_blk_queue; 2460 2461 ret = nvme_fc_connect_io_queues(ctrl, ctrl->ctrl.sqsize + 1); 2462 if (ret) 2463 goto out_delete_hw_queues; 2464 2465 ctrl->ioq_live = true; 2466 2467 return 0; 2468 2469 out_delete_hw_queues: 2470 nvme_fc_delete_hw_io_queues(ctrl); 2471 out_cleanup_blk_queue: 2472 blk_cleanup_queue(ctrl->ctrl.connect_q); 2473 out_free_tag_set: 2474 blk_mq_free_tag_set(&ctrl->tag_set); 2475 nvme_fc_free_io_queues(ctrl); 2476 2477 /* force put free routine to ignore io queues */ 2478 ctrl->ctrl.tagset = NULL; 2479 2480 return ret; 2481 } 2482 2483 static int 2484 nvme_fc_recreate_io_queues(struct nvme_fc_ctrl *ctrl) 2485 { 2486 struct nvmf_ctrl_options *opts = ctrl->ctrl.opts; 2487 unsigned int nr_io_queues; 2488 int ret; 2489 2490 nr_io_queues = min(min(opts->nr_io_queues, num_online_cpus()), 2491 ctrl->lport->ops->max_hw_queues); 2492 ret = nvme_set_queue_count(&ctrl->ctrl, &nr_io_queues); 2493 if (ret) { 2494 dev_info(ctrl->ctrl.device, 2495 "set_queue_count failed: %d\n", ret); 2496 return ret; 2497 } 2498 2499 ctrl->ctrl.queue_count = nr_io_queues + 1; 2500 /* check for io queues existing */ 2501 if (ctrl->ctrl.queue_count == 1) 2502 return 0; 2503 2504 ret = nvme_fc_create_hw_io_queues(ctrl, ctrl->ctrl.sqsize + 1); 2505 if (ret) 2506 goto out_free_io_queues; 2507 
2508 ret = nvme_fc_connect_io_queues(ctrl, ctrl->ctrl.sqsize + 1); 2509 if (ret) 2510 goto out_delete_hw_queues; 2511 2512 blk_mq_update_nr_hw_queues(&ctrl->tag_set, nr_io_queues); 2513 2514 return 0; 2515 2516 out_delete_hw_queues: 2517 nvme_fc_delete_hw_io_queues(ctrl); 2518 out_free_io_queues: 2519 nvme_fc_free_io_queues(ctrl); 2520 return ret; 2521 } 2522 2523 static void 2524 nvme_fc_rport_active_on_lport(struct nvme_fc_rport *rport) 2525 { 2526 struct nvme_fc_lport *lport = rport->lport; 2527 2528 atomic_inc(&lport->act_rport_cnt); 2529 } 2530 2531 static void 2532 nvme_fc_rport_inactive_on_lport(struct nvme_fc_rport *rport) 2533 { 2534 struct nvme_fc_lport *lport = rport->lport; 2535 u32 cnt; 2536 2537 cnt = atomic_dec_return(&lport->act_rport_cnt); 2538 if (cnt == 0 && lport->localport.port_state == FC_OBJSTATE_DELETED) 2539 lport->ops->localport_delete(&lport->localport); 2540 } 2541 2542 static int 2543 nvme_fc_ctlr_active_on_rport(struct nvme_fc_ctrl *ctrl) 2544 { 2545 struct nvme_fc_rport *rport = ctrl->rport; 2546 u32 cnt; 2547 2548 if (ctrl->assoc_active) 2549 return 1; 2550 2551 ctrl->assoc_active = true; 2552 cnt = atomic_inc_return(&rport->act_ctrl_cnt); 2553 if (cnt == 1) 2554 nvme_fc_rport_active_on_lport(rport); 2555 2556 return 0; 2557 } 2558 2559 static int 2560 nvme_fc_ctlr_inactive_on_rport(struct nvme_fc_ctrl *ctrl) 2561 { 2562 struct nvme_fc_rport *rport = ctrl->rport; 2563 struct nvme_fc_lport *lport = rport->lport; 2564 u32 cnt; 2565 2566 /* ctrl->assoc_active=false will be set independently */ 2567 2568 cnt = atomic_dec_return(&rport->act_ctrl_cnt); 2569 if (cnt == 0) { 2570 if (rport->remoteport.port_state == FC_OBJSTATE_DELETED) 2571 lport->ops->remoteport_delete(&rport->remoteport); 2572 nvme_fc_rport_inactive_on_lport(rport); 2573 } 2574 2575 return 0; 2576 } 2577 2578 /* 2579 * This routine restarts the controller on the host side, and 2580 * on the link side, recreates the controller association. 
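 *
 * In outline (matching the code below):
 *   - create the admin hw queue and connect it at the FC level, then
 *     issue the fabrics Connect command on the admin queue
 *   - read CAP, enable the controller and run nvme_init_identify()
 *   - set up the AEN operations
 *   - create (first association) or re-create (reconnect) the io queues
 *   - move the controller to LIVE and start it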
2581 */ 2582 static int 2583 nvme_fc_create_association(struct nvme_fc_ctrl *ctrl) 2584 { 2585 struct nvmf_ctrl_options *opts = ctrl->ctrl.opts; 2586 int ret; 2587 bool changed; 2588 2589 ++ctrl->ctrl.nr_reconnects; 2590 2591 if (ctrl->rport->remoteport.port_state != FC_OBJSTATE_ONLINE) 2592 return -ENODEV; 2593 2594 if (nvme_fc_ctlr_active_on_rport(ctrl)) 2595 return -ENOTUNIQ; 2596 2597 /* 2598 * Create the admin queue 2599 */ 2600 2601 ret = __nvme_fc_create_hw_queue(ctrl, &ctrl->queues[0], 0, 2602 NVME_AQ_DEPTH); 2603 if (ret) 2604 goto out_free_queue; 2605 2606 ret = nvme_fc_connect_admin_queue(ctrl, &ctrl->queues[0], 2607 NVME_AQ_DEPTH, (NVME_AQ_DEPTH / 4)); 2608 if (ret) 2609 goto out_delete_hw_queue; 2610 2611 blk_mq_unquiesce_queue(ctrl->ctrl.admin_q); 2612 2613 ret = nvmf_connect_admin_queue(&ctrl->ctrl); 2614 if (ret) 2615 goto out_disconnect_admin_queue; 2616 2617 set_bit(NVME_FC_Q_LIVE, &ctrl->queues[0].flags); 2618 2619 /* 2620 * Check controller capabilities 2621 * 2622 * todo:- add code to check if ctrl attributes changed from 2623 * prior connection values 2624 */ 2625 2626 ret = nvmf_reg_read64(&ctrl->ctrl, NVME_REG_CAP, &ctrl->ctrl.cap); 2627 if (ret) { 2628 dev_err(ctrl->ctrl.device, 2629 "prop_get NVME_REG_CAP failed\n"); 2630 goto out_disconnect_admin_queue; 2631 } 2632 2633 ctrl->ctrl.sqsize = 2634 min_t(int, NVME_CAP_MQES(ctrl->ctrl.cap), ctrl->ctrl.sqsize); 2635 2636 ret = nvme_enable_ctrl(&ctrl->ctrl, ctrl->ctrl.cap); 2637 if (ret) 2638 goto out_disconnect_admin_queue; 2639 2640 ctrl->ctrl.max_hw_sectors = 2641 (ctrl->lport->ops->max_sgl_segments - 1) << (PAGE_SHIFT - 9); 2642 2643 ret = nvme_init_identify(&ctrl->ctrl); 2644 if (ret) 2645 goto out_disconnect_admin_queue; 2646 2647 /* sanity checks */ 2648 2649 /* FC-NVME does not have other data in the capsule */ 2650 if (ctrl->ctrl.icdoff) { 2651 dev_err(ctrl->ctrl.device, "icdoff %d is not supported!\n", 2652 ctrl->ctrl.icdoff); 2653 goto out_disconnect_admin_queue; 2654 } 2655 2656 /* FC-NVME supports normal SGL Data Block Descriptors */ 2657 2658 if (opts->queue_size > ctrl->ctrl.maxcmd) { 2659 /* warn if maxcmd is lower than queue_size */ 2660 dev_warn(ctrl->ctrl.device, 2661 "queue_size %zu > ctrl maxcmd %u, reducing " 2662 "to queue_size\n", 2663 opts->queue_size, ctrl->ctrl.maxcmd); 2664 opts->queue_size = ctrl->ctrl.maxcmd; 2665 } 2666 2667 if (opts->queue_size > ctrl->ctrl.sqsize + 1) { 2668 /* warn if sqsize is lower than queue_size */ 2669 dev_warn(ctrl->ctrl.device, 2670 "queue_size %zu > ctrl sqsize %u, clamping down\n", 2671 opts->queue_size, ctrl->ctrl.sqsize + 1); 2672 opts->queue_size = ctrl->ctrl.sqsize + 1; 2673 } 2674 2675 ret = nvme_fc_init_aen_ops(ctrl); 2676 if (ret) 2677 goto out_term_aen_ops; 2678 2679 /* 2680 * Create the io queues 2681 */ 2682 2683 if (ctrl->ctrl.queue_count > 1) { 2684 if (!ctrl->ioq_live) 2685 ret = nvme_fc_create_io_queues(ctrl); 2686 else 2687 ret = nvme_fc_recreate_io_queues(ctrl); 2688 if (ret) 2689 goto out_term_aen_ops; 2690 } 2691 2692 changed = nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_LIVE); 2693 2694 ctrl->ctrl.nr_reconnects = 0; 2695 2696 if (changed) 2697 nvme_start_ctrl(&ctrl->ctrl); 2698 2699 return 0; /* Success */ 2700 2701 out_term_aen_ops: 2702 nvme_fc_term_aen_ops(ctrl); 2703 out_disconnect_admin_queue: 2704 /* send a Disconnect(association) LS to fc-nvme target */ 2705 nvme_fc_xmt_disconnect_assoc(ctrl); 2706 out_delete_hw_queue: 2707 __nvme_fc_delete_hw_queue(ctrl, &ctrl->queues[0], 0); 2708 out_free_queue: 2709 
nvme_fc_free_queue(&ctrl->queues[0]); 2710 ctrl->assoc_active = false; 2711 nvme_fc_ctlr_inactive_on_rport(ctrl); 2712 2713 return ret; 2714 } 2715 2716 /* 2717 * This routine stops operation of the controller on the host side. 2718 * On the host os stack side: Admin and IO queues are stopped, 2719 * outstanding ios on them terminated via FC ABTS. 2720 * On the link side: the association is terminated. 2721 */ 2722 static void 2723 nvme_fc_delete_association(struct nvme_fc_ctrl *ctrl) 2724 { 2725 unsigned long flags; 2726 2727 if (!ctrl->assoc_active) 2728 return; 2729 ctrl->assoc_active = false; 2730 2731 spin_lock_irqsave(&ctrl->lock, flags); 2732 ctrl->flags |= FCCTRL_TERMIO; 2733 ctrl->iocnt = 0; 2734 spin_unlock_irqrestore(&ctrl->lock, flags); 2735 2736 /* 2737 * If io queues are present, stop them and terminate all outstanding 2738 * ios on them. As FC allocates an FC exchange for each io, the 2739 * transport must contact the LLDD to terminate the exchange, 2740 * thus releasing the FC exchange. We use blk_mq_tagset_busy_iter() 2741 * to tell us which ios are busy and invoke a transport routine 2742 * to kill them with the LLDD. After terminating the exchange 2743 * the LLDD will call the transport's normal io done path, but it 2744 * will have an aborted status. The done path will return the 2745 * io requests back to the block layer as part of normal completions 2746 * (but with error status). 2747 */ 2748 if (ctrl->ctrl.queue_count > 1) { 2749 nvme_stop_queues(&ctrl->ctrl); 2750 blk_mq_tagset_busy_iter(&ctrl->tag_set, 2751 nvme_fc_terminate_exchange, &ctrl->ctrl); 2752 } 2753 2754 /* 2755 * Other transports, which don't have link-level contexts bound 2756 * to sqe's, would try to gracefully shut down the controller by 2757 * writing the registers for shutdown and polling (call 2758 * nvme_shutdown_ctrl()). Given a bunch of i/o was potentially 2759 * just aborted and we will wait on those contexts, and given 2760 * there was no indication of how live the controller is on the 2761 * link, don't send more io to create more contexts for the 2762 * shutdown. Let the controller fail via keepalive failure if 2763 * it's still present. 2764 */ 2765 2766 /* 2767 * clean up the admin queue. Same thing as above. 2768 * use blk_mq_tagset_busy_iter() and the transport routine to 2769 * terminate the exchanges. 2770 */ 2771 blk_mq_quiesce_queue(ctrl->ctrl.admin_q); 2772 blk_mq_tagset_busy_iter(&ctrl->admin_tag_set, 2773 nvme_fc_terminate_exchange, &ctrl->ctrl); 2774 2775 /* kill the aens as they are a separate path */ 2776 nvme_fc_abort_aen_ops(ctrl); 2777 2778 /* wait for all io that had to be aborted */ 2779 spin_lock_irq(&ctrl->lock); 2780 wait_event_lock_irq(ctrl->ioabort_wait, ctrl->iocnt == 0, ctrl->lock); 2781 ctrl->flags &= ~FCCTRL_TERMIO; 2782 spin_unlock_irq(&ctrl->lock); 2783 2784 nvme_fc_term_aen_ops(ctrl); 2785 2786 /* 2787 * send a Disconnect(association) LS to fc-nvme target 2788 * Note: could have been sent at top of process, but 2789 * cleaner on link traffic if after the aborts complete.
2790 * Note: if association doesn't exist, association_id will be 0 2791 */ 2792 if (ctrl->association_id) 2793 nvme_fc_xmt_disconnect_assoc(ctrl); 2794 2795 if (ctrl->ctrl.tagset) { 2796 nvme_fc_delete_hw_io_queues(ctrl); 2797 nvme_fc_free_io_queues(ctrl); 2798 } 2799 2800 __nvme_fc_delete_hw_queue(ctrl, &ctrl->queues[0], 0); 2801 nvme_fc_free_queue(&ctrl->queues[0]); 2802 2803 /* re-enable the admin_q so anything new can fast fail */ 2804 blk_mq_unquiesce_queue(ctrl->ctrl.admin_q); 2805 2806 /* resume the io queues so that things will fast fail */ 2807 nvme_start_queues(&ctrl->ctrl); 2808 2809 nvme_fc_ctlr_inactive_on_rport(ctrl); 2810 } 2811 2812 static void 2813 nvme_fc_delete_ctrl(struct nvme_ctrl *nctrl) 2814 { 2815 struct nvme_fc_ctrl *ctrl = to_fc_ctrl(nctrl); 2816 2817 cancel_delayed_work_sync(&ctrl->connect_work); 2818 /* 2819 * kill the association on the link side. This will block 2820 * waiting for io to terminate 2821 */ 2822 nvme_fc_delete_association(ctrl); 2823 } 2824 2825 static void 2826 nvme_fc_reconnect_or_delete(struct nvme_fc_ctrl *ctrl, int status) 2827 { 2828 struct nvme_fc_rport *rport = ctrl->rport; 2829 struct nvme_fc_remote_port *portptr = &rport->remoteport; 2830 unsigned long recon_delay = ctrl->ctrl.opts->reconnect_delay * HZ; 2831 bool recon = true; 2832 2833 if (ctrl->ctrl.state != NVME_CTRL_CONNECTING) 2834 return; 2835 2836 if (portptr->port_state == FC_OBJSTATE_ONLINE) 2837 dev_info(ctrl->ctrl.device, 2838 "NVME-FC{%d}: reset: Reconnect attempt failed (%d)\n", 2839 ctrl->cnum, status); 2840 else if (time_after_eq(jiffies, rport->dev_loss_end)) 2841 recon = false; 2842 2843 if (recon && nvmf_should_reconnect(&ctrl->ctrl)) { 2844 if (portptr->port_state == FC_OBJSTATE_ONLINE) 2845 dev_info(ctrl->ctrl.device, 2846 "NVME-FC{%d}: Reconnect attempt in %ld " 2847 "seconds\n", 2848 ctrl->cnum, recon_delay / HZ); 2849 else if (time_after(jiffies + recon_delay, rport->dev_loss_end)) 2850 recon_delay = rport->dev_loss_end - jiffies; 2851 2852 queue_delayed_work(nvme_wq, &ctrl->connect_work, recon_delay); 2853 } else { 2854 if (portptr->port_state == FC_OBJSTATE_ONLINE) 2855 dev_warn(ctrl->ctrl.device, 2856 "NVME-FC{%d}: Max reconnect attempts (%d) " 2857 "reached.\n", 2858 ctrl->cnum, ctrl->ctrl.nr_reconnects); 2859 else 2860 dev_warn(ctrl->ctrl.device, 2861 "NVME-FC{%d}: dev_loss_tmo (%d) expired " 2862 "while waiting for remoteport connectivity.\n", 2863 ctrl->cnum, portptr->dev_loss_tmo); 2864 WARN_ON(nvme_delete_ctrl(&ctrl->ctrl)); 2865 } 2866 } 2867 2868 static void 2869 nvme_fc_reset_ctrl_work(struct work_struct *work) 2870 { 2871 struct nvme_fc_ctrl *ctrl = 2872 container_of(work, struct nvme_fc_ctrl, ctrl.reset_work); 2873 int ret; 2874 2875 nvme_stop_ctrl(&ctrl->ctrl); 2876 2877 /* will block while waiting for io to terminate */ 2878 nvme_fc_delete_association(ctrl); 2879 2880 if (!nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_CONNECTING)) { 2881 dev_err(ctrl->ctrl.device, 2882 "NVME-FC{%d}: error_recovery: Couldn't change state " 2883 "to CONNECTING\n", ctrl->cnum); 2884 return; 2885 } 2886 2887 if (ctrl->rport->remoteport.port_state == FC_OBJSTATE_ONLINE) 2888 ret = nvme_fc_create_association(ctrl); 2889 else 2890 ret = -ENOTCONN; 2891 2892 if (ret) 2893 nvme_fc_reconnect_or_delete(ctrl, ret); 2894 else 2895 dev_info(ctrl->ctrl.device, 2896 "NVME-FC{%d}: controller reset complete\n", 2897 ctrl->cnum); 2898 } 2899 2900 static const struct nvme_ctrl_ops nvme_fc_ctrl_ops = { 2901 .name = "fc", 2902 .module = THIS_MODULE, 2903 .flags = NVME_F_FABRICS, 2904
.reg_read32 = nvmf_reg_read32, 2905 .reg_read64 = nvmf_reg_read64, 2906 .reg_write32 = nvmf_reg_write32, 2907 .free_ctrl = nvme_fc_nvme_ctrl_freed, 2908 .submit_async_event = nvme_fc_submit_async_event, 2909 .delete_ctrl = nvme_fc_delete_ctrl, 2910 .get_address = nvmf_get_address, 2911 }; 2912 2913 static void 2914 nvme_fc_connect_ctrl_work(struct work_struct *work) 2915 { 2916 int ret; 2917 2918 struct nvme_fc_ctrl *ctrl = 2919 container_of(to_delayed_work(work), 2920 struct nvme_fc_ctrl, connect_work); 2921 2922 ret = nvme_fc_create_association(ctrl); 2923 if (ret) 2924 nvme_fc_reconnect_or_delete(ctrl, ret); 2925 else 2926 dev_info(ctrl->ctrl.device, 2927 "NVME-FC{%d}: controller connect complete\n", 2928 ctrl->cnum); 2929 } 2930 2931 2932 static const struct blk_mq_ops nvme_fc_admin_mq_ops = { 2933 .queue_rq = nvme_fc_queue_rq, 2934 .complete = nvme_fc_complete_rq, 2935 .init_request = nvme_fc_init_request, 2936 .exit_request = nvme_fc_exit_request, 2937 .init_hctx = nvme_fc_init_admin_hctx, 2938 .timeout = nvme_fc_timeout, 2939 }; 2940 2941 2942 /* 2943 * Fails a controller request if it matches an existing controller 2944 * (association) with the same tuple: 2945 * <Host NQN, Host ID, local FC port, remote FC port, SUBSYS NQN> 2946 * 2947 * The ports don't need to be compared as they are intrinsically 2948 * already matched by the port pointers supplied. 2949 */ 2950 static bool 2951 nvme_fc_existing_controller(struct nvme_fc_rport *rport, 2952 struct nvmf_ctrl_options *opts) 2953 { 2954 struct nvme_fc_ctrl *ctrl; 2955 unsigned long flags; 2956 bool found = false; 2957 2958 spin_lock_irqsave(&rport->lock, flags); 2959 list_for_each_entry(ctrl, &rport->ctrl_list, ctrl_list) { 2960 found = nvmf_ctlr_matches_baseopts(&ctrl->ctrl, opts); 2961 if (found) 2962 break; 2963 } 2964 spin_unlock_irqrestore(&rport->lock, flags); 2965 2966 return found; 2967 } 2968 2969 static struct nvme_ctrl * 2970 nvme_fc_init_ctrl(struct device *dev, struct nvmf_ctrl_options *opts, 2971 struct nvme_fc_lport *lport, struct nvme_fc_rport *rport) 2972 { 2973 struct nvme_fc_ctrl *ctrl; 2974 unsigned long flags; 2975 int ret, idx; 2976 2977 if (!(rport->remoteport.port_role & 2978 (FC_PORT_ROLE_NVME_DISCOVERY | FC_PORT_ROLE_NVME_TARGET))) { 2979 ret = -EBADR; 2980 goto out_fail; 2981 } 2982 2983 if (!opts->duplicate_connect && 2984 nvme_fc_existing_controller(rport, opts)) { 2985 ret = -EALREADY; 2986 goto out_fail; 2987 } 2988 2989 ctrl = kzalloc(sizeof(*ctrl), GFP_KERNEL); 2990 if (!ctrl) { 2991 ret = -ENOMEM; 2992 goto out_fail; 2993 } 2994 2995 idx = ida_simple_get(&nvme_fc_ctrl_cnt, 0, 0, GFP_KERNEL); 2996 if (idx < 0) { 2997 ret = -ENOSPC; 2998 goto out_free_ctrl; 2999 } 3000 3001 ctrl->ctrl.opts = opts; 3002 ctrl->ctrl.nr_reconnects = 0; 3003 INIT_LIST_HEAD(&ctrl->ctrl_list); 3004 ctrl->lport = lport; 3005 ctrl->rport = rport; 3006 ctrl->dev = lport->dev; 3007 ctrl->cnum = idx; 3008 ctrl->ioq_live = false; 3009 ctrl->assoc_active = false; 3010 init_waitqueue_head(&ctrl->ioabort_wait); 3011 3012 get_device(ctrl->dev); 3013 kref_init(&ctrl->ref); 3014 3015 INIT_WORK(&ctrl->ctrl.reset_work, nvme_fc_reset_ctrl_work); 3016 INIT_DELAYED_WORK(&ctrl->connect_work, nvme_fc_connect_ctrl_work); 3017 spin_lock_init(&ctrl->lock); 3018 3019 /* io queue count */ 3020 ctrl->ctrl.queue_count = min_t(unsigned int, 3021 opts->nr_io_queues, 3022 lport->ops->max_hw_queues); 3023 ctrl->ctrl.queue_count++; /* +1 for admin queue */ 3024 3025 ctrl->ctrl.sqsize = opts->queue_size - 1; 3026 ctrl->ctrl.kato = opts->kato; 3027 
ctrl->ctrl.cntlid = 0xffff; 3028 3029 ret = -ENOMEM; 3030 ctrl->queues = kcalloc(ctrl->ctrl.queue_count, 3031 sizeof(struct nvme_fc_queue), GFP_KERNEL); 3032 if (!ctrl->queues) 3033 goto out_free_ida; 3034 3035 nvme_fc_init_queue(ctrl, 0); 3036 3037 memset(&ctrl->admin_tag_set, 0, sizeof(ctrl->admin_tag_set)); 3038 ctrl->admin_tag_set.ops = &nvme_fc_admin_mq_ops; 3039 ctrl->admin_tag_set.queue_depth = NVME_AQ_MQ_TAG_DEPTH; 3040 ctrl->admin_tag_set.reserved_tags = 2; /* fabric connect + Keep-Alive */ 3041 ctrl->admin_tag_set.numa_node = NUMA_NO_NODE; 3042 ctrl->admin_tag_set.cmd_size = 3043 struct_size((struct nvme_fcp_op_w_sgl *)NULL, priv, 3044 ctrl->lport->ops->fcprqst_priv_sz); 3045 ctrl->admin_tag_set.driver_data = ctrl; 3046 ctrl->admin_tag_set.nr_hw_queues = 1; 3047 ctrl->admin_tag_set.timeout = ADMIN_TIMEOUT; 3048 ctrl->admin_tag_set.flags = BLK_MQ_F_NO_SCHED; 3049 3050 ret = blk_mq_alloc_tag_set(&ctrl->admin_tag_set); 3051 if (ret) 3052 goto out_free_queues; 3053 ctrl->ctrl.admin_tagset = &ctrl->admin_tag_set; 3054 3055 ctrl->ctrl.admin_q = blk_mq_init_queue(&ctrl->admin_tag_set); 3056 if (IS_ERR(ctrl->ctrl.admin_q)) { 3057 ret = PTR_ERR(ctrl->ctrl.admin_q); 3058 goto out_free_admin_tag_set; 3059 } 3060 3061 /* 3062 * Would have been nice to init io queues tag set as well. 3063 * However, we require interaction from the controller 3064 * for max io queue count before we can do so. 3065 * Defer this to the connect path. 3066 */ 3067 3068 ret = nvme_init_ctrl(&ctrl->ctrl, dev, &nvme_fc_ctrl_ops, 0); 3069 if (ret) 3070 goto out_cleanup_admin_q; 3071 3072 /* at this point, teardown path changes to ref counting on nvme ctrl */ 3073 3074 spin_lock_irqsave(&rport->lock, flags); 3075 list_add_tail(&ctrl->ctrl_list, &rport->ctrl_list); 3076 spin_unlock_irqrestore(&rport->lock, flags); 3077 3078 if (!nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_RESETTING) || 3079 !nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_CONNECTING)) { 3080 dev_err(ctrl->ctrl.device, 3081 "NVME-FC{%d}: failed to init ctrl state\n", ctrl->cnum); 3082 goto fail_ctrl; 3083 } 3084 3085 nvme_get_ctrl(&ctrl->ctrl); 3086 3087 if (!queue_delayed_work(nvme_wq, &ctrl->connect_work, 0)) { 3088 nvme_put_ctrl(&ctrl->ctrl); 3089 dev_err(ctrl->ctrl.device, 3090 "NVME-FC{%d}: failed to schedule initial connect\n", 3091 ctrl->cnum); 3092 goto fail_ctrl; 3093 } 3094 3095 flush_delayed_work(&ctrl->connect_work); 3096 3097 dev_info(ctrl->ctrl.device, 3098 "NVME-FC{%d}: new ctrl: NQN \"%s\"\n", 3099 ctrl->cnum, ctrl->ctrl.opts->subsysnqn); 3100 3101 return &ctrl->ctrl; 3102 3103 fail_ctrl: 3104 nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_DELETING); 3105 cancel_work_sync(&ctrl->ctrl.reset_work); 3106 cancel_delayed_work_sync(&ctrl->connect_work); 3107 3108 ctrl->ctrl.opts = NULL; 3109 3110 /* initiate nvme ctrl ref counting teardown */ 3111 nvme_uninit_ctrl(&ctrl->ctrl); 3112 3113 /* Remove core ctrl ref. */ 3114 nvme_put_ctrl(&ctrl->ctrl); 3115 3116 /* as we're past the point where we transition to the ref 3117 * counting teardown path, if we return a bad pointer here, 3118 * the calling routine, thinking it's prior to the 3119 * transition, will do an rport put. Since the teardown 3120 * path also does a rport put, we do an extra get here to 3121 * so proper order/teardown happens. 
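 *
 * In other words: nvme_fc_create_ctrl() took an rport reference before
 * calling here and will drop it when it sees the error return; the
 * controller teardown initiated above will also drop one via
 * nvme_fc_ctrl_free(). The extra get taken below keeps those two puts
 * balanced.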
3122 */ 3123 nvme_fc_rport_get(rport); 3124 3125 return ERR_PTR(-EIO); 3126 3127 out_cleanup_admin_q: 3128 blk_cleanup_queue(ctrl->ctrl.admin_q); 3129 out_free_admin_tag_set: 3130 blk_mq_free_tag_set(&ctrl->admin_tag_set); 3131 out_free_queues: 3132 kfree(ctrl->queues); 3133 out_free_ida: 3134 put_device(ctrl->dev); 3135 ida_simple_remove(&nvme_fc_ctrl_cnt, ctrl->cnum); 3136 out_free_ctrl: 3137 kfree(ctrl); 3138 out_fail: 3139 /* exit via here doesn't follow ctlr ref points */ 3140 return ERR_PTR(ret); 3141 } 3142 3143 3144 struct nvmet_fc_traddr { 3145 u64 nn; 3146 u64 pn; 3147 }; 3148 3149 static int 3150 __nvme_fc_parse_u64(substring_t *sstr, u64 *val) 3151 { 3152 u64 token64; 3153 3154 if (match_u64(sstr, &token64)) 3155 return -EINVAL; 3156 *val = token64; 3157 3158 return 0; 3159 } 3160 3161 /* 3162 * This routine validates and extracts the WWN's from the TRADDR string. 3163 * As kernel parsers need the 0x to determine number base, universally 3164 * build string to parse with 0x prefix before parsing name strings. 3165 */ 3166 static int 3167 nvme_fc_parse_traddr(struct nvmet_fc_traddr *traddr, char *buf, size_t blen) 3168 { 3169 char name[2 + NVME_FC_TRADDR_HEXNAMELEN + 1]; 3170 substring_t wwn = { name, &name[sizeof(name)-1] }; 3171 int nnoffset, pnoffset; 3172 3173 /* validate if string is one of the 2 allowed formats */ 3174 if (strnlen(buf, blen) == NVME_FC_TRADDR_MAXLENGTH && 3175 !strncmp(buf, "nn-0x", NVME_FC_TRADDR_OXNNLEN) && 3176 !strncmp(&buf[NVME_FC_TRADDR_MAX_PN_OFFSET], 3177 "pn-0x", NVME_FC_TRADDR_OXNNLEN)) { 3178 nnoffset = NVME_FC_TRADDR_OXNNLEN; 3179 pnoffset = NVME_FC_TRADDR_MAX_PN_OFFSET + 3180 NVME_FC_TRADDR_OXNNLEN; 3181 } else if ((strnlen(buf, blen) == NVME_FC_TRADDR_MINLENGTH && 3182 !strncmp(buf, "nn-", NVME_FC_TRADDR_NNLEN) && 3183 !strncmp(&buf[NVME_FC_TRADDR_MIN_PN_OFFSET], 3184 "pn-", NVME_FC_TRADDR_NNLEN))) { 3185 nnoffset = NVME_FC_TRADDR_NNLEN; 3186 pnoffset = NVME_FC_TRADDR_MIN_PN_OFFSET + NVME_FC_TRADDR_NNLEN; 3187 } else 3188 goto out_einval; 3189 3190 name[0] = '0'; 3191 name[1] = 'x'; 3192 name[2 + NVME_FC_TRADDR_HEXNAMELEN] = 0; 3193 3194 memcpy(&name[2], &buf[nnoffset], NVME_FC_TRADDR_HEXNAMELEN); 3195 if (__nvme_fc_parse_u64(&wwn, &traddr->nn)) 3196 goto out_einval; 3197 3198 memcpy(&name[2], &buf[pnoffset], NVME_FC_TRADDR_HEXNAMELEN); 3199 if (__nvme_fc_parse_u64(&wwn, &traddr->pn)) 3200 goto out_einval; 3201 3202 return 0; 3203 3204 out_einval: 3205 pr_warn("%s: bad traddr string\n", __func__); 3206 return -EINVAL; 3207 } 3208 3209 static struct nvme_ctrl * 3210 nvme_fc_create_ctrl(struct device *dev, struct nvmf_ctrl_options *opts) 3211 { 3212 struct nvme_fc_lport *lport; 3213 struct nvme_fc_rport *rport; 3214 struct nvme_ctrl *ctrl; 3215 struct nvmet_fc_traddr laddr = { 0L, 0L }; 3216 struct nvmet_fc_traddr raddr = { 0L, 0L }; 3217 unsigned long flags; 3218 int ret; 3219 3220 ret = nvme_fc_parse_traddr(&raddr, opts->traddr, NVMF_TRADDR_SIZE); 3221 if (ret || !raddr.nn || !raddr.pn) 3222 return ERR_PTR(-EINVAL); 3223 3224 ret = nvme_fc_parse_traddr(&laddr, opts->host_traddr, NVMF_TRADDR_SIZE); 3225 if (ret || !laddr.nn || !laddr.pn) 3226 return ERR_PTR(-EINVAL); 3227 3228 /* find the host and remote ports to connect together */ 3229 spin_lock_irqsave(&nvme_fc_lock, flags); 3230 list_for_each_entry(lport, &nvme_fc_lport_list, port_list) { 3231 if (lport->localport.node_name != laddr.nn || 3232 lport->localport.port_name != laddr.pn) 3233 continue; 3234 3235 list_for_each_entry(rport, &lport->endp_list, endp_list) { 3236 if 
(rport->remoteport.node_name != raddr.nn || 3237 rport->remoteport.port_name != raddr.pn) 3238 continue; 3239 3240 /* if fail to get reference fall through. Will error */ 3241 if (!nvme_fc_rport_get(rport)) 3242 break; 3243 3244 spin_unlock_irqrestore(&nvme_fc_lock, flags); 3245 3246 ctrl = nvme_fc_init_ctrl(dev, opts, lport, rport); 3247 if (IS_ERR(ctrl)) 3248 nvme_fc_rport_put(rport); 3249 return ctrl; 3250 } 3251 } 3252 spin_unlock_irqrestore(&nvme_fc_lock, flags); 3253 3254 pr_warn("%s: %s - %s combination not found\n", 3255 __func__, opts->traddr, opts->host_traddr); 3256 return ERR_PTR(-ENOENT); 3257 } 3258 3259 3260 static struct nvmf_transport_ops nvme_fc_transport = { 3261 .name = "fc", 3262 .module = THIS_MODULE, 3263 .required_opts = NVMF_OPT_TRADDR | NVMF_OPT_HOST_TRADDR, 3264 .allowed_opts = NVMF_OPT_RECONNECT_DELAY | NVMF_OPT_CTRL_LOSS_TMO, 3265 .create_ctrl = nvme_fc_create_ctrl, 3266 }; 3267 3268 /* Arbitrary successive failures max. With lots of subsystems could be high */ 3269 #define DISCOVERY_MAX_FAIL 20 3270 3271 static ssize_t nvme_fc_nvme_discovery_store(struct device *dev, 3272 struct device_attribute *attr, const char *buf, size_t count) 3273 { 3274 unsigned long flags; 3275 LIST_HEAD(local_disc_list); 3276 struct nvme_fc_lport *lport; 3277 struct nvme_fc_rport *rport; 3278 int failcnt = 0; 3279 3280 spin_lock_irqsave(&nvme_fc_lock, flags); 3281 restart: 3282 list_for_each_entry(lport, &nvme_fc_lport_list, port_list) { 3283 list_for_each_entry(rport, &lport->endp_list, endp_list) { 3284 if (!nvme_fc_lport_get(lport)) 3285 continue; 3286 if (!nvme_fc_rport_get(rport)) { 3287 /* 3288 * This is a temporary condition. Upon restart 3289 * this rport will be gone from the list. 3290 * 3291 * Revert the lport put and retry. Anything 3292 * added to the list already will be skipped (as 3293 * they are no longer list_empty). Loops should 3294 * resume at rports that were not yet seen. 3295 */ 3296 nvme_fc_lport_put(lport); 3297 3298 if (failcnt++ < DISCOVERY_MAX_FAIL) 3299 goto restart; 3300 3301 pr_err("nvme_discovery: too many reference " 3302 "failures\n"); 3303 goto process_local_list; 3304 } 3305 if (list_empty(&rport->disc_list)) 3306 list_add_tail(&rport->disc_list, 3307 &local_disc_list); 3308 } 3309 } 3310 3311 process_local_list: 3312 while (!list_empty(&local_disc_list)) { 3313 rport = list_first_entry(&local_disc_list, 3314 struct nvme_fc_rport, disc_list); 3315 list_del_init(&rport->disc_list); 3316 spin_unlock_irqrestore(&nvme_fc_lock, flags); 3317 3318 lport = rport->lport; 3319 /* signal discovery. 
Won't hurt if it repeats */ 3320 nvme_fc_signal_discovery_scan(lport, rport); 3321 nvme_fc_rport_put(rport); 3322 nvme_fc_lport_put(lport); 3323 3324 spin_lock_irqsave(&nvme_fc_lock, flags); 3325 } 3326 spin_unlock_irqrestore(&nvme_fc_lock, flags); 3327 3328 return count; 3329 } 3330 static DEVICE_ATTR(nvme_discovery, 0200, NULL, nvme_fc_nvme_discovery_store); 3331 3332 static struct attribute *nvme_fc_attrs[] = { 3333 &dev_attr_nvme_discovery.attr, 3334 NULL 3335 }; 3336 3337 static struct attribute_group nvme_fc_attr_group = { 3338 .attrs = nvme_fc_attrs, 3339 }; 3340 3341 static const struct attribute_group *nvme_fc_attr_groups[] = { 3342 &nvme_fc_attr_group, 3343 NULL 3344 }; 3345 3346 static struct class fc_class = { 3347 .name = "fc", 3348 .dev_groups = nvme_fc_attr_groups, 3349 .owner = THIS_MODULE, 3350 }; 3351 3352 static int __init nvme_fc_init_module(void) 3353 { 3354 int ret; 3355 3356 /* 3357 * NOTE: 3358 * It is expected that in the future the kernel will combine 3359 * the FC-isms that are currently under scsi and now being 3360 * added to by NVME into a new standalone FC class. The SCSI 3361 * and NVME protocols and their devices would be under this 3362 * new FC class. 3363 * 3364 * As we need something to post FC-specific udev events to, 3365 * specifically for nvme probe events, start by creating the 3366 * new device class. When the new standalone FC class is 3367 * put in place, this code will move to a more generic 3368 * location for the class. 3369 */ 3370 ret = class_register(&fc_class); 3371 if (ret) { 3372 pr_err("couldn't register class fc\n"); 3373 return ret; 3374 } 3375 3376 /* 3377 * Create a device for the FC-centric udev events 3378 */ 3379 fc_udev_device = device_create(&fc_class, NULL, MKDEV(0, 0), NULL, 3380 "fc_udev_device"); 3381 if (IS_ERR(fc_udev_device)) { 3382 pr_err("couldn't create fc_udev device!\n"); 3383 ret = PTR_ERR(fc_udev_device); 3384 goto out_destroy_class; 3385 } 3386 3387 ret = nvmf_register_transport(&nvme_fc_transport); 3388 if (ret) 3389 goto out_destroy_device; 3390 3391 return 0; 3392 3393 out_destroy_device: 3394 device_destroy(&fc_class, MKDEV(0, 0)); 3395 out_destroy_class: 3396 class_unregister(&fc_class); 3397 return ret; 3398 } 3399 3400 static void __exit nvme_fc_exit_module(void) 3401 { 3402 /* sanity check - all lports should be removed */ 3403 if (!list_empty(&nvme_fc_lport_list)) 3404 pr_warn("%s: localport list not empty\n", __func__); 3405 3406 nvmf_unregister_transport(&nvme_fc_transport); 3407 3408 ida_destroy(&nvme_fc_local_port_cnt); 3409 ida_destroy(&nvme_fc_ctrl_cnt); 3410 3411 device_destroy(&fc_class, MKDEV(0, 0)); 3412 class_unregister(&fc_class); 3413 } 3414 3415 module_init(nvme_fc_init_module); 3416 module_exit(nvme_fc_exit_module); 3417 3418 MODULE_LICENSE("GPL v2"); 3419
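/*
 * Illustrative only (not part of the driver): a minimal user-space sketch
 * of how a controller create request reaches nvme_fc_create_ctrl() above.
 * A connect string is written to /dev/nvme-fabrics; per this transport's
 * required_opts, "traddr" and "host_traddr" must use the WWNN/WWPN form
 * accepted by nvme_fc_parse_traddr(), i.e.
 * "nn-0x<16 hex digits>:pn-0x<16 hex digits>" (or the same without the
 * "0x" prefixes). The WWNs and NQN below are made-up example values; in
 * practice the nvme-cli "nvme connect" command performs this write.
 */
#if 0	/* standalone user-space example, not compiled with the driver */
#include <fcntl.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>

int main(void)
{
	/* example values only - substitute the real local/remote port WWNs */
	const char *connect_str =
		"transport=fc,"
		"traddr=nn-0x20000090fa942779:pn-0x10000090fa942779,"
		"host_traddr=nn-0x20000090fa942770:pn-0x10000090fa942770,"
		"nqn=nqn.2016-06.io.example:subsystem1";
	int fd = open("/dev/nvme-fabrics", O_RDWR);

	if (fd < 0) {
		perror("open /dev/nvme-fabrics");
		return 1;
	}
	/* the fabrics core parses the string and calls the fc transport's
	 * create_ctrl (nvme_fc_create_ctrl) with the parsed options
	 */
	if (write(fd, connect_str, strlen(connect_str)) < 0) {
		perror("connect");
		close(fd);
		return 1;
	}
	close(fd);
	return 0;
}
#endif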