1 /* 2 * Copyright (c) 2016 Avago Technologies. All rights reserved. 3 * 4 * This program is free software; you can redistribute it and/or modify 5 * it under the terms of version 2 of the GNU General Public License as 6 * published by the Free Software Foundation. 7 * 8 * This program is distributed in the hope that it will be useful. 9 * ALL EXPRESS OR IMPLIED CONDITIONS, REPRESENTATIONS AND WARRANTIES, 10 * INCLUDING ANY IMPLIED WARRANTY OF MERCHANTABILITY, FITNESS FOR A 11 * PARTICULAR PURPOSE, OR NON-INFRINGEMENT, ARE DISCLAIMED, EXCEPT TO 12 * THE EXTENT THAT SUCH DISCLAIMERS ARE HELD TO BE LEGALLY INVALID. 13 * See the GNU General Public License for more details, a copy of which 14 * can be found in the file COPYING included with this package 15 * 16 */ 17 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt 18 #include <linux/module.h> 19 #include <linux/parser.h> 20 #include <uapi/scsi/fc/fc_fs.h> 21 #include <uapi/scsi/fc/fc_els.h> 22 #include <linux/delay.h> 23 #include <linux/overflow.h> 24 25 #include "nvme.h" 26 #include "fabrics.h" 27 #include <linux/nvme-fc-driver.h> 28 #include <linux/nvme-fc.h> 29 30 31 /* *************************** Data Structures/Defines ****************** */ 32 33 34 enum nvme_fc_queue_flags { 35 NVME_FC_Q_CONNECTED = 0, 36 NVME_FC_Q_LIVE, 37 }; 38 39 #define NVME_FC_DEFAULT_DEV_LOSS_TMO 60 /* seconds */ 40 41 struct nvme_fc_queue { 42 struct nvme_fc_ctrl *ctrl; 43 struct device *dev; 44 struct blk_mq_hw_ctx *hctx; 45 void *lldd_handle; 46 size_t cmnd_capsule_len; 47 u32 qnum; 48 u32 rqcnt; 49 u32 seqno; 50 51 u64 connection_id; 52 atomic_t csn; 53 54 unsigned long flags; 55 } __aligned(sizeof(u64)); /* alignment for other things alloc'd with */ 56 57 enum nvme_fcop_flags { 58 FCOP_FLAGS_TERMIO = (1 << 0), 59 FCOP_FLAGS_AEN = (1 << 1), 60 }; 61 62 struct nvmefc_ls_req_op { 63 struct nvmefc_ls_req ls_req; 64 65 struct nvme_fc_rport *rport; 66 struct nvme_fc_queue *queue; 67 struct request *rq; 68 u32 flags; 69 70 int ls_error; 71 struct completion ls_done; 72 struct list_head lsreq_list; /* rport->ls_req_list */ 73 bool req_queued; 74 }; 75 76 enum nvme_fcpop_state { 77 FCPOP_STATE_UNINIT = 0, 78 FCPOP_STATE_IDLE = 1, 79 FCPOP_STATE_ACTIVE = 2, 80 FCPOP_STATE_ABORTED = 3, 81 FCPOP_STATE_COMPLETE = 4, 82 }; 83 84 struct nvme_fc_fcp_op { 85 struct nvme_request nreq; /* 86 * nvme/host/core.c 87 * requires this to be 88 * the 1st element in the 89 * private structure 90 * associated with the 91 * request. 
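					 * (blk_mq_rq_to_pdu() returns the
					 * memory immediately following the
					 * struct request, and the nvme core
					 * nvme_req(rq) helper casts that
					 * area to a struct nvme_request -
					 * hence it must be first.)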
92 */ 93 struct nvmefc_fcp_req fcp_req; 94 95 struct nvme_fc_ctrl *ctrl; 96 struct nvme_fc_queue *queue; 97 struct request *rq; 98 99 atomic_t state; 100 u32 flags; 101 u32 rqno; 102 u32 nents; 103 104 struct nvme_fc_cmd_iu cmd_iu; 105 struct nvme_fc_ersp_iu rsp_iu; 106 }; 107 108 struct nvme_fcp_op_w_sgl { 109 struct nvme_fc_fcp_op op; 110 struct scatterlist sgl[SG_CHUNK_SIZE]; 111 uint8_t priv[0]; 112 }; 113 114 struct nvme_fc_lport { 115 struct nvme_fc_local_port localport; 116 117 struct ida endp_cnt; 118 struct list_head port_list; /* nvme_fc_port_list */ 119 struct list_head endp_list; 120 struct device *dev; /* physical device for dma */ 121 struct nvme_fc_port_template *ops; 122 struct kref ref; 123 atomic_t act_rport_cnt; 124 } __aligned(sizeof(u64)); /* alignment for other things alloc'd with */ 125 126 struct nvme_fc_rport { 127 struct nvme_fc_remote_port remoteport; 128 129 struct list_head endp_list; /* for lport->endp_list */ 130 struct list_head ctrl_list; 131 struct list_head ls_req_list; 132 struct list_head disc_list; 133 struct device *dev; /* physical device for dma */ 134 struct nvme_fc_lport *lport; 135 spinlock_t lock; 136 struct kref ref; 137 atomic_t act_ctrl_cnt; 138 unsigned long dev_loss_end; 139 } __aligned(sizeof(u64)); /* alignment for other things alloc'd with */ 140 141 enum nvme_fcctrl_flags { 142 FCCTRL_TERMIO = (1 << 0), 143 }; 144 145 struct nvme_fc_ctrl { 146 spinlock_t lock; 147 struct nvme_fc_queue *queues; 148 struct device *dev; 149 struct nvme_fc_lport *lport; 150 struct nvme_fc_rport *rport; 151 u32 cnum; 152 153 bool ioq_live; 154 bool assoc_active; 155 atomic_t err_work_active; 156 u64 association_id; 157 158 struct list_head ctrl_list; /* rport->ctrl_list */ 159 160 struct blk_mq_tag_set admin_tag_set; 161 struct blk_mq_tag_set tag_set; 162 163 struct delayed_work connect_work; 164 struct work_struct err_work; 165 166 struct kref ref; 167 u32 flags; 168 u32 iocnt; 169 wait_queue_head_t ioabort_wait; 170 171 struct nvme_fc_fcp_op aen_ops[NVME_NR_AEN_COMMANDS]; 172 173 struct nvme_ctrl ctrl; 174 }; 175 176 static inline struct nvme_fc_ctrl * 177 to_fc_ctrl(struct nvme_ctrl *ctrl) 178 { 179 return container_of(ctrl, struct nvme_fc_ctrl, ctrl); 180 } 181 182 static inline struct nvme_fc_lport * 183 localport_to_lport(struct nvme_fc_local_port *portptr) 184 { 185 return container_of(portptr, struct nvme_fc_lport, localport); 186 } 187 188 static inline struct nvme_fc_rport * 189 remoteport_to_rport(struct nvme_fc_remote_port *portptr) 190 { 191 return container_of(portptr, struct nvme_fc_rport, remoteport); 192 } 193 194 static inline struct nvmefc_ls_req_op * 195 ls_req_to_lsop(struct nvmefc_ls_req *lsreq) 196 { 197 return container_of(lsreq, struct nvmefc_ls_req_op, ls_req); 198 } 199 200 static inline struct nvme_fc_fcp_op * 201 fcp_req_to_fcp_op(struct nvmefc_fcp_req *fcpreq) 202 { 203 return container_of(fcpreq, struct nvme_fc_fcp_op, fcp_req); 204 } 205 206 207 208 /* *************************** Globals **************************** */ 209 210 211 static DEFINE_SPINLOCK(nvme_fc_lock); 212 213 static LIST_HEAD(nvme_fc_lport_list); 214 static DEFINE_IDA(nvme_fc_local_port_cnt); 215 static DEFINE_IDA(nvme_fc_ctrl_cnt); 216 217 218 219 /* 220 * These items are short-term. They will eventually be moved into 221 * a generic FC class. See comments in module init. 
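 *
 * fc_udev_device is a pseudo-device used as the kobject against which
 * the FC_EVENT=nvmediscovery uevents built in
 * nvme_fc_signal_discovery_scan() are posted, so that userspace
 * (e.g. udev rules invoking nvme-cli) can react to FC-NVME discovery
 * events.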
222 */ 223 static struct device *fc_udev_device; 224 225 226 /* *********************** FC-NVME Port Management ************************ */ 227 228 static void __nvme_fc_delete_hw_queue(struct nvme_fc_ctrl *, 229 struct nvme_fc_queue *, unsigned int); 230 231 static void 232 nvme_fc_free_lport(struct kref *ref) 233 { 234 struct nvme_fc_lport *lport = 235 container_of(ref, struct nvme_fc_lport, ref); 236 unsigned long flags; 237 238 WARN_ON(lport->localport.port_state != FC_OBJSTATE_DELETED); 239 WARN_ON(!list_empty(&lport->endp_list)); 240 241 /* remove from transport list */ 242 spin_lock_irqsave(&nvme_fc_lock, flags); 243 list_del(&lport->port_list); 244 spin_unlock_irqrestore(&nvme_fc_lock, flags); 245 246 ida_simple_remove(&nvme_fc_local_port_cnt, lport->localport.port_num); 247 ida_destroy(&lport->endp_cnt); 248 249 put_device(lport->dev); 250 251 kfree(lport); 252 } 253 254 static void 255 nvme_fc_lport_put(struct nvme_fc_lport *lport) 256 { 257 kref_put(&lport->ref, nvme_fc_free_lport); 258 } 259 260 static int 261 nvme_fc_lport_get(struct nvme_fc_lport *lport) 262 { 263 return kref_get_unless_zero(&lport->ref); 264 } 265 266 267 static struct nvme_fc_lport * 268 nvme_fc_attach_to_unreg_lport(struct nvme_fc_port_info *pinfo, 269 struct nvme_fc_port_template *ops, 270 struct device *dev) 271 { 272 struct nvme_fc_lport *lport; 273 unsigned long flags; 274 275 spin_lock_irqsave(&nvme_fc_lock, flags); 276 277 list_for_each_entry(lport, &nvme_fc_lport_list, port_list) { 278 if (lport->localport.node_name != pinfo->node_name || 279 lport->localport.port_name != pinfo->port_name) 280 continue; 281 282 if (lport->dev != dev) { 283 lport = ERR_PTR(-EXDEV); 284 goto out_done; 285 } 286 287 if (lport->localport.port_state != FC_OBJSTATE_DELETED) { 288 lport = ERR_PTR(-EEXIST); 289 goto out_done; 290 } 291 292 if (!nvme_fc_lport_get(lport)) { 293 /* 294 * fails if ref cnt already 0. If so, 295 * act as if lport already deleted 296 */ 297 lport = NULL; 298 goto out_done; 299 } 300 301 /* resume the lport */ 302 303 lport->ops = ops; 304 lport->localport.port_role = pinfo->port_role; 305 lport->localport.port_id = pinfo->port_id; 306 lport->localport.port_state = FC_OBJSTATE_ONLINE; 307 308 spin_unlock_irqrestore(&nvme_fc_lock, flags); 309 310 return lport; 311 } 312 313 lport = NULL; 314 315 out_done: 316 spin_unlock_irqrestore(&nvme_fc_lock, flags); 317 318 return lport; 319 } 320 321 /** 322 * nvme_fc_register_localport - transport entry point called by an 323 * LLDD to register the existence of a NVME 324 * host FC port. 325 * @pinfo: pointer to information about the port to be registered 326 * @template: LLDD entrypoints and operational parameters for the port 327 * @dev: physical hardware device node port corresponds to. Will be 328 * used for DMA mappings 329 * @portptr: pointer to a local port pointer. Upon success, the routine 330 * will allocate a nvme_fc_local_port structure and place its 331 * address in the local port pointer. Upon failure, local port 332 * pointer will be set to 0. 333 * 334 * Returns: 335 * a completion status. Must be 0 upon success; a negative errno 336 * (ex: -ENXIO) upon failure. 
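 *
 * Illustrative call sequence (a sketch only; wwnn, wwpn, my_template
 * and my_dev are driver-specific placeholders, not part of this API):
 *
 *	struct nvme_fc_port_info pinfo = {
 *		.node_name	= wwnn,
 *		.port_name	= wwpn,
 *		.port_role	= FC_PORT_ROLE_NVME_INITIATOR,
 *	};
 *	struct nvme_fc_local_port *localport;
 *	int ret;
 *
 *	ret = nvme_fc_register_localport(&pinfo, &my_template,
 *					 my_dev, &localport);
 *	if (ret)
 *		return ret;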
337 */ 338 int 339 nvme_fc_register_localport(struct nvme_fc_port_info *pinfo, 340 struct nvme_fc_port_template *template, 341 struct device *dev, 342 struct nvme_fc_local_port **portptr) 343 { 344 struct nvme_fc_lport *newrec; 345 unsigned long flags; 346 int ret, idx; 347 348 if (!template->localport_delete || !template->remoteport_delete || 349 !template->ls_req || !template->fcp_io || 350 !template->ls_abort || !template->fcp_abort || 351 !template->max_hw_queues || !template->max_sgl_segments || 352 !template->max_dif_sgl_segments || !template->dma_boundary) { 353 ret = -EINVAL; 354 goto out_reghost_failed; 355 } 356 357 /* 358 * look to see if there is already a localport that had been 359 * deregistered and in the process of waiting for all the 360 * references to fully be removed. If the references haven't 361 * expired, we can simply re-enable the localport. Remoteports 362 * and controller reconnections should resume naturally. 363 */ 364 newrec = nvme_fc_attach_to_unreg_lport(pinfo, template, dev); 365 366 /* found an lport, but something about its state is bad */ 367 if (IS_ERR(newrec)) { 368 ret = PTR_ERR(newrec); 369 goto out_reghost_failed; 370 371 /* found existing lport, which was resumed */ 372 } else if (newrec) { 373 *portptr = &newrec->localport; 374 return 0; 375 } 376 377 /* nothing found - allocate a new localport struct */ 378 379 newrec = kmalloc((sizeof(*newrec) + template->local_priv_sz), 380 GFP_KERNEL); 381 if (!newrec) { 382 ret = -ENOMEM; 383 goto out_reghost_failed; 384 } 385 386 idx = ida_simple_get(&nvme_fc_local_port_cnt, 0, 0, GFP_KERNEL); 387 if (idx < 0) { 388 ret = -ENOSPC; 389 goto out_fail_kfree; 390 } 391 392 if (!get_device(dev) && dev) { 393 ret = -ENODEV; 394 goto out_ida_put; 395 } 396 397 INIT_LIST_HEAD(&newrec->port_list); 398 INIT_LIST_HEAD(&newrec->endp_list); 399 kref_init(&newrec->ref); 400 atomic_set(&newrec->act_rport_cnt, 0); 401 newrec->ops = template; 402 newrec->dev = dev; 403 ida_init(&newrec->endp_cnt); 404 newrec->localport.private = &newrec[1]; 405 newrec->localport.node_name = pinfo->node_name; 406 newrec->localport.port_name = pinfo->port_name; 407 newrec->localport.port_role = pinfo->port_role; 408 newrec->localport.port_id = pinfo->port_id; 409 newrec->localport.port_state = FC_OBJSTATE_ONLINE; 410 newrec->localport.port_num = idx; 411 412 spin_lock_irqsave(&nvme_fc_lock, flags); 413 list_add_tail(&newrec->port_list, &nvme_fc_lport_list); 414 spin_unlock_irqrestore(&nvme_fc_lock, flags); 415 416 if (dev) 417 dma_set_seg_boundary(dev, template->dma_boundary); 418 419 *portptr = &newrec->localport; 420 return 0; 421 422 out_ida_put: 423 ida_simple_remove(&nvme_fc_local_port_cnt, idx); 424 out_fail_kfree: 425 kfree(newrec); 426 out_reghost_failed: 427 *portptr = NULL; 428 429 return ret; 430 } 431 EXPORT_SYMBOL_GPL(nvme_fc_register_localport); 432 433 /** 434 * nvme_fc_unregister_localport - transport entry point called by an 435 * LLDD to deregister/remove a previously 436 * registered a NVME host FC port. 437 * @portptr: pointer to the (registered) local port that is to be deregistered. 438 * 439 * Returns: 440 * a completion status. Must be 0 upon success; a negative errno 441 * (ex: -ENXIO) upon failure. 
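 *
 * Note: if remote ports are still active on the localport (i.e.
 * act_rport_cnt is non-zero), the LLDD's localport_delete() callback
 * is not invoked from this call; final teardown of the localport is
 * deferred until its remaining remote ports and references go away.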
442 */ 443 int 444 nvme_fc_unregister_localport(struct nvme_fc_local_port *portptr) 445 { 446 struct nvme_fc_lport *lport = localport_to_lport(portptr); 447 unsigned long flags; 448 449 if (!portptr) 450 return -EINVAL; 451 452 spin_lock_irqsave(&nvme_fc_lock, flags); 453 454 if (portptr->port_state != FC_OBJSTATE_ONLINE) { 455 spin_unlock_irqrestore(&nvme_fc_lock, flags); 456 return -EINVAL; 457 } 458 portptr->port_state = FC_OBJSTATE_DELETED; 459 460 spin_unlock_irqrestore(&nvme_fc_lock, flags); 461 462 if (atomic_read(&lport->act_rport_cnt) == 0) 463 lport->ops->localport_delete(&lport->localport); 464 465 nvme_fc_lport_put(lport); 466 467 return 0; 468 } 469 EXPORT_SYMBOL_GPL(nvme_fc_unregister_localport); 470 471 /* 472 * TRADDR strings, per FC-NVME are fixed format: 473 * "nn-0x<16hexdigits>:pn-0x<16hexdigits>" - 43 characters 474 * udev event will only differ by prefix of what field is 475 * being specified: 476 * "NVMEFC_HOST_TRADDR=" or "NVMEFC_TRADDR=" - 19 max characters 477 * 19 + 43 + null_fudge = 64 characters 478 */ 479 #define FCNVME_TRADDR_LENGTH 64 480 481 static void 482 nvme_fc_signal_discovery_scan(struct nvme_fc_lport *lport, 483 struct nvme_fc_rport *rport) 484 { 485 char hostaddr[FCNVME_TRADDR_LENGTH]; /* NVMEFC_HOST_TRADDR=...*/ 486 char tgtaddr[FCNVME_TRADDR_LENGTH]; /* NVMEFC_TRADDR=...*/ 487 char *envp[4] = { "FC_EVENT=nvmediscovery", hostaddr, tgtaddr, NULL }; 488 489 if (!(rport->remoteport.port_role & FC_PORT_ROLE_NVME_DISCOVERY)) 490 return; 491 492 snprintf(hostaddr, sizeof(hostaddr), 493 "NVMEFC_HOST_TRADDR=nn-0x%016llx:pn-0x%016llx", 494 lport->localport.node_name, lport->localport.port_name); 495 snprintf(tgtaddr, sizeof(tgtaddr), 496 "NVMEFC_TRADDR=nn-0x%016llx:pn-0x%016llx", 497 rport->remoteport.node_name, rport->remoteport.port_name); 498 kobject_uevent_env(&fc_udev_device->kobj, KOBJ_CHANGE, envp); 499 } 500 501 static void 502 nvme_fc_free_rport(struct kref *ref) 503 { 504 struct nvme_fc_rport *rport = 505 container_of(ref, struct nvme_fc_rport, ref); 506 struct nvme_fc_lport *lport = 507 localport_to_lport(rport->remoteport.localport); 508 unsigned long flags; 509 510 WARN_ON(rport->remoteport.port_state != FC_OBJSTATE_DELETED); 511 WARN_ON(!list_empty(&rport->ctrl_list)); 512 513 /* remove from lport list */ 514 spin_lock_irqsave(&nvme_fc_lock, flags); 515 list_del(&rport->endp_list); 516 spin_unlock_irqrestore(&nvme_fc_lock, flags); 517 518 WARN_ON(!list_empty(&rport->disc_list)); 519 ida_simple_remove(&lport->endp_cnt, rport->remoteport.port_num); 520 521 kfree(rport); 522 523 nvme_fc_lport_put(lport); 524 } 525 526 static void 527 nvme_fc_rport_put(struct nvme_fc_rport *rport) 528 { 529 kref_put(&rport->ref, nvme_fc_free_rport); 530 } 531 532 static int 533 nvme_fc_rport_get(struct nvme_fc_rport *rport) 534 { 535 return kref_get_unless_zero(&rport->ref); 536 } 537 538 static void 539 nvme_fc_resume_controller(struct nvme_fc_ctrl *ctrl) 540 { 541 switch (ctrl->ctrl.state) { 542 case NVME_CTRL_NEW: 543 case NVME_CTRL_CONNECTING: 544 /* 545 * As all reconnects were suppressed, schedule a 546 * connect. 547 */ 548 dev_info(ctrl->ctrl.device, 549 "NVME-FC{%d}: connectivity re-established. " 550 "Attempting reconnect\n", ctrl->cnum); 551 552 queue_delayed_work(nvme_wq, &ctrl->connect_work, 0); 553 break; 554 555 case NVME_CTRL_RESETTING: 556 /* 557 * Controller is already in the process of terminating the 558 * association. No need to do anything further. The reconnect 559 * step will naturally occur after the reset completes. 
560 */ 561 break; 562 563 default: 564 /* no action to take - let it delete */ 565 break; 566 } 567 } 568 569 static struct nvme_fc_rport * 570 nvme_fc_attach_to_suspended_rport(struct nvme_fc_lport *lport, 571 struct nvme_fc_port_info *pinfo) 572 { 573 struct nvme_fc_rport *rport; 574 struct nvme_fc_ctrl *ctrl; 575 unsigned long flags; 576 577 spin_lock_irqsave(&nvme_fc_lock, flags); 578 579 list_for_each_entry(rport, &lport->endp_list, endp_list) { 580 if (rport->remoteport.node_name != pinfo->node_name || 581 rport->remoteport.port_name != pinfo->port_name) 582 continue; 583 584 if (!nvme_fc_rport_get(rport)) { 585 rport = ERR_PTR(-ENOLCK); 586 goto out_done; 587 } 588 589 spin_unlock_irqrestore(&nvme_fc_lock, flags); 590 591 spin_lock_irqsave(&rport->lock, flags); 592 593 /* has it been unregistered */ 594 if (rport->remoteport.port_state != FC_OBJSTATE_DELETED) { 595 /* means lldd called us twice */ 596 spin_unlock_irqrestore(&rport->lock, flags); 597 nvme_fc_rport_put(rport); 598 return ERR_PTR(-ESTALE); 599 } 600 601 rport->remoteport.port_role = pinfo->port_role; 602 rport->remoteport.port_id = pinfo->port_id; 603 rport->remoteport.port_state = FC_OBJSTATE_ONLINE; 604 rport->dev_loss_end = 0; 605 606 /* 607 * kick off a reconnect attempt on all associations to the 608 * remote port. A successful reconnects will resume i/o. 609 */ 610 list_for_each_entry(ctrl, &rport->ctrl_list, ctrl_list) 611 nvme_fc_resume_controller(ctrl); 612 613 spin_unlock_irqrestore(&rport->lock, flags); 614 615 return rport; 616 } 617 618 rport = NULL; 619 620 out_done: 621 spin_unlock_irqrestore(&nvme_fc_lock, flags); 622 623 return rport; 624 } 625 626 static inline void 627 __nvme_fc_set_dev_loss_tmo(struct nvme_fc_rport *rport, 628 struct nvme_fc_port_info *pinfo) 629 { 630 if (pinfo->dev_loss_tmo) 631 rport->remoteport.dev_loss_tmo = pinfo->dev_loss_tmo; 632 else 633 rport->remoteport.dev_loss_tmo = NVME_FC_DEFAULT_DEV_LOSS_TMO; 634 } 635 636 /** 637 * nvme_fc_register_remoteport - transport entry point called by an 638 * LLDD to register the existence of a NVME 639 * subsystem FC port on its fabric. 640 * @localport: pointer to the (registered) local port that the remote 641 * subsystem port is connected to. 642 * @pinfo: pointer to information about the port to be registered 643 * @portptr: pointer to a remote port pointer. Upon success, the routine 644 * will allocate a nvme_fc_remote_port structure and place its 645 * address in the remote port pointer. Upon failure, remote port 646 * pointer will be set to 0. 647 * 648 * Returns: 649 * a completion status. Must be 0 upon success; a negative errno 650 * (ex: -ENXIO) upon failure. 651 */ 652 int 653 nvme_fc_register_remoteport(struct nvme_fc_local_port *localport, 654 struct nvme_fc_port_info *pinfo, 655 struct nvme_fc_remote_port **portptr) 656 { 657 struct nvme_fc_lport *lport = localport_to_lport(localport); 658 struct nvme_fc_rport *newrec; 659 unsigned long flags; 660 int ret, idx; 661 662 if (!nvme_fc_lport_get(lport)) { 663 ret = -ESHUTDOWN; 664 goto out_reghost_failed; 665 } 666 667 /* 668 * look to see if there is already a remoteport that is waiting 669 * for a reconnect (within dev_loss_tmo) with the same WWN's. 670 * If so, transition to it and reconnect. 
671 */ 672 newrec = nvme_fc_attach_to_suspended_rport(lport, pinfo); 673 674 /* found an rport, but something about its state is bad */ 675 if (IS_ERR(newrec)) { 676 ret = PTR_ERR(newrec); 677 goto out_lport_put; 678 679 /* found existing rport, which was resumed */ 680 } else if (newrec) { 681 nvme_fc_lport_put(lport); 682 __nvme_fc_set_dev_loss_tmo(newrec, pinfo); 683 nvme_fc_signal_discovery_scan(lport, newrec); 684 *portptr = &newrec->remoteport; 685 return 0; 686 } 687 688 /* nothing found - allocate a new remoteport struct */ 689 690 newrec = kmalloc((sizeof(*newrec) + lport->ops->remote_priv_sz), 691 GFP_KERNEL); 692 if (!newrec) { 693 ret = -ENOMEM; 694 goto out_lport_put; 695 } 696 697 idx = ida_simple_get(&lport->endp_cnt, 0, 0, GFP_KERNEL); 698 if (idx < 0) { 699 ret = -ENOSPC; 700 goto out_kfree_rport; 701 } 702 703 INIT_LIST_HEAD(&newrec->endp_list); 704 INIT_LIST_HEAD(&newrec->ctrl_list); 705 INIT_LIST_HEAD(&newrec->ls_req_list); 706 INIT_LIST_HEAD(&newrec->disc_list); 707 kref_init(&newrec->ref); 708 atomic_set(&newrec->act_ctrl_cnt, 0); 709 spin_lock_init(&newrec->lock); 710 newrec->remoteport.localport = &lport->localport; 711 newrec->dev = lport->dev; 712 newrec->lport = lport; 713 newrec->remoteport.private = &newrec[1]; 714 newrec->remoteport.port_role = pinfo->port_role; 715 newrec->remoteport.node_name = pinfo->node_name; 716 newrec->remoteport.port_name = pinfo->port_name; 717 newrec->remoteport.port_id = pinfo->port_id; 718 newrec->remoteport.port_state = FC_OBJSTATE_ONLINE; 719 newrec->remoteport.port_num = idx; 720 __nvme_fc_set_dev_loss_tmo(newrec, pinfo); 721 722 spin_lock_irqsave(&nvme_fc_lock, flags); 723 list_add_tail(&newrec->endp_list, &lport->endp_list); 724 spin_unlock_irqrestore(&nvme_fc_lock, flags); 725 726 nvme_fc_signal_discovery_scan(lport, newrec); 727 728 *portptr = &newrec->remoteport; 729 return 0; 730 731 out_kfree_rport: 732 kfree(newrec); 733 out_lport_put: 734 nvme_fc_lport_put(lport); 735 out_reghost_failed: 736 *portptr = NULL; 737 return ret; 738 } 739 EXPORT_SYMBOL_GPL(nvme_fc_register_remoteport); 740 741 static int 742 nvme_fc_abort_lsops(struct nvme_fc_rport *rport) 743 { 744 struct nvmefc_ls_req_op *lsop; 745 unsigned long flags; 746 747 restart: 748 spin_lock_irqsave(&rport->lock, flags); 749 750 list_for_each_entry(lsop, &rport->ls_req_list, lsreq_list) { 751 if (!(lsop->flags & FCOP_FLAGS_TERMIO)) { 752 lsop->flags |= FCOP_FLAGS_TERMIO; 753 spin_unlock_irqrestore(&rport->lock, flags); 754 rport->lport->ops->ls_abort(&rport->lport->localport, 755 &rport->remoteport, 756 &lsop->ls_req); 757 goto restart; 758 } 759 } 760 spin_unlock_irqrestore(&rport->lock, flags); 761 762 return 0; 763 } 764 765 static void 766 nvme_fc_ctrl_connectivity_loss(struct nvme_fc_ctrl *ctrl) 767 { 768 dev_info(ctrl->ctrl.device, 769 "NVME-FC{%d}: controller connectivity lost. Awaiting " 770 "Reconnect", ctrl->cnum); 771 772 switch (ctrl->ctrl.state) { 773 case NVME_CTRL_NEW: 774 case NVME_CTRL_LIVE: 775 /* 776 * Schedule a controller reset. The reset will terminate the 777 * association and schedule the reconnect timer. Reconnects 778 * will be attempted until either the ctlr_loss_tmo 779 * (max_retries * connect_delay) expires or the remoteport's 780 * dev_loss_tmo expires. 
781 */ 782 if (nvme_reset_ctrl(&ctrl->ctrl)) { 783 dev_warn(ctrl->ctrl.device, 784 "NVME-FC{%d}: Couldn't schedule reset.\n", 785 ctrl->cnum); 786 nvme_delete_ctrl(&ctrl->ctrl); 787 } 788 break; 789 790 case NVME_CTRL_CONNECTING: 791 /* 792 * The association has already been terminated and the 793 * controller is attempting reconnects. No need to do anything 794 * futher. Reconnects will be attempted until either the 795 * ctlr_loss_tmo (max_retries * connect_delay) expires or the 796 * remoteport's dev_loss_tmo expires. 797 */ 798 break; 799 800 case NVME_CTRL_RESETTING: 801 /* 802 * Controller is already in the process of terminating the 803 * association. No need to do anything further. The reconnect 804 * step will kick in naturally after the association is 805 * terminated. 806 */ 807 break; 808 809 case NVME_CTRL_DELETING: 810 default: 811 /* no action to take - let it delete */ 812 break; 813 } 814 } 815 816 /** 817 * nvme_fc_unregister_remoteport - transport entry point called by an 818 * LLDD to deregister/remove a previously 819 * registered a NVME subsystem FC port. 820 * @portptr: pointer to the (registered) remote port that is to be 821 * deregistered. 822 * 823 * Returns: 824 * a completion status. Must be 0 upon success; a negative errno 825 * (ex: -ENXIO) upon failure. 826 */ 827 int 828 nvme_fc_unregister_remoteport(struct nvme_fc_remote_port *portptr) 829 { 830 struct nvme_fc_rport *rport = remoteport_to_rport(portptr); 831 struct nvme_fc_ctrl *ctrl; 832 unsigned long flags; 833 834 if (!portptr) 835 return -EINVAL; 836 837 spin_lock_irqsave(&rport->lock, flags); 838 839 if (portptr->port_state != FC_OBJSTATE_ONLINE) { 840 spin_unlock_irqrestore(&rport->lock, flags); 841 return -EINVAL; 842 } 843 portptr->port_state = FC_OBJSTATE_DELETED; 844 845 rport->dev_loss_end = jiffies + (portptr->dev_loss_tmo * HZ); 846 847 list_for_each_entry(ctrl, &rport->ctrl_list, ctrl_list) { 848 /* if dev_loss_tmo==0, dev loss is immediate */ 849 if (!portptr->dev_loss_tmo) { 850 dev_warn(ctrl->ctrl.device, 851 "NVME-FC{%d}: controller connectivity lost.\n", 852 ctrl->cnum); 853 nvme_delete_ctrl(&ctrl->ctrl); 854 } else 855 nvme_fc_ctrl_connectivity_loss(ctrl); 856 } 857 858 spin_unlock_irqrestore(&rport->lock, flags); 859 860 nvme_fc_abort_lsops(rport); 861 862 if (atomic_read(&rport->act_ctrl_cnt) == 0) 863 rport->lport->ops->remoteport_delete(portptr); 864 865 /* 866 * release the reference, which will allow, if all controllers 867 * go away, which should only occur after dev_loss_tmo occurs, 868 * for the rport to be torn down. 869 */ 870 nvme_fc_rport_put(rport); 871 872 return 0; 873 } 874 EXPORT_SYMBOL_GPL(nvme_fc_unregister_remoteport); 875 876 /** 877 * nvme_fc_rescan_remoteport - transport entry point called by an 878 * LLDD to request a nvme device rescan. 879 * @remoteport: pointer to the (registered) remote port that is to be 880 * rescanned. 
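 *
 * The rescan is signalled to userspace as an FC_EVENT=nvmediscovery
 * uevent (see nvme_fc_signal_discovery_scan()); a userspace agent
 * (e.g. udev rules driving nvme-cli) is expected to respond by
 * re-running discovery and connecting to any new subsystems.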
 *
 * Returns: N/A
 */
void
nvme_fc_rescan_remoteport(struct nvme_fc_remote_port *remoteport)
{
	struct nvme_fc_rport *rport = remoteport_to_rport(remoteport);

	nvme_fc_signal_discovery_scan(rport->lport, rport);
}
EXPORT_SYMBOL_GPL(nvme_fc_rescan_remoteport);

int
nvme_fc_set_remoteport_devloss(struct nvme_fc_remote_port *portptr,
			u32 dev_loss_tmo)
{
	struct nvme_fc_rport *rport = remoteport_to_rport(portptr);
	unsigned long flags;

	spin_lock_irqsave(&rport->lock, flags);

	if (portptr->port_state != FC_OBJSTATE_ONLINE) {
		spin_unlock_irqrestore(&rport->lock, flags);
		return -EINVAL;
	}

	/* a dev_loss_tmo of 0 (immediate) is allowed to be set */
	rport->remoteport.dev_loss_tmo = dev_loss_tmo;

	spin_unlock_irqrestore(&rport->lock, flags);

	return 0;
}
EXPORT_SYMBOL_GPL(nvme_fc_set_remoteport_devloss);


/* *********************** FC-NVME DMA Handling **************************** */

/*
 * The fcloop device passes in a NULL device pointer. Real LLD's will
 * pass in a valid device pointer. If NULL is passed to the dma mapping
 * routines, depending on the platform, it may or may not succeed, and
 * may crash.
 *
 * As such:
 * Wrap all the dma routines and check the dev pointer.
 *
 * For simple mappings (those that return just a dma address), we'll
 * noop them, returning a dma address of 0.
 *
 * On more complex mappings (dma_map_sg), a pseudo routine fills
 * in the scatter list, setting all dma addresses to 0.
 */

static inline dma_addr_t
fc_dma_map_single(struct device *dev, void *ptr, size_t size,
		enum dma_data_direction dir)
{
	return dev ? dma_map_single(dev, ptr, size, dir) : (dma_addr_t)0L;
}

static inline int
fc_dma_mapping_error(struct device *dev, dma_addr_t dma_addr)
{
	return dev ? dma_mapping_error(dev, dma_addr) : 0;
}

static inline void
fc_dma_unmap_single(struct device *dev, dma_addr_t addr, size_t size,
	enum dma_data_direction dir)
{
	if (dev)
		dma_unmap_single(dev, addr, size, dir);
}

static inline void
fc_dma_sync_single_for_cpu(struct device *dev, dma_addr_t addr, size_t size,
		enum dma_data_direction dir)
{
	if (dev)
		dma_sync_single_for_cpu(dev, addr, size, dir);
}

static inline void
fc_dma_sync_single_for_device(struct device *dev, dma_addr_t addr, size_t size,
		enum dma_data_direction dir)
{
	if (dev)
		dma_sync_single_for_device(dev, addr, size, dir);
}

/* pseudo dma_map_sg call */
static int
fc_map_sg(struct scatterlist *sg, int nents)
{
	struct scatterlist *s;
	int i;

	WARN_ON(nents == 0 || sg[0].length == 0);

	for_each_sg(sg, s, nents, i) {
		s->dma_address = 0L;
#ifdef CONFIG_NEED_SG_DMA_LENGTH
		s->dma_length = s->length;
#endif
	}
	return nents;
}

static inline int
fc_dma_map_sg(struct device *dev, struct scatterlist *sg, int nents,
		enum dma_data_direction dir)
{
	return dev ?
dma_map_sg(dev, sg, nents, dir) : fc_map_sg(sg, nents); 995 } 996 997 static inline void 998 fc_dma_unmap_sg(struct device *dev, struct scatterlist *sg, int nents, 999 enum dma_data_direction dir) 1000 { 1001 if (dev) 1002 dma_unmap_sg(dev, sg, nents, dir); 1003 } 1004 1005 /* *********************** FC-NVME LS Handling **************************** */ 1006 1007 static void nvme_fc_ctrl_put(struct nvme_fc_ctrl *); 1008 static int nvme_fc_ctrl_get(struct nvme_fc_ctrl *); 1009 1010 1011 static void 1012 __nvme_fc_finish_ls_req(struct nvmefc_ls_req_op *lsop) 1013 { 1014 struct nvme_fc_rport *rport = lsop->rport; 1015 struct nvmefc_ls_req *lsreq = &lsop->ls_req; 1016 unsigned long flags; 1017 1018 spin_lock_irqsave(&rport->lock, flags); 1019 1020 if (!lsop->req_queued) { 1021 spin_unlock_irqrestore(&rport->lock, flags); 1022 return; 1023 } 1024 1025 list_del(&lsop->lsreq_list); 1026 1027 lsop->req_queued = false; 1028 1029 spin_unlock_irqrestore(&rport->lock, flags); 1030 1031 fc_dma_unmap_single(rport->dev, lsreq->rqstdma, 1032 (lsreq->rqstlen + lsreq->rsplen), 1033 DMA_BIDIRECTIONAL); 1034 1035 nvme_fc_rport_put(rport); 1036 } 1037 1038 static int 1039 __nvme_fc_send_ls_req(struct nvme_fc_rport *rport, 1040 struct nvmefc_ls_req_op *lsop, 1041 void (*done)(struct nvmefc_ls_req *req, int status)) 1042 { 1043 struct nvmefc_ls_req *lsreq = &lsop->ls_req; 1044 unsigned long flags; 1045 int ret = 0; 1046 1047 if (rport->remoteport.port_state != FC_OBJSTATE_ONLINE) 1048 return -ECONNREFUSED; 1049 1050 if (!nvme_fc_rport_get(rport)) 1051 return -ESHUTDOWN; 1052 1053 lsreq->done = done; 1054 lsop->rport = rport; 1055 lsop->req_queued = false; 1056 INIT_LIST_HEAD(&lsop->lsreq_list); 1057 init_completion(&lsop->ls_done); 1058 1059 lsreq->rqstdma = fc_dma_map_single(rport->dev, lsreq->rqstaddr, 1060 lsreq->rqstlen + lsreq->rsplen, 1061 DMA_BIDIRECTIONAL); 1062 if (fc_dma_mapping_error(rport->dev, lsreq->rqstdma)) { 1063 ret = -EFAULT; 1064 goto out_putrport; 1065 } 1066 lsreq->rspdma = lsreq->rqstdma + lsreq->rqstlen; 1067 1068 spin_lock_irqsave(&rport->lock, flags); 1069 1070 list_add_tail(&lsop->lsreq_list, &rport->ls_req_list); 1071 1072 lsop->req_queued = true; 1073 1074 spin_unlock_irqrestore(&rport->lock, flags); 1075 1076 ret = rport->lport->ops->ls_req(&rport->lport->localport, 1077 &rport->remoteport, lsreq); 1078 if (ret) 1079 goto out_unlink; 1080 1081 return 0; 1082 1083 out_unlink: 1084 lsop->ls_error = ret; 1085 spin_lock_irqsave(&rport->lock, flags); 1086 lsop->req_queued = false; 1087 list_del(&lsop->lsreq_list); 1088 spin_unlock_irqrestore(&rport->lock, flags); 1089 fc_dma_unmap_single(rport->dev, lsreq->rqstdma, 1090 (lsreq->rqstlen + lsreq->rsplen), 1091 DMA_BIDIRECTIONAL); 1092 out_putrport: 1093 nvme_fc_rport_put(rport); 1094 1095 return ret; 1096 } 1097 1098 static void 1099 nvme_fc_send_ls_req_done(struct nvmefc_ls_req *lsreq, int status) 1100 { 1101 struct nvmefc_ls_req_op *lsop = ls_req_to_lsop(lsreq); 1102 1103 lsop->ls_error = status; 1104 complete(&lsop->ls_done); 1105 } 1106 1107 static int 1108 nvme_fc_send_ls_req(struct nvme_fc_rport *rport, struct nvmefc_ls_req_op *lsop) 1109 { 1110 struct nvmefc_ls_req *lsreq = &lsop->ls_req; 1111 struct fcnvme_ls_rjt *rjt = lsreq->rspaddr; 1112 int ret; 1113 1114 ret = __nvme_fc_send_ls_req(rport, lsop, nvme_fc_send_ls_req_done); 1115 1116 if (!ret) { 1117 /* 1118 * No timeout/not interruptible as we need the struct 1119 * to exist until the lldd calls us back. Thus mandate 1120 * wait until driver calls back. 
lldd responsible for 1121 * the timeout action 1122 */ 1123 wait_for_completion(&lsop->ls_done); 1124 1125 __nvme_fc_finish_ls_req(lsop); 1126 1127 ret = lsop->ls_error; 1128 } 1129 1130 if (ret) 1131 return ret; 1132 1133 /* ACC or RJT payload ? */ 1134 if (rjt->w0.ls_cmd == FCNVME_LS_RJT) 1135 return -ENXIO; 1136 1137 return 0; 1138 } 1139 1140 static int 1141 nvme_fc_send_ls_req_async(struct nvme_fc_rport *rport, 1142 struct nvmefc_ls_req_op *lsop, 1143 void (*done)(struct nvmefc_ls_req *req, int status)) 1144 { 1145 /* don't wait for completion */ 1146 1147 return __nvme_fc_send_ls_req(rport, lsop, done); 1148 } 1149 1150 /* Validation Error indexes into the string table below */ 1151 enum { 1152 VERR_NO_ERROR = 0, 1153 VERR_LSACC = 1, 1154 VERR_LSDESC_RQST = 2, 1155 VERR_LSDESC_RQST_LEN = 3, 1156 VERR_ASSOC_ID = 4, 1157 VERR_ASSOC_ID_LEN = 5, 1158 VERR_CONN_ID = 6, 1159 VERR_CONN_ID_LEN = 7, 1160 VERR_CR_ASSOC = 8, 1161 VERR_CR_ASSOC_ACC_LEN = 9, 1162 VERR_CR_CONN = 10, 1163 VERR_CR_CONN_ACC_LEN = 11, 1164 VERR_DISCONN = 12, 1165 VERR_DISCONN_ACC_LEN = 13, 1166 }; 1167 1168 static char *validation_errors[] = { 1169 "OK", 1170 "Not LS_ACC", 1171 "Not LSDESC_RQST", 1172 "Bad LSDESC_RQST Length", 1173 "Not Association ID", 1174 "Bad Association ID Length", 1175 "Not Connection ID", 1176 "Bad Connection ID Length", 1177 "Not CR_ASSOC Rqst", 1178 "Bad CR_ASSOC ACC Length", 1179 "Not CR_CONN Rqst", 1180 "Bad CR_CONN ACC Length", 1181 "Not Disconnect Rqst", 1182 "Bad Disconnect ACC Length", 1183 }; 1184 1185 static int 1186 nvme_fc_connect_admin_queue(struct nvme_fc_ctrl *ctrl, 1187 struct nvme_fc_queue *queue, u16 qsize, u16 ersp_ratio) 1188 { 1189 struct nvmefc_ls_req_op *lsop; 1190 struct nvmefc_ls_req *lsreq; 1191 struct fcnvme_ls_cr_assoc_rqst *assoc_rqst; 1192 struct fcnvme_ls_cr_assoc_acc *assoc_acc; 1193 int ret, fcret = 0; 1194 1195 lsop = kzalloc((sizeof(*lsop) + 1196 ctrl->lport->ops->lsrqst_priv_sz + 1197 sizeof(*assoc_rqst) + sizeof(*assoc_acc)), GFP_KERNEL); 1198 if (!lsop) { 1199 ret = -ENOMEM; 1200 goto out_no_memory; 1201 } 1202 lsreq = &lsop->ls_req; 1203 1204 lsreq->private = (void *)&lsop[1]; 1205 assoc_rqst = (struct fcnvme_ls_cr_assoc_rqst *) 1206 (lsreq->private + ctrl->lport->ops->lsrqst_priv_sz); 1207 assoc_acc = (struct fcnvme_ls_cr_assoc_acc *)&assoc_rqst[1]; 1208 1209 assoc_rqst->w0.ls_cmd = FCNVME_LS_CREATE_ASSOCIATION; 1210 assoc_rqst->desc_list_len = 1211 cpu_to_be32(sizeof(struct fcnvme_lsdesc_cr_assoc_cmd)); 1212 1213 assoc_rqst->assoc_cmd.desc_tag = 1214 cpu_to_be32(FCNVME_LSDESC_CREATE_ASSOC_CMD); 1215 assoc_rqst->assoc_cmd.desc_len = 1216 fcnvme_lsdesc_len( 1217 sizeof(struct fcnvme_lsdesc_cr_assoc_cmd)); 1218 1219 assoc_rqst->assoc_cmd.ersp_ratio = cpu_to_be16(ersp_ratio); 1220 assoc_rqst->assoc_cmd.sqsize = cpu_to_be16(qsize - 1); 1221 /* Linux supports only Dynamic controllers */ 1222 assoc_rqst->assoc_cmd.cntlid = cpu_to_be16(0xffff); 1223 uuid_copy(&assoc_rqst->assoc_cmd.hostid, &ctrl->ctrl.opts->host->id); 1224 strncpy(assoc_rqst->assoc_cmd.hostnqn, ctrl->ctrl.opts->host->nqn, 1225 min(FCNVME_ASSOC_HOSTNQN_LEN, NVMF_NQN_SIZE)); 1226 strncpy(assoc_rqst->assoc_cmd.subnqn, ctrl->ctrl.opts->subsysnqn, 1227 min(FCNVME_ASSOC_SUBNQN_LEN, NVMF_NQN_SIZE)); 1228 1229 lsop->queue = queue; 1230 lsreq->rqstaddr = assoc_rqst; 1231 lsreq->rqstlen = sizeof(*assoc_rqst); 1232 lsreq->rspaddr = assoc_acc; 1233 lsreq->rsplen = sizeof(*assoc_acc); 1234 lsreq->timeout = NVME_FC_CONNECT_TIMEOUT_SEC; 1235 1236 ret = nvme_fc_send_ls_req(ctrl->rport, lsop); 1237 if (ret) 1238 
goto out_free_buffer; 1239 1240 /* process connect LS completion */ 1241 1242 /* validate the ACC response */ 1243 if (assoc_acc->hdr.w0.ls_cmd != FCNVME_LS_ACC) 1244 fcret = VERR_LSACC; 1245 else if (assoc_acc->hdr.desc_list_len != 1246 fcnvme_lsdesc_len( 1247 sizeof(struct fcnvme_ls_cr_assoc_acc))) 1248 fcret = VERR_CR_ASSOC_ACC_LEN; 1249 else if (assoc_acc->hdr.rqst.desc_tag != 1250 cpu_to_be32(FCNVME_LSDESC_RQST)) 1251 fcret = VERR_LSDESC_RQST; 1252 else if (assoc_acc->hdr.rqst.desc_len != 1253 fcnvme_lsdesc_len(sizeof(struct fcnvme_lsdesc_rqst))) 1254 fcret = VERR_LSDESC_RQST_LEN; 1255 else if (assoc_acc->hdr.rqst.w0.ls_cmd != FCNVME_LS_CREATE_ASSOCIATION) 1256 fcret = VERR_CR_ASSOC; 1257 else if (assoc_acc->associd.desc_tag != 1258 cpu_to_be32(FCNVME_LSDESC_ASSOC_ID)) 1259 fcret = VERR_ASSOC_ID; 1260 else if (assoc_acc->associd.desc_len != 1261 fcnvme_lsdesc_len( 1262 sizeof(struct fcnvme_lsdesc_assoc_id))) 1263 fcret = VERR_ASSOC_ID_LEN; 1264 else if (assoc_acc->connectid.desc_tag != 1265 cpu_to_be32(FCNVME_LSDESC_CONN_ID)) 1266 fcret = VERR_CONN_ID; 1267 else if (assoc_acc->connectid.desc_len != 1268 fcnvme_lsdesc_len(sizeof(struct fcnvme_lsdesc_conn_id))) 1269 fcret = VERR_CONN_ID_LEN; 1270 1271 if (fcret) { 1272 ret = -EBADF; 1273 dev_err(ctrl->dev, 1274 "q %d connect failed: %s\n", 1275 queue->qnum, validation_errors[fcret]); 1276 } else { 1277 ctrl->association_id = 1278 be64_to_cpu(assoc_acc->associd.association_id); 1279 queue->connection_id = 1280 be64_to_cpu(assoc_acc->connectid.connection_id); 1281 set_bit(NVME_FC_Q_CONNECTED, &queue->flags); 1282 } 1283 1284 out_free_buffer: 1285 kfree(lsop); 1286 out_no_memory: 1287 if (ret) 1288 dev_err(ctrl->dev, 1289 "queue %d connect admin queue failed (%d).\n", 1290 queue->qnum, ret); 1291 return ret; 1292 } 1293 1294 static int 1295 nvme_fc_connect_queue(struct nvme_fc_ctrl *ctrl, struct nvme_fc_queue *queue, 1296 u16 qsize, u16 ersp_ratio) 1297 { 1298 struct nvmefc_ls_req_op *lsop; 1299 struct nvmefc_ls_req *lsreq; 1300 struct fcnvme_ls_cr_conn_rqst *conn_rqst; 1301 struct fcnvme_ls_cr_conn_acc *conn_acc; 1302 int ret, fcret = 0; 1303 1304 lsop = kzalloc((sizeof(*lsop) + 1305 ctrl->lport->ops->lsrqst_priv_sz + 1306 sizeof(*conn_rqst) + sizeof(*conn_acc)), GFP_KERNEL); 1307 if (!lsop) { 1308 ret = -ENOMEM; 1309 goto out_no_memory; 1310 } 1311 lsreq = &lsop->ls_req; 1312 1313 lsreq->private = (void *)&lsop[1]; 1314 conn_rqst = (struct fcnvme_ls_cr_conn_rqst *) 1315 (lsreq->private + ctrl->lport->ops->lsrqst_priv_sz); 1316 conn_acc = (struct fcnvme_ls_cr_conn_acc *)&conn_rqst[1]; 1317 1318 conn_rqst->w0.ls_cmd = FCNVME_LS_CREATE_CONNECTION; 1319 conn_rqst->desc_list_len = cpu_to_be32( 1320 sizeof(struct fcnvme_lsdesc_assoc_id) + 1321 sizeof(struct fcnvme_lsdesc_cr_conn_cmd)); 1322 1323 conn_rqst->associd.desc_tag = cpu_to_be32(FCNVME_LSDESC_ASSOC_ID); 1324 conn_rqst->associd.desc_len = 1325 fcnvme_lsdesc_len( 1326 sizeof(struct fcnvme_lsdesc_assoc_id)); 1327 conn_rqst->associd.association_id = cpu_to_be64(ctrl->association_id); 1328 conn_rqst->connect_cmd.desc_tag = 1329 cpu_to_be32(FCNVME_LSDESC_CREATE_CONN_CMD); 1330 conn_rqst->connect_cmd.desc_len = 1331 fcnvme_lsdesc_len( 1332 sizeof(struct fcnvme_lsdesc_cr_conn_cmd)); 1333 conn_rqst->connect_cmd.ersp_ratio = cpu_to_be16(ersp_ratio); 1334 conn_rqst->connect_cmd.qid = cpu_to_be16(queue->qnum); 1335 conn_rqst->connect_cmd.sqsize = cpu_to_be16(qsize - 1); 1336 1337 lsop->queue = queue; 1338 lsreq->rqstaddr = conn_rqst; 1339 lsreq->rqstlen = sizeof(*conn_rqst); 1340 lsreq->rspaddr = 
conn_acc; 1341 lsreq->rsplen = sizeof(*conn_acc); 1342 lsreq->timeout = NVME_FC_CONNECT_TIMEOUT_SEC; 1343 1344 ret = nvme_fc_send_ls_req(ctrl->rport, lsop); 1345 if (ret) 1346 goto out_free_buffer; 1347 1348 /* process connect LS completion */ 1349 1350 /* validate the ACC response */ 1351 if (conn_acc->hdr.w0.ls_cmd != FCNVME_LS_ACC) 1352 fcret = VERR_LSACC; 1353 else if (conn_acc->hdr.desc_list_len != 1354 fcnvme_lsdesc_len(sizeof(struct fcnvme_ls_cr_conn_acc))) 1355 fcret = VERR_CR_CONN_ACC_LEN; 1356 else if (conn_acc->hdr.rqst.desc_tag != cpu_to_be32(FCNVME_LSDESC_RQST)) 1357 fcret = VERR_LSDESC_RQST; 1358 else if (conn_acc->hdr.rqst.desc_len != 1359 fcnvme_lsdesc_len(sizeof(struct fcnvme_lsdesc_rqst))) 1360 fcret = VERR_LSDESC_RQST_LEN; 1361 else if (conn_acc->hdr.rqst.w0.ls_cmd != FCNVME_LS_CREATE_CONNECTION) 1362 fcret = VERR_CR_CONN; 1363 else if (conn_acc->connectid.desc_tag != 1364 cpu_to_be32(FCNVME_LSDESC_CONN_ID)) 1365 fcret = VERR_CONN_ID; 1366 else if (conn_acc->connectid.desc_len != 1367 fcnvme_lsdesc_len(sizeof(struct fcnvme_lsdesc_conn_id))) 1368 fcret = VERR_CONN_ID_LEN; 1369 1370 if (fcret) { 1371 ret = -EBADF; 1372 dev_err(ctrl->dev, 1373 "q %d connect failed: %s\n", 1374 queue->qnum, validation_errors[fcret]); 1375 } else { 1376 queue->connection_id = 1377 be64_to_cpu(conn_acc->connectid.connection_id); 1378 set_bit(NVME_FC_Q_CONNECTED, &queue->flags); 1379 } 1380 1381 out_free_buffer: 1382 kfree(lsop); 1383 out_no_memory: 1384 if (ret) 1385 dev_err(ctrl->dev, 1386 "queue %d connect command failed (%d).\n", 1387 queue->qnum, ret); 1388 return ret; 1389 } 1390 1391 static void 1392 nvme_fc_disconnect_assoc_done(struct nvmefc_ls_req *lsreq, int status) 1393 { 1394 struct nvmefc_ls_req_op *lsop = ls_req_to_lsop(lsreq); 1395 1396 __nvme_fc_finish_ls_req(lsop); 1397 1398 /* fc-nvme initiator doesn't care about success or failure of cmd */ 1399 1400 kfree(lsop); 1401 } 1402 1403 /* 1404 * This routine sends a FC-NVME LS to disconnect (aka terminate) 1405 * the FC-NVME Association. Terminating the association also 1406 * terminates the FC-NVME connections (per queue, both admin and io 1407 * queues) that are part of the association. E.g. things are torn 1408 * down, and the related FC-NVME Association ID and Connection IDs 1409 * become invalid. 1410 * 1411 * The behavior of the fc-nvme initiator is such that it's 1412 * understanding of the association and connections will implicitly 1413 * be torn down. The action is implicit as it may be due to a loss of 1414 * connectivity with the fc-nvme target, so you may never get a 1415 * response even if you tried. As such, the action of this routine 1416 * is to asynchronously send the LS, ignore any results of the LS, and 1417 * continue on with terminating the association. If the fc-nvme target 1418 * is present and receives the LS, it too can tear down. 1419 */ 1420 static void 1421 nvme_fc_xmt_disconnect_assoc(struct nvme_fc_ctrl *ctrl) 1422 { 1423 struct fcnvme_ls_disconnect_rqst *discon_rqst; 1424 struct fcnvme_ls_disconnect_acc *discon_acc; 1425 struct nvmefc_ls_req_op *lsop; 1426 struct nvmefc_ls_req *lsreq; 1427 int ret; 1428 1429 lsop = kzalloc((sizeof(*lsop) + 1430 ctrl->lport->ops->lsrqst_priv_sz + 1431 sizeof(*discon_rqst) + sizeof(*discon_acc)), 1432 GFP_KERNEL); 1433 if (!lsop) 1434 /* couldn't sent it... 
too bad */ 1435 return; 1436 1437 lsreq = &lsop->ls_req; 1438 1439 lsreq->private = (void *)&lsop[1]; 1440 discon_rqst = (struct fcnvme_ls_disconnect_rqst *) 1441 (lsreq->private + ctrl->lport->ops->lsrqst_priv_sz); 1442 discon_acc = (struct fcnvme_ls_disconnect_acc *)&discon_rqst[1]; 1443 1444 discon_rqst->w0.ls_cmd = FCNVME_LS_DISCONNECT; 1445 discon_rqst->desc_list_len = cpu_to_be32( 1446 sizeof(struct fcnvme_lsdesc_assoc_id) + 1447 sizeof(struct fcnvme_lsdesc_disconn_cmd)); 1448 1449 discon_rqst->associd.desc_tag = cpu_to_be32(FCNVME_LSDESC_ASSOC_ID); 1450 discon_rqst->associd.desc_len = 1451 fcnvme_lsdesc_len( 1452 sizeof(struct fcnvme_lsdesc_assoc_id)); 1453 1454 discon_rqst->associd.association_id = cpu_to_be64(ctrl->association_id); 1455 1456 discon_rqst->discon_cmd.desc_tag = cpu_to_be32( 1457 FCNVME_LSDESC_DISCONN_CMD); 1458 discon_rqst->discon_cmd.desc_len = 1459 fcnvme_lsdesc_len( 1460 sizeof(struct fcnvme_lsdesc_disconn_cmd)); 1461 discon_rqst->discon_cmd.scope = FCNVME_DISCONN_ASSOCIATION; 1462 discon_rqst->discon_cmd.id = cpu_to_be64(ctrl->association_id); 1463 1464 lsreq->rqstaddr = discon_rqst; 1465 lsreq->rqstlen = sizeof(*discon_rqst); 1466 lsreq->rspaddr = discon_acc; 1467 lsreq->rsplen = sizeof(*discon_acc); 1468 lsreq->timeout = NVME_FC_CONNECT_TIMEOUT_SEC; 1469 1470 ret = nvme_fc_send_ls_req_async(ctrl->rport, lsop, 1471 nvme_fc_disconnect_assoc_done); 1472 if (ret) 1473 kfree(lsop); 1474 1475 /* only meaningful part to terminating the association */ 1476 ctrl->association_id = 0; 1477 } 1478 1479 1480 /* *********************** NVME Ctrl Routines **************************** */ 1481 1482 static void nvme_fc_error_recovery(struct nvme_fc_ctrl *ctrl, char *errmsg); 1483 1484 static void 1485 __nvme_fc_exit_request(struct nvme_fc_ctrl *ctrl, 1486 struct nvme_fc_fcp_op *op) 1487 { 1488 fc_dma_unmap_single(ctrl->lport->dev, op->fcp_req.rspdma, 1489 sizeof(op->rsp_iu), DMA_FROM_DEVICE); 1490 fc_dma_unmap_single(ctrl->lport->dev, op->fcp_req.cmddma, 1491 sizeof(op->cmd_iu), DMA_TO_DEVICE); 1492 1493 atomic_set(&op->state, FCPOP_STATE_UNINIT); 1494 } 1495 1496 static void 1497 nvme_fc_exit_request(struct blk_mq_tag_set *set, struct request *rq, 1498 unsigned int hctx_idx) 1499 { 1500 struct nvme_fc_fcp_op *op = blk_mq_rq_to_pdu(rq); 1501 1502 return __nvme_fc_exit_request(set->driver_data, op); 1503 } 1504 1505 static int 1506 __nvme_fc_abort_op(struct nvme_fc_ctrl *ctrl, struct nvme_fc_fcp_op *op) 1507 { 1508 unsigned long flags; 1509 int opstate; 1510 1511 spin_lock_irqsave(&ctrl->lock, flags); 1512 opstate = atomic_xchg(&op->state, FCPOP_STATE_ABORTED); 1513 if (opstate != FCPOP_STATE_ACTIVE) 1514 atomic_set(&op->state, opstate); 1515 else if (ctrl->flags & FCCTRL_TERMIO) 1516 ctrl->iocnt++; 1517 spin_unlock_irqrestore(&ctrl->lock, flags); 1518 1519 if (opstate != FCPOP_STATE_ACTIVE) 1520 return -ECANCELED; 1521 1522 ctrl->lport->ops->fcp_abort(&ctrl->lport->localport, 1523 &ctrl->rport->remoteport, 1524 op->queue->lldd_handle, 1525 &op->fcp_req); 1526 1527 return 0; 1528 } 1529 1530 static void 1531 nvme_fc_abort_aen_ops(struct nvme_fc_ctrl *ctrl) 1532 { 1533 struct nvme_fc_fcp_op *aen_op = ctrl->aen_ops; 1534 int i; 1535 1536 /* ensure we've initialized the ops once */ 1537 if (!(aen_op->flags & FCOP_FLAGS_AEN)) 1538 return; 1539 1540 for (i = 0; i < NVME_NR_AEN_COMMANDS; i++, aen_op++) 1541 __nvme_fc_abort_op(ctrl, aen_op); 1542 } 1543 1544 static inline void 1545 __nvme_fc_fcpop_chk_teardowns(struct nvme_fc_ctrl *ctrl, 1546 struct nvme_fc_fcp_op *op, int opstate) 
{
	unsigned long flags;

	if (opstate == FCPOP_STATE_ABORTED) {
		spin_lock_irqsave(&ctrl->lock, flags);
		if (ctrl->flags & FCCTRL_TERMIO) {
			if (!--ctrl->iocnt)
				wake_up(&ctrl->ioabort_wait);
		}
		spin_unlock_irqrestore(&ctrl->lock, flags);
	}
}

static void
nvme_fc_fcpio_done(struct nvmefc_fcp_req *req)
{
	struct nvme_fc_fcp_op *op = fcp_req_to_fcp_op(req);
	struct request *rq = op->rq;
	struct nvmefc_fcp_req *freq = &op->fcp_req;
	struct nvme_fc_ctrl *ctrl = op->ctrl;
	struct nvme_fc_queue *queue = op->queue;
	struct nvme_completion *cqe = &op->rsp_iu.cqe;
	struct nvme_command *sqe = &op->cmd_iu.sqe;
	__le16 status = cpu_to_le16(NVME_SC_SUCCESS << 1);
	union nvme_result result;
	bool terminate_assoc = true;
	int opstate;

	/*
	 * WARNING:
	 * The current linux implementation of a nvme controller
	 * allocates a single tag set for all io queues and sizes
	 * the io queues to fully hold all possible tags. Thus, the
	 * implementation does not reference or care about the sqhd
	 * value as it never needs to use the sqhd/sqtail pointers
	 * for submission pacing.
	 *
	 * This affects the FC-NVME implementation in two ways:
	 * 1) As the value doesn't matter, we don't need to waste
	 *    cycles extracting it from ERSPs and stamping it in the
	 *    cases where the transport fabricates CQEs on successful
	 *    completions.
	 * 2) The FC-NVME implementation requires that delivery of
	 *    ERSP completions is to go back to the nvme layer in order
	 *    relative to the rsn, such that the sqhd value will always
	 *    be "in order" for the nvme layer. As the nvme layer in
	 *    linux doesn't care about sqhd, there's no need to return
	 *    them in order.
	 *
	 * Additionally:
	 * As the core nvme layer in linux currently does not look at
	 * every field in the cqe - in cases where the FC transport must
	 * fabricate a CQE, the following fields will not be set as they
	 * are not referenced:
	 *   cqe.sqid, cqe.sqhd, cqe.command_id
	 *
	 * Failure or error of an individual i/o, in a transport
	 * detected fashion unrelated to the nvme completion status,
	 * can potentially cause the initiator and target sides to get out
	 * of sync on SQ head/tail (aka outstanding io count allowed).
	 * Per FC-NVME spec, failure of an individual command requires
	 * the connection to be terminated, which in turn requires the
	 * association to be terminated.
	 */

	opstate = atomic_xchg(&op->state, FCPOP_STATE_COMPLETE);

	fc_dma_sync_single_for_cpu(ctrl->lport->dev, op->fcp_req.rspdma,
				sizeof(op->rsp_iu), DMA_FROM_DEVICE);

	if (opstate == FCPOP_STATE_ABORTED)
		status = cpu_to_le16(NVME_SC_ABORT_REQ << 1);
	else if (freq->status)
		status = cpu_to_le16(NVME_SC_INTERNAL << 1);

	/*
	 * For the linux implementation, if we have an unsuccessful
	 * status, the blk-mq layer can typically be called with the
	 * non-zero status and the content of the cqe isn't important.
	 */
	if (status)
		goto done;

	/*
	 * command completed successfully relative to the wire
	 * protocol. However, validate anything received and
	 * extract the status and result from the cqe (create it
	 * where necessary).
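	 * (Note: rsp_iu.iu_len is expressed in 32-bit words, hence the
	 * comparison against rcv_rsplen / 4 below; a 12-byte all-zeros
	 * response is the transport's short form for a good completion
	 * with no CQE payload.)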
	 */

	switch (freq->rcv_rsplen) {

	case 0:
	case NVME_FC_SIZEOF_ZEROS_RSP:
		/*
		 * No response payload or 12 bytes of payload (which
		 * should all be zeros) are considered successful and
		 * no payload in the CQE by the transport.
		 */
		if (freq->transferred_length !=
			be32_to_cpu(op->cmd_iu.data_len)) {
			status = cpu_to_le16(NVME_SC_INTERNAL << 1);
			goto done;
		}
		result.u64 = 0;
		break;

	case sizeof(struct nvme_fc_ersp_iu):
		/*
		 * The ERSP IU contains a full completion with CQE.
		 * Validate ERSP IU and look at cqe.
		 */
		if (unlikely(be16_to_cpu(op->rsp_iu.iu_len) !=
					(freq->rcv_rsplen / 4) ||
			     be32_to_cpu(op->rsp_iu.xfrd_len) !=
					freq->transferred_length ||
			     op->rsp_iu.status_code ||
			     sqe->common.command_id != cqe->command_id)) {
			status = cpu_to_le16(NVME_SC_INTERNAL << 1);
			goto done;
		}
		result = cqe->result;
		status = cqe->status;
		break;

	default:
		status = cpu_to_le16(NVME_SC_INTERNAL << 1);
		goto done;
	}

	terminate_assoc = false;

done:
	if (op->flags & FCOP_FLAGS_AEN) {
		nvme_complete_async_event(&queue->ctrl->ctrl, status, &result);
		__nvme_fc_fcpop_chk_teardowns(ctrl, op, opstate);
		atomic_set(&op->state, FCPOP_STATE_IDLE);
		op->flags = FCOP_FLAGS_AEN;	/* clear other flags */
		nvme_fc_ctrl_put(ctrl);
		goto check_error;
	}

	__nvme_fc_fcpop_chk_teardowns(ctrl, op, opstate);
	nvme_end_request(rq, status, result);

check_error:
	if (terminate_assoc)
		nvme_fc_error_recovery(ctrl, "transport detected io error");
}

static int
__nvme_fc_init_request(struct nvme_fc_ctrl *ctrl,
		struct nvme_fc_queue *queue, struct nvme_fc_fcp_op *op,
		struct request *rq, u32 rqno)
{
	struct nvme_fcp_op_w_sgl *op_w_sgl =
		container_of(op, typeof(*op_w_sgl), op);
	struct nvme_fc_cmd_iu *cmdiu = &op->cmd_iu;
	int ret = 0;

	memset(op, 0, sizeof(*op));
	op->fcp_req.cmdaddr = &op->cmd_iu;
	op->fcp_req.cmdlen = sizeof(op->cmd_iu);
	op->fcp_req.rspaddr = &op->rsp_iu;
	op->fcp_req.rsplen = sizeof(op->rsp_iu);
	op->fcp_req.done = nvme_fc_fcpio_done;
	op->ctrl = ctrl;
	op->queue = queue;
	op->rq = rq;
	op->rqno = rqno;

	cmdiu->scsi_id = NVME_CMD_SCSI_ID;
	cmdiu->fc_id = NVME_CMD_FC_ID;
	cmdiu->iu_len = cpu_to_be16(sizeof(*cmdiu) / sizeof(u32));

	op->fcp_req.cmddma = fc_dma_map_single(ctrl->lport->dev,
				&op->cmd_iu, sizeof(op->cmd_iu), DMA_TO_DEVICE);
	if (fc_dma_mapping_error(ctrl->lport->dev, op->fcp_req.cmddma)) {
		dev_err(ctrl->dev,
			"FCP Op failed - cmdiu dma mapping failed.\n");
		ret = -EFAULT;
		goto out_on_error;
	}

	op->fcp_req.rspdma = fc_dma_map_single(ctrl->lport->dev,
				&op->rsp_iu, sizeof(op->rsp_iu),
				DMA_FROM_DEVICE);
	if (fc_dma_mapping_error(ctrl->lport->dev, op->fcp_req.rspdma)) {
		dev_err(ctrl->dev,
			"FCP Op failed - rspiu dma mapping failed.\n");
		ret = -EFAULT;
	}

	atomic_set(&op->state, FCPOP_STATE_IDLE);
out_on_error:
	return ret;
}

static int
nvme_fc_init_request(struct blk_mq_tag_set *set, struct request *rq,
		unsigned int hctx_idx, unsigned int numa_node)
{
	struct nvme_fc_ctrl *ctrl = set->driver_data;
	struct nvme_fcp_op_w_sgl *op = blk_mq_rq_to_pdu(rq);
	int queue_idx = (set == &ctrl->tag_set) ?
hctx_idx + 1 : 0; 1752 struct nvme_fc_queue *queue = &ctrl->queues[queue_idx]; 1753 int res; 1754 1755 res = __nvme_fc_init_request(ctrl, queue, &op->op, rq, queue->rqcnt++); 1756 if (res) 1757 return res; 1758 op->op.fcp_req.first_sgl = &op->sgl[0]; 1759 op->op.fcp_req.private = &op->priv[0]; 1760 nvme_req(rq)->ctrl = &ctrl->ctrl; 1761 return res; 1762 } 1763 1764 static int 1765 nvme_fc_init_aen_ops(struct nvme_fc_ctrl *ctrl) 1766 { 1767 struct nvme_fc_fcp_op *aen_op; 1768 struct nvme_fc_cmd_iu *cmdiu; 1769 struct nvme_command *sqe; 1770 void *private; 1771 int i, ret; 1772 1773 aen_op = ctrl->aen_ops; 1774 for (i = 0; i < NVME_NR_AEN_COMMANDS; i++, aen_op++) { 1775 private = kzalloc(ctrl->lport->ops->fcprqst_priv_sz, 1776 GFP_KERNEL); 1777 if (!private) 1778 return -ENOMEM; 1779 1780 cmdiu = &aen_op->cmd_iu; 1781 sqe = &cmdiu->sqe; 1782 ret = __nvme_fc_init_request(ctrl, &ctrl->queues[0], 1783 aen_op, (struct request *)NULL, 1784 (NVME_AQ_BLK_MQ_DEPTH + i)); 1785 if (ret) { 1786 kfree(private); 1787 return ret; 1788 } 1789 1790 aen_op->flags = FCOP_FLAGS_AEN; 1791 aen_op->fcp_req.private = private; 1792 1793 memset(sqe, 0, sizeof(*sqe)); 1794 sqe->common.opcode = nvme_admin_async_event; 1795 /* Note: core layer may overwrite the sqe.command_id value */ 1796 sqe->common.command_id = NVME_AQ_BLK_MQ_DEPTH + i; 1797 } 1798 return 0; 1799 } 1800 1801 static void 1802 nvme_fc_term_aen_ops(struct nvme_fc_ctrl *ctrl) 1803 { 1804 struct nvme_fc_fcp_op *aen_op; 1805 int i; 1806 1807 aen_op = ctrl->aen_ops; 1808 for (i = 0; i < NVME_NR_AEN_COMMANDS; i++, aen_op++) { 1809 if (!aen_op->fcp_req.private) 1810 continue; 1811 1812 __nvme_fc_exit_request(ctrl, aen_op); 1813 1814 kfree(aen_op->fcp_req.private); 1815 aen_op->fcp_req.private = NULL; 1816 } 1817 } 1818 1819 static inline void 1820 __nvme_fc_init_hctx(struct blk_mq_hw_ctx *hctx, struct nvme_fc_ctrl *ctrl, 1821 unsigned int qidx) 1822 { 1823 struct nvme_fc_queue *queue = &ctrl->queues[qidx]; 1824 1825 hctx->driver_data = queue; 1826 queue->hctx = hctx; 1827 } 1828 1829 static int 1830 nvme_fc_init_hctx(struct blk_mq_hw_ctx *hctx, void *data, 1831 unsigned int hctx_idx) 1832 { 1833 struct nvme_fc_ctrl *ctrl = data; 1834 1835 __nvme_fc_init_hctx(hctx, ctrl, hctx_idx + 1); 1836 1837 return 0; 1838 } 1839 1840 static int 1841 nvme_fc_init_admin_hctx(struct blk_mq_hw_ctx *hctx, void *data, 1842 unsigned int hctx_idx) 1843 { 1844 struct nvme_fc_ctrl *ctrl = data; 1845 1846 __nvme_fc_init_hctx(hctx, ctrl, hctx_idx); 1847 1848 return 0; 1849 } 1850 1851 static void 1852 nvme_fc_init_queue(struct nvme_fc_ctrl *ctrl, int idx) 1853 { 1854 struct nvme_fc_queue *queue; 1855 1856 queue = &ctrl->queues[idx]; 1857 memset(queue, 0, sizeof(*queue)); 1858 queue->ctrl = ctrl; 1859 queue->qnum = idx; 1860 atomic_set(&queue->csn, 1); 1861 queue->dev = ctrl->dev; 1862 1863 if (idx > 0) 1864 queue->cmnd_capsule_len = ctrl->ctrl.ioccsz * 16; 1865 else 1866 queue->cmnd_capsule_len = sizeof(struct nvme_command); 1867 1868 /* 1869 * Considered whether we should allocate buffers for all SQEs 1870 * and CQEs and dma map them - mapping their respective entries 1871 * into the request structures (kernel vm addr and dma address) 1872 * thus the driver could use the buffers/mappings directly. 1873 * It only makes sense if the LLDD would use them for its 1874 * messaging api. It's very unlikely most adapter api's would use 1875 * a native NVME sqe/cqe. More reasonable if FC-NVME IU payload 1876 * structures were used instead. 
1877 */ 1878 } 1879 1880 /* 1881 * This routine terminates a queue at the transport level. 1882 * The transport has already ensured that all outstanding ios on 1883 * the queue have been terminated. 1884 * The transport will send a Disconnect LS request to terminate 1885 * the queue's connection. Termination of the admin queue will also 1886 * terminate the association at the target. 1887 */ 1888 static void 1889 nvme_fc_free_queue(struct nvme_fc_queue *queue) 1890 { 1891 if (!test_and_clear_bit(NVME_FC_Q_CONNECTED, &queue->flags)) 1892 return; 1893 1894 clear_bit(NVME_FC_Q_LIVE, &queue->flags); 1895 /* 1896 * Current implementation never disconnects a single queue. 1897 * It always terminates a whole association. So there is never 1898 * a disconnect(queue) LS sent to the target. 1899 */ 1900 1901 queue->connection_id = 0; 1902 atomic_set(&queue->csn, 1); 1903 } 1904 1905 static void 1906 __nvme_fc_delete_hw_queue(struct nvme_fc_ctrl *ctrl, 1907 struct nvme_fc_queue *queue, unsigned int qidx) 1908 { 1909 if (ctrl->lport->ops->delete_queue) 1910 ctrl->lport->ops->delete_queue(&ctrl->lport->localport, qidx, 1911 queue->lldd_handle); 1912 queue->lldd_handle = NULL; 1913 } 1914 1915 static void 1916 nvme_fc_free_io_queues(struct nvme_fc_ctrl *ctrl) 1917 { 1918 int i; 1919 1920 for (i = 1; i < ctrl->ctrl.queue_count; i++) 1921 nvme_fc_free_queue(&ctrl->queues[i]); 1922 } 1923 1924 static int 1925 __nvme_fc_create_hw_queue(struct nvme_fc_ctrl *ctrl, 1926 struct nvme_fc_queue *queue, unsigned int qidx, u16 qsize) 1927 { 1928 int ret = 0; 1929 1930 queue->lldd_handle = NULL; 1931 if (ctrl->lport->ops->create_queue) 1932 ret = ctrl->lport->ops->create_queue(&ctrl->lport->localport, 1933 qidx, qsize, &queue->lldd_handle); 1934 1935 return ret; 1936 } 1937 1938 static void 1939 nvme_fc_delete_hw_io_queues(struct nvme_fc_ctrl *ctrl) 1940 { 1941 struct nvme_fc_queue *queue = &ctrl->queues[ctrl->ctrl.queue_count - 1]; 1942 int i; 1943 1944 for (i = ctrl->ctrl.queue_count - 1; i >= 1; i--, queue--) 1945 __nvme_fc_delete_hw_queue(ctrl, queue, i); 1946 } 1947 1948 static int 1949 nvme_fc_create_hw_io_queues(struct nvme_fc_ctrl *ctrl, u16 qsize) 1950 { 1951 struct nvme_fc_queue *queue = &ctrl->queues[1]; 1952 int i, ret; 1953 1954 for (i = 1; i < ctrl->ctrl.queue_count; i++, queue++) { 1955 ret = __nvme_fc_create_hw_queue(ctrl, queue, i, qsize); 1956 if (ret) 1957 goto delete_queues; 1958 } 1959 1960 return 0; 1961 1962 delete_queues: 1963 for (; i >= 0; i--) 1964 __nvme_fc_delete_hw_queue(ctrl, &ctrl->queues[i], i); 1965 return ret; 1966 } 1967 1968 static int 1969 nvme_fc_connect_io_queues(struct nvme_fc_ctrl *ctrl, u16 qsize) 1970 { 1971 int i, ret = 0; 1972 1973 for (i = 1; i < ctrl->ctrl.queue_count; i++) { 1974 ret = nvme_fc_connect_queue(ctrl, &ctrl->queues[i], qsize, 1975 (qsize / 5)); 1976 if (ret) 1977 break; 1978 ret = nvmf_connect_io_queue(&ctrl->ctrl, i, false); 1979 if (ret) 1980 break; 1981 1982 set_bit(NVME_FC_Q_LIVE, &ctrl->queues[i].flags); 1983 } 1984 1985 return ret; 1986 } 1987 1988 static void 1989 nvme_fc_init_io_queues(struct nvme_fc_ctrl *ctrl) 1990 { 1991 int i; 1992 1993 for (i = 1; i < ctrl->ctrl.queue_count; i++) 1994 nvme_fc_init_queue(ctrl, i); 1995 } 1996 1997 static void 1998 nvme_fc_ctrl_free(struct kref *ref) 1999 { 2000 struct nvme_fc_ctrl *ctrl = 2001 container_of(ref, struct nvme_fc_ctrl, ref); 2002 unsigned long flags; 2003 2004 if (ctrl->ctrl.tagset) { 2005 blk_cleanup_queue(ctrl->ctrl.connect_q); 2006 blk_mq_free_tag_set(&ctrl->tag_set); 2007 } 2008 2009 /* remove 
from rport list */ 2010 spin_lock_irqsave(&ctrl->rport->lock, flags); 2011 list_del(&ctrl->ctrl_list); 2012 spin_unlock_irqrestore(&ctrl->rport->lock, flags); 2013 2014 blk_mq_unquiesce_queue(ctrl->ctrl.admin_q); 2015 blk_cleanup_queue(ctrl->ctrl.admin_q); 2016 blk_mq_free_tag_set(&ctrl->admin_tag_set); 2017 2018 kfree(ctrl->queues); 2019 2020 put_device(ctrl->dev); 2021 nvme_fc_rport_put(ctrl->rport); 2022 2023 ida_simple_remove(&nvme_fc_ctrl_cnt, ctrl->cnum); 2024 if (ctrl->ctrl.opts) 2025 nvmf_free_options(ctrl->ctrl.opts); 2026 kfree(ctrl); 2027 } 2028 2029 static void 2030 nvme_fc_ctrl_put(struct nvme_fc_ctrl *ctrl) 2031 { 2032 kref_put(&ctrl->ref, nvme_fc_ctrl_free); 2033 } 2034 2035 static int 2036 nvme_fc_ctrl_get(struct nvme_fc_ctrl *ctrl) 2037 { 2038 return kref_get_unless_zero(&ctrl->ref); 2039 } 2040 2041 /* 2042 * All accesses from nvme core layer done - can now free the 2043 * controller. Called after last nvme_put_ctrl() call 2044 */ 2045 static void 2046 nvme_fc_nvme_ctrl_freed(struct nvme_ctrl *nctrl) 2047 { 2048 struct nvme_fc_ctrl *ctrl = to_fc_ctrl(nctrl); 2049 2050 WARN_ON(nctrl != &ctrl->ctrl); 2051 2052 nvme_fc_ctrl_put(ctrl); 2053 } 2054 2055 static void 2056 nvme_fc_error_recovery(struct nvme_fc_ctrl *ctrl, char *errmsg) 2057 { 2058 int active; 2059 2060 /* 2061 * if an error (io timeout, etc) while (re)connecting, 2062 * it's an error on creating the new association. 2063 * Start the error recovery thread if it hasn't already 2064 * been started. It is expected there could be multiple 2065 * ios hitting this path before things are cleaned up. 2066 */ 2067 if (ctrl->ctrl.state == NVME_CTRL_CONNECTING) { 2068 active = atomic_xchg(&ctrl->err_work_active, 1); 2069 if (!active && !schedule_work(&ctrl->err_work)) { 2070 atomic_set(&ctrl->err_work_active, 0); 2071 WARN_ON(1); 2072 } 2073 return; 2074 } 2075 2076 /* Otherwise, only proceed if in LIVE state - e.g. on first error */ 2077 if (ctrl->ctrl.state != NVME_CTRL_LIVE) 2078 return; 2079 2080 dev_warn(ctrl->ctrl.device, 2081 "NVME-FC{%d}: transport association error detected: %s\n", 2082 ctrl->cnum, errmsg); 2083 dev_warn(ctrl->ctrl.device, 2084 "NVME-FC{%d}: resetting controller\n", ctrl->cnum); 2085 2086 nvme_reset_ctrl(&ctrl->ctrl); 2087 } 2088 2089 static enum blk_eh_timer_return 2090 nvme_fc_timeout(struct request *rq, bool reserved) 2091 { 2092 struct nvme_fc_fcp_op *op = blk_mq_rq_to_pdu(rq); 2093 struct nvme_fc_ctrl *ctrl = op->ctrl; 2094 2095 /* 2096 * we can't individually ABTS an io without affecting the queue, 2097 * thus killing the queue, and thus the association. 2098 * So resolve by performing a controller reset, which will stop 2099 * the host/io stack, terminate the association on the link, 2100 * and recreate an association on the link. 2101 */ 2102 nvme_fc_error_recovery(ctrl, "io timeout error"); 2103 2104 /* 2105 * the io abort has been initiated. Have the reset timer 2106 * restarted and the abort completion will complete the io 2107 * shortly. Avoids a synchronous wait while the abort finishes. 
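 *
 * For reference: the abort itself is driven by the reset scheduled via
 * nvme_fc_error_recovery() above. nvme_fc_reset_ctrl_work() calls
 * __nvme_fc_terminate_io(), which tears down the association and walks
 * the busy requests with nvme_fc_terminate_exchange(); the LLDD then
 * completes each aborted request through the normal done path, which in
 * turn completes this timed-out io.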
2108 */
2109 return BLK_EH_RESET_TIMER;
2110 }
2111
2112 static int
2113 nvme_fc_map_data(struct nvme_fc_ctrl *ctrl, struct request *rq,
2114 struct nvme_fc_fcp_op *op)
2115 {
2116 struct nvmefc_fcp_req *freq = &op->fcp_req;
2117 enum dma_data_direction dir;
2118 int ret;
2119
2120 freq->sg_cnt = 0;
2121
2122 if (!blk_rq_payload_bytes(rq))
2123 return 0;
2124
2125 freq->sg_table.sgl = freq->first_sgl;
2126 ret = sg_alloc_table_chained(&freq->sg_table,
2127 blk_rq_nr_phys_segments(rq), freq->sg_table.sgl);
2128 if (ret)
2129 return -ENOMEM;
2130
2131 op->nents = blk_rq_map_sg(rq->q, rq, freq->sg_table.sgl);
2132 WARN_ON(op->nents > blk_rq_nr_phys_segments(rq));
2133 dir = (rq_data_dir(rq) == WRITE) ? DMA_TO_DEVICE : DMA_FROM_DEVICE;
2134 freq->sg_cnt = fc_dma_map_sg(ctrl->lport->dev, freq->sg_table.sgl,
2135 op->nents, dir);
2136 if (unlikely(freq->sg_cnt <= 0)) {
2137 sg_free_table_chained(&freq->sg_table, true);
2138 freq->sg_cnt = 0;
2139 return -EFAULT;
2140 }
2141
2142 /*
2143 * TODO: blk_integrity_rq(rq) for DIF
2144 */
2145 return 0;
2146 }
2147
2148 static void
2149 nvme_fc_unmap_data(struct nvme_fc_ctrl *ctrl, struct request *rq,
2150 struct nvme_fc_fcp_op *op)
2151 {
2152 struct nvmefc_fcp_req *freq = &op->fcp_req;
2153
2154 if (!freq->sg_cnt)
2155 return;
2156
2157 fc_dma_unmap_sg(ctrl->lport->dev, freq->sg_table.sgl, op->nents,
2158 ((rq_data_dir(rq) == WRITE) ?
2159 DMA_TO_DEVICE : DMA_FROM_DEVICE));
2160
2161 nvme_cleanup_cmd(rq);
2162
2163 sg_free_table_chained(&freq->sg_table, true);
2164
2165 freq->sg_cnt = 0;
2166 }
2167
2168 /*
2169 * In FC, the queue is a logical thing. At transport connect, the target
2170 * creates its "queue" and returns a handle that is to be given to the
2171 * target whenever it posts something to the corresponding SQ. When an
2172 * SQE is sent on a SQ, FC effectively considers the SQE, or rather the
2173 * command contained within the SQE, an io, and assigns a FC exchange
2174 * to it. The SQE and the associated SQ handle are sent in the initial
2175 * CMD IU sent on the exchange. All transfers relative to the io occur
2176 * as part of the exchange. The CQE is the last thing for the io,
2177 * which is transferred (explicitly or implicitly) with the RSP IU
2178 * sent on the exchange. After the CQE is received, the FC exchange is
2179 * terminated and the Exchange may be used on a different io.
2180 *
2181 * The transport to LLDD api has the transport making a request for a
2182 * new fcp io to the LLDD. The LLDD then allocates a FC exchange
2183 * resource and transfers the command. The LLDD will then process all
2184 * steps to complete the io. Upon completion, the transport done routine
2185 * is called.
2186 *
2187 * So - while the operation is outstanding to the LLDD, there is a link
2188 * level FC exchange resource that is also outstanding. This must be
2189 * considered in all cleanup operations.
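 *
 * As a rough sketch of that hand-off (simplified from
 * nvme_fc_start_fcp_op() below): the transport fills in op->fcp_req and
 * passes it to the LLDD via
 *
 *	ret = ctrl->lport->ops->fcp_io(&ctrl->lport->localport,
 *				&ctrl->rport->remoteport,
 *				queue->lldd_handle, &op->fcp_req);
 *
 * and the LLDD later invokes op->fcp_req.done() once the exchange has
 * completed (or been aborted), at which point the link-level exchange
 * resource is released.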
2190 */ 2191 static blk_status_t 2192 nvme_fc_start_fcp_op(struct nvme_fc_ctrl *ctrl, struct nvme_fc_queue *queue, 2193 struct nvme_fc_fcp_op *op, u32 data_len, 2194 enum nvmefc_fcp_datadir io_dir) 2195 { 2196 struct nvme_fc_cmd_iu *cmdiu = &op->cmd_iu; 2197 struct nvme_command *sqe = &cmdiu->sqe; 2198 u32 csn; 2199 int ret, opstate; 2200 2201 /* 2202 * before attempting to send the io, check to see if we believe 2203 * the target device is present 2204 */ 2205 if (ctrl->rport->remoteport.port_state != FC_OBJSTATE_ONLINE) 2206 return BLK_STS_RESOURCE; 2207 2208 if (!nvme_fc_ctrl_get(ctrl)) 2209 return BLK_STS_IOERR; 2210 2211 /* format the FC-NVME CMD IU and fcp_req */ 2212 cmdiu->connection_id = cpu_to_be64(queue->connection_id); 2213 csn = atomic_inc_return(&queue->csn); 2214 cmdiu->csn = cpu_to_be32(csn); 2215 cmdiu->data_len = cpu_to_be32(data_len); 2216 switch (io_dir) { 2217 case NVMEFC_FCP_WRITE: 2218 cmdiu->flags = FCNVME_CMD_FLAGS_WRITE; 2219 break; 2220 case NVMEFC_FCP_READ: 2221 cmdiu->flags = FCNVME_CMD_FLAGS_READ; 2222 break; 2223 case NVMEFC_FCP_NODATA: 2224 cmdiu->flags = 0; 2225 break; 2226 } 2227 op->fcp_req.payload_length = data_len; 2228 op->fcp_req.io_dir = io_dir; 2229 op->fcp_req.transferred_length = 0; 2230 op->fcp_req.rcv_rsplen = 0; 2231 op->fcp_req.status = NVME_SC_SUCCESS; 2232 op->fcp_req.sqid = cpu_to_le16(queue->qnum); 2233 2234 /* 2235 * validate per fabric rules, set fields mandated by fabric spec 2236 * as well as those by FC-NVME spec. 2237 */ 2238 WARN_ON_ONCE(sqe->common.metadata); 2239 sqe->common.flags |= NVME_CMD_SGL_METABUF; 2240 2241 /* 2242 * format SQE DPTR field per FC-NVME rules: 2243 * type=0x5 Transport SGL Data Block Descriptor 2244 * subtype=0xA Transport-specific value 2245 * address=0 2246 * length=length of the data series 2247 */ 2248 sqe->rw.dptr.sgl.type = (NVME_TRANSPORT_SGL_DATA_DESC << 4) | 2249 NVME_SGL_FMT_TRANSPORT_A; 2250 sqe->rw.dptr.sgl.length = cpu_to_le32(data_len); 2251 sqe->rw.dptr.sgl.addr = 0; 2252 2253 if (!(op->flags & FCOP_FLAGS_AEN)) { 2254 ret = nvme_fc_map_data(ctrl, op->rq, op); 2255 if (ret < 0) { 2256 nvme_cleanup_cmd(op->rq); 2257 nvme_fc_ctrl_put(ctrl); 2258 if (ret == -ENOMEM || ret == -EAGAIN) 2259 return BLK_STS_RESOURCE; 2260 return BLK_STS_IOERR; 2261 } 2262 } 2263 2264 fc_dma_sync_single_for_device(ctrl->lport->dev, op->fcp_req.cmddma, 2265 sizeof(op->cmd_iu), DMA_TO_DEVICE); 2266 2267 atomic_set(&op->state, FCPOP_STATE_ACTIVE); 2268 2269 if (!(op->flags & FCOP_FLAGS_AEN)) 2270 blk_mq_start_request(op->rq); 2271 2272 ret = ctrl->lport->ops->fcp_io(&ctrl->lport->localport, 2273 &ctrl->rport->remoteport, 2274 queue->lldd_handle, &op->fcp_req); 2275 2276 if (ret) { 2277 opstate = atomic_xchg(&op->state, FCPOP_STATE_COMPLETE); 2278 __nvme_fc_fcpop_chk_teardowns(ctrl, op, opstate); 2279 2280 if (!(op->flags & FCOP_FLAGS_AEN)) 2281 nvme_fc_unmap_data(ctrl, op->rq, op); 2282 2283 nvme_fc_ctrl_put(ctrl); 2284 2285 if (ctrl->rport->remoteport.port_state == FC_OBJSTATE_ONLINE && 2286 ret != -EBUSY) 2287 return BLK_STS_IOERR; 2288 2289 return BLK_STS_RESOURCE; 2290 } 2291 2292 return BLK_STS_OK; 2293 } 2294 2295 static blk_status_t 2296 nvme_fc_queue_rq(struct blk_mq_hw_ctx *hctx, 2297 const struct blk_mq_queue_data *bd) 2298 { 2299 struct nvme_ns *ns = hctx->queue->queuedata; 2300 struct nvme_fc_queue *queue = hctx->driver_data; 2301 struct nvme_fc_ctrl *ctrl = queue->ctrl; 2302 struct request *rq = bd->rq; 2303 struct nvme_fc_fcp_op *op = blk_mq_rq_to_pdu(rq); 2304 struct nvme_fc_cmd_iu *cmdiu = &op->cmd_iu; 2305 
struct nvme_command *sqe = &cmdiu->sqe;
2306 enum nvmefc_fcp_datadir io_dir;
2307 bool queue_ready = test_bit(NVME_FC_Q_LIVE, &queue->flags);
2308 u32 data_len;
2309 blk_status_t ret;
2310
2311 if (ctrl->rport->remoteport.port_state != FC_OBJSTATE_ONLINE ||
2312 !nvmf_check_ready(&queue->ctrl->ctrl, rq, queue_ready))
2313 return nvmf_fail_nonready_command(&queue->ctrl->ctrl, rq);
2314
2315 ret = nvme_setup_cmd(ns, rq, sqe);
2316 if (ret)
2317 return ret;
2318
2319 data_len = blk_rq_payload_bytes(rq);
2320 if (data_len)
2321 io_dir = ((rq_data_dir(rq) == WRITE) ?
2322 NVMEFC_FCP_WRITE : NVMEFC_FCP_READ);
2323 else
2324 io_dir = NVMEFC_FCP_NODATA;
2325
2326 return nvme_fc_start_fcp_op(ctrl, queue, op, data_len, io_dir);
2327 }
2328
2329 static void
2330 nvme_fc_submit_async_event(struct nvme_ctrl *arg)
2331 {
2332 struct nvme_fc_ctrl *ctrl = to_fc_ctrl(arg);
2333 struct nvme_fc_fcp_op *aen_op;
2334 unsigned long flags;
2335 bool terminating = false;
2336 blk_status_t ret;
2337
2338 spin_lock_irqsave(&ctrl->lock, flags);
2339 if (ctrl->flags & FCCTRL_TERMIO)
2340 terminating = true;
2341 spin_unlock_irqrestore(&ctrl->lock, flags);
2342
2343 if (terminating)
2344 return;
2345
2346 aen_op = &ctrl->aen_ops[0];
2347
2348 ret = nvme_fc_start_fcp_op(ctrl, aen_op->queue, aen_op, 0,
2349 NVMEFC_FCP_NODATA);
2350 if (ret)
2351 dev_err(ctrl->ctrl.device,
2352 "failed async event work\n");
2353 }
2354
2355 static void
2356 nvme_fc_complete_rq(struct request *rq)
2357 {
2358 struct nvme_fc_fcp_op *op = blk_mq_rq_to_pdu(rq);
2359 struct nvme_fc_ctrl *ctrl = op->ctrl;
2360
2361 atomic_set(&op->state, FCPOP_STATE_IDLE);
2362
2363 nvme_fc_unmap_data(ctrl, rq, op);
2364 nvme_complete_rq(rq);
2365 nvme_fc_ctrl_put(ctrl);
2366 }
2367
2368 /*
2369 * This routine is used by the transport when it needs to find active
2370 * io on a queue that is to be terminated. The transport uses
2371 * blk_mq_tagset_busy_iter() to find the busy requests; the iterator then
2372 * invokes this routine on each of them, killing them one by one.
2373 *
2374 * As FC allocates FC exchange for each io, the transport must contact
2375 * the LLDD to terminate the exchange, thus releasing the FC exchange.
2376 * After terminating the exchange the LLDD will call the transport's
2377 * normal io done path for the request, but it will have an aborted
2378 * status. The done path will return the io request back to the block
2379 * layer with an error status.
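 *
 * Typical invocation (see nvme_fc_delete_association() below):
 *
 *	blk_mq_tagset_busy_iter(&ctrl->tag_set,
 *				nvme_fc_terminate_exchange, &ctrl->ctrl);
 *
 * i.e. the nvme_ctrl pointer is passed as the iterator's private data
 * and converted back to the FC controller here via to_fc_ctrl().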
2380 */ 2381 static bool 2382 nvme_fc_terminate_exchange(struct request *req, void *data, bool reserved) 2383 { 2384 struct nvme_ctrl *nctrl = data; 2385 struct nvme_fc_ctrl *ctrl = to_fc_ctrl(nctrl); 2386 struct nvme_fc_fcp_op *op = blk_mq_rq_to_pdu(req); 2387 2388 __nvme_fc_abort_op(ctrl, op); 2389 return true; 2390 } 2391 2392 2393 static const struct blk_mq_ops nvme_fc_mq_ops = { 2394 .queue_rq = nvme_fc_queue_rq, 2395 .complete = nvme_fc_complete_rq, 2396 .init_request = nvme_fc_init_request, 2397 .exit_request = nvme_fc_exit_request, 2398 .init_hctx = nvme_fc_init_hctx, 2399 .timeout = nvme_fc_timeout, 2400 }; 2401 2402 static int 2403 nvme_fc_create_io_queues(struct nvme_fc_ctrl *ctrl) 2404 { 2405 struct nvmf_ctrl_options *opts = ctrl->ctrl.opts; 2406 unsigned int nr_io_queues; 2407 int ret; 2408 2409 nr_io_queues = min(min(opts->nr_io_queues, num_online_cpus()), 2410 ctrl->lport->ops->max_hw_queues); 2411 ret = nvme_set_queue_count(&ctrl->ctrl, &nr_io_queues); 2412 if (ret) { 2413 dev_info(ctrl->ctrl.device, 2414 "set_queue_count failed: %d\n", ret); 2415 return ret; 2416 } 2417 2418 ctrl->ctrl.queue_count = nr_io_queues + 1; 2419 if (!nr_io_queues) 2420 return 0; 2421 2422 nvme_fc_init_io_queues(ctrl); 2423 2424 memset(&ctrl->tag_set, 0, sizeof(ctrl->tag_set)); 2425 ctrl->tag_set.ops = &nvme_fc_mq_ops; 2426 ctrl->tag_set.queue_depth = ctrl->ctrl.opts->queue_size; 2427 ctrl->tag_set.reserved_tags = 1; /* fabric connect */ 2428 ctrl->tag_set.numa_node = ctrl->ctrl.numa_node; 2429 ctrl->tag_set.flags = BLK_MQ_F_SHOULD_MERGE; 2430 ctrl->tag_set.cmd_size = 2431 struct_size((struct nvme_fcp_op_w_sgl *)NULL, priv, 2432 ctrl->lport->ops->fcprqst_priv_sz); 2433 ctrl->tag_set.driver_data = ctrl; 2434 ctrl->tag_set.nr_hw_queues = ctrl->ctrl.queue_count - 1; 2435 ctrl->tag_set.timeout = NVME_IO_TIMEOUT; 2436 2437 ret = blk_mq_alloc_tag_set(&ctrl->tag_set); 2438 if (ret) 2439 return ret; 2440 2441 ctrl->ctrl.tagset = &ctrl->tag_set; 2442 2443 ctrl->ctrl.connect_q = blk_mq_init_queue(&ctrl->tag_set); 2444 if (IS_ERR(ctrl->ctrl.connect_q)) { 2445 ret = PTR_ERR(ctrl->ctrl.connect_q); 2446 goto out_free_tag_set; 2447 } 2448 2449 ret = nvme_fc_create_hw_io_queues(ctrl, ctrl->ctrl.sqsize + 1); 2450 if (ret) 2451 goto out_cleanup_blk_queue; 2452 2453 ret = nvme_fc_connect_io_queues(ctrl, ctrl->ctrl.sqsize + 1); 2454 if (ret) 2455 goto out_delete_hw_queues; 2456 2457 ctrl->ioq_live = true; 2458 2459 return 0; 2460 2461 out_delete_hw_queues: 2462 nvme_fc_delete_hw_io_queues(ctrl); 2463 out_cleanup_blk_queue: 2464 blk_cleanup_queue(ctrl->ctrl.connect_q); 2465 out_free_tag_set: 2466 blk_mq_free_tag_set(&ctrl->tag_set); 2467 nvme_fc_free_io_queues(ctrl); 2468 2469 /* force put free routine to ignore io queues */ 2470 ctrl->ctrl.tagset = NULL; 2471 2472 return ret; 2473 } 2474 2475 static int 2476 nvme_fc_recreate_io_queues(struct nvme_fc_ctrl *ctrl) 2477 { 2478 struct nvmf_ctrl_options *opts = ctrl->ctrl.opts; 2479 unsigned int nr_io_queues; 2480 int ret; 2481 2482 nr_io_queues = min(min(opts->nr_io_queues, num_online_cpus()), 2483 ctrl->lport->ops->max_hw_queues); 2484 ret = nvme_set_queue_count(&ctrl->ctrl, &nr_io_queues); 2485 if (ret) { 2486 dev_info(ctrl->ctrl.device, 2487 "set_queue_count failed: %d\n", ret); 2488 return ret; 2489 } 2490 2491 ctrl->ctrl.queue_count = nr_io_queues + 1; 2492 /* check for io queues existing */ 2493 if (ctrl->ctrl.queue_count == 1) 2494 return 0; 2495 2496 ret = nvme_fc_create_hw_io_queues(ctrl, ctrl->ctrl.sqsize + 1); 2497 if (ret) 2498 goto out_free_io_queues; 2499 
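/*
 * Re-establish each io queue on the new association: a transport-level
 * queue connect followed by the NVMe Fabrics Connect command (see
 * nvme_fc_connect_io_queues() above).
 */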
2500 ret = nvme_fc_connect_io_queues(ctrl, ctrl->ctrl.sqsize + 1); 2501 if (ret) 2502 goto out_delete_hw_queues; 2503 2504 blk_mq_update_nr_hw_queues(&ctrl->tag_set, nr_io_queues); 2505 2506 return 0; 2507 2508 out_delete_hw_queues: 2509 nvme_fc_delete_hw_io_queues(ctrl); 2510 out_free_io_queues: 2511 nvme_fc_free_io_queues(ctrl); 2512 return ret; 2513 } 2514 2515 static void 2516 nvme_fc_rport_active_on_lport(struct nvme_fc_rport *rport) 2517 { 2518 struct nvme_fc_lport *lport = rport->lport; 2519 2520 atomic_inc(&lport->act_rport_cnt); 2521 } 2522 2523 static void 2524 nvme_fc_rport_inactive_on_lport(struct nvme_fc_rport *rport) 2525 { 2526 struct nvme_fc_lport *lport = rport->lport; 2527 u32 cnt; 2528 2529 cnt = atomic_dec_return(&lport->act_rport_cnt); 2530 if (cnt == 0 && lport->localport.port_state == FC_OBJSTATE_DELETED) 2531 lport->ops->localport_delete(&lport->localport); 2532 } 2533 2534 static int 2535 nvme_fc_ctlr_active_on_rport(struct nvme_fc_ctrl *ctrl) 2536 { 2537 struct nvme_fc_rport *rport = ctrl->rport; 2538 u32 cnt; 2539 2540 if (ctrl->assoc_active) 2541 return 1; 2542 2543 ctrl->assoc_active = true; 2544 cnt = atomic_inc_return(&rport->act_ctrl_cnt); 2545 if (cnt == 1) 2546 nvme_fc_rport_active_on_lport(rport); 2547 2548 return 0; 2549 } 2550 2551 static int 2552 nvme_fc_ctlr_inactive_on_rport(struct nvme_fc_ctrl *ctrl) 2553 { 2554 struct nvme_fc_rport *rport = ctrl->rport; 2555 struct nvme_fc_lport *lport = rport->lport; 2556 u32 cnt; 2557 2558 /* ctrl->assoc_active=false will be set independently */ 2559 2560 cnt = atomic_dec_return(&rport->act_ctrl_cnt); 2561 if (cnt == 0) { 2562 if (rport->remoteport.port_state == FC_OBJSTATE_DELETED) 2563 lport->ops->remoteport_delete(&rport->remoteport); 2564 nvme_fc_rport_inactive_on_lport(rport); 2565 } 2566 2567 return 0; 2568 } 2569 2570 /* 2571 * This routine restarts the controller on the host side, and 2572 * on the link side, recreates the controller association. 
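 *
 * Rough order of operations implemented below:
 *  1. create and connect the hw admin queue (transport + Fabrics Connect)
 *  2. read CAP, enable the controller and run nvme_init_identify()
 *  3. create (first association) or re-create the io queues, if any
 *  4. transition the controller to LIVE and restart it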
2573 */ 2574 static int 2575 nvme_fc_create_association(struct nvme_fc_ctrl *ctrl) 2576 { 2577 struct nvmf_ctrl_options *opts = ctrl->ctrl.opts; 2578 int ret; 2579 bool changed; 2580 2581 ++ctrl->ctrl.nr_reconnects; 2582 2583 if (ctrl->rport->remoteport.port_state != FC_OBJSTATE_ONLINE) 2584 return -ENODEV; 2585 2586 if (nvme_fc_ctlr_active_on_rport(ctrl)) 2587 return -ENOTUNIQ; 2588 2589 /* 2590 * Create the admin queue 2591 */ 2592 2593 ret = __nvme_fc_create_hw_queue(ctrl, &ctrl->queues[0], 0, 2594 NVME_AQ_DEPTH); 2595 if (ret) 2596 goto out_free_queue; 2597 2598 ret = nvme_fc_connect_admin_queue(ctrl, &ctrl->queues[0], 2599 NVME_AQ_DEPTH, (NVME_AQ_DEPTH / 4)); 2600 if (ret) 2601 goto out_delete_hw_queue; 2602 2603 blk_mq_unquiesce_queue(ctrl->ctrl.admin_q); 2604 2605 ret = nvmf_connect_admin_queue(&ctrl->ctrl); 2606 if (ret) 2607 goto out_disconnect_admin_queue; 2608 2609 set_bit(NVME_FC_Q_LIVE, &ctrl->queues[0].flags); 2610 2611 /* 2612 * Check controller capabilities 2613 * 2614 * todo:- add code to check if ctrl attributes changed from 2615 * prior connection values 2616 */ 2617 2618 ret = nvmf_reg_read64(&ctrl->ctrl, NVME_REG_CAP, &ctrl->ctrl.cap); 2619 if (ret) { 2620 dev_err(ctrl->ctrl.device, 2621 "prop_get NVME_REG_CAP failed\n"); 2622 goto out_disconnect_admin_queue; 2623 } 2624 2625 ctrl->ctrl.sqsize = 2626 min_t(int, NVME_CAP_MQES(ctrl->ctrl.cap), ctrl->ctrl.sqsize); 2627 2628 ret = nvme_enable_ctrl(&ctrl->ctrl, ctrl->ctrl.cap); 2629 if (ret) 2630 goto out_disconnect_admin_queue; 2631 2632 ctrl->ctrl.max_hw_sectors = 2633 (ctrl->lport->ops->max_sgl_segments - 1) << (PAGE_SHIFT - 9); 2634 2635 ret = nvme_init_identify(&ctrl->ctrl); 2636 if (ret) 2637 goto out_disconnect_admin_queue; 2638 2639 /* sanity checks */ 2640 2641 /* FC-NVME does not have other data in the capsule */ 2642 if (ctrl->ctrl.icdoff) { 2643 dev_err(ctrl->ctrl.device, "icdoff %d is not supported!\n", 2644 ctrl->ctrl.icdoff); 2645 goto out_disconnect_admin_queue; 2646 } 2647 2648 /* FC-NVME supports normal SGL Data Block Descriptors */ 2649 2650 if (opts->queue_size > ctrl->ctrl.maxcmd) { 2651 /* warn if maxcmd is lower than queue_size */ 2652 dev_warn(ctrl->ctrl.device, 2653 "queue_size %zu > ctrl maxcmd %u, reducing " 2654 "to queue_size\n", 2655 opts->queue_size, ctrl->ctrl.maxcmd); 2656 opts->queue_size = ctrl->ctrl.maxcmd; 2657 } 2658 2659 if (opts->queue_size > ctrl->ctrl.sqsize + 1) { 2660 /* warn if sqsize is lower than queue_size */ 2661 dev_warn(ctrl->ctrl.device, 2662 "queue_size %zu > ctrl sqsize %u, clamping down\n", 2663 opts->queue_size, ctrl->ctrl.sqsize + 1); 2664 opts->queue_size = ctrl->ctrl.sqsize + 1; 2665 } 2666 2667 ret = nvme_fc_init_aen_ops(ctrl); 2668 if (ret) 2669 goto out_term_aen_ops; 2670 2671 /* 2672 * Create the io queues 2673 */ 2674 2675 if (ctrl->ctrl.queue_count > 1) { 2676 if (!ctrl->ioq_live) 2677 ret = nvme_fc_create_io_queues(ctrl); 2678 else 2679 ret = nvme_fc_recreate_io_queues(ctrl); 2680 if (ret) 2681 goto out_term_aen_ops; 2682 } 2683 2684 changed = nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_LIVE); 2685 2686 ctrl->ctrl.nr_reconnects = 0; 2687 2688 if (changed) 2689 nvme_start_ctrl(&ctrl->ctrl); 2690 2691 return 0; /* Success */ 2692 2693 out_term_aen_ops: 2694 nvme_fc_term_aen_ops(ctrl); 2695 out_disconnect_admin_queue: 2696 /* send a Disconnect(association) LS to fc-nvme target */ 2697 nvme_fc_xmt_disconnect_assoc(ctrl); 2698 out_delete_hw_queue: 2699 __nvme_fc_delete_hw_queue(ctrl, &ctrl->queues[0], 0); 2700 out_free_queue: 2701 
nvme_fc_free_queue(&ctrl->queues[0]);
2702 ctrl->assoc_active = false;
2703 nvme_fc_ctlr_inactive_on_rport(ctrl);
2704
2705 return ret;
2706 }
2707
2708 /*
2709 * This routine stops operation of the controller on the host side.
2710 * On the host os stack side: Admin and IO queues are stopped,
2711 * outstanding ios on them terminated via FC ABTS.
2712 * On the link side: the association is terminated.
2713 */
2714 static void
2715 nvme_fc_delete_association(struct nvme_fc_ctrl *ctrl)
2716 {
2717 unsigned long flags;
2718
2719 if (!ctrl->assoc_active)
2720 return;
2721 ctrl->assoc_active = false;
2722
2723 spin_lock_irqsave(&ctrl->lock, flags);
2724 ctrl->flags |= FCCTRL_TERMIO;
2725 ctrl->iocnt = 0;
2726 spin_unlock_irqrestore(&ctrl->lock, flags);
2727
2728 /*
2729 * If io queues are present, stop them and terminate all outstanding
2730 * ios on them. As FC allocates FC exchange for each io, the
2731 * transport must contact the LLDD to terminate the exchange,
2732 * thus releasing the FC exchange. We use blk_mq_tagset_busy_iter()
2733 * to tell us which ios are busy and invoke a transport routine
2734 * to kill them with the LLDD. After terminating the exchange
2735 * the LLDD will call the transport's normal io done path, but it
2736 * will have an aborted status. The done path will return the
2737 * io requests back to the block layer as part of normal completions
2738 * (but with error status).
2739 */
2740 if (ctrl->ctrl.queue_count > 1) {
2741 nvme_stop_queues(&ctrl->ctrl);
2742 blk_mq_tagset_busy_iter(&ctrl->tag_set,
2743 nvme_fc_terminate_exchange, &ctrl->ctrl);
2744 }
2745
2746 /*
2747 * Other transports, which don't have link-level contexts bound
2748 * to sqe's, would try to gracefully shut down the controller by
2749 * writing the registers for shutdown and polling (call
2750 * nvme_shutdown_ctrl()). Given a bunch of i/o was potentially
2751 * just aborted and we will wait on those contexts, and given
2752 * there was no indication of how live the controller is on the
2753 * link, don't send more io to create more contexts for the
2754 * shutdown. Let the controller fail via keepalive failure if
2755 * it's still present.
2756 */
2757
2758 /*
2759 * clean up the admin queue. Same thing as above.
2760 * use blk_mq_tagset_busy_iter() and the transport routine to
2761 * terminate the exchanges.
2762 */
2763 blk_mq_quiesce_queue(ctrl->ctrl.admin_q);
2764 blk_mq_tagset_busy_iter(&ctrl->admin_tag_set,
2765 nvme_fc_terminate_exchange, &ctrl->ctrl);
2766
2767 /* kill the aens as they are a separate path */
2768 nvme_fc_abort_aen_ops(ctrl);
2769
2770 /* wait for all io that had to be aborted */
2771 spin_lock_irq(&ctrl->lock);
2772 wait_event_lock_irq(ctrl->ioabort_wait, ctrl->iocnt == 0, ctrl->lock);
2773 ctrl->flags &= ~FCCTRL_TERMIO;
2774 spin_unlock_irq(&ctrl->lock);
2775
2776 nvme_fc_term_aen_ops(ctrl);
2777
2778 /*
2779 * send a Disconnect(association) LS to fc-nvme target
2780 * Note: could have been sent at top of process, but
2781 * cleaner on link traffic if after the aborts complete.
2782 * Note: if association doesn't exist, association_id will be 0
2783 */
2784 if (ctrl->association_id)
2785 nvme_fc_xmt_disconnect_assoc(ctrl);
2786
2787 if (ctrl->ctrl.tagset) {
2788 nvme_fc_delete_hw_io_queues(ctrl);
2789 nvme_fc_free_io_queues(ctrl);
2790 }
2791
2792 __nvme_fc_delete_hw_queue(ctrl, &ctrl->queues[0], 0);
2793 nvme_fc_free_queue(&ctrl->queues[0]);
2794
2795 /* re-enable the admin_q so anything new can fast fail */
2796 blk_mq_unquiesce_queue(ctrl->ctrl.admin_q);
2797
2798 /* resume the io queues so that things will fast fail */
2799 nvme_start_queues(&ctrl->ctrl);
2800
2801 nvme_fc_ctlr_inactive_on_rport(ctrl);
2802 }
2803
2804 static void
2805 nvme_fc_delete_ctrl(struct nvme_ctrl *nctrl)
2806 {
2807 struct nvme_fc_ctrl *ctrl = to_fc_ctrl(nctrl);
2808
2809 cancel_work_sync(&ctrl->err_work);
2810 cancel_delayed_work_sync(&ctrl->connect_work);
2811 /*
2812 * kill the association on the link side. this will block
2813 * waiting for io to terminate
2814 */
2815 nvme_fc_delete_association(ctrl);
2816 }
2817
2818 static void
2819 nvme_fc_reconnect_or_delete(struct nvme_fc_ctrl *ctrl, int status)
2820 {
2821 struct nvme_fc_rport *rport = ctrl->rport;
2822 struct nvme_fc_remote_port *portptr = &rport->remoteport;
2823 unsigned long recon_delay = ctrl->ctrl.opts->reconnect_delay * HZ;
2824 bool recon = true;
2825
2826 if (ctrl->ctrl.state != NVME_CTRL_CONNECTING)
2827 return;
2828
2829 if (portptr->port_state == FC_OBJSTATE_ONLINE)
2830 dev_info(ctrl->ctrl.device,
2831 "NVME-FC{%d}: reset: Reconnect attempt failed (%d)\n",
2832 ctrl->cnum, status);
2833 else if (time_after_eq(jiffies, rport->dev_loss_end))
2834 recon = false;
2835
2836 if (recon && nvmf_should_reconnect(&ctrl->ctrl)) {
2837 if (portptr->port_state == FC_OBJSTATE_ONLINE)
2838 dev_info(ctrl->ctrl.device,
2839 "NVME-FC{%d}: Reconnect attempt in %ld "
2840 "seconds\n",
2841 ctrl->cnum, recon_delay / HZ);
2842 else if (time_after(jiffies + recon_delay, rport->dev_loss_end))
2843 recon_delay = rport->dev_loss_end - jiffies;
2844
2845 queue_delayed_work(nvme_wq, &ctrl->connect_work, recon_delay);
2846 } else {
2847 if (portptr->port_state == FC_OBJSTATE_ONLINE)
2848 dev_warn(ctrl->ctrl.device,
2849 "NVME-FC{%d}: Max reconnect attempts (%d) "
2850 "reached.\n",
2851 ctrl->cnum, ctrl->ctrl.nr_reconnects);
2852 else
2853 dev_warn(ctrl->ctrl.device,
2854 "NVME-FC{%d}: dev_loss_tmo (%d) expired "
2855 "while waiting for remoteport connectivity.\n",
2856 ctrl->cnum, portptr->dev_loss_tmo);
2857 WARN_ON(nvme_delete_ctrl(&ctrl->ctrl));
2858 }
2859 }
2860
2861 static void
2862 __nvme_fc_terminate_io(struct nvme_fc_ctrl *ctrl)
2863 {
2864 nvme_stop_keep_alive(&ctrl->ctrl);
2865
2866 /* will block while waiting for io to terminate */
2867 nvme_fc_delete_association(ctrl);
2868
2869 if (ctrl->ctrl.state != NVME_CTRL_CONNECTING &&
2870 !nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_CONNECTING))
2871 dev_err(ctrl->ctrl.device,
2872 "NVME-FC{%d}: error_recovery: Couldn't change state "
2873 "to CONNECTING\n", ctrl->cnum);
2874 }
2875
2876 static void
2877 nvme_fc_reset_ctrl_work(struct work_struct *work)
2878 {
2879 struct nvme_fc_ctrl *ctrl =
2880 container_of(work, struct nvme_fc_ctrl, ctrl.reset_work);
2881 int ret;
2882
2883 __nvme_fc_terminate_io(ctrl);
2884
2885 nvme_stop_ctrl(&ctrl->ctrl);
2886
2887 if (ctrl->rport->remoteport.port_state == FC_OBJSTATE_ONLINE)
2888 ret = nvme_fc_create_association(ctrl);
2889 else
2890 ret = -ENOTCONN;
2891
2892 if (ret)
2893 nvme_fc_reconnect_or_delete(ctrl, ret);
2894 else
2895
dev_info(ctrl->ctrl.device, 2896 "NVME-FC{%d}: controller reset complete\n", 2897 ctrl->cnum); 2898 } 2899 2900 static void 2901 nvme_fc_connect_err_work(struct work_struct *work) 2902 { 2903 struct nvme_fc_ctrl *ctrl = 2904 container_of(work, struct nvme_fc_ctrl, err_work); 2905 2906 __nvme_fc_terminate_io(ctrl); 2907 2908 atomic_set(&ctrl->err_work_active, 0); 2909 2910 /* 2911 * Rescheduling the connection after recovering 2912 * from the io error is left to the reconnect work 2913 * item, which is what should have stalled waiting on 2914 * the io that had the error that scheduled this work. 2915 */ 2916 } 2917 2918 static const struct nvme_ctrl_ops nvme_fc_ctrl_ops = { 2919 .name = "fc", 2920 .module = THIS_MODULE, 2921 .flags = NVME_F_FABRICS, 2922 .reg_read32 = nvmf_reg_read32, 2923 .reg_read64 = nvmf_reg_read64, 2924 .reg_write32 = nvmf_reg_write32, 2925 .free_ctrl = nvme_fc_nvme_ctrl_freed, 2926 .submit_async_event = nvme_fc_submit_async_event, 2927 .delete_ctrl = nvme_fc_delete_ctrl, 2928 .get_address = nvmf_get_address, 2929 }; 2930 2931 static void 2932 nvme_fc_connect_ctrl_work(struct work_struct *work) 2933 { 2934 int ret; 2935 2936 struct nvme_fc_ctrl *ctrl = 2937 container_of(to_delayed_work(work), 2938 struct nvme_fc_ctrl, connect_work); 2939 2940 ret = nvme_fc_create_association(ctrl); 2941 if (ret) 2942 nvme_fc_reconnect_or_delete(ctrl, ret); 2943 else 2944 dev_info(ctrl->ctrl.device, 2945 "NVME-FC{%d}: controller connect complete\n", 2946 ctrl->cnum); 2947 } 2948 2949 2950 static const struct blk_mq_ops nvme_fc_admin_mq_ops = { 2951 .queue_rq = nvme_fc_queue_rq, 2952 .complete = nvme_fc_complete_rq, 2953 .init_request = nvme_fc_init_request, 2954 .exit_request = nvme_fc_exit_request, 2955 .init_hctx = nvme_fc_init_admin_hctx, 2956 .timeout = nvme_fc_timeout, 2957 }; 2958 2959 2960 /* 2961 * Fails a controller request if it matches an existing controller 2962 * (association) with the same tuple: 2963 * <Host NQN, Host ID, local FC port, remote FC port, SUBSYS NQN> 2964 * 2965 * The ports don't need to be compared as they are intrinsically 2966 * already matched by the port pointers supplied. 
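 *
 * For example, a second "nvme connect" to the same subsystem over the
 * same lport/rport pair with an identical host NQN and host ID is
 * rejected with -EALREADY in nvme_fc_init_ctrl() below, unless the
 * duplicate_connect option was requested.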
2967 */ 2968 static bool 2969 nvme_fc_existing_controller(struct nvme_fc_rport *rport, 2970 struct nvmf_ctrl_options *opts) 2971 { 2972 struct nvme_fc_ctrl *ctrl; 2973 unsigned long flags; 2974 bool found = false; 2975 2976 spin_lock_irqsave(&rport->lock, flags); 2977 list_for_each_entry(ctrl, &rport->ctrl_list, ctrl_list) { 2978 found = nvmf_ctlr_matches_baseopts(&ctrl->ctrl, opts); 2979 if (found) 2980 break; 2981 } 2982 spin_unlock_irqrestore(&rport->lock, flags); 2983 2984 return found; 2985 } 2986 2987 static struct nvme_ctrl * 2988 nvme_fc_init_ctrl(struct device *dev, struct nvmf_ctrl_options *opts, 2989 struct nvme_fc_lport *lport, struct nvme_fc_rport *rport) 2990 { 2991 struct nvme_fc_ctrl *ctrl; 2992 unsigned long flags; 2993 int ret, idx; 2994 2995 if (!(rport->remoteport.port_role & 2996 (FC_PORT_ROLE_NVME_DISCOVERY | FC_PORT_ROLE_NVME_TARGET))) { 2997 ret = -EBADR; 2998 goto out_fail; 2999 } 3000 3001 if (!opts->duplicate_connect && 3002 nvme_fc_existing_controller(rport, opts)) { 3003 ret = -EALREADY; 3004 goto out_fail; 3005 } 3006 3007 ctrl = kzalloc(sizeof(*ctrl), GFP_KERNEL); 3008 if (!ctrl) { 3009 ret = -ENOMEM; 3010 goto out_fail; 3011 } 3012 3013 idx = ida_simple_get(&nvme_fc_ctrl_cnt, 0, 0, GFP_KERNEL); 3014 if (idx < 0) { 3015 ret = -ENOSPC; 3016 goto out_free_ctrl; 3017 } 3018 3019 ctrl->ctrl.opts = opts; 3020 ctrl->ctrl.nr_reconnects = 0; 3021 ctrl->ctrl.numa_node = dev_to_node(lport->dev); 3022 INIT_LIST_HEAD(&ctrl->ctrl_list); 3023 ctrl->lport = lport; 3024 ctrl->rport = rport; 3025 ctrl->dev = lport->dev; 3026 ctrl->cnum = idx; 3027 ctrl->ioq_live = false; 3028 ctrl->assoc_active = false; 3029 atomic_set(&ctrl->err_work_active, 0); 3030 init_waitqueue_head(&ctrl->ioabort_wait); 3031 3032 get_device(ctrl->dev); 3033 kref_init(&ctrl->ref); 3034 3035 INIT_WORK(&ctrl->ctrl.reset_work, nvme_fc_reset_ctrl_work); 3036 INIT_DELAYED_WORK(&ctrl->connect_work, nvme_fc_connect_ctrl_work); 3037 INIT_WORK(&ctrl->err_work, nvme_fc_connect_err_work); 3038 spin_lock_init(&ctrl->lock); 3039 3040 /* io queue count */ 3041 ctrl->ctrl.queue_count = min_t(unsigned int, 3042 opts->nr_io_queues, 3043 lport->ops->max_hw_queues); 3044 ctrl->ctrl.queue_count++; /* +1 for admin queue */ 3045 3046 ctrl->ctrl.sqsize = opts->queue_size - 1; 3047 ctrl->ctrl.kato = opts->kato; 3048 ctrl->ctrl.cntlid = 0xffff; 3049 3050 ret = -ENOMEM; 3051 ctrl->queues = kcalloc(ctrl->ctrl.queue_count, 3052 sizeof(struct nvme_fc_queue), GFP_KERNEL); 3053 if (!ctrl->queues) 3054 goto out_free_ida; 3055 3056 nvme_fc_init_queue(ctrl, 0); 3057 3058 memset(&ctrl->admin_tag_set, 0, sizeof(ctrl->admin_tag_set)); 3059 ctrl->admin_tag_set.ops = &nvme_fc_admin_mq_ops; 3060 ctrl->admin_tag_set.queue_depth = NVME_AQ_MQ_TAG_DEPTH; 3061 ctrl->admin_tag_set.reserved_tags = 2; /* fabric connect + Keep-Alive */ 3062 ctrl->admin_tag_set.numa_node = ctrl->ctrl.numa_node; 3063 ctrl->admin_tag_set.cmd_size = 3064 struct_size((struct nvme_fcp_op_w_sgl *)NULL, priv, 3065 ctrl->lport->ops->fcprqst_priv_sz); 3066 ctrl->admin_tag_set.driver_data = ctrl; 3067 ctrl->admin_tag_set.nr_hw_queues = 1; 3068 ctrl->admin_tag_set.timeout = ADMIN_TIMEOUT; 3069 ctrl->admin_tag_set.flags = BLK_MQ_F_NO_SCHED; 3070 3071 ret = blk_mq_alloc_tag_set(&ctrl->admin_tag_set); 3072 if (ret) 3073 goto out_free_queues; 3074 ctrl->ctrl.admin_tagset = &ctrl->admin_tag_set; 3075 3076 ctrl->ctrl.admin_q = blk_mq_init_queue(&ctrl->admin_tag_set); 3077 if (IS_ERR(ctrl->ctrl.admin_q)) { 3078 ret = PTR_ERR(ctrl->ctrl.admin_q); 3079 goto out_free_admin_tag_set; 3080 } 
3081 3082 /* 3083 * Would have been nice to init io queues tag set as well. 3084 * However, we require interaction from the controller 3085 * for max io queue count before we can do so. 3086 * Defer this to the connect path. 3087 */ 3088 3089 ret = nvme_init_ctrl(&ctrl->ctrl, dev, &nvme_fc_ctrl_ops, 0); 3090 if (ret) 3091 goto out_cleanup_admin_q; 3092 3093 /* at this point, teardown path changes to ref counting on nvme ctrl */ 3094 3095 spin_lock_irqsave(&rport->lock, flags); 3096 list_add_tail(&ctrl->ctrl_list, &rport->ctrl_list); 3097 spin_unlock_irqrestore(&rport->lock, flags); 3098 3099 if (!nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_RESETTING) || 3100 !nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_CONNECTING)) { 3101 dev_err(ctrl->ctrl.device, 3102 "NVME-FC{%d}: failed to init ctrl state\n", ctrl->cnum); 3103 goto fail_ctrl; 3104 } 3105 3106 nvme_get_ctrl(&ctrl->ctrl); 3107 3108 if (!queue_delayed_work(nvme_wq, &ctrl->connect_work, 0)) { 3109 nvme_put_ctrl(&ctrl->ctrl); 3110 dev_err(ctrl->ctrl.device, 3111 "NVME-FC{%d}: failed to schedule initial connect\n", 3112 ctrl->cnum); 3113 goto fail_ctrl; 3114 } 3115 3116 flush_delayed_work(&ctrl->connect_work); 3117 3118 dev_info(ctrl->ctrl.device, 3119 "NVME-FC{%d}: new ctrl: NQN \"%s\"\n", 3120 ctrl->cnum, ctrl->ctrl.opts->subsysnqn); 3121 3122 return &ctrl->ctrl; 3123 3124 fail_ctrl: 3125 nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_DELETING); 3126 cancel_work_sync(&ctrl->ctrl.reset_work); 3127 cancel_work_sync(&ctrl->err_work); 3128 cancel_delayed_work_sync(&ctrl->connect_work); 3129 3130 ctrl->ctrl.opts = NULL; 3131 3132 /* initiate nvme ctrl ref counting teardown */ 3133 nvme_uninit_ctrl(&ctrl->ctrl); 3134 3135 /* Remove core ctrl ref. */ 3136 nvme_put_ctrl(&ctrl->ctrl); 3137 3138 /* as we're past the point where we transition to the ref 3139 * counting teardown path, if we return a bad pointer here, 3140 * the calling routine, thinking it's prior to the 3141 * transition, will do an rport put. Since the teardown 3142 * path also does a rport put, we do an extra get here to 3143 * so proper order/teardown happens. 3144 */ 3145 nvme_fc_rport_get(rport); 3146 3147 return ERR_PTR(-EIO); 3148 3149 out_cleanup_admin_q: 3150 blk_cleanup_queue(ctrl->ctrl.admin_q); 3151 out_free_admin_tag_set: 3152 blk_mq_free_tag_set(&ctrl->admin_tag_set); 3153 out_free_queues: 3154 kfree(ctrl->queues); 3155 out_free_ida: 3156 put_device(ctrl->dev); 3157 ida_simple_remove(&nvme_fc_ctrl_cnt, ctrl->cnum); 3158 out_free_ctrl: 3159 kfree(ctrl); 3160 out_fail: 3161 /* exit via here doesn't follow ctlr ref points */ 3162 return ERR_PTR(ret); 3163 } 3164 3165 3166 struct nvmet_fc_traddr { 3167 u64 nn; 3168 u64 pn; 3169 }; 3170 3171 static int 3172 __nvme_fc_parse_u64(substring_t *sstr, u64 *val) 3173 { 3174 u64 token64; 3175 3176 if (match_u64(sstr, &token64)) 3177 return -EINVAL; 3178 *val = token64; 3179 3180 return 0; 3181 } 3182 3183 /* 3184 * This routine validates and extracts the WWN's from the TRADDR string. 3185 * As kernel parsers need the 0x to determine number base, universally 3186 * build string to parse with 0x prefix before parsing name strings. 
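 *
 * Example accepted traddr forms (the WWN values are illustrative only,
 * and the ':' separator is assumed per the usual nn/pn convention):
 *
 *	nn-0x20000090fa942779:pn-0x10000090fa942779
 *	nn-20000090fa942779:pn-10000090fa942779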
3187 */ 3188 static int 3189 nvme_fc_parse_traddr(struct nvmet_fc_traddr *traddr, char *buf, size_t blen) 3190 { 3191 char name[2 + NVME_FC_TRADDR_HEXNAMELEN + 1]; 3192 substring_t wwn = { name, &name[sizeof(name)-1] }; 3193 int nnoffset, pnoffset; 3194 3195 /* validate if string is one of the 2 allowed formats */ 3196 if (strnlen(buf, blen) == NVME_FC_TRADDR_MAXLENGTH && 3197 !strncmp(buf, "nn-0x", NVME_FC_TRADDR_OXNNLEN) && 3198 !strncmp(&buf[NVME_FC_TRADDR_MAX_PN_OFFSET], 3199 "pn-0x", NVME_FC_TRADDR_OXNNLEN)) { 3200 nnoffset = NVME_FC_TRADDR_OXNNLEN; 3201 pnoffset = NVME_FC_TRADDR_MAX_PN_OFFSET + 3202 NVME_FC_TRADDR_OXNNLEN; 3203 } else if ((strnlen(buf, blen) == NVME_FC_TRADDR_MINLENGTH && 3204 !strncmp(buf, "nn-", NVME_FC_TRADDR_NNLEN) && 3205 !strncmp(&buf[NVME_FC_TRADDR_MIN_PN_OFFSET], 3206 "pn-", NVME_FC_TRADDR_NNLEN))) { 3207 nnoffset = NVME_FC_TRADDR_NNLEN; 3208 pnoffset = NVME_FC_TRADDR_MIN_PN_OFFSET + NVME_FC_TRADDR_NNLEN; 3209 } else 3210 goto out_einval; 3211 3212 name[0] = '0'; 3213 name[1] = 'x'; 3214 name[2 + NVME_FC_TRADDR_HEXNAMELEN] = 0; 3215 3216 memcpy(&name[2], &buf[nnoffset], NVME_FC_TRADDR_HEXNAMELEN); 3217 if (__nvme_fc_parse_u64(&wwn, &traddr->nn)) 3218 goto out_einval; 3219 3220 memcpy(&name[2], &buf[pnoffset], NVME_FC_TRADDR_HEXNAMELEN); 3221 if (__nvme_fc_parse_u64(&wwn, &traddr->pn)) 3222 goto out_einval; 3223 3224 return 0; 3225 3226 out_einval: 3227 pr_warn("%s: bad traddr string\n", __func__); 3228 return -EINVAL; 3229 } 3230 3231 static struct nvme_ctrl * 3232 nvme_fc_create_ctrl(struct device *dev, struct nvmf_ctrl_options *opts) 3233 { 3234 struct nvme_fc_lport *lport; 3235 struct nvme_fc_rport *rport; 3236 struct nvme_ctrl *ctrl; 3237 struct nvmet_fc_traddr laddr = { 0L, 0L }; 3238 struct nvmet_fc_traddr raddr = { 0L, 0L }; 3239 unsigned long flags; 3240 int ret; 3241 3242 ret = nvme_fc_parse_traddr(&raddr, opts->traddr, NVMF_TRADDR_SIZE); 3243 if (ret || !raddr.nn || !raddr.pn) 3244 return ERR_PTR(-EINVAL); 3245 3246 ret = nvme_fc_parse_traddr(&laddr, opts->host_traddr, NVMF_TRADDR_SIZE); 3247 if (ret || !laddr.nn || !laddr.pn) 3248 return ERR_PTR(-EINVAL); 3249 3250 /* find the host and remote ports to connect together */ 3251 spin_lock_irqsave(&nvme_fc_lock, flags); 3252 list_for_each_entry(lport, &nvme_fc_lport_list, port_list) { 3253 if (lport->localport.node_name != laddr.nn || 3254 lport->localport.port_name != laddr.pn) 3255 continue; 3256 3257 list_for_each_entry(rport, &lport->endp_list, endp_list) { 3258 if (rport->remoteport.node_name != raddr.nn || 3259 rport->remoteport.port_name != raddr.pn) 3260 continue; 3261 3262 /* if fail to get reference fall through. Will error */ 3263 if (!nvme_fc_rport_get(rport)) 3264 break; 3265 3266 spin_unlock_irqrestore(&nvme_fc_lock, flags); 3267 3268 ctrl = nvme_fc_init_ctrl(dev, opts, lport, rport); 3269 if (IS_ERR(ctrl)) 3270 nvme_fc_rport_put(rport); 3271 return ctrl; 3272 } 3273 } 3274 spin_unlock_irqrestore(&nvme_fc_lock, flags); 3275 3276 pr_warn("%s: %s - %s combination not found\n", 3277 __func__, opts->traddr, opts->host_traddr); 3278 return ERR_PTR(-ENOENT); 3279 } 3280 3281 3282 static struct nvmf_transport_ops nvme_fc_transport = { 3283 .name = "fc", 3284 .module = THIS_MODULE, 3285 .required_opts = NVMF_OPT_TRADDR | NVMF_OPT_HOST_TRADDR, 3286 .allowed_opts = NVMF_OPT_RECONNECT_DELAY | NVMF_OPT_CTRL_LOSS_TMO, 3287 .create_ctrl = nvme_fc_create_ctrl, 3288 }; 3289 3290 /* Arbitrary successive failures max. 
With lots of subsystems could be high */ 3291 #define DISCOVERY_MAX_FAIL 20 3292 3293 static ssize_t nvme_fc_nvme_discovery_store(struct device *dev, 3294 struct device_attribute *attr, const char *buf, size_t count) 3295 { 3296 unsigned long flags; 3297 LIST_HEAD(local_disc_list); 3298 struct nvme_fc_lport *lport; 3299 struct nvme_fc_rport *rport; 3300 int failcnt = 0; 3301 3302 spin_lock_irqsave(&nvme_fc_lock, flags); 3303 restart: 3304 list_for_each_entry(lport, &nvme_fc_lport_list, port_list) { 3305 list_for_each_entry(rport, &lport->endp_list, endp_list) { 3306 if (!nvme_fc_lport_get(lport)) 3307 continue; 3308 if (!nvme_fc_rport_get(rport)) { 3309 /* 3310 * This is a temporary condition. Upon restart 3311 * this rport will be gone from the list. 3312 * 3313 * Revert the lport put and retry. Anything 3314 * added to the list already will be skipped (as 3315 * they are no longer list_empty). Loops should 3316 * resume at rports that were not yet seen. 3317 */ 3318 nvme_fc_lport_put(lport); 3319 3320 if (failcnt++ < DISCOVERY_MAX_FAIL) 3321 goto restart; 3322 3323 pr_err("nvme_discovery: too many reference " 3324 "failures\n"); 3325 goto process_local_list; 3326 } 3327 if (list_empty(&rport->disc_list)) 3328 list_add_tail(&rport->disc_list, 3329 &local_disc_list); 3330 } 3331 } 3332 3333 process_local_list: 3334 while (!list_empty(&local_disc_list)) { 3335 rport = list_first_entry(&local_disc_list, 3336 struct nvme_fc_rport, disc_list); 3337 list_del_init(&rport->disc_list); 3338 spin_unlock_irqrestore(&nvme_fc_lock, flags); 3339 3340 lport = rport->lport; 3341 /* signal discovery. Won't hurt if it repeats */ 3342 nvme_fc_signal_discovery_scan(lport, rport); 3343 nvme_fc_rport_put(rport); 3344 nvme_fc_lport_put(lport); 3345 3346 spin_lock_irqsave(&nvme_fc_lock, flags); 3347 } 3348 spin_unlock_irqrestore(&nvme_fc_lock, flags); 3349 3350 return count; 3351 } 3352 static DEVICE_ATTR(nvme_discovery, 0200, NULL, nvme_fc_nvme_discovery_store); 3353 3354 static struct attribute *nvme_fc_attrs[] = { 3355 &dev_attr_nvme_discovery.attr, 3356 NULL 3357 }; 3358 3359 static struct attribute_group nvme_fc_attr_group = { 3360 .attrs = nvme_fc_attrs, 3361 }; 3362 3363 static const struct attribute_group *nvme_fc_attr_groups[] = { 3364 &nvme_fc_attr_group, 3365 NULL 3366 }; 3367 3368 static struct class fc_class = { 3369 .name = "fc", 3370 .dev_groups = nvme_fc_attr_groups, 3371 .owner = THIS_MODULE, 3372 }; 3373 3374 static int __init nvme_fc_init_module(void) 3375 { 3376 int ret; 3377 3378 /* 3379 * NOTE: 3380 * It is expected that in the future the kernel will combine 3381 * the FC-isms that are currently under scsi and now being 3382 * added to by NVME into a new standalone FC class. The SCSI 3383 * and NVME protocols and their devices would be under this 3384 * new FC class. 3385 * 3386 * As we need something to post FC-specific udev events to, 3387 * specifically for nvme probe events, start by creating the 3388 * new device class. When the new standalone FC class is 3389 * put in place, this code will move to a more generic 3390 * location for the class. 
3391 */ 3392 ret = class_register(&fc_class); 3393 if (ret) { 3394 pr_err("couldn't register class fc\n"); 3395 return ret; 3396 } 3397 3398 /* 3399 * Create a device for the FC-centric udev events 3400 */ 3401 fc_udev_device = device_create(&fc_class, NULL, MKDEV(0, 0), NULL, 3402 "fc_udev_device"); 3403 if (IS_ERR(fc_udev_device)) { 3404 pr_err("couldn't create fc_udev device!\n"); 3405 ret = PTR_ERR(fc_udev_device); 3406 goto out_destroy_class; 3407 } 3408 3409 ret = nvmf_register_transport(&nvme_fc_transport); 3410 if (ret) 3411 goto out_destroy_device; 3412 3413 return 0; 3414 3415 out_destroy_device: 3416 device_destroy(&fc_class, MKDEV(0, 0)); 3417 out_destroy_class: 3418 class_unregister(&fc_class); 3419 return ret; 3420 } 3421 3422 static void __exit nvme_fc_exit_module(void) 3423 { 3424 /* sanity check - all lports should be removed */ 3425 if (!list_empty(&nvme_fc_lport_list)) 3426 pr_warn("%s: localport list not empty\n", __func__); 3427 3428 nvmf_unregister_transport(&nvme_fc_transport); 3429 3430 ida_destroy(&nvme_fc_local_port_cnt); 3431 ida_destroy(&nvme_fc_ctrl_cnt); 3432 3433 device_destroy(&fc_class, MKDEV(0, 0)); 3434 class_unregister(&fc_class); 3435 } 3436 3437 module_init(nvme_fc_init_module); 3438 module_exit(nvme_fc_exit_module); 3439 3440 MODULE_LICENSE("GPL v2"); 3441