1 /* 2 * Copyright (c) 2016 Avago Technologies. All rights reserved. 3 * 4 * This program is free software; you can redistribute it and/or modify 5 * it under the terms of version 2 of the GNU General Public License as 6 * published by the Free Software Foundation. 7 * 8 * This program is distributed in the hope that it will be useful. 9 * ALL EXPRESS OR IMPLIED CONDITIONS, REPRESENTATIONS AND WARRANTIES, 10 * INCLUDING ANY IMPLIED WARRANTY OF MERCHANTABILITY, FITNESS FOR A 11 * PARTICULAR PURPOSE, OR NON-INFRINGEMENT, ARE DISCLAIMED, EXCEPT TO 12 * THE EXTENT THAT SUCH DISCLAIMERS ARE HELD TO BE LEGALLY INVALID. 13 * See the GNU General Public License for more details, a copy of which 14 * can be found in the file COPYING included with this package 15 * 16 */ 17 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt 18 #include <linux/module.h> 19 #include <linux/parser.h> 20 #include <uapi/scsi/fc/fc_fs.h> 21 #include <uapi/scsi/fc/fc_els.h> 22 #include <linux/delay.h> 23 #include <linux/overflow.h> 24 25 #include "nvme.h" 26 #include "fabrics.h" 27 #include <linux/nvme-fc-driver.h> 28 #include <linux/nvme-fc.h> 29 30 31 /* *************************** Data Structures/Defines ****************** */ 32 33 34 enum nvme_fc_queue_flags { 35 NVME_FC_Q_CONNECTED = 0, 36 NVME_FC_Q_LIVE, 37 }; 38 39 #define NVME_FC_DEFAULT_DEV_LOSS_TMO 60 /* seconds */ 40 41 struct nvme_fc_queue { 42 struct nvme_fc_ctrl *ctrl; 43 struct device *dev; 44 struct blk_mq_hw_ctx *hctx; 45 void *lldd_handle; 46 size_t cmnd_capsule_len; 47 u32 qnum; 48 u32 rqcnt; 49 u32 seqno; 50 51 u64 connection_id; 52 atomic_t csn; 53 54 unsigned long flags; 55 } __aligned(sizeof(u64)); /* alignment for other things alloc'd with */ 56 57 enum nvme_fcop_flags { 58 FCOP_FLAGS_TERMIO = (1 << 0), 59 FCOP_FLAGS_AEN = (1 << 1), 60 }; 61 62 struct nvmefc_ls_req_op { 63 struct nvmefc_ls_req ls_req; 64 65 struct nvme_fc_rport *rport; 66 struct nvme_fc_queue *queue; 67 struct request *rq; 68 u32 flags; 69 70 int ls_error; 71 struct completion ls_done; 72 struct list_head lsreq_list; /* rport->ls_req_list */ 73 bool req_queued; 74 }; 75 76 enum nvme_fcpop_state { 77 FCPOP_STATE_UNINIT = 0, 78 FCPOP_STATE_IDLE = 1, 79 FCPOP_STATE_ACTIVE = 2, 80 FCPOP_STATE_ABORTED = 3, 81 FCPOP_STATE_COMPLETE = 4, 82 }; 83 84 struct nvme_fc_fcp_op { 85 struct nvme_request nreq; /* 86 * nvme/host/core.c 87 * requires this to be 88 * the 1st element in the 89 * private structure 90 * associated with the 91 * request. 
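						 * (nvme core locates it
						 * via nvme_req(rq), i.e.
						 * blk_mq_rq_to_pdu(rq),
						 * which assumes the
						 * nvme_request sits at
						 * offset 0 of the pdu.)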
92 */ 93 struct nvmefc_fcp_req fcp_req; 94 95 struct nvme_fc_ctrl *ctrl; 96 struct nvme_fc_queue *queue; 97 struct request *rq; 98 99 atomic_t state; 100 u32 flags; 101 u32 rqno; 102 u32 nents; 103 104 struct nvme_fc_cmd_iu cmd_iu; 105 struct nvme_fc_ersp_iu rsp_iu; 106 }; 107 108 struct nvme_fcp_op_w_sgl { 109 struct nvme_fc_fcp_op op; 110 struct scatterlist sgl[SG_CHUNK_SIZE]; 111 uint8_t priv[0]; 112 }; 113 114 struct nvme_fc_lport { 115 struct nvme_fc_local_port localport; 116 117 struct ida endp_cnt; 118 struct list_head port_list; /* nvme_fc_port_list */ 119 struct list_head endp_list; 120 struct device *dev; /* physical device for dma */ 121 struct nvme_fc_port_template *ops; 122 struct kref ref; 123 atomic_t act_rport_cnt; 124 } __aligned(sizeof(u64)); /* alignment for other things alloc'd with */ 125 126 struct nvme_fc_rport { 127 struct nvme_fc_remote_port remoteport; 128 129 struct list_head endp_list; /* for lport->endp_list */ 130 struct list_head ctrl_list; 131 struct list_head ls_req_list; 132 struct list_head disc_list; 133 struct device *dev; /* physical device for dma */ 134 struct nvme_fc_lport *lport; 135 spinlock_t lock; 136 struct kref ref; 137 atomic_t act_ctrl_cnt; 138 unsigned long dev_loss_end; 139 } __aligned(sizeof(u64)); /* alignment for other things alloc'd with */ 140 141 enum nvme_fcctrl_flags { 142 FCCTRL_TERMIO = (1 << 0), 143 }; 144 145 struct nvme_fc_ctrl { 146 spinlock_t lock; 147 struct nvme_fc_queue *queues; 148 struct device *dev; 149 struct nvme_fc_lport *lport; 150 struct nvme_fc_rport *rport; 151 u32 cnum; 152 153 bool ioq_live; 154 bool assoc_active; 155 atomic_t err_work_active; 156 u64 association_id; 157 158 struct list_head ctrl_list; /* rport->ctrl_list */ 159 160 struct blk_mq_tag_set admin_tag_set; 161 struct blk_mq_tag_set tag_set; 162 163 struct delayed_work connect_work; 164 struct work_struct err_work; 165 166 struct kref ref; 167 u32 flags; 168 u32 iocnt; 169 wait_queue_head_t ioabort_wait; 170 171 struct nvme_fc_fcp_op aen_ops[NVME_NR_AEN_COMMANDS]; 172 173 struct nvme_ctrl ctrl; 174 }; 175 176 static inline struct nvme_fc_ctrl * 177 to_fc_ctrl(struct nvme_ctrl *ctrl) 178 { 179 return container_of(ctrl, struct nvme_fc_ctrl, ctrl); 180 } 181 182 static inline struct nvme_fc_lport * 183 localport_to_lport(struct nvme_fc_local_port *portptr) 184 { 185 return container_of(portptr, struct nvme_fc_lport, localport); 186 } 187 188 static inline struct nvme_fc_rport * 189 remoteport_to_rport(struct nvme_fc_remote_port *portptr) 190 { 191 return container_of(portptr, struct nvme_fc_rport, remoteport); 192 } 193 194 static inline struct nvmefc_ls_req_op * 195 ls_req_to_lsop(struct nvmefc_ls_req *lsreq) 196 { 197 return container_of(lsreq, struct nvmefc_ls_req_op, ls_req); 198 } 199 200 static inline struct nvme_fc_fcp_op * 201 fcp_req_to_fcp_op(struct nvmefc_fcp_req *fcpreq) 202 { 203 return container_of(fcpreq, struct nvme_fc_fcp_op, fcp_req); 204 } 205 206 207 208 /* *************************** Globals **************************** */ 209 210 211 static DEFINE_SPINLOCK(nvme_fc_lock); 212 213 static LIST_HEAD(nvme_fc_lport_list); 214 static DEFINE_IDA(nvme_fc_local_port_cnt); 215 static DEFINE_IDA(nvme_fc_ctrl_cnt); 216 217 218 219 /* 220 * These items are short-term. They will eventually be moved into 221 * a generic FC class. See comments in module init. 
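 *
 * (fc_udev_device below is the kobject against which
 * nvme_fc_signal_discovery_scan() raises its FC_EVENT=nvmediscovery
 * uevents; userspace - e.g. nvme-cli's autoconnect udev rule - is
 * expected to react to those events by issuing the corresponding
 * discovery/connect requests.)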
222 */ 223 static struct device *fc_udev_device; 224 225 226 /* *********************** FC-NVME Port Management ************************ */ 227 228 static void __nvme_fc_delete_hw_queue(struct nvme_fc_ctrl *, 229 struct nvme_fc_queue *, unsigned int); 230 231 static void 232 nvme_fc_free_lport(struct kref *ref) 233 { 234 struct nvme_fc_lport *lport = 235 container_of(ref, struct nvme_fc_lport, ref); 236 unsigned long flags; 237 238 WARN_ON(lport->localport.port_state != FC_OBJSTATE_DELETED); 239 WARN_ON(!list_empty(&lport->endp_list)); 240 241 /* remove from transport list */ 242 spin_lock_irqsave(&nvme_fc_lock, flags); 243 list_del(&lport->port_list); 244 spin_unlock_irqrestore(&nvme_fc_lock, flags); 245 246 ida_simple_remove(&nvme_fc_local_port_cnt, lport->localport.port_num); 247 ida_destroy(&lport->endp_cnt); 248 249 put_device(lport->dev); 250 251 kfree(lport); 252 } 253 254 static void 255 nvme_fc_lport_put(struct nvme_fc_lport *lport) 256 { 257 kref_put(&lport->ref, nvme_fc_free_lport); 258 } 259 260 static int 261 nvme_fc_lport_get(struct nvme_fc_lport *lport) 262 { 263 return kref_get_unless_zero(&lport->ref); 264 } 265 266 267 static struct nvme_fc_lport * 268 nvme_fc_attach_to_unreg_lport(struct nvme_fc_port_info *pinfo, 269 struct nvme_fc_port_template *ops, 270 struct device *dev) 271 { 272 struct nvme_fc_lport *lport; 273 unsigned long flags; 274 275 spin_lock_irqsave(&nvme_fc_lock, flags); 276 277 list_for_each_entry(lport, &nvme_fc_lport_list, port_list) { 278 if (lport->localport.node_name != pinfo->node_name || 279 lport->localport.port_name != pinfo->port_name) 280 continue; 281 282 if (lport->dev != dev) { 283 lport = ERR_PTR(-EXDEV); 284 goto out_done; 285 } 286 287 if (lport->localport.port_state != FC_OBJSTATE_DELETED) { 288 lport = ERR_PTR(-EEXIST); 289 goto out_done; 290 } 291 292 if (!nvme_fc_lport_get(lport)) { 293 /* 294 * fails if ref cnt already 0. If so, 295 * act as if lport already deleted 296 */ 297 lport = NULL; 298 goto out_done; 299 } 300 301 /* resume the lport */ 302 303 lport->ops = ops; 304 lport->localport.port_role = pinfo->port_role; 305 lport->localport.port_id = pinfo->port_id; 306 lport->localport.port_state = FC_OBJSTATE_ONLINE; 307 308 spin_unlock_irqrestore(&nvme_fc_lock, flags); 309 310 return lport; 311 } 312 313 lport = NULL; 314 315 out_done: 316 spin_unlock_irqrestore(&nvme_fc_lock, flags); 317 318 return lport; 319 } 320 321 /** 322 * nvme_fc_register_localport - transport entry point called by an 323 * LLDD to register the existence of a NVME 324 * host FC port. 325 * @pinfo: pointer to information about the port to be registered 326 * @template: LLDD entrypoints and operational parameters for the port 327 * @dev: physical hardware device node port corresponds to. Will be 328 * used for DMA mappings 329 * @portptr: pointer to a local port pointer. Upon success, the routine 330 * will allocate a nvme_fc_local_port structure and place its 331 * address in the local port pointer. Upon failure, local port 332 * pointer will be set to 0. 333 * 334 * Returns: 335 * a completion status. Must be 0 upon success; a negative errno 336 * (ex: -ENXIO) upon failure. 
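 *
 * Illustrative registration sketch (the "example_*" names and the
 * numeric values are hypothetical, not part of this API; the template
 * must at least supply the mandatory entrypoints and limits validated
 * below):
 *
 *	static struct nvme_fc_port_template example_fc_nvme_template = {
 *		.localport_delete	= example_localport_delete,
 *		.remoteport_delete	= example_remoteport_delete,
 *		.create_queue		= example_create_queue,
 *		.delete_queue		= example_delete_queue,
 *		.ls_req			= example_ls_req,
 *		.fcp_io			= example_fcp_io,
 *		.ls_abort		= example_ls_abort,
 *		.fcp_abort		= example_fcp_abort,
 *		.max_hw_queues		= 4,
 *		.max_sgl_segments	= 64,
 *		.max_dif_sgl_segments	= 64,
 *		.dma_boundary		= 0xFFFFFFFF,
 *		.local_priv_sz		= sizeof(struct example_lport_priv),
 *		.remote_priv_sz		= sizeof(struct example_rport_priv),
 *		.lsrqst_priv_sz		= sizeof(struct example_lsreq_priv),
 *		.fcprqst_priv_sz	= sizeof(struct example_fcpreq_priv),
 *	};
 *
 *	struct nvme_fc_port_info pinfo = {
 *		.node_name = wwnn,	// 64-bit WWNN of the HBA port
 *		.port_name = wwpn,	// 64-bit WWPN of the HBA port
 *		.port_role = FC_PORT_ROLE_NVME_INITIATOR,
 *		.port_id   = did,	// 24-bit FC address identifier
 *	};
 *	struct nvme_fc_local_port *localport;
 *	int ret;
 *
 *	ret = nvme_fc_register_localport(&pinfo, &example_fc_nvme_template,
 *					 &pci_dev->dev, &localport);
 *	if (ret)
 *		return ret;
 *	// on success, localport->private points at local_priv_sz bytes
 *	// reserved for the LLDD's own per-port context.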
337 */ 338 int 339 nvme_fc_register_localport(struct nvme_fc_port_info *pinfo, 340 struct nvme_fc_port_template *template, 341 struct device *dev, 342 struct nvme_fc_local_port **portptr) 343 { 344 struct nvme_fc_lport *newrec; 345 unsigned long flags; 346 int ret, idx; 347 348 if (!template->localport_delete || !template->remoteport_delete || 349 !template->ls_req || !template->fcp_io || 350 !template->ls_abort || !template->fcp_abort || 351 !template->max_hw_queues || !template->max_sgl_segments || 352 !template->max_dif_sgl_segments || !template->dma_boundary) { 353 ret = -EINVAL; 354 goto out_reghost_failed; 355 } 356 357 /* 358 * look to see if there is already a localport that had been 359 * deregistered and in the process of waiting for all the 360 * references to fully be removed. If the references haven't 361 * expired, we can simply re-enable the localport. Remoteports 362 * and controller reconnections should resume naturally. 363 */ 364 newrec = nvme_fc_attach_to_unreg_lport(pinfo, template, dev); 365 366 /* found an lport, but something about its state is bad */ 367 if (IS_ERR(newrec)) { 368 ret = PTR_ERR(newrec); 369 goto out_reghost_failed; 370 371 /* found existing lport, which was resumed */ 372 } else if (newrec) { 373 *portptr = &newrec->localport; 374 return 0; 375 } 376 377 /* nothing found - allocate a new localport struct */ 378 379 newrec = kmalloc((sizeof(*newrec) + template->local_priv_sz), 380 GFP_KERNEL); 381 if (!newrec) { 382 ret = -ENOMEM; 383 goto out_reghost_failed; 384 } 385 386 idx = ida_simple_get(&nvme_fc_local_port_cnt, 0, 0, GFP_KERNEL); 387 if (idx < 0) { 388 ret = -ENOSPC; 389 goto out_fail_kfree; 390 } 391 392 if (!get_device(dev) && dev) { 393 ret = -ENODEV; 394 goto out_ida_put; 395 } 396 397 INIT_LIST_HEAD(&newrec->port_list); 398 INIT_LIST_HEAD(&newrec->endp_list); 399 kref_init(&newrec->ref); 400 atomic_set(&newrec->act_rport_cnt, 0); 401 newrec->ops = template; 402 newrec->dev = dev; 403 ida_init(&newrec->endp_cnt); 404 newrec->localport.private = &newrec[1]; 405 newrec->localport.node_name = pinfo->node_name; 406 newrec->localport.port_name = pinfo->port_name; 407 newrec->localport.port_role = pinfo->port_role; 408 newrec->localport.port_id = pinfo->port_id; 409 newrec->localport.port_state = FC_OBJSTATE_ONLINE; 410 newrec->localport.port_num = idx; 411 412 spin_lock_irqsave(&nvme_fc_lock, flags); 413 list_add_tail(&newrec->port_list, &nvme_fc_lport_list); 414 spin_unlock_irqrestore(&nvme_fc_lock, flags); 415 416 if (dev) 417 dma_set_seg_boundary(dev, template->dma_boundary); 418 419 *portptr = &newrec->localport; 420 return 0; 421 422 out_ida_put: 423 ida_simple_remove(&nvme_fc_local_port_cnt, idx); 424 out_fail_kfree: 425 kfree(newrec); 426 out_reghost_failed: 427 *portptr = NULL; 428 429 return ret; 430 } 431 EXPORT_SYMBOL_GPL(nvme_fc_register_localport); 432 433 /** 434 * nvme_fc_unregister_localport - transport entry point called by an 435 * LLDD to deregister/remove a previously 436 * registered a NVME host FC port. 437 * @portptr: pointer to the (registered) local port that is to be deregistered. 438 * 439 * Returns: 440 * a completion status. Must be 0 upon success; a negative errno 441 * (ex: -ENXIO) upon failure. 
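 *
 * Note: the localport_delete() template callback, not the return of
 * this routine, is what tells the LLDD the transport is finished with
 * the port. A common LLDD pattern (illustrative, hypothetical names)
 * is to block its own teardown on that callback:
 *
 *	// example_localport_delete(localport) does:
 *	//	complete(&example_lport->unreg_done);
 *
 *	nvme_fc_unregister_localport(example_lport->localport);
 *	wait_for_completion(&example_lport->unreg_done);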
442 */ 443 int 444 nvme_fc_unregister_localport(struct nvme_fc_local_port *portptr) 445 { 446 struct nvme_fc_lport *lport = localport_to_lport(portptr); 447 unsigned long flags; 448 449 if (!portptr) 450 return -EINVAL; 451 452 spin_lock_irqsave(&nvme_fc_lock, flags); 453 454 if (portptr->port_state != FC_OBJSTATE_ONLINE) { 455 spin_unlock_irqrestore(&nvme_fc_lock, flags); 456 return -EINVAL; 457 } 458 portptr->port_state = FC_OBJSTATE_DELETED; 459 460 spin_unlock_irqrestore(&nvme_fc_lock, flags); 461 462 if (atomic_read(&lport->act_rport_cnt) == 0) 463 lport->ops->localport_delete(&lport->localport); 464 465 nvme_fc_lport_put(lport); 466 467 return 0; 468 } 469 EXPORT_SYMBOL_GPL(nvme_fc_unregister_localport); 470 471 /* 472 * TRADDR strings, per FC-NVME are fixed format: 473 * "nn-0x<16hexdigits>:pn-0x<16hexdigits>" - 43 characters 474 * udev event will only differ by prefix of what field is 475 * being specified: 476 * "NVMEFC_HOST_TRADDR=" or "NVMEFC_TRADDR=" - 19 max characters 477 * 19 + 43 + null_fudge = 64 characters 478 */ 479 #define FCNVME_TRADDR_LENGTH 64 480 481 static void 482 nvme_fc_signal_discovery_scan(struct nvme_fc_lport *lport, 483 struct nvme_fc_rport *rport) 484 { 485 char hostaddr[FCNVME_TRADDR_LENGTH]; /* NVMEFC_HOST_TRADDR=...*/ 486 char tgtaddr[FCNVME_TRADDR_LENGTH]; /* NVMEFC_TRADDR=...*/ 487 char *envp[4] = { "FC_EVENT=nvmediscovery", hostaddr, tgtaddr, NULL }; 488 489 if (!(rport->remoteport.port_role & FC_PORT_ROLE_NVME_DISCOVERY)) 490 return; 491 492 snprintf(hostaddr, sizeof(hostaddr), 493 "NVMEFC_HOST_TRADDR=nn-0x%016llx:pn-0x%016llx", 494 lport->localport.node_name, lport->localport.port_name); 495 snprintf(tgtaddr, sizeof(tgtaddr), 496 "NVMEFC_TRADDR=nn-0x%016llx:pn-0x%016llx", 497 rport->remoteport.node_name, rport->remoteport.port_name); 498 kobject_uevent_env(&fc_udev_device->kobj, KOBJ_CHANGE, envp); 499 } 500 501 static void 502 nvme_fc_free_rport(struct kref *ref) 503 { 504 struct nvme_fc_rport *rport = 505 container_of(ref, struct nvme_fc_rport, ref); 506 struct nvme_fc_lport *lport = 507 localport_to_lport(rport->remoteport.localport); 508 unsigned long flags; 509 510 WARN_ON(rport->remoteport.port_state != FC_OBJSTATE_DELETED); 511 WARN_ON(!list_empty(&rport->ctrl_list)); 512 513 /* remove from lport list */ 514 spin_lock_irqsave(&nvme_fc_lock, flags); 515 list_del(&rport->endp_list); 516 spin_unlock_irqrestore(&nvme_fc_lock, flags); 517 518 WARN_ON(!list_empty(&rport->disc_list)); 519 ida_simple_remove(&lport->endp_cnt, rport->remoteport.port_num); 520 521 kfree(rport); 522 523 nvme_fc_lport_put(lport); 524 } 525 526 static void 527 nvme_fc_rport_put(struct nvme_fc_rport *rport) 528 { 529 kref_put(&rport->ref, nvme_fc_free_rport); 530 } 531 532 static int 533 nvme_fc_rport_get(struct nvme_fc_rport *rport) 534 { 535 return kref_get_unless_zero(&rport->ref); 536 } 537 538 static void 539 nvme_fc_resume_controller(struct nvme_fc_ctrl *ctrl) 540 { 541 switch (ctrl->ctrl.state) { 542 case NVME_CTRL_NEW: 543 case NVME_CTRL_CONNECTING: 544 /* 545 * As all reconnects were suppressed, schedule a 546 * connect. 547 */ 548 dev_info(ctrl->ctrl.device, 549 "NVME-FC{%d}: connectivity re-established. " 550 "Attempting reconnect\n", ctrl->cnum); 551 552 queue_delayed_work(nvme_wq, &ctrl->connect_work, 0); 553 break; 554 555 case NVME_CTRL_RESETTING: 556 /* 557 * Controller is already in the process of terminating the 558 * association. No need to do anything further. The reconnect 559 * step will naturally occur after the reset completes. 
560 */ 561 break; 562 563 default: 564 /* no action to take - let it delete */ 565 break; 566 } 567 } 568 569 static struct nvme_fc_rport * 570 nvme_fc_attach_to_suspended_rport(struct nvme_fc_lport *lport, 571 struct nvme_fc_port_info *pinfo) 572 { 573 struct nvme_fc_rport *rport; 574 struct nvme_fc_ctrl *ctrl; 575 unsigned long flags; 576 577 spin_lock_irqsave(&nvme_fc_lock, flags); 578 579 list_for_each_entry(rport, &lport->endp_list, endp_list) { 580 if (rport->remoteport.node_name != pinfo->node_name || 581 rport->remoteport.port_name != pinfo->port_name) 582 continue; 583 584 if (!nvme_fc_rport_get(rport)) { 585 rport = ERR_PTR(-ENOLCK); 586 goto out_done; 587 } 588 589 spin_unlock_irqrestore(&nvme_fc_lock, flags); 590 591 spin_lock_irqsave(&rport->lock, flags); 592 593 /* has it been unregistered */ 594 if (rport->remoteport.port_state != FC_OBJSTATE_DELETED) { 595 /* means lldd called us twice */ 596 spin_unlock_irqrestore(&rport->lock, flags); 597 nvme_fc_rport_put(rport); 598 return ERR_PTR(-ESTALE); 599 } 600 601 rport->remoteport.port_role = pinfo->port_role; 602 rport->remoteport.port_id = pinfo->port_id; 603 rport->remoteport.port_state = FC_OBJSTATE_ONLINE; 604 rport->dev_loss_end = 0; 605 606 /* 607 * kick off a reconnect attempt on all associations to the 608 * remote port. A successful reconnects will resume i/o. 609 */ 610 list_for_each_entry(ctrl, &rport->ctrl_list, ctrl_list) 611 nvme_fc_resume_controller(ctrl); 612 613 spin_unlock_irqrestore(&rport->lock, flags); 614 615 return rport; 616 } 617 618 rport = NULL; 619 620 out_done: 621 spin_unlock_irqrestore(&nvme_fc_lock, flags); 622 623 return rport; 624 } 625 626 static inline void 627 __nvme_fc_set_dev_loss_tmo(struct nvme_fc_rport *rport, 628 struct nvme_fc_port_info *pinfo) 629 { 630 if (pinfo->dev_loss_tmo) 631 rport->remoteport.dev_loss_tmo = pinfo->dev_loss_tmo; 632 else 633 rport->remoteport.dev_loss_tmo = NVME_FC_DEFAULT_DEV_LOSS_TMO; 634 } 635 636 /** 637 * nvme_fc_register_remoteport - transport entry point called by an 638 * LLDD to register the existence of a NVME 639 * subsystem FC port on its fabric. 640 * @localport: pointer to the (registered) local port that the remote 641 * subsystem port is connected to. 642 * @pinfo: pointer to information about the port to be registered 643 * @portptr: pointer to a remote port pointer. Upon success, the routine 644 * will allocate a nvme_fc_remote_port structure and place its 645 * address in the remote port pointer. Upon failure, remote port 646 * pointer will be set to 0. 647 * 648 * Returns: 649 * a completion status. Must be 0 upon success; a negative errno 650 * (ex: -ENXIO) upon failure. 651 */ 652 int 653 nvme_fc_register_remoteport(struct nvme_fc_local_port *localport, 654 struct nvme_fc_port_info *pinfo, 655 struct nvme_fc_remote_port **portptr) 656 { 657 struct nvme_fc_lport *lport = localport_to_lport(localport); 658 struct nvme_fc_rport *newrec; 659 unsigned long flags; 660 int ret, idx; 661 662 if (!nvme_fc_lport_get(lport)) { 663 ret = -ESHUTDOWN; 664 goto out_reghost_failed; 665 } 666 667 /* 668 * look to see if there is already a remoteport that is waiting 669 * for a reconnect (within dev_loss_tmo) with the same WWN's. 670 * If so, transition to it and reconnect. 
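	 * (This is what lets controllers ride out a link bounce that is
	 * shorter than dev_loss_tmo: a WWNN/WWPN match re-adopts the
	 * suspended rport and nvme_fc_resume_controller() kicks the
	 * reconnect work for each association still hanging off of it.)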
671 */ 672 newrec = nvme_fc_attach_to_suspended_rport(lport, pinfo); 673 674 /* found an rport, but something about its state is bad */ 675 if (IS_ERR(newrec)) { 676 ret = PTR_ERR(newrec); 677 goto out_lport_put; 678 679 /* found existing rport, which was resumed */ 680 } else if (newrec) { 681 nvme_fc_lport_put(lport); 682 __nvme_fc_set_dev_loss_tmo(newrec, pinfo); 683 nvme_fc_signal_discovery_scan(lport, newrec); 684 *portptr = &newrec->remoteport; 685 return 0; 686 } 687 688 /* nothing found - allocate a new remoteport struct */ 689 690 newrec = kmalloc((sizeof(*newrec) + lport->ops->remote_priv_sz), 691 GFP_KERNEL); 692 if (!newrec) { 693 ret = -ENOMEM; 694 goto out_lport_put; 695 } 696 697 idx = ida_simple_get(&lport->endp_cnt, 0, 0, GFP_KERNEL); 698 if (idx < 0) { 699 ret = -ENOSPC; 700 goto out_kfree_rport; 701 } 702 703 INIT_LIST_HEAD(&newrec->endp_list); 704 INIT_LIST_HEAD(&newrec->ctrl_list); 705 INIT_LIST_HEAD(&newrec->ls_req_list); 706 INIT_LIST_HEAD(&newrec->disc_list); 707 kref_init(&newrec->ref); 708 atomic_set(&newrec->act_ctrl_cnt, 0); 709 spin_lock_init(&newrec->lock); 710 newrec->remoteport.localport = &lport->localport; 711 newrec->dev = lport->dev; 712 newrec->lport = lport; 713 newrec->remoteport.private = &newrec[1]; 714 newrec->remoteport.port_role = pinfo->port_role; 715 newrec->remoteport.node_name = pinfo->node_name; 716 newrec->remoteport.port_name = pinfo->port_name; 717 newrec->remoteport.port_id = pinfo->port_id; 718 newrec->remoteport.port_state = FC_OBJSTATE_ONLINE; 719 newrec->remoteport.port_num = idx; 720 __nvme_fc_set_dev_loss_tmo(newrec, pinfo); 721 722 spin_lock_irqsave(&nvme_fc_lock, flags); 723 list_add_tail(&newrec->endp_list, &lport->endp_list); 724 spin_unlock_irqrestore(&nvme_fc_lock, flags); 725 726 nvme_fc_signal_discovery_scan(lport, newrec); 727 728 *portptr = &newrec->remoteport; 729 return 0; 730 731 out_kfree_rport: 732 kfree(newrec); 733 out_lport_put: 734 nvme_fc_lport_put(lport); 735 out_reghost_failed: 736 *portptr = NULL; 737 return ret; 738 } 739 EXPORT_SYMBOL_GPL(nvme_fc_register_remoteport); 740 741 static int 742 nvme_fc_abort_lsops(struct nvme_fc_rport *rport) 743 { 744 struct nvmefc_ls_req_op *lsop; 745 unsigned long flags; 746 747 restart: 748 spin_lock_irqsave(&rport->lock, flags); 749 750 list_for_each_entry(lsop, &rport->ls_req_list, lsreq_list) { 751 if (!(lsop->flags & FCOP_FLAGS_TERMIO)) { 752 lsop->flags |= FCOP_FLAGS_TERMIO; 753 spin_unlock_irqrestore(&rport->lock, flags); 754 rport->lport->ops->ls_abort(&rport->lport->localport, 755 &rport->remoteport, 756 &lsop->ls_req); 757 goto restart; 758 } 759 } 760 spin_unlock_irqrestore(&rport->lock, flags); 761 762 return 0; 763 } 764 765 static void 766 nvme_fc_ctrl_connectivity_loss(struct nvme_fc_ctrl *ctrl) 767 { 768 dev_info(ctrl->ctrl.device, 769 "NVME-FC{%d}: controller connectivity lost. Awaiting " 770 "Reconnect", ctrl->cnum); 771 772 switch (ctrl->ctrl.state) { 773 case NVME_CTRL_NEW: 774 case NVME_CTRL_LIVE: 775 /* 776 * Schedule a controller reset. The reset will terminate the 777 * association and schedule the reconnect timer. Reconnects 778 * will be attempted until either the ctlr_loss_tmo 779 * (max_retries * connect_delay) expires or the remoteport's 780 * dev_loss_tmo expires. 
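		 * (For a rough sense of scale - assuming the usual fabrics
		 * default ctrl_loss_tmo of 600 seconds and this transport's
		 * default 2 second reconnect delay - that allows on the
		 * order of 600 / 2 = 300 reconnect attempts, unless the
		 * remoteport's dev_loss_tmo closes the window first.)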
781 */ 782 if (nvme_reset_ctrl(&ctrl->ctrl)) { 783 dev_warn(ctrl->ctrl.device, 784 "NVME-FC{%d}: Couldn't schedule reset.\n", 785 ctrl->cnum); 786 nvme_delete_ctrl(&ctrl->ctrl); 787 } 788 break; 789 790 case NVME_CTRL_CONNECTING: 791 /* 792 * The association has already been terminated and the 793 * controller is attempting reconnects. No need to do anything 794 * futher. Reconnects will be attempted until either the 795 * ctlr_loss_tmo (max_retries * connect_delay) expires or the 796 * remoteport's dev_loss_tmo expires. 797 */ 798 break; 799 800 case NVME_CTRL_RESETTING: 801 /* 802 * Controller is already in the process of terminating the 803 * association. No need to do anything further. The reconnect 804 * step will kick in naturally after the association is 805 * terminated. 806 */ 807 break; 808 809 case NVME_CTRL_DELETING: 810 default: 811 /* no action to take - let it delete */ 812 break; 813 } 814 } 815 816 /** 817 * nvme_fc_unregister_remoteport - transport entry point called by an 818 * LLDD to deregister/remove a previously 819 * registered a NVME subsystem FC port. 820 * @portptr: pointer to the (registered) remote port that is to be 821 * deregistered. 822 * 823 * Returns: 824 * a completion status. Must be 0 upon success; a negative errno 825 * (ex: -ENXIO) upon failure. 826 */ 827 int 828 nvme_fc_unregister_remoteport(struct nvme_fc_remote_port *portptr) 829 { 830 struct nvme_fc_rport *rport = remoteport_to_rport(portptr); 831 struct nvme_fc_ctrl *ctrl; 832 unsigned long flags; 833 834 if (!portptr) 835 return -EINVAL; 836 837 spin_lock_irqsave(&rport->lock, flags); 838 839 if (portptr->port_state != FC_OBJSTATE_ONLINE) { 840 spin_unlock_irqrestore(&rport->lock, flags); 841 return -EINVAL; 842 } 843 portptr->port_state = FC_OBJSTATE_DELETED; 844 845 rport->dev_loss_end = jiffies + (portptr->dev_loss_tmo * HZ); 846 847 list_for_each_entry(ctrl, &rport->ctrl_list, ctrl_list) { 848 /* if dev_loss_tmo==0, dev loss is immediate */ 849 if (!portptr->dev_loss_tmo) { 850 dev_warn(ctrl->ctrl.device, 851 "NVME-FC{%d}: controller connectivity lost.\n", 852 ctrl->cnum); 853 nvme_delete_ctrl(&ctrl->ctrl); 854 } else 855 nvme_fc_ctrl_connectivity_loss(ctrl); 856 } 857 858 spin_unlock_irqrestore(&rport->lock, flags); 859 860 nvme_fc_abort_lsops(rport); 861 862 if (atomic_read(&rport->act_ctrl_cnt) == 0) 863 rport->lport->ops->remoteport_delete(portptr); 864 865 /* 866 * release the reference, which will allow, if all controllers 867 * go away, which should only occur after dev_loss_tmo occurs, 868 * for the rport to be torn down. 869 */ 870 nvme_fc_rport_put(rport); 871 872 return 0; 873 } 874 EXPORT_SYMBOL_GPL(nvme_fc_unregister_remoteport); 875 876 /** 877 * nvme_fc_rescan_remoteport - transport entry point called by an 878 * LLDD to request a nvme device rescan. 879 * @remoteport: pointer to the (registered) remote port that is to be 880 * rescanned. 
 *
 * Returns: N/A
 */
void
nvme_fc_rescan_remoteport(struct nvme_fc_remote_port *remoteport)
{
	struct nvme_fc_rport *rport = remoteport_to_rport(remoteport);

	nvme_fc_signal_discovery_scan(rport->lport, rport);
}
EXPORT_SYMBOL_GPL(nvme_fc_rescan_remoteport);

int
nvme_fc_set_remoteport_devloss(struct nvme_fc_remote_port *portptr,
			u32 dev_loss_tmo)
{
	struct nvme_fc_rport *rport = remoteport_to_rport(portptr);
	unsigned long flags;

	spin_lock_irqsave(&rport->lock, flags);

	if (portptr->port_state != FC_OBJSTATE_ONLINE) {
		spin_unlock_irqrestore(&rport->lock, flags);
		return -EINVAL;
	}

	/* a dev_loss_tmo of 0 (immediate) is allowed to be set */
	rport->remoteport.dev_loss_tmo = dev_loss_tmo;

	spin_unlock_irqrestore(&rport->lock, flags);

	return 0;
}
EXPORT_SYMBOL_GPL(nvme_fc_set_remoteport_devloss);


/* *********************** FC-NVME DMA Handling **************************** */

/*
 * The fcloop device passes in a NULL device pointer. Real LLDDs will
 * pass in a valid device pointer. If NULL is passed to the dma mapping
 * routines, depending on the platform, it may or may not succeed, and
 * may crash.
 *
 * As such:
 * Wrap all the dma routines and check the dev pointer.
 *
 * For simple mappings (those that return just a dma address), we'll
 * noop them, returning a dma address of 0.
 *
 * On more complex mappings (dma_map_sg), a pseudo routine fills
 * in the scatter list, setting all dma addresses to 0.
 */

static inline dma_addr_t
fc_dma_map_single(struct device *dev, void *ptr, size_t size,
		enum dma_data_direction dir)
{
	return dev ? dma_map_single(dev, ptr, size, dir) : (dma_addr_t)0L;
}

static inline int
fc_dma_mapping_error(struct device *dev, dma_addr_t dma_addr)
{
	return dev ? dma_mapping_error(dev, dma_addr) : 0;
}

static inline void
fc_dma_unmap_single(struct device *dev, dma_addr_t addr, size_t size,
	enum dma_data_direction dir)
{
	if (dev)
		dma_unmap_single(dev, addr, size, dir);
}

static inline void
fc_dma_sync_single_for_cpu(struct device *dev, dma_addr_t addr, size_t size,
		enum dma_data_direction dir)
{
	if (dev)
		dma_sync_single_for_cpu(dev, addr, size, dir);
}

static inline void
fc_dma_sync_single_for_device(struct device *dev, dma_addr_t addr, size_t size,
		enum dma_data_direction dir)
{
	if (dev)
		dma_sync_single_for_device(dev, addr, size, dir);
}

/* pseudo dma_map_sg call */
static int
fc_map_sg(struct scatterlist *sg, int nents)
{
	struct scatterlist *s;
	int i;

	WARN_ON(nents == 0 || sg[0].length == 0);

	for_each_sg(sg, s, nents, i) {
		s->dma_address = 0L;
#ifdef CONFIG_NEED_SG_DMA_LENGTH
		s->dma_length = s->length;
#endif
	}
	return nents;
}

static inline int
fc_dma_map_sg(struct device *dev, struct scatterlist *sg, int nents,
		enum dma_data_direction dir)
{
	return dev ?
dma_map_sg(dev, sg, nents, dir) : fc_map_sg(sg, nents); 995 } 996 997 static inline void 998 fc_dma_unmap_sg(struct device *dev, struct scatterlist *sg, int nents, 999 enum dma_data_direction dir) 1000 { 1001 if (dev) 1002 dma_unmap_sg(dev, sg, nents, dir); 1003 } 1004 1005 /* *********************** FC-NVME LS Handling **************************** */ 1006 1007 static void nvme_fc_ctrl_put(struct nvme_fc_ctrl *); 1008 static int nvme_fc_ctrl_get(struct nvme_fc_ctrl *); 1009 1010 1011 static void 1012 __nvme_fc_finish_ls_req(struct nvmefc_ls_req_op *lsop) 1013 { 1014 struct nvme_fc_rport *rport = lsop->rport; 1015 struct nvmefc_ls_req *lsreq = &lsop->ls_req; 1016 unsigned long flags; 1017 1018 spin_lock_irqsave(&rport->lock, flags); 1019 1020 if (!lsop->req_queued) { 1021 spin_unlock_irqrestore(&rport->lock, flags); 1022 return; 1023 } 1024 1025 list_del(&lsop->lsreq_list); 1026 1027 lsop->req_queued = false; 1028 1029 spin_unlock_irqrestore(&rport->lock, flags); 1030 1031 fc_dma_unmap_single(rport->dev, lsreq->rqstdma, 1032 (lsreq->rqstlen + lsreq->rsplen), 1033 DMA_BIDIRECTIONAL); 1034 1035 nvme_fc_rport_put(rport); 1036 } 1037 1038 static int 1039 __nvme_fc_send_ls_req(struct nvme_fc_rport *rport, 1040 struct nvmefc_ls_req_op *lsop, 1041 void (*done)(struct nvmefc_ls_req *req, int status)) 1042 { 1043 struct nvmefc_ls_req *lsreq = &lsop->ls_req; 1044 unsigned long flags; 1045 int ret = 0; 1046 1047 if (rport->remoteport.port_state != FC_OBJSTATE_ONLINE) 1048 return -ECONNREFUSED; 1049 1050 if (!nvme_fc_rport_get(rport)) 1051 return -ESHUTDOWN; 1052 1053 lsreq->done = done; 1054 lsop->rport = rport; 1055 lsop->req_queued = false; 1056 INIT_LIST_HEAD(&lsop->lsreq_list); 1057 init_completion(&lsop->ls_done); 1058 1059 lsreq->rqstdma = fc_dma_map_single(rport->dev, lsreq->rqstaddr, 1060 lsreq->rqstlen + lsreq->rsplen, 1061 DMA_BIDIRECTIONAL); 1062 if (fc_dma_mapping_error(rport->dev, lsreq->rqstdma)) { 1063 ret = -EFAULT; 1064 goto out_putrport; 1065 } 1066 lsreq->rspdma = lsreq->rqstdma + lsreq->rqstlen; 1067 1068 spin_lock_irqsave(&rport->lock, flags); 1069 1070 list_add_tail(&lsop->lsreq_list, &rport->ls_req_list); 1071 1072 lsop->req_queued = true; 1073 1074 spin_unlock_irqrestore(&rport->lock, flags); 1075 1076 ret = rport->lport->ops->ls_req(&rport->lport->localport, 1077 &rport->remoteport, lsreq); 1078 if (ret) 1079 goto out_unlink; 1080 1081 return 0; 1082 1083 out_unlink: 1084 lsop->ls_error = ret; 1085 spin_lock_irqsave(&rport->lock, flags); 1086 lsop->req_queued = false; 1087 list_del(&lsop->lsreq_list); 1088 spin_unlock_irqrestore(&rport->lock, flags); 1089 fc_dma_unmap_single(rport->dev, lsreq->rqstdma, 1090 (lsreq->rqstlen + lsreq->rsplen), 1091 DMA_BIDIRECTIONAL); 1092 out_putrport: 1093 nvme_fc_rport_put(rport); 1094 1095 return ret; 1096 } 1097 1098 static void 1099 nvme_fc_send_ls_req_done(struct nvmefc_ls_req *lsreq, int status) 1100 { 1101 struct nvmefc_ls_req_op *lsop = ls_req_to_lsop(lsreq); 1102 1103 lsop->ls_error = status; 1104 complete(&lsop->ls_done); 1105 } 1106 1107 static int 1108 nvme_fc_send_ls_req(struct nvme_fc_rport *rport, struct nvmefc_ls_req_op *lsop) 1109 { 1110 struct nvmefc_ls_req *lsreq = &lsop->ls_req; 1111 struct fcnvme_ls_rjt *rjt = lsreq->rspaddr; 1112 int ret; 1113 1114 ret = __nvme_fc_send_ls_req(rport, lsop, nvme_fc_send_ls_req_done); 1115 1116 if (!ret) { 1117 /* 1118 * No timeout/not interruptible as we need the struct 1119 * to exist until the lldd calls us back. Thus mandate 1120 * wait until driver calls back. 
lldd responsible for 1121 * the timeout action 1122 */ 1123 wait_for_completion(&lsop->ls_done); 1124 1125 __nvme_fc_finish_ls_req(lsop); 1126 1127 ret = lsop->ls_error; 1128 } 1129 1130 if (ret) 1131 return ret; 1132 1133 /* ACC or RJT payload ? */ 1134 if (rjt->w0.ls_cmd == FCNVME_LS_RJT) 1135 return -ENXIO; 1136 1137 return 0; 1138 } 1139 1140 static int 1141 nvme_fc_send_ls_req_async(struct nvme_fc_rport *rport, 1142 struct nvmefc_ls_req_op *lsop, 1143 void (*done)(struct nvmefc_ls_req *req, int status)) 1144 { 1145 /* don't wait for completion */ 1146 1147 return __nvme_fc_send_ls_req(rport, lsop, done); 1148 } 1149 1150 /* Validation Error indexes into the string table below */ 1151 enum { 1152 VERR_NO_ERROR = 0, 1153 VERR_LSACC = 1, 1154 VERR_LSDESC_RQST = 2, 1155 VERR_LSDESC_RQST_LEN = 3, 1156 VERR_ASSOC_ID = 4, 1157 VERR_ASSOC_ID_LEN = 5, 1158 VERR_CONN_ID = 6, 1159 VERR_CONN_ID_LEN = 7, 1160 VERR_CR_ASSOC = 8, 1161 VERR_CR_ASSOC_ACC_LEN = 9, 1162 VERR_CR_CONN = 10, 1163 VERR_CR_CONN_ACC_LEN = 11, 1164 VERR_DISCONN = 12, 1165 VERR_DISCONN_ACC_LEN = 13, 1166 }; 1167 1168 static char *validation_errors[] = { 1169 "OK", 1170 "Not LS_ACC", 1171 "Not LSDESC_RQST", 1172 "Bad LSDESC_RQST Length", 1173 "Not Association ID", 1174 "Bad Association ID Length", 1175 "Not Connection ID", 1176 "Bad Connection ID Length", 1177 "Not CR_ASSOC Rqst", 1178 "Bad CR_ASSOC ACC Length", 1179 "Not CR_CONN Rqst", 1180 "Bad CR_CONN ACC Length", 1181 "Not Disconnect Rqst", 1182 "Bad Disconnect ACC Length", 1183 }; 1184 1185 static int 1186 nvme_fc_connect_admin_queue(struct nvme_fc_ctrl *ctrl, 1187 struct nvme_fc_queue *queue, u16 qsize, u16 ersp_ratio) 1188 { 1189 struct nvmefc_ls_req_op *lsop; 1190 struct nvmefc_ls_req *lsreq; 1191 struct fcnvme_ls_cr_assoc_rqst *assoc_rqst; 1192 struct fcnvme_ls_cr_assoc_acc *assoc_acc; 1193 int ret, fcret = 0; 1194 1195 lsop = kzalloc((sizeof(*lsop) + 1196 ctrl->lport->ops->lsrqst_priv_sz + 1197 sizeof(*assoc_rqst) + sizeof(*assoc_acc)), GFP_KERNEL); 1198 if (!lsop) { 1199 ret = -ENOMEM; 1200 goto out_no_memory; 1201 } 1202 lsreq = &lsop->ls_req; 1203 1204 lsreq->private = (void *)&lsop[1]; 1205 assoc_rqst = (struct fcnvme_ls_cr_assoc_rqst *) 1206 (lsreq->private + ctrl->lport->ops->lsrqst_priv_sz); 1207 assoc_acc = (struct fcnvme_ls_cr_assoc_acc *)&assoc_rqst[1]; 1208 1209 assoc_rqst->w0.ls_cmd = FCNVME_LS_CREATE_ASSOCIATION; 1210 assoc_rqst->desc_list_len = 1211 cpu_to_be32(sizeof(struct fcnvme_lsdesc_cr_assoc_cmd)); 1212 1213 assoc_rqst->assoc_cmd.desc_tag = 1214 cpu_to_be32(FCNVME_LSDESC_CREATE_ASSOC_CMD); 1215 assoc_rqst->assoc_cmd.desc_len = 1216 fcnvme_lsdesc_len( 1217 sizeof(struct fcnvme_lsdesc_cr_assoc_cmd)); 1218 1219 assoc_rqst->assoc_cmd.ersp_ratio = cpu_to_be16(ersp_ratio); 1220 assoc_rqst->assoc_cmd.sqsize = cpu_to_be16(qsize - 1); 1221 /* Linux supports only Dynamic controllers */ 1222 assoc_rqst->assoc_cmd.cntlid = cpu_to_be16(0xffff); 1223 uuid_copy(&assoc_rqst->assoc_cmd.hostid, &ctrl->ctrl.opts->host->id); 1224 strncpy(assoc_rqst->assoc_cmd.hostnqn, ctrl->ctrl.opts->host->nqn, 1225 min(FCNVME_ASSOC_HOSTNQN_LEN, NVMF_NQN_SIZE)); 1226 strncpy(assoc_rqst->assoc_cmd.subnqn, ctrl->ctrl.opts->subsysnqn, 1227 min(FCNVME_ASSOC_SUBNQN_LEN, NVMF_NQN_SIZE)); 1228 1229 lsop->queue = queue; 1230 lsreq->rqstaddr = assoc_rqst; 1231 lsreq->rqstlen = sizeof(*assoc_rqst); 1232 lsreq->rspaddr = assoc_acc; 1233 lsreq->rsplen = sizeof(*assoc_acc); 1234 lsreq->timeout = NVME_FC_CONNECT_TIMEOUT_SEC; 1235 1236 ret = nvme_fc_send_ls_req(ctrl->rport, lsop); 1237 if (ret) 1238 
goto out_free_buffer; 1239 1240 /* process connect LS completion */ 1241 1242 /* validate the ACC response */ 1243 if (assoc_acc->hdr.w0.ls_cmd != FCNVME_LS_ACC) 1244 fcret = VERR_LSACC; 1245 else if (assoc_acc->hdr.desc_list_len != 1246 fcnvme_lsdesc_len( 1247 sizeof(struct fcnvme_ls_cr_assoc_acc))) 1248 fcret = VERR_CR_ASSOC_ACC_LEN; 1249 else if (assoc_acc->hdr.rqst.desc_tag != 1250 cpu_to_be32(FCNVME_LSDESC_RQST)) 1251 fcret = VERR_LSDESC_RQST; 1252 else if (assoc_acc->hdr.rqst.desc_len != 1253 fcnvme_lsdesc_len(sizeof(struct fcnvme_lsdesc_rqst))) 1254 fcret = VERR_LSDESC_RQST_LEN; 1255 else if (assoc_acc->hdr.rqst.w0.ls_cmd != FCNVME_LS_CREATE_ASSOCIATION) 1256 fcret = VERR_CR_ASSOC; 1257 else if (assoc_acc->associd.desc_tag != 1258 cpu_to_be32(FCNVME_LSDESC_ASSOC_ID)) 1259 fcret = VERR_ASSOC_ID; 1260 else if (assoc_acc->associd.desc_len != 1261 fcnvme_lsdesc_len( 1262 sizeof(struct fcnvme_lsdesc_assoc_id))) 1263 fcret = VERR_ASSOC_ID_LEN; 1264 else if (assoc_acc->connectid.desc_tag != 1265 cpu_to_be32(FCNVME_LSDESC_CONN_ID)) 1266 fcret = VERR_CONN_ID; 1267 else if (assoc_acc->connectid.desc_len != 1268 fcnvme_lsdesc_len(sizeof(struct fcnvme_lsdesc_conn_id))) 1269 fcret = VERR_CONN_ID_LEN; 1270 1271 if (fcret) { 1272 ret = -EBADF; 1273 dev_err(ctrl->dev, 1274 "q %d connect failed: %s\n", 1275 queue->qnum, validation_errors[fcret]); 1276 } else { 1277 ctrl->association_id = 1278 be64_to_cpu(assoc_acc->associd.association_id); 1279 queue->connection_id = 1280 be64_to_cpu(assoc_acc->connectid.connection_id); 1281 set_bit(NVME_FC_Q_CONNECTED, &queue->flags); 1282 } 1283 1284 out_free_buffer: 1285 kfree(lsop); 1286 out_no_memory: 1287 if (ret) 1288 dev_err(ctrl->dev, 1289 "queue %d connect admin queue failed (%d).\n", 1290 queue->qnum, ret); 1291 return ret; 1292 } 1293 1294 static int 1295 nvme_fc_connect_queue(struct nvme_fc_ctrl *ctrl, struct nvme_fc_queue *queue, 1296 u16 qsize, u16 ersp_ratio) 1297 { 1298 struct nvmefc_ls_req_op *lsop; 1299 struct nvmefc_ls_req *lsreq; 1300 struct fcnvme_ls_cr_conn_rqst *conn_rqst; 1301 struct fcnvme_ls_cr_conn_acc *conn_acc; 1302 int ret, fcret = 0; 1303 1304 lsop = kzalloc((sizeof(*lsop) + 1305 ctrl->lport->ops->lsrqst_priv_sz + 1306 sizeof(*conn_rqst) + sizeof(*conn_acc)), GFP_KERNEL); 1307 if (!lsop) { 1308 ret = -ENOMEM; 1309 goto out_no_memory; 1310 } 1311 lsreq = &lsop->ls_req; 1312 1313 lsreq->private = (void *)&lsop[1]; 1314 conn_rqst = (struct fcnvme_ls_cr_conn_rqst *) 1315 (lsreq->private + ctrl->lport->ops->lsrqst_priv_sz); 1316 conn_acc = (struct fcnvme_ls_cr_conn_acc *)&conn_rqst[1]; 1317 1318 conn_rqst->w0.ls_cmd = FCNVME_LS_CREATE_CONNECTION; 1319 conn_rqst->desc_list_len = cpu_to_be32( 1320 sizeof(struct fcnvme_lsdesc_assoc_id) + 1321 sizeof(struct fcnvme_lsdesc_cr_conn_cmd)); 1322 1323 conn_rqst->associd.desc_tag = cpu_to_be32(FCNVME_LSDESC_ASSOC_ID); 1324 conn_rqst->associd.desc_len = 1325 fcnvme_lsdesc_len( 1326 sizeof(struct fcnvme_lsdesc_assoc_id)); 1327 conn_rqst->associd.association_id = cpu_to_be64(ctrl->association_id); 1328 conn_rqst->connect_cmd.desc_tag = 1329 cpu_to_be32(FCNVME_LSDESC_CREATE_CONN_CMD); 1330 conn_rqst->connect_cmd.desc_len = 1331 fcnvme_lsdesc_len( 1332 sizeof(struct fcnvme_lsdesc_cr_conn_cmd)); 1333 conn_rqst->connect_cmd.ersp_ratio = cpu_to_be16(ersp_ratio); 1334 conn_rqst->connect_cmd.qid = cpu_to_be16(queue->qnum); 1335 conn_rqst->connect_cmd.sqsize = cpu_to_be16(qsize - 1); 1336 1337 lsop->queue = queue; 1338 lsreq->rqstaddr = conn_rqst; 1339 lsreq->rqstlen = sizeof(*conn_rqst); 1340 lsreq->rspaddr = 
conn_acc; 1341 lsreq->rsplen = sizeof(*conn_acc); 1342 lsreq->timeout = NVME_FC_CONNECT_TIMEOUT_SEC; 1343 1344 ret = nvme_fc_send_ls_req(ctrl->rport, lsop); 1345 if (ret) 1346 goto out_free_buffer; 1347 1348 /* process connect LS completion */ 1349 1350 /* validate the ACC response */ 1351 if (conn_acc->hdr.w0.ls_cmd != FCNVME_LS_ACC) 1352 fcret = VERR_LSACC; 1353 else if (conn_acc->hdr.desc_list_len != 1354 fcnvme_lsdesc_len(sizeof(struct fcnvme_ls_cr_conn_acc))) 1355 fcret = VERR_CR_CONN_ACC_LEN; 1356 else if (conn_acc->hdr.rqst.desc_tag != cpu_to_be32(FCNVME_LSDESC_RQST)) 1357 fcret = VERR_LSDESC_RQST; 1358 else if (conn_acc->hdr.rqst.desc_len != 1359 fcnvme_lsdesc_len(sizeof(struct fcnvme_lsdesc_rqst))) 1360 fcret = VERR_LSDESC_RQST_LEN; 1361 else if (conn_acc->hdr.rqst.w0.ls_cmd != FCNVME_LS_CREATE_CONNECTION) 1362 fcret = VERR_CR_CONN; 1363 else if (conn_acc->connectid.desc_tag != 1364 cpu_to_be32(FCNVME_LSDESC_CONN_ID)) 1365 fcret = VERR_CONN_ID; 1366 else if (conn_acc->connectid.desc_len != 1367 fcnvme_lsdesc_len(sizeof(struct fcnvme_lsdesc_conn_id))) 1368 fcret = VERR_CONN_ID_LEN; 1369 1370 if (fcret) { 1371 ret = -EBADF; 1372 dev_err(ctrl->dev, 1373 "q %d connect failed: %s\n", 1374 queue->qnum, validation_errors[fcret]); 1375 } else { 1376 queue->connection_id = 1377 be64_to_cpu(conn_acc->connectid.connection_id); 1378 set_bit(NVME_FC_Q_CONNECTED, &queue->flags); 1379 } 1380 1381 out_free_buffer: 1382 kfree(lsop); 1383 out_no_memory: 1384 if (ret) 1385 dev_err(ctrl->dev, 1386 "queue %d connect command failed (%d).\n", 1387 queue->qnum, ret); 1388 return ret; 1389 } 1390 1391 static void 1392 nvme_fc_disconnect_assoc_done(struct nvmefc_ls_req *lsreq, int status) 1393 { 1394 struct nvmefc_ls_req_op *lsop = ls_req_to_lsop(lsreq); 1395 1396 __nvme_fc_finish_ls_req(lsop); 1397 1398 /* fc-nvme initiator doesn't care about success or failure of cmd */ 1399 1400 kfree(lsop); 1401 } 1402 1403 /* 1404 * This routine sends a FC-NVME LS to disconnect (aka terminate) 1405 * the FC-NVME Association. Terminating the association also 1406 * terminates the FC-NVME connections (per queue, both admin and io 1407 * queues) that are part of the association. E.g. things are torn 1408 * down, and the related FC-NVME Association ID and Connection IDs 1409 * become invalid. 1410 * 1411 * The behavior of the fc-nvme initiator is such that it's 1412 * understanding of the association and connections will implicitly 1413 * be torn down. The action is implicit as it may be due to a loss of 1414 * connectivity with the fc-nvme target, so you may never get a 1415 * response even if you tried. As such, the action of this routine 1416 * is to asynchronously send the LS, ignore any results of the LS, and 1417 * continue on with terminating the association. If the fc-nvme target 1418 * is present and receives the LS, it too can tear down. 1419 */ 1420 static void 1421 nvme_fc_xmt_disconnect_assoc(struct nvme_fc_ctrl *ctrl) 1422 { 1423 struct fcnvme_ls_disconnect_rqst *discon_rqst; 1424 struct fcnvme_ls_disconnect_acc *discon_acc; 1425 struct nvmefc_ls_req_op *lsop; 1426 struct nvmefc_ls_req *lsreq; 1427 int ret; 1428 1429 lsop = kzalloc((sizeof(*lsop) + 1430 ctrl->lport->ops->lsrqst_priv_sz + 1431 sizeof(*discon_rqst) + sizeof(*discon_acc)), 1432 GFP_KERNEL); 1433 if (!lsop) 1434 /* couldn't sent it... 
too bad */ 1435 return; 1436 1437 lsreq = &lsop->ls_req; 1438 1439 lsreq->private = (void *)&lsop[1]; 1440 discon_rqst = (struct fcnvme_ls_disconnect_rqst *) 1441 (lsreq->private + ctrl->lport->ops->lsrqst_priv_sz); 1442 discon_acc = (struct fcnvme_ls_disconnect_acc *)&discon_rqst[1]; 1443 1444 discon_rqst->w0.ls_cmd = FCNVME_LS_DISCONNECT; 1445 discon_rqst->desc_list_len = cpu_to_be32( 1446 sizeof(struct fcnvme_lsdesc_assoc_id) + 1447 sizeof(struct fcnvme_lsdesc_disconn_cmd)); 1448 1449 discon_rqst->associd.desc_tag = cpu_to_be32(FCNVME_LSDESC_ASSOC_ID); 1450 discon_rqst->associd.desc_len = 1451 fcnvme_lsdesc_len( 1452 sizeof(struct fcnvme_lsdesc_assoc_id)); 1453 1454 discon_rqst->associd.association_id = cpu_to_be64(ctrl->association_id); 1455 1456 discon_rqst->discon_cmd.desc_tag = cpu_to_be32( 1457 FCNVME_LSDESC_DISCONN_CMD); 1458 discon_rqst->discon_cmd.desc_len = 1459 fcnvme_lsdesc_len( 1460 sizeof(struct fcnvme_lsdesc_disconn_cmd)); 1461 discon_rqst->discon_cmd.scope = FCNVME_DISCONN_ASSOCIATION; 1462 discon_rqst->discon_cmd.id = cpu_to_be64(ctrl->association_id); 1463 1464 lsreq->rqstaddr = discon_rqst; 1465 lsreq->rqstlen = sizeof(*discon_rqst); 1466 lsreq->rspaddr = discon_acc; 1467 lsreq->rsplen = sizeof(*discon_acc); 1468 lsreq->timeout = NVME_FC_CONNECT_TIMEOUT_SEC; 1469 1470 ret = nvme_fc_send_ls_req_async(ctrl->rport, lsop, 1471 nvme_fc_disconnect_assoc_done); 1472 if (ret) 1473 kfree(lsop); 1474 1475 /* only meaningful part to terminating the association */ 1476 ctrl->association_id = 0; 1477 } 1478 1479 1480 /* *********************** NVME Ctrl Routines **************************** */ 1481 1482 static void nvme_fc_error_recovery(struct nvme_fc_ctrl *ctrl, char *errmsg); 1483 1484 static void 1485 __nvme_fc_exit_request(struct nvme_fc_ctrl *ctrl, 1486 struct nvme_fc_fcp_op *op) 1487 { 1488 fc_dma_unmap_single(ctrl->lport->dev, op->fcp_req.rspdma, 1489 sizeof(op->rsp_iu), DMA_FROM_DEVICE); 1490 fc_dma_unmap_single(ctrl->lport->dev, op->fcp_req.cmddma, 1491 sizeof(op->cmd_iu), DMA_TO_DEVICE); 1492 1493 atomic_set(&op->state, FCPOP_STATE_UNINIT); 1494 } 1495 1496 static void 1497 nvme_fc_exit_request(struct blk_mq_tag_set *set, struct request *rq, 1498 unsigned int hctx_idx) 1499 { 1500 struct nvme_fc_fcp_op *op = blk_mq_rq_to_pdu(rq); 1501 1502 return __nvme_fc_exit_request(set->driver_data, op); 1503 } 1504 1505 static int 1506 __nvme_fc_abort_op(struct nvme_fc_ctrl *ctrl, struct nvme_fc_fcp_op *op) 1507 { 1508 unsigned long flags; 1509 int opstate; 1510 1511 spin_lock_irqsave(&ctrl->lock, flags); 1512 opstate = atomic_xchg(&op->state, FCPOP_STATE_ABORTED); 1513 if (opstate != FCPOP_STATE_ACTIVE) 1514 atomic_set(&op->state, opstate); 1515 else if (ctrl->flags & FCCTRL_TERMIO) 1516 ctrl->iocnt++; 1517 spin_unlock_irqrestore(&ctrl->lock, flags); 1518 1519 if (opstate != FCPOP_STATE_ACTIVE) 1520 return -ECANCELED; 1521 1522 ctrl->lport->ops->fcp_abort(&ctrl->lport->localport, 1523 &ctrl->rport->remoteport, 1524 op->queue->lldd_handle, 1525 &op->fcp_req); 1526 1527 return 0; 1528 } 1529 1530 static void 1531 nvme_fc_abort_aen_ops(struct nvme_fc_ctrl *ctrl) 1532 { 1533 struct nvme_fc_fcp_op *aen_op = ctrl->aen_ops; 1534 int i; 1535 1536 /* ensure we've initialized the ops once */ 1537 if (!(aen_op->flags & FCOP_FLAGS_AEN)) 1538 return; 1539 1540 for (i = 0; i < NVME_NR_AEN_COMMANDS; i++, aen_op++) 1541 __nvme_fc_abort_op(ctrl, aen_op); 1542 } 1543 1544 static inline void 1545 __nvme_fc_fcpop_chk_teardowns(struct nvme_fc_ctrl *ctrl, 1546 struct nvme_fc_fcp_op *op, int opstate) 
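/*
 * Teardown accounting helper: if this op was aborted as part of a
 * FCCTRL_TERMIO association teardown, drop the controller's count of
 * outstanding terminated ios and wake the waiter once it hits zero.
 */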
{
	unsigned long flags;

	if (opstate == FCPOP_STATE_ABORTED) {
		spin_lock_irqsave(&ctrl->lock, flags);
		if (ctrl->flags & FCCTRL_TERMIO) {
			if (!--ctrl->iocnt)
				wake_up(&ctrl->ioabort_wait);
		}
		spin_unlock_irqrestore(&ctrl->lock, flags);
	}
}

static void
nvme_fc_fcpio_done(struct nvmefc_fcp_req *req)
{
	struct nvme_fc_fcp_op *op = fcp_req_to_fcp_op(req);
	struct request *rq = op->rq;
	struct nvmefc_fcp_req *freq = &op->fcp_req;
	struct nvme_fc_ctrl *ctrl = op->ctrl;
	struct nvme_fc_queue *queue = op->queue;
	struct nvme_completion *cqe = &op->rsp_iu.cqe;
	struct nvme_command *sqe = &op->cmd_iu.sqe;
	__le16 status = cpu_to_le16(NVME_SC_SUCCESS << 1);
	union nvme_result result;
	bool terminate_assoc = true;
	int opstate;

	/*
	 * WARNING:
	 * The current linux implementation of a nvme controller
	 * allocates a single tag set for all io queues and sizes
	 * the io queues to fully hold all possible tags. Thus, the
	 * implementation does not reference or care about the sqhd
	 * value as it never needs to use the sqhd/sqtail pointers
	 * for submission pacing.
	 *
	 * This affects the FC-NVME implementation in two ways:
	 * 1) As the value doesn't matter, we don't need to waste
	 *    cycles extracting it from ERSPs and stamping it in the
	 *    cases where the transport fabricates CQEs on successful
	 *    completions.
	 * 2) The FC-NVME implementation requires that delivery of
	 *    ERSP completions is to go back to the nvme layer in order
	 *    relative to the rsn, such that the sqhd value will always
	 *    be "in order" for the nvme layer. As the nvme layer in
	 *    linux doesn't care about sqhd, there's no need to return
	 *    them in order.
	 *
	 * Additionally:
	 * As the core nvme layer in linux currently does not look at
	 * every field in the cqe - in cases where the FC transport must
	 * fabricate a CQE, the following fields will not be set as they
	 * are not referenced:
	 *      cqe.sqid, cqe.sqhd, cqe.command_id
	 *
	 * Failure or error of an individual i/o, in a transport
	 * detected fashion unrelated to the nvme completion status,
	 * can potentially cause the initiator and target sides to get
	 * out of sync on SQ head/tail (aka outstanding io count allowed).
	 * Per FC-NVME spec, failure of an individual command requires
	 * the connection to be terminated, which in turn requires the
	 * association to be terminated.
	 */

	opstate = atomic_xchg(&op->state, FCPOP_STATE_COMPLETE);

	fc_dma_sync_single_for_cpu(ctrl->lport->dev, op->fcp_req.rspdma,
				sizeof(op->rsp_iu), DMA_FROM_DEVICE);

	if (opstate == FCPOP_STATE_ABORTED)
		status = cpu_to_le16(NVME_SC_ABORT_REQ << 1);
	else if (freq->status)
		status = cpu_to_le16(NVME_SC_INTERNAL << 1);

	/*
	 * For the linux implementation, if we have an unsuccessful
	 * status, the blk-mq layer can typically be called with the
	 * non-zero status and the content of the cqe isn't important.
	 */
	if (status)
		goto done;

	/*
	 * command completed successfully relative to the wire
	 * protocol. However, validate anything received and
	 * extract the status and result from the cqe (create it
	 * where necessary).
1635 */ 1636 1637 switch (freq->rcv_rsplen) { 1638 1639 case 0: 1640 case NVME_FC_SIZEOF_ZEROS_RSP: 1641 /* 1642 * No response payload or 12 bytes of payload (which 1643 * should all be zeros) are considered successful and 1644 * no payload in the CQE by the transport. 1645 */ 1646 if (freq->transferred_length != 1647 be32_to_cpu(op->cmd_iu.data_len)) { 1648 status = cpu_to_le16(NVME_SC_INTERNAL << 1); 1649 goto done; 1650 } 1651 result.u64 = 0; 1652 break; 1653 1654 case sizeof(struct nvme_fc_ersp_iu): 1655 /* 1656 * The ERSP IU contains a full completion with CQE. 1657 * Validate ERSP IU and look at cqe. 1658 */ 1659 if (unlikely(be16_to_cpu(op->rsp_iu.iu_len) != 1660 (freq->rcv_rsplen / 4) || 1661 be32_to_cpu(op->rsp_iu.xfrd_len) != 1662 freq->transferred_length || 1663 op->rsp_iu.status_code || 1664 sqe->common.command_id != cqe->command_id)) { 1665 status = cpu_to_le16(NVME_SC_INTERNAL << 1); 1666 goto done; 1667 } 1668 result = cqe->result; 1669 status = cqe->status; 1670 break; 1671 1672 default: 1673 status = cpu_to_le16(NVME_SC_INTERNAL << 1); 1674 goto done; 1675 } 1676 1677 terminate_assoc = false; 1678 1679 done: 1680 if (op->flags & FCOP_FLAGS_AEN) { 1681 nvme_complete_async_event(&queue->ctrl->ctrl, status, &result); 1682 __nvme_fc_fcpop_chk_teardowns(ctrl, op, opstate); 1683 atomic_set(&op->state, FCPOP_STATE_IDLE); 1684 op->flags = FCOP_FLAGS_AEN; /* clear other flags */ 1685 nvme_fc_ctrl_put(ctrl); 1686 goto check_error; 1687 } 1688 1689 __nvme_fc_fcpop_chk_teardowns(ctrl, op, opstate); 1690 nvme_end_request(rq, status, result); 1691 1692 check_error: 1693 if (terminate_assoc) 1694 nvme_fc_error_recovery(ctrl, "transport detected io error"); 1695 } 1696 1697 static int 1698 __nvme_fc_init_request(struct nvme_fc_ctrl *ctrl, 1699 struct nvme_fc_queue *queue, struct nvme_fc_fcp_op *op, 1700 struct request *rq, u32 rqno) 1701 { 1702 struct nvme_fcp_op_w_sgl *op_w_sgl = 1703 container_of(op, typeof(*op_w_sgl), op); 1704 struct nvme_fc_cmd_iu *cmdiu = &op->cmd_iu; 1705 int ret = 0; 1706 1707 memset(op, 0, sizeof(*op)); 1708 op->fcp_req.cmdaddr = &op->cmd_iu; 1709 op->fcp_req.cmdlen = sizeof(op->cmd_iu); 1710 op->fcp_req.rspaddr = &op->rsp_iu; 1711 op->fcp_req.rsplen = sizeof(op->rsp_iu); 1712 op->fcp_req.done = nvme_fc_fcpio_done; 1713 op->ctrl = ctrl; 1714 op->queue = queue; 1715 op->rq = rq; 1716 op->rqno = rqno; 1717 1718 cmdiu->scsi_id = NVME_CMD_SCSI_ID; 1719 cmdiu->fc_id = NVME_CMD_FC_ID; 1720 cmdiu->iu_len = cpu_to_be16(sizeof(*cmdiu) / sizeof(u32)); 1721 1722 op->fcp_req.cmddma = fc_dma_map_single(ctrl->lport->dev, 1723 &op->cmd_iu, sizeof(op->cmd_iu), DMA_TO_DEVICE); 1724 if (fc_dma_mapping_error(ctrl->lport->dev, op->fcp_req.cmddma)) { 1725 dev_err(ctrl->dev, 1726 "FCP Op failed - cmdiu dma mapping failed.\n"); 1727 ret = EFAULT; 1728 goto out_on_error; 1729 } 1730 1731 op->fcp_req.rspdma = fc_dma_map_single(ctrl->lport->dev, 1732 &op->rsp_iu, sizeof(op->rsp_iu), 1733 DMA_FROM_DEVICE); 1734 if (fc_dma_mapping_error(ctrl->lport->dev, op->fcp_req.rspdma)) { 1735 dev_err(ctrl->dev, 1736 "FCP Op failed - rspiu dma mapping failed.\n"); 1737 ret = EFAULT; 1738 } 1739 1740 atomic_set(&op->state, FCPOP_STATE_IDLE); 1741 out_on_error: 1742 return ret; 1743 } 1744 1745 static int 1746 nvme_fc_init_request(struct blk_mq_tag_set *set, struct request *rq, 1747 unsigned int hctx_idx, unsigned int numa_node) 1748 { 1749 struct nvme_fc_ctrl *ctrl = set->driver_data; 1750 struct nvme_fcp_op_w_sgl *op = blk_mq_rq_to_pdu(rq); 1751 int queue_idx = (set == &ctrl->tag_set) ? 
hctx_idx + 1 : 0; 1752 struct nvme_fc_queue *queue = &ctrl->queues[queue_idx]; 1753 int res; 1754 1755 nvme_req(rq)->ctrl = &ctrl->ctrl; 1756 res = __nvme_fc_init_request(ctrl, queue, &op->op, rq, queue->rqcnt++); 1757 if (res) 1758 return res; 1759 op->op.fcp_req.first_sgl = &op->sgl[0]; 1760 op->op.fcp_req.private = &op->priv[0]; 1761 return res; 1762 } 1763 1764 static int 1765 nvme_fc_init_aen_ops(struct nvme_fc_ctrl *ctrl) 1766 { 1767 struct nvme_fc_fcp_op *aen_op; 1768 struct nvme_fc_cmd_iu *cmdiu; 1769 struct nvme_command *sqe; 1770 void *private; 1771 int i, ret; 1772 1773 aen_op = ctrl->aen_ops; 1774 for (i = 0; i < NVME_NR_AEN_COMMANDS; i++, aen_op++) { 1775 private = kzalloc(ctrl->lport->ops->fcprqst_priv_sz, 1776 GFP_KERNEL); 1777 if (!private) 1778 return -ENOMEM; 1779 1780 cmdiu = &aen_op->cmd_iu; 1781 sqe = &cmdiu->sqe; 1782 ret = __nvme_fc_init_request(ctrl, &ctrl->queues[0], 1783 aen_op, (struct request *)NULL, 1784 (NVME_AQ_BLK_MQ_DEPTH + i)); 1785 if (ret) { 1786 kfree(private); 1787 return ret; 1788 } 1789 1790 aen_op->flags = FCOP_FLAGS_AEN; 1791 aen_op->fcp_req.private = private; 1792 1793 memset(sqe, 0, sizeof(*sqe)); 1794 sqe->common.opcode = nvme_admin_async_event; 1795 /* Note: core layer may overwrite the sqe.command_id value */ 1796 sqe->common.command_id = NVME_AQ_BLK_MQ_DEPTH + i; 1797 } 1798 return 0; 1799 } 1800 1801 static void 1802 nvme_fc_term_aen_ops(struct nvme_fc_ctrl *ctrl) 1803 { 1804 struct nvme_fc_fcp_op *aen_op; 1805 int i; 1806 1807 aen_op = ctrl->aen_ops; 1808 for (i = 0; i < NVME_NR_AEN_COMMANDS; i++, aen_op++) { 1809 if (!aen_op->fcp_req.private) 1810 continue; 1811 1812 __nvme_fc_exit_request(ctrl, aen_op); 1813 1814 kfree(aen_op->fcp_req.private); 1815 aen_op->fcp_req.private = NULL; 1816 } 1817 } 1818 1819 static inline void 1820 __nvme_fc_init_hctx(struct blk_mq_hw_ctx *hctx, struct nvme_fc_ctrl *ctrl, 1821 unsigned int qidx) 1822 { 1823 struct nvme_fc_queue *queue = &ctrl->queues[qidx]; 1824 1825 hctx->driver_data = queue; 1826 queue->hctx = hctx; 1827 } 1828 1829 static int 1830 nvme_fc_init_hctx(struct blk_mq_hw_ctx *hctx, void *data, 1831 unsigned int hctx_idx) 1832 { 1833 struct nvme_fc_ctrl *ctrl = data; 1834 1835 __nvme_fc_init_hctx(hctx, ctrl, hctx_idx + 1); 1836 1837 return 0; 1838 } 1839 1840 static int 1841 nvme_fc_init_admin_hctx(struct blk_mq_hw_ctx *hctx, void *data, 1842 unsigned int hctx_idx) 1843 { 1844 struct nvme_fc_ctrl *ctrl = data; 1845 1846 __nvme_fc_init_hctx(hctx, ctrl, hctx_idx); 1847 1848 return 0; 1849 } 1850 1851 static void 1852 nvme_fc_init_queue(struct nvme_fc_ctrl *ctrl, int idx) 1853 { 1854 struct nvme_fc_queue *queue; 1855 1856 queue = &ctrl->queues[idx]; 1857 memset(queue, 0, sizeof(*queue)); 1858 queue->ctrl = ctrl; 1859 queue->qnum = idx; 1860 atomic_set(&queue->csn, 1); 1861 queue->dev = ctrl->dev; 1862 1863 if (idx > 0) 1864 queue->cmnd_capsule_len = ctrl->ctrl.ioccsz * 16; 1865 else 1866 queue->cmnd_capsule_len = sizeof(struct nvme_command); 1867 1868 /* 1869 * Considered whether we should allocate buffers for all SQEs 1870 * and CQEs and dma map them - mapping their respective entries 1871 * into the request structures (kernel vm addr and dma address) 1872 * thus the driver could use the buffers/mappings directly. 1873 * It only makes sense if the LLDD would use them for its 1874 * messaging api. It's very unlikely most adapter api's would use 1875 * a native NVME sqe/cqe. More reasonable if FC-NVME IU payload 1876 * structures were used instead. 
1877 */ 1878 } 1879 1880 /* 1881 * This routine terminates a queue at the transport level. 1882 * The transport has already ensured that all outstanding ios on 1883 * the queue have been terminated. 1884 * The transport will send a Disconnect LS request to terminate 1885 * the queue's connection. Termination of the admin queue will also 1886 * terminate the association at the target. 1887 */ 1888 static void 1889 nvme_fc_free_queue(struct nvme_fc_queue *queue) 1890 { 1891 if (!test_and_clear_bit(NVME_FC_Q_CONNECTED, &queue->flags)) 1892 return; 1893 1894 clear_bit(NVME_FC_Q_LIVE, &queue->flags); 1895 /* 1896 * Current implementation never disconnects a single queue. 1897 * It always terminates a whole association. So there is never 1898 * a disconnect(queue) LS sent to the target. 1899 */ 1900 1901 queue->connection_id = 0; 1902 atomic_set(&queue->csn, 1); 1903 } 1904 1905 static void 1906 __nvme_fc_delete_hw_queue(struct nvme_fc_ctrl *ctrl, 1907 struct nvme_fc_queue *queue, unsigned int qidx) 1908 { 1909 if (ctrl->lport->ops->delete_queue) 1910 ctrl->lport->ops->delete_queue(&ctrl->lport->localport, qidx, 1911 queue->lldd_handle); 1912 queue->lldd_handle = NULL; 1913 } 1914 1915 static void 1916 nvme_fc_free_io_queues(struct nvme_fc_ctrl *ctrl) 1917 { 1918 int i; 1919 1920 for (i = 1; i < ctrl->ctrl.queue_count; i++) 1921 nvme_fc_free_queue(&ctrl->queues[i]); 1922 } 1923 1924 static int 1925 __nvme_fc_create_hw_queue(struct nvme_fc_ctrl *ctrl, 1926 struct nvme_fc_queue *queue, unsigned int qidx, u16 qsize) 1927 { 1928 int ret = 0; 1929 1930 queue->lldd_handle = NULL; 1931 if (ctrl->lport->ops->create_queue) 1932 ret = ctrl->lport->ops->create_queue(&ctrl->lport->localport, 1933 qidx, qsize, &queue->lldd_handle); 1934 1935 return ret; 1936 } 1937 1938 static void 1939 nvme_fc_delete_hw_io_queues(struct nvme_fc_ctrl *ctrl) 1940 { 1941 struct nvme_fc_queue *queue = &ctrl->queues[ctrl->ctrl.queue_count - 1]; 1942 int i; 1943 1944 for (i = ctrl->ctrl.queue_count - 1; i >= 1; i--, queue--) 1945 __nvme_fc_delete_hw_queue(ctrl, queue, i); 1946 } 1947 1948 static int 1949 nvme_fc_create_hw_io_queues(struct nvme_fc_ctrl *ctrl, u16 qsize) 1950 { 1951 struct nvme_fc_queue *queue = &ctrl->queues[1]; 1952 int i, ret; 1953 1954 for (i = 1; i < ctrl->ctrl.queue_count; i++, queue++) { 1955 ret = __nvme_fc_create_hw_queue(ctrl, queue, i, qsize); 1956 if (ret) 1957 goto delete_queues; 1958 } 1959 1960 return 0; 1961 1962 delete_queues: 1963 for (; i >= 0; i--) 1964 __nvme_fc_delete_hw_queue(ctrl, &ctrl->queues[i], i); 1965 return ret; 1966 } 1967 1968 static int 1969 nvme_fc_connect_io_queues(struct nvme_fc_ctrl *ctrl, u16 qsize) 1970 { 1971 int i, ret = 0; 1972 1973 for (i = 1; i < ctrl->ctrl.queue_count; i++) { 1974 ret = nvme_fc_connect_queue(ctrl, &ctrl->queues[i], qsize, 1975 (qsize / 5)); 1976 if (ret) 1977 break; 1978 ret = nvmf_connect_io_queue(&ctrl->ctrl, i); 1979 if (ret) 1980 break; 1981 1982 set_bit(NVME_FC_Q_LIVE, &ctrl->queues[i].flags); 1983 } 1984 1985 return ret; 1986 } 1987 1988 static void 1989 nvme_fc_init_io_queues(struct nvme_fc_ctrl *ctrl) 1990 { 1991 int i; 1992 1993 for (i = 1; i < ctrl->ctrl.queue_count; i++) 1994 nvme_fc_init_queue(ctrl, i); 1995 } 1996 1997 static void 1998 nvme_fc_ctrl_free(struct kref *ref) 1999 { 2000 struct nvme_fc_ctrl *ctrl = 2001 container_of(ref, struct nvme_fc_ctrl, ref); 2002 unsigned long flags; 2003 2004 if (ctrl->ctrl.tagset) { 2005 blk_cleanup_queue(ctrl->ctrl.connect_q); 2006 blk_mq_free_tag_set(&ctrl->tag_set); 2007 } 2008 2009 /* remove from 
rport list */ 2010 spin_lock_irqsave(&ctrl->rport->lock, flags); 2011 list_del(&ctrl->ctrl_list); 2012 spin_unlock_irqrestore(&ctrl->rport->lock, flags); 2013 2014 blk_mq_unquiesce_queue(ctrl->ctrl.admin_q); 2015 blk_cleanup_queue(ctrl->ctrl.admin_q); 2016 blk_mq_free_tag_set(&ctrl->admin_tag_set); 2017 2018 kfree(ctrl->queues); 2019 2020 put_device(ctrl->dev); 2021 nvme_fc_rport_put(ctrl->rport); 2022 2023 ida_simple_remove(&nvme_fc_ctrl_cnt, ctrl->cnum); 2024 if (ctrl->ctrl.opts) 2025 nvmf_free_options(ctrl->ctrl.opts); 2026 kfree(ctrl); 2027 } 2028 2029 static void 2030 nvme_fc_ctrl_put(struct nvme_fc_ctrl *ctrl) 2031 { 2032 kref_put(&ctrl->ref, nvme_fc_ctrl_free); 2033 } 2034 2035 static int 2036 nvme_fc_ctrl_get(struct nvme_fc_ctrl *ctrl) 2037 { 2038 return kref_get_unless_zero(&ctrl->ref); 2039 } 2040 2041 /* 2042 * All accesses from nvme core layer done - can now free the 2043 * controller. Called after last nvme_put_ctrl() call 2044 */ 2045 static void 2046 nvme_fc_nvme_ctrl_freed(struct nvme_ctrl *nctrl) 2047 { 2048 struct nvme_fc_ctrl *ctrl = to_fc_ctrl(nctrl); 2049 2050 WARN_ON(nctrl != &ctrl->ctrl); 2051 2052 nvme_fc_ctrl_put(ctrl); 2053 } 2054 2055 static void 2056 nvme_fc_error_recovery(struct nvme_fc_ctrl *ctrl, char *errmsg) 2057 { 2058 int active; 2059 2060 /* 2061 * if an error (io timeout, etc) while (re)connecting, 2062 * it's an error on creating the new association. 2063 * Start the error recovery thread if it hasn't already 2064 * been started. It is expected there could be multiple 2065 * ios hitting this path before things are cleaned up. 2066 */ 2067 if (ctrl->ctrl.state == NVME_CTRL_CONNECTING) { 2068 active = atomic_xchg(&ctrl->err_work_active, 1); 2069 if (!active && !schedule_work(&ctrl->err_work)) { 2070 atomic_set(&ctrl->err_work_active, 0); 2071 WARN_ON(1); 2072 } 2073 return; 2074 } 2075 2076 /* Otherwise, only proceed if in LIVE state - e.g. on first error */ 2077 if (ctrl->ctrl.state != NVME_CTRL_LIVE) 2078 return; 2079 2080 dev_warn(ctrl->ctrl.device, 2081 "NVME-FC{%d}: transport association error detected: %s\n", 2082 ctrl->cnum, errmsg); 2083 dev_warn(ctrl->ctrl.device, 2084 "NVME-FC{%d}: resetting controller\n", ctrl->cnum); 2085 2086 nvme_reset_ctrl(&ctrl->ctrl); 2087 } 2088 2089 static enum blk_eh_timer_return 2090 nvme_fc_timeout(struct request *rq, bool reserved) 2091 { 2092 struct nvme_fc_fcp_op *op = blk_mq_rq_to_pdu(rq); 2093 struct nvme_fc_ctrl *ctrl = op->ctrl; 2094 2095 /* 2096 * we can't individually ABTS an io without affecting the queue, 2097 * thus killing the queue, and thus the association. 2098 * So resolve by performing a controller reset, which will stop 2099 * the host/io stack, terminate the association on the link, 2100 * and recreate an association on the link. 2101 */ 2102 nvme_fc_error_recovery(ctrl, "io timeout error"); 2103 2104 /* 2105 * the io abort has been initiated. Have the reset timer 2106 * restarted and the abort completion will complete the io 2107 * shortly. Avoids a synchronous wait while the abort finishes. 
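 *
 * (Sketch of the expected sequence, based only on the routines in this
 * file: nvme_fc_error_recovery() kicks off either the error work or a
 * controller reset, both of which tear down the association and abort
 * the outstanding exchanges; the aborted command then completes back
 * through the normal done path with an error status, typically well
 * before the restarted timer would expire.)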
2108 */ 2109 return BLK_EH_RESET_TIMER; 2110 } 2111 2112 static int 2113 nvme_fc_map_data(struct nvme_fc_ctrl *ctrl, struct request *rq, 2114 struct nvme_fc_fcp_op *op) 2115 { 2116 struct nvmefc_fcp_req *freq = &op->fcp_req; 2117 enum dma_data_direction dir; 2118 int ret; 2119 2120 freq->sg_cnt = 0; 2121 2122 if (!blk_rq_payload_bytes(rq)) 2123 return 0; 2124 2125 freq->sg_table.sgl = freq->first_sgl; 2126 ret = sg_alloc_table_chained(&freq->sg_table, 2127 blk_rq_nr_phys_segments(rq), freq->sg_table.sgl); 2128 if (ret) 2129 return -ENOMEM; 2130 2131 op->nents = blk_rq_map_sg(rq->q, rq, freq->sg_table.sgl); 2132 WARN_ON(op->nents > blk_rq_nr_phys_segments(rq)); 2133 dir = (rq_data_dir(rq) == WRITE) ? DMA_TO_DEVICE : DMA_FROM_DEVICE; 2134 freq->sg_cnt = fc_dma_map_sg(ctrl->lport->dev, freq->sg_table.sgl, 2135 op->nents, dir); 2136 if (unlikely(freq->sg_cnt <= 0)) { 2137 sg_free_table_chained(&freq->sg_table, true); 2138 freq->sg_cnt = 0; 2139 return -EFAULT; 2140 } 2141 2142 /* 2143 * TODO: blk_integrity_rq(rq) for DIF 2144 */ 2145 return 0; 2146 } 2147 2148 static void 2149 nvme_fc_unmap_data(struct nvme_fc_ctrl *ctrl, struct request *rq, 2150 struct nvme_fc_fcp_op *op) 2151 { 2152 struct nvmefc_fcp_req *freq = &op->fcp_req; 2153 2154 if (!freq->sg_cnt) 2155 return; 2156 2157 fc_dma_unmap_sg(ctrl->lport->dev, freq->sg_table.sgl, op->nents, 2158 ((rq_data_dir(rq) == WRITE) ? 2159 DMA_TO_DEVICE : DMA_FROM_DEVICE)); 2160 2161 nvme_cleanup_cmd(rq); 2162 2163 sg_free_table_chained(&freq->sg_table, true); 2164 2165 freq->sg_cnt = 0; 2166 } 2167 2168 /* 2169 * In FC, the queue is a logical thing. At transport connect, the target 2170 * creates its "queue" and returns a handle that is to be given to the 2171 * target whenever it posts something to the corresponding SQ. When an 2172 * SQE is sent on a SQ, FC effectively considers the SQE, or rather the 2173 * command contained within the SQE, an io, and assigns a FC exchange 2174 * to it. The SQE and the associated SQ handle are sent in the initial 2175 * CMD IU sent on the exchange. All transfers relative to the io occur 2176 * as part of the exchange. The CQE is the last thing for the io, 2177 * which is transferred (explicitly or implicitly) with the RSP IU 2178 * sent on the exchange. After the CQE is received, the FC exchange is 2179 * terminated and the Exchange may be used on a different io. 2180 * 2181 * The transport to LLDD api has the transport making a request for a 2182 * new fcp io request to the LLDD. The LLDD then allocates a FC exchange 2183 * resource and transfers the command. The LLDD will then process all 2184 * steps to complete the io. Upon completion, the transport done routine 2185 * is called. 2186 * 2187 * So - while the operation is outstanding to the LLDD, there is a link 2188 * level FC exchange resource that is also outstanding. This must be 2189 * considered in all cleanup operations.
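 *
 * A rough sketch of the normal (non-AEN) submission path as implemented
 * below; all names except the LLDD callback are routines in this file:
 *
 *   nvme_fc_queue_rq()
 *     -> nvme_fc_start_fcp_op()   format CMD IU, map data, start request
 *       -> lport->ops->fcp_io()   LLDD allocates the exchange, sends CMD IU
 *          ... LLDD finishes the exchange and invokes the transport done
 *              routine, which eventually completes the request through
 *              nvme_fc_complete_rq() (unmap data, complete to blk-mq) ...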
2190 */ 2191 static blk_status_t 2192 nvme_fc_start_fcp_op(struct nvme_fc_ctrl *ctrl, struct nvme_fc_queue *queue, 2193 struct nvme_fc_fcp_op *op, u32 data_len, 2194 enum nvmefc_fcp_datadir io_dir) 2195 { 2196 struct nvme_fc_cmd_iu *cmdiu = &op->cmd_iu; 2197 struct nvme_command *sqe = &cmdiu->sqe; 2198 u32 csn; 2199 int ret, opstate; 2200 2201 /* 2202 * before attempting to send the io, check to see if we believe 2203 * the target device is present 2204 */ 2205 if (ctrl->rport->remoteport.port_state != FC_OBJSTATE_ONLINE) 2206 return BLK_STS_RESOURCE; 2207 2208 if (!nvme_fc_ctrl_get(ctrl)) 2209 return BLK_STS_IOERR; 2210 2211 /* format the FC-NVME CMD IU and fcp_req */ 2212 cmdiu->connection_id = cpu_to_be64(queue->connection_id); 2213 csn = atomic_inc_return(&queue->csn); 2214 cmdiu->csn = cpu_to_be32(csn); 2215 cmdiu->data_len = cpu_to_be32(data_len); 2216 switch (io_dir) { 2217 case NVMEFC_FCP_WRITE: 2218 cmdiu->flags = FCNVME_CMD_FLAGS_WRITE; 2219 break; 2220 case NVMEFC_FCP_READ: 2221 cmdiu->flags = FCNVME_CMD_FLAGS_READ; 2222 break; 2223 case NVMEFC_FCP_NODATA: 2224 cmdiu->flags = 0; 2225 break; 2226 } 2227 op->fcp_req.payload_length = data_len; 2228 op->fcp_req.io_dir = io_dir; 2229 op->fcp_req.transferred_length = 0; 2230 op->fcp_req.rcv_rsplen = 0; 2231 op->fcp_req.status = NVME_SC_SUCCESS; 2232 op->fcp_req.sqid = cpu_to_le16(queue->qnum); 2233 2234 /* 2235 * validate per fabric rules, set fields mandated by fabric spec 2236 * as well as those by FC-NVME spec. 2237 */ 2238 WARN_ON_ONCE(sqe->common.metadata); 2239 sqe->common.flags |= NVME_CMD_SGL_METABUF; 2240 2241 /* 2242 * format SQE DPTR field per FC-NVME rules: 2243 * type=0x5 Transport SGL Data Block Descriptor 2244 * subtype=0xA Transport-specific value 2245 * address=0 2246 * length=length of the data series 2247 */ 2248 sqe->rw.dptr.sgl.type = (NVME_TRANSPORT_SGL_DATA_DESC << 4) | 2249 NVME_SGL_FMT_TRANSPORT_A; 2250 sqe->rw.dptr.sgl.length = cpu_to_le32(data_len); 2251 sqe->rw.dptr.sgl.addr = 0; 2252 2253 if (!(op->flags & FCOP_FLAGS_AEN)) { 2254 ret = nvme_fc_map_data(ctrl, op->rq, op); 2255 if (ret < 0) { 2256 nvme_cleanup_cmd(op->rq); 2257 nvme_fc_ctrl_put(ctrl); 2258 if (ret == -ENOMEM || ret == -EAGAIN) 2259 return BLK_STS_RESOURCE; 2260 return BLK_STS_IOERR; 2261 } 2262 } 2263 2264 fc_dma_sync_single_for_device(ctrl->lport->dev, op->fcp_req.cmddma, 2265 sizeof(op->cmd_iu), DMA_TO_DEVICE); 2266 2267 atomic_set(&op->state, FCPOP_STATE_ACTIVE); 2268 2269 if (!(op->flags & FCOP_FLAGS_AEN)) 2270 blk_mq_start_request(op->rq); 2271 2272 ret = ctrl->lport->ops->fcp_io(&ctrl->lport->localport, 2273 &ctrl->rport->remoteport, 2274 queue->lldd_handle, &op->fcp_req); 2275 2276 if (ret) { 2277 opstate = atomic_xchg(&op->state, FCPOP_STATE_COMPLETE); 2278 __nvme_fc_fcpop_chk_teardowns(ctrl, op, opstate); 2279 2280 if (!(op->flags & FCOP_FLAGS_AEN)) 2281 nvme_fc_unmap_data(ctrl, op->rq, op); 2282 2283 nvme_fc_ctrl_put(ctrl); 2284 2285 if (ctrl->rport->remoteport.port_state == FC_OBJSTATE_ONLINE && 2286 ret != -EBUSY) 2287 return BLK_STS_IOERR; 2288 2289 return BLK_STS_RESOURCE; 2290 } 2291 2292 return BLK_STS_OK; 2293 } 2294 2295 static blk_status_t 2296 nvme_fc_queue_rq(struct blk_mq_hw_ctx *hctx, 2297 const struct blk_mq_queue_data *bd) 2298 { 2299 struct nvme_ns *ns = hctx->queue->queuedata; 2300 struct nvme_fc_queue *queue = hctx->driver_data; 2301 struct nvme_fc_ctrl *ctrl = queue->ctrl; 2302 struct request *rq = bd->rq; 2303 struct nvme_fc_fcp_op *op = blk_mq_rq_to_pdu(rq); 2304 struct nvme_fc_cmd_iu *cmdiu = &op->cmd_iu; 2305 
struct nvme_command *sqe = &cmdiu->sqe; 2306 enum nvmefc_fcp_datadir io_dir; 2307 bool queue_ready = test_bit(NVME_FC_Q_LIVE, &queue->flags); 2308 u32 data_len; 2309 blk_status_t ret; 2310 2311 if (ctrl->rport->remoteport.port_state != FC_OBJSTATE_ONLINE || 2312 !nvmf_check_ready(&queue->ctrl->ctrl, rq, queue_ready)) 2313 return nvmf_fail_nonready_command(&queue->ctrl->ctrl, rq); 2314 2315 ret = nvme_setup_cmd(ns, rq, sqe); 2316 if (ret) 2317 return ret; 2318 2319 data_len = blk_rq_payload_bytes(rq); 2320 if (data_len) 2321 io_dir = ((rq_data_dir(rq) == WRITE) ? 2322 NVMEFC_FCP_WRITE : NVMEFC_FCP_READ); 2323 else 2324 io_dir = NVMEFC_FCP_NODATA; 2325 2326 return nvme_fc_start_fcp_op(ctrl, queue, op, data_len, io_dir); 2327 } 2328 2329 static struct blk_mq_tags * 2330 nvme_fc_tagset(struct nvme_fc_queue *queue) 2331 { 2332 if (queue->qnum == 0) 2333 return queue->ctrl->admin_tag_set.tags[queue->qnum]; 2334 2335 return queue->ctrl->tag_set.tags[queue->qnum - 1]; 2336 } 2337 2338 static int 2339 nvme_fc_poll(struct blk_mq_hw_ctx *hctx, unsigned int tag) 2340 2341 { 2342 struct nvme_fc_queue *queue = hctx->driver_data; 2343 struct nvme_fc_ctrl *ctrl = queue->ctrl; 2344 struct request *req; 2345 struct nvme_fc_fcp_op *op; 2346 2347 req = blk_mq_tag_to_rq(nvme_fc_tagset(queue), tag); 2348 if (!req) 2349 return 0; 2350 2351 op = blk_mq_rq_to_pdu(req); 2352 2353 if ((atomic_read(&op->state) == FCPOP_STATE_ACTIVE) && 2354 (ctrl->lport->ops->poll_queue)) 2355 ctrl->lport->ops->poll_queue(&ctrl->lport->localport, 2356 queue->lldd_handle); 2357 2358 return ((atomic_read(&op->state) != FCPOP_STATE_ACTIVE)); 2359 } 2360 2361 static void 2362 nvme_fc_submit_async_event(struct nvme_ctrl *arg) 2363 { 2364 struct nvme_fc_ctrl *ctrl = to_fc_ctrl(arg); 2365 struct nvme_fc_fcp_op *aen_op; 2366 unsigned long flags; 2367 bool terminating = false; 2368 blk_status_t ret; 2369 2370 spin_lock_irqsave(&ctrl->lock, flags); 2371 if (ctrl->flags & FCCTRL_TERMIO) 2372 terminating = true; 2373 spin_unlock_irqrestore(&ctrl->lock, flags); 2374 2375 if (terminating) 2376 return; 2377 2378 aen_op = &ctrl->aen_ops[0]; 2379 2380 ret = nvme_fc_start_fcp_op(ctrl, aen_op->queue, aen_op, 0, 2381 NVMEFC_FCP_NODATA); 2382 if (ret) 2383 dev_err(ctrl->ctrl.device, 2384 "failed async event work\n"); 2385 } 2386 2387 static void 2388 nvme_fc_complete_rq(struct request *rq) 2389 { 2390 struct nvme_fc_fcp_op *op = blk_mq_rq_to_pdu(rq); 2391 struct nvme_fc_ctrl *ctrl = op->ctrl; 2392 2393 atomic_set(&op->state, FCPOP_STATE_IDLE); 2394 2395 nvme_fc_unmap_data(ctrl, rq, op); 2396 nvme_complete_rq(rq); 2397 nvme_fc_ctrl_put(ctrl); 2398 } 2399 2400 /* 2401 * This routine is used by the transport when it needs to find active 2402 * io on a queue that is to be terminated. The transport uses 2403 * blk_mq_tagset_busy_iter() to find the busy requests, which then invoke 2404 * this routine to kill them on a 1 by 1 basis. 2405 * 2406 * As FC allocates FC exchange for each io, the transport must contact 2407 * the LLDD to terminate the exchange, thus releasing the FC exchange. 2408 * After terminating the exchange the LLDD will call the transport's 2409 * normal io done path for the request, but it will have an aborted 2410 * status. The done path will return the io request back to the block 2411 * layer with an error status.
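 *
 * For example, association teardown below iterates the io tag set with:
 *
 *   blk_mq_tagset_busy_iter(&ctrl->tag_set,
 *                           nvme_fc_terminate_exchange, &ctrl->ctrl);
 *
 * and each busy request is passed to this routine as @req.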
2412 */ 2413 static void 2414 nvme_fc_terminate_exchange(struct request *req, void *data, bool reserved) 2415 { 2416 struct nvme_ctrl *nctrl = data; 2417 struct nvme_fc_ctrl *ctrl = to_fc_ctrl(nctrl); 2418 struct nvme_fc_fcp_op *op = blk_mq_rq_to_pdu(req); 2419 2420 __nvme_fc_abort_op(ctrl, op); 2421 } 2422 2423 2424 static const struct blk_mq_ops nvme_fc_mq_ops = { 2425 .queue_rq = nvme_fc_queue_rq, 2426 .complete = nvme_fc_complete_rq, 2427 .init_request = nvme_fc_init_request, 2428 .exit_request = nvme_fc_exit_request, 2429 .init_hctx = nvme_fc_init_hctx, 2430 .poll = nvme_fc_poll, 2431 .timeout = nvme_fc_timeout, 2432 }; 2433 2434 static int 2435 nvme_fc_create_io_queues(struct nvme_fc_ctrl *ctrl) 2436 { 2437 struct nvmf_ctrl_options *opts = ctrl->ctrl.opts; 2438 unsigned int nr_io_queues; 2439 int ret; 2440 2441 nr_io_queues = min(min(opts->nr_io_queues, num_online_cpus()), 2442 ctrl->lport->ops->max_hw_queues); 2443 ret = nvme_set_queue_count(&ctrl->ctrl, &nr_io_queues); 2444 if (ret) { 2445 dev_info(ctrl->ctrl.device, 2446 "set_queue_count failed: %d\n", ret); 2447 return ret; 2448 } 2449 2450 ctrl->ctrl.queue_count = nr_io_queues + 1; 2451 if (!nr_io_queues) 2452 return 0; 2453 2454 nvme_fc_init_io_queues(ctrl); 2455 2456 memset(&ctrl->tag_set, 0, sizeof(ctrl->tag_set)); 2457 ctrl->tag_set.ops = &nvme_fc_mq_ops; 2458 ctrl->tag_set.queue_depth = ctrl->ctrl.opts->queue_size; 2459 ctrl->tag_set.reserved_tags = 1; /* fabric connect */ 2460 ctrl->tag_set.numa_node = NUMA_NO_NODE; 2461 ctrl->tag_set.flags = BLK_MQ_F_SHOULD_MERGE; 2462 ctrl->tag_set.cmd_size = 2463 struct_size((struct nvme_fcp_op_w_sgl *)NULL, priv, 2464 ctrl->lport->ops->fcprqst_priv_sz); 2465 ctrl->tag_set.driver_data = ctrl; 2466 ctrl->tag_set.nr_hw_queues = ctrl->ctrl.queue_count - 1; 2467 ctrl->tag_set.timeout = NVME_IO_TIMEOUT; 2468 2469 ret = blk_mq_alloc_tag_set(&ctrl->tag_set); 2470 if (ret) 2471 return ret; 2472 2473 ctrl->ctrl.tagset = &ctrl->tag_set; 2474 2475 ctrl->ctrl.connect_q = blk_mq_init_queue(&ctrl->tag_set); 2476 if (IS_ERR(ctrl->ctrl.connect_q)) { 2477 ret = PTR_ERR(ctrl->ctrl.connect_q); 2478 goto out_free_tag_set; 2479 } 2480 2481 ret = nvme_fc_create_hw_io_queues(ctrl, ctrl->ctrl.sqsize + 1); 2482 if (ret) 2483 goto out_cleanup_blk_queue; 2484 2485 ret = nvme_fc_connect_io_queues(ctrl, ctrl->ctrl.sqsize + 1); 2486 if (ret) 2487 goto out_delete_hw_queues; 2488 2489 ctrl->ioq_live = true; 2490 2491 return 0; 2492 2493 out_delete_hw_queues: 2494 nvme_fc_delete_hw_io_queues(ctrl); 2495 out_cleanup_blk_queue: 2496 blk_cleanup_queue(ctrl->ctrl.connect_q); 2497 out_free_tag_set: 2498 blk_mq_free_tag_set(&ctrl->tag_set); 2499 nvme_fc_free_io_queues(ctrl); 2500 2501 /* force put free routine to ignore io queues */ 2502 ctrl->ctrl.tagset = NULL; 2503 2504 return ret; 2505 } 2506 2507 static int 2508 nvme_fc_recreate_io_queues(struct nvme_fc_ctrl *ctrl) 2509 { 2510 struct nvmf_ctrl_options *opts = ctrl->ctrl.opts; 2511 unsigned int nr_io_queues; 2512 int ret; 2513 2514 nr_io_queues = min(min(opts->nr_io_queues, num_online_cpus()), 2515 ctrl->lport->ops->max_hw_queues); 2516 ret = nvme_set_queue_count(&ctrl->ctrl, &nr_io_queues); 2517 if (ret) { 2518 dev_info(ctrl->ctrl.device, 2519 "set_queue_count failed: %d\n", ret); 2520 return ret; 2521 } 2522 2523 ctrl->ctrl.queue_count = nr_io_queues + 1; 2524 /* check for io queues existing */ 2525 if (ctrl->ctrl.queue_count == 1) 2526 return 0; 2527 2528 ret = nvme_fc_create_hw_io_queues(ctrl, ctrl->ctrl.sqsize + 1); 2529 if (ret) 2530 goto out_free_io_queues; 2531 
2532 ret = nvme_fc_connect_io_queues(ctrl, ctrl->ctrl.sqsize + 1); 2533 if (ret) 2534 goto out_delete_hw_queues; 2535 2536 blk_mq_update_nr_hw_queues(&ctrl->tag_set, nr_io_queues); 2537 2538 return 0; 2539 2540 out_delete_hw_queues: 2541 nvme_fc_delete_hw_io_queues(ctrl); 2542 out_free_io_queues: 2543 nvme_fc_free_io_queues(ctrl); 2544 return ret; 2545 } 2546 2547 static void 2548 nvme_fc_rport_active_on_lport(struct nvme_fc_rport *rport) 2549 { 2550 struct nvme_fc_lport *lport = rport->lport; 2551 2552 atomic_inc(&lport->act_rport_cnt); 2553 } 2554 2555 static void 2556 nvme_fc_rport_inactive_on_lport(struct nvme_fc_rport *rport) 2557 { 2558 struct nvme_fc_lport *lport = rport->lport; 2559 u32 cnt; 2560 2561 cnt = atomic_dec_return(&lport->act_rport_cnt); 2562 if (cnt == 0 && lport->localport.port_state == FC_OBJSTATE_DELETED) 2563 lport->ops->localport_delete(&lport->localport); 2564 } 2565 2566 static int 2567 nvme_fc_ctlr_active_on_rport(struct nvme_fc_ctrl *ctrl) 2568 { 2569 struct nvme_fc_rport *rport = ctrl->rport; 2570 u32 cnt; 2571 2572 if (ctrl->assoc_active) 2573 return 1; 2574 2575 ctrl->assoc_active = true; 2576 cnt = atomic_inc_return(&rport->act_ctrl_cnt); 2577 if (cnt == 1) 2578 nvme_fc_rport_active_on_lport(rport); 2579 2580 return 0; 2581 } 2582 2583 static int 2584 nvme_fc_ctlr_inactive_on_rport(struct nvme_fc_ctrl *ctrl) 2585 { 2586 struct nvme_fc_rport *rport = ctrl->rport; 2587 struct nvme_fc_lport *lport = rport->lport; 2588 u32 cnt; 2589 2590 /* ctrl->assoc_active=false will be set independently */ 2591 2592 cnt = atomic_dec_return(&rport->act_ctrl_cnt); 2593 if (cnt == 0) { 2594 if (rport->remoteport.port_state == FC_OBJSTATE_DELETED) 2595 lport->ops->remoteport_delete(&rport->remoteport); 2596 nvme_fc_rport_inactive_on_lport(rport); 2597 } 2598 2599 return 0; 2600 } 2601 2602 /* 2603 * This routine restarts the controller on the host side, and 2604 * on the link side, recreates the controller association. 
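 *
 * In rough order (all steps appear in the body below): create and connect
 * the admin hw queue, issue the fabrics Connect for the admin queue, read
 * CAP and enable the controller, run nvme_init_identify(), set up the AEN
 * ops, then create (first association) or recreate (reconnect) the io
 * queues and move the controller to LIVE.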
2605 */ 2606 static int 2607 nvme_fc_create_association(struct nvme_fc_ctrl *ctrl) 2608 { 2609 struct nvmf_ctrl_options *opts = ctrl->ctrl.opts; 2610 int ret; 2611 bool changed; 2612 2613 ++ctrl->ctrl.nr_reconnects; 2614 2615 if (ctrl->rport->remoteport.port_state != FC_OBJSTATE_ONLINE) 2616 return -ENODEV; 2617 2618 if (nvme_fc_ctlr_active_on_rport(ctrl)) 2619 return -ENOTUNIQ; 2620 2621 /* 2622 * Create the admin queue 2623 */ 2624 2625 ret = __nvme_fc_create_hw_queue(ctrl, &ctrl->queues[0], 0, 2626 NVME_AQ_DEPTH); 2627 if (ret) 2628 goto out_free_queue; 2629 2630 ret = nvme_fc_connect_admin_queue(ctrl, &ctrl->queues[0], 2631 NVME_AQ_DEPTH, (NVME_AQ_DEPTH / 4)); 2632 if (ret) 2633 goto out_delete_hw_queue; 2634 2635 blk_mq_unquiesce_queue(ctrl->ctrl.admin_q); 2636 2637 ret = nvmf_connect_admin_queue(&ctrl->ctrl); 2638 if (ret) 2639 goto out_disconnect_admin_queue; 2640 2641 set_bit(NVME_FC_Q_LIVE, &ctrl->queues[0].flags); 2642 2643 /* 2644 * Check controller capabilities 2645 * 2646 * todo:- add code to check if ctrl attributes changed from 2647 * prior connection values 2648 */ 2649 2650 ret = nvmf_reg_read64(&ctrl->ctrl, NVME_REG_CAP, &ctrl->ctrl.cap); 2651 if (ret) { 2652 dev_err(ctrl->ctrl.device, 2653 "prop_get NVME_REG_CAP failed\n"); 2654 goto out_disconnect_admin_queue; 2655 } 2656 2657 ctrl->ctrl.sqsize = 2658 min_t(int, NVME_CAP_MQES(ctrl->ctrl.cap), ctrl->ctrl.sqsize); 2659 2660 ret = nvme_enable_ctrl(&ctrl->ctrl, ctrl->ctrl.cap); 2661 if (ret) 2662 goto out_disconnect_admin_queue; 2663 2664 ctrl->ctrl.max_hw_sectors = 2665 (ctrl->lport->ops->max_sgl_segments - 1) << (PAGE_SHIFT - 9); 2666 2667 ret = nvme_init_identify(&ctrl->ctrl); 2668 if (ret) 2669 goto out_disconnect_admin_queue; 2670 2671 /* sanity checks */ 2672 2673 /* FC-NVME does not have other data in the capsule */ 2674 if (ctrl->ctrl.icdoff) { 2675 dev_err(ctrl->ctrl.device, "icdoff %d is not supported!\n", 2676 ctrl->ctrl.icdoff); 2677 goto out_disconnect_admin_queue; 2678 } 2679 2680 /* FC-NVME supports normal SGL Data Block Descriptors */ 2681 2682 if (opts->queue_size > ctrl->ctrl.maxcmd) { 2683 /* warn if maxcmd is lower than queue_size */ 2684 dev_warn(ctrl->ctrl.device, 2685 "queue_size %zu > ctrl maxcmd %u, reducing " 2686 "to queue_size\n", 2687 opts->queue_size, ctrl->ctrl.maxcmd); 2688 opts->queue_size = ctrl->ctrl.maxcmd; 2689 } 2690 2691 if (opts->queue_size > ctrl->ctrl.sqsize + 1) { 2692 /* warn if sqsize is lower than queue_size */ 2693 dev_warn(ctrl->ctrl.device, 2694 "queue_size %zu > ctrl sqsize %u, clamping down\n", 2695 opts->queue_size, ctrl->ctrl.sqsize + 1); 2696 opts->queue_size = ctrl->ctrl.sqsize + 1; 2697 } 2698 2699 ret = nvme_fc_init_aen_ops(ctrl); 2700 if (ret) 2701 goto out_term_aen_ops; 2702 2703 /* 2704 * Create the io queues 2705 */ 2706 2707 if (ctrl->ctrl.queue_count > 1) { 2708 if (!ctrl->ioq_live) 2709 ret = nvme_fc_create_io_queues(ctrl); 2710 else 2711 ret = nvme_fc_recreate_io_queues(ctrl); 2712 if (ret) 2713 goto out_term_aen_ops; 2714 } 2715 2716 changed = nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_LIVE); 2717 2718 ctrl->ctrl.nr_reconnects = 0; 2719 2720 if (changed) 2721 nvme_start_ctrl(&ctrl->ctrl); 2722 2723 return 0; /* Success */ 2724 2725 out_term_aen_ops: 2726 nvme_fc_term_aen_ops(ctrl); 2727 out_disconnect_admin_queue: 2728 /* send a Disconnect(association) LS to fc-nvme target */ 2729 nvme_fc_xmt_disconnect_assoc(ctrl); 2730 out_delete_hw_queue: 2731 __nvme_fc_delete_hw_queue(ctrl, &ctrl->queues[0], 0); 2732 out_free_queue: 2733 
nvme_fc_free_queue(&ctrl->queues[0]); 2734 ctrl->assoc_active = false; 2735 nvme_fc_ctlr_inactive_on_rport(ctrl); 2736 2737 return ret; 2738 } 2739 2740 /* 2741 * This routine stops operation of the controller on the host side. 2742 * On the host os stack side: Admin and IO queues are stopped, 2743 * outstanding ios on them terminated via FC ABTS. 2744 * On the link side: the association is terminated. 2745 */ 2746 static void 2747 nvme_fc_delete_association(struct nvme_fc_ctrl *ctrl) 2748 { 2749 unsigned long flags; 2750 2751 if (!ctrl->assoc_active) 2752 return; 2753 ctrl->assoc_active = false; 2754 2755 spin_lock_irqsave(&ctrl->lock, flags); 2756 ctrl->flags |= FCCTRL_TERMIO; 2757 ctrl->iocnt = 0; 2758 spin_unlock_irqrestore(&ctrl->lock, flags); 2759 2760 /* 2761 * If io queues are present, stop them and terminate all outstanding 2762 * ios on them. As FC allocates FC exchange for each io, the 2763 * transport must contact the LLDD to terminate the exchange, 2764 * thus releasing the FC exchange. We use blk_mq_tagset_busy_iter() 2765 * to tell us what io's are busy and invoke a transport routine 2766 * to kill them with the LLDD. After terminating the exchange 2767 * the LLDD will call the transport's normal io done path, but it 2768 * will have an aborted status. The done path will return the 2769 * io requests back to the block layer as part of normal completions 2770 * (but with error status). 2771 */ 2772 if (ctrl->ctrl.queue_count > 1) { 2773 nvme_stop_queues(&ctrl->ctrl); 2774 blk_mq_tagset_busy_iter(&ctrl->tag_set, 2775 nvme_fc_terminate_exchange, &ctrl->ctrl); 2776 } 2777 2778 /* 2779 * Other transports, which don't have link-level contexts bound 2780 * to sqe's, would try to gracefully shutdown the controller by 2781 * writing the registers for shutdown and polling (call 2782 * nvme_shutdown_ctrl()). Given a bunch of i/o was potentially 2783 * just aborted and we will wait on those contexts, and given 2784 * there was no indication of how live the controller is on the 2785 * link, don't send more io to create more contexts for the 2786 * shutdown. Let the controller fail via keepalive failure if 2787 * it's still present. 2788 */ 2789 2790 /* 2791 * clean up the admin queue. Same thing as above. 2792 * Use blk_mq_tagset_busy_iter() and the transport routine to 2793 * terminate the exchanges. 2794 */ 2795 blk_mq_quiesce_queue(ctrl->ctrl.admin_q); 2796 blk_mq_tagset_busy_iter(&ctrl->admin_tag_set, 2797 nvme_fc_terminate_exchange, &ctrl->ctrl); 2798 2799 /* kill the aens as they are a separate path */ 2800 nvme_fc_abort_aen_ops(ctrl); 2801 2802 /* wait for all io that had to be aborted */ 2803 spin_lock_irq(&ctrl->lock); 2804 wait_event_lock_irq(ctrl->ioabort_wait, ctrl->iocnt == 0, ctrl->lock); 2805 ctrl->flags &= ~FCCTRL_TERMIO; 2806 spin_unlock_irq(&ctrl->lock); 2807 2808 nvme_fc_term_aen_ops(ctrl); 2809 2810 /* 2811 * send a Disconnect(association) LS to fc-nvme target 2812 * Note: could have been sent at top of process, but 2813 * cleaner on link traffic if after the aborts complete.
* Note: if association doesn't exist, association_id will be 0 2815 */ 2816 if (ctrl->association_id) 2817 nvme_fc_xmt_disconnect_assoc(ctrl); 2818 2819 if (ctrl->ctrl.tagset) { 2820 nvme_fc_delete_hw_io_queues(ctrl); 2821 nvme_fc_free_io_queues(ctrl); 2822 } 2823 2824 __nvme_fc_delete_hw_queue(ctrl, &ctrl->queues[0], 0); 2825 nvme_fc_free_queue(&ctrl->queues[0]); 2826 2827 /* re-enable the admin_q so anything new can fast fail */ 2828 blk_mq_unquiesce_queue(ctrl->ctrl.admin_q); 2829 2830 /* resume the io queues so that things will fast fail */ 2831 nvme_start_queues(&ctrl->ctrl); 2832 2833 nvme_fc_ctlr_inactive_on_rport(ctrl); 2834 } 2835 2836 static void 2837 nvme_fc_delete_ctrl(struct nvme_ctrl *nctrl) 2838 { 2839 struct nvme_fc_ctrl *ctrl = to_fc_ctrl(nctrl); 2840 2841 cancel_work_sync(&ctrl->err_work); 2842 cancel_delayed_work_sync(&ctrl->connect_work); 2843 /* 2844 * kill the association on the link side. this will block 2845 * waiting for io to terminate 2846 */ 2847 nvme_fc_delete_association(ctrl); 2848 } 2849 2850 static void 2851 nvme_fc_reconnect_or_delete(struct nvme_fc_ctrl *ctrl, int status) 2852 { 2853 struct nvme_fc_rport *rport = ctrl->rport; 2854 struct nvme_fc_remote_port *portptr = &rport->remoteport; 2855 unsigned long recon_delay = ctrl->ctrl.opts->reconnect_delay * HZ; 2856 bool recon = true; 2857 2858 if (ctrl->ctrl.state != NVME_CTRL_CONNECTING) 2859 return; 2860 2861 if (portptr->port_state == FC_OBJSTATE_ONLINE) 2862 dev_info(ctrl->ctrl.device, 2863 "NVME-FC{%d}: reset: Reconnect attempt failed (%d)\n", 2864 ctrl->cnum, status); 2865 else if (time_after_eq(jiffies, rport->dev_loss_end)) 2866 recon = false; 2867 2868 if (recon && nvmf_should_reconnect(&ctrl->ctrl)) { 2869 if (portptr->port_state == FC_OBJSTATE_ONLINE) 2870 dev_info(ctrl->ctrl.device, 2871 "NVME-FC{%d}: Reconnect attempt in %ld " 2872 "seconds\n", 2873 ctrl->cnum, recon_delay / HZ); 2874 else if (time_after(jiffies + recon_delay, rport->dev_loss_end)) 2875 recon_delay = rport->dev_loss_end - jiffies; 2876 2877 queue_delayed_work(nvme_wq, &ctrl->connect_work, recon_delay); 2878 } else { 2879 if (portptr->port_state == FC_OBJSTATE_ONLINE) 2880 dev_warn(ctrl->ctrl.device, 2881 "NVME-FC{%d}: Max reconnect attempts (%d) " 2882 "reached.\n", 2883 ctrl->cnum, ctrl->ctrl.nr_reconnects); 2884 else 2885 dev_warn(ctrl->ctrl.device, 2886 "NVME-FC{%d}: dev_loss_tmo (%d) expired " 2887 "while waiting for remoteport connectivity.\n", 2888 ctrl->cnum, portptr->dev_loss_tmo); 2889 WARN_ON(nvme_delete_ctrl(&ctrl->ctrl)); 2890 } 2891 } 2892 2893 static void 2894 __nvme_fc_terminate_io(struct nvme_fc_ctrl *ctrl) 2895 { 2896 nvme_stop_keep_alive(&ctrl->ctrl); 2897 2898 /* will block while waiting for io to terminate */ 2899 nvme_fc_delete_association(ctrl); 2900 2901 if (ctrl->ctrl.state != NVME_CTRL_CONNECTING && 2902 !nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_CONNECTING)) 2903 dev_err(ctrl->ctrl.device, 2904 "NVME-FC{%d}: error_recovery: Couldn't change state " 2905 "to CONNECTING\n", ctrl->cnum); 2906 } 2907 2908 static void 2909 nvme_fc_reset_ctrl_work(struct work_struct *work) 2910 { 2911 struct nvme_fc_ctrl *ctrl = 2912 container_of(work, struct nvme_fc_ctrl, ctrl.reset_work); 2913 int ret; 2914 2915 __nvme_fc_terminate_io(ctrl); 2916 2917 nvme_stop_ctrl(&ctrl->ctrl); 2918 2919 if (ctrl->rport->remoteport.port_state == FC_OBJSTATE_ONLINE) 2920 ret = nvme_fc_create_association(ctrl); 2921 else 2922 ret = -ENOTCONN; 2923 2924 if (ret) 2925 nvme_fc_reconnect_or_delete(ctrl, ret); 2926 else 2927
dev_info(ctrl->ctrl.device, 2928 "NVME-FC{%d}: controller reset complete\n", 2929 ctrl->cnum); 2930 } 2931 2932 static void 2933 nvme_fc_connect_err_work(struct work_struct *work) 2934 { 2935 struct nvme_fc_ctrl *ctrl = 2936 container_of(work, struct nvme_fc_ctrl, err_work); 2937 2938 __nvme_fc_terminate_io(ctrl); 2939 2940 atomic_set(&ctrl->err_work_active, 0); 2941 2942 /* 2943 * Rescheduling the connection after recovering 2944 * from the io error is left to the reconnect work 2945 * item, which is what should have stalled waiting on 2946 * the io that had the error that scheduled this work. 2947 */ 2948 } 2949 2950 static const struct nvme_ctrl_ops nvme_fc_ctrl_ops = { 2951 .name = "fc", 2952 .module = THIS_MODULE, 2953 .flags = NVME_F_FABRICS, 2954 .reg_read32 = nvmf_reg_read32, 2955 .reg_read64 = nvmf_reg_read64, 2956 .reg_write32 = nvmf_reg_write32, 2957 .free_ctrl = nvme_fc_nvme_ctrl_freed, 2958 .submit_async_event = nvme_fc_submit_async_event, 2959 .delete_ctrl = nvme_fc_delete_ctrl, 2960 .get_address = nvmf_get_address, 2961 }; 2962 2963 static void 2964 nvme_fc_connect_ctrl_work(struct work_struct *work) 2965 { 2966 int ret; 2967 2968 struct nvme_fc_ctrl *ctrl = 2969 container_of(to_delayed_work(work), 2970 struct nvme_fc_ctrl, connect_work); 2971 2972 ret = nvme_fc_create_association(ctrl); 2973 if (ret) 2974 nvme_fc_reconnect_or_delete(ctrl, ret); 2975 else 2976 dev_info(ctrl->ctrl.device, 2977 "NVME-FC{%d}: controller connect complete\n", 2978 ctrl->cnum); 2979 } 2980 2981 2982 static const struct blk_mq_ops nvme_fc_admin_mq_ops = { 2983 .queue_rq = nvme_fc_queue_rq, 2984 .complete = nvme_fc_complete_rq, 2985 .init_request = nvme_fc_init_request, 2986 .exit_request = nvme_fc_exit_request, 2987 .init_hctx = nvme_fc_init_admin_hctx, 2988 .timeout = nvme_fc_timeout, 2989 }; 2990 2991 2992 /* 2993 * Fails a controller request if it matches an existing controller 2994 * (association) with the same tuple: 2995 * <Host NQN, Host ID, local FC port, remote FC port, SUBSYS NQN> 2996 * 2997 * The ports don't need to be compared as they are intrinsically 2998 * already matched by the port pointers supplied. 
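 *
 * For example, a second controller create request on the same rport with
 * the same host NQN, host ID and subsystem NQN will fail with -EALREADY
 * unless the duplicate_connect option was specified.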
2999 */ 3000 static bool 3001 nvme_fc_existing_controller(struct nvme_fc_rport *rport, 3002 struct nvmf_ctrl_options *opts) 3003 { 3004 struct nvme_fc_ctrl *ctrl; 3005 unsigned long flags; 3006 bool found = false; 3007 3008 spin_lock_irqsave(&rport->lock, flags); 3009 list_for_each_entry(ctrl, &rport->ctrl_list, ctrl_list) { 3010 found = nvmf_ctlr_matches_baseopts(&ctrl->ctrl, opts); 3011 if (found) 3012 break; 3013 } 3014 spin_unlock_irqrestore(&rport->lock, flags); 3015 3016 return found; 3017 } 3018 3019 static struct nvme_ctrl * 3020 nvme_fc_init_ctrl(struct device *dev, struct nvmf_ctrl_options *opts, 3021 struct nvme_fc_lport *lport, struct nvme_fc_rport *rport) 3022 { 3023 struct nvme_fc_ctrl *ctrl; 3024 unsigned long flags; 3025 int ret, idx; 3026 3027 if (!(rport->remoteport.port_role & 3028 (FC_PORT_ROLE_NVME_DISCOVERY | FC_PORT_ROLE_NVME_TARGET))) { 3029 ret = -EBADR; 3030 goto out_fail; 3031 } 3032 3033 if (!opts->duplicate_connect && 3034 nvme_fc_existing_controller(rport, opts)) { 3035 ret = -EALREADY; 3036 goto out_fail; 3037 } 3038 3039 ctrl = kzalloc(sizeof(*ctrl), GFP_KERNEL); 3040 if (!ctrl) { 3041 ret = -ENOMEM; 3042 goto out_fail; 3043 } 3044 3045 idx = ida_simple_get(&nvme_fc_ctrl_cnt, 0, 0, GFP_KERNEL); 3046 if (idx < 0) { 3047 ret = -ENOSPC; 3048 goto out_free_ctrl; 3049 } 3050 3051 ctrl->ctrl.opts = opts; 3052 ctrl->ctrl.nr_reconnects = 0; 3053 INIT_LIST_HEAD(&ctrl->ctrl_list); 3054 ctrl->lport = lport; 3055 ctrl->rport = rport; 3056 ctrl->dev = lport->dev; 3057 ctrl->cnum = idx; 3058 ctrl->ioq_live = false; 3059 ctrl->assoc_active = false; 3060 atomic_set(&ctrl->err_work_active, 0); 3061 init_waitqueue_head(&ctrl->ioabort_wait); 3062 3063 get_device(ctrl->dev); 3064 kref_init(&ctrl->ref); 3065 3066 INIT_WORK(&ctrl->ctrl.reset_work, nvme_fc_reset_ctrl_work); 3067 INIT_DELAYED_WORK(&ctrl->connect_work, nvme_fc_connect_ctrl_work); 3068 INIT_WORK(&ctrl->err_work, nvme_fc_connect_err_work); 3069 spin_lock_init(&ctrl->lock); 3070 3071 /* io queue count */ 3072 ctrl->ctrl.queue_count = min_t(unsigned int, 3073 opts->nr_io_queues, 3074 lport->ops->max_hw_queues); 3075 ctrl->ctrl.queue_count++; /* +1 for admin queue */ 3076 3077 ctrl->ctrl.sqsize = opts->queue_size - 1; 3078 ctrl->ctrl.kato = opts->kato; 3079 ctrl->ctrl.cntlid = 0xffff; 3080 3081 ret = -ENOMEM; 3082 ctrl->queues = kcalloc(ctrl->ctrl.queue_count, 3083 sizeof(struct nvme_fc_queue), GFP_KERNEL); 3084 if (!ctrl->queues) 3085 goto out_free_ida; 3086 3087 nvme_fc_init_queue(ctrl, 0); 3088 3089 memset(&ctrl->admin_tag_set, 0, sizeof(ctrl->admin_tag_set)); 3090 ctrl->admin_tag_set.ops = &nvme_fc_admin_mq_ops; 3091 ctrl->admin_tag_set.queue_depth = NVME_AQ_MQ_TAG_DEPTH; 3092 ctrl->admin_tag_set.reserved_tags = 2; /* fabric connect + Keep-Alive */ 3093 ctrl->admin_tag_set.numa_node = NUMA_NO_NODE; 3094 ctrl->admin_tag_set.cmd_size = 3095 struct_size((struct nvme_fcp_op_w_sgl *)NULL, priv, 3096 ctrl->lport->ops->fcprqst_priv_sz); 3097 ctrl->admin_tag_set.driver_data = ctrl; 3098 ctrl->admin_tag_set.nr_hw_queues = 1; 3099 ctrl->admin_tag_set.timeout = ADMIN_TIMEOUT; 3100 ctrl->admin_tag_set.flags = BLK_MQ_F_NO_SCHED; 3101 3102 ret = blk_mq_alloc_tag_set(&ctrl->admin_tag_set); 3103 if (ret) 3104 goto out_free_queues; 3105 ctrl->ctrl.admin_tagset = &ctrl->admin_tag_set; 3106 3107 ctrl->ctrl.admin_q = blk_mq_init_queue(&ctrl->admin_tag_set); 3108 if (IS_ERR(ctrl->ctrl.admin_q)) { 3109 ret = PTR_ERR(ctrl->ctrl.admin_q); 3110 goto out_free_admin_tag_set; 3111 } 3112 3113 /* 3114 * Would have been nice to init io queues tag 
set as well. 3115 * However, we require interaction from the controller 3116 * for max io queue count before we can do so. 3117 * Defer this to the connect path. 3118 */ 3119 3120 ret = nvme_init_ctrl(&ctrl->ctrl, dev, &nvme_fc_ctrl_ops, 0); 3121 if (ret) 3122 goto out_cleanup_admin_q; 3123 3124 /* at this point, teardown path changes to ref counting on nvme ctrl */ 3125 3126 spin_lock_irqsave(&rport->lock, flags); 3127 list_add_tail(&ctrl->ctrl_list, &rport->ctrl_list); 3128 spin_unlock_irqrestore(&rport->lock, flags); 3129 3130 if (!nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_RESETTING) || 3131 !nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_CONNECTING)) { 3132 dev_err(ctrl->ctrl.device, 3133 "NVME-FC{%d}: failed to init ctrl state\n", ctrl->cnum); 3134 goto fail_ctrl; 3135 } 3136 3137 nvme_get_ctrl(&ctrl->ctrl); 3138 3139 if (!queue_delayed_work(nvme_wq, &ctrl->connect_work, 0)) { 3140 nvme_put_ctrl(&ctrl->ctrl); 3141 dev_err(ctrl->ctrl.device, 3142 "NVME-FC{%d}: failed to schedule initial connect\n", 3143 ctrl->cnum); 3144 goto fail_ctrl; 3145 } 3146 3147 flush_delayed_work(&ctrl->connect_work); 3148 3149 dev_info(ctrl->ctrl.device, 3150 "NVME-FC{%d}: new ctrl: NQN \"%s\"\n", 3151 ctrl->cnum, ctrl->ctrl.opts->subsysnqn); 3152 3153 return &ctrl->ctrl; 3154 3155 fail_ctrl: 3156 nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_DELETING); 3157 cancel_work_sync(&ctrl->ctrl.reset_work); 3158 cancel_work_sync(&ctrl->err_work); 3159 cancel_delayed_work_sync(&ctrl->connect_work); 3160 3161 ctrl->ctrl.opts = NULL; 3162 3163 /* initiate nvme ctrl ref counting teardown */ 3164 nvme_uninit_ctrl(&ctrl->ctrl); 3165 3166 /* Remove core ctrl ref. */ 3167 nvme_put_ctrl(&ctrl->ctrl); 3168 3169 /* as we're past the point where we transition to the ref 3170 * counting teardown path, if we return a bad pointer here, 3171 * the calling routine, thinking it's prior to the 3172 * transition, will do an rport put. Since the teardown 3173 * path also does a rport put, we do an extra get here to 3174 * so proper order/teardown happens. 3175 */ 3176 nvme_fc_rport_get(rport); 3177 3178 return ERR_PTR(-EIO); 3179 3180 out_cleanup_admin_q: 3181 blk_cleanup_queue(ctrl->ctrl.admin_q); 3182 out_free_admin_tag_set: 3183 blk_mq_free_tag_set(&ctrl->admin_tag_set); 3184 out_free_queues: 3185 kfree(ctrl->queues); 3186 out_free_ida: 3187 put_device(ctrl->dev); 3188 ida_simple_remove(&nvme_fc_ctrl_cnt, ctrl->cnum); 3189 out_free_ctrl: 3190 kfree(ctrl); 3191 out_fail: 3192 /* exit via here doesn't follow ctlr ref points */ 3193 return ERR_PTR(ret); 3194 } 3195 3196 3197 struct nvmet_fc_traddr { 3198 u64 nn; 3199 u64 pn; 3200 }; 3201 3202 static int 3203 __nvme_fc_parse_u64(substring_t *sstr, u64 *val) 3204 { 3205 u64 token64; 3206 3207 if (match_u64(sstr, &token64)) 3208 return -EINVAL; 3209 *val = token64; 3210 3211 return 0; 3212 } 3213 3214 /* 3215 * This routine validates and extracts the WWN's from the TRADDR string. 3216 * As kernel parsers need the 0x to determine number base, universally 3217 * build string to parse with 0x prefix before parsing name strings. 
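 *
 * Accepted traddr forms (the WWN values here are made-up examples):
 *   "nn-0x20000090fa945612:pn-0x10000090fa945612"   full, 0x-prefixed
 *   "nn-20000090fa945612:pn-10000090fa945612"       short, no 0x prefix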
3218 */ 3219 static int 3220 nvme_fc_parse_traddr(struct nvmet_fc_traddr *traddr, char *buf, size_t blen) 3221 { 3222 char name[2 + NVME_FC_TRADDR_HEXNAMELEN + 1]; 3223 substring_t wwn = { name, &name[sizeof(name)-1] }; 3224 int nnoffset, pnoffset; 3225 3226 /* validate if string is one of the 2 allowed formats */ 3227 if (strnlen(buf, blen) == NVME_FC_TRADDR_MAXLENGTH && 3228 !strncmp(buf, "nn-0x", NVME_FC_TRADDR_OXNNLEN) && 3229 !strncmp(&buf[NVME_FC_TRADDR_MAX_PN_OFFSET], 3230 "pn-0x", NVME_FC_TRADDR_OXNNLEN)) { 3231 nnoffset = NVME_FC_TRADDR_OXNNLEN; 3232 pnoffset = NVME_FC_TRADDR_MAX_PN_OFFSET + 3233 NVME_FC_TRADDR_OXNNLEN; 3234 } else if ((strnlen(buf, blen) == NVME_FC_TRADDR_MINLENGTH && 3235 !strncmp(buf, "nn-", NVME_FC_TRADDR_NNLEN) && 3236 !strncmp(&buf[NVME_FC_TRADDR_MIN_PN_OFFSET], 3237 "pn-", NVME_FC_TRADDR_NNLEN))) { 3238 nnoffset = NVME_FC_TRADDR_NNLEN; 3239 pnoffset = NVME_FC_TRADDR_MIN_PN_OFFSET + NVME_FC_TRADDR_NNLEN; 3240 } else 3241 goto out_einval; 3242 3243 name[0] = '0'; 3244 name[1] = 'x'; 3245 name[2 + NVME_FC_TRADDR_HEXNAMELEN] = 0; 3246 3247 memcpy(&name[2], &buf[nnoffset], NVME_FC_TRADDR_HEXNAMELEN); 3248 if (__nvme_fc_parse_u64(&wwn, &traddr->nn)) 3249 goto out_einval; 3250 3251 memcpy(&name[2], &buf[pnoffset], NVME_FC_TRADDR_HEXNAMELEN); 3252 if (__nvme_fc_parse_u64(&wwn, &traddr->pn)) 3253 goto out_einval; 3254 3255 return 0; 3256 3257 out_einval: 3258 pr_warn("%s: bad traddr string\n", __func__); 3259 return -EINVAL; 3260 } 3261 3262 static struct nvme_ctrl * 3263 nvme_fc_create_ctrl(struct device *dev, struct nvmf_ctrl_options *opts) 3264 { 3265 struct nvme_fc_lport *lport; 3266 struct nvme_fc_rport *rport; 3267 struct nvme_ctrl *ctrl; 3268 struct nvmet_fc_traddr laddr = { 0L, 0L }; 3269 struct nvmet_fc_traddr raddr = { 0L, 0L }; 3270 unsigned long flags; 3271 int ret; 3272 3273 ret = nvme_fc_parse_traddr(&raddr, opts->traddr, NVMF_TRADDR_SIZE); 3274 if (ret || !raddr.nn || !raddr.pn) 3275 return ERR_PTR(-EINVAL); 3276 3277 ret = nvme_fc_parse_traddr(&laddr, opts->host_traddr, NVMF_TRADDR_SIZE); 3278 if (ret || !laddr.nn || !laddr.pn) 3279 return ERR_PTR(-EINVAL); 3280 3281 /* find the host and remote ports to connect together */ 3282 spin_lock_irqsave(&nvme_fc_lock, flags); 3283 list_for_each_entry(lport, &nvme_fc_lport_list, port_list) { 3284 if (lport->localport.node_name != laddr.nn || 3285 lport->localport.port_name != laddr.pn) 3286 continue; 3287 3288 list_for_each_entry(rport, &lport->endp_list, endp_list) { 3289 if (rport->remoteport.node_name != raddr.nn || 3290 rport->remoteport.port_name != raddr.pn) 3291 continue; 3292 3293 /* if fail to get reference fall through. Will error */ 3294 if (!nvme_fc_rport_get(rport)) 3295 break; 3296 3297 spin_unlock_irqrestore(&nvme_fc_lock, flags); 3298 3299 ctrl = nvme_fc_init_ctrl(dev, opts, lport, rport); 3300 if (IS_ERR(ctrl)) 3301 nvme_fc_rport_put(rport); 3302 return ctrl; 3303 } 3304 } 3305 spin_unlock_irqrestore(&nvme_fc_lock, flags); 3306 3307 pr_warn("%s: %s - %s combination not found\n", 3308 __func__, opts->traddr, opts->host_traddr); 3309 return ERR_PTR(-ENOENT); 3310 } 3311 3312 3313 static struct nvmf_transport_ops nvme_fc_transport = { 3314 .name = "fc", 3315 .module = THIS_MODULE, 3316 .required_opts = NVMF_OPT_TRADDR | NVMF_OPT_HOST_TRADDR, 3317 .allowed_opts = NVMF_OPT_RECONNECT_DELAY | NVMF_OPT_CTRL_LOSS_TMO, 3318 .create_ctrl = nvme_fc_create_ctrl, 3319 }; 3320 3321 /* Arbitrary successive failures max. 
With lots of subsystems could be high */ 3322 #define DISCOVERY_MAX_FAIL 20 3323 3324 static ssize_t nvme_fc_nvme_discovery_store(struct device *dev, 3325 struct device_attribute *attr, const char *buf, size_t count) 3326 { 3327 unsigned long flags; 3328 LIST_HEAD(local_disc_list); 3329 struct nvme_fc_lport *lport; 3330 struct nvme_fc_rport *rport; 3331 int failcnt = 0; 3332 3333 spin_lock_irqsave(&nvme_fc_lock, flags); 3334 restart: 3335 list_for_each_entry(lport, &nvme_fc_lport_list, port_list) { 3336 list_for_each_entry(rport, &lport->endp_list, endp_list) { 3337 if (!nvme_fc_lport_get(lport)) 3338 continue; 3339 if (!nvme_fc_rport_get(rport)) { 3340 /* 3341 * This is a temporary condition. Upon restart 3342 * this rport will be gone from the list. 3343 * 3344 * Revert the lport put and retry. Anything 3345 * added to the list already will be skipped (as 3346 * they are no longer list_empty). Loops should 3347 * resume at rports that were not yet seen. 3348 */ 3349 nvme_fc_lport_put(lport); 3350 3351 if (failcnt++ < DISCOVERY_MAX_FAIL) 3352 goto restart; 3353 3354 pr_err("nvme_discovery: too many reference " 3355 "failures\n"); 3356 goto process_local_list; 3357 } 3358 if (list_empty(&rport->disc_list)) 3359 list_add_tail(&rport->disc_list, 3360 &local_disc_list); 3361 } 3362 } 3363 3364 process_local_list: 3365 while (!list_empty(&local_disc_list)) { 3366 rport = list_first_entry(&local_disc_list, 3367 struct nvme_fc_rport, disc_list); 3368 list_del_init(&rport->disc_list); 3369 spin_unlock_irqrestore(&nvme_fc_lock, flags); 3370 3371 lport = rport->lport; 3372 /* signal discovery. Won't hurt if it repeats */ 3373 nvme_fc_signal_discovery_scan(lport, rport); 3374 nvme_fc_rport_put(rport); 3375 nvme_fc_lport_put(lport); 3376 3377 spin_lock_irqsave(&nvme_fc_lock, flags); 3378 } 3379 spin_unlock_irqrestore(&nvme_fc_lock, flags); 3380 3381 return count; 3382 } 3383 static DEVICE_ATTR(nvme_discovery, 0200, NULL, nvme_fc_nvme_discovery_store); 3384 3385 static struct attribute *nvme_fc_attrs[] = { 3386 &dev_attr_nvme_discovery.attr, 3387 NULL 3388 }; 3389 3390 static struct attribute_group nvme_fc_attr_group = { 3391 .attrs = nvme_fc_attrs, 3392 }; 3393 3394 static const struct attribute_group *nvme_fc_attr_groups[] = { 3395 &nvme_fc_attr_group, 3396 NULL 3397 }; 3398 3399 static struct class fc_class = { 3400 .name = "fc", 3401 .dev_groups = nvme_fc_attr_groups, 3402 .owner = THIS_MODULE, 3403 }; 3404 3405 static int __init nvme_fc_init_module(void) 3406 { 3407 int ret; 3408 3409 /* 3410 * NOTE: 3411 * It is expected that in the future the kernel will combine 3412 * the FC-isms that are currently under scsi and now being 3413 * added to by NVME into a new standalone FC class. The SCSI 3414 * and NVME protocols and their devices would be under this 3415 * new FC class. 3416 * 3417 * As we need something to post FC-specific udev events to, 3418 * specifically for nvme probe events, start by creating the 3419 * new device class. When the new standalone FC class is 3420 * put in place, this code will move to a more generic 3421 * location for the class. 
3422 */ 3423 ret = class_register(&fc_class); 3424 if (ret) { 3425 pr_err("couldn't register class fc\n"); 3426 return ret; 3427 } 3428 3429 /* 3430 * Create a device for the FC-centric udev events 3431 */ 3432 fc_udev_device = device_create(&fc_class, NULL, MKDEV(0, 0), NULL, 3433 "fc_udev_device"); 3434 if (IS_ERR(fc_udev_device)) { 3435 pr_err("couldn't create fc_udev device!\n"); 3436 ret = PTR_ERR(fc_udev_device); 3437 goto out_destroy_class; 3438 } 3439 3440 ret = nvmf_register_transport(&nvme_fc_transport); 3441 if (ret) 3442 goto out_destroy_device; 3443 3444 return 0; 3445 3446 out_destroy_device: 3447 device_destroy(&fc_class, MKDEV(0, 0)); 3448 out_destroy_class: 3449 class_unregister(&fc_class); 3450 return ret; 3451 } 3452 3453 static void __exit nvme_fc_exit_module(void) 3454 { 3455 /* sanity check - all lports should be removed */ 3456 if (!list_empty(&nvme_fc_lport_list)) 3457 pr_warn("%s: localport list not empty\n", __func__); 3458 3459 nvmf_unregister_transport(&nvme_fc_transport); 3460 3461 ida_destroy(&nvme_fc_local_port_cnt); 3462 ida_destroy(&nvme_fc_ctrl_cnt); 3463 3464 device_destroy(&fc_class, MKDEV(0, 0)); 3465 class_unregister(&fc_class); 3466 } 3467 3468 module_init(nvme_fc_init_module); 3469 module_exit(nvme_fc_exit_module); 3470 3471 MODULE_LICENSE("GPL v2"); 3472