rdma.c: diff from commit e1ea2f9856b765a2eaabb403a6751f70efc9ba4c (old) to commit c5017e85705bfea721732e153305d1988ff965c2 (new). Removed lines are prefixed with "-", added lines with "+", unchanged context is unprefixed.
/*
 * NVMe over Fabrics RDMA host code.
 * Copyright (c) 2015-2016 HGST, a Western Digital Company.
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms and conditions of the GNU General Public License,
 * version 2, as published by the Free Software Foundation.
 *

--- 36 unchanged lines hidden ---

 * We handle AEN commands ourselves and don't even let the
 * block layer know about them.
 */
#define NVME_RDMA_NR_AEN_COMMANDS 1
#define NVME_RDMA_AQ_BLKMQ_DEPTH \
	(NVME_AQ_DEPTH - NVME_RDMA_NR_AEN_COMMANDS)

struct nvme_rdma_device {
	struct ib_device *dev;
	struct ib_pd *pd;
	struct kref ref;
	struct list_head entry;
};

struct nvme_rdma_qe {
	struct ib_cqe cqe;
	void *data;
	u64 dma;

--- 11 unchanged lines hidden ---

	struct ib_reg_wr reg_wr;
	struct ib_cqe reg_cqe;
	struct nvme_rdma_queue *queue;
	struct sg_table sg_table;
	struct scatterlist first_sgl[];
};

enum nvme_rdma_queue_flags {
-	NVME_RDMA_Q_LIVE = 0,
-	NVME_RDMA_Q_DELETING = 1,
+	NVME_RDMA_Q_ALLOCATED = 0,
+	NVME_RDMA_Q_LIVE = 1,
};

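The flag rename above pairs with the allocation and teardown hunks further down in this diff: the allocation path now sets NVME_RDMA_Q_ALLOCATED once connection setup succeeds, nvme_rdma_free_queue() only proceeds if it can atomically clear that bit, and nvme_rdma_stop_queue() keeps gating on NVME_RDMA_Q_LIVE. Below is a minimal userspace sketch of that lifecycle with hypothetical helper names; the real driver uses the kernel's set_bit()/test_and_clear_bit() on queue->flags, and the point at which Q_LIVE is set is not visible in this excerpt.

--- begin sketch (illustration only, not part of rdma.c) ---
#include <stdatomic.h>
#include <stdbool.h>

enum { Q_ALLOCATED, Q_LIVE };		/* mirrors nvme_rdma_queue_flags */

struct queue { atomic_ulong flags; };

static bool test_and_clear(struct queue *q, int bit)
{
	unsigned long old = atomic_fetch_and(&q->flags, ~(1UL << bit));
	return old & (1UL << bit);
}

static void queue_alloc(struct queue *q)
{
	/* allocation path: mark the queue usable once cm setup worked */
	atomic_fetch_or(&q->flags, 1UL << Q_ALLOCATED);
}

static void queue_start(struct queue *q)
{
	/* start path: assumed to mark the queue live once it is connected */
	atomic_fetch_or(&q->flags, 1UL << Q_LIVE);
}

static void queue_stop(struct queue *q)
{
	/* only the first caller that clears LIVE performs disconnect/drain */
	if (!test_and_clear(q, Q_LIVE))
		return;
}

static void queue_free(struct queue *q)
{
	/* freeing a queue that was never allocated, or freeing twice, is a no-op */
	if (!test_and_clear(q, Q_ALLOCATED))
		return;
}

int main(void)
{
	struct queue q = { .flags = 0 };

	queue_alloc(&q);
	queue_start(&q);
	queue_stop(&q);
	queue_free(&q);
	queue_free(&q);		/* ignored: ALLOCATED was already cleared */
	return 0;
}
--- end sketch ---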
struct nvme_rdma_queue {
	struct nvme_rdma_qe *rsp_ring;
	atomic_t sig_count;
	int queue_size;
	size_t cmnd_capsule_len;
	struct nvme_rdma_ctrl *ctrl;

--- 8 unchanged lines hidden ---

};

struct nvme_rdma_ctrl {
	/* read only in the hot path */
	struct nvme_rdma_queue *queues;

	/* other member variables */
	struct blk_mq_tag_set tag_set;
-	struct work_struct delete_work;
	struct work_struct err_work;

	struct nvme_rdma_qe async_event_sqe;

	struct delayed_work reconnect_work;

	struct list_head list;

--- 152 unchanged lines hidden ---

static int nvme_rdma_reinit_request(void *data, struct request *rq)
{
	struct nvme_rdma_ctrl *ctrl = data;
	struct nvme_rdma_device *dev = ctrl->device;
	struct nvme_rdma_request *req = blk_mq_rq_to_pdu(rq);
	int ret = 0;

+	if (WARN_ON_ONCE(!req->mr))
+		return 0;
+
	ib_dereg_mr(req->mr);

	req->mr = ib_alloc_mr(dev->pd, IB_MR_TYPE_MEM_REG,
			ctrl->max_fr_pages);
	if (IS_ERR(req->mr)) {
		ret = PTR_ERR(req->mr);
		req->mr = NULL;
		goto out;

--- 144 unchanged lines hidden ---

	kfree(ndev);
out_err:
	mutex_unlock(&device_list_mutex);
	return NULL;
}

static void nvme_rdma_destroy_queue_ib(struct nvme_rdma_queue *queue)
{
-	struct nvme_rdma_device *dev;
-	struct ib_device *ibdev;
-
-	dev = queue->device;
-	ibdev = dev->dev;
+	struct nvme_rdma_device *dev = queue->device;
+	struct ib_device *ibdev = dev->dev;
+
	rdma_destroy_qp(queue->cm_id);
	ib_free_cq(queue->ib_cq);

	nvme_rdma_free_ring(ibdev, queue->rsp_ring, queue->queue_size,
			sizeof(struct nvme_completion), DMA_FROM_DEVICE);

	nvme_rdma_dev_put(dev);
}

--- 89 unchanged lines hidden ---

		dev_info(ctrl->ctrl.device,
			"rdma_resolve_addr failed (%d).\n", ret);
		goto out_destroy_cm_id;
	}

	ret = nvme_rdma_wait_for_cm(queue);
	if (ret) {
		dev_info(ctrl->ctrl.device,
-			"rdma_resolve_addr wait failed (%d).\n", ret);
+			"rdma connection establishment failed (%d)\n", ret);
		goto out_destroy_cm_id;
	}

-	clear_bit(NVME_RDMA_Q_DELETING, &queue->flags);
+	set_bit(NVME_RDMA_Q_ALLOCATED, &queue->flags);

	return 0;

out_destroy_cm_id:
	rdma_destroy_id(queue->cm_id);
	return ret;
}

static void nvme_rdma_stop_queue(struct nvme_rdma_queue *queue)
{
	if (!test_and_clear_bit(NVME_RDMA_Q_LIVE, &queue->flags))
		return;

	rdma_disconnect(queue->cm_id);
	ib_drain_qp(queue->qp);
}

static void nvme_rdma_free_queue(struct nvme_rdma_queue *queue)
{
-	if (test_and_set_bit(NVME_RDMA_Q_DELETING, &queue->flags))
+	if (!test_and_clear_bit(NVME_RDMA_Q_ALLOCATED, &queue->flags))
		return;

-	if (nvme_rdma_queue_idx(queue) == 0) {
-		nvme_rdma_free_qe(queue->device->dev,
-			&queue->ctrl->async_event_sqe,
-			sizeof(struct nvme_command), DMA_TO_DEVICE);
-	}
-
	nvme_rdma_destroy_queue_ib(queue);
	rdma_destroy_id(queue->cm_id);
}

static void nvme_rdma_free_io_queues(struct nvme_rdma_ctrl *ctrl)
{
	int i;

--- 83 unchanged lines hidden ---

out_free_queues:
	for (i--; i >= 1; i--)
		nvme_rdma_free_queue(&ctrl->queues[i]);

	return ret;
}

-static void nvme_rdma_free_tagset(struct nvme_ctrl *nctrl, bool admin)
+static void nvme_rdma_free_tagset(struct nvme_ctrl *nctrl,
+		struct blk_mq_tag_set *set)
{
	struct nvme_rdma_ctrl *ctrl = to_rdma_ctrl(nctrl);
-	struct blk_mq_tag_set *set = admin ?
-			&ctrl->admin_tag_set : &ctrl->tag_set;

	blk_mq_free_tag_set(set);
	nvme_rdma_dev_put(ctrl->device);
}

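With the new signature above, nvme_rdma_free_tagset() takes the tag set to free instead of a boolean selector, and the call sites later in this diff change accordingly. For example (all four lines are copied from hunks below):

--- call-site example, taken from hunks later in this diff ---
-		nvme_rdma_free_tagset(&ctrl->ctrl, true);
+		nvme_rdma_free_tagset(&ctrl->ctrl, ctrl->ctrl.admin_tagset);
-		nvme_rdma_free_tagset(&ctrl->ctrl, false);
+		nvme_rdma_free_tagset(&ctrl->ctrl, ctrl->ctrl.tagset);
--- end example ---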
static struct blk_mq_tag_set *nvme_rdma_alloc_tagset(struct nvme_ctrl *nctrl,
		bool admin)
{

--- 8 unchanged lines hidden ---

		set->queue_depth = NVME_RDMA_AQ_BLKMQ_DEPTH;
		set->reserved_tags = 2; /* connect + keep-alive */
		set->numa_node = NUMA_NO_NODE;
		set->cmd_size = sizeof(struct nvme_rdma_request) +
			SG_CHUNK_SIZE * sizeof(struct scatterlist);
		set->driver_data = ctrl;
		set->nr_hw_queues = 1;
		set->timeout = ADMIN_TIMEOUT;
+		set->flags = BLK_MQ_F_NO_SCHED;
	} else {
		set = &ctrl->tag_set;
		memset(set, 0, sizeof(*set));
		set->ops = &nvme_rdma_mq_ops;
		set->queue_depth = nctrl->opts->queue_size;
		set->reserved_tags = 1; /* fabric connect */
		set->numa_node = NUMA_NO_NODE;
		set->flags = BLK_MQ_F_SHOULD_MERGE;

--- 24 unchanged lines hidden ---

	blk_mq_free_tag_set(set);
out:
	return ERR_PTR(ret);
}

static void nvme_rdma_destroy_admin_queue(struct nvme_rdma_ctrl *ctrl,
		bool remove)
{
+	nvme_rdma_free_qe(ctrl->queues[0].device->dev, &ctrl->async_event_sqe,
+		sizeof(struct nvme_command), DMA_TO_DEVICE);
	nvme_rdma_stop_queue(&ctrl->queues[0]);
	if (remove) {
		blk_cleanup_queue(ctrl->ctrl.admin_q);
-		nvme_rdma_free_tagset(&ctrl->ctrl, true);
+		nvme_rdma_free_tagset(&ctrl->ctrl, ctrl->ctrl.admin_tagset);
	}
	nvme_rdma_free_queue(&ctrl->queues[0]);
}

static int nvme_rdma_configure_admin_queue(struct nvme_rdma_ctrl *ctrl,
		bool new)
{
	int error;

--- 4 unchanged lines hidden ---

	ctrl->device = ctrl->queues[0].device;

	ctrl->max_fr_pages = min_t(u32, NVME_RDMA_MAX_SEGMENTS,
		ctrl->device->dev->attrs.max_fast_reg_page_list_len);

	if (new) {
		ctrl->ctrl.admin_tagset = nvme_rdma_alloc_tagset(&ctrl->ctrl, true);
-		if (IS_ERR(ctrl->ctrl.admin_tagset)) {
-			error = PTR_ERR(ctrl->ctrl.admin_tagset);
-			goto out_free_queue;
-		}
+		if (IS_ERR(ctrl->ctrl.admin_tagset))
+			goto out_free_queue;

		ctrl->ctrl.admin_q = blk_mq_init_queue(&ctrl->admin_tag_set);
		if (IS_ERR(ctrl->ctrl.admin_q)) {
			error = PTR_ERR(ctrl->ctrl.admin_q);
			goto out_free_tagset;
		}
	} else {
-		error = blk_mq_reinit_tagset(&ctrl->admin_tag_set,
-					     nvme_rdma_reinit_request);
+		error = nvme_reinit_tagset(&ctrl->ctrl, ctrl->ctrl.admin_tagset);
		if (error)
			goto out_free_queue;
	}

	error = nvme_rdma_start_queue(ctrl, 0);
	if (error)
		goto out_cleanup_queue;

--- 27 unchanged lines hidden ---

	return 0;

out_cleanup_queue:
	if (new)
		blk_cleanup_queue(ctrl->ctrl.admin_q);
out_free_tagset:
	if (new)
-		nvme_rdma_free_tagset(&ctrl->ctrl, true);
+		nvme_rdma_free_tagset(&ctrl->ctrl, ctrl->ctrl.admin_tagset);
out_free_queue:
	nvme_rdma_free_queue(&ctrl->queues[0]);
	return error;
}

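In the else branch above, and in the matching hunk in nvme_rdma_configure_io_queues() below, the driver stops calling blk_mq_reinit_tagset() directly and goes through the core helper nvme_reinit_tagset() instead, exposing its per-request fixup via the new .reinit_request controller op added near the end of this diff. A sketch of the assumed split of responsibilities; the driver-side line is from this diff, while the core helper's behaviour is an assumption, since its body is not part of this diff:

--- begin sketch (core-side behaviour is an assumption) ---
static const struct nvme_ctrl_ops nvme_rdma_ctrl_ops = {
	/* ... */
	.reinit_request = nvme_rdma_reinit_request,	/* re-allocates the MR for one request */
};

/*
 * Assumption: nvme_reinit_tagset(ctrl, set) walks every allocated request in
 * the tag set and invokes ctrl->ops->reinit_request() on it, so transport
 * drivers only provide the per-request callback instead of driving the
 * block-layer iteration themselves.
 */
--- end sketch ---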
static void nvme_rdma_destroy_io_queues(struct nvme_rdma_ctrl *ctrl,
		bool remove)
{
	nvme_rdma_stop_io_queues(ctrl);
	if (remove) {
		blk_cleanup_queue(ctrl->ctrl.connect_q);
-		nvme_rdma_free_tagset(&ctrl->ctrl, false);
+		nvme_rdma_free_tagset(&ctrl->ctrl, ctrl->ctrl.tagset);
	}
	nvme_rdma_free_io_queues(ctrl);
}

static int nvme_rdma_configure_io_queues(struct nvme_rdma_ctrl *ctrl, bool new)
{
	int ret;

	ret = nvme_rdma_alloc_io_queues(ctrl);
	if (ret)
		return ret;

	if (new) {
		ctrl->ctrl.tagset = nvme_rdma_alloc_tagset(&ctrl->ctrl, false);
-		if (IS_ERR(ctrl->ctrl.tagset)) {
-			ret = PTR_ERR(ctrl->ctrl.tagset);
-			goto out_free_io_queues;
-		}
+		if (IS_ERR(ctrl->ctrl.tagset))
+			goto out_free_io_queues;

		ctrl->ctrl.connect_q = blk_mq_init_queue(&ctrl->tag_set);
		if (IS_ERR(ctrl->ctrl.connect_q)) {
			ret = PTR_ERR(ctrl->ctrl.connect_q);
			goto out_free_tag_set;
		}
	} else {
-		ret = blk_mq_reinit_tagset(&ctrl->tag_set,
-					   nvme_rdma_reinit_request);
+		ret = nvme_reinit_tagset(&ctrl->ctrl, ctrl->ctrl.tagset);
		if (ret)
			goto out_free_io_queues;

		blk_mq_update_nr_hw_queues(&ctrl->tag_set,
			ctrl->ctrl.queue_count - 1);
	}

	ret = nvme_rdma_start_io_queues(ctrl);
	if (ret)
		goto out_cleanup_connect_q;

	return 0;

out_cleanup_connect_q:
	if (new)
		blk_cleanup_queue(ctrl->ctrl.connect_q);
out_free_tag_set:
	if (new)
-		nvme_rdma_free_tagset(&ctrl->ctrl, false);
+		nvme_rdma_free_tagset(&ctrl->ctrl, ctrl->ctrl.tagset);
out_free_io_queues:
	nvme_rdma_free_io_queues(ctrl);
	return ret;
}

static void nvme_rdma_free_ctrl(struct nvme_ctrl *nctrl)
{
	struct nvme_rdma_ctrl *ctrl = to_rdma_ctrl(nctrl);

--- 22 unchanged lines hidden ---

	if (nvmf_should_reconnect(&ctrl->ctrl)) {
		dev_info(ctrl->ctrl.device, "Reconnecting in %d seconds...\n",
			ctrl->ctrl.opts->reconnect_delay);
		queue_delayed_work(nvme_wq, &ctrl->reconnect_work,
				ctrl->ctrl.opts->reconnect_delay * HZ);
	} else {
		dev_info(ctrl->ctrl.device, "Removing controller...\n");
-		queue_work(nvme_wq, &ctrl->delete_work);
+		queue_work(nvme_wq, &ctrl->ctrl.delete_work);
	}
}

static void nvme_rdma_reconnect_ctrl_work(struct work_struct *work)
{
	struct nvme_rdma_ctrl *ctrl = container_of(to_delayed_work(work),
			struct nvme_rdma_ctrl, reconnect_work);
	bool changed;
	int ret;

	++ctrl->ctrl.nr_reconnects;

-	if (ctrl->ctrl.queue_count > 1)
-		nvme_rdma_destroy_io_queues(ctrl, false);
-
-	nvme_rdma_destroy_admin_queue(ctrl, false);
	ret = nvme_rdma_configure_admin_queue(ctrl, false);
	if (ret)
		goto requeue;

	if (ctrl->ctrl.queue_count > 1) {
		ret = nvme_rdma_configure_io_queues(ctrl, false);
		if (ret)
-			goto requeue;
+			goto destroy_admin;
	}

	changed = nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_LIVE);
	if (!changed) {
		/* state change failure is ok if we're in DELETING state */
		WARN_ON_ONCE(ctrl->ctrl.state != NVME_CTRL_DELETING);
		return;
	}

-	ctrl->ctrl.nr_reconnects = 0;
-
	nvme_start_ctrl(&ctrl->ctrl);

-	dev_info(ctrl->ctrl.device, "Successfully reconnected\n");
+	dev_info(ctrl->ctrl.device, "Successfully reconnected (%d attempts)\n",
+			ctrl->ctrl.nr_reconnects);
+
+	ctrl->ctrl.nr_reconnects = 0;

	return;

+destroy_admin:
+	nvme_rdma_destroy_admin_queue(ctrl, false);
requeue:
	dev_info(ctrl->ctrl.device, "Failed reconnect attempt %d\n",
			ctrl->ctrl.nr_reconnects);
	nvme_rdma_reconnect_or_remove(ctrl);
}

static void nvme_rdma_error_recovery_work(struct work_struct *work)
{
	struct nvme_rdma_ctrl *ctrl = container_of(work,
			struct nvme_rdma_ctrl, err_work);

	nvme_stop_keep_alive(&ctrl->ctrl);

	if (ctrl->ctrl.queue_count > 1) {
		nvme_stop_queues(&ctrl->ctrl);
-		nvme_rdma_stop_io_queues(ctrl);
-	}
-	blk_mq_quiesce_queue(ctrl->ctrl.admin_q);
-	nvme_rdma_stop_queue(&ctrl->queues[0]);
-
-	/* We must take care of fastfail/requeue all our inflight requests */
-	if (ctrl->ctrl.queue_count > 1)
-		blk_mq_tagset_busy_iter(&ctrl->tag_set,
-				nvme_cancel_request, &ctrl->ctrl);
-	blk_mq_tagset_busy_iter(&ctrl->admin_tag_set,
-			nvme_cancel_request, &ctrl->ctrl);
+		blk_mq_tagset_busy_iter(&ctrl->tag_set,
+				nvme_cancel_request, &ctrl->ctrl);
+		nvme_rdma_destroy_io_queues(ctrl, false);
+	}
+
+	blk_mq_quiesce_queue(ctrl->ctrl.admin_q);
+	blk_mq_tagset_busy_iter(&ctrl->admin_tag_set,
+			nvme_cancel_request, &ctrl->ctrl);
+	nvme_rdma_destroy_admin_queue(ctrl, false);

	/*
	 * queues are not a live anymore, so restart the queues to fail fast
	 * new IO
	 */
	blk_mq_unquiesce_queue(ctrl->ctrl.admin_q);
	nvme_start_queues(&ctrl->ctrl);

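Taken together, the two reworked functions above swap teardown responsibility: nvme_rdma_error_recovery_work() now destroys the I/O and admin queues itself, and nvme_rdma_reconnect_ctrl_work() only rebuilds them, falling back to the new destroy_admin label if I/O-queue setup fails. A paraphrase of the resulting flow, summarising the hunks above rather than quoting them:

--- summary of the two hunks above (paraphrase, not verbatim code) ---
/*
 * nvme_rdma_error_recovery_work():
 *	stop I/O queues and cancel their inflight requests,
 *	nvme_rdma_destroy_io_queues(ctrl, false);
 *	quiesce the admin queue and cancel admin requests,
 *	nvme_rdma_destroy_admin_queue(ctrl, false);
 *	unquiesce/restart the queues so new I/O fails fast.
 *
 * nvme_rdma_reconnect_ctrl_work():
 *	nvme_rdma_configure_admin_queue(ctrl, false);	(requeue on failure)
 *	nvme_rdma_configure_io_queues(ctrl, false);	(destroy_admin on failure)
 *	mark the controller LIVE and call nvme_start_ctrl().
 */
--- end summary ---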

--- 59 unchanged lines hidden ---

	struct nvme_rdma_ctrl *ctrl = queue->ctrl;
	struct nvme_rdma_device *dev = queue->device;
	struct ib_device *ibdev = dev->dev;
	int res;

	if (!blk_rq_bytes(rq))
		return;

-	if (req->mr->need_inval) {
+	if (req->mr->need_inval && test_bit(NVME_RDMA_Q_LIVE, &req->queue->flags)) {
		res = nvme_rdma_inv_rkey(queue, req);
		if (unlikely(res < 0)) {
			dev_err(ctrl->ctrl.device,
				"Queueing INV WR for rkey %#x failed (%d)\n",
				req->mr->rkey, res);
			nvme_rdma_error_recovery(queue->ctrl);
		}
	}

--- 508 unchanged lines hidden ---

	return 0;
}

static enum blk_eh_timer_return
nvme_rdma_timeout(struct request *rq, bool reserved)
{
	struct nvme_rdma_request *req = blk_mq_rq_to_pdu(rq);

+	dev_warn(req->queue->ctrl->ctrl.device,
+		"I/O %d QID %d timeout, reset controller\n",
+		rq->tag, nvme_rdma_queue_idx(req->queue));
+
	/* queue error recovery */
	nvme_rdma_error_recovery(req->queue->ctrl);

	/* fail with DNR on cmd timeout */
	nvme_req(rq)->status = NVME_SC_ABORT_REQ | NVME_SC_DNR;

	return BLK_EH_HANDLED;
}

--- 166 unchanged lines hidden ---

static void nvme_rdma_remove_ctrl(struct nvme_rdma_ctrl *ctrl)
{
	nvme_remove_namespaces(&ctrl->ctrl);
	nvme_rdma_shutdown_ctrl(ctrl, true);
	nvme_uninit_ctrl(&ctrl->ctrl);
	nvme_put_ctrl(&ctrl->ctrl);
}

-static void nvme_rdma_del_ctrl_work(struct work_struct *work)
-{
-	struct nvme_rdma_ctrl *ctrl = container_of(work,
-			struct nvme_rdma_ctrl, delete_work);
-
-	nvme_stop_ctrl(&ctrl->ctrl);
-	nvme_rdma_remove_ctrl(ctrl);
-}
-
-static int __nvme_rdma_del_ctrl(struct nvme_rdma_ctrl *ctrl)
-{
-	if (!nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_DELETING))
-		return -EBUSY;
-
-	if (!queue_work(nvme_wq, &ctrl->delete_work))
-		return -EBUSY;
-
-	return 0;
-}
-
-static int nvme_rdma_del_ctrl(struct nvme_ctrl *nctrl)
-{
-	struct nvme_rdma_ctrl *ctrl = to_rdma_ctrl(nctrl);
-	int ret = 0;
-
-	/*
-	 * Keep a reference until all work is flushed since
-	 * __nvme_rdma_del_ctrl can free the ctrl mem
-	 */
-	if (!kref_get_unless_zero(&ctrl->ctrl.kref))
-		return -EBUSY;
-	ret = __nvme_rdma_del_ctrl(ctrl);
-	if (!ret)
-		flush_work(&ctrl->delete_work);
-	nvme_put_ctrl(&ctrl->ctrl);
-	return ret;
-}
+static void nvme_rdma_delete_ctrl(struct nvme_ctrl *ctrl)
+{
+	nvme_stop_ctrl(ctrl);
+	nvme_rdma_remove_ctrl(to_rdma_ctrl(ctrl));
+}

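The three functions removed above are what the new nvme_rdma_delete_ctrl() replaces: the deletion plumbing (work item, state change, reference handling) is now reached through ctrl->ctrl.delete_work, nvme_delete_ctrl() and nvme_get_ctrl() in the nvme core, and the other call sites in this diff change to match. For example (each line is copied from a hunk elsewhere in this diff):

--- call-site example, taken from hunks elsewhere in this diff ---
	.delete_ctrl = nvme_rdma_delete_ctrl,			/* controller ops */
	queue_work(nvme_wq, &ctrl->ctrl.delete_work);		/* reconnect give-up path */
	nvme_delete_ctrl(&ctrl->ctrl);				/* IB device removal path */
	nvme_get_ctrl(&ctrl->ctrl);				/* replaces kref_get() in create_ctrl */
--- end example ---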
static void nvme_rdma_reset_ctrl_work(struct work_struct *work)
{
	struct nvme_rdma_ctrl *ctrl =
		container_of(work, struct nvme_rdma_ctrl, ctrl.reset_work);
	int ret;
	bool changed;

	nvme_stop_ctrl(&ctrl->ctrl);

--- 5 unchanged lines hidden ---

	if (ctrl->ctrl.queue_count > 1) {
		ret = nvme_rdma_configure_io_queues(ctrl, false);
		if (ret)
			goto out_fail;
	}

	changed = nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_LIVE);
-	WARN_ON_ONCE(!changed);
+	if (!changed) {
+		/* state change failure is ok if we're in DELETING state */
+		WARN_ON_ONCE(ctrl->ctrl.state != NVME_CTRL_DELETING);
+		return;
+	}

	nvme_start_ctrl(&ctrl->ctrl);

	return;

out_fail:
	dev_warn(ctrl->ctrl.device, "Removing after reset failure\n");
	nvme_rdma_remove_ctrl(ctrl);
}

static const struct nvme_ctrl_ops nvme_rdma_ctrl_ops = {
	.name = "rdma",
	.module = THIS_MODULE,
	.flags = NVME_F_FABRICS,
	.reg_read32 = nvmf_reg_read32,
	.reg_read64 = nvmf_reg_read64,
	.reg_write32 = nvmf_reg_write32,
	.free_ctrl = nvme_rdma_free_ctrl,
	.submit_async_event = nvme_rdma_submit_async_event,
-	.delete_ctrl = nvme_rdma_del_ctrl,
+	.delete_ctrl = nvme_rdma_delete_ctrl,
	.get_address = nvmf_get_address,
+	.reinit_request = nvme_rdma_reinit_request,
};

+static inline bool
+__nvme_rdma_options_match(struct nvme_rdma_ctrl *ctrl,
+	struct nvmf_ctrl_options *opts)
+{
+	char *stdport = __stringify(NVME_RDMA_IP_PORT);
+
+
+	if (!nvmf_ctlr_matches_baseopts(&ctrl->ctrl, opts) ||
+	    strcmp(opts->traddr, ctrl->ctrl.opts->traddr))
+		return false;
+
+	if (opts->mask & NVMF_OPT_TRSVCID &&
+	    ctrl->ctrl.opts->mask & NVMF_OPT_TRSVCID) {
+		if (strcmp(opts->trsvcid, ctrl->ctrl.opts->trsvcid))
+			return false;
+	} else if (opts->mask & NVMF_OPT_TRSVCID) {
+		if (strcmp(opts->trsvcid, stdport))
+			return false;
+	} else if (ctrl->ctrl.opts->mask & NVMF_OPT_TRSVCID) {
+		if (strcmp(stdport, ctrl->ctrl.opts->trsvcid))
+			return false;
+	}
+	/* else, it's a match as both have stdport. Fall to next checks */
+
+	/*
+	 * checking the local address is rough. In most cases, one
+	 * is not specified and the host port is selected by the stack.
+	 *
+	 * Assume no match if:
+	 *  local address is specified and address is not the same
+	 *  local address is not specified but remote is, or vice versa
+	 *  (admin using specific host_traddr when it matters).
+	 */
+	if (opts->mask & NVMF_OPT_HOST_TRADDR &&
+	    ctrl->ctrl.opts->mask & NVMF_OPT_HOST_TRADDR) {
+		if (strcmp(opts->host_traddr, ctrl->ctrl.opts->host_traddr))
+			return false;
+	} else if (opts->mask & NVMF_OPT_HOST_TRADDR ||
+		   ctrl->ctrl.opts->mask & NVMF_OPT_HOST_TRADDR)
+		return false;
+	/*
+	 * if neither controller had an host port specified, assume it's
+	 * a match as everything else matched.
+	 */
+
+	return true;
+}
+
+/*
+ * Fails a connection request if it matches an existing controller
+ * (association) with the same tuple:
+ * <Host NQN, Host ID, local address, remote address, remote port, SUBSYS NQN>
+ *
+ * if local address is not specified in the request, it will match an
+ * existing controller with all the other parameters the same and no
+ * local port address specified as well.
+ *
+ * The ports don't need to be compared as they are intrinsically
+ * already matched by the port pointers supplied.
+ */
+static bool
+nvme_rdma_existing_controller(struct nvmf_ctrl_options *opts)
+{
+	struct nvme_rdma_ctrl *ctrl;
+	bool found = false;
+
+	mutex_lock(&nvme_rdma_ctrl_mutex);
+	list_for_each_entry(ctrl, &nvme_rdma_ctrl_list, list) {
+		found = __nvme_rdma_options_match(ctrl, opts);
+		if (found)
+			break;
+	}
+	mutex_unlock(&nvme_rdma_ctrl_mutex);
+
+	return found;
+}
+
static struct nvme_ctrl *nvme_rdma_create_ctrl(struct device *dev,
		struct nvmf_ctrl_options *opts)
{
	struct nvme_rdma_ctrl *ctrl;
	int ret;
	bool changed;
	char *port;

--- 20 unchanged lines hidden ---

				opts->host_traddr, NULL, &ctrl->src_addr);
		if (ret) {
			pr_err("malformed src address passed: %s\n",
			       opts->host_traddr);
			goto out_free_ctrl;
		}
	}

+	if (!opts->duplicate_connect && nvme_rdma_existing_controller(opts)) {
+		ret = -EALREADY;
+		goto out_free_ctrl;
+	}
+
	ret = nvme_init_ctrl(&ctrl->ctrl, dev, &nvme_rdma_ctrl_ops,
				0 /* no quirks, we're perfect! */);
	if (ret)
		goto out_free_ctrl;

	INIT_DELAYED_WORK(&ctrl->reconnect_work,
			nvme_rdma_reconnect_ctrl_work);
	INIT_WORK(&ctrl->err_work, nvme_rdma_error_recovery_work);
-	INIT_WORK(&ctrl->delete_work, nvme_rdma_del_ctrl_work);
	INIT_WORK(&ctrl->ctrl.reset_work, nvme_rdma_reset_ctrl_work);

	ctrl->ctrl.queue_count = opts->nr_io_queues + 1; /* +1 for admin queue */
	ctrl->ctrl.sqsize = opts->queue_size - 1;
	ctrl->ctrl.kato = opts->kato;

	ret = -ENOMEM;
	ctrl->queues = kcalloc(ctrl->ctrl.queue_count, sizeof(*ctrl->queues),

--- 42 unchanged lines hidden ---

	}

	changed = nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_LIVE);
	WARN_ON_ONCE(!changed);

	dev_info(ctrl->ctrl.device, "new ctrl: NQN \"%s\", addr %pISpcs\n",
		ctrl->ctrl.opts->subsysnqn, &ctrl->addr);

-	kref_get(&ctrl->ctrl.kref);
+	nvme_get_ctrl(&ctrl->ctrl);

	mutex_lock(&nvme_rdma_ctrl_mutex);
	list_add_tail(&ctrl->list, &nvme_rdma_ctrl_list);
	mutex_unlock(&nvme_rdma_ctrl_mutex);

	nvme_start_ctrl(&ctrl->ctrl);

	return &ctrl->ctrl;

--- 28 unchanged lines hidden ---

	/* Delete all controllers using this device */
	mutex_lock(&nvme_rdma_ctrl_mutex);
	list_for_each_entry(ctrl, &nvme_rdma_ctrl_list, list) {
		if (ctrl->device->dev != ib_device)
			continue;
		dev_info(ctrl->ctrl.device,
			"Removing ctrl: NQN \"%s\", addr %pISp\n",
			ctrl->ctrl.opts->subsysnqn, &ctrl->addr);
-		__nvme_rdma_del_ctrl(ctrl);
+		nvme_delete_ctrl(&ctrl->ctrl);
	}
	mutex_unlock(&nvme_rdma_ctrl_mutex);

	flush_workqueue(nvme_wq);
}

static struct ib_client nvme_rdma_ib_client = {
	.name = "nvme_rdma",

--- 32 unchanged lines hidden ---