19cb83748SJack Wang // SPDX-License-Identifier: GPL-2.0-or-later 29cb83748SJack Wang /* 39cb83748SJack Wang * RDMA Transport Layer 49cb83748SJack Wang * 59cb83748SJack Wang * Copyright (c) 2014 - 2018 ProfitBricks GmbH. All rights reserved. 69cb83748SJack Wang * Copyright (c) 2018 - 2019 1&1 IONOS Cloud GmbH. All rights reserved. 79cb83748SJack Wang * Copyright (c) 2019 - 2020 1&1 IONOS SE. All rights reserved. 89cb83748SJack Wang */ 99cb83748SJack Wang 109cb83748SJack Wang #undef pr_fmt 119cb83748SJack Wang #define pr_fmt(fmt) KBUILD_MODNAME " L" __stringify(__LINE__) ": " fmt 129cb83748SJack Wang 139cb83748SJack Wang #include <linux/module.h> 149cb83748SJack Wang #include <linux/mempool.h> 159cb83748SJack Wang 169cb83748SJack Wang #include "rtrs-srv.h" 179cb83748SJack Wang #include "rtrs-log.h" 188094ba0aSLeon Romanovsky #include <rdma/ib_cm.h> 19558d52b2SMd Haris Iqbal #include <rdma/ib_verbs.h> 209cb83748SJack Wang 219cb83748SJack Wang MODULE_DESCRIPTION("RDMA Transport Server"); 229cb83748SJack Wang MODULE_LICENSE("GPL"); 239cb83748SJack Wang 249cb83748SJack Wang /* Must be power of 2, see mask from mr->page_size in ib_sg_to_pages() */ 259cb83748SJack Wang #define DEFAULT_MAX_CHUNK_SIZE (128 << 10) 269cb83748SJack Wang #define DEFAULT_SESS_QUEUE_DEPTH 512 279cb83748SJack Wang #define MAX_HDR_SIZE PAGE_SIZE 289cb83748SJack Wang 299cb83748SJack Wang /* We guarantee to serve 10 paths at least */ 309cb83748SJack Wang #define CHUNK_POOL_SZ 10 319cb83748SJack Wang 329cb83748SJack Wang static struct rtrs_rdma_dev_pd dev_pd; 339cb83748SJack Wang static mempool_t *chunk_pool; 349cb83748SJack Wang struct class *rtrs_dev_class; 35558d52b2SMd Haris Iqbal static struct rtrs_srv_ib_ctx ib_ctx; 369cb83748SJack Wang 379cb83748SJack Wang static int __read_mostly max_chunk_size = DEFAULT_MAX_CHUNK_SIZE; 389cb83748SJack Wang static int __read_mostly sess_queue_depth = DEFAULT_SESS_QUEUE_DEPTH; 399cb83748SJack Wang 409cb83748SJack Wang static bool always_invalidate = true; 419cb83748SJack Wang module_param(always_invalidate, bool, 0444); 429cb83748SJack Wang MODULE_PARM_DESC(always_invalidate, 439cb83748SJack Wang "Invalidate memory registration for contiguous memory regions before accessing."); 449cb83748SJack Wang 459cb83748SJack Wang module_param_named(max_chunk_size, max_chunk_size, int, 0444); 469cb83748SJack Wang MODULE_PARM_DESC(max_chunk_size, 479cb83748SJack Wang "Max size for each IO request, when change the unit is in byte (default: " 489cb83748SJack Wang __stringify(DEFAULT_MAX_CHUNK_SIZE) "KB)"); 499cb83748SJack Wang 509cb83748SJack Wang module_param_named(sess_queue_depth, sess_queue_depth, int, 0444); 519cb83748SJack Wang MODULE_PARM_DESC(sess_queue_depth, 529cb83748SJack Wang "Number of buffers for pending I/O requests to allocate per session. Maximum: " 539cb83748SJack Wang __stringify(MAX_SESS_QUEUE_DEPTH) " (default: " 549cb83748SJack Wang __stringify(DEFAULT_SESS_QUEUE_DEPTH) ")"); 559cb83748SJack Wang 569cb83748SJack Wang static cpumask_t cq_affinity_mask = { CPU_BITS_ALL }; 579cb83748SJack Wang 589cb83748SJack Wang static struct workqueue_struct *rtrs_wq; 599cb83748SJack Wang 609cb83748SJack Wang static inline struct rtrs_srv_con *to_srv_con(struct rtrs_con *c) 619cb83748SJack Wang { 629cb83748SJack Wang return container_of(c, struct rtrs_srv_con, c); 639cb83748SJack Wang } 649cb83748SJack Wang 659cb83748SJack Wang static inline struct rtrs_srv_sess *to_srv_sess(struct rtrs_sess *s) 669cb83748SJack Wang { 679cb83748SJack Wang return container_of(s, struct rtrs_srv_sess, s); 689cb83748SJack Wang } 699cb83748SJack Wang 700aedfb69SGuoqing Jiang static bool rtrs_srv_change_state(struct rtrs_srv_sess *sess, 719cb83748SJack Wang enum rtrs_srv_state new_state) 729cb83748SJack Wang { 739cb83748SJack Wang enum rtrs_srv_state old_state; 749cb83748SJack Wang bool changed = false; 759cb83748SJack Wang 760aedfb69SGuoqing Jiang spin_lock_irq(&sess->state_lock); 779cb83748SJack Wang old_state = sess->state; 789cb83748SJack Wang switch (new_state) { 799cb83748SJack Wang case RTRS_SRV_CONNECTED: 80485f2fb1SGuoqing Jiang if (old_state == RTRS_SRV_CONNECTING) 819cb83748SJack Wang changed = true; 829cb83748SJack Wang break; 839cb83748SJack Wang case RTRS_SRV_CLOSING: 84485f2fb1SGuoqing Jiang if (old_state == RTRS_SRV_CONNECTING || 85485f2fb1SGuoqing Jiang old_state == RTRS_SRV_CONNECTED) 869cb83748SJack Wang changed = true; 879cb83748SJack Wang break; 889cb83748SJack Wang case RTRS_SRV_CLOSED: 89485f2fb1SGuoqing Jiang if (old_state == RTRS_SRV_CLOSING) 909cb83748SJack Wang changed = true; 919cb83748SJack Wang break; 929cb83748SJack Wang default: 939cb83748SJack Wang break; 949cb83748SJack Wang } 959cb83748SJack Wang if (changed) 969cb83748SJack Wang sess->state = new_state; 979cb83748SJack Wang spin_unlock_irq(&sess->state_lock); 989cb83748SJack Wang 999cb83748SJack Wang return changed; 1009cb83748SJack Wang } 1019cb83748SJack Wang 1029cb83748SJack Wang static void free_id(struct rtrs_srv_op *id) 1039cb83748SJack Wang { 1049cb83748SJack Wang if (!id) 1059cb83748SJack Wang return; 1069cb83748SJack Wang kfree(id); 1079cb83748SJack Wang } 1089cb83748SJack Wang 1099cb83748SJack Wang static void rtrs_srv_free_ops_ids(struct rtrs_srv_sess *sess) 1109cb83748SJack Wang { 1119cb83748SJack Wang struct rtrs_srv *srv = sess->srv; 1129cb83748SJack Wang int i; 1139cb83748SJack Wang 1149cb83748SJack Wang if (sess->ops_ids) { 1159cb83748SJack Wang for (i = 0; i < srv->queue_depth; i++) 1169cb83748SJack Wang free_id(sess->ops_ids[i]); 1179cb83748SJack Wang kfree(sess->ops_ids); 1189cb83748SJack Wang sess->ops_ids = NULL; 1199cb83748SJack Wang } 1209cb83748SJack Wang } 1219cb83748SJack Wang 1229cb83748SJack Wang static void rtrs_srv_rdma_done(struct ib_cq *cq, struct ib_wc *wc); 1239cb83748SJack Wang 1249cb83748SJack Wang static struct ib_cqe io_comp_cqe = { 1259cb83748SJack Wang .done = rtrs_srv_rdma_done 1269cb83748SJack Wang }; 1279cb83748SJack Wang 1280cdfb3b2SMd Haris Iqbal static inline void rtrs_srv_inflight_ref_release(struct percpu_ref *ref) 1290cdfb3b2SMd Haris Iqbal { 1300cdfb3b2SMd Haris Iqbal struct rtrs_srv_sess *sess = container_of(ref, struct rtrs_srv_sess, ids_inflight_ref); 1310cdfb3b2SMd Haris Iqbal 1320cdfb3b2SMd Haris Iqbal percpu_ref_exit(&sess->ids_inflight_ref); 1330cdfb3b2SMd Haris Iqbal complete(&sess->complete_done); 1340cdfb3b2SMd Haris Iqbal } 1350cdfb3b2SMd Haris Iqbal 1369cb83748SJack Wang static int rtrs_srv_alloc_ops_ids(struct rtrs_srv_sess *sess) 1379cb83748SJack Wang { 1389cb83748SJack Wang struct rtrs_srv *srv = sess->srv; 1399cb83748SJack Wang struct rtrs_srv_op *id; 1400cdfb3b2SMd Haris Iqbal int i, ret; 1419cb83748SJack Wang 1429cb83748SJack Wang sess->ops_ids = kcalloc(srv->queue_depth, sizeof(*sess->ops_ids), 1439cb83748SJack Wang GFP_KERNEL); 1449cb83748SJack Wang if (!sess->ops_ids) 1459cb83748SJack Wang goto err; 1469cb83748SJack Wang 1479cb83748SJack Wang for (i = 0; i < srv->queue_depth; ++i) { 1489cb83748SJack Wang id = kzalloc(sizeof(*id), GFP_KERNEL); 1499cb83748SJack Wang if (!id) 1509cb83748SJack Wang goto err; 1519cb83748SJack Wang 1529cb83748SJack Wang sess->ops_ids[i] = id; 1539cb83748SJack Wang } 1540cdfb3b2SMd Haris Iqbal 1550cdfb3b2SMd Haris Iqbal ret = percpu_ref_init(&sess->ids_inflight_ref, 1560cdfb3b2SMd Haris Iqbal rtrs_srv_inflight_ref_release, 0, GFP_KERNEL); 1570cdfb3b2SMd Haris Iqbal if (ret) { 1580cdfb3b2SMd Haris Iqbal pr_err("Percpu reference init failed\n"); 1590cdfb3b2SMd Haris Iqbal goto err; 1600cdfb3b2SMd Haris Iqbal } 1610cdfb3b2SMd Haris Iqbal init_completion(&sess->complete_done); 1629cb83748SJack Wang 1639cb83748SJack Wang return 0; 1649cb83748SJack Wang 1659cb83748SJack Wang err: 1669cb83748SJack Wang rtrs_srv_free_ops_ids(sess); 1679cb83748SJack Wang return -ENOMEM; 1689cb83748SJack Wang } 1699cb83748SJack Wang 1709cb83748SJack Wang static inline void rtrs_srv_get_ops_ids(struct rtrs_srv_sess *sess) 1719cb83748SJack Wang { 1720cdfb3b2SMd Haris Iqbal percpu_ref_get(&sess->ids_inflight_ref); 1739cb83748SJack Wang } 1749cb83748SJack Wang 1759cb83748SJack Wang static inline void rtrs_srv_put_ops_ids(struct rtrs_srv_sess *sess) 1769cb83748SJack Wang { 1770cdfb3b2SMd Haris Iqbal percpu_ref_put(&sess->ids_inflight_ref); 1789cb83748SJack Wang } 1799cb83748SJack Wang 1809cb83748SJack Wang static void rtrs_srv_reg_mr_done(struct ib_cq *cq, struct ib_wc *wc) 1819cb83748SJack Wang { 1823b89e92cSJack Wang struct rtrs_srv_con *con = to_srv_con(wc->qp->qp_context); 1839cb83748SJack Wang struct rtrs_sess *s = con->c.sess; 1849cb83748SJack Wang struct rtrs_srv_sess *sess = to_srv_sess(s); 1859cb83748SJack Wang 1869cb83748SJack Wang if (unlikely(wc->status != IB_WC_SUCCESS)) { 1879cb83748SJack Wang rtrs_err(s, "REG MR failed: %s\n", 1889cb83748SJack Wang ib_wc_status_msg(wc->status)); 1899cb83748SJack Wang close_sess(sess); 1909cb83748SJack Wang return; 1919cb83748SJack Wang } 1929cb83748SJack Wang } 1939cb83748SJack Wang 1949cb83748SJack Wang static struct ib_cqe local_reg_cqe = { 1959cb83748SJack Wang .done = rtrs_srv_reg_mr_done 1969cb83748SJack Wang }; 1979cb83748SJack Wang 1989cb83748SJack Wang static int rdma_write_sg(struct rtrs_srv_op *id) 1999cb83748SJack Wang { 2009cb83748SJack Wang struct rtrs_sess *s = id->con->c.sess; 2019cb83748SJack Wang struct rtrs_srv_sess *sess = to_srv_sess(s); 2029cb83748SJack Wang dma_addr_t dma_addr = sess->dma_addr[id->msg_id]; 2039cb83748SJack Wang struct rtrs_srv_mr *srv_mr; 2049cb83748SJack Wang struct rtrs_srv *srv = sess->srv; 205e6daa8f6SJack Wang struct ib_send_wr inv_wr; 206e6daa8f6SJack Wang struct ib_rdma_wr imm_wr; 2079cb83748SJack Wang struct ib_rdma_wr *wr = NULL; 2089cb83748SJack Wang enum ib_send_flags flags; 2099cb83748SJack Wang size_t sg_cnt; 2109cb83748SJack Wang int err, offset; 2119cb83748SJack Wang bool need_inval; 2129cb83748SJack Wang u32 rkey = 0; 2139cb83748SJack Wang struct ib_reg_wr rwr; 2149cb83748SJack Wang struct ib_sge *plist; 2159cb83748SJack Wang struct ib_sge list; 2169cb83748SJack Wang 2179cb83748SJack Wang sg_cnt = le16_to_cpu(id->rd_msg->sg_cnt); 2189cb83748SJack Wang need_inval = le16_to_cpu(id->rd_msg->flags) & RTRS_MSG_NEED_INVAL_F; 2199cb83748SJack Wang if (unlikely(sg_cnt != 1)) 2209cb83748SJack Wang return -EINVAL; 2219cb83748SJack Wang 2229cb83748SJack Wang offset = 0; 2239cb83748SJack Wang 2249cb83748SJack Wang wr = &id->tx_wr; 2259cb83748SJack Wang plist = &id->tx_sg; 2269cb83748SJack Wang plist->addr = dma_addr + offset; 2279cb83748SJack Wang plist->length = le32_to_cpu(id->rd_msg->desc[0].len); 2289cb83748SJack Wang 2299cb83748SJack Wang /* WR will fail with length error 2309cb83748SJack Wang * if this is 0 2319cb83748SJack Wang */ 2329cb83748SJack Wang if (unlikely(plist->length == 0)) { 2339cb83748SJack Wang rtrs_err(s, "Invalid RDMA-Write sg list length 0\n"); 2349cb83748SJack Wang return -EINVAL; 2359cb83748SJack Wang } 2369cb83748SJack Wang 2379cb83748SJack Wang plist->lkey = sess->s.dev->ib_pd->local_dma_lkey; 2389cb83748SJack Wang offset += plist->length; 2399cb83748SJack Wang 2409cb83748SJack Wang wr->wr.sg_list = plist; 2419cb83748SJack Wang wr->wr.num_sge = 1; 2429cb83748SJack Wang wr->remote_addr = le64_to_cpu(id->rd_msg->desc[0].addr); 2439cb83748SJack Wang wr->rkey = le32_to_cpu(id->rd_msg->desc[0].key); 2449cb83748SJack Wang if (rkey == 0) 2459cb83748SJack Wang rkey = wr->rkey; 2469cb83748SJack Wang else 2479cb83748SJack Wang /* Only one key is actually used */ 2489cb83748SJack Wang WARN_ON_ONCE(rkey != wr->rkey); 2499cb83748SJack Wang 2509cb83748SJack Wang wr->wr.opcode = IB_WR_RDMA_WRITE; 2518537f2deSJack Wang wr->wr.wr_cqe = &io_comp_cqe; 2529cb83748SJack Wang wr->wr.ex.imm_data = 0; 2539cb83748SJack Wang wr->wr.send_flags = 0; 2549cb83748SJack Wang 2559cb83748SJack Wang if (need_inval && always_invalidate) { 2569cb83748SJack Wang wr->wr.next = &rwr.wr; 2579cb83748SJack Wang rwr.wr.next = &inv_wr; 258e6daa8f6SJack Wang inv_wr.next = &imm_wr.wr; 2599cb83748SJack Wang } else if (always_invalidate) { 2609cb83748SJack Wang wr->wr.next = &rwr.wr; 261e6daa8f6SJack Wang rwr.wr.next = &imm_wr.wr; 2629cb83748SJack Wang } else if (need_inval) { 2639cb83748SJack Wang wr->wr.next = &inv_wr; 264e6daa8f6SJack Wang inv_wr.next = &imm_wr.wr; 2659cb83748SJack Wang } else { 266e6daa8f6SJack Wang wr->wr.next = &imm_wr.wr; 2679cb83748SJack Wang } 2689cb83748SJack Wang /* 2699cb83748SJack Wang * From time to time we have to post signaled sends, 2709cb83748SJack Wang * or send queue will fill up and only QP reset can help. 2719cb83748SJack Wang */ 2729cb83748SJack Wang flags = (atomic_inc_return(&id->con->wr_cnt) % srv->queue_depth) ? 2739cb83748SJack Wang 0 : IB_SEND_SIGNALED; 2749cb83748SJack Wang 2759cb83748SJack Wang if (need_inval) { 2769cb83748SJack Wang inv_wr.sg_list = NULL; 2779cb83748SJack Wang inv_wr.num_sge = 0; 2789cb83748SJack Wang inv_wr.opcode = IB_WR_SEND_WITH_INV; 2798537f2deSJack Wang inv_wr.wr_cqe = &io_comp_cqe; 2809cb83748SJack Wang inv_wr.send_flags = 0; 2819cb83748SJack Wang inv_wr.ex.invalidate_rkey = rkey; 2829cb83748SJack Wang } 2839cb83748SJack Wang 284e6daa8f6SJack Wang imm_wr.wr.next = NULL; 2859cb83748SJack Wang if (always_invalidate) { 2869cb83748SJack Wang struct rtrs_msg_rkey_rsp *msg; 2879cb83748SJack Wang 2889cb83748SJack Wang srv_mr = &sess->mrs[id->msg_id]; 2899cb83748SJack Wang rwr.wr.opcode = IB_WR_REG_MR; 2908537f2deSJack Wang rwr.wr.wr_cqe = &local_reg_cqe; 2919cb83748SJack Wang rwr.wr.num_sge = 0; 2929cb83748SJack Wang rwr.mr = srv_mr->mr; 2939cb83748SJack Wang rwr.wr.send_flags = 0; 2949cb83748SJack Wang rwr.key = srv_mr->mr->rkey; 2959cb83748SJack Wang rwr.access = (IB_ACCESS_LOCAL_WRITE | 2969cb83748SJack Wang IB_ACCESS_REMOTE_WRITE); 2979cb83748SJack Wang msg = srv_mr->iu->buf; 2989cb83748SJack Wang msg->buf_id = cpu_to_le16(id->msg_id); 2999cb83748SJack Wang msg->type = cpu_to_le16(RTRS_MSG_RKEY_RSP); 3009cb83748SJack Wang msg->rkey = cpu_to_le32(srv_mr->mr->rkey); 3019cb83748SJack Wang 3029cb83748SJack Wang list.addr = srv_mr->iu->dma_addr; 3039cb83748SJack Wang list.length = sizeof(*msg); 3049cb83748SJack Wang list.lkey = sess->s.dev->ib_pd->local_dma_lkey; 305e6daa8f6SJack Wang imm_wr.wr.sg_list = &list; 306e6daa8f6SJack Wang imm_wr.wr.num_sge = 1; 307e6daa8f6SJack Wang imm_wr.wr.opcode = IB_WR_SEND_WITH_IMM; 3089cb83748SJack Wang ib_dma_sync_single_for_device(sess->s.dev->ib_dev, 3099cb83748SJack Wang srv_mr->iu->dma_addr, 3109cb83748SJack Wang srv_mr->iu->size, DMA_TO_DEVICE); 3119cb83748SJack Wang } else { 312e6daa8f6SJack Wang imm_wr.wr.sg_list = NULL; 313e6daa8f6SJack Wang imm_wr.wr.num_sge = 0; 314e6daa8f6SJack Wang imm_wr.wr.opcode = IB_WR_RDMA_WRITE_WITH_IMM; 3159cb83748SJack Wang } 316e6daa8f6SJack Wang imm_wr.wr.send_flags = flags; 317e6daa8f6SJack Wang imm_wr.wr.ex.imm_data = cpu_to_be32(rtrs_to_io_rsp_imm(id->msg_id, 3189cb83748SJack Wang 0, need_inval)); 3199cb83748SJack Wang 320e6daa8f6SJack Wang imm_wr.wr.wr_cqe = &io_comp_cqe; 3219cb83748SJack Wang ib_dma_sync_single_for_device(sess->s.dev->ib_dev, dma_addr, 3229cb83748SJack Wang offset, DMA_BIDIRECTIONAL); 3239cb83748SJack Wang 3249cb83748SJack Wang err = ib_post_send(id->con->c.qp, &id->tx_wr.wr, NULL); 3259cb83748SJack Wang if (unlikely(err)) 3269cb83748SJack Wang rtrs_err(s, 3279cb83748SJack Wang "Posting RDMA-Write-Request to QP failed, err: %d\n", 3289cb83748SJack Wang err); 3299cb83748SJack Wang 3309cb83748SJack Wang return err; 3319cb83748SJack Wang } 3329cb83748SJack Wang 3339cb83748SJack Wang /** 3349cb83748SJack Wang * send_io_resp_imm() - respond to client with empty IMM on failed READ/WRITE 3359cb83748SJack Wang * requests or on successful WRITE request. 3369cb83748SJack Wang * @con: the connection to send back result 3379cb83748SJack Wang * @id: the id associated with the IO 3389cb83748SJack Wang * @errno: the error number of the IO. 3399cb83748SJack Wang * 3409cb83748SJack Wang * Return 0 on success, errno otherwise. 3419cb83748SJack Wang */ 3429cb83748SJack Wang static int send_io_resp_imm(struct rtrs_srv_con *con, struct rtrs_srv_op *id, 3439cb83748SJack Wang int errno) 3449cb83748SJack Wang { 3459cb83748SJack Wang struct rtrs_sess *s = con->c.sess; 3469cb83748SJack Wang struct rtrs_srv_sess *sess = to_srv_sess(s); 347e6daa8f6SJack Wang struct ib_send_wr inv_wr, *wr = NULL; 348e6daa8f6SJack Wang struct ib_rdma_wr imm_wr; 3499cb83748SJack Wang struct ib_reg_wr rwr; 3509cb83748SJack Wang struct rtrs_srv *srv = sess->srv; 3519cb83748SJack Wang struct rtrs_srv_mr *srv_mr; 3529cb83748SJack Wang bool need_inval = false; 3539cb83748SJack Wang enum ib_send_flags flags; 3549cb83748SJack Wang u32 imm; 3559cb83748SJack Wang int err; 3569cb83748SJack Wang 3579cb83748SJack Wang if (id->dir == READ) { 3589cb83748SJack Wang struct rtrs_msg_rdma_read *rd_msg = id->rd_msg; 3599cb83748SJack Wang size_t sg_cnt; 3609cb83748SJack Wang 3619cb83748SJack Wang need_inval = le16_to_cpu(rd_msg->flags) & 3629cb83748SJack Wang RTRS_MSG_NEED_INVAL_F; 3639cb83748SJack Wang sg_cnt = le16_to_cpu(rd_msg->sg_cnt); 3649cb83748SJack Wang 3659cb83748SJack Wang if (need_inval) { 3669cb83748SJack Wang if (likely(sg_cnt)) { 3678537f2deSJack Wang inv_wr.wr_cqe = &io_comp_cqe; 3689cb83748SJack Wang inv_wr.sg_list = NULL; 3699cb83748SJack Wang inv_wr.num_sge = 0; 3709cb83748SJack Wang inv_wr.opcode = IB_WR_SEND_WITH_INV; 3719cb83748SJack Wang inv_wr.send_flags = 0; 3729cb83748SJack Wang /* Only one key is actually used */ 3739cb83748SJack Wang inv_wr.ex.invalidate_rkey = 3749cb83748SJack Wang le32_to_cpu(rd_msg->desc[0].key); 3759cb83748SJack Wang } else { 3769cb83748SJack Wang WARN_ON_ONCE(1); 3779cb83748SJack Wang need_inval = false; 3789cb83748SJack Wang } 3799cb83748SJack Wang } 3809cb83748SJack Wang } 3819cb83748SJack Wang 3829cb83748SJack Wang if (need_inval && always_invalidate) { 3839cb83748SJack Wang wr = &inv_wr; 3849cb83748SJack Wang inv_wr.next = &rwr.wr; 385e6daa8f6SJack Wang rwr.wr.next = &imm_wr.wr; 3869cb83748SJack Wang } else if (always_invalidate) { 3879cb83748SJack Wang wr = &rwr.wr; 388e6daa8f6SJack Wang rwr.wr.next = &imm_wr.wr; 3899cb83748SJack Wang } else if (need_inval) { 3909cb83748SJack Wang wr = &inv_wr; 391e6daa8f6SJack Wang inv_wr.next = &imm_wr.wr; 3929cb83748SJack Wang } else { 393e6daa8f6SJack Wang wr = &imm_wr.wr; 3949cb83748SJack Wang } 3959cb83748SJack Wang /* 3969cb83748SJack Wang * From time to time we have to post signalled sends, 3979cb83748SJack Wang * or send queue will fill up and only QP reset can help. 3989cb83748SJack Wang */ 3999cb83748SJack Wang flags = (atomic_inc_return(&con->wr_cnt) % srv->queue_depth) ? 4009cb83748SJack Wang 0 : IB_SEND_SIGNALED; 4019cb83748SJack Wang imm = rtrs_to_io_rsp_imm(id->msg_id, errno, need_inval); 402e6daa8f6SJack Wang imm_wr.wr.next = NULL; 4039cb83748SJack Wang if (always_invalidate) { 4049cb83748SJack Wang struct ib_sge list; 4059cb83748SJack Wang struct rtrs_msg_rkey_rsp *msg; 4069cb83748SJack Wang 4079cb83748SJack Wang srv_mr = &sess->mrs[id->msg_id]; 408e6daa8f6SJack Wang rwr.wr.next = &imm_wr.wr; 4099cb83748SJack Wang rwr.wr.opcode = IB_WR_REG_MR; 4108537f2deSJack Wang rwr.wr.wr_cqe = &local_reg_cqe; 4119cb83748SJack Wang rwr.wr.num_sge = 0; 4129cb83748SJack Wang rwr.wr.send_flags = 0; 4139cb83748SJack Wang rwr.mr = srv_mr->mr; 4149cb83748SJack Wang rwr.key = srv_mr->mr->rkey; 4159cb83748SJack Wang rwr.access = (IB_ACCESS_LOCAL_WRITE | 4169cb83748SJack Wang IB_ACCESS_REMOTE_WRITE); 4179cb83748SJack Wang msg = srv_mr->iu->buf; 4189cb83748SJack Wang msg->buf_id = cpu_to_le16(id->msg_id); 4199cb83748SJack Wang msg->type = cpu_to_le16(RTRS_MSG_RKEY_RSP); 4209cb83748SJack Wang msg->rkey = cpu_to_le32(srv_mr->mr->rkey); 4219cb83748SJack Wang 4229cb83748SJack Wang list.addr = srv_mr->iu->dma_addr; 4239cb83748SJack Wang list.length = sizeof(*msg); 4249cb83748SJack Wang list.lkey = sess->s.dev->ib_pd->local_dma_lkey; 425e6daa8f6SJack Wang imm_wr.wr.sg_list = &list; 426e6daa8f6SJack Wang imm_wr.wr.num_sge = 1; 427e6daa8f6SJack Wang imm_wr.wr.opcode = IB_WR_SEND_WITH_IMM; 4289cb83748SJack Wang ib_dma_sync_single_for_device(sess->s.dev->ib_dev, 4299cb83748SJack Wang srv_mr->iu->dma_addr, 4309cb83748SJack Wang srv_mr->iu->size, DMA_TO_DEVICE); 4319cb83748SJack Wang } else { 432e6daa8f6SJack Wang imm_wr.wr.sg_list = NULL; 433e6daa8f6SJack Wang imm_wr.wr.num_sge = 0; 434e6daa8f6SJack Wang imm_wr.wr.opcode = IB_WR_RDMA_WRITE_WITH_IMM; 4359cb83748SJack Wang } 436e6daa8f6SJack Wang imm_wr.wr.send_flags = flags; 437e6daa8f6SJack Wang imm_wr.wr.wr_cqe = &io_comp_cqe; 4389cb83748SJack Wang 439e6daa8f6SJack Wang imm_wr.wr.ex.imm_data = cpu_to_be32(imm); 4409cb83748SJack Wang 4419cb83748SJack Wang err = ib_post_send(id->con->c.qp, wr, NULL); 4429cb83748SJack Wang if (unlikely(err)) 4439cb83748SJack Wang rtrs_err_rl(s, "Posting RDMA-Reply to QP failed, err: %d\n", 4449cb83748SJack Wang err); 4459cb83748SJack Wang 4469cb83748SJack Wang return err; 4479cb83748SJack Wang } 4489cb83748SJack Wang 4499cb83748SJack Wang void close_sess(struct rtrs_srv_sess *sess) 4509cb83748SJack Wang { 451ffea6ad1SGuoqing Jiang if (rtrs_srv_change_state(sess, RTRS_SRV_CLOSING)) 4529cb83748SJack Wang queue_work(rtrs_wq, &sess->close_work); 4539cb83748SJack Wang WARN_ON(sess->state != RTRS_SRV_CLOSING); 4549cb83748SJack Wang } 4559cb83748SJack Wang 4569cb83748SJack Wang static inline const char *rtrs_srv_state_str(enum rtrs_srv_state state) 4579cb83748SJack Wang { 4589cb83748SJack Wang switch (state) { 4599cb83748SJack Wang case RTRS_SRV_CONNECTING: 4609cb83748SJack Wang return "RTRS_SRV_CONNECTING"; 4619cb83748SJack Wang case RTRS_SRV_CONNECTED: 4629cb83748SJack Wang return "RTRS_SRV_CONNECTED"; 4639cb83748SJack Wang case RTRS_SRV_CLOSING: 4649cb83748SJack Wang return "RTRS_SRV_CLOSING"; 4659cb83748SJack Wang case RTRS_SRV_CLOSED: 4669cb83748SJack Wang return "RTRS_SRV_CLOSED"; 4679cb83748SJack Wang default: 4689cb83748SJack Wang return "UNKNOWN"; 4699cb83748SJack Wang } 4709cb83748SJack Wang } 4719cb83748SJack Wang 4729cb83748SJack Wang /** 4739cb83748SJack Wang * rtrs_srv_resp_rdma() - Finish an RDMA request 4749cb83748SJack Wang * 4759cb83748SJack Wang * @id: Internal RTRS operation identifier 4769cb83748SJack Wang * @status: Response Code sent to the other side for this operation. 4779cb83748SJack Wang * 0 = success, <=0 error 4789cb83748SJack Wang * Context: any 4799cb83748SJack Wang * 4809cb83748SJack Wang * Finish a RDMA operation. A message is sent to the client and the 4819cb83748SJack Wang * corresponding memory areas will be released. 4829cb83748SJack Wang */ 4839cb83748SJack Wang bool rtrs_srv_resp_rdma(struct rtrs_srv_op *id, int status) 4849cb83748SJack Wang { 4859cb83748SJack Wang struct rtrs_srv_sess *sess; 4869cb83748SJack Wang struct rtrs_srv_con *con; 4879cb83748SJack Wang struct rtrs_sess *s; 4889cb83748SJack Wang int err; 4899cb83748SJack Wang 4909cb83748SJack Wang if (WARN_ON(!id)) 4919cb83748SJack Wang return true; 4929cb83748SJack Wang 4939cb83748SJack Wang con = id->con; 4949cb83748SJack Wang s = con->c.sess; 4959cb83748SJack Wang sess = to_srv_sess(s); 4969cb83748SJack Wang 4979cb83748SJack Wang id->status = status; 4989cb83748SJack Wang 4999cb83748SJack Wang if (unlikely(sess->state != RTRS_SRV_CONNECTED)) { 5009cb83748SJack Wang rtrs_err_rl(s, 50142cdc190SGioh Kim "Sending I/O response failed, session %s is disconnected, sess state %s\n", 50242cdc190SGioh Kim kobject_name(&sess->kobj), 5039cb83748SJack Wang rtrs_srv_state_str(sess->state)); 5049cb83748SJack Wang goto out; 5059cb83748SJack Wang } 5069cb83748SJack Wang if (always_invalidate) { 5079cb83748SJack Wang struct rtrs_srv_mr *mr = &sess->mrs[id->msg_id]; 5089cb83748SJack Wang 5099cb83748SJack Wang ib_update_fast_reg_key(mr->mr, ib_inc_rkey(mr->mr->rkey)); 5109cb83748SJack Wang } 5119cb83748SJack Wang if (unlikely(atomic_sub_return(1, 5129cb83748SJack Wang &con->sq_wr_avail) < 0)) { 51342cdc190SGioh Kim rtrs_err(s, "IB send queue full: sess=%s cid=%d\n", 51442cdc190SGioh Kim kobject_name(&sess->kobj), 51542cdc190SGioh Kim con->c.cid); 5169cb83748SJack Wang atomic_add(1, &con->sq_wr_avail); 5179cb83748SJack Wang spin_lock(&con->rsp_wr_wait_lock); 5189cb83748SJack Wang list_add_tail(&id->wait_list, &con->rsp_wr_wait_list); 5199cb83748SJack Wang spin_unlock(&con->rsp_wr_wait_lock); 5209cb83748SJack Wang return false; 5219cb83748SJack Wang } 5229cb83748SJack Wang 5239cb83748SJack Wang if (status || id->dir == WRITE || !id->rd_msg->sg_cnt) 5249cb83748SJack Wang err = send_io_resp_imm(con, id, status); 5259cb83748SJack Wang else 5269cb83748SJack Wang err = rdma_write_sg(id); 5279cb83748SJack Wang 5289cb83748SJack Wang if (unlikely(err)) { 52942cdc190SGioh Kim rtrs_err_rl(s, "IO response failed: %d: sess=%s\n", err, 53042cdc190SGioh Kim kobject_name(&sess->kobj)); 5319cb83748SJack Wang close_sess(sess); 5329cb83748SJack Wang } 5339cb83748SJack Wang out: 5349cb83748SJack Wang rtrs_srv_put_ops_ids(sess); 5359cb83748SJack Wang return true; 5369cb83748SJack Wang } 5379cb83748SJack Wang EXPORT_SYMBOL(rtrs_srv_resp_rdma); 5389cb83748SJack Wang 5399cb83748SJack Wang /** 5409cb83748SJack Wang * rtrs_srv_set_sess_priv() - Set private pointer in rtrs_srv. 5419cb83748SJack Wang * @srv: Session pointer 5429cb83748SJack Wang * @priv: The private pointer that is associated with the session. 5439cb83748SJack Wang */ 5449cb83748SJack Wang void rtrs_srv_set_sess_priv(struct rtrs_srv *srv, void *priv) 5459cb83748SJack Wang { 5469cb83748SJack Wang srv->priv = priv; 5479cb83748SJack Wang } 5489cb83748SJack Wang EXPORT_SYMBOL(rtrs_srv_set_sess_priv); 5499cb83748SJack Wang 5509cb83748SJack Wang static void unmap_cont_bufs(struct rtrs_srv_sess *sess) 5519cb83748SJack Wang { 5529cb83748SJack Wang int i; 5539cb83748SJack Wang 5549cb83748SJack Wang for (i = 0; i < sess->mrs_num; i++) { 5559cb83748SJack Wang struct rtrs_srv_mr *srv_mr; 5569cb83748SJack Wang 5579cb83748SJack Wang srv_mr = &sess->mrs[i]; 5588bd372acSGioh Kim rtrs_iu_free(srv_mr->iu, sess->s.dev->ib_dev, 1); 5599cb83748SJack Wang ib_dereg_mr(srv_mr->mr); 5609cb83748SJack Wang ib_dma_unmap_sg(sess->s.dev->ib_dev, srv_mr->sgt.sgl, 5619cb83748SJack Wang srv_mr->sgt.nents, DMA_BIDIRECTIONAL); 5629cb83748SJack Wang sg_free_table(&srv_mr->sgt); 5639cb83748SJack Wang } 5649cb83748SJack Wang kfree(sess->mrs); 5659cb83748SJack Wang } 5669cb83748SJack Wang 5679cb83748SJack Wang static int map_cont_bufs(struct rtrs_srv_sess *sess) 5689cb83748SJack Wang { 5699cb83748SJack Wang struct rtrs_srv *srv = sess->srv; 5709cb83748SJack Wang struct rtrs_sess *ss = &sess->s; 5719cb83748SJack Wang int i, mri, err, mrs_num; 5729cb83748SJack Wang unsigned int chunk_bits; 5739cb83748SJack Wang int chunks_per_mr = 1; 5749cb83748SJack Wang 5759cb83748SJack Wang /* 5769cb83748SJack Wang * Here we map queue_depth chunks to MR. Firstly we have to 5779cb83748SJack Wang * figure out how many chunks can we map per MR. 5789cb83748SJack Wang */ 5799cb83748SJack Wang if (always_invalidate) { 5809cb83748SJack Wang /* 5819cb83748SJack Wang * in order to do invalidate for each chunks of memory, we needs 5829cb83748SJack Wang * more memory regions. 5839cb83748SJack Wang */ 5849cb83748SJack Wang mrs_num = srv->queue_depth; 5859cb83748SJack Wang } else { 5869cb83748SJack Wang chunks_per_mr = 5879cb83748SJack Wang sess->s.dev->ib_dev->attrs.max_fast_reg_page_list_len; 5889cb83748SJack Wang mrs_num = DIV_ROUND_UP(srv->queue_depth, chunks_per_mr); 5899cb83748SJack Wang chunks_per_mr = DIV_ROUND_UP(srv->queue_depth, mrs_num); 5909cb83748SJack Wang } 5919cb83748SJack Wang 5929cb83748SJack Wang sess->mrs = kcalloc(mrs_num, sizeof(*sess->mrs), GFP_KERNEL); 5939cb83748SJack Wang if (!sess->mrs) 5949cb83748SJack Wang return -ENOMEM; 5959cb83748SJack Wang 5969cb83748SJack Wang sess->mrs_num = mrs_num; 5979cb83748SJack Wang 5989cb83748SJack Wang for (mri = 0; mri < mrs_num; mri++) { 5999cb83748SJack Wang struct rtrs_srv_mr *srv_mr = &sess->mrs[mri]; 6009cb83748SJack Wang struct sg_table *sgt = &srv_mr->sgt; 6019cb83748SJack Wang struct scatterlist *s; 6029cb83748SJack Wang struct ib_mr *mr; 6039cb83748SJack Wang int nr, chunks; 6049cb83748SJack Wang 6059cb83748SJack Wang chunks = chunks_per_mr * mri; 6069cb83748SJack Wang if (!always_invalidate) 6079cb83748SJack Wang chunks_per_mr = min_t(int, chunks_per_mr, 6089cb83748SJack Wang srv->queue_depth - chunks); 6099cb83748SJack Wang 6109cb83748SJack Wang err = sg_alloc_table(sgt, chunks_per_mr, GFP_KERNEL); 6119cb83748SJack Wang if (err) 6129cb83748SJack Wang goto err; 6139cb83748SJack Wang 6149cb83748SJack Wang for_each_sg(sgt->sgl, s, chunks_per_mr, i) 6159cb83748SJack Wang sg_set_page(s, srv->chunks[chunks + i], 6169cb83748SJack Wang max_chunk_size, 0); 6179cb83748SJack Wang 6189cb83748SJack Wang nr = ib_dma_map_sg(sess->s.dev->ib_dev, sgt->sgl, 6199cb83748SJack Wang sgt->nents, DMA_BIDIRECTIONAL); 6209cb83748SJack Wang if (nr < sgt->nents) { 6219cb83748SJack Wang err = nr < 0 ? nr : -EINVAL; 6229cb83748SJack Wang goto free_sg; 6239cb83748SJack Wang } 6249cb83748SJack Wang mr = ib_alloc_mr(sess->s.dev->ib_pd, IB_MR_TYPE_MEM_REG, 6259cb83748SJack Wang sgt->nents); 6269cb83748SJack Wang if (IS_ERR(mr)) { 6279cb83748SJack Wang err = PTR_ERR(mr); 6289cb83748SJack Wang goto unmap_sg; 6299cb83748SJack Wang } 6309cb83748SJack Wang nr = ib_map_mr_sg(mr, sgt->sgl, sgt->nents, 6319cb83748SJack Wang NULL, max_chunk_size); 632b386cd65SDan Carpenter if (nr < 0 || nr < sgt->nents) { 6339cb83748SJack Wang err = nr < 0 ? nr : -EINVAL; 6349cb83748SJack Wang goto dereg_mr; 6359cb83748SJack Wang } 6369cb83748SJack Wang 6379cb83748SJack Wang if (always_invalidate) { 6389cb83748SJack Wang srv_mr->iu = rtrs_iu_alloc(1, 6399cb83748SJack Wang sizeof(struct rtrs_msg_rkey_rsp), 6409cb83748SJack Wang GFP_KERNEL, sess->s.dev->ib_dev, 6419cb83748SJack Wang DMA_TO_DEVICE, rtrs_srv_rdma_done); 6429cb83748SJack Wang if (!srv_mr->iu) { 6436b31afceSWei Yongjun err = -ENOMEM; 6446b31afceSWei Yongjun rtrs_err(ss, "rtrs_iu_alloc(), err: %d\n", err); 645f77c4839SGuoqing Jiang goto dereg_mr; 6469cb83748SJack Wang } 6479cb83748SJack Wang } 6489cb83748SJack Wang /* Eventually dma addr for each chunk can be cached */ 6499cb83748SJack Wang for_each_sg(sgt->sgl, s, sgt->orig_nents, i) 6509cb83748SJack Wang sess->dma_addr[chunks + i] = sg_dma_address(s); 6519cb83748SJack Wang 6529cb83748SJack Wang ib_update_fast_reg_key(mr, ib_inc_rkey(mr->rkey)); 6539cb83748SJack Wang srv_mr->mr = mr; 6549cb83748SJack Wang 6559cb83748SJack Wang continue; 6569cb83748SJack Wang err: 6579cb83748SJack Wang while (mri--) { 6589cb83748SJack Wang srv_mr = &sess->mrs[mri]; 6599cb83748SJack Wang sgt = &srv_mr->sgt; 6609cb83748SJack Wang mr = srv_mr->mr; 6618bd372acSGioh Kim rtrs_iu_free(srv_mr->iu, sess->s.dev->ib_dev, 1); 6629cb83748SJack Wang dereg_mr: 6639cb83748SJack Wang ib_dereg_mr(mr); 6649cb83748SJack Wang unmap_sg: 6659cb83748SJack Wang ib_dma_unmap_sg(sess->s.dev->ib_dev, sgt->sgl, 6669cb83748SJack Wang sgt->nents, DMA_BIDIRECTIONAL); 6679cb83748SJack Wang free_sg: 6689cb83748SJack Wang sg_free_table(sgt); 6699cb83748SJack Wang } 6709cb83748SJack Wang kfree(sess->mrs); 6719cb83748SJack Wang 6729cb83748SJack Wang return err; 6739cb83748SJack Wang } 6749cb83748SJack Wang 6759cb83748SJack Wang chunk_bits = ilog2(srv->queue_depth - 1) + 1; 6769cb83748SJack Wang sess->mem_bits = (MAX_IMM_PAYL_BITS - chunk_bits); 6779cb83748SJack Wang 6789cb83748SJack Wang return 0; 6799cb83748SJack Wang } 6809cb83748SJack Wang 6819cb83748SJack Wang static void rtrs_srv_hb_err_handler(struct rtrs_con *c) 6829cb83748SJack Wang { 6839cb83748SJack Wang close_sess(to_srv_sess(c->sess)); 6849cb83748SJack Wang } 6859cb83748SJack Wang 6869cb83748SJack Wang static void rtrs_srv_init_hb(struct rtrs_srv_sess *sess) 6879cb83748SJack Wang { 6889cb83748SJack Wang rtrs_init_hb(&sess->s, &io_comp_cqe, 6899cb83748SJack Wang RTRS_HB_INTERVAL_MS, 6909cb83748SJack Wang RTRS_HB_MISSED_MAX, 6919cb83748SJack Wang rtrs_srv_hb_err_handler, 6929cb83748SJack Wang rtrs_wq); 6939cb83748SJack Wang } 6949cb83748SJack Wang 6959cb83748SJack Wang static void rtrs_srv_start_hb(struct rtrs_srv_sess *sess) 6969cb83748SJack Wang { 6979cb83748SJack Wang rtrs_start_hb(&sess->s); 6989cb83748SJack Wang } 6999cb83748SJack Wang 7009cb83748SJack Wang static void rtrs_srv_stop_hb(struct rtrs_srv_sess *sess) 7019cb83748SJack Wang { 7029cb83748SJack Wang rtrs_stop_hb(&sess->s); 7039cb83748SJack Wang } 7049cb83748SJack Wang 7059cb83748SJack Wang static void rtrs_srv_info_rsp_done(struct ib_cq *cq, struct ib_wc *wc) 7069cb83748SJack Wang { 7073b89e92cSJack Wang struct rtrs_srv_con *con = to_srv_con(wc->qp->qp_context); 7089cb83748SJack Wang struct rtrs_sess *s = con->c.sess; 7099cb83748SJack Wang struct rtrs_srv_sess *sess = to_srv_sess(s); 7109cb83748SJack Wang struct rtrs_iu *iu; 7119cb83748SJack Wang 7129cb83748SJack Wang iu = container_of(wc->wr_cqe, struct rtrs_iu, cqe); 7138bd372acSGioh Kim rtrs_iu_free(iu, sess->s.dev->ib_dev, 1); 7149cb83748SJack Wang 7159cb83748SJack Wang if (unlikely(wc->status != IB_WC_SUCCESS)) { 7169cb83748SJack Wang rtrs_err(s, "Sess info response send failed: %s\n", 7179cb83748SJack Wang ib_wc_status_msg(wc->status)); 7189cb83748SJack Wang close_sess(sess); 7199cb83748SJack Wang return; 7209cb83748SJack Wang } 7219cb83748SJack Wang WARN_ON(wc->opcode != IB_WC_SEND); 7229cb83748SJack Wang } 7239cb83748SJack Wang 7249cb83748SJack Wang static void rtrs_srv_sess_up(struct rtrs_srv_sess *sess) 7259cb83748SJack Wang { 7269cb83748SJack Wang struct rtrs_srv *srv = sess->srv; 7279cb83748SJack Wang struct rtrs_srv_ctx *ctx = srv->ctx; 7289cb83748SJack Wang int up; 7299cb83748SJack Wang 7309cb83748SJack Wang mutex_lock(&srv->paths_ev_mutex); 7319cb83748SJack Wang up = ++srv->paths_up; 7329cb83748SJack Wang if (up == 1) 7339cb83748SJack Wang ctx->ops.link_ev(srv, RTRS_SRV_LINK_EV_CONNECTED, NULL); 7349cb83748SJack Wang mutex_unlock(&srv->paths_ev_mutex); 7359cb83748SJack Wang 7369cb83748SJack Wang /* Mark session as established */ 7379cb83748SJack Wang sess->established = true; 7389cb83748SJack Wang } 7399cb83748SJack Wang 7409cb83748SJack Wang static void rtrs_srv_sess_down(struct rtrs_srv_sess *sess) 7419cb83748SJack Wang { 7429cb83748SJack Wang struct rtrs_srv *srv = sess->srv; 7439cb83748SJack Wang struct rtrs_srv_ctx *ctx = srv->ctx; 7449cb83748SJack Wang 7459cb83748SJack Wang if (!sess->established) 7469cb83748SJack Wang return; 7479cb83748SJack Wang 7489cb83748SJack Wang sess->established = false; 7499cb83748SJack Wang mutex_lock(&srv->paths_ev_mutex); 7509cb83748SJack Wang WARN_ON(!srv->paths_up); 7519cb83748SJack Wang if (--srv->paths_up == 0) 7529cb83748SJack Wang ctx->ops.link_ev(srv, RTRS_SRV_LINK_EV_DISCONNECTED, srv->priv); 7539cb83748SJack Wang mutex_unlock(&srv->paths_ev_mutex); 7549cb83748SJack Wang } 7559cb83748SJack Wang 75607c14027SGioh Kim static bool exist_sessname(struct rtrs_srv_ctx *ctx, 75707c14027SGioh Kim const char *sessname, const uuid_t *path_uuid) 75807c14027SGioh Kim { 75907c14027SGioh Kim struct rtrs_srv *srv; 76007c14027SGioh Kim struct rtrs_srv_sess *sess; 76107c14027SGioh Kim bool found = false; 76207c14027SGioh Kim 76307c14027SGioh Kim mutex_lock(&ctx->srv_mutex); 76407c14027SGioh Kim list_for_each_entry(srv, &ctx->srv_list, ctx_list) { 76507c14027SGioh Kim mutex_lock(&srv->paths_mutex); 76607c14027SGioh Kim 76707c14027SGioh Kim /* when a client with same uuid and same sessname tried to add a path */ 76807c14027SGioh Kim if (uuid_equal(&srv->paths_uuid, path_uuid)) { 76907c14027SGioh Kim mutex_unlock(&srv->paths_mutex); 77007c14027SGioh Kim continue; 77107c14027SGioh Kim } 77207c14027SGioh Kim 77307c14027SGioh Kim list_for_each_entry(sess, &srv->paths_list, s.entry) { 77407c14027SGioh Kim if (strlen(sess->s.sessname) == strlen(sessname) && 77507c14027SGioh Kim !strcmp(sess->s.sessname, sessname)) { 77607c14027SGioh Kim found = true; 77707c14027SGioh Kim break; 77807c14027SGioh Kim } 77907c14027SGioh Kim } 78007c14027SGioh Kim mutex_unlock(&srv->paths_mutex); 78107c14027SGioh Kim if (found) 78207c14027SGioh Kim break; 78307c14027SGioh Kim } 78407c14027SGioh Kim mutex_unlock(&ctx->srv_mutex); 78507c14027SGioh Kim return found; 78607c14027SGioh Kim } 78707c14027SGioh Kim 7889cb83748SJack Wang static int post_recv_sess(struct rtrs_srv_sess *sess); 78907c14027SGioh Kim static int rtrs_rdma_do_reject(struct rdma_cm_id *cm_id, int errno); 7909cb83748SJack Wang 7919cb83748SJack Wang static int process_info_req(struct rtrs_srv_con *con, 7929cb83748SJack Wang struct rtrs_msg_info_req *msg) 7939cb83748SJack Wang { 7949cb83748SJack Wang struct rtrs_sess *s = con->c.sess; 7959cb83748SJack Wang struct rtrs_srv_sess *sess = to_srv_sess(s); 7969cb83748SJack Wang struct ib_send_wr *reg_wr = NULL; 7979cb83748SJack Wang struct rtrs_msg_info_rsp *rsp; 7989cb83748SJack Wang struct rtrs_iu *tx_iu; 7999cb83748SJack Wang struct ib_reg_wr *rwr; 8009cb83748SJack Wang int mri, err; 8019cb83748SJack Wang size_t tx_sz; 8029cb83748SJack Wang 8039cb83748SJack Wang err = post_recv_sess(sess); 8049cb83748SJack Wang if (unlikely(err)) { 8059cb83748SJack Wang rtrs_err(s, "post_recv_sess(), err: %d\n", err); 8069cb83748SJack Wang return err; 8079cb83748SJack Wang } 80807c14027SGioh Kim 80907c14027SGioh Kim if (exist_sessname(sess->srv->ctx, 81007c14027SGioh Kim msg->sessname, &sess->srv->paths_uuid)) { 81107c14027SGioh Kim rtrs_err(s, "sessname is duplicated: %s\n", msg->sessname); 81207c14027SGioh Kim return -EPERM; 81307c14027SGioh Kim } 81407c14027SGioh Kim strscpy(sess->s.sessname, msg->sessname, sizeof(sess->s.sessname)); 81507c14027SGioh Kim 8169cb83748SJack Wang rwr = kcalloc(sess->mrs_num, sizeof(*rwr), GFP_KERNEL); 8179cb83748SJack Wang if (unlikely(!rwr)) 8189cb83748SJack Wang return -ENOMEM; 8199cb83748SJack Wang 8209cb83748SJack Wang tx_sz = sizeof(*rsp); 8219cb83748SJack Wang tx_sz += sizeof(rsp->desc[0]) * sess->mrs_num; 8229cb83748SJack Wang tx_iu = rtrs_iu_alloc(1, tx_sz, GFP_KERNEL, sess->s.dev->ib_dev, 8239cb83748SJack Wang DMA_TO_DEVICE, rtrs_srv_info_rsp_done); 8249cb83748SJack Wang if (unlikely(!tx_iu)) { 8259cb83748SJack Wang err = -ENOMEM; 8269cb83748SJack Wang goto rwr_free; 8279cb83748SJack Wang } 8289cb83748SJack Wang 8299cb83748SJack Wang rsp = tx_iu->buf; 8309cb83748SJack Wang rsp->type = cpu_to_le16(RTRS_MSG_INFO_RSP); 8319cb83748SJack Wang rsp->sg_cnt = cpu_to_le16(sess->mrs_num); 8329cb83748SJack Wang 8339cb83748SJack Wang for (mri = 0; mri < sess->mrs_num; mri++) { 8349cb83748SJack Wang struct ib_mr *mr = sess->mrs[mri].mr; 8359cb83748SJack Wang 8369cb83748SJack Wang rsp->desc[mri].addr = cpu_to_le64(mr->iova); 8379cb83748SJack Wang rsp->desc[mri].key = cpu_to_le32(mr->rkey); 8389cb83748SJack Wang rsp->desc[mri].len = cpu_to_le32(mr->length); 8399cb83748SJack Wang 8409cb83748SJack Wang /* 8419cb83748SJack Wang * Fill in reg MR request and chain them *backwards* 8429cb83748SJack Wang */ 8439cb83748SJack Wang rwr[mri].wr.next = mri ? &rwr[mri - 1].wr : NULL; 8449cb83748SJack Wang rwr[mri].wr.opcode = IB_WR_REG_MR; 8459cb83748SJack Wang rwr[mri].wr.wr_cqe = &local_reg_cqe; 8469cb83748SJack Wang rwr[mri].wr.num_sge = 0; 847e8ae7ddbSJack Wang rwr[mri].wr.send_flags = 0; 8489cb83748SJack Wang rwr[mri].mr = mr; 8499cb83748SJack Wang rwr[mri].key = mr->rkey; 8509cb83748SJack Wang rwr[mri].access = (IB_ACCESS_LOCAL_WRITE | 8519cb83748SJack Wang IB_ACCESS_REMOTE_WRITE); 8529cb83748SJack Wang reg_wr = &rwr[mri].wr; 8539cb83748SJack Wang } 8549cb83748SJack Wang 8559cb83748SJack Wang err = rtrs_srv_create_sess_files(sess); 8569cb83748SJack Wang if (unlikely(err)) 8579cb83748SJack Wang goto iu_free; 8589cb83748SJack Wang kobject_get(&sess->kobj); 8599cb83748SJack Wang get_device(&sess->srv->dev); 8609cb83748SJack Wang rtrs_srv_change_state(sess, RTRS_SRV_CONNECTED); 8619cb83748SJack Wang rtrs_srv_start_hb(sess); 8629cb83748SJack Wang 8639cb83748SJack Wang /* 8649cb83748SJack Wang * We do not account number of established connections at the current 8659cb83748SJack Wang * moment, we rely on the client, which should send info request when 8669cb83748SJack Wang * all connections are successfully established. Thus, simply notify 8679cb83748SJack Wang * listener with a proper event if we are the first path. 8689cb83748SJack Wang */ 8699cb83748SJack Wang rtrs_srv_sess_up(sess); 8709cb83748SJack Wang 8719cb83748SJack Wang ib_dma_sync_single_for_device(sess->s.dev->ib_dev, tx_iu->dma_addr, 8729cb83748SJack Wang tx_iu->size, DMA_TO_DEVICE); 8739cb83748SJack Wang 8749cb83748SJack Wang /* Send info response */ 8759cb83748SJack Wang err = rtrs_iu_post_send(&con->c, tx_iu, tx_sz, reg_wr); 8769cb83748SJack Wang if (unlikely(err)) { 8779cb83748SJack Wang rtrs_err(s, "rtrs_iu_post_send(), err: %d\n", err); 8789cb83748SJack Wang iu_free: 8798bd372acSGioh Kim rtrs_iu_free(tx_iu, sess->s.dev->ib_dev, 1); 8809cb83748SJack Wang } 8819cb83748SJack Wang rwr_free: 8829cb83748SJack Wang kfree(rwr); 8839cb83748SJack Wang 8849cb83748SJack Wang return err; 8859cb83748SJack Wang } 8869cb83748SJack Wang 8879cb83748SJack Wang static void rtrs_srv_info_req_done(struct ib_cq *cq, struct ib_wc *wc) 8889cb83748SJack Wang { 8893b89e92cSJack Wang struct rtrs_srv_con *con = to_srv_con(wc->qp->qp_context); 8909cb83748SJack Wang struct rtrs_sess *s = con->c.sess; 8919cb83748SJack Wang struct rtrs_srv_sess *sess = to_srv_sess(s); 8929cb83748SJack Wang struct rtrs_msg_info_req *msg; 8939cb83748SJack Wang struct rtrs_iu *iu; 8949cb83748SJack Wang int err; 8959cb83748SJack Wang 8969cb83748SJack Wang WARN_ON(con->c.cid); 8979cb83748SJack Wang 8989cb83748SJack Wang iu = container_of(wc->wr_cqe, struct rtrs_iu, cqe); 8999cb83748SJack Wang if (unlikely(wc->status != IB_WC_SUCCESS)) { 9009cb83748SJack Wang rtrs_err(s, "Sess info request receive failed: %s\n", 9019cb83748SJack Wang ib_wc_status_msg(wc->status)); 9029cb83748SJack Wang goto close; 9039cb83748SJack Wang } 9049cb83748SJack Wang WARN_ON(wc->opcode != IB_WC_RECV); 9059cb83748SJack Wang 9069cb83748SJack Wang if (unlikely(wc->byte_len < sizeof(*msg))) { 9079cb83748SJack Wang rtrs_err(s, "Sess info request is malformed: size %d\n", 9089cb83748SJack Wang wc->byte_len); 9099cb83748SJack Wang goto close; 9109cb83748SJack Wang } 9119cb83748SJack Wang ib_dma_sync_single_for_cpu(sess->s.dev->ib_dev, iu->dma_addr, 9129cb83748SJack Wang iu->size, DMA_FROM_DEVICE); 9139cb83748SJack Wang msg = iu->buf; 9149cb83748SJack Wang if (unlikely(le16_to_cpu(msg->type) != RTRS_MSG_INFO_REQ)) { 9159cb83748SJack Wang rtrs_err(s, "Sess info request is malformed: type %d\n", 9169cb83748SJack Wang le16_to_cpu(msg->type)); 9179cb83748SJack Wang goto close; 9189cb83748SJack Wang } 9199cb83748SJack Wang err = process_info_req(con, msg); 9209cb83748SJack Wang if (unlikely(err)) 9219cb83748SJack Wang goto close; 9229cb83748SJack Wang 9239cb83748SJack Wang out: 9248bd372acSGioh Kim rtrs_iu_free(iu, sess->s.dev->ib_dev, 1); 9259cb83748SJack Wang return; 9269cb83748SJack Wang close: 9279cb83748SJack Wang close_sess(sess); 9289cb83748SJack Wang goto out; 9299cb83748SJack Wang } 9309cb83748SJack Wang 9319cb83748SJack Wang static int post_recv_info_req(struct rtrs_srv_con *con) 9329cb83748SJack Wang { 9339cb83748SJack Wang struct rtrs_sess *s = con->c.sess; 9349cb83748SJack Wang struct rtrs_srv_sess *sess = to_srv_sess(s); 9359cb83748SJack Wang struct rtrs_iu *rx_iu; 9369cb83748SJack Wang int err; 9379cb83748SJack Wang 9389cb83748SJack Wang rx_iu = rtrs_iu_alloc(1, sizeof(struct rtrs_msg_info_req), 9399cb83748SJack Wang GFP_KERNEL, sess->s.dev->ib_dev, 9409cb83748SJack Wang DMA_FROM_DEVICE, rtrs_srv_info_req_done); 9419cb83748SJack Wang if (unlikely(!rx_iu)) 9429cb83748SJack Wang return -ENOMEM; 9439cb83748SJack Wang /* Prepare for getting info response */ 9449cb83748SJack Wang err = rtrs_iu_post_recv(&con->c, rx_iu); 9459cb83748SJack Wang if (unlikely(err)) { 9469cb83748SJack Wang rtrs_err(s, "rtrs_iu_post_recv(), err: %d\n", err); 9478bd372acSGioh Kim rtrs_iu_free(rx_iu, sess->s.dev->ib_dev, 1); 9489cb83748SJack Wang return err; 9499cb83748SJack Wang } 9509cb83748SJack Wang 9519cb83748SJack Wang return 0; 9529cb83748SJack Wang } 9539cb83748SJack Wang 9549cb83748SJack Wang static int post_recv_io(struct rtrs_srv_con *con, size_t q_size) 9559cb83748SJack Wang { 9569cb83748SJack Wang int i, err; 9579cb83748SJack Wang 9589cb83748SJack Wang for (i = 0; i < q_size; i++) { 9599cb83748SJack Wang err = rtrs_post_recv_empty(&con->c, &io_comp_cqe); 9609cb83748SJack Wang if (unlikely(err)) 9619cb83748SJack Wang return err; 9629cb83748SJack Wang } 9639cb83748SJack Wang 9649cb83748SJack Wang return 0; 9659cb83748SJack Wang } 9669cb83748SJack Wang 9679cb83748SJack Wang static int post_recv_sess(struct rtrs_srv_sess *sess) 9689cb83748SJack Wang { 9699cb83748SJack Wang struct rtrs_srv *srv = sess->srv; 9709cb83748SJack Wang struct rtrs_sess *s = &sess->s; 9719cb83748SJack Wang size_t q_size; 9729cb83748SJack Wang int err, cid; 9739cb83748SJack Wang 9749cb83748SJack Wang for (cid = 0; cid < sess->s.con_num; cid++) { 9759cb83748SJack Wang if (cid == 0) 9769cb83748SJack Wang q_size = SERVICE_CON_QUEUE_DEPTH; 9779cb83748SJack Wang else 9789cb83748SJack Wang q_size = srv->queue_depth; 9799cb83748SJack Wang 9809cb83748SJack Wang err = post_recv_io(to_srv_con(sess->s.con[cid]), q_size); 9819cb83748SJack Wang if (unlikely(err)) { 9829cb83748SJack Wang rtrs_err(s, "post_recv_io(), err: %d\n", err); 9839cb83748SJack Wang return err; 9849cb83748SJack Wang } 9859cb83748SJack Wang } 9869cb83748SJack Wang 9879cb83748SJack Wang return 0; 9889cb83748SJack Wang } 9899cb83748SJack Wang 9909cb83748SJack Wang static void process_read(struct rtrs_srv_con *con, 9919cb83748SJack Wang struct rtrs_msg_rdma_read *msg, 9929cb83748SJack Wang u32 buf_id, u32 off) 9939cb83748SJack Wang { 9949cb83748SJack Wang struct rtrs_sess *s = con->c.sess; 9959cb83748SJack Wang struct rtrs_srv_sess *sess = to_srv_sess(s); 9969cb83748SJack Wang struct rtrs_srv *srv = sess->srv; 9979cb83748SJack Wang struct rtrs_srv_ctx *ctx = srv->ctx; 9989cb83748SJack Wang struct rtrs_srv_op *id; 9999cb83748SJack Wang 10009cb83748SJack Wang size_t usr_len, data_len; 10019cb83748SJack Wang void *data; 10029cb83748SJack Wang int ret; 10039cb83748SJack Wang 10049cb83748SJack Wang if (unlikely(sess->state != RTRS_SRV_CONNECTED)) { 10059cb83748SJack Wang rtrs_err_rl(s, 10069cb83748SJack Wang "Processing read request failed, session is disconnected, sess state %s\n", 10079cb83748SJack Wang rtrs_srv_state_str(sess->state)); 10089cb83748SJack Wang return; 10099cb83748SJack Wang } 10109cb83748SJack Wang if (unlikely(msg->sg_cnt != 1 && msg->sg_cnt != 0)) { 10119cb83748SJack Wang rtrs_err_rl(s, 10129cb83748SJack Wang "Processing read request failed, invalid message\n"); 10139cb83748SJack Wang return; 10149cb83748SJack Wang } 10159cb83748SJack Wang rtrs_srv_get_ops_ids(sess); 10169cb83748SJack Wang rtrs_srv_update_rdma_stats(sess->stats, off, READ); 10179cb83748SJack Wang id = sess->ops_ids[buf_id]; 10189cb83748SJack Wang id->con = con; 10199cb83748SJack Wang id->dir = READ; 10209cb83748SJack Wang id->msg_id = buf_id; 10219cb83748SJack Wang id->rd_msg = msg; 10229cb83748SJack Wang usr_len = le16_to_cpu(msg->usr_len); 10239cb83748SJack Wang data_len = off - usr_len; 10249cb83748SJack Wang data = page_address(srv->chunks[buf_id]); 1025c81cba85SGioh Kim ret = ctx->ops.rdma_ev(srv->priv, id, READ, data, data_len, 10269cb83748SJack Wang data + data_len, usr_len); 10279cb83748SJack Wang 10289cb83748SJack Wang if (unlikely(ret)) { 10299cb83748SJack Wang rtrs_err_rl(s, 10309cb83748SJack Wang "Processing read request failed, user module cb reported for msg_id %d, err: %d\n", 10319cb83748SJack Wang buf_id, ret); 10329cb83748SJack Wang goto send_err_msg; 10339cb83748SJack Wang } 10349cb83748SJack Wang 10359cb83748SJack Wang return; 10369cb83748SJack Wang 10379cb83748SJack Wang send_err_msg: 10389cb83748SJack Wang ret = send_io_resp_imm(con, id, ret); 10399cb83748SJack Wang if (ret < 0) { 10409cb83748SJack Wang rtrs_err_rl(s, 10419cb83748SJack Wang "Sending err msg for failed RDMA-Write-Req failed, msg_id %d, err: %d\n", 10429cb83748SJack Wang buf_id, ret); 10439cb83748SJack Wang close_sess(sess); 10449cb83748SJack Wang } 10459cb83748SJack Wang rtrs_srv_put_ops_ids(sess); 10469cb83748SJack Wang } 10479cb83748SJack Wang 10489cb83748SJack Wang static void process_write(struct rtrs_srv_con *con, 10499cb83748SJack Wang struct rtrs_msg_rdma_write *req, 10509cb83748SJack Wang u32 buf_id, u32 off) 10519cb83748SJack Wang { 10529cb83748SJack Wang struct rtrs_sess *s = con->c.sess; 10539cb83748SJack Wang struct rtrs_srv_sess *sess = to_srv_sess(s); 10549cb83748SJack Wang struct rtrs_srv *srv = sess->srv; 10559cb83748SJack Wang struct rtrs_srv_ctx *ctx = srv->ctx; 10569cb83748SJack Wang struct rtrs_srv_op *id; 10579cb83748SJack Wang 10589cb83748SJack Wang size_t data_len, usr_len; 10599cb83748SJack Wang void *data; 10609cb83748SJack Wang int ret; 10619cb83748SJack Wang 10629cb83748SJack Wang if (unlikely(sess->state != RTRS_SRV_CONNECTED)) { 10639cb83748SJack Wang rtrs_err_rl(s, 10649cb83748SJack Wang "Processing write request failed, session is disconnected, sess state %s\n", 10659cb83748SJack Wang rtrs_srv_state_str(sess->state)); 10669cb83748SJack Wang return; 10679cb83748SJack Wang } 10689cb83748SJack Wang rtrs_srv_get_ops_ids(sess); 10699cb83748SJack Wang rtrs_srv_update_rdma_stats(sess->stats, off, WRITE); 10709cb83748SJack Wang id = sess->ops_ids[buf_id]; 10719cb83748SJack Wang id->con = con; 10729cb83748SJack Wang id->dir = WRITE; 10739cb83748SJack Wang id->msg_id = buf_id; 10749cb83748SJack Wang 10759cb83748SJack Wang usr_len = le16_to_cpu(req->usr_len); 10769cb83748SJack Wang data_len = off - usr_len; 10779cb83748SJack Wang data = page_address(srv->chunks[buf_id]); 1078c81cba85SGioh Kim ret = ctx->ops.rdma_ev(srv->priv, id, WRITE, data, data_len, 10799cb83748SJack Wang data + data_len, usr_len); 10809cb83748SJack Wang if (unlikely(ret)) { 10819cb83748SJack Wang rtrs_err_rl(s, 10829cb83748SJack Wang "Processing write request failed, user module callback reports err: %d\n", 10839cb83748SJack Wang ret); 10849cb83748SJack Wang goto send_err_msg; 10859cb83748SJack Wang } 10869cb83748SJack Wang 10879cb83748SJack Wang return; 10889cb83748SJack Wang 10899cb83748SJack Wang send_err_msg: 10909cb83748SJack Wang ret = send_io_resp_imm(con, id, ret); 10919cb83748SJack Wang if (ret < 0) { 10929cb83748SJack Wang rtrs_err_rl(s, 10939cb83748SJack Wang "Processing write request failed, sending I/O response failed, msg_id %d, err: %d\n", 10949cb83748SJack Wang buf_id, ret); 10959cb83748SJack Wang close_sess(sess); 10969cb83748SJack Wang } 10979cb83748SJack Wang rtrs_srv_put_ops_ids(sess); 10989cb83748SJack Wang } 10999cb83748SJack Wang 11009cb83748SJack Wang static void process_io_req(struct rtrs_srv_con *con, void *msg, 11019cb83748SJack Wang u32 id, u32 off) 11029cb83748SJack Wang { 11039cb83748SJack Wang struct rtrs_sess *s = con->c.sess; 11049cb83748SJack Wang struct rtrs_srv_sess *sess = to_srv_sess(s); 11059cb83748SJack Wang struct rtrs_msg_rdma_hdr *hdr; 11069cb83748SJack Wang unsigned int type; 11079cb83748SJack Wang 11089cb83748SJack Wang ib_dma_sync_single_for_cpu(sess->s.dev->ib_dev, sess->dma_addr[id], 11099cb83748SJack Wang max_chunk_size, DMA_BIDIRECTIONAL); 11109cb83748SJack Wang hdr = msg; 11119cb83748SJack Wang type = le16_to_cpu(hdr->type); 11129cb83748SJack Wang 11139cb83748SJack Wang switch (type) { 11149cb83748SJack Wang case RTRS_MSG_WRITE: 11159cb83748SJack Wang process_write(con, msg, id, off); 11169cb83748SJack Wang break; 11179cb83748SJack Wang case RTRS_MSG_READ: 11189cb83748SJack Wang process_read(con, msg, id, off); 11199cb83748SJack Wang break; 11209cb83748SJack Wang default: 11219cb83748SJack Wang rtrs_err(s, 11229cb83748SJack Wang "Processing I/O request failed, unknown message type received: 0x%02x\n", 11239cb83748SJack Wang type); 11249cb83748SJack Wang goto err; 11259cb83748SJack Wang } 11269cb83748SJack Wang 11279cb83748SJack Wang return; 11289cb83748SJack Wang 11299cb83748SJack Wang err: 11309cb83748SJack Wang close_sess(sess); 11319cb83748SJack Wang } 11329cb83748SJack Wang 11339cb83748SJack Wang static void rtrs_srv_inv_rkey_done(struct ib_cq *cq, struct ib_wc *wc) 11349cb83748SJack Wang { 11359cb83748SJack Wang struct rtrs_srv_mr *mr = 11369cb83748SJack Wang container_of(wc->wr_cqe, typeof(*mr), inv_cqe); 11373b89e92cSJack Wang struct rtrs_srv_con *con = to_srv_con(wc->qp->qp_context); 11389cb83748SJack Wang struct rtrs_sess *s = con->c.sess; 11399cb83748SJack Wang struct rtrs_srv_sess *sess = to_srv_sess(s); 11409cb83748SJack Wang struct rtrs_srv *srv = sess->srv; 11419cb83748SJack Wang u32 msg_id, off; 11429cb83748SJack Wang void *data; 11439cb83748SJack Wang 11449cb83748SJack Wang if (unlikely(wc->status != IB_WC_SUCCESS)) { 11459cb83748SJack Wang rtrs_err(s, "Failed IB_WR_LOCAL_INV: %s\n", 11469cb83748SJack Wang ib_wc_status_msg(wc->status)); 11479cb83748SJack Wang close_sess(sess); 11489cb83748SJack Wang } 11499cb83748SJack Wang msg_id = mr->msg_id; 11509cb83748SJack Wang off = mr->msg_off; 11519cb83748SJack Wang data = page_address(srv->chunks[msg_id]) + off; 11529cb83748SJack Wang process_io_req(con, data, msg_id, off); 11539cb83748SJack Wang } 11549cb83748SJack Wang 11559cb83748SJack Wang static int rtrs_srv_inv_rkey(struct rtrs_srv_con *con, 11569cb83748SJack Wang struct rtrs_srv_mr *mr) 11579cb83748SJack Wang { 11589cb83748SJack Wang struct ib_send_wr wr = { 11599cb83748SJack Wang .opcode = IB_WR_LOCAL_INV, 11609cb83748SJack Wang .wr_cqe = &mr->inv_cqe, 11619cb83748SJack Wang .send_flags = IB_SEND_SIGNALED, 11629cb83748SJack Wang .ex.invalidate_rkey = mr->mr->rkey, 11639cb83748SJack Wang }; 11649cb83748SJack Wang mr->inv_cqe.done = rtrs_srv_inv_rkey_done; 11659cb83748SJack Wang 11669cb83748SJack Wang return ib_post_send(con->c.qp, &wr, NULL); 11679cb83748SJack Wang } 11689cb83748SJack Wang 11699cb83748SJack Wang static void rtrs_rdma_process_wr_wait_list(struct rtrs_srv_con *con) 11709cb83748SJack Wang { 11719cb83748SJack Wang spin_lock(&con->rsp_wr_wait_lock); 11729cb83748SJack Wang while (!list_empty(&con->rsp_wr_wait_list)) { 11739cb83748SJack Wang struct rtrs_srv_op *id; 11749cb83748SJack Wang int ret; 11759cb83748SJack Wang 11769cb83748SJack Wang id = list_entry(con->rsp_wr_wait_list.next, 11779cb83748SJack Wang struct rtrs_srv_op, wait_list); 11789cb83748SJack Wang list_del(&id->wait_list); 11799cb83748SJack Wang 11809cb83748SJack Wang spin_unlock(&con->rsp_wr_wait_lock); 11819cb83748SJack Wang ret = rtrs_srv_resp_rdma(id, id->status); 11829cb83748SJack Wang spin_lock(&con->rsp_wr_wait_lock); 11839cb83748SJack Wang 11849cb83748SJack Wang if (!ret) { 11859cb83748SJack Wang list_add(&id->wait_list, &con->rsp_wr_wait_list); 11869cb83748SJack Wang break; 11879cb83748SJack Wang } 11889cb83748SJack Wang } 11899cb83748SJack Wang spin_unlock(&con->rsp_wr_wait_lock); 11909cb83748SJack Wang } 11919cb83748SJack Wang 11929cb83748SJack Wang static void rtrs_srv_rdma_done(struct ib_cq *cq, struct ib_wc *wc) 11939cb83748SJack Wang { 11943b89e92cSJack Wang struct rtrs_srv_con *con = to_srv_con(wc->qp->qp_context); 11959cb83748SJack Wang struct rtrs_sess *s = con->c.sess; 11969cb83748SJack Wang struct rtrs_srv_sess *sess = to_srv_sess(s); 11979cb83748SJack Wang struct rtrs_srv *srv = sess->srv; 11989cb83748SJack Wang u32 imm_type, imm_payload; 11999cb83748SJack Wang int err; 12009cb83748SJack Wang 12019cb83748SJack Wang if (unlikely(wc->status != IB_WC_SUCCESS)) { 12029cb83748SJack Wang if (wc->status != IB_WC_WR_FLUSH_ERR) { 12039cb83748SJack Wang rtrs_err(s, 12049cb83748SJack Wang "%s (wr_cqe: %p, type: %d, vendor_err: 0x%x, len: %u)\n", 12059cb83748SJack Wang ib_wc_status_msg(wc->status), wc->wr_cqe, 12069cb83748SJack Wang wc->opcode, wc->vendor_err, wc->byte_len); 12079cb83748SJack Wang close_sess(sess); 12089cb83748SJack Wang } 12099cb83748SJack Wang return; 12109cb83748SJack Wang } 12119cb83748SJack Wang 12129cb83748SJack Wang switch (wc->opcode) { 12139cb83748SJack Wang case IB_WC_RECV_RDMA_WITH_IMM: 12149cb83748SJack Wang /* 12159cb83748SJack Wang * post_recv() RDMA write completions of IO reqs (read/write) 12169cb83748SJack Wang * and hb 12179cb83748SJack Wang */ 12189cb83748SJack Wang if (WARN_ON(wc->wr_cqe != &io_comp_cqe)) 12199cb83748SJack Wang return; 12209cb83748SJack Wang err = rtrs_post_recv_empty(&con->c, &io_comp_cqe); 12219cb83748SJack Wang if (unlikely(err)) { 12229cb83748SJack Wang rtrs_err(s, "rtrs_post_recv(), err: %d\n", err); 12239cb83748SJack Wang close_sess(sess); 12249cb83748SJack Wang break; 12259cb83748SJack Wang } 12269cb83748SJack Wang rtrs_from_imm(be32_to_cpu(wc->ex.imm_data), 12279cb83748SJack Wang &imm_type, &imm_payload); 12289cb83748SJack Wang if (likely(imm_type == RTRS_IO_REQ_IMM)) { 12299cb83748SJack Wang u32 msg_id, off; 12309cb83748SJack Wang void *data; 12319cb83748SJack Wang 12329cb83748SJack Wang msg_id = imm_payload >> sess->mem_bits; 12339cb83748SJack Wang off = imm_payload & ((1 << sess->mem_bits) - 1); 1234bf1d8edbSDan Carpenter if (unlikely(msg_id >= srv->queue_depth || 1235bf1d8edbSDan Carpenter off >= max_chunk_size)) { 12369cb83748SJack Wang rtrs_err(s, "Wrong msg_id %u, off %u\n", 12379cb83748SJack Wang msg_id, off); 12389cb83748SJack Wang close_sess(sess); 12399cb83748SJack Wang return; 12409cb83748SJack Wang } 12419cb83748SJack Wang if (always_invalidate) { 12429cb83748SJack Wang struct rtrs_srv_mr *mr = &sess->mrs[msg_id]; 12439cb83748SJack Wang 12449cb83748SJack Wang mr->msg_off = off; 12459cb83748SJack Wang mr->msg_id = msg_id; 12469cb83748SJack Wang err = rtrs_srv_inv_rkey(con, mr); 12479cb83748SJack Wang if (unlikely(err)) { 12489cb83748SJack Wang rtrs_err(s, "rtrs_post_recv(), err: %d\n", 12499cb83748SJack Wang err); 12509cb83748SJack Wang close_sess(sess); 12519cb83748SJack Wang break; 12529cb83748SJack Wang } 12539cb83748SJack Wang } else { 12549cb83748SJack Wang data = page_address(srv->chunks[msg_id]) + off; 12559cb83748SJack Wang process_io_req(con, data, msg_id, off); 12569cb83748SJack Wang } 12579cb83748SJack Wang } else if (imm_type == RTRS_HB_MSG_IMM) { 12589cb83748SJack Wang WARN_ON(con->c.cid); 12599cb83748SJack Wang rtrs_send_hb_ack(&sess->s); 12609cb83748SJack Wang } else if (imm_type == RTRS_HB_ACK_IMM) { 12619cb83748SJack Wang WARN_ON(con->c.cid); 12629cb83748SJack Wang sess->s.hb_missed_cnt = 0; 12639cb83748SJack Wang } else { 12649cb83748SJack Wang rtrs_wrn(s, "Unknown IMM type %u\n", imm_type); 12659cb83748SJack Wang } 12669cb83748SJack Wang break; 12679cb83748SJack Wang case IB_WC_RDMA_WRITE: 12689cb83748SJack Wang case IB_WC_SEND: 12699cb83748SJack Wang /* 12709cb83748SJack Wang * post_send() RDMA write completions of IO reqs (read/write) 12719cb83748SJack Wang */ 12729cb83748SJack Wang atomic_add(srv->queue_depth, &con->sq_wr_avail); 12739cb83748SJack Wang 12749cb83748SJack Wang if (unlikely(!list_empty_careful(&con->rsp_wr_wait_list))) 12759cb83748SJack Wang rtrs_rdma_process_wr_wait_list(con); 12769cb83748SJack Wang 12779cb83748SJack Wang break; 12789cb83748SJack Wang default: 12799cb83748SJack Wang rtrs_wrn(s, "Unexpected WC type: %d\n", wc->opcode); 12809cb83748SJack Wang return; 12819cb83748SJack Wang } 12829cb83748SJack Wang } 12839cb83748SJack Wang 12849cb83748SJack Wang /** 12859cb83748SJack Wang * rtrs_srv_get_sess_name() - Get rtrs_srv peer hostname. 12869cb83748SJack Wang * @srv: Session 12879cb83748SJack Wang * @sessname: Sessname buffer 12889cb83748SJack Wang * @len: Length of sessname buffer 12899cb83748SJack Wang */ 12909cb83748SJack Wang int rtrs_srv_get_sess_name(struct rtrs_srv *srv, char *sessname, size_t len) 12919cb83748SJack Wang { 12929cb83748SJack Wang struct rtrs_srv_sess *sess; 12939cb83748SJack Wang int err = -ENOTCONN; 12949cb83748SJack Wang 12959cb83748SJack Wang mutex_lock(&srv->paths_mutex); 12969cb83748SJack Wang list_for_each_entry(sess, &srv->paths_list, s.entry) { 12979cb83748SJack Wang if (sess->state != RTRS_SRV_CONNECTED) 12989cb83748SJack Wang continue; 12992d612f0dSDima Stepanov strscpy(sessname, sess->s.sessname, 13009cb83748SJack Wang min_t(size_t, sizeof(sess->s.sessname), len)); 13019cb83748SJack Wang err = 0; 13029cb83748SJack Wang break; 13039cb83748SJack Wang } 13049cb83748SJack Wang mutex_unlock(&srv->paths_mutex); 13059cb83748SJack Wang 13069cb83748SJack Wang return err; 13079cb83748SJack Wang } 13089cb83748SJack Wang EXPORT_SYMBOL(rtrs_srv_get_sess_name); 13099cb83748SJack Wang 13109cb83748SJack Wang /** 13119cb83748SJack Wang * rtrs_srv_get_sess_qdepth() - Get rtrs_srv qdepth. 13129cb83748SJack Wang * @srv: Session 13139cb83748SJack Wang */ 13149cb83748SJack Wang int rtrs_srv_get_queue_depth(struct rtrs_srv *srv) 13159cb83748SJack Wang { 13169cb83748SJack Wang return srv->queue_depth; 13179cb83748SJack Wang } 13189cb83748SJack Wang EXPORT_SYMBOL(rtrs_srv_get_queue_depth); 13199cb83748SJack Wang 13209cb83748SJack Wang static int find_next_bit_ring(struct rtrs_srv_sess *sess) 13219cb83748SJack Wang { 13229cb83748SJack Wang struct ib_device *ib_dev = sess->s.dev->ib_dev; 13239cb83748SJack Wang int v; 13249cb83748SJack Wang 13259cb83748SJack Wang v = cpumask_next(sess->cur_cq_vector, &cq_affinity_mask); 13269cb83748SJack Wang if (v >= nr_cpu_ids || v >= ib_dev->num_comp_vectors) 13279cb83748SJack Wang v = cpumask_first(&cq_affinity_mask); 13289cb83748SJack Wang return v; 13299cb83748SJack Wang } 13309cb83748SJack Wang 13319cb83748SJack Wang static int rtrs_srv_get_next_cq_vector(struct rtrs_srv_sess *sess) 13329cb83748SJack Wang { 13339cb83748SJack Wang sess->cur_cq_vector = find_next_bit_ring(sess); 13349cb83748SJack Wang 13359cb83748SJack Wang return sess->cur_cq_vector; 13369cb83748SJack Wang } 13379cb83748SJack Wang 133839c2d639SMd Haris Iqbal static void rtrs_srv_dev_release(struct device *dev) 133939c2d639SMd Haris Iqbal { 134039c2d639SMd Haris Iqbal struct rtrs_srv *srv = container_of(dev, struct rtrs_srv, dev); 134139c2d639SMd Haris Iqbal 134239c2d639SMd Haris Iqbal kfree(srv); 134339c2d639SMd Haris Iqbal } 134439c2d639SMd Haris Iqbal 13459cb83748SJack Wang static void free_srv(struct rtrs_srv *srv) 13469cb83748SJack Wang { 13479cb83748SJack Wang int i; 13489cb83748SJack Wang 13499cb83748SJack Wang WARN_ON(refcount_read(&srv->refcount)); 13509cb83748SJack Wang for (i = 0; i < srv->queue_depth; i++) 13519cb83748SJack Wang mempool_free(srv->chunks[i], chunk_pool); 13529cb83748SJack Wang kfree(srv->chunks); 13539cb83748SJack Wang mutex_destroy(&srv->paths_mutex); 13549cb83748SJack Wang mutex_destroy(&srv->paths_ev_mutex); 13559cb83748SJack Wang /* last put to release the srv structure */ 13569cb83748SJack Wang put_device(&srv->dev); 13579cb83748SJack Wang } 13589cb83748SJack Wang 13599cb83748SJack Wang static struct rtrs_srv *get_or_create_srv(struct rtrs_srv_ctx *ctx, 136003e9b33aSMd Haris Iqbal const uuid_t *paths_uuid, 136103e9b33aSMd Haris Iqbal bool first_conn) 13629cb83748SJack Wang { 13639cb83748SJack Wang struct rtrs_srv *srv; 1364d715ff8aSGuoqing Jiang int i; 13659cb83748SJack Wang 13669cb83748SJack Wang mutex_lock(&ctx->srv_mutex); 1367d715ff8aSGuoqing Jiang list_for_each_entry(srv, &ctx->srv_list, ctx_list) { 1368d715ff8aSGuoqing Jiang if (uuid_equal(&srv->paths_uuid, paths_uuid) && 1369d715ff8aSGuoqing Jiang refcount_inc_not_zero(&srv->refcount)) { 1370d715ff8aSGuoqing Jiang mutex_unlock(&ctx->srv_mutex); 1371d715ff8aSGuoqing Jiang return srv; 1372d715ff8aSGuoqing Jiang } 1373d715ff8aSGuoqing Jiang } 1374ed408529SJack Wang mutex_unlock(&ctx->srv_mutex); 137503e9b33aSMd Haris Iqbal /* 137603e9b33aSMd Haris Iqbal * If this request is not the first connection request from the 137703e9b33aSMd Haris Iqbal * client for this session then fail and return error. 137803e9b33aSMd Haris Iqbal */ 13796564b110SMd Haris Iqbal if (!first_conn) { 13806564b110SMd Haris Iqbal pr_err_ratelimited("Error: Not the first connection request for this session\n"); 138103e9b33aSMd Haris Iqbal return ERR_PTR(-ENXIO); 13826564b110SMd Haris Iqbal } 1383d715ff8aSGuoqing Jiang 1384d715ff8aSGuoqing Jiang /* need to allocate a new srv */ 1385d715ff8aSGuoqing Jiang srv = kzalloc(sizeof(*srv), GFP_KERNEL); 1386ed408529SJack Wang if (!srv) 138703e9b33aSMd Haris Iqbal return ERR_PTR(-ENOMEM); 1388d715ff8aSGuoqing Jiang 1389d715ff8aSGuoqing Jiang INIT_LIST_HEAD(&srv->paths_list); 1390d715ff8aSGuoqing Jiang mutex_init(&srv->paths_mutex); 1391d715ff8aSGuoqing Jiang mutex_init(&srv->paths_ev_mutex); 1392d715ff8aSGuoqing Jiang uuid_copy(&srv->paths_uuid, paths_uuid); 1393d715ff8aSGuoqing Jiang srv->queue_depth = sess_queue_depth; 1394d715ff8aSGuoqing Jiang srv->ctx = ctx; 1395d715ff8aSGuoqing Jiang device_initialize(&srv->dev); 1396d715ff8aSGuoqing Jiang srv->dev.release = rtrs_srv_dev_release; 13979cb83748SJack Wang 1398d715ff8aSGuoqing Jiang srv->chunks = kcalloc(srv->queue_depth, sizeof(*srv->chunks), 1399d715ff8aSGuoqing Jiang GFP_KERNEL); 1400d715ff8aSGuoqing Jiang if (!srv->chunks) 1401d715ff8aSGuoqing Jiang goto err_free_srv; 1402d715ff8aSGuoqing Jiang 1403d715ff8aSGuoqing Jiang for (i = 0; i < srv->queue_depth; i++) { 1404d715ff8aSGuoqing Jiang srv->chunks[i] = mempool_alloc(chunk_pool, GFP_KERNEL); 1405d715ff8aSGuoqing Jiang if (!srv->chunks[i]) 1406d715ff8aSGuoqing Jiang goto err_free_chunks; 1407d715ff8aSGuoqing Jiang } 1408d715ff8aSGuoqing Jiang refcount_set(&srv->refcount, 1); 1409ed408529SJack Wang mutex_lock(&ctx->srv_mutex); 1410ed408529SJack Wang list_add(&srv->ctx_list, &ctx->srv_list); 1411ed408529SJack Wang mutex_unlock(&ctx->srv_mutex); 1412d715ff8aSGuoqing Jiang 14139cb83748SJack Wang return srv; 1414d715ff8aSGuoqing Jiang 1415d715ff8aSGuoqing Jiang err_free_chunks: 1416d715ff8aSGuoqing Jiang while (i--) 1417d715ff8aSGuoqing Jiang mempool_free(srv->chunks[i], chunk_pool); 1418d715ff8aSGuoqing Jiang kfree(srv->chunks); 1419d715ff8aSGuoqing Jiang 1420d715ff8aSGuoqing Jiang err_free_srv: 1421d715ff8aSGuoqing Jiang kfree(srv); 142203e9b33aSMd Haris Iqbal return ERR_PTR(-ENOMEM); 14239cb83748SJack Wang } 14249cb83748SJack Wang 14259cb83748SJack Wang static void put_srv(struct rtrs_srv *srv) 14269cb83748SJack Wang { 14279cb83748SJack Wang if (refcount_dec_and_test(&srv->refcount)) { 14289cb83748SJack Wang struct rtrs_srv_ctx *ctx = srv->ctx; 14299cb83748SJack Wang 14309cb83748SJack Wang WARN_ON(srv->dev.kobj.state_in_sysfs); 14319cb83748SJack Wang 14329cb83748SJack Wang mutex_lock(&ctx->srv_mutex); 14339cb83748SJack Wang list_del(&srv->ctx_list); 14349cb83748SJack Wang mutex_unlock(&ctx->srv_mutex); 14359cb83748SJack Wang free_srv(srv); 14369cb83748SJack Wang } 14379cb83748SJack Wang } 14389cb83748SJack Wang 14399cb83748SJack Wang static void __add_path_to_srv(struct rtrs_srv *srv, 14409cb83748SJack Wang struct rtrs_srv_sess *sess) 14419cb83748SJack Wang { 14429cb83748SJack Wang list_add_tail(&sess->s.entry, &srv->paths_list); 14439cb83748SJack Wang srv->paths_num++; 14449cb83748SJack Wang WARN_ON(srv->paths_num >= MAX_PATHS_NUM); 14459cb83748SJack Wang } 14469cb83748SJack Wang 14479cb83748SJack Wang static void del_path_from_srv(struct rtrs_srv_sess *sess) 14489cb83748SJack Wang { 14499cb83748SJack Wang struct rtrs_srv *srv = sess->srv; 14509cb83748SJack Wang 14519cb83748SJack Wang if (WARN_ON(!srv)) 14529cb83748SJack Wang return; 14539cb83748SJack Wang 14549cb83748SJack Wang mutex_lock(&srv->paths_mutex); 14559cb83748SJack Wang list_del(&sess->s.entry); 14569cb83748SJack Wang WARN_ON(!srv->paths_num); 14579cb83748SJack Wang srv->paths_num--; 14589cb83748SJack Wang mutex_unlock(&srv->paths_mutex); 14599cb83748SJack Wang } 14609cb83748SJack Wang 14619cb83748SJack Wang /* return true if addresses are the same, error other wise */ 14629cb83748SJack Wang static int sockaddr_cmp(const struct sockaddr *a, const struct sockaddr *b) 14639cb83748SJack Wang { 14649cb83748SJack Wang switch (a->sa_family) { 14659cb83748SJack Wang case AF_IB: 14669cb83748SJack Wang return memcmp(&((struct sockaddr_ib *)a)->sib_addr, 14679cb83748SJack Wang &((struct sockaddr_ib *)b)->sib_addr, 14689cb83748SJack Wang sizeof(struct ib_addr)) && 14699cb83748SJack Wang (b->sa_family == AF_IB); 14709cb83748SJack Wang case AF_INET: 14719cb83748SJack Wang return memcmp(&((struct sockaddr_in *)a)->sin_addr, 14729cb83748SJack Wang &((struct sockaddr_in *)b)->sin_addr, 14739cb83748SJack Wang sizeof(struct in_addr)) && 14749cb83748SJack Wang (b->sa_family == AF_INET); 14759cb83748SJack Wang case AF_INET6: 14769cb83748SJack Wang return memcmp(&((struct sockaddr_in6 *)a)->sin6_addr, 14779cb83748SJack Wang &((struct sockaddr_in6 *)b)->sin6_addr, 14789cb83748SJack Wang sizeof(struct in6_addr)) && 14799cb83748SJack Wang (b->sa_family == AF_INET6); 14809cb83748SJack Wang default: 14819cb83748SJack Wang return -ENOENT; 14829cb83748SJack Wang } 14839cb83748SJack Wang } 14849cb83748SJack Wang 14859cb83748SJack Wang static bool __is_path_w_addr_exists(struct rtrs_srv *srv, 14869cb83748SJack Wang struct rdma_addr *addr) 14879cb83748SJack Wang { 14889cb83748SJack Wang struct rtrs_srv_sess *sess; 14899cb83748SJack Wang 14909cb83748SJack Wang list_for_each_entry(sess, &srv->paths_list, s.entry) 14919cb83748SJack Wang if (!sockaddr_cmp((struct sockaddr *)&sess->s.dst_addr, 14929cb83748SJack Wang (struct sockaddr *)&addr->dst_addr) && 14939cb83748SJack Wang !sockaddr_cmp((struct sockaddr *)&sess->s.src_addr, 14949cb83748SJack Wang (struct sockaddr *)&addr->src_addr)) 14959cb83748SJack Wang return true; 14969cb83748SJack Wang 14979cb83748SJack Wang return false; 14989cb83748SJack Wang } 14999cb83748SJack Wang 15009cb83748SJack Wang static void free_sess(struct rtrs_srv_sess *sess) 15019cb83748SJack Wang { 1502f7452a7eSGioh Kim if (sess->kobj.state_in_sysfs) { 1503f7452a7eSGioh Kim kobject_del(&sess->kobj); 15049cb83748SJack Wang kobject_put(&sess->kobj); 1505f7452a7eSGioh Kim } else { 15062371c403SGioh Kim kfree(sess->stats); 15079cb83748SJack Wang kfree(sess); 15089cb83748SJack Wang } 1509f7452a7eSGioh Kim } 15109cb83748SJack Wang 15119cb83748SJack Wang static void rtrs_srv_close_work(struct work_struct *work) 15129cb83748SJack Wang { 15139cb83748SJack Wang struct rtrs_srv_sess *sess; 15149cb83748SJack Wang struct rtrs_srv_con *con; 15159cb83748SJack Wang int i; 15169cb83748SJack Wang 15179cb83748SJack Wang sess = container_of(work, typeof(*sess), close_work); 15189cb83748SJack Wang 15199cb83748SJack Wang rtrs_srv_destroy_sess_files(sess); 15209cb83748SJack Wang rtrs_srv_stop_hb(sess); 15219cb83748SJack Wang 15229cb83748SJack Wang for (i = 0; i < sess->s.con_num; i++) { 15239cb83748SJack Wang if (!sess->s.con[i]) 15249cb83748SJack Wang continue; 15259cb83748SJack Wang con = to_srv_con(sess->s.con[i]); 15269cb83748SJack Wang rdma_disconnect(con->c.cm_id); 15279cb83748SJack Wang ib_drain_qp(con->c.qp); 15289cb83748SJack Wang } 15290cdfb3b2SMd Haris Iqbal 15300cdfb3b2SMd Haris Iqbal /* 15310cdfb3b2SMd Haris Iqbal * Degrade ref count to the usual model with a single shared 15320cdfb3b2SMd Haris Iqbal * atomic_t counter 15330cdfb3b2SMd Haris Iqbal */ 15340cdfb3b2SMd Haris Iqbal percpu_ref_kill(&sess->ids_inflight_ref); 15350cdfb3b2SMd Haris Iqbal 15360cdfb3b2SMd Haris Iqbal /* Wait for all completion */ 15370cdfb3b2SMd Haris Iqbal wait_for_completion(&sess->complete_done); 15389cb83748SJack Wang 15399cb83748SJack Wang /* Notify upper layer if we are the last path */ 15409cb83748SJack Wang rtrs_srv_sess_down(sess); 15419cb83748SJack Wang 15429cb83748SJack Wang unmap_cont_bufs(sess); 15439cb83748SJack Wang rtrs_srv_free_ops_ids(sess); 15449cb83748SJack Wang 15459cb83748SJack Wang for (i = 0; i < sess->s.con_num; i++) { 15469cb83748SJack Wang if (!sess->s.con[i]) 15479cb83748SJack Wang continue; 15489cb83748SJack Wang con = to_srv_con(sess->s.con[i]); 15499cb83748SJack Wang rtrs_cq_qp_destroy(&con->c); 15509cb83748SJack Wang rdma_destroy_id(con->c.cm_id); 15519cb83748SJack Wang kfree(con); 15529cb83748SJack Wang } 15539cb83748SJack Wang rtrs_ib_dev_put(sess->s.dev); 15549cb83748SJack Wang 15559cb83748SJack Wang del_path_from_srv(sess); 15569cb83748SJack Wang put_srv(sess->srv); 15579cb83748SJack Wang sess->srv = NULL; 15589cb83748SJack Wang rtrs_srv_change_state(sess, RTRS_SRV_CLOSED); 15599cb83748SJack Wang 15609cb83748SJack Wang kfree(sess->dma_addr); 15619cb83748SJack Wang kfree(sess->s.con); 15629cb83748SJack Wang free_sess(sess); 15639cb83748SJack Wang } 15649cb83748SJack Wang 15659cb83748SJack Wang static int rtrs_rdma_do_accept(struct rtrs_srv_sess *sess, 15669cb83748SJack Wang struct rdma_cm_id *cm_id) 15679cb83748SJack Wang { 15689cb83748SJack Wang struct rtrs_srv *srv = sess->srv; 15699cb83748SJack Wang struct rtrs_msg_conn_rsp msg; 15709cb83748SJack Wang struct rdma_conn_param param; 15719cb83748SJack Wang int err; 15729cb83748SJack Wang 15739cb83748SJack Wang param = (struct rdma_conn_param) { 15749cb83748SJack Wang .rnr_retry_count = 7, 15759cb83748SJack Wang .private_data = &msg, 15769cb83748SJack Wang .private_data_len = sizeof(msg), 15779cb83748SJack Wang }; 15789cb83748SJack Wang 15799cb83748SJack Wang msg = (struct rtrs_msg_conn_rsp) { 15809cb83748SJack Wang .magic = cpu_to_le16(RTRS_MAGIC), 15819cb83748SJack Wang .version = cpu_to_le16(RTRS_PROTO_VER), 15829cb83748SJack Wang .queue_depth = cpu_to_le16(srv->queue_depth), 15839cb83748SJack Wang .max_io_size = cpu_to_le32(max_chunk_size - MAX_HDR_SIZE), 15849cb83748SJack Wang .max_hdr_size = cpu_to_le32(MAX_HDR_SIZE), 15859cb83748SJack Wang }; 15869cb83748SJack Wang 15879cb83748SJack Wang if (always_invalidate) 15889cb83748SJack Wang msg.flags = cpu_to_le32(RTRS_MSG_NEW_RKEY_F); 15899cb83748SJack Wang 15909cb83748SJack Wang err = rdma_accept(cm_id, ¶m); 15919cb83748SJack Wang if (err) 15929cb83748SJack Wang pr_err("rdma_accept(), err: %d\n", err); 15939cb83748SJack Wang 15949cb83748SJack Wang return err; 15959cb83748SJack Wang } 15969cb83748SJack Wang 15979cb83748SJack Wang static int rtrs_rdma_do_reject(struct rdma_cm_id *cm_id, int errno) 15989cb83748SJack Wang { 15999cb83748SJack Wang struct rtrs_msg_conn_rsp msg; 16009cb83748SJack Wang int err; 16019cb83748SJack Wang 16029cb83748SJack Wang msg = (struct rtrs_msg_conn_rsp) { 16039cb83748SJack Wang .magic = cpu_to_le16(RTRS_MAGIC), 16049cb83748SJack Wang .version = cpu_to_le16(RTRS_PROTO_VER), 16059cb83748SJack Wang .errno = cpu_to_le16(errno), 16069cb83748SJack Wang }; 16079cb83748SJack Wang 16088094ba0aSLeon Romanovsky err = rdma_reject(cm_id, &msg, sizeof(msg), IB_CM_REJ_CONSUMER_DEFINED); 16099cb83748SJack Wang if (err) 16109cb83748SJack Wang pr_err("rdma_reject(), err: %d\n", err); 16119cb83748SJack Wang 16129cb83748SJack Wang /* Bounce errno back */ 16139cb83748SJack Wang return errno; 16149cb83748SJack Wang } 16159cb83748SJack Wang 16169cb83748SJack Wang static struct rtrs_srv_sess * 16179cb83748SJack Wang __find_sess(struct rtrs_srv *srv, const uuid_t *sess_uuid) 16189cb83748SJack Wang { 16199cb83748SJack Wang struct rtrs_srv_sess *sess; 16209cb83748SJack Wang 16219cb83748SJack Wang list_for_each_entry(sess, &srv->paths_list, s.entry) { 16229cb83748SJack Wang if (uuid_equal(&sess->s.uuid, sess_uuid)) 16239cb83748SJack Wang return sess; 16249cb83748SJack Wang } 16259cb83748SJack Wang 16269cb83748SJack Wang return NULL; 16279cb83748SJack Wang } 16289cb83748SJack Wang 16299cb83748SJack Wang static int create_con(struct rtrs_srv_sess *sess, 16309cb83748SJack Wang struct rdma_cm_id *cm_id, 16319cb83748SJack Wang unsigned int cid) 16329cb83748SJack Wang { 16339cb83748SJack Wang struct rtrs_srv *srv = sess->srv; 16349cb83748SJack Wang struct rtrs_sess *s = &sess->s; 16359cb83748SJack Wang struct rtrs_srv_con *con; 16369cb83748SJack Wang 1637*354462ebSGuoqing Jiang u32 cq_num, max_send_wr, max_recv_wr, wr_limit; 16389cb83748SJack Wang int err, cq_vector; 16399cb83748SJack Wang 16409cb83748SJack Wang con = kzalloc(sizeof(*con), GFP_KERNEL); 16419cb83748SJack Wang if (!con) { 16429cb83748SJack Wang err = -ENOMEM; 16439cb83748SJack Wang goto err; 16449cb83748SJack Wang } 16459cb83748SJack Wang 16469cb83748SJack Wang spin_lock_init(&con->rsp_wr_wait_lock); 16479cb83748SJack Wang INIT_LIST_HEAD(&con->rsp_wr_wait_list); 16489cb83748SJack Wang con->c.cm_id = cm_id; 16499cb83748SJack Wang con->c.sess = &sess->s; 16509cb83748SJack Wang con->c.cid = cid; 16516f5d1b30SJack Wang atomic_set(&con->wr_cnt, 1); 16529cb83748SJack Wang 16539cb83748SJack Wang if (con->c.cid == 0) { 16549cb83748SJack Wang /* 16559cb83748SJack Wang * All receive and all send (each requiring invalidate) 16569cb83748SJack Wang * + 2 for drain and heartbeat 16579cb83748SJack Wang */ 16585e91eabfSJack Wang max_send_wr = SERVICE_CON_QUEUE_DEPTH * 2 + 2; 1659b012f0adSMd Haris Iqbal max_recv_wr = SERVICE_CON_QUEUE_DEPTH * 2 + 2; 16609cb83748SJack Wang } else { 16619cb83748SJack Wang /* 16629cb83748SJack Wang * In theory we might have queue_depth * 32 16639cb83748SJack Wang * outstanding requests if an unsafe global key is used 16649cb83748SJack Wang * and we have queue_depth read requests each consisting 16659cb83748SJack Wang * of 32 different addresses. div 3 for mlx5. 16669cb83748SJack Wang */ 16675e91eabfSJack Wang wr_limit = sess->s.dev->ib_dev->attrs.max_qp_wr / 3; 16685e91eabfSJack Wang /* when always_invlaidate enalbed, we need linv+rinv+mr+imm */ 16695e91eabfSJack Wang if (always_invalidate) 16705e91eabfSJack Wang max_send_wr = 16715e91eabfSJack Wang min_t(int, wr_limit, 16725e91eabfSJack Wang srv->queue_depth * (1 + 4) + 1); 16735e91eabfSJack Wang else 16745e91eabfSJack Wang max_send_wr = 16755e91eabfSJack Wang min_t(int, wr_limit, 16765e91eabfSJack Wang srv->queue_depth * (1 + 2) + 1); 16775e91eabfSJack Wang 16785e91eabfSJack Wang max_recv_wr = srv->queue_depth + 1; 16795e91eabfSJack Wang /* 16805e91eabfSJack Wang * If we have all receive requests posted and 16815e91eabfSJack Wang * all write requests posted and each read request 16825e91eabfSJack Wang * requires an invalidate request + drain 16835e91eabfSJack Wang * and qp gets into error state. 16845e91eabfSJack Wang */ 16859cb83748SJack Wang } 1686*354462ebSGuoqing Jiang cq_num = max_send_wr + max_recv_wr; 16875e91eabfSJack Wang atomic_set(&con->sq_wr_avail, max_send_wr); 16889cb83748SJack Wang cq_vector = rtrs_srv_get_next_cq_vector(sess); 16899cb83748SJack Wang 16909cb83748SJack Wang /* TODO: SOFTIRQ can be faster, but be careful with softirq context */ 1691*354462ebSGuoqing Jiang err = rtrs_cq_qp_create(&sess->s, &con->c, 1, cq_vector, cq_num, 16925e91eabfSJack Wang max_send_wr, max_recv_wr, 16937490fd1fSJack Wang IB_POLL_WORKQUEUE); 16949cb83748SJack Wang if (err) { 16959cb83748SJack Wang rtrs_err(s, "rtrs_cq_qp_create(), err: %d\n", err); 16969cb83748SJack Wang goto free_con; 16979cb83748SJack Wang } 16989cb83748SJack Wang if (con->c.cid == 0) { 16999cb83748SJack Wang err = post_recv_info_req(con); 17009cb83748SJack Wang if (err) 17019cb83748SJack Wang goto free_cqqp; 17029cb83748SJack Wang } 17039cb83748SJack Wang WARN_ON(sess->s.con[cid]); 17049cb83748SJack Wang sess->s.con[cid] = &con->c; 17059cb83748SJack Wang 17069cb83748SJack Wang /* 17079cb83748SJack Wang * Change context from server to current connection. The other 17089cb83748SJack Wang * way is to use cm_id->qp->qp_context, which does not work on OFED. 17099cb83748SJack Wang */ 17109cb83748SJack Wang cm_id->context = &con->c; 17119cb83748SJack Wang 17129cb83748SJack Wang return 0; 17139cb83748SJack Wang 17149cb83748SJack Wang free_cqqp: 17159cb83748SJack Wang rtrs_cq_qp_destroy(&con->c); 17169cb83748SJack Wang free_con: 17179cb83748SJack Wang kfree(con); 17189cb83748SJack Wang 17199cb83748SJack Wang err: 17209cb83748SJack Wang return err; 17219cb83748SJack Wang } 17229cb83748SJack Wang 17239cb83748SJack Wang static struct rtrs_srv_sess *__alloc_sess(struct rtrs_srv *srv, 17249cb83748SJack Wang struct rdma_cm_id *cm_id, 17259cb83748SJack Wang unsigned int con_num, 17269cb83748SJack Wang unsigned int recon_cnt, 17279cb83748SJack Wang const uuid_t *uuid) 17289cb83748SJack Wang { 17299cb83748SJack Wang struct rtrs_srv_sess *sess; 17309cb83748SJack Wang int err = -ENOMEM; 173188e2f105SGioh Kim char str[NAME_MAX]; 173288e2f105SGioh Kim struct rtrs_addr path; 17339cb83748SJack Wang 17349cb83748SJack Wang if (srv->paths_num >= MAX_PATHS_NUM) { 17359cb83748SJack Wang err = -ECONNRESET; 17369cb83748SJack Wang goto err; 17379cb83748SJack Wang } 17389cb83748SJack Wang if (__is_path_w_addr_exists(srv, &cm_id->route.addr)) { 17399cb83748SJack Wang err = -EEXIST; 17409cb83748SJack Wang pr_err("Path with same addr exists\n"); 17419cb83748SJack Wang goto err; 17429cb83748SJack Wang } 17439cb83748SJack Wang sess = kzalloc(sizeof(*sess), GFP_KERNEL); 17449cb83748SJack Wang if (!sess) 17459cb83748SJack Wang goto err; 17469cb83748SJack Wang 17479cb83748SJack Wang sess->stats = kzalloc(sizeof(*sess->stats), GFP_KERNEL); 17489cb83748SJack Wang if (!sess->stats) 17499cb83748SJack Wang goto err_free_sess; 17509cb83748SJack Wang 17519cb83748SJack Wang sess->stats->sess = sess; 17529cb83748SJack Wang 17539cb83748SJack Wang sess->dma_addr = kcalloc(srv->queue_depth, sizeof(*sess->dma_addr), 17549cb83748SJack Wang GFP_KERNEL); 17559cb83748SJack Wang if (!sess->dma_addr) 17569cb83748SJack Wang goto err_free_stats; 17579cb83748SJack Wang 17589cb83748SJack Wang sess->s.con = kcalloc(con_num, sizeof(*sess->s.con), GFP_KERNEL); 17599cb83748SJack Wang if (!sess->s.con) 17609cb83748SJack Wang goto err_free_dma_addr; 17619cb83748SJack Wang 17629cb83748SJack Wang sess->state = RTRS_SRV_CONNECTING; 17639cb83748SJack Wang sess->srv = srv; 17649cb83748SJack Wang sess->cur_cq_vector = -1; 17659cb83748SJack Wang sess->s.dst_addr = cm_id->route.addr.dst_addr; 17669cb83748SJack Wang sess->s.src_addr = cm_id->route.addr.src_addr; 176788e2f105SGioh Kim 176888e2f105SGioh Kim /* temporary until receiving session-name from client */ 176988e2f105SGioh Kim path.src = &sess->s.src_addr; 177088e2f105SGioh Kim path.dst = &sess->s.dst_addr; 177188e2f105SGioh Kim rtrs_addr_to_str(&path, str, sizeof(str)); 17722d612f0dSDima Stepanov strscpy(sess->s.sessname, str, sizeof(sess->s.sessname)); 177388e2f105SGioh Kim 17749cb83748SJack Wang sess->s.con_num = con_num; 17759cb83748SJack Wang sess->s.recon_cnt = recon_cnt; 17769cb83748SJack Wang uuid_copy(&sess->s.uuid, uuid); 17779cb83748SJack Wang spin_lock_init(&sess->state_lock); 17789cb83748SJack Wang INIT_WORK(&sess->close_work, rtrs_srv_close_work); 17799cb83748SJack Wang rtrs_srv_init_hb(sess); 17809cb83748SJack Wang 17819cb83748SJack Wang sess->s.dev = rtrs_ib_dev_find_or_add(cm_id->device, &dev_pd); 17829cb83748SJack Wang if (!sess->s.dev) { 17839cb83748SJack Wang err = -ENOMEM; 17849cb83748SJack Wang goto err_free_con; 17859cb83748SJack Wang } 17869cb83748SJack Wang err = map_cont_bufs(sess); 17879cb83748SJack Wang if (err) 17889cb83748SJack Wang goto err_put_dev; 17899cb83748SJack Wang 17909cb83748SJack Wang err = rtrs_srv_alloc_ops_ids(sess); 17919cb83748SJack Wang if (err) 17929cb83748SJack Wang goto err_unmap_bufs; 17939cb83748SJack Wang 17949cb83748SJack Wang __add_path_to_srv(srv, sess); 17959cb83748SJack Wang 17969cb83748SJack Wang return sess; 17979cb83748SJack Wang 17989cb83748SJack Wang err_unmap_bufs: 17999cb83748SJack Wang unmap_cont_bufs(sess); 18009cb83748SJack Wang err_put_dev: 18019cb83748SJack Wang rtrs_ib_dev_put(sess->s.dev); 18029cb83748SJack Wang err_free_con: 18039cb83748SJack Wang kfree(sess->s.con); 18049cb83748SJack Wang err_free_dma_addr: 18059cb83748SJack Wang kfree(sess->dma_addr); 18069cb83748SJack Wang err_free_stats: 18079cb83748SJack Wang kfree(sess->stats); 18089cb83748SJack Wang err_free_sess: 18099cb83748SJack Wang kfree(sess); 18109cb83748SJack Wang err: 18119cb83748SJack Wang return ERR_PTR(err); 18129cb83748SJack Wang } 18139cb83748SJack Wang 18149cb83748SJack Wang static int rtrs_rdma_connect(struct rdma_cm_id *cm_id, 18159cb83748SJack Wang const struct rtrs_msg_conn_req *msg, 18169cb83748SJack Wang size_t len) 18179cb83748SJack Wang { 18189cb83748SJack Wang struct rtrs_srv_ctx *ctx = cm_id->context; 18199cb83748SJack Wang struct rtrs_srv_sess *sess; 18209cb83748SJack Wang struct rtrs_srv *srv; 18219cb83748SJack Wang 18229cb83748SJack Wang u16 version, con_num, cid; 18239cb83748SJack Wang u16 recon_cnt; 1824cfbeb0b9SGuoqing Jiang int err = -ECONNRESET; 18259cb83748SJack Wang 18269cb83748SJack Wang if (len < sizeof(*msg)) { 18279cb83748SJack Wang pr_err("Invalid RTRS connection request\n"); 1828cfbeb0b9SGuoqing Jiang goto reject_w_err; 18299cb83748SJack Wang } 18309cb83748SJack Wang if (le16_to_cpu(msg->magic) != RTRS_MAGIC) { 18319cb83748SJack Wang pr_err("Invalid RTRS magic\n"); 1832cfbeb0b9SGuoqing Jiang goto reject_w_err; 18339cb83748SJack Wang } 18349cb83748SJack Wang version = le16_to_cpu(msg->version); 18359cb83748SJack Wang if (version >> 8 != RTRS_PROTO_VER_MAJOR) { 18369cb83748SJack Wang pr_err("Unsupported major RTRS version: %d, expected %d\n", 18379cb83748SJack Wang version >> 8, RTRS_PROTO_VER_MAJOR); 1838cfbeb0b9SGuoqing Jiang goto reject_w_err; 18399cb83748SJack Wang } 18409cb83748SJack Wang con_num = le16_to_cpu(msg->cid_num); 18419cb83748SJack Wang if (con_num > 4096) { 18429cb83748SJack Wang /* Sanity check */ 18439cb83748SJack Wang pr_err("Too many connections requested: %d\n", con_num); 1844cfbeb0b9SGuoqing Jiang goto reject_w_err; 18459cb83748SJack Wang } 18469cb83748SJack Wang cid = le16_to_cpu(msg->cid); 18479cb83748SJack Wang if (cid >= con_num) { 18489cb83748SJack Wang /* Sanity check */ 18499cb83748SJack Wang pr_err("Incorrect cid: %d >= %d\n", cid, con_num); 1850cfbeb0b9SGuoqing Jiang goto reject_w_err; 18519cb83748SJack Wang } 18529cb83748SJack Wang recon_cnt = le16_to_cpu(msg->recon_cnt); 185303e9b33aSMd Haris Iqbal srv = get_or_create_srv(ctx, &msg->paths_uuid, msg->first_conn); 1854ed408529SJack Wang if (IS_ERR(srv)) { 185503e9b33aSMd Haris Iqbal err = PTR_ERR(srv); 18566564b110SMd Haris Iqbal pr_err("get_or_create_srv(), error %d\n", err); 18579cb83748SJack Wang goto reject_w_err; 18589cb83748SJack Wang } 18599cb83748SJack Wang mutex_lock(&srv->paths_mutex); 18609cb83748SJack Wang sess = __find_sess(srv, &msg->sess_uuid); 18619cb83748SJack Wang if (sess) { 18629cb83748SJack Wang struct rtrs_sess *s = &sess->s; 18639cb83748SJack Wang 18649cb83748SJack Wang /* Session already holds a reference */ 18659cb83748SJack Wang put_srv(srv); 18669cb83748SJack Wang 18679cb83748SJack Wang if (sess->state != RTRS_SRV_CONNECTING) { 18689cb83748SJack Wang rtrs_err(s, "Session in wrong state: %s\n", 18699cb83748SJack Wang rtrs_srv_state_str(sess->state)); 18709cb83748SJack Wang mutex_unlock(&srv->paths_mutex); 1871cfbeb0b9SGuoqing Jiang goto reject_w_err; 18729cb83748SJack Wang } 18739cb83748SJack Wang /* 18749cb83748SJack Wang * Sanity checks 18759cb83748SJack Wang */ 1876e172037bSMd Haris Iqbal if (con_num != s->con_num || cid >= s->con_num) { 18779cb83748SJack Wang rtrs_err(s, "Incorrect request: %d, %d\n", 18789cb83748SJack Wang cid, con_num); 18799cb83748SJack Wang mutex_unlock(&srv->paths_mutex); 1880cfbeb0b9SGuoqing Jiang goto reject_w_err; 18819cb83748SJack Wang } 1882e172037bSMd Haris Iqbal if (s->con[cid]) { 18839cb83748SJack Wang rtrs_err(s, "Connection already exists: %d\n", 18849cb83748SJack Wang cid); 18859cb83748SJack Wang mutex_unlock(&srv->paths_mutex); 1886cfbeb0b9SGuoqing Jiang goto reject_w_err; 18879cb83748SJack Wang } 18889cb83748SJack Wang } else { 18899cb83748SJack Wang sess = __alloc_sess(srv, cm_id, con_num, recon_cnt, 18909cb83748SJack Wang &msg->sess_uuid); 18919cb83748SJack Wang if (IS_ERR(sess)) { 18929cb83748SJack Wang mutex_unlock(&srv->paths_mutex); 18939cb83748SJack Wang put_srv(srv); 18949cb83748SJack Wang err = PTR_ERR(sess); 18956564b110SMd Haris Iqbal pr_err("RTRS server session allocation failed: %d\n", err); 18969cb83748SJack Wang goto reject_w_err; 18979cb83748SJack Wang } 18989cb83748SJack Wang } 18999cb83748SJack Wang err = create_con(sess, cm_id, cid); 19009cb83748SJack Wang if (err) { 19016564b110SMd Haris Iqbal rtrs_err((&sess->s), "create_con(), error %d\n", err); 19029cb83748SJack Wang (void)rtrs_rdma_do_reject(cm_id, err); 19039cb83748SJack Wang /* 19049cb83748SJack Wang * Since session has other connections we follow normal way 19059cb83748SJack Wang * through workqueue, but still return an error to tell cma.c 19069cb83748SJack Wang * to call rdma_destroy_id() for current connection. 19079cb83748SJack Wang */ 19089cb83748SJack Wang goto close_and_return_err; 19099cb83748SJack Wang } 19109cb83748SJack Wang err = rtrs_rdma_do_accept(sess, cm_id); 19119cb83748SJack Wang if (err) { 19126564b110SMd Haris Iqbal rtrs_err((&sess->s), "rtrs_rdma_do_accept(), error %d\n", err); 19139cb83748SJack Wang (void)rtrs_rdma_do_reject(cm_id, err); 19149cb83748SJack Wang /* 19159cb83748SJack Wang * Since current connection was successfully added to the 19169cb83748SJack Wang * session we follow normal way through workqueue to close the 19179cb83748SJack Wang * session, thus return 0 to tell cma.c we call 19189cb83748SJack Wang * rdma_destroy_id() ourselves. 19199cb83748SJack Wang */ 19209cb83748SJack Wang err = 0; 19219cb83748SJack Wang goto close_and_return_err; 19229cb83748SJack Wang } 19239cb83748SJack Wang mutex_unlock(&srv->paths_mutex); 19249cb83748SJack Wang 19259cb83748SJack Wang return 0; 19269cb83748SJack Wang 19279cb83748SJack Wang reject_w_err: 19289cb83748SJack Wang return rtrs_rdma_do_reject(cm_id, err); 19299cb83748SJack Wang 19309cb83748SJack Wang close_and_return_err: 19319cb83748SJack Wang mutex_unlock(&srv->paths_mutex); 193299f0c380SJack Wang close_sess(sess); 19339cb83748SJack Wang 19349cb83748SJack Wang return err; 19359cb83748SJack Wang } 19369cb83748SJack Wang 19379cb83748SJack Wang static int rtrs_srv_rdma_cm_handler(struct rdma_cm_id *cm_id, 19389cb83748SJack Wang struct rdma_cm_event *ev) 19399cb83748SJack Wang { 19409cb83748SJack Wang struct rtrs_srv_sess *sess = NULL; 19419cb83748SJack Wang struct rtrs_sess *s = NULL; 19429cb83748SJack Wang 19439cb83748SJack Wang if (ev->event != RDMA_CM_EVENT_CONNECT_REQUEST) { 19449cb83748SJack Wang struct rtrs_con *c = cm_id->context; 19459cb83748SJack Wang 19469cb83748SJack Wang s = c->sess; 19479cb83748SJack Wang sess = to_srv_sess(s); 19489cb83748SJack Wang } 19499cb83748SJack Wang 19509cb83748SJack Wang switch (ev->event) { 19519cb83748SJack Wang case RDMA_CM_EVENT_CONNECT_REQUEST: 19529cb83748SJack Wang /* 19539cb83748SJack Wang * In case of error cma.c will destroy cm_id, 19549cb83748SJack Wang * see cma_process_remove() 19559cb83748SJack Wang */ 19569cb83748SJack Wang return rtrs_rdma_connect(cm_id, ev->param.conn.private_data, 19579cb83748SJack Wang ev->param.conn.private_data_len); 19589cb83748SJack Wang case RDMA_CM_EVENT_ESTABLISHED: 19599cb83748SJack Wang /* Nothing here */ 19609cb83748SJack Wang break; 19619cb83748SJack Wang case RDMA_CM_EVENT_REJECTED: 19629cb83748SJack Wang case RDMA_CM_EVENT_CONNECT_ERROR: 19639cb83748SJack Wang case RDMA_CM_EVENT_UNREACHABLE: 19649cb83748SJack Wang rtrs_err(s, "CM error (CM event: %s, err: %d)\n", 19659cb83748SJack Wang rdma_event_msg(ev->event), ev->status); 196657dae8baSGuoqing Jiang fallthrough; 19679cb83748SJack Wang case RDMA_CM_EVENT_DISCONNECTED: 19689cb83748SJack Wang case RDMA_CM_EVENT_ADDR_CHANGE: 19699cb83748SJack Wang case RDMA_CM_EVENT_TIMEWAIT_EXIT: 19709cb83748SJack Wang case RDMA_CM_EVENT_DEVICE_REMOVAL: 19719cb83748SJack Wang close_sess(sess); 19729cb83748SJack Wang break; 19739cb83748SJack Wang default: 19749cb83748SJack Wang pr_err("Ignoring unexpected CM event %s, err %d\n", 19759cb83748SJack Wang rdma_event_msg(ev->event), ev->status); 19769cb83748SJack Wang break; 19779cb83748SJack Wang } 19789cb83748SJack Wang 19799cb83748SJack Wang return 0; 19809cb83748SJack Wang } 19819cb83748SJack Wang 19829cb83748SJack Wang static struct rdma_cm_id *rtrs_srv_cm_init(struct rtrs_srv_ctx *ctx, 19839cb83748SJack Wang struct sockaddr *addr, 19849cb83748SJack Wang enum rdma_ucm_port_space ps) 19859cb83748SJack Wang { 19869cb83748SJack Wang struct rdma_cm_id *cm_id; 19879cb83748SJack Wang int ret; 19889cb83748SJack Wang 19899cb83748SJack Wang cm_id = rdma_create_id(&init_net, rtrs_srv_rdma_cm_handler, 19909cb83748SJack Wang ctx, ps, IB_QPT_RC); 19919cb83748SJack Wang if (IS_ERR(cm_id)) { 19929cb83748SJack Wang ret = PTR_ERR(cm_id); 19939cb83748SJack Wang pr_err("Creating id for RDMA connection failed, err: %d\n", 19949cb83748SJack Wang ret); 19959cb83748SJack Wang goto err_out; 19969cb83748SJack Wang } 19979cb83748SJack Wang ret = rdma_bind_addr(cm_id, addr); 19989cb83748SJack Wang if (ret) { 19999cb83748SJack Wang pr_err("Binding RDMA address failed, err: %d\n", ret); 20009cb83748SJack Wang goto err_cm; 20019cb83748SJack Wang } 20029cb83748SJack Wang ret = rdma_listen(cm_id, 64); 20039cb83748SJack Wang if (ret) { 20049cb83748SJack Wang pr_err("Listening on RDMA connection failed, err: %d\n", 20059cb83748SJack Wang ret); 20069cb83748SJack Wang goto err_cm; 20079cb83748SJack Wang } 20089cb83748SJack Wang 20099cb83748SJack Wang return cm_id; 20109cb83748SJack Wang 20119cb83748SJack Wang err_cm: 20129cb83748SJack Wang rdma_destroy_id(cm_id); 20139cb83748SJack Wang err_out: 20149cb83748SJack Wang 20159cb83748SJack Wang return ERR_PTR(ret); 20169cb83748SJack Wang } 20179cb83748SJack Wang 20189cb83748SJack Wang static int rtrs_srv_rdma_init(struct rtrs_srv_ctx *ctx, u16 port) 20199cb83748SJack Wang { 20209cb83748SJack Wang struct sockaddr_in6 sin = { 20219cb83748SJack Wang .sin6_family = AF_INET6, 20229cb83748SJack Wang .sin6_addr = IN6ADDR_ANY_INIT, 20239cb83748SJack Wang .sin6_port = htons(port), 20249cb83748SJack Wang }; 20259cb83748SJack Wang struct sockaddr_ib sib = { 20269cb83748SJack Wang .sib_family = AF_IB, 20279cb83748SJack Wang .sib_sid = cpu_to_be64(RDMA_IB_IP_PS_IB | port), 20289cb83748SJack Wang .sib_sid_mask = cpu_to_be64(0xffffffffffffffffULL), 20299cb83748SJack Wang .sib_pkey = cpu_to_be16(0xffff), 20309cb83748SJack Wang }; 20319cb83748SJack Wang struct rdma_cm_id *cm_ip, *cm_ib; 20329cb83748SJack Wang int ret; 20339cb83748SJack Wang 20349cb83748SJack Wang /* 20359cb83748SJack Wang * We accept both IPoIB and IB connections, so we need to keep 20369cb83748SJack Wang * two cm id's, one for each socket type and port space. 20379cb83748SJack Wang * If the cm initialization of one of the id's fails, we abort 20389cb83748SJack Wang * everything. 20399cb83748SJack Wang */ 20409cb83748SJack Wang cm_ip = rtrs_srv_cm_init(ctx, (struct sockaddr *)&sin, RDMA_PS_TCP); 20419cb83748SJack Wang if (IS_ERR(cm_ip)) 20429cb83748SJack Wang return PTR_ERR(cm_ip); 20439cb83748SJack Wang 20449cb83748SJack Wang cm_ib = rtrs_srv_cm_init(ctx, (struct sockaddr *)&sib, RDMA_PS_IB); 20459cb83748SJack Wang if (IS_ERR(cm_ib)) { 20469cb83748SJack Wang ret = PTR_ERR(cm_ib); 20479cb83748SJack Wang goto free_cm_ip; 20489cb83748SJack Wang } 20499cb83748SJack Wang 20509cb83748SJack Wang ctx->cm_id_ip = cm_ip; 20519cb83748SJack Wang ctx->cm_id_ib = cm_ib; 20529cb83748SJack Wang 20539cb83748SJack Wang return 0; 20549cb83748SJack Wang 20559cb83748SJack Wang free_cm_ip: 20569cb83748SJack Wang rdma_destroy_id(cm_ip); 20579cb83748SJack Wang 20589cb83748SJack Wang return ret; 20599cb83748SJack Wang } 20609cb83748SJack Wang 20619cb83748SJack Wang static struct rtrs_srv_ctx *alloc_srv_ctx(struct rtrs_srv_ops *ops) 20629cb83748SJack Wang { 20639cb83748SJack Wang struct rtrs_srv_ctx *ctx; 20649cb83748SJack Wang 20659cb83748SJack Wang ctx = kzalloc(sizeof(*ctx), GFP_KERNEL); 20669cb83748SJack Wang if (!ctx) 20679cb83748SJack Wang return NULL; 20689cb83748SJack Wang 20699cb83748SJack Wang ctx->ops = *ops; 20709cb83748SJack Wang mutex_init(&ctx->srv_mutex); 20719cb83748SJack Wang INIT_LIST_HEAD(&ctx->srv_list); 20729cb83748SJack Wang 20739cb83748SJack Wang return ctx; 20749cb83748SJack Wang } 20759cb83748SJack Wang 20769cb83748SJack Wang static void free_srv_ctx(struct rtrs_srv_ctx *ctx) 20779cb83748SJack Wang { 20789cb83748SJack Wang WARN_ON(!list_empty(&ctx->srv_list)); 20799cb83748SJack Wang mutex_destroy(&ctx->srv_mutex); 20809cb83748SJack Wang kfree(ctx); 20819cb83748SJack Wang } 20829cb83748SJack Wang 2083558d52b2SMd Haris Iqbal static int rtrs_srv_add_one(struct ib_device *device) 2084558d52b2SMd Haris Iqbal { 2085558d52b2SMd Haris Iqbal struct rtrs_srv_ctx *ctx; 2086558d52b2SMd Haris Iqbal int ret = 0; 2087558d52b2SMd Haris Iqbal 2088558d52b2SMd Haris Iqbal mutex_lock(&ib_ctx.ib_dev_mutex); 2089558d52b2SMd Haris Iqbal if (ib_ctx.ib_dev_count) 2090558d52b2SMd Haris Iqbal goto out; 2091558d52b2SMd Haris Iqbal 2092558d52b2SMd Haris Iqbal /* 2093558d52b2SMd Haris Iqbal * Since our CM IDs are NOT bound to any ib device we will create them 2094558d52b2SMd Haris Iqbal * only once 2095558d52b2SMd Haris Iqbal */ 2096558d52b2SMd Haris Iqbal ctx = ib_ctx.srv_ctx; 2097558d52b2SMd Haris Iqbal ret = rtrs_srv_rdma_init(ctx, ib_ctx.port); 2098558d52b2SMd Haris Iqbal if (ret) { 2099558d52b2SMd Haris Iqbal /* 2100558d52b2SMd Haris Iqbal * We errored out here. 2101558d52b2SMd Haris Iqbal * According to the ib code, if we encounter an error here then the 2102558d52b2SMd Haris Iqbal * error code is ignored, and no more calls to our ops are made. 2103558d52b2SMd Haris Iqbal */ 2104558d52b2SMd Haris Iqbal pr_err("Failed to initialize RDMA connection"); 2105558d52b2SMd Haris Iqbal goto err_out; 2106558d52b2SMd Haris Iqbal } 2107558d52b2SMd Haris Iqbal 2108558d52b2SMd Haris Iqbal out: 2109558d52b2SMd Haris Iqbal /* 2110558d52b2SMd Haris Iqbal * Keep a track on the number of ib devices added 2111558d52b2SMd Haris Iqbal */ 2112558d52b2SMd Haris Iqbal ib_ctx.ib_dev_count++; 2113558d52b2SMd Haris Iqbal 2114558d52b2SMd Haris Iqbal err_out: 2115558d52b2SMd Haris Iqbal mutex_unlock(&ib_ctx.ib_dev_mutex); 2116558d52b2SMd Haris Iqbal return ret; 2117558d52b2SMd Haris Iqbal } 2118558d52b2SMd Haris Iqbal 2119558d52b2SMd Haris Iqbal static void rtrs_srv_remove_one(struct ib_device *device, void *client_data) 2120558d52b2SMd Haris Iqbal { 2121558d52b2SMd Haris Iqbal struct rtrs_srv_ctx *ctx; 2122558d52b2SMd Haris Iqbal 2123558d52b2SMd Haris Iqbal mutex_lock(&ib_ctx.ib_dev_mutex); 2124558d52b2SMd Haris Iqbal ib_ctx.ib_dev_count--; 2125558d52b2SMd Haris Iqbal 2126558d52b2SMd Haris Iqbal if (ib_ctx.ib_dev_count) 2127558d52b2SMd Haris Iqbal goto out; 2128558d52b2SMd Haris Iqbal 2129558d52b2SMd Haris Iqbal /* 2130558d52b2SMd Haris Iqbal * Since our CM IDs are NOT bound to any ib device we will remove them 2131558d52b2SMd Haris Iqbal * only once, when the last device is removed 2132558d52b2SMd Haris Iqbal */ 2133558d52b2SMd Haris Iqbal ctx = ib_ctx.srv_ctx; 2134558d52b2SMd Haris Iqbal rdma_destroy_id(ctx->cm_id_ip); 2135558d52b2SMd Haris Iqbal rdma_destroy_id(ctx->cm_id_ib); 2136558d52b2SMd Haris Iqbal 2137558d52b2SMd Haris Iqbal out: 2138558d52b2SMd Haris Iqbal mutex_unlock(&ib_ctx.ib_dev_mutex); 2139558d52b2SMd Haris Iqbal } 2140558d52b2SMd Haris Iqbal 2141558d52b2SMd Haris Iqbal static struct ib_client rtrs_srv_client = { 2142558d52b2SMd Haris Iqbal .name = "rtrs_server", 2143558d52b2SMd Haris Iqbal .add = rtrs_srv_add_one, 2144558d52b2SMd Haris Iqbal .remove = rtrs_srv_remove_one 2145558d52b2SMd Haris Iqbal }; 2146558d52b2SMd Haris Iqbal 21479cb83748SJack Wang /** 21489cb83748SJack Wang * rtrs_srv_open() - open RTRS server context 21499cb83748SJack Wang * @ops: callback functions 21509cb83748SJack Wang * @port: port to listen on 21519cb83748SJack Wang * 21529cb83748SJack Wang * Creates server context with specified callbacks. 21539cb83748SJack Wang * 21549cb83748SJack Wang * Return a valid pointer on success otherwise PTR_ERR. 21559cb83748SJack Wang */ 21569cb83748SJack Wang struct rtrs_srv_ctx *rtrs_srv_open(struct rtrs_srv_ops *ops, u16 port) 21579cb83748SJack Wang { 21589cb83748SJack Wang struct rtrs_srv_ctx *ctx; 21599cb83748SJack Wang int err; 21609cb83748SJack Wang 21619cb83748SJack Wang ctx = alloc_srv_ctx(ops); 21629cb83748SJack Wang if (!ctx) 21639cb83748SJack Wang return ERR_PTR(-ENOMEM); 21649cb83748SJack Wang 2165558d52b2SMd Haris Iqbal mutex_init(&ib_ctx.ib_dev_mutex); 2166558d52b2SMd Haris Iqbal ib_ctx.srv_ctx = ctx; 2167558d52b2SMd Haris Iqbal ib_ctx.port = port; 2168558d52b2SMd Haris Iqbal 2169558d52b2SMd Haris Iqbal err = ib_register_client(&rtrs_srv_client); 21709cb83748SJack Wang if (err) { 21719cb83748SJack Wang free_srv_ctx(ctx); 21729cb83748SJack Wang return ERR_PTR(err); 21739cb83748SJack Wang } 21749cb83748SJack Wang 21759cb83748SJack Wang return ctx; 21769cb83748SJack Wang } 21779cb83748SJack Wang EXPORT_SYMBOL(rtrs_srv_open); 21789cb83748SJack Wang 21799cb83748SJack Wang static void close_sessions(struct rtrs_srv *srv) 21809cb83748SJack Wang { 21819cb83748SJack Wang struct rtrs_srv_sess *sess; 21829cb83748SJack Wang 21839cb83748SJack Wang mutex_lock(&srv->paths_mutex); 21849cb83748SJack Wang list_for_each_entry(sess, &srv->paths_list, s.entry) 21859cb83748SJack Wang close_sess(sess); 21869cb83748SJack Wang mutex_unlock(&srv->paths_mutex); 21879cb83748SJack Wang } 21889cb83748SJack Wang 21899cb83748SJack Wang static void close_ctx(struct rtrs_srv_ctx *ctx) 21909cb83748SJack Wang { 21919cb83748SJack Wang struct rtrs_srv *srv; 21929cb83748SJack Wang 21939cb83748SJack Wang mutex_lock(&ctx->srv_mutex); 21949cb83748SJack Wang list_for_each_entry(srv, &ctx->srv_list, ctx_list) 21959cb83748SJack Wang close_sessions(srv); 21969cb83748SJack Wang mutex_unlock(&ctx->srv_mutex); 21979cb83748SJack Wang flush_workqueue(rtrs_wq); 21989cb83748SJack Wang } 21999cb83748SJack Wang 22009cb83748SJack Wang /** 22019cb83748SJack Wang * rtrs_srv_close() - close RTRS server context 22029cb83748SJack Wang * @ctx: pointer to server context 22039cb83748SJack Wang * 22049cb83748SJack Wang * Closes RTRS server context with all client sessions. 22059cb83748SJack Wang */ 22069cb83748SJack Wang void rtrs_srv_close(struct rtrs_srv_ctx *ctx) 22079cb83748SJack Wang { 2208558d52b2SMd Haris Iqbal ib_unregister_client(&rtrs_srv_client); 2209558d52b2SMd Haris Iqbal mutex_destroy(&ib_ctx.ib_dev_mutex); 22109cb83748SJack Wang close_ctx(ctx); 22119cb83748SJack Wang free_srv_ctx(ctx); 22129cb83748SJack Wang } 22139cb83748SJack Wang EXPORT_SYMBOL(rtrs_srv_close); 22149cb83748SJack Wang 22159cb83748SJack Wang static int check_module_params(void) 22169cb83748SJack Wang { 22179cb83748SJack Wang if (sess_queue_depth < 1 || sess_queue_depth > MAX_SESS_QUEUE_DEPTH) { 22189cb83748SJack Wang pr_err("Invalid sess_queue_depth value %d, has to be >= %d, <= %d.\n", 22199cb83748SJack Wang sess_queue_depth, 1, MAX_SESS_QUEUE_DEPTH); 22209cb83748SJack Wang return -EINVAL; 22219cb83748SJack Wang } 22223f3d0eabSGioh Kim if (max_chunk_size < MIN_CHUNK_SIZE || !is_power_of_2(max_chunk_size)) { 22239cb83748SJack Wang pr_err("Invalid max_chunk_size value %d, has to be >= %d and should be power of two.\n", 22243f3d0eabSGioh Kim max_chunk_size, MIN_CHUNK_SIZE); 22259cb83748SJack Wang return -EINVAL; 22269cb83748SJack Wang } 22279cb83748SJack Wang 22289cb83748SJack Wang /* 22299cb83748SJack Wang * Check if IB immediate data size is enough to hold the mem_id and the 22309cb83748SJack Wang * offset inside the memory chunk 22319cb83748SJack Wang */ 22329cb83748SJack Wang if ((ilog2(sess_queue_depth - 1) + 1) + 22339cb83748SJack Wang (ilog2(max_chunk_size - 1) + 1) > MAX_IMM_PAYL_BITS) { 22349cb83748SJack Wang pr_err("RDMA immediate size (%db) not enough to encode %d buffers of size %dB. Reduce 'sess_queue_depth' or 'max_chunk_size' parameters.\n", 22359cb83748SJack Wang MAX_IMM_PAYL_BITS, sess_queue_depth, max_chunk_size); 22369cb83748SJack Wang return -EINVAL; 22379cb83748SJack Wang } 22389cb83748SJack Wang 22399cb83748SJack Wang return 0; 22409cb83748SJack Wang } 22419cb83748SJack Wang 22429cb83748SJack Wang static int __init rtrs_server_init(void) 22439cb83748SJack Wang { 22449cb83748SJack Wang int err; 22459cb83748SJack Wang 22469cb83748SJack Wang pr_info("Loading module %s, proto %s: (max_chunk_size: %d (pure IO %ld, headers %ld) , sess_queue_depth: %d, always_invalidate: %d)\n", 22479cb83748SJack Wang KBUILD_MODNAME, RTRS_PROTO_VER_STRING, 22489cb83748SJack Wang max_chunk_size, max_chunk_size - MAX_HDR_SIZE, MAX_HDR_SIZE, 22499cb83748SJack Wang sess_queue_depth, always_invalidate); 22509cb83748SJack Wang 22519cb83748SJack Wang rtrs_rdma_dev_pd_init(0, &dev_pd); 22529cb83748SJack Wang 22539cb83748SJack Wang err = check_module_params(); 22549cb83748SJack Wang if (err) { 22559cb83748SJack Wang pr_err("Failed to load module, invalid module parameters, err: %d\n", 22569cb83748SJack Wang err); 22579cb83748SJack Wang return err; 22589cb83748SJack Wang } 22599cb83748SJack Wang chunk_pool = mempool_create_page_pool(sess_queue_depth * CHUNK_POOL_SZ, 22609cb83748SJack Wang get_order(max_chunk_size)); 22619cb83748SJack Wang if (!chunk_pool) 22629cb83748SJack Wang return -ENOMEM; 22639cb83748SJack Wang rtrs_dev_class = class_create(THIS_MODULE, "rtrs-server"); 22649cb83748SJack Wang if (IS_ERR(rtrs_dev_class)) { 22659cb83748SJack Wang err = PTR_ERR(rtrs_dev_class); 22669cb83748SJack Wang goto out_chunk_pool; 22679cb83748SJack Wang } 226803ed5a8cSJack Wang rtrs_wq = alloc_workqueue("rtrs_server_wq", 0, 0); 22696b31afceSWei Yongjun if (!rtrs_wq) { 22706b31afceSWei Yongjun err = -ENOMEM; 22719cb83748SJack Wang goto out_dev_class; 22726b31afceSWei Yongjun } 22739cb83748SJack Wang 22749cb83748SJack Wang return 0; 22759cb83748SJack Wang 22769cb83748SJack Wang out_dev_class: 22779cb83748SJack Wang class_destroy(rtrs_dev_class); 22789cb83748SJack Wang out_chunk_pool: 22799cb83748SJack Wang mempool_destroy(chunk_pool); 22809cb83748SJack Wang 22819cb83748SJack Wang return err; 22829cb83748SJack Wang } 22839cb83748SJack Wang 22849cb83748SJack Wang static void __exit rtrs_server_exit(void) 22859cb83748SJack Wang { 22869cb83748SJack Wang destroy_workqueue(rtrs_wq); 22879cb83748SJack Wang class_destroy(rtrs_dev_class); 22889cb83748SJack Wang mempool_destroy(chunk_pool); 22899cb83748SJack Wang rtrs_rdma_dev_pd_deinit(&dev_pd); 22909cb83748SJack Wang } 22919cb83748SJack Wang 22929cb83748SJack Wang module_init(rtrs_server_init); 22939cb83748SJack Wang module_exit(rtrs_server_exit); 2294