/* * Copyright (c) 2016 Mellanox Technologies Ltd. All rights reserved. * Copyright (c) 2015 System Fabric Works, Inc. All rights reserved. * * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU * General Public License (GPL) Version 2, available from the file * COPYING in the main directory of this source tree, or the * OpenIB.org BSD license below: * * Redistribution and use in source and binary forms, with or * without modification, are permitted provided that the following * conditions are met: * * - Redistributions of source code must retain the above * copyright notice, this list of conditions and the following * disclaimer. * * - Redistributions in binary form must reproduce the above * copyright notice, this list of conditions and the following * disclaimer in the documentation and/or other materials * provided with the distribution. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ #include #include #include #include "rxe.h" #include "rxe_loc.h" #include "rxe_queue.h" #include "rxe_hw_counters.h" static int rxe_query_device(struct ib_device *dev, struct ib_device_attr *attr, struct ib_udata *uhw) { struct rxe_dev *rxe = to_rdev(dev); if (uhw->inlen || uhw->outlen) return -EINVAL; *attr = rxe->attr; return 0; } static int rxe_query_port(struct ib_device *dev, u8 port_num, struct ib_port_attr *attr) { struct rxe_dev *rxe = to_rdev(dev); struct rxe_port *port; int rc; port = &rxe->port; /* *attr being zeroed by the caller, avoid zeroing it here */ *attr = port->attr; mutex_lock(&rxe->usdev_lock); rc = ib_get_eth_speed(dev, port_num, &attr->active_speed, &attr->active_width); if (attr->state == IB_PORT_ACTIVE) attr->phys_state = IB_PORT_PHYS_STATE_LINK_UP; else if (dev_get_flags(rxe->ndev) & IFF_UP) attr->phys_state = IB_PORT_PHYS_STATE_POLLING; else attr->phys_state = IB_PORT_PHYS_STATE_DISABLED; mutex_unlock(&rxe->usdev_lock); return rc; } static int rxe_query_pkey(struct ib_device *device, u8 port_num, u16 index, u16 *pkey) { if (index > 0) return -EINVAL; *pkey = IB_DEFAULT_PKEY_FULL; return 0; } static int rxe_modify_device(struct ib_device *dev, int mask, struct ib_device_modify *attr) { struct rxe_dev *rxe = to_rdev(dev); if (mask & ~(IB_DEVICE_MODIFY_SYS_IMAGE_GUID | IB_DEVICE_MODIFY_NODE_DESC)) return -EOPNOTSUPP; if (mask & IB_DEVICE_MODIFY_SYS_IMAGE_GUID) rxe->attr.sys_image_guid = cpu_to_be64(attr->sys_image_guid); if (mask & IB_DEVICE_MODIFY_NODE_DESC) { memcpy(rxe->ib_dev.node_desc, attr->node_desc, sizeof(rxe->ib_dev.node_desc)); } return 0; } static int rxe_modify_port(struct ib_device *dev, u8 port_num, int mask, struct ib_port_modify *attr) { struct rxe_dev *rxe = to_rdev(dev); struct rxe_port *port; port = &rxe->port; port->attr.port_cap_flags |= attr->set_port_cap_mask; port->attr.port_cap_flags &= ~attr->clr_port_cap_mask; if (mask & IB_PORT_RESET_QKEY_CNTR) port->attr.qkey_viol_cntr = 0; return 0; } static enum rdma_link_layer rxe_get_link_layer(struct ib_device *dev, u8 port_num) { return IB_LINK_LAYER_ETHERNET; } static int rxe_alloc_ucontext(struct ib_ucontext *uctx, struct ib_udata *udata) { struct rxe_dev *rxe = to_rdev(uctx->device); struct rxe_ucontext *uc = to_ruc(uctx); return rxe_add_to_pool(&rxe->uc_pool, &uc->pelem); } static void rxe_dealloc_ucontext(struct ib_ucontext *ibuc) { struct rxe_ucontext *uc = to_ruc(ibuc); rxe_drop_ref(uc); } static int rxe_port_immutable(struct ib_device *dev, u8 port_num, struct ib_port_immutable *immutable) { int err; struct ib_port_attr attr; immutable->core_cap_flags = RDMA_CORE_PORT_IBA_ROCE_UDP_ENCAP; err = ib_query_port(dev, port_num, &attr); if (err) return err; immutable->pkey_tbl_len = attr.pkey_tbl_len; immutable->gid_tbl_len = attr.gid_tbl_len; immutable->max_mad_size = IB_MGMT_MAD_SIZE; return 0; } static int rxe_alloc_pd(struct ib_pd *ibpd, struct ib_udata *udata) { struct rxe_dev *rxe = to_rdev(ibpd->device); struct rxe_pd *pd = to_rpd(ibpd); return rxe_add_to_pool(&rxe->pd_pool, &pd->pelem); } static void rxe_dealloc_pd(struct ib_pd *ibpd, struct ib_udata *udata) { struct rxe_pd *pd = to_rpd(ibpd); rxe_drop_ref(pd); } static int rxe_create_ah(struct ib_ah *ibah, struct rdma_ah_init_attr *init_attr, struct ib_udata *udata) { int err; struct rxe_dev *rxe = to_rdev(ibah->device); struct rxe_ah *ah = to_rah(ibah); err = rxe_av_chk_attr(rxe, init_attr->ah_attr); if (err) return err; err = rxe_add_to_pool(&rxe->ah_pool, &ah->pelem); if (err) return err; rxe_init_av(init_attr->ah_attr, &ah->av); return 0; } static int rxe_modify_ah(struct ib_ah *ibah, struct rdma_ah_attr *attr) { int err; struct rxe_dev *rxe = to_rdev(ibah->device); struct rxe_ah *ah = to_rah(ibah); err = rxe_av_chk_attr(rxe, attr); if (err) return err; rxe_init_av(attr, &ah->av); return 0; } static int rxe_query_ah(struct ib_ah *ibah, struct rdma_ah_attr *attr) { struct rxe_ah *ah = to_rah(ibah); memset(attr, 0, sizeof(*attr)); attr->type = ibah->type; rxe_av_to_attr(&ah->av, attr); return 0; } static void rxe_destroy_ah(struct ib_ah *ibah, u32 flags) { struct rxe_ah *ah = to_rah(ibah); rxe_drop_ref(ah); } static int post_one_recv(struct rxe_rq *rq, const struct ib_recv_wr *ibwr) { int err; int i; u32 length; struct rxe_recv_wqe *recv_wqe; int num_sge = ibwr->num_sge; if (unlikely(queue_full(rq->queue))) { err = -ENOMEM; goto err1; } if (unlikely(num_sge > rq->max_sge)) { err = -EINVAL; goto err1; } length = 0; for (i = 0; i < num_sge; i++) length += ibwr->sg_list[i].length; recv_wqe = producer_addr(rq->queue); recv_wqe->wr_id = ibwr->wr_id; recv_wqe->num_sge = num_sge; memcpy(recv_wqe->dma.sge, ibwr->sg_list, num_sge * sizeof(struct ib_sge)); recv_wqe->dma.length = length; recv_wqe->dma.resid = length; recv_wqe->dma.num_sge = num_sge; recv_wqe->dma.cur_sge = 0; recv_wqe->dma.sge_offset = 0; /* make sure all changes to the work queue are written before we * update the producer pointer */ smp_wmb(); advance_producer(rq->queue); return 0; err1: return err; } static int rxe_create_srq(struct ib_srq *ibsrq, struct ib_srq_init_attr *init, struct ib_udata *udata) { int err; struct rxe_dev *rxe = to_rdev(ibsrq->device); struct rxe_pd *pd = to_rpd(ibsrq->pd); struct rxe_srq *srq = to_rsrq(ibsrq); struct rxe_create_srq_resp __user *uresp = NULL; if (udata) { if (udata->outlen < sizeof(*uresp)) return -EINVAL; uresp = udata->outbuf; } err = rxe_srq_chk_attr(rxe, NULL, &init->attr, IB_SRQ_INIT_MASK); if (err) goto err1; err = rxe_add_to_pool(&rxe->srq_pool, &srq->pelem); if (err) goto err1; rxe_add_ref(pd); srq->pd = pd; err = rxe_srq_from_init(rxe, srq, init, udata, uresp); if (err) goto err2; return 0; err2: rxe_drop_ref(pd); rxe_drop_ref(srq); err1: return err; } static int rxe_modify_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr, enum ib_srq_attr_mask mask, struct ib_udata *udata) { int err; struct rxe_srq *srq = to_rsrq(ibsrq); struct rxe_dev *rxe = to_rdev(ibsrq->device); struct rxe_modify_srq_cmd ucmd = {}; if (udata) { if (udata->inlen < sizeof(ucmd)) return -EINVAL; err = ib_copy_from_udata(&ucmd, udata, sizeof(ucmd)); if (err) return err; } err = rxe_srq_chk_attr(rxe, srq, attr, mask); if (err) goto err1; err = rxe_srq_from_attr(rxe, srq, attr, mask, &ucmd, udata); if (err) goto err1; return 0; err1: return err; } static int rxe_query_srq(struct ib_srq *ibsrq, struct ib_srq_attr *attr) { struct rxe_srq *srq = to_rsrq(ibsrq); if (srq->error) return -EINVAL; attr->max_wr = srq->rq.queue->buf->index_mask; attr->max_sge = srq->rq.max_sge; attr->srq_limit = srq->limit; return 0; } static void rxe_destroy_srq(struct ib_srq *ibsrq, struct ib_udata *udata) { struct rxe_srq *srq = to_rsrq(ibsrq); if (srq->rq.queue) rxe_queue_cleanup(srq->rq.queue); rxe_drop_ref(srq->pd); rxe_drop_ref(srq); } static int rxe_post_srq_recv(struct ib_srq *ibsrq, const struct ib_recv_wr *wr, const struct ib_recv_wr **bad_wr) { int err = 0; unsigned long flags; struct rxe_srq *srq = to_rsrq(ibsrq); spin_lock_irqsave(&srq->rq.producer_lock, flags); while (wr) { err = post_one_recv(&srq->rq, wr); if (unlikely(err)) break; wr = wr->next; } spin_unlock_irqrestore(&srq->rq.producer_lock, flags); if (err) *bad_wr = wr; return err; } static struct ib_qp *rxe_create_qp(struct ib_pd *ibpd, struct ib_qp_init_attr *init, struct ib_udata *udata) { int err; struct rxe_dev *rxe = to_rdev(ibpd->device); struct rxe_pd *pd = to_rpd(ibpd); struct rxe_qp *qp; struct rxe_create_qp_resp __user *uresp = NULL; if (udata) { if (udata->outlen < sizeof(*uresp)) return ERR_PTR(-EINVAL); uresp = udata->outbuf; } err = rxe_qp_chk_init(rxe, init); if (err) goto err1; qp = rxe_alloc(&rxe->qp_pool); if (!qp) { err = -ENOMEM; goto err1; } if (udata) { if (udata->inlen) { err = -EINVAL; goto err2; } qp->is_user = 1; } rxe_add_index(qp); err = rxe_qp_from_init(rxe, qp, pd, init, uresp, ibpd, udata); if (err) goto err3; return &qp->ibqp; err3: rxe_drop_index(qp); err2: rxe_drop_ref(qp); err1: return ERR_PTR(err); } static int rxe_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, int mask, struct ib_udata *udata) { int err; struct rxe_dev *rxe = to_rdev(ibqp->device); struct rxe_qp *qp = to_rqp(ibqp); err = rxe_qp_chk_attr(rxe, qp, attr, mask); if (err) goto err1; err = rxe_qp_from_attr(qp, attr, mask, udata); if (err) goto err1; return 0; err1: return err; } static int rxe_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr, int mask, struct ib_qp_init_attr *init) { struct rxe_qp *qp = to_rqp(ibqp); rxe_qp_to_init(qp, init); rxe_qp_to_attr(qp, attr, mask); return 0; } static int rxe_destroy_qp(struct ib_qp *ibqp, struct ib_udata *udata) { struct rxe_qp *qp = to_rqp(ibqp); rxe_qp_destroy(qp); rxe_drop_index(qp); rxe_drop_ref(qp); return 0; } static int validate_send_wr(struct rxe_qp *qp, const struct ib_send_wr *ibwr, unsigned int mask, unsigned int length) { int num_sge = ibwr->num_sge; struct rxe_sq *sq = &qp->sq; if (unlikely(num_sge > sq->max_sge)) goto err1; if (unlikely(mask & WR_ATOMIC_MASK)) { if (length < 8) goto err1; if (atomic_wr(ibwr)->remote_addr & 0x7) goto err1; } if (unlikely((ibwr->send_flags & IB_SEND_INLINE) && (length > sq->max_inline))) goto err1; return 0; err1: return -EINVAL; } static void init_send_wr(struct rxe_qp *qp, struct rxe_send_wr *wr, const struct ib_send_wr *ibwr) { wr->wr_id = ibwr->wr_id; wr->num_sge = ibwr->num_sge; wr->opcode = ibwr->opcode; wr->send_flags = ibwr->send_flags; if (qp_type(qp) == IB_QPT_UD || qp_type(qp) == IB_QPT_SMI || qp_type(qp) == IB_QPT_GSI) { wr->wr.ud.remote_qpn = ud_wr(ibwr)->remote_qpn; wr->wr.ud.remote_qkey = ud_wr(ibwr)->remote_qkey; if (qp_type(qp) == IB_QPT_GSI) wr->wr.ud.pkey_index = ud_wr(ibwr)->pkey_index; if (wr->opcode == IB_WR_SEND_WITH_IMM) wr->ex.imm_data = ibwr->ex.imm_data; } else { switch (wr->opcode) { case IB_WR_RDMA_WRITE_WITH_IMM: wr->ex.imm_data = ibwr->ex.imm_data; /* fall through */ case IB_WR_RDMA_READ: case IB_WR_RDMA_WRITE: wr->wr.rdma.remote_addr = rdma_wr(ibwr)->remote_addr; wr->wr.rdma.rkey = rdma_wr(ibwr)->rkey; break; case IB_WR_SEND_WITH_IMM: wr->ex.imm_data = ibwr->ex.imm_data; break; case IB_WR_SEND_WITH_INV: wr->ex.invalidate_rkey = ibwr->ex.invalidate_rkey; break; case IB_WR_ATOMIC_CMP_AND_SWP: case IB_WR_ATOMIC_FETCH_AND_ADD: wr->wr.atomic.remote_addr = atomic_wr(ibwr)->remote_addr; wr->wr.atomic.compare_add = atomic_wr(ibwr)->compare_add; wr->wr.atomic.swap = atomic_wr(ibwr)->swap; wr->wr.atomic.rkey = atomic_wr(ibwr)->rkey; break; case IB_WR_LOCAL_INV: wr->ex.invalidate_rkey = ibwr->ex.invalidate_rkey; break; case IB_WR_REG_MR: wr->wr.reg.mr = reg_wr(ibwr)->mr; wr->wr.reg.key = reg_wr(ibwr)->key; wr->wr.reg.access = reg_wr(ibwr)->access; break; default: break; } } } static int init_send_wqe(struct rxe_qp *qp, const struct ib_send_wr *ibwr, unsigned int mask, unsigned int length, struct rxe_send_wqe *wqe) { int num_sge = ibwr->num_sge; struct ib_sge *sge; int i; u8 *p; init_send_wr(qp, &wqe->wr, ibwr); if (qp_type(qp) == IB_QPT_UD || qp_type(qp) == IB_QPT_SMI || qp_type(qp) == IB_QPT_GSI) memcpy(&wqe->av, &to_rah(ud_wr(ibwr)->ah)->av, sizeof(wqe->av)); if (unlikely(ibwr->send_flags & IB_SEND_INLINE)) { p = wqe->dma.inline_data; sge = ibwr->sg_list; for (i = 0; i < num_sge; i++, sge++) { memcpy(p, (void *)(uintptr_t)sge->addr, sge->length); p += sge->length; } } else if (mask & WR_REG_MASK) { wqe->mask = mask; wqe->state = wqe_state_posted; return 0; } else memcpy(wqe->dma.sge, ibwr->sg_list, num_sge * sizeof(struct ib_sge)); wqe->iova = mask & WR_ATOMIC_MASK ? atomic_wr(ibwr)->remote_addr : mask & WR_READ_OR_WRITE_MASK ? rdma_wr(ibwr)->remote_addr : 0; wqe->mask = mask; wqe->dma.length = length; wqe->dma.resid = length; wqe->dma.num_sge = num_sge; wqe->dma.cur_sge = 0; wqe->dma.sge_offset = 0; wqe->state = wqe_state_posted; wqe->ssn = atomic_add_return(1, &qp->ssn); return 0; } static int post_one_send(struct rxe_qp *qp, const struct ib_send_wr *ibwr, unsigned int mask, u32 length) { int err; struct rxe_sq *sq = &qp->sq; struct rxe_send_wqe *send_wqe; unsigned long flags; err = validate_send_wr(qp, ibwr, mask, length); if (err) return err; spin_lock_irqsave(&qp->sq.sq_lock, flags); if (unlikely(queue_full(sq->queue))) { err = -ENOMEM; goto err1; } send_wqe = producer_addr(sq->queue); err = init_send_wqe(qp, ibwr, mask, length, send_wqe); if (unlikely(err)) goto err1; /* * make sure all changes to the work queue are * written before we update the producer pointer */ smp_wmb(); advance_producer(sq->queue); spin_unlock_irqrestore(&qp->sq.sq_lock, flags); return 0; err1: spin_unlock_irqrestore(&qp->sq.sq_lock, flags); return err; } static int rxe_post_send_kernel(struct rxe_qp *qp, const struct ib_send_wr *wr, const struct ib_send_wr **bad_wr) { int err = 0; unsigned int mask; unsigned int length = 0; int i; struct ib_send_wr *next; while (wr) { mask = wr_opcode_mask(wr->opcode, qp); if (unlikely(!mask)) { err = -EINVAL; *bad_wr = wr; break; } if (unlikely((wr->send_flags & IB_SEND_INLINE) && !(mask & WR_INLINE_MASK))) { err = -EINVAL; *bad_wr = wr; break; } next = wr->next; length = 0; for (i = 0; i < wr->num_sge; i++) length += wr->sg_list[i].length; err = post_one_send(qp, wr, mask, length); if (err) { *bad_wr = wr; break; } wr = next; } rxe_run_task(&qp->req.task, 1); if (unlikely(qp->req.state == QP_STATE_ERROR)) rxe_run_task(&qp->comp.task, 1); return err; } static int rxe_post_send(struct ib_qp *ibqp, const struct ib_send_wr *wr, const struct ib_send_wr **bad_wr) { struct rxe_qp *qp = to_rqp(ibqp); if (unlikely(!qp->valid)) { *bad_wr = wr; return -EINVAL; } if (unlikely(qp->req.state < QP_STATE_READY)) { *bad_wr = wr; return -EINVAL; } if (qp->is_user) { /* Utilize process context to do protocol processing */ rxe_run_task(&qp->req.task, 0); return 0; } else return rxe_post_send_kernel(qp, wr, bad_wr); } static int rxe_post_recv(struct ib_qp *ibqp, const struct ib_recv_wr *wr, const struct ib_recv_wr **bad_wr) { int err = 0; struct rxe_qp *qp = to_rqp(ibqp); struct rxe_rq *rq = &qp->rq; unsigned long flags; if (unlikely((qp_state(qp) < IB_QPS_INIT) || !qp->valid)) { *bad_wr = wr; err = -EINVAL; goto err1; } if (unlikely(qp->srq)) { *bad_wr = wr; err = -EINVAL; goto err1; } spin_lock_irqsave(&rq->producer_lock, flags); while (wr) { err = post_one_recv(rq, wr); if (unlikely(err)) { *bad_wr = wr; break; } wr = wr->next; } spin_unlock_irqrestore(&rq->producer_lock, flags); if (qp->resp.state == QP_STATE_ERROR) rxe_run_task(&qp->resp.task, 1); err1: return err; } static int rxe_create_cq(struct ib_cq *ibcq, const struct ib_cq_init_attr *attr, struct ib_udata *udata) { int err; struct ib_device *dev = ibcq->device; struct rxe_dev *rxe = to_rdev(dev); struct rxe_cq *cq = to_rcq(ibcq); struct rxe_create_cq_resp __user *uresp = NULL; if (udata) { if (udata->outlen < sizeof(*uresp)) return -EINVAL; uresp = udata->outbuf; } if (attr->flags) return -EINVAL; err = rxe_cq_chk_attr(rxe, NULL, attr->cqe, attr->comp_vector); if (err) return err; err = rxe_cq_from_init(rxe, cq, attr->cqe, attr->comp_vector, udata, uresp); if (err) return err; return rxe_add_to_pool(&rxe->cq_pool, &cq->pelem); } static void rxe_destroy_cq(struct ib_cq *ibcq, struct ib_udata *udata) { struct rxe_cq *cq = to_rcq(ibcq); rxe_cq_disable(cq); rxe_drop_ref(cq); } static int rxe_resize_cq(struct ib_cq *ibcq, int cqe, struct ib_udata *udata) { int err; struct rxe_cq *cq = to_rcq(ibcq); struct rxe_dev *rxe = to_rdev(ibcq->device); struct rxe_resize_cq_resp __user *uresp = NULL; if (udata) { if (udata->outlen < sizeof(*uresp)) return -EINVAL; uresp = udata->outbuf; } err = rxe_cq_chk_attr(rxe, cq, cqe, 0); if (err) goto err1; err = rxe_cq_resize_queue(cq, cqe, uresp, udata); if (err) goto err1; return 0; err1: return err; } static int rxe_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *wc) { int i; struct rxe_cq *cq = to_rcq(ibcq); struct rxe_cqe *cqe; unsigned long flags; spin_lock_irqsave(&cq->cq_lock, flags); for (i = 0; i < num_entries; i++) { cqe = queue_head(cq->queue); if (!cqe) break; memcpy(wc++, &cqe->ibwc, sizeof(*wc)); advance_consumer(cq->queue); } spin_unlock_irqrestore(&cq->cq_lock, flags); return i; } static int rxe_peek_cq(struct ib_cq *ibcq, int wc_cnt) { struct rxe_cq *cq = to_rcq(ibcq); int count = queue_count(cq->queue); return (count > wc_cnt) ? wc_cnt : count; } static int rxe_req_notify_cq(struct ib_cq *ibcq, enum ib_cq_notify_flags flags) { struct rxe_cq *cq = to_rcq(ibcq); unsigned long irq_flags; int ret = 0; spin_lock_irqsave(&cq->cq_lock, irq_flags); if (cq->notify != IB_CQ_NEXT_COMP) cq->notify = flags & IB_CQ_SOLICITED_MASK; if ((flags & IB_CQ_REPORT_MISSED_EVENTS) && !queue_empty(cq->queue)) ret = 1; spin_unlock_irqrestore(&cq->cq_lock, irq_flags); return ret; } static struct ib_mr *rxe_get_dma_mr(struct ib_pd *ibpd, int access) { struct rxe_dev *rxe = to_rdev(ibpd->device); struct rxe_pd *pd = to_rpd(ibpd); struct rxe_mem *mr; mr = rxe_alloc(&rxe->mr_pool); if (!mr) return ERR_PTR(-ENOMEM); rxe_add_index(mr); rxe_add_ref(pd); rxe_mem_init_dma(pd, access, mr); return &mr->ibmr; } static struct ib_mr *rxe_reg_user_mr(struct ib_pd *ibpd, u64 start, u64 length, u64 iova, int access, struct ib_udata *udata) { int err; struct rxe_dev *rxe = to_rdev(ibpd->device); struct rxe_pd *pd = to_rpd(ibpd); struct rxe_mem *mr; mr = rxe_alloc(&rxe->mr_pool); if (!mr) { err = -ENOMEM; goto err2; } rxe_add_index(mr); rxe_add_ref(pd); err = rxe_mem_init_user(pd, start, length, iova, access, udata, mr); if (err) goto err3; return &mr->ibmr; err3: rxe_drop_ref(pd); rxe_drop_index(mr); rxe_drop_ref(mr); err2: return ERR_PTR(err); } static int rxe_dereg_mr(struct ib_mr *ibmr, struct ib_udata *udata) { struct rxe_mem *mr = to_rmr(ibmr); mr->state = RXE_MEM_STATE_ZOMBIE; rxe_drop_ref(mr->pd); rxe_drop_index(mr); rxe_drop_ref(mr); return 0; } static struct ib_mr *rxe_alloc_mr(struct ib_pd *ibpd, enum ib_mr_type mr_type, u32 max_num_sg) { struct rxe_dev *rxe = to_rdev(ibpd->device); struct rxe_pd *pd = to_rpd(ibpd); struct rxe_mem *mr; int err; if (mr_type != IB_MR_TYPE_MEM_REG) return ERR_PTR(-EINVAL); mr = rxe_alloc(&rxe->mr_pool); if (!mr) { err = -ENOMEM; goto err1; } rxe_add_index(mr); rxe_add_ref(pd); err = rxe_mem_init_fast(pd, max_num_sg, mr); if (err) goto err2; return &mr->ibmr; err2: rxe_drop_ref(pd); rxe_drop_index(mr); rxe_drop_ref(mr); err1: return ERR_PTR(err); } static int rxe_set_page(struct ib_mr *ibmr, u64 addr) { struct rxe_mem *mr = to_rmr(ibmr); struct rxe_map *map; struct rxe_phys_buf *buf; if (unlikely(mr->nbuf == mr->num_buf)) return -ENOMEM; map = mr->map[mr->nbuf / RXE_BUF_PER_MAP]; buf = &map->buf[mr->nbuf % RXE_BUF_PER_MAP]; buf->addr = addr; buf->size = ibmr->page_size; mr->nbuf++; return 0; } static int rxe_map_mr_sg(struct ib_mr *ibmr, struct scatterlist *sg, int sg_nents, unsigned int *sg_offset) { struct rxe_mem *mr = to_rmr(ibmr); int n; mr->nbuf = 0; n = ib_sg_to_pages(ibmr, sg, sg_nents, sg_offset, rxe_set_page); mr->va = ibmr->iova; mr->iova = ibmr->iova; mr->length = ibmr->length; mr->page_shift = ilog2(ibmr->page_size); mr->page_mask = ibmr->page_size - 1; mr->offset = mr->iova & mr->page_mask; return n; } static int rxe_attach_mcast(struct ib_qp *ibqp, union ib_gid *mgid, u16 mlid) { int err; struct rxe_dev *rxe = to_rdev(ibqp->device); struct rxe_qp *qp = to_rqp(ibqp); struct rxe_mc_grp *grp; /* takes a ref on grp if successful */ err = rxe_mcast_get_grp(rxe, mgid, &grp); if (err) return err; err = rxe_mcast_add_grp_elem(rxe, qp, grp); rxe_drop_ref(grp); return err; } static int rxe_detach_mcast(struct ib_qp *ibqp, union ib_gid *mgid, u16 mlid) { struct rxe_dev *rxe = to_rdev(ibqp->device); struct rxe_qp *qp = to_rqp(ibqp); return rxe_mcast_drop_grp_elem(rxe, qp, mgid); } static ssize_t parent_show(struct device *device, struct device_attribute *attr, char *buf) { struct rxe_dev *rxe = rdma_device_to_drv_device(device, struct rxe_dev, ib_dev); return snprintf(buf, 16, "%s\n", rxe_parent_name(rxe, 1)); } static DEVICE_ATTR_RO(parent); static struct attribute *rxe_dev_attributes[] = { &dev_attr_parent.attr, NULL }; static const struct attribute_group rxe_attr_group = { .attrs = rxe_dev_attributes, }; static int rxe_enable_driver(struct ib_device *ib_dev) { struct rxe_dev *rxe = container_of(ib_dev, struct rxe_dev, ib_dev); rxe_set_port_state(rxe); dev_info(&rxe->ib_dev.dev, "added %s\n", netdev_name(rxe->ndev)); return 0; } static const struct ib_device_ops rxe_dev_ops = { .owner = THIS_MODULE, .driver_id = RDMA_DRIVER_RXE, .uverbs_abi_ver = RXE_UVERBS_ABI_VERSION, .alloc_hw_stats = rxe_ib_alloc_hw_stats, .alloc_mr = rxe_alloc_mr, .alloc_pd = rxe_alloc_pd, .alloc_ucontext = rxe_alloc_ucontext, .attach_mcast = rxe_attach_mcast, .create_ah = rxe_create_ah, .create_cq = rxe_create_cq, .create_qp = rxe_create_qp, .create_srq = rxe_create_srq, .dealloc_driver = rxe_dealloc, .dealloc_pd = rxe_dealloc_pd, .dealloc_ucontext = rxe_dealloc_ucontext, .dereg_mr = rxe_dereg_mr, .destroy_ah = rxe_destroy_ah, .destroy_cq = rxe_destroy_cq, .destroy_qp = rxe_destroy_qp, .destroy_srq = rxe_destroy_srq, .detach_mcast = rxe_detach_mcast, .enable_driver = rxe_enable_driver, .get_dma_mr = rxe_get_dma_mr, .get_hw_stats = rxe_ib_get_hw_stats, .get_link_layer = rxe_get_link_layer, .get_port_immutable = rxe_port_immutable, .map_mr_sg = rxe_map_mr_sg, .mmap = rxe_mmap, .modify_ah = rxe_modify_ah, .modify_device = rxe_modify_device, .modify_port = rxe_modify_port, .modify_qp = rxe_modify_qp, .modify_srq = rxe_modify_srq, .peek_cq = rxe_peek_cq, .poll_cq = rxe_poll_cq, .post_recv = rxe_post_recv, .post_send = rxe_post_send, .post_srq_recv = rxe_post_srq_recv, .query_ah = rxe_query_ah, .query_device = rxe_query_device, .query_pkey = rxe_query_pkey, .query_port = rxe_query_port, .query_qp = rxe_query_qp, .query_srq = rxe_query_srq, .reg_user_mr = rxe_reg_user_mr, .req_notify_cq = rxe_req_notify_cq, .resize_cq = rxe_resize_cq, INIT_RDMA_OBJ_SIZE(ib_ah, rxe_ah, ibah), INIT_RDMA_OBJ_SIZE(ib_cq, rxe_cq, ibcq), INIT_RDMA_OBJ_SIZE(ib_pd, rxe_pd, ibpd), INIT_RDMA_OBJ_SIZE(ib_srq, rxe_srq, ibsrq), INIT_RDMA_OBJ_SIZE(ib_ucontext, rxe_ucontext, ibuc), }; int rxe_register_device(struct rxe_dev *rxe, const char *ibdev_name) { int err; struct ib_device *dev = &rxe->ib_dev; struct crypto_shash *tfm; strlcpy(dev->node_desc, "rxe", sizeof(dev->node_desc)); dev->node_type = RDMA_NODE_IB_CA; dev->phys_port_cnt = 1; dev->num_comp_vectors = num_possible_cpus(); dev->dev.parent = rxe_dma_device(rxe); dev->local_dma_lkey = 0; addrconf_addr_eui48((unsigned char *)&dev->node_guid, rxe->ndev->dev_addr); dev->dev.dma_ops = &dma_virt_ops; dev->dev.dma_parms = &rxe->dma_parms; rxe->dma_parms = (struct device_dma_parameters) { .max_segment_size = SZ_2G }; dma_coerce_mask_and_coherent(&dev->dev, dma_get_required_mask(&dev->dev)); dev->uverbs_cmd_mask = BIT_ULL(IB_USER_VERBS_CMD_GET_CONTEXT) | BIT_ULL(IB_USER_VERBS_CMD_CREATE_COMP_CHANNEL) | BIT_ULL(IB_USER_VERBS_CMD_QUERY_DEVICE) | BIT_ULL(IB_USER_VERBS_CMD_QUERY_PORT) | BIT_ULL(IB_USER_VERBS_CMD_ALLOC_PD) | BIT_ULL(IB_USER_VERBS_CMD_DEALLOC_PD) | BIT_ULL(IB_USER_VERBS_CMD_CREATE_SRQ) | BIT_ULL(IB_USER_VERBS_CMD_MODIFY_SRQ) | BIT_ULL(IB_USER_VERBS_CMD_QUERY_SRQ) | BIT_ULL(IB_USER_VERBS_CMD_DESTROY_SRQ) | BIT_ULL(IB_USER_VERBS_CMD_POST_SRQ_RECV) | BIT_ULL(IB_USER_VERBS_CMD_CREATE_QP) | BIT_ULL(IB_USER_VERBS_CMD_MODIFY_QP) | BIT_ULL(IB_USER_VERBS_CMD_QUERY_QP) | BIT_ULL(IB_USER_VERBS_CMD_DESTROY_QP) | BIT_ULL(IB_USER_VERBS_CMD_POST_SEND) | BIT_ULL(IB_USER_VERBS_CMD_POST_RECV) | BIT_ULL(IB_USER_VERBS_CMD_CREATE_CQ) | BIT_ULL(IB_USER_VERBS_CMD_RESIZE_CQ) | BIT_ULL(IB_USER_VERBS_CMD_DESTROY_CQ) | BIT_ULL(IB_USER_VERBS_CMD_POLL_CQ) | BIT_ULL(IB_USER_VERBS_CMD_PEEK_CQ) | BIT_ULL(IB_USER_VERBS_CMD_REQ_NOTIFY_CQ) | BIT_ULL(IB_USER_VERBS_CMD_REG_MR) | BIT_ULL(IB_USER_VERBS_CMD_DEREG_MR) | BIT_ULL(IB_USER_VERBS_CMD_CREATE_AH) | BIT_ULL(IB_USER_VERBS_CMD_MODIFY_AH) | BIT_ULL(IB_USER_VERBS_CMD_QUERY_AH) | BIT_ULL(IB_USER_VERBS_CMD_DESTROY_AH) | BIT_ULL(IB_USER_VERBS_CMD_ATTACH_MCAST) | BIT_ULL(IB_USER_VERBS_CMD_DETACH_MCAST) ; ib_set_device_ops(dev, &rxe_dev_ops); err = ib_device_set_netdev(&rxe->ib_dev, rxe->ndev, 1); if (err) return err; tfm = crypto_alloc_shash("crc32", 0, 0); if (IS_ERR(tfm)) { pr_err("failed to allocate crc algorithm err:%ld\n", PTR_ERR(tfm)); return PTR_ERR(tfm); } rxe->tfm = tfm; rdma_set_device_sysfs_group(dev, &rxe_attr_group); err = ib_register_device(dev, ibdev_name); if (err) pr_warn("%s failed with error %d\n", __func__, err); /* * Note that rxe may be invalid at this point if another thread * unregistered it. */ return err; }