1 /* 2 * QEMU paravirtual RDMA - QP implementation 3 * 4 * Copyright (C) 2018 Oracle 5 * Copyright (C) 2018 Red Hat Inc 6 * 7 * Authors: 8 * Yuval Shaia <yuval.shaia@oracle.com> 9 * Marcel Apfelbaum <marcel@redhat.com> 10 * 11 * This work is licensed under the terms of the GNU GPL, version 2 or later. 12 * See the COPYING file in the top-level directory. 13 * 14 */ 15 16 #include "qemu/osdep.h" 17 18 #include "../rdma_utils.h" 19 #include "../rdma_rm.h" 20 #include "../rdma_backend.h" 21 22 #include "pvrdma.h" 23 #include "standard-headers/rdma/vmw_pvrdma-abi.h" 24 #include "pvrdma_qp_ops.h" 25 26 typedef struct CompHandlerCtx { 27 PVRDMADev *dev; 28 uint32_t cq_handle; 29 struct pvrdma_cqe cqe; 30 } CompHandlerCtx; 31 32 /* Send Queue WQE */ 33 typedef struct PvrdmaSqWqe { 34 struct pvrdma_sq_wqe_hdr hdr; 35 struct pvrdma_sge sge[0]; 36 } PvrdmaSqWqe; 37 38 /* Recv Queue WQE */ 39 typedef struct PvrdmaRqWqe { 40 struct pvrdma_rq_wqe_hdr hdr; 41 struct pvrdma_sge sge[0]; 42 } PvrdmaRqWqe; 43 44 /* 45 * 1. Put CQE on send CQ ring 46 * 2. Put CQ number on dsr completion ring 47 * 3. 
Interrupt host 48 */ 49 static int pvrdma_post_cqe(PVRDMADev *dev, uint32_t cq_handle, 50 struct pvrdma_cqe *cqe, struct ibv_wc *wc) 51 { 52 struct pvrdma_cqe *cqe1; 53 struct pvrdma_cqne *cqne; 54 PvrdmaRing *ring; 55 RdmaRmCQ *cq = rdma_rm_get_cq(&dev->rdma_dev_res, cq_handle); 56 57 if (unlikely(!cq)) { 58 pr_dbg("Invalid cqn %d\n", cq_handle); 59 return -EINVAL; 60 } 61 62 ring = (PvrdmaRing *)cq->opaque; 63 pr_dbg("ring=%p\n", ring); 64 65 /* Step #1: Put CQE on CQ ring */ 66 pr_dbg("Writing CQE\n"); 67 cqe1 = pvrdma_ring_next_elem_write(ring); 68 if (unlikely(!cqe1)) { 69 pr_dbg("No CQEs in ring\n"); 70 return -EINVAL; 71 } 72 73 memset(cqe1, 0, sizeof(*cqe1)); 74 cqe1->wr_id = cqe->wr_id; 75 cqe1->qp = cqe->qp; 76 cqe1->opcode = cqe->opcode; 77 cqe1->status = wc->status; 78 cqe1->byte_len = wc->byte_len; 79 cqe1->src_qp = wc->src_qp; 80 cqe1->wc_flags = wc->wc_flags; 81 cqe1->vendor_err = wc->vendor_err; 82 83 pr_dbg("wr_id=%" PRIx64 "\n", cqe1->wr_id); 84 pr_dbg("qp=0x%lx\n", cqe1->qp); 85 pr_dbg("opcode=%d\n", cqe1->opcode); 86 pr_dbg("status=%d\n", cqe1->status); 87 pr_dbg("byte_len=%d\n", cqe1->byte_len); 88 pr_dbg("src_qp=%d\n", cqe1->src_qp); 89 pr_dbg("wc_flags=%d\n", cqe1->wc_flags); 90 pr_dbg("vendor_err=%d\n", cqe1->vendor_err); 91 92 pvrdma_ring_write_inc(ring); 93 94 /* Step #2: Put CQ number on dsr completion ring */ 95 pr_dbg("Writing CQNE\n"); 96 cqne = pvrdma_ring_next_elem_write(&dev->dsr_info.cq); 97 if (unlikely(!cqne)) { 98 return -EINVAL; 99 } 100 101 cqne->info = cq_handle; 102 pvrdma_ring_write_inc(&dev->dsr_info.cq); 103 104 pr_dbg("cq->notify=%d\n", cq->notify); 105 if (cq->notify != CNT_CLEAR) { 106 if (cq->notify == CNT_ARM) { 107 cq->notify = CNT_CLEAR; 108 } 109 post_interrupt(dev, INTR_VEC_CMD_COMPLETION_Q); 110 } 111 112 return 0; 113 } 114 115 static void pvrdma_qp_ops_comp_handler(void *ctx, struct ibv_wc *wc) 116 { 117 CompHandlerCtx *comp_ctx = (CompHandlerCtx *)ctx; 118 119 pvrdma_post_cqe(comp_ctx->dev, 
comp_ctx->cq_handle, &comp_ctx->cqe, wc); 120 121 g_free(ctx); 122 } 123 124 static void complete_with_error(uint32_t vendor_err, void *ctx) 125 { 126 struct ibv_wc wc = {0}; 127 128 wc.status = IBV_WC_GENERAL_ERR; 129 wc.vendor_err = vendor_err; 130 131 pvrdma_qp_ops_comp_handler(ctx, &wc); 132 } 133 134 void pvrdma_qp_ops_fini(void) 135 { 136 rdma_backend_unregister_comp_handler(); 137 } 138 139 int pvrdma_qp_ops_init(void) 140 { 141 rdma_backend_register_comp_handler(pvrdma_qp_ops_comp_handler); 142 143 return 0; 144 } 145 146 void pvrdma_qp_send(PVRDMADev *dev, uint32_t qp_handle) 147 { 148 RdmaRmQP *qp; 149 PvrdmaSqWqe *wqe; 150 PvrdmaRing *ring; 151 int sgid_idx; 152 union ibv_gid *sgid; 153 154 pr_dbg("qp_handle=0x%x\n", qp_handle); 155 156 qp = rdma_rm_get_qp(&dev->rdma_dev_res, qp_handle); 157 if (unlikely(!qp)) { 158 pr_dbg("Invalid qpn\n"); 159 return; 160 } 161 162 ring = (PvrdmaRing *)qp->opaque; 163 pr_dbg("sring=%p\n", ring); 164 165 wqe = (struct PvrdmaSqWqe *)pvrdma_ring_next_elem_read(ring); 166 while (wqe) { 167 CompHandlerCtx *comp_ctx; 168 169 pr_dbg("wr_id=%" PRIx64 "\n", wqe->hdr.wr_id); 170 171 /* Prepare CQE */ 172 comp_ctx = g_malloc(sizeof(CompHandlerCtx)); 173 comp_ctx->dev = dev; 174 comp_ctx->cq_handle = qp->send_cq_handle; 175 comp_ctx->cqe.wr_id = wqe->hdr.wr_id; 176 comp_ctx->cqe.qp = qp_handle; 177 comp_ctx->cqe.opcode = IBV_WC_SEND; 178 179 sgid = rdma_rm_get_gid(&dev->rdma_dev_res, wqe->hdr.wr.ud.av.gid_index); 180 if (!sgid) { 181 pr_dbg("Fail to get gid for idx %d\n", wqe->hdr.wr.ud.av.gid_index); 182 complete_with_error(VENDOR_ERR_INV_GID_IDX, comp_ctx); 183 continue; 184 } 185 pr_dbg("sgid_id=%d, sgid=0x%llx\n", wqe->hdr.wr.ud.av.gid_index, 186 sgid->global.interface_id); 187 188 sgid_idx = rdma_rm_get_backend_gid_index(&dev->rdma_dev_res, 189 &dev->backend_dev, 190 wqe->hdr.wr.ud.av.gid_index); 191 if (sgid_idx <= 0) { 192 pr_dbg("Fail to get bk sgid_idx for sgid_idx %d\n", 193 wqe->hdr.wr.ud.av.gid_index); 194 
complete_with_error(VENDOR_ERR_INV_GID_IDX, comp_ctx); 195 continue; 196 } 197 198 if (wqe->hdr.num_sge > dev->dev_attr.max_sge) { 199 pr_dbg("Invalid num_sge=%d (max %d)\n", wqe->hdr.num_sge, 200 dev->dev_attr.max_sge); 201 complete_with_error(VENDOR_ERR_INV_NUM_SGE, comp_ctx); 202 continue; 203 } 204 205 rdma_backend_post_send(&dev->backend_dev, &qp->backend_qp, qp->qp_type, 206 (struct ibv_sge *)&wqe->sge[0], wqe->hdr.num_sge, 207 sgid_idx, sgid, 208 (union ibv_gid *)wqe->hdr.wr.ud.av.dgid, 209 wqe->hdr.wr.ud.remote_qpn, 210 wqe->hdr.wr.ud.remote_qkey, comp_ctx); 211 212 pvrdma_ring_read_inc(ring); 213 214 wqe = pvrdma_ring_next_elem_read(ring); 215 } 216 } 217 218 void pvrdma_qp_recv(PVRDMADev *dev, uint32_t qp_handle) 219 { 220 RdmaRmQP *qp; 221 PvrdmaRqWqe *wqe; 222 PvrdmaRing *ring; 223 224 pr_dbg("qp_handle=0x%x\n", qp_handle); 225 226 qp = rdma_rm_get_qp(&dev->rdma_dev_res, qp_handle); 227 if (unlikely(!qp)) { 228 pr_dbg("Invalid qpn\n"); 229 return; 230 } 231 232 ring = &((PvrdmaRing *)qp->opaque)[1]; 233 pr_dbg("rring=%p\n", ring); 234 235 wqe = (struct PvrdmaRqWqe *)pvrdma_ring_next_elem_read(ring); 236 while (wqe) { 237 CompHandlerCtx *comp_ctx; 238 239 pr_dbg("wr_id=%" PRIx64 "\n", wqe->hdr.wr_id); 240 241 /* Prepare CQE */ 242 comp_ctx = g_malloc(sizeof(CompHandlerCtx)); 243 comp_ctx->dev = dev; 244 comp_ctx->cq_handle = qp->recv_cq_handle; 245 comp_ctx->cqe.wr_id = wqe->hdr.wr_id; 246 comp_ctx->cqe.qp = qp_handle; 247 comp_ctx->cqe.opcode = IBV_WC_RECV; 248 249 if (wqe->hdr.num_sge > dev->dev_attr.max_sge) { 250 pr_dbg("Invalid num_sge=%d (max %d)\n", wqe->hdr.num_sge, 251 dev->dev_attr.max_sge); 252 complete_with_error(VENDOR_ERR_INV_NUM_SGE, comp_ctx); 253 continue; 254 } 255 256 rdma_backend_post_recv(&dev->backend_dev, &dev->rdma_dev_res, 257 &qp->backend_qp, qp->qp_type, 258 (struct ibv_sge *)&wqe->sge[0], wqe->hdr.num_sge, 259 comp_ctx); 260 261 pvrdma_ring_read_inc(ring); 262 263 wqe = pvrdma_ring_next_elem_read(ring); 264 } 265 } 266 267 
/*
 * Poll the backend CQ that backs the guest CQ identified by @cq_handle.
 * A stale handle is logged and ignored.
 */
void pvrdma_cq_poll(RdmaDeviceResources *dev_res, uint32_t cq_handle)
{
    RdmaRmCQ *cq = rdma_rm_get_cq(dev_res, cq_handle);

    if (!cq) {
        pr_dbg("Invalid CQ# %d\n", cq_handle);
        return;
    }

    rdma_backend_poll_cq(dev_res, &cq->backend_cq);
}