xref: /openbmc/qemu/hw/rdma/vmw/pvrdma_qp_ops.c (revision 073d9f2c)
1 /*
2  * QEMU paravirtual RDMA - QP implementation
3  *
4  * Copyright (C) 2018 Oracle
5  * Copyright (C) 2018 Red Hat Inc
6  *
7  * Authors:
8  *     Yuval Shaia <yuval.shaia@oracle.com>
9  *     Marcel Apfelbaum <marcel@redhat.com>
10  *
11  * This work is licensed under the terms of the GNU GPL, version 2 or later.
12  * See the COPYING file in the top-level directory.
13  *
14  */
15 
16 #include "qemu/osdep.h"
17 
18 #include "../rdma_utils.h"
19 #include "../rdma_rm.h"
20 #include "../rdma_backend.h"
21 
22 #include "pvrdma.h"
23 #include "standard-headers/rdma/vmw_pvrdma-abi.h"
24 #include "pvrdma_qp_ops.h"
25 
/*
 * Per-WQE context handed to the backend completion handler so a completion
 * arriving from the backend can be turned into a CQE on the right guest CQ.
 * Allocated in pvrdma_qp_send/pvrdma_qp_recv, freed by the handler.
 */
typedef struct CompHandlerCtx {
    PVRDMADev *dev;        /* device that owns the CQ */
    uint32_t cq_handle;    /* CQ the completion is posted to */
    struct pvrdma_cqe cqe; /* partially pre-filled CQE (wr_id, qp, opcode) */
} CompHandlerCtx;
31 
32 /* Send Queue WQE */
33 typedef struct PvrdmaSqWqe {
34     struct pvrdma_sq_wqe_hdr hdr;
35     struct pvrdma_sge sge[0];
36 } PvrdmaSqWqe;
37 
38 /* Recv Queue WQE */
39 typedef struct PvrdmaRqWqe {
40     struct pvrdma_rq_wqe_hdr hdr;
41     struct pvrdma_sge sge[0];
42 } PvrdmaRqWqe;
43 
44 /*
45  * 1. Put CQE on send CQ ring
46  * 2. Put CQ number on dsr completion ring
47  * 3. Interrupt host
48  */
49 static int pvrdma_post_cqe(PVRDMADev *dev, uint32_t cq_handle,
50                            struct pvrdma_cqe *cqe, struct ibv_wc *wc)
51 {
52     struct pvrdma_cqe *cqe1;
53     struct pvrdma_cqne *cqne;
54     PvrdmaRing *ring;
55     RdmaRmCQ *cq = rdma_rm_get_cq(&dev->rdma_dev_res, cq_handle);
56 
57     if (unlikely(!cq)) {
58         pr_dbg("Invalid cqn %d\n", cq_handle);
59         return -EINVAL;
60     }
61 
62     ring = (PvrdmaRing *)cq->opaque;
63     pr_dbg("ring=%p\n", ring);
64 
65     /* Step #1: Put CQE on CQ ring */
66     pr_dbg("Writing CQE\n");
67     cqe1 = pvrdma_ring_next_elem_write(ring);
68     if (unlikely(!cqe1)) {
69         pr_dbg("No CQEs in ring\n");
70         return -EINVAL;
71     }
72 
73     memset(cqe1, 0, sizeof(*cqe1));
74     cqe1->wr_id = cqe->wr_id;
75     cqe1->qp = cqe->qp;
76     cqe1->opcode = cqe->opcode;
77     cqe1->status = wc->status;
78     cqe1->byte_len = wc->byte_len;
79     cqe1->src_qp = wc->src_qp;
80     cqe1->wc_flags = wc->wc_flags;
81     cqe1->vendor_err = wc->vendor_err;
82 
83     pr_dbg("wr_id=%" PRIx64 "\n", cqe1->wr_id);
84     pr_dbg("qp=0x%lx\n", cqe1->qp);
85     pr_dbg("opcode=%d\n", cqe1->opcode);
86     pr_dbg("status=%d\n", cqe1->status);
87     pr_dbg("byte_len=%d\n", cqe1->byte_len);
88     pr_dbg("src_qp=%d\n", cqe1->src_qp);
89     pr_dbg("wc_flags=%d\n", cqe1->wc_flags);
90     pr_dbg("vendor_err=%d\n", cqe1->vendor_err);
91 
92     pvrdma_ring_write_inc(ring);
93 
94     /* Step #2: Put CQ number on dsr completion ring */
95     pr_dbg("Writing CQNE\n");
96     cqne = pvrdma_ring_next_elem_write(&dev->dsr_info.cq);
97     if (unlikely(!cqne)) {
98         return -EINVAL;
99     }
100 
101     cqne->info = cq_handle;
102     pvrdma_ring_write_inc(&dev->dsr_info.cq);
103 
104     pr_dbg("cq->notify=%d\n", cq->notify);
105     if (cq->notify != CNT_CLEAR) {
106         if (cq->notify == CNT_ARM) {
107             cq->notify = CNT_CLEAR;
108         }
109         post_interrupt(dev, INTR_VEC_CMD_COMPLETION_Q);
110     }
111 
112     return 0;
113 }
114 
115 static void pvrdma_qp_ops_comp_handler(void *ctx, struct ibv_wc *wc)
116 {
117     CompHandlerCtx *comp_ctx = (CompHandlerCtx *)ctx;
118 
119     pvrdma_post_cqe(comp_ctx->dev, comp_ctx->cq_handle, &comp_ctx->cqe, wc);
120 
121     g_free(ctx);
122 }
123 
124 static void complete_with_error(uint32_t vendor_err, void *ctx)
125 {
126     struct ibv_wc wc = {0};
127 
128     wc.status = IBV_WC_GENERAL_ERR;
129     wc.vendor_err = vendor_err;
130 
131     pvrdma_qp_ops_comp_handler(ctx, &wc);
132 }
133 
/* Tear down QP ops: detach our completion handler from the rdma backend. */
void pvrdma_qp_ops_fini(void)
{
    rdma_backend_unregister_comp_handler();
}
138 
/*
 * Initialize QP ops: register pvrdma_qp_ops_comp_handler as the backend
 * completion callback. Always succeeds and returns 0.
 */
int pvrdma_qp_ops_init(void)
{
    rdma_backend_register_comp_handler(pvrdma_qp_ops_comp_handler);

    return 0;
}
145 
146 void pvrdma_qp_send(PVRDMADev *dev, uint32_t qp_handle)
147 {
148     RdmaRmQP *qp;
149     PvrdmaSqWqe *wqe;
150     PvrdmaRing *ring;
151     int sgid_idx;
152     union ibv_gid *sgid;
153 
154     pr_dbg("qp_handle=0x%x\n", qp_handle);
155 
156     qp = rdma_rm_get_qp(&dev->rdma_dev_res, qp_handle);
157     if (unlikely(!qp)) {
158         pr_dbg("Invalid qpn\n");
159         return;
160     }
161 
162     ring = (PvrdmaRing *)qp->opaque;
163     pr_dbg("sring=%p\n", ring);
164 
165     wqe = (struct PvrdmaSqWqe *)pvrdma_ring_next_elem_read(ring);
166     while (wqe) {
167         CompHandlerCtx *comp_ctx;
168 
169         pr_dbg("wr_id=%" PRIx64 "\n", wqe->hdr.wr_id);
170 
171         /* Prepare CQE */
172         comp_ctx = g_malloc(sizeof(CompHandlerCtx));
173         comp_ctx->dev = dev;
174         comp_ctx->cq_handle = qp->send_cq_handle;
175         comp_ctx->cqe.wr_id = wqe->hdr.wr_id;
176         comp_ctx->cqe.qp = qp_handle;
177         comp_ctx->cqe.opcode = IBV_WC_SEND;
178 
179         sgid = rdma_rm_get_gid(&dev->rdma_dev_res, wqe->hdr.wr.ud.av.gid_index);
180         if (!sgid) {
181             pr_dbg("Fail to get gid for idx %d\n", wqe->hdr.wr.ud.av.gid_index);
182             complete_with_error(VENDOR_ERR_INV_GID_IDX, comp_ctx);
183             continue;
184         }
185         pr_dbg("sgid_id=%d, sgid=0x%llx\n", wqe->hdr.wr.ud.av.gid_index,
186                sgid->global.interface_id);
187 
188         sgid_idx = rdma_rm_get_backend_gid_index(&dev->rdma_dev_res,
189                                                  &dev->backend_dev,
190                                                  wqe->hdr.wr.ud.av.gid_index);
191         if (sgid_idx <= 0) {
192             pr_dbg("Fail to get bk sgid_idx for sgid_idx %d\n",
193                    wqe->hdr.wr.ud.av.gid_index);
194             complete_with_error(VENDOR_ERR_INV_GID_IDX, comp_ctx);
195             continue;
196         }
197 
198         if (wqe->hdr.num_sge > dev->dev_attr.max_sge) {
199             pr_dbg("Invalid num_sge=%d (max %d)\n", wqe->hdr.num_sge,
200                    dev->dev_attr.max_sge);
201             complete_with_error(VENDOR_ERR_INV_NUM_SGE, comp_ctx);
202             continue;
203         }
204 
205         rdma_backend_post_send(&dev->backend_dev, &qp->backend_qp, qp->qp_type,
206                                (struct ibv_sge *)&wqe->sge[0], wqe->hdr.num_sge,
207                                sgid_idx, sgid,
208                                (union ibv_gid *)wqe->hdr.wr.ud.av.dgid,
209                                wqe->hdr.wr.ud.remote_qpn,
210                                wqe->hdr.wr.ud.remote_qkey, comp_ctx);
211 
212         pvrdma_ring_read_inc(ring);
213 
214         wqe = pvrdma_ring_next_elem_read(ring);
215     }
216 }
217 
218 void pvrdma_qp_recv(PVRDMADev *dev, uint32_t qp_handle)
219 {
220     RdmaRmQP *qp;
221     PvrdmaRqWqe *wqe;
222     PvrdmaRing *ring;
223 
224     pr_dbg("qp_handle=0x%x\n", qp_handle);
225 
226     qp = rdma_rm_get_qp(&dev->rdma_dev_res, qp_handle);
227     if (unlikely(!qp)) {
228         pr_dbg("Invalid qpn\n");
229         return;
230     }
231 
232     ring = &((PvrdmaRing *)qp->opaque)[1];
233     pr_dbg("rring=%p\n", ring);
234 
235     wqe = (struct PvrdmaRqWqe *)pvrdma_ring_next_elem_read(ring);
236     while (wqe) {
237         CompHandlerCtx *comp_ctx;
238 
239         pr_dbg("wr_id=%" PRIx64 "\n", wqe->hdr.wr_id);
240 
241         /* Prepare CQE */
242         comp_ctx = g_malloc(sizeof(CompHandlerCtx));
243         comp_ctx->dev = dev;
244         comp_ctx->cq_handle = qp->recv_cq_handle;
245         comp_ctx->cqe.wr_id = wqe->hdr.wr_id;
246         comp_ctx->cqe.qp = qp_handle;
247         comp_ctx->cqe.opcode = IBV_WC_RECV;
248 
249         if (wqe->hdr.num_sge > dev->dev_attr.max_sge) {
250             pr_dbg("Invalid num_sge=%d (max %d)\n", wqe->hdr.num_sge,
251                    dev->dev_attr.max_sge);
252             complete_with_error(VENDOR_ERR_INV_NUM_SGE, comp_ctx);
253             continue;
254         }
255 
256         rdma_backend_post_recv(&dev->backend_dev, &dev->rdma_dev_res,
257                                &qp->backend_qp, qp->qp_type,
258                                (struct ibv_sge *)&wqe->sge[0], wqe->hdr.num_sge,
259                                comp_ctx);
260 
261         pvrdma_ring_read_inc(ring);
262 
263         wqe = pvrdma_ring_next_elem_read(ring);
264     }
265 }
266 
267 void pvrdma_cq_poll(RdmaDeviceResources *dev_res, uint32_t cq_handle)
268 {
269     RdmaRmCQ *cq;
270 
271     cq = rdma_rm_get_cq(dev_res, cq_handle);
272     if (!cq) {
273         pr_dbg("Invalid CQ# %d\n", cq_handle);
274         return;
275     }
276 
277     rdma_backend_poll_cq(dev_res, &cq->backend_cq);
278 }
279