1 /*
2 * QEMU paravirtual RDMA - Resource Manager Implementation
3 *
4 * Copyright (C) 2018 Oracle
5 * Copyright (C) 2018 Red Hat Inc
6 *
7 * Authors:
8 * Yuval Shaia <yuval.shaia@oracle.com>
9 * Marcel Apfelbaum <marcel@redhat.com>
10 *
11 * This work is licensed under the terms of the GNU GPL, version 2 or later.
12 * See the COPYING file in the top-level directory.
13 *
14 */
15
16 #include "qemu/osdep.h"
17 #include "qapi/error.h"
18 #include "cpu.h"
19 #include "monitor/monitor.h"
20
21 #include "trace.h"
22 #include "rdma_utils.h"
23 #include "rdma_backend.h"
24 #include "rdma_rm.h"
25
/*
 * Append a human-readable dump of the device counters to @buf.
 *
 * One "\t<label> : <value>\n" line is written for each statistic read from
 * dev_res->stats, followed by the 'used' count of the PD, MR, UC, QP, CQ
 * and CQE-context resource tables.
 */
void rdma_format_device_counters(RdmaDeviceResources *dev_res, GString *buf)
{
/* Local helpers: append one labelled line using a 64-bit / 32-bit format. */
#define RM_FMT_64(label, val) \
    g_string_append_printf(buf, "\t" label " : %" PRId64 "\n", (val))
#define RM_FMT_32(label, val) \
    g_string_append_printf(buf, "\t" label " : %" PRId32 "\n", (val))

    /* Data path counters */
    RM_FMT_64("tx", dev_res->stats.tx);
    RM_FMT_64("tx_len", dev_res->stats.tx_len);
    RM_FMT_64("tx_err", dev_res->stats.tx_err);
    RM_FMT_64("rx_bufs", dev_res->stats.rx_bufs);
    RM_FMT_64("rx_srq", dev_res->stats.rx_srq);
    RM_FMT_64("rx_bufs_len", dev_res->stats.rx_bufs_len);
    RM_FMT_64("rx_bufs_err", dev_res->stats.rx_bufs_err);

    /* Completion counters */
    RM_FMT_64("comps", dev_res->stats.completions);
    RM_FMT_32("missing_comps", dev_res->stats.missing_cqe);
    RM_FMT_64("poll_cq (bk)", dev_res->stats.poll_cq_from_bk);
    RM_FMT_64("poll_cq_ppoll_to", dev_res->stats.poll_cq_ppoll_to);
    RM_FMT_64("poll_cq (fe)", dev_res->stats.poll_cq_from_guest);
    RM_FMT_64("poll_cq_empty", dev_res->stats.poll_cq_from_guest_empty);

    /* MAD counters */
    RM_FMT_64("mad_tx", dev_res->stats.mad_tx);
    RM_FMT_64("mad_tx_err", dev_res->stats.mad_tx_err);
    RM_FMT_64("mad_rx", dev_res->stats.mad_rx);
    RM_FMT_64("mad_rx_err", dev_res->stats.mad_rx_err);
    RM_FMT_64("mad_rx_bufs", dev_res->stats.mad_rx_bufs);
    RM_FMT_64("mad_rx_bufs_err", dev_res->stats.mad_rx_bufs_err);

    /* Resource table occupancy */
    RM_FMT_32("PDs", dev_res->pd_tbl.used);
    RM_FMT_32("MRs", dev_res->mr_tbl.used);
    RM_FMT_32("UCs", dev_res->uc_tbl.used);
    RM_FMT_32("QPs", dev_res->qp_tbl.used);
    RM_FMT_32("CQs", dev_res->cq_tbl.used);
    RM_FMT_32("CEQ_CTXs", dev_res->cqe_ctx_tbl.used);

#undef RM_FMT_32
#undef RM_FMT_64
}
79
/*
 * Initialise resource table @tbl to hold @tbl_sz entries of @res_sz bytes.
 *
 * Allocates the entry storage and the allocation bitmap, stores an
 * always NUL-terminated (possibly truncated) copy of @name in tbl->name,
 * records both sizes, resets the 'used' counter and initialises the
 * table lock.
 */
static inline void res_tbl_init(const char *name, RdmaRmResTbl *tbl,
                                uint32_t tbl_sz, uint32_t res_sz)
{
    /*
     * Use g_malloc_n() rather than g_malloc(tbl_sz * res_sz): the product of
     * two uint32_t values is computed in 32 bits and could wrap, yielding a
     * buffer smaller than the table it is supposed to back.
     */
    tbl->tbl = g_malloc_n(tbl_sz, res_sz);

    /* strncpy() does not terminate on truncation, so force the NUL. */
    strncpy(tbl->name, name, MAX_RM_TBL_NAME);
    tbl->name[MAX_RM_TBL_NAME - 1] = 0;

    tbl->bitmap = bitmap_new(tbl_sz);
    tbl->tbl_sz = tbl_sz;
    tbl->res_sz = res_sz;
    tbl->used = 0;
    qemu_mutex_init(&tbl->lock);
}
94
/*
 * Release everything res_tbl_init() set up for @tbl.
 *
 * Does nothing when tbl->bitmap is NULL.  After freeing, both pointers are
 * reset to NULL so that this guard also protects against a second call on
 * the same table (which would otherwise double-free and destroy the mutex
 * twice).
 */
static inline void res_tbl_free(RdmaRmResTbl *tbl)
{
    if (!tbl->bitmap) {
        return;
    }

    qemu_mutex_destroy(&tbl->lock);

    g_free(tbl->tbl);
    tbl->tbl = NULL;

    g_free(tbl->bitmap);
    tbl->bitmap = NULL;
}
104
/*
 * Look up the entry identified by @handle in @tbl.
 *
 * Returns a pointer to the entry's storage, or NULL (after reporting an
 * error) when @handle is outside the table or its bitmap bit is not set.
 */
static inline void *rdma_res_tbl_get(RdmaRmResTbl *tbl, uint32_t handle)
{
    trace_rdma_res_tbl_get(tbl->name, handle);

    /* Reject out-of-range handles first so test_bit() is never misused. */
    if (handle >= tbl->tbl_sz || !test_bit(handle, tbl->bitmap)) {
        rdma_error_report("Table %s, invalid handle %d", tbl->name, handle);
        return NULL;
    }

    return tbl->tbl + handle * tbl->res_sz;
}
116
/*
 * Allocate a free entry from @tbl.
 *
 * On success the index of the entry is stored in *@handle, its bitmap bit
 * is set, the 'used' counter is incremented, the entry storage is zeroed
 * and a pointer to it is returned.  Returns NULL (after reporting an error)
 * when no free entry exists; *@handle is then not a valid handle.
 *
 * The bitmap search and update are done under tbl->lock.
 */
static inline void *rdma_res_tbl_alloc(RdmaRmResTbl *tbl, uint32_t *handle)
{
    void *entry;

    qemu_mutex_lock(&tbl->lock);

    *handle = find_first_zero_bit(tbl->bitmap, tbl->tbl_sz);
    /*
     * A full bitmap is signalled by a result equal to the bitmap size, so
     * the test must be '>=': with '>' a full table would hand out index
     * tbl_sz, setting a bit past the bitmap and returning storage past the
     * end of the table.
     */
    if (*handle >= tbl->tbl_sz) {
        rdma_error_report("Table %s, failed to allocate, bitmap is full",
                          tbl->name);
        qemu_mutex_unlock(&tbl->lock);
        return NULL;
    }

    set_bit(*handle, tbl->bitmap);

    tbl->used++;

    qemu_mutex_unlock(&tbl->lock);

    entry = tbl->tbl + *handle * tbl->res_sz;
    memset(entry, 0, tbl->res_sz);

    trace_rdma_res_tbl_alloc(tbl->name, *handle);

    return entry;
}
141
/*
 * Return the entry identified by @handle to @tbl.
 *
 * Under tbl->lock, clears the entry's bitmap bit and decrements the 'used'
 * counter.  Out-of-range handles and handles whose bit is not currently set
 * are ignored, so releasing the same handle twice cannot drive 'used' below
 * the real number of allocated entries.
 */
static inline void rdma_res_tbl_dealloc(RdmaRmResTbl *tbl, uint32_t handle)
{
    trace_rdma_res_tbl_dealloc(tbl->name, handle);

    QEMU_LOCK_GUARD(&tbl->lock);

    if (handle < tbl->tbl_sz && test_bit(handle, tbl->bitmap)) {
        clear_bit(handle, tbl->bitmap);
        tbl->used--;
    }
}
154
/*
 * Allocate a protection domain.
 *
 * Takes an entry from the PD table (its index is stored in *@pd_handle),
 * creates the backend PD for it and records @ctx_handle in the entry.
 *
 * Returns 0 on success, -ENOMEM when the table allocation fails, or -EIO
 * when the backend PD cannot be created (the table entry is released
 * again in that case).
 */
int rdma_rm_alloc_pd(RdmaDeviceResources *dev_res, RdmaBackendDev *backend_dev,
                     uint32_t *pd_handle, uint32_t ctx_handle)
{
    RdmaRmPD *new_pd = rdma_res_tbl_alloc(&dev_res->pd_tbl, pd_handle);

    if (!new_pd) {
        return -ENOMEM;
    }

    if (rdma_backend_create_pd(backend_dev, &new_pd->backend_pd)) {
        /* Undo the table allocation before reporting the failure. */
        rdma_res_tbl_dealloc(&dev_res->pd_tbl, *pd_handle);
        return -EIO;
    }

    new_pd->ctx_handle = ctx_handle;

    return 0;
}
182
rdma_rm_get_pd(RdmaDeviceResources * dev_res,uint32_t pd_handle)183 RdmaRmPD *rdma_rm_get_pd(RdmaDeviceResources *dev_res, uint32_t pd_handle)
184 {
185 return rdma_res_tbl_get(&dev_res->pd_tbl, pd_handle);
186 }
187
/*
 * Destroy the protection domain identified by @pd_handle.
 *
 * Destroys the backend PD and releases the PD table entry.  Does nothing
 * when the handle does not resolve to an entry.
 */
void rdma_rm_dealloc_pd(RdmaDeviceResources *dev_res, uint32_t pd_handle)
{
    RdmaRmPD *victim = rdma_rm_get_pd(dev_res, pd_handle);

    if (!victim) {
        return;
    }

    rdma_backend_destroy_pd(&victim->backend_pd);
    rdma_res_tbl_dealloc(&dev_res->pd_tbl, pd_handle);
}
197
/*
 * Allocate a memory region under the PD identified by @pd_handle.
 *
 * An entry is taken from the MR table and its index stored in *@mr_handle.
 * When @host_virt is non-NULL the entry records the guest start/length and
 * the host address (advanced by the offset of @guest_start within its
 * target page), a backend MR is created, and *@lkey is filled in.  *@lkey
 * is not written when @host_virt is NULL.  *@rkey is always set to -1 on
 * success.
 *
 * Returns 0 on success, -EINVAL for an invalid PD handle, -ENOMEM when the
 * MR table allocation fails, or -EIO when the backend MR cannot be created
 * (the table entry is released again in that case).
 */
int rdma_rm_alloc_mr(RdmaDeviceResources *dev_res, uint32_t pd_handle,
                     uint64_t guest_start, uint64_t guest_length,
                     void *host_virt, int access_flags, uint32_t *mr_handle,
                     uint32_t *lkey, uint32_t *rkey)
{
    RdmaRmPD *owner_pd = rdma_rm_get_pd(dev_res, pd_handle);
    RdmaRmMR *new_mr;

    if (!owner_pd) {
        return -EINVAL;
    }

    new_mr = rdma_res_tbl_alloc(&dev_res->mr_tbl, mr_handle);
    if (!new_mr) {
        return -ENOMEM;
    }

    trace_rdma_rm_alloc_mr(*mr_handle, host_virt, guest_start, guest_length,
                           access_flags);

    if (host_virt) {
        new_mr->virt = host_virt;
        new_mr->start = guest_start;
        new_mr->length = guest_length;
        /* Point at the byte matching guest_start inside its page. */
        new_mr->virt += (new_mr->start & (TARGET_PAGE_SIZE - 1));

        if (rdma_backend_create_mr(&new_mr->backend_mr, &owner_pd->backend_pd,
                                   new_mr->virt, new_mr->length, guest_start,
                                   access_flags)) {
            rdma_res_tbl_dealloc(&dev_res->mr_tbl, *mr_handle);
            return -EIO;
        }
#ifdef LEGACY_RDMA_REG_MR
        /* Keep mr_handle in lkey so send and recv can get the mr pointer */
        *lkey = *mr_handle;
#else
        *lkey = rdma_backend_mr_lkey(&new_mr->backend_mr);
#endif
    }

    *rkey = -1;

    new_mr->pd_handle = pd_handle;

    return 0;
}
250
rdma_rm_get_mr(RdmaDeviceResources * dev_res,uint32_t mr_handle)251 RdmaRmMR *rdma_rm_get_mr(RdmaDeviceResources *dev_res, uint32_t mr_handle)
252 {
253 return rdma_res_tbl_get(&dev_res->mr_tbl, mr_handle);
254 }
255
/*
 * Destroy the memory region identified by @mr_handle.
 *
 * Destroys the backend MR; if the entry has a non-zero 'start', rewinds
 * 'virt' by the in-page offset of 'start' and unmaps 'length' bytes from
 * there; finally releases the MR table entry.  Does nothing when the
 * handle does not resolve to an entry.
 */
void rdma_rm_dealloc_mr(RdmaDeviceResources *dev_res, uint32_t mr_handle)
{
    RdmaRmMR *victim = rdma_rm_get_mr(dev_res, mr_handle);

    if (!victim) {
        return;
    }

    rdma_backend_destroy_mr(&victim->backend_mr);
    trace_rdma_rm_dealloc_mr(mr_handle, victim->start);

    if (victim->start) {
        /* Undo the in-page offset applied when the MR was set up. */
        victim->virt -= (victim->start & (TARGET_PAGE_SIZE - 1));
        munmap(victim->virt, victim->length);
    }

    rdma_res_tbl_dealloc(&dev_res->mr_tbl, mr_handle);
}
270
/*
 * Allocate a user context.
 *
 * Takes an entry from the UC table and stores its index in *@uc_handle.
 * @pfn is currently not used.
 *
 * Returns 0 on success or -ENOMEM when the table allocation fails.
 */
int rdma_rm_alloc_uc(RdmaDeviceResources *dev_res, uint32_t pfn,
                     uint32_t *uc_handle)
{
    /*
     * TODO: Need to make sure pfn is between bar start address and
     * bsd+RDMA_BAR2_UAR_SIZE (i.e. reject pfn > RDMA_BAR2_UAR_SIZE with an
     * error report and -ENOMEM).
     */

    if (!rdma_res_tbl_alloc(&dev_res->uc_tbl, uc_handle)) {
        return -ENOMEM;
    }

    return 0;
}
292
rdma_rm_get_uc(RdmaDeviceResources * dev_res,uint32_t uc_handle)293 RdmaRmUC *rdma_rm_get_uc(RdmaDeviceResources *dev_res, uint32_t uc_handle)
294 {
295 return rdma_res_tbl_get(&dev_res->uc_tbl, uc_handle);
296 }
297
/*
 * Release the user context identified by @uc_handle.  Does nothing when
 * the handle does not resolve to an entry.
 */
void rdma_rm_dealloc_uc(RdmaDeviceResources *dev_res, uint32_t uc_handle)
{
    if (!rdma_rm_get_uc(dev_res, uc_handle)) {
        return;
    }

    rdma_res_tbl_dealloc(&dev_res->uc_tbl, uc_handle);
}
306
rdma_rm_get_cq(RdmaDeviceResources * dev_res,uint32_t cq_handle)307 RdmaRmCQ *rdma_rm_get_cq(RdmaDeviceResources *dev_res, uint32_t cq_handle)
308 {
309 return rdma_res_tbl_get(&dev_res->cq_tbl, cq_handle);
310 }
311
/*
 * Allocate a completion queue with @cqe entries.
 *
 * Takes an entry from the CQ table (its index is stored in *@cq_handle),
 * records @opaque, sets the notify state to CNT_CLEAR and creates the
 * backend CQ.
 *
 * Returns 0 on success, -ENOMEM when the table allocation fails, or -EIO
 * when the backend CQ cannot be created (the CQ is torn down again through
 * rdma_rm_dealloc_cq() in that case).
 */
int rdma_rm_alloc_cq(RdmaDeviceResources *dev_res, RdmaBackendDev *backend_dev,
                     uint32_t cqe, uint32_t *cq_handle, void *opaque)
{
    RdmaRmCQ *new_cq = rdma_res_tbl_alloc(&dev_res->cq_tbl, cq_handle);

    if (!new_cq) {
        return -ENOMEM;
    }

    new_cq->opaque = opaque;
    new_cq->notify = CNT_CLEAR;

    if (rdma_backend_create_cq(backend_dev, &new_cq->backend_cq, cqe)) {
        rdma_rm_dealloc_cq(dev_res, *cq_handle);
        return -EIO;
    }

    return 0;
}
339
/*
 * Update the notification state of the CQ identified by @cq_handle.
 *
 * A CQ whose state is CNT_SET is left untouched; otherwise the state
 * becomes CNT_ARM when @notify is true and CNT_CLEAR when it is false.
 * Does nothing when the handle does not resolve to an entry.
 */
void rdma_rm_req_notify_cq(RdmaDeviceResources *dev_res, uint32_t cq_handle,
                           bool notify)
{
    RdmaRmCQ *target = rdma_rm_get_cq(dev_res, cq_handle);

    if (target && target->notify != CNT_SET) {
        if (notify) {
            target->notify = CNT_ARM;
        } else {
            target->notify = CNT_CLEAR;
        }
    }
}
354
/*
 * Destroy the completion queue identified by @cq_handle.
 *
 * Destroys the backend CQ and releases the CQ table entry.  Does nothing
 * when the handle does not resolve to an entry.
 */
void rdma_rm_dealloc_cq(RdmaDeviceResources *dev_res, uint32_t cq_handle)
{
    RdmaRmCQ *victim = rdma_rm_get_cq(dev_res, cq_handle);

    if (victim) {
        rdma_backend_destroy_cq(&victim->backend_cq);
        rdma_res_tbl_dealloc(&dev_res->cq_tbl, cq_handle);
    }
}
368
rdma_rm_get_qp(RdmaDeviceResources * dev_res,uint32_t qpn)369 RdmaRmQP *rdma_rm_get_qp(RdmaDeviceResources *dev_res, uint32_t qpn)
370 {
371 GBytes *key = g_bytes_new(&qpn, sizeof(qpn));
372
373 RdmaRmQP *qp = g_hash_table_lookup(dev_res->qp_hash, key);
374
375 g_bytes_unref(key);
376
377 if (!qp) {
378 rdma_error_report("Invalid QP handle %d", qpn);
379 }
380
381 return qp;
382 }
383
/*
 * Allocate a queue pair.
 *
 * Resolves the PD, the send/receive CQs and (when @is_srq is set) the SRQ
 * from their handles.  For an SRQ, @recv_cq_handle is recorded in the SRQ
 * entry; for a GSI QP both CQs have their notify state set to CNT_SET.
 * Then a QP table entry is allocated and populated, the backend QP is
 * created, the backend QP number is stored in *@qpn and the QP is inserted
 * into dev_res->qp_hash under that number.
 *
 * Returns 0 on success, -EINVAL when any handle fails to resolve, -ENOMEM
 * when the QP table allocation fails, or -EIO when the backend QP cannot
 * be created (the table entry is released again in that case).
 */
int rdma_rm_alloc_qp(RdmaDeviceResources *dev_res, uint32_t pd_handle,
                     uint8_t qp_type, uint32_t max_send_wr,
                     uint32_t max_send_sge, uint32_t send_cq_handle,
                     uint32_t max_recv_wr, uint32_t max_recv_sge,
                     uint32_t recv_cq_handle, void *opaque, uint32_t *qpn,
                     uint8_t is_srq, uint32_t srq_handle)
{
    RdmaRmPD *owner_pd;
    RdmaRmCQ *send_cq, *recv_cq;
    RdmaRmSRQ *shared_rq = NULL;
    RdmaRmQP *new_qp;
    uint32_t tbl_qpn;

    owner_pd = rdma_rm_get_pd(dev_res, pd_handle);
    if (!owner_pd) {
        return -EINVAL;
    }

    /* Both lookups are always performed, as each reports its own error. */
    send_cq = rdma_rm_get_cq(dev_res, send_cq_handle);
    recv_cq = rdma_rm_get_cq(dev_res, recv_cq_handle);
    if (!(send_cq && recv_cq)) {
        rdma_error_report("Invalid send_cqn or recv_cqn (%d, %d)",
                          send_cq_handle, recv_cq_handle);
        return -EINVAL;
    }

    if (is_srq) {
        shared_rq = rdma_rm_get_srq(dev_res, srq_handle);
        if (!shared_rq) {
            rdma_error_report("Invalid srqn %d", srq_handle);
            return -EINVAL;
        }

        shared_rq->recv_cq_handle = recv_cq_handle;
    }

    if (qp_type == IBV_QPT_GSI) {
        send_cq->notify = CNT_SET;
        recv_cq->notify = CNT_SET;
    }

    new_qp = rdma_res_tbl_alloc(&dev_res->qp_tbl, &tbl_qpn);
    if (!new_qp) {
        return -ENOMEM;
    }

    new_qp->qpn = tbl_qpn;
    new_qp->qp_state = IBV_QPS_RESET;
    new_qp->qp_type = qp_type;
    new_qp->send_cq_handle = send_cq_handle;
    new_qp->recv_cq_handle = recv_cq_handle;
    new_qp->opaque = opaque;
    new_qp->is_srq = is_srq;

    if (rdma_backend_create_qp(&new_qp->backend_qp, qp_type,
                               &owner_pd->backend_pd,
                               &send_cq->backend_cq, &recv_cq->backend_cq,
                               is_srq ? &shared_rq->backend_srq : NULL,
                               max_send_wr, max_recv_wr, max_send_sge,
                               max_recv_sge)) {
        rdma_res_tbl_dealloc(&dev_res->qp_tbl, new_qp->qpn);
        return -EIO;
    }

    /* The hash is keyed by the backend QP number, not the table index. */
    *qpn = rdma_backend_qpn(&new_qp->backend_qp);
    trace_rdma_rm_alloc_qp(tbl_qpn, *qpn, qp_type);
    g_hash_table_insert(dev_res->qp_hash, g_bytes_new(qpn, sizeof(*qpn)),
                        new_qp);

    return 0;
}
462
/*
 * Modify the state of the QP identified by @qp_handle.
 *
 * SMI QPs are rejected with -EPERM and GSI QPs are accepted without any
 * action.  For other QPs, when @attr_mask contains IBV_QP_STATE the new
 * @qp_state is recorded and the matching backend transition is performed:
 *   - IBV_QPS_INIT: rdma_backend_qp_state_init()
 *   - IBV_QPS_RTR:  @sgid_idx is translated to a backend gid index and
 *                   rdma_backend_qp_state_rtr() is called with it
 *   - IBV_QPS_RTS:  rdma_backend_qp_state_rts()
 *
 * Returns 0 on success, -EINVAL for an unknown QP, -EPERM for an SMI QP,
 * or -EIO when the gid translation or a backend transition fails.
 */
int rdma_rm_modify_qp(RdmaDeviceResources *dev_res, RdmaBackendDev *backend_dev,
                      uint32_t qp_handle, uint32_t attr_mask, uint8_t sgid_idx,
                      union ibv_gid *dgid, uint32_t dqpn,
                      enum ibv_qp_state qp_state, uint32_t qkey,
                      uint32_t rq_psn, uint32_t sq_psn)
{
    RdmaRmQP *qp;
    int bk_sgid_idx;
    int ret;

    qp = rdma_rm_get_qp(dev_res, qp_handle);
    if (!qp) {
        return -EINVAL;
    }

    if (qp->qp_type == IBV_QPT_SMI) {
        rdma_error_report("Got QP0 request");
        return -EPERM;
    } else if (qp->qp_type == IBV_QPT_GSI) {
        return 0;
    }

    trace_rdma_rm_modify_qp(qp_handle, attr_mask, qp_state, sgid_idx);

    if (attr_mask & IBV_QP_STATE) {
        qp->qp_state = qp_state;

        if (qp->qp_state == IBV_QPS_INIT) {
            ret = rdma_backend_qp_state_init(backend_dev, &qp->backend_qp,
                                             qp->qp_type, qkey);
            if (ret) {
                return -EIO;
            }
        }

        if (qp->qp_state == IBV_QPS_RTR) {
            /*
             * Get backend gid index.  The result is kept in an int: storing
             * it back into the uint8_t sgid_idx would turn a negative error
             * value into a positive index and defeat the check below.
             */
            bk_sgid_idx = rdma_rm_get_backend_gid_index(dev_res, backend_dev,
                                                        sgid_idx);
            if (bk_sgid_idx <= 0) { /* TODO check also less than bk.max_sgid */
                rdma_error_report("Failed to get bk sgid_idx for sgid_idx %d",
                                  sgid_idx);
                return -EIO;
            }

            ret = rdma_backend_qp_state_rtr(backend_dev, &qp->backend_qp,
                                            qp->qp_type, bk_sgid_idx, dgid,
                                            dqpn, rq_psn, qkey,
                                            attr_mask & IBV_QP_QKEY);
            if (ret) {
                return -EIO;
            }
        }

        if (qp->qp_state == IBV_QPS_RTS) {
            ret = rdma_backend_qp_state_rts(&qp->backend_qp, qp->qp_type,
                                            sq_psn, qkey,
                                            attr_mask & IBV_QP_QKEY);
            if (ret) {
                return -EIO;
            }
        }
    }

    return 0;
}
528
/*
 * Query the attributes of the QP identified by @qp_handle.
 *
 * Returns -EINVAL for an unknown QP, otherwise whatever
 * rdma_backend_query_qp() returns for the QP's backend object.
 */
int rdma_rm_query_qp(RdmaDeviceResources *dev_res, RdmaBackendDev *backend_dev,
                     uint32_t qp_handle, struct ibv_qp_attr *attr,
                     int attr_mask, struct ibv_qp_init_attr *init_attr)
{
    RdmaRmQP *target = rdma_rm_get_qp(dev_res, qp_handle);

    return target
        ? rdma_backend_query_qp(&target->backend_qp, attr, attr_mask,
                                init_attr)
        : -EINVAL;
}
542
/*
 * Destroy the QP identified by @qp_handle.
 *
 * The QP is looked up in dev_res->qp_hash and the hash entry is removed
 * regardless of whether the lookup found anything.  When a QP was found,
 * its backend QP is destroyed and its QP table entry (indexed by qp->qpn)
 * is released.
 */
void rdma_rm_dealloc_qp(RdmaDeviceResources *dev_res, uint32_t qp_handle)
{
    GBytes *lookup_key = g_bytes_new(&qp_handle, sizeof(qp_handle));
    RdmaRmQP *victim = g_hash_table_lookup(dev_res->qp_hash, lookup_key);

    g_hash_table_remove(dev_res->qp_hash, lookup_key);
    g_bytes_unref(lookup_key);

    if (victim) {
        rdma_backend_destroy_qp(&victim->backend_qp, dev_res);
        rdma_res_tbl_dealloc(&dev_res->qp_tbl, victim->qpn);
    }
}
561
rdma_rm_get_srq(RdmaDeviceResources * dev_res,uint32_t srq_handle)562 RdmaRmSRQ *rdma_rm_get_srq(RdmaDeviceResources *dev_res, uint32_t srq_handle)
563 {
564 return rdma_res_tbl_get(&dev_res->srq_tbl, srq_handle);
565 }
566
/*
 * Allocate a shared receive queue under the PD identified by @pd_handle.
 *
 * Takes an entry from the SRQ table (its index is stored in *@srq_handle),
 * creates the backend SRQ with @max_wr, @max_sge and @srq_limit, and
 * records @opaque in the entry.
 *
 * Returns 0 on success, -EINVAL for an invalid PD handle, -ENOMEM when the
 * table allocation fails, or -EIO when the backend SRQ cannot be created
 * (the table entry is released again in that case).
 */
int rdma_rm_alloc_srq(RdmaDeviceResources *dev_res, uint32_t pd_handle,
                      uint32_t max_wr, uint32_t max_sge, uint32_t srq_limit,
                      uint32_t *srq_handle, void *opaque)
{
    RdmaRmPD *owner_pd = rdma_rm_get_pd(dev_res, pd_handle);
    RdmaRmSRQ *new_srq;

    if (!owner_pd) {
        return -EINVAL;
    }

    new_srq = rdma_res_tbl_alloc(&dev_res->srq_tbl, srq_handle);
    if (!new_srq) {
        return -ENOMEM;
    }

    if (rdma_backend_create_srq(&new_srq->backend_srq, &owner_pd->backend_pd,
                                max_wr, max_sge, srq_limit)) {
        rdma_res_tbl_dealloc(&dev_res->srq_tbl, *srq_handle);
        return -EIO;
    }

    new_srq->opaque = opaque;

    return 0;
}
601
/*
 * Query the attributes of the SRQ identified by @srq_handle.
 *
 * Returns -EINVAL for an unknown SRQ, otherwise whatever
 * rdma_backend_query_srq() returns for the SRQ's backend object.
 */
int rdma_rm_query_srq(RdmaDeviceResources *dev_res, uint32_t srq_handle,
                      struct ibv_srq_attr *srq_attr)
{
    RdmaRmSRQ *target = rdma_rm_get_srq(dev_res, srq_handle);

    return target ? rdma_backend_query_srq(&target->backend_srq, srq_attr)
                  : -EINVAL;
}
614
/*
 * Modify the SRQ identified by @srq_handle.
 *
 * A request is rejected when it selects IBV_SRQ_LIMIT with a zero
 * srq_limit, or IBV_SRQ_MAX_WR with a zero max_wr.
 *
 * Returns -EINVAL for an unknown SRQ or a rejected request, otherwise
 * whatever rdma_backend_modify_srq() returns.
 */
int rdma_rm_modify_srq(RdmaDeviceResources *dev_res, uint32_t srq_handle,
                       struct ibv_srq_attr *srq_attr, int srq_attr_mask)
{
    RdmaRmSRQ *target = rdma_rm_get_srq(dev_res, srq_handle);
    bool zero_limit, zero_max_wr;

    if (!target) {
        return -EINVAL;
    }

    /* Each attribute is inspected only if its mask bit selects it. */
    zero_limit = (srq_attr_mask & IBV_SRQ_LIMIT) && !srq_attr->srq_limit;
    zero_max_wr = (srq_attr_mask & IBV_SRQ_MAX_WR) && !srq_attr->max_wr;
    if (zero_limit || zero_max_wr) {
        return -EINVAL;
    }

    return rdma_backend_modify_srq(&target->backend_srq, srq_attr,
                                   srq_attr_mask);
}
638
/*
 * Destroy the SRQ identified by @srq_handle.
 *
 * Destroys the backend SRQ and releases the SRQ table entry.  Does nothing
 * when the handle does not resolve to an entry.
 */
void rdma_rm_dealloc_srq(RdmaDeviceResources *dev_res, uint32_t srq_handle)
{
    RdmaRmSRQ *victim = rdma_rm_get_srq(dev_res, srq_handle);

    if (victim) {
        rdma_backend_destroy_srq(&victim->backend_srq, dev_res);
        rdma_res_tbl_dealloc(&dev_res->srq_tbl, srq_handle);
    }
}
651
/*
 * Return the context pointer stored in the CQE-context table slot
 * @cqe_ctx_id, or NULL when the table lookup rejects the id.
 */
void *rdma_rm_get_cqe_ctx(RdmaDeviceResources *dev_res, uint32_t cqe_ctx_id)
{
    /* Each slot of the table holds a single 'void *'. */
    void **slot = rdma_res_tbl_get(&dev_res->cqe_ctx_tbl, cqe_ctx_id);

    return slot ? *slot : NULL;
}
663
/*
 * Store @ctx in a newly allocated CQE-context table slot.
 *
 * The index of the slot is written to *@cqe_ctx_id.
 *
 * Returns 0 on success or -ENOMEM when the table allocation fails.
 */
int rdma_rm_alloc_cqe_ctx(RdmaDeviceResources *dev_res, uint32_t *cqe_ctx_id,
                          void *ctx)
{
    void **slot = rdma_res_tbl_alloc(&dev_res->cqe_ctx_tbl, cqe_ctx_id);

    if (slot) {
        *slot = ctx;
        return 0;
    }

    return -ENOMEM;
}
678
/*
 * Release CQE-context table slot @cqe_ctx_id.  The id is handed straight
 * to the table deallocator without a prior lookup.
 */
void rdma_rm_dealloc_cqe_ctx(RdmaDeviceResources *dev_res, uint32_t cqe_ctx_id)
{
    RdmaRmResTbl *ctx_tbl = &dev_res->cqe_ctx_tbl;

    rdma_res_tbl_dealloc(ctx_tbl, cqe_ctx_id);
}
683
/*
 * Register @gid with the backend and store it in port GID table slot
 * @gid_idx.
 *
 * @gid_idx is validated against MAX_PORT_GIDS before anything else is done
 * (the same range check rdma_rm_get_backend_gid_index() applies), so an
 * out-of-range index can neither reach the backend nor write outside
 * dev_res->port.gid_tbl.
 *
 * Returns 0 on success, or -EINVAL when @gid_idx is out of range or the
 * backend refuses the GID.
 */
int rdma_rm_add_gid(RdmaDeviceResources *dev_res, RdmaBackendDev *backend_dev,
                    const char *ifname, union ibv_gid *gid, int gid_idx)
{
    int rc;

    if (unlikely(gid_idx < 0 || gid_idx >= MAX_PORT_GIDS)) {
        rdma_error_report("Got invalid gid_idx %d", gid_idx);
        return -EINVAL;
    }

    rc = rdma_backend_add_gid(backend_dev, ifname, gid);
    if (rc) {
        return -EINVAL;
    }

    memcpy(&dev_res->port.gid_tbl[gid_idx].gid, gid, sizeof(*gid));

    return 0;
}
698
/*
 * Remove the GID stored in port GID table slot @gid_idx.
 *
 * @gid_idx is validated against MAX_PORT_GIDS first (the same range check
 * rdma_rm_get_backend_gid_index() applies) so that an out-of-range index
 * never touches memory outside dev_res->port.gid_tbl.  A slot whose
 * global.interface_id is zero is treated as empty and left alone.
 * Otherwise the GID is removed from the backend, the slot is zeroed and
 * its cached backend_gid_index is reset to -1.
 *
 * Returns 0 on success (including the empty-slot case), or -EINVAL when
 * @gid_idx is out of range or the backend fails to delete the GID.
 */
int rdma_rm_del_gid(RdmaDeviceResources *dev_res, RdmaBackendDev *backend_dev,
                    const char *ifname, int gid_idx)
{
    int rc;

    if (unlikely(gid_idx < 0 || gid_idx >= MAX_PORT_GIDS)) {
        rdma_error_report("Got invalid gid_idx %d", gid_idx);
        return -EINVAL;
    }

    if (!dev_res->port.gid_tbl[gid_idx].gid.global.interface_id) {
        return 0;
    }

    rc = rdma_backend_del_gid(backend_dev, ifname,
                              &dev_res->port.gid_tbl[gid_idx].gid);
    if (rc) {
        return -EINVAL;
    }

    memset(dev_res->port.gid_tbl[gid_idx].gid.raw, 0,
           sizeof(dev_res->port.gid_tbl[gid_idx].gid));
    dev_res->port.gid_tbl[gid_idx].backend_gid_index = -1;

    return 0;
}
720
/*
 * Translate port GID table index @sgid_idx into the backend's gid index.
 *
 * The translation is cached in the slot's backend_gid_index; the backend
 * is only asked while the cached value is -1.
 *
 * Returns the slot's backend_gid_index, or -EINVAL (after reporting an
 * error) when @sgid_idx is outside [0, MAX_PORT_GIDS).
 */
int rdma_rm_get_backend_gid_index(RdmaDeviceResources *dev_res,
                                  RdmaBackendDev *backend_dev, int sgid_idx)
{
    if (unlikely(!(sgid_idx >= 0 && sgid_idx < MAX_PORT_GIDS))) {
        rdma_error_report("Got invalid sgid_idx %d", sgid_idx);
        return -EINVAL;
    }

    /* Not resolved yet: query the backend and remember the answer. */
    if (unlikely(dev_res->port.gid_tbl[sgid_idx].backend_gid_index == -1)) {
        dev_res->port.gid_tbl[sgid_idx].backend_gid_index =
            rdma_backend_get_gid_index(backend_dev,
                                       &dev_res->port.gid_tbl[sgid_idx].gid);
    }

    return dev_res->port.gid_tbl[sgid_idx].backend_gid_index;
}
737
/*
 * Key destructor for the QP hash table (see rdma_rm_init()): drops the
 * reference on the key, which is handed to g_bytes_unref().
 */
static void destroy_qp_hash_key(gpointer data)
{
    GBytes *key = data;

    g_bytes_unref(key);
}
742
/*
 * Reset dev_res->port: zero the whole structure, mark the port as
 * IBV_PORT_DOWN and set the backend_gid_index of every GID slot to -1.
 */
static void init_ports(RdmaDeviceResources *dev_res)
{
    int slot = 0;

    memset(&dev_res->port, 0, sizeof(dev_res->port));
    dev_res->port.state = IBV_PORT_DOWN;

    while (slot < MAX_PORT_GIDS) {
        dev_res->port.gid_tbl[slot].backend_gid_index = -1;
        slot++;
    }
}
754
/*
 * Tear down dev_res->port: mark the port as IBV_PORT_DOWN and call
 * rdma_rm_del_gid() for every GID slot.  The result of each deletion is
 * ignored.
 */
static void fini_ports(RdmaDeviceResources *dev_res,
                       RdmaBackendDev *backend_dev, const char *ifname)
{
    int slot = 0;

    dev_res->port.state = IBV_PORT_DOWN;

    while (slot < MAX_PORT_GIDS) {
        rdma_rm_del_gid(dev_res, backend_dev, ifname, slot);
        slot++;
    }
}
765
/*
 * Initialise the resource manager state in @dev_res.
 *
 * Creates the QP hash (GBytes keys, released through
 * destroy_qp_hash_key()), sizes the PD, CQ, MR, QP and SRQ tables from the
 * limits in @dev_attr, the CQE-context table as max_qp * max_qp_wr
 * pointer-sized slots and the UC table as MAX_UCS entries, then
 * initialises the port state, the device lock and the statistics.
 *
 * Returns 0 on success or -ENOMEM when the hash table pointer is NULL.
 */
int rdma_rm_init(RdmaDeviceResources *dev_res, struct ibv_device_attr *dev_attr)
{
    uint32_t cqe_ctx_slots;

    dev_res->qp_hash = g_hash_table_new_full(g_bytes_hash, g_bytes_equal,
                                             destroy_qp_hash_key, NULL);
    if (dev_res->qp_hash == NULL) {
        return -ENOMEM;
    }

    res_tbl_init("PD", &dev_res->pd_tbl, dev_attr->max_pd, sizeof(RdmaRmPD));
    res_tbl_init("CQ", &dev_res->cq_tbl, dev_attr->max_cq, sizeof(RdmaRmCQ));
    res_tbl_init("MR", &dev_res->mr_tbl, dev_attr->max_mr, sizeof(RdmaRmMR));
    res_tbl_init("QP", &dev_res->qp_tbl, dev_attr->max_qp, sizeof(RdmaRmQP));

    /* One context slot per work request of every QP. */
    cqe_ctx_slots = dev_attr->max_qp * dev_attr->max_qp_wr;
    res_tbl_init("CQE_CTX", &dev_res->cqe_ctx_tbl, cqe_ctx_slots,
                 sizeof(void *));

    res_tbl_init("UC", &dev_res->uc_tbl, MAX_UCS, sizeof(RdmaRmUC));
    res_tbl_init("SRQ", &dev_res->srq_tbl, dev_attr->max_srq,
                 sizeof(RdmaRmSRQ));

    init_ports(dev_res);

    qemu_mutex_init(&dev_res->lock);

    memset(&dev_res->stats, 0, sizeof(dev_res->stats));
    qatomic_set(&dev_res->stats.missing_cqe, 0);

    return 0;
}
793
/*
 * Tear down the resource manager state in @dev_res.
 *
 * Destroys the device lock, shuts the port down (deleting its GIDs via
 * @backend_dev / @ifname), frees the resource tables in the order SRQ, UC,
 * CQE-context, QP, MR, CQ, PD, and finally destroys the QP hash if one
 * exists.
 */
void rdma_rm_fini(RdmaDeviceResources *dev_res, RdmaBackendDev *backend_dev,
                  const char *ifname)
{
    /* Tables listed in the order in which they are released. */
    RdmaRmResTbl *const tables[] = {
        &dev_res->srq_tbl,
        &dev_res->uc_tbl,
        &dev_res->cqe_ctx_tbl,
        &dev_res->qp_tbl,
        &dev_res->mr_tbl,
        &dev_res->cq_tbl,
        &dev_res->pd_tbl,
    };
    size_t idx;

    qemu_mutex_destroy(&dev_res->lock);

    fini_ports(dev_res, backend_dev, ifname);

    for (idx = 0; idx < sizeof(tables) / sizeof(tables[0]); idx++) {
        res_tbl_free(tables[idx]);
    }

    if (dev_res->qp_hash) {
        g_hash_table_destroy(dev_res->qp_hash);
    }
}
813