1 // SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause 2 /* 3 * Copyright (c) 2014-2017 Oracle. All rights reserved. 4 * Copyright (c) 2003-2007 Network Appliance, Inc. All rights reserved. 5 * 6 * This software is available to you under a choice of one of two 7 * licenses. You may choose to be licensed under the terms of the GNU 8 * General Public License (GPL) Version 2, available from the file 9 * COPYING in the main directory of this source tree, or the BSD-type 10 * license below: 11 * 12 * Redistribution and use in source and binary forms, with or without 13 * modification, are permitted provided that the following conditions 14 * are met: 15 * 16 * Redistributions of source code must retain the above copyright 17 * notice, this list of conditions and the following disclaimer. 18 * 19 * Redistributions in binary form must reproduce the above 20 * copyright notice, this list of conditions and the following 21 * disclaimer in the documentation and/or other materials provided 22 * with the distribution. 23 * 24 * Neither the name of the Network Appliance, Inc. nor the names of 25 * its contributors may be used to endorse or promote products 26 * derived from this software without specific prior written 27 * permission. 28 * 29 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 30 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 31 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 32 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 33 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 34 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 35 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 36 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 37 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 38 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 39 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 40 */ 41 42 /* 43 * verbs.c 44 * 45 * Encapsulates the major functions managing: 46 * o adapters 47 * o endpoints 48 * o connections 49 * o buffer memory 50 */ 51 52 #include <linux/interrupt.h> 53 #include <linux/slab.h> 54 #include <linux/sunrpc/addr.h> 55 #include <linux/sunrpc/svc_rdma.h> 56 #include <linux/log2.h> 57 58 #include <asm-generic/barrier.h> 59 #include <asm/bitops.h> 60 61 #include <rdma/ib_cm.h> 62 63 #include "xprt_rdma.h" 64 #include <trace/events/rpcrdma.h> 65 66 static int rpcrdma_sendctxs_create(struct rpcrdma_xprt *r_xprt); 67 static void rpcrdma_sendctxs_destroy(struct rpcrdma_xprt *r_xprt); 68 static void rpcrdma_sendctx_put_locked(struct rpcrdma_xprt *r_xprt, 69 struct rpcrdma_sendctx *sc); 70 static int rpcrdma_reqs_setup(struct rpcrdma_xprt *r_xprt); 71 static void rpcrdma_reqs_reset(struct rpcrdma_xprt *r_xprt); 72 static void rpcrdma_rep_destroy(struct rpcrdma_rep *rep); 73 static void rpcrdma_reps_unmap(struct rpcrdma_xprt *r_xprt); 74 static void rpcrdma_mrs_create(struct rpcrdma_xprt *r_xprt); 75 static void rpcrdma_mrs_destroy(struct rpcrdma_xprt *r_xprt); 76 static void rpcrdma_ep_get(struct rpcrdma_ep *ep); 77 static int rpcrdma_ep_put(struct rpcrdma_ep *ep); 78 static struct rpcrdma_regbuf * 79 rpcrdma_regbuf_alloc(size_t size, enum dma_data_direction direction); 80 static void rpcrdma_regbuf_dma_unmap(struct rpcrdma_regbuf *rb); 81 static void rpcrdma_regbuf_free(struct rpcrdma_regbuf *rb); 82 83 /* Wait for outstanding transport work to finish. ib_drain_qp 84 * handles the drains in the wrong order for us, so open code 85 * them here. 86 */ 87 static void rpcrdma_xprt_drain(struct rpcrdma_xprt *r_xprt) 88 { 89 struct rpcrdma_ep *ep = r_xprt->rx_ep; 90 struct rdma_cm_id *id = ep->re_id; 91 92 /* Wait for rpcrdma_post_recvs() to leave its critical 93 * section. 94 */ 95 if (atomic_inc_return(&ep->re_receiving) > 1) 96 wait_for_completion(&ep->re_done); 97 98 /* Flush Receives, then wait for deferred Reply work 99 * to complete. 100 */ 101 ib_drain_rq(id->qp); 102 103 /* Deferred Reply processing might have scheduled 104 * local invalidations. 105 */ 106 ib_drain_sq(id->qp); 107 108 rpcrdma_ep_put(ep); 109 } 110 111 /* Ensure xprt_force_disconnect() is invoked exactly once when a 112 * connection is closed or lost. (The important thing is it needs 113 * to be invoked "at least" once). 114 */ 115 void rpcrdma_force_disconnect(struct rpcrdma_ep *ep) 116 { 117 if (atomic_add_unless(&ep->re_force_disconnect, 1, 1)) 118 xprt_force_disconnect(ep->re_xprt); 119 } 120 121 /** 122 * rpcrdma_flush_disconnect - Disconnect on flushed completion 123 * @r_xprt: transport to disconnect 124 * @wc: work completion entry 125 * 126 * Must be called in process context. 127 */ 128 void rpcrdma_flush_disconnect(struct rpcrdma_xprt *r_xprt, struct ib_wc *wc) 129 { 130 if (wc->status != IB_WC_SUCCESS) 131 rpcrdma_force_disconnect(r_xprt->rx_ep); 132 } 133 134 /** 135 * rpcrdma_wc_send - Invoked by RDMA provider for each polled Send WC 136 * @cq: completion queue 137 * @wc: WCE for a completed Send WR 138 * 139 */ 140 static void rpcrdma_wc_send(struct ib_cq *cq, struct ib_wc *wc) 141 { 142 struct ib_cqe *cqe = wc->wr_cqe; 143 struct rpcrdma_sendctx *sc = 144 container_of(cqe, struct rpcrdma_sendctx, sc_cqe); 145 struct rpcrdma_xprt *r_xprt = cq->cq_context; 146 147 /* WARNING: Only wr_cqe and status are reliable at this point */ 148 trace_xprtrdma_wc_send(wc, &sc->sc_cid); 149 rpcrdma_sendctx_put_locked(r_xprt, sc); 150 rpcrdma_flush_disconnect(r_xprt, wc); 151 } 152 153 /** 154 * rpcrdma_wc_receive - Invoked by RDMA provider for each polled Receive WC 155 * @cq: completion queue 156 * @wc: WCE for a completed Receive WR 157 * 158 */ 159 static void rpcrdma_wc_receive(struct ib_cq *cq, struct ib_wc *wc) 160 { 161 struct ib_cqe *cqe = wc->wr_cqe; 162 struct rpcrdma_rep *rep = container_of(cqe, struct rpcrdma_rep, 163 rr_cqe); 164 struct rpcrdma_xprt *r_xprt = cq->cq_context; 165 166 /* WARNING: Only wr_cqe and status are reliable at this point */ 167 trace_xprtrdma_wc_receive(wc, &rep->rr_cid); 168 --r_xprt->rx_ep->re_receive_count; 169 if (wc->status != IB_WC_SUCCESS) 170 goto out_flushed; 171 172 /* status == SUCCESS means all fields in wc are trustworthy */ 173 rpcrdma_set_xdrlen(&rep->rr_hdrbuf, wc->byte_len); 174 rep->rr_wc_flags = wc->wc_flags; 175 rep->rr_inv_rkey = wc->ex.invalidate_rkey; 176 177 ib_dma_sync_single_for_cpu(rdmab_device(rep->rr_rdmabuf), 178 rdmab_addr(rep->rr_rdmabuf), 179 wc->byte_len, DMA_FROM_DEVICE); 180 181 rpcrdma_reply_handler(rep); 182 return; 183 184 out_flushed: 185 rpcrdma_flush_disconnect(r_xprt, wc); 186 rpcrdma_rep_put(&r_xprt->rx_buf, rep); 187 } 188 189 static void rpcrdma_update_cm_private(struct rpcrdma_ep *ep, 190 struct rdma_conn_param *param) 191 { 192 const struct rpcrdma_connect_private *pmsg = param->private_data; 193 unsigned int rsize, wsize; 194 195 /* Default settings for RPC-over-RDMA Version One */ 196 rsize = RPCRDMA_V1_DEF_INLINE_SIZE; 197 wsize = RPCRDMA_V1_DEF_INLINE_SIZE; 198 199 if (pmsg && 200 pmsg->cp_magic == rpcrdma_cmp_magic && 201 pmsg->cp_version == RPCRDMA_CMP_VERSION) { 202 rsize = rpcrdma_decode_buffer_size(pmsg->cp_send_size); 203 wsize = rpcrdma_decode_buffer_size(pmsg->cp_recv_size); 204 } 205 206 if (rsize < ep->re_inline_recv) 207 ep->re_inline_recv = rsize; 208 if (wsize < ep->re_inline_send) 209 ep->re_inline_send = wsize; 210 211 rpcrdma_set_max_header_sizes(ep); 212 } 213 214 /** 215 * rpcrdma_cm_event_handler - Handle RDMA CM events 216 * @id: rdma_cm_id on which an event has occurred 217 * @event: details of the event 218 * 219 * Called with @id's mutex held. Returns 1 if caller should 220 * destroy @id, otherwise 0. 221 */ 222 static int 223 rpcrdma_cm_event_handler(struct rdma_cm_id *id, struct rdma_cm_event *event) 224 { 225 struct sockaddr *sap = (struct sockaddr *)&id->route.addr.dst_addr; 226 struct rpcrdma_ep *ep = id->context; 227 228 might_sleep(); 229 230 switch (event->event) { 231 case RDMA_CM_EVENT_ADDR_RESOLVED: 232 case RDMA_CM_EVENT_ROUTE_RESOLVED: 233 ep->re_async_rc = 0; 234 complete(&ep->re_done); 235 return 0; 236 case RDMA_CM_EVENT_ADDR_ERROR: 237 ep->re_async_rc = -EPROTO; 238 complete(&ep->re_done); 239 return 0; 240 case RDMA_CM_EVENT_ROUTE_ERROR: 241 ep->re_async_rc = -ENETUNREACH; 242 complete(&ep->re_done); 243 return 0; 244 case RDMA_CM_EVENT_DEVICE_REMOVAL: 245 pr_info("rpcrdma: removing device %s for %pISpc\n", 246 ep->re_id->device->name, sap); 247 switch (xchg(&ep->re_connect_status, -ENODEV)) { 248 case 0: goto wake_connect_worker; 249 case 1: goto disconnected; 250 } 251 return 0; 252 case RDMA_CM_EVENT_ADDR_CHANGE: 253 ep->re_connect_status = -ENODEV; 254 goto disconnected; 255 case RDMA_CM_EVENT_ESTABLISHED: 256 rpcrdma_ep_get(ep); 257 ep->re_connect_status = 1; 258 rpcrdma_update_cm_private(ep, &event->param.conn); 259 trace_xprtrdma_inline_thresh(ep); 260 wake_up_all(&ep->re_connect_wait); 261 break; 262 case RDMA_CM_EVENT_CONNECT_ERROR: 263 ep->re_connect_status = -ENOTCONN; 264 goto wake_connect_worker; 265 case RDMA_CM_EVENT_UNREACHABLE: 266 ep->re_connect_status = -ENETUNREACH; 267 goto wake_connect_worker; 268 case RDMA_CM_EVENT_REJECTED: 269 ep->re_connect_status = -ECONNREFUSED; 270 if (event->status == IB_CM_REJ_STALE_CONN) 271 ep->re_connect_status = -ENOTCONN; 272 wake_connect_worker: 273 wake_up_all(&ep->re_connect_wait); 274 return 0; 275 case RDMA_CM_EVENT_DISCONNECTED: 276 ep->re_connect_status = -ECONNABORTED; 277 disconnected: 278 rpcrdma_force_disconnect(ep); 279 return rpcrdma_ep_put(ep); 280 default: 281 break; 282 } 283 284 return 0; 285 } 286 287 static struct rdma_cm_id *rpcrdma_create_id(struct rpcrdma_xprt *r_xprt, 288 struct rpcrdma_ep *ep) 289 { 290 unsigned long wtimeout = msecs_to_jiffies(RDMA_RESOLVE_TIMEOUT) + 1; 291 struct rpc_xprt *xprt = &r_xprt->rx_xprt; 292 struct rdma_cm_id *id; 293 int rc; 294 295 init_completion(&ep->re_done); 296 297 id = rdma_create_id(xprt->xprt_net, rpcrdma_cm_event_handler, ep, 298 RDMA_PS_TCP, IB_QPT_RC); 299 if (IS_ERR(id)) 300 return id; 301 302 ep->re_async_rc = -ETIMEDOUT; 303 rc = rdma_resolve_addr(id, NULL, (struct sockaddr *)&xprt->addr, 304 RDMA_RESOLVE_TIMEOUT); 305 if (rc) 306 goto out; 307 rc = wait_for_completion_interruptible_timeout(&ep->re_done, wtimeout); 308 if (rc < 0) 309 goto out; 310 311 rc = ep->re_async_rc; 312 if (rc) 313 goto out; 314 315 ep->re_async_rc = -ETIMEDOUT; 316 rc = rdma_resolve_route(id, RDMA_RESOLVE_TIMEOUT); 317 if (rc) 318 goto out; 319 rc = wait_for_completion_interruptible_timeout(&ep->re_done, wtimeout); 320 if (rc < 0) 321 goto out; 322 rc = ep->re_async_rc; 323 if (rc) 324 goto out; 325 326 return id; 327 328 out: 329 rdma_destroy_id(id); 330 return ERR_PTR(rc); 331 } 332 333 static void rpcrdma_ep_destroy(struct kref *kref) 334 { 335 struct rpcrdma_ep *ep = container_of(kref, struct rpcrdma_ep, re_kref); 336 337 if (ep->re_id->qp) { 338 rdma_destroy_qp(ep->re_id); 339 ep->re_id->qp = NULL; 340 } 341 342 if (ep->re_attr.recv_cq) 343 ib_free_cq(ep->re_attr.recv_cq); 344 ep->re_attr.recv_cq = NULL; 345 if (ep->re_attr.send_cq) 346 ib_free_cq(ep->re_attr.send_cq); 347 ep->re_attr.send_cq = NULL; 348 349 if (ep->re_pd) 350 ib_dealloc_pd(ep->re_pd); 351 ep->re_pd = NULL; 352 353 kfree(ep); 354 module_put(THIS_MODULE); 355 } 356 357 static noinline void rpcrdma_ep_get(struct rpcrdma_ep *ep) 358 { 359 kref_get(&ep->re_kref); 360 } 361 362 /* Returns: 363 * %0 if @ep still has a positive kref count, or 364 * %1 if @ep was destroyed successfully. 365 */ 366 static noinline int rpcrdma_ep_put(struct rpcrdma_ep *ep) 367 { 368 return kref_put(&ep->re_kref, rpcrdma_ep_destroy); 369 } 370 371 static int rpcrdma_ep_create(struct rpcrdma_xprt *r_xprt) 372 { 373 struct rpcrdma_connect_private *pmsg; 374 struct ib_device *device; 375 struct rdma_cm_id *id; 376 struct rpcrdma_ep *ep; 377 int rc; 378 379 ep = kzalloc(sizeof(*ep), XPRTRDMA_GFP_FLAGS); 380 if (!ep) 381 return -ENOTCONN; 382 ep->re_xprt = &r_xprt->rx_xprt; 383 kref_init(&ep->re_kref); 384 385 id = rpcrdma_create_id(r_xprt, ep); 386 if (IS_ERR(id)) { 387 kfree(ep); 388 return PTR_ERR(id); 389 } 390 __module_get(THIS_MODULE); 391 device = id->device; 392 ep->re_id = id; 393 reinit_completion(&ep->re_done); 394 395 ep->re_max_requests = r_xprt->rx_xprt.max_reqs; 396 ep->re_inline_send = xprt_rdma_max_inline_write; 397 ep->re_inline_recv = xprt_rdma_max_inline_read; 398 rc = frwr_query_device(ep, device); 399 if (rc) 400 goto out_destroy; 401 402 r_xprt->rx_buf.rb_max_requests = cpu_to_be32(ep->re_max_requests); 403 404 ep->re_attr.srq = NULL; 405 ep->re_attr.cap.max_inline_data = 0; 406 ep->re_attr.sq_sig_type = IB_SIGNAL_REQ_WR; 407 ep->re_attr.qp_type = IB_QPT_RC; 408 ep->re_attr.port_num = ~0; 409 410 ep->re_send_batch = ep->re_max_requests >> 3; 411 ep->re_send_count = ep->re_send_batch; 412 init_waitqueue_head(&ep->re_connect_wait); 413 414 ep->re_attr.send_cq = ib_alloc_cq_any(device, r_xprt, 415 ep->re_attr.cap.max_send_wr, 416 IB_POLL_WORKQUEUE); 417 if (IS_ERR(ep->re_attr.send_cq)) { 418 rc = PTR_ERR(ep->re_attr.send_cq); 419 ep->re_attr.send_cq = NULL; 420 goto out_destroy; 421 } 422 423 ep->re_attr.recv_cq = ib_alloc_cq_any(device, r_xprt, 424 ep->re_attr.cap.max_recv_wr, 425 IB_POLL_WORKQUEUE); 426 if (IS_ERR(ep->re_attr.recv_cq)) { 427 rc = PTR_ERR(ep->re_attr.recv_cq); 428 ep->re_attr.recv_cq = NULL; 429 goto out_destroy; 430 } 431 ep->re_receive_count = 0; 432 433 /* Initialize cma parameters */ 434 memset(&ep->re_remote_cma, 0, sizeof(ep->re_remote_cma)); 435 436 /* Prepare RDMA-CM private message */ 437 pmsg = &ep->re_cm_private; 438 pmsg->cp_magic = rpcrdma_cmp_magic; 439 pmsg->cp_version = RPCRDMA_CMP_VERSION; 440 pmsg->cp_flags |= RPCRDMA_CMP_F_SND_W_INV_OK; 441 pmsg->cp_send_size = rpcrdma_encode_buffer_size(ep->re_inline_send); 442 pmsg->cp_recv_size = rpcrdma_encode_buffer_size(ep->re_inline_recv); 443 ep->re_remote_cma.private_data = pmsg; 444 ep->re_remote_cma.private_data_len = sizeof(*pmsg); 445 446 /* Client offers RDMA Read but does not initiate */ 447 ep->re_remote_cma.initiator_depth = 0; 448 ep->re_remote_cma.responder_resources = 449 min_t(int, U8_MAX, device->attrs.max_qp_rd_atom); 450 451 /* Limit transport retries so client can detect server 452 * GID changes quickly. RPC layer handles re-establishing 453 * transport connection and retransmission. 454 */ 455 ep->re_remote_cma.retry_count = 6; 456 457 /* RPC-over-RDMA handles its own flow control. In addition, 458 * make all RNR NAKs visible so we know that RPC-over-RDMA 459 * flow control is working correctly (no NAKs should be seen). 460 */ 461 ep->re_remote_cma.flow_control = 0; 462 ep->re_remote_cma.rnr_retry_count = 0; 463 464 ep->re_pd = ib_alloc_pd(device, 0); 465 if (IS_ERR(ep->re_pd)) { 466 rc = PTR_ERR(ep->re_pd); 467 ep->re_pd = NULL; 468 goto out_destroy; 469 } 470 471 rc = rdma_create_qp(id, ep->re_pd, &ep->re_attr); 472 if (rc) 473 goto out_destroy; 474 475 r_xprt->rx_ep = ep; 476 return 0; 477 478 out_destroy: 479 rpcrdma_ep_put(ep); 480 rdma_destroy_id(id); 481 return rc; 482 } 483 484 /** 485 * rpcrdma_xprt_connect - Connect an unconnected transport 486 * @r_xprt: controlling transport instance 487 * 488 * Returns 0 on success or a negative errno. 489 */ 490 int rpcrdma_xprt_connect(struct rpcrdma_xprt *r_xprt) 491 { 492 struct rpc_xprt *xprt = &r_xprt->rx_xprt; 493 struct rpcrdma_ep *ep; 494 int rc; 495 496 rc = rpcrdma_ep_create(r_xprt); 497 if (rc) 498 return rc; 499 ep = r_xprt->rx_ep; 500 501 xprt_clear_connected(xprt); 502 rpcrdma_reset_cwnd(r_xprt); 503 504 /* Bump the ep's reference count while there are 505 * outstanding Receives. 506 */ 507 rpcrdma_ep_get(ep); 508 rpcrdma_post_recvs(r_xprt, 1, true); 509 510 rc = rdma_connect(ep->re_id, &ep->re_remote_cma); 511 if (rc) 512 goto out; 513 514 if (xprt->reestablish_timeout < RPCRDMA_INIT_REEST_TO) 515 xprt->reestablish_timeout = RPCRDMA_INIT_REEST_TO; 516 wait_event_interruptible(ep->re_connect_wait, 517 ep->re_connect_status != 0); 518 if (ep->re_connect_status <= 0) { 519 rc = ep->re_connect_status; 520 goto out; 521 } 522 523 rc = rpcrdma_sendctxs_create(r_xprt); 524 if (rc) { 525 rc = -ENOTCONN; 526 goto out; 527 } 528 529 rc = rpcrdma_reqs_setup(r_xprt); 530 if (rc) { 531 rc = -ENOTCONN; 532 goto out; 533 } 534 rpcrdma_mrs_create(r_xprt); 535 frwr_wp_create(r_xprt); 536 537 out: 538 trace_xprtrdma_connect(r_xprt, rc); 539 return rc; 540 } 541 542 /** 543 * rpcrdma_xprt_disconnect - Disconnect underlying transport 544 * @r_xprt: controlling transport instance 545 * 546 * Caller serializes. Either the transport send lock is held, 547 * or we're being called to destroy the transport. 548 * 549 * On return, @r_xprt is completely divested of all hardware 550 * resources and prepared for the next ->connect operation. 551 */ 552 void rpcrdma_xprt_disconnect(struct rpcrdma_xprt *r_xprt) 553 { 554 struct rpcrdma_ep *ep = r_xprt->rx_ep; 555 struct rdma_cm_id *id; 556 int rc; 557 558 if (!ep) 559 return; 560 561 id = ep->re_id; 562 rc = rdma_disconnect(id); 563 trace_xprtrdma_disconnect(r_xprt, rc); 564 565 rpcrdma_xprt_drain(r_xprt); 566 rpcrdma_reps_unmap(r_xprt); 567 rpcrdma_reqs_reset(r_xprt); 568 rpcrdma_mrs_destroy(r_xprt); 569 rpcrdma_sendctxs_destroy(r_xprt); 570 571 if (rpcrdma_ep_put(ep)) 572 rdma_destroy_id(id); 573 574 r_xprt->rx_ep = NULL; 575 } 576 577 /* Fixed-size circular FIFO queue. This implementation is wait-free and 578 * lock-free. 579 * 580 * Consumer is the code path that posts Sends. This path dequeues a 581 * sendctx for use by a Send operation. Multiple consumer threads 582 * are serialized by the RPC transport lock, which allows only one 583 * ->send_request call at a time. 584 * 585 * Producer is the code path that handles Send completions. This path 586 * enqueues a sendctx that has been completed. Multiple producer 587 * threads are serialized by the ib_poll_cq() function. 588 */ 589 590 /* rpcrdma_sendctxs_destroy() assumes caller has already quiesced 591 * queue activity, and rpcrdma_xprt_drain has flushed all remaining 592 * Send requests. 593 */ 594 static void rpcrdma_sendctxs_destroy(struct rpcrdma_xprt *r_xprt) 595 { 596 struct rpcrdma_buffer *buf = &r_xprt->rx_buf; 597 unsigned long i; 598 599 if (!buf->rb_sc_ctxs) 600 return; 601 for (i = 0; i <= buf->rb_sc_last; i++) 602 kfree(buf->rb_sc_ctxs[i]); 603 kfree(buf->rb_sc_ctxs); 604 buf->rb_sc_ctxs = NULL; 605 } 606 607 static struct rpcrdma_sendctx *rpcrdma_sendctx_create(struct rpcrdma_ep *ep) 608 { 609 struct rpcrdma_sendctx *sc; 610 611 sc = kzalloc(struct_size(sc, sc_sges, ep->re_attr.cap.max_send_sge), 612 XPRTRDMA_GFP_FLAGS); 613 if (!sc) 614 return NULL; 615 616 sc->sc_cqe.done = rpcrdma_wc_send; 617 sc->sc_cid.ci_queue_id = ep->re_attr.send_cq->res.id; 618 sc->sc_cid.ci_completion_id = 619 atomic_inc_return(&ep->re_completion_ids); 620 return sc; 621 } 622 623 static int rpcrdma_sendctxs_create(struct rpcrdma_xprt *r_xprt) 624 { 625 struct rpcrdma_buffer *buf = &r_xprt->rx_buf; 626 struct rpcrdma_sendctx *sc; 627 unsigned long i; 628 629 /* Maximum number of concurrent outstanding Send WRs. Capping 630 * the circular queue size stops Send Queue overflow by causing 631 * the ->send_request call to fail temporarily before too many 632 * Sends are posted. 633 */ 634 i = r_xprt->rx_ep->re_max_requests + RPCRDMA_MAX_BC_REQUESTS; 635 buf->rb_sc_ctxs = kcalloc(i, sizeof(sc), XPRTRDMA_GFP_FLAGS); 636 if (!buf->rb_sc_ctxs) 637 return -ENOMEM; 638 639 buf->rb_sc_last = i - 1; 640 for (i = 0; i <= buf->rb_sc_last; i++) { 641 sc = rpcrdma_sendctx_create(r_xprt->rx_ep); 642 if (!sc) 643 return -ENOMEM; 644 645 buf->rb_sc_ctxs[i] = sc; 646 } 647 648 buf->rb_sc_head = 0; 649 buf->rb_sc_tail = 0; 650 return 0; 651 } 652 653 /* The sendctx queue is not guaranteed to have a size that is a 654 * power of two, thus the helpers in circ_buf.h cannot be used. 655 * The other option is to use modulus (%), which can be expensive. 656 */ 657 static unsigned long rpcrdma_sendctx_next(struct rpcrdma_buffer *buf, 658 unsigned long item) 659 { 660 return likely(item < buf->rb_sc_last) ? item + 1 : 0; 661 } 662 663 /** 664 * rpcrdma_sendctx_get_locked - Acquire a send context 665 * @r_xprt: controlling transport instance 666 * 667 * Returns pointer to a free send completion context; or NULL if 668 * the queue is empty. 669 * 670 * Usage: Called to acquire an SGE array before preparing a Send WR. 671 * 672 * The caller serializes calls to this function (per transport), and 673 * provides an effective memory barrier that flushes the new value 674 * of rb_sc_head. 675 */ 676 struct rpcrdma_sendctx *rpcrdma_sendctx_get_locked(struct rpcrdma_xprt *r_xprt) 677 { 678 struct rpcrdma_buffer *buf = &r_xprt->rx_buf; 679 struct rpcrdma_sendctx *sc; 680 unsigned long next_head; 681 682 next_head = rpcrdma_sendctx_next(buf, buf->rb_sc_head); 683 684 if (next_head == READ_ONCE(buf->rb_sc_tail)) 685 goto out_emptyq; 686 687 /* ORDER: item must be accessed _before_ head is updated */ 688 sc = buf->rb_sc_ctxs[next_head]; 689 690 /* Releasing the lock in the caller acts as a memory 691 * barrier that flushes rb_sc_head. 692 */ 693 buf->rb_sc_head = next_head; 694 695 return sc; 696 697 out_emptyq: 698 /* The queue is "empty" if there have not been enough Send 699 * completions recently. This is a sign the Send Queue is 700 * backing up. Cause the caller to pause and try again. 701 */ 702 xprt_wait_for_buffer_space(&r_xprt->rx_xprt); 703 r_xprt->rx_stats.empty_sendctx_q++; 704 return NULL; 705 } 706 707 /** 708 * rpcrdma_sendctx_put_locked - Release a send context 709 * @r_xprt: controlling transport instance 710 * @sc: send context to release 711 * 712 * Usage: Called from Send completion to return a sendctxt 713 * to the queue. 714 * 715 * The caller serializes calls to this function (per transport). 716 */ 717 static void rpcrdma_sendctx_put_locked(struct rpcrdma_xprt *r_xprt, 718 struct rpcrdma_sendctx *sc) 719 { 720 struct rpcrdma_buffer *buf = &r_xprt->rx_buf; 721 unsigned long next_tail; 722 723 /* Unmap SGEs of previously completed but unsignaled 724 * Sends by walking up the queue until @sc is found. 725 */ 726 next_tail = buf->rb_sc_tail; 727 do { 728 next_tail = rpcrdma_sendctx_next(buf, next_tail); 729 730 /* ORDER: item must be accessed _before_ tail is updated */ 731 rpcrdma_sendctx_unmap(buf->rb_sc_ctxs[next_tail]); 732 733 } while (buf->rb_sc_ctxs[next_tail] != sc); 734 735 /* Paired with READ_ONCE */ 736 smp_store_release(&buf->rb_sc_tail, next_tail); 737 738 xprt_write_space(&r_xprt->rx_xprt); 739 } 740 741 static void 742 rpcrdma_mrs_create(struct rpcrdma_xprt *r_xprt) 743 { 744 struct rpcrdma_buffer *buf = &r_xprt->rx_buf; 745 struct rpcrdma_ep *ep = r_xprt->rx_ep; 746 struct ib_device *device = ep->re_id->device; 747 unsigned int count; 748 749 /* Try to allocate enough to perform one full-sized I/O */ 750 for (count = 0; count < ep->re_max_rdma_segs; count++) { 751 struct rpcrdma_mr *mr; 752 int rc; 753 754 mr = kzalloc_node(sizeof(*mr), XPRTRDMA_GFP_FLAGS, 755 ibdev_to_node(device)); 756 if (!mr) 757 break; 758 759 rc = frwr_mr_init(r_xprt, mr); 760 if (rc) { 761 kfree(mr); 762 break; 763 } 764 765 spin_lock(&buf->rb_lock); 766 rpcrdma_mr_push(mr, &buf->rb_mrs); 767 list_add(&mr->mr_all, &buf->rb_all_mrs); 768 spin_unlock(&buf->rb_lock); 769 } 770 771 r_xprt->rx_stats.mrs_allocated += count; 772 trace_xprtrdma_createmrs(r_xprt, count); 773 } 774 775 static void 776 rpcrdma_mr_refresh_worker(struct work_struct *work) 777 { 778 struct rpcrdma_buffer *buf = container_of(work, struct rpcrdma_buffer, 779 rb_refresh_worker); 780 struct rpcrdma_xprt *r_xprt = container_of(buf, struct rpcrdma_xprt, 781 rx_buf); 782 783 rpcrdma_mrs_create(r_xprt); 784 xprt_write_space(&r_xprt->rx_xprt); 785 } 786 787 /** 788 * rpcrdma_mrs_refresh - Wake the MR refresh worker 789 * @r_xprt: controlling transport instance 790 * 791 */ 792 void rpcrdma_mrs_refresh(struct rpcrdma_xprt *r_xprt) 793 { 794 struct rpcrdma_buffer *buf = &r_xprt->rx_buf; 795 struct rpcrdma_ep *ep = r_xprt->rx_ep; 796 797 /* If there is no underlying connection, it's no use 798 * to wake the refresh worker. 799 */ 800 if (ep->re_connect_status != 1) 801 return; 802 queue_work(system_highpri_wq, &buf->rb_refresh_worker); 803 } 804 805 /** 806 * rpcrdma_req_create - Allocate an rpcrdma_req object 807 * @r_xprt: controlling r_xprt 808 * @size: initial size, in bytes, of send and receive buffers 809 * 810 * Returns an allocated and fully initialized rpcrdma_req or NULL. 811 */ 812 struct rpcrdma_req *rpcrdma_req_create(struct rpcrdma_xprt *r_xprt, 813 size_t size) 814 { 815 struct rpcrdma_buffer *buffer = &r_xprt->rx_buf; 816 struct rpcrdma_req *req; 817 818 req = kzalloc(sizeof(*req), XPRTRDMA_GFP_FLAGS); 819 if (req == NULL) 820 goto out1; 821 822 req->rl_sendbuf = rpcrdma_regbuf_alloc(size, DMA_TO_DEVICE); 823 if (!req->rl_sendbuf) 824 goto out2; 825 826 req->rl_recvbuf = rpcrdma_regbuf_alloc(size, DMA_NONE); 827 if (!req->rl_recvbuf) 828 goto out3; 829 830 INIT_LIST_HEAD(&req->rl_free_mrs); 831 INIT_LIST_HEAD(&req->rl_registered); 832 spin_lock(&buffer->rb_lock); 833 list_add(&req->rl_all, &buffer->rb_allreqs); 834 spin_unlock(&buffer->rb_lock); 835 return req; 836 837 out3: 838 rpcrdma_regbuf_free(req->rl_sendbuf); 839 out2: 840 kfree(req); 841 out1: 842 return NULL; 843 } 844 845 /** 846 * rpcrdma_req_setup - Per-connection instance setup of an rpcrdma_req object 847 * @r_xprt: controlling transport instance 848 * @req: rpcrdma_req object to set up 849 * 850 * Returns zero on success, and a negative errno on failure. 851 */ 852 int rpcrdma_req_setup(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req) 853 { 854 struct rpcrdma_regbuf *rb; 855 size_t maxhdrsize; 856 857 /* Compute maximum header buffer size in bytes */ 858 maxhdrsize = rpcrdma_fixed_maxsz + 3 + 859 r_xprt->rx_ep->re_max_rdma_segs * rpcrdma_readchunk_maxsz; 860 maxhdrsize *= sizeof(__be32); 861 rb = rpcrdma_regbuf_alloc(__roundup_pow_of_two(maxhdrsize), 862 DMA_TO_DEVICE); 863 if (!rb) 864 goto out; 865 866 if (!__rpcrdma_regbuf_dma_map(r_xprt, rb)) 867 goto out_free; 868 869 req->rl_rdmabuf = rb; 870 xdr_buf_init(&req->rl_hdrbuf, rdmab_data(rb), rdmab_length(rb)); 871 return 0; 872 873 out_free: 874 rpcrdma_regbuf_free(rb); 875 out: 876 return -ENOMEM; 877 } 878 879 /* ASSUMPTION: the rb_allreqs list is stable for the duration, 880 * and thus can be walked without holding rb_lock. Eg. the 881 * caller is holding the transport send lock to exclude 882 * device removal or disconnection. 883 */ 884 static int rpcrdma_reqs_setup(struct rpcrdma_xprt *r_xprt) 885 { 886 struct rpcrdma_buffer *buf = &r_xprt->rx_buf; 887 struct rpcrdma_req *req; 888 int rc; 889 890 list_for_each_entry(req, &buf->rb_allreqs, rl_all) { 891 rc = rpcrdma_req_setup(r_xprt, req); 892 if (rc) 893 return rc; 894 } 895 return 0; 896 } 897 898 static void rpcrdma_req_reset(struct rpcrdma_req *req) 899 { 900 struct rpcrdma_mr *mr; 901 902 /* Credits are valid for only one connection */ 903 req->rl_slot.rq_cong = 0; 904 905 rpcrdma_regbuf_free(req->rl_rdmabuf); 906 req->rl_rdmabuf = NULL; 907 908 rpcrdma_regbuf_dma_unmap(req->rl_sendbuf); 909 rpcrdma_regbuf_dma_unmap(req->rl_recvbuf); 910 911 /* The verbs consumer can't know the state of an MR on the 912 * req->rl_registered list unless a successful completion 913 * has occurred, so they cannot be re-used. 914 */ 915 while ((mr = rpcrdma_mr_pop(&req->rl_registered))) { 916 struct rpcrdma_buffer *buf = &mr->mr_xprt->rx_buf; 917 918 spin_lock(&buf->rb_lock); 919 list_del(&mr->mr_all); 920 spin_unlock(&buf->rb_lock); 921 922 frwr_mr_release(mr); 923 } 924 } 925 926 /* ASSUMPTION: the rb_allreqs list is stable for the duration, 927 * and thus can be walked without holding rb_lock. Eg. the 928 * caller is holding the transport send lock to exclude 929 * device removal or disconnection. 930 */ 931 static void rpcrdma_reqs_reset(struct rpcrdma_xprt *r_xprt) 932 { 933 struct rpcrdma_buffer *buf = &r_xprt->rx_buf; 934 struct rpcrdma_req *req; 935 936 list_for_each_entry(req, &buf->rb_allreqs, rl_all) 937 rpcrdma_req_reset(req); 938 } 939 940 static noinline 941 struct rpcrdma_rep *rpcrdma_rep_create(struct rpcrdma_xprt *r_xprt, 942 bool temp) 943 { 944 struct rpcrdma_buffer *buf = &r_xprt->rx_buf; 945 struct rpcrdma_rep *rep; 946 947 rep = kzalloc(sizeof(*rep), XPRTRDMA_GFP_FLAGS); 948 if (rep == NULL) 949 goto out; 950 951 rep->rr_rdmabuf = rpcrdma_regbuf_alloc(r_xprt->rx_ep->re_inline_recv, 952 DMA_FROM_DEVICE); 953 if (!rep->rr_rdmabuf) 954 goto out_free; 955 956 rep->rr_cid.ci_completion_id = 957 atomic_inc_return(&r_xprt->rx_ep->re_completion_ids); 958 959 xdr_buf_init(&rep->rr_hdrbuf, rdmab_data(rep->rr_rdmabuf), 960 rdmab_length(rep->rr_rdmabuf)); 961 rep->rr_cqe.done = rpcrdma_wc_receive; 962 rep->rr_rxprt = r_xprt; 963 rep->rr_recv_wr.next = NULL; 964 rep->rr_recv_wr.wr_cqe = &rep->rr_cqe; 965 rep->rr_recv_wr.sg_list = &rep->rr_rdmabuf->rg_iov; 966 rep->rr_recv_wr.num_sge = 1; 967 rep->rr_temp = temp; 968 969 spin_lock(&buf->rb_lock); 970 list_add(&rep->rr_all, &buf->rb_all_reps); 971 spin_unlock(&buf->rb_lock); 972 return rep; 973 974 out_free: 975 kfree(rep); 976 out: 977 return NULL; 978 } 979 980 static void rpcrdma_rep_free(struct rpcrdma_rep *rep) 981 { 982 rpcrdma_regbuf_free(rep->rr_rdmabuf); 983 kfree(rep); 984 } 985 986 static void rpcrdma_rep_destroy(struct rpcrdma_rep *rep) 987 { 988 struct rpcrdma_buffer *buf = &rep->rr_rxprt->rx_buf; 989 990 spin_lock(&buf->rb_lock); 991 list_del(&rep->rr_all); 992 spin_unlock(&buf->rb_lock); 993 994 rpcrdma_rep_free(rep); 995 } 996 997 static struct rpcrdma_rep *rpcrdma_rep_get_locked(struct rpcrdma_buffer *buf) 998 { 999 struct llist_node *node; 1000 1001 /* Calls to llist_del_first are required to be serialized */ 1002 node = llist_del_first(&buf->rb_free_reps); 1003 if (!node) 1004 return NULL; 1005 return llist_entry(node, struct rpcrdma_rep, rr_node); 1006 } 1007 1008 /** 1009 * rpcrdma_rep_put - Release rpcrdma_rep back to free list 1010 * @buf: buffer pool 1011 * @rep: rep to release 1012 * 1013 */ 1014 void rpcrdma_rep_put(struct rpcrdma_buffer *buf, struct rpcrdma_rep *rep) 1015 { 1016 llist_add(&rep->rr_node, &buf->rb_free_reps); 1017 } 1018 1019 /* Caller must ensure the QP is quiescent (RQ is drained) before 1020 * invoking this function, to guarantee rb_all_reps is not 1021 * changing. 1022 */ 1023 static void rpcrdma_reps_unmap(struct rpcrdma_xprt *r_xprt) 1024 { 1025 struct rpcrdma_buffer *buf = &r_xprt->rx_buf; 1026 struct rpcrdma_rep *rep; 1027 1028 list_for_each_entry(rep, &buf->rb_all_reps, rr_all) { 1029 rpcrdma_regbuf_dma_unmap(rep->rr_rdmabuf); 1030 rep->rr_temp = true; /* Mark this rep for destruction */ 1031 } 1032 } 1033 1034 static void rpcrdma_reps_destroy(struct rpcrdma_buffer *buf) 1035 { 1036 struct rpcrdma_rep *rep; 1037 1038 spin_lock(&buf->rb_lock); 1039 while ((rep = list_first_entry_or_null(&buf->rb_all_reps, 1040 struct rpcrdma_rep, 1041 rr_all)) != NULL) { 1042 list_del(&rep->rr_all); 1043 spin_unlock(&buf->rb_lock); 1044 1045 rpcrdma_rep_free(rep); 1046 1047 spin_lock(&buf->rb_lock); 1048 } 1049 spin_unlock(&buf->rb_lock); 1050 } 1051 1052 /** 1053 * rpcrdma_buffer_create - Create initial set of req/rep objects 1054 * @r_xprt: transport instance to (re)initialize 1055 * 1056 * Returns zero on success, otherwise a negative errno. 1057 */ 1058 int rpcrdma_buffer_create(struct rpcrdma_xprt *r_xprt) 1059 { 1060 struct rpcrdma_buffer *buf = &r_xprt->rx_buf; 1061 int i, rc; 1062 1063 buf->rb_bc_srv_max_requests = 0; 1064 spin_lock_init(&buf->rb_lock); 1065 INIT_LIST_HEAD(&buf->rb_mrs); 1066 INIT_LIST_HEAD(&buf->rb_all_mrs); 1067 INIT_WORK(&buf->rb_refresh_worker, rpcrdma_mr_refresh_worker); 1068 1069 INIT_LIST_HEAD(&buf->rb_send_bufs); 1070 INIT_LIST_HEAD(&buf->rb_allreqs); 1071 INIT_LIST_HEAD(&buf->rb_all_reps); 1072 1073 rc = -ENOMEM; 1074 for (i = 0; i < r_xprt->rx_xprt.max_reqs; i++) { 1075 struct rpcrdma_req *req; 1076 1077 req = rpcrdma_req_create(r_xprt, 1078 RPCRDMA_V1_DEF_INLINE_SIZE * 2); 1079 if (!req) 1080 goto out; 1081 list_add(&req->rl_list, &buf->rb_send_bufs); 1082 } 1083 1084 init_llist_head(&buf->rb_free_reps); 1085 1086 return 0; 1087 out: 1088 rpcrdma_buffer_destroy(buf); 1089 return rc; 1090 } 1091 1092 /** 1093 * rpcrdma_req_destroy - Destroy an rpcrdma_req object 1094 * @req: unused object to be destroyed 1095 * 1096 * Relies on caller holding the transport send lock to protect 1097 * removing req->rl_all from buf->rb_all_reqs safely. 1098 */ 1099 void rpcrdma_req_destroy(struct rpcrdma_req *req) 1100 { 1101 struct rpcrdma_mr *mr; 1102 1103 list_del(&req->rl_all); 1104 1105 while ((mr = rpcrdma_mr_pop(&req->rl_free_mrs))) { 1106 struct rpcrdma_buffer *buf = &mr->mr_xprt->rx_buf; 1107 1108 spin_lock(&buf->rb_lock); 1109 list_del(&mr->mr_all); 1110 spin_unlock(&buf->rb_lock); 1111 1112 frwr_mr_release(mr); 1113 } 1114 1115 rpcrdma_regbuf_free(req->rl_recvbuf); 1116 rpcrdma_regbuf_free(req->rl_sendbuf); 1117 rpcrdma_regbuf_free(req->rl_rdmabuf); 1118 kfree(req); 1119 } 1120 1121 /** 1122 * rpcrdma_mrs_destroy - Release all of a transport's MRs 1123 * @r_xprt: controlling transport instance 1124 * 1125 * Relies on caller holding the transport send lock to protect 1126 * removing mr->mr_list from req->rl_free_mrs safely. 1127 */ 1128 static void rpcrdma_mrs_destroy(struct rpcrdma_xprt *r_xprt) 1129 { 1130 struct rpcrdma_buffer *buf = &r_xprt->rx_buf; 1131 struct rpcrdma_mr *mr; 1132 1133 cancel_work_sync(&buf->rb_refresh_worker); 1134 1135 spin_lock(&buf->rb_lock); 1136 while ((mr = list_first_entry_or_null(&buf->rb_all_mrs, 1137 struct rpcrdma_mr, 1138 mr_all)) != NULL) { 1139 list_del(&mr->mr_list); 1140 list_del(&mr->mr_all); 1141 spin_unlock(&buf->rb_lock); 1142 1143 frwr_mr_release(mr); 1144 1145 spin_lock(&buf->rb_lock); 1146 } 1147 spin_unlock(&buf->rb_lock); 1148 } 1149 1150 /** 1151 * rpcrdma_buffer_destroy - Release all hw resources 1152 * @buf: root control block for resources 1153 * 1154 * ORDERING: relies on a prior rpcrdma_xprt_drain : 1155 * - No more Send or Receive completions can occur 1156 * - All MRs, reps, and reqs are returned to their free lists 1157 */ 1158 void 1159 rpcrdma_buffer_destroy(struct rpcrdma_buffer *buf) 1160 { 1161 rpcrdma_reps_destroy(buf); 1162 1163 while (!list_empty(&buf->rb_send_bufs)) { 1164 struct rpcrdma_req *req; 1165 1166 req = list_first_entry(&buf->rb_send_bufs, 1167 struct rpcrdma_req, rl_list); 1168 list_del(&req->rl_list); 1169 rpcrdma_req_destroy(req); 1170 } 1171 } 1172 1173 /** 1174 * rpcrdma_mr_get - Allocate an rpcrdma_mr object 1175 * @r_xprt: controlling transport 1176 * 1177 * Returns an initialized rpcrdma_mr or NULL if no free 1178 * rpcrdma_mr objects are available. 1179 */ 1180 struct rpcrdma_mr * 1181 rpcrdma_mr_get(struct rpcrdma_xprt *r_xprt) 1182 { 1183 struct rpcrdma_buffer *buf = &r_xprt->rx_buf; 1184 struct rpcrdma_mr *mr; 1185 1186 spin_lock(&buf->rb_lock); 1187 mr = rpcrdma_mr_pop(&buf->rb_mrs); 1188 spin_unlock(&buf->rb_lock); 1189 return mr; 1190 } 1191 1192 /** 1193 * rpcrdma_reply_put - Put reply buffers back into pool 1194 * @buffers: buffer pool 1195 * @req: object to return 1196 * 1197 */ 1198 void rpcrdma_reply_put(struct rpcrdma_buffer *buffers, struct rpcrdma_req *req) 1199 { 1200 if (req->rl_reply) { 1201 rpcrdma_rep_put(buffers, req->rl_reply); 1202 req->rl_reply = NULL; 1203 } 1204 } 1205 1206 /** 1207 * rpcrdma_buffer_get - Get a request buffer 1208 * @buffers: Buffer pool from which to obtain a buffer 1209 * 1210 * Returns a fresh rpcrdma_req, or NULL if none are available. 1211 */ 1212 struct rpcrdma_req * 1213 rpcrdma_buffer_get(struct rpcrdma_buffer *buffers) 1214 { 1215 struct rpcrdma_req *req; 1216 1217 spin_lock(&buffers->rb_lock); 1218 req = list_first_entry_or_null(&buffers->rb_send_bufs, 1219 struct rpcrdma_req, rl_list); 1220 if (req) 1221 list_del_init(&req->rl_list); 1222 spin_unlock(&buffers->rb_lock); 1223 return req; 1224 } 1225 1226 /** 1227 * rpcrdma_buffer_put - Put request/reply buffers back into pool 1228 * @buffers: buffer pool 1229 * @req: object to return 1230 * 1231 */ 1232 void rpcrdma_buffer_put(struct rpcrdma_buffer *buffers, struct rpcrdma_req *req) 1233 { 1234 rpcrdma_reply_put(buffers, req); 1235 1236 spin_lock(&buffers->rb_lock); 1237 list_add(&req->rl_list, &buffers->rb_send_bufs); 1238 spin_unlock(&buffers->rb_lock); 1239 } 1240 1241 /* Returns a pointer to a rpcrdma_regbuf object, or NULL. 1242 * 1243 * xprtrdma uses a regbuf for posting an outgoing RDMA SEND, or for 1244 * receiving the payload of RDMA RECV operations. During Long Calls 1245 * or Replies they may be registered externally via frwr_map. 1246 */ 1247 static struct rpcrdma_regbuf * 1248 rpcrdma_regbuf_alloc(size_t size, enum dma_data_direction direction) 1249 { 1250 struct rpcrdma_regbuf *rb; 1251 1252 rb = kmalloc(sizeof(*rb), XPRTRDMA_GFP_FLAGS); 1253 if (!rb) 1254 return NULL; 1255 rb->rg_data = kmalloc(size, XPRTRDMA_GFP_FLAGS); 1256 if (!rb->rg_data) { 1257 kfree(rb); 1258 return NULL; 1259 } 1260 1261 rb->rg_device = NULL; 1262 rb->rg_direction = direction; 1263 rb->rg_iov.length = size; 1264 return rb; 1265 } 1266 1267 /** 1268 * rpcrdma_regbuf_realloc - re-allocate a SEND/RECV buffer 1269 * @rb: regbuf to reallocate 1270 * @size: size of buffer to be allocated, in bytes 1271 * @flags: GFP flags 1272 * 1273 * Returns true if reallocation was successful. If false is 1274 * returned, @rb is left untouched. 1275 */ 1276 bool rpcrdma_regbuf_realloc(struct rpcrdma_regbuf *rb, size_t size, gfp_t flags) 1277 { 1278 void *buf; 1279 1280 buf = kmalloc(size, flags); 1281 if (!buf) 1282 return false; 1283 1284 rpcrdma_regbuf_dma_unmap(rb); 1285 kfree(rb->rg_data); 1286 1287 rb->rg_data = buf; 1288 rb->rg_iov.length = size; 1289 return true; 1290 } 1291 1292 /** 1293 * __rpcrdma_regbuf_dma_map - DMA-map a regbuf 1294 * @r_xprt: controlling transport instance 1295 * @rb: regbuf to be mapped 1296 * 1297 * Returns true if the buffer is now DMA mapped to @r_xprt's device 1298 */ 1299 bool __rpcrdma_regbuf_dma_map(struct rpcrdma_xprt *r_xprt, 1300 struct rpcrdma_regbuf *rb) 1301 { 1302 struct ib_device *device = r_xprt->rx_ep->re_id->device; 1303 1304 if (rb->rg_direction == DMA_NONE) 1305 return false; 1306 1307 rb->rg_iov.addr = ib_dma_map_single(device, rdmab_data(rb), 1308 rdmab_length(rb), rb->rg_direction); 1309 if (ib_dma_mapping_error(device, rdmab_addr(rb))) { 1310 trace_xprtrdma_dma_maperr(rdmab_addr(rb)); 1311 return false; 1312 } 1313 1314 rb->rg_device = device; 1315 rb->rg_iov.lkey = r_xprt->rx_ep->re_pd->local_dma_lkey; 1316 return true; 1317 } 1318 1319 static void rpcrdma_regbuf_dma_unmap(struct rpcrdma_regbuf *rb) 1320 { 1321 if (!rb) 1322 return; 1323 1324 if (!rpcrdma_regbuf_is_mapped(rb)) 1325 return; 1326 1327 ib_dma_unmap_single(rb->rg_device, rdmab_addr(rb), rdmab_length(rb), 1328 rb->rg_direction); 1329 rb->rg_device = NULL; 1330 } 1331 1332 static void rpcrdma_regbuf_free(struct rpcrdma_regbuf *rb) 1333 { 1334 rpcrdma_regbuf_dma_unmap(rb); 1335 if (rb) 1336 kfree(rb->rg_data); 1337 kfree(rb); 1338 } 1339 1340 /** 1341 * rpcrdma_post_recvs - Refill the Receive Queue 1342 * @r_xprt: controlling transport instance 1343 * @needed: current credit grant 1344 * @temp: mark Receive buffers to be deleted after one use 1345 * 1346 */ 1347 void rpcrdma_post_recvs(struct rpcrdma_xprt *r_xprt, int needed, bool temp) 1348 { 1349 struct rpcrdma_buffer *buf = &r_xprt->rx_buf; 1350 struct rpcrdma_ep *ep = r_xprt->rx_ep; 1351 struct ib_recv_wr *wr, *bad_wr; 1352 struct rpcrdma_rep *rep; 1353 int count, rc; 1354 1355 rc = 0; 1356 count = 0; 1357 1358 if (likely(ep->re_receive_count > needed)) 1359 goto out; 1360 needed -= ep->re_receive_count; 1361 if (!temp) 1362 needed += RPCRDMA_MAX_RECV_BATCH; 1363 1364 if (atomic_inc_return(&ep->re_receiving) > 1) 1365 goto out; 1366 1367 /* fast path: all needed reps can be found on the free list */ 1368 wr = NULL; 1369 while (needed) { 1370 rep = rpcrdma_rep_get_locked(buf); 1371 if (rep && rep->rr_temp) { 1372 rpcrdma_rep_destroy(rep); 1373 continue; 1374 } 1375 if (!rep) 1376 rep = rpcrdma_rep_create(r_xprt, temp); 1377 if (!rep) 1378 break; 1379 if (!rpcrdma_regbuf_dma_map(r_xprt, rep->rr_rdmabuf)) { 1380 rpcrdma_rep_put(buf, rep); 1381 break; 1382 } 1383 1384 rep->rr_cid.ci_queue_id = ep->re_attr.recv_cq->res.id; 1385 trace_xprtrdma_post_recv(rep); 1386 rep->rr_recv_wr.next = wr; 1387 wr = &rep->rr_recv_wr; 1388 --needed; 1389 ++count; 1390 } 1391 if (!wr) 1392 goto out; 1393 1394 rc = ib_post_recv(ep->re_id->qp, wr, 1395 (const struct ib_recv_wr **)&bad_wr); 1396 if (rc) { 1397 trace_xprtrdma_post_recvs_err(r_xprt, rc); 1398 for (wr = bad_wr; wr;) { 1399 struct rpcrdma_rep *rep; 1400 1401 rep = container_of(wr, struct rpcrdma_rep, rr_recv_wr); 1402 wr = wr->next; 1403 rpcrdma_rep_put(buf, rep); 1404 --count; 1405 } 1406 } 1407 if (atomic_dec_return(&ep->re_receiving) > 0) 1408 complete(&ep->re_done); 1409 1410 out: 1411 trace_xprtrdma_post_recvs(r_xprt, count); 1412 ep->re_receive_count += count; 1413 return; 1414 } 1415