1 // SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause 2 /* 3 * Copyright (c) 2014-2017 Oracle. All rights reserved. 4 * Copyright (c) 2003-2007 Network Appliance, Inc. All rights reserved. 5 * 6 * This software is available to you under a choice of one of two 7 * licenses. You may choose to be licensed under the terms of the GNU 8 * General Public License (GPL) Version 2, available from the file 9 * COPYING in the main directory of this source tree, or the BSD-type 10 * license below: 11 * 12 * Redistribution and use in source and binary forms, with or without 13 * modification, are permitted provided that the following conditions 14 * are met: 15 * 16 * Redistributions of source code must retain the above copyright 17 * notice, this list of conditions and the following disclaimer. 18 * 19 * Redistributions in binary form must reproduce the above 20 * copyright notice, this list of conditions and the following 21 * disclaimer in the documentation and/or other materials provided 22 * with the distribution. 23 * 24 * Neither the name of the Network Appliance, Inc. nor the names of 25 * its contributors may be used to endorse or promote products 26 * derived from this software without specific prior written 27 * permission. 28 * 29 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 30 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 31 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 32 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 33 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 34 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 35 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 36 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 37 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 38 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 39 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 40 */ 41 42 /* 43 * verbs.c 44 * 45 * Encapsulates the major functions managing: 46 * o adapters 47 * o endpoints 48 * o connections 49 * o buffer memory 50 */ 51 52 #include <linux/interrupt.h> 53 #include <linux/slab.h> 54 #include <linux/sunrpc/addr.h> 55 #include <linux/sunrpc/svc_rdma.h> 56 #include <linux/log2.h> 57 58 #include <asm-generic/barrier.h> 59 #include <asm/bitops.h> 60 61 #include <rdma/ib_cm.h> 62 63 #include "xprt_rdma.h" 64 #include <trace/events/rpcrdma.h> 65 66 static int rpcrdma_sendctxs_create(struct rpcrdma_xprt *r_xprt); 67 static void rpcrdma_sendctxs_destroy(struct rpcrdma_xprt *r_xprt); 68 static void rpcrdma_sendctx_put_locked(struct rpcrdma_xprt *r_xprt, 69 struct rpcrdma_sendctx *sc); 70 static int rpcrdma_reqs_setup(struct rpcrdma_xprt *r_xprt); 71 static void rpcrdma_reqs_reset(struct rpcrdma_xprt *r_xprt); 72 static void rpcrdma_rep_destroy(struct rpcrdma_rep *rep); 73 static void rpcrdma_reps_unmap(struct rpcrdma_xprt *r_xprt); 74 static void rpcrdma_mrs_create(struct rpcrdma_xprt *r_xprt); 75 static void rpcrdma_mrs_destroy(struct rpcrdma_xprt *r_xprt); 76 static void rpcrdma_ep_get(struct rpcrdma_ep *ep); 77 static int rpcrdma_ep_put(struct rpcrdma_ep *ep); 78 static struct rpcrdma_regbuf * 79 rpcrdma_regbuf_alloc(size_t size, enum dma_data_direction direction, 80 gfp_t flags); 81 static void rpcrdma_regbuf_dma_unmap(struct rpcrdma_regbuf *rb); 82 static void rpcrdma_regbuf_free(struct rpcrdma_regbuf *rb); 83 84 /* Wait for outstanding transport work to finish. ib_drain_qp 85 * handles the drains in the wrong order for us, so open code 86 * them here. 87 */ 88 static void rpcrdma_xprt_drain(struct rpcrdma_xprt *r_xprt) 89 { 90 struct rpcrdma_ep *ep = r_xprt->rx_ep; 91 struct rdma_cm_id *id = ep->re_id; 92 93 /* Wait for rpcrdma_post_recvs() to leave its critical 94 * section. 95 */ 96 if (atomic_inc_return(&ep->re_receiving) > 1) 97 wait_for_completion(&ep->re_done); 98 99 /* Flush Receives, then wait for deferred Reply work 100 * to complete. 101 */ 102 ib_drain_rq(id->qp); 103 104 /* Deferred Reply processing might have scheduled 105 * local invalidations. 106 */ 107 ib_drain_sq(id->qp); 108 109 rpcrdma_ep_put(ep); 110 } 111 112 /* Ensure xprt_force_disconnect() is invoked exactly once when a 113 * connection is closed or lost. (The important thing is it needs 114 * to be invoked "at least" once). 115 */ 116 void rpcrdma_force_disconnect(struct rpcrdma_ep *ep) 117 { 118 if (atomic_add_unless(&ep->re_force_disconnect, 1, 1)) 119 xprt_force_disconnect(ep->re_xprt); 120 } 121 122 /** 123 * rpcrdma_flush_disconnect - Disconnect on flushed completion 124 * @r_xprt: transport to disconnect 125 * @wc: work completion entry 126 * 127 * Must be called in process context. 128 */ 129 void rpcrdma_flush_disconnect(struct rpcrdma_xprt *r_xprt, struct ib_wc *wc) 130 { 131 if (wc->status != IB_WC_SUCCESS) 132 rpcrdma_force_disconnect(r_xprt->rx_ep); 133 } 134 135 /** 136 * rpcrdma_wc_send - Invoked by RDMA provider for each polled Send WC 137 * @cq: completion queue 138 * @wc: WCE for a completed Send WR 139 * 140 */ 141 static void rpcrdma_wc_send(struct ib_cq *cq, struct ib_wc *wc) 142 { 143 struct ib_cqe *cqe = wc->wr_cqe; 144 struct rpcrdma_sendctx *sc = 145 container_of(cqe, struct rpcrdma_sendctx, sc_cqe); 146 struct rpcrdma_xprt *r_xprt = cq->cq_context; 147 148 /* WARNING: Only wr_cqe and status are reliable at this point */ 149 trace_xprtrdma_wc_send(wc, &sc->sc_cid); 150 rpcrdma_sendctx_put_locked(r_xprt, sc); 151 rpcrdma_flush_disconnect(r_xprt, wc); 152 } 153 154 /** 155 * rpcrdma_wc_receive - Invoked by RDMA provider for each polled Receive WC 156 * @cq: completion queue 157 * @wc: WCE for a completed Receive WR 158 * 159 */ 160 static void rpcrdma_wc_receive(struct ib_cq *cq, struct ib_wc *wc) 161 { 162 struct ib_cqe *cqe = wc->wr_cqe; 163 struct rpcrdma_rep *rep = container_of(cqe, struct rpcrdma_rep, 164 rr_cqe); 165 struct rpcrdma_xprt *r_xprt = cq->cq_context; 166 167 /* WARNING: Only wr_cqe and status are reliable at this point */ 168 trace_xprtrdma_wc_receive(wc, &rep->rr_cid); 169 --r_xprt->rx_ep->re_receive_count; 170 if (wc->status != IB_WC_SUCCESS) 171 goto out_flushed; 172 173 /* status == SUCCESS means all fields in wc are trustworthy */ 174 rpcrdma_set_xdrlen(&rep->rr_hdrbuf, wc->byte_len); 175 rep->rr_wc_flags = wc->wc_flags; 176 rep->rr_inv_rkey = wc->ex.invalidate_rkey; 177 178 ib_dma_sync_single_for_cpu(rdmab_device(rep->rr_rdmabuf), 179 rdmab_addr(rep->rr_rdmabuf), 180 wc->byte_len, DMA_FROM_DEVICE); 181 182 rpcrdma_reply_handler(rep); 183 return; 184 185 out_flushed: 186 rpcrdma_flush_disconnect(r_xprt, wc); 187 rpcrdma_rep_put(&r_xprt->rx_buf, rep); 188 } 189 190 static void rpcrdma_update_cm_private(struct rpcrdma_ep *ep, 191 struct rdma_conn_param *param) 192 { 193 const struct rpcrdma_connect_private *pmsg = param->private_data; 194 unsigned int rsize, wsize; 195 196 /* Default settings for RPC-over-RDMA Version One */ 197 rsize = RPCRDMA_V1_DEF_INLINE_SIZE; 198 wsize = RPCRDMA_V1_DEF_INLINE_SIZE; 199 200 if (pmsg && 201 pmsg->cp_magic == rpcrdma_cmp_magic && 202 pmsg->cp_version == RPCRDMA_CMP_VERSION) { 203 rsize = rpcrdma_decode_buffer_size(pmsg->cp_send_size); 204 wsize = rpcrdma_decode_buffer_size(pmsg->cp_recv_size); 205 } 206 207 if (rsize < ep->re_inline_recv) 208 ep->re_inline_recv = rsize; 209 if (wsize < ep->re_inline_send) 210 ep->re_inline_send = wsize; 211 212 rpcrdma_set_max_header_sizes(ep); 213 } 214 215 /** 216 * rpcrdma_cm_event_handler - Handle RDMA CM events 217 * @id: rdma_cm_id on which an event has occurred 218 * @event: details of the event 219 * 220 * Called with @id's mutex held. Returns 1 if caller should 221 * destroy @id, otherwise 0. 222 */ 223 static int 224 rpcrdma_cm_event_handler(struct rdma_cm_id *id, struct rdma_cm_event *event) 225 { 226 struct sockaddr *sap = (struct sockaddr *)&id->route.addr.dst_addr; 227 struct rpcrdma_ep *ep = id->context; 228 229 might_sleep(); 230 231 switch (event->event) { 232 case RDMA_CM_EVENT_ADDR_RESOLVED: 233 case RDMA_CM_EVENT_ROUTE_RESOLVED: 234 ep->re_async_rc = 0; 235 complete(&ep->re_done); 236 return 0; 237 case RDMA_CM_EVENT_ADDR_ERROR: 238 ep->re_async_rc = -EPROTO; 239 complete(&ep->re_done); 240 return 0; 241 case RDMA_CM_EVENT_ROUTE_ERROR: 242 ep->re_async_rc = -ENETUNREACH; 243 complete(&ep->re_done); 244 return 0; 245 case RDMA_CM_EVENT_DEVICE_REMOVAL: 246 pr_info("rpcrdma: removing device %s for %pISpc\n", 247 ep->re_id->device->name, sap); 248 fallthrough; 249 case RDMA_CM_EVENT_ADDR_CHANGE: 250 ep->re_connect_status = -ENODEV; 251 goto disconnected; 252 case RDMA_CM_EVENT_ESTABLISHED: 253 rpcrdma_ep_get(ep); 254 ep->re_connect_status = 1; 255 rpcrdma_update_cm_private(ep, &event->param.conn); 256 trace_xprtrdma_inline_thresh(ep); 257 wake_up_all(&ep->re_connect_wait); 258 break; 259 case RDMA_CM_EVENT_CONNECT_ERROR: 260 ep->re_connect_status = -ENOTCONN; 261 goto wake_connect_worker; 262 case RDMA_CM_EVENT_UNREACHABLE: 263 ep->re_connect_status = -ENETUNREACH; 264 goto wake_connect_worker; 265 case RDMA_CM_EVENT_REJECTED: 266 ep->re_connect_status = -ECONNREFUSED; 267 if (event->status == IB_CM_REJ_STALE_CONN) 268 ep->re_connect_status = -ENOTCONN; 269 wake_connect_worker: 270 wake_up_all(&ep->re_connect_wait); 271 return 0; 272 case RDMA_CM_EVENT_DISCONNECTED: 273 ep->re_connect_status = -ECONNABORTED; 274 disconnected: 275 rpcrdma_force_disconnect(ep); 276 return rpcrdma_ep_put(ep); 277 default: 278 break; 279 } 280 281 return 0; 282 } 283 284 static struct rdma_cm_id *rpcrdma_create_id(struct rpcrdma_xprt *r_xprt, 285 struct rpcrdma_ep *ep) 286 { 287 unsigned long wtimeout = msecs_to_jiffies(RDMA_RESOLVE_TIMEOUT) + 1; 288 struct rpc_xprt *xprt = &r_xprt->rx_xprt; 289 struct rdma_cm_id *id; 290 int rc; 291 292 init_completion(&ep->re_done); 293 294 id = rdma_create_id(xprt->xprt_net, rpcrdma_cm_event_handler, ep, 295 RDMA_PS_TCP, IB_QPT_RC); 296 if (IS_ERR(id)) 297 return id; 298 299 ep->re_async_rc = -ETIMEDOUT; 300 rc = rdma_resolve_addr(id, NULL, (struct sockaddr *)&xprt->addr, 301 RDMA_RESOLVE_TIMEOUT); 302 if (rc) 303 goto out; 304 rc = wait_for_completion_interruptible_timeout(&ep->re_done, wtimeout); 305 if (rc < 0) 306 goto out; 307 308 rc = ep->re_async_rc; 309 if (rc) 310 goto out; 311 312 ep->re_async_rc = -ETIMEDOUT; 313 rc = rdma_resolve_route(id, RDMA_RESOLVE_TIMEOUT); 314 if (rc) 315 goto out; 316 rc = wait_for_completion_interruptible_timeout(&ep->re_done, wtimeout); 317 if (rc < 0) 318 goto out; 319 rc = ep->re_async_rc; 320 if (rc) 321 goto out; 322 323 return id; 324 325 out: 326 rdma_destroy_id(id); 327 return ERR_PTR(rc); 328 } 329 330 static void rpcrdma_ep_destroy(struct kref *kref) 331 { 332 struct rpcrdma_ep *ep = container_of(kref, struct rpcrdma_ep, re_kref); 333 334 if (ep->re_id->qp) { 335 rdma_destroy_qp(ep->re_id); 336 ep->re_id->qp = NULL; 337 } 338 339 if (ep->re_attr.recv_cq) 340 ib_free_cq(ep->re_attr.recv_cq); 341 ep->re_attr.recv_cq = NULL; 342 if (ep->re_attr.send_cq) 343 ib_free_cq(ep->re_attr.send_cq); 344 ep->re_attr.send_cq = NULL; 345 346 if (ep->re_pd) 347 ib_dealloc_pd(ep->re_pd); 348 ep->re_pd = NULL; 349 350 kfree(ep); 351 module_put(THIS_MODULE); 352 } 353 354 static noinline void rpcrdma_ep_get(struct rpcrdma_ep *ep) 355 { 356 kref_get(&ep->re_kref); 357 } 358 359 /* Returns: 360 * %0 if @ep still has a positive kref count, or 361 * %1 if @ep was destroyed successfully. 362 */ 363 static noinline int rpcrdma_ep_put(struct rpcrdma_ep *ep) 364 { 365 return kref_put(&ep->re_kref, rpcrdma_ep_destroy); 366 } 367 368 static int rpcrdma_ep_create(struct rpcrdma_xprt *r_xprt) 369 { 370 struct rpcrdma_connect_private *pmsg; 371 struct ib_device *device; 372 struct rdma_cm_id *id; 373 struct rpcrdma_ep *ep; 374 int rc; 375 376 ep = kzalloc(sizeof(*ep), GFP_NOFS); 377 if (!ep) 378 return -ENOTCONN; 379 ep->re_xprt = &r_xprt->rx_xprt; 380 kref_init(&ep->re_kref); 381 382 id = rpcrdma_create_id(r_xprt, ep); 383 if (IS_ERR(id)) { 384 kfree(ep); 385 return PTR_ERR(id); 386 } 387 __module_get(THIS_MODULE); 388 device = id->device; 389 ep->re_id = id; 390 reinit_completion(&ep->re_done); 391 392 ep->re_max_requests = r_xprt->rx_xprt.max_reqs; 393 ep->re_inline_send = xprt_rdma_max_inline_write; 394 ep->re_inline_recv = xprt_rdma_max_inline_read; 395 rc = frwr_query_device(ep, device); 396 if (rc) 397 goto out_destroy; 398 399 r_xprt->rx_buf.rb_max_requests = cpu_to_be32(ep->re_max_requests); 400 401 ep->re_attr.srq = NULL; 402 ep->re_attr.cap.max_inline_data = 0; 403 ep->re_attr.sq_sig_type = IB_SIGNAL_REQ_WR; 404 ep->re_attr.qp_type = IB_QPT_RC; 405 ep->re_attr.port_num = ~0; 406 407 ep->re_send_batch = ep->re_max_requests >> 3; 408 ep->re_send_count = ep->re_send_batch; 409 init_waitqueue_head(&ep->re_connect_wait); 410 411 ep->re_attr.send_cq = ib_alloc_cq_any(device, r_xprt, 412 ep->re_attr.cap.max_send_wr, 413 IB_POLL_WORKQUEUE); 414 if (IS_ERR(ep->re_attr.send_cq)) { 415 rc = PTR_ERR(ep->re_attr.send_cq); 416 goto out_destroy; 417 } 418 419 ep->re_attr.recv_cq = ib_alloc_cq_any(device, r_xprt, 420 ep->re_attr.cap.max_recv_wr, 421 IB_POLL_WORKQUEUE); 422 if (IS_ERR(ep->re_attr.recv_cq)) { 423 rc = PTR_ERR(ep->re_attr.recv_cq); 424 goto out_destroy; 425 } 426 ep->re_receive_count = 0; 427 428 /* Initialize cma parameters */ 429 memset(&ep->re_remote_cma, 0, sizeof(ep->re_remote_cma)); 430 431 /* Prepare RDMA-CM private message */ 432 pmsg = &ep->re_cm_private; 433 pmsg->cp_magic = rpcrdma_cmp_magic; 434 pmsg->cp_version = RPCRDMA_CMP_VERSION; 435 pmsg->cp_flags |= RPCRDMA_CMP_F_SND_W_INV_OK; 436 pmsg->cp_send_size = rpcrdma_encode_buffer_size(ep->re_inline_send); 437 pmsg->cp_recv_size = rpcrdma_encode_buffer_size(ep->re_inline_recv); 438 ep->re_remote_cma.private_data = pmsg; 439 ep->re_remote_cma.private_data_len = sizeof(*pmsg); 440 441 /* Client offers RDMA Read but does not initiate */ 442 ep->re_remote_cma.initiator_depth = 0; 443 ep->re_remote_cma.responder_resources = 444 min_t(int, U8_MAX, device->attrs.max_qp_rd_atom); 445 446 /* Limit transport retries so client can detect server 447 * GID changes quickly. RPC layer handles re-establishing 448 * transport connection and retransmission. 449 */ 450 ep->re_remote_cma.retry_count = 6; 451 452 /* RPC-over-RDMA handles its own flow control. In addition, 453 * make all RNR NAKs visible so we know that RPC-over-RDMA 454 * flow control is working correctly (no NAKs should be seen). 455 */ 456 ep->re_remote_cma.flow_control = 0; 457 ep->re_remote_cma.rnr_retry_count = 0; 458 459 ep->re_pd = ib_alloc_pd(device, 0); 460 if (IS_ERR(ep->re_pd)) { 461 rc = PTR_ERR(ep->re_pd); 462 goto out_destroy; 463 } 464 465 rc = rdma_create_qp(id, ep->re_pd, &ep->re_attr); 466 if (rc) 467 goto out_destroy; 468 469 r_xprt->rx_ep = ep; 470 return 0; 471 472 out_destroy: 473 rpcrdma_ep_put(ep); 474 rdma_destroy_id(id); 475 return rc; 476 } 477 478 /** 479 * rpcrdma_xprt_connect - Connect an unconnected transport 480 * @r_xprt: controlling transport instance 481 * 482 * Returns 0 on success or a negative errno. 483 */ 484 int rpcrdma_xprt_connect(struct rpcrdma_xprt *r_xprt) 485 { 486 struct rpc_xprt *xprt = &r_xprt->rx_xprt; 487 struct rpcrdma_ep *ep; 488 int rc; 489 490 rc = rpcrdma_ep_create(r_xprt); 491 if (rc) 492 return rc; 493 ep = r_xprt->rx_ep; 494 495 xprt_clear_connected(xprt); 496 rpcrdma_reset_cwnd(r_xprt); 497 498 /* Bump the ep's reference count while there are 499 * outstanding Receives. 500 */ 501 rpcrdma_ep_get(ep); 502 rpcrdma_post_recvs(r_xprt, 1, true); 503 504 rc = rdma_connect(ep->re_id, &ep->re_remote_cma); 505 if (rc) 506 goto out; 507 508 if (xprt->reestablish_timeout < RPCRDMA_INIT_REEST_TO) 509 xprt->reestablish_timeout = RPCRDMA_INIT_REEST_TO; 510 wait_event_interruptible(ep->re_connect_wait, 511 ep->re_connect_status != 0); 512 if (ep->re_connect_status <= 0) { 513 rc = ep->re_connect_status; 514 goto out; 515 } 516 517 rc = rpcrdma_sendctxs_create(r_xprt); 518 if (rc) { 519 rc = -ENOTCONN; 520 goto out; 521 } 522 523 rc = rpcrdma_reqs_setup(r_xprt); 524 if (rc) { 525 rc = -ENOTCONN; 526 goto out; 527 } 528 rpcrdma_mrs_create(r_xprt); 529 frwr_wp_create(r_xprt); 530 531 out: 532 trace_xprtrdma_connect(r_xprt, rc); 533 return rc; 534 } 535 536 /** 537 * rpcrdma_xprt_disconnect - Disconnect underlying transport 538 * @r_xprt: controlling transport instance 539 * 540 * Caller serializes. Either the transport send lock is held, 541 * or we're being called to destroy the transport. 542 * 543 * On return, @r_xprt is completely divested of all hardware 544 * resources and prepared for the next ->connect operation. 545 */ 546 void rpcrdma_xprt_disconnect(struct rpcrdma_xprt *r_xprt) 547 { 548 struct rpcrdma_ep *ep = r_xprt->rx_ep; 549 struct rdma_cm_id *id; 550 int rc; 551 552 if (!ep) 553 return; 554 555 id = ep->re_id; 556 rc = rdma_disconnect(id); 557 trace_xprtrdma_disconnect(r_xprt, rc); 558 559 rpcrdma_xprt_drain(r_xprt); 560 rpcrdma_reps_unmap(r_xprt); 561 rpcrdma_reqs_reset(r_xprt); 562 rpcrdma_mrs_destroy(r_xprt); 563 rpcrdma_sendctxs_destroy(r_xprt); 564 565 if (rpcrdma_ep_put(ep)) 566 rdma_destroy_id(id); 567 568 r_xprt->rx_ep = NULL; 569 } 570 571 /* Fixed-size circular FIFO queue. This implementation is wait-free and 572 * lock-free. 573 * 574 * Consumer is the code path that posts Sends. This path dequeues a 575 * sendctx for use by a Send operation. Multiple consumer threads 576 * are serialized by the RPC transport lock, which allows only one 577 * ->send_request call at a time. 578 * 579 * Producer is the code path that handles Send completions. This path 580 * enqueues a sendctx that has been completed. Multiple producer 581 * threads are serialized by the ib_poll_cq() function. 582 */ 583 584 /* rpcrdma_sendctxs_destroy() assumes caller has already quiesced 585 * queue activity, and rpcrdma_xprt_drain has flushed all remaining 586 * Send requests. 587 */ 588 static void rpcrdma_sendctxs_destroy(struct rpcrdma_xprt *r_xprt) 589 { 590 struct rpcrdma_buffer *buf = &r_xprt->rx_buf; 591 unsigned long i; 592 593 if (!buf->rb_sc_ctxs) 594 return; 595 for (i = 0; i <= buf->rb_sc_last; i++) 596 kfree(buf->rb_sc_ctxs[i]); 597 kfree(buf->rb_sc_ctxs); 598 buf->rb_sc_ctxs = NULL; 599 } 600 601 static struct rpcrdma_sendctx *rpcrdma_sendctx_create(struct rpcrdma_ep *ep) 602 { 603 struct rpcrdma_sendctx *sc; 604 605 sc = kzalloc(struct_size(sc, sc_sges, ep->re_attr.cap.max_send_sge), 606 GFP_KERNEL); 607 if (!sc) 608 return NULL; 609 610 sc->sc_cqe.done = rpcrdma_wc_send; 611 sc->sc_cid.ci_queue_id = ep->re_attr.send_cq->res.id; 612 sc->sc_cid.ci_completion_id = 613 atomic_inc_return(&ep->re_completion_ids); 614 return sc; 615 } 616 617 static int rpcrdma_sendctxs_create(struct rpcrdma_xprt *r_xprt) 618 { 619 struct rpcrdma_buffer *buf = &r_xprt->rx_buf; 620 struct rpcrdma_sendctx *sc; 621 unsigned long i; 622 623 /* Maximum number of concurrent outstanding Send WRs. Capping 624 * the circular queue size stops Send Queue overflow by causing 625 * the ->send_request call to fail temporarily before too many 626 * Sends are posted. 627 */ 628 i = r_xprt->rx_ep->re_max_requests + RPCRDMA_MAX_BC_REQUESTS; 629 buf->rb_sc_ctxs = kcalloc(i, sizeof(sc), GFP_KERNEL); 630 if (!buf->rb_sc_ctxs) 631 return -ENOMEM; 632 633 buf->rb_sc_last = i - 1; 634 for (i = 0; i <= buf->rb_sc_last; i++) { 635 sc = rpcrdma_sendctx_create(r_xprt->rx_ep); 636 if (!sc) 637 return -ENOMEM; 638 639 buf->rb_sc_ctxs[i] = sc; 640 } 641 642 buf->rb_sc_head = 0; 643 buf->rb_sc_tail = 0; 644 return 0; 645 } 646 647 /* The sendctx queue is not guaranteed to have a size that is a 648 * power of two, thus the helpers in circ_buf.h cannot be used. 649 * The other option is to use modulus (%), which can be expensive. 650 */ 651 static unsigned long rpcrdma_sendctx_next(struct rpcrdma_buffer *buf, 652 unsigned long item) 653 { 654 return likely(item < buf->rb_sc_last) ? item + 1 : 0; 655 } 656 657 /** 658 * rpcrdma_sendctx_get_locked - Acquire a send context 659 * @r_xprt: controlling transport instance 660 * 661 * Returns pointer to a free send completion context; or NULL if 662 * the queue is empty. 663 * 664 * Usage: Called to acquire an SGE array before preparing a Send WR. 665 * 666 * The caller serializes calls to this function (per transport), and 667 * provides an effective memory barrier that flushes the new value 668 * of rb_sc_head. 669 */ 670 struct rpcrdma_sendctx *rpcrdma_sendctx_get_locked(struct rpcrdma_xprt *r_xprt) 671 { 672 struct rpcrdma_buffer *buf = &r_xprt->rx_buf; 673 struct rpcrdma_sendctx *sc; 674 unsigned long next_head; 675 676 next_head = rpcrdma_sendctx_next(buf, buf->rb_sc_head); 677 678 if (next_head == READ_ONCE(buf->rb_sc_tail)) 679 goto out_emptyq; 680 681 /* ORDER: item must be accessed _before_ head is updated */ 682 sc = buf->rb_sc_ctxs[next_head]; 683 684 /* Releasing the lock in the caller acts as a memory 685 * barrier that flushes rb_sc_head. 686 */ 687 buf->rb_sc_head = next_head; 688 689 return sc; 690 691 out_emptyq: 692 /* The queue is "empty" if there have not been enough Send 693 * completions recently. This is a sign the Send Queue is 694 * backing up. Cause the caller to pause and try again. 695 */ 696 xprt_wait_for_buffer_space(&r_xprt->rx_xprt); 697 r_xprt->rx_stats.empty_sendctx_q++; 698 return NULL; 699 } 700 701 /** 702 * rpcrdma_sendctx_put_locked - Release a send context 703 * @r_xprt: controlling transport instance 704 * @sc: send context to release 705 * 706 * Usage: Called from Send completion to return a sendctxt 707 * to the queue. 708 * 709 * The caller serializes calls to this function (per transport). 710 */ 711 static void rpcrdma_sendctx_put_locked(struct rpcrdma_xprt *r_xprt, 712 struct rpcrdma_sendctx *sc) 713 { 714 struct rpcrdma_buffer *buf = &r_xprt->rx_buf; 715 unsigned long next_tail; 716 717 /* Unmap SGEs of previously completed but unsignaled 718 * Sends by walking up the queue until @sc is found. 719 */ 720 next_tail = buf->rb_sc_tail; 721 do { 722 next_tail = rpcrdma_sendctx_next(buf, next_tail); 723 724 /* ORDER: item must be accessed _before_ tail is updated */ 725 rpcrdma_sendctx_unmap(buf->rb_sc_ctxs[next_tail]); 726 727 } while (buf->rb_sc_ctxs[next_tail] != sc); 728 729 /* Paired with READ_ONCE */ 730 smp_store_release(&buf->rb_sc_tail, next_tail); 731 732 xprt_write_space(&r_xprt->rx_xprt); 733 } 734 735 static void 736 rpcrdma_mrs_create(struct rpcrdma_xprt *r_xprt) 737 { 738 struct rpcrdma_buffer *buf = &r_xprt->rx_buf; 739 struct rpcrdma_ep *ep = r_xprt->rx_ep; 740 unsigned int count; 741 742 for (count = 0; count < ep->re_max_rdma_segs; count++) { 743 struct rpcrdma_mr *mr; 744 int rc; 745 746 mr = kzalloc(sizeof(*mr), GFP_NOFS); 747 if (!mr) 748 break; 749 750 rc = frwr_mr_init(r_xprt, mr); 751 if (rc) { 752 kfree(mr); 753 break; 754 } 755 756 spin_lock(&buf->rb_lock); 757 rpcrdma_mr_push(mr, &buf->rb_mrs); 758 list_add(&mr->mr_all, &buf->rb_all_mrs); 759 spin_unlock(&buf->rb_lock); 760 } 761 762 r_xprt->rx_stats.mrs_allocated += count; 763 trace_xprtrdma_createmrs(r_xprt, count); 764 } 765 766 static void 767 rpcrdma_mr_refresh_worker(struct work_struct *work) 768 { 769 struct rpcrdma_buffer *buf = container_of(work, struct rpcrdma_buffer, 770 rb_refresh_worker); 771 struct rpcrdma_xprt *r_xprt = container_of(buf, struct rpcrdma_xprt, 772 rx_buf); 773 774 rpcrdma_mrs_create(r_xprt); 775 xprt_write_space(&r_xprt->rx_xprt); 776 } 777 778 /** 779 * rpcrdma_mrs_refresh - Wake the MR refresh worker 780 * @r_xprt: controlling transport instance 781 * 782 */ 783 void rpcrdma_mrs_refresh(struct rpcrdma_xprt *r_xprt) 784 { 785 struct rpcrdma_buffer *buf = &r_xprt->rx_buf; 786 struct rpcrdma_ep *ep = r_xprt->rx_ep; 787 788 /* If there is no underlying connection, it's no use 789 * to wake the refresh worker. 790 */ 791 if (ep->re_connect_status == 1) { 792 /* The work is scheduled on a WQ_MEM_RECLAIM 793 * workqueue in order to prevent MR allocation 794 * from recursing into NFS during direct reclaim. 795 */ 796 queue_work(xprtiod_workqueue, &buf->rb_refresh_worker); 797 } 798 } 799 800 /** 801 * rpcrdma_req_create - Allocate an rpcrdma_req object 802 * @r_xprt: controlling r_xprt 803 * @size: initial size, in bytes, of send and receive buffers 804 * @flags: GFP flags passed to memory allocators 805 * 806 * Returns an allocated and fully initialized rpcrdma_req or NULL. 807 */ 808 struct rpcrdma_req *rpcrdma_req_create(struct rpcrdma_xprt *r_xprt, size_t size, 809 gfp_t flags) 810 { 811 struct rpcrdma_buffer *buffer = &r_xprt->rx_buf; 812 struct rpcrdma_req *req; 813 814 req = kzalloc(sizeof(*req), flags); 815 if (req == NULL) 816 goto out1; 817 818 req->rl_sendbuf = rpcrdma_regbuf_alloc(size, DMA_TO_DEVICE, flags); 819 if (!req->rl_sendbuf) 820 goto out2; 821 822 req->rl_recvbuf = rpcrdma_regbuf_alloc(size, DMA_NONE, flags); 823 if (!req->rl_recvbuf) 824 goto out3; 825 826 INIT_LIST_HEAD(&req->rl_free_mrs); 827 INIT_LIST_HEAD(&req->rl_registered); 828 spin_lock(&buffer->rb_lock); 829 list_add(&req->rl_all, &buffer->rb_allreqs); 830 spin_unlock(&buffer->rb_lock); 831 return req; 832 833 out3: 834 kfree(req->rl_sendbuf); 835 out2: 836 kfree(req); 837 out1: 838 return NULL; 839 } 840 841 /** 842 * rpcrdma_req_setup - Per-connection instance setup of an rpcrdma_req object 843 * @r_xprt: controlling transport instance 844 * @req: rpcrdma_req object to set up 845 * 846 * Returns zero on success, and a negative errno on failure. 847 */ 848 int rpcrdma_req_setup(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req) 849 { 850 struct rpcrdma_regbuf *rb; 851 size_t maxhdrsize; 852 853 /* Compute maximum header buffer size in bytes */ 854 maxhdrsize = rpcrdma_fixed_maxsz + 3 + 855 r_xprt->rx_ep->re_max_rdma_segs * rpcrdma_readchunk_maxsz; 856 maxhdrsize *= sizeof(__be32); 857 rb = rpcrdma_regbuf_alloc(__roundup_pow_of_two(maxhdrsize), 858 DMA_TO_DEVICE, GFP_KERNEL); 859 if (!rb) 860 goto out; 861 862 if (!__rpcrdma_regbuf_dma_map(r_xprt, rb)) 863 goto out_free; 864 865 req->rl_rdmabuf = rb; 866 xdr_buf_init(&req->rl_hdrbuf, rdmab_data(rb), rdmab_length(rb)); 867 return 0; 868 869 out_free: 870 rpcrdma_regbuf_free(rb); 871 out: 872 return -ENOMEM; 873 } 874 875 /* ASSUMPTION: the rb_allreqs list is stable for the duration, 876 * and thus can be walked without holding rb_lock. Eg. the 877 * caller is holding the transport send lock to exclude 878 * device removal or disconnection. 879 */ 880 static int rpcrdma_reqs_setup(struct rpcrdma_xprt *r_xprt) 881 { 882 struct rpcrdma_buffer *buf = &r_xprt->rx_buf; 883 struct rpcrdma_req *req; 884 int rc; 885 886 list_for_each_entry(req, &buf->rb_allreqs, rl_all) { 887 rc = rpcrdma_req_setup(r_xprt, req); 888 if (rc) 889 return rc; 890 } 891 return 0; 892 } 893 894 static void rpcrdma_req_reset(struct rpcrdma_req *req) 895 { 896 /* Credits are valid for only one connection */ 897 req->rl_slot.rq_cong = 0; 898 899 rpcrdma_regbuf_free(req->rl_rdmabuf); 900 req->rl_rdmabuf = NULL; 901 902 rpcrdma_regbuf_dma_unmap(req->rl_sendbuf); 903 rpcrdma_regbuf_dma_unmap(req->rl_recvbuf); 904 905 frwr_reset(req); 906 } 907 908 /* ASSUMPTION: the rb_allreqs list is stable for the duration, 909 * and thus can be walked without holding rb_lock. Eg. the 910 * caller is holding the transport send lock to exclude 911 * device removal or disconnection. 912 */ 913 static void rpcrdma_reqs_reset(struct rpcrdma_xprt *r_xprt) 914 { 915 struct rpcrdma_buffer *buf = &r_xprt->rx_buf; 916 struct rpcrdma_req *req; 917 918 list_for_each_entry(req, &buf->rb_allreqs, rl_all) 919 rpcrdma_req_reset(req); 920 } 921 922 static noinline 923 struct rpcrdma_rep *rpcrdma_rep_create(struct rpcrdma_xprt *r_xprt, 924 bool temp) 925 { 926 struct rpcrdma_buffer *buf = &r_xprt->rx_buf; 927 struct rpcrdma_rep *rep; 928 929 rep = kzalloc(sizeof(*rep), GFP_KERNEL); 930 if (rep == NULL) 931 goto out; 932 933 rep->rr_rdmabuf = rpcrdma_regbuf_alloc(r_xprt->rx_ep->re_inline_recv, 934 DMA_FROM_DEVICE, GFP_KERNEL); 935 if (!rep->rr_rdmabuf) 936 goto out_free; 937 938 if (!rpcrdma_regbuf_dma_map(r_xprt, rep->rr_rdmabuf)) 939 goto out_free_regbuf; 940 941 rep->rr_cid.ci_completion_id = 942 atomic_inc_return(&r_xprt->rx_ep->re_completion_ids); 943 944 xdr_buf_init(&rep->rr_hdrbuf, rdmab_data(rep->rr_rdmabuf), 945 rdmab_length(rep->rr_rdmabuf)); 946 rep->rr_cqe.done = rpcrdma_wc_receive; 947 rep->rr_rxprt = r_xprt; 948 rep->rr_recv_wr.next = NULL; 949 rep->rr_recv_wr.wr_cqe = &rep->rr_cqe; 950 rep->rr_recv_wr.sg_list = &rep->rr_rdmabuf->rg_iov; 951 rep->rr_recv_wr.num_sge = 1; 952 rep->rr_temp = temp; 953 954 spin_lock(&buf->rb_lock); 955 list_add(&rep->rr_all, &buf->rb_all_reps); 956 spin_unlock(&buf->rb_lock); 957 return rep; 958 959 out_free_regbuf: 960 rpcrdma_regbuf_free(rep->rr_rdmabuf); 961 out_free: 962 kfree(rep); 963 out: 964 return NULL; 965 } 966 967 static void rpcrdma_rep_free(struct rpcrdma_rep *rep) 968 { 969 rpcrdma_regbuf_free(rep->rr_rdmabuf); 970 kfree(rep); 971 } 972 973 static void rpcrdma_rep_destroy(struct rpcrdma_rep *rep) 974 { 975 struct rpcrdma_buffer *buf = &rep->rr_rxprt->rx_buf; 976 977 spin_lock(&buf->rb_lock); 978 list_del(&rep->rr_all); 979 spin_unlock(&buf->rb_lock); 980 981 rpcrdma_rep_free(rep); 982 } 983 984 static struct rpcrdma_rep *rpcrdma_rep_get_locked(struct rpcrdma_buffer *buf) 985 { 986 struct llist_node *node; 987 988 /* Calls to llist_del_first are required to be serialized */ 989 node = llist_del_first(&buf->rb_free_reps); 990 if (!node) 991 return NULL; 992 return llist_entry(node, struct rpcrdma_rep, rr_node); 993 } 994 995 /** 996 * rpcrdma_rep_put - Release rpcrdma_rep back to free list 997 * @buf: buffer pool 998 * @rep: rep to release 999 * 1000 */ 1001 void rpcrdma_rep_put(struct rpcrdma_buffer *buf, struct rpcrdma_rep *rep) 1002 { 1003 llist_add(&rep->rr_node, &buf->rb_free_reps); 1004 } 1005 1006 /* Caller must ensure the QP is quiescent (RQ is drained) before 1007 * invoking this function, to guarantee rb_all_reps is not 1008 * changing. 1009 */ 1010 static void rpcrdma_reps_unmap(struct rpcrdma_xprt *r_xprt) 1011 { 1012 struct rpcrdma_buffer *buf = &r_xprt->rx_buf; 1013 struct rpcrdma_rep *rep; 1014 1015 list_for_each_entry(rep, &buf->rb_all_reps, rr_all) { 1016 rpcrdma_regbuf_dma_unmap(rep->rr_rdmabuf); 1017 rep->rr_temp = true; /* Mark this rep for destruction */ 1018 } 1019 } 1020 1021 static void rpcrdma_reps_destroy(struct rpcrdma_buffer *buf) 1022 { 1023 struct rpcrdma_rep *rep; 1024 1025 spin_lock(&buf->rb_lock); 1026 while ((rep = list_first_entry_or_null(&buf->rb_all_reps, 1027 struct rpcrdma_rep, 1028 rr_all)) != NULL) { 1029 list_del(&rep->rr_all); 1030 spin_unlock(&buf->rb_lock); 1031 1032 rpcrdma_rep_free(rep); 1033 1034 spin_lock(&buf->rb_lock); 1035 } 1036 spin_unlock(&buf->rb_lock); 1037 } 1038 1039 /** 1040 * rpcrdma_buffer_create - Create initial set of req/rep objects 1041 * @r_xprt: transport instance to (re)initialize 1042 * 1043 * Returns zero on success, otherwise a negative errno. 1044 */ 1045 int rpcrdma_buffer_create(struct rpcrdma_xprt *r_xprt) 1046 { 1047 struct rpcrdma_buffer *buf = &r_xprt->rx_buf; 1048 int i, rc; 1049 1050 buf->rb_bc_srv_max_requests = 0; 1051 spin_lock_init(&buf->rb_lock); 1052 INIT_LIST_HEAD(&buf->rb_mrs); 1053 INIT_LIST_HEAD(&buf->rb_all_mrs); 1054 INIT_WORK(&buf->rb_refresh_worker, rpcrdma_mr_refresh_worker); 1055 1056 INIT_LIST_HEAD(&buf->rb_send_bufs); 1057 INIT_LIST_HEAD(&buf->rb_allreqs); 1058 INIT_LIST_HEAD(&buf->rb_all_reps); 1059 1060 rc = -ENOMEM; 1061 for (i = 0; i < r_xprt->rx_xprt.max_reqs; i++) { 1062 struct rpcrdma_req *req; 1063 1064 req = rpcrdma_req_create(r_xprt, RPCRDMA_V1_DEF_INLINE_SIZE * 2, 1065 GFP_KERNEL); 1066 if (!req) 1067 goto out; 1068 list_add(&req->rl_list, &buf->rb_send_bufs); 1069 } 1070 1071 init_llist_head(&buf->rb_free_reps); 1072 1073 return 0; 1074 out: 1075 rpcrdma_buffer_destroy(buf); 1076 return rc; 1077 } 1078 1079 /** 1080 * rpcrdma_req_destroy - Destroy an rpcrdma_req object 1081 * @req: unused object to be destroyed 1082 * 1083 * Relies on caller holding the transport send lock to protect 1084 * removing req->rl_all from buf->rb_all_reqs safely. 1085 */ 1086 void rpcrdma_req_destroy(struct rpcrdma_req *req) 1087 { 1088 struct rpcrdma_mr *mr; 1089 1090 list_del(&req->rl_all); 1091 1092 while ((mr = rpcrdma_mr_pop(&req->rl_free_mrs))) { 1093 struct rpcrdma_buffer *buf = &mr->mr_xprt->rx_buf; 1094 1095 spin_lock(&buf->rb_lock); 1096 list_del(&mr->mr_all); 1097 spin_unlock(&buf->rb_lock); 1098 1099 frwr_mr_release(mr); 1100 } 1101 1102 rpcrdma_regbuf_free(req->rl_recvbuf); 1103 rpcrdma_regbuf_free(req->rl_sendbuf); 1104 rpcrdma_regbuf_free(req->rl_rdmabuf); 1105 kfree(req); 1106 } 1107 1108 /** 1109 * rpcrdma_mrs_destroy - Release all of a transport's MRs 1110 * @r_xprt: controlling transport instance 1111 * 1112 * Relies on caller holding the transport send lock to protect 1113 * removing mr->mr_list from req->rl_free_mrs safely. 1114 */ 1115 static void rpcrdma_mrs_destroy(struct rpcrdma_xprt *r_xprt) 1116 { 1117 struct rpcrdma_buffer *buf = &r_xprt->rx_buf; 1118 struct rpcrdma_mr *mr; 1119 1120 cancel_work_sync(&buf->rb_refresh_worker); 1121 1122 spin_lock(&buf->rb_lock); 1123 while ((mr = list_first_entry_or_null(&buf->rb_all_mrs, 1124 struct rpcrdma_mr, 1125 mr_all)) != NULL) { 1126 list_del(&mr->mr_list); 1127 list_del(&mr->mr_all); 1128 spin_unlock(&buf->rb_lock); 1129 1130 frwr_mr_release(mr); 1131 1132 spin_lock(&buf->rb_lock); 1133 } 1134 spin_unlock(&buf->rb_lock); 1135 } 1136 1137 /** 1138 * rpcrdma_buffer_destroy - Release all hw resources 1139 * @buf: root control block for resources 1140 * 1141 * ORDERING: relies on a prior rpcrdma_xprt_drain : 1142 * - No more Send or Receive completions can occur 1143 * - All MRs, reps, and reqs are returned to their free lists 1144 */ 1145 void 1146 rpcrdma_buffer_destroy(struct rpcrdma_buffer *buf) 1147 { 1148 rpcrdma_reps_destroy(buf); 1149 1150 while (!list_empty(&buf->rb_send_bufs)) { 1151 struct rpcrdma_req *req; 1152 1153 req = list_first_entry(&buf->rb_send_bufs, 1154 struct rpcrdma_req, rl_list); 1155 list_del(&req->rl_list); 1156 rpcrdma_req_destroy(req); 1157 } 1158 } 1159 1160 /** 1161 * rpcrdma_mr_get - Allocate an rpcrdma_mr object 1162 * @r_xprt: controlling transport 1163 * 1164 * Returns an initialized rpcrdma_mr or NULL if no free 1165 * rpcrdma_mr objects are available. 1166 */ 1167 struct rpcrdma_mr * 1168 rpcrdma_mr_get(struct rpcrdma_xprt *r_xprt) 1169 { 1170 struct rpcrdma_buffer *buf = &r_xprt->rx_buf; 1171 struct rpcrdma_mr *mr; 1172 1173 spin_lock(&buf->rb_lock); 1174 mr = rpcrdma_mr_pop(&buf->rb_mrs); 1175 spin_unlock(&buf->rb_lock); 1176 return mr; 1177 } 1178 1179 /** 1180 * rpcrdma_reply_put - Put reply buffers back into pool 1181 * @buffers: buffer pool 1182 * @req: object to return 1183 * 1184 */ 1185 void rpcrdma_reply_put(struct rpcrdma_buffer *buffers, struct rpcrdma_req *req) 1186 { 1187 if (req->rl_reply) { 1188 rpcrdma_rep_put(buffers, req->rl_reply); 1189 req->rl_reply = NULL; 1190 } 1191 } 1192 1193 /** 1194 * rpcrdma_buffer_get - Get a request buffer 1195 * @buffers: Buffer pool from which to obtain a buffer 1196 * 1197 * Returns a fresh rpcrdma_req, or NULL if none are available. 1198 */ 1199 struct rpcrdma_req * 1200 rpcrdma_buffer_get(struct rpcrdma_buffer *buffers) 1201 { 1202 struct rpcrdma_req *req; 1203 1204 spin_lock(&buffers->rb_lock); 1205 req = list_first_entry_or_null(&buffers->rb_send_bufs, 1206 struct rpcrdma_req, rl_list); 1207 if (req) 1208 list_del_init(&req->rl_list); 1209 spin_unlock(&buffers->rb_lock); 1210 return req; 1211 } 1212 1213 /** 1214 * rpcrdma_buffer_put - Put request/reply buffers back into pool 1215 * @buffers: buffer pool 1216 * @req: object to return 1217 * 1218 */ 1219 void rpcrdma_buffer_put(struct rpcrdma_buffer *buffers, struct rpcrdma_req *req) 1220 { 1221 rpcrdma_reply_put(buffers, req); 1222 1223 spin_lock(&buffers->rb_lock); 1224 list_add(&req->rl_list, &buffers->rb_send_bufs); 1225 spin_unlock(&buffers->rb_lock); 1226 } 1227 1228 /* Returns a pointer to a rpcrdma_regbuf object, or NULL. 1229 * 1230 * xprtrdma uses a regbuf for posting an outgoing RDMA SEND, or for 1231 * receiving the payload of RDMA RECV operations. During Long Calls 1232 * or Replies they may be registered externally via frwr_map. 1233 */ 1234 static struct rpcrdma_regbuf * 1235 rpcrdma_regbuf_alloc(size_t size, enum dma_data_direction direction, 1236 gfp_t flags) 1237 { 1238 struct rpcrdma_regbuf *rb; 1239 1240 rb = kmalloc(sizeof(*rb), flags); 1241 if (!rb) 1242 return NULL; 1243 rb->rg_data = kmalloc(size, flags); 1244 if (!rb->rg_data) { 1245 kfree(rb); 1246 return NULL; 1247 } 1248 1249 rb->rg_device = NULL; 1250 rb->rg_direction = direction; 1251 rb->rg_iov.length = size; 1252 return rb; 1253 } 1254 1255 /** 1256 * rpcrdma_regbuf_realloc - re-allocate a SEND/RECV buffer 1257 * @rb: regbuf to reallocate 1258 * @size: size of buffer to be allocated, in bytes 1259 * @flags: GFP flags 1260 * 1261 * Returns true if reallocation was successful. If false is 1262 * returned, @rb is left untouched. 1263 */ 1264 bool rpcrdma_regbuf_realloc(struct rpcrdma_regbuf *rb, size_t size, gfp_t flags) 1265 { 1266 void *buf; 1267 1268 buf = kmalloc(size, flags); 1269 if (!buf) 1270 return false; 1271 1272 rpcrdma_regbuf_dma_unmap(rb); 1273 kfree(rb->rg_data); 1274 1275 rb->rg_data = buf; 1276 rb->rg_iov.length = size; 1277 return true; 1278 } 1279 1280 /** 1281 * __rpcrdma_regbuf_dma_map - DMA-map a regbuf 1282 * @r_xprt: controlling transport instance 1283 * @rb: regbuf to be mapped 1284 * 1285 * Returns true if the buffer is now DMA mapped to @r_xprt's device 1286 */ 1287 bool __rpcrdma_regbuf_dma_map(struct rpcrdma_xprt *r_xprt, 1288 struct rpcrdma_regbuf *rb) 1289 { 1290 struct ib_device *device = r_xprt->rx_ep->re_id->device; 1291 1292 if (rb->rg_direction == DMA_NONE) 1293 return false; 1294 1295 rb->rg_iov.addr = ib_dma_map_single(device, rdmab_data(rb), 1296 rdmab_length(rb), rb->rg_direction); 1297 if (ib_dma_mapping_error(device, rdmab_addr(rb))) { 1298 trace_xprtrdma_dma_maperr(rdmab_addr(rb)); 1299 return false; 1300 } 1301 1302 rb->rg_device = device; 1303 rb->rg_iov.lkey = r_xprt->rx_ep->re_pd->local_dma_lkey; 1304 return true; 1305 } 1306 1307 static void rpcrdma_regbuf_dma_unmap(struct rpcrdma_regbuf *rb) 1308 { 1309 if (!rb) 1310 return; 1311 1312 if (!rpcrdma_regbuf_is_mapped(rb)) 1313 return; 1314 1315 ib_dma_unmap_single(rb->rg_device, rdmab_addr(rb), rdmab_length(rb), 1316 rb->rg_direction); 1317 rb->rg_device = NULL; 1318 } 1319 1320 static void rpcrdma_regbuf_free(struct rpcrdma_regbuf *rb) 1321 { 1322 rpcrdma_regbuf_dma_unmap(rb); 1323 if (rb) 1324 kfree(rb->rg_data); 1325 kfree(rb); 1326 } 1327 1328 /** 1329 * rpcrdma_post_recvs - Refill the Receive Queue 1330 * @r_xprt: controlling transport instance 1331 * @needed: current credit grant 1332 * @temp: mark Receive buffers to be deleted after one use 1333 * 1334 */ 1335 void rpcrdma_post_recvs(struct rpcrdma_xprt *r_xprt, int needed, bool temp) 1336 { 1337 struct rpcrdma_buffer *buf = &r_xprt->rx_buf; 1338 struct rpcrdma_ep *ep = r_xprt->rx_ep; 1339 struct ib_recv_wr *wr, *bad_wr; 1340 struct rpcrdma_rep *rep; 1341 int count, rc; 1342 1343 rc = 0; 1344 count = 0; 1345 1346 if (likely(ep->re_receive_count > needed)) 1347 goto out; 1348 needed -= ep->re_receive_count; 1349 if (!temp) 1350 needed += RPCRDMA_MAX_RECV_BATCH; 1351 1352 if (atomic_inc_return(&ep->re_receiving) > 1) 1353 goto out; 1354 1355 /* fast path: all needed reps can be found on the free list */ 1356 wr = NULL; 1357 while (needed) { 1358 rep = rpcrdma_rep_get_locked(buf); 1359 if (rep && rep->rr_temp) { 1360 rpcrdma_rep_destroy(rep); 1361 continue; 1362 } 1363 if (!rep) 1364 rep = rpcrdma_rep_create(r_xprt, temp); 1365 if (!rep) 1366 break; 1367 1368 rep->rr_cid.ci_queue_id = ep->re_attr.recv_cq->res.id; 1369 trace_xprtrdma_post_recv(rep); 1370 rep->rr_recv_wr.next = wr; 1371 wr = &rep->rr_recv_wr; 1372 --needed; 1373 ++count; 1374 } 1375 if (!wr) 1376 goto out; 1377 1378 rc = ib_post_recv(ep->re_id->qp, wr, 1379 (const struct ib_recv_wr **)&bad_wr); 1380 if (rc) { 1381 trace_xprtrdma_post_recvs_err(r_xprt, rc); 1382 for (wr = bad_wr; wr;) { 1383 struct rpcrdma_rep *rep; 1384 1385 rep = container_of(wr, struct rpcrdma_rep, rr_recv_wr); 1386 wr = wr->next; 1387 rpcrdma_rep_put(buf, rep); 1388 --count; 1389 } 1390 } 1391 if (atomic_dec_return(&ep->re_receiving) > 0) 1392 complete(&ep->re_done); 1393 1394 out: 1395 trace_xprtrdma_post_recvs(r_xprt, count); 1396 ep->re_receive_count += count; 1397 return; 1398 } 1399