// SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause
/*
 * Copyright (c) 2014-2017 Oracle. All rights reserved.
 * Copyright (c) 2003-2007 Network Appliance, Inc. All rights reserved.
 *
 * This software is available to you under a choice of one of two
 * licenses. You may choose to be licensed under the terms of the GNU
 * General Public License (GPL) Version 2, available from the file
 * COPYING in the main directory of this source tree, or the BSD-type
 * license below:
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 *      Redistributions of source code must retain the above copyright
 *      notice, this list of conditions and the following disclaimer.
 *
 *      Redistributions in binary form must reproduce the above
 *      copyright notice, this list of conditions and the following
 *      disclaimer in the documentation and/or other materials provided
 *      with the distribution.
 *
 *      Neither the name of the Network Appliance, Inc. nor the names of
 *      its contributors may be used to endorse or promote products
 *      derived from this software without specific prior written
 *      permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

/*
 * verbs.c
 *
 * Encapsulates the major functions managing:
 *  o adapters
 *  o endpoints
 *  o connections
 *  o buffer memory
 */

#include <linux/interrupt.h>
#include <linux/slab.h>
#include <linux/sunrpc/addr.h>
#include <linux/sunrpc/svc_rdma.h>
#include <linux/log2.h>

#include <asm-generic/barrier.h>
#include <asm/bitops.h>

#include <rdma/ib_cm.h>

#include "xprt_rdma.h"
#include <trace/events/rpcrdma.h>

/*
 * Globals/Macros
 */

#if IS_ENABLED(CONFIG_SUNRPC_DEBUG)
# define RPCDBG_FACILITY	RPCDBG_TRANS
#endif

/*
 * internal functions
 */
static int rpcrdma_sendctxs_create(struct rpcrdma_xprt *r_xprt);
static void rpcrdma_sendctxs_destroy(struct rpcrdma_xprt *r_xprt);
static void rpcrdma_sendctx_put_locked(struct rpcrdma_xprt *r_xprt,
				       struct rpcrdma_sendctx *sc);
static int rpcrdma_reqs_setup(struct rpcrdma_xprt *r_xprt);
static void rpcrdma_reqs_reset(struct rpcrdma_xprt *r_xprt);
static void rpcrdma_rep_destroy(struct rpcrdma_rep *rep);
static void rpcrdma_reps_unmap(struct rpcrdma_xprt *r_xprt);
static void rpcrdma_mrs_create(struct rpcrdma_xprt *r_xprt);
static void rpcrdma_mrs_destroy(struct rpcrdma_xprt *r_xprt);
static int rpcrdma_ep_destroy(struct rpcrdma_ep *ep);
static struct rpcrdma_regbuf *
rpcrdma_regbuf_alloc(size_t size, enum dma_data_direction direction,
		     gfp_t flags);
static void rpcrdma_regbuf_dma_unmap(struct rpcrdma_regbuf *rb);
static void rpcrdma_regbuf_free(struct rpcrdma_regbuf *rb);

/* Wait for outstanding transport work to finish. ib_drain_qp
 * handles the drains in the wrong order for us, so open code
 * them here.
 */
static void rpcrdma_xprt_drain(struct rpcrdma_xprt *r_xprt)
{
	struct rdma_cm_id *id = r_xprt->rx_ep->re_id;

	/* Flush Receives, then wait for deferred Reply work
	 * to complete.
	 */
	ib_drain_rq(id->qp);

	/* Deferred Reply processing might have scheduled
	 * local invalidations.
	 */
	ib_drain_sq(id->qp);
}

/**
 * rpcrdma_qp_event_handler - Handle one QP event (error notification)
 * @event: details of the event
 * @context: ep that owns QP where event occurred
 *
 * Called from the RDMA provider (device driver) possibly in an interrupt
 * context. The QP is always destroyed before the ID, so the ID will be
 * reliably available when this handler is invoked.
 */
static void rpcrdma_qp_event_handler(struct ib_event *event, void *context)
{
	struct rpcrdma_ep *ep = context;

	trace_xprtrdma_qp_event(ep, event);
}

/**
 * rpcrdma_flush_disconnect - Disconnect on flushed completion
 * @cq: completion queue
 * @wc: work completion entry
 *
 * Must be called in process context.
 */
void rpcrdma_flush_disconnect(struct ib_cq *cq, struct ib_wc *wc)
{
	struct rpcrdma_xprt *r_xprt = cq->cq_context;
	struct rpc_xprt *xprt = &r_xprt->rx_xprt;

	if (wc->status != IB_WC_SUCCESS &&
	    r_xprt->rx_ep->re_connect_status == 1) {
		r_xprt->rx_ep->re_connect_status = -ECONNABORTED;
		trace_xprtrdma_flush_dct(r_xprt, wc->status);
		xprt_force_disconnect(xprt);
	}
}

/**
 * rpcrdma_wc_send - Invoked by RDMA provider for each polled Send WC
 * @cq: completion queue
 * @wc: WCE for a completed Send WR
 *
 */
static void rpcrdma_wc_send(struct ib_cq *cq, struct ib_wc *wc)
{
	struct ib_cqe *cqe = wc->wr_cqe;
	struct rpcrdma_sendctx *sc =
		container_of(cqe, struct rpcrdma_sendctx, sc_cqe);

	/* WARNING: Only wr_cqe and status are reliable at this point */
	trace_xprtrdma_wc_send(sc, wc);
	rpcrdma_sendctx_put_locked((struct rpcrdma_xprt *)cq->cq_context, sc);
	rpcrdma_flush_disconnect(cq, wc);
}

/**
 * rpcrdma_wc_receive - Invoked by RDMA provider for each polled Receive WC
 * @cq: completion queue
 * @wc: WCE for a completed Receive WR
 *
 */
static void rpcrdma_wc_receive(struct ib_cq *cq, struct ib_wc *wc)
{
	struct ib_cqe *cqe = wc->wr_cqe;
	struct rpcrdma_rep *rep = container_of(cqe, struct rpcrdma_rep,
					       rr_cqe);
	struct rpcrdma_xprt *r_xprt = cq->cq_context;

	/* WARNING: Only wr_cqe and status are reliable at this point */
	trace_xprtrdma_wc_receive(wc);
	--r_xprt->rx_ep->re_receive_count;
	if (wc->status != IB_WC_SUCCESS)
		goto out_flushed;

	/* status == SUCCESS means all fields in wc are trustworthy */
	rpcrdma_set_xdrlen(&rep->rr_hdrbuf, wc->byte_len);
	rep->rr_wc_flags = wc->wc_flags;
	rep->rr_inv_rkey = wc->ex.invalidate_rkey;

	ib_dma_sync_single_for_cpu(rdmab_device(rep->rr_rdmabuf),
				   rdmab_addr(rep->rr_rdmabuf),
				   wc->byte_len, DMA_FROM_DEVICE);

	rpcrdma_reply_handler(rep);
	return;

out_flushed:
	rpcrdma_flush_disconnect(cq, wc);
	rpcrdma_rep_destroy(rep);
}

static void rpcrdma_update_cm_private(struct rpcrdma_ep *ep,
				      struct rdma_conn_param *param)
{
	const struct rpcrdma_connect_private *pmsg = param->private_data;
	unsigned int rsize, wsize;

	/* Default settings for RPC-over-RDMA Version One */
	ep->re_implicit_roundup = xprt_rdma_pad_optimize;
	rsize = RPCRDMA_V1_DEF_INLINE_SIZE;
	wsize = RPCRDMA_V1_DEF_INLINE_SIZE;

	if (pmsg &&
	    pmsg->cp_magic == rpcrdma_cmp_magic &&
	    pmsg->cp_version == RPCRDMA_CMP_VERSION) {
		ep->re_implicit_roundup = true;
		rsize = rpcrdma_decode_buffer_size(pmsg->cp_send_size);
		wsize = rpcrdma_decode_buffer_size(pmsg->cp_recv_size);
	}

	if (rsize < ep->re_inline_recv)
		ep->re_inline_recv = rsize;
	if (wsize < ep->re_inline_send)
		ep->re_inline_send = wsize;

	rpcrdma_set_max_header_sizes(ep);
}
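/* Note for readers (illustrative only): the clamping above means the
 * inline thresholds can only shrink relative to the client's initial
 * settings, never grow. For example, if the client starts with a
 * 4096-byte re_inline_recv and the server's private message advertises
 * 1024, re_inline_recv drops to 1024; a server advertisement of 8192
 * would leave it at 4096.
 */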
/**
 * rpcrdma_cm_event_handler - Handle RDMA CM events
 * @id: rdma_cm_id on which an event has occurred
 * @event: details of the event
 *
 * Called with @id's mutex held. Returns 1 if caller should
 * destroy @id, otherwise 0.
 */
static int
rpcrdma_cm_event_handler(struct rdma_cm_id *id, struct rdma_cm_event *event)
{
	struct sockaddr *sap = (struct sockaddr *)&id->route.addr.dst_addr;
	struct rpcrdma_ep *ep = id->context;
	struct rpc_xprt *xprt = ep->re_xprt;

	might_sleep();

	switch (event->event) {
	case RDMA_CM_EVENT_ADDR_RESOLVED:
	case RDMA_CM_EVENT_ROUTE_RESOLVED:
		ep->re_async_rc = 0;
		complete(&ep->re_done);
		return 0;
	case RDMA_CM_EVENT_ADDR_ERROR:
		ep->re_async_rc = -EPROTO;
		complete(&ep->re_done);
		return 0;
	case RDMA_CM_EVENT_ROUTE_ERROR:
		ep->re_async_rc = -ENETUNREACH;
		complete(&ep->re_done);
		return 0;
	case RDMA_CM_EVENT_DEVICE_REMOVAL:
		pr_info("rpcrdma: removing device %s for %pISpc\n",
			ep->re_id->device->name, sap);
		/* fall through */
	case RDMA_CM_EVENT_ADDR_CHANGE:
		ep->re_connect_status = -ENODEV;
		xprt_force_disconnect(xprt);
		goto disconnected;
	case RDMA_CM_EVENT_ESTABLISHED:
		kref_get(&ep->re_kref);
		ep->re_connect_status = 1;
		rpcrdma_update_cm_private(ep, &event->param.conn);
		trace_xprtrdma_inline_thresh(ep);
		wake_up_all(&ep->re_connect_wait);
		break;
	case RDMA_CM_EVENT_CONNECT_ERROR:
		ep->re_connect_status = -ENOTCONN;
		goto disconnected;
	case RDMA_CM_EVENT_UNREACHABLE:
		ep->re_connect_status = -ENETUNREACH;
		goto disconnected;
	case RDMA_CM_EVENT_REJECTED:
		dprintk("rpcrdma: connection to %pISpc rejected: %s\n",
			sap, rdma_reject_msg(id, event->status));
		ep->re_connect_status = -ECONNREFUSED;
		if (event->status == IB_CM_REJ_STALE_CONN)
			ep->re_connect_status = -EAGAIN;
		goto disconnected;
	case RDMA_CM_EVENT_DISCONNECTED:
		ep->re_connect_status = -ECONNABORTED;
disconnected:
		xprt_force_disconnect(xprt);
		return rpcrdma_ep_destroy(ep);
	default:
		break;
	}

	dprintk("RPC: %s: %pISpc on %s/frwr: %s\n", __func__, sap,
		ep->re_id->device->name, rdma_event_msg(event->event));
	return 0;
}

static struct rdma_cm_id *rpcrdma_create_id(struct rpcrdma_xprt *r_xprt,
					    struct rpcrdma_ep *ep)
{
	unsigned long wtimeout = msecs_to_jiffies(RDMA_RESOLVE_TIMEOUT) + 1;
	struct rpc_xprt *xprt = &r_xprt->rx_xprt;
	struct rdma_cm_id *id;
	int rc;

	init_completion(&ep->re_done);

	id = rdma_create_id(xprt->xprt_net, rpcrdma_cm_event_handler, ep,
			    RDMA_PS_TCP, IB_QPT_RC);
	if (IS_ERR(id))
		return id;

	ep->re_async_rc = -ETIMEDOUT;
	rc = rdma_resolve_addr(id, NULL, (struct sockaddr *)&xprt->addr,
			       RDMA_RESOLVE_TIMEOUT);
	if (rc)
		goto out;
	rc = wait_for_completion_interruptible_timeout(&ep->re_done, wtimeout);
	if (rc < 0)
		goto out;

	rc = ep->re_async_rc;
	if (rc)
		goto out;

	ep->re_async_rc = -ETIMEDOUT;
	rc = rdma_resolve_route(id, RDMA_RESOLVE_TIMEOUT);
	if (rc)
		goto out;
	rc = wait_for_completion_interruptible_timeout(&ep->re_done, wtimeout);
	if (rc < 0)
		goto out;
	rc = ep->re_async_rc;
	if (rc)
		goto out;

	return id;

out:
	rdma_destroy_id(id);
	return ERR_PTR(rc);
}
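/* A note on the waits above: wait_for_completion_interruptible_timeout()
 * returns a negative errno if the wait is interrupted, zero on timeout,
 * and a positive value when the completion fires. The timeout case is
 * covered by pre-setting re_async_rc to -ETIMEDOUT before each
 * resolution step, so falling through to "rc = ep->re_async_rc" still
 * reports an error if the CM event never arrived.
 */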
static void rpcrdma_ep_put(struct kref *kref)
{
	struct rpcrdma_ep *ep = container_of(kref, struct rpcrdma_ep, re_kref);

	if (ep->re_id->qp) {
		rdma_destroy_qp(ep->re_id);
		ep->re_id->qp = NULL;
	}

	if (ep->re_attr.recv_cq)
		ib_free_cq(ep->re_attr.recv_cq);
	ep->re_attr.recv_cq = NULL;
	if (ep->re_attr.send_cq)
		ib_free_cq(ep->re_attr.send_cq);
	ep->re_attr.send_cq = NULL;

	if (ep->re_pd)
		ib_dealloc_pd(ep->re_pd);
	ep->re_pd = NULL;

	kfree(ep);
	module_put(THIS_MODULE);
}

/* Returns:
 *     %0 if @ep still has a positive kref count, or
 *     %1 if @ep was destroyed successfully.
 */
static int rpcrdma_ep_destroy(struct rpcrdma_ep *ep)
{
	return kref_put(&ep->re_kref, rpcrdma_ep_put);
}

static int rpcrdma_ep_create(struct rpcrdma_xprt *r_xprt)
{
	struct rpcrdma_connect_private *pmsg;
	struct ib_device *device;
	struct rdma_cm_id *id;
	struct rpcrdma_ep *ep;
	int rc;

	ep = kzalloc(sizeof(*ep), GFP_NOFS);
	if (!ep)
		return -EAGAIN;
	ep->re_xprt = &r_xprt->rx_xprt;
	kref_init(&ep->re_kref);

	id = rpcrdma_create_id(r_xprt, ep);
	if (IS_ERR(id)) {
		rc = PTR_ERR(id);
		goto out_free;
	}
	__module_get(THIS_MODULE);
	device = id->device;
	ep->re_id = id;

	ep->re_max_requests = r_xprt->rx_xprt.max_reqs;
	ep->re_inline_send = xprt_rdma_max_inline_write;
	ep->re_inline_recv = xprt_rdma_max_inline_read;
	rc = frwr_query_device(ep, device);
	if (rc)
		goto out_destroy;

	r_xprt->rx_buf.rb_max_requests = cpu_to_be32(ep->re_max_requests);

	ep->re_attr.event_handler = rpcrdma_qp_event_handler;
	ep->re_attr.qp_context = ep;
	ep->re_attr.srq = NULL;
	ep->re_attr.cap.max_inline_data = 0;
	ep->re_attr.sq_sig_type = IB_SIGNAL_REQ_WR;
	ep->re_attr.qp_type = IB_QPT_RC;
	ep->re_attr.port_num = ~0;

	dprintk("RPC: %s: requested max: dtos: send %d recv %d; "
		"iovs: send %d recv %d\n",
		__func__,
		ep->re_attr.cap.max_send_wr,
		ep->re_attr.cap.max_recv_wr,
		ep->re_attr.cap.max_send_sge,
		ep->re_attr.cap.max_recv_sge);

	ep->re_send_batch = ep->re_max_requests >> 3;
	ep->re_send_count = ep->re_send_batch;
	init_waitqueue_head(&ep->re_connect_wait);

	ep->re_attr.send_cq = ib_alloc_cq_any(device, r_xprt,
					      ep->re_attr.cap.max_send_wr,
					      IB_POLL_WORKQUEUE);
	if (IS_ERR(ep->re_attr.send_cq)) {
		rc = PTR_ERR(ep->re_attr.send_cq);
		goto out_destroy;
	}

	ep->re_attr.recv_cq = ib_alloc_cq_any(device, r_xprt,
					      ep->re_attr.cap.max_recv_wr,
					      IB_POLL_WORKQUEUE);
	if (IS_ERR(ep->re_attr.recv_cq)) {
		rc = PTR_ERR(ep->re_attr.recv_cq);
		goto out_destroy;
	}
	ep->re_receive_count = 0;

	/* Initialize cma parameters */
	memset(&ep->re_remote_cma, 0, sizeof(ep->re_remote_cma));

	/* Prepare RDMA-CM private message */
	pmsg = &ep->re_cm_private;
	pmsg->cp_magic = rpcrdma_cmp_magic;
	pmsg->cp_version = RPCRDMA_CMP_VERSION;
	pmsg->cp_flags |= RPCRDMA_CMP_F_SND_W_INV_OK;
	pmsg->cp_send_size = rpcrdma_encode_buffer_size(ep->re_inline_send);
	pmsg->cp_recv_size = rpcrdma_encode_buffer_size(ep->re_inline_recv);
	ep->re_remote_cma.private_data = pmsg;
	ep->re_remote_cma.private_data_len = sizeof(*pmsg);

	/* Client offers RDMA Read but does not initiate */
	ep->re_remote_cma.initiator_depth = 0;
	ep->re_remote_cma.responder_resources =
		min_t(int, U8_MAX, device->attrs.max_qp_rd_atom);

	/* Limit transport retries so client can detect server
	 * GID changes quickly. RPC layer handles re-establishing
	 * transport connection and retransmission.
	 */
	ep->re_remote_cma.retry_count = 6;

	/* RPC-over-RDMA handles its own flow control.
	 * In addition, make all RNR NAKs visible so we know that
	 * RPC-over-RDMA flow control is working correctly (no NAKs
	 * should be seen).
	 */
	ep->re_remote_cma.flow_control = 0;
	ep->re_remote_cma.rnr_retry_count = 0;

	ep->re_pd = ib_alloc_pd(device, 0);
	if (IS_ERR(ep->re_pd)) {
		rc = PTR_ERR(ep->re_pd);
		goto out_destroy;
	}

	rc = rdma_create_qp(id, ep->re_pd, &ep->re_attr);
	if (rc)
		goto out_destroy;

	r_xprt->rx_ep = ep;
	return 0;

out_destroy:
	rpcrdma_ep_destroy(ep);
	rdma_destroy_id(id);
out_free:
	kfree(ep);
	r_xprt->rx_ep = NULL;
	return rc;
}

/**
 * rpcrdma_xprt_connect - Connect an unconnected transport
 * @r_xprt: controlling transport instance
 *
 * Returns 0 on success or a negative errno.
 */
int rpcrdma_xprt_connect(struct rpcrdma_xprt *r_xprt)
{
	struct rpc_xprt *xprt = &r_xprt->rx_xprt;
	struct rpcrdma_ep *ep;
	int rc;

retry:
	rpcrdma_xprt_disconnect(r_xprt);
	rc = rpcrdma_ep_create(r_xprt);
	if (rc)
		return rc;
	ep = r_xprt->rx_ep;

	ep->re_connect_status = 0;
	xprt_clear_connected(xprt);

	rpcrdma_reset_cwnd(r_xprt);
	rpcrdma_post_recvs(r_xprt, true);

	rc = rpcrdma_sendctxs_create(r_xprt);
	if (rc)
		goto out;

	rc = rdma_connect(ep->re_id, &ep->re_remote_cma);
	if (rc)
		goto out;

	if (xprt->reestablish_timeout < RPCRDMA_INIT_REEST_TO)
		xprt->reestablish_timeout = RPCRDMA_INIT_REEST_TO;
	wait_event_interruptible(ep->re_connect_wait,
				 ep->re_connect_status != 0);
	if (ep->re_connect_status <= 0) {
		if (ep->re_connect_status == -EAGAIN)
			goto retry;
		rc = ep->re_connect_status;
		goto out;
	}

	rc = rpcrdma_reqs_setup(r_xprt);
	if (rc) {
		rpcrdma_xprt_disconnect(r_xprt);
		goto out;
	}
	rpcrdma_mrs_create(r_xprt);

out:
	if (rc)
		ep->re_connect_status = rc;
	trace_xprtrdma_connect(r_xprt, rc);
	return rc;
}

/**
 * rpcrdma_xprt_disconnect - Disconnect underlying transport
 * @r_xprt: controlling transport instance
 *
 * Caller serializes. Either the transport send lock is held,
 * or we're being called to destroy the transport.
 *
 * On return, @r_xprt is completely divested of all hardware
 * resources and prepared for the next ->connect operation.
 */
void rpcrdma_xprt_disconnect(struct rpcrdma_xprt *r_xprt)
{
	struct rpcrdma_ep *ep = r_xprt->rx_ep;
	struct rdma_cm_id *id;
	int rc;

	if (!ep)
		return;

	id = ep->re_id;
	rc = rdma_disconnect(id);
	trace_xprtrdma_disconnect(r_xprt, rc);

	rpcrdma_xprt_drain(r_xprt);
	rpcrdma_reps_unmap(r_xprt);
	rpcrdma_reqs_reset(r_xprt);
	rpcrdma_mrs_destroy(r_xprt);
	rpcrdma_sendctxs_destroy(r_xprt);

	if (rpcrdma_ep_destroy(ep))
		rdma_destroy_id(id);

	r_xprt->rx_ep = NULL;
}

/* Fixed-size circular FIFO queue. This implementation is wait-free and
 * lock-free.
 *
 * Consumer is the code path that posts Sends. This path dequeues a
 * sendctx for use by a Send operation. Multiple consumer threads
 * are serialized by the RPC transport lock, which allows only one
 * ->send_request call at a time.
 *
 * Producer is the code path that handles Send completions. This path
 * enqueues a sendctx that has been completed. Multiple producer
 * threads are serialized by the ib_poll_cq() function.
 */
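/* Illustration of the queue indexing (for reference only; nothing below
 * depends on this comment): with rb_sc_last == 3, rpcrdma_sendctx_next()
 * advances an index 0 -> 1 -> 2 -> 3 -> 0. The consumer treats the
 * queue as empty when advancing rb_sc_head would collide with
 * rb_sc_tail, so one slot always stays unused:
 *
 *	next_head = rpcrdma_sendctx_next(buf, buf->rb_sc_head);
 *	if (next_head == READ_ONCE(buf->rb_sc_tail))
 *		... no sendctx available, caller must wait ...
 */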
/* rpcrdma_sendctxs_destroy() assumes caller has already quiesced
 * queue activity, and rpcrdma_xprt_drain has flushed all remaining
 * Send requests.
 */
static void rpcrdma_sendctxs_destroy(struct rpcrdma_xprt *r_xprt)
{
	struct rpcrdma_buffer *buf = &r_xprt->rx_buf;
	unsigned long i;

	if (!buf->rb_sc_ctxs)
		return;
	for (i = 0; i <= buf->rb_sc_last; i++)
		kfree(buf->rb_sc_ctxs[i]);
	kfree(buf->rb_sc_ctxs);
	buf->rb_sc_ctxs = NULL;
}

static struct rpcrdma_sendctx *rpcrdma_sendctx_create(struct rpcrdma_ep *ep)
{
	struct rpcrdma_sendctx *sc;

	sc = kzalloc(struct_size(sc, sc_sges, ep->re_attr.cap.max_send_sge),
		     GFP_KERNEL);
	if (!sc)
		return NULL;

	sc->sc_cqe.done = rpcrdma_wc_send;
	return sc;
}

static int rpcrdma_sendctxs_create(struct rpcrdma_xprt *r_xprt)
{
	struct rpcrdma_buffer *buf = &r_xprt->rx_buf;
	struct rpcrdma_sendctx *sc;
	unsigned long i;

	/* Maximum number of concurrent outstanding Send WRs. Capping
	 * the circular queue size stops Send Queue overflow by causing
	 * the ->send_request call to fail temporarily before too many
	 * Sends are posted.
	 */
	i = r_xprt->rx_ep->re_max_requests + RPCRDMA_MAX_BC_REQUESTS;
	buf->rb_sc_ctxs = kcalloc(i, sizeof(sc), GFP_KERNEL);
	if (!buf->rb_sc_ctxs)
		return -ENOMEM;

	buf->rb_sc_last = i - 1;
	for (i = 0; i <= buf->rb_sc_last; i++) {
		sc = rpcrdma_sendctx_create(r_xprt->rx_ep);
		if (!sc)
			return -ENOMEM;

		buf->rb_sc_ctxs[i] = sc;
	}

	buf->rb_sc_head = 0;
	buf->rb_sc_tail = 0;
	return 0;
}

/* The sendctx queue is not guaranteed to have a size that is a
 * power of two, thus the helpers in circ_buf.h cannot be used.
 * The other option is to use modulus (%), which can be expensive.
 */
static unsigned long rpcrdma_sendctx_next(struct rpcrdma_buffer *buf,
					  unsigned long item)
{
	return likely(item < buf->rb_sc_last) ? item + 1 : 0;
}

/**
 * rpcrdma_sendctx_get_locked - Acquire a send context
 * @r_xprt: controlling transport instance
 *
 * Returns pointer to a free send completion context; or NULL if
 * the queue is empty.
 *
 * Usage: Called to acquire an SGE array before preparing a Send WR.
 *
 * The caller serializes calls to this function (per transport), and
 * provides an effective memory barrier that flushes the new value
 * of rb_sc_head.
 */
struct rpcrdma_sendctx *rpcrdma_sendctx_get_locked(struct rpcrdma_xprt *r_xprt)
{
	struct rpcrdma_buffer *buf = &r_xprt->rx_buf;
	struct rpcrdma_sendctx *sc;
	unsigned long next_head;

	next_head = rpcrdma_sendctx_next(buf, buf->rb_sc_head);

	if (next_head == READ_ONCE(buf->rb_sc_tail))
		goto out_emptyq;

	/* ORDER: item must be accessed _before_ head is updated */
	sc = buf->rb_sc_ctxs[next_head];

	/* Releasing the lock in the caller acts as a memory
	 * barrier that flushes rb_sc_head.
	 */
	buf->rb_sc_head = next_head;

	return sc;

out_emptyq:
	/* The queue is "empty" if there have not been enough Send
	 * completions recently. This is a sign the Send Queue is
	 * backing up. Cause the caller to pause and try again.
	 */
	xprt_wait_for_buffer_space(&r_xprt->rx_xprt);
	r_xprt->rx_stats.empty_sendctx_q++;
	return NULL;
}

/**
 * rpcrdma_sendctx_put_locked - Release a send context
 * @r_xprt: controlling transport instance
 * @sc: send context to release
 *
 * Usage: Called from Send completion to return a sendctx
 * to the queue.
 *
 * The caller serializes calls to this function (per transport).
 */
static void rpcrdma_sendctx_put_locked(struct rpcrdma_xprt *r_xprt,
				       struct rpcrdma_sendctx *sc)
{
	struct rpcrdma_buffer *buf = &r_xprt->rx_buf;
	unsigned long next_tail;

	/* Unmap SGEs of previously completed but unsignaled
	 * Sends by walking up the queue until @sc is found.
	 */
	next_tail = buf->rb_sc_tail;
	do {
		next_tail = rpcrdma_sendctx_next(buf, next_tail);

		/* ORDER: item must be accessed _before_ tail is updated */
		rpcrdma_sendctx_unmap(buf->rb_sc_ctxs[next_tail]);

	} while (buf->rb_sc_ctxs[next_tail] != sc);

	/* Paired with READ_ONCE */
	smp_store_release(&buf->rb_sc_tail, next_tail);

	xprt_write_space(&r_xprt->rx_xprt);
}

static void
rpcrdma_mrs_create(struct rpcrdma_xprt *r_xprt)
{
	struct rpcrdma_buffer *buf = &r_xprt->rx_buf;
	struct rpcrdma_ep *ep = r_xprt->rx_ep;
	unsigned int count;

	for (count = 0; count < ep->re_max_rdma_segs; count++) {
		struct rpcrdma_mr *mr;
		int rc;

		mr = kzalloc(sizeof(*mr), GFP_NOFS);
		if (!mr)
			break;

		rc = frwr_mr_init(r_xprt, mr);
		if (rc) {
			kfree(mr);
			break;
		}

		spin_lock(&buf->rb_lock);
		rpcrdma_mr_push(mr, &buf->rb_mrs);
		list_add(&mr->mr_all, &buf->rb_all_mrs);
		spin_unlock(&buf->rb_lock);
	}

	r_xprt->rx_stats.mrs_allocated += count;
	trace_xprtrdma_createmrs(r_xprt, count);
}

static void
rpcrdma_mr_refresh_worker(struct work_struct *work)
{
	struct rpcrdma_buffer *buf = container_of(work, struct rpcrdma_buffer,
						  rb_refresh_worker);
	struct rpcrdma_xprt *r_xprt = container_of(buf, struct rpcrdma_xprt,
						   rx_buf);

	rpcrdma_mrs_create(r_xprt);
	xprt_write_space(&r_xprt->rx_xprt);
}

/**
 * rpcrdma_mrs_refresh - Wake the MR refresh worker
 * @r_xprt: controlling transport instance
 *
 */
void rpcrdma_mrs_refresh(struct rpcrdma_xprt *r_xprt)
{
	struct rpcrdma_buffer *buf = &r_xprt->rx_buf;
	struct rpcrdma_ep *ep = r_xprt->rx_ep;

	/* If there is no underlying connection, it's no use
	 * to wake the refresh worker.
	 */
	if (ep->re_connect_status == 1) {
		/* The work is scheduled on a WQ_MEM_RECLAIM
		 * workqueue in order to prevent MR allocation
		 * from recursing into NFS during direct reclaim.
		 */
		queue_work(xprtiod_workqueue, &buf->rb_refresh_worker);
	}
}
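/* MR bookkeeping, for reference: each MR created by rpcrdma_mrs_create()
 * above is linked on two lists. mr->mr_list threads the free lists
 * (rb_mrs, and later a req's rl_free_mrs) that rpcrdma_mr_get() and
 * rpcrdma_mr_put() manage, while mr->mr_all threads rb_all_mrs so that
 * rpcrdma_mrs_destroy() can find every MR regardless of which free list
 * it currently sits on.
 */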
/**
 * rpcrdma_req_create - Allocate an rpcrdma_req object
 * @r_xprt: controlling r_xprt
 * @size: initial size, in bytes, of send and receive buffers
 * @flags: GFP flags passed to memory allocators
 *
 * Returns an allocated and fully initialized rpcrdma_req or NULL.
 */
struct rpcrdma_req *rpcrdma_req_create(struct rpcrdma_xprt *r_xprt, size_t size,
				       gfp_t flags)
{
	struct rpcrdma_buffer *buffer = &r_xprt->rx_buf;
	struct rpcrdma_req *req;

	req = kzalloc(sizeof(*req), flags);
	if (req == NULL)
		goto out1;

	req->rl_sendbuf = rpcrdma_regbuf_alloc(size, DMA_TO_DEVICE, flags);
	if (!req->rl_sendbuf)
		goto out2;

	req->rl_recvbuf = rpcrdma_regbuf_alloc(size, DMA_NONE, flags);
	if (!req->rl_recvbuf)
		goto out3;

	INIT_LIST_HEAD(&req->rl_free_mrs);
	INIT_LIST_HEAD(&req->rl_registered);
	spin_lock(&buffer->rb_lock);
	list_add(&req->rl_all, &buffer->rb_allreqs);
	spin_unlock(&buffer->rb_lock);
	return req;

out3:
	kfree(req->rl_sendbuf);
out2:
	kfree(req);
out1:
	return NULL;
}

/**
 * rpcrdma_req_setup - Per-connection instance setup of an rpcrdma_req object
 * @r_xprt: controlling transport instance
 * @req: rpcrdma_req object to set up
 *
 * Returns zero on success, and a negative errno on failure.
 */
int rpcrdma_req_setup(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req)
{
	struct rpcrdma_regbuf *rb;
	size_t maxhdrsize;

	/* Compute maximum header buffer size in bytes */
	maxhdrsize = rpcrdma_fixed_maxsz + 3 +
		     r_xprt->rx_ep->re_max_rdma_segs * rpcrdma_readchunk_maxsz;
	maxhdrsize *= sizeof(__be32);
	rb = rpcrdma_regbuf_alloc(__roundup_pow_of_two(maxhdrsize),
				  DMA_TO_DEVICE, GFP_KERNEL);
	if (!rb)
		goto out;

	if (!__rpcrdma_regbuf_dma_map(r_xprt, rb))
		goto out_free;

	req->rl_rdmabuf = rb;
	xdr_buf_init(&req->rl_hdrbuf, rdmab_data(rb), rdmab_length(rb));
	return 0;

out_free:
	rpcrdma_regbuf_free(rb);
out:
	return -ENOMEM;
}

/* ASSUMPTION: the rb_allreqs list is stable for the duration,
 * and thus can be walked without holding rb_lock. Eg. the
 * caller is holding the transport send lock to exclude
 * device removal or disconnection.
 */
static int rpcrdma_reqs_setup(struct rpcrdma_xprt *r_xprt)
{
	struct rpcrdma_buffer *buf = &r_xprt->rx_buf;
	struct rpcrdma_req *req;
	int rc;

	list_for_each_entry(req, &buf->rb_allreqs, rl_all) {
		rc = rpcrdma_req_setup(r_xprt, req);
		if (rc)
			return rc;
	}
	return 0;
}

static void rpcrdma_req_reset(struct rpcrdma_req *req)
{
	/* Credits are valid for only one connection */
	req->rl_slot.rq_cong = 0;

	rpcrdma_regbuf_free(req->rl_rdmabuf);
	req->rl_rdmabuf = NULL;

	rpcrdma_regbuf_dma_unmap(req->rl_sendbuf);
	rpcrdma_regbuf_dma_unmap(req->rl_recvbuf);
}

/* ASSUMPTION: the rb_allreqs list is stable for the duration,
 * and thus can be walked without holding rb_lock. Eg. the
 * caller is holding the transport send lock to exclude
 * device removal or disconnection.
 */
static void rpcrdma_reqs_reset(struct rpcrdma_xprt *r_xprt)
{
	struct rpcrdma_buffer *buf = &r_xprt->rx_buf;
	struct rpcrdma_req *req;

	list_for_each_entry(req, &buf->rb_allreqs, rl_all)
		rpcrdma_req_reset(req);
}

/* No locking needed here. This function is called only by the
 * Receive completion handler.
 */
static noinline
struct rpcrdma_rep *rpcrdma_rep_create(struct rpcrdma_xprt *r_xprt,
				       bool temp)
{
	struct rpcrdma_rep *rep;

	rep = kzalloc(sizeof(*rep), GFP_KERNEL);
	if (rep == NULL)
		goto out;

	rep->rr_rdmabuf = rpcrdma_regbuf_alloc(r_xprt->rx_ep->re_inline_recv,
					       DMA_FROM_DEVICE, GFP_KERNEL);
	if (!rep->rr_rdmabuf)
		goto out_free;

	if (!rpcrdma_regbuf_dma_map(r_xprt, rep->rr_rdmabuf))
		goto out_free_regbuf;

	xdr_buf_init(&rep->rr_hdrbuf, rdmab_data(rep->rr_rdmabuf),
		     rdmab_length(rep->rr_rdmabuf));
	rep->rr_cqe.done = rpcrdma_wc_receive;
	rep->rr_rxprt = r_xprt;
	rep->rr_recv_wr.next = NULL;
	rep->rr_recv_wr.wr_cqe = &rep->rr_cqe;
	rep->rr_recv_wr.sg_list = &rep->rr_rdmabuf->rg_iov;
	rep->rr_recv_wr.num_sge = 1;
	rep->rr_temp = temp;
	list_add(&rep->rr_all, &r_xprt->rx_buf.rb_all_reps);
	return rep;

out_free_regbuf:
	rpcrdma_regbuf_free(rep->rr_rdmabuf);
out_free:
	kfree(rep);
out:
	return NULL;
}

/* No locking needed here. This function is invoked only by the
 * Receive completion handler, or during transport shutdown.
 */
static void rpcrdma_rep_destroy(struct rpcrdma_rep *rep)
{
	list_del(&rep->rr_all);
	rpcrdma_regbuf_free(rep->rr_rdmabuf);
	kfree(rep);
}

static struct rpcrdma_rep *rpcrdma_rep_get_locked(struct rpcrdma_buffer *buf)
{
	struct llist_node *node;

	/* Calls to llist_del_first are required to be serialized */
	node = llist_del_first(&buf->rb_free_reps);
	if (!node)
		return NULL;
	return llist_entry(node, struct rpcrdma_rep, rr_node);
}

static void rpcrdma_rep_put(struct rpcrdma_buffer *buf,
			    struct rpcrdma_rep *rep)
{
	llist_add(&rep->rr_node, &buf->rb_free_reps);
}

static void rpcrdma_reps_unmap(struct rpcrdma_xprt *r_xprt)
{
	struct rpcrdma_buffer *buf = &r_xprt->rx_buf;
	struct rpcrdma_rep *rep;

	list_for_each_entry(rep, &buf->rb_all_reps, rr_all) {
		rpcrdma_regbuf_dma_unmap(rep->rr_rdmabuf);
		rep->rr_temp = true;
	}
}

static void rpcrdma_reps_destroy(struct rpcrdma_buffer *buf)
{
	struct rpcrdma_rep *rep;

	while ((rep = rpcrdma_rep_get_locked(buf)) != NULL)
		rpcrdma_rep_destroy(rep);
}
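/* A short note on the rep free list above: rb_free_reps is an llist,
 * so rpcrdma_rep_put() may run lock-free from any context, while
 * rpcrdma_rep_get_locked() relies on its callers (the Receive
 * completion path, rpcrdma_post_recvs(), and transport teardown)
 * to serialize llist_del_first() as the llist API requires.
 */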
/**
 * rpcrdma_buffer_create - Create initial set of req/rep objects
 * @r_xprt: transport instance to (re)initialize
 *
 * Returns zero on success, otherwise a negative errno.
 */
int rpcrdma_buffer_create(struct rpcrdma_xprt *r_xprt)
{
	struct rpcrdma_buffer *buf = &r_xprt->rx_buf;
	int i, rc;

	buf->rb_bc_srv_max_requests = 0;
	spin_lock_init(&buf->rb_lock);
	INIT_LIST_HEAD(&buf->rb_mrs);
	INIT_LIST_HEAD(&buf->rb_all_mrs);
	INIT_WORK(&buf->rb_refresh_worker, rpcrdma_mr_refresh_worker);

	INIT_LIST_HEAD(&buf->rb_send_bufs);
	INIT_LIST_HEAD(&buf->rb_allreqs);
	INIT_LIST_HEAD(&buf->rb_all_reps);

	rc = -ENOMEM;
	for (i = 0; i < r_xprt->rx_xprt.max_reqs; i++) {
		struct rpcrdma_req *req;

		req = rpcrdma_req_create(r_xprt, RPCRDMA_V1_DEF_INLINE_SIZE * 2,
					 GFP_KERNEL);
		if (!req)
			goto out;
		list_add(&req->rl_list, &buf->rb_send_bufs);
	}

	init_llist_head(&buf->rb_free_reps);

	return 0;
out:
	rpcrdma_buffer_destroy(buf);
	return rc;
}

/**
 * rpcrdma_req_destroy - Destroy an rpcrdma_req object
 * @req: unused object to be destroyed
 *
 * Relies on caller holding the transport send lock to protect
 * removing req->rl_all from buf->rb_all_reqs safely.
 */
void rpcrdma_req_destroy(struct rpcrdma_req *req)
{
	struct rpcrdma_mr *mr;

	list_del(&req->rl_all);

	while ((mr = rpcrdma_mr_pop(&req->rl_free_mrs))) {
		struct rpcrdma_buffer *buf = &mr->mr_xprt->rx_buf;

		spin_lock(&buf->rb_lock);
		list_del(&mr->mr_all);
		spin_unlock(&buf->rb_lock);

		frwr_release_mr(mr);
	}

	rpcrdma_regbuf_free(req->rl_recvbuf);
	rpcrdma_regbuf_free(req->rl_sendbuf);
	rpcrdma_regbuf_free(req->rl_rdmabuf);
	kfree(req);
}

/**
 * rpcrdma_mrs_destroy - Release all of a transport's MRs
 * @r_xprt: controlling transport instance
 *
 * Relies on caller holding the transport send lock to protect
 * removing mr->mr_list from req->rl_free_mrs safely.
 */
static void rpcrdma_mrs_destroy(struct rpcrdma_xprt *r_xprt)
{
	struct rpcrdma_buffer *buf = &r_xprt->rx_buf;
	struct rpcrdma_mr *mr;

	cancel_work_sync(&buf->rb_refresh_worker);

	spin_lock(&buf->rb_lock);
	while ((mr = list_first_entry_or_null(&buf->rb_all_mrs,
					      struct rpcrdma_mr,
					      mr_all)) != NULL) {
		list_del(&mr->mr_list);
		list_del(&mr->mr_all);
		spin_unlock(&buf->rb_lock);

		frwr_release_mr(mr);

		spin_lock(&buf->rb_lock);
	}
	spin_unlock(&buf->rb_lock);
}

/**
 * rpcrdma_buffer_destroy - Release all hw resources
 * @buf: root control block for resources
 *
 * ORDERING: relies on a prior rpcrdma_xprt_drain :
 * - No more Send or Receive completions can occur
 * - All MRs, reps, and reqs are returned to their free lists
 */
void
rpcrdma_buffer_destroy(struct rpcrdma_buffer *buf)
{
	rpcrdma_reps_destroy(buf);

	while (!list_empty(&buf->rb_send_bufs)) {
		struct rpcrdma_req *req;

		req = list_first_entry(&buf->rb_send_bufs,
				       struct rpcrdma_req, rl_list);
		list_del(&req->rl_list);
		rpcrdma_req_destroy(req);
	}
}

/**
 * rpcrdma_mr_get - Allocate an rpcrdma_mr object
 * @r_xprt: controlling transport
 *
 * Returns an initialized rpcrdma_mr or NULL if no free
 * rpcrdma_mr objects are available.
 */
struct rpcrdma_mr *
rpcrdma_mr_get(struct rpcrdma_xprt *r_xprt)
{
	struct rpcrdma_buffer *buf = &r_xprt->rx_buf;
	struct rpcrdma_mr *mr;

	spin_lock(&buf->rb_lock);
	mr = rpcrdma_mr_pop(&buf->rb_mrs);
	spin_unlock(&buf->rb_lock);
	return mr;
}

/**
 * rpcrdma_mr_put - DMA unmap an MR and release it
 * @mr: MR to release
 *
 */
void rpcrdma_mr_put(struct rpcrdma_mr *mr)
{
	struct rpcrdma_xprt *r_xprt = mr->mr_xprt;

	if (mr->mr_dir != DMA_NONE) {
		trace_xprtrdma_mr_unmap(mr);
		ib_dma_unmap_sg(r_xprt->rx_ep->re_id->device,
				mr->mr_sg, mr->mr_nents, mr->mr_dir);
		mr->mr_dir = DMA_NONE;
	}

	rpcrdma_mr_push(mr, &mr->mr_req->rl_free_mrs);
}

/**
 * rpcrdma_buffer_get - Get a request buffer
 * @buffers: Buffer pool from which to obtain a buffer
 *
 * Returns a fresh rpcrdma_req, or NULL if none are available.
 */
struct rpcrdma_req *
rpcrdma_buffer_get(struct rpcrdma_buffer *buffers)
{
	struct rpcrdma_req *req;

	spin_lock(&buffers->rb_lock);
	req = list_first_entry_or_null(&buffers->rb_send_bufs,
				       struct rpcrdma_req, rl_list);
	if (req)
		list_del_init(&req->rl_list);
	spin_unlock(&buffers->rb_lock);
	return req;
}

/**
 * rpcrdma_buffer_put - Put request/reply buffers back into pool
 * @buffers: buffer pool
 * @req: object to return
 *
 */
void rpcrdma_buffer_put(struct rpcrdma_buffer *buffers, struct rpcrdma_req *req)
{
	if (req->rl_reply)
		rpcrdma_rep_put(buffers, req->rl_reply);
	req->rl_reply = NULL;

	spin_lock(&buffers->rb_lock);
	list_add(&req->rl_list, &buffers->rb_send_bufs);
	spin_unlock(&buffers->rb_lock);
}

/**
 * rpcrdma_recv_buffer_put - Release rpcrdma_rep back to free list
 * @rep: rep to release
 *
 * Used after error conditions.
 */
void rpcrdma_recv_buffer_put(struct rpcrdma_rep *rep)
{
	rpcrdma_rep_put(&rep->rr_rxprt->rx_buf, rep);
}

/* Returns a pointer to a rpcrdma_regbuf object, or NULL.
 *
 * xprtrdma uses a regbuf for posting an outgoing RDMA SEND, or for
 * receiving the payload of RDMA RECV operations. During Long Calls
 * or Replies they may be registered externally via frwr_map.
 */
static struct rpcrdma_regbuf *
rpcrdma_regbuf_alloc(size_t size, enum dma_data_direction direction,
		     gfp_t flags)
{
	struct rpcrdma_regbuf *rb;

	rb = kmalloc(sizeof(*rb), flags);
	if (!rb)
		return NULL;
	rb->rg_data = kmalloc(size, flags);
	if (!rb->rg_data) {
		kfree(rb);
		return NULL;
	}

	rb->rg_device = NULL;
	rb->rg_direction = direction;
	rb->rg_iov.length = size;
	return rb;
}

/**
 * rpcrdma_regbuf_realloc - re-allocate a SEND/RECV buffer
 * @rb: regbuf to reallocate
 * @size: size of buffer to be allocated, in bytes
 * @flags: GFP flags
 *
 * Returns true if reallocation was successful. If false is
 * returned, @rb is left untouched.
 */
bool rpcrdma_regbuf_realloc(struct rpcrdma_regbuf *rb, size_t size, gfp_t flags)
{
	void *buf;

	buf = kmalloc(size, flags);
	if (!buf)
		return false;

	rpcrdma_regbuf_dma_unmap(rb);
	kfree(rb->rg_data);

	rb->rg_data = buf;
	rb->rg_iov.length = size;
	return true;
}

/**
 * __rpcrdma_regbuf_dma_map - DMA-map a regbuf
 * @r_xprt: controlling transport instance
 * @rb: regbuf to be mapped
 *
 * Returns true if the buffer is now DMA mapped to @r_xprt's device
 */
bool __rpcrdma_regbuf_dma_map(struct rpcrdma_xprt *r_xprt,
			      struct rpcrdma_regbuf *rb)
{
	struct ib_device *device = r_xprt->rx_ep->re_id->device;

	if (rb->rg_direction == DMA_NONE)
		return false;

	rb->rg_iov.addr = ib_dma_map_single(device, rdmab_data(rb),
					    rdmab_length(rb), rb->rg_direction);
	if (ib_dma_mapping_error(device, rdmab_addr(rb))) {
		trace_xprtrdma_dma_maperr(rdmab_addr(rb));
		return false;
	}

	rb->rg_device = device;
	rb->rg_iov.lkey = r_xprt->rx_ep->re_pd->local_dma_lkey;
	return true;
}

static void rpcrdma_regbuf_dma_unmap(struct rpcrdma_regbuf *rb)
{
	if (!rb)
		return;

	if (!rpcrdma_regbuf_is_mapped(rb))
		return;

	ib_dma_unmap_single(rb->rg_device, rdmab_addr(rb), rdmab_length(rb),
			    rb->rg_direction);
	rb->rg_device = NULL;
}

static void rpcrdma_regbuf_free(struct rpcrdma_regbuf *rb)
{
	rpcrdma_regbuf_dma_unmap(rb);
	if (rb)
		kfree(rb->rg_data);
	kfree(rb);
}
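/* Send completion signaling, for reference: rpcrdma_post_sends() below
 * requests a signaled completion only when the send batch counter
 * (re_send_count) has been exhausted or when the request still holds
 * extra references (kref_read(&req->rl_kref) > 1). All other Sends are
 * posted unsignaled; their sendctxs are retired later by
 * rpcrdma_sendctx_put_locked(), which walks the sendctx queue forward
 * to the signaled sendctx and unmaps each one it passes.
 */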
/**
 * rpcrdma_post_sends - Post WRs to a transport's Send Queue
 * @r_xprt: controlling transport instance
 * @req: rpcrdma_req containing the Send WR to post
 *
 * Returns 0 if the post was successful, otherwise -ENOTCONN
 * is returned.
 */
int rpcrdma_post_sends(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req)
{
	struct ib_send_wr *send_wr = &req->rl_wr;
	struct rpcrdma_ep *ep = r_xprt->rx_ep;
	int rc;

	if (!ep->re_send_count || kref_read(&req->rl_kref) > 1) {
		send_wr->send_flags |= IB_SEND_SIGNALED;
		ep->re_send_count = ep->re_send_batch;
	} else {
		send_wr->send_flags &= ~IB_SEND_SIGNALED;
		--ep->re_send_count;
	}

	trace_xprtrdma_post_send(req);
	rc = frwr_send(r_xprt, req);
	if (rc)
		return -ENOTCONN;
	return 0;
}

/**
 * rpcrdma_post_recvs - Refill the Receive Queue
 * @r_xprt: controlling transport instance
 * @temp: mark Receive buffers to be deleted after use
 *
 */
void rpcrdma_post_recvs(struct rpcrdma_xprt *r_xprt, bool temp)
{
	struct rpcrdma_buffer *buf = &r_xprt->rx_buf;
	struct rpcrdma_ep *ep = r_xprt->rx_ep;
	struct ib_recv_wr *wr, *bad_wr;
	struct rpcrdma_rep *rep;
	int needed, count, rc;

	rc = 0;
	count = 0;

	needed = buf->rb_credits + (buf->rb_bc_srv_max_requests << 1);
	if (likely(ep->re_receive_count > needed))
		goto out;
	needed -= ep->re_receive_count;
	if (!temp)
		needed += RPCRDMA_MAX_RECV_BATCH;

	/* fast path: all needed reps can be found on the free list */
	wr = NULL;
	while (needed) {
		rep = rpcrdma_rep_get_locked(buf);
		if (rep && rep->rr_temp) {
			rpcrdma_rep_destroy(rep);
			continue;
		}
		if (!rep)
			rep = rpcrdma_rep_create(r_xprt, temp);
		if (!rep)
			break;

		trace_xprtrdma_post_recv(rep);
		rep->rr_recv_wr.next = wr;
		wr = &rep->rr_recv_wr;
		--needed;
		++count;
	}
	if (!wr)
		goto out;

	rc = ib_post_recv(ep->re_id->qp, wr,
			  (const struct ib_recv_wr **)&bad_wr);
out:
	trace_xprtrdma_post_recvs(r_xprt, count, rc);
	if (rc) {
		for (wr = bad_wr; wr;) {
			struct rpcrdma_rep *rep;

			rep = container_of(wr, struct rpcrdma_rep, rr_recv_wr);
			wr = wr->next;
			rpcrdma_recv_buffer_put(rep);
			--count;
		}
	}
	ep->re_receive_count += count;
	return;
}